summaryrefslogtreecommitdiffstats
path: root/api/src/Makefile.am
diff options
context:
space:
mode:
authorAmar Tumballi <amarts@redhat.com>2012-09-06 00:44:04 +0530
committerAnand Avati <avati@redhat.com>2012-10-02 20:07:16 -0700
commitec79583cfd2ae10998dedfeb0c2c2a83a81cdec3 (patch)
tree55703f648d71cba9317395e54a77c8904ce99160 /api/src/Makefile.am
parent3372f198a8272b3467944c759be9975ee8f0aa02 (diff)
dict: add new API 'dict_add()'
* this new API is used only when we are sure that there is no replacing keys, ie. in dict_unserialize(). Change-Id: I383dffc65056ebdaf0ab19727f7dc14ec7017fc1 Signed-off-by: Amar Tumballi <amarts@redhat.com> BUG: 850917 Reviewed-on: http://review.gluster.org/3844 Tested-by: Gluster Build System <jenkins@build.gluster.com> Reviewed-by: Anand Avati <avati@redhat.com>
Diffstat (limited to 'api/src/Makefile.am')
0 files changed, 0 insertions, 0 deletions
none' style='width: 99.6%;'/> -rw-r--r--COMMITMENT46
-rw-r--r--CONTRIBUTING25
-rw-r--r--CONTRIBUTING.md114
-rw-r--r--INSTALL35
-rw-r--r--MAINTAINERS510
-rw-r--r--Makefile.am53
-rw-r--r--README1
-rw-r--r--README.md46
-rw-r--r--THANKS2
-rw-r--r--api/Makefile.am1
-rw-r--r--api/examples/Makefile.am6
-rw-r--r--api/examples/README36
-rwxr-xr-xapi/examples/autogen.sh5
-rw-r--r--api/examples/configure.ac12
-rwxr-xr-xapi/examples/getvolfile.py45
-rw-r--r--api/examples/glfsxmp.c1811
-rw-r--r--api/src/Makefile.am43
-rw-r--r--api/src/README.Symbol_Versions3
-rw-r--r--api/src/gfapi-messages.h147
-rw-r--r--api/src/gfapi.aliases201
-rw-r--r--api/src/gfapi.map283
-rw-r--r--api/src/glfs-fops.c6445
-rw-r--r--api/src/glfs-handleops.c2655
-rw-r--r--api/src/glfs-handles.h355
-rw-r--r--api/src/glfs-internal.h756
-rw-r--r--api/src/glfs-master.c183
-rw-r--r--api/src/glfs-mem-types.h35
-rw-r--r--api/src/glfs-mgmt.c1049
-rw-r--r--api/src/glfs-resolve.c1199
-rw-r--r--api/src/glfs.c1806
-rw-r--r--api/src/glfs.h1485
-rw-r--r--argp-standalone/Makefile.am38
-rw-r--r--argp-standalone/acinclude.m41084
-rw-r--r--argp-standalone/argp-ba.c26
-rw-r--r--argp-standalone/argp-eexst.c36
-rw-r--r--argp-standalone/argp-fmtstream.c475
-rw-r--r--argp-standalone/argp-fmtstream.h319
-rw-r--r--argp-standalone/argp-help.c1849
-rw-r--r--argp-standalone/argp-namefrob.h96
-rw-r--r--argp-standalone/argp-parse.c1305
-rw-r--r--argp-standalone/argp-pv.c25
-rw-r--r--argp-standalone/argp-pvh.c32
-rw-r--r--argp-standalone/argp.h602
-rwxr-xr-xargp-standalone/autogen.sh6
-rw-r--r--argp-standalone/configure.ac102
-rw-r--r--argp-standalone/mempcpy.c21
-rw-r--r--argp-standalone/strcasecmp.c29
-rw-r--r--argp-standalone/strchrnul.c23
-rw-r--r--argp-standalone/strndup.c34
-rw-r--r--argp-standalone/vsnprintf.c839
-rwxr-xr-xautogen.sh96
-rw-r--r--booster/Makefile.am1
-rw-r--r--booster/src/Makefile.am21
-rw-r--r--booster/src/booster-fd.c342
-rw-r--r--booster/src/booster-fd.h83
-rw-r--r--booster/src/booster.c3172
-rw-r--r--booster/src/booster_fstab.c452
-rw-r--r--booster/src/booster_fstab.h83
-rw-r--r--booster/src/booster_stat.c188
-rwxr-xr-xbuild-aux/checkpatch.pl4326
-rwxr-xr-xbuild-aux/config.guess.dist14
-rwxr-xr-xbuild-aux/config.sub.dist14
-rwxr-xr-xbuild-aux/pkg-version64
-rw-r--r--cli/src/Makefile.am37
-rw-r--r--cli/src/cli-cmd-global.c195
-rw-r--r--cli/src/cli-cmd-misc.c152
-rw-r--r--cli/src/cli-cmd-parser.c7121
-rw-r--r--cli/src/cli-cmd-peer.c433
-rw-r--r--cli/src/cli-cmd-snapshot.c135
-rw-r--r--cli/src/cli-cmd-system.c793
-rw-r--r--cli/src/cli-cmd-volume.c4284
-rw-r--r--cli/src/cli-cmd.c580
-rw-r--r--cli/src/cli-cmd.h173
-rw-r--r--cli/src/cli-mem-types.h41
-rw-r--r--cli/src/cli-quotad-client.c146
-rw-r--r--cli/src/cli-quotad-client.h29
-rw-r--r--cli/src/cli-rl.c535
-rw-r--r--cli/src/cli-rpc-ops.c14878
-rw-r--r--cli/src/cli-xml-output.c7431
-rw-r--r--cli/src/cli.c1173
-rw-r--r--cli/src/cli.h561
-rw-r--r--cli/src/input.c134
-rw-r--r--cli/src/registry.c524
-rw-r--r--configure.ac1717
-rw-r--r--contrib/aclocal/mkdirp.m4146
-rw-r--r--contrib/aclocal/python.m4209
-rw-r--r--contrib/fuse-include/fuse-mount.h1
-rw-r--r--contrib/fuse-include/fuse_kernel.h578
-rw-r--r--contrib/fuse-include/fuse_kernel_macfuse.h461
-rw-r--r--contrib/fuse-lib/misc.c3
-rw-r--r--contrib/fuse-lib/mount-common.c35
-rw-r--r--contrib/fuse-lib/mount-gluster-compat.h66
-rw-r--r--contrib/fuse-lib/mount.c270
-rw-r--r--contrib/fuse-util/Makefile.am4
-rw-r--r--contrib/fuse-util/fusermount.c37
-rw-r--r--contrib/ipaddr-py/COPYING202
-rw-r--r--contrib/ipaddr-py/MANIFEST.in3
-rw-r--r--contrib/ipaddr-py/OWNERS4
-rw-r--r--contrib/ipaddr-py/README8
-rw-r--r--contrib/ipaddr-py/ipaddr.py1907
-rwxr-xr-xcontrib/ipaddr-py/ipaddr_test.py1099
-rwxr-xr-xcontrib/ipaddr-py/setup.py36
-rwxr-xr-xcontrib/ipaddr-py/test-2to3.sh15
-rw-r--r--contrib/libexecinfo/execinfo.c442
-rw-r--r--contrib/libexecinfo/execinfo_compat.h51
-rw-r--r--contrib/libgen/basename_r.c6
-rw-r--r--contrib/libgen/dirname_r.c4
-rw-r--r--contrib/macfuse/fuse_param.h97
-rw-r--r--contrib/macfuse/mount_darwin.c459
-rw-r--r--contrib/mount/mntent.c260
-rw-r--r--contrib/mount/mntent_compat.h37
-rw-r--r--contrib/timer-wheel/find_last_bit.c61
-rw-r--r--contrib/timer-wheel/timer-wheel.c374
-rw-r--r--contrib/timer-wheel/timer-wheel.h77
-rw-r--r--contrib/umountd/Makefile.am11
-rw-r--r--contrib/umountd/umountd.c255
-rw-r--r--contrib/userspace-rcu/rculist-extra.h42
-rw-r--r--contrib/userspace-rcu/static-wfcqueue.h685
-rw-r--r--contrib/userspace-rcu/static-wfstack.h455
-rw-r--r--contrib/userspace-rcu/wfcqueue.h216
-rw-r--r--contrib/userspace-rcu/wfstack.h178
-rw-r--r--contrib/uuid/clear.c43
-rw-r--r--contrib/uuid/compare.c55
-rw-r--r--contrib/uuid/copy.c45
-rw-r--r--contrib/uuid/gen_uuid.c679
-rw-r--r--contrib/uuid/gen_uuid_nt.c92
-rw-r--r--contrib/uuid/isnull.c48
-rw-r--r--contrib/uuid/pack.c69
-rw-r--r--contrib/uuid/parse.c79
-rw-r--r--contrib/uuid/tst_uuid.c180
-rw-r--r--contrib/uuid/unpack.c63
-rw-r--r--contrib/uuid/unparse.c76
-rw-r--r--contrib/uuid/uuid.h104
-rw-r--r--contrib/uuid/uuidP.h63
-rw-r--r--contrib/uuid/uuid_time.c171
-rw-r--r--contrib/uuid/uuid_types.h.in50
-rw-r--r--contrib/uuid/uuidd.h54
-rw-r--r--contrib/xxhash/xxhash.c1029
-rw-r--r--contrib/xxhash/xxhash.h328
-rw-r--r--contrib/xxhash/xxhsum.c1301
-rw-r--r--doc/Makefile.am12
-rw-r--r--doc/README.md26
-rw-r--r--doc/admin-guide/en-US/Administration_Guide.ent4
-rw-r--r--doc/admin-guide/en-US/Administration_Guide.xml27
-rw-r--r--doc/admin-guide/en-US/Author_Group.xml17
-rw-r--r--doc/admin-guide/en-US/Book_Info.xml28
-rw-r--r--doc/admin-guide/en-US/Chapter.xml33
-rw-r--r--doc/admin-guide/en-US/Preface.xml24
-rw-r--r--doc/admin-guide/en-US/Revision_History.xml27
-rw-r--r--doc/admin-guide/en-US/admin_ACLs.xml206
-rw-r--r--doc/admin-guide/en-US/admin_Hadoop.xml244
-rw-r--r--doc/admin-guide/en-US/admin_UFO.xml1588
-rw-r--r--doc/admin-guide/en-US/admin_commandref.xml334
-rw-r--r--doc/admin-guide/en-US/admin_console.xml28
-rw-r--r--doc/admin-guide/en-US/admin_directory_Quota.xml179
-rw-r--r--doc/admin-guide/en-US/admin_geo-replication.xml732
-rw-r--r--doc/admin-guide/en-US/admin_managing_volumes.xml735
-rw-r--r--doc/admin-guide/en-US/admin_monitoring_workload.xml878
-rw-r--r--doc/admin-guide/en-US/admin_setting_volumes.xml325
-rw-r--r--doc/admin-guide/en-US/admin_settingup_clients.xml511
-rw-r--r--doc/admin-guide/en-US/admin_start_stop_daemon.xml56
-rw-r--r--doc/admin-guide/en-US/admin_storage_pools.xml57
-rw-r--r--doc/admin-guide/en-US/admin_troubleshooting.xml509
-rw-r--r--doc/admin-guide/en-US/gfs_introduction.xml54
-rw-r--r--doc/admin-guide/en-US/glossary.xml126
-rw-r--r--doc/admin-guide/en-US/images/640px-GlusterFS_3.2_Architecture.pngbin97477 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Distributed_Replicated_Volume.pngbin51326 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Distributed_Striped_Replicated_Volume.pngbin57210 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Distributed_Striped_Volume.pngbin53781 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Distributed_Volume.pngbin47211 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Geo-Rep03_Internet.pngbin131824 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Geo-Rep04_Cascading.pngbin187341 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Geo-Rep_LAN.pngbin163417 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Geo-Rep_WAN.pngbin96291 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/GlusterFS_3.2_Architecture.pngbin133597 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Hadoop_Architecture.pngbin43815 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Replicated_Volume.pngbin44077 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Striped_Replicated_Volume.pngbin50193 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/Striped_Volume.pngbin43316 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/UFO_Architecture.pngbin72139 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/VSA_Architecture.pngbin38875 -> 0 bytes-rw-r--r--doc/admin-guide/en-US/images/arhitecture.png13
-rw-r--r--doc/admin-guide/en-US/images/icon.svg19
-rw-r--r--doc/admin-guide/publican.cfg12
-rw-r--r--doc/debugging/analyzing-regression-cores.md54
-rw-r--r--doc/debugging/gfid-to-path.md68
-rw-r--r--doc/debugging/mem-alloc-list.md19
-rw-r--r--doc/debugging/split-brain.md264
-rw-r--r--doc/debugging/statedump.md414
-rw-r--r--doc/developer-guide/Language-Bindings.md45
-rw-r--r--doc/developer-guide/README.md81
-rw-r--r--doc/developer-guide/Using-Gluster-Test-Framework.md271
-rw-r--r--doc/developer-guide/adding-fops.md18
-rw-r--r--doc/developer-guide/afr-locks-evolution.md91
-rw-r--r--doc/developer-guide/afr-self-heal-daemon.md92
-rw-r--r--doc/developer-guide/afr.md191
-rw-r--r--doc/developer-guide/brickmux-thread-reduction.md64
-rw-r--r--doc/developer-guide/coding-standard.md697
-rw-r--r--doc/developer-guide/commit-guidelines.md136
-rw-r--r--doc/developer-guide/daemon-management-framework.md38
-rw-r--r--doc/developer-guide/datastructure-inode.md221
-rw-r--r--doc/developer-guide/datastructure-iobuf.md259
-rw-r--r--doc/developer-guide/datastructure-mem-pool.md124
-rw-r--r--doc/developer-guide/dirops-transactions-in-dht.md273
-rw-r--r--doc/developer-guide/ec-implementation.md588
-rw-r--r--doc/developer-guide/fuse-interrupt.md211
-rw-r--r--doc/developer-guide/gfapi-symbol-versions.md270
-rw-r--r--doc/developer-guide/identifying-resource-leaks.md200
-rw-r--r--doc/developer-guide/logging-guidelines.md132
-rw-r--r--doc/developer-guide/network_compression.md71
-rw-r--r--doc/developer-guide/options-to-contribute.md212
-rw-r--r--doc/developer-guide/posix.md59
-rw-r--r--doc/developer-guide/rpc-for-glusterfs.changes-done.txt (renamed from doc/legacy/rpc-for-glusterfs.changes-done.txt)0
-rw-r--r--doc/developer-guide/rpc-for-glusterfs.new-versions.md32
-rw-r--r--doc/developer-guide/syncop.md72
-rw-r--r--doc/developer-guide/thread-naming.md104
-rw-r--r--doc/developer-guide/translator-development.md683
-rw-r--r--doc/developer-guide/unittest.md228
-rw-r--r--doc/developer-guide/versioning.md44
-rw-r--r--doc/developer-guide/write-behind.md56
-rw-r--r--doc/developer-guide/writing-a-cloudsync-plugin.md164
-rw-r--r--doc/developer-guide/xlator-classification.md221
-rw-r--r--doc/examples/legacy/Makefile.am8
-rw-r--r--doc/examples/legacy/README13
-rw-r--r--doc/examples/legacy/filter.vol23
-rw-r--r--doc/examples/legacy/io-cache.vol31
-rw-r--r--doc/examples/legacy/io-threads.vol22
-rw-r--r--doc/examples/legacy/posix-locks.vol19
-rw-r--r--doc/examples/legacy/protocol-client.vol12
-rw-r--r--doc/examples/legacy/protocol-server.vol21
-rw-r--r--doc/examples/legacy/read-ahead.vol24
-rw-r--r--doc/examples/legacy/replicate.vol118
-rw-r--r--doc/examples/legacy/stripe.vol120
-rw-r--r--doc/examples/legacy/trace.vol21
-rw-r--r--doc/examples/legacy/trash.vol20
-rw-r--r--doc/examples/legacy/write-behind.vol27
-rw-r--r--doc/features/ctime.md68
-rw-r--r--doc/features/ganesha-ha.md43
-rw-r--r--doc/gluster.8280
-rw-r--r--doc/glusterd.824
-rw-r--r--doc/glusterd.vol8
-rw-r--r--doc/glusterfs.8104
-rw-r--r--doc/glusterfs.vol.sample53
-rw-r--r--doc/glusterfsd.834
-rw-r--r--doc/glusterfsd.vol.sample44
-rw-r--r--doc/legacy/authentication.txt112
-rw-r--r--doc/legacy/booster.txt54
-rw-r--r--doc/legacy/coding-standard.pdfbin68627 -> 0 bytes-rw-r--r--doc/legacy/coding-standard.tex385
-rw-r--r--doc/legacy/errno.list.bsd.txt376
-rw-r--r--doc/legacy/errno.list.linux.txt1586
-rw-r--r--doc/legacy/errno.list.macosx.txt1513
-rw-r--r--doc/legacy/errno.list.solaris.txt206
-rw-r--r--doc/legacy/get_put_api_using_xattr.txt22
-rw-r--r--doc/legacy/hacker-guide/Makefile.am8
-rw-r--r--doc/legacy/hacker-guide/adding-fops.txt33
-rw-r--r--doc/legacy/hacker-guide/bdb.txt70
-rw-r--r--doc/legacy/hacker-guide/call-stub.txt1033
-rw-r--r--doc/legacy/hacker-guide/hacker-guide.tex309
-rw-r--r--doc/legacy/hacker-guide/lock-ahead.txt80
-rw-r--r--doc/legacy/hacker-guide/posix.txt59
-rw-r--r--doc/legacy/hacker-guide/replicate.txt206
-rw-r--r--doc/legacy/hacker-guide/write-behind.txt45
-rw-r--r--doc/legacy/handling-options.txt13
-rw-r--r--doc/legacy/mac-related-xattrs.txt21
-rw-r--r--doc/legacy/porting_guide.txt45
-rw-r--r--doc/legacy/replicate.lyx797
-rw-r--r--doc/legacy/replicate.pdfbin109057 -> 0 bytes-rw-r--r--doc/legacy/solaris-related-xattrs.txt44
-rw-r--r--doc/legacy/stat-prefetch-design.txt154
-rw-r--r--doc/legacy/translator-options.txt224
-rw-r--r--doc/mount.glusterfs.8154
-rw-r--r--doc/qa/legacy/qa-client.vol170
-rw-r--r--doc/qa/legacy/qa-high-avail-client.vol17
-rw-r--r--doc/qa/legacy/qa-high-avail-server.vol344
-rw-r--r--doc/qa/legacy/qa-server.vol284
-rw-r--r--doc/user-guide/legacy/Makefile.am3
-rw-r--r--doc/user-guide/legacy/advanced-stripe.odgbin12648 -> 0 bytes-rw-r--r--doc/user-guide/legacy/advanced-stripe.pdfbin13382 -> 0 bytes-rw-r--r--doc/user-guide/legacy/colonO-icon.jpgbin779 -> 0 bytes-rw-r--r--doc/user-guide/legacy/fdl.texi454
-rw-r--r--doc/user-guide/legacy/fuse.odgbin13190 -> 0 bytes-rw-r--r--doc/user-guide/legacy/fuse.pdfbin14948 -> 0 bytes-rw-r--r--doc/user-guide/legacy/ha.odgbin37290 -> 0 bytes-rw-r--r--doc/user-guide/legacy/ha.pdfbin19403 -> 0 bytes-rw-r--r--doc/user-guide/legacy/stripe.odgbin10188 -> 0 bytes-rw-r--r--doc/user-guide/legacy/stripe.pdfbin11941 -> 0 bytes-rw-r--r--doc/user-guide/legacy/unify.odgbin12955 -> 0 bytes-rw-r--r--doc/user-guide/legacy/unify.pdfbin18969 -> 0 bytes-rw-r--r--doc/user-guide/legacy/user-guide.info2697
-rw-r--r--doc/user-guide/legacy/user-guide.pdfbin353986 -> 0 bytes-rw-r--r--doc/user-guide/legacy/user-guide.texi2246
-rw-r--r--doc/user-guide/legacy/xlator.odgbin12169 -> 0 bytes-rw-r--r--doc/user-guide/legacy/xlator.pdfbin14358 -> 0 bytes-rw-r--r--events/Makefile.am8
-rw-r--r--events/eventskeygen.py243
-rw-r--r--events/src/Makefile.am41
-rw-r--r--events/src/__init__.py10
-rw-r--r--events/src/eventsapiconf.py.in59
-rw-r--r--events/src/eventsconfig.json5
-rw-r--r--events/src/gf_event.py60
-rw-r--r--events/src/glustereventsd.py159
-rw-r--r--events/src/handlers.py40
-rw-r--r--events/src/peer_eventsapi.py669
-rw-r--r--events/src/utils.py445
-rw-r--r--events/tools/Makefile.am6
-rw-r--r--events/tools/eventsdash.py75
-rw-r--r--extras/LinuxRPM/Makefile.am55
-rwxr-xr-xextras/LinuxRPM/make_glusterrpms9
-rw-r--r--extras/MacOSX/Portfile26
-rw-r--r--extras/MacOSX/README.MacOSX84
-rw-r--r--extras/Makefile.am80
-rw-r--r--extras/Ubuntu/README.Ubuntu14
-rw-r--r--extras/Ubuntu/glusterfs-server.conf (renamed from extras/Ubuntu/glusterd.conf)0
-rw-r--r--extras/Ubuntu/mounting-glusterfs.conf5
-rw-r--r--extras/backend-cleanup.sh2
-rw-r--r--extras/benchmarking/Makefile.am4
-rw-r--r--extras/benchmarking/glfs-bm.c534
-rw-r--r--extras/benchmarking/rdd.c1025
-rwxr-xr-xextras/check_goto.pl45
-rwxr-xr-xextras/clang-checker.sh301
-rw-r--r--extras/cliutils/Makefile.am4
-rw-r--r--extras/cliutils/README.md233
-rw-r--r--extras/cliutils/__init__.py31
-rw-r--r--extras/cliutils/cliutils.py237
-rwxr-xr-xextras/collect-system-stats.sh52
-rw-r--r--extras/command-completion/Makefile6
-rw-r--r--extras/command-completion/README5
-rw-r--r--extras/command-completion/gluster.bash492
-rwxr-xr-xextras/control-cpu-load.sh116
-rwxr-xr-xextras/control-mem.sh128
-rw-r--r--extras/create_new_xlator/README.md24
-rwxr-xr-xextras/create_new_xlator/generate_xlator.py208
-rw-r--r--extras/create_new_xlator/new-xlator.c.tmpl151
-rw-r--r--extras/devel-tools/devel-vagrant/Vagrantfile165
-rw-r--r--extras/devel-tools/devel-vagrant/ansible/roles/cluster/tasks/main.yml5
-rw-r--r--extras/devel-tools/devel-vagrant/ansible/roles/compile-gluster/tasks/main.yml29
-rw-r--r--extras/devel-tools/devel-vagrant/ansible/roles/install-pkgs/tasks/main.yml72
-rw-r--r--extras/devel-tools/devel-vagrant/ansible/roles/iptables/tasks/main.yml3
-rw-r--r--extras/devel-tools/devel-vagrant/ansible/roles/prepare-brick/tasks/main.yml30
-rw-r--r--extras/devel-tools/devel-vagrant/ansible/roles/selinux/tasks/main.yml3
-rw-r--r--extras/devel-tools/devel-vagrant/ansible/roles/service/tasks/main.yml21
-rw-r--r--extras/devel-tools/devel-vagrant/ansible/setup.yml23
-rw-r--r--extras/devel-tools/devel-vagrant/bootstrap.sh3
-rwxr-xr-xextras/devel-tools/devel-vagrant/up.sh4
-rw-r--r--extras/devel-tools/gdb_macros84
-rwxr-xr-xextras/devel-tools/print-backtrace.sh115
-rwxr-xr-xextras/devel-tools/strace-brick.sh55
-rw-r--r--extras/distributed-testing/README28
-rw-r--r--extras/distributed-testing/distributed-test-build-env20
-rwxr-xr-xextras/distributed-testing/distributed-test-build.sh27
-rw-r--r--extras/distributed-testing/distributed-test-env48
-rwxr-xr-xextras/distributed-testing/distributed-test-runner.py859
-rwxr-xr-xextras/distributed-testing/distributed-test.sh95
-rw-r--r--extras/ec-heal-script/README.md69
-rwxr-xr-xextras/ec-heal-script/correct_pending_heals.sh415
-rwxr-xr-xextras/ec-heal-script/gfid_needing_heal_parallel.sh278
-rwxr-xr-xextras/failed-tests.py180
-rw-r--r--extras/firewalld/Makefile.am8
-rw-r--r--extras/firewalld/glusterfs.xml14
-rw-r--r--extras/ganesha/Makefile.am2
-rw-r--r--extras/ganesha/config/Makefile.am4
-rw-r--r--extras/ganesha/config/ganesha-ha.conf.sample19
-rw-r--r--extras/ganesha/ocf/Makefile.am11
-rw-r--r--extras/ganesha/ocf/ganesha_grace221
-rw-r--r--extras/ganesha/ocf/ganesha_mon234
-rw-r--r--extras/ganesha/ocf/ganesha_nfsd167
-rw-r--r--extras/ganesha/scripts/Makefile.am6
-rwxr-xr-xextras/ganesha/scripts/create-export-ganesha.sh92
-rwxr-xr-xextras/ganesha/scripts/dbus-send.sh70
-rw-r--r--extras/ganesha/scripts/ganesha-ha.sh1199
-rwxr-xr-xextras/ganesha/scripts/generate-epoch.py48
-rwxr-xr-xextras/generate-xdr-files.sh107
-rw-r--r--extras/geo-rep/Makefile.am16
-rw-r--r--extras/geo-rep/generate-gfid-file.sh70
-rwxr-xr-xextras/geo-rep/get-gfid.sh7
-rw-r--r--extras/geo-rep/gsync-sync-gfid.c109
-rw-r--r--extras/geo-rep/gsync-upgrade.sh123
-rw-r--r--extras/geo-rep/schedule_georep.py.in492
-rw-r--r--extras/geo-rep/slave-upgrade.sh102
-rwxr-xr-xextras/gfid-to-dirname.sh46
-rwxr-xr-xextras/git-branch-diff.py285
-rw-r--r--extras/gluster-rsyslog-5.8.conf51
-rw-r--r--extras/gluster-rsyslog-7.2.conf76
-rw-r--r--extras/glusterd-sysconfig6
-rw-r--r--extras/glusterd.vol.in15
-rw-r--r--extras/glusterfs-georep-logrotate61
-rwxr-xr-xextras/glusterfs-georep-upgrade.py77
-rw-r--r--extras/glusterfs-logrotate63
-rw-r--r--extras/glusterfs-mode.el225
-rw-r--r--extras/glusterfs.vim21
-rwxr-xr-x[-rw-r--r--]extras/gnfs-loganalyse.py23
-rw-r--r--extras/group-db-workload12
-rw-r--r--extras/group-distributed-virt10
-rw-r--r--extras/group-gluster-block27
-rw-r--r--extras/group-metadata-cache6
-rw-r--r--extras/group-nl-cache5
-rw-r--r--extras/group-samba11
-rw-r--r--extras/group-virt.example24
-rw-r--r--extras/hook-scripts/Makefile.am8
-rw-r--r--extras/hook-scripts/S29CTDBsetup.sh69
-rwxr-xr-xextras/hook-scripts/S30samba-start.sh62
-rwxr-xr-xextras/hook-scripts/S30samba-stop.sh60
-rwxr-xr-xextras/hook-scripts/S40ufo-stop.py24
-rwxr-xr-xextras/hook-scripts/S56glusterd-geo-rep-create-post.sh104
-rw-r--r--extras/hook-scripts/add-brick/Makefile.am2
-rw-r--r--extras/hook-scripts/add-brick/post/Makefile.am6
-rwxr-xr-xextras/hook-scripts/add-brick/post/S10selinux-label-brick.sh100
-rwxr-xr-xextras/hook-scripts/add-brick/post/S13create-subdir-mounts.sh86
-rwxr-xr-xextras/hook-scripts/add-brick/post/disabled-quota-root-xattr-heal.sh145
-rw-r--r--extras/hook-scripts/add-brick/pre/Makefile.am6
-rwxr-xr-xextras/hook-scripts/add-brick/pre/S28Quota-enable-root-xattr-heal.sh101
-rw-r--r--extras/hook-scripts/create/Makefile.am1
-rw-r--r--extras/hook-scripts/create/post/Makefile.am8
-rwxr-xr-xextras/hook-scripts/create/post/S10selinux-label-brick.sh65
-rw-r--r--extras/hook-scripts/delete/Makefile.am1
-rw-r--r--extras/hook-scripts/delete/pre/Makefile.am8
-rwxr-xr-xextras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh73
-rw-r--r--extras/hook-scripts/reset/Makefile.am2
-rw-r--r--extras/hook-scripts/reset/post/Makefile.am1
-rw-r--r--extras/hook-scripts/reset/pre/Makefile.am1
-rw-r--r--extras/hook-scripts/set/Makefile.am2
-rw-r--r--extras/hook-scripts/set/post/Makefile.am6
-rwxr-xr-xextras/hook-scripts/set/post/S30samba-set.sh161
-rwxr-xr-xextras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh136
-rw-r--r--extras/hook-scripts/start/Makefile.am2
-rw-r--r--extras/hook-scripts/start/post/Makefile.am6
-rwxr-xr-xextras/hook-scripts/start/post/S29CTDBsetup.sh84
-rwxr-xr-xextras/hook-scripts/start/post/S30samba-start.sh145
-rwxr-xr-xextras/hook-scripts/start/post/S31ganesha-start.sh122
-rw-r--r--extras/hook-scripts/stop/Makefile.am2
-rw-r--r--extras/hook-scripts/stop/pre/Makefile.am6
-rwxr-xr-xextras/hook-scripts/stop/pre/S29CTDB-teardown.sh62
-rwxr-xr-xextras/hook-scripts/stop/pre/S30samba-stop.sh77
-rwxr-xr-xextras/identify-hangs.sh53
-rw-r--r--extras/init.d/Makefile.am33
-rw-r--r--extras/init.d/glusterd-FreeBSD.in24
-rwxr-xr-xextras/init.d/glusterd-Redhat.in115
-rwxr-xr-xextras/init.d/glusterd-SuSE.in10
-rw-r--r--extras/init.d/glusterd.plist.in2
-rw-r--r--extras/init.d/glustereventsd-Debian.in91
-rw-r--r--extras/init.d/glustereventsd-FreeBSD.in19
-rw-r--r--extras/init.d/glustereventsd-Redhat.in129
-rw-r--r--extras/logger.conf.example13
-rwxr-xr-xextras/mount-shared-storage.sh39
-rw-r--r--extras/ocf/Makefile.am11
-rwxr-xr-xextras/ocf/glusterd.in212
-rwxr-xr-xextras/ocf/volume.in276
-rw-r--r--extras/peer_add_secret_pub.in70
-rwxr-xr-xextras/post-upgrade-script-for-quota.sh65
-rwxr-xr-xextras/pre-upgrade-script-for-quota.sh18
-rwxr-xr-xextras/profiler/glusterfs-profiler27
-rw-r--r--extras/python/Makefile.am7
-rw-r--r--extras/python/__init__.py2
-rwxr-xr-xextras/quota-metadata-cleanup.sh24
-rwxr-xr-xextras/quota-remove-xattr.sh24
-rwxr-xr-xextras/quota/contri-add.sh (renamed from extras/contri-add.sh)0
-rwxr-xr-xextras/quota/log_accounting.sh26
-rwxr-xr-xextras/quota/quota_fsck.py377
-rwxr-xr-xextras/quota/xattr_analysis.py73
-rwxr-xr-xextras/rebalance.py309
-rw-r--r--extras/run-gluster.tmpfiles.in2
-rw-r--r--extras/snap_scheduler/Makefile.am9
-rw-r--r--extras/snap_scheduler/README.md125
-rw-r--r--extras/snap_scheduler/conf.py.in11
-rwxr-xr-xextras/snap_scheduler/gcron.py190
-rwxr-xr-xextras/snap_scheduler/snap_scheduler.py941
-rwxr-xr-xextras/statedumpparse.rb208
-rwxr-xr-xextras/stop-all-gluster-processes.sh193
-rw-r--r--extras/stripe-merge.c697
-rw-r--r--extras/systemd/Makefile.am17
-rw-r--r--extras/systemd/gluster-ta-volume.service.in13
-rw-r--r--extras/systemd/glusterd.service.in26
-rw-r--r--extras/systemd/glustereventsd.service.in16
-rw-r--r--extras/systemd/glusterfssharedstorage.service.in13
-rwxr-xr-xextras/test/bug-920583.t50
-rwxr-xr-xextras/test/gluster_commands.sh40
-rw-r--r--extras/test/ld-preload-test/README4
-rw-r--r--extras/test/ld-preload-test/ld-preload-lib.c620
-rw-r--r--extras/test/ld-preload-test/ld-preload-test.c527
-rw-r--r--extras/test/open-fd-tests.c85
-rwxr-xr-xextras/test/run.sh23
-rwxr-xr-xextras/test/stop_glusterd.sh23
-rw-r--r--extras/test/test-ffop.c971
-rwxr-xr-xextras/thin-arbiter/setup-thin-arbiter.sh184
-rw-r--r--extras/thin-arbiter/thin-arbiter.vol57
-rw-r--r--extras/volfilter.py168
-rw-r--r--extras/who-wrote-glusterfs/gitdm.aliases58
-rw-r--r--extras/who-wrote-glusterfs/gitdm.config8
-rw-r--r--extras/who-wrote-glusterfs/gitdm.domain-map29
-rwxr-xr-xextras/who-wrote-glusterfs/who-wrote-glusterfs.sh50
-rw-r--r--geo-replication/Makefile.am8
-rw-r--r--geo-replication/gsyncd.conf.in349
-rw-r--r--geo-replication/setup.py32
-rw-r--r--geo-replication/src/Makefile.am48
-rw-r--r--geo-replication/src/gsyncd.c402
-rwxr-xr-xgeo-replication/src/gverify.sh276
-rw-r--r--geo-replication/src/peer_georep-sshkey.py.in116
-rwxr-xr-xgeo-replication/src/peer_gsec_create.in24
-rw-r--r--geo-replication/src/peer_mountbroker.in211
-rw-r--r--geo-replication/src/peer_mountbroker.py.in401
-rw-r--r--geo-replication/src/procdiggy.c136
-rw-r--r--geo-replication/src/procdiggy.h21
-rwxr-xr-xgeo-replication/src/set_geo_rep_pem_keys.sh58
-rw-r--r--geo-replication/syncdaemon/Makefile.am8
-rw-r--r--geo-replication/syncdaemon/README.md (renamed from xlators/features/marker/utils/syncdaemon/README.md)42
-rw-r--r--geo-replication/syncdaemon/__codecheck.py (renamed from xlators/features/marker/utils/syncdaemon/__codecheck.py)20
-rw-r--r--geo-replication/syncdaemon/__init__.py9
-rw-r--r--geo-replication/syncdaemon/argsupgrade.py359
-rw-r--r--geo-replication/syncdaemon/conf.py.in17
-rw-r--r--geo-replication/syncdaemon/gsyncd.py325
-rw-r--r--geo-replication/syncdaemon/gsyncdconfig.py485
-rw-r--r--geo-replication/syncdaemon/gsyncdstatus.py419
-rw-r--r--geo-replication/syncdaemon/libcxattr.py112
-rw-r--r--geo-replication/syncdaemon/libgfchangelog.py143
-rw-r--r--geo-replication/syncdaemon/logutils.py77
-rw-r--r--geo-replication/syncdaemon/master.py2020
-rw-r--r--geo-replication/syncdaemon/monitor.py395
-rw-r--r--geo-replication/syncdaemon/py2py3.py184
-rw-r--r--geo-replication/syncdaemon/rconf.py31
-rw-r--r--geo-replication/syncdaemon/repce.py (renamed from xlators/features/marker/utils/syncdaemon/repce.py)82
-rw-r--r--geo-replication/syncdaemon/resource.py1583
-rw-r--r--geo-replication/syncdaemon/subcmds.py335
-rw-r--r--geo-replication/syncdaemon/syncdutils.py1115
-rw-r--r--geo-replication/test-requirements.txt7
-rw-r--r--geo-replication/tests/__init__.py9
-rw-r--r--geo-replication/tests/unit/__init__.py9
-rwxr-xr-xgeo-replication/tests/unit/test_gsyncdstatus.py193
-rw-r--r--geo-replication/tests/unit/test_syncdutils.py29
-rw-r--r--geo-replication/tox.ini32
-rw-r--r--geo-replication/unittests.sh9
-rw-r--r--glusterfs-api.pc.in12
-rw-r--r--glusterfs-hadoop/0.20.2/conf/core-site.xml38
-rw-r--r--glusterfs-hadoop/0.20.2/pom.xml36
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickClass.java109
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickRepl.java52
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSXattr.java472
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEInputStream.java205
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEOutputStream.java86
-rw-r--r--glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFileSystem.java492
-rw-r--r--glusterfs-hadoop/0.20.2/src/test/java/org/apache/hadoop/fs/glusterfs/AppTest.java38
-rwxr-xr-xglusterfs-hadoop/0.20.2/tools/build-deploy-jar.py212
-rw-r--r--glusterfs-hadoop/COPYING202
-rw-r--r--glusterfs-hadoop/README182
-rw-r--r--glusterfs.spec.in2192
-rw-r--r--glusterfsd/src/Makefile.am39
-rw-r--r--glusterfsd/src/gf_attach.c241
-rw-r--r--glusterfsd/src/glusterfsd-mem-types.h36
-rw-r--r--glusterfsd/src/glusterfsd-messages.h93
-rw-r--r--glusterfsd/src/glusterfsd-mgmt.c4522
-rw-r--r--glusterfsd/src/glusterfsd.c3675
-rw-r--r--glusterfsd/src/glusterfsd.h204
-rw-r--r--heal/Makefile.am (renamed from scheduler/nufa/Makefile.am)2
-rw-r--r--heal/src/Makefile.am29
-rw-r--r--heal/src/glfs-heal.c1793
-rw-r--r--libgfchangelog.pc.in12
-rw-r--r--libglusterd/Makefile.am (renamed from mod_glusterfs/apache/2.2/Makefile.am)2
-rw-r--r--libglusterd/src/Makefile.am31
-rw-r--r--libglusterd/src/gd-common-utils.c78
-rw-r--r--libglusterd/src/gd-common-utils.h28
-rw-r--r--libglusterd/src/libglusterd.sym2
-rw-r--r--libglusterfs/src/Makefile.am138
-rw-r--r--libglusterfs/src/async.c720
-rw-r--r--libglusterfs/src/byte-order.h301
-rw-r--r--libglusterfs/src/call-stub.c4909
-rw-r--r--libglusterfs/src/call-stub.h1134
-rw-r--r--libglusterfs/src/changelog.h115
-rw-r--r--libglusterfs/src/checksum.c51
-rw-r--r--libglusterfs/src/circ-buff.c275
-rw-r--r--libglusterfs/src/circ-buff.h63
-rw-r--r--libglusterfs/src/client_t.c825
-rw-r--r--libglusterfs/src/cluster-syncop.c1261
-rw-r--r--libglusterfs/src/common-utils.c6270
-rw-r--r--libglusterfs/src/common-utils.h518
-rw-r--r--libglusterfs/src/compat-errno.c1731
-rw-r--r--libglusterfs/src/compat-errno.h231
-rw-r--r--libglusterfs/src/compat.c907
-rw-r--r--libglusterfs/src/compat.h393
-rw-r--r--libglusterfs/src/ctx.c97
-rw-r--r--libglusterfs/src/daemon.c69
-rw-r--r--libglusterfs/src/default-args.c1651
-rw-r--r--libglusterfs/src/defaults-tmpl.c247
-rw-r--r--libglusterfs/src/defaults.c1354
-rw-r--r--libglusterfs/src/defaults.h673
-rw-r--r--libglusterfs/src/dict.c4389
-rw-r--r--libglusterfs/src/dict.h231
-rw-r--r--libglusterfs/src/event-epoll.c1032
-rw-r--r--libglusterfs/src/event-history.c83
-rw-r--r--libglusterfs/src/event-history.h43
-rw-r--r--libglusterfs/src/event-poll.c513
-rw-r--r--libglusterfs/src/event.c1064
-rw-r--r--libglusterfs/src/event.h81
-rw-r--r--libglusterfs/src/events.c136
-rw-r--r--libglusterfs/src/fd-lk.c663
-rw-r--r--libglusterfs/src/fd-lk.h66
-rw-r--r--libglusterfs/src/fd.c1714
-rw-r--r--libglusterfs/src/fd.h197
-rwxr-xr-xlibglusterfs/src/gen-defaults.py81
-rwxr-xr-xlibglusterfs/src/generator.py777
-rw-r--r--libglusterfs/src/gf-dirent.c315
-rw-r--r--libglusterfs/src/gf-dirent.h57
-rw-r--r--libglusterfs/src/gidcache.c211
-rw-r--r--libglusterfs/src/globals.c567
-rw-r--r--libglusterfs/src/globals.h45
-rw-r--r--libglusterfs/src/glusterfs.h429
-rw-r--r--libglusterfs/src/glusterfs/async.h209
-rw-r--r--libglusterfs/src/glusterfs/atomic.h459
-rw-r--r--libglusterfs/src/glusterfs/byte-order.h279
-rw-r--r--libglusterfs/src/glusterfs/call-stub.h622
-rw-r--r--libglusterfs/src/glusterfs/checksum.h (renamed from libglusterfs/src/checksum.h)6
-rw-r--r--libglusterfs/src/glusterfs/circ-buff.h61
-rw-r--r--libglusterfs/src/glusterfs/client_t.h147
-rw-r--r--libglusterfs/src/glusterfs/cluster-syncop.h227
-rw-r--r--libglusterfs/src/glusterfs/common-utils.h1256
-rw-r--r--libglusterfs/src/glusterfs/compat-errno.h238
-rw-r--r--libglusterfs/src/glusterfs/compat-uuid.h71
-rw-r--r--libglusterfs/src/glusterfs/compat.h544
-rw-r--r--libglusterfs/src/glusterfs/daemon.h (renamed from libglusterfs/src/daemon.h)11
-rw-r--r--libglusterfs/src/glusterfs/default-args.h455
-rw-r--r--libglusterfs/src/glusterfs/defaults.h1275
-rw-r--r--libglusterfs/src/glusterfs/dict.h420
-rw-r--r--libglusterfs/src/glusterfs/event-history.h40
-rw-r--r--libglusterfs/src/glusterfs/events.h34
-rw-r--r--libglusterfs/src/glusterfs/fd-lk.h59
-rw-r--r--libglusterfs/src/glusterfs/fd.h169
-rw-r--r--libglusterfs/src/glusterfs/gf-dirent.h71
-rw-r--r--libglusterfs/src/glusterfs/gf-event.h140
-rw-r--r--libglusterfs/src/glusterfs/gidcache.h60
-rw-r--r--libglusterfs/src/glusterfs/glfs-message-id.h102
-rw-r--r--libglusterfs/src/glusterfs/globals.h188
-rw-r--r--libglusterfs/src/glusterfs/glusterfs-acl.h162
-rw-r--r--libglusterfs/src/glusterfs/glusterfs-fops.h241
-rw-r--r--libglusterfs/src/glusterfs/glusterfs.h838
-rw-r--r--libglusterfs/src/glusterfs/graph-utils.h (renamed from libglusterfs/src/graph-utils.h)12
-rw-r--r--libglusterfs/src/glusterfs/hashfn.h (renamed from libglusterfs/src/hashfn.h)12
-rw-r--r--libglusterfs/src/glusterfs/iatt.h489
-rw-r--r--libglusterfs/src/glusterfs/inode.h306
-rw-r--r--libglusterfs/src/glusterfs/iobuf.h194
-rw-r--r--libglusterfs/src/glusterfs/latency.h33
-rw-r--r--libglusterfs/src/glusterfs/libglusterfs-messages.h245
-rw-r--r--libglusterfs/src/glusterfs/list.h273
-rw-r--r--libglusterfs/src/glusterfs/lkowner.h93
-rw-r--r--libglusterfs/src/glusterfs/locking.h84
-rw-r--r--libglusterfs/src/glusterfs/logging.h383
-rw-r--r--libglusterfs/src/glusterfs/lvm-defaults.h20
-rw-r--r--libglusterfs/src/glusterfs/mem-pool.h336
-rw-r--r--libglusterfs/src/glusterfs/mem-types.h139
-rw-r--r--libglusterfs/src/glusterfs/monitoring.h21
-rw-r--r--libglusterfs/src/glusterfs/options.h327
-rw-r--r--libglusterfs/src/glusterfs/parse-utils.h50
-rw-r--r--libglusterfs/src/glusterfs/quota-common-utils.h68
-rw-r--r--libglusterfs/src/glusterfs/rbthash.h75
-rw-r--r--libglusterfs/src/glusterfs/refcount.h101
-rw-r--r--libglusterfs/src/glusterfs/revision.h1
-rw-r--r--libglusterfs/src/glusterfs/rot-buffs.h125
-rw-r--r--libglusterfs/src/glusterfs/run.h (renamed from libglusterfs/src/run.h)63
-rw-r--r--libglusterfs/src/glusterfs/stack.h555
-rw-r--r--libglusterfs/src/glusterfs/statedump.h132
-rw-r--r--libglusterfs/src/glusterfs/store.h112
-rw-r--r--libglusterfs/src/glusterfs/strfd.h34
-rw-r--r--libglusterfs/src/glusterfs/syncop-utils.h54
-rw-r--r--libglusterfs/src/glusterfs/syncop.h718
-rw-r--r--libglusterfs/src/glusterfs/syscall.h278
-rw-r--r--libglusterfs/src/glusterfs/template-component-messages.h28
-rw-r--r--libglusterfs/src/glusterfs/throttle-tbf.h74
-rw-r--r--libglusterfs/src/glusterfs/timer.h56
-rw-r--r--libglusterfs/src/glusterfs/timespec.h33
-rw-r--r--libglusterfs/src/glusterfs/trie.h52
-rw-r--r--libglusterfs/src/glusterfs/upcall-utils.h110
-rw-r--r--libglusterfs/src/glusterfs/xlator.h1106
-rw-r--r--libglusterfs/src/graph-print.c221
-rw-r--r--libglusterfs/src/graph.c1959
-rw-r--r--libglusterfs/src/graph.l66
-rw-r--r--libglusterfs/src/graph.y273
-rw-r--r--libglusterfs/src/hashfn.c269
-rw-r--r--libglusterfs/src/iatt.h313
-rw-r--r--libglusterfs/src/inode.c3390
-rw-r--r--libglusterfs/src/inode.h202
-rw-r--r--libglusterfs/src/iobuf.c1613
-rw-r--r--libglusterfs/src/iobuf.h161
-rw-r--r--libglusterfs/src/latency.c186
-rw-r--r--libglusterfs/src/latency.h27
-rw-r--r--libglusterfs/src/libglusterfs.sym1193
-rw-r--r--libglusterfs/src/list.h178
-rw-r--r--libglusterfs/src/lkowner.h83
-rw-r--r--libglusterfs/src/locking.c27
-rw-r--r--libglusterfs/src/locking.h40
-rw-r--r--libglusterfs/src/logging.c2756
-rw-r--r--libglusterfs/src/logging.h148
-rw-r--r--libglusterfs/src/mem-pool.c1220
-rw-r--r--libglusterfs/src/mem-pool.h163
-rw-r--r--libglusterfs/src/mem-types.h107
-rw-r--r--libglusterfs/src/monitoring.c282
-rw-r--r--libglusterfs/src/options.c1816
-rw-r--r--libglusterfs/src/options.h254
-rw-r--r--libglusterfs/src/parse-utils.c174
-rw-r--r--libglusterfs/src/quota-common-utils.c241
-rw-r--r--libglusterfs/src/rbthash.c667
-rw-r--r--libglusterfs/src/rbthash.h77
-rw-r--r--libglusterfs/src/refcount.c108
-rw-r--r--libglusterfs/src/revision.h1
-rw-r--r--libglusterfs/src/rot-buffs.c490
-rw-r--r--libglusterfs/src/run.c780
-rw-r--r--libglusterfs/src/scheduler.c82
-rw-r--r--libglusterfs/src/scheduler.h32
-rw-r--r--libglusterfs/src/stack.c646
-rw-r--r--libglusterfs/src/stack.h429
-rw-r--r--libglusterfs/src/statedump.c1336
-rw-r--r--libglusterfs/src/statedump.h85
-rw-r--r--libglusterfs/src/store.c744
-rw-r--r--libglusterfs/src/strfd.c93
-rw-r--r--libglusterfs/src/syncop-utils.c669
-rw-r--r--libglusterfs/src/syncop.c3824
-rw-r--r--libglusterfs/src/syncop.h205
-rw-r--r--libglusterfs/src/syscall.c756
-rw-r--r--libglusterfs/src/syscall.h142
-rw-r--r--libglusterfs/src/throttle-tbf.c290
-rw-r--r--libglusterfs/src/timer.c382
-rw-r--r--libglusterfs/src/timer.h61
-rw-r--r--libglusterfs/src/timespec.c129
-rw-r--r--libglusterfs/src/trie.c491
-rw-r--r--libglusterfs/src/trie.h51
-rw-r--r--libglusterfs/src/unittest/global_mock.c25
-rw-r--r--libglusterfs/src/unittest/log_mock.c52
-rw-r--r--libglusterfs/src/unittest/mem_pool_unittest.c483
-rw-r--r--libglusterfs/src/unittest/unittest.h47
-rw-r--r--libglusterfs/src/xlator.c1880
-rw-r--r--libglusterfs/src/xlator.h877
-rw-r--r--libglusterfsclient/src/Makefile.am16
-rw-r--r--libglusterfsclient/src/libglusterfsclient-dentry.c404
-rwxr-xr-xlibglusterfsclient/src/libglusterfsclient-internals.h289
-rwxr-xr-xlibglusterfsclient/src/libglusterfsclient.c8158
-rwxr-xr-xlibglusterfsclient/src/libglusterfsclient.h1363
-rw-r--r--mod_glusterfs/Makefile.am3
-rw-r--r--mod_glusterfs/apache/1.3/src/Makefile.am30
-rw-r--r--mod_glusterfs/apache/1.3/src/README.txt107
-rw-r--r--mod_glusterfs/apache/1.3/src/mod_glusterfs.c507
-rw-r--r--mod_glusterfs/apache/2.2/src/Makefile.am31
-rw-r--r--mod_glusterfs/apache/2.2/src/README.txt105
-rw-r--r--mod_glusterfs/apache/2.2/src/mod_glusterfs.c3627
-rw-r--r--mod_glusterfs/apache/Makefile.am10
-rw-r--r--mod_glusterfs/lighttpd/1.4/Makefile.am3
-rw-r--r--mod_glusterfs/lighttpd/1.4/Makefile.am.diff29
-rw-r--r--mod_glusterfs/lighttpd/1.4/README.txt57
-rw-r--r--mod_glusterfs/lighttpd/1.4/mod_glusterfs.c1820
-rw-r--r--mod_glusterfs/lighttpd/1.4/mod_glusterfs.h32
-rw-r--r--mod_glusterfs/lighttpd/1.5/Makefile.am3
-rw-r--r--mod_glusterfs/lighttpd/1.5/Makefile.am.diff29
-rw-r--r--mod_glusterfs/lighttpd/1.5/README.txt57
-rw-r--r--mod_glusterfs/lighttpd/1.5/mod_glusterfs.c1476
-rw-r--r--mod_glusterfs/lighttpd/1.5/mod_glusterfs.h29
-rw-r--r--mod_glusterfs/lighttpd/Makefile.am3
-rwxr-xr-xrfc.sh259
-rw-r--r--rpc/Makefile.am2
-rw-r--r--rpc/rpc-lib/src/Makefile.am27
-rw-r--r--rpc/rpc-lib/src/auth-glusterfs.c506
-rw-r--r--rpc/rpc-lib/src/auth-null.c53
-rw-r--r--rpc/rpc-lib/src/auth-unix.c92
-rw-r--r--rpc/rpc-lib/src/autoscale-threads.c22
-rw-r--r--rpc/rpc-lib/src/libgfrpc.sym68
-rw-r--r--rpc/rpc-lib/src/mgmt-pmap.c147
-rw-r--r--rpc/rpc-lib/src/protocol-common.h490
-rw-r--r--rpc/rpc-lib/src/rpc-clnt-ping.c357
-rw-r--r--rpc/rpc-lib/src/rpc-clnt-ping.h16
-rw-r--r--rpc/rpc-lib/src/rpc-clnt.c3073
-rw-r--r--rpc/rpc-lib/src/rpc-clnt.h269
-rw-r--r--rpc/rpc-lib/src/rpc-drc.c855
-rw-r--r--rpc/rpc-lib/src/rpc-drc.h97
-rw-r--r--rpc/rpc-lib/src/rpc-lib-messages.h34
-rw-r--r--rpc/rpc-lib/src/rpc-transport.c945
-rw-r--r--rpc/rpc-lib/src/rpc-transport.h321
-rw-r--r--rpc/rpc-lib/src/rpcsvc-auth.c786
-rw-r--r--rpc/rpc-lib/src/rpcsvc-common.h116
-rw-r--r--rpc/rpc-lib/src/rpcsvc.c4549
-rw-r--r--rpc/rpc-lib/src/rpcsvc.h802
-rw-r--r--rpc/rpc-lib/src/xdr-common.h77
-rw-r--r--rpc/rpc-lib/src/xdr-rpc.c272
-rw-r--r--rpc/rpc-lib/src/xdr-rpc.h82
-rw-r--r--rpc/rpc-lib/src/xdr-rpcclnt.c130
-rw-r--r--rpc/rpc-lib/src/xdr-rpcclnt.h28
-rw-r--r--rpc/rpc-transport/Makefile.am2
-rw-r--r--rpc/rpc-transport/rdma/src/Makefile.am20
-rw-r--r--rpc/rpc-transport/rdma/src/name.c694
-rw-r--r--rpc/rpc-transport/rdma/src/name.h39
-rw-r--r--rpc/rpc-transport/rdma/src/rdma.c5094
-rw-r--r--rpc/rpc-transport/rdma/src/rdma.h403
-rw-r--r--rpc/rpc-transport/socket/src/Makefile.am19
-rw-r--r--rpc/rpc-transport/socket/src/name.c1171
-rw-r--r--rpc/rpc-transport/socket/src/name.h22
-rw-r--r--rpc/rpc-transport/socket/src/socket-mem-types.h22
-rw-r--r--rpc/rpc-transport/socket/src/socket.c6297
-rw-r--r--rpc/rpc-transport/socket/src/socket.h331
-rw-r--r--rpc/xdr/src/.gitignore25
-rw-r--r--rpc/xdr/src/Makefile.am107
-rw-r--r--rpc/xdr/src/acl3-xdr.x52
-rw-r--r--rpc/xdr/src/changelog-xdr.x42
-rw-r--r--rpc/xdr/src/cli1-xdr.c459
-rw-r--r--rpc/xdr/src/cli1-xdr.h369
-rw-r--r--rpc/xdr/src/cli1-xdr.x245
-rw-r--r--rpc/xdr/src/glusterd1-xdr.c502
-rw-r--r--rpc/xdr/src/glusterd1-xdr.h264
-rw-r--r--rpc/xdr/src/glusterd1-xdr.x122
-rw-r--r--rpc/xdr/src/glusterfs3-xdr.c1922
-rw-r--r--rpc/xdr/src/glusterfs3-xdr.h1295
-rw-r--r--rpc/xdr/src/glusterfs3-xdr.x446
-rw-r--r--rpc/xdr/src/glusterfs3.h1113
-rw-r--r--rpc/xdr/src/glusterfs4-xdr.x797
-rw-r--r--rpc/xdr/src/libgfxdr.sym350
-rw-r--r--rpc/xdr/src/mount3udp.x26
-rw-r--r--rpc/xdr/src/msg-nfs3.c467
-rw-r--r--rpc/xdr/src/msg-nfs3.h163
-rw-r--r--rpc/xdr/src/nlm4-xdr.c254
-rw-r--r--rpc/xdr/src/nlm4-xdr.h267
-rw-r--r--rpc/xdr/src/nlm4-xdr.x (renamed from rpc/xdr/src/nlm4.x)78
-rw-r--r--rpc/xdr/src/nlmcbk-xdr.c37
-rw-r--r--rpc/xdr/src/nlmcbk-xdr.h74
-rw-r--r--rpc/xdr/src/nlmcbk.x33
-rw-r--r--rpc/xdr/src/nsm-xdr.c105
-rw-r--r--rpc/xdr/src/nsm-xdr.h95
-rw-r--r--rpc/xdr/src/nsm-xdr.x (renamed from rpc/xdr/src/nsm.x)19
-rw-r--r--rpc/xdr/src/portmap-xdr.c246
-rw-r--r--rpc/xdr/src/portmap-xdr.h139
-rw-r--r--rpc/xdr/src/portmap-xdr.x27
-rw-r--r--rpc/xdr/src/rpc-common-xdr.c232
-rw-r--r--rpc/xdr/src/rpc-common-xdr.h113
-rw-r--r--rpc/xdr/src/rpc-common-xdr.x37
-rw-r--r--rpc/xdr/src/rpc-pragmas.h28
-rw-r--r--rpc/xdr/src/xdr-generic.c148
-rw-r--r--rpc/xdr/src/xdr-generic.h78
-rw-r--r--rpc/xdr/src/xdr-nfs3.c2620
-rw-r--r--rpc/xdr/src/xdr-nfs3.h1447
-rwxr-xr-xrun-tests-in-vagrant.sh311
-rwxr-xr-xrun-tests.sh615
-rw-r--r--scheduler/Makefile.am3
-rw-r--r--scheduler/alu/src/Makefile.am14
-rw-r--r--scheduler/alu/src/alu-mem-types.h35
-rw-r--r--scheduler/alu/src/alu.c1019
-rw-r--r--scheduler/alu/src/alu.h89
-rw-r--r--scheduler/nufa/src/Makefile.am12
-rw-r--r--scheduler/nufa/src/nufa-mem-types.h33
-rw-r--r--scheduler/nufa/src/nufa.c429
-rw-r--r--scheduler/random/src/Makefile.am14
-rw-r--r--scheduler/random/src/random-mem-types.h32
-rw-r--r--scheduler/random/src/random.c305
-rw-r--r--scheduler/random/src/random.h46
-rw-r--r--scheduler/rr/Makefile.am3
-rw-r--r--scheduler/rr/src/Makefile.am13
-rw-r--r--scheduler/rr/src/rr-mem-types.h32
-rw-r--r--scheduler/rr/src/rr-options.c256
-rw-r--r--scheduler/rr/src/rr-options.h34
-rw-r--r--scheduler/rr/src/rr.c567
-rw-r--r--scheduler/rr/src/rr.h70
-rw-r--r--scheduler/switch/Makefile.am3
-rw-r--r--scheduler/switch/src/Makefile.am12
-rw-r--r--scheduler/switch/src/switch-mem-types.h33
-rw-r--r--scheduler/switch/src/switch.c451
-rw-r--r--site.h.in44
-rwxr-xr-xsmoke.sh83
l---------submit-for-review.sh1
-rw-r--r--swift/1.4.8/README22
-rw-r--r--swift/1.4.8/gluster-swift-plugin.spec60
-rw-r--r--swift/1.4.8/gluster-swift.spec396
-rw-r--r--swift/1.4.8/plugins/DiskDir.py484
-rw-r--r--swift/1.4.8/plugins/DiskFile.py316
-rw-r--r--swift/1.4.8/plugins/Glusterfs.py131
-rw-r--r--swift/1.4.8/plugins/__init__.py16
-rw-r--r--swift/1.4.8/plugins/conf/account-server/1.conf22
-rw-r--r--swift/1.4.8/plugins/conf/account.builderbin786843 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/account.ring.gzbin739 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/container-server/1.conf24
-rw-r--r--swift/1.4.8/plugins/conf/container.builderbin786843 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/container.ring.gzbin741 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/fs.conf9
-rw-r--r--swift/1.4.8/plugins/conf/object-server/1.conf22
-rw-r--r--swift/1.4.8/plugins/conf/object.builderbin786843 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/object.ring.gzbin738 -> 0 bytes-rw-r--r--swift/1.4.8/plugins/conf/proxy-server.conf21
-rw-r--r--swift/1.4.8/plugins/conf/swift.conf7
-rw-r--r--swift/1.4.8/plugins/constraints.py97
-rw-r--r--swift/1.4.8/plugins/utils.py679
-rw-r--r--swift/1.4.8/swift.diff797
-rw-r--r--tests/00-geo-rep/00-georep-verify-non-root-setup.t294
-rw-r--r--tests/00-geo-rep/00-georep-verify-setup.t110
-rw-r--r--tests/00-geo-rep/01-georep-glusterd-tests.t213
-rw-r--r--tests/00-geo-rep/bug-1600145.t109
-rw-r--r--tests/00-geo-rep/bug-1708603.t63
-rw-r--r--tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t234
-rw-r--r--tests/00-geo-rep/georep-basic-dr-rsync.t258
-rw-r--r--tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t227
-rw-r--r--tests/00-geo-rep/georep-basic-dr-tarssh.t227
-rw-r--r--tests/00-geo-rep/georep-basic-rsync-ec.t224
-rw-r--r--tests/00-geo-rep/georep-basic-tarssh-ec.t223
-rw-r--r--tests/00-geo-rep/georep-config-upgrade.t132
-rw-r--r--tests/00-geo-rep/georep-stderr-hang.t128
-rw-r--r--tests/00-geo-rep/georep-upgrade.t79
-rw-r--r--tests/00-geo-rep/gsyncd.conf.old47
-rw-r--r--tests/000-flaky/basic_afr_split-brain-favorite-child-policy.t203
-rw-r--r--tests/000-flaky/basic_changelog_changelog-snapshot.t60
-rw-r--r--tests/000-flaky/basic_distribute_rebal-all-nodes-migrate.t142
-rw-r--r--tests/000-flaky/basic_ec_ec-quorum-count-partial-failure.t50
-rw-r--r--tests/000-flaky/basic_mount-nfs-auth.t342
-rw-r--r--tests/000-flaky/bugs_core_multiplex-limit-issue-151.t56
-rw-r--r--tests/000-flaky/bugs_distribute_bug-1117851.t101
-rw-r--r--tests/000-flaky/bugs_distribute_bug-1122443.t61
-rw-r--r--tests/000-flaky/bugs_glusterd_bug-857330/common.rc57
-rwxr-xr-xtests/000-flaky/bugs_glusterd_bug-857330/normal.t69
-rwxr-xr-xtests/000-flaky/bugs_glusterd_bug-857330/xml.t83
-rw-r--r--tests/000-flaky/bugs_glusterd_quorum-value-check.t37
-rw-r--r--tests/000-flaky/bugs_nfs_bug-1116503.t47
-rw-r--r--tests/000-flaky/features_lock-migration_lkmigration-set-option.t34
-rw-r--r--tests/README.md43
-rw-r--r--tests/afr.rc123
-rwxr-xr-xtests/basic/0symbol-check.t46
-rw-r--r--tests/basic/afr/add-brick-self-heal.t74
-rw-r--r--tests/basic/afr/afr-anon-inode-no-quorum.t63
-rw-r--r--tests/basic/afr/afr-anon-inode.t114
-rw-r--r--tests/basic/afr/afr-no-fsync.t20
-rw-r--r--tests/basic/afr/afr-read-hash-mode.t56
-rw-r--r--tests/basic/afr/afr-seek.t55
-rw-r--r--tests/basic/afr/afr-up.t28
-rw-r--r--tests/basic/afr/arbiter-add-brick.t86
-rw-r--r--tests/basic/afr/arbiter-cli.t30
-rw-r--r--tests/basic/afr/arbiter-mount.t48
-rw-r--r--tests/basic/afr/arbiter-remove-brick.t36
-rw-r--r--tests/basic/afr/arbiter-statfs.t41
-rw-r--r--tests/basic/afr/arbiter.t92
-rwxr-xr-xtests/basic/afr/client-side-heal.t95
-rw-r--r--tests/basic/afr/data-self-heal.t209
-rw-r--r--tests/basic/afr/durability-off.t46
-rw-r--r--tests/basic/afr/entry-self-heal-anon-dir-off.t459
-rw-r--r--tests/basic/afr/entry-self-heal.t447
-rw-r--r--tests/basic/afr/gfid-heal.t33
-rw-r--r--tests/basic/afr/gfid-mismatch-resolution-with-cli.t168
-rw-r--r--tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t229
-rw-r--r--tests/basic/afr/gfid-mismatch.t32
-rw-r--r--tests/basic/afr/gfid-self-heal.t145
-rw-r--r--tests/basic/afr/granular-esh/add-brick.t80
-rw-r--r--tests/basic/afr/granular-esh/cli.t114
-rw-r--r--tests/basic/afr/granular-esh/conservative-merge.t138
-rw-r--r--tests/basic/afr/granular-esh/granular-esh.t168
-rw-r--r--tests/basic/afr/granular-esh/granular-indices-but-non-granular-heal.t76
-rw-r--r--tests/basic/afr/granular-esh/replace-brick.t76
-rw-r--r--tests/basic/afr/halo.t61
-rw-r--r--tests/basic/afr/heal-info.t36
-rw-r--r--tests/basic/afr/heal-quota.t35
-rw-r--r--tests/basic/afr/inodelk.t87
-rw-r--r--tests/basic/afr/lk-quorum.t257
-rw-r--r--tests/basic/afr/metadata-self-heal.t133
-rw-r--r--tests/basic/afr/name-self-heal.t112
-rw-r--r--tests/basic/afr/quorum.t97
-rw-r--r--tests/basic/afr/read-subvol-data.t33
-rw-r--r--tests/basic/afr/read-subvol-entry.t35
-rw-r--r--tests/basic/afr/rename-data-loss.t72
-rw-r--r--tests/basic/afr/replace-brick-self-heal.t64
-rw-r--r--tests/basic/afr/resolve.t55
-rw-r--r--tests/basic/afr/root-squash-self-heal.t27
-rw-r--r--tests/basic/afr/self-heal.t244
-rw-r--r--tests/basic/afr/self-heald.t193
-rw-r--r--tests/basic/afr/sparse-file-self-heal.t181
-rw-r--r--tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t124
-rw-r--r--tests/basic/afr/split-brain-heal-info.t62
-rw-r--r--tests/basic/afr/split-brain-healing-ctime.t252
-rw-r--r--tests/basic/afr/split-brain-healing.t261
-rw-r--r--tests/basic/afr/split-brain-open.t38
-rw-r--r--tests/basic/afr/split-brain-resolution.t105
-rw-r--r--tests/basic/afr/stale-file-lookup.t30
-rw-r--r--tests/basic/afr/ta-check-locks.t68
-rw-r--r--tests/basic/afr/ta-read.t64
-rw-r--r--tests/basic/afr/ta-shd.t49
-rw-r--r--tests/basic/afr/ta-write-on-bad-brick.t51
-rw-r--r--tests/basic/afr/ta.t54
-rw-r--r--tests/basic/afr/tarissue.t39
-rw-r--r--tests/basic/all_squash.t74
-rwxr-xr-xtests/basic/cdc.t148
-rw-r--r--tests/basic/changelog/changelog-api.t37
-rw-r--r--tests/basic/changelog/changelog-history.t91
-rw-r--r--tests/basic/changelog/changelog-rename.t44
-rw-r--r--tests/basic/changelog/history-api.t42
-rw-r--r--tests/basic/cloudsync-sanity.t29
-rw-r--r--tests/basic/ctime/ctime-ec-heal.t70
-rw-r--r--tests/basic/ctime/ctime-ec-rebalance.t43
-rw-r--r--tests/basic/ctime/ctime-glfs-init.c68
-rw-r--r--tests/basic/ctime/ctime-glfs-init.t23
-rw-r--r--tests/basic/ctime/ctime-heal-symlinks.t65
-rw-r--r--tests/basic/ctime/ctime-mdata-legacy-files.t83
-rw-r--r--tests/basic/ctime/ctime-noatime.t49
-rw-r--r--tests/basic/ctime/ctime-readdir.c29
-rw-r--r--tests/basic/ctime/ctime-readdir.t50
-rw-r--r--tests/basic/ctime/ctime-rep-heal.t70
-rw-r--r--tests/basic/ctime/ctime-rep-rebalance.t41
-rw-r--r--tests/basic/ctime/ctime-utimesat.t28
-rw-r--r--tests/basic/distribute/brick-down.t83
-rw-r--r--tests/basic/distribute/bug-1265677-use-readdirp.t38
-rw-r--r--tests/basic/distribute/debug-xattrs.t54
-rw-r--r--tests/basic/distribute/dir-heal.t145
-rw-r--r--tests/basic/distribute/file-create.t120
-rw-r--r--tests/basic/distribute/file-rename.t1021
-rw-r--r--tests/basic/distribute/force-migration.t50
-rw-r--r--tests/basic/distribute/lookup.t54
-rw-r--r--tests/basic/distribute/non-root-unlink-stale-linkto.t51
-rw-r--r--tests/basic/distribute/spare_file_rebalance.t51
-rw-r--r--tests/basic/distribute/throttle-rebal.t56
-rw-r--r--tests/basic/ec/dht-rename.t19
-rw-r--r--tests/basic/ec/ec-1468261.t95
-rw-r--r--tests/basic/ec/ec-3-1.t14
-rw-r--r--tests/basic/ec/ec-4-1.t14
-rw-r--r--tests/basic/ec/ec-5-2.t14
-rw-r--r--tests/basic/ec/ec-6-2.t14
-rw-r--r--tests/basic/ec/ec-anonymous-fd.t42
-rw-r--r--tests/basic/ec/ec-background-heals.t105
-rw-r--r--tests/basic/ec/ec-badfd.c124
-rwxr-xr-xtests/basic/ec/ec-badfd.t26
-rw-r--r--tests/basic/ec/ec-common145
-rw-r--r--tests/basic/ec/ec-cpu-extensions.t62
-rwxr-xr-xtests/basic/ec/ec-data-heal.t75
-rw-r--r--tests/basic/ec/ec-dirty-flags.t23
-rw-r--r--tests/basic/ec/ec-discard.t205
-rw-r--r--tests/basic/ec/ec-fallocate.t72
-rw-r--r--tests/basic/ec/ec-fast-fgetxattr.c129
-rwxr-xr-xtests/basic/ec/ec-fast-fgetxattr.t40
-rw-r--r--tests/basic/ec/ec-fix-openfd.t111
-rw-r--r--tests/basic/ec/ec-internal-xattrs.t45
-rw-r--r--tests/basic/ec/ec-new-entry.t70
-rw-r--r--tests/basic/ec/ec-notify.t101
-rw-r--r--tests/basic/ec/ec-optimistic-changelog.t153
-rw-r--r--tests/basic/ec/ec-quorum-count.t167
-rw-r--r--tests/basic/ec/ec-read-mask.t114
-rw-r--r--tests/basic/ec/ec-read-policy.t52
-rw-r--r--tests/basic/ec/ec-readdir.t46
-rw-r--r--tests/basic/ec/ec-rebalance.t61
-rw-r--r--tests/basic/ec/ec-reset-brick.t50
-rw-r--r--tests/basic/ec/ec-root-heal.t34
-rw-r--r--tests/basic/ec/ec-seek.t58
-rw-r--r--tests/basic/ec/ec-stripe.t227
-rw-r--r--tests/basic/ec/ec-up.t28
-rw-r--r--tests/basic/ec/ec.t259
-rw-r--r--tests/basic/ec/gfapi-ec-open-truncate.c171
-rw-r--r--tests/basic/ec/gfapi-ec-open-truncate.t48
-rw-r--r--tests/basic/ec/heal-info.t74
-rw-r--r--tests/basic/ec/lock-contention.t62
-rwxr-xr-xtests/basic/ec/nfs.t29
-rwxr-xr-xtests/basic/ec/quota.t45
-rw-r--r--tests/basic/ec/self-heal-read-write-fail.t69
-rw-r--r--tests/basic/ec/self-heal.t235
-rw-r--r--tests/basic/ec/statedump.t28
-rw-r--r--tests/basic/exports_parsing.t57
-rw-r--r--tests/basic/fencing/afr-lock-heal-advanced.c227
-rw-r--r--tests/basic/fencing/afr-lock-heal-advanced.t115
-rw-r--r--tests/basic/fencing/afr-lock-heal-basic.c182
-rw-r--r--tests/basic/fencing/afr-lock-heal-basic.t102
-rw-r--r--tests/basic/fencing/fence-basic.c229
-rwxr-xr-xtests/basic/fencing/fence-basic.t31
-rw-r--r--tests/basic/fencing/fencing-crash-conistency.t62
-rw-r--r--tests/basic/fencing/test-fence-option.t37
-rw-r--r--tests/basic/fop-sampling.t61
-rw-r--r--tests/basic/fops-sanity.c1033
-rwxr-xr-xtests/basic/fops-sanity.t27
-rw-r--r--tests/basic/fuse/Makefile12
-rw-r--r--tests/basic/fuse/active-io-graph-switch.t65
-rw-r--r--tests/basic/fuse/seek.c82
-rwxr-xr-xtests/basic/geo-replication/marker-xattrs.t80
-rw-r--r--tests/basic/gfapi/Makefile22
-rwxr-xr-xtests/basic/gfapi/anonymous_fd.t26
-rw-r--r--tests/basic/gfapi/anonymous_fd_read_write.c106
-rw-r--r--tests/basic/gfapi/bug-1241104.c93
-rwxr-xr-xtests/basic/gfapi/bug-1241104.t30
-rw-r--r--tests/basic/gfapi/bug-1507896.c49
-rw-r--r--tests/basic/gfapi/bug-1507896.t33
-rw-r--r--tests/basic/gfapi/bug1283983.c122
-rwxr-xr-xtests/basic/gfapi/bug1283983.sh33
-rw-r--r--tests/basic/gfapi/bug1291259.c181
-rwxr-xr-xtests/basic/gfapi/bug1291259.t32
-rw-r--r--tests/basic/gfapi/bug1613098.c96
-rwxr-xr-xtests/basic/gfapi/bug1613098.t22
-rw-r--r--tests/basic/gfapi/gfapi-async-calls-test.c494
-rw-r--r--tests/basic/gfapi/gfapi-async-calls-test.t26
-rw-r--r--tests/basic/gfapi/gfapi-copy-file-range.t82
-rw-r--r--tests/basic/gfapi/gfapi-dup.c84
-rwxr-xr-xtests/basic/gfapi/gfapi-dup.t27
-rw-r--r--tests/basic/gfapi/gfapi-graph-switch-open-fd.t44
-rw-r--r--tests/basic/gfapi/gfapi-keep-writing.c129
-rw-r--r--tests/basic/gfapi/gfapi-load-volfile.c65
-rw-r--r--tests/basic/gfapi/gfapi-load-volfile.t28
-rw-r--r--tests/basic/gfapi/gfapi-ssl-load-volfile-test.c127
-rwxr-xr-xtests/basic/gfapi/gfapi-ssl-load-volfile-test.t76
-rw-r--r--tests/basic/gfapi/gfapi-ssl-test.c124
-rwxr-xr-xtests/basic/gfapi/gfapi-ssl-test.t61
-rw-r--r--tests/basic/gfapi/gfapi-statx-basic.c184
-rwxr-xr-xtests/basic/gfapi/gfapi-statx-basic.t30
-rw-r--r--tests/basic/gfapi/gfapi-trunc.c89
-rw-r--r--tests/basic/gfapi/gfapi-trunc.t22
-rw-r--r--tests/basic/gfapi/glfd-lkowner.c214
-rwxr-xr-xtests/basic/gfapi/glfd-lkowner.t27
-rw-r--r--tests/basic/gfapi/glfs-copy-file-range.c180
-rw-r--r--tests/basic/gfapi/glfs_h_creat_open.c118
-rwxr-xr-xtests/basic/gfapi/glfs_h_creat_open.t27
-rw-r--r--tests/basic/gfapi/glfs_sysrq.c60
-rwxr-xr-xtests/basic/gfapi/glfs_sysrq.t39
-rw-r--r--tests/basic/gfapi/glfs_xreaddirplus_r.c242
-rwxr-xr-xtests/basic/gfapi/glfs_xreaddirplus_r.t28
-rw-r--r--tests/basic/gfapi/glfsxmp-coverage.c1900
-rw-r--r--tests/basic/gfapi/glfsxmp.t30
-rw-r--r--tests/basic/gfapi/libgfapi-fini-hang.c62
-rwxr-xr-xtests/basic/gfapi/libgfapi-fini-hang.t42
-rw-r--r--tests/basic/gfapi/mandatory-lock-optimal.c532
-rw-r--r--tests/basic/gfapi/mandatory-lock-optimal.t38
-rw-r--r--tests/basic/gfapi/protocol-client-ssl.vol.in15
-rw-r--r--tests/basic/gfapi/protocol-client.vol.in14
-rw-r--r--tests/basic/gfapi/seek.c99
-rw-r--r--tests/basic/gfapi/sink.t13
-rw-r--r--tests/basic/gfapi/sink.vol24
-rw-r--r--tests/basic/gfapi/upcall-cache-invalidate.c209
-rwxr-xr-xtests/basic/gfapi/upcall-cache-invalidate.t30
-rw-r--r--tests/basic/gfapi/upcall-register-api.c286
-rwxr-xr-xtests/basic/gfapi/upcall-register-api.t30
-rw-r--r--tests/basic/gfid-access.t80
-rwxr-xr-xtests/basic/gfproxy.t71
-rw-r--r--tests/basic/global-threading.t104
-rw-r--r--tests/basic/glusterd-restart-shd-mux.t96
-rw-r--r--tests/basic/glusterd/arbiter-volume-probe.t25
-rw-r--r--tests/basic/glusterd/check-cloudsync-ancestry.t48
-rw-r--r--tests/basic/glusterd/disperse-create.t73
-rw-r--r--tests/basic/glusterd/heald.t92
-rw-r--r--tests/basic/glusterd/thin-arbiter-volume-probe.t25
-rw-r--r--tests/basic/glusterd/thin-arbiter-volume.t45
-rw-r--r--tests/basic/glusterd/volfile_server_switch.t46
-rw-r--r--tests/basic/glusterd/volume-brick-count.t61
-rw-r--r--tests/basic/glusterfsd-args.t5
-rw-r--r--tests/basic/graph-cleanup-brick-down-shd-mux.t64
-rw-r--r--tests/basic/hardlink-limit.t44
-rw-r--r--tests/basic/inode-leak.t31
-rw-r--r--tests/basic/inode-quota-enforcing.t100
-rw-r--r--tests/basic/ios-dump.t43
-rw-r--r--tests/basic/jbr/jbr-volgen.t39
-rwxr-xr-xtests/basic/jbr/jbr.t38
-rw-r--r--tests/basic/logchecks-messages.h105
-rw-r--r--tests/basic/logchecks.c214
-rw-r--r--tests/basic/md-cache/bug-1317785.t34
-rwxr-xr-xtests/basic/md-cache/bug-1418249.t20
-rwxr-xr-xtests/basic/meta.t45
-rw-r--r--tests/basic/metadisp/fsyncdir.c29
-rw-r--r--tests/basic/metadisp/ftruncate.c34
-rw-r--r--tests/basic/metadisp/fxattr.c107
-rw-r--r--tests/basic/metadisp/gfs-fsetxattr.c141
-rw-r--r--tests/basic/metadisp/metadisp.t316
-rw-r--r--tests/basic/metadisp/metadisp.vol14
-rw-r--r--tests/basic/mgmt_v3-locks.t120
-rw-r--r--tests/basic/mount-options.disabled143
-rwxr-xr-xtests/basic/mount.t85
-rw-r--r--tests/basic/mpx-compat.t53
-rw-r--r--tests/basic/multiple-volume-shd-mux.t46
-rw-r--r--tests/basic/multiplex.t78
-rw-r--r--tests/basic/namespace.t131
-rw-r--r--tests/basic/netgroup_parsing.t56
-rwxr-xr-xtests/basic/nl-cache.t98
-rw-r--r--tests/basic/nufa.t41
-rwxr-xr-xtests/basic/op_errnos.t34
-rw-r--r--tests/basic/open-behind/open-behind.t183
-rw-r--r--tests/basic/open-behind/tester-fd.c99
-rw-r--r--tests/basic/open-behind/tester.c444
-rw-r--r--tests/basic/open-behind/tester.h145
-rw-r--r--tests/basic/open-fd-snap-delete.t74
-rw-r--r--tests/basic/peer-parsing.t52
-rw-r--r--tests/basic/pgfid-feat.t35
-rwxr-xr-xtests/basic/playground/template-xlator-sanity.t43
-rw-r--r--tests/basic/posix/shared-statfs.t58
-rw-r--r--tests/basic/posix/zero-fill-enospace.c67
-rw-r--r--tests/basic/posix/zero-fill-enospace.t35
-rwxr-xr-xtests/basic/posixonly.t31
-rw-r--r--tests/basic/quick-read-with-upcall.t72
-rwxr-xr-xtests/basic/quota-ancestry-building.t71
-rwxr-xr-xtests/basic/quota-anon-fd-nfs.t117
-rwxr-xr-xtests/basic/quota-nfs.t66
-rw-r--r--tests/basic/quota-rename.t37
-rw-r--r--tests/basic/quota.c89
-rwxr-xr-xtests/basic/quota.t236
-rwxr-xr-xtests/basic/quota_aux_mount.t53
-rwxr-xr-xtests/basic/rpc-coverage.sh (renamed from extras/rpc-coverage.sh)57
-rwxr-xr-xtests/basic/rpc-coverage.t25
-rw-r--r--tests/basic/sdfs-sanity.t28
-rw-r--r--tests/basic/seek.c182
-rw-r--r--tests/basic/shd-mux-afr.t70
-rw-r--r--tests/basic/shd-mux-ec.t75
-rw-r--r--tests/basic/stats-dump.t55
-rwxr-xr-xtests/basic/symbol-check.sh114
-rwxr-xr-xtests/basic/trace.t55
-rw-r--r--tests/basic/uss.t421
-rw-r--r--tests/basic/volfile-sanity.t29
-rw-r--r--tests/basic/volume-scale-shd-mux.t116
-rw-r--r--tests/basic/volume-snap-scheduler.t49
-rwxr-xr-xtests/basic/volume-snapshot-clone.t129
-rwxr-xr-xtests/basic/volume-snapshot-xml.t72
-rwxr-xr-xtests/basic/volume-snapshot.t172
-rw-r--r--tests/basic/volume-status.t114
-rw-r--r--tests/basic/volume.t60
-rw-r--r--tests/basic/xlator-pass-through-sanity.t22
-rw-r--r--tests/bitrot/br-signer-threads-config-1797869.t73
-rw-r--r--tests/bitrot/br-state-check.t83
-rw-r--r--tests/bitrot/br-stub.c195
-rw-r--r--tests/bitrot/br-stub.t66
-rw-r--r--tests/bitrot/bug-1207627-bitrot-scrub-status.t52
-rw-r--r--tests/bitrot/bug-1221914.t51
-rw-r--r--tests/bitrot/bug-1244613.t95
-rw-r--r--tests/bitrot/bug-1294786.t95
-rw-r--r--tests/bitrot/bug-1373520.t71
-rw-r--r--tests/bitrot/bug-1700078.t87
-rw-r--r--tests/bitrot/bug-internal-xattrs-check-1243391.t42
-rw-r--r--tests/bugs/access-control/bug-1051896.c94
-rw-r--r--tests/bugs/access-control/bug-1051896.t34
-rw-r--r--tests/bugs/access-control/bug-1387241.c18
-rw-r--r--tests/bugs/access-control/bug-1387241.t36
-rw-r--r--tests/bugs/access-control/bug-887098-gmount-crash.t42
-rw-r--r--tests/bugs/access-control/bug-958691.t52
-rwxr-xr-xtests/bugs/bitrot/1207029-bitrot-daemon-should-start-on-valid-node.t57
-rw-r--r--tests/bugs/bitrot/1209751-bitrot-scrub-tunable-reset.t48
-rw-r--r--tests/bugs/bitrot/1209752-volume-status-should-show-bitrot-scrub-info.t70
-rw-r--r--tests/bugs/bitrot/1209818-vol-info-show-scrub-process-properly.t48
-rw-r--r--tests/bugs/bitrot/bug-1210684-scrub-pause-resume-error-handling.t39
-rw-r--r--tests/bugs/bitrot/bug-1227996.t57
-rw-r--r--tests/bugs/bitrot/bug-1228680.t48
-rw-r--r--tests/bugs/bitrot/bug-1229134-bitd-not-support-vol-set.t38
-rw-r--r--tests/bugs/bitrot/bug-1245981.t55
-rw-r--r--tests/bugs/bitrot/bug-1288490.t48
-rwxr-xr-xtests/bugs/bug-1064147.t72
-rw-r--r--tests/bugs/bug-1110262.t72
-rw-r--r--tests/bugs/bug-1138841.t25
-rwxr-xr-xtests/bugs/bug-1258069.t30
-rw-r--r--tests/bugs/bug-1368312.t86
-rw-r--r--tests/bugs/bug-1371806.t81
-rw-r--r--tests/bugs/bug-1371806_1.t48
-rw-r--r--tests/bugs/bug-1371806_2.t52
-rw-r--r--tests/bugs/bug-1371806_3.t63
-rw-r--r--tests/bugs/bug-1371806_acl.t96
-rw-r--r--tests/bugs/bug-1584517.t70
-rw-r--r--tests/bugs/bug-1620580.t67
-rw-r--r--tests/bugs/bug-1694920.t63
-rw-r--r--tests/bugs/bug-1702299.t67
-rwxr-xr-xtests/bugs/changelog/bug-1208470.t40
-rw-r--r--tests/bugs/changelog/bug-1211327.t46
-rw-r--r--tests/bugs/changelog/bug-1225542.t30
-rw-r--r--tests/bugs/changelog/bug-1321955.t60
-rw-r--r--tests/bugs/cli/bug-1004218.t26
-rw-r--r--tests/bugs/cli/bug-1022905.t37
-rw-r--r--tests/bugs/cli/bug-1030580.t53
-rw-r--r--tests/bugs/cli/bug-1047378.t12
-rw-r--r--tests/bugs/cli/bug-1047416.t71
-rw-r--r--tests/bugs/cli/bug-1077682.t24
-rwxr-xr-xtests/bugs/cli/bug-1087487.t23
-rw-r--r--tests/bugs/cli/bug-1113476.t45
-rw-r--r--tests/bugs/cli/bug-1169302.c79
-rwxr-xr-xtests/bugs/cli/bug-1169302.t55
-rwxr-xr-xtests/bugs/cli/bug-1320388.t44
-rw-r--r--tests/bugs/cli/bug-1353156-get-state-cli-validations.t147
-rw-r--r--tests/bugs/cli/bug-1378842-volume-get-all.t23
-rw-r--r--tests/bugs/cli/bug-764638.t13
-rwxr-xr-xtests/bugs/cli/bug-822830.t64
-rw-r--r--tests/bugs/cli/bug-867252.t41
-rwxr-xr-xtests/bugs/cli/bug-921215.t13
-rw-r--r--tests/bugs/cli/bug-949298.t12
-rw-r--r--tests/bugs/cli/bug-961307.t20
-rwxr-xr-xtests/bugs/cli/bug-969193.t13
-rw-r--r--tests/bugs/cli/bug-977246.t21
-rw-r--r--tests/bugs/cli/bug-982174.t36
-rw-r--r--tests/bugs/cli/bug-983317-volume-get.t45
-rw-r--r--tests/bugs/core/949327.t23
-rw-r--r--tests/bugs/core/brick-mux-fd-cleanup.t78
-rw-r--r--tests/bugs/core/bug-1110917.t39
-rw-r--r--tests/bugs/core/bug-1111557.t12
-rw-r--r--tests/bugs/core/bug-1117951.t24
-rw-r--r--tests/bugs/core/bug-1119582.t24
-rw-r--r--tests/bugs/core/bug-1135514-allow-setxattr-with-null-value.t18
-rwxr-xr-xtests/bugs/core/bug-1168803-snapd-option-validation-fix.t30
-rwxr-xr-xtests/bugs/core/bug-1402841.t-mt-dir-scan-race.t42
-rw-r--r--tests/bugs/core/bug-1421721-mpx-toggle.t25
-rw-r--r--tests/bugs/core/bug-1432542-mpx-restart-crash.t116
-rw-r--r--tests/bugs/core/bug-1650403.t113
-rw-r--r--tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t33
-rw-r--r--tests/bugs/core/bug-834465.c60
-rwxr-xr-xtests/bugs/core/bug-834465.t51
-rw-r--r--tests/bugs/core/bug-845213.t19
-rw-r--r--tests/bugs/core/bug-903336.t13
-rwxr-xr-xtests/bugs/core/bug-908146.t30
-rw-r--r--tests/bugs/core/bug-913544.t24
-rwxr-xr-xtests/bugs/core/bug-924075.t23
-rwxr-xr-xtests/bugs/core/bug-927616.t65
-rw-r--r--tests/bugs/core/bug-949242.t55
-rw-r--r--tests/bugs/core/bug-986429.t19
-rwxr-xr-xtests/bugs/core/io-stats-1322825.t67
-rwxr-xr-xtests/bugs/core/log-bug-1362520.t43
-rwxr-xr-xtests/bugs/ctime/issue-832.t32
-rw-r--r--tests/bugs/distribute/bug-1042725.t49
-rwxr-xr-xtests/bugs/distribute/bug-1066798.t88
-rwxr-xr-xtests/bugs/distribute/bug-1086228.t34
-rwxr-xr-xtests/bugs/distribute/bug-1088231.t173
-rw-r--r--tests/bugs/distribute/bug-1099890.t130
-rwxr-xr-xtests/bugs/distribute/bug-1125824.t103
-rwxr-xr-xtests/bugs/distribute/bug-1161156.t58
-rwxr-xr-xtests/bugs/distribute/bug-1161311.t164
-rw-r--r--tests/bugs/distribute/bug-1190734.t93
-rw-r--r--tests/bugs/distribute/bug-1193636.c68
-rw-r--r--tests/bugs/distribute/bug-1193636.t74
-rwxr-xr-xtests/bugs/distribute/bug-1204140.t22
-rw-r--r--tests/bugs/distribute/bug-1247563.t62
-rw-r--r--tests/bugs/distribute/bug-1368012.t51
-rw-r--r--tests/bugs/distribute/bug-1389697.t42
-rw-r--r--tests/bugs/distribute/bug-1543279.t67
-rw-r--r--tests/bugs/distribute/bug-1600379.t54
-rw-r--r--tests/bugs/distribute/bug-1667804.t63
-rwxr-xr-xtests/bugs/distribute/bug-1786679.t69
-rwxr-xr-xtests/bugs/distribute/bug-853258.t47
-rw-r--r--tests/bugs/distribute/bug-860663.c41
-rw-r--r--tests/bugs/distribute/bug-860663.t56
-rw-r--r--tests/bugs/distribute/bug-862967.t59
-rwxr-xr-xtests/bugs/distribute/bug-882278.t73
-rwxr-xr-xtests/bugs/distribute/bug-884455.t84
-rwxr-xr-xtests/bugs/distribute/bug-884597.t173
-rwxr-xr-xtests/bugs/distribute/bug-907072.t49
-rwxr-xr-xtests/bugs/distribute/bug-912564.t92
-rwxr-xr-xtests/bugs/distribute/bug-915554.t76
-rwxr-xr-xtests/bugs/distribute/bug-921408.t90
-rwxr-xr-xtests/bugs/distribute/bug-924265.t35
-rw-r--r--tests/bugs/distribute/bug-961615.t34
-rwxr-xr-xtests/bugs/distribute/bug-973073.t48
-rwxr-xr-xtests/bugs/distribute/issue-1327.t33
-rwxr-xr-xtests/bugs/distribute/overlap.py59
-rw-r--r--tests/bugs/ec/bug-1161621.t42
-rw-r--r--tests/bugs/ec/bug-1161886.c53
-rw-r--r--tests/bugs/ec/bug-1161886.t44
-rw-r--r--tests/bugs/ec/bug-1179050.t23
-rw-r--r--tests/bugs/ec/bug-1187474.t43
-rw-r--r--tests/bugs/ec/bug-1188145.t50
-rw-r--r--tests/bugs/ec/bug-1227869.t33
-rw-r--r--tests/bugs/ec/bug-1236065.t95
-rw-r--r--tests/bugs/ec/bug-1251446.t50
-rwxr-xr-xtests/bugs/ec/bug-1304988.t42
-rw-r--r--tests/bugs/ec/bug-1547662.t41
-rw-r--r--tests/bugs/ec/bug-1699866-check-reopen-fd.t34
-rw-r--r--tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t54
-rwxr-xr-xtests/bugs/error-gen/bug-767095.t51
-rw-r--r--tests/bugs/fuse/bug-1030208.t35
-rw-r--r--tests/bugs/fuse/bug-1126048.c43
-rwxr-xr-xtests/bugs/fuse/bug-1126048.t30
-rw-r--r--tests/bugs/fuse/bug-1283103.t59
-rw-r--r--tests/bugs/fuse/bug-1309462.t50
-rw-r--r--tests/bugs/fuse/bug-1336818.t52
-rwxr-xr-xtests/bugs/fuse/bug-858215.t79
-rw-r--r--tests/bugs/fuse/bug-858488-min-free-disk.t108
-rwxr-xr-xtests/bugs/fuse/bug-924726.t45
-rw-r--r--tests/bugs/fuse/bug-963678.t57
-rwxr-xr-xtests/bugs/fuse/bug-983477.t53
-rw-r--r--tests/bugs/fuse/bug-985074.t54
-rwxr-xr-xtests/bugs/fuse/many-groups-for-acl.t123
-rwxr-xr-xtests/bugs/fuse/setup.sh20
-rwxr-xr-xtests/bugs/fuse/teardown.sh5
-rw-r--r--tests/bugs/geo-replication/bug-1111490.t35
-rw-r--r--tests/bugs/geo-replication/bug-1296496.t44
-rwxr-xr-xtests/bugs/geo-replication/bug-877293.t41
-rw-r--r--tests/bugs/gfapi/bug-1032894.t33
-rw-r--r--tests/bugs/gfapi/bug-1093594.c311
-rwxr-xr-xtests/bugs/gfapi/bug-1093594.t22
-rwxr-xr-xtests/bugs/gfapi/bug-1319374-THIS-crash.t27
-rw-r--r--tests/bugs/gfapi/bug-1319374.c131
-rw-r--r--tests/bugs/gfapi/bug-1447266/1460514.c150
-rw-r--r--tests/bugs/gfapi/bug-1447266/1460514.t26
-rw-r--r--tests/bugs/gfapi/bug-1447266/bug-1447266.c107
-rw-r--r--tests/bugs/gfapi/bug-1447266/bug-1447266.t60
-rw-r--r--tests/bugs/gfapi/bug-1630804/gfapi-bz1630804.c112
-rw-r--r--tests/bugs/gfapi/bug-1630804/gfapi-bz1630804.t25
-rw-r--r--tests/bugs/gfapi/glfs_vol_set_IO_ERR.c163
-rwxr-xr-xtests/bugs/gfapi/glfs_vol_set_IO_ERR.t20
-rwxr-xr-xtests/bugs/glusterd/859927/repl.t59
-rw-r--r--tests/bugs/glusterd/add-brick-and-validate-replicated-volume-options.t110
-rw-r--r--tests/bugs/glusterd/brick-mux-validation-in-cluster.t108
-rw-r--r--tests/bugs/glusterd/brick-mux-validation.t104
-rw-r--r--tests/bugs/glusterd/brick-mux.t81
-rw-r--r--tests/bugs/glusterd/brick-order-check-add-brick.t61
-rwxr-xr-xtests/bugs/glusterd/bug-1070734.t80
-rw-r--r--tests/bugs/glusterd/bug-1085330-and-bug-916549.t93
-rwxr-xr-xtests/bugs/glusterd/bug-1091935-brick-order-check-from-cli-to-glusterd.t93
-rw-r--r--tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t44
-rw-r--r--tests/bugs/glusterd/bug-1242875-do-not-pass-volinfo-quota.t38
-rw-r--r--tests/bugs/glusterd/bug-1482906-peer-file-blank-line.t29
-rw-r--r--tests/bugs/glusterd/bug-1595320.t93
-rw-r--r--tests/bugs/glusterd/bug-1696046.t113
-rw-r--r--tests/bugs/glusterd/bug-1699339.t73
-rw-r--r--tests/bugs/glusterd/bug-1720566.t50
-rw-r--r--tests/bugs/glusterd/bug-824753-file-locker.c46
-rwxr-xr-xtests/bugs/glusterd/bug-824753.t45
-rw-r--r--tests/bugs/glusterd/bug-948729/bug-948729-force.t103
-rw-r--r--tests/bugs/glusterd/bug-948729/bug-948729-mode-script.t77
-rw-r--r--tests/bugs/glusterd/bug-948729/bug-948729.t80
-rw-r--r--tests/bugs/glusterd/bug-949930.t29
-rw-r--r--tests/bugs/glusterd/check_elastic_server.t63
-rw-r--r--tests/bugs/glusterd/daemon-log-level-option.t93
-rw-r--r--tests/bugs/glusterd/df-results-post-replace-brick-operations.t61
-rw-r--r--tests/bugs/glusterd/mgmt-handshake-and-volume-sync-post-glusterd-restart.t71
-rw-r--r--tests/bugs/glusterd/optimized-basic-testcases-in-cluster.t115
-rw-r--r--tests/bugs/glusterd/optimized-basic-testcases.t305
-rw-r--r--tests/bugs/glusterd/quorum-validation.t122
-rw-r--r--tests/bugs/glusterd/rebalance-in-cluster.t52
-rw-r--r--tests/bugs/glusterd/rebalance-operations-in-single-node.t131
-rw-r--r--tests/bugs/glusterd/remove-brick-in-cluster.t60
-rw-r--r--tests/bugs/glusterd/remove-brick-testcases.t119
-rw-r--r--tests/bugs/glusterd/remove-brick-validation.t68
-rw-r--r--tests/bugs/glusterd/removing-multiple-bricks-in-single-remove-brick-command.t80
-rw-r--r--tests/bugs/glusterd/replace-brick-operations.t48
-rw-r--r--tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t63
-rw-r--r--tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t54
-rw-r--r--tests/bugs/glusterd/snapshot-operations.t50
-rw-r--r--tests/bugs/glusterd/sync-post-glusterd-restart.t54
-rw-r--r--tests/bugs/glusterd/validating-options-for-replicated-volume.t142
-rw-r--r--tests/bugs/glusterd/validating-server-quorum.t125
-rwxr-xr-xtests/bugs/glusterfs-server/bug-852147.t85
-rwxr-xr-xtests/bugs/glusterfs-server/bug-861542.t50
-rwxr-xr-xtests/bugs/glusterfs-server/bug-864222.t30
-rw-r--r--tests/bugs/glusterfs-server/bug-873549.t17
-rwxr-xr-xtests/bugs/glusterfs-server/bug-877992.t63
-rwxr-xr-xtests/bugs/glusterfs-server/bug-887145.t99
-rw-r--r--tests/bugs/glusterfs-server/bug-889996.t19
-rwxr-xr-xtests/bugs/glusterfs-server/bug-904300.t65
-rw-r--r--tests/bugs/glusterfs-server/bug-905864.c83
-rw-r--r--tests/bugs/glusterfs-server/bug-905864.t32
-rwxr-xr-xtests/bugs/glusterfs-server/bug-912297.t44
-rw-r--r--tests/bugs/glusterfs/bug-1482528.t100
-rwxr-xr-xtests/bugs/glusterfs/bug-811493.t18
-rwxr-xr-xtests/bugs/glusterfs/bug-844688.t55
-rw-r--r--tests/bugs/glusterfs/bug-848251.t52
-rwxr-xr-xtests/bugs/glusterfs/bug-853690.t91
-rw-r--r--tests/bugs/glusterfs/bug-856455.t43
-rw-r--r--tests/bugs/glusterfs/bug-860297.t13
-rw-r--r--tests/bugs/glusterfs/bug-861015-index.t36
-rw-r--r--tests/bugs/glusterfs/bug-861015-log.t29
-rw-r--r--tests/bugs/glusterfs/bug-866459.t44
-rw-r--r--tests/bugs/glusterfs/bug-867253.t64
-rw-r--r--tests/bugs/glusterfs/bug-869724.t37
-rwxr-xr-xtests/bugs/glusterfs/bug-872923.t59
-rw-r--r--tests/bugs/glusterfs/bug-873962-spb.t40
-rwxr-xr-xtests/bugs/glusterfs/bug-873962.t107
-rwxr-xr-xtests/bugs/glusterfs/bug-879490.t37
-rwxr-xr-xtests/bugs/glusterfs/bug-879494.t37
-rwxr-xr-xtests/bugs/glusterfs/bug-892730.t77
-rw-r--r--tests/bugs/glusterfs/bug-893338.t34
-rwxr-xr-xtests/bugs/glusterfs/bug-893378.t75
-rw-r--r--tests/bugs/glusterfs/bug-895235.t23
-rwxr-xr-xtests/bugs/glusterfs/bug-896431.t89
-rwxr-xr-xtests/bugs/glusterfs/bug-902610.t66
-rw-r--r--tests/bugs/glusterfs/bug-906646.t97
-rw-r--r--tests/bugs/glusterfs/getlk_owner.c124
-rw-r--r--tests/bugs/heal-symlinks.t65
-rw-r--r--tests/bugs/index/bug-1559004-EMLINK-handling.t91
-rw-r--r--tests/bugs/io-cache/bug-858242.c84
-rwxr-xr-xtests/bugs/io-cache/bug-858242.t28
-rw-r--r--tests/bugs/io-cache/bug-read-hang.c125
-rwxr-xr-xtests/bugs/io-cache/bug-read-hang.t34
-rwxr-xr-xtests/bugs/io-stats/bug-1598548.t41
-rwxr-xr-xtests/bugs/logging/bug-823081.t41
-rwxr-xr-xtests/bugs/md-cache/afr-stale-read.t44
-rwxr-xr-xtests/bugs/md-cache/bug-1211863.t69
-rwxr-xr-xtests/bugs/md-cache/bug-1211863_unlink.t49
-rw-r--r--tests/bugs/md-cache/bug-1476324.t27
-rwxr-xr-xtests/bugs/md-cache/bug-1632503.t24
-rw-r--r--tests/bugs/md-cache/bug-1726205.t22
-rwxr-xr-xtests/bugs/md-cache/setxattr-prepoststat.t38
-rwxr-xr-xtests/bugs/nfs/bug-1053579.t114
-rw-r--r--tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t24
-rw-r--r--tests/bugs/nfs/bug-1157223-symlink-mounting.t126
-rw-r--r--tests/bugs/nfs/bug-1161092-nfs-acls.t39
-rwxr-xr-xtests/bugs/nfs/bug-1166862.t69
-rw-r--r--tests/bugs/nfs/bug-1210338.c31
-rw-r--r--tests/bugs/nfs/bug-1210338.t30
-rwxr-xr-xtests/bugs/nfs/bug-1302948.t13
-rwxr-xr-xtests/bugs/nfs/bug-847622.t39
-rwxr-xr-xtests/bugs/nfs/bug-877885.t39
-rwxr-xr-xtests/bugs/nfs/bug-904065.t100
-rwxr-xr-xtests/bugs/nfs/bug-915280.t54
-rwxr-xr-xtests/bugs/nfs/bug-970070.t13
-rwxr-xr-xtests/bugs/nfs/bug-974972.t41
-rw-r--r--tests/bugs/nfs/showmount-many-clients.t43
-rwxr-xr-xtests/bugs/nfs/socket-as-fifo.py33
-rw-r--r--tests/bugs/nfs/socket-as-fifo.t25
-rw-r--r--tests/bugs/nfs/subdir-trailing-slash.t32
-rwxr-xr-xtests/bugs/nfs/zero-atime.t33
-rwxr-xr-xtests/bugs/nl-cache/bug-1451588.t25
-rw-r--r--tests/bugs/posix/bug-1034716.t60
-rwxr-xr-xtests/bugs/posix/bug-1040275-brick-uid-reset-on-volume-restart.t60
-rwxr-xr-xtests/bugs/posix/bug-1113960.t98
-rwxr-xr-xtests/bugs/posix/bug-1122028.t51
-rw-r--r--tests/bugs/posix/bug-1175711.c37
-rwxr-xr-xtests/bugs/posix/bug-1175711.t30
-rw-r--r--tests/bugs/posix/bug-1360679.t36
-rwxr-xr-xtests/bugs/posix/bug-1619720.t58
-rw-r--r--tests/bugs/posix/bug-1651445.t54
-rw-r--r--tests/bugs/posix/bug-765380.t39
-rwxr-xr-xtests/bugs/posix/bug-990028.t157
-rw-r--r--tests/bugs/posix/bug-gfid-path.t70
-rw-r--r--tests/bugs/posix/disallow-gfid-volumeid-fremovexattr.c104
-rwxr-xr-xtests/bugs/posix/disallow-gfid-volumeid-fremovexattr.t21
-rw-r--r--tests/bugs/posix/disallow-gfid-volumeid-removexattr.t26
-rw-r--r--tests/bugs/protocol/bug-1321578.t82
-rw-r--r--tests/bugs/protocol/bug-1390914.t36
-rw-r--r--tests/bugs/protocol/bug-1433815-auth-allow.t40
-rwxr-xr-xtests/bugs/protocol/bug-762989.t40
-rwxr-xr-xtests/bugs/protocol/bug-808400-dist.t32
-rw-r--r--tests/bugs/protocol/bug-808400-fcntl.c124
-rw-r--r--tests/bugs/protocol/bug-808400-flock.c100
-rwxr-xr-xtests/bugs/protocol/bug-808400-repl.t31
-rwxr-xr-xtests/bugs/protocol/bug-808400.t35
-rwxr-xr-xtests/bugs/quick-read/bug-846240.t59
-rwxr-xr-xtests/bugs/quick-read/bz1523599/bz1523599.t32
-rw-r--r--tests/bugs/quick-read/bz1523599/test_bz1523599.c198
-rw-r--r--tests/bugs/quota/afr-quota-xattr-mdata-heal.t140
-rw-r--r--tests/bugs/quota/bug-1035576.t53
-rw-r--r--tests/bugs/quota/bug-1038598.t52
-rw-r--r--tests/bugs/quota/bug-1087198.t86
-rwxr-xr-xtests/bugs/quota/bug-1104692.t26
-rw-r--r--tests/bugs/quota/bug-1153964.t81
-rw-r--r--tests/bugs/quota/bug-1178130.t44
-rw-r--r--tests/bugs/quota/bug-1235182.t61
-rw-r--r--tests/bugs/quota/bug-1243798.t46
-rw-r--r--tests/bugs/quota/bug-1250582-volume-reset-should-not-remove-quota-quota-deem-statfs.t53
-rw-r--r--tests/bugs/quota/bug-1260545.t53
-rw-r--r--tests/bugs/quota/bug-1287996.t21
-rw-r--r--tests/bugs/quota/bug-1292020.t28
-rw-r--r--tests/bugs/quota/bug-1293601.t33
-rw-r--r--tests/bugs/read-only/bug-1134822-read-only-default-in-graph.t67
-rw-r--r--tests/bugs/readdir-ahead/bug-1390050.c72
-rw-r--r--tests/bugs/readdir-ahead/bug-1390050.t29
-rwxr-xr-xtests/bugs/readdir-ahead/bug-1436090.t44
-rwxr-xr-xtests/bugs/readdir-ahead/bug-1439640.t31
-rwxr-xr-xtests/bugs/readdir-ahead/bug-1446516.t21
-rwxr-xr-xtests/bugs/readdir-ahead/bug-1512437.t23
-rw-r--r--tests/bugs/readdir-ahead/bug-1670253-consistent-metadata.t23
-rw-r--r--tests/bugs/replicate/886998/strict-readdir.t52
-rwxr-xr-xtests/bugs/replicate/bug-1015990-rep.t61
-rwxr-xr-xtests/bugs/replicate/bug-1015990.t69
-rw-r--r--tests/bugs/replicate/bug-1032927.t32
-rwxr-xr-xtests/bugs/replicate/bug-1037501.t104
-rwxr-xr-xtests/bugs/replicate/bug-1046624.t47
-rw-r--r--tests/bugs/replicate/bug-1058797.t48
-rw-r--r--tests/bugs/replicate/bug-1101647.t31
-rw-r--r--tests/bugs/replicate/bug-1130892.t76
-rw-r--r--tests/bugs/replicate/bug-1132102.t28
-rw-r--r--tests/bugs/replicate/bug-1134691-afr-lookup-metadata-heal.t56
-rw-r--r--tests/bugs/replicate/bug-1139230.t58
-rw-r--r--tests/bugs/replicate/bug-1180545.t77
-rw-r--r--tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t57
-rw-r--r--tests/bugs/replicate/bug-1221481-allow-fops-on-dir-split-brain.t42
-rw-r--r--tests/bugs/replicate/bug-1238398-split-brain-resolution.t51
-rw-r--r--tests/bugs/replicate/bug-1238508-self-heal.t51
-rw-r--r--tests/bugs/replicate/bug-1250170-fsync.c56
-rw-r--r--tests/bugs/replicate/bug-1250170-fsync.t35
-rw-r--r--tests/bugs/replicate/bug-1266876-allow-reset-brick-for-same-path.t54
-rw-r--r--tests/bugs/replicate/bug-1292379.t58
-rw-r--r--tests/bugs/replicate/bug-1297695.t43
-rw-r--r--tests/bugs/replicate/bug-1305031-block-reads-on-metadata-sbrain.t40
-rw-r--r--tests/bugs/replicate/bug-1325792.t25
-rw-r--r--tests/bugs/replicate/bug-1335652.t29
-rw-r--r--tests/bugs/replicate/bug-1340623-mkdir-fails-remove-brick-started.t46
-rw-r--r--tests/bugs/replicate/bug-1341650.t63
-rw-r--r--tests/bugs/replicate/bug-1363721.t118
-rw-r--r--tests/bugs/replicate/bug-1365455.t54
-rw-r--r--tests/bugs/replicate/bug-1386188-sbrain-fav-child.t82
-rw-r--r--tests/bugs/replicate/bug-1402730.t47
-rw-r--r--tests/bugs/replicate/bug-1408712.t101
-rw-r--r--tests/bugs/replicate/bug-1417522-block-split-brain-resolution.t69
-rw-r--r--tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t79
-rw-r--r--tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t46
-rw-r--r--tests/bugs/replicate/bug-1448804-check-quorum-type-values.t47
-rw-r--r--tests/bugs/replicate/bug-1473026.t31
-rw-r--r--tests/bugs/replicate/bug-1477169-entry-selfheal-rename.t52
-rw-r--r--tests/bugs/replicate/bug-1480525.t18
-rw-r--r--tests/bugs/replicate/bug-1493415-gfid-heal.t78
-rw-r--r--tests/bugs/replicate/bug-1498570-client-iot-graph-check.t48
-rwxr-xr-xtests/bugs/replicate/bug-1539358-split-brain-detection.t89
-rw-r--r--tests/bugs/replicate/bug-1561129-enospc.t24
-rw-r--r--tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t72
-rw-r--r--tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t128
-rw-r--r--tests/bugs/replicate/bug-1626994-info-split-brain.t62
-rw-r--r--tests/bugs/replicate/bug-1637249-gfid-heal.t149
-rw-r--r--tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t45
-rw-r--r--tests/bugs/replicate/bug-1655050-dir-sbrain-size-policy.t55
-rwxr-xr-xtests/bugs/replicate/bug-1655052-sbrain-policy-same-size.t55
-rw-r--r--tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t95
-rw-r--r--tests/bugs/replicate/bug-1657783-do-not-update-read-subvol-on-rename-link.t40
-rw-r--r--tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t38
-rwxr-xr-xtests/bugs/replicate/bug-1696599-io-hang.t47
-rw-r--r--tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t136
-rw-r--r--tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t116
-rw-r--r--tests/bugs/replicate/bug-1728770-pass-xattrs.t52
-rw-r--r--tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t102
-rw-r--r--tests/bugs/replicate/bug-1744548-heal-timeout.t47
-rw-r--r--tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t89
-rw-r--r--tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t111
-rw-r--r--tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t74
-rw-r--r--tests/bugs/replicate/bug-1801624-entry-heal.t58
-rw-r--r--tests/bugs/replicate/bug-765564.t86
-rwxr-xr-xtests/bugs/replicate/bug-767585-gfid.t42
-rwxr-xr-xtests/bugs/replicate/bug-802417.t120
-rw-r--r--tests/bugs/replicate/bug-821056.t52
-rwxr-xr-xtests/bugs/replicate/bug-830665.t127
-rwxr-xr-xtests/bugs/replicate/bug-859581.t53
-rwxr-xr-xtests/bugs/replicate/bug-865825.t82
-rw-r--r--tests/bugs/replicate/bug-880898.t30
-rw-r--r--tests/bugs/replicate/bug-884328.t12
-rw-r--r--tests/bugs/replicate/bug-886998.t52
-rw-r--r--tests/bugs/replicate/bug-888174.t62
-rw-r--r--tests/bugs/replicate/bug-913051.t56
-rw-r--r--tests/bugs/replicate/bug-916226.t26
-rw-r--r--tests/bugs/replicate/bug-918437-sh-mtime.t71
-rwxr-xr-xtests/bugs/replicate/bug-921231.t31
-rw-r--r--tests/bugs/replicate/bug-957877.t33
-rw-r--r--tests/bugs/replicate/bug-976800.t29
-rwxr-xr-xtests/bugs/replicate/bug-977797.t96
-rw-r--r--tests/bugs/replicate/bug-978794.t29
-rwxr-xr-xtests/bugs/replicate/bug-979365.t47
-rwxr-xr-xtests/bugs/replicate/bug-986905.t27
-rw-r--r--tests/bugs/replicate/issue-1254-prioritize-enospc.t80
-rw-r--r--tests/bugs/replicate/mdata-heal-no-xattrs.t59
-rw-r--r--tests/bugs/replicate/ta-inode-refresh-read.t40
-rwxr-xr-xtests/bugs/rpc/bug-1043886.t58
-rwxr-xr-xtests/bugs/rpc/bug-847624.t29
-rw-r--r--tests/bugs/rpc/bug-884452.t47
-rwxr-xr-xtests/bugs/rpc/bug-921072.t132
-rwxr-xr-xtests/bugs/rpc/bug-954057.t63
-rw-r--r--tests/bugs/shard/bug-1245547.t35
-rw-r--r--tests/bugs/shard/bug-1248887.t39
-rw-r--r--tests/bugs/shard/bug-1250855.t34
-rw-r--r--tests/bugs/shard/bug-1251824.t109
-rw-r--r--tests/bugs/shard/bug-1256580.t34
-rw-r--r--tests/bugs/shard/bug-1258334.t40
-rw-r--r--tests/bugs/shard/bug-1259651.t40
-rw-r--r--tests/bugs/shard/bug-1260637.t42
-rw-r--r--tests/bugs/shard/bug-1261773.t14
-rw-r--r--tests/bugs/shard/bug-1272986.t35
-rw-r--r--tests/bugs/shard/bug-1342298.t23
-rw-r--r--tests/bugs/shard/bug-1468483.t58
-rw-r--r--tests/bugs/shard/bug-1488546.t25
-rw-r--r--tests/bugs/shard/bug-1568521-EEXIST.t91
-rw-r--r--tests/bugs/shard/bug-1568521.t53
-rw-r--r--tests/bugs/shard/bug-1605056-2.t34
-rw-r--r--tests/bugs/shard/bug-1605056.t63
-rw-r--r--tests/bugs/shard/bug-1669077.t29
-rw-r--r--tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t34
-rw-r--r--tests/bugs/shard/bug-1696136.c122
-rw-r--r--tests/bugs/shard/bug-1696136.t33
-rw-r--r--tests/bugs/shard/bug-1705884.t32
-rw-r--r--tests/bugs/shard/bug-1738419.t29
-rw-r--r--tests/bugs/shard/bug-shard-discard.c70
-rw-r--r--tests/bugs/shard/bug-shard-discard.t65
-rw-r--r--tests/bugs/shard/bug-shard-zerofill.c60
-rw-r--r--tests/bugs/shard/bug-shard-zerofill.t46
-rw-r--r--tests/bugs/shard/configure-lru-limit.t52
-rw-r--r--tests/bugs/shard/issue-1243.t43
-rw-r--r--tests/bugs/shard/issue-1281.t34
-rw-r--r--tests/bugs/shard/issue-1425.t45
-rw-r--r--tests/bugs/shard/parallel-truncate-read.t48
-rw-r--r--tests/bugs/shard/shard-append-test.c183
-rw-r--r--tests/bugs/shard/shard-append-test.t32
-rw-r--r--tests/bugs/shard/shard-fallocate.c113
-rw-r--r--tests/bugs/shard/shard-inode-refcount-test.t30
-rw-r--r--tests/bugs/shard/unlinks-and-renames.t333
-rw-r--r--tests/bugs/shard/zero-flag.t76
-rwxr-xr-xtests/bugs/snapshot/bug-1045333.t44
-rwxr-xr-xtests/bugs/snapshot/bug-1049834.t44
-rw-r--r--tests/bugs/snapshot/bug-1064768.t20
-rw-r--r--tests/bugs/snapshot/bug-1087203.t103
-rwxr-xr-xtests/bugs/snapshot/bug-1090042.t26
-rw-r--r--tests/bugs/snapshot/bug-1109770.t65
-rw-r--r--tests/bugs/snapshot/bug-1109889.t74
-rwxr-xr-xtests/bugs/snapshot/bug-1111041.t40
-rw-r--r--tests/bugs/snapshot/bug-1112613.t49
-rw-r--r--tests/bugs/snapshot/bug-1113975.t38
-rw-r--r--tests/bugs/snapshot/bug-1155042-dont-display-deactivated-snapshots.t36
-rwxr-xr-xtests/bugs/snapshot/bug-1157991.t30
-rwxr-xr-xtests/bugs/snapshot/bug-1162462.t38
-rwxr-xr-xtests/bugs/snapshot/bug-1162498.t56
-rwxr-xr-xtests/bugs/snapshot/bug-1166197.t52
-rw-r--r--tests/bugs/snapshot/bug-1167580-set-proper-uid-and-gid-during-nfs-access.t205
-rw-r--r--tests/bugs/snapshot/bug-1168875.t96
-rw-r--r--tests/bugs/snapshot/bug-1178079.t24
-rw-r--r--tests/bugs/snapshot/bug-1202436-calculate-quota-cksum-during-snap-restore.t37
-rw-r--r--tests/bugs/snapshot/bug-1205592.t30
-rw-r--r--tests/bugs/snapshot/bug-1227646.t31
-rwxr-xr-xtests/bugs/snapshot/bug-1232430.t22
-rwxr-xr-xtests/bugs/snapshot/bug-1250387.t26
-rw-r--r--tests/bugs/snapshot/bug-1260848.t28
-rwxr-xr-xtests/bugs/snapshot/bug-1275616.t50
-rw-r--r--tests/bugs/snapshot/bug-1279327.t29
-rw-r--r--tests/bugs/snapshot/bug-1316437.t29
-rw-r--r--tests/bugs/snapshot/bug-1322772-real-path-fix-for-snapshot.t56
-rwxr-xr-xtests/bugs/snapshot/bug-1399598-uss-with-ssl.t108
-rw-r--r--tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t133
-rw-r--r--tests/bugs/snapshot/bug-1512451-snapshot-creation-failed-after-brick-reset.t39
-rw-r--r--tests/bugs/snapshot/bug-1597662.t58
-rw-r--r--tests/bugs/snapshot/bug-1618004-fix-memory-corruption-in-snap-import.t48
-rw-r--r--tests/bugs/stripe/bug-1002207.t55
-rw-r--r--tests/bugs/stripe/bug-1111454.t20
-rwxr-xr-xtests/bugs/trace/bug-797171.t41
-rwxr-xr-xtests/bugs/transport/bug-873367.t45
-rw-r--r--tests/bugs/unclassified/bug-1034085.t31
-rw-r--r--tests/bugs/unclassified/bug-1357397.t35
-rw-r--r--tests/bugs/unclassified/bug-874498.t64
-rw-r--r--tests/bugs/unclassified/bug-991622.t35
-rwxr-xr-xtests/bugs/upcall/bug-1227204.t29
-rwxr-xr-xtests/bugs/upcall/bug-1369430.t43
-rwxr-xr-xtests/bugs/upcall/bug-1394131.t29
-rwxr-xr-xtests/bugs/upcall/bug-1422776.t30
-rwxr-xr-xtests/bugs/upcall/bug-1458127.t36
-rwxr-xr-xtests/bugs/upcall/bug-upcall-stat.t39
-rw-r--r--tests/bugs/write-behind/bug-1058663.c123
-rw-r--r--tests/bugs/write-behind/bug-1058663.t28
-rw-r--r--tests/bugs/write-behind/bug-1279730.c149
-rwxr-xr-xtests/bugs/write-behind/bug-1279730.t37
-rw-r--r--tests/bugs/write-behind/issue-884.c267
-rwxr-xr-xtests/bugs/write-behind/issue-884.t40
-rw-r--r--tests/changelog.rc9
-rw-r--r--tests/cleanup.sh4
-rw-r--r--tests/cluster.rc218
-rw-r--r--tests/common-utils.rc7
-rw-r--r--tests/configfiles/bad_exports9
-rw-r--r--tests/configfiles/bad_netgroups5
-rw-r--r--tests/configfiles/big_exports10
-rw-r--r--tests/configfiles/exports1
-rw-r--r--tests/configfiles/exports-v61
-rw-r--r--tests/configfiles/exports_bad_opt1
-rw-r--r--tests/configfiles/netgroups4
-rw-r--r--tests/dht.rc174
-rw-r--r--tests/ec.rc18
-rw-r--r--tests/env.rc.in42
-rw-r--r--tests/experimental/.gitignore7
-rw-r--r--tests/fallocate.rc19
-rw-r--r--tests/fdl.rc12
-rwxr-xr-xtests/features/delay-gen.t52
-rw-r--r--tests/features/dh1024.pem5
-rw-r--r--tests/features/fdl-overflow.t72
-rw-r--r--tests/features/fdl.t44
-rw-r--r--tests/features/flock_interrupt.t32
-rw-r--r--tests/features/fuse-lru-limit.t43
-rw-r--r--tests/features/glfs-lease-recall.c372
-rw-r--r--tests/features/glfs-lease.c717
-rwxr-xr-xtests/features/glfs-lease.t31
-rw-r--r--tests/features/interrupt.t71
-rwxr-xr-xtests/features/ipc.t38
-rwxr-xr-xtests/features/ipctest.py28
-rw-r--r--tests/features/lock_revocation.t54
-rw-r--r--tests/features/mandatory-lock-forced.c143
-rw-r--r--tests/features/mandatory-lock-forced.t80
-rwxr-xr-xtests/features/nuke.t41
-rw-r--r--tests/features/open_and_sleep.c27
-rw-r--r--tests/features/openssl.cnf.in41
-rwxr-xr-xtests/features/readdir-ahead.t45
-rw-r--r--tests/features/recon.t59
-rwxr-xr-xtests/features/ssl-authz.t121
-rw-r--r--tests/features/ssl-ciphers.t244
-rw-r--r--tests/features/subdir-mount.t121
-rwxr-xr-xtests/features/trash.t247
-rwxr-xr-xtests/features/unhashed-auto.t125
-rwxr-xr-xtests/features/weighted-rebalance.t78
-rwxr-xr-xtests/features/worm.t117
-rw-r--r--tests/features/worm_sh.t75
-rw-r--r--tests/fileio.rc61
-rw-r--r--tests/geo-rep.rc495
-rw-r--r--tests/gfid2path/block-mount-access.t51
-rw-r--r--tests/gfid2path/get-gfid-to-path.t72
-rw-r--r--tests/gfid2path/gfid2path_fuse.t166
-rw-r--r--tests/gfid2path/gfid2path_nfs.t152
-rw-r--r--tests/glusterfind/glusterfind-basic.t84
-rw-r--r--tests/include.rc1350
-rw-r--r--tests/line-coverage/afr-heal-info.t43
-rwxr-xr-xtests/line-coverage/arbiter-coverage.t32
-rw-r--r--tests/line-coverage/cli-peer-and-volume-operations.t135
-rw-r--r--tests/line-coverage/cli-volume-top-profile-coverage.t62
-rwxr-xr-xtests/line-coverage/errorgen-coverage.t42
-rw-r--r--tests/line-coverage/log-and-brick-ops-negative-case.t82
-rwxr-xr-xtests/line-coverage/meta-max-coverage.t33
-rw-r--r--tests/line-coverage/namespace-linecoverage.t39
-rwxr-xr-xtests/line-coverage/old-protocol.t37
-rwxr-xr-xtests/line-coverage/quiesce-coverage.t44
-rw-r--r--tests/line-coverage/shard-coverage.t33
-rw-r--r--tests/line-coverage/some-features-in-libglusterfs.t67
-rw-r--r--tests/line-coverage/volfile-with-all-graph-syntax.t73
-rw-r--r--tests/nfs.rc70
-rwxr-xr-xtests/performance/open-behind.t75
-rw-r--r--tests/performance/quick-read.t55
-rw-r--r--tests/snapshot.rc484
-rw-r--r--tests/ssl.rc35
-rw-r--r--tests/thin-arbiter.rc613
-rw-r--r--tests/traps.rc22
-rw-r--r--tests/utils/arequal-checksum.c633
-rw-r--r--tests/utils/changelog/changelog.h125
-rw-r--r--tests/utils/changelog/get-history.c71
-rw-r--r--tests/utils/changelog/test-changelog-api.c98
-rw-r--r--tests/utils/changelog/test-history-api.c111
-rw-r--r--tests/utils/changelogparser.py236
-rwxr-xr-xtests/utils/create-files.py593
-rw-r--r--tests/utils/get-mdata-xattr.c152
-rwxr-xr-xtests/utils/getfattr.py133
-rwxr-xr-xtests/utils/gfid-access.py124
-rw-r--r--tests/utils/libcxattr.py108
-rwxr-xr-xtests/utils/pidof.py45
-rw-r--r--tests/utils/py2py3.py186
-rwxr-xr-xtests/utils/setfattr.py77
-rwxr-xr-xtests/utils/testn.sh16
-rw-r--r--tests/vagrant/vagrant-template-centos6/Vagrantfile55
-rw-r--r--tests/vagrant/vagrant-template-centos6/roles/daemon-services/tasks/main.yml3
-rw-r--r--tests/vagrant/vagrant-template-centos6/roles/fix-localhost/tasks/main.yml6
-rw-r--r--tests/vagrant/vagrant-template-centos6/roles/install-pkgs/tasks/main.yml92
-rw-r--r--tests/vagrant/vagrant-template-centos6/roles/iptables/tasks/main.yml3
-rw-r--r--tests/vagrant/vagrant-template-centos6/roles/mock-user/tasks/main.yml3
-rw-r--r--tests/vagrant/vagrant-template-centos6/roles/prepare-brick/tasks/main.yml6
-rw-r--r--tests/vagrant/vagrant-template-centos6/roles/remove-gluster-pkgs/tasks/main.yml4
-rw-r--r--tests/vagrant/vagrant-template-centos6/setup.yml15
-rw-r--r--tests/vagrant/vagrant-template-fedora/Vagrantfile56
-rw-r--r--tests/vagrant/vagrant-template-fedora/roles/daemon-services/tasks/main.yml3
-rw-r--r--tests/vagrant/vagrant-template-fedora/roles/fix-localhost/tasks/main.yml6
-rw-r--r--tests/vagrant/vagrant-template-fedora/roles/install-pkgs/tasks/main.yml85
-rw-r--r--tests/vagrant/vagrant-template-fedora/roles/iptables/tasks/main.yml3
-rw-r--r--tests/vagrant/vagrant-template-fedora/roles/mock-user/tasks/main.yml3
-rw-r--r--tests/vagrant/vagrant-template-fedora/roles/prepare-brick/tasks/main.yml6
-rw-r--r--tests/vagrant/vagrant-template-fedora/roles/remove-gluster-pkgs/tasks/main.yml4
-rw-r--r--tests/vagrant/vagrant-template-fedora/roles/selinux/tasks/main.yml3
-rw-r--r--tests/vagrant/vagrant-template-fedora/setup.yml16
-rw-r--r--tests/volume.rc1002
-rw-r--r--tools/Makefile.am3
-rw-r--r--tools/gfind_missing_files/Makefile.am32
-rw-r--r--tools/gfind_missing_files/gcrawler.c581
-rw-r--r--tools/gfind_missing_files/gfid_to_path.py162
-rw-r--r--tools/gfind_missing_files/gfid_to_path.sh43
-rw-r--r--tools/gfind_missing_files/gfind_missing_files.sh119
-rw-r--r--tools/glusterfind/Makefile.am24
-rwxr-xr-xtools/glusterfind/S57glusterfind-delete-post.py69
-rw-r--r--tools/glusterfind/glusterfind.in18
-rw-r--r--tools/glusterfind/src/Makefile.am16
-rw-r--r--tools/glusterfind/src/__init__.py9
-rw-r--r--tools/glusterfind/src/brickfind.py118
-rw-r--r--tools/glusterfind/src/changelog.py469
-rw-r--r--tools/glusterfind/src/changelogdata.py440
-rw-r--r--tools/glusterfind/src/conf.py31
-rw-r--r--tools/glusterfind/src/gfind_py2py3.py88
-rw-r--r--tools/glusterfind/src/libgfchangelog.py92
-rw-r--r--tools/glusterfind/src/main.py921
-rw-r--r--tools/glusterfind/src/nodeagent.py139
-rw-r--r--tools/glusterfind/src/tool.conf.in10
-rw-r--r--tools/glusterfind/src/utils.py267
-rw-r--r--tools/setgfid2path/Makefile.am5
-rw-r--r--tools/setgfid2path/gluster-setgfid2path.854
-rw-r--r--tools/setgfid2path/src/Makefile.am16
-rw-r--r--tools/setgfid2path/src/main.c130
-rw-r--r--xlators/Makefile.am12
-rw-r--r--xlators/bindings/Makefile.am1
-rw-r--r--xlators/bindings/python/src/Makefile.am19
-rw-r--r--xlators/bindings/python/src/gluster.py47
-rw-r--r--xlators/bindings/python/src/glusterstack.py55
-rw-r--r--xlators/bindings/python/src/glustertypes.py167
-rw-r--r--xlators/bindings/python/src/python.c232
-rw-r--r--xlators/bindings/python/src/testxlator.py56
-rw-r--r--xlators/cluster/Makefile.am2
-rw-r--r--xlators/cluster/afr/src/Makefile.am34
-rw-r--r--xlators/cluster/afr/src/afr-common.c10188
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.c869
-rw-r--r--xlators/cluster/afr/src/afr-dir-read.h21
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.c2586
-rw-r--r--xlators/cluster/afr/src/afr-dir-write.h33
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.c2660
-rw-r--r--xlators/cluster/afr/src/afr-inode-read.h33
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.c3659
-rw-r--r--xlators/cluster/afr/src/afr-inode-write.h81
-rw-r--r--xlators/cluster/afr/src/afr-lk-common.c2738
-rw-r--r--xlators/cluster/afr/src/afr-mem-types.h51
-rw-r--r--xlators/cluster/afr/src/afr-messages.h167
-rw-r--r--xlators/cluster/afr/src/afr-open.c666
-rw-r--r--xlators/cluster/afr/src/afr-read-txn.c494
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.c810
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-algorithm.h32
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.c4581
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-common.h128
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-data.c2006
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-entry.c3389
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-metadata.c997
-rw-r--r--xlators/cluster/afr/src/afr-self-heal-name.c616
-rw-r--r--xlators/cluster/afr/src/afr-self-heal.h371
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.c2479
-rw-r--r--xlators/cluster/afr/src/afr-self-heald.h89
-rw-r--r--xlators/cluster/afr/src/afr-transaction.c3552
-rw-r--r--xlators/cluster/afr/src/afr-transaction.h67
-rw-r--r--xlators/cluster/afr/src/afr.c1710
-rw-r--r--xlators/cluster/afr/src/afr.h2036
-rw-r--r--xlators/cluster/afr/src/pump.c2636
-rw-r--r--xlators/cluster/afr/src/pump.h78
-rw-r--r--xlators/cluster/dht/src/Makefile.am33
-rw-r--r--xlators/cluster/dht/src/dht-common.c13979
-rw-r--r--xlators/cluster/dht/src/dht-common.h1842
-rw-r--r--xlators/cluster/dht/src/dht-diskusage.c700
-rw-r--r--xlators/cluster/dht/src/dht-hashfn.c125
-rw-r--r--xlators/cluster/dht/src/dht-helper.c2705
-rw-r--r--xlators/cluster/dht/src/dht-inode-read.c2143
-rw-r--r--xlators/cluster/dht/src/dht-inode-write.c1674
-rw-r--r--xlators/cluster/dht/src/dht-layout.c1189
-rw-r--r--xlators/cluster/dht/src/dht-linkfile.c406
-rw-r--r--xlators/cluster/dht/src/dht-lock.c1392
-rw-r--r--xlators/cluster/dht/src/dht-lock.h91
-rw-r--r--xlators/cluster/dht/src/dht-mem-types.h37
-rw-r--r--xlators/cluster/dht/src/dht-messages.h386
-rw-r--r--xlators/cluster/dht/src/dht-rebalance.c5580
-rw-r--r--xlators/cluster/dht/src/dht-rename.c2441
-rw-r--r--xlators/cluster/dht/src/dht-selfheal.c3059
-rw-r--r--xlators/cluster/dht/src/dht-shared.c1104
-rw-r--r--xlators/cluster/dht/src/dht.c692
-rw-r--r--xlators/cluster/dht/src/nufa.c1142
-rw-r--r--xlators/cluster/dht/src/switch.c1617
-rw-r--r--xlators/cluster/dht/src/unittest/dht_layout_mock.c73
-rw-r--r--xlators/cluster/dht/src/unittest/dht_layout_unittest.c127
-rw-r--r--xlators/cluster/ec/Makefile.am (renamed from libglusterfsclient/Makefile.am)0
-rw-r--r--xlators/cluster/ec/src/Makefile.am83
-rw-r--r--xlators/cluster/ec/src/ec-code-avx.c109
-rw-r--r--xlators/cluster/ec/src/ec-code-avx.h18
-rw-r--r--xlators/cluster/ec/src/ec-code-c.c11679
-rw-r--r--xlators/cluster/ec/src/ec-code-c.h27
-rw-r--r--xlators/cluster/ec/src/ec-code-intel.c594
-rw-r--r--xlators/cluster/ec/src/ec-code-intel.h191
-rw-r--r--xlators/cluster/ec/src/ec-code-sse.c101
-rw-r--r--xlators/cluster/ec/src/ec-code-sse.h18
-rw-r--r--xlators/cluster/ec/src/ec-code-x64.c144
-rw-r--r--xlators/cluster/ec/src/ec-code-x64.h18
-rw-r--r--xlators/cluster/ec/src/ec-code.c1060
-rw-r--r--xlators/cluster/ec/src/ec-code.h44
-rw-r--r--xlators/cluster/ec/src/ec-combine.c995
-rw-r--r--xlators/cluster/ec/src/ec-combine.h44
-rw-r--r--xlators/cluster/ec/src/ec-common.c3042
-rw-r--r--xlators/cluster/ec/src/ec-common.h234
-rw-r--r--xlators/cluster/ec/src/ec-data.c288
-rw-r--r--xlators/cluster/ec/src/ec-data.h35
-rw-r--r--xlators/cluster/ec/src/ec-dir-read.c647
-rw-r--r--xlators/cluster/ec/src/ec-dir-write.c1487
-rw-r--r--xlators/cluster/ec/src/ec-fops.h254
-rw-r--r--xlators/cluster/ec/src/ec-galois.c183
-rw-r--r--xlators/cluster/ec/src/ec-galois.h32
-rw-r--r--xlators/cluster/ec/src/ec-generic.c1591
-rw-r--r--xlators/cluster/ec/src/ec-gf8.c5882
-rw-r--r--xlators/cluster/ec/src/ec-gf8.h18
-rw-r--r--xlators/cluster/ec/src/ec-heal.c3367
-rw-r--r--xlators/cluster/ec/src/ec-heald.c681
-rw-r--r--xlators/cluster/ec/src/ec-heald.h30
-rw-r--r--xlators/cluster/ec/src/ec-helpers.c867
-rw-r--r--xlators/cluster/ec/src/ec-helpers.h200
-rw-r--r--xlators/cluster/ec/src/ec-inode-read.c2046
-rw-r--r--xlators/cluster/ec/src/ec-inode-write.c2369
-rw-r--r--xlators/cluster/ec/src/ec-locks.c1128
-rw-r--r--xlators/cluster/ec/src/ec-mem-types.h30
-rw-r--r--xlators/cluster/ec/src/ec-messages.h61
-rw-r--r--xlators/cluster/ec/src/ec-method.c433
-rw-r--r--xlators/cluster/ec/src/ec-method.h48
-rw-r--r--xlators/cluster/ec/src/ec-types.h690
-rw-r--r--xlators/cluster/ec/src/ec.c1873
-rw-r--r--xlators/cluster/ec/src/ec.h34
-rw-r--r--xlators/cluster/ha/Makefile.am3
-rw-r--r--xlators/cluster/ha/src/Makefile.am15
-rw-r--r--xlators/cluster/ha/src/ha-helpers.c204
-rw-r--r--xlators/cluster/ha/src/ha-mem-types.h37
-rw-r--r--xlators/cluster/ha/src/ha.c4023
-rw-r--r--xlators/cluster/ha/src/ha.h63
-rw-r--r--xlators/cluster/map/Makefile.am3
-rw-r--r--xlators/cluster/map/src/Makefile.am15
-rw-r--r--xlators/cluster/map/src/map-helper.c358
-rw-r--r--xlators/cluster/map/src/map-mem-types.h35
-rw-r--r--xlators/cluster/map/src/map.c2577
-rw-r--r--xlators/cluster/map/src/map.h77
-rw-r--r--xlators/cluster/stripe/Makefile.am3
-rw-r--r--xlators/cluster/stripe/src/Makefile.am19
-rw-r--r--xlators/cluster/stripe/src/stripe-helpers.c590
-rw-r--r--xlators/cluster/stripe/src/stripe-mem-types.h30
-rw-r--r--xlators/cluster/stripe/src/stripe.c4969
-rw-r--r--xlators/cluster/stripe/src/stripe.h285
-rw-r--r--xlators/cluster/unify/Makefile.am3
-rw-r--r--xlators/cluster/unify/src/Makefile.am16
-rw-r--r--xlators/cluster/unify/src/unify-mem-types.h41
-rw-r--r--xlators/cluster/unify/src/unify-self-heal.c1239
-rw-r--r--xlators/cluster/unify/src/unify.c4589
-rw-r--r--xlators/cluster/unify/src/unify.h146
-rw-r--r--xlators/debug/Makefile.am2
-rw-r--r--xlators/debug/delay-gen/Makefile.am (renamed from scheduler/random/Makefile.am)2
-rw-r--r--xlators/debug/delay-gen/src/Makefile.am11
-rw-r--r--xlators/debug/delay-gen/src/delay-gen-mem-types.h21
-rw-r--r--xlators/debug/delay-gen/src/delay-gen-messages.h26
-rw-r--r--xlators/debug/delay-gen/src/delay-gen.c697
-rw-r--r--xlators/debug/delay-gen/src/delay-gen.h27
-rw-r--r--xlators/debug/error-gen/src/Makefile.am8
-rw-r--r--xlators/debug/error-gen/src/error-gen-mem-types.h26
-rw-r--r--xlators/debug/error-gen/src/error-gen.c2973
-rw-r--r--xlators/debug/error-gen/src/error-gen.h61
-rw-r--r--xlators/debug/io-stats/src/Makefile.am10
-rw-r--r--xlators/debug/io-stats/src/io-stats-mem-types.h38
-rw-r--r--xlators/debug/io-stats/src/io-stats.c5908
-rw-r--r--xlators/debug/sink/Makefile.am (renamed from scheduler/alu/Makefile.am)1
-rw-r--r--xlators/debug/sink/src/Makefile.am14
-rw-r--r--xlators/debug/sink/src/sink.c94
-rw-r--r--xlators/debug/trace/src/Makefile.am9
-rw-r--r--xlators/debug/trace/src/trace-mem-types.h20
-rw-r--r--xlators/debug/trace/src/trace.c4911
-rw-r--r--xlators/debug/trace/src/trace.h55
-rw-r--r--xlators/encryption/Makefile.am3
-rw-r--r--xlators/encryption/rot-13/Makefile.am3
-rw-r--r--xlators/encryption/rot-13/src/rot-13.c210
-rw-r--r--xlators/encryption/rot-13/src/rot-13.h33
-rw-r--r--xlators/features/Makefile.am13
-rw-r--r--xlators/features/arbiter/Makefile.am3
-rw-r--r--xlators/features/arbiter/src/Makefile.am19
-rw-r--r--xlators/features/arbiter/src/arbiter-mem-types.h18
-rw-r--r--xlators/features/arbiter/src/arbiter.c380
-rw-r--r--xlators/features/arbiter/src/arbiter.h21
-rw-r--r--xlators/features/barrier/Makefile.am3
-rw-r--r--xlators/features/barrier/src/Makefile.am17
-rw-r--r--xlators/features/barrier/src/barrier-mem-types.h20
-rw-r--r--xlators/features/barrier/src/barrier.c809
-rw-r--r--xlators/features/barrier/src/barrier.h89
-rw-r--r--xlators/features/bit-rot/Makefile.am (renamed from rpc/rpc-transport/rdma/Makefile.am)0
-rw-r--r--xlators/features/bit-rot/src/Makefile.am1
-rw-r--r--xlators/features/bit-rot/src/bitd/Makefile.am23
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h101
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c78
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h50
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.c2070
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-scrub.h46
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-ssm.c124
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot-ssm.h38
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.c2232
-rw-r--r--xlators/features/bit-rot/src/bitd/bit-rot.h302
-rw-r--r--xlators/features/bit-rot/src/stub/Makefile.am20
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-common.h178
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-object-version.h30
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c796
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h36
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h117
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.c3590
-rw-r--r--xlators/features/bit-rot/src/stub/bit-rot-stub.h515
-rw-r--r--xlators/features/changelog/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/Makefile.am3
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes-multi.c90
-rw-r--r--xlators/features/changelog/lib/examples/c/get-changes.c93
-rw-r--r--xlators/features/changelog/lib/examples/c/get-history.c116
-rwxr-xr-xxlators/features/changelog/lib/examples/python/changes.py34
-rw-r--r--xlators/features/changelog/lib/examples/python/libgfchangelog.py71
-rw-r--r--xlators/features/changelog/lib/src/Makefile.am35
-rw-r--r--xlators/features/changelog/lib/src/changelog-lib-messages.h74
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-api.c224
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.c170
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-helpers.h255
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-journal-handler.c1029
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-journal.h116
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-reborp.c413
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-rpc.c98
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog-rpc.h28
-rw-r--r--xlators/features/changelog/lib/src/gf-changelog.c652
-rw-r--r--xlators/features/changelog/lib/src/gf-history-changelog.c1020
-rw-r--r--xlators/features/changelog/src/Makefile.am29
-rw-r--r--xlators/features/changelog/src/changelog-barrier.c131
-rw-r--r--xlators/features/changelog/src/changelog-encoders.c232
-rw-r--r--xlators/features/changelog/src/changelog-encoders.h50
-rw-r--r--xlators/features/changelog/src/changelog-ev-handle.c412
-rw-r--r--xlators/features/changelog/src/changelog-ev-handle.h136
-rw-r--r--xlators/features/changelog/src/changelog-helpers.c1977
-rw-r--r--xlators/features/changelog/src/changelog-helpers.h716
-rw-r--r--xlators/features/changelog/src/changelog-mem-types.h34
-rw-r--r--xlators/features/changelog/src/changelog-messages.h172
-rw-r--r--xlators/features/changelog/src/changelog-misc.h131
-rw-r--r--xlators/features/changelog/src/changelog-rpc-common.c359
-rw-r--r--xlators/features/changelog/src/changelog-rpc-common.h85
-rw-r--r--xlators/features/changelog/src/changelog-rpc.c440
-rw-r--r--xlators/features/changelog/src/changelog-rpc.h31
-rw-r--r--xlators/features/changelog/src/changelog-rt.c66
-rw-r--r--xlators/features/changelog/src/changelog-rt.h33
-rw-r--r--xlators/features/changelog/src/changelog.c2989
-rw-r--r--xlators/features/cloudsync/Makefile.am3
-rw-r--r--xlators/features/cloudsync/src/Makefile.am46
-rw-r--r--xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c30
-rw-r--r--xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h24
-rw-r--r--xlators/features/cloudsync/src/cloudsync-common.c60
-rw-r--r--xlators/features/cloudsync/src/cloudsync-common.h134
-rwxr-xr-xxlators/features/cloudsync/src/cloudsync-fops-c.py324
-rwxr-xr-xxlators/features/cloudsync/src/cloudsync-fops-h.py31
-rw-r--r--xlators/features/cloudsync/src/cloudsync-mem-types.h22
-rw-r--r--xlators/features/cloudsync/src/cloudsync-messages.h16
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/Makefile.am3
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am11
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/Makefile.am3
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile.am12
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h19
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c584
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h50
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.sym1
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am3
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am12
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h203
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h30
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym1
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h19
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c842
-rw-r--r--xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h84
-rw-r--r--xlators/features/cloudsync/src/cloudsync.c2076
-rw-r--r--xlators/features/cloudsync/src/cloudsync.h123
-rw-r--r--xlators/features/compress/Makefile.am3
-rw-r--r--xlators/features/compress/src/Makefile.am19
-rw-r--r--xlators/features/compress/src/cdc-helper.c527
-rw-r--r--xlators/features/compress/src/cdc-mem-types.h23
-rw-r--r--xlators/features/compress/src/cdc.c348
-rw-r--r--xlators/features/compress/src/cdc.h99
-rw-r--r--xlators/features/filter/Makefile.am3
-rw-r--r--xlators/features/filter/src/Makefile.am15
-rw-r--r--xlators/features/filter/src/filter-mem-types.h30
-rw-r--r--xlators/features/filter/src/filter.c1744
-rw-r--r--xlators/features/gfid-access/Makefile.am (renamed from xlators/bindings/python/Makefile.am)0
-rw-r--r--xlators/features/gfid-access/src/Makefile.am16
-rw-r--r--xlators/features/gfid-access/src/gfid-access-mem-types.h22
-rw-r--r--xlators/features/gfid-access/src/gfid-access.c1420
-rw-r--r--xlators/features/gfid-access/src/gfid-access.h107
-rw-r--r--xlators/features/index/src/Makefile.am14
-rw-r--r--xlators/features/index/src/index-mem-types.h37
-rw-r--r--xlators/features/index/src/index-messages.h33
-rw-r--r--xlators/features/index/src/index.c3256
-rw-r--r--xlators/features/index/src/index.h110
-rw-r--r--xlators/features/leases/Makefile.am3
-rw-r--r--xlators/features/leases/src/Makefile.am20
-rw-r--r--xlators/features/leases/src/leases-internal.c1412
-rw-r--r--xlators/features/leases/src/leases-mem-types.h27
-rw-r--r--xlators/features/leases/src/leases-messages.h33
-rw-r--r--xlators/features/leases/src/leases.c1168
-rw-r--r--xlators/features/leases/src/leases.h259
-rw-r--r--xlators/features/locks/src/Makefile.am19
-rw-r--r--xlators/features/locks/src/clear.c718
-rw-r--r--xlators/features/locks/src/clear.h93
-rw-r--r--xlators/features/locks/src/common.c1973
-rw-r--r--xlators/features/locks/src/common.h280
-rw-r--r--xlators/features/locks/src/entrylk.c1506
-rw-r--r--xlators/features/locks/src/inodelk.c1514
-rw-r--r--xlators/features/locks/src/locks-mem-types.h43
-rw-r--r--xlators/features/locks/src/locks.h348
-rw-r--r--xlators/features/locks/src/pl-messages.h29
-rw-r--r--xlators/features/locks/src/posix.c6032
-rw-r--r--xlators/features/locks/src/reservelk.c611
-rw-r--r--xlators/features/locks/tests/unit-test.c118
-rw-r--r--xlators/features/mac-compat/Makefile.am3
-rw-r--r--xlators/features/mac-compat/src/Makefile.am13
-rw-r--r--xlators/features/mac-compat/src/mac-compat.c248
-rw-r--r--xlators/features/marker/Makefile.am2
-rw-r--r--xlators/features/marker/src/Makefile.am19
-rw-r--r--xlators/features/marker/src/marker-common.c109
-rw-r--r--xlators/features/marker/src/marker-common.h37
-rw-r--r--xlators/features/marker/src/marker-mem-types.h41
-rw-r--r--xlators/features/marker/src/marker-quota-helper.c599
-rw-r--r--xlators/features/marker/src/marker-quota-helper.h103
-rw-r--r--xlators/features/marker/src/marker-quota.c4039
-rw-r--r--xlators/features/marker/src/marker-quota.h213
-rw-r--r--xlators/features/marker/src/marker.c4496
-rw-r--r--xlators/features/marker/src/marker.h239
-rw-r--r--xlators/features/marker/utils/Makefile.am3
-rw-r--r--xlators/features/marker/utils/src/Makefile.am22
-rw-r--r--xlators/features/marker/utils/src/gsyncd.c345
-rw-r--r--xlators/features/marker/utils/src/procdiggy.c129
-rw-r--r--xlators/features/marker/utils/src/procdiggy.h30
-rw-r--r--xlators/features/marker/utils/syncdaemon/Makefile.am6
-rw-r--r--xlators/features/marker/utils/syncdaemon/__init__.py0
-rw-r--r--xlators/features/marker/utils/syncdaemon/configinterface.py224
-rw-r--r--xlators/features/marker/utils/syncdaemon/gconf.py20
-rw-r--r--xlators/features/marker/utils/syncdaemon/gsyncd.py400
-rw-r--r--xlators/features/marker/utils/syncdaemon/libcxattr.py72
-rw-r--r--xlators/features/marker/utils/syncdaemon/master.py886
-rw-r--r--xlators/features/marker/utils/syncdaemon/monitor.py129
-rw-r--r--xlators/features/marker/utils/syncdaemon/resource.py945
-rw-r--r--xlators/features/marker/utils/syncdaemon/syncdutils.py282
-rw-r--r--xlators/features/metadisp/Makefile.am3
-rw-r--r--xlators/features/metadisp/src/Makefile.am38
-rw-r--r--xlators/features/metadisp/src/backend.c45
-rw-r--r--xlators/features/metadisp/src/fops-tmpl.c10
-rw-r--r--xlators/features/metadisp/src/gen-fops.py160
-rw-r--r--xlators/features/metadisp/src/metadisp-create.c101
-rw-r--r--xlators/features/metadisp/src/metadisp-fops.h51
-rw-r--r--xlators/features/metadisp/src/metadisp-fsync.c54
-rw-r--r--xlators/features/metadisp/src/metadisp-lookup.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-open.c70
-rw-r--r--xlators/features/metadisp/src/metadisp-readdir.c65
-rw-r--r--xlators/features/metadisp/src/metadisp-setattr.c90
-rw-r--r--xlators/features/metadisp/src/metadisp-stat.c124
-rw-r--r--xlators/features/metadisp/src/metadisp-unlink.c160
-rw-r--r--xlators/features/metadisp/src/metadisp.c46
-rw-r--r--xlators/features/metadisp/src/metadisp.h45
-rw-r--r--xlators/features/namespace/Makefile.am3
-rw-r--r--xlators/features/namespace/src/Makefile.am17
-rw-r--r--xlators/features/namespace/src/namespace.c1344
-rw-r--r--xlators/features/namespace/src/namespace.h23
-rw-r--r--xlators/features/path-convertor/Makefile.am3
-rw-r--r--xlators/features/path-convertor/src/Makefile.am14
-rw-r--r--xlators/features/path-convertor/src/path-mem-types.h32
-rw-r--r--xlators/features/path-convertor/src/path.c1239
-rw-r--r--xlators/features/quiesce/src/Makefile.am10
-rw-r--r--xlators/features/quiesce/src/quiesce-mem-types.h27
-rw-r--r--xlators/features/quiesce/src/quiesce-messages.h28
-rw-r--r--xlators/features/quiesce/src/quiesce.c3401
-rw-r--r--xlators/features/quiesce/src/quiesce.h88
-rw-r--r--xlators/features/quota/src/Makefile.am29
-rw-r--r--xlators/features/quota/src/quota-enforcer-client.c503
-rw-r--r--xlators/features/quota/src/quota-mem-types.h45
-rw-r--r--xlators/features/quota/src/quota-messages.h39
-rw-r--r--xlators/features/quota/src/quota.c6961
-rw-r--r--xlators/features/quota/src/quota.h389
-rw-r--r--xlators/features/quota/src/quotad-aggregator.c494
-rw-r--r--xlators/features/quota/src/quotad-aggregator.h38
-rw-r--r--xlators/features/quota/src/quotad-helpers.c107
-rw-r--r--xlators/features/quota/src/quotad-helpers.h24
-rw-r--r--xlators/features/quota/src/quotad.c245
-rw-r--r--xlators/features/read-only/src/Makefile.am14
-rw-r--r--xlators/features/read-only/src/read-only-common.c435
-rw-r--r--xlators/features/read-only/src/read-only-common.h140
-rw-r--r--xlators/features/read-only/src/read-only-mem-types.h20
-rw-r--r--xlators/features/read-only/src/read-only.c191
-rw-r--r--xlators/features/read-only/src/read-only.h37
-rw-r--r--xlators/features/read-only/src/worm-helper.c395
-rw-r--r--xlators/features/read-only/src/worm-helper.h44
-rw-r--r--xlators/features/read-only/src/worm.c750
-rw-r--r--xlators/features/sdfs/Makefile.am3
-rw-r--r--xlators/features/sdfs/src/Makefile.am19
-rw-r--r--xlators/features/sdfs/src/sdfs-messages.h67
-rw-r--r--xlators/features/sdfs/src/sdfs.c1479
-rw-r--r--xlators/features/sdfs/src/sdfs.h49
-rw-r--r--xlators/features/selinux/Makefile.am3
-rw-r--r--xlators/features/selinux/src/Makefile.am20
-rw-r--r--xlators/features/selinux/src/selinux-mem-types.h19
-rw-r--r--xlators/features/selinux/src/selinux-messages.h30
-rw-r--r--xlators/features/selinux/src/selinux.c323
-rw-r--r--xlators/features/selinux/src/selinux.h24
-rw-r--r--xlators/features/shard/Makefile.am3
-rw-r--r--xlators/features/shard/src/Makefile.am17
-rw-r--r--xlators/features/shard/src/shard-mem-types.h24
-rw-r--r--xlators/features/shard/src/shard-messages.h39
-rw-r--r--xlators/features/shard/src/shard.c7382
-rw-r--r--xlators/features/shard/src/shard.h348
-rw-r--r--xlators/features/snapview-client/Makefile.am1
-rw-r--r--xlators/features/snapview-client/src/Makefile.am16
-rw-r--r--xlators/features/snapview-client/src/snapview-client-mem-types.h24
-rw-r--r--xlators/features/snapview-client/src/snapview-client-messages.h71
-rw-r--r--xlators/features/snapview-client/src/snapview-client.c2791
-rw-r--r--xlators/features/snapview-client/src/snapview-client.h101
-rw-r--r--xlators/features/snapview-server/Makefile.am1
-rw-r--r--xlators/features/snapview-server/src/Makefile.am25
-rw-r--r--xlators/features/snapview-server/src/snapview-server-helpers.c715
-rw-r--r--xlators/features/snapview-server/src/snapview-server-mem-types.h25
-rw-r--r--xlators/features/snapview-server/src/snapview-server-messages.h54
-rw-r--r--xlators/features/snapview-server/src/snapview-server-mgmt.c524
-rw-r--r--xlators/features/snapview-server/src/snapview-server.c2720
-rw-r--r--xlators/features/snapview-server/src/snapview-server.h255
-rw-r--r--xlators/features/thin-arbiter/Makefile.am3
-rw-r--r--xlators/features/thin-arbiter/src/Makefile.am22
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h19
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter-messages.h28
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter.c661
-rw-r--r--xlators/features/thin-arbiter/src/thin-arbiter.h59
-rw-r--r--xlators/features/trash/src/Makefile.am12
-rw-r--r--xlators/features/trash/src/trash-mem-types.h32
-rw-r--r--xlators/features/trash/src/trash.c3749
-rw-r--r--xlators/features/trash/src/trash.h128
-rw-r--r--xlators/features/upcall/Makefile.am3
-rw-r--r--xlators/features/upcall/src/Makefile.am23
-rw-r--r--xlators/features/upcall/src/upcall-cache-invalidation.h18
-rw-r--r--xlators/features/upcall/src/upcall-internal.c689
-rw-r--r--xlators/features/upcall/src/upcall-mem-types.h23
-rw-r--r--xlators/features/upcall/src/upcall-messages.h29
-rw-r--r--xlators/features/upcall/src/upcall.c2505
-rw-r--r--xlators/features/upcall/src/upcall.h131
-rw-r--r--xlators/features/utime/Makefile.am3
-rw-r--r--xlators/features/utime/src/Makefile.am41
-rw-r--r--xlators/features/utime/src/utime-autogen-fops-tmpl.c28
-rw-r--r--xlators/features/utime/src/utime-autogen-fops-tmpl.h22
-rwxr-xr-xxlators/features/utime/src/utime-gen-fops-c.py147
-rwxr-xr-xxlators/features/utime/src/utime-gen-fops-h.py35
-rw-r--r--xlators/features/utime/src/utime-helpers.c110
-rw-r--r--xlators/features/utime/src/utime-helpers.h25
-rw-r--r--xlators/features/utime/src/utime-mem-types.h21
-rw-r--r--xlators/features/utime/src/utime-messages.h29
-rw-r--r--xlators/features/utime/src/utime.c392
-rw-r--r--xlators/features/utime/src/utime.h23
-rw-r--r--xlators/lib/src/libxlator.c745
-rw-r--r--xlators/lib/src/libxlator.h204
-rw-r--r--xlators/meta/Makefile.am2
-rw-r--r--xlators/meta/src/Makefile.am48
-rw-r--r--xlators/meta/src/active-link.c34
-rw-r--r--xlators/meta/src/cmdline-file.c39
-rw-r--r--xlators/meta/src/frames-file.c107
-rw-r--r--xlators/meta/src/graph-dir.c98
-rw-r--r--xlators/meta/src/graphs-dir.c67
-rw-r--r--xlators/meta/src/history-file.c44
-rw-r--r--xlators/meta/src/logfile-link.c34
-rw-r--r--xlators/meta/src/logging-dir.c44
-rw-r--r--xlators/meta/src/loglevel-file.c50
-rw-r--r--xlators/meta/src/mallinfo-file.c36
-rw-r--r--xlators/meta/src/measure-file.c49
-rw-r--r--xlators/meta/src/meminfo-file.c44
-rw-r--r--xlators/meta/src/meta-defaults.c655
-rw-r--r--xlators/meta/src/meta-helpers.c332
-rw-r--r--xlators/meta/src/meta-hooks.h48
-rw-r--r--xlators/meta/src/meta-mem-types.h36
-rw-r--r--xlators/meta/src/meta.c1355
-rw-r--r--xlators/meta/src/meta.h162
-rw-r--r--xlators/meta/src/misc.c67
-rw-r--r--xlators/meta/src/misc.h31
-rw-r--r--xlators/meta/src/name-file.c44
-rw-r--r--xlators/meta/src/option-file.c45
-rw-r--r--xlators/meta/src/options-dir.c65
-rw-r--r--xlators/meta/src/private-file.c44
-rw-r--r--xlators/meta/src/process_uuid-file.c37
-rw-r--r--xlators/meta/src/profile-file.c44
-rw-r--r--xlators/meta/src/root-dir.c77
-rw-r--r--xlators/meta/src/subvolume-link.c56
-rw-r--r--xlators/meta/src/subvolumes-dir.c62
-rw-r--r--xlators/meta/src/top-link.c40
-rw-r--r--xlators/meta/src/tree.c179
-rw-r--r--xlators/meta/src/tree.h35
-rw-r--r--xlators/meta/src/type-file.c44
-rw-r--r--xlators/meta/src/version-file.c37
-rw-r--r--xlators/meta/src/view-dir.c33
-rw-r--r--xlators/meta/src/view.c258
-rw-r--r--xlators/meta/src/view.h32
-rw-r--r--xlators/meta/src/volfile-file.c79
-rw-r--r--xlators/meta/src/xlator-dir.c97
-rw-r--r--xlators/mgmt/glusterd/src/Makefile.am79
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitd-svc.c206
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitd-svc.h40
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-bitrot.c822
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-brick-ops.c3817
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-helper.c21
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-helper.h21
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c191
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h53
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-errno.h33
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-ganesha.c927
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.c7850
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-geo-rep.h52
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c235
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.h51
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c478
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h47
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handler.c8291
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-handshake.c2855
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.c971
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-hooks.h111
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.c870
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-locks.h57
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-log-ops.c480
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mem-types.h115
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-messages.h451
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c1144
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.c3114
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mgmt.h97
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.c1213
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-mountbroker.h57
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-nfs-svc.c228
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-nfs-svc.h27
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.c11003
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-op-sm.h369
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-peer-utils.c1058
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-peer-utils.h82
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.c886
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-pmap.h85
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c152
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-proc-mgmt.h44
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.c2736
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quota.h17
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quotad-svc.c217
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-quotad-svc.h31
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rcu.h36
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rebalance.c1807
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-replace-brick.c2413
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-reset-brick.c376
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-rpc-ops.c3880
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-scrub-svc.c207
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-scrub-svc.h45
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-server-quorum.c486
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-server-quorum.h46
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c153
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc.c796
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-shd-svc.h45
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.c2238
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-sm.h272
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.c75
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.h32
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc.c478
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapd-svc.h42
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c4290
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h169
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-snapshot.c10087
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-statedump.c243
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-statedump.h18
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.c6509
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-store.h264
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-helper.c1047
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-helper.h72
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c536
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h112
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.c2285
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-syncop.h131
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-tierd-svc-helper.c207
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.c17926
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-utils.h923
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.c8627
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volgen.h371
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-ops.c4453
-rw-r--r--xlators/mgmt/glusterd/src/glusterd-volume-set.c3146
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.c2977
-rw-r--r--xlators/mgmt/glusterd/src/glusterd.h1566
-rw-r--r--xlators/mount/fuse/src/Makefile.am16
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.c9681
-rw-r--r--xlators/mount/fuse/src/fuse-bridge.h758
-rw-r--r--xlators/mount/fuse/src/fuse-helpers.c970
-rw-r--r--xlators/mount/fuse/src/fuse-mem-types.h44
-rw-r--r--xlators/mount/fuse/src/fuse-resolve.c940
-rw-r--r--xlators/mount/fuse/utils/Makefile.am9
-rwxr-xr-xxlators/mount/fuse/utils/mount.glusterfs.in935
-rwxr-xr-xxlators/mount/fuse/utils/mount_glusterfs.in618
-rw-r--r--xlators/nfs/server/src/Makefile.am35
-rw-r--r--xlators/nfs/server/src/acl3.c933
-rw-r--r--xlators/nfs/server/src/acl3.h42
-rw-r--r--xlators/nfs/server/src/auth-cache.c496
-rw-r--r--xlators/nfs/server/src/auth-cache.h52
-rw-r--r--xlators/nfs/server/src/exports.c1484
-rw-r--r--xlators/nfs/server/src/exports.h93
-rw-r--r--xlators/nfs/server/src/mount3-auth.c642
-rw-r--r--xlators/nfs/server/src/mount3-auth.h59
-rw-r--r--xlators/nfs/server/src/mount3.c5270
-rw-r--r--xlators/nfs/server/src/mount3.h210
-rw-r--r--xlators/nfs/server/src/mount3udp_svc.c329
-rw-r--r--xlators/nfs/server/src/netgroups.c1161
-rw-r--r--xlators/nfs/server/src/netgroups.h53
-rw-r--r--xlators/nfs/server/src/nfs-common.c648
-rw-r--r--xlators/nfs/server/src/nfs-common.h74
-rw-r--r--xlators/nfs/server/src/nfs-fops.c2366
-rw-r--r--xlators/nfs/server/src/nfs-fops.h322
-rw-r--r--xlators/nfs/server/src/nfs-generics.c354
-rw-r--r--xlators/nfs/server/src/nfs-generics.h168
-rw-r--r--xlators/nfs/server/src/nfs-inodes.c790
-rw-r--r--xlators/nfs/server/src/nfs-inodes.h82
-rw-r--r--xlators/nfs/server/src/nfs-mem-types.h81
-rw-r--r--xlators/nfs/server/src/nfs-messages.h102
-rw-r--r--xlators/nfs/server/src/nfs.c2840
-rw-r--r--xlators/nfs/server/src/nfs.h202
-rw-r--r--xlators/nfs/server/src/nfs3-fh.c357
-rw-r--r--xlators/nfs/server/src/nfs3-fh.h135
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.c4999
-rw-r--r--xlators/nfs/server/src/nfs3-helpers.h297
-rw-r--r--xlators/nfs/server/src/nfs3.c9034
-rw-r--r--xlators/nfs/server/src/nfs3.h399
-rw-r--r--xlators/nfs/server/src/nfsserver.sym12
-rw-r--r--xlators/nfs/server/src/nlm4.c4223
-rw-r--r--xlators/nfs/server/src/nlm4.h124
-rw-r--r--xlators/nfs/server/src/nlmcbk_svc.c182
-rw-r--r--xlators/performance/Makefile.am3
-rw-r--r--xlators/performance/io-cache/src/Makefile.am11
-rw-r--r--xlators/performance/io-cache/src/io-cache-messages.h69
-rw-r--r--xlators/performance/io-cache/src/io-cache.c3002
-rw-r--r--xlators/performance/io-cache/src/io-cache.h408
-rw-r--r--xlators/performance/io-cache/src/ioc-inode.c287
-rw-r--r--xlators/performance/io-cache/src/ioc-mem-types.h24
-rw-r--r--xlators/performance/io-cache/src/page.c1443
-rw-r--r--xlators/performance/io-threads/src/Makefile.am10
-rw-r--r--xlators/performance/io-threads/src/io-threads-messages.h41
-rw-r--r--xlators/performance/io-threads/src/io-threads.c3316
-rw-r--r--xlators/performance/io-threads/src/io-threads.h107
-rw-r--r--xlators/performance/io-threads/src/iot-mem-types.h9
-rw-r--r--xlators/performance/md-cache/src/Makefile.am14
-rw-r--r--xlators/performance/md-cache/src/md-cache-mem-types.h13
-rw-r--r--xlators/performance/md-cache/src/md-cache-messages.h29
-rw-r--r--xlators/performance/md-cache/src/md-cache.c4509
-rw-r--r--xlators/performance/nl-cache/Makefile.am3
-rw-r--r--xlators/performance/nl-cache/src/Makefile.am12
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-helper.c1201
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-mem-types.h27
-rw-r--r--xlators/performance/nl-cache/src/nl-cache-messages.h29
-rw-r--r--xlators/performance/nl-cache/src/nl-cache.c840
-rw-r--r--xlators/performance/nl-cache/src/nl-cache.h175
-rw-r--r--xlators/performance/open-behind/Makefile.am1
-rw-r--r--xlators/performance/open-behind/src/Makefile.am16
-rw-r--r--xlators/performance/open-behind/src/open-behind-mem-types.h22
-rw-r--r--xlators/performance/open-behind/src/open-behind-messages.h32
-rw-r--r--xlators/performance/open-behind/src/open-behind.c1101
-rw-r--r--xlators/performance/quick-read/src/Makefile.am10
-rw-r--r--xlators/performance/quick-read/src/quick-read-mem-types.h15
-rw-r--r--xlators/performance/quick-read/src/quick-read-messages.h31
-rw-r--r--xlators/performance/quick-read/src/quick-read.c4780
-rw-r--r--xlators/performance/quick-read/src/quick-read.h131
-rw-r--r--xlators/performance/read-ahead/src/Makefile.am10
-rw-r--r--xlators/performance/read-ahead/src/page.c846
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-mem-types.h17
-rw-r--r--xlators/performance/read-ahead/src/read-ahead-messages.h31
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.c1775
-rw-r--r--xlators/performance/read-ahead/src/read-ahead.h183
-rw-r--r--xlators/performance/readdir-ahead/Makefile.am3
-rw-r--r--xlators/performance/readdir-ahead/src/Makefile.am18
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-mem-types.h24
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead-messages.h30
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.c1382
-rw-r--r--xlators/performance/readdir-ahead/src/readdir-ahead.h98
-rw-r--r--xlators/performance/symlink-cache/Makefile.am3
-rw-r--r--xlators/performance/symlink-cache/src/Makefile.am12
-rw-r--r--xlators/performance/symlink-cache/src/symlink-cache.c400
-rw-r--r--xlators/performance/write-behind/src/Makefile.am10
-rw-r--r--xlators/performance/write-behind/src/write-behind-mem-types.h15
-rw-r--r--xlators/performance/write-behind/src/write-behind-messages.h31
-rw-r--r--xlators/performance/write-behind/src/write-behind.c5075
-rw-r--r--xlators/playground/Makefile.am2
-rw-r--r--xlators/playground/rot-13/Makefile.am (renamed from mod_glusterfs/apache/1.3/Makefile.am)0
-rw-r--r--xlators/playground/rot-13/src/Makefile.am (renamed from xlators/encryption/rot-13/src/Makefile.am)8
-rw-r--r--xlators/playground/rot-13/src/rot-13.c166
-rw-r--r--xlators/playground/rot-13/src/rot-13.h18
-rw-r--r--xlators/playground/template/Makefile.am2
-rw-r--r--xlators/playground/template/src/Makefile.am17
-rw-r--r--xlators/playground/template/src/template.c186
-rw-r--r--xlators/playground/template/src/template.h43
-rw-r--r--xlators/protocol/auth/addr/src/Makefile.am8
-rw-r--r--xlators/protocol/auth/addr/src/addr.c492
-rw-r--r--xlators/protocol/auth/login/src/Makefile.am10
-rw-r--r--xlators/protocol/auth/login/src/login.c300
-rw-r--r--xlators/protocol/client/src/Makefile.am18
-rw-r--r--xlators/protocol/client/src/client-callback.c316
-rw-r--r--xlators/protocol/client/src/client-common.c3589
-rw-r--r--xlators/protocol/client/src/client-common.h630
-rw-r--r--xlators/protocol/client/src/client-handshake.c2842
-rw-r--r--xlators/protocol/client/src/client-helpers.c1003
-rw-r--r--xlators/protocol/client/src/client-lk.c1106
-rw-r--r--xlators/protocol/client/src/client-mem-types.h17
-rw-r--r--xlators/protocol/client/src/client-messages.h174
-rw-r--r--xlators/protocol/client/src/client-rpc-fops.c6079
-rw-r--r--xlators/protocol/client/src/client-rpc-fops_v2.c6177
-rw-r--r--xlators/protocol/client/src/client.c4365
-rw-r--r--xlators/protocol/client/src/client.h545
-rw-r--r--xlators/protocol/client/src/client3_1-fops.c6005
-rw-r--r--xlators/protocol/legacy/Makefile.am3
-rw-r--r--xlators/protocol/legacy/client/Makefile.am3
-rw-r--r--xlators/protocol/legacy/client/src/Makefile.am21
-rw-r--r--xlators/protocol/legacy/client/src/client-mem-types.h43
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.c6683
-rw-r--r--xlators/protocol/legacy/client/src/client-protocol.h178
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.c196
-rw-r--r--xlators/protocol/legacy/client/src/saved-frames.h79
-rw-r--r--xlators/protocol/legacy/lib/Makefile.am3
-rw-r--r--xlators/protocol/legacy/lib/src/Makefile.am14
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.c108
-rw-r--r--xlators/protocol/legacy/lib/src/protocol.h1118
-rw-r--r--xlators/protocol/legacy/lib/src/transport.c422
-rw-r--r--xlators/protocol/legacy/lib/src/transport.h106
-rw-r--r--xlators/protocol/legacy/server/Makefile.am3
-rw-r--r--xlators/protocol/legacy/server/src/Makefile.am26
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.c249
-rw-r--r--xlators/protocol/legacy/server/src/authenticate.h60
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.c622
-rw-r--r--xlators/protocol/legacy/server/src/server-helpers.h48
-rw-r--r--xlators/protocol/legacy/server/src/server-mem-types.h39
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.c6587
-rw-r--r--xlators/protocol/legacy/server/src/server-protocol.h191
-rw-r--r--xlators/protocol/legacy/server/src/server-resolve.c658
-rw-r--r--xlators/protocol/legacy/transport/Makefile.am3
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs-mem-types.h39
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.c2625
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/ib-verbs.h220
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.c712
-rw-r--r--xlators/protocol/legacy/transport/ib-verbs/src/name.h47
-rw-r--r--xlators/protocol/legacy/transport/socket/Makefile.am1
-rw-r--r--xlators/protocol/legacy/transport/socket/src/Makefile.am19
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.c740
-rw-r--r--xlators/protocol/legacy/transport/socket/src/name.h44
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket-mem-types.h36
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.c1625
-rw-r--r--xlators/protocol/legacy/transport/socket/src/socket.h129
-rw-r--r--xlators/protocol/server/src/Makefile.am30
-rw-r--r--xlators/protocol/server/src/authenticate.c387
-rw-r--r--xlators/protocol/server/src/authenticate.h58
-rw-r--r--xlators/protocol/server/src/server-common.c842
-rw-r--r--xlators/protocol/server/src/server-common.h199
-rw-r--r--xlators/protocol/server/src/server-handshake.c1406
-rw-r--r--xlators/protocol/server/src/server-helpers.c2418
-rw-r--r--xlators/protocol/server/src/server-helpers.h133
-rw-r--r--xlators/protocol/server/src/server-mem-types.h42
-rw-r--r--xlators/protocol/server/src/server-messages.h168
-rw-r--r--xlators/protocol/server/src/server-resolve.c929
-rw-r--r--xlators/protocol/server/src/server-rpc-fops.c6084
-rw-r--r--xlators/protocol/server/src/server-rpc-fops_v2.c6031
-rw-r--r--xlators/protocol/server/src/server.c2704
-rw-r--r--xlators/protocol/server/src/server.h335
-rw-r--r--xlators/protocol/server/src/server3_1-fops.c5744
-rw-r--r--xlators/storage/Makefile.am2
-rw-r--r--xlators/storage/bdb/Makefile.am3
-rw-r--r--xlators/storage/bdb/src/Makefile.am18
-rw-r--r--xlators/storage/bdb/src/bctx.c341
-rw-r--r--xlators/storage/bdb/src/bdb-ll.c1464
-rw-r--r--xlators/storage/bdb/src/bdb-mem-types.h42
-rw-r--r--xlators/storage/bdb/src/bdb.c3603
-rw-r--r--xlators/storage/bdb/src/bdb.h530
-rw-r--r--xlators/storage/posix/src/Makefile.am25
-rw-r--r--xlators/storage/posix/src/posix-aio.c556
-rw-r--r--xlators/storage/posix/src/posix-aio.h34
-rw-r--r--xlators/storage/posix/src/posix-common.c1524
-rw-r--r--xlators/storage/posix/src/posix-entry-ops.c2496
-rw-r--r--xlators/storage/posix/src/posix-gfid-path.c243
-rw-r--r--xlators/storage/posix/src/posix-gfid-path.h28
-rw-r--r--xlators/storage/posix/src/posix-handle.c1463
-rw-r--r--xlators/storage/posix/src/posix-handle.h336
-rw-r--r--xlators/storage/posix/src/posix-helpers.c4143
-rw-r--r--xlators/storage/posix/src/posix-inode-fd-ops.c6004
-rw-r--r--xlators/storage/posix/src/posix-inode-handle.h118
-rw-r--r--xlators/storage/posix/src/posix-mem-types.h39
-rw-r--r--xlators/storage/posix/src/posix-messages.h74
-rw-r--r--xlators/storage/posix/src/posix-metadata-disk.h31
-rw-r--r--xlators/storage/posix/src/posix-metadata.c916
-rw-r--r--xlators/storage/posix/src/posix-metadata.h71
-rw-r--r--xlators/storage/posix/src/posix.c4369
-rw-r--r--xlators/storage/posix/src/posix.h733
-rw-r--r--xlators/system/posix-acl/src/Makefile.am17
-rw-r--r--xlators/system/posix-acl/src/posix-acl-mem-types.h23
-rw-r--r--xlators/system/posix-acl/src/posix-acl-messages.h28
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.c253
-rw-r--r--xlators/system/posix-acl/src/posix-acl-xattr.h53
-rw-r--r--xlators/system/posix-acl/src/posix-acl.c3186
-rw-r--r--xlators/system/posix-acl/src/posix-acl.h104
-rw-r--r--xlators/xlator.sym1
2631 files changed, 674109 insertions, 346045 deletions
diff --git a/.clang-format b/.clang-format
new file mode 100644
index 00000000000..84c2efe3fad
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,107 @@
+---
+Language: Cpp
+# BasedOnStyle: Chromium
+AccessModifierOffset: -1
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: false
+AlignConsecutiveDeclarations: false
+AlignEscapedNewlines: Right
+AlignOperands: true
+AlignTrailingComments: true
+AllowAllParametersOfDeclarationOnNextLine: false
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: Inline
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: All
+AlwaysBreakAfterReturnType: All
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: true
+BinPackArguments: true
+BinPackParameters: true
+BraceWrapping:
+ AfterClass: false
+ AfterControlStatement: false
+ AfterEnum: false
+ AfterFunction: true
+ AfterNamespace: false
+ AfterObjCDeclaration: false
+ AfterStruct: false
+ AfterUnion: false
+ BeforeCatch: false
+ BeforeElse: false
+ IndentBraces: false
+ SplitEmptyFunction: true
+ SplitEmptyRecord: true
+ SplitEmptyNamespace: true
+BreakBeforeBinaryOperators: None
+BreakBeforeBraces: Linux
+BreakBeforeInheritanceComma: false
+BreakBeforeTernaryOperators: true
+BreakConstructorInitializersBeforeComma: false
+BreakConstructorInitializers: BeforeColon
+BreakAfterJavaFieldAnnotations: false
+BreakStringLiterals: true
+ColumnLimit: 80
+CommentPragmas: '^ IWYU pragma:'
+CompactNamespaces: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+ConstructorInitializerIndentWidth: 4
+ContinuationIndentWidth: 4
+Cpp11BracedListStyle: true
+DerivePointerAlignment: false
+DisableFormat: false
+ExperimentalAutoDetectBinPacking: false
+FixNamespaceComments: true
+ForEachMacros:
+ - foreach
+ - Q_FOREACH
+ - BOOST_FOREACH
+IncludeCategories:
+ - Regex: '^<.*\.h>'
+ Priority: 1
+ - Regex: '^<.*'
+ Priority: 2
+ - Regex: '.*'
+ Priority: 3
+IncludeIsMainRegex: '([-_](test|unittest))?$'
+IndentCaseLabels: true
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+JavaScriptQuotes: Leave
+JavaScriptWrapImports: true
+KeepEmptyLinesAtTheStartOfBlocks: false
+MacroBlockBegin: ''
+MacroBlockEnd: ''
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+ObjCBlockIndentWidth: 2
+ObjCSpaceAfterProperty: false
+ObjCSpaceBeforeProtocolList: false
+PenaltyBreakAssignment: 200
+PenaltyBreakBeforeFirstCallParameter: 1
+PenaltyBreakComment: 300
+PenaltyBreakFirstLessLess: 120
+PenaltyBreakString: 1000
+PenaltyExcessCharacter: 1000000
+PenaltyReturnTypeOnItsOwnLine: 200
+PointerAlignment: Right
+ReflowComments: true
+SortIncludes: false
+SortUsingDeclarations: true
+SpaceAfterCStyleCast: false
+SpaceAfterTemplateKeyword: true
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: ControlStatements
+SpaceInEmptyParentheses: false
+SpacesBeforeTrailingComments: 2
+SpacesInAngles: false
+SpacesInContainerLiterals: true
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: Auto
+TabWidth: 8
+UseTab: Never
+...
diff --git a/.github/ISSUE_TEMPLATE b/.github/ISSUE_TEMPLATE
new file mode 100644
index 00000000000..386ed2d8dd5
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE
@@ -0,0 +1,30 @@
+<!-- Please use this template while reporting an issue, providing as much information as possible. Failure to do so may result in a delayed response. Thank you! -->
+
+**Description of problem:**
+
+
+**The exact command to reproduce the issue**:
+
+
+**The full output of the command that failed**:
+<details>
+
+
+
+</details>
+
+**Expected results:**
+
+
+**Additional info:**
+
+
+**- The output of the `gluster volume info` command**:
+<details>
+
+
+
+</details>
+
+**- The operating system / glusterfs version**:
+
diff --git a/swift/1.4.8/plugins/conf/db_file.db b/.github/PULL_REQUEST_TEMPLATE
index e69de29bb2d..e69de29bb2d 100644
--- a/swift/1.4.8/plugins/conf/db_file.db
+++ b/.github/PULL_REQUEST_TEMPLATE
diff --git a/.github/RELEASE_TRACKER_TEMPLATE b/.github/RELEASE_TRACKER_TEMPLATE
new file mode 100644
index 00000000000..502bbd5556c
--- /dev/null
+++ b/.github/RELEASE_TRACKER_TEMPLATE
@@ -0,0 +1,12 @@
+<!-- Please use this template while creating a tracker issue -->
+
+**Description of problem:**
+A tracker issue to track the issues that will be fixed as a part of this release
+
+
+**Major or minor release**:
+
+
+**Release version**:
+
+
diff --git a/.github/stale.yml b/.github/stale.yml
new file mode 100644
index 00000000000..460e327c6ea
--- /dev/null
+++ b/.github/stale.yml
@@ -0,0 +1,25 @@
+# Number of days of inactivity before an issue becomes stale
+daysUntilStale: 210
+# Number of days of inactivity before a stale issue is closed
+daysUntilClose: 15
+# Issues with these labels will never be considered stale
+exemptLabels:
+ - pinned
+ - security
+# Label to use when marking an issue as stale
+staleLabel: wontfix
+
+# Comment to post when marking an issue as stale. Set to `false` to disable
+markComment: >
+ Thank you for your contributions.
+
+ Noticed that this issue is not having any activity in last ~6 months! We
+ are marking this issue as stale because it has not had recent activity.
+
+ It will be closed in 2 weeks if no one responds with a comment here.
+
+
+# Comment to post when closing a stale issue. Set to `false` to disable
+closeComment: >
+ Closing this issue as there was no update since my last update on issue.
+ If this is an issue which is still valid, feel free to open it.
diff --git a/.gitignore b/.gitignore
index c5371b26436..fc5ba586f8e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,31 +3,123 @@ autom4te.cache
build
config.*
configure
+cscope.*
+tags
depcomp
+INSTALL
install-sh
ltmain.sh
+Makefile
Makefile.in
missing
+stamp-h1
+stamp-h2
+test-driver
+*compile
+*.gcda
+*.gcno
*.sw?
*~
-*lo
-*la
-*o
+*.lo
+*.la
+*.o
+*.tar.gz
+*.rpm
+*.diff
+*.patch
.libs
-Makefile
-stamp-h1
+.deps
+.dirstamp
+
+# Softlinks to test and log
+log
+*.vol
+.clang-format
+
+# cmocka unit tests
+*.log
+*.trs
+*_unittest
# Generated files
-extras/init.d/glusterfs-server.plist
-extras/init.d/glusterfsd-Debian
-extras/init.d/glusterfsd-Redhat
-extras/init.d/glusterfsd-SuSE
+site.h
+*.py[co]
+api/examples/__init__.py
+api/examples/setup.py
+api/src/gfapi.map
+cli/src/gluster
+contrib/fuse-util/fusermount-glusterfs
+extras/geo-rep/gsync-sync-gfid
+extras/geo-rep/schedule_georep.py
+extras/snap_scheduler/conf.py
+extras/init.d/glusterd-Debian
+extras/init.d/glusterd-FreeBSD
+extras/init.d/glusterd-Redhat
+extras/init.d/glusterd-SuSE
+extras/init.d/glusterd.plist
+extras/ocf/glusterd
+extras/ocf/volume
+extras/run-gluster.tmpfiles
+extras/systemd/glusterd.service
+extras/systemd/gluster-ta-volume.service
+extras/systemd/glusterfssharedstorage.service
+extras/who-wrote-glusterfs/gitdm
+geo-replication/.tox
+geo-replication/gsyncd.conf
+geo-replication/src/gsyncd
+geo-replication/src/peer_gsec_create
+geo-replication/src/peer_mountbroker
+geo-replication/src/peer_mountbroker.py
+geo-replication/src/set_geo_rep_pem_keys.sh
+geo-replication/src/peer_georep-sshkey.py
+geo-replication/syncdaemon.egg-info
+geo-replication/syncdaemon/conf.py
+geo-replication/tests/unit/.coverage
+geo-replication/tests/unit/cover
+geo-replication/tests/unit/coverage.xml
+geo-replication/tests/unit/nosetests.xml
+geo-replication/tests/unit/results.html
+glusterfs-api.pc
glusterfs.spec
-libtool
-xlators/mount/fuse/utils/mount.glusterfs
-xlators/mount/fuse/utils/mount_glusterfs
-argp-standalone/libargp.a
+glusterfsd/src/glusterd
+glusterfsd/src/glusterfs
glusterfsd/src/glusterfsd
-libglusterfs/src/spec.lex.c
+glusterfsd/src/gf_attach
+heal/src/glfsheal
+libgfchangelog.pc
+libglusterfs/src/graph.lex.c
libglusterfs/src/y.tab.c
libglusterfs/src/y.tab.h
+libglusterfs/src/defaults.c
+libglusterfs/src/cli1-xdr.h
+libglusterfs/src/protocol-common.h
+libtool
+# copied XDR for cyclic libglusterfs <-> rpc-header dependency
+run-tests.sh
+!tests/basic/fuse/Makefile
+!tests/basic/gfapi/Makefile
+tests/env.rc
+tests/utils/arequal-checksum
+xlators/features/glupy/src/__init__.py
+xlators/features/glupy/src/setup.py
+xlators/mount/fuse/utils/mount.glusterfs
+xlators/mount/fuse/utils/mount_glusterfs
+extras/peer_add_secret_pub
+tools/gfind_missing_files/gcrawler
+tools/glusterfind/glusterfind
+tools/glusterfind/src/tool.conf
+# Eventing
+events/src/eventsapiconf.py
+extras/systemd/glustereventsd.service
+events/src/eventtypes.py
+libglusterfs/src/eventtypes.h
+extras/init.d/glustereventsd-Debian
+extras/init.d/glustereventsd-FreeBSD
+extras/init.d/glustereventsd-Redhat
+tools/setgfid2path/src/gluster-setgfid2path
+xlators/features/cloudsync/src/cloudsync-autogen-fops.c
+xlators/features/cloudsync/src/cloudsync-autogen-fops.h
+xlators/features/utime/src/utime-autogen-fops.c
+xlators/features/utime/src/utime-autogen-fops.h
+tests/basic/metadisp/ftruncate
+xlators/features/metadisp/src/fops.c
diff --git a/.mailmap b/.mailmap
new file mode 100644
index 00000000000..141a1667ffa
--- /dev/null
+++ b/.mailmap
@@ -0,0 +1,43 @@
+# .mailmap, see 'git short-log --help' for details
+#
+# This file needs to match extras/who-wrote-glusterfs/gitdm.aliases.
+#
+# Listing of contributors that filed patches with different email addresses.
+# Format: <name> <main-email> <alias> [<alias> ...]
+#
+
+Amar Tumballi <amarts@redhat.com> <amar@gluster.com> <amar@del.gluster.com>
+Anand Avati <avati@redhat.com> <avati@gluster.com> <avati@dev.gluster.com> <avati@amp.gluster.com> <avati@blackhole.gluster.com>
+Anush Shetty <ashetty@redhat.com> <anush@gluster.com>
+Csaba Henk <csaba@redhat.com> <csaba@gluster.com> <csaba@lowlife.hu> <csaba@zresearch.com>
+Günther Deschner <gd@redhat.com> <gd@samba.org>
+Harshavardhana <fharshav@redhat.com> <harsha@gluster.com> <harsha@zresearch.com> <harsha@dev.gluster.com> <harsha@harshavardhana.net>
+Ji-Hyeon Gim <potatogim@gluesys.com> <potatogim@potatogim.net>
+Justin Clift <justin@gluster.org> <jclift@redhat.com>
+Kaleb S. KEITHLEY <kkeithle@redhat.com> <kkeithle@f16node1.kkeithle.usersys.redhat.com> <kkeithle@linux.keithley.org>
+Kaushal M <kaushal@redhat.com> <kaushal@gluster.com>
+Kaushik BV <kbudiger@redhat.com> <kaushikbv@gluster.com>
+Krishna Srinivas <ksriniva@redhat.com> <krishna@gluster.com> <krishna@zresearch.com> <krishna@guest-laptop>
+Krishnan Parthasarathi <kparthas@redhat.com> <kp@gluster.com>
+Louis Zuckerman <louiszuckerman@gmail.com> <me@louiszuckerman.com>
+M S Vishwanath Bhat <vbhat@redhat.com> <msvbhat@gmail.com> <vishwanath@gluster.com>
+Michael Adam <madam@redhat.com> <obnox@samba.org>
+Oleksandr Natalenko <oleksandr@natalenko.name> <o.natalenko@lanet.ua>
+Patrick Uiterwijk <puiterwijk@fedoraproject.org> <patrick@puiterwijk.org>
+Pavan Sondur <pavan@gluster.com> <pavan@dev.gluster.com>
+Pete Zaitcev <zaitcev@kotori.zaitcev.us> <zaitcev@yahoo.com>
+Pranith Kumar K <pkarampu@redhat.com> <pranithk@gluster.com>
+Prashanth Pai <ppai@redhat.com> <nullpai@gmail.com>
+Raghavendra Bhat <raghavendra@redhat.com> <raghavendrabhat@gluster.com>
+Raghavendra G <rgowdapp@redhat.com> <raghavendra@gluster.com> <raghavendra@zresearch.com>
+Rahul C S <rahulcs@redhat.com> <rahulcssjce@gmail.com>
+Rajesh Amaravathi <rajesh@redhat.com> <rajesh@gluster.com> <rajesh.amaravathi@gmail.com>
+Ravishankar N <ravishankar@redhat.com> <root@ravi2.(none)>
+Sakshi Bansal <sabansal@redhat.com> <sabansal@localhost.localdomain>
+Shehjar Tikoo <shehjart@gluster.com> <shehjart@zresearch.com>
+Venky Shankar <vshankar@redhat.com> <venky@gluster.com>
+Vijay Bellur <vbellur@redhat.com> <vijay@gluster.com> <vijay@dev.gluster.com>
+Vijaykumar Koppad <vkoppad@redhat.com> <vijaykumar.koppad@gmail.com>
+Vijaikumar Mallikarjuna <vmallika@redhat.com>
+Vikas Gorur <vikas@gluster.com> <vikas@zresearch.com>
+shishir gowda <sgowda@redhat.com> <shishirng@gluster.com>
diff --git a/.testignore b/.testignore
new file mode 100644
index 00000000000..fe8f838bf2b
--- /dev/null
+++ b/.testignore
@@ -0,0 +1,64 @@
+.github/ISSUE_TEMPLATE
+.github/PULL_REQUEST_TEMPLATE
+.github/stale.yml
+.gitignore
+.mailmap
+.testignore
+.clang-format
+rfc.sh
+submit-for-review.sh
+AUTHORS
+CONTRIBUTING.md
+COPYING-GPLV2
+COPYING-LGPLV3
+ChangeLog
+INSTALL
+MAINTAINERS
+NEWS
+README.md
+THANKS
+COMMITMENT
+api/examples/README
+api/examples/getvolfile.py
+api/src/README.Symbol_Versions
+build-aux/checkpatch.pl
+contrib/fuse-lib/COPYING.LIB
+contrib/fuse-util/COPYING
+contrib/macfuse/COPYING.txt
+doc/*
+extras/FreeBSD/README.FreeBSD
+extras/Solaris/README.solaris
+extras/Ubuntu/README.Ubuntu
+extras/benchmarking/README
+extras/cliutils/README.md
+extras/command-completion/README
+extras/create_new_xlator/README.md
+extras/glusterfs.vim
+extras/glusterfs-logrotate
+extras/glusterfs-georep-logrotate
+extras/init.d/glusterd-Debian.in
+extras/init.d/glusterd-FreeBSD.in
+extras/init.d/glusterd-Redhat.in
+extras/init.d/glusterd-SuSE.in
+extras/init.d/glusterd.plist.in
+extras/init.d/glustereventsd-Debian.in
+extras/init.d/glustereventsd-FreeBSD.in
+extras/init.d/glustereventsd-Redhat.in
+extras/init.d/rhel5-load-fuse.modules
+extras/logger.conf.example
+extras/snap_scheduler/README.md
+extras/test/ld-preload-test/README
+extras/who-wrote-glusterfs/*
+extras/distributed-testing/*
+geo-replication/syncdaemon/README.md
+geo-replication/test-requirements.txt
+rpc/xdr/src/.gitignore
+tests/README.md
+xlators/experimental/README.md
+xlators/experimental/dht2/README.md
+xlators/experimental/dht2/TODO.md
+xlators/experimental/posix2/README.md
+xlators/experimental/posix2/TODO.md
+xlators/features/glupy/doc/README.md
+xlators/features/glupy/doc/TESTING
+xlators/features/glupy/doc/test.vol
diff --git a/COMMITMENT b/COMMITMENT
new file mode 100644
index 00000000000..16b75efcf29
--- /dev/null
+++ b/COMMITMENT
@@ -0,0 +1,46 @@
+Common Cure Rights Commitment
+Version 1.0
+
+Before filing or continuing to prosecute any legal proceeding or claim
+(other than a Defensive Action) arising from termination of a Covered
+License, we commit to extend to the person or entity ('you') accused
+of violating the Covered License the following provisions regarding
+cure and reinstatement, taken from GPL version 3. As used here, the
+term 'this License' refers to the specific Covered License being
+enforced.
+
+ However, if you cease all violation of this License, then your
+ license from a particular copyright holder is reinstated (a)
+ provisionally, unless and until the copyright holder explicitly
+ and finally terminates your license, and (b) permanently, if the
+ copyright holder fails to notify you of the violation by some
+ reasonable means prior to 60 days after the cessation.
+
+ Moreover, your license from a particular copyright holder is
+ reinstated permanently if the copyright holder notifies you of the
+ violation by some reasonable means, this is the first time you
+ have received notice of violation of this License (for any work)
+ from that copyright holder, and you cure the violation prior to 30
+ days after your receipt of the notice.
+
+We intend this Commitment to be irrevocable, and binding and
+enforceable against us and assignees of or successors to our
+copyrights.
+
+Definitions
+
+'Covered License' means the GNU General Public License, version 2
+(GPLv2), the GNU Lesser General Public License, version 2.1
+(LGPLv2.1), or the GNU Library General Public License, version 2
+(LGPLv2), all as published by the Free Software Foundation.
+
+'Defensive Action' means a legal proceeding or claim that We bring
+against you in response to a prior proceeding or claim initiated by
+you or your affiliate.
+
+'We' means each contributor to this repository as of the date of
+inclusion of this file, including subsidiaries of a corporate
+contributor.
+
+This work is available under a Creative Commons Attribution-ShareAlike
+4.0 International license (https://creativecommons.org/licenses/by-sa/4.0/).
diff --git a/CONTRIBUTING b/CONTRIBUTING
deleted file mode 100644
index 8b3baa7e5f6..00000000000
--- a/CONTRIBUTING
+++ /dev/null
@@ -1,25 +0,0 @@
- Developer's Certificate of Origin 1.1
-
- By making a contribution to this project, I certify that:
-
- (a) The contribution was created in whole or in part by me and I
- have the right to submit it under the open source license
- indicated in the file; or
-
- (b) The contribution is based upon previous work that, to the best
- of my knowledge, is covered under an appropriate open source
- license and I have the right under that license to submit that
- work with modifications, whether created in whole or in part
- by me, under the same open source license (unless I am
- permitted to submit under a different license), as indicated
- in the file; or
-
- (c) The contribution was provided directly to me by some other
- person who certified (a), (b) or (c) and I have not modified
- it.
-
- (d) I understand and agree that this project and the contribution
- are public and that a record of the contribution (including all
- personal information I submit with it, including my sign-off) is
- maintained indefinitely and may be redistributed consistent with
- this project or the open source license(s) involved. \ No newline at end of file
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 00000000000..65fc3497104
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,114 @@
+# GlusterFS project Contribution guidelines
+
+## Development Workflow
+
+We follow most of the details as per the [document here](https://help.github.com/en/github/collaborating-with-issues-and-pull-requests). If you are not aware of the github workflow, it is recommended to go through them before continuing here.
+
+
+#### Get the Repository setup
+
+0. Fork Repository
+ - Fork [GlusterFS repository](https://github.com/gluster/glusterfs/fork).
+
+1. Clone Repository
+ - Clone the glusterfs repo freshly from github using below steps.
+
+```
+ git clone git@github.com:${username}/glusterfs.git
+ cd glusterfs/
+ git remote add upstream git@github.com:gluster/glusterfs.git
+```
+
+About two tasks are one time for the life time. You can continue to use the same repository for all the work in future.
+
+#### Development & Other flows
+
+0. Issue:
+ - Make sure there is an issue filed for the task you are working on.
+ - If it is not filed, open the issue with all the description.
+ - If it is a bug fix, add label "Type:Bug".
+ - If it is an RFC, provide all the documentation, and request for "DocApproved", and "SpecApproved" label.
+
+1. Code:
+ - Start coding
+ - Build and test locally
+ - Make sure clang-format is installed and is run on the patch.
+
+2. Keep up-to-date
+ - GlusterFS is a large project with many developers, so there would be one or the other patch everyday.
+ - It is critical for developer to be up-to-date with `devel` repo to be Conflict-Free when PR is opened.
+ - Git provides many options to keep up-to-date, below is one of them
+```
+ git fetch upstream
+ git rebase upstream/devel
+```
+ - It is recommended you keep pushing to your repo every day, so you don't loose any work.
+ - It can be done by `./rfc.sh` (or `git push origin HEAD:issueNNN`)
+
+2. Commit Message / PR description:
+ - The name of the branch on your personal fork can start with issueNNNN, followed by anything of your choice.
+ - PRs continue to have the title of format "component: \<title\>", like it is practiced now.
+ - When you open a PR, having a reference Issue for the commit is mandatory in GlusterFS.
+ - Commit message can have, either `Fixes: #NNNN` or `Updates: #NNNN` in a separate line in the commit message.
+ - Here, NNNN is the Issue ID in glusterfs repository.
+ - Each commit needs the author to have the "Signed-off-by: Name \<email\>" line.
+ - Can do this by `-s` option for `git commit`.
+ - If the PR is not ready for review, apply the label `work-in-progress`.
+ - Check the availability of "Draft PR" is present for you, if yes, use that instead.
+
+3. Tests:
+ - All the required smoke tests would be auto-triggered.
+ - Developers get a chance to retrigger the smoke tests using **"/recheck smoke"** as comment.
+ - The "regression" tests would be triggered by a comment **"/run regression"** from developers in the [@gluster-maintainers](https://github.com/orgs/gluster/teams/gluster-maintainers) group.
+ - Ask for help as comment in PR if you have any questions about the process!
+
+4. Review Process:
+ - `+2` : is equivalent to "Approve" from the people in the maintainer's group.
+ - `+1` : can be given by a maintainer/reviewer by explicitly stating that in the comment.
+ - `-1` : provide details on required changes and pick "Request Changes" while submitting your review.
+ - `-2` : done by adding the `DO-NOT-MERGE` label.
+
+ - Any further discussions can happen as comments in the PR.
+
+5. Making changes:
+ - There are 2 approaches to submit changes done after addressing review comments.
+ - Commit changes as a new commit on top of the original commits in the branch, and push the changes to same branch (issueNNNN)
+ - Commit changes into the same commit with `--amend` option, and do a push to the same branch with `--force` option.
+
+6. Merging:
+ - GlusterFS project follows 'Squash and Merge' method
+ - This is mainly to preserve the historic Gerrit method of one patch in `git log` for one URL link.
+ - This also makes every merge a complete patch, which has passed all tests.
+ - The merging of the patch is expected to be done by the maintainers.
+ - It can be done when all the tests (smoke and regression) pass.
+ - When the PR has 'Approved' flag from corresponding maintainer.
+ - If you feel there is delay, feel free to add a comment, discuss the same in Slack channel, or send email.
+
+## By contributing to this project, the contributor would need to agree to below.
+
+### Developer's Certificate of Origin 1.1
+
+By making a contribution to this project, I certify that:
+
+(a) The contribution was created in whole or in part by me and I
+ have the right to submit it under the open source license
+ indicated in the file; or
+
+(b) The contribution is based upon previous work that, to the best
+ of my knowledge, is covered under an appropriate open source
+ license and I have the right under that license to submit that
+ work with modifications, whether created in whole or in part
+ by me, under the same open source license (unless I am
+ permitted to submit under a different license), as indicated
+ in the file; or
+
+(c) The contribution was provided directly to me by some other
+ person who certified (a), (b) or (c) and I have not modified
+ it.
+
+(d) I understand and agree that this project and the contribution
+ are public and that a record of the contribution (including all
+ personal information I submit with it, including my sign-off) is
+ maintained indefinitely and may be redistributed consistent with
+ this project or the open source license(s) involved.
+
diff --git a/INSTALL b/INSTALL
index 88e28999df9..a56390e54fb 100644
--- a/INSTALL
+++ b/INSTALL
@@ -1,32 +1,47 @@
Installation Instructions
*************************
-Run ./configure after untaring the package.
+0. If you have cloned from git, run ./autogen.sh.
+
+1. Run ./configure.
bash# ./configure
GlusterFS configure summary
===========================
- FUSE client : yes
- Infiniband verbs : yes
- epoll IO multiplex : yes
- Berkeley-DB : yes
- libglusterfsclient : yes
- mod_glusterfs : yes
- argp-standalone : no
+ GlusterFS configure summary
+ ===========================
+ FUSE client : yes
+ Infiniband verbs : yes
+ epoll IO multiplex : yes
+ argp-standalone : no
+ fusermount : yes
+ readline : yes
+ georeplication : yes
+ Linux-AIO : yes
+ Enable Debug : no
+ Block Device xlator : yes
+ glupy : yes
+ Use syslog : yes
+ XML output : yes
+ QEMU Block formats : yes
+ Encryption xlator : yes
+
The configure summary will tell you what all components will be built with
GlusterFS. Other than 'argp-standalone' if something else says 'no', that
feature in GlusterFS will not be built. 'argp-standalone' package will only
be used if the system doesn't have a proper argp package installed.
-Now just run 'make' and later run 'make install' to install the package.
+2. Now just run 'make' and later run 'make install' to install the package.
bash# make
bash# make install
-Installation complete :-)
+Installation completed :-)
bash# glusterfs --version
Make sure your version is the latest from the release, and the one you
just installed :-)
+
+For more information on GlusterFS installation refer# http://docs.gluster.org/en/latest/Developer-guide/Building-GlusterFS/
diff --git a/MAINTAINERS b/MAINTAINERS
new file mode 100644
index 00000000000..953e8755fd9
--- /dev/null
+++ b/MAINTAINERS
@@ -0,0 +1,510 @@
+GlusterFS Maintainers
+=====================
+
+The intention of this file is not to establish who owns what portions of the
+code base, but to provide a set of names that developers can consult when they
+have a question about a particular subset and also to provide a set of names
+to be CC'd when submitting a patch to obtain appropriate review.
+
+In general, if you have a question about inclusion of a patch, you should
+consult gluster-devel@gluster.org and not any specific individual privately.
+
+Descriptions of section entries:
+
+ M: Main contact that knows and takes care of this area
+ L: Mailing list that is relevant to this area
+ W: Web-page with status/info
+ Q: Patchwork web based patch tracking system site
+ T: SCM tree type and location. Type is one of: git, hg, quilt, stgit.
+ S: Status, one of the following:
+ Supported: Someone is actually paid to look after this.
+ Maintained: Someone actually looks after it.
+ Odd Fixes: It has a maintainer but they don't have time to do
+ much other than throw the odd patch in. See below.
+ Orphan: No current maintainer [but maybe you could take the
+ role as you write your new code].
+ Obsolete: Old code. Something tagged obsolete generally means
+ it has been replaced by a better system and you
+ should be using that.
+ F: Files and directories with wildcard patterns.
+ A trailing slash includes all files and subdirectory files.
+ F: drivers/net/ all files in and below drivers/net
+ F: drivers/net/* all files in drivers/net, but not below
+ F: */net/* all files in "any top level directory"/net
+ One pattern per line. Multiple F: lines acceptable.
+ X: Files and directories that are NOT maintained, same rules as F:
+ Files exclusions are tested before file matches.
+ Can be useful for excluding a specific subdirectory, for instance:
+ F: net/
+ X: net/ipv6/
+ matches all files in and below net excluding net/ipv6/
+ K: Keyword perl extended regex pattern to match content in a
+ patch or file. For instance:
+ K: of_get_profile
+ matches patches or files that contain "of_get_profile"
+ K: \b(printk|pr_(info|err))\b
+ matches patches or files that contain one or more of the words
+ printk, pr_info or pr_err
+ One regex pattern per line. Multiple K: lines acceptable.
+ P: Peer for a component
+
+
+General Project Architects
+--------------------------
+M: Amar Tumballi <amarts@gmail.com>
+M: Xavier Hernandez <xhernandez@redhat.com>
+P: Pranith Karampuri <pranith.karampuri@phonepe.com>
+P: Atin Mukherjee <amukherj@redhat.com>
+
+xlators:
+--------
+Access Control List (ACL)
+M: Raghavendra Talur <rtalur@redhat.com>
+P: Jiffin Tony Thottan <jthottan@redhat.com>
+S: Maintained
+F: xlators/system/posix-acl/
+
+Arbiter
+M: Ravishankar N <ravishankar@redhat.com>
+P: Pranith Karampuri <pranith.karampuri@phonepe.com>
+S: Maintained
+F: xlators/features/arbiter/
+
+Automatic File Replication (AFR)
+M: Pranith Karampuri <pranith.karampuri@phonepe.com>
+M: Ravishankar N <ravishankar@redhat.com>
+P: Karthik US <ksubrahm@redhat.com>
+S: Maintained
+F: xlators/cluster/afr/
+
+Barrier
+M: Raghavendra Bhat <rabhat@redhat.com>
+P: Atin Mukherjee <amukherj@redhat.com>
+S: Maintained
+F: xlators/features/barrier
+
+BitRot
+M: Kotresh HR <khiremat@redhat.com>
+P: Raghavendra Bhat <rabhat@redhat.com>
+S: Maintained
+F: xlators/features/bit-rot/
+
+Changelog
+M: Aravinda V K <avishwan@redhat.com>
+P: Kotresh HR <khiremat@redhat.com>
+S: Maintained
+F: xlators/features/changelog/
+
+Distributed Hashing Table (DHT)
+P: Susant Palai <spalai@redhat.com>
+S: Maintained
+F: xlators/cluster/dht/
+
+Erasure Coding
+M: Pranith Karampuri <pranith.karampuri@phonepe.com>
+M: Xavier Hernandez <xhernandez@redhat.com>
+P: Ashish Pandey <aspandey@redhat.com>
+S: Maintained
+F: xlators/cluster/ec/
+
+Error-gen
+M: Raghavendra Talur <rtalur@redhat.com>
+S: Maintained
+F: xlators/debug/error-gen/
+
+FUSE Bridge
+M: Csaba Henk <chenk@redhat.com>
+P: Niels de Vos <ndevos@redhat.com>
+S: Maintained
+F: xlators/mount/
+
+Index
+M: Pranith Karampuri <pranith.karampuri@phonepe.com>
+P: Ravishankar N <ravishankar@redhat.com>
+S: Maintained
+F: xlators/features/index/
+
+IO Cache
+P: Mohammed Rafi KC <rafi.kavungal@iternity.com>
+S: Maintained
+F: xlators/performance/io-cache/
+
+IO Statistics
+M: Krutika Dhananjay <kdhananj@redhat.com>
+M: Shyam Ranganathan <srangana@redhat.com>
+S: Maintained
+F: xlators/debug/io-stats/
+
+IO threads
+M: Pranith Karampuri <pranith.karampuri@phonepe.com>
+P: Ravishankar N <ravishankar@redhat.com>
+S: Maintained
+F: xlators/performance/io-threads/
+
+Leases
+M: Poornima G <pgurusid@redhat.com>
+P: Niels de Vos <ndevos@redhat.com>
+P: Soumya Koduri <skoduri@redhat.com>
+S: Maintained
+F: xlators/features/leases/
+
+Locks
+M: Krutika Dhananjay <kdhananj@redhat.com>
+P: Xavier Hernandez <xhernandez@redhat.com>
+S: Maintained
+F: xlators/features/locks/
+
+Marker
+M: Kotresh HR <khiremat@redhat.com>
+S: Maintained
+F: xlators/features/marker/
+
+Meta
+M: Mohammed Rafi KC <rafi.kavungal@iternity.com>
+S: Maintained
+F: xlators/features/meta/
+
+Metadata-cache
+M: Poornima G <pgurusid@redhat.com>
+P: Soumya Koduri <skoduri@redhat.com>
+S: Maintained
+F: xlators/performance/md-cache/
+
+Negative-lookup Cache
+M: Poornima G <pgurusid@redhat.com>
+P: Pranith Karampuri <pranith.karampuri@phonepe.com>
+S: Maintained
+F: xlators/performance/nl-cache/
+
+gNFS
+M: Jiffin Tony Thottan <jthottan@redhat.com>
+P: Xie Changlong <xiechanglong@cmss.chinamobile.com>
+P: Amar Tumballi <amarts@gmail.com>
+S: Odd Fixes
+F: xlators/nfs/server/
+
+Open-behind
+S: Maintained
+F: xlators/performance/open-behind/
+
+Posix:
+M: Raghavendra Bhat <raghavendra@redhat.com>
+P: Kotresh HR <khiremat@redhat.com>
+P: Krutika Dhananjay <kdhananj@redhat.com>
+S: Maintained
+F: xlators/storage/posix/
+
+Quick-read
+S: Maintained
+F: xlators/performance/quick-read/
+
+Quota
+M: Shyamsundar Ranganathan <srangana@redhat.com>
+P: Hari Gowtham <hgowtham@redhat.com>
+S: Maintained
+F: xlators/features/quota/
+
+Read-ahead
+P: Csaba Henk <chenk@redhat.com>
+S: Maintained
+F: xlators/performance/read-ahead/
+
+Readdir-ahead
+S: Maintained
+F: xlators/performance/readdir-ahead/
+
+Sharding
+M: Krutika Dhananjay <kdhananj@redhat.com>
+P: Xavier Hernandez <xhernandez@redhat.com>
+S: Maintained
+F: xlators/features/shard/
+
+Trash
+M: Anoop C S <anoopcs@redhat.com>
+M: Jiffin Tony Thottan <jthottan@redhat.com>
+S: Maintained
+F: xlators/features/trash/
+
+Upcall
+M: Poornima G <pgurusid@redhat.com>
+M: Soumya Koduri <skoduri@redhat.com>
+P: Niels de Vos <ndevos@redhat.com>
+S: Maintained
+F: xlators/features/upcall/
+
+Write-behind
+P: Csaba Henk <chenk@redhat.com>
+S: Maintained
+F: xlators/performance/write-behind/
+
+Write Once Read Many
+P: Karthik US <ksubrahm@redhat.com>
+S: Maintained
+F: xlators/features/read-only/
+
+Cloudsync
+M: Susant Kumar Palai <spalai@redhat.com>
+S: Maintained
+F: xlators/features/cloudsync/
+
+Other bits of code:
+-------------------
+
+Doc
+M: Humble Chirammal <hchiramm@redhat.com>
+M: Raghavendra Talur <rtalur@redhat.com>
+S: Maintained
+F: doc/
+
+Geo Replication
+M: Aravinda V K <avishwan@redhat.com>
+M: Kotresh HR <khiremat@redhat.com>
+M: Sunny Kumar <sunkumar@redhat.com>
+S: Maintained
+F: geo-replication/
+
+Glusterfind
+M: Aravinda VK <avishwan@redhat.com>
+S: Maintained
+F: tools/glusterfind/
+
+libgfapi
+M: Niels de Vos <ndevos@redhat.com>
+P: Poornima G <pgurusid@redhat.com>
+P: Shyamsundar Ranganathan <srangana@redhat.com>
+P: Soumya Koduri <skoduri@redhat.com>
+S: Maintained
+F: api/
+
+libglusterfs
+M: Amar Tumballi <amarts@gmail.com>
+M: Xavier Hernandez <xhernandez@redhat.com>
+M: Jeff Darcy <jeff@pl.atyp.us>
+P: Kaleb Keithley <kkeithle@redhat.com>
+P: Niels de Vos <ndevos@redhat.com>
+P: Pranith Karampuri <pranith.karampuri@phonepe.com>
+P: Shyamsundar Ranganathan <srangana@redhat.com>
+S: Maintained
+F: libglusterfs/
+
+xxhash
+M: Aravinda VK <avishwan@redhat.com>
+M: Kotresh HR <khiremat@redhat.com>
+P: Yaniv Kaul <ykaul@redhat.com>
+S: Maintained
+F: contrib/xxhash/
+T: https://github.com/Cyan4973/xxHash.git
+
+Management Daemon - glusterd
+M: Atin Mukherjee <amukherj@redhat.com>
+M: Mohit Agrawal <moagrawa@redhat.com>
+M: Sanju Rakonde <srakonde@redhat.com>
+S: Maintained
+F: cli/
+F: xlators/mgmt/glusterd/
+
+Protocol
+M: Niels de Vos <ndevos@redhat.com>
+P: Mohammed Rafi KC <rafi.kavungal@iternity.com>
+S: Maintained
+F: xlators/protocol/
+
+Remote Procedure Call subsystem
+P: Mohit Agrawal <moagrawa@redhat.com>
+S: Maintained
+F: rpc/rpc-lib/
+F: rpc/xdr/
+
+Snapshot
+M: Raghavendra Bhat <raghavendra@redhat.com>
+P: Mohammed Rafi KC <rafi.kavungal@iternity.com>
+P: Sunny Kumar <sunkumar@redhat.com>
+S: Maintained
+F: xlators/mgmt/glusterd/src/glusterd-snap*
+F: extras/snap-scheduler.py
+
+Socket subsystem
+P: Krutika Dhananjay <kdhananj@redhat.com>
+P: Milind Changire <mchangir@redhat.com>
+P: Mohammed Rafi KC <rafi.kavungal@iternity.com>
+P: Mohit Agrawal <moagrawa@redhat.com>
+S: Maintained
+F: rpc/rpc-transport/socket/
+
+Testing - .t framework
+M: Raghavendra Talur <rtalur@redhat.com>
+S: Maintained
+F: tests/
+
+Utilities
+M: Aravinda VK <avishwan@redhat.com>
+P: Niels de Vos <ndevos@redhat.com>
+P: Raghavendra Talur <rtalur@redhat.com>
+P: Sachidanda Urs <surs@redhat.com>
+S: Maintained
+F: extras/
+
+Events APIs
+M: Aravinda VK <avishwan@redhat.com>
+S: Maintained
+F: events/
+F: libglusterfs/src/events*
+F: libglusterfs/src/eventtypes*
+F: extras/systemd/glustereventsd*
+
+Distribution Specific:
+----------------------
+Build:
+M: Niels de Vos <ndevos@redhat.com>
+M: Hari Gowtham <hgowtham@redhat.com>
+P: Anoop C S <anoopcs@redhat.com>
+P: Raghavendra Talur <rtalur@redhat.com>
+P: Rinku Kothiya <rkothiya@redhat.com>
+S: Maintained
+
+Debian packages on download.gluster.org
+M: packaging@gluster.org
+M: Kaleb Keithley <kkeithle@redhat.com>
+P: Sheetal Pamecha <spamecha@redhat.com>
+P: Shwetha Acharya <sacharya@redhat.com>
+S: Maintained
+W: http://download.gluster.org/pub/gluster/glusterfs/LATEST/Debian/Debian.README
+T: https://github.com/gluster/glusterfs-debian.git
+
+OpenSuSE
+M: packaging@gluster.org
+M: Kaleb Keithley <kkeithle@redhat.com>
+P: Sheetal Pamecha <spamecha@redhat.com>
+P: Shwetha Acharya <sacharya@redhat.com>
+S: Maintained
+W: https://build.opensuse.org/repositories/home:glusterfs
+W: https://download.gluster.org/pub/gluster/glusterfs/LATEST/SuSE/SuSE.README
+T: https://github.com/gluster/glusterfs-suse.git
+
+Packages for the CentOS Storage SIG
+M: centos-devel@centos.org
+M: Niels de Vos <ndevos@redhat.com>
+P: Kaleb Keithley <kkeithle@redhat.com>
+S: Maintained
+W: https://wiki.centos.org/SpecialInterestGroup/Storage/Gluster
+T: https://github.com/CentOS-Storage-SIG/glusterfs.git
+
+Ubuntu PPA
+M: packaging@gluster.org
+M: Kaleb Keithley <kkeithle@redhat.com>
+P: Sheetal Pamecha <spamecha@redhat.com>
+P: Shwetha Acharya <sacharya@redhat.com>
+S: Maintained
+W: https://launchpad.net/~gluster
+W: http://download.gluster.org/pub/gluster/glusterfs/LATEST/Ubuntu/Ubuntu.README
+T: https://github.com/gluster/glusterfs-debian.git
+
+Related projects
+----------------
+Gluster Block
+M: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
+M: Xiubo Li <xiubli@redhat.com>
+S: Maintained
+T: https://github.com/gluster/gluster-block.git
+
+GlusterFS core-utils
+M: Anoop C S <anoopcs@redhat.com>
+S: Maintained
+T: https://github.com/gluster/glusterfs-coreutils.git
+
+NFS-Ganesha FSAL plugin
+M: Jiffin Tony Thottan <jthottan@redhat.com>
+M: Kaleb Keithley <kkeithle@redhat.com>
+M: Soumya Koduri <skoduri@redhat.com>
+S: Maintained
+T: git://github.com/nfs-ganesha/nfs-ganesha.git
+F: src/nfs-ganesha~/src/FSAL/FSAL_GLUSTER/
+
+QEMU integration
+M: Niels de Vos <ndevos@redhat.com>
+M: Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
+S: Maintained
+T: git://git.qemu.org/qemu.git
+F: block/gluster.c
+
+Samba VFS plugin
+M: Anoop C S <anoopcs@redhat.com>
+M: Raghavendra Talur <rtalur@redhat.com>
+M: Michael Adam <madam@redhat.com>
+M: Poornima G <pgurusid@redhat.com>
+S: Maintained
+T: git://git.samba.org/samba.git
+F: source3/modules/vfs_glusterfs.c
+
+Storhaug
+M: Jose A. Rivera <jarrpa@redhat.com>
+P: Kaleb Keithley <kkeithle@redhat.com>
+S: Maintained
+T: https://github.com/linux-ha-storage/storhaug.git
+
+Testing - Glusto-Tests
+M: Jonathan Holloway <jholloway@redhat.com>
+M: Vijay Bhaskar Reddy Avuthu <vavuthu@redhat.com>
+M: Akarsha Rai <akrai@redhat.com>
+S: Maintained
+T: https://github.com/gluster/glusto-tests.git
+
+Wireshark dissectors
+M: Niels de Vos <ndevos@redhat.com>
+S: Maintained
+W: https://forge.gluster.org/wireshark
+T: http://code.wireshark.org/git/wireshark
+F: epan/dissectors/packet-gluster*
+
+Infrastructure
+--------------
+
+Platform
+M: Michael Scherer <misc@redhat.com>
+P: Shyamsundar Ranganathan <srangana@redhat.com>
+P: Amar Tumballi <amarts@gmail.com>
+
+Continuous Integration
+M: Michael Scherer <misc@redhat.com>
+M: Deepshikha Khandelwal <dkhandel@redhat.com>
+P: Niels de Vos <ndevos@redhat.com>
+
+Special Thanks
+--------------
+
+GlusterFS would not be possible without the contributions of:
+
+
+M: Vijay Bellur <vbellur@redhat.com>
+M: Jeff Darcy <jeff@pl.atyp.us>
+M: Shreyas Siravara <sshreyas@fb.com>
+M: Kaushal M <kaushal@redhat.com>
+M: Nigel Babu
+M: Prashanth Pai
+P: Sanoj Unnikrishnan
+P: Milind Changire <mchangir@redhat.com>
+P: Sunil Kumar Acharya <sheggodu@redhat.com>
+M: Samikshan Bairagya <samikshan@gmail.com>
+M: Chris Hertel
+M: M. Mohan Kumar <mohan@in.ibm.com>
+M: Shishir Gowda <gowda.shishir@gmail.com>
+M: Brian Foster <bfoster@redhat.com>
+M: Anand Avati <avati@cs.stanford.edu>
+M: Dennis Schafroth <dennis@schafroth.com>
+M: Harshavardhana <harsha@harshavardhana.net>
+M: Krishnan Parthasarathi
+M: Justin Clift <justin@gluster.org>
+M: Venky Shankar <vshankar@redhat.com>
+M: Shravan Chandrashekar <shravantc99@gmail.com>
+M: Joseph Fernandes
+M: Vijaikumar Mallikarjuna
+M: Anand Subramanian
+M: Bharata B Rao <bharata@linux.vnet.ibm.com>
+M: Rajesh Joseph
+M: Dan Lambright
+M: Jay Vyas
+M: Luis Pabon
+M: Ira Cooper
+M: Shwetha Panduranga
+M: Nithya Balachandran
+M: Raghavendra Gowdappa
diff --git a/Makefile.am b/Makefile.am
index 6693bb8767a..98ea5c1038d 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -1,14 +1,55 @@
-EXTRA_DIST = autogen.sh COPYING-GPLV2 COPYING-LGPLV3 INSTALL README AUTHORS THANKS NEWS glusterfs.spec
+SOURCES = site.h
-SUBDIRS = argp-standalone libglusterfs rpc xlators glusterfsd $(FUSERMOUNT_SUBDIR) doc extras cli
+EXTRA_DIST = autogen.sh \
+ COPYING-GPLV2 COPYING-LGPLV3 COMMITMENT \
+ INSTALL README.md AUTHORS THANKS NEWS \
+ glusterfs.spec glusterfs-api.pc.in libgfchangelog.pc.in \
+ run-tests.sh \
+ build-aux/pkg-version \
+ contrib/umountd \
+ $(shell find $(top_srcdir)/tests -type f -print)
-CLEANFILES =
+
+SUBDIRS = $(ARGP_STANDALONE_DIR) libglusterfs rpc libglusterd api \
+ glusterfsd xlators $(FUSERMOUNT_SUBDIR) doc extras cli heal \
+ @SYNCDAEMON_SUBDIR@ @UMOUNTD_SUBDIR@ tools events
+
+pkgconfigdir = @pkgconfigdir@
+pkgconfig_DATA = glusterfs-api.pc libgfchangelog.pc
+
+CLEANFILES = glusterfs-api.pc libgfchangelog.pc contrib/umountd/Makefile
+
+clean-local:
+ find . -name '*.o' -o -name '*.lo' -o -name '.Po' | xargs rm -f
gitclean: distclean
find . -name Makefile.in -exec rm -f {} \;
- find . -name Makefile -exec rm -f {} \;
find . -name mount.glusterfs -exec rm -f {} \;
+ find . -name .deps -o -name .libs | xargs rm -rf
rm -fr autom4te.cache
rm -f missing aclocal.m4 config.h.in config.guess config.sub ltmain.sh install-sh configure depcomp
- rm -fr argp-standalone/autom4te.cache
- rm -f argp-standalone/aclocal.m4 argp-standalone/config.h.in argp-standalone/configure argp-standalone/depcomp argp-standalone/install-sh argp-standalone/missing
+
+# dist-hook gets executed with 'make dist', this is the only target getting
+# executed, a dist-hook in other Makefile.am files seem to get ignored.
+dist-hook: gen-VERSION gen-ChangeLog
+ -rm -fr $(distdir)/contrib/umountd/.deps
+ -rm -f $(distdir)/events/src/eventtypes.py
+ -rm -f $(distdir)/tests/env.rc
+ -cp -f $(top_srcdir)/build-aux/config.sub.dist $(distdir)/config.sub
+ -cp -f $(top_srcdir)/build-aux/config.guess.dist $(distdir)/config.guess
+
+.PHONY: gen-VERSION gen-ChangeLog clang-check
+
+clang-check:
+ @$(top_srcdir)/extras/clang-checker.sh
+
+gen-ChangeLog:
+ (cd $(srcdir) && git diff && echo ===== git log ==== && git log) > $(distdir)/ChangeLog
+
+.PHONY : gen-VERSION
+gen-VERSION:
+ if test -d $(top_srcdir)/.git; then \
+ cd $(top_srcdir); \
+ ./build-aux/pkg-version --full \
+ > $(abs_top_builddir)/$(distdir)/VERSION; \
+ fi
diff --git a/README b/README
deleted file mode 100644
index ce648743ee9..00000000000
--- a/README
+++ /dev/null
@@ -1 +0,0 @@
-For more info, please visit http://www.gluster.org/.
diff --git a/README.md b/README.md
new file mode 100644
index 00000000000..9d68e033782
--- /dev/null
+++ b/README.md
@@ -0,0 +1,46 @@
+# Gluster
+ Gluster is a software defined distributed storage that can scale to several
+ petabytes. It provides interfaces for object, block and file storage.
+
+## Development
+ The development workflow is documented in [Contributors guide](CONTRIBUTING.md)
+
+## Documentation
+ The Gluster documentation can be found at [Gluster Docs](http://docs.gluster.org).
+
+## Deployment
+ Quick instructions to build and install can be found in [INSTALL](INSTALL) file.
+
+## Testing
+
+ GlusterFS source contains some functional tests under `tests/` directory. All
+ these tests are run against every patch submitted for review. If you want your
+ patch to be tested, please add a `.t` test file as part of your patch submission.
+ You can also submit a patch to only add a `.t` file for the test case you are
+ aware of.
+
+ To run these tests, on your test-machine, just run `./run-tests.sh`. Don't run
+ this on a machine where you have 'production' glusterfs is running, as it would
+ blindly kill all gluster processes in each runs.
+
+ If you are sending a patch, and want to validate one or few specific tests, then
+ run a single test by running the below command.
+
+```
+ bash# /bin/bash ${path_to_gluster}/tests/basic/rpc-coverage.t
+```
+
+ You can also use `prove` tool if available in your machine, as follows.
+
+```
+ bash# prove -vmfe '/bin/bash' ${path_to_gluster}/tests/basic/rpc-coverage.t
+```
+
+
+## Maintainers
+ The list of Gluster maintainers is available in [MAINTAINERS](MAINTAINERS) file.
+
+## License
+ Gluster is dual licensed under [GPLV2](COPYING-GPLV2) and [LGPLV3+](COPYING-LGPLV3).
+
+ Please visit the [Gluster Home Page](http://www.gluster.org/) to find out more about Gluster.
diff --git a/THANKS b/THANKS
index e1ce5105d79..d056d00bb72 100644
--- a/THANKS
+++ b/THANKS
@@ -1 +1 @@
-For all of you, who use the product and help us making it more robust, useful, popular..
+For all of you, who use the product and help us make it more robust, useful, and popular.
diff --git a/api/Makefile.am b/api/Makefile.am
new file mode 100644
index 00000000000..f0ad1ee971c
--- /dev/null
+++ b/api/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src examples
diff --git a/api/examples/Makefile.am b/api/examples/Makefile.am
new file mode 100644
index 00000000000..7112c81d6a7
--- /dev/null
+++ b/api/examples/Makefile.am
@@ -0,0 +1,6 @@
+# The bits needed for glfsxmp
+EXTRA_PROGRAMS = glfsxmp
+glfsxmp_SOURCES = glfsxmp.c
+glfsxmp_CFLAGS = $(GLFS_CFLAGS) -Wall
+glfsxmp_LDADD = $(GLFS_LIBS) -lrt
+
diff --git a/api/examples/README b/api/examples/README
new file mode 100644
index 00000000000..4d2b521f779
--- /dev/null
+++ b/api/examples/README
@@ -0,0 +1,36 @@
+This is an example application which uses libgfapi. It is
+a complete autotools based build system which demonstrates the
+required changes in configure.ac, Makefile.am etc to successfuly
+detect for and build an application against libgfapi.
+
+There are two approaches to building a libgfapi based application:
+
+1. In the presence of pkg-config in your build system.
+This is the recommended approach which is also used in this example.
+For this approach to work, you need to build glusterfs by passing
+--pkgconfigdir=/usr/lib64/pkgconfig (or the appropriate directory)
+in your distro. This already happens if you build RPMs with the
+glusterfs.spec provided in glusterfs.git. You will also need to
+install glusterfs-api RPM.
+
+2. In the absence of pkg-config in your build system.
+Make sure your LDFLAGS includes -L/path/to/lib where libgfapi.so is
+installed and -I/path/to/include/glusterfs where the 'api' directory
+containing the headers are available.
+
+glfsxmp.c
+=========
+
+glfsxmp.c is an example application which uses libgfapi
+
+Compilation Steps For glfsxmp.c
+===============================
+
+1. $./autogen.sh
+2. $./configure
+
+Note: Before running ./configure , as mentioned above, you need to
+ take care of #1 or #2 i.e. pkg-config path or LDFLAGS and
+ -I/<path> with correct values.
+
+3. $make glfsxmp
diff --git a/api/examples/autogen.sh b/api/examples/autogen.sh
new file mode 100755
index 00000000000..1fee6be11bb
--- /dev/null
+++ b/api/examples/autogen.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+
+aclocal
+autoconf
+automake --foreign
diff --git a/api/examples/configure.ac b/api/examples/configure.ac
new file mode 100644
index 00000000000..b80177a4e87
--- /dev/null
+++ b/api/examples/configure.ac
@@ -0,0 +1,12 @@
+
+AC_INIT([glfs-test],[0.1],[gluster-devel@nongu.org])
+
+AM_INIT_AUTOMAKE
+
+AC_CONFIG_FILES([Makefile])
+
+AC_PROG_CC
+
+PKG_CHECK_MODULES([GLFS], [glusterfs-api >= 3])
+
+AC_OUTPUT
diff --git a/api/examples/getvolfile.py b/api/examples/getvolfile.py
new file mode 100755
index 00000000000..3b2c8ab5a15
--- /dev/null
+++ b/api/examples/getvolfile.py
@@ -0,0 +1,45 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+import ctypes
+import ctypes.util
+
+api = ctypes.CDLL("libgfapi.so")
+api.glfs_get_volfile.argtypes = [ctypes.c_void_p,
+ ctypes.c_void_p,
+ ctypes.c_ulong]
+api.glfs_get_volfile.restype = ctypes.c_long
+
+
+def get_volfile(host, volume):
+ # This is set to a large value to exercise the "buffer not big enough"
+ # path. More realistically, you'd just start with a huge buffer.
+ BUF_LEN = 0
+ fs = api.glfs_new(volume)
+ # api.glfs_set_logging(fs,"/dev/stderr",7)
+ api.glfs_set_volfile_server(fs, "tcp", host, 24007)
+ api.glfs_init(fs)
+ vbuf = ctypes.create_string_buffer(BUF_LEN)
+ vlen = api.glfs_get_volfile(fs, vbuf, BUF_LEN)
+ if vlen < 0:
+ vlen = BUF_LEN - vlen
+ vbuf = ctypes.create_string_buffer(vlen)
+ vlen = api.glfs_get_volfile(fs, vbuf, vlen)
+ api.glfs_fini(fs)
+ if vlen <= 0:
+ return vlen
+ return vbuf.value[:vlen]
+
+if __name__ == "__main__":
+ import sys
+
+ try:
+ res = get_volfile(*sys.argv[1:3])
+ except:
+ print("fetching volfile failed (volume not started?)")
+
+ try:
+ for line in res.split('\n'):
+ print(line)
+ except:
+ print("bad return value %s" % res)
diff --git a/api/examples/glfsxmp.c b/api/examples/glfsxmp.c
new file mode 100644
index 00000000000..a55616ef739
--- /dev/null
+++ b/api/examples/glfsxmp.c
@@ -0,0 +1,1811 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <string.h>
+#include <time.h>
+
+#define TEST_STR_LEN 2048
+
+int
+test_dirops(glfs_t *fs)
+{
+ glfs_fd_t *fd = NULL;
+ char buf[512];
+ struct dirent *entry = NULL;
+
+ fd = glfs_opendir(fs, "/");
+ if (!fd) {
+ fprintf(stderr, "/: %s\n", strerror(errno));
+ return -1;
+ }
+
+ fprintf(stderr, "Entries:\n");
+ while (glfs_readdir_r(fd, (struct dirent *)buf, &entry), entry) {
+ fprintf(stderr, "%s: %lu\n", entry->d_name, glfs_telldir(fd));
+ }
+
+ glfs_closedir(fd);
+ return 0;
+}
+
+int
+test_xattr(glfs_t *fs)
+{
+ char *filename = "/filename2";
+ char *linkfile = "/linkfile";
+ glfs_fd_t *fd = NULL;
+ char buf[512];
+ char *ptr;
+ int ret;
+
+ ret = glfs_setxattr(fs, filename, "user.testkey", "testval", 8, 0);
+ fprintf(stderr, "setxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_setxattr(fs, filename, "user.testkey2", "testval", 8, 0);
+ fprintf(stderr, "setxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_getxattr(fs, filename, "user.testkey", buf, 512);
+ fprintf(stderr, "getxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_listxattr(fs, filename, buf, 512);
+ fprintf(stderr, "listxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_symlink(fs, "filename", linkfile);
+ fprintf(stderr, "symlink(%s %s): %s\n", filename, linkfile,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_readlink(fs, linkfile, buf, 512);
+ fprintf(stderr, "readlink(%s) : %d (%s)\n", filename, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_lsetxattr(fs, filename, "user.testkey3", "testval", 8, 0);
+ fprintf(stderr, "lsetxattr(%s) : %d (%s)\n", linkfile, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_llistxattr(fs, linkfile, buf, 512);
+ fprintf(stderr, "llistxattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_lgetxattr(fs, filename, "user.testkey3", buf, 512);
+ fprintf(stderr, "lgetxattr(%s): %d (%s)\n", linkfile, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ for (ptr = buf; ptr < buf + ret; ptr++) {
+ printf("key=%s\n", ptr);
+ ptr += strlen(ptr);
+ }
+
+ ret = glfs_removexattr(fs, filename, "user.testkey2");
+ fprintf(stderr, "removexattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ fd = glfs_open(fs, filename, O_RDWR);
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ ret = glfs_fsetxattr(fd, "user.testkey2", "testval", 8, 0);
+ fprintf(stderr, "fsetxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_fgetxattr(fd, "user.testkey2", buf, 512);
+ fprintf(stderr, "fgetxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_flistxattr(fd, buf, 512);
+ fprintf(stderr, "flistxattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ for (ptr = buf; ptr < buf + ret; ptr++) {
+ printf("key=%s\n", ptr);
+ ptr += strlen(ptr);
+ }
+
+ ret = glfs_fremovexattr(fd, "user.testkey2");
+ fprintf(stderr, "fremovexattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ glfs_close(fd);
+
+ return 0;
+}
+
+int
+test_chdir(glfs_t *fs)
+{
+ int ret = -1;
+ char *dir = "/dir";
+ char *topdir = "/topdir";
+ char *linkdir = "/linkdir";
+ char *linkdir2 = "/linkdir2";
+ char *subdir = "./subdir";
+ char *respath = NULL;
+ char pathbuf[4096];
+
+ ret = glfs_mkdir(fs, topdir, 0755);
+ fprintf(stderr, "mkdir(%s): %s\n", topdir, strerror(errno));
+ if (ret)
+ return -1;
+
+ ret = glfs_mkdir(fs, dir, 0755);
+ fprintf(stderr, "mkdir(%s): %s\n", dir, strerror(errno));
+ if (ret)
+ return -1;
+
+ respath = glfs_getcwd(fs, pathbuf, 4096);
+ fprintf(stdout, "getcwd() = %s\n", respath);
+
+ ret = glfs_symlink(fs, "topdir", linkdir);
+ if (ret) {
+ fprintf(stderr, "symlink(%s, %s): %s\n", topdir, linkdir,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_chdir(fs, linkdir);
+ if (ret) {
+ fprintf(stderr, "chdir(%s): %s\n", linkdir, strerror(errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd(fs, pathbuf, 4096);
+ fprintf(stdout, "getcwd() = %s\n", respath);
+
+ respath = glfs_realpath(fs, subdir, pathbuf);
+ if (respath) {
+ fprintf(stderr, "realpath(%s) worked unexpectedly: %s\n", subdir,
+ respath);
+ return -1;
+ }
+
+ ret = glfs_mkdir(fs, subdir, 0755);
+ if (ret) {
+ fprintf(stderr, "mkdir(%s): %s\n", subdir, strerror(errno));
+ return -1;
+ }
+
+ respath = glfs_realpath(fs, subdir, pathbuf);
+ if (!respath) {
+ fprintf(stderr, "realpath(%s): %s\n", subdir, strerror(errno));
+ } else {
+ fprintf(stdout, "realpath(%s) = %s\n", subdir, respath);
+ }
+
+ ret = glfs_chdir(fs, subdir);
+ if (ret) {
+ fprintf(stderr, "chdir(%s): %s\n", subdir, strerror(errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd(fs, pathbuf, 4096);
+ fprintf(stdout, "getcwd() = %s\n", respath);
+
+ respath = glfs_realpath(fs, "/linkdir/subdir", pathbuf);
+ if (!respath) {
+ fprintf(stderr, "realpath(/linkdir/subdir): %s\n", strerror(errno));
+ } else {
+ fprintf(stdout, "realpath(/linkdir/subdir) = %s\n", respath);
+ }
+
+ return 0;
+}
+
+#ifdef DEBUG
+static void
+peek_stat(struct stat *sb)
+{
+ printf("Dumping stat information:\n");
+ printf("File type: ");
+
+ switch (sb->st_mode & S_IFMT) {
+ case S_IFBLK:
+ printf("block device\n");
+ break;
+ case S_IFCHR:
+ printf("character device\n");
+ break;
+ case S_IFDIR:
+ printf("directory\n");
+ break;
+ case S_IFIFO:
+ printf("FIFO/pipe\n");
+ break;
+ case S_IFLNK:
+ printf("symlink\n");
+ break;
+ case S_IFREG:
+ printf("regular file\n");
+ break;
+ case S_IFSOCK:
+ printf("socket\n");
+ break;
+ default:
+ printf("unknown?\n");
+ break;
+ }
+
+ printf("I-node number: %ld\n", (long)sb->st_ino);
+
+ printf("Mode: %lo (octal)\n",
+ (unsigned long)sb->st_mode);
+
+ printf("Link count: %ld\n", (long)sb->st_nlink);
+ printf("Ownership: UID=%ld GID=%ld\n", (long)sb->st_uid,
+ (long)sb->st_gid);
+
+ printf("Preferred I/O block size: %ld bytes\n", (long)sb->st_blksize);
+ printf("File size: %lld bytes\n", (long long)sb->st_size);
+ printf("Blocks allocated: %lld\n", (long long)sb->st_blocks);
+
+ printf("Last status change: %s", ctime(&sb->st_ctime));
+ printf("Last file access: %s", ctime(&sb->st_atime));
+ printf("Last file modification: %s", ctime(&sb->st_mtime));
+
+ return;
+}
+
+static void
+peek_handle(unsigned char *glid)
+{
+ int i;
+
+ for (i = 0; i < GFAPI_HANDLE_LENGTH; i++) {
+ printf(":%02x:", glid[i]);
+ }
+ printf("\n");
+}
+#else /* DEBUG */
+static void
+peek_stat(struct stat *sb)
+{
+ return;
+}
+
+static void
+peek_handle(unsigned char *id)
+{
+ return;
+}
+#endif /* DEBUG */
+
+glfs_t *fs = NULL;
+char *full_parent_name = "/testdir", *parent_name = "testdir";
+
+void
+test_h_unlink(void)
+{
+ char *my_dir = "unlinkdir";
+ char *my_file = "file.txt";
+ char *my_subdir = "dir1";
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL,
+ *subdir = NULL, *subleaf = NULL;
+ struct stat sb;
+ int ret;
+
+ printf("glfs_h_unlink tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
+ if (dir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_creat(fs, dir, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ subdir = glfs_h_mkdir(fs, dir, my_subdir, 0755, &sb);
+ if (subdir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_subdir, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ subleaf = glfs_h_creat(fs, subdir, my_file, O_CREAT, 0644, &sb);
+ if (subleaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, subdir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non empty directory */
+ ret = glfs_h_unlink(fs, dir, my_subdir);
+ if ((ret && errno != ENOTEMPTY) || (ret == 0)) {
+ fprintf(stderr,
+ "glfs_h_unlink: error unlinking %s: it is non empty: %s\n",
+ my_subdir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink regular file */
+ ret = glfs_h_unlink(fs, subdir, my_file);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_file, subdir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink directory */
+ ret = glfs_h_unlink(fs, dir, my_subdir);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_subdir, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink regular file */
+ ret = glfs_h_unlink(fs, dir, my_file);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non-existent regular file */
+ ret = glfs_h_unlink(fs, dir, my_file);
+ if ((ret && errno != ENOENT) || (ret == 0)) {
+ fprintf(stderr,
+ "glfs_h_unlink: error unlinking non-existent %s: invalid errno "
+ ",%d, %s\n",
+ my_file, ret, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non-existent directory */
+ ret = glfs_h_unlink(fs, dir, my_subdir);
+ if ((ret && errno != ENOENT) || (ret == 0)) {
+ fprintf(stderr,
+ "glfs_h_unlink: error unlinking non-existent %s: invalid "
+ "errno ,%d, %s\n",
+ my_subdir, ret, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink directory */
+ ret = glfs_h_unlink(fs, parent, my_dir);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_dir, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ printf("glfs_h_unlink tests: PASSED\n");
+
+out:
+ if (dir)
+ glfs_h_close(dir);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (subdir)
+ glfs_h_close(subdir);
+ if (subleaf)
+ glfs_h_close(subleaf);
+ if (parent)
+ glfs_h_close(parent);
+
+ return;
+}
+
+void
+test_h_getsetattrs(void)
+{
+ char *my_dir = "attrdir";
+ char *my_file = "attrfile.txt";
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL;
+ struct stat sb, retsb;
+ int ret, valid;
+ struct timespec timestamp;
+
+ printf("glfs_h_getattrs and setattrs tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
+ if (dir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, dir, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ ret = glfs_h_getattrs(fs, dir, &retsb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error %s: from (%p),%s\n", my_dir,
+ dir, strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&retsb);
+ /* TODO: Compare stat information */
+
+ retsb.st_mode = 00666;
+ retsb.st_uid = 1000;
+ retsb.st_gid = 1001;
+ ret = clock_gettime(CLOCK_REALTIME, &timestamp);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ retsb.st_atim = timestamp;
+ retsb.st_mtim = timestamp;
+ valid = GFAPI_SET_ATTR_MODE | GFAPI_SET_ATTR_UID | GFAPI_SET_ATTR_GID |
+ GFAPI_SET_ATTR_ATIME | GFAPI_SET_ATTR_MTIME;
+ peek_stat(&retsb);
+
+ ret = glfs_h_setattrs(fs, dir, &retsb, valid);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_setattrs: error %s: from (%p),%s\n", my_dir,
+ dir, strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ memset(&retsb, 0, sizeof(struct stat));
+ ret = glfs_h_stat(fs, dir, &retsb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_stat: error %s: from (%p),%s\n", my_dir, dir,
+ strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&retsb);
+
+ printf("glfs_h_getattrs and setattrs tests: PASSED\n");
+out:
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (dir)
+ glfs_h_close(dir);
+
+ return;
+}
+
+void
+test_h_truncate(void)
+{
+ char *my_dir = "truncatedir";
+ char *my_file = "file.txt";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL;
+ struct stat sb;
+ glfs_fd_t *fd = NULL;
+ char buf[32];
+ off_t offset = 0;
+ int ret = 0;
+
+ printf("glfs_h_truncate tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
+ if (parent == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ fd = glfs_h_open(fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_h_open: error on open of %s: %s\n", my_file,
+ strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ memcpy(buf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write(fd, buf, 32, 0);
+
+ /* run tests */
+ /* truncate lower */
+ offset = 30;
+ ret = glfs_h_truncate(fs, leaf, offset);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error for %s (%p),%s\n", my_file,
+ leaf, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf(stderr, "glfs_h_truncate: post size mismatch\n");
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ /* truncate higher */
+ offset = 32;
+ ret = glfs_h_truncate(fs, leaf, offset);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error for %s (%p),%s\n", my_file,
+ leaf, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf(stderr, "glfs_h_truncate: post size mismatch\n");
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ /* truncate equal */
+ offset = 30;
+ ret = glfs_h_truncate(fs, leaf, offset);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error for %s (%p),%s\n", my_file,
+ leaf, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf(stderr, "glfs_h_truncate: post size mismatch\n");
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ printf("glfs_h_truncate tests: PASSED\n");
+out:
+ if (fd)
+ glfs_close(fd);
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+
+ return;
+}
+
+void
+test_h_links(void)
+{
+ char *my_dir = "linkdir";
+ char *my_file = "file.txt";
+ char *my_symlnk = "slnk.txt";
+ char *my_lnk = "lnk.txt";
+ char *linksrc_dir = "dir1";
+ char *linktgt_dir = "dir2";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+ *dirsrc = NULL, *dirtgt = NULL, *dleaf = NULL;
+ struct glfs_object *ln1 = NULL;
+ struct stat sb;
+ int ret;
+ char *buf = NULL;
+
+ printf("glfs_h_link(s) tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
+ if (parent == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirsrc = glfs_h_mkdir(fs, parent, linksrc_dir, 0755, &sb);
+ if (dirsrc == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ linksrc_dir, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirtgt = glfs_h_mkdir(fs, parent, linktgt_dir, 0755, &sb);
+ if (dirtgt == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ linktgt_dir, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dleaf = glfs_h_creat(fs, dirsrc, my_file, O_CREAT, 0644, &sb);
+ if (dleaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dirsrc, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* run tests */
+ /* sym link: /testdir/linkdir/file.txt to ./slnk.txt */
+ ln1 = glfs_h_symlink(fs, parent, my_symlnk, "./file.txt", &sb);
+ if (ln1 == NULL) {
+ fprintf(stderr, "glfs_h_symlink: error creating %s: from (%p),%s\n",
+ my_symlnk, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ buf = calloc(1024, sizeof(char));
+ if (buf == NULL) {
+ fprintf(stderr, "Error allocating memory\n");
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+
+ ret = glfs_h_readlink(fs, ln1, buf, 1024);
+ if (ret <= 0) {
+ fprintf(stderr, "glfs_h_readlink: error reading %s: from (%p),%s\n",
+ my_symlnk, ln1, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ if (!(strncmp(buf, my_symlnk, strlen(my_symlnk)))) {
+ fprintf(stderr,
+ "glfs_h_readlink: error mismatch in link name: actual %s: "
+ "retrieved %s\n",
+ my_symlnk, buf);
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+
+ /* link: /testdir/linkdir/file.txt to ./lnk.txt */
+ ret = glfs_h_link(fs, leaf, parent, my_lnk);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_link: error creating %s: from (%p),%s\n",
+ my_lnk, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ /* TODO: Should write content to a file and read from the link */
+
+ /* link: /testdir/linkdir/dir1/file.txt to ../dir2/slnk.txt */
+ ret = glfs_h_link(fs, dleaf, dirtgt, my_lnk);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_link: error creating %s: from (%p),%s\n",
+ my_lnk, dirtgt, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ /* TODO: Should write content to a file and read from the link */
+
+ printf("glfs_h_link(s) tests: PASSED\n");
+
+out:
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (dirsrc)
+ glfs_h_close(dirsrc);
+ if (dirtgt)
+ glfs_h_close(dirtgt);
+ if (dleaf)
+ glfs_h_close(dleaf);
+ if (ln1)
+ glfs_h_close(ln1);
+ if (buf)
+ free(buf);
+
+ return;
+}
+
+void
+test_h_rename(void)
+{
+ char *my_dir = "renamedir";
+ char *my_file = "file.txt";
+ char *src_dir = "dir1";
+ char *tgt_dir = "dir2";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+ *dirsrc = NULL, *dirtgt = NULL, *dleaf = NULL;
+ struct stat sb;
+ int ret;
+
+ printf("glfs_h_rename tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
+ if (parent == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirsrc = glfs_h_mkdir(fs, parent, src_dir, 0755, &sb);
+ if (dirsrc == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ src_dir, parent, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirtgt = glfs_h_mkdir(fs, parent, tgt_dir, 0755, &sb);
+ if (dirtgt == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ tgt_dir, parent, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dleaf = glfs_h_creat(fs, dirsrc, my_file, O_CREAT, 0644, &sb);
+ if (dleaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dirsrc, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* run tests */
+ /* Rename file.txt -> file1.txt */
+ ret = glfs_h_rename(fs, parent, "file.txt", parent, "file1.txt");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "file.txt", "file1.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir1/file.txt -> file.txt */
+ ret = glfs_h_rename(fs, dirsrc, "file.txt", parent, "file.txt");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s/%s to %s (%s)\n",
+ src_dir, "file.txt", "file.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename file1.txt -> file.txt (exists) */
+ ret = glfs_h_rename(fs, parent, "file1.txt", parent, "file.txt");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "file.txt", "file.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir1 -> dir3 */
+ ret = glfs_h_rename(fs, parent, "dir1", parent, "dir3");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n", "dir1",
+ "dir3", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir2 ->dir3 (exists) */
+ ret = glfs_h_rename(fs, parent, "dir2", parent, "dir3");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n", "dir2",
+ "dir3", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename file.txt -> dir3 (fail) */
+ ret = glfs_h_rename(fs, parent, "file.txt", parent, "dir3");
+ if (ret == 0) {
+ fprintf(stderr, "glfs_h_rename: NO error renaming %s to %s (%s)\n",
+ "file.txt", "dir3", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir3 -> file.txt (fail) */
+ ret = glfs_h_rename(fs, parent, "dir3", parent, "file.txt");
+ if (ret == 0) {
+ fprintf(stderr, "glfs_h_rename: NO error renaming %s to %s (%s)\n",
+ "dir3", "file.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ printf("glfs_h_rename tests: PASSED\n");
+
+out:
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (dirsrc)
+ glfs_h_close(dirsrc);
+ if (dirtgt)
+ glfs_h_close(dirtgt);
+ if (dleaf)
+ glfs_h_close(dleaf);
+
+ return;
+}
+
+void
+assimilatetime(struct timespec *ts, struct timespec ts_st,
+ struct timespec ts_ed)
+{
+ if ((ts_ed.tv_nsec - ts_st.tv_nsec) < 0) {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec - 1;
+ ts->tv_nsec += 1000000000 + ts_ed.tv_nsec - ts_st.tv_nsec;
+ } else {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec;
+ ts->tv_nsec += ts_ed.tv_nsec - ts_st.tv_nsec;
+ }
+
+ if (ts->tv_nsec > 1000000000) {
+ ts->tv_nsec = ts->tv_nsec - 1000000000;
+ ts->tv_sec += 1;
+ }
+
+ return;
+}
+
+#define MAX_FILES_CREATE 10
+#define MAXPATHNAME 512
+void
+test_h_performance(void)
+{
+ char *my_dir = "perftest", *full_dir_path = "/testdir/perftest";
+ char *my_file = "file_", my_file_name[MAXPATHNAME];
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL;
+ struct stat sb;
+ int ret, i;
+ struct glfs_fd *fd;
+ struct timespec c_ts = {0, 0}, c_ts_st, c_ts_ed;
+ struct timespec o_ts = {0, 0}, o_ts_st, o_ts_ed;
+
+ printf("glfs_h_performance tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
+ if (dir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* create performance */
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ for (i = 0; i < MAX_FILES_CREATE; i++) {
+ sprintf(my_file_name, "%s%d", my_file, i);
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_lookupat(fs, dir, my_file_name, &sb, 0);
+ if (leaf != NULL) {
+ fprintf(stderr, "glfs_h_lookup: exists %s\n", my_file_name);
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_creat(fs, dir, my_file_name, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&c_ts, c_ts_st, c_ts_ed);
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&o_ts, o_ts_st, o_ts_ed);
+
+ printf("Creation performance (handle based):\n\t# empty files:%d\n",
+ MAX_FILES_CREATE);
+ printf("\tOverall time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n", o_ts.tv_sec,
+ o_ts.tv_nsec);
+ printf("\tcreate call time time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ c_ts.tv_sec, c_ts.tv_nsec);
+
+ /* create using path */
+ c_ts.tv_sec = o_ts.tv_sec = 0;
+ c_ts.tv_nsec = o_ts.tv_nsec = 0;
+
+ sprintf(my_file_name, "%s1", full_dir_path);
+ ret = glfs_mkdir(fs, my_file_name, 0755);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_mkdir: error creating %s: from (%p),%s\n", my_dir,
+ parent, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ for (i = 0; i < MAX_FILES_CREATE; i++) {
+ sprintf(my_file_name, "%s1/%sn%d", full_dir_path, my_file, i);
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ ret = glfs_stat(fs, my_file_name, &sb);
+ if (ret == 0) {
+ fprintf(stderr, "glfs_stat: exists %s\n", my_file_name);
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ fd = glfs_creat(fs, my_file_name, O_CREAT, 0644);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&c_ts, c_ts_st, c_ts_ed);
+ glfs_close(fd);
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&o_ts, o_ts_st, o_ts_ed);
+
+ printf("Creation performance (path based):\n\t# empty files:%d\n",
+ MAX_FILES_CREATE);
+ printf("\tOverall time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n", o_ts.tv_sec,
+ o_ts.tv_nsec);
+ printf("\tcreate call time time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ c_ts.tv_sec, c_ts.tv_nsec);
+out:
+ return;
+}
+
+int
+test_handleops(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL, *tmp = NULL;
+ char readbuf[32], writebuf[32];
+ unsigned char leaf_handle[GFAPI_HANDLE_LENGTH];
+
+ char *full_leaf_name = "/testdir/testfile.txt", *leaf_name = "testfile.txt",
+ *relative_leaf_name = "testdir/testfile.txt";
+ char *leaf_name1 = "testfile1.txt";
+ char *full_newparent_name = "/testdir/dir1", *newparent_name = "dir1";
+ char *full_newnod_name = "/testdir/nod1", *newnod_name = "nod1";
+
+ /* Initialize test area */
+ ret = glfs_mkdir(fs, full_parent_name, 0755);
+ if (ret != 0 && errno != EEXIST) {
+ fprintf(stderr, "%s: (%p) %s\n", full_parent_name, fd, strerror(errno));
+ printf("Test initialization failed on volume %s\n", argv[1]);
+ goto out;
+ } else if (ret != 0) {
+ printf("Found test directory %s to be existing\n", full_parent_name);
+ printf("Cleanup test directory and restart tests\n");
+ goto out;
+ }
+
+ fd = glfs_creat(fs, full_leaf_name, O_CREAT, 0644);
+ if (fd == NULL) {
+ fprintf(stderr, "%s: (%p) %s\n", full_leaf_name, fd, strerror(errno));
+ printf("Test initialization failed on volume %s\n", argv[1]);
+ goto out;
+ }
+ glfs_close(fd);
+
+ printf("Initialized the test area, within volume %s\n", argv[1]);
+
+ /* Handle based APIs test area */
+
+ /* glfs_lookupat test */
+ printf("glfs_h_lookupat tests: In Progress\n");
+ /* start at root of the volume */
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n", "/",
+ NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* lookup a parent within root */
+ parent = glfs_h_lookupat(fs, root, parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ parent_name, root, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* lookup a leaf/child within the parent */
+ leaf = glfs_h_lookupat(fs, parent, leaf_name, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ leaf_name, parent, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* reset */
+ glfs_h_close(root);
+ root = NULL;
+ glfs_h_close(leaf);
+ leaf = NULL;
+ glfs_h_close(parent);
+ parent = NULL;
+
+ /* check absolute paths */
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n", "/",
+ NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_lookupat(fs, NULL, full_leaf_name, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_leaf_name, parent, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* reset */
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ /* check multiple component paths */
+ leaf = glfs_h_lookupat(fs, root, relative_leaf_name, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ relative_leaf_name, parent, strerror(errno));
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* reset */
+ glfs_h_close(root);
+ root = NULL;
+ glfs_h_close(parent);
+ parent = NULL;
+
+ /* check symlinks in path */
+
+ /* TODO: -ve test cases */
+ /* parent invalid
+ * path invalid
+ * path does not exist after some components
+ * no parent, but relative path
+ * parent and full path? -ve?
+ */
+
+ printf("glfs_h_lookupat tests: PASSED\n");
+
+ /* glfs_openat test */
+ printf("glfs_h_open tests: In Progress\n");
+ fd = glfs_h_open(fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_h_open: error on open of %s: %s\n",
+ full_leaf_name, strerror(errno));
+ printf("glfs_h_open tests: FAILED\n");
+ goto out;
+ }
+
+ /* test read/write based on fd */
+ memcpy(writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write(fd, writebuf, 32, 0);
+
+ glfs_lseek(fd, 0, SEEK_SET);
+
+ ret = glfs_read(fd, readbuf, 32, 0);
+ if (memcmp(readbuf, writebuf, 32)) {
+ printf("Failed to read what I wrote: %s %s\n", readbuf, writebuf);
+ glfs_close(fd);
+ printf("glfs_h_open tests: FAILED\n");
+ goto out;
+ }
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+ glfs_close(fd);
+
+ printf("glfs_h_open tests: PASSED\n");
+
+ /* Create tests */
+ printf("glfs_h_creat tests: In Progress\n");
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, leaf_name1, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error on create of %s: from (%p),%s\n",
+ leaf_name1, parent, strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_creat(fs, parent, leaf_name1, O_CREAT | O_EXCL, 0644, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf(stderr,
+ "glfs_h_creat: existing file, leaf = (%p), errno = %s\n", leaf,
+ strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+ }
+
+ tmp = glfs_h_creat(fs, root, parent_name, O_CREAT, 0644, &sb);
+ if (tmp != NULL || !(errno == EISDIR || errno == EINVAL)) {
+ fprintf(stderr, "glfs_h_creat: dir create, tmp = (%p), errno = %s\n",
+ leaf, strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ if (tmp != NULL) {
+ glfs_h_close(tmp);
+ tmp = NULL;
+ }
+ }
+
+ /* TODO: Other combinations and -ve cases as applicable */
+ printf("glfs_h_creat tests: PASSED\n");
+
+ /* extract handle and create from handle test */
+ printf(
+ "glfs_h_extract_handle and glfs_h_create_from_handle tests: In "
+ "Progress\n");
+ /* TODO: Change the lookup to create below for a GIFD recovery failure,
+ * that needs to be fixed */
+ leaf = glfs_h_lookupat(fs, parent, leaf_name1, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ leaf_name1, parent, strerror(errno));
+ printf("glfs_h_extract_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ ret = glfs_h_extract_handle(leaf, leaf_handle, GFAPI_HANDLE_LENGTH);
+ if (ret < 0) {
+ fprintf(stderr,
+ "glfs_h_extract_handle: error extracting handle of %s: %s\n",
+ full_leaf_name, strerror(errno));
+ printf("glfs_h_extract_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_handle(leaf_handle);
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_create_from_handle(fs, leaf_handle, GFAPI_HANDLE_LENGTH, &sb);
+ if (leaf == NULL) {
+ fprintf(
+ stderr,
+ "glfs_h_create_from_handle: error on create of %s: from (%p),%s\n",
+ leaf_name1, leaf_handle, strerror(errno));
+ printf("glfs_h_create_from_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ fd = glfs_h_open(fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_h_open: error on open of %s: %s\n",
+ full_leaf_name, strerror(errno));
+ printf("glfs_h_create_from_handle tests: FAILED\n");
+ goto out;
+ }
+
+ /* test read/write based on fd */
+ memcpy(writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write(fd, writebuf, 32, 0);
+
+ glfs_lseek(fd, 0, SEEK_SET);
+
+ ret = glfs_read(fd, readbuf, 32, 0);
+ if (memcmp(readbuf, writebuf, 32)) {
+ printf("Failed to read what I wrote: %s %s\n", writebuf, writebuf);
+ printf("glfs_h_create_from_handle tests: FAILED\n");
+ glfs_close(fd);
+ goto out;
+ }
+
+ glfs_close(fd);
+ glfs_h_close(leaf);
+ leaf = NULL;
+ glfs_h_close(parent);
+ parent = NULL;
+
+ printf(
+ "glfs_h_extract_handle and glfs_h_create_from_handle tests: PASSED\n");
+
+ /* Mkdir tests */
+ printf("glfs_h_mkdir tests: In Progress\n");
+
+ ret = glfs_rmdir(fs, full_newparent_name);
+ if (ret && errno != ENOENT) {
+ fprintf(stderr, "glfs_rmdir: Failed for %s: %s\n", full_newparent_name,
+ strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_mkdir(fs, parent, newparent_name, 0755, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error on mkdir of %s: from (%p),%s\n",
+ newparent_name, parent, strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_mkdir(fs, parent, newparent_name, 0755, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf(stderr,
+ "glfs_h_mkdir: existing directory, leaf = (%p), errno = %s\n",
+ leaf, strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+ }
+
+ glfs_h_close(parent);
+ parent = NULL;
+
+ printf("glfs_h_mkdir tests: PASSED\n");
+
+ /* Mknod tests */
+ printf("glfs_h_mknod tests: In Progress\n");
+ ret = glfs_unlink(fs, full_newnod_name);
+ if (ret && errno != ENOENT) {
+ fprintf(stderr, "glfs_unlink: Failed for %s: %s\n", full_newnod_name,
+ strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_mknod(fs, parent, newnod_name, S_IFIFO, 0, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error on mkdir of %s: from (%p),%s\n",
+ newnod_name, parent, strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* TODO: create op on a FIFO node hangs, need to check and fix
+ tmp = glfs_h_creat (fs, parent, newnod_name, O_CREAT, 0644, &sb);
+ if (tmp != NULL || errno != EINVAL) {
+ fprintf (stderr, "glfs_h_creat: node create, tmp = (%p), errno =
+ %s\n", tmp, strerror (errno)); printf ("glfs_h_creat/mknod tests:
+ FAILED\n"); if (tmp != NULL) { glfs_h_close(tmp); tmp = NULL;
+ }
+ } */
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_mknod(fs, parent, newnod_name, 0644, 0, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf(stderr,
+ "glfs_h_mknod: existing node, leaf = (%p), errno = %s\n", leaf,
+ strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+ }
+
+ glfs_h_close(parent);
+ parent = NULL;
+
+ printf("glfs_h_mknod tests: PASSED\n");
+
+ /* unlink tests */
+ test_h_unlink();
+
+ /* TODO: opendir tests */
+
+ /* getattr tests */
+ test_h_getsetattrs();
+
+ /* TODO: setattr tests */
+
+ /* truncate tests */
+ test_h_truncate();
+
+ /* link tests */
+ test_h_links();
+
+ /* rename tests */
+ test_h_rename();
+
+ /* performance tests */
+ test_h_performance();
+
+ /* END: New APIs test area */
+
+out:
+ /* Cleanup glfs handles */
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+
+ return ret;
+}
+
+int
+test_write_apis(glfs_t *fs)
+{
+ /* Add more content here */
+ /* Some apis we can get are */
+ /*
+ 0. glfs_set_xlator_option()
+
+ Read/Write combinations:
+ . glfs_{p,}readv/{p,}writev
+ . glfs_pread/pwrite
+
+ tests/basic/gfapi/gfapi-async-calls-test.c
+ . glfs_read_async/write_async
+ . glfs_pread_async/pwrite_async
+ . glfs_readv_async/writev_async
+ . glfs_preadv_async/pwritev_async
+
+ . ftruncate/ftruncate_async
+ . fsync/fsync_async
+ . fdatasync/fdatasync_async
+
+ */
+
+ glfs_fd_t *fd = NULL;
+ char *filename = "/filename2";
+ int flags = O_RDWR;
+ char *buf = "some bytes!";
+ char writestr[TEST_STR_LEN];
+ struct iovec iov = {&writestr, TEST_STR_LEN};
+ int ret, i;
+
+ for (i = 0; i < TEST_STR_LEN; i++)
+ writestr[i] = 0x11;
+
+ fd = glfs_open(fs, filename, flags);
+ if (!fd)
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ ret = glfs_writev(fd, &iov, 1, flags);
+ if (ret < 0) {
+ fprintf(stderr, "writev(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ ret = glfs_pwrite(fd, buf, 10, 4, flags, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "pwrite(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ ret = glfs_pwritev(fd, &iov, 1, 4, flags);
+ if (ret < 0) {
+ fprintf(stderr, "pwritev(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ return 0;
+}
+
+int
+test_metadata_ops(glfs_t *fs, glfs_t *fs2)
+{
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd2 = NULL;
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_stat gsb = {
+ 0,
+ };
+ struct statvfs sfs;
+ char readbuf[32];
+ char writebuf[32];
+
+ char *filename = "/filename2";
+ int ret;
+
+ ret = glfs_lstat(fs, filename, &sb);
+ fprintf(stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ fd = glfs_creat(fs, filename, O_RDWR, 0644);
+ fprintf(stderr, "creat(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ fd2 = glfs_open(fs2, filename, O_RDWR);
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ glfs_lseek(fd2, 0, SEEK_SET);
+
+ ret = glfs_read(fd2, readbuf, 32, 0);
+
+ printf("read %d, %s", ret, readbuf);
+
+ /* get stat */
+ ret = glfs_fstat(fd2, &sb);
+
+ ret = glfs_access(fs, filename, R_OK);
+
+ /* set stat */
+ /* TODO: got some errors, need to fix */
+ /* ret = glfs_fsetattr(fd2, &gsb); */
+
+ glfs_close(fd);
+ glfs_close(fd2);
+
+ filename = "/filename3";
+ ret = glfs_mknod(fs, filename, S_IFIFO, 0);
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_lstat(fs, filename, &sb);
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_rename(fs, filename, "/filename4");
+ fprintf(stderr, "rename(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_unlink(fs, "/filename4");
+ fprintf(stderr, "unlink(%s): (%d) %s\n", "/filename4", ret,
+ strerror(errno));
+
+ filename = "/dirname2";
+ ret = glfs_mkdir(fs, filename, 0);
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_lstat(fs, filename, &sb);
+ fprintf(stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_rmdir(fs, filename);
+ fprintf(stderr, "rmdir(%s): (%d) %s\n", filename, ret, strerror(errno));
+}
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs2 = NULL;
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd2 = NULL;
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_stat gsb = {
+ 0,
+ };
+ struct statvfs sfs;
+ char readbuf[32];
+ char writebuf[32];
+
+ char *filename = "/filename2";
+
+ if (argc != 3) {
+ printf("Expect following args\n\t%s <volname> <hostname>\n", argv[0]);
+ return -1;
+ }
+
+ fs = glfs_new(argv[1]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ // ret = glfs_set_volfile (fs, "/tmp/posix.vol");
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[2], 24007);
+
+ // ret = glfs_set_volfile_server (fs, "unix", "/tmp/gluster.sock", 0);
+
+ ret = glfs_set_logging(fs, "/dev/stderr", 7);
+
+ ret = glfs_init(fs);
+
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+
+ if (ret)
+ goto out;
+
+ sleep(2);
+
+ fs2 = glfs_new(argv[1]);
+ if (!fs2) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ // ret = glfs_set_volfile (fs2, "/tmp/posix.vol");
+
+ ret = glfs_set_volfile_server(fs2, "tcp", argv[2], 24007);
+
+ ret = glfs_set_logging(fs2, "/dev/stderr", 7);
+
+ ret = glfs_init(fs2);
+
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+
+ test_metadata_ops(fs, fs2);
+
+ test_dirops(fs);
+
+ test_xattr(fs);
+
+ test_chdir(fs);
+
+ test_handleops(argc, argv);
+ // done
+
+ /* Test some extra apis */
+ test_write_apis(fs);
+
+ glfs_statvfs(fs, "/", &sfs);
+
+ glfs_fini(fs);
+ glfs_fini(fs2);
+
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/api/src/Makefile.am b/api/src/Makefile.am
new file mode 100644
index 00000000000..7f9a7d17b35
--- /dev/null
+++ b/api/src/Makefile.am
@@ -0,0 +1,43 @@
+lib_LTLIBRARIES = libgfapi.la
+noinst_HEADERS = glfs-mem-types.h glfs-internal.h gfapi-messages.h
+libgfapi_HEADERS = glfs.h glfs-handles.h
+libgfapidir = $(includedir)/glusterfs/api
+
+EXTRA_DIST = gfapi.map gfapi.aliases
+
+libgfapi_la_SOURCES = glfs.c glfs-mgmt.c glfs-fops.c glfs-resolve.c \
+ glfs-handleops.c
+libgfapi_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la
+
+libgfapi_la_LDFLAGS = -version-info $(GFAPI_LT_VERSION) $(GF_LDFLAGS) \
+ $(GFAPI_EXTRA_LDFLAGS) $(ACL_LIBS)
+
+libgfapi_la_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_builddir)/rpc/xdr/src \
+ -DDATADIR=\"$(localstatedir)\" \
+ -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+xlator_LTLIBRARIES = api.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mount
+# workaround for broken parallel install support in automake with LTLIBRARIES
+# http://debbugs.gnu.org/cgi/bugreport.cgi?bug=7328
+install_xlatorLTLIBRARIES = install-xlatorLTLIBRARIES
+$(install_xlatorLTLIBRARIES): install-libLTLIBRARIES
+
+api_la_SOURCES = glfs-master.c
+api_la_DEPENDENCIES = libgfapi.la
+api_la_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_builddir)/rpc/xdr/src
+api_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+#api_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS) $(GF_LDFLAGS)
+api_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/api/src/libgfapi.la
diff --git a/api/src/README.Symbol_Versions b/api/src/README.Symbol_Versions
new file mode 100644
index 00000000000..b6ec95f9311
--- /dev/null
+++ b/api/src/README.Symbol_Versions
@@ -0,0 +1,3 @@
+
+See ../../doc/developer-guide/gfapi-symbol-versions.md
+
diff --git a/api/src/gfapi-messages.h b/api/src/gfapi-messages.h
new file mode 100644
index 00000000000..b9223940416
--- /dev/null
+++ b/api/src/gfapi-messages.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2015-2018 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ * */
+
+#ifndef _GFAPI_MESSAGES_H__
+#define _GFAPI_MESSAGES_H__
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(API, API_MSG_MEM_ACCT_INIT_FAILED, API_MSG_MASTER_XLATOR_INIT_FAILED,
+ API_MSG_GFAPI_XLATOR_INIT_FAILED, API_MSG_VOLFILE_OPEN_FAILED,
+ API_MSG_VOL_SPEC_FILE_ERROR, API_MSG_GLFS_FSOBJ_NULL,
+ API_MSG_INVALID_ENTRY, API_MSG_FSMUTEX_LOCK_FAILED,
+ API_MSG_COND_WAIT_FAILED, API_MSG_FSMUTEX_UNLOCK_FAILED,
+ API_MSG_INODE_REFRESH_FAILED, API_MSG_GRAPH_CONSTRUCT_FAILED,
+ API_MSG_API_XLATOR_ERROR, API_MSG_XDR_PAYLOAD_FAILED,
+ API_MSG_GET_VOLINFO_CBK_FAILED, API_MSG_FETCH_VOLUUID_FAILED,
+ API_MSG_INSUFF_SIZE, API_MSG_FRAME_CREAT_FAILED,
+ API_MSG_DICT_SET_FAILED, API_MSG_XDR_DECODE_FAILED,
+ API_MSG_GET_VOLFILE_FAILED, API_MSG_WRONG_OPVERSION,
+ API_MSG_DICT_SERIALIZE_FAILED, API_MSG_REMOTE_HOST_CONN_FAILED,
+ API_MSG_VOLFILE_SERVER_EXHAUST, API_MSG_CREATE_RPC_CLIENT_FAILED,
+ API_MSG_REG_NOTIFY_FUNC_FAILED, API_MSG_REG_CBK_FUNC_FAILED,
+ API_MSG_GET_CWD_FAILED, API_MSG_FGETXATTR_FAILED,
+ API_MSG_LOCKINFO_KEY_MISSING, API_MSG_FSETXATTR_FAILED,
+ API_MSG_FSYNC_FAILED, API_MSG_FDCREATE_FAILED,
+ API_MSG_INODE_PATH_FAILED, API_MSG_SYNCOP_OPEN_FAILED,
+ API_MSG_LOCK_MIGRATE_FAILED, API_MSG_OPENFD_SKIPPED,
+ API_MSG_FIRST_LOOKUP_GRAPH_FAILED, API_MSG_CWD_GRAPH_REF_FAILED,
+ API_MSG_SWITCHED_GRAPH, API_MSG_XDR_RESPONSE_DECODE_FAILED,
+ API_MSG_VOLFILE_INFO, API_MSG_VOLFILE_CONNECTING, API_MSG_NEW_GRAPH,
+ API_MSG_ALLOC_FAILED, API_MSG_CREATE_HANDLE_FAILED,
+ API_MSG_INODE_LINK_FAILED, API_MSG_STATEDUMP_FAILED,
+ API_MSG_XREADDIRP_R_FAILED, API_MSG_LOCK_INSERT_MERGE_FAILED,
+ API_MSG_SETTING_LOCK_TYPE_FAILED, API_MSG_INODE_FIND_FAILED,
+ API_MSG_FDCTX_SET_FAILED, API_MSG_UPCALL_SYNCOP_FAILED,
+ API_MSG_INVALID_ARG, API_MSG_UPCALL_EVENT_NULL_RECEIVED,
+ API_MSG_FLAGS_HANDLE, API_MSG_FDCREATE_FAILED_ON_GRAPH,
+ API_MSG_TRANS_RDMA_DEP, API_MSG_TRANS_NOT_SUPPORTED,
+ API_MSG_FS_NOT_INIT, API_MSG_INVALID_SYSRQ,
+ API_MSG_DECODE_XDR_FAILED, API_MSG_NULL, API_MSG_CALL_NOT_SUCCESSFUL,
+ API_MSG_CALL_NOT_VALID, API_MSG_UNABLE_TO_DEL,
+ API_MSG_REMOTE_HOST_DISCONN, API_MSG_HANDLE_NOT_SET);
+
+#define API_MSG_ALLOC_FAILED_STR "Upcall allocation failed"
+#define API_MSG_LOCK_INSERT_MERGE_FAILED_STR \
+ "Lock insertion and splitting/merging failed"
+#define API_MSG_SETTING_LOCK_TYPE_FAILED_STR "Setting lock type failed"
+
+#define API_MSG_INVALID_ARG_STR "Invalid"
+#define API_MSG_INVALID_ENTRY_STR "Upcall entry validation failed"
+#define API_MSG_INODE_FIND_FAILED_STR "Unable to find inode entry"
+#define API_MSG_CREATE_HANDLE_FAILED_STR "handle creation failed"
+#define API_MSG_UPCALL_EVENT_NULL_RECEIVED_STR \
+ "Upcall_EVENT_NULL received. Skipping it"
+#define API_MSG_UPCALL_SYNCOP_FAILED_STR "Synctask for upcall failed"
+#define API_MSG_FDCREATE_FAILED_STR "Allocating anonymous fd failed"
+#define API_MSG_XREADDIRP_R_FAILED_STR "glfs_x_readdirp_r failed"
+#define API_MSG_FDCTX_SET_FAILED_STR "Setting fd ctx failed"
+#define API_MSG_FLAGS_HANDLE_STR "arg not set. Flags handled are"
+#define API_MSG_INODE_REFRESH_FAILED_STR "inode refresh failed"
+#define API_MSG_INODE_LINK_FAILED_STR "inode linking failed"
+#define API_MSG_GET_CWD_FAILED_STR "Failed to get cwd"
+#define API_MSG_FGETXATTR_FAILED_STR "fgetxattr failed"
+#define API_MSG_LOCKINFO_KEY_MISSING_STR "missing lockinfo key"
+#define API_MSG_FSYNC_FAILED_STR "fsync() failed"
+#define API_MSG_FDCREATE_FAILED_ON_GRAPH_STR "fd_create failed on graph"
+#define API_MSG_INODE_PATH_FAILED_STR "inode_path failed"
+#define API_MSG_SYNCOP_OPEN_FAILED_STR "syncop_open failed"
+#define API_MSG_LOCK_MIGRATE_FAILED_STR "lock migration failed on graph"
+#define API_MSG_OPENFD_SKIPPED_STR "skipping openfd in graph"
+#define API_MSG_FIRST_LOOKUP_GRAPH_FAILED_STR "first lookup on graph failed"
+#define API_MSG_CWD_GRAPH_REF_FAILED_STR "cwd refresh of graph failed"
+#define API_MSG_SWITCHED_GRAPH_STR "switched to graph"
+#define API_MSG_FSETXATTR_FAILED_STR "fsetxattr failed"
+#define API_MSG_MEM_ACCT_INIT_FAILED_STR "Memory accounting init failed"
+#define API_MSG_MASTER_XLATOR_INIT_FAILED_STR \
+ "master xlator for initialization failed"
+#define API_MSG_GFAPI_XLATOR_INIT_FAILED_STR \
+ "failed to initialize gfapi translator"
+#define API_MSG_VOLFILE_OPEN_FAILED_STR "volume file open failed"
+#define API_MSG_VOL_SPEC_FILE_ERROR_STR "Cannot reach volume specification file"
+#define API_MSG_TRANS_RDMA_DEP_STR \
+ "transport RDMA is deprecated, falling back to tcp"
+#define API_MSG_TRANS_NOT_SUPPORTED_STR \
+ "transport is not supported, possible values tcp|unix"
+#define API_MSG_GLFS_FSOBJ_NULL_STR "fs is NULL"
+#define API_MSG_FS_NOT_INIT_STR "fs is not properly initialized"
+#define API_MSG_FSMUTEX_LOCK_FAILED_STR \
+ "pthread lock on glfs mutex, returned error"
+#define API_MSG_FSMUTEX_UNLOCK_FAILED_STR \
+ "pthread unlock on glfs mutex, returned error"
+#define API_MSG_COND_WAIT_FAILED_STR "cond wait failed"
+#define API_MSG_INVALID_SYSRQ_STR "not a valid sysrq"
+#define API_MSG_GRAPH_CONSTRUCT_FAILED_STR "failed to construct the graph"
+#define API_MSG_API_XLATOR_ERROR_STR \
+ "api master xlator cannot be specified in volume file"
+#define API_MSG_STATEDUMP_FAILED_STR "statedump failed"
+#define API_MSG_DECODE_XDR_FAILED_STR \
+ "Failed to decode xdr response for GF_CBK_STATEDUMP"
+#define API_MSG_NULL_STR "NULL"
+#define API_MSG_XDR_PAYLOAD_FAILED_STR "failed to create XDR payload"
+#define API_MSG_CALL_NOT_SUCCESSFUL_STR \
+ "GET_VOLUME_INFO RPC call is not successful"
+#define API_MSG_XDR_RESPONSE_DECODE_FAILED_STR \
+ "Failed to decode xdr response for GET_VOLUME_INFO"
+#define API_MSG_CALL_NOT_VALID_STR \
+ "Response received for GET_VOLUME_INFO RPC is not valid"
+#define API_MSG_GET_VOLINFO_CBK_FAILED_STR \
+ "In GET_VOLUME_INFO cbk, received error"
+#define API_MSG_FETCH_VOLUUID_FAILED_STR "Unable to fetch volume UUID"
+#define API_MSG_INSUFF_SIZE_STR "Insufficient size passed"
+#define API_MSG_FRAME_CREAT_FAILED_STR "failed to create the frame"
+#define API_MSG_DICT_SET_FAILED_STR "failed to set"
+#define API_MSG_XDR_DECODE_FAILED_STR "XDR decoding error"
+#define API_MSG_GET_VOLFILE_FAILED_STR "failed to get the volume file"
+#define API_MSG_VOLFILE_INFO_STR "No change in volfile, continuing"
+#define API_MSG_UNABLE_TO_DEL_STR "unable to delete file"
+#define API_MSG_WRONG_OPVERSION_STR \
+ "Server is operating at an op-version which is not supported"
+#define API_MSG_DICT_SERIALIZE_FAILED_STR "Failed to serialize dictionary"
+#define API_MSG_REMOTE_HOST_CONN_FAILED_STR "Failed to connect to remote-host"
+#define API_MSG_REMOTE_HOST_DISCONN_STR "disconnected from remote-host"
+#define API_MSG_VOLFILE_SERVER_EXHAUST_STR "Exhausted all volfile servers"
+#define API_MSG_VOLFILE_CONNECTING_STR "connecting to next volfile server"
+#define API_MSG_CREATE_RPC_CLIENT_FAILED_STR "failed to create rpc clnt"
+#define API_MSG_REG_NOTIFY_FUNC_FAILED_STR "failed to register notify function"
+#define API_MSG_REG_CBK_FUNC_FAILED_STR "failed to register callback function"
+#define API_MSG_NEW_GRAPH_STR "New graph coming up"
+#define API_MSG_HANDLE_NOT_SET_STR "handle not set. Flags handled for xstat are"
+#endif /* !_GFAPI_MESSAGES_H__ */
diff --git a/api/src/gfapi.aliases b/api/src/gfapi.aliases
new file mode 100644
index 00000000000..bc639e6b99f
--- /dev/null
+++ b/api/src/gfapi.aliases
@@ -0,0 +1,201 @@
+
+_priv_glfs_loc_touchup _glfs_loc_touchup$GFAPI_PRIVATE_3.4.0
+_priv_glfs_active_subvol _glfs_active_subvol$GFAPI_PRIVATE_3.4.0
+_priv_glfs_subvol_done _glfs_subvol_done$GFAPI_PRIVATE_3.4.0
+_priv_glfs_init_done _glfs_init_done$GFAPI_PRIVATE_3.4.0
+_priv_glfs_resolve_at _glfs_resolve_at$GFAPI_PRIVATE_3.4.0
+
+_pub_glfs_new _glfs_new$GFAPI_3.4.0
+_pub_glfs_set_volfile _glfs_set_volfile$GFAPI_3.4.0
+_pub_glfs_set_volfile_server _glfs_set_volfile_server$GFAPI_3.4.0
+_pub_glfs_set_logging _glfs_set_logging$GFAPI_3.4.0
+_pub_glfs_init _glfs_init$GFAPI_3.4.0
+_pub_glfs_fini _glfs_fini$GFAPI_3.4.0
+_pub_glfs_open _glfs_open$GFAPI_3.4.0
+_pub_glfs_creat _glfs_creat$GFAPI_3.4.0
+_pub_glfs_close _glfs_close$GFAPI_3.4.0
+_pub_glfs_from_glfd _glfs_from_glfd$GFAPI_3.4.0
+_pub_glfs_set_xlator_option _glfs_set_xlator_option$GFAPI_3.4.0
+_pub_glfs_read _glfs_read$GFAPI_3.4.0
+_pub_glfs_write _glfs_write$GFAPI_3.4.0
+_pub_glfs_readv _glfs_readv$GFAPI_3.4.0
+_pub_glfs_writev _glfs_writev$GFAPI_3.4.0
+_pub_glfs_pread34 _glfs_pread$GFAPI_3.4.0
+_pub_glfs_pwrite34 _glfs_pwrite$GFAPI_3.4.0
+_pub_glfs_pread_async34 _glfs_pread_async$GFAPI_3.4.0
+_pub_glfs_pwrite_async34 _glfs_pwrite_async$GFAPI_3.4.0
+_pub_glfs_preadv _glfs_preadv$GFAPI_3.4.0
+_pub_glfs_pwritev _glfs_pwritev$GFAPI_3.4.0
+_pub_glfs_lseek _glfs_lseek$GFAPI_3.4.0
+_pub_glfs_ftruncate34 _glfs_ftruncate$GFAPI_3.4.0
+_pub_glfs_ftruncate_async34 _glfs_ftruncate_async$GFAPI_3.4.0
+_pub_glfs_lstat _glfs_lstat$GFAPI_3.4.0
+_pub_glfs_stat _glfs_stat$GFAPI_3.4.0
+_pub_glfs_fstat _glfs_fstat$GFAPI_3.4.0
+_pub_glfs_fsync34 _glfs_fsync$GFAPI_3.4.0
+_pub_glfs_fsync_async34 _glfs_fsync_async$GFAPI_3.4.0
+_pub_glfs_fdatasync34 _glfs_fdatasync$GFAPI_3.4.0
+_pub_glfs_fdatasync_async34 _glfs_fdatasync_async$GFAPI_3.4.0
+_pub_glfs_access _glfs_access$GFAPI_3.4.0
+_pub_glfs_symlink _glfs_symlink$GFAPI_3.4.0
+_pub_glfs_readlink _glfs_readlink$GFAPI_3.4.0
+_pub_glfs_mknod _glfs_mknod$GFAPI_3.4.0
+_pub_glfs_mkdir _glfs_mkdir$GFAPI_3.4.0
+_pub_glfs_unlink _glfs_unlink$GFAPI_3.4.0
+_pub_glfs_rmdir _glfs_rmdir$GFAPI_3.4.0
+_pub_glfs_rename _glfs_rename$GFAPI_3.4.0
+_pub_glfs_link _glfs_link$GFAPI_3.4.0
+_pub_glfs_opendir _glfs_opendir$GFAPI_3.4.0
+_pub_glfs_readdir_r _glfs_readdir_r$GFAPI_3.4.0
+_pub_glfs_readdirplus_r _glfs_readdirplus_r$GFAPI_3.4.0
+_pub_glfs_telldir _glfs_telldir$GFAPI_3.4.0
+_pub_glfs_seekdir _glfs_seekdir$GFAPI_3.4.0
+_pub_glfs_closedir _glfs_closedir$GFAPI_3.4.0
+_pub_glfs_statvfs _glfs_statvfs$GFAPI_3.4.0
+_pub_glfs_chmod _glfs_chmod$GFAPI_3.4.0
+_pub_glfs_fchmod _glfs_fchmod$GFAPI_3.4.0
+_pub_glfs_chown _glfs_chown$GFAPI_3.4.0
+_pub_glfs_lchown _glfs_lchown$GFAPI_3.4.0
+_pub_glfs_fchown _glfs_fchown$GFAPI_3.4.0
+_pub_glfs_utimens _glfs_utimens$GFAPI_3.4.0
+_pub_glfs_lutimens _glfs_lutimens$GFAPI_3.4.0
+_pub_glfs_futimens _glfs_futimens$GFAPI_3.4.0
+_pub_glfs_getxattr _glfs_getxattr$GFAPI_3.4.0
+_pub_glfs_lgetxattr _glfs_lgetxattr$GFAPI_3.4.0
+_pub_glfs_fgetxattr _glfs_fgetxattr$GFAPI_3.4.0
+_pub_glfs_listxattr _glfs_listxattr$GFAPI_3.4.0
+_pub_glfs_llistxattr _glfs_llistxattr$GFAPI_3.4.0
+_pub_glfs_flistxattr _glfs_flistxattr$GFAPI_3.4.0
+_pub_glfs_setxattr _glfs_setxattr$GFAPI_3.4.0
+_pub_glfs_lsetxattr _glfs_lsetxattr$GFAPI_3.4.0
+_pub_glfs_fsetxattr _glfs_fsetxattr$GFAPI_3.4.0
+_pub_glfs_removexattr _glfs_removexattr$GFAPI_3.4.0
+_pub_glfs_lremovexattr _glfs_lremovexattr$GFAPI_3.4.0
+_pub_glfs_fremovexattr _glfs_fremovexattr$GFAPI_3.4.0
+_pub_glfs_getcwd _glfs_getcwd$GFAPI_3.4.0
+_pub_glfs_chdir _glfs_chdir$GFAPI_3.4.0
+_pub_glfs_fchdir _glfs_fchdir$GFAPI_3.4.0
+_pub_glfs_realpath34 _glfs_realpath$GFAPI_3.4.0
+_pub_glfs_posix_lock _glfs_posix_lock$GFAPI_3.4.0
+_pub_glfs_dup _glfs_dup$GFAPI_3.4.0
+
+_pub_glfs_setfsuid _glfs_setfsuid$GFAPI_3.4.2
+_pub_glfs_setfsgid _glfs_setfsgid$GFAPI_3.4.2
+_pub_glfs_setfsgroups _glfs_setfsgroups$GFAPI_3.4.2
+_pub_glfs_h_lookupat34 _glfs_h_lookupat$GFAPI_3.4.2
+_pub_glfs_h_creat _glfs_h_creat$GFAPI_3.4.2
+_pub_glfs_h_mkdir _glfs_h_mkdir$GFAPI_3.4.2
+_pub_glfs_h_mknod _glfs_h_mknod$GFAPI_3.4.2
+_pub_glfs_h_symlink _glfs_h_symlink$GFAPI_3.4.2
+_pub_glfs_h_unlink _glfs_h_unlink$GFAPI_3.4.2
+_pub_glfs_h_close _glfs_h_close$GFAPI_3.4.2
+_pub_glfs_h_truncate _glfs_h_truncate$GFAPI_3.4.2
+_pub_glfs_h_stat _glfs_h_stat$GFAPI_3.4.2
+_pub_glfs_h_getattrs _glfs_h_getattrs$GFAPI_3.4.2
+_pub_glfs_h_setattrs _glfs_h_setattrs$GFAPI_3.4.2
+_pub_glfs_h_readlink _glfs_h_readlink$GFAPI_3.4.2
+_pub_glfs_h_link _glfs_h_link$GFAPI_3.4.2
+_pub_glfs_h_rename _glfs_h_rename$GFAPI_3.4.2
+_pub_glfs_h_extract_handle _glfs_h_extract_handle$GFAPI_3.4.2
+_pub_glfs_h_create_from_handle _glfs_h_create_from_handle$GFAPI_3.4.2
+_pub_glfs_h_opendir _glfs_h_opendir$GFAPI_3.4.2
+_pub_glfs_h_open _glfs_h_open$GFAPI_3.4.2
+
+_pub_glfs_get_volumeid _glfs_get_volumeid$GFAPI_3.5.0
+_pub_glfs_readdir _glfs_readdir$GFAPI_3.5.0
+_pub_glfs_readdirplus _glfs_readdirplus$GFAPI_3.5.0
+_pub_glfs_fallocate _glfs_fallocate$GFAPI_3.5.0
+_pub_glfs_discard _glfs_discard$GFAPI_3.5.0
+_pub_glfs_zerofill _glfs_zerofill$GFAPI_3.5.0
+_pub_glfs_caller_specific_init _glfs_caller_specific_init$GFAPI_3.5.0
+_pub_glfs_h_setxattrs _glfs_h_setxattrs$GFAPI_3.5.0
+
+_pub_glfs_unset_volfile_server _glfs_unset_volfile_server$GFAPI_3.5.1
+_pub_glfs_h_getxattrs _glfs_h_getxattrs$GFAPI_3.5.1
+_pub_glfs_h_removexattrs _glfs_h_removexattrs$GFAPI_3.5.1
+
+_pub_glfs_get_volfile _glfs_get_volfile$GFAPI_3.6.0
+_pub_glfs_h_access _glfs_h_access$GFAPI_3.6.0
+
+_pub_glfs_h_poll_upcall370 _glfs_h_poll_upcall$GFAPI_3.7.0
+_pub_glfs_h_acl_set _glfs_h_acl_set$GFAPI_3.7.0
+_pub_glfs_h_acl_get _glfs_h_acl_get$GFAPI_3.7.0
+_pub_glfs_h_statfs _glfs_h_statfs$GFAPI_3.7.0
+_pub_glfs_h_anonymous_read _glfs_h_anonymous_read$GFAPI_3.7.0
+_pub_glfs_h_anonymous_write _glfs_h_anonymous_write$GFAPI_3.7.0
+
+_priv_glfs_free_from_ctx _glfs_free_from_ctx$GFAPI_PRIVATE_3.7.0
+_priv_glfs_new_from_ctx _glfs_new_from_ctx$GFAPI_PRIVATE_3.7.0
+_priv_glfs_resolve _glfs_resolve$GFAPI_PRIVATE_3.7.0
+_priv_glfs_process_upcall_event _glfs_process_upcall_event$GFAPI_PRIVATE_3.7.0
+
+_pub_glfs_h_lookupat _glfs_h_lookupat$GFAPI_3.7.4
+
+_pub_glfs_truncate _glfs_truncate$GFAPI_3.7.15
+
+_pub_glfs_free _glfs_free$GFAPI_3.7.16
+_pub_glfs_h_poll_upcall _glfs_h_poll_upcall$GFAPI_3.7.16
+_pub_glfs_upcall_get_fs _glfs_upcall_get_fs$GFAPI_3.7.16
+_pub_glfs_upcall_get_reason _glfs_upcall_get_reason$GFAPI_3.7.16
+_pub_glfs_upcall_get_event _glfs_upcall_get_event$GFAPI_3.7.16
+_pub_glfs_upcall_inode_get_object _glfs_upcall_inode_get_object$GFAPI_3.7.16
+_pub_glfs_upcall_inode_get_flags _glfs_upcall_inode_get_flags$GFAPI_3.7.16
+_pub_glfs_upcall_inode_get_stat _glfs_upcall_inode_get_stat$GFAPI_3.7.16
+_pub_glfs_upcall_inode_get_expire _glfs_upcall_inode_get_expire$GFAPI_3.7.16
+_pub_glfs_upcall_inode_get_pobject _glfs_upcall_inode_get_pobject$GFAPI_3.7.16
+_pub_glfs_upcall_inode_get_pstat _glfs_upcall_inode_get_pstat$GFAPI_3.7.16
+_pub_glfs_upcall_inode_get_oldpobject _glfs_upcall_inode_get_oldpobject$GFAPI_3.7.16
+_pub_glfs_upcall_inode_get_oldpstat _glfs_upcall_inode_get_oldpstat$GFAPI_3.7.16
+
+_pub_glfs_realpath _glfs_realpath$GFAPI_3.7.17
+
+_pub_glfs_sysrq _glfs_sysrq$GFAPI_3.10.0
+
+_pub_glfs_fd_set_lkowner _glfs_fd_set_lkowner$GFAPI_3.10.7
+
+_pub_glfs_xreaddirplus_r _glfs_xreaddirplus_r$GFAPI_3.11.0
+_pub_glfs_xreaddirplus_r_get_stat _glfs_xreaddirplus_r_get_stat$GFAPI_3.11.0
+_pub_glfs_xreaddirplus_r_get_object _glfs_xreaddirplus_r_get_object$GFAPI_3.11.0
+_pub_glfs_object_copy _glfs_object_copy$GFAPI_3.11.0
+
+_priv_glfs_ipc _glfs_ipc$GFAPI_3.12.0
+
+_pub_glfs_upcall_register _glfs_upcall_register$GFAPI_3.13.0
+_pub_glfs_upcall_unregister _glfs_upcall_unregister$GFAPI_3.13.0
+
+_pub_glfs_setfsleaseid _glfs_setfsleaseid$GFAPI_4.0.0
+_pub_glfs_file_lock _glfs_file_lock$GFAPI_4.0.0
+_pub_glfs_lease _glfs_lease$GFAPI_4.0.0
+_pub_glfs_h_lease _glfs_h_lease$GFAPI_4.0.0
+_pub_glfs_upcall_lease_get_object _glfs_upcall_lease_get_object$GFAPI_4.1.6
+_pub_glfs_upcall_lease_get_lease_type _glfs_upcall_lease_get_lease_type$GFAPI_4.1.6
+
+_priv_glfs_statx _glfs_statx$GFAPI_6.0
+_priv_glfs_iatt_from_statx _glfs_iatt_from_statx$GFAPI_6.0
+_priv_glfs_setfspid _glfs_setfspid$GFAPI_6.1
+
+_pub_glfs_read_async _glfs_read_async$GFAPI_6.0
+_pub_glfs_write_async _glfs_write_async$GFAPI_6.0
+_pub_glfs_readv_async _glfs_readv_async$GFAPI_6.0
+_pub_glfs_writev_async _glfs_writev_async$GFAPI_6.0
+_pub_glfs_pread _glfs_pread$GFAPI_6.0
+_pub_glfs_pwrite _glfs_pwrite$GFAPI_6.0
+_pub_glfs_pread_async _glfs_pread_async$GFAPI_6.0
+_pub_glfs_pwrite_async _glfs_pwrite_async$GFAPI_6.0
+_pub_glfs_preadv_async _glfs_preadv_async$GFAPI_6.0
+_pub_glfs_pwritev_async _glfs_pwritev_async$GFAPI_6.0
+_pub_glfs_fsync _glfs_fsync$GFAPI_6.0
+_pub_glfs_fsync_async _glfs_fsync_async$GFAPI_6.0
+_pub_glfs_fdatasync _glfs_fdatasync$GFAPI_6.0
+_pub_glfs_fdatasync_async _glfs_fdatasync_async$GFAPI_6.0
+_pub_glfs_ftruncate _glfs_ftruncate$GFAPI_6.0
+_pub_glfs_ftruncate_async _glfs_ftruncate_async$GFAPI_6.0
+_pub_glfs_discard_async _glfs_discard_async$GFAPI_6.0
+_pub_glfs_zerofill_async _glfs_zerofill_async$GFAPI_6.0
+_pub_glfs_copy_file_range _glfs_copy_file_range$GFAPI_6.0
+_pub_glfs_fsetattr _glfs_fsetattr$GFAPI_6.0
+_pub_glfs_setattr _glfs_setattr$GFAPI_6.0
+
+_pub_glfs_set_statedump_path _glfs_set_statedump_path@GFAPI_7.0
+
+_pub_glfs_h_creat_open _glfs_h_creat_open@GFAPI_6.6
diff --git a/api/src/gfapi.map b/api/src/gfapi.map
new file mode 100644
index 00000000000..228ac47c084
--- /dev/null
+++ b/api/src/gfapi.map
@@ -0,0 +1,283 @@
+
+GFAPI_PRIVATE_3.4.0 {
+ global:
+ glfs_loc_touchup;
+ glfs_active_subvol;
+ glfs_subvol_done;
+ glfs_init_done;
+ glfs_resolve_at;
+ local: *;
+};
+
+GFAPI_3.4.0 {
+ global:
+ glfs_new;
+ glfs_set_volfile;
+ glfs_set_volfile_server;
+ glfs_set_logging;
+ glfs_init;
+ glfs_fini;
+ glfs_open;
+ glfs_creat;
+ glfs_close;
+ glfs_from_glfd;
+ glfs_set_xlator_option;
+ glfs_read;
+ glfs_write;
+ glfs_read_async;
+ glfs_write_async;
+ glfs_readv;
+ glfs_writev;
+ glfs_readv_async;
+ glfs_writev_async;
+ glfs_pread;
+ glfs_pwrite;
+ glfs_pread_async;
+ glfs_pwrite_async;
+ glfs_preadv;
+ glfs_pwritev;
+ glfs_preadv_async;
+ glfs_pwritev_async;
+ glfs_lseek;
+ glfs_ftruncate;
+ glfs_ftruncate_async;
+ glfs_lstat;
+ glfs_stat;
+ glfs_fstat;
+ glfs_fsync;
+ glfs_fsync_async;
+ glfs_fdatasync;
+ glfs_fdatasync_async;
+ glfs_access;
+ glfs_symlink;
+ glfs_readlink;
+ glfs_mknod;
+ glfs_mkdir;
+ glfs_unlink;
+ glfs_rmdir;
+ glfs_rename;
+ glfs_link;
+ glfs_opendir;
+ glfs_readdir_r;
+ glfs_readdirplus_r;
+ glfs_telldir;
+ glfs_seekdir;
+ glfs_closedir;
+ glfs_statvfs;
+ glfs_chmod;
+ glfs_fchmod;
+ glfs_chown;
+ glfs_lchown;
+ glfs_fchown;
+ glfs_utimens;
+ glfs_lutimens;
+ glfs_futimens;
+ glfs_getxattr;
+ glfs_lgetxattr;
+ glfs_fgetxattr;
+ glfs_listxattr;
+ glfs_llistxattr;
+ glfs_flistxattr;
+ glfs_setxattr;
+ glfs_lsetxattr;
+ glfs_fsetxattr;
+ glfs_removexattr;
+ glfs_lremovexattr;
+ glfs_fremovexattr;
+ glfs_getcwd;
+ glfs_chdir;
+ glfs_fchdir;
+ glfs_realpath;
+ glfs_posix_lock;
+ glfs_dup;
+} GFAPI_PRIVATE_3.4.0;
+
+GFAPI_3.4.2 {
+ global:
+ glfs_setfsuid;
+ glfs_setfsgid;
+ glfs_setfsgroups;
+ glfs_h_creat;
+ glfs_h_mkdir;
+ glfs_h_mknod;
+ glfs_h_symlink;
+ glfs_h_unlink;
+ glfs_h_close;
+ glfs_h_truncate;
+ glfs_h_stat;
+ glfs_h_getattrs;
+ glfs_h_setattrs;
+ glfs_h_readlink;
+ glfs_h_link;
+ glfs_h_rename;
+ glfs_h_extract_handle;
+ glfs_h_create_from_handle;
+ glfs_h_opendir;
+ glfs_h_open;
+ glfs_h_lookupat;
+} GFAPI_3.4.0;
+
+GFAPI_3.5.0 {
+ global:
+ glfs_get_volumeid;
+ glfs_readdir;
+ glfs_readdirplus;
+ glfs_fallocate;
+ glfs_discard;
+ glfs_discard_async;
+ glfs_zerofill;
+ glfs_zerofill_async;
+ glfs_caller_specific_init;
+ glfs_h_setxattrs;
+} GFAPI_3.4.2;
+
+GFAPI_3.5.1 {
+ global:
+ glfs_unset_volfile_server;
+ glfs_h_getxattrs;
+ glfs_h_removexattrs;
+} GFAPI_3.5.0;
+
+GFAPI_3.6.0 {
+ global:
+ glfs_get_volfile;
+ glfs_h_access;
+} GFAPI_3.5.1;
+
+GFAPI_3.7.0 {
+ global:
+ glfs_h_poll_upcall;
+ glfs_h_acl_set;
+ glfs_h_acl_get;
+ glfs_h_statfs;
+ glfs_h_anonymous_read;
+ glfs_h_anonymous_write;
+} GFAPI_3.6.0;
+
+GFAPI_PRIVATE_3.7.0 {
+ global:
+ glfs_free_from_ctx;
+ glfs_new_from_ctx;
+ glfs_resolve;
+ glfs_process_upcall_event;
+} GFAPI_3.7.0;
+
+GFAPI_3.7.4 {
+ global:
+ glfs_h_lookupat;
+} GFAPI_PRIVATE_3.7.0;
+
+GFAPI_3.7.15 {
+ global:
+ glfs_truncate;
+} GFAPI_3.7.4;
+
+GFAPI_3.7.16 {
+ global:
+ glfs_free;
+ glfs_upcall_get_fs;
+ glfs_upcall_get_reason;
+ glfs_upcall_get_event;
+ glfs_upcall_inode_get_object;
+ glfs_upcall_inode_get_flags;
+ glfs_upcall_inode_get_stat;
+ glfs_upcall_inode_get_expire;
+ glfs_upcall_inode_get_pobject;
+ glfs_upcall_inode_get_pstat;
+ glfs_upcall_inode_get_oldpobject;
+ glfs_upcall_inode_get_oldpstat;
+ glfs_h_poll_upcall;
+} GFAPI_3.7.15;
+
+GFAPI_3.7.17 {
+ global:
+ glfs_realpath;
+} GFAPI_3.7.16;
+
+GFAPI_3.10.0 {
+ global:
+ glfs_sysrq;
+} GFAPI_3.7.17;
+
+GFAPI_3.10.7 {
+ global:
+ glfs_fd_set_lkowner;
+} GFAPI_3.10.0;
+
+GFAPI_3.11.0 {
+ glfs_xreaddirplus_r;
+ glfs_xreaddirplus_r_get_stat;
+ glfs_xreaddirplus_r_get_object;
+ glfs_object_copy;
+} GFAPI_3.10.7;
+
+GFAPI_PRIVATE_3.12.0 {
+ global:
+ glfs_ipc;
+} GFAPI_3.11.0;
+
+GFAPI_3.13.0 {
+ global:
+ glfs_upcall_register;
+ glfs_upcall_unregister;
+} GFAPI_PRIVATE_3.12.0;
+
+GFAPI_4.0.0 {
+ global:
+ glfs_setfsleaseid;
+ glfs_file_lock;
+ glfs_lease;
+ glfs_h_lease;
+} GFAPI_3.13.0;
+
+GFAPI_4.1.6 {
+ global:
+ glfs_upcall_lease_get_object;
+ glfs_upcall_lease_get_lease_type;
+} GFAPI_4.0.0;
+
+GFAPI_PRIVATE_6.0 {
+ global:
+ glfs_statx;
+ glfs_iatt_from_statx;
+} GFAPI_4.1.6;
+
+GFAPI_6.0 {
+ global:
+ glfs_read_async;
+ glfs_write_async;
+ glfs_readv_async;
+ glfs_writev_async;
+ glfs_pread;
+ glfs_pwrite;
+ glfs_pread_async;
+ glfs_pwrite_async;
+ glfs_preadv_async;
+ glfs_pwritev_async;
+ glfs_fsync;
+ glfs_fsync_async;
+ glfs_fdatasync;
+ glfs_fdatasync_async;
+ glfs_ftruncate;
+ glfs_ftruncate_async;
+ glfs_discard_async;
+ glfs_zerofill_async;
+ glfs_copy_file_range;
+ glfs_setattr;
+ glfs_fsetattr;
+} GFAPI_PRIVATE_6.0;
+
+GFAPI_PRIVATE_6.1 {
+ global:
+ glfs_setfspid;
+} GFAPI_6.0;
+
+GFAPI_6.6 {
+ global:
+ glfs_h_creat_open;
+} GFAPI_PRIVATE_6.1;
+
+GFAPI_7.0 {
+ global:
+ glfs_set_statedump_path;
+} GFAPI_6.6;
diff --git a/api/src/glfs-fops.c b/api/src/glfs-fops.c
new file mode 100644
index 00000000000..6aa3c5602d1
--- /dev/null
+++ b/api/src/glfs-fops.c
@@ -0,0 +1,6445 @@
+
+/*
+ Copyright (c) 2012-2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* for SEEK_HOLE and SEEK_DATA */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <unistd.h>
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+#include <glusterfs/syncop.h>
+#include "glfs.h"
+#include "gfapi-messages.h"
+#include <glusterfs/compat-errno.h>
+#include <limits.h>
+#include "glusterfs3.h"
+#include <glusterfs/iatt.h>
+
+#ifdef NAME_MAX
+#define GF_NAME_MAX NAME_MAX
+#else
+#define GF_NAME_MAX 255
+#endif
+
+struct upcall_syncop_args {
+ struct glfs *fs;
+ struct gf_upcall upcall_data;
+};
+
+#define READDIRBUF_SIZE (sizeof(struct dirent) + GF_NAME_MAX + 1)
+
+typedef void (*glfs_io_cbk34)(glfs_fd_t *fd, ssize_t ret, void *data);
+
+/*
+ * This function will mark glfd for deletion and decrement its refcount.
+ */
+int
+glfs_mark_glfd_for_deletion(struct glfs_fd *glfd)
+{
+ LOCK(&glfd->lock);
+ {
+ glfd->state = GLFD_CLOSE;
+ }
+ UNLOCK(&glfd->lock);
+
+ GF_REF_PUT(glfd);
+
+ return 0;
+}
+
+/* This function is useful for all async fops. There is chance that glfd is
+ * closed before async fop is completed. When glfd is closed we change the
+ * state to GLFD_CLOSE.
+ *
+ * This function will return _gf_true if the glfd is still valid else return
+ * _gf_false.
+ */
+gf_boolean_t
+glfs_is_glfd_still_valid(struct glfs_fd *glfd)
+{
+ gf_boolean_t ret = _gf_false;
+
+ LOCK(&glfd->lock);
+ {
+ if (glfd->state != GLFD_CLOSE)
+ ret = _gf_true;
+ }
+ UNLOCK(&glfd->lock);
+
+ return ret;
+}
+
+void
+glfd_set_state_bind(struct glfs_fd *glfd)
+{
+ LOCK(&glfd->lock);
+ {
+ glfd->state = GLFD_OPEN;
+ }
+ UNLOCK(&glfd->lock);
+
+ fd_bind(glfd->fd);
+ glfs_fd_bind(glfd);
+
+ return;
+}
+
+/*
+ * This routine is called when an upcall event of type
+ * 'GF_UPCALL_CACHE_INVALIDATION' is received.
+ * It makes a copy of the contents of the upcall cache-invalidation
+ * data received into an entry which is stored in the upcall list
+ * maintained by gfapi.
+ */
+int
+glfs_get_upcall_cache_invalidation(struct gf_upcall *to_up_data,
+ struct gf_upcall *from_up_data)
+{
+ struct gf_upcall_cache_invalidation *ca_data = NULL;
+ struct gf_upcall_cache_invalidation *f_ca_data = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, to_up_data, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, from_up_data, out);
+
+ f_ca_data = from_up_data->data;
+ GF_VALIDATE_OR_GOTO(THIS->name, f_ca_data, out);
+
+ ca_data = GF_CALLOC(1, sizeof(*ca_data), glfs_mt_upcall_entry_t);
+
+ if (!ca_data) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
+ goto out;
+ }
+
+ to_up_data->data = ca_data;
+
+ ca_data->flags = f_ca_data->flags;
+ ca_data->expire_time_attr = f_ca_data->expire_time_attr;
+ ca_data->stat = f_ca_data->stat;
+ ca_data->p_stat = f_ca_data->p_stat;
+ ca_data->oldp_stat = f_ca_data->oldp_stat;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glfs_get_upcall_lease(struct gf_upcall *to_up_data,
+ struct gf_upcall *from_up_data)
+{
+ struct gf_upcall_recall_lease *ca_data = NULL;
+ struct gf_upcall_recall_lease *f_ca_data = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, to_up_data, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, from_up_data, out);
+
+ f_ca_data = from_up_data->data;
+ GF_VALIDATE_OR_GOTO(THIS->name, f_ca_data, out);
+
+ ca_data = GF_CALLOC(1, sizeof(*ca_data), glfs_mt_upcall_entry_t);
+
+ if (!ca_data) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
+ goto out;
+ }
+
+ to_up_data->data = ca_data;
+
+ ca_data->lease_type = f_ca_data->lease_type;
+ gf_uuid_copy(ca_data->tid, f_ca_data->tid);
+ ca_data->dict = f_ca_data->dict;
+
+ ret = 0;
+out:
+ return ret;
+}
+int
+glfs_loc_link(loc_t *loc, struct iatt *iatt)
+{
+ int ret = -1;
+ inode_t *old_inode = NULL;
+ uint64_t ctx_value = LOOKUP_NOT_NEEDED;
+
+ if (!loc->inode) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ old_inode = loc->inode;
+
+ /* If the inode already exists in the cache, the inode
+ * returned here points to the existing one. We need
+ * to update loc.inode accordingly.
+ */
+ loc->inode = inode_link(loc->inode, loc->parent, loc->name, iatt);
+ if (loc->inode) {
+ inode_ctx_set(loc->inode, THIS, &ctx_value);
+ inode_lookup(loc->inode);
+ inode_unref(old_inode);
+ ret = 0;
+ } else {
+ ret = -1;
+ }
+
+ return ret;
+}
+
+void
+glfs_iatt_to_stat(struct glfs *fs, struct iatt *iatt, struct stat *stat)
+{
+ iatt_to_stat(iatt, stat);
+ stat->st_dev = fs->dev_id;
+}
+
+void
+glfs_iatt_to_statx(struct glfs *fs, const struct iatt *iatt,
+ struct glfs_stat *statx)
+{
+ statx->glfs_st_mask = 0;
+
+ statx->glfs_st_mode = 0;
+ if (IATT_TYPE_VALID(iatt->ia_flags)) {
+ statx->glfs_st_mode |= st_mode_type_from_ia(iatt->ia_type);
+ statx->glfs_st_mask |= GLFS_STAT_TYPE;
+ }
+
+ if (IATT_MODE_VALID(iatt->ia_flags)) {
+ statx->glfs_st_mode |= st_mode_prot_from_ia(iatt->ia_prot);
+ statx->glfs_st_mask |= GLFS_STAT_MODE;
+ }
+
+ if (IATT_NLINK_VALID(iatt->ia_flags)) {
+ statx->glfs_st_nlink = iatt->ia_nlink;
+ statx->glfs_st_mask |= GLFS_STAT_NLINK;
+ }
+
+ if (IATT_UID_VALID(iatt->ia_flags)) {
+ statx->glfs_st_uid = iatt->ia_uid;
+ statx->glfs_st_mask |= GLFS_STAT_UID;
+ }
+
+ if (IATT_GID_VALID(iatt->ia_flags)) {
+ statx->glfs_st_gid = iatt->ia_gid;
+ statx->glfs_st_mask |= GLFS_STAT_GID;
+ }
+
+ if (IATT_ATIME_VALID(iatt->ia_flags)) {
+ statx->glfs_st_atime.tv_sec = iatt->ia_atime;
+ statx->glfs_st_atime.tv_nsec = iatt->ia_atime_nsec;
+ statx->glfs_st_mask |= GLFS_STAT_ATIME;
+ }
+
+ if (IATT_MTIME_VALID(iatt->ia_flags)) {
+ statx->glfs_st_mtime.tv_sec = iatt->ia_mtime;
+ statx->glfs_st_mtime.tv_nsec = iatt->ia_mtime_nsec;
+ statx->glfs_st_mask |= GLFS_STAT_MTIME;
+ }
+
+ if (IATT_CTIME_VALID(iatt->ia_flags)) {
+ statx->glfs_st_ctime.tv_sec = iatt->ia_ctime;
+ statx->glfs_st_ctime.tv_nsec = iatt->ia_ctime_nsec;
+ statx->glfs_st_mask |= GLFS_STAT_CTIME;
+ }
+
+ if (IATT_BTIME_VALID(iatt->ia_flags)) {
+ statx->glfs_st_btime.tv_sec = iatt->ia_btime;
+ statx->glfs_st_btime.tv_nsec = iatt->ia_btime_nsec;
+ statx->glfs_st_mask |= GLFS_STAT_BTIME;
+ }
+
+ if (IATT_INO_VALID(iatt->ia_flags)) {
+ statx->glfs_st_ino = iatt->ia_ino;
+ statx->glfs_st_mask |= GLFS_STAT_INO;
+ }
+
+ if (IATT_SIZE_VALID(iatt->ia_flags)) {
+ statx->glfs_st_size = iatt->ia_size;
+ statx->glfs_st_mask |= GLFS_STAT_SIZE;
+ }
+
+ if (IATT_BLOCKS_VALID(iatt->ia_flags)) {
+ statx->glfs_st_blocks = iatt->ia_blocks;
+ statx->glfs_st_mask |= GLFS_STAT_BLOCKS;
+ }
+
+ /* unconditionally present, encode as is */
+ statx->glfs_st_blksize = iatt->ia_blksize;
+ statx->glfs_st_rdev_major = ia_major(iatt->ia_rdev);
+ statx->glfs_st_rdev_minor = ia_minor(iatt->ia_rdev);
+ statx->glfs_st_dev_major = ia_major(fs->dev_id);
+ statx->glfs_st_dev_minor = ia_minor(fs->dev_id);
+
+ /* At present we do not read any localFS attributes and pass them along,
+ * so setting this to 0. As we start supporting file attributes we can
+ * populate the same here as well */
+ statx->glfs_st_attributes = 0;
+ statx->glfs_st_attributes_mask = 0;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_iatt_from_statx, 6.0)
+void
+priv_glfs_iatt_from_statx(struct iatt *iatt, const struct glfs_stat *statx)
+{
+ /* Most code in xlators are not checking validity flags before accessing
+ the items. Hence zero everything before setting valid items */
+ memset(iatt, 0, sizeof(struct iatt));
+
+ if (GLFS_STAT_TYPE_VALID(statx->glfs_st_mask)) {
+ iatt->ia_type = ia_type_from_st_mode(statx->glfs_st_mode);
+ iatt->ia_flags |= IATT_TYPE;
+ }
+
+ if (GLFS_STAT_MODE_VALID(statx->glfs_st_mask)) {
+ iatt->ia_prot = ia_prot_from_st_mode(statx->glfs_st_mode);
+ iatt->ia_flags |= IATT_MODE;
+ }
+
+ if (GLFS_STAT_NLINK_VALID(statx->glfs_st_mask)) {
+ iatt->ia_nlink = statx->glfs_st_nlink;
+ iatt->ia_flags |= IATT_NLINK;
+ }
+
+ if (GLFS_STAT_UID_VALID(statx->glfs_st_mask)) {
+ iatt->ia_uid = statx->glfs_st_uid;
+ iatt->ia_flags |= IATT_UID;
+ }
+
+ if (GLFS_STAT_GID_VALID(statx->glfs_st_mask)) {
+ iatt->ia_gid = statx->glfs_st_gid;
+ iatt->ia_flags |= IATT_GID;
+ }
+
+ if (GLFS_STAT_ATIME_VALID(statx->glfs_st_mask)) {
+ iatt->ia_atime = statx->glfs_st_atime.tv_sec;
+ iatt->ia_atime_nsec = statx->glfs_st_atime.tv_nsec;
+ iatt->ia_flags |= IATT_ATIME;
+ }
+
+ if (GLFS_STAT_MTIME_VALID(statx->glfs_st_mask)) {
+ iatt->ia_mtime = statx->glfs_st_mtime.tv_sec;
+ iatt->ia_mtime_nsec = statx->glfs_st_mtime.tv_nsec;
+ iatt->ia_flags |= IATT_MTIME;
+ }
+
+ if (GLFS_STAT_CTIME_VALID(statx->glfs_st_mask)) {
+ iatt->ia_ctime = statx->glfs_st_ctime.tv_sec;
+ iatt->ia_ctime_nsec = statx->glfs_st_ctime.tv_nsec;
+ iatt->ia_flags |= IATT_CTIME;
+ }
+
+ if (GLFS_STAT_BTIME_VALID(statx->glfs_st_mask)) {
+ iatt->ia_btime = statx->glfs_st_btime.tv_sec;
+ iatt->ia_btime_nsec = statx->glfs_st_btime.tv_nsec;
+ iatt->ia_flags |= IATT_BTIME;
+ }
+
+ if (GLFS_STAT_INO_VALID(statx->glfs_st_mask)) {
+ iatt->ia_ino = statx->glfs_st_ino;
+ iatt->ia_flags |= IATT_INO;
+ }
+
+ if (GLFS_STAT_SIZE_VALID(statx->glfs_st_mask)) {
+ iatt->ia_size = statx->glfs_st_size;
+ iatt->ia_flags |= IATT_SIZE;
+ }
+
+ if (GLFS_STAT_BLOCKS_VALID(statx->glfs_st_mask)) {
+ iatt->ia_blocks = statx->glfs_st_blocks;
+ iatt->ia_flags |= IATT_BLOCKS;
+ }
+
+ /* unconditionally present, encode as is */
+ iatt->ia_blksize = statx->glfs_st_blksize;
+ iatt->ia_rdev = makedev(statx->glfs_st_rdev_major,
+ statx->glfs_st_rdev_minor);
+ iatt->ia_dev = makedev(statx->glfs_st_dev_major, statx->glfs_st_dev_minor);
+ iatt->ia_attributes = statx->glfs_st_attributes;
+ iatt->ia_attributes_mask = statx->glfs_st_attributes_mask;
+}
+
+void
+glfsflags_from_gfapiflags(struct glfs_stat *stat, int *glvalid)
+{
+ *glvalid = 0;
+ if (stat->glfs_st_mask & GLFS_STAT_MODE) {
+ *glvalid |= GF_SET_ATTR_MODE;
+ }
+
+ if (stat->glfs_st_mask & GLFS_STAT_SIZE) {
+ *glvalid |= GF_SET_ATTR_SIZE;
+ }
+
+ if (stat->glfs_st_mask & GLFS_STAT_UID) {
+ *glvalid |= GF_SET_ATTR_UID;
+ }
+
+ if (stat->glfs_st_mask & GLFS_STAT_GID) {
+ *glvalid |= GF_SET_ATTR_GID;
+ }
+
+ if (stat->glfs_st_mask & GLFS_STAT_ATIME) {
+ *glvalid |= GF_SET_ATTR_ATIME;
+ }
+
+ if (stat->glfs_st_mask & GLFS_STAT_MTIME) {
+ *glvalid |= GF_SET_ATTR_MTIME;
+ }
+}
+
+int
+glfs_loc_unlink(loc_t *loc)
+{
+ inode_unlink(loc->inode, loc->parent, loc->name);
+
+ /* since glfs_h_* objects hold a reference to inode
+ * it is safe to keep lookup count to '0' */
+ if (!inode_has_dentry(loc->inode))
+ inode_forget(loc->inode, 0);
+
+ return 0;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_open, 3.4.0)
+struct glfs_fd *
+pub_glfs_open(struct glfs *fs, const char *path, int flags)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ glfd = glfs_fd_new(fs);
+ if (!glfd)
+ goto out;
+
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (IA_ISDIR(iatt.ia_type)) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ if (!IA_ISREG(iatt.ia_type)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (glfd->fd) {
+ /* Retry. Safe to touch glfd->fd as we
+ still have not glfs_fd_bind() yet.
+ */
+ fd_unref(glfd->fd);
+ glfd->fd = NULL;
+ }
+
+ glfd->fd = fd_create(loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ glfd->fd->flags = flags;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_open(subvol, &loc, flags, glfd->fd, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+out:
+ loc_wipe(&loc);
+
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ if (ret && glfd) {
+ GF_REF_PUT(glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ glfd_set_state_bind(glfd);
+ }
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return glfd;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_close, 3.4.0)
+int
+pub_glfs_close(struct glfs_fd *glfd)
+{
+ xlator_t *subvol = NULL;
+ int ret = -1;
+ fd_t *fd = NULL;
+ struct glfs *fs = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ if (glfd->lk_owner.len != 0) {
+ ret = syncopctx_setfslkowner(&glfd->lk_owner);
+ if (ret)
+ goto out;
+ }
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_flush(subvol, fd, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ fs = glfd->fs;
+
+ if (fd)
+ fd_unref(fd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_mark_glfd_for_deletion(glfd);
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lstat, 3.4.0)
+int
+pub_glfs_lstat(struct glfs *fs, const char *path, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0 && stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_stat, 3.4.0)
+int
+pub_glfs_stat(struct glfs *fs, const char *path, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0 && stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_statx, 6.0)
+int
+priv_glfs_statx(struct glfs *fs, const char *path, const unsigned int mask,
+ struct glfs_stat *statxbuf)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (path == NULL) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (mask & ~GLFS_STAT_ALL) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0 && statxbuf)
+ glfs_iatt_to_statx(fs, &iatt, statxbuf);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fstat, 3.4.0)
+int
+pub_glfs_fstat(struct glfs_fd *glfd, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ fd_t *fd = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fstat(subvol, fd, &iatt, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret == 0 && stat)
+ glfs_iatt_to_stat(glfd->fs, &iatt, stat);
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_creat, 3.4.0)
+struct glfs_fd *
+pub_glfs_creat(struct glfs *fs, const char *path, int flags, mode_t mode)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ glfd = glfs_fd_new(fs);
+ if (!glfd)
+ goto out;
+
+ /* This must be glfs_resolve() and NOT glfs_lresolve().
+ That is because open("name", O_CREAT) where "name"
+ is a danging symlink must create the dangling
+ destination.
+ */
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ if (loc.inode) {
+ if (flags & O_EXCL) {
+ ret = -1;
+ errno = EEXIST;
+ goto out;
+ }
+
+ if (IA_ISDIR(iatt.ia_type)) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ if (!IA_ISREG(iatt.ia_type)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+ }
+
+ if (ret == -1 && errno == ENOENT) {
+ loc.inode = inode_new(loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ if (glfd->fd) {
+ /* Retry. Safe to touch glfd->fd as we
+ still have not glfs_fd_bind() yet.
+ */
+ fd_unref(glfd->fd);
+ glfd->fd = NULL;
+ }
+
+ glfd->fd = fd_create(loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ glfd->fd->flags = flags;
+
+ if (get_fop_attr_thrd_key(&xattr_req))
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+ if (ret == 0) {
+ ret = syncop_open(subvol, &loc, flags, glfd->fd, xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ } else {
+ ret = syncop_create(subvol, &loc, flags, mode, glfd->fd, &iatt,
+ xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ }
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link(&loc, &iatt);
+out:
+ loc_wipe(&loc);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ if (ret && glfd) {
+ GF_REF_PUT(glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ glfd_set_state_bind(glfd);
+ }
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return glfd;
+}
+
+#ifdef HAVE_SEEK_HOLE
+static int
+glfs_seek(struct glfs_fd *glfd, off_t offset, int whence)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ gf_seek_what_t what = 0;
+ off_t off = -1;
+
+ switch (whence) {
+ case SEEK_DATA:
+ what = GF_SEEK_DATA;
+ break;
+ case SEEK_HOLE:
+ what = GF_SEEK_HOLE;
+ break;
+ default:
+ /* other SEEK_* do not make sense, all operations get an offset
+ * and the position in the fd is not tracked */
+ errno = EINVAL;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto done;
+ }
+
+ ret = syncop_seek(subvol, fd, offset, what, NULL, &off);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret != -1)
+ glfd->offset = off;
+
+done:
+ if (fd)
+ fd_unref(fd);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+out:
+ return ret;
+}
+#endif
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lseek, 3.4.0)
+off_t
+pub_glfs_lseek(struct glfs_fd *glfd, off_t offset, int whence)
+{
+ struct stat sb = {
+ 0,
+ };
+ int ret = -1;
+ off_t off = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ switch (whence) {
+ case SEEK_SET:
+ glfd->offset = offset;
+ ret = 0;
+ break;
+ case SEEK_CUR:
+ glfd->offset += offset;
+ ret = 0;
+ break;
+ case SEEK_END:
+ ret = pub_glfs_fstat(glfd, &sb);
+ if (ret) {
+ /* seek cannot fail :O */
+ break;
+ }
+ glfd->offset = sb.st_size + offset;
+ break;
+#ifdef HAVE_SEEK_HOLE
+ case SEEK_DATA:
+ case SEEK_HOLE:
+ ret = glfs_seek(glfd, offset, whence);
+ break;
+#endif
+ default:
+ errno = EINVAL;
+ }
+
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ __GLFS_EXIT_FS;
+
+ if (ret != -1)
+ off = glfd->offset;
+
+ return off;
+
+invalid_fs:
+ return -1;
+}
+
+static ssize_t
+glfs_preadv_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags, struct glfs_stat *poststat)
+{
+ xlator_t *subvol = NULL;
+ ssize_t ret = -1;
+ ssize_t size = -1;
+ struct iovec *iov = NULL;
+ int cnt = 0;
+ struct iobref *iobref = NULL;
+ fd_t *fd = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ size = iov_length(iovec, iovcnt);
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_readv(subvol, fd, size, offset, 0, &iov, &cnt, &iobref, &iatt,
+ fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret >= 0 && poststat)
+ glfs_iatt_to_statx(glfd->fs, &iatt, poststat);
+
+ if (ret <= 0)
+ goto out;
+
+ size = iov_copy(iovec, iovcnt, iov, cnt); /* FIXME!!! */
+
+ glfd->offset = (offset + size);
+
+ ret = size;
+out:
+ if (iov)
+ GF_FREE(iov);
+ if (iobref)
+ iobref_unref(iobref);
+
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_preadv, 3.4.0)
+ssize_t
+pub_glfs_preadv(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags)
+{
+ return glfs_preadv_common(glfd, iovec, iovcnt, offset, flags, NULL);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_read, 3.4.0)
+ssize_t
+pub_glfs_read(struct glfs_fd *glfd, void *buf, size_t count, int flags)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = pub_glfs_preadv(glfd, &iov, 1, glfd->offset, flags);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_pread34, glfs_pread, 3.4.0)
+ssize_t
+pub_glfs_pread34(struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
+ int flags)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = pub_glfs_preadv(glfd, &iov, 1, offset, flags);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pread, 6.0)
+ssize_t
+pub_glfs_pread(struct glfs_fd *glfd, void *buf, size_t count, off_t offset,
+ int flags, struct glfs_stat *poststat)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv_common(glfd, &iov, 1, offset, flags, poststat);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readv, 3.4.0)
+ssize_t
+pub_glfs_readv(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags)
+{
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ ret = pub_glfs_preadv(glfd, iov, count, glfd->offset, flags);
+
+ return ret;
+}
+
+struct glfs_io {
+ struct glfs_fd *glfd;
+ int op;
+ off_t offset;
+ struct iovec *iov;
+ int count;
+ int flags;
+ gf_boolean_t oldcb;
+ union {
+ glfs_io_cbk34 fn34;
+ glfs_io_cbk fn;
+ };
+ void *data;
+};
+
+static int
+glfs_io_async_cbk(int op_ret, int op_errno, call_frame_t *frame, void *cookie,
+ struct iovec *iovec, int count, struct iatt *prebuf,
+ struct iatt *postbuf)
+{
+ struct glfs_io *gio = NULL;
+ xlator_t *subvol = NULL;
+ struct glfs *fs = NULL;
+ struct glfs_fd *glfd = NULL;
+ int ret = -1;
+ struct glfs_stat prestat = {}, *prestatp = NULL;
+ struct glfs_stat poststat = {}, *poststatp = NULL;
+
+ GF_VALIDATE_OR_GOTO("gfapi", frame, inval);
+ GF_VALIDATE_OR_GOTO("gfapi", cookie, inval);
+
+ gio = frame->local;
+ frame->local = NULL;
+ subvol = cookie;
+ glfd = gio->glfd;
+ fs = glfd->fs;
+
+ if (!glfs_is_glfd_still_valid(glfd))
+ goto err;
+
+ if (op_ret <= 0) {
+ goto out;
+ } else if (gio->op == GF_FOP_READ) {
+ if (!iovec) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ op_ret = iov_copy(gio->iov, gio->count, iovec, count);
+ glfd->offset = gio->offset + op_ret;
+ } else if (gio->op == GF_FOP_WRITE) {
+ glfd->offset = gio->offset + gio->iov->iov_len;
+ }
+
+out:
+ errno = op_errno;
+ if (gio->oldcb) {
+ gio->fn34(gio->glfd, op_ret, gio->data);
+ } else {
+ if (prebuf) {
+ prestatp = &prestat;
+ glfs_iatt_to_statx(fs, prebuf, prestatp);
+ }
+
+ if (postbuf) {
+ poststatp = &poststat;
+ glfs_iatt_to_statx(fs, postbuf, poststatp);
+ }
+
+ gio->fn(gio->glfd, op_ret, prestatp, poststatp, gio->data);
+ }
+err:
+ fd_unref(glfd->fd);
+ /* Since the async operation is complete
+ * release the ref taken during the start
+ * of async operation
+ */
+ GF_REF_PUT(glfd);
+
+ GF_FREE(gio->iov);
+ GF_FREE(gio);
+ STACK_DESTROY(frame->root);
+ glfs_subvol_done(fs, subvol);
+
+ ret = 0;
+inval:
+ return ret;
+}
+
+static int
+glfs_preadv_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iovec *iovec, int count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+ glfs_io_async_cbk(op_ret, op_errno, frame, cookie, iovec, count, NULL,
+ stbuf);
+
+ return 0;
+}
+
+static int
+glfs_preadv_async_common(struct glfs_fd *glfd, const struct iovec *iovec,
+ int count, off_t offset, int flags, gf_boolean_t oldcb,
+ glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ xlator_t *subvol = NULL;
+ struct glfs *fs = NULL;
+ fd_t *fd = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ fs = glfd->fs;
+
+ frame = syncop_create_frame(THIS);
+ if (!frame) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio = GF_CALLOC(1, sizeof(*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio->iov = iov_dup(iovec, count);
+ if (!gio->iov) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio->op = GF_FOP_READ;
+ gio->glfd = glfd;
+ gio->count = count;
+ gio->offset = offset;
+ gio->flags = flags;
+ gio->oldcb = oldcb;
+ gio->fn = fn;
+ gio->data = data;
+
+ frame->local = gio;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ STACK_WIND_COOKIE(frame, glfs_preadv_async_cbk, subvol, subvol,
+ subvol->fops->readv, fd, iov_length(iovec, count), offset,
+ flags, fop_attr);
+
+out:
+ if (ret) {
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (gio) {
+ GF_FREE(gio->iov);
+ GF_FREE(gio);
+ }
+ if (frame) {
+ STACK_DESTROY(frame->root);
+ }
+ glfs_subvol_done(fs, subvol);
+ }
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ __GLFS_EXIT_FS;
+
+ return ret;
+
+invalid_fs:
+ return -1;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_preadv_async34, glfs_preadv_async, 3.4.0)
+int
+pub_glfs_preadv_async34(struct glfs_fd *glfd, const struct iovec *iovec,
+ int count, off_t offset, int flags, glfs_io_cbk34 fn,
+ void *data)
+{
+ return glfs_preadv_async_common(glfd, iovec, count, offset, flags, _gf_true,
+ (void *)fn, data);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_preadv_async, 6.0)
+int
+pub_glfs_preadv_async(struct glfs_fd *glfd, const struct iovec *iovec,
+ int count, off_t offset, int flags, glfs_io_cbk fn,
+ void *data)
+{
+ return glfs_preadv_async_common(glfd, iovec, count, offset, flags,
+ _gf_false, fn, data);
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_read_async34, glfs_read_async, 3.4.0)
+int
+pub_glfs_read_async34(struct glfs_fd *glfd, void *buf, size_t count, int flags,
+ glfs_io_cbk34 fn, void *data)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv_async_common(glfd, &iov, 1, glfd->offset, flags, _gf_true,
+ (void *)fn, data);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_read_async, 6.0)
+int
+pub_glfs_read_async(struct glfs_fd *glfd, void *buf, size_t count, int flags,
+ glfs_io_cbk fn, void *data)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv_async_common(glfd, &iov, 1, glfd->offset, flags,
+ _gf_false, fn, data);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_pread_async34, glfs_pread_async, 3.4.0)
+int
+pub_glfs_pread_async34(struct glfs_fd *glfd, void *buf, size_t count,
+ off_t offset, int flags, glfs_io_cbk34 fn, void *data)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv_async_common(glfd, &iov, 1, offset, flags, _gf_true,
+ (void *)fn, data);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pread_async, 6.0)
+int
+pub_glfs_pread_async(struct glfs_fd *glfd, void *buf, size_t count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ iov.iov_base = buf;
+ iov.iov_len = count;
+
+ ret = glfs_preadv_async_common(glfd, &iov, 1, offset, flags, _gf_false, fn,
+ data);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_readv_async34, glfs_readv_async, 3.4.0)
+int
+pub_glfs_readv_async34(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk34 fn, void *data)
+{
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ ret = glfs_preadv_async_common(glfd, iov, count, glfd->offset, flags,
+ _gf_true, (void *)fn, data);
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readv_async, 6.0)
+int
+pub_glfs_readv_async(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk fn, void *data)
+{
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ ret = glfs_preadv_async_common(glfd, iov, count, glfd->offset, flags,
+ _gf_false, fn, data);
+ return ret;
+}
+
+static ssize_t
+glfs_pwritev_common(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
+{
+ xlator_t *subvol = NULL;
+ int ret = -1;
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iovec iov = {
+ 0,
+ };
+ fd_t *fd = NULL;
+ struct iatt preiatt =
+ {
+ 0,
+ },
+ postiatt = {
+ 0,
+ };
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = iobuf_copy(subvol->ctx->iobuf_pool, iovec, iovcnt, &iobref, &iobuf,
+ &iov);
+ if (ret)
+ goto out;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_writev(subvol, fd, &iov, 1, offset, iobref, flags, &preiatt,
+ &postiatt, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret >= 0) {
+ if (prestat)
+ glfs_iatt_to_statx(glfd->fs, &preiatt, prestat);
+ if (poststat)
+ glfs_iatt_to_statx(glfd->fs, &postiatt, poststat);
+ }
+
+ if (ret <= 0)
+ goto out;
+
+ glfd->offset = (offset + iov.iov_len);
+out:
+ if (iobuf)
+ iobuf_unref(iobuf);
+ if (iobref)
+ iobref_unref(iobref);
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_copy_file_range, 6.0)
+ssize_t
+pub_glfs_copy_file_range(struct glfs_fd *glfd_in, off64_t *off_in,
+ struct glfs_fd *glfd_out, off64_t *off_out, size_t len,
+ unsigned int flags, struct glfs_stat *statbuf,
+ struct glfs_stat *prestat, struct glfs_stat *poststat)
+{
+ xlator_t *subvol = NULL;
+ int ret = -1;
+ fd_t *fd_in = NULL;
+ fd_t *fd_out = NULL;
+ struct iatt preiatt =
+ {
+ 0,
+ },
+ iattbuf =
+ {
+ 0,
+ },
+ postiatt = {
+ 0,
+ };
+ dict_t *fop_attr = NULL;
+ off64_t pos_in;
+ off64_t pos_out;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd_in, invalid_fs);
+ __GLFS_ENTRY_VALIDATE_FD(glfd_out, invalid_fs);
+
+ GF_REF_GET(glfd_in);
+ GF_REF_GET(glfd_out);
+
+ if (glfd_in->fs != glfd_out->fs) {
+ ret = -1;
+ errno = EXDEV;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(glfd_in->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd_in = glfs_resolve_fd(glfd_in->fs, subvol, glfd_in);
+ if (!fd_in) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ fd_out = glfs_resolve_fd(glfd_out->fs, subvol, glfd_out);
+ if (!fd_out) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ /*
+ * This is based on how the vfs layer in the kernel handles
+ * copy_file_range call. Upon receiving it follows the
+ * below method to consider the offset.
+ * if (off_in != NULL)
+ * use the value off_in to perform the op
+ * else if off_in == NULL
+ * use the current file offset position to perform the op
+ *
+ * For gfapi, glfd->offset is used. For a freshly opened
+ * fd, the offset is set to 0.
+ */
+ if (off_in)
+ pos_in = *off_in;
+ else
+ pos_in = glfd_in->offset;
+
+ if (off_out)
+ pos_out = *off_out;
+ else
+ pos_out = glfd_out->offset;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_copy_file_range(subvol, fd_in, pos_in, fd_out, pos_out, len,
+ flags, &iattbuf, &preiatt, &postiatt, fop_attr,
+ NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret >= 0) {
+ pos_in += ret;
+ pos_out += ret;
+
+ if (off_in)
+ *off_in = pos_in;
+ if (off_out)
+ *off_out = pos_out;
+
+ if (statbuf)
+ glfs_iatt_to_statx(glfd_in->fs, &iattbuf, statbuf);
+ if (prestat)
+ glfs_iatt_to_statx(glfd_in->fs, &preiatt, prestat);
+ if (poststat)
+ glfs_iatt_to_statx(glfd_in->fs, &postiatt, poststat);
+ }
+
+ if (ret <= 0)
+ goto out;
+
+ /*
+ * If *off_in is NULL, then there is no offset info that can
+ * obtained from the input argument. Hence follow below method.
+ * If *off_in is NULL, then
+ * glfd->offset = offset + ret;
+ * else
+ * do nothing.
+ *
+ * According to the man page of copy_file_range, if off_in is
+ * NULL, then the offset of the source file is advanced by
+ * the return value of the fop. The same applies to off_out as
+ * well. Otherwise, if *off_in is not NULL, then the offset
+ * is not advanced by the filesystem. The entity which sends
+ * the copy_file_range call is supposed to advance the offset
+ * value in its buffer (pointed to by *off_in or *off_out)
+ * by the return value of copy_file_range.
+ */
+ if (!off_in)
+ glfd_in->offset += ret;
+
+ if (!off_out)
+ glfd_out->offset += ret;
+
+out:
+ if (fd_in)
+ fd_unref(fd_in);
+ if (fd_out)
+ fd_unref(fd_out);
+ if (glfd_in)
+ GF_REF_PUT(glfd_in);
+ if (glfd_out)
+ GF_REF_PUT(glfd_out);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd_in->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwritev, 3.4.0)
+ssize_t
+pub_glfs_pwritev(struct glfs_fd *glfd, const struct iovec *iovec, int iovcnt,
+ off_t offset, int flags)
+{
+ return glfs_pwritev_common(glfd, iovec, iovcnt, offset, flags, NULL, NULL);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_write, 3.4.0)
+ssize_t
+pub_glfs_write(struct glfs_fd *glfd, const void *buf, size_t count, int flags)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = pub_glfs_pwritev(glfd, &iov, 1, glfd->offset, flags);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_writev, 3.4.0)
+ssize_t
+pub_glfs_writev(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags)
+{
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ ret = pub_glfs_pwritev(glfd, iov, count, glfd->offset, flags);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_pwrite34, glfs_pwrite, 3.4.0)
+ssize_t
+pub_glfs_pwrite34(struct glfs_fd *glfd, const void *buf, size_t count,
+ off_t offset, int flags)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = pub_glfs_pwritev(glfd, &iov, 1, offset, flags);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwrite, 6.0)
+ssize_t
+pub_glfs_pwrite(struct glfs_fd *glfd, const void *buf, size_t count,
+ off_t offset, int flags, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev_common(glfd, &iov, 1, offset, flags, prestat, poststat);
+
+ return ret;
+}
+
+extern glfs_t *
+pub_glfs_from_glfd(glfs_fd_t *);
+
+static int
+glfs_pwritev_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ glfs_io_async_cbk(op_ret, op_errno, frame, cookie, NULL, 0, prebuf,
+ postbuf);
+
+ return 0;
+}
+
+static int
+glfs_pwritev_async_common(struct glfs_fd *glfd, const struct iovec *iovec,
+ int count, off_t offset, int flags,
+ gf_boolean_t oldcb, glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ /* Need to take explicit ref so that the fd
+ * is not destroyed before the fop is complete
+ */
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto out;
+ }
+
+ gio = GF_CALLOC(1, sizeof(*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio->op = GF_FOP_WRITE;
+ gio->glfd = glfd;
+ gio->offset = offset;
+ gio->flags = flags;
+ gio->oldcb = oldcb;
+ gio->fn = fn;
+ gio->data = data;
+ gio->count = 1;
+ gio->iov = GF_CALLOC(gio->count, sizeof(*(gio->iov)), gf_common_mt_iovec);
+ if (!gio->iov) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = iobuf_copy(subvol->ctx->iobuf_pool, iovec, count, &iobref, &iobuf,
+ gio->iov);
+ if (ret)
+ goto out;
+
+ frame = syncop_create_frame(THIS);
+ if (!frame) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ frame->local = gio;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ STACK_WIND_COOKIE(frame, glfs_pwritev_async_cbk, subvol, subvol,
+ subvol->fops->writev, fd, gio->iov, gio->count, offset,
+ flags, iobref, fop_attr);
+
+ ret = 0;
+out:
+ if (ret) {
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ GF_FREE(gio);
+ /*
+ * If there is any error condition check after the frame
+ * creation, we have to destroy the frame root.
+ */
+ glfs_subvol_done(glfd->fs, subvol);
+ }
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ if (iobuf)
+ iobuf_unref(iobuf);
+ if (iobref)
+ iobref_unref(iobref);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_pwritev_async34, glfs_pwritev_async, 3.4.0)
+int
+pub_glfs_pwritev_async34(struct glfs_fd *glfd, const struct iovec *iovec,
+ int count, off_t offset, int flags, glfs_io_cbk34 fn,
+ void *data)
+{
+ return glfs_pwritev_async_common(glfd, iovec, count, offset, flags,
+ _gf_true, (void *)fn, data);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwritev_async, 6.0)
+int
+pub_glfs_pwritev_async(struct glfs_fd *glfd, const struct iovec *iovec,
+ int count, off_t offset, int flags, glfs_io_cbk fn,
+ void *data)
+{
+ return glfs_pwritev_async_common(glfd, iovec, count, offset, flags,
+ _gf_false, fn, data);
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_write_async34, glfs_write_async, 3.4.0)
+int
+pub_glfs_write_async34(struct glfs_fd *glfd, const void *buf, size_t count,
+ int flags, glfs_io_cbk34 fn, void *data)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev_async_common(glfd, &iov, 1, glfd->offset, flags,
+ _gf_true, (void *)fn, data);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_write_async, 6.0)
+int
+pub_glfs_write_async(struct glfs_fd *glfd, const void *buf, size_t count,
+ int flags, glfs_io_cbk fn, void *data)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev_async_common(glfd, &iov, 1, glfd->offset, flags,
+ _gf_false, fn, data);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_pwrite_async34, glfs_pwrite_async, 3.4.0)
+int
+pub_glfs_pwrite_async34(struct glfs_fd *glfd, const void *buf, int count,
+ off_t offset, int flags, glfs_io_cbk34 fn, void *data)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev_async_common(glfd, &iov, 1, offset, flags, _gf_true,
+ (void *)fn, data);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_pwrite_async, 6.0)
+int
+pub_glfs_pwrite_async(struct glfs_fd *glfd, const void *buf, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = glfs_pwritev_async_common(glfd, &iov, 1, offset, flags, _gf_false, fn,
+ data);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_writev_async34, glfs_writev_async, 3.4.0)
+int
+pub_glfs_writev_async34(struct glfs_fd *glfd, const struct iovec *iov,
+ int count, int flags, glfs_io_cbk34 fn, void *data)
+{
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ ret = glfs_pwritev_async_common(glfd, iov, count, glfd->offset, flags,
+ _gf_true, (void *)fn, data);
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_writev_async, 6.0)
+int
+pub_glfs_writev_async(struct glfs_fd *glfd, const struct iovec *iov, int count,
+ int flags, glfs_io_cbk fn, void *data)
+{
+ ssize_t ret = 0;
+
+ if (glfd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ ret = glfs_pwritev_async_common(glfd, iov, count, glfd->offset, flags,
+ _gf_false, fn, data);
+ return ret;
+}
+
+static int
+glfs_fsync_common(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ struct iatt preiatt =
+ {
+ 0,
+ },
+ postiatt = {
+ 0,
+ };
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_fsync(subvol, fd, 0, &preiatt, &postiatt, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret >= 0) {
+ if (prestat)
+ glfs_iatt_to_statx(glfd->fs, &preiatt, prestat);
+ if (poststat)
+ glfs_iatt_to_statx(glfd->fs, &postiatt, poststat);
+ }
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_fsync34, glfs_fsync, 3.4.0)
+int
+pub_glfs_fsync34(struct glfs_fd *glfd)
+{
+ return glfs_fsync_common(glfd, NULL, NULL);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsync, 6.0)
+int
+pub_glfs_fsync(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
+{
+ return glfs_fsync_common(glfd, prestat, poststat);
+}
+
+static int
+glfs_fsync_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ glfs_io_async_cbk(op_ret, op_errno, frame, cookie, NULL, 0, prebuf,
+ postbuf);
+
+ return 0;
+}
+
+static int
+glfs_fsync_async_common(struct glfs_fd *glfd, gf_boolean_t oldcb,
+ glfs_io_cbk fn, void *data, int dataonly)
+{
+ struct glfs_io *gio = NULL;
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ /* Need to take explicit ref so that the fd
+ * is not destroyed before the fop is complete
+ */
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ frame = syncop_create_frame(THIS);
+ if (!frame) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio = GF_CALLOC(1, sizeof(*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ gio->op = GF_FOP_FSYNC;
+ gio->glfd = glfd;
+ gio->flags = dataonly;
+ gio->oldcb = oldcb;
+ gio->fn = fn;
+ gio->data = data;
+
+ frame->local = gio;
+
+ STACK_WIND_COOKIE(frame, glfs_fsync_async_cbk, subvol, subvol,
+ subvol->fops->fsync, fd, dataonly, NULL);
+
+out:
+ if (ret) {
+ if (fd)
+ fd_unref(fd);
+ GF_REF_PUT(glfd);
+ GF_FREE(gio);
+ if (frame)
+ STACK_DESTROY(frame->root);
+ glfs_subvol_done(glfd->fs, subvol);
+ }
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_fsync_async34, glfs_fsync_async, 3.4.0)
+int
+pub_glfs_fsync_async34(struct glfs_fd *glfd, glfs_io_cbk34 fn, void *data)
+{
+ int ret = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ ret = glfs_fsync_async_common(glfd, _gf_true, (void *)fn, data, 0);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsync_async, 6.0)
+int
+pub_glfs_fsync_async(struct glfs_fd *glfd, glfs_io_cbk fn, void *data)
+{
+ int ret = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ ret = glfs_fsync_async_common(glfd, _gf_false, fn, data, 0);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+static int
+glfs_fdatasync_common(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ struct iatt preiatt =
+ {
+ 0,
+ },
+ postiatt = {
+ 0,
+ };
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_fsync(subvol, fd, 1, &preiatt, &postiatt, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret >= 0) {
+ if (prestat)
+ glfs_iatt_to_statx(glfd->fs, &preiatt, prestat);
+ if (poststat)
+ glfs_iatt_to_statx(glfd->fs, &postiatt, poststat);
+ }
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_fdatasync34, glfs_fdatasync, 3.4.0)
+int
+pub_glfs_fdatasync34(struct glfs_fd *glfd)
+{
+ return glfs_fdatasync_common(glfd, NULL, NULL);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fdatasync, 6.0)
+int
+pub_glfs_fdatasync(struct glfs_fd *glfd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat)
+{
+ return glfs_fdatasync_common(glfd, prestat, poststat);
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_fdatasync_async34, glfs_fdatasync_async, 3.4.0)
+int
+pub_glfs_fdatasync_async34(struct glfs_fd *glfd, glfs_io_cbk34 fn, void *data)
+{
+ int ret = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ ret = glfs_fsync_async_common(glfd, _gf_true, (void *)fn, data, 1);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fdatasync_async, 6.0)
+int
+pub_glfs_fdatasync_async(struct glfs_fd *glfd, glfs_io_cbk fn, void *data)
+{
+ int ret = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ ret = glfs_fsync_async_common(glfd, _gf_false, fn, data, 1);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+static int
+glfs_ftruncate_common(struct glfs_fd *glfd, off_t offset,
+ struct glfs_stat *prestat, struct glfs_stat *poststat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ struct iatt preiatt =
+ {
+ 0,
+ },
+ postiatt = {
+ 0,
+ };
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_ftruncate(subvol, fd, offset, &preiatt, &postiatt, fop_attr,
+ NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret >= 0) {
+ if (prestat)
+ glfs_iatt_to_statx(glfd->fs, &preiatt, prestat);
+ if (poststat)
+ glfs_iatt_to_statx(glfd->fs, &postiatt, poststat);
+ }
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_ftruncate34, glfs_ftruncate, 3.4.0)
+int
+pub_glfs_ftruncate34(struct glfs_fd *glfd, off_t offset)
+{
+ return glfs_ftruncate_common(glfd, offset, NULL, NULL);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ftruncate, 6.0)
+int
+pub_glfs_ftruncate(struct glfs_fd *glfd, off_t offset,
+ struct glfs_stat *prestat, struct glfs_stat *poststat)
+{
+ return glfs_ftruncate_common(glfd, offset, prestat, poststat);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_truncate, 3.7.15)
+int
+pub_glfs_truncate(struct glfs *fs, const char *path, off_t length)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_truncate(subvol, &loc, length, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+static int
+glfs_ftruncate_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ glfs_io_async_cbk(op_ret, op_errno, frame, cookie, NULL, 0, prebuf,
+ postbuf);
+
+ return 0;
+}
+
+static int
+glfs_ftruncate_async_common(struct glfs_fd *glfd, off_t offset,
+ gf_boolean_t oldcb, glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ /* Need to take explicit ref so that the fd
+ * is not destroyed before the fop is complete
+ */
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto out;
+ }
+
+ frame = syncop_create_frame(THIS);
+ if (!frame) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio = GF_CALLOC(1, sizeof(*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio->op = GF_FOP_FTRUNCATE;
+ gio->glfd = glfd;
+ gio->offset = offset;
+ gio->oldcb = oldcb;
+ gio->fn = fn;
+ gio->data = data;
+
+ frame->local = gio;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ STACK_WIND_COOKIE(frame, glfs_ftruncate_async_cbk, subvol, subvol,
+ subvol->fops->ftruncate, fd, offset, fop_attr);
+
+ ret = 0;
+
+out:
+ if (ret) {
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ GF_FREE(gio);
+ if (frame)
+ STACK_DESTROY(frame->root);
+ glfs_subvol_done(glfd->fs, subvol);
+ }
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_ftruncate_async34, glfs_ftruncate_async, 3.4.0)
+int
+pub_glfs_ftruncate_async34(struct glfs_fd *glfd, off_t offset, glfs_io_cbk34 fn,
+ void *data)
+{
+ return glfs_ftruncate_async_common(glfd, offset, _gf_true, (void *)fn,
+ data);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_ftruncate_async, 6.0)
+int
+pub_glfs_ftruncate_async(struct glfs_fd *glfd, off_t offset, glfs_io_cbk fn,
+ void *data)
+{
+ return glfs_ftruncate_async_common(glfd, offset, _gf_false, fn, data);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_access, 3.4.0)
+int
+pub_glfs_access(struct glfs *fs, const char *path, int mode)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_access(subvol, &loc, mode, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_symlink, 3.4.0)
+int
+pub_glfs_symlink(struct glfs *fs, const char *data, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (loc.inode) {
+ errno = EEXIST;
+ ret = -1;
+ goto out;
+ }
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ /* ret == -1 && errno == ENOENT */
+ loc.inode = inode_new(loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_symlink(subvol, &loc, data, &iatt, xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link(&loc, &iatt);
+out:
+ loc_wipe(&loc);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readlink, 3.4.0)
+int
+pub_glfs_readlink(struct glfs *fs, const char *path, char *buf, size_t bufsiz)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+ char *linkval = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (iatt.ia_type != IA_IFLNK) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ ret = syncop_readlink(subvol, &loc, &linkval, bufsiz, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret > 0) {
+ memcpy(buf, linkval, ret);
+ GF_FREE(linkval);
+ }
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_mknod, 3.4.0)
+int
+pub_glfs_mknod(struct glfs *fs, const char *path, mode_t mode, dev_t dev)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (loc.inode) {
+ errno = EEXIST;
+ ret = -1;
+ goto out;
+ }
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ /* ret == -1 && errno == ENOENT */
+ loc.inode = inode_new(loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_mknod(subvol, &loc, mode, dev, &iatt, xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link(&loc, &iatt);
+out:
+ loc_wipe(&loc);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_mkdir, 3.4.0)
+int
+pub_glfs_mkdir(struct glfs *fs, const char *path, mode_t mode)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (loc.inode) {
+ errno = EEXIST;
+ ret = -1;
+ goto out;
+ }
+
+ if (ret == -1 && errno != ENOENT)
+ /* Any other type of error is fatal */
+ goto out;
+
+ if (ret == -1 && errno == ENOENT && !loc.parent)
+ /* The parent directory or an ancestor even
+ higher does not exist
+ */
+ goto out;
+
+ /* ret == -1 && errno == ENOENT */
+ loc.inode = inode_new(loc.parent->table);
+ if (!loc.inode) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_mkdir(subvol, &loc, mode, &iatt, xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_link(&loc, &iatt);
+out:
+ loc_wipe(&loc);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_unlink, 3.4.0)
+int
+pub_glfs_unlink(struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (iatt.ia_type == IA_IFDIR) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ /* TODO: Add leaseid */
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_unlink(&loc);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_rmdir, 3.4.0)
+int
+pub_glfs_rmdir(struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (iatt.ia_type != IA_IFDIR) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ ret = syncop_rmdir(subvol, &loc, 0, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret == 0)
+ ret = glfs_loc_unlink(&loc);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_rename, 3.4.0)
+int
+pub_glfs_rename(struct glfs *fs, const char *oldpath, const char *newpath)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t oldloc = {
+ 0,
+ };
+ loc_t newloc = {
+ 0,
+ };
+ struct iatt oldiatt = {
+ 0,
+ };
+ struct iatt newiatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, oldpath, &oldloc, &oldiatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &oldloc, retry);
+
+ if (ret)
+ goto out;
+retrynew:
+ ret = glfs_lresolve(fs, subvol, newpath, &newloc, &newiatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &newloc, retrynew);
+
+ if (ret && errno != ENOENT && newloc.parent)
+ goto out;
+
+ if (newiatt.ia_type != IA_INVAL) {
+ if ((oldiatt.ia_type == IA_IFDIR) != (newiatt.ia_type == IA_IFDIR)) {
+ /* Either both old and new must be dirs,
+ * or both must be non-dirs. Else, fail.
+ */
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+ }
+
+ /* TODO: - check if new or old is a prefix of the other, and fail EINVAL
+ * - Add leaseid */
+
+ ret = syncop_rename(subvol, &oldloc, &newloc, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret == -1 && errno == ESTALE) {
+ if (reval < DEFAULT_REVAL_COUNT) {
+ reval++;
+ loc_wipe(&oldloc);
+ loc_wipe(&newloc);
+ goto retry;
+ }
+ }
+
+ if (ret == 0) {
+ inode_rename(oldloc.parent->table, oldloc.parent, oldloc.name,
+ newloc.parent, newloc.name, oldloc.inode, &oldiatt);
+
+ if (newloc.inode && !inode_has_dentry(newloc.inode))
+ inode_forget(newloc.inode, 0);
+ }
+out:
+ loc_wipe(&oldloc);
+ loc_wipe(&newloc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_link, 3.4.0)
+int
+pub_glfs_link(struct glfs *fs, const char *oldpath, const char *newpath)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t oldloc = {
+ 0,
+ };
+ loc_t newloc = {
+ 0,
+ };
+ struct iatt oldiatt = {
+ 0,
+ };
+ struct iatt newiatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_lresolve(fs, subvol, oldpath, &oldloc, &oldiatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &oldloc, retry);
+
+ if (ret)
+ goto out;
+retrynew:
+ ret = glfs_lresolve(fs, subvol, newpath, &newloc, &newiatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &newloc, retrynew);
+
+ if (ret == 0) {
+ ret = -1;
+ errno = EEXIST;
+ goto out;
+ }
+
+ if (oldiatt.ia_type == IA_IFDIR) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ /* Filling the inode of the hard link to be same as that of the
+ original file
+ */
+ if (newloc.inode) {
+ inode_unref(newloc.inode);
+ newloc.inode = NULL;
+ }
+ newloc.inode = inode_ref(oldloc.inode);
+
+ ret = syncop_link(subvol, &oldloc, &newloc, &newiatt, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret == -1 && errno == ESTALE) {
+ loc_wipe(&oldloc);
+ loc_wipe(&newloc);
+ if (reval--)
+ goto retry;
+ }
+
+ if (ret == 0)
+ ret = glfs_loc_link(&newloc, &newiatt);
+out:
+ loc_wipe(&oldloc);
+ loc_wipe(&newloc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_opendir, 3.4.0)
+struct glfs_fd *
+pub_glfs_opendir(struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ glfd = glfs_fd_new(fs);
+ if (!glfd)
+ goto out;
+
+ INIT_LIST_HEAD(&glfd->entries);
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (!IA_ISDIR(iatt.ia_type)) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ if (glfd->fd) {
+ /* Retry. Safe to touch glfd->fd as we
+ still have not glfs_fd_bind() yet.
+ */
+ fd_unref(glfd->fd);
+ glfd->fd = NULL;
+ }
+
+ glfd->fd = fd_create(loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_opendir(subvol, &loc, glfd->fd, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+out:
+ loc_wipe(&loc);
+
+ if (ret && glfd) {
+ GF_REF_PUT(glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ glfd_set_state_bind(glfd);
+ }
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return glfd;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_closedir, 3.4.0)
+int
+pub_glfs_closedir(struct glfs_fd *glfd)
+{
+ int ret = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ gf_dirent_free(list_entry(&glfd->entries, gf_dirent_t, list));
+
+ glfs_mark_glfd_for_deletion(glfd);
+
+ __GLFS_EXIT_FS;
+
+ ret = 0;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_telldir, 3.4.0)
+long
+pub_glfs_telldir(struct glfs_fd *fd)
+{
+ if (fd == NULL) {
+ errno = EBADF;
+ return -1;
+ }
+
+ return fd->offset;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_seekdir, 3.4.0)
+void
+pub_glfs_seekdir(struct glfs_fd *fd, long offset)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+
+ if (fd == NULL) {
+ errno = EBADF;
+ return;
+ }
+
+ if (fd->offset == offset)
+ return;
+
+ fd->offset = offset;
+ fd->next = NULL;
+
+ list_for_each_entry_safe(entry, tmp, &fd->entries, list)
+ {
+ if (entry->d_off != offset)
+ continue;
+
+ if (&tmp->list != &fd->entries) {
+ /* found! */
+ fd->next = tmp;
+ return;
+ }
+ }
+ /* could not find entry at requested offset in the cache.
+ next readdir_r() will result in glfd_entry_refresh()
+ */
+}
+
+static int
+glfs_discard_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop_stbuf, struct iatt *postop_stbuf,
+ dict_t *xdata)
+{
+ glfs_io_async_cbk(op_ret, op_errno, frame, cookie, NULL, 0, preop_stbuf,
+ postop_stbuf);
+
+ return 0;
+}
+
+static int
+glfs_discard_async_common(struct glfs_fd *glfd, off_t offset, size_t len,
+ gf_boolean_t oldcb, glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ /* Need to take explicit ref so that the fd
+ * is not destroyed before the fop is complete
+ */
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto out;
+ }
+
+ frame = syncop_create_frame(THIS);
+ if (!frame) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio = GF_CALLOC(1, sizeof(*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio->op = GF_FOP_DISCARD;
+ gio->glfd = glfd;
+ gio->offset = offset;
+ gio->count = len;
+ gio->oldcb = oldcb;
+ gio->fn = fn;
+ gio->data = data;
+
+ frame->local = gio;
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ STACK_WIND_COOKIE(frame, glfs_discard_async_cbk, subvol, subvol,
+ subvol->fops->discard, fd, offset, len, fop_attr);
+
+ ret = 0;
+out:
+ if (ret) {
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ GF_FREE(gio);
+ if (frame)
+ STACK_DESTROY(frame->root);
+ glfs_subvol_done(glfd->fs, subvol);
+ }
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_discard_async35, glfs_discard_async, 3.5.0)
+int
+pub_glfs_discard_async35(struct glfs_fd *glfd, off_t offset, size_t len,
+ glfs_io_cbk34 fn, void *data)
+{
+ return glfs_discard_async_common(glfd, offset, len, _gf_true, (void *)fn,
+ data);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_discard_async, 6.0)
+int
+pub_glfs_discard_async(struct glfs_fd *glfd, off_t offset, size_t len,
+ glfs_io_cbk fn, void *data)
+{
+ return glfs_discard_async_common(glfd, offset, len, _gf_false, fn, data);
+}
+
+static int
+glfs_zerofill_async_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop_stbuf, struct iatt *postop_stbuf,
+ dict_t *xdata)
+{
+ glfs_io_async_cbk(op_ret, op_errno, frame, cookie, NULL, 0, preop_stbuf,
+ postop_stbuf);
+
+ return 0;
+}
+
+static int
+glfs_zerofill_async_common(struct glfs_fd *glfd, off_t offset, off_t len,
+ gf_boolean_t oldcb, glfs_io_cbk fn, void *data)
+{
+ struct glfs_io *gio = NULL;
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ /* Need to take explicit ref so that the fd
+ * is not destroyed before the fop is complete
+ */
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto out;
+ }
+
+ frame = syncop_create_frame(THIS);
+ if (!frame) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio = GF_CALLOC(1, sizeof(*gio), glfs_mt_glfs_io_t);
+ if (!gio) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gio->op = GF_FOP_ZEROFILL;
+ gio->glfd = glfd;
+ gio->offset = offset;
+ gio->count = len;
+ gio->oldcb = oldcb;
+ gio->fn = fn;
+ gio->data = data;
+
+ frame->local = gio;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ STACK_WIND_COOKIE(frame, glfs_zerofill_async_cbk, subvol, subvol,
+ subvol->fops->zerofill, fd, offset, len, fop_attr);
+ ret = 0;
+out:
+ if (ret) {
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ GF_FREE(gio);
+ if (frame)
+ STACK_DESTROY(frame->root);
+ glfs_subvol_done(glfd->fs, subvol);
+ }
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_zerofill_async35, glfs_zerofill_async, 3.5.0)
+int
+pub_glfs_zerofill_async35(struct glfs_fd *glfd, off_t offset, off_t len,
+ glfs_io_cbk34 fn, void *data)
+{
+ return glfs_zerofill_async_common(glfd, offset, len, _gf_true, (void *)fn,
+ data);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_zerofill_async, 6.0)
+int
+pub_glfs_zerofill_async(struct glfs_fd *glfd, off_t offset, off_t len,
+ glfs_io_cbk fn, void *data)
+{
+ return glfs_zerofill_async_common(glfd, offset, len, _gf_false, fn, data);
+}
+
+void
+gf_dirent_to_dirent(gf_dirent_t *gf_dirent, struct dirent *dirent)
+{
+ dirent->d_ino = gf_dirent->d_ino;
+
+#ifdef _DIRENT_HAVE_D_OFF
+ dirent->d_off = gf_dirent->d_off;
+#endif
+
+#ifdef _DIRENT_HAVE_D_TYPE
+ dirent->d_type = gf_dirent->d_type;
+#endif
+
+#ifdef _DIRENT_HAVE_D_NAMLEN
+ dirent->d_namlen = strlen(gf_dirent->d_name);
+#endif
+
+ snprintf(dirent->d_name, NAME_MAX + 1, "%s", gf_dirent->d_name);
+}
+
+int
+glfd_entry_refresh(struct glfs_fd *glfd, int plus)
+{
+ xlator_t *subvol = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t old;
+ gf_dirent_t *entry = NULL;
+ int ret = -1;
+ fd_t *fd = NULL;
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ if (fd->inode->ia_type != IA_IFDIR) {
+ ret = -1;
+ errno = EBADF;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&entries.list);
+ INIT_LIST_HEAD(&old.list);
+
+ if (plus)
+ ret = syncop_readdirp(subvol, fd, 131072, glfd->offset, &entries, NULL,
+ NULL);
+ else
+ ret = syncop_readdir(subvol, fd, 131072, glfd->offset, &entries, NULL,
+ NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret >= 0) {
+ if (plus) {
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ if ((!entry->inode && (!IA_ISDIR(entry->d_stat.ia_type))) ||
+ ((entry->d_stat.ia_ctime == 0) &&
+ strcmp(entry->d_name, ".") &&
+ strcmp(entry->d_name, ".."))) {
+ /* entry->inode for directories will be
+ * always set to null to force a lookup
+ * on the dentry. Hence to not degrade
+ * readdir performance, we skip lookups
+ * for directory entries. Also we will have
+ * proper stat if directory present on
+ * hashed subvolume.
+ *
+ * In addition, if the stat is invalid, force
+ * lookup to fetch proper stat.
+ */
+ gf_fill_iatt_for_dirent(entry, fd->inode, subvol);
+ }
+ }
+
+ gf_link_inodes_from_dirent(THIS, fd->inode, &entries);
+ }
+
+ list_splice_init(&glfd->entries, &old.list);
+ list_splice_init(&entries.list, &glfd->entries);
+
+ /* spurious errno is dangerous for glfd_entry_next() */
+ errno = 0;
+ }
+
+ if (ret > 0)
+ glfd->next = list_entry(glfd->entries.next, gf_dirent_t, list);
+
+ gf_dirent_free(&old);
+out:
+ if (fd)
+ fd_unref(fd);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ return ret;
+}
+
+gf_dirent_t *
+glfd_entry_next(struct glfs_fd *glfd, int plus)
+{
+ gf_dirent_t *entry = NULL;
+ int ret = -1;
+
+ if (!glfd->offset || !glfd->next) {
+ ret = glfd_entry_refresh(glfd, plus);
+ if (ret < 0)
+ return NULL;
+ }
+
+ entry = glfd->next;
+ if (!entry)
+ return NULL;
+
+ if (&entry->next->list == &glfd->entries)
+ glfd->next = NULL;
+ else
+ glfd->next = entry->next;
+
+ glfd->offset = entry->d_off;
+
+ return entry;
+}
+
+struct dirent *
+glfs_readdirbuf_get(struct glfs_fd *glfd)
+{
+ struct dirent *buf = NULL;
+
+ LOCK(&glfd->fd->lock);
+ {
+ buf = glfd->readdirbuf;
+ if (buf) {
+ memset(buf, 0, READDIRBUF_SIZE);
+ goto unlock;
+ }
+
+ buf = GF_CALLOC(1, READDIRBUF_SIZE, glfs_mt_readdirbuf_t);
+ if (!buf) {
+ errno = ENOMEM;
+ goto unlock;
+ }
+
+ glfd->readdirbuf = buf;
+ }
+unlock:
+ UNLOCK(&glfd->fd->lock);
+
+ return buf;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdirplus_r, 3.4.0)
+int
+pub_glfs_readdirplus_r(struct glfs_fd *glfd, struct stat *stat,
+ struct dirent *ext, struct dirent **res)
+{
+ int ret = 0;
+ gf_dirent_t *entry = NULL;
+ struct dirent *buf = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ errno = 0;
+
+ if (ext)
+ buf = ext;
+ else
+ buf = glfs_readdirbuf_get(glfd);
+
+ if (!buf) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ entry = glfd_entry_next(glfd, !!stat);
+ if (errno)
+ ret = -1;
+
+ if (res) {
+ if (entry)
+ *res = buf;
+ else
+ *res = NULL;
+ }
+
+ if (entry) {
+ gf_dirent_to_dirent(entry, buf);
+ if (stat)
+ glfs_iatt_to_stat(glfd->fs, &entry->d_stat, stat);
+ }
+
+out:
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ __GLFS_EXIT_FS;
+
+ return ret;
+
+invalid_fs:
+ return -1;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdir_r, 3.4.0)
+int
+pub_glfs_readdir_r(struct glfs_fd *glfd, struct dirent *buf,
+ struct dirent **res)
+{
+ return pub_glfs_readdirplus_r(glfd, 0, buf, res);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdirplus, 3.5.0)
+struct dirent *
+pub_glfs_readdirplus(struct glfs_fd *glfd, struct stat *stat)
+{
+ struct dirent *res = NULL;
+ int ret = -1;
+
+ ret = pub_glfs_readdirplus_r(glfd, stat, NULL, &res);
+ if (ret)
+ return NULL;
+
+ return res;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_readdir, 3.5.0)
+struct dirent *
+pub_glfs_readdir(struct glfs_fd *glfd)
+{
+ return pub_glfs_readdirplus(glfd, NULL);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_statvfs, 3.4.0)
+int
+pub_glfs_statvfs(struct glfs *fs, const char *path, struct statvfs *buf)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_statfs(subvol, &loc, buf, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setattr, 6.0)
+int
+pub_glfs_setattr(struct glfs *fs, const char *path, struct glfs_stat *stat,
+ int follow)
+{
+ int ret = -1;
+ int glvalid;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt riatt = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ GF_VALIDATE_OR_GOTO("glfs_setattr", stat, out);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ if (follow)
+ ret = glfs_resolve(fs, subvol, path, &loc, &riatt, reval);
+ else
+ ret = glfs_lresolve(fs, subvol, path, &loc, &riatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ glfs_iatt_from_statx(&iatt, stat);
+ glfsflags_from_gfapiflags(stat, &glvalid);
+
+ /* TODO : Add leaseid */
+ ret = syncop_setattr(subvol, &loc, &iatt, glvalid, 0, 0, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsetattr, 6.0)
+int
+pub_glfs_fsetattr(struct glfs_fd *glfd, struct glfs_stat *stat)
+{
+ int ret = -1;
+ int glvalid;
+ struct iatt iatt = {
+ 0,
+ };
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ GF_VALIDATE_OR_GOTO("glfs_fsetattr", stat, out);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ glfs_iatt_from_statx(&iatt, stat);
+ glfsflags_from_gfapiflags(stat, &glvalid);
+
+ /* TODO : Add leaseid */
+ ret = syncop_fsetattr(subvol, fd, &iatt, glvalid, 0, 0, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chmod, 3.4.0)
+int
+pub_glfs_chmod(struct glfs *fs, const char *path, mode_t mode)
+{
+ int ret = -1;
+ struct glfs_stat stat = {
+ 0,
+ };
+
+ stat.glfs_st_mode = mode;
+ stat.glfs_st_mask = GLFS_STAT_MODE;
+
+ ret = glfs_setattr(fs, path, &stat, 1);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchmod, 3.4.0)
+int
+pub_glfs_fchmod(struct glfs_fd *glfd, mode_t mode)
+{
+ int ret = -1;
+ struct glfs_stat stat = {
+ 0,
+ };
+
+ stat.glfs_st_mode = mode;
+ stat.glfs_st_mask = GLFS_STAT_MODE;
+
+ ret = glfs_fsetattr(glfd, &stat);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chown, 3.4.0)
+int
+pub_glfs_chown(struct glfs *fs, const char *path, uid_t uid, gid_t gid)
+{
+ int ret = 0;
+ struct glfs_stat stat = {
+ 0,
+ };
+
+ if (uid != (uid_t)-1) {
+ stat.glfs_st_uid = uid;
+ stat.glfs_st_mask = GLFS_STAT_UID;
+ }
+
+ if (gid != (uid_t)-1) {
+ stat.glfs_st_gid = gid;
+ stat.glfs_st_mask = stat.glfs_st_mask | GLFS_STAT_GID;
+ }
+
+ if (stat.glfs_st_mask)
+ ret = glfs_setattr(fs, path, &stat, 1);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lchown, 3.4.0)
+int
+pub_glfs_lchown(struct glfs *fs, const char *path, uid_t uid, gid_t gid)
+{
+ int ret = 0;
+ struct glfs_stat stat = {
+ 0,
+ };
+
+ if (uid != (uid_t)-1) {
+ stat.glfs_st_uid = uid;
+ stat.glfs_st_mask = GLFS_STAT_UID;
+ }
+
+ if (gid != (uid_t)-1) {
+ stat.glfs_st_gid = gid;
+ stat.glfs_st_mask = stat.glfs_st_mask | GLFS_STAT_GID;
+ }
+
+ if (stat.glfs_st_mask)
+ ret = glfs_setattr(fs, path, &stat, 0);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchown, 3.4.0)
+int
+pub_glfs_fchown(struct glfs_fd *glfd, uid_t uid, gid_t gid)
+{
+ int ret = 0;
+ struct glfs_stat stat = {
+ 0,
+ };
+
+ if (uid != (uid_t)-1) {
+ stat.glfs_st_uid = uid;
+ stat.glfs_st_mask = GLFS_STAT_UID;
+ }
+
+ if (gid != (uid_t)-1) {
+ stat.glfs_st_gid = gid;
+ stat.glfs_st_mask = stat.glfs_st_mask | GLFS_STAT_GID;
+ }
+
+ if (stat.glfs_st_mask)
+ ret = glfs_fsetattr(glfd, &stat);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_utimens, 3.4.0)
+int
+pub_glfs_utimens(struct glfs *fs, const char *path,
+ const struct timespec times[2])
+{
+ int ret = -1;
+ struct glfs_stat stat = {
+ 0,
+ };
+
+ stat.glfs_st_atime = times[0];
+ stat.glfs_st_mtime = times[1];
+
+ stat.glfs_st_mask = GLFS_STAT_ATIME | GLFS_STAT_MTIME;
+
+ ret = glfs_setattr(fs, path, &stat, 1);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lutimens, 3.4.0)
+int
+pub_glfs_lutimens(struct glfs *fs, const char *path,
+ const struct timespec times[2])
+{
+ int ret = -1;
+ struct glfs_stat stat = {
+ 0,
+ };
+
+ stat.glfs_st_atime = times[0];
+ stat.glfs_st_mtime = times[1];
+
+ stat.glfs_st_mask = GLFS_STAT_ATIME | GLFS_STAT_MTIME;
+
+ ret = glfs_setattr(fs, path, &stat, 0);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_futimens, 3.4.0)
+int
+pub_glfs_futimens(struct glfs_fd *glfd, const struct timespec times[2])
+{
+ int ret = -1;
+ struct glfs_stat stat = {
+ 0,
+ };
+
+ stat.glfs_st_atime = times[0];
+ stat.glfs_st_mtime = times[1];
+
+ stat.glfs_st_mask = GLFS_STAT_ATIME | GLFS_STAT_MTIME;
+
+ ret = glfs_fsetattr(glfd, &stat);
+
+ return ret;
+}
+
+int
+glfs_getxattr_process(void *value, size_t size, dict_t *xattr, const char *name)
+{
+ data_t *data = NULL;
+ int ret = -1;
+
+ data = dict_get(xattr, (char *)name);
+ if (!data) {
+ errno = ENODATA;
+ ret = -1;
+ goto out;
+ }
+
+ ret = data->len;
+ if (!value || !size)
+ goto out;
+
+ if (size < ret) {
+ ret = -1;
+ errno = ERANGE;
+ goto out;
+ }
+
+ memcpy(value, data->data, ret);
+out:
+ return ret;
+}
+
+ssize_t
+glfs_getxattr_common(struct glfs *fs, const char *path, const char *name,
+ void *value, size_t size, int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (!name || *name == '\0') {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (strlen(name) > GF_XATTR_NAME_MAX) {
+ ret = -1;
+ errno = ENAMETOOLONG;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+retry:
+ if (follow)
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+ else
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_getxattr(subvol, &loc, &xattr, name, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = glfs_getxattr_process(value, size, xattr, name);
+out:
+ loc_wipe(&loc);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_getxattr, 3.4.0)
+ssize_t
+pub_glfs_getxattr(struct glfs *fs, const char *path, const char *name,
+ void *value, size_t size)
+{
+ return glfs_getxattr_common(fs, path, name, value, size, 1);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lgetxattr, 3.4.0)
+ssize_t
+pub_glfs_lgetxattr(struct glfs *fs, const char *path, const char *name,
+ void *value, size_t size)
+{
+ return glfs_getxattr_common(fs, path, name, value, size, 0);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fgetxattr, 3.4.0)
+ssize_t
+pub_glfs_fgetxattr(struct glfs_fd *glfd, const char *name, void *value,
+ size_t size)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ fd_t *fd = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ if (!name || *name == '\0') {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (strlen(name) > GF_XATTR_NAME_MAX) {
+ ret = -1;
+ errno = ENAMETOOLONG;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fgetxattr(subvol, fd, &xattr, name, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret)
+ goto out;
+
+ ret = glfs_getxattr_process(value, size, xattr, name);
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (xattr)
+ dict_unref(xattr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+int
+glfs_listxattr_process(void *value, size_t size, dict_t *xattr)
+{
+ int ret = -1;
+
+ if (!xattr)
+ goto out;
+
+ ret = dict_keys_join(NULL, 0, xattr, NULL);
+
+ if (!value || !size)
+ goto out;
+
+ if (size < ret) {
+ ret = -1;
+ errno = ERANGE;
+ } else {
+ dict_keys_join(value, size, xattr, NULL);
+ }
+
+out:
+ return ret;
+}
+
+ssize_t
+glfs_listxattr_common(struct glfs *fs, const char *path, void *value,
+ size_t size, int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+retry:
+ if (follow)
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+ else
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_getxattr(subvol, &loc, &xattr, NULL, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = glfs_listxattr_process(value, size, xattr);
+out:
+ loc_wipe(&loc);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_listxattr, 3.4.0)
+ssize_t
+pub_glfs_listxattr(struct glfs *fs, const char *path, void *value, size_t size)
+{
+ return glfs_listxattr_common(fs, path, value, size, 1);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_llistxattr, 3.4.0)
+ssize_t
+pub_glfs_llistxattr(struct glfs *fs, const char *path, void *value, size_t size)
+{
+ return glfs_listxattr_common(fs, path, value, size, 0);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_flistxattr, 3.4.0)
+ssize_t
+pub_glfs_flistxattr(struct glfs_fd *glfd, void *value, size_t size)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ fd_t *fd = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fgetxattr(subvol, fd, &xattr, NULL, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret)
+ goto out;
+
+ ret = glfs_listxattr_process(value, size, xattr);
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (xattr)
+ dict_unref(xattr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+int
+glfs_setxattr_common(struct glfs *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags, int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ int reval = 0;
+ void *value_cp = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (!name || *name == '\0') {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (strlen(name) > GF_XATTR_NAME_MAX) {
+ ret = -1;
+ errno = ENAMETOOLONG;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+retry:
+ if (follow)
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+ else
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ value_cp = gf_memdup(value, size);
+ GF_CHECK_ALLOC_AND_LOG(subvol->name, value_cp, ret,
+ "Failed to"
+ " duplicate setxattr value",
+ out);
+
+ xattr = dict_for_key_value(name, value_cp, size, _gf_false);
+ if (!xattr) {
+ GF_FREE(value_cp);
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_setxattr(subvol, &loc, xattr, flags, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+out:
+ loc_wipe(&loc);
+ if (xattr)
+ dict_unref(xattr);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setxattr, 3.4.0)
+int
+pub_glfs_setxattr(struct glfs *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return glfs_setxattr_common(fs, path, name, value, size, flags, 1);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lsetxattr, 3.4.0)
+int
+pub_glfs_lsetxattr(struct glfs *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags)
+{
+ return glfs_setxattr_common(fs, path, name, value, size, flags, 0);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fsetxattr, 3.4.0)
+int
+pub_glfs_fsetxattr(struct glfs_fd *glfd, const char *name, const void *value,
+ size_t size, int flags)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ fd_t *fd = NULL;
+ void *value_cp = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ if (!name || *name == '\0') {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (strlen(name) > GF_XATTR_NAME_MAX) {
+ ret = -1;
+ errno = ENAMETOOLONG;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ value_cp = gf_memdup(value, size);
+ GF_CHECK_ALLOC_AND_LOG(subvol->name, value_cp, ret,
+ "Failed to"
+ " duplicate setxattr value",
+ out);
+
+ xattr = dict_for_key_value(name, value_cp, size, _gf_false);
+ if (!xattr) {
+ GF_FREE(value_cp);
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_fsetxattr(subvol, fd, xattr, flags, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ if (xattr)
+ dict_unref(xattr);
+
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+int
+glfs_removexattr_common(struct glfs *fs, const char *path, const char *name,
+ int follow)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ if (follow)
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+ else
+ ret = glfs_lresolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ ret = syncop_removexattr(subvol, &loc, name, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_removexattr, 3.4.0)
+int
+pub_glfs_removexattr(struct glfs *fs, const char *path, const char *name)
+{
+ return glfs_removexattr_common(fs, path, name, 1);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lremovexattr, 3.4.0)
+int
+pub_glfs_lremovexattr(struct glfs *fs, const char *path, const char *name)
+{
+ return glfs_removexattr_common(fs, path, name, 0);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fremovexattr, 3.4.0)
+int
+pub_glfs_fremovexattr(struct glfs_fd *glfd, const char *name)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = syncop_fremovexattr(subvol, fd, name, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fallocate, 3.5.0)
+int
+pub_glfs_fallocate(struct glfs_fd *glfd, int keep_size, off_t offset,
+ size_t len)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_fallocate(subvol, fd, keep_size, offset, len, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_discard, 3.5.0)
+int
+pub_glfs_discard(struct glfs_fd *glfd, off_t offset, size_t len)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_discard(subvol, fd, offset, len, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_zerofill, 3.5.0)
+int
+pub_glfs_zerofill(struct glfs_fd *glfd, off_t offset, off_t len)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ dict_t *fop_attr = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto out;
+ }
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_zerofill(subvol, fd, offset, len, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_chdir, 3.4.0)
+int
+pub_glfs_chdir(struct glfs *fs, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (!IA_ISDIR(iatt.ia_type)) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ glfs_cwd_set(fs, loc.inode);
+
+out:
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fchdir, 3.4.0)
+int
+pub_glfs_fchdir(struct glfs_fd *glfd)
+{
+ int ret = -1;
+ inode_t *inode = NULL;
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ inode = fd->inode;
+
+ if (!IA_ISDIR(inode->ia_type)) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ glfs_cwd_set(glfd->fs, inode);
+ ret = 0;
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+static gf_boolean_t warn_realpath = _gf_true; /* log once */
+
+static char *
+glfs_realpath_common(struct glfs *fs, const char *path, char *resolved_path,
+ gf_boolean_t warn_deprecated)
+{
+ int ret = -1;
+ char *retpath = NULL;
+ char *allocpath = NULL;
+ xlator_t *subvol = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int reval = 0;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (resolved_path)
+ retpath = resolved_path;
+ else if (warn_deprecated) {
+ retpath = allocpath = malloc(PATH_MAX + 1);
+ if (warn_realpath) {
+ warn_realpath = _gf_false;
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "this application "
+ "is compiled against an old version of "
+ "libgfapi, it should use glfs_free() to "
+ "release the path returned by "
+ "glfs_realpath()");
+ }
+ } else {
+ retpath = allocpath = GLFS_CALLOC(1, PATH_MAX + 1, NULL,
+ glfs_mt_realpath_t);
+ }
+
+ if (!retpath) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+retry:
+ ret = glfs_resolve(fs, subvol, path, &loc, &iatt, reval);
+
+ ESTALE_RETRY(ret, errno, reval, &loc, retry);
+
+ if (ret)
+ goto out;
+
+ if (loc.path) {
+ snprintf(retpath, PATH_MAX + 1, "%s", loc.path);
+ }
+
+out:
+ loc_wipe(&loc);
+
+ if (ret == -1) {
+ if (warn_deprecated && allocpath)
+ free(allocpath);
+ else if (allocpath)
+ GLFS_FREE(allocpath);
+ retpath = NULL;
+ }
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return retpath;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_realpath34, glfs_realpath, 3.4.0)
+char *
+pub_glfs_realpath34(struct glfs *fs, const char *path, char *resolved_path)
+{
+ return glfs_realpath_common(fs, path, resolved_path, _gf_true);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_realpath, 3.7.17)
+char *
+pub_glfs_realpath(struct glfs *fs, const char *path, char *resolved_path)
+{
+ return glfs_realpath_common(fs, path, resolved_path, _gf_false);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_getcwd, 3.4.0)
+char *
+pub_glfs_getcwd(struct glfs *fs, char *buf, size_t n)
+{
+ int ret = -1;
+ inode_t *inode = NULL;
+ char *path = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (!buf || n < 2) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ inode = glfs_cwd_get(fs);
+
+ if (!inode) {
+ strncpy(buf, "/", n);
+ ret = 0;
+ goto out;
+ }
+
+ ret = inode_path(inode, 0, &path);
+ if (n <= ret) {
+ ret = -1;
+ errno = ERANGE;
+ goto out;
+ }
+
+ strncpy(buf, path, n);
+ ret = 0;
+out:
+ GF_FREE(path);
+
+ if (inode)
+ inode_unref(inode);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ if (ret < 0)
+ return NULL;
+
+ return buf;
+}
+
+static void
+gf_flock_to_flock(struct gf_flock *gf_flock, struct flock *flock)
+{
+ flock->l_type = gf_flock->l_type;
+ flock->l_whence = gf_flock->l_whence;
+ flock->l_start = gf_flock->l_start;
+ flock->l_len = gf_flock->l_len;
+ flock->l_pid = gf_flock->l_pid;
+}
+
+static void
+gf_flock_from_flock(struct gf_flock *gf_flock, struct flock *flock)
+{
+ gf_flock->l_type = flock->l_type;
+ gf_flock->l_whence = flock->l_whence;
+ gf_flock->l_start = flock->l_start;
+ gf_flock->l_len = flock->l_len;
+ gf_flock->l_pid = flock->l_pid;
+}
+
+static int
+glfs_lock_common(struct glfs_fd *glfd, int cmd, struct flock *flock,
+ dict_t *xdata)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ struct gf_flock gf_flock = {
+ 0,
+ };
+ struct gf_flock saved_flock = {
+ 0,
+ };
+ fd_t *fd = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ if (!flock) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ GF_REF_GET(glfd);
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ /* Generate glusterfs flock structure from client flock
+ * structure to be processed by server */
+ gf_flock_from_flock(&gf_flock, flock);
+
+ /* Keep another copy of flock for split/merge of locks
+ * at client side */
+ gf_flock_from_flock(&saved_flock, flock);
+
+ if (glfd->lk_owner.len != 0) {
+ ret = syncopctx_setfslkowner(&glfd->lk_owner);
+
+ if (ret)
+ goto out;
+ }
+
+ ret = get_fop_attr_thrd_key(&xdata);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_lk(subvol, fd, cmd, &gf_flock, xdata, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* Convert back from gf_flock to flock as expected by application */
+ gf_flock_to_flock(&gf_flock, flock);
+
+ if (ret == 0 && (cmd == F_SETLK || cmd == F_SETLKW)) {
+ ret = fd_lk_insert_and_merge(fd, cmd, &saved_flock);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ API_MSG_LOCK_INSERT_MERGE_FAILED, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
+ ret = 0;
+ }
+ }
+
+out:
+ if (fd)
+ fd_unref(fd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_file_lock, 4.0.0)
+int
+pub_glfs_file_lock(struct glfs_fd *glfd, int cmd, struct flock *flock,
+ glfs_lock_mode_t lk_mode)
+{
+ int ret = -1;
+ dict_t *xdata_in = NULL;
+
+ if (lk_mode == GLFS_LK_MANDATORY) {
+ /* Create a new dictionary */
+ xdata_in = dict_new();
+ if (xdata_in == NULL) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* Set GF_LK_MANDATORY internally within dictionary to map
+ * GLFS_LK_MANDATORY */
+ ret = dict_set_uint32(xdata_in, GF_LOCK_MODE, GF_LK_MANDATORY);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ API_MSG_SETTING_LOCK_TYPE_FAILED, NULL);
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ ret = glfs_lock_common(glfd, cmd, flock, xdata_in);
+out:
+ if (xdata_in)
+ dict_unref(xdata_in);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_posix_lock, 3.4.0)
+int
+pub_glfs_posix_lock(struct glfs_fd *glfd, int cmd, struct flock *flock)
+{
+ return glfs_lock_common(glfd, cmd, flock, NULL);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fd_set_lkowner, 3.10.7)
+int
+pub_glfs_fd_set_lkowner(struct glfs_fd *glfd, void *data, int len)
+{
+ int ret = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ if (!GF_REF_GET(glfd)) {
+ goto invalid_fs;
+ }
+
+ GF_VALIDATE_OR_GOTO(THIS->name, data, out);
+
+ if ((len <= 0) || (len > GFAPI_MAX_LOCK_OWNER_LEN)) {
+ errno = EINVAL;
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG,
+ "lk_owner len=%d", len, NULL);
+ goto out;
+ }
+
+ glfd->lk_owner.len = len;
+
+ memcpy(glfd->lk_owner.data, data, len);
+
+ ret = 0;
+out:
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_dup, 3.4.0)
+struct glfs_fd *
+pub_glfs_dup(struct glfs_fd *glfd)
+{
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ struct glfs_fd *dupfd = NULL;
+ struct glfs *fs = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ fs = glfd->fs;
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(fs, subvol, glfd);
+ if (!fd) {
+ errno = EBADFD;
+ goto out;
+ }
+
+ dupfd = glfs_fd_new(fs);
+ if (!dupfd) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ dupfd->fd = fd_ref(fd);
+ dupfd->state = glfd->state;
+out:
+ if (fd)
+ fd_unref(fd);
+ if (dupfd)
+ glfs_fd_bind(dupfd);
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return dupfd;
+}
+
+static void
+glfs_enqueue_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data)
+{
+ int ret = -1;
+ upcall_entry *u_list = NULL;
+
+ if (!fs || !upcall_data)
+ goto out;
+
+ u_list = GF_CALLOC(1, sizeof(*u_list), glfs_mt_upcall_entry_t);
+
+ if (!u_list) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&u_list->upcall_list);
+
+ gf_uuid_copy(u_list->upcall_data.gfid, upcall_data->gfid);
+ u_list->upcall_data.event_type = upcall_data->event_type;
+
+ switch (upcall_data->event_type) {
+ case GF_UPCALL_CACHE_INVALIDATION:
+ ret = glfs_get_upcall_cache_invalidation(&u_list->upcall_data,
+ upcall_data);
+ break;
+ case GF_UPCALL_RECALL_LEASE:
+ ret = glfs_get_upcall_lease(&u_list->upcall_data, upcall_data);
+ break;
+ default:
+ break;
+ }
+
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ENTRY, NULL);
+ goto out;
+ }
+
+ pthread_mutex_lock(&fs->upcall_list_mutex);
+ {
+ list_add_tail(&u_list->upcall_list, &fs->upcall_list);
+ }
+ pthread_mutex_unlock(&fs->upcall_list_mutex);
+
+ ret = 0;
+
+out:
+ if (ret && u_list) {
+ GF_FREE(u_list->upcall_data.data);
+ GF_FREE(u_list);
+ }
+}
+
+static void
+glfs_free_upcall_lease(void *to_free)
+{
+ struct glfs_upcall_lease *arg = to_free;
+
+ if (!arg)
+ return;
+
+ if (arg->object)
+ glfs_h_close(arg->object);
+
+ GF_FREE(arg);
+}
+
+int
+glfs_recall_lease_fd(struct glfs *fs, struct gf_upcall *up_data)
+{
+ struct gf_upcall_recall_lease *recall_lease = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
+ inode_t *inode = NULL;
+ struct glfs_fd *glfd = NULL;
+ struct glfs_fd *tmp = NULL;
+ struct list_head glfd_list;
+ fd_t *fd = NULL;
+ uint64_t value = 0;
+ struct glfs_lease lease = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("gfapi", up_data, out);
+ GF_VALIDATE_OR_GOTO("gfapi", fs, out);
+
+ recall_lease = up_data->data;
+ GF_VALIDATE_OR_GOTO("gfapi", recall_lease, out);
+
+ INIT_LIST_HEAD(&glfd_list);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ gf_msg_debug(THIS->name, 0, "Recall lease received for gfid:%s",
+ uuid_utoa(up_data->gfid));
+
+ inode = inode_find(subvol->itable, up_data->gfid);
+ if (!inode) {
+ ret = -1;
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INODE_FIND_FAILED,
+ "gfid=%s", uuid_utoa(up_data->gfid), "graph_id=%d",
+ subvol->graph->id, NULL);
+ goto out;
+ }
+
+ LOCK(&inode->lock);
+ {
+ list_for_each_entry(fd, &inode->fd_list, inode_list)
+ {
+ ret = fd_ctx_get(fd, subvol, &value);
+ glfd = (struct glfs_fd *)(uintptr_t)value;
+ if (glfd) {
+ gf_msg_trace(THIS->name, 0, "glfd (%p) has held lease", glfd);
+ GF_REF_GET(glfd);
+ list_add_tail(&glfd->list, &glfd_list);
+ }
+ }
+ }
+ UNLOCK(&inode->lock);
+
+ if (!list_empty(&glfd_list)) {
+ list_for_each_entry_safe(glfd, tmp, &glfd_list, list)
+ {
+ LOCK(&glfd->lock);
+ {
+ if (glfd->state != GLFD_CLOSE) {
+ gf_msg_trace(THIS->name, 0,
+ "glfd (%p) has held lease, "
+ "calling recall cbk",
+ glfd);
+ glfd->cbk(lease, glfd->cookie);
+ }
+ }
+ UNLOCK(&glfd->lock);
+
+ list_del_init(&glfd->list);
+ GF_REF_PUT(glfd);
+ }
+ }
+
+out:
+ return ret;
+}
+
+static int
+glfs_recall_lease_upcall(struct glfs *fs, struct glfs_upcall *up_arg,
+ struct gf_upcall *up_data)
+{
+ struct gf_upcall_recall_lease *recall_lease = NULL;
+ struct glfs_object *object = NULL;
+ xlator_t *subvol = NULL;
+ int ret = -1;
+ struct glfs_upcall_lease *up_lease_arg = NULL;
+
+ GF_VALIDATE_OR_GOTO("gfapi", up_data, out);
+ GF_VALIDATE_OR_GOTO("gfapi", fs, out);
+
+ recall_lease = up_data->data;
+ GF_VALIDATE_OR_GOTO("gfapi", recall_lease, out);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ gf_msg_debug(THIS->name, 0, "Recall lease received for gfid:%s",
+ uuid_utoa(up_data->gfid));
+
+ object = glfs_h_find_handle(fs, up_data->gfid, GFAPI_HANDLE_LENGTH);
+ if (!object) {
+ /* The reason handle creation will fail is because we
+ * couldn't find the inode in the gfapi inode table.
+ *
+ * But since application would have taken inode_ref, the
+ * only case when this can happen is when it has closed
+ * the handle and hence will no more be interested in
+ * the upcall for this particular gfid.
+ */
+ gf_smsg(THIS->name, GF_LOG_DEBUG, errno, API_MSG_CREATE_HANDLE_FAILED,
+ "gfid=%s", uuid_utoa(up_data->gfid), NULL);
+ errno = ESTALE;
+ goto out;
+ }
+
+ up_lease_arg = GF_CALLOC(1, sizeof(struct glfs_upcall_lease),
+ glfs_mt_upcall_inode_t);
+ up_lease_arg->object = object;
+
+ GF_VALIDATE_OR_GOTO("glfs_recall_lease", up_lease_arg, out);
+
+ up_lease_arg->lease_type = recall_lease->lease_type;
+
+ up_arg->reason = GLFS_UPCALL_RECALL_LEASE;
+ up_arg->event = up_lease_arg;
+ up_arg->free_event = glfs_free_upcall_lease;
+
+ ret = 0;
+
+out:
+ if (ret) {
+ /* Close p_object and oldp_object as well if being referenced.*/
+ if (object)
+ glfs_h_close(object);
+
+ /* Set reason to prevent applications from using ->event */
+ up_arg->reason = GF_UPCALL_EVENT_NULL;
+ }
+ return ret;
+}
+
+static int
+upcall_syncop_args_free(struct upcall_syncop_args *args)
+{
+ dict_t *dict = NULL;
+ struct gf_upcall *upcall_data = NULL;
+
+ if (args) {
+ upcall_data = &args->upcall_data;
+ switch (upcall_data->event_type) {
+ case GF_UPCALL_CACHE_INVALIDATION:
+ dict = ((struct gf_upcall_cache_invalidation *)(upcall_data
+ ->data))
+ ->dict;
+ break;
+ case GF_UPCALL_RECALL_LEASE:
+ dict = ((struct gf_upcall_recall_lease *)(upcall_data->data))
+ ->dict;
+ break;
+ }
+ if (dict)
+ dict_unref(dict);
+
+ GF_FREE(upcall_data->client_uid);
+ GF_FREE(upcall_data->data);
+ }
+ GF_FREE(args);
+ return 0;
+}
+
+static int
+glfs_upcall_syncop_cbk(int ret, call_frame_t *frame, void *opaque)
+{
+ struct upcall_syncop_args *args = opaque;
+
+ (void)upcall_syncop_args_free(args);
+
+ return 0;
+}
+
+static int
+glfs_cbk_upcall_syncop(void *opaque)
+{
+ struct upcall_syncop_args *args = opaque;
+ struct gf_upcall *upcall_data = NULL;
+ struct glfs_upcall *up_arg = NULL;
+ struct glfs *fs;
+ int ret = -1;
+
+ fs = args->fs;
+ upcall_data = &args->upcall_data;
+
+ if (!upcall_data) {
+ goto out;
+ }
+
+ up_arg = GLFS_CALLOC(1, sizeof(struct gf_upcall), glfs_release_upcall,
+ glfs_mt_upcall_entry_t);
+ if (!up_arg) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
+ goto out;
+ }
+
+ switch (upcall_data->event_type) {
+ case GF_UPCALL_CACHE_INVALIDATION:
+ ret = glfs_h_poll_cache_invalidation(fs, up_arg, upcall_data);
+ break;
+ case GF_UPCALL_RECALL_LEASE:
+ ret = glfs_recall_lease_upcall(fs, up_arg, upcall_data);
+ break;
+ default:
+ errno = EINVAL;
+ }
+
+ /* It could so happen that the file which got
+ * upcall notification may have got deleted by
+ * the same client. In such cases up_arg->reason
+ * is set to GLFS_UPCALL_EVENT_NULL. No need to
+ * send upcall then
+ */
+ if (up_arg->reason == GLFS_UPCALL_EVENT_NULL) {
+ gf_smsg(THIS->name, GF_LOG_DEBUG, errno,
+ API_MSG_UPCALL_EVENT_NULL_RECEIVED, NULL);
+ ret = 0;
+ GLFS_FREE(up_arg);
+ goto out;
+ } else if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ENTRY, NULL);
+ GLFS_FREE(up_arg);
+ goto out;
+ }
+
+ if (fs->up_cbk && up_arg)
+ (fs->up_cbk)(up_arg, fs->up_data);
+
+ /* application takes care of calling glfs_free on up_arg post
+ * their processing */
+
+out:
+ return ret;
+}
+
+static struct gf_upcall_cache_invalidation *
+gf_copy_cache_invalidation(struct gf_upcall_cache_invalidation *src)
+{
+ struct gf_upcall_cache_invalidation *dst = NULL;
+
+ if (!src)
+ goto out;
+
+ dst = GF_CALLOC(1, sizeof(struct gf_upcall_cache_invalidation),
+ glfs_mt_upcall_entry_t);
+
+ if (!dst) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
+ goto out;
+ }
+
+ dst->flags = src->flags;
+ dst->expire_time_attr = src->expire_time_attr;
+ dst->stat = src->stat;
+ dst->p_stat = src->p_stat;
+ dst->oldp_stat = src->oldp_stat;
+
+ if (src->dict)
+ dst->dict = dict_copy_with_ref(src->dict, NULL);
+
+ return dst;
+out:
+ return NULL;
+}
+
+static struct gf_upcall_recall_lease *
+gf_copy_recall_lease(struct gf_upcall_recall_lease *src)
+{
+ struct gf_upcall_recall_lease *dst = NULL;
+
+ if (!src)
+ goto out;
+
+ dst = GF_CALLOC(1, sizeof(struct gf_upcall_recall_lease),
+ glfs_mt_upcall_entry_t);
+
+ if (!dst) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED, "entry",
+ NULL);
+ goto out;
+ }
+
+ dst->lease_type = src->lease_type;
+ memcpy(dst->tid, src->tid, 16);
+
+ if (src->dict)
+ dst->dict = dict_copy_with_ref(src->dict, NULL);
+
+ return dst;
+out:
+ return NULL;
+}
+
+static struct upcall_syncop_args *
+upcall_syncop_args_init(struct glfs *fs, struct gf_upcall *upcall_data)
+{
+ struct upcall_syncop_args *args = NULL;
+ int ret = -1;
+ struct gf_upcall *t_data = NULL;
+
+ if (!fs || !upcall_data)
+ goto out;
+
+ args = GF_CALLOC(1, sizeof(struct upcall_syncop_args),
+ glfs_mt_upcall_entry_t);
+ if (!args) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_ALLOC_FAILED,
+ "syncop args", NULL);
+ goto out;
+ }
+
+ /* Note: we are not taking any ref on fs here.
+ * Ideally applications have to unregister for upcall events
+ * or stop polling for upcall events before performing
+ * glfs_fini. And as for outstanding synctasks created, we wait
+ * for all syncenv threads to finish tasks before cleaning up the
+ * fs->ctx. Hence it seems safe to process these callback
+ * notification without taking any lock/ref.
+ */
+ args->fs = fs;
+ t_data = &(args->upcall_data);
+ t_data->client_uid = gf_strdup(upcall_data->client_uid);
+
+ gf_uuid_copy(t_data->gfid, upcall_data->gfid);
+ t_data->event_type = upcall_data->event_type;
+
+ switch (t_data->event_type) {
+ case GF_UPCALL_CACHE_INVALIDATION:
+ t_data->data = gf_copy_cache_invalidation(
+ (struct gf_upcall_cache_invalidation *)upcall_data->data);
+ break;
+ case GF_UPCALL_RECALL_LEASE:
+ t_data->data = gf_copy_recall_lease(
+ (struct gf_upcall_recall_lease *)upcall_data->data);
+ break;
+ }
+
+ if (!t_data->data)
+ goto out;
+
+ return args;
+out:
+ if (ret) {
+ if (args) {
+ GF_FREE(args->upcall_data.client_uid);
+ GF_FREE(args);
+ }
+ }
+
+ return NULL;
+}
+
+static void
+glfs_cbk_upcall_data(struct glfs *fs, struct gf_upcall *upcall_data)
+{
+ struct upcall_syncop_args *args = NULL;
+ int ret = -1;
+
+ if (!fs || !upcall_data)
+ goto out;
+
+ if (!(fs->upcall_events & upcall_data->event_type)) {
+ /* ignore events which application hasn't registered*/
+ goto out;
+ }
+
+ args = upcall_syncop_args_init(fs, upcall_data);
+
+ if (!args)
+ goto out;
+
+ ret = synctask_new(THIS->ctx->env, glfs_cbk_upcall_syncop,
+ glfs_upcall_syncop_cbk, NULL, args);
+ /* should we retry incase of failure? */
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_UPCALL_SYNCOP_FAILED,
+ "event_type=%d", upcall_data->event_type, "gfid=%s",
+ (char *)(upcall_data->gfid), NULL);
+ upcall_syncop_args_free(args);
+ }
+
+out:
+ return;
+}
+
+/*
+ * This routine is called in case of any notification received
+ * from the server. All the upcall events are queued up in a list
+ * to be read by the applications.
+ *
+ * In case if the application registers a cbk function, that shall
+ * be called by this routine in case of any event received.
+ * The cbk fn is responsible for notifying the
+ * applications the way it desires for each event queued (for eg.,
+ * can raise a signal or broadcast a cond variable etc.)
+ *
+ * Otherwise all the upcall events are queued up in a list
+ * to be read/polled by the applications.
+ */
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_process_upcall_event, 3.7.0)
+void
+priv_glfs_process_upcall_event(struct glfs *fs, void *data)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ struct gf_upcall *upcall_data = NULL;
+
+ DECLARE_OLD_THIS;
+
+ gf_msg_debug(THIS->name, 0, "Upcall gfapi callback is called");
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, err);
+
+ if (!data)
+ goto out;
+
+ /* Unlike in I/O path, "glfs_fini" would not have freed
+ * 'fs' by the time we take lock as it waits for all epoll
+ * threads to exit including this
+ */
+ pthread_mutex_lock(&fs->mutex);
+ {
+ ctx = fs->ctx;
+
+ /* if we're not interested in upcalls (anymore), skip them */
+ if (ctx->cleanup_started || !fs->cache_upcalls) {
+ pthread_mutex_unlock(&fs->mutex);
+ goto out;
+ }
+
+ fs->pin_refcnt++;
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+ upcall_data = (struct gf_upcall *)data;
+
+ gf_msg_trace(THIS->name, 0, "Upcall gfapi gfid = %s",
+ (char *)(upcall_data->gfid));
+
+ /* *
+ * TODO: RECALL LEASE for each glfd
+ *
+ * In case of RECALL_LEASE, we could associate separate
+ * cbk function for each glfd either by
+ * - extending pub_glfs_lease to accept new args (recall_cbk_fn, cookie)
+ * - or by defining new API "glfs_register_recall_cbk_fn (glfd,
+ * recall_cbk_fn, cookie) . In such cases, flag it and instead of calling
+ * below upcall functions, define a new one to go through the glfd list and
+ * invoke each of theirs recall_cbk_fn.
+ * */
+
+ if (fs->up_cbk) { /* upcall cbk registered */
+ (void)glfs_cbk_upcall_data(fs, upcall_data);
+ } else {
+ (void)glfs_enqueue_upcall_data(fs, upcall_data);
+ }
+
+ pthread_mutex_lock(&fs->mutex);
+ {
+ fs->pin_refcnt--;
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+out:
+ __GLFS_EXIT_FS;
+err:
+ return;
+}
+
+ssize_t
+glfs_anonymous_pwritev(struct glfs *fs, struct glfs_object *object,
+ const struct iovec *iovec, int iovcnt, off_t offset,
+ int flags)
+{
+ xlator_t *subvol = NULL;
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iovec iov = {
+ 0,
+ };
+ inode_t *inode = NULL;
+ fd_t *fd = NULL;
+ int ret = -1;
+ size_t size = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ ret = -1;
+ errno = ESTALE;
+ goto out;
+ }
+
+ fd = fd_anonymous(inode);
+ if (!fd) {
+ ret = -1;
+ gf_smsg("gfapi", GF_LOG_ERROR, ENOMEM, API_MSG_FDCREATE_FAILED, NULL);
+ errno = ENOMEM;
+ goto out;
+ }
+
+ size = iov_length(iovec, iovcnt);
+
+ iobuf = iobuf_get2(subvol->ctx->iobuf_pool, size);
+ if (!iobuf) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ iobref = iobref_new();
+ if (!iobref) {
+ iobuf_unref(iobuf);
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = iobref_add(iobref, iobuf);
+ if (ret) {
+ iobuf_unref(iobuf);
+ iobref_unref(iobref);
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ iov_unload(iobuf_ptr(iobuf), iovec, iovcnt);
+
+ iov.iov_base = iobuf_ptr(iobuf);
+ iov.iov_len = size;
+
+ /* TODO : set leaseid */
+ ret = syncop_writev(subvol, fd, &iov, 1, offset, iobref, flags, NULL, NULL,
+ NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ iobuf_unref(iobuf);
+ iobref_unref(iobref);
+
+ if (ret <= 0)
+ goto out;
+
+out:
+
+ if (fd)
+ fd_unref(fd);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+ssize_t
+glfs_anonymous_preadv(struct glfs *fs, struct glfs_object *object,
+ const struct iovec *iovec, int iovcnt, off_t offset,
+ int flags)
+{
+ xlator_t *subvol = NULL;
+ struct iovec *iov = NULL;
+ struct iobref *iobref = NULL;
+ inode_t *inode = NULL;
+ fd_t *fd = NULL;
+ int cnt = 0;
+ ssize_t ret = -1;
+ ssize_t size = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ ret = -1;
+ errno = ESTALE;
+ goto out;
+ }
+
+ fd = fd_anonymous(inode);
+ if (!fd) {
+ ret = -1;
+ gf_smsg("gfapi", GF_LOG_ERROR, ENOMEM, API_MSG_FDCREATE_FAILED, NULL);
+ errno = ENOMEM;
+ goto out;
+ }
+
+ size = iov_length(iovec, iovcnt);
+
+ /* TODO : set leaseid */
+ ret = syncop_readv(subvol, fd, size, offset, flags, &iov, &cnt, &iobref,
+ NULL, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret <= 0)
+ goto out;
+
+ size = iov_copy(iovec, iovcnt, iov, cnt);
+
+ ret = size;
+out:
+ if (iov)
+ GF_FREE(iov);
+ if (iobref)
+ iobref_unref(iobref);
+ if (fd)
+ fd_unref(fd);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+static void
+glfs_release_xreaddirp_stat(void *ptr)
+{
+ struct glfs_xreaddirp_stat *to_free = ptr;
+
+ if (to_free->object)
+ glfs_h_close(to_free->object);
+}
+
+/*
+ * Given glfd of a directory, this function does readdirp and returns
+ * xstat along with dirents.
+ */
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_r, 3.11.0)
+int
+pub_glfs_xreaddirplus_r(struct glfs_fd *glfd, uint32_t flags,
+ struct glfs_xreaddirp_stat **xstat_p,
+ struct dirent *ext, struct dirent **res)
+{
+ int ret = -1;
+ gf_dirent_t *entry = NULL;
+ struct dirent *buf = NULL;
+ struct glfs_xreaddirp_stat *xstat = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ GF_VALIDATE_OR_GOTO(THIS->name, xstat_p, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, res, out);
+
+ errno = 0;
+
+ if (ext)
+ buf = ext;
+ else
+ buf = glfs_readdirbuf_get(glfd);
+
+ if (!buf)
+ goto out;
+
+ xstat = GLFS_CALLOC(1, sizeof(struct glfs_xreaddirp_stat),
+ glfs_release_xreaddirp_stat, glfs_mt_xreaddirp_stat_t);
+
+ if (!xstat)
+ goto out;
+
+ /* this is readdirplus operation */
+ entry = glfd_entry_next(glfd, 1);
+
+ /* XXX: Ideally when we reach EOD, errno should have been
+ * set to ENOENT. But that doesn't seem to be the case.
+ *
+ * The only way to confirm if its EOD at this point is that
+ * errno == 0 and entry == NULL
+ */
+ if (errno)
+ goto out;
+
+ if (!entry) {
+ /* reached EOD, ret = 0 */
+ ret = 0;
+ *res = NULL;
+ *xstat_p = NULL;
+
+ /* free xstat as applications shall not be using it */
+ GLFS_FREE(xstat);
+
+ goto out;
+ }
+
+ *res = buf;
+ gf_dirent_to_dirent(entry, buf);
+
+ if (flags & GFAPI_XREADDIRP_STAT) {
+ glfs_iatt_to_stat(glfd->fs, &entry->d_stat, &xstat->st);
+ xstat->flags_handled |= GFAPI_XREADDIRP_STAT;
+ }
+
+ if ((flags & GFAPI_XREADDIRP_HANDLE) &&
+ /* skip . and .. */
+ strcmp(buf->d_name, ".") && strcmp(buf->d_name, "..")) {
+ /* Now create object.
+ * We can use "glfs_h_find_handle" as well as inodes would have
+ * already got linked as part of 'gf_link_inodes_from_dirent' */
+ xstat->object = glfs_h_create_from_handle(
+ glfd->fs, entry->d_stat.ia_gfid, GFAPI_HANDLE_LENGTH, NULL);
+
+ if (xstat->object) { /* success */
+ /* note: xstat->object->inode->ref is taken
+ * This shall be unref'ed when application does
+ * glfs_free(xstat) */
+ xstat->flags_handled |= GFAPI_XREADDIRP_HANDLE;
+ }
+ }
+
+ ret = xstat->flags_handled;
+ *xstat_p = xstat;
+
+ gf_msg_debug(THIS->name, 0,
+ "xreaddirp- requested_flags (%x) , processed_flags (%x)",
+ flags, xstat->flags_handled);
+
+out:
+ GF_REF_PUT(glfd);
+
+ if (ret < 0) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, errno, API_MSG_XREADDIRP_R_FAILED,
+ "reason=%s", strerror(errno), NULL);
+
+ if (xstat)
+ GLFS_FREE(xstat);
+ }
+
+ __GLFS_EXIT_FS;
+
+ return ret;
+
+invalid_fs:
+ return -1;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_stat, 3.11.0)
+struct stat *
+pub_glfs_xreaddirplus_get_stat(struct glfs_xreaddirp_stat *xstat)
+{
+ GF_VALIDATE_OR_GOTO("glfs_xreaddirplus_get_stat", xstat, out);
+
+ if (!xstat->flags_handled & GFAPI_XREADDIRP_STAT)
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_FLAGS_HANDLE,
+ "GFAPI_XREADDIRP_STAT"
+ "xstat=%p",
+ xstat, "handles=%x", xstat->flags_handled, NULL);
+ return &xstat->st;
+
+out:
+ return NULL;
+}
+
+void
+gf_lease_to_glfs_lease(struct gf_lease *gf_lease, struct glfs_lease *lease)
+{
+ u_int lease_type = gf_lease->lease_type;
+ lease->cmd = gf_lease->cmd;
+ lease->lease_type = lease_type;
+ memcpy(lease->lease_id, gf_lease->lease_id, LEASE_ID_SIZE);
+}
+
+void
+glfs_lease_to_gf_lease(struct glfs_lease *lease, struct gf_lease *gf_lease)
+{
+ u_int lease_type = lease->lease_type;
+ gf_lease->cmd = lease->cmd;
+ gf_lease->lease_type = lease_type;
+ memcpy(gf_lease->lease_id, lease->lease_id, LEASE_ID_SIZE);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_lease, 4.0.0)
+int
+pub_glfs_lease(struct glfs_fd *glfd, struct glfs_lease *lease,
+ glfs_recall_cbk fn, void *data)
+{
+ int ret = -1;
+ loc_t loc = {
+ 0,
+ };
+ xlator_t *subvol = NULL;
+ fd_t *fd = NULL;
+ struct gf_lease gf_lease = {
+ 0,
+ };
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FD(glfd, invalid_fs);
+
+ GF_REF_GET(glfd);
+
+ if (!is_valid_lease_id(lease->lease_id)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ subvol = glfs_active_subvol(glfd->fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ fd = glfs_resolve_fd(glfd->fs, subvol, glfd);
+ if (!fd) {
+ ret = -1;
+ errno = EBADFD;
+ goto out;
+ }
+
+ switch (lease->lease_type) {
+ case GLFS_RD_LEASE:
+ if ((fd->flags != O_RDONLY) && !(fd->flags & O_RDWR)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+ break;
+ case GLFS_RW_LEASE:
+ if (!((fd->flags & O_WRONLY) || (fd->flags & O_RDWR))) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+ break;
+ default:
+ if (lease->cmd != GLFS_GET_LEASE) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+ break;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(fd->inode, loc, out);
+
+ glfs_lease_to_gf_lease(lease, &gf_lease);
+
+ ret = syncop_lease(subvol, &loc, &gf_lease, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ gf_lease_to_glfs_lease(&gf_lease, lease);
+
+ /* TODO: Add leases for client replay
+ if (ret == 0 && (cmd == F_SETLK || cmd == F_SETLKW))
+ fd_lk_insert_and_merge (fd, cmd, &saved_flock);
+ */
+ if (ret == 0) {
+ ret = fd_ctx_set(glfd->fd, subvol, (uint64_t)(long)glfd);
+ if (ret) {
+ gf_smsg(subvol->name, GF_LOG_ERROR, ENOMEM,
+ API_MSG_FDCTX_SET_FAILED, "fd=%p", glfd->fd, NULL);
+ goto out;
+ }
+ glfd->cbk = fn;
+ glfd->cookie = data;
+ }
+
+out:
+
+ if (glfd)
+ GF_REF_PUT(glfd);
+
+ if (subvol)
+ glfs_subvol_done(glfd->fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
diff --git a/api/src/glfs-handleops.c b/api/src/glfs-handleops.c
new file mode 100644
index 00000000000..53c2ee896f9
--- /dev/null
+++ b/api/src/glfs-handleops.c
@@ -0,0 +1,2655 @@
+/*
+ * Copyright (c) 2013-2018 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+#include <glusterfs/syncop.h>
+#include "glfs.h"
+#include "glfs-handles.h"
+#include "gfapi-messages.h"
+
+int
+glfs_listxattr_process(void *value, size_t size, dict_t *xattr);
+
+void
+glfs_iatt_from_stat(struct stat *stat, int valid, struct iatt *iatt,
+ int *glvalid)
+{
+ /* validate in args */
+ if ((stat == NULL) || (iatt == NULL) || (glvalid == NULL)) {
+ errno = EINVAL;
+ return;
+ }
+
+ *glvalid = 0;
+
+ if (valid & GFAPI_SET_ATTR_MODE) {
+ iatt->ia_prot = ia_prot_from_st_mode(stat->st_mode);
+ *glvalid |= GF_SET_ATTR_MODE;
+ }
+
+ if (valid & GFAPI_SET_ATTR_UID) {
+ iatt->ia_uid = stat->st_uid;
+ *glvalid |= GF_SET_ATTR_UID;
+ }
+
+ if (valid & GFAPI_SET_ATTR_GID) {
+ iatt->ia_gid = stat->st_gid;
+ *glvalid |= GF_SET_ATTR_GID;
+ }
+
+ if (valid & GFAPI_SET_ATTR_ATIME) {
+ iatt->ia_atime = stat->st_atime;
+ iatt->ia_atime_nsec = ST_ATIM_NSEC(stat);
+ *glvalid |= GF_SET_ATTR_ATIME;
+ }
+
+ if (valid & GFAPI_SET_ATTR_MTIME) {
+ iatt->ia_mtime = stat->st_mtime;
+ iatt->ia_mtime_nsec = ST_MTIM_NSEC(stat);
+ *glvalid |= GF_SET_ATTR_MTIME;
+ }
+
+ return;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_lookupat, 3.7.4)
+struct glfs_object *
+pub_glfs_h_lookupat(struct glfs *fs, struct glfs_object *parent,
+ const char *path, struct stat *stat, int follow)
+{
+ int ret = 0;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ struct glfs_object *object = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if (path == NULL) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ if (parent) {
+ inode = glfs_resolve_inode(fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+ }
+
+ /* fop/op */
+ ret = glfs_resolve_at(fs, subvol, inode, path, &loc, &iatt, follow, 0);
+
+ /* populate out args */
+ if (!ret) {
+ if (stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+
+ ret = glfs_create_object(&loc, &object);
+ }
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return object;
+}
+
+GFAPI_SYMVER_PUBLIC(glfs_h_lookupat34, glfs_h_lookupat, 3.4.2)
+struct glfs_object *
+pub_glfs_h_lookupat34(struct glfs *fs, struct glfs_object *parent,
+ const char *path, struct stat *stat)
+{
+ return pub_glfs_h_lookupat(fs, parent, path, stat, 0);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_statfs, 3.7.0)
+int
+pub_glfs_h_statfs(struct glfs *fs, struct glfs_object *object,
+ struct statvfs *statvfs)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL || statvfs == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_statfs(subvol, &loc, statvfs, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ loc_wipe(&loc);
+
+out:
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_stat, 3.4.2)
+int
+pub_glfs_h_stat(struct glfs *fs, struct glfs_object *object, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_stat(subvol, &loc, &iatt, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* populate out args */
+ if (!ret && stat) {
+ glfs_iatt_to_stat(fs, &iatt, stat);
+ }
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_getattrs, 3.4.2)
+int
+pub_glfs_h_getattrs(struct glfs *fs, struct glfs_object *object,
+ struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ ret = 0;
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* fop/op */
+ ret = glfs_resolve_base(fs, subvol, inode, &iatt);
+
+ /* populate out args */
+ if (!ret && stat) {
+ glfs_iatt_to_stat(fs, &iatt, stat);
+ }
+
+out:
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+int
+glfs_h_getxattrs_common(struct glfs *fs, struct glfs_object *object,
+ dict_t **xattr, const char *name,
+ gf_boolean_t is_listxattr)
+{
+ int ret = 0;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (!is_listxattr) {
+ if (!name || *name == '\0') {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (strlen(name) > GF_XATTR_NAME_MAX) {
+ errno = ENAMETOOLONG;
+ return -1;
+ }
+ }
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ ret = syncop_getxattr(subvol, &loc, xattr, name, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_getxattrs, 3.5.1)
+int
+pub_glfs_h_getxattrs(struct glfs *fs, struct glfs_object *object,
+ const char *name, void *value, size_t size)
+{
+ int ret = -1;
+ dict_t *xattr = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ ret = glfs_h_getxattrs_common(fs, object, &xattr, name, (name == NULL));
+ if (ret)
+ goto out;
+
+ /* If @name is NULL, means get all the xattrs (i.e listxattr). */
+ if (name)
+ ret = glfs_getxattr_process(value, size, xattr, name);
+ else
+ ret = glfs_listxattr_process(value, size, xattr);
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_setattrs, 3.4.2)
+int
+pub_glfs_h_setattrs(struct glfs *fs, struct glfs_object *object,
+ struct stat *stat, int valid)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ int glvalid = 0;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL) || (stat == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* map valid masks from in args */
+ glfs_iatt_from_stat(stat, valid, &iatt, &glvalid);
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_setattr(subvol, &loc, &iatt, glvalid, 0, 0, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_setxattrs, 3.5.0)
+int
+pub_glfs_h_setxattrs(struct glfs *fs, struct glfs_object *object,
+ const char *name, const void *value, size_t size,
+ int flags)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ void *value_cp = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL) || (name == NULL) || (value == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (!name || *name == '\0') {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (strlen(name) > GF_XATTR_NAME_MAX) {
+ errno = ENAMETOOLONG;
+ return -1;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ value_cp = gf_memdup(value, size);
+ GF_CHECK_ALLOC_AND_LOG(subvol->name, value_cp, ret,
+ "Failed to"
+ " duplicate setxattr value",
+ out);
+
+ xattr = dict_for_key_value(name, value_cp, size, _gf_false);
+ if (!xattr) {
+ GF_FREE(value_cp);
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_setxattr(subvol, &loc, xattr, flags, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_removexattrs, 3.5.1)
+int
+pub_glfs_h_removexattrs(struct glfs *fs, struct glfs_object *object,
+ const char *name)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL) || (name == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_removexattr(subvol, &loc, name, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_open, 3.4.2)
+struct glfs_fd *
+pub_glfs_h_open(struct glfs *fs, struct glfs_object *object, int flags)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ dict_t *fop_attr = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* check types to open */
+ if (IA_ISDIR(inode->ia_type)) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ if (!IA_ISREG(inode->ia_type)) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ glfd = glfs_fd_new(fs);
+ if (!glfd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ glfd->fd = fd_create(inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ glfd->fd->flags = flags;
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ ret = syncop_open(subvol, &loc, flags, glfd->fd, fop_attr, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ glfd->fd->flags = flags;
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ if (ret && glfd) {
+ GF_REF_PUT(glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ glfd_set_state_bind(glfd);
+ }
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return glfd;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat, 3.4.2)
+struct glfs_object *
+pub_glfs_h_creat(struct glfs *fs, struct glfs_object *parent, const char *path,
+ int flags, mode_t mode, struct stat *stat)
+{
+ int ret = -1;
+ fd_t *fd = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, parent);
+ if (!inode) {
+ ret = -1;
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE(inode, loc, ret, errno, out, path);
+
+ fd = fd_create(loc.inode, getpid());
+ if (!fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ fd->flags = flags;
+
+ /* fop/op */
+ ret = syncop_create(subvol, &loc, flags, mode, fd, &iatt, xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* populate out args */
+ if (ret == 0) {
+ ret = glfs_loc_link(&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+
+ ret = glfs_create_object(&loc, &object);
+ }
+
+out:
+ if (ret && object != NULL) {
+ /* Release the held reference */
+ glfs_h_close(object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ if (fd)
+ fd_unref(fd);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_creat_open, 6.6)
+struct glfs_object *
+pub_glfs_h_creat_open(struct glfs *fs, struct glfs_object *parent,
+ const char *path, int flags, mode_t mode,
+ struct stat *stat, struct glfs_fd **out_fd)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+ dict_t *fop_attr = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL) ||
+ (out_fd == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, parent);
+ if (!inode) {
+ ret = -1;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE(inode, loc, ret, errno, out, path);
+
+ glfd = glfs_fd_new(fs);
+ if (!glfd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ glfd->fd = fd_create(loc.inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+ glfd->fd->flags = flags;
+
+ ret = get_fop_attr_thrd_key(&fop_attr);
+ if (ret)
+ gf_msg_debug("gfapi", 0, "Getting leaseid from thread failed");
+
+ /* fop/op */
+ ret = syncop_create(subvol, &loc, flags, mode, glfd->fd, &iatt, xattr_req,
+ NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* populate out args */
+ if (ret == 0) {
+ glfd->fd->flags = flags;
+
+ ret = glfs_loc_link(&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+
+ ret = glfs_create_object(&loc, &object);
+ }
+
+out:
+ if (ret && object != NULL) {
+ /* Release the held reference */
+ glfs_h_close(object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (fop_attr)
+ dict_unref(fop_attr);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ if (ret && glfd) {
+ GF_REF_PUT(glfd);
+ } else if (glfd) {
+ glfd_set_state_bind(glfd);
+ *out_fd = glfd;
+ }
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_mkdir, 3.4.2)
+struct glfs_object *
+pub_glfs_h_mkdir(struct glfs *fs, struct glfs_object *parent, const char *path,
+ mode_t mode, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE(inode, loc, ret, errno, out, path);
+
+ /* fop/op */
+ ret = syncop_mkdir(subvol, &loc, mode, &iatt, xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* populate out args */
+ if (ret == 0) {
+ ret = glfs_loc_link(&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+
+ ret = glfs_create_object(&loc, &object);
+ }
+
+out:
+ if (ret && object != NULL) {
+ glfs_h_close(object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_mknod, 3.4.2)
+struct glfs_object *
+pub_glfs_h_mknod(struct glfs *fs, struct glfs_object *parent, const char *path,
+ mode_t mode, dev_t dev, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE(inode, loc, ret, errno, out, path);
+
+ /* fop/op */
+ ret = syncop_mknod(subvol, &loc, mode, dev, &iatt, xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* populate out args */
+ if (ret == 0) {
+ ret = glfs_loc_link(&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+
+ ret = glfs_create_object(&loc, &object);
+ }
+out:
+ if (ret && object != NULL) {
+ glfs_h_close(object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_unlink, 3.4.2)
+int
+pub_glfs_h_unlink(struct glfs *fs, struct glfs_object *parent, const char *path)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ /* validate in args */
+ if ((fs == NULL) || (parent == NULL) || (path == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ ret = glfs_resolve_at(fs, subvol, inode, path, &loc, NULL, 0, 0);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (!IA_ISDIR(loc.inode->ia_type)) {
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret != 0) {
+ goto out;
+ }
+ } else {
+ ret = syncop_rmdir(subvol, &loc, 0, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret != 0) {
+ goto out;
+ }
+ }
+
+ if (ret == 0)
+ ret = glfs_loc_unlink(&loc);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_opendir, 3.4.2)
+struct glfs_fd *
+pub_glfs_h_opendir(struct glfs *fs, struct glfs_object *object)
+{
+ int ret = -1;
+ struct glfs_fd *glfd = NULL;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ if (!IA_ISDIR(inode->ia_type)) {
+ ret = -1;
+ errno = ENOTDIR;
+ goto out;
+ }
+
+ glfd = glfs_fd_new(fs);
+ if (!glfd)
+ goto out;
+
+ INIT_LIST_HEAD(&glfd->entries);
+
+ glfd->fd = fd_create(inode, getpid());
+ if (!glfd->fd) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_opendir(subvol, &loc, glfd->fd, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (ret && glfd) {
+ GF_REF_PUT(glfd);
+ glfd = NULL;
+ } else if (glfd) {
+ glfd_set_state_bind(glfd);
+ }
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return glfd;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_access, 3.6.0)
+int
+pub_glfs_h_access(struct glfs *fs, struct glfs_object *object, int mask)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return ret;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+
+ ret = syncop_access(subvol, &loc, mask, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_extract_handle, 3.4.2)
+ssize_t
+pub_glfs_h_extract_handle(struct glfs_object *object, unsigned char *handle,
+ int len)
+{
+ ssize_t ret = -1;
+
+ /* validate in args */
+ if (object == NULL) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (!handle || !len) {
+ ret = GFAPI_HANDLE_LENGTH;
+ goto out;
+ }
+
+ if (len < GFAPI_HANDLE_LENGTH) {
+ errno = ERANGE;
+ goto out;
+ }
+
+ memcpy(handle, object->gfid, GFAPI_HANDLE_LENGTH);
+
+ ret = GFAPI_HANDLE_LENGTH;
+
+out:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_create_from_handle, 3.4.2)
+struct glfs_object *
+pub_glfs_h_create_from_handle(struct glfs *fs, unsigned char *handle, int len,
+ struct stat *stat)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ struct iatt iatt = {
+ 0,
+ };
+ inode_t *newinode = NULL;
+ xlator_t *subvol = NULL;
+ struct glfs_object *object = NULL;
+ uint64_t ctx_value = LOOKUP_NOT_NEEDED;
+ gf_boolean_t lookup_needed = _gf_false;
+
+ /* validate in args */
+ if ((fs == NULL) || (handle == NULL) || (len != GFAPI_HANDLE_LENGTH)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ memcpy(loc.gfid, handle, GFAPI_HANDLE_LENGTH);
+
+ /* make sure the gfid received is valid */
+ GF_VALIDATE_OR_GOTO("glfs_h_create_from_handle",
+ !(gf_uuid_is_null(loc.gfid)), out);
+
+ newinode = inode_find(subvol->itable, loc.gfid);
+ if (newinode) {
+ if (!stat) /* No need of lookup */
+ goto found;
+
+ lookup_needed = inode_needs_lookup(newinode, THIS);
+ if (lookup_needed) {
+ loc.inode = newinode;
+ } else {
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(newinode, loc, fill_out);
+
+ /* fop/op */
+ ret = syncop_stat(subvol, &loc, &iatt, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret) {
+ fill_out:
+ /* Drop the reference hold in inode_find */
+ inode_unref(newinode);
+ goto out;
+ }
+
+ glfs_iatt_to_stat(fs, &iatt, stat);
+ goto found;
+ }
+ } else {
+ loc.inode = inode_new(subvol->itable);
+ if (!loc.inode) {
+ errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ ret = syncop_lookup(subvol, &loc, &iatt, 0, 0, 0);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret) {
+ gf_smsg(subvol->name, GF_LOG_WARNING, errno,
+ API_MSG_INODE_REFRESH_FAILED, "gfid=%s", uuid_utoa(loc.gfid),
+ "error=%s", strerror(errno), NULL);
+ goto out;
+ }
+
+ newinode = inode_link(loc.inode, 0, 0, &iatt);
+ if (newinode) {
+ if (newinode == loc.inode) {
+ inode_ctx_set(newinode, THIS, &ctx_value);
+ }
+ inode_lookup(newinode);
+ } else {
+ gf_smsg(subvol->name, GF_LOG_WARNING, errno, API_MSG_INODE_LINK_FAILED,
+ "gfid=%s", uuid_utoa(loc.gfid), NULL);
+ goto out;
+ }
+
+ /* populate stat */
+ if (stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+
+found:
+ object = GF_CALLOC(1, sizeof(struct glfs_object), glfs_mt_glfs_object_t);
+ if (object == NULL) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ /* populate the return object */
+ object->inode = newinode;
+ gf_uuid_copy(object->gfid, object->inode->gfid);
+
+out:
+ /* TODO: Check where the inode ref is being held? */
+ loc_wipe(&loc);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_close, 3.4.2)
+int
+pub_glfs_h_close(struct glfs_object *object)
+{
+ /* since glfs_h_* objects hold a reference to inode
+ * it is safe to keep lookup count to '0' */
+ inode_forget(object->inode, 0);
+ inode_unref(object->inode);
+ GF_FREE(object);
+
+ return 0;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_truncate, 3.4.2)
+int
+pub_glfs_h_truncate(struct glfs *fs, struct glfs_object *object, off_t offset)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if (object == NULL) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_truncate(subvol, &loc, (off_t)offset, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* populate out args */
+ if (ret == 0)
+ ret = glfs_loc_unlink(&loc);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_symlink, 3.4.2)
+struct glfs_object *
+pub_glfs_h_symlink(struct glfs *fs, struct glfs_object *parent,
+ const char *name, const char *data, struct stat *stat)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ struct glfs_object *object = NULL;
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if ((parent == NULL) || (name == NULL) || (data == NULL)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, parent);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_PINODE(inode, loc, ret, errno, out, name);
+
+ /* fop/op */
+ ret = syncop_symlink(subvol, &loc, data, &iatt, xattr_req, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* populate out args */
+ if (ret == 0) {
+ ret = glfs_loc_link(&loc, &iatt);
+ if (ret != 0) {
+ goto out;
+ }
+
+ if (stat)
+ glfs_iatt_to_stat(fs, &iatt, stat);
+
+ ret = glfs_create_object(&loc, &object);
+ }
+
+out:
+ if (ret && object != NULL) {
+ pub_glfs_h_close(object);
+ object = NULL;
+ }
+
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_readlink, 3.4.2)
+int
+pub_glfs_h_readlink(struct glfs *fs, struct glfs_object *object, char *buf,
+ size_t bufsiz)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ char *linkval = NULL;
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if ((object == NULL) || (buf == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ /* fop/op */
+ ret = syncop_readlink(subvol, &loc, &linkval, bufsiz, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ /* populate out args */
+ if (ret > 0)
+ memcpy(buf, linkval, ret);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (linkval)
+ GF_FREE(linkval);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_link, 3.4.2)
+int
+pub_glfs_h_link(struct glfs *fs, struct glfs_object *linksrc,
+ struct glfs_object *parent, const char *name)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ inode_t *pinode = NULL;
+ loc_t oldloc = {
+ 0,
+ };
+ loc_t newloc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if ((linksrc == NULL) || (parent == NULL) || (name == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, linksrc);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ if (inode->ia_type == IA_IFDIR) {
+ ret = -1;
+ errno = EISDIR;
+ goto out;
+ }
+
+ GLFS_LOC_FILL_INODE(inode, oldloc, out);
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ pinode = glfs_resolve_inode(fs, subvol, parent);
+ if (!pinode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* setup newloc based on parent */
+ newloc.parent = inode_ref(pinode);
+ newloc.name = name;
+ ret = glfs_loc_touchup(&newloc);
+ if (ret != 0) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ /* Filling the inode of the hard link to be same as that of the
+ * original file
+ */
+ newloc.inode = inode_ref(inode);
+
+ /* fop/op */
+ ret = syncop_link(subvol, &oldloc, &newloc, &iatt, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret == 0)
+ ret = glfs_loc_link(&newloc, &iatt);
+out:
+ loc_wipe(&oldloc);
+ loc_wipe(&newloc);
+
+ if (inode)
+ inode_unref(inode);
+
+ if (pinode)
+ inode_unref(pinode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_rename, 3.4.2)
+int
+pub_glfs_h_rename(struct glfs *fs, struct glfs_object *olddir,
+ const char *oldname, struct glfs_object *newdir,
+ const char *newname)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *oldpinode = NULL;
+ inode_t *newpinode = NULL;
+ loc_t oldloc = {
+ 0,
+ };
+ loc_t newloc = {
+ 0,
+ };
+ struct iatt oldiatt = {
+ 0,
+ };
+ struct iatt newiatt = {
+ 0,
+ };
+
+ DECLARE_OLD_THIS;
+
+ /* validate in args */
+ if ((olddir == NULL) || (oldname == NULL) || (newdir == NULL) ||
+ (newname == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ oldpinode = glfs_resolve_inode(fs, subvol, olddir);
+ if (!oldpinode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ ret = glfs_resolve_at(fs, subvol, oldpinode, oldname, &oldloc, &oldiatt, 0,
+ 0);
+ if (ret != 0) {
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ newpinode = glfs_resolve_inode(fs, subvol, newdir);
+ if (!newpinode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ ret = glfs_resolve_at(fs, subvol, newpinode, newname, &newloc, &newiatt, 0,
+ 0);
+
+ if (ret && errno != ENOENT && newloc.parent)
+ goto out;
+
+ if (newiatt.ia_type != IA_INVAL) {
+ if ((oldiatt.ia_type == IA_IFDIR) != (newiatt.ia_type == IA_IFDIR)) {
+ /* Either both old and new must be dirs,
+ * or both must be non-dirs. Else, fail.
+ */
+ ret = -1;
+ errno = EEXIST;
+ goto out;
+ }
+ }
+
+ /* TODO: check if new or old is a prefix of the other, and fail EINVAL */
+
+ ret = syncop_rename(subvol, &oldloc, &newloc, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret == 0) {
+ inode_rename(oldloc.parent->table, oldloc.parent, oldloc.name,
+ newloc.parent, newloc.name, oldloc.inode, &oldiatt);
+
+ if (newloc.inode && !inode_has_dentry(newloc.inode))
+ inode_forget(newloc.inode, 0);
+ }
+
+out:
+ loc_wipe(&oldloc);
+ loc_wipe(&newloc);
+
+ if (oldpinode)
+ inode_unref(oldpinode);
+
+ if (newpinode)
+ inode_unref(newpinode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+/*
+ * Given a handle/gfid, find if the corresponding inode is present in
+ * the inode table. If yes create and return the corresponding glfs_object.
+ */
+struct glfs_object *
+glfs_h_find_handle(struct glfs *fs, unsigned char *handle, int len)
+{
+ inode_t *newinode = NULL;
+ xlator_t *subvol = NULL;
+ struct glfs_object *object = NULL;
+ uuid_t gfid;
+
+ /* validate in args */
+ if ((fs == NULL) || (handle == NULL) || (len != GFAPI_HANDLE_LENGTH)) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ errno = EIO;
+ goto out;
+ }
+
+ memcpy(gfid, handle, GFAPI_HANDLE_LENGTH);
+
+ /* make sure the gfid received is valid */
+ GF_VALIDATE_OR_GOTO("glfs_h_find_handle", !(gf_uuid_is_null(gfid)), out);
+
+ newinode = inode_find(subvol->itable, gfid);
+ if (!newinode) {
+ goto out;
+ }
+
+ object = GF_CALLOC(1, sizeof(struct glfs_object), glfs_mt_glfs_object_t);
+ if (object == NULL) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ /* populate the return object. The ref taken here
+ * is un'refed when the application does glfs_h_close() */
+ object->inode = inode_ref(newinode);
+ gf_uuid_copy(object->gfid, object->inode->gfid);
+
+out:
+ /* inode_find takes a reference. Unref it. */
+ if (newinode)
+ inode_unref(newinode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return object;
+}
+
+static void
+glfs_free_upcall_inode(void *to_free)
+{
+ struct glfs_upcall_inode *arg = to_free;
+
+ if (!arg)
+ return;
+
+ if (arg->object)
+ glfs_h_close(arg->object);
+ if (arg->p_object)
+ glfs_h_close(arg->p_object);
+ if (arg->oldp_object)
+ glfs_h_close(arg->oldp_object);
+
+ GF_FREE(arg);
+}
+
+int
+glfs_h_poll_cache_invalidation(struct glfs *fs, struct glfs_upcall *up_arg,
+ struct gf_upcall *upcall_data)
+{
+ int ret = -1;
+ struct glfs_object *p_object = NULL;
+ struct glfs_object *oldp_object = NULL;
+ struct glfs_object *object = NULL;
+ struct gf_upcall_cache_invalidation *ca_data = NULL;
+ struct glfs_upcall_inode *up_inode_arg = NULL;
+
+ ca_data = upcall_data->data;
+ GF_VALIDATE_OR_GOTO("glfs_h_poll_cache_invalidation", ca_data, out);
+
+ object = glfs_h_find_handle(fs, upcall_data->gfid, GFAPI_HANDLE_LENGTH);
+ if (!object) {
+ /* The reason handle creation will fail is because we
+ * couldn't find the inode in the gfapi inode table.
+ *
+ * But since application would have taken inode_ref, the
+ * only case when this can happen is when it has closed
+ * the handle and hence will no more be interested in
+ * the upcall for this particular gfid.
+ */
+ gf_smsg(THIS->name, GF_LOG_DEBUG, errno, API_MSG_CREATE_HANDLE_FAILED,
+ "gfid=%s", uuid_utoa(upcall_data->gfid), NULL);
+ errno = ESTALE;
+ goto out;
+ }
+
+ up_inode_arg = GF_CALLOC(1, sizeof(struct glfs_upcall_inode),
+ glfs_mt_upcall_inode_t);
+ GF_VALIDATE_OR_GOTO("glfs_h_poll_cache_invalidation", up_inode_arg, out);
+
+ up_inode_arg->object = object;
+ up_inode_arg->flags = ca_data->flags;
+ up_inode_arg->expire_time_attr = ca_data->expire_time_attr;
+
+ /* XXX: Update stat as well in case of UP_*_TIMES.
+ * This will be addressed as part of INODE_UPDATE */
+ if (ca_data->flags & GFAPI_INODE_UPDATE_FLAGS) {
+ glfs_iatt_to_stat(fs, &ca_data->stat, &up_inode_arg->buf);
+ }
+
+ if (ca_data->flags & GFAPI_UP_PARENT_TIMES) {
+ p_object = glfs_h_find_handle(fs, ca_data->p_stat.ia_gfid,
+ GFAPI_HANDLE_LENGTH);
+ if (!p_object) {
+ gf_smsg(THIS->name, GF_LOG_DEBUG, errno,
+ API_MSG_CREATE_HANDLE_FAILED, "gfid=%s",
+ uuid_utoa(ca_data->p_stat.ia_gfid), NULL);
+ errno = ESTALE;
+ goto out;
+ }
+
+ glfs_iatt_to_stat(fs, &ca_data->p_stat, &up_inode_arg->p_buf);
+ }
+ up_inode_arg->p_object = p_object;
+
+ /* In case of RENAME, update old parent as well */
+ if (ca_data->flags & GFAPI_UP_RENAME) {
+ oldp_object = glfs_h_find_handle(fs, ca_data->oldp_stat.ia_gfid,
+ GFAPI_HANDLE_LENGTH);
+ if (!oldp_object) {
+ gf_smsg(THIS->name, GF_LOG_DEBUG, errno,
+ API_MSG_CREATE_HANDLE_FAILED, "gfid=%s",
+ uuid_utoa(ca_data->oldp_stat.ia_gfid), NULL);
+ errno = ESTALE;
+ /* By the time we receive upcall old parent_dir may
+ * have got removed. We still need to send upcall
+ * for the file/dir and current parent handles. */
+ up_inode_arg->oldp_object = NULL;
+ ret = 0;
+ }
+
+ glfs_iatt_to_stat(fs, &ca_data->oldp_stat, &up_inode_arg->oldp_buf);
+ }
+ up_inode_arg->oldp_object = oldp_object;
+
+ up_arg->reason = GLFS_UPCALL_INODE_INVALIDATE;
+ up_arg->event = up_inode_arg;
+ up_arg->free_event = glfs_free_upcall_inode;
+
+ ret = 0;
+
+out:
+ if (ret) {
+ /* Close p_object and oldp_object as well if being referenced.*/
+ if (object)
+ glfs_h_close(object);
+
+ /* Set reason to prevent applications from using ->event */
+ up_arg->reason = GLFS_UPCALL_EVENT_NULL;
+ GF_FREE(up_inode_arg);
+ }
+ return ret;
+}
+
+void
+glfs_release_upcall(void *ptr)
+{
+ struct glfs_upcall *to_free = ptr;
+
+ if (to_free->event)
+ to_free->free_event(to_free->event);
+}
+
+/*
+ * This API is used to poll for upcall events stored in the upcall list.
+ * Current users of this API is NFS-Ganesha. In case of any event received, it
+ * will be mapped appropriately into 'glfs_upcall' along with the handle object
+ * to be passed to NFS-Ganesha.
+ *
+ * On success, applications need to check if up_arg is not-NULL or errno is not
+ * ENOENT. glfs_upcall_get_reason() can be used to decide what kind of event
+ * has been received.
+ *
+ * Current supported upcall_events:
+ * GLFS_UPCALL_INODE_INVALIDATE
+ *
+ * After processing the event, applications need to free 'up_arg' by calling
+ * glfs_free().
+ *
+ * Also similar to I/Os, the application should ideally stop polling before
+ * calling glfs_fini(..). Hence making an assumption that 'fs' & ctx structures
+ * cannot be freed while in this routine.
+ */
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_poll_upcall, 3.7.16)
+int
+pub_glfs_h_poll_upcall(struct glfs *fs, struct glfs_upcall **up_arg)
+{
+ upcall_entry *u_list = NULL;
+ upcall_entry *tmp = NULL;
+ xlator_t *subvol = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = -1;
+ struct gf_upcall *upcall_data = NULL;
+
+ DECLARE_OLD_THIS;
+
+ if (!up_arg) {
+ errno = EINVAL;
+ goto err;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, err);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ errno = EIO;
+ goto restore;
+ }
+
+ /* Ideally applications should stop polling before calling
+ * 'glfs_fini'. Yet cross check if cleanup has started. */
+ pthread_mutex_lock(&fs->mutex);
+ {
+ ctx = fs->ctx;
+
+ if (ctx->cleanup_started) {
+ pthread_mutex_unlock(&fs->mutex);
+ goto out;
+ }
+
+ fs->pin_refcnt++;
+
+ /* once we call this function, the applications seems to be
+ * interested in events, enable caching them */
+ fs->cache_upcalls = _gf_true;
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+ pthread_mutex_lock(&fs->upcall_list_mutex);
+ {
+ list_for_each_entry_safe(u_list, tmp, &fs->upcall_list, upcall_list)
+ {
+ list_del_init(&u_list->upcall_list);
+ upcall_data = &u_list->upcall_data;
+ break;
+ }
+ }
+ /* No other thread can delete this entry. So unlock it */
+ pthread_mutex_unlock(&fs->upcall_list_mutex);
+
+ if (upcall_data) {
+ switch (upcall_data->event_type) {
+ case GF_UPCALL_CACHE_INVALIDATION:
+ *up_arg = GLFS_CALLOC(1, sizeof(struct gf_upcall),
+ glfs_release_upcall,
+ glfs_mt_upcall_entry_t);
+ if (!*up_arg) {
+ errno = ENOMEM;
+ break; /* goto free u_list */
+ }
+
+ /* XXX: Need to revisit this to support
+ * GLFS_UPCALL_INODE_UPDATE if required. */
+ ret = glfs_h_poll_cache_invalidation(fs, *up_arg, upcall_data);
+ if (ret || (*up_arg)->reason == GLFS_UPCALL_EVENT_NULL) {
+ /* It could so happen that the file which got
+ * upcall notification may have got deleted by
+ * the same client. Irrespective of the error,
+ * return with an error or success+ENOENT. */
+ if ((*up_arg)->reason == GLFS_UPCALL_EVENT_NULL)
+ errno = ENOENT;
+
+ GLFS_FREE(*up_arg);
+ *up_arg = NULL;
+ }
+ break;
+ case GF_UPCALL_RECALL_LEASE:
+ gf_log("glfs_h_poll_upcall", GF_LOG_DEBUG,
+ "UPCALL_RECALL_LEASE is not implemented yet");
+ /* fallthrough till we support leases */
+ case GF_UPCALL_EVENT_NULL:
+ /* no 'default:' label, to force handling all upcall events */
+ errno = ENOENT;
+ break;
+ }
+
+ GF_FREE(u_list->upcall_data.data);
+ GF_FREE(u_list);
+ } else {
+ /* fs->upcall_list was empty, no upcall events cached */
+ errno = ENOENT;
+ }
+
+ ret = 0;
+
+out:
+ pthread_mutex_lock(&fs->mutex);
+ {
+ fs->pin_refcnt--;
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+ glfs_subvol_done(fs, subvol);
+
+restore:
+ __GLFS_EXIT_FS;
+err:
+ return ret;
+}
+
+static gf_boolean_t log_upcall370 = _gf_true; /* log once */
+
+/* The old glfs_h_poll_upcall interface requires intimate knowledge of the
+ * structures that are returned to the calling application. This is not
+ * recommended, as the returned structures need to returned correctly (handles
+ * closed, memory free'd with the unavailable GF_FREE(), and possibly more.)
+ *
+ * To the best of our knowledge, only NFS-Ganesha uses the upcall events
+ * through gfapi. We keep this backwards compatibility function around so that
+ * applications using the existing implementation do not break.
+ *
+ * WARNING: this function will be removed in the future.
+ */
+GFAPI_SYMVER_PUBLIC(glfs_h_poll_upcall370, glfs_h_poll_upcall, 3.7.0)
+int
+pub_glfs_h_poll_upcall370(struct glfs *fs, struct glfs_callback_arg *up_arg)
+{
+ struct glfs_upcall *upcall = NULL;
+ int ret = -1;
+
+ if (log_upcall370) {
+ log_upcall370 = _gf_false;
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "this application is "
+ "compiled against an old version of libgfapi, it "
+ "should use glfs_free() to release the structure "
+ "returned by glfs_h_poll_upcall() - for more details, "
+ "see http://review.gluster.org/14701");
+ }
+
+ ret = pub_glfs_h_poll_upcall(fs, &upcall);
+ if (ret == 0) {
+ up_arg->fs = fs;
+ if ((errno == ENOENT) || !upcall || !upcall->event) {
+ up_arg->reason = GLFS_UPCALL_EVENT_NULL;
+ goto out;
+ }
+
+ up_arg->reason = upcall->reason;
+
+ if (upcall->reason == GLFS_UPCALL_INODE_INVALIDATE) {
+ struct glfs_callback_inode_arg *cb_inode = NULL;
+ struct glfs_upcall_inode *up_inode = NULL;
+
+ cb_inode = GF_CALLOC(1, sizeof(struct glfs_callback_inode_arg),
+ glfs_mt_upcall_inode_t);
+ if (!cb_inode) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ up_inode = upcall->event;
+
+ /* copy attributes one by one, the memory layout might
+ * be different between the old glfs_callback_inode_arg
+ * and new glfs_upcall_inode */
+ cb_inode->object = up_inode->object;
+ cb_inode->flags = up_inode->flags;
+ memcpy(&cb_inode->buf, &up_inode->buf, sizeof(struct stat));
+ cb_inode->expire_time_attr = up_inode->expire_time_attr;
+ cb_inode->p_object = up_inode->p_object;
+ memcpy(&cb_inode->p_buf, &up_inode->p_buf, sizeof(struct stat));
+ cb_inode->oldp_object = up_inode->oldp_object;
+ memcpy(&cb_inode->oldp_buf, &up_inode->oldp_buf,
+ sizeof(struct stat));
+
+ up_arg->event_arg = cb_inode;
+ }
+ }
+
+out:
+ if (upcall) {
+ /* we can not use glfs_free() here, objects need to stay */
+ GF_FREE(upcall->event);
+ GF_FREE(upcall);
+ }
+
+ return ret;
+}
+
+#ifdef HAVE_ACL_LIBACL_H
+#include <glusterfs/glusterfs-acl.h>
+#include <acl/libacl.h>
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_set, 3.7.0)
+int
+pub_glfs_h_acl_set(struct glfs *fs, struct glfs_object *object,
+ const acl_type_t type, const acl_t acl)
+{
+ int ret = -1;
+ char *acl_s = NULL;
+ const char *acl_key = NULL;
+ struct glfs_object *new_object = NULL;
+
+ DECLARE_OLD_THIS;
+
+ if (!object || !acl) {
+ errno = EINVAL;
+ return ret;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ acl_key = gf_posix_acl_get_key(type);
+ if (!acl_key)
+ goto out;
+
+ acl_s = acl_to_any_text(acl, NULL, ',', TEXT_ABBREVIATE | TEXT_NUMERIC_IDS);
+ if (!acl_s)
+ goto out;
+
+ if (IA_ISLNK(object->inode->ia_type)) {
+ new_object = glfs_h_resolve_symlink(fs, object);
+ if (new_object == NULL)
+ goto out;
+ } else
+ new_object = object;
+
+ ret = pub_glfs_h_setxattrs(fs, new_object, acl_key, acl_s,
+ strlen(acl_s) + 1, 0);
+
+ acl_free(acl_s);
+
+out:
+ if (IA_ISLNK(object->inode->ia_type) && new_object)
+ glfs_h_close(new_object);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_get, 3.7.0)
+acl_t
+pub_glfs_h_acl_get(struct glfs *fs, struct glfs_object *object,
+ const acl_type_t type)
+{
+ int ret = 0;
+ acl_t acl = NULL;
+ char *acl_s = NULL;
+ dict_t *xattr = NULL;
+ const char *acl_key = NULL;
+ struct glfs_object *new_object = NULL;
+
+ DECLARE_OLD_THIS;
+
+ if (!object) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ acl_key = gf_posix_acl_get_key(type);
+ if (!acl_key)
+ goto out;
+
+ if (IA_ISLNK(object->inode->ia_type)) {
+ new_object = glfs_h_resolve_symlink(fs, object);
+ if (new_object == NULL)
+ goto out;
+ } else
+ new_object = object;
+
+ ret = glfs_h_getxattrs_common(fs, new_object, &xattr, acl_key, _gf_false);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str(xattr, (char *)acl_key, &acl_s);
+ if (ret)
+ goto out;
+
+ acl = acl_from_text(acl_s);
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+
+ if (IA_ISLNK(object->inode->ia_type) && new_object)
+ glfs_h_close(new_object);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return acl;
+}
+#else /* !HAVE_ACL_LIBACL_H */
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_get, 3.7.0)
+acl_t
+pub_glfs_h_acl_get(struct glfs *fs, struct glfs_object *object,
+ const acl_type_t type)
+{
+ errno = ENOTSUP;
+ return NULL;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_acl_set, 3.7.0)
+int
+pub_glfs_h_acl_set(struct glfs *fs, struct glfs_object *object,
+ const acl_type_t type, const acl_t acl)
+{
+ errno = ENOTSUP;
+ return -1;
+}
+#endif
+
+/* The API to perform read using anonymous fd */
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_anonymous_read, 3.7.0)
+ssize_t
+pub_glfs_h_anonymous_read(struct glfs *fs, struct glfs_object *object,
+ const void *buf, size_t count, off_t offset)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = glfs_anonymous_preadv(fs, object, &iov, 1, offset, 0);
+
+ return ret;
+}
+
+/* The API to perform write using anonymous fd */
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_anonymous_write, 3.7.0)
+ssize_t
+pub_glfs_h_anonymous_write(struct glfs *fs, struct glfs_object *object,
+ const void *buf, size_t count, off_t offset)
+{
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t ret = 0;
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ iov.iov_base = (void *)buf;
+ iov.iov_len = count;
+
+ ret = glfs_anonymous_pwritev(fs, object, &iov, 1, offset, 0);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_object_copy, 3.11.0)
+struct glfs_object *
+pub_glfs_object_copy(struct glfs_object *src)
+{
+ struct glfs_object *object = NULL;
+
+ GF_VALIDATE_OR_GOTO("glfs_dup_object", src, out);
+
+ object = GF_CALLOC(1, sizeof(struct glfs_object), glfs_mt_glfs_object_t);
+ if (object == NULL) {
+ errno = ENOMEM;
+ gf_smsg(THIS->name, GF_LOG_WARNING, errno, API_MSG_CREATE_HANDLE_FAILED,
+ "glfs_dup_object gfid=%s", uuid_utoa(src->inode->gfid), NULL);
+ return NULL;
+ }
+
+ object->inode = inode_ref(src->inode);
+ gf_uuid_copy(object->gfid, src->inode->gfid);
+
+out:
+ return object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_xreaddirplus_get_object, 3.11.0)
+struct glfs_object *
+pub_glfs_xreaddirplus_get_object(struct glfs_xreaddirp_stat *xstat)
+{
+ GF_VALIDATE_OR_GOTO("glfs_xreaddirplus_get_object", xstat, out);
+
+ if (!(xstat->flags_handled & GFAPI_XREADDIRP_HANDLE))
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_HANDLE_NOT_SET,
+ "GFAPI_XREADDIRP_HANDLE xstat=%p", xstat, "handle=%x",
+ xstat->flags_handled, NULL);
+
+ return xstat->object;
+
+out:
+ return NULL;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_h_lease, 4.0.0)
+int
+pub_glfs_h_lease(struct glfs *fs, struct glfs_object *object,
+ struct glfs_lease *lease)
+{
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct gf_lease gf_lease = {
+ 0,
+ };
+
+ /* validate in args */
+ if ((fs == NULL) || (object == NULL)) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* get the active volume */
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ /* get/refresh the in arg objects inode in correlation to the xlator */
+ inode = glfs_resolve_inode(fs, subvol, object);
+ if (!inode) {
+ errno = ESTALE;
+ goto out;
+ }
+
+ /* populate loc */
+ GLFS_LOC_FILL_INODE(inode, loc, out);
+
+ glfs_lease_to_gf_lease(lease, &gf_lease);
+
+ ret = syncop_lease(subvol, &loc, &gf_lease, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ gf_lease_to_glfs_lease(&gf_lease, lease);
+
+out:
+ loc_wipe(&loc);
+
+ if (inode)
+ inode_unref(inode);
+
+ glfs_subvol_done(fs, subvol);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
diff --git a/api/src/glfs-handles.h b/api/src/glfs-handles.h
new file mode 100644
index 00000000000..4d039b9c76b
--- /dev/null
+++ b/api/src/glfs-handles.h
@@ -0,0 +1,355 @@
+/*
+ Copyright (c) 2013-2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLFS_HANDLES_H
+#define _GLFS_HANDLES_H
+
+#include "glfs.h"
+
+/* GLFS OBJECT BASED OPERATIONS
+ *
+ * The following APIs are introduced to provide an API framework that can work
+ * with gluster objects (files and directories), instead of absolute paths.
+ *
+ * The following API set can be related to the POSIX *at interfaces (like
+ * openat (2)). The intention of these APIs is to be able to operate based
+ * on parent object and looking up or creating child objects within, OR to be
+ * used on the actual object thus looked up or created, and retrieve information
+ * regarding the same.
+ *
+ * The APIs also provide for generating an opaque invariant handle to the
+ * object, that can later be used to lookup the object, instead of the regular
+ * glfs_h_* variants. The APIs that provide this behaviour are,
+ * glfs_h_extract_handle and glfs_h_create_from_handle.
+ *
+ * The object handles can be transitioned to fd based operations as supported
+ * by glfs.h calls, using the glfs_h_open call. This provides a way to move
+ * from objects to fd's akin to moving from path to fd for required operations.
+ *
+ * NOTE: The opaque invariant handle is the GFID of the object in reality, but
+ * maintained as an opaque data value, for potential internal changes to the
+ * same without impacting the caller.
+ *
+ * NOTE: Currently looking up an object can create multiple object handles to
+ * the same, i.e distinct glfs_object *. Hence each such looked up or received
+ * handle from other calls, would need to be closed. In the future, for a given
+ * object these pointers would be the same, and an ease of use API to forget all
+ * instances of this bject would be provided (instead of a per lookup close).
+ * This should not change the APIs in their current form.
+ *
+ */
+
+/* Handle length for object handles returned from glfs_h_extract_handle or
+ * glfs_h_create_from_handle */
+#define GFAPI_HANDLE_LENGTH 16
+
+/* These flags should be in sync to the ones defined in upcall.h */
+#define GFAPI_UP_NLINK 0x00000001 /* update nlink */
+#define GFAPI_UP_MODE 0x00000002 /* update mode and ctime */
+#define GFAPI_UP_OWN 0x00000004 /* update mode,uid,gid and ctime */
+#define GFAPI_UP_SIZE 0x00000008 /* update fsize */
+#define GFAPI_UP_TIMES 0x00000010 /* update all times */
+#define GFAPI_UP_ATIME 0x00000020 /* update atime only */
+#define GFAPI_UP_PERM \
+ 0x00000040 /* update fields needed for \
+ permission checking */
+#define GFAPI_UP_RENAME \
+ 0x00000080 /* this is a rename op - \
+ delete the cache entry */
+#define GFAPI_UP_FORGET \
+ 0x00000100 /* inode_forget on server side - \
+ invalidate the cache entry */
+#define GFAPI_UP_PARENT_TIMES 0x00000200 /* update parent dir times */
+
+#define GFAPI_INODE_UPDATE_FLAGS \
+ (GFAPI_UP_NLINK | GFAPI_UP_MODE | GFAPI_UP_OWN | GFAPI_UP_SIZE | \
+ GFAPI_UP_TIMES | GFAPI_UP_ATIME)
+
+/* Portability non glibc c++ build systems */
+#ifndef __THROW
+#if defined __cplusplus
+#define __THROW throw()
+#else
+#define __THROW
+#endif
+#endif
+
+__BEGIN_DECLS
+
+/*
+ * Notes:
+ *
+ * The file object handle. One per looked up, created file/directory
+ *
+ * This had been introduced to facilitate gfid/inode based gfapi
+ * - a requirement introduced by nfs-ganesha
+ */
+struct glfs_object;
+typedef struct glfs_object glfs_object_t;
+
+/* Functions for getting details about the glfs_upcall_inode
+ *
+ * None of the pointers returned by the below functions should be free()'d,
+ * glfs_free()'d or glfs_h_close()'d by the application.
+ *
+ * Releasing of the structures is done by passing the glfs_upcall pointer
+ * to glfs_free().
+ */
+struct glfs_upcall_inode;
+typedef struct glfs_upcall_inode glfs_upcall_inode_t;
+
+glfs_object_t *
+glfs_upcall_inode_get_object(glfs_upcall_inode_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_inode_get_object, 3.7.16);
+
+uint64_t
+glfs_upcall_inode_get_flags(glfs_upcall_inode_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_inode_get_flags, 3.7.16);
+
+struct stat *
+glfs_upcall_inode_get_stat(glfs_upcall_inode_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_inode_get_stat, 3.7.16);
+
+uint64_t
+glfs_upcall_inode_get_expire(glfs_upcall_inode_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_inode_get_expire, 3.7.16);
+
+glfs_object_t *
+glfs_upcall_inode_get_pobject(glfs_upcall_inode_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_inode_get_pobject, 3.7.16);
+
+struct stat *
+glfs_upcall_inode_get_pstat(glfs_upcall_inode_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_inode_get_pstat, 3.7.16);
+
+glfs_object_t *
+glfs_upcall_inode_get_oldpobject(glfs_upcall_inode_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_inode_get_oldpobject, 3.7.16);
+
+struct stat *
+glfs_upcall_inode_get_oldpstat(glfs_upcall_inode_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_inode_get_oldpstat, 3.7.16);
+
+/* Handle based operations */
+/* Operations that generate handles */
+glfs_object_t *
+glfs_h_lookupat(glfs_t *fs, glfs_object_t *parent, const char *path,
+ struct stat *stat, int follow) __THROW
+ GFAPI_PUBLIC(glfs_h_lookupat, 3.7.4);
+
+glfs_object_t *
+glfs_h_creat(glfs_t *fs, glfs_object_t *parent, const char *path, int flags,
+ mode_t mode, struct stat *sb) __THROW
+ GFAPI_PUBLIC(glfs_h_creat, 3.4.2);
+
+glfs_object_t *
+glfs_h_mkdir(glfs_t *fs, glfs_object_t *parent, const char *path, mode_t flags,
+ struct stat *sb) __THROW GFAPI_PUBLIC(glfs_h_mkdir, 3.4.2);
+
+glfs_object_t *
+glfs_h_mknod(glfs_t *fs, glfs_object_t *parent, const char *path, mode_t mode,
+ dev_t dev, struct stat *sb) __THROW
+ GFAPI_PUBLIC(glfs_h_mknod, 3.4.2);
+
+glfs_object_t *
+glfs_h_symlink(glfs_t *fs, glfs_object_t *parent, const char *name,
+ const char *data, struct stat *stat) __THROW
+ GFAPI_PUBLIC(glfs_h_symlink, 3.4.2);
+
+/* Operations on the actual objects */
+int
+glfs_h_unlink(glfs_t *fs, glfs_object_t *parent, const char *path) __THROW
+ GFAPI_PUBLIC(glfs_h_unlink, 3.4.2);
+
+int
+glfs_h_close(glfs_object_t *object) __THROW GFAPI_PUBLIC(glfs_h_close, 3.4.2);
+
+int
+glfs_caller_specific_init(void *uid_caller_key, void *gid_caller_key,
+ void *future) __THROW
+ GFAPI_PUBLIC(glfs_caller_specific_init, 3.5.0);
+
+int
+glfs_h_truncate(glfs_t *fs, glfs_object_t *object, off_t offset) __THROW
+ GFAPI_PUBLIC(glfs_h_truncate, 3.4.2);
+
+int
+glfs_h_stat(glfs_t *fs, glfs_object_t *object, struct stat *stat) __THROW
+ GFAPI_PUBLIC(glfs_h_stat, 3.4.2);
+
+int
+glfs_h_statfs(glfs_t *fs, glfs_object_t *object, struct statvfs *stat) __THROW
+ GFAPI_PUBLIC(glfs_h_statfs, 3.7.0);
+
+int
+glfs_h_getattrs(glfs_t *fs, glfs_object_t *object, struct stat *stat) __THROW
+ GFAPI_PUBLIC(glfs_h_getattrs, 3.4.2);
+
+int
+glfs_h_getxattrs(glfs_t *fs, glfs_object_t *object, const char *name,
+ void *value, size_t size) __THROW
+ GFAPI_PUBLIC(glfs_h_getxattrs, 3.5.1);
+
+int
+glfs_h_setattrs(glfs_t *fs, glfs_object_t *object, struct stat *sb,
+ int valid) __THROW GFAPI_PUBLIC(glfs_h_setattrs, 3.4.2);
+
+int
+glfs_h_setxattrs(glfs_t *fs, glfs_object_t *object, const char *name,
+ const void *value, size_t size, int flags) __THROW
+ GFAPI_PUBLIC(glfs_h_setxattrs, 3.5.0);
+
+int
+glfs_h_readlink(glfs_t *fs, glfs_object_t *object, char *buf,
+ size_t bufsiz) __THROW GFAPI_PUBLIC(glfs_h_readlink, 3.4.2);
+
+int
+glfs_h_link(glfs_t *fs, glfs_object_t *linktgt, glfs_object_t *parent,
+ const char *name) __THROW GFAPI_PUBLIC(glfs_h_link, 3.4.2);
+
+int
+glfs_h_rename(glfs_t *fs, glfs_object_t *olddir, const char *oldname,
+ glfs_object_t *newdir, const char *newname) __THROW
+ GFAPI_PUBLIC(glfs_h_rename, 3.4.2);
+
+int
+glfs_h_removexattrs(glfs_t *fs, glfs_object_t *object, const char *name) __THROW
+ GFAPI_PUBLIC(glfs_h_removexattrs, 3.5.1);
+
+/* Operations enabling opaque invariant handle to object transitions */
+ssize_t
+glfs_h_extract_handle(glfs_object_t *object, unsigned char *handle,
+ int len) __THROW
+ GFAPI_PUBLIC(glfs_h_extract_handle, 3.4.2);
+
+/* Given a handle, looks up the inode and creates glfs_object.
+ * In addition, if provided 'stat', copies the inode attributes
+ */
+glfs_object_t *
+glfs_h_create_from_handle(glfs_t *fs, unsigned char *handle, int len,
+ struct stat *stat) __THROW
+ GFAPI_PUBLIC(glfs_h_create_from_handle, 3.4.2);
+
+/* Operations enabling object handles to fd transitions */
+glfs_fd_t *
+glfs_h_opendir(glfs_t *fs, glfs_object_t *object) __THROW
+ GFAPI_PUBLIC(glfs_h_opendir, 3.4.2);
+
+glfs_fd_t *
+glfs_h_open(glfs_t *fs, glfs_object_t *object, int flags) __THROW
+ GFAPI_PUBLIC(glfs_h_open, 3.4.2);
+
+int
+glfs_h_access(glfs_t *fs, glfs_object_t *object, int mask) __THROW
+ GFAPI_PUBLIC(glfs_h_access, 3.6.0);
+
+struct glfs_object *
+glfs_h_creat_open(struct glfs *fs, struct glfs_object *parent, const char *path,
+ int flags, mode_t mode, struct stat *stat,
+ struct glfs_fd **out_fd) __THROW
+ GFAPI_PUBLIC(glfs_h_creat_open, 6.6);
+/*
+ SYNOPSIS
+
+ glfs_h_poll_upcall: Poll for upcall events given a 'glfs' object.
+
+ DESCRIPTION
+
+ This API is used to poll for upcall events stored in the
+ upcall list. Current users of this API is NFS-Ganesha.
+ In case of any event received, it will be mapped appropriately
+ into 'glfs_upcall' along with the handle('glfs_object') to be
+ passed to NFS-Ganesha.
+
+ In case of success, applications need to check the value of
+ cbk->handle to be NON NULL before processing the upcall
+ events.
+
+ PARAMETERS
+
+ @fs: glfs object to poll the upcall events for
+ @cbk: Pointer that will contain an upcall event for use by the application.
+ Application is responsible for free'ing the structure with glfs_free().
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Error condition, mostly due to out of memory.
+
+*/
+
+int
+glfs_h_poll_upcall(glfs_t *fs, glfs_upcall_t **cbk) __THROW
+ GFAPI_PUBLIC(glfs_h_poll_upcall, 3.7.16);
+
+int
+glfs_h_acl_set(glfs_t *fs, glfs_object_t *object, const acl_type_t type,
+ const acl_t acl) __THROW GFAPI_PUBLIC(glfs_h_acl_set, 3.7.0);
+
+acl_t
+glfs_h_acl_get(glfs_t *fs, glfs_object_t *object, const acl_type_t type) __THROW
+ GFAPI_PUBLIC(glfs_h_acl_get, 3.7.0);
+
+size_t
+glfs_h_anonymous_write(glfs_t *fs, glfs_object_t *object, const void *buf,
+ size_t count, off_t offset) __THROW
+ GFAPI_PUBLIC(glfs_h_anonymous_write, 3.7.0);
+
+ssize_t
+glfs_h_anonymous_read(glfs_t *fs, glfs_object_t *object, const void *buf,
+ size_t count, off_t offset) __THROW
+ GFAPI_PUBLIC(glfs_h_anonymous_read, 3.7.0);
+
+/*
+ * Caution: The object returned by this object gets freed as part
+ * of 'glfs_free(xstat)'. Make sure to have a copy using 'glfs_object_copy()'
+ * to use post that.
+ */
+glfs_object_t *
+glfs_xreaddirplus_get_object(struct glfs_xreaddirp_stat *xstat) __THROW
+ GFAPI_PUBLIC(glfs_xreaddirplus_get_object, 3.11.0);
+
+/* Applications should close the object returned by this routine
+ * explicitly using 'glfs_h_close()'
+ */
+glfs_object_t *
+glfs_object_copy(glfs_object_t *src) __THROW
+ GFAPI_PUBLIC(glfs_object_copy, 3.11.0);
+
+int
+glfs_h_lease(glfs_t *fs, glfs_object_t *object, glfs_lease_t *lease) __THROW
+ GFAPI_PUBLIC(glfs_h_lease, 4.0.0);
+
+glfs_object_t *
+glfs_h_find_handle(glfs_t *fs, unsigned char *handle, int len) __THROW
+ GFAPI_PUBLIC(glfs_h_lease, 4.0.0);
+
+/* Functions for getting details about the glfs_upcall_lease
+ *
+ * None of the pointers returned by the below functions should be free()'d,
+ * glfs_free()'d or glfs_h_close()'d by the application.
+ *
+ * Releasing of the structures is done by passing the glfs_upcall pointer
+ * to glfs_free().
+ */
+struct glfs_upcall_lease;
+typedef struct glfs_upcall_lease glfs_upcall_lease_t;
+
+glfs_object_t *
+glfs_upcall_lease_get_object(glfs_upcall_lease_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_lease_get_object, 4.1.6);
+
+uint32_t
+glfs_upcall_lease_get_lease_type(glfs_upcall_lease_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_lease_get_lease_type, 4.1.6);
+
+__END_DECLS
+
+#endif /* !_GLFS_HANDLES_H */
diff --git a/api/src/glfs-internal.h b/api/src/glfs-internal.h
new file mode 100644
index 00000000000..7cc3b18a104
--- /dev/null
+++ b/api/src/glfs-internal.h
@@ -0,0 +1,756 @@
+/*
+ Copyright (c) 2012-2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLFS_INTERNAL_H
+#define _GLFS_INTERNAL_H
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/upcall-utils.h>
+#include "glfs-handles.h"
+#include <glusterfs/refcount.h>
+#include <glusterfs/syncop.h>
+
+#define GLFS_SYMLINK_MAX_FOLLOW 2048
+
+#define DEFAULT_REVAL_COUNT 1
+
+/*
+ * According to pthread mutex and conditional variable ( cond,
+ * child_down_count, upcall mutex and mutex) initialization of struct glfs
+ * members, below GLFS_INIT_* flags are set in 'pthread_flags' member of struct
+ * glfs. The flags are set from glfs_init() and glfs_new_from_ctx() functions
+ * as part of fs inititialization.
+ *
+ * These flag bits are validated in glfs_fini() to destroy all or partially
+ * initialized mutex and conditional variables of glfs object.
+ * If you introduce new pthread mutex or conditional variable in glfs object,
+ * please make sure you have a flag bit intorduced here for proper cleanup
+ * in glfs_fini().
+ *
+ */
+
+#define PTHREAD_MUTEX_INIT(mutex, attr, flags, mask, label) \
+ do { \
+ int __ret = -1; \
+ __ret = pthread_mutex_init(mutex, attr); \
+ if (__ret == 0) \
+ flags |= mask; \
+ else \
+ goto label; \
+ } while (0)
+
+#define PTHREAD_MUTEX_DESTROY(mutex, flags, mask) \
+ do { \
+ if (flags & mask) \
+ (void)pthread_mutex_destroy(mutex); \
+ } while (0)
+
+#define PTHREAD_COND_INIT(cond, attr, flags, mask, label) \
+ do { \
+ int __ret = -1; \
+ __ret = pthread_cond_init(cond, attr); \
+ if (__ret == 0) \
+ flags |= mask; \
+ else \
+ goto label; \
+ } while (0)
+
+#define PTHREAD_COND_DESTROY(cond, flags, mask) \
+ do { \
+ if (flags & mask) \
+ (void)pthread_cond_destroy(cond); \
+ } while (0)
+
+#define GLFS_INIT_MUTEX 0x00000001 /* pthread_mutex_flag */
+#define GLFS_INIT_COND 0x00000002 /* pthread_cond_flag */
+#define GLFS_INIT_COND_CHILD 0x00000004 /* pthread_cond_child_down_flag */
+#define GLFS_INIT_MUTEX_UPCALL 0x00000008 /* pthread_mutex_upcall_flag */
+
+#ifndef GF_DARWIN_HOST_OS
+#ifndef GFAPI_PUBLIC
+#define GFAPI_PUBLIC(sym, ver) /**/
+#endif
+#ifndef GFAPI_PRIVATE
+#define GFAPI_PRIVATE(sym, ver) /**/
+#endif
+#if __GNUC__ >= 10
+#define GFAPI_SYMVER_PUBLIC_DEFAULT(fn, ver) \
+ __attribute__((__symver__(STR(fn) "@@GFAPI_" STR(ver))))
+
+#define GFAPI_SYMVER_PRIVATE_DEFAULT(fn, ver) \
+ __attribute__((__symver__(STR(fn) "@@GFAPI_PRIVATE_" STR(ver))))
+
+#define GFAPI_SYMVER_PUBLIC(fn1, fn2, ver) \
+ __attribute__((__symver__(STR(fn2) "@GFAPI_" STR(ver))))
+
+#define GFAPI_SYMVER_PRIVATE(fn1, fn2, ver) \
+ __attribute__((__symver__(STR(fn2) "@GFAPI_PRIVATE_" STR(ver))))
+
+#else
+#define GFAPI_SYMVER_PUBLIC_DEFAULT(fn, ver) \
+ asm(".symver pub_" STR(fn) ", " STR(fn) "@@GFAPI_" STR(ver));
+
+#define GFAPI_SYMVER_PRIVATE_DEFAULT(fn, ver) \
+ asm(".symver priv_" STR(fn) ", " STR(fn) "@@GFAPI_PRIVATE_" STR(ver));
+
+#define GFAPI_SYMVER_PUBLIC(fn1, fn2, ver) \
+ asm(".symver pub_" STR(fn1) ", " STR(fn2) "@GFAPI_" STR(ver));
+
+#define GFAPI_SYMVER_PRIVATE(fn1, fn2, ver) \
+ asm(".symver priv_" STR(fn1) ", " STR(fn2) "@GFAPI_PRIVATE_" STR(ver));
+#endif
+#define STR(str) #str
+#else
+#ifndef GFAPI_PUBLIC
+#define GFAPI_PUBLIC(sym, ver) __asm("_" __STRING(sym) "$GFAPI_" __STRING(ver));
+#endif
+#ifndef GFAPI_PRIVATE
+#define GFAPI_PRIVATE(sym, ver) \
+ __asm("_" __STRING(sym) "$GFAPI_PRIVATE_" __STRING(ver));
+#endif
+#define GFAPI_SYMVER_PUBLIC_DEFAULT(fn, dotver) /**/
+#define GFAPI_SYMVER_PRIVATE_DEFAULT(fn, dotver) /**/
+#define GFAPI_SYMVER_PUBLIC(fn1, fn2, dotver) /**/
+#define GFAPI_SYMVER_PRIVATE(fn1, fn2, dotver) /**/
+#endif
+
+#define ESTALE_RETRY(ret, errno, reval, loc, label) \
+ do { \
+ if (ret == -1 && errno == ESTALE) { \
+ if (reval < DEFAULT_REVAL_COUNT) { \
+ reval++; \
+ loc_wipe(loc); \
+ goto label; \
+ } \
+ } \
+ } while (0)
+
+#define GLFS_LOC_FILL_INODE(oinode, loc, label) \
+ do { \
+ loc.inode = inode_ref(oinode); \
+ gf_uuid_copy(loc.gfid, oinode->gfid); \
+ ret = glfs_loc_touchup(&loc); \
+ if (ret != 0) { \
+ errno = EINVAL; \
+ goto label; \
+ } \
+ } while (0)
+
+#define GLFS_LOC_FILL_PINODE(pinode, loc, ret, errno, label, path) \
+ do { \
+ loc.inode = inode_new(pinode->table); \
+ if (!loc.inode) { \
+ ret = -1; \
+ errno = ENOMEM; \
+ goto label; \
+ } \
+ loc.parent = inode_ref(pinode); \
+ loc.name = path; \
+ ret = glfs_loc_touchup(&loc); \
+ if (ret != 0) { \
+ errno = EINVAL; \
+ goto label; \
+ } \
+ } while (0)
+
+struct glfs;
+
+struct _upcall_entry {
+ struct list_head upcall_list;
+ struct gf_upcall upcall_data;
+};
+typedef struct _upcall_entry upcall_entry;
+
+typedef int (*glfs_init_cbk)(struct glfs *fs, int ret);
+
+struct glfs {
+ char *volname;
+ uuid_t vol_uuid;
+
+ glusterfs_ctx_t *ctx;
+
+ pthread_t poller;
+
+ glfs_init_cbk init_cbk;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ pthread_cond_t child_down_cond; /* for broadcasting CHILD_DOWN */
+ int init;
+ int ret;
+ int err;
+
+ xlator_t *active_subvol; /* active graph */
+ xlator_t *mip_subvol; /* graph for which migration is in
+ * progress */
+ xlator_t *next_subvol; /* Any new graph is put to
+ * next_subvol, the graph in
+ * next_subvol can either be moved
+ * to mip_subvol (if any IO picks it
+ * up for migration), or be
+ * destroyed (if there is a new
+ * graph, and this was never picked
+ * for migration) */
+ xlator_t *old_subvol;
+
+ char *oldvolfile;
+ ssize_t oldvollen;
+
+ inode_t *cwd;
+
+ uint32_t dev_id; /* Used to fill st_dev in struct stat */
+
+ struct list_head openfds;
+
+ gf_boolean_t migration_in_progress;
+
+ gf_boolean_t cache_upcalls; /* add upcalls to the upcall_list? */
+ struct list_head upcall_list;
+ pthread_mutex_t upcall_list_mutex; /* mutex for upcall entry list */
+
+ uint32_t pin_refcnt;
+ uint32_t pthread_flags; /* GLFS_INIT_* # defines set this flag */
+
+ uint32_t upcall_events; /* Mask of upcall events application
+ * is interested in */
+ glfs_upcall_cbk up_cbk; /* upcall cbk function to be registered */
+ void *up_data; /* Opaque data provided by application
+ * during upcall registration */
+ struct list_head waitq; /* waiting synctasks */
+};
+
+/* This enum is used to maintain the state of glfd. In case of async fops
+ * fd might be closed before the actual fop is complete. Therefore we need
+ * to track whether the fd is closed or not, instead actually closing it.*/
+enum glfs_fd_state { GLFD_INIT, GLFD_OPEN, GLFD_CLOSE };
+
+struct glfs_fd {
+ struct list_head openfds;
+ struct list_head list;
+ GF_REF_DECL;
+ struct glfs *fs;
+ enum glfs_fd_state state;
+ off_t offset;
+ fd_t *fd; /* Currently guared by @fs->mutex. TODO: per-glfd lock */
+ struct list_head entries;
+ gf_dirent_t *next;
+ struct dirent *readdirbuf;
+ gf_lkowner_t lk_owner;
+ glfs_leaseid_t lease_id; /* Stores lease_id of client in glfd */
+ gf_lock_t lock; /* lock taken before updating fd state */
+ glfs_recall_cbk cbk;
+ void *cookie;
+};
+
+/* glfs object handle introduced for the alternate gfapi implementation based
+ on glfs handles/gfid/inode
+*/
+struct glfs_object {
+ inode_t *inode;
+ uuid_t gfid;
+};
+
+struct glfs_upcall {
+ struct glfs *fs; /* glfs object */
+ enum glfs_upcall_reason reason; /* Upcall event type */
+ void *event; /* changes based in the event type */
+ void (*free_event)(void *); /* free event after the usage */
+};
+
+struct glfs_upcall_inode {
+ struct glfs_object *object; /* Object which need to be acted upon */
+ int flags; /* Cache UPDATE/INVALIDATE flags */
+ struct stat buf; /* Latest stat of this entry */
+ unsigned int expire_time_attr; /* the amount of time for which
+ * the application need to cache
+ * this entry */
+ struct glfs_object *p_object; /* parent Object to be updated */
+ struct stat p_buf; /* Latest stat of parent dir handle */
+ struct glfs_object *oldp_object; /* Old parent Object to be updated */
+ struct stat oldp_buf; /* Latest stat of old parent dir handle */
+};
+
+struct glfs_upcall_lease {
+ struct glfs_object *object; /* Object which need to be acted upon */
+ uint32_t lease_type; /* Lease type to which client can downgrade to*/
+};
+
+struct glfs_upcall_lease_fd {
+ uint32_t lease_type; /* Lease type to which client can downgrade to*/
+ void *fd_cookie; /* Object which need to be acted upon */
+};
+
+struct glfs_xreaddirp_stat {
+ struct stat
+ st; /* Stat for that dirent - corresponds to GFAPI_XREADDIRP_STAT */
+ struct glfs_object *object; /* handled for GFAPI_XREADDIRP_HANDLE */
+ uint32_t flags_handled; /* final set of flags successfulyy handled */
+};
+
+#define DEFAULT_EVENT_POOL_SIZE 16384
+#define GF_MEMPOOL_COUNT_OF_DICT_T 4096
+#define GF_MEMPOOL_COUNT_OF_DATA_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
+#define GF_MEMPOOL_COUNT_OF_DATA_PAIR_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
+
+#define GF_MEMPOOL_COUNT_OF_LRU_BUF_T 256
+
+typedef void(glfs_mem_release_t)(void *ptr);
+
+struct glfs_mem_header {
+ uint32_t magic;
+ size_t nmemb;
+ size_t size;
+ glfs_mem_release_t *release;
+};
+
+#define GLFS_MEM_HEADER_SIZE (sizeof(struct glfs_mem_header))
+#define GLFS_MEM_HEADER_MAGIC 0x20170830
+
+static inline void *
+__glfs_calloc(size_t nmemb, size_t size, glfs_mem_release_t release,
+ uint32_t type, const char *typestr)
+{
+ struct glfs_mem_header *header = NULL;
+
+ header = __gf_calloc(nmemb, (size + GLFS_MEM_HEADER_SIZE), type, typestr);
+ if (!header)
+ return NULL;
+
+ header->magic = GLFS_MEM_HEADER_MAGIC;
+ header->nmemb = nmemb;
+ header->size = size;
+ header->release = release;
+
+ return header + 1;
+}
+
+static inline void *
+__glfs_malloc(size_t size, glfs_mem_release_t release, uint32_t type,
+ const char *typestr)
+{
+ struct glfs_mem_header *header = NULL;
+
+ header = __gf_malloc((size + GLFS_MEM_HEADER_SIZE), type, typestr);
+ if (!header)
+ return NULL;
+
+ header->magic = GLFS_MEM_HEADER_MAGIC;
+ header->nmemb = 1;
+ header->size = size;
+ header->release = release;
+
+ return header + 1;
+}
+
+static inline void *
+__glfs_realloc(void *ptr, size_t size)
+{
+ struct glfs_mem_header *old_header = NULL;
+ struct glfs_mem_header *new_header = NULL;
+ struct glfs_mem_header tmp_header;
+ void *new_ptr = NULL;
+
+ GF_ASSERT(NULL != ptr);
+
+ old_header = (struct glfs_mem_header *)(ptr - GLFS_MEM_HEADER_SIZE);
+ GF_ASSERT(old_header->magic == GLFS_MEM_HEADER_MAGIC);
+ tmp_header = *old_header;
+
+ new_ptr = __gf_realloc(old_header, (size + GLFS_MEM_HEADER_SIZE));
+ if (!new_ptr)
+ return NULL;
+
+ new_header = (struct glfs_mem_header *)new_ptr;
+ *new_header = tmp_header;
+ new_header->size = size;
+
+ return new_header + 1;
+}
+
+static inline void
+__glfs_free(void *free_ptr)
+{
+ struct glfs_mem_header *header = NULL;
+ void *release_ptr = NULL;
+ int i = 0;
+
+ if (!free_ptr)
+ return;
+
+ header = (struct glfs_mem_header *)(free_ptr - GLFS_MEM_HEADER_SIZE);
+ GF_ASSERT(header->magic == GLFS_MEM_HEADER_MAGIC);
+
+ if (header->release) {
+ release_ptr = free_ptr;
+ for (i = 0; i < header->nmemb; i++) {
+ header->release(release_ptr);
+ release_ptr += header->size;
+ }
+ }
+
+ __gf_free(header);
+}
+
+#define GLFS_CALLOC(nmemb, size, release, type) \
+ __glfs_calloc(nmemb, size, release, type, #type)
+
+#define GLFS_MALLOC(size, release, type) \
+ __glfs_malloc(size, release, type, #type)
+
+#define GLFS_REALLOC(ptr, size) __glfs_realloc(ptr, size)
+
+#define GLFS_FREE(free_ptr) __glfs_free(free_ptr)
+
+int
+glfs_mgmt_init(struct glfs *fs);
+void
+glfs_init_done(struct glfs *fs, int ret) GFAPI_PRIVATE(glfs_init_done, 3.4.0);
+int
+glfs_process_volfp(struct glfs *fs, FILE *fp);
+int
+glfs_resolve(struct glfs *fs, xlator_t *subvol, const char *path, loc_t *loc,
+ struct iatt *iatt, int reval) GFAPI_PRIVATE(glfs_resolve, 3.7.0);
+int
+glfs_lresolve(struct glfs *fs, xlator_t *subvol, const char *path, loc_t *loc,
+ struct iatt *iatt, int reval);
+fd_t *
+glfs_resolve_fd(struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd);
+
+fd_t *
+__glfs_migrate_fd(struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd);
+
+int
+glfs_first_lookup(xlator_t *subvol);
+
+void
+glfs_process_upcall_event(struct glfs *fs, void *data)
+ GFAPI_PRIVATE(glfs_process_upcall_event, 3.7.0);
+
+#define __GLFS_ENTRY_VALIDATE_FS(fs, label) \
+ do { \
+ if (!fs) { \
+ errno = EINVAL; \
+ goto label; \
+ } \
+ old_THIS = THIS; \
+ THIS = fs->ctx->master; \
+ } while (0)
+
+#define __GLFS_EXIT_FS \
+ do { \
+ THIS = old_THIS; \
+ } while (0)
+
+#define __GLFS_ENTRY_VALIDATE_FD(glfd, label) \
+ do { \
+ if (!glfd || !glfd->fd || !glfd->fd->inode || \
+ glfd->state != GLFD_OPEN) { \
+ errno = EBADF; \
+ goto label; \
+ } \
+ old_THIS = THIS; \
+ THIS = glfd->fd->inode->table->xl->ctx->master; \
+ } while (0)
+
+#define __GLFS_LOCK_WAIT(fs) \
+ do { \
+ struct synctask *task = NULL; \
+ \
+ task = synctask_get(); \
+ \
+ if (task) { \
+ list_add_tail(&task->waitq, &fs->waitq); \
+ pthread_mutex_unlock(&fs->mutex); \
+ synctask_yield(task, NULL); \
+ pthread_mutex_lock(&fs->mutex); \
+ } else { \
+ /* non-synctask */ \
+ pthread_cond_wait(&fs->cond, &fs->mutex); \
+ } \
+ } while (0)
+
+#define __GLFS_SYNCTASK_WAKE(fs) \
+ do { \
+ struct synctask *waittask = NULL; \
+ \
+ while (!list_empty(&fs->waitq)) { \
+ waittask = list_entry(fs->waitq.next, struct synctask, waitq); \
+ list_del_init(&waittask->waitq); \
+ synctask_wake(waittask); \
+ } \
+ } while (0)
+
+/*
+ By default all lock attempts from user context must
+ use glfs_lock() and glfs_unlock(). This allows
+ for a safe implementation of graph migration where
+ we can give up the mutex during syncop calls so
+ that bottom up calls (particularly CHILD_UP notify)
+ can do a mutex_lock() on @glfs without deadlocking
+ the filesystem.
+
+ All the fops should wait for graph migration to finish
+ before starting the fops. Therefore these functions should
+ call glfs_lock with wait_for_migration as true. But waiting
+ for migration to finish in call-back path can result thread
+ dead-locks. The reason for this is we only have finite
+ number of epoll threads. so if we wait on epoll threads
+ there will not be any thread left to handle outstanding
+ rpc replies.
+*/
+static inline int
+glfs_lock(struct glfs *fs, gf_boolean_t wait_for_migration)
+{
+ pthread_mutex_lock(&fs->mutex);
+
+ while (!fs->init)
+ __GLFS_LOCK_WAIT(fs);
+
+ while (wait_for_migration && fs->migration_in_progress)
+ __GLFS_LOCK_WAIT(fs);
+
+ return 0;
+}
+
+static inline void
+glfs_unlock(struct glfs *fs)
+{
+ pthread_mutex_unlock(&fs->mutex);
+}
+
+struct glfs_fd *
+glfs_fd_new(struct glfs *fs);
+void
+glfs_fd_bind(struct glfs_fd *glfd);
+void
+glfd_set_state_bind(struct glfs_fd *glfd);
+
+xlator_t *
+glfs_active_subvol(struct glfs *fs) GFAPI_PRIVATE(glfs_active_subvol, 3.4.0);
+xlator_t *
+__glfs_active_subvol(struct glfs *fs);
+void
+glfs_subvol_done(struct glfs *fs, xlator_t *subvol)
+ GFAPI_PRIVATE(glfs_subvol_done, 3.4.0);
+
+inode_t *
+glfs_refresh_inode(xlator_t *subvol, inode_t *inode);
+
+inode_t *
+glfs_cwd_get(struct glfs *fs);
+int
+glfs_cwd_set(struct glfs *fs, inode_t *inode);
+inode_t *
+glfs_resolve_inode(struct glfs *fs, xlator_t *subvol,
+ struct glfs_object *object);
+int
+glfs_create_object(loc_t *loc, struct glfs_object **retobject);
+int
+__glfs_cwd_set(struct glfs *fs, inode_t *inode);
+
+int
+glfs_resolve_base(struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ struct iatt *iatt);
+
+int
+glfs_resolve_at(struct glfs *fs, xlator_t *subvol, inode_t *at,
+ const char *origpath, loc_t *loc, struct iatt *iatt, int follow,
+ int reval) GFAPI_PRIVATE(glfs_resolve_at, 3.4.0);
+int
+glfs_loc_touchup(loc_t *loc) GFAPI_PRIVATE(glfs_loc_touchup, 3.4.0);
+void
+glfs_iatt_to_stat(struct glfs *fs, struct iatt *iatt, struct stat *stat);
+void
+glfs_iatt_from_stat(struct stat *stat, int valid, struct iatt *iatt,
+ int *gvalid);
+int
+glfs_loc_link(loc_t *loc, struct iatt *iatt);
+int
+glfs_loc_unlink(loc_t *loc);
+int
+glfs_getxattr_process(void *value, size_t size, dict_t *xattr,
+ const char *name);
+
+/* Sends RPC call to glusterd to fetch required volume info */
+int
+glfs_get_volume_info(struct glfs *fs);
+
+/*
+ SYNOPSIS
+
+ glfs_new_from_ctx: Creates a virtual mount object by taking a
+ glusterfs_ctx_t object.
+
+ DESCRIPTION
+
+ glfs_new_from_ctx() is not same as glfs_new(). It takes the
+ glusterfs_ctx_t object instead of creating one by glusterfs_ctx_new().
+ Again the usage is restricted to NFS MOUNT over UDP i.e. in
+ glfs_resolve_at() which would take fs object as input but never use
+ (purpose is not to change the ABI of glfs_resolve_at()).
+
+ PARAMETERS
+
+ @ctx: glusterfs_ctx_t object
+
+ RETURN VALUES
+
+ fs : Pointer to the newly created glfs_t object.
+ NULL : Otherwise.
+*/
+
+struct glfs *
+glfs_new_from_ctx(glusterfs_ctx_t *ctx) GFAPI_PRIVATE(glfs_new_from_ctx, 3.7.0);
+
+/*
+ SYNOPSIS
+
+ glfs_free_from_ctx: Free up the memory occupied by glfs_t object
+ created by glfs_new_from_ctx().
+
+ DESCRIPTION
+
+ The glfs_t object allocated by glfs_new_from_ctx() must be released
+ by the caller using this routine. The usage can be found
+ at glfs_fini() or NFS, MOUNT over UDP i.e.
+ __mnt3udp_get_export_subdir_inode ()
+ => glfs_resolve_at().
+
+ PARAMETERS
+
+ @fs: The glfs_t object to be deallocated.
+
+ RETURN VALUES
+
+ void
+*/
+
+void
+glfs_free_from_ctx(struct glfs *fs) GFAPI_PRIVATE(glfs_free_from_ctx, 3.7.0);
+
+int
+glfs_recall_lease_fd(struct glfs *fs, struct gf_upcall *up_data);
+
+int
+glfs_get_upcall_cache_invalidation(struct gf_upcall *to_up_data,
+ struct gf_upcall *from_up_data);
+int
+glfs_h_poll_cache_invalidation(struct glfs *fs, struct glfs_upcall *up_arg,
+ struct gf_upcall *upcall_data);
+
+ssize_t
+glfs_anonymous_preadv(struct glfs *fs, struct glfs_object *object,
+ const struct iovec *iovec, int iovcnt, off_t offset,
+ int flags);
+ssize_t
+glfs_anonymous_pwritev(struct glfs *fs, struct glfs_object *object,
+ const struct iovec *iovec, int iovcnt, off_t offset,
+ int flags);
+
+struct glfs_object *
+glfs_h_resolve_symlink(struct glfs *fs, struct glfs_object *object);
+
+/* Deprecated structures that were passed to client applications, replaced by
+ * accessor functions. Do not use these in new applications, and update older
+ * usage.
+ *
+ * See http://review.gluster.org/14701 for more details.
+ *
+ * WARNING: These structures will be removed in the future.
+ */
+struct glfs_callback_arg {
+ struct glfs *fs;
+ enum glfs_upcall_reason reason;
+ void *event_arg;
+};
+
+struct glfs_callback_inode_arg {
+ struct glfs_object *object; /* Object which need to be acted upon */
+ int flags; /* Cache UPDATE/INVALIDATE flags */
+ struct stat buf; /* Latest stat of this entry */
+ unsigned int expire_time_attr; /* the amount of time for which
+ * the application need to cache
+ * this entry
+ */
+ struct glfs_object *p_object; /* parent Object to be updated */
+ struct stat p_buf; /* Latest stat of parent dir handle */
+ struct glfs_object *oldp_object; /* Old parent Object
+ * to be updated */
+ struct stat oldp_buf; /* Latest stat of old parent
+ * dir handle */
+};
+struct dirent *
+glfs_readdirbuf_get(struct glfs_fd *glfd);
+
+gf_dirent_t *
+glfd_entry_next(struct glfs_fd *glfd, int plus);
+
+void
+gf_dirent_to_dirent(gf_dirent_t *gf_dirent, struct dirent *dirent);
+
+void
+gf_lease_to_glfs_lease(struct gf_lease *gf_lease, struct glfs_lease *lease);
+
+void
+glfs_lease_to_gf_lease(struct glfs_lease *lease, struct gf_lease *gf_lease);
+
+void
+glfs_release_upcall(void *ptr);
+
+int
+get_fop_attr_glfd(dict_t **fop_attr, struct glfs_fd *glfd);
+
+int
+set_fop_attr_glfd(struct glfs_fd *glfd);
+
+int
+get_fop_attr_thrd_key(dict_t **fop_attr);
+
+void
+unset_fop_attr(dict_t **fop_attr);
+
+/*
+ SYNOPSIS
+ glfs_statx: Fetch extended file attributes for the given path.
+
+ DESCRIPTION
+ This function fetches extended file attributes for the given path.
+
+ PARAMETERS
+ @fs: The 'virtual mount' object referencing a volume, under which file exists.
+ @path: Path of the file within the virtual mount.
+ @mask: Requested extended file attributes mask, (See mask defines above)
+
+ RETURN VALUES
+ -1 : Failure. @errno will be set with the type of failure.
+ 0 : Filled in statxbuf with appropriate masks for valid items in the
+ structure.
+
+ ERRNO VALUES
+ EINVAL: fs is invalid
+ EINVAL: mask has unsupported bits set
+ Other errors as returned by stat(2)
+ */
+
+int
+glfs_statx(struct glfs *fs, const char *path, unsigned int mask,
+ struct glfs_stat *statxbuf) GFAPI_PRIVATE(glfs_statx, 6.0);
+
+void
+glfs_iatt_from_statx(struct iatt *, const struct glfs_stat *)
+ GFAPI_PRIVATE(glfs_iatt_from_statx, 6.0);
+
+/*
+ * This API is a per thread setting, similar to glfs_setfs{u/g}id, because of
+ * the call to syncopctx_setfspid.
+ */
+int
+glfs_setfspid(struct glfs *, pid_t) GFAPI_PRIVATE(glfs_setfspid, 6.1);
+#endif /* !_GLFS_INTERNAL_H */
diff --git a/api/src/glfs-master.c b/api/src/glfs-master.c
new file mode 100644
index 00000000000..100dcc16cc0
--- /dev/null
+++ b/api/src/glfs-master.c
@@ -0,0 +1,183 @@
+/*
+ Copyright (c) 2012-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+
+#include <glusterfs/glusterfs.h>
+
+#include "glfs-internal.h"
+#include "glfs-mem-types.h"
+#include "gfapi-messages.h"
+
+int
+graph_setup(struct glfs *fs, glusterfs_graph_t *graph)
+{
+ xlator_t *new_subvol = NULL;
+ xlator_t *old_subvol = NULL;
+ inode_table_t *itable = NULL;
+ int ret = -1;
+
+ new_subvol = graph->top;
+
+ /* This is called in a bottom-up context, it should specifically
+ NOT be glfs_lock()
+ */
+ pthread_mutex_lock(&fs->mutex);
+ {
+ if (new_subvol->switched || new_subvol == fs->active_subvol ||
+ new_subvol == fs->next_subvol || new_subvol == fs->mip_subvol) {
+ /* Spurious CHILD_UP event on old graph */
+ ret = 0;
+ goto unlock;
+ }
+
+ if (!new_subvol->itable) {
+ itable = inode_table_new(131072, new_subvol);
+ if (!itable) {
+ errno = ENOMEM;
+ ret = -1;
+ goto unlock;
+ }
+
+ new_subvol->itable = itable;
+ }
+
+ old_subvol = fs->next_subvol;
+ fs->next_subvol = new_subvol;
+ fs->next_subvol->winds++; /* first ref */
+ ret = 0;
+ }
+unlock:
+ pthread_mutex_unlock(&fs->mutex);
+
+ if (old_subvol)
+ /* wasn't picked up so far, skip */
+ glfs_subvol_done(fs, old_subvol);
+
+ return ret;
+}
+
+int
+notify(xlator_t *this, int event, void *data, ...)
+{
+ glusterfs_graph_t *graph = NULL;
+ struct glfs *fs = NULL;
+
+ graph = data;
+ fs = this->private;
+
+ switch (event) {
+ case GF_EVENT_GRAPH_NEW:
+ gf_smsg(this->name, GF_LOG_INFO, 0, API_MSG_NEW_GRAPH,
+ "graph-uuid=%s",
+ uuid_utoa((unsigned char *)graph->graph_uuid), "id=%d",
+ graph->id, NULL);
+ break;
+ case GF_EVENT_CHILD_UP:
+ pthread_mutex_lock(&fs->mutex);
+ {
+ graph->used = 1;
+ }
+ pthread_mutex_unlock(&fs->mutex);
+ graph_setup(fs, graph);
+ glfs_init_done(fs, 0);
+ break;
+ case GF_EVENT_CHILD_DOWN:
+ pthread_mutex_lock(&fs->mutex);
+ {
+ graph->used = 0;
+ pthread_cond_broadcast(&fs->child_down_cond);
+ }
+ pthread_mutex_unlock(&fs->mutex);
+ glfs_init_done(fs, 1);
+ break;
+ case GF_EVENT_CHILD_CONNECTING:
+ break;
+ case GF_EVENT_UPCALL:
+ glfs_process_upcall_event(fs, data);
+ break;
+ default:
+ gf_msg_debug(this->name, 0, "got notify event %d", event);
+ break;
+ }
+
+ return 0;
+}
+
+int
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, glfs_mt_end + 1);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, API_MSG_MEM_ACCT_INIT_FAILED,
+ NULL);
+ return ret;
+ }
+
+ return 0;
+}
+
+int
+init(xlator_t *this)
+{
+ return 0;
+}
+
+void
+fini(xlator_t *this)
+{
+}
+
+/* place-holder fops */
+int
+glfs_forget(xlator_t *this, inode_t *inode)
+{
+ return 0;
+}
+
+int
+glfs_release(xlator_t *this, fd_t *fd)
+{
+ return 0;
+}
+
+int
+glfs_releasedir(xlator_t *this, fd_t *fd)
+{
+ return 0;
+}
+
+struct xlator_dumpops dumpops;
+
+struct xlator_fops fops;
+
+struct xlator_cbks cbks = {
+ .forget = glfs_forget,
+ .release = glfs_release,
+ .releasedir = glfs_releasedir,
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .identifier = "glfs-api",
+ .category = GF_MAINTAINED,
+};
diff --git a/api/src/glfs-mem-types.h b/api/src/glfs-mem-types.h
new file mode 100644
index 00000000000..bfa325a3ad9
--- /dev/null
+++ b/api/src/glfs-mem-types.h
@@ -0,0 +1,35 @@
+/*
+ Copyright (c) 2012-2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLFS_MEM_TYPES_H
+#define _GLFS_MEM_TYPES_H
+
+#include <glusterfs/mem-types.h>
+
+#define GF_MEM_TYPE_START (gf_common_mt_end + 1)
+
+enum glfs_mem_types_ {
+ glfs_mt_call_pool_t = GF_MEM_TYPE_START,
+ glfs_mt_xlator_t,
+ glfs_mt_glfs_fd_t,
+ glfs_mt_glfs_io_t,
+ glfs_mt_volfile_t,
+ glfs_mt_xlator_cmdline_option_t,
+ glfs_mt_server_cmdline_t,
+ glfs_mt_glfs_object_t,
+ glfs_mt_readdirbuf_t,
+ glfs_mt_upcall_entry_t,
+ glfs_mt_acl_t,
+ glfs_mt_upcall_inode_t,
+ glfs_mt_realpath_t,
+ glfs_mt_xreaddirp_stat_t,
+ glfs_mt_end
+};
+#endif
diff --git a/api/src/glfs-mgmt.c b/api/src/glfs-mgmt.c
new file mode 100644
index 00000000000..7c82b8cd162
--- /dev/null
+++ b/api/src/glfs-mgmt.c
@@ -0,0 +1,1049 @@
+/*
+ Copyright (c) 2012-2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <pthread.h>
+
+#include <glusterfs/glusterfs.h>
+#include "glfs.h"
+#include <glusterfs/dict.h>
+
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "xdr-generic.h"
+#include "rpc-common-xdr.h"
+
+#include <glusterfs/syncop.h>
+
+#include "glfs-internal.h"
+#include "gfapi-messages.h"
+#include <glusterfs/syscall.h>
+
+int
+glfs_volfile_fetch(struct glfs *fs);
+int32_t
+glfs_get_volume_info_rpc(call_frame_t *frame, xlator_t *this, struct glfs *fs);
+
+int
+glfs_process_volfp(struct glfs *fs, FILE *fp)
+{
+ glusterfs_graph_t *graph = NULL;
+ int ret = -1;
+ xlator_t *trav = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = fs->ctx;
+ graph = glusterfs_graph_construct(fp);
+ if (!graph) {
+ gf_smsg("glfs", GF_LOG_ERROR, errno, API_MSG_GRAPH_CONSTRUCT_FAILED,
+ NULL);
+ goto out;
+ }
+
+ for (trav = graph->first; trav; trav = trav->next) {
+ if (strcmp(trav->type, "mount/api") == 0) {
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_API_XLATOR_ERROR,
+ NULL);
+ goto out;
+ }
+ }
+
+ ret = glusterfs_graph_prepare(graph, ctx, fs->volname);
+ if (ret) {
+ glusterfs_graph_destroy(graph);
+ goto out;
+ }
+
+ ret = glusterfs_graph_activate(graph, ctx);
+
+ if (ret) {
+ glusterfs_graph_destroy(graph);
+ goto out;
+ }
+
+ gf_log_dump_graph(fp, graph);
+
+ ret = 0;
+out:
+ if (fp)
+ fclose(fp);
+
+ if (!ctx->active) {
+ ret = -1;
+ }
+
+ return ret;
+}
+
+int
+mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ struct glfs *fs = NULL;
+ xlator_t *this = NULL;
+
+ this = mydata;
+ fs = this->private;
+
+ glfs_volfile_fetch(fs);
+
+ return 0;
+}
+
+int
+mgmt_cbk_event(struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ return 0;
+}
+
+static int
+mgmt_cbk_statedump(struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ struct glfs *fs = NULL;
+ xlator_t *this = NULL;
+ gf_statedump target_pid = {
+ 0,
+ };
+ struct iovec *iov = NULL;
+ int ret = -1;
+
+ this = mydata;
+ if (!this) {
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_NULL, "mydata", NULL);
+ errno = EINVAL;
+ goto out;
+ }
+
+ fs = this->private;
+ if (!fs) {
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_NULL, "glfs", NULL);
+ errno = EINVAL;
+ goto out;
+ }
+
+ iov = (struct iovec *)data;
+ if (!iov) {
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_NULL, "iovec data", NULL);
+ errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &target_pid, (xdrproc_t)xdr_gf_statedump);
+ if (ret < 0) {
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_DECODE_XDR_FAILED, NULL);
+ goto out;
+ }
+
+ gf_msg_trace("glfs", 0, "statedump requested for pid: %d", target_pid.pid);
+
+ if ((uint64_t)getpid() == target_pid.pid) {
+ gf_msg_debug("glfs", 0, "Taking statedump for pid: %d", target_pid.pid);
+
+ ret = glfs_sysrq(fs, GLFS_SYSRQ_STATEDUMP);
+ if (ret < 0) {
+ gf_smsg("glfs", GF_LOG_INFO, 0, API_MSG_STATEDUMP_FAILED, NULL);
+ }
+ }
+out:
+ return ret;
+}
+
+static rpcclnt_cb_actor_t mgmt_cbk_actors[GF_CBK_MAXVALUE] = {
+ [GF_CBK_FETCHSPEC] = {"FETCHSPEC", mgmt_cbk_spec, GF_CBK_FETCHSPEC},
+ [GF_CBK_EVENT_NOTIFY] = {"EVENTNOTIFY", mgmt_cbk_event,
+ GF_CBK_EVENT_NOTIFY},
+ [GF_CBK_STATEDUMP] = {"STATEDUMP", mgmt_cbk_statedump, GF_CBK_STATEDUMP},
+};
+
+static struct rpcclnt_cb_program mgmt_cbk_prog = {
+ .progname = "GlusterFS Callback",
+ .prognum = GLUSTER_CBK_PROGRAM,
+ .progver = GLUSTER_CBK_VERSION,
+ .actors = mgmt_cbk_actors,
+ .numactors = GF_CBK_MAXVALUE,
+};
+
+static char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
+ [GF_HNDSK_NULL] = "NULL",
+ [GF_HNDSK_SETVOLUME] = "SETVOLUME",
+ [GF_HNDSK_GETSPEC] = "GETSPEC",
+ [GF_HNDSK_PING] = "PING",
+ [GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY",
+ [GF_HNDSK_GET_VOLUME_INFO] = "GETVOLUMEINFO",
+};
+
+static rpc_clnt_prog_t clnt_handshake_prog = {
+ .progname = "GlusterFS Handshake",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
+ .procnames = clnt_handshake_procs,
+};
+
+int
+mgmt_submit_request(void *req, call_frame_t *frame, glusterfs_ctx_t *ctx,
+ rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn,
+ xdrproc_t xdrproc)
+{
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {
+ 0,
+ };
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ ssize_t xdr_size = 0;
+
+ iobref = iobref_new();
+ if (!iobref) {
+ goto out;
+ }
+
+ if (req) {
+ xdr_size = xdr_sizeof(xdrproc, req);
+
+ iobuf = iobuf_get2(ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ iobref_add(iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize(iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic(iov, req, xdrproc);
+ if (ret == -1) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, API_MSG_XDR_PAYLOAD_FAILED,
+ NULL);
+ goto out;
+ }
+ iov.iov_len = ret;
+ count = 1;
+ }
+
+ /* Send the msg */
+ ret = rpc_clnt_submit(ctx->mgmt, prog, procnum, cbkfn, &iov, count, NULL, 0,
+ iobref, frame, NULL, 0, NULL, 0, NULL);
+
+out:
+ if (iobref)
+ iobref_unref(iobref);
+
+ if (iobuf)
+ iobuf_unref(iobuf);
+ return ret;
+}
+
+/*
+ * Callback routine for 'GF_HNDSK_GET_VOLUME_INFO' rpc request
+ */
+int
+mgmt_get_volinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int ret = 0;
+ char *volume_id_str = NULL;
+ dict_t *dict = NULL;
+ gf_get_volume_info_rsp rsp = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ struct glfs *fs = NULL;
+ struct syncargs *args;
+
+ frame = myframe;
+ ctx = frame->this->ctx;
+ args = frame->local;
+
+ if (!ctx) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, EINVAL, API_MSG_NULL,
+ "context", NULL);
+ errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ fs = ((xlator_t *)ctx->master)->private;
+
+ if (-1 == req->rpc_status) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, EINVAL,
+ API_MSG_CALL_NOT_SUCCESSFUL, NULL);
+ errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_get_volume_info_rsp);
+
+ if (ret < 0) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, 0,
+ API_MSG_XDR_RESPONSE_DECODE_FAILED, NULL);
+ goto out;
+ }
+
+ gf_msg_debug(frame->this->name, 0,
+ "Received resp to GET_VOLUME_INFO "
+ "RPC: %d",
+ rsp.op_ret);
+
+ if (rsp.op_ret == -1) {
+ errno = rsp.op_errno;
+ ret = -1;
+ goto out;
+ }
+
+ if (!rsp.dict.dict_len) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, EINVAL, API_MSG_CALL_NOT_VALID,
+ NULL);
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(dict, "volume_id", &volume_id_str);
+ if (ret) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (volume_id_str) {
+ gf_msg_debug(frame->this->name, 0, "Volume Id: %s", volume_id_str);
+ pthread_mutex_lock(&fs->mutex);
+ gf_uuid_parse(volume_id_str, fs->vol_uuid);
+ pthread_mutex_unlock(&fs->mutex);
+ }
+
+ if (ret) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, errno,
+ API_MSG_GET_VOLINFO_CBK_FAILED, "error=%s", strerror(errno),
+ NULL);
+ }
+
+ if (dict)
+ dict_unref(dict);
+
+ if (rsp.dict.dict_val)
+ free(rsp.dict.dict_val);
+
+ if (rsp.op_errstr)
+ free(rsp.op_errstr);
+
+ gf_msg_debug(frame->this->name, 0, "Returning: %d", ret);
+
+ __wake(args);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volumeid, 3.5.0)
+int
+pub_glfs_get_volumeid(struct glfs *fs, char *volid, size_t size)
+{
+ /* TODO: Define a global macro to store UUID size */
+ size_t uuid_size = 16;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ pthread_mutex_lock(&fs->mutex);
+ {
+ /* check if the volume uuid is initialized */
+ if (!gf_uuid_is_null(fs->vol_uuid)) {
+ pthread_mutex_unlock(&fs->mutex);
+ goto done;
+ }
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+ /* Need to fetch volume_uuid */
+ glfs_get_volume_info(fs);
+
+ if (gf_uuid_is_null(fs->vol_uuid)) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, EINVAL, API_MSG_FETCH_VOLUUID_FAILED,
+ NULL);
+ goto out;
+ }
+
+done:
+ if (!volid || !size) {
+ gf_msg_debug(THIS->name, 0, "volumeid/size is null");
+ __GLFS_EXIT_FS;
+ return uuid_size;
+ }
+
+ if (size < uuid_size) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ERANGE, API_MSG_INSUFF_SIZE, NULL);
+ errno = ERANGE;
+ goto out;
+ }
+
+ memcpy(volid, fs->vol_uuid, uuid_size);
+
+ __GLFS_EXIT_FS;
+
+ return uuid_size;
+
+out:
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return -1;
+}
+
+int
+glfs_get_volume_info(struct glfs *fs)
+{
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ struct syncargs args = {
+ 0,
+ };
+ int ret = 0;
+
+ ctx = fs->ctx;
+ frame = create_frame(THIS, ctx->pool);
+ if (!frame) {
+ gf_smsg("glfs", GF_LOG_ERROR, ENOMEM, API_MSG_FRAME_CREAT_FAILED, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ frame->local = &args;
+
+ __yawn((&args));
+
+ ret = glfs_get_volume_info_rpc(frame, THIS, fs);
+ if (ret)
+ goto out;
+
+ __yield((&args));
+
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+
+out:
+ return ret;
+}
+
+int32_t
+glfs_get_volume_info_rpc(call_frame_t *frame, xlator_t *this, struct glfs *fs)
+{
+ gf_get_volume_info_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
+ dict_t *dict = NULL;
+ int32_t flags = 0;
+
+ if (!frame || !this || !fs) {
+ ret = -1;
+ goto out;
+ }
+
+ ctx = fs->ctx;
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ if (fs->volname) {
+ ret = dict_set_str(dict, "volname", fs->volname);
+ if (ret)
+ goto out;
+ }
+
+ // Set the flags for the fields which we are interested in
+ flags = (int32_t)GF_GET_VOLUME_UUID; // ctx->flags;
+ ret = dict_set_int32(dict, "flags", flags);
+ if (ret) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, EINVAL,
+ API_MSG_DICT_SET_FAILED, "flags", NULL);
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+
+ ret = mgmt_submit_request(&req, frame, ctx, &clnt_handshake_prog,
+ GF_HNDSK_GET_VOLUME_INFO, mgmt_get_volinfo_cbk,
+ (xdrproc_t)xdr_gf_get_volume_info_req);
+out:
+ if (dict) {
+ dict_unref(dict);
+ }
+
+ GF_FREE(req.dict.dict_val);
+
+ return ret;
+}
+
+static int
+glusterfs_oldvolfile_update(struct glfs *fs, char *volfile, ssize_t size)
+{
+ int ret = -1;
+
+ pthread_mutex_lock(&fs->mutex);
+
+ fs->oldvollen = size;
+ if (!fs->oldvolfile) {
+ fs->oldvolfile = CALLOC(1, size + 1);
+ } else {
+ fs->oldvolfile = REALLOC(fs->oldvolfile, size + 1);
+ }
+
+ if (!fs->oldvolfile) {
+ fs->oldvollen = 0;
+ } else {
+ memcpy(fs->oldvolfile, volfile, size);
+ fs->oldvollen = size;
+ ret = 0;
+ }
+
+ pthread_mutex_unlock(&fs->mutex);
+
+ return ret;
+}
+
+int
+glfs_mgmt_getspec_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_getspec_rsp rsp = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+ ssize_t size = 0;
+ FILE *tmpfp = NULL;
+ int need_retry = 0;
+ struct glfs *fs = NULL;
+ dict_t *dict = NULL;
+ char *servers_list = NULL;
+ int tmp_fd = -1;
+ char template[] = "/tmp/gfapi.volfile.XXXXXX";
+
+ frame = myframe;
+ ctx = frame->this->ctx;
+
+ if (!ctx) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, EINVAL, API_MSG_NULL,
+ "context", NULL);
+ errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ fs = ((xlator_t *)ctx->master)->private;
+
+ if (-1 == req->rpc_status) {
+ ret = -1;
+ need_retry = 1;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
+ if (ret < 0) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, 0, API_MSG_XDR_DECODE_FAILED,
+ NULL);
+ ret = -1;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_smsg(frame->this->name, GF_LOG_ERROR, rsp.op_errno,
+ API_MSG_GET_VOLFILE_FAILED, "from server", NULL);
+ ret = -1;
+ errno = rsp.op_errno;
+ goto out;
+ }
+
+ if (!rsp.xdata.xdata_len) {
+ goto volfile;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.xdata.xdata_val, rsp.xdata.xdata_len, &dict);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "failed to unserialize xdata to dictionary");
+ goto out;
+ }
+ dict->extra_stdfree = rsp.xdata.xdata_val;
+
+ /* glusterd2 only */
+ ret = dict_get_str(dict, "servers-list", &servers_list);
+ if (ret) {
+ goto volfile;
+ }
+
+ gf_log(frame->this->name, GF_LOG_INFO,
+ "Received list of available volfile servers: %s", servers_list);
+
+ ret = gf_process_getspec_servers_list(&ctx->cmd_args, servers_list);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "Failed (%s) to process servers list: %s", strerror(errno),
+ servers_list);
+ }
+
+volfile:
+ ret = 0;
+ size = rsp.op_ret;
+
+ pthread_mutex_lock(&fs->mutex);
+ if ((size == fs->oldvollen) &&
+ (memcmp(fs->oldvolfile, rsp.spec, size) == 0)) {
+ pthread_mutex_unlock(&fs->mutex);
+ gf_smsg(frame->this->name, GF_LOG_INFO, 0, API_MSG_VOLFILE_INFO, NULL);
+ goto out;
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+ tmp_fd = mkstemp(template);
+ if (-1 == tmp_fd) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Calling unlink so that when the file is closed or program
+ * terminates the temporary file is deleted.
+ */
+ ret = sys_unlink(template);
+ if (ret < 0) {
+ gf_smsg(frame->this->name, GF_LOG_INFO, 0, API_MSG_UNABLE_TO_DEL,
+ "template=%s", template, NULL);
+ ret = 0;
+ }
+
+ tmpfp = fdopen(tmp_fd, "w+b");
+ if (!tmpfp) {
+ ret = -1;
+ goto out;
+ }
+
+ fwrite(rsp.spec, size, 1, tmpfp);
+ fflush(tmpfp);
+ if (ferror(tmpfp)) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Check if only options have changed. No need to reload the
+ * volfile if topology hasn't changed.
+ * glusterfs_volfile_reconfigure returns 3 possible return states
+ * return 0 =======> reconfiguration of options has succeeded
+ * return 1 =======> the graph has to be reconstructed and all
+ * the xlators should be inited return -1(or -ve) =======> Some Internal
+ * Error occurred during the operation
+ */
+
+ pthread_mutex_lock(&fs->mutex);
+ ret = gf_volfile_reconfigure(fs->oldvollen, tmpfp, fs->ctx, fs->oldvolfile);
+ pthread_mutex_unlock(&fs->mutex);
+
+ if (ret == 0) {
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "No need to re-load "
+ "volfile, reconfigure done");
+ ret = glusterfs_oldvolfile_update(fs, rsp.spec, size);
+ goto out;
+ }
+
+ if (ret < 0) {
+ gf_msg_debug("glusterfsd-mgmt", 0, "Reconfigure failed !!");
+ goto out;
+ }
+
+ ret = glfs_process_volfp(fs, tmpfp);
+ /* tmpfp closed */
+ tmpfp = NULL;
+ tmp_fd = -1;
+ if (ret)
+ goto out;
+
+ ret = glusterfs_oldvolfile_update(fs, rsp.spec, size);
+out:
+ STACK_DESTROY(frame->root);
+
+ if (rsp.spec)
+ free(rsp.spec);
+
+ if (dict)
+ dict_unref(dict);
+
+ // Stop if server is running at an unsupported op-version
+ if (ENOTSUP == ret) {
+ gf_smsg("mgmt", GF_LOG_ERROR, ENOTSUP, API_MSG_WRONG_OPVERSION, NULL);
+ errno = ENOTSUP;
+ glfs_init_done(fs, -1);
+ }
+
+ if (ret && ctx && !ctx->active) {
+ /* Do it only for the first time */
+ /* Failed to get the volume file, something wrong,
+ restart the process */
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, EINVAL, API_MSG_GET_VOLFILE_FAILED,
+ "key=%s", ctx->cmd_args.volfile_id, NULL);
+ if (!need_retry) {
+ if (!errno)
+ errno = EINVAL;
+ glfs_init_done(fs, -1);
+ }
+ }
+
+ if (tmpfp)
+ fclose(tmpfp);
+ else if (tmp_fd != -1)
+ sys_close(tmp_fd);
+
+ return 0;
+}
+
+int
+glfs_volfile_fetch(struct glfs *fs)
+{
+ cmd_args_t *cmd_args = NULL;
+ gf_getspec_req req = {
+ 0,
+ };
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ dict_t *dict = NULL;
+
+ ctx = fs->ctx;
+ cmd_args = &ctx->cmd_args;
+
+ req.key = cmd_args->volfile_id;
+ req.flags = 0;
+
+ dict = dict_new();
+ if (!dict) {
+ goto out;
+ }
+
+ // Set the supported min and max op-versions, so glusterd can make a
+ // decision
+ ret = dict_set_int32(dict, "min-op-version", GD_OP_VERSION_MIN);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, EINVAL, API_MSG_DICT_SET_FAILED,
+ "min-op-version", NULL);
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "max-op-version", GD_OP_VERSION_MAX);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, EINVAL, API_MSG_DICT_SET_FAILED,
+ "max-op-version", NULL);
+ goto out;
+ }
+
+ /* Ask for a list of volfile (glusterd2 only) servers */
+ if (GF_CLIENT_PROCESS == ctx->process_mode) {
+ req.flags = req.flags | GF_GETSPEC_FLAG_SERVERS_LIST;
+ }
+
+ ret = dict_allocate_and_serialize(dict, &req.xdata.xdata_val,
+ &req.xdata.xdata_len);
+ if (ret < 0) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, API_MSG_DICT_SERIALIZE_FAILED,
+ NULL);
+ goto out;
+ }
+
+ frame = create_frame(THIS, ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = mgmt_submit_request(&req, frame, ctx, &clnt_handshake_prog,
+ GF_HNDSK_GETSPEC, glfs_mgmt_getspec_cbk,
+ (xdrproc_t)xdr_gf_getspec_req);
+out:
+ if (req.xdata.xdata_val)
+ GF_FREE(req.xdata.xdata_val);
+ if (dict)
+ dict_unref(dict);
+
+ return ret;
+}
+
+static int
+mgmt_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data)
+{
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ server_cmdline_t *server = NULL;
+ rpc_transport_t *rpc_trans = NULL;
+ struct glfs *fs = NULL;
+ int ret = 0;
+ struct dnscache6 *dnscache = NULL;
+
+ this = mydata;
+ rpc_trans = rpc->conn.trans;
+
+ ctx = this->ctx;
+ if (!ctx)
+ goto out;
+
+ fs = ((xlator_t *)ctx->master)->private;
+
+ switch (event) {
+ case RPC_CLNT_DISCONNECT:
+ if (!ctx->active) {
+ if (rpc_trans->connect_failed)
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, 0,
+ API_MSG_REMOTE_HOST_CONN_FAILED, "server=%s",
+ ctx->cmd_args.volfile_server, NULL);
+ else
+ gf_smsg("glfs-mgmt", GF_LOG_INFO, 0,
+ API_MSG_REMOTE_HOST_CONN_FAILED, "server=%s",
+ ctx->cmd_args.volfile_server, NULL);
+
+ if (!rpc->disabled) {
+ /*
+ * Check if dnscache is exhausted for current server
+ * and continue until cache is exhausted
+ */
+ dnscache = rpc_trans->dnscache;
+ if (dnscache && dnscache->next) {
+ break;
+ }
+ }
+ server = ctx->cmd_args.curr_server;
+ if (server->list.next == &ctx->cmd_args.volfile_servers) {
+ errno = ENOTCONN;
+ gf_smsg("glfs-mgmt", GF_LOG_INFO, ENOTCONN,
+ API_MSG_VOLFILE_SERVER_EXHAUST, NULL);
+ glfs_init_done(fs, -1);
+ break;
+ }
+ server = list_entry(server->list.next, typeof(*server), list);
+ ctx->cmd_args.curr_server = server;
+ ctx->cmd_args.volfile_server_port = server->port;
+ ctx->cmd_args.volfile_server = server->volfile_server;
+ ctx->cmd_args.volfile_server_transport = server->transport;
+
+ ret = dict_set_str(rpc_trans->options, "transport-type",
+ server->transport);
+ if (ret != 0) {
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, ENOTCONN,
+ API_MSG_DICT_SET_FAILED, "transport-type=%s",
+ server->transport, NULL);
+ errno = ENOTCONN;
+ glfs_init_done(fs, -1);
+ break;
+ }
+
+ if (strcmp(server->transport, "unix") == 0) {
+ ret = dict_set_str(rpc_trans->options,
+ "transport.socket.connect-path",
+ server->volfile_server);
+ if (ret != 0) {
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, ENOTCONN,
+ API_MSG_DICT_SET_FAILED,
+ "socket.connect-path=%s",
+ server->volfile_server, NULL);
+ errno = ENOTCONN;
+ glfs_init_done(fs, -1);
+ break;
+ }
+ /* delete the remote-host and remote-port keys
+ * in case they were set while looping through
+ * list of volfile servers previously
+ */
+ dict_del(rpc_trans->options, "remote-host");
+ dict_del(rpc_trans->options, "remote-port");
+ } else {
+ ret = dict_set_int32(rpc_trans->options, "remote-port",
+ server->port);
+ if (ret != 0) {
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, ENOTCONN,
+ API_MSG_DICT_SET_FAILED, "remote-port=%d",
+ server->port, NULL);
+ errno = ENOTCONN;
+ glfs_init_done(fs, -1);
+ break;
+ }
+
+ ret = dict_set_str(rpc_trans->options, "remote-host",
+ server->volfile_server);
+ if (ret != 0) {
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, ENOTCONN,
+ API_MSG_DICT_SET_FAILED, "remote-host=%s",
+ server->volfile_server, NULL);
+ errno = ENOTCONN;
+ glfs_init_done(fs, -1);
+ break;
+ }
+ /* delete the "transport.socket.connect-path"
+ * key in case if it was set while looping
+ * through list of volfile servers previously
+ */
+ dict_del(rpc_trans->options,
+ "transport.socket.connect-path");
+ }
+
+ gf_smsg("glfs-mgmt", GF_LOG_INFO, 0, API_MSG_VOLFILE_CONNECTING,
+ "server=%s", server->volfile_server, "port=%d",
+ server->port, "transport=%s", server->transport, NULL);
+ }
+ break;
+ case RPC_CLNT_CONNECT:
+ ret = glfs_volfile_fetch(fs);
+ if (ret && (ctx->active == NULL)) {
+ /* Do it only for the first time */
+ /* Exit the process.. there are some wrong options */
+ gf_smsg("glfs-mgmt", GF_LOG_ERROR, EINVAL,
+ API_MSG_GET_VOLFILE_FAILED, "key=%s",
+ ctx->cmd_args.volfile_id, NULL);
+ errno = EINVAL;
+ glfs_init_done(fs, -1);
+ }
+
+ break;
+ default:
+ break;
+ }
+out:
+ return 0;
+}
+
+int
+glusterfs_mgmt_notify(int32_t op, void *data, ...)
+{
+ int ret = 0;
+
+ switch (op) {
+ case GF_EN_DEFRAG_STATUS:
+ break;
+
+ default:
+ break;
+ }
+
+ return ret;
+}
+
+int
+glfs_mgmt_init(struct glfs *fs)
+{
+ cmd_args_t *cmd_args = NULL;
+ struct rpc_clnt *rpc = NULL;
+ dict_t *options = NULL;
+ int ret = -1;
+ int port = GF_DEFAULT_BASE_PORT;
+ char *host = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = fs->ctx;
+ cmd_args = &ctx->cmd_args;
+
+ if (ctx->mgmt)
+ return 0;
+
+ options = dict_new();
+ if (!options)
+ goto out;
+
+ if (cmd_args->volfile_server_port)
+ port = cmd_args->volfile_server_port;
+
+ if (cmd_args->volfile_server) {
+ host = cmd_args->volfile_server;
+ } else if (cmd_args->volfile_server_transport &&
+ !strcmp(cmd_args->volfile_server_transport, "unix")) {
+ host = DEFAULT_GLUSTERD_SOCKFILE;
+ } else {
+ host = "localhost";
+ }
+
+ if (cmd_args->volfile_server_transport &&
+ !strcmp(cmd_args->volfile_server_transport, "unix")) {
+ ret = rpc_transport_unix_options_build(options, host, 0);
+ } else {
+ xlator_cmdline_option_t *opt = find_xlator_option_in_cmd_args_t(
+ "address-family", cmd_args);
+ ret = rpc_transport_inet_options_build(options, host, port,
+ (opt ? opt->value : NULL));
+ }
+
+ if (ret)
+ goto out;
+
+ rpc = rpc_clnt_new(options, THIS, THIS->name, 8);
+ if (!rpc) {
+ ret = -1;
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, API_MSG_CREATE_RPC_CLIENT_FAILED,
+ NULL);
+ goto out;
+ }
+
+ ret = rpc_clnt_register_notify(rpc, mgmt_rpc_notify, THIS);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, API_MSG_REG_NOTIFY_FUNC_FAILED,
+ NULL);
+ goto out;
+ }
+
+ ret = rpcclnt_cbk_program_register(rpc, &mgmt_cbk_prog, THIS);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, API_MSG_REG_CBK_FUNC_FAILED,
+ NULL);
+ goto out;
+ }
+
+ ctx->notify = glusterfs_mgmt_notify;
+
+ /* This value should be set before doing the 'rpc_clnt_start()' as
+ the notify function uses this variable */
+ ctx->mgmt = rpc;
+
+ ret = rpc_clnt_start(rpc);
+out:
+ if (options)
+ dict_unref(options);
+ return ret;
+}
diff --git a/api/src/glfs-resolve.c b/api/src/glfs-resolve.c
new file mode 100644
index 00000000000..8a393ecb464
--- /dev/null
+++ b/api/src/glfs-resolve.c
@@ -0,0 +1,1199 @@
+/*
+ Copyright (c) 2012-2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <limits.h>
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/stack.h>
+#include <glusterfs/gf-event.h>
+#include "glfs-mem-types.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/call-stub.h>
+#include "gfapi-messages.h"
+#include <glusterfs/inode.h>
+#include "glfs-internal.h"
+
+#define graphid_str(subvol) \
+ (uuid_utoa((unsigned char *)subvol->graph->graph_uuid))
+int
+glfs_first_lookup_safe(xlator_t *subvol)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+
+ loc.inode = subvol->itable->root;
+ memset(loc.gfid, 0, 16);
+ loc.gfid[15] = 1;
+ loc.path = "/";
+ loc.name = "";
+
+ ret = syncop_lookup(subvol, &loc, 0, 0, 0, 0);
+ DECODE_SYNCOP_ERR(ret);
+
+ gf_msg_debug(subvol->name, 0, "first lookup complete %d", ret);
+
+ return ret;
+}
+
+int
+__glfs_first_lookup(struct glfs *fs, xlator_t *subvol)
+{
+ int ret = -1;
+
+ fs->migration_in_progress = 1;
+ pthread_mutex_unlock(&fs->mutex);
+ {
+ ret = glfs_first_lookup_safe(subvol);
+ }
+ pthread_mutex_lock(&fs->mutex);
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast(&fs->cond);
+
+ /* wake up other waiting tasks */
+ __GLFS_SYNCTASK_WAKE(fs);
+
+ return ret;
+}
+
+/**
+ * We have to check if need_lookup flag is set in both old and the new inodes.
+ * If its set in oldinode, then directly go ahead and do an explicit lookup.
+ * But if its not set in the oldinode, then check if the newinode is linked
+ * via readdirp. If so an explicit lookup is needed on the new inode, so that
+ * below xlators can set their respective contexts.
+ */
+inode_t *
+glfs_refresh_inode_safe(xlator_t *subvol, inode_t *oldinode,
+ gf_boolean_t need_lookup)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ struct iatt iatt = {
+ 0,
+ };
+ inode_t *newinode = NULL;
+ gf_boolean_t lookup_needed = _gf_false;
+ uint64_t ctx_value = LOOKUP_NOT_NEEDED;
+
+ if (!oldinode)
+ return NULL;
+
+ if (!need_lookup && oldinode->table->xl == subvol)
+ return inode_ref(oldinode);
+
+ newinode = inode_find(subvol->itable, oldinode->gfid);
+ if (!need_lookup && newinode) {
+ lookup_needed = inode_needs_lookup(newinode, THIS);
+ if (!lookup_needed)
+ return newinode;
+ }
+
+ gf_uuid_copy(loc.gfid, oldinode->gfid);
+ if (!newinode)
+ loc.inode = inode_new(subvol->itable);
+ else
+ loc.inode = newinode;
+
+ if (!loc.inode)
+ return NULL;
+
+ ret = syncop_lookup(subvol, &loc, &iatt, 0, 0, 0);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret) {
+ gf_smsg(subvol->name, GF_LOG_WARNING, errno,
+ API_MSG_INODE_REFRESH_FAILED, "gfid=%s",
+ uuid_utoa(oldinode->gfid), "err=%s", strerror(errno), NULL);
+ loc_wipe(&loc);
+ return NULL;
+ }
+
+ newinode = inode_link(loc.inode, 0, 0, &iatt);
+ if (newinode) {
+ if (newinode == loc.inode)
+ inode_ctx_set(newinode, THIS, &ctx_value);
+ inode_lookup(newinode);
+ } else {
+ gf_smsg(subvol->name, GF_LOG_WARNING, errno, API_MSG_INODE_LINK_FAILED,
+ "gfid=%s", uuid_utoa((unsigned char *)&iatt.ia_gfid), NULL);
+ }
+
+ loc_wipe(&loc);
+
+ return newinode;
+}
+
+inode_t *
+__glfs_refresh_inode(struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ gf_boolean_t need_lookup)
+{
+ inode_t *newinode = NULL;
+
+ fs->migration_in_progress = 1;
+ pthread_mutex_unlock(&fs->mutex);
+ {
+ newinode = glfs_refresh_inode_safe(subvol, inode, need_lookup);
+ }
+ pthread_mutex_lock(&fs->mutex);
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast(&fs->cond);
+
+ /* wake up other waiting tasks */
+ __GLFS_SYNCTASK_WAKE(fs);
+
+ return newinode;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_loc_touchup, 3.4.0)
+int
+priv_glfs_loc_touchup(loc_t *loc)
+{
+ int ret = 0;
+
+ ret = loc_touchup(loc, loc->name);
+ if (ret < 0) {
+ errno = -ret;
+ ret = -1;
+ }
+
+ return ret;
+}
+
+int
+glfs_resolve_symlink(struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ char **lpath)
+{
+ loc_t loc = {
+ 0,
+ };
+ char *path = NULL;
+ char *rpath = NULL;
+ int ret = -1;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ ret = inode_path(inode, NULL, &rpath);
+ if (ret < 0)
+ goto out;
+ loc.path = rpath;
+
+ ret = syncop_readlink(subvol, &loc, &path, 4096, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+
+ if (ret < 0)
+ goto out;
+
+ if (lpath)
+ *lpath = path;
+out:
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
+glfs_resolve_base(struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ struct iatt *iatt)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ char *path = NULL;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ ret = inode_path(loc.inode, NULL, &path);
+ loc.path = path;
+ if (ret < 0)
+ goto out;
+
+ ret = syncop_lookup(subvol, &loc, iatt, NULL, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ loc_wipe(&loc);
+
+ return ret;
+}
+/*
+ * This function can be used to call named lookup on root.
+ * If you use glfs_resolve_base, that will be a nameless lookup.
+ */
+static int
+glfs_resolve_root(struct glfs *fs, xlator_t *subvol, inode_t *inode,
+ struct iatt *iatt)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ char *path = NULL;
+
+ loc.inode = inode_ref(inode);
+
+ ret = inode_path(loc.inode, ".", &path);
+ loc.path = path;
+ loc.name = ".";
+ /* Having a value in loc.name will help to bypass md-cache check for
+ * nameless lookup.
+ * TODO: Re-visit on nameless lookup and md-cache.
+ * Github issue : https://github.com/gluster/glusterfs/issues/232
+ */
+ loc.parent = inode_ref(inode);
+ if (ret < 0)
+ goto out;
+
+ ret = syncop_lookup(subvol, &loc, iatt, NULL, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+out:
+ loc_wipe(&loc);
+
+ return ret;
+}
+
+inode_t *
+glfs_resolve_component(struct glfs *fs, xlator_t *subvol, inode_t *parent,
+ const char *component, struct iatt *iatt,
+ int force_lookup)
+{
+ loc_t loc = {
+ 0,
+ };
+ inode_t *inode = NULL;
+ inode_t *temp_parent = NULL;
+ int reval = 0;
+ int ret = -1;
+ int glret = -1;
+ struct iatt ciatt = {
+ 0,
+ };
+ uuid_t gfid;
+ dict_t *xattr_req = NULL;
+ uint64_t ctx_value = LOOKUP_NOT_NEEDED;
+
+ loc.parent = inode_ref(parent);
+ gf_uuid_copy(loc.pargfid, parent->gfid);
+
+ if (__is_root_gfid(parent->gfid) &&
+ ((strcmp(component, ".") == 0) || (strcmp(component, "..") == 0) ||
+ (strcmp(component, "") == 0))) {
+ if (!force_lookup) {
+ inode = inode_ref(parent);
+ } else {
+ ret = glfs_resolve_root(fs, subvol, parent, &ciatt);
+ if (!ret)
+ inode = inode_ref(parent);
+ }
+ goto found;
+ }
+ /* *
+ * if the component name is either "." or "..", it will try to
+ * resolve that if inode has a proper parent (named lookup).
+ *
+ * Below condition works like this
+ *
+ * Example 1 :
+ * Path /out_dir/dir/in_dir/.
+ * In put values :
+ * parent = in_dir
+ * component : "."
+ *
+ * Out put values:
+ * parent : dir
+ * component : "in_dir"
+ *
+ * Example 2 :
+ * Path /out_dir/dir/in_dir/..
+ * In put values :
+ * parent = in_dir
+ * component : ".."
+ *
+ * Out put values:
+ * parent : output_dir
+ * component : "dir"
+ *
+ * In case of nameless lookup, both "." and ".." retained
+ */
+
+ if (strcmp(component, ".") == 0) {
+ loc.inode = inode_ref(parent);
+ temp_parent = inode_parent(loc.inode, 0, 0);
+ if (temp_parent) {
+ inode_unref(loc.parent);
+ loc.parent = temp_parent;
+ gf_uuid_copy(loc.pargfid, temp_parent->gfid);
+ inode_find_directory_name(loc.inode, &loc.name);
+ }
+
+ } else if (strcmp(component, "..") == 0) {
+ loc.inode = inode_parent(parent, 0, 0);
+ if (loc.inode) {
+ temp_parent = inode_parent(loc.inode, 0, 0);
+ if (temp_parent) {
+ inode_unref(loc.parent);
+ loc.parent = temp_parent;
+ gf_uuid_copy(loc.pargfid, temp_parent->gfid);
+ inode_find_directory_name(loc.inode, &loc.name);
+ } else if (__is_root_gfid(loc.inode->gfid)) {
+ inode_unref(loc.parent);
+ loc.parent = inode_ref(loc.inode);
+ gf_uuid_copy(loc.pargfid, loc.inode->gfid);
+ loc.name = ".";
+ } else {
+ inode_unref(loc.inode);
+ loc.inode = NULL;
+ }
+ }
+ } else
+ loc.inode = inode_grep(parent->table, parent, component);
+
+ if (!loc.name)
+ loc.name = component;
+
+ if (loc.inode) {
+ gf_uuid_copy(loc.gfid, loc.inode->gfid);
+ reval = 1;
+
+ if (!(force_lookup || inode_needs_lookup(loc.inode, THIS))) {
+ inode = inode_ref(loc.inode);
+ goto found;
+ }
+ } else {
+ gf_uuid_generate(gfid);
+ loc.inode = inode_new(parent->table);
+ if (!loc.inode) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ glret = priv_glfs_loc_touchup(&loc);
+ if (glret < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_lookup(subvol, &loc, &ciatt, NULL, xattr_req, NULL);
+ if (ret && reval) {
+ /*
+ * A stale mapping might exist for a dentry/inode that has been
+ * removed from another client.
+ */
+ if (-ret == ENOENT) {
+ inode_unlink(loc.inode, loc.parent, loc.name);
+ if (!inode_has_dentry(loc.inode))
+ inode_forget(loc.inode, 0);
+ }
+
+ inode_unref(loc.inode);
+ gf_uuid_clear(loc.gfid);
+ loc.inode = inode_new(parent->table);
+ if (!loc.inode) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", gfid, true);
+ if (ret) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_lookup(subvol, &loc, &ciatt, NULL, xattr_req, NULL);
+ }
+ DECODE_SYNCOP_ERR(ret);
+ if (ret)
+ goto out;
+
+ inode = inode_link(loc.inode, loc.parent, component, &ciatt);
+
+ if (!inode) {
+ gf_smsg(subvol->name, GF_LOG_WARNING, errno, API_MSG_INODE_LINK_FAILED,
+ "gfid=%s", uuid_utoa((unsigned char *)&ciatt.ia_gfid), NULL);
+ goto out;
+ } else if (inode == loc.inode)
+ inode_ctx_set(inode, THIS, &ctx_value);
+found:
+ if (inode) {
+ ciatt.ia_type = inode->ia_type;
+ inode_lookup(inode);
+ }
+ if (iatt)
+ *iatt = ciatt;
+out:
+ if (xattr_req)
+ dict_unref(xattr_req);
+ loc_wipe(&loc);
+
+ return inode;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_resolve_at, 3.4.0)
+int
+priv_glfs_resolve_at(struct glfs *fs, xlator_t *subvol, inode_t *at,
+ const char *origpath, loc_t *loc, struct iatt *iatt,
+ int follow, int reval)
+{
+ inode_t *inode = NULL;
+ inode_t *parent = NULL;
+ char *saveptr = NULL;
+ char *path = NULL;
+ char *component = NULL;
+ char *next_component = NULL;
+ int ret = -1;
+ struct iatt ciatt = {
+ 0,
+ };
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (origpath[0] == '\0') {
+ errno = EINVAL;
+ goto invalid_fs;
+ }
+
+ parent = NULL;
+ if (at && origpath[0] != '/') {
+ /* A relative resolution of a path which starts with '/'
+ is equal to an absolute path resolution.
+ */
+ inode = inode_ref(at);
+ } else {
+ inode = inode_ref(subvol->itable->root);
+
+ if (strcmp(origpath, "/") == 0)
+ glfs_resolve_root(fs, subvol, inode, &ciatt);
+ }
+
+ path = gf_strdup(origpath);
+ if (!path)
+ goto invalid_fs;
+
+ for (component = strtok_r(path, "/", &saveptr); component;
+ component = next_component) {
+ next_component = strtok_r(NULL, "/", &saveptr);
+
+ if (parent)
+ inode_unref(parent);
+ parent = inode;
+ inode = glfs_resolve_component(fs, subvol, parent, component, &ciatt,
+ /* force hard lookup on the last
+ component, as the caller
+ wants proper iatt filled
+ */
+ (reval || (!next_component && iatt)));
+ if (!inode) {
+ ret = -1;
+ break;
+ }
+
+ if (IA_ISLNK(ciatt.ia_type) && (next_component || follow)) {
+ /* If the component is not the last piece,
+ then following it is necessary even if
+ not requested by the caller
+ */
+ char *lpath = NULL;
+ loc_t sym_loc = {
+ 0,
+ };
+
+ if (follow > GLFS_SYMLINK_MAX_FOLLOW) {
+ errno = ELOOP;
+ ret = -1;
+ if (inode) {
+ inode_unref(inode);
+ inode = NULL;
+ }
+ break;
+ }
+
+ ret = glfs_resolve_symlink(fs, subvol, inode, &lpath);
+ inode_unref(inode);
+ inode = NULL;
+ if (ret < 0)
+ break;
+
+ ret = priv_glfs_resolve_at(fs, subvol, parent, lpath, &sym_loc,
+ /* followed iatt becomes the
+ component iatt
+ */
+ &ciatt,
+ /* always recurisvely follow while
+ following symlink
+ */
+ follow + 1, reval);
+ if (ret == 0)
+ inode = inode_ref(sym_loc.inode);
+ loc_wipe(&sym_loc);
+ GF_FREE(lpath);
+ }
+
+ if (!next_component)
+ break;
+
+ if (!IA_ISDIR(ciatt.ia_type)) {
+ /* next_component exists and this component is
+ not a directory
+ */
+ inode_unref(inode);
+ inode = NULL;
+ ret = -1;
+ errno = ENOTDIR;
+ break;
+ }
+ }
+
+ if (parent && next_component)
+ /* resolution failed mid-way */
+ goto out;
+
+ /* At this point, all components up to the last parent directory
+ have been resolved successfully (@parent). Resolution of basename
+ might have failed (@inode) if at all.
+ */
+
+ loc->parent = parent;
+ if (parent) {
+ gf_uuid_copy(loc->pargfid, parent->gfid);
+ loc->name = component;
+ }
+
+ loc->inode = inode;
+ if (inode) {
+ gf_uuid_copy(loc->gfid, inode->gfid);
+ if (iatt)
+ *iatt = ciatt;
+ ret = 0;
+ }
+
+ if (priv_glfs_loc_touchup(loc) < 0) {
+ ret = -1;
+ }
+out:
+ GF_FREE(path);
+ __GLFS_EXIT_FS;
+
+ /* do NOT loc_wipe here as only last component might be missing */
+invalid_fs:
+ return ret;
+}
+
+int
+glfs_resolve_path(struct glfs *fs, xlator_t *subvol, const char *origpath,
+ loc_t *loc, struct iatt *iatt, int follow, int reval)
+{
+ int ret = -1;
+ inode_t *cwd = NULL;
+
+ if (origpath[0] == '/')
+ return priv_glfs_resolve_at(fs, subvol, NULL, origpath, loc, iatt,
+ follow, reval);
+
+ cwd = glfs_cwd_get(fs);
+ if (NULL == cwd) {
+ gf_smsg(subvol->name, GF_LOG_WARNING, EIO, API_MSG_GET_CWD_FAILED,
+ NULL);
+ errno = EIO;
+ goto out;
+ }
+
+ ret = priv_glfs_resolve_at(fs, subvol, cwd, origpath, loc, iatt, follow,
+ reval);
+ if (cwd)
+ inode_unref(cwd);
+
+out:
+ return ret;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_resolve, 3.7.0)
+int
+priv_glfs_resolve(struct glfs *fs, xlator_t *subvol, const char *origpath,
+ loc_t *loc, struct iatt *iatt, int reval)
+{
+ int ret = -1;
+
+ ret = glfs_resolve_path(fs, subvol, origpath, loc, iatt, 1, reval);
+
+ return ret;
+}
+
+int
+glfs_lresolve(struct glfs *fs, xlator_t *subvol, const char *origpath,
+ loc_t *loc, struct iatt *iatt, int reval)
+{
+ int ret = -1;
+
+ ret = glfs_resolve_path(fs, subvol, origpath, loc, iatt, 0, reval);
+
+ return ret;
+}
+
+int
+glfs_migrate_fd_locks_safe(struct glfs *fs, xlator_t *oldsubvol, fd_t *oldfd,
+ xlator_t *newsubvol, fd_t *newfd)
+{
+ dict_t *lockinfo = NULL;
+ int ret = 0;
+ char uuid1[64];
+
+ if (!oldfd->lk_ctx || fd_lk_ctx_empty(oldfd->lk_ctx))
+ return 0;
+
+ newfd->lk_ctx = fd_lk_ctx_ref(oldfd->lk_ctx);
+
+ ret = syncop_fgetxattr(oldsubvol, oldfd, &lockinfo, GF_XATTR_LOCKINFO_KEY,
+ NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret < 0) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno, API_MSG_FGETXATTR_FAILED,
+ "gfid=%s", uuid_utoa_r(oldfd->inode->gfid, uuid1), "err=%s",
+ strerror(errno), "subvol=%s", graphid_str(oldsubvol), "id=%d",
+ oldsubvol->graph->id, NULL);
+ goto out;
+ }
+
+ if (!dict_get(lockinfo, GF_XATTR_LOCKINFO_KEY)) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, 0, API_MSG_LOCKINFO_KEY_MISSING,
+ "gfid=%s", uuid_utoa_r(oldfd->inode->gfid, uuid1), "subvol=%s",
+ graphid_str(oldsubvol), "id=%d", oldsubvol->graph->id, NULL);
+ goto out;
+ }
+
+ ret = syncop_fsetxattr(newsubvol, newfd, lockinfo, 0, NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret < 0) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, 0, API_MSG_FSETXATTR_FAILED,
+ "gfid=%s", uuid_utoa_r(newfd->inode->gfid, uuid1), "err=%s",
+ strerror(errno), "subvol=%s", graphid_str(newsubvol), "id=%d",
+ newsubvol->graph->id, NULL);
+ goto out;
+ }
+out:
+ if (lockinfo)
+ dict_unref(lockinfo);
+ return ret;
+}
+
+fd_t *
+glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd)
+{
+ fd_t *newfd = NULL;
+ inode_t *oldinode = NULL;
+ inode_t *newinode = NULL;
+ xlator_t *oldsubvol = NULL;
+ int ret = -1;
+ loc_t loc = {
+ 0,
+ };
+ char uuid1[64];
+ dict_t *xdata = NULL;
+
+ oldinode = oldfd->inode;
+ oldsubvol = oldinode->table->xl;
+
+ if (oldsubvol == newsubvol)
+ return fd_ref(oldfd);
+
+ if (!oldsubvol->switched) {
+ xdata = dict_new();
+ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, ENOMEM, API_MSG_FSYNC_FAILED,
+ "err=%s", "last-fsync set failed", "gfid=%s",
+ uuid_utoa_r(oldfd->inode->gfid, uuid1), "subvol=%s",
+ graphid_str(oldsubvol), "id=%d", oldsubvol->graph->id,
+ NULL);
+ }
+
+ ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, xdata, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ if (ret) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno, API_MSG_FSYNC_FAILED,
+ "err=%s", strerror(errno), "gfid=%s",
+ uuid_utoa_r(oldfd->inode->gfid, uuid1), "subvol=%s",
+ graphid_str(oldsubvol), "id=%d", oldsubvol->graph->id,
+ NULL);
+ }
+ }
+
+ newinode = glfs_refresh_inode_safe(newsubvol, oldinode, _gf_false);
+ if (!newinode) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno,
+ API_MSG_INODE_REFRESH_FAILED, "gfid=%s",
+ uuid_utoa_r(oldinode->gfid, uuid1), "err=%s", strerror(errno),
+ "subvol=%s", graphid_str(newsubvol), "id=%d",
+ newsubvol->graph->id, NULL);
+ goto out;
+ }
+
+ newfd = fd_create(newinode, getpid());
+ if (!newfd) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno,
+ API_MSG_FDCREATE_FAILED_ON_GRAPH, "gfid=%s",
+ uuid_utoa_r(newinode->gfid, uuid1), "err=%s", strerror(errno),
+ "subvol=%s", graphid_str(newsubvol), "id=%d",
+ newsubvol->graph->id, NULL);
+ goto out;
+ }
+
+ loc.inode = inode_ref(newinode);
+
+ ret = inode_path(oldfd->inode, NULL, (char **)&loc.path);
+ if (ret < 0) {
+ gf_smsg(fs->volname, GF_LOG_INFO, 0, API_MSG_INODE_PATH_FAILED, NULL);
+ goto out;
+ }
+
+ gf_uuid_copy(loc.gfid, oldinode->gfid);
+
+ if (IA_ISDIR(oldinode->ia_type))
+ ret = syncop_opendir(newsubvol, &loc, newfd, NULL, NULL);
+ else
+ ret = syncop_open(newsubvol, &loc,
+ oldfd->flags & ~(O_TRUNC | O_EXCL | O_CREAT), newfd,
+ NULL, NULL);
+ DECODE_SYNCOP_ERR(ret);
+ loc_wipe(&loc);
+
+ if (ret) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno, API_MSG_SYNCOP_OPEN_FAILED,
+ "type=%s", IA_ISDIR(oldinode->ia_type) ? "dir" : "", "gfid=%s",
+ uuid_utoa_r(newinode->gfid, uuid1), "err=%s", strerror(errno),
+ "subvol=%s", graphid_str(newsubvol), "id=%d",
+ newsubvol->graph->id, NULL);
+ goto out;
+ }
+
+ ret = glfs_migrate_fd_locks_safe(fs, oldsubvol, oldfd, newsubvol, newfd);
+
+ if (ret) {
+ gf_smsg(fs->volname, GF_LOG_WARNING, errno, API_MSG_LOCK_MIGRATE_FAILED,
+ "gfid=%s", uuid_utoa_r(newinode->gfid, uuid1), "err=%s",
+ strerror(errno), "subvol=%s", graphid_str(newsubvol), "id=%d",
+ newsubvol->graph->id, NULL);
+ goto out;
+ }
+
+ newfd->flags = oldfd->flags;
+ fd_bind(newfd);
+out:
+ if (newinode)
+ inode_unref(newinode);
+
+ if (ret) {
+ fd_unref(newfd);
+ newfd = NULL;
+ }
+
+ if (xdata)
+ dict_unref(xdata);
+
+ return newfd;
+}
+
+fd_t *
+__glfs_migrate_fd(struct glfs *fs, xlator_t *newsubvol, struct glfs_fd *glfd)
+{
+ fd_t *oldfd = NULL;
+ fd_t *newfd = NULL;
+
+ oldfd = glfd->fd;
+
+ fs->migration_in_progress = 1;
+ pthread_mutex_unlock(&fs->mutex);
+ {
+ newfd = glfs_migrate_fd_safe(fs, newsubvol, oldfd);
+ }
+ pthread_mutex_lock(&fs->mutex);
+ fs->migration_in_progress = 0;
+ pthread_cond_broadcast(&fs->cond);
+
+ /* wake up other waiting tasks */
+ __GLFS_SYNCTASK_WAKE(fs);
+
+ return newfd;
+}
+
+fd_t *
+__glfs_resolve_fd(struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd)
+{
+ fd_t *fd = NULL;
+
+ if (glfd->fd->inode->table->xl == subvol)
+ return fd_ref(glfd->fd);
+
+ fd = __glfs_migrate_fd(fs, subvol, glfd);
+ if (!fd)
+ return NULL;
+
+ if (subvol == fs->active_subvol) {
+ fd_unref(glfd->fd);
+ glfd->fd = fd_ref(fd);
+ }
+
+ return fd;
+}
+
+fd_t *
+glfs_resolve_fd(struct glfs *fs, xlator_t *subvol, struct glfs_fd *glfd)
+{
+ fd_t *fd = NULL;
+
+ glfs_lock(fs, _gf_true);
+ {
+ fd = __glfs_resolve_fd(fs, subvol, glfd);
+ }
+ glfs_unlock(fs);
+
+ return fd;
+}
+
+void
+__glfs_migrate_openfds(struct glfs *fs, xlator_t *subvol)
+{
+ struct glfs_fd *glfd = NULL;
+ fd_t *fd = NULL;
+
+ list_for_each_entry(glfd, &fs->openfds, openfds)
+ {
+ if (gf_uuid_is_null(glfd->fd->inode->gfid)) {
+ gf_smsg(fs->volname, GF_LOG_INFO, 0, API_MSG_OPENFD_SKIPPED,
+ "glfd=%p", glfd, "glfd->fd=%p", glfd->fd, "subvol=%s",
+ graphid_str(subvol), "id=%d", subvol->graph->id, NULL);
+ /* create in progress, defer */
+ continue;
+ }
+
+ fd = __glfs_migrate_fd(fs, subvol, glfd);
+ if (fd) {
+ fd_unref(glfd->fd);
+ glfd->fd = fd;
+ }
+ }
+}
+
+/* Note that though it appears that this function executes under fs->mutex,
+ * it is not fully executed under fs->mutex. i.e. there are functions like
+ * __glfs_first_lookup, __glfs_refresh_inode, __glfs_migrate_openfds which
+ * unlocks fs->mutex before sending any network fop, and reacquire fs->mutex
+ * once the fop is complete. Hence the variable read from fs at the start of the
+ * function need not have the same value by the end of the function.
+ */
+xlator_t *
+__glfs_active_subvol(struct glfs *fs)
+{
+ xlator_t *new_subvol = NULL;
+ int ret = -1;
+ inode_t *new_cwd = NULL;
+
+ if (!fs->next_subvol)
+ return fs->active_subvol;
+
+ new_subvol = fs->mip_subvol = fs->next_subvol;
+ fs->next_subvol = NULL;
+
+ ret = __glfs_first_lookup(fs, new_subvol);
+ if (ret) {
+ gf_smsg(fs->volname, GF_LOG_INFO, errno,
+ API_MSG_FIRST_LOOKUP_GRAPH_FAILED, "subvol=%s",
+ graphid_str(new_subvol), "id=%d", new_subvol->graph->id,
+ "err=%s", strerror(errno), NULL);
+ return NULL;
+ }
+
+ if (fs->cwd) {
+ new_cwd = __glfs_refresh_inode(fs, new_subvol, fs->cwd, _gf_false);
+
+ if (!new_cwd) {
+ char buf1[64];
+ gf_smsg(fs->volname, GF_LOG_INFO, errno,
+ API_MSG_CWD_GRAPH_REF_FAILED, "buf=%s",
+ uuid_utoa_r(fs->cwd->gfid, buf1), "subvol=%s",
+ graphid_str(new_subvol), "id=%d", new_subvol->graph->id,
+ "err=%s", strerror(errno), NULL);
+ return NULL;
+ }
+ }
+
+ __glfs_migrate_openfds(fs, new_subvol);
+ /* TODO: Migrate the fds and inodes which have leases to the new graph
+ * (issue #350)*/
+
+ /* switching @active_subvol and @cwd
+ should be atomic
+ */
+ fs->old_subvol = fs->active_subvol;
+ fs->active_subvol = fs->mip_subvol;
+ fs->mip_subvol = NULL;
+
+ if (new_cwd) {
+ __glfs_cwd_set(fs, new_cwd);
+ inode_unref(new_cwd);
+ }
+
+ gf_smsg(fs->volname, GF_LOG_INFO, 0, API_MSG_SWITCHED_GRAPH, "subvol=%s",
+ graphid_str(new_subvol), "id=%d", new_subvol->graph->id, NULL);
+
+ return new_subvol;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_subvol_done, 3.4.0)
+void
+priv_glfs_subvol_done(struct glfs *fs, xlator_t *subvol)
+{
+ int ref = 0;
+ xlator_t *active_subvol = NULL;
+
+ if (!subvol)
+ return;
+
+ /* For decrementing subvol->wind ref count we need not check/wait for
+ * migration-in-progress flag.
+ * Also glfs_subvol_done is called in call-back path therefore waiting
+ * for migration-in-progress flag can lead to dead-lock.
+ */
+ glfs_lock(fs, _gf_false);
+ {
+ ref = (--subvol->winds);
+ active_subvol = fs->active_subvol;
+ }
+ glfs_unlock(fs);
+
+ if (ref == 0) {
+ assert(subvol != active_subvol);
+ xlator_notify(subvol, GF_EVENT_PARENT_DOWN, subvol, NULL);
+ }
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_active_subvol, 3.4.0)
+xlator_t *
+priv_glfs_active_subvol(struct glfs *fs)
+{
+ xlator_t *subvol = NULL;
+ xlator_t *old_subvol = NULL;
+
+ glfs_lock(fs, _gf_true);
+ {
+ subvol = __glfs_active_subvol(fs);
+
+ if (subvol)
+ subvol->winds++;
+
+ if (fs->old_subvol) {
+ old_subvol = fs->old_subvol;
+ fs->old_subvol = NULL;
+ old_subvol->switched = 1;
+ }
+ }
+ glfs_unlock(fs);
+
+ if (old_subvol)
+ priv_glfs_subvol_done(fs, old_subvol);
+
+ return subvol;
+}
+
+int
+__glfs_cwd_set(struct glfs *fs, inode_t *inode)
+{
+ if (inode->table->xl != fs->active_subvol) {
+ inode = __glfs_refresh_inode(fs, fs->active_subvol, inode, _gf_false);
+ if (!inode)
+ return -1;
+ } else {
+ inode_ref(inode);
+ }
+
+ if (fs->cwd)
+ inode_unref(fs->cwd);
+
+ fs->cwd = inode;
+
+ return 0;
+}
+
+int
+glfs_cwd_set(struct glfs *fs, inode_t *inode)
+{
+ int ret = 0;
+
+ glfs_lock(fs, _gf_true);
+ {
+ ret = __glfs_cwd_set(fs, inode);
+ }
+ glfs_unlock(fs);
+
+ return ret;
+}
+
+inode_t *
+__glfs_cwd_get(struct glfs *fs)
+{
+ inode_t *cwd = NULL;
+
+ if (!fs->cwd)
+ return NULL;
+
+ if (fs->cwd->table->xl == fs->active_subvol) {
+ cwd = inode_ref(fs->cwd);
+ return cwd;
+ }
+
+ cwd = __glfs_refresh_inode(fs, fs->active_subvol, fs->cwd, _gf_false);
+
+ return cwd;
+}
+
+inode_t *
+glfs_cwd_get(struct glfs *fs)
+{
+ inode_t *cwd = NULL;
+
+ glfs_lock(fs, _gf_true);
+ {
+ cwd = __glfs_cwd_get(fs);
+ }
+ glfs_unlock(fs);
+
+ return cwd;
+}
+
+inode_t *
+__glfs_resolve_inode(struct glfs *fs, xlator_t *subvol,
+ struct glfs_object *object)
+{
+ inode_t *inode = NULL;
+ gf_boolean_t lookup_needed = _gf_false;
+
+ lookup_needed = inode_needs_lookup(object->inode, THIS);
+
+ if (!lookup_needed && object->inode->table->xl == subvol)
+ return inode_ref(object->inode);
+
+ inode = __glfs_refresh_inode(fs, fs->active_subvol, object->inode,
+ lookup_needed);
+ if (!inode)
+ return NULL;
+
+ if (subvol == fs->active_subvol) {
+ inode_unref(object->inode);
+ object->inode = inode_ref(inode);
+ }
+
+ return inode;
+}
+
+inode_t *
+glfs_resolve_inode(struct glfs *fs, xlator_t *subvol,
+ struct glfs_object *object)
+{
+ inode_t *inode = NULL;
+
+ glfs_lock(fs, _gf_true);
+ {
+ inode = __glfs_resolve_inode(fs, subvol, object);
+ }
+ glfs_unlock(fs);
+
+ return inode;
+}
+
+int
+glfs_create_object(loc_t *loc, struct glfs_object **retobject)
+{
+ struct glfs_object *object = NULL;
+
+ object = GF_CALLOC(1, sizeof(struct glfs_object), glfs_mt_glfs_object_t);
+ if (object == NULL) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ object->inode = loc->inode;
+ gf_uuid_copy(object->gfid, object->inode->gfid);
+
+ /* we hold the reference */
+ loc->inode = NULL;
+
+ *retobject = object;
+
+ return 0;
+}
+
+struct glfs_object *
+glfs_h_resolve_symlink(struct glfs *fs, struct glfs_object *object)
+{
+ xlator_t *subvol = NULL;
+ loc_t sym_loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ char *lpath = NULL;
+ int ret = 0;
+ struct glfs_object *target_object = NULL;
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ ret = glfs_resolve_symlink(fs, subvol, object->inode, &lpath);
+ if (ret < 0)
+ goto out;
+
+ ret = glfs_resolve_at(fs, subvol, NULL, lpath, &sym_loc, &iatt,
+ /* always recurisvely follow while
+ following symlink
+ */
+ 1, 0);
+ if (ret == 0)
+ ret = glfs_create_object(&sym_loc, &target_object);
+
+out:
+ loc_wipe(&sym_loc);
+ GF_FREE(lpath);
+ return target_object;
+}
diff --git a/api/src/glfs.c b/api/src/glfs.c
new file mode 100644
index 00000000000..b4bf1423f6d
--- /dev/null
+++ b/api/src/glfs.c
@@ -0,0 +1,1806 @@
+/*
+ Copyright (c) 2012-2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ TODO:
+ - set proper pid/lk_owner to call frames (currently buried in syncop)
+ - fix logging.c/h to store logfp and loglevel in glusterfs_ctx_t and
+ reach it via THIS.
+ - update syncop functions to accept/return xdata. ???
+ - protocol/client to reconnect immediately after portmap disconnect.
+ - handle SEEK_END failure in _lseek()
+ - handle umask (per filesystem?)
+ - make itables LRU based
+ - 0-copy for readv/writev
+ - reconcile the open/creat mess
+*/
+
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <limits.h>
+#ifdef GF_LINUX_HOST_OS
+#include <sys/prctl.h>
+#endif
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/stack.h>
+#include <glusterfs/gf-event.h>
+#include "glfs-mem-types.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/hashfn.h>
+#include "rpc-clnt.h"
+#include <glusterfs/statedump.h>
+#include <glusterfs/syscall.h>
+
+#include "gfapi-messages.h"
+#include "glfs.h"
+#include "glfs-internal.h"
+
+static gf_boolean_t
+vol_assigned(cmd_args_t *args)
+{
+ return args->volfile || args->volfile_server;
+}
+
+static int
+glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
+{
+ call_pool_t *pool = NULL;
+ int ret = -1;
+
+ if (!ctx) {
+ goto err;
+ }
+
+ ret = xlator_mem_acct_init(THIS, glfs_mt_end + 1);
+ if (ret != 0) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, API_MSG_MEM_ACCT_INIT_FAILED,
+ NULL);
+ return ret;
+ }
+
+ /* reset ret to -1 so that we don't need to explicitly
+ * set it in all error paths before "goto err"
+ */
+
+ ret = -1;
+
+ ctx->process_uuid = generate_glusterfs_ctx_id();
+ if (!ctx->process_uuid) {
+ goto err;
+ }
+
+ ctx->page_size = 128 * GF_UNIT_KB;
+
+ ctx->iobuf_pool = iobuf_pool_new();
+ if (!ctx->iobuf_pool) {
+ goto err;
+ }
+
+ ctx->event_pool = gf_event_pool_new(DEFAULT_EVENT_POOL_SIZE,
+ STARTING_EVENT_THREADS);
+ if (!ctx->event_pool) {
+ goto err;
+ }
+
+ ctx->env = syncenv_new(0, 0, 0);
+ if (!ctx->env) {
+ goto err;
+ }
+
+ pool = GF_CALLOC(1, sizeof(call_pool_t), glfs_mt_call_pool_t);
+ if (!pool) {
+ goto err;
+ }
+
+ /* frame_mem_pool size 112 * 4k */
+ pool->frame_mem_pool = mem_pool_new(call_frame_t, 4096);
+ if (!pool->frame_mem_pool) {
+ goto err;
+ }
+ /* stack_mem_pool size 256 * 1024 */
+ pool->stack_mem_pool = mem_pool_new(call_stack_t, 1024);
+ if (!pool->stack_mem_pool) {
+ goto err;
+ }
+
+ ctx->stub_mem_pool = mem_pool_new(call_stub_t, 1024);
+ if (!ctx->stub_mem_pool) {
+ goto err;
+ }
+
+ ctx->dict_pool = mem_pool_new(dict_t, GF_MEMPOOL_COUNT_OF_DICT_T);
+ if (!ctx->dict_pool)
+ goto err;
+
+ ctx->dict_pair_pool = mem_pool_new(data_pair_t,
+ GF_MEMPOOL_COUNT_OF_DATA_PAIR_T);
+ if (!ctx->dict_pair_pool)
+ goto err;
+
+ ctx->dict_data_pool = mem_pool_new(data_t, GF_MEMPOOL_COUNT_OF_DATA_T);
+ if (!ctx->dict_data_pool)
+ goto err;
+
+ ctx->logbuf_pool = mem_pool_new(log_buf_t, GF_MEMPOOL_COUNT_OF_LRU_BUF_T);
+ if (!ctx->logbuf_pool)
+ goto err;
+
+ INIT_LIST_HEAD(&pool->all_frames);
+ INIT_LIST_HEAD(&ctx->cmd_args.xlator_options);
+ INIT_LIST_HEAD(&ctx->cmd_args.volfile_servers);
+
+ LOCK_INIT(&pool->lock);
+ ctx->pool = pool;
+
+ ret = 0;
+err:
+ if (ret && pool) {
+ if (pool->frame_mem_pool)
+ mem_pool_destroy(pool->frame_mem_pool);
+ if (pool->stack_mem_pool)
+ mem_pool_destroy(pool->stack_mem_pool);
+ GF_FREE(pool);
+ }
+
+ if (ret && ctx) {
+ if (ctx->stub_mem_pool)
+ mem_pool_destroy(ctx->stub_mem_pool);
+ if (ctx->dict_pool)
+ mem_pool_destroy(ctx->dict_pool);
+ if (ctx->dict_data_pool)
+ mem_pool_destroy(ctx->dict_data_pool);
+ if (ctx->dict_pair_pool)
+ mem_pool_destroy(ctx->dict_pair_pool);
+ if (ctx->logbuf_pool)
+ mem_pool_destroy(ctx->logbuf_pool);
+ }
+
+ return ret;
+}
+
+static int
+create_master(struct glfs *fs)
+{
+ int ret = 0;
+ xlator_t *master = NULL;
+
+ master = GF_CALLOC(1, sizeof(*master), glfs_mt_xlator_t);
+ if (!master)
+ goto err;
+
+ master->name = gf_strdup("gfapi");
+ if (!master->name)
+ goto err;
+
+ if (xlator_set_type(master, "mount/api") == -1) {
+ gf_smsg("glfs", GF_LOG_ERROR, 0, API_MSG_MASTER_XLATOR_INIT_FAILED,
+ "name=%s", fs->volname, NULL);
+ goto err;
+ }
+
+ master->ctx = fs->ctx;
+ master->private = fs;
+ master->options = dict_new();
+ if (!master->options)
+ goto err;
+
+ ret = xlator_init(master);
+ if (ret) {
+ gf_smsg("glfs", GF_LOG_ERROR, 0, API_MSG_GFAPI_XLATOR_INIT_FAILED,
+ NULL);
+ goto err;
+ }
+
+ fs->ctx->master = master;
+ THIS = master;
+
+ return 0;
+
+err:
+ if (master) {
+ xlator_destroy(master);
+ }
+
+ return -1;
+}
+
+static FILE *
+get_volfp(struct glfs *fs)
+{
+ cmd_args_t *cmd_args = NULL;
+ FILE *specfp = NULL;
+
+ cmd_args = &fs->ctx->cmd_args;
+
+ if ((specfp = fopen(cmd_args->volfile, "r")) == NULL) {
+ gf_smsg("glfs", GF_LOG_ERROR, errno, API_MSG_VOLFILE_OPEN_FAILED,
+ "file=%s", cmd_args->volfile, "err=%s", strerror(errno), NULL);
+ return NULL;
+ }
+
+ gf_msg_debug("glfs", 0, "loading volume file %s", cmd_args->volfile);
+
+ return specfp;
+}
+
+int
+glfs_volumes_init(struct glfs *fs)
+{
+ FILE *fp = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+
+ cmd_args = &fs->ctx->cmd_args;
+
+ if (!vol_assigned(cmd_args))
+ return -1;
+
+ if (sys_access(SECURE_ACCESS_FILE, F_OK) == 0) {
+ fs->ctx->secure_mgmt = 1;
+ fs->ctx->ssl_cert_depth = glusterfs_read_secure_access_file();
+ }
+
+ if (cmd_args->volfile_server) {
+ ret = glfs_mgmt_init(fs);
+ goto out;
+ }
+
+ fp = get_volfp(fs);
+
+ if (!fp) {
+ gf_smsg("glfs", GF_LOG_ERROR, ENOENT, API_MSG_VOL_SPEC_FILE_ERROR,
+ NULL);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glfs_process_volfp(fs, fp);
+ if (ret)
+ goto out;
+
+out:
+ return ret;
+}
+
+///////////////////////////////////////////////////////////////////////////////
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_xlator_option, 3.4.0)
+int
+pub_glfs_set_xlator_option(struct glfs *fs, const char *xlator, const char *key,
+ const char *value)
+{
+ xlator_cmdline_option_t *option = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ option = GF_CALLOC(1, sizeof(*option), glfs_mt_xlator_cmdline_option_t);
+ if (!option)
+ goto enomem;
+
+ INIT_LIST_HEAD(&option->cmd_args);
+
+ option->volume = gf_strdup(xlator);
+ if (!option->volume)
+ goto enomem;
+ option->key = gf_strdup(key);
+ if (!option->key)
+ goto enomem;
+ option->value = gf_strdup(value);
+ if (!option->value)
+ goto enomem;
+
+ list_add(&option->cmd_args, &fs->ctx->cmd_args.xlator_options);
+
+ __GLFS_EXIT_FS;
+
+ return 0;
+enomem:
+ errno = ENOMEM;
+
+ if (!option) {
+ __GLFS_EXIT_FS;
+ return -1;
+ }
+
+ GF_FREE(option->volume);
+ GF_FREE(option->key);
+ GF_FREE(option->value);
+ GF_FREE(option);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return -1;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_unset_volfile_server, 3.5.1)
+int
+pub_glfs_unset_volfile_server(struct glfs *fs, const char *transport,
+ const char *host, const int port)
+{
+ cmd_args_t *cmd_args = NULL;
+ server_cmdline_t *server = NULL;
+ server_cmdline_t *tmp = NULL;
+ char *transport_val = NULL;
+ int port_val = 0;
+ int ret = -1;
+
+ if (!fs || !host) {
+ errno = EINVAL;
+ return ret;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ cmd_args = &fs->ctx->cmd_args;
+
+ if (transport) {
+ transport_val = gf_strdup(transport);
+ } else {
+ transport_val = gf_strdup(GF_DEFAULT_VOLFILE_TRANSPORT);
+ }
+
+ if (!transport_val) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ if (port) {
+ port_val = port;
+ } else {
+ port_val = GF_DEFAULT_BASE_PORT;
+ }
+
+ list_for_each_entry_safe(server, tmp, &cmd_args->curr_server->list, list)
+ {
+ if (!server->volfile_server || !server->transport)
+ continue;
+ if ((!strcmp(server->volfile_server, host) &&
+ !strcmp(server->transport, transport_val) &&
+ (server->port == port_val))) {
+ list_del(&server->list);
+ ret = 0;
+ goto out;
+ }
+ }
+
+out:
+ GF_FREE(transport_val);
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_volfile_server, 3.4.0)
+int
+pub_glfs_set_volfile_server(struct glfs *fs, const char *transport,
+ const char *host, int port)
+{
+ cmd_args_t *cmd_args = NULL;
+ int ret = -1;
+ char *server_host = NULL;
+ char *server_transport = NULL;
+
+ if (!fs || !host) {
+ errno = EINVAL;
+ return ret;
+ }
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ cmd_args = &fs->ctx->cmd_args;
+ cmd_args->max_connect_attempts = 1;
+
+ server_host = gf_strdup(host);
+ if (!server_host) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ if (transport) {
+ /* volfile fetch support over tcp|unix only */
+ if (!strcmp(transport, "tcp") || !strcmp(transport, "unix")) {
+ server_transport = gf_strdup(transport);
+ } else if (!strcmp(transport, "rdma")) {
+ server_transport = gf_strdup(GF_DEFAULT_VOLFILE_TRANSPORT);
+ gf_smsg("glfs", GF_LOG_WARNING, EINVAL, API_MSG_TRANS_RDMA_DEP,
+ NULL);
+ } else {
+ gf_smsg("glfs", GF_LOG_TRACE, EINVAL, API_MSG_TRANS_NOT_SUPPORTED,
+ "transport=%s", transport, NULL);
+ goto out;
+ }
+ } else {
+ server_transport = gf_strdup(GF_DEFAULT_VOLFILE_TRANSPORT);
+ }
+
+ if (!server_transport) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ if (!port) {
+ port = GF_DEFAULT_BASE_PORT;
+ }
+
+ if (!strcmp(server_transport, "unix")) {
+ port = 0;
+ }
+
+ ret = gf_set_volfile_server_common(cmd_args, server_host, server_transport,
+ port);
+ if (ret) {
+ gf_log("glfs", GF_LOG_ERROR, "failed to set volfile server: %s",
+ strerror(errno));
+ }
+
+out:
+ if (server_host) {
+ GF_FREE(server_host);
+ }
+
+ if (server_transport) {
+ GF_FREE(server_transport);
+ }
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+/* *
+ * Used to free the arguments allocated by glfs_set_volfile_server()
+ */
+static void
+glfs_free_volfile_servers(cmd_args_t *cmd_args)
+{
+ server_cmdline_t *server = NULL;
+ server_cmdline_t *tmp = NULL;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, cmd_args, out);
+
+ list_for_each_entry_safe(server, tmp, &cmd_args->volfile_servers, list)
+ {
+ list_del_init(&server->list);
+ GF_FREE(server->volfile_server);
+ GF_FREE(server->transport);
+ GF_FREE(server);
+ }
+ cmd_args->curr_server = NULL;
+out:
+ return;
+}
+
+static void
+glfs_free_xlator_options(cmd_args_t *cmd_args)
+{
+ xlator_cmdline_option_t *xo = NULL;
+ xlator_cmdline_option_t *tmp_xo = NULL;
+
+ if (!&(cmd_args->xlator_options))
+ return;
+
+ list_for_each_entry_safe(xo, tmp_xo, &cmd_args->xlator_options, cmd_args)
+ {
+ list_del_init(&xo->cmd_args);
+ GF_FREE(xo->volume);
+ GF_FREE(xo->key);
+ GF_FREE(xo->value);
+ GF_FREE(xo);
+ }
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsuid, 3.4.2)
+int
+pub_glfs_setfsuid(uid_t fsuid)
+{
+ /* TODO:
+ * - Set the THIS and restore it appropriately
+ */
+ return syncopctx_setfsuid(&fsuid);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsgid, 3.4.2)
+int
+pub_glfs_setfsgid(gid_t fsgid)
+{
+ /* TODO:
+ * - Set the THIS and restore it appropriately
+ */
+ return syncopctx_setfsgid(&fsgid);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsgroups, 3.4.2)
+int
+pub_glfs_setfsgroups(size_t size, const gid_t *list)
+{
+ /* TODO:
+ * - Set the THIS and restore it appropriately
+ */
+ return syncopctx_setfsgroups(size, list);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_setfsleaseid, 4.0.0)
+int
+pub_glfs_setfsleaseid(glfs_leaseid_t leaseid)
+{
+ int ret = -1;
+ char *gleaseid = NULL;
+
+ gleaseid = gf_leaseid_get();
+ if (gleaseid) {
+ if (leaseid)
+ memcpy(gleaseid, leaseid, LEASE_ID_SIZE);
+ else /* reset leaseid */
+ memset(gleaseid, 0, LEASE_ID_SIZE);
+ ret = 0;
+ }
+
+ if (ret)
+ gf_log("glfs", GF_LOG_ERROR, "failed to set leaseid: %s",
+ strerror(errno));
+ return ret;
+}
+
+int
+get_fop_attr_glfd(dict_t **fop_attr, struct glfs_fd *glfd)
+{
+ char *leaseid = NULL;
+ int ret = 0;
+ gf_boolean_t dict_create = _gf_false;
+
+ leaseid = GF_MALLOC(LEASE_ID_SIZE, gf_common_mt_char);
+ GF_CHECK_ALLOC_AND_LOG("gfapi", leaseid, ret, "lease id alloc failed", out);
+ memcpy(leaseid, glfd->lease_id, LEASE_ID_SIZE);
+ if (*fop_attr == NULL) {
+ *fop_attr = dict_new();
+ dict_create = _gf_true;
+ }
+ GF_CHECK_ALLOC_AND_LOG("gfapi", *fop_attr, ret, "dict_new failed", out);
+ ret = dict_set_bin(*fop_attr, "lease-id", leaseid, LEASE_ID_SIZE);
+out:
+ if (ret) {
+ GF_FREE(leaseid);
+ if (dict_create) {
+ if (*fop_attr)
+ dict_unref(*fop_attr);
+ *fop_attr = NULL;
+ }
+ }
+ return ret;
+}
+
+int
+set_fop_attr_glfd(struct glfs_fd *glfd)
+{
+ char *lease_id = NULL;
+ int ret = -1;
+
+ lease_id = gf_existing_leaseid();
+ if (lease_id) {
+ memcpy(glfd->lease_id, lease_id, LEASE_ID_SIZE);
+ ret = 0;
+ }
+ return ret;
+}
+
+int
+get_fop_attr_thrd_key(dict_t **fop_attr)
+{
+ char *existing_leaseid = NULL, *leaseid = NULL;
+ int ret = 0;
+ gf_boolean_t dict_create = _gf_false;
+
+ existing_leaseid = gf_existing_leaseid();
+ if (existing_leaseid) {
+ leaseid = GF_MALLOC(LEASE_ID_SIZE, gf_common_mt_char);
+ GF_CHECK_ALLOC_AND_LOG("gfapi", leaseid, ret, "lease id alloc failed",
+ out);
+ memcpy(leaseid, existing_leaseid, LEASE_ID_SIZE);
+ if (*fop_attr == NULL) {
+ *fop_attr = dict_new();
+ dict_create = _gf_true;
+ }
+ GF_CHECK_ALLOC_AND_LOG("gfapi", *fop_attr, ret, "dict_new failed", out);
+ ret = dict_set_bin(*fop_attr, "lease-id", leaseid, LEASE_ID_SIZE);
+ }
+
+out:
+ if (ret) {
+ GF_FREE(leaseid);
+ if (dict_create) {
+ if (*fop_attr)
+ dict_unref(*fop_attr);
+ *fop_attr = NULL;
+ }
+ }
+ return ret;
+}
+
+void
+unset_fop_attr(dict_t **fop_attr)
+{
+ char *lease_id = NULL;
+ lease_id = gf_existing_leaseid();
+ if (lease_id)
+ memset(lease_id, 0, LEASE_ID_SIZE);
+ if (*fop_attr) {
+ dict_unref(*fop_attr);
+ *fop_attr = NULL;
+ }
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_from_glfd, 3.4.0)
+struct glfs *
+pub_glfs_from_glfd(struct glfs_fd *glfd)
+{
+ if (glfd == NULL) {
+ errno = EBADF;
+ return NULL;
+ }
+
+ return glfd->fs;
+}
+
+static void
+glfs_fd_destroy(struct glfs_fd *glfd)
+{
+ if (!glfd)
+ return;
+
+ glfs_lock(glfd->fs, _gf_true);
+ {
+ list_del_init(&glfd->openfds);
+ }
+ glfs_unlock(glfd->fs);
+
+ if (glfd->fd) {
+ fd_unref(glfd->fd);
+ glfd->fd = NULL;
+ }
+
+ GF_FREE(glfd->readdirbuf);
+
+ GF_FREE(glfd);
+}
+
+struct glfs_fd *
+glfs_fd_new(struct glfs *fs)
+{
+ struct glfs_fd *glfd = NULL;
+
+ glfd = GF_CALLOC(1, sizeof(*glfd), glfs_mt_glfs_fd_t);
+ if (!glfd)
+ return NULL;
+
+ glfd->fs = fs;
+
+ INIT_LIST_HEAD(&glfd->openfds);
+
+ GF_REF_INIT(glfd, glfs_fd_destroy);
+
+ return glfd;
+}
+
+void
+glfs_fd_bind(struct glfs_fd *glfd)
+{
+ struct glfs *fs = NULL;
+
+ fs = glfd->fs;
+
+ glfs_lock(fs, _gf_true);
+ {
+ list_add_tail(&glfd->openfds, &fs->openfds);
+ }
+ glfs_unlock(fs);
+}
+
+static void *
+glfs_poller(void *data)
+{
+ struct glfs *fs = NULL;
+
+ fs = data;
+
+ gf_event_dispatch(fs->ctx->event_pool);
+
+ return NULL;
+}
+
+static struct glfs *
+glfs_new_fs(const char *volname)
+{
+ struct glfs *fs = NULL;
+
+ fs = CALLOC(1, sizeof(*fs));
+ if (!fs)
+ return NULL;
+
+ INIT_LIST_HEAD(&fs->openfds);
+ INIT_LIST_HEAD(&fs->upcall_list);
+ INIT_LIST_HEAD(&fs->waitq);
+
+ PTHREAD_MUTEX_INIT(&fs->mutex, NULL, fs->pthread_flags, GLFS_INIT_MUTEX,
+ err);
+
+ PTHREAD_COND_INIT(&fs->cond, NULL, fs->pthread_flags, GLFS_INIT_COND, err);
+
+ PTHREAD_COND_INIT(&fs->child_down_cond, NULL, fs->pthread_flags,
+ GLFS_INIT_COND_CHILD, err);
+
+ PTHREAD_MUTEX_INIT(&fs->upcall_list_mutex, NULL, fs->pthread_flags,
+ GLFS_INIT_MUTEX_UPCALL, err);
+
+ fs->volname = strdup(volname);
+ if (!fs->volname)
+ goto err;
+
+ fs->pin_refcnt = 0;
+ fs->upcall_events = 0;
+ fs->up_cbk = NULL;
+ fs->up_data = NULL;
+
+ return fs;
+
+err:
+ glfs_free_from_ctx(fs);
+ return NULL;
+}
+
+extern xlator_t global_xlator;
+extern glusterfs_ctx_t *global_ctx;
+extern pthread_mutex_t global_ctx_mutex;
+
+static int
+glfs_init_global_ctx()
+{
+ int ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
+
+ pthread_mutex_lock(&global_ctx_mutex);
+ {
+ if (global_xlator.ctx)
+ goto unlock;
+
+ ctx = glusterfs_ctx_new();
+ if (!ctx) {
+ ret = -1;
+ goto unlock;
+ }
+
+ gf_log_globals_init(ctx, GF_LOG_NONE);
+
+ global_ctx = ctx;
+ global_xlator.ctx = global_ctx;
+
+ ret = glusterfs_ctx_defaults_init(ctx);
+ if (ret) {
+ global_ctx = NULL;
+ global_xlator.ctx = NULL;
+ goto unlock;
+ }
+ }
+unlock:
+ pthread_mutex_unlock(&global_ctx_mutex);
+
+ if (ret)
+ FREE(ctx);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_new, 3.4.0)
+struct glfs *
+pub_glfs_new(const char *volname)
+{
+ if (!volname) {
+ errno = EINVAL;
+ return NULL;
+ }
+
+ struct glfs *fs = NULL;
+ int i = 0;
+ int ret = -1;
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_t *old_THIS = NULL;
+ char pname[16] = "";
+ char msg[32] = "";
+
+ if (volname[0] == '/' || volname[0] == '-') {
+ if (strncmp(volname, "/snaps/", 7) == 0) {
+ goto label;
+ }
+ errno = EINVAL;
+ return NULL;
+ }
+
+ for (i = 0; i < strlen(volname); i++) {
+ if (!isalnum(volname[i]) && (volname[i] != '_') &&
+ (volname[i] != '-')) {
+ errno = EINVAL;
+ return NULL;
+ }
+ }
+
+label:
+ /*
+ * Do this as soon as possible in case something else depends on
+ * pool allocations.
+ */
+ mem_pools_init();
+
+ fs = glfs_new_fs(volname);
+ if (!fs)
+ goto out;
+
+ ctx = glusterfs_ctx_new();
+ if (!ctx)
+ goto out;
+
+ /* first globals init, for gf_mem_acct_enable_set () */
+
+ ret = glusterfs_globals_init(ctx);
+ if (ret)
+ goto out;
+
+ old_THIS = THIS;
+ ret = glfs_init_global_ctx();
+ if (ret)
+ goto out;
+
+ /* then ctx_defaults_init, for xlator_mem_acct_init(THIS) */
+
+ ret = glusterfs_ctx_defaults_init(ctx);
+ if (ret)
+ goto out;
+
+ fs->ctx = ctx;
+ fs->ctx->process_mode = GF_CLIENT_PROCESS;
+
+ ret = glfs_set_logging(fs, "/dev/null", 0);
+ if (ret)
+ goto out;
+
+ fs->ctx->cmd_args.volfile_id = gf_strdup(volname);
+ if (!(fs->ctx->cmd_args.volfile_id)) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = -1;
+#ifdef GF_LINUX_HOST_OS
+ ret = prctl(PR_GET_NAME, (unsigned long)pname, 0, 0, 0);
+#endif
+ if (ret)
+ fs->ctx->cmd_args.process_name = gf_strdup("gfapi");
+ else {
+ snprintf(msg, sizeof(msg), "gfapi.%s", pname);
+ fs->ctx->cmd_args.process_name = gf_strdup(msg);
+ }
+ ret = 0;
+
+out:
+ if (ret) {
+ if (fs) {
+ glfs_fini(fs);
+ fs = NULL;
+ } else {
+ /* glfs_fini() calls mem_pools_fini() too */
+ mem_pools_fini();
+ }
+ }
+
+ if (old_THIS)
+ THIS = old_THIS;
+
+ return fs;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_new_from_ctx, 3.7.0)
+struct glfs *
+priv_glfs_new_from_ctx(glusterfs_ctx_t *ctx)
+{
+ struct glfs *fs = NULL;
+
+ if (!ctx)
+ goto out;
+
+ fs = glfs_new_fs("");
+ if (!fs)
+ goto out;
+
+ fs->ctx = ctx;
+
+out:
+ return fs;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_free_from_ctx, 3.7.0)
+void
+priv_glfs_free_from_ctx(struct glfs *fs)
+{
+ upcall_entry *u_list = NULL;
+ upcall_entry *tmp = NULL;
+
+ if (!fs)
+ return;
+
+ /* cleanup upcall structures */
+ list_for_each_entry_safe(u_list, tmp, &fs->upcall_list, upcall_list)
+ {
+ list_del_init(&u_list->upcall_list);
+ GF_FREE(u_list->upcall_data.data);
+ GF_FREE(u_list);
+ }
+
+ PTHREAD_MUTEX_DESTROY(&fs->mutex, fs->pthread_flags, GLFS_INIT_MUTEX);
+
+ PTHREAD_COND_DESTROY(&fs->cond, fs->pthread_flags, GLFS_INIT_COND);
+
+ PTHREAD_COND_DESTROY(&fs->child_down_cond, fs->pthread_flags,
+ GLFS_INIT_COND_CHILD);
+
+ PTHREAD_MUTEX_DESTROY(&fs->upcall_list_mutex, fs->pthread_flags,
+ GLFS_INIT_MUTEX_UPCALL);
+
+ if (fs->oldvolfile)
+ FREE(fs->oldvolfile);
+
+ FREE(fs->volname);
+
+ FREE(fs);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_volfile, 3.4.0)
+int
+pub_glfs_set_volfile(struct glfs *fs, const char *volfile)
+{
+ cmd_args_t *cmd_args = NULL;
+
+ cmd_args = &fs->ctx->cmd_args;
+
+ if (vol_assigned(cmd_args))
+ return -1;
+
+ cmd_args->volfile = gf_strdup(volfile);
+ if (!cmd_args->volfile)
+ return -1;
+ return 0;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_logging, 3.4.0)
+int
+pub_glfs_set_logging(struct glfs *fs, const char *logfile, int loglevel)
+{
+ int ret = -1;
+ char *tmplog = NULL;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (!logfile) {
+ ret = gf_set_log_file_path(&fs->ctx->cmd_args, fs->ctx);
+ if (ret)
+ goto out;
+ tmplog = fs->ctx->cmd_args.log_file;
+ } else {
+ tmplog = (char *)logfile;
+ }
+
+ /* finish log set parameters before init */
+ if (loglevel >= 0)
+ gf_log_set_loglevel(fs->ctx, loglevel);
+
+ ret = gf_log_init(fs->ctx, tmplog, NULL);
+ if (ret)
+ goto out;
+
+ ret = gf_log_inject_timer_event(fs->ctx);
+ if (ret)
+ goto out;
+
+out:
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+int
+glfs_init_wait(struct glfs *fs)
+{
+ int ret = -1;
+
+ /* Always a top-down call, use glfs_lock() */
+ glfs_lock(fs, _gf_true);
+ {
+ while (!fs->init)
+ pthread_cond_wait(&fs->cond, &fs->mutex);
+ ret = fs->ret;
+ errno = fs->err;
+ }
+ glfs_unlock(fs);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_init_done, 3.4.0)
+void
+priv_glfs_init_done(struct glfs *fs, int ret)
+{
+ glfs_init_cbk init_cbk;
+
+ if (!fs) {
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_GLFS_FSOBJ_NULL, NULL);
+ goto out;
+ }
+
+ init_cbk = fs->init_cbk;
+
+ /* Always a bottom-up call, use mutex_lock() */
+ pthread_mutex_lock(&fs->mutex);
+ {
+ fs->init = 1;
+ fs->ret = ret;
+ fs->err = errno;
+
+ if (!init_cbk)
+ pthread_cond_broadcast(&fs->cond);
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+ if (init_cbk)
+ init_cbk(fs, ret);
+out:
+ return;
+}
+
+int
+glfs_init_common(struct glfs *fs)
+{
+ int ret = -1;
+
+ ret = create_master(fs);
+ if (ret)
+ return ret;
+
+ ret = gf_thread_create(&fs->poller, NULL, glfs_poller, fs, "glfspoll");
+ if (ret)
+ return ret;
+
+ ret = glfs_volumes_init(fs);
+ if (ret)
+ return ret;
+
+ fs->dev_id = gf_dm_hashfn(fs->volname, strlen(fs->volname));
+ return ret;
+}
+
+int
+glfs_init_async(struct glfs *fs, glfs_init_cbk cbk)
+{
+ int ret = -1;
+
+ if (!fs || !fs->ctx) {
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_FS_NOT_INIT, NULL);
+ errno = EINVAL;
+ return ret;
+ }
+
+ fs->init_cbk = cbk;
+
+ ret = glfs_init_common(fs);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_init, 3.4.0)
+int
+pub_glfs_init(struct glfs *fs)
+{
+ int ret = -1;
+
+ DECLARE_OLD_THIS;
+
+ if (!fs || !fs->ctx) {
+ gf_smsg("glfs", GF_LOG_ERROR, EINVAL, API_MSG_FS_NOT_INIT, NULL);
+ errno = EINVAL;
+ return ret;
+ }
+
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ ret = glfs_init_common(fs);
+ if (ret)
+ goto out;
+
+ ret = glfs_init_wait(fs);
+out:
+ __GLFS_EXIT_FS;
+
+ /* Set the initial current working directory to "/" */
+ if (ret >= 0) {
+ ret = glfs_chdir(fs, "/");
+ }
+
+invalid_fs:
+ return ret;
+}
+
+static int
+glusterfs_ctx_destroy(glusterfs_ctx_t *ctx)
+{
+ call_pool_t *pool = NULL;
+ int ret = 0;
+ glusterfs_graph_t *trav_graph = NULL;
+ glusterfs_graph_t *tmp = NULL;
+
+ if (ctx == NULL)
+ return 0;
+
+ if (ctx->cmd_args.curr_server)
+ glfs_free_volfile_servers(&ctx->cmd_args);
+
+ glfs_free_xlator_options(&ctx->cmd_args);
+
+ /* For all the graphs, crawl through the xlator_t structs and free
+ * all its members except for the mem_acct member,
+ * as GF_FREE will be referencing it.
+ */
+ list_for_each_entry_safe(trav_graph, tmp, &ctx->graphs, list)
+ {
+ xlator_tree_free_members(trav_graph->first);
+ }
+
+ /* Free the memory pool */
+ if (ctx->stub_mem_pool)
+ mem_pool_destroy(ctx->stub_mem_pool);
+ if (ctx->dict_pool)
+ mem_pool_destroy(ctx->dict_pool);
+ if (ctx->dict_data_pool)
+ mem_pool_destroy(ctx->dict_data_pool);
+ if (ctx->dict_pair_pool)
+ mem_pool_destroy(ctx->dict_pair_pool);
+ if (ctx->logbuf_pool)
+ mem_pool_destroy(ctx->logbuf_pool);
+
+ pool = ctx->pool;
+ if (pool) {
+ if (pool->frame_mem_pool)
+ mem_pool_destroy(pool->frame_mem_pool);
+ if (pool->stack_mem_pool)
+ mem_pool_destroy(pool->stack_mem_pool);
+ LOCK_DESTROY(&pool->lock);
+ GF_FREE(pool);
+ }
+
+ /* Free the event pool */
+ ret = gf_event_pool_destroy(ctx->event_pool);
+
+ /* Free the iobuf pool */
+ iobuf_pool_destroy(ctx->iobuf_pool);
+
+ GF_FREE(ctx->process_uuid);
+ GF_FREE(ctx->cmd_args.volfile_id);
+ GF_FREE(ctx->cmd_args.process_name);
+
+ LOCK_DESTROY(&ctx->lock);
+ pthread_mutex_destroy(&ctx->notify_lock);
+ pthread_cond_destroy(&ctx->notify_cond);
+
+ /* Free all the graph structs and its containing xlator_t structs
+ * from this point there should be no reference to GF_FREE/GF_CALLOC
+ * as it will try to access mem_acct and the below function would
+ * have freed the same.
+ */
+ list_for_each_entry_safe(trav_graph, tmp, &ctx->graphs, list)
+ {
+ glusterfs_graph_destroy_residual(trav_graph);
+ }
+
+ GF_FREE(ctx->statedump_path);
+ FREE(ctx);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_fini, 3.4.0)
+int
+pub_glfs_fini(struct glfs *fs)
+{
+ int ret = -1;
+ int countdown = 100;
+ xlator_t *subvol = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *graph = NULL;
+ call_pool_t *call_pool = NULL;
+ int fs_init = 0;
+ int err = -1;
+ struct synctask *waittask = NULL;
+
+ DECLARE_OLD_THIS;
+
+ if (!fs) {
+ errno = EINVAL;
+ goto invalid_fs;
+ }
+
+ ctx = fs->ctx;
+ if (!ctx) {
+ goto free_fs;
+ }
+
+ THIS = fs->ctx->master;
+
+ if (ctx->mgmt) {
+ rpc_clnt_disable(ctx->mgmt);
+ }
+
+ call_pool = fs->ctx->pool;
+
+ /* Wake up any suspended synctasks */
+ while (!list_empty(&fs->waitq)) {
+ waittask = list_entry(fs->waitq.next, struct synctask, waitq);
+ list_del_init(&waittask->waitq);
+ synctask_wake(waittask);
+ }
+
+ while (countdown--) {
+ /* give some time for background frames to finish */
+ pthread_mutex_lock(&fs->mutex);
+ {
+ /* Do we need to increase countdown? */
+ if ((!call_pool->cnt) && (!fs->pin_refcnt)) {
+ gf_msg_trace("glfs", 0,
+ "call_pool_cnt - %" PRId64
+ ","
+ "pin_refcnt - %d",
+ call_pool->cnt, fs->pin_refcnt);
+
+ ctx->cleanup_started = 1;
+ pthread_mutex_unlock(&fs->mutex);
+ break;
+ }
+ }
+ pthread_mutex_unlock(&fs->mutex);
+ gf_nanosleep(100000 * GF_US_IN_NS);
+ }
+
+ /* leaked frames may exist, we ignore */
+
+ /*We deem glfs_fini as successful if there are no pending frames in the call
+ *pool*/
+ ret = (call_pool->cnt == 0) ? 0 : -1;
+
+ pthread_mutex_lock(&fs->mutex);
+ {
+ fs_init = fs->init;
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+ if (fs_init != 0) {
+ subvol = glfs_active_subvol(fs);
+ if (subvol) {
+ /* PARENT_DOWN within glfs_subvol_done() is issued
+ only on graph switch (new graph should activiate
+ and decrement the extra @winds count taken in
+ glfs_graph_setup()
+
+ Since we are explicitly destroying,
+ PARENT_DOWN is necessary
+ */
+ xlator_notify(subvol, GF_EVENT_PARENT_DOWN, subvol, 0);
+ /* Here we wait for GF_EVENT_CHILD_DOWN before exiting,
+ in case of asynchrnous cleanup
+ */
+ graph = subvol->graph;
+ err = pthread_mutex_lock(&fs->mutex);
+ if (err != 0) {
+ gf_smsg("glfs", GF_LOG_ERROR, err, API_MSG_FSMUTEX_LOCK_FAILED,
+ "error=%s", strerror(err), NULL);
+ goto fail;
+ }
+ /* check and wait for CHILD_DOWN for active subvol*/
+ {
+ while (graph->used) {
+ err = pthread_cond_wait(&fs->child_down_cond, &fs->mutex);
+ if (err != 0)
+ gf_smsg("glfs", GF_LOG_INFO, err,
+ API_MSG_COND_WAIT_FAILED, "name=%s",
+ subvol->name, "err=%s", strerror(err), NULL);
+ }
+ }
+
+ err = pthread_mutex_unlock(&fs->mutex);
+ if (err != 0) {
+ gf_smsg("glfs", GF_LOG_ERROR, err,
+ API_MSG_FSMUTEX_UNLOCK_FAILED, "error=%s",
+ strerror(err), NULL);
+ goto fail;
+ }
+ }
+ glfs_subvol_done(fs, subvol);
+ }
+
+ ctx->cleanup_started = 1;
+
+ if (fs_init != 0) {
+ /* Destroy all the inode tables of all the graphs.
+ * NOTE:
+ * - inode objects should be destroyed before calling fini()
+ * of each xlator, as fini() and forget() of the xlators
+ * can share few common locks or data structures, calling
+ * fini first might destroy those required by forget
+ * ( eg: in quick-read)
+ * - The call to inode_table_destroy_all is not required when
+ * the cleanup during graph switch is implemented to perform
+ * inode table destroy.
+ */
+ inode_table_destroy_all(ctx);
+
+ /* Call fini() of all the xlators in the active graph
+ * NOTE:
+ * - xlator fini() should be called before destroying any of
+ * the threads. (eg: fini() in protocol-client uses timer
+ * thread) */
+ glusterfs_graph_deactivate(ctx->active);
+
+ /* Join the syncenv_processor threads and cleanup
+ * syncenv resources*/
+ syncenv_destroy(ctx->env);
+
+ /* Join the poller thread */
+ if (gf_event_dispatch_destroy(ctx->event_pool) < 0)
+ ret = -1;
+ }
+
+ /* Avoid dispatching events to mgmt after freed,
+ * unreference mgmt after the event_dispatch_destroy */
+ if (ctx->mgmt) {
+ rpc_clnt_unref(ctx->mgmt);
+ ctx->mgmt = NULL;
+ }
+
+ /* log infra has to be brought down before destroying
+ * timer registry, as logging uses timer infra
+ */
+ if (gf_log_fini(ctx) != 0)
+ ret = -1;
+
+ /* Join the timer thread */
+ if (fs_init != 0) {
+ gf_timer_registry_destroy(ctx);
+ }
+
+ /* Destroy the context and the global pools */
+ if (glusterfs_ctx_destroy(ctx) != 0)
+ ret = -1;
+
+free_fs:
+ glfs_free_from_ctx(fs);
+
+ /*
+ * Do this as late as possible in case anything else has (or
+ * grows) a dependency on mem-pool allocations.
+ */
+ mem_pools_fini();
+
+fail:
+ if (!ret)
+ ret = err;
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_get_volfile, 3.6.0)
+ssize_t
+pub_glfs_get_volfile(struct glfs *fs, void *buf, size_t len)
+{
+ ssize_t res = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ glfs_lock(fs, _gf_true);
+ if (len >= fs->oldvollen) {
+ gf_msg_trace("glfs", 0, "copying %zu to %p", len, buf);
+ memcpy(buf, fs->oldvolfile, len);
+ res = len;
+ } else {
+ res = len - fs->oldvollen;
+ gf_msg_trace("glfs", 0, "buffer is %zd too short", -res);
+ }
+ glfs_unlock(fs);
+
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return res;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_ipc, 3.12.0)
+int
+priv_glfs_ipc(struct glfs *fs, int opcode, void *xd_in, void **xd_out)
+{
+ xlator_t *subvol = NULL;
+ int ret = -1;
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ subvol = glfs_active_subvol(fs);
+ if (!subvol) {
+ ret = -1;
+ errno = EIO;
+ goto out;
+ }
+
+ ret = syncop_ipc(subvol, opcode, (dict_t *)xd_in, (dict_t **)xd_out);
+ DECODE_SYNCOP_ERR(ret);
+
+out:
+ glfs_subvol_done(fs, subvol);
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PRIVATE_DEFAULT(glfs_setfspid, 6.1)
+int
+priv_glfs_setfspid(struct glfs *fs, pid_t pid)
+{
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+
+ cmd_args = &fs->ctx->cmd_args;
+ cmd_args->client_pid = pid;
+ cmd_args->client_pid_set = 1;
+ ret = syncopctx_setfspid(&pid);
+
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_free, 3.7.16)
+void
+pub_glfs_free(void *ptr)
+{
+ GLFS_FREE(ptr);
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_fs, 3.7.16)
+struct glfs *
+pub_glfs_upcall_get_fs(struct glfs_upcall *arg)
+{
+ return arg->fs;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_reason, 3.7.16)
+enum glfs_upcall_reason
+pub_glfs_upcall_get_reason(struct glfs_upcall *arg)
+{
+ return arg->reason;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_get_event, 3.7.16)
+void *
+pub_glfs_upcall_get_event(struct glfs_upcall *arg)
+{
+ return arg->event;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_object, 3.7.16)
+struct glfs_object *
+pub_glfs_upcall_inode_get_object(struct glfs_upcall_inode *arg)
+{
+ return arg->object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_flags, 3.7.16)
+uint64_t
+pub_glfs_upcall_inode_get_flags(struct glfs_upcall_inode *arg)
+{
+ return arg->flags;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_stat, 3.7.16)
+struct stat *
+pub_glfs_upcall_inode_get_stat(struct glfs_upcall_inode *arg)
+{
+ return &arg->buf;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_expire, 3.7.16)
+uint64_t
+pub_glfs_upcall_inode_get_expire(struct glfs_upcall_inode *arg)
+{
+ return arg->expire_time_attr;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_pobject, 3.7.16)
+struct glfs_object *
+pub_glfs_upcall_inode_get_pobject(struct glfs_upcall_inode *arg)
+{
+ return arg->p_object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_pstat, 3.7.16)
+struct stat *
+pub_glfs_upcall_inode_get_pstat(struct glfs_upcall_inode *arg)
+{
+ return &arg->p_buf;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_oldpobject, 3.7.16)
+struct glfs_object *
+pub_glfs_upcall_inode_get_oldpobject(struct glfs_upcall_inode *arg)
+{
+ return arg->oldp_object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_inode_get_oldpstat, 3.7.16)
+struct stat *
+pub_glfs_upcall_inode_get_oldpstat(struct glfs_upcall_inode *arg)
+{
+ return &arg->oldp_buf;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_lease_get_object, 4.1.6)
+struct glfs_object *
+pub_glfs_upcall_lease_get_object(struct glfs_upcall_lease *arg)
+{
+ return arg->object;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_lease_get_lease_type, 4.1.6)
+uint32_t
+pub_glfs_upcall_lease_get_lease_type(struct glfs_upcall_lease *arg)
+{
+ return arg->lease_type;
+}
+
+/* definitions of the GLFS_SYSRQ_* chars are in glfs.h */
+static struct glfs_sysrq_help {
+ char sysrq;
+ char *msg;
+} glfs_sysrq_help[] = {{GLFS_SYSRQ_HELP, "(H)elp"},
+ {GLFS_SYSRQ_STATEDUMP, "(S)tatedump"},
+ {0, NULL}};
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_sysrq, 3.10.0)
+int
+pub_glfs_sysrq(struct glfs *fs, char sysrq)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+ int msg_len;
+ char msg[1024] = {
+ 0,
+ }; /* should not exceed 1024 chars */
+
+ if (!fs || !fs->ctx) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ ctx = fs->ctx;
+
+ switch (sysrq) {
+ case GLFS_SYSRQ_HELP: {
+ struct glfs_sysrq_help *usage = NULL;
+
+ for (usage = glfs_sysrq_help; usage->sysrq; usage++) {
+ msg_len = strlen(msg);
+ snprintf(msg + msg_len, /* append to msg */
+ sizeof(msg) - msg_len - 2,
+ /* - 2 for the " " + terminating \0 */
+ " %s", usage->msg);
+ }
+
+ /* not really an 'error', but make sure it gets logged */
+ gf_log("glfs", GF_LOG_ERROR, "available events: %s", msg);
+
+ break;
+ }
+ case GLFS_SYSRQ_STATEDUMP:
+ gf_proc_dump_info(SIGUSR1, ctx);
+ break;
+ default:
+ gf_smsg("glfs", GF_LOG_ERROR, ENOTSUP, API_MSG_INVALID_SYSRQ,
+ "sysrq=%c", sysrq, NULL);
+ errno = ENOTSUP;
+ ret = -1;
+ }
+out:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_register, 3.13.0)
+int
+pub_glfs_upcall_register(struct glfs *fs, uint32_t event_list,
+ glfs_upcall_cbk cbk, void *data)
+{
+ int ret = 0;
+
+ /* list of supported upcall events */
+ uint32_t up_events = (GLFS_EVENT_INODE_INVALIDATE |
+ GLFS_EVENT_RECALL_LEASE);
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ GF_VALIDATE_OR_GOTO(THIS->name, cbk, out);
+
+ /* Event list should be either GLFS_EVENT_ANY
+ * or list of supported individual events (up_events)
+ */
+ if ((event_list != GLFS_EVENT_ANY) && (event_list & ~up_events)) {
+ errno = EINVAL;
+ ret = -1;
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG,
+ "event_list=(0x%08x)", event_list, NULL);
+ goto out;
+ }
+
+ /* in case other thread does unregister */
+ pthread_mutex_lock(&fs->mutex);
+ {
+ if (event_list & GLFS_EVENT_INODE_INVALIDATE) {
+ /* @todo: Check if features.cache-invalidation is
+ * enabled.
+ */
+ fs->upcall_events |= GF_UPCALL_CACHE_INVALIDATION;
+ ret |= GLFS_EVENT_INODE_INVALIDATE;
+ }
+ if (event_list & GLFS_EVENT_RECALL_LEASE) {
+ /* @todo: Check if features.leases is enabled */
+ fs->upcall_events |= GF_UPCALL_RECALL_LEASE;
+ ret |= GLFS_EVENT_RECALL_LEASE;
+ }
+ /* Override cbk function if existing */
+ fs->up_cbk = cbk;
+ fs->up_data = data;
+ fs->cache_upcalls = _gf_true;
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+out:
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_upcall_unregister, 3.13.0)
+int
+pub_glfs_upcall_unregister(struct glfs *fs, uint32_t event_list)
+{
+ int ret = 0;
+ /* list of supported upcall events */
+ uint32_t up_events = (GLFS_EVENT_INODE_INVALIDATE |
+ GLFS_EVENT_RECALL_LEASE);
+
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ /* Event list should be either GLFS_EVENT_ANY
+ * or list of supported individual events (up_events)
+ */
+ if ((event_list != GLFS_EVENT_ANY) && (event_list & ~up_events)) {
+ errno = EINVAL;
+ ret = -1;
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, API_MSG_INVALID_ARG,
+ "event_list=(0x%08x)", event_list, NULL);
+ goto out;
+ }
+
+ pthread_mutex_lock(&fs->mutex);
+ {
+ /* We already checked if event_list contains list of supported
+ * upcall events. No other specific checks needed as of now for
+ * unregister */
+ fs->upcall_events &= ~(event_list);
+ ret |= ((event_list == GLFS_EVENT_ANY) ? up_events : event_list);
+
+ /* If there are no upcall events registered, reset cbk */
+ if (fs->upcall_events == 0) {
+ fs->up_cbk = NULL;
+ fs->up_data = NULL;
+ fs->cache_upcalls = _gf_false;
+ }
+ }
+ pthread_mutex_unlock(&fs->mutex);
+
+out:
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return ret;
+}
+
+GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_set_statedump_path, 7.0)
+int
+pub_glfs_set_statedump_path(struct glfs *fs, const char *path)
+{
+ struct stat st;
+ int ret;
+ DECLARE_OLD_THIS;
+ __GLFS_ENTRY_VALIDATE_FS(fs, invalid_fs);
+
+ if (!path) {
+ gf_log("glfs", GF_LOG_ERROR, "path is NULL");
+ errno = EINVAL;
+ goto err;
+ }
+
+ /* If path is not present OR, if it is directory AND has enough permission
+ * to create files, then proceed */
+ ret = sys_stat(path, &st);
+ if (ret && errno != ENOENT) {
+ gf_log("glfs", GF_LOG_ERROR, "%s: not a valid path (%s)", path,
+ strerror(errno));
+ errno = EINVAL;
+ goto err;
+ }
+
+ if (!ret) {
+ /* file is present, now check other things */
+ if (!S_ISDIR(st.st_mode)) {
+ gf_log("glfs", GF_LOG_ERROR, "%s: path is not directory", path);
+ errno = EINVAL;
+ goto err;
+ }
+ if (sys_access(path, W_OK | X_OK) < 0) {
+ gf_log("glfs", GF_LOG_ERROR,
+ "%s: path doesn't have write permission", path);
+ errno = EPERM;
+ goto err;
+ }
+ }
+
+ /* If set, it needs to be freed, so we don't have leak */
+ GF_FREE(fs->ctx->statedump_path);
+
+ fs->ctx->statedump_path = gf_strdup(path);
+ if (!fs->ctx->statedump_path) {
+ gf_log("glfs", GF_LOG_ERROR,
+ "%s: failed to set statedump path, no memory", path);
+ errno = ENOMEM;
+ goto err;
+ }
+
+ __GLFS_EXIT_FS;
+
+ return 0;
+err:
+ __GLFS_EXIT_FS;
+
+invalid_fs:
+ return -1;
+}
diff --git a/api/src/glfs.h b/api/src/glfs.h
new file mode 100644
index 00000000000..279d11d58ee
--- /dev/null
+++ b/api/src/glfs.h
@@ -0,0 +1,1485 @@
+/*
+ Copyright (c) 2012-2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLFS_H
+#define _GLFS_H
+
+/*
+ Enforce the following flags as libgfapi is built
+ with them, and we want programs linking against them to also
+ be built with these flags. This is necessary as it affects
+ some of the structures defined in libc headers (like struct stat)
+ and those definitions need to be consistently compiled in
+ both the library and the application.
+*/
+
+/* Values for valid flags to be used when using XXXsetattr, to set multiple
+ attribute values passed via the related stat structure.
+ */
+
+#define GFAPI_SET_ATTR_MODE 0x1
+#define GFAPI_SET_ATTR_UID 0x2
+#define GFAPI_SET_ATTR_GID 0x4
+#define GFAPI_SET_ATTR_SIZE 0x8
+#define GFAPI_SET_ATTR_ATIME 0x10
+#define GFAPI_SET_ATTR_MTIME 0x20
+
+#ifndef _FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 64
+#endif
+
+#ifndef __USE_FILE_OFFSET64
+#define __USE_FILE_OFFSET64
+#endif
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#include <sys/cdefs.h>
+#include <dirent.h>
+#include <sys/statvfs.h>
+#include <stdint.h>
+#include <sys/time.h>
+
+/*
+ * For off64_t to be defined, we need both
+ * __USE_LARGEFILE64 to be true and __off64_t_defnined to be
+ * false. But, making __USE_LARGEFILE64 true causes other issues
+ * such as redinition of stat and fstat to stat64 and fstat64
+ * respectively which again causes compilation issues.
+ * Without off64_t being defined, this will not compile as
+ * copy_file_range uses off64_t. Hence define it here. First
+ * check whether __off64_t_defined is true or not. <unistd.h>
+ * sets that flag when it defines off64_t. If __off64_t_defined
+ * is false and __USE_FILE_OFFSET64 is true, then go on to define
+ * off64_t using __off64_t.
+ */
+#ifndef GF_BSD_HOST_OS
+#if defined(__USE_FILE_OFFSET64) && !defined(__off64_t_defined)
+typedef __off64_t off64_t;
+#endif /* defined(__USE_FILE_OFFSET64) && !defined(__off64_t_defined) */
+#else
+#include <stdio.h>
+#ifndef _OFF64_T_DECLARED
+/*
+ * Including <stdio.h> (done above) should actually define
+ * _OFF64_T_DECLARED with off64_t data type being available
+ * for consumption. But, off64_t data type is not recognizable
+ * for FreeBSD versions less than 11. Hence, int64_t is typedefed
+ * to off64_t.
+ */
+#define _OFF64_T_DECLARED
+typedef int64_t off64_t;
+#endif /* _OFF64_T_DECLARED */
+#endif /* GF_BSD_HOST_OS */
+
+#if defined(HAVE_SYS_ACL_H) || (defined(USE_POSIX_ACLS) && USE_POSIX_ACLS)
+#include <sys/acl.h>
+#else
+typedef void *acl_t;
+typedef int acl_type_t;
+#endif
+
+/* Portability non glibc c++ build systems */
+#ifndef __THROW
+#if defined __cplusplus
+#define __THROW throw()
+#else
+#define __THROW
+#endif
+#endif
+
+#ifndef GF_DARWIN_HOST_OS
+#define GFAPI_PUBLIC(sym, ver) /**/
+#define GFAPI_PRIVATE(sym, ver) /**/
+#else
+#define GFAPI_PUBLIC(sym, ver) __asm("_" __STRING(sym) "$GFAPI_" __STRING(ver))
+#define GFAPI_PRIVATE(sym, ver) \
+ __asm("_" __STRING(sym) "$GFAPI_PRIVATE_" __STRING(ver))
+#endif
+
+__BEGIN_DECLS
+
+/* The filesystem object. One object per 'virtual mount' */
+struct glfs;
+typedef struct glfs glfs_t;
+
+/*
+ SYNOPSIS
+
+ glfs_new: Create a new 'virtual mount' object.
+
+ DESCRIPTION
+
+ This is most likely the very first function you will use. This function
+ will create a new glfs_t (virtual mount) object in memory.
+
+ On this newly created glfs_t, you need to be either set a volfile path
+ (glfs_set_volfile) or a volfile server (glfs_set_volfile_server).
+
+ The glfs_t object needs to be initialized with glfs_init() before you
+ can start issuing file operations on it.
+
+ PARAMETERS
+
+ @volname: Name of the volume. This identifies the server-side volume and
+ the fetched volfile (equivalent of --volfile-id command line
+ parameter to glusterfsd). When used with glfs_set_volfile() the
+ @volname has no effect (except for appearing in log messages).
+
+ RETURN VALUES
+
+ NULL : Out of memory condition.
+ Others : Pointer to the newly created glfs_t virtual mount object.
+
+*/
+
+glfs_t *
+glfs_new(const char *volname) __THROW GFAPI_PUBLIC(glfs_new, 3.4.0);
+
+/*
+ SYNOPSIS
+
+ glfs_set_volfile: Specify the path to the volume specification file.
+
+ DESCRIPTION
+
+ If you are using a static volume specification file (without dynamic
+ volume management abilities from the CLI), then specify the path to
+ the volume specification file.
+
+ This is incompatible with glfs_set_volfile_server().
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the volume
+ specification file.
+
+ @volfile: Path to the locally available volume specification file.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int
+glfs_set_volfile(glfs_t *fs, const char *volfile) __THROW
+ GFAPI_PUBLIC(glfs_set_volfile, 3.4.0);
+
+/*
+ SYNOPSIS
+
+ glfs_set_volfile_server: Specify the list of addresses for management server.
+
+ DESCRIPTION
+
+ This function specifies the list of addresses for the management server
+ (glusterd) to connect, and establish the volume configuration. The @volname
+ parameter passed to glfs_new() is the volume which will be virtually
+ mounted as the glfs_t object. All operations performed by the CLI at
+ the management server will automatically be reflected in the 'virtual
+ mount' object as it maintains a connection to glusterd and polls on
+ configuration change notifications.
+
+ This is incompatible with glfs_set_volfile().
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the volume
+ specification file.
+
+ @transport: String specifying the transport used to connect to the
+ management daemon. Specifying NULL will result in the
+ usage of the default (tcp) transport type. Permitted
+ values are "tcp" or "unix".
+
+ @host: String specifying the address where to find the management daemon.
+ Socket path, while using Unix domain socket as transport type.
+ This would either be
+ - FQDN (e.g : "storage01.company.com") or
+ - ASCII (e.g : "192.168.22.1") or
+ - Socket path (e.g : "/var/run/glusterd.socket")
+
+ NOTE: This API is special, multiple calls to this function with different
+ volfile servers, port or transport-type would create a list of volfile
+ servers which would be polled during `volfile_fetch_attempts()`
+
+ @port: The TCP port number where gluster management daemon is listening.
+ Specifying 0 uses the default port number GF_DEFAULT_BASE_PORT.
+ This parameter is unused if you are using a UNIX domain socket.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int
+glfs_set_volfile_server(glfs_t *fs, const char *transport, const char *host,
+ int port) __THROW
+ GFAPI_PUBLIC(glfs_set_volfile_server, 3.4.0);
+
+int
+glfs_unset_volfile_server(glfs_t *fs, const char *transport, const char *host,
+ int port) __THROW
+ GFAPI_PUBLIC(glfs_unset_volfile_server, 3.5.1);
+/*
+ SYNOPSIS
+
+ glfs_set_logging: Specify logging parameters.
+
+ DESCRIPTION
+
+ This function specifies logging parameters for the virtual mount.
+ Default log file is /dev/null.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the logging parameters.
+
+ @logfile: The logfile to be used for logging. Will be created if it does not
+ already exist (provided system permissions allow). If NULL, a new
+ logfile will be created in default log directory associated with
+ the glusterfs installation.
+
+ @loglevel: Numerical value specifying the degree of verbosity. Higher the
+ value, more verbose the logging.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int
+glfs_set_logging(glfs_t *fs, const char *logfile, int loglevel) __THROW
+ GFAPI_PUBLIC(glfs_set_logging, 3.4.0);
+
+/*
+ SYNOPSIS
+
+ glfs_init: Initialize the 'virtual mount'
+
+ DESCRIPTION
+
+ This function initializes the glfs_t object. This consists of many steps:
+ - Spawn a poll-loop thread.
+ - Establish connection to management daemon and receive volume specification.
+ - Construct translator graph and initialize graph.
+ - Wait for initialization (connecting to all bricks) to complete.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be initialized.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+*/
+
+int
+glfs_init(glfs_t *fs) __THROW GFAPI_PUBLIC(glfs_init, 3.4.0);
+
+/*
+ SYNOPSIS
+
+ glfs_fini: Cleanup and destroy the 'virtual mount'
+
+ DESCRIPTION
+
+ This function attempts to gracefully destroy glfs_t object. An attempt is
+ made to wait for all background processing to complete before returning.
+
+ glfs_fini() must be called after all operations on glfs_t is finished.
+
+ IMPORTANT
+
+ IT IS NECESSARY TO CALL glfs_fini() ON ALL THE INITIALIZED glfs_t
+ OBJECTS BEFORE TERMINATING THE PROGRAM. THERE MAY BE CACHED AND
+ UNWRITTEN / INCOMPLETE OPERATIONS STILL IN PROGRESS EVEN THOUGH THE
+ API CALLS HAVE RETURNED. glfs_fini() WILL WAIT FOR BACKGROUND OPERATIONS
+ TO COMPLETE BEFORE RETURNING, THEREBY MAKING IT SAFE FOR THE PROGRAM TO
+ EXIT.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be destroyed.
+
+ RETURN VALUES
+
+ 0 : Success.
+*/
+
+int
+glfs_fini(glfs_t *fs) __THROW GFAPI_PUBLIC(glfs_fini, 3.4.0);
+
+/*
+ SYNOPSIS
+
+ glfs_getvol: Get the volfile associated with a 'virtual mount'
+
+ DESCRIPTION
+
+ Sometimes it's useful e.g. for scripts to see the volfile, so that they
+ can parse it and find subvolumes to do things like split-brain resolution
+ or custom layouts. The API here was specifically intended to make access
+ e.g. from Python as simple as possible.
+
+ Note that the volume must be started (not necessarily mounted) for this
+ to work.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object for which a volfile is desired
+ @buf: Pointer to a place for the volfile length to be stored
+ @len: Length of @buf
+
+ RETURN VALUES
+
+ >0: filled N bytes of buffer
+ 0: no volfile available
+ <0: volfile length exceeds @len by N bytes (@buf unchanged)
+*/
+
+ssize_t
+glfs_get_volfile(glfs_t *fs, void *buf, size_t len) __THROW
+ GFAPI_PUBLIC(glfs_get_volfile, 3.6.0);
+
+/*
+ SYNOPSIS
+
+ glfs_get_volumeid: Copy the Volume UUID stored in the glfs object fs.
+
+ DESCRIPTION
+
+ This function when invoked for the first time sends RPC call to the
+ the management server (glusterd) to fetch volume uuid and stores it
+ in the glusterfs_context linked to the glfs object fs which can be used
+ in the subsequent calls. Later it parses that UUID to convert it from
+ canonical string format into an opaque byte array and copy it into
+ the volid array. In case if either of the input parameters, volid or
+ size, is NULL, number of bytes required to copy the volume UUID is returned.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be used to retrieve and store
+ volume's UUID.
+ @volid: Pointer to a place for the volume UUID to be stored
+ @size: Length of @volid
+
+ RETURN VALUES
+
+ -1 : Failure. @errno will be set with the type of failure.
+ Others : length of the volume UUID stored.
+*/
+
+int
+glfs_get_volumeid(glfs_t *fs, char *volid, size_t size) __THROW
+ GFAPI_PUBLIC(glfs_get_volumeid, 3.5.0);
+
+/*
+ * FILE OPERATION
+ *
+ * What follows are filesystem operations performed on the
+ * 'virtual mount'. The calls here are kept as close to
+ * the POSIX system calls as possible.
+ *
+ * Notes:
+ *
+ * - All paths specified, even if absolute, are relative to the
+ * root of the virtual mount and not the system root (/).
+ *
+ */
+
+/* The file descriptor object. One per open file/directory. */
+
+struct glfs_fd;
+typedef struct glfs_fd glfs_fd_t;
+
+/*
+ * Mask for request/result items in the struct glfs_stat.
+ *
+ * Query request/result mask for glfs_stat() (family of functions) and
+ * struct glfs_stat::glfs_st_mask.
+ *
+ * These bits should be set in the mask argument of glfs_stat() (family of
+ * functions) to request particular items when calling glfs_stat().
+ *
+ * NOTE: Lower order 32 bits are used to reflect statx(2) bits. For Gluster
+ * specific attrs/extensions, use higher order 32 bits.
+ *
+ */
+#define GLFS_STAT_TYPE 0x0000000000000001U /* Want/got stx_mode & S_IFMT */
+#define GLFS_STAT_MODE 0x0000000000000002U /* Want/got stx_mode & ~S_IFMT */
+#define GLFS_STAT_NLINK 0x0000000000000004U /* Want/got stx_nlink */
+#define GLFS_STAT_UID 0x0000000000000008U /* Want/got stx_uid */
+#define GLFS_STAT_GID 0x0000000000000010U /* Want/got stx_gid */
+#define GLFS_STAT_ATIME 0x0000000000000020U /* Want/got stx_atime */
+#define GLFS_STAT_MTIME 0x0000000000000040U /* Want/got stx_mtime */
+#define GLFS_STAT_CTIME 0x0000000000000080U /* Want/got stx_ctime */
+#define GLFS_STAT_INO 0x0000000000000100U /* Want/got stx_ino */
+#define GLFS_STAT_SIZE 0x0000000000000200U /* Want/got stx_size */
+#define GLFS_STAT_BLOCKS 0x0000000000000400U /* Want/got stx_blocks */
+#define GLFS_STAT_BASIC_STATS \
+ 0x00000000000007ffU /* Items in the normal stat struct */
+#define GLFS_STAT_BTIME 0x0000000000000800U /* Want/got stx_btime */
+#define GLFS_STAT_ALL 0x0000000000000fffU /* All currently supported flags */
+#define GLFS_STAT_RESERVED \
+ 0x8000000000000000U /* Reserved to denote future expansion */
+
+/* Macros for checking validity of struct glfs_stat members.*/
+#define GLFS_STAT_TYPE_VALID(stmask) (stmask & GLFS_STAT_TYPE)
+#define GLFS_STAT_MODE_VALID(stmask) (stmask & GLFS_STAT_MODE)
+#define GLFS_STAT_NLINK_VALID(stmask) (stmask & GLFS_STAT_NLINK)
+#define GLFS_STAT_UID_VALID(stmask) (stmask & GLFS_STAT_UID)
+#define GLFS_STAT_GID_VALID(stmask) (stmask & GLFS_STAT_GID)
+#define GLFS_STAT_ATIME_VALID(stmask) (stmask & GLFS_STAT_ATIME)
+#define GLFS_STAT_MTIME_VALID(stmask) (stmask & GLFS_STAT_MTIME)
+#define GLFS_STAT_CTIME_VALID(stmask) (stmask & GLFS_STAT_CTIME)
+#define GLFS_STAT_INO_VALID(stmask) (stmask & GLFS_STAT_INO)
+#define GLFS_STAT_SIZE_VALID(stmask) (stmask & GLFS_STAT_SIZE)
+#define GLFS_STAT_BLOCKS_VALID(stmask) (stmask & GLFS_STAT_BLOCKS)
+#define GLFS_STAT_BTIME_VALID(stmask) (stmask & GLFS_STAT_BTIME)
+#define GLFS_STAT_GFID_VALID(stmask) (stmask & GLFS_STAT_GFID)
+
+/*
+ * Attributes to be found in glfs_st_attributes and masked in
+ * glfs_st_attributes_mask.
+ *
+ * These give information about the features or the state of a file that might
+ * be of use to programs.
+ *
+ * NOTE: Lower order 32 bits are used to reflect statx(2) attribute bits. For
+ * Gluster specific attrs, use higher order 32 bits.
+ *
+ * NOTE: We do not support any file attributes or state as yet!
+ */
+#define GLFS_STAT_ATTR_RESERVED \
+ 0x8000000000000000U /* Reserved to denote future expansion */
+
+/* Extended file attribute structure.
+ *
+ * The caller passes a mask of what they're specifically interested in as a
+ * parameter to glfs_stat(). What glfs_stat() actually got will be indicated
+ * in glfs_st_mask upon return.
+ *
+ * For each bit in the mask argument:
+ *
+ * - if the datum is not supported:
+ *
+ * - the bit will be cleared, and
+ *
+ * - the datum value is undefined
+ *
+ * - otherwise, if explicitly requested:
+ *
+ * - the field will be filled in and the bit will be set;
+ *
+ * - otherwise, if not requested, but available in, it will be filled in
+ * anyway, and the bit will be set upon return;
+ *
+ * - otherwise the field and the bit will be cleared before returning.
+ *
+ */
+
+struct glfs_stat {
+ uint64_t glfs_st_mask; /* What results were written [uncond] */
+ uint64_t glfs_st_attributes; /* Flags conveying information about the file
+ [uncond] */
+ uint64_t glfs_st_attributes_mask; /* Mask to show what's supported in
+ st_attributes [ucond] */
+ struct timespec glfs_st_atime; /* Last access time */
+ struct timespec glfs_st_btime; /* File creation time */
+ struct timespec glfs_st_ctime; /* Last attribute change time */
+ struct timespec glfs_st_mtime; /* Last data modification time */
+ ino_t glfs_st_ino; /* Inode number */
+ off_t glfs_st_size; /* File size */
+ blkcnt_t glfs_st_blocks; /* Number of 512-byte blocks allocated */
+ uint32_t glfs_st_rdev_major; /* Device ID of special file [if bdev/cdev] */
+ uint32_t glfs_st_rdev_minor;
+ uint32_t glfs_st_dev_major; /* ID of device containing file [uncond] */
+ uint32_t glfs_st_dev_minor;
+ blksize_t glfs_st_blksize; /* Preferred general I/O size [uncond] */
+ nlink_t glfs_st_nlink; /* Number of hard links */
+ uid_t glfs_st_uid; /* User ID of owner */
+ gid_t glfs_st_gid; /* Group ID of owner */
+ mode_t glfs_st_mode; /* File mode */
+};
+
+#define GLFS_LEASE_ID_SIZE 16 /* 128bits */
+typedef char glfs_leaseid_t[GLFS_LEASE_ID_SIZE];
+
+/*
+ * PER THREAD IDENTITY MODIFIERS
+ *
+ * The following operations enable to set a per thread identity context
+ * for the glfs APIs to perform operations as. The calls here are kept as close
+ * to POSIX equivalents as possible.
+ *
+ * NOTES:
+ *
+ * - setgroups is a per thread setting, hence this is named as fsgroups to be
+ * close in naming to the fs(u/g)id APIs
+ * - Typical mode of operation is to set the IDs as required, with the
+ * supplementary groups being optionally set, make the glfs call and post the
+ * glfs operation set them back to eu/gid or uid/gid as appropriate to the
+ * caller
+ * - The groups once set, need to be unset by setting the size to 0 (in which
+ * case the list argument is a do not care)
+ * - In case of leases feature enables, setfsleaseid is used to set and reset
+ * leaseid before and after every I/O operation.
+ * - Once a process for a thread of operation choses to set the IDs, all glfs
+ * calls made from that thread would default to the IDs set for the thread.
+ * As a result use these APIs with care and ensure that the set IDs are
+ * reverted to global process defaults as required.
+ *
+ */
+int
+glfs_setfsuid(uid_t fsuid) __THROW GFAPI_PUBLIC(glfs_setfsuid, 3.4.2);
+
+int
+glfs_setfsgid(gid_t fsgid) __THROW GFAPI_PUBLIC(glfs_setfsgid, 3.4.2);
+
+int
+glfs_setfsgroups(size_t size, const gid_t *list) __THROW
+ GFAPI_PUBLIC(glfs_setfsgroups, 3.4.2);
+
+int
+glfs_setfsleaseid(glfs_leaseid_t leaseid) __THROW
+ GFAPI_PUBLIC(glfs_setfsleaseid, 4.0.0);
+
+/*
+ SYNOPSIS
+
+ glfs_open: Open a file.
+
+ DESCRIPTION
+
+ This function opens a file on a virtual mount.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be initialized.
+
+ @path: Path of the file within the virtual mount.
+
+ @flags: Open flags. See open(2). O_CREAT is not supported.
+ Use glfs_creat() for creating files.
+
+ RETURN VALUES
+
+ NULL : Failure. @errno will be set with the type of failure.
+ Others : Pointer to the opened glfs_fd_t.
+
+ */
+
+glfs_fd_t *
+glfs_open(glfs_t *fs, const char *path, int flags) __THROW
+ GFAPI_PUBLIC(glfs_open, 3.4.0);
+
+/*
+ SYNOPSIS
+
+ glfs_creat: Create a file.
+
+ DESCRIPTION
+
+ This function opens a file on a virtual mount.
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be initialized.
+
+ @path: Path of the file within the virtual mount.
+
+ @mode: Permission of the file to be created.
+
+ @flags: Create flags. See open(2). O_EXCL is supported.
+
+ RETURN VALUES
+
+ NULL : Failure. @errno will be set with the type of failure.
+ Others : Pointer to the opened glfs_fd_t.
+
+ */
+
+glfs_fd_t *
+glfs_creat(glfs_t *fs, const char *path, int flags, mode_t mode) __THROW
+ GFAPI_PUBLIC(glfs_creat, 3.4.0);
+
+int
+glfs_close(glfs_fd_t *fd) __THROW GFAPI_PUBLIC(glfs_close, 3.4.0);
+
+glfs_t *
+glfs_from_glfd(glfs_fd_t *fd) __THROW GFAPI_PUBLIC(glfs_from_glfd, 3.4.0);
+
+int
+glfs_set_xlator_option(glfs_t *fs, const char *xlator, const char *key,
+ const char *value) __THROW
+ GFAPI_PUBLIC(glfs_set_xlator_option, 3.4.0);
+
+/*
+
+ glfs_io_cbk
+
+ The following is the function type definition of the callback
+ function pointer which has to be provided by the caller to the
+ *_async() versions of the IO calls.
+
+ The callback function is called on completion of the requested
+ IO, and the appropriate return value is returned in @ret.
+
+ In case of an error in completing the IO, @ret will be -1 and
+ @errno will be set with the appropriate error.
+
+ @ret will be same as the return value of the non _async() variant
+ of the particular call
+
+ @data is the same context pointer provided by the caller at the
+ time of issuing the async IO call. This can be used by the
+ caller to differentiate different instances of the async requests
+ in a common callback function.
+
+ @prestat and @poststat are allocated on the stack, that are auto destroyed
+ post the callback function returns.
+*/
+
+typedef void (*glfs_io_cbk)(glfs_fd_t *fd, ssize_t ret,
+ struct glfs_stat *prestat,
+ struct glfs_stat *poststat, void *data);
+
+// glfs_{read,write}[_async]
+
+ssize_t
+glfs_read(glfs_fd_t *fd, void *buf, size_t count, int flags) __THROW
+ GFAPI_PUBLIC(glfs_read, 3.4.0);
+
+ssize_t
+glfs_write(glfs_fd_t *fd, const void *buf, size_t count, int flags) __THROW
+ GFAPI_PUBLIC(glfs_write, 3.4.0);
+
+int
+glfs_read_async(glfs_fd_t *fd, void *buf, size_t count, int flags,
+ glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_read_async, 6.0);
+
+int
+glfs_write_async(glfs_fd_t *fd, const void *buf, size_t count, int flags,
+ glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_write_async, 6.0);
+
+// glfs_{read,write}v[_async]
+
+ssize_t
+glfs_readv(glfs_fd_t *fd, const struct iovec *iov, int iovcnt,
+ int flags) __THROW GFAPI_PUBLIC(glfs_readv, 3.4.0);
+
+ssize_t
+glfs_writev(glfs_fd_t *fd, const struct iovec *iov, int iovcnt,
+ int flags) __THROW GFAPI_PUBLIC(glfs_writev, 3.4.0);
+
+int
+glfs_readv_async(glfs_fd_t *fd, const struct iovec *iov, int count, int flags,
+ glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_readv_async, 6.0);
+
+int
+glfs_writev_async(glfs_fd_t *fd, const struct iovec *iov, int count, int flags,
+ glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_writev_async, 6.0);
+
+// glfs_p{read,write}[_async]
+
+ssize_t
+glfs_pread(glfs_fd_t *fd, void *buf, size_t count, off_t offset, int flags,
+ struct glfs_stat *poststat) __THROW GFAPI_PUBLIC(glfs_pread, 6.0);
+
+ssize_t
+glfs_pwrite(glfs_fd_t *fd, const void *buf, size_t count, off_t offset,
+ int flags, struct glfs_stat *prestat,
+ struct glfs_stat *poststat) __THROW GFAPI_PUBLIC(glfs_pwrite, 6.0);
+
+int
+glfs_pread_async(glfs_fd_t *fd, void *buf, size_t count, off_t offset,
+ int flags, glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_pread_async, 6.0);
+
+int
+glfs_pwrite_async(glfs_fd_t *fd, const void *buf, int count, off_t offset,
+ int flags, glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_pwrite_async, 6.0);
+
+// glfs_p{read,write}v[_async]
+
+ssize_t
+glfs_preadv(glfs_fd_t *fd, const struct iovec *iov, int iovcnt, off_t offset,
+ int flags) __THROW GFAPI_PUBLIC(glfs_preadv, 3.4.0);
+
+ssize_t
+glfs_pwritev(glfs_fd_t *fd, const struct iovec *iov, int iovcnt, off_t offset,
+ int flags) __THROW GFAPI_PUBLIC(glfs_pwritev, 3.4.0);
+
+int
+glfs_preadv_async(glfs_fd_t *fd, const struct iovec *iov, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_preadv_async, 6.0);
+
+int
+glfs_pwritev_async(glfs_fd_t *fd, const struct iovec *iov, int count,
+ off_t offset, int flags, glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_pwritev_async, 6.0);
+
+off_t
+glfs_lseek(glfs_fd_t *fd, off_t offset, int whence) __THROW
+ GFAPI_PUBLIC(glfs_lseek, 3.4.0);
+
+ssize_t
+glfs_copy_file_range(struct glfs_fd *glfd_in, off64_t *off_in,
+ struct glfs_fd *glfd_out, off64_t *off_out, size_t len,
+ unsigned int flags, struct glfs_stat *statbuf,
+ struct glfs_stat *prestat,
+ struct glfs_stat *poststat) __THROW
+ GFAPI_PUBLIC(glfs_copy_file_range, 6.0);
+
+int
+glfs_truncate(glfs_t *fs, const char *path, off_t length) __THROW
+ GFAPI_PUBLIC(glfs_truncate, 3.7.15);
+
+int
+glfs_ftruncate(glfs_fd_t *fd, off_t length, struct glfs_stat *prestat,
+ struct glfs_stat *poststat) __THROW
+ GFAPI_PUBLIC(glfs_ftruncate, 6.0);
+
+int
+glfs_ftruncate_async(glfs_fd_t *fd, off_t length, glfs_io_cbk fn,
+ void *data) __THROW
+ GFAPI_PUBLIC(glfs_ftruncate_async, 6.0);
+
+int
+glfs_lstat(glfs_t *fs, const char *path, struct stat *buf) __THROW
+ GFAPI_PUBLIC(glfs_lstat, 3.4.0);
+
+int
+glfs_stat(glfs_t *fs, const char *path, struct stat *buf) __THROW
+ GFAPI_PUBLIC(glfs_stat, 3.4.0);
+
+int
+glfs_fstat(glfs_fd_t *fd, struct stat *buf) __THROW
+ GFAPI_PUBLIC(glfs_fstat, 3.4.0);
+
+int
+glfs_fsync(glfs_fd_t *fd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat) __THROW GFAPI_PUBLIC(glfs_fsync, 6.0);
+
+int
+glfs_fsync_async(glfs_fd_t *fd, glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_fsync_async, 6.0);
+
+int
+glfs_fdatasync(glfs_fd_t *fd, struct glfs_stat *prestat,
+ struct glfs_stat *poststat) __THROW
+ GFAPI_PUBLIC(glfs_fdatasync, 6.0);
+
+int
+glfs_fdatasync_async(glfs_fd_t *fd, glfs_io_cbk fn, void *data) __THROW
+ GFAPI_PUBLIC(glfs_fdatasync_async, 6.0);
+
+int
+glfs_access(glfs_t *fs, const char *path, int mode) __THROW
+ GFAPI_PUBLIC(glfs_access, 3.4.0);
+
+int
+glfs_symlink(glfs_t *fs, const char *oldpath, const char *newpath) __THROW
+ GFAPI_PUBLIC(glfs_symlink, 3.4.0);
+
+int
+glfs_readlink(glfs_t *fs, const char *path, char *buf, size_t bufsiz) __THROW
+ GFAPI_PUBLIC(glfs_readlink, 3.4.0);
+
+int
+glfs_mknod(glfs_t *fs, const char *path, mode_t mode, dev_t dev) __THROW
+ GFAPI_PUBLIC(glfs_mknod, 3.4.0);
+
+int
+glfs_mkdir(glfs_t *fs, const char *path, mode_t mode) __THROW
+ GFAPI_PUBLIC(glfs_mkdir, 3.4.0);
+
+int
+glfs_unlink(glfs_t *fs, const char *path) __THROW
+ GFAPI_PUBLIC(glfs_unlink, 3.4.0);
+
+int
+glfs_rmdir(glfs_t *fs, const char *path) __THROW
+ GFAPI_PUBLIC(glfs_rmdir, 3.4.0);
+
+int
+glfs_rename(glfs_t *fs, const char *oldpath, const char *newpath) __THROW
+ GFAPI_PUBLIC(glfs_rename, 3.4.0);
+
+int
+glfs_link(glfs_t *fs, const char *oldpath, const char *newpath) __THROW
+ GFAPI_PUBLIC(glfs_link, 3.4.0);
+
+glfs_fd_t *
+glfs_opendir(glfs_t *fs, const char *path) __THROW
+ GFAPI_PUBLIC(glfs_opendir, 3.4.0);
+
+/*
+ * @glfs_readdir_r and @glfs_readdirplus_r ARE thread safe AND re-entrant,
+ * but the interface has ambiguity about the size of @dirent to be allocated
+ * before calling the APIs. 512 byte buffer (for @dirent) is sufficient for
+ * all known systems which are tested againt glusterfs/gfapi, but may be
+ * insufficient in the future.
+ */
+
+int
+glfs_readdir_r(glfs_fd_t *fd, struct dirent *dirent,
+ struct dirent **result) __THROW
+ GFAPI_PUBLIC(glfs_readdir_r, 3.4.0);
+
+int
+glfs_readdirplus_r(glfs_fd_t *fd, struct stat *stat, struct dirent *dirent,
+ struct dirent **result) __THROW
+ GFAPI_PUBLIC(glfs_readdirplus_r, 3.4.0);
+
+/*
+ * @glfs_readdir and @glfs_readdirplus are NEITHER thread safe NOR re-entrant
+ * when called on the same directory handle. However they ARE thread safe
+ * AND re-entrant when called on different directory handles (which may be
+ * referring to the same directory too.)
+ */
+
+struct dirent *
+glfs_readdir(glfs_fd_t *fd) __THROW GFAPI_PUBLIC(glfs_readdir, 3.5.0);
+
+struct dirent *
+glfs_readdirplus(glfs_fd_t *fd, struct stat *stat) __THROW
+ GFAPI_PUBLIC(glfs_readdirplus, 3.5.0);
+
+long
+glfs_telldir(glfs_fd_t *fd) __THROW GFAPI_PUBLIC(glfs_telldir, 3.4.0);
+
+void
+glfs_seekdir(glfs_fd_t *fd, long offset) __THROW
+ GFAPI_PUBLIC(glfs_seekdir, 3.4.0);
+
+int
+glfs_closedir(glfs_fd_t *fd) __THROW GFAPI_PUBLIC(glfs_closedir, 3.4.0);
+
+int
+glfs_statvfs(glfs_t *fs, const char *path, struct statvfs *buf) __THROW
+ GFAPI_PUBLIC(glfs_statvfs, 3.4.0);
+
+int
+glfs_chmod(glfs_t *fs, const char *path, mode_t mode) __THROW
+ GFAPI_PUBLIC(glfs_chmod, 3.4.0);
+
+int
+glfs_fchmod(glfs_fd_t *fd, mode_t mode) __THROW
+ GFAPI_PUBLIC(glfs_fchmod, 3.4.0);
+
+int
+glfs_chown(glfs_t *fs, const char *path, uid_t uid, gid_t gid) __THROW
+ GFAPI_PUBLIC(glfs_chown, 3.4.0);
+
+int
+glfs_lchown(glfs_t *fs, const char *path, uid_t uid, gid_t gid) __THROW
+ GFAPI_PUBLIC(glfs_lchown, 3.4.0);
+
+int
+glfs_fchown(glfs_fd_t *fd, uid_t uid, gid_t gid) __THROW
+ GFAPI_PUBLIC(glfs_fchown, 3.4.0);
+
+int
+glfs_utimens(glfs_t *fs, const char *path,
+ const struct timespec times[2]) __THROW
+ GFAPI_PUBLIC(glfs_utimens, 3.4.0);
+
+int
+glfs_lutimens(glfs_t *fs, const char *path,
+ const struct timespec times[2]) __THROW
+ GFAPI_PUBLIC(glfs_lutimens, 3.4.0);
+
+int
+glfs_futimens(glfs_fd_t *fd, const struct timespec times[2]) __THROW
+ GFAPI_PUBLIC(glfs_futimens, 3.4.0);
+
+ssize_t
+glfs_getxattr(glfs_t *fs, const char *path, const char *name, void *value,
+ size_t size) __THROW GFAPI_PUBLIC(glfs_getxattr, 3.4.0);
+
+ssize_t
+glfs_lgetxattr(glfs_t *fs, const char *path, const char *name, void *value,
+ size_t size) __THROW GFAPI_PUBLIC(glfs_lgetxattr, 3.4.0);
+
+ssize_t
+glfs_fgetxattr(glfs_fd_t *fd, const char *name, void *value,
+ size_t size) __THROW GFAPI_PUBLIC(glfs_fgetxattr, 3.4.0);
+
+ssize_t
+glfs_listxattr(glfs_t *fs, const char *path, void *value, size_t size) __THROW
+ GFAPI_PUBLIC(glfs_listxattr, 3.4.0);
+
+ssize_t
+glfs_llistxattr(glfs_t *fs, const char *path, void *value, size_t size) __THROW
+ GFAPI_PUBLIC(glfs_llistxattr, 3.4.0);
+
+ssize_t
+glfs_flistxattr(glfs_fd_t *fd, void *value, size_t size) __THROW
+ GFAPI_PUBLIC(glfs_flistxattr, 3.4.0);
+
+int
+glfs_setxattr(glfs_t *fs, const char *path, const char *name, const void *value,
+ size_t size, int flags) __THROW
+ GFAPI_PUBLIC(glfs_setxattr, 3.4.0);
+
+int
+glfs_lsetxattr(glfs_t *fs, const char *path, const char *name,
+ const void *value, size_t size, int flags) __THROW
+ GFAPI_PUBLIC(glfs_lsetxattr, 3.4.0);
+
+int
+glfs_fsetxattr(glfs_fd_t *fd, const char *name, const void *value, size_t size,
+ int flags) __THROW GFAPI_PUBLIC(glfs_fsetxattr, 3.4.0);
+
+int
+glfs_removexattr(glfs_t *fs, const char *path, const char *name) __THROW
+ GFAPI_PUBLIC(glfs_removexattr, 3.4.0);
+
+int
+glfs_lremovexattr(glfs_t *fs, const char *path, const char *name) __THROW
+ GFAPI_PUBLIC(glfs_lremovexattr, 3.4.0);
+
+int
+glfs_fremovexattr(glfs_fd_t *fd, const char *name) __THROW
+ GFAPI_PUBLIC(glfs_fremovexattr, 3.4.0);
+
+int
+glfs_fallocate(glfs_fd_t *fd, int keep_size, off_t offset, size_t len) __THROW
+ GFAPI_PUBLIC(glfs_fallocate, 3.5.0);
+
+int
+glfs_discard(glfs_fd_t *fd, off_t offset, size_t len) __THROW
+ GFAPI_PUBLIC(glfs_discard, 3.5.0);
+
+int
+glfs_discard_async(glfs_fd_t *fd, off_t length, size_t lent, glfs_io_cbk fn,
+ void *data) __THROW GFAPI_PUBLIC(glfs_discard_async, 6.0);
+
+int
+glfs_zerofill(glfs_fd_t *fd, off_t offset, off_t len) __THROW
+ GFAPI_PUBLIC(glfs_zerofill, 3.5.0);
+
+int
+glfs_zerofill_async(glfs_fd_t *fd, off_t length, off_t len, glfs_io_cbk fn,
+ void *data) __THROW GFAPI_PUBLIC(glfs_zerofill_async, 6.0);
+
+char *
+glfs_getcwd(glfs_t *fs, char *buf, size_t size) __THROW
+ GFAPI_PUBLIC(glfs_getcwd, 3.4.0);
+
+int
+glfs_chdir(glfs_t *fs, const char *path) __THROW
+ GFAPI_PUBLIC(glfs_chdir, 3.4.0);
+
+int
+glfs_fchdir(glfs_fd_t *fd) __THROW GFAPI_PUBLIC(glfs_fchdir, 3.4.0);
+
+char *
+glfs_realpath(glfs_t *fs, const char *path, char *resolved_path) __THROW
+ GFAPI_PUBLIC(glfs_realpath, 3.7.17);
+
+/*
+ * @cmd and @flock are as specified in man fcntl(2).
+ */
+int
+glfs_posix_lock(glfs_fd_t *fd, int cmd, struct flock *flock) __THROW
+ GFAPI_PUBLIC(glfs_posix_lock, 3.4.0);
+
+/*
+ SYNOPSIS
+
+ glfs_file_lock: Request extended byte range lock on a file
+
+ DESCRIPTION
+
+ This function is capable of requesting either advisory or mandatory type
+ byte range locks on a file.
+
+ Note: To set a unique owner key for locks based on a particular file
+ descriptor, make use of glfs_fd_set_lkowner() api to do so before
+ requesting lock via this api. This owner key will be further consumed
+ by other incoming data modifying file operations via the same file
+ descriptor.
+
+ PARAMETERS
+
+ @fd: File descriptor
+
+ @cmd: As specified in man fcntl(2).
+
+ @flock: As specified in man fcntl(2).
+
+ @lk_mode: Required lock type from options available with the
+ enum glfs_lock_mode_t defined below.
+
+ RETURN VALUES
+
+ 0 : Success. Lock has been granted.
+ -1 : Failure. @errno will be set indicating the type of failure.
+
+ */
+
+/* Lock modes used by glfs_file_lock() */
+enum glfs_lock_mode { GLFS_LK_ADVISORY = 0, GLFS_LK_MANDATORY };
+typedef enum glfs_lock_mode glfs_lock_mode_t;
+
+int
+glfs_file_lock(glfs_fd_t *fd, int cmd, struct flock *flock,
+ glfs_lock_mode_t lk_mode) __THROW
+ GFAPI_PUBLIC(glfs_file_lock, 3.13.0);
+
+glfs_fd_t *
+glfs_dup(glfs_fd_t *fd) __THROW GFAPI_PUBLIC(glfs_dup, 3.4.0);
+
+void
+glfs_free(void *ptr) __THROW GFAPI_PUBLIC(glfs_free, 3.7.16);
+
+/*
+ * glfs_sysrq: send a system request to the @fs instance
+ *
+ * Different commands for @sysrq are possible, the defines for these are listed
+ * below the function definition.
+ *
+ * This function always returns success if the @sysrq is recognized. The return
+ * value does not way anythin about the result of the @sysrq execution. Not all
+ * @sysrq command will be able to return a success/failure status.
+ */
+int
+glfs_sysrq(glfs_t *fs, char sysrq) __THROW GFAPI_PUBLIC(glfs_sysrq, 3.10.0);
+
+#define GLFS_SYSRQ_HELP 'h' /* log a message with supported sysrq commands */
+#define GLFS_SYSRQ_STATEDUMP 's' /* create a statedump */
+
+/*
+ * Structure returned as part of xreaddirplus
+ */
+struct glfs_xreaddirp_stat;
+typedef struct glfs_xreaddirp_stat glfs_xreaddirp_stat_t;
+
+/* Request flags to be used in XREADDIRP operation */
+#define GFAPI_XREADDIRP_NULL \
+ 0x00000000 /* by default, no stat will be fetched */
+#define GFAPI_XREADDIRP_STAT 0x00000001 /* Get stat */
+#define GFAPI_XREADDIRP_HANDLE 0x00000002 /* Get object handle */
+
+/*
+ * This stat structure returned gets freed as part of glfs_free(xstat)
+ */
+struct stat *
+glfs_xreaddirplus_get_stat(glfs_xreaddirp_stat_t *xstat) __THROW
+ GFAPI_PUBLIC(glfs_xreaddirplus_get_stat, 3.11.0);
+
+/*
+ * SYNOPSIS
+ *
+ * glfs_xreaddirplus_r: Extended Readirplus operation
+ *
+ * DESCRIPTION
+ *
+ * This API does readdirplus operation, but along with stat it can fetch other
+ * extra information like object handles etc for each of the dirents returned
+ * based on requested flags. On success it returns the set of flags successfully
+ * processed.
+ *
+ * Note that there are chances that some of the requested information may not be
+ * available or returned (for example if reached EOD). Ensure to validate the
+ * returned value to determine what flags have been successfully processed
+ * & set.
+ *
+ * PARAMETERS
+ *
+ * INPUT:
+ * @glfd: GFAPI file descriptor of the directory
+ * @flags: Flags determining xreaddirp_stat requested
+ * Current available values are:
+ * GFAPI_XREADDIRP_NULL
+ * GFAPI_XREADDIRP_STAT
+ * GFAPI_XREADDIRP_HANDLE
+ * @ext: Dirent struture to copy the values to
+ * (though optional recommended to be allocated by application
+ * esp., in multi-threaded environment)
+ *
+ * OUTPUT:
+ * @res: to store the next dirent value. If NULL and return value is '0',
+ * it means it reached end of the directory.
+ * @xstat_p: Pointer to contain all the requested data returned
+ * for that dirent. Application should make use of glfs_free() API
+ * to free this pointer and the variables returned by
+ * glfs_xreaddirplus_get_*() APIs.
+ *
+ * RETURN VALUE:
+ * >=0: SUCCESS (value contains the flags successfully processed)
+ * -1: FAILURE
+ */
+int
+glfs_xreaddirplus_r(glfs_fd_t *glfd, uint32_t flags,
+ glfs_xreaddirp_stat_t **xstat_p, struct dirent *ext,
+ struct dirent **res) __THROW
+ GFAPI_PUBLIC(glfs_xreaddirplus_r, 3.11.0);
+
+#define GFAPI_MAX_LOCK_OWNER_LEN 255
+
+/*
+ *
+ * DESCRIPTION
+ *
+ * This API allows application to set lk_owner on a fd.
+ * A glfd can be associated with only single lk_owner. In case if there
+ * is need to set another lk_owner, applications can make use of
+ * 'glfs_dup' to get duplicate glfd and set new lk_owner on that second
+ * glfd.
+ *
+ * Also its not recommended to override or clear lk_owner value as the
+ * same shall be used to flush any outstanding locks while closing the fd.
+ *
+ * PARAMETERS
+ *
+ * INPUT:
+ * @glfd: GFAPI file descriptor
+ * @len: Size of lk_owner buffer. Max value can be GFAPI_MAX_LOCK_OWNER_LEN
+ * @data: lk_owner data buffer.
+ *
+ * OUTPUT:
+ * 0: SUCCESS
+ * -1: FAILURE
+ */
+int
+glfs_fd_set_lkowner(glfs_fd_t *glfd, void *data, int len) __THROW
+ GFAPI_PUBLIC(glfs_fd_set_lkowner, 3.10.7);
+
+/*
+ * Applications (currently NFS-Ganesha) can make use of this
+ * structure to read upcall notifications sent by server either
+ * by polling or registering a callback function.
+ *
+ * On success, applications need to check for 'reason' to decide
+ * if any upcall event is received.
+ *
+ * Currently supported upcall_events -
+ * GLFS_UPCALL_INODE_INVALIDATE -
+ * 'event_arg' - glfs_upcall_inode
+ *
+ * After processing the event, applications need to free 'event_arg' with
+ * glfs_free().
+ *
+ * Also similar to I/Os, the application should ideally stop polling
+ * or unregister upcall_cbk function before calling glfs_fini(..).
+ * Hence making an assumption that 'fs' & ctx structures cannot be
+ * freed while in this routine.
+ */
+struct glfs_upcall;
+typedef struct glfs_upcall glfs_upcall_t;
+
+glfs_t *
+glfs_upcall_get_fs(glfs_upcall_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_get_fs, 3.7.16);
+
+enum glfs_upcall_reason {
+ GLFS_UPCALL_EVENT_NULL = 0,
+ GLFS_UPCALL_INODE_INVALIDATE, /* invalidate cache entry */
+ GLFS_UPCALL_RECALL_LEASE, /* recall lease */
+};
+typedef enum glfs_upcall_reason glfs_upcall_reason_t;
+
+glfs_upcall_reason_t
+glfs_upcall_get_reason(glfs_upcall_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_get_reason, 3.7.16);
+
+/*
+ * Applications first need to make use of above API i.e,
+ * "glfs_upcall_get_reason" to determine which upcall event it has
+ * received. Post that below API - "glfs_upcall_get_event" should
+ * be used to get corresponding upcall event object.
+ *
+ * Below are the upcall_reason and corresponding upcall_event objects:
+ * ==========================================================
+ * glfs_upcall_reason - event_object
+ * ==========================================================
+ * GLFS_UPCALL_EVENT_NULL - NULL
+ * GLFS_UPCALL_INODE_INVALIDATE - struct glfs_upcall_inode
+ * GLFS_UPCALL_RECALL_LEASE - struct glfs_upcall_lease
+ *
+ * After processing upcall event, glfs_free() should be called on the
+ * glfs_upcall.
+ */
+void *
+glfs_upcall_get_event(glfs_upcall_t *arg) __THROW
+ GFAPI_PUBLIC(glfs_upcall_get_event, 3.7.16);
+
+/*
+ * SYNOPSIS
+ *
+ * glfs_upcall_cbk: Upcall callback definition
+ *
+ * This is function type definition of the callback function pointer
+ * which has to be provided by the caller while registering for any
+ * upcall events.
+ *
+ * This function is called whenever any upcall which the application
+ * has registered for is received from the server.
+ *
+ * @up_arg: Upcall structure whose contents need to be interpreted by
+ * making use of glfs_upcall_* helper routines.
+ *
+ * @data: The same context pointer provided by the caller at the time of
+ * registering of upcall events. This may be used by the caller for any
+ * of its internal use while processing upcalls.
+ */
+typedef void (*glfs_upcall_cbk)(glfs_upcall_t *up_arg, void *data);
+
+/*
+ * List of upcall events supported by gluster/gfapi
+ */
+#define GLFS_EVENT_INODE_INVALIDATE 0x00000001 /* invalidate cache entry */
+#define GLFS_EVENT_RECALL_LEASE 0x00000002 /* Recall lease */
+#define GLFS_EVENT_ANY 0xffffffff /* for all the above events */
+
+/*
+ * SYNOPSIS
+ *
+ * glfs_upcall_register: Register for upcall events
+ *
+ * DESCRIPTION
+ *
+ * This function is used to register for various upcall events application
+ * is interested in and the callback function to be invoked when such
+ * events are triggered.
+ *
+ * Multiple calls of this routine shall override cbk function. That means
+ * only one cbk function can be used for all the upcall events registered
+ * and that shall be the one last updated.
+ *
+ * PARAMETERS:
+ *
+ * INPUT:
+ * @fs: The 'virtual mount' object
+ *
+ * @event_list: List of upcall events to be registered.
+ * Current available values are:
+ * - GLFS_EVENT_INODE_INVALIDATE
+ * - GLFS_EVENT_RECALL_LEASE
+ *
+ * @cbk: The cbk routine to be invoked in case of any upcall received
+ * @data: Any opaque pointer provided by caller which shall be using while
+ * making cbk calls. This pointer may be used by caller for any of its
+ * internal use while processing upcalls. Can be NULL.
+ *
+ * RETURN VALUE:
+ * >0: SUCCESS (value contains the events successfully registered)
+ * -1: FAILURE
+ */
+int
+glfs_upcall_register(glfs_t *fs, uint32_t event_list, glfs_upcall_cbk cbk,
+ void *data) __THROW
+ GFAPI_PUBLIC(glfs_upcall_register, 3.13.0);
+
+/*
+ * SYNOPSIS
+ *
+ * glfs_upcall_unregister: Unregister for upcall events
+ *
+ * DESCRIPTION
+ *
+ * This function is used to unregister the upcall events application
+ * is not interested in. In case if the caller unregisters all the events
+ * it has registered for, it shall no more receive any upcall event.
+ *
+ * PARAMETERS:
+ *
+ * INPUT:
+ * @fs: The 'virtual mount' object
+ *
+ * @event_list: List of upcall events to be unregistered.
+ * Current available values are:
+ * - GLFS_EVENT_INODE_INVALIDATE
+ * - GLFS_EVENT_RECALL_LEASE
+ * RETURN VALUE:
+ * >0: SUCCESS (value contains the events successfully unregistered)
+ * -1: FAILURE
+ */
+int
+glfs_upcall_unregister(glfs_t *fs, uint32_t event_list) __THROW
+ GFAPI_PUBLIC(glfs_upcall_unregister, 3.13.0);
+
+/* Lease Types */
+enum glfs_lease_types {
+ GLFS_LEASE_NONE = 0,
+ GLFS_RD_LEASE = 1,
+ GLFS_RW_LEASE = 2,
+};
+typedef enum glfs_lease_types glfs_lease_types_t;
+
+/* Lease cmds */
+enum glfs_lease_cmds {
+ GLFS_GET_LEASE = 1,
+ GLFS_SET_LEASE = 2,
+ GLFS_UNLK_LEASE = 3,
+};
+typedef enum glfs_lease_cmds glfs_lease_cmds_t;
+
+struct glfs_lease {
+ glfs_lease_cmds_t cmd;
+ glfs_lease_types_t lease_type;
+ glfs_leaseid_t lease_id;
+ unsigned int lease_flags;
+};
+typedef struct glfs_lease glfs_lease_t;
+
+typedef void (*glfs_recall_cbk)(glfs_lease_t lease, void *data);
+
+/*
+ SYNOPSIS
+
+ glfs_lease: Takes a lease on a file.
+
+ DESCRIPTION
+
+ This function takes lease on an open file.
+
+ PARAMETERS
+
+ @glfd: The fd of the file on which lease should be taken,
+ this fd is returned by glfs_open/glfs_create.
+
+ @lease: Struct that defines the lease operation to be performed
+ on the file.
+ @lease.cmd - Can be one of the following values
+ GF_GET_LEASE: Get the lease type currently present on the file,
+ lease.lease_type will contain GF_RD_LEASE
+ or GF_RW_LEASE or 0 if no leases.
+ GF_SET_LEASE: Set the lease of given lease.lease_type on the file.
+ GF_UNLK_LEASE: Unlock the lease present on the given fd.
+ Note that the every lease request should have
+ a corresponding unlk_lease.
+
+ @lease.lease_type - Can be one of the following values
+ GF_RD_LEASE: Read lease on a file, shared lease.
+ GF_RW_LEASE: Read-Write lease on a file, exclusive lease.
+
+ @lease.lease_id - A unique identification of lease, 128bits.
+
+ @fn: This is the function that is invoked when the lease has to be recalled
+ @data: It is a cookie, this pointer is returned as a part of recall
+
+ fn and data field are stored as a part of glfs_fd, hence if there are multiple
+ glfs_lease calls, each of them updates the fn and data fields. glfs_recall_cbk
+ will be invoked with the last updated fn and data
+
+ RETURN VALUES
+ 0: Successful completion
+ <0: Failure. @errno will be set with the type of failure
+*/
+
+int
+glfs_lease(glfs_fd_t *glfd, glfs_lease_t *lease, glfs_recall_cbk fn,
+ void *data) __THROW GFAPI_PUBLIC(glfs_lease, 4.0.0);
+
+/*
+ SYNOPSIS
+
+ glfs_fsetattr: Function to set attributes.
+ glfs_setattr: Function to set attributes
+
+ DESCRIPTION
+
+ The functions are used to set attributes on the file.
+
+ PARAMETERS
+
+ @glfs_fsetattr
+
+ @glfd: The fd of the file for which the attributes are to be set,
+ this fd is returned by glfs_open/glfs_create.
+
+ @glfs_setattr
+
+ @fs: File object.
+
+ @path: The path of the file that is being operated on.
+
+ @follow: Flag used to resolve symlink.
+
+
+ @stat: Struct that has information about the file.
+
+ @valid: This is the mask bit, that accepts GFAPI_SET_ATTR* masks.
+ Refer glfs.h to see the mask definitions.
+
+ Both functions are similar in functionality, just that the
+ func setattr() uses file path whereas the func fsetattr()
+ uses the fd.
+
+ RETURN VALUES
+ 0: Successful completion
+ <0: Failure. @errno will be set with the type of failure
+
+ */
+
+int
+glfs_fsetattr(struct glfs_fd *glfd, struct glfs_stat *stat) __THROW
+ GFAPI_PUBLIC(glfs_fsetattr, 6.0);
+
+int
+glfs_setattr(struct glfs *fs, const char *path, struct glfs_stat *stat,
+ int follow) __THROW GFAPI_PUBLIC(glfs_setattr, 6.0);
+
+/*
+ SYNOPSIS
+
+ glfs_set_statedump_path: Function to set statedump path.
+
+ DESCRIPTION
+
+ This function is used to set statedump directory
+
+ PARAMETERS
+
+ @fs: The 'virtual mount' object to be configured with the volume
+ specification file.
+
+ @path: statedump path. Should be a directory. But the API won't fail if the
+ directory doesn't exist yet, as one may create it later.
+
+ RETURN VALUES
+
+ 0 : Success.
+ -1 : Failure. @errno will be set with the type of failure.
+
+ */
+
+int
+glfs_set_statedump_path(struct glfs *fs, const char *path) __THROW
+ GFAPI_PUBLIC(glfs_set_statedump_path, 7.0);
+
+__END_DECLS
+#endif /* !_GLFS_H */
diff --git a/argp-standalone/Makefile.am b/argp-standalone/Makefile.am
deleted file mode 100644
index 4775d4876aa..00000000000
--- a/argp-standalone/Makefile.am
+++ /dev/null
@@ -1,38 +0,0 @@
-# From glibc
-
-# Copyright (C) 1997, 2003, 2004 Free Software Foundation, Inc.
-# This file is part of the GNU C Library.
-
-# The GNU C Library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Library General Public License as
-# published by the Free Software Foundation; either version 2 of the
-# License, or (at your option) any later version.
-
-# The GNU C Library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Library General Public License for more details.
-
-# You should have received a copy of the GNU Library General Public
-# License along with the GNU C Library; see the file COPYING.LIB. If
-# not, write to the Free Software Foundation, Inc.,
-# 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-AUTOMAKE_OPTIONS = foreign
-SUBDIRS = .
-
-LIBOBJS = @LIBOBJS@
-
-noinst_LIBRARIES = libargp.a
-
-noinst_HEADERS = argp.h argp-fmtstream.h argp-namefrob.h
-
-EXTRA_DIST = mempcpy.c strchrnul.c strndup.c strcasecmp.c vsnprintf.c autogen.sh
-
-# Leaves out argp-fs-xinl.c and argp-xinl.c
-libargp_a_SOURCES = argp-ba.c argp-eexst.c argp-fmtstream.c \
- argp-help.c argp-parse.c argp-pv.c \
- argp-pvh.c
-
-libargp_a_LIBADD = $(LIBOBJS)
-
-
diff --git a/argp-standalone/acinclude.m4 b/argp-standalone/acinclude.m4
deleted file mode 100644
index fb61e957dfa..00000000000
--- a/argp-standalone/acinclude.m4
+++ /dev/null
@@ -1,1084 +0,0 @@
-dnl Try to detect the type of the third arg to getsockname() et al
-AC_DEFUN([LSH_TYPE_SOCKLEN_T],
-[AH_TEMPLATE([socklen_t], [Length type used by getsockopt])
-AC_CACHE_CHECK([for socklen_t in sys/socket.h], ac_cv_type_socklen_t,
-[AC_EGREP_HEADER(socklen_t, sys/socket.h,
- [ac_cv_type_socklen_t=yes], [ac_cv_type_socklen_t=no])])
-if test $ac_cv_type_socklen_t = no; then
- AC_MSG_CHECKING(for AIX)
- AC_EGREP_CPP(yes, [
-#ifdef _AIX
- yes
-#endif
-],[
-AC_MSG_RESULT(yes)
-AC_DEFINE(socklen_t, size_t)
-],[
-AC_MSG_RESULT(no)
-AC_DEFINE(socklen_t, int)
-])
-fi
-])
-
-dnl Choose cc flags for compiling position independent code
-AC_DEFUN([LSH_CCPIC],
-[AC_MSG_CHECKING(CCPIC)
-AC_CACHE_VAL(lsh_cv_sys_ccpic,[
- if test -z "$CCPIC" ; then
- if test "$GCC" = yes ; then
- case `uname -sr` in
- BSD/OS*)
- case `uname -r` in
- 4.*) CCPIC="-fPIC";;
- *) CCPIC="";;
- esac
- ;;
- Darwin*)
- CCPIC="-fPIC"
- ;;
- SunOS\ 5.*)
- # Could also use -fPIC, if there are a large number of symbol reference
- CCPIC="-fPIC"
- ;;
- CYGWIN*)
- CCPIC=""
- ;;
- *)
- CCPIC="-fpic"
- ;;
- esac
- else
- case `uname -sr` in
- Darwin*)
- CCPIC="-fPIC"
- ;;
- IRIX*)
- CCPIC="-share"
- ;;
- hp*|HP*) CCPIC="+z"; ;;
- FreeBSD*) CCPIC="-fpic";;
- SCO_SV*) CCPIC="-KPIC -dy -Bdynamic";;
- UnixWare*|OpenUNIX*) CCPIC="-KPIC -dy -Bdynamic";;
- Solaris*) CCPIC="-KPIC -Bdynamic";;
- Windows_NT*) CCPIC="-shared" ;;
- esac
- fi
- fi
- OLD_CFLAGS="$CFLAGS"
- CFLAGS="$CFLAGS $CCPIC"
- AC_TRY_COMPILE([], [exit(0);],
- lsh_cv_sys_ccpic="$CCPIC", lsh_cv_sys_ccpic='')
- CFLAGS="$OLD_CFLAGS"
-])
-CCPIC="$lsh_cv_sys_ccpic"
-AC_MSG_RESULT($CCPIC)
-AC_SUBST([CCPIC])])
-
-dnl LSH_PATH_ADD(path-id, directory)
-AC_DEFUN([LSH_PATH_ADD],
-[AC_MSG_CHECKING($2)
-ac_exists=no
-if test -d "$2/." ; then
- ac_real_dir=`cd $2 && pwd`
- if test -n "$ac_real_dir" ; then
- ac_exists=yes
- for old in $1_REAL_DIRS ; do
- ac_found=no
- if test x$ac_real_dir = x$old ; then
- ac_found=yes;
- break;
- fi
- done
- if test $ac_found = yes ; then
- AC_MSG_RESULT(already added)
- else
- AC_MSG_RESULT(added)
- # LDFLAGS="$LDFLAGS -L $2"
- $1_REAL_DIRS="$ac_real_dir [$]$1_REAL_DIRS"
- $1_DIRS="$2 [$]$1_DIRS"
- fi
- fi
-fi
-if test $ac_exists = no ; then
- AC_MSG_RESULT(not found)
-fi
-])
-
-dnl LSH_RPATH_ADD(dir)
-AC_DEFUN([LSH_RPATH_ADD], [LSH_PATH_ADD(RPATH_CANDIDATE, $1)])
-
-dnl LSH_RPATH_INIT(candidates)
-AC_DEFUN([LSH_RPATH_INIT],
-[AC_MSG_CHECKING([for -R flag])
-RPATHFLAG=''
-case `uname -sr` in
- OSF1\ V4.*)
- RPATHFLAG="-rpath "
- ;;
- IRIX\ 6.*)
- RPATHFLAG="-rpath "
- ;;
- IRIX\ 5.*)
- RPATHFLAG="-rpath "
- ;;
- SunOS\ 5.*)
- if test "$TCC" = "yes"; then
- # tcc doesn't know about -R
- RPATHFLAG="-Wl,-R,"
- else
- RPATHFLAG=-R
- fi
- ;;
- Linux\ 2.*)
- RPATHFLAG="-Wl,-rpath,"
- ;;
- *)
- :
- ;;
-esac
-
-if test x$RPATHFLAG = x ; then
- AC_MSG_RESULT(none)
-else
- AC_MSG_RESULT([using $RPATHFLAG])
-fi
-
-RPATH_CANDIDATE_REAL_DIRS=''
-RPATH_CANDIDATE_DIRS=''
-
-AC_MSG_RESULT([Searching for libraries])
-
-for d in $1 ; do
- LSH_RPATH_ADD($d)
-done
-])
-
-dnl Try to execute a main program, and if it fails, try adding some
-dnl -R flag.
-dnl LSH_RPATH_FIX
-AC_DEFUN([LSH_RPATH_FIX],
-[if test $cross_compiling = no -a "x$RPATHFLAG" != x ; then
- ac_success=no
- AC_TRY_RUN([int main(int argc, char **argv) { return 0; }],
- ac_success=yes, ac_success=no, :)
-
- if test $ac_success = no ; then
- AC_MSG_CHECKING([Running simple test program failed. Trying -R flags])
-dnl echo RPATH_CANDIDATE_DIRS = $RPATH_CANDIDATE_DIRS
- ac_remaining_dirs=''
- ac_rpath_save_LDFLAGS="$LDFLAGS"
- for d in $RPATH_CANDIDATE_DIRS ; do
- if test $ac_success = yes ; then
- ac_remaining_dirs="$ac_remaining_dirs $d"
- else
- LDFLAGS="$RPATHFLAG$d $LDFLAGS"
-dnl echo LDFLAGS = $LDFLAGS
- AC_TRY_RUN([int main(int argc, char **argv) { return 0; }],
- [ac_success=yes
- ac_rpath_save_LDFLAGS="$LDFLAGS"
- AC_MSG_RESULT([adding $RPATHFLAG$d])
- ],
- [ac_remaining_dirs="$ac_remaining_dirs $d"], :)
- LDFLAGS="$ac_rpath_save_LDFLAGS"
- fi
- done
- RPATH_CANDIDATE_DIRS=$ac_remaining_dirs
- fi
- if test $ac_success = no ; then
- AC_MSG_RESULT(failed)
- fi
-fi
-])
-
-dnl Like AC_CHECK_LIB, but uses $KRB_LIBS rather than $LIBS.
-dnl LSH_CHECK_KRB_LIB(LIBRARY, FUNCTION, [, ACTION-IF-FOUND [,
-dnl ACTION-IF-NOT-FOUND [, OTHER-LIBRARIES]]])
-
-AC_DEFUN([LSH_CHECK_KRB_LIB],
-[AC_CHECK_LIB([$1], [$2],
- ifelse([$3], ,
- [[ac_tr_lib=HAVE_LIB`echo $1 | sed -e 's/[^a-zA-Z0-9_]/_/g' \
- -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/'`
- AC_DEFINE_UNQUOTED($ac_tr_lib)
- KRB_LIBS="-l$1 $KRB_LIBS"
- ]], [$3]),
- ifelse([$4], , , [$4
-])dnl
-, [$5 $KRB_LIBS])
-])
-
-dnl LSH_LIB_ARGP(ACTION-IF-OK, ACTION-IF-BAD)
-AC_DEFUN([LSH_LIB_ARGP],
-[ ac_argp_save_LIBS="$LIBS"
- ac_argp_save_LDFLAGS="$LDFLAGS"
- ac_argp_ok=no
- # First check if we can link with argp.
- AC_SEARCH_LIBS(argp_parse, argp,
- [ LSH_RPATH_FIX
- AC_CACHE_CHECK([for working argp],
- lsh_cv_lib_argp_works,
- [ AC_TRY_RUN(
-[#include <argp.h>
-#include <stdlib.h>
-
-static const struct argp_option
-options[] =
-{
- { NULL, 0, NULL, 0, NULL, 0 }
-};
-
-struct child_state
-{
- int n;
-};
-
-static error_t
-child_parser(int key, char *arg, struct argp_state *state)
-{
- struct child_state *input = (struct child_state *) state->input;
-
- switch(key)
- {
- default:
- return ARGP_ERR_UNKNOWN;
- case ARGP_KEY_END:
- if (!input->n)
- input->n = 1;
- break;
- }
- return 0;
-}
-
-const struct argp child_argp =
-{
- options,
- child_parser,
- NULL, NULL, NULL, NULL, NULL
-};
-
-struct main_state
-{
- struct child_state child;
- int m;
-};
-
-static error_t
-main_parser(int key, char *arg, struct argp_state *state)
-{
- struct main_state *input = (struct main_state *) state->input;
-
- switch(key)
- {
- default:
- return ARGP_ERR_UNKNOWN;
- case ARGP_KEY_INIT:
- state->child_inputs[0] = &input->child;
- break;
- case ARGP_KEY_END:
- if (!input->m)
- input->m = input->child.n;
-
- break;
- }
- return 0;
-}
-
-static const struct argp_child
-main_children[] =
-{
- { &child_argp, 0, "", 0 },
- { NULL, 0, NULL, 0}
-};
-
-static const struct argp
-main_argp =
-{ options, main_parser,
- NULL,
- NULL,
- main_children,
- NULL, NULL
-};
-
-int main(int argc, char **argv)
-{
- struct main_state input = { { 0 }, 0 };
- char *v[2] = { "foo", NULL };
-
- argp_parse(&main_argp, 1, v, 0, NULL, &input);
-
- if ( (input.m == 1) && (input.child.n == 1) )
- return 0;
- else
- return 1;
-}
-], lsh_cv_lib_argp_works=yes,
- lsh_cv_lib_argp_works=no,
- lsh_cv_lib_argp_works=no)])
-
- if test x$lsh_cv_lib_argp_works = xyes ; then
- ac_argp_ok=yes
- else
- # Reset link flags
- LIBS="$ac_argp_save_LIBS"
- LDFLAGS="$ac_argp_save_LDFLAGS"
- fi])
-
- if test x$ac_argp_ok = xyes ; then
- ifelse([$1],, true, [$1])
- else
- ifelse([$2],, true, [$2])
- fi
-])
-
-dnl LSH_GCC_ATTRIBUTES
-dnl Check for gcc's __attribute__ construction
-
-AC_DEFUN([LSH_GCC_ATTRIBUTES],
-[AC_CACHE_CHECK(for __attribute__,
- lsh_cv_c_attribute,
-[ AC_TRY_COMPILE([
-#include <stdlib.h>
-],
-[
-static void foo(void) __attribute__ ((noreturn));
-
-static void __attribute__ ((noreturn))
-foo(void)
-{
- exit(1);
-}
-],
-lsh_cv_c_attribute=yes,
-lsh_cv_c_attribute=no)])
-
-AH_TEMPLATE([HAVE_GCC_ATTRIBUTE], [Define if the compiler understands __attribute__])
-if test "x$lsh_cv_c_attribute" = "xyes"; then
- AC_DEFINE(HAVE_GCC_ATTRIBUTE)
-fi
-
-AH_BOTTOM(
-[#if __GNUC__ || HAVE_GCC_ATTRIBUTE
-# define NORETURN __attribute__ ((__noreturn__))
-# define PRINTF_STYLE(f, a) __attribute__ ((__format__ (__printf__, f, a)))
-# define UNUSED __attribute__ ((__unused__))
-#else
-# define NORETURN
-# define PRINTF_STYLE(f, a)
-# define UNUSED
-#endif
-])])
-
-AC_DEFUN([LSH_GCC_FUNCTION_NAME],
-[# Check for gcc's __FUNCTION__ variable
-AH_TEMPLATE([HAVE_GCC_FUNCTION],
- [Define if the compiler understands __FUNCTION__])
-AH_BOTTOM(
-[#if HAVE_GCC_FUNCTION
-# define FUNCTION_NAME __FUNCTION__
-#else
-# define FUNCTION_NAME "Unknown"
-#endif
-])
-
-AC_CACHE_CHECK(for __FUNCTION__,
- lsh_cv_c_FUNCTION,
- [ AC_TRY_COMPILE(,
- [ #if __GNUC__ == 3
- # error __FUNCTION__ is broken in gcc-3
- #endif
- void foo(void) { char c = __FUNCTION__[0]; } ],
- lsh_cv_c_FUNCTION=yes,
- lsh_cv_c_FUNCTION=no)])
-
-if test "x$lsh_cv_c_FUNCTION" = "xyes"; then
- AC_DEFINE(HAVE_GCC_FUNCTION)
-fi
-])
-
-# Check for alloca, and include the standard blurb in config.h
-AC_DEFUN([LSH_FUNC_ALLOCA],
-[AC_FUNC_ALLOCA
-AC_CHECK_HEADERS([malloc.h])
-AH_BOTTOM(
-[/* AIX requires this to be the first thing in the file. */
-#ifndef __GNUC__
-# if HAVE_ALLOCA_H
-# include <alloca.h>
-# else
-# ifdef _AIX
- #pragma alloca
-# else
-# ifndef alloca /* predefined by HP cc +Olibcalls */
-char *alloca ();
-# endif
-# endif
-# endif
-#else /* defined __GNUC__ */
-# if HAVE_ALLOCA_H
-# include <alloca.h>
-# endif
-#endif
-/* Needed for alloca on windows */
-#if HAVE_MALLOC_H
-# include <malloc.h>
-#endif
-])])
-
-AC_DEFUN([LSH_FUNC_STRERROR],
-[AC_CHECK_FUNCS(strerror)
-AH_BOTTOM(
-[#if HAVE_STRERROR
-#define STRERROR strerror
-#else
-#define STRERROR(x) (sys_errlist[x])
-#endif
-])])
-
-AC_DEFUN([LSH_FUNC_STRSIGNAL],
-[AC_CHECK_FUNCS(strsignal)
-AC_CHECK_DECLS([sys_siglist, _sys_siglist])
-AH_BOTTOM(
-[#if HAVE_STRSIGNAL
-# define STRSIGNAL strsignal
-#else /* !HAVE_STRSIGNAL */
-# if HAVE_DECL_SYS_SIGLIST
-# define STRSIGNAL(x) (sys_siglist[x])
-# else
-# if HAVE_DECL__SYS_SIGLIST
-# define STRSIGNAL(x) (_sys_siglist[x])
-# else
-# define STRSIGNAL(x) "Unknown signal"
-# if __GNUC__
-# warning Using dummy STRSIGNAL
-# endif
-# endif
-# endif
-#endif /* !HAVE_STRSIGNAL */
-])])
-
-dnl LSH_MAKE_CONDITIONAL(symbol, test)
-AC_DEFUN([LSH_MAKE_CONDITIONAL],
-[if $2 ; then
- IF_$1=''
- UNLESS_$1='# '
-else
- IF_$1='# '
- UNLESS_$1=''
-fi
-AC_SUBST(IF_$1)
-AC_SUBST(UNLESS_$1)])
-
-dnl LSH_DEPENDENCY_TRACKING
-
-dnl Defines compiler flags DEP_FLAGS to generate dependency
-dnl information, and DEP_PROCESS that is any shell commands needed for
-dnl massaging the dependency information further. Dependencies are
-dnl generated as a side effect of compilation. Dependency files
-dnl themselves are not treated as targets.
-
-AC_DEFUN([LSH_DEPENDENCY_TRACKING],
-[AC_ARG_ENABLE(dependency_tracking,
- AC_HELP_STRING([--disable-dependency-tracking],
- [Disable dependency tracking. Dependency tracking doesn't work with BSD make]),,
- [enable_dependency_tracking=yes])
-
-DEP_FLAGS=''
-DEP_PROCESS='true'
-if test x$enable_dependency_tracking = xyes ; then
- if test x$GCC = xyes ; then
- gcc_version=`gcc --version | head -1`
- case "$gcc_version" in
- 2.*|*[[!0-9.]]2.*)
- enable_dependency_tracking=no
- AC_MSG_WARN([Dependency tracking disabled, gcc-3.x is needed])
- ;;
- *)
- DEP_FLAGS='-MT $[]@ -MD -MP -MF $[]@.d'
- DEP_PROCESS='true'
- ;;
- esac
- else
- enable_dependency_tracking=no
- AC_MSG_WARN([Dependency tracking disabled])
- fi
-fi
-
-if test x$enable_dependency_tracking = xyes ; then
- DEP_INCLUDE='include '
-else
- DEP_INCLUDE='# '
-fi
-
-AC_SUBST([DEP_INCLUDE])
-AC_SUBST([DEP_FLAGS])
-AC_SUBST([DEP_PROCESS])])
-
-dnl @synopsis AX_CREATE_STDINT_H [( HEADER-TO-GENERATE [, HEADERS-TO-CHECK])]
-dnl
-dnl the "ISO C9X: 7.18 Integer types <stdint.h>" section requires the
-dnl existence of an include file <stdint.h> that defines a set of
-dnl typedefs, especially uint8_t,int32_t,uintptr_t.
-dnl Many older installations will not provide this file, but some will
-dnl have the very same definitions in <inttypes.h>. In other enviroments
-dnl we can use the inet-types in <sys/types.h> which would define the
-dnl typedefs int8_t and u_int8_t respectivly.
-dnl
-dnl This macros will create a local "_stdint.h" or the headerfile given as
-dnl an argument. In many cases that file will just "#include <stdint.h>"
-dnl or "#include <inttypes.h>", while in other environments it will provide
-dnl the set of basic 'stdint's definitions/typedefs:
-dnl int8_t,uint8_t,int16_t,uint16_t,int32_t,uint32_t,intptr_t,uintptr_t
-dnl int_least32_t.. int_fast32_t.. intmax_t
-dnl which may or may not rely on the definitions of other files,
-dnl or using the AC_CHECK_SIZEOF macro to determine the actual
-dnl sizeof each type.
-dnl
-dnl if your header files require the stdint-types you will want to create an
-dnl installable file mylib-int.h that all your other installable header
-dnl may include. So if you have a library package named "mylib", just use
-dnl AX_CREATE_STDINT_H(mylib-int.h)
-dnl in configure.ac and go to install that very header file in Makefile.am
-dnl along with the other headers (mylib.h) - and the mylib-specific headers
-dnl can simply use "#include <mylib-int.h>" to obtain the stdint-types.
-dnl
-dnl Remember, if the system already had a valid <stdint.h>, the generated
-dnl file will include it directly. No need for fuzzy HAVE_STDINT_H things...
-dnl
-dnl @, (status: used on new platforms) (see http://ac-archive.sf.net/gstdint/)
-dnl @version $Id: acinclude.m4,v 1.27 2004/11/23 21:27:35 nisse Exp $
-dnl @author Guido Draheim <guidod@gmx.de>
-
-AC_DEFUN([AX_CREATE_STDINT_H],
-[# ------ AX CREATE STDINT H -------------------------------------
-AC_MSG_CHECKING([for stdint types])
-ac_stdint_h=`echo ifelse($1, , _stdint.h, $1)`
-# try to shortcircuit - if the default include path of the compiler
-# can find a "stdint.h" header then we assume that all compilers can.
-AC_CACHE_VAL([ac_cv_header_stdint_t],[
-old_CXXFLAGS="$CXXFLAGS" ; CXXFLAGS=""
-old_CPPFLAGS="$CPPFLAGS" ; CPPFLAGS=""
-old_CFLAGS="$CFLAGS" ; CFLAGS=""
-AC_TRY_COMPILE([#include <stdint.h>],[int_least32_t v = 0;],
-[ac_cv_stdint_result="(assuming C99 compatible system)"
- ac_cv_header_stdint_t="stdint.h"; ],
-[ac_cv_header_stdint_t=""])
-CXXFLAGS="$old_CXXFLAGS"
-CPPFLAGS="$old_CPPFLAGS"
-CFLAGS="$old_CFLAGS" ])
-
-v="... $ac_cv_header_stdint_h"
-if test "$ac_stdint_h" = "stdint.h" ; then
- AC_MSG_RESULT([(are you sure you want them in ./stdint.h?)])
-elif test "$ac_stdint_h" = "inttypes.h" ; then
- AC_MSG_RESULT([(are you sure you want them in ./inttypes.h?)])
-elif test "_$ac_cv_header_stdint_t" = "_" ; then
- AC_MSG_RESULT([(putting them into $ac_stdint_h)$v])
-else
- ac_cv_header_stdint="$ac_cv_header_stdint_t"
- AC_MSG_RESULT([$ac_cv_header_stdint (shortcircuit)])
-fi
-
-if test "_$ac_cv_header_stdint_t" = "_" ; then # can not shortcircuit..
-
-dnl .....intro message done, now do a few system checks.....
-dnl btw, all CHECK_TYPE macros do automatically "DEFINE" a type, therefore
-dnl we use the autoconf implementation detail _AC CHECK_TYPE_NEW instead
-
-inttype_headers=`echo $2 | sed -e 's/,/ /g'`
-
-ac_cv_stdint_result="(no helpful system typedefs seen)"
-AC_CACHE_CHECK([for stdint uintptr_t], [ac_cv_header_stdint_x],[
- ac_cv_header_stdint_x="" # the 1997 typedefs (inttypes.h)
- AC_MSG_RESULT([(..)])
- for i in stdint.h inttypes.h sys/inttypes.h $inttype_headers ; do
- unset ac_cv_type_uintptr_t
- unset ac_cv_type_uint64_t
- _AC_CHECK_TYPE_NEW(uintptr_t,[ac_cv_header_stdint_x=$i],dnl
- continue,[#include <$i>])
- AC_CHECK_TYPE(uint64_t,[and64="/uint64_t"],[and64=""],[#include<$i>])
- ac_cv_stdint_result="(seen uintptr_t$and64 in $i)"
- break;
- done
- AC_MSG_CHECKING([for stdint uintptr_t])
- ])
-
-if test "_$ac_cv_header_stdint_x" = "_" ; then
-AC_CACHE_CHECK([for stdint uint32_t], [ac_cv_header_stdint_o],[
- ac_cv_header_stdint_o="" # the 1995 typedefs (sys/inttypes.h)
- AC_MSG_RESULT([(..)])
- for i in inttypes.h sys/inttypes.h stdint.h $inttype_headers ; do
- unset ac_cv_type_uint32_t
- unset ac_cv_type_uint64_t
- AC_CHECK_TYPE(uint32_t,[ac_cv_header_stdint_o=$i],dnl
- continue,[#include <$i>])
- AC_CHECK_TYPE(uint64_t,[and64="/uint64_t"],[and64=""],[#include<$i>])
- ac_cv_stdint_result="(seen uint32_t$and64 in $i)"
- break;
- done
- AC_MSG_CHECKING([for stdint uint32_t])
- ])
-fi
-
-if test "_$ac_cv_header_stdint_x" = "_" ; then
-if test "_$ac_cv_header_stdint_o" = "_" ; then
-AC_CACHE_CHECK([for stdint u_int32_t], [ac_cv_header_stdint_u],[
- ac_cv_header_stdint_u="" # the BSD typedefs (sys/types.h)
- AC_MSG_RESULT([(..)])
- for i in sys/types.h inttypes.h sys/inttypes.h $inttype_headers ; do
- unset ac_cv_type_u_int32_t
- unset ac_cv_type_u_int64_t
- AC_CHECK_TYPE(u_int32_t,[ac_cv_header_stdint_u=$i],dnl
- continue,[#include <$i>])
- AC_CHECK_TYPE(u_int64_t,[and64="/u_int64_t"],[and64=""],[#include<$i>])
- ac_cv_stdint_result="(seen u_int32_t$and64 in $i)"
- break;
- done
- AC_MSG_CHECKING([for stdint u_int32_t])
- ])
-fi fi
-
-dnl if there was no good C99 header file, do some typedef checks...
-if test "_$ac_cv_header_stdint_x" = "_" ; then
- AC_MSG_CHECKING([for stdint datatype model])
- AC_MSG_RESULT([(..)])
- AC_CHECK_SIZEOF(char)
- AC_CHECK_SIZEOF(short)
- AC_CHECK_SIZEOF(int)
- AC_CHECK_SIZEOF(long)
- AC_CHECK_SIZEOF(void*)
- ac_cv_stdint_char_model=""
- ac_cv_stdint_char_model="$ac_cv_stdint_char_model$ac_cv_sizeof_char"
- ac_cv_stdint_char_model="$ac_cv_stdint_char_model$ac_cv_sizeof_short"
- ac_cv_stdint_char_model="$ac_cv_stdint_char_model$ac_cv_sizeof_int"
- ac_cv_stdint_long_model=""
- ac_cv_stdint_long_model="$ac_cv_stdint_long_model$ac_cv_sizeof_int"
- ac_cv_stdint_long_model="$ac_cv_stdint_long_model$ac_cv_sizeof_long"
- ac_cv_stdint_long_model="$ac_cv_stdint_long_model$ac_cv_sizeof_voidp"
- name="$ac_cv_stdint_long_model"
- case "$ac_cv_stdint_char_model/$ac_cv_stdint_long_model" in
- 122/242) name="$name, IP16 (standard 16bit machine)" ;;
- 122/244) name="$name, LP32 (standard 32bit mac/win)" ;;
- 122/*) name="$name (unusual int16 model)" ;;
- 124/444) name="$name, ILP32 (standard 32bit unixish)" ;;
- 124/488) name="$name, LP64 (standard 64bit unixish)" ;;
- 124/448) name="$name, LLP64 (unusual 64bit unixish)" ;;
- 124/*) name="$name (unusual int32 model)" ;;
- 128/888) name="$name, ILP64 (unusual 64bit numeric)" ;;
- 128/*) name="$name (unusual int64 model)" ;;
- 222/*|444/*) name="$name (unusual dsptype)" ;;
- *) name="$name (very unusal model)" ;;
- esac
- AC_MSG_RESULT([combined for stdint datatype model... $name])
-fi
-
-if test "_$ac_cv_header_stdint_x" != "_" ; then
- ac_cv_header_stdint="$ac_cv_header_stdint_x"
-elif test "_$ac_cv_header_stdint_o" != "_" ; then
- ac_cv_header_stdint="$ac_cv_header_stdint_o"
-elif test "_$ac_cv_header_stdint_u" != "_" ; then
- ac_cv_header_stdint="$ac_cv_header_stdint_u"
-else
- ac_cv_header_stdint="stddef.h"
-fi
-
-AC_MSG_CHECKING([for extra inttypes in chosen header])
-AC_MSG_RESULT([($ac_cv_header_stdint)])
-dnl see if int_least and int_fast types are present in _this_ header.
-unset ac_cv_type_int_least32_t
-unset ac_cv_type_int_fast32_t
-AC_CHECK_TYPE(int_least32_t,,,[#include <$ac_cv_header_stdint>])
-AC_CHECK_TYPE(int_fast32_t,,,[#include<$ac_cv_header_stdint>])
-AC_CHECK_TYPE(intmax_t,,,[#include <$ac_cv_header_stdint>])
-
-fi # shortcircut to system "stdint.h"
-# ------------------ PREPARE VARIABLES ------------------------------
-if test "$GCC" = "yes" ; then
-ac_cv_stdint_message="using gnu compiler "`$CC --version | head -1`
-else
-ac_cv_stdint_message="using $CC"
-fi
-
-AC_MSG_RESULT([make use of $ac_cv_header_stdint in $ac_stdint_h dnl
-$ac_cv_stdint_result])
-
-# ----------------- DONE inttypes.h checks START header -------------
-AC_CONFIG_COMMANDS([$ac_stdint_h],[
-AC_MSG_NOTICE(creating $ac_stdint_h : $_ac_stdint_h)
-ac_stdint=$tmp/_stdint.h
-
-echo "#ifndef" $_ac_stdint_h >$ac_stdint
-echo "#define" $_ac_stdint_h "1" >>$ac_stdint
-echo "#ifndef" _GENERATED_STDINT_H >>$ac_stdint
-echo "#define" _GENERATED_STDINT_H '"'$PACKAGE $VERSION'"' >>$ac_stdint
-echo "/* generated $ac_cv_stdint_message */" >>$ac_stdint
-if test "_$ac_cv_header_stdint_t" != "_" ; then
-echo "#define _STDINT_HAVE_STDINT_H" "1" >>$ac_stdint
-fi
-
-cat >>$ac_stdint <<STDINT_EOF
-
-/* ................... shortcircuit part ........................... */
-
-#if defined HAVE_STDINT_H || defined _STDINT_HAVE_STDINT_H
-#include <stdint.h>
-#else
-#include <stddef.h>
-
-/* .................... configured part ............................ */
-
-STDINT_EOF
-
-echo "/* whether we have a C99 compatible stdint header file */" >>$ac_stdint
-if test "_$ac_cv_header_stdint_x" != "_" ; then
- ac_header="$ac_cv_header_stdint_x"
- echo "#define _STDINT_HEADER_INTPTR" '"'"$ac_header"'"' >>$ac_stdint
-else
- echo "/* #undef _STDINT_HEADER_INTPTR */" >>$ac_stdint
-fi
-
-echo "/* whether we have a C96 compatible inttypes header file */" >>$ac_stdint
-if test "_$ac_cv_header_stdint_o" != "_" ; then
- ac_header="$ac_cv_header_stdint_o"
- echo "#define _STDINT_HEADER_UINT32" '"'"$ac_header"'"' >>$ac_stdint
-else
- echo "/* #undef _STDINT_HEADER_UINT32 */" >>$ac_stdint
-fi
-
-echo "/* whether we have a BSD compatible inet types header */" >>$ac_stdint
-if test "_$ac_cv_header_stdint_u" != "_" ; then
- ac_header="$ac_cv_header_stdint_u"
- echo "#define _STDINT_HEADER_U_INT32" '"'"$ac_header"'"' >>$ac_stdint
-else
- echo "/* #undef _STDINT_HEADER_U_INT32 */" >>$ac_stdint
-fi
-
-echo "" >>$ac_stdint
-
-if test "_$ac_header" != "_" ; then if test "$ac_header" != "stddef.h" ; then
- echo "#include <$ac_header>" >>$ac_stdint
- echo "" >>$ac_stdint
-fi fi
-
-echo "/* which 64bit typedef has been found */" >>$ac_stdint
-if test "$ac_cv_type_uint64_t" = "yes" ; then
-echo "#define _STDINT_HAVE_UINT64_T" "1" >>$ac_stdint
-else
-echo "/* #undef _STDINT_HAVE_UINT64_T */" >>$ac_stdint
-fi
-if test "$ac_cv_type_u_int64_t" = "yes" ; then
-echo "#define _STDINT_HAVE_U_INT64_T" "1" >>$ac_stdint
-else
-echo "/* #undef _STDINT_HAVE_U_INT64_T */" >>$ac_stdint
-fi
-echo "" >>$ac_stdint
-
-echo "/* which type model has been detected */" >>$ac_stdint
-if test "_$ac_cv_stdint_char_model" != "_" ; then
-echo "#define _STDINT_CHAR_MODEL" "$ac_cv_stdint_char_model" >>$ac_stdint
-echo "#define _STDINT_LONG_MODEL" "$ac_cv_stdint_long_model" >>$ac_stdint
-else
-echo "/* #undef _STDINT_CHAR_MODEL // skipped */" >>$ac_stdint
-echo "/* #undef _STDINT_LONG_MODEL // skipped */" >>$ac_stdint
-fi
-echo "" >>$ac_stdint
-
-echo "/* whether int_least types were detected */" >>$ac_stdint
-if test "$ac_cv_type_int_least32_t" = "yes"; then
-echo "#define _STDINT_HAVE_INT_LEAST32_T" "1" >>$ac_stdint
-else
-echo "/* #undef _STDINT_HAVE_INT_LEAST32_T */" >>$ac_stdint
-fi
-echo "/* whether int_fast types were detected */" >>$ac_stdint
-if test "$ac_cv_type_int_fast32_t" = "yes"; then
-echo "#define _STDINT_HAVE_INT_FAST32_T" "1" >>$ac_stdint
-else
-echo "/* #undef _STDINT_HAVE_INT_FAST32_T */" >>$ac_stdint
-fi
-echo "/* whether intmax_t type was detected */" >>$ac_stdint
-if test "$ac_cv_type_intmax_t" = "yes"; then
-echo "#define _STDINT_HAVE_INTMAX_T" "1" >>$ac_stdint
-else
-echo "/* #undef _STDINT_HAVE_INTMAX_T */" >>$ac_stdint
-fi
-echo "" >>$ac_stdint
-
- cat >>$ac_stdint <<STDINT_EOF
-/* .................... detections part ............................ */
-
-/* whether we need to define bitspecific types from compiler base types */
-#ifndef _STDINT_HEADER_INTPTR
-#ifndef _STDINT_HEADER_UINT32
-#ifndef _STDINT_HEADER_U_INT32
-#define _STDINT_NEED_INT_MODEL_T
-#else
-#define _STDINT_HAVE_U_INT_TYPES
-#endif
-#endif
-#endif
-
-#ifdef _STDINT_HAVE_U_INT_TYPES
-#undef _STDINT_NEED_INT_MODEL_T
-#endif
-
-#ifdef _STDINT_CHAR_MODEL
-#if _STDINT_CHAR_MODEL+0 == 122 || _STDINT_CHAR_MODEL+0 == 124
-#ifndef _STDINT_BYTE_MODEL
-#define _STDINT_BYTE_MODEL 12
-#endif
-#endif
-#endif
-
-#ifndef _STDINT_HAVE_INT_LEAST32_T
-#define _STDINT_NEED_INT_LEAST_T
-#endif
-
-#ifndef _STDINT_HAVE_INT_FAST32_T
-#define _STDINT_NEED_INT_FAST_T
-#endif
-
-#ifndef _STDINT_HEADER_INTPTR
-#define _STDINT_NEED_INTPTR_T
-#ifndef _STDINT_HAVE_INTMAX_T
-#define _STDINT_NEED_INTMAX_T
-#endif
-#endif
-
-
-/* .................... definition part ............................ */
-
-/* some system headers have good uint64_t */
-#ifndef _HAVE_UINT64_T
-#if defined _STDINT_HAVE_UINT64_T || defined HAVE_UINT64_T
-#define _HAVE_UINT64_T
-#elif defined _STDINT_HAVE_U_INT64_T || defined HAVE_U_INT64_T
-#define _HAVE_UINT64_T
-typedef u_int64_t uint64_t;
-#endif
-#endif
-
-#ifndef _HAVE_UINT64_T
-/* .. here are some common heuristics using compiler runtime specifics */
-#if defined __STDC_VERSION__ && defined __STDC_VERSION__ >= 199901L
-#define _HAVE_UINT64_T
-typedef long long int64_t;
-typedef unsigned long long uint64_t;
-
-#elif !defined __STRICT_ANSI__
-#if defined _MSC_VER || defined __WATCOMC__ || defined __BORLANDC__
-#define _HAVE_UINT64_T
-typedef __int64 int64_t;
-typedef unsigned __int64 uint64_t;
-
-#elif defined __GNUC__ || defined __MWERKS__ || defined __ELF__
-/* note: all ELF-systems seem to have loff-support which needs 64-bit */
-#if !defined _NO_LONGLONG
-#define _HAVE_UINT64_T
-typedef long long int64_t;
-typedef unsigned long long uint64_t;
-#endif
-
-#elif defined __alpha || (defined __mips && defined _ABIN32)
-#if !defined _NO_LONGLONG
-typedef long int64_t;
-typedef unsigned long uint64_t;
-#endif
- /* compiler/cpu type to define int64_t */
-#endif
-#endif
-#endif
-
-#if defined _STDINT_HAVE_U_INT_TYPES
-/* int8_t int16_t int32_t defined by inet code, redeclare the u_intXX types */
-typedef u_int8_t uint8_t;
-typedef u_int16_t uint16_t;
-typedef u_int32_t uint32_t;
-
-/* glibc compatibility */
-#ifndef __int8_t_defined
-#define __int8_t_defined
-#endif
-#endif
-
-#ifdef _STDINT_NEED_INT_MODEL_T
-/* we must guess all the basic types. Apart from byte-adressable system, */
-/* there a few 32-bit-only dsp-systems that we guard with BYTE_MODEL 8-} */
-/* (btw, those nibble-addressable systems are way off, or so we assume) */
-
-dnl /* have a look at "64bit and data size neutrality" at */
-dnl /* http://unix.org/version2/whatsnew/login_64bit.html */
-dnl /* (the shorthand "ILP" types always have a "P" part) */
-
-#if defined _STDINT_BYTE_MODEL
-#if _STDINT_LONG_MODEL+0 == 242
-/* 2:4:2 = IP16 = a normal 16-bit system */
-typedef unsigned char uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned long uint32_t;
-#ifndef __int8_t_defined
-#define __int8_t_defined
-typedef char int8_t;
-typedef short int16_t;
-typedef long int32_t;
-#endif
-#elif _STDINT_LONG_MODEL+0 == 244 || _STDINT_LONG_MODEL == 444
-/* 2:4:4 = LP32 = a 32-bit system derived from a 16-bit */
-/* 4:4:4 = ILP32 = a normal 32-bit system */
-typedef unsigned char uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned int uint32_t;
-#ifndef __int8_t_defined
-#define __int8_t_defined
-typedef char int8_t;
-typedef short int16_t;
-typedef int int32_t;
-#endif
-#elif _STDINT_LONG_MODEL+0 == 484 || _STDINT_LONG_MODEL+0 == 488
-/* 4:8:4 = IP32 = a 32-bit system prepared for 64-bit */
-/* 4:8:8 = LP64 = a normal 64-bit system */
-typedef unsigned char uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned int uint32_t;
-#ifndef __int8_t_defined
-#define __int8_t_defined
-typedef char int8_t;
-typedef short int16_t;
-typedef int int32_t;
-#endif
-/* this system has a "long" of 64bit */
-#ifndef _HAVE_UINT64_T
-#define _HAVE_UINT64_T
-typedef unsigned long uint64_t;
-typedef long int64_t;
-#endif
-#elif _STDINT_LONG_MODEL+0 == 448
-/* LLP64 a 64-bit system derived from a 32-bit system */
-typedef unsigned char uint8_t;
-typedef unsigned short uint16_t;
-typedef unsigned int uint32_t;
-#ifndef __int8_t_defined
-#define __int8_t_defined
-typedef char int8_t;
-typedef short int16_t;
-typedef int int32_t;
-#endif
-/* assuming the system has a "long long" */
-#ifndef _HAVE_UINT64_T
-#define _HAVE_UINT64_T
-typedef unsigned long long uint64_t;
-typedef long long int64_t;
-#endif
-#else
-#define _STDINT_NO_INT32_T
-#endif
-#else
-#define _STDINT_NO_INT8_T
-#define _STDINT_NO_INT32_T
-#endif
-#endif
-
-/*
- * quote from SunOS-5.8 sys/inttypes.h:
- * Use at your own risk. As of February 1996, the committee is squarely
- * behind the fixed sized types; the "least" and "fast" types are still being
- * discussed. The probability that the "fast" types may be removed before
- * the standard is finalized is high enough that they are not currently
- * implemented.
- */
-
-#if defined _STDINT_NEED_INT_LEAST_T
-typedef int8_t int_least8_t;
-typedef int16_t int_least16_t;
-typedef int32_t int_least32_t;
-#ifdef _HAVE_UINT64_T
-typedef int64_t int_least64_t;
-#endif
-
-typedef uint8_t uint_least8_t;
-typedef uint16_t uint_least16_t;
-typedef uint32_t uint_least32_t;
-#ifdef _HAVE_UINT64_T
-typedef uint64_t uint_least64_t;
-#endif
- /* least types */
-#endif
-
-#if defined _STDINT_NEED_INT_FAST_T
-typedef int8_t int_fast8_t;
-typedef int int_fast16_t;
-typedef int32_t int_fast32_t;
-#ifdef _HAVE_UINT64_T
-typedef int64_t int_fast64_t;
-#endif
-
-typedef uint8_t uint_fast8_t;
-typedef unsigned uint_fast16_t;
-typedef uint32_t uint_fast32_t;
-#ifdef _HAVE_UINT64_T
-typedef uint64_t uint_fast64_t;
-#endif
- /* fast types */
-#endif
-
-#ifdef _STDINT_NEED_INTMAX_T
-#ifdef _HAVE_UINT64_T
-typedef int64_t intmax_t;
-typedef uint64_t uintmax_t;
-#else
-typedef long intmax_t;
-typedef unsigned long uintmax_t;
-#endif
-#endif
-
-#ifdef _STDINT_NEED_INTPTR_T
-#ifndef __intptr_t_defined
-#define __intptr_t_defined
-/* we encourage using "long" to store pointer values, never use "int" ! */
-#if _STDINT_LONG_MODEL+0 == 242 || _STDINT_LONG_MODEL+0 == 484
-typedef unsinged int uintptr_t;
-typedef int intptr_t;
-#elif _STDINT_LONG_MODEL+0 == 244 || _STDINT_LONG_MODEL+0 == 444
-typedef unsigned long uintptr_t;
-typedef long intptr_t;
-#elif _STDINT_LONG_MODEL+0 == 448 && defined _HAVE_UINT64_T
-typedef uint64_t uintptr_t;
-typedef int64_t intptr_t;
-#else /* matches typical system types ILP32 and LP64 - but not IP16 or LLP64 */
-typedef unsigned long uintptr_t;
-typedef long intptr_t;
-#endif
-#endif
-#endif
-
- /* shortcircuit*/
-#endif
- /* once */
-#endif
-#endif
-STDINT_EOF
- if cmp -s $ac_stdint_h $ac_stdint 2>/dev/null; then
- AC_MSG_NOTICE([$ac_stdint_h is unchanged])
- else
- ac_dir=`AS_DIRNAME(["$ac_stdint_h"])`
- AS_MKDIR_P(["$ac_dir"])
- rm -f $ac_stdint_h
- mv $ac_stdint $ac_stdint_h
- fi
-],[# variables for create stdint.h replacement
-PACKAGE="$PACKAGE"
-VERSION="$VERSION"
-ac_stdint_h="$ac_stdint_h"
-_ac_stdint_h=AS_TR_CPP(_$PACKAGE-$ac_stdint_h)
-ac_cv_stdint_message="$ac_cv_stdint_message"
-ac_cv_header_stdint_t="$ac_cv_header_stdint_t"
-ac_cv_header_stdint_x="$ac_cv_header_stdint_x"
-ac_cv_header_stdint_o="$ac_cv_header_stdint_o"
-ac_cv_header_stdint_u="$ac_cv_header_stdint_u"
-ac_cv_type_uint64_t="$ac_cv_type_uint64_t"
-ac_cv_type_u_int64_t="$ac_cv_type_u_int64_t"
-ac_cv_stdint_char_model="$ac_cv_stdint_char_model"
-ac_cv_stdint_long_model="$ac_cv_stdint_long_model"
-ac_cv_type_int_least32_t="$ac_cv_type_int_least32_t"
-ac_cv_type_int_fast32_t="$ac_cv_type_int_fast32_t"
-ac_cv_type_intmax_t="$ac_cv_type_intmax_t"
-])
-])
diff --git a/argp-standalone/argp-ba.c b/argp-standalone/argp-ba.c
deleted file mode 100644
index 0d3958c1151..00000000000
--- a/argp-standalone/argp-ba.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/* Default definition for ARGP_PROGRAM_BUG_ADDRESS.
- Copyright (C) 1996, 1997, 1999, 2004 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-/* If set by the user program, it should point to string that is the
- bug-reporting address for the program. It will be printed by argp_help if
- the ARGP_HELP_BUG_ADDR flag is set (as it is by various standard help
- messages), embedded in a sentence that says something like `Report bugs to
- ADDR.'. */
-const char *argp_program_bug_address = 0;
diff --git a/argp-standalone/argp-eexst.c b/argp-standalone/argp-eexst.c
deleted file mode 100644
index 46b27847ad4..00000000000
--- a/argp-standalone/argp-eexst.c
+++ /dev/null
@@ -1,36 +0,0 @@
-/* Default definition for ARGP_ERR_EXIT_STATUS
- Copyright (C) 1997 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#if HAVE_SYSEXITS_H
-# include <sysexits.h>
-#else
-# define EX_USAGE 64
-#endif
-
-#include "argp.h"
-
-/* The exit status that argp will use when exiting due to a parsing error.
- If not defined or set by the user program, this defaults to EX_USAGE from
- <sysexits.h>. */
-error_t argp_err_exit_status = EX_USAGE;
diff --git a/argp-standalone/argp-fmtstream.c b/argp-standalone/argp-fmtstream.c
deleted file mode 100644
index 7f792854fc3..00000000000
--- a/argp-standalone/argp-fmtstream.c
+++ /dev/null
@@ -1,475 +0,0 @@
-/* Word-wrapping and line-truncating streams
- Copyright (C) 1997, 1998, 1999, 2001 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-/* This package emulates glibc `line_wrap_stream' semantics for systems that
- don't have that. */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <stdarg.h>
-#include <ctype.h>
-
-#include "argp-fmtstream.h"
-#include "argp-namefrob.h"
-
-#ifndef ARGP_FMTSTREAM_USE_LINEWRAP
-
-#ifndef isblank
-#define isblank(ch) ((ch)==' ' || (ch)=='\t')
-#endif
-
-#if defined _LIBC && defined USE_IN_LIBIO
-# include <libio/libioP.h>
-# define __vsnprintf(s, l, f, a) _IO_vsnprintf (s, l, f, a)
-#endif
-
-#define INIT_BUF_SIZE 200
-#define PRINTF_SIZE_GUESS 150
-
-/* Return an argp_fmtstream that outputs to STREAM, and which prefixes lines
- written on it with LMARGIN spaces and limits them to RMARGIN columns
- total. If WMARGIN >= 0, words that extend past RMARGIN are wrapped by
- replacing the whitespace before them with a newline and WMARGIN spaces.
- Otherwise, chars beyond RMARGIN are simply dropped until a newline.
- Returns NULL if there was an error. */
-argp_fmtstream_t
-__argp_make_fmtstream (FILE *stream,
- size_t lmargin, size_t rmargin, ssize_t wmargin)
-{
- argp_fmtstream_t fs = malloc (sizeof (struct argp_fmtstream));
- if (fs)
- {
- fs->stream = stream;
-
- fs->lmargin = lmargin;
- fs->rmargin = rmargin;
- fs->wmargin = wmargin;
- fs->point_col = 0;
- fs->point_offs = 0;
-
- fs->buf = malloc (INIT_BUF_SIZE);
- if (! fs->buf)
- {
- free (fs);
- fs = 0;
- }
- else
- {
- fs->p = fs->buf;
- fs->end = fs->buf + INIT_BUF_SIZE;
- }
- }
-
- return fs;
-}
-#ifdef weak_alias
-weak_alias (__argp_make_fmtstream, argp_make_fmtstream)
-#endif
-
-/* Flush FS to its stream, and free it (but don't close the stream). */
-void
-__argp_fmtstream_free (argp_fmtstream_t fs)
-{
- __argp_fmtstream_update (fs);
- if (fs->p > fs->buf)
- FWRITE_UNLOCKED (fs->buf, 1, fs->p - fs->buf, fs->stream);
- free (fs->buf);
- free (fs);
-}
-#ifdef weak_alias
-weak_alias (__argp_fmtstream_free, argp_fmtstream_free)
-#endif
-
-/* Process FS's buffer so that line wrapping is done from POINT_OFFS to the
- end of its buffer. This code is mostly from glibc stdio/linewrap.c. */
-void
-__argp_fmtstream_update (argp_fmtstream_t fs)
-{
- char *buf, *nl;
- size_t len;
-
- /* Scan the buffer for newlines. */
- buf = fs->buf + fs->point_offs;
- while (buf < fs->p)
- {
- size_t r;
-
- if (fs->point_col == 0 && fs->lmargin != 0)
- {
- /* We are starting a new line. Print spaces to the left margin. */
- const size_t pad = fs->lmargin;
- if (fs->p + pad < fs->end)
- {
- /* We can fit in them in the buffer by moving the
- buffer text up and filling in the beginning. */
- memmove (buf + pad, buf, fs->p - buf);
- fs->p += pad; /* Compensate for bigger buffer. */
- memset (buf, ' ', pad); /* Fill in the spaces. */
- buf += pad; /* Don't bother searching them. */
- }
- else
- {
- /* No buffer space for spaces. Must flush. */
- size_t i;
- for (i = 0; i < pad; i++)
- PUTC_UNLOCKED (' ', fs->stream);
- }
- fs->point_col = pad;
- }
-
- len = fs->p - buf;
- nl = memchr (buf, '\n', len);
-
- if (fs->point_col < 0)
- fs->point_col = 0;
-
- if (!nl)
- {
- /* The buffer ends in a partial line. */
-
- if (fs->point_col + len < fs->rmargin)
- {
- /* The remaining buffer text is a partial line and fits
- within the maximum line width. Advance point for the
- characters to be written and stop scanning. */
- fs->point_col += len;
- break;
- }
- else
- /* Set the end-of-line pointer for the code below to
- the end of the buffer. */
- nl = fs->p;
- }
- else if (fs->point_col + (nl - buf) < (ssize_t) fs->rmargin)
- {
- /* The buffer contains a full line that fits within the maximum
- line width. Reset point and scan the next line. */
- fs->point_col = 0;
- buf = nl + 1;
- continue;
- }
-
- /* This line is too long. */
- r = fs->rmargin - 1;
-
- if (fs->wmargin < 0)
- {
- /* Truncate the line by overwriting the excess with the
- newline and anything after it in the buffer. */
- if (nl < fs->p)
- {
- memmove (buf + (r - fs->point_col), nl, fs->p - nl);
- fs->p -= buf + (r - fs->point_col) - nl;
- /* Reset point for the next line and start scanning it. */
- fs->point_col = 0;
- buf += r + 1; /* Skip full line plus \n. */
- }
- else
- {
- /* The buffer ends with a partial line that is beyond the
- maximum line width. Advance point for the characters
- written, and discard those past the max from the buffer. */
- fs->point_col += len;
- fs->p -= fs->point_col - r;
- break;
- }
- }
- else
- {
- /* Do word wrap. Go to the column just past the maximum line
- width and scan back for the beginning of the word there.
- Then insert a line break. */
-
- char *p, *nextline;
- int i;
-
- p = buf + (r + 1 - fs->point_col);
- while (p >= buf && !isblank (*p))
- --p;
- nextline = p + 1; /* This will begin the next line. */
-
- if (nextline > buf)
- {
- /* Swallow separating blanks. */
- if (p >= buf)
- do
- --p;
- while (p >= buf && isblank (*p));
- nl = p + 1; /* The newline will replace the first blank. */
- }
- else
- {
- /* A single word that is greater than the maximum line width.
- Oh well. Put it on an overlong line by itself. */
- p = buf + (r + 1 - fs->point_col);
- /* Find the end of the long word. */
- do
- ++p;
- while (p < nl && !isblank (*p));
- if (p == nl)
- {
- /* It already ends a line. No fussing required. */
- fs->point_col = 0;
- buf = nl + 1;
- continue;
- }
- /* We will move the newline to replace the first blank. */
- nl = p;
- /* Swallow separating blanks. */
- do
- ++p;
- while (isblank (*p));
- /* The next line will start here. */
- nextline = p;
- }
-
- /* Note: There are a bunch of tests below for
- NEXTLINE == BUF + LEN + 1; this case is where NL happens to fall
- at the end of the buffer, and NEXTLINE is in fact empty (and so
- we need not be careful to maintain its contents). */
-
- if (nextline == buf + len + 1
- ? fs->end - nl < fs->wmargin + 1
- : nextline - (nl + 1) < fs->wmargin)
- {
- /* The margin needs more blanks than we removed. */
- if (fs->end - fs->p > fs->wmargin + 1)
- /* Make some space for them. */
- {
- size_t mv = fs->p - nextline;
- memmove (nl + 1 + fs->wmargin, nextline, mv);
- nextline = nl + 1 + fs->wmargin;
- len = nextline + mv - buf;
- *nl++ = '\n';
- }
- else
- /* Output the first line so we can use the space. */
- {
- if (nl > fs->buf)
- FWRITE_UNLOCKED (fs->buf, 1, nl - fs->buf, fs->stream);
- PUTC_UNLOCKED ('\n', fs->stream);
- len += buf - fs->buf;
- nl = buf = fs->buf;
- }
- }
- else
- /* We can fit the newline and blanks in before
- the next word. */
- *nl++ = '\n';
-
- if (nextline - nl >= fs->wmargin
- || (nextline == buf + len + 1 && fs->end - nextline >= fs->wmargin))
- /* Add blanks up to the wrap margin column. */
- for (i = 0; i < fs->wmargin; ++i)
- *nl++ = ' ';
- else
- for (i = 0; i < fs->wmargin; ++i)
- PUTC_UNLOCKED (' ', fs->stream);
-
- /* Copy the tail of the original buffer into the current buffer
- position. */
- if (nl < nextline)
- memmove (nl, nextline, buf + len - nextline);
- len -= nextline - buf;
-
- /* Continue the scan on the remaining lines in the buffer. */
- buf = nl;
-
- /* Restore bufp to include all the remaining text. */
- fs->p = nl + len;
-
- /* Reset the counter of what has been output this line. If wmargin
- is 0, we want to avoid the lmargin getting added, so we set
- point_col to a magic value of -1 in that case. */
- fs->point_col = fs->wmargin ? fs->wmargin : -1;
- }
- }
-
- /* Remember that we've scanned as far as the end of the buffer. */
- fs->point_offs = fs->p - fs->buf;
-}
-
-/* Ensure that FS has space for AMOUNT more bytes in its buffer, either by
- growing the buffer, or by flushing it. True is returned iff we succeed. */
-int
-__argp_fmtstream_ensure (struct argp_fmtstream *fs, size_t amount)
-{
- if ((size_t) (fs->end - fs->p) < amount)
- {
- ssize_t wrote;
-
- /* Flush FS's buffer. */
- __argp_fmtstream_update (fs);
-
- wrote = FWRITE_UNLOCKED (fs->buf, 1, fs->p - fs->buf, fs->stream);
- if (wrote == fs->p - fs->buf)
- {
- fs->p = fs->buf;
- fs->point_offs = 0;
- }
- else
- {
- fs->p -= wrote;
- fs->point_offs -= wrote;
- memmove (fs->buf, fs->buf + wrote, fs->p - fs->buf);
- return 0;
- }
-
- if ((size_t) (fs->end - fs->buf) < amount)
- /* Gotta grow the buffer. */
- {
- size_t new_size = fs->end - fs->buf + amount;
- char *new_buf = realloc (fs->buf, new_size);
-
- if (! new_buf)
- {
- __set_errno (ENOMEM);
- return 0;
- }
-
- fs->buf = new_buf;
- fs->end = new_buf + new_size;
- fs->p = fs->buf;
- }
- }
-
- return 1;
-}
-
-ssize_t
-__argp_fmtstream_printf (struct argp_fmtstream *fs, const char *fmt, ...)
-{
- size_t out;
- size_t avail;
- size_t size_guess = PRINTF_SIZE_GUESS; /* How much space to reserve. */
-
- do
- {
- va_list args;
-
- if (! __argp_fmtstream_ensure (fs, size_guess))
- return -1;
-
- va_start (args, fmt);
- avail = fs->end - fs->p;
- out = __vsnprintf (fs->p, avail, fmt, args);
- va_end (args);
- if (out >= avail)
- size_guess = out + 1;
- }
- while (out >= avail);
-
- fs->p += out;
-
- return out;
-}
-#ifdef weak_alias
-weak_alias (__argp_fmtstream_printf, argp_fmtstream_printf)
-#endif
-
-/* Duplicate the inline definitions in argp-fmtstream.h, for compilers
- * that don't do inlining. */
-size_t
-__argp_fmtstream_write (argp_fmtstream_t __fs,
- __const char *__str, size_t __len)
-{
- if (__fs->p + __len <= __fs->end || __argp_fmtstream_ensure (__fs, __len))
- {
- memcpy (__fs->p, __str, __len);
- __fs->p += __len;
- return __len;
- }
- else
- return 0;
-}
-
-int
-__argp_fmtstream_puts (argp_fmtstream_t __fs, __const char *__str)
-{
- size_t __len = strlen (__str);
- if (__len)
- {
- size_t __wrote = __argp_fmtstream_write (__fs, __str, __len);
- return __wrote == __len ? 0 : -1;
- }
- else
- return 0;
-}
-
-int
-__argp_fmtstream_putc (argp_fmtstream_t __fs, int __ch)
-{
- if (__fs->p < __fs->end || __argp_fmtstream_ensure (__fs, 1))
- return *__fs->p++ = __ch;
- else
- return EOF;
-}
-
-/* Set __FS's left margin to __LMARGIN and return the old value. */
-size_t
-__argp_fmtstream_set_lmargin (argp_fmtstream_t __fs, size_t __lmargin)
-{
- size_t __old;
- if ((size_t) (__fs->p - __fs->buf) > __fs->point_offs)
- __argp_fmtstream_update (__fs);
- __old = __fs->lmargin;
- __fs->lmargin = __lmargin;
- return __old;
-}
-
-/* Set __FS's right margin to __RMARGIN and return the old value. */
-size_t
-__argp_fmtstream_set_rmargin (argp_fmtstream_t __fs, size_t __rmargin)
-{
- size_t __old;
- if ((size_t) (__fs->p - __fs->buf) > __fs->point_offs)
- __argp_fmtstream_update (__fs);
- __old = __fs->rmargin;
- __fs->rmargin = __rmargin;
- return __old;
-}
-
-/* Set FS's wrap margin to __WMARGIN and return the old value. */
-size_t
-__argp_fmtstream_set_wmargin (argp_fmtstream_t __fs, size_t __wmargin)
-{
- size_t __old;
- if ((size_t) (__fs->p - __fs->buf) > __fs->point_offs)
- __argp_fmtstream_update (__fs);
- __old = __fs->wmargin;
- __fs->wmargin = __wmargin;
- return __old;
-}
-
-/* Return the column number of the current output point in __FS. */
-size_t
-__argp_fmtstream_point (argp_fmtstream_t __fs)
-{
- if ((size_t) (__fs->p - __fs->buf) > __fs->point_offs)
- __argp_fmtstream_update (__fs);
- return __fs->point_col >= 0 ? __fs->point_col : 0;
-}
-
-#endif /* !ARGP_FMTSTREAM_USE_LINEWRAP */
diff --git a/argp-standalone/argp-fmtstream.h b/argp-standalone/argp-fmtstream.h
deleted file mode 100644
index e797b119ebd..00000000000
--- a/argp-standalone/argp-fmtstream.h
+++ /dev/null
@@ -1,319 +0,0 @@
-/* Word-wrapping and line-truncating streams.
- Copyright (C) 1997, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-/* This package emulates glibc `line_wrap_stream' semantics for systems that
- don't have that. If the system does have it, it is just a wrapper for
- that. This header file is only used internally while compiling argp, and
- shouldn't be installed. */
-
-#ifndef _ARGP_FMTSTREAM_H
-#define _ARGP_FMTSTREAM_H
-
-#include <stdio.h>
-#include <string.h>
-
-#if HAVE_UNISTD_H
-# include <unistd.h>
-#else
-/* This is a kludge to make the code compile on windows. Perhaps it
- would be better to just replace ssize_t with int through out the
- code. */
-# define ssize_t int
-#endif
-
-#if _LIBC || (defined (HAVE_FLOCKFILE) && defined(HAVE_PUTC_UNLOCKED) \
- && defined (HAVE_FPUTS_UNLOCKED) && defined (HAVE_FWRITE_UNLOCKED) )
-/* Use locking funxtions */
-# define FLOCKFILE(f) flockfile(f)
-# define FUNLOCKFILE(f) funlockfile(f)
-# define PUTC_UNLOCKED(c, f) putc_unlocked((c), (f))
-# define FPUTS_UNLOCKED(s, f) fputs_unlocked((s), (f))
-# define FWRITE_UNLOCKED(b, s, n, f) fwrite_unlocked((b), (s), (n), (f))
-#else
-/* Disable stdio locking */
-# define FLOCKFILE(f)
-# define FUNLOCKFILE(f)
-# define PUTC_UNLOCKED(c, f) putc((c), (f))
-# define FPUTS_UNLOCKED(s, f) fputs((s), (f))
-# define FWRITE_UNLOCKED(b, s, n, f) fwrite((b), (s), (n), (f))
-#endif /* No thread safe i/o */
-
-#if (_LIBC - 0 && !defined (USE_IN_LIBIO)) \
- || (defined (__GNU_LIBRARY__) && defined (HAVE_LINEWRAP_H))
-/* line_wrap_stream is available, so use that. */
-#define ARGP_FMTSTREAM_USE_LINEWRAP
-#endif
-
-#ifdef ARGP_FMTSTREAM_USE_LINEWRAP
-/* Just be a simple wrapper for line_wrap_stream; the semantics are
- *slightly* different, as line_wrap_stream doesn't actually make a new
- object, it just modifies the given stream (reversibly) to do
- line-wrapping. Since we control who uses this code, it doesn't matter. */
-
-#include <linewrap.h>
-
-typedef FILE *argp_fmtstream_t;
-
-#define argp_make_fmtstream line_wrap_stream
-#define __argp_make_fmtstream line_wrap_stream
-#define argp_fmtstream_free line_unwrap_stream
-#define __argp_fmtstream_free line_unwrap_stream
-
-#define __argp_fmtstream_putc(fs,ch) putc(ch,fs)
-#define argp_fmtstream_putc(fs,ch) putc(ch,fs)
-#define __argp_fmtstream_puts(fs,str) fputs(str,fs)
-#define argp_fmtstream_puts(fs,str) fputs(str,fs)
-#define __argp_fmtstream_write(fs,str,len) fwrite(str,1,len,fs)
-#define argp_fmtstream_write(fs,str,len) fwrite(str,1,len,fs)
-#define __argp_fmtstream_printf fprintf
-#define argp_fmtstream_printf fprintf
-
-#define __argp_fmtstream_lmargin line_wrap_lmargin
-#define argp_fmtstream_lmargin line_wrap_lmargin
-#define __argp_fmtstream_set_lmargin line_wrap_set_lmargin
-#define argp_fmtstream_set_lmargin line_wrap_set_lmargin
-#define __argp_fmtstream_rmargin line_wrap_rmargin
-#define argp_fmtstream_rmargin line_wrap_rmargin
-#define __argp_fmtstream_set_rmargin line_wrap_set_rmargin
-#define argp_fmtstream_set_rmargin line_wrap_set_rmargin
-#define __argp_fmtstream_wmargin line_wrap_wmargin
-#define argp_fmtstream_wmargin line_wrap_wmargin
-#define __argp_fmtstream_set_wmargin line_wrap_set_wmargin
-#define argp_fmtstream_set_wmargin line_wrap_set_wmargin
-#define __argp_fmtstream_point line_wrap_point
-#define argp_fmtstream_point line_wrap_point
-
-#else /* !ARGP_FMTSTREAM_USE_LINEWRAP */
-/* Guess we have to define our own version. */
-
-#ifndef __const
-#define __const const
-#endif
-
-
-struct argp_fmtstream
-{
- FILE *stream; /* The stream we're outputting to. */
-
- size_t lmargin, rmargin; /* Left and right margins. */
- ssize_t wmargin; /* Margin to wrap to, or -1 to truncate. */
-
- /* Point in buffer to which we've processed for wrapping, but not output. */
- size_t point_offs;
- /* Output column at POINT_OFFS, or -1 meaning 0 but don't add lmargin. */
- ssize_t point_col;
-
- char *buf; /* Output buffer. */
- char *p; /* Current end of text in BUF. */
- char *end; /* Absolute end of BUF. */
-};
-
-typedef struct argp_fmtstream *argp_fmtstream_t;
-
-/* Return an argp_fmtstream that outputs to STREAM, and which prefixes lines
- written on it with LMARGIN spaces and limits them to RMARGIN columns
- total. If WMARGIN >= 0, words that extend past RMARGIN are wrapped by
- replacing the whitespace before them with a newline and WMARGIN spaces.
- Otherwise, chars beyond RMARGIN are simply dropped until a newline.
- Returns NULL if there was an error. */
-extern argp_fmtstream_t __argp_make_fmtstream (FILE *__stream,
- size_t __lmargin,
- size_t __rmargin,
- ssize_t __wmargin);
-extern argp_fmtstream_t argp_make_fmtstream (FILE *__stream,
- size_t __lmargin,
- size_t __rmargin,
- ssize_t __wmargin);
-
-/* Flush __FS to its stream, and free it (but don't close the stream). */
-extern void __argp_fmtstream_free (argp_fmtstream_t __fs);
-extern void argp_fmtstream_free (argp_fmtstream_t __fs);
-
-extern ssize_t __argp_fmtstream_printf (argp_fmtstream_t __fs,
- __const char *__fmt, ...)
- PRINTF_STYLE(2,3);
-extern ssize_t argp_fmtstream_printf (argp_fmtstream_t __fs,
- __const char *__fmt, ...)
- PRINTF_STYLE(2,3);
-
-extern int __argp_fmtstream_putc (argp_fmtstream_t __fs, int __ch);
-extern int argp_fmtstream_putc (argp_fmtstream_t __fs, int __ch);
-
-extern int __argp_fmtstream_puts (argp_fmtstream_t __fs, __const char *__str);
-extern int argp_fmtstream_puts (argp_fmtstream_t __fs, __const char *__str);
-
-extern size_t __argp_fmtstream_write (argp_fmtstream_t __fs,
- __const char *__str, size_t __len);
-extern size_t argp_fmtstream_write (argp_fmtstream_t __fs,
- __const char *__str, size_t __len);
-
-/* Access macros for various bits of state. */
-#define argp_fmtstream_lmargin(__fs) ((__fs)->lmargin)
-#define argp_fmtstream_rmargin(__fs) ((__fs)->rmargin)
-#define argp_fmtstream_wmargin(__fs) ((__fs)->wmargin)
-#define __argp_fmtstream_lmargin argp_fmtstream_lmargin
-#define __argp_fmtstream_rmargin argp_fmtstream_rmargin
-#define __argp_fmtstream_wmargin argp_fmtstream_wmargin
-
-/* Set __FS's left margin to LMARGIN and return the old value. */
-extern size_t argp_fmtstream_set_lmargin (argp_fmtstream_t __fs,
- size_t __lmargin);
-extern size_t __argp_fmtstream_set_lmargin (argp_fmtstream_t __fs,
- size_t __lmargin);
-
-/* Set __FS's right margin to __RMARGIN and return the old value. */
-extern size_t argp_fmtstream_set_rmargin (argp_fmtstream_t __fs,
- size_t __rmargin);
-extern size_t __argp_fmtstream_set_rmargin (argp_fmtstream_t __fs,
- size_t __rmargin);
-
-/* Set __FS's wrap margin to __WMARGIN and return the old value. */
-extern size_t argp_fmtstream_set_wmargin (argp_fmtstream_t __fs,
- size_t __wmargin);
-extern size_t __argp_fmtstream_set_wmargin (argp_fmtstream_t __fs,
- size_t __wmargin);
-
-/* Return the column number of the current output point in __FS. */
-extern size_t argp_fmtstream_point (argp_fmtstream_t __fs);
-extern size_t __argp_fmtstream_point (argp_fmtstream_t __fs);
-
-/* Internal routines. */
-extern void _argp_fmtstream_update (argp_fmtstream_t __fs);
-extern void __argp_fmtstream_update (argp_fmtstream_t __fs);
-extern int _argp_fmtstream_ensure (argp_fmtstream_t __fs, size_t __amount);
-extern int __argp_fmtstream_ensure (argp_fmtstream_t __fs, size_t __amount);
-
-#ifdef __OPTIMIZE__
-/* Inline versions of above routines. */
-
-#if !_LIBC
-#define __argp_fmtstream_putc argp_fmtstream_putc
-#define __argp_fmtstream_puts argp_fmtstream_puts
-#define __argp_fmtstream_write argp_fmtstream_write
-#define __argp_fmtstream_set_lmargin argp_fmtstream_set_lmargin
-#define __argp_fmtstream_set_rmargin argp_fmtstream_set_rmargin
-#define __argp_fmtstream_set_wmargin argp_fmtstream_set_wmargin
-#define __argp_fmtstream_point argp_fmtstream_point
-#define __argp_fmtstream_update _argp_fmtstream_update
-#define __argp_fmtstream_ensure _argp_fmtstream_ensure
-#endif
-
-#ifndef ARGP_FS_EI
-#define ARGP_FS_EI extern inline
-#endif
-
-ARGP_FS_EI size_t
-__argp_fmtstream_write (argp_fmtstream_t __fs,
- __const char *__str, size_t __len)
-{
- if (__fs->p + __len <= __fs->end || __argp_fmtstream_ensure (__fs, __len))
- {
- memcpy (__fs->p, __str, __len);
- __fs->p += __len;
- return __len;
- }
- else
- return 0;
-}
-
-ARGP_FS_EI int
-__argp_fmtstream_puts (argp_fmtstream_t __fs, __const char *__str)
-{
- size_t __len = strlen (__str);
- if (__len)
- {
- size_t __wrote = __argp_fmtstream_write (__fs, __str, __len);
- return __wrote == __len ? 0 : -1;
- }
- else
- return 0;
-}
-
-ARGP_FS_EI int
-__argp_fmtstream_putc (argp_fmtstream_t __fs, int __ch)
-{
- if (__fs->p < __fs->end || __argp_fmtstream_ensure (__fs, 1))
- return *__fs->p++ = __ch;
- else
- return EOF;
-}
-
-/* Set __FS's left margin to __LMARGIN and return the old value. */
-ARGP_FS_EI size_t
-__argp_fmtstream_set_lmargin (argp_fmtstream_t __fs, size_t __lmargin)
-{
- size_t __old;
- if ((size_t) (__fs->p - __fs->buf) > __fs->point_offs)
- __argp_fmtstream_update (__fs);
- __old = __fs->lmargin;
- __fs->lmargin = __lmargin;
- return __old;
-}
-
-/* Set __FS's right margin to __RMARGIN and return the old value. */
-ARGP_FS_EI size_t
-__argp_fmtstream_set_rmargin (argp_fmtstream_t __fs, size_t __rmargin)
-{
- size_t __old;
- if ((size_t) (__fs->p - __fs->buf) > __fs->point_offs)
- __argp_fmtstream_update (__fs);
- __old = __fs->rmargin;
- __fs->rmargin = __rmargin;
- return __old;
-}
-
-/* Set FS's wrap margin to __WMARGIN and return the old value. */
-ARGP_FS_EI size_t
-__argp_fmtstream_set_wmargin (argp_fmtstream_t __fs, size_t __wmargin)
-{
- size_t __old;
- if ((size_t) (__fs->p - __fs->buf) > __fs->point_offs)
- __argp_fmtstream_update (__fs);
- __old = __fs->wmargin;
- __fs->wmargin = __wmargin;
- return __old;
-}
-
-/* Return the column number of the current output point in __FS. */
-ARGP_FS_EI size_t
-__argp_fmtstream_point (argp_fmtstream_t __fs)
-{
- if ((size_t) (__fs->p - __fs->buf) > __fs->point_offs)
- __argp_fmtstream_update (__fs);
- return __fs->point_col >= 0 ? __fs->point_col : 0;
-}
-
-#if !_LIBC
-#undef __argp_fmtstream_putc
-#undef __argp_fmtstream_puts
-#undef __argp_fmtstream_write
-#undef __argp_fmtstream_set_lmargin
-#undef __argp_fmtstream_set_rmargin
-#undef __argp_fmtstream_set_wmargin
-#undef __argp_fmtstream_point
-#undef __argp_fmtstream_update
-#undef __argp_fmtstream_ensure
-#endif
-
-#endif /* __OPTIMIZE__ */
-
-#endif /* ARGP_FMTSTREAM_USE_LINEWRAP */
-
-#endif /* argp-fmtstream.h */
diff --git a/argp-standalone/argp-help.c b/argp-standalone/argp-help.c
deleted file mode 100644
index ced78c4cb26..00000000000
--- a/argp-standalone/argp-help.c
+++ /dev/null
@@ -1,1849 +0,0 @@
-/* Hierarchial argument parsing help output
- Copyright (C) 1995,96,97,98,99,2000, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-#ifndef _GNU_SOURCE
-# define _GNU_SOURCE 1
-#endif
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#if HAVE_ALLOCA_H
-#include <alloca.h>
-#endif
-
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include <stdarg.h>
-#include <ctype.h>
-#if HAVE_MALLOC_H
-/* Needed, for alloca on windows */
-# include <malloc.h>
-#endif
-
-#ifndef _
-/* This is for other GNU distributions with internationalized messages. */
-# if defined HAVE_LIBINTL_H || defined _LIBC
-# include <libintl.h>
-# ifdef _LIBC
-# undef dgettext
-# define dgettext(domain, msgid) __dcgettext (domain, msgid, LC_MESSAGES)
-# endif
-# else
-# define dgettext(domain, msgid) (msgid)
-# endif
-#endif
-
-#include "argp.h"
-#include "argp-fmtstream.h"
-#include "argp-namefrob.h"
-
-
-#ifndef _LIBC
-# ifndef __strchrnul
-# define __strchrnul strchrnul
-# endif
-# ifndef __mempcpy
-# define __mempcpy mempcpy
-# endif
-/* We need to use a different name, as __strndup is likely a macro. */
-# define STRNDUP strndup
-# if HAVE_STRERROR
-# define STRERROR strerror
-# else
-# define STRERROR(x) (sys_errlist[x])
-# endif
-#else /* _LIBC */
-# define FLOCKFILE __flockfile
-# define FUNLOCKFILE __funlockfile
-# define STRNDUP __strndup
-# define STRERROR strerror
-#endif
-
-#if !_LIBC
-# if !HAVE_STRNDUP
-char *strndup (const char *s, size_t size);
-# endif /* !HAVE_STRNDUP */
-
-# if !HAVE_MEMPCPY
-void *mempcpy (void *to, const void *from, size_t size);
-# endif /* !HAVE_MEMPCPY */
-
-# if !HAVE_STRCHRNUL
-char *strchrnul(const char *s, int c);
-# endif /* !HAVE_STRCHRNUL */
-
-# if !HAVE_STRCASECMP
-int strcasecmp(const char *s1, const char *s2);
-#endif
-
-#endif /* !_LIBC */
-
-
-/* User-selectable (using an environment variable) formatting parameters.
-
- These may be specified in an environment variable called `ARGP_HELP_FMT',
- with a contents like: VAR1=VAL1,VAR2=VAL2,BOOLVAR2,no-BOOLVAR2
- Where VALn must be a positive integer. The list of variables is in the
- UPARAM_NAMES vector, below. */
-
-/* Default parameters. */
-#define DUP_ARGS 0 /* True if option argument can be duplicated. */
-#define DUP_ARGS_NOTE 1 /* True to print a note about duplicate args. */
-#define SHORT_OPT_COL 2 /* column in which short options start */
-#define LONG_OPT_COL 6 /* column in which long options start */
-#define DOC_OPT_COL 2 /* column in which doc options start */
-#define OPT_DOC_COL 29 /* column in which option text starts */
-#define HEADER_COL 1 /* column in which group headers are printed */
-#define USAGE_INDENT 12 /* indentation of wrapped usage lines */
-#define RMARGIN 79 /* right margin used for wrapping */
-
-/* User-selectable (using an environment variable) formatting parameters.
- They must all be of type `int' for the parsing code to work. */
-struct uparams
-{
- /* If true, arguments for an option are shown with both short and long
- options, even when a given option has both, e.g. `-x ARG, --longx=ARG'.
- If false, then if an option has both, the argument is only shown with
- the long one, e.g., `-x, --longx=ARG', and a message indicating that
- this really means both is printed below the options. */
- int dup_args;
-
- /* This is true if when DUP_ARGS is false, and some duplicate arguments have
- been suppressed, an explanatory message should be printed. */
- int dup_args_note;
-
- /* Various output columns. */
- int short_opt_col;
- int long_opt_col;
- int doc_opt_col;
- int opt_doc_col;
- int header_col;
- int usage_indent;
- int rmargin;
-
- int valid; /* True when the values in here are valid. */
-};
-
-/* This is a global variable, as user options are only ever read once. */
-static struct uparams uparams = {
- DUP_ARGS, DUP_ARGS_NOTE,
- SHORT_OPT_COL, LONG_OPT_COL, DOC_OPT_COL, OPT_DOC_COL, HEADER_COL,
- USAGE_INDENT, RMARGIN,
- 0
-};
-
-/* A particular uparam, and what the user name is. */
-struct uparam_name
-{
- const char *name; /* User name. */
- int is_bool; /* Whether it's `boolean'. */
- size_t uparams_offs; /* Location of the (int) field in UPARAMS. */
-};
-
-/* The name-field mappings we know about. */
-static const struct uparam_name uparam_names[] =
-{
- { "dup-args", 1, offsetof (struct uparams, dup_args) },
- { "dup-args-note", 1, offsetof (struct uparams, dup_args_note) },
- { "short-opt-col", 0, offsetof (struct uparams, short_opt_col) },
- { "long-opt-col", 0, offsetof (struct uparams, long_opt_col) },
- { "doc-opt-col", 0, offsetof (struct uparams, doc_opt_col) },
- { "opt-doc-col", 0, offsetof (struct uparams, opt_doc_col) },
- { "header-col", 0, offsetof (struct uparams, header_col) },
- { "usage-indent", 0, offsetof (struct uparams, usage_indent) },
- { "rmargin", 0, offsetof (struct uparams, rmargin) },
- { 0, 0, 0 }
-};
-
-/* Read user options from the environment, and fill in UPARAMS appropiately. */
-static void
-fill_in_uparams (const struct argp_state *state)
-{
-
- const char *var = getenv ("ARGP_HELP_FMT");
-
-#define SKIPWS(p) do { while (isspace (*p)) p++; } while (0);
-
- if (var)
- /* Parse var. */
- while (*var)
- {
- SKIPWS (var);
-
- if (isalpha (*var))
- {
- size_t var_len;
- const struct uparam_name *un;
- int unspec = 0, val = 0;
- const char *arg = var;
-
- while (isalnum (*arg) || *arg == '-' || *arg == '_')
- arg++;
- var_len = arg - var;
-
- SKIPWS (arg);
-
- if (*arg == '\0' || *arg == ',')
- unspec = 1;
- else if (*arg == '=')
- {
- arg++;
- SKIPWS (arg);
- }
-
- if (unspec)
- {
- if (var[0] == 'n' && var[1] == 'o' && var[2] == '-')
- {
- val = 0;
- var += 3;
- var_len -= 3;
- }
- else
- val = 1;
- }
- else if (isdigit (*arg))
- {
- val = atoi (arg);
- while (isdigit (*arg))
- arg++;
- SKIPWS (arg);
- }
-
- for (un = uparam_names; un->name; un++)
- if (strlen (un->name) == var_len
- && strncmp (var, un->name, var_len) == 0)
- {
- if (unspec && !un->is_bool)
- __argp_failure (state, 0, 0,
- dgettext (state->root_argp->argp_domain, "\
-%.*s: ARGP_HELP_FMT parameter requires a value"),
- (int) var_len, var);
- else
- *(int *)((char *)&uparams + un->uparams_offs) = val;
- break;
- }
- if (! un->name)
- __argp_failure (state, 0, 0,
- dgettext (state->root_argp->argp_domain, "\
-%.*s: Unknown ARGP_HELP_FMT parameter"),
- (int) var_len, var);
-
- var = arg;
- if (*var == ',')
- var++;
- }
- else if (*var)
- {
- __argp_failure (state, 0, 0,
- dgettext (state->root_argp->argp_domain,
- "Garbage in ARGP_HELP_FMT: %s"), var);
- break;
- }
- }
-}
-
-/* Returns true if OPT hasn't been marked invisible. Visibility only affects
- whether OPT is displayed or used in sorting, not option shadowing. */
-#define ovisible(opt) (! ((opt)->flags & OPTION_HIDDEN))
-
-/* Returns true if OPT is an alias for an earlier option. */
-#define oalias(opt) ((opt)->flags & OPTION_ALIAS)
-
-/* Returns true if OPT is an documentation-only entry. */
-#define odoc(opt) ((opt)->flags & OPTION_DOC)
-
-/* Returns true if OPT is the end-of-list marker for a list of options. */
-#define oend(opt) __option_is_end (opt)
-
-/* Returns true if OPT has a short option. */
-#define oshort(opt) __option_is_short (opt)
-
-/*
- The help format for a particular option is like:
-
- -xARG, -yARG, --long1=ARG, --long2=ARG Documentation...
-
- Where ARG will be omitted if there's no argument, for this option, or
- will be surrounded by "[" and "]" appropiately if the argument is
- optional. The documentation string is word-wrapped appropiately, and if
- the list of options is long enough, it will be started on a separate line.
- If there are no short options for a given option, the first long option is
- indented slighly in a way that's supposed to make most long options appear
- to be in a separate column.
-
- For example, the following output (from ps):
-
- -p PID, --pid=PID List the process PID
- --pgrp=PGRP List processes in the process group PGRP
- -P, -x, --no-parent Include processes without parents
- -Q, --all-fields Don't elide unusable fields (normally if there's
- some reason ps can't print a field for any
- process, it's removed from the output entirely)
- -r, --reverse, --gratuitously-long-reverse-option
- Reverse the order of any sort
- --session[=SID] Add the processes from the session SID (which
- defaults to the sid of the current process)
-
- Here are some more options:
- -f ZOT, --foonly=ZOT Glork a foonly
- -z, --zaza Snit a zar
-
- -?, --help Give this help list
- --usage Give a short usage message
- -V, --version Print program version
-
- The struct argp_option array for the above could look like:
-
- {
- {"pid", 'p', "PID", 0, "List the process PID"},
- {"pgrp", OPT_PGRP, "PGRP", 0, "List processes in the process group PGRP"},
- {"no-parent", 'P', 0, 0, "Include processes without parents"},
- {0, 'x', 0, OPTION_ALIAS},
- {"all-fields",'Q', 0, 0, "Don't elide unusable fields (normally"
- " if there's some reason ps can't"
- " print a field for any process, it's"
- " removed from the output entirely)" },
- {"reverse", 'r', 0, 0, "Reverse the order of any sort"},
- {"gratuitously-long-reverse-option", 0, 0, OPTION_ALIAS},
- {"session", OPT_SESS, "SID", OPTION_ARG_OPTIONAL,
- "Add the processes from the session"
- " SID (which defaults to the sid of"
- " the current process)" },
-
- {0,0,0,0, "Here are some more options:"},
- {"foonly", 'f', "ZOT", 0, "Glork a foonly"},
- {"zaza", 'z', 0, 0, "Snit a zar"},
-
- {0}
- }
-
- Note that the last three options are automatically supplied by argp_parse,
- unless you tell it not to with ARGP_NO_HELP.
-
-*/
-
-/* Returns true if CH occurs between BEG and END. */
-static int
-find_char (char ch, char *beg, char *end)
-{
- while (beg < end)
- if (*beg == ch)
- return 1;
- else
- beg++;
- return 0;
-}
-
-struct hol_cluster; /* fwd decl */
-
-struct hol_entry
-{
- /* First option. */
- const struct argp_option *opt;
- /* Number of options (including aliases). */
- unsigned num;
-
- /* A pointers into the HOL's short_options field, to the first short option
- letter for this entry. The order of the characters following this point
- corresponds to the order of options pointed to by OPT, and there are at
- most NUM. A short option recorded in a option following OPT is only
- valid if it occurs in the right place in SHORT_OPTIONS (otherwise it's
- probably been shadowed by some other entry). */
- char *short_options;
-
- /* Entries are sorted by their group first, in the order:
- 1, 2, ..., n, 0, -m, ..., -2, -1
- and then alphabetically within each group. The default is 0. */
- int group;
-
- /* The cluster of options this entry belongs to, or 0 if none. */
- struct hol_cluster *cluster;
-
- /* The argp from which this option came. */
- const struct argp *argp;
-};
-
-/* A cluster of entries to reflect the argp tree structure. */
-struct hol_cluster
-{
- /* A descriptive header printed before options in this cluster. */
- const char *header;
-
- /* Used to order clusters within the same group with the same parent,
- according to the order in which they occurred in the parent argp's child
- list. */
- int index;
-
- /* How to sort this cluster with respect to options and other clusters at the
- same depth (clusters always follow options in the same group). */
- int group;
-
- /* The cluster to which this cluster belongs, or 0 if it's at the base
- level. */
- struct hol_cluster *parent;
-
- /* The argp from which this cluster is (eventually) derived. */
- const struct argp *argp;
-
- /* The distance this cluster is from the root. */
- int depth;
-
- /* Clusters in a given hol are kept in a linked list, to make freeing them
- possible. */
- struct hol_cluster *next;
-};
-
-/* A list of options for help. */
-struct hol
-{
- /* An array of hol_entry's. */
- struct hol_entry *entries;
- /* The number of entries in this hol. If this field is zero, the others
- are undefined. */
- unsigned num_entries;
-
- /* A string containing all short options in this HOL. Each entry contains
- pointers into this string, so the order can't be messed with blindly. */
- char *short_options;
-
- /* Clusters of entries in this hol. */
- struct hol_cluster *clusters;
-};
-
-/* Create a struct hol from the options in ARGP. CLUSTER is the
- hol_cluster in which these entries occur, or 0, if at the root. */
-static struct hol *
-make_hol (const struct argp *argp, struct hol_cluster *cluster)
-{
- char *so;
- const struct argp_option *o;
- const struct argp_option *opts = argp->options;
- struct hol_entry *entry;
- unsigned num_short_options = 0;
- struct hol *hol = malloc (sizeof (struct hol));
-
- assert (hol);
-
- hol->num_entries = 0;
- hol->clusters = 0;
-
- if (opts)
- {
- int cur_group = 0;
-
- /* The first option must not be an alias. */
- assert (! oalias (opts));
-
- /* Calculate the space needed. */
- for (o = opts; ! oend (o); o++)
- {
- if (! oalias (o))
- hol->num_entries++;
- if (oshort (o))
- num_short_options++; /* This is an upper bound. */
- }
-
- hol->entries = malloc (sizeof (struct hol_entry) * hol->num_entries);
- hol->short_options = malloc (num_short_options + 1);
-
- assert (hol->entries && hol->short_options);
-
- /* Fill in the entries. */
- so = hol->short_options;
- for (o = opts, entry = hol->entries; ! oend (o); entry++)
- {
- entry->opt = o;
- entry->num = 0;
- entry->short_options = so;
- entry->group = cur_group =
- o->group
- ? o->group
- : ((!o->name && !o->key)
- ? cur_group + 1
- : cur_group);
- entry->cluster = cluster;
- entry->argp = argp;
-
- do
- {
- entry->num++;
- if (oshort (o) && ! find_char (o->key, hol->short_options, so))
- /* O has a valid short option which hasn't already been used.*/
- *so++ = o->key;
- o++;
- }
- while (! oend (o) && oalias (o));
- }
- *so = '\0'; /* null terminated so we can find the length */
- }
-
- return hol;
-}
-
-/* Add a new cluster to HOL, with the given GROUP and HEADER (taken from the
- associated argp child list entry), INDEX, and PARENT, and return a pointer
- to it. ARGP is the argp that this cluster results from. */
-static struct hol_cluster *
-hol_add_cluster (struct hol *hol, int group, const char *header, int index,
- struct hol_cluster *parent, const struct argp *argp)
-{
- struct hol_cluster *cl = malloc (sizeof (struct hol_cluster));
- if (cl)
- {
- cl->group = group;
- cl->header = header;
-
- cl->index = index;
- cl->parent = parent;
- cl->argp = argp;
- cl->depth = parent ? parent->depth + 1 : 0;
-
- cl->next = hol->clusters;
- hol->clusters = cl;
- }
- return cl;
-}
-
-/* Free HOL and any resources it uses. */
-static void
-hol_free (struct hol *hol)
-{
- struct hol_cluster *cl = hol->clusters;
-
- while (cl)
- {
- struct hol_cluster *next = cl->next;
- free (cl);
- cl = next;
- }
-
- if (hol->num_entries > 0)
- {
- free (hol->entries);
- free (hol->short_options);
- }
-
- free (hol);
-}
-
-static inline int
-hol_entry_short_iterate (const struct hol_entry *entry,
- int (*func)(const struct argp_option *opt,
- const struct argp_option *real,
- const char *domain, void *cookie),
- const char *domain, void *cookie)
-{
- unsigned nopts;
- int val = 0;
- const struct argp_option *opt, *real = entry->opt;
- char *so = entry->short_options;
-
- for (opt = real, nopts = entry->num; nopts > 0 && !val; opt++, nopts--)
- if (oshort (opt) && *so == opt->key)
- {
- if (!oalias (opt))
- real = opt;
- if (ovisible (opt))
- val = (*func)(opt, real, domain, cookie);
- so++;
- }
-
- return val;
-}
-
-static inline int
-hol_entry_long_iterate (const struct hol_entry *entry,
- int (*func)(const struct argp_option *opt,
- const struct argp_option *real,
- const char *domain, void *cookie),
- const char *domain, void *cookie)
-{
- unsigned nopts;
- int val = 0;
- const struct argp_option *opt, *real = entry->opt;
-
- for (opt = real, nopts = entry->num; nopts > 0 && !val; opt++, nopts--)
- if (opt->name)
- {
- if (!oalias (opt))
- real = opt;
- if (ovisible (opt))
- val = (*func)(opt, real, domain, cookie);
- }
-
- return val;
-}
-
-/* Iterator that returns true for the first short option. */
-static inline int
-until_short (const struct argp_option *opt, const struct argp_option *real UNUSED,
- const char *domain UNUSED, void *cookie UNUSED)
-{
- return oshort (opt) ? opt->key : 0;
-}
-
-/* Returns the first valid short option in ENTRY, or 0 if there is none. */
-static char
-hol_entry_first_short (const struct hol_entry *entry)
-{
- return hol_entry_short_iterate (entry, until_short,
- entry->argp->argp_domain, 0);
-}
-
-/* Returns the first valid long option in ENTRY, or 0 if there is none. */
-static const char *
-hol_entry_first_long (const struct hol_entry *entry)
-{
- const struct argp_option *opt;
- unsigned num;
- for (opt = entry->opt, num = entry->num; num > 0; opt++, num--)
- if (opt->name && ovisible (opt))
- return opt->name;
- return 0;
-}
-
-/* Returns the entry in HOL with the long option name NAME, or 0 if there is
- none. */
-static struct hol_entry *
-hol_find_entry (struct hol *hol, const char *name)
-{
- struct hol_entry *entry = hol->entries;
- unsigned num_entries = hol->num_entries;
-
- while (num_entries-- > 0)
- {
- const struct argp_option *opt = entry->opt;
- unsigned num_opts = entry->num;
-
- while (num_opts-- > 0)
- if (opt->name && ovisible (opt) && strcmp (opt->name, name) == 0)
- return entry;
- else
- opt++;
-
- entry++;
- }
-
- return 0;
-}
-
-/* If an entry with the long option NAME occurs in HOL, set it's special
- sort position to GROUP. */
-static void
-hol_set_group (struct hol *hol, const char *name, int group)
-{
- struct hol_entry *entry = hol_find_entry (hol, name);
- if (entry)
- entry->group = group;
-}
-
-/* Order by group: 0, 1, 2, ..., n, -m, ..., -2, -1.
- EQ is what to return if GROUP1 and GROUP2 are the same. */
-static int
-group_cmp (int group1, int group2, int eq)
-{
- if (group1 == group2)
- return eq;
- else if ((group1 < 0 && group2 < 0) || (group1 >= 0 && group2 >= 0))
- return group1 - group2;
- else
- return group2 - group1;
-}
-
-/* Compare clusters CL1 & CL2 by the order that they should appear in
- output. */
-static int
-hol_cluster_cmp (const struct hol_cluster *cl1, const struct hol_cluster *cl2)
-{
- /* If one cluster is deeper than the other, use its ancestor at the same
- level, so that finding the common ancestor is straightforward. */
- while (cl1->depth < cl2->depth)
- cl1 = cl1->parent;
- while (cl2->depth < cl1->depth)
- cl2 = cl2->parent;
-
- /* Now reduce both clusters to their ancestors at the point where both have
- a common parent; these can be directly compared. */
- while (cl1->parent != cl2->parent)
- cl1 = cl1->parent, cl2 = cl2->parent;
-
- return group_cmp (cl1->group, cl2->group, cl2->index - cl1->index);
-}
-
-/* Return the ancestor of CL that's just below the root (i.e., has a parent
- of 0). */
-static struct hol_cluster *
-hol_cluster_base (struct hol_cluster *cl)
-{
- while (cl->parent)
- cl = cl->parent;
- return cl;
-}
-
-/* Return true if CL1 is a child of CL2. */
-static int
-hol_cluster_is_child (const struct hol_cluster *cl1,
- const struct hol_cluster *cl2)
-{
- while (cl1 && cl1 != cl2)
- cl1 = cl1->parent;
- return cl1 == cl2;
-}
-
-/* Given the name of a OPTION_DOC option, modifies NAME to start at the tail
- that should be used for comparisons, and returns true iff it should be
- treated as a non-option. */
-
-/* FIXME: Can we use unsigned char * for the argument? */
-static int
-canon_doc_option (const char **name)
-{
- int non_opt;
- /* Skip initial whitespace. */
- while (isspace ( (unsigned char) **name))
- (*name)++;
- /* Decide whether this looks like an option (leading `-') or not. */
- non_opt = (**name != '-');
- /* Skip until part of name used for sorting. */
- while (**name && !isalnum ( (unsigned char) **name))
- (*name)++;
- return non_opt;
-}
-
-/* Order ENTRY1 & ENTRY2 by the order which they should appear in a help
- listing. */
-static int
-hol_entry_cmp (const struct hol_entry *entry1,
- const struct hol_entry *entry2)
-{
- /* The group numbers by which the entries should be ordered; if either is
- in a cluster, then this is just the group within the cluster. */
- int group1 = entry1->group, group2 = entry2->group;
-
- if (entry1->cluster != entry2->cluster)
- {
- /* The entries are not within the same cluster, so we can't compare them
- directly, we have to use the appropiate clustering level too. */
- if (! entry1->cluster)
- /* ENTRY1 is at the `base level', not in a cluster, so we have to
- compare it's group number with that of the base cluster in which
- ENTRY2 resides. Note that if they're in the same group, the
- clustered option always comes laster. */
- return group_cmp (group1, hol_cluster_base (entry2->cluster)->group, -1);
- else if (! entry2->cluster)
- /* Likewise, but ENTRY2's not in a cluster. */
- return group_cmp (hol_cluster_base (entry1->cluster)->group, group2, 1);
- else
- /* Both entries are in clusters, we can just compare the clusters. */
- return hol_cluster_cmp (entry1->cluster, entry2->cluster);
- }
- else if (group1 == group2)
- /* The entries are both in the same cluster and group, so compare them
- alphabetically. */
- {
- int short1 = hol_entry_first_short (entry1);
- int short2 = hol_entry_first_short (entry2);
- int doc1 = odoc (entry1->opt);
- int doc2 = odoc (entry2->opt);
- /* FIXME: Can we use unsigned char * instead? */
- const char *long1 = hol_entry_first_long (entry1);
- const char *long2 = hol_entry_first_long (entry2);
-
- if (doc1)
- doc1 = canon_doc_option (&long1);
- if (doc2)
- doc2 = canon_doc_option (&long2);
-
- if (doc1 != doc2)
- /* `documentation' options always follow normal options (or
- documentation options that *look* like normal options). */
- return doc1 - doc2;
- else if (!short1 && !short2 && long1 && long2)
- /* Only long options. */
- return __strcasecmp (long1, long2);
- else
- /* Compare short/short, long/short, short/long, using the first
- character of long options. Entries without *any* valid
- options (such as options with OPTION_HIDDEN set) will be put
- first, but as they're not displayed, it doesn't matter where
- they are. */
- {
- unsigned char first1 = short1 ? short1 : long1 ? *long1 : 0;
- unsigned char first2 = short2 ? short2 : long2 ? *long2 : 0;
-#ifdef _tolower
- int lower_cmp = _tolower (first1) - _tolower (first2);
-#else
- int lower_cmp = tolower (first1) - tolower (first2);
-#endif
- /* Compare ignoring case, except when the options are both the
- same letter, in which case lower-case always comes first. */
- /* NOTE: The subtraction below does the right thing
- even with eight-bit chars: first1 and first2 are
- converted to int *before* the subtraction. */
- return lower_cmp ? lower_cmp : first2 - first1;
- }
- }
- else
- /* Within the same cluster, but not the same group, so just compare
- groups. */
- return group_cmp (group1, group2, 0);
-}
-
-/* Version of hol_entry_cmp with correct signature for qsort. */
-static int
-hol_entry_qcmp (const void *entry1_v, const void *entry2_v)
-{
- return hol_entry_cmp (entry1_v, entry2_v);
-}
-
-/* Sort HOL by group and alphabetically by option name (with short options
- taking precedence over long). Since the sorting is for display purposes
- only, the shadowing of options isn't effected. */
-static void
-hol_sort (struct hol *hol)
-{
- if (hol->num_entries > 0)
- qsort (hol->entries, hol->num_entries, sizeof (struct hol_entry),
- hol_entry_qcmp);
-}
-
-/* Append MORE to HOL, destroying MORE in the process. Options in HOL shadow
- any in MORE with the same name. */
-static void
-hol_append (struct hol *hol, struct hol *more)
-{
- struct hol_cluster **cl_end = &hol->clusters;
-
- /* Steal MORE's cluster list, and add it to the end of HOL's. */
- while (*cl_end)
- cl_end = &(*cl_end)->next;
- *cl_end = more->clusters;
- more->clusters = 0;
-
- /* Merge entries. */
- if (more->num_entries > 0)
- {
- if (hol->num_entries == 0)
- {
- hol->num_entries = more->num_entries;
- hol->entries = more->entries;
- hol->short_options = more->short_options;
- more->num_entries = 0; /* Mark MORE's fields as invalid. */
- }
- else
- /* Append the entries in MORE to those in HOL, taking care to only add
- non-shadowed SHORT_OPTIONS values. */
- {
- unsigned left;
- char *so, *more_so;
- struct hol_entry *e;
- unsigned num_entries = hol->num_entries + more->num_entries;
- struct hol_entry *entries =
- malloc (num_entries * sizeof (struct hol_entry));
- unsigned hol_so_len = strlen (hol->short_options);
- char *short_options =
- malloc (hol_so_len + strlen (more->short_options) + 1);
-
- __mempcpy (__mempcpy (entries, hol->entries,
- hol->num_entries * sizeof (struct hol_entry)),
- more->entries,
- more->num_entries * sizeof (struct hol_entry));
-
- __mempcpy (short_options, hol->short_options, hol_so_len);
-
- /* Fix up the short options pointers from HOL. */
- for (e = entries, left = hol->num_entries; left > 0; e++, left--)
- e->short_options += (short_options - hol->short_options);
-
- /* Now add the short options from MORE, fixing up its entries
- too. */
- so = short_options + hol_so_len;
- more_so = more->short_options;
- for (left = more->num_entries; left > 0; e++, left--)
- {
- int opts_left;
- const struct argp_option *opt;
-
- e->short_options = so;
-
- for (opts_left = e->num, opt = e->opt; opts_left; opt++, opts_left--)
- {
- int ch = *more_so;
- if (oshort (opt) && ch == opt->key)
- /* The next short option in MORE_SO, CH, is from OPT. */
- {
- if (! find_char (ch, short_options,
- short_options + hol_so_len))
- /* The short option CH isn't shadowed by HOL's options,
- so add it to the sum. */
- *so++ = ch;
- more_so++;
- }
- }
- }
-
- *so = '\0';
-
- free (hol->entries);
- free (hol->short_options);
-
- hol->entries = entries;
- hol->num_entries = num_entries;
- hol->short_options = short_options;
- }
- }
-
- hol_free (more);
-}
-
-/* Inserts enough spaces to make sure STREAM is at column COL. */
-static void
-indent_to (argp_fmtstream_t stream, unsigned col)
-{
- int needed = col - __argp_fmtstream_point (stream);
- while (needed-- > 0)
- __argp_fmtstream_putc (stream, ' ');
-}
-
-/* Output to STREAM either a space, or a newline if there isn't room for at
- least ENSURE characters before the right margin. */
-static void
-space (argp_fmtstream_t stream, size_t ensure)
-{
- if (__argp_fmtstream_point (stream) + ensure
- >= __argp_fmtstream_rmargin (stream))
- __argp_fmtstream_putc (stream, '\n');
- else
- __argp_fmtstream_putc (stream, ' ');
-}
-
-/* If the option REAL has an argument, we print it in using the printf
- format REQ_FMT or OPT_FMT depending on whether it's a required or
- optional argument. */
-static void
-arg (const struct argp_option *real, const char *req_fmt, const char *opt_fmt,
- const char *domain UNUSED, argp_fmtstream_t stream)
-{
- if (real->arg)
- {
- if (real->flags & OPTION_ARG_OPTIONAL)
- __argp_fmtstream_printf (stream, opt_fmt,
- dgettext (domain, real->arg));
- else
- __argp_fmtstream_printf (stream, req_fmt,
- dgettext (domain, real->arg));
- }
-}
-
-/* Helper functions for hol_entry_help. */
-
-/* State used during the execution of hol_help. */
-struct hol_help_state
-{
- /* PREV_ENTRY should contain the previous entry printed, or 0. */
- struct hol_entry *prev_entry;
-
- /* If an entry is in a different group from the previous one, and SEP_GROUPS
- is true, then a blank line will be printed before any output. */
- int sep_groups;
-
- /* True if a duplicate option argument was suppressed (only ever set if
- UPARAMS.dup_args is false). */
- int suppressed_dup_arg;
-};
-
-/* Some state used while printing a help entry (used to communicate with
- helper functions). See the doc for hol_entry_help for more info, as most
- of the fields are copied from its arguments. */
-struct pentry_state
-{
- const struct hol_entry *entry;
- argp_fmtstream_t stream;
- struct hol_help_state *hhstate;
-
- /* True if nothing's been printed so far. */
- int first;
-
- /* If non-zero, the state that was used to print this help. */
- const struct argp_state *state;
-};
-
-/* If a user doc filter should be applied to DOC, do so. */
-static const char *
-filter_doc (const char *doc, int key, const struct argp *argp,
- const struct argp_state *state)
-{
- if (argp->help_filter)
- /* We must apply a user filter to this output. */
- {
- void *input = __argp_input (argp, state);
- return (*argp->help_filter) (key, doc, input);
- }
- else
- /* No filter. */
- return doc;
-}
-
-/* Prints STR as a header line, with the margin lines set appropiately, and
- notes the fact that groups should be separated with a blank line. ARGP is
- the argp that should dictate any user doc filtering to take place. Note
- that the previous wrap margin isn't restored, but the left margin is reset
- to 0. */
-static void
-print_header (const char *str, const struct argp *argp,
- struct pentry_state *pest)
-{
- const char *tstr = dgettext (argp->argp_domain, str);
- const char *fstr = filter_doc (tstr, ARGP_KEY_HELP_HEADER, argp, pest->state);
-
- if (fstr)
- {
- if (*fstr)
- {
- if (pest->hhstate->prev_entry)
- /* Precede with a blank line. */
- __argp_fmtstream_putc (pest->stream, '\n');
- indent_to (pest->stream, uparams.header_col);
- __argp_fmtstream_set_lmargin (pest->stream, uparams.header_col);
- __argp_fmtstream_set_wmargin (pest->stream, uparams.header_col);
- __argp_fmtstream_puts (pest->stream, fstr);
- __argp_fmtstream_set_lmargin (pest->stream, 0);
- __argp_fmtstream_putc (pest->stream, '\n');
- }
-
- pest->hhstate->sep_groups = 1; /* Separate subsequent groups. */
- }
-
- if (fstr != tstr)
- free ((char *) fstr);
-}
-
-/* Inserts a comma if this isn't the first item on the line, and then makes
- sure we're at least to column COL. If this *is* the first item on a line,
- prints any pending whitespace/headers that should precede this line. Also
- clears FIRST. */
-static void
-comma (unsigned col, struct pentry_state *pest)
-{
- if (pest->first)
- {
- const struct hol_entry *pe = pest->hhstate->prev_entry;
- const struct hol_cluster *cl = pest->entry->cluster;
-
- if (pest->hhstate->sep_groups && pe && pest->entry->group != pe->group)
- __argp_fmtstream_putc (pest->stream, '\n');
-
- if (cl && cl->header && *cl->header
- && (!pe
- || (pe->cluster != cl
- && !hol_cluster_is_child (pe->cluster, cl))))
- /* If we're changing clusters, then this must be the start of the
- ENTRY's cluster unless that is an ancestor of the previous one
- (in which case we had just popped into a sub-cluster for a bit).
- If so, then print the cluster's header line. */
- {
- int old_wm = __argp_fmtstream_wmargin (pest->stream);
- print_header (cl->header, cl->argp, pest);
- __argp_fmtstream_set_wmargin (pest->stream, old_wm);
- }
-
- pest->first = 0;
- }
- else
- __argp_fmtstream_puts (pest->stream, ", ");
-
- indent_to (pest->stream, col);
-}
-
-/* Print help for ENTRY to STREAM. */
-static void
-hol_entry_help (struct hol_entry *entry, const struct argp_state *state,
- argp_fmtstream_t stream, struct hol_help_state *hhstate)
-{
- unsigned num;
- const struct argp_option *real = entry->opt, *opt;
- char *so = entry->short_options;
- int have_long_opt = 0; /* We have any long options. */
- /* Saved margins. */
- int old_lm = __argp_fmtstream_set_lmargin (stream, 0);
- int old_wm = __argp_fmtstream_wmargin (stream);
- /* PEST is a state block holding some of our variables that we'd like to
- share with helper functions. */
-
- /* Decent initializers are a GNU extension, so don't use it here. */
- struct pentry_state pest;
- pest.entry = entry;
- pest.stream = stream;
- pest.hhstate = hhstate;
- pest.first = 1;
- pest.state = state;
-
- if (! odoc (real))
- for (opt = real, num = entry->num; num > 0; opt++, num--)
- if (opt->name && ovisible (opt))
- {
- have_long_opt = 1;
- break;
- }
-
- /* First emit short options. */
- __argp_fmtstream_set_wmargin (stream, uparams.short_opt_col); /* For truly bizarre cases. */
- for (opt = real, num = entry->num; num > 0; opt++, num--)
- if (oshort (opt) && opt->key == *so)
- /* OPT has a valid (non shadowed) short option. */
- {
- if (ovisible (opt))
- {
- comma (uparams.short_opt_col, &pest);
- __argp_fmtstream_putc (stream, '-');
- __argp_fmtstream_putc (stream, *so);
- if (!have_long_opt || uparams.dup_args)
- arg (real, " %s", "[%s]", state->root_argp->argp_domain, stream);
- else if (real->arg)
- hhstate->suppressed_dup_arg = 1;
- }
- so++;
- }
-
- /* Now, long options. */
- if (odoc (real))
- /* A `documentation' option. */
- {
- __argp_fmtstream_set_wmargin (stream, uparams.doc_opt_col);
- for (opt = real, num = entry->num; num > 0; opt++, num--)
- if (opt->name && ovisible (opt))
- {
- comma (uparams.doc_opt_col, &pest);
- /* Calling gettext here isn't quite right, since sorting will
- have been done on the original; but documentation options
- should be pretty rare anyway... */
- __argp_fmtstream_puts (stream,
- dgettext (state->root_argp->argp_domain,
- opt->name));
- }
- }
- else
- /* A real long option. */
- {
- int first_long_opt = 1;
-
- __argp_fmtstream_set_wmargin (stream, uparams.long_opt_col);
- for (opt = real, num = entry->num; num > 0; opt++, num--)
- if (opt->name && ovisible (opt))
- {
- comma (uparams.long_opt_col, &pest);
- __argp_fmtstream_printf (stream, "--%s", opt->name);
- if (first_long_opt || uparams.dup_args)
- arg (real, "=%s", "[=%s]", state->root_argp->argp_domain,
- stream);
- else if (real->arg)
- hhstate->suppressed_dup_arg = 1;
- }
- }
-
- /* Next, documentation strings. */
- __argp_fmtstream_set_lmargin (stream, 0);
-
- if (pest.first)
- {
- /* Didn't print any switches, what's up? */
- if (!oshort (real) && !real->name)
- /* This is a group header, print it nicely. */
- print_header (real->doc, entry->argp, &pest);
- else
- /* Just a totally shadowed option or null header; print nothing. */
- goto cleanup; /* Just return, after cleaning up. */
- }
- else
- {
- const char *tstr = real->doc ? dgettext (state->root_argp->argp_domain,
- real->doc) : 0;
- const char *fstr = filter_doc (tstr, real->key, entry->argp, state);
- if (fstr && *fstr)
- {
- unsigned int col = __argp_fmtstream_point (stream);
-
- __argp_fmtstream_set_lmargin (stream, uparams.opt_doc_col);
- __argp_fmtstream_set_wmargin (stream, uparams.opt_doc_col);
-
- if (col > (unsigned int) (uparams.opt_doc_col + 3))
- __argp_fmtstream_putc (stream, '\n');
- else if (col >= (unsigned int) uparams.opt_doc_col)
- __argp_fmtstream_puts (stream, " ");
- else
- indent_to (stream, uparams.opt_doc_col);
-
- __argp_fmtstream_puts (stream, fstr);
- }
- if (fstr && fstr != tstr)
- free ((char *) fstr);
-
- /* Reset the left margin. */
- __argp_fmtstream_set_lmargin (stream, 0);
- __argp_fmtstream_putc (stream, '\n');
- }
-
- hhstate->prev_entry = entry;
-
-cleanup:
- __argp_fmtstream_set_lmargin (stream, old_lm);
- __argp_fmtstream_set_wmargin (stream, old_wm);
-}
-
-/* Output a long help message about the options in HOL to STREAM. */
-static void
-hol_help (struct hol *hol, const struct argp_state *state,
- argp_fmtstream_t stream)
-{
- unsigned num;
- struct hol_entry *entry;
- struct hol_help_state hhstate = { 0, 0, 0 };
-
- for (entry = hol->entries, num = hol->num_entries; num > 0; entry++, num--)
- hol_entry_help (entry, state, stream, &hhstate);
-
- if (hhstate.suppressed_dup_arg && uparams.dup_args_note)
- {
- const char *tstr = dgettext (state->root_argp->argp_domain, "\
-Mandatory or optional arguments to long options are also mandatory or \
-optional for any corresponding short options.");
- const char *fstr = filter_doc (tstr, ARGP_KEY_HELP_DUP_ARGS_NOTE,
- state ? state->root_argp : 0, state);
- if (fstr && *fstr)
- {
- __argp_fmtstream_putc (stream, '\n');
- __argp_fmtstream_puts (stream, fstr);
- __argp_fmtstream_putc (stream, '\n');
- }
- if (fstr && fstr != tstr)
- free ((char *) fstr);
- }
-}
-
-/* Helper functions for hol_usage. */
-
-/* If OPT is a short option without an arg, append its key to the string
- pointer pointer to by COOKIE, and advance the pointer. */
-static int
-add_argless_short_opt (const struct argp_option *opt,
- const struct argp_option *real,
- const char *domain UNUSED, void *cookie)
-{
- char **snao_end = cookie;
- if (!(opt->arg || real->arg)
- && !((opt->flags | real->flags) & OPTION_NO_USAGE))
- *(*snao_end)++ = opt->key;
- return 0;
-}
-
-/* If OPT is a short option with an arg, output a usage entry for it to the
- stream pointed at by COOKIE. */
-static int
-usage_argful_short_opt (const struct argp_option *opt,
- const struct argp_option *real,
- const char *domain UNUSED, void *cookie)
-{
- argp_fmtstream_t stream = cookie;
- const char *arg = opt->arg;
- int flags = opt->flags | real->flags;
-
- if (! arg)
- arg = real->arg;
-
- if (arg && !(flags & OPTION_NO_USAGE))
- {
- arg = dgettext (domain, arg);
-
- if (flags & OPTION_ARG_OPTIONAL)
- __argp_fmtstream_printf (stream, " [-%c[%s]]", opt->key, arg);
- else
- {
- /* Manually do line wrapping so that it (probably) won't
- get wrapped at the embedded space. */
- space (stream, 6 + strlen (arg));
- __argp_fmtstream_printf (stream, "[-%c %s]", opt->key, arg);
- }
- }
-
- return 0;
-}
-
-/* Output a usage entry for the long option opt to the stream pointed at by
- COOKIE. */
-static int
-usage_long_opt (const struct argp_option *opt,
- const struct argp_option *real,
- const char *domain UNUSED, void *cookie)
-{
- argp_fmtstream_t stream = cookie;
- const char *arg = opt->arg;
- int flags = opt->flags | real->flags;
-
- if (! arg)
- arg = real->arg;
-
- if (! (flags & OPTION_NO_USAGE))
- {
- if (arg)
- {
- arg = dgettext (domain, arg);
- if (flags & OPTION_ARG_OPTIONAL)
- __argp_fmtstream_printf (stream, " [--%s[=%s]]", opt->name, arg);
- else
- __argp_fmtstream_printf (stream, " [--%s=%s]", opt->name, arg);
- }
- else
- __argp_fmtstream_printf (stream, " [--%s]", opt->name);
- }
-
- return 0;
-}
-
-/* Print a short usage description for the arguments in HOL to STREAM. */
-static void
-hol_usage (struct hol *hol, argp_fmtstream_t stream)
-{
- if (hol->num_entries > 0)
- {
- unsigned nentries;
- struct hol_entry *entry;
- char *short_no_arg_opts = alloca (strlen (hol->short_options) + 1);
- char *snao_end = short_no_arg_opts;
-
- /* First we put a list of short options without arguments. */
- for (entry = hol->entries, nentries = hol->num_entries
- ; nentries > 0
- ; entry++, nentries--)
- hol_entry_short_iterate (entry, add_argless_short_opt,
- entry->argp->argp_domain, &snao_end);
- if (snao_end > short_no_arg_opts)
- {
- *snao_end++ = 0;
- __argp_fmtstream_printf (stream, " [-%s]", short_no_arg_opts);
- }
-
- /* Now a list of short options *with* arguments. */
- for (entry = hol->entries, nentries = hol->num_entries
- ; nentries > 0
- ; entry++, nentries--)
- hol_entry_short_iterate (entry, usage_argful_short_opt,
- entry->argp->argp_domain, stream);
-
- /* Finally, a list of long options (whew!). */
- for (entry = hol->entries, nentries = hol->num_entries
- ; nentries > 0
- ; entry++, nentries--)
- hol_entry_long_iterate (entry, usage_long_opt,
- entry->argp->argp_domain, stream);
- }
-}
-
-/* Make a HOL containing all levels of options in ARGP. CLUSTER is the
- cluster in which ARGP's entries should be clustered, or 0. */
-static struct hol *
-argp_hol (const struct argp *argp, struct hol_cluster *cluster)
-{
- const struct argp_child *child = argp->children;
- struct hol *hol = make_hol (argp, cluster);
- if (child)
- while (child->argp)
- {
- struct hol_cluster *child_cluster =
- ((child->group || child->header)
- /* Put CHILD->argp within its own cluster. */
- ? hol_add_cluster (hol, child->group, child->header,
- child - argp->children, cluster, argp)
- /* Just merge it into the parent's cluster. */
- : cluster);
- hol_append (hol, argp_hol (child->argp, child_cluster)) ;
- child++;
- }
- return hol;
-}
-
-/* Calculate how many different levels with alternative args strings exist in
- ARGP. */
-static size_t
-argp_args_levels (const struct argp *argp)
-{
- size_t levels = 0;
- const struct argp_child *child = argp->children;
-
- if (argp->args_doc && strchr (argp->args_doc, '\n'))
- levels++;
-
- if (child)
- while (child->argp)
- levels += argp_args_levels ((child++)->argp);
-
- return levels;
-}
-
-/* Print all the non-option args documented in ARGP to STREAM. Any output is
- preceded by a space. LEVELS is a pointer to a byte vector the length
- returned by argp_args_levels; it should be initialized to zero, and
- updated by this routine for the next call if ADVANCE is true. True is
- returned as long as there are more patterns to output. */
-static int
-argp_args_usage (const struct argp *argp, const struct argp_state *state,
- char **levels, int advance, argp_fmtstream_t stream)
-{
- char *our_level = *levels;
- int multiple = 0;
- const struct argp_child *child = argp->children;
- const char *tdoc = dgettext (argp->argp_domain, argp->args_doc), *nl = 0;
- const char *fdoc = filter_doc (tdoc, ARGP_KEY_HELP_ARGS_DOC, argp, state);
-
- if (fdoc)
- {
- const char *cp = fdoc;
- nl = __strchrnul (cp, '\n');
- if (*nl != '\0')
- /* This is a `multi-level' args doc; advance to the correct position
- as determined by our state in LEVELS, and update LEVELS. */
- {
- int i;
- multiple = 1;
- for (i = 0; i < *our_level; i++)
- cp = nl + 1, nl = __strchrnul (cp, '\n');
- (*levels)++;
- }
-
- /* Manually do line wrapping so that it (probably) won't get wrapped at
- any embedded spaces. */
- space (stream, 1 + nl - cp);
-
- __argp_fmtstream_write (stream, cp, nl - cp);
- }
- if (fdoc && fdoc != tdoc)
- free ((char *)fdoc); /* Free user's modified doc string. */
-
- if (child)
- while (child->argp)
- advance = !argp_args_usage ((child++)->argp, state, levels, advance, stream);
-
- if (advance && multiple)
- {
- /* Need to increment our level. */
- if (*nl)
- /* There's more we can do here. */
- {
- (*our_level)++;
- advance = 0; /* Our parent shouldn't advance also. */
- }
- else if (*our_level > 0)
- /* We had multiple levels, but used them up; reset to zero. */
- *our_level = 0;
- }
-
- return !advance;
-}
-
-/* Print the documentation for ARGP to STREAM; if POST is false, then
- everything preceeding a `\v' character in the documentation strings (or
- the whole string, for those with none) is printed, otherwise, everything
- following the `\v' character (nothing for strings without). Each separate
- bit of documentation is separated a blank line, and if PRE_BLANK is true,
- then the first is as well. If FIRST_ONLY is true, only the first
- occurrence is output. Returns true if anything was output. */
-static int
-argp_doc (const struct argp *argp, const struct argp_state *state,
- int post, int pre_blank, int first_only,
- argp_fmtstream_t stream)
-{
- const char *text;
- const char *inp_text;
- void *input = 0;
- int anything = 0;
- size_t inp_text_limit = 0;
- const char *doc = dgettext (argp->argp_domain, argp->doc);
- const struct argp_child *child = argp->children;
-
- if (doc)
- {
- char *vt = strchr (doc, '\v');
- inp_text = post ? (vt ? vt + 1 : 0) : doc;
- inp_text_limit = (!post && vt) ? (vt - doc) : 0;
- }
- else
- inp_text = 0;
-
- if (argp->help_filter)
- /* We have to filter the doc strings. */
- {
- if (inp_text_limit)
- /* Copy INP_TEXT so that it's nul-terminated. */
- inp_text = STRNDUP (inp_text, inp_text_limit);
- input = __argp_input (argp, state);
- text =
- (*argp->help_filter) (post
- ? ARGP_KEY_HELP_POST_DOC
- : ARGP_KEY_HELP_PRE_DOC,
- inp_text, input);
- }
- else
- text = (const char *) inp_text;
-
- if (text)
- {
- if (pre_blank)
- __argp_fmtstream_putc (stream, '\n');
-
- if (text == inp_text && inp_text_limit)
- __argp_fmtstream_write (stream, inp_text, inp_text_limit);
- else
- __argp_fmtstream_puts (stream, text);
-
- if (__argp_fmtstream_point (stream) > __argp_fmtstream_lmargin (stream))
- __argp_fmtstream_putc (stream, '\n');
-
- anything = 1;
- }
-
- if (text && text != inp_text)
- free ((char *) text); /* Free TEXT returned from the help filter. */
- if (inp_text && inp_text_limit && argp->help_filter)
- free ((char *) inp_text); /* We copied INP_TEXT, so free it now. */
-
- if (post && argp->help_filter)
- /* Now see if we have to output a ARGP_KEY_HELP_EXTRA text. */
- {
- text = (*argp->help_filter) (ARGP_KEY_HELP_EXTRA, 0, input);
- if (text)
- {
- if (anything || pre_blank)
- __argp_fmtstream_putc (stream, '\n');
- __argp_fmtstream_puts (stream, text);
- free ((char *) text);
- if (__argp_fmtstream_point (stream)
- > __argp_fmtstream_lmargin (stream))
- __argp_fmtstream_putc (stream, '\n');
- anything = 1;
- }
- }
-
- if (child)
- while (child->argp && !(first_only && anything))
- anything |=
- argp_doc ((child++)->argp, state,
- post, anything || pre_blank, first_only,
- stream);
-
- return anything;
-}
-
-/* Output a usage message for ARGP to STREAM. If called from
- argp_state_help, STATE is the relevent parsing state. FLAGS are from the
- set ARGP_HELP_*. NAME is what to use wherever a `program name' is
- needed. */
-
-static void
-_help (const struct argp *argp, const struct argp_state *state, FILE *stream,
- unsigned flags, const char *name)
-{
- int anything = 0; /* Whether we've output anything. */
- struct hol *hol = 0;
- argp_fmtstream_t fs;
-
- if (! stream)
- return;
-
- FLOCKFILE (stream);
-
- if (! uparams.valid)
- fill_in_uparams (state);
-
- fs = __argp_make_fmtstream (stream, 0, uparams.rmargin, 0);
- if (! fs)
- {
- FUNLOCKFILE (stream);
- return;
- }
-
- if (flags & (ARGP_HELP_USAGE | ARGP_HELP_SHORT_USAGE | ARGP_HELP_LONG))
- {
- hol = argp_hol (argp, 0);
-
- /* If present, these options always come last. */
- hol_set_group (hol, "help", -1);
- hol_set_group (hol, "version", -1);
-
- hol_sort (hol);
- }
-
- if (flags & (ARGP_HELP_USAGE | ARGP_HELP_SHORT_USAGE))
- /* Print a short `Usage:' message. */
- {
- int first_pattern = 1, more_patterns;
- size_t num_pattern_levels = argp_args_levels (argp);
- char *pattern_levels = alloca (num_pattern_levels);
-
- memset (pattern_levels, 0, num_pattern_levels);
-
- do
- {
- int old_lm;
- int old_wm = __argp_fmtstream_set_wmargin (fs, uparams.usage_indent);
- char *levels = pattern_levels;
-
- if (first_pattern)
- __argp_fmtstream_printf (fs, "%s %s",
- dgettext (argp->argp_domain, "Usage:"),
- name);
- else
- __argp_fmtstream_printf (fs, "%s %s",
- dgettext (argp->argp_domain, " or: "),
- name);
-
- /* We set the lmargin as well as the wmargin, because hol_usage
- manually wraps options with newline to avoid annoying breaks. */
- old_lm = __argp_fmtstream_set_lmargin (fs, uparams.usage_indent);
-
- if (flags & ARGP_HELP_SHORT_USAGE)
- /* Just show where the options go. */
- {
- if (hol->num_entries > 0)
- __argp_fmtstream_puts (fs, dgettext (argp->argp_domain,
- " [OPTION...]"));
- }
- else
- /* Actually print the options. */
- {
- hol_usage (hol, fs);
- flags |= ARGP_HELP_SHORT_USAGE; /* But only do so once. */
- }
-
- more_patterns = argp_args_usage (argp, state, &levels, 1, fs);
-
- __argp_fmtstream_set_wmargin (fs, old_wm);
- __argp_fmtstream_set_lmargin (fs, old_lm);
-
- __argp_fmtstream_putc (fs, '\n');
- anything = 1;
-
- first_pattern = 0;
- }
- while (more_patterns);
- }
-
- if (flags & ARGP_HELP_PRE_DOC)
- anything |= argp_doc (argp, state, 0, 0, 1, fs);
-
- if (flags & ARGP_HELP_SEE)
- {
- __argp_fmtstream_printf (fs, dgettext (argp->argp_domain, "\
-Try `%s --help' or `%s --usage' for more information.\n"),
- name, name);
- anything = 1;
- }
-
- if (flags & ARGP_HELP_LONG)
- /* Print a long, detailed help message. */
- {
- /* Print info about all the options. */
- if (hol->num_entries > 0)
- {
- if (anything)
- __argp_fmtstream_putc (fs, '\n');
- hol_help (hol, state, fs);
- anything = 1;
- }
- }
-
- if (flags & ARGP_HELP_POST_DOC)
- /* Print any documentation strings at the end. */
- anything |= argp_doc (argp, state, 1, anything, 0, fs);
-
- if ((flags & ARGP_HELP_BUG_ADDR) && argp_program_bug_address)
- {
- if (anything)
- __argp_fmtstream_putc (fs, '\n');
- __argp_fmtstream_printf (fs, dgettext (argp->argp_domain,
- "Report bugs to %s.\n"),
- argp_program_bug_address);
- anything = 1;
- }
-
- FUNLOCKFILE (stream);
-
- if (hol)
- hol_free (hol);
-
- __argp_fmtstream_free (fs);
-}
-
-/* Output a usage message for ARGP to STREAM. FLAGS are from the set
- ARGP_HELP_*. NAME is what to use wherever a `program name' is needed. */
-void __argp_help (const struct argp *argp, FILE *stream,
- unsigned flags, char *name)
-{
- _help (argp, 0, stream, flags, name);
-}
-#ifdef weak_alias
-weak_alias (__argp_help, argp_help)
-#endif
-
-char *__argp_basename(char *name)
-{
- char *short_name = strrchr(name, '/');
- return short_name ? short_name + 1 : name;
-}
-
-char *
-__argp_short_program_name(const struct argp_state *state)
-{
- if (state)
- return state->name;
-#if HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME
- return program_invocation_short_name;
-#elif HAVE_DECL_PROGRAM_INVOCATION_NAME
- return __argp_basename(program_invocation_name);
-#else /* !HAVE_DECL_PROGRAM_INVOCATION_NAME */
- /* FIXME: What now? Miles suggests that it is better to use NULL,
- but currently the value is passed on directly to fputs_unlocked,
- so that requires more changes. */
-# if __GNUC__
- return "";
-# endif /* __GNUC__ */
-#endif /* !HAVE_DECL_PROGRAM_INVOCATION_NAME */
-}
-
-/* Output, if appropriate, a usage message for STATE to STREAM. FLAGS are
- from the set ARGP_HELP_*. */
-void
-__argp_state_help (const struct argp_state *state, FILE *stream, unsigned flags)
-{
- if ((!state || ! (state->flags & ARGP_NO_ERRS)) && stream)
- {
- if (state && (state->flags & ARGP_LONG_ONLY))
- flags |= ARGP_HELP_LONG_ONLY;
-
- _help (state ? state->root_argp : 0, state, stream, flags,
- __argp_short_program_name(state));
-
- if (!state || ! (state->flags & ARGP_NO_EXIT))
- {
- if (flags & ARGP_HELP_EXIT_ERR)
- exit (argp_err_exit_status);
- if (flags & ARGP_HELP_EXIT_OK)
- exit (0);
- }
- }
-}
-#ifdef weak_alias
-weak_alias (__argp_state_help, argp_state_help)
-#endif
-
-/* If appropriate, print the printf string FMT and following args, preceded
- by the program name and `:', to stderr, and followed by a `Try ... --help'
- message, then exit (1). */
-void
-__argp_error (const struct argp_state *state, const char *fmt, ...)
-{
- if (!state || !(state->flags & ARGP_NO_ERRS))
- {
- FILE *stream = state ? state->err_stream : stderr;
-
- if (stream)
- {
- va_list ap;
-
- FLOCKFILE (stream);
-
- FPUTS_UNLOCKED (__argp_short_program_name(state),
- stream);
- PUTC_UNLOCKED (':', stream);
- PUTC_UNLOCKED (' ', stream);
-
- va_start (ap, fmt);
- vfprintf (stream, fmt, ap);
- va_end (ap);
-
- PUTC_UNLOCKED ('\n', stream);
-
- __argp_state_help (state, stream, ARGP_HELP_STD_ERR);
-
- FUNLOCKFILE (stream);
- }
- }
-}
-#ifdef weak_alias
-weak_alias (__argp_error, argp_error)
-#endif
-
-/* Similar to the standard gnu error-reporting function error(), but will
- respect the ARGP_NO_EXIT and ARGP_NO_ERRS flags in STATE, and will print
- to STATE->err_stream. This is useful for argument parsing code that is
- shared between program startup (when exiting is desired) and runtime
- option parsing (when typically an error code is returned instead). The
- difference between this function and argp_error is that the latter is for
- *parsing errors*, and the former is for other problems that occur during
- parsing but don't reflect a (syntactic) problem with the input. */
-void
-__argp_failure (const struct argp_state *state, int status, int errnum,
- const char *fmt, ...)
-{
- if (!state || !(state->flags & ARGP_NO_ERRS))
- {
- FILE *stream = state ? state->err_stream : stderr;
-
- if (stream)
- {
- FLOCKFILE (stream);
-
- FPUTS_UNLOCKED (__argp_short_program_name(state),
- stream);
-
- if (fmt)
- {
- va_list ap;
-
- PUTC_UNLOCKED (':', stream);
- PUTC_UNLOCKED (' ', stream);
-
- va_start (ap, fmt);
- vfprintf (stream, fmt, ap);
- va_end (ap);
- }
-
- if (errnum)
- {
- PUTC_UNLOCKED (':', stream);
- PUTC_UNLOCKED (' ', stream);
- fputs (STRERROR (errnum), stream);
- }
-
- PUTC_UNLOCKED ('\n', stream);
-
- FUNLOCKFILE (stream);
-
- if (status && (!state || !(state->flags & ARGP_NO_EXIT)))
- exit (status);
- }
- }
-}
-#ifdef weak_alias
-weak_alias (__argp_failure, argp_failure)
-#endif
diff --git a/argp-standalone/argp-namefrob.h b/argp-standalone/argp-namefrob.h
deleted file mode 100644
index 0ce11481a7b..00000000000
--- a/argp-standalone/argp-namefrob.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/* Name frobnication for compiling argp outside of glibc
- Copyright (C) 1997 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-#if !_LIBC
-/* This code is written for inclusion in gnu-libc, and uses names in the
- namespace reserved for libc. If we're not compiling in libc, define those
- names to be the normal ones instead. */
-
-/* argp-parse functions */
-#undef __argp_parse
-#define __argp_parse argp_parse
-#undef __option_is_end
-#define __option_is_end _option_is_end
-#undef __option_is_short
-#define __option_is_short _option_is_short
-#undef __argp_input
-#define __argp_input _argp_input
-
-/* argp-help functions */
-#undef __argp_help
-#define __argp_help argp_help
-#undef __argp_error
-#define __argp_error argp_error
-#undef __argp_failure
-#define __argp_failure argp_failure
-#undef __argp_state_help
-#define __argp_state_help argp_state_help
-#undef __argp_usage
-#define __argp_usage argp_usage
-#undef __argp_basename
-#define __argp_basename _argp_basename
-#undef __argp_short_program_name
-#define __argp_short_program_name _argp_short_program_name
-
-/* argp-fmtstream functions */
-#undef __argp_make_fmtstream
-#define __argp_make_fmtstream argp_make_fmtstream
-#undef __argp_fmtstream_free
-#define __argp_fmtstream_free argp_fmtstream_free
-#undef __argp_fmtstream_putc
-#define __argp_fmtstream_putc argp_fmtstream_putc
-#undef __argp_fmtstream_puts
-#define __argp_fmtstream_puts argp_fmtstream_puts
-#undef __argp_fmtstream_write
-#define __argp_fmtstream_write argp_fmtstream_write
-#undef __argp_fmtstream_printf
-#define __argp_fmtstream_printf argp_fmtstream_printf
-#undef __argp_fmtstream_set_lmargin
-#define __argp_fmtstream_set_lmargin argp_fmtstream_set_lmargin
-#undef __argp_fmtstream_set_rmargin
-#define __argp_fmtstream_set_rmargin argp_fmtstream_set_rmargin
-#undef __argp_fmtstream_set_wmargin
-#define __argp_fmtstream_set_wmargin argp_fmtstream_set_wmargin
-#undef __argp_fmtstream_point
-#define __argp_fmtstream_point argp_fmtstream_point
-#undef __argp_fmtstream_update
-#define __argp_fmtstream_update _argp_fmtstream_update
-#undef __argp_fmtstream_ensure
-#define __argp_fmtstream_ensure _argp_fmtstream_ensure
-#undef __argp_fmtstream_lmargin
-#define __argp_fmtstream_lmargin argp_fmtstream_lmargin
-#undef __argp_fmtstream_rmargin
-#define __argp_fmtstream_rmargin argp_fmtstream_rmargin
-#undef __argp_fmtstream_wmargin
-#define __argp_fmtstream_wmargin argp_fmtstream_wmargin
-
-/* normal libc functions we call */
-#undef __sleep
-#define __sleep sleep
-#undef __strcasecmp
-#define __strcasecmp strcasecmp
-#undef __vsnprintf
-#define __vsnprintf vsnprintf
-
-#endif /* !_LIBC */
-
-#ifndef __set_errno
-#define __set_errno(e) (errno = (e))
-#endif
diff --git a/argp-standalone/argp-parse.c b/argp-standalone/argp-parse.c
deleted file mode 100644
index 78f7bf139b6..00000000000
--- a/argp-standalone/argp-parse.c
+++ /dev/null
@@ -1,1305 +0,0 @@
-/* Hierarchial argument parsing
- Copyright (C) 1995, 96, 97, 98, 99, 2000,2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-#ifndef _GNU_SOURCE
-# define _GNU_SOURCE 1
-#endif
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#if HAVE_ALLOCA_H
-#include <alloca.h>
-#endif
-
-#include <stdlib.h>
-#include <string.h>
-#if HAVE_UNISTD_H
-# include <unistd.h>
-#endif
-#include <limits.h>
-#include <assert.h>
-
-#if HAVE_MALLOC_H
-/* Needed, for alloca on windows */
-# include <malloc.h>
-#endif
-
-#ifndef _
-/* This is for other GNU distributions with internationalized messages.
- When compiling libc, the _ macro is predefined. */
-# if defined HAVE_LIBINTL_H || defined _LIBC
-# include <libintl.h>
-# ifdef _LIBC
-# undef dgettext
-# define dgettext(domain, msgid) __dcgettext (domain, msgid, LC_MESSAGES)
-# endif
-# else
-# define dgettext(domain, msgid) (msgid)
-# define gettext(msgid) (msgid)
-# endif
-#endif
-#ifndef N_
-# define N_(msgid) (msgid)
-#endif
-
-#if _LIBC - 0
-#include <bits/libc-lock.h>
-#else
-#ifdef HAVE_CTHREADS_H
-#include <cthreads.h>
-#endif
-#endif /* _LIBC */
-
-#include "argp.h"
-#include "argp-namefrob.h"
-
-
-/* The meta-argument used to prevent any further arguments being interpreted
- as options. */
-#define QUOTE "--"
-
-/* EZ alias for ARGP_ERR_UNKNOWN. */
-#define EBADKEY ARGP_ERR_UNKNOWN
-
-
-/* Default options. */
-
-/* When argp is given the --HANG switch, _ARGP_HANG is set and argp will sleep
- for one second intervals, decrementing _ARGP_HANG until it's zero. Thus
- you can force the program to continue by attaching a debugger and setting
- it to 0 yourself. */
-volatile int _argp_hang;
-
-#define OPT_PROGNAME -2
-#define OPT_USAGE -3
-#if HAVE_SLEEP && HAVE_GETPID
-#define OPT_HANG -4
-#endif
-
-static const struct argp_option argp_default_options[] =
-{
- {"help", '?', 0, 0, N_("Give this help list"), -1},
- {"usage", OPT_USAGE, 0, 0, N_("Give a short usage message"), 0 },
- {"program-name",OPT_PROGNAME,"NAME", OPTION_HIDDEN,
- N_("Set the program name"), 0},
-#if OPT_HANG
- {"HANG", OPT_HANG, "SECS", OPTION_ARG_OPTIONAL | OPTION_HIDDEN,
- N_("Hang for SECS seconds (default 3600)"), 0 },
-#endif
- {0, 0, 0, 0, 0, 0}
-};
-
-static error_t
-argp_default_parser (int key, char *arg, struct argp_state *state)
-{
- switch (key)
- {
- case '?':
- __argp_state_help (state, state->out_stream, ARGP_HELP_STD_HELP);
- break;
- case OPT_USAGE:
- __argp_state_help (state, state->out_stream,
- ARGP_HELP_USAGE | ARGP_HELP_EXIT_OK);
- break;
-
- case OPT_PROGNAME: /* Set the program name. */
-#if HAVE_DECL_PROGRAM_INVOCATION_NAME
- program_invocation_name = arg;
-#endif
- /* [Note that some systems only have PROGRAM_INVOCATION_SHORT_NAME (aka
- __PROGNAME), in which case, PROGRAM_INVOCATION_NAME is just defined
- to be that, so we have to be a bit careful here.] */
-
- /* Update what we use for messages. */
-
- state->name = __argp_basename(arg);
-
-#if HAVE_DECL_PROGRAM_INVOCATION_SHORT_NAME
- program_invocation_short_name = state->name;
-#endif
-
- if ((state->flags & (ARGP_PARSE_ARGV0 | ARGP_NO_ERRS))
- == ARGP_PARSE_ARGV0)
- /* Update what getopt uses too. */
- state->argv[0] = arg;
-
- break;
-
-#if OPT_HANG
- case OPT_HANG:
- _argp_hang = atoi (arg ? arg : "3600");
- fprintf(state->err_stream, "%s: pid = %ld\n",
- state->name, (long) getpid());
- while (_argp_hang-- > 0)
- __sleep (1);
- break;
-#endif
-
- default:
- return EBADKEY;
- }
- return 0;
-}
-
-static const struct argp argp_default_argp =
- {argp_default_options, &argp_default_parser, NULL, NULL, NULL, NULL, "libc"};
-
-
-static const struct argp_option argp_version_options[] =
-{
- {"version", 'V', 0, 0, N_("Print program version"), -1},
- {0, 0, 0, 0, 0, 0 }
-};
-
-static error_t
-argp_version_parser (int key, char *arg UNUSED, struct argp_state *state)
-{
- switch (key)
- {
- case 'V':
- if (argp_program_version_hook)
- (*argp_program_version_hook) (state->out_stream, state);
- else if (argp_program_version)
- fprintf (state->out_stream, "%s\n", argp_program_version);
- else
- __argp_error (state, dgettext (state->root_argp->argp_domain,
- "(PROGRAM ERROR) No version known!?"));
- if (! (state->flags & ARGP_NO_EXIT))
- exit (0);
- break;
- default:
- return EBADKEY;
- }
- return 0;
-}
-
-static const struct argp argp_version_argp =
- {argp_version_options, &argp_version_parser, NULL, NULL, NULL, NULL, "libc"};
-
-
-
-/* The state of a `group' during parsing. Each group corresponds to a
- particular argp structure from the tree of such descending from the top
- level argp passed to argp_parse. */
-struct group
-{
- /* This group's parsing function. */
- argp_parser_t parser;
-
- /* Which argp this group is from. */
- const struct argp *argp;
-
- /* The number of non-option args sucessfully handled by this parser. */
- unsigned args_processed;
-
- /* This group's parser's parent's group. */
- struct group *parent;
- unsigned parent_index; /* And the our position in the parent. */
-
- /* These fields are swapped into and out of the state structure when
- calling this group's parser. */
- void *input, **child_inputs;
- void *hook;
-};
-
-/* Call GROUP's parser with KEY and ARG, swapping any group-specific info
- from STATE before calling, and back into state afterwards. If GROUP has
- no parser, EBADKEY is returned. */
-static error_t
-group_parse (struct group *group, struct argp_state *state, int key, char *arg)
-{
- if (group->parser)
- {
- error_t err;
- state->hook = group->hook;
- state->input = group->input;
- state->child_inputs = group->child_inputs;
- state->arg_num = group->args_processed;
- err = (*group->parser)(key, arg, state);
- group->hook = state->hook;
- return err;
- }
- else
- return EBADKEY;
-}
-
-struct parser
-{
- const struct argp *argp;
-
- const char *posixly_correct;
-
- /* True if there are only no-option arguments left, which are just
- passed verbatim with ARGP_KEY_ARG. This is set if we encounter a
- quote, or the end of the proper options, but may be cleared again
- if the user moves the next argument pointer backwards. */
- int args_only;
-
- /* Describe how to deal with options that follow non-option ARGV-elements.
-
- If the caller did not specify anything, the default is
- REQUIRE_ORDER if the environment variable POSIXLY_CORRECT is
- defined, PERMUTE otherwise.
-
- REQUIRE_ORDER means don't recognize them as options; stop option
- processing when the first non-option is seen. This is what Unix
- does. This mode of operation is selected by either setting the
- environment variable POSIXLY_CORRECT, or using `+' as the first
- character of the list of option characters.
-
- PERMUTE is the default. We permute the contents of ARGV as we
- scan, so that eventually all the non-options are at the end. This
- allows options to be given in any order, even with programs that
- were not written to expect this.
-
- RETURN_IN_ORDER is an option available to programs that were
- written to expect options and other ARGV-elements in any order
- and that care about the ordering of the two. We describe each
- non-option ARGV-element as if it were the argument of an option
- with character code 1. Using `-' as the first character of the
- list of option characters selects this mode of operation.
-
- */
- enum { REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER } ordering;
-
- /* A segment of non-option arguments that have been skipped for
- later processing, after all options. `first_nonopt' is the index
- in ARGV of the first of them; `last_nonopt' is the index after
- the last of them.
-
- If quoted or args_only is non-zero, this segment should be empty. */
-
- /* FIXME: I'd prefer to use unsigned, but it's more consistent to
- use the same type as for state.next. */
- int first_nonopt;
- int last_nonopt;
-
- /* String of all recognized short options. Needed for ARGP_LONG_ONLY. */
- /* FIXME: Perhaps change to a pointer to a suitable bitmap instead? */
- char *short_opts;
-
- /* For parsing combined short options. */
- char *nextchar;
-
- /* States of the various parsing groups. */
- struct group *groups;
- /* The end of the GROUPS array. */
- struct group *egroup;
- /* An vector containing storage for the CHILD_INPUTS field in all groups. */
- void **child_inputs;
-
- /* State block supplied to parsing routines. */
- struct argp_state state;
-
- /* Memory used by this parser. */
- void *storage;
-};
-
-/* Search for a group defining a short option. */
-static const struct argp_option *
-find_short_option(struct parser *parser, int key, struct group **p)
-{
- struct group *group;
-
- assert(key >= 0);
- assert(isascii(key));
-
- for (group = parser->groups; group < parser->egroup; group++)
- {
- const struct argp_option *opts;
-
- for (opts = group->argp->options; !__option_is_end(opts); opts++)
- if (opts->key == key)
- {
- *p = group;
- return opts;
- }
- }
- return NULL;
-}
-
-enum match_result { MATCH_EXACT, MATCH_PARTIAL, MATCH_NO };
-
-/* If defined, allow complete.el-like abbreviations of long options. */
-#ifndef ARGP_COMPLETE
-#define ARGP_COMPLETE 0
-#endif
-
-/* Matches an encountern long-option argument ARG against an option NAME.
- * ARG is terminated by NUL or '='. */
-static enum match_result
-match_option(const char *arg, const char *name)
-{
- unsigned i, j;
- for (i = j = 0;; i++, j++)
- {
- switch(arg[i])
- {
- case '\0':
- case '=':
- return name[j] ? MATCH_PARTIAL : MATCH_EXACT;
-#if ARGP_COMPLETE
- case '-':
- while (name[j] != '-')
- if (!name[j++])
- return MATCH_NO;
- break;
-#endif
- default:
- if (arg[i] != name[j])
- return MATCH_NO;
- }
- }
-}
-
-static const struct argp_option *
-find_long_option(struct parser *parser,
- const char *arg,
- struct group **p)
-{
- struct group *group;
-
- /* Partial match found so far. */
- struct group *matched_group = NULL;
- const struct argp_option *matched_option = NULL;
-
- /* Number of partial matches. */
- int num_partial = 0;
-
- for (group = parser->groups; group < parser->egroup; group++)
- {
- const struct argp_option *opts;
-
- for (opts = group->argp->options; !__option_is_end(opts); opts++)
- {
- if (!opts->name)
- continue;
- switch (match_option(arg, opts->name))
- {
- case MATCH_NO:
- break;
- case MATCH_PARTIAL:
- num_partial++;
-
- matched_group = group;
- matched_option = opts;
-
- break;
- case MATCH_EXACT:
- /* Exact match. */
- *p = group;
- return opts;
- }
- }
- }
- if (num_partial == 1)
- {
- *p = matched_group;
- return matched_option;
- }
-
- return NULL;
-}
-
-
-/* The next usable entries in the various parser tables being filled in by
- convert_options. */
-struct parser_convert_state
-{
- struct parser *parser;
- char *short_end;
- void **child_inputs_end;
-};
-
-/* Initialize GROUP from ARGP. If CVT->SHORT_END is non-NULL, short
- options are recorded in the short options string. Returns the next
- unused group entry. CVT holds state used during the conversion. */
-static struct group *
-convert_options (const struct argp *argp,
- struct group *parent, unsigned parent_index,
- struct group *group, struct parser_convert_state *cvt)
-{
- const struct argp_option *opt = argp->options;
- const struct argp_child *children = argp->children;
-
- if (opt || argp->parser)
- {
- /* This parser needs a group. */
- if (cvt->short_end)
- {
- /* Record any short options. */
- for ( ; !__option_is_end (opt); opt++)
- if (__option_is_short(opt))
- *cvt->short_end++ = opt->key;
- }
-
- group->parser = argp->parser;
- group->argp = argp;
- group->args_processed = 0;
- group->parent = parent;
- group->parent_index = parent_index;
- group->input = 0;
- group->hook = 0;
- group->child_inputs = 0;
-
- if (children)
- /* Assign GROUP's CHILD_INPUTS field some space from
- CVT->child_inputs_end.*/
- {
- unsigned num_children = 0;
- while (children[num_children].argp)
- num_children++;
- group->child_inputs = cvt->child_inputs_end;
- cvt->child_inputs_end += num_children;
- }
- parent = group++;
- }
- else
- parent = 0;
-
- if (children)
- {
- unsigned index = 0;
- while (children->argp)
- group =
- convert_options (children++->argp, parent, index++, group, cvt);
- }
-
- return group;
-}
-/* Allocate and initialize the group structures, so that they are
- ordered as if by traversing the corresponding argp parser tree in
- pre-order. Also build the list of short options, if that is needed. */
-static void
-parser_convert (struct parser *parser, const struct argp *argp)
-{
- struct parser_convert_state cvt;
-
- cvt.parser = parser;
- cvt.short_end = parser->short_opts;
- cvt.child_inputs_end = parser->child_inputs;
-
- parser->argp = argp;
-
- if (argp)
- parser->egroup = convert_options (argp, 0, 0, parser->groups, &cvt);
- else
- parser->egroup = parser->groups; /* No parsers at all! */
-
- if (parser->short_opts)
- *cvt.short_end ='\0';
-}
-
-/* Lengths of various parser fields which we will allocated. */
-struct parser_sizes
-{
- /* Needed only ARGP_LONG_ONLY */
- size_t short_len; /* Number of short options. */
-
- size_t num_groups; /* Group structures we allocate. */
- size_t num_child_inputs; /* Child input slots. */
-};
-
-/* For ARGP, increments the NUM_GROUPS field in SZS by the total
- number of argp structures descended from it, and the SHORT_LEN by
- the total number of short options. */
-static void
-calc_sizes (const struct argp *argp, struct parser_sizes *szs)
-{
- const struct argp_child *child = argp->children;
- const struct argp_option *opt = argp->options;
-
- if (opt || argp->parser)
- {
- /* This parser needs a group. */
- szs->num_groups++;
- if (opt)
- {
- while (__option_is_short (opt++))
- szs->short_len++;
- }
- }
-
- if (child)
- while (child->argp)
- {
- calc_sizes ((child++)->argp, szs);
- szs->num_child_inputs++;
- }
-}
-
-/* Initializes PARSER to parse ARGP in a manner described by FLAGS. */
-static error_t
-parser_init (struct parser *parser, const struct argp *argp,
- int argc, char **argv, int flags, void *input)
-{
- error_t err = 0;
- struct group *group;
- struct parser_sizes szs;
-
- parser->posixly_correct = getenv ("POSIXLY_CORRECT");
-
- if (flags & ARGP_IN_ORDER)
- parser->ordering = RETURN_IN_ORDER;
- else if (flags & ARGP_NO_ARGS)
- parser->ordering = REQUIRE_ORDER;
- else if (parser->posixly_correct)
- parser->ordering = REQUIRE_ORDER;
- else
- parser->ordering = PERMUTE;
-
- szs.short_len = 0;
- szs.num_groups = 0;
- szs.num_child_inputs = 0;
-
- if (argp)
- calc_sizes (argp, &szs);
-
- if (!(flags & ARGP_LONG_ONLY))
- /* We have no use for the short option array. */
- szs.short_len = 0;
-
- /* Lengths of the various bits of storage used by PARSER. */
-#define GLEN (szs.num_groups + 1) * sizeof (struct group)
-#define CLEN (szs.num_child_inputs * sizeof (void *))
-#define SLEN (szs.short_len + 1)
-#define STORAGE(offset) ((void *) (((char *) parser->storage) + (offset)))
-
- parser->storage = malloc (GLEN + CLEN + SLEN);
- if (! parser->storage)
- return ENOMEM;
-
- parser->groups = parser->storage;
-
- parser->child_inputs = STORAGE(GLEN);
- memset (parser->child_inputs, 0, szs.num_child_inputs * sizeof (void *));
-
- if (flags & ARGP_LONG_ONLY)
- parser->short_opts = STORAGE(GLEN + CLEN);
- else
- parser->short_opts = NULL;
-
- parser_convert (parser, argp);
-
- memset (&parser->state, 0, sizeof (struct argp_state));
-
- parser->state.root_argp = parser->argp;
- parser->state.argc = argc;
- parser->state.argv = argv;
- parser->state.flags = flags;
- parser->state.err_stream = stderr;
- parser->state.out_stream = stdout;
- parser->state.pstate = parser;
-
- parser->args_only = 0;
- parser->nextchar = NULL;
- parser->first_nonopt = parser->last_nonopt = 0;
-
- /* Call each parser for the first time, giving it a chance to propagate
- values to child parsers. */
- if (parser->groups < parser->egroup)
- parser->groups->input = input;
- for (group = parser->groups;
- group < parser->egroup && (!err || err == EBADKEY);
- group++)
- {
- if (group->parent)
- /* If a child parser, get the initial input value from the parent. */
- group->input = group->parent->child_inputs[group->parent_index];
-
- if (!group->parser
- && group->argp->children && group->argp->children->argp)
- /* For the special case where no parsing function is supplied for an
- argp, propagate its input to its first child, if any (this just
- makes very simple wrapper argps more convenient). */
- group->child_inputs[0] = group->input;
-
- err = group_parse (group, &parser->state, ARGP_KEY_INIT, 0);
- }
- if (err == EBADKEY)
- err = 0; /* Some parser didn't understand. */
-
- if (err)
- return err;
-
- if (argv[0] && !(parser->state.flags & ARGP_PARSE_ARGV0))
- /* There's an argv[0]; use it for messages. */
- {
- parser->state.name = __argp_basename(argv[0]);
-
- /* Don't parse it as an argument. */
- parser->state.next = 1;
- }
- else
- parser->state.name = __argp_short_program_name(NULL);
-
- return 0;
-}
-
-/* Free any storage consumed by PARSER (but not PARSER itself). */
-static error_t
-parser_finalize (struct parser *parser,
- error_t err, int arg_ebadkey, int *end_index)
-{
- struct group *group;
-
- if (err == EBADKEY && arg_ebadkey)
- /* Suppress errors generated by unparsed arguments. */
- err = 0;
-
- if (! err)
- {
- if (parser->state.next == parser->state.argc)
- /* We successfully parsed all arguments! Call all the parsers again,
- just a few more times... */
- {
- for (group = parser->groups;
- group < parser->egroup && (!err || err==EBADKEY);
- group++)
- if (group->args_processed == 0)
- err = group_parse (group, &parser->state, ARGP_KEY_NO_ARGS, 0);
- for (group = parser->egroup - 1;
- group >= parser->groups && (!err || err==EBADKEY);
- group--)
- err = group_parse (group, &parser->state, ARGP_KEY_END, 0);
-
- if (err == EBADKEY)
- err = 0; /* Some parser didn't understand. */
-
- /* Tell the user that all arguments are parsed. */
- if (end_index)
- *end_index = parser->state.next;
- }
- else if (end_index)
- /* Return any remaining arguments to the user. */
- *end_index = parser->state.next;
- else
- /* No way to return the remaining arguments, they must be bogus. */
- {
- if (!(parser->state.flags & ARGP_NO_ERRS)
- && parser->state.err_stream)
- fprintf (parser->state.err_stream,
- dgettext (parser->argp->argp_domain,
- "%s: Too many arguments\n"),
- parser->state.name);
- err = EBADKEY;
- }
- }
-
- /* Okay, we're all done, with either an error or success; call the parsers
- to indicate which one. */
-
- if (err)
- {
- /* Maybe print an error message. */
- if (err == EBADKEY)
- /* An appropriate message describing what the error was should have
- been printed earlier. */
- __argp_state_help (&parser->state, parser->state.err_stream,
- ARGP_HELP_STD_ERR);
-
- /* Since we didn't exit, give each parser an error indication. */
- for (group = parser->groups; group < parser->egroup; group++)
- group_parse (group, &parser->state, ARGP_KEY_ERROR, 0);
- }
- else
- /* Notify parsers of success, and propagate back values from parsers. */
- {
- /* We pass over the groups in reverse order so that child groups are
- given a chance to do there processing before passing back a value to
- the parent. */
- for (group = parser->egroup - 1
- ; group >= parser->groups && (!err || err == EBADKEY)
- ; group--)
- err = group_parse (group, &parser->state, ARGP_KEY_SUCCESS, 0);
- if (err == EBADKEY)
- err = 0; /* Some parser didn't understand. */
- }
-
- /* Call parsers once more, to do any final cleanup. Errors are ignored. */
- for (group = parser->egroup - 1; group >= parser->groups; group--)
- group_parse (group, &parser->state, ARGP_KEY_FINI, 0);
-
- if (err == EBADKEY)
- err = EINVAL;
-
- free (parser->storage);
-
- return err;
-}
-
-/* Call the user parsers to parse the non-option argument VAL, at the
- current position, returning any error. The state NEXT pointer
- should point to the argument; this function will adjust it
- correctly to reflect however many args actually end up being
- consumed. */
-static error_t
-parser_parse_arg (struct parser *parser, char *val)
-{
- /* Save the starting value of NEXT */
- int index = parser->state.next;
- error_t err = EBADKEY;
- struct group *group;
- int key = 0; /* Which of ARGP_KEY_ARG[S] we used. */
-
- /* Try to parse the argument in each parser. */
- for (group = parser->groups
- ; group < parser->egroup && err == EBADKEY
- ; group++)
- {
- parser->state.next++; /* For ARGP_KEY_ARG, consume the arg. */
- key = ARGP_KEY_ARG;
- err = group_parse (group, &parser->state, key, val);
-
- if (err == EBADKEY)
- /* This parser doesn't like ARGP_KEY_ARG; try ARGP_KEY_ARGS instead. */
- {
- parser->state.next--; /* For ARGP_KEY_ARGS, put back the arg. */
- key = ARGP_KEY_ARGS;
- err = group_parse (group, &parser->state, key, 0);
- }
- }
-
- if (! err)
- {
- if (key == ARGP_KEY_ARGS)
- /* The default for ARGP_KEY_ARGS is to assume that if NEXT isn't
- changed by the user, *all* arguments should be considered
- consumed. */
- parser->state.next = parser->state.argc;
-
- if (parser->state.next > index)
- /* Remember that we successfully processed a non-option
- argument -- but only if the user hasn't gotten tricky and set
- the clock back. */
- (--group)->args_processed += (parser->state.next - index);
- else
- /* The user wants to reparse some args, so try looking for options again. */
- parser->args_only = 0;
- }
-
- return err;
-}
-
-/* Exchange two adjacent subsequences of ARGV.
- One subsequence is elements [first_nonopt,last_nonopt)
- which contains all the non-options that have been skipped so far.
- The other is elements [last_nonopt,next), which contains all
- the options processed since those non-options were skipped.
-
- `first_nonopt' and `last_nonopt' are relocated so that they describe
- the new indices of the non-options in ARGV after they are moved. */
-
-static void
-exchange (struct parser *parser)
-{
- int bottom = parser->first_nonopt;
- int middle = parser->last_nonopt;
- int top = parser->state.next;
- char **argv = parser->state.argv;
-
- char *tem;
-
- /* Exchange the shorter segment with the far end of the longer segment.
- That puts the shorter segment into the right place.
- It leaves the longer segment in the right place overall,
- but it consists of two parts that need to be swapped next. */
-
- while (top > middle && middle > bottom)
- {
- if (top - middle > middle - bottom)
- {
- /* Bottom segment is the short one. */
- int len = middle - bottom;
- register int i;
-
- /* Swap it with the top part of the top segment. */
- for (i = 0; i < len; i++)
- {
- tem = argv[bottom + i];
- argv[bottom + i] = argv[top - (middle - bottom) + i];
- argv[top - (middle - bottom) + i] = tem;
- }
- /* Exclude the moved bottom segment from further swapping. */
- top -= len;
- }
- else
- {
- /* Top segment is the short one. */
- int len = top - middle;
- register int i;
-
- /* Swap it with the bottom part of the bottom segment. */
- for (i = 0; i < len; i++)
- {
- tem = argv[bottom + i];
- argv[bottom + i] = argv[middle + i];
- argv[middle + i] = tem;
- }
- /* Exclude the moved top segment from further swapping. */
- bottom += len;
- }
- }
-
- /* Update records for the slots the non-options now occupy. */
-
- parser->first_nonopt += (parser->state.next - parser->last_nonopt);
- parser->last_nonopt = parser->state.next;
-}
-
-
-
-enum arg_type { ARG_ARG, ARG_SHORT_OPTION,
- ARG_LONG_OPTION, ARG_LONG_ONLY_OPTION,
- ARG_QUOTE };
-
-static enum arg_type
-classify_arg(struct parser *parser, char *arg, char **opt)
-{
- if (arg[0] == '-')
- /* Looks like an option... */
- switch (arg[1])
- {
- case '\0':
- /* "-" is not an option. */
- return ARG_ARG;
- case '-':
- /* Long option, or quote. */
- if (!arg[2])
- return ARG_QUOTE;
-
- /* A long option. */
- if (opt)
- *opt = arg + 2;
- return ARG_LONG_OPTION;
-
- default:
- /* Short option. But if ARGP_LONG_ONLY, it can also be a long option. */
-
- if (opt)
- *opt = arg + 1;
-
- if (parser->state.flags & ARGP_LONG_ONLY)
- {
- /* Rules from getopt.c:
-
- If long_only and the ARGV-element has the form "-f",
- where f is a valid short option, don't consider it an
- abbreviated form of a long option that starts with f.
- Otherwise there would be no way to give the -f short
- option.
-
- On the other hand, if there's a long option "fubar" and
- the ARGV-element is "-fu", do consider that an
- abbreviation of the long option, just like "--fu", and
- not "-f" with arg "u".
-
- This distinction seems to be the most useful approach. */
-
- assert(parser->short_opts);
-
- if (arg[2] || !strchr(parser->short_opts, arg[1]))
- return ARG_LONG_ONLY_OPTION;
- }
-
- return ARG_SHORT_OPTION;
- }
-
- else
- return ARG_ARG;
-}
-
-/* Parse the next argument in PARSER (as indicated by PARSER->state.next).
- Any error from the parsers is returned, and *ARGP_EBADKEY indicates
- whether a value of EBADKEY is due to an unrecognized argument (which is
- generally not fatal). */
-static error_t
-parser_parse_next (struct parser *parser, int *arg_ebadkey)
-{
- if (parser->state.quoted && parser->state.next < parser->state.quoted)
- /* The next argument pointer has been moved to before the quoted
- region, so pretend we never saw the quoting `--', and start
- looking for options again. If the `--' is still there we'll just
- process it one more time. */
- parser->state.quoted = parser->args_only = 0;
-
- /* Give FIRST_NONOPT & LAST_NONOPT rational values if NEXT has been
- moved back by the user (who may also have changed the arguments). */
- if (parser->last_nonopt > parser->state.next)
- parser->last_nonopt = parser->state.next;
- if (parser->first_nonopt > parser->state.next)
- parser->first_nonopt = parser->state.next;
-
- if (parser->nextchar)
- /* Deal with short options. */
- {
- struct group *group;
- char c;
- const struct argp_option *option;
- char *value = NULL;;
-
- assert(!parser->args_only);
-
- c = *parser->nextchar++;
-
- option = find_short_option(parser, c, &group);
- if (!option)
- {
- if (parser->posixly_correct)
- /* 1003.2 specifies the format of this message. */
- fprintf (parser->state.err_stream,
- dgettext(parser->state.root_argp->argp_domain,
- "%s: illegal option -- %c\n"),
- parser->state.name, c);
- else
- fprintf (parser->state.err_stream,
- dgettext(parser->state.root_argp->argp_domain,
- "%s: invalid option -- %c\n"),
- parser->state.name, c);
-
- *arg_ebadkey = 0;
- return EBADKEY;
- }
-
- if (!*parser->nextchar)
- parser->nextchar = NULL;
-
- if (option->arg)
- {
- value = parser->nextchar;
- parser->nextchar = NULL;
-
- if (!value
- && !(option->flags & OPTION_ARG_OPTIONAL))
- /* We need an mandatory argument. */
- {
- if (parser->state.next == parser->state.argc)
- /* Missing argument */
- {
- /* 1003.2 specifies the format of this message. */
- fprintf (parser->state.err_stream,
- dgettext(parser->state.root_argp->argp_domain,
- "%s: option requires an argument -- %c\n"),
- parser->state.name, c);
-
- *arg_ebadkey = 0;
- return EBADKEY;
- }
- value = parser->state.argv[parser->state.next++];
- }
- }
- return group_parse(group, &parser->state,
- option->key, value);
- }
- else
- /* Advance to the next ARGV-element. */
- {
- if (parser->args_only)
- {
- *arg_ebadkey = 1;
- if (parser->state.next >= parser->state.argc)
- /* We're done. */
- return EBADKEY;
- else
- return parser_parse_arg(parser,
- parser->state.argv[parser->state.next]);
- }
-
- if (parser->state.next >= parser->state.argc)
- /* Almost done. If there are non-options that we skipped
- previously, we should process them now. */
- {
- *arg_ebadkey = 1;
- if (parser->first_nonopt != parser->last_nonopt)
- {
- exchange(parser);
-
- /* Start processing the arguments we skipped previously. */
- parser->state.next = parser->first_nonopt;
-
- parser->first_nonopt = parser->last_nonopt = 0;
-
- parser->args_only = 1;
- return 0;
- }
- else
- /* Indicate that we're really done. */
- return EBADKEY;
- }
- else
- /* Look for options. */
- {
- char *arg = parser->state.argv[parser->state.next];
-
- char *optstart;
- enum arg_type token = classify_arg(parser, arg, &optstart);
-
- switch (token)
- {
- case ARG_ARG:
- switch (parser->ordering)
- {
- case PERMUTE:
- if (parser->first_nonopt == parser->last_nonopt)
- /* Skipped sequence is empty; start a new one. */
- parser->first_nonopt = parser->last_nonopt = parser->state.next;
-
- else if (parser->last_nonopt != parser->state.next)
- /* We have a non-empty skipped sequence, and
- we're not at the end-point, so move it. */
- exchange(parser);
-
- assert(parser->last_nonopt == parser->state.next);
-
- /* Skip this argument for now. */
- parser->state.next++;
- parser->last_nonopt = parser->state.next;
-
- return 0;
-
- case REQUIRE_ORDER:
- /* Implicit quote before the first argument. */
- parser->args_only = 1;
- return 0;
-
- case RETURN_IN_ORDER:
- *arg_ebadkey = 1;
- return parser_parse_arg(parser, arg);
-
- default:
- abort();
- }
- case ARG_QUOTE:
- /* Skip it, then exchange with any previous non-options. */
- parser->state.next++;
- assert (parser->last_nonopt != parser->state.next);
-
- if (parser->first_nonopt != parser->last_nonopt)
- {
- exchange(parser);
-
- /* Start processing the skipped and the quoted
- arguments. */
-
- parser->state.quoted = parser->state.next = parser->first_nonopt;
-
- /* Also empty the skipped-list, to avoid confusion
- if the user resets the next pointer. */
- parser->first_nonopt = parser->last_nonopt = 0;
- }
- else
- parser->state.quoted = parser->state.next;
-
- parser->args_only = 1;
- return 0;
-
- case ARG_LONG_ONLY_OPTION:
- case ARG_LONG_OPTION:
- {
- struct group *group;
- const struct argp_option *option;
- char *value;
-
- parser->state.next++;
- option = find_long_option(parser, optstart, &group);
-
- if (!option)
- {
- /* NOTE: This includes any "=something" in the output. */
- fprintf (parser->state.err_stream,
- dgettext(parser->state.root_argp->argp_domain,
- "%s: unrecognized option `%s'\n"),
- parser->state.name, arg);
- *arg_ebadkey = 0;
- return EBADKEY;
- }
-
- value = strchr(optstart, '=');
- if (value)
- value++;
-
- if (value && !option->arg)
- /* Unexpected argument. */
- {
- if (token == ARG_LONG_OPTION)
- /* --option */
- fprintf (parser->state.err_stream,
- dgettext(parser->state.root_argp->argp_domain,
- "%s: option `--%s' doesn't allow an argument\n"),
- parser->state.name, option->name);
- else
- /* +option or -option */
- fprintf (parser->state.err_stream,
- dgettext(parser->state.root_argp->argp_domain,
- "%s: option `%c%s' doesn't allow an argument\n"),
- parser->state.name, arg[0], option->name);
-
- *arg_ebadkey = 0;
- return EBADKEY;
- }
-
- if (option->arg && !value
- && !(option->flags & OPTION_ARG_OPTIONAL))
- /* We need an mandatory argument. */
- {
- if (parser->state.next == parser->state.argc)
- /* Missing argument */
- {
- if (token == ARG_LONG_OPTION)
- /* --option */
- fprintf (parser->state.err_stream,
- dgettext(parser->state.root_argp->argp_domain,
- "%s: option `--%s' requires an argument\n"),
- parser->state.name, option->name);
- else
- /* +option or -option */
- fprintf (parser->state.err_stream,
- dgettext(parser->state.root_argp->argp_domain,
- "%s: option `%c%s' requires an argument\n"),
- parser->state.name, arg[0], option->name);
-
- *arg_ebadkey = 0;
- return EBADKEY;
- }
-
- value = parser->state.argv[parser->state.next++];
- }
- *arg_ebadkey = 0;
- return group_parse(group, &parser->state,
- option->key, value);
- }
- case ARG_SHORT_OPTION:
- parser->state.next++;
- parser->nextchar = optstart;
- return 0;
-
- default:
- abort();
- }
- }
- }
-}
-
-/* Parse the options strings in ARGC & ARGV according to the argp in ARGP.
- FLAGS is one of the ARGP_ flags above. If END_INDEX is non-NULL, the
- index in ARGV of the first unparsed option is returned in it. If an
- unknown option is present, EINVAL is returned; if some parser routine
- returned a non-zero value, it is returned; otherwise 0 is returned. */
-error_t
-__argp_parse (const struct argp *argp, int argc, char **argv, unsigned flags,
- int *end_index, void *input)
-{
- error_t err;
- struct parser parser;
-
- /* If true, then err == EBADKEY is a result of a non-option argument failing
- to be parsed (which in some cases isn't actually an error). */
- int arg_ebadkey = 0;
-
- if (! (flags & ARGP_NO_HELP))
- /* Add our own options. */
- {
- struct argp_child *child = alloca (4 * sizeof (struct argp_child));
- struct argp *top_argp = alloca (sizeof (struct argp));
-
- /* TOP_ARGP has no options, it just serves to group the user & default
- argps. */
- memset (top_argp, 0, sizeof (*top_argp));
- top_argp->children = child;
-
- memset (child, 0, 4 * sizeof (struct argp_child));
-
- if (argp)
- (child++)->argp = argp;
- (child++)->argp = &argp_default_argp;
- if (argp_program_version || argp_program_version_hook)
- (child++)->argp = &argp_version_argp;
- child->argp = 0;
-
- argp = top_argp;
- }
-
- /* Construct a parser for these arguments. */
- err = parser_init (&parser, argp, argc, argv, flags, input);
-
- if (! err)
- /* Parse! */
- {
- while (! err)
- err = parser_parse_next (&parser, &arg_ebadkey);
- err = parser_finalize (&parser, err, arg_ebadkey, end_index);
- }
-
- return err;
-}
-#ifdef weak_alias
-weak_alias (__argp_parse, argp_parse)
-#endif
-
-/* Return the input field for ARGP in the parser corresponding to STATE; used
- by the help routines. */
-void *
-__argp_input (const struct argp *argp, const struct argp_state *state)
-{
- if (state)
- {
- struct group *group;
- struct parser *parser = state->pstate;
-
- for (group = parser->groups; group < parser->egroup; group++)
- if (group->argp == argp)
- return group->input;
- }
-
- return 0;
-}
-#ifdef weak_alias
-weak_alias (__argp_input, _argp_input)
-#endif
-
-/* Defined here, in case a user is not inlining the definitions in
- * argp.h */
-void
-__argp_usage (__const struct argp_state *__state)
-{
- __argp_state_help (__state, stderr, ARGP_HELP_STD_USAGE);
-}
-
-int
-__option_is_short (__const struct argp_option *__opt)
-{
- if (__opt->flags & OPTION_DOC)
- return 0;
- else
- {
- int __key = __opt->key;
- /* FIXME: whether or not a particular key implies a short option
- * ought not to be locale dependent. */
- return __key > 0 && isprint (__key);
- }
-}
-
-int
-__option_is_end (__const struct argp_option *__opt)
-{
- return !__opt->key && !__opt->name && !__opt->doc && !__opt->group;
-}
diff --git a/argp-standalone/argp-pv.c b/argp-standalone/argp-pv.c
deleted file mode 100644
index d7d374a66bd..00000000000
--- a/argp-standalone/argp-pv.c
+++ /dev/null
@@ -1,25 +0,0 @@
-/* Default definition for ARGP_PROGRAM_VERSION.
- Copyright (C) 1996, 1997, 1999, 2004 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-/* If set by the user program to a non-zero value, then a default option
- --version is added (unless the ARGP_NO_HELP flag is used), which will
- print this this string followed by a newline and exit (unless the
- ARGP_NO_EXIT flag is used). Overridden by ARGP_PROGRAM_VERSION_HOOK. */
-const char *argp_program_version = 0;
diff --git a/argp-standalone/argp-pvh.c b/argp-standalone/argp-pvh.c
deleted file mode 100644
index 829a1cda80d..00000000000
--- a/argp-standalone/argp-pvh.c
+++ /dev/null
@@ -1,32 +0,0 @@
-/* Default definition for ARGP_PROGRAM_VERSION_HOOK.
- Copyright (C) 1996, 1997, 1999, 2004 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include "argp.h"
-
-/* If set by the user program to a non-zero value, then a default option
- --version is added (unless the ARGP_NO_HELP flag is used), which calls
- this function with a stream to print the version to and a pointer to the
- current parsing state, and then exits (unless the ARGP_NO_EXIT flag is
- used). This variable takes precedent over ARGP_PROGRAM_VERSION. */
-void (*argp_program_version_hook) (FILE *stream, struct argp_state *state) = 0;
diff --git a/argp-standalone/argp.h b/argp-standalone/argp.h
deleted file mode 100644
index 29d3dfe9720..00000000000
--- a/argp-standalone/argp.h
+++ /dev/null
@@ -1,602 +0,0 @@
-/* Hierarchial argument parsing.
- Copyright (C) 1995, 96, 97, 98, 99, 2003 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
- Written by Miles Bader <miles@gnu.ai.mit.edu>.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Library General Public License as
- published by the Free Software Foundation; either version 2 of the
- License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Library General Public License for more details.
-
- You should have received a copy of the GNU Library General Public
- License along with the GNU C Library; see the file COPYING.LIB. If not,
- write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
- Boston, MA 02111-1307, USA. */
-
-#ifndef _ARGP_H
-#define _ARGP_H
-
-#include <stdio.h>
-#include <ctype.h>
-
-#define __need_error_t
-#include <errno.h>
-
-#ifndef __THROW
-# define __THROW
-#endif
-
-#ifndef __const
-# define __const const
-#endif
-
-#ifndef __error_t_defined
-typedef int error_t;
-# define __error_t_defined
-#endif
-
-/* FIXME: What's the right way to check for __restrict? Sun's cc seems
- not to have it. Perhaps it's easiest to just delete the use of
- __restrict from the prototypes. */
-#ifndef __restrict
-# ifndef __GNUC___
-# define __restrict
-# endif
-#endif
-
-/* NOTE: We can't use the autoconf tests, since this is supposed to be
- an installed header file and argp's config.h is of course not
- installed. */
-#ifndef PRINTF_STYLE
-# if __GNUC__ >= 2
-# define PRINTF_STYLE(f, a) __attribute__ ((__format__ (__printf__, f, a)))
-# else
-# define PRINTF_STYLE(f, a)
-# endif
-#endif
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* A description of a particular option. A pointer to an array of
- these is passed in the OPTIONS field of an argp structure. Each option
- entry can correspond to one long option and/or one short option; more
- names for the same option can be added by following an entry in an option
- array with options having the OPTION_ALIAS flag set. */
-struct argp_option
-{
- /* The long option name. For more than one name for the same option, you
- can use following options with the OPTION_ALIAS flag set. */
- __const char *name;
-
- /* What key is returned for this option. If > 0 and printable, then it's
- also accepted as a short option. */
- int key;
-
- /* If non-NULL, this is the name of the argument associated with this
- option, which is required unless the OPTION_ARG_OPTIONAL flag is set. */
- __const char *arg;
-
- /* OPTION_ flags. */
- int flags;
-
- /* The doc string for this option. If both NAME and KEY are 0, This string
- will be printed outdented from the normal option column, making it
- useful as a group header (it will be the first thing printed in its
- group); in this usage, it's conventional to end the string with a `:'. */
- __const char *doc;
-
- /* The group this option is in. In a long help message, options are sorted
- alphabetically within each group, and the groups presented in the order
- 0, 1, 2, ..., n, -m, ..., -2, -1. Every entry in an options array with
- if this field 0 will inherit the group number of the previous entry, or
- zero if it's the first one, unless its a group header (NAME and KEY both
- 0), in which case, the previous entry + 1 is the default. Automagic
- options such as --help are put into group -1. */
- int group;
-};
-
-/* The argument associated with this option is optional. */
-#define OPTION_ARG_OPTIONAL 0x1
-
-/* This option isn't displayed in any help messages. */
-#define OPTION_HIDDEN 0x2
-
-/* This option is an alias for the closest previous non-alias option. This
- means that it will be displayed in the same help entry, and will inherit
- fields other than NAME and KEY from the aliased option. */
-#define OPTION_ALIAS 0x4
-
-/* This option isn't actually an option (and so should be ignored by the
- actual option parser), but rather an arbitrary piece of documentation that
- should be displayed in much the same manner as the options. If this flag
- is set, then the option NAME field is displayed unmodified (e.g., no `--'
- prefix is added) at the left-margin (where a *short* option would normally
- be displayed), and the documentation string in the normal place. For
- purposes of sorting, any leading whitespace and puncuation is ignored,
- except that if the first non-whitespace character is not `-', this entry
- is displayed after all options (and OPTION_DOC entries with a leading `-')
- in the same group. */
-#define OPTION_DOC 0x8
-
-/* This option shouldn't be included in `long' usage messages (but is still
- included in help messages). This is mainly intended for options that are
- completely documented in an argp's ARGS_DOC field, in which case including
- the option in the generic usage list would be redundant. For instance,
- if ARGS_DOC is "FOO BAR\n-x BLAH", and the `-x' option's purpose is to
- distinguish these two cases, -x should probably be marked
- OPTION_NO_USAGE. */
-#define OPTION_NO_USAGE 0x10
-
-struct argp; /* fwd declare this type */
-struct argp_state; /* " */
-struct argp_child; /* " */
-
-/* The type of a pointer to an argp parsing function. */
-typedef error_t (*argp_parser_t) (int key, char *arg,
- struct argp_state *state);
-
-/* What to return for unrecognized keys. For special ARGP_KEY_ keys, such
- returns will simply be ignored. For user keys, this error will be turned
- into EINVAL (if the call to argp_parse is such that errors are propagated
- back to the user instead of exiting); returning EINVAL itself would result
- in an immediate stop to parsing in *all* cases. */
-#define ARGP_ERR_UNKNOWN E2BIG /* Hurd should never need E2BIG. XXX */
-
-/* Special values for the KEY argument to an argument parsing function.
- ARGP_ERR_UNKNOWN should be returned if they aren't understood.
-
- The sequence of keys to a parsing function is either (where each
- uppercased word should be prefixed by `ARGP_KEY_' and opt is a user key):
-
- INIT opt... NO_ARGS END SUCCESS -- No non-option arguments at all
- or INIT (opt | ARG)... END SUCCESS -- All non-option args parsed
- or INIT (opt | ARG)... SUCCESS -- Some non-option arg unrecognized
-
- The third case is where every parser returned ARGP_KEY_UNKNOWN for an
- argument, in which case parsing stops at that argument (returning the
- unparsed arguments to the caller of argp_parse if requested, or stopping
- with an error message if not).
-
- If an error occurs (either detected by argp, or because the parsing
- function returned an error value), then the parser is called with
- ARGP_KEY_ERROR, and no further calls are made. */
-
-/* This is not an option at all, but rather a command line argument. If a
- parser receiving this key returns success, the fact is recorded, and the
- ARGP_KEY_NO_ARGS case won't be used. HOWEVER, if while processing the
- argument, a parser function decrements the NEXT field of the state it's
- passed, the option won't be considered processed; this is to allow you to
- actually modify the argument (perhaps into an option), and have it
- processed again. */
-#define ARGP_KEY_ARG 0
-/* There are remaining arguments not parsed by any parser, which may be found
- starting at (STATE->argv + STATE->next). If success is returned, but
- STATE->next left untouched, it's assumed that all arguments were consume,
- otherwise, the parser should adjust STATE->next to reflect any arguments
- consumed. */
-#define ARGP_KEY_ARGS 0x1000006
-/* There are no more command line arguments at all. */
-#define ARGP_KEY_END 0x1000001
-/* Because it's common to want to do some special processing if there aren't
- any non-option args, user parsers are called with this key if they didn't
- successfully process any non-option arguments. Called just before
- ARGP_KEY_END (where more general validity checks on previously parsed
- arguments can take place). */
-#define ARGP_KEY_NO_ARGS 0x1000002
-/* Passed in before any parsing is done. Afterwards, the values of each
- element of the CHILD_INPUT field, if any, in the state structure is
- copied to each child's state to be the initial value of the INPUT field. */
-#define ARGP_KEY_INIT 0x1000003
-/* Use after all other keys, including SUCCESS & END. */
-#define ARGP_KEY_FINI 0x1000007
-/* Passed in when parsing has successfully been completed (even if there are
- still arguments remaining). */
-#define ARGP_KEY_SUCCESS 0x1000004
-/* Passed in if an error occurs. */
-#define ARGP_KEY_ERROR 0x1000005
-
-/* An argp structure contains a set of options declarations, a function to
- deal with parsing one, documentation string, a possible vector of child
- argp's, and perhaps a function to filter help output. When actually
- parsing options, getopt is called with the union of all the argp
- structures chained together through their CHILD pointers, with conflicts
- being resolved in favor of the first occurrence in the chain. */
-struct argp
-{
- /* An array of argp_option structures, terminated by an entry with both
- NAME and KEY having a value of 0. */
- __const struct argp_option *options;
-
- /* What to do with an option from this structure. KEY is the key
- associated with the option, and ARG is any associated argument (NULL if
- none was supplied). If KEY isn't understood, ARGP_ERR_UNKNOWN should be
- returned. If a non-zero, non-ARGP_ERR_UNKNOWN value is returned, then
- parsing is stopped immediately, and that value is returned from
- argp_parse(). For special (non-user-supplied) values of KEY, see the
- ARGP_KEY_ definitions below. */
- argp_parser_t parser;
-
- /* A string describing what other arguments are wanted by this program. It
- is only used by argp_usage to print the `Usage:' message. If it
- contains newlines, the strings separated by them are considered
- alternative usage patterns, and printed on separate lines (lines after
- the first are prefix by ` or: ' instead of `Usage:'). */
- __const char *args_doc;
-
- /* If non-NULL, a string containing extra text to be printed before and
- after the options in a long help message (separated by a vertical tab
- `\v' character). */
- __const char *doc;
-
- /* A vector of argp_children structures, terminated by a member with a 0
- argp field, pointing to child argps should be parsed with this one. Any
- conflicts are resolved in favor of this argp, or early argps in the
- CHILDREN list. This field is useful if you use libraries that supply
- their own argp structure, which you want to use in conjunction with your
- own. */
- __const struct argp_child *children;
-
- /* If non-zero, this should be a function to filter the output of help
- messages. KEY is either a key from an option, in which case TEXT is
- that option's help text, or a special key from the ARGP_KEY_HELP_
- defines, below, describing which other help text TEXT is. The function
- should return either TEXT, if it should be used as-is, a replacement
- string, which should be malloced, and will be freed by argp, or NULL,
- meaning `print nothing'. The value for TEXT is *after* any translation
- has been done, so if any of the replacement text also needs translation,
- that should be done by the filter function. INPUT is either the input
- supplied to argp_parse, or NULL, if argp_help was called directly. */
- char *(*help_filter) (int __key, __const char *__text, void *__input);
-
- /* If non-zero the strings used in the argp library are translated using
- the domain described by this string. Otherwise the currently installed
- default domain is used. */
- const char *argp_domain;
-};
-
-/* Possible KEY arguments to a help filter function. */
-#define ARGP_KEY_HELP_PRE_DOC 0x2000001 /* Help text preceeding options. */
-#define ARGP_KEY_HELP_POST_DOC 0x2000002 /* Help text following options. */
-#define ARGP_KEY_HELP_HEADER 0x2000003 /* Option header string. */
-#define ARGP_KEY_HELP_EXTRA 0x2000004 /* After all other documentation;
- TEXT is NULL for this key. */
-/* Explanatory note emitted when duplicate option arguments have been
- suppressed. */
-#define ARGP_KEY_HELP_DUP_ARGS_NOTE 0x2000005
-#define ARGP_KEY_HELP_ARGS_DOC 0x2000006 /* Argument doc string. */
-
-/* When an argp has a non-zero CHILDREN field, it should point to a vector of
- argp_child structures, each of which describes a subsidiary argp. */
-struct argp_child
-{
- /* The child parser. */
- __const struct argp *argp;
-
- /* Flags for this child. */
- int flags;
-
- /* If non-zero, an optional header to be printed in help output before the
- child options. As a side-effect, a non-zero value forces the child
- options to be grouped together; to achieve this effect without actually
- printing a header string, use a value of "". */
- __const char *header;
-
- /* Where to group the child options relative to the other (`consolidated')
- options in the parent argp; the values are the same as the GROUP field
- in argp_option structs, but all child-groupings follow parent options at
- a particular group level. If both this field and HEADER are zero, then
- they aren't grouped at all, but rather merged with the parent options
- (merging the child's grouping levels with the parents). */
- int group;
-};
-
-/* Parsing state. This is provided to parsing functions called by argp,
- which may examine and, as noted, modify fields. */
-struct argp_state
-{
- /* The top level ARGP being parsed. */
- __const struct argp *root_argp;
-
- /* The argument vector being parsed. May be modified. */
- int argc;
- char **argv;
-
- /* The index in ARGV of the next arg that to be parsed. May be modified. */
- int next;
-
- /* The flags supplied to argp_parse. May be modified. */
- unsigned flags;
-
- /* While calling a parsing function with a key of ARGP_KEY_ARG, this is the
- number of the current arg, starting at zero, and incremented after each
- such call returns. At all other times, this is the number of such
- arguments that have been processed. */
- unsigned arg_num;
-
- /* If non-zero, the index in ARGV of the first argument following a special
- `--' argument (which prevents anything following being interpreted as an
- option). Only set once argument parsing has proceeded past this point. */
- int quoted;
-
- /* An arbitrary pointer passed in from the user. */
- void *input;
- /* Values to pass to child parsers. This vector will be the same length as
- the number of children for the current parser. */
- void **child_inputs;
-
- /* For the parser's use. Initialized to 0. */
- void *hook;
-
- /* The name used when printing messages. This is initialized to ARGV[0],
- or PROGRAM_INVOCATION_NAME if that is unavailable. */
- char *name;
-
- /* Streams used when argp prints something. */
- FILE *err_stream; /* For errors; initialized to stderr. */
- FILE *out_stream; /* For information; initialized to stdout. */
-
- void *pstate; /* Private, for use by argp. */
-};
-
-/* Flags for argp_parse (note that the defaults are those that are
- convenient for program command line parsing): */
-
-/* Don't ignore the first element of ARGV. Normally (and always unless
- ARGP_NO_ERRS is set) the first element of the argument vector is
- skipped for option parsing purposes, as it corresponds to the program name
- in a command line. */
-#define ARGP_PARSE_ARGV0 0x01
-
-/* Don't print error messages for unknown options to stderr; unless this flag
- is set, ARGP_PARSE_ARGV0 is ignored, as ARGV[0] is used as the program
- name in the error messages. This flag implies ARGP_NO_EXIT (on the
- assumption that silent exiting upon errors is bad behaviour). */
-#define ARGP_NO_ERRS 0x02
-
-/* Don't parse any non-option args. Normally non-option args are parsed by
- calling the parse functions with a key of ARGP_KEY_ARG, and the actual arg
- as the value. Since it's impossible to know which parse function wants to
- handle it, each one is called in turn, until one returns 0 or an error
- other than ARGP_ERR_UNKNOWN; if an argument is handled by no one, the
- argp_parse returns prematurely (but with a return value of 0). If all
- args have been parsed without error, all parsing functions are called one
- last time with a key of ARGP_KEY_END. This flag needn't normally be set,
- as the normal behavior is to stop parsing as soon as some argument can't
- be handled. */
-#define ARGP_NO_ARGS 0x04
-
-/* Parse options and arguments in the same order they occur on the command
- line -- normally they're rearranged so that all options come first. */
-#define ARGP_IN_ORDER 0x08
-
-/* Don't provide the standard long option --help, which causes usage and
- option help information to be output to stdout, and exit (0) called. */
-#define ARGP_NO_HELP 0x10
-
-/* Don't exit on errors (they may still result in error messages). */
-#define ARGP_NO_EXIT 0x20
-
-/* Use the gnu getopt `long-only' rules for parsing arguments. */
-#define ARGP_LONG_ONLY 0x40
-
-/* Turns off any message-printing/exiting options. */
-#define ARGP_SILENT (ARGP_NO_EXIT | ARGP_NO_ERRS | ARGP_NO_HELP)
-
-/* Parse the options strings in ARGC & ARGV according to the options in ARGP.
- FLAGS is one of the ARGP_ flags above. If ARG_INDEX is non-NULL, the
- index in ARGV of the first unparsed option is returned in it. If an
- unknown option is present, ARGP_ERR_UNKNOWN is returned; if some parser
- routine returned a non-zero value, it is returned; otherwise 0 is
- returned. This function may also call exit unless the ARGP_NO_HELP flag
- is set. INPUT is a pointer to a value to be passed in to the parser. */
-extern error_t argp_parse (__const struct argp *__restrict argp,
- int argc, char **__restrict argv,
- unsigned flags, int *__restrict arg_index,
- void *__restrict input) __THROW;
-extern error_t __argp_parse (__const struct argp *__restrict argp,
- int argc, char **__restrict argv,
- unsigned flags, int *__restrict arg_index,
- void *__restrict input) __THROW;
-
-/* Global variables. */
-
-/* If defined or set by the user program to a non-zero value, then a default
- option --version is added (unless the ARGP_NO_HELP flag is used), which
- will print this string followed by a newline and exit (unless the
- ARGP_NO_EXIT flag is used). Overridden by ARGP_PROGRAM_VERSION_HOOK. */
-extern __const char *argp_program_version;
-
-/* If defined or set by the user program to a non-zero value, then a default
- option --version is added (unless the ARGP_NO_HELP flag is used), which
- calls this function with a stream to print the version to and a pointer to
- the current parsing state, and then exits (unless the ARGP_NO_EXIT flag is
- used). This variable takes precedent over ARGP_PROGRAM_VERSION. */
-extern void (*argp_program_version_hook) (FILE *__restrict __stream,
- struct argp_state *__restrict
- __state);
-
-/* If defined or set by the user program, it should point to string that is
- the bug-reporting address for the program. It will be printed by
- argp_help if the ARGP_HELP_BUG_ADDR flag is set (as it is by various
- standard help messages), embedded in a sentence that says something like
- `Report bugs to ADDR.'. */
-extern __const char *argp_program_bug_address;
-
-/* The exit status that argp will use when exiting due to a parsing error.
- If not defined or set by the user program, this defaults to EX_USAGE from
- <sysexits.h>. */
-extern error_t argp_err_exit_status;
-
-/* Flags for argp_help. */
-#define ARGP_HELP_USAGE 0x01 /* a Usage: message. */
-#define ARGP_HELP_SHORT_USAGE 0x02 /* " but don't actually print options. */
-#define ARGP_HELP_SEE 0x04 /* a `Try ... for more help' message. */
-#define ARGP_HELP_LONG 0x08 /* a long help message. */
-#define ARGP_HELP_PRE_DOC 0x10 /* doc string preceding long help. */
-#define ARGP_HELP_POST_DOC 0x20 /* doc string following long help. */
-#define ARGP_HELP_DOC (ARGP_HELP_PRE_DOC | ARGP_HELP_POST_DOC)
-#define ARGP_HELP_BUG_ADDR 0x40 /* bug report address */
-#define ARGP_HELP_LONG_ONLY 0x80 /* modify output appropriately to
- reflect ARGP_LONG_ONLY mode. */
-
-/* These ARGP_HELP flags are only understood by argp_state_help. */
-#define ARGP_HELP_EXIT_ERR 0x100 /* Call exit(1) instead of returning. */
-#define ARGP_HELP_EXIT_OK 0x200 /* Call exit(0) instead of returning. */
-
-/* The standard thing to do after a program command line parsing error, if an
- error message has already been printed. */
-#define ARGP_HELP_STD_ERR \
- (ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR)
-/* The standard thing to do after a program command line parsing error, if no
- more specific error message has been printed. */
-#define ARGP_HELP_STD_USAGE \
- (ARGP_HELP_SHORT_USAGE | ARGP_HELP_SEE | ARGP_HELP_EXIT_ERR)
-/* The standard thing to do in response to a --help option. */
-#define ARGP_HELP_STD_HELP \
- (ARGP_HELP_SHORT_USAGE | ARGP_HELP_LONG | ARGP_HELP_EXIT_OK \
- | ARGP_HELP_DOC | ARGP_HELP_BUG_ADDR)
-
-/* Output a usage message for ARGP to STREAM. FLAGS are from the set
- ARGP_HELP_*. */
-extern void argp_help (__const struct argp *__restrict __argp,
- FILE *__restrict __stream,
- unsigned __flags, char *__restrict __name) __THROW;
-extern void __argp_help (__const struct argp *__restrict __argp,
- FILE *__restrict __stream, unsigned __flags,
- char *__name) __THROW;
-
-/* The following routines are intended to be called from within an argp
- parsing routine (thus taking an argp_state structure as the first
- argument). They may or may not print an error message and exit, depending
- on the flags in STATE -- in any case, the caller should be prepared for
- them *not* to exit, and should return an appropiate error after calling
- them. [argp_usage & argp_error should probably be called argp_state_...,
- but they're used often enough that they should be short] */
-
-/* Output, if appropriate, a usage message for STATE to STREAM. FLAGS are
- from the set ARGP_HELP_*. */
-extern void argp_state_help (__const struct argp_state *__restrict __state,
- FILE *__restrict __stream,
- unsigned int __flags) __THROW;
-extern void __argp_state_help (__const struct argp_state *__restrict __state,
- FILE *__restrict __stream,
- unsigned int __flags) __THROW;
-
-/* Possibly output the standard usage message for ARGP to stderr and exit. */
-extern void argp_usage (__const struct argp_state *__state) __THROW;
-extern void __argp_usage (__const struct argp_state *__state) __THROW;
-
-/* If appropriate, print the printf string FMT and following args, preceded
- by the program name and `:', to stderr, and followed by a `Try ... --help'
- message, then exit (1). */
-extern void argp_error (__const struct argp_state *__restrict __state,
- __const char *__restrict __fmt, ...) __THROW
- PRINTF_STYLE(2,3);
-extern void __argp_error (__const struct argp_state *__restrict __state,
- __const char *__restrict __fmt, ...) __THROW
- PRINTF_STYLE(2,3);
-
-/* Similar to the standard gnu error-reporting function error(), but will
- respect the ARGP_NO_EXIT and ARGP_NO_ERRS flags in STATE, and will print
- to STATE->err_stream. This is useful for argument parsing code that is
- shared between program startup (when exiting is desired) and runtime
- option parsing (when typically an error code is returned instead). The
- difference between this function and argp_error is that the latter is for
- *parsing errors*, and the former is for other problems that occur during
- parsing but don't reflect a (syntactic) problem with the input. */
-extern void argp_failure (__const struct argp_state *__restrict __state,
- int __status, int __errnum,
- __const char *__restrict __fmt, ...) __THROW
- PRINTF_STYLE(4,5);
-extern void __argp_failure (__const struct argp_state *__restrict __state,
- int __status, int __errnum,
- __const char *__restrict __fmt, ...) __THROW
- PRINTF_STYLE(4,5);
-
-/* Returns true if the option OPT is a valid short option. */
-extern int _option_is_short (__const struct argp_option *__opt) __THROW;
-extern int __option_is_short (__const struct argp_option *__opt) __THROW;
-
-/* Returns true if the option OPT is in fact the last (unused) entry in an
- options array. */
-extern int _option_is_end (__const struct argp_option *__opt) __THROW;
-extern int __option_is_end (__const struct argp_option *__opt) __THROW;
-
-/* Return the input field for ARGP in the parser corresponding to STATE; used
- by the help routines. */
-extern void *_argp_input (__const struct argp *__restrict __argp,
- __const struct argp_state *__restrict __state)
- __THROW;
-extern void *__argp_input (__const struct argp *__restrict __argp,
- __const struct argp_state *__restrict __state)
- __THROW;
-
-/* Used for extracting the program name from argv[0] */
-extern char *_argp_basename(char *name) __THROW;
-extern char *__argp_basename(char *name) __THROW;
-
-/* Getting the program name given an argp state */
-extern char *
-_argp_short_program_name(const struct argp_state *state) __THROW;
-extern char *
-__argp_short_program_name(const struct argp_state *state) __THROW;
-
-
-#ifdef __USE_EXTERN_INLINES
-
-# if !_LIBC
-# define __argp_usage argp_usage
-# define __argp_state_help argp_state_help
-# define __option_is_short _option_is_short
-# define __option_is_end _option_is_end
-# endif
-
-# ifndef ARGP_EI
-# define ARGP_EI extern __inline__
-# endif
-
-ARGP_EI void
-__argp_usage (__const struct argp_state *__state)
-{
- __argp_state_help (__state, stderr, ARGP_HELP_STD_USAGE);
-}
-
-ARGP_EI int
-__option_is_short (__const struct argp_option *__opt)
-{
- if (__opt->flags & OPTION_DOC)
- return 0;
- else
- {
- int __key = __opt->key;
- return __key > 0 && isprint (__key);
- }
-}
-
-ARGP_EI int
-__option_is_end (__const struct argp_option *__opt)
-{
- return !__opt->key && !__opt->name && !__opt->doc && !__opt->group;
-}
-
-# if !_LIBC
-# undef __argp_usage
-# undef __argp_state_help
-# undef __option_is_short
-# undef __option_is_end
-# endif
-#endif /* Use extern inlines. */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* argp.h */
diff --git a/argp-standalone/autogen.sh b/argp-standalone/autogen.sh
deleted file mode 100755
index 8337353b5ae..00000000000
--- a/argp-standalone/autogen.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/sh
-
-aclocal -I .
-autoheader
-autoconf
-automake --add-missing --copy --foreign
diff --git a/argp-standalone/configure.ac b/argp-standalone/configure.ac
deleted file mode 100644
index 65ebc451815..00000000000
--- a/argp-standalone/configure.ac
+++ /dev/null
@@ -1,102 +0,0 @@
-dnl Process this file with autoconf to produce a configure script.
-
-dnl This configure.ac is only for building a standalone argp library.
-AC_INIT([argp], [standalone-1.3])
-AC_PREREQ(2.54)
-AC_CONFIG_SRCDIR([argp-ba.c])
-# Needed to stop autoconf from looking for files in parent directories.
-AC_CONFIG_AUX_DIR([.])
-
-AM_INIT_AUTOMAKE
-AM_CONFIG_HEADER(config.h)
-
-m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)])
-
-# GNU libc defaults to supplying the ISO C library functions only. The
-# _GNU_SOURCE define enables these extensions, in particular we want
-# errno.h to declare program_invocation_name. Enable it on all
-# systems; no problems have been reported with it so far.
-AC_GNU_SOURCE
-
-# Checks for programs.
-AC_PROG_CC
-AC_PROG_MAKE_SET
-AC_PROG_RANLIB
-AM_PROG_CC_STDC
-
-if test "x$am_cv_prog_cc_stdc" = xno ; then
- AC_ERROR([the C compiler doesn't handle ANSI-C])
-fi
-
-# Checks for libraries.
-
-# Checks for header files.
-AC_HEADER_STDC
-AC_CHECK_HEADERS(limits.h malloc.h unistd.h sysexits.h stdarg.h)
-
-# Checks for typedefs, structures, and compiler characteristics.
-AC_C_CONST
-AC_C_INLINE
-AC_TYPE_SIZE_T
-
-LSH_GCC_ATTRIBUTES
-
-# Checks for library functions.
-AC_FUNC_ALLOCA
-AC_FUNC_VPRINTF
-AC_CHECK_FUNCS(strerror sleep getpid snprintf)
-
-AC_REPLACE_FUNCS(mempcpy strndup strchrnul strcasecmp vsnprintf)
-
-dnl ARGP_CHECK_FUNC(includes, function-call [, if-found [, if-not-found]])
-AC_DEFUN([ARGP_CHECK_FUNC],
- [AS_VAR_PUSHDEF([ac_func], m4_substr([$2], 0, m4_index([$2], [(])))
- AS_VAR_PUSHDEF([ac_var], [ac_cv_func_call_]ac_func)
- AH_TEMPLATE(AS_TR_CPP(HAVE_[]ac_func),
- [Define to 1 if you have the `]ac_func[' function.])
- AC_CACHE_CHECK([for $2], ac_var,
- [AC_TRY_LINK([$1], [$2],
- [AS_VAR_SET(ac_var, yes)],
- [AS_VAR_SET(ac_var, no)])])
- if test AS_VAR_GET(ac_var) = yes ; then
- ifelse([$3],,
- [AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_[]ac_func))],
- [$3
-])
- else
- ifelse([$4],, true, [$4])
- fi
- AS_VAR_POPDEF([ac_var])
- AS_VAR_POPDEF([ac_func])
- ])
-
-# At least on freebsd, putc_unlocked is a macro, so the standard
-# AC_CHECK_FUNCS doesn't work well.
-ARGP_CHECK_FUNC([#include <stdio.h>], [putc_unlocked('x', stdout)])
-
-AC_CHECK_FUNCS(flockfile)
-AC_CHECK_FUNCS(fputs_unlocked fwrite_unlocked)
-
-# Used only by argp-test.c, so don't use AC_REPLACE_FUNCS.
-AC_CHECK_FUNCS(strdup asprintf)
-
-AC_CHECK_DECLS([program_invocation_name, program_invocation_short_name],
- [], [], [[#include <errno.h>]])
-
-# Set these flags *last*, or else the test programs won't compile
-if test x$GCC = xyes ; then
- # Using -ggdb3 makes (some versions of) Redhat's gcc-2.96 dump core
- if "$CC" --version | grep '^2\.96$' 1>/dev/null 2>&1; then
- true
- else
- CFLAGS="$CFLAGS -ggdb3"
- fi
- CFLAGS="$CFLAGS -Wall -W \
- -Wmissing-prototypes -Wmissing-declarations -Wstrict-prototypes \
- -Waggregate-return \
- -Wpointer-arith -Wbad-function-cast -Wnested-externs"
-fi
-
-CPPFLAGS="$CPPFLAGS -I$srcdir"
-
-AC_OUTPUT(Makefile)
diff --git a/argp-standalone/mempcpy.c b/argp-standalone/mempcpy.c
deleted file mode 100644
index 21d8bd2ed94..00000000000
--- a/argp-standalone/mempcpy.c
+++ /dev/null
@@ -1,21 +0,0 @@
-/* strndup.c
- *
- */
-
-/* Written by Niels Möller <nisse@lysator.liu.se>
- *
- * This file is hereby placed in the public domain.
- */
-
-#include <string.h>
-
-void *
-mempcpy (void *, const void *, size_t) ;
-
-void *
-mempcpy (void *to, const void *from, size_t size)
-{
- memcpy(to, from, size);
- return (char *) to + size;
-}
-
diff --git a/argp-standalone/strcasecmp.c b/argp-standalone/strcasecmp.c
deleted file mode 100644
index 9c1637232fd..00000000000
--- a/argp-standalone/strcasecmp.c
+++ /dev/null
@@ -1,29 +0,0 @@
-/* strcasecmp.c
- *
- */
-
-/* Written by Niels Möller <nisse@lysator.liu.se>
- *
- * This file is hereby placed in the public domain.
- */
-
-#include <ctype.h>
-int strcasecmp(const char *, const char *);
-
-int strcasecmp(const char *s1, const char *s2)
-{
- unsigned i;
-
- for (i = 0; s1[i] && s2[i]; i++)
- {
- unsigned char c1 = tolower( (unsigned char) s1[i]);
- unsigned char c2 = tolower( (unsigned char) s2[i]);
-
- if (c1 < c2)
- return -1;
- else if (c1 > c2)
- return 1;
- }
-
- return !s2[i] - !s1[i];
-}
diff --git a/argp-standalone/strchrnul.c b/argp-standalone/strchrnul.c
deleted file mode 100644
index ee4145e4eda..00000000000
--- a/argp-standalone/strchrnul.c
+++ /dev/null
@@ -1,23 +0,0 @@
-/* strchrnul.c
- *
- */
-
-/* Written by Niels Möller <nisse@lysator.liu.se>
- *
- * This file is hereby placed in the public domain.
- */
-
-/* FIXME: What is this function supposed to do? My guess is that it is
- * like strchr, but returns a pointer to the NUL character, not a NULL
- * pointer, if the character isn't found. */
-
-char *strchrnul(const char *, int );
-
-char *strchrnul(const char *s, int c)
-{
- const char *p = s;
- while (*p && (*p != c))
- p++;
-
- return (char *) p;
-}
diff --git a/argp-standalone/strndup.c b/argp-standalone/strndup.c
deleted file mode 100644
index 4147b7a2051..00000000000
--- a/argp-standalone/strndup.c
+++ /dev/null
@@ -1,34 +0,0 @@
-/* strndup.c
- *
- */
-
-/* Written by Niels Möller <nisse@lysator.liu.se>
- *
- * This file is hereby placed in the public domain.
- */
-
-#include <stdlib.h>
-#include <string.h>
-
-char *
-strndup (const char *, size_t);
-
-char *
-strndup (const char *s, size_t size)
-{
- char *r;
- char *end = memchr(s, 0, size);
-
- if (end)
- /* Length + 1 */
- size = end - s + 1;
-
- r = malloc(size);
-
- if (size)
- {
- memcpy(r, s, size-1);
- r[size-1] = '\0';
- }
- return r;
-}
diff --git a/argp-standalone/vsnprintf.c b/argp-standalone/vsnprintf.c
deleted file mode 100644
index 33c9a5d0042..00000000000
--- a/argp-standalone/vsnprintf.c
+++ /dev/null
@@ -1,839 +0,0 @@
-/* Copied from http://www.fiction.net/blong/programs/snprintf.c */
-
-/*
- * Copyright Patrick Powell 1995
- * This code is based on code written by Patrick Powell (papowell@astart.com)
- * It may be used for any purpose as long as this notice remains intact
- * on all source code distributions
- */
-
-/**************************************************************
- * Original:
- * Patrick Powell Tue Apr 11 09:48:21 PDT 1995
- * A bombproof version of doprnt (dopr) included.
- * Sigh. This sort of thing is always nasty do deal with. Note that
- * the version here does not include floating point...
- *
- * snprintf() is used instead of sprintf() as it does limit checks
- * for string length. This covers a nasty loophole.
- *
- * The other functions are there to prevent NULL pointers from
- * causing nast effects.
- *
- * More Recently:
- * Brandon Long <blong@fiction.net> 9/15/96 for mutt 0.43
- * This was ugly. It is still ugly. I opted out of floating point
- * numbers, but the formatter understands just about everything
- * from the normal C string format, at least as far as I can tell from
- * the Solaris 2.5 printf(3S) man page.
- *
- * Brandon Long <blong@fiction.net> 10/22/97 for mutt 0.87.1
- * Ok, added some minimal floating point support, which means this
- * probably requires libm on most operating systems. Don't yet
- * support the exponent (e,E) and sigfig (g,G). Also, fmtint()
- * was pretty badly broken, it just wasn't being exercised in ways
- * which showed it, so that's been fixed. Also, formated the code
- * to mutt conventions, and removed dead code left over from the
- * original. Also, there is now a builtin-test, just compile with:
- * gcc -DTEST_SNPRINTF -o snprintf snprintf.c -lm
- * and run snprintf for results.
- *
- * Thomas Roessler <roessler@guug.de> 01/27/98 for mutt 0.89i
- * The PGP code was using unsigned hexadecimal formats.
- * Unfortunately, unsigned formats simply didn't work.
- *
- * Michael Elkins <me@cs.hmc.edu> 03/05/98 for mutt 0.90.8
- * The original code assumed that both snprintf() and vsnprintf() were
- * missing. Some systems only have snprintf() but not vsnprintf(), so
- * the code is now broken down under HAVE_SNPRINTF and HAVE_VSNPRINTF.
- *
- * Andrew Tridgell (tridge@samba.org) Oct 1998
- * fixed handling of %.0f
- * added test for HAVE_LONG_DOUBLE
- *
- * Russ Allbery <rra@stanford.edu> 2000-08-26
- * fixed return value to comply with C99
- * fixed handling of snprintf(NULL, ...)
- *
- * Niels Möller <nisse@lysator.liu.se> 2004-03-05
- * fixed calls to isdigit to use unsigned char.
- * fixed calls to va_arg; short arguments are always passed as int.
- *
- **************************************************************/
-
-#if HAVE_CONFIG_H
-# include "config.h"
-#endif
-
-#if !defined(HAVE_SNPRINTF) || !defined(HAVE_VSNPRINTF)
-
-#include <string.h>
-#include <ctype.h>
-#include <sys/types.h>
-
-/* Define this as a fall through, HAVE_STDARG_H is probably already set */
-
-#define HAVE_VARARGS_H
-
-
-/* varargs declarations: */
-
-#if defined(HAVE_STDARG_H)
-# include <stdarg.h>
-# define HAVE_STDARGS /* let's hope that works everywhere (mj) */
-# define VA_LOCAL_DECL va_list ap
-# define VA_START(f) va_start(ap, f)
-# define VA_SHIFT(v,t) ; /* no-op for ANSI */
-# define VA_END va_end(ap)
-#else
-# if defined(HAVE_VARARGS_H)
-# include <varargs.h>
-# undef HAVE_STDARGS
-# define VA_LOCAL_DECL va_list ap
-# define VA_START(f) va_start(ap) /* f is ignored! */
-# define VA_SHIFT(v,t) v = va_arg(ap,t)
-# define VA_END va_end(ap)
-# else
-/*XX ** NO VARARGS ** XX*/
-# endif
-#endif
-
-#ifdef HAVE_LONG_DOUBLE
-#define LDOUBLE long double
-#else
-#define LDOUBLE double
-#endif
-
-int snprintf (char *str, size_t count, const char *fmt, ...);
-int vsnprintf (char *str, size_t count, const char *fmt, va_list arg);
-
-static int dopr (char *buffer, size_t maxlen, const char *format,
- va_list args);
-static int fmtstr (char *buffer, size_t *currlen, size_t maxlen,
- char *value, int flags, int min, int max);
-static int fmtint (char *buffer, size_t *currlen, size_t maxlen,
- long value, int base, int min, int max, int flags);
-static int fmtfp (char *buffer, size_t *currlen, size_t maxlen,
- LDOUBLE fvalue, int min, int max, int flags);
-static int dopr_outch (char *buffer, size_t *currlen, size_t maxlen, char c );
-
-/*
- * dopr(): poor man's version of doprintf
- */
-
-/* format read states */
-#define DP_S_DEFAULT 0
-#define DP_S_FLAGS 1
-#define DP_S_MIN 2
-#define DP_S_DOT 3
-#define DP_S_MAX 4
-#define DP_S_MOD 5
-#define DP_S_CONV 6
-#define DP_S_DONE 7
-
-/* format flags - Bits */
-#define DP_F_MINUS (1 << 0)
-#define DP_F_PLUS (1 << 1)
-#define DP_F_SPACE (1 << 2)
-#define DP_F_NUM (1 << 3)
-#define DP_F_ZERO (1 << 4)
-#define DP_F_UP (1 << 5)
-#define DP_F_UNSIGNED (1 << 6)
-
-/* Conversion Flags */
-#define DP_C_SHORT 1
-#define DP_C_LONG 2
-#define DP_C_LDOUBLE 3
-
-#define char_to_int(p) (p - '0')
-#define MAX(p,q) ((p >= q) ? p : q)
-#define MIN(p,q) ((p <= q) ? p : q)
-
-static int dopr (char *buffer, size_t maxlen, const char *format, va_list args)
-{
- unsigned char ch;
- long value;
- LDOUBLE fvalue;
- char *strvalue;
- int min;
- int max;
- int state;
- int flags;
- int cflags;
- int total;
- size_t currlen;
-
- state = DP_S_DEFAULT;
- currlen = flags = cflags = min = 0;
- max = -1;
- ch = *format++;
- total = 0;
-
- while (state != DP_S_DONE)
- {
- if (ch == '\0')
- state = DP_S_DONE;
-
- switch(state)
- {
- case DP_S_DEFAULT:
- if (ch == '%')
- state = DP_S_FLAGS;
- else
- total += dopr_outch (buffer, &currlen, maxlen, ch);
- ch = *format++;
- break;
- case DP_S_FLAGS:
- switch (ch)
- {
- case '-':
- flags |= DP_F_MINUS;
- ch = *format++;
- break;
- case '+':
- flags |= DP_F_PLUS;
- ch = *format++;
- break;
- case ' ':
- flags |= DP_F_SPACE;
- ch = *format++;
- break;
- case '#':
- flags |= DP_F_NUM;
- ch = *format++;
- break;
- case '0':
- flags |= DP_F_ZERO;
- ch = *format++;
- break;
- default:
- state = DP_S_MIN;
- break;
- }
- break;
- case DP_S_MIN:
- if (isdigit(ch))
- {
- min = 10*min + char_to_int (ch);
- ch = *format++;
- }
- else if (ch == '*')
- {
- min = va_arg (args, int);
- ch = *format++;
- state = DP_S_DOT;
- }
- else
- state = DP_S_DOT;
- break;
- case DP_S_DOT:
- if (ch == '.')
- {
- state = DP_S_MAX;
- ch = *format++;
- }
- else
- state = DP_S_MOD;
- break;
- case DP_S_MAX:
- if (isdigit(ch))
- {
- if (max < 0)
- max = 0;
- max = 10*max + char_to_int (ch);
- ch = *format++;
- }
- else if (ch == '*')
- {
- max = va_arg (args, int);
- ch = *format++;
- state = DP_S_MOD;
- }
- else
- state = DP_S_MOD;
- break;
- case DP_S_MOD:
- /* Currently, we don't support Long Long, bummer */
- switch (ch)
- {
- case 'h':
- cflags = DP_C_SHORT;
- ch = *format++;
- break;
- case 'l':
- cflags = DP_C_LONG;
- ch = *format++;
- break;
- case 'L':
- cflags = DP_C_LDOUBLE;
- ch = *format++;
- break;
- default:
- break;
- }
- state = DP_S_CONV;
- break;
- case DP_S_CONV:
- switch (ch)
- {
- case 'd':
- case 'i':
- if (cflags == DP_C_SHORT)
- value = (short) va_arg (args, int);
- else if (cflags == DP_C_LONG)
- value = va_arg (args, long int);
- else
- value = va_arg (args, int);
- total += fmtint (buffer, &currlen, maxlen, value, 10, min, max, flags);
- break;
- case 'o':
- flags |= DP_F_UNSIGNED;
- if (cflags == DP_C_SHORT)
- value = (unsigned short) va_arg (args, unsigned);
- else if (cflags == DP_C_LONG)
- value = va_arg (args, unsigned long int);
- else
- value = va_arg (args, unsigned int);
- total += fmtint (buffer, &currlen, maxlen, value, 8, min, max, flags);
- break;
- case 'u':
- flags |= DP_F_UNSIGNED;
- if (cflags == DP_C_SHORT)
- value = (unsigned short) va_arg (args, unsigned);
- else if (cflags == DP_C_LONG)
- value = va_arg (args, unsigned long int);
- else
- value = va_arg (args, unsigned int);
- total += fmtint (buffer, &currlen, maxlen, value, 10, min, max, flags);
- break;
- case 'X':
- flags |= DP_F_UP;
- case 'x':
- flags |= DP_F_UNSIGNED;
- if (cflags == DP_C_SHORT)
- value = (unsigned short) va_arg (args, unsigned);
- else if (cflags == DP_C_LONG)
- value = va_arg (args, unsigned long int);
- else
- value = va_arg (args, unsigned int);
- total += fmtint (buffer, &currlen, maxlen, value, 16, min, max, flags);
- break;
- case 'f':
- if (cflags == DP_C_LDOUBLE)
- fvalue = va_arg (args, LDOUBLE);
- else
- fvalue = va_arg (args, double);
- /* um, floating point? */
- total += fmtfp (buffer, &currlen, maxlen, fvalue, min, max, flags);
- break;
- case 'E':
- flags |= DP_F_UP;
- case 'e':
- if (cflags == DP_C_LDOUBLE)
- fvalue = va_arg (args, LDOUBLE);
- else
- fvalue = va_arg (args, double);
- break;
- case 'G':
- flags |= DP_F_UP;
- case 'g':
- if (cflags == DP_C_LDOUBLE)
- fvalue = va_arg (args, LDOUBLE);
- else
- fvalue = va_arg (args, double);
- break;
- case 'c':
- total += dopr_outch (buffer, &currlen, maxlen, va_arg (args, int));
- break;
- case 's':
- strvalue = va_arg (args, char *);
- total += fmtstr (buffer, &currlen, maxlen, strvalue, flags, min, max);
- break;
- case 'p':
- strvalue = va_arg (args, void *);
- total += fmtint (buffer, &currlen, maxlen, (long) strvalue, 16, min,
- max, flags);
- break;
- case 'n':
- if (cflags == DP_C_SHORT)
- {
- short int *num;
- num = va_arg (args, short int *);
- *num = currlen;
- }
- else if (cflags == DP_C_LONG)
- {
- long int *num;
- num = va_arg (args, long int *);
- *num = currlen;
- }
- else
- {
- int *num;
- num = va_arg (args, int *);
- *num = currlen;
- }
- break;
- case '%':
- total += dopr_outch (buffer, &currlen, maxlen, ch);
- break;
- case 'w':
- /* not supported yet, treat as next char */
- ch = *format++;
- break;
- default:
- /* Unknown, skip */
- break;
- }
- ch = *format++;
- state = DP_S_DEFAULT;
- flags = cflags = min = 0;
- max = -1;
- break;
- case DP_S_DONE:
- break;
- default:
- /* hmm? */
- break; /* some picky compilers need this */
- }
- }
- if (buffer != NULL)
- {
- if (currlen < maxlen - 1)
- buffer[currlen] = '\0';
- else
- buffer[maxlen - 1] = '\0';
- }
- return total;
-}
-
-static int fmtstr (char *buffer, size_t *currlen, size_t maxlen,
- char *value, int flags, int min, int max)
-{
- int padlen, strln; /* amount to pad */
- int cnt = 0;
- int total = 0;
-
- if (value == 0)
- {
- value = "<NULL>";
- }
-
- for (strln = 0; value[strln]; ++strln); /* strlen */
- if (max >= 0 && max < strln)
- strln = max;
- padlen = min - strln;
- if (padlen < 0)
- padlen = 0;
- if (flags & DP_F_MINUS)
- padlen = -padlen; /* Left Justify */
-
- while (padlen > 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, ' ');
- --padlen;
- }
- while (*value && ((max < 0) || (cnt < max)))
- {
- total += dopr_outch (buffer, currlen, maxlen, *value++);
- ++cnt;
- }
- while (padlen < 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, ' ');
- ++padlen;
- }
- return total;
-}
-
-/* Have to handle DP_F_NUM (ie 0x and 0 alternates) */
-
-static int fmtint (char *buffer, size_t *currlen, size_t maxlen,
- long value, int base, int min, int max, int flags)
-{
- int signvalue = 0;
- unsigned long uvalue;
- char convert[20];
- int place = 0;
- int spadlen = 0; /* amount to space pad */
- int zpadlen = 0; /* amount to zero pad */
- int caps = 0;
- int total = 0;
-
- if (max < 0)
- max = 0;
-
- uvalue = value;
-
- if(!(flags & DP_F_UNSIGNED))
- {
- if( value < 0 ) {
- signvalue = '-';
- uvalue = -value;
- }
- else
- if (flags & DP_F_PLUS) /* Do a sign (+/i) */
- signvalue = '+';
- else
- if (flags & DP_F_SPACE)
- signvalue = ' ';
- }
-
- if (flags & DP_F_UP) caps = 1; /* Should characters be upper case? */
-
- do {
- convert[place++] =
- (caps? "0123456789ABCDEF":"0123456789abcdef")
- [uvalue % (unsigned)base ];
- uvalue = (uvalue / (unsigned)base );
- } while(uvalue && (place < 20));
- if (place == 20) place--;
- convert[place] = 0;
-
- zpadlen = max - place;
- spadlen = min - MAX (max, place) - (signvalue ? 1 : 0);
- if (zpadlen < 0) zpadlen = 0;
- if (spadlen < 0) spadlen = 0;
- if (flags & DP_F_ZERO)
- {
- zpadlen = MAX(zpadlen, spadlen);
- spadlen = 0;
- }
- if (flags & DP_F_MINUS)
- spadlen = -spadlen; /* Left Justifty */
-
-#ifdef DEBUG_SNPRINTF
- dprint (1, (debugfile, "zpad: %d, spad: %d, min: %d, max: %d, place: %d\n",
- zpadlen, spadlen, min, max, place));
-#endif
-
- /* Spaces */
- while (spadlen > 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, ' ');
- --spadlen;
- }
-
- /* Sign */
- if (signvalue)
- total += dopr_outch (buffer, currlen, maxlen, signvalue);
-
- /* Zeros */
- if (zpadlen > 0)
- {
- while (zpadlen > 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, '0');
- --zpadlen;
- }
- }
-
- /* Digits */
- while (place > 0)
- total += dopr_outch (buffer, currlen, maxlen, convert[--place]);
-
- /* Left Justified spaces */
- while (spadlen < 0) {
- total += dopr_outch (buffer, currlen, maxlen, ' ');
- ++spadlen;
- }
-
- return total;
-}
-
-static LDOUBLE abs_val (LDOUBLE value)
-{
- LDOUBLE result = value;
-
- if (value < 0)
- result = -value;
-
- return result;
-}
-
-static LDOUBLE pow10_argp (int exp)
-{
- LDOUBLE result = 1;
-
- while (exp)
- {
- result *= 10;
- exp--;
- }
-
- return result;
-}
-
-static long round_argp (LDOUBLE value)
-{
- long intpart;
-
- intpart = value;
- value = value - intpart;
- if (value >= 0.5)
- intpart++;
-
- return intpart;
-}
-
-static int fmtfp (char *buffer, size_t *currlen, size_t maxlen,
- LDOUBLE fvalue, int min, int max, int flags)
-{
- int signvalue = 0;
- LDOUBLE ufvalue;
- char iconvert[20];
- char fconvert[20];
- int iplace = 0;
- int fplace = 0;
- int padlen = 0; /* amount to pad */
- int zpadlen = 0;
- int caps = 0;
- int total = 0;
- long intpart;
- long fracpart;
-
- /*
- * AIX manpage says the default is 0, but Solaris says the default
- * is 6, and sprintf on AIX defaults to 6
- */
- if (max < 0)
- max = 6;
-
- ufvalue = abs_val (fvalue);
-
- if (fvalue < 0)
- signvalue = '-';
- else
- if (flags & DP_F_PLUS) /* Do a sign (+/i) */
- signvalue = '+';
- else
- if (flags & DP_F_SPACE)
- signvalue = ' ';
-
-#if 0
- if (flags & DP_F_UP) caps = 1; /* Should characters be upper case? */
-#endif
-
- intpart = ufvalue;
-
- /*
- * Sorry, we only support 9 digits past the decimal because of our
- * conversion method
- */
- if (max > 9)
- max = 9;
-
- /* We "cheat" by converting the fractional part to integer by
- * multiplying by a factor of 10
- */
- fracpart = round_argp ((pow10_argp (max)) * (ufvalue - intpart));
-
- if (fracpart >= pow10_argp (max))
- {
- intpart++;
- fracpart -= pow10_argp (max);
- }
-
-#ifdef DEBUG_SNPRINTF
- dprint (1, (debugfile, "fmtfp: %f =? %d.%d\n", fvalue, intpart, fracpart));
-#endif
-
- /* Convert integer part */
- do {
- iconvert[iplace++] =
- (caps? "0123456789ABCDEF":"0123456789abcdef")[intpart % 10];
- intpart = (intpart / 10);
- } while(intpart && (iplace < 20));
- if (iplace == 20) iplace--;
- iconvert[iplace] = 0;
-
- /* Convert fractional part */
- do {
- fconvert[fplace++] =
- (caps? "0123456789ABCDEF":"0123456789abcdef")[fracpart % 10];
- fracpart = (fracpart / 10);
- } while(fracpart && (fplace < 20));
- if (fplace == 20) fplace--;
- fconvert[fplace] = 0;
-
- /* -1 for decimal point, another -1 if we are printing a sign */
- padlen = min - iplace - max - 1 - ((signvalue) ? 1 : 0);
- zpadlen = max - fplace;
- if (zpadlen < 0)
- zpadlen = 0;
- if (padlen < 0)
- padlen = 0;
- if (flags & DP_F_MINUS)
- padlen = -padlen; /* Left Justifty */
-
- if ((flags & DP_F_ZERO) && (padlen > 0))
- {
- if (signvalue)
- {
- total += dopr_outch (buffer, currlen, maxlen, signvalue);
- --padlen;
- signvalue = 0;
- }
- while (padlen > 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, '0');
- --padlen;
- }
- }
- while (padlen > 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, ' ');
- --padlen;
- }
- if (signvalue)
- total += dopr_outch (buffer, currlen, maxlen, signvalue);
-
- while (iplace > 0)
- total += dopr_outch (buffer, currlen, maxlen, iconvert[--iplace]);
-
- /*
- * Decimal point. This should probably use locale to find the correct
- * char to print out.
- */
- if (max > 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, '.');
-
- while (fplace > 0)
- total += dopr_outch (buffer, currlen, maxlen, fconvert[--fplace]);
- }
-
- while (zpadlen > 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, '0');
- --zpadlen;
- }
-
- while (padlen < 0)
- {
- total += dopr_outch (buffer, currlen, maxlen, ' ');
- ++padlen;
- }
-
- return total;
-}
-
-static int dopr_outch (char *buffer, size_t *currlen, size_t maxlen, char c)
-{
- if (*currlen + 1 < maxlen)
- buffer[(*currlen)++] = c;
- return 1;
-}
-
-#ifndef HAVE_VSNPRINTF
-int vsnprintf (char *str, size_t count, const char *fmt, va_list args)
-{
- if (str != NULL)
- str[0] = 0;
- return dopr(str, count, fmt, args);
-}
-#endif /* !HAVE_VSNPRINTF */
-
-#ifndef HAVE_SNPRINTF
-/* VARARGS3 */
-#ifdef HAVE_STDARGS
-int snprintf (char *str,size_t count,const char *fmt,...)
-#else
-int snprintf (va_alist) va_dcl
-#endif
-{
-#ifndef HAVE_STDARGS
- char *str;
- size_t count;
- char *fmt;
-#endif
- VA_LOCAL_DECL;
- int total;
-
- VA_START (fmt);
- VA_SHIFT (str, char *);
- VA_SHIFT (count, size_t );
- VA_SHIFT (fmt, char *);
- total = vsnprintf(str, count, fmt, ap);
- VA_END;
- return total;
-}
-#endif /* !HAVE_SNPRINTF */
-
-#ifdef TEST_SNPRINTF
-#ifndef LONG_STRING
-#define LONG_STRING 1024
-#endif
-int main (void)
-{
- char buf1[LONG_STRING];
- char buf2[LONG_STRING];
- char *fp_fmt[] = {
- "%-1.5f",
- "%1.5f",
- "%123.9f",
- "%10.5f",
- "% 10.5f",
- "%+22.9f",
- "%+4.9f",
- "%01.3f",
- "%4f",
- "%3.1f",
- "%3.2f",
- "%.0f",
- "%.1f",
- NULL
- };
- double fp_nums[] = { -1.5, 134.21, 91340.2, 341.1234, 0203.9, 0.96, 0.996,
- 0.9996, 1.996, 4.136, 0};
- char *int_fmt[] = {
- "%-1.5d",
- "%1.5d",
- "%123.9d",
- "%5.5d",
- "%10.5d",
- "% 10.5d",
- "%+22.33d",
- "%01.3d",
- "%4d",
- NULL
- };
- long int_nums[] = { -1, 134, 91340, 341, 0203, 0};
- int x, y;
- int fail = 0;
- int num = 0;
-
- printf ("Testing snprintf format codes against system sprintf...\n");
-
- for (x = 0; fp_fmt[x] != NULL ; x++)
- for (y = 0; fp_nums[y] != 0 ; y++)
- {
- snprintf (buf1, sizeof (buf1), fp_fmt[x], fp_nums[y]);
- sprintf (buf2, fp_fmt[x], fp_nums[y]);
- if (strcmp (buf1, buf2))
- {
- printf("snprintf doesn't match Format: %s\n\tsnprintf = %s\n\tsprintf = %s\n",
- fp_fmt[x], buf1, buf2);
- fail++;
- }
- num++;
- }
-
- for (x = 0; int_fmt[x] != NULL ; x++)
- for (y = 0; int_nums[y] != 0 ; y++)
- {
- snprintf (buf1, sizeof (buf1), int_fmt[x], int_nums[y]);
- sprintf (buf2, int_fmt[x], int_nums[y]);
- if (strcmp (buf1, buf2))
- {
- printf("snprintf doesn't match Format: %s\n\tsnprintf = %s\n\tsprintf = %s\n",
- int_fmt[x], buf1, buf2);
- fail++;
- }
- num++;
- }
- printf ("%d tests failed out of %d.\n", fail, num);
-}
-#endif /* SNPRINTF_TEST */
-
-#endif /* !HAVE_SNPRINTF */
diff --git a/autogen.sh b/autogen.sh
index e20408bf2ea..c8cdc3f89fa 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -1,8 +1,92 @@
#!/bin/sh
-aclocal
-autoheader
-(libtoolize --automake --copy --force || glibtoolize --automake --copy --force)
-autoconf
-automake --add-missing --copy --foreign
-cd argp-standalone;./autogen.sh
+echo
+echo ... GlusterFS autogen ...
+echo
+
+## Check all dependencies are present
+MISSING=""
+
+# Check for aclocal
+env aclocal --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ ACLOCAL=aclocal
+else
+ MISSING="$MISSING aclocal"
+fi
+
+# Check for autoconf
+env autoconf --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ AUTOCONF=autoconf
+else
+ MISSING="$MISSING autoconf"
+fi
+
+# Check for autoheader
+env autoheader --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ AUTOHEADER=autoheader
+else
+ MISSING="$MISSING autoheader"
+fi
+
+# Check for automake
+env automake --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ AUTOMAKE=automake
+else
+ MISSING="$MISSING automake"
+fi
+
+# Check for libtoolize or glibtoolize
+env libtoolize --version > /dev/null 2>&1
+if [ $? -eq 0 ]; then
+ # libtoolize was found, so use it
+ TOOL=libtoolize
+else
+ # libtoolize wasn't found, so check for glibtoolize
+ env glibtoolize --version > /dev/null 2>&1
+ if [ $? -eq 0 ]; then
+ TOOL=glibtoolize
+ else
+ MISSING="$MISSING libtoolize/glibtoolize"
+ fi
+fi
+
+# Check for tar
+env tar -cf /dev/null /dev/null > /dev/null 2>&1
+if [ $? -ne 0 ]; then
+ MISSING="$MISSING tar"
+fi
+
+## If dependencies are missing, warn the user and abort
+if [ "x$MISSING" != "x" ]; then
+ echo "Aborting."
+ echo
+ echo "The following build tools are missing:"
+ echo
+ for pkg in $MISSING; do
+ echo " * $pkg"
+ done
+ echo
+ echo "Please install them and try again."
+ echo
+ exit 1
+fi
+
+## Do the autogeneration
+echo Running ${ACLOCAL}...
+$ACLOCAL -I ./contrib/aclocal
+echo Running ${AUTOHEADER}...
+$AUTOHEADER
+echo Running ${TOOL}...
+$TOOL --automake --copy --force
+echo Running ${AUTOCONF}...
+$AUTOCONF
+echo Running ${AUTOMAKE}...
+$AUTOMAKE --add-missing --force-missing --copy
+
+# Instruct user on next steps
+echo
+echo "Please proceed with configuring, compiling, and installing."
diff --git a/booster/Makefile.am b/booster/Makefile.am
deleted file mode 100644
index e1c45f3051c..00000000000
--- a/booster/Makefile.am
+++ /dev/null
@@ -1 +0,0 @@
-SUBDIRS=src \ No newline at end of file
diff --git a/booster/src/Makefile.am b/booster/src/Makefile.am
deleted file mode 100644
index d7d83abf5ee..00000000000
--- a/booster/src/Makefile.am
+++ /dev/null
@@ -1,21 +0,0 @@
-ldpreload_LTLIBRARIES = libglusterfs-booster.la
-ldpreloaddir = $(libdir)/glusterfs
-noinst_HEADERS = booster_fstab.h booster-fd.h
-libglusterfs_booster_la_SOURCES = booster.c booster_stat.c booster_fstab.c booster-fd.c
-libglusterfs_booster_la_CFLAGS = -I$(top_srcdir)/libglusterfsclient/src/ -D_GNU_SOURCE -D$(GF_HOST_OS) -fPIC -Wall \
- -pthread $(GF_BOOSTER_CFLAGS) -shared -nostartfiles
-libglusterfs_booster_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \
- -I$(top_srcdir)/libglusterfsclient/src \
- -I$(top_srcdir)/libglusterfs/src -DDATADIR=\"$(localstatedir)\" \
- -DCONFDIR=\"$(sysconfdir)/glusterfs\" $(ARGP_STANDALONE_CPPFLAGS)
-
-libglusterfs_booster_la_LDFLAGS = -module -avoidversion
-libglusterfs_booster_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(top_builddir)/libglusterfsclient/src/libglusterfsclient.la
-
-CLEANFILES =
-
-uninstall-local:
- rm -f $(DESTDIR)$(ldpreloaddir)/glusterfs-booster.so
-
-install-data-hook:
- ln -sf libglusterfs-booster.so $(DESTDIR)$(ldpreloaddir)/glusterfs-booster.so
diff --git a/booster/src/booster-fd.c b/booster/src/booster-fd.c
deleted file mode 100644
index fa5b0cde2d3..00000000000
--- a/booster/src/booster-fd.c
+++ /dev/null
@@ -1,342 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-
-#include "booster-fd.h"
-#include <logging.h>
-#include <mem-pool.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <common-utils.h>
-#include <string.h>
-
-#include <assert.h>
-
-extern fd_t *
-fd_ref (fd_t *fd);
-
-extern void
-fd_unref (fd_t *fd);
-/*
- Allocate in memory chunks of power of 2 starting from 1024B
- Assumes fdtable->lock is held
- */
-static inline uint
-gf_roundup_power_of_two (uint nr)
-{
- uint result = 1;
-
- if (nr < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Negative number passed");
- return -1;
- }
-
- while (result <= nr)
- result *= 2;
-
- return result;
-}
-
-#define BOOSTER_NFDBITS (sizeof (unsigned long))
-
-#define BOOSTER_FDMASK(d) (1UL << ((d) % BOOSTER_NFDBITS))
-#define BOOSTER_FDELT(d) (d / BOOSTER_NFDBITS)
-#define BOOSTER_FD_SET(set, d) (set->fd_bits[BOOSTER_FDELT(d)] |= BOOSTER_FDMASK(d))
-#define BOOSTER_FD_CLR(set, d) (set->fd_bits[BOOSTER_FDELT(d)] &= ~BOOSTER_FDMASK(d))
-#define BOOSTER_FD_ISSET(set, d) (set->fd_bits[BOOSTER_FDELT(d)] & BOOSTER_FDMASK(d))
-
-inline int
-booster_get_close_on_exec (booster_fdtable_t *fdtable, int fd)
-{
- return BOOSTER_FD_ISSET(fdtable->close_on_exec, fd);
-}
-
-inline void
-booster_set_close_on_exec (booster_fdtable_t *fdtable, int fd)
-{
- BOOSTER_FD_SET(fdtable->close_on_exec, fd);
-}
-
-int
-booster_fdtable_expand (booster_fdtable_t *fdtable, uint nr)
-{
- fd_t **oldfds = NULL, **tmp = NULL;
- uint oldmax_fds = -1;
- uint cpy = 0;
- int32_t ret = -1, bytes = 0;
- booster_fd_set_t *oldclose_on_exec = NULL;
-
- if (fdtable == NULL || nr < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Invalid argument");
- errno = EINVAL;
- ret = -1;
- goto out;
- }
-
- nr /= (1024 / sizeof (fd_t *));
- nr = gf_roundup_power_of_two (nr + 1);
- nr *= (1024 / sizeof (fd_t *));
-
- oldfds = fdtable->fds;
- oldmax_fds = fdtable->max_fds;
- oldclose_on_exec = fdtable->close_on_exec;
-
- fdtable->fds = CALLOC (nr, sizeof (fd_t *));
- if (fdtable->fds == NULL) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Memory allocation failed");
- fdtable->fds = oldfds;
- oldfds = NULL;
- ret = -1;
- goto out;
- }
-
- fdtable->max_fds = nr;
-
- if (oldfds) {
- cpy = oldmax_fds * sizeof (fd_t *);
- memcpy (fdtable->fds, oldfds, cpy);
- }
-
- /* nr will be either less than 8 or a multiple of 8 */
- bytes = nr/8;
- bytes = bytes ? bytes : 1;
- fdtable->close_on_exec = CALLOC (bytes, 1);
- if (fdtable->close_on_exec == NULL) {
- gf_log ("booster-fd", GF_LOG_ERROR, "Memory allocation "
- "failed");
- tmp = fdtable->fds;
- fdtable->fds = oldfds;
- oldfds = tmp;
- ret = -1;
- goto out;
- }
-
- if (oldclose_on_exec != NULL) {
- bytes = oldmax_fds/8;
- cpy = bytes ? bytes : 1;
- memcpy (fdtable->close_on_exec, oldclose_on_exec, cpy);
- }
- gf_log ("booster-fd", GF_LOG_TRACE, "FD-table expanded: Old: %d,New: %d"
- , oldmax_fds, nr);
- ret = 0;
-
-out:
- FREE (oldfds);
- FREE (oldclose_on_exec);
-
- return ret;
-}
-
-booster_fdtable_t *
-booster_fdtable_alloc (void)
-{
- booster_fdtable_t *fdtable = NULL;
- int32_t ret = -1;
-
- fdtable = CALLOC (1, sizeof (*fdtable));
- GF_VALIDATE_OR_GOTO ("booster-fd", fdtable, out);
-
- LOCK_INIT (&fdtable->lock);
-
- LOCK (&fdtable->lock);
- {
- ret = booster_fdtable_expand (fdtable, 0);
- }
- UNLOCK (&fdtable->lock);
-
- if (ret == -1) {
- gf_log ("booster-fd", GF_LOG_ERROR, "FD-table allocation "
- "failed");
- FREE (fdtable);
- fdtable = NULL;
- }
-
-out:
- return fdtable;
-}
-
-fd_t **
-__booster_fdtable_get_all_fds (booster_fdtable_t *fdtable, uint *count)
-{
- fd_t **fds = NULL;
-
- if (count == NULL)
- goto out;
-
- fds = fdtable->fds;
- fdtable->fds = calloc (fdtable->max_fds, sizeof (fd_t *));
- *count = fdtable->max_fds;
-
-out:
- return fds;
-}
-
-fd_t **
-booster_fdtable_get_all_fds (booster_fdtable_t *fdtable, uint *count)
-{
- fd_t **fds = NULL;
- if (!fdtable)
- return NULL;
-
- LOCK (&fdtable->lock);
- {
- fds = __booster_fdtable_get_all_fds (fdtable, count);
- }
- UNLOCK (&fdtable->lock);
-
- return fds;
-}
-
-void
-booster_fdtable_destroy (booster_fdtable_t *fdtable)
-{
- fd_t *fd = NULL;
- fd_t **fds = NULL;
- uint fd_count = 0;
- int i = 0;
-
- if (!fdtable)
- return;
-
- LOCK (&fdtable->lock);
- {
- fds = __booster_fdtable_get_all_fds (fdtable, &fd_count);
- FREE (fdtable->fds);
- }
- UNLOCK (&fdtable->lock);
-
- if (!fds)
- goto free_table;
-
- for (i = 0; i < fd_count; i++) {
- fd = fds[i];
- if (fd != NULL)
- fd_unref (fd);
- }
- FREE (fds);
-free_table:
- LOCK_DESTROY (&fdtable->lock);
- FREE (fdtable);
-}
-
-int
-booster_fd_unused_get (booster_fdtable_t *fdtable, fd_t *fdptr, int fd)
-{
- int ret = -1;
- int error = 0;
-
- if (fdtable == NULL || fdptr == NULL || fd < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "invalid argument");
- errno = EINVAL;
- return -1;
- }
-
- gf_log ("booster-fd", GF_LOG_TRACE, "Requested fd: %d", fd);
- LOCK (&fdtable->lock);
- {
- while (fdtable->max_fds < fd) {
- error = 0;
- error = booster_fdtable_expand (fdtable,
- fdtable->max_fds + 1);
- if (error) {
- gf_log ("booster-fd", GF_LOG_ERROR,
- "Cannot expand fdtable:%s",
- strerror (error));
- goto err;
- }
- }
-
- if (!fdtable->fds[fd]) {
- fdtable->fds[fd] = fdptr;
- fd_ref (fdptr);
- ret = fd;
- } else
- gf_log ("booster-fd", GF_LOG_ERROR, "Cannot allocate fd"
- " %d (slot not empty in fdtable)", fd);
- }
-err:
- UNLOCK (&fdtable->lock);
-
- return ret;
-}
-
-void
-booster_fd_put (booster_fdtable_t *fdtable, int fd)
-{
- fd_t *fdptr = NULL;
- if (fdtable == NULL || fd < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "invalid argument");
- return;
- }
-
- gf_log ("booster-fd", GF_LOG_TRACE, "FD put: %d", fd);
- if (!(fd < fdtable->max_fds)) {
- gf_log ("booster-fd", GF_LOG_ERROR, "FD not in booster fd"
- " table");
- return;
- }
-
- LOCK (&fdtable->lock);
- {
- fdptr = fdtable->fds[fd];
- fdtable->fds[fd] = NULL;
- }
- UNLOCK (&fdtable->lock);
-
- if (fdptr)
- fd_unref (fdptr);
-}
-
-fd_t *
-booster_fdptr_get (booster_fdtable_t *fdtable, int fd)
-{
- fd_t *fdptr = NULL;
-
- if (fdtable == NULL || fd < 0) {
- gf_log ("booster-fd", GF_LOG_ERROR, "invalid argument");
- errno = EINVAL;
- return NULL;
- }
-
- gf_log ("booster-fd", GF_LOG_TRACE, "FD ptr request: %d", fd);
- if (!(fd < fdtable->max_fds)) {
- gf_log ("booster-fd", GF_LOG_ERROR, "FD not in booster fd"
- " table");
- errno = EINVAL;
- return NULL;
- }
-
- LOCK (&fdtable->lock);
- {
- fdptr = fdtable->fds[fd];
- if (fdptr)
- fd_ref (fdptr);
- }
- UNLOCK (&fdtable->lock);
-
- return fdptr;
-}
-
-void
-booster_fdptr_put (fd_t *booster_fd)
-{
- if (booster_fd)
- fd_unref (booster_fd);
-}
diff --git a/booster/src/booster-fd.h b/booster/src/booster-fd.h
deleted file mode 100644
index 595a112bd8d..00000000000
--- a/booster/src/booster-fd.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- Copyright (c) 2010 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _BOOSTER_FD_H
-#define _BOOSTER_FD_H
-
-#include <libglusterfsclient.h>
-#include <locking.h>
-#include <list.h>
-
-/* This struct must be updated if the fd_t in fd.h changes.
- * We cannot include those headers here because unistd.h, included
- * by glusterfs headers, conflicts with the syscall prototypes we
- * define for booster.
- */
-struct _fd {
- pid_t pid;
- int32_t flags;
- int32_t refcount;
- struct list_head inode_list;
- struct _inode *inode;
- struct _dict *ctx;
- gf_lock_t lock; /* used ONLY for manipulating
- 'struct _fd_ctx' array (_ctx).*/
- struct _fd_ctx *_ctx;
-};
-typedef struct _fd fd_t;
-
-struct _booster_fd_set {
- unsigned long fd_bits[0];
-};
-typedef struct _booster_fd_set booster_fd_set_t;
-
-struct _booster_fdtable {
- booster_fd_set_t *close_on_exec;
- int refcount;
- unsigned int max_fds;
- gf_lock_t lock;
- fd_t **fds;
-};
-typedef struct _booster_fdtable booster_fdtable_t;
-
-void
-booster_set_close_on_exec (booster_fdtable_t *fdtable, int fd);
-
-int
-booster_get_close_on_exec (booster_fdtable_t *fdtable, int fd);
-
-extern int
-booster_fd_unused_get (booster_fdtable_t *fdtable, fd_t *fdptr, int fd);
-
-extern void
-booster_fd_put (booster_fdtable_t *fdtable, int fd);
-
-extern fd_t *
-booster_fdptr_get (booster_fdtable_t *fdtable, int fd);
-
-extern void
-booster_fdptr_put (fd_t *fdptr);
-
-extern void
-booster_fdtable_destroy (booster_fdtable_t *fdtable);
-
-booster_fdtable_t *
-booster_fdtable_alloc (void);
-
-#endif /* #ifndef _BOOSTER_FD_H */
diff --git a/booster/src/booster.c b/booster/src/booster.c
deleted file mode 100644
index c34ec114645..00000000000
--- a/booster/src/booster.c
+++ /dev/null
@@ -1,3172 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <dlfcn.h>
-#include <sys/types.h>
-#include <sys/uio.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <inttypes.h>
-#include <libglusterfsclient.h>
-#include <list.h>
-#include <pthread.h>
-#include <sys/xattr.h>
-#include <string.h>
-#include <assert.h>
-#include <errno.h>
-#include <ctype.h>
-#include <logging.h>
-#include <utime.h>
-#include <dirent.h>
-#include <sys/statfs.h>
-#include <sys/statvfs.h>
-#include <fcntl.h>
-#include "booster-fd.h"
-
-#ifndef GF_UNIT_KB
-#define GF_UNIT_KB 1024
-#endif
-
-static pthread_mutex_t cwdlock = PTHREAD_MUTEX_INITIALIZER;
-
-/* attr constructor registers this function with libc's
- * _init function as a function that must be called before
- * the main() of the program.
- */
-static void booster_lib_init (void) __attribute__((constructor));
-
-extern fd_t *
-fd_ref (fd_t *fd);
-
-extern void
-fd_unref (fd_t *fd);
-
-extern int pipe (int filedes[2]);
-/* We define these flags so that we can remove fcntl.h from the include path.
- * fcntl.h has certain defines and other lines of code that redirect the
- * application's open and open64 calls to the syscalls defined by
- * libc, for us, thats not a Good Thing (TM).
- */
-#ifndef GF_O_CREAT
-#define GF_O_CREAT 0x40
-#endif
-
-#ifndef GF_O_TRUNC
-#define GF_O_TRUNC 0x200
-#endif
-
-#ifndef GF_O_RDWR
-#define GF_O_RDWR 0x2
-#endif
-
-#ifndef GF_O_WRONLY
-#define GF_O_WRONLY 0x1
-#endif
-
-#ifndef UNIX_PATH_MAX
-#define UNIX_PATH_MAX 108
-#endif
-
-typedef enum {
- BOOSTER_OPEN,
- BOOSTER_CREAT
-} booster_op_t;
-
-struct _inode;
-struct _dict;
-
-ssize_t
-write (int fd, const void *buf, size_t count);
-
-/* open, open64, creat */
-static int (*real_open) (const char *pathname, int flags, ...);
-static int (*real_open64) (const char *pathname, int flags, ...);
-static int (*real_creat) (const char *pathname, mode_t mode);
-static int (*real_creat64) (const char *pathname, mode_t mode);
-
-/* read, readv, pread, pread64 */
-static ssize_t (*real_read) (int fd, void *buf, size_t count);
-static ssize_t (*real_readv) (int fd, const struct iovec *vector, int count);
-static ssize_t (*real_pread) (int fd, void *buf, size_t count,
- unsigned long offset);
-static ssize_t (*real_pread64) (int fd, void *buf, size_t count,
- uint64_t offset);
-
-/* write, writev, pwrite, pwrite64 */
-static ssize_t (*real_write) (int fd, const void *buf, size_t count);
-static ssize_t (*real_writev) (int fd, const struct iovec *vector, int count);
-static ssize_t (*real_pwrite) (int fd, const void *buf, size_t count,
- unsigned long offset);
-static ssize_t (*real_pwrite64) (int fd, const void *buf, size_t count,
- uint64_t offset);
-
-/* lseek, llseek, lseek64 */
-static off_t (*real_lseek) (int fildes, unsigned long offset, int whence);
-static off_t (*real_lseek64) (int fildes, uint64_t offset, int whence);
-
-/* close */
-static int (*real_close) (int fd);
-
-/* dup dup2 */
-static int (*real_dup) (int fd);
-static int (*real_dup2) (int oldfd, int newfd);
-
-static pid_t (*real_fork) (void);
-static int (*real_mkdir) (const char *pathname, mode_t mode);
-static int (*real_rmdir) (const char *pathname);
-static int (*real_chmod) (const char *pathname, mode_t mode);
-static int (*real_chown) (const char *pathname, uid_t owner, gid_t group);
-static int (*real_fchmod) (int fd, mode_t mode);
-static int (*real_fchown) (int fd, uid_t, gid_t gid);
-static int (*real_fsync) (int fd);
-static int (*real_ftruncate) (int fd, off_t length);
-static int (*real_ftruncate64) (int fd, loff_t length);
-static int (*real_link) (const char *oldpath, const char *newname);
-static int (*real_rename) (const char *oldpath, const char *newpath);
-static int (*real_utimes) (const char *path, const struct timeval times[2]);
-static int (*real_utime) (const char *path, const struct utimbuf *buf);
-static int (*real_mknod) (const char *path, mode_t mode, dev_t dev);
-static int (*real_mkfifo) (const char *path, mode_t mode);
-static int (*real_unlink) (const char *path);
-static int (*real_symlink) (const char *oldpath, const char *newpath);
-static int (*real_readlink) (const char *path, char *buf, size_t bufsize);
-static char * (*real_realpath) (const char *path, char *resolved);
-static DIR * (*real_opendir) (const char *path);
-static struct dirent * (*real_readdir) (DIR *dir);
-static struct dirent64 * (*real_readdir64) (DIR *dir);
-static int (*real_readdir_r) (DIR *dir, struct dirent *entry,
- struct dirent **result);
-static int (*real_readdir64_r) (DIR *dir, struct dirent64 *entry,
- struct dirent64 **result);
-static int (*real_closedir) (DIR *dh);
-static int (*real___xstat) (int ver, const char *path, struct stat *buf);
-static int (*real___xstat64) (int ver, const char *path, struct stat64 *buf);
-static int (*real_stat) (const char *path, struct stat *buf);
-static int (*real_stat64) (const char *path, struct stat64 *buf);
-static int (*real___fxstat) (int ver, int fd, struct stat *buf);
-static int (*real___fxstat64) (int ver, int fd, struct stat64 *buf);
-static int (*real_fstat) (int fd, struct stat *buf);
-static int (*real_fstat64) (int fd , struct stat64 *buf);
-static int (*real___lxstat) (int ver, const char *path, struct stat *buf);
-static int (*real___lxstat64) (int ver, const char *path, struct stat64 *buf);
-static int (*real_lstat) (const char *path, struct stat *buf);
-static int (*real_lstat64) (const char *path, struct stat64 *buf);
-static int (*real_statfs) (const char *path, struct statfs *buf);
-static int (*real_statfs64) (const char *path, struct statfs64 *buf);
-static int (*real_statvfs) (const char *path, struct statvfs *buf);
-static int (*real_statvfs64) (const char *path, struct statvfs64 *buf);
-static ssize_t (*real_getxattr) (const char *path, const char *name,
- void *value, size_t size);
-static ssize_t (*real_lgetxattr) (const char *path, const char *name,
- void *value, size_t size);
-static int (*real_remove) (const char* path);
-static int (*real_lchown) (const char *path, uid_t owner, gid_t group);
-static void (*real_rewinddir) (DIR *dirp);
-static void (*real_seekdir) (DIR *dirp, off_t offset);
-static off_t (*real_telldir) (DIR *dirp);
-
-static ssize_t (*real_sendfile) (int out_fd, int in_fd, off_t *offset,
- size_t count);
-static ssize_t (*real_sendfile64) (int out_fd, int in_fd, off_t *offset,
- size_t count);
-static int (*real_fcntl) (int fd, int cmd, ...);
-static int (*real_chdir) (const char *path);
-static int (*real_fchdir) (int fd);
-static char * (*real_getcwd) (char *buf, size_t size);
-static int (*real_truncate) (const char *path, off_t length);
-static int (*real_truncate64) (const char *path, loff_t length);
-static int (*real_setxattr) (const char *path, const char *name,
- const void *value, size_t size, int flags);
-static int (*real_lsetxattr) (const char *path, const char *name,
- const void *value, size_t size, int flags);
-static int (*real_fsetxattr) (int filedes, const char *name,
- const void *value, size_t size, int flags);
-
-
-#define RESOLVE(sym) do { \
- if (!real_##sym) \
- real_##sym = dlsym (RTLD_NEXT, #sym); \
- } while (0)
-
-/*TODO: set proper value */
-#define MOUNT_HASH_SIZE 256
-
-struct booster_mount {
- dev_t st_dev;
- glusterfs_handle_t handle;
- struct list_head device_list;
-};
-typedef struct booster_mount booster_mount_t;
-
-static booster_fdtable_t *booster_fdtable = NULL;
-
-extern int booster_configure (char *confpath);
-/* This is dup'ed every time VMP open/creat wants a new fd.
- * This is needed so we occupy an entry in the process' file
- * table.
- */
-int process_piped_fd = -1;
-
-static int
-booster_get_process_fd ()
-{
- return real_dup (process_piped_fd);
-}
-
-/* The following two define which file contains
- * the FSTAB configuration for VMP-based usage.
- */
-#define DEFAULT_BOOSTER_CONF CONFDIR"/booster.conf"
-#define BOOSTER_CONF_ENV_VAR "GLUSTERFS_BOOSTER_FSTAB"
-
-
-/* The following define which log file is used when
- * using the old mount point bypass approach.
- */
-#define BOOSTER_DEFAULT_LOG CONFDIR"/booster.log"
-#define BOOSTER_LOG_ENV_VAR "GLUSTERFS_BOOSTER_LOG"
-
-void
-do_open (int fd, const char *pathname, int flags, mode_t mode, booster_op_t op)
-{
- char *specfile = NULL;
- char *mount_point = NULL;
- int32_t size = 0;
- int32_t ret = -1;
- FILE *specfp = NULL;
- glusterfs_file_t fh = NULL;
- char *logfile = NULL;
- glusterfs_init_params_t iparams = {
- .loglevel = "error",
- .lookup_timeout = 600,
- .stat_timeout = 600,
- };
-
- gf_log ("booster", GF_LOG_DEBUG, "Opening using MPB: %s", pathname);
- size = fgetxattr (fd, "user.glusterfs-booster-volfile", NULL, 0);
- if (size == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-volfile not found: %s",
- strerror (errno));
- goto out;
- }
-
- specfile = calloc (1, size);
- if (!specfile) {
- gf_log ("booster", GF_LOG_ERROR, "Memory allocation failed");
- goto out;
- }
-
- ret = fgetxattr (fd, "user.glusterfs-booster-volfile", specfile,
- size);
- if (ret == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-volfile not found: %s",
- strerror (errno));
- goto out;
- }
-
- specfp = tmpfile ();
- if (!specfp) {
- gf_log ("booster", GF_LOG_ERROR, "Temp file creation failed"
- ": %s", strerror (errno));
- goto out;
- }
-
- ret = fwrite (specfile, size, 1, specfp);
- if (ret != 1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to write volfile: %s",
- strerror (errno));
- goto out;
- }
-
- fseek (specfp, 0L, SEEK_SET);
-
- size = fgetxattr (fd, "user.glusterfs-booster-mount", NULL, 0);
- if (size == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-mount not found: %s",
- strerror (errno));
- goto out;
- }
-
- mount_point = calloc (size, sizeof (char));
- if (!mount_point) {
- gf_log ("booster", GF_LOG_ERROR, "Memory allocation failed");
- goto out;
- }
-
- ret = fgetxattr (fd, "user.glusterfs-booster-mount", mount_point, size);
- if (ret == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Xattr "
- "user.glusterfs-booster-mount not found: %s",
- strerror (errno));
- goto out;
- }
-
- logfile = getenv (BOOSTER_LOG_ENV_VAR);
- if (logfile) {
- if (strlen (logfile) > 0)
- iparams.logfile = strdup (logfile);
- else
- iparams.logfile = strdup (BOOSTER_DEFAULT_LOG);
- } else {
- iparams.logfile = strdup (BOOSTER_DEFAULT_LOG);
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Using log-file: %s", iparams.logfile);
- iparams.specfp = specfp;
-
- ret = glusterfs_mount (mount_point, &iparams);
- if (ret == -1) {
- if (errno != EEXIST) {
- gf_log ("booster", GF_LOG_ERROR, "Mount failed over"
- " glusterfs");
- goto out;
- } else
- gf_log ("booster", GF_LOG_ERROR, "Already mounted");
- }
-
- switch (op) {
- case BOOSTER_OPEN:
- gf_log ("booster", GF_LOG_TRACE, "Booster open call");
- fh = glusterfs_open (pathname, flags, mode);
- break;
-
- case BOOSTER_CREAT:
- gf_log ("booster", GF_LOG_TRACE, "Booster create call");
- fh = glusterfs_creat (pathname, mode);
- break;
- }
-
- if (!fh) {
- gf_log ("booster", GF_LOG_ERROR, "Error performing operation");
- goto out;
- }
-
- if (booster_fd_unused_get (booster_fdtable, fh, fd) == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to get unused FD");
- goto out;
- }
- fh = NULL;
-
-out:
- if (specfile) {
- free (specfile);
- }
-
- if (specfp) {
- fclose (specfp);
- }
-
- if (mount_point) {
- free (mount_point);
- }
-
- if (fh) {
- glusterfs_close (fh);
- }
-
- return;
-}
-
-int
-vmp_open (const char *pathname, int flags, ...)
-{
- mode_t mode = 0;
- int fd = -1;
- glusterfs_file_t fh = NULL;
- va_list ap;
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- fh = glusterfs_open (pathname, flags, mode);
- }
- else
- fh = glusterfs_open (pathname, flags);
-
- if (!fh) {
- gf_log ("booster", GF_LOG_ERROR, "VMP open failed");
- goto out;
- }
-
- fd = booster_get_process_fd ();
- if (fd == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to create open fd");
- goto fh_close_out;
- }
-
- if (booster_fd_unused_get (booster_fdtable, fh, fd) == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to map fd into table");
- goto realfd_close_out;
- }
-
- return fd;
-
-realfd_close_out:
- real_close (fd);
- fd = -1;
-
-fh_close_out:
- glusterfs_close (fh);
-
-out:
- return fd;
-}
-
-#define BOOSTER_USE_OPEN64 1
-#define BOOSTER_DONT_USE_OPEN64 0
-
-int
-booster_open (const char *pathname, int use64, int flags, ...)
-{
- int ret = -1;
- mode_t mode = 0;
- va_list ap;
- int (*my_open) (const char *pathname, int flags, ...);
-
- if (!pathname) {
- errno = EINVAL;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Open: %s", pathname);
- /* First try opening through the virtual mount point.
- * The difference lies in the fact that:
- * 1. We depend on libglusterfsclient library to perform
- * the translation from the path to handle.
- * 2. We do not go to the file system for the fd, instead
- * we use booster_get_process_fd (), which returns a dup'ed
- * fd of a pipe created in booster_init.
- */
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
- ret = vmp_open (pathname, flags, mode);
- }
- else
- ret = vmp_open (pathname, flags);
-
- /* We receive an ENODEV if the VMP does not exist. If we
- * receive an error other than ENODEV, it means, there
- * actually was an error performing vmp_open. This must
- * be returned to the user.
- */
- if ((ret < 0) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Error in opening file over "
- " VMP: %s", strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "File opened");
- goto out;
- }
-
- if (use64) {
- gf_log ("booster", GF_LOG_TRACE, "Using 64-bit open");
- my_open = real_open64;
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Using 32-bit open");
- my_open = real_open;
- }
-
- /* It is possible the RESOLVE macro is not able
- * to resolve the symbol of a function, in that case
- * we dont want to seg-fault on calling a NULL functor.
- */
- if (my_open == NULL) {
- gf_log ("booster", GF_LOG_ERROR, "open not resolved");
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- ret = my_open (pathname, flags, mode);
- } else
- ret = my_open (pathname, flags);
-
- if (ret != -1) {
- do_open (ret, pathname, flags, mode, BOOSTER_OPEN);
- }
-
-out:
- return ret;
-}
-
-/* This is done to over-write existing definitions of open and open64 inside
- * libc with our own copies. __REDIRECT is provided by libc.
- *
- * XXX: This will not work anywhere other than libc based systems.
- */
-int __REDIRECT (booster_false_open, (__const char *__file, int __oflag, ...),
- open) __nonnull ((1));
-int __REDIRECT (booster_false_open64, (__const char *__file, int __oflag, ...),
- open64) __nonnull ((1));
-int
-booster_false_open (const char *pathname, int flags, ...)
-{
- int ret;
- mode_t mode = 0;
- va_list ap;
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- ret = booster_open (pathname, BOOSTER_DONT_USE_OPEN64, flags,
- mode);
- }
- else
- ret = booster_open (pathname, BOOSTER_DONT_USE_OPEN64, flags);
-
- return ret;
-}
-
-int
-booster_false_open64 (const char *pathname, int flags, ...)
-{
- int ret;
- mode_t mode = 0;
- va_list ap;
-
- if (flags & GF_O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
-
- ret = booster_open (pathname, BOOSTER_USE_OPEN64, flags, mode);
- }
- else
- ret = booster_open (pathname, BOOSTER_USE_OPEN64, flags);
-
- return ret;
-}
-
-int
-vmp_creat (const char *pathname, mode_t mode)
-{
- int fd = -1;
- glusterfs_file_t fh = NULL;
-
- fh = glusterfs_creat (pathname, mode);
- if (!fh) {
- gf_log ("booster", GF_LOG_ERROR, "Create failed: %s: %s",
- pathname, strerror (errno));
- goto out;
- }
-
- fd = booster_get_process_fd ();
- if (fd == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to create fd");
- goto close_out;
- }
-
- if ((booster_fd_unused_get (booster_fdtable, fh, fd)) == -1) {
- gf_log ("booster", GF_LOG_ERROR, "Failed to map unused fd");
- goto real_close_out;
- }
-
- return fd;
-
-real_close_out:
- real_close (fd);
- fd = -1;
-
-close_out:
- glusterfs_close (fh);
-
-out:
- return -1;
-}
-
-int __REDIRECT (booster_false_creat, (const char *pathname, mode_t mode),
- creat) __nonnull ((1));
-int __REDIRECT (booster_false_creat64, (const char *pathname, mode_t mode),
- creat64) __nonnull ((1));
-
-int
-booster_false_creat (const char *pathname, mode_t mode)
-{
- int ret = -1;
- if (!pathname) {
- errno = EINVAL;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Create: %s", pathname);
- ret = vmp_creat (pathname, mode);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "VMP create failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "File created");
- goto out;
- }
-
- if (real_creat == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_creat (pathname, mode);
-
- if (ret != -1) {
- do_open (ret, pathname, GF_O_WRONLY | GF_O_TRUNC, mode,
- BOOSTER_CREAT);
- } else
- gf_log ("booster", GF_LOG_ERROR, "real create failed: %s",
- strerror (errno));
-
-out:
- return ret;
-}
-
-
-int
-booster_false_creat64 (const char *pathname, mode_t mode)
-{
- int ret = -1;
- if (!pathname) {
- errno = EINVAL;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Create: %s", pathname);
- ret = vmp_creat (pathname, mode);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "VMP create failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "File created");
- goto out;
- }
-
- if (real_creat64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_creat64 (pathname, mode);
-
- if (ret != -1) {
- do_open (ret, pathname, GF_O_WRONLY | GF_O_TRUNC, mode,
- BOOSTER_CREAT);
- } else
- gf_log ("booster", GF_LOG_ERROR, "real create failed: %s",
- strerror (errno));
-
-out:
- return ret;
-}
-
-
-/* pread */
-
-ssize_t
-pread (int fd, void *buf, size_t count, unsigned long offset)
-{
- ssize_t ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "pread: fd %d, count %lu, offset %lu"
- ,fd, (long unsigned)count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not booster fd");
- if (real_pread == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pread (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_pread (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-pread64 (int fd, void *buf, size_t count, uint64_t offset)
-{
- ssize_t ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "pread64: fd %d, count %lu, offset %"
- PRIu64, fd, (long unsigned)count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not booster fd");
- if (real_pread64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pread64 (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_pread (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-read (int fd, void *buf, size_t count)
-{
- int ret;
- glusterfs_file_t glfs_fd;
-
- gf_log ("booster", GF_LOG_TRACE, "read: fd %d, count %lu", fd,
- (long unsigned)count);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not booster fd");
- if (real_read == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_read (fd, buf, count);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_read (glfs_fd, buf, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-readv (int fd, const struct iovec *vector, int count)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "readv: fd %d, iovecs %d", fd, count);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_readv == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_readv (fd, vector, count);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_readv (glfs_fd, vector, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-write (int fd, const void *buf, size_t count)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "write: fd %d, count %"GF_PRI_SIZET,
- fd, count);
-
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_write == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_write (fd, buf, count);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_write (glfs_fd, buf, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-ssize_t
-writev (int fd, const struct iovec *vector, int count)
-{
- int ret = 0;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "writev: fd %d, iovecs %d", fd, count);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_writev == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_writev (fd, vector, count);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_writev (glfs_fd, vector, count);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-pwrite (int fd, const void *buf, size_t count, unsigned long offset)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "pwrite: fd %d, count %"GF_PRI_SIZET
- ", offset %lu", fd, count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_pwrite == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pwrite (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_pwrite (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-ssize_t
-pwrite64 (int fd, const void *buf, size_t count, uint64_t offset)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "pwrite64: fd %d, count %"GF_PRI_SIZET
- ", offset %"PRIu64, fd, count, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_pwrite64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_pwrite64 (fd, buf, count, offset);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_pwrite (glfs_fd, buf, count, offset);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-int
-close (int fd)
-{
- int ret = -1;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "close: fd %d", fd);
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- booster_fd_put (booster_fdtable, fd);
- ret = glusterfs_close (glfs_fd);
- booster_fdptr_put (glfs_fd);
- }
-
- ret = real_close (fd);
-
- return ret;
-}
-
-#ifndef _LSEEK_DECLARED
-#define _LSEEK_DECLARED
-off_t
-lseek (int filedes, unsigned long offset, int whence)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "lseek: fd %d, offset %ld",
- filedes, offset);
-
- glfs_fd = booster_fdptr_get (booster_fdtable, filedes);
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_lseek (glfs_fd, offset, whence);
- booster_fdptr_put (glfs_fd);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_lseek == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_lseek (filedes, offset, whence);
- }
-
- return ret;
-}
-#endif
-
-off_t
-lseek64 (int filedes, uint64_t offset, int whence)
-{
- int ret;
- glusterfs_file_t glfs_fd = 0;
-
-
- gf_log ("booster", GF_LOG_TRACE, "lseek: fd %d, offset %"PRIu64,
- filedes, offset);
- glfs_fd = booster_fdptr_get (booster_fdtable, filedes);
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_lseek (glfs_fd, offset, whence);
- booster_fdptr_put (glfs_fd);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_lseek64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_lseek64 (filedes, offset, whence);
- }
-
- return ret;
-}
-
-int
-dup (int oldfd)
-{
- int ret = -1, new_fd = -1;
- glusterfs_file_t glfs_fd = 0;
-
- gf_log ("booster", GF_LOG_TRACE, "dup: fd %d", oldfd);
- glfs_fd = booster_fdptr_get (booster_fdtable, oldfd);
- new_fd = real_dup (oldfd);
-
- if (new_fd >=0 && glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = booster_fd_unused_get (booster_fdtable, glfs_fd,
- new_fd);
- fd_ref ((fd_t *)glfs_fd);
- if (ret == -1) {
- gf_log ("booster", GF_LOG_ERROR,"Failed to map new fd");
- real_close (new_fd);
- }
- }
-
- if (glfs_fd) {
- booster_fdptr_put (glfs_fd);
- }
-
- return new_fd;
-}
-
-
-int
-dup2 (int oldfd, int newfd)
-{
- int ret = -1;
- glusterfs_file_t old_glfs_fd = NULL, new_glfs_fd = NULL;
-
- if (oldfd == newfd) {
- return newfd;
- }
-
- old_glfs_fd = booster_fdptr_get (booster_fdtable, oldfd);
- new_glfs_fd = booster_fdptr_get (booster_fdtable, newfd);
-
- ret = real_dup2 (oldfd, newfd);
- if (ret >= 0) {
- if (new_glfs_fd) {
- glusterfs_close (new_glfs_fd);
- booster_fdptr_put (new_glfs_fd);
- booster_fd_put (booster_fdtable, newfd);
- new_glfs_fd = 0;
- }
-
- if (old_glfs_fd) {
- ret = booster_fd_unused_get (booster_fdtable,
- old_glfs_fd, newfd);
- fd_ref ((fd_t *)old_glfs_fd);
- if (ret == -1) {
- real_close (newfd);
- }
- }
- }
-
- if (old_glfs_fd) {
- booster_fdptr_put (old_glfs_fd);
- }
-
- if (new_glfs_fd) {
- booster_fdptr_put (new_glfs_fd);
- }
-
- return ret;
-}
-
-int
-mkdir (const char *pathname, mode_t mode)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "mkdir: path %s", pathname);
- ret = glusterfs_mkdir (pathname, mode);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "mkdir failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "directory created");
- return ret;
- }
-
- if (real_mkdir == NULL) {
- ret = -1;
- errno = ENOSYS;
- } else
- ret = real_mkdir (pathname, mode);
-
- return ret;
-}
-
-int
-rmdir (const char *pathname)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "rmdir: path %s", pathname);
- ret = glusterfs_rmdir (pathname);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "rmdir failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "directory removed");
- return ret;
- }
-
- if (real_rmdir == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_rmdir (pathname);
-
- return ret;
-}
-
-int
-chmod (const char *pathname, mode_t mode)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "chmod: path %s", pathname);
- ret = glusterfs_chmod (pathname, mode);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "chmod failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "chmod succeeded");
- return ret;
- }
-
- if (real_chmod == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_chmod (pathname, mode);
-
- return ret;
-}
-
-int
-chown (const char *pathname, uid_t owner, gid_t group)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "chown: path: %s", pathname);
- ret = glusterfs_chown (pathname, owner, group);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "chown failed: %s\n",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "chown succeeded");
- return ret;
- }
-
- if (real_chown == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_chown (pathname, owner, group);
-
- return ret;
-}
-
-int
-fchown (int fd, uid_t owner, gid_t group)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fchown: fd %d, uid %d, gid %d", fd,
- owner, group);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fchown == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_fchown (fd, owner, group);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fchown (fh, owner, group);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-
-#define MOUNT_TABLE_HASH_SIZE 256
-
-
-static void booster_cleanup (void);
-static int
-booster_init (void)
-{
- char *booster_conf_path = NULL;
- int ret = -1;
- int pipefd[2];
-
- booster_fdtable = booster_fdtable_alloc ();
- if (!booster_fdtable) {
- fprintf (stderr, "cannot allocate fdtable: %s\n",
- strerror (errno));
- goto err;
- }
-
- if (pipe (pipefd) == -1) {
- gf_log ("booster-fstab", GF_LOG_ERROR, "Pipe creation failed:%s"
- , strerror (errno));
- goto err;
- }
-
- process_piped_fd = pipefd[0];
- real_close (pipefd[1]);
- /* libglusterfsclient based VMPs should be inited only
- * after the file tables are inited so that if the socket
- * calls use the fd based syscalls, the fd tables are
- * correctly initialized to return a NULL handle, on which the
- * socket calls will fall-back to the real API.
- */
- booster_conf_path = getenv (BOOSTER_CONF_ENV_VAR);
- if (booster_conf_path != NULL) {
- if (strlen (booster_conf_path) > 0)
- ret = booster_configure (booster_conf_path);
- else {
- gf_log ("booster", GF_LOG_ERROR, "%s not defined, "
- "using default path: %s", BOOSTER_CONF_ENV_VAR,
- DEFAULT_BOOSTER_CONF);
- ret = booster_configure (DEFAULT_BOOSTER_CONF);
- }
- } else {
- gf_log ("booster", GF_LOG_ERROR, "%s not defined, using default"
- " path: %s", BOOSTER_CONF_ENV_VAR,DEFAULT_BOOSTER_CONF);
- ret = booster_configure (DEFAULT_BOOSTER_CONF);
- }
-
- atexit (booster_cleanup);
- if (ret == 0)
- gf_log ("booster", GF_LOG_DEBUG, "booster is inited");
- return 0;
-
-err:
- /* Sure we return an error value here
- * but who cares about booster.
- */
- return -1;
-}
-
-
-static void
-booster_cleanup (void)
-{
- /* Ideally, we should be de-initing the fd-table
- * here but the problem is that I've seen file accesses through booster
- * continuing while the atexit registered function is called. That means
- * , we cannot dealloc the fd-table since then there could be a crash
- * while trying to determine whether a given fd is for libc or for
- * libglusterfsclient.
- * We should be satisfied with having cleaned up glusterfs contexts.
- */
- glusterfs_umount_all ();
- glusterfs_reset ();
-}
-
-int
-fchmod (int fd, mode_t mode)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fchmod: fd %d, mode: 0x%x", fd, mode);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fchmod == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_fchmod (fd, mode);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fchmod (fh, mode);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int
-fsync (int fd)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fsync: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fsync == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_fsync (fd);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fsync (fh);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int __REDIRECT (booster_false_ftruncate, (int fd, off_t length),
- ftruncate);
-int __REDIRECT (booster_false_ftruncate64, (int fd, loff_t length),
- ftruncate64);
-
-int
-booster_false_ftruncate (int fd, off_t length)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "ftruncate: fd %d, length: %"PRIu64,fd
- , length);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_ftruncate == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_ftruncate (fd, length);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_ftruncate (fh, length);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int
-booster_false_ftruncate64 (int fd, loff_t length)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "ftruncate: fd %d, length: %"PRIu64,fd
- , length);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_ftruncate == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_ftruncate64 (fd, length);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_ftruncate (fh, length);
- booster_fdptr_put (fh);
- }
-
- return ret;
-}
-
-int
-link (const char *old, const char *new)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "link: old: %s, new: %s", old, new);
- ret = glusterfs_link (old, new);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Link failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "link call succeeded");
- return ret;
- }
-
- if (real_link == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_link (old, new);
-
- return ret;
-}
-
-int
-rename (const char *old, const char *new)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "link: old: %s, new: %s", old, new);
- ret = glusterfs_rename (old, new);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Rename failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "Rename succeeded");
- return ret;
- }
-
- if (real_rename == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_rename (old, new);
-
- return ret;
-}
-
-int
-utimes (const char *path, const struct timeval times[2])
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "utimes: path %s", path);
- ret = glusterfs_utimes (path, times);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "utimes failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "utimes succeeded");
- return ret;
- }
-
- if (real_utimes == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_utimes (path, times);
-
- return ret;
-}
-
-int
-utime (const char *path, const struct utimbuf *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "utime: path %s", path);
- ret = glusterfs_utime (path, buf);
-
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "utime failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "utime succeeded");
- return ret;
- }
-
- if (real_utime == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_utime (path, buf);
-
- return ret;
-}
-
-int
-mknod (const char *path, mode_t mode, dev_t dev)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "mknod: path %s", path);
- ret = glusterfs_mknod (path, mode, dev);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "mknod failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "mknod succeeded");
- return ret;
- }
-
- if (real_mknod) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_mknod (path, mode, dev);
-
- return ret;
-}
-
-int
-mkfifo (const char *path, mode_t mode)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "mkfifo: path %s", path);
- ret = glusterfs_mkfifo (path, mode);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "mkfifo failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "mkfifo succeeded");
- return ret;
- }
-
- if (real_mkfifo == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_mkfifo (path, mode);
-
- return ret;
-}
-
-int
-unlink (const char *path)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "unlink: path %s", path);
- ret = glusterfs_unlink (path);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "unlink failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "unlink succeeded");
- return ret;
- }
-
- if (real_unlink == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_unlink (path);
-
- return ret;
-}
-
-int
-symlink (const char *oldpath, const char *newpath)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "symlink: old: %s, new: %s",
- oldpath, newpath);
- ret = glusterfs_symlink (oldpath, newpath);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "symlink failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "symlink succeeded");
- return ret;
- }
-
- if (real_symlink == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_symlink (oldpath, newpath);
-
- return ret;
-}
-
-int
-readlink (const char *path, char *buf, size_t bufsize)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "readlink: path %s", path);
- ret = glusterfs_readlink (path, buf, bufsize);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "readlink failed: %s",
- strerror (errno));
- return ret;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "readlink succeeded");
- return ret;
- }
-
- if (real_readlink == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_readlink (path, buf, bufsize);
-
- return ret;
-}
-
-char *
-realpath (const char *path, char *resolved_path)
-{
- char *res = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "realpath: path %s", path);
- res = glusterfs_realpath (path, resolved_path);
- if ((res == NULL) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "realpath failed: %s",
- strerror (errno));
- return res;
- }
-
- if (res != NULL) {
- gf_log ("booster", GF_LOG_TRACE, "realpath succeeded");
- return res;
- }
-
- if (real_realpath == NULL) {
- errno = ENOSYS;
- res = NULL;
- } else
- res = real_realpath (path, resolved_path);
-
- return res;
-}
-
-#define BOOSTER_GL_DIR 1
-#define BOOSTER_POSIX_DIR 2
-
-struct booster_dir_handle {
- int type;
- void *dirh;
-};
-
-DIR *
-opendir (const char *path)
-{
- glusterfs_dir_t gdir = NULL;
- struct booster_dir_handle *bh = NULL;
- DIR *pdir = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "opendir: path: %s", path);
- bh = calloc (1, sizeof (struct booster_dir_handle));
- if (!bh) {
- gf_log ("booster", GF_LOG_ERROR, "memory allocation failed");
- errno = ENOMEM;
- goto out;
- }
-
- gdir = glusterfs_opendir (path);
- if (gdir) {
- gf_log ("booster", GF_LOG_TRACE, "Gluster dir opened");
- bh->type = BOOSTER_GL_DIR;
- bh->dirh = (void *)gdir;
- goto out;
- } else if ((!gdir) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "Opendir failed");
- goto free_out;
- }
-
- if (real_opendir == NULL) {
- errno = ENOSYS;
- goto free_out;
- }
-
- pdir = real_opendir (path);
-
- if (pdir) {
- bh->type = BOOSTER_POSIX_DIR;
- bh->dirh = (void *)pdir;
- goto out;
- }
-
-free_out:
- if (bh) {
- free (bh);
- bh = NULL;
- }
-out:
- return (DIR *)bh;
-}
-
-int __REDIRECT (booster_false_readdir_r, (DIR *dir, struct dirent *entry,
- struct dirent **result), readdir_r) __nonnull ((1));
-int __REDIRECT (booster_false_readdir64_r, (DIR *dir, struct dirent64 *entry,
- struct dirent64 **result), readdir64_r) __nonnull ((1));
-
-int
-booster_false_readdir_r (DIR *dir, struct dirent *entry, struct dirent **result)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- int ret = 0;
-
- if (!bh) {
- ret = errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir_r on gluster");
- ret = glusterfs_readdir_r ((glusterfs_dir_t)bh->dirh, entry,
- result);
-
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir_r on posix");
- if (real_readdir_r == NULL) {
- ret = errno = ENOSYS;
- goto out;
- }
-
- ret = real_readdir_r ((DIR *)bh->dirh, entry, result);
- } else {
- ret = errno = EINVAL;
- }
-
-out:
- return ret;
-}
-
-int
-booster_false_readdir64_r (DIR *dir, struct dirent64 *entry,
- struct dirent64 **result)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- int ret = 0;
-
- if (!bh) {
- ret = errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir_r on gluster");
- ret = glusterfs_readdir_r ((glusterfs_dir_t)bh->dirh,
- (struct dirent *)entry,
- (struct dirent **)result);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir_r on posix");
- if (real_readdir64_r == NULL) {
- ret = errno = ENOSYS;
- goto out;
- }
-
- ret = real_readdir64_r ((DIR *)bh->dirh, entry, result);
- } else {
- ret = errno = EINVAL;
- }
-
-out:
- return ret;
-}
-
-struct dirent *
-__REDIRECT (booster_false_readdir, (DIR *dir), readdir) __nonnull ((1));
-
-struct dirent64 *
-__REDIRECT (booster_false_readdir64, (DIR *dir), readdir64) __nonnull ((1));
-
-struct dirent *
-booster_false_readdir (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- struct dirent *dirp = NULL;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir on gluster");
- dirp = glusterfs_readdir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir on posix");
- if (real_readdir == NULL) {
- errno = ENOSYS;
- dirp = NULL;
- goto out;
- }
-
- dirp = real_readdir ((DIR *)bh->dirh);
- } else {
- dirp = NULL;
- errno = EINVAL;
- }
-
-out:
- return dirp;
-}
-
-struct dirent64 *
-booster_false_readdir64 (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- struct dirent64 *dirp = NULL;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir on gluster");
- dirp = glusterfs_readdir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "readdir on posix");
- if (real_readdir == NULL) {
- errno = ENOSYS;
- dirp = NULL;
- goto out;
- }
-
- dirp = real_readdir64 ((DIR *)bh->dirh);
- } else {
- dirp = NULL;
- errno = EINVAL;
- }
-
-out:
- return dirp;
-}
-
-int
-closedir (DIR *dh)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dh;
- int ret = -1;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "closedir on gluster");
- ret = glusterfs_closedir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "closedir on posix");
- if (real_closedir == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else
- ret = real_closedir ((DIR *)bh->dirh);
- } else {
- errno = EBADF;
- }
-
- if (ret == 0) {
- free (bh);
- bh = NULL;
- }
-out:
- return ret;
-}
-
-/* The real stat functions reside in booster_stat.c to
- * prevent clash with the statX prototype and functions
- * declared from sys/stat.h
- */
-int
-booster_xstat (int ver, const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "xstat: path: %s", path);
- ret = glusterfs_stat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "xstat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "xstat succeeded");
- goto out;
- }
-
- if (real___xstat == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real___xstat (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_xstat64 (int ver, const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_TRACE, "xstat64: path: %s", path);
- ret = glusterfs_stat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "xstat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "xstat64 succeeded");
- goto out;
- }
-
- if (real___xstat64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real___xstat64 (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_stat (const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "stat: path: %s", path);
- ret = glusterfs_stat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "stat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "stat succeeded");
- goto out;
- }
-
- if (real_stat != NULL)
- ret = real_stat (path, sbuf);
- else if (real___xstat != NULL)
- ret = real___xstat (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-
-out:
- return ret;
-}
-
-int
-booster_stat64 (const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_TRACE, "stat64: %s", path);
- ret = glusterfs_stat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "stat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "stat64 succeeded");
- goto out;
- }
-
- if (real_stat64 != NULL)
- ret = real_stat64 (path, sbuf);
- else if (real___xstat64 != NULL)
- ret = real___xstat64 (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-int
-booster_fxstat (int ver, int fd, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fxstat: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real___fxstat == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real___fxstat (ver, fd, sbuf);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fstat (fh, sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_fxstat64 (int ver, int fd, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fxstat64: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real___fxstat64 == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- ret = real___fxstat64 (ver, fd, sbuf);
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fstat (fh, (struct stat *)sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_fstat (int fd, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fstat: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fstat != NULL)
- ret = real_fstat (fd, sbuf);
- else if (real___fxstat != NULL)
- ret = real___fxstat (0, fd, sbuf);
- else {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fstat (fh, sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_fstat64 (int fd, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fstat64: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fstat64 != NULL)
- ret = real_fstat64 (fd, sbuf);
- else if (real___fxstat64 != NULL)
- /* Not sure how portable the use of 0 for
- * version number is but it works over glibc.
- * We need this because, I've
- * observed that all the above real* functors can be
- * NULL. In that case, this is our last and only option.
- */
- ret = real___fxstat64 (0, fd, sbuf);
- else {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fstat (fh, (struct stat *)sbuf);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-int
-booster_lxstat (int ver, const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lxstat: path %s", path);
- ret = glusterfs_lstat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lxstat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lxstat succeeded");
- goto out;
- }
-
- if (real___lxstat == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real___lxstat (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_lxstat64 (int ver, const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_TRACE, "lxstat64: path %s", path);
- ret = glusterfs_lstat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lxstat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lxstat64 succeeded");
- goto out;
- }
-
- if (real___lxstat64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real___lxstat64 (ver, path, sbuf);
-out:
- return ret;
-}
-
-int
-booster_lstat (const char *path, void *buf)
-{
- struct stat *sbuf = (struct stat *)buf;
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lstat: path %s", path);
- ret = glusterfs_lstat (path, sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lstat failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lstat succeeded");
- goto out;
- }
-
- if (real_lstat != NULL)
- ret = real_lstat (path, sbuf);
- else if (real___lxstat != NULL)
- ret = real___lxstat (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-
-out:
- return ret;
-}
-
-int
-booster_lstat64 (const char *path, void *buf)
-{
- int ret = -1;
- struct stat64 *sbuf = (struct stat64 *)buf;
-
- gf_log ("booster", GF_LOG_TRACE, "lstat64: path %s", path);
- ret = glusterfs_lstat (path, (struct stat *)sbuf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lstat64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lstat64 succeeded");
- goto out;
- }
-
- if (real_lstat64 != NULL)
- ret = real_lstat64 (path, sbuf);
- else if (real___lxstat64 != NULL)
- ret = real___lxstat64 (0, path, sbuf);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-int
-booster_statfs (const char *pathname, struct statfs *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "statfs: path %s", pathname);
- ret = glusterfs_statfs (pathname, buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statfs failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "statfs succeeded");
- goto out;
- }
-
- if (real_statfs == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statfs (pathname, buf);
-
-out:
- return ret;
-}
-
-int
-booster_statfs64 (const char *pathname, struct statfs64 *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "stat64: path %s", pathname);
- ret = glusterfs_statfs (pathname, (struct statfs *)buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statfs64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "statfs64 succeeded");
- goto out;
- }
-
- if (real_statfs64 == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statfs64 (pathname, buf);
-
-out:
- return ret;
-}
-
-int
-booster_statvfs (const char *pathname, struct statvfs *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "statvfs: path %s", pathname);
- ret = glusterfs_statvfs (pathname, buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statvfs failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "statvfs succeeded");
- goto out;
- }
-
- if (real_statvfs == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statvfs (pathname, buf);
-
-out:
- return ret;
-}
-
-int
-booster_statvfs64 (const char *pathname, struct statvfs64 *buf)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "statvfs64: path %s", pathname);
- ret = glusterfs_statvfs (pathname, (struct statvfs *)buf);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "statvfs64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "statvfs64 succeeded");
- goto out;
- }
-
- if (real_statvfs64 == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_statvfs64 (pathname, buf);
-
-out:
- return ret;
-}
-
-ssize_t
-getxattr (const char *path, const char *name, void *value, size_t size)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "getxattr: path %s, name %s", path,
- name);
- ret = glusterfs_getxattr (path, name, value, size);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "getxattr failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "getxattr succeeded");
- return ret;
- }
-
- if (real_getxattr == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_getxattr (path, name, value, size);
-out:
- return ret;
-}
-
-
-ssize_t
-lgetxattr (const char *path, const char *name, void *value, size_t size)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lgetxattr: path %s, name %s", path,
- name);
- ret = glusterfs_lgetxattr (path, name, value, size);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lgetxattr failed: %s",
- strerror (errno));
-
- goto out;
- }
-
- if (ret > 0) {
- gf_log ("booster", GF_LOG_TRACE, "lgetxattr succeeded");
- return ret;
- }
-
- if (real_lgetxattr == NULL) {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
-
- ret = real_lgetxattr (path, name, value, size);
-out:
- return ret;
-}
-
-int
-remove (const char *path)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "remove: %s", path);
- ret = glusterfs_remove (path);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "remove failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "remove succeeded");
- goto out;
- }
-
- if (real_remove == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_remove (path);
-
-out:
- return ret;
-}
-
-int
-lchown (const char *path, uid_t owner, gid_t group)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lchown: path %s", path);
- ret = glusterfs_lchown (path, owner, group);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lchown failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_ERROR, "lchown succeeded");
- goto out;
- }
-
- if (real_lchown == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
- ret = real_lchown (path, owner, group);
-
-out:
- return ret;
-}
-
-void
-booster_rewinddir (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "rewinddir on glusterfs");
- glusterfs_rewinddir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- if (real_rewinddir == NULL) {
- errno = ENOSYS;
- goto out;
- }
- gf_log ("booster", GF_LOG_TRACE, "rewinddir on posix");
- real_rewinddir ((DIR *)bh->dirh);
- } else
- errno = EINVAL;
-out:
- return;
-}
-
-void
-booster_seekdir (DIR *dir, off_t offset)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "seekdir on glusterfs");
- glusterfs_seekdir ((glusterfs_dir_t)bh->dirh, offset);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- if (real_seekdir == NULL) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "seekdir on posix");
- real_seekdir ((DIR *)bh->dirh, offset);
- } else
- errno = EINVAL;
-out:
- return;
-}
-
-off_t
-booster_telldir (DIR *dir)
-{
- struct booster_dir_handle *bh = (struct booster_dir_handle *)dir;
- off_t offset = -1;
-
- if (!bh) {
- errno = EFAULT;
- goto out;
- }
-
- if (bh->type == BOOSTER_GL_DIR) {
- gf_log ("booster", GF_LOG_TRACE, "telldir on glusterfs");
- offset = glusterfs_telldir ((glusterfs_dir_t)bh->dirh);
- } else if (bh->type == BOOSTER_POSIX_DIR) {
- if (real_telldir == NULL) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "telldir on posix");
- offset = real_telldir ((DIR *)bh->dirh);
- } else
- errno = EINVAL;
-out:
- return offset;
-}
-
-
-pid_t
-fork (void)
-{
- pid_t pid = 0;
- char child = 0;
-
- glusterfs_log_lock ();
- {
- pid = real_fork ();
- }
- glusterfs_log_unlock ();
-
- child = (pid == 0);
- if (child) {
- booster_cleanup ();
- booster_init ();
- }
-
- return pid;
-}
-
-ssize_t
-sendfile (int out_fd, int in_fd, off_t *offset, size_t count)
-{
- glusterfs_file_t in_fh = NULL;
- ssize_t ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "sendfile: in fd %d, out fd %d, offset"
- " %"PRIu64", count %"GF_PRI_SIZET, in_fd, out_fd, *offset,
- count);
- /*
- * handle sendfile in booster only if in_fd corresponds to a glusterfs
- * file handle
- */
- in_fh = booster_fdptr_get (booster_fdtable, in_fd);
- if (!in_fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_sendfile == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else {
- ret = real_sendfile (out_fd, in_fd, offset, count);
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_sendfile (out_fd, in_fh, offset, count);
- booster_fdptr_put (in_fh);
- }
-
- return ret;
-}
-
-ssize_t
-sendfile64 (int out_fd, int in_fd, off_t *offset, size_t count)
-{
- glusterfs_file_t in_fh = NULL;
- ssize_t ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "sendfile64: in fd %d, out fd %d,"
- " offset %"PRIu64", count %"GF_PRI_SIZET, in_fd, out_fd,
- *offset, count);
- /*
- * handle sendfile in booster only if in_fd corresponds to a glusterfs
- * file handle
- */
- in_fh = booster_fdptr_get (booster_fdtable, in_fd);
- if (!in_fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_sendfile64 == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else {
- ret = real_sendfile64 (out_fd, in_fd, offset, count);
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_sendfile (out_fd, in_fh, offset, count);
- booster_fdptr_put (in_fh);
- }
-
- return ret;
-}
-
-
-int
-fcntl (int fd, int cmd, ...)
-{
- va_list ap;
- int ret = -1;
- long arg = 0;
- struct flock *lock = NULL;
- glusterfs_file_t glfs_fd = 0;
-
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- gf_log ("booster", GF_LOG_TRACE, "fcntl: fd %d, cmd %d", fd, cmd);
- switch (cmd) {
- case F_DUPFD:
- ret = dup (fd);
- break;
- /*
- * FIXME: Consider this case when implementing F_DUPFD, F_GETFD
- * etc flags in libglusterfsclient. Commenting it out for
- * timebeing since it is defined only in linux kernel
- * versions >= 2.6.24.
- */
- /* case F_DUPFD_CLOEXEC: */
- case F_GETFD:
- if (glfs_fd != NULL) {
- ret = booster_get_close_on_exec (booster_fdtable, fd)
- ? FD_CLOEXEC : 0;
- } else {
- if (real_fcntl == NULL) {
- ret = -1;
- errno = ENOSYS;
- } else {
- ret = real_fcntl (fd, cmd);
- }
- }
- break;
-
- case F_GETFL:
- case F_GETOWN:
- case F_GETSIG:
- case F_GETLEASE:
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fcntl (glfs_fd, cmd);
- } else {
- if (!real_fcntl) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- ret = real_fcntl (fd, cmd);
- }
- break;
-
- case F_SETFD:
- if (glfs_fd != NULL) {
- booster_set_close_on_exec (booster_fdtable, fd);
- ret = 0;
- } else {
- if (real_fcntl == NULL) {
- ret = -1;
- errno = ENOSYS;
- } else {
- ret = real_fcntl (fd, cmd);
- }
- }
- break;
-
- case F_SETFL:
- case F_SETOWN:
- case F_SETSIG:
- case F_SETLEASE:
- case F_NOTIFY:
- va_start (ap, cmd);
- arg = va_arg (ap, long);
- va_end (ap);
-
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fcntl (glfs_fd, cmd, arg);
- } else {
- if (!real_fcntl) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- ret = real_fcntl (fd, cmd, arg);
- }
- break;
-
- case F_GETLK:
- case F_SETLK:
- case F_SETLKW:
-#if F_GETLK != F_GETLK64
- case F_GETLK64:
-#endif
-#if F_SETLK != F_SETLK64
- case F_SETLK64:
-#endif
-#if F_SETLKW != F_SETLKW64
- case F_SETLKW64:
-#endif
- va_start (ap, cmd);
- lock = va_arg (ap, struct flock *);
- va_end (ap);
-
- if (lock == NULL) {
- errno = EINVAL;
- goto out;
- }
-
- if (glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fcntl (glfs_fd, cmd, lock);
- } else {
- if (!real_fcntl) {
- errno = ENOSYS;
- goto out;
- }
-
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- ret = real_fcntl (fd, cmd, lock);
- }
- break;
-
- default:
- errno = EINVAL;
- break;
- }
-
-out:
- if (glfs_fd) {
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-int
-chdir (const char *path)
-{
- int ret = -1;
- char cwd[PATH_MAX];
- char *res = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "chdir: path %s", path);
-
- pthread_mutex_lock (&cwdlock);
- {
- res = glusterfs_getcwd (cwd, PATH_MAX);
- if (res == NULL) {
- gf_log ("booster", GF_LOG_ERROR, "getcwd failed: %s",
- strerror (errno));
- goto unlock;
- }
-
- ret = glusterfs_chdir (path);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "chdir failed: %s",
- strerror (errno));
- goto unlock;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "chdir succeeded");
- goto unlock;
- }
-
- if (real_chdir == NULL) {
- errno = ENOSYS;
- ret = -1;
- goto unlock;
- }
-
- ret = real_chdir (path);
- if (ret == -1) {
- glusterfs_chdir (cwd);
- }
- }
-unlock:
- pthread_mutex_unlock (&cwdlock);
-
- return ret;
-}
-
-
-int
-fchdir (int fd)
-{
- int ret = -1;
- glusterfs_file_t glfs_fd = 0;
- char cwd[PATH_MAX];
- char *res = NULL;
-
- glfs_fd = booster_fdptr_get (booster_fdtable, fd);
-
- if (!glfs_fd) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_write == NULL) {
- errno = ENOSYS;
- ret = -1;
- } else {
- ret = real_fchdir (fd);
- if (ret == 0) {
- res = real_getcwd (cwd, PATH_MAX);
- if (res == NULL) {
- gf_log ("booster", GF_LOG_ERROR,
- "getcwd failed (%s)",
- strerror (errno));
- ret = -1;
- } else {
- glusterfs_chdir (cwd);
- }
- }
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fchdir (glfs_fd);
- booster_fdptr_put (glfs_fd);
- }
-
- return ret;
-}
-
-
-char *
-getcwd (char *buf, size_t size)
-{
- char *res = NULL;
-
- res = glusterfs_getcwd (buf, size);
- if ((res == NULL) && (errno == ENODEV)) {
- res = real_getcwd (buf, size);
- }
-
- return res;
-}
-
-
-int __REDIRECT (booster_false_truncate, (const char *path, off_t length),
- truncate) __nonnull ((1));
-int __REDIRECT (booster_false_truncate64, (const char *path, loff_t length),
- truncate64) __nonnull ((1));;
-
-int
-booster_false_truncate (const char *path, off_t length)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "truncate: path (%s) length (%"PRIu64
- ")", path, length);
-
- ret = glusterfs_truncate (path, length);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "truncate failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "truncate succeeded");
- goto out;
- }
-
- if (real_truncate != NULL)
- ret = real_truncate (path, length);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-int
-booster_false_truncate64 (const char *path, loff_t length)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "truncate64: path (%s) length "
- "(%"PRIu64")", path, length);
-
- ret = glusterfs_truncate (path, length);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "truncate64 failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "truncate64 succeeded");
- goto out;
- }
-
- if (real_truncate64 != NULL)
- ret = real_truncate64 (path, length);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-int
-setxattr (const char *path, const char *name, const void *value, size_t size,
- int flags)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "setxattr: path: %s", path);
- ret = glusterfs_setxattr (path, name, value, size, flags);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "setxattr failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "setxattr succeeded");
- goto out;
- }
-
- if (real_setxattr != NULL)
- ret = real_setxattr (path, name, value, size, flags);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-int
-lsetxattr (const char *path, const char *name, const void *value, size_t size,
- int flags)
-{
- int ret = -1;
-
- gf_log ("booster", GF_LOG_TRACE, "lsetxattr: path: %s", path);
- ret = glusterfs_lsetxattr (path, name, value, size, flags);
- if ((ret == -1) && (errno != ENODEV)) {
- gf_log ("booster", GF_LOG_ERROR, "lsetxattr failed: %s",
- strerror (errno));
- goto out;
- }
-
- if (ret == 0) {
- gf_log ("booster", GF_LOG_TRACE, "lsetxattr succeeded");
- goto out;
- }
-
- if (real_lsetxattr != NULL)
- ret = real_lsetxattr (path, name, value, size, flags);
- else {
- errno = ENOSYS;
- ret = -1;
- goto out;
- }
-
-out:
- return ret;
-}
-
-
-int
-fsetxattr (int fd, const char *name, const void *value, size_t size, int flags)
-{
- int ret = -1;
- glusterfs_file_t fh = NULL;
-
- gf_log ("booster", GF_LOG_TRACE, "fsetxattr: fd %d", fd);
- fh = booster_fdptr_get (booster_fdtable, fd);
- if (!fh) {
- gf_log ("booster", GF_LOG_TRACE, "Not a booster fd");
- if (real_fsetxattr != NULL)
- ret = real_fsetxattr (fd, name, value, size, flags);
- else {
- ret = -1;
- errno = ENOSYS;
- goto out;
- }
- } else {
- gf_log ("booster", GF_LOG_TRACE, "Is a booster fd");
- ret = glusterfs_fsetxattr (fh, name, value, size, flags);
- booster_fdptr_put (fh);
- }
-
-out:
- return ret;
-}
-
-
-void
-booster_lib_init (void)
-{
-
- RESOLVE (open);
- RESOLVE (open64);
- RESOLVE (creat);
- RESOLVE (creat64);
-
- RESOLVE (read);
- RESOLVE (readv);
- RESOLVE (pread);
- RESOLVE (pread64);
-
- RESOLVE (write);
- RESOLVE (writev);
- RESOLVE (pwrite);
- RESOLVE (pwrite64);
-
- RESOLVE (lseek);
- RESOLVE (lseek64);
-
- RESOLVE (close);
-
- RESOLVE (dup);
- RESOLVE (dup2);
-
- RESOLVE (fork);
- RESOLVE (mkdir);
- RESOLVE (rmdir);
- RESOLVE (chmod);
- RESOLVE (chown);
- RESOLVE (fchmod);
- RESOLVE (fchown);
- RESOLVE (fsync);
- RESOLVE (ftruncate);
- RESOLVE (ftruncate64);
- RESOLVE (link);
- RESOLVE (rename);
- RESOLVE (utimes);
- RESOLVE (utime);
- RESOLVE (mknod);
- RESOLVE (mkfifo);
- RESOLVE (unlink);
- RESOLVE (symlink);
- RESOLVE (readlink);
- RESOLVE (realpath);
- RESOLVE (opendir);
- RESOLVE (readdir);
- RESOLVE (readdir64);
- RESOLVE (closedir);
- RESOLVE (__xstat);
- RESOLVE (__xstat64);
- RESOLVE (stat);
- RESOLVE (stat64);
- RESOLVE (__fxstat);
- RESOLVE (__fxstat64);
- RESOLVE (fstat);
- RESOLVE (fstat64);
- RESOLVE (__lxstat);
- RESOLVE (__lxstat64);
- RESOLVE (lstat);
- RESOLVE (lstat64);
- RESOLVE (statfs);
- RESOLVE (statfs64);
- RESOLVE (statvfs);
- RESOLVE (statvfs64);
- RESOLVE (getxattr);
- RESOLVE (lgetxattr);
- RESOLVE (remove);
- RESOLVE (lchown);
- RESOLVE (rewinddir);
- RESOLVE (seekdir);
- RESOLVE (telldir);
- RESOLVE (sendfile);
- RESOLVE (sendfile64);
- RESOLVE (readdir_r);
- RESOLVE (readdir64_r);
- RESOLVE (fcntl);
- RESOLVE (chdir);
- RESOLVE (fchdir);
- RESOLVE (getcwd);
- RESOLVE (truncate);
- RESOLVE (truncate64);
- RESOLVE (setxattr);
- RESOLVE (lsetxattr);
- RESOLVE (fsetxattr);
-
- /* This must be called after resolving real functions
- * above so that the socket based IO calls in libglusterfsclient
- * can fall back to a non-NULL real_XXX function pointer.
- * Calling booster_init before resolving the names above
- * results in seg-faults because the function symbols above are NULL.
- */
- booster_init ();
-}
-
diff --git a/booster/src/booster_fstab.c b/booster/src/booster_fstab.c
deleted file mode 100644
index 202249cadf3..00000000000
--- a/booster/src/booster_fstab.c
+++ /dev/null
@@ -1,452 +0,0 @@
-/* Utilities for reading/writing fstab, mtab, etc.
- Copyright (C) 1995-2000, 2001, 2002, 2003, 2006
- Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#include <alloca.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/types.h>
-#include "booster_fstab.h"
-#include <stdlib.h>
-#include <libglusterfsclient.h>
-#include <errno.h>
-
-/* The default timeout for inode and stat cache. */
-#define BOOSTER_DEFAULT_ATTR_TIMEO 5 /* In Secs */
-
-/* Prepare to begin reading and/or writing mount table entries from the
- beginning of FILE. MODE is as for `fopen'. */
-glusterfs_fstab_t *
-glusterfs_fstab_init (const char *file, const char *mode)
-{
- glusterfs_fstab_t *handle = NULL;
- handle = calloc (1, sizeof (glusterfs_fstab_t));
- if (!handle) {
- gf_log ("booster-fstab", GF_LOG_ERROR, "Memory allocation"
- " failed");
- goto out;
- }
-
- gf_log ("booster-fstab", GF_LOG_DEBUG, "FSTAB file: %s", file);
- FILE *result = fopen (file,mode);
- if (result != NULL) {
- handle->fp = result;
- } else {
- gf_log ("booster-fstab", GF_LOG_ERROR, "FSTAB file open failed:"
- " %s", strerror (errno));
- free (handle);
- handle = NULL;
- }
-
-out:
-
- return handle;
-}
-
-int
-glusterfs_fstab_close (glusterfs_fstab_t *h)
-{
- if (!h)
- return -1;
-
- if (h->fp)
- fclose (h->fp);
-
- return 0;
-}
-
-/* Since the values in a line are separated by spaces, a name cannot
- contain a space. Therefore some programs encode spaces in names
- by the strings "\040". We undo the encoding when reading an entry.
- The decoding happens in place. */
-static char *
-decode_name (char *buf)
-{
- char *rp = buf;
- char *wp = buf;
-
- do
- if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '4'
- && rp[3] == '0')
- {
- /* \040 is a SPACE. */
- *wp++ = ' ';
- rp += 3;
- }
- else if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '1'
- && rp[3] == '1')
- {
- /* \011 is a TAB. */
- *wp++ = '\t';
- rp += 3;
- }
- else if (rp[0] == '\\' && rp[1] == '0' && rp[2] == '1'
- && rp[3] == '2')
- {
- /* \012 is a NEWLINE. */
- *wp++ = '\n';
- rp += 3;
- }
- else if (rp[0] == '\\' && rp[1] == '\\')
- {
- /* We have to escape \\ to be able to represent all
- * characters. */
- *wp++ = '\\';
- rp += 1;
- }
- else if (rp[0] == '\\' && rp[1] == '1' && rp[2] == '3'
- && rp[3] == '4')
- {
- /* \134 is also \\. */
- *wp++ = '\\';
- rp += 3;
- }
- else
- *wp++ = *rp;
- while (*rp++ != '\0');
-
- return buf;
-}
-
-
-/* Read one mount table entry from STREAM. Returns a pointer to storage
- reused on the next call, or null for EOF or error (use feof/ferror to
- check). */
-struct glusterfs_mntent *
-__glusterfs_fstab_getent (FILE *stream, struct glusterfs_mntent *mp,
- char *buffer, int bufsiz)
-{
- char *cp;
- char *head;
-
- do
- {
- char *end_ptr;
-
- if (fgets (buffer, bufsiz, stream) == NULL)
- {
- return NULL;
- }
-
- end_ptr = strchr (buffer, '\n');
- if (end_ptr != NULL) /* chop newline */
- *end_ptr = '\0';
- else
- {
- /* Not the whole line was read. Do it now but forget
- * it. */
- char tmp[1024];
- while (fgets (tmp, sizeof tmp, stream) != NULL)
- if (strchr (tmp, '\n') != NULL)
- break;
- }
-
- head = buffer + strspn (buffer, " \t");
- /* skip empty lines and comment lines: */
- }
- while (head[0] == '\0' || head[0] == '#');
-
- cp = strsep (&head, " \t");
- mp->mnt_fsname = cp != NULL ? decode_name (cp) : (char *) "";
- if (head)
- head += strspn (head, " \t");
- cp = strsep (&head, " \t");
- mp->mnt_dir = cp != NULL ? decode_name (cp) : (char *) "";
- if (head)
- head += strspn (head, " \t");
- cp = strsep (&head, " \t");
- mp->mnt_type = cp != NULL ? decode_name (cp) : (char *) "";
- if (head)
- head += strspn (head, " \t");
- cp = strsep (&head, " \t");
- mp->mnt_opts = cp != NULL ? decode_name (cp) : (char *) "";
- switch (head ? sscanf (head, " %d %d ", &mp->mnt_freq,
- &mp->mnt_passno) : 0)
- {
- case 0:
- mp->mnt_freq = 0;
- case 1:
- mp->mnt_passno = 0;
- case 2:
- break;
- }
-
- return mp;
-}
-
-struct glusterfs_mntent *
-glusterfs_fstab_getent (glusterfs_fstab_t *h)
-{
- if (!h)
- return NULL;
-
- if (!h->fp)
- return NULL;
-
- return __glusterfs_fstab_getent (h->fp, &h->tmpent, h->buf,
- GF_MNTENT_BUFSIZE);
-}
-
-/* We have to use an encoding for names if they contain spaces or tabs.
- To be able to represent all characters we also have to escape the
- backslash itself. This "function" must be a macro since we use
- `alloca'. */
-#define encode_name(name) \
- do { \
- const char *rp = name; \
- \
- while (*rp != '\0') \
- if (*rp == ' ' || *rp == '\t' || *rp == '\\') \
- break; \
- else \
- ++rp; \
- \
- if (*rp != '\0') \
- { \
- /* In the worst case the length of the string \
- * can increase to four times the current \
- * length. */ \
- char *wp; \
- \
- rp = name; \
- name = wp = (char *) alloca (strlen (name) * 4 + 1); \
- \
- do { \
- if (*rp == ' ') \
- { \
- *wp++ = '\\'; \
- *wp++ = '0'; \
- *wp++ = '4'; \
- *wp++ = '0'; \
- } \
- else if (*rp == '\t') \
- { \
- *wp++ = '\\'; \
- *wp++ = '0'; \
- *wp++ = '1'; \
- *wp++ = '1'; \
- } \
- else if (*rp == '\n') \
- { \
- *wp++ = '\\'; \
- *wp++ = '0'; \
- *wp++ = '1'; \
- *wp++ = '2'; \
- } \
- else if (*rp == '\\') \
- { \
- *wp++ = '\\'; \
- *wp++ = '\\'; \
- } \
- else \
- *wp++ = *rp; \
- } while (*rp++ != '\0'); \
- } \
- } while (0) \
-
-
-int
-glusterfs_fstab_addent (glusterfs_fstab_t *h,
- const struct glusterfs_mntent *mnt)
-{
- struct glusterfs_mntent mntcopy = *mnt;
- if (!h)
- return -1;
-
- if (!h->fp)
- return -1;
-
- if (fseek (h->fp, 0, SEEK_END))
- return -1;
-
- /* Encode spaces and tabs in the names. */
- encode_name (mntcopy.mnt_fsname);
- encode_name (mntcopy.mnt_dir);
- encode_name (mntcopy.mnt_type);
- encode_name (mntcopy.mnt_opts);
-
- return (fprintf (h->fp, "%s %s %s %s %d %d\n",
- mntcopy.mnt_fsname,
- mntcopy.mnt_dir,
- mntcopy.mnt_type,
- mntcopy.mnt_opts,
- mntcopy.mnt_freq,
- mntcopy.mnt_passno)
- < 0 ? 1 : 0);
-}
-
-
-/* Search MNT->mnt_opts for an option matching OPT.
- Returns the address of the substring, or null if none found. */
-char *
-glusterfs_fstab_hasoption (const struct glusterfs_mntent *mnt, const char *opt)
-{
- const size_t optlen = strlen (opt);
- char *rest = mnt->mnt_opts, *p;
-
- while ((p = strstr (rest, opt)) != NULL)
- {
- if ((p == rest || p[-1] == ',')
- && (p[optlen] == '\0' || p[optlen] == '=' || p[optlen] == ','))
- return p;
-
- rest = strchr (p, ',');
- if (rest == NULL)
- break;
- ++rest;
- }
-
- return NULL;
-}
-
-void
-clean_init_params (glusterfs_init_params_t *ipars)
-{
- if (!ipars)
- return;
-
- if (ipars->volume_name)
- free (ipars->volume_name);
-
- if (ipars->specfile)
- free (ipars->specfile);
-
- if (ipars->logfile)
- free (ipars->logfile);
-
- if (ipars->loglevel)
- free (ipars->loglevel);
-
- return;
-}
-
-char *
-get_option_value (char *opt)
-{
- char *val = NULL;
- char *saveptr = NULL;
- char *copy_opt = NULL;
- char *retval = NULL;
-
- copy_opt = strdup (opt);
-
- /* Get the = before the value of the option. */
- val = index (copy_opt, '=');
- if (val) {
- /* Move to start of option */
- ++val;
-
- /* Now, to create a '\0' delimited string out of the
- * options string, first get the position where the
- * next option starts, that would be the next ','.
- */
- saveptr = index (val, ',');
- if (saveptr)
- *saveptr = '\0';
- retval = strdup (val);
- }
-
- free (copy_opt);
-
- return retval;
-}
-
-void
-booster_mount (struct glusterfs_mntent *ent)
-{
- char *opt = NULL;
- glusterfs_init_params_t ipars;
- time_t timeout = BOOSTER_DEFAULT_ATTR_TIMEO;
- char *timeostr = NULL;
- char *endptr = NULL;
-
- if (!ent)
- return;
-
- gf_log ("booster-fstab", GF_LOG_DEBUG, "Mount entry: volfile: %s,"
- " VMP: %s, Type: %s, Options: %s", ent->mnt_fsname,
- ent->mnt_dir, ent->mnt_type, ent->mnt_opts);
- if ((strcmp (ent->mnt_type, "glusterfs") != 0)) {
- gf_log ("booster-fstab", GF_LOG_ERROR, "Type is not glusterfs");
- return;
- }
-
- memset (&ipars, 0, sizeof (glusterfs_init_params_t));
- if (ent->mnt_fsname)
- ipars.specfile = strdup (ent->mnt_fsname);
-
- opt = glusterfs_fstab_hasoption (ent, "subvolume");
- if (opt)
- ipars.volume_name = get_option_value (opt);
-
- opt = glusterfs_fstab_hasoption (ent, "log-file");
- if (!opt)
- opt = glusterfs_fstab_hasoption (ent, "logfile");
-
- if (opt)
- ipars.logfile = get_option_value (opt);
-
- opt = glusterfs_fstab_hasoption (ent, "log-level");
- if (!opt)
- opt = glusterfs_fstab_hasoption (ent, "loglevel");
-
- if (opt)
- ipars.loglevel = get_option_value (opt);
-
- /* Attribute cache timeout */
- opt = glusterfs_fstab_hasoption (ent, "attr_timeout");
- if (opt) {
- timeostr = get_option_value (opt);
- if (timeostr)
- timeout = strtol (timeostr, &endptr, 10);
- }
-
- ipars.lookup_timeout = timeout;
- ipars.stat_timeout = timeout;
-
- if ((glusterfs_mount (ent->mnt_dir, &ipars)) == -1)
- gf_log ("booster-fstab", GF_LOG_ERROR, "VMP mounting failed");
-
- clean_init_params (&ipars);
-}
-
-int
-booster_configure (char *confpath)
-{
- int ret = -1;
- glusterfs_fstab_t *handle = NULL;
- struct glusterfs_mntent *ent = NULL;
-
- if (!confpath)
- goto out;
-
- handle = glusterfs_fstab_init (confpath, "r");
- if (!handle)
- goto out;
-
- while ((ent = glusterfs_fstab_getent (handle)) != NULL)
- booster_mount (ent);
-
- glusterfs_fstab_close (handle);
- ret = 0;
-out:
- return ret;
-}
-
-
diff --git a/booster/src/booster_fstab.h b/booster/src/booster_fstab.h
deleted file mode 100644
index 9bab04c5aa0..00000000000
--- a/booster/src/booster_fstab.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/* Utilities for reading/writing fstab, mtab, etc.
- Copyright (C) 1995, 1996, 1997, 1998, 1999 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifndef GLUSTERFS_FSTAB_MNTENT_H
-#define GLUSTERFS_FSTAB_MNTENT_H 1
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "compat.h"
-
-/* General filesystem types. */
-#define GF_MNTTYPE_IGNORE "ignore" /* Ignore this entry. */
-#define GF_MNTTYPE_NFS "nfs" /* Network file system. */
-#define GF_MNTTYPE_SWAP "swap" /* Swap device. */
-
-
-/* Generic mount options. */
-#define GF_MNTOPT_DEFAULTS "defaults" /* Use all default options. */
-#define GF_MNTOPT_RO "ro" /* Read only. */
-#define GF_MNTOPT_RW "rw" /* Read/write. */
-#define GF_MNTOPT_SUID "suid" /* Set uid allowed. */
-#define GF_MNTOPT_NOSUID "nosuid" /* No set uid allowed. */
-#define GF_MNTOPT_NOAUTO "noauto" /* Do not auto mount. */
-
-
-/* Structure describing a mount table entry. */
-struct glusterfs_mntent
-{
- char *mnt_fsname; /* Device or server for filesystem. */
- char *mnt_dir; /* Directory mounted on. */
- char *mnt_type; /* Type of filesystem: ufs, nfs, etc. */
- char *mnt_opts; /* Comma-separated options for fs. */
- int mnt_freq; /* Dump frequency (in days). */
- int mnt_passno; /* Pass number for `fsck'. */
-};
-
-#define GF_MNTENT_BUFSIZE 1024
-typedef struct glusterfs_fstab_handle {
- FILE *fp;
- char buf[GF_MNTENT_BUFSIZE];
- struct glusterfs_mntent tmpent;
-}glusterfs_fstab_t;
-
-
-/* Prepare to begin reading and/or writing mount table entries from the
- beginning of FILE. MODE is as for `fopen'. */
-extern glusterfs_fstab_t *glusterfs_fstab_init (const char *file,
- const char *mode);
-
-extern struct glusterfs_mntent *glusterfs_fstab_getent (glusterfs_fstab_t *h);
-
-/* Write the mount table entry described by MNT to STREAM.
- Return zero on success, nonzero on failure. */
-extern int glusterfs_fstab_addent (glusterfs_fstab_t *h,
- const struct glusterfs_mntent *mnt);
-
-/* Close a stream opened with `glusterfs_fstab_init'. */
-extern int glusterfs_fstab_close (glusterfs_fstab_t *h);
-
-/* Search MNT->mnt_opts for an option matching OPT.
- Returns the address of the substring, or null if none found. */
-extern char *glusterfs_fstab_hasoption (const struct glusterfs_mntent *mnt,
- const char *opt);
-
-#endif
diff --git a/booster/src/booster_stat.c b/booster/src/booster_stat.c
deleted file mode 100644
index 8f76cfe37dd..00000000000
--- a/booster/src/booster_stat.c
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <sys/types.h>
-
-extern int
-booster_stat (const char *path, void *buf);
-
-extern int
-booster_stat64 (const char *path, void *buf);
-
-extern int
-booster_xstat (int ver, const char *path, void *buf);
-
-extern int
-booster_xstat64 (int ver, const char *path, void *buf);
-
-extern int
-booster_fxstat (int ver, int fd, void *buf);
-extern int
-booster_fxstat64 (int ver, int fd, void *buf);
-extern int
-booster_fstat (int fd, void *buf);
-extern int
-booster_fstat64 (int fd, void *buf);
-
-extern int
-booster_lstat (const char *path, void *buf);
-extern int
-booster_lstat64 (const char *path, void *buf);
-extern int
-booster_lxstat (int ver, const char *path, void *buf);
-extern int
-booster_lxstat64 (int ver, const char *path, void *buf);
-
-
-extern int
-booster_statfs (const char *path, void *buf);
-extern int
-booster_statfs64 (const char *path, void *buf);
-
-extern int
-booster_statvfs (const char *path, void *buf);
-
-extern int
-booster_statvfs64 (const char *path, void *buf);
-
-extern void *
-booster_readdir (void *dir);
-
-extern void
-booster_rewinddir (void *dir);
-
-extern void
-booster_seekdir (void *dir, off_t offset);
-
-extern off_t
-booster_telldir (void *dir);
-
-int
-stat (const char *path, void *buf)
-{
- return booster_stat (path, buf);
-}
-
-int
-stat64 (const char *path, void *buf)
-{
- return booster_stat64 (path, buf);
-}
-
-int
-__xstat (int ver, const char *path, void *buf)
-{
- return booster_xstat (ver, path, buf);
-}
-
-int
-__xstat64 (int ver, const char *path, void *buf)
-{
- return booster_xstat64 (ver, path, buf);
-}
-
-int
-__fxstat (int ver, int fd, void *buf)
-{
- return booster_fxstat (ver, fd, buf);
-}
-
-int
-__fxstat64 (int ver, int fd, void *buf)
-{
- return booster_fxstat64 (ver, fd, buf);
-}
-
-int
-fstat (int fd, void *buf)
-{
- return booster_fstat (fd, buf);
-}
-
-int
-fstat64 (int fd, void *buf)
-{
- return booster_fstat64 (fd, buf);
-}
-
-int
-lstat (const char *path, void *buf)
-{
- return booster_lstat (path, buf);
-}
-
-int
-lstat64 (const char *path, void *buf)
-{
- return booster_lstat64 (path, buf);
-}
-
-int
-__lxstat (int ver, const char *path, void *buf)
-{
- return booster_lxstat (ver, path, buf);
-}
-
-int
-__lxstat64 (int ver, const char *path, void *buf)
-{
- return booster_lxstat64 (ver, path, buf);
-}
-
-int
-statfs (const char *pathname, void *buf)
-{
- return booster_statfs (pathname, buf);
-}
-
-int
-statfs64 (const char *pathname, void *buf)
-{
- return booster_statfs64 (pathname, buf);
-}
-
-int
-statvfs (const char *pathname, void *buf)
-{
- return booster_statvfs (pathname, buf);
-}
-
-int
-statvfs64 (const char *pathname, void *buf)
-{
- return booster_statvfs64 (pathname, buf);
-}
-
-void
-rewinddir (void *dir)
-{
- return booster_rewinddir (dir);
-}
-
-void
-seekdir (void *dir, off_t offset)
-{
- return booster_seekdir (dir, offset);
-}
-
-off_t
-telldir (void *dir)
-{
- return booster_telldir (dir);
-}
diff --git a/build-aux/checkpatch.pl b/build-aux/checkpatch.pl
new file mode 100755
index 00000000000..17ae4e4d579
--- /dev/null
+++ b/build-aux/checkpatch.pl
@@ -0,0 +1,4326 @@
+#!/usr/bin/perl -w
+# (c) 2001, Dave Jones. (the file handling bit)
+# (c) 2005, Joel Schopp <jschopp@austin.ibm.com> (the ugly bit)
+# (c) 2007,2008, Andy Whitcroft <apw@uk.ibm.com> (new conditions, test suite)
+# (c) 2008-2010 Andy Whitcroft <apw@canonical.com>
+# (c) 2014 Gluster Community <gluster-devel@gluster.org>
+# Licensed under the terms of the GNU GPL License version 2
+
+use strict;
+use POSIX;
+
+my $P = $0;
+$P =~ s@.*/@@g;
+
+my $V = '0.32.1';
+
+use Getopt::Long qw(:config no_auto_abbrev);
+
+my $quiet = 0;
+my $tree = 1;
+my $chk_signoff = 1;
+my $chk_patch = 1;
+my $tst_only;
+my $emacs = 0;
+my $terse = 0;
+my $file = 0;
+my $check = 0;
+my $check_orig = 0;
+my $summary = 1;
+my $mailback = 0;
+my $summary_file = 0;
+my $show_types = 0;
+my $fix = 0;
+my $fix_inplace = 0;
+my $root;
+my %debug;
+my %camelcase = ();
+my %use_type = ();
+my @use = ();
+my %ignore_type = ();
+my @ignore = ();
+my $help = 0;
+my $configuration_file = ".checkpatch.conf";
+my $max_line_length = 80;
+my $ignore_perl_version = 0;
+my $minimum_perl_version = 5.10.0;
+my $gerrit_url = $ENV{GERRIT_URL};
+
+sub help {
+ my ($exitcode) = @_;
+
+ print << "EOM";
+Usage: $P [OPTION]... [FILE]...
+Version: $V
+
+Options:
+ -q, --quiet quiet
+ --patch treat FILE as patchfile (default)
+ --emacs emacs compile window format
+ --gerrit-url=STRING URL the patch was reviewed at
+ --terse one line per report
+ -f, --file treat FILE as regular source file
+ --subjective, --strict enable more subjective tests
+ --types TYPE(,TYPE2...) show only these comma separated message types
+ --ignore TYPE(,TYPE2...) ignore various comma separated message types
+ --max-line-length=n set the maximum line length, if exceeded, warn
+ --show-types show the message "types" in the output
+ --root=PATH PATH to the glusterfs tree root
+ --no-summary suppress the per-file summary
+ --mailback only produce a report in case of warnings/errors
+ --summary-file include the filename in summary
+ --debug KEY=[0|1] turn on/off debugging of KEY, where KEY is one of
+ 'values', 'possible', 'type', and 'attr' (default
+ is all off)
+ --test-only=WORD report only warnings/errors containing WORD literally
+ --fix EXPERIMENTAL - may create horrible results
+ If correctable single-line errors exist, create
+ "<inputfile>.EXPERIMENTAL-checkpatch-fixes"
+ with potential errors corrected to the preferred
+ checkpatch style
+ --fix-inplace EXPERIMENTAL - may create horrible results
+ Is the same as --fix, but overwrites the input
+ file. It's your fault if there's no backup or git
+ --ignore-perl-version override checking of perl version. expect
+ runtime errors.
+ -h, --help, --version display this help and exit
+
+When FILE is - read standard input.
+EOM
+
+exit($exitcode);
+}
+
+my $conf = which_conf($configuration_file);
+if (-f $conf) {
+ my @conf_args;
+ open(my $conffile, '<', "$conf")
+ or warn "$P: Can't find a readable $configuration_file file $!\n";
+
+ while (<$conffile>) {
+ my $line = $_;
+
+ $line =~ s/\s*\n?$//g;
+ $line =~ s/^\s*//g;
+ $line =~ s/\s+/ /g;
+
+ next if ($line =~ m/^\s*#/);
+ next if ($line =~ m/^\s*$/);
+
+ my @words = split(" ", $line);
+ foreach my $word (@words) {
+ last if ($word =~ m/^#/);
+ push (@conf_args, $word);
+ }
+ }
+ close($conffile);
+ unshift(@ARGV, @conf_args) if @conf_args;
+}
+
+GetOptions(
+ 'q|quiet+' => \$quiet,
+ 'patch!' => \$chk_patch,
+ 'emacs!' => \$emacs,
+ 'gerrit-url=s' => \$gerrit_url,
+ 'terse!' => \$terse,
+ 'f|file!' => \$file,
+ 'subjective!' => \$check,
+ 'strict!' => \$check,
+ 'ignore=s' => \@ignore,
+ 'types=s' => \@use,
+ 'show-types!' => \$show_types,
+ 'max-line-length=i' => \$max_line_length,
+ 'root=s' => \$root,
+ 'summary!' => \$summary,
+ 'mailback!' => \$mailback,
+ 'summary-file!' => \$summary_file,
+ 'fix!' => \$fix,
+ 'fix-inplace!' => \$fix_inplace,
+ 'ignore-perl-version!' => \$ignore_perl_version,
+ 'debug=s' => \%debug,
+ 'test-only=s' => \$tst_only,
+ 'h|help' => \$help,
+ 'version' => \$help
+) or help(1);
+
+help(0) if ($help);
+
+$fix = 1 if ($fix_inplace);
+$check_orig = $check;
+
+my $exit = 0;
+
+if ($^V && $^V lt $minimum_perl_version) {
+ printf "$P: requires at least perl version %vd\n", $minimum_perl_version;
+ if (!$ignore_perl_version) {
+ exit(1);
+ }
+}
+
+if ($#ARGV < 0) {
+ print "$P: no input files\n";
+ exit(1);
+}
+
+sub hash_save_array_words {
+ my ($hashRef, $arrayRef) = @_;
+
+ my @array = split(/,/, join(',', @$arrayRef));
+ foreach my $word (@array) {
+ $word =~ s/\s*\n?$//g;
+ $word =~ s/^\s*//g;
+ $word =~ s/\s+/ /g;
+ $word =~ tr/[a-z]/[A-Z]/;
+
+ next if ($word =~ m/^\s*#/);
+ next if ($word =~ m/^\s*$/);
+
+ $hashRef->{$word}++;
+ }
+}
+
+sub hash_show_words {
+ my ($hashRef, $prefix) = @_;
+
+ if ($quiet == 0 && keys %$hashRef) {
+ print "NOTE: $prefix message types:";
+ foreach my $word (sort keys %$hashRef) {
+ print " $word";
+ }
+ print "\n\n";
+ }
+}
+
+hash_save_array_words(\%ignore_type, \@ignore);
+hash_save_array_words(\%use_type, \@use);
+
+my $dbg_values = 0;
+my $dbg_possible = 0;
+my $dbg_type = 0;
+my $dbg_attr = 0;
+for my $key (keys %debug) {
+ ## no critic
+ eval "\${dbg_$key} = '$debug{$key}';";
+ die "$@" if ($@);
+}
+
+my $rpt_cleaners = 0;
+
+if ($terse) {
+ $emacs = 1;
+ $quiet++;
+}
+
+if ($tree) {
+ if (defined $root) {
+ if (!top_of_glusterfs_tree($root)) {
+ die "$P: $root: --root does not point at a valid tree\n";
+ }
+ } else {
+ if (top_of_glusterfs_tree('.')) {
+ $root = '.';
+ } elsif ($0 =~ m@(.*)/extras/[^/]*$@ &&
+ top_of_glusterfs_tree($1)) {
+ $root = $1;
+ }
+ }
+
+ if (!defined $root) {
+ print "Must be run from the top-level dir. of a GlusterFS tree\n";
+ exit(2);
+ }
+}
+
+my $emitted_corrupt = 0;
+
+our $Ident = qr{
+ [A-Za-z_][A-Za-z\d_]*
+ (?:\s*\#\#\s*[A-Za-z_][A-Za-z\d_]*)*
+ }x;
+our $Storage = qr{extern|static|asmlinkage};
+our $Sparse = qr{
+ __user|
+ __kernel|
+ __force|
+ __iomem|
+ __must_check|
+ __init_refok|
+ __kprobes|
+ __ref|
+ __rcu
+ }x;
+our $InitAttributePrefix = qr{__(?:mem|cpu|dev|net_|)};
+our $InitAttributeData = qr{$InitAttributePrefix(?:initdata\b)};
+our $InitAttributeConst = qr{$InitAttributePrefix(?:initconst\b)};
+our $InitAttributeInit = qr{$InitAttributePrefix(?:init\b)};
+our $InitAttribute = qr{$InitAttributeData|$InitAttributeConst|$InitAttributeInit};
+
+# Notes to $Attribute:
+# We need \b after 'init' otherwise 'initconst' will cause a false positive in a check
+our $Attribute = qr{
+ const|
+ __percpu|
+ __nocast|
+ __safe|
+ __bitwise__|
+ __packed__|
+ __packed2__|
+ __naked|
+ __maybe_unused|
+ __always_unused|
+ __noreturn|
+ __used|
+ __cold|
+ __noclone|
+ __deprecated|
+ __read_mostly|
+ __kprobes|
+ $InitAttribute|
+ ____cacheline_aligned|
+ ____cacheline_aligned_in_smp|
+ ____cacheline_internodealigned_in_smp|
+ __weak
+ }x;
+our $Modifier;
+our $Inline = qr{inline|__always_inline|noinline|__inline|__inline__};
+our $Member = qr{->$Ident|\.$Ident|\[[^]]*\]};
+our $Lval = qr{$Ident(?:$Member)*};
+
+our $Int_type = qr{(?i)llu|ull|ll|lu|ul|l|u};
+our $Binary = qr{(?i)0b[01]+$Int_type?};
+our $Hex = qr{(?i)0x[0-9a-f]+$Int_type?};
+our $Int = qr{[0-9]+$Int_type?};
+our $Octal = qr{0[0-7]+$Int_type?};
+our $Float_hex = qr{(?i)0x[0-9a-f]+p-?[0-9]+[fl]?};
+our $Float_dec = qr{(?i)(?:[0-9]+\.[0-9]*|[0-9]*\.[0-9]+)(?:e-?[0-9]+)?[fl]?};
+our $Float_int = qr{(?i)[0-9]+e-?[0-9]+[fl]?};
+our $Float = qr{$Float_hex|$Float_dec|$Float_int};
+our $Constant = qr{$Float|$Binary|$Octal|$Hex|$Int};
+our $Assignment = qr{\*\=|/=|%=|\+=|-=|<<=|>>=|&=|\^=|\|=|=};
+our $Compare = qr{<=|>=|==|!=|<|(?<!-)>};
+our $Arithmetic = qr{\+|-|\*|\/|%};
+our $Operators = qr{
+ <=|>=|==|!=|
+ =>|->|<<|>>|<|>|!|~|
+ &&|\|\||,|\^|\+\+|--|&|\||$Arithmetic
+ }x;
+
+our $c90_Keywords = qr{do|for|while|if|else|return|goto|continue|switch|default|case|break}x;
+
+our $NonptrType;
+our $NonptrTypeWithAttr;
+our $Type;
+our $Declare;
+
+our $NON_ASCII_UTF8 = qr{
+ [\xC2-\xDF][\x80-\xBF] # non-overlong 2-byte
+ | \xE0[\xA0-\xBF][\x80-\xBF] # excluding overlongs
+ | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2} # straight 3-byte
+ | \xED[\x80-\x9F][\x80-\xBF] # excluding surrogates
+ | \xF0[\x90-\xBF][\x80-\xBF]{2} # planes 1-3
+ | [\xF1-\xF3][\x80-\xBF]{3} # planes 4-15
+ | \xF4[\x80-\x8F][\x80-\xBF]{2} # plane 16
+}x;
+
+our $UTF8 = qr{
+ [\x09\x0A\x0D\x20-\x7E] # ASCII
+ | $NON_ASCII_UTF8
+}x;
+
+our $typeTypedefs = qr{(?x:
+ (?:__)?(?:u|s|be|le)(?:8|16|32|64)|
+ atomic_t
+)};
+
+our $logFunctions = qr{(?x:
+ printk(?:_ratelimited|_once|)|
+ (?:[a-z0-9]+_){1,2}(?:printk|emerg|alert|crit|err|warning|warn|notice|info|debug|dbg|vdbg|devel|cont|WARN)(?:_ratelimited|_once|)|
+ WARN(?:_RATELIMIT|_ONCE|)|
+ panic|
+ MODULE_[A-Z_]+|
+ seq_vprintf|seq_printf|seq_puts
+)};
+
+our $signature_tags = qr{(?xi:
+ Signed-off-by:|
+ Acked-by:|
+ Tested-by:|
+ Reviewed-by:|
+ Reviewed-on:|
+ Reported-by:|
+ Original-author:|
+ Original-Author:|
+ Original-Authors:|
+ Suggested-by:|
+ To:|
+ Cc:
+)};
+
+our $url_tags = qr{http:|https:};
+
+our @typeList = (
+ qr{void},
+ qr{(?:unsigned\s+)?char},
+ qr{(?:unsigned\s+)?short},
+ qr{(?:unsigned\s+)?int},
+ qr{(?:unsigned\s+)?long},
+ qr{(?:unsigned\s+)?long\s+int},
+ qr{(?:unsigned\s+)?long\s+long},
+ qr{(?:unsigned\s+)?long\s+long\s+int},
+ qr{unsigned},
+ qr{float},
+ qr{double},
+ qr{bool},
+ qr{struct\s+$Ident},
+ qr{union\s+$Ident},
+ qr{enum\s+$Ident},
+ qr{${Ident}_t},
+ qr{${Ident}_handler},
+ qr{${Ident}_handler_fn},
+);
+our @typeListWithAttr = (
+ @typeList,
+ qr{struct\s+$InitAttribute\s+$Ident},
+ qr{union\s+$InitAttribute\s+$Ident},
+);
+
+our @modifierList = (
+ qr{fastcall},
+);
+
+our @mode_permission_funcs = (
+ ["module_param", 3],
+ ["module_param_(?:array|named|string)", 4],
+ ["module_param_array_named", 5],
+ ["debugfs_create_(?:file|u8|u16|u32|u64|x8|x16|x32|x64|size_t|atomic_t|bool|blob|regset32|u32_array)", 2],
+ ["proc_create(?:_data|)", 2],
+ ["(?:CLASS|DEVICE|SENSOR)_ATTR", 2],
+);
+
+#Create a search pattern for all these functions to speed up a loop below
+our $mode_perms_search = "";
+foreach my $entry (@mode_permission_funcs) {
+ $mode_perms_search .= '|' if ($mode_perms_search ne "");
+ $mode_perms_search .= $entry->[0];
+}
+
+our $declaration_macros = qr{(?x:
+ (?:$Storage\s+)?(?:DECLARE|DEFINE)_[A-Z]+\s*\(|
+ (?:$Storage\s+)?LIST_HEAD\s*\(
+)};
+
+our $allowed_asm_includes = qr{(?x:
+ irq|
+ memory
+)};
+# memory.h: ARM has a custom one
+
+sub build_types {
+ my $mods = "(?x:\n" . join("|\n ", @modifierList) . "\n)";
+ my $all = "(?x:\n" . join("|\n ", @typeList) . "\n)";
+ my $allWithAttr = "(?x:\n" . join("|\n ", @typeListWithAttr) . "\n)";
+ $Modifier = qr{(?:$Attribute|$Sparse|$mods)};
+ $NonptrType = qr{
+ (?:$Modifier\s+|const\s+)*
+ (?:
+ (?:typeof|__typeof__)\s*\([^\)]*\)|
+ (?:$typeTypedefs\b)|
+ (?:${all}\b)
+ )
+ (?:\s+$Modifier|\s+const)*
+ }x;
+ $NonptrTypeWithAttr = qr{
+ (?:$Modifier\s+|const\s+)*
+ (?:
+ (?:typeof|__typeof__)\s*\([^\)]*\)|
+ (?:$typeTypedefs\b)|
+ (?:${allWithAttr}\b)
+ )
+ (?:\s+$Modifier|\s+const)*
+ }x;
+ $Type = qr{
+ $NonptrType
+ (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*|\[\])+|(?:\s*\[\s*\])+)?
+ (?:\s+$Inline|\s+$Modifier)*
+ }x;
+ $Declare = qr{(?:$Storage\s+(?:$Inline\s+)?)?$Type};
+}
+build_types();
+
+our $Typecast = qr{\s*(\(\s*$NonptrType\s*\)){0,1}\s*};
+
+# Using $balanced_parens, $LvalOrFunc, or $FuncArg
+# requires at least perl version v5.10.0
+# Any use must be runtime checked with $^V
+
+our $balanced_parens = qr/(\((?:[^\(\)]++|(?-1))*\))/;
+our $LvalOrFunc = qr{((?:[\&\*]\s*)?$Lval)\s*($balanced_parens{0,1})\s*};
+our $FuncArg = qr{$Typecast{0,1}($LvalOrFunc|$Constant)};
+
+sub deparenthesize {
+ my ($string) = @_;
+ return "" if (!defined($string));
+
+ while ($string =~ /^\s*\(.*\)\s*$/) {
+ $string =~ s@^\s*\(\s*@@;
+ $string =~ s@\s*\)\s*$@@;
+ }
+
+ $string =~ s@\s+@ @g;
+
+ return $string;
+}
+
+sub seed_camelcase_file {
+ my ($file) = @_;
+
+ return if (!(-f $file));
+
+ local $/;
+
+ open(my $include_file, '<', "$file")
+ or warn "$P: Can't read '$file' $!\n";
+ my $text = <$include_file>;
+ close($include_file);
+
+ my @lines = split('\n', $text);
+
+ foreach my $line (@lines) {
+ next if ($line !~ /(?:[A-Z][a-z]|[a-z][A-Z])/);
+ if ($line =~ /^[ \t]*(?:#[ \t]*define|typedef\s+$Type)\s+(\w*(?:[A-Z][a-z]|[a-z][A-Z])\w*)/) {
+ $camelcase{$1} = 1;
+ } elsif ($line =~ /^\s*$Declare\s+(\w*(?:[A-Z][a-z]|[a-z][A-Z])\w*)\s*[\(\[,;]/) {
+ $camelcase{$1} = 1;
+ } elsif ($line =~ /^\s*(?:union|struct|enum)\s+(\w*(?:[A-Z][a-z]|[a-z][A-Z])\w*)\s*[;\{]/) {
+ $camelcase{$1} = 1;
+ }
+ }
+}
+
+my $camelcase_seeded = 0;
+sub seed_camelcase_includes {
+ return if ($camelcase_seeded);
+
+ my $files;
+ my $camelcase_cache = "";
+ my @include_files = ();
+
+ $camelcase_seeded = 1;
+
+ if (-e ".git") {
+ my $git_last_include_commit = `git log --no-merges --pretty=format:"%h%n" -1 -- include`;
+ chomp $git_last_include_commit;
+ $camelcase_cache = ".checkpatch-camelcase.git.$git_last_include_commit";
+ } else {
+ my $last_mod_date = 0;
+ $files = `find $root/include -name "*.h"`;
+ @include_files = split('\n', $files);
+ foreach my $file (@include_files) {
+ my $date = POSIX::strftime("%Y%m%d%H%M",
+ localtime((stat $file)[9]));
+ $last_mod_date = $date if ($last_mod_date < $date);
+ }
+ $camelcase_cache = ".checkpatch-camelcase.date.$last_mod_date";
+ }
+
+ if ($camelcase_cache ne "" && -f $camelcase_cache) {
+ open(my $camelcase_file, '<', "$camelcase_cache")
+ or warn "$P: Can't read '$camelcase_cache' $!\n";
+ while (<$camelcase_file>) {
+ chomp;
+ $camelcase{$_} = 1;
+ }
+ close($camelcase_file);
+ return;
+ }
+
+ if (-e ".git") {
+ $files = `git ls-files "include/*.h"`;
+ @include_files = split('\n', $files);
+ }
+
+ foreach my $file (@include_files) {
+ seed_camelcase_file($file);
+ }
+
+ if ($camelcase_cache ne "") {
+ unlink glob ".checkpatch-camelcase.*";
+ open(my $camelcase_file, '>', "$camelcase_cache")
+ or warn "$P: Can't write '$camelcase_cache' $!\n";
+ foreach (sort { lc($a) cmp lc($b) } keys(%camelcase)) {
+ print $camelcase_file ("$_\n");
+ }
+ close($camelcase_file);
+ }
+}
+
+$chk_signoff = 0 if ($file);
+
+my @rawlines = ();
+my @lines = ();
+my @fixed = ();
+my $vname;
+for my $filename (@ARGV) {
+ my $FILE;
+ if ($file) {
+ open($FILE, '-|', "diff -u /dev/null $filename") ||
+ die "$P: $filename: diff failed - $!\n";
+ } elsif ($filename eq '-') {
+ open($FILE, '<&STDIN');
+ } else {
+ open($FILE, '<', "$filename") ||
+ die "$P: $filename: open failed - $!\n";
+ }
+ if ($filename eq '-') {
+ $vname = 'Your patch';
+ } else {
+ $vname = $filename;
+ }
+ while (<$FILE>) {
+ chomp;
+ push(@rawlines, $_);
+ }
+ close($FILE);
+ if (!process($filename)) {
+ $exit = 1;
+ }
+ @rawlines = ();
+ @lines = ();
+ @fixed = ();
+}
+
+exit($exit);
+
+sub top_of_glusterfs_tree {
+ my ($root) = @_;
+
+ # Add here if the tree changes
+ my @tree_check = (
+ "api",
+ "AUTHORS",
+ "autogen.sh",
+ "build-aux",
+ "ChangeLog",
+ "cli",
+ "configure.ac",
+ "contrib",
+ "CONTRIBUTING",
+ "COPYING-GPLV2",
+ "COPYING-LGPLV3",
+ "doc",
+ "extras",
+ "geo-replication",
+ "glusterfs-api.pc.in",
+ "glusterfsd",
+ "glusterfs.spec.in",
+ "heal",
+ "INSTALL",
+ "libgfchangelog.pc.in",
+ "libglusterfs",
+ "MAINTAINERS",
+ "Makefile.am",
+ "NEWS",
+ "README.md",
+ "rfc.sh",
+ "rpc",
+ "run-tests.sh",
+ "tests",
+ "THANKS",
+ "xlators",
+ );
+
+ foreach my $check (@tree_check) {
+ if (! -e $root . '/' . $check) {
+ return 0;
+ }
+ }
+ return 1;
+}
+
+sub parse_email {
+ my ($formatted_email) = @_;
+
+ my $name = "";
+ my $address = "";
+ my $comment = "";
+
+ if ($formatted_email =~ /^(.*)<(\S+\@\S+)>(.*)$/) {
+ $name = $1;
+ $address = $2;
+ $comment = $3 if defined $3;
+ } elsif ($formatted_email =~ /^\s*<(\S+\@\S+)>(.*)$/) {
+ $address = $1;
+ $comment = $2 if defined $2;
+ } elsif ($formatted_email =~ /(\S+\@\S+)(.*)$/) {
+ $address = $1;
+ $comment = $2 if defined $2;
+ $formatted_email =~ s/$address.*$//;
+ $name = $formatted_email;
+ $name = trim($name);
+ $name =~ s/^\"|\"$//g;
+ # If there's a name left after stripping spaces and
+ # leading quotes, and the address doesn't have both
+ # leading and trailing angle brackets, the address
+ # is invalid. ie:
+ # "joe smith joe@smith.com" bad
+ # "joe smith <joe@smith.com" bad
+ if ($name ne "" && $address !~ /^<[^>]+>$/) {
+ $name = "";
+ $address = "";
+ $comment = "";
+ }
+ }
+
+ $name = trim($name);
+ $name =~ s/^\"|\"$//g;
+ $address = trim($address);
+ $address =~ s/^\<|\>$//g;
+
+ if ($name =~ /[^\w \-]/i) { ##has "must quote" chars
+ $name =~ s/(?<!\\)"/\\"/g; ##escape quotes
+ $name = "\"$name\"";
+ }
+
+ return ($name, $address, $comment);
+}
+
+sub format_email {
+ my ($name, $address) = @_;
+
+ my $formatted_email;
+
+ $name = trim($name);
+ $name =~ s/^\"|\"$//g;
+ $address = trim($address);
+
+ if ($name =~ /[^\w \-]/i) { ##has "must quote" chars
+ $name =~ s/(?<!\\)"/\\"/g; ##escape quotes
+ $name = "\"$name\"";
+ }
+
+ if ("$name" eq "") {
+ $formatted_email = "$address";
+ } else {
+ $formatted_email = "$name <$address>";
+ }
+
+ return $formatted_email;
+}
+
+sub which_conf {
+ my ($conf) = @_;
+
+ foreach my $path (split(/:/, ".:$ENV{HOME}:.scripts")) {
+ if (-e "$path/$conf") {
+ return "$path/$conf";
+ }
+ }
+
+ return "";
+}
+
+sub expand_tabs {
+ my ($str) = @_;
+
+ my $res = '';
+ my $n = 0;
+ for my $c (split(//, $str)) {
+ if ($c eq "\t") {
+ $res .= ' ';
+ $n++;
+ for (; ($n % 8) != 0; $n++) {
+ $res .= ' ';
+ }
+ next;
+ }
+ $res .= $c;
+ $n++;
+ }
+ return $res;
+}
+sub copy_spacing {
+ (my $res = shift) =~ tr/\t/ /c;
+ return $res;
+}
+
+sub line_stats {
+ my ($line) = @_;
+
+ # Drop the diff line leader and expand tabs
+ $line =~ s/^.//;
+ $line = expand_tabs($line);
+
+ # Pick the indent from the front of the line.
+ my ($white) = ($line =~ /^(\s*)/);
+
+ return (length($line), length($white));
+}
+
+my $sanitise_quote = '';
+
+sub sanitise_line_reset {
+ my ($in_comment) = @_;
+
+ if ($in_comment) {
+ $sanitise_quote = '*/';
+ } else {
+ $sanitise_quote = '';
+ }
+}
+sub sanitise_line {
+ my ($line) = @_;
+
+ my $res = '';
+ my $l = '';
+
+ my $qlen = 0;
+ my $off = 0;
+ my $c;
+
+ # Always copy over the diff marker.
+ $res = substr($line, 0, 1);
+
+ for ($off = 1; $off < length($line); $off++) {
+ $c = substr($line, $off, 1);
+
+ # Comments we are wacking completly including the begin
+ # and end, all to $;.
+ if ($sanitise_quote eq '' && substr($line, $off, 2) eq '/*') {
+ $sanitise_quote = '*/';
+
+ substr($res, $off, 2, "$;$;");
+ $off++;
+ next;
+ }
+ if ($sanitise_quote eq '*/' && substr($line, $off, 2) eq '*/') {
+ $sanitise_quote = '';
+ substr($res, $off, 2, "$;$;");
+ $off++;
+ next;
+ }
+ if ($sanitise_quote eq '' && substr($line, $off, 2) eq '//') {
+ $sanitise_quote = '//';
+
+ substr($res, $off, 2, $sanitise_quote);
+ $off++;
+ next;
+ }
+
+ # A \ in a string means ignore the next character.
+ if (($sanitise_quote eq "'" || $sanitise_quote eq '"') &&
+ $c eq "\\") {
+ substr($res, $off, 2, 'XX');
+ $off++;
+ next;
+ }
+ # Regular quotes.
+ if ($c eq "'" || $c eq '"') {
+ if ($sanitise_quote eq '') {
+ $sanitise_quote = $c;
+
+ substr($res, $off, 1, $c);
+ next;
+ } elsif ($sanitise_quote eq $c) {
+ $sanitise_quote = '';
+ }
+ }
+
+ #print "c<$c> SQ<$sanitise_quote>\n";
+ if ($off != 0 && $sanitise_quote eq '*/' && $c ne "\t") {
+ substr($res, $off, 1, $;);
+ } elsif ($off != 0 && $sanitise_quote eq '//' && $c ne "\t") {
+ substr($res, $off, 1, $;);
+ } elsif ($off != 0 && $sanitise_quote && $c ne "\t") {
+ substr($res, $off, 1, 'X');
+ } else {
+ substr($res, $off, 1, $c);
+ }
+ }
+
+ if ($sanitise_quote eq '//') {
+ $sanitise_quote = '';
+ }
+
+ # The pathname on a #include may be surrounded by '<' and '>'.
+ if ($res =~ /^.\s*\#\s*include\s+\<(.*)\>/) {
+ my $clean = 'X' x length($1);
+ $res =~ s@\<.*\>@<$clean>@;
+
+ # The whole of a #error is a string.
+ } elsif ($res =~ /^.\s*\#\s*(?:error|warning)\s+(.*)\b/) {
+ my $clean = 'X' x length($1);
+ $res =~ s@(\#\s*(?:error|warning)\s+).*@$1$clean@;
+ }
+
+ return $res;
+}
+
+sub get_quoted_string {
+ my ($line, $rawline) = @_;
+
+ return "" if ($line !~ m/(\"[X]+\")/g);
+ return substr($rawline, $-[0], $+[0] - $-[0]);
+}
+
+sub ctx_statement_block {
+ my ($linenr, $remain, $off) = @_;
+ my $line = $linenr - 1;
+ my $blk = '';
+ my $soff = $off;
+ my $coff = $off - 1;
+ my $coff_set = 0;
+
+ my $loff = 0;
+
+ my $type = '';
+ my $level = 0;
+ my @stack = ();
+ my $p;
+ my $c;
+ my $len = 0;
+
+ my $remainder;
+ while (1) {
+ @stack = (['', 0]) if ($#stack == -1);
+
+ #warn "CSB: blk<$blk> remain<$remain>\n";
+ # If we are about to drop off the end, pull in more
+ # context.
+ if ($off >= $len) {
+ for (; $remain > 0; $line++) {
+ last if (!defined $lines[$line]);
+ next if ($lines[$line] =~ /^-/);
+ $remain--;
+ $loff = $len;
+ $blk .= $lines[$line] . "\n";
+ $len = length($blk);
+ $line++;
+ last;
+ }
+ # Bail if there is no further context.
+ #warn "CSB: blk<$blk> off<$off> len<$len>\n";
+ if ($off >= $len) {
+ last;
+ }
+ if ($level == 0 && substr($blk, $off) =~ /^.\s*#\s*define/) {
+ $level++;
+ $type = '#';
+ }
+ }
+ $p = $c;
+ $c = substr($blk, $off, 1);
+ $remainder = substr($blk, $off);
+
+ #warn "CSB: c<$c> type<$type> level<$level> remainder<$remainder> coff_set<$coff_set>\n";
+
+ # Handle nested #if/#else.
+ if ($remainder =~ /^#\s*(?:ifndef|ifdef|if)\s/) {
+ push(@stack, [ $type, $level ]);
+ } elsif ($remainder =~ /^#\s*(?:else|elif)\b/) {
+ ($type, $level) = @{$stack[$#stack - 1]};
+ } elsif ($remainder =~ /^#\s*endif\b/) {
+ ($type, $level) = @{pop(@stack)};
+ }
+
+ # Statement ends at the ';' or a close '}' at the
+ # outermost level.
+ if ($level == 0 && $c eq ';') {
+ last;
+ }
+
+ # An else is really a conditional as long as its not else if
+ if ($level == 0 && $coff_set == 0 &&
+ (!defined($p) || $p =~ /(?:\s|\}|\+)/) &&
+ $remainder =~ /^(else)(?:\s|{)/ &&
+ $remainder !~ /^else\s+if\b/) {
+ $coff = $off + length($1) - 1;
+ $coff_set = 1;
+ #warn "CSB: mark coff<$coff> soff<$soff> 1<$1>\n";
+ #warn "[" . substr($blk, $soff, $coff - $soff + 1) . "]\n";
+ }
+
+ if (($type eq '' || $type eq '(') && $c eq '(') {
+ $level++;
+ $type = '(';
+ }
+ if ($type eq '(' && $c eq ')') {
+ $level--;
+ $type = ($level != 0)? '(' : '';
+
+ if ($level == 0 && $coff < $soff) {
+ $coff = $off;
+ $coff_set = 1;
+ #warn "CSB: mark coff<$coff>\n";
+ }
+ }
+ if (($type eq '' || $type eq '{') && $c eq '{') {
+ $level++;
+ $type = '{';
+ }
+ if ($type eq '{' && $c eq '}') {
+ $level--;
+ $type = ($level != 0)? '{' : '';
+
+ if ($level == 0) {
+ if (substr($blk, $off + 1, 1) eq ';') {
+ $off++;
+ }
+ last;
+ }
+ }
+ # Preprocessor commands end at the newline unless escaped.
+ if ($type eq '#' && $c eq "\n" && $p ne "\\") {
+ $level--;
+ $type = '';
+ $off++;
+ last;
+ }
+ $off++;
+ }
+ # We are truly at the end, so shuffle to the next line.
+ if ($off == $len) {
+ $loff = $len + 1;
+ $line++;
+ $remain--;
+ }
+
+ my $statement = substr($blk, $soff, $off - $soff + 1);
+ my $condition = substr($blk, $soff, $coff - $soff + 1);
+
+ #warn "STATEMENT<$statement>\n";
+ #warn "CONDITION<$condition>\n";
+
+ #print "coff<$coff> soff<$off> loff<$loff>\n";
+
+ return ($statement, $condition,
+ $line, $remain + 1, $off - $loff + 1, $level);
+}
+
+sub statement_lines {
+ my ($stmt) = @_;
+
+ # Strip the diff line prefixes and rip blank lines at start and end.
+ $stmt =~ s/(^|\n)./$1/g;
+ $stmt =~ s/^\s*//;
+ $stmt =~ s/\s*$//;
+
+ my @stmt_lines = ($stmt =~ /\n/g);
+
+ return $#stmt_lines + 2;
+}
+
+sub statement_rawlines {
+ my ($stmt) = @_;
+
+ my @stmt_lines = ($stmt =~ /\n/g);
+
+ return $#stmt_lines + 2;
+}
+
+sub statement_block_size {
+ my ($stmt) = @_;
+
+ $stmt =~ s/(^|\n)./$1/g;
+ $stmt =~ s/^\s*{//;
+ $stmt =~ s/}\s*$//;
+ $stmt =~ s/^\s*//;
+ $stmt =~ s/\s*$//;
+
+ my @stmt_lines = ($stmt =~ /\n/g);
+ my @stmt_statements = ($stmt =~ /;/g);
+
+ my $stmt_lines = $#stmt_lines + 2;
+ my $stmt_statements = $#stmt_statements + 1;
+
+ if ($stmt_lines > $stmt_statements) {
+ return $stmt_lines;
+ } else {
+ return $stmt_statements;
+ }
+}
+
+sub ctx_statement_full {
+ my ($linenr, $remain, $off) = @_;
+ my ($statement, $condition, $level);
+
+ my (@chunks);
+
+ # Grab the first conditional/block pair.
+ ($statement, $condition, $linenr, $remain, $off, $level) =
+ ctx_statement_block($linenr, $remain, $off);
+ #print "F: c<$condition> s<$statement> remain<$remain>\n";
+ push(@chunks, [ $condition, $statement ]);
+ if (!($remain > 0 && $condition =~ /^\s*(?:\n[+-])?\s*(?:if|else|do)\b/s)) {
+ return ($level, $linenr, @chunks);
+ }
+
+ # Pull in the following conditional/block pairs and see if they
+ # could continue the statement.
+ for (;;) {
+ ($statement, $condition, $linenr, $remain, $off, $level) =
+ ctx_statement_block($linenr, $remain, $off);
+ #print "C: c<$condition> s<$statement> remain<$remain>\n";
+ last if (!($remain > 0 && $condition =~ /^(?:\s*\n[+-])*\s*(?:else|do)\b/s));
+ #print "C: push\n";
+ push(@chunks, [ $condition, $statement ]);
+ }
+
+ return ($level, $linenr, @chunks);
+}
+
+sub ctx_block_get {
+ my ($linenr, $remain, $outer, $open, $close, $off) = @_;
+ my $line;
+ my $start = $linenr - 1;
+ my $blk = '';
+ my @o;
+ my @c;
+ my @res = ();
+
+ my $level = 0;
+ my @stack = ($level);
+ for ($line = $start; $remain > 0; $line++) {
+ next if ($rawlines[$line] =~ /^-/);
+ $remain--;
+
+ $blk .= $rawlines[$line];
+
+ # Handle nested #if/#else.
+ if ($lines[$line] =~ /^.\s*#\s*(?:ifndef|ifdef|if)\s/) {
+ push(@stack, $level);
+ } elsif ($lines[$line] =~ /^.\s*#\s*(?:else|elif)\b/) {
+ $level = $stack[$#stack - 1];
+ } elsif ($lines[$line] =~ /^.\s*#\s*endif\b/) {
+ $level = pop(@stack);
+ }
+
+ foreach my $c (split(//, $lines[$line])) {
+ ##print "C<$c>L<$level><$open$close>O<$off>\n";
+ if ($off > 0) {
+ $off--;
+ next;
+ }
+
+ if ($c eq $close && $level > 0) {
+ $level--;
+ last if ($level == 0);
+ } elsif ($c eq $open) {
+ $level++;
+ }
+ }
+
+ if (!$outer || $level <= 1) {
+ push(@res, $rawlines[$line]);
+ }
+
+ last if ($level == 0);
+ }
+
+ return ($level, @res);
+}
+sub ctx_block_outer {
+ my ($linenr, $remain) = @_;
+
+ my ($level, @r) = ctx_block_get($linenr, $remain, 1, '{', '}', 0);
+ return @r;
+}
+sub ctx_block {
+ my ($linenr, $remain) = @_;
+
+ my ($level, @r) = ctx_block_get($linenr, $remain, 0, '{', '}', 0);
+ return @r;
+}
+sub ctx_statement {
+ my ($linenr, $remain, $off) = @_;
+
+ my ($level, @r) = ctx_block_get($linenr, $remain, 0, '(', ')', $off);
+ return @r;
+}
+sub ctx_block_level {
+ my ($linenr, $remain) = @_;
+
+ return ctx_block_get($linenr, $remain, 0, '{', '}', 0);
+}
+sub ctx_statement_level {
+ my ($linenr, $remain, $off) = @_;
+
+ return ctx_block_get($linenr, $remain, 0, '(', ')', $off);
+}
+
+sub ctx_locate_comment {
+ my ($first_line, $end_line) = @_;
+
+ # Catch a comment on the end of the line itself.
+ my ($current_comment) = ($rawlines[$end_line - 1] =~ m@.*(/\*.*\*/)\s*(?:\\\s*)?$@);
+ return $current_comment if (defined $current_comment);
+
+ # Look through the context and try and figure out if there is a
+ # comment.
+ my $in_comment = 0;
+ $current_comment = '';
+ for (my $linenr = $first_line; $linenr < $end_line; $linenr++) {
+ my $line = $rawlines[$linenr - 1];
+ #warn " $line\n";
+ if ($linenr == $first_line and $line =~ m@^.\s*\*@) {
+ $in_comment = 1;
+ }
+ if ($line =~ m@/\*@) {
+ $in_comment = 1;
+ }
+ if (!$in_comment && $current_comment ne '') {
+ $current_comment = '';
+ }
+ $current_comment .= $line . "\n" if ($in_comment);
+ if ($line =~ m@\*/@) {
+ $in_comment = 0;
+ }
+ }
+
+ chomp($current_comment);
+ return($current_comment);
+}
+sub ctx_has_comment {
+ my ($first_line, $end_line) = @_;
+ my $cmt = ctx_locate_comment($first_line, $end_line);
+
+ ##print "LINE: $rawlines[$end_line - 1 ]\n";
+ ##print "CMMT: $cmt\n";
+
+ return ($cmt ne '');
+}
+
+sub raw_line {
+ my ($linenr, $cnt) = @_;
+
+ my $offset = $linenr - 1;
+ $cnt++;
+
+ my $line;
+ while ($cnt) {
+ $line = $rawlines[$offset++];
+ next if (defined($line) && $line =~ /^-/);
+ $cnt--;
+ }
+ return $line;
+}
+
+sub cat_vet {
+ my ($vet) = @_;
+ my ($res, $coded);
+
+ $res = '';
+ while ($vet =~ /([^[:cntrl:]]*)([[:cntrl:]]|$)/g) {
+ $res .= $1;
+ if ($2 ne '') {
+ $coded = sprintf("^%c", unpack('C', $2) + 64);
+ $res .= $coded;
+ }
+ }
+ $res =~ s/$/\$/;
+ return $res;
+}
+
+my $av_preprocessor = 0;
+my $av_pending;
+my @av_paren_type;
+my $av_pend_colon;
+
+sub annotate_reset {
+ $av_preprocessor = 0;
+ $av_pending = '_';
+ @av_paren_type = ('E');
+ $av_pend_colon = 'O';
+}
+
+sub annotate_values {
+ my ($stream, $type) = @_;
+
+ my $res;
+ my $var = '_' x length($stream);
+ my $cur = $stream;
+
+ print "$stream\n" if ($dbg_values > 1);
+
+ while (length($cur)) {
+ @av_paren_type = ('E') if ($#av_paren_type < 0);
+ print " <" . join('', @av_paren_type) .
+ "> <$type> <$av_pending>" if ($dbg_values > 1);
+ if ($cur =~ /^(\s+)/o) {
+ print "WS($1)\n" if ($dbg_values > 1);
+ if ($1 =~ /\n/ && $av_preprocessor) {
+ $type = pop(@av_paren_type);
+ $av_preprocessor = 0;
+ }
+
+ } elsif ($cur =~ /^(\(\s*$Type\s*)\)/ && $av_pending eq '_') {
+ print "CAST($1)\n" if ($dbg_values > 1);
+ push(@av_paren_type, $type);
+ $type = 'c';
+
+ } elsif ($cur =~ /^($Type)\s*(?:$Ident|,|\)|\(|\s*$)/) {
+ print "DECLARE($1)\n" if ($dbg_values > 1);
+ $type = 'T';
+
+ } elsif ($cur =~ /^($Modifier)\s*/) {
+ print "MODIFIER($1)\n" if ($dbg_values > 1);
+ $type = 'T';
+
+ } elsif ($cur =~ /^(\#\s*define\s*$Ident)(\(?)/o) {
+ print "DEFINE($1,$2)\n" if ($dbg_values > 1);
+ $av_preprocessor = 1;
+ push(@av_paren_type, $type);
+ if ($2 ne '') {
+ $av_pending = 'N';
+ }
+ $type = 'E';
+
+ } elsif ($cur =~ /^(\#\s*(?:undef\s*$Ident|include\b))/o) {
+ print "UNDEF($1)\n" if ($dbg_values > 1);
+ $av_preprocessor = 1;
+ push(@av_paren_type, $type);
+
+ } elsif ($cur =~ /^(\#\s*(?:ifdef|ifndef|if))/o) {
+ print "PRE_START($1)\n" if ($dbg_values > 1);
+ $av_preprocessor = 1;
+
+ push(@av_paren_type, $type);
+ push(@av_paren_type, $type);
+ $type = 'E';
+
+ } elsif ($cur =~ /^(\#\s*(?:else|elif))/o) {
+ print "PRE_RESTART($1)\n" if ($dbg_values > 1);
+ $av_preprocessor = 1;
+
+ push(@av_paren_type, $av_paren_type[$#av_paren_type]);
+
+ $type = 'E';
+
+ } elsif ($cur =~ /^(\#\s*(?:endif))/o) {
+ print "PRE_END($1)\n" if ($dbg_values > 1);
+
+ $av_preprocessor = 1;
+
+ # Assume all arms of the conditional end as this
+ # one does, and continue as if the #endif was not here.
+ pop(@av_paren_type);
+ push(@av_paren_type, $type);
+ $type = 'E';
+
+ } elsif ($cur =~ /^(\\\n)/o) {
+ print "PRECONT($1)\n" if ($dbg_values > 1);
+
+ } elsif ($cur =~ /^(__attribute__)\s*\(?/o) {
+ print "ATTR($1)\n" if ($dbg_values > 1);
+ $av_pending = $type;
+ $type = 'N';
+
+ } elsif ($cur =~ /^(sizeof)\s*(\()?/o) {
+ print "SIZEOF($1)\n" if ($dbg_values > 1);
+ if (defined $2) {
+ $av_pending = 'V';
+ }
+ $type = 'N';
+
+ } elsif ($cur =~ /^(if|while|for)\b/o) {
+ print "COND($1)\n" if ($dbg_values > 1);
+ $av_pending = 'E';
+ $type = 'N';
+
+ } elsif ($cur =~/^(case)/o) {
+ print "CASE($1)\n" if ($dbg_values > 1);
+ $av_pend_colon = 'C';
+ $type = 'N';
+
+ } elsif ($cur =~/^(return|else|goto|typeof|__typeof__)\b/o) {
+ print "KEYWORD($1)\n" if ($dbg_values > 1);
+ $type = 'N';
+
+ } elsif ($cur =~ /^(\()/o) {
+ print "PAREN('$1')\n" if ($dbg_values > 1);
+ push(@av_paren_type, $av_pending);
+ $av_pending = '_';
+ $type = 'N';
+
+ } elsif ($cur =~ /^(\))/o) {
+ my $new_type = pop(@av_paren_type);
+ if ($new_type ne '_') {
+ $type = $new_type;
+ print "PAREN('$1') -> $type\n"
+ if ($dbg_values > 1);
+ } else {
+ print "PAREN('$1')\n" if ($dbg_values > 1);
+ }
+
+ } elsif ($cur =~ /^($Ident)\s*\(/o) {
+ print "FUNC($1)\n" if ($dbg_values > 1);
+ $type = 'V';
+ $av_pending = 'V';
+
+ } elsif ($cur =~ /^($Ident\s*):(?:\s*\d+\s*(,|=|;))?/) {
+ if (defined $2 && $type eq 'C' || $type eq 'T') {
+ $av_pend_colon = 'B';
+ } elsif ($type eq 'E') {
+ $av_pend_colon = 'L';
+ }
+ print "IDENT_COLON($1,$type>$av_pend_colon)\n" if ($dbg_values > 1);
+ $type = 'V';
+
+ } elsif ($cur =~ /^($Ident|$Constant)/o) {
+ print "IDENT($1)\n" if ($dbg_values > 1);
+ $type = 'V';
+
+ } elsif ($cur =~ /^($Assignment)/o) {
+ print "ASSIGN($1)\n" if ($dbg_values > 1);
+ $type = 'N';
+
+ } elsif ($cur =~/^(;|{|})/) {
+ print "END($1)\n" if ($dbg_values > 1);
+ $type = 'E';
+ $av_pend_colon = 'O';
+
+ } elsif ($cur =~/^(,)/) {
+ print "COMMA($1)\n" if ($dbg_values > 1);
+ $type = 'C';
+
+ } elsif ($cur =~ /^(\?)/o) {
+ print "QUESTION($1)\n" if ($dbg_values > 1);
+ $type = 'N';
+
+ } elsif ($cur =~ /^(:)/o) {
+ print "COLON($1,$av_pend_colon)\n" if ($dbg_values > 1);
+
+ substr($var, length($res), 1, $av_pend_colon);
+ if ($av_pend_colon eq 'C' || $av_pend_colon eq 'L') {
+ $type = 'E';
+ } else {
+ $type = 'N';
+ }
+ $av_pend_colon = 'O';
+
+ } elsif ($cur =~ /^(\[)/o) {
+ print "CLOSE($1)\n" if ($dbg_values > 1);
+ $type = 'N';
+
+ } elsif ($cur =~ /^(-(?![->])|\+(?!\+)|\*|\&\&|\&)/o) {
+ my $variant;
+
+ print "OPV($1)\n" if ($dbg_values > 1);
+ if ($type eq 'V') {
+ $variant = 'B';
+ } else {
+ $variant = 'U';
+ }
+
+ substr($var, length($res), 1, $variant);
+ $type = 'N';
+
+ } elsif ($cur =~ /^($Operators)/o) {
+ print "OP($1)\n" if ($dbg_values > 1);
+ if ($1 ne '++' && $1 ne '--') {
+ $type = 'N';
+ }
+
+ } elsif ($cur =~ /(^.)/o) {
+ print "C($1)\n" if ($dbg_values > 1);
+ }
+ if (defined $1) {
+ $cur = substr($cur, length($1));
+ $res .= $type x length($1);
+ }
+ }
+
+ return ($res, $var);
+}
+
+sub possible {
+ my ($possible, $line) = @_;
+ my $notPermitted = qr{(?:
+ ^(?:
+ $Modifier|
+ $Storage|
+ $Type|
+ DEFINE_\S+
+ )$|
+ ^(?:
+ goto|
+ return|
+ case|
+ else|
+ asm|__asm__|
+ do|
+ \#|
+ \#\#|
+ )(?:\s|$)|
+ ^(?:typedef|struct|enum)\b
+ )}x;
+ warn "CHECK<$possible> ($line)\n" if ($dbg_possible > 2);
+ if ($possible !~ $notPermitted) {
+ # Check for modifiers.
+ $possible =~ s/\s*$Storage\s*//g;
+ $possible =~ s/\s*$Sparse\s*//g;
+ if ($possible =~ /^\s*$/) {
+
+ } elsif ($possible =~ /\s/) {
+ $possible =~ s/\s*$Type\s*//g;
+ for my $modifier (split(' ', $possible)) {
+ if ($modifier !~ $notPermitted) {
+ warn "MODIFIER: $modifier ($possible) ($line)\n" if ($dbg_possible);
+ push(@modifierList, $modifier);
+ }
+ }
+
+ } else {
+ warn "POSSIBLE: $possible ($line)\n" if ($dbg_possible);
+ push(@typeList, $possible);
+ }
+ build_types();
+ } else {
+ warn "NOTPOSS: $possible ($line)\n" if ($dbg_possible > 1);
+ }
+}
+
+my $prefix = '';
+
+sub show_type {
+ my ($type) = @_;
+
+ return defined $use_type{$type} if (scalar keys %use_type > 0);
+
+ return !defined $ignore_type{$type};
+}
+
+sub report {
+ my ($level, $type, $msg) = @_;
+
+ if (!show_type($type) ||
+ (defined $tst_only && $msg !~ /\Q$tst_only\E/)) {
+ return 0;
+ }
+ my $line;
+ if ($show_types) {
+ $line = "$prefix$level:$type: $msg\n";
+ } else {
+ $line = "$prefix$level: $msg\n";
+ }
+ $line = (split('\n', $line))[0] . "\n" if ($terse);
+
+ if ($quiet == 0) {
+ push(our @report, $line);
+ }
+ return 1;
+}
+
+sub report_dump {
+ our @report;
+}
+
+sub ERROR {
+ my ($type, $msg) = @_;
+
+ if (report("ERROR", $type, $msg)) {
+ our $clean = 0;
+ our $cnt_error++;
+ return 1;
+ }
+ return 0;
+}
+sub WARN {
+ my ($type, $msg) = @_;
+
+ if (report("WARNING", $type, $msg)) {
+ ## Warning is okay to submit
+ our $clean = 0;
+ our $cnt_warn++;
+ return 1;
+ }
+ return 0;
+}
+sub CHK {
+ my ($type, $msg) = @_;
+
+ if ($check && report("CHECK", $type, $msg)) {
+ our $clean = 0;
+ our $cnt_chk++;
+ return 1;
+ }
+ return 0;
+}
+
+sub check_absolute_file {
+ my ($absolute, $herecurr) = @_;
+ my $file = $absolute;
+
+ ##print "absolute<$absolute>\n";
+
+ # See if any suffix of this path is a path within the tree.
+ while ($file =~ s@^[^/]*/@@) {
+ if (-f "$root/$file") {
+ ##print "file<$file>\n";
+ last;
+ }
+ }
+ if (! -f _) {
+ return 0;
+ }
+
+ # It is, so see if the prefix is acceptable.
+ my $prefix = $absolute;
+ substr($prefix, -length($file)) = '';
+
+ ##print "prefix<$prefix>\n";
+ if ($prefix ne ".../") {
+ WARN("USE_RELATIVE_PATH",
+ "use relative pathname instead of absolute in changelog text\n" . $herecurr);
+ }
+}
+
+sub trim {
+ my ($string) = @_;
+
+ $string =~ s/^\s+|\s+$//g;
+
+ return $string;
+}
+
+sub ltrim {
+ my ($string) = @_;
+
+ $string =~ s/^\s+//;
+
+ return $string;
+}
+
+sub rtrim {
+ my ($string) = @_;
+
+ $string =~ s/\s+$//;
+
+ return $string;
+}
+
+sub string_find_replace {
+ my ($string, $find, $replace) = @_;
+
+ $string =~ s/$find/$replace/g;
+
+ return $string;
+}
+
+sub tabify {
+ my ($leading) = @_;
+
+ my $source_indent = 8;
+ my $max_spaces_before_tab = $source_indent - 1;
+ my $spaces_to_tab = " " x $source_indent;
+
+ #convert leading spaces to tabs
+ 1 while $leading =~ s@^([\t]*)$spaces_to_tab@$1\t@g;
+ #Remove spaces before a tab
+ 1 while $leading =~ s@^([\t]*)( {1,$max_spaces_before_tab})\t@$1\t@g;
+ return "$leading";
+}
+
+sub pos_last_openparen {
+ my ($line) = @_;
+
+ my $pos = 0;
+
+ my $opens = $line =~ tr/\(/\(/;
+ my $closes = $line =~ tr/\)/\)/;
+
+ my $last_openparen = 0;
+
+ if (($opens == 0) || ($closes >= $opens)) {
+ return -1;
+ }
+
+ my $len = length($line);
+
+ for ($pos = 0; $pos < $len; $pos++) {
+ my $string = substr($line, $pos);
+ if ($string =~ /^($FuncArg|$balanced_parens)/) {
+ $pos += length($1) - 1;
+ } elsif (substr($line, $pos, 1) eq '(') {
+ $last_openparen = $pos;
+ } elsif (index($string, '(') == -1) {
+ last;
+ }
+ }
+
+ return length(expand_tabs(substr($line, 0, $last_openparen))) + 1;
+}
+
+sub process {
+ my $filename = shift;
+
+ my $linenr=0;
+ my $prevline="";
+ my $prevrawline="";
+ my $stashline="";
+ my $stashrawline="";
+
+ my $length;
+ my $indent;
+ my $previndent=0;
+ my $stashindent=0;
+
+ our $clean = 1;
+ my $signoff = 0;
+ my $subject_trailing_dot = 0;
+ my $is_patch = 0;
+
+ my $in_header_lines = 1;
+ my $in_commit_log = 0; #Scanning lines before patch
+
+ my $non_utf8_charset = 0;
+
+ our @report = ();
+ our $cnt_lines = 0;
+ our $cnt_error = 0;
+ our $cnt_warn = 0;
+ our $cnt_chk = 0;
+
+ # Trace the real file/line as we go.
+ my $realfile = '';
+ my $realline = 0;
+ my $realcnt = 0;
+ my $here = '';
+ my $in_comment = 0;
+ my $comment_edge = 0;
+ my $first_line = 0;
+ my $p1_prefix = '';
+
+ my $prev_values = 'E';
+
+ # suppression flags
+ my %suppress_ifbraces;
+ my %suppress_whiletrailers;
+ my %suppress_export;
+ my $suppress_statement = 0;
+
+ my %signatures = ();
+
+ # Pre-scan the patch sanitizing the lines.
+ # Pre-scan the patch looking for any __setup documentation.
+ #
+ my @setup_docs = ();
+ my $setup_docs = 0;
+
+ my $camelcase_file_seeded = 0;
+
+ sanitise_line_reset();
+ my $line;
+ foreach my $rawline (@rawlines) {
+ $linenr++;
+ $line = $rawline;
+
+ push(@fixed, $rawline) if ($fix);
+
+ if ($rawline=~/^\+\+\+\s+(\S+)/) {
+ $setup_docs = 0;
+ if ($1 =~ m@Documentation/kernel-parameters.txt$@) {
+ $setup_docs = 1;
+ }
+ #next;
+ }
+ if ($rawline=~/^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) {
+ $realline=$1-1;
+ if (defined $2) {
+ $realcnt=$3+1;
+ } else {
+ $realcnt=1+1;
+ }
+ $in_comment = 0;
+
+ # Guestimate if this is a continuing comment. Run
+ # the context looking for a comment "edge". If this
+ # edge is a close comment then we must be in a comment
+ # at context start.
+ my $edge;
+ my $cnt = $realcnt;
+ for (my $ln = $linenr + 1; $cnt > 0; $ln++) {
+ next if (defined $rawlines[$ln - 1] &&
+ $rawlines[$ln - 1] =~ /^-/);
+ $cnt--;
+ #print "RAW<$rawlines[$ln - 1]>\n";
+ last if (!defined $rawlines[$ln - 1]);
+ if ($rawlines[$ln - 1] =~ m@(/\*|\*/)@ &&
+ $rawlines[$ln - 1] !~ m@"[^"]*(?:/\*|\*/)[^"]*"@) {
+ ($edge) = $1;
+ last;
+ }
+ }
+ if (defined $edge && $edge eq '*/') {
+ $in_comment = 1;
+ }
+
+ # Guestimate if this is a continuing comment. If this
+ # is the start of a diff block and this line starts
+ # ' *' then it is very likely a comment.
+ if (!defined $edge &&
+ $rawlines[$linenr] =~ m@^.\s*(?:\*\*+| \*)(?:\s|$)@)
+ {
+ $in_comment = 1;
+ }
+
+ ##print "COMMENT:$in_comment edge<$edge> $rawline\n";
+ sanitise_line_reset($in_comment);
+
+ } elsif ($realcnt && $rawline =~ /^(?:\+| |$)/) {
+ # Standardise the strings and chars within the input to
+ # simplify matching -- only bother with positive lines.
+ $line = sanitise_line($rawline);
+ }
+ push(@lines, $line);
+
+ if ($realcnt > 1) {
+ $realcnt-- if ($line =~ /^(?:\+| |$)/);
+ } else {
+ $realcnt = 0;
+ }
+
+ #print "==>$rawline\n";
+ #print "-->$line\n";
+
+ if ($setup_docs && $line =~ /^\+/) {
+ push(@setup_docs, $line);
+ }
+ }
+
+ $prefix = '';
+
+ $realcnt = 0;
+ $linenr = 0;
+ foreach my $line (@lines) {
+ $linenr++;
+ my $sline = $line; #copy of $line
+ $sline =~ s/$;/ /g; #with comments as spaces
+
+ my $rawline = $rawlines[$linenr - 1];
+
+#extract the line range in the file after the patch is applied
+ if ($line=~/^\@\@ -\d+(?:,\d+)? \+(\d+)(,(\d+))? \@\@/) {
+ $is_patch = 1;
+ $first_line = $linenr + 1;
+ $realline=$1-1;
+ if (defined $2) {
+ $realcnt=$3+1;
+ } else {
+ $realcnt=1+1;
+ }
+ annotate_reset();
+ $prev_values = 'E';
+
+ %suppress_ifbraces = ();
+ %suppress_whiletrailers = ();
+ %suppress_export = ();
+ $suppress_statement = 0;
+ next;
+
+# track the line number as we move through the hunk, note that
+# new versions of GNU diff omit the leading space on completely
+# blank context lines so we need to count that too.
+ } elsif ($line =~ /^( |\+|$)/) {
+ $realline++;
+ $realcnt-- if ($realcnt != 0);
+
+ # Measure the line length and indent.
+ ($length, $indent) = line_stats($rawline);
+
+ # Track the previous line.
+ ($prevline, $stashline) = ($stashline, $line);
+ ($previndent, $stashindent) = ($stashindent, $indent);
+ ($prevrawline, $stashrawline) = ($stashrawline, $rawline);
+
+ #warn "line<$line>\n";
+
+ } elsif ($realcnt == 1) {
+ $realcnt--;
+ }
+
+ my $hunk_line = ($realcnt != 0);
+
+#make up the handle for any error we report on this line
+ $prefix = "$filename:$realline: " if ($emacs && $file);
+ $prefix = "$filename:$linenr: " if ($emacs && !$file);
+
+ $here = "#$linenr: " if (!$file);
+ $here = "#$realline: " if ($file);
+
+ my $found_file = 0;
+ # extract the filename as it passes
+ if ($line =~ /^diff --git.*?(\S+)$/) {
+ $realfile = $1;
+ $realfile =~ s@^([^/]*)/@@ if (!$file);
+ $in_commit_log = 0;
+ $found_file = 1;
+ } elsif ($line =~ /^\+\+\+\s+(\S+)/) {
+ $realfile = $1;
+ $realfile =~ s@^([^/]*)/@@ if (!$file);
+ $in_commit_log = 0;
+
+ $p1_prefix = $1;
+ if (!$file && $tree && $p1_prefix ne '' &&
+ -e "$root/$p1_prefix") {
+ WARN("PATCH_PREFIX",
+ "patch prefix '$p1_prefix' exists, appears to be a -p0 patch\n");
+ }
+
+ $found_file = 1;
+ }
+
+ if ($found_file) {
+ if ($realfile =~ m@^(drivers/net/|net/)@) {
+ $check = 1;
+ } else {
+ $check = $check_orig;
+ }
+ next;
+ }
+
+ $here .= "FILE: $realfile:$realline:" if ($realcnt != 0);
+
+ my $hereline = "$here\n$rawline\n";
+ my $herecurr = "$here\n$rawline\n";
+ my $hereprev = "$here\n$prevrawline\n$rawline\n";
+
+ $cnt_lines++ if ($realcnt != 0);
+
+# Check for incorrect file permissions
+ if ($line =~ /^new (file )?mode.*[7531]\d{0,2}$/) {
+ my $permhere = $here . "FILE: $realfile\n";
+ if ($realfile !~ m@scripts/@ &&
+ $realfile !~ /\.(py|pl|awk|sh|t)$/) {
+ ERROR("EXECUTE_PERMISSIONS",
+ "do not set execute permissions for source files\n" . $permhere);
+ }
+ }
+
+ next if ($realfile =~ /(checkpatch.pl)/);
+ next if ($realfile =~ /\.(md|txt|doc|8|pdf|tex)$/);
+
+# Check that the subject does not have a trailing dot
+ if ($in_header_lines &&
+ $line =~ /^Subject: \[PATCH\] (.+)\.(\s*)$/) {
+ $subject_trailing_dot++;
+ }
+
+# Check the patch for a signoff:
+ if ($line =~ /^\s*signed-off-by:/i) {
+ $signoff++;
+ $in_commit_log = 0;
+ }
+
+# Check signature styles
+ if (!$in_header_lines &&
+ $line =~ /^(\s*)([a-z0-9_-]+by:|$signature_tags)(\s*)(.*)/i) {
+ my $space_before = $1;
+ my $sign_off = $2;
+ my $space_after = $3;
+ my $email = $4;
+ my $ucfirst_sign_off = ucfirst(lc($sign_off));
+
+ if ($sign_off !~ /$signature_tags/) {
+ WARN("BAD_SIGN_OFF",
+ "Non-standard signature: $sign_off\n" . $herecurr);
+ }
+ if (defined $space_before && $space_before ne "") {
+ if (WARN("BAD_SIGN_OFF",
+ "Do not use whitespace before $ucfirst_sign_off\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =
+ "$ucfirst_sign_off $email";
+ }
+ }
+ if ($sign_off =~ /-by:$/i && $sign_off ne $ucfirst_sign_off) {
+ if (WARN("BAD_SIGN_OFF",
+ "'$ucfirst_sign_off' is the preferred signature form\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =
+ "$ucfirst_sign_off $email";
+ }
+
+ }
+ if (!defined $space_after || $space_after ne " ") {
+ if (WARN("BAD_SIGN_OFF",
+ "Use a single space after $ucfirst_sign_off\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =
+ "$ucfirst_sign_off $email";
+ }
+ }
+
+ # Check if email is really Gerrit URL
+ if ($email =~ /^($url_tags)(.*)/) {
+ my $uri = $1;
+ my $url = $2;
+ if ($uri && $url !~ /$gerrit_url/) {
+ ERROR("BAD_URL",
+ "Unrecognized url address: '$email'\n" . $herecurr);
+ }
+ } else {
+ my ($email_name, $email_address, $comment) = parse_email($email);
+ my $suggested_email = format_email(($email_name, $email_address));
+ if ($suggested_email eq "") {
+ ERROR("BAD_SIGN_OFF",
+ "Unrecognized email address: '$email'\n" . $herecurr);
+ } else {
+ my $dequoted = $suggested_email;
+ $dequoted =~ s/^"//;
+ $dequoted =~ s/" </ </;
+ # Don't force email to have quotes
+ # Allow just an angle bracketed address
+ if ("$dequoted$comment" ne $email &&
+ "<$email_address>$comment" ne $email &&
+ "$suggested_email$comment" ne $email) {
+ WARN("BAD_SIGN_OFF",
+ "email address '$email' might be better as '$suggested_email$comment'\n" . $herecurr);
+ }
+ }
+ }
+
+# Check for duplicate signatures
+ my $sig_nospace = $line;
+ $sig_nospace =~ s/\s//g;
+ $sig_nospace = lc($sig_nospace);
+ if (defined $signatures{$sig_nospace}) {
+ WARN("BAD_SIGN_OFF",
+ "Duplicate signature\n" . $herecurr);
+ } else {
+ $signatures{$sig_nospace} = 1;
+ }
+ }
+
+# Check for wrappage within a valid hunk of the file
+ if ($realcnt != 0 && $line !~ m{^(?:\+|-| |\\ No newline|$)}) {
+ ERROR("CORRUPTED_PATCH",
+ "patch seems to be corrupt (line wrapped?)\n" .
+ $herecurr) if (!$emitted_corrupt++);
+ }
+
+# Check for absolute kernel paths.
+ if ($tree) {
+ while ($line =~ m{(?:^|\s)(/\S*)}g) {
+ my $file = $1;
+
+ if ($file =~ m{^(.*?)(?::\d+)+:?$} &&
+ check_absolute_file($1, $herecurr)) {
+ #
+ } else {
+ check_absolute_file($file, $herecurr);
+ }
+ }
+ }
+
+# UTF-8 regex found at http://www.w3.org/International/questions/qa-forms-utf-8.en.php
+ if (($realfile =~ /^$/ || $line =~ /^\+/) &&
+ $rawline !~ m/^$UTF8*$/) {
+ my ($utf8_prefix) = ($rawline =~ /^($UTF8*)/);
+
+ my $blank = copy_spacing($rawline);
+ my $ptr = substr($blank, 0, length($utf8_prefix)) . "^";
+ my $hereptr = "$hereline$ptr\n";
+
+ CHK("INVALID_UTF8",
+ "Invalid UTF-8, patch and commit message should be encoded in UTF-8\n" . $hereptr);
+ }
+
+# Check if it's the start of a commit log
+# (not a header line and we haven't seen the patch filename)
+ if ($in_header_lines && $realfile =~ /^$/ &&
+ $rawline !~ /^(commit\b|from\b|[\w-]+:).+$/i) {
+ $in_header_lines = 0;
+ $in_commit_log = 1;
+ }
+
+# Check if there is UTF-8 in a commit log when a mail header has explicitly
+# declined it, i.e defined some charset where it is missing.
+ if ($in_header_lines &&
+ $rawline =~ /^Content-Type:.+charset="(.+)".*$/ &&
+ $1 !~ /utf-8/i) {
+ $non_utf8_charset = 1;
+ }
+
+ if ($in_commit_log && $non_utf8_charset && $realfile =~ /^$/ &&
+ $rawline =~ /$NON_ASCII_UTF8/) {
+ WARN("UTF8_BEFORE_PATCH",
+ "8-bit UTF-8 used in possible commit log\n" . $herecurr);
+ }
+
+# ignore non-hunk lines and lines being removed
+ next if (!$hunk_line || $line =~ /^-/);
+
+#trailing whitespace
+ if ($line =~ /^\+.*\015/) {
+ my $herevet = "$here\n" . cat_vet($rawline) . "\n";
+ if (ERROR("DOS_LINE_ENDINGS",
+ "DOS line endings\n" . $herevet) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/[\s\015]+$//;
+ }
+ } elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) {
+ my $herevet = "$here\n" . cat_vet($rawline) . "\n";
+ if (ERROR("TRAILING_WHITESPACE",
+ "trailing whitespace\n" . $herevet) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/\s+$//;
+ }
+
+ $rpt_cleaners = 1;
+ }
+
+ if (($realfile =~ /Makefile.*/) &&
+ ($line =~ /\+(EXTRA_[A-Z]+FLAGS).*/)) {
+ my $flag = $1;
+ my $replacement = {
+ 'EXTRA_AFLAGS' => 'asflags-y',
+ 'EXTRA_CFLAGS' => 'ccflags-y',
+ 'EXTRA_CPPFLAGS' => 'cppflags-y',
+ 'EXTRA_LDFLAGS' => 'ldflags-y',
+ };
+
+ WARN("DEPRECATED_VARIABLE",
+ "Use of $flag is deprecated, please use \`$replacement->{$flag} instead.\n" . $herecurr) if ($replacement->{$flag});
+ }
+
+# check we are in .spec file, then ignore this hunk
+ next if ($realfile eq "glusterfs.spec.in");
+
+# check we are in a valid source file if not then ignore this hunk
+ next if ($realfile !~ /\.(h|c|pl|py|l|y|sh|in)$/);
+
+#line length limit
+ if ($line =~ /^\+/ && $prevrawline !~ /\/\*\*/ &&
+ $rawline !~ /^.\s*\*\s*\@$Ident\s/ &&
+ !($line =~ /^\+\s*$logFunctions\s*\(\s*(?:(KERN_\S+\s*|[^"]*))?"[X\t]*"\s*(?:|,|\)\s*;)\s*$/ ||
+ $line =~ /^\+\s*"[^"]*"\s*(?:\s*|,|\)\s*;)\s*$/) &&
+ $length > $max_line_length)
+ {
+ WARN("LONG_LINE",
+ "line over $max_line_length characters\n" . $herecurr);
+ }
+
+# check for spaces before a quoted newline
+ if ($rawline =~ /^.*\".*\s\\n/) {
+ if (WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE",
+ "unnecessary whitespace before a quoted newline\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/^(\+.*\".*)\s+\\n/$1\\n/;
+ }
+
+ }
+
+# check for adding lines without a newline.
+ if ($line =~ /^\+/ && defined $lines[$linenr] && $lines[$linenr] =~ /^\\ No newline at end of file/) {
+ WARN("MISSING_EOF_NEWLINE",
+ "adding a line without newline at end of file\n" . $herecurr);
+ }
+
+# check we are in a valid source file C or perl if not then ignore this hunk
+ next if ($realfile !~ /\.(h|c|pl)$/);
+
+# check for space before tabs.
+ if ($rawline =~ /^\+/ && $rawline =~ / \t/) {
+ my $herevet = "$here\n" . cat_vet($rawline) . "\n";
+ if (WARN("SPACE_BEFORE_TAB",
+ "please, no space before tabs\n" . $herevet) &&
+ $fix) {
+ while ($fixed[$linenr - 1] =~
+ s/(^\+.*) {8,8}\t/$1\t\t/) {}
+ while ($fixed[$linenr - 1] =~
+ s/(^\+.*) +\t/$1\t/) {}
+ }
+ }
+
+# check for && or || at the start of a line
+ if ($rawline =~ /^\+\s*(&&|\|\|)/) {
+ CHK("LOGICAL_CONTINUATIONS",
+ "Logical continuations should be on the previous line\n" . $hereprev);
+ }
+
+# check multi-line statement indentation matches previous line
+ if ($^V && $^V ge 5.10.0 &&
+ $prevline =~ /^\+([ \t]*)((?:$c90_Keywords(?:\s+if)\s*)|(?:$Declare\s*)?(?:$Ident|\(\s*\*\s*$Ident\s*\))\s*|$Ident\s*=\s*$Ident\s*)\(.*(\&\&|\|\||,)\s*$/) {
+ $prevline =~ /^\+(\t*)(.*)$/;
+ my $oldindent = $1;
+ my $rest = $2;
+
+ my $pos = pos_last_openparen($rest);
+ if ($pos >= 0) {
+ $line =~ /^(\+| )([ \t]*)/;
+ my $newindent = $2;
+
+ my $goodtabindent = $oldindent .
+ "\t" x ($pos / 8) .
+ " " x ($pos % 8);
+ my $goodspaceindent = $oldindent . " " x $pos;
+
+ if ($newindent ne $goodtabindent &&
+ $newindent ne $goodspaceindent) {
+
+ if (CHK("PARENTHESIS_ALIGNMENT",
+ "Alignment should match open parenthesis\n" . $hereprev) &&
+ $fix && $line =~ /^\+/) {
+ $fixed[$linenr - 1] =~
+ s/^\+[ \t]*/\+$goodtabindent/;
+ }
+ }
+ }
+ }
+
+ if ($line =~ /^\+.*\*[ \t]*\)[ \t]+(?!$Assignment|$Arithmetic)/) {
+ if (CHK("SPACING",
+ "No space is necessary after a cast\n" . $hereprev) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/^(\+.*\*[ \t]*\))[ \t]+/$1/;
+ }
+ }
+
+
+# check for missing blank lines after declarations
+ if ($sline =~ /^\+\s+\S/ && #Not at char 1
+ # actual declarations
+ ($prevline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+ # foo bar; where foo is some local typedef or #define
+ $prevline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
+ # known declaration macros
+ $prevline =~ /^\+\s+$declaration_macros/) &&
+ # for "else if" which can look like "$Ident $Ident"
+ !($prevline =~ /^\+\s+$c90_Keywords\b/ ||
+ # other possible extensions of declaration lines
+ $prevline =~ /(?:$Compare|$Assignment|$Operators)\s*$/ ||
+ # not starting a section or a macro "\" extended line
+ $prevline =~ /(?:\{\s*|\\)$/) &&
+ # looks like a declaration
+ !($sline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+ # foo bar; where foo is some local typedef or #define
+ $sline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
+ # known declaration macros
+ $sline =~ /^\+\s+$declaration_macros/ ||
+ # start of struct or union or enum
+ $sline =~ /^\+\s+(?:union|struct|enum|typedef)\b/ ||
+ # start or end of block or continuation of declaration
+ $sline =~ /^\+\s+(?:$|[\{\}\.\#\"\?\:\(\[])/ ||
+ # bitfield continuation
+ $sline =~ /^\+\s+$Ident\s*:\s*\d+\s*[,;]/ ||
+ # other possible extensions of declaration lines
+ $sline =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/) &&
+ # indentation of previous and current line are the same
+ (($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/)) {
+ WARN("SPACING",
+ "Missing a blank line after declarations\n" . $hereprev);
+ }
+
+# check we are in a valid C source file if not then ignore this hunk
+ next if ($realfile !~ /\.(h|c)$/);
+
+# check for RCS/CVS revision markers
+ if ($rawline =~ /^\+.*\$(Revision|Log|Id)(?:\$|)/) {
+ WARN("CVS_KEYWORD",
+ "CVS style keyword markers, these will _not_ be updated\n". $herecurr);
+ }
+
+# Check for potential 'bare' types
+ my ($stat, $cond, $line_nr_next, $remain_next, $off_next,
+ $realline_next);
+#print "LINE<$line>\n";
+ if ($linenr >= $suppress_statement &&
+ $realcnt && $sline =~ /.\s*\S/) {
+ ($stat, $cond, $line_nr_next, $remain_next, $off_next) =
+ ctx_statement_block($linenr, $realcnt, 0);
+ $stat =~ s/\n./\n /g;
+ $cond =~ s/\n./\n /g;
+
+#print "linenr<$linenr> <$stat>\n";
+ # If this statement has no statement boundaries within
+ # it there is no point in retrying a statement scan
+ # until we hit end of it.
+ my $frag = $stat; $frag =~ s/;+\s*$//;
+ if ($frag !~ /(?:{|;)/) {
+#print "skip<$line_nr_next>\n";
+ $suppress_statement = $line_nr_next;
+ }
+
+ # Find the real next line.
+ $realline_next = $line_nr_next;
+ if (defined $realline_next &&
+ (!defined $lines[$realline_next - 1] ||
+ substr($lines[$realline_next - 1], $off_next) =~ /^\s*$/)) {
+ $realline_next++;
+ }
+
+ my $s = $stat;
+ $s =~ s/{.*$//s;
+
+ # Ignore goto labels.
+ if ($s =~ /$Ident:\*$/s) {
+
+ # Ignore functions being called
+ } elsif ($s =~ /^.\s*$Ident\s*\(/s) {
+
+ } elsif ($s =~ /^.\s*else\b/s) {
+
+ # declarations always start with types
+ } elsif ($prev_values eq 'E' && $s =~ /^.\s*(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?((?:\s*$Ident)+?)\b(?:\s+$Sparse)?\s*\**\s*(?:$Ident|\(\*[^\)]*\))(?:\s*$Modifier)?\s*(?:;|=|,|\()/s) {
+ my $type = $1;
+ $type =~ s/\s+/ /g;
+ possible($type, "A:" . $s);
+
+ # definitions in global scope can only start with types
+ } elsif ($s =~ /^.(?:$Storage\s+)?(?:$Inline\s+)?(?:const\s+)?($Ident)\b\s*(?!:)/s) {
+ possible($1, "B:" . $s);
+ }
+
+ # any (foo ... *) is a pointer cast, and foo is a type
+ while ($s =~ /\(($Ident)(?:\s+$Sparse)*[\s\*]+\s*\)/sg) {
+ possible($1, "C:" . $s);
+ }
+
+ # Check for any sort of function declaration.
+ # int foo(something bar, other baz);
+ # void (*store_gdt)(x86_descr_ptr *);
+ if ($prev_values eq 'E' && $s =~ /^(.(?:typedef\s*)?(?:(?:$Storage|$Inline)\s*)*\s*$Type\s*(?:\b$Ident|\(\*\s*$Ident\))\s*)\(/s) {
+ my ($name_len) = length($1);
+
+ my $ctx = $s;
+ substr($ctx, 0, $name_len + 1, '');
+ $ctx =~ s/\)[^\)]*$//;
+
+ for my $arg (split(/\s*,\s*/, $ctx)) {
+ if ($arg =~ /^(?:const\s+)?($Ident)(?:\s+$Sparse)*\s*\**\s*(:?\b$Ident)?$/s || $arg =~ /^($Ident)$/s) {
+
+ possible($1, "D:" . $s);
+ }
+ }
+ }
+
+ }
+
+#
+# Checks which may be anchored in the context.
+#
+
+# Check for switch () and associated case and default
+# statements should be at the same indent.
+ if ($line=~/\bswitch\s*\(.*\)/) {
+ my $err = '';
+ my $sep = '';
+ my @ctx = ctx_block_outer($linenr, $realcnt);
+ shift(@ctx);
+ for my $ctx (@ctx) {
+ my ($clen, $cindent) = line_stats($ctx);
+ if ($ctx =~ /^\+\s*(case\s+|default:)/ &&
+ $indent != $cindent) {
+ $err .= "$sep$ctx\n";
+ $sep = '';
+ } else {
+ $sep = "[...]\n";
+ }
+ }
+ if ($err ne '') {
+ ERROR("SWITCH_CASE_INDENT_LEVEL",
+ "switch and case should be at the same indent\n$hereline$err");
+ }
+ }
+
+# if/while/etc brace do not go on next line, unless defining a do while loop,
+# or if that brace on the next line is for something else
+ if ($line =~ /(.*)\b((?:if|while|for|switch)\s*\(|do\b|else\b)/ && $line !~ /^.\s*\#/) {
+ my $pre_ctx = "$1$2";
+
+ my ($level, @ctx) = ctx_statement_level($linenr, $realcnt, 0);
+
+ if ($line =~ /^\+\t{6,}/) {
+ WARN("DEEP_INDENTATION",
+ "Too many leading tabs - consider code refactoring\n" . $herecurr);
+ }
+
+ my $ctx_cnt = $realcnt - $#ctx - 1;
+ my $ctx = join("\n", @ctx);
+
+ my $ctx_ln = $linenr;
+ my $ctx_skip = $realcnt;
+
+ while ($ctx_skip > $ctx_cnt || ($ctx_skip == $ctx_cnt &&
+ defined $lines[$ctx_ln - 1] &&
+ $lines[$ctx_ln - 1] =~ /^-/)) {
+ ##print "SKIP<$ctx_skip> CNT<$ctx_cnt>\n";
+ $ctx_skip-- if (!defined $lines[$ctx_ln - 1] || $lines[$ctx_ln - 1] !~ /^-/);
+ $ctx_ln++;
+ }
+
+ #print "realcnt<$realcnt> ctx_cnt<$ctx_cnt>\n";
+ #print "pre<$pre_ctx>\nline<$line>\nctx<$ctx>\nnext<$lines[$ctx_ln - 1]>\n";
+
+ if ($ctx !~ /{\s*/ && defined($lines[$ctx_ln -1]) && $lines[$ctx_ln - 1] =~ /^\+\s*{/) {
+ ERROR("OPEN_BRACE",
+ "that open brace { should be on the previous line\n" .
+ "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n");
+ }
+ if ($level == 0 && $pre_ctx !~ /}\s*while\s*\($/ &&
+ $ctx =~ /\)\s*\;\s*$/ &&
+ defined $lines[$ctx_ln - 1])
+ {
+ my ($nlength, $nindent) = line_stats($lines[$ctx_ln - 1]);
+ if ($nindent > $indent) {
+ WARN("TRAILING_SEMICOLON",
+ "trailing semicolon indicates no statements, indent implies otherwise\n" .
+ "$here\n$ctx\n$rawlines[$ctx_ln - 1]\n");
+ }
+ }
+ }
+
+# Check relative indent for conditionals and blocks.
+ if ($line =~ /\b(?:(?:if|while|for)\s*\(|do\b)/ && $line !~ /^.\s*#/ && $line !~ /\}\s*while\s*/) {
+ ($stat, $cond, $line_nr_next, $remain_next, $off_next) =
+ ctx_statement_block($linenr, $realcnt, 0)
+ if (!defined $stat);
+ my ($s, $c) = ($stat, $cond);
+
+ substr($s, 0, length($c), '');
+
+ # Make sure we remove the line prefixes as we have
+ # none on the first line, and are going to readd them
+ # where necessary.
+ $s =~ s/\n./\n/gs;
+
+ # Find out how long the conditional actually is.
+ my @newlines = ($c =~ /\n/gs);
+ my $cond_lines = 1 + $#newlines;
+
+ # We want to check the first line inside the block
+ # starting at the end of the conditional, so remove:
+ # 1) any blank line termination
+ # 2) any opening brace { on end of the line
+ # 3) any do (...) {
+ my $continuation = 0;
+ my $check = 0;
+ $s =~ s/^.*\bdo\b//;
+ $s =~ s/^\s*{//;
+ if ($s =~ s/^\s*\\//) {
+ $continuation = 1;
+ }
+ if ($s =~ s/^\s*?\n//) {
+ $check = 1;
+ $cond_lines++;
+ }
+
+ # Also ignore a loop construct at the end of a
+ # preprocessor statement.
+ if (($prevline =~ /^.\s*#\s*define\s/ ||
+ $prevline =~ /\\\s*$/) && $continuation == 0) {
+ $check = 0;
+ }
+
+ my $cond_ptr = -1;
+ $continuation = 0;
+ while ($cond_ptr != $cond_lines) {
+ $cond_ptr = $cond_lines;
+
+ # If we see an #else/#elif then the code
+ # is not linear.
+ if ($s =~ /^\s*\#\s*(?:else|elif)/) {
+ $check = 0;
+ }
+
+ # Ignore:
+ # 1) blank lines, they should be at 0,
+ # 2) preprocessor lines, and
+ # 3) labels.
+ if ($continuation ||
+ $s =~ /^\s*?\n/ ||
+ $s =~ /^\s*#\s*?/ ||
+ $s =~ /^\s*$Ident\s*:/) {
+ $continuation = ($s =~ /^.*?\\\n/) ? 1 : 0;
+ if ($s =~ s/^.*?\n//) {
+ $cond_lines++;
+ }
+ }
+ }
+
+ my (undef, $sindent) = line_stats("+" . $s);
+ my $stat_real = raw_line($linenr, $cond_lines);
+
+ # Check if either of these lines are modified, else
+ # this is not this patch's fault.
+ if (!defined($stat_real) ||
+ $stat !~ /^\+/ && $stat_real !~ /^\+/) {
+ $check = 0;
+ }
+ if (defined($stat_real) && $cond_lines > 1) {
+ $stat_real = "[...]\n$stat_real";
+ }
+
+ #print "line<$line> prevline<$prevline> indent<$indent> sindent<$sindent> check<$check> continuation<$continuation> s<$s> cond_lines<$cond_lines> stat_real<$stat_real> stat<$stat>\n";
+
+ if ($check && (($sindent % 8) != 0 ||
+ ($sindent <= $indent && $s ne ''))) {
+ WARN("SUSPECT_CODE_INDENT",
+ "suspect code indent for conditional statements ($indent, $sindent)\n" . $herecurr . "$stat_real\n");
+ }
+ }
+
+ # Track the 'values' across context and added lines.
+ my $opline = $line; $opline =~ s/^./ /;
+ my ($curr_values, $curr_vars) =
+ annotate_values($opline . "\n", $prev_values);
+ $curr_values = $prev_values . $curr_values;
+ if ($dbg_values) {
+ my $outline = $opline; $outline =~ s/\t/ /g;
+ print "$linenr > .$outline\n";
+ print "$linenr > $curr_values\n";
+ print "$linenr > $curr_vars\n";
+ }
+ $prev_values = substr($curr_values, -1);
+
+#ignore lines not being added
+ next if ($line =~ /^[^\+]/);
+
+# TEST: allow direct testing of the type matcher.
+ if ($dbg_type) {
+ if ($line =~ /^.\s*$Declare\s*$/) {
+ ERROR("TEST_TYPE",
+ "TEST: is type\n" . $herecurr);
+ } elsif ($dbg_type > 1 && $line =~ /^.+($Declare)/) {
+ ERROR("TEST_NOT_TYPE",
+ "TEST: is not type ($1 is)\n". $herecurr);
+ }
+ next;
+ }
+# TEST: allow direct testing of the attribute matcher.
+ if ($dbg_attr) {
+ if ($line =~ /^.\s*$Modifier\s*$/) {
+ ERROR("TEST_ATTR",
+ "TEST: is attr\n" . $herecurr);
+ } elsif ($dbg_attr > 1 && $line =~ /^.+($Modifier)/) {
+ ERROR("TEST_NOT_ATTR",
+ "TEST: is not attr ($1 is)\n". $herecurr);
+ }
+ next;
+ }
+
+# check for initialisation to aggregates open brace on the next line
+ if ($line =~ /^.\s*{/ &&
+ $prevline =~ /(?:^|[^=])=\s*$/) {
+ ERROR("OPEN_BRACE",
+ "that open brace { should be on the previous line\n" . $hereprev);
+ }
+
+#
+# Checks which are anchored on the added line.
+#
+
+# check for malformed paths in #include statements (uses RAW line)
+ if ($rawline =~ m{^.\s*\#\s*include\s+[<"](.*)[">]}) {
+ my $path = $1;
+ if ($path =~ m{//}) {
+ ERROR("MALFORMED_INCLUDE",
+ "malformed #include filename\n" . $herecurr);
+ }
+ if ($path =~ "^uapi/" && $realfile =~ m@\binclude/uapi/@) {
+ ERROR("UAPI_INCLUDE",
+ "No #include in ...include/uapi/... should use a uapi/ path prefix\n" . $herecurr);
+ }
+ }
+
+# no C99 // comments
+ if ($line =~ m{//}) {
+ if (ERROR("C99_COMMENTS",
+ "do not use C99 // comments\n" . $herecurr) &&
+ $fix) {
+ my $line = $fixed[$linenr - 1];
+ if ($line =~ /\/\/(.*)$/) {
+ my $comment = trim($1);
+ $fixed[$linenr - 1] =~ s@\/\/(.*)$@/\* $comment \*/@;
+ }
+ }
+ }
+ # Remove C99 comments.
+ $line =~ s@//.*@@;
+ $opline =~ s@//.*@@;
+
+# EXPORT_SYMBOL should immediately follow the thing it is exporting, consider
+# the whole statement.
+#print "APW <$lines[$realline_next - 1]>\n";
+ if (defined $realline_next &&
+ exists $lines[$realline_next - 1] &&
+ !defined $suppress_export{$realline_next} &&
+ ($lines[$realline_next - 1] =~ /EXPORT_SYMBOL.*\((.*)\)/ ||
+ $lines[$realline_next - 1] =~ /EXPORT_UNUSED_SYMBOL.*\((.*)\)/)) {
+ # Handle definitions which produce identifiers with
+ # a prefix:
+ # XXX(foo);
+ # EXPORT_SYMBOL(something_foo);
+ my $name = $1;
+ if ($stat =~ /^(?:.\s*}\s*\n)?.([A-Z_]+)\s*\(\s*($Ident)/ &&
+ $name =~ /^${Ident}_$2/) {
+#print "FOO C name<$name>\n";
+ $suppress_export{$realline_next} = 1;
+
+ } elsif ($stat !~ /(?:
+ \n.}\s*$|
+ ^.DEFINE_$Ident\(\Q$name\E\)|
+ ^.DECLARE_$Ident\(\Q$name\E\)|
+ ^.LIST_HEAD\(\Q$name\E\)|
+ ^.(?:$Storage\s+)?$Type\s*\(\s*\*\s*\Q$name\E\s*\)\s*\(|
+ \b\Q$name\E(?:\s+$Attribute)*\s*(?:;|=|\[|\()
+ )/x) {
+#print "FOO A<$lines[$realline_next - 1]> stat<$stat> name<$name>\n";
+ $suppress_export{$realline_next} = 2;
+ } else {
+ $suppress_export{$realline_next} = 1;
+ }
+ }
+ if (!defined $suppress_export{$linenr} &&
+ $prevline =~ /^.\s*$/ &&
+ ($line =~ /EXPORT_SYMBOL.*\((.*)\)/ ||
+ $line =~ /EXPORT_UNUSED_SYMBOL.*\((.*)\)/)) {
+#print "FOO B <$lines[$linenr - 1]>\n";
+ $suppress_export{$linenr} = 2;
+ }
+ if (defined $suppress_export{$linenr} &&
+ $suppress_export{$linenr} == 2) {
+ WARN("EXPORT_SYMBOL",
+ "EXPORT_SYMBOL(foo); should immediately follow its function/variable\n" . $herecurr);
+ }
+
+# check for global initialisers.
+ if ($line =~ /^\+(\s*$Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/) {
+ if (ERROR("GLOBAL_INITIALISERS",
+ "do not initialise globals to 0 or NULL\n" .
+ $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/;
+ }
+ }
+# check for static initialisers.
+ if ($line =~ /^\+.*\bstatic\s.*=\s*(0|NULL|false)\s*;/) {
+ if (ERROR("INITIALISED_STATIC",
+ "do not initialise statics to 0 or NULL\n" .
+ $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/;
+ }
+ }
+
+# check for static const char * arrays.
+ if ($line =~ /\bstatic\s+const\s+char\s*\*\s*(\w+)\s*\[\s*\]\s*=\s*/) {
+ WARN("STATIC_CONST_CHAR_ARRAY",
+ "static const char * array should probably be static const char * const\n" .
+ $herecurr);
+ }
+
+# check for static char foo[] = "bar" declarations.
+ if ($line =~ /\bstatic\s+char\s+(\w+)\s*\[\s*\]\s*=\s*"/) {
+ WARN("STATIC_CONST_CHAR_ARRAY",
+ "static char array declaration should probably be static const char\n" .
+ $herecurr);
+ }
+
+# check for non-global char *foo[] = {"bar", ...} declarations.
+ if ($line =~ /^.\s+(?:static\s+|const\s+)?char\s+\*\s*\w+\s*\[\s*\]\s*=\s*\{/) {
+ WARN("STATIC_CONST_CHAR_ARRAY",
+ "char * array declaration might be better as static const\n" .
+ $herecurr);
+ }
+
+# check for function declarations without arguments like "int foo()"
+ if ($line =~ /(\b$Type\s+$Ident)\s*\(\s*\)/) {
+ if (ERROR("FUNCTION_WITHOUT_ARGS",
+ "Bad function definition - $1() should probably be $1(void)\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/(\b($Type)\s+($Ident))\s*\(\s*\)/$2 $3(void)/;
+ }
+ }
+
+# * goes on variable not on type
+ # (char*[ const])
+ while ($line =~ m{(\($NonptrType(\s*(?:$Modifier\b\s*|\*\s*)+)\))}g) {
+ #print "AA<$1>\n";
+ my ($ident, $from, $to) = ($1, $2, $2);
+
+ # Should start with a space.
+ $to =~ s/^(\S)/ $1/;
+ # Should not end with a space.
+ $to =~ s/\s+$//;
+ # '*'s should not have spaces between.
+ while ($to =~ s/\*\s+\*/\*\*/) {
+ }
+
+## print "1: from<$from> to<$to> ident<$ident>\n";
+ if ($from ne $to) {
+ if (ERROR("POINTER_LOCATION",
+ "\"(foo$from)\" should be \"(foo$to)\"\n" . $herecurr) &&
+ $fix) {
+ my $sub_from = $ident;
+ my $sub_to = $ident;
+ $sub_to =~ s/\Q$from\E/$to/;
+ $fixed[$linenr - 1] =~
+ s@\Q$sub_from\E@$sub_to@;
+ }
+ }
+ }
+ while ($line =~ m{(\b$NonptrType(\s*(?:$Modifier\b\s*|\*\s*)+)($Ident))}g) {
+ #print "BB<$1>\n";
+ my ($match, $from, $to, $ident) = ($1, $2, $2, $3);
+
+ # Should start with a space.
+ $to =~ s/^(\S)/ $1/;
+ # Should not end with a space.
+ $to =~ s/\s+$//;
+ # '*'s should not have spaces between.
+ while ($to =~ s/\*\s+\*/\*\*/) {
+ }
+ # Modifiers should have spaces.
+ $to =~ s/(\b$Modifier$)/$1 /;
+
+## print "2: from<$from> to<$to> ident<$ident>\n";
+ if ($from ne $to && $ident !~ /^$Modifier$/) {
+ if (ERROR("POINTER_LOCATION",
+ "\"foo${from}bar\" should be \"foo${to}bar\"\n" . $herecurr) &&
+ $fix) {
+
+ my $sub_from = $match;
+ my $sub_to = $match;
+ $sub_to =~ s/\Q$from\E/$to/;
+ $fixed[$linenr - 1] =~
+ s@\Q$sub_from\E@$sub_to@;
+ }
+ }
+ }
+
+# function brace can't be on same line, except for #defines of do while,
+# or if closed on same line
+ if (($line=~/$Type\s*$Ident\(.*\).*\s\{/) and
+ !($line=~/\#\s*define.*do\s\{/) and !($line=~/}/)) {
+ ERROR("OPEN_BRACE",
+ "open brace '{' following function declarations go on the next line\n" . $herecurr);
+ }
+
+# open braces for enum, union and struct go on the same line.
+ if ($line =~ /^.\s*{/ &&
+ $prevline =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident)?\s*$/) {
+ ERROR("OPEN_BRACE",
+ "open brace '{' following $1 go on the same line\n" . $hereprev);
+ }
+
+# missing space after union, struct or enum definition
+ if ($line =~ /^.\s*(?:typedef\s+)?(enum|union|struct)(?:\s+$Ident){1,2}[=\{]/) {
+ if (WARN("SPACING",
+ "missing space after $1 definition\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/^(.\s*(?:typedef\s+)?(?:enum|union|struct)(?:\s+$Ident){1,2})([=\{])/$1 $2/;
+ }
+ }
+
+# Function pointer declarations
+# check spacing between type, funcptr, and args
+# canonical declaration is "type (*funcptr)(args...)"
+ if ($line =~ /^.\s*($Declare)\((\s*)\*(\s*)($Ident)(\s*)\)(\s*)\(/) {
+ my $declare = $1;
+ my $pre_pointer_space = $2;
+ my $post_pointer_space = $3;
+ my $funcname = $4;
+ my $post_funcname_space = $5;
+ my $pre_args_space = $6;
+
+# the $Declare variable will capture all spaces after the type
+# so check it for a missing trailing missing space but pointer return types
+# don't need a space so don't warn for those.
+ my $post_declare_space = "";
+ if ($declare =~ /(\s+)$/) {
+ $post_declare_space = $1;
+ $declare = rtrim($declare);
+ }
+ if ($declare !~ /\*$/ && $post_declare_space =~ /^$/) {
+ WARN("SPACING",
+ "missing space after return type\n" . $herecurr);
+ $post_declare_space = " ";
+ }
+
+# unnecessary space "type ( *funcptr)(args...)"
+ if (defined $pre_pointer_space &&
+ $pre_pointer_space =~ /^\s/) {
+ WARN("SPACING",
+ "Unnecessary space after function pointer open parenthesis\n" . $herecurr);
+ }
+
+# unnecessary space "type (* funcptr)(args...)"
+ if (defined $post_pointer_space &&
+ $post_pointer_space =~ /^\s/) {
+ WARN("SPACING",
+ "Unnecessary space before function pointer name\n" . $herecurr);
+ }
+
+# unnecessary space "type (*funcptr )(args...)"
+ if (defined $post_funcname_space &&
+ $post_funcname_space =~ /^\s/) {
+ WARN("SPACING",
+ "Unnecessary space after function pointer name\n" . $herecurr);
+ }
+
+# unnecessary space "type (*funcptr) (args...)"
+ if (defined $pre_args_space &&
+ $pre_args_space =~ /^\s/) {
+ WARN("SPACING",
+ "Unnecessary space before function pointer arguments\n" . $herecurr);
+ }
+
+ if (show_type("SPACING") && $fix) {
+ $fixed[$linenr - 1] =~
+ s/^(.\s*)$Declare\s*\(\s*\*\s*$Ident\s*\)\s*\(/$1 . $declare . $post_declare_space . '(*' . $funcname . ')('/ex;
+ }
+ }
+
+# check for spacing round square brackets; allowed:
+# 1. with a type on the left -- int [] a;
+# 2. at the beginning of a line for slice initialisers -- [0...10] = 5,
+# 3. inside a curly brace -- = { [0...10] = 5 }
+ while ($line =~ /(.*?\s)\[/g) {
+ my ($where, $prefix) = ($-[1], $1);
+ if ($prefix !~ /$Type\s+$/ &&
+ ($where != 0 || $prefix !~ /^.\s+$/) &&
+ $prefix !~ /[{,]\s+$/) {
+ if (ERROR("BRACKET_SPACE",
+ "space prohibited before open square bracket '['\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/^(\+.*?)\s+\[/$1\[/;
+ }
+ }
+ }
+
+# Check operator spacing.
+ if (!($line=~/\#\s*include/)) {
+ my $fixed_line = "";
+ my $line_fixed = 0;
+
+ my $ops = qr{
+ <<=|>>=|<=|>=|==|!=|
+ \+=|-=|\*=|\/=|%=|\^=|\|=|&=|
+ =>|->|<<|>>|<|>|=|!|~|
+ &&|\|\||,|\^|\+\+|--|&|\||\+|-|\*|\/|%|
+ \?:|\?|:
+ }x;
+ my @elements = split(/($ops|;)/, $opline);
+
+## print("element count: <" . $#elements . ">\n");
+## foreach my $el (@elements) {
+## print("el: <$el>\n");
+## }
+
+ my @fix_elements = ();
+ my $off = 0;
+
+ foreach my $el (@elements) {
+ push(@fix_elements, substr($rawline, $off, length($el)));
+ $off += length($el);
+ }
+
+ $off = 0;
+
+ my $blank = copy_spacing($opline);
+ my $last_after = -1;
+
+ for (my $n = 0; $n < $#elements; $n += 2) {
+
+ my $good = $fix_elements[$n] . $fix_elements[$n + 1];
+
+## print("n: <$n> good: <$good>\n");
+
+ $off += length($elements[$n]);
+
+ # Pick up the preceding and succeeding characters.
+ my $ca = substr($opline, 0, $off);
+ my $cc = '';
+ if (length($opline) >= ($off + length($elements[$n + 1]))) {
+ $cc = substr($opline, $off + length($elements[$n + 1]));
+ }
+ my $cb = "$ca$;$cc";
+
+ my $a = '';
+ $a = 'V' if ($elements[$n] ne '');
+ $a = 'W' if ($elements[$n] =~ /\s$/);
+ $a = 'C' if ($elements[$n] =~ /$;$/);
+ $a = 'B' if ($elements[$n] =~ /(\[|\()$/);
+ $a = 'O' if ($elements[$n] eq '');
+ $a = 'E' if ($ca =~ /^\s*$/);
+
+ my $op = $elements[$n + 1];
+
+ my $c = '';
+ if (defined $elements[$n + 2]) {
+ $c = 'V' if ($elements[$n + 2] ne '');
+ $c = 'W' if ($elements[$n + 2] =~ /^\s/);
+ $c = 'C' if ($elements[$n + 2] =~ /^$;/);
+ $c = 'B' if ($elements[$n + 2] =~ /^(\)|\]|;)/);
+ $c = 'O' if ($elements[$n + 2] eq '');
+ $c = 'E' if ($elements[$n + 2] =~ /^\s*\\$/);
+ } else {
+ $c = 'E';
+ }
+
+ my $ctx = "${a}x${c}";
+
+ my $at = "(ctx:$ctx)";
+
+ my $ptr = substr($blank, 0, $off) . "^";
+ my $hereptr = "$hereline$ptr\n";
+
+ # Pull out the value of this operator.
+ my $op_type = substr($curr_values, $off + 1, 1);
+
+ # Get the full operator variant.
+ my $opv = $op . substr($curr_vars, $off, 1);
+
+ # Ignore operators passed as parameters.
+ if ($op_type ne 'V' &&
+ $ca =~ /\s$/ && $cc =~ /^\s*,/) {
+
+# # Ignore comments
+# } elsif ($op =~ /^$;+$/) {
+
+ # ; should have either the end of line or a space or \ after it
+ } elsif ($op eq ';') {
+ if ($ctx !~ /.x[WEBC]/ &&
+ $cc !~ /^\\/ && $cc !~ /^;/) {
+ if (ERROR("SPACING",
+ "space required after that '$op' $at\n" . $hereptr)) {
+ $good = $fix_elements[$n] . trim($fix_elements[$n + 1]) . " ";
+ $line_fixed = 1;
+ }
+ }
+
+ # // is a comment
+ } elsif ($op eq '//') {
+
+ # : when part of a bitfield
+ } elsif ($opv eq ':B') {
+ # skip the bitfield test for now
+
+ # No spaces for:
+ # ->
+ } elsif ($op eq '->') {
+ if ($ctx =~ /Wx.|.xW/) {
+ if (ERROR("SPACING",
+ "spaces prohibited around that '$op' $at\n" . $hereptr)) {
+ $good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]);
+ if (defined $fix_elements[$n + 2]) {
+ $fix_elements[$n + 2] =~ s/^\s+//;
+ }
+ $line_fixed = 1;
+ }
+ }
+
+ # , must have a space on the right.
+ } elsif ($op eq ',') {
+ if ($ctx !~ /.x[WEC]/ && $cc !~ /^}/) {
+ if (ERROR("SPACING",
+ "space required after that '$op' $at\n" . $hereptr)) {
+ $good = $fix_elements[$n] . trim($fix_elements[$n + 1]) . " ";
+ $line_fixed = 1;
+ $last_after = $n;
+ }
+ }
+
+ # '*' as part of a type definition -- reported already.
+ } elsif ($opv eq '*_') {
+ #warn "'*' is part of type\n";
+
+ # unary operators should have a space before and
+ # none after. May be left adjacent to another
+ # unary operator, or a cast
+ } elsif ($op eq '!' || $op eq '~' ||
+ $opv eq '*U' || $opv eq '-U' ||
+ $opv eq '&U' || $opv eq '&&U') {
+ if ($ctx !~ /[WEBC]x./ && $ca !~ /(?:\)|!|~|\*|-|\&|\||\+\+|\-\-|\{)$/) {
+ if (ERROR("SPACING",
+ "space required before that '$op' $at\n" . $hereptr)) {
+ if ($n != $last_after + 2) {
+ $good = $fix_elements[$n] . " " . ltrim($fix_elements[$n + 1]);
+ $line_fixed = 1;
+ }
+ }
+ }
+ if ($op eq '*' && $cc =~/\s*$Modifier\b/) {
+ # A unary '*' may be const
+
+ } elsif ($ctx =~ /.xW/) {
+ if (ERROR("SPACING",
+ "space prohibited after that '$op' $at\n" . $hereptr)) {
+ $good = $fix_elements[$n] . rtrim($fix_elements[$n + 1]);
+ if (defined $fix_elements[$n + 2]) {
+ $fix_elements[$n + 2] =~ s/^\s+//;
+ }
+ $line_fixed = 1;
+ }
+ }
+
+ # unary ++ and unary -- are allowed no space on one side.
+ } elsif ($op eq '++' or $op eq '--') {
+ if ($ctx !~ /[WEOBC]x[^W]/ && $ctx !~ /[^W]x[WOBEC]/) {
+ if (ERROR("SPACING",
+ "space required one side of that '$op' $at\n" . $hereptr)) {
+ $good = $fix_elements[$n] . trim($fix_elements[$n + 1]) . " ";
+ $line_fixed = 1;
+ }
+ }
+ if ($ctx =~ /Wx[BE]/ ||
+ ($ctx =~ /Wx./ && $cc =~ /^;/)) {
+ if (ERROR("SPACING",
+ "space prohibited before that '$op' $at\n" . $hereptr)) {
+ $good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]);
+ $line_fixed = 1;
+ }
+ }
+ if ($ctx =~ /ExW/) {
+ if (ERROR("SPACING",
+ "space prohibited after that '$op' $at\n" . $hereptr)) {
+ $good = $fix_elements[$n] . trim($fix_elements[$n + 1]);
+ if (defined $fix_elements[$n + 2]) {
+ $fix_elements[$n + 2] =~ s/^\s+//;
+ }
+ $line_fixed = 1;
+ }
+ }
+
+ # << and >> may either have or not have spaces both sides
+ } elsif ($op eq '<<' or $op eq '>>' or
+ $op eq '&' or $op eq '^' or $op eq '|' or
+ $op eq '+' or $op eq '-' or
+ $op eq '*' or $op eq '/' or
+ $op eq '%')
+ {
+ if ($ctx =~ /Wx[^WCE]|[^WCE]xW/) {
+ if (ERROR("SPACING",
+ "need consistent spacing around '$op' $at\n" . $hereptr)) {
+ $good = rtrim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " ";
+ if (defined $fix_elements[$n + 2]) {
+ $fix_elements[$n + 2] =~ s/^\s+//;
+ }
+ $line_fixed = 1;
+ }
+ }
+
+ # A colon needs no spaces before when it is
+ # terminating a case value or a label.
+ } elsif ($opv eq ':C' || $opv eq ':L') {
+ if ($ctx =~ /Wx./) {
+ if (ERROR("SPACING",
+ "space prohibited before that '$op' $at\n" . $hereptr)) {
+ $good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]);
+ $line_fixed = 1;
+ }
+ }
+
+ # All the others need spaces both sides.
+ } elsif ($ctx !~ /[EWC]x[CWE]/) {
+ my $ok = 0;
+
+ # Ignore email addresses <foo@bar>
+ if (($op eq '<' &&
+ $cc =~ /^\S+\@\S+>/) ||
+ ($op eq '>' &&
+ $ca =~ /<\S+\@\S+$/))
+ {
+ $ok = 1;
+ }
+
+ # messages are ERROR, but ?: are CHK
+ if ($ok == 0) {
+ my $msg_type = \&ERROR;
+ $msg_type = \&CHK if (($op eq '?:' || $op eq '?' || $op eq ':') && $ctx =~ /VxV/);
+
+ if (&{$msg_type}("SPACING",
+ "spaces required around that '$op' $at\n" . $hereptr)) {
+ $good = rtrim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " ";
+ if (defined $fix_elements[$n + 2]) {
+ $fix_elements[$n + 2] =~ s/^\s+//;
+ }
+ $line_fixed = 1;
+ }
+ }
+ }
+ $off += length($elements[$n + 1]);
+
+## print("n: <$n> GOOD: <$good>\n");
+
+ $fixed_line = $fixed_line . $good;
+ }
+
+ if (($#elements % 2) == 0) {
+ $fixed_line = $fixed_line . $fix_elements[$#elements];
+ }
+
+ if ($fix && $line_fixed && $fixed_line ne $fixed[$linenr - 1]) {
+ $fixed[$linenr - 1] = $fixed_line;
+ }
+
+
+ }
+
+# check for whitespace before a non-naked semicolon
+ if ($line =~ /^\+.*\S\s+;\s*$/) {
+ if (WARN("SPACING",
+ "space prohibited before semicolon\n" . $herecurr) &&
+ $fix) {
+ 1 while $fixed[$linenr - 1] =~
+ s/^(\+.*\S)\s+;/$1;/;
+ }
+ }
+
+# check for multiple assignments
+ if ($line =~ /^.\s*$Lval\s*=\s*$Lval\s*=(?!=)/) {
+ CHK("MULTIPLE_ASSIGNMENTS",
+ "multiple assignments should be avoided\n" . $herecurr);
+ }
+
+## # check for multiple declarations, allowing for a function declaration
+## # continuation.
+## if ($line =~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Ident.*/ &&
+## $line !~ /^.\s*$Type\s+$Ident(?:\s*=[^,{]*)?\s*,\s*$Type\s*$Ident.*/) {
+##
+## # Remove any bracketed sections to ensure we do not
+## # falsly report the parameters of functions.
+## my $ln = $line;
+## while ($ln =~ s/\([^\(\)]*\)//g) {
+## }
+## if ($ln =~ /,/) {
+## WARN("MULTIPLE_DECLARATION",
+## "declaring multiple variables together should be avoided\n" . $herecurr);
+## }
+## }
+
+#need space before brace following if, while, etc
+ if (($line =~ /\(.*\)\{/ && $line !~ /\($Type\)\{/) ||
+ $line =~ /do\{/) {
+ if (ERROR("SPACING",
+ "space required before the open brace '{'\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/^(\+.*(?:do|\)))\{/$1 {/;
+ }
+ }
+
+## # check for blank lines before declarations
+## if ($line =~ /^.\t+$Type\s+$Ident(?:\s*=.*)?;/ &&
+## $prevrawline =~ /^.\s*$/) {
+## WARN("SPACING",
+## "No blank lines before declarations\n" . $hereprev);
+## }
+##
+
+# closing brace should have a space following it when it has anything
+# on the line
+ if ($line =~ /}(?!(?:,|;|\)))\S/) {
+ if (ERROR("SPACING",
+ "space required after that close brace '}'\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/}((?!(?:,|;|\)))\S)/} $1/;
+ }
+ }
+
+# check spacing on square brackets
+ if ($line =~ /\[\s/ && $line !~ /\[\s*$/) {
+ if (ERROR("SPACING",
+ "space prohibited after that open square bracket '['\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/\[\s+/\[/;
+ }
+ }
+ if ($line =~ /\s\]/) {
+ if (ERROR("SPACING",
+ "space prohibited before that close square bracket ']'\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/\s+\]/\]/;
+ }
+ }
+
+# check spacing on parentheses
+ if ($line =~ /\(\s/ && $line !~ /\(\s*(?:\\)?$/ &&
+ $line !~ /for\s*\(\s+;/) {
+ if (ERROR("SPACING",
+ "space prohibited after that open parenthesis '('\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/\(\s+/\(/;
+ }
+ }
+ if ($line =~ /(\s+)\)/ && $line !~ /^.\s*\)/ &&
+ $line !~ /for\s*\(.*;\s+\)/ &&
+ $line !~ /:\s+\)/) {
+ if (ERROR("SPACING",
+ "space prohibited before that close parenthesis ')'\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/\s+\)/\)/;
+ }
+ }
+
+#goto labels aren't indented, allow a single space however
+ if ($line=~/^.\s+[A-Za-z\d_]+:(?![0-9]+)/ and
+ !($line=~/^. [A-Za-z\d_]+:/) and !($line=~/^.\s+default:/)) {
+ if (WARN("INDENTED_LABEL",
+ "labels should not be indented\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/^(.)\s+/$1/;
+ }
+ }
+
+# return is not a function
+ if (defined($stat) && $stat =~ /^.\s*return(\s*)\(/s) {
+ my $spacing = $1;
+ if ($^V && $^V ge 5.10.0 &&
+ $stat =~ /^.\s*return\s*($balanced_parens)\s*;\s*$/) {
+ my $value = $1;
+ $value = deparenthesize($value);
+ if ($value =~ m/^\s*$FuncArg\s*(?:\?|$)/) {
+ ERROR("RETURN_PARENTHESES",
+ "return is not a function, parentheses are not required\n" . $herecurr);
+ }
+ } elsif ($spacing !~ /\s+/) {
+ ERROR("SPACING",
+ "space required before the open parenthesis '('\n" . $herecurr);
+ }
+ }
+
+# unnecessary return in a void function
+# at end-of-function, with the previous line a single leading tab, then return;
+# and the line before that not a goto label target like "out:"
+ if ($sline =~ /^[ \+]}\s*$/ &&
+ $prevline =~ /^\+\treturn\s*;\s*$/ &&
+ $linenr >= 3 &&
+ $lines[$linenr - 3] =~ /^[ +]/ &&
+ $lines[$linenr - 3] !~ /^[ +]\s*$Ident\s*:/) {
+ WARN("RETURN_VOID",
+ "void function return statements are not generally useful\n" . $hereprev);
+ }
+
+# if statements using unnecessary parentheses - ie: if ((foo == bar))
+ if ($^V && $^V ge 5.10.0 &&
+ $line =~ /\bif\s*((?:\(\s*){2,})/) {
+ my $openparens = $1;
+ my $count = $openparens =~ tr@\(@\(@;
+ my $msg = "";
+ if ($line =~ /\bif\s*(?:\(\s*){$count,$count}$LvalOrFunc\s*($Compare)\s*$LvalOrFunc(?:\s*\)){$count,$count}/) {
+ my $comp = $4; #Not $1 because of $LvalOrFunc
+ $msg = " - maybe == should be = ?" if ($comp eq "==");
+ WARN("UNNECESSARY_PARENTHESES",
+ "Unnecessary parentheses$msg\n" . $herecurr);
+ }
+ }
+
+# Return of what appears to be an errno should normally be -'ve
+ if ($line =~ /^.\s*return\s*(E[A-Z]*)\s*;/) {
+ my $name = $1;
+ if ($name ne 'EOF' && $name ne 'ERROR') {
+ WARN("USE_NEGATIVE_ERRNO",
+ "return of an errno should typically be -ve (return -$1)\n" . $herecurr);
+ }
+ }
+
+# Need a space before open parenthesis after if, while etc
+ if ($line =~ /\b(if|while|for|switch)\(/) {
+ if (ERROR("SPACING",
+ "space required before the open parenthesis '('\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/\b(if|while|for|switch)\(/$1 \(/;
+ }
+ }
+
+# Check for illegal assignment in if conditional -- and check for trailing
+# statements after the conditional.
+ if ($line =~ /do\s*(?!{)/) {
+ ($stat, $cond, $line_nr_next, $remain_next, $off_next) =
+ ctx_statement_block($linenr, $realcnt, 0)
+ if (!defined $stat);
+ my ($stat_next) = ctx_statement_block($line_nr_next,
+ $remain_next, $off_next);
+ $stat_next =~ s/\n./\n /g;
+ ##print "stat<$stat> stat_next<$stat_next>\n";
+
+ if ($stat_next =~ /^\s*while\b/) {
+ # If the statement carries leading newlines,
+ # then count those as offsets.
+ my ($whitespace) =
+ ($stat_next =~ /^((?:\s*\n[+-])*\s*)/s);
+ my $offset =
+ statement_rawlines($whitespace) - 1;
+
+ $suppress_whiletrailers{$line_nr_next +
+ $offset} = 1;
+ }
+ }
+ if (!defined $suppress_whiletrailers{$linenr} &&
+ defined($stat) && defined($cond) &&
+ $line =~ /\b(?:if|while|for)\s*\(/ && $line !~ /^.\s*#/) {
+ my ($s, $c) = ($stat, $cond);
+
+ if ($c =~ /\bif\s*\(.*[^<>!=]=[^=].*/s) {
+ ERROR("ASSIGN_IN_IF",
+ "do not use assignment in if condition\n" . $herecurr);
+ }
+
+ # Find out what is on the end of the line after the
+ # conditional.
+ substr($s, 0, length($c), '');
+ $s =~ s/\n.*//g;
+ $s =~ s/$;//g; # Remove any comments
+ if (length($c) && $s !~ /^\s*{?\s*\\*\s*$/ &&
+ $c !~ /}\s*while\s*/)
+ {
+ # Find out how long the conditional actually is.
+ my @newlines = ($c =~ /\n/gs);
+ my $cond_lines = 1 + $#newlines;
+ my $stat_real = '';
+
+ $stat_real = raw_line($linenr, $cond_lines)
+ . "\n" if ($cond_lines);
+ if (defined($stat_real) && $cond_lines > 1) {
+ $stat_real = "[...]\n$stat_real";
+ }
+
+ ERROR("TRAILING_STATEMENTS",
+ "trailing statements should be on next line\n" . $herecurr . $stat_real);
+ }
+ }
+
+# Check for bitwise tests written as boolean
+ if ($line =~ /
+ (?:
+ (?:\[|\(|\&\&|\|\|)
+ \s*0[xX][0-9]+\s*
+ (?:\&\&|\|\|)
+ |
+ (?:\&\&|\|\|)
+ \s*0[xX][0-9]+\s*
+ (?:\&\&|\|\||\)|\])
+ )/x)
+ {
+ WARN("HEXADECIMAL_BOOLEAN_TEST",
+ "boolean test with hexadecimal, perhaps just 1 \& or \|?\n" . $herecurr);
+ }
+
+# if and else should not have general statements after it
+ if ($line =~ /^.\s*(?:}\s*)?else\b(.*)/) {
+ my $s = $1;
+ $s =~ s/$;//g; # Remove any comments
+ if ($s !~ /^\s*(?:\sif|(?:{|)\s*\\?\s*$)/) {
+ ERROR("TRAILING_STATEMENTS",
+ "trailing statements should be on next line\n" . $herecurr);
+ }
+ }
+# if should not continue a brace
+ if ($line =~ /}\s*if\b/) {
+ ERROR("TRAILING_STATEMENTS",
+ "trailing statements should be on next line\n" .
+ $herecurr);
+ }
+# case and default should not have general statements after them
+ if ($line =~ /^.\s*(?:case\s*.*|default\s*):/g &&
+ $line !~ /\G(?:
+ (?:\s*$;*)(?:\s*{)?(?:\s*$;*)(?:\s*\\)?\s*$|
+ \s*return\s+
+ )/xg)
+ {
+ ERROR("TRAILING_STATEMENTS",
+ "trailing statements should be on next line\n" . $herecurr);
+ }
+
+ # Check for }<nl>else {, these must be at the same
+ # indent level to be relevant to each other.
+ if ($prevline=~/}\s*$/ and $line=~/^.\s*else\s*/ and
+ $previndent == $indent) {
+ ERROR("ELSE_AFTER_BRACE",
+ "else should follow close brace '}'\n" . $hereprev);
+ }
+
+ if ($prevline=~/}\s*$/ and $line=~/^.\s*while\s*/ and
+ $previndent == $indent) {
+ my ($s, $c) = ctx_statement_block($linenr, $realcnt, 0);
+
+ # Find out what is on the end of the line after the
+ # conditional.
+ substr($s, 0, length($c), '');
+ $s =~ s/\n.*//g;
+
+ if ($s =~ /^\s*;/) {
+ ERROR("WHILE_AFTER_BRACE",
+ "while should follow close brace '}'\n" . $hereprev);
+ }
+ }
+
+#Specific variable tests
+ while ($line =~ m{($Constant|$Lval)}g) {
+ my $var = $1;
+
+#gcc binary extension
+ if ($var =~ /^$Binary$/) {
+ if (WARN("GCC_BINARY_CONSTANT",
+ "Avoid gcc v4.3+ binary constant extension: <$var>\n" . $herecurr) &&
+ $fix) {
+ my $hexval = sprintf("0x%x", oct($var));
+ $fixed[$linenr - 1] =~
+ s/\b$var\b/$hexval/;
+ }
+ }
+
+#CamelCase
+ if ($var !~ /^$Constant$/ &&
+ $var =~ /[A-Z][a-z]|[a-z][A-Z]/ &&
+#Ignore Page<foo> variants
+ $var !~ /^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ &&
+#Ignore SI style variants like nS, mV and dB (ie: max_uV, regulator_min_uA_show)
+ $var !~ /^(?:[a-z_]*?)_?[a-z][A-Z](?:_[a-z_]+)?$/) {
+ while ($var =~ m{($Ident)}g) {
+ my $word = $1;
+ next if ($word !~ /[A-Z][a-z]|[a-z][A-Z]/);
+ if ($check) {
+ seed_camelcase_includes();
+ if (!$file && !$camelcase_file_seeded) {
+ seed_camelcase_file($realfile);
+ $camelcase_file_seeded = 1;
+ }
+ }
+ if (!defined $camelcase{$word}) {
+ $camelcase{$word} = 1;
+ CHK("CAMELCASE",
+ "Avoid CamelCase: <$word>\n" . $herecurr);
+ }
+ }
+ }
+ }
+
+#no spaces allowed after \ in define
+ if ($line =~ /\#\s*define.*\\\s+$/) {
+ if (WARN("WHITESPACE_AFTER_LINE_CONTINUATION",
+ "Whitespace after \\ makes next lines useless\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/\s+$//;
+ }
+ }
+
+#warn if <asm/foo.h> is #included and <linux/foo.h> is available (uses RAW line)
+ if ($tree && $rawline =~ m{^.\s*\#\s*include\s*\<asm\/(.*)\.h\>}) {
+ my $file = "$1.h";
+ my $checkfile = "include/linux/$file";
+ if (-f "$root/$checkfile" &&
+ $realfile ne $checkfile &&
+ $1 !~ /$allowed_asm_includes/)
+ {
+ if ($realfile =~ m{^arch/}) {
+ CHK("ARCH_INCLUDE_LINUX",
+ "Consider using #include <linux/$file> instead of <asm/$file>\n" . $herecurr);
+ } else {
+ WARN("INCLUDE_LINUX",
+ "Use #include <linux/$file> instead of <asm/$file>\n" . $herecurr);
+ }
+ }
+ }
+
+# multi-statement macros should be enclosed in a do while loop, grab the
+# first statement and ensure its the whole macro if its not enclosed
+# in a known good container
+ if ($realfile !~ m@/vmlinux.lds.h$@ &&
+ $line =~ /^.\s*\#\s*define\s*$Ident(\()?/) {
+ my $ln = $linenr;
+ my $cnt = $realcnt;
+ my ($off, $dstat, $dcond, $rest);
+ my $ctx = '';
+ ($dstat, $dcond, $ln, $cnt, $off) =
+ ctx_statement_block($linenr, $realcnt, 0);
+ $ctx = $dstat;
+ #print "dstat<$dstat> dcond<$dcond> cnt<$cnt> off<$off>\n";
+ #print "LINE<$lines[$ln-1]> len<" . length($lines[$ln-1]) . "\n";
+
+ $dstat =~ s/^.\s*\#\s*define\s+$Ident(?:\([^\)]*\))?\s*//;
+ $dstat =~ s/$;//g;
+ $dstat =~ s/\\\n.//g;
+ $dstat =~ s/^\s*//s;
+ $dstat =~ s/\s*$//s;
+
+ # Flatten any parentheses and braces
+ while ($dstat =~ s/\([^\(\)]*\)/1/ ||
+ $dstat =~ s/\{[^\{\}]*\}/1/ ||
+ $dstat =~ s/\[[^\[\]]*\]/1/)
+ {
+ }
+
+ # Flatten any obvious string concatentation.
+ while ($dstat =~ s/("X*")\s*$Ident/$1/ ||
+ $dstat =~ s/$Ident\s*("X*")/$1/)
+ {
+ }
+
+ my $exceptions = qr{
+ $Declare|
+ module_param_named|
+ MODULE_PARM_DESC|
+ DECLARE_PER_CPU|
+ DEFINE_PER_CPU|
+ __typeof__\(|
+ union|
+ struct|
+ \.$Ident\s*=\s*|
+ ^\"|\"$
+ }x;
+ #print "REST<$rest> dstat<$dstat> ctx<$ctx>\n";
+ if ($dstat ne '' &&
+ $dstat !~ /^(?:$Ident|-?$Constant),$/ && # 10, // foo(),
+ $dstat !~ /^(?:$Ident|-?$Constant);$/ && # foo();
+ $dstat !~ /^[!~-]?(?:$Lval|$Constant)$/ && # 10 // foo() // !foo // ~foo // -foo // foo->bar // foo.bar->baz
+ $dstat !~ /^'X'$/ && # character constants
+ $dstat !~ /$exceptions/ &&
+ $dstat !~ /^\.$Ident\s*=/ && # .foo =
+ $dstat !~ /^(?:\#\s*$Ident|\#\s*$Constant)\s*$/ && # stringification #foo
+ $dstat !~ /^do\s*$Constant\s*while\s*$Constant;?$/ && # do {...} while (...); // do {...} while (...)
+ $dstat !~ /^for\s*$Constant$/ && # for (...)
+ $dstat !~ /^for\s*$Constant\s+(?:$Ident|-?$Constant)$/ && # for (...) bar()
+ $dstat !~ /^do\s*{/ && # do {...
+ $dstat !~ /^\(\{/ && # ({...
+ $ctx !~ /^.\s*#\s*define\s+TRACE_(?:SYSTEM|INCLUDE_FILE|INCLUDE_PATH)\b/)
+ {
+ $ctx =~ s/\n*$//;
+ my $herectx = $here . "\n";
+ my $cnt = statement_rawlines($ctx);
+
+ for (my $n = 0; $n < $cnt; $n++) {
+ $herectx .= raw_line($linenr, $n) . "\n";
+ }
+
+ if ($dstat =~ /;/) {
+ ERROR("MULTISTATEMENT_MACRO_USE_DO_WHILE",
+ "Macros with multiple statements should be enclosed in a do - while loop\n" . "$herectx");
+ } else {
+ ERROR("COMPLEX_MACRO",
+ "Macros with complex values should be enclosed in parenthesis\n" . "$herectx");
+ }
+ }
+
+# check for line continuations outside of #defines, preprocessor #, and asm
+
+ } else {
+ if ($prevline !~ /^..*\\$/ &&
+ $line !~ /^\+\s*\#.*\\$/ && # preprocessor
+ $line !~ /^\+.*\b(__asm__|asm)\b.*\\$/ && # asm
+ $line =~ /^\+.*\\$/) {
+ WARN("LINE_CONTINUATIONS",
+ "Avoid unnecessary line continuations\n" . $herecurr);
+ }
+ }
+
+# do {} while (0) macro tests:
+# single-statement macros do not need to be enclosed in do while (0) loop,
+# macro should not end with a semicolon
+ if ($^V && $^V ge 5.10.0 &&
+ $realfile !~ m@/vmlinux.lds.h$@ &&
+ $line =~ /^.\s*\#\s*define\s+$Ident(\()?/) {
+ my $ln = $linenr;
+ my $cnt = $realcnt;
+ my ($off, $dstat, $dcond, $rest);
+ my $ctx = '';
+ ($dstat, $dcond, $ln, $cnt, $off) =
+ ctx_statement_block($linenr, $realcnt, 0);
+ $ctx = $dstat;
+
+ $dstat =~ s/\\\n.//g;
+
+ if ($dstat =~ /^\+\s*#\s*define\s+$Ident\s*${balanced_parens}\s*do\s*{(.*)\s*}\s*while\s*\(\s*0\s*\)\s*([;\s]*)\s*$/) {
+ my $stmts = $2;
+ my $semis = $3;
+
+ $ctx =~ s/\n*$//;
+ my $cnt = statement_rawlines($ctx);
+ my $herectx = $here . "\n";
+
+ for (my $n = 0; $n < $cnt; $n++) {
+ $herectx .= raw_line($linenr, $n) . "\n";
+ }
+
+ if (($stmts =~ tr/;/;/) == 1 &&
+ $stmts !~ /^\s*(if|while|for|switch)\b/) {
+ WARN("SINGLE_STATEMENT_DO_WHILE_MACRO",
+ "Single statement macros should not use a do {} while (0) loop\n" . "$herectx");
+ }
+ if (defined $semis && $semis ne "") {
+ WARN("DO_WHILE_MACRO_WITH_TRAILING_SEMICOLON",
+ "do {} while (0) macros should not be semicolon terminated\n" . "$herectx");
+ }
+ } elsif ($dstat =~ /^\+\s*#\s*define\s+$Ident.*;\s*$/) {
+ $ctx =~ s/\n*$//;
+ my $cnt = statement_rawlines($ctx);
+ my $herectx = $here . "\n";
+
+ for (my $n = 0; $n < $cnt; $n++) {
+ $herectx .= raw_line($linenr, $n) . "\n";
+ }
+
+ WARN("TRAILING_SEMICOLON",
+ "macros should not use a trailing semicolon\n" . "$herectx");
+ }
+ }
+
+# make sure symbols are always wrapped with VMLINUX_SYMBOL() ...
+# all assignments may have only one of the following with an assignment:
+# .
+# ALIGN(...)
+# VMLINUX_SYMBOL(...)
+ if ($realfile eq 'vmlinux.lds.h' && $line =~ /(?:(?:^|\s)$Ident\s*=|=\s*$Ident(?:\s|$))/) {
+ WARN("MISSING_VMLINUX_SYMBOL",
+ "vmlinux.lds.h needs VMLINUX_SYMBOL() around C-visible symbols\n" . $herecurr);
+ }
+
+# check for redundant bracing round if etc
+ if ($line =~ /(^.*)\bif\b/ && $1 !~ /else\s*$/) {
+ my ($level, $endln, @chunks) =
+ ctx_statement_full($linenr, $realcnt, 1);
+ #print "chunks<$#chunks> linenr<$linenr> endln<$endln> level<$level>\n";
+ #print "APW: <<$chunks[1][0]>><<$chunks[1][1]>>\n";
+ if ($#chunks > 0 && $level == 0) {
+ my @allowed = ();
+ my $allow = 0;
+ my $seen = 0;
+ my $herectx = $here . "\n";
+ my $ln = $linenr - 1;
+ for my $chunk (@chunks) {
+ my ($cond, $block) = @{$chunk};
+
+ # If the condition carries leading newlines, then count those as offsets.
+ my ($whitespace) = ($cond =~ /^((?:\s*\n[+-])*\s*)/s);
+ my $offset = statement_rawlines($whitespace) - 1;
+
+ $allowed[$allow] = 0;
+ #print "COND<$cond> whitespace<$whitespace> offset<$offset>\n";
+
+ # We have looked at and allowed this specific line.
+ $suppress_ifbraces{$ln + $offset} = 1;
+
+ $herectx .= "$rawlines[$ln + $offset]\n[...]\n";
+ $ln += statement_rawlines($block) - 1;
+
+ substr($block, 0, length($cond), '');
+
+ $seen++ if ($block =~ /^\s*{/);
+
+ #print "cond<$cond> block<$block> allowed<$allowed[$allow]>\n";
+ if (statement_lines($cond) > 1) {
+ #print "APW: ALLOWED: cond<$cond>\n";
+ $allowed[$allow] = 1;
+ }
+ if ($block =~/\b(?:if|for|while)\b/) {
+ #print "APW: ALLOWED: block<$block>\n";
+ $allowed[$allow] = 1;
+ }
+ if (statement_block_size($block) > 1) {
+ #print "APW: ALLOWED: lines block<$block>\n";
+ $allowed[$allow] = 1;
+ }
+ $allow++;
+ }
+ if ($seen) {
+ my $sum_allowed = 0;
+ foreach (@allowed) {
+ $sum_allowed += $_;
+ }
+ if ($sum_allowed != 0 && $sum_allowed != $allow
+ && $seen != $allow) {
+ CHK("BRACES",
+ "braces {} should be used on all arms of this statement\n" . $herectx);
+ }
+ }
+ }
+ }
+ if (!defined $suppress_ifbraces{$linenr - 1} &&
+ $line =~ /\b(if|while|for|else)\b/) {
+ my $allowed = 0;
+
+ # Check the pre-context.
+ if (substr($line, 0, $-[0]) =~ /(\}\s*)$/) {
+ #print "APW: ALLOWED: pre<$1>\n";
+ $allowed = 1;
+ }
+
+ my ($level, $endln, @chunks) =
+ ctx_statement_full($linenr, $realcnt, $-[0]);
+
+ # Check the condition.
+ my ($cond, $block) = @{$chunks[0]};
+ #print "CHECKING<$linenr> cond<$cond> block<$block>\n";
+ if (defined $cond) {
+ substr($block, 0, length($cond), '');
+ }
+ if (statement_lines($cond) > 1) {
+ #print "APW: ALLOWED: cond<$cond>\n";
+ $allowed = 1;
+ }
+ if ($block =~/\b(?:if|for|while)\b/) {
+ #print "APW: ALLOWED: block<$block>\n";
+ $allowed = 1;
+ }
+ if (statement_block_size($block) > 1) {
+ #print "APW: ALLOWED: lines block<$block>\n";
+ $allowed = 1;
+ }
+ # Check the post-context.
+ if (defined $chunks[1]) {
+ my ($cond, $block) = @{$chunks[1]};
+ if (defined $cond) {
+ substr($block, 0, length($cond), '');
+ }
+ if ($block =~ /^\s*\{/) {
+ #print "APW: ALLOWED: chunk-1 block<$block>\n";
+ $allowed = 1;
+ }
+ }
+ }
+
+# check for unnecessary blank lines around braces
+ if (($line =~ /^.\s*}\s*$/ && $prevrawline =~ /^.\s*$/)) {
+ CHK("BRACES",
+ "Blank lines aren't necessary before a close brace '}'\n" . $hereprev);
+ }
+ if (($rawline =~ /^.\s*$/ && $prevline =~ /^..*{\s*$/)) {
+ CHK("BRACES",
+ "Blank lines aren't necessary after an open brace '{'\n" . $hereprev);
+ }
+
+# no volatiles please
+ my $asm_volatile = qr{\b(__asm__|asm)\s+(__volatile__|volatile)\b};
+ if ($line =~ /\bvolatile\b/ && $line !~ /$asm_volatile/) {
+ WARN("VOLATILE",
+ "Use of volatile is usually wrong: see Documentation/volatile-considered-harmful.txt\n" . $herecurr);
+ }
+
+# warn about #if 0
+ if ($line =~ /^.\s*\#\s*if\s+0\b/) {
+ CHK("REDUNDANT_CODE",
+ "if this code is redundant consider removing it\n" .
+ $herecurr);
+ }
+
+# check for needless "if (<foo>) fn(<foo>)" uses
+ if ($prevline =~ /\bif\s*\(\s*($Lval)\s*\)/) {
+ my $expr = '\s*\(\s*' . quotemeta($1) . '\s*\)\s*;';
+ if ($line =~ /\b(kfree|usb_free_urb|debugfs_remove(?:_recursive)?)$expr/) {
+ WARN('NEEDLESS_IF',
+ "$1(NULL) is safe this check is probably not required\n" . $hereprev);
+ }
+ }
+
+# check for bad placement of section $InitAttribute (e.g.: __initdata)
+ if ($line =~ /(\b$InitAttribute\b)/) {
+ my $attr = $1;
+ if ($line =~ /^\+\s*static\s+(?:const\s+)?(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*[=;]/) {
+ my $ptr = $1;
+ my $var = $2;
+ if ((($ptr =~ /\b(union|struct)\s+$attr\b/ &&
+ ERROR("MISPLACED_INIT",
+ "$attr should be placed after $var\n" . $herecurr)) ||
+ ($ptr !~ /\b(union|struct)\s+$attr\b/ &&
+ WARN("MISPLACED_INIT",
+ "$attr should be placed after $var\n" . $herecurr))) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e;
+ }
+ }
+ }
+
+# check for $InitAttributeData (ie: __initdata) with const
+ if ($line =~ /\bconst\b/ && $line =~ /($InitAttributeData)/) {
+ my $attr = $1;
+ $attr =~ /($InitAttributePrefix)(.*)/;
+ my $attr_prefix = $1;
+ my $attr_type = $2;
+ if (ERROR("INIT_ATTRIBUTE",
+ "Use of const init definition must use ${attr_prefix}initconst\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/$InitAttributeData/${attr_prefix}initconst/;
+ }
+ }
+
+# check for $InitAttributeConst (ie: __initconst) without const
+ if ($line !~ /\bconst\b/ && $line =~ /($InitAttributeConst)/) {
+ my $attr = $1;
+ if (ERROR("INIT_ATTRIBUTE",
+ "Use of $attr requires a separate use of const\n" . $herecurr) &&
+ $fix) {
+ my $lead = $fixed[$linenr - 1] =~
+ /(^\+\s*(?:static\s+))/;
+ $lead = rtrim($1);
+ $lead = "$lead " if ($lead !~ /^\+$/);
+ $lead = "${lead}const ";
+ $fixed[$linenr - 1] =~ s/(^\+\s*(?:static\s+))/$lead/;
+ }
+ }
+
+# don't use __constant_<foo> functions outside of include/uapi/
+ if ($realfile !~ m@^include/uapi/@ &&
+ $line =~ /(__constant_(?:htons|ntohs|[bl]e(?:16|32|64)_to_cpu|cpu_to_[bl]e(?:16|32|64)))\s*\(/) {
+ my $constant_func = $1;
+ my $func = $constant_func;
+ $func =~ s/^__constant_//;
+ if (WARN("CONSTANT_CONVERSION",
+ "$constant_func should be $func\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/\b$constant_func\b/$func/g;
+ }
+ }
+
+# prefer usleep_range over udelay
+ if ($line =~ /\budelay\s*\(\s*(\d+)\s*\)/) {
+ my $delay = $1;
+ # ignore udelay's < 10, however
+ if (! ($delay < 10) ) {
+ CHK("USLEEP_RANGE",
+ "usleep_range is preferred over udelay; see Documentation/timers/timers-howto.txt\n" . $herecurr);
+ }
+ if ($delay > 2000) {
+ WARN("LONG_UDELAY",
+ "long udelay - prefer mdelay; see arch/arm/include/asm/delay.h\n" . $herecurr);
+ }
+ }
+
+# warn about unexpectedly long msleep's
+ if ($line =~ /\bmsleep\s*\((\d+)\);/) {
+ if ($1 < 20) {
+ WARN("MSLEEP",
+ "msleep < 20ms can sleep for up to 20ms; see Documentation/timers/timers-howto.txt\n" . $herecurr);
+ }
+ }
+
+# check for comparisons of jiffies
+ if ($line =~ /\bjiffies\s*$Compare|$Compare\s*jiffies\b/) {
+ WARN("JIFFIES_COMPARISON",
+ "Comparing jiffies is almost always wrong; prefer time_after, time_before and friends\n" . $herecurr);
+ }
+
+# check for comparisons of get_jiffies_64()
+ if ($line =~ /\bget_jiffies_64\s*\(\s*\)\s*$Compare|$Compare\s*get_jiffies_64\s*\(\s*\)/) {
+ WARN("JIFFIES_COMPARISON",
+ "Comparing get_jiffies_64() is almost always wrong; prefer time_after64, time_before64 and friends\n" . $herecurr);
+ }
+
+# warn about spacing in #ifdefs
+ if ($line =~ /^.\s*\#\s*(ifdef|ifndef|elif)\s\s+/) {
+ if (ERROR("SPACING",
+ "exactly one space required after that #$1\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~
+ s/^(.\s*\#\s*(ifdef|ifndef|elif))\s{2,}/$1 /;
+ }
+
+ }
+
+# check for spinlock_t definitions without a comment.
+ if ($line =~ /^.\s*(struct\s+mutex|spinlock_t)\s+\S+;/ ||
+ $line =~ /^.\s*(DEFINE_MUTEX)\s*\(/) {
+ my $which = $1;
+ if (!ctx_has_comment($first_line, $linenr)) {
+ CHK("UNCOMMENTED_DEFINITION",
+ "$1 definition without comment\n" . $herecurr);
+ }
+ }
+# check for memory barriers without a comment.
+ if ($line =~ /\b(mb|rmb|wmb|read_barrier_depends|smp_mb|smp_rmb|smp_wmb|smp_read_barrier_depends)\(/) {
+ if (!ctx_has_comment($first_line, $linenr)) {
+ WARN("MEMORY_BARRIER",
+ "memory barrier without comment\n" . $herecurr);
+ }
+ }
+# check of hardware specific defines
+ if ($line =~ m@^.\s*\#\s*if.*\b(__i386__|__powerpc64__|__sun__|__s390x__)\b@ && $realfile !~ m@include/asm-@) {
+ CHK("ARCH_DEFINES",
+ "architecture specific defines should be avoided\n" . $herecurr);
+ }
+
+# Check that the storage class is at the beginning of a declaration
+ if ($line =~ /\b$Storage\b/ && $line !~ /^.\s*$Storage\b/) {
+ WARN("STORAGE_CLASS",
+ "storage class should be at the beginning of the declaration\n" . $herecurr)
+ }
+
+# check the location of the inline attribute, that it is between
+# storage class and type.
+ if ($line =~ /\b$Type\s+$Inline\b/ ||
+ $line =~ /\b$Inline\s+$Storage\b/) {
+ ERROR("INLINE_LOCATION",
+ "inline keyword should sit between storage class and type\n" . $herecurr);
+ }
+
+# Check for __inline__ and __inline, prefer inline
+ if ($realfile !~ m@\binclude/uapi/@ &&
+ $line =~ /\b(__inline__|__inline)\b/) {
+ if (WARN("INLINE",
+ "plain inline is preferred over $1\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/\b(__inline__|__inline)\b/inline/;
+
+ }
+ }
+
+# Check for __attribute__ packed, prefer __packed
+ if ($realfile !~ m@\binclude/uapi/@ &&
+ $line =~ /\b__attribute__\s*\(\s*\(.*\bpacked\b/) {
+ WARN("PREFER_PACKED",
+ "__packed is preferred over __attribute__((packed))\n" . $herecurr);
+ }
+
+# Check for __attribute__ aligned, prefer __aligned
+ if ($realfile !~ m@\binclude/uapi/@ &&
+ $line =~ /\b__attribute__\s*\(\s*\(.*aligned/) {
+ WARN("PREFER_ALIGNED",
+ "__aligned(size) is preferred over __attribute__((aligned(size)))\n" . $herecurr);
+ }
+
+# Check for __attribute__ format(printf, prefer __printf
+ if ($realfile !~ m@\binclude/uapi/@ &&
+ $line =~ /\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf/) {
+ if (WARN("PREFER_PRINTF",
+ "__printf(string-index, first-to-check) is preferred over __attribute__((format(printf, string-index, first-to-check)))\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex;
+
+ }
+ }
+
+# Check for __attribute__ format(scanf, prefer __scanf
+ if ($realfile !~ m@\binclude/uapi/@ &&
+ $line =~ /\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\b/) {
+ if (WARN("PREFER_SCANF",
+ "__scanf(string-index, first-to-check) is preferred over __attribute__((format(scanf, string-index, first-to-check)))\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)\)\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex;
+ }
+ }
+
+# check for sizeof(&)
+ if ($line =~ /\bsizeof\s*\(\s*\&/) {
+ WARN("SIZEOF_ADDRESS",
+ "sizeof(& should be avoided\n" . $herecurr);
+ }
+
+# check for sizeof without parenthesis
+ if ($line =~ /\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/) {
+ if (WARN("SIZEOF_PARENTHESIS",
+ "sizeof $1 should be sizeof($1)\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex;
+ }
+ }
+
+# check for line continuations in quoted strings with odd counts of "
+ if ($rawline =~ /\\$/ && $rawline =~ tr/"/"/ % 2) {
+ WARN("LINE_CONTINUATIONS",
+ "Avoid line continuations in quoted strings\n" . $herecurr);
+ }
+
+# check for struct spinlock declarations
+ if ($line =~ /^.\s*\bstruct\s+spinlock\s+\w+\s*;/) {
+ WARN("USE_SPINLOCK_T",
+ "struct spinlock should be spinlock_t\n" . $herecurr);
+ }
+
+# check for seq_printf uses that could be seq_puts
+ if ($sline =~ /\bseq_printf\s*\(.*"\s*\)\s*;\s*$/) {
+ my $fmt = get_quoted_string($line, $rawline);
+ if ($fmt ne "" && $fmt !~ /[^\\]\%/) {
+ if (WARN("PREFER_SEQ_PUTS",
+ "Prefer seq_puts to seq_printf\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/\bseq_printf\b/seq_puts/;
+ }
+ }
+ }
+
+# Check for misused memsets
+ if ($^V && $^V ge 5.10.0 &&
+ defined $stat &&
+ $stat =~ /^\+(?:.*?)\bmemset\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\,\s*$FuncArg\s*\)/s) {
+
+ my $ms_addr = $2;
+ my $ms_val = $7;
+ my $ms_size = $12;
+
+ if ($ms_size =~ /^(0x|)0$/i) {
+ ERROR("MEMSET",
+ "memset to 0's uses 0 as the 2nd argument, not the 3rd\n" . "$here\n$stat\n");
+ } elsif ($ms_size =~ /^(0x|)1$/i) {
+ WARN("MEMSET",
+ "single byte memset is suspicious. Swapped 2nd/3rd argument?\n" . "$here\n$stat\n");
+ }
+ }
+
+# typecasts on min/max could be min_t/max_t
+ if ($^V && $^V ge 5.10.0 &&
+ defined $stat &&
+ $stat =~ /^\+(?:.*?)\b(min|max)\s*\(\s*$FuncArg\s*,\s*$FuncArg\s*\)/) {
+ if (defined $2 || defined $7) {
+ my $call = $1;
+ my $cast1 = deparenthesize($2);
+ my $arg1 = $3;
+ my $cast2 = deparenthesize($7);
+ my $arg2 = $8;
+ my $cast;
+
+ if ($cast1 ne "" && $cast2 ne "" && $cast1 ne $cast2) {
+ $cast = "$cast1 or $cast2";
+ } elsif ($cast1 ne "") {
+ $cast = $cast1;
+ } else {
+ $cast = $cast2;
+ }
+ WARN("MINMAX",
+ "$call() should probably be ${call}_t($cast, $arg1, $arg2)\n" . "$here\n$stat\n");
+ }
+ }
+
+# check usleep_range arguments
+ if ($^V && $^V ge 5.10.0 &&
+ defined $stat &&
+ $stat =~ /^\+(?:.*?)\busleep_range\s*\(\s*($FuncArg)\s*,\s*($FuncArg)\s*\)/) {
+ my $min = $1;
+ my $max = $7;
+ if ($min eq $max) {
+ WARN("USLEEP_RANGE",
+ "usleep_range should not use min == max args; see Documentation/timers/timers-howto.txt\n" . "$here\n$stat\n");
+ } elsif ($min =~ /^\d+$/ && $max =~ /^\d+$/ &&
+ $min > $max) {
+ WARN("USLEEP_RANGE",
+ "usleep_range args reversed, use min then max; see Documentation/timers/timers-howto.txt\n" . "$here\n$stat\n");
+ }
+ }
+
+# check for naked sscanf
+ if ($^V && $^V ge 5.10.0 &&
+ defined $stat &&
+ $line =~ /\bsscanf\b/ &&
+ ($stat !~ /$Ident\s*=\s*sscanf\s*$balanced_parens/ &&
+ $stat !~ /\bsscanf\s*$balanced_parens\s*(?:$Compare)/ &&
+ $stat !~ /(?:$Compare)\s*\bsscanf\s*$balanced_parens/)) {
+ my $lc = $stat =~ tr@\n@@;
+ $lc = $lc + $linenr;
+ my $stat_real = raw_line($linenr, 0);
+ for (my $count = $linenr + 1; $count <= $lc; $count++) {
+ $stat_real = $stat_real . "\n" . raw_line($count, 0);
+ }
+ WARN("NAKED_SSCANF",
+ "unchecked sscanf return value\n" . "$here\n$stat_real\n");
+ }
+
+# check for simple sscanf that should be kstrto<foo>
+ if ($^V && $^V ge 5.10.0 &&
+ defined $stat &&
+ $line =~ /\bsscanf\b/) {
+ my $lc = $stat =~ tr@\n@@;
+ $lc = $lc + $linenr;
+ my $stat_real = raw_line($linenr, 0);
+ for (my $count = $linenr + 1; $count <= $lc; $count++) {
+ $stat_real = $stat_real . "\n" . raw_line($count, 0);
+ }
+ if ($stat_real =~ /\bsscanf\b\s*\(\s*$FuncArg\s*,\s*("[^"]+")/) {
+ my $format = $6;
+ my $count = $format =~ tr@%@%@;
+ if ($count == 1 &&
+ $format =~ /^"\%(?i:ll[udxi]|[udxi]ll|ll|[hl]h?[udxi]|[udxi][hl]h?|[hl]h?|[udxi])"$/) {
+ WARN("SSCANF_TO_KSTRTO",
+ "Prefer kstrto<type> to single variable sscanf\n" . "$here\n$stat_real\n");
+ }
+ }
+ }
+
+# check for new externs in .h files.
+ if ($realfile =~ /\.h$/ &&
+ $line =~ /^\+\s*(extern\s+)$Type\s*$Ident\s*\(/s) {
+ if (CHK("AVOID_EXTERNS",
+ "extern prototypes should be avoided in .h files\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/;
+ }
+ }
+
+# check for new externs in .c files.
+ if ($realfile =~ /\.c$/ && defined $stat &&
+ $stat =~ /^.\s*(?:extern\s+)?$Type\s+($Ident)(\s*)\(/s)
+ {
+ my $function_name = $1;
+ my $paren_space = $2;
+
+ my $s = $stat;
+ if (defined $cond) {
+ substr($s, 0, length($cond), '');
+ }
+ if ($s =~ /^\s*;/ &&
+ $function_name ne 'uninitialized_var')
+ {
+ WARN("AVOID_EXTERNS",
+ "externs should be avoided in .c files\n" . $herecurr);
+ }
+
+ if ($paren_space =~ /\n/) {
+ WARN("FUNCTION_ARGUMENTS",
+ "arguments for function declarations should follow identifier\n" . $herecurr);
+ }
+
+ } elsif ($realfile =~ /\.c$/ && defined $stat &&
+ $stat =~ /^.\s*extern\s+/)
+ {
+ WARN("AVOID_EXTERNS",
+ "externs should be avoided in .c files\n" . $herecurr);
+ }
+
+# check for pointless casting of kmalloc return
+ if ($line =~ /\*\s*\)\s*[kv][czm]alloc(_node){0,1}\b/) {
+ WARN("UNNECESSARY_CASTS",
+ "unnecessary cast may hide bugs, see http://c-faq.com/malloc/mallocnocast.html\n" . $herecurr);
+ }
+
+# alloc style
+# p = alloc(sizeof(struct foo), ...) should be p = alloc(sizeof(*p), ...)
+ if ($^V && $^V ge 5.10.0 &&
+ $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*([kv][mz]alloc(?:_node)?)\s*\(\s*(sizeof\s*\(\s*struct\s+$Lval\s*\))/) {
+ CHK("ALLOC_SIZEOF_STRUCT",
+ "Prefer $3(sizeof(*$1)...) over $3($4...)\n" . $herecurr);
+ }
+
+# check for multiple semicolons
+ if ($line =~ /;\s*;\s*$/) {
+ if (WARN("ONE_SEMICOLON",
+ "Statements terminations use 1 semicolon\n" . $herecurr) &&
+ $fix) {
+ $fixed[$linenr - 1] =~ s/(\s*;\s*){2,}$/;/g;
+ }
+ }
+
+# check for case / default statements not preceeded by break/fallthrough/switch
+ if ($line =~ /^.\s*(?:case\s+(?:$Ident|$Constant)\s*|default):/) {
+ my $has_break = 0;
+ my $has_statement = 0;
+ my $count = 0;
+ my $prevline = $linenr;
+ while ($prevline > 1 && $count < 3 && !$has_break) {
+ $prevline--;
+ my $rline = $rawlines[$prevline - 1];
+ my $fline = $lines[$prevline - 1];
+ last if ($fline =~ /^\@\@/);
+ next if ($fline =~ /^\-/);
+ next if ($fline =~ /^.(?:\s*(?:case\s+(?:$Ident|$Constant)[\s$;]*|default):[\s$;]*)*$/);
+ $has_break = 1 if ($rline =~ /fall[\s_-]*(through|thru)/i);
+ next if ($fline =~ /^.[\s$;]*$/);
+ $has_statement = 1;
+ $count++;
+ $has_break = 1 if ($fline =~ /\bswitch\b|\b(?:break\s*;[\s$;]*$|return\b|goto\b|continue\b)/);
+ }
+ if (!$has_break && $has_statement) {
+ WARN("MISSING_BREAK",
+ "Possible switch case/default not preceeded by break or fallthrough comment\n" . $herecurr);
+ }
+ }
+
+# check for switch/default statements without a break;
+ if ($^V && $^V ge 5.10.0 &&
+ defined $stat &&
+ $stat =~ /^\+[$;\s]*(?:case[$;\s]+\w+[$;\s]*:[$;\s]*|)*[$;\s]*\bdefault[$;\s]*:[$;\s]*;/g) {
+ my $ctx = '';
+ my $herectx = $here . "\n";
+ my $cnt = statement_rawlines($stat);
+ for (my $n = 0; $n < $cnt; $n++) {
+ $herectx .= raw_line($linenr, $n) . "\n";
+ }
+ WARN("DEFAULT_NO_BREAK",
+ "switch default: should use break\n" . $herectx);
+ }
+
+# check for comparisons against true and false
+ if ($line =~ /\+\s*(.*?)\b(true|false|$Lval)\s*(==|\!=)\s*(true|false|$Lval)\b(.*)$/i) {
+ my $lead = $1;
+ my $arg = $2;
+ my $test = $3;
+ my $otype = $4;
+ my $trail = $5;
+ my $op = "!";
+
+ ($arg, $otype) = ($otype, $arg) if ($arg =~ /^(?:true|false)$/i);
+
+ my $type = lc($otype);
+ if ($type =~ /^(?:true|false)$/) {
+ if (("$test" eq "==" && "$type" eq "true") ||
+ ("$test" eq "!=" && "$type" eq "false")) {
+ $op = "";
+ }
+
+ CHK("BOOL_COMPARISON",
+ "Using comparison to $otype is error prone\n" . $herecurr);
+ }
+ }
+
+# check for semaphores initialized locked
+ if ($line =~ /^.\s*sema_init.+,\W?0\W?\)/) {
+ WARN("CONSIDER_COMPLETION",
+ "consider using a completion\n" . $herecurr);
+ }
+
+# check for %L{u,d,i} in strings
+ my $string;
+ while ($line =~ /(?:^|")([X\t]*)(?:"|$)/g) {
+ $string = substr($rawline, $-[1], $+[1] - $-[1]);
+ $string =~ s/%%/__/g;
+ if ($string =~ /(?<!%)%L[udi]/) {
+ WARN("PRINTF_L",
+ "\%Ld/%Lu are not-standard C, use %lld/%llu\n" . $herecurr);
+ last;
+ }
+ }
+
+
+# Mode permission misuses where it seems decimal should be octal
+# This uses a shortcut match to avoid unnecessary uses of a slow foreach loop
+ if ($^V && $^V ge 5.10.0 &&
+ $line =~ /$mode_perms_search/) {
+ foreach my $entry (@mode_permission_funcs) {
+ my $func = $entry->[0];
+ my $arg_pos = $entry->[1];
+
+ my $skip_args = "";
+ if ($arg_pos > 1) {
+ $arg_pos--;
+ $skip_args = "(?:\\s*$FuncArg\\s*,\\s*){$arg_pos,$arg_pos}";
+ }
+ my $test = "\\b$func\\s*\\(${skip_args}([\\d]+)\\s*[,\\)]";
+ if ($line =~ /$test/) {
+ my $val = $1;
+ $val = $6 if ($skip_args ne "");
+
+ if ($val !~ /^0$/ &&
+ (($val =~ /^$Int$/ && $val !~ /^$Octal$/) ||
+ length($val) ne 4)) {
+ ERROR("NON_OCTAL_PERMISSIONS",
+ "Use 4 digit octal (0777) not decimal permissions\n" . $herecurr);
+ }
+ }
+ }
+ }
+ }
+
+ # If we have no input at all, then there is nothing to report on
+ # so just keep quiet.
+ if ($#rawlines == -1) {
+ exit(0);
+ }
+
+ # In mailback mode only produce a report in the negative, for
+ # things that appear to be patches.
+ if ($mailback && ($clean == 1 || !$is_patch)) {
+ exit(0);
+ }
+
+ # This is not a patch, and we are are in 'no-patch' mode so
+ # just keep quiet.
+ if (!$chk_patch && !$is_patch) {
+ exit(0);
+ }
+
+ if (!$is_patch) {
+ ERROR("NOT_UNIFIED_DIFF",
+ "Does not appear to be a unified-diff format patch\n");
+ }
+ if ($is_patch && $subject_trailing_dot != 0) {
+ ERROR("SUBJECT_TRAILING_DOT",
+ "The subject of the patch should not end with a dot.\n");
+ }
+ if ($is_patch && $chk_signoff && $signoff == 0) {
+ ERROR("MISSING_SIGN_OFF",
+ "Missing Signed-off-by: line(s)\n");
+ }
+
+ print report_dump();
+ if ($summary && !($clean == 1 && $quiet == 1)) {
+ print "$filename " if ($summary_file);
+ if ($cnt_error > 0) {
+ print "Patch not according to coding guidelines! please fix.\n";
+ print "total: $cnt_error errors, $cnt_warn warnings, " .
+ (($check)? "$cnt_chk checks, " : "") .
+ "$cnt_lines lines checked\n"; exit 1;
+ } else {
+ print "total: $cnt_warn warnings, " .
+ (($check)? "$cnt_chk checks, " : "") .
+ "$cnt_lines lines checked\n";
+ print "Patch found to have warnings, please fix if necessary.\n" if ($cnt_warn > 0);
+ exit 2;
+ }
+ print "\n" if ($quiet == 0);
+ }
+
+ if ($quiet == 0) {
+
+ if ($^V lt 5.10.0) {
+ print("NOTE: perl $^V is not modern enough to detect all possible issues.\n");
+ print("An upgrade to at least perl v5.10.0 is suggested.\n\n");
+ }
+
+ # If there were whitespace errors which cleanpatch can fix
+ # then suggest that.
+ if ($rpt_cleaners) {
+ print "NOTE: whitespace errors detected, you may wish to use scripts/cleanpatch or\n";
+ print " scripts/cleanfile\n\n";
+ $rpt_cleaners = 0;
+ }
+ }
+
+ hash_show_words(\%use_type, "Used");
+ hash_show_words(\%ignore_type, "Ignored");
+
+ if ($clean == 0 && $fix && "@rawlines" ne "@fixed") {
+ my $newfile = $filename;
+ $newfile .= ".EXPERIMENTAL-checkpatch-fixes" if (!$fix_inplace);
+ my $linecount = 0;
+ my $f;
+
+ open($f, '>', $newfile)
+ or die "$P: Can't open $newfile for write\n";
+ foreach my $fixed_line (@fixed) {
+ $linecount++;
+ if ($file) {
+ if ($linecount > 3) {
+ $fixed_line =~ s/^\+//;
+ print $f $fixed_line. "\n";
+ }
+ } else {
+ print $f $fixed_line . "\n";
+ }
+ }
+ close($f);
+
+ if (!$quiet) {
+ print << "EOM";
+Wrote EXPERIMENTAL --fix correction(s) to '$newfile'
+
+Do _NOT_ trust the results written to this file.
+Do _NOT_ submit these changes without inspecting them for correctness.
+
+This EXPERIMENTAL file is simply a convenience to help rewrite patches.
+No warranties, expressed or implied...
+EOM
+ }
+ }
+
+ if ($clean == 1 && $quiet == 0) {
+ print "$vname has no obvious style problems and is ready for submission.\n"
+ }
+ if ($clean == 0 && $quiet == 0) {
+ print << "EOM";
+$vname has style problems, please review.
+
+If any of these errors are false positives, please report
+them to the maintainer, see MAINTAINERS
+EOM
+ }
+
+ return $clean;
+}
diff --git a/build-aux/config.guess.dist b/build-aux/config.guess.dist
new file mode 100755
index 00000000000..881ba7a0438
--- /dev/null
+++ b/build-aux/config.guess.dist
@@ -0,0 +1,14 @@
+#!/bin/sh
+#
+# This script is intentionally left empty. Distributions that package GlusterFS
+# may want to to replace it with an updated copy from the automake project.
+#
+
+cat << EOM
+It is not expected to execute this script. When you are building from a
+released tarball (generated with 'make dist'), you are expected to pass
+--build=... and --host=... to ./configure or replace this config.guess script
+in the sources with an updated version.
+EOM
+
+exit 0
diff --git a/build-aux/config.sub.dist b/build-aux/config.sub.dist
new file mode 100755
index 00000000000..c5a0dbad282
--- /dev/null
+++ b/build-aux/config.sub.dist
@@ -0,0 +1,14 @@
+#!/bin/sh
+#
+# This script is intentionally left empty. Distributions that package GlusterFS
+# may want to to replace it with an updated copy from the automake project.
+#
+
+cat << EOM
+It is not expected to execute this script. When you are building from a
+released tarball (generated with 'make dist'), you are expected to pass
+--build=... and --host=... to ./configure or replace this config.sub script in
+the sources with an updated version.
+EOM
+
+exit 0
diff --git a/build-aux/pkg-version b/build-aux/pkg-version
new file mode 100755
index 00000000000..17ceab70c03
--- /dev/null
+++ b/build-aux/pkg-version
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+# To override version/release from git,
+# create VERSION file containing text with version/release
+# eg. v3.4.0-1
+
+# One thing to note, If one does 'git clone --depth N glusterfs.git',
+# the git describe command doesn't work. Hence you notice below that
+# we have added timestamp as version (YYYY.MM.DD) and release (HH.mmss)
+PKG_VERSION=`cat VERSION 2> /dev/null || git describe --tags --match "v[0-9]*" 2>/dev/null`
+
+get_version()
+{
+ # tags and output versions:
+ # - v3.4.0 => 3.4.0 (upstream clean)
+ # - v3.4.0-1 => 3.4.0 (downstream clean)
+ # - v3.4.0-2-g34e62f => 3.4.0 (upstream dirty)
+ # - v3.4.0-1-2-g34e62f => 3.4.0 (downstream dirty)
+ AWK_VERSION='
+ BEGIN { FS="-" }
+ /^v[0-9]/ {
+ sub(/^v/,"") ; print $1
+ }'
+
+ version=$(echo $PKG_VERSION | awk "$AWK_VERSION" | tr -cd '[:alnum:].')
+ if [ "x${version}" == "x" ] ; then
+ version=$(date +%Y.%m.%d | tr -d '\n')
+ fi
+ echo $version | tr -d '\n'
+}
+
+get_release()
+{
+ # tags and output releases:
+ # - v3.4.0 => 0 (upstream clean)
+ # - v3.4.0-1 => 1 (downstream clean)
+ # - v3.4.0-2-g34e62f1 => 2.git34e62f1 (upstream dirty)
+ # - v3.4.0-1-2-g34e62f1 => 1.2.git34e62f1 (downstream dirty)
+ AWK_RELEASE='
+ BEGIN { FS="-"; OFS="." }
+ /^v[0-9]/ {
+ if (NF == 1) print 0
+ else if (NF == 2) print $2
+ else if (NF == 3) print $2, "git" substr($3, 2)
+ else if (NF == 4) print $2, $3, "git" substr($4, 2)
+ }'
+
+ release=$(echo $PKG_VERSION | awk "$AWK_RELEASE" | tr -cd '[:alnum:].')
+ if [ "x${release}" == "x" ] ; then
+ release=$(date +%H.%M%S | tr -d '\n')
+ fi
+ echo $release | tr -d '\n'
+}
+
+if test "x$1" = "x--full"; then
+ echo -n "v$(get_version)-$(get_release)"
+elif test "x$1" = "x--version"; then
+ get_version
+elif test "x$1" = "x--release"; then
+ get_release
+else
+ echo "usage: $0 [--full|--version|--release]"
+ exit 1
+fi
diff --git a/cli/src/Makefile.am b/cli/src/Makefile.am
index 80028361886..16063f27c7f 100644
--- a/cli/src/Makefile.am
+++ b/cli/src/Makefile.am
@@ -1,26 +1,37 @@
sbin_PROGRAMS = gluster
-gluster_SOURCES = cli.c registry.c input.c cli-cmd.c cli-rl.c \
+gluster_SOURCES = cli.c registry.c input.c cli-cmd.c cli-rl.c cli-cmd-global.c \
cli-cmd-volume.c cli-cmd-peer.c cli-rpc-ops.c cli-cmd-parser.c\
- cli-cmd-system.c cli-cmd-misc.c cli-xml-output.c
+ cli-cmd-system.c cli-cmd-misc.c cli-xml-output.c cli-quotad-client.c cli-cmd-snapshot.c
-gluster_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GF_LDADD)\
+gluster_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GF_LDADD) \
+ $(top_builddir)/libglusterd/src/libglusterd.la \
$(RLLIBS) $(top_builddir)/rpc/xdr/src/libgfxdr.la \
- $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(XML_LIBS)
-gluster_LDFLAGS = $(GF_LDFLAGS) $(GF_GLUSTERFS_LDFLAGS) $(LIBXML2_LIBS)
-noinst_HEADERS = cli.h cli-mem-types.h cli-cmd.h
+gluster_LDFLAGS = $(GF_LDFLAGS)
+noinst_HEADERS = cli.h cli-mem-types.h cli-cmd.h cli-quotad-client.h
-AM_CFLAGS = -fPIC -Wall -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src\
- -I$(top_srcdir)/rpc/xdr/src\
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/libglusterd/src \
+ -I$(top_builddir)/rpc/xdr/src \
-DDATADIR=\"$(localstatedir)\" \
- -DCONFDIR=\"$(sysconfdir)/glusterfs\" $(GF_GLUSTERFS_CFLAGS)\
- -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\
- -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) -DSBIN_DIR=\"$(sbindir)\"\
- $(LIBXML2_CFLAGS)
+ -DCONFDIR=\"$(sysconfdir)/glusterfs\" \
+ -DGSYNCD_PREFIX=\"$(GLUSTERFS_LIBEXECDIR)\"\
+ -DGLFSHEAL_PREFIX=\"$(GLUSTERFS_LIBEXECDIR)\"\
+ -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE)
+
+AM_CFLAGS = -Wall $(GF_CFLAGS) $(XML_CFLAGS)
CLEANFILES =
$(top_builddir)/libglusterfs/src/libglusterfs.la:
$(MAKE) -C $(top_builddir)/libglusterfs/src/ all
+
+$(top_builddir)/libglusterd/src/libglusterd.la:
+ $(MAKE) -C $(top_builddir)/libglusterd/src/ all
+
+install-data-hook:
+ $(mkdir_p) $(DESTDIR)$(localstatedir)/run/gluster
diff --git a/cli/src/cli-cmd-global.c b/cli/src/cli-cmd-global.c
new file mode 100644
index 00000000000..2c9a5f01bb1
--- /dev/null
+++ b/cli/src/cli-cmd-global.c
@@ -0,0 +1,195 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#include <sys/socket.h>
+#include <netdb.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <netinet/in.h>
+
+#include "cli.h"
+#include "cli-cmd.h"
+#include "cli-mem-types.h"
+#include "cli1-xdr.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/common-utils.h>
+
+int
+cli_cmd_global_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
+int
+cli_cmd_get_state_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
+
+int
+cli_cmd_ganesha_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
+
+struct cli_cmd global_cmds[] = {
+ {
+ "global help",
+ cli_cmd_global_help_cbk,
+ "list global commands",
+ },
+ {
+ "get-state [<daemon>] [[odir </path/to/output/dir/>] "
+ "[file <filename>]] [detail|volumeoptions]",
+ cli_cmd_get_state_cbk,
+ "Get local state representation of mentioned daemon",
+ },
+ {
+ "nfs-ganesha {enable| disable} ",
+ cli_cmd_ganesha_cbk,
+ "Enable/disable NFS-Ganesha support",
+ },
+ {NULL, NULL, NULL}};
+
+int
+cli_cmd_global_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
+{
+ struct cli_cmd *cmd = NULL;
+ struct cli_cmd *global_cmd = NULL;
+ int count = 0;
+
+ cmd = GF_MALLOC(sizeof(global_cmds), cli_mt_cli_cmd);
+ memcpy(cmd, global_cmds, sizeof(global_cmds));
+ count = (sizeof(global_cmds) / sizeof(struct cli_cmd));
+ cli_cmd_sort(cmd, count);
+
+ cli_out("\ngluster global commands");
+ cli_out("========================\n");
+ for (global_cmd = cmd; global_cmd->pattern; global_cmd++)
+ if (_gf_false == global_cmd->disable)
+ cli_out("%s - %s", global_cmd->pattern, global_cmd->desc);
+
+ cli_out("\n");
+ GF_FREE(cmd);
+ return 0;
+}
+
+int
+cli_cmd_global_register(struct cli_state *state)
+{
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
+ for (cmd = global_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+out:
+ return ret;
+}
+
+int
+cli_cmd_ganesha_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+
+{
+ int sent = 0;
+ int parse_error = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ cli_local_t *local = NULL;
+ char *op_errstr = NULL;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GANESHA];
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ ret = cli_cmd_ganesha_parse(state, words, wordcount, &options, &op_errstr);
+ if (ret) {
+ if (op_errstr) {
+ cli_err("%s", op_errstr);
+ GF_FREE(op_errstr);
+ } else
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Setting global option failed");
+ }
+
+ CLI_STACK_DESTROY(frame);
+ return ret;
+}
+
+int
+cli_cmd_get_state_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int sent = 0;
+ int parse_error = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ cli_local_t *local = NULL;
+ char *op_errstr = NULL;
+
+ ret = cli_cmd_get_state_parse(state, words, wordcount, &options,
+ &op_errstr);
+
+ if (ret) {
+ if (op_errstr) {
+ cli_err("%s", op_errstr);
+ cli_usage_out(word->pattern);
+ GF_FREE(op_errstr);
+ } else
+ cli_usage_out(word->pattern);
+
+ parse_error = 1;
+ goto out;
+ }
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_STATE];
+ if (proc->fn)
+ ret = proc->fn(frame, THIS, options);
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Getting daemon state failed");
+ }
+
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
+}
diff --git a/cli/src/cli-cmd-misc.c b/cli/src/cli-cmd-misc.c
index 66d755fc2dc..e961d88da86 100644
--- a/cli/src/cli-cmd-misc.c
+++ b/cli/src/cli-cmd-misc.c
@@ -1,105 +1,117 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "cli.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
#include "protocol-common.h"
-extern struct rpc_clnt *global_rpc;
-
-extern rpc_clnt_prog_t *cli_rpc_prog;
-
extern struct cli_cmd volume_cmds[];
+extern struct cli_cmd bitrot_cmds[];
+extern struct cli_cmd quota_cmds[];
extern struct cli_cmd cli_probe_cmds[];
extern struct cli_cmd cli_log_cmds[];
extern struct cli_cmd cli_system_cmds[];
+extern struct cli_cmd cli_bd_cmds[];
+extern struct cli_cmd snapshot_cmds[];
+extern struct cli_cmd global_cmds[];
struct cli_cmd cli_misc_cmds[];
int
-cli_cmd_quit_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_quit_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- exit (0);
+ exit(0);
}
-int
-cli_cmd_display_help (struct cli_state *state, struct cli_cmd_word *in_word,
- const char **words, int wordcount)
+static gf_boolean_t
+cli_is_help_command(const char *pattern)
{
- struct cli_cmd *cmd[] = {volume_cmds, cli_probe_cmds,
- cli_misc_cmds, NULL};
- struct cli_cmd *cmd_ind = NULL;
- int i = 0;
-
- /* cli_system_cmds commands for internal usage
- they are not exposed
- */
- for (i=0; cmd[i]!=NULL; i++)
- for (cmd_ind = cmd[i]; cmd_ind->pattern; cmd_ind++)
- if (_gf_false == cmd_ind->disable)
- cli_out ("%s - %s", cmd_ind->pattern,
- cmd_ind->desc);
-
- return 0;
+ /* FixFixFix
+ * This is not the best way to determine whether
+ * this is a help command
+ */
+ if (strstr(pattern, "help"))
+ return _gf_true;
+
+ return _gf_false;
}
-struct cli_cmd cli_misc_cmds[] = {
- { "quit",
- cli_cmd_quit_cbk,
- "quit"},
+int
+cli_cmd_display_help(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
+{
+ static struct cli_cmd *cmd[] = {
+ cli_misc_cmds, cli_probe_cmds, volume_cmds, bitrot_cmds,
+ quota_cmds, snapshot_cmds, global_cmds, NULL};
+ struct cli_cmd *cmd_ind = NULL;
+ int i = 0;
+ gf_boolean_t list_all = _gf_false;
+
+ /* cli_system_cmds commands for internal usage
+ they are not exposed
+ */
+
+ /* If "help all" */
+ if (wordcount == 2)
+ list_all = _gf_true;
+
+ for (i = 0; cmd[i] != NULL; i++) {
+ for (cmd_ind = cmd[i]; cmd_ind->pattern; cmd_ind++) {
+ if ((_gf_false == cmd_ind->disable) &&
+ cli_is_help_command(cmd_ind->pattern)) {
+ if (list_all && (cmd_ind->cbk)) {
+ cmd_ind->cbk(state, in_word, words, wordcount);
+ } else {
+ cli_out(" %-25s- %s", cmd_ind->pattern, cmd_ind->desc);
+ }
+ }
+ }
+ }
+
+ cli_out("\n");
+ return 0;
+}
- { "help",
- cli_cmd_display_help,
- "display command options"},
+struct cli_cmd cli_help_cmds[] = {
+ {"help [all]", cli_cmd_display_help, "display help for command classes"},
- { "exit",
- cli_cmd_quit_cbk,
- "exit"},
+ {NULL, NULL, NULL}};
- { NULL, NULL, NULL }
-};
+struct cli_cmd cli_misc_cmds[] = {{"quit", cli_cmd_quit_cbk, "quit"},
+ {"exit", cli_cmd_quit_cbk, "exit"},
+ {NULL, NULL, NULL}};
int
-cli_cmd_misc_register (struct cli_state *state)
+cli_cmd_misc_register(struct cli_state *state)
{
- int ret = 0;
- struct cli_cmd *cmd = NULL;
-
- for (cmd = cli_misc_cmds; cmd->pattern; cmd++) {
-
- ret = cli_cmd_register (&state->tree, cmd);
- if (ret)
- goto out;
- }
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
+
+ for (cmd = cli_misc_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+
+ for (cmd = cli_help_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
out:
- return ret;
+ return ret;
}
diff --git a/cli/src/cli-cmd-parser.c b/cli/src/cli-cmd-parser.c
index 8c16588ae9c..34620b4a31b 100644
--- a/cli/src/cli-cmd-parser.c
+++ b/cli/src/cli-cmd-parser.c
@@ -1,2243 +1,5946 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include <fnmatch.h>
+#include <time.h>
#include "cli.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
-#include "dict.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/list.h>
#include "protocol-common.h"
#include "cli1-xdr.h"
+#define MAX_SNAP_DESCRIPTION_LEN 1024
+
+static struct snap_config_opt_vals_ snap_confopt_vals[] = {
+ {.op_name = "snap-max-hard-limit",
+ .question = "Changing snapshot-max-hard-limit "
+ "will limit the creation of new snapshots "
+ "if they exceed the new limit.\n"
+ "Do you want to continue?"},
+ {.op_name = "snap-max-soft-limit",
+ .question = "If Auto-delete is enabled, snap-max-soft-limit will"
+ " trigger deletion of oldest snapshot, on the "
+ "creation of new snapshot, when the "
+ "snap-max-soft-limit is reached.\n"
+ "Do you want to change the snap-max-soft-limit?"},
+ {.op_name = "both",
+ .question = "Changing snapshot-max-hard-limit "
+ "will limit the creation of new snapshots "
+ "if they exceed the new snapshot-max-hard-limit.\n"
+ "If Auto-delete is enabled, snap-max-soft-limit will"
+ " trigger deletion of oldest snapshot, on the "
+ "creation of new snapshot, when the "
+ "snap-max-soft-limit is reached.\n"
+ "Do you want to continue?"},
+ {
+ .op_name = NULL,
+ }};
+
+enum cli_snap_config_set_types {
+ GF_SNAP_CONFIG_SET_HARD = 0,
+ GF_SNAP_CONFIG_SET_SOFT = 1,
+ GF_SNAP_CONFIG_SET_BOTH = 2,
+};
+typedef enum cli_snap_config_set_types cli_snap_config_set_types;
+
+typedef struct _cli_brick {
+ struct list_head list;
+ const char *name;
+ int32_t len;
+} cli_brick_t;
+
+int
+cli_cmd_validate_volume(char *volname);
+
static const char *
-id_sel (void *wcon)
+id_sel(void *wcon)
{
- return (const char *)wcon;
+ return (const char *)wcon;
}
static char *
-str_getunamb (const char *tok, char **opwords)
+str_getunamb(const char *tok, char **opwords)
{
- return (char *)cli_getunamb (tok, (void **)opwords, id_sel);
+ return (char *)cli_getunamb(tok, (void **)opwords, id_sel);
}
int32_t
-cli_cmd_bricks_parse (const char **words, int wordcount, int brick_index,
- char **bricks, int *brick_count)
+cli_cmd_ta_brick_parse(const char **words, int wordcount, char **ta_brick)
{
- int ret = 0;
- char *tmp_list = NULL;
- char brick_list[120000] = {0,};
- char *space = " ";
- char *delimiter = NULL;
- char *host_name = NULL;
- char *free_list_ptr = NULL;
- char *tmpptr = NULL;
- int j = 0;
- int brick_list_len = 0;
- char *tmp_host = NULL;
-
- GF_ASSERT (words);
- GF_ASSERT (wordcount);
- GF_ASSERT (bricks);
- GF_ASSERT (brick_index > 0);
- GF_ASSERT (brick_index < wordcount);
-
- strncpy (brick_list, space, strlen (space));
- brick_list_len++;
- while (brick_index < wordcount) {
- if (validate_brick_name ((char *)words[brick_index])) {
- cli_err ("Wrong brick type: %s, use <HOSTNAME>:"
- "<export-dir-abs-path>", words[brick_index]);
- ret = -1;
- goto out;
- } else {
- delimiter = strrchr (words[brick_index], ':');
- ret = gf_canonicalize_path (delimiter + 1);
- if (ret)
- goto out;
- }
-
- if ((brick_list_len + strlen (words[brick_index]) + 1) > sizeof (brick_list)) {
- cli_err ("Total brick list is larger than a request. "
- "Can take (brick_count %d)", *brick_count);
- ret = -1;
- goto out;
- }
+ char *host_name = NULL;
+ char *tmp_host = NULL;
+ char *delimiter = NULL;
+ cli_brick_t *brick = NULL;
+ int ret = 0;
+
+ GF_ASSERT(words);
+ GF_ASSERT(wordcount);
+
+ if (validate_brick_name((char *)words[wordcount - 1])) {
+ cli_err(
+ "Wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>",
+ words[wordcount - 1]);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr(words[wordcount - 1], ':');
+ ret = gf_canonicalize_path(delimiter + 1);
+ if (ret)
+ goto out;
+ }
- tmp_host = gf_strdup ((char *)words[brick_index]);
- if (!tmp_host) {
- gf_log ("cli", GF_LOG_ERROR, "Out of memory");
- ret = -1;
- goto out;
- }
- get_host_name (tmp_host, &host_name);
- if (!host_name) {
- ret = -1;
- gf_log("cli",GF_LOG_ERROR, "Unable to allocate "
- "memory");
- goto out;
- }
+ tmp_host = gf_strdup((char *)words[wordcount - 1]);
+ if (!tmp_host) {
+ gf_log("cli", GF_LOG_ERROR, "Out of memory");
+ ret = -1;
+ goto out;
+ }
+ get_host_name(tmp_host, &host_name);
+ if (!host_name) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to retrieve "
+ "hostname");
+ goto out;
+ }
+
+ if (!(strcmp(host_name, "localhost") && strcmp(host_name, "127.0.0.1") &&
+ strncmp(host_name, "0.", 2))) {
+ cli_err(
+ "Please provide a valid hostname/ip other "
+ "than localhost, 127.0.0.1 or loopback "
+ "address (0.0.0.0 to 0.255.255.255).");
+ ret = -1;
+ goto out;
+ }
+ if (!valid_internet_address(host_name, _gf_false, _gf_false)) {
+ cli_err(
+ "internet address '%s' does not conform to "
+ "standards",
+ host_name);
+ }
+
+ brick = GF_MALLOC(sizeof(cli_brick_t), gf_common_list_node);
+ if (brick == NULL) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Out of memory");
+ goto out;
+ }
- if (!(strcmp (host_name, "localhost") &&
- strcmp (host_name, "127.0.0.1"))) {
- cli_err ("Please provide a valid hostname/ip other "
- "than localhost or 127.0.0.1");
- ret = -1;
- GF_FREE (tmp_host);
- goto out;
- }
- if (!valid_internet_address (host_name, _gf_false)) {
- cli_err ("internet address '%s' does not conform to "
- "standards", host_name);
- }
- GF_FREE (tmp_host);
- tmp_list = gf_strdup (brick_list + 1);
- if (free_list_ptr) {
- GF_FREE (free_list_ptr);
- free_list_ptr = NULL;
- }
- free_list_ptr = tmp_list;
- j = 0;
- while(j < *brick_count) {
- strtok_r (tmp_list, " ", &tmpptr);
- if (!(strcmp (tmp_list, words[brick_index]))) {
- ret = -1;
- cli_err ("Found duplicate"
- " exports %s",words[brick_index]);
- goto out;
- }
- tmp_list = tmpptr;
- j++;
- }
- strcat (brick_list, words[brick_index]);
- strcat (brick_list, " ");
- brick_list_len += (strlen (words[brick_index]) + 1);
- ++(*brick_count);
- ++brick_index;
- }
+ brick->name = words[wordcount - 1];
+ brick->len = strlen(words[wordcount - 1]);
+ *ta_brick = GF_MALLOC(brick->len + 3, gf_common_mt_char);
+ if (*ta_brick == NULL) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Out of memory");
+ goto out;
+ }
- *bricks = gf_strdup (brick_list);
- if (!*bricks)
- ret = -1;
+ strcat(*ta_brick, " ");
+ strcat(*ta_brick, brick->name);
+ strcat(*ta_brick, " ");
out:
- if (free_list_ptr)
- GF_FREE (free_list_ptr);
- return ret;
+ if (tmp_host) {
+ GF_FREE(tmp_host);
+ tmp_host = NULL;
+ }
+ if (brick) {
+ GF_FREE(brick);
+ brick = NULL;
+ }
+
+ return ret;
}
int32_t
-cli_cmd_volume_create_parse (const char **words, int wordcount, dict_t **options)
+cli_cmd_bricks_parse(const char **words, int wordcount, int brick_index,
+ char **bricks, int *brick_count)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- int ret = -1;
- gf1_cluster_type type = GF_CLUSTER_TYPE_NONE;
- int count = 1;
- int sub_count = 1;
- int brick_index = 0;
- int i = 0;
- char *trans_type = NULL;
- int32_t index = 0;
- char *bricks = NULL;
- int32_t brick_count = 0;
- char *opwords[] = { "replica", "stripe", "transport", NULL };
- char *w = NULL;
- int op_count = 0;
- int32_t replica_count = 1;
- int32_t stripe_count = 1;
-
- GF_ASSERT (words);
- GF_ASSERT (options);
+ int ret = 0;
+ char *delimiter = NULL;
+ char *host_name = NULL;
+ char *tmp_host = NULL;
+ char *bricks_str = NULL;
+ int len = 0;
+ int brick_list_len = 1; /* For initial space */
+ struct list_head brick_list = {
+ 0,
+ };
+ cli_brick_t *brick = NULL;
+
+ GF_ASSERT(words);
+ GF_ASSERT(wordcount);
+ GF_ASSERT(bricks);
+ GF_ASSERT(brick_index > 0);
+ GF_ASSERT(brick_index < wordcount);
+
+ INIT_LIST_HEAD(&brick_list);
+
+ while (brick_index < wordcount) {
+ if (validate_brick_name((char *)words[brick_index])) {
+ cli_err(
+ "Wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>",
+ words[brick_index]);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr(words[brick_index], ':');
+ ret = gf_canonicalize_path(delimiter + 1);
+ if (ret)
+ goto out;
+ }
- dict = dict_new ();
+ tmp_host = gf_strdup((char *)words[brick_index]);
+ if (!tmp_host) {
+ gf_log("cli", GF_LOG_ERROR, "Out of memory");
+ ret = -1;
+ goto out;
+ }
+ get_host_name(tmp_host, &host_name);
+ if (!host_name) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to allocate "
+ "memory");
+ GF_FREE(tmp_host);
+ goto out;
+ }
- if (!dict)
+ if (!(strcmp(host_name, "localhost") &&
+ strcmp(host_name, "127.0.0.1") && strncmp(host_name, "0.", 2))) {
+ cli_err(
+ "Please provide a valid hostname/ip other "
+ "than localhost, 127.0.0.1 or loopback "
+ "address (0.0.0.0 to 0.255.255.255).");
+ ret = -1;
+ GF_FREE(tmp_host);
+ goto out;
+ }
+ if (!valid_internet_address(host_name, _gf_false, _gf_false)) {
+ cli_err(
+ "internet address '%s' does not conform to "
+ "standards",
+ host_name);
+ }
+ GF_FREE(tmp_host);
+ list_for_each_entry(brick, &brick_list, list)
+ {
+ if (strcmp(brick->name, words[brick_index]) == 0) {
+ ret = -1;
+ cli_err("Found duplicate exports %s", words[brick_index]);
goto out;
+ }
+ }
- if (wordcount < 3)
- goto out;
+ brick = GF_MALLOC(sizeof(cli_brick_t), gf_common_list_node);
+ if (brick == NULL) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Out of memory");
+ goto out;
+ }
+ len = strlen(words[brick_index]);
+ brick->name = words[brick_index];
+ brick->len = len;
+ list_add_tail(&brick->list, &brick_list);
+
+ brick_list_len += len + 1; /* Brick name + space */
+ ++(*brick_count);
+ ++brick_index;
+ }
+
+ /* If brick count is not valid exit here */
+ if (!*brick_count) {
+ cli_err("No bricks specified");
+ ret = -1;
+ goto out;
+ }
- volname = (char *)words[2];
+ brick_list_len++; /* For terminating null char */
- GF_ASSERT (volname);
+ bricks_str = GF_MALLOC(brick_list_len, gf_common_mt_char);
+ if (bricks_str == NULL) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Out of memory");
+ goto out;
+ }
+ *bricks = bricks_str;
+ *bricks_str = ' ';
+ bricks_str++;
+ while (!list_empty(&brick_list)) {
+ brick = list_first_entry(&brick_list, cli_brick_t, list);
+ list_del_init(&brick->list);
+ memcpy(bricks_str, brick->name, brick->len);
+ bricks_str[brick->len] = ' ';
+ bricks_str += brick->len + 1;
+ GF_FREE(brick);
+ }
+ *bricks_str = 0;
- /* Validate the volume name here itself */
- {
- if (volname[0] == '-')
- goto out;
+out:
+ while (!list_empty(&brick_list)) {
+ brick = list_first_entry(&brick_list, cli_brick_t, list);
+ list_del_init(&brick->list);
+ GF_FREE(brick);
+ }
- if (!strcmp (volname, "all")) {
- cli_err ("\"all\" cannot be the name of a volume.");
- goto out;
- }
+ return ret;
+}
- if (strchr (volname, '/'))
- goto out;
+int32_t
+cli_cmd_create_disperse_check(struct cli_state *state, int *disperse,
+ int *redundancy, int *data, int count)
+{
+ int i = 0;
+ int tmp = 0;
+ gf_answer_t answer = GF_ANSWER_NO;
+ char question[128];
+
+ const char *question1 =
+ "There isn't an optimal redundancy value "
+ "for this configuration. Do you want to "
+ "create the volume with redundancy 1 ?";
+
+ const char *question2 =
+ "The optimal redundancy for this "
+ "configuration is %d. Do you want to create "
+ "the volume with this value ?";
+
+ const char *question3 =
+ "This configuration is not optimal on most "
+ "workloads. Do you want to use it ?";
+
+ const char *question4 =
+ "Redundancy for this configuration is %d. "
+ "Do you want to create "
+ "the volume with this value ?";
+
+ if (*data > 0) {
+ if (*disperse > 0 && *redundancy > 0) {
+ if (*disperse != (*data + *redundancy)) {
+ cli_err(
+ "Disperse count(%d) should be equal "
+ "to sum of disperse-data count(%d) and "
+ "redundancy count(%d)",
+ *disperse, *data, *redundancy);
+ return -1;
+ }
+ } else if (*redundancy > 0) {
+ *disperse = *data + *redundancy;
+ } else if (*disperse > 0) {
+ *redundancy = *disperse - *data;
+ } else {
+ if ((count - *data) >= *data) {
+ cli_err(
+ "Please provide redundancy count "
+ "along with disperse-data count");
+ return -1;
+ } else {
+ sprintf(question, question4, count - *data);
+ answer = cli_cmd_get_confirmation(state, question);
+ if (answer == GF_ANSWER_NO)
+ return -1;
+ *redundancy = count - *data;
+ *disperse = count;
+ }
+ }
+ }
- if (strlen (volname) > 512)
- goto out;
+ if (*disperse <= 0) {
+ if (count < 3) {
+ cli_err(
+ "number of bricks must be greater "
+ "than 2");
- for (i = 0; i < strlen (volname); i++)
- if (!isalnum (volname[i]) && (volname[i] != '_') && (volname[i] != '-'))
- goto out;
+ return -1;
}
+ *disperse = count;
+ }
- if (wordcount < 4) {
- ret = -1;
- goto out;
+ if (*redundancy == -1) {
+ tmp = *disperse - 1;
+ for (i = tmp / 2; (i > 0) && ((tmp & -tmp) != tmp); i--, tmp--)
+ ;
+
+ if (i == 0) {
+ answer = cli_cmd_get_confirmation(state, question1);
+ if (answer == GF_ANSWER_NO)
+ return -1;
+
+ *redundancy = 1;
+ } else {
+ *redundancy = *disperse - tmp;
+ if (*redundancy > 1) {
+ sprintf(question, question2, *redundancy);
+ answer = cli_cmd_get_confirmation(state, question);
+ if (answer == GF_ANSWER_NO)
+ return -1;
+ }
}
- type = GF_CLUSTER_TYPE_NONE;
- index = 3;
+ tmp = 0;
+ } else {
+ tmp = *disperse - *redundancy;
+ }
- while (op_count < 3) {
- ret = -1;
- w = str_getunamb (words[index], opwords);
- if (!w) {
- break;
- } else if ((strcmp (w, "replica")) == 0) {
- switch (type) {
- case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
- case GF_CLUSTER_TYPE_REPLICATE:
- cli_err ("replica option given twice");
- goto out;
- case GF_CLUSTER_TYPE_NONE:
- type = GF_CLUSTER_TYPE_REPLICATE;
- break;
- case GF_CLUSTER_TYPE_STRIPE:
- type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
- break;
- }
+ if ((*redundancy < 1) || (*redundancy > (*disperse - 1) / 2)) {
+ cli_err(
+ "redundancy must be greater than or equal to 1 and "
+ "less than %d for a disperse %d volume",
+ (*disperse + 1) / 2, *disperse);
- if (wordcount < (index+2)) {
- ret = -1;
- goto out;
- }
- replica_count = strtol (words[index+1], NULL, 0);
- if (replica_count < 2) {
- cli_err ("replica count should be greater"
- " than 1");
- ret = -1;
- goto out;
- }
- ret = dict_set_int32 (dict, "replica-count", replica_count);
- if (ret)
- goto out;
-
- index += 2;
-
- } else if ((strcmp (w, "stripe")) == 0) {
- switch (type) {
- case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
- case GF_CLUSTER_TYPE_STRIPE:
- cli_err ("stripe option given twice");
- goto out;
- case GF_CLUSTER_TYPE_NONE:
- type = GF_CLUSTER_TYPE_STRIPE;
- break;
- case GF_CLUSTER_TYPE_REPLICATE:
- type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
- break;
- }
- if (wordcount < (index + 2)) {
- ret = -1;
- goto out;
- }
- stripe_count = strtol (words[index+1], NULL, 0);
- if (stripe_count < 2) {
- cli_err ("stripe count should be greater"
- " than 1");
- ret = -1;
- goto out;
- }
- ret = dict_set_int32 (dict, "stripe-count", stripe_count);
- if (ret)
- goto out;
+ return -1;
+ }
- index += 2;
+ if ((tmp & -tmp) != tmp) {
+ answer = cli_cmd_get_confirmation(state, question3);
+ if (answer == GF_ANSWER_NO)
+ return -1;
+ }
- } else if ((strcmp (w, "transport")) == 0) {
- if (trans_type) {
- cli_err ("'transport' option given more"
- " than one time");
- goto out;
- }
- if ((strcasecmp (words[index+1], "tcp") == 0)) {
- trans_type = gf_strdup ("tcp");
- } else if ((strcasecmp (words[index+1], "rdma") == 0)) {
- trans_type = gf_strdup ("rdma");
- } else if ((strcasecmp (words[index+1], "tcp,rdma") == 0) ||
- (strcasecmp (words[index+1], "rdma,tcp") == 0)) {
- trans_type = gf_strdup ("tcp,rdma");
- } else {
- gf_log ("", GF_LOG_ERROR, "incorrect transport"
- " protocol specified");
- ret = -1;
- goto out;
- }
- index += 2;
+ return 0;
+}
+
+static int32_t
+cli_validate_disperse_volume(char *word, gf1_cluster_type type,
+ const char **words, int32_t wordcount,
+ int32_t index, int32_t *disperse_count,
+ int32_t *redundancy_count, int32_t *data_count)
+{
+ int ret = -1;
+
+ switch (type) {
+ case GF_CLUSTER_TYPE_NONE:
+ case GF_CLUSTER_TYPE_DISPERSE:
+ if (strcmp(word, "disperse") == 0) {
+ if (*disperse_count >= 0) {
+ cli_err("disperse option given twice");
+ goto out;
+ }
+ if (wordcount < (index + 2)) {
+ goto out;
+ }
+ ret = gf_string2int(words[index + 1], disperse_count);
+ if (ret == -1 && errno == EINVAL) {
+ *disperse_count = 0;
+ ret = 1;
+ } else if (ret == -1) {
+ goto out;
} else {
- GF_ASSERT (!"opword mismatch");
- ret = -1;
+ if (*disperse_count < 3) {
+ cli_err(
+ "disperse count must "
+ "be greater than 2");
goto out;
+ }
+ ret = 2;
}
- op_count++;
- }
+ } else if (strcmp(word, "disperse-data") == 0) {
+ if (*data_count >= 0) {
+ cli_err("disperse-data option given twice");
+ goto out;
+ }
+ if (wordcount < (index + 2)) {
+ goto out;
+ }
+ ret = gf_string2int(words[index + 1], data_count);
+ if (ret == -1 || *data_count < 2) {
+ cli_err("disperse-data must be greater than 1");
+ goto out;
+ }
+ ret = 2;
+ } else if (strcmp(word, "redundancy") == 0) {
+ if (*redundancy_count >= 0) {
+ cli_err("redundancy option given twice");
+ goto out;
+ }
+ if (wordcount < (index + 2)) {
+ goto out;
+ }
+ ret = gf_string2int(words[index + 1], redundancy_count);
+ if (ret == -1 || *redundancy_count < 1) {
+ cli_err("redundancy must be greater than 0");
+ goto out;
+ }
+ ret = 2;
+ }
+ break;
+ case GF_CLUSTER_TYPE_REPLICATE:
+ cli_err(
+ "replicated-dispersed volume is not "
+ "supported");
+ goto out;
+ default:
+ cli_err("Invalid type given");
+ break;
+ }
+out:
+ return ret;
+}
- if (!trans_type)
- trans_type = gf_strdup ("tcp");
+int32_t
+cli_validate_volname(const char *volname)
+{
+ int32_t ret = -1;
+ int32_t i = -1;
+ int volname_len;
+ static const char *const invalid_volnames[] = {"volume",
+ "type",
+ "subvolumes",
+ "option",
+ "end-volume",
+ "all",
+ "volume_not_in_ring",
+ "description",
+ "force",
+ "snap-max-hard-limit",
+ "snap-max-soft-limit",
+ "auto-delete",
+ "activate-on-create",
+ NULL};
+
+ if (volname[0] == '-')
+ goto out;
- sub_count = stripe_count * replica_count;
+ for (i = 0; invalid_volnames[i]; i++) {
+ if (!strcmp(volname, invalid_volnames[i])) {
+ cli_err("\"%s\" cannot be the name of a volume.", volname);
+ goto out;
+ }
+ }
- /* reset the count value now */
- count = 1;
+ if (strchr(volname, '/'))
+ goto out;
- if (index >= wordcount) {
- ret = -1;
- goto out;
+ volname_len = strlen(volname);
+ if (volname_len > GD_VOLUME_NAME_MAX) {
+ cli_err("Volume name exceeds %d characters.", GD_VOLUME_NAME_MAX);
+ goto out;
+ }
+
+ for (i = 0; i < volname_len; i++) {
+ if (!isalnum(volname[i]) && (volname[i] != '_') &&
+ (volname[i] != '-')) {
+ cli_err(
+ "Volume name should not contain \"%c\""
+ " character.\nVolume names can only"
+ "contain alphanumeric, '-' and '_' "
+ "characters.",
+ volname[i]);
+ goto out;
}
+ }
- brick_index = index;
+ ret = 0;
+out:
+ return ret;
+}
- ret = cli_cmd_bricks_parse (words, wordcount, brick_index, &bricks,
- &brick_count);
- if (ret)
- goto out;
+int32_t
+cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **brick_list)
+{
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+ gf1_cluster_type type = GF_CLUSTER_TYPE_NONE;
+ int sub_count = 1;
+ int brick_index = 0;
+ char *trans_type = NULL;
+ int32_t index = 0;
+ char *bricks = NULL;
+ char *ta_brick = NULL;
+ int32_t brick_count = 0;
+ static char *opwords[] = {"replica", "stripe", "transport",
+ "disperse", "redundancy", "disperse-data",
+ "arbiter", "thin-arbiter", NULL};
+
+ char *w = NULL;
+ int op_count = 0;
+ int32_t replica_count = 1;
+ int32_t arbiter_count = 0;
+ int32_t thin_arbiter_count = 0;
+ int32_t stripe_count = 1;
+ int32_t disperse_count = -1;
+ int32_t redundancy_count = -1;
+ int32_t disperse_data_count = -1;
+ gf_boolean_t is_force = _gf_false;
+ int wc = wordcount;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question = NULL;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+
+ if (!dict)
+ goto out;
+
+ if (wordcount < 3)
+ goto out;
+
+ volname = (char *)words[2];
+
+ GF_ASSERT(volname);
- /* If brick-count is not valid when replica or stripe is
- given, exit here */
- if (!brick_count) {
- cli_err ("No bricks specified");
+ /* Validate the volume name here itself */
+ if (cli_validate_volname(volname) < 0)
+ goto out;
+
+ if (wordcount < 4) {
+ ret = -1;
+ goto out;
+ }
+
+ type = GF_CLUSTER_TYPE_NONE;
+ index = 3;
+
+ while (op_count < 3) {
+ ret = -1;
+ w = str_getunamb(words[index], opwords);
+ if (!w) {
+ break;
+ } else if ((strcmp(w, "replica")) == 0) {
+ switch (type) {
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ case GF_CLUSTER_TYPE_REPLICATE:
+ cli_err("replica option given twice");
+ goto out;
+ case GF_CLUSTER_TYPE_NONE:
+ type = GF_CLUSTER_TYPE_REPLICATE;
+ break;
+ case GF_CLUSTER_TYPE_STRIPE:
+ cli_err("stripe option not supported");
+ goto out;
+ case GF_CLUSTER_TYPE_DISPERSE:
+ cli_err(
+ "replicated-dispersed volume is not "
+ "supported");
+ goto out;
+ default:
+ cli_err("Invalid type given");
+ goto out;
+ }
+
+ if (wordcount < (index + 2)) {
ret = -1;
goto out;
- }
+ }
- if (brick_count % sub_count) {
- if (type == GF_CLUSTER_TYPE_STRIPE)
- cli_err ("number of bricks is not a multiple of "
- "stripe count");
- else if (type == GF_CLUSTER_TYPE_REPLICATE)
- cli_err ("number of bricks is not a multiple of "
- "replica count");
- else
- cli_err ("number of bricks given doesn't match "
- "required count");
+ replica_count = strtol(words[index + 1], NULL, 0);
+ if (replica_count < 2) {
+ cli_err(
+ "replica count should be greater"
+ " than 1");
+ ret = -1;
+ goto out;
+ }
+
+ index += 2;
+ if (words[index]) {
+ if (!strcmp(words[index], "arbiter")) {
+ ret = gf_string2int(words[index + 1], &arbiter_count);
+ if ((ret == -1) || (arbiter_count != 1)) {
+ cli_err(
+ "For arbiter "
+ "configuration, "
+ "replica count must be"
+ " 2 and arbiter count "
+ "must be 1. The 3rd "
+ "brick of the replica "
+ "will be the arbiter");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32(dict, "arbiter-count", arbiter_count);
+ if (ret)
+ goto out;
+ index += 2;
+ } else if (!strcmp(words[index], "thin-arbiter")) {
+ ret = gf_string2int(words[index + 1], &thin_arbiter_count);
+ if ((ret == -1) || (thin_arbiter_count != 1) ||
+ (replica_count != 2)) {
+ cli_err(
+ "For thin-arbiter "
+ "configuration, "
+ "replica count must be"
+ " 2 and thin-arbiter count "
+ "must be 1. The 3rd "
+ "brick of the replica "
+ "will be the thin-arbiter brick");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32(dict, "thin-arbiter-count",
+ thin_arbiter_count);
+ if (ret)
+ goto out;
+ index += 2;
+ }
+ }
+
+ /* Do this to keep glusterd happy with sending
+ "replica 3 arbiter 1" options to server */
+ if ((arbiter_count == 1) && (replica_count == 2))
+ replica_count += arbiter_count;
+
+ if (replica_count == 2 && thin_arbiter_count == 0) {
+ if (strcmp(words[wordcount - 1], "force")) {
+ question =
+ "Replica 2 volumes are prone"
+ " to split-brain. Use "
+ "Arbiter or Replica 3 to "
+ "avoid this. See: "
+ "http://docs.gluster.org/en/latest/"
+ "Administrator%20Guide/"
+ "Split%20brain%20and%20ways%20to%20deal%20with%20it/."
+ "\nDo you still want to "
+ "continue?\n";
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Volume create "
+ "cancelled, exiting");
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+ ret = dict_set_int32(dict, "replica-count", replica_count);
+ if (ret)
+ goto out;
+ } else if ((strcmp(w, "stripe")) == 0) {
+ cli_err("stripe option not supported");
+ goto out;
+ } else if ((strcmp(w, "transport")) == 0) {
+ if (trans_type) {
+ cli_err(
+ "'transport' option given more"
+ " than one time");
+ goto out;
+ }
+ if ((strcasecmp(words[index + 1], "tcp") == 0)) {
+ trans_type = gf_strdup("tcp");
+ } else if ((strcasecmp(words[index + 1], "rdma") == 0)) {
+ trans_type = gf_strdup("rdma");
+ } else if ((strcasecmp(words[index + 1], "tcp,rdma") == 0) ||
+ (strcasecmp(words[index + 1], "rdma,tcp") == 0)) {
+ trans_type = gf_strdup("tcp,rdma");
+ } else {
+ gf_log("", GF_LOG_ERROR,
+ "incorrect transport"
+ " protocol specified");
ret = -1;
goto out;
+ }
+ index += 2;
+
+ } else if ((strcmp(w, "disperse") == 0) ||
+ (strcmp(w, "redundancy") == 0) ||
+ (strcmp(w, "disperse-data") == 0)) {
+ ret = cli_validate_disperse_volume(
+ w, type, words, wordcount, index, &disperse_count,
+ &redundancy_count, &disperse_data_count);
+ if (ret < 0)
+ goto out;
+ index += ret;
+ type = GF_CLUSTER_TYPE_DISPERSE;
+ } else if ((strcmp(w, "arbiter") == 0)) {
+ cli_err(
+ "arbiter option must be preceded by replica "
+ "option.");
+ ret = -1;
+ goto out;
+ } else if ((strcmp(w, "thin-arbiter") == 0)) {
+ cli_err(
+ "thin-arbiter option must be preceded by replica "
+ "option.");
+ ret = -1;
+ goto out;
+ } else {
+ GF_ASSERT(!"opword mismatch");
+ ret = -1;
+ goto out;
}
+ op_count++;
+ }
- /* Everything if parsed fine. start setting info in dict */
- ret = dict_set_str (dict, "volname", volname);
- if (ret)
- goto out;
+ if (!trans_type)
+ trans_type = gf_strdup("tcp");
- ret = dict_set_int32 (dict, "type", type);
- if (ret)
- goto out;
+ if (index >= wordcount) {
+ ret = -1;
+ goto out;
+ }
- ret = dict_set_dynstr (dict, "transport", trans_type);
+ brick_index = index;
+
+ if (strcmp(words[wordcount - 1], "force") == 0) {
+ is_force = _gf_true;
+ wc = wordcount - 1;
+ }
+
+ // Exclude the thin-arbiter-brick i.e. last brick in the bricks list
+ if (thin_arbiter_count == 1) {
+ ret = cli_cmd_bricks_parse(words, wc - 1, brick_index, &bricks,
+ &brick_count);
if (ret)
- goto out;
- trans_type = NULL;
+ goto out;
- ret = dict_set_dynstr (dict, "bricks", bricks);
+ ret = cli_cmd_ta_brick_parse(words, wc, &ta_brick);
+
+ } else {
+ ret = cli_cmd_bricks_parse(words, wc, brick_index, &bricks,
+ &brick_count);
+ }
+
+ if (ret)
+ goto out;
+
+ if (type == GF_CLUSTER_TYPE_DISPERSE) {
+ ret = cli_cmd_create_disperse_check(state, &disperse_count,
+ &redundancy_count,
+ &disperse_data_count, brick_count);
+ if (!ret)
+ ret = dict_set_int32(dict, "disperse-count", disperse_count);
+ if (!ret)
+ ret = dict_set_int32(dict, "redundancy-count", redundancy_count);
if (ret)
- goto out;
+ goto out;
+
+ sub_count = disperse_count;
+ } else
+ sub_count = stripe_count * replica_count;
- ret = dict_set_int32 (dict, "count", brick_count);
+ if (brick_count % sub_count) {
+ if (type == GF_CLUSTER_TYPE_STRIPE)
+ cli_err(
+ "number of bricks is not a multiple of "
+ "stripe count");
+ else if (type == GF_CLUSTER_TYPE_REPLICATE)
+ cli_err(
+ "number of bricks is not a multiple of "
+ "replica count");
+ else if (type == GF_CLUSTER_TYPE_DISPERSE)
+ cli_err(
+ "number of bricks is not a multiple of "
+ "disperse count");
+ else
+ cli_err(
+ "number of bricks given doesn't match "
+ "required count");
+
+ ret = -1;
+ goto out;
+ }
+
+ /* Everything is parsed fine. start setting info in dict */
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32(dict, "type", type);
+ if (ret)
+ goto out;
+
+ ret = dict_set_dynstr(dict, "transport", trans_type);
+ if (ret)
+ goto out;
+ trans_type = NULL;
+
+ ret = dict_set_dynstr(dict, "bricks", bricks);
+ if (ret)
+ goto out;
+
+ if (thin_arbiter_count == 1) {
+ ret = dict_set_dynstr(dict, "ta-brick", ta_brick);
if (ret)
- goto out;
+ goto out;
+ }
- *options = dict;
+ ret = dict_set_int32(dict, "count", brick_count);
+ if (ret)
+ goto out;
+ ret = dict_set_int32(dict, "force", is_force);
+ if (ret)
+ goto out;
+
+ *options = dict;
+ *brick_list = bricks;
out:
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Unable to parse create volume CLI");
- if (dict)
- dict_destroy (dict);
- }
+ if (ret) {
+ GF_FREE(bricks);
+ GF_FREE(ta_brick);
+ gf_log("cli", GF_LOG_ERROR, "Unable to parse create volume CLI");
+ if (dict)
+ dict_unref(dict);
+ }
- if (trans_type)
- GF_FREE (trans_type);
+ GF_FREE(trans_type);
- return ret;
+ return ret;
}
int32_t
-cli_cmd_volume_reset_parse (const char **words, int wordcount, dict_t **options)
+cli_cmd_volume_reset_parse(const char **words, int wordcount, dict_t **options)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- int ret = -1;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
- GF_ASSERT (words);
- GF_ASSERT (options);
+ GF_ASSERT(words);
+ GF_ASSERT(options);
- dict = dict_new ();
+ dict = dict_new();
- if (!dict)
- goto out;
+ if (!dict)
+ goto out;
- if (wordcount < 3)
- goto out;
+ if (wordcount < 3)
+ goto out;
- if (wordcount > 5)
- goto out;
+ if (wordcount > 5)
+ goto out;
- volname = (char *)words[2];
+ volname = (char *)words[2];
- if (!volname) {
- ret = -1;
- goto out;
- }
+ if (!volname) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
- ret = dict_set_str (dict, "volname", volname);
+ if (wordcount == 3) {
+ ret = dict_set_str(dict, "key", "all");
if (ret)
- goto out;
+ goto out;
+ }
- if (wordcount == 3) {
- ret = dict_set_str (dict, "key", "all");
- if (ret)
- goto out;
+ if (wordcount >= 4) {
+ if (!strcmp("force", (char *)words[3])) {
+ ret = dict_set_int32(dict, "force", 1);
+ if (ret)
+ goto out;
+ ret = dict_set_str(dict, "key", "all");
+ if (ret)
+ goto out;
+ } else {
+ ret = dict_set_str(dict, "key", (char *)words[3]);
+ if (ret)
+ goto out;
}
+ }
- if (wordcount >= 4) {
- if (!strcmp ("force", (char*)words[3])) {
- ret = dict_set_int32 (dict, "force", 1);
- if (ret)
- goto out;
- ret = dict_set_str (dict, "key", "all");
- if (ret)
- goto out;
- } else {
- ret = dict_set_str (dict, "key", (char *)words[3]);
- if (ret)
- goto out;
- }
+ if (wordcount == 5) {
+ if (strcmp("force", (char *)words[4])) {
+ ret = -1;
+ goto out;
+ } else {
+ ret = dict_set_int32(dict, "force", 1);
+ if (ret)
+ goto out;
}
+ }
+
+ *options = dict;
+
+out:
+ if (ret && dict) {
+ dict_unref(dict);
+ }
+
+ return ret;
+}
+
+int32_t
+cli_cmd_get_state_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **op_errstr)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
+ char *odir = NULL;
+ char *filename = NULL;
+ char *daemon_name = NULL;
+ int count = 0;
+ uint32_t cmd = 0;
+
+ GF_VALIDATE_OR_GOTO("cli", options, out);
+ GF_VALIDATE_OR_GOTO("cli", words, out);
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
- if (wordcount == 5) {
- if (strcmp ("force", (char*)words[4])) {
+ if (wordcount < 1 || wordcount > 7) {
+ *op_errstr = gf_strdup(
+ "Problem parsing arguments."
+ " Check usage.");
+ goto out;
+ }
+
+ if (wordcount >= 1) {
+ gf_asprintf(&daemon_name, "%s", "glusterd");
+
+ for (count = 1; count < wordcount; count++) {
+ if (strcmp(words[count], "odir") == 0 ||
+ strcmp(words[count], "file") == 0) {
+ if (strcmp(words[count], "odir") == 0) {
+ if (++count < wordcount) {
+ odir = (char *)words[count];
+ continue;
+ } else {
ret = -1;
goto out;
+ }
+ } else if (strcmp(words[count], "file") == 0) {
+ if (++count < wordcount) {
+ filename = (char *)words[count];
+ continue;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ }
+ } else {
+ if (count > 1) {
+ if (count == wordcount - 1) {
+ if (strcmp(words[count], "detail") == 0) {
+ cmd = GF_CLI_GET_STATE_DETAIL;
+ continue;
+ } else if (strcmp(words[count], "volumeoptions") == 0) {
+ cmd = GF_CLI_GET_STATE_VOLOPTS;
+ continue;
+ }
+ } else {
+ *op_errstr = gf_strdup(
+ "Problem"
+ " parsing arguments. "
+ "Check usage.");
+ ret = -1;
+ goto out;
+ }
+ }
+ if (strcmp(words[count], "glusterd") == 0) {
+ continue;
} else {
- ret = dict_set_int32 (dict, "force", 1);
- if (ret)
- goto out;
+ if (count == wordcount - 1) {
+ if (strcmp(words[count], "detail") == 0) {
+ cmd = GF_CLI_GET_STATE_DETAIL;
+ continue;
+ } else if (strcmp(words[count], "volumeoptions") == 0) {
+ cmd = GF_CLI_GET_STATE_VOLOPTS;
+ continue;
+ }
+ }
+
+ *op_errstr = gf_strdup(
+ "glusterd is "
+ "the only supported daemon.");
+ ret = -1;
+ goto out;
}
+ }
}
+ ret = dict_set_dynstr(dict, "daemon", daemon_name);
+ if (ret) {
+ *op_errstr = gf_strdup(
+ "Command failed. Please check "
+ " log file for more details.");
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Setting daemon name to dictionary failed");
+ goto out;
+ }
+ daemon_name = NULL;
+
+ if (odir) {
+ ret = dict_set_str(dict, "odir", odir);
+ if (ret) {
+ *op_errstr = gf_strdup(
+ "Command failed. Please"
+ " check log file for"
+ " more details.");
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Setting output directory to"
+ "dictionary failed");
+ goto out;
+ }
+ }
+
+ if (filename) {
+ ret = dict_set_str(dict, "filename", filename);
+ if (ret) {
+ *op_errstr = gf_strdup(
+ "Command failed. Please"
+ " check log file for"
+ " more details.");
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Setting filename to dictionary failed");
+ goto out;
+ }
+ }
+
+ if (cmd) {
+ ret = dict_set_uint32(dict, "getstate-cmd", cmd);
+ if (ret) {
+ *op_errstr = gf_strdup(
+ "Command failed. Please"
+ " check log file for"
+ " more details.");
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Setting "
+ "get-state command type to dictionary "
+ "failed");
+ goto out;
+ }
+ }
+ }
+
+out:
+ if (dict)
*options = dict;
+ if (ret && dict)
+ dict_unref(dict);
+
+ GF_FREE(daemon_name);
+
+ return ret;
+}
+
+int32_t
+cli_cmd_inode_quota_parse(const char **words, int wordcount, dict_t **options)
+{
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+ if (!dict) {
+ gf_log("cli", GF_LOG_ERROR, "dict_new failed");
+ goto out;
+ }
+
+ if (wordcount != 4)
+ goto out;
+
+ volname = (char *)words[2];
+ if (!volname) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Validate the volume name here itself */
+ if (cli_validate_volname(volname) < 0)
+ goto out;
+
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret < 0)
+ goto out;
+
+ if (strcmp(words[3], "enable") != 0) {
+ cli_out("Invalid quota option : %s", words[3]);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "type", GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS);
+ if (ret < 0)
+ goto out;
+
+ *options = dict;
out:
- if (ret && dict) {
- dict_destroy (dict);
- }
+ if (ret < 0) {
+ if (dict)
+ dict_unref(dict);
+ }
- return ret;
+ return ret;
}
int32_t
-cli_cmd_quota_parse (const char **words, int wordcount, dict_t **options)
+cli_cmd_quota_parse(const char **words, int wordcount, dict_t **options)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- int ret = -1;
- int i = 0;
- char key[20] = {0, };
- uint64_t value = 0;
- gf_quota_type type = GF_QUOTA_OPTION_TYPE_NONE;
- char *opwords[] = { "enable", "disable", "limit-usage",
- "remove", "list", "version", NULL };
- char *w = NULL;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+ int i = -1;
+ char key[20] = {
+ 0,
+ };
+ int64_t value = 0;
+ gf_quota_type type = GF_QUOTA_OPTION_TYPE_NONE;
+ static char *opwords[] = {"enable",
+ "disable",
+ "limit-usage",
+ "remove",
+ "list",
+ "alert-time",
+ "soft-timeout",
+ "hard-timeout",
+ "default-soft-limit",
+ "limit-objects",
+ "list-objects",
+ "remove-objects",
+ NULL};
+ char *w = NULL;
+ uint32_t time = 0;
+ double percent = 0;
+ char *end_ptr = NULL;
+ int64_t limit = 0;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+ if (!dict) {
+ gf_log("cli", GF_LOG_ERROR, "dict_new failed");
+ goto out;
+ }
- GF_ASSERT (words);
- GF_ASSERT (options);
+ if (wordcount < 4) {
+ if ((wordcount == 3) && !(strcmp(words[2], "help"))) {
+ ret = 1;
+ }
+ goto out;
+ }
- dict = dict_new ();
- if (!dict)
- goto out;
+ volname = (char *)words[2];
+ if (!volname) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Validate the volume name here itself */
+ if (cli_validate_volname(volname) < 0)
+ goto out;
+
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret < 0)
+ goto out;
+
+ w = str_getunamb(words[3], opwords);
+ if (!w) {
+ cli_out("Invalid quota option : %s", words[3]);
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(w, "enable") == 0) {
+ if (wordcount == 4) {
+ type = GF_QUOTA_OPTION_TYPE_ENABLE;
+ ret = 0;
+ goto set_type;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (strcmp(w, "disable") == 0) {
+ if (wordcount == 4) {
+ type = GF_QUOTA_OPTION_TYPE_DISABLE;
+ ret = 0;
+ goto set_type;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (strcmp(w, "limit-usage") == 0) {
+ type = GF_QUOTA_OPTION_TYPE_LIMIT_USAGE;
+ } else if (strcmp(w, "limit-objects") == 0) {
+ type = GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS;
+ }
+
+ if (type == GF_QUOTA_OPTION_TYPE_LIMIT_USAGE ||
+ type == GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS) {
+ if (wordcount < 6 || wordcount > 7) {
+ ret = -1;
+ goto out;
+ }
- if (wordcount < 4)
+ if (words[4][0] != '/') {
+ cli_err("Please enter absolute path");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str(dict, "path", (char *)words[4]);
+ if (ret)
+ goto out;
+
+ if (!words[5]) {
+ cli_err("Please enter the limit value to be set");
+ ret = -1;
+ goto out;
+ }
+
+ if (type == GF_QUOTA_OPTION_TYPE_LIMIT_USAGE) {
+ ret = gf_string2bytesize_int64(words[5], &value);
+ if (ret != 0 || value <= 0) {
+ if (errno == ERANGE || value <= 0) {
+ ret = -1;
+ cli_err(
+ "Please enter an integer "
+ "value in the range of "
+ "(1 - %" PRId64 ")",
+ INT64_MAX);
+ } else
+ cli_err(
+ "Please enter a correct "
+ "value");
goto out;
+ }
+ } else {
+ errno = 0;
+ limit = strtol(words[5], &end_ptr, 10);
+ if (errno == ERANGE || errno == EINVAL || limit <= 0 ||
+ strcmp(end_ptr, "") != 0) {
+ ret = -1;
+ cli_err(
+ "Please enter an integer value in "
+ "the range 1 - %" PRId64,
+ INT64_MAX);
+ goto out;
+ }
+ }
- volname = (char *)words[2];
- if (!volname) {
+ ret = dict_set_str(dict, "hard-limit", (char *)words[5]);
+ if (ret < 0)
+ goto out;
+
+ if (wordcount == 7) {
+ ret = gf_string2percent(words[6], &percent);
+ if (ret != 0 || percent > 100) {
ret = -1;
+ cli_err(
+ "Please enter a correct value "
+ "in the range of 0 to 100");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "soft-limit", (char *)words[6]);
+ if (ret < 0)
goto out;
}
- /* Validate the volume name here itself */
- {
- if (volname[0] == '-')
- goto out;
+ goto set_type;
+ }
- if (!strcmp (volname, "all")) {
- cli_err ("\"all\" cannot be the name of a volume.");
- goto out;
- }
+ if (strcmp(w, "remove") == 0) {
+ if (wordcount != 5) {
+ ret = -1;
+ goto out;
+ }
- if (strchr (volname, '/'))
- goto out;
+ type = GF_QUOTA_OPTION_TYPE_REMOVE;
- if (strlen (volname) > 512)
- goto out;
+ if (words[4][0] != '/') {
+ cli_err("Please enter absolute path");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "path", (char *)words[4]);
+ if (ret < 0)
+ goto out;
+ goto set_type;
+ }
+
+ if (strcmp(w, "remove-objects") == 0) {
+ if (wordcount != 5) {
+ ret = -1;
+ goto out;
+ }
+
+ type = GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS;
- for (i = 0; i < strlen (volname); i++)
- if (!isalnum (volname[i]) && (volname[i] != '_') && (volname[i] != '-'))
- goto out;
+ if (words[4][0] != '/') {
+ cli_err("Please enter absolute path");
+ ret = -1;
+ goto out;
}
- ret = dict_set_str (dict, "volname", volname);
+ ret = dict_set_str(dict, "path", (char *)words[4]);
if (ret < 0)
+ goto out;
+ goto set_type;
+ }
+
+ if (strcmp(w, "list") == 0) {
+ type = GF_QUOTA_OPTION_TYPE_LIST;
+
+ if (words[4] && words[4][0] != '/') {
+ cli_err("Please enter absolute path");
+ ret = -1;
+ goto out;
+ }
+
+ i = 4;
+ while (i < wordcount) {
+ snprintf(key, 20, "path%d", i - 4);
+
+ ret = dict_set_str(dict, key, (char *)words[i++]);
+ if (ret < 0)
goto out;
+ }
- w = str_getunamb (words[3], opwords);
- if (!w) {
- ret = - 1;
+ ret = dict_set_int32(dict, "count", i - 4);
+ if (ret < 0)
+ goto out;
+
+ goto set_type;
+ }
+
+ if (strcmp(w, "list-objects") == 0) {
+ type = GF_QUOTA_OPTION_TYPE_LIST_OBJECTS;
+
+ i = 4;
+ while (i < wordcount) {
+ snprintf(key, 20, "path%d", i - 4);
+
+ ret = dict_set_str(dict, key, (char *)words[i++]);
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set "
+ "quota patch in request dictionary");
goto out;
+ }
}
- if (strcmp (w, "enable") == 0) {
- if (wordcount == 4) {
- type = GF_QUOTA_OPTION_TYPE_ENABLE;
- ret = 0;
- goto set_type;
- } else {
- ret = -1;
- goto out;
- }
+ ret = dict_set_int32(dict, "count", i - 4);
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set quota "
+ "limit count in request dictionary");
+ goto out;
}
- if (strcmp (w, "disable") == 0) {
- if (wordcount == 4) {
- type = GF_QUOTA_OPTION_TYPE_DISABLE;
- ret = 0;
- goto set_type;
- } else {
- ret = -1;
- goto out;
- }
+ goto set_type;
+ }
+
+ if (strcmp(w, "alert-time") == 0) {
+ if (wordcount != 5) {
+ ret = -1;
+ goto out;
}
+ type = GF_QUOTA_OPTION_TYPE_ALERT_TIME;
- if (strcmp (w, "limit-usage") == 0) {
- if (wordcount != 6) {
- ret = -1;
- goto out;
- }
+ ret = gf_string2time(words[4], &time);
+ if (ret) {
+ cli_err(
+ "Invalid argument %s. Please enter a valid "
+ "string",
+ words[4]);
+ goto out;
+ }
- type = GF_QUOTA_OPTION_TYPE_LIMIT_USAGE;
+ ret = dict_set_str(dict, "value", (char *)words[4]);
+ if (ret < 0)
+ goto out;
+ goto set_type;
+ }
+
+ if (strcmp(w, "soft-timeout") == 0) {
+ if (wordcount != 5) {
+ ret = -1;
+ goto out;
+ }
+ type = GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT;
- if (words[4][0] != '/') {
- cli_err ("Please enter absolute path");
+ ret = gf_string2time(words[4], &time);
+ if (ret) {
+ cli_err(
+ "Invalid argument %s. Please enter a valid "
+ "string",
+ words[4]);
+ goto out;
+ }
- return -2;
- }
- ret = dict_set_str (dict, "path", (char *) words[4]);
- if (ret)
- goto out;
+ ret = dict_set_str(dict, "value", (char *)words[4]);
+ if (ret < 0)
+ goto out;
+ goto set_type;
+ }
+
+ if (strcmp(w, "hard-timeout") == 0) {
+ if (wordcount != 5) {
+ ret = -1;
+ goto out;
+ }
+ type = GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT;
- if (!words[5]) {
- cli_err ("Please enter the limit value to be set");
+ ret = gf_string2time(words[4], &time);
+ if (ret) {
+ cli_err(
+ "Invalid argument %s. Please enter a valid "
+ "string",
+ words[4]);
+ goto out;
+ }
- return -2;
- }
+ ret = dict_set_str(dict, "value", (char *)words[4]);
+ if (ret < 0)
+ goto out;
+ goto set_type;
+ }
+ if (strcmp(w, "default-soft-limit") == 0) {
+ if (wordcount != 5) {
+ ret = -1;
+ goto out;
+ }
+ type = GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT;
- ret = gf_string2bytesize (words[5], &value);
- if (ret != 0) {
- cli_err ("Please enter a correct value");
- return -1;
- }
+ ret = dict_set_str(dict, "value", (char *)words[4]);
+ if (ret < 0)
+ goto out;
+ goto set_type;
+ } else {
+ GF_ASSERT(!"opword mismatch");
+ }
- ret = dict_set_str (dict, "limit", (char *) words[5]);
- if (ret < 0)
- goto out;
+set_type:
+ ret = dict_set_int32(dict, "type", type);
+ if (ret < 0)
+ goto out;
- goto set_type;
- }
- if (strcmp (w, "remove") == 0) {
- if (wordcount != 5) {
- ret = -1;
- goto out;
- }
+ *options = dict;
+out:
+ if (ret < 0) {
+ if (dict)
+ dict_unref(dict);
+ }
- type = GF_QUOTA_OPTION_TYPE_REMOVE;
+ return ret;
+}
- if (words[4][0] != '/') {
- cli_err ("Please enter absolute path");
+static gf_boolean_t
+cli_is_key_spl(char *key)
+{
+ return (strcmp(key, "group") == 0);
+}
- return -2;
- }
+static int32_t
+cli_add_key_group_value(dict_t *dict, const char *name, const char *value,
+ int32_t id, char **op_errstr)
+{
+ char *key = NULL;
+ char *data = NULL;
+ int32_t ret = -1;
- ret = dict_set_str (dict, "path", (char *) words[4]);
- if (ret < 0)
- goto out;
- goto set_type;
- }
+ ret = gf_asprintf(&key, "%s%d", name, id);
+ if (ret < 0) {
+ goto out;
+ }
+ data = gf_strdup(value);
+ if (data == NULL) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to allocate memory for data");
+ ret = -1;
+ goto out;
+ }
- if (strcmp (w, "list") == 0) {
- if (wordcount < 4) {
- ret = -1;
- goto out;
- }
+ ret = dict_set_dynstr(dict, key, data);
+ if (ret == 0) {
+ data = NULL;
+ }
- type = GF_QUOTA_OPTION_TYPE_LIST;
+out:
+ GF_FREE(key);
+ GF_FREE(data);
- i = 4;
- while (i < wordcount) {
- snprintf (key, 20, "path%d", i-4);
+ if ((ret != 0) && (op_errstr != NULL)) {
+ *op_errstr = gf_strdup("Failed to allocate memory");
+ }
- ret = dict_set_str (dict, key, (char *) words [i++]);
- if (ret < 0)
- goto out;
- }
+ return ret;
+}
- ret = dict_set_int32 (dict, "count", i - 4);
- if (ret < 0)
- goto out;
+static int
+cli_add_key_group(dict_t *dict, char *key, char *value, char **op_errstr)
+{
+ int ret = -1;
+ int opt_count = 0;
+ char *saveptr = NULL;
+ char *tok_key = NULL;
+ char *tok_val = NULL;
+ char *tagpath = NULL;
+ char line[PATH_MAX + 256] = {
+ 0,
+ };
+ FILE *fp = NULL;
+
+ ret = gf_asprintf(&tagpath, "%s/groups/%s", GLUSTERD_DEFAULT_WORKDIR,
+ value);
+ if (ret == -1) {
+ tagpath = NULL;
+ goto out;
+ }
- goto set_type;
+ fp = fopen(tagpath, "r");
+ if (!fp) {
+ ret = -1;
+ if (op_errstr) {
+ gf_asprintf(op_errstr,
+ "Unable to open file '%s'. "
+ "Error: %s",
+ tagpath, strerror(errno));
+ }
+ goto out;
+ }
+
+ opt_count = 0;
+ while (fgets(line, sizeof(line), fp) != NULL) {
+ if (strlen(line) >= sizeof(line) - 1) {
+ ret = -1;
+ if (op_errstr != NULL) {
+ *op_errstr = gf_strdup("Line too long");
+ }
+ goto out;
}
- if (strcmp (w, "version") == 0) {
- type = GF_QUOTA_OPTION_TYPE_VERSION;
- } else {
- GF_ASSERT (!"opword mismatch");
+ /* Treat line that start with "#" as comments */
+ if ('#' == line[0])
+ continue;
+
+ opt_count++;
+ tok_key = strtok_r(line, "=", &saveptr);
+ tok_val = strtok_r(NULL, "\r\n", &saveptr);
+ if (!tok_key || !tok_val) {
+ ret = -1;
+ if (op_errstr) {
+ gf_asprintf(op_errstr,
+ "'%s' file format "
+ "not valid.",
+ tagpath);
+ }
+ goto out;
}
-set_type:
- ret = dict_set_int32 (dict, "type", type);
- if (ret < 0)
- goto out;
+ ret = cli_add_key_group_value(dict, "key", tok_key, opt_count,
+ op_errstr);
+ if (ret != 0) {
+ goto out;
+ }
+ ret = cli_add_key_group_value(dict, "value", tok_val, opt_count,
+ op_errstr);
+ if (ret != 0) {
+ goto out;
+ }
+ }
- *options = dict;
-out:
- if (ret < 0) {
- if (dict)
- dict_destroy (dict);
+ if (!opt_count) {
+ ret = -1;
+ if (op_errstr) {
+ gf_asprintf(op_errstr, "'%s' file format not valid.", tagpath);
}
+ goto out;
+ }
+ ret = dict_set_int32(dict, "count", opt_count);
+out:
- return ret;
+ GF_FREE(tagpath);
+
+ if (fp)
+ fclose(fp);
+
+ return ret;
}
int32_t
-cli_cmd_volume_set_parse (const char **words, int wordcount, dict_t **options)
+cli_cmd_volume_set_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **op_errstr)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- int ret = -1;
- int count = 0;
- char *key = NULL;
- char *value = NULL;
- int i = 0;
- char str[50] = {0,};
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+ int count = 0;
+ char *key = NULL;
+ char *value = NULL;
+ int i = 0;
+ char str[50] = {
+ 0,
+ };
+ const char *question = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+
+ if (!dict)
+ goto out;
- GF_ASSERT (words);
- GF_ASSERT (options);
+ if (wordcount < 3)
+ goto out;
- dict = dict_new ();
+ volname = (char *)words[2];
- if (!dict)
- goto out;
+ GF_ASSERT(volname);
- if (wordcount < 3)
- goto out;
+ ret = dict_set_str(dict, "volname", volname);
- volname = (char *)words[2];
+ if (ret)
+ goto out;
- GF_ASSERT (volname);
+ if (!strcmp(volname, "all")) {
+ ret = dict_set_str(dict, "globalname", "All");
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set on global key failed.");
+ goto out;
+ }
- ret = dict_set_str (dict, "volname", volname);
+ ret = dict_set_int32(dict, "hold_global_locks", _gf_true);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set on global key failed.");
+ goto out;
+ }
+ }
+ if ((!strcmp(volname, "help") || !strcmp(volname, "help-xml")) &&
+ wordcount == 3) {
+ ret = dict_set_str(dict, volname, volname);
if (ret)
- goto out;
+ goto out;
- if ((!strcmp (volname, "help") || !strcmp (volname, "help-xml"))
- && wordcount == 3 ) {
- ret = dict_set_str (dict, volname, volname);
- if (ret)
- goto out;
- } else if (wordcount < 5) {
- ret = -1;
- goto out;
+ } else if (wordcount < 5) {
+ ret = -1;
+ goto out;
+
+ } else if (wordcount == 5 && cli_is_key_spl((char *)words[3])) {
+ key = (char *)words[3];
+ value = (char *)words[4];
+ if (!key || !value) {
+ ret = -1;
+ goto out;
}
- for (i = 3; i < wordcount; i+=2) {
+ ret = gf_strip_whitespace(value, strlen(value));
+ if (ret == -1)
+ goto out;
- key = (char *) words[i];
- value = (char *) words[i+1];
+ if (strlen(value) == 0) {
+ ret = -1;
+ goto out;
+ }
- if ( !key || !value) {
- ret = -1;
- goto out;
- }
+ ret = cli_add_key_group(dict, key, value, op_errstr);
+ if (ret == 0)
+ *options = dict;
+ goto out;
+ }
- count++;
+ for (i = 3; i < wordcount; i += 2) {
+ key = (char *)words[i];
+ value = (char *)words[i + 1];
- ret = gf_strip_whitespace (value, strlen (value));
- if (ret == -1)
- goto out;
+ if (!key || !value) {
+ ret = -1;
+ goto out;
+ }
- sprintf (str, "key%d", count);
- ret = dict_set_str (dict, str, key);
- if (ret)
- goto out;
+ count++;
- sprintf (str, "value%d", count);
- ret = dict_set_str (dict, str, value);
+ if (fnmatch("user.*", key, FNM_NOESCAPE) != 0) {
+ ret = gf_strip_whitespace(value, strlen(value));
+ if (ret == -1)
+ goto out;
+ }
- if (ret)
- goto out;
+ if (strlen(value) == 0) {
+ ret = -1;
+ goto out;
}
- ret = dict_set_int32 (dict, "count", wordcount-3);
+ if (cli_is_key_spl(key)) {
+ ret = -1;
+ goto out;
+ }
+
+ sprintf(str, "key%d", count);
+ ret = dict_set_str(dict, str, key);
+ if (ret)
+ goto out;
+
+ sprintf(str, "value%d", count);
+ ret = dict_set_str(dict, str, value);
if (ret)
+ goto out;
+
+ if ((!strcmp(key, "cluster.enable-shared-storage")) &&
+ (!strcmp(value, "disable"))) {
+ question =
+ "Disabling cluster.enable-shared-storage "
+ "will delete the shared storage volume"
+ "(gluster_shared_storage), which is used "
+ "by snapshot scheduler, geo-replication "
+ "and NFS-Ganesha. Do you still want to "
+ "continue?";
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Operation "
+ "cancelled, exiting");
+ *op_errstr = gf_strdup("Aborted by user.");
+ ret = -1;
+ goto out;
+ }
+ }
+ if ((!strcmp(key, "nfs.disable")) && (!strcmp(value, "off"))) {
+ question =
+ "Gluster NFS is being deprecated in favor "
+ "of NFS-Ganesha Enter \"yes\" to continue "
+ "using Gluster NFS";
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Operation "
+ "cancelled, exiting");
+ *op_errstr = gf_strdup("Aborted by user.");
+ ret = -1;
goto out;
+ }
+ }
+ }
- *options = dict;
+ ret = dict_set_int32(dict, "count", wordcount - 3);
+
+ if (ret)
+ goto out;
+
+ *options = dict;
out:
- if (ret) {
- if (dict)
- dict_destroy (dict);
- }
+ if (ret && dict)
+ dict_unref(dict);
- return ret;
+ return ret;
}
int32_t
-cli_cmd_volume_add_brick_parse (const char **words, int wordcount,
- dict_t **options)
+cli_cmd_volume_add_brick_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, int *ret_type)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- int ret = -1;
- int brick_count = 0, brick_index = 0;
- char *bricks = NULL;
- char *opwords_cl[] = { "replica", "stripe", NULL };
- gf1_cluster_type type = GF_CLUSTER_TYPE_NONE;
- int count = 1;
- char *w = NULL;
- int index;
-
- GF_ASSERT (words);
- GF_ASSERT (options);
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+ int brick_count = 0, brick_index = 0;
+ char *bricks = NULL;
+ static char *opwords_cl[] = {"replica", "stripe", NULL};
+ gf1_cluster_type type = GF_CLUSTER_TYPE_NONE;
+ int count = 1;
+ int arbiter_count = 0;
+ char *w = NULL;
+ int index;
+ gf_boolean_t is_force = _gf_false;
+ int wc = wordcount;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question = NULL;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+
+ if (!dict)
+ goto out;
- dict = dict_new ();
+ if (wordcount < 3)
+ goto out;
- if (!dict)
- goto out;
+ volname = (char *)words[2];
- if (wordcount < 3)
- goto out;
+ GF_ASSERT(volname);
- volname = (char *)words[2];
+ ret = dict_set_str(dict, "volname", volname);
- GF_ASSERT (volname);
+ if (ret)
+ goto out;
- ret = dict_set_str (dict, "volname", volname);
+ if (wordcount < 4) {
+ ret = -1;
+ goto out;
+ }
+ if (wordcount < 6) {
+ /* seems no options are given, go directly to the parse_brick */
+ brick_index = 3;
+ type = GF_CLUSTER_TYPE_NONE;
+ goto parse_bricks;
+ }
+ w = str_getunamb(words[3], opwords_cl);
+ if (!w) {
+ type = GF_CLUSTER_TYPE_NONE;
+ index = 3;
+ } else if ((strcmp(w, "replica")) == 0) {
+ type = GF_CLUSTER_TYPE_REPLICATE;
+ count = strtol(words[4], NULL, 0);
+ if (!count || (count < 2)) {
+ cli_err("replica count should be greater than 1");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32(dict, "replica-count", count);
if (ret)
- goto out;
-
- if (wordcount < 4) {
+ goto out;
+ index = 5;
+ if (words[index] && !strcmp(words[index], "arbiter")) {
+ arbiter_count = strtol(words[6], NULL, 0);
+ if (arbiter_count != 1 || count != 3) {
+ cli_err(
+ "For arbiter configuration, replica "
+ "count must be 3 and arbiter count "
+ "must be 1. The 3rd brick of the "
+ "replica will be the arbiter");
ret = -1;
goto out;
- }
- if (wordcount < 6) {
- /* seems no options are given, go directly to the parse_brick */
- brick_index = 3;
- type = GF_CLUSTER_TYPE_NONE;
- goto parse_bricks;
+ }
+ ret = dict_set_int32(dict, "arbiter-count", arbiter_count);
+ if (ret)
+ goto out;
+ index = 7;
}
- w = str_getunamb (words[3], opwords_cl);
- if (!w) {
- type = GF_CLUSTER_TYPE_NONE;
- index = 3;
- } else if ((strcmp (w, "replica")) == 0) {
- type = GF_CLUSTER_TYPE_REPLICATE;
- if (wordcount < 5) {
- ret = -1;
- goto out;
- }
- count = strtol (words[4], NULL, 0);
- if (!count || (count < 2)) {
- cli_err ("replica count should be greater than 1");
- ret = -1;
- goto out;
- }
- ret = dict_set_int32 (dict, "replica-count", count);
- if (ret)
- goto out;
- index = 5;
- } else if ((strcmp (w, "stripe")) == 0) {
- type = GF_CLUSTER_TYPE_STRIPE;
- if (wordcount < 5) {
- ret = -1;
- goto out;
+ if (count == 2) {
+ if (strcmp(words[wordcount - 1], "force")) {
+ question =
+ "Replica 2 volumes are prone to "
+ "split-brain. Use Arbiter or "
+ "Replica 3 to avoid this. See: "
+ "http://docs.gluster.org/en/latest/Administrator%20Guide/"
+ "Split%20brain%20and%20ways%20to%20deal%20with%20it/."
+ "\nDo you still want to continue?\n";
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Add brick"
+ " cancelled, exiting");
+ ret = -1;
+ goto out;
}
- count = strtol (words[4], NULL, 0);
- if (!count || (count < 2)) {
- cli_err ("stripe count should be greater than 1");
- ret = -1;
- goto out;
- }
- ret = dict_set_int32 (dict, "stripe-count", count);
- if (ret)
- goto out;
- index = 5;
- } else {
- GF_ASSERT (!"opword mismatch");
- ret = -1;
- goto out;
+ }
}
+ } else if ((strcmp(w, "stripe")) == 0) {
+ cli_err("stripe option not supported");
+ goto out;
+ } else {
+ GF_ASSERT(!"opword mismatch");
+ ret = -1;
+ goto out;
+ }
- brick_index = index;
+ brick_index = index;
parse_bricks:
- ret = cli_cmd_bricks_parse (words, wordcount, brick_index, &bricks,
- &brick_count);
- if (ret)
- goto out;
- ret = dict_set_dynstr (dict, "bricks", bricks);
- if (ret)
- goto out;
+ if (strcmp(words[wordcount - 1], "force") == 0) {
+ is_force = _gf_true;
+ wc = wordcount - 1;
+ }
- ret = dict_set_int32 (dict, "count", brick_count);
+ ret = cli_cmd_bricks_parse(words, wc, brick_index, &bricks, &brick_count);
+ if (ret)
+ goto out;
- if (ret)
- goto out;
+ ret = dict_set_dynstr(dict, "bricks", bricks);
+ if (ret)
+ goto out;
- *options = dict;
+ ret = dict_set_int32(dict, "count", brick_count);
+
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32(dict, "force", is_force);
+ if (ret)
+ goto out;
+
+ *options = dict;
out:
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Unable to parse add-brick CLI");
- if (dict)
- dict_destroy (dict);
- }
+ if (ret_type)
+ *ret_type = type;
- return ret;
-}
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to parse add-brick CLI");
+ if (dict)
+ dict_unref(dict);
+ }
+ return ret;
+}
int32_t
-cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
- dict_t **options, int *question)
+cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options,
+ int *question, int *brick_count,
+ int32_t *comm)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- char *delimiter = NULL;
- int ret = -1;
- char key[50];
- int brick_count = 0, brick_index = 0;
- int32_t tmp_index = 0;
- int32_t j = 0;
- char *tmp_brick = NULL;
- char *tmp_brick1 = NULL;
- char *type_opword[] = { "replica", NULL };
- char *opwords[] = { "start", "commit", "stop", "status",
- "force", NULL };
- char *w = NULL;
- int32_t command = GF_OP_CMD_NONE;
- long count = 0;
-
- GF_ASSERT (words);
- GF_ASSERT (options);
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char *delimiter = NULL;
+ int ret = -1;
+ char key[50];
+ int brick_index = 0;
+ int32_t tmp_index = 0;
+ int32_t j = 0;
+ char *tmp_brick = NULL;
+ char *tmp_brick1 = NULL;
+ static char *type_opword[] = {"replica", NULL};
+ static char *opwords[] = {"start", "commit", "stop",
+ "status", "force", NULL};
+ char *w = NULL;
+ int32_t command = GF_OP_CMD_NONE;
+ long count = 0;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *ques = NULL;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ if (wordcount < 5)
+ goto out;
- if (wordcount < 4)
- goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
- dict = dict_new ();
- if (!dict)
- goto out;
+ volname = (char *)words[2];
- volname = (char *)words[2];
+ GF_ASSERT(volname);
- GF_ASSERT (volname);
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
- ret = dict_set_str (dict, "volname", volname);
- if (ret)
- goto out;
+ brick_index = 3;
+ w = str_getunamb(words[3], type_opword);
+ if (w && !strcmp("replica", w)) {
+ if (wordcount < 6) {
+ ret = -1;
+ goto out;
+ }
+ count = strtol(words[4], NULL, 0);
+ if (count < 1) {
+ cli_err(
+ "replica count should be greater than 0 in "
+ "case of remove-brick");
+ ret = -1;
+ goto out;
+ }
- brick_index = 3;
- w = str_getunamb (words[3], type_opword);
- if (w && !strcmp ("replica", w)) {
- if (wordcount < 5) {
- ret = -1;
- goto out;
- }
- count = strtol (words[4], NULL, 0);
- if (count < 1) {
- cli_err ("replica count should be greater than 0 in "
- "case of remove-brick");
- ret = -1;
- goto out;
+ if (count == 2) {
+ if (strcmp(words[wordcount - 1], "force")) {
+ ques =
+ "Replica 2 volumes are prone to "
+ "split-brain. Use Arbiter or Replica 3 "
+ "to avoid this. See: "
+ "http://docs.gluster.org/en/latest/Administrator%20Guide/"
+ "Split%20brain%20and%20ways%20to%20deal%20with%20it/."
+ "\nDo you still want to continue?\n";
+ answer = cli_cmd_get_confirmation(state, ques);
+ if (GF_ANSWER_NO == answer) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Remove "
+ "brick cancelled, exiting");
+ ret = -1;
+ goto out;
}
-
- ret = dict_set_int32 (dict, "replica-count", count);
- if (ret)
- goto out;
- brick_index = 5;
- } else if (w) {
- GF_ASSERT (!"opword mismatch");
+ }
}
- w = str_getunamb (words[wordcount - 1], opwords);
- if (!w) {
- /* Should be default 'force' */
- command = GF_OP_CMD_COMMIT_FORCE;
- if (question)
- *question = 1;
+ ret = dict_set_int32(dict, "replica-count", count);
+ if (ret)
+ goto out;
+ brick_index = 5;
+ } else if (w) {
+ GF_ASSERT(!"opword mismatch");
+ }
+
+ w = str_getunamb(words[wordcount - 1], opwords);
+ if (!w) {
+ ret = -1;
+ goto out;
+ } else {
+ /* handled this option */
+ wordcount--;
+ if (!strcmp("start", w)) {
+ command = GF_OP_CMD_START;
+ if (question)
+ *question = 1;
+ } else if (!strcmp("commit", w)) {
+ command = GF_OP_CMD_COMMIT;
+ } else if (!strcmp("stop", w)) {
+ command = GF_OP_CMD_STOP;
+ } else if (!strcmp("status", w)) {
+ command = GF_OP_CMD_STATUS;
+ } else if (!strcmp("force", w)) {
+ command = GF_OP_CMD_COMMIT_FORCE;
+ if (question)
+ *question = 1;
} else {
- /* handled this option */
- wordcount--;
- if (!strcmp ("start", w)) {
- command = GF_OP_CMD_START;
- } else if (!strcmp ("commit", w)) {
- command = GF_OP_CMD_COMMIT;
- if (question)
- *question = 1;
- } else if (!strcmp ("stop", w)) {
- command = GF_OP_CMD_STOP;
- } else if (!strcmp ("status", w)) {
- command = GF_OP_CMD_STATUS;
- } else if (!strcmp ("force", w)) {
- command = GF_OP_CMD_COMMIT_FORCE;
- if (question)
- *question = 1;
- } else {
- GF_ASSERT (!"opword mismatch");
- ret = -1;
- goto out;
- }
+ GF_ASSERT(!"opword mismatch");
+ ret = -1;
+ goto out;
}
+ }
- if (wordcount < 4) {
- ret = -1;
- goto out;
- }
+ ret = dict_set_int32(dict, "command", command);
+ if (ret)
+ gf_log("cli", GF_LOG_INFO, "failed to set 'command' %d", command);
- ret = dict_set_int32 (dict, "command", command);
- if (ret)
- gf_log ("cli", GF_LOG_INFO, "failed to set 'command' %d",
- command);
+ tmp_index = brick_index;
+ tmp_brick = GF_MALLOC(2048 * sizeof(*tmp_brick), gf_common_mt_char);
+ if (!tmp_brick) {
+ gf_log("", GF_LOG_ERROR,
+ "cli_cmd_volume_remove_brick_parse: "
+ "Unable to get memory");
+ ret = -1;
+ goto out;
+ }
- tmp_index = brick_index;
- tmp_brick = GF_MALLOC(2048 * sizeof(*tmp_brick), gf_common_mt_char);
+ tmp_brick1 = GF_MALLOC(2048 * sizeof(*tmp_brick1), gf_common_mt_char);
- if (!tmp_brick) {
- gf_log ("",GF_LOG_ERROR,"cli_cmd_volume_remove_brick_parse: "
- "Unable to get memory");
- ret = -1;
+ if (!tmp_brick1) {
+ gf_log("", GF_LOG_ERROR,
+ "cli_cmd_volume_remove_brick_parse: "
+ "Unable to get memory");
+ ret = -1;
+ goto out;
+ }
+
+ while (brick_index < wordcount) {
+ if (validate_brick_name((char *)words[brick_index])) {
+ cli_err(
+ "wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>",
+ words[brick_index]);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr(words[brick_index], ':');
+ ret = gf_canonicalize_path(delimiter + 1);
+ if (ret)
goto out;
}
- tmp_brick1 = GF_MALLOC(2048 * sizeof(*tmp_brick1), gf_common_mt_char);
-
- if (!tmp_brick1) {
- gf_log ("",GF_LOG_ERROR,"cli_cmd_volume_remove_brick_parse: "
- "Unable to get memory");
+ j = tmp_index;
+ strcpy(tmp_brick, words[brick_index]);
+ while (j < brick_index) {
+ strcpy(tmp_brick1, words[j]);
+ if (!(strcmp(tmp_brick, tmp_brick1))) {
+ gf_log("", GF_LOG_ERROR,
+ "Duplicate bricks"
+ " found %s",
+ words[brick_index]);
+ cli_err("Duplicate bricks found %s", words[brick_index]);
ret = -1;
goto out;
+ }
+ j++;
}
+ snprintf(key, 50, "brick%d", ++(*brick_count));
+ ret = dict_set_str(dict, key, (char *)words[brick_index++]);
- while (brick_index < wordcount) {
- if (validate_brick_name ((char *)words[brick_index])) {
- cli_err ("wrong brick type: %s, use <HOSTNAME>:"
- "<export-dir-abs-path>", words[brick_index]);
- ret = -1;
- goto out;
- } else {
- delimiter = strrchr(words[brick_index], ':');
- ret = gf_canonicalize_path (delimiter + 1);
- if (ret)
- goto out;
- }
-
- j = tmp_index;
- strcpy(tmp_brick, words[brick_index]);
- while ( j < brick_index) {
- strcpy(tmp_brick1, words[j]);
- if (!(strcmp (tmp_brick, tmp_brick1))) {
- gf_log("",GF_LOG_ERROR, "Duplicate bricks"
- " found %s", words[brick_index]);
- cli_err("Duplicate bricks found %s",
- words[brick_index]);
- ret = -1;
- goto out;
- }
- j++;
- }
- snprintf (key, 50, "brick%d", ++brick_count);
- ret = dict_set_str (dict, key, (char *)words[brick_index++]);
-
- if (ret)
- goto out;
- }
+ if (ret)
+ goto out;
+ }
- ret = dict_set_int32 (dict, "count", brick_count);
+ if (command != GF_OP_CMD_STATUS && command != GF_OP_CMD_STOP) {
+ ret = dict_set_int32(dict, "count", *brick_count);
if (ret)
- goto out;
+ goto out;
+ }
- *options = dict;
+ *options = dict;
out:
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Unable to parse remove-brick CLI");
- if (dict)
- dict_destroy (dict);
- }
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to parse remove-brick CLI");
+ if (dict)
+ dict_unref(dict);
+ }
- if (tmp_brick)
- GF_FREE (tmp_brick);
- if (tmp_brick1)
- GF_FREE (tmp_brick1);
+ GF_FREE(tmp_brick);
+ GF_FREE(tmp_brick1);
- return ret;
-}
+ *comm = command;
+ return ret;
+}
int32_t
-cli_cmd_volume_replace_brick_parse (const char **words, int wordcount,
- dict_t **options)
+cli_cmd_brick_op_validate_bricks(const char **words, dict_t *dict, int src,
+ int dst)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- int ret = -1;
- int op_index = 0;
- char *delimiter = NULL;
- gf1_cli_replace_op replace_op = GF_REPLACE_OP_NONE;
- char *opwords[] = { "start", "commit", "pause", "abort", "status",
- NULL };
- char *w = NULL;
+ int ret = -1;
+ char *delimiter = NULL;
+
+ if (validate_brick_name((char *)words[src])) {
+ cli_err(
+ "wrong brick type: %s, use "
+ "<HOSTNAME>:<export-dir-abs-path>",
+ words[3]);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr((char *)words[src], '/');
+ ret = gf_canonicalize_path(delimiter);
+ if (ret)
+ goto out;
+ }
- GF_ASSERT (words);
- GF_ASSERT (options);
+ ret = dict_set_str(dict, "src-brick", (char *)words[src]);
+ if (ret)
+ goto out;
- dict = dict_new ();
+ if (dst == -1) {
+ ret = 0;
+ goto out;
+ }
- if (!dict)
- goto out;
+ if (validate_brick_name((char *)words[dst])) {
+ cli_err(
+ "wrong brick type: %s, use "
+ "<HOSTNAME>:<export-dir-abs-path>",
+ words[dst]);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr((char *)words[dst], '/');
+ ret = gf_canonicalize_path(delimiter);
+ if (ret)
+ goto out;
+ }
- if (wordcount < 3)
- goto out;
+ ret = dict_set_str(dict, "dst-brick", (char *)words[dst]);
+ if (ret)
+ goto out;
+ ret = 0;
+out:
+ return ret;
+}
- volname = (char *)words[2];
+int32_t
+cli_cmd_volume_reset_brick_parse(const char **words, int wordcount,
+ dict_t **options)
+{
+ int ret = -1;
+ char *volname = NULL;
+ dict_t *dict = NULL;
- GF_ASSERT (volname);
+ if (wordcount < 5 || wordcount > 7)
+ goto out;
- ret = dict_set_str (dict, "volname", volname);
+ dict = dict_new();
- if (ret)
- goto out;
+ if (!dict)
+ goto out;
- if (wordcount < 4) {
- ret = -1;
- goto out;
- }
+ volname = (char *)words[2];
- if (validate_brick_name ((char *)words[3])) {
- cli_err ("wrong brick type: %s, use "
- "<HOSTNAME>:<export-dir-abs-path>", words[3]);
- ret = -1;
- goto out;
- } else {
- delimiter = strrchr ((char *)words[3], ':');
- ret = gf_canonicalize_path (delimiter + 1);
- if (ret)
- goto out;
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
+
+ if (wordcount == 5) {
+ if (strcmp(words[4], "start")) {
+ cli_err(
+ "Invalid option '%s' for reset-brick. Please "
+ "enter valid reset-brick command",
+ words[4]);
+ ret = -1;
+ goto out;
}
- ret = dict_set_str (dict, "src-brick", (char *)words[3]);
+ ret = cli_cmd_brick_op_validate_bricks(words, dict, 3, -1);
if (ret)
- goto out;
+ goto out;
- if (wordcount < 5) {
- ret = -1;
- goto out;
+ ret = dict_set_str(dict, "operation", "GF_RESET_OP_START");
+ if (ret)
+ goto out;
+ } else if (wordcount == 6) {
+ if (strcmp(words[5], "commit")) {
+ cli_err(
+ "Invalid option '%s' for reset-brick. Please "
+ "enter valid reset-brick command",
+ words[5]);
+ ret = -1;
+ goto out;
}
- if (validate_brick_name ((char *)words[4])) {
- cli_err ("wrong brick type: %s, use "
- "<HOSTNAME>:<export-dir-abs-path>", words[4]);
- ret = -1;
- goto out;
- } else {
- delimiter = strrchr ((char *)words[4], ':');
- ret = gf_canonicalize_path (delimiter + 1);
- if (ret)
- goto out;
- }
+ ret = cli_cmd_brick_op_validate_bricks(words, dict, 3, 4);
+ if (ret)
+ goto out;
+ ret = dict_set_str(dict, "operation", "GF_RESET_OP_COMMIT");
+ if (ret)
+ goto out;
+ } else if (wordcount == 7) {
+ if (strcmp(words[5], "commit") || strcmp(words[6], "force")) {
+ cli_err(
+ "Invalid option '%s %s' for reset-brick. Please "
+ "enter valid reset-brick command",
+ words[5], words[6]);
+ ret = -1;
+ goto out;
+ }
- ret = dict_set_str (dict, "dst-brick", (char *)words[4]);
+ ret = cli_cmd_brick_op_validate_bricks(words, dict, 3, 4);
+ if (ret)
+ goto out;
+ ret = dict_set_str(dict, "operation", "GF_RESET_OP_COMMIT_FORCE");
if (ret)
- goto out;
+ goto out;
+ }
- op_index = 5;
- if ((wordcount < (op_index + 1))) {
- ret = -1;
- goto out;
- }
+ *options = dict;
- w = str_getunamb (words[op_index], opwords);
+out:
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to parse reset-brick CLI");
+ if (dict)
+ dict_unref(dict);
+ }
- if (!w) {
- } else if (!strcmp ("start", w)) {
- replace_op = GF_REPLACE_OP_START;
- } else if (!strcmp ("commit", w)) {
- replace_op = GF_REPLACE_OP_COMMIT;
- } else if (!strcmp ("pause", w)) {
- replace_op = GF_REPLACE_OP_PAUSE;
- } else if (!strcmp ("abort", w)) {
- replace_op = GF_REPLACE_OP_ABORT;
- } else if (!strcmp ("status", w)) {
- replace_op = GF_REPLACE_OP_STATUS;
- } else
- GF_ASSERT (!"opword mismatch");
-
- /* commit force option */
-
- op_index = 6;
-
- if (wordcount > (op_index + 1)) {
- ret = -1;
- goto out;
- }
+ return ret;
+}
- if (wordcount == (op_index + 1)) {
- if (replace_op != GF_REPLACE_OP_COMMIT) {
- ret = -1;
- goto out;
- }
- if (!strcmp ("force", words[op_index])) {
- replace_op = GF_REPLACE_OP_COMMIT_FORCE;
- }
- }
+int32_t
+cli_cmd_volume_replace_brick_parse(const char **words, int wordcount,
+ dict_t **options)
+{
+ int ret = -1;
+ char *volname = NULL;
+ dict_t *dict = NULL;
- if (replace_op == GF_REPLACE_OP_NONE) {
- ret = -1;
- goto out;
- }
+ GF_ASSERT(words);
+ GF_ASSERT(options);
- ret = dict_set_int32 (dict, "operation", (int32_t) replace_op);
+ if (wordcount != 7) {
+ ret = -1;
+ goto out;
+ }
- if (ret)
- goto out;
+ dict = dict_new();
+ if (!dict) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to allocate dictionary");
+ goto out;
+ }
- *options = dict;
+ volname = (char *)words[2];
+
+ GF_ASSERT(volname);
+
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_brick_op_validate_bricks(words, dict, 3, 4);
+ if (ret)
+ goto out;
+
+ /* commit force option */
+ if (strcmp("commit", words[5]) || strcmp("force", words[6])) {
+ cli_err(
+ "Invalid option '%s' '%s' for replace-brick. Please "
+ "enter valid replace-brick command",
+ words[5], words[6]);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "operation", "GF_REPLACE_OP_COMMIT_FORCE");
+ if (ret)
+ goto out;
+
+ *options = dict;
out:
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Unable to parse replace-brick CLI");
- if (dict)
- dict_destroy (dict);
- }
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to parse reset-brick CLI");
+ if (dict)
+ dict_unref(dict);
+ }
- return ret;
+ return ret;
}
int32_t
-cli_cmd_log_filename_parse (const char **words, int wordcount, dict_t **options)
+cli_cmd_log_filename_parse(const char **words, int wordcount, dict_t **options)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- char *str = NULL;
- int ret = -1;
- char *delimiter = NULL;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char *str = NULL;
+ int ret = -1;
+ char *delimiter = NULL;
- GF_ASSERT (words);
- GF_ASSERT (options);
+ GF_ASSERT(words);
+ GF_ASSERT(options);
- dict = dict_new ();
- if (!dict)
- goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
- volname = (char *)words[3];
- GF_ASSERT (volname);
+ volname = (char *)words[3];
+ GF_ASSERT(volname);
- ret = dict_set_str (dict, "volname", volname);
- if (ret)
- goto out;
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
- str = (char *)words[4];
- if (strchr (str, ':')) {
- delimiter = strchr (words[4], ':');
- if (!delimiter || delimiter == words[4]
- || *(delimiter+1) != '/') {
- cli_err ("wrong brick type: %s, use <HOSTNAME>:"
- "<export-dir-abs-path>", words[4]);
- ret = -1;
- goto out;
- } else {
- ret = gf_canonicalize_path (delimiter + 1);
- if (ret)
- goto out;
- }
- ret = dict_set_str (dict, "brick", str);
- if (ret)
- goto out;
- /* Path */
- str = (char *)words[5];
- ret = dict_set_str (dict, "path", str);
- if (ret)
- goto out;
+ str = (char *)words[4];
+ if (strchr(str, ':')) {
+ delimiter = strchr(words[4], ':');
+ if (!delimiter || delimiter == words[4] || *(delimiter + 1) != '/') {
+ cli_err(
+ "wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>",
+ words[4]);
+ ret = -1;
+ goto out;
} else {
- ret = dict_set_str (dict, "path", str);
- if (ret)
- goto out;
+ ret = gf_canonicalize_path(delimiter + 1);
+ if (ret)
+ goto out;
}
+ ret = dict_set_str(dict, "brick", str);
+ if (ret)
+ goto out;
+ /* Path */
+ str = (char *)words[5];
+ ret = dict_set_str(dict, "path", str);
+ if (ret)
+ goto out;
+ } else {
+ ret = dict_set_str(dict, "path", str);
+ if (ret)
+ goto out;
+ }
- *options = dict;
+ *options = dict;
out:
- if (ret && dict)
- dict_destroy (dict);
+ if (ret && dict)
+ dict_unref(dict);
- return ret;
+ return ret;
}
int32_t
-cli_cmd_log_level_parse (const char **words, int worcount, dict_t **options)
+cli_cmd_log_level_parse(const char **words, int worcount, dict_t **options)
{
- dict_t *dict = NULL;
- int ret = -1;
-
- GF_ASSERT (words);
- GF_ASSERT (options);
-
- /*
- * loglevel command format:
- * > volume log level <VOL> <XLATOR[*]> <LOGLEVEL>
- * > volume log level colon-o posix WARNING
- * > volume log level colon-o replicate* DEBUG
- * > volume log level coon-o * TRACE
- */
-
- GF_ASSERT ((strncmp(words[0], "volume", 6) == 0));
- GF_ASSERT ((strncmp(words[1], "log", 3) == 0));
- GF_ASSERT ((strncmp(words[2], "level", 5) == 0));
-
- ret = glusterd_check_log_level(words[5]);
- if (ret == -1) {
- cli_err("Invalid log level [%s] specified", words[5]);
- cli_err("Valid values for loglevel: (DEBUG|WARNING|ERROR"
- "|CRITICAL|NONE|TRACE)");
- goto out;
- }
+ dict_t *dict = NULL;
+ int ret = -1;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ /*
+ * loglevel command format:
+ * > volume log level <VOL> <XLATOR[*]> <LOGLEVEL>
+ * > volume log level colon-o posix WARNING
+ * > volume log level colon-o replicate* DEBUG
+ * > volume log level coon-o * TRACE
+ */
+
+ GF_ASSERT((strncmp(words[0], "volume", 6) == 0));
+ GF_ASSERT((strncmp(words[1], "log", 3) == 0));
+ GF_ASSERT((strncmp(words[2], "level", 5) == 0));
+
+ ret = glusterd_check_log_level(words[5]);
+ if (ret == -1) {
+ cli_err("Invalid log level [%s] specified", words[5]);
+ cli_err(
+ "Valid values for loglevel: (DEBUG|WARNING|ERROR"
+ "|CRITICAL|NONE|TRACE)");
+ goto out;
+ }
- dict = dict_new ();
- if (!dict)
- goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
- GF_ASSERT(words[3]);
- GF_ASSERT(words[4]);
+ GF_ASSERT(words[3]);
+ GF_ASSERT(words[4]);
- ret = dict_set_str (dict, "volname", (char *)words[3]);
- if (ret)
- goto out;
+ ret = dict_set_str(dict, "volname", (char *)words[3]);
+ if (ret)
+ goto out;
- ret = dict_set_str (dict, "xlator", (char *)words[4]);
- if (ret)
- goto out;
+ ret = dict_set_str(dict, "xlator", (char *)words[4]);
+ if (ret)
+ goto out;
- ret = dict_set_str (dict, "loglevel", (char *)words[5]);
- if (ret)
- goto out;
+ ret = dict_set_str(dict, "loglevel", (char *)words[5]);
+ if (ret)
+ goto out;
- *options = dict;
+ *options = dict;
- out:
- if (ret && dict)
- dict_destroy (dict);
+out:
+ if (ret && dict)
+ dict_unref(dict);
- return ret;
+ return ret;
}
int32_t
-cli_cmd_log_locate_parse (const char **words, int wordcount, dict_t **options)
+cli_cmd_log_locate_parse(const char **words, int wordcount, dict_t **options)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- char *str = NULL;
- int ret = -1;
- char *delimiter = NULL;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char *str = NULL;
+ int ret = -1;
+ char *delimiter = NULL;
- GF_ASSERT (words);
- GF_ASSERT (options);
+ GF_ASSERT(words);
+ GF_ASSERT(options);
- dict = dict_new ();
- if (!dict)
- goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
- volname = (char *)words[3];
- GF_ASSERT (volname);
+ volname = (char *)words[3];
+ GF_ASSERT(volname);
- ret = dict_set_str (dict, "volname", volname);
- if (ret)
- goto out;
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
- if (words[4]) {
- delimiter = strchr (words[4], ':');
- if (!delimiter || delimiter == words[4]
- || *(delimiter+1) != '/') {
- cli_err ("wrong brick type: %s, use <HOSTNAME>:"
- "<export-dir-abs-path>", words[4]);
- ret = -1;
- goto out;
- } else {
- ret = gf_canonicalize_path (delimiter + 1);
- if (ret)
- goto out;
- }
- str = (char *)words[4];
- ret = dict_set_str (dict, "brick", str);
- if (ret)
- goto out;
+ if (words[4]) {
+ delimiter = strchr(words[4], ':');
+ if (!delimiter || delimiter == words[4] || *(delimiter + 1) != '/') {
+ cli_err(
+ "wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>",
+ words[4]);
+ ret = -1;
+ goto out;
+ } else {
+ ret = gf_canonicalize_path(delimiter + 1);
+ if (ret)
+ goto out;
}
+ str = (char *)words[4];
+ ret = dict_set_str(dict, "brick", str);
+ if (ret)
+ goto out;
+ }
- *options = dict;
+ *options = dict;
out:
- if (ret && dict)
- dict_destroy (dict);
+ if (ret && dict)
+ dict_unref(dict);
- return ret;
+ return ret;
}
int32_t
-cli_cmd_log_rotate_parse (const char **words, int wordcount, dict_t **options)
+cli_cmd_log_rotate_parse(const char **words, int wordcount, dict_t **options)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- char *str = NULL;
- int ret = -1;
- char *delimiter = NULL;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char *str = NULL;
+ int ret = -1;
+ char *delimiter = NULL;
- GF_ASSERT (words);
- GF_ASSERT (options);
+ GF_ASSERT(words);
+ GF_ASSERT(options);
- dict = dict_new ();
- if (!dict)
- goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
- volname = (char *)words[3];
- GF_ASSERT (volname);
+ if (strcmp("rotate", words[3]) == 0)
+ volname = (char *)words[2];
+ GF_ASSERT(volname);
- ret = dict_set_str (dict, "volname", volname);
- if (ret)
- goto out;
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
- if (words[4]) {
- delimiter = strchr (words[4], ':');
- if (!delimiter || delimiter == words[4]
- || *(delimiter+1) != '/') {
- cli_err ("wrong brick type: %s, use <HOSTNAME>:"
- "<export-dir-abs-path>", words[4]);
- ret = -1;
- goto out;
- } else {
- ret = gf_canonicalize_path (delimiter + 1);
- if (ret)
- goto out;
- }
- str = (char *)words[4];
- ret = dict_set_str (dict, "brick", str);
- if (ret)
- goto out;
+ if (words[4]) {
+ delimiter = strchr(words[4], ':');
+ if (!delimiter || delimiter == words[4] || *(delimiter + 1) != '/') {
+ cli_err(
+ "wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>",
+ words[4]);
+ ret = -1;
+ goto out;
+ } else {
+ ret = gf_canonicalize_path(delimiter + 1);
+ if (ret)
+ goto out;
}
+ str = (char *)words[4];
+ ret = dict_set_str(dict, "brick", str);
+ if (ret)
+ goto out;
+ }
- *options = dict;
+ *options = dict;
out:
- if (ret && dict)
- dict_destroy (dict);
+ if (ret && dict)
+ dict_unref(dict);
- return ret;
+ return ret;
}
static gf_boolean_t
-gsyncd_url_check (const char *w)
+gsyncd_url_check(const char *w)
{
- return !!strpbrk (w, ":/");
+ return !!strpbrk(w, ":/");
}
static gf_boolean_t
-gsyncd_glob_check (const char *w)
+valid_slave_gsyncd_url(const char *w)
{
- return !!strpbrk (w, "*?[");
+ if (strstr(w, ":::"))
+ return _gf_false;
+ else if (strstr(w, "::"))
+ return _gf_true;
+ else
+ return _gf_false;
}
-int32_t
-cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **options)
+static gf_boolean_t
+gsyncd_glob_check(const char *w)
{
- int32_t ret = -1;
- dict_t *dict = NULL;
- gf1_cli_gsync_set type = GF_GSYNC_OPTION_TYPE_NONE;
- char *append_str = NULL;
- size_t append_len = 0;
- char *subop = NULL;
- int i = 0;
- unsigned masteri = 0;
- unsigned slavei = 0;
- unsigned glob = 0;
- unsigned cmdi = 0;
- char *opwords[] = { "status", "start", "stop", "config",
- "log-rotate", NULL };
- char *w = NULL;
-
- GF_ASSERT (words);
- GF_ASSERT (options);
-
- dict = dict_new ();
- if (!dict)
- goto out;
-
- /* new syntax:
- *
- * volume geo-replication [$m [$s]] status
- * volume geo-replication [$m] $s config [[!]$opt [$val]]
- * volume geo-replication $m $s start|stop
- * volume geo-replication $m [$s] log-rotate
- */
+ return !!strpbrk(w, "*?[");
+}
- if (wordcount < 3)
+static int
+config_parse(const char **words, int wordcount, dict_t *dict, unsigned cmdi,
+ unsigned glob)
+{
+ int32_t ret = -1;
+ int32_t i = -1;
+ char *append_str = NULL;
+ size_t append_len = 0;
+ char *subop = NULL;
+ char *ret_chkpt = NULL;
+ struct tm checkpoint_time;
+ char chkpt_buf[20] = "";
+
+ switch ((wordcount - 1) - cmdi) {
+ case 0:
+ subop = gf_strdup("get-all");
+ break;
+ case 1:
+ if (words[cmdi + 1][0] == '!') {
+ (words[cmdi + 1])++;
+ if (gf_asprintf(&subop, "del%s", glob ? "-glob" : "") == -1)
+ subop = NULL;
+ } else
+ subop = gf_strdup("get");
+
+ ret = dict_set_str(dict, "op_name", ((char *)words[cmdi + 1]));
+ if (ret < 0)
goto out;
+ break;
+ default:
+ if (gf_asprintf(&subop, "set%s", glob ? "-glob" : "") == -1)
+ subop = NULL;
- for (i = 2; i <= 3 && i < wordcount - 1; i++) {
- if (gsyncd_glob_check (words[i]))
- glob = i;
- if (gsyncd_url_check (words[i])) {
- slavei = i;
- break;
- }
- }
+ ret = dict_set_str(dict, "op_name", ((char *)words[cmdi + 1]));
+ if (ret < 0)
+ goto out;
- if (glob && !slavei)
- /* glob is allowed only for config, thus it implies there is a
- * slave argument; but that might have not been recognized on
- * the first scan as it's url characteristics has been covered
- * by the glob syntax.
- *
- * In this case, the slave is perforce the last glob-word -- the
- * upcoming one is neither glob, nor url, so it's definitely not
- * the slave.
- */
- slavei = glob;
- if (slavei) {
- cmdi = slavei + 1;
- if (slavei == 3)
- masteri = 2;
- } else if (i <= 3) {
- /* no $s, can only be status cmd
- * (with either a single $m before it or nothing)
- * -- these conditions imply that i <= 3 after
- * the iteration and that i is the successor of
- * the (0 or 1 length) sequence of $m-s.
- */
- cmdi = i;
- if (i == 3)
- masteri = 2;
- } else
- goto out;
-
- /* now check if input really complies syntax
- * (in a somewhat redundant way, in favor
- * transparent soundness)
- */
+ /* join the varargs by spaces to get the op_value */
- if (masteri && gsyncd_url_check (words[masteri]))
- goto out;
- if (slavei && !glob && !gsyncd_url_check (words[slavei]))
+ for (i = cmdi + 2; i < wordcount; i++)
+ append_len += (strlen(words[i]) + 1);
+ /* trailing strcat will add two bytes, make space for that */
+ append_len++;
+
+ /* strcat is used on this allocation and hence expected to be
+ * initiatlized to 0. So GF_CALLOC is used.
+ */
+ append_str = GF_CALLOC(1, append_len, cli_mt_append_str);
+ if (!append_str) {
+ ret = -1;
goto out;
+ }
+
+ for (i = cmdi + 2; i < wordcount; i++) {
+ strcat(append_str, words[i]);
+ strcat(append_str, " ");
+ }
+ append_str[append_len - 2] = '\0';
+ /* "checkpoint now" is special: we resolve that "now" */
+ if ((strcmp(words[cmdi + 1], "checkpoint") == 0) &&
+ (strcmp(append_str, "now") == 0)) {
+ struct timeval tv = {
+ 0,
+ };
+
+ ret = gettimeofday(&tv, NULL);
+ if (ret == -1)
+ goto out;
- w = str_getunamb (words[cmdi], opwords);
- if (!w)
+ GF_FREE(append_str);
+ append_str = GF_MALLOC(300, cli_mt_append_str);
+ if (!append_str) {
+ ret = -1;
+ goto out;
+ }
+ snprintf(append_str, 300, "%" GF_PRI_SECOND, tv.tv_sec);
+ } else if ((strcmp(words[cmdi + 1], "checkpoint") == 0) &&
+ (strcmp(append_str, "now") != 0)) {
+ memset(&checkpoint_time, 0, sizeof(struct tm));
+ ret_chkpt = strptime(append_str, "%Y-%m-%d %H:%M:%S",
+ &checkpoint_time);
+
+ if (ret_chkpt == NULL || *ret_chkpt != '\0') {
+ ret = -1;
+ cli_err(
+ "Invalid Checkpoint label. Use format "
+ "\"Y-m-d H:M:S\", Example: 2016-10-25 15:30:45");
+ goto out;
+ }
+ GF_FREE(append_str);
+ append_str = GF_MALLOC(300, cli_mt_append_str);
+ if (!append_str) {
+ ret = -1;
+ goto out;
+ }
+ strftime(chkpt_buf, sizeof(chkpt_buf), "%s", &checkpoint_time);
+ snprintf(append_str, 300, "%s", chkpt_buf);
+ }
+
+ ret = dict_set_dynstr(dict, "op_value", append_str);
+ if (ret != 0) {
goto out;
+ }
+ append_str = NULL;
+ }
- if (strcmp (w, "status") == 0) {
- type = GF_GSYNC_OPTION_TYPE_STATUS;
+ ret = -1;
+ if (subop) {
+ ret = dict_set_dynstr(dict, "subop", subop);
+ if (!ret)
+ subop = NULL;
+ }
- if (slavei && !masteri)
- goto out;
- } else if (strcmp (w, "config") == 0) {
- type = GF_GSYNC_OPTION_TYPE_CONFIG;
+out:
+ GF_FREE(append_str);
+ GF_FREE(subop);
- if (!slavei)
- goto out;
- } else if (strcmp (w, "start") == 0) {
- type = GF_GSYNC_OPTION_TYPE_START;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- if (!masteri || !slavei)
- goto out;
- } else if (strcmp (w, "stop") == 0) {
- type = GF_GSYNC_OPTION_TYPE_STOP;
+/* ssh_port_parse: Parses and validates when ssh_port is given.
+ * ssh_index refers to index of ssh_port and
+ * type refers to either push-pem or no-verify
+ */
- if (!masteri || !slavei)
- goto out;
- } else if (strcmp(w, "log-rotate") == 0) {
- type = GF_GSYNC_OPTION_TYPE_ROTATE;
+static int32_t
+parse_ssh_port(const char **words, int wordcount, dict_t *dict, unsigned *cmdi,
+ int ssh_index, char *type)
+{
+ int ret = 0;
+ char *end_ptr = NULL;
+ int64_t limit = 0;
+
+ if (!strcmp((char *)words[ssh_index], "ssh-port")) {
+ if (strcmp((char *)words[ssh_index - 1], "create")) {
+ ret = -1;
+ goto out;
+ }
+ (*cmdi)++;
+ limit = strtol(words[ssh_index + 1], &end_ptr, 10);
+ if (errno == ERANGE || errno == EINVAL || limit <= 0 ||
+ strcmp(end_ptr, "") != 0) {
+ ret = -1;
+ cli_err("Please enter an integer value for ssh_port ");
+ goto out;
+ }
- if (slavei && !masteri)
- goto out;
- } else
- GF_ASSERT (!"opword mismatch");
+ ret = dict_set_int32(dict, "ssh_port", limit);
+ if (ret)
+ goto out;
+ (*cmdi)++;
+ } else if (strcmp((char *)words[ssh_index + 1], "create")) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, type, 1);
+ if (ret)
+ goto out;
+ (*cmdi)++;
- if (type != GF_GSYNC_OPTION_TYPE_CONFIG &&
- (cmdi < wordcount - 1 || glob))
+out:
+ return ret;
+}
+
+static int32_t
+force_push_pem_no_verify_parse(const char **words, int wordcount, dict_t *dict,
+ unsigned *cmdi)
+{
+ int32_t ret = 0;
+
+ if (!strcmp((char *)words[wordcount - 1], "force")) {
+ if ((strcmp((char *)words[wordcount - 2], "start")) &&
+ (strcmp((char *)words[wordcount - 2], "stop")) &&
+ (strcmp((char *)words[wordcount - 2], "create")) &&
+ (strcmp((char *)words[wordcount - 2], "no-verify")) &&
+ (strcmp((char *)words[wordcount - 2], "push-pem")) &&
+ (strcmp((char *)words[wordcount - 2], "pause")) &&
+ (strcmp((char *)words[wordcount - 2], "resume"))) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32n(dict, "force", SLEN("force"), 1);
+ if (ret)
+ goto out;
+ (*cmdi)++;
+
+ if (!strcmp((char *)words[wordcount - 2], "push-pem")) {
+ ret = parse_ssh_port(words, wordcount, dict, cmdi, wordcount - 4,
+ "push_pem");
+ if (ret)
+ goto out;
+ } else if (!strcmp((char *)words[wordcount - 2], "no-verify")) {
+ ret = parse_ssh_port(words, wordcount, dict, cmdi, wordcount - 4,
+ "no_verify");
+ if (ret)
goto out;
+ }
+ } else if (!strcmp((char *)words[wordcount - 1], "push-pem")) {
+ ret = parse_ssh_port(words, wordcount, dict, cmdi, wordcount - 3,
+ "push_pem");
+ if (ret)
+ goto out;
+ } else if (!strcmp((char *)words[wordcount - 1], "no-verify")) {
+ ret = parse_ssh_port(words, wordcount, dict, cmdi, wordcount - 3,
+ "no_verify");
+ if (ret)
+ goto out;
+ }
- /* If got so far, input is valid, assemble the message */
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- ret = 0;
+int32_t
+cli_cmd_gsync_set_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **errstr)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ gf1_cli_gsync_set type = GF_GSYNC_OPTION_TYPE_NONE;
+ int i = 0;
+ unsigned masteri = 0;
+ unsigned slavei = 0;
+ unsigned glob = 0;
+ unsigned cmdi = 0;
+ static char *opwords[] = {"create", "status", "start", "stop",
+ "config", "force", "delete", "ssh-port",
+ "no-verify", "push-pem", "detail", "pause",
+ "resume", NULL};
+ char *w = NULL;
+ char *save_ptr = NULL;
+ char *slave_temp = NULL;
+ char *token = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question = NULL;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
- if (masteri)
- ret = dict_set_str (dict, "master", (char *)words[masteri]);
- if (!ret && slavei)
- ret = dict_set_str (dict, "slave", (char *)words[slavei]);
- if (!ret)
- ret = dict_set_int32 (dict, "type", type);
- if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) {
- switch ((wordcount - 1) - cmdi) {
- case 0:
- subop = gf_strdup ("get-all");
- break;
- case 1:
- if (words[cmdi + 1][0] == '!') {
- (words[cmdi + 1])++;
- if (gf_asprintf (&subop, "del%s", glob ? "-glob" : "") == -1)
- subop = NULL;
- } else
- subop = gf_strdup ("get");
-
- ret = dict_set_str (dict, "op_name", ((char *)words[cmdi + 1]));
- if (ret < 0)
- goto out;
- break;
- default:
- if (gf_asprintf (&subop, "set%s", glob ? "-glob" : "") == -1)
- subop = NULL;
+ /* new syntax:
+ *
+ * volume geo-replication $m $s create [[ssh-port n] [[no-verify] |
+ * [push-pem]]] [force] volume geo-replication [$m [$s]] status [detail]
+ * volume geo-replication [$m] $s config [[!]$opt [$val]]
+ * volume geo-replication $m $s start|stop [force]
+ * volume geo-replication $m $s delete [reset-sync-time]
+ * volume geo-replication $m $s pause [force]
+ * volume geo-replication $m $s resume [force]
+ */
+
+ if (wordcount < 3)
+ goto out;
- ret = dict_set_str (dict, "op_name", ((char *)words[cmdi + 1]));
- if (ret < 0)
- goto out;
+ for (i = 2; i <= 3 && i < wordcount - 1; i++) {
+ if (gsyncd_glob_check(words[i]))
+ glob = i;
+ if (gsyncd_url_check(words[i])) {
+ slavei = i;
+ break;
+ }
+ }
- /* join the varargs by spaces to get the op_value */
+ if (glob && !slavei)
+ /* glob is allowed only for config, thus it implies there is a
+ * slave argument; but that might have not been recognized on
+ * the first scan as it's url characteristics has been covered
+ * by the glob syntax.
+ *
+ * In this case, the slave is perforce the last glob-word -- the
+ * upcoming one is neither glob, nor url, so it's definitely not
+ * the slave.
+ */
+ slavei = glob;
+ if (slavei) {
+ cmdi = slavei + 1;
+ if (slavei == 3)
+ masteri = 2;
+ } else if (i <= 4) {
+ if (strtail("detail", (char *)words[wordcount - 1])) {
+ cmdi = wordcount - 2;
+ if (i == 4)
+ masteri = 2;
+ } else {
+ /* no $s, can only be status cmd
+ * (with either a single $m before it or nothing)
+ * -- these conditions imply that i <= 3 after
+ * the iteration and that i is the successor of
+ * the (0 or 1 length) sequence of $m-s.
+ */
+ cmdi = i;
+ if (i == 3)
+ masteri = 2;
+ }
+ } else
+ goto out;
- for (i = cmdi + 2; i < wordcount; i++)
- append_len += (strlen (words[i]) + 1);
- /* trailing strcat will add two bytes, make space for that */
- append_len++;
+ /* now check if input really complies syntax
+ * (in a somewhat redundant way, in favor
+ * transparent soundness)
+ */
- append_str = GF_CALLOC (1, append_len, cli_mt_append_str);
- if (!append_str) {
- ret = -1;
- goto out;
- }
+ if (masteri && gsyncd_url_check(words[masteri]))
+ goto out;
- for (i = cmdi + 2; i < wordcount; i++) {
- strcat (append_str, words[i]);
- strcat (append_str, " ");
- }
- append_str[append_len - 2] = '\0';
-
- /* "checkpoint now" is special: we resolve that "now" */
- if (strcmp (words[cmdi + 1], "checkpoint") == 0 &&
- strcmp (append_str, "now") == 0) {
- struct timeval tv = {0,};
-
- ret = gettimeofday (&tv, NULL);
- if (ret == -1)
- goto out; /* FIXME: free append_str? */
-
- GF_FREE (append_str);
- append_str = GF_CALLOC (1, 300, cli_mt_append_str);
- if (!append_str) {
- ret = -1;
- goto out;
- }
- strcpy (append_str, "as of ");
- gf_time_fmt (append_str + strlen ("as of "),
- 300 - strlen ("as of "),
- tv.tv_sec, gf_timefmt_FT);
- }
+ if (slavei && !glob && !valid_slave_gsyncd_url(words[slavei])) {
+ gf_asprintf(errstr, "Invalid slave url: %s", words[slavei]);
+ goto out;
+ }
- ret = dict_set_dynstr (dict, "op_value", append_str);
- }
+ w = str_getunamb(words[cmdi], opwords);
+ if (!w)
+ goto out;
+
+ if (strcmp(w, "create") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_CREATE;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else if (strcmp(w, "status") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_STATUS;
+
+ if (slavei && !masteri)
+ goto out;
+ } else if (strcmp(w, "config") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_CONFIG;
+
+ if (!slavei)
+ goto out;
+ } else if (strcmp(w, "start") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_START;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else if (strcmp(w, "stop") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_STOP;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else if (strcmp(w, "delete") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_DELETE;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else if (strcmp(w, "pause") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_PAUSE;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else if (strcmp(w, "resume") == 0) {
+ type = GF_GSYNC_OPTION_TYPE_RESUME;
+
+ if (!masteri || !slavei)
+ goto out;
+ } else
+ GF_ASSERT(!"opword mismatch");
+
+ ret = force_push_pem_no_verify_parse(words, wordcount, dict, &cmdi);
+ if (ret)
+ goto out;
+
+ if (strtail("detail", (char *)words[wordcount - 1])) {
+ if (!strtail("status", (char *)words[wordcount - 2])) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_uint32(dict, "status-detail", _gf_true);
+ if (ret)
+ goto out;
+ cmdi++;
+ }
+
+ if (type == GF_GSYNC_OPTION_TYPE_DELETE &&
+ !strcmp((char *)words[wordcount - 1], "reset-sync-time")) {
+ if (strcmp((char *)words[wordcount - 2], "delete")) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_uint32(dict, "reset-sync-time", _gf_true);
+ if (ret)
+ goto out;
+ cmdi++;
+ }
+
+ if (type != GF_GSYNC_OPTION_TYPE_CONFIG && (cmdi < wordcount - 1 || glob)) {
+ ret = -1;
+ goto out;
+ }
+ /* If got so far, input is valid, assemble the message */
+
+ ret = 0;
+
+ if (masteri) {
+ ret = dict_set_str(dict, "master", (char *)words[masteri]);
+ if (!ret)
+ ret = dict_set_str(dict, "volname", (char *)words[masteri]);
+ }
+ if (!ret && slavei) {
+ /* If geo-rep is created with root user using the syntax
+ * gluster vol geo-rep <mastervol> root@<slavehost> ...
+ * pass down only <slavehost> else pass as it is.
+ */
+ slave_temp = gf_strdup(words[slavei]);
+ if (slave_temp == NULL) {
+ ret = -1;
+ goto out;
+ }
+ token = strtok_r(slave_temp, "@", &save_ptr);
+ if (token && !strcmp(token, "root")) {
+ ret = dict_set_str(dict, "slave", (char *)words[slavei] + 5);
+ } else {
+ ret = dict_set_str(dict, "slave", (char *)words[slavei]);
+ }
+ }
+ if (!ret)
+ ret = dict_set_int32(dict, "type", type);
+ if (!ret && type == GF_GSYNC_OPTION_TYPE_CONFIG) {
+ if (!strcmp((char *)words[wordcount - 2], "ignore-deletes") &&
+ !strcmp((char *)words[wordcount - 1], "true")) {
+ question =
+ "There exists ~15 seconds delay for the option to take"
+ " effect from stime of the corresponding brick. Please"
+ " check the log for the time, the option is effective."
+ " Proceed";
+
+ answer = cli_cmd_get_confirmation(state, question);
+
+ if (GF_ANSWER_NO == answer) {
+ gf_log("cli", GF_LOG_INFO,
+ "Operation "
+ "cancelled, exiting");
+ *errstr = gf_strdup("Aborted by user.");
ret = -1;
- if (subop) {
- ret = dict_set_dynstr (dict, "subop", subop);
- if (!ret)
- subop = NULL;
- }
+ goto out;
+ }
}
-out:
- if (ret) {
- if (dict)
- dict_destroy (dict);
- if (append_str)
- GF_FREE (append_str);
- } else
- *options = dict;
+ ret = config_parse(words, wordcount, dict, cmdi, glob);
+ }
- if (subop)
- GF_FREE (subop);
+out:
+ if (slave_temp)
+ GF_FREE(slave_temp);
+ if (ret && dict)
+ dict_unref(dict);
+ else
+ *options = dict;
- return ret;
+ return ret;
}
int32_t
-cli_cmd_volume_profile_parse (const char **words, int wordcount,
- dict_t **options)
+cli_cmd_volume_profile_parse(const char **words, int wordcount,
+ dict_t **options)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- int ret = -1;
- gf1_cli_stats_op op = GF_CLI_STATS_NONE;
- char *opwords[] = { "start", "stop", "info", NULL };
- char *w = NULL;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int ret = -1;
+ gf1_cli_stats_op op = GF_CLI_STATS_NONE;
+ gf1_cli_info_op info_op = GF_CLI_INFO_NONE;
+ gf_boolean_t is_peek = _gf_false;
- GF_ASSERT (words);
- GF_ASSERT (options);
+ static char *opwords[] = {"start", "stop", "info", NULL};
+ char *w = NULL;
- dict = dict_new ();
- if (!dict)
- goto out;
+ GF_ASSERT(words);
+ GF_ASSERT(options);
- if (wordcount < 4 || wordcount >5)
- goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
- volname = (char *)words[2];
+ if (wordcount < 4)
+ goto out;
- ret = dict_set_str (dict, "volname", volname);
- if (ret)
- goto out;
+ volname = (char *)words[2];
- w = str_getunamb (words[3], opwords);
- if (!w) {
- ret = -1;
- goto out;
- }
- if (strcmp (w, "start") == 0) {
- op = GF_CLI_STATS_START;
- } else if (strcmp (w, "stop") == 0) {
- op = GF_CLI_STATS_STOP;
- } else if (strcmp (w, "info") == 0) {
- op = GF_CLI_STATS_INFO;
- } else
- GF_ASSERT (!"opword mismatch");
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
- ret = dict_set_int32 (dict, "op", (int32_t)op);
- if (ret)
- goto out;
+ w = str_getunamb(words[3], opwords);
+ if (!w) {
+ ret = -1;
+ goto out;
+ }
- if (wordcount == 5) {
- if (!strcmp (words[4], "nfs")) {
- ret = dict_set_int32 (dict, "nfs", _gf_true);
- if (ret)
- goto out;
+ if ((strcmp(w, "start") == 0 || strcmp(w, "stop") == 0) && wordcount > 5) {
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(w, "info") == 0 && wordcount > 7) {
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(w, "start") == 0) {
+ op = GF_CLI_STATS_START;
+ } else if (strcmp(w, "stop") == 0) {
+ op = GF_CLI_STATS_STOP;
+ } else if (strcmp(w, "info") == 0) {
+ op = GF_CLI_STATS_INFO;
+ info_op = GF_CLI_INFO_ALL;
+ if (wordcount > 4) {
+ if (strcmp(words[4], "incremental") == 0) {
+ info_op = GF_CLI_INFO_INCREMENTAL;
+ if (wordcount > 5 && strcmp(words[5], "peek") == 0) {
+ is_peek = _gf_true;
}
+ } else if (strcmp(words[4], "cumulative") == 0) {
+ info_op = GF_CLI_INFO_CUMULATIVE;
+ } else if (strcmp(words[4], "clear") == 0) {
+ info_op = GF_CLI_INFO_CLEAR;
+ } else if (strcmp(words[4], "peek") == 0) {
+ is_peek = _gf_true;
+ }
}
+ } else
+ GF_ASSERT(!"opword mismatch");
- *options = dict;
+ ret = dict_set_int32(dict, "op", (int32_t)op);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32(dict, "info-op", (int32_t)info_op);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32(dict, "peek", is_peek);
+ if (ret)
+ goto out;
+
+ if (!strcmp(words[wordcount - 1], "nfs")) {
+ ret = dict_set_int32(dict, "nfs", _gf_true);
+ if (ret)
+ goto out;
+ }
+
+ *options = dict;
out:
- if (ret && dict)
- dict_destroy (dict);
- return ret;
+ if (ret && dict)
+ dict_unref(dict);
+ return ret;
}
int32_t
-cli_cmd_volume_top_parse (const char **words, int wordcount,
- dict_t **options)
+cli_cmd_volume_top_parse(const char **words, int wordcount, dict_t **options)
{
- dict_t *dict = NULL;
- char *volname = NULL;
- char *value = NULL;
- char *key = NULL;
- int ret = -1;
- gf1_cli_stats_op op = GF_CLI_STATS_NONE;
- gf1_cli_top_op top_op = GF_CLI_TOP_NONE;
- int32_t list_cnt = -1;
- int index = 0;
- int perf = 0;
- uint32_t blk_size = 0;
- uint32_t count = 0;
- gf_boolean_t nfs = _gf_false;
- char *delimiter = NULL;
- char *opwords[] = { "open", "read", "write", "opendir",
- "readdir", "read-perf", "write-perf",
- "clear", NULL };
- char *w = NULL;
-
- GF_ASSERT (words);
- GF_ASSERT (options);
-
- dict = dict_new ();
- if (!dict)
- goto out;
-
- if (wordcount < 4)
- goto out;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ char *value = NULL;
+ char *key = NULL;
+ int ret = -1;
+ gf1_cli_stats_op op = GF_CLI_STATS_NONE;
+ gf1_cli_top_op top_op = GF_CLI_TOP_NONE;
+ int32_t list_cnt = -1;
+ int index = 0;
+ int perf = 0;
+ int32_t blk_size = 0;
+ int count = 0;
+ gf_boolean_t nfs = _gf_false;
+ char *delimiter = NULL;
+ static char *opwords[] = {"open", "read", "write",
+ "opendir", "readdir", "read-perf",
+ "write-perf", "clear", NULL};
+ char *w = NULL;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
- volname = (char *)words[2];
+ if (wordcount < 4)
+ goto out;
- ret = dict_set_str (dict, "volname", volname);
- if (ret)
- goto out;
+ volname = (char *)words[2];
- op = GF_CLI_STATS_TOP;
- ret = dict_set_int32 (dict, "op", (int32_t)op);
- if (ret)
- goto out;
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret)
+ goto out;
- w = str_getunamb (words[3], opwords);
- if (!w) {
- ret = -1;
- goto out;
- }
- if (strcmp (w, "open") == 0) {
- top_op = GF_CLI_TOP_OPEN;
- } else if (strcmp (w, "read") == 0) {
- top_op = GF_CLI_TOP_READ;
- } else if (strcmp (w, "write") == 0) {
- top_op = GF_CLI_TOP_WRITE;
- } else if (strcmp (w, "opendir") == 0) {
- top_op = GF_CLI_TOP_OPENDIR;
- } else if (strcmp (w, "readdir") == 0) {
- top_op = GF_CLI_TOP_READDIR;
- } else if (strcmp (w, "read-perf") == 0) {
- top_op = GF_CLI_TOP_READ_PERF;
- perf = 1;
- } else if (strcmp (w, "write-perf") == 0) {
- top_op = GF_CLI_TOP_WRITE_PERF;
- perf = 1;
- } else if (strcmp (w, "clear") == 0) {
- ret = dict_set_int32 (dict, "clear-stats", 1);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR,
- "Could not set clear-stats in dict");
- goto out;
- }
- } else
- GF_ASSERT (!"opword mismatch");
- ret = dict_set_int32 (dict, "top-op", (int32_t)top_op);
- if (ret)
- goto out;
+ op = GF_CLI_STATS_TOP;
+ ret = dict_set_int32(dict, "op", (int32_t)op);
+ if (ret)
+ goto out;
- if ((wordcount > 4) && !strcmp (words[4], "nfs")) {
- nfs = _gf_true;
- ret = dict_set_int32 (dict, "nfs", nfs);
- if (ret)
- goto out;
- index = 5;
- } else {
- index = 4;
+ w = str_getunamb(words[3], opwords);
+ if (!w) {
+ ret = -1;
+ goto out;
+ }
+ if (strcmp(w, "open") == 0) {
+ top_op = GF_CLI_TOP_OPEN;
+ } else if (strcmp(w, "read") == 0) {
+ top_op = GF_CLI_TOP_READ;
+ } else if (strcmp(w, "write") == 0) {
+ top_op = GF_CLI_TOP_WRITE;
+ } else if (strcmp(w, "opendir") == 0) {
+ top_op = GF_CLI_TOP_OPENDIR;
+ } else if (strcmp(w, "readdir") == 0) {
+ top_op = GF_CLI_TOP_READDIR;
+ } else if (strcmp(w, "read-perf") == 0) {
+ top_op = GF_CLI_TOP_READ_PERF;
+ perf = 1;
+ } else if (strcmp(w, "write-perf") == 0) {
+ top_op = GF_CLI_TOP_WRITE_PERF;
+ perf = 1;
+ } else if (strcmp(w, "clear") == 0) {
+ ret = dict_set_int32(dict, "clear-stats", 1);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not set clear-stats in dict");
+ goto out;
}
+ } else
+ GF_ASSERT(!"opword mismatch");
+ ret = dict_set_int32(dict, "top-op", (int32_t)top_op);
+ if (ret)
+ goto out;
- for (; index < wordcount; index+=2) {
-
- key = (char *) words[index];
- value = (char *) words[index+1];
-
- if ( key && !value ) {
- ret = -1;
- goto out;
- }
- if (!strcmp (key, "brick")) {
- delimiter = strchr (value, ':');
- if (!delimiter || delimiter == value
- || *(delimiter+1) != '/') {
- cli_err ("wrong brick type: %s, use <HOSTNAME>:"
- "<export-dir-abs-path>", value);
- ret = -1;
- goto out;
- } else {
- ret = gf_canonicalize_path (delimiter + 1);
- if (ret)
- goto out;
- }
- ret = dict_set_str (dict, "brick", value);
-
- } else if (!strcmp (key, "list-cnt")) {
- ret = gf_is_str_int (value);
- if (!ret)
- list_cnt = atoi (value);
- if (ret || (list_cnt < 0) || (list_cnt > 100)) {
- cli_err ("list-cnt should be between 0 to 100");
- ret = -1;
- goto out;
- }
- } else if (perf && !nfs && !strcmp (key, "bs")) {
- ret = gf_is_str_int (value);
- if (!ret)
- blk_size = atoi (value);
- if (ret || (blk_size <= 0)) {
- if (blk_size < 0)
- cli_err ("block size is an invalid"
- " number");
- else
- cli_err ("block size should be an "
- "integer greater than zero");
- ret = -1;
- goto out;
- }
- ret = dict_set_uint32 (dict, "blk-size", blk_size);
- } else if (perf && !nfs && !strcmp (key, "count")) {
- ret = gf_is_str_int (value);
- if (!ret)
- count = atoi(value);
- if (ret || (count <= 0)) {
- if (count < 0)
- cli_err ("count is an invalid number");
- else
- cli_err ("count should be an integer "
- "greater than zero");
-
- ret = -1;
- goto out;
- }
- ret = dict_set_uint32 (dict, "blk-cnt", count);
- } else {
- ret = -1;
- goto out;
- }
- if (ret) {
- gf_log ("", GF_LOG_WARNING, "Dict set failed for "
- "key %s", key);
- goto out;
- }
+ if ((wordcount > 4) && !strcmp(words[4], "nfs")) {
+ nfs = _gf_true;
+ ret = dict_set_int32(dict, "nfs", nfs);
+ if (ret)
+ goto out;
+ index = 5;
+ } else {
+ index = 4;
+ }
+
+ for (; index < wordcount; index += 2) {
+ key = (char *)words[index];
+ value = (char *)words[index + 1];
+
+ if (!key || !value) {
+ ret = -1;
+ goto out;
}
- if (list_cnt == -1)
- list_cnt = 100;
- ret = dict_set_int32 (dict, "list-cnt", list_cnt);
- if (ret) {
- gf_log ("", GF_LOG_WARNING, "Dict set failed for list_cnt");
+ if (!strcmp(key, "brick")) {
+ delimiter = strchr(value, ':');
+ if (!delimiter || delimiter == value || *(delimiter + 1) != '/') {
+ cli_err(
+ "wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>",
+ value);
+ ret = -1;
goto out;
- }
-
- if ((blk_size > 0) ^ (count > 0)) {
- cli_err ("Need to give both 'bs' and 'count'");
+ } else {
+ ret = gf_canonicalize_path(delimiter + 1);
+ if (ret)
+ goto out;
+ }
+ ret = dict_set_str(dict, "brick", value);
+
+ } else if (!strcmp(key, "list-cnt")) {
+ ret = gf_is_str_int(value);
+ if (!ret)
+ list_cnt = atoi(value);
+ if (ret || (list_cnt < 0) || (list_cnt > 100)) {
+ cli_err("list-cnt should be between 0 to 100");
+ ret = -1;
+ goto out;
+ }
+ } else if (perf && !nfs && !strcmp(key, "bs")) {
+ ret = gf_is_str_int(value);
+ if (!ret)
+ blk_size = atoi(value);
+ if (ret || (blk_size <= 0)) {
+ if (blk_size < 0)
+ cli_err(
+ "block size is an invalid"
+ " number");
+ else
+ cli_err(
+ "block size should be an "
+ "integer greater than zero");
ret = -1;
goto out;
- } else if (((uint64_t)blk_size * count) > (10 * GF_UNIT_GB)) {
- cli_err ("'bs * count' value %"PRIu64" is greater than "
- "maximum allowed value of 10GB",
- ((uint64_t)blk_size * count));
+ }
+ ret = dict_set_uint32(dict, "blk-size", (uint32_t)blk_size);
+ } else if (perf && !nfs && !strcmp(key, "count")) {
+ ret = gf_is_str_int(value);
+ if (!ret)
+ count = atoi(value);
+ if (ret || (count <= 0)) {
+ if (count < 0)
+ cli_err("count is an invalid number");
+ else
+ cli_err(
+ "count should be an integer "
+ "greater than zero");
+
ret = -1;
goto out;
+ }
+ ret = dict_set_uint32(dict, "blk-cnt", count);
+ } else {
+ ret = -1;
+ goto out;
+ }
+ if (ret) {
+ gf_log("", GF_LOG_WARNING,
+ "Dict set failed for "
+ "key %s",
+ key);
+ goto out;
}
+ }
+ if (list_cnt == -1)
+ list_cnt = 100;
+ ret = dict_set_int32(dict, "list-cnt", list_cnt);
+ if (ret) {
+ gf_log("", GF_LOG_WARNING, "Dict set failed for list_cnt");
+ goto out;
+ }
- *options = dict;
+ if ((blk_size > 0) ^ (count > 0)) {
+ cli_err("Need to give both 'bs' and 'count'");
+ ret = -1;
+ goto out;
+ } else if (((uint64_t)blk_size * count) > (10 * GF_UNIT_GB)) {
+ cli_err("'bs * count' value %" PRIu64
+ " is greater than "
+ "maximum allowed value of 10GB",
+ ((uint64_t)blk_size * count));
+ ret = -1;
+ goto out;
+ }
+
+ *options = dict;
out:
- if (ret && dict)
- dict_destroy (dict);
- return ret;
+ if (ret && dict)
+ dict_unref(dict);
+ return ret;
}
uint32_t
-cli_cmd_get_statusop (const char *arg)
+cli_cmd_get_statusop(const char *arg)
{
- int i = 0;
- uint32_t ret = GF_CLI_STATUS_NONE;
- char *w = NULL;
- char *opwords[] = {"detail", "mem", "clients", "fd",
- "inode", "callpool", NULL};
- struct {
- char *opname;
- uint32_t opcode;
- } optable[] = {
- { "detail", GF_CLI_STATUS_DETAIL },
- { "mem", GF_CLI_STATUS_MEM },
- { "clients", GF_CLI_STATUS_CLIENTS },
- { "fd", GF_CLI_STATUS_FD },
- { "inode", GF_CLI_STATUS_INODE },
- { "callpool", GF_CLI_STATUS_CALLPOOL },
- { NULL }
- };
-
- w = str_getunamb (arg, opwords);
- if (!w) {
- gf_log ("cli", GF_LOG_DEBUG,
- "Not a status op %s", arg);
- goto out;
- }
+ int i = 0;
+ uint32_t ret = GF_CLI_STATUS_NONE;
+ char *w = NULL;
+ static char *opwords[] = {"detail", "mem", "clients", "fd", "inode",
+ "callpool", "tasks", "client-list", NULL};
+ static struct {
+ char *opname;
+ uint32_t opcode;
+ } optable[] = {{"detail", GF_CLI_STATUS_DETAIL},
+ {"mem", GF_CLI_STATUS_MEM},
+ {"clients", GF_CLI_STATUS_CLIENTS},
+ {"fd", GF_CLI_STATUS_FD},
+ {"inode", GF_CLI_STATUS_INODE},
+ {"callpool", GF_CLI_STATUS_CALLPOOL},
+ {"tasks", GF_CLI_STATUS_TASKS},
+ {"client-list", GF_CLI_STATUS_CLIENT_LIST},
+ {NULL}};
+
+ w = str_getunamb(arg, opwords);
+ if (!w) {
+ gf_log("cli", GF_LOG_DEBUG, "Not a status op %s", arg);
+ goto out;
+ }
- for (i = 0; optable[i].opname; i++) {
- if (!strcmp (w, optable[i].opname)) {
- ret = optable[i].opcode;
- break;
- }
+ for (i = 0; optable[i].opname; i++) {
+ if (!strcmp(w, optable[i].opname)) {
+ ret = optable[i].opcode;
+ break;
}
+ }
- out:
- return ret;
+out:
+ return ret;
}
int
-cli_cmd_volume_status_parse (const char **words, int wordcount,
- dict_t **options)
+cli_cmd_volume_status_parse(const char **words, int wordcount, dict_t **options)
{
- dict_t *dict = NULL;
- int ret = -1;
- uint32_t cmd = 0;
-
- GF_ASSERT (options);
+ dict_t *dict = NULL;
+ int ret = -1;
+ uint32_t cmd = 0;
- dict = dict_new ();
- if (!dict)
- goto out;
+ GF_ASSERT(options);
- switch (wordcount) {
+ dict = dict_new();
+ if (!dict)
+ goto out;
+ switch (wordcount) {
case 2:
- cmd = GF_CLI_STATUS_ALL;
- ret = 0;
- break;
+ cmd = GF_CLI_STATUS_ALL;
+ ret = 0;
+ break;
case 3:
- if (!strcmp (words[2], "all")) {
- cmd = GF_CLI_STATUS_ALL;
- ret = 0;
+ if (!strcmp(words[2], "all")) {
+ cmd = GF_CLI_STATUS_ALL;
+ ret = 0;
- } else {
- cmd = GF_CLI_STATUS_VOL;
- ret = dict_set_str (dict, "volname", (char *)words[2]);
- }
+ } else {
+ cmd = GF_CLI_STATUS_VOL;
+ ret = dict_set_str(dict, "volname", (char *)words[2]);
+ }
- break;
+ break;
case 4:
- cmd = cli_cmd_get_statusop (words[3]);
-
- if (!strcmp (words[2], "all")) {
- if (cmd == GF_CLI_STATUS_NONE) {
- cli_err ("%s is not a valid status option",
- words[3]);
- ret = -1;
- goto out;
- }
- cmd |= GF_CLI_STATUS_ALL;
- ret = 0;
-
- } else {
- ret = dict_set_str (dict, "volname",
- (char *)words[2]);
- if (ret)
- goto out;
-
- if (cmd == GF_CLI_STATUS_NONE) {
- if (!strcmp (words[3], "nfs")) {
- cmd |= GF_CLI_STATUS_NFS;
- } else if (!strcmp (words[3], "shd")) {
- cmd |= GF_CLI_STATUS_SHD;
- } else {
- cmd = GF_CLI_STATUS_BRICK;
- ret = dict_set_str (dict, "brick",
- (char *)words[3]);
- }
-
- } else {
- cmd |= GF_CLI_STATUS_VOL;
- ret = 0;
- }
- }
+ cmd = cli_cmd_get_statusop(words[3]);
- break;
-
- case 5:
- if (!strcmp (words[2], "all")) {
- cli_err ("Cannot specify brick/nfs for \"all\"");
- ret = -1;
- goto out;
- }
-
- cmd = cli_cmd_get_statusop (words[4]);
+ if (!strcmp(words[2], "all")) {
if (cmd == GF_CLI_STATUS_NONE) {
- cli_err ("%s is not a valid status option",
- words[4]);
- ret = -1;
- goto out;
+ cli_err("%s is not a valid status option", words[3]);
+ ret = -1;
+ goto out;
}
+ cmd |= GF_CLI_STATUS_ALL;
+ ret = 0;
-
- ret = dict_set_str (dict, "volname", (char *)words[2]);
+ } else {
+ ret = dict_set_str(dict, "volname", (char *)words[2]);
if (ret)
- goto out;
+ goto out;
- if (!strcmp (words[3], "nfs")) {
- if (cmd == GF_CLI_STATUS_FD ||
- cmd == GF_CLI_STATUS_DETAIL) {
- cli_err ("Detail/FD status not available"
- " for NFS Servers");
- ret = -1;
- goto out;
- }
+ if (cmd == GF_CLI_STATUS_NONE) {
+ if (!strcmp(words[3], "nfs")) {
cmd |= GF_CLI_STATUS_NFS;
- } else if (!strcmp (words[3], "shd")){
- if (cmd == GF_CLI_STATUS_FD ||
- cmd == GF_CLI_STATUS_CLIENTS ||
- cmd == GF_CLI_STATUS_DETAIL) {
- cli_err ("Detail/FD/Clients status not "
- "available for Self-heal Daemons");
- ret = -1;
- goto out;
- }
+ } else if (!strcmp(words[3], "shd")) {
cmd |= GF_CLI_STATUS_SHD;
+ } else if (!strcmp(words[3], "quotad")) {
+ cmd |= GF_CLI_STATUS_QUOTAD;
+ } else if (!strcmp(words[3], "snapd")) {
+ cmd |= GF_CLI_STATUS_SNAPD;
+ } else if (!strcmp(words[3], "bitd")) {
+ cmd |= GF_CLI_STATUS_BITD;
+ } else if (!strcmp(words[3], "scrub")) {
+ cmd |= GF_CLI_STATUS_SCRUB;
+ } else {
+ cmd = GF_CLI_STATUS_BRICK;
+ ret = dict_set_str(dict, "brick", (char *)words[3]);
+ }
+
} else {
- cmd |= GF_CLI_STATUS_BRICK;
- ret = dict_set_str (dict, "brick", (char *)words[3]);
+ cmd |= GF_CLI_STATUS_VOL;
+ ret = 0;
}
- break;
+ }
- default:
+ break;
+
+ case 5:
+ if (!strcmp(words[2], "all")) {
+ cli_err("Cannot specify brick/nfs for \"all\"");
+ ret = -1;
goto out;
- }
+ }
- if (ret)
+ cmd = cli_cmd_get_statusop(words[4]);
+ if (cmd == GF_CLI_STATUS_NONE) {
+ cli_err("%s is not a valid status option", words[4]);
+ ret = -1;
goto out;
+ }
- ret = dict_set_int32 (dict, "cmd", cmd);
- if (ret)
+ ret = dict_set_str(dict, "volname", (char *)words[2]);
+ if (ret)
goto out;
- *options = dict;
+ if (!strcmp(words[3], "nfs")) {
+ if (cmd == GF_CLI_STATUS_FD || cmd == GF_CLI_STATUS_DETAIL ||
+ cmd == GF_CLI_STATUS_TASKS) {
+ cli_err(
+ "Detail/FD/Tasks status not available"
+ " for NFS Servers");
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_NFS;
+ } else if (!strcmp(words[3], "shd")) {
+ if (cmd == GF_CLI_STATUS_FD || cmd == GF_CLI_STATUS_CLIENTS ||
+ cmd == GF_CLI_STATUS_DETAIL || cmd == GF_CLI_STATUS_TASKS) {
+ cli_err(
+ "Detail/FD/Clients/Tasks status not "
+ "available for Self-heal Daemons");
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_SHD;
+ } else if (!strcmp(words[3], "quotad")) {
+ if (cmd == GF_CLI_STATUS_FD || cmd == GF_CLI_STATUS_CLIENTS ||
+ cmd == GF_CLI_STATUS_DETAIL || cmd == GF_CLI_STATUS_INODE) {
+ cli_err(
+ "Detail/FD/Clients/Inode status not "
+ "available for Quota Daemon");
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_QUOTAD;
+ } else if (!strcmp(words[3], "snapd")) {
+ if (cmd == GF_CLI_STATUS_FD || cmd == GF_CLI_STATUS_CLIENTS ||
+ cmd == GF_CLI_STATUS_DETAIL || cmd == GF_CLI_STATUS_INODE) {
+ cli_err(
+ "Detail/FD/Clients/Inode status not "
+ "available for snap daemon");
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_SNAPD;
+ } else {
+ if (cmd == GF_CLI_STATUS_TASKS) {
+ cli_err(
+ "Tasks status not available for "
+ "bricks");
+ ret = -1;
+ goto out;
+ }
+ cmd |= GF_CLI_STATUS_BRICK;
+ ret = dict_set_str(dict, "brick", (char *)words[3]);
+ }
+ break;
- out:
- if (ret && dict)
- dict_destroy (dict);
+ default:
+ goto out;
+ }
- return ret;
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32(dict, "cmd", cmd);
+ if (ret)
+ goto out;
+
+ *options = dict;
+
+out:
+ if (ret && dict)
+ dict_unref(dict);
+
+ return ret;
}
gf_boolean_t
-cli_cmd_validate_dumpoption (const char *arg, char **option)
+cli_cmd_validate_dumpoption(const char *arg, char **option)
{
- char *opwords[] = {"all", "nfs", "mem", "iobuf", "callpool", "priv",
- "fd", "inode", "history", "inodectx", "fdctx",
- NULL};
- char *w = NULL;
+ static char *opwords[] = {"all", "nfs", "mem", "iobuf", "callpool",
+ "priv", "fd", "inode", "history", "inodectx",
+ "fdctx", "quotad", NULL};
+ char *w = NULL;
+
+ w = str_getunamb(arg, opwords);
+ if (!w) {
+ gf_log("cli", GF_LOG_DEBUG, "Unknown statedump option %s", arg);
+ return _gf_false;
+ }
+ *option = w;
+ return _gf_true;
+}
- w = str_getunamb (arg, opwords);
- if (!w) {
- gf_log ("cli", GF_LOG_DEBUG, "Unknown statedump option %s",
- arg);
- return _gf_false;
+int
+cli_cmd_volume_statedump_options_parse(const char **words, int wordcount,
+ dict_t **options)
+{
+ int ret = 0;
+ int i = 0;
+ dict_t *dict = NULL;
+ int option_cnt = 0;
+ char *option = NULL;
+ char *option_str = NULL;
+ char *tmp_str = NULL;
+ char *tmp = NULL;
+ char *ip_addr = NULL;
+ char *pid = NULL;
+
+ if ((wordcount >= 5) && ((strcmp(words[3], "client")) == 0)) {
+ tmp = gf_strdup(words[4]);
+ if (!tmp) {
+ ret = -1;
+ goto out;
}
- *option = w;
- return _gf_true;
+ ip_addr = strtok(tmp, ":");
+ pid = strtok(NULL, ":");
+ if (valid_internet_address(ip_addr, _gf_true, _gf_false) && pid &&
+ gf_valid_pid(pid, strlen(pid))) {
+ ret = gf_asprintf(&option_str, "%s %s %s", words[3], ip_addr, pid);
+ if (ret < 0) {
+ goto out;
+ }
+ option_cnt = 3;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ } else {
+ for (i = 3; i < wordcount; i++, option_cnt++) {
+ if (!cli_cmd_validate_dumpoption(words[i], &option)) {
+ ret = -1;
+ goto out;
+ }
+ tmp_str = option_str;
+ option_str = NULL;
+ ret = gf_asprintf(&option_str, "%s%s ", tmp_str ? tmp_str : "",
+ option);
+ GF_FREE(tmp_str);
+ if (ret < 0) {
+ goto out;
+ }
+ }
+ if (option_str && (strstr(option_str, "nfs")) &&
+ strstr(option_str, "quotad")) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ /* dynamic string in dict is freed up when dict is freed up, and hence
+ if option_str is NULL pass in an duplicate empty string to the same */
+ ret = dict_set_dynstr(dict, "options",
+ (option_str ? option_str : gf_strdup("")));
+ if (ret)
+ goto out;
+ option_str = NULL;
+
+ ret = dict_set_int32(dict, "option_cnt", option_cnt);
+ if (ret)
+ goto out;
+
+ *options = dict;
+out:
+ GF_FREE(tmp);
+ GF_FREE(option_str);
+ if (ret && dict)
+ dict_unref(dict);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, "Error parsing dumpoptions");
+ return ret;
}
int
-cli_cmd_volume_statedump_options_parse (const char **words, int wordcount,
- dict_t **options)
+cli_cmd_volume_clrlks_opts_parse(const char **words, int wordcount,
+ dict_t **options)
{
- int ret = 0;
- int i = 0;
- dict_t *dict = NULL;
- int option_cnt = 0;
- char *option = NULL;
- char option_str[100] = {0,};
+ int ret = -1;
+ int i = 0;
+ dict_t *dict = NULL;
+ char *kind_opts[4] = {"blocked", "granted", "all", NULL};
+ char *types[4] = {"inode", "entry", "posix", NULL};
+ char *free_ptr = NULL;
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
- for (i = 3; i < wordcount; i++, option_cnt++) {
- if (!cli_cmd_validate_dumpoption (words[i], &option)) {
- ret = -1;
- goto out;
- }
- strncat (option_str, option, strlen (option));
- strncat (option_str, " ", 1);
+ if (strcmp(words[4], "kind"))
+ goto out;
+
+ for (i = 0; kind_opts[i]; i++) {
+ if (!strcmp(words[5], kind_opts[i])) {
+ free_ptr = gf_strdup(words[5]);
+ ret = dict_set_dynstr(dict, "kind", free_ptr);
+ if (ret)
+ goto out;
+ free_ptr = NULL;
+ break;
}
+ }
+ if (i == 3)
+ goto out;
- dict = dict_new ();
- if (!dict)
+ ret = -1;
+ for (i = 0; types[i]; i++) {
+ if (!strcmp(words[6], types[i])) {
+ free_ptr = gf_strdup(words[6]);
+ ret = dict_set_dynstr(dict, "type", free_ptr);
+ if (ret)
goto out;
+ free_ptr = NULL;
+ break;
+ }
+ }
+ if (i == 3)
+ goto out;
- ret = dict_set_dynstr (dict, "options", gf_strdup (option_str));
+ if (wordcount == 8) {
+ free_ptr = gf_strdup(words[7]);
+ ret = dict_set_dynstr(dict, "opts", free_ptr);
if (ret)
+ goto out;
+ free_ptr = NULL;
+ }
+
+ ret = 0;
+ *options = dict;
+out:
+ if (ret) {
+ GF_FREE(free_ptr);
+ dict_unref(dict);
+ }
+
+ return ret;
+}
+
+static int
+extract_hostname_path_from_token(const char *tmp_words, char **hostname,
+ char **path)
+{
+ int ret = 0;
+ char *delimiter = NULL;
+ char *tmp_host = NULL;
+ char *host_name = NULL;
+ char *words = NULL;
+ int str_len = 0;
+ *hostname = NULL;
+ *path = NULL;
+
+ str_len = strlen(tmp_words) + 1;
+ words = GF_MALLOC(str_len, gf_common_mt_char);
+ if (!words) {
+ ret = -1;
+ goto out;
+ }
+
+ snprintf(words, str_len, "%s", tmp_words);
+
+ if (validate_brick_name(words)) {
+ cli_err(
+ "Wrong brick type: %s, use <HOSTNAME>:"
+ "<export-dir-abs-path>",
+ words);
+ ret = -1;
+ goto out;
+ } else {
+ delimiter = strrchr(words, ':');
+ ret = gf_canonicalize_path(delimiter + 1);
+ if (ret) {
+ goto out;
+ } else {
+ str_len = strlen(delimiter + 1) + 1;
+ *path = GF_MALLOC(str_len, gf_common_mt_char);
+ if (!*path) {
+ ret = -1;
goto out;
+ }
+ snprintf(*path, str_len, "%s", delimiter + 1);
+ }
+ }
- ret = dict_set_int32 (dict, "option_cnt", option_cnt);
- if (ret)
+ tmp_host = gf_strdup(words);
+ if (!tmp_host) {
+ gf_log("cli", GF_LOG_ERROR, "Out of memory");
+ ret = -1;
+ goto out;
+ }
+ get_host_name(tmp_host, &host_name);
+ if (!host_name) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to allocate "
+ "memory");
+ goto out;
+ }
+ if (!(strcmp(host_name, "localhost") && strcmp(host_name, "127.0.0.1") &&
+ strncmp(host_name, "0.", 2))) {
+ cli_err(
+ "Please provide a valid hostname/ip other "
+ "than localhost, 127.0.0.1 or loopback "
+ "address (0.0.0.0 to 0.255.255.255).");
+ ret = -1;
+ goto out;
+ }
+ if (!valid_internet_address(host_name, _gf_false, _gf_false)) {
+ cli_err(
+ "internet address '%s' does not conform to "
+ "standards",
+ host_name);
+ ret = -1;
+ goto out;
+ }
+
+ str_len = strlen(host_name) + 1;
+ *hostname = GF_MALLOC(str_len, gf_common_mt_char);
+ if (!*hostname) {
+ ret = -1;
+ goto out;
+ }
+ snprintf(*hostname, str_len, "%s", host_name);
+ ret = 0;
+
+out:
+ GF_FREE(words);
+ GF_FREE(tmp_host);
+ return ret;
+}
+
+static int
+set_hostname_path_in_dict(const char *token, dict_t *dict, int heal_op)
+{
+ char *hostname = NULL;
+ char *path = NULL;
+ int ret = 0;
+
+ ret = extract_hostname_path_from_token(token, &hostname, &path);
+ if (ret)
+ goto out;
+
+ switch (heal_op) {
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+ ret = dict_set_dynstr(dict, "heal-source-hostname", hostname);
+ if (ret)
+ goto out;
+ hostname = NULL;
+ ret = dict_set_dynstr(dict, "heal-source-brickpath", path);
+ if (ret) {
+ goto out;
+ }
+ path = NULL;
+ break;
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ ret = dict_set_dynstr(dict, "per-replica-cmd-hostname", hostname);
+ if (ret)
goto out;
+ hostname = NULL;
+ ret = dict_set_dynstr(dict, "per-replica-cmd-path", path);
+ if (ret) {
+ goto out;
+ }
+ path = NULL;
+ break;
+ default:
+ ret = -1;
+ break;
+ }
- *options = dict;
out:
- if (ret && dict)
- dict_destroy (dict);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR, "Error parsing dumpoptions");
- return ret;
+ GF_FREE(hostname);
+ GF_FREE(path);
+ return ret;
+}
+
+static int
+heal_command_type_get(const char *command)
+{
+ int i = 0;
+ /* subcommands are set as NULL */
+ char *heal_cmds[GF_SHD_OP_HEAL_DISABLE + 1] = {
+ [GF_SHD_OP_INVALID] = NULL,
+ [GF_SHD_OP_HEAL_INDEX] = NULL,
+ [GF_SHD_OP_HEAL_FULL] = "full",
+ [GF_SHD_OP_INDEX_SUMMARY] = "info",
+ [GF_SHD_OP_SPLIT_BRAIN_FILES] = NULL,
+ [GF_SHD_OP_STATISTICS] = "statistics",
+ [GF_SHD_OP_STATISTICS_HEAL_COUNT] = NULL,
+ [GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA] = NULL,
+ [GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE] = NULL,
+ [GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK] = NULL,
+ [GF_SHD_OP_HEAL_ENABLE] = "enable",
+ [GF_SHD_OP_HEAL_DISABLE] = "disable",
+ };
+
+ for (i = 0; i <= GF_SHD_OP_HEAL_DISABLE; i++) {
+ if (heal_cmds[i] && (strcmp(heal_cmds[i], command) == 0))
+ return i;
+ }
+
+ return GF_SHD_OP_INVALID;
}
int
-cli_cmd_volume_clrlks_opts_parse (const char **words, int wordcount,
+cli_cmd_volume_heal_options_parse(const char **words, int wordcount,
dict_t **options)
{
- int ret = -1;
- int i = 0;
- dict_t *dict = NULL;
- char *kind_opts[4] = {"blocked", "granted", "all", NULL};
- char *types[4] = {"inode", "entry", "posix", NULL};
- char *free_ptr = NULL;
+ int ret = 0;
+ dict_t *dict = NULL;
+ gf_xl_afr_op_t op = GF_SHD_OP_INVALID;
- dict = dict_new ();
- if (!dict)
- goto out;
+ dict = dict_new();
+ if (!dict) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to create the dict");
+ ret = -1;
+ goto out;
+ }
- if (strcmp (words[4], "kind"))
- goto out;
+ ret = dict_set_str(dict, "volname", (char *)words[2]);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to set volname");
+ goto out;
+ }
+
+ if (wordcount == 3) {
+ ret = dict_set_int32(dict, "heal-op", GF_SHD_OP_HEAL_INDEX);
+ goto done;
+ }
+
+ if (wordcount == 4) {
+ op = heal_command_type_get(words[3]);
+ if (op == GF_SHD_OP_INVALID) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "heal-op", op);
+ goto done;
+ }
- for (i = 0; kind_opts[i]; i++) {
- if (!strcmp (words[5], kind_opts[i])) {
- free_ptr = gf_strdup (words[5]);
- ret = dict_set_dynstr (dict, "kind", free_ptr);
- if (ret)
- goto out;
- free_ptr = NULL;
- break;
- }
+ if (wordcount == 5) {
+ if (strcmp(words[3], "info") && strcmp(words[3], "statistics") &&
+ strcmp(words[3], "granular-entry-heal")) {
+ ret = -1;
+ goto out;
+ }
+
+ if (!strcmp(words[3], "info")) {
+ if (!strcmp(words[4], "split-brain")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_SPLIT_BRAIN_FILES);
+ goto done;
+ }
+ if (!strcmp(words[4], "summary")) {
+ ret = dict_set_int32(dict, "heal-op", GF_SHD_OP_HEAL_SUMMARY);
+ goto done;
+ }
+ }
+
+ if (!strcmp(words[3], "statistics")) {
+ if (!strcmp(words[4], "heal-count")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_STATISTICS_HEAL_COUNT);
+ goto done;
+ }
+ }
+
+ if (!strcmp(words[3], "granular-entry-heal")) {
+ if (!strcmp(words[4], "enable")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE);
+ goto done;
+ } else if (!strcmp(words[4], "disable")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE);
+ goto done;
+ }
}
- if (i == 3)
- goto out;
ret = -1;
- for (i = 0; types[i]; i++) {
- if (!strcmp (words[6], types[i])) {
- free_ptr = gf_strdup (words[6]);
- ret = dict_set_dynstr (dict, "type", free_ptr);
- if (ret)
- goto out;
- free_ptr = NULL;
- break;
- }
+ goto out;
+ }
+ if (wordcount == 6) {
+ if (strcmp(words[3], "split-brain")) {
+ ret = -1;
+ goto out;
}
- if (i == 3)
+ if (!strcmp(words[4], "bigger-file")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE);
+ if (ret)
+ goto out;
+ ret = dict_set_str(dict, "file", (char *)words[5]);
+ if (ret)
+ goto out;
+ goto done;
+ }
+ if (!strcmp(words[4], "latest-mtime")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME);
+ if (ret)
+ goto out;
+ ret = dict_set_str(dict, "file", (char *)words[5]);
+ if (ret)
+ goto out;
+ goto done;
+ }
+ if (!strcmp(words[4], "source-brick")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+ if (ret)
+ goto out;
+ ret = set_hostname_path_in_dict(words[5], dict,
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+ if (ret)
+ goto out;
+ goto done;
+ }
+ ret = -1;
+ goto out;
+ }
+ if (wordcount == 7) {
+ if (!strcmp(words[3], "statistics") &&
+ !strcmp(words[4], "heal-count") && !strcmp(words[5], "replica")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA);
+ if (ret)
+ goto out;
+ ret = set_hostname_path_in_dict(
+ words[6], dict, GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA);
+ if (ret)
goto out;
+ goto done;
+ }
+ if (!strcmp(words[3], "split-brain") &&
+ !strcmp(words[4], "source-brick")) {
+ ret = dict_set_int32(dict, "heal-op",
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+ ret = set_hostname_path_in_dict(words[5], dict,
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+ if (ret)
+ goto out;
+ ret = dict_set_str(dict, "file", (char *)words[6]);
+ if (ret)
+ goto out;
+ goto done;
+ }
+ }
+ ret = -1;
+ goto out;
+done:
+ *options = dict;
+out:
+ if (ret && dict) {
+ dict_unref(dict);
+ *options = NULL;
+ }
- if (wordcount == 8) {
- free_ptr = gf_strdup (words[7]);
- ret = dict_set_dynstr (dict, "opts", free_ptr);
- if (ret)
- goto out;
- free_ptr = NULL;
+ return ret;
+}
+
+int
+cli_cmd_volume_defrag_parse(const char **words, int wordcount, dict_t **options)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
+ char *option = NULL;
+ char *volname = NULL;
+ char *command = NULL;
+ gf_cli_defrag_type cmd = 0;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ if (!((wordcount == 4) || (wordcount == 5)))
+ goto out;
+
+ if (wordcount == 4) {
+ if (strcmp(words[3], "start") && strcmp(words[3], "stop") &&
+ strcmp(words[3], "status"))
+ goto out;
+ } else {
+ if (strcmp(words[3], "fix-layout") && strcmp(words[3], "start"))
+ goto out;
+ }
+
+ volname = (char *)words[2];
+
+ if (wordcount == 4) {
+ command = (char *)words[3];
+ }
+ if (wordcount == 5) {
+ if ((strcmp(words[3], "fix-layout") || strcmp(words[4], "start")) &&
+ (strcmp(words[3], "start") || strcmp(words[4], "force"))) {
+ ret = -1;
+ goto out;
+ }
+ command = (char *)words[3];
+ option = (char *)words[4];
+ }
+
+ if (strcmp(command, "start") == 0) {
+ cmd = GF_DEFRAG_CMD_START;
+ if (option && strcmp(option, "force") == 0) {
+ cmd = GF_DEFRAG_CMD_START_FORCE;
+ }
+ goto done;
+ }
+
+ if (strcmp(command, "fix-layout") == 0) {
+ cmd = GF_DEFRAG_CMD_START_LAYOUT_FIX;
+ goto done;
+ }
+ if (strcmp(command, "stop") == 0) {
+ cmd = GF_DEFRAG_CMD_STOP;
+ goto done;
+ }
+ if (strcmp(command, "status") == 0) {
+ cmd = GF_DEFRAG_CMD_STATUS;
+ }
+
+done:
+ ret = dict_set_str(dict, "volname", volname);
+
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to set dict");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "rebalance-command", (int32_t)cmd);
+
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to set dict");
+ goto out;
+ }
+
+ *options = dict;
+
+out:
+ if (ret && dict)
+ dict_unref(dict);
+
+ return ret;
+}
+
+int32_t
+cli_snap_create_desc_parse(dict_t *dict, const char **words, size_t wordcount,
+ int32_t desc_opt_loc)
+{
+ int32_t ret = -1;
+ char *desc = NULL;
+ int32_t desc_len = 0;
+ int len;
+
+ desc = GF_MALLOC(MAX_SNAP_DESCRIPTION_LEN + 1, gf_common_mt_char);
+ if (!desc) {
+ ret = -1;
+ goto out;
+ }
+
+ len = strlen(words[desc_opt_loc]);
+ if (len >= MAX_SNAP_DESCRIPTION_LEN) {
+ cli_out(
+ "snapshot create: description truncated: "
+ "Description provided is longer than 1024 characters");
+ desc_len = MAX_SNAP_DESCRIPTION_LEN;
+ } else {
+ desc_len = len;
+ }
+
+ snprintf(desc, desc_len + 1, "%s", words[desc_opt_loc]);
+ /* Calculating the size of the description as given by the user */
+
+ ret = dict_set_dynstr(dict, "description", desc);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to save snap "
+ "description");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && desc)
+ GF_FREE(desc);
+
+ return ret;
+}
+
+/* Function to check whether the Volume name is repeated */
+int
+cli_check_if_volname_repeated(const char **words, unsigned int start_index,
+ uint64_t cur_index)
+{
+ uint64_t i = -1;
+ int ret = 0;
+
+ GF_ASSERT(words);
+
+ for (i = start_index; i < cur_index; i++) {
+ if (strcmp(words[i], words[cur_index]) == 0) {
+ ret = -1;
+ goto out;
}
+ }
+out:
+ return ret;
+}
+/* snapshot clone <clonename> <snapname>
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_clone_parse(dict_t *dict, const char **words, int wordcount)
+{
+ uint64_t i = 0;
+ int ret = -1;
+ char *clonename = NULL;
+ unsigned int cmdi = 2;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot clone)*/
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+
+ if (wordcount == cmdi + 1) {
+ cli_err("Invalid Syntax.");
+ gf_log("cli", GF_LOG_ERROR,
+ "Invalid number of words for snap clone command");
+ goto out;
+ }
+
+ if (strlen(words[cmdi]) >= GLUSTERD_MAX_SNAP_NAME) {
+ cli_err(
+ "snapshot clone: failed: clonename cannot exceed "
+ "255 characters.");
+ gf_log("cli", GF_LOG_ERROR, "Clone name too long");
+
+ goto out;
+ }
+
+ clonename = (char *)words[cmdi];
+ for (i = 0; i < strlen(clonename); i++) {
+ /* Following volume name convention */
+ if (!isalnum(clonename[i]) &&
+ (clonename[i] != '_' && (clonename[i] != '-'))) {
+ /* TODO : Is this message enough?? */
+ cli_err(
+ "Clonename can contain only alphanumeric, "
+ "\"-\" and \"_\" characters");
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32(dict, "volcount", 1);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not save volcount");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "clonename", (char *)words[cmdi]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save clone "
+ "name(%s)",
+ (char *)words[cmdi]);
+ goto out;
+ }
+
+ /* Filling snap name in the dictionary */
+ ret = dict_set_str(dict, "snapname", (char *)words[cmdi + 1]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not "
+ "save snap name(%s)",
+ (char *)words[cmdi + 1]);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/* snapshot create <snapname> <vol-name(s)> [description <description>]
+ * [force]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_create_parse(dict_t *dict, const char **words, int wordcount)
+{
+ uint64_t i = 0;
+ int ret = -1;
+ uint64_t volcount = 0;
+ char key[PATH_MAX] = "";
+ char *snapname = NULL;
+ unsigned int cmdi = 2;
+ int flags = 0;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot create)*/
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+
+ if (wordcount <= cmdi + 1) {
+ cli_err("Invalid Syntax.");
+ gf_log("cli", GF_LOG_ERROR, "Too less words for snap create command");
+ goto out;
+ }
+
+ if (strlen(words[cmdi]) >= GLUSTERD_MAX_SNAP_NAME) {
+ cli_err(
+ "snapshot create: failed: snapname cannot exceed "
+ "255 characters.");
+ gf_log("cli", GF_LOG_ERROR, "Snapname too long");
+
+ goto out;
+ }
+
+ snapname = (char *)words[cmdi];
+ for (i = 0; i < strlen(snapname); i++) {
+ /* Following volume name convention */
+ if (!isalnum(snapname[i]) &&
+ (snapname[i] != '_' && (snapname[i] != '-'))) {
+ /* TODO : Is this message enough?? */
+ cli_err(
+ "Snapname can contain only alphanumeric, "
+ "\"-\" and \"_\" characters");
+ goto out;
+ }
+ }
+
+ ret = dict_set_str(dict, "snapname", (char *)words[cmdi]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save snap "
+ "name(%s)",
+ (char *)words[cmdi]);
+ goto out;
+ }
+
+ /* Filling volume name in the dictionary */
+ for (i = cmdi + 1;
+ i < wordcount && (strcmp(words[i], "description")) != 0 &&
+ (strcmp(words[i], "force") != 0) &&
+ (strcmp(words[i], "no-timestamp") != 0);
+ i++) {
+ volcount++;
+ /* volume index starts from 1 */
+ ret = snprintf(key, sizeof(key), "volname%" PRIu64, volcount);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_set_str(dict, key, (char *)words[i]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not "
+ "save volume name(%s)",
+ (char *)words[i]);
+ goto out;
+ }
+
+ if (i >= cmdi + 2) {
+ ret = -1;
+ cli_err(
+ "Creating multiple volume snapshot is not "
+ "supported as of now");
+ goto out;
+ }
+ /* TODO : remove this above condition check once
+ * multiple volume snapshot is supported */
+ }
+
+ if (volcount == 0) {
+ ret = -1;
+ cli_err("Please provide the volume name");
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "volcount", volcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not save volcount");
+ goto out;
+ }
+
+ /* Verify how we got out of "for" loop,
+ * if it is by reaching wordcount limit then goto "out",
+ * because we need not parse for "description","force" and
+ * "no-timestamp" after this.
+ */
+ if (i == wordcount) {
+ goto out;
+ }
+
+ if (strcmp(words[i], "no-timestamp") == 0) {
+ ret = dict_set_int32n(dict, "no-timestamp", SLEN("no-timestamp"), 1);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save "
+ "time-stamp option");
+ }
+ if (i == (wordcount - 1))
+ goto out;
+ i++;
+ }
+
+ if ((strcmp(words[i], "description")) == 0) {
+ ++i;
+ if (i > (wordcount - 1)) {
+ ret = -1;
+ cli_err("Please provide a description");
+ gf_log("cli", GF_LOG_ERROR, "Description not provided");
+ goto out;
+ }
+
+ ret = cli_snap_create_desc_parse(dict, words, wordcount, i);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save snap "
+ "description");
+ goto out;
+ }
+
+ if (i == (wordcount - 1))
+ goto out;
+ i++;
+ /* point the index to next word.
+ * As description might be follwed by force option.
+ * Before that, check if wordcount limit is reached
+ */
+ }
+
+ if (strcmp(words[i], "force") == 0) {
+ flags = GF_CLI_FLAG_OP_FORCE;
+
+ } else {
+ ret = -1;
+ cli_err("Invalid Syntax.");
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ /* Check if the command has anything after "force" keyword */
+ if (++i < wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (ret == 0) {
+ /*Adding force flag in either of the case i.e force set
+ * or unset*/
+ ret = dict_set_int32(dict, "flags", flags);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save "
+ "snap force option");
+ }
+ }
+ return ret;
+}
+
+/* snapshot list [volname]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_list_parse(dict_t *dict, const char **words, int wordcount)
+{
+ int ret = -1;
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+
+ if (wordcount < 2 || wordcount > 3) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ if (wordcount == 2) {
ret = 0;
- *options = dict;
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "volname", (char *)words[2]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to save volname in dictionary");
+ goto out;
+ }
out:
- if (ret) {
- GF_FREE (free_ptr);
- dict_unref (dict);
- }
+ return ret;
+}
+
+/* snapshot info [(snapname | volume <volname>)]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_info_parse(dict_t *dict, const char **words, int wordcount)
+{
+ int ret = -1;
+ int32_t cmd = GF_SNAP_INFO_TYPE_ALL;
+ unsigned int cmdi = 2;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot info)*/
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
- return ret;
+ if (wordcount > 4 || wordcount < cmdi) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid syntax");
+ goto out;
+ }
+
+ if (wordcount == cmdi) {
+ ret = 0;
+ goto out;
+ }
+
+ /* If 3rd word is not "volume", then it must
+ * be snapname.
+ */
+ if (strcmp(words[cmdi], "volume") != 0) {
+ ret = dict_set_str(dict, "snapname", (char *)words[cmdi]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to save "
+ "snapname %s",
+ words[cmdi]);
+ goto out;
+ }
+
+ /* Once snap name is parsed, if we encounter any other
+ * word then fail it. Invalid Syntax.
+ * example : snapshot info <snapname> word
+ */
+ if ((cmdi + 1) != wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ cmd = GF_SNAP_INFO_TYPE_SNAP;
+ ret = 0;
+ goto out;
+ /* No need to continue the parsing once we
+ * get the snapname
+ */
+ }
+
+ /* If 3rd word is "volume", then check if next word
+ * is present. As, "snapshot info volume" is an
+ * invalid command.
+ */
+ if ((cmdi + 1) == wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "volname", (char *)words[wordcount - 1]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save "
+ "volume name %s",
+ words[wordcount - 1]);
+ goto out;
+ }
+ cmd = GF_SNAP_INFO_TYPE_VOL;
+out:
+ if (ret == 0) {
+ ret = dict_set_int32(dict, "sub-cmd", cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save "
+ "type of snapshot info");
+ }
+ }
+ return ret;
}
+/* snapshot restore <snapname>
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
int
-cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
- dict_t **options)
+cli_snap_restore_parse(dict_t *dict, const char **words, int wordcount,
+ struct cli_state *state)
{
- int ret = 0;
- dict_t *dict = NULL;
+ int ret = -1;
+ const char *question = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
- dict = dict_new ();
- if (!dict)
- goto out;
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+
+ if (wordcount != 3) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
- ret = dict_set_str (dict, "volname", (char *) words[2]);
+ ret = dict_set_str(dict, "snapname", (char *)words[2]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to save snap-name %s", words[2]);
+ goto out;
+ }
+
+ question =
+ "Restore operation will replace the "
+ "original volume with the snapshotted volume. "
+ "Do you still want to continue?";
+
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 1;
+ gf_log("cli", GF_LOG_ERROR,
+ "User cancelled a snapshot "
+ "restore operation for snap %s",
+ (char *)words[2]);
+ goto out;
+ }
+out:
+ return ret;
+}
+
+/* snapshot activate <snapname> [force]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_activate_parse(dict_t *dict, const char **words, int wordcount)
+{
+ int ret = -1;
+ int flags = 0;
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+
+ if ((wordcount < 3) || (wordcount > 4)) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "snapname", (char *)words[2]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to save snap-name %s", words[2]);
+ goto out;
+ }
+
+ if (wordcount == 4) {
+ if (!strcmp("force", (char *)words[3])) {
+ flags = GF_CLI_FLAG_OP_FORCE;
+ } else {
+ gf_log("cli", GF_LOG_ERROR, "Invalid option");
+ ret = -1;
+ goto out;
+ }
+ }
+ ret = dict_set_int32(dict, "flags", flags);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to save force option");
+ goto out;
+ }
+out:
+ return ret;
+}
+
+/* snapshot deactivate <snapname>
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ * 1 if user cancelled the request
+ */
+int
+cli_snap_deactivate_parse(dict_t *dict, const char **words, int wordcount,
+ struct cli_state *state)
+{
+ int ret = -1;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question =
+ "Deactivating snap will make its "
+ "data inaccessible. Do you want to "
+ "continue?";
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+
+ if ((wordcount != 3)) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "snapname", (char *)words[2]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to save snap-name %s", words[2]);
+ goto out;
+ }
+
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 1;
+ gf_log("cli", GF_LOG_DEBUG,
+ "User cancelled "
+ "snapshot deactivate operation");
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+/* snapshot delete (all | snapname | volume <volname>)
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ * 1 if user cancel the operation
+ */
+int
+cli_snap_delete_parse(dict_t *dict, const char **words, int wordcount,
+ struct cli_state *state)
+{
+ int ret = -1;
+ const char *question = NULL;
+ int32_t cmd = -1;
+ unsigned int cmdi = 2;
+ gf_answer_t answer = GF_ANSWER_NO;
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+
+ if (wordcount > 4 || wordcount <= cmdi) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ question =
+ "Deleting snap will erase all the information about "
+ "the snap. Do you still want to continue?";
+
+ if (strcmp(words[cmdi], "all") == 0) {
+ ret = 0;
+ cmd = GF_SNAP_DELETE_TYPE_ALL;
+ } else if (strcmp(words[cmdi], "volume") == 0) {
+ if (++cmdi == wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "volname", (char *)words[cmdi]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save "
+ "volume name %s",
+ words[wordcount - 1]);
+ goto out;
+ }
+ cmd = GF_SNAP_DELETE_TYPE_VOL;
+ } else {
+ ret = dict_set_str(dict, "snapname", (char *)words[cmdi]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to save "
+ "snapname %s",
+ words[2]);
+ goto out;
+ }
+ cmd = GF_SNAP_DELETE_TYPE_SNAP;
+ }
+
+ if ((cmdi + 1) != wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ if (cmd == GF_SNAP_DELETE_TYPE_SNAP) {
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 1;
+ gf_log("cli", GF_LOG_DEBUG,
+ "User cancelled "
+ "snapshot delete operation for snap %s",
+ (char *)words[2]);
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32(dict, "sub-cmd", cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save "
+ "type of snapshot delete");
+ }
+out:
+ return ret;
+}
+
+/* snapshot status [(snapname | volume <volname>)]
+ * @arg-0, dict : Request Dictionary to be sent to server side.
+ * @arg-1, words : Contains individual words of CLI command.
+ * @arg-2, wordcount: Contains number of words present in the CLI command.
+ *
+ * return value : -1 on failure
+ * 0 on success
+ */
+int
+cli_snap_status_parse(dict_t *dict, const char **words, int wordcount)
+{
+ int ret = -1;
+ int32_t cmd = GF_SNAP_STATUS_TYPE_ALL;
+ unsigned int cmdi = 2;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot status)*/
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+
+ if (wordcount > 4 || wordcount < cmdi) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ if (wordcount == cmdi) {
+ ret = 0;
+ goto out;
+ }
+
+ /* if 3rd word is not "volume", then it must be "snapname"
+ */
+ if (strcmp(words[cmdi], "volume") != 0) {
+ ret = dict_set_str(dict, "snapname", (char *)words[cmdi]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Count not save "
+ "snap name %s",
+ words[cmdi]);
+ goto out;
+ }
+
+ if ((cmdi + 1) != wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = 0;
+ cmd = GF_SNAP_STATUS_TYPE_SNAP;
+ goto out;
+ }
+
+ /* If 3rd word is "volume", then check if next word is present.
+ * As, "snapshot info volume" is an invalid command
+ */
+ if ((cmdi + 1) == wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "volname", (char *)words[wordcount - 1]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Count not save "
+ "volume name %s",
+ words[wordcount - 1]);
+ goto out;
+ }
+ cmd = GF_SNAP_STATUS_TYPE_VOL;
+
+out:
+ if (ret == 0) {
+ ret = dict_set_int32(dict, "sub-cmd", cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save cmd "
+ "of snapshot status");
+ }
+ }
+
+ return ret;
+}
+
+/* return value:
+ * -1 in case of failure.
+ * 0 in case of success.
+ */
+int32_t
+cli_snap_config_limit_parse(const char **words, dict_t *dict,
+ unsigned int wordcount, unsigned int index,
+ char *key)
+{
+ int ret = -1;
+ int limit = 0;
+ char *end_ptr = NULL;
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+ GF_ASSERT(key);
+
+ if (index >= wordcount) {
+ ret = -1;
+ cli_err("Please provide a value for %s.", key);
+ gf_log("cli", GF_LOG_ERROR, "Value not provided for %s", key);
+ goto out;
+ }
+
+ limit = strtol(words[index], &end_ptr, 10);
+
+ if (limit <= 0 || strcmp(end_ptr, "") != 0) {
+ ret = -1;
+ cli_err(
+ "Please enter an integer value "
+ "greater than zero for %s",
+ key);
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, key, limit);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not set "
+ "%s in dictionary",
+ key);
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(dict, "globalname", "All");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not set global key");
+ goto out;
+ }
+ ret = dict_set_int32(dict, "hold_global_locks", _gf_true);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not set global locks");
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+/* function cli_snap_config_parse
+ * Config Syntax : gluster snapshot config [volname]
+ * [snap-max-hard-limit <count>]
+ * [snap-max-soft-limit <count>]
+ *
+ return value: <0 on failure
+ 1 if user cancels the operation, or limit value is out of
+ range
+ 0 on success
+
+ NOTE : snap-max-soft-limit can only be set for system.
+*/
+int32_t
+cli_snap_config_parse(const char **words, int wordcount, dict_t *dict,
+ struct cli_state *state)
+{
+ int ret = -1;
+ gf_answer_t answer = GF_ANSWER_NO;
+ gf_boolean_t vol_presence = _gf_false;
+ struct snap_config_opt_vals_ *conf_vals = NULL;
+ int8_t hard_limit = 0;
+ int8_t soft_limit = 0;
+ int8_t config_type = -1;
+ const char *question = NULL;
+ unsigned int cmdi = 2;
+ /* cmdi is command index, here cmdi is "2" (gluster snapshot config)*/
+
+ GF_ASSERT(words);
+ GF_ASSERT(dict);
+ GF_ASSERT(state);
+
+ if ((wordcount < 2) || (wordcount > 7)) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid wordcount(%d)", wordcount);
+ goto out;
+ }
+
+ if (wordcount == 2) {
+ config_type = GF_SNAP_CONFIG_DISPLAY;
+ ret = 0;
+ goto set;
+ }
+
+ /* auto-delete cannot be a volume name */
+ /* Check whether the 3rd word is volname */
+ if (strcmp(words[cmdi], "snap-max-hard-limit") != 0 &&
+ strcmp(words[cmdi], "snap-max-soft-limit") != 0 &&
+ strcmp(words[cmdi], "auto-delete") != 0 &&
+ strcmp(words[cmdi], "activate-on-create") != 0) {
+ ret = dict_set_str(dict, "volname", (char *)words[cmdi]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to set volname");
+ goto out;
+ }
+ cmdi++;
+ vol_presence = _gf_true;
+
+ if (cmdi == wordcount) {
+ config_type = GF_SNAP_CONFIG_DISPLAY;
+ ret = 0;
+ goto set;
+ }
+ }
+
+ config_type = GF_SNAP_CONFIG_TYPE_SET;
+
+ if (strcmp(words[cmdi], "snap-max-hard-limit") == 0) {
+ ret = cli_snap_config_limit_parse(words, dict, wordcount, ++cmdi,
+ "snap-max-hard-limit");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse snap "
+ "config hard limit");
+ goto out;
+ }
+ hard_limit = 1;
+
+ if (++cmdi == wordcount) {
+ ret = 0;
+ goto set;
+ }
+ }
+
+ if (strcmp(words[cmdi], "snap-max-soft-limit") == 0) {
+ if (vol_presence == 1) {
+ ret = -1;
+ cli_err(
+ "Soft limit cannot be set to individual "
+ "volumes.");
+ gf_log("cli", GF_LOG_ERROR,
+ "Soft limit cannot be "
+ "set to volumes");
+ goto out;
+ }
+
+ ret = cli_snap_config_limit_parse(words, dict, wordcount, ++cmdi,
+ "snap-max-soft-limit");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse snap "
+ "config soft limit");
+ goto out;
+ }
+
+ if (++cmdi != wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+ soft_limit = 1;
+ }
+
+ if (hard_limit || soft_limit)
+ goto set;
+
+ if (strcmp(words[cmdi], "auto-delete") == 0) {
+ if (vol_presence == 1) {
+ ret = -1;
+ cli_err(
+ "As of now, auto-delete option cannot be set "
+ "to volumes");
+ gf_log("cli", GF_LOG_ERROR,
+ "auto-delete option "
+ "cannot be set to volumes");
+ goto out;
+ }
+
+ if (++cmdi >= wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "auto-delete", (char *)words[cmdi]);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to set volname");
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set "
+ "value of auto-delete in request "
+ "dictionary");
+ goto out;
+ }
+
+ if (++cmdi != wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+ } else if (strcmp(words[cmdi], "activate-on-create") == 0) {
+ if (vol_presence == 1) {
+ ret = -1;
+ cli_err(
+ "As of now, activate-on-create option "
+ "cannot be set to volumes");
+ gf_log("cli", GF_LOG_ERROR,
+ "activate-on-create "
+ "option cannot be set to volumes");
+ goto out;
+ }
+
+ if (++cmdi >= wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "snap-activate-on-create",
+ (char *)words[cmdi]);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set value "
+ "of activate-on-create in request dictionary");
+ goto out;
+ }
+
+ if (++cmdi != wordcount) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+ } else {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = 0; /* Success */
+
+set:
+ ret = dict_set_int32(dict, "config-command", config_type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to set "
+ "config-command");
+ goto out;
+ }
+
+ if (config_type == GF_SNAP_CONFIG_TYPE_SET && (hard_limit || soft_limit)) {
+ conf_vals = snap_confopt_vals;
+ if (hard_limit && soft_limit) {
+ question = conf_vals[GF_SNAP_CONFIG_SET_BOTH].question;
+ } else if (soft_limit) {
+ question = conf_vals[GF_SNAP_CONFIG_SET_SOFT].question;
+ } else if (hard_limit) {
+ question = conf_vals[GF_SNAP_CONFIG_SET_HARD].question;
+ }
+
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 1;
+ gf_log("cli", GF_LOG_DEBUG,
+ "User cancelled "
+ "snapshot config operation");
+ }
+ }
+
+out:
+ return ret;
+}
+
+int
+validate_op_name(const char *op, const char *opname, char **opwords)
+{
+ int ret = -1;
+ int i = 0;
+
+ GF_ASSERT(opname);
+ GF_ASSERT(opwords);
+
+ for (i = 0; opwords[i] != NULL; i++) {
+ if (strcmp(opwords[i], opname) == 0) {
+ cli_out("\"%s\" cannot be a %s", opname, op);
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+cli_cmd_snapshot_parse(const char **words, int wordcount, dict_t **options,
+ struct cli_state *state)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ gf1_cli_snapshot type = GF_SNAP_OPTION_TYPE_NONE;
+ char *w = NULL;
+ static char *opwords[] = {"create", "delete", "restore", "activate",
+ "deactivate", "list", "status", "config",
+ "info", "clone", NULL};
+ static char *invalid_snapnames[] = {"description", "force", "volume", "all",
+ NULL};
+ static char *invalid_volnames[] = {"volume",
+ "type",
+ "subvolumes",
+ "option",
+ "end-volume",
+ "all",
+ "volume_not_in_ring",
+ "description",
+ "force",
+ "snap-max-hard-limit",
+ "snap-max-soft-limit",
+ "auto-delete",
+ "activate-on-create",
+ NULL};
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+ GF_ASSERT(state);
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ /* Lowest wordcount possible */
+ if (wordcount < 2) {
+ gf_log("", GF_LOG_ERROR, "Invalid command: Not enough arguments");
+ goto out;
+ }
+
+ w = str_getunamb(words[1], opwords);
+ if (!w) {
+ /* Checks if the operation is a valid operation */
+ gf_log("", GF_LOG_ERROR, "Opword Mismatch");
+ goto out;
+ }
+
+ if (!strcmp(w, "create")) {
+ type = GF_SNAP_OPTION_TYPE_CREATE;
+ } else if (!strcmp(w, "list")) {
+ type = GF_SNAP_OPTION_TYPE_LIST;
+ } else if (!strcmp(w, "info")) {
+ type = GF_SNAP_OPTION_TYPE_INFO;
+ } else if (!strcmp(w, "delete")) {
+ type = GF_SNAP_OPTION_TYPE_DELETE;
+ } else if (!strcmp(w, "config")) {
+ type = GF_SNAP_OPTION_TYPE_CONFIG;
+ } else if (!strcmp(w, "restore")) {
+ type = GF_SNAP_OPTION_TYPE_RESTORE;
+ } else if (!strcmp(w, "status")) {
+ type = GF_SNAP_OPTION_TYPE_STATUS;
+ } else if (!strcmp(w, "activate")) {
+ type = GF_SNAP_OPTION_TYPE_ACTIVATE;
+ } else if (!strcmp(w, "deactivate")) {
+ type = GF_SNAP_OPTION_TYPE_DEACTIVATE;
+ } else if (!strcmp(w, "clone")) {
+ type = GF_SNAP_OPTION_TYPE_CLONE;
+ }
+
+ if (type != GF_SNAP_OPTION_TYPE_CONFIG &&
+ type != GF_SNAP_OPTION_TYPE_STATUS) {
+ ret = dict_set_int32(dict, "hold_snap_locks", _gf_true);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to set hold-snap-locks value "
+ "as _gf_true");
+ goto out;
+ }
+ }
+
+ /* Following commands does not require volume locks */
+ if (type == GF_SNAP_OPTION_TYPE_STATUS ||
+ type == GF_SNAP_OPTION_TYPE_ACTIVATE ||
+ type == GF_SNAP_OPTION_TYPE_DEACTIVATE) {
+ ret = dict_set_int32(dict, "hold_vol_locks", _gf_false);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Setting volume lock "
+ "flag failed");
+ goto out;
+ }
+ }
+
+ /* Check which op is intended */
+ switch (type) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ /* Syntax :
+ * gluster snapshot create <snapname> <vol-name(s)>
+ * [no-timestamp]
+ * [description <description>]
+ * [force]
+ */
+ /* In cases where the snapname is not given then
+ * parsing fails & snapname cannot be "description",
+ * "force" and "volume", that check is made here
+ */
+ if (wordcount == 2) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
+ goto out;
+ }
+
+ ret = validate_op_name("snapname", words[2], invalid_snapnames);
+ if (ret) {
+ goto out;
+ }
+
+ ret = cli_snap_create_parse(dict, words, wordcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "create command parsing failed.");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_CLONE:
+ /* Syntax :
+ * gluster snapshot clone <clonename> <snapname>
+ */
+ /* In cases where the clonename is not given then
+ * parsing fails & snapname cannot be "description",
+ * "force" and "volume", that check is made here
+ */
+ if (wordcount == 2) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid Syntax");
goto out;
+ }
+
+ ret = validate_op_name("clonename", words[2], invalid_volnames);
+ if (ret) {
+ goto out;
+ }
+
+ ret = cli_snap_clone_parse(dict, words, wordcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "clone command parsing failed.");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_INFO:
+ /* Syntax :
+ * gluster snapshot info [(snapname] | [vol <volname>)]
+ */
+ ret = cli_snap_info_parse(dict, words, wordcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse "
+ "snapshot info command");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_LIST:
+ /* Syntax :
+ * gluster snaphsot list [volname]
+ */
+
+ ret = cli_snap_list_parse(dict, words, wordcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse "
+ "snapshot list command");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ /* Syntax :
+ * snapshot delete (all | snapname | volume <volname>)
+ */
+ ret = cli_snap_delete_parse(dict, words, wordcount, state);
+ if (ret) {
+ /* A positive ret value means user cancelled
+ * the command */
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse "
+ "snapshot delete command");
+ }
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ /* snapshot config [volname] [snap-max-hard-limit <count>]
+ * [snap-max-soft-limit <percent>] */
+ ret = cli_snap_config_parse(words, wordcount, dict, state);
+ if (ret) {
+ if (ret < 0)
+ gf_log("cli", GF_LOG_ERROR,
+ "config command parsing failed.");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "type", GF_SNAP_OPTION_TYPE_CONFIG);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to set "
+ "config type");
+ ret = -1;
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_STATUS: {
+ /* Syntax :
+ * gluster snapshot status [(snapname |
+ * volume <volname>)]
+ */
+ ret = cli_snap_status_parse(dict, words, wordcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse "
+ "snapshot status command");
+ goto out;
+ }
+ break;
}
- if (wordcount == 3) {
- ret = dict_set_int32 (dict, "heal-op", GF_AFR_OP_HEAL_INDEX);
- goto done;
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ /* Syntax:
+ * snapshot restore <snapname>
+ */
+ ret = cli_snap_restore_parse(dict, words, wordcount, state);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse "
+ "restore command");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_ACTIVATE:
+ /* Syntax:
+ * snapshot activate <snapname> [force]
+ */
+ ret = cli_snap_activate_parse(dict, words, wordcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse "
+ "start command");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_DEACTIVATE:
+ /* Syntax:
+ * snapshot deactivate <snapname>
+ */
+ ret = cli_snap_deactivate_parse(dict, words, wordcount, state);
+ if (ret) {
+ /* A positive ret value means user cancelled
+ * the command */
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to parse deactivate "
+ "command");
+ }
+ goto out;
+ }
+ break;
+
+ default:
+ ret = -1;
+ gf_log("", GF_LOG_ERROR, "Opword Mismatch");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "type", type);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Failed to set type.");
+ goto out;
+ }
+ /* If you got so far, input is valid */
+ ret = 0;
+out:
+ if (ret) {
+ if (dict)
+ dict_unref(dict);
+ } else
+ *options = dict;
+
+ return ret;
+}
+
+int
+cli_cmd_validate_volume(char *volname)
+{
+ int i = 0;
+ int ret = -1;
+ int volname_len;
+
+ if (volname[0] == '-')
+ return ret;
+
+ if (!strcmp(volname, "all")) {
+ cli_err("\"all\" cannot be the name of a volume.");
+ return ret;
+ }
+
+ if (strchr(volname, '/')) {
+ cli_err("Volume name should not contain \"/\" character.");
+ return ret;
+ }
+
+ volname_len = strlen(volname);
+ if (volname_len > GD_VOLUME_NAME_MAX) {
+ cli_err("Volname can not exceed %d characters.", GD_VOLUME_NAME_MAX);
+ return ret;
+ }
+
+ for (i = 0; i < volname_len; i++)
+ if (!isalnum(volname[i]) && (volname[i] != '_') &&
+ (volname[i] != '-')) {
+ cli_err(
+ "Volume name should not contain \"%c\""
+ " character.\nVolume names can only"
+ "contain alphanumeric, '-' and '_' "
+ "characters.",
+ volname[i]);
+ return ret;
+ }
+
+ ret = 0;
+
+ return ret;
+}
+
+int32_t
+cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **options)
+{
+ int32_t ret = -1;
+ char *w = NULL;
+ char *volname = NULL;
+ static char *opwords[] = {"enable", "disable", "scrub-throttle",
+ "scrub-frequency", "scrub", "signing-time",
+ "signer-threads", NULL};
+ static char *scrub_throt_values[] = {"lazy", "normal", "aggressive", NULL};
+ static char *scrub_freq_values[] = {
+ "hourly", "daily", "weekly", "biweekly", "monthly", "minute", NULL};
+ static char *scrub_values[] = {"pause", "resume", "status", "ondemand",
+ NULL};
+ dict_t *dict = NULL;
+ gf_bitrot_type type = GF_BITROT_OPTION_TYPE_NONE;
+ int32_t expiry_time = 0;
+ int32_t signer_th_count = 0;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ /* Hack to print out bitrot help properly */
+ if ((wordcount == 3) && !(strcmp(words[2], "help"))) {
+ ret = 1;
+ return ret;
+ }
+
+ if (wordcount < 4 || wordcount > 5) {
+ gf_log("cli", GF_LOG_ERROR, "Invalid syntax");
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ volname = (char *)words[2];
+ if (!volname) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = cli_cmd_validate_volume(volname);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to validate volume name");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret) {
+ cli_out("Failed to set volume name in dictionary ");
+ goto out;
+ }
+
+ w = str_getunamb(words[3], opwords);
+ if (!w) {
+ cli_out("Invalid bit rot option : %s", words[3]);
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(w, "enable") == 0) {
+ if (wordcount == 4) {
+ type = GF_BITROT_OPTION_TYPE_ENABLE;
+ ret = 0;
+ goto set_type;
+ } else {
+ ret = -1;
+ goto out;
}
+ }
+ if (strcmp(w, "disable") == 0) {
if (wordcount == 4) {
- if (!strcmp (words[3], "full")) {
- ret = dict_set_int32 (dict, "heal-op",
- GF_AFR_OP_HEAL_FULL);
- goto done;
- } else if (!strcmp (words[3], "info")) {
- ret = dict_set_int32 (dict, "heal-op",
- GF_AFR_OP_INDEX_SUMMARY);
- goto done;
- } else {
- ret = -1;
- goto out;
- }
+ type = GF_BITROT_OPTION_TYPE_DISABLE;
+ ret = 0;
+ goto set_type;
+ } else {
+ ret = -1;
+ goto out;
}
- if (wordcount == 5) {
- if (strcmp (words[3], "info")) {
- ret = -1;
- goto out;
+ }
+
+ if (!strcmp(w, "scrub-throttle")) {
+ if (!words[4]) {
+ cli_err(
+ "Missing scrub-throttle value for bitrot "
+ "option");
+ ret = -1;
+ goto out;
+ } else {
+ w = str_getunamb(words[4], scrub_throt_values);
+ if (!w) {
+ cli_err(
+ "Invalid scrub-throttle option for "
+ "bitrot");
+ ret = -1;
+ goto out;
+ } else {
+ type = GF_BITROT_OPTION_TYPE_SCRUB_THROTTLE;
+ ret = dict_set_str(dict, "scrub-throttle-value",
+ (char *)words[4]);
+ if (ret) {
+ cli_out(
+ "Failed to set scrub-throttle "
+ "value in the dict");
+ goto out;
}
- if (!strcmp (words[4], "healed")) {
- ret = dict_set_int32 (dict, "heal-op",
- GF_AFR_OP_HEALED_FILES);
- goto done;
+ goto set_type;
+ }
+ }
+ }
+
+ if (!strcmp(words[3], "scrub-frequency")) {
+ if (!words[4]) {
+ cli_err("Missing scrub-frequency value");
+ ret = -1;
+ goto out;
+ } else {
+ w = str_getunamb(words[4], scrub_freq_values);
+ if (!w) {
+ cli_err("Invalid frequency option for bitrot");
+ ret = -1;
+ goto out;
+ } else {
+ type = GF_BITROT_OPTION_TYPE_SCRUB_FREQ;
+ ret = dict_set_str(dict, "scrub-frequency-value",
+ (char *)words[4]);
+ if (ret) {
+ cli_out(
+ "Failed to set dict for "
+ "bitrot");
+ goto out;
}
- if (!strcmp (words[4], "heal-failed")) {
- ret = dict_set_int32 (dict, "heal-op",
- GF_AFR_OP_HEAL_FAILED_FILES);
- goto done;
+ goto set_type;
+ }
+ }
+ }
+
+ if (!strcmp(words[3], "scrub")) {
+ if (!words[4]) {
+ cli_err("Missing scrub value for bitrot option");
+ ret = -1;
+ goto out;
+ } else {
+ w = str_getunamb(words[4], scrub_values);
+ if (!w) {
+ cli_err("Invalid scrub option for bitrot");
+ ret = -1;
+ goto out;
+ } else {
+ if (strcmp(words[4], "status") == 0) {
+ type = GF_BITROT_CMD_SCRUB_STATUS;
+ } else if (strcmp(words[4], "ondemand") == 0) {
+ type = GF_BITROT_CMD_SCRUB_ONDEMAND;
+ } else {
+ type = GF_BITROT_OPTION_TYPE_SCRUB;
}
- if (!strcmp (words[4], "split-brain")) {
- ret = dict_set_int32 (dict, "heal-op",
- GF_AFR_OP_SPLIT_BRAIN_FILES);
- goto done;
+ ret = dict_set_str(dict, "scrub-value", (char *)words[4]);
+ if (ret) {
+ cli_out(
+ "Failed to set dict for "
+ "bitrot");
+ goto out;
}
+ goto set_type;
+ }
+ }
+ }
+
+ if (!strcmp(words[3], "signing-time")) {
+ if (!words[4]) {
+ cli_err(
+ "Missing signing-time value for bitrot "
+ "option");
+ ret = -1;
+ goto out;
+ } else {
+ type = GF_BITROT_OPTION_TYPE_EXPIRY_TIME;
+
+ expiry_time = strtol(words[4], NULL, 0);
+ if (expiry_time < 1) {
+ cli_err(
+ "Expiry time value should not be less"
+ " than 1");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_uint32(dict, "expiry-time",
+ (unsigned int)expiry_time);
+ if (ret) {
+ cli_out("Failed to set dict for bitrot");
+ goto out;
+ }
+ goto set_type;
+ }
+ } else if (!strcmp(words[3], "signer-threads")) {
+ if (!words[4]) {
+ cli_err(
+ "Missing signer-thread value for bitrot "
+ "option");
+ ret = -1;
+ goto out;
+ } else {
+ type = GF_BITROT_OPTION_TYPE_SIGNER_THREADS;
+
+ signer_th_count = strtol(words[4], NULL, 0);
+ if (signer_th_count < 1) {
+ cli_err("signer-thread count should not be less than 1");
ret = -1;
goto out;
+ }
+
+ ret = dict_set_uint32(dict, "signer-threads",
+ (unsigned int)signer_th_count);
+ if (ret) {
+ cli_out("Failed to set dict for bitrot");
+ goto out;
+ }
+ goto set_type;
}
+ } else {
+ cli_err(
+ "Invalid option %s for bitrot. Please enter valid "
+ "bitrot option",
+ words[3]);
ret = -1;
goto out;
-done:
- *options = dict;
+ }
+set_type:
+ ret = dict_set_int32(dict, "type", type);
+ if (ret < 0)
+ goto out;
+
+ *options = dict;
+
out:
- if (ret && dict) {
- dict_unref (dict);
- *options = NULL;
- }
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to parse bitrot command");
+ if (dict)
+ dict_unref(dict);
+ }
- return ret;
+ return ret;
+}
+
+/* Parsing global option for NFS-Ganesha config
+ * gluster nfs-ganesha enable/disable */
+
+int32_t
+cli_cmd_ganesha_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **op_errstr)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
+ char *key = NULL;
+ char *value = NULL;
+ char *w = NULL;
+ static char *opwords[] = {"enable", "disable", NULL};
+ const char *question = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+
+ GF_ASSERT(words);
+ GF_ASSERT(options);
+
+ dict = dict_new();
+
+ if (!dict)
+ goto out;
+
+ if (wordcount != 2)
+ goto out;
+
+ key = (char *)words[0];
+ value = (char *)words[1];
+
+ if (!key || !value) {
+ cli_out("Usage : nfs-ganesha <enable/disable>");
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_strip_whitespace(value, strlen(value));
+ if (ret == -1)
+ goto out;
+
+ if (strcmp(key, "nfs-ganesha")) {
+ gf_asprintf(op_errstr,
+ "Global option: error: ' %s '"
+ "is not a valid global option.",
+ key);
+ ret = -1;
+ goto out;
+ }
+
+ w = str_getunamb(value, opwords);
+ if (!w) {
+ cli_out(
+ "Invalid global option \n"
+ "Usage : nfs-ganesha <enable/disable>");
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(value, "enable") == 0) {
+ question =
+ "Enabling NFS-Ganesha requires Gluster-NFS to be "
+ "disabled across the trusted pool. Do you "
+ "still want to continue?\n";
+ } else if (strcmp(value, "disable") == 0) {
+ question =
+ "Disabling NFS-Ganesha will tear down the entire "
+ "ganesha cluster across the trusted pool. Do you "
+ "still want to continue?\n";
+ } else {
+ ret = -1;
+ goto out;
+ }
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Global operation "
+ "cancelled, exiting");
+ ret = -1;
+ goto out;
+ }
+ cli_out("This will take a few minutes to complete. Please wait ..");
+
+ ret = dict_set_str(dict, "key", key);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set on key failed");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "value", value);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set on value failed");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "globalname", "All");
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "dict set on global"
+ " key failed.");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "hold_global_locks", _gf_true);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "dict set on global key "
+ "failed.");
+ goto out;
+ }
+
+ *options = dict;
+out:
+ if (ret)
+ dict_unref(dict);
+
+ return ret;
}
diff --git a/cli/src/cli-cmd-peer.c b/cli/src/cli-cmd-peer.c
index 9b478661155..084998701d8 100644
--- a/cli/src/cli-cmd-peer.c
+++ b/cli/src/cli-cmd-peer.c
@@ -1,260 +1,317 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "cli.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
#include "cli1-xdr.h"
#include "protocol-common.h"
+#include <glusterfs/events.h>
-extern struct rpc_clnt *global_rpc;
-
-extern rpc_clnt_prog_t *cli_rpc_prog;
-
-int cli_cmd_peer_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+int
+cli_cmd_peer_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
const char **words, int wordcount);
int
-cli_cmd_peer_probe_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_peer_probe_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
- int sent = 0;
- int parse_error = 0;
-
- if (!(wordcount == 3)) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ if (!(wordcount == 3)) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_PROBE];
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_str(dict, "hostname", (char *)words[2]);
+ if (ret)
+ goto out;
+
+ ret = valid_internet_address((char *)words[2], _gf_false, _gf_false);
+ if (ret == 1) {
+ ret = 0;
+ } else {
+ cli_out("%s is an invalid address", words[2]);
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ ret = -1;
+ goto out;
+ }
+ /* if (words[3]) {
+ ret = dict_set_str (dict, "port", (char *)words[3]);
+ if (ret)
+ goto out;
+ }
+ */
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, dict);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, dict);
+ }
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_PROBE];
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Peer probe failed");
+ }
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ CLI_STACK_DESTROY(frame);
- dict = dict_new ();
- if (!dict)
- goto out;
+ if (ret == 0) {
+ gf_event(EVENT_PEER_ATTACH, "host=%s", (char *)words[2]);
+ }
- ret = dict_set_str (dict, "hostname", (char *)words[2]);
- if (ret)
- goto out;
-
- ret = valid_internet_address ((char *) words[2], _gf_false);
- if (ret == 1) {
- ret = 0;
- } else {
- cli_out ("%s is an invalid address", words[2]);
- cli_usage_out (word->pattern);
- parse_error = 1;
- ret = -1;
- goto out;
- }
-/* if (words[3]) {
- ret = dict_set_str (dict, "port", (char *)words[3]);
- if (ret)
- goto out;
- }
-*/
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
+ return ret;
+}
+
+int
+cli_cmd_peer_deprobe_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ int flags = 0;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question = NULL;
+
+ if ((wordcount < 3) || (wordcount > 4)) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+ question =
+ "All clients mounted through the peer which is getting detached need "
+ "to be remounted using one of the other active peers in the trusted "
+ "storage pool to ensure client gets notification on any changes done "
+ "on the gluster configuration and if the same has been done do you "
+ "want to proceed?";
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DEPROBE];
+
+ dict = dict_new();
+
+ ret = dict_set_str(dict, "hostname", (char *)words[2]);
+ if (ret)
+ goto out;
+
+ /* if (words[3]) {
+ ret = dict_set_str (dict, "port", (char *)words[3]);
+ if (ret)
+ goto out;
+ }
+ */
+ if (wordcount == 4) {
+ if (!strcmp("force", words[3]))
+ flags |= GF_CLI_FLAG_OP_FORCE;
+ else {
+ ret = -1;
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
}
+ }
+ ret = dict_set_int32(dict, "flags", flags);
+ if (ret)
+ goto out;
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, dict);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, dict);
+ }
out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Peer probe failed");
- }
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Peer detach failed");
+ }
- CLI_STACK_DESTROY (frame);
+ CLI_STACK_DESTROY(frame);
- return ret;
-}
+ if (ret == 0) {
+ gf_event(EVENT_PEER_DETACH, "host=%s", (char *)words[2]);
+ }
+ return ret;
+}
int
-cli_cmd_peer_deprobe_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_peer_status_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
- int flags = 0;
- int sent = 0;
- int parse_error = 0;
-
- if ((wordcount < 3) || (wordcount > 4)) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DEPROBE];
+ if (wordcount != 2) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LIST_FRIENDS];
- dict = dict_new ();
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
- ret = dict_set_str (dict, "hostname", (char *)words[2]);
- if (ret)
- goto out;
-
-/* if (words[3]) {
- ret = dict_set_str (dict, "port", (char *)words[3]);
- if (ret)
- goto out;
- }
-*/
- if (wordcount == 4) {
- if (!strcmp("force", words[3]))
- flags |= GF_CLI_FLAG_OP_FORCE;
- else {
- ret = -1;
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
- }
- ret = dict_set_int32 (dict, "flags", flags);
- if (ret)
- goto out;
-
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
- }
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, (void *)GF_CLI_LIST_PEERS);
+ }
out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Peer detach failed");
- }
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Peer status failed");
+ }
- CLI_STACK_DESTROY (frame);
+ CLI_STACK_DESTROY(frame);
- return ret;
+ return ret;
}
int
-cli_cmd_peer_status_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_pool_list_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- int sent = 0;
- int parse_error = 0;
-
- if (wordcount != 2) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LIST_FRIENDS];
+ if (wordcount != 2) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LIST_FRIENDS];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, (char *)words[1] );
- }
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, (void *)GF_CLI_LIST_POOL_NODES);
+ }
out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Peer status failed");
- }
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_err("pool list: command execution failed");
+ }
- CLI_STACK_DESTROY (frame);
+ CLI_STACK_DESTROY(frame);
- return ret;
+ return ret;
}
struct cli_cmd cli_probe_cmds[] = {
- { "peer probe <HOSTNAME>",
- cli_cmd_peer_probe_cbk,
- "probe peer specified by <HOSTNAME>"},
+ {"peer probe { <HOSTNAME> | <IP-address> }", cli_cmd_peer_probe_cbk,
+ "probe peer specified by <HOSTNAME>"},
- { "peer detach <HOSTNAME> [force]",
- cli_cmd_peer_deprobe_cbk,
- "detach peer specified by <HOSTNAME>"},
+ {"peer detach { <HOSTNAME> | <IP-address> } [force]",
+ cli_cmd_peer_deprobe_cbk, "detach peer specified by <HOSTNAME>"},
- { "peer status",
- cli_cmd_peer_status_cbk,
- "list status of peers"},
+ {"peer status", cli_cmd_peer_status_cbk, "list status of peers"},
- { "peer help",
- cli_cmd_peer_help_cbk,
- "Help command for peer "},
+ {"peer help", cli_cmd_peer_help_cbk, "display help for peer commands"},
- { NULL, NULL, NULL }
-};
+ {"pool list", cli_cmd_pool_list_cbk,
+ "list all the nodes in the pool (including localhost)"},
+
+ {NULL, NULL, NULL}};
int
-cli_cmd_peer_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
+cli_cmd_peer_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
const char **words, int wordcount)
{
- struct cli_cmd *cmd = NULL;
+ struct cli_cmd *cmd = NULL;
+ struct cli_cmd *probe_cmd = NULL;
+ int count = 0;
+
+ cli_out("\ngluster peer commands");
+ cli_out("======================\n");
+ cmd = GF_MALLOC(sizeof(cli_probe_cmds), cli_mt_cli_cmd);
+ memcpy(cmd, cli_probe_cmds, sizeof(cli_probe_cmds));
+ count = (sizeof(cli_probe_cmds) / sizeof(struct cli_cmd));
+ cli_cmd_sort(cmd, count);
+ for (probe_cmd = cmd; probe_cmd->pattern; probe_cmd++)
+ cli_out("%s - %s", probe_cmd->pattern, probe_cmd->desc);
- for (cmd = cli_probe_cmds; cmd->pattern; cmd++)
- cli_out ("%s - %s", cmd->pattern, cmd->desc);
+ GF_FREE(cmd);
- return 0;
+ cli_out("\n");
+ return 0;
}
int
-cli_cmd_probe_register (struct cli_state *state)
+cli_cmd_probe_register(struct cli_state *state)
{
- int ret = 0;
- struct cli_cmd *cmd = NULL;
-
- for (cmd = cli_probe_cmds; cmd->pattern; cmd++) {
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
- ret = cli_cmd_register (&state->tree, cmd);
- if (ret)
- goto out;
- }
+ for (cmd = cli_probe_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
out:
- return ret;
+ return ret;
}
diff --git a/cli/src/cli-cmd-snapshot.c b/cli/src/cli-cmd-snapshot.c
new file mode 100644
index 00000000000..859d6b2e40d
--- /dev/null
+++ b/cli/src/cli-cmd-snapshot.c
@@ -0,0 +1,135 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <pthread.h>
+
+#include "cli.h"
+#include "cli-cmd.h"
+#include "cli-mem-types.h"
+
+int
+cli_cmd_snapshot_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
+
+int
+cli_cmd_snapshot_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = 0;
+ int parse_err = 0;
+ dict_t *options = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SNAP];
+
+ /* Parses the command entered by the user */
+ ret = cli_cmd_snapshot_parse(words, wordcount, &options, state);
+ if (ret) {
+ if (ret < 0) {
+ cli_usage_out(word->pattern);
+ parse_err = 1;
+ } else {
+ /* User might have cancelled the snapshot operation */
+ ret = 0;
+ }
+ goto out;
+ }
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (frame == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ if (proc->fn)
+ ret = proc->fn(frame, THIS, options);
+
+out:
+ if (ret && parse_err == 0)
+ cli_out("Snapshot command failed");
+
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
+}
+
+struct cli_cmd snapshot_cmds[] = {
+ {"snapshot help", cli_cmd_snapshot_help_cbk,
+ "display help for snapshot commands"},
+ {"snapshot create <snapname> <volname> [no-timestamp] "
+ "[description <description>] [force]",
+ cli_cmd_snapshot_cbk, "Snapshot Create."},
+ {"snapshot clone <clonename> <snapname>", cli_cmd_snapshot_cbk,
+ "Snapshot Clone."},
+ {"snapshot restore <snapname>", cli_cmd_snapshot_cbk, "Snapshot Restore."},
+ {"snapshot status [(snapname | volume <volname>)]", cli_cmd_snapshot_cbk,
+ "Snapshot Status."},
+ {"snapshot info [(snapname | volume <volname>)]", cli_cmd_snapshot_cbk,
+ "Snapshot Info."},
+ {"snapshot list [volname]", cli_cmd_snapshot_cbk, "Snapshot List."},
+ {"snapshot config [volname] ([snap-max-hard-limit <count>] "
+ "[snap-max-soft-limit <percent>]) "
+ "| ([auto-delete <enable|disable>])"
+ "| ([activate-on-create <enable|disable>])",
+ cli_cmd_snapshot_cbk, "Snapshot Config."},
+ {"snapshot delete (all | snapname | volume <volname>)",
+ cli_cmd_snapshot_cbk, "Snapshot Delete."},
+ {"snapshot activate <snapname> [force]", cli_cmd_snapshot_cbk,
+ "Activate snapshot volume."},
+ {"snapshot deactivate <snapname>", cli_cmd_snapshot_cbk,
+ "Deactivate snapshot volume."},
+ {NULL, NULL, NULL}};
+
+int
+cli_cmd_snapshot_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
+{
+ struct cli_cmd *cmd = NULL;
+ struct cli_cmd *snap_cmd = NULL;
+ int count = 0;
+
+ cmd = GF_MALLOC(sizeof(snapshot_cmds), cli_mt_cli_cmd);
+ memcpy(cmd, snapshot_cmds, sizeof(snapshot_cmds));
+ count = (sizeof(snapshot_cmds) / sizeof(struct cli_cmd));
+ cli_cmd_sort(cmd, count);
+
+ cli_out("\ngluster snapshot commands");
+ cli_out("=========================\n");
+
+ for (snap_cmd = cmd; snap_cmd->pattern; snap_cmd++)
+ if (_gf_false == snap_cmd->disable)
+ cli_out("%s - %s", snap_cmd->pattern, snap_cmd->desc);
+ cli_out("\n");
+
+ GF_FREE(cmd);
+ return 0;
+}
+
+int
+cli_cmd_snapshot_register(struct cli_state *state)
+{
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
+
+ for (cmd = snapshot_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+out:
+ return ret;
+}
diff --git a/cli/src/cli-cmd-system.c b/cli/src/cli-cmd-system.c
index 25938b8974b..801e8f4efed 100644
--- a/cli/src/cli-cmd-system.c
+++ b/cli/src/cli-cmd-system.c
@@ -1,349 +1,624 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "cli.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
#include "protocol-common.h"
+int
+cli_cmd_system_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
-extern struct rpc_clnt *global_rpc;
-
-extern rpc_clnt_prog_t *cli_rpc_prog;
+int
+cli_cmd_copy_file_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
-int cli_cmd_system_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
- const char **words, int wordcount);
+int
+cli_cmd_sys_exec_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
int
-cli_cmd_getspec_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_getspec_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+
+ if (wordcount != 3) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_str(dict, "volid", (char *)words[2]);
+ if (ret)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GETSPEC];
+ if (proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+ ret = proc->fn(frame, THIS, dict);
+ }
- dict = dict_new ();
- if (!dict)
- goto out;
+out:
+ if (!proc && ret) {
+ if (wordcount > 1)
+ cli_out("Fetching spec for volume %s failed", (char *)words[2]);
+ }
- if (wordcount != 3) {
- cli_usage_out (word->pattern);
- goto out;
- }
+ if (dict)
+ dict_unref(dict);
- ret = dict_set_str (dict, "volid", (char *)words[2]);
- if (ret)
- goto out;
+ CLI_STACK_DESTROY(frame);
+ return ret;
+}
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GETSPEC];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
+int
+cli_cmd_pmap_b2p_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+
+ if (wordcount != 4) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_str(dict, "brick", (char *)words[3]);
+ if (ret)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_PMAP_PORTBYBRICK];
+ if (proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
}
+ ret = proc->fn(frame, THIS, dict);
+ }
out:
- if (!proc && ret) {
- if (dict)
- dict_destroy (dict);
- if (wordcount > 1)
- cli_out ("Fetching spec for volume %s failed",
- (char *)words[2]);
- }
+ if (!proc && ret) {
+ if (wordcount > 1)
+ cli_out("Fetching spec for volume %s failed", (char *)words[3]);
+ }
- return ret;
+ if (dict)
+ dict_unref(dict);
+
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
int
-cli_cmd_pmap_b2p_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_fsm_log_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
-
- frame = create_frame (THIS, THIS->ctx->pool);
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ char *name = "";
+
+ if ((wordcount != 4) && (wordcount != 3)) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
+
+ if (wordcount == 4)
+ name = (char *)words[3];
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_FSM_LOG];
+ if (proc && proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
if (!frame)
- goto out;
-
- dict = dict_new ();
- if (!dict)
- goto out;
-
- if (wordcount != 4) {
- cli_usage_out (word->pattern);
- goto out;
- }
-
- ret = dict_set_str (dict, "brick", (char *)words[3]);
- if (ret)
- goto out;
-
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_PMAP_PORTBYBRICK];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
- }
+ goto out;
+ ret = proc->fn(frame, THIS, (void *)name);
+ }
+out:
+ return ret;
+}
+int
+cli_cmd_getwd_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+
+ if (wordcount != 2) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GETWD];
+ if (proc && proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+ ret = proc->fn(frame, THIS, NULL);
+ }
out:
- if (!proc && ret) {
- if (dict)
- dict_destroy (dict);
- if (wordcount > 1)
- cli_out ("Fetching spec for volume %s failed",
- (char *)words[3]);
- }
+ return ret;
+}
- return ret;
+static dict_t *
+make_seq_dict(int argc, char **argv)
+{
+ char index[] = "4294967296"; // 1<<32
+ int i = 0;
+ int len;
+ int ret = 0;
+ dict_t *dict = dict_new();
+
+ if (!dict)
+ return NULL;
+
+ for (i = 0; i < argc; i++) {
+ len = snprintf(index, sizeof(index), "%d", i);
+ ret = dict_set_strn(dict, index, len, argv[i]);
+ if (ret == -1)
+ break;
+ }
+
+ if (ret) {
+ dict_unref(dict);
+ dict = NULL;
+ }
+
+ return dict;
}
int
-cli_cmd_fsm_log_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_mount_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- char *name = "";
-
- if ((wordcount != 4) && (wordcount != 3)) {
- cli_usage_out (word->pattern);
- goto out;
- }
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int ret = -1;
+ dict_t *dict = NULL;
+ void *dataa[] = {NULL, NULL};
+
+ if (wordcount < 4) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
+
+ dict = make_seq_dict(wordcount - 3, (char **)words + 3);
+ if (!dict)
+ goto out;
+
+ dataa[0] = (void *)words[2];
+ dataa[1] = dict;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_MOUNT];
+ if (proc && proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
+ ret = proc->fn(frame, THIS, dataa);
+ }
- if (wordcount == 4)
- name = (char*)words[3];
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_FSM_LOG];
- if (proc && proc->fn) {
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
- ret = proc->fn (frame, THIS, (void*)name);
- }
out:
- return ret;
+ if (dict)
+ dict_unref(dict);
+
+ if (!proc && ret)
+ cli_out("Mount command failed");
+
+ return ret;
}
int
-cli_cmd_getwd_cbk (struct cli_state *state, struct cli_cmd_word *word,
+cli_cmd_umount_cbk(struct cli_state *state, struct cli_cmd_word *word,
const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
-
- if (wordcount != 2) {
- cli_usage_out (word->pattern);
- goto out;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int ret = -1;
+ dict_t *dict = NULL;
+
+ if (!(wordcount == 3 ||
+ (wordcount == 4 && strcmp(words[3], "lazy") == 0))) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_str(dict, "path", (char *)words[2]);
+ if (ret != 0)
+ goto out;
+ ret = dict_set_int32(dict, "lazy", wordcount == 4);
+ if (ret != 0)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_UMOUNT];
+ if (proc && proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
+ ret = -1;
+ goto out;
}
+ ret = proc->fn(frame, THIS, dict);
+ }
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GETWD];
- if (proc && proc->fn) {
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
- ret = proc->fn (frame, THIS, NULL);
- }
out:
- return ret;
+ if (dict)
+ dict_unref(dict);
+
+ if (!proc && ret)
+ cli_out("Umount command failed");
+
+ return ret;
}
-static dict_t *
-make_seq_dict (int argc, char **argv)
+int
+cli_cmd_uuid_get_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- char index[] = "4294967296"; // 1<<32
- int i = 0;
- int ret = 0;
- dict_t *dict = dict_new ();
-
- if (!dict)
- return NULL;
-
- for (i = 0; i < argc; i++) {
- snprintf(index, sizeof(index), "%d", i);
- ret = dict_set_str (dict, index, argv[i]);
- if (ret == -1)
- break;
- }
+ int ret = -1;
+ int sent = 0;
+ int parse_error = 0;
+ dict_t *dict = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ if (wordcount != 3) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_UUID_GET];
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ CLI_LOCAL_INIT(local, words, frame, dict);
+ if (proc->fn)
+ ret = proc->fn(frame, this, dict);
- if (ret) {
- dict_destroy (dict);
- dict = NULL;
- }
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("uuid get failed");
+ }
- return dict;
+ if (dict)
+ dict_unref(dict);
+
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
int
-cli_cmd_mount_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_uuid_reset_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- int ret = -1;
- dict_t *dict = NULL;
- void *dataa[] = {NULL, NULL};
-
- if (wordcount < 4) {
- cli_usage_out (word->pattern);
- goto out;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ gf_answer_t answer = GF_ANSWER_NO;
+ char *question = NULL;
+ cli_local_t *local = NULL;
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+
+ question =
+ "Resetting uuid changes the uuid of local glusterd. "
+ "Do you want to continue?";
+
+ if (wordcount != 3) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_UUID_RESET];
+
+ this = THIS;
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ CLI_LOCAL_INIT(local, words, frame, dict);
+ answer = cli_cmd_get_confirmation(state, question);
+
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+
+ // send NULL as argument since no dictionary is sent to glusterd
+ if (proc->fn) {
+ ret = proc->fn(frame, this, dict);
+ }
- dict = make_seq_dict (wordcount - 3, (char **)words + 3);
- if (!dict)
- goto out;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("uuid reset failed");
+ }
- dataa[0] = (void *)words[2];
- dataa[1] = dict;
+ if (dict)
+ dict_unref(dict);
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_MOUNT];
- if (proc && proc->fn) {
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
- ret = proc->fn (frame, THIS, dataa);
- }
+ CLI_STACK_DESTROY(frame);
- out:
- if (dict)
- dict_unref (dict);
+ return ret;
+}
- if (!proc && ret)
- cli_out ("Mount command failed");
+static struct cli_cmd cli_system_cmds[] = {
+ {"system:: getspec <VOLNAME>", cli_cmd_getspec_cbk,
+ "fetch the volume file for the volume <VOLNAME>"},
- return ret;
-}
+ {"system:: portmap brick2port <BRICK>", cli_cmd_pmap_b2p_cbk,
+ "query which port <BRICK> listens on"},
-int
-cli_cmd_umount_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
-{
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- int ret = -1;
- dict_t *dict = NULL;
-
- if (!(wordcount == 3 ||
- (wordcount == 4 && strcmp (words[3], "lazy") == 0))) {
- cli_usage_out (word->pattern);
- goto out;
- }
+ {"system:: fsm log [<peer-name>]", cli_cmd_fsm_log_cbk,
+ "display fsm transitions"},
- dict = dict_new ();
- if (!dict)
- goto out;
+ {"system:: getwd", cli_cmd_getwd_cbk, "query glusterd work directory"},
- ret = dict_set_str (dict, "path", (char *)words[2]);
- if (ret != 0)
- goto out;
- ret = dict_set_int32 (dict, "lazy", wordcount == 4);
- if (ret != 0)
- goto out;
+ {"system:: mount <label> <args...>", cli_cmd_mount_cbk, "request a mount"},
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_UMOUNT];
- if (proc && proc->fn) {
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
- ret = proc->fn (frame, THIS, dict);
- }
+ {"system:: umount <path> [lazy]", cli_cmd_umount_cbk, "request an umount"},
- out:
- if (dict)
- dict_unref (dict);
+ {"system:: uuid get", cli_cmd_uuid_get_cbk, "get uuid of glusterd"},
- if (!proc && ret)
- cli_out ("Umount command failed");
+ {"system:: uuid reset", cli_cmd_uuid_reset_cbk,
+ "reset the uuid of glusterd"},
- return ret;
-}
+ {"system:: help", cli_cmd_system_help_cbk,
+ "display help for system commands"},
-struct cli_cmd cli_system_cmds[] = {
- { "system:: getspec <VOLID>",
- cli_cmd_getspec_cbk,
- "fetch spec for volume <VOLID>"},
+ {"system:: copy file [<filename>]", cli_cmd_copy_file_cbk,
+ "Copy file from current node's $working_dir to "
+ "$working_dir of all cluster nodes"},
- { "system:: portmap brick2port <BRICK>",
- cli_cmd_pmap_b2p_cbk,
- "query which port <BRICK> listens on"},
+ {"system:: execute <command> <args>", cli_cmd_sys_exec_cbk,
+ "Execute the command on all the nodes "
+ "in the cluster and display their output."},
- { "system:: fsm log [<peer-name>]",
- cli_cmd_fsm_log_cbk,
- "display fsm transitions"},
+ {NULL, NULL, NULL}};
- { "system:: getwd",
- cli_cmd_getwd_cbk,
- "query glusterd work directory"},
+int
+cli_cmd_sys_exec_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ char cmd_arg_name[PATH_MAX] = "";
+ char *command = NULL;
+ char *saveptr = NULL;
+ char *tmp = NULL;
+ int ret = -1;
+ int i = -1;
+ int len;
+ int cmd_args_count = 0;
+ int in_cmd_args_count = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+
+ if ((wordcount < 3) || (words[2] == NULL)) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
+
+ command = strtok_r((char *)words[2], " ", &saveptr);
+ if (command == NULL) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to parse command");
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ do {
+ tmp = strtok_r(NULL, " ", &saveptr);
+ if (tmp) {
+ in_cmd_args_count++;
+ snprintf(cmd_arg_name, sizeof(cmd_arg_name), "cmd_arg_%d",
+ in_cmd_args_count);
+ ret = dict_set_str(dict, cmd_arg_name, tmp);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR,
+ "Unable to set "
+ "%s in dict",
+ cmd_arg_name);
+ goto out;
+ }
+ }
+ } while (tmp);
+
+ cmd_args_count = wordcount - 3;
- { "system:: mount <label> <args...>",
- cli_cmd_mount_cbk,
- "request a mount"},
+ ret = dict_set_str(dict, "command", command);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Unable to set command in dict");
+ goto out;
+ }
- { "system:: umount <path> [lazy]",
- cli_cmd_umount_cbk,
- "request an umount"},
+ for (i = 1; i <= cmd_args_count; i++) {
+ in_cmd_args_count++;
+ len = snprintf(cmd_arg_name, sizeof(cmd_arg_name), "cmd_arg_%d",
+ in_cmd_args_count);
+ ret = dict_set_strn(dict, cmd_arg_name, len, (char *)words[2 + i]);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Unable to set %s in dict", cmd_arg_name);
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32(dict, "cmd_args_count", in_cmd_args_count);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Unable to set cmd_args_count in dict");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "volname", "N/A");
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Unable to set volname in dict");
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SYS_EXEC];
+ if (proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
+ ret = -1;
+ goto out;
+ }
+ CLI_LOCAL_INIT(local, words, frame, dict);
+ ret = proc->fn(frame, THIS, (void *)dict);
+
+ /* proc->fn is processed synchronously, which means that the
+ * execution flow won't return here until the operation is
+ * fully processed, including any related callback. For this
+ * reason, it's safe to destroy the stack here, since no one
+ * can still be using it. Additionally, it's not easy to move
+ * the stack destroy to the callback executed after completion
+ * of the operation because there are multiple things than can
+ * fail even before having queued the callback, so we would
+ * still need to destroy the stack if proc->fn returns an
+ * error. */
+ CLI_STACK_DESTROY(frame);
+ dict = NULL;
+ }
+out:
+ if (dict != NULL) {
+ dict_unref(dict);
+ }
- { "system:: help",
- cli_cmd_system_help_cbk,
- "display help for system commands"},
+ return ret;
+}
- { NULL, NULL, NULL }
-};
+int
+cli_cmd_copy_file_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ char *filename = "";
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+
+ if (wordcount != 4) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ filename = (char *)words[3];
+ ret = dict_set_str(dict, "source", filename);
+ if (ret)
+ gf_log("", GF_LOG_ERROR, "Unable to set filename in dict");
+
+ ret = dict_set_str(dict, "volname", "N/A");
+ if (ret)
+ gf_log("", GF_LOG_ERROR, "Unable to set volname in dict");
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_COPY_FILE];
+ if (proc && proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
+ ret = -1;
+ goto out;
+ }
+ CLI_LOCAL_INIT(local, words, frame, dict);
+ ret = proc->fn(frame, THIS, (void *)dict);
+ }
+out:
+ return ret;
+}
int
-cli_cmd_system_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
- const char **words, int wordcount)
+cli_cmd_system_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
{
- struct cli_cmd *cmd = NULL;
+ struct cli_cmd *cmd = NULL;
+ struct cli_cmd *system_cmd = NULL;
+ int count = 0;
+
+ cmd = GF_MALLOC(sizeof(cli_system_cmds), cli_mt_cli_cmd);
+ memcpy(cmd, cli_system_cmds, sizeof(cli_system_cmds));
+ count = (sizeof(cli_system_cmds) / sizeof(struct cli_cmd));
+ cli_cmd_sort(cmd, count);
- for (cmd = cli_system_cmds; cmd->pattern; cmd++)
- cli_out ("%s - %s", cmd->pattern, cmd->desc);
+ for (system_cmd = cmd; system_cmd->pattern; system_cmd++)
+ cli_out("%s - %s", system_cmd->pattern, system_cmd->desc);
- return 0;
+ GF_FREE(cmd);
+ return 0;
}
int
-cli_cmd_system_register (struct cli_state *state)
+cli_cmd_system_register(struct cli_state *state)
{
- int ret = 0;
- struct cli_cmd *cmd = NULL;
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
- for (cmd = cli_system_cmds; cmd->pattern; cmd++) {
-
- ret = cli_cmd_register (&state->tree, cmd);
- if (ret)
- goto out;
- }
+ for (cmd = cli_system_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
out:
- return ret;
+ return ret;
}
diff --git a/cli/src/cli-cmd-volume.c b/cli/src/cli-cmd-volume.c
index 704f9dddb7d..f238851586e 100644
--- a/cli/src/cli-cmd-volume.c
+++ b/cli/src/cli-cmd-volume.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -26,1927 +16,3263 @@
#include <sys/socket.h>
#include <netdb.h>
#include <sys/types.h>
+#include <sys/wait.h>
#include <netinet/in.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "cli.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
#include "cli1-xdr.h"
-#include "run.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/events.h>
-extern struct rpc_clnt *global_rpc;
+extern rpc_clnt_prog_t cli_quotad_clnt;
-extern rpc_clnt_prog_t *cli_rpc_prog;
+static int
+gf_asprintf_append(char **string_ptr, const char *format, ...);
int
-cli_cmd_volume_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
- const char **words, int wordcount);
+cli_cmd_volume_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
int
-cli_cmd_volume_info_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
-{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- cli_cmd_volume_get_ctx_t ctx = {0,};
- cli_local_t *local = NULL;
- int sent = 0;
- int parse_error = 0;
-
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_VOLUME];
+cli_cmd_bitrot_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+int
+cli_cmd_quota_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount);
- if ((wordcount == 2) || (wordcount == 3 &&
- !strcmp (words[2], "all"))) {
- ctx.flags = GF_CLI_GET_NEXT_VOLUME;
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_NEXT_VOLUME];
- } else if (wordcount == 3) {
- ctx.flags = GF_CLI_GET_VOLUME;
- ctx.volname = (char *)words[2];
- if (strlen (ctx.volname) > 1024) {
- cli_out ("Invalid volume name");
- goto out;
- }
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_VOLUME];
- } else {
- cli_usage_out (word->pattern);
- parse_error = 1;
- return -1;
+int
+cli_cmd_volume_info_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ cli_cmd_volume_get_ctx_t ctx = {
+ 0,
+ };
+ cli_local_t *local = NULL;
+ int sent = 0;
+ int parse_error = 0;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_VOLUME];
+
+ if ((wordcount == 2) || (wordcount == 3 && !strcmp(words[2], "all"))) {
+ ctx.flags = GF_CLI_GET_NEXT_VOLUME;
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_NEXT_VOLUME];
+ } else if (wordcount == 3) {
+ ctx.flags = GF_CLI_GET_VOLUME;
+ ctx.volname = (char *)words[2];
+ if (strlen(ctx.volname) > GD_VOLUME_NAME_MAX) {
+ cli_out("Invalid volume name");
+ goto out;
}
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_VOLUME];
+ } else {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ return -1;
+ }
- local = cli_local_get ();
+ local = cli_local_get();
- if (!local)
- goto out;
+ if (!local)
+ goto out;
- local->get_vol.flags = ctx.flags;
- if (ctx.volname)
- local->get_vol.volname = gf_strdup (ctx.volname);
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame)
+ goto out;
- frame->local = local;
+ local->get_vol.flags = ctx.flags;
+ if (ctx.volname)
+ local->get_vol.volname = gf_strdup(ctx.volname);
- if (proc->fn) {
- ret = proc->fn (frame, THIS, &ctx);
- }
+ frame->local = local;
-out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Getting Volume information failed!");
- }
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, &ctx);
+ }
- CLI_STACK_DESTROY (frame);
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Getting Volume information failed!");
+ }
- return ret;
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
int
-cli_cmd_sync_volume_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_sync_volume_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- int sent = 0;
- int parse_error = 0;
- dict_t *dict = NULL;
-
- if ((wordcount < 3) || (wordcount > 4)) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question =
+ "Sync volume may make data "
+ "inaccessible while the sync "
+ "is in progress. Do you want "
+ "to continue?";
+
+ if ((wordcount < 3) || (wordcount > 4)) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- dict = dict_new ();
- if (!dict)
- goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
- if ((wordcount == 3) || !strcmp(words[3], "all")) {
- ret = dict_set_int32 (dict, "flags", (int32_t)
- GF_CLI_SYNC_ALL);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to set"
- "flag");
- goto out;
- }
- } else {
- ret = dict_set_str (dict, "volname", (char *) words[3]);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to set "
- "volume");
- goto out;
- }
+ if ((wordcount == 3) || !strcmp(words[3], "all")) {
+ ret = dict_set_int32(dict, "flags", (int32_t)GF_CLI_SYNC_ALL);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "failed to set"
+ "flag");
+ goto out;
}
-
- ret = dict_set_str (dict, "hostname", (char *) words[2]);
+ } else {
+ ret = dict_set_str(dict, "volname", (char *)words[3]);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to set hostname");
- goto out;
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "failed to set "
+ "volume");
+ goto out;
}
+ }
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SYNC_VOLUME];
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ ret = dict_set_str(dict, "hostname", (char *)words[2]);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to set hostname");
+ goto out;
+ }
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
+ if (!(state->mode & GLUSTER_MODE_SCRIPT)) {
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
}
+ }
-out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume sync failed");
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SYNC_VOLUME];
- if (dict)
- dict_unref (dict);
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
+ ret = -1;
+ goto out;
+ }
- CLI_STACK_DESTROY (frame);
+ CLI_LOCAL_INIT(local, words, frame, dict);
- return ret;
-}
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, dict);
+ }
-gf_ai_compare_t
-cli_cmd_compare_addrinfo (struct addrinfo *first, struct addrinfo *next)
-{
- int ret = -1;
- struct addrinfo *tmp1 = NULL;
- struct addrinfo *tmp2 = NULL;
- char firstip[NI_MAXHOST] = {0.};
- char nextip[NI_MAXHOST] = {0,};
-
- for (tmp1 = first; tmp1 != NULL; tmp1 = tmp1->ai_next) {
- ret = getnameinfo (tmp1->ai_addr, tmp1->ai_addrlen, firstip,
- NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
- if (ret)
- return GF_AI_COMPARE_ERROR;
- for (tmp2 = next; tmp2 != NULL; tmp2 = tmp2->ai_next) {
- ret = getnameinfo (tmp2->ai_addr, tmp2->ai_addrlen, nextip,
- NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
- if (ret)
- return GF_AI_COMPARE_ERROR;
- if (!strcmp (firstip, nextip)) {
- return GF_AI_COMPARE_MATCH;
- }
- }
- }
- return GF_AI_COMPARE_NO_MATCH;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume sync failed");
+ }
+
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
}
-/* Check for non optimal brick order for replicate :
- * Checks if bricks belonging to a replicate volume
- * are present on the same server
- */
-int32_t
-cli_cmd_check_brick_order (struct cli_state *state, const char *bricks,
- int brick_count, int sub_count)
+int
+cli_cmd_volume_create_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- int i = 0;
- int j = 0;
- int k = 0;
- addrinfo_list_t *ai_list = NULL;
- addrinfo_list_t *ai_list_tmp1 = NULL;
- addrinfo_list_t *ai_list_tmp2 = NULL;
- char *brick = NULL;
- char *brick_list = NULL;
- char *brick_list_dup = NULL;
- char *tmpptr = NULL;
- struct addrinfo *ai_info = NULL;
- gf_answer_t answer = GF_ANSWER_NO;
- const char *failed_question = NULL;
- const char *found_question = NULL;
- failed_question = "Failed to perform brick order check. "
- "Do you want to continue creating the volume? ";
- found_question = "Multiple bricks of a replicate volume are present"
- " on the same server. This setup is not optimal.\n"
- "Do you still want to continue creating the volume? ";
-
- GF_ASSERT (bricks);
- GF_ASSERT (brick_count > 0);
- GF_ASSERT (sub_count > 0);
-
- ai_list = malloc (sizeof (addrinfo_list_t));
- ai_list->info = NULL;
- INIT_LIST_HEAD (&ai_list->list);
- brick_list = gf_strdup (bricks);
- if (brick_list == NULL) {
- gf_log ("cli", GF_LOG_DEBUG, "failed to allocate memory");
- goto check_failed;
- }
- brick_list_dup = brick_list;
- /* Resolve hostnames and get addrinfo */
- while (i < brick_count) {
- ++i;
- brick = strtok_r (brick_list, " \n", &tmpptr);
- brick_list = tmpptr;
- if (brick == NULL)
- goto check_failed;
- brick = strtok_r (brick, ":", &tmpptr);
- if (brick == NULL)
- goto check_failed;
- ret = getaddrinfo (brick, NULL, NULL, &ai_info);
- if (ret)
- goto check_failed;
- ai_list_tmp1 = malloc (sizeof (addrinfo_list_t));
- if (ai_list_tmp1 == NULL)
- goto check_failed;
- ai_list_tmp1->info = ai_info;
- list_add_tail (&ai_list_tmp1->list, &ai_list->list);
- ai_list_tmp1 = NULL;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+ char *trans_type = NULL;
+ char *bricks = NULL;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CREATE_VOLUME];
+
+ ret = cli_cmd_volume_create_parse(state, words, wordcount, &options,
+ &bricks);
+
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- i = 0;
- ai_list_tmp1 = list_entry (ai_list->list.next, addrinfo_list_t, list);
-
- /* Check for bad brick order */
- while (i < brick_count) {
- ++i;
- ai_info = ai_list_tmp1->info;
- ai_list_tmp1 = list_entry (ai_list_tmp1->list.next,
- addrinfo_list_t, list);
- if ( 0 == i % sub_count) {
- j = 0;
- continue;
- }
- ai_list_tmp2 = ai_list_tmp1;
- k = j;
- while (k < sub_count - 1) {
- ++k;
- ret = cli_cmd_compare_addrinfo (ai_info,
- ai_list_tmp2->info);
- if (GF_AI_COMPARE_ERROR == ret)
- goto check_failed;
- if (GF_AI_COMPARE_MATCH == ret)
- goto found_bad_brick_order;
- ai_list_tmp2 = list_entry (ai_list_tmp2->list.next,
- addrinfo_list_t, list);
- }
- ++j;
- }
- gf_log ("cli", GF_LOG_INFO, "Brick order okay");
- ret = 0;
+ ret = dict_get_str(options, "transport", &trans_type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get transport type");
goto out;
+ }
+
+ if (state->mode & GLUSTER_MODE_WIGNORE) {
+ ret = dict_set_int32(options, "force", _gf_true);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set force "
+ "option");
+ goto out;
+ }
+ }
-check_failed:
- gf_log ("cli", GF_LOG_INFO, "Failed bad brick order check");
- answer = cli_cmd_get_confirmation(state, failed_question);
- if (GF_ANSWER_YES == answer)
- ret = 0;
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
-found_bad_brick_order:
- gf_log ("cli", GF_LOG_INFO, "Bad brick order found");
- answer = cli_cmd_get_confirmation (state, found_question);
- if (GF_ANSWER_YES == answer)
- ret = 0;
out:
- ai_list_tmp2 = NULL;
- i = 0;
- if (brick_list_dup)
- GF_FREE (brick_list_dup);
- list_for_each_entry (ai_list_tmp1, &ai_list->list, list) {
- if (ai_list_tmp1->info)
- freeaddrinfo (ai_list_tmp1->info);
- if (ai_list_tmp2)
- free (ai_list_tmp2);
- ai_list_tmp2 = ai_list_tmp1;
- }
- free (ai_list_tmp2);
- return ret;
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume create failed");
+ }
+
+ if (ret == 0) {
+ gf_event(EVENT_VOLUME_CREATE, "name=%s;bricks=%s", (char *)words[2],
+ bricks);
+ }
+
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
int
-cli_cmd_volume_create_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_delete_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
- char *brick_list = NULL;
- int32_t brick_count = 0;
- int32_t sub_count = 0;
- int32_t type = GF_CLUSTER_TYPE_NONE;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ char *volname = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ const char *question = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+ dict_t *dict = NULL;
+
+ question =
+ "Deleting volume will erase all information about the volume. "
+ "Do you want to continue?";
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DELETE_VOLUME];
+
+ if (wordcount != 3) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+ volname = (char *)words[2];
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CREATE_VOLUME];
+ dict = dict_new();
+ if (!dict)
+ goto out;
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_WARNING, "dict set failed");
+ goto out;
+ }
+
+ if (!strcmp(volname, GLUSTER_SHARED_STORAGE)) {
+ question =
+ "Deleting the shared storage volume"
+ "(gluster_shared_storage), will affect features "
+ "like snapshot scheduler, geo-replication "
+ "and NFS-Ganesha. Do you still want to "
+ "continue?";
+ }
+
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
- ret = cli_cmd_volume_create_parse (words, wordcount, &options);
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- if (ret) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
- /*Check brick order if type is replicate*/
- ret = dict_get_int32 (options, "type", &type);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Could not get brick type");
- goto out;
- }
- if ((type == GF_CLUSTER_TYPE_REPLICATE) ||
- (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE)) {
- if ((ret = dict_get_str (options, "bricks", &brick_list)) != 0) {
- gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : "
- "Could not retrieve bricks list");
- goto out;
- }
- if ((ret = dict_get_int32 (options, "count", &brick_count)) != 0) {
- gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : "
- "Could not retrieve brick count");
- goto out;
- }
- if ((ret = dict_get_int32 (options, "replica-count", &sub_count)) != 0) {
- gf_log ("cli", GF_LOG_ERROR, "Replica bricks check : "
- "Could not retrieve replica count");
- goto out;
- }
- gf_log ("cli", GF_LOG_INFO, "Replicate cluster type found."
- " Checking brick order.");
- ret = cli_cmd_check_brick_order (state, brick_list, brick_count, sub_count);
- if (ret) {
- gf_log("cli", GF_LOG_INFO, "Not creating volume because of bad brick order");
- goto out;
- }
- }
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ CLI_LOCAL_INIT(local, words, frame, dict);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, dict);
+ }
out:
- if (options)
- dict_unref (options);
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume create failed");
- }
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume delete failed");
+ }
- CLI_STACK_DESTROY (frame);
+ CLI_STACK_DESTROY(frame);
- return ret;
-}
+ if (ret == 0 && GF_ANSWER_YES == answer) {
+ gf_event(EVENT_VOLUME_DELETE, "name=%s", (char *)words[2]);
+ }
+ return ret;
+}
int
-cli_cmd_volume_delete_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_start_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- char *volname = NULL;
- gf_answer_t answer = GF_ANSWER_NO;
- const char *question = NULL;
- int sent = 0;
- int parse_error = 0;
-
- question = "Deleting volume will erase all information about the volume. "
- "Do you want to continue?";
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DELETE_VOLUME];
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ dict_t *dict = NULL;
+ int flags = 0;
+ cli_local_t *local = NULL;
+
+ if (wordcount < 3 || wordcount > 4) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- if (wordcount != 3) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
+ if (!words[2])
+ goto out;
+
+ if (wordcount == 4) {
+ if (!strcmp("force", words[3])) {
+ flags |= GF_CLI_FLAG_OP_FORCE;
+ } else {
+ ret = -1;
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
}
+ }
- answer = cli_cmd_get_confirmation (state, question);
+ dict = dict_new();
+ if (!dict) {
+ goto out;
+ }
- if (GF_ANSWER_NO == answer) {
- ret = 0;
- goto out;
- }
+ ret = dict_set_str(dict, "volname", (char *)words[2]);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set failed");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "flags", flags);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set failed");
+ goto out;
+ }
- volname = (char *)words[2];
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_START_VOLUME];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, volname);
- }
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, dict);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, dict);
+ }
out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume delete failed");
- }
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume start failed");
+ }
+
+ CLI_STACK_DESTROY(frame);
- CLI_STACK_DESTROY (frame);
+ if (ret == 0) {
+ gf_event(EVENT_VOLUME_START, "name=%s;force=%d", (char *)words[2],
+ (flags & GF_CLI_FLAG_OP_FORCE));
+ }
- return ret;
+ return ret;
}
-int
-cli_cmd_volume_start_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+gf_answer_t
+cli_cmd_get_confirmation(struct cli_state *state, const char *question)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- int sent = 0;
- int parse_error = 0;
- dict_t *dict = NULL;
- int flags = 0;
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ char answer[5] = {
+ '\0',
+ };
+ int flush = '\0';
+ size_t len;
- if (wordcount < 3 || wordcount > 4) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ if (state->mode & GLUSTER_MODE_SCRIPT)
+ return GF_ANSWER_YES;
- dict = dict_new ();
- if (!dict) {
- goto out;
- }
+ printf("%s (y/n) ", question);
- if (!words[2])
- goto out;
+ if (fgets(answer, 4, stdin) == NULL) {
+ cli_out("gluster cli read error");
+ goto out;
+ }
- ret = dict_set_str (dict, "volname", (char *)words[2]);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "dict set failed");
- goto out;
- }
+ len = strlen(answer);
- if (wordcount == 4) {
- if (!strcmp("force", words[3])) {
- flags |= GF_CLI_FLAG_OP_FORCE;
- } else {
- ret = -1;
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
- }
- ret = dict_set_int32 (dict, "flags", flags);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "dict set failed");
- goto out;
- }
+ if (len && answer[len - 1] == '\n') {
+ answer[--len] = '\0';
+ } else {
+ do {
+ flush = getchar();
+ } while (flush != '\n');
+ }
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to serialize dict");
- goto out;
- }
+ if (len > 3)
+ goto out;
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_START_VOLUME];
+ if (!strcasecmp(answer, "y") || !strcasecmp(answer, "yes"))
+ return GF_ANSWER_YES;
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
- }
+ else if (!strcasecmp(answer, "n") || !strcasecmp(answer, "no"))
+ return GF_ANSWER_NO;
out:
- if (dict)
- dict_unref (dict);
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume start failed");
- }
-
- CLI_STACK_DESTROY (frame);
+ cli_out("Invalid input, please enter y/n");
- return ret;
+ return GF_ANSWER_NO;
}
-gf_answer_t
-cli_cmd_get_confirmation (struct cli_state *state, const char *question)
+int
+cli_cmd_volume_stop_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- char answer[5] = {'\0', };
- char flush = '\0';
- int len = 0;
-
- if (state->mode & GLUSTER_MODE_SCRIPT)
- return GF_ANSWER_YES;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int flags = 0;
+ gf_answer_t answer = GF_ANSWER_NO;
+ int sent = 0;
+ int parse_error = 0;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ cli_local_t *local = NULL;
+
+ const char *question =
+ "Stopping volume will make its data inaccessible. "
+ "Do you want to continue?";
+
+ if (wordcount < 3 || wordcount > 4) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- printf ("%s (y/n) ", question);
+ volname = (char *)words[2];
- if (fgets (answer, 4, stdin) == NULL) {
- cli_out("gluster cli read error");
- goto out;
+ dict = dict_new();
+ ret = dict_set_str(dict, "volname", volname);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set failed");
+ goto out;
+ }
+
+ if (!strcmp(volname, GLUSTER_SHARED_STORAGE)) {
+ question =
+ "Stopping the shared storage volume"
+ "(gluster_shared_storage), will affect features "
+ "like snapshot scheduler, geo-replication "
+ "and NFS-Ganesha. Do you still want to "
+ "continue?";
+ }
+
+ if (wordcount == 4) {
+ if (!strcmp("force", words[3])) {
+ flags |= GF_CLI_FLAG_OP_FORCE;
+ } else {
+ ret = -1;
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
}
+ }
- len = strlen (answer);
+ ret = dict_set_int32(dict, "flags", flags);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "dict set failed");
+ goto out;
+ }
+
+ answer = cli_cmd_get_confirmation(state, question);
- if (answer [len - 1] == '\n'){
- answer [--len] = '\0';
- } else {
- do{
- flush = getchar ();
- }while (flush != '\n');
- }
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
- if (len > 3)
- goto out;
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STOP_VOLUME];
- if (!strcasecmp (answer, "y") || !strcasecmp (answer, "yes"))
- return GF_ANSWER_YES;
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- else if (!strcasecmp (answer, "n") || !strcasecmp (answer, "no"))
- return GF_ANSWER_NO;
+ CLI_LOCAL_INIT(local, words, frame, dict);
-out:
- cli_out ("Invalid input, please enter y/n");
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, dict);
+ }
- return GF_ANSWER_NO;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume stop on '%s' failed", volname);
+ }
+
+ CLI_STACK_DESTROY(frame);
+ if (dict)
+ dict_unref(dict);
+
+ if (ret == 0 && GF_ANSWER_YES == answer) {
+ gf_event(EVENT_VOLUME_STOP, "name=%s;force=%d", (char *)words[2],
+ (flags & GF_CLI_FLAG_OP_FORCE));
+ }
+
+ return ret;
}
int
-cli_cmd_volume_stop_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_rename_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- int flags = 0;
- gf_answer_t answer = GF_ANSWER_NO;
- int sent = 0;
- int parse_error = 0;
- dict_t *dict = NULL;
- char *volname = NULL;
-
- const char *question = "Stopping volume will make its data inaccessible. "
- "Do you want to continue?";
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
-
- if (wordcount < 3 || wordcount > 4) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ int sent = 0;
+ int parse_error = 0;
+
+ if (wordcount != 4) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- volname = (char*) words[2];
+ dict = dict_new();
+ if (!dict)
+ goto out;
- dict = dict_new ();
- ret = dict_set_str (dict, "volname", volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "dict set failed");
- goto out;
- }
+ ret = dict_set_str(dict, "old-volname", (char *)words[2]);
- if (wordcount == 4) {
- if (!strcmp("force", words[3])) {
- flags |= GF_CLI_FLAG_OP_FORCE;
- } else {
- ret = -1;
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
- }
- ret = dict_set_int32 (dict, "flags", flags);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "dict set failed");
- goto out;
- }
+ if (ret)
+ goto out;
- answer = cli_cmd_get_confirmation (state, question);
+ ret = dict_set_str(dict, "new-volname", (char *)words[3]);
- if (GF_ANSWER_NO == answer) {
- ret = 0;
- goto out;
- }
+ if (ret)
+ goto out;
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STOP_VOLUME];
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_RENAME_VOLUME];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
+ if (proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
}
+ ret = proc->fn(frame, THIS, dict);
+ }
out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume stop on '%s' failed", volname);
- }
- if (dict)
- dict_unref (dict);
+ if (dict)
+ dict_unref(dict);
- CLI_STACK_DESTROY (frame);
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume rename on '%s' failed", (char *)words[2]);
+ }
- return ret;
-}
+ CLI_STACK_DESTROY(frame);
+ return ret;
+}
int
-cli_cmd_volume_rename_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_defrag_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
- int sent = 0;
- int parse_error = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+#if (USE_EVENTS)
+ eventtypes_t event = EVENT_LAST;
+#endif
+#ifdef GF_SOLARIS_HOST_OS
+ cli_out("Command not supported on Solaris");
+ goto out;
+#endif
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ ret = cli_cmd_volume_defrag_parse(words, wordcount, &dict);
- dict = dict_new ();
- if (!dict)
- goto out;
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ }
- if (wordcount != 4) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DEFRAG_VOLUME];
- ret = dict_set_str (dict, "old-volname", (char *)words[2]);
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- if (ret)
- goto out;
+ CLI_LOCAL_INIT(local, words, frame, dict);
- ret = dict_set_str (dict, "new-volname", (char *)words[3]);
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, dict);
+ }
- if (ret)
- goto out;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume rebalance failed");
+ } else {
+#if (USE_EVENTS)
+ if (!(strcmp(words[wordcount - 1], "start")) ||
+ !(strcmp(words[wordcount - 1], "force"))) {
+ event = EVENT_VOLUME_REBALANCE_START;
+ } else if (!strcmp(words[wordcount - 1], "stop")) {
+ event = EVENT_VOLUME_REBALANCE_STOP;
+ }
+
+ if (event != EVENT_LAST)
+ gf_event(event, "volume=%s", (char *)words[2]);
+#endif
+ }
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_RENAME_VOLUME];
+ CLI_STACK_DESTROY(frame);
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
- }
+ return ret;
+}
-out:
- if (dict)
- dict_destroy (dict);
+int
+cli_cmd_volume_reset_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int sent = 0;
+ int parse_error = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ cli_local_t *local = NULL;
+#if (USE_EVENTS)
+ int ret1 = -1;
+ char *tmp_opt = NULL;
+#endif
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume rename on '%s' failed", (char *)words[2]);
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_RESET_VOLUME];
+
+ ret = cli_cmd_volume_reset_parse(words, wordcount, &options);
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
- CLI_STACK_DESTROY (frame);
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
- return ret;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume reset failed");
+ }
+
+#if (USE_EVENTS)
+ if (ret == 0) {
+ ret1 = dict_get_str(options, "key", &tmp_opt);
+ if (ret1)
+ tmp_opt = "";
+
+ gf_event(EVENT_VOLUME_RESET, "name=%s;option=%s", (char *)words[2],
+ tmp_opt);
+ }
+#endif
+
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
}
int
-cli_cmd_volume_defrag_cbk (struct cli_state *state, struct cli_cmd_word *word,
+cli_cmd_volume_profile_cbk(struct cli_state *state, struct cli_cmd_word *word,
const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
- int sent = 0;
- int parse_error = 0;
-#ifdef GF_SOLARIS_HOST_OS
- cli_out ("Command not supported on Solaris");
+ int sent = 0;
+ int parse_error = 0;
+
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ cli_local_t *local = NULL;
+
+ ret = cli_cmd_volume_profile_parse(words, wordcount, &options);
+
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_PROFILE_VOLUME];
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
+ ret = -1;
goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume profile failed");
+ }
+
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
+}
+
+int
+cli_cmd_volume_set_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int sent = 0;
+ int parse_error = 0;
+
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ cli_local_t *local = NULL;
+ char *op_errstr = NULL;
+
+#if (USE_EVENTS)
+ int ret1 = -1;
+ int i = 1;
+ char dict_key[50] = {
+ 0,
+ };
+ char *tmp_opt = NULL;
+ char *opts_str = NULL;
+ int num_options = 0;
#endif
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SET_VOLUME];
- dict = dict_new ();
- if (!dict)
- goto out;
+ ret = cli_cmd_volume_set_parse(state, words, wordcount, &options,
+ &op_errstr);
+ if (ret) {
+ if (op_errstr) {
+ cli_err("%s", op_errstr);
+ GF_FREE(op_errstr);
+ } else
+ cli_usage_out(word->pattern);
- if (!((wordcount == 4) || (wordcount == 5))) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ parse_error = 1;
+ goto out;
+ }
- if (wordcount == 4) {
- if (strcmp (words[3], "start") && strcmp (words[3], "stop") &&
- strcmp (words[3], "status")) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume set failed");
+ }
+
+#if (USE_EVENTS)
+ if (ret == 0 && strcmp(words[2], "help") != 0) {
+ ret1 = dict_get_int32(options, "count", &num_options);
+ if (ret1) {
+ num_options = 0;
+ goto end;
} else {
- if (strcmp (words[3], "fix-layout") &&
- strcmp (words[3], "start")) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ num_options = num_options / 2;
}
- ret = dict_set_str (dict, "volname", (char *)words[2]);
- if (ret)
- goto out;
-
- if (wordcount == 4) {
- ret = dict_set_str (dict, "command", (char *)words[3]);
- if (ret)
- goto out;
+ char *free_list_key[num_options];
+ char *free_list_val[num_options];
+ for (i = 0; i < num_options; i++) {
+ free_list_key[i] = NULL;
+ free_list_val[i] = NULL;
}
- if (wordcount == 5) {
- if ((strcmp (words[3], "fix-layout") ||
- strcmp (words[4], "start")) &&
- (strcmp (words[3], "start") ||
- strcmp (words[4], "force"))) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- ret = -1;
- goto out;
- }
+ /* Initialize opts_str */
+ opts_str = "";
+
+ /* Prepare String in format options=KEY1,VALUE1,KEY2,VALUE2 */
+ for (i = 1; i <= num_options; i++) {
+ sprintf(dict_key, "key%d", i);
+ ret1 = dict_get_str(options, dict_key, &tmp_opt);
+ if (ret1)
+ tmp_opt = "";
- ret = dict_set_str (dict, "option", (char *)words[4]);
- if (ret)
- goto out;
- ret = dict_set_str (dict, "command", (char *)words[3]);
- if (ret)
- goto out;
+ gf_asprintf(&opts_str, "%s,%s", opts_str, tmp_opt);
+ free_list_key[i - 1] = opts_str;
+
+ sprintf(dict_key, "value%d", i);
+ ret1 = dict_get_str(options, dict_key, &tmp_opt);
+ if (ret1)
+ tmp_opt = "";
+
+ gf_asprintf(&opts_str, "%s,%s", opts_str, tmp_opt);
+ free_list_val[i - 1] = opts_str;
}
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_DEFRAG_VOLUME];
+ gf_event(EVENT_VOLUME_SET, "name=%s;options=%s", (char *)words[2],
+ opts_str);
- if (proc->fn) {
- ret = proc->fn (frame, THIS, dict);
+ /* Allocated by gf_strdup and gf_asprintf */
+ for (i = 0; i < num_options; i++) {
+ GF_FREE(free_list_key[i]);
+ GF_FREE(free_list_val[i]);
}
+ }
+#endif
-out:
- if (dict)
- dict_destroy (dict);
+end:
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
+}
+
+static int
+cli_event_remove_brick_str(dict_t *options, char **event_str,
+ eventtypes_t *event)
+{
+ int ret = -1;
+ char *bricklist = NULL;
+ char *brick = NULL;
+ char *volname = NULL;
+ char key[256] = {
+ 0,
+ };
+ const char *eventstrformat = "volume=%s;bricks=%s";
+ int32_t command = 0;
+ int32_t i = 1;
+ int32_t count = 0;
+ int32_t eventstrlen = 1;
+ int bricklen = 0;
+ char *tmp_ptr = NULL;
+
+ if (!options || !event_str || !event)
+ goto out;
+ ret = dict_get_str(options, "volname", &volname);
+ if (ret || !volname) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to fetch volname");
+ ret = -1;
+ goto out;
+ }
+ /* Get the list of bricks for the event */
+ ret = dict_get_int32(options, "command", &command);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to fetch command");
+ ret = -1;
+ goto out;
+ }
+
+ switch (command) {
+ case GF_OP_CMD_START:
+ *event = EVENT_VOLUME_REMOVE_BRICK_START;
+ break;
+ case GF_OP_CMD_COMMIT:
+ *event = EVENT_VOLUME_REMOVE_BRICK_COMMIT;
+ break;
+ case GF_OP_CMD_COMMIT_FORCE:
+ *event = EVENT_VOLUME_REMOVE_BRICK_FORCE;
+ break;
+ case GF_OP_CMD_STOP:
+ *event = EVENT_VOLUME_REMOVE_BRICK_STOP;
+ break;
+ default:
+ *event = EVENT_LAST;
+ break;
+ }
+
+ ret = -1;
+
+ if (*event == EVENT_LAST) {
+ goto out;
+ }
+
+ /* I could just get this from words[] but this is cleaner in case the
+ * format changes */
+ while (i) {
+ snprintf(key, sizeof(key), "brick%d", i);
+ ret = dict_get_str(options, key, &brick);
if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume rebalance failed");
+ break;
}
+ eventstrlen += strlen(brick) + 1;
+ i++;
+ }
+
+ count = --i;
+
+ eventstrlen += 1;
- CLI_STACK_DESTROY (frame);
+ bricklist = GF_CALLOC(eventstrlen, sizeof(char), gf_common_mt_char);
+ if (!bricklist) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "memory allocation failed for"
+ "bricklist");
+ ret = -1;
+ goto out;
+ }
+
+ tmp_ptr = bricklist;
- return ret;
+ i = 1;
+ while (i <= count) {
+ snprintf(key, sizeof(key), "brick%d", i);
+ ret = dict_get_str(options, key, &brick);
+ if (ret) {
+ break;
+ }
+ snprintf(tmp_ptr, eventstrlen, "%s ", brick);
+ bricklen = strlen(brick);
+ eventstrlen -= (bricklen + 1);
+ tmp_ptr += (bricklen + 1);
+ i++;
+ }
+
+ if (!ret) {
+ gf_asprintf(event_str, eventstrformat, volname, bricklist);
+ } else {
+ gf_asprintf(event_str, eventstrformat, volname, "<unavailable>");
+ }
+
+ ret = 0;
+out:
+ GF_FREE(bricklist);
+ return ret;
}
int
-cli_cmd_volume_reset_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_add_brick_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int sent = 0;
- int parse_error = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ gf_answer_t answer = GF_ANSWER_NO;
+ cli_local_t *local = NULL;
+
+#if (USE_EVENTS)
+ char *event_str = NULL;
+ char *bricks = NULL;
+ const char *eventstrformat = "volume=%s;bricks=%s";
+#endif
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
+ const char *question =
+ "Changing the 'stripe count' of the volume is "
+ "not a supported feature. In some cases it may result in data "
+ "loss on the volume. Also there may be issues with regular "
+ "filesystem operations on the volume after the change. Do you "
+ "really want to continue with 'stripe' count option ? ";
+
+ ret = cli_cmd_volume_add_brick_parse(state, words, wordcount, &options, 0);
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_RESET_VOLUME];
+ /* TODO: there are challenges in supporting changing of
+ stripe-count, until it is properly supported give warning to user */
+ if (dict_get(options, "stripe-count")) {
+ answer = cli_cmd_get_confirmation(state, question);
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
+ }
+ }
- ret = cli_cmd_volume_reset_parse (words, wordcount, &options);
+#if (USE_EVENTS)
+ /* Get the list of bricks for the event */
+ ret = dict_get_str(options, "bricks", &bricks);
+
+ if (!ret) {
+ gf_asprintf(&event_str, eventstrformat, (char *)words[2],
+ &bricks[1] /*Skip leading space*/);
+ } else {
+ gf_asprintf(&event_str, eventstrformat, (char *)words[2],
+ "<unavailable>");
+ }
+#endif
+
+ if (state->mode & GLUSTER_MODE_WIGNORE) {
+ ret = dict_set_int32(options, "force", _gf_true);
if (ret) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set force "
+ "option");
+ goto out;
}
+ }
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_ADD_BRICK];
-out:
- if (options)
- dict_unref (options);
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume reset failed");
- }
+ CLI_LOCAL_INIT(local, words, frame, options);
- CLI_STACK_DESTROY (frame);
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
- return ret;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume add-brick failed");
+ } else {
+#if (USE_EVENTS)
+ gf_event(EVENT_VOLUME_ADD_BRICK, "%s", event_str);
+#endif
+ }
+#if (USE_EVENTS)
+ GF_FREE(event_str);
+#endif
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
int
-cli_cmd_volume_profile_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_get_soft_limit(dict_t *options, const char **words, dict_t *xdata)
{
- int sent = 0;
- int parse_error = 0;
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ char *default_sl = NULL;
+ char *default_sl_dup = NULL;
+ int ret = -1;
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ // We need a ref on @options to prevent CLI_STACK_DESTROY
+ // from destroying it prematurely.
+ dict_ref(options);
+ CLI_LOCAL_INIT(local, words, frame, options);
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_QUOTA];
+ ret = proc->fn(frame, THIS, options);
+
+ ret = dict_get_str(options, "default-soft-limit", &default_sl);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get default soft limit");
+ goto out;
+ }
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
+ default_sl_dup = gf_strdup(default_sl);
+ if (!default_sl_dup) {
+ ret = -1;
+ goto out;
+ }
- ret = cli_cmd_volume_profile_parse (words, wordcount, &options);
+ ret = dict_set_dynstr(xdata, "default-soft-limit", default_sl_dup);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to set default soft limit");
+ GF_FREE(default_sl_dup);
+ goto out;
+ }
- if (ret) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+out:
+ CLI_STACK_DESTROY(frame);
+ return ret;
+}
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_PROFILE_VOLUME];
+/* Checks if at least one limit has been set on the volume
+ *
+ * Returns true if at least one limit is set. Returns false otherwise.
+ */
+gf_boolean_t
+_limits_set_on_volume(char *volname, int type)
+{
+ gf_boolean_t limits_set = _gf_false;
+ int ret = -1;
+ char quota_conf_file[PATH_MAX] = {
+ 0,
+ };
+ int fd = -1;
+ char buf[16] = {
+ 0,
+ };
+ float version = 0.0f;
+ char gfid_type_stored = 0;
+ char gfid_type = 0;
+
+ /* TODO: fix hardcoding; Need to perform an RPC call to glusterd
+ * to fetch working directory
+ */
+ snprintf(quota_conf_file, sizeof quota_conf_file, "%s/vols/%s/quota.conf",
+ GLUSTERD_DEFAULT_WORKDIR, volname);
+ fd = open(quota_conf_file, O_RDONLY);
+ if (fd == -1)
+ goto out;
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ ret = quota_conf_read_version(fd, &version);
+ if (ret)
+ goto out;
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ if (type == GF_QUOTA_OPTION_TYPE_LIST)
+ gfid_type = GF_QUOTA_CONF_TYPE_USAGE;
+ else
+ gfid_type = GF_QUOTA_CONF_TYPE_OBJECTS;
-out:
- if (options)
- dict_unref (options);
+ /* Try to read at least one gfid of type 'gfid_type' */
+ while (1) {
+ ret = quota_conf_read_gfid(fd, buf, &gfid_type_stored, version);
+ if (ret <= 0)
+ break;
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume profile failed");
+ if (gfid_type_stored == gfid_type) {
+ limits_set = _gf_true;
+ break;
}
+ }
+out:
+ if (fd != -1)
+ sys_close(fd);
- CLI_STACK_DESTROY (frame);
-
- return ret;
-
+ return limits_set;
}
int
-cli_cmd_volume_set_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_quota_handle_list_all(const char **words, dict_t *options)
{
- int sent = 0;
- int parse_error = 0;
+ int all_failed = 1;
+ int count = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *xdata = NULL;
+ char gfid_str[UUID_CANONICAL_FORM_LEN + 1];
+ char *volname = NULL;
+ char *volname_dup = NULL;
+ unsigned char buf[16] = {0};
+ int fd = -1;
+ char quota_conf_file[PATH_MAX] = {0};
+ gf_boolean_t xml_err_flag = _gf_false;
+ char err_str[NAME_MAX] = {
+ 0,
+ };
+ int32_t type = 0;
+ char gfid_type = 0;
+ float version = 0.0f;
+ int32_t max_count = 0;
+
+ xdata = dict_new();
+ if (!xdata) {
+ ret = -1;
+ goto out;
+ }
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
+ ret = dict_get_str(options, "volname", &volname);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get volume name");
+ goto out;
+ }
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_SET_VOLUME];
+ ret = dict_get_int32(options, "type", &type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get quota option type");
+ goto out;
+ }
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ ret = dict_set_int32(xdata, "type", type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to set type in xdata");
+ goto out;
+ }
- ret = cli_cmd_volume_set_parse (words, wordcount, &options);
+ ret = cli_get_soft_limit(options, words, xdata);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to fetch default "
+ "soft-limit");
+ goto out;
+ }
+
+ /* Check if at least one limit is set on volume. No need to check for
+ * quota enabled as cli_get_soft_limit() handles that
+ */
+ if (!_limits_set_on_volume(volname, type)) {
+ snprintf(err_str, sizeof(err_str),
+ "No%s quota configured on"
+ " volume %s",
+ (type == GF_QUOTA_OPTION_TYPE_LIST) ? "" : " inode", volname);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ xml_err_flag = _gf_true;
+ } else {
+ cli_out("quota: %s", err_str);
+ }
+ ret = 0;
+ goto out;
+ }
+
+ volname_dup = gf_strdup(volname);
+ if (!volname_dup) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr(xdata, "volume-uuid", volname_dup);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to set volume-uuid");
+ GF_FREE(volname_dup);
+ goto out;
+ }
+
+ // TODO: fix hardcoding; Need to perform an RPC call to glusterd
+ // to fetch working directory
+ snprintf(quota_conf_file, sizeof quota_conf_file, "%s/vols/%s/quota.conf",
+ GLUSTERD_DEFAULT_WORKDIR, volname);
+ fd = open(quota_conf_file, O_RDONLY);
+ if (fd == -1) {
+ // This may because no limits were yet set on the volume
+ gf_log("cli", GF_LOG_TRACE,
+ "Unable to open "
+ "quota.conf");
+ ret = 0;
+ goto out;
+ }
+
+ ret = quota_conf_read_version(fd, &version);
+ if (ret)
+ goto out;
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, xdata);
+ proc = &cli_quotad_clnt.proctable[GF_AGGREGATOR_GETLIMIT];
+
+ for (count = 0;; count++) {
+ ret = quota_conf_read_gfid(fd, buf, &gfid_type, version);
+ if (ret == 0) {
+ break;
+ } else if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_CRITICAL,
+ "Quota "
+ "configuration store may be corrupt.");
+ goto out;
+ }
+
+ if ((type == GF_QUOTA_OPTION_TYPE_LIST &&
+ gfid_type == GF_QUOTA_CONF_TYPE_OBJECTS) ||
+ (type == GF_QUOTA_OPTION_TYPE_LIST_OBJECTS &&
+ gfid_type == GF_QUOTA_CONF_TYPE_USAGE))
+ continue;
+
+ max_count++;
+ }
+ ret = dict_set_int32(xdata, "max_count", max_count);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to set max_count");
+ goto out;
+ }
+
+ ret = sys_lseek(fd, 0L, SEEK_SET);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "failed to move offset to "
+ "the beginning: %s",
+ strerror(errno));
+ goto out;
+ }
+ ret = quota_conf_read_version(fd, &version);
+ if (ret)
+ goto out;
+
+ for (count = 0;; count++) {
+ ret = quota_conf_read_gfid(fd, buf, &gfid_type, version);
+ if (ret == 0) {
+ break;
+ } else if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_CRITICAL,
+ "Quota "
+ "configuration store may be corrupt.");
+ goto out;
+ }
+
+ if ((type == GF_QUOTA_OPTION_TYPE_LIST &&
+ gfid_type == GF_QUOTA_CONF_TYPE_OBJECTS) ||
+ (type == GF_QUOTA_OPTION_TYPE_LIST_OBJECTS &&
+ gfid_type == GF_QUOTA_CONF_TYPE_USAGE))
+ continue;
+
+ uuid_utoa_r(buf, gfid_str);
+ ret = dict_set_str(xdata, "gfid", gfid_str);
if (ret) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
+ gf_log("cli", GF_LOG_ERROR, "Failed to set gfid");
+ goto out;
}
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
+ ret = proc->fn(frame, THIS, xdata);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to get quota "
+ "limits for %s",
+ uuid_utoa((unsigned char *)buf));
}
-out:
- if (options)
- dict_unref (options);
+ dict_del(xdata, "gfid");
+ all_failed = all_failed && ret;
+ }
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_quota_limit_list_end(local);
if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume set failed");
+ gf_log("cli", GF_LOG_ERROR,
+ "Error in printing "
+ "xml output");
+ goto out;
}
+ }
- CLI_STACK_DESTROY (frame);
-
- return ret;
+ if (count > 0) {
+ ret = all_failed ? -1 : 0;
+ } else {
+ ret = 0;
+ }
+out:
+ if (xml_err_flag) {
+ ret = cli_xml_output_str("volQuota", NULL, -1, 0, err_str);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Error outputting in "
+ "xml format");
+ }
+ }
+ if (xdata)
+ dict_unref(xdata);
+
+ if (fd != -1) {
+ sys_close(fd);
+ }
+
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not fetch and display quota"
+ " limits");
+ }
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
int
-cli_cmd_volume_add_brick_cbk (struct cli_state *state,
- struct cli_cmd_word *word, const char **words,
- int wordcount)
+cli_cmd_bitrot_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
- gf_answer_t answer = GF_ANSWER_NO;
-
- const char *question = "Changing the 'stripe count' of the volume is "
- "not a supported feature. In some cases it may result in data "
- "loss on the volume. Also there may be issues with regular "
- "filesystem operations on the volume after the change. Do you "
- "really want to continue with 'stripe' count option ? ";
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ int ret = -1;
+ int parse_err = 0;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ cli_local_t *local = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ int sent = 0;
+#if (USE_EVENTS)
+ int cmd_type = -1;
+ int ret1 = -1;
+ int event_type = -1;
+ char *tmp = NULL;
+ char *events_str = NULL;
+ char *volname = NULL;
+#endif
- ret = cli_cmd_volume_add_brick_parse (words, wordcount, &options);
+ ret = cli_cmd_bitrot_parse(words, wordcount, &options);
+ if (ret < 0) {
+ cli_usage_out(word->pattern);
+ parse_err = 1;
+ goto out;
+ }
- if (ret) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ if (ret == 1) {
+ /* this is 'volume bitrot help' */
+ cli_cmd_bitrot_help_cbk(state, word, words, wordcount);
+ ret = 0;
+ goto out2;
+ }
- /* TODO: there are challenges in supporting changing of
- stripe-count, untill it is properly supported give warning to user */
- if (dict_get (options, "stripe-count")) {
- answer = cli_cmd_get_confirmation (state, question);
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- if (GF_ANSWER_NO == answer) {
- ret = 0;
- goto out;
- }
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_BITROT];
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_ADD_BRICK];
+ CLI_LOCAL_INIT(local, words, frame, options);
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
out:
- if (options)
- dict_unref (options);
-
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume add-brick failed");
- }
-
- CLI_STACK_DESTROY (frame);
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_err == 0))
+ cli_err(
+ "Bit rot command failed. Please check the cli "
+ "logs for more details");
+ }
+
+#if (USE_EVENTS)
+ if (ret == 0) {
+ ret1 = dict_get_int32(options, "type", &cmd_type);
+ if (ret1)
+ cmd_type = -1;
+ else {
+ ret1 = dict_get_str(options, "volname", &volname);
+ if (ret1)
+ volname = "";
+ }
+
+ switch (cmd_type) {
+ case GF_BITROT_OPTION_TYPE_ENABLE:
+ event_type = EVENT_BITROT_ENABLE;
+ break;
+ case GF_BITROT_OPTION_TYPE_DISABLE:
+ event_type = EVENT_BITROT_DISABLE;
+ break;
+ case GF_BITROT_CMD_SCRUB_ONDEMAND:
+ event_type = EVENT_BITROT_SCRUB_ONDEMAND;
+ break;
+ case GF_BITROT_OPTION_TYPE_SCRUB_THROTTLE:
+ event_type = EVENT_BITROT_SCRUB_THROTTLE;
+ ret1 = dict_get_str(options, "scrub-throttle-value", &tmp);
+ if (ret1)
+ tmp = "";
+ gf_asprintf(&events_str, "name=%s;value=%s", volname, tmp);
+ break;
+ case GF_BITROT_OPTION_TYPE_SCRUB_FREQ:
+ event_type = EVENT_BITROT_SCRUB_FREQ;
+ ret1 = dict_get_str(options, "scrub-frequency-value", &tmp);
+ if (ret1)
+ tmp = "";
+ gf_asprintf(&events_str, "name=%s;value=%s", volname, tmp);
+ break;
+ case GF_BITROT_OPTION_TYPE_SCRUB:
+ event_type = EVENT_BITROT_SCRUB_OPTION;
+ ret1 = dict_get_str(options, "scrub-value", &tmp);
+ if (ret1)
+ tmp = "";
+ gf_asprintf(&events_str, "name=%s;value=%s", volname, tmp);
+ break;
+ default:
+ break;
+ }
+
+ if (event_type > -1)
+ gf_event(event_type, "%s", events_str);
+
+ if (events_str)
+ GF_FREE(events_str);
+ }
+#endif
- return ret;
+ CLI_STACK_DESTROY(frame);
+out2:
+ return ret;
}
int
-cli_cmd_quota_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_quota_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
-
- int ret = 0;
- int parse_err = 0;
- int32_t type = 0;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- gf_answer_t answer = GF_ANSWER_NO;
- const char *question = "Disabling quota will delete all the quota "
- "configuration. Do you want to continue?";
-
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_QUOTA];
- if (proc == NULL) {
- ret = -1;
- goto out;
+ int ret = 0;
+ int parse_err = 0;
+ int32_t type = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ cli_local_t *local = NULL;
+ int sent = 0;
+ char *volname = NULL;
+ const char *question =
+ "Disabling quota will delete all the quota "
+ "configuration. Do you want to continue?";
+
+ // parse **words into options dictionary
+ if (strcmp(words[1], "inode-quota") == 0) {
+ ret = cli_cmd_inode_quota_parse(words, wordcount, &options);
+ if (ret < 0) {
+ cli_usage_out(word->pattern);
+ parse_err = 1;
+ goto out;
}
+ } else {
+ ret = cli_cmd_quota_parse(words, wordcount, &options);
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame) {
- ret = -1;
- goto out;
+ if (ret == 1) {
+ cli_cmd_quota_help_cbk(state, word, words, wordcount);
+ ret = 0;
+ goto out;
}
-
- ret = cli_cmd_quota_parse (words, wordcount, &options);
if (ret < 0) {
- cli_usage_out (word->pattern);
- parse_err = 1;
- goto out;
- } else if (dict_get_int32 (options, "type", &type) == 0 &&
- type == GF_QUOTA_OPTION_TYPE_DISABLE) {
- answer = cli_cmd_get_confirmation (state, question);
- if (answer == GF_ANSWER_NO)
- goto out;
+ cli_usage_out(word->pattern);
+ parse_err = 1;
+ goto out;
}
+ }
- if (proc->fn)
- ret = proc->fn (frame, THIS, options);
-
-out:
- if (options)
- dict_unref (options);
+ ret = dict_get_int32(options, "type", &type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get opcode");
+ goto out;
+ }
+
+ // handle quota-disable and quota-list-all different from others
+ switch (type) {
+ case GF_QUOTA_OPTION_TYPE_DISABLE:
+ answer = cli_cmd_get_confirmation(state, question);
+ if (answer == GF_ANSWER_NO)
+ goto out;
+ break;
+ case GF_QUOTA_OPTION_TYPE_LIST:
+ case GF_QUOTA_OPTION_TYPE_LIST_OBJECTS:
+ if (wordcount != 4)
+ break;
+ ret = cli_cmd_quota_handle_list_all(words, options);
+ goto out;
+ default:
+ break;
+ }
+
+ ret = dict_get_str(options, "volname", &volname);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get volume name");
+ goto out;
+ }
- if (ret && parse_err == 0)
- cli_out ("Quota command failed");
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- CLI_STACK_DESTROY (frame);
+ CLI_LOCAL_INIT(local, words, frame, options);
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_QUOTA];
- return ret;
+ if (proc->fn)
+ ret = proc->fn(frame, THIS, options);
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if (sent == 0 && parse_err == 0)
+ cli_out(
+ "Quota command failed. Please check the cli "
+ "logs for more details");
+ }
+ if (options)
+ dict_unref(options);
+
+ /* Events for Quota */
+ if (ret == 0) {
+ switch (type) {
+ case GF_QUOTA_OPTION_TYPE_ENABLE:
+ gf_event(EVENT_QUOTA_ENABLE, "volume=%s", volname);
+ break;
+ case GF_QUOTA_OPTION_TYPE_DISABLE:
+ gf_event(EVENT_QUOTA_DISABLE, "volume=%s", volname);
+ break;
+ case GF_QUOTA_OPTION_TYPE_LIMIT_USAGE:
+ gf_event(EVENT_QUOTA_SET_USAGE_LIMIT,
+ "volume=%s;"
+ "path=%s;limit=%s",
+ volname, words[4], words[5]);
+ break;
+ case GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS:
+ gf_event(EVENT_QUOTA_SET_OBJECTS_LIMIT,
+ "volume=%s;"
+ "path=%s;limit=%s",
+ volname, words[4], words[5]);
+ break;
+ case GF_QUOTA_OPTION_TYPE_REMOVE:
+ gf_event(EVENT_QUOTA_REMOVE_USAGE_LIMIT,
+ "volume=%s;"
+ "path=%s",
+ volname, words[4]);
+ break;
+ case GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS:
+ gf_event(EVENT_QUOTA_REMOVE_OBJECTS_LIMIT,
+ "volume=%s;"
+ "path=%s",
+ volname, words[4]);
+ break;
+ case GF_QUOTA_OPTION_TYPE_ALERT_TIME:
+ gf_event(EVENT_QUOTA_ALERT_TIME, "volume=%s;time=%s", volname,
+ words[4]);
+ break;
+ case GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT:
+ gf_event(EVENT_QUOTA_SOFT_TIMEOUT,
+ "volume=%s;"
+ "soft-timeout=%s",
+ volname, words[4]);
+ break;
+ case GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT:
+ gf_event(EVENT_QUOTA_HARD_TIMEOUT,
+ "volume=%s;"
+ "hard-timeout=%s",
+ volname, words[4]);
+ break;
+ case GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT:
+ gf_event(EVENT_QUOTA_DEFAULT_SOFT_LIMIT,
+ "volume=%s;"
+ "default-soft-limit=%s",
+ volname, words[4]);
+ break;
+ }
+ }
+
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
int
-cli_cmd_volume_remove_brick_cbk (struct cli_state *state,
- struct cli_cmd_word *word, const char **words,
- int wordcount)
+cli_cmd_volume_remove_brick_cbk(struct cli_state *state,
+ struct cli_cmd_word *word, const char **words,
+ int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- gf_answer_t answer = GF_ANSWER_NO;
- int sent = 0;
- int parse_error = 0;
- int need_question = 0;
-
- const char *question = "Removing brick(s) can result in data loss. "
- "Do you want to Continue?";
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+ int brick_count = 0;
+ int sent = 0;
+ int parse_error = 0;
+ int need_question = 0;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+#if (USE_EVENTS)
+ eventtypes_t event = EVENT_LAST;
+ char *event_str = NULL;
+ int event_ret = -1;
+#endif
+ int32_t command = GF_OP_CMD_NONE;
+ char *question = NULL;
+
+ ret = cli_cmd_volume_remove_brick_parse(state, words, wordcount, &options,
+ &need_question, &brick_count,
+ &command);
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ if (command == GF_OP_CMD_COMMIT_FORCE) {
+ question =
+ "Remove-brick force will not migrate files from the "
+ "removed bricks, so they will no longer be available"
+ " on the volume.\nDo you want to continue?";
+ } else if (command == GF_OP_CMD_START) {
+ question =
+ "It is recommended that remove-brick be run with"
+ " cluster.force-migration option disabled to prevent"
+ " possible data corruption. Doing so will ensure that"
+ " files that receive writes during migration will not"
+ " be migrated and will need to be manually copied"
+ " after the remove-brick commit operation. Please"
+ " check the value of the option and update accordingly."
+ " \nDo you want to continue with your current"
+ " cluster.force-migration settings?";
+ }
+
+ if (!brick_count) {
+ cli_err("No bricks specified");
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ ret = -1;
+ goto out;
+ }
+ ret = dict_get_str(options, "volname", &volname);
+ if (ret || !volname) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to fetch volname");
+ ret = -1;
+ goto out;
+ }
- ret = cli_cmd_volume_remove_brick_parse (words, wordcount, &options,
- &need_question);
+#if (USE_EVENTS)
+ event_ret = cli_event_remove_brick_str(options, &event_str, &event);
+#endif
- if (ret) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
+ if (!strcmp(volname, GLUSTER_SHARED_STORAGE)) {
+ question =
+ "Removing brick from the shared storage volume"
+ "(gluster_shared_storage), will affect features "
+ "like snapshot scheduler, geo-replication "
+ "and NFS-Ganesha. Do you still want to "
+ "continue?";
+ need_question = _gf_true;
+ }
+
+ if (!(state->mode & GLUSTER_MODE_SCRIPT) && need_question) {
+ answer = cli_cmd_get_confirmation(state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ goto out;
}
+ }
- if (!(state->mode & GLUSTER_MODE_SCRIPT) && need_question) {
- /* we need to ask question only in case of 'commit or force' */
- answer = cli_cmd_get_confirmation (state, question);
- if (GF_ANSWER_NO == answer) {
- ret = 0;
- goto out;
- }
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REMOVE_BRICK];
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REMOVE_BRICK];
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ CLI_LOCAL_INIT(local, words, frame, options);
-out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume remove-brick failed");
- }
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
- if (options)
- dict_unref (options);
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume remove-brick failed");
+ }
+#if (USE_EVENTS)
+ if (!ret && !event_ret)
+ gf_event(event, "%s", event_str);
+ if (event_str)
+ GF_FREE(event_str);
- CLI_STACK_DESTROY (frame);
+#endif
- return ret;
+ CLI_STACK_DESTROY(frame);
+ if (options)
+ dict_unref(options);
+ return ret;
}
int
-cli_cmd_volume_replace_brick_cbk (struct cli_state *state,
- struct cli_cmd_word *word,
- const char **words,
- int wordcount)
+cli_cmd_volume_reset_brick_cbk(struct cli_state *state,
+ struct cli_cmd_word *word, const char **words,
+ int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
#ifdef GF_SOLARIS_HOST_OS
- cli_out ("Command not supported on Solaris");
- goto out;
+ cli_out("Command not supported on Solaris");
+ goto out;
#endif
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REPLACE_BRICK];
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_RESET_BRICK];
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ ret = cli_cmd_volume_reset_brick_parse(words, wordcount, &options);
- ret = cli_cmd_volume_replace_brick_parse (words, wordcount, &options);
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+ if (state->mode & GLUSTER_MODE_WIGNORE_PARTITION) {
+ ret = dict_set_int32(options, "ignore-partition", _gf_true);
if (ret) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set ignore-"
+ "partition option");
+ goto out;
}
+ }
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
-out:
- if (options)
- dict_unref (options);
+ CLI_LOCAL_INIT(local, words, frame, options);
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume replace-brick failed");
- }
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
- CLI_STACK_DESTROY (frame);
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume reset-brick failed");
+ } else {
+ if (wordcount > 5) {
+ gf_event(EVENT_BRICK_RESET_COMMIT,
+ "Volume=%s;source-brick=%s;"
+ "destination-brick=%s",
+ (char *)words[2], (char *)words[3], (char *)words[4]);
+ } else {
+ gf_event(EVENT_BRICK_RESET_START, "Volume=%s;source-brick=%s",
+ (char *)words[2], (char *)words[3]);
+ }
+ }
+ CLI_STACK_DESTROY(frame);
- return ret;
+ return ret;
}
-
int
-cli_cmd_volume_set_transport_cbk (struct cli_state *state,
- struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_replace_brick_cbk(struct cli_state *state,
+ struct cli_cmd_word *word, const char **words,
+ int wordcount)
{
- cli_cmd_broadcast_response (0);
- return 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+#ifdef GF_SOLARIS_HOST_OS
+ cli_out("Command not supported on Solaris");
+ goto out;
+#endif
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_REPLACE_BRICK];
+
+ ret = cli_cmd_volume_replace_brick_parse(words, wordcount, &options);
+
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
+
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume replace-brick failed");
+ } else {
+ gf_event(EVENT_BRICK_REPLACE,
+ "Volume=%s;source-brick=%s;destination-brick=%s",
+ (char *)words[2], (char *)words[3], (char *)words[4]);
+ }
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
}
int
-cli_cmd_volume_top_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_set_transport_cbk(struct cli_state *state,
+ struct cli_cmd_word *word, const char **words,
+ int wordcount)
{
+ cli_cmd_broadcast_response(0);
+ return 0;
+}
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
+int
+cli_cmd_volume_top_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ ret = cli_cmd_volume_top_parse(words, wordcount, &options);
+
+ if (ret) {
+ parse_error = 1;
+ cli_usage_out(word->pattern);
+ goto out;
+ }
- ret = cli_cmd_volume_top_parse (words, wordcount, &options);
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_TOP_VOLUME];
- if (ret) {
- parse_error = 1;
- cli_usage_out (word->pattern);
- goto out;
- }
-
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_TOP_VOLUME];
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
+ ret = -1;
+ goto out;
+ }
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ CLI_LOCAL_INIT(local, words, frame, options);
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
out:
- if (options)
- dict_unref (options);
-
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume top failed");
- }
-
- CLI_STACK_DESTROY (frame);
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume top failed");
+ }
- return ret;
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
-
int
-cli_cmd_log_rotate_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_log_rotate_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
-
- if (!((wordcount == 4) || (wordcount == 5))) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
-
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LOG_ROTATE];
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ if (!((wordcount == 4) || (wordcount == 5))) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ if (!(strcmp("rotate", words[3]) == 0)) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- ret = cli_cmd_log_rotate_parse (words, wordcount, &options);
- if (ret)
- goto out;
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LOG_ROTATE];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ ret = cli_cmd_log_rotate_parse(words, wordcount, &options);
+ if (ret)
+ goto out;
-out:
- if (options)
- dict_destroy (options);
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
+ ret = -1;
+ goto out;
+ }
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume log rotate failed");
- }
+ CLI_LOCAL_INIT(local, words, frame, options);
- CLI_STACK_DESTROY (frame);
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
- return ret;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume log rotate failed");
+ }
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
}
#if (SYNCDAEMON_COMPILE)
static int
-cli_check_gsync_present ()
+cli_check_gsync_present()
{
- char buff[PATH_MAX] = {0, };
- runner_t runner = {0,};
- char *ptr = NULL;
- int ret = 0;
-
- ret = setenv ("_GLUSTERD_CALLED_", "1", 1);
- if (-1 == ret) {
- gf_log ("", GF_LOG_WARNING, "setenv syscall failed, hence could"
- "not assert if geo-replication is installed");
- goto out;
- }
-
- runinit (&runner);
- runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "--version", NULL);
- runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
- ret = runner_start (&runner);
- if (ret == -1) {
- gf_log ("", GF_LOG_INFO, "geo-replication not installed");
- goto out;
- }
+ char buff[PATH_MAX] = {
+ 0,
+ };
+ runner_t runner = {
+ 0,
+ };
+ char *ptr = NULL;
+ int ret = 0;
+
+ ret = setenv("_GLUSTERD_CALLED_", "1", 1);
+ if (-1 == ret) {
+ gf_log("", GF_LOG_WARNING,
+ "setenv syscall failed, hence could"
+ "not assert if geo-replication is installed");
+ goto out;
+ }
+
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "--version", NULL);
+ runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
+ ret = runner_start(&runner);
+ if (ret == -1) {
+ gf_log("", GF_LOG_INFO, "geo-replication not installed");
+ goto out;
+ }
- ptr = fgets(buff, sizeof(buff), runner_chio (&runner, STDOUT_FILENO));
- if (ptr) {
- if (!strstr (buff, "gsyncd")) {
- ret = -1;
- goto out;
- }
- } else {
- ret = -1;
- goto out;
+ ptr = fgets(buff, sizeof(buff), runner_chio(&runner, STDOUT_FILENO));
+ if (ptr) {
+ if (!strstr(buff, "gsyncd")) {
+ ret = -1;
+ goto out;
}
+ } else {
+ ret = -1;
+ goto out;
+ }
- ret = runner_end (&runner);
+ ret = runner_end(&runner);
- if (ret)
- gf_log ("", GF_LOG_ERROR, "geo-replication not installed");
+ if (ret)
+ gf_log("", GF_LOG_ERROR, "geo-replication not installed");
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret ? -1 : 0;
-
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret ? -1 : 0;
}
void
-cli_cmd_check_gsync_exists_cbk (struct cli_cmd *this)
+cli_cmd_check_gsync_exists_cbk(struct cli_cmd *this)
{
+ int ret = 0;
- int ret = 0;
-
- ret = cli_check_gsync_present ();
- if (ret)
- this->disable = _gf_true;
-
+ ret = cli_check_gsync_present();
+ if (ret)
+ this->disable = _gf_true;
}
#endif
int
-cli_cmd_volume_gsync_set_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_gsync_set_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = 0;
- int parse_err = 0;
- dict_t *options = NULL;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
-
- proc = &cli_rpc_prog->proctable [GLUSTER_CLI_GSYNC_SET];
- if (proc == NULL) {
- ret = -1;
- goto out;
- }
+ int ret = 0;
+ int parse_err = 0;
+ dict_t *options = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+ char *errstr = NULL;
+#if (USE_EVENTS)
+ int ret1 = -1;
+ int cmd_type = -1;
+ int tmpi = 0;
+ char *tmp = NULL;
+ char *events_str = NULL;
+ int event_type = -1;
+#endif
- frame = create_frame (THIS, THIS->ctx->pool);
- if (frame == NULL) {
- ret = -1;
- goto out;
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GSYNC_SET];
- ret = cli_cmd_gsync_set_parse (words, wordcount, &options);
- if (ret) {
- cli_usage_out (word->pattern);
- parse_err = 1;
- goto out;
+ ret = cli_cmd_gsync_set_parse(state, words, wordcount, &options, &errstr);
+ if (ret) {
+ if (errstr) {
+ cli_err("%s", errstr);
+ GF_FREE(errstr);
+ } else {
+ cli_usage_out(word->pattern);
}
+ parse_err = 1;
+ goto out;
+ }
- if (proc->fn)
- ret = proc->fn (frame, THIS, options);
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (frame == NULL) {
+ ret = -1;
+ goto out;
+ }
-out:
- if (options)
- dict_unref (options);
+ CLI_LOCAL_INIT(local, words, frame, options);
- if (ret && parse_err == 0)
- cli_out (GEOREP" command failed");
+ if (proc->fn)
+ ret = proc->fn(frame, THIS, options);
- CLI_STACK_DESTROY (frame);
+out:
+ if (ret && parse_err == 0)
+ cli_out(GEOREP " command failed");
+
+#if (USE_EVENTS)
+ if (ret == 0) {
+ events_str = gf_strdup("");
+
+ /* Type of Geo-rep Action - Create, Start etc */
+ ret1 = dict_get_int32(options, "type", &cmd_type);
+ if (ret1)
+ cmd_type = -1;
+
+ /* Only capture Events for modification commands */
+ switch (cmd_type) {
+ case GF_GSYNC_OPTION_TYPE_CREATE:
+ event_type = EVENT_GEOREP_CREATE;
+ break;
+ case GF_GSYNC_OPTION_TYPE_START:
+ event_type = EVENT_GEOREP_START;
+ break;
+ case GF_GSYNC_OPTION_TYPE_STOP:
+ event_type = EVENT_GEOREP_STOP;
+ break;
+ case GF_GSYNC_OPTION_TYPE_PAUSE:
+ event_type = EVENT_GEOREP_PAUSE;
+ break;
+ case GF_GSYNC_OPTION_TYPE_RESUME:
+ event_type = EVENT_GEOREP_RESUME;
+ break;
+ case GF_GSYNC_OPTION_TYPE_DELETE:
+ event_type = EVENT_GEOREP_DELETE;
+ break;
+ case GF_GSYNC_OPTION_TYPE_CONFIG:
+ ret1 = dict_get_str(options, "subop", &tmp);
+ if (ret1)
+ tmp = "";
+
+ /* For Config Set additionally capture key and value */
+ /* For Config Reset capture key */
+ if (strcmp(tmp, "set") == 0) {
+ event_type = EVENT_GEOREP_CONFIG_SET;
+
+ ret1 = dict_get_str(options, "op_name", &tmp);
+ if (ret1)
+ tmp = "";
+
+ gf_asprintf_append(&events_str, "%soption=%s;", events_str,
+ tmp);
+
+ ret1 = dict_get_str(options, "op_value", &tmp);
+ if (ret1)
+ tmp = "";
+
+ gf_asprintf_append(&events_str, "%svalue=%s;", events_str,
+ tmp);
+ } else if (strcmp(tmp, "del") == 0) {
+ event_type = EVENT_GEOREP_CONFIG_RESET;
+
+ ret1 = dict_get_str(options, "op_name", &tmp);
+ if (ret1)
+ tmp = "";
+
+ gf_asprintf_append(&events_str, "%soption=%s;", events_str,
+ tmp);
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (event_type > -1) {
+ /* Capture all optional arguments used */
+ ret1 = dict_get_int32(options, "force", &tmpi);
+ if (ret1 == 0) {
+ gf_asprintf_append(&events_str, "%sforce=%d;", events_str,
+ tmpi);
+ }
+ ret1 = dict_get_int32(options, "push_pem", &tmpi);
+ if (ret1 == 0) {
+ gf_asprintf_append(&events_str, "%spush_pem=%d;", events_str,
+ tmpi);
+ }
+ ret1 = dict_get_int32(options, "no_verify", &tmpi);
+ if (ret1 == 0) {
+ gf_asprintf_append(&events_str, "%sno_verify=%d;", events_str,
+ tmpi);
+ }
+
+ ret1 = dict_get_int32(options, "ssh_port", &tmpi);
+ if (ret1 == 0) {
+ gf_asprintf_append(&events_str, "%sssh_port=%d;", events_str,
+ tmpi);
+ }
+
+ ret1 = dict_get_int32(options, "reset-sync-time", &tmpi);
+ if (ret1 == 0) {
+ gf_asprintf_append(&events_str, "%sreset_sync_time=%d;",
+ events_str, tmpi);
+ }
+ /* Capture Master and Slave Info */
+ ret1 = dict_get_str(options, "master", &tmp);
+ if (ret1)
+ tmp = "";
+ gf_asprintf_append(&events_str, "%smaster=%s;", events_str, tmp);
+
+ ret1 = dict_get_str(options, "slave", &tmp);
+ if (ret1)
+ tmp = "";
+ gf_asprintf_append(&events_str, "%sslave=%s", events_str, tmp);
+
+ gf_event(event_type, "%s", events_str);
+ }
+
+ /* Allocated by gf_strdup and gf_asprintf */
+ if (events_str)
+ GF_FREE(events_str);
+ }
+#endif
- return ret;
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
}
int
-cli_cmd_volume_status_cbk (struct cli_state *state,
- struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_status_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
- uint32_t cmd = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ uint32_t cmd = 0;
+ cli_local_t *local = NULL;
- ret = cli_cmd_volume_status_parse (words, wordcount, &dict);
+ ret = cli_cmd_volume_status_parse(words, wordcount, &dict);
- if (ret) {
- cli_usage_out (word->pattern);
- goto out;
- }
+ if (ret) {
+ cli_usage_out(word->pattern);
+ goto out;
+ }
- ret = dict_get_uint32 (dict, "cmd", &cmd);
- if (ret)
- goto out;
+ ret = dict_get_uint32(dict, "cmd", &cmd);
+ if (ret)
+ goto out;
- if (!(cmd & GF_CLI_STATUS_ALL)) {
- /* for one volume or brick */
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATUS_VOLUME];
- } else {
- /* volume status all or all detail */
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATUS_ALL];
- }
+ if (!(cmd & GF_CLI_STATUS_ALL)) {
+ /* for one volume or brick */
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATUS_VOLUME];
+ } else {
+ /* volume status all or all detail */
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATUS_ALL];
+ }
- if (!proc->fn)
- goto out;
+ if (!proc->fn) {
+ ret = -1;
+ goto out;
+ }
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to create frame");
+ ret = -1;
+ goto out;
+ }
- ret = proc->fn (frame, THIS, dict);
+ CLI_LOCAL_INIT(local, words, frame, dict);
- out:
- if (dict)
- dict_unref (dict);
+ ret = proc->fn(frame, THIS, dict);
- CLI_STACK_DESTROY (frame);
+out:
+ CLI_STACK_DESTROY(frame);
- return ret;
+ return ret;
}
-
int
-cli_get_detail_status (dict_t *dict, int i, cli_volume_status_t *status)
+cli_get_detail_status(dict_t *dict, int i, cli_volume_status_t *status)
{
- uint64_t free = 0;
- uint64_t total = 0;
- char key[1024] = {0};
- int ret = 0;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.free", i);
- ret = dict_get_uint64 (dict, key, &free);
-
- status->free = gf_uint64_2human_readable (free);
- if (!status->free)
- goto out;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.total", i);
- ret = dict_get_uint64 (dict, key, &total);
+ uint64_t free = 0;
+ uint64_t total = 0;
+ char key[1024] = {0};
+ int ret = 0;
- status->total = gf_uint64_2human_readable (total);
- if (!status->total)
- goto out;
-
-#ifdef GF_LINUX_HOST_OS
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.device", i);
- ret = dict_get_str (dict, key, &(status->device));
- if (ret)
- status->device = NULL;
-#endif
+ snprintf(key, sizeof(key), "brick%d.free", i);
+ ret = dict_get_uint64(dict, key, &free);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.block_size", i);
- ret = dict_get_uint64 (dict, key, &(status->block_size));
- if (ret) {
- ret = 0;
- status->block_size = 0;
- }
+ status->free = gf_uint64_2human_readable(free);
+ if (!status->free)
+ goto out;
-#ifdef GF_LINUX_HOST_OS
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mnt_options", i);
- ret = dict_get_str (dict, key, &(status->mount_options));
- if (ret)
- status->mount_options = NULL;
+ snprintf(key, sizeof(key), "brick%d.total", i);
+ ret = dict_get_uint64(dict, key, &total);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.fs_name", i);
- ret = dict_get_str (dict, key, &(status->fs_name));
- if (ret) {
- ret = 0;
- status->fs_name = NULL;
- }
+ status->total = gf_uint64_2human_readable(total);
+ if (!status->total)
+ goto out;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.inode_size", i);
- ret = dict_get_str (dict, key, &(status->inode_size));
- if (ret)
- status->inode_size = NULL;
-#endif /* GF_LINUX_HOST_OS */
+ snprintf(key, sizeof(key), "brick%d.device", i);
+ ret = dict_get_str(dict, key, &(status->device));
+ if (ret)
+ status->device = NULL;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.total_inodes", i);
- ret = dict_get_uint64 (dict, key,
- &(status->total_inodes));
- if (ret)
- status->total_inodes = 0;
+ snprintf(key, sizeof(key), "brick%d.block_size", i);
+ ret = dict_get_uint64(dict, key, &(status->block_size));
+ if (ret) {
+ ret = 0;
+ status->block_size = 0;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.free_inodes", i);
- ret = dict_get_uint64 (dict, key, &(status->free_inodes));
- if (ret) {
- ret = 0;
- status->free_inodes = 0;
- }
+ snprintf(key, sizeof(key), "brick%d.mnt_options", i);
+ ret = dict_get_str(dict, key, &(status->mount_options));
+ if (ret)
+ status->mount_options = NULL;
+ snprintf(key, sizeof(key), "brick%d.fs_name", i);
+ ret = dict_get_str(dict, key, &(status->fs_name));
+ if (ret) {
+ ret = 0;
+ status->fs_name = NULL;
+ }
+
+ snprintf(key, sizeof(key), "brick%d.inode_size", i);
+ ret = dict_get_str(dict, key, &(status->inode_size));
+ if (ret)
+ status->inode_size = NULL;
+
+ snprintf(key, sizeof(key), "brick%d.total_inodes", i);
+ ret = dict_get_uint64(dict, key, &(status->total_inodes));
+ if (ret)
+ status->total_inodes = 0;
+
+ snprintf(key, sizeof(key), "brick%d.free_inodes", i);
+ ret = dict_get_uint64(dict, key, &(status->free_inodes));
+ if (ret) {
+ ret = 0;
+ status->free_inodes = 0;
+ }
- out:
- return ret;
+out:
+ return ret;
}
void
-cli_print_detailed_status (cli_volume_status_t *status)
+cli_print_detailed_status(cli_volume_status_t *status)
{
- cli_out ("%-20s : %-20s", "Brick", status->brick);
- cli_out ("%-20s : %-20d", "Port", status->port);
- cli_out ("%-20s : %-20c", "Online", (status->online) ? 'Y' : 'N');
- cli_out ("%-20s : %-20s", "Pid", status->pid_str);
-
-#ifdef GF_LINUX_HOST_OS
- if (status->fs_name)
- cli_out ("%-20s : %-20s", "File System", status->fs_name);
- else
- cli_out ("%-20s : %-20s", "File System", "N/A");
-
- if (status->device)
- cli_out ("%-20s : %-20s", "Device", status->device);
- else
- cli_out ("%-20s : %-20s", "Device", "N/A");
-
- if (status->mount_options) {
- cli_out ("%-20s : %-20s", "Mount Options",
- status->mount_options);
- } else {
- cli_out ("%-20s : %-20s", "Mount Options", "N/A");
- }
+ cli_out("%-20s : %-20s", "Brick", status->brick);
+
+ if (status->online) {
+ cli_out("%-20s : %-20d", "TCP Port", status->port);
+ cli_out("%-20s : %-20d", "RDMA Port", status->rdma_port);
+ } else {
+ cli_out("%-20s : %-20s", "TCP Port", "N/A");
+ cli_out("%-20s : %-20s", "RDMA Port", "N/A");
+ }
+
+ cli_out("%-20s : %-20c", "Online", (status->online) ? 'Y' : 'N');
+ cli_out("%-20s : %-20s", "Pid", status->pid_str);
+
+ if (status->fs_name)
+ cli_out("%-20s : %-20s", "File System", status->fs_name);
+ else
+ cli_out("%-20s : %-20s", "File System", "N/A");
+
+ if (status->device)
+ cli_out("%-20s : %-20s", "Device", status->device);
+ else
+ cli_out("%-20s : %-20s", "Device", "N/A");
+
+ if (status->mount_options) {
+ cli_out("%-20s : %-20s", "Mount Options", status->mount_options);
+ } else {
+ cli_out("%-20s : %-20s", "Mount Options", "N/A");
+ }
+
+ if (status->inode_size) {
+ cli_out("%-20s : %-20s", "Inode Size", status->inode_size);
+ } else {
+ cli_out("%-20s : %-20s", "Inode Size", "N/A");
+ }
+ if (status->free)
+ cli_out("%-20s : %-20s", "Disk Space Free", status->free);
+ else
+ cli_out("%-20s : %-20s", "Disk Space Free", "N/A");
+
+ if (status->total)
+ cli_out("%-20s : %-20s", "Total Disk Space", status->total);
+ else
+ cli_out("%-20s : %-20s", "Total Disk Space", "N/A");
+
+ if (status->total_inodes) {
+ cli_out("%-20s : %-20" GF_PRI_INODE, "Inode Count",
+ status->total_inodes);
+ } else {
+ cli_out("%-20s : %-20s", "Inode Count", "N/A");
+ }
+
+ if (status->free_inodes) {
+ cli_out("%-20s : %-20" GF_PRI_INODE, "Free Inodes",
+ status->free_inodes);
+ } else {
+ cli_out("%-20s : %-20s", "Free Inodes", "N/A");
+ }
+}
- if (status->inode_size) {
- cli_out ("%-20s : %-20s", "Inode Size",
- status->inode_size);
+int
+cli_print_brick_status(cli_volume_status_t *status)
+{
+ int fieldlen = CLI_VOL_STATUS_BRICK_LEN;
+ int bricklen = 0;
+ char *p = NULL;
+ int num_spaces = 0;
+
+ p = status->brick;
+ bricklen = strlen(p);
+ while (bricklen > 0) {
+ if (bricklen > fieldlen) {
+ cli_out("%.*s", fieldlen, p);
+ p += fieldlen;
+ bricklen -= fieldlen;
} else {
- cli_out ("%-20s : %-20s", "Inode Size", "N/A");
- }
-#endif
- if (status->free)
- cli_out ("%-20s : %-20s", "Disk Space Free", status->free);
- else
- cli_out ("%-20s : %-20s", "Disk Space Free", "N/A");
-
- if (status->total)
- cli_out ("%-20s : %-20s", "Total Disk Space", status->total);
- else
- cli_out ("%-20s : %-20s", "Total Disk Space", "N/A");
+ num_spaces = (fieldlen - bricklen) + 1;
+ printf("%s", p);
+ while (num_spaces-- != 0)
+ printf(" ");
+ if (status->port || status->rdma_port) {
+ if (status->online)
+ cli_out("%-10d%-11d%-8c%-5s", status->port,
+ status->rdma_port, status->online ? 'Y' : 'N',
+ status->pid_str);
+ else
+ cli_out("%-10s%-11s%-8c%-5s", "N/A", "N/A",
+ status->online ? 'Y' : 'N', status->pid_str);
+ } else
+ cli_out("%-10s%-11s%-8c%-5s", "N/A", "N/A",
+ status->online ? 'Y' : 'N', status->pid_str);
+ bricklen = 0;
+ }
+ }
+
+ return 0;
+}
+#define NEEDS_GLFS_HEAL(op) \
+ ((op == GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE) || \
+ (op == GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME) || \
+ (op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) || \
+ (op == GF_SHD_OP_INDEX_SUMMARY) || (op == GF_SHD_OP_SPLIT_BRAIN_FILES) || \
+ (op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) || \
+ (op == GF_SHD_OP_HEAL_SUMMARY))
- if (status->total_inodes) {
- cli_out ("%-20s : %-20ld", "Inode Count",
- status->total_inodes);
- } else {
- cli_out ("%-20s : %-20s", "Inode Count", "N/A");
- }
+int
+cli_launch_glfs_heal(int heal_op, dict_t *options)
+{
+ char buff[PATH_MAX] = {0};
+ runner_t runner = {0};
+ char *filename = NULL;
+ char *hostname = NULL;
+ char *path = NULL;
+ char *volname = NULL;
+ char *out = NULL;
+ int ret = 0;
+
+ runinit(&runner);
+ ret = dict_get_str(options, "volname", &volname);
+ runner_add_args(&runner, GLFSHEAL_PREFIX "/glfsheal", volname, NULL);
+ runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
+
+ switch (heal_op) {
+ case GF_SHD_OP_INDEX_SUMMARY:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ runner_add_args(&runner, "--xml", NULL);
+ }
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+ ret = dict_get_str(options, "file", &filename);
+ runner_add_args(&runner, "bigger-file", filename, NULL);
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:
+ ret = dict_get_str(options, "file", &filename);
+ runner_add_args(&runner, "latest-mtime", filename, NULL);
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+ ret = dict_get_str(options, "heal-source-hostname", &hostname);
+ ret = dict_get_str(options, "heal-source-brickpath", &path);
+ runner_add_args(&runner, "source-brick", NULL);
+ runner_argprintf(&runner, "%s:%s", hostname, path);
+ if (dict_get_str(options, "file", &filename) == 0)
+ runner_argprintf(&runner, "%s", filename);
+ break;
+ case GF_SHD_OP_SPLIT_BRAIN_FILES:
+ runner_add_args(&runner, "split-brain-info", NULL);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ runner_add_args(&runner, "--xml", NULL);
+ }
+ break;
+ case GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE:
+ case GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE:
+ runner_add_args(&runner, "granular-entry-heal-op", NULL);
+ break;
+ case GF_SHD_OP_HEAL_SUMMARY:
+ runner_add_args(&runner, "info-summary", NULL);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ runner_add_args(&runner, "--xml", NULL);
+ }
+ break;
+ default:
+ ret = -1;
+ goto out;
+ }
+ if (global_state->mode & GLUSTER_MODE_GLFSHEAL_NOLOG)
+ runner_add_args(&runner, "--nolog", NULL);
+ ret = runner_start(&runner);
+ if (ret == -1)
+ goto out;
+ while ((
+ out = fgets(buff, sizeof(buff), runner_chio(&runner, STDOUT_FILENO)))) {
+ printf("%s", out);
+ }
+ ret = runner_end(&runner);
- if (status->free_inodes) {
- cli_out ("%-20s : %-20ld", "Free Inodes",
- status->free_inodes);
- } else {
- cli_out ("%-20s : %-20s", "Free Inodes", "N/A");
- }
+out:
+ return ret;
}
int
-cli_print_brick_status (cli_volume_status_t *status)
+cli_cmd_volume_heal_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int fieldlen = CLI_VOL_STATUS_BRICK_LEN;
- char buf[80] = {0,};
- int bricklen = 0;
- int i = 0;
- char *p = NULL;
- int num_tabs = 0;
-
- bricklen = strlen (status->brick);
- p = status->brick;
- while (bricklen > 0) {
- if (bricklen > fieldlen) {
- i++;
- strncpy (buf, p, min (fieldlen, (sizeof (buf)-1)));
- buf[strlen(buf) + 1] = '\0';
- cli_out ("%s", buf);
- p = status->brick + i * fieldlen;
- bricklen -= fieldlen;
- } else {
- num_tabs = (fieldlen - bricklen) / CLI_TAB_LENGTH + 1;
- printf ("%s", p);
- while (num_tabs-- != 0)
- printf ("\t");
- if (status->port)
- cli_out ("%d\t%c\t%s",
- status->port, status->online?'Y':'N',
- status->pid_str);
- else
- cli_out ("%s\t%c\t%s",
- "N/A", status->online?'Y':'N',
- status->pid_str);
- bricklen = 0;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ dict_t *options = NULL;
+ xlator_t *this = NULL;
+ cli_local_t *local = NULL;
+ int heal_op = 0;
+
+ this = THIS;
+
+ if (wordcount < 3) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ ret = cli_cmd_volume_heal_options_parse(words, wordcount, &options);
+ if (ret) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+ ret = dict_get_int32(options, "heal-op", &heal_op);
+ if (ret < 0)
+ goto out;
+ if (NEEDS_GLFS_HEAL(heal_op)) {
+ ret = cli_launch_glfs_heal(heal_op, options);
+ if (ret < 0)
+ goto out;
+ if (heal_op != GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)
+ goto out;
+ }
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_HEAL_VOLUME];
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0) &&
+ !(global_state->mode & GLUSTER_MODE_XML)) {
+ cli_out("Volume heal failed.");
}
+ }
+
+ if (options)
+ dict_unref(options);
- return 0;
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
}
int
-cli_cmd_volume_heal_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_statedump_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- int sent = 0;
- int parse_error = 0;
- dict_t *options = NULL;
- xlator_t *this = NULL;
-
- this = THIS;
- frame = create_frame (this, this->ctx->pool);
- if (!frame)
- goto out;
-
- if (wordcount < 3) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ if (wordcount < 3) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- ret = cli_cmd_volume_heal_options_parse (words, wordcount, &options);
+ if (wordcount >= 3) {
+ ret = cli_cmd_volume_statedump_options_parse(words, wordcount,
+ &options);
if (ret) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
+ parse_error = 1;
+ gf_log("cli", GF_LOG_ERROR,
+ "Error parsing "
+ "statedump options");
+ cli_out("Error parsing options");
+ cli_usage_out(word->pattern);
}
+ }
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_HEAL_VOLUME];
+ ret = dict_set_str(options, "volname", (char *)words[2]);
+ if (ret)
+ goto out;
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATEDUMP_VOLUME];
-out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error == 0))
- cli_out ("Volume heal failed");
- }
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
- if (options)
- dict_unref (options);
+ CLI_LOCAL_INIT(local, words, frame, options);
- CLI_STACK_DESTROY (frame);
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
- return ret;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume statedump failed");
+ }
+
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
}
int
-cli_cmd_volume_statedump_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_list_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
-
- frame = create_frame (THIS, THIS->ctx->pool);
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ int sent = 0;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LIST_VOLUME];
+ if (proc->fn) {
+ frame = create_frame(THIS, THIS->ctx->pool);
if (!frame)
- goto out;
+ goto out;
+ ret = proc->fn(frame, THIS, NULL);
+ }
- if (wordcount < 3) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if (sent == 0)
+ cli_out("Volume list failed");
+ }
- if (wordcount >= 3) {
- ret = cli_cmd_volume_statedump_options_parse (words, wordcount,
- &options);
- if (ret) {
- parse_error = 1;
- gf_log ("cli", GF_LOG_ERROR, "Error parsing "
- "statedump options");
- cli_out ("Error parsing options");
- cli_usage_out (word->pattern);
- }
- }
+ CLI_STACK_DESTROY(frame);
- ret = dict_set_str (options, "volname", (char *)words[2]);
- if (ret)
- goto out;
+ return ret;
+}
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_STATEDUMP_VOLUME];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+int
+cli_cmd_volume_clearlocks_cbk(struct cli_state *state,
+ struct cli_cmd_word *word, const char **words,
+ int wordcount)
+{
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ if (wordcount < 7 || wordcount > 8) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
+
+ ret = cli_cmd_volume_clrlks_opts_parse(words, wordcount, &options);
+ if (ret) {
+ parse_error = 1;
+ gf_log("cli", GF_LOG_ERROR,
+ "Error parsing "
+ "clear-locks options");
+ cli_out("Error parsing options");
+ cli_usage_out(word->pattern);
+ }
+
+ ret = dict_set_str(options, "volname", (char *)words[2]);
+ if (ret)
+ goto out;
+
+ ret = dict_set_str(options, "path", (char *)words[3]);
+ if (ret)
+ goto out;
+
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CLRLOCKS_VOLUME];
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
+
+ if (proc->fn) {
+ ret = proc->fn(frame, THIS, options);
+ }
out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error = 0))
- cli_out ("Volume statedump failed");
- }
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_out("Volume clear-locks failed");
+ }
- CLI_STACK_DESTROY (frame);
+ CLI_STACK_DESTROY(frame);
- return ret;
+ return ret;
}
int
-cli_cmd_volume_list_cbk (struct cli_state *state, struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_barrier_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- call_frame_t *frame = NULL;
- rpc_clnt_procedure_t *proc = NULL;
- int sent = 0;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_error = 0;
+ cli_local_t *local = NULL;
+
+ if (wordcount != 4) {
+ cli_usage_out(word->pattern);
+ parse_error = 1;
+ goto out;
+ }
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ options = dict_new();
+ if (!options) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str(options, "volname", (char *)words[2]);
+ if (ret)
+ goto out;
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_LIST_VOLUME];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, NULL);
- }
+ ret = dict_set_str(options, "barrier", (char *)words[3]);
+ if (ret)
+ goto out;
-out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if (sent == 0)
- cli_out ("Volume list failed");
- }
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_BARRIER_VOLUME];
+
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ CLI_LOCAL_INIT(local, words, frame, options);
- CLI_STACK_DESTROY (frame);
+ if (proc->fn)
+ ret = proc->fn(frame, THIS, options);
- return ret;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_error == 0))
+ cli_err("Volume barrier failed");
+ }
+ CLI_STACK_DESTROY(frame);
+
+ return ret;
}
int
-cli_cmd_volume_clearlocks_cbk (struct cli_state *state,
- struct cli_cmd_word *word,
- const char **words, int wordcount)
+cli_cmd_volume_getopt_cbk(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- dict_t *options = NULL;
- int sent = 0;
- int parse_error = 0;
-
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *options = NULL;
+ int sent = 0;
+ int parse_err = 0;
+ cli_local_t *local = NULL;
+
+ if (wordcount != 4) {
+ cli_usage_out(word->pattern);
+ parse_err = 1;
+ goto out;
+ }
- if (wordcount < 7 || wordcount > 8) {
- cli_usage_out (word->pattern);
- parse_error = 1;
- goto out;
- }
+ options = dict_new();
+ if (!options)
+ goto out;
- ret = cli_cmd_volume_clrlks_opts_parse (words, wordcount, &options);
- if (ret) {
- parse_error = 1;
- gf_log ("cli", GF_LOG_ERROR, "Error parsing "
- "clear-locks options");
- cli_out ("Error parsing options");
- cli_usage_out (word->pattern);
- }
+ ret = dict_set_str(options, "volname", (char *)words[2]);
+ if (ret)
+ goto out;
- ret = dict_set_str (options, "volname", (char *)words[2]);
- if (ret)
- goto out;
+ ret = dict_set_str(options, "key", (char *)words[3]);
+ if (ret)
+ goto out;
- ret = dict_set_str (options, "path", (char *)words[3]);
- if (ret)
- goto out;
+ proc = &cli_rpc_prog->proctable[GLUSTER_CLI_GET_VOL_OPT];
- proc = &cli_rpc_prog->proctable[GLUSTER_CLI_CLRLOCKS_VOLUME];
- if (proc->fn) {
- ret = proc->fn (frame, THIS, options);
- }
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
-out:
- if (ret) {
- cli_cmd_sent_status_get (&sent);
- if ((sent == 0) && (parse_error = 0))
- cli_out ("Volume clear-locks failed");
- }
+ CLI_LOCAL_INIT(local, words, frame, options);
- CLI_STACK_DESTROY (frame);
+ if (proc->fn)
+ ret = proc->fn(frame, THIS, options);
- return ret;
+out:
+ if (ret) {
+ cli_cmd_sent_status_get(&sent);
+ if ((sent == 0) && (parse_err == 0))
+ cli_err("Volume get option failed");
+ }
+ CLI_STACK_DESTROY(frame);
+ return ret;
}
+/* This is a bit of a hack to display the help. The current bitrot cmd
+ * format does not work well when registering the cmds.
+ * Ideally the should have been of the form
+ * gluster volume bitrot <subcommand> <volumename> ...
+ */
+
+struct cli_cmd bitrot_cmds[] = {
+
+ {"volume bitrot help", cli_cmd_bitrot_help_cbk,
+ "display help for volume bitrot commands"},
+
+ {"volume bitrot <VOLNAME> {enable|disable}", NULL, /*cli_cmd_bitrot_cbk,*/
+ "Enable/disable bitrot for volume <VOLNAME>"},
+
+ {"volume bitrot <VOLNAME> signing-time <time-in-secs>",
+ NULL, /*cli_cmd_bitrot_cbk,*/
+ "Waiting time for an object after last fd is closed to start signing "
+ "process"},
+
+ {"volume bitrot <VOLNAME> signer-threads <count>",
+ NULL, /*cli_cmd_bitrot_cbk,*/
+ "Number of signing process threads. Usually set to number of available "
+ "cores"},
+
+ {"volume bitrot <VOLNAME> scrub-throttle {lazy|normal|aggressive}",
+ NULL, /*cli_cmd_bitrot_cbk,*/
+ "Set the speed of the scrubber for volume <VOLNAME>"},
+
+ {"volume bitrot <VOLNAME> scrub-frequency {hourly|daily|weekly|biweekly"
+ "|monthly}",
+ NULL, /*cli_cmd_bitrot_cbk,*/
+ "Set the frequency of the scrubber for volume <VOLNAME>"},
+
+ {"volume bitrot <VOLNAME> scrub {pause|resume|status|ondemand}",
+ NULL, /*cli_cmd_bitrot_cbk,*/
+ "Pause/resume the scrubber for <VOLNAME>. Status displays the status of "
+ "the scrubber. ondemand starts the scrubber immediately."},
+
+ {"volume bitrot <VOLNAME> {enable|disable}\n"
+ "volume bitrot <VOLNAME> signing-time <time-in-secs>\n"
+ "volume bitrot <VOLNAME> signer-threads <count>\n"
+ "volume bitrot <volname> scrub-throttle {lazy|normal|aggressive}\n"
+ "volume bitrot <volname> scrub-frequency {hourly|daily|weekly|biweekly"
+ "|monthly}\n"
+ "volume bitrot <volname> scrub {pause|resume|status|ondemand}",
+ cli_cmd_bitrot_cbk, NULL},
+
+ {NULL, NULL, NULL}};
+
+struct cli_cmd quota_cmds[] = {
+
+ /* Quota commands */
+ {"volume quota help", cli_cmd_quota_help_cbk,
+ "display help for volume quota commands"},
+
+ {"volume quota <VOLNAME> {enable|disable|list [<path> ...]| "
+ "list-objects [<path> ...] | remove <path>| remove-objects <path> | "
+ "default-soft-limit <percent>}",
+ cli_cmd_quota_cbk, "Enable/disable and configure quota for <VOLNAME>"},
+
+ {"volume quota <VOLNAME> {limit-usage <path> <size> [<percent>]}",
+ cli_cmd_quota_cbk, "Set maximum size for <path> for <VOLNAME>"},
+
+ {"volume quota <VOLNAME> {limit-objects <path> <number> [<percent>]}",
+ cli_cmd_quota_cbk,
+ "Set the maximum number of entries allowed in <path> for <VOLNAME>"},
+
+ {"volume quota <VOLNAME> {alert-time|soft-timeout|hard-timeout} {<time>}",
+ cli_cmd_quota_cbk, "Set quota timeout for <VOLNAME>"},
+
+ {"volume inode-quota <VOLNAME> enable", cli_cmd_quota_cbk,
+ "Enable/disable inode-quota for <VOLNAME>"},
+
+ {"volume quota <VOLNAME> {enable|disable|list [<path> ...]| "
+ "list-objects [<path> ...] | remove <path>| remove-objects <path> | "
+ "default-soft-limit <percent>}\n"
+ "volume quota <VOLNAME> {limit-usage <path> <size> [<percent>]}\n"
+ "volume quota <VOLNAME> {limit-objects <path> <number> [<percent>]}\n"
+ "volume quota <VOLNAME> {alert-time|soft-timeout|hard-timeout} {<time>}",
+ cli_cmd_quota_cbk, NULL},
+
+ {NULL, NULL, NULL}};
+
struct cli_cmd volume_cmds[] = {
- { "volume info [all|<VOLNAME>]",
- cli_cmd_volume_info_cbk,
- "list information of all volumes"},
+ {"volume help", cli_cmd_volume_help_cbk,
+ "display help for volume commands"},
- { "volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ...",
- cli_cmd_volume_create_cbk,
- "create a new volume of specified type with mentioned bricks"},
+ {"volume info [all|<VOLNAME>]", cli_cmd_volume_info_cbk,
+ "list information of all volumes"},
- { "volume delete <VOLNAME>",
- cli_cmd_volume_delete_cbk,
- "delete volume specified by <VOLNAME>"},
+ {"volume create <NEW-VOLNAME> [stripe <COUNT>] "
+ "[[replica <COUNT> [arbiter <COUNT>]]|[replica 2 thin-arbiter 1]] "
+ "[disperse [<COUNT>]] [disperse-data <COUNT>] [redundancy <COUNT>] "
+ "[transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> <TA-BRICK>"
+ "... [force]",
- { "volume start <VOLNAME> [force]",
- cli_cmd_volume_start_cbk,
- "start volume specified by <VOLNAME>"},
+ cli_cmd_volume_create_cbk,
+ "create a new volume of specified type with mentioned bricks"},
- { "volume stop <VOLNAME> [force]",
- cli_cmd_volume_stop_cbk,
- "stop volume specified by <VOLNAME>"},
+ {"volume delete <VOLNAME>", cli_cmd_volume_delete_cbk,
+ "delete volume specified by <VOLNAME>"},
- /*{ "volume rename <VOLNAME> <NEW-VOLNAME>",
- cli_cmd_volume_rename_cbk,
- "rename volume <VOLNAME> to <NEW-VOLNAME>"},*/
+ {"volume start <VOLNAME> [force]", cli_cmd_volume_start_cbk,
+ "start volume specified by <VOLNAME>"},
- { "volume add-brick <VOLNAME> [<stripe|replica> <COUNT>] <NEW-BRICK> ...",
- cli_cmd_volume_add_brick_cbk,
- "add brick to volume <VOLNAME>"},
+ {"volume stop <VOLNAME> [force]", cli_cmd_volume_stop_cbk,
+ "stop volume specified by <VOLNAME>"},
- { "volume remove-brick <VOLNAME> [replica <COUNT>] <BRICK> ... {start|stop|status|commit|force}",
- cli_cmd_volume_remove_brick_cbk,
- "remove brick from volume <VOLNAME>"},
+ /*{ "volume rename <VOLNAME> <NEW-VOLNAME>",
+ cli_cmd_volume_rename_cbk,
+ "rename volume <VOLNAME> to <NEW-VOLNAME>"},*/
- { "volume rebalance <VOLNAME> [fix-layout] {start|stop|status} [force]",
- cli_cmd_volume_defrag_cbk,
- "rebalance operations"},
+ {"volume add-brick <VOLNAME> [<stripe|replica> <COUNT> "
+ "[arbiter <COUNT>]] <NEW-BRICK> ... [force]",
+ cli_cmd_volume_add_brick_cbk, "add brick to volume <VOLNAME>"},
- { "volume replace-brick <VOLNAME> <BRICK> <NEW-BRICK> {start|pause|abort|status|commit [force]}",
- cli_cmd_volume_replace_brick_cbk,
- "replace-brick operations"},
+ {"volume remove-brick <VOLNAME> [replica <COUNT>] <BRICK> ..."
+ " <start|stop|status|commit|force>",
+ cli_cmd_volume_remove_brick_cbk, "remove brick from volume <VOLNAME>"},
- /*{ "volume set-transport <VOLNAME> <TRANSPORT-TYPE> [<TRANSPORT-TYPE>] ...",
- cli_cmd_volume_set_transport_cbk,
- "set transport type for volume <VOLNAME>"},*/
+ {"volume rebalance <VOLNAME> {{fix-layout start} | {start "
+ "[force]|stop|status}}",
+ cli_cmd_volume_defrag_cbk, "rebalance operations"},
- { "volume set <VOLNAME> <KEY> <VALUE>",
- cli_cmd_volume_set_cbk,
- "set options for volume <VOLNAME>"},
+ {"volume replace-brick <VOLNAME> <SOURCE-BRICK> <NEW-BRICK> "
+ "{commit force}",
+ cli_cmd_volume_replace_brick_cbk, "replace-brick operations"},
- { "volume help",
- cli_cmd_volume_help_cbk,
- "display help for the volume command"},
+ /*{ "volume set-transport <VOLNAME> <TRANSPORT-TYPE> [<TRANSPORT-TYPE>]
+ ...", cli_cmd_volume_set_transport_cbk, "set transport type for volume
+ <VOLNAME>"},*/
- { "volume log rotate <VOLNAME> [BRICK]",
- cli_cmd_log_rotate_cbk,
- "rotate the log file for corresponding volume/brick"},
+ {"volume set <VOLNAME> <KEY> <VALUE>", cli_cmd_volume_set_cbk,
+ "set options for volume <VOLNAME>"},
- { "volume sync <HOSTNAME> [all|<VOLNAME>]",
- cli_cmd_sync_volume_cbk,
- "sync the volume information from a peer"},
+ {"volume set <VOLNAME> group <GROUP>", cli_cmd_volume_set_cbk,
+ "This option can be used for setting multiple pre-defined volume options "
+ "where group_name is a file under /var/lib/glusterd/groups containing one "
+ "key value pair per line"},
- { "volume reset <VOLNAME> [option] [force]",
- cli_cmd_volume_reset_cbk,
- "reset all the reconfigured options"},
+ {"volume log <VOLNAME> rotate [BRICK]", cli_cmd_log_rotate_cbk,
+ "rotate the log file for corresponding volume/brick"},
+
+ {"volume sync <HOSTNAME> [all|<VOLNAME>]", cli_cmd_sync_volume_cbk,
+ "sync the volume information from a peer"},
+
+ {"volume reset <VOLNAME> [option] [force]", cli_cmd_volume_reset_cbk,
+ "reset all the reconfigured options"},
#if (SYNCDAEMON_COMPILE)
- {"volume "GEOREP" [<VOLNAME>] [<SLAVE-URL>] {start|stop|config|status|log-rotate} [options...]",
- cli_cmd_volume_gsync_set_cbk,
- "Geo-sync operations",
- cli_cmd_check_gsync_exists_cbk},
+ {"volume " GEOREP " [<MASTER-VOLNAME>] [<SLAVE-IP>]::[<SLAVE-VOLNAME>] {"
+ "\\\n create [[ssh-port n] [[no-verify] \\\n | [push-pem]]] [force] \\\n"
+ " | start [force] \\\n | stop [force] \\\n | pause [force] \\\n | resume "
+ "[force] \\\n"
+ " | config [[[\\!]<option>] [<value>]] \\\n | status "
+ "[detail] \\\n | delete [reset-sync-time]} ",
+ cli_cmd_volume_gsync_set_cbk, "Geo-sync operations",
+ cli_cmd_check_gsync_exists_cbk},
#endif
- { "volume profile <VOLNAME> {start|info|stop} [nfs]",
- cli_cmd_volume_profile_cbk,
- "volume profile operations"},
+ {"volume profile <VOLNAME> {start|info [peek|incremental "
+ "[peek]|cumulative|clear]|stop} [nfs]",
+ cli_cmd_volume_profile_cbk, "volume profile operations"},
+
+ {"volume top <VOLNAME> {open|read|write|opendir|readdir|clear} [nfs|brick "
+ "<brick>] [list-cnt <value>] | "
+ "{read-perf|write-perf} [bs <size> count <count>] "
+ "[brick <brick>] [list-cnt <value>]",
+ cli_cmd_volume_top_cbk, "volume top operations"},
+
+ {"volume status [all | <VOLNAME> [nfs|shd|<BRICK>|quotad]]"
+ " [detail|clients|mem|inode|fd|callpool|tasks|client-list]",
+ cli_cmd_volume_status_cbk,
+ "display status of all or specified volume(s)/brick"},
+
+ {"volume heal <VOLNAME> [enable | disable | full |"
+ "statistics [heal-count [replica <HOSTNAME:BRICKNAME>]] |"
+ "info [summary | split-brain] |"
+ "split-brain {bigger-file <FILE> | latest-mtime <FILE> |"
+ "source-brick <HOSTNAME:BRICKNAME> [<FILE>]} |"
+ "granular-entry-heal {enable | disable}]",
+ cli_cmd_volume_heal_cbk,
+ "self-heal commands on volume specified by <VOLNAME>"},
+
+ {"volume statedump <VOLNAME> [[nfs|quotad] [all|mem|iobuf|callpool|"
+ "priv|fd|inode|history]... | [client <hostname:process-id>]]",
+ cli_cmd_volume_statedump_cbk, "perform statedump on bricks"},
+
+ {"volume list", cli_cmd_volume_list_cbk, "list all volumes in cluster"},
+
+ {"volume clear-locks <VOLNAME> <path> kind {blocked|granted|all}"
+ "{inode [range]|entry [basename]|posix [range]}",
+ cli_cmd_volume_clearlocks_cbk, "Clear locks held on path"},
+ {"volume barrier <VOLNAME> {enable|disable}", cli_cmd_volume_barrier_cbk,
+ "Barrier/unbarrier file operations on a volume"},
+ {"volume get <VOLNAME|all> <key|all>", cli_cmd_volume_getopt_cbk,
+ "Get the value of the all options or given option for volume <VOLNAME>"
+ " or all option. gluster volume get all all is to get all global "
+ "options"},
+
+ {"volume reset-brick <VOLNAME> <SOURCE-BRICK> {{start} |"
+ " {<NEW-BRICK> commit}}",
+ cli_cmd_volume_reset_brick_cbk, "reset-brick operations"},
+
+ {NULL, NULL, NULL}};
- { "volume quota <VOLNAME> <enable|disable|limit-usage|list|remove> [path] [value]",
- cli_cmd_quota_cbk,
- "quota translator specific operations"},
+int
+cli_cmd_quota_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
+{
+ struct cli_cmd *cmd = NULL;
+ struct cli_cmd *quota_cmd = NULL;
+ int count = 0;
- { "volume top <VOLNAME> {[open|read|write|opendir|readdir [nfs]] "
- "|[read-perf|write-perf [nfs|{bs <size> count <count>}]]"
- "|[clear [nfs]]} [brick <brick>] [list-cnt <count>]",
- cli_cmd_volume_top_cbk,
- "volume top operations"},
+ cmd = GF_MALLOC(sizeof(quota_cmds), cli_mt_cli_cmd);
+ memcpy(cmd, quota_cmds, sizeof(quota_cmds));
+ count = (sizeof(quota_cmds) / sizeof(struct cli_cmd));
+ cli_cmd_sort(cmd, count);
- { "volume status [all | <VOLNAME> [nfs|shd|<BRICK>]]"
- " [detail|clients|mem|inode|fd|callpool]",
- cli_cmd_volume_status_cbk,
- "display status of all or specified volume(s)/brick"},
+ cli_out("\ngluster quota commands");
+ cli_out("=======================\n");
- { "volume heal <VOLNAME> [{full | info {healed | heal-failed | split-brain}}]",
- cli_cmd_volume_heal_cbk,
- "self-heal commands on volume specified by <VOLNAME>"},
+ for (quota_cmd = cmd; quota_cmd->pattern; quota_cmd++)
+ if ((_gf_false == quota_cmd->disable) && (quota_cmd->desc))
+ cli_out("%s - %s", quota_cmd->pattern, quota_cmd->desc);
- {"volume statedump <VOLNAME> [nfs] [all|mem|iobuf|callpool|priv|fd|"
- "inode|history]...",
- cli_cmd_volume_statedump_cbk,
- "perform statedump on bricks"},
+ cli_out("\n");
+ GF_FREE(cmd);
- {"volume list",
- cli_cmd_volume_list_cbk,
- "list all volumes in cluster"},
+ return 0;
+}
- {"volume clear-locks <VOLNAME> <path> kind {blocked|granted|all}"
- "{inode [range]|entry [basename]|posix [range]}",
- cli_cmd_volume_clearlocks_cbk,
- "Clear locks held on path"
- },
+int
+cli_cmd_bitrot_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
+{
+ struct cli_cmd *cmd = NULL;
+ struct cli_cmd *bitrot_cmd = NULL;
+ int count = 0;
- { NULL, NULL, NULL }
-};
+ cmd = GF_MALLOC(sizeof(bitrot_cmds), cli_mt_cli_cmd);
+ memcpy(cmd, bitrot_cmds, sizeof(bitrot_cmds));
+ count = (sizeof(bitrot_cmds) / sizeof(struct cli_cmd));
+ cli_cmd_sort(cmd, count);
+
+ cli_out("\ngluster bitrot commands");
+ cli_out("========================\n");
+
+ for (bitrot_cmd = cmd; bitrot_cmd->pattern; bitrot_cmd++)
+ if ((_gf_false == bitrot_cmd->disable) && (bitrot_cmd->desc))
+ cli_out("%s - %s", bitrot_cmd->pattern, bitrot_cmd->desc);
+
+ cli_out("\n");
+ GF_FREE(cmd);
+
+ return 0;
+}
int
-cli_cmd_volume_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
- const char **words, int wordcount)
+cli_cmd_volume_help_cbk(struct cli_state *state, struct cli_cmd_word *in_word,
+ const char **words, int wordcount)
{
- struct cli_cmd *cmd = NULL;
+ struct cli_cmd *cmd = NULL;
+ struct cli_cmd *vol_cmd = NULL;
+ int count = 0;
- for (cmd = volume_cmds; cmd->pattern; cmd++)
- if (_gf_false == cmd->disable)
- cli_out ("%s - %s", cmd->pattern, cmd->desc);
+ cmd = GF_MALLOC(sizeof(volume_cmds), cli_mt_cli_cmd);
+ memcpy(cmd, volume_cmds, sizeof(volume_cmds));
+ count = (sizeof(volume_cmds) / sizeof(struct cli_cmd));
+ cli_cmd_sort(cmd, count);
- return 0;
+ cli_out("\ngluster volume commands");
+ cli_out("========================\n");
+
+ for (vol_cmd = cmd; vol_cmd->pattern; vol_cmd++)
+ if (_gf_false == vol_cmd->disable)
+ cli_out("%s - %s", vol_cmd->pattern, vol_cmd->desc);
+
+ cli_out("\n");
+ GF_FREE(cmd);
+ return 0;
}
int
-cli_cmd_volume_register (struct cli_state *state)
+cli_cmd_volume_register(struct cli_state *state)
{
- int ret = 0;
- struct cli_cmd *cmd = NULL;
+ int ret = 0;
+ struct cli_cmd *cmd = NULL;
- for (cmd = volume_cmds; cmd->pattern; cmd++) {
+ for (cmd = volume_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+
+ for (cmd = bitrot_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
+
+ for (cmd = quota_cmds; cmd->pattern; cmd++) {
+ ret = cli_cmd_register(&state->tree, cmd);
+ if (ret)
+ goto out;
+ }
- ret = cli_cmd_register (&state->tree, cmd);
- if (ret)
- goto out;
- }
out:
- return ret;
+ return ret;
+}
+
+static int
+gf_asprintf_append(char **string_ptr, const char *format, ...)
+{
+ va_list arg;
+ int rv = 0;
+ char *tmp = *string_ptr;
+
+ va_start(arg, format);
+ rv = gf_vasprintf(string_ptr, format, arg);
+ va_end(arg);
+
+ if (tmp)
+ GF_FREE(tmp);
+
+ return rv;
}
diff --git a/cli/src/cli-cmd.c b/cli/src/cli-cmd.c
index f2b434ac7a4..2d458b16a56 100644
--- a/cli/src/cli-cmd.c
+++ b/cli/src/cli-cmd.c
@@ -1,33 +1,18 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "cli.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
@@ -37,355 +22,390 @@
static int cmd_done;
static int cmd_sent;
-static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
-static pthread_mutex_t cond_mutex = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t conn = PTHREAD_COND_INITIALIZER;
-static pthread_mutex_t conn_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t cond_mutex = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t conn = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t conn_mutex = PTHREAD_MUTEX_INITIALIZER;
-int cli_op_ret = 0;
-int connected = 0;
-
-int cli_cmd_log_help_cbk (struct cli_state *state, struct cli_cmd_word *in_word,
- const char **words, int wordcount);
+int cli_op_ret = 0;
+static gf_boolean_t connected = _gf_false;
static unsigned
-cli_cmd_needs_connection (struct cli_cmd_word *word)
+cli_cmd_needs_connection(struct cli_cmd_word *word)
{
- if (!strcasecmp ("quit", word->word))
- return 0;
+ if (!strcasecmp("quit", word->word))
+ return 0;
- if (!strcasecmp ("help", word->word))
- return 0;
+ if (!strcasecmp("help", word->word))
+ return 0;
- if (!strcasecmp ("getwd", word->word))
- return 1;
+ if (!strcasecmp("getwd", word->word))
+ return 1;
- if (!strcasecmp ("exit", word->word))
- return 0;
+ if (!strcasecmp("exit", word->word))
+ return 0;
- return CLI_DEFAULT_CONN_TIMEOUT;
+ return cli_default_conn_timeout;
}
int
-cli_cmd_status_reset (void)
+cli_cmd_status_reset(void)
{
- int ret = 0;
-
- ret = cli_cmd_lock ();
- {
- if (ret == 0) {
- cmd_sent = 0;
- cmd_done = 0;
- }
- }
- ret = cli_cmd_unlock ();
- return ret;
+ int ret = 0;
+ ret = cli_cmd_lock();
+ {
+ if (ret == 0) {
+ cmd_sent = 0;
+ cmd_done = 0;
+ }
+ }
+ ret = cli_cmd_unlock();
+ return ret;
}
int
-cli_cmd_sent_status_get (int *status)
+cli_cmd_sent_status_get(int *status)
{
- int ret = 0;
- GF_ASSERT (status);
-
- ret = cli_cmd_lock ();
- {
- if (ret == 0)
- *status = cmd_sent;
- }
- ret = cli_cmd_unlock ();
- return ret;
+ int ret = 0;
+ GF_ASSERT(status);
+
+ ret = cli_cmd_lock();
+ {
+ if (ret == 0)
+ *status = cmd_sent;
+ }
+ ret = cli_cmd_unlock();
+ return ret;
}
int
-cli_cmd_process (struct cli_state *state, int argc, char **argv)
+cli_cmd_process(struct cli_state *state, int argc, char **argv)
{
- int ret = 0;
- struct cli_cmd_word *word = NULL;
- struct cli_cmd_word *next = NULL;
- int i = 0;
+ int ret = 0;
+ struct cli_cmd_word *word = NULL;
+ struct cli_cmd_word *next = NULL;
+ int i = 0;
- word = &state->tree.root;
+ word = &state->tree.root;
- if (!argc)
- return 0;
+ if (!argc)
+ return 0;
- for (i = 0; i < argc; i++) {
- next = cli_cmd_nextword (word, argv[i]);
+ for (i = 0; i < argc; i++) {
+ next = cli_cmd_nextword(word, argv[i]);
- word = next;
- if (!word)
- break;
+ word = next;
+ if (!word)
+ break;
- if (word->cbkfn)
- break;
- }
+ if (word->cbkfn)
+ break;
+ }
- if (!word) {
- cli_out ("unrecognized word: %s (position %d)",
- argv[i], i);
- return -1;
- }
+ if (!word) {
+ cli_out("unrecognized word: %s (position %d)\n", argv[i], i);
+ usage();
+ return -1;
+ }
- if (!word->cbkfn) {
- cli_out ("unrecognized command");
- return -1;
- }
+ if (!word->cbkfn) {
+ cli_out("unrecognized command\n");
+ usage();
+ return -1;
+ }
- if ( strcmp (word->word,"help")==0 )
- goto callback;
+ if (strcmp(word->word, "help") == 0)
+ goto callback;
- state->await_connected = cli_cmd_needs_connection (word);
+ state->await_connected = cli_cmd_needs_connection(word);
- ret = cli_cmd_await_connected (state->await_connected);
- if (ret) {
- cli_out ("Connection failed. Please check if gluster "
- "daemon is operational.");
- gf_log ("", GF_LOG_INFO, "Exiting with: %d", ret);
- exit (ret);
- }
+ ret = cli_cmd_await_connected(state->await_connected);
+ if (ret) {
+ cli_out(
+ "Connection failed. Please check if gluster "
+ "daemon is operational.");
+ gf_log("", GF_LOG_INFO, "Exiting with: %d", ret);
+ exit(ret);
+ }
callback:
- ret = word->cbkfn (state, word, (const char **)argv, argc);
- (void) cli_cmd_status_reset ();
- return ret;
+ ret = word->cbkfn(state, word, (const char **)argv, argc);
+ (void)cli_cmd_status_reset();
+ return ret;
}
int
-cli_cmd_input_token_count (const char *text)
+cli_cmd_input_token_count(const char *text)
{
- int count = 0;
- const char *trav = NULL;
- int is_spc = 1;
-
- for (trav = text; *trav; trav++) {
- if (*trav == ' ') {
- is_spc = 1;
- } else {
- if (is_spc) {
- count++;
- is_spc = 0;
- }
- }
+ int count = 0;
+ const char *trav = NULL;
+ int is_spc = 1;
+
+ for (trav = text; *trav; trav++) {
+ if (*trav == ' ') {
+ is_spc = 1;
+ } else {
+ if (is_spc) {
+ count++;
+ is_spc = 0;
+ }
}
+ }
- return count;
+ return count;
}
-
int
-cli_cmd_process_line (struct cli_state *state, const char *text)
+cli_cmd_process_line(struct cli_state *state, const char *text)
{
- int count = 0;
- char **tokens = NULL;
- char **tokenp = NULL;
- char *token = NULL;
- char *copy = NULL;
- char *saveptr = NULL;
- int i = 0;
- int ret = -1;
-
- count = cli_cmd_input_token_count (text);
-
- tokens = calloc (count + 1, sizeof (*tokens));
- if (!tokens)
- return -1;
-
- copy = strdup (text);
- if (!copy)
- goto out;
-
- tokenp = tokens;
-
- for (token = strtok_r (copy, " \t\r\n", &saveptr); token;
- token = strtok_r (NULL, " \t\r\n", &saveptr)) {
- *tokenp = strdup (token);
-
- if (!*tokenp)
- goto out;
- tokenp++;
- i++;
-
- }
-
- ret = cli_cmd_process (state, count, tokens);
+ int count = 0;
+ char **tokens = NULL;
+ char **tokenp = NULL;
+ char *token = NULL;
+ char *copy = NULL;
+ char *saveptr = NULL;
+ int i = 0;
+ int ret = -1;
+
+ count = cli_cmd_input_token_count(text);
+
+ tokens = calloc(count + 1, sizeof(*tokens));
+ if (!tokens)
+ return -1;
+
+ copy = strdup(text);
+ if (!copy)
+ goto out;
+
+ tokenp = tokens;
+
+ for (token = strtok_r(copy, " \t\r\n", &saveptr); token;
+ token = strtok_r(NULL, " \t\r\n", &saveptr)) {
+ *tokenp = strdup(token);
+
+ if (!*tokenp)
+ goto out;
+ tokenp++;
+ i++;
+ }
+
+ ret = cli_cmd_process(state, count, tokens);
out:
- if (copy)
- free (copy);
+ free(copy);
- if (tokens)
- cli_cmd_tokens_destroy (tokens);
+ if (tokens)
+ cli_cmd_tokens_destroy(tokens);
- return ret;
+ return ret;
}
-
int
-cli_cmds_register (struct cli_state *state)
+cli_cmds_register(struct cli_state *state)
{
- int ret = 0;
-
- ret = cli_cmd_volume_register (state);
- if (ret)
- goto out;
-
- ret = cli_cmd_probe_register (state);
- if (ret)
- goto out;
-
- ret = cli_cmd_system_register (state);
- if (ret)
- goto out;
-
- ret = cli_cmd_misc_register (state);
- if (ret)
- goto out;
-
+ int ret = 0;
+
+ ret = cli_cmd_volume_register(state);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_probe_register(state);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_system_register(state);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_misc_register(state);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_snapshot_register(state);
+ if (ret)
+ goto out;
+ ret = cli_cmd_global_register(state);
+ if (ret)
+ goto out;
out:
- return ret;
-}
-
-int
-cli_cmd_cond_init ()
-{
-
- pthread_mutex_init (&cond_mutex, NULL);
- pthread_cond_init (&cond, NULL);
-
- pthread_mutex_init (&conn_mutex, NULL);
- pthread_cond_init (&conn, NULL);
-
- return 0;
+ return ret;
}
int
-cli_cmd_lock ()
+cli_cmd_lock()
{
- pthread_mutex_lock (&cond_mutex);
- return 0;
+ pthread_mutex_lock(&cond_mutex);
+ return 0;
}
int
-cli_cmd_unlock ()
+cli_cmd_unlock()
{
- pthread_mutex_unlock (&cond_mutex);
- return 0;
+ pthread_mutex_unlock(&cond_mutex);
+ return 0;
}
static void
-seconds_from_now (unsigned secs, struct timespec *ts)
+seconds_from_now(unsigned secs, struct timespec *ts)
{
- struct timeval tv = {0,};
+ struct timeval tv = {
+ 0,
+ };
- gettimeofday (&tv, NULL);
+ gettimeofday(&tv, NULL);
- ts->tv_sec = tv.tv_sec + secs;
- ts->tv_nsec = tv.tv_usec * 1000;
+ ts->tv_sec = tv.tv_sec + secs;
+ ts->tv_nsec = tv.tv_usec * 1000;
}
int
-cli_cmd_await_response (unsigned time)
+cli_cmd_await_response(unsigned time)
{
- struct timespec ts = {0,};
- int ret = 0;
-
- cli_op_ret = -1;
-
- seconds_from_now (time, &ts);
- while (!cmd_done && !ret) {
- ret = pthread_cond_timedwait (&cond, &cond_mutex,
- &ts);
- }
-
- cmd_done = 0;
-
- if (ret)
- return ret;
-
- return cli_op_ret;
+ struct timespec ts = {
+ 0,
+ };
+ int ret = 0;
+
+ cli_op_ret = -1;
+
+ seconds_from_now(time, &ts);
+ while (!cmd_done && !ret) {
+ ret = pthread_cond_timedwait(&cond, &cond_mutex, &ts);
+ }
+
+ if (!cmd_done) {
+ if (ret == ETIMEDOUT)
+ cli_out("Error : Request timed out");
+ else
+ cli_out("Error : Command returned with error code:%d", ret);
+ }
+ cmd_done = 0;
+
+ return cli_op_ret;
}
+/* This function must be called _only_ after all actions associated with
+ * command processing is complete. Otherwise, gluster process may exit before
+ * reporting results to stdout/stderr. */
int
-cli_cmd_broadcast_response (int32_t status)
+cli_cmd_broadcast_response(int32_t status)
{
-
- pthread_mutex_lock (&cond_mutex);
- {
- if (!cmd_sent)
- goto out;
- cmd_done = 1;
- cli_op_ret = status;
- pthread_cond_broadcast (&cond);
- }
-
+ pthread_mutex_lock(&cond_mutex);
+ {
+ if (!cmd_sent)
+ goto out;
+ cmd_done = 1;
+ cli_op_ret = status;
+ pthread_cond_broadcast(&cond);
+ }
out:
- pthread_mutex_unlock (&cond_mutex);
- return 0;
+ pthread_mutex_unlock(&cond_mutex);
+ return 0;
}
int32_t
-cli_cmd_await_connected (unsigned conn_timo)
+cli_cmd_await_connected(unsigned conn_timo)
{
- int32_t ret = 0;
- struct timespec ts = {0,};
-
- if (!conn_timo)
- return 0;
-
- pthread_mutex_lock (&conn_mutex);
- {
- seconds_from_now (conn_timo, &ts);
- while (!connected && !ret) {
- ret = pthread_cond_timedwait (&conn, &conn_mutex,
- &ts);
- }
- }
- pthread_mutex_unlock (&conn_mutex);
+ int32_t ret = 0;
+ struct timespec ts = {
+ 0,
+ };
+ if (!conn_timo)
+ return 0;
+
+ pthread_mutex_lock(&conn_mutex);
+ {
+ seconds_from_now(conn_timo, &ts);
+ while (!connected && !ret) {
+ ret = pthread_cond_timedwait(&conn, &conn_mutex, &ts);
+ }
+ }
+ pthread_mutex_unlock(&conn_mutex);
- return ret;
+ return ret;
}
int32_t
-cli_cmd_broadcast_connected ()
+cli_cmd_broadcast_connected(gf_boolean_t status)
{
- pthread_mutex_lock (&conn_mutex);
- {
- connected = 1;
- pthread_cond_broadcast (&conn);
- }
-
- pthread_mutex_unlock (&conn_mutex);
-
- return 0;
+ pthread_mutex_lock(&conn_mutex);
+ {
+ connected = status;
+ pthread_cond_broadcast(&conn);
+ }
+ pthread_mutex_unlock(&conn_mutex);
+
+ return 0;
}
-int
-cli_cmd_submit (void *req, call_frame_t *frame,
- rpc_clnt_prog_t *prog,
- int procnum, struct iobref *iobref,
- xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+gf_boolean_t
+cli_cmd_connected(void)
{
- int ret = -1;
- unsigned timeout = 0;
+ gf_boolean_t status;
- timeout = (GLUSTER_CLI_PROFILE_VOLUME == procnum) ?
- CLI_TOP_CMD_TIMEOUT : CLI_DEFAULT_CMD_TIMEOUT;
+ pthread_mutex_lock(&conn_mutex);
+ {
+ status = connected;
+ }
+ pthread_mutex_unlock(&conn_mutex);
- cli_cmd_lock ();
- cmd_sent = 0;
- ret = cli_submit_request (req, frame, prog,
- procnum, NULL, this, cbkfn, xdrproc);
+ return status;
+}
- if (!ret) {
- cmd_sent = 1;
- ret = cli_cmd_await_response (timeout);
- }
+int
+cli_cmd_submit(struct rpc_clnt *rpc, void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog, int procnum, struct iobref *iobref,
+ xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+ int ret = -1;
+ unsigned timeout = 0;
+
+ if ((GLUSTER_CLI_PROFILE_VOLUME == procnum) ||
+ (GLUSTER_CLI_HEAL_VOLUME == procnum) ||
+ (GLUSTER_CLI_GANESHA == procnum))
+ timeout = cli_ten_minutes_timeout;
+ else
+ timeout = cli_default_conn_timeout;
+
+ cli_cmd_lock();
+ cmd_sent = 0;
+ ret = cli_submit_request(rpc, req, frame, prog, procnum, NULL, this, cbkfn,
+ xdrproc);
+
+ if (!ret) {
+ cmd_sent = 1;
+ ret = cli_cmd_await_response(timeout);
+ }
+
+ cli_cmd_unlock();
+
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- cli_cmd_unlock ();
+int
+cli_cmd_pattern_cmp(void *a, void *b)
+{
+ struct cli_cmd *ia = NULL;
+ struct cli_cmd *ib = NULL;
+ int ret = 0;
+
+ ia = a;
+ ib = b;
+ if (strcmp(ia->pattern, ib->pattern) > 0)
+ ret = 1;
+ else if (strcmp(ia->pattern, ib->pattern) < 0)
+ ret = -1;
+ else
+ ret = 0;
+ return ret;
+}
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+void
+cli_cmd_sort(struct cli_cmd *cmd, int count)
+{
+ gf_array_insertionsort(cmd, 1, count - 2, sizeof(struct cli_cmd),
+ cli_cmd_pattern_cmp);
}
diff --git a/cli/src/cli-cmd.h b/cli/src/cli-cmd.h
index ba877e2c496..c1c068c7085 100644
--- a/cli/src/cli-cmd.h
+++ b/cli/src/cli-cmd.h
@@ -1,118 +1,129 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __CLI_CMD_H__
#define __CLI_CMD_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <netdb.h>
#include "cli.h"
-#include "list.h"
-
-#define CLI_STACK_DESTROY(_frame) \
- do { \
- if (_frame) { \
- if (_frame->local) { \
- gf_log ("cli", GF_LOG_DEBUG, "frame->local " \
- "is not NULL (%p)", _frame->local); \
- cli_local_wipe (_frame->local); \
- _frame->local = NULL; \
- } \
- STACK_DESTROY (_frame->root); \
- } \
- } while (0);
-
-typedef enum {
- GF_ANSWER_YES = 1,
- GF_ANSWER_NO = 2
-} gf_answer_t;
+#include <glusterfs/list.h>
+
+#define GLUSTER_SHARED_STORAGE "gluster_shared_storage"
+
+#define CLI_LOCAL_INIT(local, words, frame, dictionary) \
+ do { \
+ local = cli_local_get(); \
+ \
+ if (local) { \
+ local->words = words; \
+ if (dictionary) \
+ local->dict = dictionary; \
+ if (frame) \
+ frame->local = local; \
+ } \
+ } while (0)
+
+#define CLI_STACK_DESTROY(_frame) \
+ do { \
+ if (_frame) { \
+ if (_frame->local) { \
+ gf_log("cli", GF_LOG_DEBUG, \
+ "frame->local " \
+ "is not NULL (%p)", \
+ _frame->local); \
+ cli_local_wipe(_frame->local); \
+ _frame->local = NULL; \
+ } \
+ STACK_DESTROY(_frame->root); \
+ } \
+ } while (0);
+
+typedef enum { GF_ANSWER_YES = 1, GF_ANSWER_NO = 2 } gf_answer_t;
struct cli_cmd {
- const char *pattern;
- cli_cmd_cbk_t *cbk;
- const char *desc;
- cli_cmd_reg_cbk_t *reg_cbk; /* callback to check in runtime if the *
- * command should be enabled or disabled */
- gf_boolean_t disable;
+ const char *pattern;
+ cli_cmd_cbk_t *cbk;
+ const char *desc;
+ cli_cmd_reg_cbk_t *reg_cbk; /* callback to check in runtime if the *
+ * command should be enabled or disabled */
+ gf_boolean_t disable;
};
struct cli_cmd_volume_get_ctx_ {
- char *volname;
- int flags;
+ char *volname;
+ int flags;
};
typedef struct cli_profile_info_ {
- uint64_t fop_hits;
- double min_latency;
- double max_latency;
- double avg_latency;
- char *fop_name;
- double percentage_avg_latency;
+ uint64_t fop_hits;
+ double min_latency;
+ double max_latency;
+ double avg_latency;
+ char *fop_name;
+ double percentage_avg_latency;
} cli_profile_info_t;
-typedef struct addrinfo_list {
- struct list_head list;
- struct addrinfo *info;
-} addrinfo_list_t;
+typedef struct cli_cmd_volume_get_ctx_ cli_cmd_volume_get_ctx_t;
-typedef enum {
- GF_AI_COMPARE_NO_MATCH = 0,
- GF_AI_COMPARE_MATCH = 1,
- GF_AI_COMPARE_ERROR = 2
-} gf_ai_compare_t;
+int
+cli_cmd_volume_register(struct cli_state *state);
-typedef struct cli_cmd_volume_get_ctx_ cli_cmd_volume_get_ctx_t;
+int
+cli_cmd_probe_register(struct cli_state *state);
-int cli_cmd_volume_register (struct cli_state *state);
+int
+cli_cmd_system_register(struct cli_state *state);
-int cli_cmd_probe_register (struct cli_state *state);
+int
+cli_cmd_snapshot_register(struct cli_state *state);
-int cli_cmd_system_register (struct cli_state *state);
+int
+cli_cmd_global_register(struct cli_state *state);
-int cli_cmd_misc_register (struct cli_state *state);
+int
+cli_cmd_misc_register(struct cli_state *state);
-struct cli_cmd_word *cli_cmd_nextword (struct cli_cmd_word *word,
- const char *text);
-void cli_cmd_tokens_destroy (char **tokens);
+struct cli_cmd_word *
+cli_cmd_nextword(struct cli_cmd_word *word, const char *text);
+void
+cli_cmd_tokens_destroy(char **tokens);
-int cli_cmd_await_response (unsigned time);
+int
+cli_cmd_await_response(unsigned time);
-int cli_cmd_broadcast_response (int32_t status);
+int
+cli_cmd_broadcast_response(int32_t status);
-int cli_cmd_cond_init ();
+int
+cli_cmd_lock();
-int cli_cmd_lock ();
+int
+cli_cmd_unlock();
-int cli_cmd_unlock ();
+int
+cli_cmd_submit(struct rpc_clnt *rpc, void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog, int procnum, struct iobref *iobref,
+ xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
int
-cli_cmd_submit (void *req, call_frame_t *frame,
- rpc_clnt_prog_t *prog,
- int procnum, struct iobref *iobref,
- xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
+cli_cmd_pattern_cmp(void *a, void *b);
+
+void
+cli_cmd_sort(struct cli_cmd *cmd, int count);
gf_answer_t
-cli_cmd_get_confirmation (struct cli_state *state, const char *question);
-int cli_cmd_sent_status_get (int *status);
+cli_cmd_get_confirmation(struct cli_state *state, const char *question);
+int
+cli_cmd_sent_status_get(int *status);
+
+gf_boolean_t
+_limits_set_on_volume(char *volname, int type);
+
#endif /* __CLI_CMD_H__ */
diff --git a/cli/src/cli-mem-types.h b/cli/src/cli-mem-types.h
index 3c49d21836b..b42b4dd86c2 100644
--- a/cli/src/cli-mem-types.h
+++ b/cli/src/cli-mem-types.h
@@ -1,39 +1,30 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __CLI_MEM_TYPES_H__
#define __CLI_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
#define CLI_MEM_TYPE_START (gf_common_mt_end + 1)
enum cli_mem_types_ {
- cli_mt_xlator_list_t = CLI_MEM_TYPE_START,
- cli_mt_xlator_t,
- cli_mt_xlator_cmdline_option_t,
- cli_mt_char,
- cli_mt_call_pool_t,
- cli_mt_cli_local_t,
- cli_mt_cli_get_vol_ctx_t,
- cli_mt_append_str,
- cli_mt_end
+ cli_mt_xlator_list_t = CLI_MEM_TYPE_START,
+ cli_mt_xlator_t,
+ cli_mt_xlator_cmdline_option_t,
+ cli_mt_char,
+ cli_mt_call_pool_t,
+ cli_mt_cli_local_t,
+ cli_mt_cli_get_vol_ctx_t,
+ cli_mt_append_str,
+ cli_mt_cli_cmd,
+ cli_mt_end
};
diff --git a/cli/src/cli-quotad-client.c b/cli/src/cli-quotad-client.c
new file mode 100644
index 00000000000..772b8f75bd9
--- /dev/null
+++ b/cli/src/cli-quotad-client.c
@@ -0,0 +1,146 @@
+/*
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "cli-quotad-client.h"
+
+int
+cli_quotad_submit_request(void *req, call_frame_t *frame, rpc_clnt_prog_t *prog,
+ int procnum, struct iobref *iobref, xlator_t *this,
+ fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {
+ 0,
+ };
+ struct iobuf *iobuf = NULL;
+ char new_iobref = 0;
+ ssize_t xdr_size = 0;
+
+ GF_ASSERT(this);
+
+ if (req) {
+ xdr_size = xdr_sizeof(xdrproc, req);
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ if (!iobref) {
+ iobref = iobref_new();
+ if (!iobref) {
+ goto out;
+ }
+
+ new_iobref = 1;
+ }
+
+ iobref_add(iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_size(iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic(iov, req, xdrproc);
+ if (ret == -1) {
+ goto out;
+ }
+ iov.iov_len = ret;
+ count = 1;
+ }
+
+ /* Send the msg */
+ ret = rpc_clnt_submit(global_quotad_rpc, prog, procnum, cbkfn, &iov, count,
+ NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL);
+ ret = 0;
+
+out:
+ if (new_iobref)
+ iobref_unref(iobref);
+ if (iobuf)
+ iobuf_unref(iobuf);
+
+ return ret;
+}
+
+int
+cli_quotad_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data)
+{
+ xlator_t *this = NULL;
+ int ret = 0;
+
+ this = mydata;
+
+ switch (event) {
+ case RPC_CLNT_CONNECT: {
+ gf_log(this->name, GF_LOG_TRACE, "got RPC_CLNT_CONNECT");
+ break;
+ }
+
+ case RPC_CLNT_DISCONNECT: {
+ gf_log(this->name, GF_LOG_TRACE, "got RPC_CLNT_DISCONNECT");
+ break;
+ }
+
+ default:
+ gf_log(this->name, GF_LOG_TRACE, "got some other RPC event %d",
+ event);
+ ret = 0;
+ break;
+ }
+
+ return ret;
+}
+
+struct rpc_clnt *
+cli_quotad_clnt_init(xlator_t *this, dict_t *options)
+{
+ struct rpc_clnt *rpc = NULL;
+ int ret = -1;
+
+ ret = dict_set_nstrn(options, "transport.address-family",
+ SLEN("transport.address-family"), "unix",
+ SLEN("unix"));
+ if (ret)
+ goto out;
+
+ ret = dict_set_nstrn(options, "transport-type", SLEN("transport-type"),
+ "socket", SLEN("socket"));
+ if (ret)
+ goto out;
+
+ ret = dict_set_nstrn(options, "transport.socket.connect-path",
+ SLEN("transport.socket.connect-path"),
+ "/var/run/gluster/quotad.socket",
+ SLEN("/var/run/gluster/quotad.socket"));
+ if (ret)
+ goto out;
+
+ rpc = rpc_clnt_new(options, this, this->name, 16);
+ if (!rpc)
+ goto out;
+
+ ret = rpc_clnt_register_notify(rpc, cli_quotad_notify, this);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "failed to register notify");
+ goto out;
+ }
+
+ rpc_clnt_start(rpc);
+out:
+ if (ret) {
+ if (rpc)
+ rpc_clnt_unref(rpc);
+ rpc = NULL;
+ }
+
+ return rpc;
+}
diff --git a/cli/src/cli-quotad-client.h b/cli/src/cli-quotad-client.h
new file mode 100644
index 00000000000..71a44e5916b
--- /dev/null
+++ b/cli/src/cli-quotad-client.h
@@ -0,0 +1,29 @@
+/*
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "cli.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include "cli-cmd.h"
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "protocol-common.h"
+#include "cli-mem-types.h"
+
+int
+cli_quotad_submit_request(void *req, call_frame_t *frame, rpc_clnt_prog_t *prog,
+ int procnum, struct iobref *iobref, xlator_t *this,
+ fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
+
+struct rpc_clnt *
+cli_quotad_clnt_init(xlator_t *this, dict_t *options);
+
+int
+cli_quotad_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data);
diff --git a/cli/src/cli-rl.c b/cli/src/cli-rl.c
index f9bf7c81923..7a38a0b882a 100644
--- a/cli/src/cli-rl.c
+++ b/cli/src/cli-rl.c
@@ -1,38 +1,23 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "cli.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
-#include "event.h"
+#include <glusterfs/gf-event.h>
#include <fnmatch.h>
@@ -42,384 +27,376 @@
#include <readline/readline.h>
#include <readline/history.h>
-
int
-cli_rl_out (struct cli_state *state, const char *fmt, va_list ap)
+cli_rl_out(struct cli_state *state, const char *fmt, va_list ap)
{
- int tmp_rl_point = rl_point;
- int n = rl_end;
- int ret = 0;
+ int tmp_rl_point = rl_point;
+ int n = rl_end;
+ int ret = 0;
- if (rl_end >= 0 ) {
- rl_kill_text (0, rl_end);
- rl_redisplay ();
- }
+ if (rl_end >= 0) {
+ rl_kill_text(0, rl_end);
+ rl_redisplay();
+ }
- printf ("\r%*s\r", (int)strlen (state->prompt), "");
+ printf("\r%*s\r", (int)strlen(state->prompt), "");
- ret = vprintf (fmt, ap);
+ ret = vprintf(fmt, ap);
- printf ("\n");
- fflush(stdout);
+ printf("\n");
+ fflush(stdout);
- if (n) {
- rl_do_undo ();
- rl_point = tmp_rl_point;
- rl_reset_line_state ();
- }
+ if (n) {
+ rl_do_undo();
+ rl_point = tmp_rl_point;
+ rl_reset_line_state();
+ }
- return ret;
+ return ret;
}
int
-cli_rl_err (struct cli_state *state, const char *fmt, va_list ap)
+cli_rl_err(struct cli_state *state, const char *fmt, va_list ap)
{
- int tmp_rl_point = rl_point;
- int n = rl_end;
- int ret = 0;
+ int tmp_rl_point = rl_point;
+ int n = rl_end;
+ int ret = 0;
- if (rl_end >= 0 ) {
- rl_kill_text (0, rl_end);
- rl_redisplay ();
- }
+ if (rl_end >= 0) {
+ rl_kill_text(0, rl_end);
+ rl_redisplay();
+ }
- fprintf (stderr, "\r%*s\r", (int)strlen (state->prompt), "");
+ fprintf(stderr, "\r%*s\r", (int)strlen(state->prompt), "");
- ret = vfprintf (stderr, fmt, ap);
+ ret = vfprintf(stderr, fmt, ap);
- fprintf (stderr, "\n");
- fflush(stderr);
+ fprintf(stderr, "\n");
+ fflush(stderr);
- if (n) {
- rl_do_undo ();
- rl_point = tmp_rl_point;
- rl_reset_line_state ();
- }
+ if (n) {
+ rl_do_undo();
+ rl_point = tmp_rl_point;
+ rl_reset_line_state();
+ }
- return ret;
+ return ret;
}
-
void
-cli_rl_process_line (char *line)
+cli_rl_process_line(char *line)
{
- struct cli_state *state = NULL;
- int ret = 0;
-
- state = global_state;
+ struct cli_state *state = NULL;
+ int ret = 0;
- state->rl_processing = 1;
- {
- ret = cli_cmd_process_line (state, line);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to process line");
+ state = global_state;
- add_history (line);
- }
- state->rl_processing = 0;
+ state->rl_processing = 1;
+ {
+ ret = cli_cmd_process_line(state, line);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to process line");
+ add_history(line);
+ }
+ state->rl_processing = 0;
}
-
-int
-cli_rl_stdin (int fd, int idx, void *data,
- int poll_out, int poll_in, int poll_err)
+void
+cli_rl_stdin(int fd, int idx, int gen, void *data, int poll_out, int poll_in,
+ int poll_err, char event_thread_died)
{
- rl_callback_read_char ();
+ struct cli_state *state = NULL;
- return 0;
-}
+ state = data;
+
+ rl_callback_read_char();
+ gf_event_handled(state->ctx->event_pool, fd, idx, gen);
+
+ return;
+}
char *
-cli_rl_autocomplete_entry (const char *text, int times)
+cli_rl_autocomplete_entry(const char *text, int times)
{
- struct cli_state *state = NULL;
- char *retp = NULL;
+ struct cli_state *state = NULL;
+ char *retp = NULL;
- state = global_state;
+ state = global_state;
- if (!state->matchesp)
- return NULL;
+ if (!state->matchesp)
+ return NULL;
- retp = *state->matchesp;
+ retp = *state->matchesp;
- state->matchesp++;
+ state->matchesp++;
- return retp ? strdup (retp) : NULL;
+ return retp ? strdup(retp) : NULL;
}
-
int
-cli_rl_token_count (const char *text)
+cli_rl_token_count(const char *text)
{
- int count = 0;
- const char *trav = NULL;
- int is_spc = 1;
-
- for (trav = text; *trav; trav++) {
- if (*trav == ' ') {
- is_spc = 1;
- } else {
- if (is_spc) {
- count++;
- is_spc = 0;
- }
- }
+ int count = 0;
+ const char *trav = NULL;
+ int is_spc = 1;
+
+ for (trav = text; *trav; trav++) {
+ if (*trav == ' ') {
+ is_spc = 1;
+ } else {
+ if (is_spc) {
+ count++;
+ is_spc = 0;
+ }
}
+ }
- if (is_spc)
- /* what needs to be autocompleted is a full
- new word, and not extend the last word
- */
- count++;
+ if (is_spc)
+ /* what needs to be autocompleted is a full
+ new word, and not extend the last word
+ */
+ count++;
- return count;
+ return count;
}
-
char **
-cli_rl_tokenize (const char *text)
+cli_rl_tokenize(const char *text)
{
- int count = 0;
- char **tokens = NULL;
- char **tokenp = NULL;
- char *token = NULL;
- char *copy = NULL;
- char *saveptr = NULL;
- int i = 0;
-
- count = cli_rl_token_count (text);
-
- tokens = calloc (count + 1, sizeof (*tokens));
- if (!tokens)
- return NULL;
-
- copy = strdup (text);
- if (!copy)
- goto out;
-
- tokenp = tokens;
-
- for (token = strtok_r (copy, " \t\r\n", &saveptr); token;
- token = strtok_r (NULL, " \t\r\n", &saveptr)) {
- *tokenp = strdup (token);
-
- if (!*tokenp)
- goto out;
- tokenp++;
- i++;
-
- }
+ int count = 0;
+ char **tokens = NULL;
+ char **tokenp = NULL;
+ char *token = NULL;
+ char *copy = NULL;
+ char *saveptr = NULL;
+ int i = 0;
+
+ count = cli_rl_token_count(text);
+
+ tokens = calloc(count + 1, sizeof(*tokens));
+ if (!tokens)
+ return NULL;
- if (i < count) {
- /* symbolize that what needs to be autocompleted is
- the full set of possible nextwords, and not extend
- the last word
- */
- *tokenp = strdup ("");
- if (!*tokenp)
- goto out;
- tokenp++;
- i++;
- }
+ copy = strdup(text);
+ if (!copy)
+ goto out;
+
+ tokenp = tokens;
+
+ for (token = strtok_r(copy, " \t\r\n", &saveptr); token;
+ token = strtok_r(NULL, " \t\r\n", &saveptr)) {
+ *tokenp = strdup(token);
+
+ if (!*tokenp)
+ goto out;
+ tokenp++;
+ i++;
+ }
+
+ if (i < count) {
+ /* symbolize that what needs to be autocompleted is
+ the full set of possible nextwords, and not extend
+ the last word
+ */
+ *tokenp = strdup("");
+ if (!*tokenp)
+ goto out;
+ tokenp++;
+ i++;
+ }
out:
- if (copy)
- free (copy);
+ free(copy);
- if (i < count) {
- cli_cmd_tokens_destroy (tokens);
- tokens = NULL;
- }
+ if (i < count) {
+ cli_cmd_tokens_destroy(tokens);
+ tokens = NULL;
+ }
- return tokens;
+ return tokens;
}
-
char **
-cli_rl_get_matches (struct cli_state *state, struct cli_cmd_word *word,
- const char *text)
+cli_rl_get_matches(struct cli_state *state, struct cli_cmd_word *word,
+ const char *text)
{
- char **matches = NULL;
- char **matchesp = NULL;
- struct cli_cmd_word **next = NULL;
- int count = 0;
- int len = 0;
+ char **matches = NULL;
+ char **matchesp = NULL;
+ struct cli_cmd_word **next = NULL;
+ int count = 0;
+ int len = 0;
- len = strlen (text);
+ len = strlen(text);
- if (!word->nextwords)
- return NULL;
+ if (!word->nextwords)
+ return NULL;
- for (next = word->nextwords; *next; next++)
- count++;
+ for (next = word->nextwords; *next; next++)
+ count++;
- matches = calloc (count + 1, sizeof (*matches));
- matchesp = matches;
+ matches = calloc(count + 1, sizeof(*matches));
+ matchesp = matches;
- for (next = word->nextwords; *next; next++) {
- if ((*next)->match) {
- continue;
- }
+ for (next = word->nextwords; *next; next++) {
+ if ((*next)->match) {
+ continue;
+ }
- if (strncmp ((*next)->word, text, len) == 0) {
- *matchesp = strdup ((*next)->word);
- matchesp++;
- }
+ if (strncmp((*next)->word, text, len) == 0) {
+ *matchesp = strdup((*next)->word);
+ matchesp++;
}
+ }
- return matches;
+ return matches;
}
-
int
-cli_rl_autocomplete_prepare (struct cli_state *state, const char *text)
+cli_rl_autocomplete_prepare(struct cli_state *state, const char *text)
{
- struct cli_cmd_word *word = NULL;
- struct cli_cmd_word *next = NULL;
- char **tokens = NULL;
- char **tokenp = NULL;
- char *token = NULL;
- char **matches = NULL;
-
- tokens = cli_rl_tokenize (text);
- if (!tokens)
- return 0;
-
- word = &state->tree.root;
-
- for (tokenp = tokens; (token = *tokenp); tokenp++) {
- if (!*(tokenp+1)) {
- /* last word */
- break;
- }
-
- next = cli_cmd_nextword (word, token);
- word = next;
- if (!word)
- break;
+ struct cli_cmd_word *word = NULL;
+ struct cli_cmd_word *next = NULL;
+ char **tokens = NULL;
+ char **tokenp = NULL;
+ char *token = NULL;
+ char **matches = NULL;
+
+ tokens = cli_rl_tokenize(text);
+ if (!tokens)
+ return 0;
+
+ word = &state->tree.root;
+
+ for (tokenp = tokens; (token = *tokenp); tokenp++) {
+ if (!*(tokenp + 1)) {
+ /* last word */
+ break;
}
+ next = cli_cmd_nextword(word, token);
+ word = next;
if (!word)
- goto out;
+ break;
+ }
+
+ if (!word || !token)
+ goto out;
- matches = cli_rl_get_matches (state, word, token);
+ matches = cli_rl_get_matches(state, word, token);
- state->matches = matches;
- state->matchesp = matches;
+ state->matches = matches;
+ state->matchesp = matches;
out:
- cli_cmd_tokens_destroy (tokens);
- return 0;
+ cli_cmd_tokens_destroy(tokens);
+ return 0;
}
-
int
-cli_rl_autocomplete_cleanup (struct cli_state *state)
+cli_rl_autocomplete_cleanup(struct cli_state *state)
{
- if (state->matches)
- cli_cmd_tokens_destroy (state->matches);
+ if (state->matches)
+ cli_cmd_tokens_destroy(state->matches);
- state->matches = NULL;
- state->matchesp = NULL;
+ state->matches = NULL;
+ state->matchesp = NULL;
- return 0;
+ return 0;
}
-
char **
-cli_rl_autocomplete (const char *text, int start, int end)
+cli_rl_autocomplete(const char *text, int start, int end)
{
- struct cli_state *state = NULL;
- char **matches = NULL;
- char save = 0;
+ struct cli_state *state = NULL;
+ char **matches = NULL;
+ char save = 0;
- state = global_state;
+ state = global_state;
- /* hack to make the autocompletion code neater */
- /* fake it as though the cursor is at the end of line */
+ /* hack to make the autocompletion code neater */
+ /* fake it as though the cursor is at the end of line */
- save = rl_line_buffer[rl_point];
- rl_line_buffer[rl_point] = 0;
+ save = rl_line_buffer[rl_point];
+ rl_line_buffer[rl_point] = 0;
- cli_rl_autocomplete_prepare (state, rl_line_buffer);
+ cli_rl_autocomplete_prepare(state, rl_line_buffer);
- matches = rl_completion_matches (text, cli_rl_autocomplete_entry);
+ matches = rl_completion_matches(text, cli_rl_autocomplete_entry);
- cli_rl_autocomplete_cleanup (state);
+ cli_rl_autocomplete_cleanup(state);
- rl_line_buffer[rl_point] = save;
+ rl_line_buffer[rl_point] = save;
- return matches;
+ return matches;
}
-
static char *
-complete_none (const char *txt, int times)
+complete_none(const char *txt, int times)
{
- return NULL;
+ return NULL;
}
-
void *
-cli_rl_input (void *_data)
+cli_rl_input(void *_data)
{
- struct cli_state *state = NULL;
- char *line = NULL;
+ struct cli_state *state = NULL;
+ char *line = NULL;
- state = _data;
+ state = _data;
- for (;;) {
- line = readline (state->prompt);
- if (!line)
- break;
+ fprintf(stderr,
+ "Welcome to gluster prompt, type 'help' to see the available "
+ "commands.\n");
+ for (;;) {
+ line = readline(state->prompt);
+ if (!line)
+ exit(0); // break;
- cli_rl_process_line (line);
+ if (*line)
+ cli_rl_process_line(line);
- free (line);
- }
+ free(line);
+ }
- return NULL;
+ return NULL;
}
-
int
-cli_rl_enable (struct cli_state *state)
+cli_rl_enable(struct cli_state *state)
{
- int ret = 0;
-
- rl_pre_input_hook = NULL;
- rl_attempted_completion_function = cli_rl_autocomplete;
- rl_completion_entry_function = complete_none;
-
- if (!state->rl_async) {
- ret = pthread_create (&state->input, NULL,
- cli_rl_input, state);
- if (ret == 0)
- state->rl_enabled = 1;
- goto out;
- }
+ int ret = 0;
- ret = event_register (state->ctx->event_pool, 0, cli_rl_stdin, state,
- 1, 0);
- if (ret == -1)
- goto out;
+ rl_pre_input_hook = NULL;
+ rl_attempted_completion_function = cli_rl_autocomplete;
+ rl_completion_entry_function = complete_none;
- state->rl_enabled = 1;
- rl_callback_handler_install (state->prompt, cli_rl_process_line);
+ if (!state->rl_async) {
+ ret = pthread_create(&state->input, NULL, cli_rl_input, state);
+ if (ret == 0)
+ state->rl_enabled = 1;
+ goto out;
+ }
+
+ ret = gf_event_register(state->ctx->event_pool, 0, cli_rl_stdin, state, 1,
+ 0, 0);
+ if (ret == -1)
+ goto out;
+
+ state->rl_enabled = 1;
+ rl_callback_handler_install(state->prompt, cli_rl_process_line);
out:
- return state->rl_enabled;
+ return state->rl_enabled;
}
#else /* HAVE_READLINE */
int
-cli_rl_enable (struct cli_state *state)
+cli_rl_enable(struct cli_state *state)
{
- return 0;
+ return 0;
}
#endif /* HAVE_READLINE */
diff --git a/cli/src/cli-rpc-ops.c b/cli/src/cli-rpc-ops.c
index 492be4388e7..9b6b0c7fa50 100644
--- a/cli/src/cli-rpc-ops.c
+++ b/cli/src/cli-rpc-ops.c
@@ -1,31 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef GSYNC_CONF
-#define GSYNC_CONF GEOREP"/gsyncd.conf"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
/* Widths of various columns in top read/write-perf output
* Total width of top read/write-perf should be 80 chars
@@ -33,6369 +14,10936 @@
*/
#define VOL_TOP_PERF_FILENAME_DEF_WIDTH 47
#define VOL_TOP_PERF_FILENAME_ALT_WIDTH 44
-#define VOL_TOP_PERF_SPEED_WIDTH 4
-#define VOL_TOP_PERF_TIME_WIDTH 26
+#define VOL_TOP_PERF_SPEED_WIDTH 4
+#define VOL_TOP_PERF_TIME_WIDTH 26
+
+#define INDENT_MAIN_HEAD "%-25s %s "
+
+#define RETURNING "Returning %d"
+#define XML_ERROR "Error outputting to xml"
+#define XDR_DECODE_FAIL "Failed to decode xdr response"
+#define DICT_SERIALIZE_FAIL "Failed to serialize to data to dictionary"
+#define DICT_UNSERIALIZE_FAIL "Failed to unserialize the dictionary"
+
+/* Do not show estimates if greater than this number */
+#define REBAL_ESTIMATE_SEC_UPPER_LIMIT (60 * 24 * 3600)
+#define REBAL_ESTIMATE_START_TIME 600
#include "cli.h"
-#include "compat-errno.h"
+#include <glusterfs/compat-errno.h>
#include "cli-cmd.h"
#include <sys/uio.h>
#include <stdlib.h>
#include <sys/mount.h>
-#include "cli1-xdr.h"
-#include "xdr-generic.h"
-#include "protocol-common.h"
+#include <glusterfs/compat.h>
#include "cli-mem-types.h"
-#include "compat.h"
-
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#include "glusterfs3.h"
#include "portmap-xdr.h"
+#include <glusterfs/byte-order.h>
-#include "run.h"
+#include <glusterfs/run.h>
+#include <glusterfs/events.h>
-extern rpc_clnt_prog_t *cli_rpc_prog;
-extern int cli_op_ret;
-extern int connected;
+enum gf_task_types { GF_TASK_TYPE_REBALANCE, GF_TASK_TYPE_REMOVE_BRICK };
-char *cli_volume_type[] = {"Distribute",
- "Stripe",
- "Replicate",
- "Striped-Replicate",
- "Distributed-Stripe",
- "Distributed-Replicate",
- "Distributed-Striped-Replicate",
-};
+rpc_clnt_prog_t cli_quotad_clnt;
+static int32_t
+gf_cli_remove_brick(call_frame_t *frame, xlator_t *this, void *data);
-char *cli_volume_status[] = {"Created",
- "Started",
- "Stopped"
+char *cli_vol_status_str[] = {
+ "Created",
+ "Started",
+ "Stopped",
};
-int32_t
-gf_cli3_1_get_volume (call_frame_t *frame, xlator_t *this,
- void *data);
+char *cli_vol_task_status_str[] = {"not started",
+ "in progress",
+ "stopped",
+ "completed",
+ "failed",
+ "fix-layout in progress",
+ "fix-layout stopped",
+ "fix-layout completed",
+ "fix-layout failed",
+ "unknown"};
+static int32_t
+gf_cli_snapshot(call_frame_t *frame, xlator_t *this, void *data);
-rpc_clnt_prog_t cli_handshake_prog = {
- .progname = "cli handshake",
- .prognum = GLUSTER_HNDSK_PROGRAM,
- .progver = GLUSTER_HNDSK_VERSION,
+static int32_t
+gf_cli_get_volume(call_frame_t *frame, xlator_t *this, void *data);
+
+static int
+cli_to_glusterd(gf_cli_req *req, call_frame_t *frame, fop_cbk_fn_t cbkfn,
+ xdrproc_t xdrproc, dict_t *dict, int procnum, xlator_t *this,
+ rpc_clnt_prog_t *prog, struct iobref *iobref);
+
+static int
+add_cli_cmd_timeout_to_dict(dict_t *dict);
+
+static rpc_clnt_prog_t cli_handshake_prog = {
+ .progname = "cli handshake",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
};
-rpc_clnt_prog_t cli_pmap_prog = {
- .progname = "cli portmap",
- .prognum = GLUSTER_PMAP_PROGRAM,
- .progver = GLUSTER_PMAP_VERSION,
+static rpc_clnt_prog_t cli_pmap_prog = {
+ .progname = "cli portmap",
+ .prognum = GLUSTER_PMAP_PROGRAM,
+ .progver = GLUSTER_PMAP_VERSION,
};
-int
-gf_cli3_1_probe_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static void
+gf_free_xdr_cli_rsp(gf_cli_rsp rsp)
{
- gf1_cli_probe_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
-
- if (-1 == req->rpc_status) {
- goto out;
- }
+ if (rsp.dict.dict_val) {
+ free(rsp.dict.dict_val);
+ }
+ if (rsp.op_errstr) {
+ free(rsp.op_errstr);
+ }
+}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_probe_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- //rsp.op_ret = -1;
- //rsp.op_errno = EINVAL;
- goto out;
- }
+static void
+gf_free_xdr_getspec_rsp(gf_getspec_rsp rsp)
+{
+ if (rsp.spec) {
+ free(rsp.spec);
+ }
+ if (rsp.xdata.xdata_val) {
+ free(rsp.xdata.xdata_val);
+ }
+}
- gf_log ("cli", GF_LOG_INFO, "Received resp to probe");
- if (!rsp.op_ret) {
- switch (rsp.op_errno) {
- case GF_PROBE_SUCCESS:
- snprintf (msg, sizeof (msg),
- "Probe successful");
- break;
- case GF_PROBE_LOCALHOST:
- snprintf (msg, sizeof (msg),
- "Probe on localhost not needed");
- break;
- case GF_PROBE_FRIEND:
- snprintf (msg, sizeof (msg),
- "Probe on host %s port %d already"
- " in peer list", rsp.hostname,
- rsp.port);
- break;
- default:
- snprintf (msg, sizeof (msg),
- "Probe returned with unknown errno %d",
- rsp.op_errno);
- break;
- }
- }
+static void
+gf_free_xdr_fsm_log_rsp(gf1_cli_fsm_log_rsp rsp)
+{
+ if (rsp.op_errstr) {
+ free(rsp.op_errstr);
+ }
+ if (rsp.fsm_log.fsm_log_val) {
+ free(rsp.fsm_log.fsm_log_val);
+ }
+}
+static int
+gf_cli_probe_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = "success";
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ // rsp.op_ret = -1;
+ // rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to probe");
+
+ if (rsp.op_errstr && rsp.op_errstr[0] != '\0') {
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
if (rsp.op_ret) {
- if (rsp.op_errstr && (strlen (rsp.op_errstr) > 0)) {
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- } else {
- switch (rsp.op_errno) {
- case GF_PROBE_ANOTHER_CLUSTER:
- snprintf (msg, sizeof (msg),
- "%s is already part of "
- "another cluster",
- rsp.hostname);
- break;
- case GF_PROBE_VOLUME_CONFLICT:
- snprintf (msg, sizeof (msg),
- "Atleast one volume on %s "
- "conflicts with existing "
- "volumes in the cluster",
- rsp.hostname);
- break;
- case GF_PROBE_UNKNOWN_PEER:
- snprintf (msg, sizeof (msg),
- "%s responded with 'unknown "
- "peer' error, this could "
- "happen if %s doesn't have "
- "localhost in its peer "
- "database", rsp.hostname,
- rsp.hostname);
- break;
- case GF_PROBE_ADD_FAILED:
- snprintf (msg, sizeof (msg),
- "Failed to add peer "
- "information on %s" ,
- rsp.hostname);
- break;
-
- default:
- snprintf (msg, sizeof (msg),
- "Probe unsuccessful\nProbe "
- "returned with unknown errno "
- "%d", rsp.op_errno);
- break;
- }
- }
- gf_log ("cli", GF_LOG_ERROR, "%s", msg);
+ gf_log("cli", GF_LOG_ERROR, "%s", msg);
}
+ }
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("peerProbe", msg, rsp.op_ret,
- rsp.op_errno, NULL);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
- if (!rsp.op_ret)
- cli_out ("%s", msg);
- else
- cli_err ("%s", msg);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str(NULL, (rsp.op_ret) ? NULL : msg, rsp.op_ret,
+ rsp.op_errno, (rsp.op_ret) ? msg : NULL);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- ret = rsp.op_ret;
+ if (!rsp.op_ret)
+ cli_out("peer probe: %s", msg);
+ else
+ cli_err("peer probe: failed: %s", msg);
+
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
}
-int
-gf_cli3_1_deprobe_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli_deprobe_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf1_cli_deprobe_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = "success";
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ // rsp.op_ret = -1;
+ // rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to deprobe");
+
+ if (rsp.op_ret) {
+ if (rsp.op_errstr[0] != '\0') {
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ gf_log("cli", GF_LOG_ERROR, "%s", rsp.op_errstr);
+ }
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str(NULL, (rsp.op_ret) ? NULL : msg, rsp.op_ret,
+ rsp.op_errno, (rsp.op_ret) ? msg : NULL);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- if (-1 == req->rpc_status) {
- goto out;
- }
+ if (!rsp.op_ret)
+ cli_out("peer detach: %s", msg);
+ else
+ cli_err("peer detach: failed: %s", msg);
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_deprobe_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- //rsp.op_ret = -1;
- //rsp.op_errno = EINVAL;
- goto out;
- }
+ ret = rsp.op_ret;
- gf_log ("cli", GF_LOG_INFO, "Received resp to deprobe");
- if (rsp.op_ret) {
- if (strlen (rsp.op_errstr) > 0) {
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- gf_log ("cli", GF_LOG_ERROR, "%s", rsp.op_errstr);
- } else {
- switch (rsp.op_errno) {
- case GF_DEPROBE_LOCALHOST:
- snprintf (msg, sizeof (msg),
- "%s is localhost",
- rsp.hostname);
- break;
- case GF_DEPROBE_NOT_FRIEND:
- snprintf (msg, sizeof (msg),
- "%s is not part of cluster",
- rsp.hostname);
- break;
- case GF_DEPROBE_BRICK_EXIST:
- snprintf (msg, sizeof (msg),
- "Brick(s) with the peer %s "
- "exist in cluster",
- rsp.hostname);
- break;
- case GF_DEPROBE_FRIEND_DOWN:
- snprintf (msg, sizeof (msg),
- "One of the peers is probably"
- " down. Check with 'peer "
- "status'.");
- break;
- default:
- snprintf (msg, sizeof (msg),
- "Detach unsuccessful\nDetach"
- " returned with unknown "
- "errno %d", rsp.op_errno);
- break;
- }
- gf_log ("cli", GF_LOG_ERROR,"Detach failed with op_ret "
- "%d and op_errno %d", rsp.op_ret, rsp.op_errno);
- }
- } else {
- snprintf (msg, sizeof (msg), "Detach successful");
- }
+out:
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("peerDetach", msg, rsp.op_ret,
- rsp.op_errno, NULL);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
+static int
+gf_cli_output_peer_hostnames(dict_t *dict, int count, const char *prefix)
+{
+ int ret = -1;
+ char key[512] = {
+ 0,
+ };
+ int i = 0;
+ char *hostname = NULL;
+
+ cli_out("Other names:");
+ /* Starting from friend.hostname1, as friend.hostname0 will be the same
+ * as friend.hostname
+ */
+ for (i = 1; i < count; i++) {
+ ret = snprintf(key, sizeof(key), "%s.hostname%d", prefix, i);
+ ret = dict_get_strn(dict, key, ret, &hostname);
+ if (ret)
+ break;
+ cli_out("%s", hostname);
+ hostname = NULL;
+ }
+
+ return ret;
+}
+
+static int
+gf_cli_output_peer_status(dict_t *dict, int count)
+{
+ int ret = -1;
+ char *uuid_buf = NULL;
+ char *hostname_buf = NULL;
+ int32_t i = 1;
+ char key[256] = {
+ 0,
+ };
+ int keylen;
+ char *state = NULL;
+ int32_t connected = 0;
+ const char *connected_str = NULL;
+ int hostname_count = 0;
+
+ cli_out("Number of Peers: %d", count);
+ i = 1;
+ while (i <= count) {
+ keylen = snprintf(key, sizeof(key), "friend%d.uuid", i);
+ ret = dict_get_strn(dict, key, keylen, &uuid_buf);
+ if (ret)
+ goto out;
+
+ keylen = snprintf(key, sizeof(key), "friend%d.hostname", i);
+ ret = dict_get_strn(dict, key, keylen, &hostname_buf);
+ if (ret)
+ goto out;
+
+ keylen = snprintf(key, sizeof(key), "friend%d.connected", i);
+ ret = dict_get_int32n(dict, key, keylen, &connected);
+ if (ret)
+ goto out;
+ if (connected)
+ connected_str = "Connected";
+ else
+ connected_str = "Disconnected";
+
+ keylen = snprintf(key, sizeof(key), "friend%d.state", i);
+ ret = dict_get_strn(dict, key, keylen, &state);
+ if (ret)
+ goto out;
+
+ cli_out("\nHostname: %s\nUuid: %s\nState: %s (%s)", hostname_buf,
+ uuid_buf, state, connected_str);
+
+ keylen = snprintf(key, sizeof(key), "friend%d.hostname_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &hostname_count);
+ /* Print other addresses only if there are more than 1.
+ */
+ if ((ret == 0) && (hostname_count > 1)) {
+ snprintf(key, sizeof(key), "friend%d", i);
+ ret = gf_cli_output_peer_hostnames(dict, hostname_count, key);
+ if (ret) {
+ gf_log("cli", GF_LOG_WARNING,
+ "error outputting peer other names");
goto out;
+ }
}
-#endif
- if (!rsp.op_ret)
- cli_out ("%s", msg);
+ i++;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+gf_cli_output_pool_list(dict_t *dict, int count)
+{
+ int ret = -1;
+ char *uuid_buf = NULL;
+ char *hostname_buf = NULL;
+ int32_t hostname_len = 8; /*min len 8 chars*/
+ int32_t i = 1;
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+ int32_t connected = 0;
+ const char *connected_str = NULL;
+
+ if (count <= 0)
+ goto out;
+
+ while (i <= count) {
+ keylen = snprintf(key, sizeof(key), "friend%d.hostname", i);
+ ret = dict_get_strn(dict, key, keylen, &hostname_buf);
+ if (ret)
+ goto out;
+
+ ret = strlen(hostname_buf);
+ if (ret > hostname_len)
+ hostname_len = ret;
+
+ i++;
+ }
+
+ cli_out("UUID\t\t\t\t\t%-*s\tState", hostname_len, "Hostname");
+
+ i = 1;
+ while (i <= count) {
+ keylen = snprintf(key, sizeof(key), "friend%d.uuid", i);
+ ret = dict_get_strn(dict, key, keylen, &uuid_buf);
+ if (ret)
+ goto out;
+
+ keylen = snprintf(key, sizeof(key), "friend%d.hostname", i);
+ ret = dict_get_strn(dict, key, keylen, &hostname_buf);
+ if (ret)
+ goto out;
+
+ keylen = snprintf(key, sizeof(key), "friend%d.connected", i);
+ ret = dict_get_int32n(dict, key, keylen, &connected);
+ if (ret)
+ goto out;
+ if (connected)
+ connected_str = "Connected";
else
- cli_err ("%s", msg);
+ connected_str = "Disconnected";
- ret = rsp.op_ret;
+ cli_out("%s\t%-*s\t%s ", uuid_buf, hostname_len, hostname_buf,
+ connected_str);
+ i++;
+ }
+ ret = 0;
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ return ret;
}
-int
-gf_cli3_1_list_friends_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf1_cli_peer_list_rsp rsp = {0,};
- int ret = -1;
- dict_t *dict = NULL;
- char *uuid_buf = NULL;
- char *hostname_buf = NULL;
- int32_t i = 1;
- char key[256] = {0,};
- char *state = NULL;
- int32_t port = 0;
- int32_t connected = 0;
- char *connected_str = NULL;
-
- if (-1 == req->rpc_status) {
+/* function pointer for gf_cli_output_{pool_list,peer_status} */
+typedef int (*cli_friend_output_fn)(dict_t *, int);
+
+static int
+gf_cli_list_friends_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf1_cli_peer_list_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ char msg[1024] = {
+ 0,
+ };
+ const char *cmd = NULL;
+ cli_friend_output_fn friend_output_fn;
+ call_frame_t *frame = NULL;
+ unsigned long flags = 0;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+
+ flags = (long)frame->local;
+
+ if (flags == GF_CLI_LIST_POOL_NODES) {
+ cmd = "pool list";
+ friend_output_fn = &gf_cli_output_pool_list;
+ } else {
+ cmd = "peer status";
+ friend_output_fn = &gf_cli_output_peer_status;
+ }
+
+ /* 'free' the flags set by gf_cli_list_friends */
+ frame->local = NULL;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf1_cli_peer_list_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ // rsp.op_ret = -1;
+ // rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_DEBUG, "Received resp to list: %d", rsp.op_ret);
+
+ if (!rsp.op_ret) {
+ if (!rsp.friends.friends_len) {
+ snprintf(msg, sizeof(msg), "%s: No peers present", cmd);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_peer_status(dict, rsp.op_ret, rsp.op_errno,
+ msg);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
+ }
+ cli_err("%s", msg);
+ ret = 0;
+ goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_peer_list_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- //rsp.op_ret = -1;
- //rsp.op_errno = EINVAL;
- goto out;
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
}
- gf_log ("cli", GF_LOG_INFO, "Received resp to list: %d",
- rsp.op_ret);
+ ret = dict_unserialize(rsp.friends.friends_val, rsp.friends.friends_len,
+ &dict);
- ret = rsp.op_ret;
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
- if (!rsp.op_ret) {
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_peer_status(dict, rsp.op_ret, rsp.op_errno,
+ msg);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- if (!rsp.friends.friends_len) {
- cli_out ("No peers present");
- ret = 0;
- goto out;
- }
+ ret = dict_get_int32_sizen(dict, "count", &count);
+ if (ret) {
+ goto out;
+ }
- dict = dict_new ();
+ ret = friend_output_fn(dict, count);
+ if (ret) {
+ goto out;
+ }
+ } else {
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_peer_status(dict, rsp.op_ret, rsp.op_errno,
+ NULL);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ } else {
+ ret = -1;
+ }
+ goto out;
+ }
- if (!dict) {
- ret = -1;
- goto out;
- }
+ ret = 0;
- ret = dict_unserialize (rsp.friends.friends_val,
- rsp.friends.friends_len,
- &dict);
+out:
+ if (ret)
+ cli_err("%s: failed", cmd);
- if (ret) {
- gf_log ("", GF_LOG_ERROR,
- "Unable to allocate memory");
- goto out;
- }
+ cli_cmd_broadcast_response(ret);
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("peerStatus", dict,
- rsp.op_ret, rsp.op_errno,
- NULL);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
- ret = dict_get_int32 (dict, "count", &count);
+ if (dict)
+ dict_unref(dict);
- if (ret) {
- goto out;
- }
+ if (rsp.friends.friends_val) {
+ free(rsp.friends.friends_val);
+ }
+ return ret;
+}
- cli_out ("Number of Peers: %d", count);
-
- while ( i <= count) {
- snprintf (key, 256, "friend%d.uuid", i);
- ret = dict_get_str (dict, key, &uuid_buf);
- if (ret)
- goto out;
-
- snprintf (key, 256, "friend%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname_buf);
- if (ret)
- goto out;
-
- snprintf (key, 256, "friend%d.connected", i);
- ret = dict_get_int32 (dict, key, &connected);
- if (ret)
- goto out;
- if (connected)
- connected_str = "Connected";
- else
- connected_str = "Disconnected";
-
- snprintf (key, 256, "friend%d.port", i);
- ret = dict_get_int32 (dict, key, &port);
- if (ret)
- goto out;
-
- snprintf (key, 256, "friend%d.state", i);
- ret = dict_get_str (dict, key, &state);
- if (ret)
- goto out;
-
- if (!port) {
- cli_out ("\nHostname: %s\nUuid: %s\nState: %s "
- "(%s)",
- hostname_buf, uuid_buf, state,
- connected_str);
- } else {
- cli_out ("\nHostname: %s\nPort: %d\nUuid: %s\n"
- "State: %s (%s)", hostname_buf, port,
- uuid_buf, state, connected_str);
- }
- i++;
- }
- } else {
- ret = -1;
- goto out;
- }
+static int
+gf_cli_get_state_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ char *daemon_name = NULL;
+ char *ofilepath = NULL;
+
+ GF_VALIDATE_OR_GOTO("cli", myframe, out);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, ""))
+ cli_err("Failed to get daemon state: %s", rsp.op_errstr);
+ else
+ cli_err(
+ "Failed to get daemon state. Check glusterd"
+ " log file for more details");
+ } else {
+ ret = dict_get_str_sizen(dict, "daemon", &daemon_name);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, "Couldn't get daemon name");
+ ret = dict_get_str_sizen(dict, "ofilepath", &ofilepath);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, "Couldn't get filepath");
- ret = 0;
+ if (daemon_name && ofilepath)
+ cli_out("%s state dumped to %s", daemon_name, ofilepath);
+ }
+
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- if (ret)
- cli_err ("Peer status unsuccessful");
+ if (dict)
+ dict_unref(dict);
- if (dict)
- dict_destroy (dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
- return ret;
+ return ret;
}
-void
-cli_out_options ( char *substr, char *optstr, char *valstr)
+static void
+cli_out_options(char *substr, char *optstr, char *valstr)
+{
+ char *ptr1 = NULL;
+ char *ptr2 = NULL;
+
+ ptr1 = substr;
+ ptr2 = optstr;
+
+ while (ptr1) {
+ /* Avoiding segmentation fault. */
+ if (!ptr2)
+ return;
+ if (*ptr1 != *ptr2)
+ break;
+ ptr1++;
+ ptr2++;
+ }
+
+ if (*ptr2 == '\0')
+ return;
+ cli_out("%s: %s", ptr2, valstr);
+}
+
+static int
+_gf_cli_output_volinfo_opts(dict_t *d, char *k, data_t *v, void *tmp)
{
- char *ptr1 = NULL;
- char *ptr2 = NULL;
+ int ret = 0;
+ char *key = NULL;
+ char *ptr = NULL;
+ data_t *value = NULL;
+
+ key = tmp;
+
+ ptr = strstr(k, "option.");
+ if (ptr) {
+ value = v;
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+ cli_out_options(key, k, v->data);
+ }
+out:
+ return ret;
+}
- ptr1 = substr;
- ptr2 = optstr;
+static int
+print_brick_details(dict_t *dict, int volcount, int start_index, int end_index,
+ int replica_count)
+{
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+ int index = start_index;
+ int isArbiter = 0;
+ int ret = -1;
+ char *brick = NULL;
+
+ while (index <= end_index) {
+ keylen = snprintf(key, sizeof(key), "volume%d.brick%d", volcount,
+ index);
+ ret = dict_get_strn(dict, key, keylen, &brick);
+ if (ret)
+ goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.brick%d.isArbiter",
+ volcount, index);
+ if (dict_getn(dict, key, keylen))
+ isArbiter = 1;
+ else
+ isArbiter = 0;
- while (ptr1)
- {
- if (*ptr1 != *ptr2)
- break;
- ptr1++;
- ptr2++;
- if (!ptr1)
- return;
- if (!ptr2)
- return;
- }
+ if (isArbiter)
+ cli_out("Brick%d: %s (arbiter)", index, brick);
+ else
+ cli_out("Brick%d: %s", index, brick);
+ index++;
+ }
+ ret = 0;
+out:
+ return ret;
+}
- if (*ptr2 == '\0')
- return;
- cli_out ("%s: %s",ptr2 , valstr);
+static void
+gf_cli_print_number_of_bricks(int type, int brick_count, int dist_count,
+ int stripe_count, int replica_count,
+ int disperse_count, int redundancy_count,
+ int arbiter_count)
+{
+ if (type == GF_CLUSTER_TYPE_NONE) {
+ cli_out("Number of Bricks: %d", brick_count);
+ } else if (type == GF_CLUSTER_TYPE_DISPERSE) {
+ cli_out("Number of Bricks: %d x (%d + %d) = %d",
+ (brick_count / dist_count), disperse_count - redundancy_count,
+ redundancy_count, brick_count);
+ } else {
+ /* For both replicate and stripe, dist_count is
+ good enough */
+ if (arbiter_count == 0) {
+ cli_out("Number of Bricks: %d x %d = %d",
+ (brick_count / dist_count), dist_count, brick_count);
+ } else {
+ cli_out("Number of Bricks: %d x (%d + %d) = %d",
+ (brick_count / dist_count), dist_count - arbiter_count,
+ arbiter_count, brick_count);
+ }
+ }
}
+static int
+gf_cli_get_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int ret = -1;
+ int opt_count = 0;
+ int32_t i = 0;
+ int32_t j = 1;
+ int32_t status = 0;
+ int32_t type = 0;
+ int32_t brick_count = 0;
+ int32_t dist_count = 0;
+ int32_t stripe_count = 0;
+ int32_t replica_count = 0;
+ int32_t disperse_count = 0;
+ int32_t redundancy_count = 0;
+ int32_t arbiter_count = 0;
+ int32_t snap_count = 0;
+ int32_t thin_arbiter_count = 0;
+ int32_t vol_type = 0;
+ int32_t transport = 0;
+ char *volume_id_str = NULL;
+ char *volname = NULL;
+ char *ta_brick = NULL;
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
+ char key[64] = {0};
+ int keylen;
+ char err_str[2048] = {0};
+ gf_cli_rsp rsp = {0};
+ char *caps __attribute__((unused)) = NULL;
+ int k __attribute__((unused)) = 0;
+ call_frame_t *frame = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status)
+ goto out;
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to get vol: %d", rsp.op_ret);
+
+ if (!rsp.dict.dict_len) {
+ if (global_state->mode & GLUSTER_MODE_XML)
+ goto xml_output;
+ cli_err("No volumes present");
+ ret = 0;
+ goto out;
+ }
+
+ dict = dict_new();
-int
-gf_cli3_1_get_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- int ret = -1;
- int opt_count = 0;
- int k = 0;
- int32_t i = 0;
- int32_t j = 1;
- int32_t status = 0;
- int32_t type = 0;
- int32_t brick_count = 0;
- int32_t dist_count = 0;
- int32_t stripe_count = 0;
- int32_t replica_count = 0;
- int32_t vol_type = 0;
- int32_t transport = 0;
- char *ptr = NULL;
- char *volume_id_str = NULL;
- char *brick = NULL;
- char *volname = NULL;
- dict_t *dict = NULL;
- data_pair_t *pairs = NULL;
- data_t *value = NULL;
- cli_local_t *local = NULL;
- char key[1024] = {0};
- char err_str[2048] = {0};
- gf_cli_rsp rsp = {0};
-
- if (-1 == req->rpc_status)
- goto out;
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("cli", GF_LOG_ERROR, "error");
- goto out;
- }
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
- gf_log ("cli", GF_LOG_INFO, "Received resp to get vol: %d",
- rsp.op_ret);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
- if (rsp.op_ret) {
- ret = -1;
- goto out;
- }
+ ret = dict_get_int32_sizen(dict, "count", &count);
+ if (ret)
+ goto out;
- if (!rsp.dict.dict_len) {
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML)
- goto xml_output;
-#endif
- cli_out ("No volumes present");
+ if (!count) {
+ switch (local->get_vol.flags) {
+ case GF_CLI_GET_NEXT_VOLUME:
+ GF_FREE(local->get_vol.volname);
+ local->get_vol.volname = NULL;
ret = 0;
goto out;
- }
- dict = dict_new ();
-
- if (!dict) {
+ case GF_CLI_GET_VOLUME:
+ snprintf(err_str, sizeof(err_str), "Volume %s does not exist",
+ local->get_vol.volname);
ret = -1;
- goto out;
+ if (!(global_state->mode & GLUSTER_MODE_XML))
+ goto out;
}
+ }
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
+ if (rsp.op_ret) {
+ if (global_state->mode & GLUSTER_MODE_XML)
+ goto xml_output;
+ ret = -1;
+ goto out;
+ }
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR,
- "Unable to allocate memory");
+xml_output:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ /* For GET_NEXT_VOLUME output is already begun in
+ * and will also end in gf_cli_get_next_volume()
+ */
+ if (local->get_vol.flags == GF_CLI_GET_VOLUME) {
+ ret = cli_xml_output_vol_info_begin(local, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
+ }
}
- ret = dict_get_int32 (dict, "count", &count);
- if (ret)
+ if (dict) {
+ ret = cli_xml_output_vol_info(local, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
goto out;
+ }
+ }
- local = ((call_frame_t *)myframe)->local;
-
- if (!count) {
- switch (local->get_vol.flags) {
-
- case GF_CLI_GET_NEXT_VOLUME:
- GF_FREE (local->get_vol.volname);
- local->get_vol.volname = NULL;
- ret = 0;
- goto out;
-
- case GF_CLI_GET_VOLUME:
- memset (err_str, 0, sizeof (err_str));
- snprintf (err_str, sizeof (err_str),
- "Volume %s does not exist",
- local->get_vol.volname);
- ret = -1;
-#if (HAVE_LIB_XML)
- if (!(global_state->mode & GLUSTER_MODE_XML))
-#endif
- {
- goto out;
- }
- }
+ if (local->get_vol.flags == GF_CLI_GET_VOLUME) {
+ ret = cli_xml_output_vol_info_end(local);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
}
+ goto out;
+ }
-#if (HAVE_LIB_XML)
-xml_output:
- if (global_state->mode & GLUSTER_MODE_XML) {
- /* For GET_NEXT_VOLUME output is already begun in
- * and will also end in gf_cli3_1_get_next_volume()
- */
- if (local->get_vol.flags == GF_CLI_GET_VOLUME) {
- ret = cli_xml_output_vol_info_begin
- (local, rsp.op_ret, rsp.op_errno,
- rsp.op_errstr);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
- }
+ while (i < count) {
+ cli_out(" ");
+ keylen = snprintf(key, sizeof(key), "volume%d.name", i);
+ ret = dict_get_strn(dict, key, keylen, &volname);
+ if (ret)
+ goto out;
- if (dict) {
- ret = cli_xml_output_vol_info (local, dict);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
- }
+ keylen = snprintf(key, sizeof(key), "volume%d.type", i);
+ ret = dict_get_int32n(dict, key, keylen, &type);
+ if (ret)
+ goto out;
- if (local->get_vol.flags == GF_CLI_GET_VOLUME) {
- ret = cli_xml_output_vol_info_end (local);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- }
- goto out;
- }
-#endif
+ keylen = snprintf(key, sizeof(key), "volume%d.status", i);
+ ret = dict_get_int32n(dict, key, keylen, &status);
+ if (ret)
+ goto out;
- while ( i < count) {
- cli_out (" ");
- snprintf (key, 256, "volume%d.name", i);
- ret = dict_get_str (dict, key, &volname);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.brick_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &brick_count);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.type", i);
- ret = dict_get_int32 (dict, key, &type);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.dist_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &dist_count);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.status", i);
- ret = dict_get_int32 (dict, key, &status);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.stripe_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &stripe_count);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.brick_count", i);
- ret = dict_get_int32 (dict, key, &brick_count);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.replica_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &replica_count);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.dist_count", i);
- ret = dict_get_int32 (dict, key, &dist_count);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.disperse_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &disperse_count);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.stripe_count", i);
- ret = dict_get_int32 (dict, key, &stripe_count);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.redundancy_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &redundancy_count);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.replica_count", i);
- ret = dict_get_int32 (dict, key, &replica_count);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.arbiter_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &arbiter_count);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.transport", i);
- ret = dict_get_int32 (dict, key, &transport);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "volume%d.transport", i);
+ ret = dict_get_int32n(dict, key, keylen, &transport);
+ if (ret)
+ goto out;
- snprintf (key, 256, "volume%d.volume_id", i);
- ret = dict_get_str (dict, key, &volume_id_str);
- if (ret)
- goto out;
-
- vol_type = type;
-
- // Distributed (stripe/replicate/stripe-replica) setups
- if ((type > 0) && ( dist_count < brick_count))
- vol_type = type + 3;
-
- cli_out ("Volume Name: %s", volname);
- cli_out ("Type: %s", cli_volume_type[vol_type]);
- cli_out ("Volume ID: %s", volume_id_str);
- cli_out ("Status: %s", cli_volume_status[status]);
-
- if (type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) {
- cli_out ("Number of Bricks: %d x %d x %d = %d",
- (brick_count / dist_count),
- stripe_count,
- replica_count,
- brick_count);
-
- } else if (type == GF_CLUSTER_TYPE_NONE) {
- cli_out ("Number of Bricks: %d", brick_count);
-
- } else {
- /* For both replicate and stripe, dist_count is
- good enough */
- cli_out ("Number of Bricks: %d x %d = %d",
- (brick_count / dist_count),
- dist_count, brick_count);
- }
+ keylen = snprintf(key, sizeof(key), "volume%d.volume_id", i);
+ ret = dict_get_strn(dict, key, keylen, &volume_id_str);
+ if (ret)
+ goto out;
- cli_out ("Transport-type: %s",
- ((transport == 0)?"tcp":
- (transport == 1)?"rdma":
- "tcp,rdma"));
- j = 1;
+ keylen = snprintf(key, sizeof(key), "volume%d.snap_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &snap_count);
+ if (ret)
+ goto out;
- GF_FREE (local->get_vol.volname);
- local->get_vol.volname = gf_strdup (volname);
+ keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_count", i);
+ ret = dict_get_int32n(dict, key, keylen, &thin_arbiter_count);
+ if (ret)
+ goto out;
- if (brick_count)
- cli_out ("Bricks:");
+ // Distributed (stripe/replicate/stripe-replica) setups
+ vol_type = get_vol_type(type, dist_count, brick_count);
- while (j <= brick_count) {
- snprintf (key, 1024, "volume%d.brick%d", i, j);
- ret = dict_get_str (dict, key, &brick);
- if (ret)
- goto out;
+ cli_out("Volume Name: %s", volname);
+ cli_out("Type: %s", vol_type_str[vol_type]);
+ cli_out("Volume ID: %s", volume_id_str);
+ cli_out("Status: %s", cli_vol_status_str[status]);
+ cli_out("Snapshot Count: %d", snap_count);
- cli_out ("Brick%d: %s", j, brick);
- j++;
- }
+ gf_cli_print_number_of_bricks(
+ type, brick_count, dist_count, stripe_count, replica_count,
+ disperse_count, redundancy_count, arbiter_count);
- pairs = dict->members_list;
- if (!pairs) {
- ret = -1;
- goto out;
- }
+ cli_out("Transport-type: %s",
+ ((transport == 0) ? "tcp"
+ : (transport == 1) ? "rdma" : "tcp,rdma"));
+ j = 1;
- snprintf (key, 256, "volume%d.opt_count",i);
- ret = dict_get_int32 (dict, key, &opt_count);
- if (ret)
- goto out;
-
- if (!opt_count)
- goto out;
-
- cli_out ("Options Reconfigured:");
- k = 0;
-
- while (k < opt_count) {
-
- snprintf (key, 256, "volume%d.option.",i);
- while (pairs) {
- ptr = strstr (pairs->key, "option.");
- if (ptr) {
- value = pairs->value;
- if (!value) {
- ret = -1;
- goto out;
- }
- cli_out_options (key, pairs->key,
- value->data);
- }
- pairs = pairs->next;
- }
- k++;
- }
+ GF_FREE(local->get_vol.volname);
+ local->get_vol.volname = gf_strdup(volname);
+
+ cli_out("Bricks:");
+ ret = print_brick_details(dict, i, j, brick_count, replica_count);
+ if (ret)
+ goto out;
- i++;
+ if (thin_arbiter_count) {
+ snprintf(key, sizeof(key), "volume%d.thin_arbiter_brick", i);
+ ret = dict_get_str(dict, key, &ta_brick);
+ if (ret)
+ goto out;
+ cli_out("Thin-arbiter-path: %s", ta_brick);
}
+ snprintf(key, sizeof(key), "volume%d.opt_count", i);
+ ret = dict_get_int32(dict, key, &opt_count);
+ if (ret)
+ goto out;
- ret = 0;
-out:
- cli_cmd_broadcast_response (ret);
+ if (!opt_count)
+ goto out;
+
+ cli_out("Options Reconfigured:");
+
+ snprintf(key, sizeof(key), "volume%d.option.", i);
+
+ ret = dict_foreach(dict, _gf_cli_output_volinfo_opts, key);
if (ret)
- cli_err ("%s", err_str);
+ goto out;
+
+ i++;
+ }
- if (dict)
- dict_destroy (dict);
+ ret = 0;
+out:
+ if (ret)
+ cli_err("%s", err_str);
+
+ cli_cmd_broadcast_response(ret);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
+ if (dict)
+ dict_unref(dict);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
+ gf_free_xdr_cli_rsp(rsp);
- gf_log ("cli", GF_LOG_INFO, "Returning: %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
}
-int
-gf_cli3_1_create_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli_create_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- cli_local_t *local = NULL;
- char *volname = NULL;
- dict_t *dict = NULL;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ dict_t *rsp_dict = NULL;
+ call_frame_t *frame = NULL;
- if (-1 == req->rpc_status) {
- goto out;
- }
+ GF_ASSERT(myframe);
- local = ((call_frame_t *) (myframe))->local;
- ((call_frame_t *) (myframe))->local = NULL;
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ frame = myframe;
- dict = local->dict;
+ GF_ASSERT(frame->local);
- ret = dict_get_str (dict, "volname", &volname);
+ local = frame->local;
- gf_log ("cli", GF_LOG_INFO, "Received resp to create volume");
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("volCreate", dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
+ gf_log("cli", GF_LOG_INFO, "Received resp to create volume");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new();
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
+ }
}
-#endif
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
- else
- cli_out ("Creation of volume %s has been %s", volname,
- (rsp.op_ret) ? "unsuccessful":
- "successful. Please start the volume to "
- "access data.");
- ret = rsp.op_ret;
+ ret = cli_xml_output_vol_create(rsp_dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ cli_err("volume create: %s: failed: %s", volname, rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err("volume create: %s: failed", volname);
+ else
+ cli_out(
+ "volume create: %s: success: "
+ "please start the volume to access data",
+ volname);
+
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- if (dict)
- dict_unref (dict);
- if (local)
- cli_local_wipe (local);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
- return ret;
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+ return ret;
}
-int
-gf_cli3_1_delete_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli_delete_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- cli_local_t *local = NULL;
- char *volname = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *rsp_dict = NULL;
- if (-1 == req->rpc_status) {
- goto out;
- }
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ GF_ASSERT(myframe);
+ frame = myframe;
- frame = myframe;
- local = frame->local;
- frame->local = NULL;
+ GF_ASSERT(frame->local);
- if (local)
- dict = local->dict;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "dict get failed");
- goto out;
- }
+ local = frame->local;
- gf_log ("cli", GF_LOG_INFO, "Received resp to delete volume");
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("volDelete", dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
+ gf_log("cli", GF_LOG_INFO, "Received resp to delete volume");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new();
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
+ }
}
-#endif
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
- else
- cli_out ("Deleting volume %s has been %s", volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
- ret = rsp.op_ret;
+ ret = cli_xml_output_generic_volume("volDelete", rsp_dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "dict get failed");
+ goto out;
+ }
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ cli_err("volume delete: %s: failed: %s", volname, rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err("volume delete: %s: failed", volname);
+ else
+ cli_out("volume delete: %s: success", volname);
+
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- cli_local_wipe (local);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (dict)
- dict_unref (dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
- gf_log ("", GF_LOG_INFO, "Returning with %d", ret);
- return ret;
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+ gf_log("", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
}
-int
-gf_cli3_1_start_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli3_1_uuid_get_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- cli_local_t *local = NULL;
- char *volname = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
+ char *uuid_str = NULL;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
- if (-1 == req->rpc_status) {
- goto out;
- }
+ GF_ASSERT(myframe);
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ if (-1 == req->rpc_status)
+ goto out;
- frame = myframe;
+ frame = myframe;
- if (frame) {
- local = frame->local;
- frame->local = NULL;
- }
+ GF_ASSERT(frame->local);
- if (local)
- dict = local->dict;
+ local = frame->local;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "dict get failed");
- goto out;
- }
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
- gf_log ("cli", GF_LOG_INFO, "Received resp to start volume");
+ frame->local = NULL;
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("volStart", dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ gf_log("cli", GF_LOG_INFO, "Received resp to uuid get");
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_dict("uuidGenerate", dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, "") == 0)
+ cli_err("Get uuid was unsuccessful");
else
- cli_out ("Starting volume %s has been %s", volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
+ cli_err("%s", rsp.op_errstr);
- ret = rsp.op_ret;
+ } else {
+ ret = dict_get_str_sizen(dict, "uuid", &uuid_str);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get uuid from dictionary");
+ goto out;
+ }
+ cli_out("UUID: %s", uuid_str);
+ }
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- if (local)
- cli_local_wipe (local);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
- if (dict)
- dict_unref (dict);
- return ret;
+ cli_cmd_broadcast_response(ret);
+ cli_local_wipe(local);
+ gf_free_xdr_cli_rsp(rsp);
+
+ if (dict)
+ dict_unref(dict);
+
+ gf_log("", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
}
-int
-gf_cli3_1_stop_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli3_1_uuid_reset_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- cli_local_t *local = NULL;
- char *volname = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
- if (-1 == req->rpc_status) {
- goto out;
- }
+ GF_ASSERT(myframe);
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
- frame = myframe;
+ frame = myframe;
- if (frame) {
- local = frame->local;
- frame->local = NULL;
- }
+ GF_ASSERT(frame->local);
- if (local) {
- dict = local->dict;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Unable to get volname from dict");
- goto out;
- }
- }
+ local = frame->local;
- gf_log ("cli", GF_LOG_INFO, "Received resp to stop volume");
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("volStop", dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ frame->local = NULL;
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
- else
- cli_out ("Stopping volume %s has been %s", volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
- ret = rsp.op_ret;
+ gf_log("cli", GF_LOG_INFO, "Received resp to uuid reset");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_dict("uuidReset", NULL, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ cli_err("%s", rsp.op_errstr);
+ else
+ cli_out("resetting the peer uuid has been %s",
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (local)
- cli_local_wipe (local);
+ cli_cmd_broadcast_response(ret);
+ cli_local_wipe(local);
+ gf_free_xdr_cli_rsp(rsp);
- return ret;
+ gf_log("", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
}
-int
-gf_cli3_1_defrag_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- cli_local_t *local = NULL;
- char *volname = NULL;
- call_frame_t *frame = NULL;
- char *status = "unknown";
- int cmd = 0;
- int ret = -1;
- dict_t *dict = NULL;
- dict_t *local_dict = NULL;
- uint64_t files = 0;
- uint64_t size = 0;
- uint64_t lookup = 0;
- char msg[1024] = {0,};
- gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
- int32_t counter = 0;
- char *node_uuid = NULL;
- char key[256] = {0,};
- int32_t i = 1;
- uint64_t failures = 0;
- double elapsed = 0;
-
- if (-1 == req->rpc_status) {
- goto out;
- }
+static int
+gf_cli_start_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *rsp_dict = NULL;
- ret = xdr_to_generic (*iov, &rsp,
- (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ GF_ASSERT(myframe);
- frame = myframe;
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
- if (frame) {
- local = frame->local;
- frame->local = NULL;
- }
+ frame = myframe;
- if (local) {
- local_dict = local->dict;
- }
+ GF_ASSERT(frame->local);
- ret = dict_get_str (local_dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to get volname");
- goto out;
- }
+ local = frame->local;
- ret = dict_get_int32 (local_dict, "rebalance-command", (int32_t*)&cmd);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to get command");
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to start volume");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new();
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
+ }
}
- if (rsp.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
+ ret = cli_xml_output_generic_volume("volStart", rsp_dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- }
- }
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "dict get failed");
+ goto out;
+ }
- if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS))) {
- /* All other possibility is about starting a volume */
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg),
- "Starting rebalance on volume %s has been %s",
- volname, (rsp.op_ret) ? "unsuccessful":
- "successful");
- goto done;
- }
-
- if (cmd == GF_DEFRAG_CMD_STOP) {
- if (rsp.op_ret == -1) {
- if (strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg),
- "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg),
- "rebalance volume %s stop failed",
- volname);
- goto done;
- } else {
- snprintf (msg, sizeof (msg),
- "Stopped rebalance process on volume %s \n",
- volname);
- }
- }
- if (cmd == GF_DEFRAG_CMD_STATUS) {
- if (rsp.op_ret == -1) {
- if (strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg),
- "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg),
- "Failed to get the status of "
- "rebalance process");
- goto done;
- }
- }
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ cli_err("volume start: %s: failed: %s", volname, rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err("volume start: %s: failed", volname);
+ else
+ cli_out("volume start: %s: success", volname);
- ret = dict_get_int32 (dict, "count", &counter);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "count not set");
- goto out;
- }
+ ret = rsp.op_ret;
- cli_out ("%40s %16s %13s %13s %13s %14s %s", "Node", "Rebalanced-files",
- "size", "scanned", "failures", "status", "run time in secs");
- cli_out ("%40s %16s %13s %13s %13s %14s %14s", "---------",
- "-----------", "-----------", "-----------", "-----------",
- "------------", "-----------");
- do {
- snprintf (key, 256, "node-uuid-%d", i);
- ret = dict_get_str (dict, key, &node_uuid);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get node-uuid");
+out:
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
- memset (key, 0, 256);
- snprintf (key, 256, "files-%d", i);
- ret = dict_get_uint64 (dict, key, &files);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get file count");
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+ return ret;
+}
- memset (key, 0, 256);
- snprintf (key, 256, "size-%d", i);
- ret = dict_get_uint64 (dict, key, &size);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get size of xfer");
+static int
+gf_cli_stop_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *rsp_dict = NULL;
- memset (key, 0, 256);
- snprintf (key, 256, "lookups-%d", i);
- ret = dict_get_uint64 (dict, key, &lookup);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get lookedup file count");
+ GF_ASSERT(myframe);
- memset (key, 0, 256);
- snprintf (key, 256, "status-%d", i);
- ret = dict_get_int32 (dict, key, (int32_t *)&status_rcd);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get status");
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
- memset (key, 0, 256);
- snprintf (key, 256, "failures-%d", i);
- ret = dict_get_uint64 (dict, key, &failures);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get failures count");
+ frame = myframe;
- memset (key, 0, 256);
- snprintf (key, 256, "run-time-%d", i);
- ret = dict_get_double (dict, key, &elapsed);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get run-time");
-
- switch (status_rcd) {
- case GF_DEFRAG_STATUS_NOT_STARTED:
- status = "not started";
- break;
- case GF_DEFRAG_STATUS_STARTED:
- status = "in progress";
- break;
- case GF_DEFRAG_STATUS_STOPPED:
- status = "stopped";
- break;
- case GF_DEFRAG_STATUS_COMPLETE:
- status = "completed";
- break;
- case GF_DEFRAG_STATUS_FAILED:
- status = "failed";
- break;
- }
- cli_out ("%40s %16"PRIu64 "%13"PRIu64 "%13"PRIu64 "%13"PRIu64
- " %14s %14.2f", node_uuid, files, size, lookup,
- failures, status, elapsed);
- i++;
- } while (i <= counter);
+ GF_ASSERT(frame->local);
+ local = frame->local;
-done:
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volRebalance", msg, rsp.op_ret,
- status_rcd, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to stop volume");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (rsp.op_ret == 0) {
+ rsp_dict = dict_new();
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len,
+ &rsp_dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
goto out;
+ }
}
-#endif
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
+
+ ret = cli_xml_output_generic_volume("volStop", rsp_dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "Unable to get volname from dict");
+ goto out;
+ }
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ cli_err("volume stop: %s: failed: %s", volname, rsp.op_errstr);
+ else if (rsp.op_ret)
+ cli_err("volume stop: %s: failed", volname);
+ else
+ cli_out("volume stop: %s: success", volname);
+
+ ret = rsp.op_ret;
out:
- if (rsp.op_errstr)
- free (rsp.op_errstr); //malloced by xdr
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val); //malloced by xdr
- if (dict)
- dict_unref (dict);
- if (local_dict)
- dict_unref (local_dict);
- if (local)
- cli_local_wipe (local);
- cli_cmd_broadcast_response (ret);
- return ret;
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ return ret;
}
-int
-gf_cli3_1_rename_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli_print_rebalance_status(dict_t *dict, enum gf_task_types task_type)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
+ int ret = -1;
+ int count = 0;
+ int i = 1;
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+ gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+ char *node_name = NULL;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ double elapsed = 0;
+ char *status_str = NULL;
+ char *size_str = NULL;
+ int32_t hrs = 0;
+ uint32_t min = 0;
+ uint32_t sec = 0;
+ gf_boolean_t fix_layout = _gf_false;
+ uint64_t max_time = 0;
+ uint64_t max_elapsed = 0;
+ uint64_t time_left = 0;
+ gf_boolean_t show_estimates = _gf_false;
+
+ ret = dict_get_int32_sizen(dict, "count", &count);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "count not set");
+ goto out;
+ }
+
+ for (i = 1; i <= count; i++) {
+ keylen = snprintf(key, sizeof(key), "status-%d", i);
+ ret = dict_get_int32n(dict, key, keylen, (int32_t *)&status_rcd);
+ /* If information from a node is missing we should skip
+ * the node and try to fetch information of other nodes.
+ * If information is not found for all nodes, we should
+ * error out.
+ */
+ if (!ret)
+ break;
+ if (ret && i == count) {
+ gf_log("cli", GF_LOG_TRACE, "failed to get status");
+ goto out;
+ }
+ }
+
+ /* Fix layout will be sent to all nodes for the volume
+ so every status should be of type
+ GF_DEFRAG_STATUS_LAYOUT_FIX*
+ */
+
+ if ((task_type == GF_TASK_TYPE_REBALANCE) &&
+ (status_rcd >= GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED)) {
+ fix_layout = _gf_true;
+ }
+
+ if (fix_layout) {
+ cli_out("%35s %41s %27s", "Node", "status", "run time in h:m:s");
+ cli_out("%35s %41s %27s", "---------", "-----------", "------------");
+ } else {
+ cli_out("%40s %16s %13s %13s %13s %13s %20s %18s", "Node",
+ "Rebalanced-files", "size", "scanned", "failures", "skipped",
+ "status",
+ "run time in"
+ " h:m:s");
+ cli_out("%40s %16s %13s %13s %13s %13s %20s %18s", "---------",
+ "-----------", "-----------", "-----------", "-----------",
+ "-----------", "------------", "--------------");
+ }
+
+ for (i = 1; i <= count; i++) {
+ /* Reset the variables to prevent carryover of values */
+ node_name = NULL;
+ files = 0;
+ size = 0;
+ lookup = 0;
+ skipped = 0;
+ status_str = NULL;
+ elapsed = 0;
+ time_left = 0;
+
+ /* Check if status is NOT_STARTED, and continue early */
+ keylen = snprintf(key, sizeof(key), "status-%d", i);
+
+ ret = dict_get_int32n(dict, key, keylen, (int32_t *)&status_rcd);
+ if (ret == -ENOENT) {
+ gf_log("cli", GF_LOG_TRACE, "count %d %d", count, i);
+ gf_log("cli", GF_LOG_TRACE, "failed to get status");
+ gf_log("cli", GF_LOG_ERROR, "node down and has failed to set dict");
+ continue;
+ /* skip this node if value not available*/
+ } else if (ret) {
+ gf_log("cli", GF_LOG_TRACE, "count %d %d", count, i);
+ gf_log("cli", GF_LOG_TRACE, "failed to get status");
+ continue;
+ /* skip this node if value not available*/
+ }
+
+ if (GF_DEFRAG_STATUS_NOT_STARTED == status_rcd)
+ continue;
+
+ if (GF_DEFRAG_STATUS_STARTED == status_rcd)
+ show_estimates = _gf_true;
+
+ keylen = snprintf(key, sizeof(key), "node-name-%d", i);
+ ret = dict_get_strn(dict, key, keylen, &node_name);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get node-name");
- if (-1 == req->rpc_status) {
- goto out;
- }
+ snprintf(key, sizeof(key), "files-%d", i);
+ ret = dict_get_uint64(dict, key, &files);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get file count");
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ snprintf(key, sizeof(key), "size-%d", i);
+ ret = dict_get_uint64(dict, key, &size);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get size of xfer");
+ snprintf(key, sizeof(key), "lookups-%d", i);
+ ret = dict_get_uint64(dict, key, &lookup);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get lookedup file count");
- gf_log ("cli", GF_LOG_INFO, "Received resp to probe");
- snprintf (msg, sizeof (msg), "Rename volume %s",
- (rsp.op_ret) ? "unsuccessful": "successful");
+ snprintf(key, sizeof(key), "failures-%d", i);
+ ret = dict_get_uint64(dict, key, &failures);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get failures count");
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volRename", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ snprintf(key, sizeof(key), "skipped-%d", i);
+ ret = dict_get_uint64(dict, key, &skipped);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get skipped count");
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
+ /* For remove-brick include skipped count into failure count*/
+ if (task_type != GF_TASK_TYPE_REBALANCE) {
+ failures += skipped;
+ skipped = 0;
+ }
-out:
- cli_cmd_broadcast_response (ret);
- return ret;
-}
+ snprintf(key, sizeof(key), "run-time-%d", i);
+ ret = dict_get_double(dict, key, &elapsed);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get run-time");
-int
-gf_cli3_1_reset_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
+ snprintf(key, sizeof(key), "time-left-%d", i);
+ ret = dict_get_uint64(dict, key, &time_left);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get time left");
- if (-1 == req->rpc_status) {
- goto out;
- }
+ if (elapsed > max_elapsed)
+ max_elapsed = elapsed;
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ if (time_left > max_time)
+ max_time = time_left;
- gf_log ("cli", GF_LOG_INFO, "Received resp to reset");
+ /* Check for array bound */
+ if (status_rcd >= GF_DEFRAG_STATUS_MAX)
+ status_rcd = GF_DEFRAG_STATUS_MAX;
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg), "reset volume %s",
- (rsp.op_ret) ? "unsuccessful": "successful");
+ status_str = cli_vol_task_status_str[status_rcd];
+ size_str = gf_uint64_2human_readable(size);
+ hrs = elapsed / 3600;
+ min = ((uint64_t)elapsed % 3600) / 60;
+ sec = ((uint64_t)elapsed % 3600) % 60;
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volReset", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
+ if (fix_layout) {
+ cli_out("%35s %50s %8d:%d:%d", node_name, status_str, hrs, min,
+ sec);
+ } else {
+ if (size_str) {
+ cli_out("%40s %16" PRIu64
+ " %13s"
+ " %13" PRIu64 " %13" PRIu64 " %13" PRIu64
+ " %20s "
+ "%8d:%02d:%02d",
+ node_name, files, size_str, lookup, failures, skipped,
+ status_str, hrs, min, sec);
+ } else {
+ cli_out("%40s %16" PRIu64 " %13" PRIu64 " %13" PRIu64
+ " %13" PRIu64 " %13" PRIu64
+ " %20s"
+ " %8d:%02d:%02d",
+ node_name, files, size, lookup, failures, skipped,
+ status_str, hrs, min, sec);
+ }
+ }
+ GF_FREE(size_str);
+ }
+
+ /* Max time will be non-zero if rebalance is still running */
+ if (max_time) {
+ hrs = max_time / 3600;
+ min = (max_time % 3600) / 60;
+ sec = (max_time % 3600) % 60;
+
+ if (hrs < REBAL_ESTIMATE_SEC_UPPER_LIMIT) {
+ cli_out(
+ "Estimated time left for rebalance to "
+ "complete : %8d:%02d:%02d",
+ hrs, min, sec);
+ } else {
+ cli_out(
+ "Estimated time left for rebalance to "
+ "complete : > 2 months. Please try again "
+ "later.");
+ }
+ } else {
+ /* Rebalance will return 0 if it could not calculate the
+ * estimates or if it is complete.
+ */
+ if (!show_estimates) {
+ goto out;
+ }
+ if (max_elapsed <= REBAL_ESTIMATE_START_TIME) {
+ cli_out(
+ "The estimated time for rebalance to complete "
+ "will be unavailable for the first 10 "
+ "minutes.");
+ } else {
+ cli_out(
+ "Rebalance estimated time unavailable. Please "
+ "try again later.");
}
-#endif
-
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
-
+ }
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ return ret;
}
-int
-gf_cli3_1_set_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli_defrag_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- dict_t *dict = NULL;
- char *help_str = NULL;
- char msg[1024] = {0,};
-
- if (-1 == req->rpc_status) {
- goto out;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ call_frame_t *frame = NULL;
+ int cmd = 0;
+ int ret = -1;
+ dict_t *dict = NULL;
+ char msg[1024] = {
+ 0,
+ };
+ char *task_id_str = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "Failed to get volname");
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(local->dict, "rebalance-command",
+ (int32_t *)&cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get command");
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret < 0) {
+ gf_log("glusterd", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
}
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
+ if (!((cmd == GF_DEFRAG_CMD_STOP) || (cmd == GF_DEFRAG_CMD_STATUS)) &&
+ !(global_state->mode & GLUSTER_MODE_XML)) {
+ ret = dict_get_str_sizen(dict, GF_REBALANCE_TID_KEY, &task_id_str);
+ if (ret) {
+ gf_log("cli", GF_LOG_WARNING, "failed to get %s from dict",
+ GF_REBALANCE_TID_KEY);
}
+ if (rsp.op_ret && strcmp(rsp.op_errstr, "")) {
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ } else {
+ if (!rsp.op_ret) {
+ /* append errstr in the cli msg for successful
+ * case since unlock failures can be highlighted
+ * event though rebalance command was successful
+ */
+ snprintf(msg, sizeof(msg),
+ "Rebalance on %s has been "
+ "started successfully. Use "
+ "rebalance status command to"
+ " check status of the "
+ "rebalance process.\nID: %s",
+ volname, task_id_str);
+ } else {
+ snprintf(msg, sizeof(msg),
+ "Starting rebalance on volume %s has "
+ "been unsuccessful.",
+ volname);
+ }
+ }
+ goto done;
+ }
+
+ if (cmd == GF_DEFRAG_CMD_STOP) {
+ if (rsp.op_ret == -1) {
+ if (strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg), "rebalance volume %s stop failed",
+ volname);
+ goto done;
+ } else {
+ /* append errstr in the cli msg for successful case
+ * since unlock failures can be highlighted event though
+ * rebalance command was successful */
+ snprintf(msg, sizeof(msg),
+ "rebalance process may be in the middle of a "
+ "file migration.\nThe process will be fully "
+ "stopped once the migration of the file is "
+ "complete.\nPlease check rebalance process "
+ "for completion before doing any further "
+ "brick related tasks on the volume.\n%s",
+ rsp.op_errstr);
+ }
+ }
+ if (cmd == GF_DEFRAG_CMD_STATUS) {
+ if (rsp.op_ret == -1) {
+ if (strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg),
+ "Failed to get the status of rebalance process");
+ goto done;
+ } else {
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ }
+ }
- gf_log ("cli", GF_LOG_INFO, "Received resp to set");
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_rebalance(cmd, dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ goto out;
+ }
- dict = dict_new ();
+ ret = gf_cli_print_rebalance_status(dict, GF_TASK_TYPE_REBALANCE);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, "Failed to print rebalance status");
- if (!dict) {
- ret = -1;
- goto out;
- }
+done:
+ if (global_state->mode & GLUSTER_MODE_XML)
+ cli_xml_output_str("volRebalance", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ else {
+ if (rsp.op_ret)
+ cli_err("volume rebalance: %s: failed%s%s", volname,
+ strlen(msg) ? ": " : "", msg);
+ else
+ cli_out("volume rebalance: %s: success%s%s", volname,
+ strlen(msg) ? ": " : "", msg);
+ }
+ ret = rsp.op_ret;
- ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+out:
+ gf_free_xdr_cli_rsp(rsp);
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ return ret;
+}
+static int
+gf_cli_rename_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = {
+ 0,
+ };
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to probe");
+ snprintf(msg, sizeof(msg), "Rename volume %s",
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str("volRename", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
if (ret)
- goto out;
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- if (dict_get_str (dict, "help-str", &help_str) && !msg[0])
- snprintf (msg, sizeof (msg), "Set volume %s",
- (rsp.op_ret) ? "unsuccessful": "successful");
+ if (rsp.op_ret)
+ cli_err("volume rename: failed");
+ else
+ cli_out("volume rename: success");
-#if (HAVE_LIB_XML)
- if ((global_state->mode & GLUSTER_MODE_XML) && (help_str == NULL)) {
- ret = cli_xml_output_str ("volSet", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ ret = rsp.op_ret;
+
+out:
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
+static int
+gf_cli_reset_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = {
+ 0,
+ };
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to reset");
+
+ if (strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg), "reset volume %s",
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str("volReset", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", ((help_str == NULL) ? msg : help_str));
+ if (rsp.op_ret)
+ cli_err("volume reset: failed: %s", msg);
+ else
+ cli_out("volume reset: success: %s", msg);
- ret = rsp.op_ret;
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
}
-int
-gf_cli3_1_add_brick_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli_ganesha_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
- if (-1 == req->rpc_status) {
- goto out;
- }
+ GF_ASSERT(myframe);
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
- gf_log ("cli", GF_LOG_INFO, "Received resp to add brick");
+ gf_log("cli", GF_LOG_DEBUG, "Received resp to ganesha");
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg), "Add Brick %s",
- (rsp.op_ret) ? "unsuccessful": "successful");
+ dict = dict_new();
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volAddBrick", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
- if (rsp.op_ret)
- cli_err ("%s", msg);
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, ""))
+ cli_err("nfs-ganesha: failed: %s", rsp.op_errstr);
else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
+ cli_err("nfs-ganesha: failed");
+ }
-out:
- cli_cmd_broadcast_response (ret);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
- return ret;
-}
+ else {
+ cli_out("nfs-ganesha : success ");
+ }
-int
-gf_cli3_remove_brick_status_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- char *status = "unknown";
- int ret = -1;
- uint64_t files = 0;
- uint64_t size = 0;
- uint64_t lookup = 0;
- dict_t *dict = NULL;
- //char msg[1024] = {0,};
- char key[256] = {0,};
- int32_t i = 1;
- int32_t counter = 0;
- char *node_uuid = 0;
- gf_defrag_status_t status_rcd = GF_DEFRAG_STATUS_NOT_STARTED;
- uint64_t failures = 0;
- double elapsed = 0;
-
-
- if (-1 == req->rpc_status) {
- goto out;
- }
+ ret = rsp.op_ret;
- ret = xdr_to_generic (*iov, &rsp,
- (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+out:
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ return ret;
+}
- ret = rsp.op_ret;
- if (rsp.op_ret == -1) {
- if (strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
- else
- cli_err ("failed to get the status of "
- "remove-brick process");
- goto out;
+static char *
+is_server_debug_xlator(void *myframe)
+{
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+ char **words = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char *debug_xlator = NULL;
+
+ frame = myframe;
+ local = frame->local;
+ words = (char **)local->words;
+
+ while (*words != NULL) {
+ if (strstr(*words, "trace") == NULL &&
+ strstr(*words, "error-gen") == NULL) {
+ words++;
+ continue;
+ }
+
+ key = *words;
+ words++;
+ value = *words;
+ if (value == NULL)
+ break;
+ if (strstr(value, "client")) {
+ words++;
+ continue;
+ } else {
+ if (!(strstr(value, "posix") || strstr(value, "acl") ||
+ strstr(value, "locks") || strstr(value, "io-threads") ||
+ strstr(value, "marker") || strstr(value, "index"))) {
+ words++;
+ continue;
+ } else {
+ debug_xlator = gf_strdup(key);
+ break;
+ }
}
+ }
- if (rsp.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
+ return debug_xlator;
+}
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- }
- }
+static int
+gf_cli_set_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ char *help_str = NULL;
+ char msg[1024] = {
+ 0,
+ };
+ char *debug_xlator = NULL;
+ char tmp_str[512] = {
+ 0,
+ };
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to set");
+
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+
+ /* For brick processes graph change does not happen on the fly.
+ * The process has to be restarted. So this is a check from the
+ * volume set option such that if debug xlators such as trace/errorgen
+ * are provided in the set command, warn the user.
+ */
+ debug_xlator = is_server_debug_xlator(myframe);
+
+ if (dict_get_str_sizen(dict, "help-str", &help_str) && !msg[0])
+ snprintf(msg, sizeof(msg), "Set volume %s",
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+ if (rsp.op_ret == 0 && debug_xlator) {
+ snprintf(tmp_str, sizeof(tmp_str),
+ "\n%s translator has been "
+ "added to the server volume file. Please restart the"
+ " volume for enabling the translator",
+ debug_xlator);
+ }
+
+ if ((global_state->mode & GLUSTER_MODE_XML) && (help_str == NULL)) {
+ ret = cli_xml_output_str("volSet", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- ret = dict_get_int32 (dict, "count", &counter);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "count not set");
- goto out;
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, ""))
+ cli_err("volume set: failed: %s", rsp.op_errstr);
+ else
+ cli_err("volume set: failed");
+ } else {
+ if (help_str == NULL) {
+ if (debug_xlator == NULL)
+ cli_out("volume set: success");
+ else
+ cli_out("volume set: success%s", tmp_str);
+ } else {
+ cli_out("%s", help_str);
}
+ }
+ ret = rsp.op_ret;
- cli_out ("%40s %16s %13s %13s %13s %14s %s", "Node", "Rebalanced-files",
- "size", "scanned", "failures", "status", "run-time in secs");
- cli_out ("%40s %16s %13s %13s %13s %14s %14s", "---------",
- "-----------", "-----------", "-----------", "-----------",
- "------------", "------------");
-
- do {
- snprintf (key, 256, "node-uuid-%d", i);
- ret = dict_get_str (dict, key, &node_uuid);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get node-uuid");
+out:
+ if (dict)
+ dict_unref(dict);
+ GF_FREE(debug_xlator);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
- memset (key, 0, 256);
- snprintf (key, 256, "files-%d", i);
- ret = dict_get_uint64 (dict, key, &files);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get file count");
+int
+gf_cli_add_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = {
+ 0,
+ };
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to add brick");
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg), "Add Brick %s",
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str("volAddBrick", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- memset (key, 0, 256);
- snprintf (key, 256, "size-%d", i);
- ret = dict_get_uint64 (dict, key, &size);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get size of xfer");
+ if (rsp.op_ret)
+ cli_err("volume add-brick: failed: %s", rsp.op_errstr);
+ else
+ cli_out("volume add-brick: success");
+ ret = rsp.op_ret;
- memset (key, 0, 256);
- snprintf (key, 256, "lookups-%d", i);
- ret = dict_get_uint64 (dict, key, &lookup);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get lookedup file count");
+out:
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
- memset (key, 0, 256);
- snprintf (key, 256, "status-%d", i);
- ret = dict_get_int32 (dict, key, (int32_t *)&status_rcd);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get status");
+static int
+gf_cli3_remove_brick_status_cbk(struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ char msg[1024] = {
+ 0,
+ };
+ int32_t command = 0;
+ gf1_op_commands cmd = GF_OP_CMD_NONE;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ const char *cmd_str;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(local->dict, "command", &command);
+ if (ret)
+ goto out;
+
+ cmd = command;
+
+ switch (cmd) {
+ case GF_OP_CMD_STOP:
+ cmd_str = "stop";
+ break;
+ case GF_OP_CMD_STATUS:
+ cmd_str = "status";
+ break;
+ default:
+ cmd_str = "unknown";
+ break;
+ }
+
+ ret = rsp.op_ret;
+ if (rsp.op_ret == -1) {
+ if (strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "volume remove-brick %s: failed: %s",
+ cmd_str, rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg), "volume remove-brick %s: failed",
+ cmd_str);
- snprintf (key, 256, "failures-%d", i);
- ret = dict_get_uint64 (dict, key, &failures);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "Failed to get failure on files");
+ if (global_state->mode & GLUSTER_MODE_XML)
+ goto xml_output;
- memset (key, 0, 256);
- snprintf (key, 256, "run-time-%d", i);
- ret = dict_get_double (dict, key, &elapsed);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "Failed to get run-time");
-
- switch (status_rcd) {
- case GF_DEFRAG_STATUS_NOT_STARTED:
- status = "not started";
- break;
- case GF_DEFRAG_STATUS_STARTED:
- status = "in progress";
- break;
- case GF_DEFRAG_STATUS_STOPPED:
- status = "stopped";
- break;
- case GF_DEFRAG_STATUS_COMPLETE:
- status = "completed";
- break;
- case GF_DEFRAG_STATUS_FAILED:
- status = "failed";
- break;
- }
- cli_out ("%40s %16"PRIu64 "%13"PRIu64 "%13"PRIu64 "%13"PRIu64
- " %14s %14.2f", node_uuid, files, size, lookup,
- failures, status, elapsed);
- i++;
- } while (i <= counter);
+ cli_err("%s", msg);
+ goto out;
+ }
- //TODO: Do proper xml output
- /*
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volRemoveBrick", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
- cli_out ("%s", msg);
- */
-out:
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val); //malloced by xdr
- if (dict)
- dict_unref (dict);
- cli_cmd_broadcast_response (ret);
- return ret;
-}
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret < 0) {
+ strncpy(msg, DICT_UNSERIALIZE_FAIL, sizeof(msg));
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ rsp.op_ret = -1;
+ goto xml_output;
+ }
-int
-gf_cli3_1_remove_brick_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
- gf1_op_commands cmd = GF_OP_CMD_NONE;
- char *cmd_str = "unknown";
- cli_local_t *local = NULL;
- call_frame_t *frame = NULL;
-
- if (-1 == req->rpc_status) {
- goto out;
+ gf_log("cli", GF_LOG_ERROR, "%s", msg);
+ goto out;
}
+ }
- frame = myframe;
- local = frame->local;
+xml_output:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (strcmp(rsp.op_errstr, "")) {
+ ret = cli_xml_output_vol_remove_brick(_gf_true, dict, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr,
+ "volRemoveBrick");
+ } else {
+ ret = cli_xml_output_vol_remove_brick(_gf_true, dict, rsp.op_ret,
+ rsp.op_errno, msg,
+ "volRemoveBrick");
+ }
+ goto out;
+ }
+
+ ret = gf_cli_print_rebalance_status(dict, GF_TASK_TYPE_REMOVE_BRICK);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to print remove-brick rebalance status");
+ goto out;
+ }
+
+ if ((cmd == GF_OP_CMD_STOP) && (rsp.op_ret == 0)) {
+ cli_out(
+ "'remove-brick' process may be in the middle of a "
+ "file migration.\nThe process will be fully stopped "
+ "once the migration of the file is complete.\nPlease "
+ "check remove-brick process for completion before "
+ "doing any further brick related tasks on the "
+ "volume.");
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+out:
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
- ret = dict_get_int32 (local->dict, "command", (int32_t *)&cmd);
+static int
+gf_cli_remove_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = {
+ 0,
+ };
+ gf1_op_commands cmd = GF_OP_CMD_NONE;
+ char *cmd_str = "unknown";
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ char *task_id_str = NULL;
+ dict_t *rsp_dict = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(local->dict, "command", (int32_t *)&cmd);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "failed to get command");
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "failed to get command");
- goto out;
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
}
+ }
- switch (cmd) {
-
+ switch (cmd) {
+ case GF_OP_CMD_DETACH_START:
case GF_OP_CMD_START:
- cmd_str = "start";
- break;
+ cmd_str = "start";
+
+ ret = dict_get_str_sizen(rsp_dict, GF_REMOVE_BRICK_TID_KEY,
+ &task_id_str);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "remove-brick-id is not present in dict");
+ }
+ break;
case GF_OP_CMD_COMMIT:
- cmd_str = "commit";
- break;
+ cmd_str = "commit";
+ break;
case GF_OP_CMD_COMMIT_FORCE:
- cmd_str = "commit force";
- break;
+ cmd_str = "commit force";
+ break;
default:
- cmd_str = "unknown";
- break;
- }
-
- gf_log ("cli", GF_LOG_INFO, "Received resp to remove brick");
-
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg), "Remove Brick %s %s", cmd_str,
- (rsp.op_ret) ? "unsuccessful": "successful");
-
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volRemoveBrick", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
+ cmd_str = "unknown";
+ break;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to remove brick");
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg), "Remove Brick %s %s", cmd_str,
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_remove_brick(_gf_false, rsp_dict, rsp.op_ret,
+ rsp.op_errno, msg,
+ "volRemoveBrick");
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ cli_err("volume remove-brick %s: failed: %s", cmd_str, msg);
+ } else {
+ cli_out("volume remove-brick %s: success", cmd_str);
+ if (GF_OP_CMD_START == cmd && task_id_str != NULL)
+ cli_out("ID: %s", task_id_str);
+ if (GF_OP_CMD_COMMIT == cmd)
+ cli_out(
+ "Check the removed bricks to ensure all files "
+ "are migrated.\nIf files with data are "
+ "found on the brick path, copy them via a "
+ "gluster mount point before re-purposing the "
+ "removed brick. ");
+ }
+
+ ret = rsp.op_ret;
out:
- if (frame)
- frame->local = NULL;
-
- if (local) {
- dict_unref (local->dict);
- cli_local_wipe (local);
- }
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
- cli_cmd_broadcast_response (ret);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
+ if (rsp_dict)
+ dict_unref(rsp_dict);
- return ret;
+ return ret;
}
+static int
+gf_cli_reset_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ const char *rb_operation_str = NULL;
+ dict_t *rsp_dict = NULL;
+ char msg[1024] = {
+ 0,
+ };
+ char *reset_op = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(local->dict, "operation", &reset_op);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "dict_get on operation failed");
+ goto out;
+ }
+
+ if (strcmp(reset_op, "GF_RESET_OP_START") &&
+ strcmp(reset_op, "GF_RESET_OP_COMMIT") &&
+ strcmp(reset_op, "GF_RESET_OP_COMMIT_FORCE")) {
+ ret = -1;
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+ }
+
+ if (rsp.op_ret && (strcmp(rsp.op_errstr, ""))) {
+ rb_operation_str = rsp.op_errstr;
+ } else {
+ if (!strcmp(reset_op, "GF_RESET_OP_START")) {
+ if (rsp.op_ret)
+ rb_operation_str = "reset-brick start operation failed";
+ else
+ rb_operation_str = "reset-brick start operation successful";
+ } else if (!strcmp(reset_op, "GF_RESET_OP_COMMIT")) {
+ if (rsp.op_ret)
+ rb_operation_str = "reset-brick commit operation failed";
+ else
+ rb_operation_str = "reset-brick commit operation successful";
+ } else if (!strcmp(reset_op, "GF_RESET_OP_COMMIT_FORCE")) {
+ if (rsp.op_ret)
+ rb_operation_str = "reset-brick commit force operation failed";
+ else
+ rb_operation_str =
+ "reset-brick commit force operation successful";
+ }
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to reset brick");
+ snprintf(msg, sizeof(msg), "%s",
+ rb_operation_str ? rb_operation_str : "Unknown operation");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_replace_brick(rsp_dict, rsp.op_ret,
+ rsp.op_errno, msg);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+ if (rsp.op_ret)
+ cli_err("volume reset-brick: failed: %s", msg);
+ else
+ cli_out("volume reset-brick: success: %s", msg);
+ ret = rsp.op_ret;
-int
-gf_cli3_1_replace_brick_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- cli_local_t *local = NULL;
- call_frame_t *frame = NULL;
- dict_t *dict = NULL;
- char *src_brick = NULL;
- char *dst_brick = NULL;
- char *status_reply = NULL;
- gf1_cli_replace_op replace_op = 0;
- char *rb_operation_str = NULL;
- dict_t *rsp_dict = NULL;
- char msg[1024] = {0,};
-
- if (-1 == req->rpc_status) {
- goto out;
- }
-
- frame = (call_frame_t *) myframe;
+out:
+ if (frame)
+ frame->local = NULL;
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ if (local)
+ cli_local_wipe(local);
- local = frame->local;
- GF_ASSERT (local);
- dict = local->dict;
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ if (rsp_dict)
+ dict_unref(rsp_dict);
- ret = dict_get_int32 (dict, "operation", (int32_t *)&replace_op);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "dict_get on operation failed");
- goto out;
+ return ret;
+}
+static int
+gf_cli_replace_brick_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ const char *rb_operation_str = NULL;
+ dict_t *rsp_dict = NULL;
+ char msg[1024] = {
+ 0,
+ };
+ char *replace_op = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(local->dict, "operation", &replace_op);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "dict_get on operation failed");
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
}
+ }
- switch (replace_op) {
- case GF_REPLACE_OP_START:
- if (rsp.op_ret)
- rb_operation_str = "replace-brick failed to start";
- else
- rb_operation_str = "replace-brick started successfully";
- break;
+ if (!strcmp(replace_op, "GF_REPLACE_OP_COMMIT_FORCE")) {
+ if (rsp.op_ret || ret)
+ rb_operation_str = "replace-brick commit force operation failed";
+ else
+ rb_operation_str =
+ "replace-brick commit force operation successful";
+ } else {
+ gf_log(frame->this->name, GF_LOG_DEBUG, "Unknown operation");
+ }
+
+ if (rsp.op_ret && (strcmp(rsp.op_errstr, ""))) {
+ rb_operation_str = rsp.op_errstr;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to replace brick");
+ snprintf(msg, sizeof(msg), "%s",
+ rb_operation_str ? rb_operation_str : "Unknown operation");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_replace_brick(rsp_dict, rsp.op_ret,
+ rsp.op_errno, msg);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- case GF_REPLACE_OP_STATUS:
-
- if (rsp.op_ret || ret)
- rb_operation_str = "replace-brick status unknown";
- else {
- if (rsp.dict.dict_len) {
- /* Unserialize the dictionary */
- rsp_dict = dict_new ();
-
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &rsp_dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- }
- }
- ret = dict_get_str (rsp_dict, "status-reply",
- &status_reply);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to"
- "get status");
- goto out;
- }
-
- rb_operation_str = status_reply;
- }
+ if (rsp.op_ret)
+ cli_err("volume replace-brick: failed: %s", msg);
+ else
+ cli_out("volume replace-brick: success: %s", msg);
+ ret = rsp.op_ret;
- break;
+out:
+ if (frame)
+ frame->local = NULL;
- case GF_REPLACE_OP_PAUSE:
- if (rsp.op_ret)
- rb_operation_str = "replace-brick pause failed";
- else
- rb_operation_str = "replace-brick paused successfully";
- break;
+ if (local)
+ cli_local_wipe(local);
- case GF_REPLACE_OP_ABORT:
- if (rsp.op_ret)
- rb_operation_str = "replace-brick abort failed";
- else
- rb_operation_str = "replace-brick aborted successfully";
- break;
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ if (rsp_dict)
+ dict_unref(rsp_dict);
- case GF_REPLACE_OP_COMMIT:
- case GF_REPLACE_OP_COMMIT_FORCE:
- ret = dict_get_str (dict, "src-brick", &src_brick);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "dict_get on src-brick failed");
- goto out;
- }
+ return ret;
+}
- ret = dict_get_str (dict, "dst-brick", &dst_brick);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "dict_get on dst-brick failed");
- goto out;
- }
+static int
+gf_cli_log_rotate_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = {
+ 0,
+ };
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_DEBUG, "Received resp to log rotate");
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg), "log rotate %s",
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str("volLogRotate", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+ if (rsp.op_ret)
+ cli_err("volume log-rotate: failed: %s", msg);
+ else
+ cli_out("volume log-rotate: success");
+ ret = rsp.op_ret;
- if (rsp.op_ret || ret)
- rb_operation_str = "replace-brick commit failed";
- else
- rb_operation_str = "replace-brick commit successful";
+out:
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
- break;
+ return ret;
+}
- default:
- gf_log ("", GF_LOG_DEBUG,
- "Unknown operation");
- break;
- }
+static int
+gf_cli_sync_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = {
+ 0,
+ };
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_DEBUG, "Received resp to sync");
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "volume sync: failed: %s", rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg), "volume sync: %s",
+ (rsp.op_ret) ? "failed" : "success");
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str("volSync", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- if (rsp.op_ret && (strcmp (rsp.op_errstr, ""))) {
- rb_operation_str = rsp.op_errstr;
- }
+ if (rsp.op_ret)
+ cli_err("%s", msg);
+ else
+ cli_out("%s", msg);
+ ret = rsp.op_ret;
- gf_log ("cli", GF_LOG_INFO, "Received resp to replace brick");
- snprintf (msg,sizeof (msg), "%s",
- rb_operation_str ? rb_operation_str : "Unknown operation");
+out:
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volReplaceBrick", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
+static int
+print_quota_list_usage_output(cli_local_t *local, char *path, int64_t avail,
+ char *sl_str, quota_limits_t *limits,
+ quota_meta_t *used_space, gf_boolean_t sl,
+ gf_boolean_t hl, double sl_num,
+ gf_boolean_t limit_set)
+{
+ int32_t ret = -1;
+ char *used_str = NULL;
+ char *avail_str = NULL;
+ char *hl_str = NULL;
+ char *sl_val = NULL;
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_quota_xml_output(local, path, limits->hl, sl_str, sl_num,
+ used_space->size, avail, sl ? "Yes" : "No",
+ hl ? "Yes" : "No", limit_set);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to output in xml format for quota list command");
}
-#endif
+ goto out;
+ }
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
+ used_str = gf_uint64_2human_readable(used_space->size);
-out:
- if (frame)
- frame->local = NULL;
+ if (limit_set) {
+ hl_str = gf_uint64_2human_readable(limits->hl);
+ sl_val = gf_uint64_2human_readable(sl_num);
- if (local) {
- dict_unref (local->dict);
- cli_local_wipe (local);
+ if (!used_str) {
+ cli_out("%-40s %7s %7s(%s) %8" PRIu64 "%9" PRIu64
+ ""
+ "%15s %18s",
+ path, hl_str, sl_str, sl_val, used_space->size, avail,
+ sl ? "Yes" : "No", hl ? "Yes" : "No");
+ } else {
+ avail_str = gf_uint64_2human_readable(avail);
+ cli_out("%-40s %7s %7s(%s) %8s %7s %15s %20s", path, hl_str, sl_str,
+ sl_val, used_str, avail_str, sl ? "Yes" : "No",
+ hl ? "Yes" : "No");
}
+ } else {
+ cli_out("%-36s %10s %10s %14s %9s %15s %18s", path, "N/A", "N/A",
+ used_str, "N/A", "N/A", "N/A");
+ }
- cli_cmd_broadcast_response (ret);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- if (rsp_dict)
- dict_unref (rsp_dict);
+ ret = 0;
+out:
+ GF_FREE(hl_str);
+ GF_FREE(used_str);
+ GF_FREE(avail_str);
+ GF_FREE(sl_val);
- return ret;
+ return ret;
}
-
static int
-gf_cli3_1_log_rotate_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+print_quota_list_object_output(cli_local_t *local, char *path, int64_t avail,
+ char *sl_str, quota_limits_t *limits,
+ quota_meta_t *used_space, gf_boolean_t sl,
+ gf_boolean_t hl, double sl_num,
+ gf_boolean_t limit_set)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
+ int32_t ret = -1;
+ int64_t sl_val = sl_num;
- if (-1 == req->rpc_status) {
- goto out;
- }
-
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_quota_object_xml_output(local, path, sl_str, sl_val, limits,
+ used_space, avail, sl ? "Yes" : "No",
+ hl ? "Yes" : "No", limit_set);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to output in xml format for quota list command");
+ }
+ goto out;
+ }
+
+ if (limit_set) {
+ cli_out("%-40s %9" PRIu64 " %9s(%" PRId64 ") %10" PRIu64
+ ""
+ "%10" PRIu64 " %11" PRIu64 " %15s %20s",
+ path, limits->hl, sl_str, sl_val, used_space->file_count,
+ used_space->dir_count, avail, sl ? "Yes" : "No",
+ hl ? "Yes" : "No");
+ } else {
+ cli_out("%-40s %9s %9s %10" PRIu64 " %10" PRIu64 " %11s %15s %20s",
+ path, "N/A", "N/A", used_space->file_count,
+ used_space->dir_count, "N/A", "N/A", "N/A");
+ }
+ ret = 0;
- gf_log ("cli", GF_LOG_DEBUG, "Received resp to log rotate");
+out:
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg), "log rotate %s",
- (rsp.op_ret) ? "unsuccessful": "successful");
+ return ret;
+}
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volLogRotate", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
+static int
+print_quota_list_output(cli_local_t *local, char *path, char *default_sl,
+ quota_limits_t *limits, quota_meta_t *used_space,
+ int type, gf_boolean_t limit_set)
+{
+ int64_t avail = 0;
+ char percent_str[20] = {0};
+ char *sl_final = NULL;
+ int ret = -1;
+ double sl_num = 0;
+ gf_boolean_t sl = _gf_false;
+ gf_boolean_t hl = _gf_false;
+ int64_t used_size = 0;
+
+ GF_ASSERT(local);
+ GF_ASSERT(path);
+
+ if (limit_set) {
+ if (limits->sl < 0) {
+ ret = gf_string2percent(default_sl, &sl_num);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "could not convert default soft limit to percent");
+ goto out;
+ }
+ sl_num = (sl_num * limits->hl) / 100;
+ sl_final = default_sl;
+ } else {
+ sl_num = (limits->sl * limits->hl) / 100;
+ ret = snprintf(percent_str, sizeof(percent_str), "%" PRIu64 "%%",
+ limits->sl);
+ if (ret < 0)
goto out;
+ sl_final = percent_str;
}
-#endif
-
- if (rsp.op_ret)
- cli_err ("%s", msg);
+ if (type == GF_QUOTA_OPTION_TYPE_LIST)
+ used_size = used_space->size;
else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
-
+ used_size = used_space->file_count + used_space->dir_count;
+
+ if (limits->hl > used_size) {
+ avail = limits->hl - used_size;
+ hl = _gf_false;
+ if (used_size > sl_num)
+ sl = _gf_true;
+ else
+ sl = _gf_false;
+ } else {
+ avail = 0;
+ hl = sl = _gf_true;
+ }
+ }
+
+ if (type == GF_QUOTA_OPTION_TYPE_LIST)
+ ret = print_quota_list_usage_output(local, path, avail, sl_final,
+ limits, used_space, sl, hl, sl_num,
+ limit_set);
+ else
+ ret = print_quota_list_object_output(local, path, avail, sl_final,
+ limits, used_space, sl, hl, sl_num,
+ limit_set);
out:
- cli_cmd_broadcast_response (ret);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
-
- return ret;
+ return ret;
}
static int
-gf_cli3_1_sync_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+print_quota_list_from_mountdir(cli_local_t *local, char *mountdir,
+ char *default_sl, char *path, int type)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
+ int ret = -1;
+ ssize_t xattr_size = 0;
+ quota_limits_t limits = {
+ 0,
+ };
+ quota_meta_t used_space = {
+ 0,
+ };
+ char *key = NULL;
+ gf_boolean_t limit_set = _gf_true;
+
+ GF_ASSERT(local);
+ GF_ASSERT(mountdir);
+ GF_ASSERT(path);
+
+ if (type == GF_QUOTA_OPTION_TYPE_LIST)
+ key = QUOTA_LIMIT_KEY;
+ else
+ key = QUOTA_LIMIT_OBJECTS_KEY;
+
+ ret = sys_lgetxattr(mountdir, key, (void *)&limits, sizeof(limits));
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to get the xattr %s on %s. Reason : %s", key, mountdir,
+ strerror(errno));
+
+ switch (errno) {
+#if defined(ENODATA)
+ case ENODATA:
+#endif
+#if defined(ENOATTR) && (ENOATTR != ENODATA)
+ case ENOATTR:
+#endif
+ /* If it's an ENOATTR, quota/inode-quota is
+ * configured(limit is set at least for one directory).
+ * The user is trying to issue 'list/list-objects'
+ * command for a directory on which quota limit is
+ * not set and we are showing the used-space in case
+ * of list-usage and showing (dir_count, file_count)
+ * in case of list-objects. Other labels are
+ * shown "N/A".
+ */
- if (-1 == req->rpc_status) {
- goto out;
- }
+ limit_set = _gf_false;
+ goto enoattr;
+ break;
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
+ default:
+ cli_err("%-40s %s", path, strerror(errno));
+ break;
}
- gf_log ("cli", GF_LOG_DEBUG, "Received resp to sync");
-
- if (rsp.op_ret && strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg), "volume sync: %s",
- (rsp.op_ret) ? "unsuccessful": "successful");
+ goto out;
+ }
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volSync", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ limits.hl = ntoh64(limits.hl);
+ limits.sl = ntoh64(limits.sl);
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
+enoattr:
+ xattr_size = sys_lgetxattr(mountdir, QUOTA_SIZE_KEY, NULL, 0);
+ if (xattr_size < (sizeof(int64_t) * 2) &&
+ type == GF_QUOTA_OPTION_TYPE_LIST_OBJECTS) {
+ ret = -1;
+ /* This can happen when glusterfs is upgraded from 3.6 to 3.7
+ * and the xattr healing is not completed.
+ */
+ } else if (xattr_size > (sizeof(int64_t) * 2)) {
+ ret = sys_lgetxattr(mountdir, QUOTA_SIZE_KEY, &used_space,
+ sizeof(used_space));
+ } else if (xattr_size > 0) {
+ /* This is for compatibility.
+ * Older version had only file usage
+ */
+ ret = sys_lgetxattr(mountdir, QUOTA_SIZE_KEY, &(used_space.size),
+ sizeof(used_space.size));
+ used_space.file_count = 0;
+ used_space.dir_count = 0;
+ } else {
+ ret = -1;
+ }
+
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get quota size on path %s: %s",
+ mountdir, strerror(errno));
+ print_quota_list_empty(path, type);
+ goto out;
+ }
+
+ used_space.size = ntoh64(used_space.size);
+ used_space.file_count = ntoh64(used_space.file_count);
+ used_space.dir_count = ntoh64(used_space.dir_count);
+
+ ret = print_quota_list_output(local, path, default_sl, &limits, &used_space,
+ type, limit_set);
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_print_limit_list (char *volname, char *limit_list,
- char *op_errstr)
-{
- int64_t size = 0;
- int64_t limit_value = 0;
- int32_t i, j, k;
- int32_t len = 0, ret = -1;
- char *size_str = NULL;
- char path [PATH_MAX] = {0, };
- char ret_str [1024] = {0, };
- char value [1024] = {0, };
- char mountdir [] = "/tmp/mntXXXXXX";
- char abspath [PATH_MAX] = {0, };
- runner_t runner = {0,};
-
- GF_VALIDATE_OR_GOTO ("cli", volname, out);
- GF_VALIDATE_OR_GOTO ("cli", limit_list, out);
-
- if (!connected)
- goto out;
+static int
+gluster_remove_auxiliary_mount(char *volname)
+{
+ int ret = -1;
+ char mountdir[PATH_MAX] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GLUSTERD_GET_QUOTA_LIST_MOUNT_PATH(mountdir, volname, "/");
+ ret = gf_umount_lazy(this->name, mountdir, 1);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "umount on %s failed, reason : %s",
+ mountdir, strerror(errno));
+ }
+
+ return ret;
+}
- len = strlen (limit_list);
- if (len == 0) {
- cli_err ("%s", op_errstr?op_errstr:"quota limit not set ");
- goto out;
+static int
+gf_cli_print_limit_list_from_dict(cli_local_t *local, char *volname,
+ dict_t *dict, char *default_sl, int count,
+ int op_ret, int op_errno, char *op_errstr)
+{
+ int ret = -1;
+ int i = 0;
+ char key[32] = {
+ 0,
+ };
+ int keylen;
+ char mountdir[PATH_MAX] = {
+ 0,
+ };
+ char *path = NULL;
+ int type = -1;
+
+ if (!dict || count <= 0)
+ goto out;
+
+ ret = dict_get_int32_sizen(dict, "type", &type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get quota type");
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_quota_limit_list_begin(local, op_ret, op_errno,
+ op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
}
+ } else {
+ print_quota_list_header(type);
+ }
- if (mkdtemp (mountdir) == NULL) {
- gf_log ("cli", GF_LOG_WARNING, "failed to create a temporary "
- "mount directory");
- ret = -1;
- goto out;
+ while (count--) {
+ keylen = snprintf(key, sizeof(key), "path%d", i++);
+
+ ret = dict_get_strn(dict, key, keylen, &path);
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_DEBUG, "Path not present in limit list");
+ continue;
}
- /* Mount a temporary client to fetch the disk usage
- * of the directory on which the limit is set.
- */
- ret = runcmd (SBIN_DIR"/glusterfs", "-s",
- "localhost", "--volfile-id", volname, "-l",
- DEFAULT_LOG_FILE_DIRECTORY"/quota-list.log",
- mountdir, NULL);
+ ret = gf_canonicalize_path(path);
+ if (ret)
+ goto out;
+ GLUSTERD_GET_QUOTA_LIST_MOUNT_PATH(mountdir, volname, path);
+ ret = print_quota_list_from_mountdir(local, mountdir, default_sl, path,
+ type);
+ }
+
+out:
+ return ret;
+}
+
+static int
+print_quota_list_from_quotad(call_frame_t *frame, dict_t *rsp_dict)
+{
+ char *path = NULL;
+ char *default_sl = NULL;
+ int ret = -1;
+ cli_local_t *local = NULL;
+ dict_t *gd_rsp_dict = NULL;
+ quota_meta_t used_space = {
+ 0,
+ };
+ quota_limits_t limits = {
+ 0,
+ };
+ quota_limits_t *size_limits = NULL;
+ int32_t type = 0;
+ int32_t success_count = 0;
+
+ GF_ASSERT(frame);
+
+ local = frame->local;
+ gd_rsp_dict = local->dict;
+
+ ret = dict_get_int32_sizen(rsp_dict, "type", &type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get type");
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(rsp_dict, GET_ANCESTRY_PATH_KEY, &path);
+ if (ret) {
+ gf_log("cli", GF_LOG_WARNING, "path key is not present in dict");
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(gd_rsp_dict, "default-soft-limit", &default_sl);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "failed to get default soft limit");
+ goto out;
+ }
+
+ if (type == GF_QUOTA_OPTION_TYPE_LIST) {
+ ret = dict_get_bin(rsp_dict, QUOTA_LIMIT_KEY, (void **)&size_limits);
if (ret) {
- gf_log ("cli", GF_LOG_WARNING, "failed to mount glusterfs client");
- ret = -1;
- goto rm_dir;
+ gf_log("cli", GF_LOG_WARNING, "limit key not present in dict on %s",
+ path);
+ goto out;
}
-
- len = strlen (limit_list);
- if (len == 0) {
- cli_out ("quota limit not set ");
- goto unmount;
+ } else {
+ ret = dict_get_bin(rsp_dict, QUOTA_LIMIT_OBJECTS_KEY,
+ (void **)&size_limits);
+ if (ret) {
+ gf_log("cli", GF_LOG_WARNING,
+ "object limit key not present in dict on %s", path);
+ goto out;
+ }
+ }
+
+ limits.hl = ntoh64(size_limits->hl);
+ limits.sl = ntoh64(size_limits->sl);
+
+ if (type == GF_QUOTA_OPTION_TYPE_LIST)
+ ret = quota_dict_get_meta(rsp_dict, QUOTA_SIZE_KEY,
+ SLEN(QUOTA_SIZE_KEY), &used_space);
+ else
+ ret = quota_dict_get_inode_meta(rsp_dict, QUOTA_SIZE_KEY,
+ SLEN(QUOTA_SIZE_KEY), &used_space);
+
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_WARNING, "size key not present in dict");
+ print_quota_list_empty(path, type);
+ goto out;
+ }
+
+ LOCK(&local->lock);
+ {
+ ret = dict_get_int32_sizen(gd_rsp_dict, "quota-list-success-count",
+ &success_count);
+ if (ret)
+ success_count = 0;
+
+ ret = dict_set_int32_sizen(gd_rsp_dict, "quota-list-success-count",
+ success_count + 1);
+ }
+ UNLOCK(&local->lock);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set quota-list-success-count in dict");
+ goto out;
+ }
+
+ if (success_count == 0) {
+ if (!(global_state->mode & GLUSTER_MODE_XML)) {
+ print_quota_list_header(type);
+ } else {
+ ret = cli_xml_output_vol_quota_limit_list_begin(local, 0, 0, NULL);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
}
+ }
- i = 0;
+ ret = print_quota_list_output(local, path, default_sl, &limits, &used_space,
+ type, _gf_true);
+out:
+ return ret;
+}
- cli_out ("\tpath\t\t limit_set\t size");
- cli_out ("-----------------------------------------------------------"
- "-----------------------");
- while (i < len) {
- j = 0;
- k = 0;
+static void *
+cli_cmd_broadcast_response_detached(void *opaque)
+{
+ int32_t ret = 0;
- while (limit_list [i] != ':') {
- path [k++] = limit_list [i++];
- }
- path [k] = '\0';
+ ret = (intptr_t)opaque;
+ cli_cmd_broadcast_response(ret);
- i++; //skip ':'
+ return NULL;
+}
- while (limit_list [i] != ',' && limit_list [i] != '\0') {
- value [j++] = limit_list[i++];
- }
- value [j] = '\0';
-
- snprintf (abspath, sizeof (abspath), "%s/%s", mountdir, path);
-
- ret = sys_lgetxattr (abspath, "trusted.limit.list", (void *) ret_str, 4096);
- if (ret < 0) {
- cli_out ("%-20s %10s", path, value);
- } else {
- sscanf (ret_str, "%"PRId64",%"PRId64, &size,
- &limit_value);
- size_str = gf_uint64_2human_readable ((uint64_t) size);
- if (size_str == NULL) {
- cli_out ("%-20s %10s %20"PRId64, path,
- value, size);
- } else {
- cli_out ("%-20s %10s %20s", path,
- value, size_str);
- GF_FREE (size_str);
- }
- }
- i++;
- }
+static int32_t
+cli_quota_compare_path(struct list_head *list1, struct list_head *list2)
+{
+ struct list_node *node1 = NULL;
+ struct list_node *node2 = NULL;
+ dict_t *dict1 = NULL;
+ dict_t *dict2 = NULL;
+ char *path1 = NULL;
+ char *path2 = NULL;
+ int ret = 0;
-unmount:
+ node1 = list_entry(list1, struct list_node, list);
+ node2 = list_entry(list2, struct list_node, list);
- runinit (&runner);
- runner_add_args (&runner, "umount",
-#if GF_LINUX_HOST_OS
- "-l",
-#endif
- mountdir, NULL);
- ret = runner_run_reuse (&runner);
- if (ret)
- runner_log (&runner, "cli", GF_LOG_WARNING, "error executing");
- runner_end (&runner);
+ dict1 = node1->ptr;
+ dict2 = node2->ptr;
-rm_dir:
- rmdir (mountdir);
-out:
- return ret;
+ ret = dict_get_str_sizen(dict1, GET_ANCESTRY_PATH_KEY, &path1);
+ if (ret < 0)
+ return 0;
+
+ ret = dict_get_str_sizen(dict2, GET_ANCESTRY_PATH_KEY, &path2);
+ if (ret < 0)
+ return 0;
+
+ return strcmp(path1, path2);
}
-int
-gf_cli3_1_quota_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- dict_t *dict = NULL;
- char *volname = NULL;
- char *limit_list = NULL;
- int32_t type = 0;
- char msg[1024] = {0,};
-
- if (-1 == req->rpc_status) {
- goto out;
- }
+static int
+cli_quotad_getlimit_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ /*TODO: we need to gather the path, hard-limit, soft-limit and used space*/
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ struct list_node *node = NULL;
+ struct list_node *tmpnode = NULL;
+ call_frame_t *frame = NULL;
+ cli_local_t *local = NULL;
+ int32_t list_count = 0;
+ pthread_t th_id = {
+ 0,
+ };
+ int32_t max_count = 0;
+
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ LOCK(&local->lock);
+ {
+ ret = dict_get_int32_sizen(local->dict, "quota-list-count",
+ &list_count);
+ if (ret)
+ list_count = 0;
+
+ list_count++;
+ ret = dict_set_int32_sizen(local->dict, "quota-list-count", list_count);
+ }
+ UNLOCK(&local->lock);
+
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to set quota-list-count in dict");
+ goto out;
+ }
+
+ if (-1 == req->rpc_status) {
+ if (list_count == 0)
+ cli_err(
+ "Connection failed. Please check if quota "
+ "daemon is operational.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ ret = -1;
+ if (strcmp(rsp.op_errstr, ""))
+ cli_err("quota command failed : %s", rsp.op_errstr);
+ else
+ cli_err("quota command : failed");
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
}
- if (rsp.op_ret &&
- strcmp (rsp.op_errstr, "") == 0) {
- snprintf (msg, sizeof (msg), "command unsuccessful %s",
- rsp.op_errstr);
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML)
- goto xml_output;
-#endif
- goto out;
+ node = list_node_add_order(dict, &local->dict_list,
+ cli_quota_compare_path);
+ if (node == NULL) {
+ gf_log("cli", GF_LOG_ERROR, "failed to add node to the list");
+ dict_unref(dict);
+ ret = -1;
+ goto out;
}
+ }
- if (rsp.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
+ ret = dict_get_int32_sizen(local->dict, "max_count", &max_count);
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR, "failed to get max_count");
+ goto out;
+ }
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- }
+ if (list_count == max_count) {
+ list_for_each_entry_safe(node, tmpnode, &local->dict_list, list)
+ {
+ dict = node->ptr;
+ print_quota_list_from_quotad(frame, dict);
+ list_node_del(node);
+ dict_unref(dict);
}
+ }
- ret = dict_get_str (dict, "volname", &volname);
+out:
+ /* Bad Fix: CLI holds the lock to process a command.
+ * When processing quota list command, below sequence of steps executed
+ * in the same thread and causing deadlock
+ *
+ * 1) CLI holds the lock
+ * 2) Send rpc_clnt_submit request to quotad for quota usage
+ * 3) If quotad is down, rpc_clnt_submit invokes cbk function with error
+ * 4) cbk function cli_quotad_getlimit_cbk invokes
+ * cli_cmd_broadcast_response which tries to hold lock to broadcast
+ * the results and hangs, because same thread has already holding
+ * the lock
+ *
+ * Broadcasting response in a separate thread which is not a
+ * good fix. This needs to be re-visted with better solution
+ */
+ if (ret == -1) {
+ ret = pthread_create(&th_id, NULL, cli_cmd_broadcast_response_detached,
+ (void *)-1);
if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get volname");
+ gf_log("cli", GF_LOG_ERROR, "pthread_create failed: %s",
+ strerror(errno));
+ } else {
+ cli_cmd_broadcast_response(ret);
+ }
+ gf_free_xdr_cli_rsp(rsp);
+
+ return ret;
+}
- ret = dict_get_str (dict, "limit_list", &limit_list);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get limit_list");
+static int
+cli_quotad_getlimit(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- ret = dict_get_int32 (dict, "type", &type);
- if (ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get type");
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
- if (type == GF_QUOTA_OPTION_TYPE_LIST) {
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_vol_quota_limit_list
- (volname, limit_list, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
+ dict = data;
+ ret = add_cli_cmd_timeout_to_dict(dict);
- }
-#endif
- if (limit_list) {
- gf_cli3_1_print_limit_list (volname,
- limit_list,
- rsp.op_errstr);
- } else {
- gf_log ("cli", GF_LOG_INFO, "Received resp to quota "
- "command ");
- if (rsp.op_errstr)
- snprintf (msg, sizeof (msg), "%s",
- rsp.op_errstr);
- }
- } else {
- gf_log ("cli", GF_LOG_INFO, "Received resp to quota command ");
- if (rsp.op_errstr)
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg), "successful");
- }
+ ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR, DICT_SERIALIZE_FAIL);
-#if (HAVE_LIB_XML)
-xml_output:
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volQuota", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ goto out;
+ }
- if (strlen (msg) > 0) {
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
- }
+ ret = cli_cmd_submit(global_quotad_rpc, &req, frame, &cli_quotad_clnt,
+ GF_AGGREGATOR_GETLIMIT, NULL, this,
+ cli_quotad_getlimit_cbk, (xdrproc_t)xdr_gf_cli_req);
- ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
+ GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
+}
+
+static void
+gf_cli_quota_list(cli_local_t *local, char *volname, dict_t *dict,
+ char *default_sl, int count, int op_ret, int op_errno,
+ char *op_errstr)
+{
+ if (!cli_cmd_connected())
+ goto out;
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
+ if (count > 0) {
+ GF_VALIDATE_OR_GOTO("cli", volname, out);
- return ret;
+ gf_cli_print_limit_list_from_dict(local, volname, dict, default_sl,
+ count, op_ret, op_errno, op_errstr);
+ }
+out:
+ return;
}
-int
-gf_cli3_1_getspec_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli_quota_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_getspec_rsp rsp = {0,};
- int ret = -1;
- char *spec = NULL;
-
- if (-1 == req->rpc_status) {
- goto out;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ int32_t type = 0;
+ call_frame_t *frame = NULL;
+ char *default_sl = NULL;
+ cli_local_t *local = NULL;
+ char *default_sl_dup = NULL;
+ int32_t entry_count = 0;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ ret = -1;
+ if (global_state->mode & GLUSTER_MODE_XML)
+ goto xml_output;
+
+ if (strcmp(rsp.op_errstr, "")) {
+ cli_err("quota command failed : %s", rsp.op_errstr);
+ if (rsp.op_ret == -ENOENT)
+ cli_err("please enter the path relative to the volume");
+ } else {
+ cli_err("quota command : failed");
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
- if (ret < 0 || rsp.op_ret == -1) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
}
+ }
- gf_log ("cli", GF_LOG_INFO, "Received resp to getspec");
+ gf_log("cli", GF_LOG_DEBUG, "Received resp to quota command");
- spec = GF_MALLOC (rsp.op_ret + 1, cli_mt_char);
- if (!spec) {
- gf_log("", GF_LOG_ERROR, "out of memory");
- goto out;
+ ret = dict_get_str_sizen(dict, "default-soft-limit", &default_sl);
+ if (ret)
+ gf_log(frame->this->name, GF_LOG_TRACE,
+ "failed to get default soft limit");
+
+ // default-soft-limit is part of rsp_dict only iff we sent
+ // GLUSTER_CLI_QUOTA with type being GF_QUOTA_OPTION_TYPE_LIST
+ if (default_sl) {
+ default_sl_dup = gf_strdup(default_sl);
+ if (!default_sl_dup) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr_sizen(local->dict, "default-soft-limit",
+ default_sl_dup);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_TRACE,
+ "failed to set default soft limit");
+ GF_FREE(default_sl_dup);
}
- memcpy (spec, rsp.spec, rsp.op_ret);
- spec[rsp.op_ret] = '\0';
- cli_out ("%s", spec);
- GF_FREE (spec);
+ }
- ret = 0;
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ gf_log(frame->this->name, GF_LOG_ERROR, "failed to get volname");
-out:
- cli_cmd_broadcast_response (ret);
- return ret;
-}
+ ret = dict_get_int32_sizen(dict, "type", &type);
+ if (ret)
+ gf_log(frame->this->name, GF_LOG_TRACE, "failed to get type");
-int
-gf_cli3_1_pmap_b2p_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- pmap_port_by_brick_rsp rsp = {0,};
- int ret = -1;
- char *spec = NULL;
+ ret = dict_get_int32_sizen(dict, "count", &entry_count);
+ if (ret)
+ gf_log(frame->this->name, GF_LOG_TRACE, "failed to get count");
- if (-1 == req->rpc_status) {
- goto out;
- }
+ if ((type == GF_QUOTA_OPTION_TYPE_LIST) ||
+ (type == GF_QUOTA_OPTION_TYPE_LIST_OBJECTS)) {
+ gf_cli_quota_list(local, volname, dict, default_sl, entry_count,
+ rsp.op_ret, rsp.op_errno, rsp.op_errstr);
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_pmap_port_by_brick_rsp);
- if (ret < 0 || rsp.op_ret == -1) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_quota_limit_list_end(local);
+ if (ret < 0) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ }
+ goto out;
}
+ }
- gf_log ("cli", GF_LOG_INFO, "Received resp to pmap b2p");
+xml_output:
- cli_out ("%d", rsp.port);
- GF_FREE (spec);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str("volQuota", NULL, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- ret = rsp.op_ret;
+ if (!rsp.op_ret && type != GF_QUOTA_OPTION_TYPE_LIST &&
+ type != GF_QUOTA_OPTION_TYPE_LIST_OBJECTS)
+ cli_out("volume quota : success");
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+
+ if ((type == GF_QUOTA_OPTION_TYPE_LIST) ||
+ (type == GF_QUOTA_OPTION_TYPE_LIST_OBJECTS)) {
+ gluster_remove_auxiliary_mount(volname);
+ }
+
+ cli_cmd_broadcast_response(ret);
+ if (dict)
+ dict_unref(dict);
+
+ gf_free_xdr_cli_rsp(rsp);
+
+ return ret;
}
+static int
+gf_cli_getspec_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_getspec_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char *spec = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ if (rsp.op_ret == -1) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ "getspec failed");
+ ret = -1;
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to getspec");
+
+ spec = GF_MALLOC(rsp.op_ret + 1, cli_mt_char);
+ if (!spec) {
+ gf_log("", GF_LOG_ERROR, "out of memory");
+ ret = -1;
+ goto out;
+ }
+ memcpy(spec, rsp.spec, rsp.op_ret);
+ spec[rsp.op_ret] = '\0';
+ cli_out("%s", spec);
+ GF_FREE(spec);
+
+ ret = 0;
-int32_t
-gf_cli3_1_probe (call_frame_t *frame, xlator_t *this,
- void *data)
+out:
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_getspec_rsp(rsp);
+ return ret;
+}
+
+static int
+gf_cli_pmap_b2p_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf1_cli_probe_req req = {0,};
- int ret = 0;
- dict_t *dict = NULL;
- char *hostname = NULL;
- int port = 0;
+ pmap_port_by_brick_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char *spec = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ GF_ASSERT(myframe);
- dict = data;
- ret = dict_get_str (dict, "hostname", &hostname);
- if (ret)
- goto out;
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
- ret = dict_get_int32 (dict, "port", &port);
- if (ret)
- port = CLI_GLUSTERD_PORT;
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_pmap_port_by_brick_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
- req.hostname = hostname;
- req.port = port;
+ if (rsp.op_ret == -1) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ "pump_b2p failed");
+ ret = -1;
+ goto out;
+ }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_PROBE, NULL,
- this, gf_cli3_1_probe_cbk,
- (xdrproc_t)xdr_gf1_cli_probe_req);
+ gf_log("cli", GF_LOG_INFO, "Received resp to pmap b2p");
+
+ cli_out("%d", rsp.port);
+ GF_FREE(spec);
+
+ ret = rsp.op_ret;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ cli_cmd_broadcast_response(ret);
+ return ret;
}
-int32_t
-gf_cli3_1_deprobe (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_probe(call_frame_t *frame, xlator_t *this, void *data)
{
- gf1_cli_deprobe_req req = {0,};
- int ret = 0;
- dict_t *dict = NULL;
- char *hostname = NULL;
- int port = 0;
- int flags = 0;
+ gf_cli_req req = {
+ {
+ 0,
+ },
+ };
+ int ret = 0;
+ dict_t *dict = NULL;
+ int port = 0;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = dict_get_int32_sizen(dict, "port", &port);
+ if (ret) {
+ ret = dict_set_int32_sizen(dict, "port", CLI_GLUSTERD_PORT);
+ if (ret)
+ goto out;
+ }
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ ret = cli_to_glusterd(&req, frame, gf_cli_probe_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_PROBE,
+ this, cli_rpc_prog, NULL);
- dict = data;
- ret = dict_get_str (dict, "hostname", &hostname);
- if (ret)
- goto out;
+out:
+ GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- ret = dict_get_int32 (dict, "port", &port);
+ return ret;
+}
+
+static int32_t
+gf_cli_deprobe(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {
+ {
+ 0,
+ },
+ };
+ int ret = 0;
+ dict_t *dict = NULL;
+ int port = 0;
+ int flags = 0;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+ ret = dict_get_int32_sizen(dict, "port", &port);
+ if (ret) {
+ ret = dict_set_int32_sizen(dict, "port", CLI_GLUSTERD_PORT);
if (ret)
- port = CLI_GLUSTERD_PORT;
+ goto out;
+ }
- ret = dict_get_int32 (dict, "flags", &flags);
+ ret = dict_get_int32_sizen(dict, "flags", &flags);
+ if (ret) {
+ ret = dict_set_int32_sizen(dict, "flags", 0);
if (ret)
- flags = 0;
+ goto out;
+ }
- req.hostname = hostname;
- req.port = port;
- req.flags = flags;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_DEPROBE, NULL,
- this, gf_cli3_1_deprobe_cbk,
- (xdrproc_t)xdr_gf1_cli_deprobe_req);
+ ret = cli_to_glusterd(&req, frame, gf_cli_deprobe_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_DEPROBE,
+ this, cli_rpc_prog, NULL);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+
+ return ret;
}
-int32_t
-gf_cli3_1_list_friends (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_list_friends(call_frame_t *frame, xlator_t *this, void *data)
{
- gf1_cli_peer_list_req req = {0,};
- int ret = 0;
+ gf1_cli_peer_list_req req = {
+ 0,
+ };
+ int ret = 0;
+ unsigned long flags = 0;
+
+ if (!frame || !this) {
+ ret = -1;
+ goto out;
+ }
+
+ GF_ASSERT(frame->local == NULL);
+
+ flags = (long)data;
+ req.flags = flags;
+ frame->local = (void *)flags;
+ ret = cli_cmd_submit(
+ NULL, &req, frame, cli_rpc_prog, GLUSTER_CLI_LIST_FRIENDS, NULL, this,
+ gf_cli_list_friends_cbk, (xdrproc_t)xdr_gf1_cli_peer_list_req);
- if (!frame || !this) {
- ret = -1;
- goto out;
- }
+out:
+ if (ret && frame) {
+ /*
+ * If everything goes fine, gf_cli_list_friends_cbk()
+ * [invoked through cli_cmd_submit()]resets the
+ * frame->local to NULL. In case cli_cmd_submit()
+ * fails in between, RESET frame->local here.
+ */
+ frame->local = NULL;
+ }
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
+}
+
+static int32_t
+gf_cli_get_state(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {
+ {
+ 0,
+ },
+ };
+ int ret = 0;
+ dict_t *dict = NULL;
- req.flags = GF_CLI_LIST_ALL;
+ dict = data;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_LIST_FRIENDS, NULL,
- this, gf_cli3_1_list_friends_cbk,
- (xdrproc_t) xdr_gf1_cli_peer_list_req);
+ ret = cli_to_glusterd(&req, frame, gf_cli_get_state_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_GET_STATE, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_get_next_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_get_next_volume(call_frame_t *frame, xlator_t *this, void *data)
{
+ int ret = 0;
+ cli_cmd_volume_get_ctx_t *ctx = NULL;
+ cli_local_t *local = NULL;
- int ret = 0;
- cli_cmd_volume_get_ctx_t *ctx = NULL;
- cli_local_t *local = NULL;
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ ctx = data;
+ local = frame->local;
- ctx = data;
- local = frame->local;
-
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_vol_info_begin (local, 0, 0, "");
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Error outputting to xml");
- goto out;
- }
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_info_begin(local, 0, 0, "");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
}
-#endif
+ }
- ret = gf_cli3_1_get_volume (frame, this, data);
+ ret = gf_cli_get_volume(frame, this, data);
+ if (!local || !local->get_vol.volname) {
+ if ((global_state->mode & GLUSTER_MODE_XML))
+ goto end_xml;
- if (!local || !local->get_vol.volname) {
-#if (HAVE_LIB_XML)
- if ((global_state->mode & GLUSTER_MODE_XML))
- goto end_xml;
-#endif
- cli_out ("No volumes present");
- goto out;
- }
+ cli_err("No volumes present");
+ goto out;
+ }
+ ctx->volname = local->get_vol.volname;
+ while (ctx->volname) {
+ ret = gf_cli_get_volume(frame, this, ctx);
+ if (ret)
+ goto out;
ctx->volname = local->get_vol.volname;
+ }
- while (ctx->volname) {
- ret = gf_cli3_1_get_volume (frame, this, ctx);
- if (ret)
- goto out;
- ctx->volname = local->get_vol.volname;
- }
-
-#if (HAVE_LIB_XML)
end_xml:
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_vol_info_end (local);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR, "Error outputting to xml");
- }
-#endif
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_info_end(local);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ }
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
}
-int32_t
-gf_cli3_1_get_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_get_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- cli_cmd_volume_get_ctx_t *ctx = NULL;
- dict_t *dict = NULL;
- int32_t flags = 0;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ cli_cmd_volume_get_ctx_t *ctx = NULL;
+ dict_t *dict = NULL;
+ int32_t flags = 0;
+
+ if (!this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ ctx = data;
+
+ dict = dict_new();
+ if (!dict) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to create the dict");
+ ret = -1;
+ goto out;
+ }
+
+ if (ctx->volname) {
+ ret = dict_set_str_sizen(dict, "volname", ctx->volname);
+ if (ret)
+ goto out;
+ }
+
+ flags = ctx->flags;
+ ret = dict_set_int32_sizen(dict, "flags", flags);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "failed to set flags");
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR, DICT_SERIALIZE_FAIL);
+ goto out;
+ }
+
+ ret = cli_cmd_submit(NULL, &req, frame, cli_rpc_prog,
+ GLUSTER_CLI_GET_VOLUME, NULL, this,
+ gf_cli_get_volume_cbk, (xdrproc_t)xdr_gf_cli_req);
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+out:
+ if (dict)
+ dict_unref(dict);
- ctx = data;
+ GF_FREE(req.dict.dict_val);
- dict = dict_new ();
- if (!dict)
- goto out;
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
+}
- if (ctx->volname) {
- ret = dict_set_str (dict, "volname", ctx->volname);
- if (ret)
- goto out;
- }
+static int32_t
+gf_cli3_1_uuid_get(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ dict = data;
+ ret = cli_to_glusterd(&req, frame, gf_cli3_1_uuid_get_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_UUID_GET,
+ this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
- flags = ctx->flags;
- ret = dict_set_int32 (dict, "flags", flags);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to set flags");
- goto out;
- }
+static int32_t
+gf_cli3_1_uuid_reset(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ dict = data;
+ ret = cli_to_glusterd(&req, frame, gf_cli3_1_uuid_reset_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_UUID_RESET, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
+static int32_t
+gf_cli_create_volume(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_GET_VOLUME, NULL,
- this, gf_cli3_1_get_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ dict = data;
-out:
- if (dict)
- dict_unref (dict);
+ ret = cli_to_glusterd(&req, frame, gf_cli_create_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_CREATE_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ GF_FREE(req.dict.dict_val);
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
}
+static int32_t
+gf_cli_delete_volume(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
-int32_t
-gf_cli3_1_create_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+ dict = data;
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_delete_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_DELETE_VOLUME, this, cli_rpc_prog, NULL);
+ GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+
+ return ret;
+}
+
+static int32_t
+gf_cli_start_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- dict_t *dict = NULL;
- cli_local_t *local = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ dict = data;
- dict = dict_ref ((dict_t *)data);
+ ret = cli_to_glusterd(&req, frame, gf_cli_start_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_START_VOLUME, this, cli_rpc_prog, NULL);
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
- goto out;
- }
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
- local = cli_local_get ();
+ return ret;
+}
- if (local) {
- local->dict = dict_ref (dict);
- frame->local = local;
- }
+static int32_t
+gf_cli_stop_volume(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = data;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_CREATE_VOLUME, NULL,
- this, gf_cli3_1_create_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ dict = data;
+ ret = cli_to_glusterd(&req, frame, gf_cli_stop_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_STOP_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+static int32_t
+gf_cli_defrag_volume(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- if (dict)
- dict_unref (dict);
+ dict = data;
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
+ ret = cli_to_glusterd(&req, frame, gf_cli_defrag_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_DEFRAG_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_delete_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_rename_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- cli_local_t *local = NULL;
- dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
- local = cli_local_get ();
+ dict = data;
- dict = dict_new ();
- ret = dict_set_str (dict, "volname", data);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "dict set failed");
- goto out;
- }
- if (local) {
- local->dict = dict_ref (dict);
- frame->local = local;
- }
+ ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR, DICT_SERIALIZE_FAIL);
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to get serialize dict");
- goto out;
- }
+ goto out;
+ }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_DELETE_VOLUME, NULL,
- this, gf_cli3_1_delete_volume_cbk,
- (xdrproc_t)xdr_gf_cli_req);
+ ret = cli_cmd_submit(NULL, &req, frame, cli_rpc_prog,
+ GLUSTER_CLI_RENAME_VOLUME, NULL, this,
+ gf_cli_rename_volume_cbk, (xdrproc_t)xdr_gf_cli_req);
out:
- if (dict)
- dict_unref (dict);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_start_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_reset_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- cli_local_t *local = NULL;
- dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ dict = data;
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_reset_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_RESET_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+static int32_t
+gf_cli_ganesha(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- dict = data;
- local = cli_local_get ();
+ dict = data;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize dict");
- goto out;
- }
+ ret = cli_to_glusterd(&req, frame, gf_cli_ganesha_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_GANESHA,
+ this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
+}
- if (local) {
- local->dict = dict_ref (dict);
- frame->local = local;
- }
+static int32_t
+gf_cli_set_volume(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_START_VOLUME, NULL,
- this, gf_cli3_1_start_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ dict = data;
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ ret = cli_to_glusterd(&req, frame, gf_cli_set_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_SET_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
- return ret;
+ return ret;
}
int32_t
-gf_cli3_1_stop_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+gf_cli_add_brick(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- cli_local_t *local = NULL;
- dict_t *dict = data;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ dict = data;
- local = cli_local_get ();
- dict = data;
-
- if (local) {
- local->dict = dict_ref (dict);
- frame->local = local;
- }
+ ret = cli_to_glusterd(&req, frame, gf_cli_add_brick_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_ADD_BRICK, this, cli_rpc_prog, NULL);
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *) &req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
-
- goto out;
- }
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_STOP_VOLUME, NULL,
- this, gf_cli3_1_stop_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ GF_FREE(req.dict.dict_val);
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
-
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_defrag_volume (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- gf_cli_req req = {{0,}};
- int ret = 0;
- cli_local_t *local = NULL;
- char *volname = NULL;
- char *cmd_str = NULL;
- dict_t *dict = NULL;
- gf_cli_defrag_type cmd = 0;
- dict_t *req_dict = NULL;
-
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
- dict = data;
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- gf_log ("", GF_LOG_DEBUG, "error");
+static int32_t
+gf_cli_remove_brick(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ ;
+ gf_cli_req status_req = {{
+ 0,
+ }};
+ ;
+ int ret = 0;
+ dict_t *dict = NULL;
+ int32_t command = 0;
+ int32_t cmd = 0;
+
+ if (!frame || !this) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = dict_get_int32_sizen(dict, "command", &command);
+ if (ret)
+ goto out;
+
+ if ((command != GF_OP_CMD_STATUS) && (command != GF_OP_CMD_STOP)) {
+ ret = cli_to_glusterd(
+ &req, frame, gf_cli_remove_brick_cbk, (xdrproc_t)xdr_gf_cli_req,
+ dict, GLUSTER_CLI_REMOVE_BRICK, this, cli_rpc_prog, NULL);
+ } else {
+ /* Need rebalance status to be sent :-) */
+ if (command == GF_OP_CMD_STATUS)
+ cmd |= GF_DEFRAG_CMD_STATUS;
+ else
+ cmd |= GF_DEFRAG_CMD_STOP;
- ret = dict_get_str (dict, "command", &cmd_str);
+ ret = dict_set_int32_sizen(dict, "rebalance-command", (int32_t)cmd);
if (ret) {
- gf_log ("", GF_LOG_DEBUG, "error");
- goto out;
+ gf_log(this->name, GF_LOG_ERROR, "Failed to set dict");
+ goto out;
}
- if (strcmp (cmd_str, "start") == 0) {
- cmd = GF_DEFRAG_CMD_START;
- ret = dict_get_str (dict, "option", &cmd_str);
- if (!ret) {
- if (strcmp (cmd_str, "force") == 0) {
- cmd = GF_DEFRAG_CMD_START_FORCE;
- }
- }
- goto done;
- }
+ ret = cli_to_glusterd(
+ &status_req, frame, gf_cli3_remove_brick_status_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_DEFRAG_VOLUME, this,
+ cli_rpc_prog, NULL);
+ }
- if (strcmp (cmd_str, "fix-layout") == 0) {
- cmd = GF_DEFRAG_CMD_START_LAYOUT_FIX;
- goto done;
- }
- if (strcmp (cmd_str, "stop") == 0) {
- cmd = GF_DEFRAG_CMD_STOP;
- goto done;
- }
- if (strcmp (cmd_str, "status") == 0) {
- cmd = GF_DEFRAG_CMD_STATUS;
- }
+out:
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
-done:
- local = cli_local_get ();
+ GF_FREE(req.dict.dict_val);
- req_dict = dict_new ();
- if (!req_dict) {
- ret = -1;
- goto out;
- }
+ GF_FREE(status_req.dict.dict_val);
- ret = dict_set_str (req_dict, "volname", volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to set dict");
- goto out;
- }
+ return ret;
+}
- ret = dict_set_int32 (req_dict, "rebalance-command", (int32_t) cmd);
+static int32_t
+gf_cli_reset_brick(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
+ char *dst_brick = NULL;
+ char *op = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = dict_get_str_sizen(dict, "operation", &op);
+ if (ret) {
+ gf_log(this->name, GF_LOG_DEBUG, "dict_get on operation failed");
+ goto out;
+ }
+
+ if (!strcmp(op, "GF_RESET_OP_COMMIT") ||
+ !strcmp(op, "GF_RESET_OP_COMMIT_FORCE")) {
+ ret = dict_get_str_sizen(dict, "dst-brick", &dst_brick);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to set dict");
- goto out;
+ gf_log(this->name, GF_LOG_DEBUG, "dict_get on dst-brick failed");
+ goto out;
}
+ }
- if (local) {
- local->dict = dict_ref (req_dict);
- frame->local = local;
- local = NULL;
- }
+ ret = cli_to_glusterd(&req, frame, gf_cli_reset_brick_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_RESET_BRICK, this, cli_rpc_prog, NULL);
- ret = dict_allocate_and_serialize (req_dict,
- &req.dict.dict_val,
- (size_t *) &req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+out:
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- goto out;
- }
+ GF_FREE(req.dict.dict_val);
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_DEFRAG_VOLUME, NULL,
- this, gf_cli3_1_defrag_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ return ret;
+}
+
+static int32_t
+gf_cli_replace_brick(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
+ int32_t op = 0;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = dict_get_int32_sizen(dict, "operation", &op);
+ if (ret) {
+ gf_log(this->name, GF_LOG_DEBUG, "dict_get on operation failed");
+ goto out;
+ }
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_replace_brick_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_REPLACE_BRICK, this, cli_rpc_prog, NULL);
out:
- if (local)
- cli_local_wipe (local);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ GF_FREE(req.dict.dict_val);
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_rename_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_log_rotate(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ dict = data;
- dict = data;
+ ret = cli_to_glusterd(&req, frame, gf_cli_log_rotate_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_LOG_ROTATE, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *) &req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
- goto out;
- }
+static int32_t
+gf_cli_sync_volume(call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = 0;
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+
+ dict = data;
+ ret = cli_to_glusterd(&req, frame, gf_cli_sync_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_SYNC_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_RENAME_VOLUME, NULL,
- this, gf_cli3_1_rename_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ return ret;
+}
+
+static int32_t
+gf_cli_getspec(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_getspec_req req = {
+ 0,
+ };
+ int ret = 0;
+ dict_t *dict = NULL;
+ dict_t *op_dict = NULL;
+
+ if (!frame || !this) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+
+ ret = dict_get_str_sizen(dict, "volid", &req.key);
+ if (ret)
+ goto out;
+
+ op_dict = dict_new();
+ if (!op_dict) {
+ ret = -1;
+ goto out;
+ }
+
+ // Set the supported min and max op-versions, so glusterd can make a
+ // decision
+ ret = dict_set_int32_sizen(op_dict, "min-op-version", GD_OP_VERSION_MIN);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to set min-op-version in request dict");
+ goto out;
+ }
+
+ ret = dict_set_int32_sizen(op_dict, "max-op-version", GD_OP_VERSION_MAX);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to set max-op-version in request dict");
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize(op_dict, &req.xdata.xdata_val,
+ &req.xdata.xdata_len);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_ERROR, DICT_SERIALIZE_FAIL);
+ goto out;
+ }
+
+ ret = cli_cmd_submit(NULL, &req, frame, &cli_handshake_prog,
+ GF_HNDSK_GETSPEC, NULL, this, gf_cli_getspec_cbk,
+ (xdrproc_t)xdr_gf_getspec_req);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ if (op_dict) {
+ dict_unref(op_dict);
+ }
+ GF_FREE(req.xdata.xdata_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_reset_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_quota(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
+
+ dict = data;
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_quota_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_QUOTA,
+ this, cli_rpc_prog, NULL);
+ GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
+}
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+static int32_t
+gf_cli_pmap_b2p(call_frame_t *frame, xlator_t *this, void *data)
+{
+ pmap_port_by_brick_req req = {
+ 0,
+ };
+ int ret = 0;
+ dict_t *dict = NULL;
- dict = data;
+ if (!frame || !this) {
+ ret = -1;
+ goto out;
+ }
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to get serialized length of dict");
- goto out;
- }
+ dict = data;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_RESET_VOLUME, NULL,
- this, gf_cli3_1_reset_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = dict_get_str_sizen(dict, "brick", &req.brick);
+ if (ret)
+ goto out;
+
+ ret = cli_cmd_submit(NULL, &req, frame, &cli_pmap_prog, GF_PMAP_PORTBYBRICK,
+ NULL, this, gf_cli_pmap_b2p_cbk,
+ (xdrproc_t)xdr_pmap_port_by_brick_req);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_set_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int
+gf_cli_fsm_log_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- dict_t *dict = NULL;
+ gf1_cli_fsm_log_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ int tr_count = 0;
+ char key[64] = {0};
+ int keylen;
+ int i = 0;
+ char *old_state = NULL;
+ char *new_state = NULL;
+ char *event = NULL;
+ char *time = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf1_cli_fsm_log_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, ""))
+ cli_err("%s", rsp.op_errstr);
+ cli_err("fsm log unsuccessful");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.fsm_log.fsm_log_val, rsp.fsm_log.fsm_log_len,
+ &dict);
+
+ if (ret) {
+ cli_err(DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(dict, "count", &tr_count);
+ if (!ret && tr_count)
+ cli_out("number of transitions: %d", tr_count);
+ else
+ cli_err("No transitions");
+ for (i = 0; i < tr_count; i++) {
+ keylen = snprintf(key, sizeof(key), "log%d-old-state", i);
+ ret = dict_get_strn(dict, key, keylen, &old_state);
+ if (ret)
+ goto out;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ keylen = snprintf(key, sizeof(key), "log%d-event", i);
+ ret = dict_get_strn(dict, key, keylen, &event);
+ if (ret)
+ goto out;
- dict = data;
+ keylen = snprintf(key, sizeof(key), "log%d-new-state", i);
+ ret = dict_get_strn(dict, key, keylen, &new_state);
+ if (ret)
+ goto out;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
- goto out;
- }
+ keylen = snprintf(key, sizeof(key), "log%d-time", i);
+ ret = dict_get_strn(dict, key, keylen, &time);
+ if (ret)
+ goto out;
+ cli_out(
+ "Old State: [%s]\n"
+ "New State: [%s]\n"
+ "Event : [%s]\n"
+ "timestamp: [%s]\n",
+ old_state, new_state, event, time);
+ }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_SET_VOLUME, NULL,
- this, gf_cli3_1_set_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = rsp.op_ret;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ cli_cmd_broadcast_response(ret);
+ if (dict) {
+ dict_unref(dict);
+ }
+ gf_free_xdr_fsm_log_rsp(rsp);
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_add_brick (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_fsm_log(call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = -1;
+ gf1_cli_fsm_log_req req = {
+ 0,
+ };
+
+ GF_ASSERT(frame);
+ GF_ASSERT(this);
+ GF_ASSERT(data);
+
+ if (!frame || !this || !data)
+ goto out;
+ req.name = data;
+ ret = cli_cmd_submit(NULL, &req, frame, cli_rpc_prog, GLUSTER_CLI_FSM_LOG,
+ NULL, this, gf_cli_fsm_log_cbk,
+ (xdrproc_t)xdr_gf1_cli_fsm_log_req);
+
+out:
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+
+ return ret;
+}
+
+static int
+gf_cli_gsync_config_command(dict_t *dict)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- dict_t *dict = NULL;
- char *volname = NULL;
- int32_t count = 0;
+ runner_t runner = {
+ 0,
+ };
+ char *subop = NULL;
+ char *gwd = NULL;
+ char *slave = NULL;
+ char *confpath = NULL;
+ char *master = NULL;
+ char *op_name = NULL;
+ int ret = -1;
+ char conf_path[PATH_MAX] = "";
+
+ if (dict_get_str_sizen(dict, "subop", &subop) != 0)
+ return -1;
+
+ if (strcmp(subop, "get") != 0 && strcmp(subop, "get-all") != 0) {
+ cli_out(GEOREP " config updated successfully");
+ return 0;
+ }
+
+ if (dict_get_str_sizen(dict, "glusterd_workdir", &gwd) != 0 ||
+ dict_get_str_sizen(dict, "slave", &slave) != 0)
+ return -1;
+
+ if (dict_get_str_sizen(dict, "master", &master) != 0)
+ master = NULL;
+ if (dict_get_str_sizen(dict, "op_name", &op_name) != 0)
+ op_name = NULL;
+
+ ret = dict_get_str_sizen(dict, "conf_path", &confpath);
+ if (ret || !confpath) {
+ ret = snprintf(conf_path, sizeof(conf_path) - 1,
+ "%s/" GEOREP "/gsyncd_template.conf", gwd);
+ conf_path[ret] = '\0';
+ confpath = conf_path;
+ }
+
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
+ runner_argprintf(&runner, "%s", confpath);
+ runner_argprintf(&runner, "--iprefix=%s", DATADIR);
+ if (master)
+ runner_argprintf(&runner, ":%s", master);
+ runner_add_arg(&runner, slave);
+ runner_argprintf(&runner, "--config-%s", subop);
+ if (op_name)
+ runner_add_arg(&runner, op_name);
+
+ return runner_run(&runner);
+}
- if (!frame || !this || !data) {
+static int
+gf_cli_print_status(char **title_values, gf_gsync_status_t **sts_vals,
+ int *spacing, int gsync_count, int number_of_fields,
+ int is_detail)
+{
+ int i = 0;
+ int j = 0;
+ int ret = 0;
+ int status_fields = 8; /* Indexed at 0 */
+ int total_spacing = 0;
+ char **output_values = NULL;
+ char *tmp = NULL;
+ char *hyphens = NULL;
+
+ /* calculating spacing for hyphens */
+ for (i = 0; i < number_of_fields; i++) {
+ /* Suppressing detail output for status */
+ if ((!is_detail) && (i > status_fields)) {
+ /* Suppressing detailed output for
+ * status */
+ continue;
+ }
+ spacing[i] += 3; /* Adding extra space to
+ distinguish between fields */
+ total_spacing += spacing[i];
+ }
+ total_spacing += 4; /* For the spacing between the fields */
+
+ /* char pointers for each field */
+ output_values = GF_MALLOC(number_of_fields * sizeof(char *),
+ gf_common_mt_char);
+ if (!output_values) {
+ ret = -1;
+ goto out;
+ }
+ for (i = 0; i < number_of_fields; i++) {
+ output_values[i] = GF_CALLOC(spacing[i] + 1, sizeof(char),
+ gf_common_mt_char);
+ if (!output_values[i]) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ cli_out(" ");
+
+ /* setting the title "NODE", "MASTER", etc. from title_values[]
+ and printing the same */
+ for (j = 0; j < number_of_fields; j++) {
+ if ((!is_detail) && (j > status_fields)) {
+ /* Suppressing detailed output for
+ * status */
+ output_values[j][0] = '\0';
+ continue;
+ }
+ memset(output_values[j], ' ', spacing[j]);
+ memcpy(output_values[j], title_values[j], strlen(title_values[j]));
+ output_values[j][spacing[j]] = '\0';
+ }
+ cli_out("%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s", output_values[0],
+ output_values[1], output_values[2], output_values[3],
+ output_values[4], output_values[5], output_values[6],
+ output_values[7], output_values[8], output_values[9],
+ output_values[10], output_values[11], output_values[12],
+ output_values[13], output_values[14], output_values[15]);
+
+ hyphens = GF_MALLOC((total_spacing + 1) * sizeof(char), gf_common_mt_char);
+ if (!hyphens) {
+ ret = -1;
+ goto out;
+ }
+
+ /* setting and printing the hyphens */
+ memset(hyphens, '-', total_spacing);
+ hyphens[total_spacing] = '\0';
+ cli_out("%s", hyphens);
+
+ for (i = 0; i < gsync_count; i++) {
+ for (j = 0; j < number_of_fields; j++) {
+ if ((!is_detail) && (j > status_fields)) {
+ /* Suppressing detailed output for
+ * status */
+ output_values[j][0] = '\0';
+ continue;
+ }
+ tmp = get_struct_variable(j, sts_vals[i]);
+ if (!tmp) {
+ gf_log("", GF_LOG_ERROR, "struct member empty.");
ret = -1;
goto out;
+ }
+ memset(output_values[j], ' ', spacing[j]);
+ memcpy(output_values[j], tmp, strlen(tmp));
+ output_values[j][spacing[j]] = '\0';
}
- dict = data;
+ cli_out("%s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s",
+ output_values[0], output_values[1], output_values[2],
+ output_values[3], output_values[4], output_values[5],
+ output_values[6], output_values[7], output_values[8],
+ output_values[9], output_values[10], output_values[11],
+ output_values[12], output_values[13], output_values[14],
+ output_values[15]);
+ }
- ret = dict_get_str (dict, "volname", &volname);
+out:
+ if (output_values) {
+ for (i = 0; i < number_of_fields; i++) {
+ if (output_values[i])
+ GF_FREE(output_values[i]);
+ }
+ GF_FREE(output_values);
+ }
- if (ret)
- goto out;
+ if (hyphens)
+ GF_FREE(hyphens);
- ret = dict_get_int32 (dict, "count", &count);
- if (ret)
- goto out;
+ return ret;
+}
+int
+gf_gsync_status_t_comparator(const void *p, const void *q)
+{
+ char *slavekey1 = NULL;
+ char *slavekey2 = NULL;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
+ slavekey1 = get_struct_variable(20, (*(gf_gsync_status_t **)p));
+ slavekey2 = get_struct_variable(20, (*(gf_gsync_status_t **)q));
+ if (!slavekey1 || !slavekey2) {
+ gf_log("cli", GF_LOG_ERROR, "struct member empty.");
+ return 0;
+ }
+
+ return strcmp(slavekey1, slavekey2);
+}
+
+static int
+gf_cli_read_status_data(dict_t *dict, gf_gsync_status_t **sts_vals,
+ int *spacing, int gsync_count, int number_of_fields)
+{
+ char *tmp = NULL;
+ char sts_val_name[PATH_MAX] = "";
+ int ret = 0;
+ int i = 0;
+ int j = 0;
+
+ /* Storing per node status info in each object */
+ for (i = 0; i < gsync_count; i++) {
+ snprintf(sts_val_name, sizeof(sts_val_name), "status_value%d", i);
+
+ /* Fetching the values from dict, and calculating
+ the max length for each field */
+ ret = dict_get_bin(dict, sts_val_name, (void **)&(sts_vals[i]));
+ if (ret)
+ goto out;
+
+ for (j = 0; j < number_of_fields; j++) {
+ tmp = get_struct_variable(j, sts_vals[i]);
+ if (!tmp) {
+ gf_log("", GF_LOG_ERROR, "struct member empty.");
+ ret = -1;
goto out;
+ }
+ if (strlen(tmp) > spacing[j])
+ spacing[j] = strlen(tmp);
}
+ }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_ADD_BRICK, NULL,
- this, gf_cli3_1_add_brick_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ /* Sort based on Session Slave */
+ qsort(sts_vals, gsync_count, sizeof(gf_gsync_status_t *),
+ gf_gsync_status_t_comparator);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
+static int
+gf_cli_gsync_status_output(dict_t *dict, gf_boolean_t is_detail)
+{
+ int gsync_count = 0;
+ int i = 0;
+ int ret = 0;
+ int spacing[16] = {0};
+ int num_of_fields = 16;
+ char errmsg[1024] = "";
+ char *master = NULL;
+ char *slave = NULL;
+ static char *title_values[] = {"MASTER NODE",
+ "MASTER VOL",
+ "MASTER BRICK",
+ "SLAVE USER",
+ "SLAVE",
+ "SLAVE NODE",
+ "STATUS",
+ "CRAWL STATUS",
+ "LAST_SYNCED",
+ "ENTRY",
+ "DATA",
+ "META",
+ "FAILURES",
+ "CHECKPOINT TIME",
+ "CHECKPOINT COMPLETED",
+ "CHECKPOINT COMPLETION TIME"};
+ gf_gsync_status_t **sts_vals = NULL;
+
+ /* Checks if any session is active or not */
+ ret = dict_get_int32_sizen(dict, "gsync-count", &gsync_count);
+ if (ret) {
+ ret = dict_get_str_sizen(dict, "master", &master);
+
+ ret = dict_get_str_sizen(dict, "slave", &slave);
+
+ if (master) {
+ if (slave)
+ snprintf(errmsg, sizeof(errmsg),
+ "No active geo-replication sessions between %s"
+ " and %s",
+ master, slave);
+ else
+ snprintf(errmsg, sizeof(errmsg),
+ "No active geo-replication sessions for %s", master);
+ } else
+ snprintf(errmsg, sizeof(errmsg),
+ "No active geo-replication sessions");
+
+ gf_log("cli", GF_LOG_INFO, "%s", errmsg);
+ cli_out("%s", errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < num_of_fields; i++)
+ spacing[i] = strlen(title_values[i]);
+
+ /* gsync_count = number of nodes reporting output.
+ each sts_val object will store output of each
+ node */
+ sts_vals = GF_MALLOC(gsync_count * sizeof(gf_gsync_status_t *),
+ gf_common_mt_char);
+ if (!sts_vals) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_cli_read_status_data(dict, sts_vals, spacing, gsync_count,
+ num_of_fields);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Unable to read status data");
+ goto out;
+ }
+
+ ret = gf_cli_print_status(title_values, sts_vals, spacing, gsync_count,
+ num_of_fields, is_detail);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Unable to print status output");
+ goto out;
+ }
+
+out:
+ if (sts_vals)
+ GF_FREE(sts_vals);
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_remove_brick (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- gf_cli_req req = {{0,}};;
- gf_cli_req status_req = {{0,}};;
- int ret = 0;
- dict_t *dict = NULL;
- int32_t command = 0;
- char *volname = NULL;
- dict_t *req_dict = NULL;
- int32_t cmd = 0;
- cli_local_t *local = NULL;
-
- if (!frame || !this || !data) {
+static int32_t
+write_contents_to_common_pem_file(dict_t *dict, int output_count)
+{
+ char *workdir = NULL;
+ char common_pem_file[PATH_MAX] = "";
+ char *output = NULL;
+ char output_name[32] = "";
+ int bytes_written = 0;
+ int fd = -1;
+ int ret = -1;
+ int i = -1;
+
+ ret = dict_get_str_sizen(dict, "glusterd_workdir", &workdir);
+ if (ret || !workdir) {
+ gf_log("", GF_LOG_ERROR, "Unable to fetch workdir");
+ ret = -1;
+ goto out;
+ }
+
+ snprintf(common_pem_file, sizeof(common_pem_file),
+ "%s/geo-replication/common_secret.pem.pub", workdir);
+
+ sys_unlink(common_pem_file);
+
+ fd = open(common_pem_file, O_WRONLY | O_CREAT, 0600);
+ if (fd == -1) {
+ gf_log("", GF_LOG_ERROR, "Failed to open %s Error : %s",
+ common_pem_file, strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 1; i <= output_count; i++) {
+ ret = snprintf(output_name, sizeof(output_name), "output_%d", i);
+ ret = dict_get_strn(dict, output_name, ret, &output);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Failed to get %s.", output_name);
+ cli_out("Unable to fetch output.");
+ }
+ if (output) {
+ bytes_written = sys_write(fd, output, strlen(output));
+ if (bytes_written != strlen(output)) {
+ gf_log("", GF_LOG_ERROR, "Failed to write to %s",
+ common_pem_file);
ret = -1;
goto out;
- }
-
- local = cli_local_get ();
- if (!local) {
+ }
+ /* Adding the new line character */
+ bytes_written = sys_write(fd, "\n", 1);
+ if (bytes_written != 1) {
+ gf_log("", GF_LOG_ERROR, "Failed to add new line char");
ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
goto out;
+ }
+ output = NULL;
}
+ }
- frame->local = local;
+ cli_out("Common secret pub file present at %s", common_pem_file);
+ ret = 0;
+out:
+ if (fd >= 0)
+ sys_close(fd);
- dict = data;
+ gf_log("", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
+}
- local->dict = dict_ref (dict);
+static int
+gf_cli_sys_exec_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int ret = -1;
+ int output_count = -1;
+ int i = -1;
+ char *output = NULL;
+ char *command = NULL;
+ char output_name[32] = "";
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (req->rpc_status == -1) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret) {
+ cli_err("%s", rsp.op_errstr ? rsp.op_errstr : "Command failed.");
+ ret = rsp.op_ret;
+ goto out;
+ }
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto out;
+ ret = dict_get_int32_sizen(dict, "output_count", &output_count);
+ if (ret) {
+ cli_out("Command executed successfully.");
+ ret = 0;
+ goto out;
+ }
- ret = dict_get_int32 (dict, "command", &command);
- if (ret)
- goto out;
+ ret = dict_get_str_sizen(dict, "command", &command);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Unable to get command from dict");
+ goto out;
+ }
- if ((command != GF_OP_CMD_STATUS) &&
- (command != GF_OP_CMD_STOP)) {
+ if (!strcmp(command, "gsec_create")) {
+ ret = write_contents_to_common_pem_file(dict, output_count);
+ if (!ret)
+ goto out;
+ }
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
- goto out;
- }
+ for (i = 1; i <= output_count; i++) {
+ ret = snprintf(output_name, sizeof(output_name), "output_%d", i);
+ ret = dict_get_strn(dict, output_name, ret, &output);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "Failed to get %s.", output_name);
+ cli_out("Unable to fetch output.");
+ }
+ if (output) {
+ cli_out("%s", output);
+ output = NULL;
+ }
+ }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_REMOVE_BRICK, NULL,
- this, gf_cli3_1_remove_brick_cbk,
- (xdrproc_t) xdr_gf_cli_req);
- } else {
- /* Need rebalance status to e sent :-) */
- req_dict = dict_new ();
- if (!req_dict) {
- ret = -1;
- goto out;
- }
+ ret = 0;
+out:
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
- ret = dict_set_str (req_dict, "volname", volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to set dict");
- goto out;
- }
+static int
+gf_cli_copy_file_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int ret = -1;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
- if (command == GF_OP_CMD_STATUS)
- cmd |= GF_DEFRAG_CMD_STATUS;
- else
- cmd |= GF_DEFRAG_CMD_STOP;
+ GF_ASSERT(myframe);
- ret = dict_set_int32 (req_dict, "rebalance-command", (int32_t) cmd);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to set dict");
- goto out;
- }
+ if (req->rpc_status == -1) {
+ goto out;
+ }
- ret = dict_allocate_and_serialize (req_dict,
- &status_req.dict.dict_val,
- (size_t *) &status_req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
- goto out;
- }
+ dict = dict_new();
- ret = cli_cmd_submit (&status_req, frame, cli_rpc_prog,
- GLUSTER_CLI_DEFRAG_VOLUME, NULL,
- this, gf_cli3_remove_brick_status_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
- }
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ if (ret)
+ goto out;
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
-
- if (status_req.dict.dict_val)
- GF_FREE (status_req.dict.dict_val);
+ if (rsp.op_ret) {
+ cli_err("%s", rsp.op_errstr ? rsp.op_errstr : "Copy unsuccessful");
+ ret = rsp.op_ret;
+ goto out;
+ }
- if (req_dict)
- dict_unref (req_dict);
+ cli_out("Successfully copied file.");
- return ret;
+out:
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
}
-int32_t
-gf_cli3_1_replace_brick (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- gf_cli_req req = {{0,}};
- int ret = 0;
- cli_local_t *local = NULL;
- dict_t *dict = NULL;
- char *src_brick = NULL;
- char *dst_brick = NULL;
- char *volname = NULL;
- int32_t op = 0;
-
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+static int
+gf_cli_gsync_set_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int ret = -1;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ char *gsync_status = NULL;
+ char *master = NULL;
+ char *slave = NULL;
+ int32_t type = 0;
+ gf_boolean_t status_detail = _gf_false;
+
+ GF_ASSERT(myframe);
+
+ if (req->rpc_status == -1) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret)
+ goto out;
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_gsync(dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- dict = data;
+ if (rsp.op_ret) {
+ cli_err("%s",
+ rsp.op_errstr ? rsp.op_errstr : GEOREP " command unsuccessful");
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(dict, "gsync-status", &gsync_status);
+ if (!ret)
+ cli_out("%s", gsync_status);
+
+ ret = dict_get_int32_sizen(dict, "type", &type);
+ if (ret) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ "failed to get type");
+ goto out;
+ }
+
+ switch (type) {
+ case GF_GSYNC_OPTION_TYPE_START:
+ case GF_GSYNC_OPTION_TYPE_STOP:
+ if (dict_get_str_sizen(dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str_sizen(dict, "slave", &slave) != 0)
+ slave = "???";
+
+ cli_out(
+ "%s " GEOREP " session between %s & %s has been successful",
+ type == GF_GSYNC_OPTION_TYPE_START ? "Starting" : "Stopping",
+ master, slave);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_PAUSE:
+ case GF_GSYNC_OPTION_TYPE_RESUME:
+ if (dict_get_str_sizen(dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str_sizen(dict, "slave", &slave) != 0)
+ slave = "???";
+
+ cli_out("%s " GEOREP " session between %s & %s has been successful",
+ type == GF_GSYNC_OPTION_TYPE_PAUSE ? "Pausing" : "Resuming",
+ master, slave);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_CONFIG:
+ ret = gf_cli_gsync_config_command(dict);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_STATUS:
+ status_detail = dict_get_str_boolean(dict, "status-detail",
+ _gf_false);
+ ret = gf_cli_gsync_status_output(dict, status_detail);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_DELETE:
+ if (dict_get_str_sizen(dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str_sizen(dict, "slave", &slave) != 0)
+ slave = "???";
+ cli_out("Deleting " GEOREP
+ " session between %s & %s has been successful",
+ master, slave);
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_CREATE:
+ if (dict_get_str_sizen(dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str_sizen(dict, "slave", &slave) != 0)
+ slave = "???";
+ cli_out("Creating " GEOREP
+ " session between %s & %s has been successful",
+ master, slave);
+ break;
- local = cli_local_get ();
- if (!local) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- goto out;
- }
+ default:
+ cli_out(GEOREP " command executed successfully");
+ }
- local->dict = dict_ref (dict);
- frame->local = local;
+out:
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
- ret = dict_get_int32 (dict, "operation", &op);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_get on operation failed");
- goto out;
- }
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_get on volname failed");
- goto out;
+static int32_t
+gf_cli_sys_exec(call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+
+ dict = data;
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_sys_exec_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict, GLUSTER_CLI_SYS_EXEC,
+ this, cli_rpc_prog, NULL);
+ if (ret)
+ if (!frame || !this || !data) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid data");
}
- ret = dict_get_str (dict, "src-brick", &src_brick);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_get on src-brick failed");
- goto out;
- }
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
- ret = dict_get_str (dict, "dst-brick", &dst_brick);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_get on dst-brick failed");
- goto out;
+static int32_t
+gf_cli_copy_file(call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+
+ dict = data;
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_copy_file_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_COPY_FILE, this, cli_rpc_prog, NULL);
+ if (ret)
+ if (!frame || !this || !data) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Invalid data");
}
- gf_log (this->name, GF_LOG_DEBUG,
- "Received command replace-brick %s with "
- "%s with operation=%d", src_brick,
- dst_brick, op);
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
+static int32_t
+gf_cli_gsync_set(call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to get serialized length of dict");
- goto out;
- }
+ dict = data;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_REPLACE_BRICK, NULL,
- this, gf_cli3_1_replace_brick_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_to_glusterd(&req, frame, gf_cli_gsync_set_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_GSYNC_SET, this, cli_rpc_prog, NULL);
+ GF_FREE(req.dict.dict_val);
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- if (req.dict.dict_val) {
- GF_FREE (req.dict.dict_val);
- }
+int
+cli_profile_info_percentage_cmp(void *a, void *b)
+{
+ cli_profile_info_t *ia = NULL;
+ cli_profile_info_t *ib = NULL;
- return ret;
-}
+ ia = a;
+ ib = b;
+ if (ia->percentage_avg_latency < ib->percentage_avg_latency)
+ return -1;
+ else if (ia->percentage_avg_latency > ib->percentage_avg_latency)
+ return 1;
+ return 0;
+}
-int32_t
-gf_cli3_1_log_rotate (call_frame_t *frame, xlator_t *this,
- void *data)
+static void
+cmd_profile_volume_brick_out(dict_t *dict, int count, int interval)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- dict_t *dict = NULL;
-
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
+ char key[256] = {0};
+ int i = 0;
+ uint64_t sec = 0;
+ uint64_t r_count = 0;
+ uint64_t w_count = 0;
+ uint64_t rb_counts[32] = {0};
+ uint64_t wb_counts[32] = {0};
+ cli_profile_info_t profile_info[GF_FOP_MAXVALUE] = {{0}};
+ cli_profile_info_t upcall_info[GF_UPCALL_FLAGS_MAXVALUE] = {
+ {0},
+ };
+ char output[128] = {0};
+ int per_line = 0;
+ char read_blocks[128] = {0};
+ char write_blocks[128] = {0};
+ int index = 0;
+ int is_header_printed = 0;
+ int ret = 0;
+ double total_percentage_latency = 0;
+
+ for (i = 0; i < 32; i++) {
+ snprintf(key, sizeof(key), "%d-%d-read-%" PRIu32, count, interval,
+ (1U << i));
+ ret = dict_get_uint64(dict, key, &rb_counts[i]);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
}
+ }
- dict = data;
+ for (i = 0; i < 32; i++) {
+ snprintf(key, sizeof(key), "%d-%d-write-%" PRIu32, count, interval,
+ (1U << i));
+ ret = dict_get_uint64(dict, key, &wb_counts[i]);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
+ }
+ }
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
+ for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) {
+ snprintf(key, sizeof(key), "%d-%d-%d-upcall-hits", count, interval, i);
+ ret = dict_get_uint64(dict, key, &upcall_info[i].fop_hits);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
+ }
+ upcall_info[i].fop_name = (char *)gf_upcall_list[i];
+ }
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to serialize dict");
- goto out;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ snprintf(key, sizeof(key), "%d-%d-%d-hits", count, interval, i);
+ ret = dict_get_uint64(dict, key, &profile_info[i].fop_hits);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
}
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_LOG_ROTATE, NULL,
- this, gf_cli3_1_log_rotate_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ snprintf(key, sizeof(key), "%d-%d-%d-avglatency", count, interval, i);
+ ret = dict_get_double(dict, key, &profile_info[i].avg_latency);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
+ }
+ snprintf(key, sizeof(key), "%d-%d-%d-minlatency", count, interval, i);
+ ret = dict_get_double(dict, key, &profile_info[i].min_latency);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
+ }
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ snprintf(key, sizeof(key), "%d-%d-%d-maxlatency", count, interval, i);
+ ret = dict_get_double(dict, key, &profile_info[i].max_latency);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
+ }
+ profile_info[i].fop_name = (char *)gf_fop_list[i];
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
- return ret;
+ total_percentage_latency += (profile_info[i].fop_hits *
+ profile_info[i].avg_latency);
+ }
+ if (total_percentage_latency) {
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ profile_info[i]
+ .percentage_avg_latency = 100 * ((profile_info[i].avg_latency *
+ profile_info[i].fop_hits) /
+ total_percentage_latency);
+ }
+ gf_array_insertionsort(profile_info, 1, GF_FOP_MAXVALUE - 1,
+ sizeof(cli_profile_info_t),
+ cli_profile_info_percentage_cmp);
+ }
+ snprintf(key, sizeof(key), "%d-%d-duration", count, interval);
+ ret = dict_get_uint64(dict, key, &sec);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
+ }
+
+ snprintf(key, sizeof(key), "%d-%d-total-read", count, interval);
+ ret = dict_get_uint64(dict, key, &r_count);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
+ }
+
+ snprintf(key, sizeof(key), "%d-%d-total-write", count, interval);
+ ret = dict_get_uint64(dict, key, &w_count);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get %s from dict", key);
+ }
+
+ if (interval == -1)
+ cli_out("Cumulative Stats:");
+ else
+ cli_out("Interval %d Stats:", interval);
+ snprintf(output, sizeof(output), "%14s", "Block Size:");
+ snprintf(read_blocks, sizeof(read_blocks), "%14s", "No. of Reads:");
+ snprintf(write_blocks, sizeof(write_blocks), "%14s", "No. of Writes:");
+ index = 14;
+ for (i = 0; i < 32; i++) {
+ if ((rb_counts[i] == 0) && (wb_counts[i] == 0))
+ continue;
+ per_line++;
+ snprintf(output + index, sizeof(output) - index, "%19" PRIu32 "b+ ",
+ (1U << i));
+ if (rb_counts[i]) {
+ snprintf(read_blocks + index, sizeof(read_blocks) - index,
+ "%21" PRId64 " ", rb_counts[i]);
+ } else {
+ snprintf(read_blocks + index, sizeof(read_blocks) - index, "%21s ",
+ "0");
+ }
+ if (wb_counts[i]) {
+ snprintf(write_blocks + index, sizeof(write_blocks) - index,
+ "%21" PRId64 " ", wb_counts[i]);
+ } else {
+ snprintf(write_blocks + index, sizeof(write_blocks) - index,
+ "%21s ", "0");
+ }
+ index += 22;
+ if (per_line == 3) {
+ cli_out("%s", output);
+ cli_out("%s", read_blocks);
+ cli_out("%s", write_blocks);
+ cli_out(" ");
+ per_line = 0;
+ snprintf(output, sizeof(output), "%14s", "Block Size:");
+ snprintf(read_blocks, sizeof(read_blocks), "%14s", "No. of Reads:");
+ snprintf(write_blocks, sizeof(write_blocks), "%14s",
+ "No. of Writes:");
+ index = 14;
+ }
+ }
+
+ if (per_line != 0) {
+ cli_out("%s", output);
+ cli_out("%s", read_blocks);
+ cli_out("%s", write_blocks);
+ }
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (profile_info[i].fop_hits == 0)
+ continue;
+ if (is_header_printed == 0) {
+ cli_out("%10s %13s %13s %13s %14s %11s", "%-latency", "Avg-latency",
+ "Min-Latency", "Max-Latency", "No. of calls", "Fop");
+ cli_out("%10s %13s %13s %13s %14s %11s", "---------", "-----------",
+ "-----------", "-----------", "------------", "----");
+ is_header_printed = 1;
+ }
+ if (profile_info[i].fop_hits) {
+ cli_out(
+ "%10.2lf %10.2lf us %10.2lf us %10.2lf us"
+ " %14" PRId64 " %11s",
+ profile_info[i].percentage_avg_latency,
+ profile_info[i].avg_latency, profile_info[i].min_latency,
+ profile_info[i].max_latency, profile_info[i].fop_hits,
+ profile_info[i].fop_name);
+ }
+ }
+
+ for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) {
+ if (upcall_info[i].fop_hits == 0)
+ continue;
+ if (upcall_info[i].fop_hits) {
+ cli_out(
+ "%10.2lf %10.2lf us %10.2lf us %10.2lf us"
+ " %14" PRId64 " %11s",
+ upcall_info[i].percentage_avg_latency,
+ upcall_info[i].avg_latency, upcall_info[i].min_latency,
+ upcall_info[i].max_latency, upcall_info[i].fop_hits,
+ upcall_info[i].fop_name);
+ }
+ }
+
+ cli_out(" ");
+ cli_out("%12s: %" PRId64 " seconds", "Duration", sec);
+ cli_out("%12s: %" PRId64 " bytes", "Data Read", r_count);
+ cli_out("%12s: %" PRId64 " bytes", "Data Written", w_count);
+ cli_out(" ");
}
-int32_t
-gf_cli3_1_sync_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_profile_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- int ret = 0;
- gf_cli_req req = {{0,}};
- dict_t *dict = NULL;
-
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ gf1_cli_stats_op op = GF_CLI_STATS_NONE;
+ char key[64] = {0};
+ int len;
+ int interval = 0;
+ int i = 1;
+ int32_t brick_count = 0;
+ char *volname = NULL;
+ char *brick = NULL;
+ char str[1024] = {
+ 0,
+ };
+ int stats_cleared = 0;
+ gf1_cli_info_op info_op = GF_CLI_INFO_NONE;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_DEBUG, "Received resp to profile");
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_profile(dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(dict, "op", (int32_t *)&op);
+ if (ret)
+ goto out;
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
+
+ if (rsp.op_ret && strcmp(rsp.op_errstr, "")) {
+ cli_err("%s", rsp.op_errstr);
+ } else {
+ switch (op) {
+ case GF_CLI_STATS_START:
+ cli_out("Starting volume profile on %s has been %s ", volname,
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+ break;
+ case GF_CLI_STATS_STOP:
+ cli_out("Stopping volume profile on %s has been %s ", volname,
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+ break;
+ case GF_CLI_STATS_INFO:
+ break;
+ default:
+ cli_out("volume profile on %s has been %s ", volname,
+ (rsp.op_ret) ? "unsuccessful" : "successful");
+ break;
}
+ }
- dict = data;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
-
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to serialize dict");
- goto out;
- }
+ if (rsp.op_ret) {
+ ret = rsp.op_ret;
+ goto out;
+ }
- ret = cli_cmd_submit (&req, frame,
- cli_rpc_prog, GLUSTER_CLI_SYNC_VOLUME,
- NULL, this, gf_cli3_1_sync_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ if (GF_CLI_STATS_INFO != op) {
+ ret = 0;
+ goto out;
+ }
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ ret = dict_get_int32_sizen(dict, "count", &brick_count);
+ if (ret)
+ goto out;
- return ret;
-}
+ if (!brick_count) {
+ cli_err("All bricks of volume %s are down.", volname);
+ goto out;
+ }
-int32_t
-gf_cli3_1_getspec (call_frame_t *frame, xlator_t *this,
- void *data)
-{
- gf_getspec_req req = {0,};
- int ret = 0;
- dict_t *dict = NULL;
+ ret = dict_get_int32_sizen(dict, "info-op", (int32_t *)&info_op);
+ if (ret)
+ goto out;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
+ while (i <= brick_count) {
+ len = snprintf(key, sizeof(key), "%d-brick", i);
+ ret = dict_get_strn(dict, key, len, &brick);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Couldn't get brick name");
+ goto out;
}
- dict = data;
+ ret = dict_get_str_boolean(dict, "nfs", _gf_false);
- ret = dict_get_str (dict, "volid", &req.key);
if (ret)
- goto out;
+ len = snprintf(str, sizeof(str), "NFS Server : %s", brick);
+ else
+ len = snprintf(str, sizeof(str), "Brick: %s", brick);
+ cli_out("%s", str);
+ memset(str, '-', len);
+ cli_out("%s", str);
+
+ if (GF_CLI_INFO_CLEAR == info_op) {
+ len = snprintf(key, sizeof(key), "%d-stats-cleared", i);
+ ret = dict_get_int32n(dict, key, len, &stats_cleared);
+ if (ret)
+ goto out;
+ cli_out(stats_cleared ? "Cleared stats."
+ : "Failed to clear stats.");
+ } else {
+ len = snprintf(key, sizeof(key), "%d-cumulative", i);
+ ret = dict_get_int32n(dict, key, len, &interval);
+ if (ret == 0)
+ cmd_profile_volume_brick_out(dict, i, interval);
- ret = cli_cmd_submit (&req, frame, &cli_handshake_prog,
- GF_HNDSK_GETSPEC, NULL,
- this, gf_cli3_1_getspec_cbk,
- (xdrproc_t) xdr_gf_getspec_req);
+ len = snprintf(key, sizeof(key), "%d-interval", i);
+ ret = dict_get_int32n(dict, key, len, &interval);
+ if (ret == 0)
+ cmd_profile_volume_brick_out(dict, i, interval);
+ }
+ i++;
+ }
+ ret = rsp.op_ret;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
-
- return ret;
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
}
-int32_t
-gf_cli3_1_quota (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_profile_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- dict_t *dict = NULL;
+ int ret = -1;
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ GF_ASSERT(frame);
+ GF_ASSERT(this);
+ GF_ASSERT(data);
- dict = data;
+ dict = data;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to get serialized length of dict");
- goto out;
- }
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_QUOTA, NULL,
- this, gf_cli3_1_quota_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = cli_to_glusterd(&req, frame, gf_cli_profile_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_PROFILE_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- GF_FREE (req.dict.dict_val);
-out:
- return ret;
+ GF_FREE(req.dict.dict_val);
+ return ret;
}
-int32_t
-gf_cli3_1_pmap_b2p (call_frame_t *frame, xlator_t *this, void *data)
+static int32_t
+gf_cli_top_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- pmap_port_by_brick_req req = {0,};
- int ret = 0;
- dict_t *dict = NULL;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ gf1_cli_stats_op op = GF_CLI_STATS_NONE;
+ char key[256] = {0};
+ int keylen;
+ int i = 0;
+ int32_t brick_count = 0;
+ char brick[1024];
+ int32_t members = 0;
+ char *filename;
+ char *bricks;
+ uint64_t value = 0;
+ int32_t j = 0;
+ gf1_cli_top_op top_op = GF_CLI_TOP_NONE;
+ uint64_t nr_open = 0;
+ uint64_t max_nr_open = 0;
+ double throughput = 0;
+ double time = 0;
+ int32_t time_sec = 0;
+ long int time_usec = 0;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char *openfd_str = NULL;
+ gf_boolean_t nfs = _gf_false;
+ gf_boolean_t clear_stats = _gf_false;
+ int stats_cleared = 0;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_DEBUG, "Received resp to top");
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, ""))
+ cli_err("%s", rsp.op_errstr);
+ cli_err("volume top unsuccessful");
+ ret = rsp.op_ret;
+ goto out;
+ }
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ dict = dict_new();
- dict = data;
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
- ret = dict_get_str (dict, "brick", &req.brick);
- if (ret)
- goto out;
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
- ret = cli_cmd_submit (&req, frame, &cli_pmap_prog,
- GF_PMAP_PORTBYBRICK, NULL,
- this, gf_cli3_1_pmap_b2p_cbk,
- (xdrproc_t) xdr_pmap_port_by_brick_req);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ ret = dict_get_int32_sizen(dict, "op", (int32_t *)&op);
- return ret;
-}
+ if (op != GF_CLI_STATS_TOP) {
+ ret = 0;
+ goto out;
+ }
-static int
-gf_cli3_1_fsm_log_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf1_cli_fsm_log_rsp rsp = {0,};
- int ret = -1;
- dict_t *dict = NULL;
- int tr_count = 0;
- char key[256] = {0};
- int i = 0;
- char *old_state = NULL;
- char *new_state = NULL;
- char *event = NULL;
- char *time = NULL;
-
- if (-1 == req->rpc_status) {
- goto out;
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_top(dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
}
+ goto out;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_fsm_log_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ ret = dict_get_int32_sizen(dict, "count", &brick_count);
+ if (ret)
+ goto out;
+ keylen = snprintf(key, sizeof(key), "%d-top-op", 1);
+ ret = dict_get_int32n(dict, key, keylen, (int32_t *)&top_op);
+ if (ret)
+ goto out;
- if (rsp.op_ret) {
- if (strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
- cli_err ("fsm log unsuccessful");
- ret = rsp.op_ret;
- goto out;
- }
+ clear_stats = dict_get_str_boolean(dict, "clear-stats", _gf_false);
- dict = dict_new ();
- if (!dict) {
- ret = -1;
- goto out;
- }
+ while (i < brick_count) {
+ i++;
+ keylen = snprintf(brick, sizeof(brick), "%d-brick", i);
+ ret = dict_get_strn(dict, brick, keylen, &bricks);
+ if (ret)
+ goto out;
- ret = dict_unserialize (rsp.fsm_log.fsm_log_val,
- rsp.fsm_log.fsm_log_len,
- &dict);
+ nfs = dict_get_str_boolean(dict, "nfs", _gf_false);
- if (ret) {
- cli_err ("bad response");
+ if (clear_stats) {
+ keylen = snprintf(key, sizeof(key), "%d-stats-cleared", i);
+ ret = dict_get_int32n(dict, key, keylen, &stats_cleared);
+ if (ret)
goto out;
+ cli_out(stats_cleared ? "Cleared stats for %s %s"
+ : "Failed to clear stats for %s %s",
+ nfs ? "NFS server on" : "brick", bricks);
+ continue;
}
- ret = dict_get_int32 (dict, "count", &tr_count);
- if (tr_count)
- cli_out("number of transitions: %d", tr_count);
+ if (nfs)
+ cli_out("NFS Server : %s", bricks);
else
- cli_out("No transitions");
- for (i = 0; i < tr_count; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "log%d-old-state", i);
- ret = dict_get_str (dict, key, &old_state);
- if (ret)
- goto out;
+ cli_out("Brick: %s", bricks);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "log%d-event", i);
- ret = dict_get_str (dict, key, &event);
- if (ret)
- goto out;
+ keylen = snprintf(key, sizeof(key), "%d-members", i);
+ ret = dict_get_int32n(dict, key, keylen, &members);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "log%d-new-state", i);
- ret = dict_get_str (dict, key, &new_state);
+ switch (top_op) {
+ case GF_CLI_TOP_OPEN:
+ snprintf(key, sizeof(key), "%d-current-open", i);
+ ret = dict_get_uint64(dict, key, &nr_open);
if (ret)
- goto out;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "log%d-time", i);
- ret = dict_get_str (dict, key, &time);
+ break;
+ snprintf(key, sizeof(key), "%d-max-open", i);
+ ret = dict_get_uint64(dict, key, &max_nr_open);
+ if (ret)
+ goto out;
+ keylen = snprintf(key, sizeof(key), "%d-max-openfd-time", i);
+ ret = dict_get_strn(dict, key, keylen, &openfd_str);
if (ret)
- goto out;
- cli_out ("Old State: [%s]\n"
- "New State: [%s]\n"
- "Event : [%s]\n"
- "timestamp: [%s]\n", old_state, new_state, event, time);
+ goto out;
+ cli_out("Current open fds: %" PRIu64 ", Max open fds: %" PRIu64
+ ", Max openfd time: %s",
+ nr_open, max_nr_open, openfd_str);
+ case GF_CLI_TOP_READ:
+ case GF_CLI_TOP_WRITE:
+ case GF_CLI_TOP_OPENDIR:
+ case GF_CLI_TOP_READDIR:
+ if (!members) {
+ continue;
+ }
+ cli_out("Count\t\tfilename\n=======================");
+ break;
+ case GF_CLI_TOP_READ_PERF:
+ case GF_CLI_TOP_WRITE_PERF:
+ snprintf(key, sizeof(key), "%d-throughput", i);
+ ret = dict_get_double(dict, key, &throughput);
+ if (!ret) {
+ snprintf(key, sizeof(key), "%d-time", i);
+ ret = dict_get_double(dict, key, &time);
+ }
+ if (!ret)
+ cli_out("Throughput %.2f MBps time %.4f secs", throughput,
+ time / 1e6);
+
+ if (!members) {
+ continue;
+ }
+ cli_out("%*s %-*s %-*s", VOL_TOP_PERF_SPEED_WIDTH, "MBps",
+ VOL_TOP_PERF_FILENAME_DEF_WIDTH, "Filename",
+ VOL_TOP_PERF_TIME_WIDTH, "Time");
+ cli_out("%*s %-*s %-*s", VOL_TOP_PERF_SPEED_WIDTH,
+ "====", VOL_TOP_PERF_FILENAME_DEF_WIDTH,
+ "========", VOL_TOP_PERF_TIME_WIDTH, "====");
+ break;
+ default:
+ goto out;
}
- ret = rsp.op_ret;
+ for (j = 1; j <= members; j++) {
+ keylen = snprintf(key, sizeof(key), "%d-filename-%d", i, j);
+ ret = dict_get_strn(dict, key, keylen, &filename);
+ if (ret)
+ break;
+ snprintf(key, sizeof(key), "%d-value-%d", i, j);
+ ret = dict_get_uint64(dict, key, &value);
+ if (ret)
+ goto out;
+ if (top_op == GF_CLI_TOP_READ_PERF ||
+ top_op == GF_CLI_TOP_WRITE_PERF) {
+ keylen = snprintf(key, sizeof(key), "%d-time-sec-%d", i, j);
+ ret = dict_get_int32n(dict, key, keylen, (int32_t *)&time_sec);
+ if (ret)
+ goto out;
+ keylen = snprintf(key, sizeof(key), "%d-time-usec-%d", i, j);
+ ret = dict_get_int32n(dict, key, keylen, (int32_t *)&time_usec);
+ if (ret)
+ goto out;
+ gf_time_fmt(timestr, sizeof timestr, time_sec, gf_timefmt_FT);
+ snprintf(timestr + strlen(timestr),
+ sizeof timestr - strlen(timestr), ".%ld", time_usec);
+ if (strlen(filename) < VOL_TOP_PERF_FILENAME_DEF_WIDTH)
+ cli_out("%*" PRIu64 " %-*s %-*s", VOL_TOP_PERF_SPEED_WIDTH,
+ value, VOL_TOP_PERF_FILENAME_DEF_WIDTH, filename,
+ VOL_TOP_PERF_TIME_WIDTH, timestr);
+ else
+ cli_out("%*" PRIu64 " ...%-*s %-*s",
+ VOL_TOP_PERF_SPEED_WIDTH, value,
+ VOL_TOP_PERF_FILENAME_ALT_WIDTH,
+ filename + strlen(filename) -
+ VOL_TOP_PERF_FILENAME_ALT_WIDTH,
+ VOL_TOP_PERF_TIME_WIDTH, timestr);
+ } else {
+ cli_out("%" PRIu64 "\t\t%s", value, filename);
+ }
+ }
+ }
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ cli_cmd_broadcast_response(ret);
+
+ if (dict)
+ dict_unref(dict);
+
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
}
-int32_t
-gf_cli3_1_fsm_log (call_frame_t *frame, xlator_t *this, void *data)
+static int32_t
+gf_cli_top_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- int ret = -1;
- gf1_cli_fsm_log_req req = {0,};
+ int ret = -1;
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+
+ GF_ASSERT(frame);
+ GF_ASSERT(this);
+ GF_ASSERT(data);
+
+ dict = data;
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_top_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_PROFILE_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
- GF_ASSERT (frame);
- GF_ASSERT (this);
- GF_ASSERT (data);
+static int
+gf_cli_getwd_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf1_cli_getwd_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf1_cli_getwd_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ if (rsp.op_ret == -1) {
+ cli_err("getwd failed");
+ ret = rsp.op_ret;
+ goto out;
+ }
- if (!frame || !this || !data)
- goto out;
- req.name = data;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_FSM_LOG, NULL,
- this, gf_cli3_1_fsm_log_cbk,
- (xdrproc_t) xdr_gf1_cli_fsm_log_req);
+ gf_log("cli", GF_LOG_INFO, "Received resp to getwd");
+
+ cli_out("%s", rsp.wd);
+
+ ret = 0;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ cli_cmd_broadcast_response(ret);
+ if (rsp.wd) {
+ free(rsp.wd);
+ }
- return ret;
+ return ret;
}
-int
-gf_cli3_1_gsync_config_command (dict_t *dict)
+static int32_t
+gf_cli_getwd(call_frame_t *frame, xlator_t *this, void *data)
{
- runner_t runner = {0,};
- char *subop = NULL;
- char *gwd = NULL;
- char *slave = NULL;
- char *master = NULL;
- char *op_name = NULL;
-
- if (dict_get_str (dict, "subop", &subop) != 0)
- return -1;
+ int ret = -1;
+ gf1_cli_getwd_req req = {
+ 0,
+ };
- if (strcmp (subop, "get") != 0 && strcmp (subop, "get-all") != 0) {
- cli_out (GEOREP" config updated successfully");
- return 0;
- }
+ GF_ASSERT(frame);
+ GF_ASSERT(this);
- if (dict_get_str (dict, "glusterd_workdir", &gwd) != 0 ||
- dict_get_str (dict, "slave", &slave) != 0)
- return -1;
+ if (!frame || !this)
+ goto out;
- if (dict_get_str (dict, "master", &master) != 0)
- master = NULL;
- if (dict_get_str (dict, "op_name", &op_name) != 0)
- op_name = NULL;
+ ret = cli_cmd_submit(NULL, &req, frame, cli_rpc_prog, GLUSTER_CLI_GETWD,
+ NULL, this, gf_cli_getwd_cbk,
+ (xdrproc_t)xdr_gf1_cli_getwd_req);
- runinit (&runner);
- runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
- runner_argprintf (&runner, "%s/"GSYNC_CONF, gwd);
- if (master)
- runner_argprintf (&runner, ":%s", master);
- runner_add_arg (&runner, slave);
- runner_argprintf (&runner, "--config-%s", subop);
- if (op_name)
- runner_add_arg (&runner, op_name);
+out:
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- return runner_run (&runner);
+ return ret;
}
-int
-gf_cli3_1_gsync_out_status (dict_t *dict)
+static void
+cli_print_volume_status_mempool(dict_t *dict, char *prefix)
{
- int gsync_count = 0;
- int i = 0;
- int ret = 0;
- char mst[PATH_MAX] = {0, };
- char slv[PATH_MAX]= {0, };
- char sts[PATH_MAX] = {0, };
- char hyphens[81] = {0, };
- char *mst_val = NULL;
- char *slv_val = NULL;
- char *sts_val = NULL;
+ int ret = -1;
+ int32_t mempool_count = 0;
+ char *name = NULL;
+ int32_t hotcount = 0;
+ int32_t coldcount = 0;
+ uint64_t paddedsizeof = 0;
+ uint64_t alloccount = 0;
+ int32_t maxalloc = 0;
+ uint64_t pool_misses = 0;
+ int32_t maxstdalloc = 0;
+ char key[128] = {
+ /* prefix is really small 'brick%d' really */
+ 0,
+ };
+ int keylen;
+ int i = 0;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(prefix);
+
+ keylen = snprintf(key, sizeof(key), "%s.mempool-count", prefix);
+ ret = dict_get_int32n(dict, key, keylen, &mempool_count);
+ if (ret)
+ goto out;
+
+ cli_out("Mempool Stats\n-------------");
+ cli_out("%-30s %9s %9s %12s %10s %8s %8s %12s", "Name", "HotCount",
+ "ColdCount", "PaddedSizeof", "AllocCount", "MaxAlloc", "Misses",
+ "Max-StdAlloc");
+ cli_out("%-30s %9s %9s %12s %10s %8s %8s %12s", "----", "--------",
+ "---------", "------------", "----------", "--------", "--------",
+ "------------");
+
+ for (i = 0; i < mempool_count; i++) {
+ keylen = snprintf(key, sizeof(key), "%s.pool%d.name", prefix, i);
+ ret = dict_get_strn(dict, key, keylen, &name);
+ if (ret)
+ goto out;
- cli_out ("%-20s %-50s %-10s", "MASTER", "SLAVE", "STATUS");
+ keylen = snprintf(key, sizeof(key), "%s.pool%d.hotcount", prefix, i);
+ ret = dict_get_int32n(dict, key, keylen, &hotcount);
+ if (ret)
+ goto out;
- for (i=0; i<sizeof(hyphens)-1; i++)
- hyphens[i] = '-';
+ keylen = snprintf(key, sizeof(key), "%s.pool%d.coldcount", prefix, i);
+ ret = dict_get_int32n(dict, key, keylen, &coldcount);
+ if (ret)
+ goto out;
- cli_out ("%s", hyphens);
+ snprintf(key, sizeof(key), "%s.pool%d.paddedsizeof", prefix, i);
+ ret = dict_get_uint64(dict, key, &paddedsizeof);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof(key), "%s.pool%d.alloccount", prefix, i);
+ ret = dict_get_uint64(dict, key, &alloccount);
+ if (ret)
+ goto out;
- ret = dict_get_int32 (dict, "gsync-count", &gsync_count);
- if (ret) {
- gf_log ("cli", GF_LOG_INFO, "No active geo-replication sessions"
- "present for the selected");
- ret = 0;
- goto out;
- }
+ keylen = snprintf(key, sizeof(key), "%s.pool%d.max_alloc", prefix, i);
+ ret = dict_get_int32n(dict, key, keylen, &maxalloc);
+ if (ret)
+ goto out;
- for (i = 1; i <= gsync_count; i++) {
- snprintf (mst, sizeof(mst), "master%d", i);
- snprintf (slv, sizeof(slv), "slave%d", i);
- snprintf (sts, sizeof(sts), "status%d", i);
+ keylen = snprintf(key, sizeof(key), "%s.pool%d.max-stdalloc", prefix,
+ i);
+ ret = dict_get_int32n(dict, key, keylen, &maxstdalloc);
+ if (ret)
+ goto out;
- ret = dict_get_str (dict, mst, &mst_val);
- if (ret)
- goto out;
+ snprintf(key, sizeof(key), "%s.pool%d.pool-misses", prefix, i);
+ ret = dict_get_uint64(dict, key, &pool_misses);
+ if (ret)
+ goto out;
- ret = dict_get_str (dict, slv, &slv_val);
- if (ret)
- goto out;
+ cli_out("%-30s %9d %9d %12" PRIu64 " %10" PRIu64 " %8d %8" PRIu64
+ " %12d",
+ name, hotcount, coldcount, paddedsizeof, alloccount, maxalloc,
+ pool_misses, maxstdalloc);
+ }
- ret = dict_get_str (dict, sts, &sts_val);
- if (ret)
- goto out;
+out:
+ return;
+}
- cli_out ("%-20s %-50s %-10s", mst_val,
- slv_val, sts_val);
+static void
+cli_print_volume_status_mem(dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ char key[64] = {
+ 0,
+ };
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ int val = 0;
+ int i = 0;
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out("Memory status for volume : %s", volname);
+
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ cli_out("----------------------------------------------");
+
+ snprintf(key, sizeof(key), "brick%d.hostname", i);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret)
+ continue;
+ snprintf(key, sizeof(key), "brick%d.path", i);
+ ret = dict_get_str(dict, key, &path);
+ if (ret)
+ continue;
+ if (notbrick)
+ cli_out("%s : %s", hostname, path);
+ else
+ cli_out("Brick : %s:%s", hostname, path);
+ snprintf(key, sizeof(key), "brick%d.status", i);
+ ret = dict_get_int32(dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out("%s is offline", hostname);
+ else
+ cli_out("Brick is offline");
+ continue;
}
- out:
- return ret;
+ cli_out("Mallinfo\n--------");
-}
+ snprintf(key, sizeof(key), "brick%d.mallinfo.arena", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Arena", val);
-int
-gf_cli3_1_gsync_set_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- int ret = -1;
- gf_cli_rsp rsp = {0, };
- dict_t *dict = NULL;
- char *gsync_status = NULL;
- char *master = NULL;
- char *slave = NULL;
- int32_t type = 0;
-
- if (req->rpc_status == -1) {
- ret = -1;
- goto out;
- }
+ snprintf(key, sizeof(key), "brick%d.mallinfo.ordblks", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Ordblks", val);
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR,
- "Unable to get response structure");
- goto out;
- }
+ snprintf(key, sizeof(key), "brick%d.mallinfo.smblks", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Smblks", val);
- dict = dict_new ();
+ snprintf(key, sizeof(key), "brick%d.mallinfo.hblks", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Hblks", val);
- if (!dict) {
- ret = -1;
- goto out;
- }
+ snprintf(key, sizeof(key), "brick%d.mallinfo.hblkhd", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Hblkhd", val);
- ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ snprintf(key, sizeof(key), "brick%d.mallinfo.usmblks", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Usmblks", val);
+ snprintf(key, sizeof(key), "brick%d.mallinfo.fsmblks", i);
+ ret = dict_get_int32(dict, key, &val);
if (ret)
- goto out;
+ goto out;
+ cli_out("%-8s : %d", "Fsmblks", val);
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_dict ("volGeoRep", dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ snprintf(key, sizeof(key), "brick%d.mallinfo.uordblks", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Uordblks", val);
- if (rsp.op_ret) {
- cli_err ("%s", rsp.op_errstr ? rsp.op_errstr :
- GEOREP" command unsuccessful");
- ret = rsp.op_ret;
- goto out;
- }
+ snprintf(key, sizeof(key), "brick%d.mallinfo.fordblks", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Fordblks", val);
- ret = dict_get_str (dict, "gsync-status", &gsync_status);
- if (!ret)
- cli_out ("%s", gsync_status);
- else
- ret = 0;
+ snprintf(key, sizeof(key), "brick%d.mallinfo.keepcost", i);
+ ret = dict_get_int32(dict, key, &val);
+ if (ret)
+ goto out;
+ cli_out("%-8s : %d", "Keepcost", val);
- ret = dict_get_int32 (dict, "type", &type);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to get type");
- goto out;
+ cli_out(" ");
+ snprintf(key, sizeof(key), "brick%d", i);
+ cli_print_volume_status_mempool(dict, key);
+ }
+out:
+ cli_out("----------------------------------------------\n");
+ return;
+}
+
+static void
+cli_print_volume_status_client_list(dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ int client_count = 0;
+ int current_count = 0;
+ char key[64] = {
+ 0,
+ };
+ int i = 0;
+ int total = 0;
+ char *name = NULL;
+ gf_boolean_t is_fuse_done = _gf_false;
+ gf_boolean_t is_gfapi_done = _gf_false;
+ gf_boolean_t is_rebalance_done = _gf_false;
+ gf_boolean_t is_glustershd_done = _gf_false;
+ gf_boolean_t is_quotad_done = _gf_false;
+ gf_boolean_t is_snapd_done = _gf_false;
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out("Client connections for volume %s", volname);
+
+ ret = dict_get_int32_sizen(dict, "client-count", &client_count);
+ if (ret)
+ goto out;
+
+ cli_out("%-48s %15s", "Name", "count");
+ cli_out("%-48s %15s", "-----", "------");
+ for (i = 0; i < client_count; i++) {
+ name = NULL;
+ snprintf(key, sizeof(key), "client%d.name", i);
+ ret = dict_get_str(dict, key, &name);
+
+ if (!strncmp(name, "fuse", 4)) {
+ if (!is_fuse_done) {
+ is_fuse_done = _gf_true;
+ ret = dict_get_int32_sizen(dict, "fuse-count", &current_count);
+ if (ret)
+ goto out;
+ total = total + current_count;
+ goto print;
+ }
+ continue;
+ } else if (!strncmp(name, "gfapi", 5)) {
+ if (!is_gfapi_done) {
+ is_gfapi_done = _gf_true;
+ ret = dict_get_int32_sizen(dict, "gfapi-count", &current_count);
+ if (ret)
+ goto out;
+ total = total + current_count;
+ goto print;
+ }
+ continue;
+ } else if (!strcmp(name, "rebalance")) {
+ if (!is_rebalance_done) {
+ is_rebalance_done = _gf_true;
+ ret = dict_get_int32_sizen(dict, "rebalance-count",
+ &current_count);
+ if (ret)
+ goto out;
+ total = total + current_count;
+ goto print;
+ }
+ continue;
+ } else if (!strcmp(name, "glustershd")) {
+ if (!is_glustershd_done) {
+ is_glustershd_done = _gf_true;
+ ret = dict_get_int32_sizen(dict, "glustershd-count",
+ &current_count);
+ if (ret)
+ goto out;
+ total = total + current_count;
+ goto print;
+ }
+ continue;
+ } else if (!strcmp(name, "quotad")) {
+ if (!is_quotad_done) {
+ is_quotad_done = _gf_true;
+ ret = dict_get_int32_sizen(dict, "quotad-count",
+ &current_count);
+ if (ret)
+ goto out;
+ total = total + current_count;
+ goto print;
+ }
+ continue;
+ } else if (!strcmp(name, "snapd")) {
+ if (!is_snapd_done) {
+ is_snapd_done = _gf_true;
+ ret = dict_get_int32_sizen(dict, "snapd-count", &current_count);
+ if (ret)
+ goto out;
+ total = total + current_count;
+ goto print;
+ }
+ continue;
}
- switch (type) {
- case GF_GSYNC_OPTION_TYPE_START:
- case GF_GSYNC_OPTION_TYPE_STOP:
- if (dict_get_str (dict, "master", &master) != 0)
- master = "???";
- if (dict_get_str (dict, "slave", &slave) != 0)
- slave = "???";
+ print:
+ cli_out("%-48s %15d", name, current_count);
+ }
+out:
+ cli_out("\ntotal clients for volume %s : %d ", volname, total);
+ cli_out(
+ "-----------------------------------------------------------------\n");
+ return;
+}
- cli_out ("%s " GEOREP " session between %s & %s"
- " has been successful",
- type == GF_GSYNC_OPTION_TYPE_START ?
- "Starting" : "Stopping",
- master, slave);
- break;
+static void
+cli_print_volume_status_clients(dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ int client_count = 0;
+ char *clientname = NULL;
+ uint64_t bytesread = 0;
+ uint64_t byteswrite = 0;
+ uint32_t opversion = 0;
+ char key[128] = {
+ 0,
+ };
+ int i = 0;
+ int j = 0;
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out("Client connections for volume %s", volname);
+
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ hostname = NULL;
+ path = NULL;
+ online = -1;
+ client_count = 0;
+ clientname = NULL;
+ bytesread = 0;
+ byteswrite = 0;
+
+ cli_out("----------------------------------------------");
+
+ snprintf(key, sizeof(key), "brick%d.hostname", i);
+ ret = dict_get_str(dict, key, &hostname);
+
+ snprintf(key, sizeof(key), "brick%d.path", i);
+ ret = dict_get_str(dict, key, &path);
+
+ if (hostname && path) {
+ if (notbrick)
+ cli_out("%s : %s", hostname, path);
+ else
+ cli_out("Brick : %s:%s", hostname, path);
+ }
+ snprintf(key, sizeof(key), "brick%d.status", i);
+ ret = dict_get_int32(dict, key, &online);
+ if (!online) {
+ if (notbrick)
+ cli_out("%s is offline", hostname);
+ else
+ cli_out("Brick is offline");
+ continue;
+ }
+
+ snprintf(key, sizeof(key), "brick%d.clientcount", i);
+ ret = dict_get_int32(dict, key, &client_count);
+
+ if (hostname && path)
+ cli_out("Clients connected : %d", client_count);
+ if (client_count == 0)
+ continue;
+
+ cli_out("%-48s %15s %15s %15s", "Hostname", "BytesRead", "BytesWritten",
+ "OpVersion");
+ cli_out("%-48s %15s %15s %15s", "--------", "---------", "------------",
+ "---------");
+ for (j = 0; j < client_count; j++) {
+ snprintf(key, sizeof(key), "brick%d.client%d.hostname", i, j);
+ ret = dict_get_str(dict, key, &clientname);
+
+ snprintf(key, sizeof(key), "brick%d.client%d.bytesread", i, j);
+ ret = dict_get_uint64(dict, key, &bytesread);
+
+ snprintf(key, sizeof(key), "brick%d.client%d.byteswrite", i, j);
+ ret = dict_get_uint64(dict, key, &byteswrite);
+
+ snprintf(key, sizeof(key), "brick%d.client%d.opversion", i, j);
+ ret = dict_get_uint32(dict, key, &opversion);
+
+ cli_out("%-48s %15" PRIu64 " %15" PRIu64 " %15" PRIu32, clientname,
+ bytesread, byteswrite, opversion);
+ }
+ }
+out:
+ cli_out("----------------------------------------------\n");
+ return;
+}
- case GF_GSYNC_OPTION_TYPE_CONFIG:
- ret = gf_cli3_1_gsync_config_command (dict);
- break;
+#ifdef DEBUG /* this function is only used in debug */
+static void
+cli_print_volume_status_inode_entry(dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {
+ 0,
+ };
+ char *gfid = NULL;
+ uint64_t nlookup = 0;
+ uint32_t ref = 0;
+ int ia_type = 0;
+ char inode_type;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(prefix);
+
+ snprintf(key, sizeof(key), "%s.gfid", prefix);
+ ret = dict_get_str(dict, key, &gfid);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.nlookup", prefix);
+ ret = dict_get_uint64(dict, key, &nlookup);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.ref", prefix);
+ ret = dict_get_uint32(dict, key, &ref);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.ia_type", prefix);
+ ret = dict_get_int32(dict, key, &ia_type);
+ if (ret)
+ goto out;
+
+ switch (ia_type) {
+ case IA_IFREG:
+ inode_type = 'R';
+ break;
+ case IA_IFDIR:
+ inode_type = 'D';
+ break;
+ case IA_IFLNK:
+ inode_type = 'L';
+ break;
+ case IA_IFBLK:
+ inode_type = 'B';
+ break;
+ case IA_IFCHR:
+ inode_type = 'C';
+ break;
+ case IA_IFIFO:
+ inode_type = 'F';
+ break;
+ case IA_IFSOCK:
+ inode_type = 'S';
+ break;
+ default:
+ inode_type = 'I';
+ break;
+ }
- case GF_GSYNC_OPTION_TYPE_STATUS:
- ret = gf_cli3_1_gsync_out_status (dict);
- goto out;
- default:
- cli_out (GEOREP" command executed successfully");
- }
+ cli_out("%-40s %14" PRIu64 " %14" PRIu32 " %9c", gfid, nlookup, ref,
+ inode_type);
out:
+ return;
+}
+#endif
- cli_cmd_broadcast_response (ret);
+static void
+cli_print_volume_status_itables(dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {
+ 0,
+ };
+ uint32_t active_size = 0;
+ uint32_t lru_size = 0;
+ uint32_t purge_size = 0;
+ uint32_t lru_limit = 0;
+#ifdef DEBUG
+ int i = 0;
+#endif
+ GF_ASSERT(dict);
+ GF_ASSERT(prefix);
+
+ snprintf(key, sizeof(key), "%s.lru_limit", prefix);
+ ret = dict_get_uint32(dict, key, &lru_limit);
+ if (ret)
+ goto out;
+ cli_out("LRU limit : %u", lru_limit);
+
+ snprintf(key, sizeof(key), "%s.active_size", prefix);
+ ret = dict_get_uint32(dict, key, &active_size);
+ if (ret)
+ goto out;
+
+#ifdef DEBUG
+ if (active_size != 0) {
+ cli_out("Active inodes:");
+ cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type");
+ cli_out("%-40s %14s %14s %9s", "----", "-------", "---", "-------");
+ }
+ for (i = 0; i < active_size; i++) {
+ snprintf(key, sizeof(key), "%s.active%d", prefix, i);
+ cli_print_volume_status_inode_entry(dict, key);
+ }
+ cli_out(" ");
+#else
+ cli_out("Active Inodes : %u", active_size);
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
+#endif
- return ret;
+ snprintf(key, sizeof(key), "%s.lru_size", prefix);
+ ret = dict_get_uint32(dict, key, &lru_size);
+ if (ret)
+ goto out;
+
+#ifdef DEBUG
+ if (lru_size != 0) {
+ cli_out("LRU inodes:");
+ cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type");
+ cli_out("%-40s %14s %14s %9s", "----", "-------", "---", "-------");
+ }
+ for (i = 0; i < lru_size; i++) {
+ snprintf(key, sizeof(key), "%s.lru%d", prefix, i);
+ cli_print_volume_status_inode_entry(dict, key);
+ }
+ cli_out(" ");
+#else
+ cli_out("LRU Inodes : %u", lru_size);
+#endif
+
+ snprintf(key, sizeof(key), "%s.purge_size", prefix);
+ ret = dict_get_uint32(dict, key, &purge_size);
+ if (ret)
+ goto out;
+#ifdef DEBUG
+ if (purge_size != 0) {
+ cli_out("Purged inodes:");
+ cli_out("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref", "IA type");
+ cli_out("%-40s %14s %14s %9s", "----", "-------", "---", "-------");
+ }
+ for (i = 0; i < purge_size; i++) {
+ snprintf(key, sizeof(key), "%s.purge%d", prefix, i);
+ cli_print_volume_status_inode_entry(dict, key);
+ }
+#else
+ cli_out("Purge Inodes : %u", purge_size);
+#endif
+
+out:
+ return;
}
-int32_t
-gf_cli3_1_gsync_set (call_frame_t *frame, xlator_t *this,
- void *data)
+static void
+cli_print_volume_status_inode(dict_t *dict, gf_boolean_t notbrick)
{
- int ret = 0;
- dict_t *dict = NULL;
- gf_cli_req req = {{0,}};
+ int ret = -1;
+ char *volname = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ int conn_count = 0;
+ char key[64] = {
+ 0,
+ };
+ int i = 0;
+ int j = 0;
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out("Inode tables for volume %s", volname);
+
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ cli_out("----------------------------------------------");
+
+ snprintf(key, sizeof(key), "brick%d.hostname", i);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof(key), "brick%d.path", i);
+ ret = dict_get_str(dict, key, &path);
+ if (ret)
+ goto out;
+ if (notbrick)
+ cli_out("%s : %s", hostname, path);
+ else
+ cli_out("Brick : %s:%s", hostname, path);
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
+ snprintf(key, sizeof(key), "brick%d.status", i);
+ ret = dict_get_int32(dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out("%s is offline", hostname);
+ else
+ cli_out("Brick is offline");
+ continue;
}
- dict = data;
+ snprintf(key, sizeof(key), "brick%d.conncount", i);
+ ret = dict_get_int32(dict, key, &conn_count);
+ if (ret)
+ goto out;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *) &req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+ for (j = 0; j < conn_count; j++) {
+ if (conn_count > 1)
+ cli_out("Connection %d:", j + 1);
- goto out;
+ snprintf(key, sizeof(key), "brick%d.conn%d.itable", i, j);
+ cli_print_volume_status_itables(dict, key);
+ cli_out(" ");
}
+ }
+out:
+ cli_out("----------------------------------------------");
+ return;
+}
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_GSYNC_SET, NULL,
- this, gf_cli3_1_gsync_set_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+void
+cli_print_volume_status_fdtable(dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[256] = {
+ 0,
+ };
+ int refcount = 0;
+ uint32_t maxfds = 0;
+ int firstfree = 0;
+ int openfds = 0;
+ int fd_pid = 0;
+ int fd_refcount = 0;
+ int fd_flags = 0;
+ int i = 0;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(prefix);
+
+ snprintf(key, sizeof(key), "%s.refcount", prefix);
+ ret = dict_get_int32(dict, key, &refcount);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.maxfds", prefix);
+ ret = dict_get_uint32(dict, key, &maxfds);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.firstfree", prefix);
+ ret = dict_get_int32(dict, key, &firstfree);
+ if (ret)
+ goto out;
+
+ cli_out("RefCount = %d MaxFDs = %d FirstFree = %d", refcount, maxfds,
+ firstfree);
+
+ snprintf(key, sizeof(key), "%s.openfds", prefix);
+ ret = dict_get_int32(dict, key, &openfds);
+ if (ret)
+ goto out;
+ if (0 == openfds) {
+ cli_err("No open fds");
+ goto out;
+ }
+
+ cli_out("%-19s %-19s %-19s %-19s", "FD Entry", "PID", "RefCount", "Flags");
+ cli_out("%-19s %-19s %-19s %-19s", "--------", "---", "--------", "-----");
+
+ for (i = 0; i < maxfds; i++) {
+ snprintf(key, sizeof(key), "%s.fdentry%d.pid", prefix, i);
+ ret = dict_get_int32(dict, key, &fd_pid);
+ if (ret)
+ continue;
-out:
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ snprintf(key, sizeof(key), "%s.fdentry%d.refcount", prefix, i);
+ ret = dict_get_int32(dict, key, &fd_refcount);
+ if (ret)
+ continue;
- return ret;
-}
+ snprintf(key, sizeof(key), "%s.fdentry%d.flags", prefix, i);
+ ret = dict_get_int32(dict, key, &fd_flags);
+ if (ret)
+ continue;
+ cli_out("%-19d %-19d %-19d %-19d", i, fd_pid, fd_refcount, fd_flags);
+ }
-int
-cli_profile_info_percentage_cmp (void *a, void *b)
+out:
+ return;
+}
+
+static void
+cli_print_volume_status_fd(dict_t *dict, gf_boolean_t notbrick)
{
- cli_profile_info_t *ia = NULL;
- cli_profile_info_t *ib = NULL;
- int ret = 0;
+ int ret = -1;
+ char *volname = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ int conn_count = 0;
+ char key[64] = {
+ 0,
+ };
+ int i = 0;
+ int j = 0;
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out("FD tables for volume %s", volname);
+
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ cli_out("----------------------------------------------");
+
+ snprintf(key, sizeof(key), "brick%d.hostname", i);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof(key), "brick%d.path", i);
+ ret = dict_get_str(dict, key, &path);
+ if (ret)
+ goto out;
- ia = a;
- ib = b;
- if (ia->percentage_avg_latency < ib->percentage_avg_latency)
- ret = -1;
- else if (ia->percentage_avg_latency > ib->percentage_avg_latency)
- ret = 1;
+ if (notbrick)
+ cli_out("%s : %s", hostname, path);
else
- ret = 0;
- return ret;
-}
-
+ cli_out("Brick : %s:%s", hostname, path);
-void
-cmd_profile_volume_brick_out (dict_t *dict, int count, int interval)
-{
- char key[256] = {0};
- int i = 0;
- uint64_t sec = 0;
- uint64_t r_count = 0;
- uint64_t w_count = 0;
- uint64_t rb_counts[32] = {0};
- uint64_t wb_counts[32] = {0};
- cli_profile_info_t profile_info[GF_FOP_MAXVALUE] = {{0}};
- char output[128] = {0};
- int per_line = 0;
- char read_blocks[128] = {0};
- char write_blocks[128] = {0};
- int index = 0;
- int is_header_printed = 0;
- int ret = 0;
- double total_percentage_latency = 0;
-
- for (i = 0; i < 32; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-read-%d", count,
- interval, (1 << i));
- ret = dict_get_uint64 (dict, key, &rb_counts[i]);
- }
-
- for (i = 0; i < 32; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-write-%d", count, interval,
- (1<<i));
- ret = dict_get_uint64 (dict, key, &wb_counts[i]);
+ snprintf(key, sizeof(key), "brick%d.status", i);
+ ret = dict_get_int32(dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out("%s is offline", hostname);
+ else
+ cli_out("Brick is offline");
+ continue;
}
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-%d-hits", count,
- interval, i);
- ret = dict_get_uint64 (dict, key, &profile_info[i].fop_hits);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-%d-avglatency", count,
- interval, i);
- ret = dict_get_double (dict, key, &profile_info[i].avg_latency);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-%d-minlatency", count,
- interval, i);
- ret = dict_get_double (dict, key, &profile_info[i].min_latency);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-%d-maxlatency", count,
- interval, i);
- ret = dict_get_double (dict, key, &profile_info[i].max_latency);
- profile_info[i].fop_name = gf_fop_list[i];
-
- total_percentage_latency +=
- (profile_info[i].fop_hits * profile_info[i].avg_latency);
- }
- if (total_percentage_latency) {
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- profile_info[i].percentage_avg_latency = 100 * (
- (profile_info[i].avg_latency* profile_info[i].fop_hits) /
- total_percentage_latency);
- }
- gf_array_insertionsort (profile_info, 1, GF_FOP_MAXVALUE - 1,
- sizeof (cli_profile_info_t),
- cli_profile_info_percentage_cmp);
+ snprintf(key, sizeof(key), "brick%d.conncount", i);
+ ret = dict_get_int32(dict, key, &conn_count);
+ if (ret)
+ goto out;
+
+ for (j = 0; j < conn_count; j++) {
+ cli_out("Connection %d:", j + 1);
+
+ snprintf(key, sizeof(key), "brick%d.conn%d.fdtable", i, j);
+ cli_print_volume_status_fdtable(dict, key);
+ cli_out(" ");
}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-duration", count, interval);
- ret = dict_get_uint64 (dict, key, &sec);
+ }
+out:
+ cli_out("----------------------------------------------");
+ return;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-total-read", count, interval);
- ret = dict_get_uint64 (dict, key, &r_count);
+static void
+cli_print_volume_status_call_frame(dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[1024] = {
+ 0,
+ };
+ int ref_count = 0;
+ char *translator = 0;
+ int complete = 0;
+ char *parent = NULL;
+ char *wind_from = NULL;
+ char *wind_to = NULL;
+ char *unwind_from = NULL;
+ char *unwind_to = NULL;
+
+ if (!dict || !prefix)
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-total-write", count, interval);
- ret = dict_get_uint64 (dict, key, &w_count);
+ snprintf(key, sizeof(key), "%s.refcount", prefix);
+ ret = dict_get_int32(dict, key, &ref_count);
+ if (ret)
+ return;
- if (ret == 0) {
- }
+ snprintf(key, sizeof(key), "%s.translator", prefix);
+ ret = dict_get_str(dict, key, &translator);
+ if (ret)
+ return;
- if (interval == -1)
- cli_out ("Cumulative Stats:");
- else
- cli_out ("Interval %d Stats:", interval);
- snprintf (output, sizeof (output), "%14s", "Block Size:");
- snprintf (read_blocks, sizeof (read_blocks), "%14s", "No. of Reads:");
- snprintf (write_blocks, sizeof (write_blocks), "%14s", "No. of Writes:");
- index = 14;
- for (i = 0; i < 32; i++) {
- if ((rb_counts[i] == 0) && (wb_counts[i] == 0))
- continue;
- per_line++;
- snprintf (output+index, sizeof (output)-index, "%19db+ ", (1<<i));
- if (rb_counts[i]) {
- snprintf (read_blocks+index, sizeof (read_blocks)-index,
- "%21"PRId64" ", rb_counts[i]);
- } else {
- snprintf (read_blocks+index, sizeof (read_blocks)-index,
- "%21s ", "0");
- }
- if (wb_counts[i]) {
- snprintf (write_blocks+index, sizeof (write_blocks)-index,
- "%21"PRId64" ", wb_counts[i]);
- } else {
- snprintf (write_blocks+index, sizeof (write_blocks)-index,
- "%21s ", "0");
- }
- index += 22;
- if (per_line == 3) {
- cli_out ("%s", output);
- cli_out ("%s", read_blocks);
- cli_out ("%s", write_blocks);
- cli_out (" ");
- per_line = 0;
- memset (output, 0, sizeof (output));
- memset (read_blocks, 0, sizeof (read_blocks));
- memset (write_blocks, 0, sizeof (write_blocks));
- snprintf (output, sizeof (output), "%14s", "Block Size:");
- snprintf (read_blocks, sizeof (read_blocks), "%14s",
- "No. of Reads:");
- snprintf (write_blocks, sizeof (write_blocks), "%14s",
- "No. of Writes:");
- index = 14;
- }
- }
+ snprintf(key, sizeof(key), "%s.complete", prefix);
+ ret = dict_get_int32(dict, key, &complete);
+ if (ret)
+ return;
- if (per_line != 0) {
- cli_out ("%s", output);
- cli_out ("%s", read_blocks);
- cli_out ("%s", write_blocks);
- }
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- if (profile_info[i].fop_hits == 0)
- continue;
- if (is_header_printed == 0) {
- cli_out ("%10s %13s %13s %13s %14s %11s", "%-latency",
- "Avg-latency", "Min-Latency", "Max-Latency",
- "No. of calls", "Fop");
- cli_out ("%10s %13s %13s %13s %14s %11s", "---------",
- "-----------", "-----------", "-----------",
- "------------", "----");
- is_header_printed = 1;
- }
- if (profile_info[i].fop_hits) {
- cli_out ("%10.2lf %10.2lf us %10.2lf us %10.2lf us"
- " %14"PRId64" %11s",
- profile_info[i].percentage_avg_latency,
- profile_info[i].avg_latency,
- profile_info[i].min_latency,
- profile_info[i].max_latency,
- profile_info[i].fop_hits,
- profile_info[i].fop_name);
- }
- }
- cli_out (" ");
- cli_out ("%12s: %"PRId64" seconds", "Duration", sec);
- cli_out ("%12s: %"PRId64" bytes", "Data Read", r_count);
- cli_out ("%12s: %"PRId64" bytes", "Data Written", w_count);
- cli_out (" ");
+ cli_out(" Ref Count = %d", ref_count);
+ cli_out(" Translator = %s", translator);
+ cli_out(" Completed = %s", (complete ? "Yes" : "No"));
+
+ snprintf(key, sizeof(key), "%s.parent", prefix);
+ ret = dict_get_str(dict, key, &parent);
+ if (!ret)
+ cli_out(" Parent = %s", parent);
+
+ snprintf(key, sizeof(key), "%s.windfrom", prefix);
+ ret = dict_get_str(dict, key, &wind_from);
+ if (!ret)
+ cli_out(" Wind From = %s", wind_from);
+
+ snprintf(key, sizeof(key), "%s.windto", prefix);
+ ret = dict_get_str(dict, key, &wind_to);
+ if (!ret)
+ cli_out(" Wind To = %s", wind_to);
+
+ snprintf(key, sizeof(key), "%s.unwindfrom", prefix);
+ ret = dict_get_str(dict, key, &unwind_from);
+ if (!ret)
+ cli_out(" Unwind From = %s", unwind_from);
+
+ snprintf(key, sizeof(key), "%s.unwindto", prefix);
+ ret = dict_get_str(dict, key, &unwind_to);
+ if (!ret)
+ cli_out(" Unwind To = %s", unwind_to);
}
-int32_t
-gf_cli3_1_profile_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- dict_t *dict = NULL;
- gf1_cli_stats_op op = GF_CLI_STATS_NONE;
- char key[256] = {0};
- int interval = 0;
- int i = 1;
- int32_t brick_count = 0;
- char *volname = NULL;
- char *brick = NULL;
- char str[1024] = {0,};
-
- if (-1 == req->rpc_status) {
- goto out;
- }
+static void
+cli_print_volume_status_call_stack(dict_t *dict, char *prefix)
+{
+ int ret = -1;
+ char key[256] = {
+ 0,
+ };
+ int uid = 0;
+ int gid = 0;
+ int pid = 0;
+ uint64_t unique = 0;
+ // char *op = NULL;
+ int count = 0;
+ int i = 0;
+
+ if (!prefix)
+ return;
- gf_log ("cli", GF_LOG_DEBUG, "Received resp to profile");
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
- }
+ snprintf(key, sizeof(key), "%s.uid", prefix);
+ ret = dict_get_int32(dict, key, &uid);
+ if (ret)
+ return;
- dict = dict_new ();
+ snprintf(key, sizeof(key), "%s.gid", prefix);
+ ret = dict_get_int32(dict, key, &gid);
+ if (ret)
+ return;
- if (!dict) {
- ret = -1;
- goto out;
- }
+ snprintf(key, sizeof(key), "%s.pid", prefix);
+ ret = dict_get_int32(dict, key, &pid);
+ if (ret)
+ return;
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
+ snprintf(key, sizeof(key), "%s.unique", prefix);
+ ret = dict_get_uint64(dict, key, &unique);
+ if (ret)
+ return;
- if (ret) {
- gf_log ("", GF_LOG_ERROR,
- "Unable to allocate memory");
- goto out;
- } else {
- dict->extra_stdfree = rsp.dict.dict_val;
- }
+ /*
+ snprintf (key, sizeof (key), "%s.op", prefix);
+ ret = dict_get_str (dict, key, &op);
+ if (ret)
+ return;
+ */
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_vol_profile (dict, rsp.op_ret,
- rsp.op_errno,
- rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
+ snprintf(key, sizeof(key), "%s.count", prefix);
+ ret = dict_get_int32(dict, key, &count);
+ if (ret)
+ return;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto out;
+ cli_out(" UID : %d", uid);
+ cli_out(" GID : %d", gid);
+ cli_out(" PID : %d", pid);
+ cli_out(" Unique : %" PRIu64, unique);
+ // cli_out ("\tOp : %s", op);
+ cli_out(" Frames : %d", count);
- ret = dict_get_int32 (dict, "op", (int32_t*)&op);
- if (ret)
- goto out;
+ for (i = 0; i < count; i++) {
+ cli_out(" Frame %d", i + 1);
- if (rsp.op_ret && strcmp (rsp.op_errstr, "")) {
- cli_err ("%s", rsp.op_errstr);
- } else {
- switch (op) {
- case GF_CLI_STATS_START:
- cli_out ("Starting volume profile on %s has been %s ",
- volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
- break;
- case GF_CLI_STATS_STOP:
- cli_out ("Stopping volume profile on %s has been %s ",
- volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
- break;
- case GF_CLI_STATS_INFO:
- break;
- default:
- cli_out ("volume profile on %s has been %s ",
- volname,
- (rsp.op_ret) ? "unsuccessful": "successful");
- break;
- }
- }
+ snprintf(key, sizeof(key), "%s.frame%d", prefix, i);
+ cli_print_volume_status_call_frame(dict, key);
+ }
- if (rsp.op_ret) {
- ret = rsp.op_ret;
- goto out;
- }
+ cli_out(" ");
+}
- if (op != GF_CLI_STATS_INFO) {
- ret = 0;
- goto out;
+static void
+cli_print_volume_status_callpool(dict_t *dict, gf_boolean_t notbrick)
+{
+ int ret = -1;
+ char *volname = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int online = -1;
+ int call_count = 0;
+ char key[64] = {
+ 0,
+ };
+ int i = 0;
+ int j = 0;
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ cli_out("Pending calls for volume %s", volname);
+
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ for (i = 0; i <= index_max; i++) {
+ cli_out("----------------------------------------------");
+
+ snprintf(key, sizeof(key), "brick%d.hostname", i);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof(key), "brick%d.path", i);
+ ret = dict_get_str(dict, key, &path);
+ if (ret)
+ goto out;
+
+ if (notbrick)
+ cli_out("%s : %s", hostname, path);
+ else
+ cli_out("Brick : %s:%s", hostname, path);
+
+ snprintf(key, sizeof(key), "brick%d.status", i);
+ ret = dict_get_int32(dict, key, &online);
+ if (ret)
+ goto out;
+ if (!online) {
+ if (notbrick)
+ cli_out("%s is offline", hostname);
+ else
+ cli_out("Brick is offline");
+ continue;
}
- ret = dict_get_int32 (dict, "count", &brick_count);
+ snprintf(key, sizeof(key), "brick%d.callpool.count", i);
+ ret = dict_get_int32(dict, key, &call_count);
if (ret)
- goto out;
+ goto out;
+ cli_out("Pending calls: %d", call_count);
- if (!brick_count) {
- cli_out ("All bricks of volume %s are down.", volname);
- goto out;
- }
+ if (0 == call_count)
+ continue;
- while (i <= brick_count) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-brick", i);
- ret = dict_get_str (dict, key, &brick);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Couldn't get brick name");
- goto out;
- }
+ for (j = 0; j < call_count; j++) {
+ cli_out("Call Stack%d", j + 1);
- ret = dict_get_str_boolean (dict, "nfs", _gf_false);
- if (ret)
- snprintf (str, sizeof (str), "NFS Server : %s", brick);
- else
- snprintf (str, sizeof (str), "Brick: %s", brick);
- cli_out ("%s", str);
- memset (str, '-', strlen (str));
- cli_out ("%s", str);
-
- snprintf (key, sizeof (key), "%d-cumulative", i);
- ret = dict_get_int32 (dict, key, &interval);
- if (ret == 0) {
- cmd_profile_volume_brick_out (dict, i, interval);
- }
- snprintf (key, sizeof (key), "%d-interval", i);
- ret = dict_get_int32 (dict, key, &interval);
- if (ret == 0) {
- cmd_profile_volume_brick_out (dict, i, interval);
- }
- i++;
+ snprintf(key, sizeof(key), "brick%d.callpool.stack%d", i, j);
+ cli_print_volume_status_call_stack(dict, key);
}
- ret = rsp.op_ret;
+ }
out:
- if (dict)
- dict_unref (dict);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
- cli_cmd_broadcast_response (ret);
- return ret;
+ cli_out("----------------------------------------------");
+ return;
}
-int32_t
-gf_cli3_1_profile_volume (call_frame_t *frame, xlator_t *this, void *data)
+static void
+cli_print_volume_status_tasks(dict_t *dict)
{
- int ret = -1;
- gf_cli_req req = {{0,}};
- dict_t *dict = NULL;
+ int ret = -1;
+ int i = 0;
+ int j = 0;
+ int count = 0;
+ int task_count = 0;
+ int status = 0;
+ char *op = NULL;
+ char *task_id_str = NULL;
+ char *volname = NULL;
+ char key[64] = {
+ 0,
+ };
+ char task[32] = {
+ 0,
+ };
+ char *brick = NULL;
+
+ ret = dict_get_int32_sizen(dict, "tasks", &task_count);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get tasks count");
+ return;
+ }
- GF_ASSERT (frame);
- GF_ASSERT (this);
- GF_ASSERT (data);
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
- if (!frame || !this || !data)
- goto out;
- dict = data;
+ cli_out("Task Status of Volume %s", volname);
+ cli_print_line(CLI_BRICK_STATUS_LINE_LEN);
+
+ if (task_count == 0) {
+ cli_out("There are no active volume tasks");
+ cli_out(" ");
+ return;
+ }
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
+ for (i = 0; i < task_count; i++) {
+ snprintf(key, sizeof(key), "task%d.type", i);
+ ret = dict_get_str(dict, key, &op);
+ if (ret)
+ return;
+ cli_out("%-20s : %-20s", "Task", op);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+ snprintf(key, sizeof(key), "task%d.id", i);
+ ret = dict_get_str(dict, key, &task_id_str);
+ if (ret)
+ return;
+ cli_out("%-20s : %-20s", "ID", task_id_str);
+ snprintf(key, sizeof(key), "task%d.status", i);
+ ret = dict_get_int32(dict, key, &status);
+ if (ret)
+ return;
+
+ snprintf(task, sizeof(task), "task%d", i);
+
+ if (!strcmp(op, "Remove brick")) {
+ snprintf(key, sizeof(key), "%s.count", task);
+ ret = dict_get_int32(dict, key, &count);
+ if (ret)
goto out;
- }
+ cli_out("%-20s", "Removed bricks:");
+
+ for (j = 1; j <= count; j++) {
+ snprintf(key, sizeof(key), "%s.brick%d", task, j);
+ ret = dict_get_str(dict, key, &brick);
+ if (ret)
+ goto out;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_PROFILE_VOLUME, NULL,
- this, gf_cli3_1_profile_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ cli_out("%-20s", brick);
+ }
+ }
+ cli_out("%-20s : %-20s", "Status", cli_vol_task_status_str[status]);
+ cli_out(" ");
+ }
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
-
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
- return ret;
+ return;
}
-int32_t
-gf_cli3_1_top_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- dict_t *dict = NULL;
- gf1_cli_stats_op op = GF_CLI_STATS_NONE;
- char key[256] = {0};
- int i = 0;
- int32_t brick_count = 0;
- char brick[1024];
- int32_t members = 0;
- char *filename;
- char *bricks;
- uint64_t value = 0;
- int32_t j = 0;
- gf1_cli_top_op top_op = GF_CLI_TOP_NONE;
- uint64_t nr_open = 0;
- uint64_t max_nr_open = 0;
- double throughput = 0;
- double time = 0;
- int32_t time_sec = 0;
- long int time_usec = 0;
- char timestr[256] = {0, };
- char *openfd_str = NULL;
- gf_boolean_t nfs = _gf_false;
- gf_boolean_t clear_stats = _gf_false;
- int stats_cleared = 0;
-
- if (-1 == req->rpc_status) {
- goto out;
+static int
+gf_cli_status_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int ret = -1;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ int i = 0;
+ int type = -1;
+ int hot_brick_count = -1;
+ int pid = -1;
+ uint32_t cmd = 0;
+ gf_boolean_t notbrick = _gf_false;
+ char key[64] = {
+ 0,
+ };
+ char *hostname = NULL;
+ char *path = NULL;
+ char *volname = NULL;
+ dict_t *dict = NULL;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ cli_volume_status_t status = {0};
+ cli_local_t *local = NULL;
+ gf_boolean_t wipe_local = _gf_false;
+ char msg[1024] = {
+ 0,
+ };
+
+ GF_ASSERT(myframe);
+
+ if (req->rpc_status == -1)
+ goto out;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_DEBUG, "Received response to status cmd");
+
+ local = ((call_frame_t *)myframe)->local;
+ if (!local) {
+ local = cli_local_get();
+ if (!local) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Failed to get local");
+ goto out;
}
+ wipe_local = _gf_true;
+ }
- gf_log ("cli", GF_LOG_DEBUG, "Received resp to top");
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "Unable to decode response");
- goto out;
- }
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg),
+ "Unable to obtain volume status information.");
- if (rsp.op_ret) {
- if (strcmp (rsp.op_errstr, ""))
- cli_err ("%s", rsp.op_errstr);
- cli_err ("volume top unsuccessful");
- ret = rsp.op_ret;
- goto out;
- }
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (!local->all)
+ cli_xml_output_str("volStatus", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ ret = 0;
+ goto out;
+ }
+
+ cli_err("%s", msg);
+ if (local && local->all) {
+ ret = 0;
+ cli_out(" ");
+ } else
+ ret = -1;
+
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to create the dict");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret)
+ goto out;
+
+ ret = dict_get_uint32(dict, "cmd", &cmd);
+ if (ret)
+ goto out;
+
+ if ((cmd & GF_CLI_STATUS_ALL)) {
+ if (local && local->dict) {
+ dict_ref(dict);
+ ret = dict_set_static_ptr(local->dict, "rsp-dict", dict);
+ ret = 0;
+ } else {
+ gf_log("cli", GF_LOG_ERROR, "local not found");
+ ret = -1;
+ }
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ if (!local->all) {
+ ret = cli_xml_output_vol_status_begin(local, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto xml_end;
+ }
+ }
+ if (cmd & GF_CLI_STATUS_TASKS) {
+ ret = cli_xml_output_vol_status_tasks_detail(local, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Error outputting to xml");
+ goto xml_end;
+ }
+ } else {
+ ret = cli_xml_output_vol_status(local, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto xml_end;
+ }
+ }
+
+ xml_end:
+ if (!local->all) {
+ ret = cli_xml_output_vol_status_end(local);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ }
+ }
+ goto out;
+ }
+
+ if ((cmd & GF_CLI_STATUS_NFS) || (cmd & GF_CLI_STATUS_SHD) ||
+ (cmd & GF_CLI_STATUS_QUOTAD) || (cmd & GF_CLI_STATUS_SNAPD) ||
+ (cmd & GF_CLI_STATUS_BITD) || (cmd & GF_CLI_STATUS_SCRUB))
+ notbrick = _gf_true;
+
+ switch (cmd & GF_CLI_STATUS_MASK) {
+ case GF_CLI_STATUS_MEM:
+ cli_print_volume_status_mem(dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_CLIENTS:
+ cli_print_volume_status_clients(dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_CLIENT_LIST:
+ cli_print_volume_status_client_list(dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_INODE:
+ cli_print_volume_status_inode(dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_FD:
+ cli_print_volume_status_fd(dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_CALLPOOL:
+ cli_print_volume_status_callpool(dict, notbrick);
+ goto cont;
+ break;
+ case GF_CLI_STATUS_TASKS:
+ cli_print_volume_status_tasks(dict);
+ goto cont;
+ break;
+ default:
+ break;
+ }
- dict = dict_new ();
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ goto out;
- if (!dict) {
- ret = -1;
- goto out;
- }
+ ret = dict_get_int32_sizen(dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
+ ret = dict_get_int32_sizen(dict, "other-count", &other_count);
+ if (ret)
+ goto out;
- if (ret) {
- gf_log ("", GF_LOG_ERROR,
- "Unable to allocate memory");
- goto out;
- }
+ index_max = brick_index_max + other_count;
- ret = dict_get_int32 (dict, "op", (int32_t*)&op);
+ ret = dict_get_int32_sizen(dict, "type", &type);
+ if (ret)
+ goto out;
- if (op != GF_CLI_STATS_TOP) {
- ret = 0;
- goto out;
- }
+ ret = dict_get_int32_sizen(dict, "hot_brick_count", &hot_brick_count);
+ if (ret)
+ goto out;
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_vol_top (dict, rsp.op_ret,
- rsp.op_errno,
- rsp.op_errstr);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- }
- goto out;
- }
-#endif
+ cli_out("Status of volume: %s", volname);
- ret = dict_get_int32 (dict, "count", &brick_count);
+ if ((cmd & GF_CLI_STATUS_DETAIL) == 0) {
+ cli_out("%-*s %s %s %s %s", CLI_VOL_STATUS_BRICK_LEN,
+ "Gluster process", "TCP Port", "RDMA Port", "Online", "Pid");
+ cli_print_line(CLI_BRICK_STATUS_LINE_LEN);
+ }
+
+ status.brick = GF_MALLOC(PATH_MAX + 256, gf_common_mt_strdup);
+ if (!status.brick) {
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i <= index_max; i++) {
+ status.rdma_port = 0;
+
+ snprintf(key, sizeof(key), "brick%d.hostname", i);
+ ret = dict_get_str(dict, key, &hostname);
if (ret)
- goto out;
- snprintf (key, sizeof (key), "%d-top-op", 1);
- ret = dict_get_int32 (dict, key, (int32_t*)&top_op);
+ continue;
+
+ snprintf(key, sizeof(key), "brick%d.path", i);
+ ret = dict_get_str(dict, key, &path);
if (ret)
- goto out;
+ continue;
- clear_stats = dict_get_str_boolean (dict, "clear-stats", _gf_false);
+ /* Brick/not-brick is handled separately here as all
+ * types of nodes are contained in the default output
+ */
+ status.brick[0] = '\0';
+ if (!strcmp(hostname, "NFS Server") ||
+ !strcmp(hostname, "Self-heal Daemon") ||
+ !strcmp(hostname, "Quota Daemon") ||
+ !strcmp(hostname, "Snapshot Daemon") ||
+ !strcmp(hostname, "Scrubber Daemon") ||
+ !strcmp(hostname, "Bitrot Daemon"))
+ snprintf(status.brick, PATH_MAX + 255, "%s on %s", hostname, path);
+ else {
+ snprintf(key, sizeof(key), "brick%d.rdma_port", i);
+ ret = dict_get_int32(dict, key, &(status.rdma_port));
+ if (ret)
+ continue;
+ snprintf(status.brick, PATH_MAX + 255, "Brick %s:%s", hostname,
+ path);
+ }
- while (i < brick_count) {
- i++;
- snprintf (brick, sizeof (brick), "%d-brick", i);
- ret = dict_get_str (dict, brick, &bricks);
- if (ret)
- goto out;
-
- nfs = dict_get_str_boolean (dict, "nfs", _gf_false);
-
- if (clear_stats) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-stats-cleared", i);
- ret = dict_get_int32 (dict, key, &stats_cleared);
- if (ret)
- goto out;
- cli_out (stats_cleared ? "Cleared stats for %s %s" :
- "Failed to clear stats for %s %s",
- nfs ? "NFS server on" : "brick", bricks);
- continue;
- }
+ snprintf(key, sizeof(key), "brick%d.port", i);
+ ret = dict_get_int32(dict, key, &(status.port));
+ if (ret)
+ continue;
- if (nfs)
- cli_out ("NFS Server : %s", bricks);
- else
- cli_out ("Brick: %s", bricks);
-
- snprintf(key, sizeof (key), "%d-members", i);
- ret = dict_get_int32 (dict, key, &members);
-
- switch (top_op) {
- case GF_CLI_TOP_OPEN:
- snprintf (key, sizeof (key), "%d-current-open", i);
- ret = dict_get_uint64 (dict, key, &nr_open);
- if (ret)
- break;
- snprintf (key, sizeof (key), "%d-max-open", i);
- ret = dict_get_uint64 (dict, key, &max_nr_open);
- if (ret)
- goto out;
- snprintf (key, sizeof (key), "%d-max-openfd-time", i);
- ret = dict_get_str (dict, key, &openfd_str);
- if (ret)
- goto out;
- cli_out ("Current open fds: %"PRIu64", Max open"
- " fds: %"PRIu64", Max openfd time: %s", nr_open,
- max_nr_open, openfd_str);
- case GF_CLI_TOP_READ:
- case GF_CLI_TOP_WRITE:
- case GF_CLI_TOP_OPENDIR:
- case GF_CLI_TOP_READDIR:
- if (!members) {
- continue;
- }
- cli_out ("Count\t\tfilename\n=======================");
- break;
- case GF_CLI_TOP_READ_PERF:
- case GF_CLI_TOP_WRITE_PERF:
- snprintf (key, sizeof (key), "%d-throughput", i);
- ret = dict_get_double (dict, key, &throughput);
- if (!ret) {
- snprintf (key, sizeof (key), "%d-time", i);
- ret = dict_get_double (dict, key, &time);
- }
- if (!ret)
- cli_out ("Throughput %.2f MBps time %.4f secs", throughput,
- time / 1e6);
-
- if (!members) {
- continue;
- }
- cli_out ("%*s %-*s %-*s",
- VOL_TOP_PERF_SPEED_WIDTH, "MBps",
- VOL_TOP_PERF_FILENAME_DEF_WIDTH, "Filename",
- VOL_TOP_PERF_TIME_WIDTH, "Time");
- cli_out ("%*s %-*s %-*s",
- VOL_TOP_PERF_SPEED_WIDTH, "====",
- VOL_TOP_PERF_FILENAME_DEF_WIDTH, "========",
- VOL_TOP_PERF_TIME_WIDTH, "====");
- break;
- default:
- goto out;
- }
+ snprintf(key, sizeof(key), "brick%d.status", i);
+ ret = dict_get_int32(dict, key, &(status.online));
+ if (ret)
+ continue;
- for (j = 1; j <= members; j++) {
- snprintf (key, sizeof (key), "%d-filename-%d", i, j);
- ret = dict_get_str (dict, key, &filename);
- if (ret)
- break;
- snprintf (key, sizeof (key), "%d-value-%d", i, j);
- ret = dict_get_uint64 (dict, key, &value);
- if (ret)
- goto out;
- if ( top_op == GF_CLI_TOP_READ_PERF ||
- top_op == GF_CLI_TOP_WRITE_PERF) {
- snprintf (key, sizeof (key), "%d-time-sec-%d", i, j);
- ret = dict_get_int32 (dict, key, (int32_t *)&time_sec);
- if (ret)
- goto out;
- snprintf (key, sizeof (key), "%d-time-usec-%d", i, j);
- ret = dict_get_int32 (dict, key, (int32_t *)&time_usec);
- if (ret)
- goto out;
- gf_time_fmt (timestr, sizeof timestr,
- time_sec, gf_timefmt_FT);
- snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, time_usec);
- if (strlen (filename) < VOL_TOP_PERF_FILENAME_DEF_WIDTH)
- cli_out ("%*"PRIu64" %-*s %-*s",
- VOL_TOP_PERF_SPEED_WIDTH,
- value,
- VOL_TOP_PERF_FILENAME_DEF_WIDTH,
- filename,
- VOL_TOP_PERF_TIME_WIDTH,
- timestr);
- else
- cli_out ("%*"PRIu64" ...%-*s %-*s",
- VOL_TOP_PERF_SPEED_WIDTH,
- value,
- VOL_TOP_PERF_FILENAME_ALT_WIDTH ,
- filename + strlen (filename) -
- VOL_TOP_PERF_FILENAME_ALT_WIDTH,
- VOL_TOP_PERF_TIME_WIDTH,
- timestr);
- } else {
- cli_out ("%"PRIu64"\t\t%s", value, filename);
- }
- }
+ snprintf(key, sizeof(key), "brick%d.pid", i);
+ ret = dict_get_int32(dict, key, &pid);
+ if (ret)
+ continue;
+ if (pid == -1)
+ ret = gf_asprintf(&(status.pid_str), "%s", "N/A");
+ else
+ ret = gf_asprintf(&(status.pid_str), "%d", pid);
+
+ if (ret == -1)
+ goto out;
+
+ if ((cmd & GF_CLI_STATUS_DETAIL)) {
+ ret = cli_get_detail_status(dict, i, &status);
+ if (ret)
+ goto out;
+ cli_print_line(CLI_BRICK_STATUS_LINE_LEN);
+ cli_print_detailed_status(&status);
+ } else {
+ cli_print_brick_status(&status);
}
- ret = rsp.op_ret;
-out:
- cli_cmd_broadcast_response (ret);
+ /* Allocatated memory using gf_asprintf*/
+ GF_FREE(status.pid_str);
+ }
+ cli_out(" ");
- if (dict)
- dict_unref (dict);
+ if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE)
+ cli_print_volume_status_tasks(dict);
+cont:
+ ret = rsp.op_ret;
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val);
- return ret;
+out:
+ if (dict)
+ dict_unref(dict);
+ GF_FREE(status.brick);
+ if (local && wipe_local) {
+ cli_local_wipe(local);
+ }
+
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
}
-int32_t
-gf_cli3_1_top_volume (call_frame_t *frame, xlator_t *this, void *data)
+static int32_t
+gf_cli_status_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- int ret = -1;
- gf_cli_req req = {{0,}};
- dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = -1;
+ dict_t *dict = NULL;
+
+ dict = data;
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_status_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_STATUS_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
- GF_ASSERT (frame);
- GF_ASSERT (this);
- GF_ASSERT (data);
+static int
+gf_cli_status_volume_all(call_frame_t *frame, xlator_t *this, void *data)
+{
+ int i = 0;
+ int ret = -1;
+ int vol_count = -1;
+ uint32_t cmd = 0;
+ char key[1024] = {0};
+ char *volname = NULL;
+ void *vol_dict = NULL;
+ dict_t *dict = NULL;
+ cli_local_t *local = NULL;
- if (!frame || !this || !data)
- goto out;
- dict = data;
+ if (!frame)
+ goto out;
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+ if (!frame->local)
+ goto out;
- goto out;
- }
+ local = frame->local;
+ ret = dict_get_uint32(local->dict, "cmd", &cmd);
+ if (ret)
+ goto out;
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_PROFILE_VOLUME, NULL,
- this, gf_cli3_1_top_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ local->all = _gf_true;
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
- return ret;
-}
+ ret = gf_cli_status_volume(frame, this, data);
+ if (ret)
+ goto out;
-int
-gf_cli3_1_getwd_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf1_cli_getwd_rsp rsp = {0,};
- int ret = -1;
+ ret = dict_get_ptr(local->dict, "rsp-dict", &vol_dict);
+ if (ret)
+ goto out;
- if (-1 == req->rpc_status) {
- goto out;
- }
+ ret = dict_get_int32_sizen((dict_t *)vol_dict, "vol_count", &vol_count);
+ if (ret) {
+ cli_err("Failed to get names of volumes");
+ goto out;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_getwd_rsp);
- if (ret < 0 || rsp.op_ret == -1) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
+ /* remove the "all" flag in cmd */
+ cmd &= ~GF_CLI_STATUS_ALL;
+ cmd |= GF_CLI_STATUS_VOL;
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ // TODO: Pass proper op_* values
+ ret = cli_xml_output_vol_status_begin(local, 0, 0, NULL);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto xml_end;
}
+ }
- gf_log ("cli", GF_LOG_INFO, "Received resp to getwd");
+ if (vol_count == 0 && !(global_state->mode & GLUSTER_MODE_XML)) {
+ cli_err("No volumes present");
+ ret = 0;
+ goto out;
+ }
- cli_out ("%s", rsp.wd);
+ for (i = 0; i < vol_count; i++) {
+ dict = dict_new();
+ if (!dict)
+ goto out;
- ret = 0;
+ ret = snprintf(key, sizeof(key), "vol%d", i);
+ ret = dict_get_strn(vol_dict, key, ret, &volname);
+ if (ret)
+ goto out;
-out:
- cli_cmd_broadcast_response (ret);
- return ret;
-}
+ ret = dict_set_str_sizen(dict, "volname", volname);
+ if (ret)
+ goto out;
-int32_t
-gf_cli3_1_getwd (call_frame_t *frame, xlator_t *this, void *data)
-{
- int ret = -1;
- gf1_cli_getwd_req req = {0,};
+ ret = dict_set_uint32(dict, "cmd", cmd);
+ if (ret)
+ goto out;
- GF_ASSERT (frame);
- GF_ASSERT (this);
+ ret = gf_cli_status_volume(frame, this, dict);
+ if (ret)
+ goto out;
- if (!frame || !this)
- goto out;
+ dict_unref(dict);
+ }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_GETWD, NULL,
- this, gf_cli3_1_getwd_cbk,
- (xdrproc_t) xdr_gf1_cli_getwd_req);
+xml_end:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_status_end(local);
+ }
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, "status all failed");
- return ret;
-}
+ if (vol_dict)
+ dict_unref(vol_dict);
-void
-cli_print_volume_status_mempool (dict_t *dict, char *prefix)
-{
- int ret = -1;
- int32_t mempool_count = 0;
- char *name = NULL;
- int32_t hotcount = 0;
- int32_t coldcount = 0;
- uint64_t paddedsizeof = 0;
- uint64_t alloccount = 0;
- int32_t maxalloc = 0;
- uint64_t pool_misses = 0;
- int32_t maxstdalloc = 0;
- char key[1024] = {0,};
- int i = 0;
-
- GF_ASSERT (dict);
- GF_ASSERT (prefix);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.mempool-count",prefix);
- ret = dict_get_int32 (dict, key, &mempool_count);
- if (ret)
- goto out;
+ if (ret && dict)
+ dict_unref(dict);
- cli_out ("Mempool Stats\n-------------");
- cli_out ("%-30s %9s %9s %12s %10s %8s %8s %12s", "Name", "HotCount",
- "ColdCount", "PaddedSizeof", "AllocCount", "MaxAlloc",
- "Misses", "Max-StdAlloc");
- cli_out ("%-30s %9s %9s %12s %10s %8s %8s %12s", "----", "--------",
- "---------", "------------", "----------",
- "--------", "--------", "------------");
-
- for (i = 0; i < mempool_count; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.name", prefix, i);
- ret = dict_get_str (dict, key, &name);
- if (ret)
- goto out;
+ if (local)
+ cli_local_wipe(local);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.hotcount", prefix, i);
- ret = dict_get_int32 (dict, key, &hotcount);
- if (ret)
- goto out;
+ if (frame)
+ frame->local = NULL;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.coldcount", prefix, i);
- ret = dict_get_int32 (dict, key, &coldcount);
- if (ret)
- goto out;
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.paddedsizeof",
- prefix, i);
- ret = dict_get_uint64 (dict, key, &paddedsizeof);
- if (ret)
- goto out;
+static int
+gf_cli_mount_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf1_cli_mount_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.alloccount", prefix, i);
- ret = dict_get_uint64 (dict, key, &alloccount);
- if (ret)
- goto out;
+ GF_ASSERT(myframe);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.max_alloc", prefix, i);
- ret = dict_get_int32 (dict, key, &maxalloc);
- if (ret)
- goto out;
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.max-stdalloc", prefix, i);
- ret = dict_get_int32 (dict, key, &maxstdalloc);
- if (ret)
- goto out;
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf1_cli_mount_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.pool-misses", prefix, i);
- ret = dict_get_uint64 (dict, key, &pool_misses);
- if (ret)
- goto out;
+ gf_log("cli", GF_LOG_INFO, "Received resp to mount");
- cli_out ("%-30s %9d %9d %12"PRIu64" %10"PRIu64" %8d %8"PRIu64
- " %12d", name, hotcount, coldcount, paddedsizeof,
- alloccount, maxalloc, pool_misses, maxstdalloc);
- }
+ if (rsp.op_ret == 0) {
+ ret = 0;
+ cli_out("%s", rsp.path);
+ } else {
+ /* weird sounding but easy to parse... */
+ cli_err("%d : failed with this errno (%s)", rsp.op_errno,
+ strerror(rsp.op_errno));
+ ret = -1;
+ }
out:
- return;
-
+ cli_cmd_broadcast_response(ret);
+ if (rsp.path) {
+ free(rsp.path);
+ }
+ return ret;
}
-void
-cli_print_volume_status_mem (dict_t *dict, gf_boolean_t notbrick)
+static int32_t
+gf_cli_mount(call_frame_t *frame, xlator_t *this, void *data)
{
- int ret = -1;
- char *volname = NULL;
- char *hostname = NULL;
- char *path = NULL;
- int online = -1;
- char key[1024] = {0,};
- int brick_index_max = -1;
- int other_count = 0;
- int index_max = 0;
- int val = 0;
- int i = 0;
+ gf1_cli_mount_req req = {
+ 0,
+ };
+ int ret = -1;
+ void **dataa = data;
+ char *label = NULL;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data)
+ goto out;
+
+ label = dataa[0];
+ dict = dataa[1];
+
+ req.label = label;
+ ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = cli_cmd_submit(NULL, &req, frame, cli_rpc_prog, GLUSTER_CLI_MOUNT,
+ NULL, this, gf_cli_mount_cbk,
+ (xdrproc_t)xdr_gf1_cli_mount_req);
- GF_ASSERT (dict);
+out:
+ GF_FREE(req.dict.dict_val);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
+}
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto out;
- cli_out ("Memory status for volume : %s", volname);
+static int
+gf_cli_umount_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf1_cli_umount_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
- ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
- if (ret)
- goto out;
- ret = dict_get_int32 (dict, "other-count", &other_count);
- if (ret)
- goto out;
+ GF_ASSERT(myframe);
- index_max = brick_index_max + other_count;
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
- for (i = 0; i <= index_max; i++) {
- cli_out ("----------------------------------------------");
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf1_cli_umount_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- continue;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.path", i);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- continue;
- if (notbrick)
- cli_out ("%s : %s", hostname, path);
- else
- cli_out ("Brick : %s:%s", hostname, path);
+ gf_log("cli", GF_LOG_INFO, "Received resp to mount");
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.status", i);
- ret = dict_get_int32 (dict, key, &online);
- if (ret)
- goto out;
- if (!online) {
- if (notbrick)
- cli_out ("%s is offline", hostname);
- else
- cli_out ("Brick is offline");
- continue;
- }
+ if (rsp.op_ret == 0)
+ ret = 0;
+ else {
+ cli_err("umount failed");
+ ret = -1;
+ }
- cli_out ("Mallinfo\n--------");
+out:
+ cli_cmd_broadcast_response(ret);
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.arena", i);
- ret = dict_get_int32 (dict, key, &val);
- if (ret)
- goto out;
- cli_out ("%-8s : %d","Arena", val);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.ordblks", i);
- ret = dict_get_int32 (dict, key, &val);
- if(ret)
- goto out;
- cli_out ("%-8s : %d","Ordblks", val);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.smblks", i);
- ret = dict_get_int32 (dict, key, &val);
- if(ret)
- goto out;
- cli_out ("%-8s : %d","Smblks", val);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.hblks", i);
- ret = dict_get_int32 (dict, key, &val);
- if(ret)
- goto out;
- cli_out ("%-8s : %d", "Hblks", val);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.hblkhd", i);
- ret = dict_get_int32 (dict, key, &val);
- if (ret)
- goto out;
- cli_out ("%-8s : %d", "Hblkhd", val);
+static int32_t
+gf_cli_umount(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf1_cli_umount_req req = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.usmblks", i);
- ret = dict_get_int32 (dict, key, &val);
- if (ret)
- goto out;
- cli_out ("%-8s : %d", "Usmblks", val);
+ if (!frame || !this || !data)
+ goto out;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.fsmblks", i);
- ret = dict_get_int32 (dict, key, &val);
- if (ret)
- goto out;
- cli_out ("%-8s : %d", "Fsmblks", val);
+ dict = data;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.uordblks", i);
- ret = dict_get_int32 (dict, key, &val);
- if (ret)
- goto out;
- cli_out ("%-8s : %d", "Uordblks", val);
+ ret = dict_get_str_sizen(dict, "path", &req.path);
+ if (ret == 0)
+ ret = dict_get_int32_sizen(dict, "lazy", &req.lazy);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.fordblks", i);
- ret = dict_get_int32 (dict, key, &val);
- if (ret)
- goto out;
- cli_out ("%-8s : %d", "Fordblks", val);
+ if (ret) {
+ ret = -1;
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.keepcost", i);
- ret = dict_get_int32 (dict, key, &val);
- if (ret)
- goto out;
- cli_out ("%-8s : %d", "Keepcost", val);
+ ret = cli_cmd_submit(NULL, &req, frame, cli_rpc_prog, GLUSTER_CLI_UMOUNT,
+ NULL, this, gf_cli_umount_cbk,
+ (xdrproc_t)xdr_gf1_cli_umount_req);
- cli_out (" ");
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d", i);
- cli_print_volume_status_mempool (dict, key);
- }
out:
- cli_out ("----------------------------------------------\n");
- return;
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
}
-void
-cli_print_volume_status_clients (dict_t *dict, gf_boolean_t notbrick)
-{
- int ret = -1;
- char *volname = NULL;
- int brick_index_max = -1;
- int other_count = 0;
- int index_max = 0;
- char *hostname = NULL;
- char *path = NULL;
- int online = -1;
- int client_count = 0;
- char *clientname = NULL;
- uint64_t bytesread = 0;
- uint64_t byteswrite = 0;
- char key[1024] = {0,};
- int i = 0;
- int j = 0;
-
- GF_ASSERT (dict);
-
- ret = dict_get_str (dict, "volname", &volname);
+static void
+cmd_heal_volume_statistics_out(dict_t *dict, int brick)
+{
+ uint64_t num_entries = 0;
+ int ret = 0;
+ char key[256] = {0};
+ char *hostname = NULL;
+ uint64_t i = 0;
+ uint64_t healed_count = 0;
+ uint64_t split_brain_count = 0;
+ uint64_t heal_failed_count = 0;
+ char *start_time_str = NULL;
+ char *end_time_str = NULL;
+ char *crawl_type = NULL;
+ int progress = -1;
+
+ snprintf(key, sizeof key, "%d-hostname", brick);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret)
+ goto out;
+ cli_out("------------------------------------------------");
+ cli_out("\nCrawl statistics for brick no %d", brick);
+ cli_out("Hostname of brick %s", hostname);
+
+ snprintf(key, sizeof key, "statistics-%d-count", brick);
+ ret = dict_get_uint64(dict, key, &num_entries);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < num_entries; i++) {
+ snprintf(key, sizeof key, "statistics_crawl_type-%d-%" PRIu64, brick,
+ i);
+ ret = dict_get_str(dict, key, &crawl_type);
if (ret)
- goto out;
- cli_out ("Client connections for volume %s", volname);
+ goto out;
- ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ snprintf(key, sizeof key, "statistics_healed_cnt-%d-%" PRIu64, brick,
+ i);
+ ret = dict_get_uint64(dict, key, &healed_count);
if (ret)
- goto out;
- ret = dict_get_int32 (dict, "other-count", &other_count);
- if (ret)
- goto out;
+ goto out;
- index_max = brick_index_max + other_count;
+ snprintf(key, sizeof key, "statistics_sb_cnt-%d-%" PRIu64, brick, i);
+ ret = dict_get_uint64(dict, key, &split_brain_count);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof key, "statistics_heal_failed_cnt-%d-%" PRIu64,
+ brick, i);
+ ret = dict_get_uint64(dict, key, &heal_failed_count);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof key, "statistics_strt_time-%d-%" PRIu64, brick, i);
+ ret = dict_get_str(dict, key, &start_time_str);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof key, "statistics_end_time-%d-%" PRIu64, brick, i);
+ ret = dict_get_str(dict, key, &end_time_str);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof key, "statistics_inprogress-%d-%" PRIu64, brick,
+ i);
+ ret = dict_get_int32(dict, key, &progress);
+ if (ret)
+ goto out;
- for (i = 0; i <= index_max; i++) {
- cli_out ("----------------------------------------------");
+ cli_out("\nStarting time of crawl: %s", start_time_str);
+ if (progress == 1)
+ cli_out("Crawl is in progress");
+ else
+ cli_out("Ending time of crawl: %s", end_time_str);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- goto out;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.path", i);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- goto out;
+ cli_out("Type of crawl: %s", crawl_type);
+ cli_out("No. of entries healed: %" PRIu64, healed_count);
+ cli_out("No. of entries in split-brain: %" PRIu64, split_brain_count);
+ cli_out("No. of heal failed entries: %" PRIu64, heal_failed_count);
+ }
- if (notbrick)
- cli_out ("%s : %s", hostname, path);
- else
- cli_out ("Brick : %s:%s", hostname, path);
+out:
+ return;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.status", i);
- ret = dict_get_int32 (dict, key, &online);
- if (ret)
- goto out;
- if (!online) {
- if (notbrick)
- cli_out ("%s is offline", hostname);
- else
- cli_out ("Brick is offline");
- continue;
- }
+static void
+cmd_heal_volume_brick_out(dict_t *dict, int brick)
+{
+ uint64_t num_entries = 0;
+ int ret = 0;
+ char key[64] = {0};
+ char *hostname = NULL;
+ char *path = NULL;
+ char *status = NULL;
+ uint64_t i = 0;
+ uint32_t time = 0;
+ char timestr[GF_TIMESTR_SIZE] = {0};
+ char *shd_status = NULL;
+
+ snprintf(key, sizeof key, "%d-hostname", brick);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof key, "%d-path", brick);
+ ret = dict_get_str(dict, key, &path);
+ if (ret)
+ goto out;
+ cli_out("\nBrick %s:%s", hostname, path);
+
+ snprintf(key, sizeof key, "%d-status", brick);
+ ret = dict_get_str(dict, key, &status);
+ if (status && status[0] != '\0')
+ cli_out("Status: %s", status);
+
+ snprintf(key, sizeof key, "%d-shd-status", brick);
+ ret = dict_get_str(dict, key, &shd_status);
+
+ if (!shd_status) {
+ snprintf(key, sizeof key, "%d-count", brick);
+ ret = dict_get_uint64(dict, key, &num_entries);
+ cli_out("Number of entries: %" PRIu64, num_entries);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.clientcount", i);
- ret = dict_get_int32 (dict, key, &client_count);
- if (ret)
- goto out;
-
- cli_out ("Clients connected : %d", client_count);
- if (client_count == 0)
- continue;
-
- cli_out ("%-48s %15s %15s", "Hostname", "BytesRead",
- "BytesWritten");
- cli_out ("%-48s %15s %15s", "--------", "---------",
- "------------");
- for (j =0; j < client_count; j++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key),
- "brick%d.client%d.hostname", i, j);
- ret = dict_get_str (dict, key, &clientname);
- if (ret)
- goto out;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key),
- "brick%d.client%d.bytesread", i, j);
- ret = dict_get_uint64 (dict, key, &bytesread);
- if (ret)
- goto out;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key),
- "brick%d.client%d.byteswrite", i, j);
- ret = dict_get_uint64 (dict, key, &byteswrite);
- if (ret)
- goto out;
-
- cli_out ("%-48s %15"PRIu64" %15"PRIu64,
- clientname, bytesread, byteswrite);
+ for (i = 0; i < num_entries; i++) {
+ snprintf(key, sizeof key, "%d-%" PRIu64, brick, i);
+ ret = dict_get_str(dict, key, &path);
+ if (ret)
+ continue;
+ time = 0;
+ snprintf(key, sizeof key, "%d-%" PRIu64 "-time", brick, i);
+ ret = dict_get_uint32(dict, key, &time);
+ if (ret || !time) {
+ cli_out("%s", path);
+ } else {
+ gf_time_fmt(timestr, sizeof timestr, time, gf_timefmt_FT);
+ if (i == 0) {
+ cli_out("at path on brick");
+ cli_out("-----------------------------------");
}
+ cli_out("%s %s", timestr, path);
+ }
}
+ }
+
out:
- cli_out ("----------------------------------------------\n");
- return;
+ return;
}
-void
-cli_print_volume_status_inode_entry (dict_t *dict, char *prefix)
+static void
+cmd_heal_volume_statistics_heal_count_out(dict_t *dict, int brick)
{
- int ret = -1;
- char key[1024] = {0,};
- char *gfid = NULL;
- uint64_t nlookup = 0;
- uint32_t ref = 0;
- int ia_type = 0;
- char inode_type;
-
- GF_ASSERT (dict);
- GF_ASSERT (prefix);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.gfid", prefix);
- ret = dict_get_str (dict, key, &gfid);
- if (ret)
- goto out;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.nlookup", prefix);
- ret = dict_get_uint64 (dict, key, &nlookup);
+ uint64_t num_entries = 0;
+ int ret = 0;
+ char key[64] = {0};
+ char *hostname = NULL;
+ char *path = NULL;
+ char *status = NULL;
+ char *shd_status = NULL;
+
+ snprintf(key, sizeof key, "%d-hostname", brick);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret)
+ goto out;
+ snprintf(key, sizeof key, "%d-path", brick);
+ ret = dict_get_str(dict, key, &path);
+ if (ret)
+ goto out;
+ cli_out("\nBrick %s:%s", hostname, path);
+
+ snprintf(key, sizeof key, "%d-status", brick);
+ ret = dict_get_str(dict, key, &status);
+ if (status && strlen(status))
+ cli_out("Status: %s", status);
+
+ snprintf(key, sizeof key, "%d-shd-status", brick);
+ ret = dict_get_str(dict, key, &shd_status);
+
+ if (!shd_status) {
+ snprintf(key, sizeof key, "%d-hardlinks", brick);
+ ret = dict_get_uint64(dict, key, &num_entries);
if (ret)
- goto out;
+ cli_out("No gathered input for this brick");
+ else
+ cli_out("Number of entries: %" PRIu64, num_entries);
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.ref", prefix);
- ret = dict_get_uint32 (dict, key, &ref);
- if (ret)
- goto out;
+out:
+ return;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.ia_type", prefix);
- ret = dict_get_int32 (dict, key, &ia_type);
- if (ret)
- goto out;
+static int
+gf_is_cli_heal_get_command(gf_xl_afr_op_t heal_op)
+{
+ /* If the command is get command value is 1 otherwise 0, for
+ invalid commands -1 */
+ static int get_cmds[GF_SHD_OP_HEAL_DISABLE + 1] = {
+ [GF_SHD_OP_INVALID] = -1,
+ [GF_SHD_OP_HEAL_INDEX] = 0,
+ [GF_SHD_OP_HEAL_FULL] = 0,
+ [GF_SHD_OP_INDEX_SUMMARY] = 1,
+ [GF_SHD_OP_SPLIT_BRAIN_FILES] = 1,
+ [GF_SHD_OP_STATISTICS] = 1,
+ [GF_SHD_OP_STATISTICS_HEAL_COUNT] = 1,
+ [GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA] = 1,
+ [GF_SHD_OP_HEAL_ENABLE] = 0,
+ [GF_SHD_OP_HEAL_DISABLE] = 0,
+ };
+
+ if (heal_op > GF_SHD_OP_INVALID && heal_op <= GF_SHD_OP_HEAL_DISABLE)
+ return get_cmds[heal_op] == 1;
+ return _gf_false;
+}
- switch (ia_type) {
- case IA_IFREG:
- inode_type = 'R';
- break;
- case IA_IFDIR:
- inode_type = 'D';
- break;
- case IA_IFLNK:
- inode_type = 'L';
- break;
- case IA_IFBLK:
- inode_type = 'B';
- break;
- case IA_IFCHR:
- inode_type = 'C';
- break;
- case IA_IFIFO:
- inode_type = 'F';
- break;
- case IA_IFSOCK:
- inode_type = 'S';
- break;
- default:
- inode_type = 'I';
- break;
+int
+gf_cli_heal_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ cli_local_t *local = NULL;
+ char *volname = NULL;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ int brick_count = 0;
+ int i = 0;
+ gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID;
+ const char *operation = NULL;
+ const char *substr = NULL;
+ const char *heal_op_str = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ GF_ASSERT(frame->local);
+
+ local = frame->local;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(local->dict, "heal-op", (int32_t *)&heal_op);
+ // TODO: Proper XML output
+ //#if (HAVE_LIB_XML)
+ // if (global_state->mode & GLUSTER_MODE_XML) {
+ // ret = cli_xml_output_dict ("volHeal", dict, rsp.op_ret,
+ // rsp.op_errno, rsp.op_errstr);
+ // if (ret)
+ // gf_log ("cli", GF_LOG_ERROR, XML_ERROR);
+ // goto out;
+ // }
+ //#endif
+
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "failed to get volname");
+ goto out;
+ }
+
+ gf_log("cli", GF_LOG_INFO, "Received resp to heal volume");
+
+ operation = "Gathering ";
+ substr = "";
+ switch (heal_op) {
+ case GF_SHD_OP_HEAL_INDEX:
+ operation = "Launching heal operation ";
+ heal_op_str = "to perform index self heal";
+ substr = "\nUse heal info commands to check status.";
+ break;
+ case GF_SHD_OP_HEAL_FULL:
+ operation = "Launching heal operation ";
+ heal_op_str = "to perform full self heal";
+ substr = "\nUse heal info commands to check status.";
+ break;
+ case GF_SHD_OP_INDEX_SUMMARY:
+ heal_op_str = "list of entries to be healed";
+ break;
+ case GF_SHD_OP_SPLIT_BRAIN_FILES:
+ heal_op_str = "list of split brain entries";
+ break;
+ case GF_SHD_OP_STATISTICS:
+ heal_op_str = "crawl statistics";
+ break;
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT:
+ heal_op_str = "count of entries to be healed";
+ break;
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ heal_op_str = "count of entries to be healed per replica";
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+ case GF_SHD_OP_HEAL_SUMMARY:
+ case GF_SHD_OP_HEALED_FILES:
+ case GF_SHD_OP_HEAL_FAILED_FILES:
+ /* These cases are never hit; they're coded just to silence the
+ * compiler warnings.*/
+ break;
+
+ case GF_SHD_OP_INVALID:
+ heal_op_str = "invalid heal op";
+ break;
+ case GF_SHD_OP_HEAL_ENABLE:
+ operation = "";
+ heal_op_str = "Enable heal";
+ break;
+ case GF_SHD_OP_HEAL_DISABLE:
+ operation = "";
+ heal_op_str = "Disable heal";
+ break;
+ case GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE:
+ operation = "";
+ heal_op_str = "Enable granular entry heal";
+ break;
+ case GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE:
+ operation = "";
+ heal_op_str = "Disable granular entry heal";
+ break;
+ }
+
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, "")) {
+ cli_err("%s%s on volume %s has been unsuccessful:", operation,
+ heal_op_str, volname);
+ cli_err("%s", rsp.op_errstr);
}
+ ret = rsp.op_ret;
+ goto out;
+ } else {
+ cli_out("%s%s on volume %s has been successful %s", operation,
+ heal_op_str, volname, substr);
+ }
+
+ ret = rsp.op_ret;
+ if (!gf_is_cli_heal_get_command(heal_op))
+ goto out;
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(dict, "count", &brick_count);
+ if (ret)
+ goto out;
+
+ if (!brick_count) {
+ cli_err("All bricks of volume %s are down.", volname);
+ ret = -1;
+ goto out;
+ }
+
+ switch (heal_op) {
+ case GF_SHD_OP_STATISTICS:
+ for (i = 0; i < brick_count; i++)
+ cmd_heal_volume_statistics_out(dict, i);
+ break;
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT:
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ for (i = 0; i < brick_count; i++)
+ cmd_heal_volume_statistics_heal_count_out(dict, i);
+ break;
+ case GF_SHD_OP_INDEX_SUMMARY:
+ case GF_SHD_OP_SPLIT_BRAIN_FILES:
+ for (i = 0; i < brick_count; i++)
+ cmd_heal_volume_brick_out(dict, i);
+ break;
+ default:
+ break;
+ }
- cli_out ("%-40s %14"PRIu64" %14"PRIu32" %9c",
- gfid, nlookup, ref, inode_type);
+ ret = rsp.op_ret;
out:
- return;
-
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ if (dict)
+ dict_unref(dict);
+ return ret;
}
-void
-cli_print_volume_status_itables (dict_t *dict, char *prefix)
+static int32_t
+gf_cli_heal_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- int ret = -1;
- char key[1024] = {0,};
- uint32_t active_size = 0;
- uint32_t lru_size = 0;
- uint32_t purge_size = 0;
- int i =0;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = 0;
+ dict_t *dict = NULL;
- GF_ASSERT (dict);
- GF_ASSERT (prefix);
+ dict = data;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.active_size", prefix);
- ret = dict_get_uint32 (dict, key, &active_size);
- if (ret)
- goto out;
- if (active_size != 0) {
- cli_out ("Active inodes:");
- cli_out ("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref",
- "IA type");
- cli_out ("%-40s %14s %14s %9s", "----", "-------", "---",
- "-------");
- }
- for (i = 0; i < active_size; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.active%d", prefix, i);
- cli_print_volume_status_inode_entry (dict, key);
- }
- cli_out (" ");
+ ret = cli_to_glusterd(&req, frame, gf_cli_heal_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, dict,
+ GLUSTER_CLI_HEAL_VOLUME, this, cli_rpc_prog, NULL);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.lru_size", prefix);
- ret = dict_get_uint32 (dict, key, &lru_size);
- if (ret)
- goto out;
- if (lru_size != 0) {
- cli_out ("LRU inodes:");
- cli_out ("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref",
- "IA type");
- cli_out ("%-40s %14s %14s %9s", "----", "-------", "---",
- "-------");
- }
- for (i = 0; i < lru_size; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.lru%d", prefix, i);
- cli_print_volume_status_inode_entry (dict, key);
- }
- cli_out (" ");
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+
+ GF_FREE(req.dict.dict_val);
+
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.purge_size", prefix);
- ret = dict_get_uint32 (dict, key, &purge_size);
+static int32_t
+gf_cli_statedump_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char msg[1024] = "Volume statedump successful";
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status)
+ goto out;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+ gf_log("cli", GF_LOG_DEBUG, "Received response to statedump");
+ if (rsp.op_ret)
+ snprintf(msg, sizeof(msg), "%s", rsp.op_errstr);
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str("volStatedump", msg, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
if (ret)
- goto out;
- if (purge_size != 0) {
- cli_out ("Purged inodes:");
- cli_out ("%-40s %14s %14s %9s", "GFID", "Lookups", "Ref",
- "IA type");
- cli_out ("%-40s %14s %14s %9s", "----", "-------", "---",
- "-------");
- }
- for (i = 0; i < purge_size; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.purge%d", prefix, i);
- cli_print_volume_status_inode_entry (dict, key);
- }
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err("volume statedump: failed: %s", msg);
+ else
+ cli_out("volume statedump: success");
+ ret = rsp.op_ret;
out:
- return;
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
}
-void
-cli_print_volume_status_inode (dict_t *dict, gf_boolean_t notbrick)
+static int32_t
+gf_cli_statedump_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- int ret = -1;
- char *volname = NULL;
- int brick_index_max = -1;
- int other_count = 0;
- int index_max = 0;
- char *hostname = NULL;
- char *path = NULL;
- int online = -1;
- int conn_count = 0;
- char key[1024] = {0,};
- int i = 0;
- int j = 0;
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *options = NULL;
+ int ret = -1;
- GF_ASSERT (dict);
+ options = data;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto out;
- cli_out ("Inode tables for volume %s", volname);
+ ret = cli_to_glusterd(
+ &req, frame, gf_cli_statedump_volume_cbk, (xdrproc_t)xdr_gf_cli_req,
+ options, GLUSTER_CLI_STATEDUMP_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
- ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+static int32_t
+gf_cli_list_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int ret = -1;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ int vol_count = 0;
+ ;
+ char *volname = NULL;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status)
+ goto out;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_list(dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
if (ret)
- goto out;
- ret = dict_get_int32 (dict, "other-count", &other_count);
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
+
+ if (rsp.op_ret)
+ cli_err("%s", rsp.op_errstr);
+ else {
+ ret = dict_get_int32_sizen(dict, "count", &vol_count);
if (ret)
+ goto out;
+
+ if (vol_count == 0) {
+ cli_err("No volumes present in cluster");
+ goto out;
+ }
+ for (i = 0; i < vol_count; i++) {
+ ret = snprintf(key, sizeof(key), "volume%d", i);
+ ret = dict_get_strn(dict, key, ret, &volname);
+ if (ret)
goto out;
+ cli_out("%s", volname);
+ }
+ }
- index_max = brick_index_max + other_count;
+ ret = rsp.op_ret;
- for ( i = 0; i <= index_max; i++) {
- cli_out ("----------------------------------------------");
+out:
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- goto out;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.path", i);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- goto out;
- if (notbrick)
- cli_out ("%s : %s", hostname, path);
- else
- cli_out ("Brick : %s:%s", hostname, path);
+ if (dict)
+ dict_unref(dict);
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.status", i);
- ret = dict_get_int32 (dict, key, &online);
- if (ret)
- goto out;
- if (!online) {
- if (notbrick)
- cli_out ("%s is offline", hostname);
- else
- cli_out ("Brick is offline");
- continue;
- }
+static int32_t
+gf_cli_list_volume(call_frame_t *frame, xlator_t *this, void *data)
+{
+ int ret = -1;
+ gf_cli_req req = {{
+ 0,
+ }};
+
+ ret = cli_cmd_submit(NULL, &req, frame, cli_rpc_prog,
+ GLUSTER_CLI_LIST_VOLUME, NULL, this,
+ gf_cli_list_volume_cbk, (xdrproc_t)xdr_gf_cli_req);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.conncount", i);
- ret = dict_get_int32 (dict, key, &conn_count);
- if (ret)
- goto out;
+static int32_t
+gf_cli_clearlocks_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ char *lk_summary = NULL;
+ dict_t *dict = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status)
+ goto out;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+ gf_log("cli", GF_LOG_DEBUG, "Received response to clear-locks");
+
+ if (rsp.op_ret) {
+ cli_err("Volume clear-locks unsuccessful");
+ cli_err("%s", rsp.op_errstr);
+
+ } else {
+ if (!rsp.dict.dict_len) {
+ cli_err("Possibly no locks cleared");
+ ret = 0;
+ goto out;
+ }
- for (j = 0; j < conn_count; j++) {
- if (conn_count > 1)
- cli_out ("Connection %d:", j+1);
+ dict = dict_new();
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.conn%d.itable",
- i, j);
- cli_print_volume_status_itables (dict, key);
- cli_out (" ");
- }
+ if (!dict) {
+ ret = -1;
+ goto out;
}
-out:
- cli_out ("----------------------------------------------");
- return;
-}
-void
-cli_print_volume_status_fdtable (dict_t *dict, char *prefix)
-{
- int ret = -1;
- char key[1024] = {0,};
- int refcount = 0;
- uint32_t maxfds = 0;
- int firstfree = 0;
- int openfds = 0;
- int fd_pid = 0;
- int fd_refcount = 0;
- int fd_flags = 0;
- int i = 0;
-
- GF_ASSERT (dict);
- GF_ASSERT (prefix);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.refcount", prefix);
- ret = dict_get_int32 (dict, key, &refcount);
- if (ret)
- goto out;
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.maxfds", prefix);
- ret = dict_get_uint32 (dict, key, &maxfds);
- if (ret)
- goto out;
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
- memset (key, 0 ,sizeof (key));
- snprintf (key, sizeof (key), "%s.firstfree", prefix);
- ret = dict_get_int32 (dict, key, &firstfree);
- if (ret)
- goto out;
+ ret = dict_get_str(dict, "lk-summary", &lk_summary);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Unable to get lock summary from dictionary");
+ goto out;
+ }
+ cli_out("Volume clear-locks successful");
+ cli_out("%s", lk_summary);
+ }
- cli_out ("RefCount = %d MaxFDs = %d FirstFree = %d",
- refcount, maxfds, firstfree);
+ ret = rsp.op_ret;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.openfds", prefix);
- ret = dict_get_int32 (dict, key, &openfds);
- if (ret)
- goto out;
- if (0 == openfds) {
- cli_out ("No open fds");
- goto out;
- }
+out:
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
- cli_out ("%-19s %-19s %-19s %-19s", "FD Entry", "PID",
- "RefCount", "Flags");
- cli_out ("%-19s %-19s %-19s %-19s", "--------", "---",
- "--------", "-----");
+static int32_t
+gf_cli_clearlocks_volume(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *options = NULL;
+ int ret = -1;
- for (i = 0; i < maxfds ; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdentry%d.pid", prefix, i);
- ret = dict_get_int32 (dict, key, &fd_pid);
- if (ret)
- continue;
+ options = data;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdentry%d.refcount",
- prefix, i);
- ret = dict_get_int32 (dict, key, &fd_refcount);
- if (ret)
- continue;
+ ret = cli_to_glusterd(
+ &req, frame, gf_cli_clearlocks_volume_cbk, (xdrproc_t)xdr_gf_cli_req,
+ options, GLUSTER_CLI_CLRLOCKS_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdentry%d.flags", prefix, i);
- ret = dict_get_int32 (dict, key, &fd_flags);
- if (ret)
- continue;
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
+
+static int32_t
+cli_snapshot_remove_reply(gf_cli_rsp *rsp, dict_t *dict, call_frame_t *frame)
+{
+ int32_t ret = -1;
+ char *snap_name = NULL;
+ int32_t delete_cmd = -1;
+ cli_local_t *local = NULL;
+
+ GF_ASSERT(frame);
+ GF_ASSERT(rsp);
+ GF_ASSERT(dict);
+
+ local = frame->local;
+
+ ret = dict_get_int32_sizen(dict, "sub-cmd", &delete_cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get sub-cmd");
+ goto end;
+ }
+
+ if ((global_state->mode & GLUSTER_MODE_XML) &&
+ (delete_cmd == GF_SNAP_DELETE_TYPE_SNAP)) {
+ ret = cli_xml_output_snap_delete_begin(local, rsp->op_ret,
+ rsp->op_errno, rsp->op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create xml output for delete");
+ goto end;
+ }
+ }
+
+ if (rsp->op_ret && !(global_state->mode & GLUSTER_MODE_XML)) {
+ cli_err("snapshot delete: failed: %s",
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = rsp->op_ret;
+ goto out;
+ }
+
+ if (delete_cmd == GF_SNAP_DELETE_TYPE_ALL ||
+ delete_cmd == GF_SNAP_DELETE_TYPE_VOL) {
+ local = ((call_frame_t *)frame)->local;
+ if (!local) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "frame->local is NULL");
+ goto out;
+ }
+
+ /* During first call back of snapshot delete of type
+ * ALL and VOL, We will get the snapcount and snapnames.
+ * Hence to make the subsequent rpc calls for individual
+ * snapshot delete, We need to save it in local dictionary.
+ */
+ dict_copy(dict, local->dict);
+ ret = 0;
+ goto out;
+ }
- cli_out ("%-19d %-19d %-19d %-19d", i, fd_pid, fd_refcount,
- fd_flags);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_snapshot_delete(local, dict, rsp);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create xml output for snapshot delete command");
+ goto out;
+ }
+ /* Error out in case of the op already failed */
+ if (rsp->op_ret) {
+ ret = rsp->op_ret;
+ goto out;
+ }
+ } else {
+ ret = dict_get_str_sizen(dict, "snapname", &snap_name);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snapname");
+ goto out;
}
+ cli_out("snapshot delete: %s: snap removed successfully", snap_name);
+ }
+ ret = 0;
+
out:
- return;
+ if ((global_state->mode & GLUSTER_MODE_XML) &&
+ (delete_cmd == GF_SNAP_DELETE_TYPE_SNAP)) {
+ ret = cli_xml_output_snap_delete_end(local);
+ }
+end:
+ return ret;
}
-void
-cli_print_volume_status_fd (dict_t *dict, gf_boolean_t notbrick)
+static int
+cli_snapshot_config_display(dict_t *dict, gf_cli_rsp *rsp)
{
- int ret = -1;
- char *volname = NULL;
- int brick_index_max = -1;
- int other_count = 0;
- int index_max = 0;
- char *hostname = NULL;
- char *path = NULL;
- int online = -1;
- int conn_count = 0;
- char key[1024] = {0,};
- int i = 0;
- int j = 0;
-
- GF_ASSERT (dict);
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
+ char buf[PATH_MAX] = "";
+ char *volname = NULL;
+ int ret = -1;
+ int config_command = 0;
+ uint64_t value = 0;
+ uint64_t hard_limit = 0;
+ uint64_t soft_limit = 0;
+ uint64_t i = 0;
+ uint64_t voldisplaycount = 0;
+ char *auto_delete = NULL;
+ char *snap_activate = NULL;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp);
+
+ if (rsp->op_ret) {
+ cli_err("Snapshot Config : failed: %s",
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = rsp->op_ret;
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(dict, "config-command", &config_command);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch config type");
+ goto out;
+ }
+
+ ret = dict_get_uint64(dict, "snap-max-hard-limit", &hard_limit);
+ /* Ignore the error, as the key specified is optional */
+ ret = dict_get_uint64(dict, "snap-max-soft-limit", &soft_limit);
+
+ ret = dict_get_str_sizen(dict, "auto-delete", &auto_delete);
+
+ ret = dict_get_str_sizen(dict, "snap-activate-on-create", &snap_activate);
+
+ if (!hard_limit && !soft_limit &&
+ config_command != GF_SNAP_CONFIG_DISPLAY && !auto_delete &&
+ !snap_activate) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_ERROR, "Could not fetch config-key");
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ /* Ignore the error, as volname is optional */
+
+ if (!volname) {
+ volname = "System";
+ }
+
+ switch (config_command) {
+ case GF_SNAP_CONFIG_TYPE_SET:
+ if (hard_limit && soft_limit) {
+ cli_out(
+ "snapshot config: snap-max-hard-limit "
+ "& snap-max-soft-limit for system set successfully");
+ } else if (hard_limit) {
+ cli_out(
+ "snapshot config: snap-max-hard-limit "
+ "for %s set successfully",
+ volname);
+ } else if (soft_limit) {
+ cli_out(
+ "snapshot config: snap-max-soft-limit "
+ "for %s set successfully",
+ volname);
+ } else if (auto_delete) {
+ cli_out("snapshot config: auto-delete successfully set");
+ } else if (snap_activate) {
+ cli_out("snapshot config: activate-on-create successfully set");
+ }
+ break;
+
+ case GF_SNAP_CONFIG_DISPLAY:
+ cli_out("\nSnapshot System Configuration:");
+ ret = dict_get_uint64(dict, "snap-max-hard-limit", &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not fetch snap_max_hard_limit for %s", volname);
+ ret = -1;
goto out;
- cli_out ("FD tables for volume %s", volname);
+ }
+ cli_out("snap-max-hard-limit : %" PRIu64, value);
- ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
- if (ret)
- goto out;
- ret = dict_get_int32 (dict, "other-count", &other_count);
- if (ret)
+ ret = dict_get_uint64(dict, "snap-max-soft-limit", &soft_limit);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not fetch snap-max-soft-limit for %s", volname);
+ ret = -1;
goto out;
+ }
+ cli_out("snap-max-soft-limit : %" PRIu64 "%%", soft_limit);
- index_max = brick_index_max + other_count;
+ cli_out("auto-delete : %s", auto_delete);
- for (i = 0; i <= index_max; i++) {
- cli_out ("----------------------------------------------");
+ cli_out("activate-on-create : %s\n", snap_activate);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- goto out;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.path", i);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- goto out;
+ cli_out("Snapshot Volume Configuration:");
- if (notbrick)
- cli_out ("%s : %s", hostname, path);
- else
- cli_out ("Brick : %s:%s", hostname, path);
+ ret = dict_get_uint64(dict, "voldisplaycount", &voldisplaycount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch voldisplaycount");
+ ret = -1;
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.status", i);
- ret = dict_get_int32 (dict, key, &online);
- if (ret)
- goto out;
- if (!online) {
- if (notbrick)
- cli_out ("%s is offline", hostname);
- else
- cli_out ("Brick is offline");
- continue;
+ for (i = 0; i < voldisplaycount; i++) {
+ ret = snprintf(buf, sizeof(buf), "volume%" PRIu64 "-volname",
+ i);
+ ret = dict_get_strn(dict, buf, ret, &volname);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch %s", buf);
+ ret = -1;
+ goto out;
}
+ cli_out("\nVolume : %s", volname);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.conncount", i);
- ret = dict_get_int32 (dict, key, &conn_count);
- if (ret)
- goto out;
+ snprintf(buf, sizeof(buf),
+ "volume%" PRIu64 "-snap-max-hard-limit", i);
+ ret = dict_get_uint64(dict, buf, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch %s", buf);
+ ret = -1;
+ goto out;
+ }
+ cli_out("snap-max-hard-limit : %" PRIu64, value);
- for (j = 0; j < conn_count; j++) {
- cli_out ("Connection %d:", j+1);
+ snprintf(buf, sizeof(buf),
+ "volume%" PRIu64 "-active-hard-limit", i);
+ ret = dict_get_uint64(dict, buf, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not fetch"
+ " effective snap_max_hard_limit for %s",
+ volname);
+ ret = -1;
+ goto out;
+ }
+ cli_out("Effective snap-max-hard-limit : %" PRIu64, value);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.conn%d.fdtable",
- i, j);
- cli_print_volume_status_fdtable (dict, key);
- cli_out (" ");
+ snprintf(buf, sizeof(buf),
+ "volume%" PRIu64 "-snap-max-soft-limit", i);
+ ret = dict_get_uint64(dict, buf, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch %s", buf);
+ ret = -1;
+ goto out;
}
- }
+ cli_out("Effective snap-max-soft-limit : %" PRIu64
+ " "
+ "(%" PRIu64 "%%)",
+ value, soft_limit);
+ }
+ break;
+ default:
+ break;
+ }
+
+ ret = 0;
out:
- cli_out ("----------------------------------------------");
- return;
+ return ret;
}
-void
-cli_print_volume_status_call_frame (dict_t *dict, char *prefix)
+/* This function is used to print the volume related information
+ * of a snap.
+ *
+ * arg - 0, dict : Response Dictionary.
+ * arg - 1, prefix str : snaplist.snap{0..}.vol{0..}.*
+ */
+static int
+cli_get_each_volinfo_in_snap(dict_t *dict, char *keyprefix,
+ gf_boolean_t snap_driven)
{
- int ret = -1;
- char key[1024] = {0,};
- int ref_count = 0;
- char *translator = 0;
- int complete = 0;
- char *parent = NULL;
- char *wind_from = NULL;
- char *wind_to = NULL;
- char *unwind_from = NULL;
- char *unwind_to = NULL;
-
- if (!dict || !prefix)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.refcount", prefix);
- ret = dict_get_int32 (dict, key, &ref_count);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.translator", prefix);
- ret = dict_get_str (dict, key, &translator);
- if (ret)
- return;
+ char key[PATH_MAX] = "";
+ char *get_buffer = NULL;
+ int value = 0;
+ int ret = -1;
+ char indent[5] = "\t";
+ char *volname = NULL;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(keyprefix);
+
+ if (snap_driven) {
+ ret = snprintf(key, sizeof(key), "%s.volname", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.complete", prefix);
- ret = dict_get_int32 (dict, key, &complete);
- if (ret)
- return;
+ ret = dict_get_str(dict, key, &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out("%s" INDENT_MAIN_HEAD "%s", indent, "Snap Volume Name", ":",
+ get_buffer);
- cli_out (" Ref Count = %d", ref_count);
- cli_out (" Translator = %s", translator);
- cli_out (" Completed = %s", (complete ? "Yes" : "No"));
+ ret = snprintf(key, sizeof(key), "%s.origin-volname", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.parent", prefix);
- ret = dict_get_str (dict, key, &parent);
- if (!ret)
- cli_out (" Parent = %s", parent);
+ ret = dict_get_str(dict, key, &volname);
+ if (ret) {
+ gf_log("cli", GF_LOG_WARNING, "Failed to get %s", key);
+ cli_out("%-12s", "Origin:");
+ }
+ cli_out("%s" INDENT_MAIN_HEAD "%s", indent, "Origin Volume name", ":",
+ volname);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.windfrom", prefix);
- ret = dict_get_str (dict, key, &wind_from);
- if (!ret)
- cli_out (" Wind From = %s", wind_from);
+ ret = snprintf(key, sizeof(key), "%s.snapcount", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.windto", prefix);
- ret = dict_get_str (dict, key, &wind_to);
- if (!ret)
- cli_out (" Wind To = %s", wind_to);
+ ret = dict_get_int32(dict, key, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out("%s%s %s %s %d", indent, "Snaps taken for", volname, ":",
+ value);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unwindfrom", prefix);
- ret = dict_get_str (dict, key, &unwind_from);
- if (!ret)
- cli_out (" Unwind From = %s", unwind_from);
+ ret = snprintf(key, sizeof(key), "%s.snaps-available", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unwindto", prefix);
- ret = dict_get_str (dict, key, &unwind_to);
- if (!ret)
- cli_out (" Unwind To = %s", unwind_to);
+ ret = dict_get_int32(dict, key, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out("%s%s %s %s %d", indent, "Snaps available for", volname, ":",
+ value);
+ }
+
+ ret = snprintf(key, sizeof(key), "%s.vol-status", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str(dict, key, &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+ cli_out("%s" INDENT_MAIN_HEAD "%s", indent, "Status", ":", get_buffer);
+out:
+ return ret;
}
-void
-cli_print_volume_status_call_stack (dict_t *dict, char *prefix)
+/* This function is used to print snap related information
+ * arg - 0, dict : Response dictionary.
+ * arg - 1, prefix_str : snaplist.snap{0..}.*
+ */
+static int
+cli_get_volinfo_in_snap(dict_t *dict, char *keyprefix)
{
- int ret = -1;
- char key[1024] = {0,};
- int uid = 0;
- int gid = 0;
- int pid = 0;
- uint64_t unique = 0;
- //char *op = NULL;
- int count = 0;
- int i = 0;
+ char key[PATH_MAX] = "";
+ int i = 0;
+ int volcount = 0;
+ int ret = -1;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(keyprefix);
+
+ ret = snprintf(key, sizeof(key), "%s.vol-count", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, key, &volcount);
+ for (i = 1; i <= volcount; i++) {
+ ret = snprintf(key, sizeof(key), "%s.vol%d", keyprefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_each_volinfo_in_snap(dict, key, _gf_true);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not list details of volume in a snap");
+ goto out;
+ }
+ cli_out(" ");
+ }
- if (!dict || !prefix)
- return;
+out:
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.uid", prefix);
- ret = dict_get_int32 (dict, key, &uid);
- if (ret)
- return;
+static int
+cli_get_each_snap_info(dict_t *dict, char *prefix_str, gf_boolean_t snap_driven)
+{
+ char key_buffer[PATH_MAX] = "";
+ char *get_buffer = NULL;
+ int ret = -1;
+ char indent[5] = "";
+
+ GF_ASSERT(dict);
+ GF_ASSERT(prefix_str);
+
+ if (!snap_driven)
+ strcat(indent, "\t");
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snapname", prefix_str);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str(dict, key_buffer, &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to fetch snapname %s ", key_buffer);
+ goto out;
+ }
+ cli_out("%s" INDENT_MAIN_HEAD "%s", indent, "Snapshot", ":", get_buffer);
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snap-id", prefix_str);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str(dict, key_buffer, &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to fetch snap-id %s ", key_buffer);
+ goto out;
+ }
+ cli_out("%s" INDENT_MAIN_HEAD "%s", indent, "Snap UUID", ":", get_buffer);
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snap-desc", prefix_str);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str(dict, key_buffer, &get_buffer);
+ if (!ret) {
+ /* Ignore error for description */
+ cli_out("%s" INDENT_MAIN_HEAD "%s", indent, "Description", ":",
+ get_buffer);
+ }
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snap-time", prefix_str);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str(dict, key_buffer, &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to fetch snap-time %s ",
+ prefix_str);
+ goto out;
+ }
+ cli_out("%s" INDENT_MAIN_HEAD "%s", indent, "Created", ":", get_buffer);
+
+ if (snap_driven) {
+ cli_out("%-12s", "Snap Volumes:\n");
+ ret = cli_get_volinfo_in_snap(dict, prefix_str);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to list details of the snaps");
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.gid", prefix);
- ret = dict_get_int32 (dict, key, &gid);
- if (ret)
- return;
+/* This is a generic function to print snap related information.
+ * arg - 0, dict : Response Dictionary
+ */
+static int
+cli_call_snapshot_info(dict_t *dict, gf_boolean_t bool_snap_driven)
+{
+ int snap_count = 0;
+ char key[32] = "";
+ int ret = -1;
+ int i = 0;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pid", prefix);
- ret = dict_get_int32 (dict, key, &pid);
- if (ret)
- return;
+ GF_ASSERT(dict);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unique", prefix);
- ret = dict_get_uint64 (dict, key, &unique);
- if (ret)
- return;
+ ret = dict_get_int32_sizen(dict, "snapcount", &snap_count);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get snapcount");
+ goto out;
+ }
- /*
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.op", prefix);
- ret = dict_get_str (dict, key, &op);
- if (ret)
- return;
- */
+ if (snap_count == 0) {
+ cli_out("No snapshots present");
+ }
+ for (i = 1; i <= snap_count; i++) {
+ ret = snprintf(key, sizeof(key), "snap%d", i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_each_snap_info(dict, key, bool_snap_driven);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to print snap details");
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.count", prefix);
- ret = dict_get_int32 (dict, key, &count);
- if (ret)
- return;
+static int
+cli_get_snaps_in_volume(dict_t *dict)
+{
+ int ret = -1;
+ int i = 0;
+ int count = 0;
+ int avail = 0;
+ char key[32] = "";
+ char *get_buffer = NULL;
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_str_sizen(dict, "origin-volname", &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch origin-volname");
+ goto out;
+ }
+ cli_out(INDENT_MAIN_HEAD "%s", "Volume Name", ":", get_buffer);
+
+ ret = dict_get_int32_sizen(dict, "snapcount", &avail);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch snapcount");
+ goto out;
+ }
+ cli_out(INDENT_MAIN_HEAD "%d", "Snaps Taken", ":", avail);
+
+ ret = dict_get_int32_sizen(dict, "snaps-available", &count);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch snaps-available");
+ goto out;
+ }
+ cli_out(INDENT_MAIN_HEAD "%d", "Snaps Available", ":", count);
+
+ for (i = 1; i <= avail; i++) {
+ snprintf(key, sizeof(key), "snap%d", i);
+ ret = cli_get_each_snap_info(dict, key, _gf_false);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to print snap details");
+ goto out;
+ }
- cli_out (" UID : %d", uid);
- cli_out (" GID : %d", gid);
- cli_out (" PID : %d", pid);
- cli_out (" Unique : %"PRIu64, unique);
- //cli_out ("\tOp : %s", op);
- cli_out (" Frames : %d", count);
+ ret = snprintf(key, sizeof(key), "snap%d.vol1", i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = cli_get_each_volinfo_in_snap(dict, key, _gf_false);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not get volume related information");
+ goto out;
+ }
- for (i = 0; i < count; i++) {
- cli_out (" Frame %d", i+1);
+ cli_out(" ");
+ }
+out:
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.frame%d", prefix, i);
- cli_print_volume_status_call_frame (dict, key);
+static int
+cli_snapshot_list(dict_t *dict)
+{
+ int snapcount = 0;
+ char key[32] = "";
+ int ret = -1;
+ int i = 0;
+ char *get_buffer = NULL;
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_int32_sizen(dict, "snapcount", &snapcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch snap count");
+ goto out;
+ }
+
+ if (snapcount == 0) {
+ cli_out("No snapshots present");
+ }
+
+ for (i = 1; i <= snapcount; i++) {
+ ret = snprintf(key, sizeof(key), "snapname%d", i);
+ if (ret < 0) {
+ goto out;
}
- cli_out (" ");
+ ret = dict_get_strn(dict, key, ret, &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get %s ", key);
+ goto out;
+ } else {
+ cli_out("%s", get_buffer);
+ }
+ }
+out:
+ return ret;
}
-void
-cli_print_volume_status_callpool (dict_t *dict, gf_boolean_t notbrick)
+static int
+cli_get_snap_volume_status(dict_t *dict, char *key_prefix)
{
- int ret = -1;
- char *volname = NULL;
- int brick_index_max = -1;
- int other_count = 0;
- int index_max = 0;
- char *hostname = NULL;
- char *path = NULL;
- int online = -1;
- int call_count = 0;
- char key[1024] = {0,};
- int i = 0;
- int j = 0;
+ int ret = -1;
+ char key[PATH_MAX] = "";
+ char *buffer = NULL;
+ int brickcount = 0;
+ int i = 0;
+ int pid = 0;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(key_prefix);
+
+ ret = snprintf(key, sizeof(key), "%s.brickcount", key_prefix);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = dict_get_int32(dict, key, &brickcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to fetch brickcount");
+ goto out;
+ }
+
+ for (i = 0; i < brickcount; i++) {
+ ret = snprintf(key, sizeof(key), "%s.brick%d.path", key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
- GF_ASSERT (dict);
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Unable to get Brick Path");
+ continue;
+ }
+ cli_out("\n\t%-17s %s %s", "Brick Path", ":", buffer);
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto out;
- cli_out ("Pending calls for volume %s", volname);
+ ret = snprintf(key, sizeof(key), "%s.brick%d.vgname", key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
- ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
- if (ret)
- goto out;
- ret = dict_get_int32 (dict, "other-count", &other_count);
- if (ret)
- goto out;
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Unable to get Volume Group");
+ cli_out("\t%-17s %s %s", "Volume Group", ":", "N/A");
+ } else
+ cli_out("\t%-17s %s %s", "Volume Group", ":", buffer);
- index_max = brick_index_max + other_count;
+ ret = snprintf(key, sizeof(key), "%s.brick%d.status", key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
- for (i = 0; i <= index_max; i++) {
- cli_out ("----------------------------------------------");
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Unable to get Brick Running");
+ cli_out("\t%-17s %s %s", "Brick Running", ":", "N/A");
+ } else
+ cli_out("\t%-17s %s %s", "Brick Running", ":", buffer);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- goto out;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.path", i);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- goto out;
+ ret = snprintf(key, sizeof(key), "%s.brick%d.pid", key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
- if (notbrick)
- cli_out ("%s : %s", hostname, path);
- else
- cli_out ("Brick : %s:%s", hostname, path);
+ ret = dict_get_int32(dict, key, &pid);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Unable to get pid");
+ cli_out("\t%-17s %s %s", "Brick PID", ":", "N/A");
+ } else
+ cli_out("\t%-17s %s %d", "Brick PID", ":", pid);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.status", i);
- ret = dict_get_int32 (dict, key, &online);
- if (ret)
- goto out;
- if (!online) {
- if (notbrick)
- cli_out ("%s is offline", hostname);
- else
- cli_out ("Brick is offline");
- continue;
- }
+ ret = snprintf(key, sizeof(key), "%s.brick%d.data", key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.callpool.count", i);
- ret = dict_get_int32 (dict, key, &call_count);
- if (ret)
- goto out;
- cli_out ("Pending calls: %d", call_count);
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Unable to get Data Percent");
+ cli_out("\t%-17s %s %s", "Data Percentage", ":", "N/A");
+ } else
+ cli_out("\t%-17s %s %s", "Data Percentage", ":", buffer);
- if (0 == call_count)
- continue;
+ ret = snprintf(key, sizeof(key), "%s.brick%d.lvsize", key_prefix, i);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Unable to get LV Size");
+ cli_out("\t%-17s %s %s", "LV Size", ":", "N/A");
+ } else
+ cli_out("\t%-17s %s %s", "LV Size", ":", buffer);
+ }
- for (j = 0; j < call_count; j++) {
- cli_out ("Call Stack%d", j+1);
+ ret = 0;
+out:
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key),
- "brick%d.callpool.stack%d", i, j);
- cli_print_volume_status_call_stack (dict, key);
- }
+static int
+cli_get_single_snap_status(dict_t *dict, char *keyprefix)
+{
+ int ret = -1;
+ char key[64] = ""; /* keyprefix is ""status.snap0" */
+ int i = 0;
+ int volcount = 0;
+ char *get_buffer = NULL;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(keyprefix);
+
+ ret = snprintf(key, sizeof(key), "%s.snapname", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str(dict, key, &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get snapname");
+ goto out;
+ }
+ cli_out("\nSnap Name : %s", get_buffer);
+
+ ret = snprintf(key, sizeof(key), "%s.uuid", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str(dict, key, &get_buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get snap UUID");
+ goto out;
+ }
+ cli_out("Snap UUID : %s", get_buffer);
+
+ ret = snprintf(key, sizeof(key), "%s.volcount", keyprefix);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, key, &volcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get volume count");
+ goto out;
+ }
+
+ for (i = 0; i < volcount; i++) {
+ ret = snprintf(key, sizeof(key), "%s.vol%d", keyprefix, i);
+ if (ret < 0) {
+ goto out;
}
+ ret = cli_get_snap_volume_status(dict, key);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get snap volume status");
+ goto out;
+ }
+ }
out:
- cli_out ("----------------------------------------------");
- return;
+ return ret;
+}
+
+static int32_t
+cli_populate_req_dict_for_delete(dict_t *snap_dict, dict_t *dict, size_t index)
+{
+ int32_t ret = -1;
+ char key[PATH_MAX] = "";
+ char *buffer = NULL;
+
+ GF_ASSERT(snap_dict);
+ GF_ASSERT(dict);
+
+ ret = dict_set_int32_sizen(snap_dict, "sub-cmd", GF_SNAP_DELETE_TYPE_ITER);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not save command type in snap dictionary");
+ goto out;
+ }
+
+ ret = snprintf(key, sizeof(key), "snapname%zu", index);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snapname");
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(snap_dict, "snapname", buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to save snapname");
+ goto out;
+ }
+
+ ret = dict_set_int32_sizen(snap_dict, "type", GF_SNAP_OPTION_TYPE_DELETE);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to save command type");
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(snap_dict, "cmd-str", "snapshot delete");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not save command string as delete");
+ goto out;
+ }
+out:
+ return ret;
}
static int
-gf_cli3_1_status_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- int ret = -1;
- int brick_index_max = -1;
- int other_count = 0;
- int index_max = 0;
- int i = 0;
- int pid = -1;
- uint32_t cmd = 0;
- gf_boolean_t notbrick = _gf_false;
- char key[1024] = {0,};
- char *hostname = NULL;
- char *path = NULL;
- char *volname = NULL;
- dict_t *dict = NULL;
- gf_cli_rsp rsp = {0,};
- cli_volume_status_t status = {0};
- cli_local_t *local = NULL;
- char msg[1024] = {0,};
-
- if (req->rpc_status == -1)
- goto out;
+cli_populate_req_dict_for_status(dict_t *snap_dict, dict_t *dict, int index)
+{
+ int ret = -1;
+ char key[PATH_MAX] = "";
+ char *buffer = NULL;
+
+ GF_ASSERT(snap_dict);
+ GF_ASSERT(dict);
+
+ ret = dict_set_uint32(snap_dict, "sub-cmd", GF_SNAP_STATUS_TYPE_ITER);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not save command type in snap dict");
+ goto out;
+ }
+
+ ret = snprintf(key, sizeof(key), "status.snap%d.snapname", index);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, key, ret, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get snapname");
+ goto out;
+ }
+
+ ret = dict_set_str_sizen(snap_dict, "snapname", buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not save snapname in snap dict");
+ goto out;
+ }
+
+ ret = dict_set_int32_sizen(snap_dict, "type", GF_SNAP_OPTION_TYPE_STATUS);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not save command type");
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(snap_dict, "cmd-str", "snapshot status");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not save command string as status");
+ goto out;
+ }
+
+ ret = dict_set_int32_sizen(snap_dict, "hold_vol_locks", _gf_false);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Setting volume lock flag failed");
+ goto out;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("cli", GF_LOG_ERROR, "Volume status response error");
- goto out;
+out:
+ return ret;
+}
+
+static int
+cli_snapshot_status(dict_t *dict, gf_cli_rsp *rsp, call_frame_t *frame)
+{
+ int ret = -1;
+ int status_cmd = -1;
+ cli_local_t *local = NULL;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp);
+ GF_ASSERT(frame);
+
+ local = ((call_frame_t *)frame)->local;
+ if (!local) {
+ gf_log("cli", GF_LOG_ERROR, "frame->local is NULL");
+ goto out;
+ }
+
+ if (rsp->op_ret) {
+ if (rsp->op_errstr) {
+ ret = dict_set_dynstr_with_alloc(local->dict, "op_err_str",
+ rsp->op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to set op_errstr in local dictionary");
+ goto out;
+ }
+ }
+ ret = rsp->op_ret;
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(dict, "sub-cmd", &status_cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch status type");
+ goto out;
+ }
+
+ if ((status_cmd != GF_SNAP_STATUS_TYPE_SNAP) &&
+ (status_cmd != GF_SNAP_STATUS_TYPE_ITER)) {
+ dict_copy(dict, local->dict);
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_snapshot_status_single_snap(local, dict, "status.snap0");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create xml output for snapshot status");
}
+ } else {
+ ret = cli_get_single_snap_status(dict, "status.snap0");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch status of snap");
+ }
+ }
+
+out:
+ return ret;
+}
+
+static int
+gf_cli_generate_snapshot_event(gf_cli_rsp *rsp, dict_t *dict, int32_t type,
+ char *snap_name, char *volname, char *snap_uuid,
+ char *clone_name)
+{
+ int ret = -1;
+ int config_command = 0;
+ int32_t delete_cmd = -1;
+ uint64_t hard_limit = 0;
+ uint64_t soft_limit = 0;
+ char *auto_delete = NULL;
+ char *snap_activate = NULL;
+ char msg[PATH_MAX] = {
+ 0,
+ };
+ char option[512] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("cli", dict, out);
+ GF_VALIDATE_OR_GOTO("cli", rsp, out);
+
+ switch (type) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
+ goto out;
+ }
+
+ if (!volname) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get volume name");
+ goto out;
+ }
+
+ if (rsp->op_ret != 0) {
+ gf_event(EVENT_SNAPSHOT_CREATE_FAILED,
+ "snapshot_name=%s;volume_name=%s;error=%s", snap_name,
+ volname,
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = 0;
+ break;
+ }
- gf_log ("cli", GF_LOG_DEBUG, "Received response to status cmd");
+ if (!snap_uuid) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
+ goto out;
+ }
- local = ((call_frame_t *)myframe)->local;
+ gf_event(EVENT_SNAPSHOT_CREATED,
+ "snapshot_name=%s;volume_name=%s;snapshot_uuid=%s",
+ snap_name, volname, snap_uuid);
- if (rsp.op_ret) {
- if (strcmp (rsp.op_errstr, ""))
- snprintf (msg, sizeof (msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg), "Unable to obtain volume "
- "status information.");
-
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- cli_xml_output_str ("volStatus", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- ret = 0;
- goto out;
- }
-#endif
- cli_err ("%s", msg);
- if (local && local->all) {
- ret = 0;
- cli_out (" ");
- } else
- ret = -1;
+ ret = 0;
+ break;
+ case GF_SNAP_OPTION_TYPE_ACTIVATE:
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
goto out;
- }
+ }
- dict = dict_new ();
- if (!dict)
+ if (rsp->op_ret != 0) {
+ gf_event(EVENT_SNAPSHOT_ACTIVATE_FAILED,
+ "snapshot_name=%s;error=%s", snap_name,
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = 0;
+ break;
+ }
+
+ if (!snap_uuid) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
goto out;
+ }
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
- if (ret)
+ gf_event(EVENT_SNAPSHOT_ACTIVATED,
+ "snapshot_name=%s;snapshot_uuid=%s", snap_name, snap_uuid);
+
+ ret = 0;
+ break;
+
+ case GF_SNAP_OPTION_TYPE_DEACTIVATE:
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
goto out;
+ }
- ret = dict_get_uint32 (dict, "cmd", &cmd);
- if (ret)
+ if (rsp->op_ret != 0) {
+ gf_event(EVENT_SNAPSHOT_DEACTIVATE_FAILED,
+ "snapshot_name=%s;error=%s", snap_name,
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = 0;
+ break;
+ }
+
+ if (!snap_uuid) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
goto out;
+ }
- if ((cmd & GF_CLI_STATUS_ALL)) {
- if (local) {
- local->dict = dict;
- ret = 0;
- } else {
- gf_log ("cli", GF_LOG_ERROR, "local not found");
- ret = -1;
- }
+ gf_event(EVENT_SNAPSHOT_DEACTIVATED,
+ "snapshot_name=%s;snapshot_uuid=%s", snap_name, snap_uuid);
+
+ ret = 0;
+ break;
+
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
goto out;
- }
+ }
- if ((cmd & GF_CLI_STATUS_NFS) || (cmd & GF_CLI_STATUS_SHD))
- notbrick = _gf_true;
+ if (rsp->op_ret != 0) {
+ gf_event(EVENT_SNAPSHOT_RESTORE_FAILED,
+ "snapshot_name=%s;error=%s", snap_name,
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = 0;
+ break;
+ }
- ret = dict_get_int32 (dict, "count", &count);
- if (ret)
+ if (!snap_uuid) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
goto out;
- if (count == 0) {
+ }
+
+ if (!volname) {
ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Failed to get volname");
goto out;
- }
+ }
- ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
- if (ret)
+ gf_event(EVENT_SNAPSHOT_RESTORED,
+ "snapshot_name=%s;snapshot_uuid=%s;volume_name=%s",
+ snap_name, snap_uuid, volname);
+
+ ret = 0;
+ break;
+
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ ret = dict_get_int32_sizen(dict, "sub-cmd", &delete_cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get sub-cmd");
goto out;
- ret = dict_get_int32 (dict, "other-count", &other_count);
- if (ret)
+ }
+
+ /*
+ * Need not generate any event (success or failure) for delete *
+ * all, as it will trigger individual delete for all snapshots *
+ */
+ if (delete_cmd == GF_SNAP_DELETE_TYPE_ALL) {
+ ret = 0;
+ break;
+ }
+
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
goto out;
+ }
- index_max = brick_index_max + other_count;
+ if (rsp->op_ret != 0) {
+ gf_event(EVENT_SNAPSHOT_DELETE_FAILED,
+ "snapshot_name=%s;error=%s", snap_name,
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = 0;
+ break;
+ }
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_vol_status (dict, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- }
+ if (!snap_uuid) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
goto out;
- }
-#endif
+ }
- status.brick = GF_CALLOC (1, PATH_MAX + 256, gf_common_mt_strdup);
-
- switch (cmd & GF_CLI_STATUS_MASK) {
- case GF_CLI_STATUS_MEM:
- cli_print_volume_status_mem (dict, notbrick);
- goto cont;
- break;
- case GF_CLI_STATUS_CLIENTS:
- cli_print_volume_status_clients (dict, notbrick);
- goto cont;
- break;
- case GF_CLI_STATUS_INODE:
- cli_print_volume_status_inode (dict, notbrick);
- goto cont;
- break;
- case GF_CLI_STATUS_FD:
- cli_print_volume_status_fd (dict, notbrick);
- goto cont;
- break;
- case GF_CLI_STATUS_CALLPOOL:
- cli_print_volume_status_callpool (dict, notbrick);
- goto cont;
- break;
- default:
- break;
- }
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
+ gf_event(EVENT_SNAPSHOT_DELETED,
+ "snapshot_name=%s;snapshot_uuid=%s", snap_name, snap_uuid);
+
+ ret = 0;
+ break;
+
+ case GF_SNAP_OPTION_TYPE_CLONE:
+ if (!clone_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get clone name");
goto out;
+ }
- cli_out ("Status of volume: %s", volname);
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snapname name");
+ goto out;
+ }
- if ((cmd & GF_CLI_STATUS_DETAIL) == 0) {
- cli_out ("Gluster process\t\t\t\t\t\tPort\tOnline\tPid");
- cli_print_line (CLI_BRICK_STATUS_LINE_LEN);
- }
+ if (rsp->op_ret != 0) {
+ gf_event(EVENT_SNAPSHOT_CLONE_FAILED,
+ "snapshot_name=%s;clone_name=%s;error=%s", snap_name,
+ clone_name,
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = 0;
+ break;
+ }
- for (i = 0; i <= index_max; i++) {
+ if (!snap_uuid) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
+ goto out;
+ }
+ gf_event(EVENT_SNAPSHOT_CLONED,
+ "snapshot_name=%s;clone_name=%s;clone_uuid=%s", snap_name,
+ clone_name, snap_uuid);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- continue;
+ ret = 0;
+ break;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.path", i);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- continue;
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ if (rsp->op_ret != 0) {
+ gf_event(EVENT_SNAPSHOT_CONFIG_UPDATE_FAILED, "error=%s",
+ rsp->op_errstr ? rsp->op_errstr
+ : "Please check log file for details");
+ ret = 0;
+ break;
+ }
- /* Brick/not-brick is handled seperately here as all
- * types of nodes are contained in the default output
- */
- memset (status.brick, 0, PATH_MAX + 255);
- if (!strcmp (hostname, "NFS Server") ||
- !strcmp (hostname, "Self-heal Daemon"))
- snprintf (status.brick, PATH_MAX + 255, "%s on %s",
- hostname, path);
- else
- snprintf (status.brick, PATH_MAX + 255, "Brick %s:%s",
- hostname, path);
+ ret = dict_get_int32_sizen(dict, "config-command", &config_command);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch config type");
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.port", i);
- ret = dict_get_int32 (dict, key, &(status.port));
- if (ret)
- continue;
+ if (config_command == GF_SNAP_CONFIG_DISPLAY) {
+ ret = 0;
+ break;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.status", i);
- ret = dict_get_int32 (dict, key, &(status.online));
- if (ret)
- continue;
+ /* These are optional parameters therefore ignore the error */
+ ret = dict_get_uint64(dict, "snap-max-hard-limit", &hard_limit);
+ ret = dict_get_uint64(dict, "snap-max-soft-limit", &soft_limit);
+ ret = dict_get_str_sizen(dict, "auto-delete", &auto_delete);
+ ret = dict_get_str_sizen(dict, "snap-activate-on-create",
+ &snap_activate);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.pid", i);
- ret = dict_get_int32 (dict, key, &pid);
- if (ret)
- continue;
- if (pid == -1)
- ret = gf_asprintf (&(status.pid_str), "%s", "N/A");
- else
- ret = gf_asprintf (&(status.pid_str), "%d", pid);
+ if (!hard_limit && !soft_limit && !auto_delete && !snap_activate) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR,
+ "At least one option from "
+ "snap-max-hard-limit, snap-max-soft-limit, "
+ "auto-delete and snap-activate-on-create "
+ "should be set");
+ goto out;
+ }
- if (ret == -1)
- goto out;
+ if (hard_limit || soft_limit) {
+ snprintf(option, sizeof(option), "%s=%" PRIu64,
+ hard_limit ? "hard_limit" : "soft_limit",
+ hard_limit ? hard_limit : soft_limit);
+ } else if (auto_delete || snap_activate) {
+ snprintf(option, sizeof(option), "%s=%s",
+ auto_delete ? "auto-delete" : "snap-activate",
+ auto_delete ? auto_delete : snap_activate);
+ }
- if ((cmd & GF_CLI_STATUS_DETAIL)) {
- ret = cli_get_detail_status (dict, i, &status);
- if (ret)
- goto out;
- cli_print_line (CLI_BRICK_STATUS_LINE_LEN);
- cli_print_detailed_status (&status);
+ volname = NULL;
+ ret = dict_get_str_sizen(dict, "volname", &volname);
- } else {
- cli_print_brick_status (&status);
- }
- }
- cli_out (" ");
-cont:
- ret = rsp.op_ret;
+ snprintf(msg, sizeof(msg), "config_type=%s;%s",
+ volname ? "volume_config" : "system_config", option);
-out:
- if (status.brick)
- GF_FREE (status.brick);
+ gf_event(EVENT_SNAPSHOT_CONFIG_UPDATED, "%s", msg);
- cli_cmd_broadcast_response (ret);
- return ret;
+ ret = 0;
+ break;
+
+ default:
+ gf_log("cli", GF_LOG_WARNING,
+ "Cannot generate event for unknown type.");
+ ret = 0;
+ goto out;
+ }
+
+out:
+ return ret;
}
-int32_t
-gf_cli3_1_status_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+/*
+ * Fetch necessary data from dict at one place instead of *
+ * repeating the same code again and again. *
+ */
+static int
+gf_cli_snapshot_get_data_from_dict(dict_t *dict, char **snap_name,
+ char **volname, char **snap_uuid,
+ int8_t *soft_limit_flag, char **clone_name)
{
- gf_cli_req req = {{0,}};
- int ret = -1;
- dict_t *dict = NULL;
+ int ret = -1;
- if (!frame || !this || !data)
- goto out;
+ GF_VALIDATE_OR_GOTO("cli", dict, out);
- dict = data;
+ if (snap_name) {
+ ret = dict_get_str_sizen(dict, "snapname", snap_name);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get snapname from dict");
+ }
+ }
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log ("cli", GF_LOG_ERROR,
- "failed to serialize the data");
+ if (volname) {
+ ret = dict_get_str_sizen(dict, "volname1", volname);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get volname1 from dict");
+ }
+ }
- goto out;
+ if (snap_uuid) {
+ ret = dict_get_str_sizen(dict, "snapuuid", snap_uuid);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get snapuuid from dict");
+ }
+ }
+
+ if (soft_limit_flag) {
+ ret = dict_get_int8(dict, "soft-limit-reach", soft_limit_flag);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG,
+ "failed to get soft-limit-reach from dict");
}
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_STATUS_VOLUME, NULL,
- this, gf_cli3_1_status_cbk,
- (xdrproc_t)xdr_gf_cli_req);
+ }
- out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning: %d", ret);
- return ret;
+ if (clone_name) {
+ ret = dict_get_str_sizen(dict, "clonename", clone_name);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "failed to get clonename from dict");
+ }
+ }
+
+ ret = 0;
+out:
+ return ret;
}
-int
-gf_cli_status_volume_all (call_frame_t *frame, xlator_t *this, void *data)
+static int
+gf_cli_snapshot_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- int i = 0;
- int ret = -1;
- int vol_count = -1;
- uint32_t cmd = 0;
- char key[1024] = {0};
- char *volname = NULL;
- dict_t *vol_dict = NULL;
- dict_t *dict = NULL;
- cli_local_t *local = NULL;
+ int ret = -1;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ char *snap_name = NULL;
+ char *clone_name = NULL;
+ int32_t type = 0;
+ call_frame_t *frame = NULL;
+ gf_boolean_t snap_driven = _gf_false;
+ int8_t soft_limit_flag = -1;
+ char *volname = NULL;
+ char *snap_uuid = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (req->rpc_status == -1) {
+ goto out;
+ }
+
+ frame = myframe;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32_sizen(dict, "type", &type);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "failed to get type");
+ goto out;
+ }
+
+ ret = gf_cli_snapshot_get_data_from_dict(
+ dict, &snap_name, &volname, &snap_uuid, &soft_limit_flag, &clone_name);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to fetch data from dict.");
+ goto out;
+ }
+
+#if (USE_EVENTS)
+ ret = gf_cli_generate_snapshot_event(&rsp, dict, type, snap_name, volname,
+ snap_uuid, clone_name);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to generate snapshot event");
+ goto out;
+ }
+#endif
- dict = (dict_t *)data;
- ret = dict_get_uint32 (dict, "cmd", &cmd);
- if (ret)
+ /* Snapshot status and delete command is handled separately */
+ if (global_state->mode & GLUSTER_MODE_XML &&
+ GF_SNAP_OPTION_TYPE_STATUS != type &&
+ GF_SNAP_OPTION_TYPE_DELETE != type) {
+ ret = cli_xml_output_snapshot(type, dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ }
+
+ goto out;
+ }
+
+ switch (type) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ if (rsp.op_ret) {
+ cli_err("snapshot create: failed: %s",
+ rsp.op_errstr ? rsp.op_errstr
+ : "Please check log file for details");
+ ret = rsp.op_ret;
goto out;
+ }
- local = cli_local_get ();
- if (!local) {
+ if (!snap_name) {
ret = -1;
- gf_log ("cli", GF_LOG_ERROR, "Failed to allocate local");
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
goto out;
- }
- frame->local = local;
- local->all = _gf_true;
+ }
- ret = gf_cli3_1_status_volume (frame, this, data);
- if (ret)
+ if (!volname) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR, "Failed to get volume name");
+ goto out;
+ }
+
+ cli_out("snapshot create: success: Snap %s created successfully",
+ snap_name);
+
+ if (soft_limit_flag == 1) {
+ cli_out(
+ "Warning: Soft-limit of volume (%s) is "
+ "reached. Snapshot creation is not possible "
+ "once hard-limit is reached.",
+ volname);
+ }
+ ret = 0;
+ break;
+
+ case GF_SNAP_OPTION_TYPE_CLONE:
+ if (rsp.op_ret) {
+ cli_err("snapshot clone: failed: %s",
+ rsp.op_errstr ? rsp.op_errstr
+ : "Please check log file for details");
+ ret = rsp.op_ret;
goto out;
+ }
- vol_dict = local->dict;
+ if (!clone_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get clone name");
+ goto out;
+ }
- ret = dict_get_int32 (vol_dict, "vol_count", &vol_count);
- if (ret) {
- cli_err ("Failed to get names of volumes");
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snapname name");
goto out;
- }
+ }
- if (vol_count == 0) {
- cli_out ("No volumes present");
- ret = 0;
+ cli_out("snapshot clone: success: Clone %s created successfully",
+ clone_name);
+
+ ret = 0;
+ break;
+
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ if (rsp.op_ret) {
+ cli_err("snapshot restore: failed: %s",
+ rsp.op_errstr ? rsp.op_errstr
+ : "Please check log file for details");
+ ret = rsp.op_ret;
goto out;
- }
+ }
- /* remove the "all" flag in cmd */
- cmd &= ~GF_CLI_STATUS_ALL;
- cmd |= GF_CLI_STATUS_VOL;
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
+ goto out;
+ }
- for (i = 0; i < vol_count; i++) {
+ cli_out("Snapshot restore: %s: Snap restored successfully",
+ snap_name);
- dict = dict_new ();
- if (!dict)
- goto out;
+ ret = 0;
+ break;
+ case GF_SNAP_OPTION_TYPE_ACTIVATE:
+ if (rsp.op_ret) {
+ cli_err("snapshot activate: failed: %s",
+ rsp.op_errstr ? rsp.op_errstr
+ : "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "vol%d", i);
- ret = dict_get_str (vol_dict, key, &volname);
- if (ret)
- goto out;
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
+ goto out;
+ }
- ret = dict_set_dynstr (dict, "volname", volname);
- if (ret)
- goto out;
+ cli_out("Snapshot activate: %s: Snap activated successfully",
+ snap_name);
- ret = dict_set_uint32 (dict, "cmd", cmd);
- if (ret)
- goto out;
+ ret = 0;
+ break;
- ret = gf_cli3_1_status_volume (frame, this, dict);
- if (ret)
- goto out;
+ case GF_SNAP_OPTION_TYPE_DEACTIVATE:
+ if (rsp.op_ret) {
+ cli_err("snapshot deactivate: failed: %s",
+ rsp.op_errstr ? rsp.op_errstr
+ : "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
- dict_unref (dict);
- }
+ if (!snap_name) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
+ goto out;
+ }
- out:
- if (ret)
- gf_log ("cli", GF_LOG_ERROR, "status all failed");
- if (ret && dict)
- dict_unref (dict);
- if (frame)
- frame->local = NULL;
- return ret;
-}
+ cli_out("Snapshot deactivate: %s: Snap deactivated successfully",
+ snap_name);
-static int
-gf_cli3_1_mount_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf1_cli_mount_rsp rsp = {0,};
- int ret = -1;
+ ret = 0;
+ break;
+ case GF_SNAP_OPTION_TYPE_INFO:
+ if (rsp.op_ret) {
+ cli_err("Snapshot info : failed: %s",
+ rsp.op_errstr ? rsp.op_errstr
+ : "Please check log file for details");
+ ret = rsp.op_ret;
+ goto out;
+ }
- if (-1 == req->rpc_status) {
+ snap_driven = dict_get_str_boolean(dict, "snap-driven", _gf_false);
+ if (snap_driven == _gf_true) {
+ ret = cli_call_snapshot_info(dict, snap_driven);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Snapshot info failed");
+ goto out;
+ }
+ } else if (snap_driven == _gf_false) {
+ ret = cli_get_snaps_in_volume(dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Snapshot info failed");
+ goto out;
+ }
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ ret = cli_snapshot_config_display(dict, &rsp);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to display snapshot config output.");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_LIST:
+ if (rsp.op_ret) {
+ cli_err("Snapshot list : failed: %s",
+ rsp.op_errstr ? rsp.op_errstr
+ : "Please check log file for details");
+ ret = rsp.op_ret;
goto out;
- }
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_mount_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
+ ret = cli_snapshot_list(dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to display snapshot list");
goto out;
- }
+ }
+ break;
- gf_log ("cli", GF_LOG_INFO, "Received resp to mount");
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ ret = cli_snapshot_remove_reply(&rsp, dict, frame);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to delete snap");
+ goto out;
+ }
+ break;
- if (rsp.op_ret == 0) {
- ret = 0;
- cli_out ("%s", rsp.path);
- } else {
- /* weird sounding but easy to parse... */
- cli_err ("%d : failed with this errno (%s)",
- rsp.op_errno, strerror (rsp.op_errno));
- ret = -1;
- }
+ case GF_SNAP_OPTION_TYPE_STATUS:
+ ret = cli_snapshot_status(dict, &rsp, frame);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to display snapshot status output.");
+ goto out;
+ }
+ break;
+ default:
+ cli_err("Unknown command executed");
+ ret = -1;
+ goto out;
+ }
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+
+ free(rsp.dict.dict_val);
+ free(rsp.op_errstr);
+
+ return ret;
}
int32_t
-gf_cli3_1_mount (call_frame_t *frame, xlator_t *this, void *data)
+gf_cli_snapshot_for_delete(call_frame_t *frame, xlator_t *this, void *data)
{
- gf1_cli_mount_req req = {0,};
- int ret = -1;
- void **dataa = data;
- char *label = NULL;
- dict_t *dict = NULL;
-
- if (!frame || !this || !data)
- goto out;
-
- label = dataa[0];
- dict = dataa[1];
-
- req.label = label;
- ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
+ gf_cli_req req = {{
+ 0,
+ }};
+ int32_t ret = -1;
+ int32_t cmd = -1;
+ cli_local_t *local = NULL;
+ dict_t *snap_dict = NULL;
+ int32_t snapcount = 0;
+ int i = 0;
+ char question[PATH_MAX] = "";
+ char *volname = NULL;
+ gf_answer_t answer = GF_ANSWER_NO;
+
+ GF_VALIDATE_OR_GOTO("cli", frame, out);
+ GF_VALIDATE_OR_GOTO("cli", frame->local, out);
+ GF_VALIDATE_OR_GOTO("cli", this, out);
+ GF_VALIDATE_OR_GOTO("cli", data, out);
+
+ local = frame->local;
+
+ ret = dict_get_int32_sizen(local->dict, "sub-cmd", &cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get sub-cmd");
+ goto out;
+ }
+
+ /* No need multiple RPCs for individual snapshot delete*/
+ if (cmd == GF_SNAP_DELETE_TYPE_SNAP) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(local->dict, "snapcount", &snapcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get snapcount");
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+#ifdef HAVE_LIB_XML
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"snapCount", "%d", snapcount);
if (ret) {
- ret = -1;
- goto out;
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to write xml element \"snapCount\"");
+ goto out;
+ }
+#endif /* HAVE_LIB_XML */
+ } else if (snapcount == 0) {
+ cli_out("No snapshots present");
+ goto out;
+ }
+
+ if (cmd == GF_SNAP_DELETE_TYPE_ALL) {
+ snprintf(question, sizeof(question),
+ "System contains %d snapshot(s).\nDo you still "
+ "want to continue and delete them? ",
+ snapcount);
+ } else {
+ ret = dict_get_str_sizen(local->dict, "volname", &volname);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to fetch volname from local dictionary");
+ goto out;
}
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_MOUNT, NULL,
- this, gf_cli3_1_mount_cbk,
- (xdrproc_t)xdr_gf1_cli_mount_req);
+ snprintf(question, sizeof(question),
+ "Volume (%s) contains %d snapshot(s).\nDo you still want to "
+ "continue and delete them? ",
+ volname, snapcount);
+ }
-out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
-}
+ answer = cli_cmd_get_confirmation(global_state, question);
+ if (GF_ANSWER_NO == answer) {
+ ret = 0;
+ gf_log("cli", GF_LOG_DEBUG,
+ "User cancelled snapshot delete operation for snap delete");
+ goto out;
+ }
-static int
-gf_cli3_1_umount_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf1_cli_umount_rsp rsp = {0,};
- int ret = -1;
+ for (i = 1; i <= snapcount; i++) {
+ ret = -1;
- if (-1 == req->rpc_status) {
- goto out;
- }
+ snap_dict = dict_new();
+ if (!snap_dict)
+ goto out;
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf1_cli_umount_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
+ ret = cli_populate_req_dict_for_delete(snap_dict, local->dict, i);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not populate snap request dictionary");
+ goto out;
}
- gf_log ("cli", GF_LOG_INFO, "Received resp to mount");
-
- if (rsp.op_ret == 0)
- ret = 0;
- else {
- cli_err ("umount failed");
- ret = -1;
+ ret = cli_to_glusterd(&req, frame, gf_cli_snapshot_cbk,
+ (xdrproc_t)xdr_gf_cli_req, snap_dict,
+ GLUSTER_CLI_SNAP, this, cli_rpc_prog, NULL);
+ if (ret) {
+ /* Fail the operation if deleting one of the
+ * snapshots is failed
+ */
+ gf_log("cli", GF_LOG_ERROR,
+ "cli_to_glusterd for snapshot delete failed");
+ goto out;
}
+ dict_unref(snap_dict);
+ snap_dict = NULL;
+ }
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ if (snap_dict)
+ dict_unref(snap_dict);
+ GF_FREE(req.dict.dict_val);
+
+ return ret;
}
-int32_t
-gf_cli3_1_umount (call_frame_t *frame, xlator_t *this, void *data)
+static int32_t
+gf_cli_snapshot_for_status(call_frame_t *frame, xlator_t *this, void *data)
{
- gf1_cli_umount_req req = {0,};
- int ret = -1;
- dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+ int ret = -1;
+ int32_t cmd = -1;
+ cli_local_t *local = NULL;
+ dict_t *snap_dict = NULL;
+ int snapcount = 0;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO("cli", frame, out);
+ GF_VALIDATE_OR_GOTO("cli", frame->local, out);
+ GF_VALIDATE_OR_GOTO("cli", this, out);
+ GF_VALIDATE_OR_GOTO("cli", data, out);
+
+ local = frame->local;
+
+ ret = dict_get_int32_sizen(local->dict, "sub-cmd", &cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get sub-cmd");
+ goto out;
+ }
+
+ /* Snapshot status of single snap (i.e. GF_SNAP_STATUS_TYPE_SNAP)
+ * is already handled. Therefore we can return from here.
+ * If want to get status of all snaps in the system or volume then
+ * we should get them one by one.*/
+ if ((cmd == GF_SNAP_STATUS_TYPE_SNAP) ||
+ (cmd == GF_SNAP_STATUS_TYPE_ITER)) {
+ ret = 0;
+ goto out;
+ }
- if (!frame || !this || !data)
- goto out;
+ ret = dict_get_int32_sizen(local->dict, "status.snapcount", &snapcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get snapcount");
+ goto out;
+ }
+
+ if (snapcount == 0 && !(global_state->mode & GLUSTER_MODE_XML)) {
+ cli_out("No snapshots present");
+ }
- dict = data;
+ for (i = 0; i < snapcount; i++) {
+ ret = -1;
- ret = dict_get_str (dict, "path", &req.path);
- if (ret == 0)
- ret = dict_get_int32 (dict, "lazy", &req.lazy);
+ snap_dict = dict_new();
+ if (!snap_dict)
+ goto out;
+ ret = cli_populate_req_dict_for_status(snap_dict, local->dict, i);
if (ret) {
- ret = -1;
- goto out;
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not populate snap request dictionary");
+ goto out;
+ }
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_snapshot_cbk,
+ (xdrproc_t)xdr_gf_cli_req, snap_dict,
+ GLUSTER_CLI_SNAP, this, cli_rpc_prog, NULL);
+
+ /* Ignore the return value and error for snapshot
+ * status of type "ALL" or "VOL"
+ *
+ * Scenario : There might be case where status command
+ * and delete command might be issued at the same time.
+ * In that case when status tried to fetch detail of
+ * snap which has been deleted by concurrent command,
+ * then it will show snapshot not present. Which will
+ * not be appropriate.
+ */
+ if (ret && (cmd != GF_SNAP_STATUS_TYPE_ALL &&
+ cmd != GF_SNAP_STATUS_TYPE_VOL)) {
+ gf_log("cli", GF_LOG_ERROR,
+ "cli_to_glusterd for snapshot status failed");
+ goto out;
}
+ dict_unref(snap_dict);
+ snap_dict = NULL;
+ }
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_UMOUNT, NULL,
- this, gf_cli3_1_umount_cbk,
- (xdrproc_t)xdr_gf1_cli_umount_req);
+ ret = 0;
+out:
+ if (snap_dict)
+ dict_unref(snap_dict);
+ GF_FREE(req.dict.dict_val);
- out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
}
-void
-cmd_heal_volume_brick_out (dict_t *dict, int brick)
+static int32_t
+gf_cli_snapshot(call_frame_t *frame, xlator_t *this, void *data)
{
- uint64_t num_entries = 0;
- int ret = 0;
- char key[256] = {0};
- char *hostname = NULL;
- char *path = NULL;
- char *status = NULL;
- uint64_t i = 0;
- uint32_t time = 0;
- char timestr[32] = {0};
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *options = NULL;
+ int ret = -1;
+ int tmp_ret = -1;
+ cli_local_t *local = NULL;
+ char *err_str = NULL;
+ int type = -1;
- snprintf (key, sizeof key, "%d-hostname", brick);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- goto out;
- snprintf (key, sizeof key, "%d-path", brick);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- goto out;
- cli_out ("\nBrick %s:%s", hostname, path);
- snprintf (key, sizeof key, "%d-count", brick);
- ret = dict_get_uint64 (dict, key, &num_entries);
- cli_out ("Number of entries: %"PRIu64, num_entries);
- snprintf (key, sizeof key, "%d-status", brick);
- ret = dict_get_str (dict, key, &status);
- if (status && strlen (status))
- cli_out ("Status: %s", status);
- for (i = 0; i < num_entries; i++) {
- snprintf (key, sizeof key, "%d-%"PRIu64, brick, i);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- continue;
- time = 0;
- snprintf (key, sizeof key, "%d-%"PRIu64"-time", brick, i);
- ret = dict_get_uint32 (dict, key, &time);
- if (!time) {
- cli_out ("%s", path);
- } else {
- gf_time_fmt (timestr, sizeof timestr,
- time, gf_timefmt_FT);
- if (i == 0) {
- cli_out ("at path on brick");
- cli_out ("-----------------------------------");
- }
- cli_out ("%s %s", timestr, path);
- }
- }
-out:
- return;
-}
+ if (!frame || !frame->local || !this || !data)
+ goto out;
-int
-gf_cli3_1_heal_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- cli_local_t *local = NULL;
- char *volname = NULL;
- call_frame_t *frame = NULL;
- dict_t *input_dict = NULL;
- dict_t *dict = NULL;
- int brick_count = 0;
- int i = 0;
- gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID;
- char *operation = NULL;
- char *substr = NULL;
-
- if (-1 == req->rpc_status) {
- goto out;
- }
+ local = frame->local;
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- goto out;
+ options = data;
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_snapshot_begin_composite_op(local);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to begin snapshot xml composite op");
+ goto out;
}
+ }
- frame = myframe;
+ ret = cli_to_glusterd(&req, frame, gf_cli_snapshot_cbk,
+ (xdrproc_t)xdr_gf_cli_req, options, GLUSTER_CLI_SNAP,
+ this, cli_rpc_prog, NULL);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "cli_to_glusterd for snapshot failed");
+ goto xmlend;
+ }
- if (frame) {
- local = frame->local;
- frame->local = NULL;
- }
+ ret = dict_get_int32_sizen(local->dict, "type", &type);
- if (local) {
- input_dict = local->dict;
- ret = dict_get_int32 (input_dict, "heal-op",
- (int32_t*)&heal_op);
+ if (GF_SNAP_OPTION_TYPE_STATUS == type) {
+ ret = gf_cli_snapshot_for_status(frame, this, data);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "cli to glusterd for snapshot status command failed");
}
-//TODO: Proper XML output
-//#if (HAVE_LIB_XML)
-// if (global_state->mode & GLUSTER_MODE_XML) {
-// ret = cli_xml_output_dict ("volHeal", dict, rsp.op_ret,
-// rsp.op_errno, rsp.op_errstr);
-// if (ret)
-// gf_log ("cli", GF_LOG_ERROR,
-// "Error outputting to xml");
-// goto out;
-// }
-//#endif
- ret = dict_get_str (input_dict, "volname", &volname);
+ goto xmlend;
+ }
+
+ if (GF_SNAP_OPTION_TYPE_DELETE == type) {
+ ret = gf_cli_snapshot_for_delete(frame, this, data);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to get volname");
- goto out;
+ gf_log("cli", GF_LOG_ERROR,
+ "cli to glusterd for snapshot delete command failed");
}
- gf_log ("cli", GF_LOG_INFO, "Received resp to heal volume");
+ goto xmlend;
+ }
- if ((heal_op == GF_AFR_OP_HEAL_FULL) ||
- (heal_op == GF_AFR_OP_HEAL_INDEX)) {
- operation = "Launching Heal operation";
- substr = "\nUse heal info commands to check status";
- } else {
- operation = "Gathering Heal info";
- substr = "";
- }
+ ret = 0;
- if (rsp.op_ret) {
- if (strcmp (rsp.op_errstr, "")) {
- cli_err ("%s", rsp.op_errstr);
- } else {
- cli_err ("%s on volume %s has been unsuccessful",
- operation, volname);
- }
+xmlend:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_snapshot_end_composite_op(local);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to end snapshot xml composite op");
+ goto out;
+ }
+ }
+out:
+ if (ret && local && GF_SNAP_OPTION_TYPE_STATUS == type) {
+ tmp_ret = dict_get_str_sizen(local->dict, "op_err_str", &err_str);
+ if (tmp_ret || !err_str) {
+ cli_err("Snapshot Status : failed: %s",
+ "Please check log file for details");
} else {
- cli_out ("%s on volume %s has been successful%s", operation,
- volname, substr);
+ cli_err("Snapshot Status : failed: %s", err_str);
+ dict_del_sizen(local->dict, "op_err_str");
}
+ }
- ret = rsp.op_ret;
- if ((heal_op == GF_AFR_OP_HEAL_FULL) ||
- (heal_op == GF_AFR_OP_HEAL_INDEX))
- goto out;
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- dict = dict_new ();
- if (!dict) {
- ret = -1;
- goto out;
- }
+ GF_FREE(req.dict.dict_val);
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ /* XML mode handles its own error */
+ ret = 0;
+ }
+ return ret;
+}
- if (ret) {
- gf_log ("", GF_LOG_ERROR,
- "Unable to allocate memory");
- goto out;
+static int32_t
+gf_cli_barrier_volume_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status)
+ goto out;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+ gf_log("cli", GF_LOG_DEBUG, "Received response to barrier");
+
+ if (rsp.op_ret) {
+ if (rsp.op_errstr && (strlen(rsp.op_errstr) > 1)) {
+ cli_err("volume barrier: command unsuccessful : %s", rsp.op_errstr);
} else {
- dict->extra_stdfree = rsp.dict.dict_val;
- }
- ret = dict_get_int32 (dict, "count", &brick_count);
- if (ret)
- goto out;
-
- if (!brick_count) {
- cli_out ("All bricks of volume %s are down.", volname);
- goto out;
+ cli_err("volume barrier: command unsuccessful");
}
-
- for (i = 0; i < brick_count; i++)
- cmd_heal_volume_brick_out (dict, i);
- ret = rsp.op_ret;
+ } else {
+ cli_out("volume barrier: command successful");
+ }
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- if (local)
- cli_local_wipe (local);
- if (rsp.op_errstr)
- free (rsp.op_errstr);
- if (dict)
- dict_unref (dict);
- return ret;
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
}
-int32_t
-gf_cli3_1_heal_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int
+gf_cli_barrier_volume(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- int ret = 0;
- cli_local_t *local = NULL;
- dict_t *dict = NULL;
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *options = NULL;
+ int ret = -1;
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ options = data;
- dict = data;
- local = cli_local_get ();
+ ret = cli_to_glusterd(&req, frame, gf_cli_barrier_volume_cbk,
+ (xdrproc_t)xdr_gf_cli_req, options,
+ GLUSTER_CLI_BARRIER_VOLUME, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- if (local) {
- local->dict = dict_ref (dict);
- frame->local = local;
- }
+ GF_FREE(req.dict.dict_val);
+ return ret;
+}
- ret = dict_allocate_and_serialize (dict,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+static int32_t
+gf_cli_get_vol_opt_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *dict = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char msg[1024] = {
+ 0,
+ };
+ int i = 0;
+ char dict_key[50] = {
+ 0,
+ };
+
+ GF_ASSERT(myframe);
+
+ if (-1 == req->rpc_status)
+ goto out;
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+ gf_log("cli", GF_LOG_DEBUG, "Received response to get volume option");
+
+ if (rsp.op_ret) {
+ if (strcmp(rsp.op_errstr, ""))
+ snprintf(msg, sizeof(msg), "volume get option: failed: %s",
+ rsp.op_errstr);
+ else
+ snprintf(msg, sizeof(msg), "volume get option: failed");
- goto out;
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_str("volGetopts", msg, rsp.op_ret,
+ rsp.op_errno, rsp.op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ }
+ } else {
+ cli_err("%s", msg);
}
-
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_HEAL_VOLUME, NULL,
- this, gf_cli3_1_heal_volume_cbk,
- (xdrproc_t) xdr_gf_cli_req);
+ ret = rsp.op_ret;
+ goto out_nolog;
+ }
+ dict = dict_new();
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_getopts(dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ ret = 0;
+ }
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(dict, "warning", &value);
+ if (!ret) {
+ cli_out("%s", value);
+ }
+
+ ret = dict_get_int32_sizen(dict, "count", &count);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to retrieve count from the dictionary");
+ goto out;
+ }
+
+ if (count <= 0) {
+ gf_log("cli", GF_LOG_ERROR, "Value of count :%d is invalid", count);
+ ret = -1;
+ goto out;
+ }
+
+ cli_out("%-40s%-40s", "Option", "Value");
+ cli_out("%-40s%-40s", "------", "-----");
+ for (i = 1; i <= count; i++) {
+ ret = snprintf(dict_key, sizeof dict_key, "key%d", i);
+ ret = dict_get_strn(dict, dict_key, ret, &key);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to retrieve %s from the dictionary", dict_key);
+ goto out;
+ }
+ ret = snprintf(dict_key, sizeof dict_key, "value%d", i);
+ ret = dict_get_strn(dict, dict_key, ret, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to retrieve key value for %s from the dictionary",
+ dict_key);
+ goto out;
+ }
+ cli_out("%-40s%-40s", key, value);
+ }
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ if (ret) {
+ cli_out(
+ "volume get option failed. Check the cli/glusterd log "
+ "file for more details");
+ }
+
+out_nolog:
+ if (dict)
+ dict_unref(dict);
+ cli_cmd_broadcast_response(ret);
+ gf_free_xdr_cli_rsp(rsp);
+ return ret;
+}
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+static int
+gf_cli_get_vol_opt(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *options = NULL;
+ int ret = -1;
+
+ options = data;
- return ret;
+ ret = cli_to_glusterd(&req, frame, gf_cli_get_vol_opt_cbk,
+ (xdrproc_t)xdr_gf_cli_req, options,
+ GLUSTER_CLI_GET_VOL_OPT, this, cli_rpc_prog, NULL);
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
+
+ GF_FREE(req.dict.dict_val);
+ return ret;
}
-int32_t
-gf_cli3_1_statedump_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+add_cli_cmd_timeout_to_dict(dict_t *dict)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- char msg[1024] = {0,};
-
- if (-1 == req->rpc_status)
- goto out;
- ret = xdr_to_generic (*iov, &rsp,
- (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "XDR decoding failed");
- goto out;
- }
- gf_log ("cli", GF_LOG_DEBUG, "Received response to statedump");
- if (rsp.op_ret)
- snprintf (msg, sizeof(msg), "%s", rsp.op_errstr);
- else
- snprintf (msg, sizeof (msg), "Volume statedump successful");
+ int ret = 0;
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_str ("volStatedump", msg, rsp.op_ret,
- rsp.op_errno, rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
+ if (cli_default_conn_timeout > 120) {
+ ret = dict_set_uint32(dict, "timeout", cli_default_conn_timeout);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Failed to save timeout to dict");
}
-#endif
-
- if (rsp.op_ret)
- cli_err ("%s", msg);
- else
- cli_out ("%s", msg);
- ret = rsp.op_ret;
+ }
+ return ret;
+}
+static int
+cli_to_glusterd(gf_cli_req *req, call_frame_t *frame, fop_cbk_fn_t cbkfn,
+ xdrproc_t xdrproc, dict_t *dict, int procnum, xlator_t *this,
+ rpc_clnt_prog_t *prog, struct iobref *iobref)
+{
+ int ret = 0;
+ size_t len = 0;
+ char *cmd = NULL;
+ int i = 0;
+ const char **words = NULL;
+ cli_local_t *local = NULL;
+
+ if (!this || !frame || !frame->local || !dict) {
+ ret = -1;
+ goto out;
+ }
+
+ local = frame->local;
+
+ if (!local->words) {
+ ret = -1;
+ goto out;
+ }
+
+ words = local->words;
+
+ while (words[i])
+ len += strlen(words[i++]) + 1;
+
+ cmd = GF_MALLOC(len + 1, gf_common_mt_char);
+ if (!cmd) {
+ ret = -1;
+ goto out;
+ }
+ cmd[0] = '\0';
+
+ for (i = 0; words[i]; i++) {
+ strncat(cmd, words[i], len - 1);
+ if (words[i + 1] != NULL)
+ strncat(cmd, " ", len - 1);
+ }
+
+ ret = dict_set_dynstr_sizen(dict, "cmd-str", cmd);
+ if (ret)
+ goto out;
+
+ ret = add_cli_cmd_timeout_to_dict(dict);
+
+ ret = dict_allocate_and_serialize(dict, &(req->dict).dict_val,
+ &(req->dict).dict_len);
+
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "failed to get serialized length of dict");
+ goto out;
+ }
+
+ ret = cli_cmd_submit(NULL, req, frame, prog, procnum, iobref, this, cbkfn,
+ (xdrproc_t)xdrproc);
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ return ret;
}
-int32_t
-gf_cli3_1_statedump_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int
+gf_cli_print_bitrot_scrub_status(dict_t *dict)
{
- gf_cli_req req = {{0,}};
- dict_t *options = NULL;
- int ret = -1;
+ int i = 1;
+ int j = 0;
+ int ret = -1;
+ int count = 0;
+ char key[64] = {
+ 0,
+ };
+ char *volname = NULL;
+ char *node_name = NULL;
+ char *scrub_freq = NULL;
+ char *state_scrub = NULL;
+ char *scrub_impact = NULL;
+ char *bad_file_str = NULL;
+ char *scrub_log_file = NULL;
+ char *bitrot_log_file = NULL;
+ uint64_t scrub_files = 0;
+ uint64_t unsigned_files = 0;
+ uint64_t scrub_time = 0;
+ uint64_t days = 0;
+ uint64_t hours = 0;
+ uint64_t minutes = 0;
+ uint64_t seconds = 0;
+ char *last_scrub = NULL;
+ uint64_t error_count = 0;
+ int8_t scrub_running = 0;
+ char *scrub_state_op = NULL;
+
+ ret = dict_get_int32_sizen(dict, "count", &count);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "failed to get count value from dictionary");
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get volume name");
+
+ ret = dict_get_str_sizen(dict, "features.scrub", &state_scrub);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get scrub state value");
+
+ ret = dict_get_str_sizen(dict, "features.scrub-throttle", &scrub_impact);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get scrub impact value");
+
+ ret = dict_get_str_sizen(dict, "features.scrub-freq", &scrub_freq);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get scrub -freq value");
+
+ ret = dict_get_str_sizen(dict, "bitrot_log_file", &bitrot_log_file);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get bitrot log file location");
+
+ ret = dict_get_str_sizen(dict, "scrub_log_file", &scrub_log_file);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get scrubber log file location");
+
+ for (i = 1; i <= count; i++) {
+ snprintf(key, sizeof(key), "scrub-running-%d", i);
+ ret = dict_get_int8(dict, key, &scrub_running);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get scrubbed files");
+ if (scrub_running)
+ break;
+ }
- if (!frame || !this || !data)
- goto out;
+ if (scrub_running)
+ gf_asprintf(&scrub_state_op, "%s (In Progress)", state_scrub);
+ else
+ gf_asprintf(&scrub_state_op, "%s (Idle)", state_scrub);
- options = data;
+ cli_out("\n%s: %s\n", "Volume name ", volname);
- ret = dict_allocate_and_serialize (options,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to serialize the data");
+ cli_out("%s: %s\n", "State of scrub", scrub_state_op);
- goto out;
- }
+ cli_out("%s: %s\n", "Scrub impact", scrub_impact);
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_STATEDUMP_VOLUME, NULL,
- this, gf_cli3_1_statedump_volume_cbk,
- (xdrproc_t)xdr_gf_cli_req);
+ cli_out("%s: %s\n", "Scrub frequency", scrub_freq);
-out:
- if (options)
- dict_destroy (options);
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ cli_out("%s: %s\n", "Bitrot error log location", bitrot_log_file);
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
- return ret;
-}
+ cli_out("%s: %s\n", "Scrubber error log location", scrub_log_file);
-int32_t
-gf_cli3_1_list_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- int ret = -1;
- gf_cli_rsp rsp = {0,};
- dict_t *dict = NULL;
- int vol_count = 0;;
- char *volname = NULL;
- char key[1024] = {0,};
- int i = 0;
+ for (i = 1; i <= count; i++) {
+ /* Reset the variables to prevent carryover of values */
+ node_name = NULL;
+ last_scrub = NULL;
+ scrub_time = 0;
+ error_count = 0;
+ scrub_files = 0;
+ unsigned_files = 0;
- if (-1 == req->rpc_status)
- goto out;
- ret = xdr_to_generic (*iov, &rsp,
- (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "XDR decoding failed");
- goto out;
- }
+ snprintf(key, sizeof(key), "node-name-%d", i);
+ ret = dict_get_str(dict, key, &node_name);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get node-name");
- dict = dict_new ();
- if (!dict) {
- ret = -1;
- goto out;
- }
+ snprintf(key, sizeof(key), "scrubbed-files-%d", i);
+ ret = dict_get_uint64(dict, key, &scrub_files);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get scrubbed files");
- ret = dict_unserialize (rsp.dict.dict_val, rsp.dict.dict_len, &dict);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Unable to allocate memory");
- goto out;
- }
+ snprintf(key, sizeof(key), "unsigned-files-%d", i);
+ ret = dict_get_uint64(dict, key, &unsigned_files);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get unsigned files");
-#if (HAVE_LIB_XML)
- if (global_state->mode & GLUSTER_MODE_XML) {
- ret = cli_xml_output_vol_list (dict, rsp.op_ret, rsp.op_errno,
- rsp.op_errstr);
- if (ret)
- gf_log ("cli", GF_LOG_ERROR,
- "Error outputting to xml");
- goto out;
- }
-#endif
- if (rsp.op_ret)
- cli_err ("%s", rsp.op_errstr);
- else {
- ret = dict_get_int32 (dict, "count", &vol_count);
- if (ret)
- goto out;
+ snprintf(key, sizeof(key), "scrub-duration-%d", i);
+ ret = dict_get_uint64(dict, key, &scrub_time);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get last scrub duration");
- if (vol_count == 0) {
- cli_out ("No volumes present in cluster");
- goto out;
- }
- for (i = 0; i < vol_count; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d", i);
- ret = dict_get_str (dict, key, &volname);
- if (ret)
- goto out;
- cli_out ("%s", volname);
- }
- }
+ snprintf(key, sizeof(key), "last-scrub-time-%d", i);
+ ret = dict_get_str(dict, key, &last_scrub);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get last scrub time");
+ snprintf(key, sizeof(key), "error-count-%d", i);
+ ret = dict_get_uint64(dict, key, &error_count);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get error count");
- ret = rsp.op_ret;
+ cli_out("\n%s\n",
+ "=========================================================");
-out:
- cli_cmd_broadcast_response (ret);
- return ret;
-}
+ cli_out("%s: %s\n", "Node", node_name);
-int32_t
-gf_cli3_1_list_volume (call_frame_t *frame, xlator_t *this, void *data)
-{
- int ret = -1;
- gf_cli_req req = {{0,}};
+ cli_out("%s: %" PRIu64 "\n", "Number of Scrubbed files", scrub_files);
- if (!frame || !this)
- goto out;
+ cli_out("%s: %" PRIu64 "\n", "Number of Skipped files", unsigned_files);
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_LIST_VOLUME, NULL,
- this, gf_cli3_1_list_volume_cbk,
- (xdrproc_t)xdr_gf_cli_req);
+ if ((!last_scrub) || !strcmp(last_scrub, ""))
+ cli_out("%s: %s\n", "Last completed scrub time",
+ "Scrubber pending to complete.");
+ else
+ cli_out("%s: %s\n", "Last completed scrub time", last_scrub);
+
+ /* Printing last scrub duration time in human readable form*/
+ seconds = scrub_time % 60;
+ minutes = (scrub_time / 60) % 60;
+ hours = (scrub_time / 3600) % 24;
+ days = scrub_time / 86400;
+ cli_out("%s: %" PRIu64 ":%" PRIu64 ":%" PRIu64 ":%" PRIu64 "\n",
+ "Duration of last scrub (D:M:H:M:S)", days, hours, minutes,
+ seconds);
+
+ cli_out("%s: %" PRIu64 "\n", "Error count", error_count);
+
+ if (error_count) {
+ cli_out("%s:\n", "Corrupted object's [GFID]");
+ /* Printing list of bad file's (Corrupted object's)*/
+ for (j = 0; j < error_count; j++) {
+ snprintf(key, sizeof(key), "quarantine-%d-%d", j, i);
+ ret = dict_get_str(dict, key, &bad_file_str);
+ if (!ret) {
+ cli_out("%s\n", bad_file_str);
+ }
+ }
+ }
+ }
+ cli_out("%s\n",
+ "=========================================================");
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ GF_FREE(scrub_state_op);
+ return 0;
}
-int32_t
-gf_cli3_1_clearlocks_volume_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+static int
+gf_cli_bitrot_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_cli_rsp rsp = {0,};
- int ret = -1;
- char *lk_summary = NULL;
- char *volname = NULL;
- dict_t *dict = NULL;
-
- if (-1 == req->rpc_status)
- goto out;
- ret = xdr_to_generic (*iov, &rsp,
- (xdrproc_t)xdr_gf_cli_rsp);
- if (ret < 0) {
+ int ret = -1;
+ int type = 0;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ char *scrub_cmd = NULL;
+ char *volname = NULL;
+ char *cmd_str = NULL;
+ char *cmd_op = NULL;
+
+ GF_ASSERT(myframe);
+
+ if (req->rpc_status == -1) {
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_cli_rsp);
+ if (ret < 0) {
+ gf_log(((call_frame_t *)myframe)->this->name, GF_LOG_ERROR,
+ XDR_DECODE_FAIL);
+ goto out;
+ }
+
+ if (rsp.op_ret) {
+ ret = -1;
+ if (global_state->mode & GLUSTER_MODE_XML)
+ goto xml_output;
+
+ if (strcmp(rsp.op_errstr, ""))
+ cli_err("Bitrot command failed : %s", rsp.op_errstr);
+ else
+ cli_err("Bitrot command : failed");
- gf_log ("cli", GF_LOG_ERROR, "XDR decoding failed");
- goto out;
- }
- gf_log ("cli", GF_LOG_DEBUG, "Received response to clear-locks");
+ goto out;
+ }
- if (rsp.op_ret) {
- cli_err ("Volume clear-locks unsuccessful");
- cli_err ("%s", rsp.op_errstr);
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
- } else {
- if (!rsp.dict.dict_len) {
- cli_out ("Possibly no locks cleared");
- ret = 0;
- goto out;
- }
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
- dict = dict_new ();
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
- if (!dict) {
- ret = -1;
- goto out;
- }
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, DICT_UNSERIALIZE_FAIL);
+ goto out;
+ }
+ }
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
+ gf_log("cli", GF_LOG_DEBUG, "Received resp to bit rot command");
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR,
- "Unable to serialize response dictionary");
- goto out;
- }
+ ret = dict_get_int32_sizen(dict, "type", &type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get command type");
+ goto out;
+ }
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Unable to get volname "
- "from dictionary");
- goto out;
- }
+ if ((type == GF_BITROT_CMD_SCRUB_STATUS) &&
+ !(global_state->mode & GLUSTER_MODE_XML)) {
+ ret = gf_cli_print_bitrot_scrub_status(dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to print bitrot scrub status");
+ }
+ goto out;
+ }
+
+ /* Ignoring the error, as using dict val for cli output only */
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get volume name");
+
+ ret = dict_get_str_sizen(dict, "scrub-value", &scrub_cmd);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "Failed to get scrub command");
+
+ ret = dict_get_str_sizen(dict, "cmd-str", &cmd_str);
+ if (ret)
+ gf_log("cli", GF_LOG_TRACE, "failed to get command string");
+
+ if (cmd_str)
+ cmd_op = strrchr(cmd_str, ' ') + 1;
+
+ switch (type) {
+ case GF_BITROT_OPTION_TYPE_ENABLE:
+ cli_out("volume bitrot: success bitrot enabled for volume %s",
+ volname);
+ ret = 0;
+ goto out;
+ case GF_BITROT_OPTION_TYPE_DISABLE:
+ cli_out("volume bitrot: success bitrot disabled for volume %s",
+ volname);
+ ret = 0;
+ goto out;
+ case GF_BITROT_CMD_SCRUB_ONDEMAND:
+ cli_out("volume bitrot: scrubber started ondemand for volume %s",
+ volname);
+ ret = 0;
+ goto out;
+ case GF_BITROT_OPTION_TYPE_SCRUB:
+ if (!strncmp("pause", scrub_cmd, sizeof("pause")))
+ cli_out("volume bitrot: scrubber paused for volume %s",
+ volname);
+ if (!strncmp("resume", scrub_cmd, sizeof("resume")))
+ cli_out("volume bitrot: scrubber resumed for volume %s",
+ volname);
+ ret = 0;
+ goto out;
+ case GF_BITROT_OPTION_TYPE_SCRUB_FREQ:
+ cli_out(
+ "volume bitrot: scrub-frequency is set to %s "
+ "successfully for volume %s",
+ cmd_op, volname);
+ ret = 0;
+ goto out;
+ case GF_BITROT_OPTION_TYPE_SCRUB_THROTTLE:
+ cli_out(
+ "volume bitrot: scrub-throttle is set to %s "
+ "successfully for volume %s",
+ cmd_op, volname);
+ ret = 0;
+ goto out;
+ }
- ret = dict_get_str (dict, "lk-summary", &lk_summary);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "Unable to get lock "
- "summary from dictionary");
- goto out;
- }
- cli_out ("Volume clear-locks successful");
- cli_out ("%s", lk_summary);
+xml_output:
+ if (global_state->mode & GLUSTER_MODE_XML) {
+ ret = cli_xml_output_vol_profile(dict, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, XML_ERROR);
+ goto out;
+ }
- }
+ if (!rsp.op_ret)
+ cli_out("volume bitrot: success");
- ret = rsp.op_ret;
+ ret = rsp.op_ret;
out:
- cli_cmd_broadcast_response (ret);
- return ret;
+ if (dict)
+ dict_unref(dict);
+
+ gf_free_xdr_cli_rsp(rsp);
+ cli_cmd_broadcast_response(ret);
+ return ret;
}
-int32_t
-gf_cli3_1_clearlocks_volume (call_frame_t *frame, xlator_t *this,
- void *data)
+static int32_t
+gf_cli_bitrot(call_frame_t *frame, xlator_t *this, void *data)
{
- gf_cli_req req = {{0,}};
- dict_t *options = NULL;
- int ret = -1;
+ gf_cli_req req = {{
+ 0,
+ }};
+ dict_t *options = NULL;
+ int ret = -1;
+
+ options = data;
+
+ ret = cli_to_glusterd(&req, frame, gf_cli_bitrot_cbk,
+ (xdrproc_t)xdr_gf_cli_req, options,
+ GLUSTER_CLI_BITROT, this, cli_rpc_prog, NULL);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "cli_to_glusterd for bitrot failed");
+ goto out;
+ }
- if (!frame || !this || !data)
- goto out;
+out:
+ gf_log("cli", GF_LOG_DEBUG, RETURNING, ret);
- options = data;
+ GF_FREE(req.dict.dict_val);
- ret = dict_allocate_and_serialize (options,
- &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
- if (ret < 0) {
- gf_log ("cli", GF_LOG_ERROR,
- "failed to serialize the data");
+ return ret;
+}
- goto out;
- }
+static struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
+ [GLUSTER_CLI_NULL] = {"NULL", NULL},
+ [GLUSTER_CLI_PROBE] = {"PROBE_QUERY", gf_cli_probe},
+ [GLUSTER_CLI_DEPROBE] = {"DEPROBE_QUERY", gf_cli_deprobe},
+ [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS", gf_cli_list_friends},
+ [GLUSTER_CLI_UUID_RESET] = {"UUID_RESET", gf_cli3_1_uuid_reset},
+ [GLUSTER_CLI_UUID_GET] = {"UUID_GET", gf_cli3_1_uuid_get},
+ [GLUSTER_CLI_CREATE_VOLUME] = {"CREATE_VOLUME", gf_cli_create_volume},
+ [GLUSTER_CLI_DELETE_VOLUME] = {"DELETE_VOLUME", gf_cli_delete_volume},
+ [GLUSTER_CLI_START_VOLUME] = {"START_VOLUME", gf_cli_start_volume},
+ [GLUSTER_CLI_STOP_VOLUME] = {"STOP_VOLUME", gf_cli_stop_volume},
+ [GLUSTER_CLI_RENAME_VOLUME] = {"RENAME_VOLUME", gf_cli_rename_volume},
+ [GLUSTER_CLI_DEFRAG_VOLUME] = {"DEFRAG_VOLUME", gf_cli_defrag_volume},
+ [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", gf_cli_get_volume},
+ [GLUSTER_CLI_GET_NEXT_VOLUME] = {"GET_NEXT_VOLUME", gf_cli_get_next_volume},
+ [GLUSTER_CLI_SET_VOLUME] = {"SET_VOLUME", gf_cli_set_volume},
+ [GLUSTER_CLI_ADD_BRICK] = {"ADD_BRICK", gf_cli_add_brick},
+ [GLUSTER_CLI_REMOVE_BRICK] = {"REMOVE_BRICK", gf_cli_remove_brick},
+ [GLUSTER_CLI_REPLACE_BRICK] = {"REPLACE_BRICK", gf_cli_replace_brick},
+ [GLUSTER_CLI_LOG_ROTATE] = {"LOG ROTATE", gf_cli_log_rotate},
+ [GLUSTER_CLI_GETSPEC] = {"GETSPEC", gf_cli_getspec},
+ [GLUSTER_CLI_PMAP_PORTBYBRICK] = {"PMAP PORTBYBRICK", gf_cli_pmap_b2p},
+ [GLUSTER_CLI_SYNC_VOLUME] = {"SYNC_VOLUME", gf_cli_sync_volume},
+ [GLUSTER_CLI_RESET_VOLUME] = {"RESET_VOLUME", gf_cli_reset_volume},
+ [GLUSTER_CLI_FSM_LOG] = {"FSM_LOG", gf_cli_fsm_log},
+ [GLUSTER_CLI_GSYNC_SET] = {"GSYNC_SET", gf_cli_gsync_set},
+ [GLUSTER_CLI_PROFILE_VOLUME] = {"PROFILE_VOLUME", gf_cli_profile_volume},
+ [GLUSTER_CLI_QUOTA] = {"QUOTA", gf_cli_quota},
+ [GLUSTER_CLI_TOP_VOLUME] = {"TOP_VOLUME", gf_cli_top_volume},
+ [GLUSTER_CLI_GETWD] = {"GETWD", gf_cli_getwd},
+ [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", gf_cli_status_volume},
+ [GLUSTER_CLI_STATUS_ALL] = {"STATUS_ALL", gf_cli_status_volume_all},
+ [GLUSTER_CLI_MOUNT] = {"MOUNT", gf_cli_mount},
+ [GLUSTER_CLI_UMOUNT] = {"UMOUNT", gf_cli_umount},
+ [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", gf_cli_heal_volume},
+ [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME",
+ gf_cli_statedump_volume},
+ [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", gf_cli_list_volume},
+ [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME",
+ gf_cli_clearlocks_volume},
+ [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", gf_cli_copy_file},
+ [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", gf_cli_sys_exec},
+ [GLUSTER_CLI_SNAP] = {"SNAP", gf_cli_snapshot},
+ [GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER VOLUME", gf_cli_barrier_volume},
+ [GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", gf_cli_get_vol_opt},
+ [GLUSTER_CLI_BITROT] = {"BITROT", gf_cli_bitrot},
+ [GLUSTER_CLI_GET_STATE] = {"GET_STATE", gf_cli_get_state},
+ [GLUSTER_CLI_RESET_BRICK] = {"RESET_BRICK", gf_cli_reset_brick},
+ [GLUSTER_CLI_GANESHA] = {"GANESHA", gf_cli_ganesha},
+};
- ret = cli_cmd_submit (&req, frame, cli_rpc_prog,
- GLUSTER_CLI_CLRLOCKS_VOLUME, NULL,
- this, gf_cli3_1_clearlocks_volume_cbk,
- (xdrproc_t)xdr_gf_cli_req);
+struct rpc_clnt_program cli_prog = {
+ .progname = "Gluster CLI",
+ .prognum = GLUSTER_CLI_PROGRAM,
+ .progver = GLUSTER_CLI_VERSION,
+ .numproc = GLUSTER_CLI_MAXVALUE,
+ .proctable = gluster_cli_actors,
+};
-out:
- if (options)
- dict_destroy (options);
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
-
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
- return ret;
-}
-
-struct rpc_clnt_procedure gluster_cli_actors[GLUSTER_CLI_MAXVALUE] = {
- [GLUSTER_CLI_NULL] = {"NULL", NULL },
- [GLUSTER_CLI_PROBE] = {"PROBE_QUERY", gf_cli3_1_probe},
- [GLUSTER_CLI_DEPROBE] = {"DEPROBE_QUERY", gf_cli3_1_deprobe},
- [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS", gf_cli3_1_list_friends},
- [GLUSTER_CLI_CREATE_VOLUME] = {"CREATE_VOLUME", gf_cli3_1_create_volume},
- [GLUSTER_CLI_DELETE_VOLUME] = {"DELETE_VOLUME", gf_cli3_1_delete_volume},
- [GLUSTER_CLI_START_VOLUME] = {"START_VOLUME", gf_cli3_1_start_volume},
- [GLUSTER_CLI_STOP_VOLUME] = {"STOP_VOLUME", gf_cli3_1_stop_volume},
- [GLUSTER_CLI_RENAME_VOLUME] = {"RENAME_VOLUME", gf_cli3_1_rename_volume},
- [GLUSTER_CLI_DEFRAG_VOLUME] = {"DEFRAG_VOLUME", gf_cli3_1_defrag_volume},
- [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", gf_cli3_1_get_volume},
- [GLUSTER_CLI_GET_NEXT_VOLUME] = {"GET_NEXT_VOLUME", gf_cli3_1_get_next_volume},
- [GLUSTER_CLI_SET_VOLUME] = {"SET_VOLUME", gf_cli3_1_set_volume},
- [GLUSTER_CLI_ADD_BRICK] = {"ADD_BRICK", gf_cli3_1_add_brick},
- [GLUSTER_CLI_REMOVE_BRICK] = {"REMOVE_BRICK", gf_cli3_1_remove_brick},
- [GLUSTER_CLI_REPLACE_BRICK] = {"REPLACE_BRICK", gf_cli3_1_replace_brick},
- [GLUSTER_CLI_LOG_ROTATE] = {"LOG ROTATE", gf_cli3_1_log_rotate},
- [GLUSTER_CLI_GETSPEC] = {"GETSPEC", gf_cli3_1_getspec},
- [GLUSTER_CLI_PMAP_PORTBYBRICK] = {"PMAP PORTBYBRICK", gf_cli3_1_pmap_b2p},
- [GLUSTER_CLI_SYNC_VOLUME] = {"SYNC_VOLUME", gf_cli3_1_sync_volume},
- [GLUSTER_CLI_RESET_VOLUME] = {"RESET_VOLUME", gf_cli3_1_reset_volume},
- [GLUSTER_CLI_FSM_LOG] = {"FSM_LOG", gf_cli3_1_fsm_log},
- [GLUSTER_CLI_GSYNC_SET] = {"GSYNC_SET", gf_cli3_1_gsync_set},
- [GLUSTER_CLI_PROFILE_VOLUME] = {"PROFILE_VOLUME", gf_cli3_1_profile_volume},
- [GLUSTER_CLI_QUOTA] = {"QUOTA", gf_cli3_1_quota},
- [GLUSTER_CLI_TOP_VOLUME] = {"TOP_VOLUME", gf_cli3_1_top_volume},
- [GLUSTER_CLI_GETWD] = {"GETWD", gf_cli3_1_getwd},
- [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", gf_cli3_1_status_volume},
- [GLUSTER_CLI_STATUS_ALL] = {"STATUS_ALL", gf_cli_status_volume_all},
- [GLUSTER_CLI_MOUNT] = {"MOUNT", gf_cli3_1_mount},
- [GLUSTER_CLI_UMOUNT] = {"UMOUNT", gf_cli3_1_umount},
- [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", gf_cli3_1_heal_volume},
- [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME", gf_cli3_1_statedump_volume},
- [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", gf_cli3_1_list_volume},
- [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME", gf_cli3_1_clearlocks_volume},
+static struct rpc_clnt_procedure cli_quotad_procs[GF_AGGREGATOR_MAXVALUE] = {
+ [GF_AGGREGATOR_NULL] = {"NULL", NULL},
+ [GF_AGGREGATOR_LOOKUP] = {"LOOKUP", NULL},
+ [GF_AGGREGATOR_GETLIMIT] = {"GETLIMIT", cli_quotad_getlimit},
};
-struct rpc_clnt_program cli_prog = {
- .progname = "Gluster CLI",
- .prognum = GLUSTER_CLI_PROGRAM,
- .progver = GLUSTER_CLI_VERSION,
- .numproc = GLUSTER_CLI_MAXVALUE,
- .proctable = gluster_cli_actors,
+struct rpc_clnt_program cli_quotad_clnt = {
+ .progname = "CLI Quotad client",
+ .prognum = GLUSTER_AGGREGATOR_PROGRAM,
+ .progver = GLUSTER_AGGREGATOR_VERSION,
+ .numproc = GF_AGGREGATOR_MAXVALUE,
+ .proctable = cli_quotad_procs,
};
diff --git a/cli/src/cli-xml-output.c b/cli/src/cli-xml-output.c
index 510ad01033e..069de75801c 100644
--- a/cli/src/cli-xml-output.c
+++ b/cli/src/cli-xml-output.c
@@ -1,2502 +1,5839 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdlib.h>
#include "cli.h"
#include "cli1-xdr.h"
-#include "run.h"
-#include "compat.h"
-#include "syscall.h"
+#include <glusterfs/run.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/upcall-utils.h>
+enum gf_task_types { GF_TASK_TYPE_REBALANCE, GF_TASK_TYPE_REMOVE_BRICK };
+
+/*
+ * IMPORTANT NOTE:
+ * All exported functions in this file which use libxml need use a
+ * #if (HAVE_LIB_XML), #else, #endif
+ * For eg,
+ * int exported_func () {
+ * #if (HAVE_LIB_XML)
+ * <Stuff using libxml>
+ * #else
+ * return 0;
+ * #endif
+ * }
+ *
+ * All other functions, which are called internally within this file need to be
+ * within #if (HAVE_LIB_XML), #endif statements
+ * For eg,
+ * #if (HAVE_LIB_XML)
+ * int internal_func ()
+ * {
+ * }
+ * #endif
+ *
+ * Following the above format ensures that all xml related code is compiled
+ * only when libxml2 is present, and also keeps the rest of the codebase free
+ * of #if (HAVE_LIB_XML)
+ */
#if (HAVE_LIB_XML)
#include <libxml/encoding.h>
#include <libxml/xmlwriter.h>
-#define XML_RET_CHECK_AND_GOTO(ret, label) do { \
- if (ret < 0) { \
- ret = -1; \
- goto label; \
- } \
- else \
- ret = 0; \
- }while (0) \
+#define XML_RET_CHECK_AND_GOTO(ret, label) \
+ do { \
+ if (ret < 0) { \
+ ret = -1; \
+ goto label; \
+ } else \
+ ret = 0; \
+ } while (0)
int
-cli_begin_xml_output (xmlTextWriterPtr *writer, xmlBufferPtr *buf)
+cli_begin_xml_output(xmlTextWriterPtr *writer, xmlDocPtr *doc)
{
- int ret = -1;
-
- *buf = xmlBufferCreateSize (8192);
- if (buf == NULL) {
- ret = -1;
- goto out;
- }
- xmlBufferSetAllocationScheme (*buf, XML_BUFFER_ALLOC_DOUBLEIT);
+ int ret = -1;
- *writer = xmlNewTextWriterMemory (*buf, 0);
- if (writer == NULL) {
- ret = -1;
- goto out;
- }
+ *writer = xmlNewTextWriterDoc(doc, 0);
+ if (*writer == NULL) {
+ ret = -1;
+ goto out;
+ }
- ret = xmlTextWriterStartDocument (*writer, "1.0", "UTF-8", "yes");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterStartDocument(*writer, "1.0", "UTF-8", "yes");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* <cliOutput> */
- ret = xmlTextWriterStartElement (*writer, (xmlChar *)"cliOutput");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* <cliOutput> */
+ ret = xmlTextWriterStartElement(*writer, (xmlChar *)"cliOutput");
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-cli_end_xml_output (xmlTextWriterPtr writer, xmlBufferPtr buf)
+cli_end_xml_output(xmlTextWriterPtr writer, xmlDocPtr doc)
{
- int ret = -1;
+ int ret = -1;
- /* </cliOutput> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </cliOutput> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = xmlTextWriterEndDocument (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterEndDocument(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- cli_out ("%s", (const char *)buf->content);
+ /* Dump xml document to stdout and pretty format it */
+ xmlSaveFormatFileEnc("-", doc, "UTF-8", 1);
- xmlFreeTextWriter (writer);
- xmlBufferFree (buf);
+ xmlFreeTextWriter(writer);
+ xmlFreeDoc(doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-cli_xml_output_common (xmlTextWriterPtr writer, int op_ret, int op_errno,
- char *op_errstr)
+cli_xml_output_common(xmlTextWriterPtr writer, int op_ret, int op_errno,
+ char *op_errstr)
{
- int ret = -1;
+ int ret = -1;
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"opRet", "%d",
+ op_ret);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"opRet",
- "%d", op_ret);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"opErrno", "%d",
+ op_errno);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"opErrno",
- "%d", op_errno);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ if (op_errstr)
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"opErrstr",
+ "%s", op_errstr);
+ else
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"opErrstr",
+ "%s", "");
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"opErrstr",
- "%s", op_errstr);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
+#endif
int
-cli_xml_output_str (char *op, char *str, int op_ret, int op_errno,
- char *op_errstr)
+cli_xml_output_str(char *op, char *str, int op_ret, int op_errno,
+ char *op_errstr)
{
- int ret = -1;
- xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
- ret = cli_begin_xml_output (&writer, &buf);
- if (ret)
- goto out;
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
- ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
- if (ret)
- goto out;
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"cliOp",
- "%s", op);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ if (op) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"cliOp", "%s",
+ op);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"output",
- "%s", str);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ if (str) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"output", "%s",
+ str);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- ret = cli_end_xml_output (writer, buf);
+ ret = cli_end_xml_output(writer, doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
-void
-cli_xml_output_data_pair (dict_t *this, char *key, data_t *value,
- void *data)
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_data_pair(dict_t *this, char *key, data_t *value, void *data)
{
- int ret = -1;
- xmlTextWriterPtr *writer = NULL;
+ int ret = -1;
+ xmlTextWriterPtr *writer = NULL;
- writer = (xmlTextWriterPtr *)data;
+ writer = (xmlTextWriterPtr *)data;
- ret = xmlTextWriterWriteFormatElement (*writer, (xmlChar *)key,
- "%s", value->data);
+ ret = xmlTextWriterWriteFormatElement(*writer, (xmlChar *)key, "%s",
+ value->data);
- return;
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ return ret;
}
+#endif
int
-cli_xml_output_dict ( char *op, dict_t *dict, int op_ret, int op_errno,
- char *op_errstr)
+cli_xml_output_dict(char *op, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
{
- int ret = -1;
- xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
- ret = cli_begin_xml_output (&writer, &buf);
- if (ret)
- goto out;
- /* <"op"> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)op);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
- dict_foreach (dict, cli_xml_output_data_pair, &writer);
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
- /* </"op"> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* <"op"> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)op);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = cli_end_xml_output (writer, buf);
+ if (dict)
+ dict_foreach(dict, cli_xml_output_data_pair, &writer);
+
+ /* </"op"> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_end_xml_output(writer, doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
+#if (HAVE_LIB_XML)
int
-cli_xml_output_vol_status_common (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index, int *online,
- gf_boolean_t *node_present)
-{
- int ret = -1;
- char *hostname = NULL;
- char *path = NULL;
- int port = 0;
- int status = 0;
- int pid = 0;
- char key[1024] = {0,};
-
- snprintf (key, sizeof (key), "brick%d.hostname", brick_index);
- ret = dict_get_str (dict, key, &hostname);
- if (ret) {
- *node_present = _gf_false;
- goto out;
+cli_xml_output_vol_status_common(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index, int *online,
+ gf_boolean_t *node_present)
+{
+ int ret = -1;
+ char *hostname = NULL;
+ char *path = NULL;
+ char *uuid = NULL;
+ int port = 0;
+ int rdma_port = 0;
+ int status = 0;
+ int pid = 0;
+ char key[1024] = {
+ 0,
+ };
+
+ snprintf(key, sizeof(key), "brick%d.hostname", brick_index);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret) {
+ *node_present = _gf_false;
+ goto out;
+ }
+ *node_present = _gf_true;
+
+ /* <node>
+ * will be closed in the calling function cli_xml_output_vol_status()*/
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"node");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hostname", "%s",
+ hostname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.path", brick_index);
+ ret = dict_get_str(dict, key, &path);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"path", "%s",
+ path);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.peerid", brick_index);
+ ret = dict_get_str(dict, key, &uuid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"peerid", "%s",
+ uuid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.status", brick_index);
+ ret = dict_get_int32(dict, key, &status);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"status", "%d",
+ status);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ *online = status;
+
+ snprintf(key, sizeof(key), "brick%d.port", brick_index);
+ ret = dict_get_int32(dict, key, &port);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "brick%d.rdma_port", brick_index);
+ ret = dict_get_int32(dict, key, &rdma_port);
+
+ /* If the process is either offline or doesn't provide a port (shd)
+ * port = "N/A"
+ * else print the port number of the process.
+ */
+
+ /*
+ * Tag 'port' can be removed once console management is started
+ * to support new tag ports.
+ */
+
+ if (*online == 1 && port != 0)
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"port", "%d",
+ port);
+ else
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"port", "%s",
+ "N/A");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"ports");
+ if (*online == 1 && (port != 0 || rdma_port != 0)) {
+ if (port) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"tcp",
+ "%d", port);
+ } else {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"tcp",
+ "%s", "N/A");
}
- *node_present = _gf_true;
-
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"hostname",
- "%s", hostname);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.path", brick_index);
- ret = dict_get_str (dict, key, &path);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"path",
- "%s", path);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.port", brick_index);
- ret = dict_get_int32 (dict, key, &port);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"port",
- "%d", port);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ if (rdma_port) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"rdma",
+ "%d", rdma_port);
+ } else {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"rdma",
+ "%s", "N/A");
+ }
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ } else {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"tcp", "%s",
+ "N/A");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"rdma", "%s",
+ "N/A");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.pid", brick_index);
+ ret = dict_get_int32(dict, key, &pid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"pid", "%d", pid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.status", brick_index);
- ret = dict_get_int32 (dict, key, &status);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"status",
- "%d", status);
- XML_RET_CHECK_AND_GOTO (ret, out);
- *online = status;
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.pid", brick_index);
- ret = dict_get_int32 (dict, key, &pid);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"pid",
- "%d", pid);
- XML_RET_CHECK_AND_GOTO (ret, out);
+int
+cli_xml_output_vol_status_detail(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ uint64_t size_total = 0;
+ uint64_t size_free = 0;
+ char *device = NULL;
+ uint64_t block_size = 0;
+ char *mnt_options = NULL;
+ char *fs_name = NULL;
+ char *inode_size = NULL;
+ uint64_t inodes_total = 0;
+ uint64_t inodes_free = 0;
+ char key[1024] = {
+ 0,
+ };
+
+ snprintf(key, sizeof(key), "brick%d.total", brick_index);
+ ret = dict_get_uint64(dict, key, &size_total);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"sizeTotal",
+ "%" PRIu64, size_total);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ snprintf(key, sizeof(key), "brick%d.free", brick_index);
+ ret = dict_get_uint64(dict, key, &size_free);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"sizeFree",
+ "%" PRIu64, size_free);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ snprintf(key, sizeof(key), "brick%d.device", brick_index);
+ ret = dict_get_str(dict, key, &device);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"device", "%s",
+ device);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ snprintf(key, sizeof(key), "brick%d.block_size", brick_index);
+ ret = dict_get_uint64(dict, key, &block_size);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"blockSize",
+ "%" PRIu64, block_size);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ snprintf(key, sizeof(key), "brick%d.mnt_options", brick_index);
+ ret = dict_get_str(dict, key, &mnt_options);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"mntOptions",
+ "%s", mnt_options);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ snprintf(key, sizeof(key), "brick%d.fs_name", brick_index);
+ ret = dict_get_str(dict, key, &fs_name);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"fsName", "%s",
+ fs_name);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ snprintf(key, sizeof(key), "brick%d.inode_size", brick_index);
+ ret = dict_get_str(dict, key, &inode_size);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"inodeSize",
+ "%s", fs_name);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ snprintf(key, sizeof(key), "brick%d.total_inodes", brick_index);
+ ret = dict_get_uint64(dict, key, &inodes_total);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"inodesTotal",
+ "%" PRIu64, inodes_total);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ snprintf(key, sizeof(key), "brick%d.free_inodes", brick_index);
+ ret = dict_get_uint64(dict, key, &inodes_free);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"inodesFree",
+ "%" PRIu64, inodes_free);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ } else {
+ ret = 0;
+ }
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-cli_xml_output_vol_status_detail (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index)
+cli_xml_output_vol_status_mempool(xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
{
- int ret = -1;
- uint64_t size_total = 0;
- uint64_t size_free = 0;
- char *device = NULL;
- uint64_t block_size = 0;
- char *mnt_options = NULL;
- char *fs_name = NULL;
- char *inode_size = NULL;
- uint64_t inodes_total = 0;
- uint64_t inodes_free = 0;
- char key[1024] = {0,};
-
- snprintf (key, sizeof (key), "brick%d.total", brick_index);
- ret = dict_get_uint64 (dict, key, &size_total);
+ int ret = -1;
+ int mempool_count = 0;
+ char *name = NULL;
+ int hotcount = 0;
+ int coldcount = 0;
+ uint64_t paddedsizeof = 0;
+ uint64_t alloccount = 0;
+ int maxalloc = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ /* <mempool> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"mempool");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.mempool-count", prefix);
+ ret = dict_get_int32(dict, key, &mempool_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count", "%d",
+ mempool_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < mempool_count; i++) {
+ /* <pool> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"pool");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.pool%d.name", prefix, i);
+ ret = dict_get_str(dict, key, &name);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"sizeTotal",
- "%"PRIu64, size_total);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ name);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.free", brick_index);
- ret = dict_get_uint64 (dict, key, &size_free);
+ snprintf(key, sizeof(key), "%s.pool%d.hotcount", prefix, i);
+ ret = dict_get_int32(dict, key, &hotcount);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"sizeFree",
- "%"PRIu64, size_free);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hotCount",
+ "%d", hotcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.device", brick_index);
- ret = dict_get_str (dict, key, &device);
+ snprintf(key, sizeof(key), "%s.pool%d.coldcount", prefix, i);
+ ret = dict_get_int32(dict, key, &coldcount);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"device",
- "%s", device);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"coldCount",
+ "%d", coldcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.block_size", brick_index);
- ret = dict_get_uint64 (dict, key, &block_size);
+ snprintf(key, sizeof(key), "%s.pool%d.paddedsizeof", prefix, i);
+ ret = dict_get_uint64(dict, key, &paddedsizeof);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"blockSize",
- "%"PRIu64, block_size);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"padddedSizeOf", "%" PRIu64, paddedsizeof);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mnt_options", brick_index);
- ret = dict_get_str (dict, key, &mnt_options);
+ snprintf(key, sizeof(key), "%s.pool%d.alloccount", prefix, i);
+ ret = dict_get_uint64(dict, key, &alloccount);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"mntOptions",
- "%s", mnt_options);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"allocCount",
+ "%" PRIu64, alloccount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.fs_name", brick_index);
- ret = dict_get_str (dict, key, &fs_name);
+ snprintf(key, sizeof(key), "%s.pool%d.max_alloc", prefix, i);
+ ret = dict_get_int32(dict, key, &maxalloc);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"fsName",
- "%s", fs_name);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"maxAlloc",
+ "%d", maxalloc);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* inode details are only available for ext 2/3/4 & xfs */
- if (!IS_EXT_FS(fs_name) || strcmp (fs_name, "xfs")) {
- ret = 0;
- goto out;
- }
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.inode_size", brick_index);
- ret = dict_get_str (dict, key, &inode_size);
+ snprintf(key, sizeof(key), "%s.pool%d.pool-misses", prefix, i);
+ ret = dict_get_uint64(dict, key, &alloccount);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"inodeSize",
- "%s", fs_name);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"poolMisses",
+ "%" PRIu64, alloccount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.total_inodes", brick_index);
- ret = dict_get_uint64 (dict, key, &inodes_total);
+ snprintf(key, sizeof(key), "%s.pool%d.max-stdalloc", prefix, i);
+ ret = dict_get_int32(dict, key, &maxalloc);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"inodesTotal",
- "%"PRIu64, inodes_total);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"maxStdAlloc",
+ "%d", maxalloc);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.free_inodes", brick_index);
- ret = dict_get_uint64 (dict, key, &inodes_free);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"inodesFree",
- "%"PRIu64, inodes_free);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </pool> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </mempool> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-cli_xml_output_vol_status_mempool (xmlTextWriterPtr writer, dict_t *dict,
- char *prefix)
-{
- int ret = -1;
- int mempool_count = 0;
- char *name = NULL;
- int hotcount = 0;
- int coldcount = 0;
- uint64_t paddedsizeof = 0;
- uint64_t alloccount = 0;
- int maxalloc = 0;
- char key[1024] = {0,};
- int i = 0;
-
- /* <mempool> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"mempool");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- snprintf (key, sizeof (key), "%s.mempool-count", prefix);
- ret = dict_get_int32 (dict, key, &mempool_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
- "%d", mempool_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < mempool_count; i++) {
- /* <pool> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"pool");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.name", prefix, i);
- ret = dict_get_str (dict, key, &name);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"name",
- "%s", name);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.hotcount", prefix, i);
- ret = dict_get_int32 (dict, key, &hotcount);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"hotCount",
- "%d", hotcount);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.coldcount", prefix, i);
- ret = dict_get_int32 (dict, key, &coldcount);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"coldCount",
- "%d", coldcount);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.paddedsizeof",
- prefix, i);
- ret = dict_get_uint64 (dict, key, &paddedsizeof);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"padddedSizeOf", "%"PRIu64,
- paddedsizeof);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.alloccount", prefix, i);
- ret = dict_get_uint64 (dict, key, &alloccount);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"allocCount",
- "%"PRIu64, alloccount);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.max_alloc", prefix, i);
- ret = dict_get_int32 (dict, key, &maxalloc);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"maxAlloc",
- "%d", maxalloc);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.pool-misses", prefix, i);
- ret = dict_get_uint64 (dict, key, &alloccount);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"poolMisses",
- "%"PRIu64, alloccount);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pool%d.max-stdalloc", prefix, i);
- ret = dict_get_int32 (dict, key, &maxalloc);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"maxStdAlloc",
- "%d", maxalloc);
- XML_RET_CHECK_AND_GOTO (ret, out);
+cli_xml_output_vol_status_mem(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int arena = 0;
+ int ordblks = 0;
+ int smblks = 0;
+ int hblks = 0;
+ int hblkhd = 0;
+ int usmblks = 0;
+ int fsmblks = 0;
+ int uordblks = 0;
+ int fordblks = 0;
+ int keepcost = 0;
+ char key[1024] = {
+ 0,
+ };
+
+ /* <memStatus> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"memStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <mallinfo> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"mallinfo");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.arena", brick_index);
+ ret = dict_get_int32(dict, key, &arena);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"arena", "%d",
+ arena);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.ordblks", brick_index);
+ ret = dict_get_int32(dict, key, &ordblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"ordblks", "%d",
+ ordblks);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.smblks", brick_index);
+ ret = dict_get_int32(dict, key, &smblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"smblks", "%d",
+ smblks);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.hblks", brick_index);
+ ret = dict_get_int32(dict, key, &hblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hblks", "%d",
+ hblks);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.hblkhd", brick_index);
+ ret = dict_get_int32(dict, key, &hblkhd);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hblkhd", "%d",
+ hblkhd);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.usmblks", brick_index);
+ ret = dict_get_int32(dict, key, &usmblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"usmblks", "%d",
+ usmblks);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.fsmblks", brick_index);
+ ret = dict_get_int32(dict, key, &fsmblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"fsmblks", "%d",
+ fsmblks);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.uordblks", brick_index);
+ ret = dict_get_int32(dict, key, &uordblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uordblks", "%d",
+ uordblks);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.fordblks", brick_index);
+ ret = dict_get_int32(dict, key, &fordblks);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"fordblks", "%d",
+ fordblks);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.mallinfo.keepcost", brick_index);
+ ret = dict_get_int32(dict, key, &keepcost);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"keepcost", "%d",
+ keepcost);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </mallinfo> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d", brick_index);
+ ret = cli_xml_output_vol_status_mempool(writer, dict, key);
+ if (ret)
+ goto out;
+
+ /* </memStatus> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- /* </pool> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+int
+cli_xml_output_vol_status_clients(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int client_count = 0;
+ char *hostname = NULL;
+ uint64_t bytes_read = 0;
+ uint64_t bytes_write = 0;
+ uint32_t opversion = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ /* <clientsStatus> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"clientsStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.clientcount", brick_index);
+ ret = dict_get_int32(dict, key, &client_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"clientCount",
+ "%d", client_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < client_count; i++) {
+ /* <client> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"client");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.client%d.hostname", brick_index, i);
+ ret = dict_get_str(dict, key, &hostname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hostname",
+ "%s", hostname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.client%d.bytesread", brick_index,
+ i);
+ ret = dict_get_uint64(dict, key, &bytes_read);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"bytesRead",
+ "%" PRIu64, bytes_read);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.client%d.byteswrite", brick_index,
+ i);
+ ret = dict_get_uint64(dict, key, &bytes_write);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"bytesWrite",
+ "%" PRIu64, bytes_write);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.client%d.opversion", brick_index,
+ i);
+ ret = dict_get_uint32(dict, key, &opversion);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"opVersion",
+ "%" PRIu32, opversion);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </client> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </clientsStatus> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- /* </mempool> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+int
+cli_xml_output_vol_status_inode_entry(xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ char *gfid = NULL;
+ uint64_t nlookup = 0;
+ uint32_t ref = 0;
+ int ia_type = 0;
+ char key[1024] = {
+ 0,
+ };
+
+ /* <inode> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"inode");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.gfid", prefix);
+ ret = dict_get_str(dict, key, &gfid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"gfid", "%s",
+ gfid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.nlookup", prefix);
+ ret = dict_get_uint64(dict, key, &nlookup);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"nLookup",
+ "%" PRIu64, nlookup);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.ref", prefix);
+ ret = dict_get_uint32(dict, key, &ref);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"ref", "%" PRIu32,
+ ref);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.ia_type", prefix);
+ ret = dict_get_int32(dict, key, &ia_type);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"iaType", "%d",
+ ia_type);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </inode> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-cli_xml_output_vol_status_mem (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index)
-{
- int ret = -1;
- int arena = 0;
- int ordblks = 0;
- int smblks = 0;
- int hblks = 0;
- int hblkhd = 0;
- int usmblks = 0;
- int fsmblks = 0;
- int uordblks = 0;
- int fordblks = 0;
- int keepcost = 0;
- char key[1024] = {0,};
-
- /* <memStatus> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"memStatus");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- /* <mallinfo> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"mallinfo");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- snprintf (key, sizeof (key), "brick%d.mallinfo.arena", brick_index);
- ret = dict_get_int32 (dict, key, &arena);
- if (ret)
+cli_xml_output_vol_status_itable(xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ uint32_t active_size = 0;
+ uint32_t lru_size = 0;
+ uint32_t purge_size = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ snprintf(key, sizeof(key), "%s.active_size", prefix);
+ ret = dict_get_uint32(dict, key, &active_size);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"activeSize",
+ "%" PRIu32, active_size);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ if (active_size != 0) {
+ /* <active> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"active");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < active_size; i++) {
+ snprintf(key, sizeof(key), "%s.active%d", prefix, i);
+ ret = cli_xml_output_vol_status_inode_entry(writer, dict, key);
+ if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"arena",
- "%d", arena);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.ordblks", brick_index);
- ret = dict_get_int32 (dict, key, &ordblks);
- if (ret)
+ }
+ /* </active> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ snprintf(key, sizeof(key), "%s.lru_size", prefix);
+ ret = dict_get_uint32(dict, key, &lru_size);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"lruSize",
+ "%" PRIu32, lru_size);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ if (lru_size != 0) {
+ /* <lru> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"lru");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < lru_size; i++) {
+ snprintf(key, sizeof(key), "%s.lru%d", prefix, i);
+ ret = cli_xml_output_vol_status_inode_entry(writer, dict, key);
+ if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"ordblks",
- "%d", ordblks);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.smblks", brick_index);
- ret = dict_get_int32 (dict, key, &smblks);
- if (ret)
+ }
+ /* </lru> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ snprintf(key, sizeof(key), "%s.purge_size", prefix);
+ ret = dict_get_uint32(dict, key, &purge_size);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"purgeSize",
+ "%" PRIu32, purge_size);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ if (purge_size != 0) {
+ /* <purge> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"purge");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < purge_size; i++) {
+ snprintf(key, sizeof(key), "%s.purge%d", prefix, i);
+ ret = cli_xml_output_vol_status_inode_entry(writer, dict, key);
+ if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"smblks",
- "%d", smblks);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+ /* </purge> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.hblks", brick_index);
- ret = dict_get_int32 (dict, key, &hblks);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"hblks",
- "%d", hblks);
- XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.hblkhd", brick_index);
- ret = dict_get_int32 (dict, key, &hblkhd);
+int
+cli_xml_output_vol_status_inode(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int conn_count = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ /* <inodeStatus> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"inodeStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.conncount", brick_index);
+ ret = dict_get_int32(dict, key, &conn_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"connections",
+ "%d", conn_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < conn_count; i++) {
+ /* <connection> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"connection");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.conn%d.itable", brick_index, i);
+ ret = cli_xml_output_vol_status_itable(writer, dict, key);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"hblkhd",
- "%d", hblkhd);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.usmblks", brick_index);
- ret = dict_get_int32 (dict, key, &usmblks);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"usmblks",
- "%d", usmblks);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </connection> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.fsmblks", brick_index);
- ret = dict_get_int32 (dict, key, &fsmblks);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"fsmblks",
- "%d", fsmblks);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </inodeStatus> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.uordblks", brick_index);
- ret = dict_get_int32 (dict, key, &uordblks);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_fdtable(xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ int refcount = 0;
+ uint32_t maxfds = 0;
+ int firstfree = 0;
+ int openfds = 0;
+ int fd_pid = 0;
+ int fd_refcount = 0;
+ int fd_flags = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ /* <fdTable> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"fdTable");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.refcount", prefix);
+ ret = dict_get_int32(dict, key, &refcount);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"refCount", "%d",
+ refcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.maxfds", prefix);
+ ret = dict_get_uint32(dict, key, &maxfds);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"maxFds",
+ "%" PRIu32, maxfds);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.firstfree", prefix);
+ ret = dict_get_int32(dict, key, &firstfree);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"firstFree", "%d",
+ firstfree);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.openfds", prefix);
+ ret = dict_get_int32(dict, key, &openfds);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"openFds", "%d",
+ openfds);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < maxfds; i++) {
+ snprintf(key, sizeof(key), "%s.fdentry%d.pid", prefix, i);
+ ret = dict_get_int32(dict, key, &fd_pid);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"uordblks",
- "%d", uordblks);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ continue;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.fordblks", brick_index);
- ret = dict_get_int32 (dict, key, &fordblks);
+ snprintf(key, sizeof(key), "%s.fdentry%d.refcount", prefix, i);
+ ret = dict_get_int32(dict, key, &fd_refcount);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"fordblks",
- "%d", fordblks);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ continue;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.mallinfo.keepcost", brick_index);
- ret = dict_get_int32 (dict, key, &keepcost);
+ snprintf(key, sizeof(key), "%s.fdentry%d.flags", prefix, i);
+ ret = dict_get_int32(dict, key, &fd_flags);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"keepcost",
- "%d", keepcost);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ continue;
+
+ /* <fd> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"fd");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"entry", "%d",
+ i + 1);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </mallinfo> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"pid", "%d",
+ fd_pid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d", brick_index);
- ret = cli_xml_output_vol_status_mempool (writer, dict, key);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"refCount",
+ "%d", fd_refcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"flags", "%d",
+ fd_flags);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </fd> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </fdTable> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_fd(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int conn_count = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ /* <fdStatus> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"fdStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.conncount", brick_index);
+ ret = dict_get_int32(dict, key, &conn_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"connections",
+ "%d", conn_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < conn_count; i++) {
+ /* <connection> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"connection");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.conn%d.fdtable", brick_index, i);
+ ret = cli_xml_output_vol_status_fdtable(writer, dict, key);
if (ret)
- goto out;
+ goto out;
+
+ /* </connection> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- /* </memStatus> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </fdStatus> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-cli_xml_output_vol_status_clients (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index)
+cli_xml_output_vol_status_callframe(xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
{
- int ret = -1;
- int client_count = 0;
- char *hostname = NULL;
- uint64_t bytes_read = 0;
- uint64_t bytes_write = 0;
- char key[1024] = {0,};
- int i = 0;
+ int ret = -1;
+ int ref_count = 0;
+ char *translator = NULL;
+ int complete = 0;
+ char *parent = NULL;
+ char *wind_from = NULL;
+ char *wind_to = NULL;
+ char *unwind_from = NULL;
+ char *unwind_to = NULL;
+ char key[1024] = {
+ 0,
+ };
+
+ /* <callFrame> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"callFrame");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.refcount", prefix);
+ ret = dict_get_int32(dict, key, &ref_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"refCount", "%d",
+ ref_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.translator", prefix);
+ ret = dict_get_str(dict, key, &translator);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"translator", "%s",
+ translator);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.complete", prefix);
+ ret = dict_get_int32(dict, key, &complete);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"complete", "%d",
+ complete);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.parent", prefix);
+ ret = dict_get_str(dict, key, &parent);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"parent", "%s",
+ parent);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ snprintf(key, sizeof(key), "%s.windfrom", prefix);
+ ret = dict_get_str(dict, key, &wind_from);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"windFrom",
+ "%s", wind_from);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ snprintf(key, sizeof(key), "%s.windto", prefix);
+ ret = dict_get_str(dict, key, &wind_to);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"windTo", "%s",
+ wind_to);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ snprintf(key, sizeof(key), "%s.unwindfrom", prefix);
+ ret = dict_get_str(dict, key, &unwind_from);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"unwindFrom",
+ "%s", unwind_from);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ snprintf(key, sizeof(key), "%s.unwindto", prefix);
+ ret = dict_get_str(dict, key, &unwind_to);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"unwindTo",
+ "%s", unwind_to);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </callFrame> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- /* <clientsStatus> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"clientsStatus");
- XML_RET_CHECK_AND_GOTO (ret, out);
+int
+cli_xml_output_vol_status_callstack(xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
+{
+ int ret = -1;
+ int uid = 0;
+ int gid = 0;
+ int pid = 0;
+ uint64_t unique = 0;
+ int frame_count = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ /* <callStack> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"callStack");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.uid", prefix);
+ ret = dict_get_int32(dict, key, &uid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uid", "%d", uid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.gid", prefix);
+ ret = dict_get_int32(dict, key, &gid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"gid", "%d", gid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.pid", prefix);
+ ret = dict_get_int32(dict, key, &pid);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"pid", "%d", pid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.unique", prefix);
+ ret = dict_get_uint64(dict, key, &unique);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"unique",
+ "%" PRIu64, unique);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.count", prefix);
+ ret = dict_get_int32(dict, key, &frame_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"frameCount", "%d",
+ frame_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < frame_count; i++) {
+ snprintf(key, sizeof(key), "%s.frame%d", prefix, i);
+ ret = cli_xml_output_vol_status_callframe(writer, dict, key);
+ if (ret)
+ goto out;
+ }
- snprintf (key, sizeof (key), "brick%d.clientcount", brick_index);
- ret = dict_get_int32 (dict, key, &client_count);
+ /* </callStack> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+
+int
+cli_xml_output_vol_status_callpool(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index)
+{
+ int ret = -1;
+ int call_count = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ /* <callpoolStatus> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"callpoolStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "brick%d.callpool.count", brick_index);
+ ret = dict_get_int32(dict, key, &call_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count", "%d",
+ call_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < call_count; i++) {
+ snprintf(key, sizeof(key), "brick%d.callpool.stack%d", brick_index, i);
+ ret = cli_xml_output_vol_status_callstack(writer, dict, key);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"clientCount",
- "%d", client_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < client_count; i++) {
- /* <client> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"client");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.client%d.hostname",
- brick_index, i);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"hostname",
- "%s", hostname);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.client%d.bytesread",
- brick_index, i);
- ret = dict_get_uint64 (dict, key, &bytes_read);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"bytesRead",
- "%"PRIu64, bytes_read);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.client%d.byteswrite",
- brick_index, i);
- ret = dict_get_uint64 (dict, key, &bytes_write);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"bytesWrite",
- "%"PRIu64, bytes_write);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- /* </client> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+ goto out;
+ }
+
+ /* </callpoolStatus> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </clientsStatus> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
+#endif
int
-cli_xml_output_vol_status_inode_entry (xmlTextWriterPtr writer, dict_t *dict,
- char *prefix)
+cli_xml_output_vol_status_begin(cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr)
{
- int ret = -1;
- char *gfid = NULL;
- uint64_t nlookup = 0;
- uint32_t ref = 0;
- int ia_type = 0;
- char key[1024] = {0,};
+#if (HAVE_LIB_XML)
+ int ret = -1;
- /* <inode> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"inode");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = cli_begin_xml_output(&(local->writer), &(local->doc));
+ XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf (key, sizeof (key), "%s.gfid", prefix);
- ret = dict_get_str (dict, key, &gfid);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"gfid",
- "%s", gfid);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = cli_xml_output_common(local->writer, op_ret, op_errno, op_errstr);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key,0, sizeof (key));
- snprintf (key, sizeof (key), "%s.nlookup", prefix);
- ret = dict_get_uint64 (dict, key, &nlookup);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"nLookup",
- "%"PRIu64, nlookup);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* <volStatus> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"volStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key,0, sizeof (key));
- snprintf (key, sizeof (key), "%s.ref", prefix);
- ret = dict_get_uint32 (dict, key, &ref);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"ref",
- "%"PRIu32, ref);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* <volumes> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"volumes");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key,0, sizeof (key));
- snprintf (key, sizeof (key), "%s.ia_type", prefix);
- ret = dict_get_int32 (dict, key, &ia_type);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"iaType",
- "%d", ia_type);
- XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
- /* </inode> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+int
+cli_xml_output_vol_status_end(cli_local_t *local)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ /* </volumes> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </volStatus> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_end_xml_output(local->writer, local->doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
+#if (HAVE_LIB_XML)
int
-cli_xml_output_vol_status_itable (xmlTextWriterPtr writer, dict_t *dict,
- char *prefix)
+cli_xml_output_remove_brick_task_params(xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
{
- int ret = -1;
- uint32_t active_size = 0;
- uint32_t lru_size = 0;
- uint32_t purge_size = 0;
- char key[1024] = {0,};
- int i = 0;
+ int ret = -1;
+ char key[1024] = {
+ 0,
+ };
+ int count = 0;
+ int i = 0;
+ char *brick = NULL;
+
+ /* <params> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"params");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.count", prefix);
+ ret = dict_get_int32(dict, key, &count);
+ if (ret)
+ goto out;
+
+ for (i = 1; i <= count; i++) {
+ snprintf(key, sizeof(key), "%s.brick%d", prefix, i);
+ ret = dict_get_str(dict, key, &brick);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"brick", "%s",
+ brick);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ brick = NULL;
+ }
+
+ /* </param> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- snprintf (key, sizeof (key), "%s.active_size", prefix);
- ret = dict_get_uint32 (dict, key, &active_size);
+int
+cli_xml_output_vol_status_tasks(cli_local_t *local, dict_t *dict)
+{
+ int ret = -1;
+ char *task_type = NULL;
+ char *task_id_str = NULL;
+ int status = 0;
+ int tasks = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ /* <tasks> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"tasks");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "tasks", &tasks);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < tasks; i++) {
+ /* <task> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"task");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "task%d.type", i);
+ ret = dict_get_str(dict, key, &task_type);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"activeSize",
- "%"PRIu32, active_size);
- XML_RET_CHECK_AND_GOTO (ret, out);
- if (active_size != 0) {
- /* <active> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"active");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < active_size; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.active%d", prefix, i);
- ret = cli_xml_output_vol_status_inode_entry
- (writer, dict, key);
- if (ret)
- goto out;
- }
- /* </active> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"type",
+ "%s", task_type);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.lru_size", prefix);
- ret = dict_get_uint32 (dict, key, &lru_size);
+ snprintf(key, sizeof(key), "task%d.id", i);
+ ret = dict_get_str(dict, key, &task_id_str);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"lruSize",
- "%"PRIu32, lru_size);
- XML_RET_CHECK_AND_GOTO (ret, out);
- if (lru_size != 0) {
- /* <lru> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"lru");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < lru_size; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.lru%d", prefix, i);
- ret = cli_xml_output_vol_status_inode_entry
- (writer, dict, key);
- if (ret)
- goto out;
- }
- /* </lru> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"id",
+ "%s", task_id_str);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.purge_size", prefix);
- ret = dict_get_uint32 (dict, key, &purge_size);
+ snprintf(key, sizeof(key), "task%d.status", i);
+ ret = dict_get_int32(dict, key, &status);
if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"status", "%d", status);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(local->writer,
+ (xmlChar *)"statusStr", "%s",
+ cli_vol_task_status_str[status]);
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "task%d", i);
+ if (!strcmp(task_type, "Remove brick")) {
+ ret = cli_xml_output_remove_brick_task_params(local->writer, dict,
+ key);
+ if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"purgeSize",
- "%"PRIu32, purge_size);
- XML_RET_CHECK_AND_GOTO (ret, out);
- if (purge_size != 0) {
- /* <purge> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"purge");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < purge_size; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.purge%d", prefix, i);
- ret = cli_xml_output_vol_status_inode_entry
- (writer, dict, key);
- if (ret)
- goto out;
- }
- /* </purge> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
}
+ /* </task> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </tasks> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
+#endif
+
int
-cli_xml_output_vol_status_inode (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index)
+cli_xml_output_vol_status_tasks_detail(cli_local_t *local, dict_t *dict)
{
- int ret = -1;
- int conn_count = 0;
- char key[1024] = {0,};
- int i = 0;
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ char *volname = NULL;
- /* <inodeStatus> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"inodeStatus");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /*<volume>*/
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf (key, sizeof (key), "brick%d.conncount", brick_index);
- ret = dict_get_int32 (dict, key, &conn_count);
- if (ret)
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"volName",
+ "%s", volname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_xml_output_vol_status_tasks(local, dict);
+ if (ret)
+ goto out;
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+out:
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_status(cli_local_t *local, dict_t *dict)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ char *volname = NULL;
+ int brick_count = 0;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int index_max = 0;
+ uint32_t cmd = GF_CLI_STATUS_NONE;
+ int online = 0;
+ gf_boolean_t node_present = _gf_true;
+ int i;
+ int type = -1;
+
+ /* <volume> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"volName",
+ "%s", volname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "count", &brick_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"nodeCount",
+ "%d", brick_count);
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_uint32(dict, "cmd", &cmd);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32(dict, "brick-index-max", &brick_index_max);
+ if (ret)
+ goto out;
+ ret = dict_get_int32(dict, "other-count", &other_count);
+ if (ret)
+ goto out;
+
+ index_max = brick_index_max + other_count;
+
+ ret = dict_get_int32(dict, "type", &type);
+ if (ret)
+ goto out;
+
+ for (i = 0; i <= index_max; i++) {
+ ret = cli_xml_output_vol_status_common(local->writer, dict, i, &online,
+ &node_present);
+ if (ret) {
+ if (node_present)
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"connections",
- "%d", conn_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < conn_count; i++) {
- /* <connection> */
- ret = xmlTextWriterStartElement (writer,
- (xmlChar *)"connection");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.conn%d.itable",
- brick_index, i);
- ret = cli_xml_output_vol_status_itable (writer, dict, key);
+ else
+ continue;
+ }
+
+ switch (cmd & GF_CLI_STATUS_MASK) {
+ case GF_CLI_STATUS_DETAIL:
+ ret = cli_xml_output_vol_status_detail(local->writer, dict, i);
if (ret)
+ goto out;
+ break;
+
+ case GF_CLI_STATUS_MEM:
+ if (online) {
+ ret = cli_xml_output_vol_status_mem(local->writer, dict, i);
+ if (ret)
+ goto out;
+ }
+ break;
+
+ case GF_CLI_STATUS_CLIENTS:
+ if (online) {
+ ret = cli_xml_output_vol_status_clients(local->writer, dict,
+ i);
+ if (ret)
goto out;
+ }
+ break;
+
+ case GF_CLI_STATUS_INODE:
+ if (online) {
+ ret = cli_xml_output_vol_status_inode(local->writer, dict,
+ i);
+ if (ret)
+ goto out;
+ }
+ break;
+
+ case GF_CLI_STATUS_FD:
+ if (online) {
+ ret = cli_xml_output_vol_status_fd(local->writer, dict, i);
+ if (ret)
+ goto out;
+ }
+ break;
- /* </connection> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ case GF_CLI_STATUS_CALLPOOL:
+ if (online) {
+ ret = cli_xml_output_vol_status_callpool(local->writer,
+ dict, i);
+ if (ret)
+ goto out;
+ }
+ break;
+ default:
+ break;
}
- /* </inodeStatus> */
- ret= xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </node> was opened in cli_xml_output_vol_status_common()*/
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* Tasks are only present when a normal volume status call is done on a
+ * single volume or on all volumes
+ */
+ if (((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) &&
+ (cmd & (GF_CLI_STATUS_VOL | GF_CLI_STATUS_ALL))) {
+ ret = cli_xml_output_vol_status_tasks(local, dict);
+ if (ret)
+ goto out;
+ }
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
+#if (HAVE_LIB_XML)
int
-cli_xml_output_vol_status_fdtable (xmlTextWriterPtr writer, dict_t *dict,
- char *prefix)
-{
- int ret = -1;
- int refcount = 0;
- uint32_t maxfds = 0;
- int firstfree = 0;
- int openfds = 0;
- int fd_pid = 0;
- int fd_refcount = 0;
- int fd_flags = 0;
- char key[1024] = {0,};
- int i = 0;
-
- /* <fdTable> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"fdTable");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- snprintf (key, sizeof (key), "%s.refcount", prefix);
- ret = dict_get_int32 (dict, key, &refcount);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"refCount",
- "%d", refcount);
- XML_RET_CHECK_AND_GOTO (ret, out);
+cli_xml_output_vol_top_rw_perf(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index, int member_index)
+{
+ int ret = -1;
+ char *filename = NULL;
+ uint64_t throughput = 0;
+ struct timeval tv = {
+ 0,
+ };
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char key[1024] = {
+ 0,
+ };
+
+ /* <file> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"file");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-filename-%d", brick_index, member_index);
+ ret = dict_get_str(dict, key, &filename);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"filename", "%s",
+ filename);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-value-%d", brick_index, member_index);
+ ret = dict_get_uint64(dict, key, &throughput);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count",
+ "%" PRIu64, throughput);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-time-sec-%d", brick_index, member_index);
+ ret = dict_get_int32(dict, key, (int32_t *)&tv.tv_sec);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%d-time-usec-%d", brick_index, member_index);
+ ret = dict_get_int32(dict, key, (int32_t *)&tv.tv_usec);
+ if (ret)
+ goto out;
+
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"time", "%s",
+ timestr);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </file> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.maxfds", prefix);
- ret = dict_get_uint32 (dict, key, &maxfds);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"maxFds",
- "%"PRIu32, maxfds);
- XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.firstfree", prefix);
- ret = dict_get_int32 (dict, key, &firstfree);
+int
+cli_xml_output_vol_top_other(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index, int member_index)
+{
+ int ret = -1;
+ char *filename = NULL;
+ uint64_t count = 0;
+ char key[1024] = {
+ 0,
+ };
+
+ /* <file> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"file");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-filename-%d", brick_index, member_index);
+ ret = dict_get_str(dict, key, &filename);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"filename", "%s",
+ filename);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-value-%d", brick_index, member_index);
+ ret = dict_get_uint64(dict, key, &count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count",
+ "%" PRIu64, count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </file> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
+
+int
+cli_xml_output_vol_top(dict_t *dict, int op_ret, int op_errno, char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ int brick_count = 0;
+ int top_op = GF_CLI_TOP_NONE;
+ char *brick_name = NULL;
+ int members = 0;
+ uint64_t current_open = 0;
+ uint64_t max_open = 0;
+ char *max_open_time = NULL;
+ double throughput = 0.0;
+ double time_taken = 0.0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+ int j = 0;
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volTop> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volTop");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "count", &brick_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"brickCount", "%d",
+ brick_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "1-top-op", &top_op);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"topOp", "%d",
+ top_op);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ while (i < brick_count) {
+ i++;
+
+ /* <brick> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"brick");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-brick", i);
+ ret = dict_get_str(dict, key, &brick_name);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"firstFree",
- "%d", firstfree);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ brick_name);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.openfds", prefix);
- ret = dict_get_int32 (dict, key, &openfds);
+ snprintf(key, sizeof(key), "%d-members", i);
+ ret = dict_get_int32(dict, key, &members);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"openFds",
- "%d", openfds);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < maxfds; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdentry%d.pid", prefix, i);
- ret = dict_get_int32 (dict, key, &fd_pid);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"members",
+ "%d", members);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ switch (top_op) {
+ case GF_CLI_TOP_OPEN:
+ snprintf(key, sizeof(key), "%d-current-open", i);
+ ret = dict_get_uint64(dict, key, &current_open);
if (ret)
- continue;
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"currentOpen", "%" PRIu64, current_open);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdentry%d.refcount",
- prefix, i);
- ret = dict_get_int32 (dict, key, &fd_refcount);
+ snprintf(key, sizeof(key), "%d-max-open", i);
+ ret = dict_get_uint64(dict, key, &max_open);
if (ret)
- continue;
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"maxOpen", "%" PRIu64, max_open);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdentry%d.flags", prefix, i);
- ret = dict_get_int32 (dict, key, &fd_flags);
+ snprintf(key, sizeof(key), "%d-max-openfd-time", i);
+ ret = dict_get_str(dict, key, &max_open_time);
if (ret)
- continue;
-
- /* <fd> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"fd");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"entry",
- "%d", i+1);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"pid",
- "%d", fd_pid);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"refCount",
- "%d", fd_refcount);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"flags",
- "%d", fd_flags);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- /* </fd> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"maxOpenTime", "%s", max_open_time);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ case GF_CLI_TOP_READ:
+ case GF_CLI_TOP_WRITE:
+ case GF_CLI_TOP_OPENDIR:
+ case GF_CLI_TOP_READDIR:
+
+ break;
+
+ case GF_CLI_TOP_READ_PERF:
+ case GF_CLI_TOP_WRITE_PERF:
+ snprintf(key, sizeof(key), "%d-throughput", i);
+ ret = dict_get_double(dict, key, &throughput);
+ if (!ret) {
+ snprintf(key, sizeof(key), "%d-time", i);
+ ret = dict_get_double(dict, key, &time_taken);
+ }
+
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"throughput", "%f", throughput);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"timeTaken", "%f", time_taken);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ break;
+
+ default:
+ ret = -1;
+ goto out;
}
- /* </fdTable> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ for (j = 1; j <= members; j++) {
+ if (top_op == GF_CLI_TOP_READ_PERF ||
+ top_op == GF_CLI_TOP_WRITE_PERF) {
+ ret = cli_xml_output_vol_top_rw_perf(writer, dict, i, j);
+ } else {
+ ret = cli_xml_output_vol_top_other(writer, dict, i, j);
+ }
+ if (ret)
+ goto out;
+ }
+
+ /* </brick> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </volTop> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ ret = cli_end_xml_output(writer, doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
+#if (HAVE_LIB_XML)
int
-cli_xml_output_vol_status_fd (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index)
+cli_xml_output_vol_profile_stats(xmlTextWriterPtr writer, dict_t *dict,
+ int brick_index, int interval)
{
- int ret = -1;
- int conn_count = 0;
- char key[1024] = {0,};
- int i = 0;
+ int ret = -1;
+ uint64_t read_count = 0;
+ uint64_t write_count = 0;
+ uint64_t hits = 0;
+ double avg_latency = 0.0;
+ double max_latency = 0.0;
+ double min_latency = 0.0;
+ uint64_t duration = 0;
+ uint64_t total_read = 0;
+ uint64_t total_write = 0;
+ char key[1024] = {0};
+ int i = 0;
+
+ /* <cumulativeStats> || <intervalStats> */
+ if (interval == -1)
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"cumulativeStats");
+ else
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"intervalStats");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <blockStats> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"blockStats");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < 32; i++) {
+ /* <block> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"block");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"size",
+ "%" PRIu32, (1U << i));
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-%d-read-%" PRIu32, brick_index, interval,
+ (1U << i));
+ ret = dict_get_uint64(dict, key, &read_count);
+ if (ret)
+ read_count = 0;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"reads",
+ "%" PRIu64, read_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-%d-write-%" PRIu32, brick_index,
+ interval, (1U << i));
+ ret = dict_get_uint64(dict, key, &write_count);
+ if (ret)
+ write_count = 0;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"writes",
+ "%" PRIu64, write_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </block> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </blockStats> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <fopStats> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"fopStats");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ snprintf(key, sizeof(key), "%d-%d-%d-hits", brick_index, interval, i);
+ ret = dict_get_uint64(dict, key, &hits);
+ if (ret)
+ goto cont;
+
+ snprintf(key, sizeof(key), "%d-%d-%d-avglatency", brick_index, interval,
+ i);
+ ret = dict_get_double(dict, key, &avg_latency);
+ if (ret)
+ goto cont;
+
+ snprintf(key, sizeof(key), "%d-%d-%d-minlatency", brick_index, interval,
+ i);
+ ret = dict_get_double(dict, key, &min_latency);
+ if (ret)
+ goto cont;
+
+ snprintf(key, sizeof(key), "%d-%d-%d-maxlatency", brick_index, interval,
+ i);
+ ret = dict_get_double(dict, key, &max_latency);
+ if (ret)
+ goto cont;
+
+ /* <fop> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"fop");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ gf_fop_list[i]);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hits",
+ "%" PRIu64, hits);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"avgLatency",
+ "%f", avg_latency);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"minLatency",
+ "%f", min_latency);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"maxLatency",
+ "%f", max_latency);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </fop> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ cont:
+ hits = 0;
+ avg_latency = 0.0;
+ min_latency = 0.0;
+ max_latency = 0.0;
+ }
+
+ for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) {
+ hits = 0;
+ avg_latency = 0.0;
+ min_latency = 0.0;
+ max_latency = 0.0;
+
+ snprintf(key, sizeof(key), "%d-%d-%d-upcall-hits", brick_index,
+ interval, i);
+ ret = dict_get_uint64(dict, key, &hits);
+ if (ret)
+ continue;
+
+ /* <fop> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"fop");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ gf_fop_list[i]);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hits",
+ "%" PRIu64, hits);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"avgLatency",
+ "%f", avg_latency);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"minLatency",
+ "%f", min_latency);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"maxLatency",
+ "%f", max_latency);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </fop> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </fopStats> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-%d-duration", brick_index, interval);
+ ret = dict_get_uint64(dict, key, &duration);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"duration",
+ "%" PRIu64, duration);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-%d-total-read", brick_index, interval);
+ ret = dict_get_uint64(dict, key, &total_read);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"totalRead",
+ "%" PRIu64, total_read);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-%d-total-write", brick_index, interval);
+ ret = dict_get_uint64(dict, key, &total_write);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"totalWrite",
+ "%" PRIu64, total_write);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </cumulativeStats> || </intervalStats> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* <fdStatus> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"fdStatus");
- XML_RET_CHECK_AND_GOTO (ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
- snprintf (key, sizeof (key), "brick%d.conncount", brick_index);
- ret = dict_get_int32 (dict, key, &conn_count);
+int
+cli_xml_output_vol_profile(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *volname = NULL;
+ int op = GF_CLI_STATS_NONE;
+ int info_op = GF_CLI_INFO_NONE;
+ int brick_count = 0;
+ char *brick_name = NULL;
+ int interval = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+ int stats_cleared = 0;
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volProfile> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volProfile");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"volname", "%s",
+ volname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "op", &op);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"profileOp", "%d",
+ op);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ if (GF_CLI_STATS_INFO != op)
+ goto cont;
+
+ ret = dict_get_int32(dict, "count", &brick_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"brickCount", "%d",
+ brick_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "info-op", &info_op);
+ if (ret)
+ goto out;
+
+ while (i < brick_count) {
+ i++;
+
+ /* <brick> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"brick");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%d-brick", i);
+ ret = dict_get_str(dict, key, &brick_name);
if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"brickName",
+ "%s", brick_name);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ if (GF_CLI_INFO_CLEAR == info_op) {
+ snprintf(key, sizeof(key), "%d-stats-cleared", i);
+ ret = dict_get_int32(dict, key, &stats_cleared);
+ if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"connections",
- "%d", conn_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < conn_count; i++) {
- /* <connection> */
- ret = xmlTextWriterStartElement (writer,
- (xmlChar *)"connection");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.conn%d.fdtable",
- brick_index, i);
- ret = cli_xml_output_vol_status_fdtable (writer, dict, key);
- if (ret)
- goto out;
- /* </connection> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"clearStats", "%s",
+ stats_cleared ? "Cleared stats." : "Failed to clear stats.");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ } else {
+ snprintf(key, sizeof(key), "%d-cumulative", i);
+ ret = dict_get_int32(dict, key, &interval);
+ if (ret == 0) {
+ ret = cli_xml_output_vol_profile_stats(writer, dict, i,
+ interval);
+ if (ret)
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%d-interval", i);
+ ret = dict_get_int32(dict, key, &interval);
+ if (ret == 0) {
+ ret = cli_xml_output_vol_profile_stats(writer, dict, i,
+ interval);
+ if (ret)
+ goto out;
+ }
}
- /* </fdStatus> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </brick> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+cont:
+ /* </volProfile> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ ret = cli_end_xml_output(writer, doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
int
-cli_xml_output_vol_status_callframe (xmlTextWriterPtr writer, dict_t *dict,
- char *prefix)
-{
- int ret = -1;
- int ref_count = 0;
- char *translator = NULL;
- int complete = 0;
- char *parent = NULL;
- char *wind_from = NULL;
- char *wind_to = NULL;
- char *unwind_from = NULL;
- char *unwind_to = NULL;
- char key[1024] = {0,};
-
- /* <callFrame> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"callFrame");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- snprintf (key, sizeof (key), "%s.refcount", prefix);
- ret = dict_get_int32 (dict, key, &ref_count);
+cli_xml_output_vol_list(dict_t *dict, int op_ret, int op_errno, char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ int count = 0;
+ char *volname = NULL;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volList> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volList");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "count", &count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count", "%d",
+ count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < count; i++) {
+ snprintf(key, sizeof(key), "volume%d", i);
+ ret = dict_get_str(dict, key, &volname);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"refCount",
- "%d", ref_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"volume", "%s",
+ volname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.translator", prefix);
- ret = dict_get_str (dict, key, &translator);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"translator",
- "%s", translator);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </volList> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.complete", prefix);
- ret = dict_get_int32 (dict, key, &complete);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"complete",
- "%d", complete);
- XML_RET_CHECK_AND_GOTO (ret ,out);
+ ret = cli_end_xml_output(writer, doc);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.parent", prefix);
- ret = dict_get_str (dict, key, &parent);
- if (!ret) {
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"parent",
- "%s", parent);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_vol_info_option(xmlTextWriterPtr writer, char *substr,
+ char *optstr, char *valstr)
+{
+ int ret = -1;
+ char *ptr1 = NULL;
+ char *ptr2 = NULL;
+
+ ptr1 = substr;
+ ptr2 = optstr;
+
+ while (ptr1) {
+ if (*ptr1 != *ptr2)
+ break;
+ ptr1++;
+ ptr2++;
+ if (!*ptr1)
+ break;
+ if (!*ptr2)
+ break;
+ }
+ if (*ptr2 == '\0')
+ goto out;
+
+ /* <option> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"option");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ ptr2);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"value", "%s",
+ valstr);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </option> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.windfrom", prefix);
- ret = dict_get_str (dict, key, &wind_from);
- if (!ret) {
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"windFrom",
- "%s", wind_from);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+struct tmp_xml_option_logger {
+ char *key;
+ xmlTextWriterPtr writer;
+};
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.windto", prefix);
- ret = dict_get_str (dict, key, &wind_to);
- if (!ret) {
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"windTo",
- "%s", wind_to);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+static int
+_output_vol_info_option(dict_t *d, char *k, data_t *v, void *data)
+{
+ int ret = 0;
+ char *ptr = NULL;
+ struct tmp_xml_option_logger *tmp = NULL;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unwindfrom", prefix);
- ret = dict_get_str (dict, key, &unwind_from);
- if (!ret) {
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"unwindFrom",
- "%s", unwind_from);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+ tmp = data;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unwindto", prefix);
- ret = dict_get_str (dict, key, &unwind_to);
- if (!ret) {
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"unwindTo",
- "%s", unwind_to);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+ ptr = strstr(k, "option.");
+ if (!ptr)
+ goto out;
+
+ if (!v) {
+ ret = -1;
+ goto out;
+ }
+ ret = cli_xml_output_vol_info_option(tmp->writer, tmp->key, k, v->data);
- /* </callFrame> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
}
int
-cli_xml_output_vol_status_callstack (xmlTextWriterPtr writer, dict_t *dict,
- char *prefix)
+cli_xml_output_vol_info_options(xmlTextWriterPtr writer, dict_t *dict,
+ char *prefix)
{
- int ret = -1;
- int uid = 0;
- int gid = 0;
- int pid = 0;
- uint64_t unique = 0;
- int frame_count = 0;
- char key[1024] = {0,};
- int i = 0;
+ int ret = -1;
+ int opt_count = 0;
+ char key[1024] = {
+ 0,
+ };
+ struct tmp_xml_option_logger tmp = {
+ 0,
+ };
+
+ snprintf(key, sizeof(key), "%s.opt_count", prefix);
+ ret = dict_get_int32(dict, key, &opt_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"optCount", "%d",
+ opt_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <options> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"options");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ snprintf(key, sizeof(key), "%s.option.", prefix);
+
+ tmp.key = key;
+ tmp.writer = writer;
+ ret = dict_foreach(dict, _output_vol_info_option, &tmp);
+ if (ret)
+ goto out;
+
+ /* </options> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
- /* <callStack> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"callStack");
- XML_RET_CHECK_AND_GOTO (ret, out);
+int
+cli_xml_output_vol_info(cli_local_t *local, dict_t *dict)
+{
+#if (HAVE_LIB_XML)
+ int ret = 0;
+ int count = 0;
+ char *volname = NULL;
+ char *volume_id = NULL;
+ char *uuid = NULL;
+ int type = 0;
+ int status = 0;
+ int brick_count = 0;
+ int dist_count = 0;
+ int stripe_count = 0;
+ int replica_count = 0;
+ int arbiter_count = 0;
+ int snap_count = 0;
+ int isArbiter = 0;
+ int disperse_count = 0;
+ int redundancy_count = 0;
+ int transport = 0;
+ char *brick = NULL;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+ int j = 1;
+ char *caps __attribute__((unused)) = NULL;
+ int k __attribute__((unused)) = 0;
+
+ ret = dict_get_int32(dict, "count", &count);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < count; i++) {
+ /* <volume> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.name", i);
+ ret = dict_get_str(dict, key, &volname);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"name",
+ "%s", volname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf (key, sizeof (key), "%s.uid", prefix);
- ret = dict_get_int32 (dict, key, &uid);
+ snprintf(key, sizeof(key), "volume%d.volume_id", i);
+ ret = dict_get_str(dict, key, &volume_id);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"uid",
- "%d", uid);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"id",
+ "%s", volume_id);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.gid", prefix);
- ret = dict_get_int32 (dict, key, &gid);
+ snprintf(key, sizeof(key), "volume%d.status", i);
+ ret = dict_get_int32(dict, key, &status);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"gid",
- "%d", gid);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"status", "%d", status);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(local->writer,
+ (xmlChar *)"statusStr", "%s",
+ cli_vol_status_str[status]);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.snap_count", i);
+ ret = dict_get_int32(dict, key, &snap_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"snapshotCount", "%d", snap_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pid", prefix);
- ret = dict_get_int32 (dict, key, &pid);
+ snprintf(key, sizeof(key), "volume%d.brick_count", i);
+ ret = dict_get_int32(dict, key, &brick_count);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"pid",
- "%d", pid);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"brickCount", "%d", brick_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unique", prefix);
- ret = dict_get_uint64 (dict, key, &unique);
+ snprintf(key, sizeof(key), "volume%d.dist_count", i);
+ ret = dict_get_int32(dict, key, &dist_count);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"unique",
- "%"PRIu64, unique);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer,
+ (xmlChar *)"distCount", "%d",
+ (brick_count / dist_count));
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.stripe_count", i);
+ ret = dict_get_int32(dict, key, &stripe_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"stripeCount", "%d", stripe_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.replica_count", i);
+ ret = dict_get_int32(dict, key, &replica_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"replicaCount", "%d", replica_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.arbiter_count", i);
+ ret = dict_get_int32(dict, key, &arbiter_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"arbiterCount", "%d", arbiter_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.disperse_count", i);
+ ret = dict_get_int32(dict, key, &disperse_count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"disperseCount", "%d", disperse_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.count", prefix);
- ret = dict_get_int32 (dict, key, &frame_count);
+ snprintf(key, sizeof(key), "volume%d.redundancy_count", i);
+ ret = dict_get_int32(dict, key, &redundancy_count);
if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(local->writer,
+ (xmlChar *)"redundancyCount",
+ "%d", redundancy_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.type", i);
+ ret = dict_get_int32(dict, key, &type);
+ if (ret)
+ goto out;
+ /* For Distributed-(stripe,replicate,stipe-replicate,disperse)
+ types
+ */
+ type = get_vol_type(type, dist_count, brick_count);
+
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"type",
+ "%d", type);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"typeStr", "%s", vol_type_str[type]);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.transport", i);
+ ret = dict_get_int32(dict, key, &transport);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"transport", "%d", transport);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ j = 1;
+
+ /* <bricks> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"bricks");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ while (j <= brick_count) {
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"brick");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.brick%d.uuid", i, j);
+ ret = dict_get_str(dict, key, &uuid);
+ if (ret)
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"frameCount",
- "%d", frame_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < frame_count; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.frame%d", prefix, i);
- ret = cli_xml_output_vol_status_callframe (writer, dict,
- key);
- if (ret)
- goto out;
+ ret = xmlTextWriterWriteFormatAttribute(
+ local->writer, (xmlChar *)"uuid", "%s", uuid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.brick%d", i, j);
+ ret = dict_get_str(dict, key, &brick);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatString(local->writer, "%s", brick);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"name", "%s", brick);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"hostUuid", "%s", uuid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "volume%d.brick%d.isArbiter", i, j);
+ if (dict_get(dict, key))
+ isArbiter = 1;
+ else
+ isArbiter = 0;
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"isArbiter", "%d", isArbiter);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </brick> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ j++;
}
+ /* </bricks> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </callStack> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ snprintf(key, sizeof(key), "volume%d", i);
+ ret = cli_xml_output_vol_info_options(local->writer, dict, key);
+ if (ret)
+ goto out;
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ if (volname) {
+ GF_FREE(local->get_vol.volname);
+ local->get_vol.volname = gf_strdup(volname);
+ local->vol_count += count;
+ }
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
int
-cli_xml_output_vol_status_callpool (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index)
+cli_xml_output_vol_info_begin(cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr)
{
- int ret = -1;
- int call_count = 0;
- char key[1024] = {0,};
- int i = 0;
+#if (HAVE_LIB_XML)
+ int ret = -1;
- /* <callpoolStatus> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"callpoolStatus");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ GF_ASSERT(local);
- snprintf (key, sizeof (key), "brick%d.callpool.count", brick_index);
- ret = dict_get_int32 (dict, key, &call_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
- "%d", call_count);
-
- for (i = 0; i < call_count; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "brick%d.callpool.stack%d",
- brick_index, i);
- ret = cli_xml_output_vol_status_callstack (writer, dict,
- key);
- if (ret)
- goto out;
- }
+ ret = cli_begin_xml_output(&(local->writer), &(local->doc));
+ if (ret)
+ goto out;
- /* </callpoolStatus> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = cli_xml_output_common(local->writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volInfo> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"volInfo");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <volumes> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"volumes");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* Init vol count */
+ local->vol_count = 0;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
int
-cli_xml_output_vol_status (dict_t *dict, int op_ret, int op_errno,
- char *op_errstr)
+cli_xml_output_vol_info_end(cli_local_t *local)
{
- int ret = -1;
- xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
- char *volname = NULL;
- int brick_count = 0;
- int brick_index_max = -1;
- int other_count = 0;
- int index_max = 0;
- uint32_t cmd = GF_CLI_STATUS_NONE;
- int online = 0;
- gf_boolean_t node_present = _gf_true;
- int i;
-
- ret = cli_begin_xml_output (&writer, &buf);
- if (ret)
- goto out;
+#if (HAVE_LIB_XML)
+ int ret = -1;
- ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
- if (ret)
- goto out;
+ GF_ASSERT(local);
- /* <volStatus> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"volStatus");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"count",
+ "%d", local->vol_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ /* </volumes> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"volName",
- "%s", volname);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </volInfo> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = dict_get_int32 (dict, "count", &brick_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"nodeCount",
- "%d", brick_count);
+ ret = cli_end_xml_output(local->writer, local->doc);
+
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_quota_limit_list_end(cli_local_t *local)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_end_xml_output(local->writer, local->doc);
+
+out:
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_quota_limit_list_begin(cli_local_t *local, int op_ret,
+ int op_errno, char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+
+ ret = cli_begin_xml_output(&(local->writer), &(local->doc));
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(local->writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volQuota> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"volQuota");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+#if (HAVE_LIB_XML)
+static int
+cli_xml_output_peer_hostnames(xmlTextWriterPtr writer, dict_t *dict,
+ const char *prefix, int count)
+{
+ int ret = -1;
+ int i = 0;
+ char *hostname = NULL;
+ char key[1024] = {
+ 0,
+ };
+
+ /* <hostnames> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"hostnames");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 0; i < count; i++) {
+ ret = snprintf(key, sizeof(key), "%s.hostname%d", prefix, i);
+ if (ret < 0)
+ goto out;
+ ret = dict_get_str(dict, key, &hostname);
if (ret)
- goto out;
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hostname",
+ "%s", hostname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ hostname = NULL;
+ }
+
+ /* </hostnames> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
- ret = dict_get_uint32 (dict, "cmd", &cmd);
+int
+cli_xml_output_peer_status(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ int count = 0;
+ char *uuid = NULL;
+ char *hostname = NULL;
+ int connected = 0;
+ int state_id = 0;
+ char *state_str = NULL;
+ int hostname_count = 0;
+ int i = 1;
+ char key[1024] = {
+ 0,
+ };
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <peerStatus> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"peerStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ if (!dict)
+ goto cont;
+
+ ret = dict_get_int32(dict, "count", &count);
+ if (ret)
+ goto out;
+
+ while (i <= count) {
+ /* <peer> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"peer");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "friend%d.uuid", i);
+ ret = dict_get_str(dict, key, &uuid);
if (ret)
- goto out;
+ goto out;
- ret = dict_get_int32 (dict, "brick-index-max", &brick_index_max);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ uuid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "friend%d.hostname", i);
+ ret = dict_get_str(dict, key, &hostname);
if (ret)
- goto out;
- ret = dict_get_int32 (dict, "other-count", &other_count);
+ goto out;
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hostname",
+ "%s", hostname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "friend%d.hostname_count", i);
+ ret = dict_get_int32(dict, key, &hostname_count);
+ if ((ret == 0) && (hostname_count > 0)) {
+ snprintf(key, sizeof(key), "friend%d", i);
+ ret = cli_xml_output_peer_hostnames(writer, dict, key,
+ hostname_count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ snprintf(key, sizeof(key), "friend%d.connected", i);
+ ret = dict_get_int32(dict, key, &connected);
if (ret)
- goto out;
+ goto out;
- index_max = brick_index_max + other_count;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"connected",
+ "%d", connected);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- for (i = 0; i <= index_max; i++) {
- /* <node> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"node");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ snprintf(key, sizeof(key), "friend%d.stateId", i);
+ ret = dict_get_int32(dict, key, &state_id);
+ if (!ret) {
+ /* ignore */
- ret = cli_xml_output_vol_status_common (writer, dict, i,
- &online, &node_present);
- if (ret) {
- if (node_present)
- goto out;
- else
- continue;
- }
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"state",
+ "%d", state_id);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- switch (cmd & GF_CLI_STATUS_MASK) {
- case GF_CLI_STATUS_DETAIL:
- ret = cli_xml_output_vol_status_detail (writer,
- dict, i);
- if (ret)
- goto out;
- break;
-
- case GF_CLI_STATUS_MEM:
- if (online) {
- ret = cli_xml_output_vol_status_mem
- (writer, dict, i);
- if (ret)
- goto out;
- }
- break;
-
- case GF_CLI_STATUS_CLIENTS:
- if (online) {
- ret = cli_xml_output_vol_status_clients
- (writer, dict, i);
- if (ret)
- goto out;
- }
- break;
-
- case GF_CLI_STATUS_INODE:
- if (online) {
- ret = cli_xml_output_vol_status_inode
- (writer, dict, i);
- if (ret)
- goto out;
- }
- break;
-
- case GF_CLI_STATUS_FD:
- if (online) {
- ret = cli_xml_output_vol_status_fd
- (writer, dict, i);
- if (ret)
- goto out;
- }
- break;
-
- case GF_CLI_STATUS_CALLPOOL:
- if (online) {
- ret = cli_xml_output_vol_status_callpool
- (writer, dict, i);
- if (ret)
- goto out;
- }
- break;
-
- default:
- break;
+ snprintf(key, sizeof(key), "friend%d.state", i);
+ ret = dict_get_str(dict, key, &state_str);
+ if (!ret) {
+ /* ignore */
- }
- /* </node> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"stateStr",
+ "%s", state_str);
+ XML_RET_CHECK_AND_GOTO(ret, out);
}
- /* </volStatus> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </peer> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- ret = cli_end_xml_output (writer, buf);
- if (ret)
- goto out;
+ i++;
+ }
+
+cont:
+ /* </peerStatus> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_end_xml_output(writer, doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
+#if (HAVE_LIB_XML)
+/* Used for rebalance stop/status, remove-brick status */
int
-cli_xml_output_vol_top_rw_perf (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index, int member_index)
+cli_xml_output_vol_rebalance_status(xmlTextWriterPtr writer, dict_t *dict,
+ enum gf_task_types task_type)
{
- int ret = -1;
- char *filename = NULL;
- uint64_t throughput = 0;
- long int time_sec = 0;
- long int time_usec = 0;
- char timestr[256] = {0,};
- char key[1024] = {0,};
+ int ret = -1;
+ int count = 0;
+ char *node_name = NULL;
+ char *node_uuid = NULL;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookups = 0;
+ int status_rcd = 0;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ uint64_t total_files = 0;
+ uint64_t total_size = 0;
+ uint64_t total_lookups = 0;
+ uint64_t total_failures = 0;
+ uint64_t total_skipped = 0;
+ char key[1024] = {
+ 0,
+ };
+ int i = 0;
+ int overall_status = -1;
+ double elapsed = 0;
+ double overall_elapsed = 0;
+
+ if (!dict) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, "count", &count);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"nodeCount", "%d",
+ count);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ while (i < count) {
+ i++;
+ /* Getting status early, to skip nodes that don't have the
+ * rebalance process started
+ */
+ snprintf(key, sizeof(key), "status-%d", i);
+ ret = dict_get_int32(dict, key, &status_rcd);
+
+ /* If glusterd is down it fails to get the status, try
+ getting status from other nodes */
+ if (ret)
+ continue;
+ if (GF_DEFRAG_STATUS_NOT_STARTED == status_rcd)
+ continue;
- /* <file> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"file");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* <node> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"node");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf (key, sizeof (key), "%d-filename-%d", brick_index,
- member_index);
- ret = dict_get_str (dict, key, &filename);
+ snprintf(key, sizeof(key), "node-name-%d", i);
+ ret = dict_get_str(dict, key, &node_name);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"filename",
- "%s", filename);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"nodeName",
+ "%s", node_name);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-value-%d", brick_index, member_index);
- ret = dict_get_uint64 (dict, key, &throughput);
+ snprintf(key, sizeof(key), "node-uuid-%d", i);
+ ret = dict_get_str(dict, key, &node_uuid);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
- "%"PRIu64, throughput);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"id", "%s",
+ node_uuid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-time-sec-%d", brick_index,
- member_index);
- ret = dict_get_int32 (dict, key, (int32_t *)&time_sec);
+ snprintf(key, sizeof(key), "files-%d", i);
+ ret = dict_get_uint64(dict, key, &files);
if (ret)
- goto out;
+ goto out;
+ total_files += files;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"files",
+ "%" PRIu64, files);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "size-%d", i);
+ ret = dict_get_uint64(dict, key, &size);
+ if (ret)
+ goto out;
+ total_size += size;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"size",
+ "%" PRIu64, size);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "lookups-%d", i);
+ ret = dict_get_uint64(dict, key, &lookups);
+ if (ret)
+ goto out;
+ total_lookups += lookups;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"lookups",
+ "%" PRIu64, lookups);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "failures-%d", i);
+ ret = dict_get_uint64(dict, key, &failures);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "skipped-%d", i);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-time-usec-%d", brick_index,
- member_index);
- ret = dict_get_int32 (dict, key, (int32_t *)&time_usec);
+ ret = dict_get_uint64(dict, key, &skipped);
if (ret)
- goto out;
+ goto out;
+
+ if (task_type == GF_TASK_TYPE_REMOVE_BRICK) {
+ failures += skipped;
+ skipped = 0;
+ }
+
+ total_failures += failures;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"failures",
+ "%" PRIu64, failures);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ total_skipped += skipped;
- gf_time_fmt (timestr, sizeof timestr, time_sec, gf_timefmt_FT);
- snprintf (timestr + strlen (timestr),
- sizeof timestr - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, time_usec);
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"time",
- "%s", timestr);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"skipped",
+ "%" PRIu64, skipped);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </file> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"status", "%d",
+ status_rcd);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"statusStr", "%s",
+ cli_vol_task_status_str[status_rcd]);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "run-time-%d", i);
+ ret = dict_get_double(dict, key, &elapsed);
+ if (ret)
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"runtime",
+ "%.2f", elapsed);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ if (elapsed > overall_elapsed) {
+ overall_elapsed = elapsed;
+ }
+
+ /* Rebalance has 5 states,
+ * NOT_STARTED, STARTED, STOPPED, COMPLETE, FAILED
+ * The precedence used to determine the aggregate status is as
+ * below,
+ * STARTED > FAILED > STOPPED > COMPLETE > NOT_STARTED
+ */
+ /* TODO: Move this to a common place utilities that both CLI and
+ * glusterd need.
+ * Till then if the below algorithm is changed, change it in
+ * glusterd_volume_status_aggregate_tasks_status in
+ * glusterd-utils.c
+ */
+
+ if (-1 == overall_status)
+ overall_status = status_rcd;
+ int rank[] = {[GF_DEFRAG_STATUS_STARTED] = 1,
+ [GF_DEFRAG_STATUS_FAILED] = 2,
+ [GF_DEFRAG_STATUS_STOPPED] = 3,
+ [GF_DEFRAG_STATUS_COMPLETE] = 4,
+ [GF_DEFRAG_STATUS_NOT_STARTED] = 5};
+ if (rank[status_rcd] <= rank[overall_status])
+ overall_status = status_rcd;
+
+ /* </node> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* Aggregate status */
+ /* <aggregate> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"aggregate");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"files",
+ "%" PRIu64, total_files);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"size", "%" PRIu64,
+ total_size);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"lookups",
+ "%" PRIu64, total_lookups);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"failures",
+ "%" PRIu64, total_failures);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"skipped",
+ "%" PRIu64, total_skipped);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ if (overall_status == -1) {
+ overall_status = status_rcd;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"status", "%d",
+ overall_status);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"statusStr", "%s",
+ cli_vol_task_status_str[overall_status]);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"runtime", "%.2f",
+ overall_elapsed);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </aggregate> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
+#endif
int
-cli_xml_output_vol_top_other (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index, int member_index)
+cli_xml_output_vol_rebalance(gf_cli_defrag_type op, dict_t *dict, int op_ret,
+ int op_errno, char *op_errstr)
{
- int ret = -1;
- char *filename = NULL;
- uint64_t count = 0;
- char key[1024] = {0,};
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *task_id_str = NULL;
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <volRebalance> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volRebalance");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, GF_REBALANCE_TID_KEY, &task_id_str);
+ if (ret == 0) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"task-id",
+ "%s", task_id_str);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"op", "%d", op);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ if ((GF_DEFRAG_CMD_STOP == op) || (GF_DEFRAG_CMD_STATUS == op)) {
+ ret = cli_xml_output_vol_rebalance_status(writer, dict,
+ GF_TASK_TYPE_REBALANCE);
+ if (ret)
+ goto out;
+ }
- /* <file> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"file");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </volRebalance> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf (key, sizeof (key), "%d-filename-%d", brick_index,
- member_index);
- ret = dict_get_str (dict, key, &filename);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"filename",
- "%s", filename);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = cli_end_xml_output(writer, doc);
+
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-value-%d", brick_index, member_index);
- ret = dict_get_uint64 (dict, key, &count);
+int
+cli_xml_output_vol_remove_brick(gf_boolean_t status_op, dict_t *dict,
+ int op_ret, int op_errno, char *op_errstr,
+ const char *op)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *task_id_str = NULL;
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)op);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, GF_REMOVE_BRICK_TID_KEY, &task_id_str);
+ if (ret == 0) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"task-id",
+ "%s", task_id_str);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ if (status_op) {
+ ret = cli_xml_output_vol_rebalance_status(writer, dict,
+ GF_TASK_TYPE_REMOVE_BRICK);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
- "%"PRIu64, count);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ }
+
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </file> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = cli_end_xml_output(writer, doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
int
-cli_xml_output_vol_top (dict_t *dict, int op_ret, int op_errno,
- char *op_errstr)
+cli_xml_output_vol_replace_brick(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ ret = cli_end_xml_output(writer, doc);
+
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_vol_create(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
{
- int ret = -1;
- xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
- int brick_count = 0;
- int top_op = GF_CLI_TOP_NONE;
- char *brick_name = NULL;
- int members = 0;
- uint64_t current_open = 0;
- uint64_t max_open = 0;
- char *max_open_time = NULL;
- double throughput = 0.0;
- double time_taken = 0.0;
- char key[1024] = {0,};
- int i = 0;
- int j = 0;
-
- ret = cli_begin_xml_output (&writer, &buf);
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *volname = NULL;
+ char *volid = NULL;
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ if (dict) {
+ /* <volCreate> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volCreate");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <volume> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "volname", &volname);
if (ret)
- goto out;
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ volname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
+ ret = dict_get_str(dict, "volume-id", &volid);
if (ret)
- goto out;
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"id", "%s",
+ volid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </volCreate> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ ret = cli_end_xml_output(writer, doc);
+
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_generic_volume(char *op, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *volname = NULL;
+ char *volid = NULL;
+
+ GF_ASSERT(op);
- /* <volTop> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"volTop");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
- ret = dict_get_int32 (dict, "count", &brick_count);
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ if (dict) {
+ /* <"op"> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)op);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <volume> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "volname", &volname);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"brickCount",
- "%d", brick_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ volname);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = dict_get_int32 (dict, "1-top-op", &top_op);
+ ret = dict_get_str(dict, "vol-id", &volid);
if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"topOp",
- "%d", top_op);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"id", "%s",
+ volid);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- while (i < brick_count) {
- i++;
+ /* </volume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* <brick> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"brick");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </"op"> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-brick", i);
- ret = dict_get_str (dict, key, &brick_name);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"name",
- "%s", brick_name);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key , sizeof (key), "%d-members", i);
- ret = dict_get_int32 (dict, key, &members);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"members",
- "%d", members);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- switch (top_op) {
- case GF_CLI_TOP_OPEN:
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-current-open", i);
- ret = dict_get_uint64 (dict, key, &current_open);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"currentOpen", "%"PRIu64,
- current_open);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-max-open", i);
- ret = dict_get_uint64 (dict, key, &max_open);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"maxOpen", "%"PRIu64,
- max_open);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-max-openfd-time", i);
- ret = dict_get_str (dict, key, &max_open_time);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"maxOpenTime", "%s",
- max_open_time);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- case GF_CLI_TOP_READ:
- case GF_CLI_TOP_WRITE:
- case GF_CLI_TOP_OPENDIR:
- case GF_CLI_TOP_READDIR:
- if (!members)
- continue;
-
- break;
-
- case GF_CLI_TOP_READ_PERF:
- case GF_CLI_TOP_WRITE_PERF:
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-throughput", i);
- ret = dict_get_double (dict, key, &throughput);
- if (!ret) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-time", i);
- ret = dict_get_double (dict, key, &time_taken);
- }
-
- if (!ret) {
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"throughput",
- "%f", throughput);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"timeTaken",
- "%f", time_taken);
- }
-
- if (!members)
- continue;
-
- break;
-
- default:
- ret = -1;
- goto out;
- }
+ ret = cli_end_xml_output(writer, doc);
- for (j = 1; j <= members; j++) {
- if (top_op == GF_CLI_TOP_READ_PERF ||
- top_op == GF_CLI_TOP_WRITE_PERF) {
- ret = cli_xml_output_vol_top_rw_perf
- (writer, dict, i, j);
- } else {
- ret = cli_xml_output_vol_top_other
- (writer, dict, i, j);
- }
- if (ret)
- goto out;
- }
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+#if (HAVE_LIB_XML)
+int
+_output_gsync_config(FILE *fp, xmlTextWriterPtr writer, char *op_name)
+{
+ char resbuf[256 + PATH_MAX] = {
+ 0,
+ };
+ char *ptr = NULL;
+ char *v = NULL;
+ int blen = sizeof(resbuf);
+ int ret = 0;
+
+ for (;;) {
+ ptr = fgets(resbuf, blen, fp);
+ if (!ptr)
+ break;
+
+ v = resbuf + strlen(resbuf) - 1;
+ while (isspace(*v)) {
+ /* strip trailing space */
+ *v-- = '\0';
+ }
+ if (v == resbuf) {
+ /* skip empty line */
+ continue;
+ }
- /* </brick> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ if (op_name != NULL) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)op_name,
+ "%s", resbuf);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ goto out;
}
- /* </volTop> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
- ret = cli_end_xml_output (writer, buf);
+ v = strchr(resbuf, ':');
+ if (!v) {
+ ret = -1;
+ goto out;
+ }
+ *v++ = '\0';
+ while (isspace(*v))
+ v++;
+ v = gf_strdup(v);
+ if (!v) {
+ ret = -1;
+ goto out;
+ }
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)resbuf, "%s",
+ v);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
+#endif
+#if (HAVE_LIB_XML)
int
-cli_xml_output_vol_profile_stats (xmlTextWriterPtr writer, dict_t *dict,
- int brick_index, int interval)
-{
- int ret = -1;
- uint64_t read_count = 0;
- uint64_t write_count = 0;
- uint64_t hits = 0;
- double avg_latency = 0.0;
- double max_latency = 0.0;
- double min_latency = 0.0;
- uint64_t duration = 0;
- uint64_t total_read = 0;
- uint64_t total_write = 0;
- char key[1024] = {0};
- int i = 0;
-
- /* <cumulativeStats> || <intervalStats> */
- if (interval == -1)
- ret = xmlTextWriterStartElement (writer,
- (xmlChar *)"cumulativeStats");
- else
- ret = xmlTextWriterStartElement (writer,
- (xmlChar *)"intervalStats");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- /* <blockStats> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"blockStats");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < 32; i++) {
- /* <block> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"block");
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"size", "%"PRIu32, (1 << i));
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-read-%d", brick_index,
- interval, (1 << i));
- ret = dict_get_uint64 (dict, key, &read_count);
- if (ret)
- read_count = 0;
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"reads", "%"PRIu64, read_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-write-%d", brick_index,
- interval, (1 << i));
- ret = dict_get_uint64 (dict, key, &write_count);
- if (ret)
- write_count = 0;
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"writes", "%"PRIu64, write_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- /* </block> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
- }
+get_gsync_config(runner_t *runner,
+ int (*op_conf)(FILE *fp, xmlTextWriterPtr writer,
+ char *op_name),
+ xmlTextWriterPtr writer, char *op_name)
+{
+ int ret = 0;
- /* </blockStats> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ runner_redir(runner, STDOUT_FILENO, RUN_PIPE);
+ if (runner_start(runner) != 0) {
+ gf_log("cli", GF_LOG_ERROR, "spawning child failed");
+ return -1;
+ }
- /* <fopStats> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"fopStats");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = op_conf(runner_chio(runner, STDOUT_FILENO), writer, op_name);
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-%d-hits", brick_index,
- interval, i);
- ret = dict_get_uint64 (dict, key, &hits);
- if (ret)
- goto cont;
+ ret |= runner_end(runner);
+ if (ret)
+ gf_log("cli", GF_LOG_ERROR, "reading data from child failed");
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-%d-avglatency", brick_index,
- interval, i);
- ret = dict_get_double (dict, key, &avg_latency);
- if (ret)
- goto cont;
+ return ret ? -1 : 0;
+}
+#endif
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-%d-minlatency", brick_index,
- interval, i);
- ret = dict_get_double (dict, key, &min_latency);
- if (ret)
- goto cont;
+#if (HAVE_LIB_XML)
+int
+cli_xml_generate_gsync_config(dict_t *dict, xmlTextWriterPtr writer)
+{
+ runner_t runner = {
+ 0,
+ };
+ char *subop = NULL;
+ char *gwd = NULL;
+ char *slave = NULL;
+ char *confpath = NULL;
+ char *master = NULL;
+ char *op_name = NULL;
+ int ret = -1;
+ char conf_path[PATH_MAX] = "";
+
+ if (dict_get_str(dict, "subop", &subop) != 0) {
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(subop, "get") != 0 && strcmp(subop, "get-all") != 0) {
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"message", "%s",
+ GEOREP " config updated successfully");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ ret = 0;
+ goto out;
+ }
+
+ if (dict_get_str(dict, "glusterd_workdir", &gwd) != 0 ||
+ dict_get_str(dict, "slave", &slave) != 0) {
+ ret = -1;
+ goto out;
+ }
+
+ if (dict_get_str(dict, "master", &master) != 0)
+ master = NULL;
+
+ if (dict_get_str(dict, "op_name", &op_name) != 0)
+ op_name = NULL;
+
+ ret = dict_get_str(dict, "conf_path", &confpath);
+ if (!confpath) {
+ ret = snprintf(conf_path, sizeof(conf_path) - 1,
+ "%s/" GEOREP "/gsyncd_template.conf", gwd);
+ conf_path[ret] = '\0';
+ confpath = conf_path;
+ }
+
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
+ runner_argprintf(&runner, "%s", confpath);
+ runner_argprintf(&runner, "--iprefix=%s", DATADIR);
+
+ if (master)
+ runner_argprintf(&runner, ":%s", master);
+
+ runner_add_arg(&runner, slave);
+ runner_argprintf(&runner, "--config-%s", subop);
+
+ if (op_name)
+ runner_add_arg(&runner, op_name);
+
+ ret = get_gsync_config(&runner, _output_gsync_config, writer, op_name);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-%d-maxlatency", brick_index,
- interval, i);
- ret = dict_get_double (dict, key, &max_latency);
- if (ret)
- goto cont;
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
+#endif
- /* <fop> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"fop");
- XML_RET_CHECK_AND_GOTO (ret, out);
+#if (HAVE_LIB_XML)
+int
+cli_xml_output_vol_gsync_status(dict_t *dict, xmlTextWriterPtr writer)
+{
+ int ret = -1;
+ int i = 1;
+ int j = 0;
+ int count = 0;
+ const int number_of_fields = 20;
+ int closed = 1;
+ int session_closed = 1;
+ gf_gsync_status_t **status_values = NULL;
+ char status_value_name[PATH_MAX] = "";
+ char *tmp = NULL;
+ char *volume = NULL;
+ char *volume_next = NULL;
+ char *slave = NULL;
+ char *slave_next = NULL;
+ static const char *title_values[] = {
+ "master_node", "", "master_brick", "slave_user", "slave", "slave_node",
+ "status", "crawl_status",
+ /* last_synced */
+ "", "entry", "data", "meta", "failures",
+ /* checkpoint_time */
+ "", "checkpoint_completed",
+ /* checkpoint_completion_time */
+ "", "master_node_uuid",
+ /* last_synced_utc */
+ "last_synced",
+ /* checkpoint_time_utc */
+ "checkpoint_time",
+ /* checkpoint_completion_time_utc */
+ "checkpoint_completion_time"};
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_int32(dict, "gsync-count", &count);
+ if (ret)
+ goto out;
+
+ status_values = GF_MALLOC(count * sizeof(gf_gsync_status_t *),
+ gf_common_mt_char);
+ if (!status_values) {
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < count; i++) {
+ status_values[i] = GF_CALLOC(1, sizeof(gf_gsync_status_t),
+ gf_common_mt_char);
+ if (!status_values[i]) {
+ ret = -1;
+ goto out;
+ }
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"name","%s", gf_fop_list[i]);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ snprintf(status_value_name, sizeof(status_value_name), "status_value%d",
+ i);
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"hits", "%"PRIu64, hits);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = dict_get_bin(dict, status_value_name,
+ (void **)&(status_values[i]));
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "struct member empty.");
+ goto out;
+ }
+ }
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"avgLatency", "%f", avg_latency);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ qsort(status_values, count, sizeof(gf_gsync_status_t *),
+ gf_gsync_status_t_comparator);
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"minLatency", "%f", min_latency);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ for (i = 0; i < count; i++) {
+ if (closed) {
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"maxLatency", "%f", max_latency);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ tmp = get_struct_variable(1, status_values[i]);
+ if (!tmp) {
+ gf_log("cli", GF_LOG_ERROR, "struct member empty.");
+ ret = -1;
+ goto out;
+ }
- /* </fop> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name",
+ "%s", tmp);
+ XML_RET_CHECK_AND_GOTO(ret, out);
-cont:
- hits = 0;
- avg_latency = 0.0;
- min_latency = 0.0;
- max_latency = 0.0;
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"sessions");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ closed = 0;
}
- /* </fopStats> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ if (session_closed) {
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"session");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-duration", brick_index, interval);
- ret = dict_get_uint64 (dict, key, &duration);
- if (ret)
+ session_closed = 0;
+
+ tmp = get_struct_variable(21, status_values[i]);
+ if (!tmp) {
+ gf_log("cli", GF_LOG_ERROR, "struct member empty.");
+ ret = -1;
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"duration",
- "%"PRIu64, duration);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-total-read", brick_index, interval);
- ret = dict_get_uint64 (dict, key, &total_read);
- if (ret)
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"session_slave", "%s", tmp);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"pair");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (j = 0; j < number_of_fields; j++) {
+ /* XML ignore fields */
+ if (strcmp(title_values[j], "") == 0)
+ continue;
+
+ tmp = get_struct_variable(j, status_values[i]);
+ if (!tmp) {
+ gf_log("cli", GF_LOG_ERROR, "struct member empty.");
+ ret = -1;
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"totalRead",
- "%"PRIu64, total_read);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-%d-total-write", brick_index, interval);
- ret = dict_get_uint64 (dict, key, &total_write);
- if (ret)
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)title_values[j], "%s", tmp);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ if (i + 1 < count) {
+ slave = get_struct_variable(20, status_values[i]);
+ slave_next = get_struct_variable(20, status_values[i + 1]);
+ volume = get_struct_variable(1, status_values[i]);
+ volume_next = get_struct_variable(1, status_values[i + 1]);
+ if (!slave || !slave_next || !volume || !volume_next) {
+ gf_log("cli", GF_LOG_ERROR, "struct member empty.");
+ ret = -1;
goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"totalWrite",
- "%"PRIu64, total_write);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ }
+
+ if (strcmp(volume, volume_next) != 0) {
+ closed = 1;
+ session_closed = 1;
+
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </cumulativeStats> || </intervalStats> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ } else if (strcmp(slave, slave_next) != 0) {
+ session_closed = 1;
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ } else {
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ }
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (status_values)
+ GF_FREE(status_values);
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
+#endif
int
-cli_xml_output_vol_profile (dict_t *dict, int op_ret, int op_errno,
- char *op_errstr)
-{
- int ret = -1;
- xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
- char *volname = NULL;
- int op = GF_CLI_STATS_NONE;
- int brick_count = 0;
- char *brick_name = NULL;
- int interval = 0;
- char key[1024] = {0,};
- int i = 0;
-
- ret = cli_begin_xml_output (&writer, &buf);
- if (ret)
- goto out;
+cli_xml_output_vol_gsync(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *master = NULL;
+ char *slave = NULL;
+ int type = 0;
+
+ GF_ASSERT(dict);
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ /* <geoRep> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"geoRep");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "type", &type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get type");
+ goto out;
+ }
+
+ switch (type) {
+ case GF_GSYNC_OPTION_TYPE_START:
+ case GF_GSYNC_OPTION_TYPE_STOP:
+ case GF_GSYNC_OPTION_TYPE_PAUSE:
+ case GF_GSYNC_OPTION_TYPE_RESUME:
+ case GF_GSYNC_OPTION_TYPE_CREATE:
+ case GF_GSYNC_OPTION_TYPE_DELETE:
+ if (dict_get_str(dict, "master", &master) != 0)
+ master = "???";
+ if (dict_get_str(dict, "slave", &slave) != 0)
+ slave = "???";
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"master",
+ "%s", master);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"slave",
+ "%s", slave);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_CONFIG:
+ if (op_ret == 0) {
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"config");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_xml_generate_gsync_config(dict, writer);
+ if (ret)
+ goto out;
- ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
- if (ret)
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ break;
+ case GF_GSYNC_OPTION_TYPE_STATUS:
+ ret = cli_xml_output_vol_gsync_status(dict, writer);
+ if (ret) {
+ gf_log("cli", GF_LOG_DEBUG, "Failed to get gsync status");
goto out;
+ }
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+
+ /* </geoRep> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_end_xml_output(writer, doc);
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
- /* <volProfile> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"volProfile");
- XML_RET_CHECK_AND_GOTO (ret, out);
+#if (HAVE_LIB_XML)
+/* This function will generate snapshot create output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing create output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_create(xmlTextWriterPtr writer, xmlDocPtr doc, dict_t *dict)
+{
+ int ret = -1;
+ char *str_value = NULL;
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* <snapCreate> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapCreate");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <snapshot> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapshot");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snapname", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snapuuid", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </snapshot> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </snapCreate> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
+out:
+ return ret;
+}
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"volname",
- "%s", volname);
- XML_RET_CHECK_AND_GOTO (ret, out);
+/* This function will generate snapshot clone output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing create output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_clone(xmlTextWriterPtr writer, xmlDocPtr doc, dict_t *dict)
+{
+ int ret = -1;
+ char *str_value = NULL;
+
+ GF_VALIDATE_OR_GOTO("cli", writer, out);
+ GF_VALIDATE_OR_GOTO("cli", doc, out);
+ GF_VALIDATE_OR_GOTO("cli", dict, out);
+
+ /* <CloneCreate> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"CloneCreate");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <volume> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "clonename", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get clone name");
+ goto out;
+ }
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snapuuid", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get clone uuid");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </CloneCreate> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
+out:
+ return ret;
+}
- ret = dict_get_int32 (dict, "op", &op);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"profileOp",
- "%d", op);
- XML_RET_CHECK_AND_GOTO (ret, out);
+/* This function will generate snapshot restore output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing restore output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_restore(xmlTextWriterPtr writer, xmlDocPtr doc, dict_t *dict)
+{
+ int ret = -1;
+ char *str_value = NULL;
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* <snapRestore> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapRestore");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <volume> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "volname", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get vol name");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "volid", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get volume id");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <snapshot> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapshot");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snapname", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snapuuid", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </snapshot> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </snapRestore> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
+out:
- if (op != GF_CLI_STATS_INFO)
- goto cont;
+ return ret;
+}
- ret = dict_get_int32 (dict, "count", &brick_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"brickCount",
- "%d", brick_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
+/* This function will generate snapshot list output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing list output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_list(xmlTextWriterPtr writer, xmlDocPtr doc, dict_t *dict)
+{
+ int ret = -1;
+ int i = 0;
+ int snapcount = 0;
+ char *str_value = NULL;
+ char key[PATH_MAX] = "";
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* <snapList> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapList");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "snapcount", &snapcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snapcount");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count", "%d",
+ snapcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 1; i <= snapcount; ++i) {
+ ret = snprintf(key, sizeof(key), "snapname%d", i);
+ if (ret < 0) {
+ goto out;
+ }
- while (i < brick_count) {
- i++;
+ ret = dict_get_str(dict, key, &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get %s ", key);
+ goto out;
+ } else {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"snapshot",
+ "%s", str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ }
- /* <brick> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"brick");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </snapList> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- snprintf (key, sizeof (key), "%d-brick", i);
- ret = dict_get_str (dict, key, &brick_name);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"brickName", "%s", brick_name);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- snprintf (key, sizeof (key), "%d-cumulative", i);
- ret = dict_get_int32 (dict, key, &interval);
- if (ret == 0) {
- ret = cli_xml_output_vol_profile_stats
- (writer, dict, i, interval);
- if (ret)
- goto out;
- }
+ ret = 0;
+out:
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-interval", i);
- ret = dict_get_int32 (dict, key, &interval);
- if (ret == 0) {
- ret = cli_xml_output_vol_profile_stats
- (writer, dict, i, interval);
- if (ret)
- goto out;
- }
+ return ret;
+}
- /* </brick> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+/* This function will generate xml output for origin volume
+ * of the given snapshot.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing info output
+ * @param keyprefix prefix for dictionary key
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_info_orig_vol(xmlTextWriterPtr writer, xmlDocPtr doc,
+ dict_t *dict, char *keyprefix)
+{
+ int ret = -1;
+ int value = 0;
+ char *buffer = NULL;
+ char key[PATH_MAX] = "";
+
+ GF_ASSERT(dict);
+ GF_ASSERT(keyprefix);
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+
+ /* <originVolume> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"originVolume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%sorigin-volname", keyprefix);
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_WARNING, "Failed to get %s", key);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%ssnapcount", keyprefix);
+
+ ret = dict_get_int32(dict, key, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"snapCount", "%d",
+ value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%ssnaps-available", keyprefix);
+
+ ret = dict_get_int32(dict, key, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"snapRemaining",
+ "%d", value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </originVolume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* This function will generate xml output of snapshot volume info.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing info output
+ * @param keyprefix key prefix for dictionary
+ * @param snap_driven boolean to check if output is based of volume
+ * or snapshot
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_info_snap_vol(xmlTextWriterPtr writer, xmlDocPtr doc,
+ dict_t *dict, char *keyprefix,
+ gf_boolean_t snap_driven)
+{
+ char key[PATH_MAX] = "";
+ char *buffer = NULL;
+ int ret = -1;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(keyprefix);
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+
+ /* <snapVolume> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapVolume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key, sizeof(key), "%s.volname", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key, sizeof(key), "%s.vol-status", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get %s", key);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"status", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* If the command is snap_driven then we need to show origin volume
+ * info. Else this is shown in the start of info display.*/
+ if (snap_driven) {
+ ret = snprintf(key, sizeof(key), "%s.", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = cli_xml_snapshot_info_orig_vol(writer, doc, dict, key);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot's origin volume");
+ goto out;
}
+ }
-cont:
- /* </volProfile> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </snapVolume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = cli_end_xml_output (writer, buf);
+ ret = 0;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
+}
+/* This function will generate snapshot info of individual snapshot
+ * in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing info output
+ * @param keyprefix key prefix for dictionary
+ * @param snap_driven boolean to check if output is based of volume
+ * or snapshot
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_info_per_snap(xmlTextWriterPtr writer, xmlDocPtr doc,
+ dict_t *dict, char *keyprefix,
+ gf_boolean_t snap_driven)
+{
+ char key_buffer[PATH_MAX] = "";
+ char *buffer = NULL;
+ int volcount = 0;
+ int ret = -1;
+ int i = 0;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(keyprefix);
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+
+ /* <snapshot> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapshot");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snapname", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key_buffer, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to fetch snapname %s ", key_buffer);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snap-id", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key_buffer, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to fetch snap-id %s ", key_buffer);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snap-desc", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key_buffer, &buffer);
+ if (!ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"description",
+ "%s", buffer);
+ } else {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"description",
+ "%s", "");
+ }
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.snap-time", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key_buffer, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to fetch snap-time %s ", keyprefix);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"createTime", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.vol-count", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_int32(dict, key_buffer, &volcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Fail to get snap vol count");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"volCount", "%d",
+ volcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, key_buffer, &volcount);
+ /* Display info of each snapshot volume */
+ for (i = 1; i <= volcount; i++) {
+ ret = snprintf(key_buffer, sizeof(key_buffer), "%s.vol%d", keyprefix,
+ i);
+ if (ret < 0)
+ goto out;
+
+ ret = cli_xml_snapshot_info_snap_vol(writer, doc, dict, key_buffer,
+ snap_driven);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not list "
+ "details of volume in a snap");
+ goto out;
+ }
+ }
+
+ /* </snapshot> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+out:
+ return ret;
}
-int
-cli_xml_output_vol_list (dict_t *dict, int op_ret, int op_errno,
- char *op_errstr)
+/* This function will generate snapshot info output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing info output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_info(xmlTextWriterPtr writer, xmlDocPtr doc, dict_t *dict)
{
- int ret = -1;
- xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
- int count = 0;
- char *volname = NULL;
- char key[1024] = {0,};
- int i = 0;
+ int ret = -1;
+ int i = 0;
+ int snapcount = 0;
+ char key[PATH_MAX] = "";
+ gf_boolean_t snap_driven = _gf_false;
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* <snapInfo> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapInfo");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snap_driven = dict_get_str_boolean(dict, "snap-driven", _gf_false);
+
+ /* If the approach is volume based then we should display origin volume
+ * information first followed by per snap info*/
+ if (!snap_driven) {
+ ret = cli_xml_snapshot_info_orig_vol(writer, doc, dict, "");
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot's origin volume");
+ goto out;
+ }
+ }
- ret = cli_begin_xml_output (&writer, &buf);
- if (ret)
- goto out;
+ ret = dict_get_int32(dict, "snapcount", &snapcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snapcount");
+ goto out;
+ }
- ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
- if (ret)
- goto out;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count", "%d",
+ snapcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* <volList> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"volList");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* <snapshots> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapshots");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = dict_get_int32 (dict, "count", &count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"count",
- "%d", count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- for (i = 0; i < count; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d", i);
- ret = dict_get_str (dict, key, &volname);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer,
- (xmlChar *)"volume",
- "%s", volname);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* Get snapshot info of individual snapshots */
+ for (i = 1; i <= snapcount; ++i) {
+ snprintf(key, sizeof(key), "snap%d", i);
+
+ ret = cli_xml_snapshot_info_per_snap(writer, doc, dict, key,
+ snap_driven);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get %s ", key);
+ goto out;
}
+ }
+
+ /* </snapshots> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </volList> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </snapInfo> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = cli_end_xml_output (writer, buf);
+ ret = 0;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+
+ return ret;
}
-int
-cli_xml_output_vol_info_option (xmlTextWriterPtr writer, char *substr,
- char *optstr, char *valstr)
-{
- int ret = -1;
- char *ptr1 = NULL;
- char *ptr2 = NULL;
-
- ptr1 = substr;
- ptr2 = optstr;
-
- while (ptr1) {
- if (*ptr1 != *ptr2)
- break;
- ptr1++;
- ptr2++;
- if (!ptr1)
- goto out;
- if (!ptr2)
- goto out;
+/* This function will generate snapshot status of individual
+ * snapshot volume in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing status output
+ * @param keyprefix key prefix for dictionary
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_volume_status(xmlTextWriterPtr writer, xmlDocPtr doc,
+ dict_t *dict, const char *keyprefix)
+{
+ int ret = -1;
+ int brickcount = 0;
+ int i = 0;
+ int pid = 0;
+ char *buffer = NULL;
+ char key[PATH_MAX] = "";
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+ GF_ASSERT(keyprefix);
+
+ ret = snprintf(key, sizeof(key), "%s.brickcount", keyprefix);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_int32(dict, key, &brickcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to fetch brickcount");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"brickCount", "%d",
+ brickcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* Get status of every brick belonging to the snapshot volume */
+ for (i = 0; i < brickcount; i++) {
+ /* <snapInfo> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"brick");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key, sizeof(key), "%s.brick%d.path", keyprefix, i);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get Brick Path");
+ /*
+ * If path itself is not present, then end *
+ * this brick's status and continue to the *
+ * brick *
+ */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ continue;
}
- if (*ptr2 == '\0')
- goto out;
- /* <option> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"option");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"path", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key, sizeof(key), "%s.brick%d.vgname", keyprefix, i);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get Volume Group");
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"volumeGroup", "N/A");
+ } else
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"volumeGroup", "%s", buffer);
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key, sizeof(key), "%s.brick%d.status", keyprefix, i);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Unable to get Brick Running");
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"brick_running", "N/A");
+ } else
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"brick_running", "%s", buffer);
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key, sizeof(key), "%s.brick%d.pid", keyprefix, i);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_int32(dict, key, &pid);
+ if (ret) {
+ gf_log("cli", GF_LOG_INFO, "Unable to get pid");
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"pid",
+ "N/A");
+ } else
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"pid",
+ "%d", pid);
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key, sizeof(key), "%s.brick%d.data", keyprefix, i);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get Data Percent");
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"data_percentage", "N/A");
+ } else
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"data_percentage", "%s", buffer);
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = snprintf(key, sizeof(key), "%s.brick%d.lvsize", keyprefix, i);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get LV Size");
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"lvSize",
+ "N/A");
+ } else {
+ /* Truncate any newline character */
+ buffer = strtok(buffer, "\n");
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"lvSize",
+ "%s", buffer);
+ }
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"name",
- "%s", ptr2);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"value",
- "%s", valstr);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </brick> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
- /* </option> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
}
-int
-cli_xml_output_vol_info_options (xmlTextWriterPtr writer, dict_t *dict,
- char *prefix)
+/* This function will generate snapshot status of individual
+ * snapshot in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing status output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_status_per_snap(xmlTextWriterPtr writer, xmlDocPtr doc,
+ dict_t *dict, const char *keyprefix)
{
- int ret = -1;
- int opt_count = 0;
- data_pair_t *pairs = 0;
- data_t *value = 0;
- char *ptr = NULL;
- char key[1024] = {0,};
- int i = 0;
+ int ret = -1;
+ int volcount = 0;
+ int i = 0;
+ char *buffer = NULL;
+ char key[PATH_MAX] = "";
- pairs = dict->members_list;
- if (!pairs) {
- ret = -1;
- goto out;
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+ GF_ASSERT(keyprefix);
+
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapshot");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.snapname", keyprefix);
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get snapname");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.uuid", keyprefix);
+
+ ret = dict_get_str(dict, key, &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get snap UUID");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.volcount", keyprefix);
+
+ ret = dict_get_int32(dict, key, &volcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Unable to get volume count");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"volCount", "%d",
+ volcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* Get snapshot status of individual snapshot volume */
+ for (i = 0; i < volcount; i++) {
+ /* <volume> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(key, sizeof(key), "%s.vol%d", keyprefix, i);
+
+ ret = cli_xml_snapshot_volume_status(writer, doc, dict, key);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get snap volume status");
+ goto out;
}
- snprintf (key, sizeof (key), "%s.opt_count", prefix);
- ret = dict_get_int32 (dict, key, &opt_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (writer, (xmlChar *)"optCount",
- "%d", opt_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- /* <options> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"options");
- XML_RET_CHECK_AND_GOTO (ret, out);
- while (i < opt_count) {
- snprintf (key, sizeof (key), "%s.option.", prefix);
- while (pairs) {
- ptr = strstr (pairs->key, "option.");
- if (ptr) {
- value = pairs->value;
- if (!value) {
- ret = -1;
- goto out;
- }
- ret = cli_xml_output_vol_info_option
- (writer, key, pairs->key, value->data);
- if (ret)
- goto out;
- }
- pairs = pairs->next;
- }
- i++;
+ /* </volume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </snapshot> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* This function will generate snapshot status output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing status output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_status(xmlTextWriterPtr writer, xmlDocPtr doc, dict_t *dict)
+{
+ int ret = -1;
+ int snapcount = 0;
+ int i = 0;
+ int status_cmd = 0;
+ char key[PATH_MAX] = "";
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* <snapStatus> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "sub-cmd", &status_cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch status type");
+ goto out;
+ }
+
+ if ((GF_SNAP_STATUS_TYPE_SNAP == status_cmd) ||
+ (GF_SNAP_STATUS_TYPE_ITER == status_cmd)) {
+ snapcount = 1;
+ } else {
+ ret = dict_get_int32(dict, "status.snapcount", &snapcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not get snapcount");
+ goto out;
}
- /* </options> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count", "%d",
+ snapcount);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ for (i = 0; i < snapcount; i++) {
+ snprintf(key, sizeof(key), "status.snap%d", i);
+
+ ret = cli_xml_snapshot_status_per_snap(writer, doc, dict, key);
+ if (ret < 0) {
+ gf_log("cli", GF_LOG_ERROR,
+ "failed to create xml "
+ "output for snapshot status");
+ goto out;
+ }
+ }
+
+ /* </snapStatus> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+
+ return ret;
}
-int
-cli_xml_output_vol_info (cli_local_t *local, dict_t *dict)
-{
- int ret = 0;
- int count = 0;
- char *volname = NULL;
- char *volume_id = NULL;
- int type = 0;
- int status = 0;
- int brick_count = 0;
- int dist_count = 0;
- int stripe_count = 0;
- int replica_count = 0;
- int transport = 0;
- char *brick = NULL;
- char key[1024] = {0,};
- int i = 0;
- int j = 1;
-
-
- ret = dict_get_int32 (dict, "count", &count);
- if (ret)
- goto out;
+/* This function will generate snapshot config show output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing status output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_config_show(xmlTextWriterPtr writer, xmlDocPtr doc,
+ dict_t *dict)
+{
+ int ret = -1;
+ uint64_t i = 0;
+ uint64_t value = 0;
+ uint64_t volcount = 0;
+ char buf[PATH_MAX] = "";
+ char *str_value = NULL;
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* <systemConfig> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"systemConfig");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_uint64(dict, "snap-max-hard-limit", &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to get "
+ "snap-max-hard-limit");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hardLimit",
+ "%" PRIu64, value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_uint64(dict, "snap-max-soft-limit", &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to get "
+ "snap-max-soft-limit");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"softLimit",
+ "%" PRIu64 "%%", value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "auto-delete", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch auto-delete");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"autoDelete", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snap-activate-on-create", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not fetch snap-activate-on-create-delete");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"activateOnCreate",
+ "%s", str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </systemConfig> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <volumeConfig> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volumeConfig");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_uint64(dict, "voldisplaycount", &volcount);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch volcount");
+ goto out;
+ }
+
+ /* Get config of all the volumes */
+ for (i = 0; i < volcount; i++) {
+ /* <volume> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volume");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(buf, sizeof(buf), "volume%" PRIu64 "-volname", i);
+ ret = dict_get_str(dict, buf, &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch %s", buf);
+ goto out;
+ }
- for (i = 0; i < count; i++) {
- /* <volume> */
- ret = xmlTextWriterStartElement (local->writer,
- (xmlChar *)"volume");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.name", i);
- ret = dict_get_str (dict, key, &volname);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"name",
- "%s", volname);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.volume_id", i);
- ret = dict_get_str (dict, key, &volume_id);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"id",
- "%s", volume_id);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.type", i);
- ret = dict_get_int32 (dict, key, &type);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"type",
- "%d", type);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.status", i);
- ret = dict_get_int32 (dict, key, &status);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"status",
- "%d", status);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.brick_count", i);
- ret = dict_get_int32 (dict, key, &brick_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"brickCount",
- "%d", brick_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.dist_count", i);
- ret = dict_get_int32 (dict, key, &dist_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"distCount",
- "%d", dist_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.stripe_count", i);
- ret = dict_get_int32 (dict, key, &stripe_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"stripeCount",
- "%d", stripe_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.replica_count", i);
- ret = dict_get_int32 (dict, key, &replica_count);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"replicaCount",
- "%d", replica_count);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.transport", i);
- ret = dict_get_int32 (dict, key, &transport);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"transport",
- "%d", transport);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- /* <bricks> */
- ret = xmlTextWriterStartElement (local->writer,
- (xmlChar *)"bricks");
- XML_RET_CHECK_AND_GOTO (ret, out);
- while (j <= brick_count) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d.brick%d", i, j);
- ret = dict_get_str (dict, key, &brick);
- if (ret)
- goto out;
- ret = xmlTextWriterWriteFormatElement
- (local->writer, (xmlChar *)"brick", "%s",
- brick);
- XML_RET_CHECK_AND_GOTO (ret, out);
- j++;
- }
- /* </bricks> */
- ret = xmlTextWriterEndElement (local->writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d", i);
- ret = cli_xml_output_vol_info_options (local->writer, dict,
- key);
- if (ret)
- goto out;
+ snprintf(buf, sizeof(buf), "volume%" PRIu64 "-snap-max-hard-limit", i);
+ ret = dict_get_uint64(dict, buf, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch %s", buf);
+ goto out;
+ }
- /* </volume> */
- ret = xmlTextWriterEndElement (local->writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"hardLimit",
+ "%" PRIu64, value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(buf, sizeof(buf), "volume%" PRIu64 "-active-hard-limit", i);
+ ret = dict_get_uint64(dict, buf, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Could not fetch"
+ " effective snap_max_hard_limit for "
+ "%s",
+ str_value);
+ goto out;
}
- GF_FREE (local->get_vol.volname);
- local->get_vol.volname = gf_strdup (volname);
- local->vol_count += count;
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"effectiveHardLimit", "%" PRIu64, value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ snprintf(buf, sizeof(buf), "volume%" PRIu64 "-snap-max-soft-limit", i);
+ ret = dict_get_uint64(dict, buf, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch %s", buf);
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"softLimit",
+ "%" PRIu64, value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </volume> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* This function will generate snapshot config set output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing status output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_config_set(xmlTextWriterPtr writer, xmlDocPtr doc,
+ dict_t *dict)
+{
+ int ret = -1;
+ uint64_t hard_limit = 0;
+ uint64_t soft_limit = 0;
+ char *volname = NULL;
+ char *auto_delete = NULL;
+ char *snap_activate = NULL;
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* This is optional parameter therefore ignore the error */
+ ret = dict_get_uint64(dict, "snap-max-hard-limit", &hard_limit);
+ /* This is optional parameter therefore ignore the error */
+ ret = dict_get_uint64(dict, "snap-max-soft-limit", &soft_limit);
+ ret = dict_get_str(dict, "auto-delete", &auto_delete);
+ ret = dict_get_str(dict, "snap-activate-on-create", &snap_activate);
+
+ if (!hard_limit && !soft_limit && !auto_delete && !snap_activate) {
+ ret = -1;
+ gf_log("cli", GF_LOG_ERROR,
+ "At least one option from "
+ "snap-max-hard-limit, snap-max-soft-limit, auto-delete"
+ " and snap-activate-on-create should be set");
+ goto out;
+ }
+
+ /* Ignore the error, as volname is optional */
+ ret = dict_get_str(dict, "volname", &volname);
+
+ if (NULL == volname) {
+ /* <systemConfig> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"systemConfig");
+ } else {
+ /* <volumeConfig> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volumeConfig");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ volname);
+ }
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ if (hard_limit) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"newHardLimit",
+ "%" PRIu64, hard_limit);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ if (soft_limit) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"newSoftLimit",
+ "%" PRIu64, soft_limit);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ if (auto_delete) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"autoDelete",
+ "%s", auto_delete);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ if (snap_activate) {
+ ret = xmlTextWriterWriteFormatElement(
+ writer, (xmlChar *)"activateOnCreate", "%s", snap_activate);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+
+ /* </volumeConfig> or </systemConfig> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
}
+/* This function will generate snapshot config output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing config output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_config(xmlTextWriterPtr writer, xmlDocPtr doc, dict_t *dict)
+{
+ int ret = -1;
+ int config_command = 0;
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* <snapConfig> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapConfig");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "config-command", &config_command);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Could not fetch config type");
+ goto out;
+ }
+
+ switch (config_command) {
+ case GF_SNAP_CONFIG_TYPE_SET:
+ ret = cli_xml_snapshot_config_set(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create xml "
+ "output for snapshot config set command");
+ goto out;
+ }
+
+ break;
+ case GF_SNAP_CONFIG_DISPLAY:
+ ret = cli_xml_snapshot_config_show(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create xml "
+ "output for snapshot config show command");
+ goto out;
+ }
+ break;
+ default:
+ gf_log("cli", GF_LOG_ERROR, "Unknown config command :%d",
+ config_command);
+ ret = -1;
+ goto out;
+ }
+
+ /* </snapConfig> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
+out:
+
+ return ret;
+}
+
+/* This function will generate snapshot activate or
+ * deactivate output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing activate or deactivate output
+ *
+ * @return 0 on success and -1 on failure
+ */
+static int
+cli_xml_snapshot_activate_deactivate(xmlTextWriterPtr writer, xmlDocPtr doc,
+ dict_t *dict, int cmd)
+{
+ int ret = -1;
+ char *buffer = NULL;
+ char *tag = NULL;
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ if (GF_SNAP_OPTION_TYPE_ACTIVATE == cmd) {
+ tag = "snapActivate";
+ } else if (GF_SNAP_OPTION_TYPE_DEACTIVATE == cmd) {
+ tag = "snapDeactivate";
+ } else {
+ gf_log("cli", GF_LOG_ERROR, "invalid command %d", cmd);
+ goto out;
+ }
+
+ /* <snapActivate> or <snapDeactivate> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)tag);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <snapshot> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapshot");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snapname", &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snapuuid", &buffer);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
+ goto out;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ buffer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </snapshot> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </snapActivate> or </snapDeactivate> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = 0;
+out:
+
+ return ret;
+}
+#endif /* HAVE_LIB_XML */
+
+/* This function will generate snapshot delete output in xml format.
+ *
+ * @param writer xmlTextWriterPtr
+ * @param doc xmlDocPtr
+ * @param dict dict containing delete output
+ * @param rsp cli response
+ *
+ * @return 0 on success and -1 on failure
+ */
int
-cli_xml_output_vol_info_begin (cli_local_t *local, int op_ret, int op_errno,
- char *op_errstr)
+cli_xml_snapshot_delete(cli_local_t *local, dict_t *dict, gf_cli_rsp *rsp)
{
- int ret = -1;
+ int ret = -1;
+#ifdef HAVE_LIB_XML
+ char *str_value = NULL;
+ xmlTextWriterPtr writer = local->writer;
+ xmlDocPtr doc = local->doc;
+
+ GF_ASSERT(writer);
+ GF_ASSERT(doc);
+ GF_ASSERT(dict);
+
+ /* <snapshot> */
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"snapshot");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_str(dict, "snapname", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap name");
+ goto xmlend;
+ }
+
+ if (!rsp->op_ret) {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"status",
+ "Success");
+ } else {
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"status",
+ "Failure");
+ XML_RET_CHECK_AND_GOTO(ret, xmlend);
+
+ ret = cli_xml_output_common(writer, rsp->op_ret, rsp->op_errno,
+ rsp->op_errstr);
+ }
+ XML_RET_CHECK_AND_GOTO(ret, xmlend);
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"name", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, xmlend);
+
+ ret = dict_get_str(dict, "snapuuid", &str_value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get snap uuid");
+ goto xmlend;
+ }
+
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"uuid", "%s",
+ str_value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+xmlend:
+ /* </snapshot> */
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+#endif /* HAVE_LIB_XML */
+ ret = 0;
+out:
- GF_ASSERT (local);
+ return ret;
+}
- ret = cli_begin_xml_output (&(local->writer), &(local->buf));
- if (ret)
- goto out;
+int
+cli_xml_output_snap_status_begin(cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
- ret = cli_xml_output_common (local->writer, op_ret, op_errno,
- op_errstr);
- if (ret)
- goto out;
+ GF_ASSERT(local);
+
+ ret = cli_begin_xml_output(&(local->writer), &(local->doc));
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_xml_output_common(local->writer, op_ret, op_errno, op_errstr);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <snapStatus> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"snapStatus");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <snapshots> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"snapshots");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+out:
+ gf_log("cli", GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+
+int
+cli_xml_output_snap_status_end(cli_local_t *local)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
- /* <volInfo> */
- ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"volInfo");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ GF_ASSERT(local);
- /* <volumes> */
- ret = xmlTextWriterStartElement (local->writer, (xmlChar *)"volumes");
- XML_RET_CHECK_AND_GOTO (ret, out);
+ /* </snapshots> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* Init vol count */
- local->vol_count = 0;
+ /* </snapStatus> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ ret = cli_end_xml_output(local->writer, local->doc);
out:
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
int
-cli_xml_output_vol_info_end (cli_local_t *local)
+cli_xml_output_snap_delete_begin(cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr)
{
- int ret = -1;
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ int delete_cmd = -1;
- GF_ASSERT (local);
+ GF_ASSERT(local);
- ret = xmlTextWriterWriteFormatElement (local->writer,
- (xmlChar *)"count",
- "%d", local->vol_count);
+ ret = cli_begin_xml_output(&(local->writer), &(local->doc));
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </volumes> */
- ret = xmlTextWriterEndElement (local->writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = dict_get_int32(local->dict, "sub-cmd", &delete_cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to get sub-cmd");
+ goto out;
+ }
- /* </volInfo> */
- ret = xmlTextWriterEndElement (local->writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = cli_xml_output_common(local->writer, op_ret, op_errno, op_errstr);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = cli_end_xml_output (local->writer, local->buf);
+ /* <snapStatus> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"snapDelete");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* <snapshots> */
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"snapshots");
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- gf_log ("cli", GF_LOG_ERROR, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
}
int
-cli_xml_output_vol_quota_limit_list (char *volname, char *limit_list,
- int op_ret, int op_errno,
- char *op_errstr)
-{
- int ret = -1;
- xmlTextWriterPtr writer = NULL;
- xmlBufferPtr buf = NULL;
- int64_t size = 0;
- int64_t limit_value = 0;
- int i = 0;
- int j = 0;
- int k = 0;
- int len = 0;
- char *size_str = NULL;
- char path[PATH_MAX] = {0,};
- char ret_str[1024] = {0,};
- char value[1024] = {0,};
- char mountdir[] = "/tmp/mountXXXXXX";
- char abspath[PATH_MAX] = {0,};
- runner_t runner = {0,};
-
- GF_ASSERT (volname);
- GF_ASSERT (limit_list);
-
- ret = cli_begin_xml_output (&writer, &buf);
- if (ret)
- goto out;
+cli_xml_output_snap_delete_end(cli_local_t *local)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
- ret = cli_xml_output_common (writer, op_ret, op_errno, op_errstr);
- if (ret)
+ GF_ASSERT(local);
+
+ /* </snapshots> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ /* </snapDelete> */
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = cli_end_xml_output(local->writer, local->doc);
+out:
+ gf_log("cli", GF_LOG_TRACE, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif
+}
+/* This function will generate xml output for all the snapshot commands
+ *
+ * @param cmd_type command type
+ * @param dict dict containing snapshot command output
+ * @param op_ret return value of the snapshot command
+ * @param op_errno errno for the snapshot command
+ * @param op_errstr error string for the snapshot command
+ *
+ * @return 0 on success and -1 on failure
+ */
+int
+cli_xml_output_snapshot(int cmd_type, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+
+ GF_ASSERT(dict);
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to output "
+ "xml begin block");
+ goto out;
+ }
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to output "
+ "xml common block");
+ goto out;
+ }
+
+ /* In case of command failure just printing the error message is good
+ * enough */
+ if (0 != op_ret) {
+ goto end;
+ }
+
+ switch (cmd_type) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ ret = cli_xml_snapshot_create(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot create command");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_CLONE:
+ ret = cli_xml_snapshot_clone(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot clone command");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ ret = cli_xml_snapshot_restore(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot restore command");
goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_LIST:
+ ret = cli_xml_snapshot_list(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot list command");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_STATUS:
+ ret = cli_xml_snapshot_status(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create"
+ "xml output for snapshot status command");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_INFO:
+ ret = cli_xml_snapshot_info(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot info command");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_ACTIVATE:
+ case GF_SNAP_OPTION_TYPE_DEACTIVATE:
+ ret = cli_xml_snapshot_activate_deactivate(writer, doc, dict,
+ cmd_type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot config command");
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ ret = cli_xml_snapshot_config(writer, doc, dict);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to create "
+ "xml output for snapshot config command");
+ }
+ break;
+ default:
+ gf_log("cli", GF_LOG_ERROR, "Unexpected snapshot command: %d",
+ cmd_type);
+ goto out;
+ }
+
+end:
+ ret = cli_end_xml_output(writer, doc);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to output "
+ "xml end block");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+#else
+ return 0;
+#endif /* HAVE_LIB_XML */
+}
- /* <volQuota> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"volQuota");
- XML_RET_CHECK_AND_GOTO (ret, out);
+int
+cli_xml_snapshot_begin_composite_op(cli_local_t *local)
+{
+ int ret = -1;
+#ifdef HAVE_LIB_XML
+ int cmd = -1;
+ int type = -1;
+
+ ret = dict_get_int32(local->dict, "sub-cmd", &cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to get "
+ "sub-cmd");
+ ret = 0;
+ goto out;
+ }
+
+ if (cmd == GF_SNAP_STATUS_TYPE_ITER || cmd == GF_SNAP_DELETE_TYPE_SNAP) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_int32(local->dict, "type", &type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to get snapshot "
+ "command type from dictionary");
+ goto out;
+ }
+
+ if (GF_SNAP_OPTION_TYPE_STATUS == type)
+ ret = cli_xml_output_snap_status_begin(local, 0, 0, NULL);
+ else if (GF_SNAP_OPTION_TYPE_DELETE == type)
+ ret = cli_xml_output_snap_delete_begin(local, 0, 0, NULL);
+
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "Error creating xml output");
+ goto out;
+ }
+
+#endif /* HAVE_LIB_XML */
+ ret = 0;
+out:
+ return ret;
+}
- if (!limit_list)
- goto cont;
+int
+cli_xml_snapshot_end_composite_op(cli_local_t *local)
+{
+ int ret = -1;
+#ifdef HAVE_LIB_XML
+ int cmd = -1;
+ int type = -1;
+
+ ret = dict_get_int32(local->dict, "sub-cmd", &cmd);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to get "
+ "sub-cmd");
+ ret = 0;
+ goto out;
+ }
+
+ if (cmd == GF_SNAP_STATUS_TYPE_ITER || cmd == GF_SNAP_DELETE_TYPE_SNAP) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_int32(local->dict, "type", &type);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to get snapshot "
+ "command type from dictionary");
+ goto out;
+ }
+
+ if (GF_SNAP_OPTION_TYPE_STATUS == type)
+ ret = cli_xml_output_snap_status_end(local);
+ else if (GF_SNAP_OPTION_TYPE_DELETE == type)
+ ret = cli_xml_output_snap_delete_end(local);
+
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Error creating xml "
+ "output");
+ goto out;
+ }
+#endif /* HAVE_LIB_XML */
+ ret = 0;
+out:
+ return ret;
+}
- len = strlen (limit_list);
- if (len == 0)
- goto cont;
+int
+cli_xml_snapshot_status_single_snap(cli_local_t *local, dict_t *dict, char *key)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
- if (mkdtemp (mountdir) == NULL) {
- gf_log ("cli", GF_LOG_ERROR, "failed to create a temporary"
- " mount directory");
- ret = -1;
- goto out;
- }
+ GF_VALIDATE_OR_GOTO("cli", (local != NULL), out);
+ GF_VALIDATE_OR_GOTO("cli", (dict != NULL), out);
+ GF_VALIDATE_OR_GOTO("cli", (key != NULL), out);
- ret = runcmd (SBIN_DIR"/glusterfs", "-s", "localhost",
- "--volfile-id", volname, "-l",
- DEFAULT_LOG_FILE_DIRECTORY"/quota-list-xml.log",
- mountdir, NULL);
+ ret = cli_xml_snapshot_status_per_snap(local->writer, local->doc, dict,
+ key);
+out:
+ return ret;
+#else
+ return 0;
+#endif /* HAVE_LIB_XML */
+}
+
+int
+cli_xml_output_vol_getopts(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr)
+{
+#if (HAVE_LIB_XML)
+ int i = 0;
+ int ret = -1;
+ int count = 0;
+ xmlTextWriterPtr writer = NULL;
+ xmlDocPtr doc = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char dict_key[50] = {
+ 0,
+ };
+
+ ret = cli_begin_xml_output(&writer, &doc);
+ if (ret)
+ goto out;
+
+ ret = cli_xml_output_common(writer, op_ret, op_errno, op_errstr);
+ if (ret)
+ goto out;
+
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"volGetopts");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = dict_get_int32(dict, "count", &count);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to retrieve count "
+ "from the dictionary");
+ goto out;
+ }
+ if (count <= 0) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Value of count :%d is "
+ "invalid",
+ count);
+ ret = -1;
+ goto out;
+ }
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"count", "%d",
+ count);
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ for (i = 1; i <= count; i++) {
+ sprintf(dict_key, "key%d", i);
+ ret = dict_get_str(dict, dict_key, &key);
if (ret) {
- gf_log ("cli", GF_LOG_ERROR,
- "failed to mount glusterfs client");
- ret = -1;
- goto rm_dir;
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to"
+ " retrieve %s from the "
+ "dictionary",
+ dict_key);
+ goto out;
}
+ sprintf(dict_key, "value%d", i);
+ ret = dict_get_str(dict, dict_key, &value);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR,
+ "Failed to "
+ "retrieve key value for %s from"
+ "the dictionary",
+ dict_key);
+ goto out;
+ }
+ ret = xmlTextWriterStartElement(writer, (xmlChar *)"Opt");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- while (i < len) {
- j = 0;
- k = 0;
- size = 0;
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"Option", "%s",
+ key);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- while (limit_list[i] != ':')
- path[k++] = limit_list[i++];
- path[k] = '\0';
+ ret = xmlTextWriterWriteFormatElement(writer, (xmlChar *)"Value", "%s",
+ value);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- i++;
+ ret = xmlTextWriterEndElement(writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+ }
+ ret = cli_end_xml_output(writer, doc);
- while (limit_list[i] != ',' && limit_list[i] != '\0')
- value[j++] = limit_list[i++];
- i++;
+out:
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+#else
+ return 0;
+#endif /* HAVE_LIB_XML */
+}
- snprintf (abspath, sizeof (abspath), "%s/%s", mountdir, path);
- ret = sys_lgetxattr (abspath, "trusted.limit.list",
- (void *)ret_str, 4096);
- if (ret >= 0) {
- sscanf (ret_str, "%"SCNd64",%"SCNd64, &size,
- &limit_value);
- size_str = gf_uint64_2human_readable ((uint64_t)size);
- }
+int
+cli_quota_list_xml_error(cli_local_t *local, char *path, char *errstr)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
- /* <quota> */
- ret = xmlTextWriterStartElement (writer, (xmlChar *)"quota");
- XML_RET_CHECK_AND_GOTO (ret, unmount);
-
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"path", "%s", path);
- XML_RET_CHECK_AND_GOTO (ret, unmount);
-
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"limit", "%s", value);
- XML_RET_CHECK_AND_GOTO (ret, unmount);
-
- if (size_str) {
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"size", "%s", size_str);
- XML_RET_CHECK_AND_GOTO (ret, unmount);
- GF_FREE (size_str);
- } else {
- ret = xmlTextWriterWriteFormatElement
- (writer, (xmlChar *)"size", "%"PRId64, size);
- XML_RET_CHECK_AND_GOTO (ret, unmount);
- }
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"limit");
+ XML_RET_CHECK_AND_GOTO(ret, out);
- /* </quota> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, unmount);
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"path",
+ "%s", path);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- }
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"errstr",
+ "%s", errstr);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
-unmount:
- runinit (&runner);
- runner_add_args (&runner, "umount",
-#if GF_LINUX_HOST_OS
- "-l",
+out:
+ return ret;
+#else
+ return 0;
#endif
- mountdir, NULL);
- ret = runner_run_reuse (&runner);
- if (ret)
- runner_log (&runner, "cli", GF_LOG_WARNING, "error executing");
- runner_end (&runner);
+}
-rm_dir:
- rmdir (mountdir);
+int
+cli_quota_xml_output(cli_local_t *local, char *path, int64_t hl_str,
+ char *sl_final, int64_t sl_num, int64_t used,
+ int64_t avail, char *sl, char *hl, gf_boolean_t limit_set)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
-cont:
- /* </volQuota> */
- ret = xmlTextWriterEndElement (writer);
- XML_RET_CHECK_AND_GOTO (ret, out);
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"limit");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"path",
+ "%s", path);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"hard_limit", !limit_set ? "N/A" : "%" PRId64,
+ hl_str);
+ XML_RET_CHECK_AND_GOTO(ret, out);
- ret = cli_end_xml_output (writer, buf);
+ ret = xmlTextWriterWriteFormatElement(local->writer,
+ (xmlChar *)"soft_limit_percent",
+ !limit_set ? "N/A" : "%s", sl_final);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"soft_limit_value",
+ !limit_set ? "N/A" : "%" PRId64, sl_num);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"used_space", "%" PRId64, used);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"avail_space",
+ !limit_set ? "N/A" : "%" PRId64, avail);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"sl_exceeded", !limit_set ? "N/A" : "%s", sl);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"hl_exceeded", !limit_set ? "N/A" : "%s", hl);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
out:
- if (size_str)
- GF_FREE (size_str);
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
+#else
+ return 0;
+#endif /* HAVE_LIB_XML */
}
-#endif
+int
+cli_quota_object_xml_output(cli_local_t *local, char *path, char *sl_str,
+ int64_t sl_val, quota_limits_t *limits,
+ quota_meta_t *used_space, int64_t avail, char *sl,
+ char *hl, gf_boolean_t limit_set)
+{
+#if (HAVE_LIB_XML)
+ int ret = -1;
+
+ ret = xmlTextWriterStartElement(local->writer, (xmlChar *)"limit");
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"path",
+ "%s", path);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"hard_limit", !limit_set ? "N/A" : "%" PRId64,
+ limits->hl);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(local->writer,
+ (xmlChar *)"soft_limit_percent",
+ !limit_set ? "N/A" : "%s", sl_str);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"soft_limit_value",
+ !limit_set ? "N/A" : "%" PRIu64, sl_val);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(local->writer,
+ (xmlChar *)"file_count", "%" PRId64,
+ used_space->file_count);
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"dir_count",
+ "%" PRIu64, used_space->dir_count);
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(local->writer, (xmlChar *)"available",
+ !limit_set ? "N/A" : "%" PRId64,
+ avail);
+
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"sl_exceeded", !limit_set ? "N/A" : "%s", sl);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterWriteFormatElement(
+ local->writer, (xmlChar *)"hl_exceeded", !limit_set ? "N/A" : "%s", hl);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+ ret = xmlTextWriterEndElement(local->writer);
+ XML_RET_CHECK_AND_GOTO(ret, out);
+
+out:
+ return ret;
+#else
+ return 0;
+#endif /* HAVE_LIB_XML */
+}
diff --git a/cli/src/cli.c b/cli/src/cli.c
index 16e434f0f8b..a52b39c5fb8 100644
--- a/cli/src/cli.c
+++ b/cli/src/cli.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -39,609 +29,884 @@
#include <semaphore.h>
#include <errno.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
-#ifdef HAVE_MALLOC_STATS
-#ifdef DEBUG
-#include <mcheck.h>
-#endif
-#endif
-
#include "cli.h"
+#include "cli-quotad-client.h"
#include "cli-cmd.h"
#include "cli-mem-types.h"
-#include "xlator.h"
-#include "glusterfs.h"
-#include "compat.h"
-#include "logging.h"
-#include "dict.h"
-#include "list.h"
-#include "timer.h"
-#include "stack.h"
-#include "revision.h"
-#include "common-utils.h"
-#include "event.h"
-#include "globals.h"
-#include "syscall.h"
-#include "call-stub.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/list.h>
+#include <glusterfs/timer.h>
+#include <glusterfs/stack.h>
+#include <glusterfs/revision.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/gf-event.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/call-stub.h>
#include <fnmatch.h>
#include "xdr-generic.h"
-extern int connected;
/* using argp for command line parsing */
-const char *argp_program_version = "" \
- PACKAGE_NAME" "PACKAGE_VERSION" built on "__DATE__" "__TIME__ \
- "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION "\n" \
- "Copyright (c) 2006-2011 Gluster Inc. " \
- "<http://www.gluster.com>\n" \
- "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n" \
- "You may redistribute copies of GlusterFS under the terms of "\
- "the GNU General Public License.";
-
+const char *argp_program_version =
+ "" PACKAGE_NAME " " PACKAGE_VERSION
+ "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION
+ "\n"
+ "Copyright (c) 2006-2016 Red Hat, Inc. "
+ "<https://www.gluster.org/>\n"
+ "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n"
+ "It is licensed to you under your choice of the GNU Lesser\n"
+ "General Public License, version 3 or any later version (LGPLv3\n"
+ "or later), or the GNU General Public License, version 2 (GPLv2),\n"
+ "in all cases as published by the Free Software Foundation.";
const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">";
-
+struct rpc_clnt *global_quotad_rpc;
struct rpc_clnt *global_rpc;
rpc_clnt_prog_t *cli_rpc_prog;
-
extern struct rpc_clnt_program cli_prog;
+int cli_default_conn_timeout = 120;
+int cli_ten_minutes_timeout = 600;
-
-
-static char *
-generate_uuid ()
+static int
+glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
{
- char tmp_str[1024] = {0,};
- char hostname[256] = {0,};
- struct timeval tv = {0,};
- char now_str[32];
-
- if (gettimeofday (&tv, NULL) == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "gettimeofday: failed %s",
- strerror (errno));
- }
-
- if (gethostname (hostname, 256) == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "gethostname: failed %s",
- strerror (errno));
- }
+ cmd_args_t *cmd_args = NULL;
+ struct rlimit lim = {
+ 0,
+ };
+ call_pool_t *pool = NULL;
+ int ret = -1;
+
+ if (!ctx)
+ return ret;
- gf_time_fmt (now_str, sizeof now_str, tv.tv_sec, gf_timefmt_Ymd_T);
- snprintf (tmp_str, sizeof tmp_str, "%s-%d-%s:%"
-#ifdef GF_DARWIN_HOST_OS
- PRId32,
-#else
- "ld",
-#endif
- hostname, getpid(), now_str, tv.tv_usec);
+ ret = xlator_mem_acct_init(THIS, cli_mt_end);
+ if (ret != 0) {
+ gf_log("cli", GF_LOG_ERROR, "Memory accounting init failed.");
+ return ret;
+ }
+
+ /* Resetting ret to -1 to so in case of failure
+ * we can relese allocated resource.
+ */
+ ret = -1;
+
+ ctx->process_uuid = generate_glusterfs_ctx_id();
+ if (!ctx->process_uuid) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to generate uuid.");
+ goto out;
+ }
+
+ ctx->page_size = 128 * GF_UNIT_KB;
+
+ ctx->iobuf_pool = iobuf_pool_new();
+ if (!ctx->iobuf_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create iobuf pool.");
+ goto out;
+ }
+
+ ctx->event_pool = gf_event_pool_new(DEFAULT_EVENT_POOL_SIZE,
+ STARTING_EVENT_THREADS);
+ if (!ctx->event_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create event pool.");
+ goto out;
+ }
+
+ pool = GF_CALLOC(1, sizeof(call_pool_t), cli_mt_call_pool_t);
+ if (!pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create call pool.");
+ goto out;
+ }
+
+ /* frame_mem_pool size 112 * 64 */
+ pool->frame_mem_pool = mem_pool_new(call_frame_t, 32);
+ if (!pool->frame_mem_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create frame mem pool.");
+ goto out;
+ }
+
+ /* stack_mem_pool size 256 * 128 */
+ pool->stack_mem_pool = mem_pool_new(call_stack_t, 16);
+
+ if (!pool->stack_mem_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create stack mem pool.");
+ goto out;
+ }
+
+ ctx->stub_mem_pool = mem_pool_new(call_stub_t, 16);
+ if (!ctx->stub_mem_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to stub mem pool.");
+ goto out;
+ }
+
+ ctx->dict_pool = mem_pool_new(dict_t, 32);
+ if (!ctx->dict_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create dict pool.");
+ goto out;
+ }
+
+ ctx->dict_pair_pool = mem_pool_new(data_pair_t, 512);
+ if (!ctx->dict_pair_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create dict pair pool.");
+ goto out;
+ }
+
+ ctx->dict_data_pool = mem_pool_new(data_t, 512);
+ if (!ctx->dict_data_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create dict data pool.");
+ goto out;
+ }
+
+ ctx->logbuf_pool = mem_pool_new(log_buf_t, 256);
+ if (!ctx->logbuf_pool) {
+ gf_log("cli", GF_LOG_ERROR, "Failed to create logbuf pool.");
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&pool->all_frames);
+ LOCK_INIT(&pool->lock);
+ ctx->pool = pool;
+
+ cmd_args = &ctx->cmd_args;
+
+ INIT_LIST_HEAD(&cmd_args->xlator_options);
+
+ lim.rlim_cur = RLIM_INFINITY;
+ lim.rlim_max = RLIM_INFINITY;
+ setrlimit(RLIMIT_CORE, &lim);
+
+ ret = 0;
- return gf_strdup (tmp_str);
+out:
+ if (ret != 0) {
+ if (pool) {
+ mem_pool_destroy(pool->frame_mem_pool);
+ mem_pool_destroy(pool->stack_mem_pool);
+ }
+ GF_FREE(pool);
+ pool = NULL;
+ GF_FREE(ctx->process_uuid);
+ mem_pool_destroy(ctx->stub_mem_pool);
+ mem_pool_destroy(ctx->dict_pool);
+ mem_pool_destroy(ctx->dict_pair_pool);
+ mem_pool_destroy(ctx->dict_data_pool);
+ mem_pool_destroy(ctx->logbuf_pool);
+ }
+
+ return ret;
}
static int
-glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
+logging_init(glusterfs_ctx_t *ctx, struct cli_state *state)
{
- cmd_args_t *cmd_args = NULL;
- struct rlimit lim = {0, };
- call_pool_t *pool = NULL;
-
- xlator_mem_acct_init (THIS, cli_mt_end);
-
- ctx->process_uuid = generate_uuid ();
- if (!ctx->process_uuid)
- return -1;
-
- ctx->page_size = 128 * GF_UNIT_KB;
-
- ctx->iobuf_pool = iobuf_pool_new ();
- if (!ctx->iobuf_pool)
- return -1;
-
- ctx->event_pool = event_pool_new (DEFAULT_EVENT_POOL_SIZE);
- if (!ctx->event_pool)
- return -1;
-
- pool = GF_CALLOC (1, sizeof (call_pool_t),
- cli_mt_call_pool_t);
- if (!pool)
- return -1;
-
- /* frame_mem_pool size 112 * 64 */
- pool->frame_mem_pool = mem_pool_new (call_frame_t, 32);
- if (!pool->frame_mem_pool)
- return -1;
-
- /* stack_mem_pool size 256 * 128 */
- pool->stack_mem_pool = mem_pool_new (call_stack_t, 16);
-
- if (!pool->stack_mem_pool)
- return -1;
-
- ctx->stub_mem_pool = mem_pool_new (call_stub_t, 16);
- if (!ctx->stub_mem_pool)
- return -1;
-
- ctx->dict_pool = mem_pool_new (dict_t, 32);
- if (!ctx->dict_pool)
- return -1;
-
- ctx->dict_pair_pool = mem_pool_new (data_pair_t, 512);
- if (!ctx->dict_pair_pool)
- return -1;
-
- ctx->dict_data_pool = mem_pool_new (data_t, 512);
- if (!ctx->dict_data_pool)
- return -1;
-
- INIT_LIST_HEAD (&pool->all_frames);
- LOCK_INIT (&pool->lock);
- ctx->pool = pool;
-
- pthread_mutex_init (&(ctx->lock), NULL);
-
- cmd_args = &ctx->cmd_args;
-
- INIT_LIST_HEAD (&cmd_args->xlator_options);
-
- lim.rlim_cur = RLIM_INFINITY;
- lim.rlim_max = RLIM_INFINITY;
- setrlimit (RLIMIT_CORE, &lim);
-
- return 0;
+ char *log_file = state->log_file ? state->log_file
+ : DEFAULT_CLI_LOG_FILE_DIRECTORY
+ "/cli.log";
+
+ /* passing ident as NULL means to use default ident for syslog */
+ if (gf_log_init(ctx, log_file, NULL) == -1) {
+ fprintf(stderr, "ERROR: failed to open logfile %s\n", log_file);
+ }
+
+ /* CLI should not have something to DEBUG after the release,
+ hence defaulting to INFO loglevel */
+ gf_log_set_loglevel(ctx, (state->log_level == GF_LOG_NONE)
+ ? GF_LOG_INFO
+ : state->log_level);
+
+ return 0;
}
-
-static int
-logging_init (struct cli_state *state)
+int
+cli_submit_request(struct rpc_clnt *rpc, void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog, int procnum, struct iobref *iobref,
+ xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
{
- char *log_file = state->log_file ? state->log_file :
- DEFAULT_CLI_LOG_FILE_DIRECTORY "/cli.log";
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {
+ 0,
+ };
+ struct iobuf *iobuf = NULL;
+ char new_iobref = 0;
+ ssize_t xdr_size = 0;
+
+ GF_ASSERT(this);
+
+ if (req) {
+ xdr_size = xdr_sizeof(xdrproc, req);
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ if (!iobref) {
+ iobref = iobref_new();
+ if (!iobref) {
+ goto out;
+ }
- if (gf_log_init (log_file) == -1) {
- fprintf (stderr, "ERROR: failed to open logfile %s\n",
- log_file);
- return -1;
+ new_iobref = 1;
}
- /* CLI should not have something to DEBUG after the release,
- hence defaulting to INFO loglevel */
- gf_log_set_loglevel ((state->log_level == -1) ? GF_LOG_INFO :
- state->log_level);
+ iobref_add(iobref, iobuf);
- return 0;
-}
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_size(iobuf);
-int
-cli_submit_request (void *req, call_frame_t *frame,
- rpc_clnt_prog_t *prog,
- int procnum, struct iobref *iobref,
- xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
-{
- int ret = -1;
- int count = 0;
- struct iovec iov = {0, };
- struct iobuf *iobuf = NULL;
- char new_iobref = 0;
- ssize_t xdr_size = 0;
-
- GF_ASSERT (this);
-
- if (req) {
- xdr_size = xdr_sizeof (xdrproc, req);
- iobuf = iobuf_get2 (this->ctx->iobuf_pool, xdr_size);
- if (!iobuf) {
- goto out;
- };
-
- if (!iobref) {
- iobref = iobref_new ();
- if (!iobref) {
- goto out;
- }
-
- new_iobref = 1;
- }
-
- iobref_add (iobref, iobuf);
-
- iov.iov_base = iobuf->ptr;
- iov.iov_len = iobuf_size (iobuf);
-
-
- /* Create the xdr payload */
- ret = xdr_serialize_generic (iov, req, xdrproc);
- if (ret == -1) {
- goto out;
- }
- iov.iov_len = ret;
- count = 1;
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic(iov, req, xdrproc);
+ if (ret == -1) {
+ goto out;
}
+ iov.iov_len = ret;
+ count = 1;
+ }
- /* Send the msg */
- ret = rpc_clnt_submit (global_rpc, prog, procnum, cbkfn,
- &iov, count,
- NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL);
- ret = 0;
+ if (!rpc)
+ rpc = global_rpc;
+ /* Send the msg */
+ ret = rpc_clnt_submit(rpc, prog, procnum, cbkfn, &iov, count, NULL, 0,
+ iobref, frame, NULL, 0, NULL, 0, NULL);
+ ret = 0;
out:
- if (new_iobref)
- iobref_unref (iobref);
- if (iobuf)
- iobuf_unref (iobuf);
- return ret;
+ if (new_iobref)
+ iobref_unref(iobref);
+ if (iobuf)
+ iobuf_unref(iobuf);
+ return ret;
}
int
-cli_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
- void *data)
+cli_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data)
{
- xlator_t *this = NULL;
- int ret = 0;
-
- this = mydata;
+ xlator_t *this = NULL;
+ int ret = 0;
- switch (event) {
- case RPC_CLNT_CONNECT:
- {
+ this = mydata;
- cli_cmd_broadcast_connected ();
- gf_log (this->name, GF_LOG_TRACE, "got RPC_CLNT_CONNECT");
- break;
+ switch (event) {
+ case RPC_CLNT_CONNECT: {
+ cli_cmd_broadcast_connected(_gf_true);
+ gf_log(this->name, GF_LOG_TRACE, "got RPC_CLNT_CONNECT");
+ break;
}
- case RPC_CLNT_DISCONNECT:
- {
- gf_log (this->name, GF_LOG_TRACE, "got RPC_CLNT_DISCONNECT");
- connected = 0;
- if (!global_state->prompt && global_state->await_connected) {
- ret = 1;
- cli_out ("Connection failed. Please check if gluster "
- "daemon is operational.");
- exit (ret);
- }
- break;
+ case RPC_CLNT_DISCONNECT: {
+ cli_cmd_broadcast_connected(_gf_false);
+ gf_log(this->name, GF_LOG_TRACE, "got RPC_CLNT_DISCONNECT");
+ if (!global_state->prompt && global_state->await_connected) {
+ ret = 1;
+ cli_out(
+ "Connection failed. Please check if gluster "
+ "daemon is operational.");
+ exit(ret);
+ }
+ break;
}
default:
- gf_log (this->name, GF_LOG_TRACE,
- "got some other RPC event %d", event);
- ret = 0;
- break;
- }
+ gf_log(this->name, GF_LOG_TRACE, "got some other RPC event %d",
+ event);
+ ret = 0;
+ break;
+ }
- return ret;
+ return ret;
}
+static gf_boolean_t
+is_valid_int(char *str)
+{
+ if (*str == '-')
+ ++str;
+
+ /* Handle empty string or just "-".*/
+ if (!*str)
+ return _gf_false;
+
+ /* Check for non-digit chars in the rest of the string */
+ while (*str) {
+ if (!isdigit(*str))
+ return _gf_false;
+ else
+ ++str;
+ }
+ return _gf_true;
+}
+
+/*
+ * ret: 0: option successfully processed
+ * 1: signalling end of option list
+ * -1: unknown option
+ * -2: parsing issue (avoid unknown option error)
+ */
int
-cli_opt_parse (char *opt, struct cli_state *state)
+cli_opt_parse(char *opt, struct cli_state *state)
{
- char *oarg;
+ char *oarg = NULL;
+ gf_boolean_t secure_mgmt_tmp = 0;
+
+ if (strcmp(opt, "") == 0)
+ return 1;
+ if (strcmp(opt, "help") == 0) {
+ cli_out(
+ " peer help - display help for peer commands\n"
+ " volume help - display help for volume commands\n"
+ " volume bitrot help - display help for volume"
+ " bitrot commands\n"
+ " volume quota help - display help for volume"
+ " quota commands\n"
+ " snapshot help - display help for snapshot commands\n"
+ " global help - list global commands\n");
+ exit(0);
+ }
+
+ if (strcmp(opt, "version") == 0) {
+ cli_out("%s", argp_program_version);
+ exit(0);
+ }
+
+ if (strcmp(opt, "print-logdir") == 0) {
+ cli_out("%s", DEFAULT_LOG_FILE_DIRECTORY);
+ exit(0);
+ }
+
+ if (strcmp(opt, "print-statedumpdir") == 0) {
+ cli_out("%s", DEFAULT_VAR_RUN_DIRECTORY);
+ exit(0);
+ }
+
+ if (strcmp(opt, "xml") == 0) {
+#if (HAVE_LIB_XML)
+ state->mode |= GLUSTER_MODE_XML;
+#else
+ cli_err("XML output not supported. Ignoring '--xml' option");
+#endif
+ return 0;
+ }
- if (strcmp (opt, "version") == 0) {
- puts (argp_program_version);
- exit (0);
- }
+ if (strcmp(opt, "nolog") == 0) {
+ state->mode |= GLUSTER_MODE_GLFSHEAL_NOLOG;
+ return 0;
+ }
- if (strcmp (opt, "xml") == 0) {
- state->mode |= GLUSTER_MODE_XML;
- return 0;
- }
+ if (strcmp(opt, "wignore-partition") == 0) {
+ state->mode |= GLUSTER_MODE_WIGNORE_PARTITION;
+ return 0;
+ }
- oarg = strtail (opt, "mode=");
- if (oarg) {
- if (strcmp (oarg, "script") == 0) {
- state->mode |= GLUSTER_MODE_SCRIPT;
- return 0;
- }
- if (strcmp (oarg, "interactive") == 0)
- return 0;
- return -1;
- }
+ if (strcmp(opt, "wignore") == 0) {
+ state->mode |= GLUSTER_MODE_WIGNORE;
+ return 0;
+ }
- oarg = strtail (opt, "remote-host=");
- if (oarg) {
- state->remote_host = oarg;
- return 0;
+ oarg = strtail(opt, "mode=");
+ if (oarg) {
+ if (strcmp(oarg, "script") == 0) {
+ state->mode |= GLUSTER_MODE_SCRIPT;
+ return 0;
}
- oarg = strtail (opt, "log-file=");
- if (oarg) {
- state->log_file = oarg;
- return 0;
+ if (strcmp(oarg, "interactive") == 0)
+ return 0;
+
+ return -1;
+ }
+
+ oarg = strtail(opt, "remote-host=");
+ if (oarg) {
+ state->remote_host = oarg;
+ return 0;
+ }
+
+ oarg = strtail(opt, "inet6");
+ if (oarg) {
+ state->address_family = "inet6";
+ return 0;
+ }
+
+ oarg = strtail(opt, "log-file=");
+ if (oarg) {
+ state->log_file = oarg;
+ return 0;
+ }
+ oarg = strtail(opt, "timeout=");
+ if (oarg) {
+ if (!is_valid_int(oarg) || atoi(oarg) <= 0) {
+ cli_err("timeout value should be a positive integer");
+ return -2; /* -2 instead of -1 to avoid unknown option
+ error */
}
+ cli_default_conn_timeout = atoi(oarg);
+ return 0;
+ }
+
+ oarg = strtail(opt, "log-level=");
+ if (oarg) {
+ int log_level = glusterd_check_log_level(oarg);
+ if (log_level == -1)
+ return -1;
+ state->log_level = (gf_loglevel_t)log_level;
+ return 0;
+ }
- oarg = strtail (opt, "log-level=");
- if (oarg) {
- state->log_level = glusterd_check_log_level(oarg);
- if (state->log_level == -1)
- return -1;
- return 0;
+ oarg = strtail(opt, "glusterd-sock=");
+ if (oarg) {
+ state->glusterd_sock = oarg;
+ return 0;
+ }
+
+ oarg = strtail(opt, "secure-mgmt=");
+ if (oarg) {
+ if (gf_string2boolean(oarg, &secure_mgmt_tmp) == 0) {
+ if (secure_mgmt_tmp) {
+ /* See declaration for why this is an int. */
+ state->ctx->secure_mgmt = 1;
+ }
+ } else {
+ cli_err("invalid secure-mgmt value (ignored)");
}
+ return 0;
+ }
- return -1;
+ return -1;
}
int
-parse_cmdline (int argc, char *argv[], struct cli_state *state)
+parse_cmdline(int argc, char *argv[], struct cli_state *state)
{
- int ret = 0;
- int i = 0;
- int j = 0;
- char *opt = NULL;
-
- state->argc=argc-1;
- state->argv=&argv[1];
-
- for (i = 0; i < state->argc; i++) {
- opt = strtail (state->argv[i], "--");
- if (opt) {
- ret = cli_opt_parse (opt, state);
- if (ret == -1) {
- cli_out ("unrecognized option --%s", opt);
- return ret;
- }
- for (j = i; j < state->argc - 1; j++)
- state->argv[j] = state->argv[j + 1];
- state->argc--;
- /* argv shifted, next check should be at i again */
- i--;
- }
+ int ret = 0;
+ int i = 0;
+ int j = 0;
+ char *opt = NULL;
+
+ state->argc = argc - 1;
+ state->argv = &argv[1];
+
+ /* Do this first so that an option can override. */
+ if (sys_access(SECURE_ACCESS_FILE, F_OK) == 0) {
+ state->ctx->secure_mgmt = 1;
+ state->ctx->ssl_cert_depth = glusterfs_read_secure_access_file();
+ }
+
+ if (state->argc > GEO_REP_CMD_CONFIG_INDEX &&
+ strtail(state->argv[GEO_REP_CMD_INDEX], "geo") &&
+ strtail(state->argv[GEO_REP_CMD_CONFIG_INDEX], "co"))
+ goto done;
+
+ for (i = 0; i < state->argc; i++) {
+ opt = strtail(state->argv[i], "--");
+ if (!opt)
+ continue;
+ ret = cli_opt_parse(opt, state);
+ if (ret == -1) {
+ cli_out("unrecognized option --%s\n", opt);
+ usage();
+ return ret;
+ } else if (ret == -2) {
+ return ret;
+ }
+ for (j = i; j < state->argc - 1; j++)
+ state->argv[j] = state->argv[j + 1];
+ state->argc--;
+ /* argv shifted, next check should be at i again */
+ i--;
+ if (ret == 1) {
+ /* end of cli options */
+ ret = 0;
+ break;
}
+ }
- return ret;
-}
+done:
+ state->argv[state->argc] = NULL;
+ return ret;
+}
int
-cli_cmd_tree_init (struct cli_cmd_tree *tree)
+cli_cmd_tree_init(struct cli_cmd_tree *tree)
{
- struct cli_cmd_word *root = NULL;
- int ret = 0;
+ struct cli_cmd_word *root = NULL;
+ int ret = 0;
- root = &tree->root;
- root->tree = tree;
+ root = &tree->root;
+ root->tree = tree;
- return ret;
+ return ret;
}
-
int
-cli_state_init (struct cli_state *state)
+cli_state_init(struct cli_state *state)
{
- struct cli_cmd_tree *tree = NULL;
- int ret = 0;
+ struct cli_cmd_tree *tree = NULL;
+ int ret = 0;
+ state->log_level = GF_LOG_NONE;
- state->remote_host = "localhost";
- state->log_level = -1;
+ tree = &state->tree;
+ tree->state = state;
- tree = &state->tree;
- tree->state = state;
+ ret = cli_cmd_tree_init(tree);
- ret = cli_cmd_tree_init (tree);
-
- return ret;
+ return ret;
}
int
-cli_usage_out (const char *usage)
+cli_usage_out(const char *usage)
{
- GF_ASSERT (usage);
- GF_ASSERT (usage[0] != '\0');
+ GF_ASSERT(usage);
- if (!usage || usage[0] == '\0')
- return -1;
+ if (!usage || usage[0] == '\0')
+ return -1;
- cli_err ("Usage: %s", usage);
- return 0;
+ cli_err("\nUsage:\n%s\n", usage);
+ return 0;
}
int
-_cli_err (const char *fmt, ...)
+_cli_err(const char *fmt, ...)
{
- struct cli_state *state = NULL;
- va_list ap;
- int ret = 0;
-
- state = global_state;
+ va_list ap;
+ int ret = 0;
+#ifdef HAVE_READLINE
+ struct cli_state *state = global_state;
+#endif
- va_start (ap, fmt);
+ va_start(ap, fmt);
#ifdef HAVE_READLINE
- if (state->rl_enabled && !state->rl_processing)
- return cli_rl_err(state, fmt, ap);
+ if (state->rl_enabled && !state->rl_processing) {
+ ret = cli_rl_err(state, fmt, ap);
+ va_end(ap);
+ return ret;
+ }
#endif
- ret = vfprintf (stderr, fmt, ap);
- fprintf (stderr, "\n");
- va_end (ap);
+ ret = vfprintf(stderr, fmt, ap);
+ fprintf(stderr, "\n");
+ va_end(ap);
- return ret;
+ return ret;
}
-
int
-_cli_out (const char *fmt, ...)
+_cli_out(const char *fmt, ...)
{
- struct cli_state *state = NULL;
- va_list ap;
- int ret = 0;
-
- state = global_state;
-
- va_start (ap, fmt);
+ va_list ap;
+ int ret = 0;
+#ifdef HAVE_READLINE
+ struct cli_state *state = global_state;
+#endif
+ va_start(ap, fmt);
#ifdef HAVE_READLINE
- if (state->rl_enabled && !state->rl_processing)
- return cli_rl_out(state, fmt, ap);
+ if (state->rl_enabled && !state->rl_processing) {
+ ret = cli_rl_out(state, fmt, ap);
+ va_end(ap);
+ return ret;
+ }
#endif
- ret = vprintf (fmt, ap);
- printf ("\n");
- va_end (ap);
+ ret = vprintf(fmt, ap);
+ printf("\n");
+ va_end(ap);
- return ret;
+ return ret;
}
struct rpc_clnt *
-cli_rpc_init (struct cli_state *state)
+cli_quotad_clnt_rpc_init(void)
{
- struct rpc_clnt *rpc = NULL;
- dict_t *options = NULL;
- int ret = -1;
- int port = CLI_GLUSTERD_PORT;
- xlator_t *this = NULL;
-
+ struct rpc_clnt *rpc = NULL;
+ dict_t *rpc_opts = NULL;
+ int ret = -1;
+
+ rpc_opts = dict_new();
+ if (!rpc_opts) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str(rpc_opts, "transport.address-family", "unix");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str(rpc_opts, "transport-type", "socket");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str(rpc_opts, "transport.socket.connect-path",
+ "/var/run/gluster/quotad.socket");
+ if (ret)
+ goto out;
+
+ rpc = cli_quotad_clnt_init(THIS, rpc_opts);
+ if (!rpc)
+ goto out;
+
+ global_quotad_rpc = rpc;
+out:
+ if (rpc_opts) {
+ dict_unref(rpc_opts);
+ }
+ return rpc;
+}
- this = THIS;
- cli_rpc_prog = &cli_prog;
- options = dict_new ();
- if (!options)
- goto out;
+struct rpc_clnt *
+cli_rpc_init(struct cli_state *state)
+{
+ struct rpc_clnt *rpc = NULL;
+ dict_t *options = NULL;
+ int ret = -1;
+ int port = CLI_GLUSTERD_PORT;
+ xlator_t *this = NULL;
+#ifdef IPV6_DEFAULT
+ char *addr_family = "inet6";
+#else
+ char *addr_family = "inet";
+#endif
- ret = dict_set_str (options, "remote-host", state->remote_host);
+ this = THIS;
+ cli_rpc_prog = &cli_prog;
+
+ options = dict_new();
+ if (!options)
+ goto out;
+
+ /* If address family specified in CLI */
+ if (state->address_family) {
+ addr_family = state->address_family;
+ }
+
+ /* Connect to glusterd using the specified method, giving preference
+ * to a unix socket connection. If nothing is specified, connect to
+ * the default glusterd socket.
+ */
+ if (state->glusterd_sock) {
+ gf_log("cli", GF_LOG_INFO,
+ "Connecting to glusterd using "
+ "sockfile %s",
+ state->glusterd_sock);
+ ret = rpc_transport_unix_options_build(options, state->glusterd_sock,
+ 0);
if (ret)
- goto out;
+ goto out;
+ } else if (state->remote_host) {
+ gf_log("cli", GF_LOG_INFO,
+ "Connecting to remote glusterd at "
+ "%s",
+ state->remote_host);
+
+ ret = dict_set_str(options, "remote-host", state->remote_host);
+ if (ret)
+ goto out;
if (state->remote_port)
- port = state->remote_port;
+ port = state->remote_port;
- ret = dict_set_int32 (options, "remote-port", port);
+ ret = dict_set_int32(options, "remote-port", port);
if (ret)
- goto out;
+ goto out;
- ret = dict_set_str (options, "transport.address-family", "inet");
+ ret = dict_set_str(options, "transport.address-family", addr_family);
if (ret)
- goto out;
-
- rpc = rpc_clnt_new (options, this->ctx, this->name, 16);
+ goto out;
+ } else {
+ gf_log("cli", GF_LOG_DEBUG,
+ "Connecting to glusterd using "
+ "default socket");
+ ret = rpc_transport_unix_options_build(options,
+ DEFAULT_GLUSTERD_SOCKFILE, 0);
+ if (ret)
+ goto out;
+ }
- if (!rpc)
- goto out;
+ rpc = rpc_clnt_new(options, this, this->name, 16);
+ if (!rpc)
+ goto out;
- ret = rpc_clnt_register_notify (rpc, cli_rpc_notify, this);
- if (ret) {
- gf_log ("cli", GF_LOG_ERROR, "failed to register notify");
- goto out;
- }
+ ret = rpc_clnt_register_notify(rpc, cli_rpc_notify, this);
+ if (ret) {
+ gf_log("cli", GF_LOG_ERROR, "failed to register notify");
+ goto out;
+ }
- rpc_clnt_start (rpc);
+ ret = rpc_clnt_start(rpc);
out:
- if (ret) {
- if (rpc)
- rpc_clnt_unref (rpc);
- rpc = NULL;
- }
- return rpc;
+ if (options)
+ dict_unref(options);
+
+ if (ret) {
+ if (rpc)
+ rpc_clnt_unref(rpc);
+ rpc = NULL;
+ }
+ return rpc;
}
cli_local_t *
-cli_local_get ()
+cli_local_get()
{
- cli_local_t *local = NULL;
+ cli_local_t *local = NULL;
- local = GF_CALLOC (1, sizeof (*local), cli_mt_cli_local_t);
+ local = GF_CALLOC(1, sizeof(*local), cli_mt_cli_local_t);
+ LOCK_INIT(&local->lock);
+ INIT_LIST_HEAD(&local->dict_list);
- return local;
+ return local;
}
void
-cli_local_wipe (cli_local_t *local)
+cli_local_wipe(cli_local_t *local)
{
- if (local) {
- if (local->get_vol.volname)
- GF_FREE (local->get_vol.volname);
- if (local->dict)
- dict_unref (local->dict);
- GF_FREE (local);
- }
-
- return;
+ if (local) {
+ GF_FREE(local->get_vol.volname);
+ if (local->dict)
+ dict_unref(local->dict);
+ GF_FREE(local);
+ }
+
+ return;
}
struct cli_state *global_state;
int
-main (int argc, char *argv[])
+main(int argc, char *argv[])
{
- struct cli_state state = {0, };
- int ret = -1;
- glusterfs_ctx_t *ctx = NULL;
+ struct cli_state state = {
+ 0,
+ };
+ int ret = -1;
+ glusterfs_ctx_t *ctx = NULL;
- ret = glusterfs_globals_init ();
- if (ret)
- return ret;
+ mem_pools_init();
- ctx = glusterfs_ctx_get ();
- if (!ctx)
- return ENOMEM;
+ ctx = glusterfs_ctx_new();
+ if (!ctx)
+ return ENOMEM;
- ret = glusterfs_ctx_defaults_init (ctx);
- if (ret)
- goto out;
+#ifdef DEBUG
+ gf_mem_acct_enable_set(ctx);
+#endif
- ret = cli_state_init (&state);
- if (ret)
- goto out;
+ ret = glusterfs_globals_init(ctx);
+ if (ret)
+ return ret;
- state.ctx = ctx;
- global_state = &state;
+ THIS->ctx = ctx;
- ret = parse_cmdline (argc, argv, &state);
- if (ret)
- goto out;
+ ret = glusterfs_ctx_defaults_init(ctx);
+ if (ret)
+ goto out;
- ret = logging_init (&state);
- if (ret)
- goto out;
+ cli_default_conn_timeout = 120;
+ cli_ten_minutes_timeout = 600;
- global_rpc = cli_rpc_init (&state);
- if (!global_rpc)
- goto out;
+ ret = cli_state_init(&state);
+ if (ret)
+ goto out;
- ret = cli_cmds_register (&state);
- if (ret)
- goto out;
+ state.ctx = ctx;
+ global_state = &state;
- ret = cli_cmd_cond_init ();
- if (ret)
- goto out;
+ ret = parse_cmdline(argc, argv, &state);
+ if (ret)
+ goto out;
- ret = cli_input_init (&state);
- if (ret)
- goto out;
+ ret = logging_init(ctx, &state);
+ if (ret)
+ goto out;
+
+ gf_log("cli", GF_LOG_INFO, "Started running %s with version %s", argv[0],
+ PACKAGE_VERSION);
- ret = event_dispatch (ctx->event_pool);
+ global_rpc = cli_rpc_init(&state);
+ if (!global_rpc)
+ goto out;
+
+ global_quotad_rpc = cli_quotad_clnt_rpc_init();
+ if (!global_quotad_rpc)
+ goto out;
+
+ ret = cli_cmds_register(&state);
+ if (ret)
+ goto out;
+
+ ret = cli_input_init(&state);
+ if (ret)
+ goto out;
+
+ ret = gf_event_dispatch(ctx->event_pool);
out:
-// glusterfs_ctx_destroy (ctx);
+ // glusterfs_ctx_destroy (ctx);
- return ret;
+ mem_pools_fini();
+
+ return ret;
}
void
-cli_print_line (int len)
+cli_print_line(int len)
{
- GF_ASSERT (len > 0);
+ GF_ASSERT(len > 0);
+
+ while (len--)
+ printf("-");
- while (len--)
- printf ("-");
+ printf("\n");
+}
- printf ("\n");
+void
+print_quota_list_header(int type)
+{
+ if (type == GF_QUOTA_OPTION_TYPE_LIST) {
+ cli_out(
+ " Path Hard-limit "
+ " Soft-limit Used Available Soft-limit "
+ "exceeded? Hard-limit exceeded?");
+ cli_out(
+ "-----------------------------------------------------"
+ "-----------------------------------------------------"
+ "---------------------");
+ } else {
+ cli_out(
+ " Path Hard-limit "
+ " Soft-limit Files Dirs Available "
+ "Soft-limit exceeded? Hard-limit exceeded?");
+ cli_out(
+ "-----------------------------------------------------"
+ "-----------------------------------------------------"
+ "-------------------------------------");
+ }
+}
+
+void
+print_quota_list_empty(char *path, int type)
+{
+ if (type == GF_QUOTA_OPTION_TYPE_LIST)
+ cli_out("%-40s %7s %9s %10s %7s %15s %20s", path, "N/A", "N/A", "N/A",
+ "N/A", "N/A", "N/A");
+ else
+ cli_out("%-40s %9s %9s %12s %10s %10s %15s %20s", path, "N/A", "N/A",
+ "N/A", "N/A", "N/A", "N/A", "N/A");
}
diff --git a/cli/src/cli.h b/cli/src/cli.h
index 76b92e91d47..c0d933e8f8a 100644
--- a/cli/src/cli.h
+++ b/cli/src/cli.h
@@ -1,337 +1,516 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __CLI_H__
#define __CLI_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "rpc-clnt.h"
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "protocol-common.h"
+#include <glusterfs/logging.h>
+#include <glusterfs/quota-common-utils.h>
+
+#include "cli1-xdr.h"
+#include "gd-common-utils.h"
#if (HAVE_LIB_XML)
#include <libxml/encoding.h>
#include <libxml/xmlwriter.h>
#endif
-#define DEFAULT_EVENT_POOL_SIZE 16384
-#define CLI_GLUSTERD_PORT 24007
-#define CLI_DEFAULT_CONN_TIMEOUT 120
-#define CLI_DEFAULT_CMD_TIMEOUT 120
-#define CLI_TOP_CMD_TIMEOUT 600 //Longer timeout for volume top
-#define DEFAULT_CLI_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
-#define DEFAULT_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
-#define CLI_VOL_STATUS_BRICK_LEN 55
-#define CLI_TAB_LENGTH 8
-#define CLI_BRICK_STATUS_LINE_LEN 78
+#define DEFAULT_EVENT_POOL_SIZE 16384
+#define CLI_GLUSTERD_PORT 24007
+#define DEFAULT_CLI_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
+#define CLI_VOL_STATUS_BRICK_LEN 43
+#define CLI_TAB_LENGTH 8
+#define CLI_BRICK_STATUS_LINE_LEN 78
+
+/* Geo-rep command positional arguments' index */
+#define GEO_REP_CMD_INDEX 1
+#define GEO_REP_CMD_CONFIG_INDEX 4
enum argp_option_keys {
- ARGP_DEBUG_KEY = 133,
- ARGP_PORT_KEY = 'p',
+ ARGP_DEBUG_KEY = 133,
+ ARGP_PORT_KEY = 'p',
};
-#define GLUSTER_MODE_SCRIPT (1 << 0)
+extern int cli_default_conn_timeout;
+extern int cli_ten_minutes_timeout;
+
+typedef enum {
+ COLD_BRICK_COUNT,
+ COLD_TYPE,
+ COLD_DIST_COUNT,
+ COLD_REPLICA_COUNT,
+ COLD_ARBITER_COUNT,
+ COLD_DISPERSE_COUNT,
+ COLD_REDUNDANCY_COUNT,
+ HOT_BRICK_COUNT,
+ HOT_TYPE,
+ HOT_REPLICA_COUNT,
+ MAX
+} values;
+
+#define GLUSTER_MODE_SCRIPT (1 << 0)
#define GLUSTER_MODE_ERR_FATAL (1 << 1)
-#define GLUSTER_MODE_XML (1 << 2)
+#define GLUSTER_MODE_XML (1 << 2)
+#define GLUSTER_MODE_WIGNORE (1 << 3)
+#define GLUSTER_MODE_WIGNORE_PARTITION (1 << 4)
+#define GLUSTER_MODE_GLFSHEAL_NOLOG (1 << 5)
+
+#define GLUSTERD_GET_QUOTA_LIST_MOUNT_PATH(abspath, volname, path) \
+ do { \
+ snprintf(abspath, sizeof(abspath) - 1, \
+ DEFAULT_VAR_RUN_DIRECTORY "/%s_quota_list%s", volname, path); \
+ } while (0)
+
struct cli_state;
struct cli_cmd_word;
struct cli_cmd_tree;
struct cli_cmd;
-typedef int (cli_cmd_cbk_t)(struct cli_state *state,
- struct cli_cmd_word *word,
- const char **words,
- int wordcount);
-typedef void (cli_cmd_reg_cbk_t)( struct cli_cmd *this);
+extern char *cli_vol_status_str[];
+extern char *cli_vol_task_status_str[];
+
+typedef int(cli_cmd_cbk_t)(struct cli_state *state, struct cli_cmd_word *word,
+ const char **words, int wordcount);
+typedef void(cli_cmd_reg_cbk_t)(struct cli_cmd *this);
-typedef int (cli_cmd_match_t)(struct cli_cmd_word *word);
-typedef int (cli_cmd_filler_t)(struct cli_cmd_word *word);
+typedef int(cli_cmd_match_t)(struct cli_cmd_word *word);
+typedef int(cli_cmd_filler_t)(struct cli_cmd_word *word);
struct cli_cmd_word {
- struct cli_cmd_tree *tree;
- const char *word;
- cli_cmd_filler_t *filler;
- cli_cmd_match_t *match;
- cli_cmd_cbk_t *cbkfn;
- const char *desc;
- const char *pattern;
- int nextwords_cnt;
- struct cli_cmd_word **nextwords;
+ struct cli_cmd_tree *tree;
+ const char *word;
+ cli_cmd_filler_t *filler;
+ cli_cmd_match_t *match;
+ cli_cmd_cbk_t *cbkfn;
+ const char *desc;
+ const char *pattern;
+ int nextwords_cnt;
+ struct cli_cmd_word **nextwords;
};
-
struct cli_cmd_tree {
- struct cli_state *state;
- struct cli_cmd_word root;
+ struct cli_state *state;
+ struct cli_cmd_word root;
};
-
struct cli_state {
- int argc;
- char **argv;
+ int argc;
+ char **argv;
+
+ char debug;
- char debug;
+ /* for events dispatching */
+ glusterfs_ctx_t *ctx;
- /* for events dispatching */
- glusterfs_ctx_t *ctx;
+ /* registry of known commands */
+ struct cli_cmd_tree tree;
- /* registry of known commands */
- struct cli_cmd_tree tree;
+ /* the thread which "executes" the command in non-interactive mode */
+ /* also the thread which reads from stdin in non-readline mode */
+ pthread_t input;
- /* the thread which "executes" the command in non-interactive mode */
- /* also the thread which reads from stdin in non-readline mode */
- pthread_t input;
+ /* terminal I/O */
+ const char *prompt;
+ int rl_enabled;
+ int rl_async;
+ int rl_processing;
- /* terminal I/O */
- const char *prompt;
- int rl_enabled;
- int rl_async;
- int rl_processing;
+ /* autocompletion state */
+ char **matches;
+ char **matchesp;
- /* autocompletion state */
- char **matches;
- char **matchesp;
+ char *remote_host;
+ int remote_port;
+ int mode;
+ int await_connected;
- char *remote_host;
- int remote_port;
- int mode;
- int await_connected;
+ char *log_file;
+ gf_loglevel_t log_level;
- char *log_file;
- gf_loglevel_t log_level;
+ char *glusterd_sock;
+ char *address_family;
};
struct cli_local {
- struct {
- char *volname;
- int flags;
- } get_vol;
-
- dict_t *dict;
- /* Marker for volume status all */
- gf_boolean_t all;
+ struct {
+ char *volname;
+ int flags;
+ } get_vol;
+
+ dict_t *dict;
+ const char **words;
+ /* Marker for volume status all */
+ gf_boolean_t all;
#if (HAVE_LIB_XML)
- xmlTextWriterPtr writer;
- xmlBufferPtr buf;
- int vol_count;
+ xmlTextWriterPtr writer;
+ xmlDocPtr doc;
+ int vol_count;
#endif
+ gf_lock_t lock;
+ struct list_head dict_list;
};
struct cli_volume_status {
- int port;
- int online;
- uint64_t block_size;
- uint64_t total_inodes;
- uint64_t free_inodes;
- char *brick;
- char *pid_str;
- char *free;
- char *total;
-#ifdef GF_LINUX_HOST_OS
- char *fs_name;
- char *mount_options;
- char *device;
- char *inode_size;
-#endif
+ int port;
+ int rdma_port;
+ int online;
+ uint64_t block_size;
+ uint64_t total_inodes;
+ uint64_t free_inodes;
+ char *brick;
+ char *pid_str;
+ char *free;
+ char *total;
+ char *fs_name;
+ char *mount_options;
+ char *device;
+ char *inode_size;
+};
+
+struct snap_config_opt_vals_ {
+ char *op_name;
+ char *question;
};
typedef struct cli_volume_status cli_volume_status_t;
typedef struct cli_local cli_local_t;
-typedef ssize_t (*cli_serialize_t) (struct iovec outmsg, void *args);
+typedef ssize_t (*cli_serialize_t)(struct iovec outmsg, void *args);
extern struct cli_state *global_state; /* use only in readline callback */
-typedef const char *(*cli_selector_t) (void *wcon);
+extern struct rpc_clnt *global_quotad_rpc;
-void *cli_getunamb (const char *tok, void **choices, cli_selector_t sel);
+extern struct rpc_clnt *global_rpc;
-int cli_cmd_register (struct cli_cmd_tree *tree, struct cli_cmd *cmd);
-int cli_cmds_register (struct cli_state *state);
+extern rpc_clnt_prog_t *cli_rpc_prog;
-int cli_input_init (struct cli_state *state);
+typedef const char *(*cli_selector_t)(void *wcon);
-int cli_cmd_process (struct cli_state *state, int argc, char *argv[]);
-int cli_cmd_process_line (struct cli_state *state, const char *line);
+char *
+get_struct_variable(int mem_num, gf_gsync_status_t *sts_val);
-int cli_rl_enable (struct cli_state *state);
-int cli_rl_out (struct cli_state *state, const char *fmt, va_list ap);
-int cli_rl_err (struct cli_state *state, const char *fmt, va_list ap);
+void *
+cli_getunamb(const char *tok, void **choices, cli_selector_t sel);
-int cli_usage_out (const char *usage);
+int
+cli_cmd_register(struct cli_cmd_tree *tree, struct cli_cmd *cmd);
+int
+cli_cmds_register(struct cli_state *state);
-int _cli_out (const char *fmt, ...);
-int _cli_err (const char *fmt, ...);
+int
+cli_input_init(struct cli_state *state);
-#define cli_out(fmt...) do { \
- FMT_WARN (fmt); \
- \
- _cli_out(fmt); \
- \
- } while (0)
+int
+cli_cmd_process(struct cli_state *state, int argc, char *argv[]);
+int
+cli_cmd_process_line(struct cli_state *state, const char *line);
-#define cli_err(fmt...) do { \
- FMT_WARN (fmt); \
- \
- _cli_err(fmt); \
- \
- } while (0)
+int
+cli_rl_enable(struct cli_state *state);
+int
+cli_rl_out(struct cli_state *state, const char *fmt, va_list ap);
+int
+cli_rl_err(struct cli_state *state, const char *fmt, va_list ap);
int
-cli_submit_request (void *req, call_frame_t *frame,
- rpc_clnt_prog_t *prog,
- int procnum, struct iobref *iobref,
- xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
+cli_usage_out(const char *usage);
+
+int
+_cli_out(const char *fmt, ...);
+int
+_cli_err(const char *fmt, ...);
+
+#define cli_out(fmt...) \
+ do { \
+ FMT_WARN(fmt); \
+ \
+ _cli_out(fmt); \
+ \
+ } while (0)
+
+#define cli_err(fmt...) \
+ do { \
+ FMT_WARN(fmt); \
+ \
+ _cli_err(fmt); \
+ \
+ } while (0)
+
+#define usage() \
+ do { \
+ cli_out( \
+ " Usage: gluster [options] <help> <peer>" \
+ " <pool> <volume>\n" \
+ " Options:\n" \
+ " --help Shows the help information\n" \
+ " --version Shows the version\n" \
+ " --print-logdir Shows the log directory\n" \
+ " --print-statedumpdir Shows the state dump directory\n"); \
+ \
+ } while (0)
+
+int
+cli_submit_request(struct rpc_clnt *rpc, void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog, int procnum, struct iobref *iobref,
+ xlator_t *this, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc);
int32_t
-cli_cmd_volume_create_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_volume_create_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **bricks);
int32_t
-cli_cmd_volume_reset_parse (const char **words, int wordcount, dict_t **opt);
+cli_cmd_volume_reset_parse(const char **words, int wordcount, dict_t **opt);
int32_t
-cli_cmd_gsync_set_parse (const char **words, int wordcount, dict_t **opt);
+cli_cmd_gsync_set_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **opt, char **errstr);
int32_t
-cli_cmd_quota_parse (const char **words, int wordcount, dict_t **opt);
+cli_cmd_quota_parse(const char **words, int wordcount, dict_t **opt);
int32_t
-cli_cmd_volume_set_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_inode_quota_parse(const char **words, int wordcount, dict_t **opt);
int32_t
-cli_cmd_volume_add_brick_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_bitrot_parse(const char **words, int wordcount, dict_t **opt);
int32_t
-cli_cmd_volume_remove_brick_parse (const char **words, int wordcount,
- dict_t **options, int *question);
+cli_cmd_volume_set_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **op_errstr);
int32_t
-cli_cmd_volume_replace_brick_parse (const char **words, int wordcount,
+cli_cmd_ganesha_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **op_errstr);
+
+int32_t
+cli_cmd_get_state_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, char **op_errstr);
+
+int32_t
+cli_cmd_volume_add_brick_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options, int *type);
+
+int32_t
+cli_cmd_volume_remove_brick_parse(struct cli_state *state, const char **words,
+ int wordcount, dict_t **options,
+ int *question, int *brick_count,
+ int32_t *command);
+
+int32_t
+cli_cmd_volume_replace_brick_parse(const char **words, int wordcount,
dict_t **options);
int32_t
-cli_cmd_log_rotate_parse (const char **words, int wordcount, dict_t **options);
+cli_cmd_volume_reset_brick_parse(const char **words, int wordcount,
+ dict_t **options);
+
int32_t
-cli_cmd_log_locate_parse (const char **words, int wordcount, dict_t **options);
+cli_cmd_log_rotate_parse(const char **words, int wordcount, dict_t **options);
int32_t
-cli_cmd_log_filename_parse (const char **words, int wordcount, dict_t **options);
+cli_cmd_log_locate_parse(const char **words, int wordcount, dict_t **options);
+int32_t
+cli_cmd_log_filename_parse(const char **words, int wordcount, dict_t **options);
int32_t
-cli_cmd_volume_statedump_options_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_volume_statedump_options_parse(const char **words, int wordcount,
+ dict_t **options);
int32_t
-cli_cmd_volume_clrlks_opts_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_volume_clrlks_opts_parse(const char **words, int wordcount,
+ dict_t **options);
-cli_local_t * cli_local_get ();
+cli_local_t *
+cli_local_get();
void
-cli_local_wipe (cli_local_t *local);
+cli_local_wipe(cli_local_t *local);
+
+gf_boolean_t
+cli_cmd_connected();
int32_t
-cli_cmd_await_connected ();
+cli_cmd_await_connected(unsigned timeout);
int32_t
-cli_cmd_broadcast_connected ();
+cli_cmd_broadcast_connected(gf_boolean_t status);
int
-cli_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
- void *data);
+cli_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data);
int32_t
-cli_cmd_volume_profile_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_volume_profile_parse(const char **words, int wordcount,
+ dict_t **options);
int32_t
-cli_cmd_volume_top_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_volume_top_parse(const char **words, int wordcount, dict_t **options);
int32_t
-cli_cmd_log_level_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_log_level_parse(const char **words, int wordcount, dict_t **options);
int32_t
-cli_cmd_volume_status_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_volume_status_parse(const char **words, int wordcount,
+ dict_t **options);
int
-cli_cmd_volume_heal_options_parse (const char **words, int wordcount,
- dict_t **options);
+cli_cmd_volume_heal_options_parse(const char **words, int wordcount,
+ dict_t **options);
+
+int
+cli_cmd_volume_defrag_parse(const char **words, int wordcount,
+ dict_t **options);
int
-cli_print_brick_status (cli_volume_status_t *status);
+cli_print_brick_status(cli_volume_status_t *status);
void
-cli_print_detailed_status (cli_volume_status_t *status);
+cli_print_detailed_status(cli_volume_status_t *status);
int
-cli_get_detail_status (dict_t *dict, int i, cli_volume_status_t *status);
+cli_get_detail_status(dict_t *dict, int i, cli_volume_status_t *status);
void
-cli_print_line (int len);
+cli_print_line(int len);
+
+int
+cli_xml_output_str(char *op, char *str, int op_ret, int op_errno,
+ char *op_errstr);
-#if (HAVE_LIB_XML)
int
-cli_xml_output_str (char *op, char *str, int op_ret, int op_errno,
+cli_xml_output_dict(char *op, dict_t *dict, int op_ret, int op_errno,
char *op_errstr);
int
-cli_xml_output_dict (char *op, dict_t *dict, int op_ret, int op_errno,
- char *op_errstr);
+cli_xml_output_vol_top(dict_t *dict, int op_ret, int op_errno, char *op_errstr);
int
-cli_xml_output_vol_top (dict_t *dict, int op_ret, int op_errno,
+cli_xml_output_vol_profile(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_status_begin(cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_status_end(cli_local_t *local);
+
+int
+cli_xml_output_vol_status(cli_local_t *local, dict_t *dict);
+
+int
+cli_xml_output_vol_list(dict_t *dict, int op_ret, int op_errno,
char *op_errstr);
int
-cli_xml_output_vol_profile (dict_t *dict, int op_ret, int op_errno,
- char *op_errstr);
+cli_xml_output_vol_info_begin(cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_info_end(cli_local_t *local);
+
+int
+cli_xml_output_vol_info(cli_local_t *local, dict_t *dict);
int
-cli_xml_output_vol_status (dict_t *dict, int op_ret, int op_errno,
+cli_xml_output_vol_quota_limit_list_begin(cli_local_t *local, int op_ret,
+ int op_errno, char *op_errstr);
+int
+cli_xml_output_vol_quota_limit_list_end(cli_local_t *local);
+
+int
+cli_quota_list_xml_error(cli_local_t *local, char *path, char *errstr);
+
+int
+cli_quota_xml_output(cli_local_t *local, char *path, int64_t hl_str,
+ char *sl_final, int64_t sl_num, int64_t used,
+ int64_t avail, char *sl, char *hl, gf_boolean_t limit_set);
+
+int
+cli_quota_object_xml_output(cli_local_t *local, char *path, char *sl_str,
+ int64_t sl_val, quota_limits_t *limits,
+ quota_meta_t *used_space, int64_t avail, char *sl,
+ char *hl, gf_boolean_t limit_set);
+
+int
+cli_xml_output_peer_status(dict_t *dict, int op_ret, int op_errno,
char *op_errstr);
int
-cli_xml_output_vol_list (dict_t *dict, int op_ret, int op_errno,
+cli_xml_output_vol_rebalance(gf_cli_defrag_type op, dict_t *dict, int op_ret,
+ int op_errno, char *op_errstr);
+
+int
+cli_xml_output_vol_remove_brick(gf_boolean_t status_op, dict_t *dict,
+ int op_ret, int op_errno, char *op_errstr,
+ const char *op);
+
+int
+cli_xml_output_vol_replace_brick(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_create(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_generic_volume(char *op, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+int
+cli_xml_output_vol_gsync(dict_t *dict, int op_ret, int op_errno,
char *op_errstr);
+int
+cli_xml_output_vol_status_tasks_detail(cli_local_t *local, dict_t *dict);
int
-cli_xml_output_vol_info_begin (cli_local_t *local, int op_ret, int op_errno,
- char *op_errstr);
+cli_xml_snapshot_delete(cli_local_t *local, dict_t *dict, gf_cli_rsp *rsp);
int
-cli_xml_output_vol_info_end (cli_local_t *local);
+cli_xml_snapshot_begin_composite_op(cli_local_t *local);
int
-cli_xml_output_vol_info (cli_local_t *local, dict_t *dict);
+cli_xml_snapshot_end_composite_op(cli_local_t *local);
int
-cli_xml_output_vol_quota_limit_list (char *volname, char *limit_list,
- int op_ret, int op_errno,
- char *op_errstr);
-#endif
+cli_xml_output_snap_delete_begin(cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr);
+int
+cli_xml_output_snap_delete_end(cli_local_t *local);
+int
+cli_xml_output_snap_status_begin(cli_local_t *local, int op_ret, int op_errno,
+ char *op_errstr);
+int
+cli_xml_output_snap_status_end(cli_local_t *local);
+int
+cli_xml_output_snapshot(int cmd_type, dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+int
+cli_xml_snapshot_status_single_snap(cli_local_t *local, dict_t *dict,
+ char *key);
+int32_t
+cli_cmd_snapshot_parse(const char **words, int wordcount, dict_t **options,
+ struct cli_state *state);
+
+int
+cli_xml_output_vol_getopts(dict_t *dict, int op_ret, int op_errno,
+ char *op_errstr);
+
+void
+print_quota_list_header(int type);
+
+void
+print_quota_list_empty(char *path, int type);
+
+int
+gf_gsync_status_t_comparator(const void *p, const void *q);
#endif /* __CLI_H__ */
diff --git a/cli/src/input.c b/cli/src/input.c
index 004c9e300e5..5ac1a20edb1 100644
--- a/cli/src/input.c
+++ b/cli/src/input.c
@@ -1,107 +1,93 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <pthread.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "cli.h"
#include "cli-mem-types.h"
#define CMDBUFSIZ 1024
void *
-cli_batch (void *d)
+cli_batch(void *d)
{
- struct cli_state *state = NULL;
- int ret = 0;
+ struct cli_state *state = NULL;
+ int ret = 0;
- state = d;
+ state = d;
- ret = cli_cmd_process (state, state->argc, state->argv);
+ ret = cli_cmd_process(state, state->argc, state->argv);
- gf_log ("", GF_LOG_INFO, "Exiting with: %d", ret);
- exit (ret);
+ gf_log("", GF_LOG_INFO, "Exiting with: %d", ret);
+ exit(-ret);
- return NULL;
+ return NULL;
}
-
void *
-cli_input (void *d)
+cli_input(void *d)
{
- struct cli_state *state = NULL;
- int ret = 0;
- char cmdbuf[CMDBUFSIZ];
- char *cmd = NULL;
- size_t len = 0;
-
- state = d;
-
- for (;;) {
- printf ("%s", state->prompt);
-
- cmd = fgets (cmdbuf, CMDBUFSIZ, stdin);
- if (!cmd)
- break;
- len = strlen(cmd);
- if (len > 0 && cmd[len - 1] == '\n') //strip trailing \n
- cmd[len - 1] = '\0';
- ret = cli_cmd_process_line (state, cmd);
- if (ret != 0 && state->mode & GLUSTER_MODE_ERR_FATAL)
- break;
- }
-
- exit (-ret);
-
- return NULL;
+ struct cli_state *state = NULL;
+ int ret = 0;
+ char cmdbuf[CMDBUFSIZ];
+ char *cmd = NULL;
+ size_t len = 0;
+
+ state = d;
+
+ fprintf(stderr,
+ "Welcome to gluster prompt, type 'help' to see the available "
+ "commands.\n");
+ for (;;) {
+ printf("%s", state->prompt);
+
+ cmd = fgets(cmdbuf, CMDBUFSIZ, stdin);
+ if (!cmd)
+ break;
+ len = strlen(cmd);
+ if (len > 0 && cmd[len - 1] == '\n') // strip trailing \n
+ cmd[len - 1] = '\0';
+ ret = cli_cmd_process_line(state, cmd);
+ if (ret != 0 && state->mode & GLUSTER_MODE_ERR_FATAL)
+ break;
+ }
+
+ exit(-ret);
+
+ return NULL;
}
-
int
-cli_input_init (struct cli_state *state)
+cli_input_init(struct cli_state *state)
{
- int ret = 0;
+ int ret = 0;
- if (state->argc) {
- ret = pthread_create (&state->input, NULL, cli_batch, state);
- return ret;
- }
+ if (state->argc) {
+ ret = pthread_create(&state->input, NULL, cli_batch, state);
+ return ret;
+ }
- if (isatty (STDIN_FILENO)) {
- state->prompt = "gluster> ";
+ if (isatty(STDIN_FILENO)) {
+ state->prompt = "gluster> ";
- cli_rl_enable (state);
- } else {
- state->prompt = "";
- state->mode = GLUSTER_MODE_SCRIPT | GLUSTER_MODE_ERR_FATAL;
- }
+ cli_rl_enable(state);
+ } else {
+ state->prompt = "";
+ state->mode |= GLUSTER_MODE_SCRIPT | GLUSTER_MODE_ERR_FATAL;
+ }
- if (!state->rl_enabled)
- ret = pthread_create (&state->input, NULL, cli_input, state);
+ if (!state->rl_enabled)
+ ret = pthread_create(&state->input, NULL, cli_input, state);
- return ret;
+ return ret;
}
diff --git a/cli/src/registry.c b/cli/src/registry.c
index fc3b2decdf1..85f7686ade1 100644
--- a/cli/src/registry.c
+++ b/cli/src/registry.c
@@ -1,27 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
@@ -30,20 +15,18 @@
#include "cli.h"
#include "cli-cmd.h"
-
static int
-__is_spc (int ch)
+__is_spc(int ch)
{
- if (ch == ' ')
- return 1;
- return 0;
+ if (ch == ' ')
+ return 1;
+ return 0;
}
-
static int
-__is_div (int ch)
+__is_div(int ch)
{
- switch (ch) {
+ switch (ch) {
case '(':
case ')':
case '<':
@@ -53,369 +36,356 @@ __is_div (int ch)
case '{':
case '}':
case '|':
- return 1;
- }
+ return 1;
+ }
- return 0;
+ return 0;
}
-
static int
-__is_word (const char *word)
+__is_word(const char *word)
{
- return (!__is_div (*word) && !__is_spc (*word));
+ return (!__is_div(*word) && !__is_spc(*word));
}
-
int
-counter_char (int ch)
+counter_char(int ch)
{
- switch (ch) {
+ switch (ch) {
case '(':
- return ')';
+ return ')';
case '<':
- return '>';
+ return '>';
case '[':
- return ']';
+ return ']';
case '{':
- return '}';
- }
+ return '}';
+ }
- return -1;
+ return -1;
}
-
const char *
-__is_template_balanced (const char *template)
+__is_template_balanced(const char *template)
{
- const char *trav = NULL;
- int ch = 0;
-
- trav = template;
-
- while (*trav) {
- ch = *trav;
-
- switch (ch) {
- case '<':
- case '(':
- case '[':
- trav = __is_template_balanced (trav+1);
- if (!trav)
- return NULL;
- if (*trav != counter_char (ch))
- return NULL;
- break;
- case '>':
- case ')':
- case ']':
- return trav;
- }
+ const char *trav = NULL;
+ int ch = 0;
- trav++;
+ trav = template;
+
+ while (*trav) {
+ ch = *trav;
+
+ switch (ch) {
+ case '<':
+ case '(':
+ case '[':
+ trav = __is_template_balanced(trav + 1);
+ if (!trav)
+ return NULL;
+ if (*trav != counter_char(ch))
+ return NULL;
+ break;
+ case '>':
+ case ')':
+ case ']':
+ return trav;
}
- return trav;
-}
+ trav++;
+ }
+ return trav;
+}
int
-is_template_balanced (const char *template)
+is_template_balanced(const char *template)
{
- const char *trav = NULL;
+ const char *trav = NULL;
- trav = __is_template_balanced (template);
- if (!trav || *trav)
- return -1;
+ trav = __is_template_balanced(template);
+ if (!trav || *trav)
+ return -1;
- return 0;
+ return 0;
}
-
int
-cli_cmd_token_count (const char *template)
+cli_cmd_token_count(const char *template)
{
- int count = 0;
- const char *trav = NULL;
- int is_alnum = 0;
-
- for (trav = template; *trav; trav++) {
- switch (*trav) {
- case '<':
- case '>':
- case '(':
- case ')':
- case '[':
- case ']':
- case '{':
- case '}':
- case '|':
- count++;
- /* fall through */
- case ' ':
- is_alnum = 0;
- break;
- default:
- if (!is_alnum) {
- is_alnum = 1;
- count++;
- }
+ int count = 0;
+ const char *trav = NULL;
+ int is_alnum = 0;
+
+ for (trav = template; *trav; trav++) {
+ switch (*trav) {
+ case '<':
+ case '>':
+ case '(':
+ case ')':
+ case '[':
+ case ']':
+ case '{':
+ case '}':
+ case '|':
+ count++;
+ /* fall through */
+ case ' ':
+ is_alnum = 0;
+ break;
+ default:
+ if (!is_alnum) {
+ is_alnum = 1;
+ count++;
}
}
+ }
- return count + 1;
+ return count + 1;
}
-
void
-cli_cmd_tokens_destroy (char **tokens)
+cli_cmd_tokens_destroy(char **tokens)
{
- char **tokenp = NULL;
+ char **tokenp = NULL;
- if (!tokens)
- return;
+ if (!tokens)
+ return;
- tokenp = tokens;
- while (*tokenp) {
- free (*tokenp);
- tokenp++;
- }
+ tokenp = tokens;
+ while (*tokenp) {
+ free(*tokenp);
+ tokenp++;
+ }
- free (tokens);
+ free(tokens);
}
-
int
-cli_cmd_tokens_fill (char **tokens, const char *template)
+cli_cmd_tokens_fill(char **tokens, const char *template)
{
- const char *trav = NULL;
- char **tokenp = NULL;
- char *token = NULL;
- int ret = 0;
- int ch = 0;
+ const char *trav = NULL;
+ char **tokenp = NULL;
+ char *token = NULL;
+ int ret = 0;
+ int ch = 0;
- tokenp = tokens;
+ tokenp = tokens;
- for (trav = template; *trav; trav++) {
- ch = *trav;
+ for (trav = template; *trav; trav++) {
+ ch = *trav;
- if (__is_spc (ch))
- continue;
+ if (__is_spc(ch))
+ continue;
- if (__is_div (ch)) {
- token = calloc (2, 1);
- if (!token)
- return -1;
- token[0] = ch;
+ if (__is_div(ch)) {
+ token = calloc(2, 1);
+ if (!token)
+ return -1;
+ token[0] = ch;
- *tokenp = token;
- tokenp++;
+ *tokenp = token;
+ tokenp++;
- continue;
- }
+ continue;
+ }
- token = strdup (trav);
- *tokenp = token;
- tokenp++;
+ token = strdup(trav);
+ *tokenp = token;
+ tokenp++;
- for (token++; *token; token++) {
- if (__is_spc (*token) || __is_div (*token)) {
- *token = 0;
- break;
- }
- trav++;
- }
+ for (token++; *token; token++) {
+ if (__is_spc(*token) || __is_div(*token)) {
+ *token = 0;
+ break;
+ }
+ trav++;
}
+ }
- return ret;
+ return ret;
}
-
char **
-cli_cmd_tokenize (const char *template)
+cli_cmd_tokenize(const char *template)
{
- char **tokens = NULL;
- int ret = 0;
- int count = 0;
+ char **tokens = NULL;
+ int ret = 0;
+ int count = 0;
- ret = is_template_balanced (template);
- if (ret)
- return NULL;
+ ret = is_template_balanced(template);
+ if (ret)
+ return NULL;
- count = cli_cmd_token_count (template);
- if (count <= 0)
- return NULL;
+ count = cli_cmd_token_count(template);
+ if (count <= 0)
+ return NULL;
- tokens = calloc (count + 1, sizeof (char *));
- if (!tokens)
- return NULL;
+ tokens = calloc(count + 1, sizeof(char *));
+ if (!tokens)
+ return NULL;
- ret = cli_cmd_tokens_fill (tokens, template);
- if (ret)
- goto err;
+ ret = cli_cmd_tokens_fill(tokens, template);
+ if (ret)
+ goto err;
- return tokens;
+ return tokens;
err:
- cli_cmd_tokens_destroy (tokens);
- return NULL;
+ cli_cmd_tokens_destroy(tokens);
+ return NULL;
}
void *
-cli_getunamb (const char *tok, void **choices, cli_selector_t sel)
+cli_getunamb(const char *tok, void **choices, cli_selector_t sel)
{
- void **wcon = NULL;
- char *w = NULL;
- unsigned mn = 0;
- void *ret = NULL;
-
- if (!choices || !tok || !*tok)
- return NULL;
-
- for (wcon = choices; *wcon; wcon++) {
- w = strtail ((char *)sel (*wcon), tok);
- if (!w)
- /* no match */
- continue;
- if (!*w)
- /* exact match */
- return *wcon;
-
- ret = *wcon;
- mn++;
- }
+ void **wcon = NULL;
+ char *w = NULL;
+ unsigned mn = 0;
+ void *ret = NULL;
-#ifdef FORCE_MATCH_EXACT
+ if (!choices || !tok || !*tok)
return NULL;
+
+ for (wcon = choices; *wcon; wcon++) {
+ w = strtail((char *)sel(*wcon), tok);
+ if (!w)
+ /* no match */
+ continue;
+ if (!*w)
+ /* exact match */
+ return *wcon;
+
+ ret = *wcon;
+ mn++;
+ }
+
+#ifdef FORCE_MATCH_EXACT
+ return NULL;
#else
- return (mn == 1) ? ret : NULL;
+ return (mn == 1) ? ret : NULL;
#endif
}
static const char *
-sel_cmd_word (void *wcon)
+sel_cmd_word(void *wcon)
{
- return ((struct cli_cmd_word *)wcon)->word;
+ return ((struct cli_cmd_word *)wcon)->word;
}
struct cli_cmd_word *
-cli_cmd_nextword (struct cli_cmd_word *word, const char *token)
+cli_cmd_nextword(struct cli_cmd_word *word, const char *token)
{
- return (struct cli_cmd_word *)cli_getunamb (token,
- (void **)word->nextwords,
- sel_cmd_word);
+ return (struct cli_cmd_word *)cli_getunamb(token, (void **)word->nextwords,
+ sel_cmd_word);
}
-
struct cli_cmd_word *
-cli_cmd_newword (struct cli_cmd_word *word, const char *token)
+cli_cmd_newword(struct cli_cmd_word *word, const char *token)
{
- struct cli_cmd_word **nextwords = NULL;
- struct cli_cmd_word *nextword = NULL;
+ struct cli_cmd_word **nextwords = NULL;
+ struct cli_cmd_word *nextword = NULL;
- nextwords = realloc (word->nextwords,
- (word->nextwords_cnt + 2) * sizeof (*nextwords));
- if (!nextwords)
- return NULL;
+ nextwords = realloc(word->nextwords,
+ (word->nextwords_cnt + 2) * sizeof(*nextwords));
+ if (!nextwords)
+ return NULL;
- word->nextwords = nextwords;
+ word->nextwords = nextwords;
- nextword = calloc (1, sizeof (*nextword));
- if (!nextword)
- return NULL;
+ nextword = calloc(1, sizeof(*nextword));
+ if (!nextword)
+ return NULL;
- nextword->word = strdup (token);
- if (!nextword->word) {
- free (nextword);
- return NULL;
- }
+ nextword->word = strdup(token);
+ if (!nextword->word) {
+ free(nextword);
+ return NULL;
+ }
- nextword->tree = word->tree;
- nextwords[word->nextwords_cnt++] = nextword;
- nextwords[word->nextwords_cnt] = NULL;
+ nextword->tree = word->tree;
+ nextwords[word->nextwords_cnt++] = nextword;
+ nextwords[word->nextwords_cnt] = NULL;
- return nextword;
+ return nextword;
}
-
int
-cli_cmd_ingest (struct cli_cmd_tree *tree, char **tokens, cli_cmd_cbk_t *cbkfn,
- const char *desc, const char *pattern)
+cli_cmd_ingest(struct cli_cmd_tree *tree, char **tokens, cli_cmd_cbk_t *cbkfn,
+ const char *desc, const char *pattern)
{
- int ret = 0;
- char **tokenp = NULL;
- char *token = NULL;
- struct cli_cmd_word *word = NULL;
- struct cli_cmd_word *next = NULL;
+ int ret = 0;
+ char **tokenp = NULL;
+ char *token = NULL;
+ struct cli_cmd_word *word = NULL;
+ struct cli_cmd_word *next = NULL;
- word = &tree->root;
+ word = &tree->root;
- for (tokenp = tokens; (token = *tokenp); tokenp++) {
- if (!__is_word (token))
- break;
+ for (tokenp = tokens; (token = *tokenp); tokenp++) {
+ if (!__is_word(token))
+ break;
- next = cli_cmd_nextword (word, token);
- if (!next)
- next = cli_cmd_newword (word, token);
-
- word = next;
- if (!word)
- break;
- }
+ next = cli_cmd_nextword(word, token);
+ if (!next)
+ next = cli_cmd_newword(word, token);
+ word = next;
if (!word)
- return -1;
+ break;
+ }
- if (word->cbkfn) {
- /* warning - command already registered */
- }
+ if (!word)
+ return -1;
- word->cbkfn = cbkfn;
- word->desc = desc;
- word->pattern = pattern;
+ if (word->cbkfn) {
+ /* warning - command already registered */
+ }
- /* end of static strings in command template */
+ word->cbkfn = cbkfn;
+ word->desc = desc;
+ word->pattern = pattern;
- /* TODO: autocompletion beyond this point is just "nice to have" */
+ /* end of static strings in command template */
- return ret;
-}
+ /* TODO: autocompletion beyond this point is just "nice to have" */
+ return ret;
+}
int
-cli_cmd_register (struct cli_cmd_tree *tree, struct cli_cmd *cmd)
+cli_cmd_register(struct cli_cmd_tree *tree, struct cli_cmd *cmd)
{
- char **tokens = NULL;
- int ret = 0;
+ char **tokens = NULL;
+ int ret = 0;
- GF_ASSERT (cmd);
+ GF_ASSERT(cmd);
- if (cmd->reg_cbk)
- cmd->reg_cbk (cmd);
+ if (cmd->reg_cbk)
+ cmd->reg_cbk(cmd);
- if (cmd->disable) {
- ret = 0;
- goto out;
- }
+ if (cmd->disable) {
+ ret = 0;
+ goto out;
+ }
- tokens = cli_cmd_tokenize (cmd->pattern);
- if (!tokens) {
- ret = -1;
- goto out;
- }
+ tokens = cli_cmd_tokenize(cmd->pattern);
+ if (!tokens) {
+ ret = -1;
+ goto out;
+ }
- ret = cli_cmd_ingest (tree, tokens, cmd->cbk, cmd->desc, cmd->pattern);
- if (ret) {
- ret = -1;
- goto out;
- }
+ ret = cli_cmd_ingest(tree, tokens, cmd->cbk, cmd->desc, cmd->pattern);
+ if (ret) {
+ ret = -1;
+ goto out;
+ }
- ret = 0;
+ ret = 0;
out:
- if (tokens)
- cli_cmd_tokens_destroy (tokens);
+ if (tokens)
+ cli_cmd_tokens_destroy(tokens);
- gf_log ("cli", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_log("cli", GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
-
diff --git a/configure.ac b/configure.ac
index 2d3136a9d16..e2d6fd66cec 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,153 +1,455 @@
-dnl Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
-
-dnl This file is part of GlusterFS.
-dnl
-dnl GlusterFS is free software; you can redistribute it and/or modify
-dnl it under the terms of the GNU General Public License as published by
-dnl the Free Software Foundation; either version 3 of the License, or
-dnl (at your option) any later version.
+dnl Copyright (c) 2006-2016 Red Hat, Inc. <http://www.redhat.com>
+dnl This file is part of GlusterFS.
dnl
-dnl GlusterFS is distributed in the hope that it will be useful,
-dnl but WITHOUT ANY WARRANTY; without even the implied warranty of
-dnl MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-dnl GNU General Public License for more details.
-dnl
-dnl You should have received a copy of the GNU General Public License
-dnl along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-AC_INIT([glusterfs],[3git],[gluster-users@gluster.org],,[https://github.com/gluster/glusterfs.git])
+dnl This file is licensed to you under your choice of the GNU Lesser
+dnl General Public License, version 3 or any later version (LGPLv3 or
+dnl later), or the GNU General Public License, version 2 (GPLv2), in all
+dnl cases as published by the Free Software Foundation.
-AM_INIT_AUTOMAKE
+AC_INIT([glusterfs],
+ [m4_esyscmd([build-aux/pkg-version --version])],
+ [gluster-users@gluster.org],,[https://github.com/gluster/glusterfs.git])
-m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)])
+AC_SUBST([PACKAGE_RELEASE],
+ [m4_esyscmd([build-aux/pkg-version --release])])
-if make --help 2>&1 | grep -q no-print-directory; then
- AM_MAKEFLAGS="$AM_MAKEFLAGS --no-print-directory";
-fi
+AM_INIT_AUTOMAKE([tar-pax foreign])
-if make --help 2>&1 | grep -q quiet; then
- AM_MAKEFLAGS="$AM_MAKEFLAGS --quiet"
-fi
+# Removes warnings when using automake 1.14 around (...but option 'subdir-objects' is disabled )
+#but libglusterfs fails to build with contrib (Then are not set up that way?)
+#AM_INIT_AUTOMAKE([subdir-objects])
-if libtool --help 2>&1 | grep -q quiet; then
- AM_LIBTOOLFLAGS="--quiet";
-fi
+m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES(yes)])
-AM_CONFIG_HEADER([config.h])
+AC_CONFIG_HEADERS([config.h site.h])
AC_CONFIG_FILES([Makefile
- libglusterfs/Makefile
- libglusterfs/src/Makefile
- glusterfsd/Makefile
- glusterfsd/src/Makefile
+ libglusterfs/Makefile
+ libglusterfs/src/Makefile
+ libglusterd/Makefile
+ libglusterd/src/Makefile
+ geo-replication/src/peer_gsec_create
+ geo-replication/src/peer_mountbroker
+ geo-replication/src/peer_mountbroker.py
+ geo-replication/src/peer_georep-sshkey.py
+ extras/peer_add_secret_pub
+ geo-replication/syncdaemon/conf.py
+ geo-replication/gsyncd.conf
+ extras/snap_scheduler/conf.py
+ glusterfsd/Makefile
+ glusterfsd/src/Makefile
rpc/Makefile
rpc/rpc-lib/Makefile
rpc/rpc-lib/src/Makefile
rpc/rpc-transport/Makefile
rpc/rpc-transport/socket/Makefile
rpc/rpc-transport/socket/src/Makefile
- rpc/rpc-transport/rdma/Makefile
- rpc/rpc-transport/rdma/src/Makefile
rpc/xdr/Makefile
rpc/xdr/src/Makefile
- xlators/Makefile
- xlators/mount/Makefile
- xlators/mount/fuse/Makefile
- xlators/mount/fuse/src/Makefile
- xlators/mount/fuse/utils/mount.glusterfs
- xlators/mount/fuse/utils/mount_glusterfs
- xlators/mount/fuse/utils/Makefile
- xlators/storage/Makefile
- xlators/storage/posix/Makefile
- xlators/storage/posix/src/Makefile
- xlators/cluster/Makefile
- xlators/cluster/afr/Makefile
- xlators/cluster/afr/src/Makefile
- xlators/cluster/stripe/Makefile
- xlators/cluster/stripe/src/Makefile
- xlators/cluster/dht/Makefile
- xlators/cluster/dht/src/Makefile
- xlators/performance/Makefile
- xlators/performance/write-behind/Makefile
- xlators/performance/write-behind/src/Makefile
- xlators/performance/read-ahead/Makefile
- xlators/performance/read-ahead/src/Makefile
- xlators/performance/io-threads/Makefile
- xlators/performance/io-threads/src/Makefile
- xlators/performance/io-cache/Makefile
- xlators/performance/io-cache/src/Makefile
- xlators/performance/symlink-cache/Makefile
- xlators/performance/symlink-cache/src/Makefile
- xlators/performance/quick-read/Makefile
- xlators/performance/quick-read/src/Makefile
+ xlators/Makefile
+ xlators/meta/Makefile
+ xlators/meta/src/Makefile
+ xlators/mount/Makefile
+ xlators/mount/fuse/Makefile
+ xlators/mount/fuse/src/Makefile
+ xlators/mount/fuse/utils/mount.glusterfs
+ xlators/mount/fuse/utils/mount_glusterfs
+ xlators/mount/fuse/utils/Makefile
+ xlators/storage/Makefile
+ xlators/storage/posix/Makefile
+ xlators/storage/posix/src/Makefile
+ xlators/cluster/Makefile
+ xlators/cluster/afr/Makefile
+ xlators/cluster/afr/src/Makefile
+ xlators/cluster/dht/Makefile
+ xlators/cluster/dht/src/Makefile
+ xlators/cluster/ec/Makefile
+ xlators/cluster/ec/src/Makefile
+ xlators/performance/Makefile
+ xlators/performance/write-behind/Makefile
+ xlators/performance/write-behind/src/Makefile
+ xlators/performance/read-ahead/Makefile
+ xlators/performance/read-ahead/src/Makefile
+ xlators/performance/readdir-ahead/Makefile
+ xlators/performance/readdir-ahead/src/Makefile
+ xlators/performance/io-threads/Makefile
+ xlators/performance/io-threads/src/Makefile
+ xlators/performance/io-cache/Makefile
+ xlators/performance/io-cache/src/Makefile
+ xlators/performance/quick-read/Makefile
+ xlators/performance/quick-read/src/Makefile
+ xlators/performance/open-behind/Makefile
+ xlators/performance/open-behind/src/Makefile
xlators/performance/md-cache/Makefile
xlators/performance/md-cache/src/Makefile
- xlators/debug/Makefile
- xlators/debug/trace/Makefile
- xlators/debug/trace/src/Makefile
- xlators/debug/error-gen/Makefile
- xlators/debug/error-gen/src/Makefile
- xlators/debug/io-stats/Makefile
- xlators/debug/io-stats/src/Makefile
- xlators/protocol/Makefile
- xlators/protocol/auth/Makefile
- xlators/protocol/auth/addr/Makefile
- xlators/protocol/auth/addr/src/Makefile
- xlators/protocol/auth/login/Makefile
- xlators/protocol/auth/login/src/Makefile
- xlators/protocol/client/Makefile
- xlators/protocol/client/src/Makefile
- xlators/protocol/server/Makefile
- xlators/protocol/server/src/Makefile
- xlators/features/Makefile
- xlators/features/locks/Makefile
- xlators/features/locks/src/Makefile
- xlators/features/quota/Makefile
- xlators/features/quota/src/Makefile
+ xlators/performance/nl-cache/Makefile
+ xlators/performance/nl-cache/src/Makefile
+ xlators/debug/Makefile
+ xlators/debug/sink/Makefile
+ xlators/debug/sink/src/Makefile
+ xlators/debug/trace/Makefile
+ xlators/debug/trace/src/Makefile
+ xlators/debug/error-gen/Makefile
+ xlators/debug/error-gen/src/Makefile
+ xlators/debug/delay-gen/Makefile
+ xlators/debug/delay-gen/src/Makefile
+ xlators/debug/io-stats/Makefile
+ xlators/debug/io-stats/src/Makefile
+ xlators/protocol/Makefile
+ xlators/protocol/auth/Makefile
+ xlators/protocol/auth/addr/Makefile
+ xlators/protocol/auth/addr/src/Makefile
+ xlators/protocol/auth/login/Makefile
+ xlators/protocol/auth/login/src/Makefile
+ xlators/protocol/client/Makefile
+ xlators/protocol/client/src/Makefile
+ xlators/protocol/server/Makefile
+ xlators/protocol/server/src/Makefile
+ xlators/features/Makefile
+ xlators/features/arbiter/Makefile
+ xlators/features/arbiter/src/Makefile
+ xlators/features/thin-arbiter/Makefile
+ xlators/features/thin-arbiter/src/Makefile
+ xlators/features/changelog/Makefile
+ xlators/features/changelog/src/Makefile
+ xlators/features/changelog/lib/Makefile
+ xlators/features/changelog/lib/src/Makefile
+ xlators/features/locks/Makefile
+ xlators/features/locks/src/Makefile
+ xlators/features/quota/Makefile
+ xlators/features/quota/src/Makefile
xlators/features/marker/Makefile
xlators/features/marker/src/Makefile
- xlators/features/marker/utils/Makefile
- xlators/features/marker/utils/src/Makefile
- xlators/features/marker/utils/syncdaemon/Makefile
- xlators/features/read-only/Makefile
- xlators/features/read-only/src/Makefile
- xlators/features/mac-compat/Makefile
- xlators/features/mac-compat/src/Makefile
- xlators/features/quiesce/Makefile
- xlators/features/quiesce/src/Makefile
+ xlators/features/selinux/Makefile
+ xlators/features/selinux/src/Makefile
+ xlators/features/sdfs/Makefile
+ xlators/features/sdfs/src/Makefile
+ xlators/features/read-only/Makefile
+ xlators/features/read-only/src/Makefile
+ xlators/features/compress/Makefile
+ xlators/features/compress/src/Makefile
+ xlators/features/namespace/Makefile
+ xlators/features/namespace/src/Makefile
+ xlators/features/quiesce/Makefile
+ xlators/features/quiesce/src/Makefile
+ xlators/features/barrier/Makefile
+ xlators/features/barrier/src/Makefile
xlators/features/index/Makefile
xlators/features/index/src/Makefile
- xlators/encryption/Makefile
- xlators/encryption/rot-13/Makefile
- xlators/encryption/rot-13/src/Makefile
+ xlators/features/gfid-access/Makefile
+ xlators/features/gfid-access/src/Makefile
+ xlators/features/trash/Makefile
+ xlators/features/trash/src/Makefile
+ xlators/features/snapview-server/Makefile
+ xlators/features/snapview-server/src/Makefile
+ xlators/features/snapview-client/Makefile
+ xlators/features/snapview-client/src/Makefile
+ xlators/features/upcall/Makefile
+ xlators/features/upcall/src/Makefile
+ xlators/features/shard/Makefile
+ xlators/features/shard/src/Makefile
+ xlators/features/bit-rot/Makefile
+ xlators/features/bit-rot/src/Makefile
+ xlators/features/bit-rot/src/stub/Makefile
+ xlators/features/bit-rot/src/bitd/Makefile
+ xlators/features/leases/Makefile
+ xlators/features/leases/src/Makefile
+ xlators/features/cloudsync/Makefile
+ xlators/features/cloudsync/src/Makefile
+ xlators/features/utime/Makefile
+ xlators/features/utime/src/Makefile
+ xlators/features/cloudsync/src/cloudsync-plugins/Makefile
+ xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile
+ xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/Makefile
+ xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile
+ xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile
+ xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile
+ xlators/features/metadisp/Makefile
+ xlators/features/metadisp/src/Makefile
+ xlators/playground/Makefile
+ xlators/playground/template/Makefile
+ xlators/playground/template/src/Makefile
xlators/system/Makefile
xlators/system/posix-acl/Makefile
xlators/system/posix-acl/src/Makefile
- cli/Makefile
- cli/src/Makefile
- doc/Makefile
- extras/Makefile
- extras/init.d/Makefile
- extras/init.d/glusterd.plist
- extras/init.d/glusterd-Debian
- extras/init.d/glusterd-Redhat
- extras/init.d/glusterd-SuSE
- extras/benchmarking/Makefile
- extras/hook-scripts/Makefile
- contrib/fuse-util/Makefile
- contrib/uuid/uuid_types.h
xlators/nfs/Makefile
xlators/nfs/server/Makefile
xlators/nfs/server/src/Makefile
xlators/mgmt/Makefile
xlators/mgmt/glusterd/Makefile
xlators/mgmt/glusterd/src/Makefile
- glusterfs.spec])
+ cli/Makefile
+ cli/src/Makefile
+ doc/Makefile
+ extras/Makefile
+ extras/glusterd.vol
+ extras/cliutils/Makefile
+ extras/init.d/Makefile
+ extras/init.d/glusterd.plist
+ extras/init.d/glusterd-Debian
+ extras/init.d/glusterd-Redhat
+ extras/init.d/glusterd-FreeBSD
+ extras/init.d/glusterd-SuSE
+ extras/init.d/glustereventsd-Debian
+ extras/init.d/glustereventsd-Redhat
+ extras/init.d/glustereventsd-FreeBSD
+ extras/ganesha/Makefile
+ extras/ganesha/config/Makefile
+ extras/ganesha/scripts/Makefile
+ extras/ganesha/ocf/Makefile
+ extras/systemd/Makefile
+ extras/systemd/glusterd.service
+ extras/systemd/glustereventsd.service
+ extras/systemd/glusterfssharedstorage.service
+ extras/systemd/gluster-ta-volume.service
+ extras/run-gluster.tmpfiles
+ extras/benchmarking/Makefile
+ extras/hook-scripts/Makefile
+ extras/ocf/Makefile
+ extras/ocf/glusterd
+ extras/ocf/volume
+ extras/LinuxRPM/Makefile
+ extras/geo-rep/Makefile
+ extras/geo-rep/schedule_georep.py
+ extras/firewalld/Makefile
+ extras/hook-scripts/add-brick/Makefile
+ extras/hook-scripts/add-brick/pre/Makefile
+ extras/hook-scripts/add-brick/post/Makefile
+ extras/hook-scripts/create/Makefile
+ extras/hook-scripts/create/post/Makefile
+ extras/hook-scripts/delete/Makefile
+ extras/hook-scripts/delete/pre/Makefile
+ extras/hook-scripts/start/Makefile
+ extras/hook-scripts/start/post/Makefile
+ extras/hook-scripts/set/Makefile
+ extras/hook-scripts/set/post/Makefile
+ extras/hook-scripts/stop/Makefile
+ extras/hook-scripts/stop/pre/Makefile
+ extras/hook-scripts/reset/Makefile
+ extras/hook-scripts/reset/post/Makefile
+ extras/hook-scripts/reset/pre/Makefile
+ extras/python/Makefile
+ extras/snap_scheduler/Makefile
+ events/Makefile
+ events/src/Makefile
+ events/src/eventsapiconf.py
+ events/tools/Makefile
+ contrib/fuse-util/Makefile
+ contrib/umountd/Makefile
+ glusterfs-api.pc
+ libgfchangelog.pc
+ api/Makefile
+ api/src/Makefile
+ api/examples/Makefile
+ geo-replication/Makefile
+ geo-replication/src/Makefile
+ geo-replication/syncdaemon/Makefile
+ tools/Makefile
+ tools/gfind_missing_files/Makefile
+ heal/Makefile
+ heal/src/Makefile
+ glusterfs.spec
+ tools/glusterfind/src/tool.conf
+ tools/glusterfind/glusterfind
+ tools/glusterfind/Makefile
+ tools/glusterfind/src/Makefile
+ tools/setgfid2path/Makefile
+ tools/setgfid2path/src/Makefile])
AC_CANONICAL_HOST
AC_PROG_CC
+AC_DISABLE_STATIC
AC_PROG_LIBTOOL
+AC_SUBST([shrext_cmds])
+
+AC_CHECK_PROG([RPCGEN], [rpcgen], [yes], [no])
+
+if test "x$RPCGEN" = "xno"; then
+ AC_MSG_ERROR([`rpcgen` not found, glusterfs needs `rpcgen` exiting..])
+fi
+
+# Initialize CFLAGS before usage
+AC_ARG_ENABLE([debug],
+ AC_HELP_STRING([--enable-debug],
+ [Enable debug build options.]))
+if test "x$enable_debug" = "xyes"; then
+ BUILD_DEBUG=yes
+ GF_CFLAGS="${GF_CFLAGS} -g -O0 -DDEBUG"
+else
+ BUILD_DEBUG=no
+fi
+
+SANITIZER=none
+
+AC_ARG_ENABLE([asan],
+ AC_HELP_STRING([--enable-asan],
+ [Enable Address Sanitizer support]))
+if test "x$enable_asan" = "xyes"; then
+ SANITIZER=asan
+ AC_CHECK_LIB([asan], [__asan_init], ,
+ [AC_MSG_ERROR([--enable-asan requires libasan.so, exiting])])
+ GF_CFLAGS="${GF_CFLAGS} -O2 -g -fsanitize=address -fno-omit-frame-pointer"
+ GF_LDFLAGS="${GF_LDFLAGS} -lasan"
+fi
+
+AC_ARG_ENABLE([tsan],
+ AC_HELP_STRING([--enable-tsan],
+ [Enable Thread Sanitizer support]))
+if test "x$enable_tsan" = "xyes"; then
+ if test "x$SANITIZER" != "xnone"; then
+ AC_MSG_ERROR([only one sanitizer can be enabled at once])
+ fi
+ SANITIZER=tsan
+ AC_CHECK_LIB([tsan], [__tsan_init], ,
+ [AC_MSG_ERROR([--enable-tsan requires libtsan.so, exiting])])
+ if test "x$ac_cv_lib_tsan___tsan_init" = xyes; then
+ AC_MSG_CHECKING([whether tsan API can be used])
+ saved_CFLAGS=${CFLAGS}
+ CFLAGS="${CFLAGS} -fsanitize=thread"
+ AC_COMPILE_IFELSE(
+ [AC_LANG_PROGRAM([
+ [#include <sanitizer/tsan_interface.h>]],
+ [[__tsan_create_fiber(0)]])],
+ [TSAN_API=yes], [TSAN_API=no])
+ AC_MSG_RESULT([$TSAN_API])
+ if test x$TSAN_API = "xyes"; then
+ AC_DEFINE(HAVE_TSAN_API, 1, [Define if tsan API can be used.])
+ fi
+ CFLAGS=${saved_CFLAGS}
+ fi
+ GF_CFLAGS="${GF_CFLAGS} -O2 -g -fsanitize=thread -fno-omit-frame-pointer"
+ GF_LDFLAGS="${GF_LDFLAGS} -ltsan"
+fi
+
+AC_ARG_ENABLE([ubsan],
+ AC_HELP_STRING([--enable-ubsan],
+ [Enable Undefined Behavior Sanitizer support]))
+if test "x$enable_ubsan" = "xyes"; then
+ if test "x$SANITIZER" != "xnone"; then
+ AC_MSG_ERROR([only one sanitizer can be enabled at once])
+ fi
+ SANITIZER=ubsan
+ AC_CHECK_LIB([ubsan], [__ubsan_default_options], ,
+ [AC_MSG_ERROR([--enable-ubsan requires libubsan.so, exiting])])
+ GF_CFLAGS="${GF_CFLAGS} -O2 -g -fsanitize=undefined -fno-omit-frame-pointer"
+ GF_LDFLAGS="${GF_LDFLAGS} -lubsan"
+fi
+
+# Initialize CFLAGS before usage
+BUILD_TCMALLOC=no
+AC_ARG_ENABLE([tcmalloc],
+ AC_HELP_STRING([--enable-tcmalloc],
+ [Enable linking with tcmalloc library.]))
+if test "x$enable_tcmalloc" = "xyes"; then
+ BUILD_TCMALLOC=yes
+ GF_CFLAGS="${GF_CFLAGS} -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-free"
+ AC_CHECK_LIB([tcmalloc], [malloc], [],
+ [AC_MSG_ERROR([when --enable-tcmalloc is used, tcmalloc library needs to be present])])
+ GF_LDFLAGS="-ltcmalloc ${GF_LDFLAGS}"
+fi
+
+
+dnl When possible, prefer libtirpc over glibc rpc.
+dnl
+dnl On newer linux with only libtirpc, use libtirpc. (Specifying
+dnl --without-libtirpc is an error.)
+dnl
+dnl on older linux with glibc rpc and WITH libtirpc, use libtirpc
+dnl by default except when configured with --without-libtirpc.
+dnl
+dnl on old linux with glibc rpc and WITHOUT libtirpc, default to
+dnl use glibc rpc.
+dnl
+AC_ARG_WITH([libtirpc],
+ [AC_HELP_STRING([--without-libtirpc], [Use legacy glibc RPC.])],
+ [with_libtirpc="no"], [with_libtirpc="yes"])
+
+dnl ipv6-default is off by default
+dnl
+dnl ipv6-default requires libtirpc. (glibc rpc does not support IPv6.)
+dnl ipv6-default can only be enabled if libtipc is enabled.
+dnl
+AC_ARG_WITH([ipv6-default],
+ AC_HELP_STRING([--with-ipv6-default], [Set IPv6 as default.]),
+ [with_ipv6_default=${with_libtirpc}], [with_ipv6_default="no"])
+
+AC_CHECK_FILE([/etc/centos-release])
+if test "x$ac_cv_file__etc_centos_release" = "xyes"; then
+ if grep "release 6" /etc/centos-release; then
+ with_ipv6_default="no"
+ fi
+fi
+
+dnl On some distributions '-ldl' isn't automatically added to LIBS
+AC_CHECK_LIB([dl], [dlopen], [LIB_DL=-ldl])
+AC_SUBST(LIB_DL)
+
+AC_ARG_ENABLE([privport_tracking],
+ AC_HELP_STRING([--disable-privport_tracking],
+ [Disable internal tracking of privileged ports.]))
+TRACK_PRIVPORTS="yes"
+if test x"$enable_privport_tracking" = x"no"; then
+ TRACK_PRIVPORTS="no"
+ AC_DEFINE(GF_DISABLE_PRIVPORT_TRACKING, 1,
+ [Disable internal tracking of privileged ports.])
+fi
+
+case $host_os in
+ darwin*)
+ if ! test "`/usr/bin/sw_vers | grep ProductVersion: | cut -f 2 | cut -d. -f2`" -ge 7; then
+ AC_MSG_ERROR([You need at least OS X 10.7 (Lion) to build Glusterfs])
+ fi
+ # OSX version lesser than 9 has llvm/clang optimization issues which leads to various segfaults
+ if test "`/usr/bin/sw_vers | grep ProductVersion: | cut -f 2 | cut -d. -f2`" -lt 9; then
+ GF_CFLAGS="${GF_CFLAGS} -g -O0 -DDEBUG"
+ fi
+ ;;
+esac
+
+# --enable-valgrind prevents calling dlclose(), this leaks memory
+AC_ARG_ENABLE([valgrind],
+ AC_HELP_STRING([--enable-valgrind@<:@=memcheck,drd@:>@],
+ [Enable valgrind for resource leak (memcheck, which is
+ the default) or thread synchronization (drd) debugging.]))
+case x$enable_valgrind in
+ xmemcheck|xyes)
+ AC_DEFINE(RUN_WITH_MEMCHECK, 1,
+ [Define if all processes should run under 'valgrind --tool=memcheck'.])
+ VALGRIND_TOOL=memcheck
+ ;;
+ xdrd)
+ AC_DEFINE(RUN_WITH_DRD, 1,
+ [Define if all processes should run under 'valgrind --tool=drd'.])
+ VALGRIND_TOOL=drd
+ ;;
+ x|xno)
+ VALGRIND_TOOL=no
+ ;;
+ *)
+ AC_MSG_ERROR([Please specify --enable-valgrind@<:@=memcheck,drd@:>@])
+ ;;
+esac
+
+AC_ARG_WITH([previous-options],
+ [AS_HELP_STRING([--with-previous-options],
+ [read config.status for configure options])
+ ],
+ [ if test -r ./config.status && \
+ args=$(grep 'ac_cs_config=' config.status | \
+ sed -e 's/.*"\(.*\)".*/\1/' -e "s/'//g" -e "s/--with-previous-options//g") ; then
+ echo "###"
+ echo "### Rerunning as '$0 $args'"
+ echo "###"
+ exec $0 $args
+ fi
+ ])
+
+AC_ARG_WITH(pkgconfigdir,
+ [ --with-pkgconfigdir=DIR pkgconfig file in DIR @<:@LIBDIR/pkgconfig@:>@],
+ [pkgconfigdir=$withval],
+ [pkgconfigdir='${libdir}/pkgconfig'])
+AC_SUBST(pkgconfigdir)
AC_ARG_WITH(mountutildir,
[ --with-mountutildir=DIR mount helper utility in DIR @<:@/sbin@:>@],
@@ -155,6 +457,13 @@ AC_ARG_WITH(mountutildir,
[mountutildir='/sbin'])
AC_SUBST(mountutildir)
+AC_ARG_WITH(systemddir,
+ [ --with-systemddir=DIR systemd service files in DIR @<:@PREFIX/lib/systemd/system@:>@],
+ [systemddir=$withval],
+ [systemddir='${prefix}/lib/systemd/system'])
+AC_SUBST(systemddir)
+AM_CONDITIONAL([USE_SYSTEMD], test [ -d '/usr/lib/systemd/system' ])
+
AC_ARG_WITH(initdir,
[ --with-initdir=DIR init.d scripts in DIR @<:@/etc/init.d@:>@],
[initdir=$withval],
@@ -167,6 +476,27 @@ AC_ARG_WITH(launchddir,
[launchddir='/Library/LaunchDaemons'])
AC_SUBST(launchddir)
+AC_ARG_WITH(tmpfilesdir,
+ AC_HELP_STRING([--with-tmpfilesdir=DIR],
+ [tmpfiles config in DIR, disabled by default]),
+ [tmpfilesdir=$withval],
+ [tmpfilesdir=''])
+AC_SUBST(tmpfilesdir)
+
+AC_ARG_WITH([ocf],
+ [AS_HELP_STRING([--without-ocf], [build OCF-compliant cluster resource agents])],
+ ,
+ [OCF_SUBDIR='ocf'],
+ )
+AC_SUBST(OCF_SUBDIR)
+
+AC_ARG_WITH([server],
+ [AS_HELP_STRING([--without-server], [do not build server components])],
+ [with_server='no'],
+ [with_server='yes'],
+ )
+AM_CONDITIONAL([WITH_SERVER], [test x$with_server = xyes])
+
# LEX needs a check
AC_PROG_LEX
if test "x${LEX}" != "xflex" -a "x${FLEX}" != "xlex"; then
@@ -203,20 +533,110 @@ AC_CHECK_LIB([pthread], [pthread_mutex_init], , AC_MSG_ERROR([Posix threads libr
AC_CHECK_FUNC([dlopen], [has_dlopen=yes], AC_CHECK_LIB([dl], [dlopen], , AC_MSG_ERROR([Dynamic linking library required to build glusterfs])))
+AC_CHECK_LIB([readline], [rl_do_undo], [RL_UNDO="yes"], [RL_UNDO="no"])
+
+AC_CHECK_LIB([intl], [gettext])
AC_CHECK_HEADERS([sys/xattr.h])
+AC_CHECK_HEADERS([sys/ioctl.h], AC_DEFINE(HAVE_IOCTL_IN_SYS_IOCTL_H, 1, [have sys/ioctl.h]))
+
AC_CHECK_HEADERS([sys/extattr.h])
-AC_CHECK_HEADERS([openssl/md5.h])
+AC_CHECK_HEADERS([openssl/dh.h])
+
+AC_CHECK_HEADERS([openssl/ecdh.h])
+
+AC_CHECK_LIB([ssl], [SSL_CTX_get0_param], [AC_DEFINE([HAVE_SSL_CTX_GET0_PARAM], [1], [define if found OpenSSL SSL_CTX_get0_param])])
+
+dnl Math library
+AC_CHECK_LIB([m], [pow], [MATH_LIB='-lm'], [MATH_LIB=''])
+AC_SUBST(MATH_LIB)
+
+dnl depend on libuuid.so
+PKG_CHECK_MODULES([UUID], [uuid],
+ [have_uuid=yes
+ AC_DEFINE(HAVE_LIBUUID, 1, [have libuuid.so])
+ PKGCONFIG_UUID=uuid],
+ [have_uuid=no])
+AM_CONDITIONAL([HAVE_LIBUUID], [test x$have_uuid = xyes])
+
+dnl older version of libuuid (from e2fsprogs) require including uuid/uuid.h
+saved_CFLAGS=${CFLAGS}
+CFLAGS="${CFLAGS} ${UUID_CFLAGS}"
+AC_CHECK_HEADER([uuid.h], [], [AC_CHECK_HEADER([uuid/uuid.h])],
+ [[#if HAVE_UUID_H
+ #include <uuid.h>
+ #endif
+ ]])
+CFLAGS=${saved_CFLAGS}
+if test "x$ac_cv_header_uuid_uuid_h" = "xyes"; then
+ UUID_CFLAGS="${UUID_CFLAGS} -I$(pkg-config --variable=includedir uuid)/uuid"
+ have_uuid=yes
+fi
+
+if test "x$have_uuid" != "xyes"; then
+ case $host_os in
+ *freebsd*)
+ AC_MSG_ERROR([e2fsprogs-libuuid is required to build glusterfs])
+ ;;
+ linux*)
+ AC_MSG_ERROR([libuuid is required to build glusterfs])
+ ;;
+ *)
+ AC_MSG_ERROR([a Linux compatible libuuid is required to build glusterfs])
+ ;;
+ esac
+fi
+
+dnl libglusterfs needs uuid.h, practically everything depends on it
+GF_CFLAGS="${GF_CFLAGS} ${UUID_CFLAGS}"
+dnl PKGCONFIG_UUID is used for the dependency in *.pc.in files
+AC_SUBST(PKGCONFIG_UUID)
+dnl NetBSD does not support POSIX ACLs :-(
case $host_os in
- darwin*)
- if ! test "`/usr/bin/sw_vers | grep ProductVersion: | cut -f 2 | cut -d. -f2`" -ge 5; then
- AC_MSG_ERROR([You need at least OS X 10.5 (Leopard) to build Glusterfs])
- fi
- ;;
+ *netbsd* | darwin*)
+ AC_MSG_WARN([platform does not support POSIX ACLs... disabling them])
+ ACL_LIBS=''
+ USE_POSIX_ACLS='0'
+ BUILD_POSIX_ACLS='no'
+ ;;
+ *)
+ AC_CHECK_HEADERS([sys/acl.h], ,
+ AC_MSG_ERROR([Support for POSIX ACLs is required]))
+ USE_POSIX_ACLS='1'
+ BUILD_POSIX_ACLS='yes'
+ case $host_os in
+ linux*)
+ ACL_LIBS='-lacl'
+ ;;
+ solaris*)
+ ACL_LIBS='-lsec'
+ ;;
+ *freebsd*)
+ ACL_LIBS='-lc'
+ ;;
+ darwin*)
+ ACL_LIBS='-lc'
+ ;;
+ esac
+ if test "x${ACL_LIBS}" = "x-lacl"; then
+ AC_CHECK_HEADERS([acl/libacl.h], , AC_MSG_ERROR([libacl is required for building on ${host_os}]))
+ fi
+ ;;
esac
+AC_SUBST(ACL_LIBS)
+AC_SUBST(USE_POSIX_ACLS)
+
+# libglusterfs/checksum
+AC_CHECK_HEADERS([openssl/md5.h])
+AC_CHECK_LIB([z], [adler32], [ZLIB_LIBS="-lz"], AC_MSG_ERROR([zlib is required to build glusterfs]))
+AC_SUBST(ZLIB_LIBS)
+
+AC_CHECK_HEADERS([linux/falloc.h])
+
+AC_CHECK_HEADERS([linux/oom.h], AC_DEFINE(HAVE_LINUX_OOM_H, 1, [have linux/oom.h]))
dnl Mac OS X does not have spinlocks
AC_CHECK_FUNC([pthread_spin_init], [have_spinlock=yes])
@@ -240,11 +660,68 @@ if test "x${have_setfsuid}" = "xyes" -a "x${have_setfsgid}" = "xyes"; then
AC_DEFINE(HAVE_SET_FSID, 1, [define if found setfsuid setfsgid])
fi
+dnl test umount2 function
+AC_CHECK_FUNC([umount2], [have_umount2=yes])
+
+if test "x${have_umount2}" = "xyes"; then
+ AC_DEFINE(HAVE_UMOUNT2, 1, [define if found umount2])
+fi
+
+dnl Check Python Availability
+have_python=no
+dnl if the user has not specified a python, pick one
+if test -z "${PYTHON}"; then
+ case $host_os in
+ freebsd*)
+ if test -x /usr/local/bin/python3; then
+ PYTHON=/usr/local/bin/python3
+ else
+ PYTHON=/usr/local/bin/python2
+ fi
+ ;;
+ *)
+ if test -x /usr/bin/python3; then
+ PYTHON=/usr/bin/python3
+ else
+ PYTHON=/usr/bin/python2
+ fi
+ ;;
+ esac
+fi
+AM_PATH_PYTHON([2.6],,[:])
+if test -n "${PYTHON}"; then
+ have_python=yes
+fi
+AM_CONDITIONAL(HAVE_PYTHON, test "x$have_python" = "xyes")
+
+dnl Use pkg-config to get runtime search path missing from ${PYTHON}-config
+dnl Just do "true" on failure so that configure does not bail out
+dnl Note: python 2.6's devel pkg (e.g. in CentOS/RHEL 6) does not have
+dnl pkg-config files, so this work-around instead
+if test "x${PYTHON_VERSION}" = "x2.6"; then
+ PYTHON_CFLAGS=$(python-config --includes)
+ PYTHON_LIBS=$(python-config --libs)
+else
+ PKG_CHECK_MODULES([PYTHON], "python-${PYTHON_VERSION}",,true)
+fi
+
+PYTHON_CFLAGS=$(echo ${PYTHON_CFLAGS} | sed -e 's|-I|-isystem |')
+
+BUILD_PYTHON_SITE_PACKAGES=${pythondir}
+AC_SUBST(BUILD_PYTHON_SITE_PACKAGES)
+
+# Eval two times to expand fully. First eval replaces $exec_prefix into $prefix
+# Second eval will expand $prefix
+build_python_site_packages_temp="${pythondir}"
+eval build_python_site_packages_temp=\"${build_python_site_packages_temp}\"
+eval build_python_site_packages_temp=\"${build_python_site_packages_temp}\"
+BUILD_PYTHON_SITE_PACKAGES_EXPANDED=${build_python_site_packages_temp}
+AC_SUBST(BUILD_PYTHON_SITE_PACKAGES_EXPANDED)
# FUSE section
AC_ARG_ENABLE([fuse-client],
- AC_HELP_STRING([--disable-fuse-client],
- [Do not build the fuse client. NOTE: you cannot mount glusterfs without the client]))
+ AC_HELP_STRING([--disable-fuse-client],
+ [Do not build the fuse client. NOTE: you cannot mount glusterfs without the client]))
BUILD_FUSE_CLIENT=no
if test "x$enable_fuse_client" != "xno"; then
@@ -253,77 +730,64 @@ if test "x$enable_fuse_client" != "xno"; then
fi
AC_SUBST(FUSE_CLIENT_SUBDIR)
+
+AC_ARG_ENABLE([fuse-notifications],
+ AS_HELP_STRING([--disable-fuse-notifications], [Disable FUSE notifications]))
+
+AS_IF([test "x$enable_fuse_notifications" != "xno"], [
+ AC_DEFINE([HAVE_FUSE_NOTIFICATIONS], [1], [Use FUSE notifications])
+])
+
# end FUSE section
+AC_CHECK_LIB([ssl], TLS_method, [HAVE_OPENSSL_1_1="yes"], [HAVE_OPENSSL_1_1="no"])
+if test "x$HAVE_OPENSSL_1_1" = "xyes"; then
+ AC_DEFINE([HAVE_TLS_METHOD], [1], [Using OpenSSL-1.1 TLS_method])
+else
+ AC_CHECK_LIB([ssl], TLSv1_2_method, [AC_DEFINE([HAVE_TLSV1_2_METHOD], [1], [Using OpenSSL-1.0 TLSv1_2_method])])
+fi
+
+
# FUSERMOUNT section
AC_ARG_ENABLE([fusermount],
- AC_HELP_STRING([--enable-fusermount],
- [Build fusermount]))
-
-BUILD_FUSERMOUNT="no"
-if test "x$enable_fusermount" = "xyes"; then
- FUSERMOUNT_SUBDIR="contrib/fuse-util"
- BUILD_FUSERMOUNT="yes"
- AC_DEFINE(GF_FUSERMOUNT, 1, [Use our own fusermount])
+ AC_HELP_STRING([--disable-fusermount],
+ [Use system's fusermount]))
+
+BUILD_FUSERMOUNT="yes"
+if test "x$enable_fusermount" = "xno"; then
+ BUILD_FUSERMOUNT="no"
+else
+ AC_DEFINE(GF_FUSERMOUNT, 1, [Use our own fusermount])
+ FUSERMOUNT_SUBDIR="contrib/fuse-util"
fi
AC_SUBST(FUSERMOUNT_SUBDIR)
#end FUSERMOUNT section
-
# EPOLL section
AC_ARG_ENABLE([epoll],
- AC_HELP_STRING([--disable-epoll],
- [Use poll instead of epoll.]))
+ AC_HELP_STRING([--disable-epoll],
+ [Use poll instead of epoll.]))
BUILD_EPOLL=no
if test "x$enable_epoll" != "xno"; then
AC_CHECK_HEADERS([sys/epoll.h],
[BUILD_EPOLL=yes],
- [BUILD_EPOLL=no])
+ [BUILD_EPOLL=no])
fi
# end EPOLL section
-
-# IBVERBS section
-AC_ARG_ENABLE([ibverbs],
- AC_HELP_STRING([--disable-ibverbs],
- [Do not build the ibverbs transport]))
-
-if test "x$enable_ibverbs" != "xno"; then
- AC_CHECK_LIB([ibverbs],
- [ibv_get_device_list],
- [HAVE_LIBIBVERBS="yes"],
- [HAVE_LIBIBVERBS="no"])
-fi
-
-if test "x$enable_ibverbs" = "xyes" -a "x$HAVE_LIBIBVERBS" = "xno"; then
- echo "ibverbs requested but not found."
- exit 1
-fi
-
-BUILD_RDMA=no
-BUILD_IBVERBS=no
-if test "x$enable_ibverbs" != "xno" -a "x$HAVE_LIBIBVERBS" = "xyes"; then
- IBVERBS_SUBDIR=ib-verbs
- BUILD_IBVERBS=yes
- RDMA_SUBDIR=rdma
- BUILD_RDMA=yes
-fi
-
-AC_SUBST(IBVERBS_SUBDIR)
-AC_SUBST(RDMA_SUBDIR)
-# end IBVERBS section
-
-
# SYNCDAEMON section
AC_ARG_ENABLE([georeplication],
- AC_HELP_STRING([--disable-georeplication],
- [Do not install georeplication components]))
+ AC_HELP_STRING([--disable-georeplication],
+ [Do not install georeplication components]))
BUILD_SYNCDAEMON=no
case $host_os in
+ freebsd*)
+#do nothing
+ ;;
linux*)
#do nothing
;;
@@ -332,72 +796,226 @@ case $host_os in
;;
*)
#disabling geo replication for non-linux platforms
- enable_georeplication=no
+ enable_georeplication=no
;;
esac
SYNCDAEMON_COMPILE=0
-if test "x$enable_georeplication" != "xno"; then
- SYNCDAEMON_SUBDIR=utils
- SYNCDAEMON_COMPILE=1
-
- BUILD_SYNCDAEMON="yes"
- AM_PATH_PYTHON([2.4])
- echo -n "checking if python is python 2.x... "
- if echo $PYTHON_VERSION | grep ^2; then
- :
- else
- echo no
- AC_MSG_ERROR([only python 2.x is supported])
- fi
- echo -n "checking if python has ctypes support... "
- if "$PYTHON" -c 'import ctypes' 2>/dev/null; then
- echo yes
+if test "x${with_server}" = "xyes" -a "x${enable_georeplication}" != "xno"; then
+ if test "x${have_python}" = "xno" ; then
+ AC_MSG_ERROR([only python 2 and 3 are supported])
else
- echo no
- AC_MSG_ERROR([python does not have ctypes support])
+ SYNCDAEMON_SUBDIR=geo-replication
+ SYNCDAEMON_COMPILE=1
+
+ BUILD_SYNCDAEMON="yes"
+ AC_MSG_CHECKING([if python has ctypes support...])
+ if "${PYTHON}" -c 'import ctypes' 2>/dev/null; then
+ AC_MSG_RESULT("yes")
+ else
+ AC_MSG_ERROR([python does not have ctypes support])
+ fi
fi
fi
AC_SUBST(SYNCDAEMON_COMPILE)
AC_SUBST(SYNCDAEMON_SUBDIR)
# end SYNCDAEMON section
-#check if libxml is present if so enable HAVE_LIB_XML
-echo -n "checking if libxml2 is present... "
-
-PKG_CHECK_MODULES([LIBXML2], [libxml-2.0 >= 2.6.19],
- [echo "yes (features requiring libxml2 enabled)" AC_DEFINE(HAVE_LIB_XML, 1, [define if libxml2 is present])],
- [echo "no"] )
+# only install scripts from extras/geo-rep when enabled
+if test "x${with_server}" = "xyes" -a "x$enable_georeplication" != "xno"; then
+ GEOREP_EXTRAS_SUBDIR=geo-rep
+fi
+AC_SUBST(GEOREP_EXTRAS_SUBDIR)
+AM_CONDITIONAL(USE_GEOREP, test "x$enable_georeplication" != "xno")
+
+# METADISP section
+AC_ARG_ENABLE([metadisp],
+ AC_HELP_STRING([--enable-metadisp],
+ [Enable the metadata dispersal xlator]))
+BUILD_METADISP=no
+if test "x${enable_metadisp}" = "xyes"; then
+ BUILD_METADISP=yes
+fi
+AM_CONDITIONAL([BUILD_METADISP], [test "x$BUILD_METADISP" = "xyes"])
+# end METADISP section
+
+# Events section
+AC_ARG_ENABLE([events],
+ AC_HELP_STRING([--disable-events],
+ [Do not install Events components]))
+
+BUILD_EVENTS=no
+EVENTS_ENABLED=0
+EVENTS_SUBDIR=
+if test "x$enable_events" != "xno"; then
+ EVENTS_SUBDIR=events
+ EVENTS_ENABLED=1
+
+ BUILD_EVENTS="yes"
+
+ if test "x${have_python}" = "xno"; then
+ if test "x${enable_events}" = "xyes"; then
+ AC_MSG_ERROR([python 2 or 3 required. exiting.])
+ fi
+ AC_MSG_WARN([python not found, disabling events])
+ EVENTS_SUBDIR=
+ EVENTS_ENABLED=0
+ BUILD_EVENTS="no"
+ else
+ AC_DEFINE(USE_EVENTS, 1, [define if events enabled])
+ fi
+fi
+AC_SUBST(EVENTS_ENABLED)
+AC_SUBST(EVENTS_SUBDIR)
+AM_CONDITIONAL([BUILD_EVENTS], [test "x${BUILD_EVENTS}" = "xyes"])
+# end Events section
+
+# CDC xlator - check if libz is present if so enable HAVE_LIB_Z
+BUILD_CDC=yes
+PKG_CHECK_MODULES([ZLIB], [zlib >= 1.2.0],,
+ [AC_CHECK_LIB([z], [deflate], [ZLIB_LIBS="-lz"],
+ [BUILD_CDC=no])])
+echo -n "features requiring zlib enabled: "
+if test "x$BUILD_CDC" = "xyes" ; then
+ echo "yes"
+ AC_DEFINE(HAVE_LIB_Z, 1, [define if zlib is present])
+else
+ echo "no"
+fi
+AC_SUBST(ZLIB_CFLAGS)
+AC_SUBST(ZLIB_LIBS)
+# end CDC xlator secion
+
+#start firewalld section
+BUILD_FIREWALLD="no"
+AC_ARG_ENABLE([firewalld],
+ AC_HELP_STRING([--enable-firewalld],
+ [enable installation configuration for firewalld]),
+ [BUILD_FIREWALLD="${enableval}"], [BUILD_FIREWALLD="no"])
+
+if test "x${with_server}" = "xyes" -a "x${BUILD_FIREWALLD}" = "xyes"; then
+ if !(test -d /usr/lib/firewalld/services 1>/dev/null 2>&1) ; then
+ BUILD_FIREWALLD="no (firewalld not installed)"
+ fi
+fi
+AM_CONDITIONAL([USE_FIREWALLD],test ["x${BUILD_FIREWALLD}" = "xyes"])
+
+#endof firewald section
+
+# xml-output
+AC_ARG_ENABLE([xml-output],
+ AC_HELP_STRING([--disable-xml-output],
+ [Disable the xml output]))
+BUILD_XML_OUTPUT="yes"
+if test "x$enable_xml_output" != "xno"; then
+ PKG_CHECK_MODULES([XML], [libxml-2.0], [], [no_xml="yes"])
+ if test "x${no_xml}" = "x"; then
+ AC_DEFINE([HAVE_LIB_XML], [1], [Define to 1 if using libxml2.])
+ else
+ if test "x$enable_georeplication" != "xno"; then
+ AC_MSG_ERROR([libxml2 devel libraries not found])
+ else
+ AC_MSG_WARN([libxml2 devel libraries not found disabling XML support])
+ BUILD_XML_OUTPUT="no"
+ fi
-AC_SUBST(LIBXML2_CFLAGS)
-AC_SUBST(LIBXML2_LIBS)
+ fi
+else
+ if test "x$enable_georeplication" != "xno"; then
+ AC_MSG_ERROR([geo-replication requires xml output])
+ fi
+ BUILD_XML_OUTPUT="no"
+fi
+# end of xml-output
-dnl FreeBSD > 5 has execinfo as a Ported library for giving a workaround
-dnl solution to GCC backtrace functionality
+dnl cloudsync section
+BUILD_CLOUDSYNC="no"
+AC_CHECK_LIB([curl], [curl_easy_setopt], [LIBCURL="-lcurl"])
+if test -n "$LIBCURL";then
+ HAVE_LIBCURL="yes"
+fi
+AC_CHECK_HEADERS([openssl/hmac.h openssl/evp.h openssl/bio.h openssl/buffer.h], [HAVE_OPENSSL="yes"])
+if test "x$HAVE_LIBCURL" = "xyes" -a "x$HAVE_OPENSSL" = "xyes";then
+ HAVE_AMAZONS3="yes"
+fi
+AM_CONDITIONAL([BUILD_AMAZONS3_PLUGIN], [test "x$HAVE_AMAZONS3" = "xyes"])
+if test "x$HAVE_AMAZONS3" = "xyes";then
+ BUILD_CLOUDSYNC="yes"
+fi
+BUILD_CVLT_PLUGIN="no"
+case $host_os in
+#enable cvlt plugin only for linux platforms
+ linux*)
+ BUILD_CVLT_PLUGIN="yes"
+ BUILD_CLOUDSYNC="yes"
+ ;;
+ *)
+ ;;
+esac
+AM_CONDITIONAL([BUILD_CVLT_PLUGIN], [test "x$BUILD_CVLT_PLUGIN" = "xyes"])
+AM_CONDITIONAL([BUILD_CLOUDSYNC], [test "x$BUILD_CLOUDSYNC" = "xyes"])
+dnl end cloudsync section
-AC_CHECK_HEADERS([execinfo.h], [have_backtrace=yes],
- AC_CHECK_LIB([execinfo], [backtrace], [have_backtrace=yes]))
-dnl AC_MSG_ERROR([libexecinfo not found libexecinfo required.])))
+dnl SELinux feature enablement
+case $host_os in
+ linux*)
+ AC_ARG_ENABLE([selinux],
+ AC_HELP_STRING([--disable-selinux],
+ [Disable SELinux features]),
+ [USE_SELINUX="${enableval}"], [USE_SELINUX="yes"])
+ ;;
+ *)
+ USE_SELINUX=no
+ ;;
+esac
+AM_CONDITIONAL(USE_SELINUX, test "x${USE_SELINUX}" = "xyes")
+dnl end of SELinux feature enablement
+AC_CHECK_HEADERS([execinfo.h], [have_backtrace=yes])
if test "x${have_backtrace}" = "xyes"; then
AC_DEFINE(HAVE_BACKTRACE, 1, [define if found backtrace])
fi
AC_SUBST(HAVE_BACKTRACE)
+dnl Old (before C11) compiler can compile (but not link) this:
+dnl
+dnl int main () {
+dnl _Static_assert(1, "True");
+dnl return 0;
+dnl }
+dnl
+dnl assuming that _Static_assert is an implicitly declared function. So
+dnl we're trying to link just to make sure that this is not the case.
+
+AC_MSG_CHECKING([whether $CC supports C11 _Static_assert])
+AC_TRY_LINK([], [_Static_assert(1, "True");],
+ [STATIC_ASSERT=yes], [STATIC_ASSERT=no])
+
+AC_MSG_RESULT([$STATIC_ASSERT])
+if test x$STATIC_ASSERT = "xyes"; then
+ AC_DEFINE(HAVE_STATIC_ASSERT, 1, [Define if C11 _Static_assert is supported.])
+fi
+
+if test "x${have_backtrace}" != "xyes"; then
+AC_TRY_COMPILE([#include <math.h>], [double x=0.0; x=ceil(0.0);],
+ [],
+ AC_MSG_ERROR([need math library for libexecinfo]))
+fi
+
dnl glusterfs prints memory usage to stderr by sending it SIGUSR1
-AC_CHECK_FUNC([malloc_stats], [have_malloc_stats=yes])
-if test "x${have_malloc_stats}" = "xyes"; then
- AC_DEFINE(HAVE_MALLOC_STATS, 1, [define if found malloc_stats])
+AC_CHECK_FUNC([mallinfo], [have_mallinfo=yes])
+if test "x${have_mallinfo}" = "xyes"; then
+ AC_DEFINE(HAVE_MALLINFO, 1, [define if found mallinfo])
fi
-AC_SUBST(HAVE_MALLOC_STATS)
+AC_SUBST(HAVE_MALLINFO)
dnl Linux, Solaris, Cygwin
AC_CHECK_MEMBERS([struct stat.st_atim.tv_nsec])
dnl FreeBSD, NetBSD
AC_CHECK_MEMBERS([struct stat.st_atimespec.tv_nsec])
case $host_os in
- *netbsd*)
- CFLAGS=-D_INCOMPLETE_XOPEN_C063
- ;;
+ *netbsd*)
+ GF_CFLAGS="${GF_CFLAGS} -D_INCOMPLETE_XOPEN_C063 -DCONFIG_MACHINE_BSWAP_H"
+ ;;
esac
AC_CHECK_FUNC([linkat], [have_linkat=yes])
if test "x${have_linkat}" = "xyes"; then
@@ -405,18 +1023,46 @@ if test "x${have_linkat}" = "xyes"; then
fi
AC_SUBST(HAVE_LINKAT)
-dnl Check for argp
+dnl check for Monotonic clock
+AC_CHECK_LIB([rt], [clock_gettime], ,
+ AC_MSG_WARN([System doesn't have monotonic clock using contrib]))
+
+dnl check for argp, FreeBSD has the header in /usr/local/include
+case $host_os in
+ *freebsd*)
+ CFLAGS="${CFLAGS} -isystem /usr/local/include"
+ ARGP_LDADD=-largp
+ ;;
+ *netbsd*)
+ ARGP_LDADD=-largp
+ ;;
+esac
+dnl argp-standalone does not provide a pkg-config file
AC_CHECK_HEADER([argp.h], AC_DEFINE(HAVE_ARGP, 1, [have argp]))
-AC_CONFIG_SUBDIRS(argp-standalone)
-BUILD_ARGP_STANDALONE=no
-if test "x${ac_cv_header_argp_h}" = "xno"; then
- BUILD_ARGP_STANDALONE=yes
- ARGP_STANDALONE_CPPFLAGS='-I${top_srcdir}/argp-standalone'
- ARGP_STANDALONE_LDADD='${top_builddir}/argp-standalone/libargp.a'
+if test "x$ac_cv_header_argp_h" != "xyes"; then
+ AC_MSG_ERROR([argp.h not found, install libargp or argp-standalone])
fi
-
-AC_SUBST(ARGP_STANDALONE_CPPFLAGS)
-AC_SUBST(ARGP_STANDALONE_LDADD)
+AC_SUBST(ARGP_LDADD)
+
+dnl Check for atomic operation support
+AC_MSG_CHECKING([for gcc __atomic builtins])
+AC_TRY_LINK([], [int v; __atomic_load_n(&v, __ATOMIC_ACQUIRE);],
+ [have_atomic_builtins=yes], [have_atomic_builtins=no])
+if test "x${have_atomic_builtins}" = "xyes"; then
+ AC_DEFINE(HAVE_ATOMIC_BUILTINS, 1, [define if __atomic_*() builtins are available])
+fi
+AC_SUBST(HAVE_ATOMIC_BUILTINS)
+AC_MSG_RESULT([$have_atomic_builtins])
+
+dnl __sync_*() will not be needed if __atomic_*() is available
+AC_MSG_CHECKING([for gcc __sync builtins])
+AC_TRY_LINK([], [__sync_synchronize();],
+ [have_sync_builtins=yes], [have_sync_builtins=no])
+if test "x${have_sync_builtins}" = "xyes"; then
+ AC_DEFINE(HAVE_SYNC_BUILTINS, 1, [define if __sync_*() builtins are available])
+fi
+AC_SUBST(HAVE_SYNC_BUILTINS)
+AC_MSG_RESULT([$have_sync_builtins])
AC_CHECK_HEADER([malloc.h], AC_DEFINE(HAVE_MALLOC_H, 1, [have malloc.h]))
@@ -425,12 +1071,79 @@ if test "x${have_llistxattr}" = "xyes"; then
AC_DEFINE(HAVE_LLISTXATTR, 1, [define if llistxattr exists])
fi
-AC_CHECK_FUNC([fdatasync], [have_fdatasync=yes])
+AC_CHECK_FUNC([fdatasync], [have_fdatasync=no])
if test "x${have_fdatasync}" = "xyes"; then
AC_DEFINE(HAVE_FDATASYNC, 1, [define if fdatasync exists])
fi
-# Check the distribution where you are compiling glusterfs on
+AC_CHECK_FUNC([fallocate], [have_fallocate=yes])
+if test "x${have_fallocate}" = "xyes"; then
+ AC_DEFINE(HAVE_FALLOCATE, 1, [define if fallocate exists])
+fi
+
+AC_CHECK_FUNC([posix_fallocate], [have_posix_fallocate=yes])
+if test "x${have_posix_fallocate}" = "xyes"; then
+ AC_DEFINE(HAVE_POSIX_FALLOCATE, 1, [define if posix_fallocate exists])
+fi
+
+# On fedora-29, copy_file_range syscall and the libc API both are present.
+# Whereas, on some machines such as centos-7, RHEL-7, the API is not there.
+# Only the system call is present. So, this change is to determine whether
+# the API is present or not. If not, then check whether the system call is
+# present or not. Accordingly sys_copy_file_range function will first call
+# the API if it is there. Otherwise it will call syscall(SYS_copy_file_range).
+AC_CHECK_FUNC([copy_file_range], [have_copy_file_range=yes])
+if test "x${have_copy_file_range}" = "xyes"; then
+ AC_DEFINE(HAVE_COPY_FILE_RANGE, 1, [define if copy_file_range exists])
+else
+ OLD_CFLAGS=${CFLAGS}
+ CFLAGS="-D_GNU_SOURCE"
+ AC_CHECK_DECL([SYS_copy_file_range], , , [#include <sys/syscall.h>])
+ if test "x${ac_cv_have_decl_SYS_copy_file_range}" = "xyes"; then
+ AC_DEFINE(HAVE_COPY_FILE_RANGE_SYS, 1, [define if SYS_copy_file_range is available])
+ fi
+ CFLAGS=${OLD_CFLAGS}
+fi
+
+AC_CHECK_FUNC([syncfs], [have_syncfs=yes])
+if test "x${have_syncfs}" = "xyes"; then
+ AC_DEFINE(HAVE_SYNCFS, 1, [define if syncfs exists])
+else
+ OLD_CFLAGS=${CFLAGS}
+ CFLAGS="-D_GNU_SOURCE"
+ AC_CHECK_DECL([SYS_syncfs], , , [#include <sys/syscall.h>])
+ if test "x${ac_cv_have_decl_SYS_syncfs}" = "xyes"; then
+ AC_DEFINE(HAVE_SYNCFS_SYS, 1, [define if SYS_syncfs is available])
+ fi
+ CFLAGS=${OLD_CFLAGS}
+fi
+
+BUILD_NANOSECOND_TIMESTAMPS=no
+AC_CHECK_FUNC([utimensat], [have_utimensat=yes])
+if test "x${have_utimensat}" = "xyes"; then
+ BUILD_NANOSECOND_TIMESTAMPS=yes
+ AC_DEFINE(HAVE_UTIMENSAT, 1, [define if utimensat exists])
+fi
+
+OLD_CFLAGS=${CFLAGS}
+CFLAGS="-D_GNU_SOURCE"
+AC_CHECK_DECL([SEEK_HOLE], , , [#include <unistd.h>])
+if test "x${ac_cv_have_decl_SEEK_HOLE}" = "xyes"; then
+ AC_DEFINE(HAVE_SEEK_HOLE, 1, [define if SEEK_HOLE is available])
+fi
+CFLAGS=${OLD_CFLAGS}
+
+AC_CHECK_FUNC([accept4], [have_accept4=yes])
+if test "x${have_accept4}" = "xyes"; then
+ AC_DEFINE(HAVE_ACCEPT4, 1, [define if accept4 exists])
+fi
+
+AC_CHECK_FUNC([paccept], [have_paccept=yes])
+if test "x${have_paccept}" = "xyes"; then
+AC_DEFINE(HAVE_PACCEPT, 1, [define if paccept exists])
+fi
+
+# Check the distribution where you are compiling glusterfs on
GF_DISTRIBUTION=
AC_CHECK_FILE([/etc/debian_version])
@@ -450,107 +1163,555 @@ fi
AC_SUBST(GF_DISTRIBUTION)
GF_HOST_OS=""
-GF_LDFLAGS="-rdynamic"
+GF_LDFLAGS="${GF_LDFLAGS} -rdynamic"
+
+dnl see --with-libtirpc option check above, libtirpc(-devel) is required for
+dnl ipv6-default
+if test "x${with_libtirpc}" = "xyes" || test "x${with_ipv6_default}" = "xyes" ; then
+ PKG_CHECK_MODULES([TIRPC], [libtirpc],
+ [with_libtirpc="yes"; GF_CFLAGS="$GF_CFLAGS $TIRPC_CFLAGS"; GF_LDFLAGS="$GF_LDFLAGS $TIRPC_LIBS";],
+ [with_libtirpc="missing"; with_ipv6_default="no"])
+fi
+
+if test "x${with_libtirpc}" = "xmissing" ; then
+ AC_CHECK_HEADERS([rpc/rpc.h],[
+ AC_MSG_WARN([
+ ---------------------------------------------------------------------------------
+ libtirpc (and/or ipv6-default) were enabled but libtirpc-devel is not installed.
+ Disabling libtirpc and ipv6-default and falling back to legacy glibc rpc headers.
+ This is a transitional warning message. Eventually it will be an error message.
+ ---------------------------------------------------------------------------------])],[
+ AC_MSG_ERROR([
+ ---------------------------------------------------------------------------------
+ libtirpc (and/or ipv6-default) were enabled but libtirpc-devel is not installed
+ and there were no legacy glibc rpc headers and library to fall back to.
+ ---------------------------------------------------------------------------------])])
+fi
+
+if test "x$with_ipv6_default" = "xyes" ; then
+ GF_CFLAGS="$GF_CFLAGS -DIPV6_DEFAULT"
+fi
+
+dnl check for gcc -Werror=format-security
+saved_CFLAGS=$CFLAGS
+CFLAGS="-Wformat -Werror=format-security"
+AC_MSG_CHECKING([whether $CC accepts -Werror=format-security])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], [cc_werror_format_security=yes], [cc_werror_format_security=no])
+echo $cc_werror_format_security
+if test "x$cc_werror_format_security" = "xyes"; then
+ GF_CFLAGS="$GF_CFLAGS ${CFLAGS}"
+fi
+CFLAGS="$saved_CFLAGS"
+
+dnl check for gcc -Werror=implicit-function-declaration
+saved_CFLAGS=$CFLAGS
+CFLAGS="-Werror=implicit-function-declaration"
+AC_MSG_CHECKING([whether $CC accepts -Werror=implicit-function-declaration])
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM()], [cc_werror_implicit=yes], [cc_werror_implicit=no])
+echo $cc_werror_implicit
+if test "x$cc_werror_implicit" = "xyes"; then
+ GF_CFLAGS="${GF_CFLAGS} ${CFLAGS}"
+fi
+CFLAGS="$saved_CFLAGS"
+
+dnl clang is mostly GCC-compatible, but its version is much lower,
+dnl so we have to check for it.
+AC_MSG_CHECKING([if compiling with clang])
+
+AC_COMPILE_IFELSE(
+[AC_LANG_PROGRAM([], [[
+#ifndef __clang__
+ not clang
+#endif
+]])],
+[CLANG=yes], [CLANG=no])
+
+AC_MSG_RESULT([$CLANG])
+
+if test "x$CLANG" = "xyes"; then
+ GF_CFLAGS="${GF_CFLAGS} -Wno-gnu"
+fi
+
+if test "x$ac_cv_header_execinfo_h" = "xno"; then
+ # The reason is that __builtin_frame_address(n) for n > 0 seems
+ # to just crash on most platforms when -fomit-stack-pointer is
+ # specified, which seems to be the default for many platforms on
+ # -O2. The documentation says that __builtin_frame_address()
+ # should return NULL in case it can't get the frame, but it
+ # seems to crash instead.
+
+ # execinfo.c in ./contrib/libexecinfo uses __builtin_frame_address(n)
+ # for providing cross platform backtrace*() functions.
+ if test "x$CLANG" = "xno"; then
+ GF_CFLAGS="${GF_CFLAGS} -fno-omit-frame-pointer"
+ fi
+fi
+
+old_prefix=$prefix
+if test "x$prefix" = xNONE; then
+ prefix=$ac_default_prefix
+fi
+old_exec_prefix=$exec_prefix
+if test "x$exec_prefix" = xNONE; then
+ exec_prefix="$(eval echo $prefix)"
+fi
+GLUSTERFS_LIBEXECDIR="$(eval echo $libexecdir)/glusterfs"
+prefix=$old_prefix
+exec_prefix=$old_exec_prefix
+
+### Dirty hacky stuff to make LOCALSTATEDIR work
+if test "x$prefix" = xNONE; then
+ test $localstatedir = '${prefix}/var' && localstatedir=$ac_default_prefix/var
+ localstatedir=/var
+fi
+localstatedir="$(eval echo ${localstatedir})"
+LOCALSTATEDIR=$localstatedir
+
+GLUSTERFSD_MISCDIR="$(eval echo ${localstatedir})/lib/misc/glusterfsd"
+old_prefix=$prefix
+if test "x$prefix" = xNONE; then
+ prefix=$ac_default_prefix
+fi
+GLUSTERD_VOLFILE="$(eval echo ${sysconfdir})/glusterfs/glusterd.vol"
+prefix=$old_prefix
+
+
+GFAPI_EXTRA_LDFLAGS='-Wl,--version-script=$(top_srcdir)/api/src/gfapi.map'
case $host_os in
linux*)
- dnl GF_LINUX_HOST_OS=1
GF_HOST_OS="GF_LINUX_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -O0"
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- GF_FUSE_CFLAGS="-DFUSERMOUNT_DIR=\\\"\$(bindir)\\\""
- ;;
+ GF_FUSE_CFLAGS="-DFUSERMOUNT_DIR=\\\"\$(bindir)\\\""
+ GLUSTERD_WORKDIR="${LOCALSTATEDIR}/lib/glusterd"
+ ;;
solaris*)
GF_HOST_OS="GF_SOLARIS_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS -O0 -m64"
- GF_LDFLAGS=""
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- GF_GLUSTERFS_LDFLAGS="-lnsl -lresolv -lsocket"
+ GF_CFLAGS="${GF_CFLAGS} -D_REENTRANT -D_POSIX_PTHREAD_SEMANTICS -m64"
BUILD_FUSE_CLIENT=no
FUSE_CLIENT_SUBDIR=""
- ;;
+ GLUSTERD_WORKDIR="${LOCALSTATEDIR}/lib/glusterd"
+ ;;
*netbsd*)
- GF_HOST_OS="GF_BSD_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D_INCOMPLETE_XOPEN_C063"
- GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
- GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- if test "x$ac_cv_header_execinfo_h" = "xyes"; then
- GF_GLUSTERFS_LDFLAGS="-lexecinfo"
- fi
- GF_FUSE_LDADD="-lperfuse"
- BUILD_FUSE_CLIENT=yes
- LEXLIB=""
- ;;
- *bsd*)
GF_HOST_OS="GF_BSD_HOST_OS"
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -O0"
- GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
- GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
- GF_GLUSTERFS_CFLAGS="${GF_CFLAGS}"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- if test "x$ac_cv_header_execinfo_h" = "xyes"; then
- GF_GLUSTERFS_LDFLAGS="-lexecinfo"
- fi
- BUILD_FUSE_CLIENT=no
- ;;
+ GF_CFLAGS="${GF_CFLAGS} -D_INCOMPLETE_XOPEN_C063"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
+ GF_FUSE_CFLAGS="-DFUSERMOUNT_DIR=\\\"\$(sbindir)\\\""
+ GF_LDADD="${ARGP_LDADD}"
+ if test "x$ac_cv_header_execinfo_h" = "xyes"; then
+ GF_LDFLAGS="${GF_LDFLAGS} -lexecinfo"
+ fi
+ GF_FUSE_LDADD="-lperfuse"
+ BUILD_FUSE_CLIENT=yes
+ LEXLIB=""
+ BUILD_FUSERMOUNT=no
+ FUSERMOUNT_SUBDIR=""
+ GLUSTERD_WORKDIR="${LOCALSTATEDIR}/db/glusterd"
+ ;;
+ *freebsd*)
+ GF_HOST_OS="GF_BSD_HOST_OS"
+ GF_CFLAGS="${GF_CFLAGS} -DO_DSYNC=0"
+ GF_CFLAGS="${GF_CFLAGS} -Dxdr_quad_t=xdr_longlong_t"
+ GF_CFLAGS="${GF_CFLAGS} -Dxdr_u_quad_t=xdr_u_longlong_t"
+ GF_FUSE_CFLAGS="-DFUSERMOUNT_DIR=\\\"\$(sbindir)\\\""
+ GF_LDADD="${ARGP_LDADD}"
+ if test "x$ac_cv_header_execinfo_h" = "xyes"; then
+ GF_LDFLAGS="${GF_LDFLAGS} -lexecinfo"
+ fi
+ BUILD_FUSE_CLIENT=yes
+ BUILD_FUSERMOUNT=no
+ FUSERMOUNT_SUBDIR=""
+ GLUSTERD_WORKDIR="${LOCALSTATEDIR}/db/glusterd"
+ ;;
darwin*)
GF_HOST_OS="GF_DARWIN_HOST_OS"
- LIBTOOL=glibtool
- GF_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D__DARWIN_64_BIT_INO_T -bundle -undefined suppress -flat_namespace -D_XOPEN_SOURCE -O0"
- GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
- GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
- GF_GLUSTERFS_CFLAGS="${ARGP_STANDALONE_CPPFLAGS} -D__DARWIN_64_BIT_INO_T -undefined suppress -flat_namespace -O0"
- GF_LDADD="${ARGP_STANDALONE_LDADD}"
- GF_FUSE_CFLAGS="-I\$(CONTRIBDIR)/macfuse"
- ;;
+ LIBTOOL=glibtool
+ GF_CFLAGS="${GF_CFLAGS} -D_REENTRANT -D_XOPEN_SOURCE "
+ GF_CFLAGS="${GF_CFLAGS} -D_DARWIN_USE_64_BIT_INODE "
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_BASENAME"
+ GF_CFLAGS="${GF_CFLAGS} -DTHREAD_UNSAFE_DIRNAME"
+ GF_LDADD="${ARGP_LDADD}"
+ GF_LDFLAGS="${GF_LDFLAGS}"
+ GF_FUSE_CFLAGS="-I\$(CONTRIBDIR)/macfuse"
+ BUILD_FUSERMOUNT="no"
+ FUSERMOUNT_SUBDIR=""
+ GLUSTERD_WORKDIR="${LOCALSTATEDIR}/db/glusterd"
+ GFAPI_EXTRA_LDFLAGS='-Wl,-alias_list,$(top_srcdir)/api/src/gfapi.aliases'
+ ;;
esac
+# Default value for sbindir
+prefix_temp=$prefix
+exec_prefix_temp=$exec_prefix
+
+test "${prefix}" = "NONE" && prefix="${ac_default_prefix}"
+test "${exec_prefix}" = "NONE" && exec_prefix='${prefix}'
+sbintemp="${sbindir}"
+eval sbintemp=\"${sbintemp}\"
+eval sbintemp=\"${sbintemp}\"
+SBIN_DIR=${sbintemp}
+
+sysconfdirtemp="${sysconfdir}"
+eval sysconfdirtemp=\"${sysconfdirtemp}\"
+SYSCONF_DIR=${sysconfdirtemp}
+
+prefix=$prefix_temp
+exec_prefix=$exec_prefix_temp
+
+AC_SUBST(SBIN_DIR)
+AC_SUBST(SYSCONF_DIR)
+
+# lazy umount emulation
+UMOUNTD_SUBDIR=""
+if test "x${GF_HOST_OS}" != "xGF_LINUX_HOST_OS" ; then
+ UMOUNTD_SUBDIR="contrib/umountd"
+fi
+AC_SUBST(UMOUNTD_SUBDIR)
+
+
+# enable debug section
+AC_ARG_ENABLE([debug],
+ AC_HELP_STRING([--enable-debug],
+ [Enable debug build options.]))
+
+AC_ARG_ENABLE([mempool],
+ AC_HELP_STRING([--disable-mempool],
+ [Disable the Gluster memory pooler.]))
+
+USE_MEMPOOL="yes"
+if test "x$enable_mempool" = "xno"; then
+ USE_MEMPOOL="no"
+ AC_DEFINE(GF_DISABLE_MEMPOOL, 1, [Disable the Gluster memory pooler.])
+fi
+
+# syslog section
+AC_ARG_ENABLE([syslog],
+ AC_HELP_STRING([--disable-syslog],
+ [Disable syslog for logging]))
+
+USE_SYSLOG="yes"
+if test "x$enable_syslog" != "xno"; then
+ AC_DEFINE(GF_USE_SYSLOG, 1, [Use syslog for logging])
+else
+ USE_SYSLOG="no"
+fi
+AM_CONDITIONAL([ENABLE_SYSLOG], [test x$USE_SYSLOG = xyes])
+#end syslog section
+
BUILD_READLINE=no
AC_CHECK_LIB([readline -lcurses],[readline],[RLLIBS="-lreadline -lcurses"])
AC_CHECK_LIB([readline -ltermcap],[readline],[RLLIBS="-lreadline -ltermcap"])
AC_CHECK_LIB([readline -lncurses],[readline],[RLLIBS="-lreadline -lncurses"])
-if test "x$RLLIBS" != "x"; then
- AC_DEFINE(HAVE_READLINE, 1, [readline enabled CLI])
- BUILD_READLINE=yes
+if test -n "$RLLIBS"; then
+ if test "x$RL_UNDO" = "xyes"; then
+ AC_DEFINE(HAVE_READLINE, 1, [readline enabled CLI])
+ BUILD_READLINE=yes
+ else
+ BUILD_READLINE="no (present but missing undo)"
+ fi
+
+fi
+
+BUILD_LIBAIO=no
+AC_CHECK_LIB([aio],[io_setup],[LIBAIO="-laio"])
+
+if test -n "$LIBAIO"; then
+ AC_DEFINE(HAVE_LIBAIO, 1, [libaio based POSIX enabled])
+ BUILD_LIBAIO=yes
+fi
+
+dnl gnfs section
+BUILD_GNFS="no"
+RPCBIND_SERVICE=""
+AC_ARG_ENABLE([gnfs],
+ AC_HELP_STRING([--enable-gnfs],
+ [Enable legacy gnfs server xlator.]))
+if test "x${with_server}" = "xyes" -a "x$enable_gnfs" = "xyes"; then
+ BUILD_GNFS="yes"
+ GF_CFLAGS="$GF_CFLAGS -DBUILD_GNFS"
+ RPCBIND_SERVICE="rpcbind.service"
+fi
+AM_CONDITIONAL([BUILD_GNFS], [test x$BUILD_GNFS = xyes])
+AC_SUBST(BUILD_GNFS)
+AC_SUBST(RPCBIND_SERVICE)
+dnl end gnfs section
+
+dnl Check for userspace-rcu
+PKG_CHECK_MODULES([URCU], [liburcu-bp], [],
+ [AC_CHECK_HEADERS([urcu-bp.h],
+ [URCU_LIBS='-lurcu-bp'],
+ AC_MSG_ERROR([liburcu-bp not found]))])
+PKG_CHECK_MODULES([URCU_CDS], [liburcu-cds >= 0.8], [],
+ [PKG_CHECK_MODULES([URCU_CDS], [liburcu-cds >= 0.7],
+ [AC_DEFINE(URCU_OLD, 1, [Define if liburcu 0.6 or 0.7 is found])
+ USE_CONTRIB_URCU='yes'],
+ [AC_CHECK_HEADERS([urcu/cds.h],
+ [AC_DEFINE(URCU_OLD, 1, [Define if liburcu 0.6 or 0.7 is found])
+ URCU_CDS_LIBS='-lurcu-cds'
+ USE_CONTRIB_URCU='yes'],
+ [AC_MSG_ERROR([liburcu-cds not found])])])])
+
+BUILD_UNITTEST="no"
+AC_ARG_ENABLE([cmocka],
+ AC_HELP_STRING([--enable-cmocka],
+ [Enable cmocka build options.]))
+if test "x$enable_cmocka" = "xyes"; then
+ BUILD_UNITTEST="yes"
+ PKG_CHECK_MODULES([UNITTEST], [cmocka >= 1.0.1], [BUILD_UNITTEST="yes"],
+ [AC_MSG_ERROR([cmocka library is required to build glusterfs])]
+ )
+fi
+AM_CONDITIONAL([UNITTEST], [test x$BUILD_UNITTEST = xyes])
+
+dnl Define UNIT_TESTING only for building cmocka binaries.
+UNITTEST_CFLAGS="${UNITTEST_CFLAGS} -DUNIT_TESTING=1"
+
+dnl Add cmocka for unit tests
+case $host_os in
+ freebsd*)
+ dnl remove --coverage on FreeBSD due to a known llvm packaging bug
+ UNITTEST_CFLAGS="${UNITTEST_CPPFLAGS} ${UNITTEST_CFLAGS} -g -DDEBUG -O0"
+ UNITTEST_LDFLAGS="${UNITTEST_LIBS} ${UNITTEST_LDFLAGS}"
+ ;;
+ *)
+ UNITTEST_CFLAGS="${UNITTEST_CPPFLAGS} ${UNITTEST_CFLAGS} -g -DDEBUG -O0 --coverage"
+ UNITTEST_LDFLAGS="${UNITTEST_LIBS} ${UNITTEST_LDFLAGS}"
+ ;;
+esac
+
+AC_SUBST(UNITTEST_CFLAGS)
+AC_SUBST(UNITTEST_LDFLAGS)
+
+AC_SUBST(CFLAGS)
+# end enable debug section
+
+# EC dynamic code generation section
+
+EC_DYNAMIC_SUPPORT="none"
+EC_DYNAMIC_ARCH="none"
+
+AC_ARG_ENABLE([ec-dynamic],
+ AC_HELP_STRING([--disable-ec-dynamic],
+ [Disable all dynamic code generation extensions for EC module]))
+
+AC_ARG_ENABLE([ec-dynamic-intel],
+ AC_HELP_STRING([--disable-ec-dynamic-intel],
+ [Disable all INTEL dynamic code generation extensions for EC module]))
+
+AC_ARG_ENABLE([ec-dynamic-arm],
+ AC_HELP_STRING([--disable-ec-dynamic-arm],
+ [Disable all ARM dynamic code generation extensions for EC module]))
+
+AC_ARG_ENABLE([ec-dynamic-x64],
+ AC_HELP_STRING([--disable-ec-dynamic-x64],
+ [Disable dynamic INTEL x64 code generation for EC module]))
+
+AC_ARG_ENABLE([ec-dynamic-sse],
+ AC_HELP_STRING([--disable-ec-dynamic-sse],
+ [Disable dynamic INTEL SSE code generation for EC module]))
+
+AC_ARG_ENABLE([ec-dynamic-avx],
+ AC_HELP_STRING([--disable-ec-dynamic-avx],
+ [Disable dynamic INTEL AVX code generation for EC module]))
+
+AC_ARG_ENABLE([ec-dynamic-neon],
+ AC_HELP_STRING([--disable-ec-dynamic-neon],
+ [Disable dynamic ARM NEON code generation for EC module]))
+
+if test "x$enable_ec_dynamic" != "xno"; then
+ case $host in
+ x86_64*)
+ if test "x$enable_ec_dynamic_intel" != "xno"; then
+ if test "x$enable_ec_dynamic_x64" != "xno"; then
+ EC_DYNAMIC_SUPPORT="$EC_DYNAMIC_SUPPORT x64"
+ AC_DEFINE(USE_EC_DYNAMIC_X64, 1, [Defined if using dynamic INTEL x64 code])
+ fi
+ if test "x$enable_ec_dynamic_sse" != "xno"; then
+ EC_DYNAMIC_SUPPORT="$EC_DYNAMIC_SUPPORT sse"
+ AC_DEFINE(USE_EC_DYNAMIC_SSE, 1, [Defined if using dynamic INTEL SSE code])
+ fi
+ if test "x$enable_ec_dynamic_avx" != "xno"; then
+ EC_DYNAMIC_SUPPORT="$EC_DYNAMIC_SUPPORT avx"
+ AC_DEFINE(USE_EC_DYNAMIC_AVX, 1, [Defined if using dynamic INTEL AVX code])
+ fi
+
+ if test "x$EC_DYNAMIC_SUPPORT" != "xnone"; then
+ EC_DYNAMIC_ARCH="intel"
+ fi
+ fi
+ ;;
+ arm*)
+ if test "x$enable_ec_dynamic_arm" != "xno"; then
+ if test "x$enable_ec_dynamic_neon" != "xno"; then
+ EC_DYNAMIC_SUPPORT="$EC_DYNAMIC_SUPPORT neon"
+ AC_DEFINE(USE_EC_DYNAMIC_NEON, 1, [Defined if using dynamic ARM NEON code])
+ fi
+
+ if test "x$EC_DYNAMIC_SUPPORT" != "xnone"; then
+ EC_DYNAMIC_ARCH="arm"
+ fi
+ fi
+ ;;
+ esac
+
+ EC_DYNAMIC_SUPPORT="${EC_DYNAMIC_SUPPORT#none }"
fi
+AM_CONDITIONAL([ENABLE_EC_DYNAMIC_INTEL], [test "x$EC_DYNAMIC_ARCH" = "xintel"])
+AM_CONDITIONAL([ENABLE_EC_DYNAMIC_ARM], [test "x$EC_DYNAMIC_ARCH" = "xarm"])
+
+AM_CONDITIONAL([ENABLE_EC_DYNAMIC_X64], [test "x${EC_DYNAMIC_SUPPORT##*x64*}" = "x"])
+AM_CONDITIONAL([ENABLE_EC_DYNAMIC_SSE], [test "x${EC_DYNAMIC_SUPPORT##*sse*}" = "x"])
+AM_CONDITIONAL([ENABLE_EC_DYNAMIC_AVX], [test "x${EC_DYNAMIC_SUPPORT##*avx*}" = "x"])
+AM_CONDITIONAL([ENABLE_EC_DYNAMIC_NEON], [test "x${EC_DYNAMIC_SUPPORT##*neon*}" = "x"])
+
+AC_SUBST(USE_EC_DYNAMIC_X64)
+AC_SUBST(USE_EC_DYNAMIC_SSE)
+AC_SUBST(USE_EC_DYNAMIC_AVX)
+AC_SUBST(USE_EC_DYNAMIC_NEON)
+
+# end EC dynamic code generation section
+
+dnl libglusterfs.so uses math functions
+GF_LDADD="${GF_LDADD} ${MATH_LIB}"
+
+case $host_os in
+ dnl Can't use libtool's portable "-no-undefined" as it seems to be ignored on Linux
+ linux*)
+ GF_NO_UNDEFINED='-Wl,--no-undefined'
+ ;;
+ darwin*)
+ GF_NO_UNDEFINED='-Wl,-undefined'
+ ;;
+ *)
+ dnl There's an issue on FreeBSD with reference to __progname used in some parts of code
+ GF_NO_UNDEFINED=''
+ ;;
+esac
+dnl GF_XLATOR_DEFAULT_LDFLAGS is for most xlators that expose a common set of symbols
+GF_XLATOR_DEFAULT_LDFLAGS='-avoid-version -export-symbols $(top_srcdir)/xlators/xlator.sym $(UUID_LIBS) $(GF_NO_UNDEFINED) $(TIRPC_LIBS)'
+dnl GF_XLATOR_LDFLAGS is for xlators that expose extra symbols, e.g. dht
+GF_XLATOR_LDFLAGS='-avoid-version $(UUID_LIBS) $(GF_NO_UNDEFINED) $(TIRPC_LIBS)'
+
AC_SUBST(GF_HOST_OS)
-AC_SUBST(GF_GLUSTERFS_LDFLAGS)
-AC_SUBST(GF_GLUSTERFS_CFLAGS)
AC_SUBST(GF_CFLAGS)
AC_SUBST(GF_LDFLAGS)
AC_SUBST(GF_LDADD)
AC_SUBST(GF_FUSE_LDADD)
AC_SUBST(GF_FUSE_CFLAGS)
AC_SUBST(RLLIBS)
+AC_SUBST(LIBAIO)
AC_SUBST(AM_MAKEFLAGS)
AC_SUBST(AM_LIBTOOLFLAGS)
+AC_SUBST(GF_NO_UNDEFINED)
+AC_SUBST(GF_XLATOR_DEFAULT_LDFLAGS)
+AC_SUBST(GF_XLATOR_LDFLAGS)
+AC_SUBST(GF_XLATOR_MGNT_LIBADD)
+
+case $host_os in
+ *freebsd*)
+ GF_XLATOR_MGNT_LIBADD="-lutil -lprocstat"
+ ;;
+esac
CONTRIBDIR='$(top_srcdir)/contrib'
AC_SUBST(CONTRIBDIR)
-INCLUDES='-I$(top_srcdir)/libglusterfs/src -I$(CONTRIBDIR)/uuid'
-AC_SUBST(INCLUDES)
+GF_CPPDEFINES='-D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)'
+GF_CPPINCLUDES='-include $(top_builddir)/config.h -include $(top_builddir)/site.h -I$(top_srcdir)/libglusterfs/src -I$(top_builddir)/libglusterfs/src'
+if test "x${USE_CONTRIB_URCU}" = "xyes"; then
+ GF_CPPINCLUDES="${GF_CPPINCLUDES} -I\$(CONTRIBDIR)/userspace-rcu"
+fi
+GF_CPPFLAGS="$GF_CPPFLAGS $GF_CPPDEFINES $GF_CPPINCLUDES"
+AC_SUBST([GF_CPPFLAGS])
+AM_CONDITIONAL([GF_LINUX_HOST_OS], test "${GF_HOST_OS}" = "GF_LINUX_HOST_OS")
AM_CONDITIONAL([GF_DARWIN_HOST_OS], test "${GF_HOST_OS}" = "GF_DARWIN_HOST_OS")
+AM_CONDITIONAL([GF_BSD_HOST_OS], test "${GF_HOST_OS}" = "GF_BSD_HOST_OS")
+if test "${GF_HOST_OS}" = "GF_BSD_HOST_OS"; then
+ AC_DEFINE(GF_BSD_HOST_OS, 1, [This is a BSD compatible OS.])
+fi
-AM_CONDITIONAL([GF_INSTALL_VAR_LIB_GLUSTERD], test ! -d /var/lib/glusterd && test -d /etc/glusterd )
+AC_SUBST(GLUSTERD_WORKDIR)
+AM_CONDITIONAL([GF_INSTALL_GLUSTERD_WORKDIR], test ! -d ${GLUSTERD_WORKDIR} && test -d ${sysconfdir}/glusterd )
+AC_SUBST(GLUSTERD_VOLFILE)
+AC_SUBST(GLUSTERFS_LIBEXECDIR)
+AC_SUBST(GLUSTERFSD_MISCDIR)
+
+dnl pkg-config versioning
+dnl
+dnl Once we released gluster-api.pc with version=7. Since then we undid the
+dnl library versioning and replaced it with symbol-versioning. The current
+dnl libgfapi.so has version 0, but the symbols have the version from the main
+dnl package at the time they were added.
+dnl
+dnl Because other packages (like samba) use the pkg-config version, we can not
+dnl drop it, or decrease the version easily. The simplest solution is to keep
+dnl the version=7 and add sub-digits for the actual package/symbol versions.
+GFAPI_VERSION="7."${PACKAGE_VERSION}
+LIBGFCHANGELOG_VERSION="0.0.1"
+AC_SUBST(GFAPI_VERSION)
+AC_SUBST(LIBGFCHANGELOG_VERSION)
+
+dnl libtool versioning
+LIBGFXDR_LT_VERSION="0:1:0"
+LIBGFRPC_LT_VERSION="0:1:0"
+LIBGLUSTERFS_LT_VERSION="0:1:0"
+LIBGFCHANGELOG_LT_VERSION="0:1:0"
+GFAPI_LT_VERSION="0:0:0"
+AC_SUBST(LIBGFXDR_LT_VERSION)
+AC_SUBST(LIBGFRPC_LT_VERSION)
+AC_SUBST(LIBGLUSTERFS_LT_VERSION)
+AC_SUBST(LIBGFCHANGELOG_LT_VERSION)
+AC_SUBST(GFAPI_LT_VERSION)
+AC_SUBST(GFAPI_EXTRA_LDFLAGS)
+
+GFAPI_LIBS="${ACL_LIBS}"
+AC_SUBST(GFAPI_LIBS)
+
+dnl this change necessary for run-tests.sh
+AC_CONFIG_FILES([tests/env.rc],[ln -s ${ac_abs_builddir}/env.rc ${ac_abs_srcdir}/env.rc 2>/dev/null])
AC_OUTPUT
echo
echo "GlusterFS configure summary"
echo "==========================="
-echo "FUSE client : $BUILD_FUSE_CLIENT"
-echo "Infiniband verbs : $BUILD_IBVERBS"
-echo "epoll IO multiplex : $BUILD_EPOLL"
-echo "argp-standalone : $BUILD_ARGP_STANDALONE"
-echo "fusermount : $BUILD_FUSERMOUNT"
-echo "readline : $BUILD_READLINE"
-echo "georeplication : $BUILD_SYNCDAEMON"
+echo "FUSE client : $BUILD_FUSE_CLIENT"
+echo "epoll IO multiplex : $BUILD_EPOLL"
+echo "fusermount : $BUILD_FUSERMOUNT"
+echo "readline : $BUILD_READLINE"
+echo "georeplication : $BUILD_SYNCDAEMON"
+echo "Linux-AIO : $BUILD_LIBAIO"
+echo "Enable Debug : $BUILD_DEBUG"
+echo "Run with Valgrind : $VALGRIND_TOOL"
+echo "Sanitizer enabled : $SANITIZER"
+echo "Use syslog : $USE_SYSLOG"
+echo "XML output : $BUILD_XML_OUTPUT"
+echo "Unit Tests : $BUILD_UNITTEST"
+echo "Track priv ports : $TRACK_PRIVPORTS"
+echo "POSIX ACLs : $BUILD_POSIX_ACLS"
+echo "SELinux features : $USE_SELINUX"
+echo "firewalld-config : $BUILD_FIREWALLD"
+echo "Events : $BUILD_EVENTS"
+echo "EC dynamic support : $EC_DYNAMIC_SUPPORT"
+echo "Use memory pools : $USE_MEMPOOL"
+echo "Nanosecond m/atimes : $BUILD_NANOSECOND_TIMESTAMPS"
+echo "Server components : $with_server"
+echo "Legacy gNFS server : $BUILD_GNFS"
+echo "IPV6 default : $with_ipv6_default"
+echo "Use TIRPC : $with_libtirpc"
+echo "With Python : ${PYTHON_VERSION}"
+echo "Cloudsync : $BUILD_CLOUDSYNC"
+echo "Metadata dispersal : $BUILD_METADISP"
+echo "Link with TCMALLOC : $BUILD_TCMALLOC"
echo
+
+# dnl Note: ${X^^} capitalization assumes bash >= 4.x
+if test "x$SANITIZER" != "xnone"; then
+ echo "Note: since glusterfs processes are daemon processes, use"
+ echo "'export ${SANITIZER^^}_OPTIONS=log_path=/path/to/xxx.log' to collect"
+ echo "sanitizer output. Further details and more options can be"
+ echo "found at https://github.com/google/sanitizers."
+fi
diff --git a/contrib/aclocal/mkdirp.m4 b/contrib/aclocal/mkdirp.m4
new file mode 100644
index 00000000000..d2f7edd5ccd
--- /dev/null
+++ b/contrib/aclocal/mkdirp.m4
@@ -0,0 +1,146 @@
+# Excerpt from autoconf/autoconf/programs.m4
+# This file is part of Autoconf. -*- Autoconf -*-
+# Checking for programs.
+
+# Copyright (C) 1992, 1993, 1994, 1995, 1996, 1998, 1999, 2000, 2001,
+# 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 Free Software
+# Foundation, Inc.
+
+# This file is part of Autoconf. This program is free
+# software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the
+# Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the Autoconf Configure Script Exception,
+# version 3.0, as published by the Free Software Foundation.
+#
+# You should have received a copy of the GNU General Public License
+# and a copy of the Autoconf Configure Script Exception along with
+# this program; see the files COPYINGv3 and COPYING.EXCEPTION
+# respectively. If not, see <http://www.gnu.org/licenses/>.
+
+# Written by David MacKenzie, with help from
+# Franc,ois Pinard, Karl Berry, Richard Pixley, Ian Lance Taylor,
+# Roland McGrath, Noah Friedman, david d zuhn, and many others.
+
+# AC_PROG_MKDIR_P
+# ---------------
+# Check whether `mkdir -p' is known to be thread-safe, and fall back to
+# install-sh -d otherwise.
+#
+# Automake 1.8 used `mkdir -m 0755 -p --' to ensure that directories
+# created by `make install' are always world readable, even if the
+# installer happens to have an overly restrictive umask (e.g. 077).
+# This was a mistake. There are at least two reasons why we must not
+# use `-m 0755':
+# - it causes special bits like SGID to be ignored,
+# - it may be too restrictive (some setups expect 775 directories).
+#
+# Do not use -m 0755 and let people choose whatever they expect by
+# setting umask.
+#
+# We cannot accept any implementation of `mkdir' that recognizes `-p'.
+# Some implementations (such as Solaris 8's) are vulnerable to race conditions:
+# if a parallel make tries to run `mkdir -p a/b' and `mkdir -p a/c'
+# concurrently, both version can detect that a/ is missing, but only
+# one can create it and the other will error out. Consequently we
+# restrict ourselves to known race-free implementations.
+#
+# Automake used to define mkdir_p as `mkdir -p .', in order to
+# allow $(mkdir_p) to be used without argument. As in
+# $(mkdir_p) $(somedir)
+# where $(somedir) is conditionally defined. However we don't do
+# that for MKDIR_P.
+# 1. before we restricted the check to GNU mkdir, `mkdir -p .' was
+# reported to fail in read-only directories. The system where this
+# happened has been forgotten.
+# 2. in practice we call $(MKDIR_P) on directories such as
+# $(MKDIR_P) "$(DESTDIR)$(somedir)"
+# and we don't want to create $(DESTDIR) if $(somedir) is empty.
+# To support the latter case, we have to write
+# test -z "$(somedir)" || $(MKDIR_P) "$(DESTDIR)$(somedir)"
+# so $(MKDIR_P) always has an argument.
+# We will have better chances of detecting a missing test if
+# $(MKDIR_P) complains about missing arguments.
+# 3. $(MKDIR_P) is named after `mkdir -p' and we don't expect this
+# to accept no argument.
+# 4. having something like `mkdir .' in the output is unsightly.
+#
+# On NextStep and OpenStep, the `mkdir' command does not
+# recognize any option. It will interpret all options as
+# directories to create.
+AN_MAKEVAR([MKDIR_P], [AC_PROG_MKDIR_P])
+AC_DEFUN_ONCE([AC_PROG_MKDIR_P],
+[AC_REQUIRE([AC_CONFIG_AUX_DIR_DEFAULT])dnl
+
+AC_MSG_CHECKING([for a thread-safe mkdir -p])
+if test -z "$MKDIR_P"; then
+ AC_CACHE_VAL([ac_cv_path_mkdir],
+ [_AS_PATH_WALK([$PATH$PATH_SEPARATOR/opt/sfw/bin],
+ [for ac_prog in mkdir gmkdir; do
+ for ac_exec_ext in '' $ac_executable_extensions; do
+ AS_EXECUTABLE_P(["$as_dir/$ac_prog$ac_exec_ext"]) || continue
+ case `"$as_dir/$ac_prog$ac_exec_ext" --version 2>&1` in #(
+ 'mkdir (GNU coreutils) '* | \
+ 'mkdir (coreutils) '* | \
+ 'mkdir (fileutils) '4.1*)
+ ac_cv_path_mkdir=$as_dir/$ac_prog$ac_exec_ext
+ break 3;;
+ esac
+ done
+ done])])
+ test -d ./--version && rmdir ./--version
+ if test "${ac_cv_path_mkdir+set}" = set; then
+ MKDIR_P="$ac_cv_path_mkdir -p"
+ else
+ # As a last resort, use the slow shell script. Don't cache a
+ # value for MKDIR_P within a source directory, because that will
+ # break other packages using the cache if that directory is
+ # removed, or if the value is a relative name.
+ MKDIR_P="$ac_install_sh -d"
+ fi
+fi
+dnl status.m4 does special magic for MKDIR_P instead of AC_SUBST,
+dnl to get relative names right. However, also AC_SUBST here so
+dnl that Automake versions before 1.10 will pick it up (they do not
+dnl trace AC_SUBST_TRACE).
+dnl FIXME: Remove this once we drop support for Automake < 1.10.
+AC_SUBST([MKDIR_P])dnl
+AC_MSG_RESULT([$MKDIR_P])
+])# AC_PROG_MKDIR_P
+
+
+# From automake/m4/mkdirp.m4
+## -*- Autoconf -*-
+# Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PROG_MKDIR_P
+# ---------------
+# Check for `mkdir -p'.
+AC_DEFUN([AM_PROG_MKDIR_P],
+[
+AC_REQUIRE([AC_PROG_MKDIR_P])dnl
+dnl Automake 1.8 to 1.9.6 used to define mkdir_p. We now use MKDIR_P,
+dnl while keeping a definition of mkdir_p for backward compatibility.
+dnl @MKDIR_P@ is magic: AC_OUTPUT adjusts its value for each Makefile.
+dnl However we cannot define mkdir_p as $(MKDIR_P) for the sake of
+dnl Makefile.ins that do not define MKDIR_P, so we do our own
+dnl adjustment using top_builddir (which is defined more often than
+dnl MKDIR_P).
+AC_SUBST([mkdir_p], ["$MKDIR_P"])dnl
+case $mkdir_p in
+ [[\\/$]]* | ?:[[\\/]]*) ;;
+ */*) mkdir_p="\$(top_builddir)/$mkdir_p" ;;
+esac
+])
diff --git a/contrib/aclocal/python.m4 b/contrib/aclocal/python.m4
new file mode 100644
index 00000000000..a39a9009046
--- /dev/null
+++ b/contrib/aclocal/python.m4
@@ -0,0 +1,209 @@
+## ------------------------ -*- Autoconf -*-
+## Python file handling
+## From Andrew Dalke
+## Updated by James Henstridge
+## ------------------------
+# Copyright (C) 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2008, 2009
+# Free Software Foundation, Inc.
+#
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# AM_PATH_PYTHON([MINIMUM-VERSION], [ACTION-IF-FOUND], [ACTION-IF-NOT-FOUND])
+# ---------------------------------------------------------------------------
+# Adds support for distributing Python modules and packages. To
+# install modules, copy them to $(pythondir), using the python_PYTHON
+# automake variable. To install a package with the same name as the
+# automake package, install to $(pkgpythondir), or use the
+# pkgpython_PYTHON automake variable.
+#
+# The variables $(pyexecdir) and $(pkgpyexecdir) are provided as
+# locations to install python extension modules (shared libraries).
+# Another macro is required to find the appropriate flags to compile
+# extension modules.
+#
+# If your package is configured with a different prefix to python,
+# users will have to add the install directory to the PYTHONPATH
+# environment variable, or create a .pth file (see the python
+# documentation for details).
+#
+# If the MINIMUM-VERSION argument is passed, AM_PATH_PYTHON will
+# cause an error if the version of python installed on the system
+# doesn't meet the requirement. MINIMUM-VERSION should consist of
+# numbers and dots only.
+AC_DEFUN([AM_PATH_PYTHON],
+ [
+ dnl Find a Python interpreter. Python versions prior to 2.0 are not
+ dnl supported. (2.0 was released on October 16, 2000).
+ m4_define_default([_AM_PYTHON_INTERPRETER_LIST],
+ [python python2 python3 python3.2 python3.1 python3.0 python2.7 python2.6 python2.5 python2.4 python2.3 python2.2 dnl
+python2.1 python2.0])
+
+ m4_if([$1],[],[
+ dnl No version check is needed.
+ # Find any Python interpreter.
+ if test -z "$PYTHON"; then
+ AC_PATH_PROGS([PYTHON], _AM_PYTHON_INTERPRETER_LIST, :)
+ fi
+ am_display_PYTHON=python
+ ], [
+ dnl A version check is needed.
+ if test -n "$PYTHON"; then
+ # If the user set $PYTHON, use it and don't search something else.
+ AC_MSG_CHECKING([whether $PYTHON version >= $1])
+ AM_PYTHON_CHECK_VERSION([$PYTHON], [$1],
+ [AC_MSG_RESULT(yes)],
+ [AC_MSG_ERROR(too old)])
+ am_display_PYTHON=$PYTHON
+ else
+ # Otherwise, try each interpreter until we find one that satisfies
+ # VERSION.
+ AC_CACHE_CHECK([for a Python interpreter with version >= $1],
+ [am_cv_pathless_PYTHON],[
+ for am_cv_pathless_PYTHON in _AM_PYTHON_INTERPRETER_LIST none; do
+ test "$am_cv_pathless_PYTHON" = none && break
+ AM_PYTHON_CHECK_VERSION([$am_cv_pathless_PYTHON], [$1], [break])
+ done])
+ # Set $PYTHON to the absolute path of $am_cv_pathless_PYTHON.
+ if test "$am_cv_pathless_PYTHON" = none; then
+ PYTHON=:
+ else
+ AC_PATH_PROG([PYTHON], [$am_cv_pathless_PYTHON])
+ fi
+ am_display_PYTHON=$am_cv_pathless_PYTHON
+ fi
+ ])
+
+ if test "$PYTHON" = :; then
+ dnl Run any user-specified action, or abort.
+ m4_default([$3], [AC_MSG_ERROR([no suitable Python interpreter found])])
+ else
+
+ dnl Query Python for its version number. Getting [:3] seems to be
+ dnl the best way to do this; it's what "site.py" does in the standard
+ dnl library.
+
+ AC_CACHE_CHECK([for $am_display_PYTHON version], [am_cv_python_version],
+ [am_cv_python_version=`$PYTHON -c "import sys; sys.stdout.write(sys.version[[:3]])"`])
+ AC_SUBST([PYTHON_VERSION], [$am_cv_python_version])
+
+ dnl Use the values of $prefix and $exec_prefix for the corresponding
+ dnl values of PYTHON_PREFIX and PYTHON_EXEC_PREFIX. These are made
+ dnl distinct variables so they can be overridden if need be. However,
+ dnl general consensus is that you shouldn't need this ability.
+
+ AC_SUBST([PYTHON_PREFIX], ['${prefix}'])
+ AC_SUBST([PYTHON_EXEC_PREFIX], ['${exec_prefix}'])
+
+ dnl At times (like when building shared libraries) you may want
+ dnl to know which OS platform Python thinks this is.
+
+ AC_CACHE_CHECK([for $am_display_PYTHON platform], [am_cv_python_platform],
+ [am_cv_python_platform=`$PYTHON -c "import sys; sys.stdout.write(sys.platform)"`])
+ AC_SUBST([PYTHON_PLATFORM], [$am_cv_python_platform])
+
+
+ dnl Set up 4 directories:
+
+ dnl pythondir -- where to install python scripts. This is the
+ dnl site-packages directory, not the python standard library
+ dnl directory like in previous automake betas. This behavior
+ dnl is more consistent with lispdir.m4 for example.
+ dnl Query distutils for this directory. distutils does not exist in
+ dnl Python 1.5, so we fall back to the hardcoded directory if it
+ dnl doesn't work.
+ AC_CACHE_CHECK([for $am_display_PYTHON script directory],
+ [am_cv_python_pythondir],
+ [if test "x$prefix" = xNONE
+ then
+ am_py_prefix=$ac_default_prefix
+ else
+ am_py_prefix=$prefix
+ fi
+ am_cv_python_pythondir=`$PYTHON -c "import sys; from distutils import sysconfig; sys.stdout.write(sysconfig.get_python_lib(0,0,prefix='$am_py_prefix'))" 2>/dev/null ||
+ echo "$PYTHON_PREFIX/lib/python$PYTHON_VERSION/site-packages"`
+ case $am_cv_python_pythondir in
+ $am_py_prefix*)
+ am__strip_prefix=`echo "$am_py_prefix" | sed 's|.|.|g'`
+ am_cv_python_pythondir=`echo "$am_cv_python_pythondir" | sed "s,^$am__strip_prefix,$PYTHON_PREFIX,"`
+ ;;
+ *)
+ case $am_py_prefix in
+ /usr|/System*) ;;
+ *)
+ am_cv_python_pythondir=$PYTHON_PREFIX/lib/python$PYTHON_VERSION/site-packages
+ ;;
+ esac
+ ;;
+ esac
+ ])
+ AC_SUBST([pythondir], [$am_cv_python_pythondir])
+
+ dnl pkgpythondir -- $PACKAGE directory under pythondir. Was
+ dnl PYTHON_SITE_PACKAGE in previous betas, but this naming is
+ dnl more consistent with the rest of automake.
+
+ AC_SUBST([pkgpythondir], [\${pythondir}/$PACKAGE])
+
+ dnl pyexecdir -- directory for installing python extension modules
+ dnl (shared libraries)
+ dnl Query distutils for this directory. distutils does not exist in
+ dnl Python 1.5, so we fall back to the hardcoded directory if it
+ dnl doesn't work.
+ AC_CACHE_CHECK([for $am_display_PYTHON extension module directory],
+ [am_cv_python_pyexecdir],
+ [if test "x$exec_prefix" = xNONE
+ then
+ am_py_exec_prefix=$am_py_prefix
+ else
+ am_py_exec_prefix=$exec_prefix
+ fi
+ am_cv_python_pyexecdir=`$PYTHON -c "import sys; from distutils import sysconfig; sys.stdout.write(sysconfig.get_python_lib(1,0,prefix='$am_py_exec_prefix'))" 2>/dev/null ||
+ echo "$PYTHON_EXEC_PREFIX/lib/python$PYTHON_VERSION/site-packages"`
+ case $am_cv_python_pyexecdir in
+ $am_py_exec_prefix*)
+ am__strip_prefix=`echo "$am_py_exec_prefix" | sed 's|.|.|g'`
+ am_cv_python_pyexecdir=`echo "$am_cv_python_pyexecdir" | sed "s,^$am__strip_prefix,$PYTHON_EXEC_PREFIX,"`
+ ;;
+ *)
+ case $am_py_exec_prefix in
+ /usr|/System*) ;;
+ *)
+ am_cv_python_pyexecdir=$PYTHON_EXEC_PREFIX/lib/python$PYTHON_VERSION/site-packages
+ ;;
+ esac
+ ;;
+ esac
+ ])
+ AC_SUBST([pyexecdir], [$am_cv_python_pyexecdir])
+
+ dnl pkgpyexecdir -- $(pyexecdir)/$(PACKAGE)
+
+ AC_SUBST([pkgpyexecdir], [\${pyexecdir}/$PACKAGE])
+
+ dnl Run any user-specified action.
+ $2
+ fi
+
+])
+
+
+# AM_PYTHON_CHECK_VERSION(PROG, VERSION, [ACTION-IF-TRUE], [ACTION-IF-FALSE])
+# ---------------------------------------------------------------------------
+# Run ACTION-IF-TRUE if the Python interpreter PROG has version >= VERSION.
+# Run ACTION-IF-FALSE otherwise.
+# This test uses sys.hexversion instead of the string equivalent (first
+# word of sys.version), in order to cope with versions such as 2.2c1.
+# This supports Python 2.0 or higher. (2.0 was released on October 16, 2000).
+AC_DEFUN([AM_PYTHON_CHECK_VERSION],
+ [prog="import sys
+# split strings by '.' and convert to numeric. Append some zeros
+# because we need at least 4 digits for the hex conversion.
+# map returns an iterator in Python 3.0 and a list in 2.x
+minver = list(map(int, '$2'.split('.'))) + [[0, 0, 0]]
+minverhex = 0
+# xrange is not present in Python 3.0 and range returns an iterator
+for i in list(range(0, 4)): minverhex = (minverhex << 8) + minver[[i]]
+sys.exit(sys.hexversion < minverhex)"
+ AS_IF([AM_RUN_LOG([$1 -c "$prog"])], [$3], [$4])])
diff --git a/contrib/fuse-include/fuse-mount.h b/contrib/fuse-include/fuse-mount.h
index 7a3756d92b8..7d28462de47 100644
--- a/contrib/fuse-include/fuse-mount.h
+++ b/contrib/fuse-include/fuse-mount.h
@@ -8,5 +8,6 @@
*/
void gf_fuse_unmount (const char *mountpoint, int fd);
+int gf_fuse_unmount_daemon (const char *mountpoint, int fd);
int gf_fuse_mount (const char *mountpoint, char *fsname, char *mnt_param,
pid_t *mtab_pid, int status_fd);
diff --git a/contrib/fuse-include/fuse_kernel.h b/contrib/fuse-include/fuse_kernel.h
index 9ae25d6f9c0..1e41e237e6f 100644
--- a/contrib/fuse-include/fuse_kernel.h
+++ b/contrib/fuse-include/fuse_kernel.h
@@ -60,23 +60,87 @@
* 7.13
* - make max number of background requests and congestion threshold
* tunables
+ *
+ * 7.14
+ * - add splice support to fuse device
+ *
+ * 7.15
+ * - add store notify
+ * - add retrieve notify
+ *
+ * 7.16
+ * - add BATCH_FORGET request
+ * - FUSE_IOCTL_UNRESTRICTED shall now return with array of 'struct
+ * fuse_ioctl_iovec' instead of ambiguous 'struct iovec'
+ * - add FUSE_IOCTL_32BIT flag
+ *
+ * 7.17
+ * - add FUSE_FLOCK_LOCKS and FUSE_RELEASE_FLOCK_UNLOCK
+ *
+ * 7.18
+ * - add FUSE_IOCTL_DIR flag
+ * - add FUSE_NOTIFY_DELETE
+ *
+ * 7.19
+ * - add FUSE_FALLOCATE
+ *
+ * 7.20
+ * - add FUSE_AUTO_INVAL_DATA
+ *
+ * 7.21
+ * - add FUSE_READDIRPLUS
+ * - send the requested events in POLL request
+ *
+ * 7.22
+ * - add FUSE_ASYNC_DIO
+ *
+ * 7.23
+ * - add FUSE_WRITEBACK_CACHE
+ * - add time_gran to fuse_init_out
+ * - add reserved space to fuse_init_out
+ * - add FATTR_CTIME
+ * - add ctime and ctimensec to fuse_setattr_in
+ * - add FUSE_RENAME2 request
+ * - add FUSE_NO_OPEN_SUPPORT flag
+ *
+ * 7.24
+ * - add FUSE_LSEEK for SEEK_HOLE and SEEK_DATA support
*/
#ifndef _LINUX_FUSE_H
#define _LINUX_FUSE_H
-#include <sys/types.h>
-#define __u64 uint64_t
-#define __s64 int64_t
-#define __u32 uint32_t
-#define __s32 int32_t
-#define __u16 uint16_t
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+/*
+ * Version negotiation:
+ *
+ * Both the kernel and userspace send the version they support in the
+ * INIT request and reply respectively.
+ *
+ * If the major versions match then both shall use the smallest
+ * of the two minor versions for communication.
+ *
+ * If the kernel supports a larger major version, then userspace shall
+ * reply with the major version it supports, ignore the rest of the
+ * INIT message and expect a new INIT message from the kernel with a
+ * matching major version.
+ *
+ * If the library supports a larger major version, then it shall fall
+ * back to the major protocol version sent by the kernel for
+ * communication and reply with that major version (and an arbitrary
+ * supported minor version).
+ */
/** Version number of this interface */
#define FUSE_KERNEL_VERSION 7
/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 13
+#define FUSE_KERNEL_MINOR_VERSION 24
/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@@ -85,42 +149,42 @@
userspace works under 64bit kernels */
struct fuse_attr {
- __u64 ino;
- __u64 size;
- __u64 blocks;
- __u64 atime;
- __u64 mtime;
- __u64 ctime;
- __u32 atimensec;
- __u32 mtimensec;
- __u32 ctimensec;
- __u32 mode;
- __u32 nlink;
- __u32 uid;
- __u32 gid;
- __u32 rdev;
- __u32 blksize;
- __u32 padding;
+ uint64_t ino;
+ uint64_t size;
+ uint64_t blocks;
+ uint64_t atime;
+ uint64_t mtime;
+ uint64_t ctime;
+ uint32_t atimensec;
+ uint32_t mtimensec;
+ uint32_t ctimensec;
+ uint32_t mode;
+ uint32_t nlink;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t rdev;
+ uint32_t blksize;
+ uint32_t padding;
};
struct fuse_kstatfs {
- __u64 blocks;
- __u64 bfree;
- __u64 bavail;
- __u64 files;
- __u64 ffree;
- __u32 bsize;
- __u32 namelen;
- __u32 frsize;
- __u32 padding;
- __u32 spare[6];
+ uint64_t blocks;
+ uint64_t bfree;
+ uint64_t bavail;
+ uint64_t files;
+ uint64_t ffree;
+ uint32_t bsize;
+ uint32_t namelen;
+ uint32_t frsize;
+ uint32_t padding;
+ uint32_t spare[6];
};
struct fuse_file_lock {
- __u64 start;
- __u64 end;
- __u32 type;
- __u32 pid; /* tgid */
+ uint64_t start;
+ uint64_t end;
+ uint32_t type;
+ uint32_t pid; /* tgid */
};
/**
@@ -136,6 +200,7 @@ struct fuse_file_lock {
#define FATTR_ATIME_NOW (1 << 7)
#define FATTR_MTIME_NOW (1 << 8)
#define FATTR_LOCKOWNER (1 << 9)
+#define FATTR_CTIME (1 << 10)
/**
* Flags returned by the OPEN request
@@ -151,8 +216,24 @@ struct fuse_file_lock {
/**
* INIT request/reply flags
*
+ * FUSE_ASYNC_READ: asynchronous read requests
+ * FUSE_POSIX_LOCKS: remote locking for POSIX file locks
+ * FUSE_FILE_OPS: kernel sends file handle for fstat, etc... (not yet supported)
+ * FUSE_ATOMIC_O_TRUNC: handles the O_TRUNC open flag in the filesystem
* FUSE_EXPORT_SUPPORT: filesystem handles lookups of "." and ".."
+ * FUSE_BIG_WRITES: filesystem can handle write size larger than 4kB
* FUSE_DONT_MASK: don't apply umask to file mode on create operations
+ * FUSE_SPLICE_WRITE: kernel supports splice write on the device
+ * FUSE_SPLICE_MOVE: kernel supports splice move on the device
+ * FUSE_SPLICE_READ: kernel supports splice read on the device
+ * FUSE_FLOCK_LOCKS: remote locking for BSD style file locks
+ * FUSE_HAS_IOCTL_DIR: kernel supports ioctl on directories
+ * FUSE_AUTO_INVAL_DATA: automatically invalidate cached pages
+ * FUSE_DO_READDIRPLUS: do READDIRPLUS (READDIR+LOOKUP in one)
+ * FUSE_READDIRPLUS_AUTO: adaptive readdirplus
+ * FUSE_ASYNC_DIO: asynchronous direct I/O submission
+ * FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes
+ * FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@@ -161,6 +242,17 @@ struct fuse_file_lock {
#define FUSE_EXPORT_SUPPORT (1 << 4)
#define FUSE_BIG_WRITES (1 << 5)
#define FUSE_DONT_MASK (1 << 6)
+#define FUSE_SPLICE_WRITE (1 << 7)
+#define FUSE_SPLICE_MOVE (1 << 8)
+#define FUSE_SPLICE_READ (1 << 9)
+#define FUSE_FLOCK_LOCKS (1 << 10)
+#define FUSE_HAS_IOCTL_DIR (1 << 11)
+#define FUSE_AUTO_INVAL_DATA (1 << 12)
+#define FUSE_DO_READDIRPLUS (1 << 13)
+#define FUSE_READDIRPLUS_AUTO (1 << 14)
+#define FUSE_ASYNC_DIO (1 << 15)
+#define FUSE_WRITEBACK_CACHE (1 << 16)
+#define FUSE_NO_OPEN_SUPPORT (1 << 17)
/**
* CUSE INIT request/reply flags
@@ -173,6 +265,7 @@ struct fuse_file_lock {
* Release flags
*/
#define FUSE_RELEASE_FLUSH (1 << 0)
+#define FUSE_RELEASE_FLOCK_UNLOCK (1 << 1)
/**
* Getattr flags
@@ -204,12 +297,16 @@ struct fuse_file_lock {
* FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine
* FUSE_IOCTL_UNRESTRICTED: not restricted to well-formed ioctls, retry allowed
* FUSE_IOCTL_RETRY: retry with new iovecs
+ * FUSE_IOCTL_32BIT: 32bit ioctl
+ * FUSE_IOCTL_DIR: is a directory
*
* FUSE_IOCTL_MAX_IOV: maximum of in_iovecs + out_iovecs
*/
#define FUSE_IOCTL_COMPAT (1 << 0)
#define FUSE_IOCTL_UNRESTRICTED (1 << 1)
#define FUSE_IOCTL_RETRY (1 << 2)
+#define FUSE_IOCTL_32BIT (1 << 3)
+#define FUSE_IOCTL_DIR (1 << 4)
#define FUSE_IOCTL_MAX_IOV 256
@@ -259,6 +356,12 @@ enum fuse_opcode {
FUSE_DESTROY = 38,
FUSE_IOCTL = 39,
FUSE_POLL = 40,
+ FUSE_NOTIFY_REPLY = 41,
+ FUSE_BATCH_FORGET = 42,
+ FUSE_FALLOCATE = 43,
+ FUSE_READDIRPLUS = 44,
+ FUSE_RENAME2 = 45,
+ FUSE_LSEEK = 46,
/* CUSE specific operations */
CUSE_INIT = 4096,
@@ -268,6 +371,9 @@ enum fuse_notify_code {
FUSE_NOTIFY_POLL = 1,
FUSE_NOTIFY_INVAL_INODE = 2,
FUSE_NOTIFY_INVAL_ENTRY = 3,
+ FUSE_NOTIFY_STORE = 4,
+ FUSE_NOTIFY_RETRIEVE = 5,
+ FUSE_NOTIFY_DELETE = 6,
FUSE_NOTIFY_CODE_MAX,
};
@@ -277,133 +383,149 @@ enum fuse_notify_code {
#define FUSE_COMPAT_ENTRY_OUT_SIZE 120
struct fuse_entry_out {
- __u64 nodeid; /* Inode ID */
- __u64 generation; /* Inode generation: nodeid:gen must
- be unique for the fs's lifetime */
- __u64 entry_valid; /* Cache timeout for the name */
- __u64 attr_valid; /* Cache timeout for the attributes */
- __u32 entry_valid_nsec;
- __u32 attr_valid_nsec;
+ uint64_t nodeid; /* Inode ID */
+ uint64_t generation; /* Inode generation: nodeid:gen must
+ be unique for the fs's lifetime */
+ uint64_t entry_valid; /* Cache timeout for the name */
+ uint64_t attr_valid; /* Cache timeout for the attributes */
+ uint32_t entry_valid_nsec;
+ uint32_t attr_valid_nsec;
struct fuse_attr attr;
};
struct fuse_forget_in {
- __u64 nlookup;
+ uint64_t nlookup;
+};
+
+struct fuse_forget_one {
+ uint64_t nodeid;
+ uint64_t nlookup;
+};
+
+struct fuse_batch_forget_in {
+ uint32_t count;
+ uint32_t dummy;
};
struct fuse_getattr_in {
- __u32 getattr_flags;
- __u32 dummy;
- __u64 fh;
+ uint32_t getattr_flags;
+ uint32_t dummy;
+ uint64_t fh;
};
#define FUSE_COMPAT_ATTR_OUT_SIZE 96
struct fuse_attr_out {
- __u64 attr_valid; /* Cache timeout for the attributes */
- __u32 attr_valid_nsec;
- __u32 dummy;
+ uint64_t attr_valid; /* Cache timeout for the attributes */
+ uint32_t attr_valid_nsec;
+ uint32_t dummy;
struct fuse_attr attr;
};
#define FUSE_COMPAT_MKNOD_IN_SIZE 8
struct fuse_mknod_in {
- __u32 mode;
- __u32 rdev;
- __u32 umask;
- __u32 padding;
+ uint32_t mode;
+ uint32_t rdev;
+ uint32_t umask;
+ uint32_t padding;
};
struct fuse_mkdir_in {
- __u32 mode;
- __u32 umask;
+ uint32_t mode;
+ uint32_t umask;
};
struct fuse_rename_in {
- __u64 newdir;
+ uint64_t newdir;
+};
+
+struct fuse_rename2_in {
+ uint64_t newdir;
+ uint32_t flags;
+ uint32_t padding;
};
struct fuse_link_in {
- __u64 oldnodeid;
+ uint64_t oldnodeid;
};
struct fuse_setattr_in {
- __u32 valid;
- __u32 padding;
- __u64 fh;
- __u64 size;
- __u64 lock_owner;
- __u64 atime;
- __u64 mtime;
- __u64 unused2;
- __u32 atimensec;
- __u32 mtimensec;
- __u32 unused3;
- __u32 mode;
- __u32 unused4;
- __u32 uid;
- __u32 gid;
- __u32 unused5;
+ uint32_t valid;
+ uint32_t padding;
+ uint64_t fh;
+ uint64_t size;
+ uint64_t lock_owner;
+ uint64_t atime;
+ uint64_t mtime;
+ uint64_t ctime;
+ uint32_t atimensec;
+ uint32_t mtimensec;
+ uint32_t ctimensec;
+ uint32_t mode;
+ uint32_t unused4;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t unused5;
};
struct fuse_open_in {
- __u32 flags;
- __u32 unused;
+ uint32_t flags;
+ uint32_t unused;
};
struct fuse_create_in {
- __u32 flags;
- __u32 mode;
- __u32 umask;
- __u32 padding;
+ uint32_t flags;
+ uint32_t mode;
+ uint32_t umask;
+ uint32_t padding;
};
struct fuse_open_out {
- __u64 fh;
- __u32 open_flags;
- __u32 padding;
+ uint64_t fh;
+ uint32_t open_flags;
+ uint32_t padding;
};
struct fuse_release_in {
- __u64 fh;
- __u32 flags;
- __u32 release_flags;
- __u64 lock_owner;
+ uint64_t fh;
+ uint32_t flags;
+ uint32_t release_flags;
+ uint64_t lock_owner;
};
struct fuse_flush_in {
- __u64 fh;
- __u32 unused;
- __u32 padding;
- __u64 lock_owner;
+ uint64_t fh;
+ uint32_t unused;
+ uint32_t padding;
+ uint64_t lock_owner;
};
struct fuse_read_in {
- __u64 fh;
- __u64 offset;
- __u32 size;
- __u32 read_flags;
- __u64 lock_owner;
- __u32 flags;
- __u32 padding;
+ uint64_t fh;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t read_flags;
+ uint64_t lock_owner;
+ uint32_t flags;
+ uint32_t padding;
};
#define FUSE_COMPAT_WRITE_IN_SIZE 24
struct fuse_write_in {
- __u64 fh;
- __u64 offset;
- __u32 size;
- __u32 write_flags;
- __u64 lock_owner;
- __u32 flags;
- __u32 padding;
+ uint64_t fh;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t write_flags;
+ uint64_t lock_owner;
+ uint32_t flags;
+ uint32_t padding;
};
struct fuse_write_out {
- __u32 size;
- __u32 padding;
+ uint32_t size;
+ uint32_t padding;
};
#define FUSE_COMPAT_STATFS_SIZE 48
@@ -413,32 +535,32 @@ struct fuse_statfs_out {
};
struct fuse_fsync_in {
- __u64 fh;
- __u32 fsync_flags;
- __u32 padding;
+ uint64_t fh;
+ uint32_t fsync_flags;
+ uint32_t padding;
};
struct fuse_setxattr_in {
- __u32 size;
- __u32 flags;
+ uint32_t size;
+ uint32_t flags;
};
struct fuse_getxattr_in {
- __u32 size;
- __u32 padding;
+ uint32_t size;
+ uint32_t padding;
};
struct fuse_getxattr_out {
- __u32 size;
- __u32 padding;
+ uint32_t size;
+ uint32_t padding;
};
struct fuse_lk_in {
- __u64 fh;
- __u64 owner;
+ uint64_t fh;
+ uint64_t owner;
struct fuse_file_lock lk;
- __u32 lk_flags;
- __u32 padding;
+ uint32_t lk_flags;
+ uint32_t padding;
};
struct fuse_lk_out {
@@ -446,134 +568,206 @@ struct fuse_lk_out {
};
struct fuse_access_in {
- __u32 mask;
- __u32 padding;
+ uint32_t mask;
+ uint32_t padding;
};
struct fuse_init_in {
- __u32 major;
- __u32 minor;
- __u32 max_readahead;
- __u32 flags;
+ uint32_t major;
+ uint32_t minor;
+ uint32_t max_readahead;
+ uint32_t flags;
};
+#define FUSE_COMPAT_INIT_OUT_SIZE 8
+#define FUSE_COMPAT_22_INIT_OUT_SIZE 24
+
struct fuse_init_out {
- __u32 major;
- __u32 minor;
- __u32 max_readahead;
- __u32 flags;
- __u16 max_background;
- __u16 congestion_threshold;
- __u32 max_write;
+ uint32_t major;
+ uint32_t minor;
+ uint32_t max_readahead;
+ uint32_t flags;
+ uint16_t max_background;
+ uint16_t congestion_threshold;
+ uint32_t max_write;
+ uint32_t time_gran;
+ uint32_t unused[9];
};
#define CUSE_INIT_INFO_MAX 4096
struct cuse_init_in {
- __u32 major;
- __u32 minor;
- __u32 unused;
- __u32 flags;
+ uint32_t major;
+ uint32_t minor;
+ uint32_t unused;
+ uint32_t flags;
};
struct cuse_init_out {
- __u32 major;
- __u32 minor;
- __u32 unused;
- __u32 flags;
- __u32 max_read;
- __u32 max_write;
- __u32 dev_major; /* chardev major */
- __u32 dev_minor; /* chardev minor */
- __u32 spare[10];
+ uint32_t major;
+ uint32_t minor;
+ uint32_t unused;
+ uint32_t flags;
+ uint32_t max_read;
+ uint32_t max_write;
+ uint32_t dev_major; /* chardev major */
+ uint32_t dev_minor; /* chardev minor */
+ uint32_t spare[10];
};
struct fuse_interrupt_in {
- __u64 unique;
+ uint64_t unique;
};
struct fuse_bmap_in {
- __u64 block;
- __u32 blocksize;
- __u32 padding;
+ uint64_t block;
+ uint32_t blocksize;
+ uint32_t padding;
};
struct fuse_bmap_out {
- __u64 block;
+ uint64_t block;
};
struct fuse_ioctl_in {
- __u64 fh;
- __u32 flags;
- __u32 cmd;
- __u64 arg;
- __u32 in_size;
- __u32 out_size;
+ uint64_t fh;
+ uint32_t flags;
+ uint32_t cmd;
+ uint64_t arg;
+ uint32_t in_size;
+ uint32_t out_size;
+};
+
+struct fuse_ioctl_iovec {
+ uint64_t base;
+ uint64_t len;
};
struct fuse_ioctl_out {
- __s32 result;
- __u32 flags;
- __u32 in_iovs;
- __u32 out_iovs;
+ int32_t result;
+ uint32_t flags;
+ uint32_t in_iovs;
+ uint32_t out_iovs;
};
struct fuse_poll_in {
- __u64 fh;
- __u64 kh;
- __u32 flags;
- __u32 padding;
+ uint64_t fh;
+ uint64_t kh;
+ uint32_t flags;
+ uint32_t events;
};
struct fuse_poll_out {
- __u32 revents;
- __u32 padding;
+ uint32_t revents;
+ uint32_t padding;
};
struct fuse_notify_poll_wakeup_out {
- __u64 kh;
+ uint64_t kh;
+};
+
+struct fuse_fallocate_in {
+ uint64_t fh;
+ uint64_t offset;
+ uint64_t length;
+ uint32_t mode;
+ uint32_t padding;
};
struct fuse_in_header {
- __u32 len;
- __u32 opcode;
- __u64 unique;
- __u64 nodeid;
- __u32 uid;
- __u32 gid;
- __u32 pid;
- __u32 padding;
+ uint32_t len;
+ uint32_t opcode;
+ uint64_t unique;
+ uint64_t nodeid;
+ uint32_t uid;
+ uint32_t gid;
+ uint32_t pid;
+ uint32_t padding;
};
struct fuse_out_header {
- __u32 len;
- __s32 error;
- __u64 unique;
+ uint32_t len;
+ int32_t error;
+ uint64_t unique;
};
struct fuse_dirent {
- __u64 ino;
- __u64 off;
- __u32 namelen;
- __u32 type;
- char name[0];
+ uint64_t ino;
+ uint64_t off;
+ uint32_t namelen;
+ uint32_t type;
+ char name[];
};
#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
-#define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1))
+#define FUSE_DIRENT_ALIGN(x) \
+ (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
#define FUSE_DIRENT_SIZE(d) \
FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+struct fuse_direntplus {
+ struct fuse_entry_out entry_out;
+ struct fuse_dirent dirent;
+};
+
+#define FUSE_NAME_OFFSET_DIRENTPLUS \
+ offsetof(struct fuse_direntplus, dirent.name)
+#define FUSE_DIRENTPLUS_SIZE(d) \
+ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET_DIRENTPLUS + (d)->dirent.namelen)
+
struct fuse_notify_inval_inode_out {
- __u64 ino;
- __s64 off;
- __s64 len;
+ uint64_t ino;
+ int64_t off;
+ int64_t len;
};
struct fuse_notify_inval_entry_out {
- __u64 parent;
- __u32 namelen;
- __u32 padding;
+ uint64_t parent;
+ uint32_t namelen;
+ uint32_t padding;
+};
+
+struct fuse_notify_delete_out {
+ uint64_t parent;
+ uint64_t child;
+ uint32_t namelen;
+ uint32_t padding;
+};
+
+struct fuse_notify_store_out {
+ uint64_t nodeid;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t padding;
+};
+
+struct fuse_notify_retrieve_out {
+ uint64_t notify_unique;
+ uint64_t nodeid;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t padding;
+};
+
+/* Matches the size of fuse_write_in */
+struct fuse_notify_retrieve_in {
+ uint64_t dummy1;
+ uint64_t offset;
+ uint32_t size;
+ uint32_t dummy2;
+ uint64_t dummy3;
+ uint64_t dummy4;
+};
+
+struct fuse_lseek_in {
+ uint64_t fh;
+ uint64_t offset;
+ int32_t whence;
+ uint32_t padding;
+};
+
+struct fuse_lseek_out {
+ uint64_t offset;
};
#endif /* _LINUX_FUSE_H */
diff --git a/contrib/fuse-include/fuse_kernel_macfuse.h b/contrib/fuse-include/fuse_kernel_macfuse.h
index 3fbf24f70aa..31bc495a552 100644
--- a/contrib/fuse-include/fuse_kernel_macfuse.h
+++ b/contrib/fuse-include/fuse_kernel_macfuse.h
@@ -61,67 +61,59 @@
userspace works under 64bit kernels */
struct fuse_attr {
- __u64 ino;
- __u64 size;
- __u64 blocks;
- __u64 atime;
- __u64 mtime;
- __u64 ctime;
-#if (__FreeBSD__ >= 10)
- __u64 crtime;
-#endif /* __FreeBSD__ >= 10 */
- __u32 atimensec;
- __u32 mtimensec;
- __u32 ctimensec;
-#if (__FreeBSD__ >= 10)
- __u32 crtimensec;
-#endif /* __FreeBSD__ >= 10 */
- __u32 mode;
- __u32 nlink;
- __u32 uid;
- __u32 gid;
- __u32 rdev;
-#if (__FreeBSD__ >= 10)
- __u32 flags; /* file flags; see chflags(2) */
-#endif /* __FreeBSD__ >= 10 */
+ __u64 ino;
+ __u64 size;
+ __u64 blocks;
+ __u64 atime;
+ __u64 mtime;
+ __u64 ctime;
+ __u64 crtime;
+ __u32 atimensec;
+ __u32 mtimensec;
+ __u32 ctimensec;
+ __u32 crtimensec;
+ __u32 mode;
+ __u32 nlink;
+ __u32 uid;
+ __u32 gid;
+ __u32 rdev;
+ __u32 flags; /* file flags; see chflags(2) */
};
struct fuse_kstatfs {
- __u64 blocks;
- __u64 bfree;
- __u64 bavail;
- __u64 files;
- __u64 ffree;
- __u32 bsize;
- __u32 namelen;
- __u32 frsize;
- __u32 padding;
- __u32 spare[6];
+ __u64 blocks;
+ __u64 bfree;
+ __u64 bavail;
+ __u64 files;
+ __u64 ffree;
+ __u32 bsize;
+ __u32 namelen;
+ __u32 frsize;
+ __u32 padding;
+ __u32 spare[6];
};
struct fuse_file_lock {
- __u64 start;
- __u64 end;
- __u32 type;
- __u32 pid; /* tgid */
+ __u64 start;
+ __u64 end;
+ __u32 type;
+ __u32 pid; /* tgid */
};
/**
* Bitmasks for fuse_setattr_in.valid
*/
-#define FATTR_MODE (1 << 0)
-#define FATTR_UID (1 << 1)
-#define FATTR_GID (1 << 2)
-#define FATTR_SIZE (1 << 3)
-#define FATTR_ATIME (1 << 4)
-#define FATTR_MTIME (1 << 5)
-#define FATTR_FH (1 << 6)
-#if (__FreeBSD__ >= 10)
-#define FATTR_CRTIME (1 << 28)
-#define FATTR_CHGTIME (1 << 29)
-#define FATTR_BKUPTIME (1 << 30)
-#define FATTR_FLAGS (1 << 31)
-#endif /* __FreeBSD__ >= 10 */
+#define FATTR_MODE (1 << 0)
+#define FATTR_UID (1 << 1)
+#define FATTR_GID (1 << 2)
+#define FATTR_SIZE (1 << 3)
+#define FATTR_ATIME (1 << 4)
+#define FATTR_MTIME (1 << 5)
+#define FATTR_FH (1 << 6)
+#define FATTR_CRTIME (1 << 28)
+#define FATTR_CHGTIME (1 << 29)
+#define FATTR_BKUPTIME (1 << 30)
+#define FATTR_FLAGS (1 << 31)
/**
* Flags returned by the OPEN request
@@ -129,311 +121,304 @@ struct fuse_file_lock {
* FOPEN_DIRECT_IO: bypass page cache for this open file
* FOPEN_KEEP_CACHE: don't invalidate the data cache on open
*/
-#define FOPEN_DIRECT_IO (1 << 0)
-#define FOPEN_KEEP_CACHE (1 << 1)
-#if (__FreeBSD__ >= 10)
-#define FOPEN_PURGE_ATTR (1 << 30)
-#define FOPEN_PURGE_UBC (1 << 31)
-#endif
+#define FOPEN_DIRECT_IO (1 << 0)
+#define FOPEN_KEEP_CACHE (1 << 1)
+#define FOPEN_PURGE_ATTR (1 << 30)
+#define FOPEN_PURGE_UBC (1 << 31)
/**
* INIT request/reply flags
*/
-#define FUSE_ASYNC_READ (1 << 0)
-#define FUSE_POSIX_LOCKS (1 << 1)
-#if (__FreeBSD__ >= 10)
-#define FUSE_CASE_INSENSITIVE (1 << 29)
-#define FUSE_VOL_RENAME (1 << 30)
-#define FUSE_XTIMES (1 << 31)
-#endif /* __FreeBSD__ >= 10 */
+#define FUSE_ASYNC_READ (1 << 0)
+#define FUSE_POSIX_LOCKS (1 << 1)
+#define FUSE_CASE_INSENSITIVE (1 << 29)
+#define FUSE_VOL_RENAME (1 << 30)
+#define FUSE_XTIMES (1 << 31)
/**
* Release flags
*/
-#define FUSE_RELEASE_FLUSH (1 << 0)
+#define FUSE_RELEASE_FLUSH (1 << 0)
enum fuse_opcode {
- FUSE_LOOKUP = 1,
- FUSE_FORGET = 2, /* no reply */
- FUSE_GETATTR = 3,
- FUSE_SETATTR = 4,
- FUSE_READLINK = 5,
- FUSE_SYMLINK = 6,
- FUSE_MKNOD = 8,
- FUSE_MKDIR = 9,
- FUSE_UNLINK = 10,
- FUSE_RMDIR = 11,
- FUSE_RENAME = 12,
- FUSE_LINK = 13,
- FUSE_OPEN = 14,
- FUSE_READ = 15,
- FUSE_WRITE = 16,
- FUSE_STATFS = 17,
- FUSE_RELEASE = 18,
- FUSE_FSYNC = 20,
- FUSE_SETXATTR = 21,
- FUSE_GETXATTR = 22,
- FUSE_LISTXATTR = 23,
- FUSE_REMOVEXATTR = 24,
- FUSE_FLUSH = 25,
- FUSE_INIT = 26,
- FUSE_OPENDIR = 27,
- FUSE_READDIR = 28,
- FUSE_RELEASEDIR = 29,
- FUSE_FSYNCDIR = 30,
- FUSE_GETLK = 31,
- FUSE_SETLK = 32,
- FUSE_SETLKW = 33,
- FUSE_ACCESS = 34,
- FUSE_CREATE = 35,
- FUSE_INTERRUPT = 36,
- FUSE_BMAP = 37,
- FUSE_DESTROY = 38,
-#if (__FreeBSD__ >= 10)
+ FUSE_LOOKUP = 1,
+ FUSE_FORGET = 2, /* no reply */
+ FUSE_GETATTR = 3,
+ FUSE_SETATTR = 4,
+ FUSE_READLINK = 5,
+ FUSE_SYMLINK = 6,
+ FUSE_MKNOD = 8,
+ FUSE_MKDIR = 9,
+ FUSE_UNLINK = 10,
+ FUSE_RMDIR = 11,
+ FUSE_RENAME = 12,
+ FUSE_LINK = 13,
+ FUSE_OPEN = 14,
+ FUSE_READ = 15,
+ FUSE_WRITE = 16,
+ FUSE_STATFS = 17,
+ FUSE_RELEASE = 18,
+ FUSE_FSYNC = 20,
+ FUSE_SETXATTR = 21,
+ FUSE_GETXATTR = 22,
+ FUSE_LISTXATTR = 23,
+ FUSE_REMOVEXATTR = 24,
+ FUSE_FLUSH = 25,
+ FUSE_INIT = 26,
+ FUSE_OPENDIR = 27,
+ FUSE_READDIR = 28,
+ FUSE_RELEASEDIR = 29,
+ FUSE_FSYNCDIR = 30,
+ FUSE_GETLK = 31,
+ FUSE_SETLK = 32,
+ FUSE_SETLKW = 33,
+ FUSE_ACCESS = 34,
+ FUSE_CREATE = 35,
+ FUSE_INTERRUPT = 36,
+ FUSE_BMAP = 37,
+ FUSE_DESTROY = 38,
+ /*
+ FUSE_IOCTL = 39,
+ FUSE_POLL = 40,
+ FUSE_NOTIFY_REPLY = 41,
+ FUSE_BATCH_FORGET = 42,
+ FUSE_FALLOCATE = 43,
+ FUSE_READDIRPLUS = 44,
+ */
+
FUSE_SETVOLNAME = 61,
- FUSE_GETXTIMES = 62,
- FUSE_EXCHANGE = 63,
-#endif /* __FreeBSD__ >= 10 */
+ FUSE_GETXTIMES = 62,
+ FUSE_EXCHANGE = 63,
};
/* The read buffer is required to be at least 8k, but may be much larger */
#define FUSE_MIN_READ_BUFFER 8192
struct fuse_entry_out {
- __u64 nodeid; /* Inode ID */
- __u64 generation; /* Inode generation: nodeid:gen must
- be unique for the fs's lifetime */
- __u64 entry_valid; /* Cache timeout for the name */
- __u64 attr_valid; /* Cache timeout for the attributes */
- __u32 entry_valid_nsec;
- __u32 attr_valid_nsec;
- struct fuse_attr attr;
+ __u64 nodeid; /* Inode ID */
+ __u64 generation; /* Inode generation: nodeid:gen must
+ be unique for the fs's lifetime */
+ __u64 entry_valid; /* Cache timeout for the name */
+ __u64 attr_valid; /* Cache timeout for the attributes */
+ __u32 entry_valid_nsec;
+ __u32 attr_valid_nsec;
+ struct fuse_attr attr;
};
struct fuse_forget_in {
- __u64 nlookup;
+ __u64 nlookup;
};
struct fuse_attr_out {
- __u64 attr_valid; /* Cache timeout for the attributes */
- __u32 attr_valid_nsec;
- __u32 dummy;
- struct fuse_attr attr;
+ __u64 attr_valid; /* Cache timeout for the attributes */
+ __u32 attr_valid_nsec;
+ __u32 dummy;
+ struct fuse_attr attr;
};
-#if (__FreeBSD__ >= 10)
struct fuse_getxtimes_out {
- __u64 bkuptime;
- __u64 crtime;
- __u32 bkuptimensec;
- __u32 crtimensec;
+ __u64 bkuptime;
+ __u64 crtime;
+ __u32 bkuptimensec;
+ __u32 crtimensec;
};
-#endif /* __FreeBSD__ >= 10 */
struct fuse_mknod_in {
- __u32 mode;
- __u32 rdev;
+ __u32 mode;
+ __u32 rdev;
};
struct fuse_mkdir_in {
- __u32 mode;
- __u32 padding;
+ __u32 mode;
+ __u32 padding;
};
struct fuse_rename_in {
- __u64 newdir;
+ __u64 newdir;
};
-#if (__FreeBSD__ >= 10)
struct fuse_exchange_in {
- __u64 olddir;
- __u64 newdir;
- __u64 options;
+ __u64 olddir;
+ __u64 newdir;
+ __u64 options;
};
-#endif /* __FreeBSD__ >= 10 */
struct fuse_link_in {
- __u64 oldnodeid;
+ __u64 oldnodeid;
};
struct fuse_setattr_in {
- __u32 valid;
- __u32 padding;
- __u64 fh;
- __u64 size;
- __u64 unused1;
- __u64 atime;
- __u64 mtime;
- __u64 unused2;
- __u32 atimensec;
- __u32 mtimensec;
- __u32 unused3;
- __u32 mode;
- __u32 unused4;
- __u32 uid;
- __u32 gid;
- __u32 unused5;
-#if (__FreeBSD__ >= 10)
- __u64 bkuptime;
- __u64 chgtime;
- __u64 crtime;
- __u32 bkuptimensec;
- __u32 chgtimensec;
- __u32 crtimensec;
- __u32 flags; /* file flags; see chflags(2) */
-#endif /* __FreeBSD__ >= 10 */
+ __u32 valid;
+ __u32 padding;
+ __u64 fh;
+ __u64 size;
+ __u64 unused1;
+ __u64 atime;
+ __u64 mtime;
+ __u64 unused2;
+ __u32 atimensec;
+ __u32 mtimensec;
+ __u32 unused3;
+ __u32 mode;
+ __u32 unused4;
+ __u32 uid;
+ __u32 gid;
+ __u32 unused5;
+ __u64 bkuptime;
+ __u64 chgtime;
+ __u64 crtime;
+ __u32 bkuptimensec;
+ __u32 chgtimensec;
+ __u32 crtimensec;
+ __u32 flags; /* file flags; see chflags(2) */
};
struct fuse_open_in {
- __u32 flags;
- __u32 mode;
+ __u32 flags;
+ __u32 mode;
};
struct fuse_open_out {
- __u64 fh;
- __u32 open_flags;
- __u32 padding;
+ __u64 fh;
+ __u32 open_flags;
+ __u32 padding;
};
struct fuse_release_in {
- __u64 fh;
- __u32 flags;
- __u32 release_flags;
- __u64 lock_owner;
+ __u64 fh;
+ __u32 flags;
+ __u32 release_flags;
+ __u64 lock_owner;
};
struct fuse_flush_in {
- __u64 fh;
- __u32 unused;
- __u32 padding;
- __u64 lock_owner;
+ __u64 fh;
+ __u32 unused;
+ __u32 padding;
+ __u64 lock_owner;
};
struct fuse_read_in {
- __u64 fh;
- __u64 offset;
- __u32 size;
- __u32 padding;
+ __u64 fh;
+ __u64 offset;
+ __u32 size;
+ __u32 padding;
};
struct fuse_write_in {
- __u64 fh;
- __u64 offset;
- __u32 size;
- __u32 write_flags;
+ __u64 fh;
+ __u64 offset;
+ __u32 size;
+ __u32 write_flags;
};
struct fuse_write_out {
- __u32 size;
- __u32 padding;
+ __u32 size;
+ __u32 padding;
};
#define FUSE_COMPAT_STATFS_SIZE 48
struct fuse_statfs_out {
- struct fuse_kstatfs st;
+ struct fuse_kstatfs st;
};
struct fuse_fsync_in {
- __u64 fh;
- __u32 fsync_flags;
- __u32 padding;
+ __u64 fh;
+ __u32 fsync_flags;
+ __u32 padding;
};
struct fuse_setxattr_in {
- __u32 size;
- __u32 flags;
-#if (__FreeBSD__ >= 10)
- __u32 position;
- __u32 padding;
-#endif /* __FreeBSD__ >= 10 */
+ __u32 size;
+ __u32 flags;
+ __u32 position;
+ __u32 padding;
};
struct fuse_getxattr_in {
- __u32 size;
- __u32 padding;
-#if (__FreeBSD__ >= 10)
- __u32 position;
- __u32 padding2;
-#endif /* __FreeBSD__ >= 10 */
+ __u32 size;
+ __u32 padding;
+ __u32 position;
+ __u32 padding2;
};
struct fuse_getxattr_out {
- __u32 size;
- __u32 padding;
+ __u32 size;
+ __u32 padding;
};
struct fuse_lk_in {
- __u64 fh;
- __u64 owner;
- struct fuse_file_lock lk;
+ __u64 fh;
+ __u64 owner;
+ struct fuse_file_lock lk;
};
struct fuse_lk_out {
- struct fuse_file_lock lk;
+ struct fuse_file_lock lk;
};
struct fuse_access_in {
- __u32 mask;
- __u32 padding;
+ __u32 mask;
+ __u32 padding;
};
struct fuse_init_in {
- __u32 major;
- __u32 minor;
- __u32 max_readahead;
- __u32 flags;
+ __u32 major;
+ __u32 minor;
+ __u32 max_readahead;
+ __u32 flags;
};
struct fuse_init_out {
- __u32 major;
- __u32 minor;
- __u32 max_readahead;
- __u32 flags;
- __u32 unused;
- __u32 max_write;
+ __u32 major;
+ __u32 minor;
+ __u32 max_readahead;
+ __u32 flags;
+ __u32 unused;
+ __u32 max_write;
};
struct fuse_interrupt_in {
- __u64 unique;
+ __u64 unique;
};
struct fuse_bmap_in {
- __u64 block;
- __u32 blocksize;
- __u32 padding;
+ __u64 block;
+ __u32 blocksize;
+ __u32 padding;
};
struct fuse_bmap_out {
- __u64 block;
+ __u64 block;
};
struct fuse_in_header {
- __u32 len;
- __u32 opcode;
- __u64 unique;
- __u64 nodeid;
- __u32 uid;
- __u32 gid;
- __u32 pid;
- __u32 padding;
+ __u32 len;
+ __u32 opcode;
+ __u64 unique;
+ __u64 nodeid;
+ __u32 uid;
+ __u32 gid;
+ __u32 pid;
+ __u32 padding;
};
struct fuse_out_header {
- __u32 len;
- __s32 error;
- __u64 unique;
+ __u32 len;
+ __s32 error;
+ __u64 unique;
};
struct fuse_dirent {
- __u64 ino;
- __u64 off;
- __u32 namelen;
- __u32 type;
- char name[0];
+ __u64 ino;
+ __u64 off;
+ __u32 namelen;
+ __u32 type;
+ char name[0];
};
#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name)
#define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1))
#define FUSE_DIRENT_SIZE(d) \
- FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
+ FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen)
diff --git a/contrib/fuse-lib/misc.c b/contrib/fuse-lib/misc.c
index 0c41b1a1917..1a9b418e511 100644
--- a/contrib/fuse-lib/misc.c
+++ b/contrib/fuse-lib/misc.c
@@ -10,7 +10,7 @@
#include <string.h>
#include <limits.h>
#include <fcntl.h>
-#include "glusterfs.h"
+#include "glusterfs/glusterfs.h"
#include "fuse_kernel.h"
#include "fuse-misc.h"
@@ -41,7 +41,6 @@ void
convert_fuse_file_lock (struct fuse_file_lock *fl, struct gf_flock *flock,
uint64_t lk_owner)
{
- memset (flock, 0, sizeof (struct flock));
flock->l_type = fl->type;
flock->l_whence = SEEK_SET;
flock->l_start = fl->start;
diff --git a/contrib/fuse-lib/mount-common.c b/contrib/fuse-lib/mount-common.c
index fd6cce44e3d..cffd4c01ed5 100644
--- a/contrib/fuse-lib/mount-common.c
+++ b/contrib/fuse-lib/mount-common.c
@@ -23,7 +23,7 @@
* see the commit log and per-function comments.
*/
-#ifndef __NetBSD__
+#ifdef GF_LINUX_HOST_OS
/* FUSE: cherry-picked bd99f9cf */
static int
mtab_needs_update (const char *mnt)
@@ -32,7 +32,7 @@ mtab_needs_update (const char *mnt)
struct stat stbuf;
/* If mtab is within new mount, don't touch it */
- if (strncmp (mnt, _PATH_MOUNTED, strlen (mnt)) == 0 &&
+ if (strncmp (mnt, _PATH_MOUNTED, sizeof (_PATH_MOUNTED) - 1) == 0 &&
_PATH_MOUNTED[strlen (mnt)] == '/')
return 0;
@@ -69,9 +69,9 @@ mtab_needs_update (const char *mnt)
return 1;
}
-#else /* __NetBSD__ */
+#else /* GF_LINUX_HOST_OS */
#define mtab_needs_update(x) 1
-#endif /* __NetBSD__ */
+#endif /* GF_LINUX_HOST_OS */
/* FUSE: called add_mount_legacy(); R.I.P. as of cbd3a2a8 */
int
@@ -105,7 +105,11 @@ fuse_mnt_add_mount (const char *progname, const char *fsname,
char *tmp;
sigprocmask (SIG_SETMASK, &oldmask, NULL);
- setuid (geteuid ());
+ res = setuid (geteuid ());
+ if (res != 0) {
+ GFFUSE_LOGERR ("%s: setuid: %s", progname, strerror (errno));
+ exit (1);
+ }
/*
* hide in a directory, where mount isn't able to resolve
@@ -245,11 +249,24 @@ fuse_mnt_umount (const char *progname, const char *abs_mnt,
}
if (res == 0) {
sigprocmask (SIG_SETMASK, &oldmask, NULL);
- setuid (geteuid ());
- execl ("/bin/umount", "/bin/umount", "-i", rel_mnt,
- lazy ? "-l" : NULL, NULL);
- GFFUSE_LOGERR ("%s: failed to execute /bin/umount: %s",
+ res = setuid (geteuid ());
+ if (res != 0) {
+ GFFUSE_LOGERR ("%s: setuid: %s", progname, strerror (errno));
+ exit (1);
+ }
+#ifdef GF_LINUX_HOST_OS
+ execl ("umount", "umount", "-i", rel_mnt,
+ lazy ? "-l" : NULL, NULL);
+ GFFUSE_LOGERR ("%s: failed to execute umount: %s",
+ progname, strerror (errno));
+#elif __NetBSD__
+ /* exitting the filesystem causes the umount */
+ exit (0);
+#else
+ execl ("umount", "umount", "-f", rel_mnt, NULL);
+ GFFUSE_LOGERR ("%s: failed to execute umount: %s",
progname, strerror (errno));
+#endif /* GF_LINUX_HOST_OS */
exit (1);
}
res = waitpid (res, &status, 0);
diff --git a/contrib/fuse-lib/mount-gluster-compat.h b/contrib/fuse-lib/mount-gluster-compat.h
index 17c11e78920..d3646d08d8e 100644
--- a/contrib/fuse-lib/mount-gluster-compat.h
+++ b/contrib/fuse-lib/mount-gluster-compat.h
@@ -21,23 +21,73 @@
#include <errno.h>
#include <dirent.h>
#include <signal.h>
-#ifndef __NetBSD__
+#if defined(GF_LINUX_HOST_OS)
#include <mntent.h>
-#endif /* __NetBSD__ */
+#endif /* GF_LINUX_HOST_OS */
#include <sys/stat.h>
#include <sys/poll.h>
#include <sys/un.h>
#include <sys/wait.h>
#include <sys/mount.h>
-#ifdef __NetBSD__
+#ifdef GF_LINUX_HOST_OS
+typedef unsigned long mount_flag_t;
+#endif
+
+#if defined(__NetBSD__)
#include <perfuse.h>
#define umount2(dir, flags) unmount(dir, ((flags) != 0) ? MNT_FORCE : 0)
+#define MS_RDONLY MNT_RDONLY
+#define MS_NOSUID MNT_NOSUID
+#define MS_NODEV MNT_NODEV
+#define MS_NOATIME MNT_NOATIME
+#define MS_NOEXEC MNT_NOEXEC
+typedef int mount_flag_t;
+#endif
+
+#if defined(GF_DARWIN_HOST_OS) || defined(__FreeBSD__)
+#include <sys/param.h>
+#include <sys/mount.h>
+#define umount2(dir, flags) unmount(dir, ((flags) != 0) ? MNT_FORCE : 0)
+#endif
+
+#if defined(__FreeBSD__)
+#define MS_RDONLY MNT_RDONLY
+#define MS_NOSUID MNT_NOSUID
+/* "nodev"/MNT_NODEV was removed from FreBSD, as it became unneeded because "As
+ * of FreeBSD 6.0 device nodes may be created in regular file systems but such
+ * nodes cannot be used to access devices." (See
+ * https://freebsd.org/cgi/man.cgi?query=mknod&sektion=8 .
+ * Also see:
+ * - https://github.com/freebsd/freebsd/commit/266790a
+ * - https://github.com/freebsd/freebsd/commit/a5e716d
+ * - 700008 in
+ * https://www.freebsd.org/doc/en/books/porters-handbook/versions-7.html .)
+ */
+#if __FreeBSD_version < 700008
+#define MS_NODEV MNT_NODEV
+#else
+#define MS_NODEV 0
+#endif
+#define MS_NOATIME MNT_NOATIME
+#define MS_NOEXEC MNT_NOEXEC
+#if __FreeBSD_version < 1000715
+typedef int mount_flag_t;
+#else
+/* __FreeBSD_version was not bumped for this type change. Anyway, see
+ * https://github.com/freebsd/freebsd/commit/e8d76f8
+ * and respective __FreeBSD_version:
+ * https://github.com/freebsd/freebsd/blob/e8d76f8/sys/sys/param.h#L61 .
+ * We use the subsequent value, 1000715, to switch. (Also see:
+ * https://www.freebsd.org/doc/en/books/porters-handbook/versions-10.html .)
+ */
+typedef long long mount_flag_t;
+#endif
#endif
-#ifdef linux
+#ifdef GF_LINUX_HOST_OS
#define _PATH_MOUNT "/bin/mount"
-#else /* NetBSD, MacOS X */
+#else /* FreeBSD, NetBSD, MacOS X */
#define _PATH_MOUNT "/sbin/mount"
#endif
@@ -46,9 +96,9 @@
#define FREE(ptr) free (ptr)
#define GFFUSE_LOGERR(...) fprintf (stderr, ## __VA_ARGS__)
#else /* FUSE_UTIL */
-#include "glusterfs.h"
-#include "logging.h"
-#include "common-utils.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/common-utils.h"
#define GFFUSE_LOGERR(...) \
gf_log ("glusterfs-fuse", GF_LOG_ERROR, ## __VA_ARGS__)
diff --git a/contrib/fuse-lib/mount.c b/contrib/fuse-lib/mount.c
index f02a835b3a8..06ff191f542 100644
--- a/contrib/fuse-lib/mount.c
+++ b/contrib/fuse-lib/mount.c
@@ -17,6 +17,11 @@
#endif
#define FUSE_DEVFD_ENV "_FUSE_DEVFD"
+#ifdef __FreeBSD__
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+#endif /* __FreeBSD__ */
/* FUSE: function is called fuse_kern_unmount() */
void
@@ -47,12 +52,16 @@ gf_fuse_unmount (const char *mountpoint, int fd)
if (geteuid () == 0) {
fuse_mnt_umount ("fuse", mountpoint, mountpoint, 1);
return;
+ } else {
+ GFFUSE_LOGERR ("fuse: Effective-uid: %d", geteuid());
}
res = umount2 (mountpoint, 2);
if (res == 0)
return;
+ GFFUSE_LOGERR ("fuse: failed to unmount %s: %s",
+ mountpoint, strerror (errno));
pid = fork ();
if (pid == -1)
return;
@@ -62,6 +71,8 @@ gf_fuse_unmount (const char *mountpoint, int fd)
"--", mountpoint, NULL };
execvp (FUSERMOUNT_PROG, (char **)argv);
+ GFFUSE_LOGERR ("fuse: failed to execute fuserumount: %s",
+ strerror (errno));
_exit (1);
}
waitpid (pid, NULL, 0);
@@ -70,6 +81,54 @@ gf_fuse_unmount (const char *mountpoint, int fd)
/* gluster-specific routines */
+/* Unmounting in a daemon that lurks 'till main process exits */
+int
+gf_fuse_unmount_daemon (const char *mountpoint, int fd)
+{
+ int ret = -1;
+ pid_t pid = -1;
+
+ if (fd == -1)
+ return -1;
+
+ int ump[2] = {0,};
+
+ ret = pipe(ump);
+ if (ret == -1) {
+ close (fd);
+ return -1;
+ }
+
+ pid = fork ();
+ switch (pid) {
+ case 0:
+ {
+ char c = 0;
+ sigset_t sigset;
+
+ close_fds_except (ump, 1);
+
+ setsid();
+ (void)chdir("/");
+ sigfillset(&sigset);
+ sigprocmask(SIG_BLOCK, &sigset, NULL);
+
+ read (ump[0], &c, 1);
+
+ gf_fuse_unmount (mountpoint, fd);
+ exit (0);
+ }
+ case -1:
+ close (fd);
+ fd = -1;
+ ret = -1;
+ close (ump[1]);
+ }
+ close (ump[0]);
+
+ return ret;
+}
+
static char *
escape (char *s)
{
@@ -100,8 +159,8 @@ escape (char *s)
}
static int
-fuse_mount_fusermount (const char *mountpoint, char *fsname, char *mnt_param,
- int fd)
+fuse_mount_fusermount (const char *mountpoint, char *fsname,
+ char *mnt_param, int fd)
{
int pid = -1;
int res = 0;
@@ -168,25 +227,176 @@ fuse_mount_fusermount (const char *mountpoint, char *fsname, char *mnt_param,
return ret;
}
+#if defined(__FreeBSD__)
+void
+build_iovec(struct iovec **iov, int *iovlen, const char *name, void *val,
+ size_t len)
+{
+ int i;
+ if (*iovlen < 0)
+ return;
+ i = *iovlen;
+
+ *iov = realloc(*iov, sizeof **iov * (i + 2));
+ if (*iov == NULL) {
+ *iovlen = -1;
+ return;
+ }
+
+ (*iov)[i].iov_base = strdup(name);
+ (*iov)[i].iov_len = strlen(name) + 1;
+
+ i++;
+ (*iov)[i].iov_base = val;
+ if (len == (size_t) -1) {
+ if (val != NULL)
+ len = strlen(val) + 1;
+ else
+ len = 0;
+ }
+ (*iov)[i].iov_len = (int)len;
+ *iovlen = ++i;
+}
+
+/*
+ * This function is needed for compatibility with parameters
+ * which used to use the mount_argf() command for the old mount() syscall.
+ */
+void
+build_iovec_argf(struct iovec **iov, int *iovlen, const char *name,
+ const char *fmt, ...)
+{
+ va_list ap;
+ char val[255] = { 0 };
+
+ va_start(ap, fmt);
+ vsnprintf(val, sizeof(val), fmt, ap);
+ va_end(ap);
+ build_iovec(iov, iovlen, name, strdup(val), (size_t)-1);
+}
+#endif /* __FreeBSD__ */
+
+struct mount_flags {
+ const char *opt;
+ mount_flag_t flag;
+ int on;
+} mount_flags[] = {
+ /* We provide best effort cross platform support for mount flags by
+ * defining the ones which are commonly used in Unix-like OS-es.
+ */
+ {"ro", MS_RDONLY, 1},
+ {"nosuid", MS_NOSUID, 1},
+ {"nodev", MS_NODEV, 1},
+ {"noatime", MS_NOATIME, 1},
+ {"noexec", MS_NOEXEC, 1},
+#ifdef GF_LINUX_HOST_OS
+ {"rw", MS_RDONLY, 0},
+ {"suid", MS_NOSUID, 0},
+ {"dev", MS_NODEV, 0},
+ {"exec", MS_NOEXEC, 0},
+ {"async", MS_SYNCHRONOUS, 0},
+ {"sync", MS_SYNCHRONOUS, 1},
+ {"atime", MS_NOATIME, 0},
+ {"dirsync", MS_DIRSYNC, 1},
+#endif
+ {NULL, 0, 0}
+};
+
static int
-fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, int fd)
+mount_param_to_flag (char *mnt_param, mount_flag_t *mntflags,
+ char **mnt_param_new)
+{
+ gf_boolean_t found = _gf_false;
+ struct mount_flags *flag = NULL;
+ char *param_tok = NULL;
+ token_iter_t tit = {0,};
+ gf_boolean_t iter_end = _gf_false;
+
+ /* Allocate a buffer that will hold the mount parameters remaining
+ * after the ones corresponding to mount flags are processed and
+ * removed.The length of the original params are a good upper bound
+ * of the size needed.
+ */
+ *mnt_param_new = strdup (mnt_param);
+ if (!*mnt_param_new)
+ return -1;
+
+ for (param_tok = token_iter_init (*mnt_param_new, ',', &tit) ;;) {
+ iter_end = next_token (&param_tok, &tit);
+
+ found = _gf_false;
+ for (flag = mount_flags; flag->opt; flag++) {
+ /* Compare the mount flag name to the param
+ * name at hand.
+ */
+ if (strcmp (flag->opt, param_tok) == 0) {
+ /* If there is a match, adjust mntflags
+ * accordingly and break.
+ */
+ if (flag->on) {
+ *mntflags |= flag->flag;
+ } else {
+ *mntflags &= ~flag->flag;
+ }
+ found = _gf_true;
+ break;
+ }
+ }
+ /* Exclude flag names from new parameter list. */
+ if (found)
+ drop_token (param_tok, &tit);
+
+ if (iter_end)
+ break;
+ }
+
+ return 0;
+}
+
+static int
+fuse_mount_sys (const char *mountpoint, char *fsname,
+ char *mnt_param, int fd)
{
int ret = -1;
unsigned mounted = 0;
char *mnt_param_mnt = NULL;
char *fstype = "fuse.glusterfs";
char *source = fsname;
-
- ret = asprintf (&mnt_param_mnt,
- "%s,fd=%i,rootmode=%o,user_id=%i,group_id=%i",
- mnt_param, fd, S_IFDIR, getuid (), getgid ());
+ mount_flag_t mountflags = 0;
+ char *mnt_param_new = NULL;
+
+ ret = mount_param_to_flag (mnt_param, &mountflags, &mnt_param_new);
+ if (ret == 0)
+ ret = asprintf (&mnt_param_mnt,
+ "%s,fd=%i,rootmode=%o,user_id=%i,group_id=%i",
+ mnt_param_new, fd, S_IFDIR, getuid (),
+ getgid ());
if (ret == -1) {
GFFUSE_LOGERR ("Out of memory");
goto out;
}
- ret = mount (source, mountpoint, fstype, 0,
+
+#ifdef __FreeBSD__
+ struct iovec *iov = NULL;
+ int iovlen = 0;
+ char fdstr[15];
+ sprintf (fdstr, "%d", fd);
+
+ build_iovec (&iov, &iovlen, "fstype", "fusefs", -1);
+ build_iovec (&iov, &iovlen, "subtype", "glusterfs", -1);
+ build_iovec (&iov, &iovlen, "fspath", __DECONST(void *, mountpoint),
+ -1);
+ build_iovec (&iov, &iovlen, "from", "/dev/fuse", -1);
+ build_iovec (&iov, &iovlen, "volname", source, -1);
+ build_iovec (&iov, &iovlen, "fd", fdstr, -1);
+ build_iovec (&iov, &iovlen, "allow_other", NULL, -1);
+ ret = nmount (iov, iovlen, mountflags);
+#else
+ ret = mount (source, mountpoint, fstype, mountflags,
mnt_param_mnt);
+#endif /* __FreeBSD__ */
+#ifdef GF_LINUX_HOST_OS
if (ret == -1 && errno == ENODEV) {
/* fs subtype support was added by 79c0b2df aka
v2.6.21-3159-g79c0b2d. Probably we have an
@@ -198,17 +408,19 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, int fd)
goto out;
}
- ret = mount (source, mountpoint, fstype, 0,
+ ret = mount (source, mountpoint, fstype, mountflags,
mnt_param_mnt);
}
+#endif /* GF_LINUX_HOST_OS */
if (ret == -1)
goto out;
else
mounted = 1;
-#ifndef __NetBSD__
+#ifdef GF_LINUX_HOST_OS
if (geteuid () == 0) {
char *newmnt = fuse_mnt_resolve_path ("fuse", mountpoint);
+ char *mnt_param_mtab = NULL;
if (!newmnt) {
ret = -1;
@@ -216,8 +428,17 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, int fd)
goto out;
}
- ret = fuse_mnt_add_mount ("fuse", source, newmnt, fstype,
- mnt_param);
+ ret = asprintf (&mnt_param_mtab, "%s%s",
+ mountflags & MS_RDONLY ? "ro," : "",
+ mnt_param_new);
+ if (ret == -1)
+ GFFUSE_LOGERR ("Out of memory");
+ else {
+ ret = fuse_mnt_add_mount ("fuse", source, newmnt,
+ fstype, mnt_param_mtab);
+ FREE (mnt_param_mtab);
+ }
+
FREE (newmnt);
if (ret == -1) {
GFFUSE_LOGERR ("failed to add mtab entry");
@@ -225,14 +446,16 @@ fuse_mount_sys (const char *mountpoint, char *fsname, char *mnt_param, int fd)
goto out;
}
}
-#endif /* __NetBSD__ */
+#endif /* GF_LINUX_HOST_OS */
out:
if (ret == -1) {
+ GFFUSE_LOGERR("ret = -1\n");
if (mounted)
umount2 (mountpoint, 2); /* lazy umount */
}
FREE (mnt_param_mnt);
+ FREE (mnt_param_new);
if (source != fsname)
FREE (source);
@@ -240,8 +463,8 @@ out:
}
int
-gf_fuse_mount (const char *mountpoint, char *fsname, char *mnt_param,
- pid_t *mnt_pid, int status_fd)
+gf_fuse_mount (const char *mountpoint, char *fsname,
+ char *mnt_param, pid_t *mnt_pid, int status_fd)
{
int fd = -1;
pid_t pid = -1;
@@ -271,16 +494,21 @@ gf_fuse_mount (const char *mountpoint, char *fsname, char *mnt_param,
ret = fuse_mount_sys (mountpoint, fsname, mnt_param, fd);
if (ret == -1) {
gf_log ("glusterfs-fuse", GF_LOG_INFO,
- "direct mount failed (%s), "
- "retry to mount via fusermount",
- strerror (errno));
+ "direct mount failed (%s) errno %d",
+ strerror (errno), errno);
+
+ if (errno == EPERM) {
+ gf_log ("glusterfs-fuse", GF_LOG_INFO,
+ "retry to mount via fusermount");
- ret = fuse_mount_fusermount (mountpoint, fsname,
- mnt_param, fd);
+ ret = fuse_mount_fusermount (mountpoint, fsname,
+ mnt_param, fd);
+ }
}
if (ret == -1)
- GFFUSE_LOGERR ("mount failed");
+ GFFUSE_LOGERR ("mount of %s to %s (%s) failed",
+ fsname, mountpoint, mnt_param);
if (status_fd >= 0)
(void)write (status_fd, &ret, sizeof (ret));
diff --git a/contrib/fuse-util/Makefile.am b/contrib/fuse-util/Makefile.am
index 6e9b31c7756..abbc10eb6d9 100644
--- a/contrib/fuse-util/Makefile.am
+++ b/contrib/fuse-util/Makefile.am
@@ -3,7 +3,9 @@ bin_PROGRAMS = fusermount-glusterfs
fusermount_glusterfs_SOURCES = fusermount.c mount_util.c $(CONTRIBDIR)/fuse-lib/mount-common.c
noinst_HEADERS = $(CONTRIBDIR)/fuse-include/mount_util.h
-AM_CFLAGS = -Wall -D_FILE_OFFSET_BITS=64 -DFUSE_UTIL $(GF_CFLAGS) -D_GNU_SOURCE -I$(CONTRIBDIR)/fuse-include
+AM_CPPFLAGS = $(GF_CPPFLAGS) -DFUSE_UTIL -I$(CONTRIBDIR)/fuse-include -I$(CONTRIBDIR)/fuse-lib
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
install-exec-hook:
-chown root $(DESTDIR)$(bindir)/fusermount-glusterfs
diff --git a/contrib/fuse-util/fusermount.c b/contrib/fuse-util/fusermount.c
index 9a89525718d..ff743f75a21 100644
--- a/contrib/fuse-util/fusermount.c
+++ b/contrib/fuse-util/fusermount.c
@@ -10,6 +10,11 @@
#include <config.h>
#include "mount_util.h"
+
+#ifndef HAVE_UMOUNT2
+#include "mount-gluster-compat.h"
+#endif
+
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -20,11 +25,18 @@
#include <fcntl.h>
#include <pwd.h>
#include <limits.h>
+#if !defined(__NetBSD__) && !defined(GF_DARWIN_HOST_OS)
#include <mntent.h>
+#endif /* __NetBSD__ */
#include <sys/wait.h>
#include <sys/stat.h>
-#include <sys/mount.h>
+#ifdef HAVE_SET_FSID
#include <sys/fsuid.h>
+#endif
+#ifdef GF_DARWIN_HOST_OS
+#include <sys/param.h>
+#endif
+#include <sys/mount.h>
#include <sys/socket.h>
#include <sys/utsname.h>
#include <sched.h>
@@ -63,22 +75,32 @@ static const char *get_user_name(void)
}
}
+#ifdef HAVE_SET_FSID
static uid_t oldfsuid;
static gid_t oldfsgid;
+#endif
static void drop_privs(void)
{
if (getuid() != 0) {
+#ifdef HAVE_SET_FSID
oldfsuid = setfsuid(getuid());
oldfsgid = setfsgid(getgid());
+#else
+ fprintf(stderr, "%s: Implement alternative setfsuid/gid \n", progname);
+#endif
}
}
static void restore_privs(void)
{
if (getuid() != 0) {
+#ifdef HAVE_SET_FSID
setfsuid(oldfsuid);
setfsgid(oldfsgid);
+#else
+ fprintf(stderr, "%s: Implement alternative setfsuid/gid \n", progname);
+#endif
}
}
@@ -498,20 +520,22 @@ static void parse_line(char *line, int linenum)
static void read_conf(void)
{
+ int len;
FILE *fp = fopen(FUSE_CONF, "r");
if (fp != NULL) {
int linenum = 1;
char line[256];
int isnewline = 1;
while (fgets(line, sizeof(line), fp) != NULL) {
+ len = strlen (line);
if (isnewline) {
- if (line[strlen(line)-1] == '\n') {
+ if (len && line[len-1] == '\n') {
strip_line(line);
parse_line(line, linenum);
} else {
isnewline = 0;
}
- } else if(line[strlen(line)-1] == '\n') {
+ } else if (len && line[len-1] == '\n') {
fprintf(stderr, "%s: reading %s: line %i too long\n", progname, FUSE_CONF, linenum);
isnewline = 1;
@@ -606,7 +630,7 @@ static int add_option(char **optsp, const char *opt, unsigned expand)
static int get_mnt_opts(int flags, char *opts, char **mnt_optsp)
{
int i;
- int l;
+ size_t l;
if (!(flags & MS_RDONLY) && add_option(mnt_optsp, "rw", 0) == -1)
return -1;
@@ -621,7 +645,7 @@ static int get_mnt_opts(int flags, char *opts, char **mnt_optsp)
return -1;
/* remove comma from end of opts*/
l = strlen(*mnt_optsp);
- if ((*mnt_optsp)[l-1] == ',')
+ if (l && (*mnt_optsp)[l-1] == ',')
(*mnt_optsp)[l-1] = '\0';
if (getuid() != 0) {
const char *user = get_user_name();
@@ -650,8 +674,7 @@ static int get_string_opt(const char *s, unsigned len, const char *opt,
unsigned opt_len = strlen(opt);
char *d;
- if (*val)
- free(*val);
+ free(*val);
*val = (char *) malloc(len - opt_len + 1);
if (!*val) {
fprintf(stderr, "%s: failed to allocate memory\n", progname);
diff --git a/contrib/ipaddr-py/COPYING b/contrib/ipaddr-py/COPYING
deleted file mode 100644
index d6456956733..00000000000
--- a/contrib/ipaddr-py/COPYING
+++ /dev/null
@@ -1,202 +0,0 @@
-
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
diff --git a/contrib/ipaddr-py/MANIFEST.in b/contrib/ipaddr-py/MANIFEST.in
deleted file mode 100644
index f572804441b..00000000000
--- a/contrib/ipaddr-py/MANIFEST.in
+++ /dev/null
@@ -1,3 +0,0 @@
-include COPYING
-include ipaddr_test.py
-include RELEASENOTES
diff --git a/contrib/ipaddr-py/OWNERS b/contrib/ipaddr-py/OWNERS
deleted file mode 100644
index 501673e0395..00000000000
--- a/contrib/ipaddr-py/OWNERS
+++ /dev/null
@@ -1,4 +0,0 @@
-pmoody
-harro
-mshields
-smart
diff --git a/contrib/ipaddr-py/README b/contrib/ipaddr-py/README
deleted file mode 100644
index 1b54294bb10..00000000000
--- a/contrib/ipaddr-py/README
+++ /dev/null
@@ -1,8 +0,0 @@
-ipaddr.py is a library for working with IP addresses, both IPv4 and IPv6.
-It was developed by Google for internal use, and is now open source.
-
-Project home page: http://code.google.com/p/ipaddr-py/
-
-Please send contributions to ipaddr-py-dev@googlegroups.com. Code should
-include unit tests and follow the Google Python style guide:
-http://code.google.com/p/soc/wiki/PythonStyleGuide
diff --git a/contrib/ipaddr-py/ipaddr.py b/contrib/ipaddr-py/ipaddr.py
deleted file mode 100644
index a89298a315d..00000000000
--- a/contrib/ipaddr-py/ipaddr.py
+++ /dev/null
@@ -1,1907 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright 2007 Google Inc.
-# Licensed to PSF under a Contributor Agreement.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied. See the License for the specific language governing
-# permissions and limitations under the License.
-
-"""A fast, lightweight IPv4/IPv6 manipulation library in Python.
-
-This library is used to create/poke/manipulate IPv4 and IPv6 addresses
-and networks.
-
-"""
-
-__version__ = 'trunk'
-
-import struct
-
-IPV4LENGTH = 32
-IPV6LENGTH = 128
-
-
-class AddressValueError(ValueError):
- """A Value Error related to the address."""
-
-
-class NetmaskValueError(ValueError):
- """A Value Error related to the netmask."""
-
-
-def IPAddress(address, version=None):
- """Take an IP string/int and return an object of the correct type.
-
- Args:
- address: A string or integer, the IP address. Either IPv4 or
- IPv6 addresses may be supplied; integers less than 2**32 will
- be considered to be IPv4 by default.
- version: An Integer, 4 or 6. If set, don't try to automatically
- determine what the IP address type is. important for things
- like IPAddress(1), which could be IPv4, '0.0.0.1', or IPv6,
- '::1'.
-
- Returns:
- An IPv4Address or IPv6Address object.
-
- Raises:
- ValueError: if the string passed isn't either a v4 or a v6
- address.
-
- """
- if version:
- if version == 4:
- return IPv4Address(address)
- elif version == 6:
- return IPv6Address(address)
-
- try:
- return IPv4Address(address)
- except (AddressValueError, NetmaskValueError):
- pass
-
- try:
- return IPv6Address(address)
- except (AddressValueError, NetmaskValueError):
- pass
-
- raise ValueError('%r does not appear to be an IPv4 or IPv6 address' %
- address)
-
-
-def IPNetwork(address, version=None, strict=False):
- """Take an IP string/int and return an object of the correct type.
-
- Args:
- address: A string or integer, the IP address. Either IPv4 or
- IPv6 addresses may be supplied; integers less than 2**32 will
- be considered to be IPv4 by default.
- version: An Integer, if set, don't try to automatically
- determine what the IP address type is. important for things
- like IPNetwork(1), which could be IPv4, '0.0.0.1/32', or IPv6,
- '::1/128'.
-
- Returns:
- An IPv4Network or IPv6Network object.
-
- Raises:
- ValueError: if the string passed isn't either a v4 or a v6
- address. Or if a strict network was requested and a strict
- network wasn't given.
-
- """
- if version:
- if version == 4:
- return IPv4Network(address, strict)
- elif version == 6:
- return IPv6Network(address, strict)
-
- try:
- return IPv4Network(address, strict)
- except (AddressValueError, NetmaskValueError):
- pass
-
- try:
- return IPv6Network(address, strict)
- except (AddressValueError, NetmaskValueError):
- pass
-
- raise ValueError('%r does not appear to be an IPv4 or IPv6 network' %
- address)
-
-
-def v4_int_to_packed(address):
- """The binary representation of this address.
-
- Args:
- address: An integer representation of an IPv4 IP address.
-
- Returns:
- The binary representation of this address.
-
- Raises:
- ValueError: If the integer is too large to be an IPv4 IP
- address.
- """
- if address > _BaseV4._ALL_ONES:
- raise ValueError('Address too large for IPv4')
- return struct.pack('!I', address)
-
-
-def v6_int_to_packed(address):
- """The binary representation of this address.
-
- Args:
- address: An integer representation of an IPv4 IP address.
-
- Returns:
- The binary representation of this address.
- """
- return struct.pack('!QQ', address >> 64, address & (2**64 - 1))
-
-
-def _find_address_range(addresses):
- """Find a sequence of addresses.
-
- Args:
- addresses: a list of IPv4 or IPv6 addresses.
-
- Returns:
- A tuple containing the first and last IP addresses in the sequence.
-
- """
- first = last = addresses[0]
- for ip in addresses[1:]:
- if ip._ip == last._ip + 1:
- last = ip
- else:
- break
- return (first, last)
-
-def _get_prefix_length(number1, number2, bits):
- """Get the number of leading bits that are same for two numbers.
-
- Args:
- number1: an integer.
- number2: another integer.
- bits: the maximum number of bits to compare.
-
- Returns:
- The number of leading bits that are the same for two numbers.
-
- """
- for i in range(bits):
- if number1 >> i == number2 >> i:
- return bits - i
- return 0
-
-def _count_righthand_zero_bits(number, bits):
- """Count the number of zero bits on the right hand side.
-
- Args:
- number: an integer.
- bits: maximum number of bits to count.
-
- Returns:
- The number of zero bits on the right hand side of the number.
-
- """
- if number == 0:
- return bits
- for i in range(bits):
- if (number >> i) % 2:
- return i
-
-def summarize_address_range(first, last):
- """Summarize a network range given the first and last IP addresses.
-
- Example:
- >>> summarize_address_range(IPv4Address('1.1.1.0'),
- IPv4Address('1.1.1.130'))
- [IPv4Network('1.1.1.0/25'), IPv4Network('1.1.1.128/31'),
- IPv4Network('1.1.1.130/32')]
-
- Args:
- first: the first IPv4Address or IPv6Address in the range.
- last: the last IPv4Address or IPv6Address in the range.
-
- Returns:
- The address range collapsed to a list of IPv4Network's or
- IPv6Network's.
-
- Raise:
- TypeError:
- If the first and last objects are not IP addresses.
- If the first and last objects are not the same version.
- ValueError:
- If the last object is not greater than the first.
- If the version is not 4 or 6.
-
- """
- if not (isinstance(first, _BaseIP) and isinstance(last, _BaseIP)):
- raise TypeError('first and last must be IP addresses, not networks')
- if first.version != last.version:
- raise TypeError("%s and %s are not of the same version" % (
- str(first), str(last)))
- if first > last:
- raise ValueError('last IP address must be greater than first')
-
- networks = []
-
- if first.version == 4:
- ip = IPv4Network
- elif first.version == 6:
- ip = IPv6Network
- else:
- raise ValueError('unknown IP version')
-
- ip_bits = first._max_prefixlen
- first_int = first._ip
- last_int = last._ip
- while first_int <= last_int:
- nbits = _count_righthand_zero_bits(first_int, ip_bits)
- current = None
- while nbits >= 0:
- addend = 2**nbits - 1
- current = first_int + addend
- nbits -= 1
- if current <= last_int:
- break
- prefix = _get_prefix_length(first_int, current, ip_bits)
- net = ip('%s/%d' % (str(first), prefix))
- networks.append(net)
- if current == ip._ALL_ONES:
- break
- first_int = current + 1
- first = IPAddress(first_int, version=first._version)
- return networks
-
-def _collapse_address_list_recursive(addresses):
- """Loops through the addresses, collapsing concurrent netblocks.
-
- Example:
-
- ip1 = IPv4Network('1.1.0.0/24')
- ip2 = IPv4Network('1.1.1.0/24')
- ip3 = IPv4Network('1.1.2.0/24')
- ip4 = IPv4Network('1.1.3.0/24')
- ip5 = IPv4Network('1.1.4.0/24')
- ip6 = IPv4Network('1.1.0.1/22')
-
- _collapse_address_list_recursive([ip1, ip2, ip3, ip4, ip5, ip6]) ->
- [IPv4Network('1.1.0.0/22'), IPv4Network('1.1.4.0/24')]
-
- This shouldn't be called directly; it is called via
- collapse_address_list([]).
-
- Args:
- addresses: A list of IPv4Network's or IPv6Network's
-
- Returns:
- A list of IPv4Network's or IPv6Network's depending on what we were
- passed.
-
- """
- ret_array = []
- optimized = False
-
- for cur_addr in addresses:
- if not ret_array:
- ret_array.append(cur_addr)
- continue
- if cur_addr in ret_array[-1]:
- optimized = True
- elif cur_addr == ret_array[-1].supernet().subnet()[1]:
- ret_array.append(ret_array.pop().supernet())
- optimized = True
- else:
- ret_array.append(cur_addr)
-
- if optimized:
- return _collapse_address_list_recursive(ret_array)
-
- return ret_array
-
-
-def collapse_address_list(addresses):
- """Collapse a list of IP objects.
-
- Example:
- collapse_address_list([IPv4('1.1.0.0/24'), IPv4('1.1.1.0/24')]) ->
- [IPv4('1.1.0.0/23')]
-
- Args:
- addresses: A list of IPv4Network or IPv6Network objects.
-
- Returns:
- A list of IPv4Network or IPv6Network objects depending on what we
- were passed.
-
- Raises:
- TypeError: If passed a list of mixed version objects.
-
- """
- i = 0
- addrs = []
- ips = []
- nets = []
-
- # split IP addresses and networks
- for ip in addresses:
- if isinstance(ip, _BaseIP):
- if ips and ips[-1]._version != ip._version:
- raise TypeError("%s and %s are not of the same version" % (
- str(ip), str(ips[-1])))
- ips.append(ip)
- elif ip._prefixlen == ip._max_prefixlen:
- if ips and ips[-1]._version != ip._version:
- raise TypeError("%s and %s are not of the same version" % (
- str(ip), str(ips[-1])))
- ips.append(ip.ip)
- else:
- if nets and nets[-1]._version != ip._version:
- raise TypeError("%s and %s are not of the same version" % (
- str(ip), str(ips[-1])))
- nets.append(ip)
-
- # sort and dedup
- ips = sorted(set(ips))
- nets = sorted(set(nets))
-
- while i < len(ips):
- (first, last) = _find_address_range(ips[i:])
- i = ips.index(last) + 1
- addrs.extend(summarize_address_range(first, last))
-
- return _collapse_address_list_recursive(sorted(
- addrs + nets, key=_BaseNet._get_networks_key))
-
-# backwards compatibility
-CollapseAddrList = collapse_address_list
-
-# Test whether this Python implementation supports byte objects that
-# are not identical to str ones.
-# We need to exclude platforms where bytes == str so that we can
-# distinguish between packed representations and strings, for example
-# b'12::' (the IPv4 address 49.50.58.58) and '12::' (an IPv6 address).
-try:
- _compat_has_real_bytes = bytes is not str
-except NameError: # <Python2.6
- _compat_has_real_bytes = False
-
-def get_mixed_type_key(obj):
- """Return a key suitable for sorting between networks and addresses.
-
- Address and Network objects are not sortable by default; they're
- fundamentally different so the expression
-
- IPv4Address('1.1.1.1') <= IPv4Network('1.1.1.1/24')
-
- doesn't make any sense. There are some times however, where you may wish
- to have ipaddr sort these for you anyway. If you need to do this, you
- can use this function as the key= argument to sorted().
-
- Args:
- obj: either a Network or Address object.
- Returns:
- appropriate key.
-
- """
- if isinstance(obj, _BaseNet):
- return obj._get_networks_key()
- elif isinstance(obj, _BaseIP):
- return obj._get_address_key()
- return NotImplemented
-
-class _IPAddrBase(object):
-
- """The mother class."""
-
- def __index__(self):
- return self._ip
-
- def __int__(self):
- return self._ip
-
- def __hex__(self):
- return hex(self._ip)
-
- @property
- def exploded(self):
- """Return the longhand version of the IP address as a string."""
- return self._explode_shorthand_ip_string()
-
- @property
- def compressed(self):
- """Return the shorthand version of the IP address as a string."""
- return str(self)
-
-
-class _BaseIP(_IPAddrBase):
-
- """A generic IP object.
-
- This IP class contains the version independent methods which are
- used by single IP addresses.
-
- """
-
- def __init__(self, address):
- if (not (_compat_has_real_bytes and isinstance(address, bytes))
- and '/' in str(address)):
- raise AddressValueError(address)
-
- def __eq__(self, other):
- try:
- return (self._ip == other._ip
- and self._version == other._version)
- except AttributeError:
- return NotImplemented
-
- def __ne__(self, other):
- eq = self.__eq__(other)
- if eq is NotImplemented:
- return NotImplemented
- return not eq
-
- def __le__(self, other):
- gt = self.__gt__(other)
- if gt is NotImplemented:
- return NotImplemented
- return not gt
-
- def __ge__(self, other):
- lt = self.__lt__(other)
- if lt is NotImplemented:
- return NotImplemented
- return not lt
-
- def __lt__(self, other):
- if self._version != other._version:
- raise TypeError('%s and %s are not of the same version' % (
- str(self), str(other)))
- if not isinstance(other, _BaseIP):
- raise TypeError('%s and %s are not of the same type' % (
- str(self), str(other)))
- if self._ip != other._ip:
- return self._ip < other._ip
- return False
-
- def __gt__(self, other):
- if self._version != other._version:
- raise TypeError('%s and %s are not of the same version' % (
- str(self), str(other)))
- if not isinstance(other, _BaseIP):
- raise TypeError('%s and %s are not of the same type' % (
- str(self), str(other)))
- if self._ip != other._ip:
- return self._ip > other._ip
- return False
-
- # Shorthand for Integer addition and subtraction. This is not
- # meant to ever support addition/subtraction of addresses.
- def __add__(self, other):
- if not isinstance(other, int):
- return NotImplemented
- return IPAddress(int(self) + other, version=self._version)
-
- def __sub__(self, other):
- if not isinstance(other, int):
- return NotImplemented
- return IPAddress(int(self) - other, version=self._version)
-
- def __repr__(self):
- return '%s(%r)' % (self.__class__.__name__, str(self))
-
- def __str__(self):
- return '%s' % self._string_from_ip_int(self._ip)
-
- def __hash__(self):
- return hash(hex(long(self._ip)))
-
- def _get_address_key(self):
- return (self._version, self)
-
- @property
- def version(self):
- raise NotImplementedError('BaseIP has no version')
-
-
-class _BaseNet(_IPAddrBase):
-
- """A generic IP object.
-
- This IP class contains the version independent methods which are
- used by networks.
-
- """
-
- def __init__(self, address):
- self._cache = {}
-
- def __repr__(self):
- return '%s(%r)' % (self.__class__.__name__, str(self))
-
- def iterhosts(self):
- """Generate Iterator over usable hosts in a network.
-
- This is like __iter__ except it doesn't return the network
- or broadcast addresses.
-
- """
- cur = int(self.network) + 1
- bcast = int(self.broadcast) - 1
- while cur <= bcast:
- cur += 1
- yield IPAddress(cur - 1, version=self._version)
-
- def __iter__(self):
- cur = int(self.network)
- bcast = int(self.broadcast)
- while cur <= bcast:
- cur += 1
- yield IPAddress(cur - 1, version=self._version)
-
- def __getitem__(self, n):
- network = int(self.network)
- broadcast = int(self.broadcast)
- if n >= 0:
- if network + n > broadcast:
- raise IndexError
- return IPAddress(network + n, version=self._version)
- else:
- n += 1
- if broadcast + n < network:
- raise IndexError
- return IPAddress(broadcast + n, version=self._version)
-
- def __lt__(self, other):
- if self._version != other._version:
- raise TypeError('%s and %s are not of the same version' % (
- str(self), str(other)))
- if not isinstance(other, _BaseNet):
- raise TypeError('%s and %s are not of the same type' % (
- str(self), str(other)))
- if self.network != other.network:
- return self.network < other.network
- if self.netmask != other.netmask:
- return self.netmask < other.netmask
- return False
-
- def __gt__(self, other):
- if self._version != other._version:
- raise TypeError('%s and %s are not of the same version' % (
- str(self), str(other)))
- if not isinstance(other, _BaseNet):
- raise TypeError('%s and %s are not of the same type' % (
- str(self), str(other)))
- if self.network != other.network:
- return self.network > other.network
- if self.netmask != other.netmask:
- return self.netmask > other.netmask
- return False
-
- def __le__(self, other):
- gt = self.__gt__(other)
- if gt is NotImplemented:
- return NotImplemented
- return not gt
-
- def __ge__(self, other):
- lt = self.__lt__(other)
- if lt is NotImplemented:
- return NotImplemented
- return not lt
-
- def __eq__(self, other):
- try:
- return (self._version == other._version
- and self.network == other.network
- and int(self.netmask) == int(other.netmask))
- except AttributeError:
- if isinstance(other, _BaseIP):
- return (self._version == other._version
- and self._ip == other._ip)
-
- def __ne__(self, other):
- eq = self.__eq__(other)
- if eq is NotImplemented:
- return NotImplemented
- return not eq
-
- def __str__(self):
- return '%s/%s' % (str(self.ip),
- str(self._prefixlen))
-
- def __hash__(self):
- return hash(int(self.network) ^ int(self.netmask))
-
- def __contains__(self, other):
- # always false if one is v4 and the other is v6.
- if self._version != other._version:
- return False
- # dealing with another network.
- if isinstance(other, _BaseNet):
- return (self.network <= other.network and
- self.broadcast >= other.broadcast)
- # dealing with another address
- else:
- return (int(self.network) <= int(other._ip) <=
- int(self.broadcast))
-
- def overlaps(self, other):
- """Tell if self is partly contained in other."""
- return self.network in other or self.broadcast in other or (
- other.network in self or other.broadcast in self)
-
- @property
- def network(self):
- x = self._cache.get('network')
- if x is None:
- x = IPAddress(self._ip & int(self.netmask), version=self._version)
- self._cache['network'] = x
- return x
-
- @property
- def broadcast(self):
- x = self._cache.get('broadcast')
- if x is None:
- x = IPAddress(self._ip | int(self.hostmask), version=self._version)
- self._cache['broadcast'] = x
- return x
-
- @property
- def hostmask(self):
- x = self._cache.get('hostmask')
- if x is None:
- x = IPAddress(int(self.netmask) ^ self._ALL_ONES,
- version=self._version)
- self._cache['hostmask'] = x
- return x
-
- @property
- def with_prefixlen(self):
- return '%s/%d' % (str(self.ip), self._prefixlen)
-
- @property
- def with_netmask(self):
- return '%s/%s' % (str(self.ip), str(self.netmask))
-
- @property
- def with_hostmask(self):
- return '%s/%s' % (str(self.ip), str(self.hostmask))
-
- @property
- def numhosts(self):
- """Number of hosts in the current subnet."""
- return int(self.broadcast) - int(self.network) + 1
-
- @property
- def version(self):
- raise NotImplementedError('BaseNet has no version')
-
- @property
- def prefixlen(self):
- return self._prefixlen
-
- def address_exclude(self, other):
- """Remove an address from a larger block.
-
- For example:
-
- addr1 = IPNetwork('10.1.1.0/24')
- addr2 = IPNetwork('10.1.1.0/26')
- addr1.address_exclude(addr2) =
- [IPNetwork('10.1.1.64/26'), IPNetwork('10.1.1.128/25')]
-
- or IPv6:
-
- addr1 = IPNetwork('::1/32')
- addr2 = IPNetwork('::1/128')
- addr1.address_exclude(addr2) = [IPNetwork('::0/128'),
- IPNetwork('::2/127'),
- IPNetwork('::4/126'),
- IPNetwork('::8/125'),
- ...
- IPNetwork('0:0:8000::/33')]
-
- Args:
- other: An IPvXNetwork object of the same type.
-
- Returns:
- A sorted list of IPvXNetwork objects addresses which is self
- minus other.
-
- Raises:
- TypeError: If self and other are of difffering address
- versions, or if other is not a network object.
- ValueError: If other is not completely contained by self.
-
- """
- if not self._version == other._version:
- raise TypeError("%s and %s are not of the same version" % (
- str(self), str(other)))
-
- if not isinstance(other, _BaseNet):
- raise TypeError("%s is not a network object" % str(other))
-
- if other not in self:
- raise ValueError('%s not contained in %s' % (str(other),
- str(self)))
- if other == self:
- return []
-
- ret_addrs = []
-
- # Make sure we're comparing the network of other.
- other = IPNetwork('%s/%s' % (str(other.network), str(other.prefixlen)),
- version=other._version)
-
- s1, s2 = self.subnet()
- while s1 != other and s2 != other:
- if other in s1:
- ret_addrs.append(s2)
- s1, s2 = s1.subnet()
- elif other in s2:
- ret_addrs.append(s1)
- s1, s2 = s2.subnet()
- else:
- # If we got here, there's a bug somewhere.
- assert True == False, ('Error performing exclusion: '
- 's1: %s s2: %s other: %s' %
- (str(s1), str(s2), str(other)))
- if s1 == other:
- ret_addrs.append(s2)
- elif s2 == other:
- ret_addrs.append(s1)
- else:
- # If we got here, there's a bug somewhere.
- assert True == False, ('Error performing exclusion: '
- 's1: %s s2: %s other: %s' %
- (str(s1), str(s2), str(other)))
-
- return sorted(ret_addrs, key=_BaseNet._get_networks_key)
-
- def compare_networks(self, other):
- """Compare two IP objects.
-
- This is only concerned about the comparison of the integer
- representation of the network addresses. This means that the
- host bits aren't considered at all in this method. If you want
- to compare host bits, you can easily enough do a
- 'HostA._ip < HostB._ip'
-
- Args:
- other: An IP object.
-
- Returns:
- If the IP versions of self and other are the same, returns:
-
- -1 if self < other:
- eg: IPv4('1.1.1.0/24') < IPv4('1.1.2.0/24')
- IPv6('1080::200C:417A') < IPv6('1080::200B:417B')
- 0 if self == other
- eg: IPv4('1.1.1.1/24') == IPv4('1.1.1.2/24')
- IPv6('1080::200C:417A/96') == IPv6('1080::200C:417B/96')
- 1 if self > other
- eg: IPv4('1.1.1.0/24') > IPv4('1.1.0.0/24')
- IPv6('1080::1:200C:417A/112') >
- IPv6('1080::0:200C:417A/112')
-
- If the IP versions of self and other are different, returns:
-
- -1 if self._version < other._version
- eg: IPv4('10.0.0.1/24') < IPv6('::1/128')
- 1 if self._version > other._version
- eg: IPv6('::1/128') > IPv4('255.255.255.0/24')
-
- """
- if self._version < other._version:
- return -1
- if self._version > other._version:
- return 1
- # self._version == other._version below here:
- if self.network < other.network:
- return -1
- if self.network > other.network:
- return 1
- # self.network == other.network below here:
- if self.netmask < other.netmask:
- return -1
- if self.netmask > other.netmask:
- return 1
- # self.network == other.network and self.netmask == other.netmask
- return 0
-
- def _get_networks_key(self):
- """Network-only key function.
-
- Returns an object that identifies this address' network and
- netmask. This function is a suitable "key" argument for sorted()
- and list.sort().
-
- """
- return (self._version, self.network, self.netmask)
-
- def _ip_int_from_prefix(self, prefixlen=None):
- """Turn the prefix length netmask into a int for comparison.
-
- Args:
- prefixlen: An integer, the prefix length.
-
- Returns:
- An integer.
-
- """
- if not prefixlen and prefixlen != 0:
- prefixlen = self._prefixlen
- return self._ALL_ONES ^ (self._ALL_ONES >> prefixlen)
-
- def _prefix_from_ip_int(self, ip_int, mask=32):
- """Return prefix length from the decimal netmask.
-
- Args:
- ip_int: An integer, the IP address.
- mask: The netmask. Defaults to 32.
-
- Returns:
- An integer, the prefix length.
-
- """
- while mask:
- if ip_int & 1 == 1:
- break
- ip_int >>= 1
- mask -= 1
-
- return mask
-
- def _ip_string_from_prefix(self, prefixlen=None):
- """Turn a prefix length into a dotted decimal string.
-
- Args:
- prefixlen: An integer, the netmask prefix length.
-
- Returns:
- A string, the dotted decimal netmask string.
-
- """
- if not prefixlen:
- prefixlen = self._prefixlen
- return self._string_from_ip_int(self._ip_int_from_prefix(prefixlen))
-
- def iter_subnets(self, prefixlen_diff=1, new_prefix=None):
- """The subnets which join to make the current subnet.
-
- In the case that self contains only one IP
- (self._prefixlen == 32 for IPv4 or self._prefixlen == 128
- for IPv6), return a list with just ourself.
-
- Args:
- prefixlen_diff: An integer, the amount the prefix length
- should be increased by. This should not be set if
- new_prefix is also set.
- new_prefix: The desired new prefix length. This must be a
- larger number (smaller prefix) than the existing prefix.
- This should not be set if prefixlen_diff is also set.
-
- Returns:
- An iterator of IPv(4|6) objects.
-
- Raises:
- ValueError: The prefixlen_diff is too small or too large.
- OR
- prefixlen_diff and new_prefix are both set or new_prefix
- is a smaller number than the current prefix (smaller
- number means a larger network)
-
- """
- if self._prefixlen == self._max_prefixlen:
- yield self
- return
-
- if new_prefix is not None:
- if new_prefix < self._prefixlen:
- raise ValueError('new prefix must be longer')
- if prefixlen_diff != 1:
- raise ValueError('cannot set prefixlen_diff and new_prefix')
- prefixlen_diff = new_prefix - self._prefixlen
-
- if prefixlen_diff < 0:
- raise ValueError('prefix length diff must be > 0')
- new_prefixlen = self._prefixlen + prefixlen_diff
-
- if not self._is_valid_netmask(str(new_prefixlen)):
- raise ValueError(
- 'prefix length diff %d is invalid for netblock %s' % (
- new_prefixlen, str(self)))
-
- first = IPNetwork('%s/%s' % (str(self.network),
- str(self._prefixlen + prefixlen_diff)),
- version=self._version)
-
- yield first
- current = first
- while True:
- broadcast = current.broadcast
- if broadcast == self.broadcast:
- return
- new_addr = IPAddress(int(broadcast) + 1, version=self._version)
- current = IPNetwork('%s/%s' % (str(new_addr), str(new_prefixlen)),
- version=self._version)
-
- yield current
-
- def masked(self):
- """Return the network object with the host bits masked out."""
- return IPNetwork('%s/%d' % (self.network, self._prefixlen),
- version=self._version)
-
- def subnet(self, prefixlen_diff=1, new_prefix=None):
- """Return a list of subnets, rather than an iterator."""
- return list(self.iter_subnets(prefixlen_diff, new_prefix))
-
- def supernet(self, prefixlen_diff=1, new_prefix=None):
- """The supernet containing the current network.
-
- Args:
- prefixlen_diff: An integer, the amount the prefix length of
- the network should be decreased by. For example, given a
- /24 network and a prefixlen_diff of 3, a supernet with a
- /21 netmask is returned.
-
- Returns:
- An IPv4 network object.
-
- Raises:
- ValueError: If self.prefixlen - prefixlen_diff < 0. I.e., you have a
- negative prefix length.
- OR
- If prefixlen_diff and new_prefix are both set or new_prefix is a
- larger number than the current prefix (larger number means a
- smaller network)
-
- """
- if self._prefixlen == 0:
- return self
-
- if new_prefix is not None:
- if new_prefix > self._prefixlen:
- raise ValueError('new prefix must be shorter')
- if prefixlen_diff != 1:
- raise ValueError('cannot set prefixlen_diff and new_prefix')
- prefixlen_diff = self._prefixlen - new_prefix
-
-
- if self.prefixlen - prefixlen_diff < 0:
- raise ValueError(
- 'current prefixlen is %d, cannot have a prefixlen_diff of %d' %
- (self.prefixlen, prefixlen_diff))
- return IPNetwork('%s/%s' % (str(self.network),
- str(self.prefixlen - prefixlen_diff)),
- version=self._version)
-
- # backwards compatibility
- Subnet = subnet
- Supernet = supernet
- AddressExclude = address_exclude
- CompareNetworks = compare_networks
- Contains = __contains__
-
-
-class _BaseV4(object):
-
- """Base IPv4 object.
-
- The following methods are used by IPv4 objects in both single IP
- addresses and networks.
-
- """
-
- # Equivalent to 255.255.255.255 or 32 bits of 1's.
- _ALL_ONES = (2**IPV4LENGTH) - 1
- _DECIMAL_DIGITS = frozenset('0123456789')
-
- def __init__(self, address):
- self._version = 4
- self._max_prefixlen = IPV4LENGTH
-
- def _explode_shorthand_ip_string(self, ip_str=None):
- if not ip_str:
- ip_str = str(self)
- return ip_str
-
- def _ip_int_from_string(self, ip_str):
- """Turn the given IP string into an integer for comparison.
-
- Args:
- ip_str: A string, the IP ip_str.
-
- Returns:
- The IP ip_str as an integer.
-
- Raises:
- AddressValueError: if ip_str isn't a valid IPv4 Address.
-
- """
- octets = ip_str.split('.')
- if len(octets) != 4:
- raise AddressValueError(ip_str)
-
- packed_ip = 0
- for oc in octets:
- try:
- packed_ip = (packed_ip << 8) | self._parse_octet(oc)
- except ValueError:
- raise AddressValueError(ip_str)
- return packed_ip
-
- def _parse_octet(self, octet_str):
- """Convert a decimal octet into an integer.
-
- Args:
- octet_str: A string, the number to parse.
-
- Returns:
- The octet as an integer.
-
- Raises:
- ValueError: if the octet isn't strictly a decimal from [0..255].
-
- """
- # Whitelist the characters, since int() allows a lot of bizarre stuff.
- if not self._DECIMAL_DIGITS.issuperset(octet_str):
- raise ValueError
- octet_int = int(octet_str, 10)
- # Disallow leading zeroes, because no clear standard exists on
- # whether these should be interpreted as decimal or octal.
- if octet_int > 255 or (octet_str[0] == '0' and len(octet_str) > 1):
- raise ValueError
- return octet_int
-
- def _string_from_ip_int(self, ip_int):
- """Turns a 32-bit integer into dotted decimal notation.
-
- Args:
- ip_int: An integer, the IP address.
-
- Returns:
- The IP address as a string in dotted decimal notation.
-
- """
- octets = []
- for _ in xrange(4):
- octets.insert(0, str(ip_int & 0xFF))
- ip_int >>= 8
- return '.'.join(octets)
-
- @property
- def max_prefixlen(self):
- return self._max_prefixlen
-
- @property
- def packed(self):
- """The binary representation of this address."""
- return v4_int_to_packed(self._ip)
-
- @property
- def version(self):
- return self._version
-
- @property
- def is_reserved(self):
- """Test if the address is otherwise IETF reserved.
-
- Returns:
- A boolean, True if the address is within the
- reserved IPv4 Network range.
-
- """
- return self in IPv4Network('240.0.0.0/4')
-
- @property
- def is_private(self):
- """Test if this address is allocated for private networks.
-
- Returns:
- A boolean, True if the address is reserved per RFC 1918.
-
- """
- return (self in IPv4Network('10.0.0.0/8') or
- self in IPv4Network('172.16.0.0/12') or
- self in IPv4Network('192.168.0.0/16'))
-
- @property
- def is_multicast(self):
- """Test if the address is reserved for multicast use.
-
- Returns:
- A boolean, True if the address is multicast.
- See RFC 3171 for details.
-
- """
- return self in IPv4Network('224.0.0.0/4')
-
- @property
- def is_unspecified(self):
- """Test if the address is unspecified.
-
- Returns:
- A boolean, True if this is the unspecified address as defined in
- RFC 5735 3.
-
- """
- return self in IPv4Network('0.0.0.0')
-
- @property
- def is_loopback(self):
- """Test if the address is a loopback address.
-
- Returns:
- A boolean, True if the address is a loopback per RFC 3330.
-
- """
- return self in IPv4Network('127.0.0.0/8')
-
- @property
- def is_link_local(self):
- """Test if the address is reserved for link-local.
-
- Returns:
- A boolean, True if the address is link-local per RFC 3927.
-
- """
- return self in IPv4Network('169.254.0.0/16')
-
-
-class IPv4Address(_BaseV4, _BaseIP):
-
- """Represent and manipulate single IPv4 Addresses."""
-
- def __init__(self, address):
-
- """
- Args:
- address: A string or integer representing the IP
- '192.168.1.1'
-
- Additionally, an integer can be passed, so
- IPv4Address('192.168.1.1') == IPv4Address(3232235777).
- or, more generally
- IPv4Address(int(IPv4Address('192.168.1.1'))) ==
- IPv4Address('192.168.1.1')
-
- Raises:
- AddressValueError: If ipaddr isn't a valid IPv4 address.
-
- """
- _BaseIP.__init__(self, address)
- _BaseV4.__init__(self, address)
-
- # Efficient constructor from integer.
- if isinstance(address, (int, long)):
- self._ip = address
- if address < 0 or address > self._ALL_ONES:
- raise AddressValueError(address)
- return
-
- # Constructing from a packed address
- if _compat_has_real_bytes:
- if isinstance(address, bytes) and len(address) == 4:
- self._ip = struct.unpack('!I', address)[0]
- return
-
- # Assume input argument to be string or any object representation
- # which converts into a formatted IP string.
- addr_str = str(address)
- self._ip = self._ip_int_from_string(addr_str)
-
-
-class IPv4Network(_BaseV4, _BaseNet):
-
- """This class represents and manipulates 32-bit IPv4 networks.
-
- Attributes: [examples for IPv4Network('1.2.3.4/27')]
- ._ip: 16909060
- .ip: IPv4Address('1.2.3.4')
- .network: IPv4Address('1.2.3.0')
- .hostmask: IPv4Address('0.0.0.31')
- .broadcast: IPv4Address('1.2.3.31')
- .netmask: IPv4Address('255.255.255.224')
- .prefixlen: 27
-
- """
-
- # the valid octets for host and netmasks. only useful for IPv4.
- _valid_mask_octets = set((255, 254, 252, 248, 240, 224, 192, 128, 0))
-
- def __init__(self, address, strict=False):
- """Instantiate a new IPv4 network object.
-
- Args:
- address: A string or integer representing the IP [& network].
- '192.168.1.1/24'
- '192.168.1.1/255.255.255.0'
- '192.168.1.1/0.0.0.255'
- are all functionally the same in IPv4. Similarly,
- '192.168.1.1'
- '192.168.1.1/255.255.255.255'
- '192.168.1.1/32'
- are also functionaly equivalent. That is to say, failing to
- provide a subnetmask will create an object with a mask of /32.
-
- If the mask (portion after the / in the argument) is given in
- dotted quad form, it is treated as a netmask if it starts with a
- non-zero field (e.g. /255.0.0.0 == /8) and as a hostmask if it
- starts with a zero field (e.g. 0.255.255.255 == /8), with the
- single exception of an all-zero mask which is treated as a
- netmask == /0. If no mask is given, a default of /32 is used.
-
- Additionally, an integer can be passed, so
- IPv4Network('192.168.1.1') == IPv4Network(3232235777).
- or, more generally
- IPv4Network(int(IPv4Network('192.168.1.1'))) ==
- IPv4Network('192.168.1.1')
-
- strict: A boolean. If true, ensure that we have been passed
- A true network address, eg, 192.168.1.0/24 and not an
- IP address on a network, eg, 192.168.1.1/24.
-
- Raises:
- AddressValueError: If ipaddr isn't a valid IPv4 address.
- NetmaskValueError: If the netmask isn't valid for
- an IPv4 address.
- ValueError: If strict was True and a network address was not
- supplied.
-
- """
- _BaseNet.__init__(self, address)
- _BaseV4.__init__(self, address)
-
- # Efficient constructor from integer.
- if isinstance(address, (int, long)):
- self._ip = address
- self.ip = IPv4Address(self._ip)
- self._prefixlen = self._max_prefixlen
- self.netmask = IPv4Address(self._ALL_ONES)
- if address < 0 or address > self._ALL_ONES:
- raise AddressValueError(address)
- return
-
- # Constructing from a packed address
- if _compat_has_real_bytes:
- if isinstance(address, bytes) and len(address) == 4:
- self._ip = struct.unpack('!I', address)[0]
- self.ip = IPv4Address(self._ip)
- self._prefixlen = self._max_prefixlen
- self.netmask = IPv4Address(self._ALL_ONES)
- return
-
- # Assume input argument to be string or any object representation
- # which converts into a formatted IP prefix string.
- addr = str(address).split('/')
-
- if len(addr) > 2:
- raise AddressValueError(address)
-
- self._ip = self._ip_int_from_string(addr[0])
- self.ip = IPv4Address(self._ip)
-
- if len(addr) == 2:
- mask = addr[1].split('.')
- if len(mask) == 4:
- # We have dotted decimal netmask.
- if self._is_valid_netmask(addr[1]):
- self.netmask = IPv4Address(self._ip_int_from_string(
- addr[1]))
- elif self._is_hostmask(addr[1]):
- self.netmask = IPv4Address(
- self._ip_int_from_string(addr[1]) ^ self._ALL_ONES)
- else:
- raise NetmaskValueError('%s is not a valid netmask'
- % addr[1])
-
- self._prefixlen = self._prefix_from_ip_int(int(self.netmask))
- else:
- # We have a netmask in prefix length form.
- if not self._is_valid_netmask(addr[1]):
- raise NetmaskValueError(addr[1])
- self._prefixlen = int(addr[1])
- self.netmask = IPv4Address(self._ip_int_from_prefix(
- self._prefixlen))
- else:
- self._prefixlen = self._max_prefixlen
- self.netmask = IPv4Address(self._ip_int_from_prefix(
- self._prefixlen))
- if strict:
- if self.ip != self.network:
- raise ValueError('%s has host bits set' %
- self.ip)
-
- def _is_hostmask(self, ip_str):
- """Test if the IP string is a hostmask (rather than a netmask).
-
- Args:
- ip_str: A string, the potential hostmask.
-
- Returns:
- A boolean, True if the IP string is a hostmask.
-
- """
- bits = ip_str.split('.')
- try:
- parts = [int(x) for x in bits if int(x) in self._valid_mask_octets]
- except ValueError:
- return False
- if len(parts) != len(bits):
- return False
- if parts[0] < parts[-1]:
- return True
- return False
-
- def _is_valid_netmask(self, netmask):
- """Verify that the netmask is valid.
-
- Args:
- netmask: A string, either a prefix or dotted decimal
- netmask.
-
- Returns:
- A boolean, True if the prefix represents a valid IPv4
- netmask.
-
- """
- mask = netmask.split('.')
- if len(mask) == 4:
- if [x for x in mask if int(x) not in self._valid_mask_octets]:
- return False
- if [y for idx, y in enumerate(mask) if idx > 0 and
- y > mask[idx - 1]]:
- return False
- return True
- try:
- netmask = int(netmask)
- except ValueError:
- return False
- return 0 <= netmask <= self._max_prefixlen
-
- # backwards compatibility
- IsRFC1918 = lambda self: self.is_private
- IsMulticast = lambda self: self.is_multicast
- IsLoopback = lambda self: self.is_loopback
- IsLinkLocal = lambda self: self.is_link_local
-
-
-class _BaseV6(object):
-
- """Base IPv6 object.
-
- The following methods are used by IPv6 objects in both single IP
- addresses and networks.
-
- """
-
- _ALL_ONES = (2**IPV6LENGTH) - 1
- _HEXTET_COUNT = 8
- _HEX_DIGITS = frozenset('0123456789ABCDEFabcdef')
-
- def __init__(self, address):
- self._version = 6
- self._max_prefixlen = IPV6LENGTH
-
- def _ip_int_from_string(self, ip_str):
- """Turn an IPv6 ip_str into an integer.
-
- Args:
- ip_str: A string, the IPv6 ip_str.
-
- Returns:
- A long, the IPv6 ip_str.
-
- Raises:
- AddressValueError: if ip_str isn't a valid IPv6 Address.
-
- """
- parts = ip_str.split(':')
-
- # An IPv6 address needs at least 2 colons (3 parts).
- if len(parts) < 3:
- raise AddressValueError(ip_str)
-
- # If the address has an IPv4-style suffix, convert it to hexadecimal.
- if '.' in parts[-1]:
- ipv4_int = IPv4Address(parts.pop())._ip
- parts.append('%x' % ((ipv4_int >> 16) & 0xFFFF))
- parts.append('%x' % (ipv4_int & 0xFFFF))
-
- # An IPv6 address can't have more than 8 colons (9 parts).
- if len(parts) > self._HEXTET_COUNT + 1:
- raise AddressValueError(ip_str)
-
- # Disregarding the endpoints, find '::' with nothing in between.
- # This indicates that a run of zeroes has been skipped.
- try:
- skip_index, = (
- [i for i in xrange(1, len(parts) - 1) if not parts[i]] or
- [None])
- except ValueError:
- # Can't have more than one '::'
- raise AddressValueError(ip_str)
-
- # parts_hi is the number of parts to copy from above/before the '::'
- # parts_lo is the number of parts to copy from below/after the '::'
- if skip_index is not None:
- # If we found a '::', then check if it also covers the endpoints.
- parts_hi = skip_index
- parts_lo = len(parts) - skip_index - 1
- if not parts[0]:
- parts_hi -= 1
- if parts_hi:
- raise AddressValueError(ip_str) # ^: requires ^::
- if not parts[-1]:
- parts_lo -= 1
- if parts_lo:
- raise AddressValueError(ip_str) # :$ requires ::$
- parts_skipped = self._HEXTET_COUNT - (parts_hi + parts_lo)
- if parts_skipped < 1:
- raise AddressValueError(ip_str)
- else:
- # Otherwise, allocate the entire address to parts_hi. The endpoints
- # could still be empty, but _parse_hextet() will check for that.
- if len(parts) != self._HEXTET_COUNT:
- raise AddressValueError(ip_str)
- parts_hi = len(parts)
- parts_lo = 0
- parts_skipped = 0
-
- try:
- # Now, parse the hextets into a 128-bit integer.
- ip_int = 0L
- for i in xrange(parts_hi):
- ip_int <<= 16
- ip_int |= self._parse_hextet(parts[i])
- ip_int <<= 16 * parts_skipped
- for i in xrange(-parts_lo, 0):
- ip_int <<= 16
- ip_int |= self._parse_hextet(parts[i])
- return ip_int
- except ValueError:
- raise AddressValueError(ip_str)
-
- def _parse_hextet(self, hextet_str):
- """Convert an IPv6 hextet string into an integer.
-
- Args:
- hextet_str: A string, the number to parse.
-
- Returns:
- The hextet as an integer.
-
- Raises:
- ValueError: if the input isn't strictly a hex number from [0..FFFF].
-
- """
- # Whitelist the characters, since int() allows a lot of bizarre stuff.
- if not self._HEX_DIGITS.issuperset(hextet_str):
- raise ValueError
- hextet_int = int(hextet_str, 16)
- if hextet_int > 0xFFFF:
- raise ValueError
- return hextet_int
-
- def _compress_hextets(self, hextets):
- """Compresses a list of hextets.
-
- Compresses a list of strings, replacing the longest continuous
- sequence of "0" in the list with "" and adding empty strings at
- the beginning or at the end of the string such that subsequently
- calling ":".join(hextets) will produce the compressed version of
- the IPv6 address.
-
- Args:
- hextets: A list of strings, the hextets to compress.
-
- Returns:
- A list of strings.
-
- """
- best_doublecolon_start = -1
- best_doublecolon_len = 0
- doublecolon_start = -1
- doublecolon_len = 0
- for index in range(len(hextets)):
- if hextets[index] == '0':
- doublecolon_len += 1
- if doublecolon_start == -1:
- # Start of a sequence of zeros.
- doublecolon_start = index
- if doublecolon_len > best_doublecolon_len:
- # This is the longest sequence of zeros so far.
- best_doublecolon_len = doublecolon_len
- best_doublecolon_start = doublecolon_start
- else:
- doublecolon_len = 0
- doublecolon_start = -1
-
- if best_doublecolon_len > 1:
- best_doublecolon_end = (best_doublecolon_start +
- best_doublecolon_len)
- # For zeros at the end of the address.
- if best_doublecolon_end == len(hextets):
- hextets += ['']
- hextets[best_doublecolon_start:best_doublecolon_end] = ['']
- # For zeros at the beginning of the address.
- if best_doublecolon_start == 0:
- hextets = [''] + hextets
-
- return hextets
-
- def _string_from_ip_int(self, ip_int=None):
- """Turns a 128-bit integer into hexadecimal notation.
-
- Args:
- ip_int: An integer, the IP address.
-
- Returns:
- A string, the hexadecimal representation of the address.
-
- Raises:
- ValueError: The address is bigger than 128 bits of all ones.
-
- """
- if not ip_int and ip_int != 0:
- ip_int = int(self._ip)
-
- if ip_int > self._ALL_ONES:
- raise ValueError('IPv6 address is too large')
-
- hex_str = '%032x' % ip_int
- hextets = []
- for x in range(0, 32, 4):
- hextets.append('%x' % int(hex_str[x:x+4], 16))
-
- hextets = self._compress_hextets(hextets)
- return ':'.join(hextets)
-
- def _explode_shorthand_ip_string(self, ip_str=None):
- """Expand a shortened IPv6 address.
-
- Args:
- ip_str: A string, the IPv6 address.
-
- Returns:
- A string, the expanded IPv6 address.
-
- """
- if not ip_str:
- ip_str = str(self)
- if isinstance(self, _BaseNet):
- ip_str = str(self.ip)
-
- ip_int = self._ip_int_from_string(ip_str)
- parts = []
- for i in xrange(self._HEXTET_COUNT):
- parts.append('%04x' % (ip_int & 0xFFFF))
- ip_int >>= 16
- parts.reverse()
- return ':'.join(parts)
-
- @property
- def max_prefixlen(self):
- return self._max_prefixlen
-
- @property
- def packed(self):
- """The binary representation of this address."""
- return v6_int_to_packed(self._ip)
-
- @property
- def version(self):
- return self._version
-
- @property
- def is_multicast(self):
- """Test if the address is reserved for multicast use.
-
- Returns:
- A boolean, True if the address is a multicast address.
- See RFC 2373 2.7 for details.
-
- """
- return self in IPv6Network('ff00::/8')
-
- @property
- def is_reserved(self):
- """Test if the address is otherwise IETF reserved.
-
- Returns:
- A boolean, True if the address is within one of the
- reserved IPv6 Network ranges.
-
- """
- return (self in IPv6Network('::/8') or
- self in IPv6Network('100::/8') or
- self in IPv6Network('200::/7') or
- self in IPv6Network('400::/6') or
- self in IPv6Network('800::/5') or
- self in IPv6Network('1000::/4') or
- self in IPv6Network('4000::/3') or
- self in IPv6Network('6000::/3') or
- self in IPv6Network('8000::/3') or
- self in IPv6Network('A000::/3') or
- self in IPv6Network('C000::/3') or
- self in IPv6Network('E000::/4') or
- self in IPv6Network('F000::/5') or
- self in IPv6Network('F800::/6') or
- self in IPv6Network('FE00::/9'))
-
- @property
- def is_unspecified(self):
- """Test if the address is unspecified.
-
- Returns:
- A boolean, True if this is the unspecified address as defined in
- RFC 2373 2.5.2.
-
- """
- return self._ip == 0 and getattr(self, '_prefixlen', 128) == 128
-
- @property
- def is_loopback(self):
- """Test if the address is a loopback address.
-
- Returns:
- A boolean, True if the address is a loopback address as defined in
- RFC 2373 2.5.3.
-
- """
- return self._ip == 1 and getattr(self, '_prefixlen', 128) == 128
-
- @property
- def is_link_local(self):
- """Test if the address is reserved for link-local.
-
- Returns:
- A boolean, True if the address is reserved per RFC 4291.
-
- """
- return self in IPv6Network('fe80::/10')
-
- @property
- def is_site_local(self):
- """Test if the address is reserved for site-local.
-
- Note that the site-local address space has been deprecated by RFC 3879.
- Use is_private to test if this address is in the space of unique local
- addresses as defined by RFC 4193.
-
- Returns:
- A boolean, True if the address is reserved per RFC 3513 2.5.6.
-
- """
- return self in IPv6Network('fec0::/10')
-
- @property
- def is_private(self):
- """Test if this address is allocated for private networks.
-
- Returns:
- A boolean, True if the address is reserved per RFC 4193.
-
- """
- return self in IPv6Network('fc00::/7')
-
- @property
- def ipv4_mapped(self):
- """Return the IPv4 mapped address.
-
- Returns:
- If the IPv6 address is a v4 mapped address, return the
- IPv4 mapped address. Return None otherwise.
-
- """
- if (self._ip >> 32) != 0xFFFF:
- return None
- return IPv4Address(self._ip & 0xFFFFFFFF)
-
- @property
- def teredo(self):
- """Tuple of embedded teredo IPs.
-
- Returns:
- Tuple of the (server, client) IPs or None if the address
- doesn't appear to be a teredo address (doesn't start with
- 2001::/32)
-
- """
- if (self._ip >> 96) != 0x20010000:
- return None
- return (IPv4Address((self._ip >> 64) & 0xFFFFFFFF),
- IPv4Address(~self._ip & 0xFFFFFFFF))
-
- @property
- def sixtofour(self):
- """Return the IPv4 6to4 embedded address.
-
- Returns:
- The IPv4 6to4-embedded address if present or None if the
- address doesn't appear to contain a 6to4 embedded address.
-
- """
- if (self._ip >> 112) != 0x2002:
- return None
- return IPv4Address((self._ip >> 80) & 0xFFFFFFFF)
-
-
-class IPv6Address(_BaseV6, _BaseIP):
-
- """Represent and manipulate single IPv6 Addresses.
- """
-
- def __init__(self, address):
- """Instantiate a new IPv6 address object.
-
- Args:
- address: A string or integer representing the IP
-
- Additionally, an integer can be passed, so
- IPv6Address('2001:4860::') ==
- IPv6Address(42541956101370907050197289607612071936L).
- or, more generally
- IPv6Address(IPv6Address('2001:4860::')._ip) ==
- IPv6Address('2001:4860::')
-
- Raises:
- AddressValueError: If address isn't a valid IPv6 address.
-
- """
- _BaseIP.__init__(self, address)
- _BaseV6.__init__(self, address)
-
- # Efficient constructor from integer.
- if isinstance(address, (int, long)):
- self._ip = address
- if address < 0 or address > self._ALL_ONES:
- raise AddressValueError(address)
- return
-
- # Constructing from a packed address
- if _compat_has_real_bytes:
- if isinstance(address, bytes) and len(address) == 16:
- tmp = struct.unpack('!QQ', address)
- self._ip = (tmp[0] << 64) | tmp[1]
- return
-
- # Assume input argument to be string or any object representation
- # which converts into a formatted IP string.
- addr_str = str(address)
- if not addr_str:
- raise AddressValueError('')
-
- self._ip = self._ip_int_from_string(addr_str)
-
-
-class IPv6Network(_BaseV6, _BaseNet):
-
- """This class represents and manipulates 128-bit IPv6 networks.
-
- Attributes: [examples for IPv6('2001:658:22A:CAFE:200::1/64')]
- .ip: IPv6Address('2001:658:22a:cafe:200::1')
- .network: IPv6Address('2001:658:22a:cafe::')
- .hostmask: IPv6Address('::ffff:ffff:ffff:ffff')
- .broadcast: IPv6Address('2001:658:22a:cafe:ffff:ffff:ffff:ffff')
- .netmask: IPv6Address('ffff:ffff:ffff:ffff::')
- .prefixlen: 64
-
- """
-
-
- def __init__(self, address, strict=False):
- """Instantiate a new IPv6 Network object.
-
- Args:
- address: A string or integer representing the IPv6 network or the IP
- and prefix/netmask.
- '2001:4860::/128'
- '2001:4860:0000:0000:0000:0000:0000:0000/128'
- '2001:4860::'
- are all functionally the same in IPv6. That is to say,
- failing to provide a subnetmask will create an object with
- a mask of /128.
-
- Additionally, an integer can be passed, so
- IPv6Network('2001:4860::') ==
- IPv6Network(42541956101370907050197289607612071936L).
- or, more generally
- IPv6Network(IPv6Network('2001:4860::')._ip) ==
- IPv6Network('2001:4860::')
-
- strict: A boolean. If true, ensure that we have been passed
- A true network address, eg, 192.168.1.0/24 and not an
- IP address on a network, eg, 192.168.1.1/24.
-
- Raises:
- AddressValueError: If address isn't a valid IPv6 address.
- NetmaskValueError: If the netmask isn't valid for
- an IPv6 address.
- ValueError: If strict was True and a network address was not
- supplied.
-
- """
- _BaseNet.__init__(self, address)
- _BaseV6.__init__(self, address)
-
- # Efficient constructor from integer.
- if isinstance(address, (int, long)):
- self._ip = address
- self.ip = IPv6Address(self._ip)
- self._prefixlen = self._max_prefixlen
- self.netmask = IPv6Address(self._ALL_ONES)
- if address < 0 or address > self._ALL_ONES:
- raise AddressValueError(address)
- return
-
- # Constructing from a packed address
- if _compat_has_real_bytes:
- if isinstance(address, bytes) and len(address) == 16:
- tmp = struct.unpack('!QQ', address)
- self._ip = (tmp[0] << 64) | tmp[1]
- self.ip = IPv6Address(self._ip)
- self._prefixlen = self._max_prefixlen
- self.netmask = IPv6Address(self._ALL_ONES)
- return
-
- # Assume input argument to be string or any object representation
- # which converts into a formatted IP prefix string.
- addr = str(address).split('/')
-
- if len(addr) > 2:
- raise AddressValueError(address)
-
- self._ip = self._ip_int_from_string(addr[0])
- self.ip = IPv6Address(self._ip)
-
- if len(addr) == 2:
- if self._is_valid_netmask(addr[1]):
- self._prefixlen = int(addr[1])
- else:
- raise NetmaskValueError(addr[1])
- else:
- self._prefixlen = self._max_prefixlen
-
- self.netmask = IPv6Address(self._ip_int_from_prefix(self._prefixlen))
-
- if strict:
- if self.ip != self.network:
- raise ValueError('%s has host bits set' %
- self.ip)
-
- def _is_valid_netmask(self, prefixlen):
- """Verify that the netmask/prefixlen is valid.
-
- Args:
- prefixlen: A string, the netmask in prefix length format.
-
- Returns:
- A boolean, True if the prefix represents a valid IPv6
- netmask.
-
- """
- try:
- prefixlen = int(prefixlen)
- except ValueError:
- return False
- return 0 <= prefixlen <= self._max_prefixlen
-
- @property
- def with_netmask(self):
- return self.with_prefixlen
diff --git a/contrib/ipaddr-py/ipaddr_test.py b/contrib/ipaddr-py/ipaddr_test.py
deleted file mode 100755
index 09bece0e751..00000000000
--- a/contrib/ipaddr-py/ipaddr_test.py
+++ /dev/null
@@ -1,1099 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright 2007 Google Inc.
-# Licensed to PSF under a Contributor Agreement.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Unittest for ipaddr module."""
-
-
-import unittest
-import time
-import ipaddr
-
-# Compatibility function to cast str to bytes objects
-if ipaddr._compat_has_real_bytes:
- _cb = lambda bytestr: bytes(bytestr, 'charmap')
-else:
- _cb = str
-
-class IpaddrUnitTest(unittest.TestCase):
-
- def setUp(self):
- self.ipv4 = ipaddr.IPv4Network('1.2.3.4/24')
- self.ipv4_hostmask = ipaddr.IPv4Network('10.0.0.1/0.255.255.255')
- self.ipv6 = ipaddr.IPv6Network('2001:658:22a:cafe:200:0:0:1/64')
-
- def tearDown(self):
- del(self.ipv4)
- del(self.ipv4_hostmask)
- del(self.ipv6)
- del(self)
-
- def testRepr(self):
- self.assertEqual("IPv4Network('1.2.3.4/32')",
- repr(ipaddr.IPv4Network('1.2.3.4')))
- self.assertEqual("IPv6Network('::1/128')",
- repr(ipaddr.IPv6Network('::1')))
-
- def testAutoMasking(self):
- addr1 = ipaddr.IPv4Network('1.1.1.255/24')
- addr1_masked = ipaddr.IPv4Network('1.1.1.0/24')
- self.assertEqual(addr1_masked, addr1.masked())
-
- addr2 = ipaddr.IPv6Network('2000:cafe::efac:100/96')
- addr2_masked = ipaddr.IPv6Network('2000:cafe::/96')
- self.assertEqual(addr2_masked, addr2.masked())
-
- # issue57
- def testAddressIntMath(self):
- self.assertEqual(ipaddr.IPv4Address('1.1.1.1') + 255,
- ipaddr.IPv4Address('1.1.2.0'))
- self.assertEqual(ipaddr.IPv4Address('1.1.1.1') - 256,
- ipaddr.IPv4Address('1.1.0.1'))
- self.assertEqual(ipaddr.IPv6Address('::1') + (2**16 - 2),
- ipaddr.IPv6Address('::ffff'))
- self.assertEqual(ipaddr.IPv6Address('::ffff') - (2**16 - 2),
- ipaddr.IPv6Address('::1'))
-
- def testInvalidStrings(self):
- def AssertInvalidIP(ip_str):
- self.assertRaises(ValueError, ipaddr.IPAddress, ip_str)
- AssertInvalidIP("")
- AssertInvalidIP("016.016.016.016")
- AssertInvalidIP("016.016.016")
- AssertInvalidIP("016.016")
- AssertInvalidIP("016")
- AssertInvalidIP("000.000.000.000")
- AssertInvalidIP("000")
- AssertInvalidIP("0x0a.0x0a.0x0a.0x0a")
- AssertInvalidIP("0x0a.0x0a.0x0a")
- AssertInvalidIP("0x0a.0x0a")
- AssertInvalidIP("0x0a")
- AssertInvalidIP("42.42.42.42.42")
- AssertInvalidIP("42.42.42")
- AssertInvalidIP("42.42")
- AssertInvalidIP("42")
- AssertInvalidIP("42..42.42")
- AssertInvalidIP("42..42.42.42")
- AssertInvalidIP("42.42.42.42.")
- AssertInvalidIP("42.42.42.42...")
- AssertInvalidIP(".42.42.42.42")
- AssertInvalidIP("...42.42.42.42")
- AssertInvalidIP("42.42.42.-0")
- AssertInvalidIP("42.42.42.+0")
- AssertInvalidIP(".")
- AssertInvalidIP("...")
- AssertInvalidIP("bogus")
- AssertInvalidIP("bogus.com")
- AssertInvalidIP("192.168.0.1.com")
- AssertInvalidIP("12345.67899.-54321.-98765")
- AssertInvalidIP("257.0.0.0")
- AssertInvalidIP("42.42.42.-42")
- AssertInvalidIP("3ffe::1.net")
- AssertInvalidIP("3ffe::1::1")
- AssertInvalidIP("1::2::3::4:5")
- AssertInvalidIP("::7:6:5:4:3:2:")
- AssertInvalidIP(":6:5:4:3:2:1::")
- AssertInvalidIP("2001::db:::1")
- AssertInvalidIP("FEDC:9878")
- AssertInvalidIP("+1.+2.+3.4")
- AssertInvalidIP("1.2.3.4e0")
- AssertInvalidIP("::7:6:5:4:3:2:1:0")
- AssertInvalidIP("7:6:5:4:3:2:1:0::")
- AssertInvalidIP("9:8:7:6:5:4:3::2:1")
- AssertInvalidIP("0:1:2:3::4:5:6:7")
- AssertInvalidIP("3ffe:0:0:0:0:0:0:0:1")
- AssertInvalidIP("3ffe::10000")
- AssertInvalidIP("3ffe::goog")
- AssertInvalidIP("3ffe::-0")
- AssertInvalidIP("3ffe::+0")
- AssertInvalidIP("3ffe::-1")
- AssertInvalidIP(":")
- AssertInvalidIP(":::")
- AssertInvalidIP("::1.2.3")
- AssertInvalidIP("::1.2.3.4.5")
- AssertInvalidIP("::1.2.3.4:")
- AssertInvalidIP("1.2.3.4::")
- AssertInvalidIP("2001:db8::1:")
- AssertInvalidIP(":2001:db8::1")
- AssertInvalidIP(":1:2:3:4:5:6:7")
- AssertInvalidIP("1:2:3:4:5:6:7:")
- AssertInvalidIP(":1:2:3:4:5:6:")
-
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv4Network, '')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv4Network,
- 'google.com')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv4Network,
- '::1.2.3.4')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network, '')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
- 'google.com')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
- '1.2.3.4')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
- 'cafe:cafe::/128/190')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
- '1234:axy::b')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Address,
- '1234:axy::b')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Address,
- '2001:db8:::1')
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Address,
- '2001:888888::1')
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv4Address(1)._ip_int_from_string,
- '1.a.2.3')
- self.assertEqual(False, ipaddr.IPv4Network(1)._is_hostmask('1.a.2.3'))
-
- def testGetNetwork(self):
- self.assertEqual(int(self.ipv4.network), 16909056)
- self.assertEqual(str(self.ipv4.network), '1.2.3.0')
- self.assertEqual(str(self.ipv4_hostmask.network), '10.0.0.0')
-
- self.assertEqual(int(self.ipv6.network),
- 42540616829182469433403647294022090752)
- self.assertEqual(str(self.ipv6.network),
- '2001:658:22a:cafe::')
- self.assertEqual(str(self.ipv6.hostmask),
- '::ffff:ffff:ffff:ffff')
-
- def testBadVersionComparison(self):
- # These should always raise TypeError
- v4addr = ipaddr.IPAddress('1.1.1.1')
- v4net = ipaddr.IPNetwork('1.1.1.1')
- v6addr = ipaddr.IPAddress('::1')
- v6net = ipaddr.IPAddress('::1')
-
- self.assertRaises(TypeError, v4addr.__lt__, v6addr)
- self.assertRaises(TypeError, v4addr.__gt__, v6addr)
- self.assertRaises(TypeError, v4net.__lt__, v6net)
- self.assertRaises(TypeError, v4net.__gt__, v6net)
-
- self.assertRaises(TypeError, v6addr.__lt__, v4addr)
- self.assertRaises(TypeError, v6addr.__gt__, v4addr)
- self.assertRaises(TypeError, v6net.__lt__, v4net)
- self.assertRaises(TypeError, v6net.__gt__, v4net)
-
- def testMixedTypeComparison(self):
- v4addr = ipaddr.IPAddress('1.1.1.1')
- v4net = ipaddr.IPNetwork('1.1.1.1/32')
- v6addr = ipaddr.IPAddress('::1')
- v6net = ipaddr.IPNetwork('::1/128')
-
- self.assertFalse(v4net.__contains__(v6net))
- self.assertFalse(v6net.__contains__(v4net))
-
- self.assertRaises(TypeError, lambda: v4addr < v4net)
- self.assertRaises(TypeError, lambda: v4addr > v4net)
- self.assertRaises(TypeError, lambda: v4net < v4addr)
- self.assertRaises(TypeError, lambda: v4net > v4addr)
-
- self.assertRaises(TypeError, lambda: v6addr < v6net)
- self.assertRaises(TypeError, lambda: v6addr > v6net)
- self.assertRaises(TypeError, lambda: v6net < v6addr)
- self.assertRaises(TypeError, lambda: v6net > v6addr)
-
- # with get_mixed_type_key, you can sort addresses and network.
- self.assertEqual([v4addr, v4net], sorted([v4net, v4addr],
- key=ipaddr.get_mixed_type_key))
- self.assertEqual([v6addr, v6net], sorted([v6net, v6addr],
- key=ipaddr.get_mixed_type_key))
-
- def testIpFromInt(self):
- self.assertEqual(self.ipv4.ip, ipaddr.IPv4Network(16909060).ip)
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv4Network, 2**32)
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv4Network, -1)
-
- ipv4 = ipaddr.IPNetwork('1.2.3.4')
- ipv6 = ipaddr.IPNetwork('2001:658:22a:cafe:200:0:0:1')
- self.assertEqual(ipv4, ipaddr.IPNetwork(int(ipv4)))
- self.assertEqual(ipv6, ipaddr.IPNetwork(int(ipv6)))
-
- v6_int = 42540616829182469433547762482097946625
- self.assertEqual(self.ipv6.ip, ipaddr.IPv6Network(v6_int).ip)
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv6Network, 2**128)
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv6Network, -1)
-
- self.assertEqual(ipaddr.IPNetwork(self.ipv4.ip).version, 4)
- self.assertEqual(ipaddr.IPNetwork(self.ipv6.ip).version, 6)
-
- if ipaddr._compat_has_real_bytes: # on python3+
- def testIpFromPacked(self):
- ip = ipaddr.IPNetwork
-
- self.assertEqual(self.ipv4.ip,
- ip(_cb('\x01\x02\x03\x04')).ip)
- self.assertEqual(ip('255.254.253.252'),
- ip(_cb('\xff\xfe\xfd\xfc')))
- self.assertRaises(ValueError, ipaddr.IPNetwork, _cb('\x00' * 3))
- self.assertRaises(ValueError, ipaddr.IPNetwork, _cb('\x00' * 5))
- self.assertEqual(self.ipv6.ip,
- ip(_cb('\x20\x01\x06\x58\x02\x2a\xca\xfe'
- '\x02\x00\x00\x00\x00\x00\x00\x01')).ip)
- self.assertEqual(ip('ffff:2:3:4:ffff::'),
- ip(_cb('\xff\xff\x00\x02\x00\x03\x00\x04' +
- '\xff\xff' + '\x00' * 6)))
- self.assertEqual(ip('::'),
- ip(_cb('\x00' * 16)))
- self.assertRaises(ValueError, ip, _cb('\x00' * 15))
- self.assertRaises(ValueError, ip, _cb('\x00' * 17))
-
- def testGetIp(self):
- self.assertEqual(int(self.ipv4.ip), 16909060)
- self.assertEqual(str(self.ipv4.ip), '1.2.3.4')
- self.assertEqual(str(self.ipv4_hostmask.ip), '10.0.0.1')
-
- self.assertEqual(int(self.ipv6.ip),
- 42540616829182469433547762482097946625)
- self.assertEqual(str(self.ipv6.ip),
- '2001:658:22a:cafe:200::1')
-
- def testGetNetmask(self):
- self.assertEqual(int(self.ipv4.netmask), 4294967040L)
- self.assertEqual(str(self.ipv4.netmask), '255.255.255.0')
- self.assertEqual(str(self.ipv4_hostmask.netmask), '255.0.0.0')
- self.assertEqual(int(self.ipv6.netmask),
- 340282366920938463444927863358058659840)
- self.assertEqual(self.ipv6.prefixlen, 64)
-
- def testZeroNetmask(self):
- ipv4_zero_netmask = ipaddr.IPv4Network('1.2.3.4/0')
- self.assertEqual(int(ipv4_zero_netmask.netmask), 0)
- self.assertTrue(ipv4_zero_netmask._is_valid_netmask(str(0)))
-
- ipv6_zero_netmask = ipaddr.IPv6Network('::1/0')
- self.assertEqual(int(ipv6_zero_netmask.netmask), 0)
- self.assertTrue(ipv6_zero_netmask._is_valid_netmask(str(0)))
-
- def testGetBroadcast(self):
- self.assertEqual(int(self.ipv4.broadcast), 16909311L)
- self.assertEqual(str(self.ipv4.broadcast), '1.2.3.255')
-
- self.assertEqual(int(self.ipv6.broadcast),
- 42540616829182469451850391367731642367)
- self.assertEqual(str(self.ipv6.broadcast),
- '2001:658:22a:cafe:ffff:ffff:ffff:ffff')
-
- def testGetPrefixlen(self):
- self.assertEqual(self.ipv4.prefixlen, 24)
-
- self.assertEqual(self.ipv6.prefixlen, 64)
-
- def testGetSupernet(self):
- self.assertEqual(self.ipv4.supernet().prefixlen, 23)
- self.assertEqual(str(self.ipv4.supernet().network), '1.2.2.0')
- self.assertEqual(ipaddr.IPv4Network('0.0.0.0/0').supernet(),
- ipaddr.IPv4Network('0.0.0.0/0'))
-
- self.assertEqual(self.ipv6.supernet().prefixlen, 63)
- self.assertEqual(str(self.ipv6.supernet().network),
- '2001:658:22a:cafe::')
- self.assertEqual(ipaddr.IPv6Network('::0/0').supernet(),
- ipaddr.IPv6Network('::0/0'))
-
- def testGetSupernet3(self):
- self.assertEqual(self.ipv4.supernet(3).prefixlen, 21)
- self.assertEqual(str(self.ipv4.supernet(3).network), '1.2.0.0')
-
- self.assertEqual(self.ipv6.supernet(3).prefixlen, 61)
- self.assertEqual(str(self.ipv6.supernet(3).network),
- '2001:658:22a:caf8::')
-
- def testGetSupernet4(self):
- self.assertRaises(ValueError, self.ipv4.supernet, prefixlen_diff=2,
- new_prefix=1)
- self.assertRaises(ValueError, self.ipv4.supernet, new_prefix=25)
- self.assertEqual(self.ipv4.supernet(prefixlen_diff=2),
- self.ipv4.supernet(new_prefix=22))
-
- self.assertRaises(ValueError, self.ipv6.supernet, prefixlen_diff=2,
- new_prefix=1)
- self.assertRaises(ValueError, self.ipv6.supernet, new_prefix=65)
- self.assertEqual(self.ipv6.supernet(prefixlen_diff=2),
- self.ipv6.supernet(new_prefix=62))
-
- def testIterSubnets(self):
- self.assertEqual(self.ipv4.subnet(), list(self.ipv4.iter_subnets()))
- self.assertEqual(self.ipv6.subnet(), list(self.ipv6.iter_subnets()))
-
- def testFancySubnetting(self):
- self.assertEqual(sorted(self.ipv4.subnet(prefixlen_diff=3)),
- sorted(self.ipv4.subnet(new_prefix=27)))
- self.assertRaises(ValueError, self.ipv4.subnet, new_prefix=23)
- self.assertRaises(ValueError, self.ipv4.subnet,
- prefixlen_diff=3, new_prefix=27)
- self.assertEqual(sorted(self.ipv6.subnet(prefixlen_diff=4)),
- sorted(self.ipv6.subnet(new_prefix=68)))
- self.assertRaises(ValueError, self.ipv6.subnet, new_prefix=63)
- self.assertRaises(ValueError, self.ipv6.subnet,
- prefixlen_diff=4, new_prefix=68)
-
- def testGetSubnet(self):
- self.assertEqual(self.ipv4.subnet()[0].prefixlen, 25)
- self.assertEqual(str(self.ipv4.subnet()[0].network), '1.2.3.0')
- self.assertEqual(str(self.ipv4.subnet()[1].network), '1.2.3.128')
-
- self.assertEqual(self.ipv6.subnet()[0].prefixlen, 65)
-
- def testGetSubnetForSingle32(self):
- ip = ipaddr.IPv4Network('1.2.3.4/32')
- subnets1 = [str(x) for x in ip.subnet()]
- subnets2 = [str(x) for x in ip.subnet(2)]
- self.assertEqual(subnets1, ['1.2.3.4/32'])
- self.assertEqual(subnets1, subnets2)
-
- def testGetSubnetForSingle128(self):
- ip = ipaddr.IPv6Network('::1/128')
- subnets1 = [str(x) for x in ip.subnet()]
- subnets2 = [str(x) for x in ip.subnet(2)]
- self.assertEqual(subnets1, ['::1/128'])
- self.assertEqual(subnets1, subnets2)
-
- def testSubnet2(self):
- ips = [str(x) for x in self.ipv4.subnet(2)]
- self.assertEqual(
- ips,
- ['1.2.3.0/26', '1.2.3.64/26', '1.2.3.128/26', '1.2.3.192/26'])
-
- ipsv6 = [str(x) for x in self.ipv6.subnet(2)]
- self.assertEqual(
- ipsv6,
- ['2001:658:22a:cafe::/66',
- '2001:658:22a:cafe:4000::/66',
- '2001:658:22a:cafe:8000::/66',
- '2001:658:22a:cafe:c000::/66'])
-
- def testSubnetFailsForLargeCidrDiff(self):
- self.assertRaises(ValueError, self.ipv4.subnet, 9)
- self.assertRaises(ValueError, self.ipv6.subnet, 65)
-
- def testSupernetFailsForLargeCidrDiff(self):
- self.assertRaises(ValueError, self.ipv4.supernet, 25)
- self.assertRaises(ValueError, self.ipv6.supernet, 65)
-
- def testSubnetFailsForNegativeCidrDiff(self):
- self.assertRaises(ValueError, self.ipv4.subnet, -1)
- self.assertRaises(ValueError, self.ipv6.subnet, -1)
-
- def testGetNumHosts(self):
- self.assertEqual(self.ipv4.numhosts, 256)
- self.assertEqual(self.ipv4.subnet()[0].numhosts, 128)
- self.assertEqual(self.ipv4.supernet().numhosts, 512)
-
- self.assertEqual(self.ipv6.numhosts, 18446744073709551616)
- self.assertEqual(self.ipv6.subnet()[0].numhosts, 9223372036854775808)
- self.assertEqual(self.ipv6.supernet().numhosts, 36893488147419103232)
-
- def testContains(self):
- self.assertTrue(ipaddr.IPv4Network('1.2.3.128/25') in self.ipv4)
- self.assertFalse(ipaddr.IPv4Network('1.2.4.1/24') in self.ipv4)
- self.assertTrue(self.ipv4 in self.ipv4)
- self.assertTrue(self.ipv6 in self.ipv6)
- # We can test addresses and string as well.
- addr1 = ipaddr.IPv4Address('1.2.3.37')
- self.assertTrue(addr1 in self.ipv4)
- # issue 61, bad network comparison on like-ip'd network objects
- # with identical broadcast addresses.
- self.assertFalse(ipaddr.IPv4Network('1.1.0.0/16').__contains__(
- ipaddr.IPv4Network('1.0.0.0/15')))
-
- def testBadAddress(self):
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv4Network,
- 'poop')
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv4Network, '1.2.3.256')
-
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
- 'poopv6')
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv4Network, '1.2.3.4/32/24')
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv4Network, '10/8')
- self.assertRaises(ipaddr.AddressValueError,
- ipaddr.IPv6Network, '10/8')
-
-
- def testBadNetMask(self):
- self.assertRaises(ipaddr.NetmaskValueError,
- ipaddr.IPv4Network, '1.2.3.4/')
- self.assertRaises(ipaddr.NetmaskValueError,
- ipaddr.IPv4Network, '1.2.3.4/33')
- self.assertRaises(ipaddr.NetmaskValueError,
- ipaddr.IPv4Network, '1.2.3.4/254.254.255.256')
- self.assertRaises(ipaddr.NetmaskValueError,
- ipaddr.IPv4Network, '1.1.1.1/240.255.0.0')
- self.assertRaises(ipaddr.NetmaskValueError,
- ipaddr.IPv6Network, '::1/')
- self.assertRaises(ipaddr.NetmaskValueError,
- ipaddr.IPv6Network, '::1/129')
-
- def testNth(self):
- self.assertEqual(str(self.ipv4[5]), '1.2.3.5')
- self.assertRaises(IndexError, self.ipv4.__getitem__, 256)
-
- self.assertEqual(str(self.ipv6[5]),
- '2001:658:22a:cafe::5')
-
- def testGetitem(self):
- # http://code.google.com/p/ipaddr-py/issues/detail?id=15
- addr = ipaddr.IPv4Network('172.31.255.128/255.255.255.240')
- self.assertEqual(28, addr.prefixlen)
- addr_list = list(addr)
- self.assertEqual('172.31.255.128', str(addr_list[0]))
- self.assertEqual('172.31.255.128', str(addr[0]))
- self.assertEqual('172.31.255.143', str(addr_list[-1]))
- self.assertEqual('172.31.255.143', str(addr[-1]))
- self.assertEqual(addr_list[-1], addr[-1])
-
- def testEqual(self):
- self.assertTrue(self.ipv4 == ipaddr.IPv4Network('1.2.3.4/24'))
- self.assertFalse(self.ipv4 == ipaddr.IPv4Network('1.2.3.4/23'))
- self.assertFalse(self.ipv4 == ipaddr.IPv6Network('::1.2.3.4/24'))
- self.assertFalse(self.ipv4 == '')
- self.assertFalse(self.ipv4 == [])
- self.assertFalse(self.ipv4 == 2)
- self.assertTrue(ipaddr.IPNetwork('1.1.1.1/32') ==
- ipaddr.IPAddress('1.1.1.1'))
- self.assertTrue(ipaddr.IPNetwork('1.1.1.1/24') ==
- ipaddr.IPAddress('1.1.1.1'))
- self.assertFalse(ipaddr.IPNetwork('1.1.1.0/24') ==
- ipaddr.IPAddress('1.1.1.1'))
-
- self.assertTrue(self.ipv6 ==
- ipaddr.IPv6Network('2001:658:22a:cafe:200::1/64'))
- self.assertTrue(ipaddr.IPNetwork('::1/128') ==
- ipaddr.IPAddress('::1'))
- self.assertTrue(ipaddr.IPNetwork('::1/127') ==
- ipaddr.IPAddress('::1'))
- self.assertFalse(ipaddr.IPNetwork('::0/127') ==
- ipaddr.IPAddress('::1'))
- self.assertFalse(self.ipv6 ==
- ipaddr.IPv6Network('2001:658:22a:cafe:200::1/63'))
- self.assertFalse(self.ipv6 == ipaddr.IPv4Network('1.2.3.4/23'))
- self.assertFalse(self.ipv6 == '')
- self.assertFalse(self.ipv6 == [])
- self.assertFalse(self.ipv6 == 2)
-
- def testNotEqual(self):
- self.assertFalse(self.ipv4 != ipaddr.IPv4Network('1.2.3.4/24'))
- self.assertTrue(self.ipv4 != ipaddr.IPv4Network('1.2.3.4/23'))
- self.assertTrue(self.ipv4 != ipaddr.IPv6Network('::1.2.3.4/24'))
- self.assertTrue(self.ipv4 != '')
- self.assertTrue(self.ipv4 != [])
- self.assertTrue(self.ipv4 != 2)
-
- addr2 = ipaddr.IPAddress('2001:658:22a:cafe:200::1')
- self.assertFalse(self.ipv6 !=
- ipaddr.IPv6Network('2001:658:22a:cafe:200::1/64'))
- self.assertTrue(self.ipv6 !=
- ipaddr.IPv6Network('2001:658:22a:cafe:200::1/63'))
- self.assertTrue(self.ipv6 != ipaddr.IPv4Network('1.2.3.4/23'))
- self.assertTrue(self.ipv6 != '')
- self.assertTrue(self.ipv6 != [])
- self.assertTrue(self.ipv6 != 2)
-
- def testSlash32Constructor(self):
- self.assertEqual(str(ipaddr.IPv4Network('1.2.3.4/255.255.255.255')),
- '1.2.3.4/32')
-
- def testSlash128Constructor(self):
- self.assertEqual(str(ipaddr.IPv6Network('::1/128')),
- '::1/128')
-
- def testSlash0Constructor(self):
- self.assertEqual(str(ipaddr.IPv4Network('1.2.3.4/0.0.0.0')),
- '1.2.3.4/0')
-
- def testCollapsing(self):
- # test only IP addresses including some duplicates
- ip1 = ipaddr.IPv4Address('1.1.1.0')
- ip2 = ipaddr.IPv4Address('1.1.1.1')
- ip3 = ipaddr.IPv4Address('1.1.1.2')
- ip4 = ipaddr.IPv4Address('1.1.1.3')
- ip5 = ipaddr.IPv4Address('1.1.1.4')
- ip6 = ipaddr.IPv4Address('1.1.1.0')
- # check that addreses are subsumed properly.
- collapsed = ipaddr.collapse_address_list([ip1, ip2, ip3, ip4, ip5, ip6])
- self.assertEqual(collapsed, [ipaddr.IPv4Network('1.1.1.0/30'),
- ipaddr.IPv4Network('1.1.1.4/32')])
-
- # test a mix of IP addresses and networks including some duplicates
- ip1 = ipaddr.IPv4Address('1.1.1.0')
- ip2 = ipaddr.IPv4Address('1.1.1.1')
- ip3 = ipaddr.IPv4Address('1.1.1.2')
- ip4 = ipaddr.IPv4Address('1.1.1.3')
- ip5 = ipaddr.IPv4Network('1.1.1.4/30')
- ip6 = ipaddr.IPv4Network('1.1.1.4/30')
- # check that addreses are subsumed properly.
- collapsed = ipaddr.collapse_address_list([ip5, ip1, ip2, ip3, ip4, ip6])
- self.assertEqual(collapsed, [ipaddr.IPv4Network('1.1.1.0/29')])
-
- # test only IP networks
- ip1 = ipaddr.IPv4Network('1.1.0.0/24')
- ip2 = ipaddr.IPv4Network('1.1.1.0/24')
- ip3 = ipaddr.IPv4Network('1.1.2.0/24')
- ip4 = ipaddr.IPv4Network('1.1.3.0/24')
- ip5 = ipaddr.IPv4Network('1.1.4.0/24')
- # stored in no particular order b/c we want CollapseAddr to call [].sort
- ip6 = ipaddr.IPv4Network('1.1.0.0/22')
- # check that addreses are subsumed properly.
- collapsed = ipaddr.collapse_address_list([ip1, ip2, ip3, ip4, ip5, ip6])
- self.assertEqual(collapsed, [ipaddr.IPv4Network('1.1.0.0/22'),
- ipaddr.IPv4Network('1.1.4.0/24')])
-
- # test that two addresses are supernet'ed properly
- collapsed = ipaddr.collapse_address_list([ip1, ip2])
- self.assertEqual(collapsed, [ipaddr.IPv4Network('1.1.0.0/23')])
-
- # test same IP networks
- ip_same1 = ip_same2 = ipaddr.IPv4Network('1.1.1.1/32')
- self.assertEqual(ipaddr.collapse_address_list([ip_same1, ip_same2]),
- [ip_same1])
-
- # test same IP addresses
- ip_same1 = ip_same2 = ipaddr.IPv4Address('1.1.1.1')
- self.assertEqual(ipaddr.collapse_address_list([ip_same1, ip_same2]),
- [ipaddr.IPNetwork('1.1.1.1/32')])
- ip1 = ipaddr.IPv6Network('::2001:1/100')
- ip2 = ipaddr.IPv6Network('::2002:1/120')
- ip3 = ipaddr.IPv6Network('::2001:1/96')
- # test that ipv6 addresses are subsumed properly.
- collapsed = ipaddr.collapse_address_list([ip1, ip2, ip3])
- self.assertEqual(collapsed, [ip3])
-
- # the toejam test
- ip1 = ipaddr.IPAddress('1.1.1.1')
- ip2 = ipaddr.IPAddress('::1')
- self.assertRaises(TypeError, ipaddr.collapse_address_list,
- [ip1, ip2])
-
- def testSummarizing(self):
- #ip = ipaddr.IPAddress
- #ipnet = ipaddr.IPNetwork
- summarize = ipaddr.summarize_address_range
- ip1 = ipaddr.IPAddress('1.1.1.0')
- ip2 = ipaddr.IPAddress('1.1.1.255')
- # test a /24 is sumamrized properly
- self.assertEqual(summarize(ip1, ip2)[0], ipaddr.IPNetwork('1.1.1.0/24'))
- # test an IPv4 range that isn't on a network byte boundary
- ip2 = ipaddr.IPAddress('1.1.1.8')
- self.assertEqual(summarize(ip1, ip2), [ipaddr.IPNetwork('1.1.1.0/29'),
- ipaddr.IPNetwork('1.1.1.8')])
-
- ip1 = ipaddr.IPAddress('1::')
- ip2 = ipaddr.IPAddress('1:ffff:ffff:ffff:ffff:ffff:ffff:ffff')
- # test a IPv6 is sumamrized properly
- self.assertEqual(summarize(ip1, ip2)[0], ipaddr.IPNetwork('1::/16'))
- # test an IPv6 range that isn't on a network byte boundary
- ip2 = ipaddr.IPAddress('2::')
- self.assertEqual(summarize(ip1, ip2), [ipaddr.IPNetwork('1::/16'),
- ipaddr.IPNetwork('2::/128')])
-
- # test exception raised when first is greater than last
- self.assertRaises(ValueError, summarize, ipaddr.IPAddress('1.1.1.0'),
- ipaddr.IPAddress('1.1.0.0'))
- # test exception raised when first and last aren't IP addresses
- self.assertRaises(TypeError, summarize,
- ipaddr.IPNetwork('1.1.1.0'),
- ipaddr.IPNetwork('1.1.0.0'))
- self.assertRaises(TypeError, summarize,
- ipaddr.IPNetwork('1.1.1.0'), ipaddr.IPNetwork('1.1.0.0'))
- # test exception raised when first and last are not same version
- self.assertRaises(TypeError, summarize, ipaddr.IPAddress('::'),
- ipaddr.IPNetwork('1.1.0.0'))
-
- def testAddressComparison(self):
- self.assertTrue(ipaddr.IPAddress('1.1.1.1') <=
- ipaddr.IPAddress('1.1.1.1'))
- self.assertTrue(ipaddr.IPAddress('1.1.1.1') <=
- ipaddr.IPAddress('1.1.1.2'))
- self.assertTrue(ipaddr.IPAddress('::1') <= ipaddr.IPAddress('::1'))
- self.assertTrue(ipaddr.IPAddress('::1') <= ipaddr.IPAddress('::2'))
-
- def testNetworkComparison(self):
- # ip1 and ip2 have the same network address
- ip1 = ipaddr.IPv4Network('1.1.1.0/24')
- ip2 = ipaddr.IPv4Network('1.1.1.1/24')
- ip3 = ipaddr.IPv4Network('1.1.2.0/24')
-
- self.assertTrue(ip1 < ip3)
- self.assertTrue(ip3 > ip2)
-
- self.assertEqual(ip1.compare_networks(ip2), 0)
- self.assertTrue(ip1._get_networks_key() == ip2._get_networks_key())
- self.assertEqual(ip1.compare_networks(ip3), -1)
- self.assertTrue(ip1._get_networks_key() < ip3._get_networks_key())
-
- ip1 = ipaddr.IPv6Network('2001::2000/96')
- ip2 = ipaddr.IPv6Network('2001::2001/96')
- ip3 = ipaddr.IPv6Network('2001:ffff::2000/96')
-
- self.assertTrue(ip1 < ip3)
- self.assertTrue(ip3 > ip2)
- self.assertEqual(ip1.compare_networks(ip2), 0)
- self.assertTrue(ip1._get_networks_key() == ip2._get_networks_key())
- self.assertEqual(ip1.compare_networks(ip3), -1)
- self.assertTrue(ip1._get_networks_key() < ip3._get_networks_key())
-
- # Test comparing different protocols.
- # Should always raise a TypeError.
- ipv6 = ipaddr.IPv6Network('::/0')
- ipv4 = ipaddr.IPv4Network('0.0.0.0/0')
- self.assertRaises(TypeError, ipv4.__lt__, ipv6)
- self.assertRaises(TypeError, ipv4.__gt__, ipv6)
- self.assertRaises(TypeError, ipv6.__lt__, ipv4)
- self.assertRaises(TypeError, ipv6.__gt__, ipv4)
-
- # Regression test for issue 19.
- ip1 = ipaddr.IPNetwork('10.1.2.128/25')
- self.assertFalse(ip1 < ip1)
- self.assertFalse(ip1 > ip1)
- ip2 = ipaddr.IPNetwork('10.1.3.0/24')
- self.assertTrue(ip1 < ip2)
- self.assertFalse(ip2 < ip1)
- self.assertFalse(ip1 > ip2)
- self.assertTrue(ip2 > ip1)
- ip3 = ipaddr.IPNetwork('10.1.3.0/25')
- self.assertTrue(ip2 < ip3)
- self.assertFalse(ip3 < ip2)
- self.assertFalse(ip2 > ip3)
- self.assertTrue(ip3 > ip2)
-
- # Regression test for issue 28.
- ip1 = ipaddr.IPNetwork('10.10.10.0/31')
- ip2 = ipaddr.IPNetwork('10.10.10.0')
- ip3 = ipaddr.IPNetwork('10.10.10.2/31')
- ip4 = ipaddr.IPNetwork('10.10.10.2')
- sorted = [ip1, ip2, ip3, ip4]
- unsorted = [ip2, ip4, ip1, ip3]
- unsorted.sort()
- self.assertEqual(sorted, unsorted)
- unsorted = [ip4, ip1, ip3, ip2]
- unsorted.sort()
- self.assertEqual(sorted, unsorted)
- self.assertRaises(TypeError, ip1.__lt__, ipaddr.IPAddress('10.10.10.0'))
- self.assertRaises(TypeError, ip2.__lt__, ipaddr.IPAddress('10.10.10.0'))
-
- # <=, >=
- self.assertTrue(ipaddr.IPNetwork('1.1.1.1') <=
- ipaddr.IPNetwork('1.1.1.1'))
- self.assertTrue(ipaddr.IPNetwork('1.1.1.1') <=
- ipaddr.IPNetwork('1.1.1.2'))
- self.assertFalse(ipaddr.IPNetwork('1.1.1.2') <=
- ipaddr.IPNetwork('1.1.1.1'))
- self.assertTrue(ipaddr.IPNetwork('::1') <= ipaddr.IPNetwork('::1'))
- self.assertTrue(ipaddr.IPNetwork('::1') <= ipaddr.IPNetwork('::2'))
- self.assertFalse(ipaddr.IPNetwork('::2') <= ipaddr.IPNetwork('::1'))
-
- def testStrictNetworks(self):
- self.assertRaises(ValueError, ipaddr.IPNetwork, '192.168.1.1/24',
- strict=True)
- self.assertRaises(ValueError, ipaddr.IPNetwork, '::1/120', strict=True)
-
- def testOverlaps(self):
- other = ipaddr.IPv4Network('1.2.3.0/30')
- other2 = ipaddr.IPv4Network('1.2.2.0/24')
- other3 = ipaddr.IPv4Network('1.2.2.64/26')
- self.assertTrue(self.ipv4.overlaps(other))
- self.assertFalse(self.ipv4.overlaps(other2))
- self.assertTrue(other2.overlaps(other3))
-
- def testEmbeddedIpv4(self):
- ipv4_string = '192.168.0.1'
- ipv4 = ipaddr.IPv4Network(ipv4_string)
- v4compat_ipv6 = ipaddr.IPv6Network('::%s' % ipv4_string)
- self.assertEqual(int(v4compat_ipv6.ip), int(ipv4.ip))
- v4mapped_ipv6 = ipaddr.IPv6Network('::ffff:%s' % ipv4_string)
- self.assertNotEqual(v4mapped_ipv6.ip, ipv4.ip)
- self.assertRaises(ipaddr.AddressValueError, ipaddr.IPv6Network,
- '2001:1.1.1.1:1.1.1.1')
-
- # Issue 67: IPv6 with embedded IPv4 address not recognized.
- def testIPv6AddressTooLarge(self):
- # RFC4291 2.5.5.2
- self.assertEqual(ipaddr.IPAddress('::FFFF:192.0.2.1'),
- ipaddr.IPAddress('::FFFF:c000:201'))
- # RFC4291 2.2 (part 3) x::d.d.d.d
- self.assertEqual(ipaddr.IPAddress('FFFF::192.0.2.1'),
- ipaddr.IPAddress('FFFF::c000:201'))
-
- def testIPVersion(self):
- self.assertEqual(self.ipv4.version, 4)
- self.assertEqual(self.ipv6.version, 6)
-
- def testMaxPrefixLength(self):
- self.assertEqual(self.ipv4.max_prefixlen, 32)
- self.assertEqual(self.ipv6.max_prefixlen, 128)
-
- def testPacked(self):
- self.assertEqual(self.ipv4.packed,
- _cb('\x01\x02\x03\x04'))
- self.assertEqual(ipaddr.IPv4Network('255.254.253.252').packed,
- _cb('\xff\xfe\xfd\xfc'))
- self.assertEqual(self.ipv6.packed,
- _cb('\x20\x01\x06\x58\x02\x2a\xca\xfe'
- '\x02\x00\x00\x00\x00\x00\x00\x01'))
- self.assertEqual(ipaddr.IPv6Network('ffff:2:3:4:ffff::').packed,
- _cb('\xff\xff\x00\x02\x00\x03\x00\x04\xff\xff'
- + '\x00' * 6))
- self.assertEqual(ipaddr.IPv6Network('::1:0:0:0:0').packed,
- _cb('\x00' * 6 + '\x00\x01' + '\x00' * 8))
-
- def testIpStrFromPrefixlen(self):
- ipv4 = ipaddr.IPv4Network('1.2.3.4/24')
- self.assertEqual(ipv4._ip_string_from_prefix(), '255.255.255.0')
- self.assertEqual(ipv4._ip_string_from_prefix(28), '255.255.255.240')
-
- def testIpType(self):
- ipv4net = ipaddr.IPNetwork('1.2.3.4')
- ipv4addr = ipaddr.IPAddress('1.2.3.4')
- ipv6net = ipaddr.IPNetwork('::1.2.3.4')
- ipv6addr = ipaddr.IPAddress('::1.2.3.4')
- self.assertEqual(ipaddr.IPv4Network, type(ipv4net))
- self.assertEqual(ipaddr.IPv4Address, type(ipv4addr))
- self.assertEqual(ipaddr.IPv6Network, type(ipv6net))
- self.assertEqual(ipaddr.IPv6Address, type(ipv6addr))
-
- def testReservedIpv4(self):
- # test networks
- self.assertEqual(True, ipaddr.IPNetwork('224.1.1.1/31').is_multicast)
- self.assertEqual(False, ipaddr.IPNetwork('240.0.0.0').is_multicast)
-
- self.assertEqual(True, ipaddr.IPNetwork('192.168.1.1/17').is_private)
- self.assertEqual(False, ipaddr.IPNetwork('192.169.0.0').is_private)
- self.assertEqual(True, ipaddr.IPNetwork('10.255.255.255').is_private)
- self.assertEqual(False, ipaddr.IPNetwork('11.0.0.0').is_private)
- self.assertEqual(True, ipaddr.IPNetwork('172.31.255.255').is_private)
- self.assertEqual(False, ipaddr.IPNetwork('172.32.0.0').is_private)
-
- self.assertEqual(True,
- ipaddr.IPNetwork('169.254.100.200/24').is_link_local)
- self.assertEqual(False,
- ipaddr.IPNetwork('169.255.100.200/24').is_link_local)
-
- self.assertEqual(True,
- ipaddr.IPNetwork('127.100.200.254/32').is_loopback)
- self.assertEqual(True, ipaddr.IPNetwork('127.42.0.0/16').is_loopback)
- self.assertEqual(False, ipaddr.IPNetwork('128.0.0.0').is_loopback)
-
- # test addresses
- self.assertEqual(True, ipaddr.IPAddress('224.1.1.1').is_multicast)
- self.assertEqual(False, ipaddr.IPAddress('240.0.0.0').is_multicast)
-
- self.assertEqual(True, ipaddr.IPAddress('192.168.1.1').is_private)
- self.assertEqual(False, ipaddr.IPAddress('192.169.0.0').is_private)
- self.assertEqual(True, ipaddr.IPAddress('10.255.255.255').is_private)
- self.assertEqual(False, ipaddr.IPAddress('11.0.0.0').is_private)
- self.assertEqual(True, ipaddr.IPAddress('172.31.255.255').is_private)
- self.assertEqual(False, ipaddr.IPAddress('172.32.0.0').is_private)
-
- self.assertEqual(True,
- ipaddr.IPAddress('169.254.100.200').is_link_local)
- self.assertEqual(False,
- ipaddr.IPAddress('169.255.100.200').is_link_local)
-
- self.assertEqual(True,
- ipaddr.IPAddress('127.100.200.254').is_loopback)
- self.assertEqual(True, ipaddr.IPAddress('127.42.0.0').is_loopback)
- self.assertEqual(False, ipaddr.IPAddress('128.0.0.0').is_loopback)
- self.assertEqual(True, ipaddr.IPNetwork('0.0.0.0').is_unspecified)
-
- def testReservedIpv6(self):
-
- self.assertEqual(True, ipaddr.IPNetwork('ffff::').is_multicast)
- self.assertEqual(True, ipaddr.IPNetwork(2**128-1).is_multicast)
- self.assertEqual(True, ipaddr.IPNetwork('ff00::').is_multicast)
- self.assertEqual(False, ipaddr.IPNetwork('fdff::').is_multicast)
-
- self.assertEqual(True, ipaddr.IPNetwork('fecf::').is_site_local)
- self.assertEqual(True, ipaddr.IPNetwork(
- 'feff:ffff:ffff:ffff::').is_site_local)
- self.assertEqual(False, ipaddr.IPNetwork('fbf:ffff::').is_site_local)
- self.assertEqual(False, ipaddr.IPNetwork('ff00::').is_site_local)
-
- self.assertEqual(True, ipaddr.IPNetwork('fc00::').is_private)
- self.assertEqual(True, ipaddr.IPNetwork(
- 'fc00:ffff:ffff:ffff::').is_private)
- self.assertEqual(False, ipaddr.IPNetwork('fbff:ffff::').is_private)
- self.assertEqual(False, ipaddr.IPNetwork('fe00::').is_private)
-
- self.assertEqual(True, ipaddr.IPNetwork('fea0::').is_link_local)
- self.assertEqual(True, ipaddr.IPNetwork('febf:ffff::').is_link_local)
- self.assertEqual(False, ipaddr.IPNetwork('fe7f:ffff::').is_link_local)
- self.assertEqual(False, ipaddr.IPNetwork('fec0::').is_link_local)
-
- self.assertEqual(True, ipaddr.IPNetwork('0:0::0:01').is_loopback)
- self.assertEqual(False, ipaddr.IPNetwork('::1/127').is_loopback)
- self.assertEqual(False, ipaddr.IPNetwork('::').is_loopback)
- self.assertEqual(False, ipaddr.IPNetwork('::2').is_loopback)
-
- self.assertEqual(True, ipaddr.IPNetwork('0::0').is_unspecified)
- self.assertEqual(False, ipaddr.IPNetwork('::1').is_unspecified)
- self.assertEqual(False, ipaddr.IPNetwork('::/127').is_unspecified)
-
- # test addresses
- self.assertEqual(True, ipaddr.IPAddress('ffff::').is_multicast)
- self.assertEqual(True, ipaddr.IPAddress(2**128-1).is_multicast)
- self.assertEqual(True, ipaddr.IPAddress('ff00::').is_multicast)
- self.assertEqual(False, ipaddr.IPAddress('fdff::').is_multicast)
-
- self.assertEqual(True, ipaddr.IPAddress('fecf::').is_site_local)
- self.assertEqual(True, ipaddr.IPAddress(
- 'feff:ffff:ffff:ffff::').is_site_local)
- self.assertEqual(False, ipaddr.IPAddress('fbf:ffff::').is_site_local)
- self.assertEqual(False, ipaddr.IPAddress('ff00::').is_site_local)
-
- self.assertEqual(True, ipaddr.IPAddress('fc00::').is_private)
- self.assertEqual(True, ipaddr.IPAddress(
- 'fc00:ffff:ffff:ffff::').is_private)
- self.assertEqual(False, ipaddr.IPAddress('fbff:ffff::').is_private)
- self.assertEqual(False, ipaddr.IPAddress('fe00::').is_private)
-
- self.assertEqual(True, ipaddr.IPAddress('fea0::').is_link_local)
- self.assertEqual(True, ipaddr.IPAddress('febf:ffff::').is_link_local)
- self.assertEqual(False, ipaddr.IPAddress('fe7f:ffff::').is_link_local)
- self.assertEqual(False, ipaddr.IPAddress('fec0::').is_link_local)
-
- self.assertEqual(True, ipaddr.IPAddress('0:0::0:01').is_loopback)
- self.assertEqual(True, ipaddr.IPAddress('::1').is_loopback)
- self.assertEqual(False, ipaddr.IPAddress('::2').is_loopback)
-
- self.assertEqual(True, ipaddr.IPAddress('0::0').is_unspecified)
- self.assertEqual(False, ipaddr.IPAddress('::1').is_unspecified)
-
- # some generic IETF reserved addresses
- self.assertEqual(True, ipaddr.IPAddress('100::').is_reserved)
- self.assertEqual(True, ipaddr.IPNetwork('4000::1/128').is_reserved)
-
- def testIpv4Mapped(self):
- self.assertEqual(ipaddr.IPAddress('::ffff:192.168.1.1').ipv4_mapped,
- ipaddr.IPAddress('192.168.1.1'))
- self.assertEqual(ipaddr.IPAddress('::c0a8:101').ipv4_mapped, None)
- self.assertEqual(ipaddr.IPAddress('::ffff:c0a8:101').ipv4_mapped,
- ipaddr.IPAddress('192.168.1.1'))
-
- def testAddrExclude(self):
- addr1 = ipaddr.IPNetwork('10.1.1.0/24')
- addr2 = ipaddr.IPNetwork('10.1.1.0/26')
- addr3 = ipaddr.IPNetwork('10.2.1.0/24')
- addr4 = ipaddr.IPAddress('10.1.1.0')
- self.assertEqual(addr1.address_exclude(addr2),
- [ipaddr.IPNetwork('10.1.1.64/26'),
- ipaddr.IPNetwork('10.1.1.128/25')])
- self.assertRaises(ValueError, addr1.address_exclude, addr3)
- self.assertRaises(TypeError, addr1.address_exclude, addr4)
- self.assertEqual(addr1.address_exclude(addr1), [])
-
- def testHash(self):
- self.assertEqual(hash(ipaddr.IPNetwork('10.1.1.0/24')),
- hash(ipaddr.IPNetwork('10.1.1.0/24')))
- self.assertEqual(hash(ipaddr.IPAddress('10.1.1.0')),
- hash(ipaddr.IPAddress('10.1.1.0')))
- # i70
- self.assertEqual(hash(ipaddr.IPAddress('1.2.3.4')),
- hash(ipaddr.IPAddress(
- long(ipaddr.IPAddress('1.2.3.4')._ip))))
- ip1 = ipaddr.IPAddress('10.1.1.0')
- ip2 = ipaddr.IPAddress('1::')
- dummy = {}
- dummy[self.ipv4] = None
- dummy[self.ipv6] = None
- dummy[ip1] = None
- dummy[ip2] = None
- self.assertTrue(self.ipv4 in dummy)
- self.assertTrue(ip2 in dummy)
-
- def testCopyConstructor(self):
- addr1 = ipaddr.IPNetwork('10.1.1.0/24')
- addr2 = ipaddr.IPNetwork(addr1)
- addr3 = ipaddr.IPNetwork('2001:658:22a:cafe:200::1/64')
- addr4 = ipaddr.IPNetwork(addr3)
- addr5 = ipaddr.IPv4Address('1.1.1.1')
- addr6 = ipaddr.IPv6Address('2001:658:22a:cafe:200::1')
-
- self.assertEqual(addr1, addr2)
- self.assertEqual(addr3, addr4)
- self.assertEqual(addr5, ipaddr.IPv4Address(addr5))
- self.assertEqual(addr6, ipaddr.IPv6Address(addr6))
-
- def testCompressIPv6Address(self):
- test_addresses = {
- '1:2:3:4:5:6:7:8': '1:2:3:4:5:6:7:8/128',
- '2001:0:0:4:0:0:0:8': '2001:0:0:4::8/128',
- '2001:0:0:4:5:6:7:8': '2001::4:5:6:7:8/128',
- '2001:0:3:4:5:6:7:8': '2001:0:3:4:5:6:7:8/128',
- '2001:0:3:4:5:6:7:8': '2001:0:3:4:5:6:7:8/128',
- '0:0:3:0:0:0:0:ffff': '0:0:3::ffff/128',
- '0:0:0:4:0:0:0:ffff': '::4:0:0:0:ffff/128',
- '0:0:0:0:5:0:0:ffff': '::5:0:0:ffff/128',
- '1:0:0:4:0:0:7:8': '1::4:0:0:7:8/128',
- '0:0:0:0:0:0:0:0': '::/128',
- '0:0:0:0:0:0:0:0/0': '::/0',
- '0:0:0:0:0:0:0:1': '::1/128',
- '2001:0658:022a:cafe:0000:0000:0000:0000/66':
- '2001:658:22a:cafe::/66',
- '::1.2.3.4': '::102:304/128',
- '1:2:3:4:5:ffff:1.2.3.4': '1:2:3:4:5:ffff:102:304/128',
- '::7:6:5:4:3:2:1': '0:7:6:5:4:3:2:1/128',
- '::7:6:5:4:3:2:0': '0:7:6:5:4:3:2:0/128',
- '7:6:5:4:3:2:1::': '7:6:5:4:3:2:1:0/128',
- '0:6:5:4:3:2:1::': '0:6:5:4:3:2:1:0/128',
- }
- for uncompressed, compressed in test_addresses.items():
- self.assertEqual(compressed, str(ipaddr.IPv6Network(uncompressed)))
-
- def testExplodeShortHandIpStr(self):
- addr1 = ipaddr.IPv6Network('2001::1')
- addr2 = ipaddr.IPv6Address('2001:0:5ef5:79fd:0:59d:a0e5:ba1')
- self.assertEqual('2001:0000:0000:0000:0000:0000:0000:0001',
- addr1._explode_shorthand_ip_string(str(addr1.ip)))
- self.assertEqual('0000:0000:0000:0000:0000:0000:0000:0001',
- ipaddr.IPv6Network('::1/128').exploded)
- # issue 77
- self.assertEqual('2001:0000:5ef5:79fd:0000:059d:a0e5:0ba1',
- addr2.exploded)
-
- def testIntRepresentation(self):
- self.assertEqual(16909060, int(self.ipv4))
- self.assertEqual(42540616829182469433547762482097946625, int(self.ipv6))
-
- def testHexRepresentation(self):
- self.assertEqual(hex(0x1020304),
- hex(self.ipv4))
-
- self.assertEqual(hex(0x20010658022ACAFE0200000000000001),
- hex(self.ipv6))
-
- # backwards compatibility
- def testBackwardsCompability(self):
- self.assertEqual(ipaddr.CollapseAddrList(
- [ipaddr.IPNetwork('1.1.0.0/24'), ipaddr.IPNetwork('1.1.1.0/24')]),
- [ipaddr.IPNetwork('1.1.0.0/23')])
-
- self.assertEqual(ipaddr.IPNetwork('::42:0/112').AddressExclude(
- ipaddr.IPNetwork('::42:8000/113')),
- [ipaddr.IPNetwork('::42:0/113')])
-
- self.assertTrue(ipaddr.IPNetwork('1::/8').CompareNetworks(
- ipaddr.IPNetwork('2::/9')) < 0)
-
- self.assertEqual(ipaddr.IPNetwork('1::/16').Contains(
- ipaddr.IPNetwork('2::/16')), False)
-
- self.assertEqual(ipaddr.IPNetwork('0.0.0.0/0').Subnet(),
- [ipaddr.IPNetwork('0.0.0.0/1'),
- ipaddr.IPNetwork('128.0.0.0/1')])
- self.assertEqual(ipaddr.IPNetwork('::/127').Subnet(),
- [ipaddr.IPNetwork('::/128'),
- ipaddr.IPNetwork('::1/128')])
-
- self.assertEqual(ipaddr.IPNetwork('1.0.0.0/32').Supernet(),
- ipaddr.IPNetwork('1.0.0.0/31'))
- self.assertEqual(ipaddr.IPNetwork('::/121').Supernet(),
- ipaddr.IPNetwork('::/120'))
-
- self.assertEqual(ipaddr.IPNetwork('10.0.0.2').IsRFC1918(), True)
- self.assertEqual(ipaddr.IPNetwork('10.0.0.0').IsMulticast(), False)
- self.assertEqual(ipaddr.IPNetwork('127.255.255.255').IsLoopback(), True)
- self.assertEqual(ipaddr.IPNetwork('169.255.255.255').IsLinkLocal(),
- False)
-
- def testForceVersion(self):
- self.assertEqual(ipaddr.IPNetwork(1).version, 4)
- self.assertEqual(ipaddr.IPNetwork(1, version=6).version, 6)
-
- def testWithStar(self):
- self.assertEqual(str(self.ipv4.with_prefixlen), "1.2.3.4/24")
- self.assertEqual(str(self.ipv4.with_netmask), "1.2.3.4/255.255.255.0")
- self.assertEqual(str(self.ipv4.with_hostmask), "1.2.3.4/0.0.0.255")
-
- self.assertEqual(str(self.ipv6.with_prefixlen),
- '2001:658:22a:cafe:200::1/64')
- # rfc3513 sec 2.3 says that ipv6 only uses cidr notation for
- # subnets
- self.assertEqual(str(self.ipv6.with_netmask),
- '2001:658:22a:cafe:200::1/64')
- # this probably don't make much sense, but it's included for
- # compatibility with ipv4
- self.assertEqual(str(self.ipv6.with_hostmask),
- '2001:658:22a:cafe:200::1/::ffff:ffff:ffff:ffff')
-
- def testNetworkElementCaching(self):
- # V4 - make sure we're empty
- self.assertFalse(self.ipv4._cache.has_key('network'))
- self.assertFalse(self.ipv4._cache.has_key('broadcast'))
- self.assertFalse(self.ipv4._cache.has_key('hostmask'))
-
- # V4 - populate and test
- self.assertEqual(self.ipv4.network, ipaddr.IPv4Address('1.2.3.0'))
- self.assertEqual(self.ipv4.broadcast, ipaddr.IPv4Address('1.2.3.255'))
- self.assertEqual(self.ipv4.hostmask, ipaddr.IPv4Address('0.0.0.255'))
-
- # V4 - check we're cached
- self.assertTrue(self.ipv4._cache.has_key('network'))
- self.assertTrue(self.ipv4._cache.has_key('broadcast'))
- self.assertTrue(self.ipv4._cache.has_key('hostmask'))
-
- # V6 - make sure we're empty
- self.assertFalse(self.ipv6._cache.has_key('network'))
- self.assertFalse(self.ipv6._cache.has_key('broadcast'))
- self.assertFalse(self.ipv6._cache.has_key('hostmask'))
-
- # V6 - populate and test
- self.assertEqual(self.ipv6.network,
- ipaddr.IPv6Address('2001:658:22a:cafe::'))
- self.assertEqual(self.ipv6.broadcast, ipaddr.IPv6Address(
- '2001:658:22a:cafe:ffff:ffff:ffff:ffff'))
- self.assertEqual(self.ipv6.hostmask,
- ipaddr.IPv6Address('::ffff:ffff:ffff:ffff'))
-
- # V6 - check we're cached
- self.assertTrue(self.ipv6._cache.has_key('network'))
- self.assertTrue(self.ipv6._cache.has_key('broadcast'))
- self.assertTrue(self.ipv6._cache.has_key('hostmask'))
-
- def testTeredo(self):
- # stolen from wikipedia
- server = ipaddr.IPv4Address('65.54.227.120')
- client = ipaddr.IPv4Address('192.0.2.45')
- teredo_addr = '2001:0000:4136:e378:8000:63bf:3fff:fdd2'
- self.assertEqual((server, client),
- ipaddr.IPAddress(teredo_addr).teredo)
- bad_addr = '2000::4136:e378:8000:63bf:3fff:fdd2'
- self.assertFalse(ipaddr.IPAddress(bad_addr).teredo)
- bad_addr = '2001:0001:4136:e378:8000:63bf:3fff:fdd2'
- self.assertFalse(ipaddr.IPAddress(bad_addr).teredo)
-
- # i77
- teredo_addr = ipaddr.IPv6Address('2001:0:5ef5:79fd:0:59d:a0e5:ba1')
- self.assertEqual((ipaddr.IPv4Address('94.245.121.253'),
- ipaddr.IPv4Address('95.26.244.94')),
- teredo_addr.teredo)
-
-
- def testsixtofour(self):
- sixtofouraddr = ipaddr.IPAddress('2002:ac1d:2d64::1')
- bad_addr = ipaddr.IPAddress('2000:ac1d:2d64::1')
- self.assertEqual(ipaddr.IPv4Address('172.29.45.100'),
- sixtofouraddr.sixtofour)
- self.assertFalse(bad_addr.sixtofour)
-
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/contrib/ipaddr-py/setup.py b/contrib/ipaddr-py/setup.py
deleted file mode 100755
index 33564320e45..00000000000
--- a/contrib/ipaddr-py/setup.py
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/usr/bin/python
-#
-# Copyright 2008 Google Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from distutils.core import setup
-
-import ipaddr
-
-
-setup(name='ipaddr',
- maintainer='Google',
- maintainer_email='ipaddr-py-dev@googlegroups.com',
- version=ipaddr.__version__,
- url='http://code.google.com/p/ipaddr-py/',
- license='Apache License, Version 2.0',
- classifiers=[
- 'Development Status :: 5 - Production/Stable',
- 'Intended Audience :: Developers',
- 'License :: OSI Approved :: Apache Software License',
- 'Operating System :: OS Independent',
- 'Topic :: Internet',
- 'Topic :: Software Development :: Libraries',
- 'Topic :: System :: Networking'],
- py_modules=['ipaddr'])
diff --git a/contrib/ipaddr-py/test-2to3.sh b/contrib/ipaddr-py/test-2to3.sh
deleted file mode 100755
index 408d665bcc2..00000000000
--- a/contrib/ipaddr-py/test-2to3.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/sh
-
-# Converts the python2 ipaddr files to python3 and runs the unit tests
-# with both python versions.
-
-mkdir -p 2to3output && \
-cp -f *.py 2to3output && \
-( cd 2to3output && 2to3 . | patch -p0 ) && \
-py3version=$(python3 --version 2>&1) && \
-echo -e "\nTesting with ${py3version}" && \
-python3 2to3output/ipaddr_test.py && \
-rm -r 2to3output && \
-pyversion=$(python --version 2>&1) && \
-echo -e "\nTesting with ${pyversion}" && \
-./ipaddr_test.py
diff --git a/contrib/libexecinfo/execinfo.c b/contrib/libexecinfo/execinfo.c
new file mode 100644
index 00000000000..03500257788
--- /dev/null
+++ b/contrib/libexecinfo/execinfo.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2003 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: execinfo.c,v 1.3 2004/07/19 05:21:09 sobomax Exp $
+ */
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef HAVE_BACKTRACE
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <dlfcn.h>
+#include <math.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stddef.h>
+
+#include "execinfo_compat.h"
+
+#define D10(x) ceil(log10(((x) == 0) ? 2 : ((x) + 1)))
+
+static void *
+getreturnaddr(int level)
+{
+ switch (level) {
+ case 0: return __builtin_return_address(1);
+ case 1: return __builtin_return_address(2);
+ case 2: return __builtin_return_address(3);
+ case 3: return __builtin_return_address(4);
+ case 4: return __builtin_return_address(5);
+ case 5: return __builtin_return_address(6);
+ case 6: return __builtin_return_address(7);
+ case 7: return __builtin_return_address(8);
+ case 8: return __builtin_return_address(9);
+ case 9: return __builtin_return_address(10);
+ case 10: return __builtin_return_address(11);
+ case 11: return __builtin_return_address(12);
+ case 12: return __builtin_return_address(13);
+ case 13: return __builtin_return_address(14);
+ case 14: return __builtin_return_address(15);
+ case 15: return __builtin_return_address(16);
+ case 16: return __builtin_return_address(17);
+ case 17: return __builtin_return_address(18);
+ case 18: return __builtin_return_address(19);
+ case 19: return __builtin_return_address(20);
+ case 20: return __builtin_return_address(21);
+ case 21: return __builtin_return_address(22);
+ case 22: return __builtin_return_address(23);
+ case 23: return __builtin_return_address(24);
+ case 24: return __builtin_return_address(25);
+ case 25: return __builtin_return_address(26);
+ case 26: return __builtin_return_address(27);
+ case 27: return __builtin_return_address(28);
+ case 28: return __builtin_return_address(29);
+ case 29: return __builtin_return_address(30);
+ case 30: return __builtin_return_address(31);
+ case 31: return __builtin_return_address(32);
+ case 32: return __builtin_return_address(33);
+ case 33: return __builtin_return_address(34);
+ case 34: return __builtin_return_address(35);
+ case 35: return __builtin_return_address(36);
+ case 36: return __builtin_return_address(37);
+ case 37: return __builtin_return_address(38);
+ case 38: return __builtin_return_address(39);
+ case 39: return __builtin_return_address(40);
+ case 40: return __builtin_return_address(41);
+ case 41: return __builtin_return_address(42);
+ case 42: return __builtin_return_address(43);
+ case 43: return __builtin_return_address(44);
+ case 44: return __builtin_return_address(45);
+ case 45: return __builtin_return_address(46);
+ case 46: return __builtin_return_address(47);
+ case 47: return __builtin_return_address(48);
+ case 48: return __builtin_return_address(49);
+ case 49: return __builtin_return_address(50);
+ case 50: return __builtin_return_address(51);
+ case 51: return __builtin_return_address(52);
+ case 52: return __builtin_return_address(53);
+ case 53: return __builtin_return_address(54);
+ case 54: return __builtin_return_address(55);
+ case 55: return __builtin_return_address(56);
+ case 56: return __builtin_return_address(57);
+ case 57: return __builtin_return_address(58);
+ case 58: return __builtin_return_address(59);
+ case 59: return __builtin_return_address(60);
+ case 60: return __builtin_return_address(61);
+ case 61: return __builtin_return_address(62);
+ case 62: return __builtin_return_address(63);
+ case 63: return __builtin_return_address(64);
+ case 64: return __builtin_return_address(65);
+ case 65: return __builtin_return_address(66);
+ case 66: return __builtin_return_address(67);
+ case 67: return __builtin_return_address(68);
+ case 68: return __builtin_return_address(69);
+ case 69: return __builtin_return_address(70);
+ case 70: return __builtin_return_address(71);
+ case 71: return __builtin_return_address(72);
+ case 72: return __builtin_return_address(73);
+ case 73: return __builtin_return_address(74);
+ case 74: return __builtin_return_address(75);
+ case 75: return __builtin_return_address(76);
+ case 76: return __builtin_return_address(77);
+ case 77: return __builtin_return_address(78);
+ case 78: return __builtin_return_address(79);
+ case 79: return __builtin_return_address(80);
+ case 80: return __builtin_return_address(81);
+ case 81: return __builtin_return_address(82);
+ case 82: return __builtin_return_address(83);
+ case 83: return __builtin_return_address(84);
+ case 84: return __builtin_return_address(85);
+ case 85: return __builtin_return_address(86);
+ case 86: return __builtin_return_address(87);
+ case 87: return __builtin_return_address(88);
+ case 88: return __builtin_return_address(89);
+ case 89: return __builtin_return_address(90);
+ case 90: return __builtin_return_address(91);
+ case 91: return __builtin_return_address(92);
+ case 92: return __builtin_return_address(93);
+ case 93: return __builtin_return_address(94);
+ case 94: return __builtin_return_address(95);
+ case 95: return __builtin_return_address(96);
+ case 96: return __builtin_return_address(97);
+ case 97: return __builtin_return_address(98);
+ case 98: return __builtin_return_address(99);
+ case 99: return __builtin_return_address(100);
+ case 100: return __builtin_return_address(101);
+ case 101: return __builtin_return_address(102);
+ case 102: return __builtin_return_address(103);
+ case 103: return __builtin_return_address(104);
+ case 104: return __builtin_return_address(105);
+ case 105: return __builtin_return_address(106);
+ case 106: return __builtin_return_address(107);
+ case 107: return __builtin_return_address(108);
+ case 108: return __builtin_return_address(109);
+ case 109: return __builtin_return_address(110);
+ case 110: return __builtin_return_address(111);
+ case 111: return __builtin_return_address(112);
+ case 112: return __builtin_return_address(113);
+ case 113: return __builtin_return_address(114);
+ case 114: return __builtin_return_address(115);
+ case 115: return __builtin_return_address(116);
+ case 116: return __builtin_return_address(117);
+ case 117: return __builtin_return_address(118);
+ case 118: return __builtin_return_address(119);
+ case 119: return __builtin_return_address(120);
+ case 120: return __builtin_return_address(121);
+ case 121: return __builtin_return_address(122);
+ case 122: return __builtin_return_address(123);
+ case 123: return __builtin_return_address(124);
+ case 124: return __builtin_return_address(125);
+ case 125: return __builtin_return_address(126);
+ case 126: return __builtin_return_address(127);
+ case 127: return __builtin_return_address(128);
+ default: return NULL;
+ }
+}
+
+static void *
+getframeaddr(int level)
+{
+
+ switch (level) {
+ case 0: return __builtin_frame_address(1);
+ case 1: return __builtin_frame_address(2);
+ case 2: return __builtin_frame_address(3);
+ case 3: return __builtin_frame_address(4);
+ case 4: return __builtin_frame_address(5);
+ case 5: return __builtin_frame_address(6);
+ case 6: return __builtin_frame_address(7);
+ case 7: return __builtin_frame_address(8);
+ case 8: return __builtin_frame_address(9);
+ case 9: return __builtin_frame_address(10);
+ case 10: return __builtin_frame_address(11);
+ case 11: return __builtin_frame_address(12);
+ case 12: return __builtin_frame_address(13);
+ case 13: return __builtin_frame_address(14);
+ case 14: return __builtin_frame_address(15);
+ case 15: return __builtin_frame_address(16);
+ case 16: return __builtin_frame_address(17);
+ case 17: return __builtin_frame_address(18);
+ case 18: return __builtin_frame_address(19);
+ case 19: return __builtin_frame_address(20);
+ case 20: return __builtin_frame_address(21);
+ case 21: return __builtin_frame_address(22);
+ case 22: return __builtin_frame_address(23);
+ case 23: return __builtin_frame_address(24);
+ case 24: return __builtin_frame_address(25);
+ case 25: return __builtin_frame_address(26);
+ case 26: return __builtin_frame_address(27);
+ case 27: return __builtin_frame_address(28);
+ case 28: return __builtin_frame_address(29);
+ case 29: return __builtin_frame_address(30);
+ case 30: return __builtin_frame_address(31);
+ case 31: return __builtin_frame_address(32);
+ case 32: return __builtin_frame_address(33);
+ case 33: return __builtin_frame_address(34);
+ case 34: return __builtin_frame_address(35);
+ case 35: return __builtin_frame_address(36);
+ case 36: return __builtin_frame_address(37);
+ case 37: return __builtin_frame_address(38);
+ case 38: return __builtin_frame_address(39);
+ case 39: return __builtin_frame_address(40);
+ case 40: return __builtin_frame_address(41);
+ case 41: return __builtin_frame_address(42);
+ case 42: return __builtin_frame_address(43);
+ case 43: return __builtin_frame_address(44);
+ case 44: return __builtin_frame_address(45);
+ case 45: return __builtin_frame_address(46);
+ case 46: return __builtin_frame_address(47);
+ case 47: return __builtin_frame_address(48);
+ case 48: return __builtin_frame_address(49);
+ case 49: return __builtin_frame_address(50);
+ case 50: return __builtin_frame_address(51);
+ case 51: return __builtin_frame_address(52);
+ case 52: return __builtin_frame_address(53);
+ case 53: return __builtin_frame_address(54);
+ case 54: return __builtin_frame_address(55);
+ case 55: return __builtin_frame_address(56);
+ case 56: return __builtin_frame_address(57);
+ case 57: return __builtin_frame_address(58);
+ case 58: return __builtin_frame_address(59);
+ case 59: return __builtin_frame_address(60);
+ case 60: return __builtin_frame_address(61);
+ case 61: return __builtin_frame_address(62);
+ case 62: return __builtin_frame_address(63);
+ case 63: return __builtin_frame_address(64);
+ case 64: return __builtin_frame_address(65);
+ case 65: return __builtin_frame_address(66);
+ case 66: return __builtin_frame_address(67);
+ case 67: return __builtin_frame_address(68);
+ case 68: return __builtin_frame_address(69);
+ case 69: return __builtin_frame_address(70);
+ case 70: return __builtin_frame_address(71);
+ case 71: return __builtin_frame_address(72);
+ case 72: return __builtin_frame_address(73);
+ case 73: return __builtin_frame_address(74);
+ case 74: return __builtin_frame_address(75);
+ case 75: return __builtin_frame_address(76);
+ case 76: return __builtin_frame_address(77);
+ case 77: return __builtin_frame_address(78);
+ case 78: return __builtin_frame_address(79);
+ case 79: return __builtin_frame_address(80);
+ case 80: return __builtin_frame_address(81);
+ case 81: return __builtin_frame_address(82);
+ case 82: return __builtin_frame_address(83);
+ case 83: return __builtin_frame_address(84);
+ case 84: return __builtin_frame_address(85);
+ case 85: return __builtin_frame_address(86);
+ case 86: return __builtin_frame_address(87);
+ case 87: return __builtin_frame_address(88);
+ case 88: return __builtin_frame_address(89);
+ case 89: return __builtin_frame_address(90);
+ case 90: return __builtin_frame_address(91);
+ case 91: return __builtin_frame_address(92);
+ case 92: return __builtin_frame_address(93);
+ case 93: return __builtin_frame_address(94);
+ case 94: return __builtin_frame_address(95);
+ case 95: return __builtin_frame_address(96);
+ case 96: return __builtin_frame_address(97);
+ case 97: return __builtin_frame_address(98);
+ case 98: return __builtin_frame_address(99);
+ case 99: return __builtin_frame_address(100);
+ case 100: return __builtin_frame_address(101);
+ case 101: return __builtin_frame_address(102);
+ case 102: return __builtin_frame_address(103);
+ case 103: return __builtin_frame_address(104);
+ case 104: return __builtin_frame_address(105);
+ case 105: return __builtin_frame_address(106);
+ case 106: return __builtin_frame_address(107);
+ case 107: return __builtin_frame_address(108);
+ case 108: return __builtin_frame_address(109);
+ case 109: return __builtin_frame_address(110);
+ case 110: return __builtin_frame_address(111);
+ case 111: return __builtin_frame_address(112);
+ case 112: return __builtin_frame_address(113);
+ case 113: return __builtin_frame_address(114);
+ case 114: return __builtin_frame_address(115);
+ case 115: return __builtin_frame_address(116);
+ case 116: return __builtin_frame_address(117);
+ case 117: return __builtin_frame_address(118);
+ case 118: return __builtin_frame_address(119);
+ case 119: return __builtin_frame_address(120);
+ case 120: return __builtin_frame_address(121);
+ case 121: return __builtin_frame_address(122);
+ case 122: return __builtin_frame_address(123);
+ case 123: return __builtin_frame_address(124);
+ case 124: return __builtin_frame_address(125);
+ case 125: return __builtin_frame_address(126);
+ case 126: return __builtin_frame_address(127);
+ case 127: return __builtin_frame_address(128);
+ default: return NULL;
+ }
+}
+
+static inline void *
+realloc_safe(void *ptr, size_t size)
+{
+ void *nptr;
+
+ nptr = realloc (ptr, size);
+ if (nptr == NULL)
+ free (ptr);
+ return nptr;
+}
+
+int
+backtrace(void **buffer, int size)
+{
+ int i;
+
+ for (i = 1; getframeaddr(i + 1) != NULL && i != size + 1; i++) {
+ buffer[i - 1] = getreturnaddr(i);
+ if (buffer[i - 1] == NULL)
+ break;
+ }
+ return i - 1;
+}
+
+char **
+backtrace_symbols(void *const *buffer, int size)
+{
+ size_t clen, alen;
+ int i, offset;
+ char **rval;
+ Dl_info info;
+
+ clen = size * sizeof(char *);
+ rval = malloc(clen);
+ if (rval == NULL)
+ return NULL;
+ for (i = 0; i < size; i++) {
+ if (dladdr(buffer[i], &info) != 0) {
+ if (info.dli_sname == NULL)
+ info.dli_sname = "???";
+ if (info.dli_saddr == NULL)
+ info.dli_saddr = buffer[i];
+ offset = buffer[i] - info.dli_saddr;
+ /* "0x01234567 <function+offset> at filename" */
+ alen = 2 + /* "0x" */
+ (sizeof(void *) * 2) + /* "01234567" */
+ 2 + /* " <" */
+ strlen(info.dli_sname) + /* "function" */
+ 1 + /* "+" */
+ 10 + /* "offset */
+ 5 + /* "> at " */
+ strlen(info.dli_fname) + /* "filename" */
+ 1; /* "\0" */
+ rval = realloc_safe(rval, clen + alen);
+ if (rval == NULL)
+ return NULL;
+ snprintf((char *) rval + clen, alen, "%p <%s+%d> at %s",
+ buffer[i], info.dli_sname, offset, info.dli_fname);
+ } else {
+ alen = 2 + /* "0x" */
+ (sizeof(void *) * 2) + /* "01234567" */
+ 1; /* "\0" */
+ rval = realloc_safe(rval, clen + alen);
+ if (rval == NULL)
+ return NULL;
+ snprintf((char *) rval + clen, alen, "%p", buffer[i]);
+ }
+ rval[i] = (char *) clen;
+ clen += alen;
+ }
+
+ for (i = 0; i < size; i++)
+ rval[i] += (long) rval;
+
+ return rval;
+}
+
+void
+backtrace_symbols_fd(void *const *buffer, int size, int fd)
+{
+ int i, len, offset;
+ char *buf;
+ Dl_info info;
+
+ for (i = 0; i < size; i++) {
+ if (dladdr(buffer[i], &info) != 0) {
+ if (info.dli_sname == NULL)
+ info.dli_sname = "???";
+ if (info.dli_saddr == NULL)
+ info.dli_saddr = buffer[i];
+ offset = buffer[i] - info.dli_saddr;
+ /* "0x01234567 <function+offset> at filename" */
+ len = 2 + /* "0x" */
+ (sizeof(void *) * 2) + /* "01234567" */
+ 2 + /* " <" */
+ strlen(info.dli_sname) + /* "function" */
+ 1 + /* "+" */
+ D10(offset) + /* "offset */
+ 5 + /* "> at " */
+ strlen(info.dli_fname) + /* "filename" */
+ 2; /* "\n\0" */
+ buf = alloca(len);
+ if (buf == NULL)
+ return;
+ snprintf(buf, len, "%p <%s+%d> at %s\n",
+ buffer[i], info.dli_sname, offset, info.dli_fname);
+ } else {
+ len = 2 + /* "0x" */
+ (sizeof(void *) * 2) + /* "01234567" */
+ 2; /* "\n\0" */
+ buf = alloca(len);
+ if (buf == NULL)
+ return;
+ snprintf(buf, len, "%p\n", buffer[i]);
+ }
+ if (write(fd, buf, strlen(buf)) == -1)
+ return;
+ }
+}
+#endif
diff --git a/contrib/libexecinfo/execinfo_compat.h b/contrib/libexecinfo/execinfo_compat.h
new file mode 100644
index 00000000000..ae84cfb1f35
--- /dev/null
+++ b/contrib/libexecinfo/execinfo_compat.h
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2003 Maxim Sobolev <sobomax@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $Id: execinfo.h,v 1.2 2004/07/19 05:20:29 sobomax Exp $
+ */
+
+#ifndef _EXECINFO_H_
+#define _EXECINFO_H_
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#ifndef HAVE_BACKTRACE
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern int backtrace(void **, int);
+extern char **backtrace_symbols(void *const *, int);
+extern void backtrace_symbols_fd(void *const *, int, int);
+
+#ifdef __cplusplus
+}
+#endif
+#endif
+
+#endif /* _EXECINFO_H_ */
diff --git a/contrib/libgen/basename_r.c b/contrib/libgen/basename_r.c
index e3fae60d1fe..2c3a87afe1c 100644
--- a/contrib/libgen/basename_r.c
+++ b/contrib/libgen/basename_r.c
@@ -1,7 +1,10 @@
/*
- * borrowed from glibc-2.12.1/string/basename.c
+ * borrowed from glibc-2.12.1/string/basename.c
* Modified to return "." for NULL or "", as required for SUSv2.
*/
+#include <string.h>
+#include <stdlib.h>
+#ifdef THREAD_UNSAFE_BASENAME
/* Return the name-within-directory of a file name.
Copyright (C) 1996,97,98,2002 Free Software Foundation, Inc.
@@ -34,3 +37,4 @@ basename_r (filename)
p = strrchr (filename, '/');
return p ? p + 1 : (char *) filename;
}
+#endif /* THREAD_UNSAFE_BASENAME */
diff --git a/contrib/libgen/dirname_r.c b/contrib/libgen/dirname_r.c
index 78fe0ee8cbe..131cbcf2a96 100644
--- a/contrib/libgen/dirname_r.c
+++ b/contrib/libgen/dirname_r.c
@@ -4,6 +4,9 @@
* Removed code for long bigger than 32 bytes, renamed __ptr_t as void *
* changed reg_char type to char.
*/
+#include <string.h>
+#include <stdlib.h>
+#ifdef THREAD_UNSAFE_DIRNAME
/* memrchr -- find the last occurrence of a byte in a memory block
Copyright (C) 1991, 93, 96, 97, 99, 2000 Free Software Foundation, Inc.
@@ -237,3 +240,4 @@ dirname_r (char *path)
return path;
}
+#endif /* THREAD_UNSAFE_DIRNAME */
diff --git a/contrib/macfuse/fuse_param.h b/contrib/macfuse/fuse_param.h
index 81d753c6cd7..347db9464bc 100644
--- a/contrib/macfuse/fuse_param.h
+++ b/contrib/macfuse/fuse_param.h
@@ -1,4 +1,9 @@
/*
+ * 'rebel' branch modifications:
+ * Copyright (C) 2010 Tuxera. All Rights Reserved.
+ */
+
+/*
* Copyright (C) 2006-2008 Google. All Rights Reserved.
* Amit Singh <singh@>
*/
@@ -6,69 +11,81 @@
#ifndef _FUSE_PARAM_H_
#define _FUSE_PARAM_H_
-/* Compile-time tunables (M_MACFUSE*) */
-
-#define M_MACFUSE_ENABLE_FIFOFS 0
-#define M_MACFUSE_ENABLE_INTERRUPT 1
-#define M_MACFUSE_ENABLE_SPECFS 0
-#define M_MACFUSE_ENABLE_TSLOCKING 0
-#define M_MACFUSE_ENABLE_UNSUPPORTED 1
-#define M_MACFUSE_ENABLE_XATTR 1
-
-#if M_MACFUSE_ENABLE_UNSUPPORTED
- #define M_MACFUSE_ENABLE_DSELECT 0
- #define M_MACFUSE_ENABLE_EXCHANGE 1
- #define M_MACFUSE_ENABLE_KQUEUE 1
- #define M_MACFUSE_ENABLE_KUNC 0
-#if __LP64__
- #define M_MACFUSE_ENABLE_INTERIM_FSNODE_LOCK 1
-#endif /* __LP64__ */
-#endif /* M_MACFUSE_ENABLE_UNSUPPORTED */
-
-#if M_MACFUSE_ENABLE_INTERIM_FSNODE_LOCK
-#define FUSE_VNOP_EXPORT __private_extern__
+#include <AvailabilityMacros.h>
+
+/* Compile-time tunables (M_OSXFUSE*) */
+
+#define M_OSXFUSE_ENABLE_FIFOFS 0
+#define M_OSXFUSE_ENABLE_INTERRUPT 1
+#define M_OSXFUSE_ENABLE_SPECFS 0
+#define M_OSXFUSE_ENABLE_TSLOCKING 1
+#define M_OSXFUSE_ENABLE_UNSUPPORTED 1
+#define M_OSXFUSE_ENABLE_XATTR 1
+#define M_OSXFUSE_ENABLE_DSELECT 1
+
+#if M_OSXFUSE_ENABLE_UNSUPPORTED
+# define M_OSXFUSE_ENABLE_EXCHANGE 1
+# define M_OSXFUSE_ENABLE_KUNC 0
+# define M_OSXFUSE_ENABLE_INTERIM_FSNODE_LOCK 1
+#endif /* M_OSXFUSE_ENABLE_UNSUPPORTED */
+
+#if MAC_OS_X_VERSION_MIN_REQUIRED < 1060
+# if M_OSXFUSE_ENABLE_UNSUPPORTED
+ /*
+ * In Mac OS X 10.5 the file system implementation is responsible for
+ * posting kqueue events. Starting with Mac OS X 10.6 VFS took over that
+ * job.
+ */
+# define M_OSXFUSE_ENABLE_KQUEUE 1
+# endif
+#endif /* MAC_OS_X_VERSION_MIN_REQUIRED < 1060 */
+
+#if M_OSXFUSE_ENABLE_INTERIM_FSNODE_LOCK
+# define M_OSXFUSE_ENABLE_HUGE_LOCK 0
+# define M_OSXFUSE_ENABLE_LOCK_LOGGING 0
+# define FUSE_VNOP_EXPORT __private_extern__
#else
-#define FUSE_VNOP_EXPORT static
-#endif /* M_MACFUSE_ENABLE_INTERIM_FSNODE_LOCK */
+# define FUSE_VNOP_EXPORT static
+#endif /* M_OSXFUSE_ENABLE_INTERIM_FSNODE_LOCK */
/* User Control */
-#define MACFUSE_POSTUNMOUNT_SIGNAL SIGKILL
+#define OSXFUSE_POSTUNMOUNT_SIGNAL SIGKILL
#define MACOSX_ADMIN_GROUP_NAME "admin"
-#define SYSCTL_MACFUSE_TUNABLES_ADMIN "macfuse.tunables.admin_group"
-#define SYSCTL_MACFUSE_VERSION_NUMBER "macfuse.version.number"
+#define SYSCTL_OSXFUSE_TUNABLES_ADMIN "osxfuse.tunables.admin_group"
+#define SYSCTL_OSXFUSE_VERSION_NUMBER "osxfuse.version.number"
/* Paths */
-#define MACFUSE_BUNDLE_PATH "/Library/Filesystems/fusefs.fs"
-#define MACFUSE_KEXT MACFUSE_BUNDLE_PATH "/Support/fusefs.kext"
-#define MACFUSE_LOAD_PROG MACFUSE_BUNDLE_PATH "/Support/load_fusefs"
-#define MACFUSE_MOUNT_PROG MACFUSE_BUNDLE_PATH "/Support/mount_fusefs"
+#define OSXFUSE_BUNDLE_PATH "/Library/Filesystems/osxfusefs.fs"
+#define OSXFUSE_KEXT OSXFUSE_BUNDLE_PATH "/Support/osxfusefs.kext"
+#define OSXFUSE_LOAD_PROG OSXFUSE_BUNDLE_PATH "/Support/load_osxfusefs"
+#define OSXFUSE_MOUNT_PROG OSXFUSE_BUNDLE_PATH "/Support/mount_osxfusefs"
#define SYSTEM_KEXTLOAD "/sbin/kextload"
#define SYSTEM_KEXTUNLOAD "/sbin/kextunload"
/* Compatible API version */
-#define MACFUSE_MIN_USER_VERSION_MAJOR 7
-#define MACFUSE_MIN_USER_VERSION_MINOR 5
+#define OSXFUSE_MIN_USER_VERSION_MAJOR 7
+#define OSXFUSE_MIN_USER_VERSION_MINOR 5
/* Device Interface */
/*
- * This is the prefix ("fuse" by default) of the name of a FUSE device node
- * in devfs. The suffix is the device number. "/dev/fuse0" is the first FUSE
+ * This is the prefix ("osxfuse" by default) of the name of a FUSE device node
+ * in devfs. The suffix is the device number. "/dev/osxfuse0" is the first FUSE
* device by default. If you change the prefix from the default to something
* else, the user-space FUSE library will need to know about it too.
*/
-#define MACFUSE_DEVICE_BASENAME "fuse"
+#define OSXFUSE_DEVICE_BASENAME "osxfuse"
/*
- * This is the number of /dev/fuse<n> nodes we will create. <n> goes from
- * 0 to (FUSE_NDEVICES - 1).
+ * This is the number of /dev/osxfuse<n> nodes we will create. <n> goes from
+ * 0 to (OSXFUSE_NDEVICES - 1).
*/
-#define MACFUSE_NDEVICES 24
+#define OSXFUSE_NDEVICES 24
/*
* This is the default block size of the virtual storage devices that are
@@ -131,13 +148,13 @@
/* User-Kernel IPC Buffer */
#define FUSE_MIN_USERKERNEL_BUFSIZE (128 * 1024)
-#define FUSE_MAX_USERKERNEL_BUFSIZE (4096 * 1024)
+#define FUSE_MAX_USERKERNEL_BUFSIZE (16 * 1024 * 1024)
#define FUSE_REASONABLE_XATTRSIZE FUSE_MIN_USERKERNEL_BUFSIZE
#endif /* KERNEL */
-#define FUSE_DEFAULT_USERKERNEL_BUFSIZE (4096 * 1024)
+#define FUSE_DEFAULT_USERKERNEL_BUFSIZE (16 * 1024 * 1024)
#define FUSE_LINK_MAX LINK_MAX
#define FUSE_UIO_BACKUP_MAX 8
diff --git a/contrib/macfuse/mount_darwin.c b/contrib/macfuse/mount_darwin.c
index c485583e96b..d1d1c34e761 100644
--- a/contrib/macfuse/mount_darwin.c
+++ b/contrib/macfuse/mount_darwin.c
@@ -1,5 +1,5 @@
/*
- * Derived from mount_bsd.c from the fuse distribution.
+ * Derived from mount_bsd.c from the fuse distribution.
*
* FUSE: Filesystem in Userspace
* Copyright (C) 2005-2006 Csaba Henk <csaba.henk@creo.hu>
@@ -34,324 +34,231 @@
#include "fuse_param.h"
#include "fuse_ioctl.h"
-#include "glusterfs.h"
-#include "logging.h"
-#include "common-utils.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/common-utils.h"
#define GFFUSE_LOGERR(...) \
gf_log ("glusterfs-fuse", GF_LOG_ERROR, ## __VA_ARGS__)
-static long
-fuse_os_version_major_np(void)
-{
- int ret = 0;
- long major = 0;
- char *c = NULL;
- struct utsname u;
- size_t oldlen;
-
- oldlen = sizeof(u.release);
-
- ret = sysctlbyname("kern.osrelease", u.release, &oldlen, NULL, 0);
- if (ret != 0) {
- return -1;
- }
-
- c = strchr(u.release, '.');
- if (c == NULL) {
- return -1;
- }
-
- *c = '\0';
-
- errno = 0;
- major = strtol(u.release, NULL, 10);
- if ((errno == EINVAL) || (errno == ERANGE)) {
- return -1;
- }
-
- return major;
-}
-
-static int
-fuse_running_under_rosetta(void)
-{
- int result = 0;
- int is_native = 1;
- size_t sz = sizeof(result);
-
- int ret = sysctlbyname("sysctl.proc_native", &result, &sz, NULL, (size_t)0);
- if ((ret == 0) && !result) {
- is_native = 0;
- }
-
- return !is_native;
-}
-
-static int
-loadkmod(void)
-{
- int result = -1;
- int pid, terminated_pid;
- union wait status;
- long major;
-
- major = fuse_os_version_major_np();
-
- if (major < 9) { /* not Mac OS X 10.5+ */
- return EINVAL;
- }
-
- pid = fork();
-
- if (pid == 0) {
- execl(MACFUSE_LOAD_PROG, MACFUSE_LOAD_PROG, NULL);
-
- /* exec failed */
- exit(ENOENT);
- }
-
- require_action(pid != -1, Return, result = errno);
-
- while ((terminated_pid = wait4(pid, (int *)&status, 0, NULL)) < 0) {
- /* retry if EINTR, else break out with error */
- if (errno != EINTR) {
- break;
- }
- }
-
- if ((terminated_pid == pid) && (WIFEXITED(status))) {
- result = WEXITSTATUS(status);
- } else {
- result = -1;
- }
-
-Return:
- check_noerr_string(result, strerror(errno));
-
- return result;
-}
-
int
gf_fuse_mount (const char *mountpoint, char *fsname, char *mnt_param,
- pid_t *mtab_pid /* not used on OS X */)
+ pid_t *mnt_pid, int status_fd) /* Not used on OS X */
{
- int fd, pid;
- int result;
- char *fdnam, *dev;
- const char *mountprog = MACFUSE_MOUNT_PROG;
- sig_t chldf;
+ int fd = 0;
+ int pid = 0;
+ int ret = 0;
+ char *fdnam = NULL;
+ char *dev = NULL;
+ char vstr[4];
+ unsigned vval = 0;
+ int i = 0;
+
+ const char *mountprog = OSXFUSE_MOUNT_PROG;
+ sig_t chldf = SIG_ERR;
+ char version[MAXHOSTNAMELEN + 1] = { 0 };
+ size_t version_len = MAXHOSTNAMELEN;
+ size_t version_len_desired = 0;
+ int r = 0;
+ char devpath[MAXPATHLEN] = { 0 };;
+
+ if (!mountpoint) {
+ gf_log ("glustefs-fuse", GF_LOG_ERROR,
+ "missing or invalid mount point");
+ goto err;
+ }
- /* mount_fusefs should not try to spawn the daemon */
- setenv("MOUNT_FUSEFS_SAFE", "1", 1);
+ /* mount_fusefs should not try to spawn the daemon */
+ setenv("MOUNT_FUSEFS_SAFE", "1", 1);
- /* to notify mount_fusefs it's called from lib */
- setenv("MOUNT_FUSEFS_CALL_BY_LIB", "1", 1);
+ /* to notify mount_fusefs it's called from lib */
+ setenv("MOUNT_FUSEFS_CALL_BY_LIB", "1", 1);
- if (!mountpoint) {
- fprintf(stderr, "missing or invalid mount point\n");
- return -1;
- }
+ chldf = signal(SIGCHLD, SIG_DFL); /* So that we can wait4() below. */
- if (fuse_running_under_rosetta()) {
- fprintf(stderr, "MacFUSE does not work under Rosetta\n");
- return -1;
- }
+ if (chldf == SIG_ERR) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "signal() returned SIG_ERR: %s",
+ strerror(errno));
+ goto err;
+ }
- chldf = signal(SIGCHLD, SIG_DFL); /* So that we can wait4() below. */
+ /* check for user<->kernel match. */
+ ret = sysctlbyname(SYSCTL_OSXFUSE_VERSION_NUMBER, version,
+ &version_len, NULL, (size_t)0);
+ if (ret != 0) {
+ gf_log ("glustefs-fuse", GF_LOG_ERROR,
+ "sysctlbyname() returned error: %s",
+ strerror(errno));
+ goto err;
+ }
- result = loadkmod();
- if (result == EINVAL)
- GFFUSE_LOGERR("OS X >= 10.5 (at least Leopard) required");
- else if (result == 0 || result == ENOENT || result == EBUSY) {
- /* Module loaded, but now need to check for user<->kernel match. */
+ /* sysctlbyname() includes the trailing '\0' in version_len */
+ version_len_desired = sizeof ("2.x.y");
- char version[MAXHOSTNAMELEN + 1] = { 0 };
- size_t version_len = MAXHOSTNAMELEN;
- size_t version_len_desired = 0;
+ if (version_len != version_len_desired) {
+ gf_log ("glusterfs-fuse", GF_LOG_ERROR,
+ "version length mismatch for OSXFUSE %s",
+ version);
+ ret = -1;
+ goto err;
+ }
- result = sysctlbyname(SYSCTL_MACFUSE_VERSION_NUMBER, version,
- &version_len, NULL, (size_t)0);
- if (result == 0) {
- /* sysctlbyname() includes the trailing '\0' in version_len */
- version_len_desired = strlen("2.x.y") + 1;
-
- if (version_len != version_len_desired)
- result = -1;
- } else
- strcpy(version, "?.?.?");
- if (result == 0) {
- char *ep;
- char vstr[4];
- unsigned vval;
- int i;
-
- for (i = 0; i < 3; i++)
+ for (i = 0; i < 3; i++)
vstr[i] = version[2*i];
- vstr[3] = '\0';
-
- vval = strtoul(vstr, &ep, 10);
- if (*ep || vval < 203 || vval > 217)
- result = -1;
- else
- gf_log("glusterfs-fuse", GF_LOG_INFO,
- "MacFUSE kext version %s", version);
- }
- if (result != 0)
- GFFUSE_LOGERR("MacFUSE version %s is not supported", version);
- } else
- GFFUSE_LOGERR("cannot load MacFUSE kext");
- if (result != 0)
- return -1;
-
- fdnam = getenv("FUSE_DEV_FD");
-
- if (fdnam) {
- char *ep;
-
- fd = strtol(fdnam, &ep, 10);
- if (*ep != '\0' || fd < 0) {
- GFFUSE_LOGERR("invalid value given in FUSE_DEV_FD");
- return -1;
+ vstr[3] = '\0';
+
+ vval = strtoul(vstr, NULL, 10);
+ if (vval < 264) {
+ GFFUSE_LOGERR("OSXFUSE version %s is not supported", version);
+ ret = -1;
+ goto err;
}
- goto mount;
- }
+ gf_log("glusterfs-fuse", GF_LOG_INFO,
+ "OSXFUSE kext version supported %s", version);
- dev = getenv("FUSE_DEV_NAME");
- if (dev) {
- if ((fd = open(dev, O_RDWR)) < 0) {
- GFFUSE_LOGERR("failed to open device (%s)", strerror(errno));
- return -1;
+ fdnam = getenv("FUSE_DEV_FD");
+ if (fdnam) {
+ fd = strtol(fdnam, NULL, 10);
+ if (fd < 0) {
+ GFFUSE_LOGERR("invalid value given in FUSE_DEV_FD");
+ ret = -1;
+ goto err;
+ }
+ goto mount;
}
- } else {
- int r, devidx = -1;
- char devpath[MAXPATHLEN];
-
- for (r = 0; r < MACFUSE_NDEVICES; r++) {
- snprintf(devpath, MAXPATHLEN - 1,
- _PATH_DEV MACFUSE_DEVICE_BASENAME "%d", r);
- fd = open(devpath, O_RDWR);
- if (fd >= 0) {
- dev = devpath;
- devidx = r;
- break;
- }
+
+ dev = getenv("FUSE_DEV_NAME");
+ if (!dev) {
+ for (r = 0; r < OSXFUSE_NDEVICES; r++) {
+ snprintf(devpath, MAXPATHLEN - 1,
+ _PATH_DEV OSXFUSE_DEVICE_BASENAME "%d", r);
+ if ((fd = open(devpath, O_RDWR)) < 0) {
+ GFFUSE_LOGERR("failed to open device %s (%s)",
+ devpath,
+ strerror(errno));
+ goto err;
+ }
+ dev = devpath;
+ goto mount;
+ }
}
- if (devidx == -1) {
- GFFUSE_LOGERR("failed to open device (%s)", strerror(errno));
- return -1;
+
+ fd = open(dev, O_RDWR);
+ if (fd < 0) {
+ GFFUSE_LOGERR("failed to open device %s (%s)", dev,
+ strerror(errno));
+ ret = -1;
+ goto err;
}
- }
mount:
- if (getenv("FUSE_NO_MOUNT") || ! mountpoint)
- goto out;
-
- signal(SIGCHLD, chldf);
-
- pid = fork();
-
- if (pid == -1) {
- GFFUSE_LOGERR("fork() failed (%s)", strerror(errno));
- close(fd);
- return -1;
- }
-
- if (pid == 0) {
+ signal(SIGCHLD, chldf);
pid = fork();
if (pid == -1) {
- GFFUSE_LOGERR("fork() failed (%s)", strerror(errno));
- close(fd);
- exit(1);
+ GFFUSE_LOGERR("fork() failed (%s)", strerror(errno));
+ ret = -1;
+ goto err;
}
if (pid == 0) {
- const char *argv[32];
- int a = 0;
- char *opts = NULL;
-
- if (asprintf(&opts, "%s,fssubtype=glusterfs", mnt_param) == -1) {
- GFFUSE_LOGERR("Out of memory");
- exit(1);
- }
-
- if (! fdnam)
- asprintf(&fdnam, "%d", fd);
-
- argv[a++] = mountprog;
- if (opts) {
- argv[a++] = "-o";
- argv[a++] = opts;
- }
- argv[a++] = fdnam;
- argv[a++] = mountpoint;
- argv[a++] = NULL;
-
- {
- char title[MAXPATHLEN + 1] = { 0 };
- u_int32_t len = MAXPATHLEN;
- int ret = proc_pidpath(getpid(), title, len);
- if (ret) {
- setenv("MOUNT_FUSEFS_DAEMON_PATH", title, 1);
+ pid = fork();
+ if (pid == -1) {
+ GFFUSE_LOGERR("fork() failed (%s)", strerror(errno));
+ ret = -1;
+ goto err;
}
- }
- execvp(mountprog, (char **) argv);
- GFFUSE_LOGERR("MacFUSE: failed to exec mount program (%s)", strerror(errno));
- exit(1);
- }
- _exit(0);
- }
-
-out:
- return fd;
+ if (pid == 0) {
+ const char *argv[32];
+ int a = 0;
+ char *opts = NULL;
+
+ if (asprintf(&opts, "%s,fssubtype=glusterfs",
+ mnt_param) == -1) {
+ GFFUSE_LOGERR("asprintf() error: %s",
+ strerror(errno));
+ ret = -1;
+ goto err;
+ }
+
+ if (!fdnam)
+ asprintf(&fdnam, "%d", fd);
+
+ argv[a++] = mountprog;
+ if (opts) {
+ argv[a++] = "-o";
+ argv[a++] = opts;
+ }
+ argv[a++] = fdnam;
+ argv[a++] = mountpoint;
+ argv[a++] = NULL;
+
+ {
+ char title[MAXPATHLEN + 1] = { 0 };
+ u_int32_t len = MAXPATHLEN;
+ int ret = proc_pidpath(getpid(), title, len);
+ if (ret) {
+ setenv("MOUNT_FUSEFS_DAEMON_PATH",
+ title, 1);
+ }
+ }
+ execvp(mountprog, (char **) argv);
+ GFFUSE_LOGERR("OSXFUSE: failed to exec mount"
+ " program (%s)", strerror(errno));
+ _exit(1);
+ }
+ _exit(0);
+ }
+ ret = fd;
+err:
+ if (ret == -1) {
+ if (fd > 0) {
+ close(fd);
+ }
+ }
+ return ret;
}
void
gf_fuse_unmount(const char *mountpoint, int fd)
{
- int ret;
- struct stat sbuf;
- char dev[128];
- char resolved_path[PATH_MAX];
- char *ep, *rp = NULL;
-
- unsigned int hs_complete = 0;
+ int ret;
+ struct stat sbuf;
+ char dev[128];
+ char resolved_path[PATH_MAX];
+ char *ep, *rp = NULL;
- ret = ioctl(fd, FUSEDEVIOCGETHANDSHAKECOMPLETE, &hs_complete);
- if (ret || !hs_complete) {
- return;
- }
- /* XXX does this have any use here? */
- ret = ioctl(fd, FUSEDEVIOCSETDAEMONDEAD, &fd);
- if (ret) {
- return;
- }
+ unsigned int hs_complete = 0;
- if (fstat(fd, &sbuf) == -1) {
- return;
- }
+ ret = ioctl(fd, FUSEDEVIOCGETHANDSHAKECOMPLETE, &hs_complete);
+ if (ret || !hs_complete) {
+ return;
+ }
- devname_r(sbuf.st_rdev, S_IFCHR, dev, 128);
+ if (fstat(fd, &sbuf) == -1) {
+ return;
+ }
- if (strncmp(dev, MACFUSE_DEVICE_BASENAME,
- sizeof(MACFUSE_DEVICE_BASENAME) - 1)) {
- return;
- }
+ devname_r(sbuf.st_rdev, S_IFCHR, dev, 128);
- strtol(dev + 4, &ep, 10);
- if (*ep != '\0') {
- return;
- }
+ if (strncmp(dev, OSXFUSE_DEVICE_BASENAME,
+ sizeof(OSXFUSE_DEVICE_BASENAME) - 1)) {
+ return;
+ }
- rp = realpath(mountpoint, resolved_path);
- if (rp) {
- ret = unmount(resolved_path, 0);
- }
+ strtol(dev + sizeof(OSXFUSE_DEVICE_BASENAME) - 1, &ep, 10);
+ if (*ep != '\0') {
+ return;
+ }
- close(fd);
+ rp = realpath(mountpoint, resolved_path);
+ if (rp) {
+ ret = unmount(resolved_path, 0);
+ }
- return;
+ close(fd);
+ return;
}
diff --git a/contrib/mount/mntent.c b/contrib/mount/mntent.c
new file mode 100644
index 00000000000..9a7e5f39bdb
--- /dev/null
+++ b/contrib/mount/mntent.c
@@ -0,0 +1,260 @@
+/*
+ * Copyright (c) 1980, 1989, 1993, 1994
+ * The Regents of the University of California. All rights reserved.
+ * Copyright (c) 2001
+ * David Rufino <daverufino@btinternet.com>
+ * Copyright (c) 2014
+ * Red Hat, Inc. <http://www.redhat.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ * must display the following acknowledgement:
+ * This product includes software developed by the University of
+ * California, Berkeley and its contributors.
+ * 4. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#if !defined(GF_LINUX_HOST_OS)
+
+#include <stdlib.h>
+#include <string.h>
+#include <sys/param.h>
+#include <sys/ucred.h>
+#include <sys/mount.h>
+#include "mntent_compat.h"
+
+#ifdef __NetBSD__
+typedef struct statvfs gf_statfs_t;
+#else
+typedef struct statfs gf_statfs_t;
+#endif
+
+typedef struct _mntent_state {
+ struct mntent mntent;
+ gf_statfs_t *statfs;
+ int count;
+ int pos;
+ /* A buffer big enough to store all defined flags as a string.
+ * Increase it if necessary when more flags are defined. */
+ char buf[256];
+} mntent_state_t;
+
+typedef struct _mntflag {
+ unsigned long value;
+ const char *on;
+ const char *off;
+} mntflag_t;
+
+static mntflag_t mntflags[] = {
+ { MNT_RDONLY, "ro", "rw" },
+ { MNT_SYNCHRONOUS, "sync", NULL },
+ { MNT_NOEXEC, "noexec", NULL },
+ { MNT_NOSUID, "nosuid", NULL },
+#if !defined(__FreeBSD__)
+ { MNT_NODEV, "nodev", NULL },
+#endif /* __FreeBSD__ */
+ { MNT_UNION, "union", NULL },
+ { MNT_ASYNC, "async", NULL },
+#if !defined(GF_DARWIN_HOST_OS)
+ { MNT_NOATIME, "noatime", NULL },
+#if !defined(__NetBSD__)
+ { MNT_NOCLUSTERR, "noclusterr", NULL },
+ { MNT_NOCLUSTERW, "noclusterw", NULL },
+ { MNT_NOSYMFOLLOW, "nosymfollow", NULL },
+ { MNT_SUIDDIR, "suiddir", NULL },
+#endif /* !__NetBSD__ */
+#endif /* !GF_DARWIN_HOST_OS */
+ { 0, NULL, NULL }
+};
+
+char *
+hasmntopt (const struct mntent *mnt, const char *option)
+{
+ char *opt, *optbuf;
+ int len;
+
+ optbuf = strdup(mnt->mnt_opts);
+ if (optbuf == NULL) {
+ return NULL;
+ }
+
+ opt = optbuf;
+ len = 0;
+ while (*opt) {
+ while (opt[len] != 0) {
+ if (opt[len] == ' ') {
+ opt[len++] = 0;
+ break;
+ }
+ len++;
+ }
+ if ((*opt != 0) && (strcasecmp(opt, option) == 0)) {
+ break;
+ }
+ opt += len;
+ len = 0;
+ }
+ free(optbuf);
+ if (len == 0) {
+ return NULL;
+ }
+
+ return opt - optbuf + mnt->mnt_opts;
+}
+
+static int
+writeopt(const char *text, char *buf, int buflen, int pos)
+{
+ int len;
+
+ /* buflen must be > 0 */
+
+ if (text == NULL) {
+ return pos;
+ }
+
+ buf += pos;
+ if (pos > 0) {
+ /* We are sure we have at least one byte to store the space.
+ * We don't need to check buflen here. */
+ *buf++ = ' ';
+ pos++;
+ }
+ len = strlen(text) + 1;
+ pos += len;
+ if (pos >= buflen) {
+ /* There won't be enough space for the text and the
+ * terminating null character. We copy as much as we can
+ * of the text and mark the end of the string with '...' */
+ memcpy(buf, text, buflen - pos + len);
+ if (buflen > 3) {
+ strcpy(buf + buflen - 4, "...");
+ } else {
+ strncpy(buf, "...", buflen - 1);
+ buf[buflen - 1] = 0;
+ }
+ pos = buflen;
+ } else {
+ memcpy(buf, text, len);
+ }
+
+ return pos;
+}
+
+static char *
+flags2opts (int flags, char *buf, int buflen)
+{
+ char other[16];
+ mntflag_t *flg;
+ int pos;
+
+ if (buflen == 0) {
+ return NULL;
+ }
+
+ pos = 0;
+ for (flg = mntflags; flg->value != 0; flg++) {
+ pos = writeopt((flags & flg->value) == 0 ? flg->off : flg->on,
+ buf, buflen, pos);
+ flags &= ~flg->value;
+ }
+
+ if (flags != 0) {
+ sprintf(other, "[0x%x]", flags);
+ writeopt(other, buf, buflen, pos);
+ }
+
+ return buf;
+}
+
+static void
+statfs_to_mntent (struct mntent *mntent, gf_statfs_t *mntbuf, char *buf,
+ int buflen)
+{
+ int f_flags;
+
+ mntent->mnt_fsname = mntbuf->f_mntfromname;
+ mntent->mnt_dir = mntbuf->f_mntonname;
+ mntent->mnt_type = mntbuf->f_fstypename;
+
+#ifdef __NetBSD__
+ f_flags = mntbuf->f_flag;
+#else
+ f_flags = mntbuf->f_flags;
+#endif
+ mntent->mnt_opts = flags2opts (f_flags, buf, buflen);
+
+ mntent->mnt_freq = mntent->mnt_passno = 0;
+}
+
+struct mntent *
+getmntent_r (FILE *fp, struct mntent *mntent, char *buf, int buflen)
+{
+ mntent_state_t *state = (mntent_state_t *)fp;
+
+ if (state->pos >= state->count) {
+ return NULL;
+ }
+
+ statfs_to_mntent(mntent, &state->statfs[state->pos++], buf, buflen);
+
+ return mntent;
+}
+
+struct mntent *
+getmntent (FILE *fp)
+{
+ mntent_state_t *state = (mntent_state_t *)fp;
+
+ return getmntent_r(fp, &state->mntent, state->buf,
+ sizeof(state->buf));
+}
+
+FILE *
+setmntent (const char *filename, const char *type)
+{
+ mntent_state_t *state;
+
+ /* We don't really need to access any file so we'll use the FILE* as
+ * a fake file to store state information.
+ */
+
+ state = malloc(sizeof(mntent_state_t));
+ if (state != NULL) {
+ state->pos = 0;
+ state->count = getmntinfo(&state->statfs, MNT_NOWAIT);
+ }
+
+ return (FILE *)state;
+}
+
+int
+endmntent (FILE *fp)
+{
+ free(fp);
+
+ return 1; /* endmntent() always returns 1 */
+}
+
+#endif /* !GF_LINUX_HOST_OS */
diff --git a/contrib/mount/mntent_compat.h b/contrib/mount/mntent_compat.h
new file mode 100644
index 00000000000..ca82e9aa60f
--- /dev/null
+++ b/contrib/mount/mntent_compat.h
@@ -0,0 +1,37 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _MNTENT_H
+#define _MNTENT_H
+
+#if !defined(GF_LINUX_HOST_OS)
+#include <stdio.h>
+
+struct mntent {
+ char *mnt_fsname;
+ char *mnt_dir;
+ char *mnt_type;
+ char *mnt_opts;
+ int mnt_freq;
+ int mnt_passno;
+};
+
+struct mntent *getmntent (FILE *fp);
+struct mntent *getmntent_r (FILE *fp, struct mntent *result,
+ char *buffer, int bufsize);
+FILE *setmntent (const char *filename, const char *type);
+int endmntent(FILE *fp);
+char * hasmntopt (const struct mntent *mnt, const char *option);
+
+/* Dummy - /etc/mtab has no meaning on OSX platform */
+#define _PATH_MOUNTED "/etc/mtab"
+
+#endif /* GF_DARWIN_HOST_OS || __NetBSD__ */
+#endif /* _MNTENT_H */
diff --git a/contrib/timer-wheel/find_last_bit.c b/contrib/timer-wheel/find_last_bit.c
new file mode 100644
index 00000000000..192fee802a8
--- /dev/null
+++ b/contrib/timer-wheel/find_last_bit.c
@@ -0,0 +1,61 @@
+/* bit search implementation
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * Copyright (C) 2008 IBM Corporation
+ * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au>
+ * (Inspired by David Howell's find_next_bit implementation)
+ *
+ * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease
+ * size and improve performance, 2015.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+/**
+ * @find_last_bit
+ * optimized implementation of find last bit in
+ */
+
+#ifndef BITS_PER_LONG
+#ifdef __LP64__
+#define BITS_PER_LONG 64
+#else
+#define BITS_PER_LONG 32
+#endif
+#endif
+
+unsigned long gw_tw_fls (unsigned long word)
+{
+ int num = BITS_PER_LONG;
+
+#if BITS_PER_LONG == 64
+ if (!(word & (~0ul << 32))) {
+ num -= 32;
+ word <<= 32;
+ }
+#endif
+ if (!(word & (~0ul << (BITS_PER_LONG-16)))) {
+ num -= 16;
+ word <<= 16;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-8)))) {
+ num -= 8;
+ word <<= 8;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-4)))) {
+ num -= 4;
+ word <<= 4;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-2)))) {
+ num -= 2;
+ word <<= 2;
+ }
+ if (!(word & (~0ul << (BITS_PER_LONG-1))))
+ num -= 1;
+ return num;
+}
diff --git a/contrib/timer-wheel/timer-wheel.c b/contrib/timer-wheel/timer-wheel.c
new file mode 100644
index 00000000000..58e0607bf0c
--- /dev/null
+++ b/contrib/timer-wheel/timer-wheel.c
@@ -0,0 +1,374 @@
+/*
+ * linux/kernel/timer.c
+ *
+ * Kernel internal timers
+ *
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ *
+ */
+/*
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/select.h>
+
+#include "timer-wheel.h"
+
+static inline void
+__gf_tw_add_timer (struct tvec_base *base, struct gf_tw_timer_list *timer)
+{
+ int i;
+ unsigned long idx;
+ unsigned long expires;
+ struct list_head *vec;
+
+ expires = timer->expires;
+
+ idx = expires - base->timer_sec;
+
+ if (idx < TVR_SIZE) {
+ i = expires & TVR_MASK;
+ vec = base->tv1.vec + i;
+ } else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
+ i = (expires >> TVR_BITS) & TVN_MASK;
+ vec = base->tv2.vec + i;
+ } else if (idx < 1 << (TVR_BITS + 2*TVN_BITS)) {
+ i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
+ vec = base->tv3.vec + i;
+ } else if (idx < 1 << (TVR_BITS + 3*TVN_BITS)) {
+ i = (expires >> (TVR_BITS + 2*TVN_BITS)) & TVN_MASK;
+ vec = base->tv4.vec + i;
+ } else if (idx < 0) {
+ vec = base->tv1.vec + (base->timer_sec & TVR_MASK);
+ } else {
+ i = (expires >> (TVR_BITS + 3*TVN_BITS)) & TVN_MASK;
+ vec = base->tv5.vec + i;
+ }
+
+ list_add_tail (&timer->entry, vec);
+}
+
+unsigned long gf_tw_find_last_bit(const unsigned long *, unsigned long);
+
+#if defined(__GNUC__) || defined(__clang__)
+static inline unsigned long gf_tw_fls (unsigned long word)
+{
+ return BITS_PER_LONG - __builtin_clzl(word);
+}
+#else
+extern unsigned long gf_tw_fls (unsigned long);
+#endif
+
+static inline unsigned long
+apply_slack(struct tvec_base *base, struct gf_tw_timer_list *timer)
+{
+ long delta;
+ unsigned long mask, expires, expires_limit;
+
+ expires = timer->expires;
+
+ delta = expires - base->timer_sec;
+ if (delta < 256)
+ return expires;
+
+ expires_limit = expires + delta / 256;
+ mask = expires ^ expires_limit;
+ if (mask == 0)
+ return expires;
+
+ int bit = gf_tw_fls (mask);
+ mask = (1UL << bit) - 1;
+
+ expires_limit = expires_limit & ~(mask);
+ return expires_limit;
+}
+
+static inline void
+__gf_tw_detach_timer (struct gf_tw_timer_list *timer)
+{
+ struct list_head *entry = &timer->entry;
+
+ list_del (entry);
+ entry->next = NULL;
+}
+
+static inline int
+cascade (struct tvec_base *base, struct tvec *tv, int index)
+{
+ struct gf_tw_timer_list *timer, *tmp;
+ struct list_head tv_list;
+
+ list_replace_init (tv->vec + index, &tv_list);
+
+ list_for_each_entry_safe (timer, tmp, &tv_list, entry) {
+ __gf_tw_add_timer (base, timer);
+ }
+
+ return index;
+}
+
+#define INDEX(N) ((base->timer_sec >> (TVR_BITS + N * TVN_BITS)) & TVN_MASK)
+
+/**
+ * run expired timers
+ */
+static inline void
+run_timers (struct tvec_base *base)
+{
+ unsigned long index, call_time;
+ struct gf_tw_timer_list *timer;
+
+ struct list_head work_list;
+ struct list_head *head = &work_list;
+
+ pthread_spin_lock (&base->lock);
+ {
+ index = base->timer_sec & TVR_MASK;
+
+ if (!index &&
+ (!cascade (base, &base->tv2, INDEX(0))) &&
+ (!cascade (base, &base->tv3, INDEX(1))) &&
+ (!cascade (base, &base->tv4, INDEX(2))))
+ cascade (base, &base->tv5, INDEX(3));
+
+ call_time = base->timer_sec++;
+ list_replace_init (base->tv1.vec + index, head);
+ while (!list_empty(head)) {
+ void (*fn)(struct gf_tw_timer_list *, void *, unsigned long);
+ void *data;
+
+ timer = list_first_entry (head, struct gf_tw_timer_list, entry);
+ fn = timer->function;
+ data = timer->data;
+
+ __gf_tw_detach_timer (timer);
+ pthread_spin_unlock(&base->lock);
+ {
+ /* It is required to run the actual function outside
+ of the locked zone, so we don't bother about
+ locked operations inside that function */
+ fn(timer, data, call_time);
+ }
+ pthread_spin_lock(&base->lock);
+ }
+ }
+ pthread_spin_unlock (&base->lock);
+
+}
+
+void *runner (void *arg)
+{
+ struct timeval tv = {0,};
+ struct tvec_base *base = arg;
+
+ while (1) {
+ run_timers (base);
+
+ tv.tv_sec = 1;
+ tv.tv_usec = 0;
+ (void) select (0, NULL, NULL, NULL, &tv);
+ }
+
+ return NULL;
+
+}
+
+static inline int timer_pending (struct gf_tw_timer_list *timer)
+{
+ struct list_head *entry = &timer->entry;
+
+ return (entry->next != NULL);
+}
+
+static inline int __detach_if_pending (struct gf_tw_timer_list *timer)
+{
+ if (!timer_pending (timer))
+ return 0;
+
+ __gf_tw_detach_timer (timer);
+ return 1;
+}
+
+static inline int __mod_timer (struct tvec_base *base,
+ struct gf_tw_timer_list *timer, int pending_only)
+{
+ int ret = 0;
+
+ ret = __detach_if_pending (timer);
+ if (!ret && pending_only)
+ goto done;
+
+ ret = 1;
+ __gf_tw_add_timer (base, timer);
+
+ done:
+ return ret;
+}
+
+/* interface */
+
+/**
+ * Add a timer in the timer wheel
+ */
+void gf_tw_add_timer (struct tvec_base *base, struct gf_tw_timer_list *timer)
+{
+ pthread_spin_lock (&base->lock);
+ {
+ timer->expires += base->timer_sec;
+ timer->expires = apply_slack (base, timer);
+ __gf_tw_add_timer (base, timer);
+ }
+ pthread_spin_unlock (&base->lock);
+}
+
+/**
+ * Remove a timer from the timer wheel
+ */
+int gf_tw_del_timer (struct tvec_base *base, struct gf_tw_timer_list *timer)
+{
+ int ret = 0;
+
+ pthread_spin_lock (&base->lock);
+ {
+ if (timer_pending (timer)) {
+ ret = 1;
+ __gf_tw_detach_timer (timer);
+ }
+ }
+ pthread_spin_unlock (&base->lock);
+
+ return ret;
+}
+
+int gf_tw_mod_timer_pending (struct tvec_base *base,
+ struct gf_tw_timer_list *timer,
+ unsigned long expires)
+{
+ int ret = 1;
+
+ pthread_spin_lock (&base->lock);
+ {
+ timer->expires = expires + base->timer_sec;
+ timer->expires = apply_slack (base, timer);
+
+ ret = __mod_timer (base, timer, 1);
+ }
+ pthread_spin_unlock (&base->lock);
+
+ return ret;
+}
+
+int gf_tw_mod_timer (struct tvec_base *base,
+ struct gf_tw_timer_list *timer, unsigned long expires)
+{
+ int ret = 1;
+
+ pthread_spin_lock (&base->lock);
+ {
+ /* fast path optimization */
+ if (timer_pending (timer) && timer->expires == expires)
+ goto unblock;
+
+ timer->expires = expires + base->timer_sec;
+ timer->expires = apply_slack (base, timer);
+
+ ret = __mod_timer (base, timer, 0);
+ }
+ unblock:
+ pthread_spin_unlock (&base->lock);
+
+ return ret;
+}
+
+int gf_tw_cleanup_timers (struct tvec_base *base)
+{
+ int ret = 0;
+ void *res = NULL;
+
+ /* terminate runner */
+ ret = pthread_cancel (base->runner);
+ if (ret != 0)
+ goto error_return;
+ ret = pthread_join (base->runner, &res);
+ if (ret != 0)
+ goto error_return;
+ if (res != PTHREAD_CANCELED)
+ goto error_return;
+
+ /* destroy lock */
+ ret = pthread_spin_destroy (&base->lock);
+ if (ret != 0)
+ goto error_return;
+
+ /* deallocated timer base */
+ free (base);
+ return 0;
+
+ error_return:
+ return -1;
+}
+
+/**
+ * Initialize various timer wheel lists and spawn a thread that
+ * invokes run_timers()
+ */
+struct tvec_base *gf_tw_init_timers ()
+{
+ int j = 0;
+ int ret = 0;
+ struct timeval tv = {0,};
+ struct tvec_base *base = NULL;
+
+ base = malloc (sizeof (*base));
+ if (!base)
+ goto error_return;
+
+ ret = pthread_spin_init (&base->lock, 0);
+ if (ret != 0)
+ goto error_dealloc;
+
+ for (j = 0; j < TVN_SIZE; j++) {
+ INIT_LIST_HEAD (base->tv5.vec + j);
+ INIT_LIST_HEAD (base->tv4.vec + j);
+ INIT_LIST_HEAD (base->tv3.vec + j);
+ INIT_LIST_HEAD (base->tv2.vec + j);
+ }
+
+ for (j = 0; j < TVR_SIZE; j++) {
+ INIT_LIST_HEAD (base->tv1.vec + j);
+ }
+
+ ret = gettimeofday (&tv, 0);
+ if (ret < 0)
+ goto destroy_lock;
+ base->timer_sec = tv.tv_sec;
+
+ ret = pthread_create (&base->runner, NULL, runner, base);
+ if (ret != 0)
+ goto destroy_lock;
+ return base;
+
+ destroy_lock:
+ (void) pthread_spin_destroy (&base->lock);
+ error_dealloc:
+ free (base);
+ error_return:
+ return NULL;
+}
diff --git a/contrib/timer-wheel/timer-wheel.h b/contrib/timer-wheel/timer-wheel.h
new file mode 100644
index 00000000000..5637735ec22
--- /dev/null
+++ b/contrib/timer-wheel/timer-wheel.h
@@ -0,0 +1,77 @@
+/*
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+#ifndef __TIMER_WHEEL_H
+#define __TIMER_WHEEL_H
+
+#include "glusterfs/locking.h"
+
+#include "glusterfs/list.h"
+
+#define TVR_BITS 8
+#define TVN_BITS 6
+#define TVR_SIZE (1 << TVR_BITS)
+#define TVN_SIZE (1 << TVN_BITS)
+#define TVR_MASK (TVR_SIZE - 1)
+#define TVN_MASK (TVN_SIZE - 1)
+
+#define BITS_PER_LONG 64
+
+struct tvec {
+ struct list_head vec[TVN_SIZE];
+};
+
+struct tvec_root {
+ struct list_head vec[TVR_SIZE];
+};
+
+struct tvec_base {
+ pthread_spinlock_t lock; /* base lock */
+
+ pthread_t runner; /* run_timer() */
+
+ unsigned long timer_sec; /* time counter */
+
+ struct tvec_root tv1;
+ struct tvec tv2;
+ struct tvec tv3;
+ struct tvec tv4;
+ struct tvec tv5;
+};
+
+struct gf_tw_timer_list {
+ void *data;
+ unsigned long expires;
+
+ /** callback routine */
+ void (*function)(struct gf_tw_timer_list *, void *, unsigned long);
+
+ struct list_head entry;
+};
+
+/** The API! */
+struct tvec_base *gf_tw_init_timers ();
+int gf_tw_cleanup_timers (struct tvec_base *);
+void gf_tw_add_timer (struct tvec_base *, struct gf_tw_timer_list *);
+int gf_tw_del_timer (struct tvec_base *, struct gf_tw_timer_list *);
+
+int gf_tw_mod_timer_pending (struct tvec_base *,
+ struct gf_tw_timer_list *, unsigned long);
+
+int gf_tw_mod_timer (struct tvec_base *,
+ struct gf_tw_timer_list *, unsigned long);
+
+#endif
diff --git a/contrib/umountd/Makefile.am b/contrib/umountd/Makefile.am
new file mode 100644
index 00000000000..b39e000f5aa
--- /dev/null
+++ b/contrib/umountd/Makefile.am
@@ -0,0 +1,11 @@
+sbin_PROGRAMS = umountd
+umountd_SOURCES = umountd.c
+umountd_CFLAGS = $(GF_CFLAGS) -DDATADIR=\"$(localstatedir)\"
+umountd_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la ${GF_LDADD}
+umountd_LDFLAGS = $(GF_LDFLAGS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/contrib/umountd/umountd.c b/contrib/umountd/umountd.c
new file mode 100644
index 00000000000..3f933ecb554
--- /dev/null
+++ b/contrib/umountd/umountd.c
@@ -0,0 +1,255 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <errno.h>
+#include <dirent.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/globals.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/mem-types.h"
+
+static void
+usage (void)
+{
+ fprintf (stderr, "Usage: umountd [-d dev] [-t timeout] [-r] path\n");
+ exit (EXIT_FAILURE);
+}
+
+
+static int
+sanity_check (char *path, dev_t *devp)
+{
+ struct stat st;
+ struct stat parent_st;
+ int ret;
+ char pathtmp[PATH_MAX];
+ char *parent;
+
+ if (path == NULL)
+ usage ();
+
+ if ((ret = stat (path, &st)) != 0) {
+ switch (errno) {
+ case ENOTCONN:
+ /* volume is stopped */
+ break;
+ default:
+ gf_log ("umountd", GF_LOG_ERROR,
+ "Cannot access %s: %s\n",
+ path, strerror (errno));
+ goto out;
+ }
+ }
+
+ /* If dev was not specified, get it from path */
+ if (*devp == -1 && ret == 0)
+ *devp = st.st_dev;
+
+ snprintf (pathtmp, PATH_MAX, "%s", path);
+ parent = dirname (pathtmp);
+
+ if (stat (parent, &parent_st) != 0) {
+ gf_log ("umountd", GF_LOG_ERROR,
+ "Cannot access %s: %s\n",
+ parent, strerror (errno));
+ goto out;
+ }
+
+ if (st.st_dev == parent_st.st_dev) {
+ gf_log ("umountd", GF_LOG_ERROR,
+ "No filesystem mounted on %s\n", path);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static void
+log_rotate (int signum)
+{
+ gf_log_logrotate (1);
+
+ if (signal (SIGHUP, *log_rotate) == SIG_ERR) {
+ gf_log ("umountd", GF_LOG_ERROR, "signal () failed");
+ exit (EXIT_FAILURE);
+ }
+
+ return;
+}
+
+static int
+logging_init (void)
+{
+ glusterfs_ctx_t *ctx;
+ char log_file[PATH_MAX];
+ int ret = -1;
+
+ ctx = glusterfs_ctx_new ();
+ if (!ctx) {
+ fprintf (stderr, "glusterfs_ctx_new failed\n");
+ goto out;
+ }
+
+ ret = glusterfs_globals_init (ctx);
+ if (ret) {
+ fprintf (stderr, "glusterfs_globals_init failed\n");
+ goto out;
+ }
+
+ THIS->ctx = ctx;
+ xlator_mem_acct_init (THIS, gf_common_mt_end);
+
+ snprintf (log_file, PATH_MAX,
+ "%s/umountd.log", DEFAULT_LOG_FILE_DIRECTORY);
+
+ ret = gf_log_init (ctx, log_file, "umountd");
+ if (ret) {
+ fprintf (stderr, "gf_log_init failed\n");
+ goto out;
+ }
+
+ if (signal (SIGHUP, *log_rotate) == SIG_ERR) {
+ gf_log ("umountd", GF_LOG_ERROR, "signal () failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+umountd_async (char *path, dev_t dev, int frmdir, int timeout)
+{
+ int ret = -1;
+ struct stat stbuf = {0, };
+ int unmount_ret = 0;
+
+ do {
+ unmount_ret = unmount (path, 0);
+ if (unmount_ret == 0)
+ gf_log ("umountd", GF_LOG_INFO, "Unmounted %s", path);
+
+ if (unmount_ret != 0 && errno != EBUSY) {
+ gf_log ("umountd", GF_LOG_WARNING,
+ "umount %s failed: %s",
+ path, strerror (errno));
+ }
+
+ ret = sys_lstat (path, &stbuf);
+ if (ret != 0) {
+ gf_log ("umountd", GF_LOG_WARNING,
+ "Cannot stat device from %s",
+ path, strerror (errno));
+ break;
+ }
+
+ if (stbuf.st_dev != dev) {
+ if (unmount_ret != 0)
+ gf_log ("umountd", GF_LOG_INFO,
+ "device mismatch "
+ "(expect %lld, found %lld), "
+ "someone else unmounted %s",
+ dev, stbuf.st_dev, path);
+ ret = 0;
+ break;
+ }
+
+ sleep (timeout);
+ } while (1/*CONSTCOND*/);
+
+ if (ret) {
+ gf_log ("umountd", GF_LOG_ERROR,
+ "Asynchronous unmount of %s failed: %s",
+ path, strerror (errno));
+ } else {
+ if (frmdir) {
+ ret = rmdir (path);
+ if (ret)
+ gf_log ("umountd", GF_LOG_WARNING,
+ "rmdir %s failed: %s",
+ path, strerror (errno));
+ else
+ gf_log ("umountd", GF_LOG_INFO,
+ "Removed %s", path);
+ }
+ }
+
+ return ret;
+}
+
+int
+main (int argc, char **argv)
+{
+ char *path = NULL;
+ dev_t dev = -1;
+ int frmdir = 0;
+ int timeout = 30;
+ int f;
+
+ while ((f = getopt (argc, argv, "d:rt:")) != -1) {
+ switch (f) {
+ case 'p':
+ path = optarg;
+ break;
+ case 'd':
+ dev = strtoll (optarg, NULL, 10);
+ break;
+ case 't':
+ timeout = atoi (optarg);
+ break;
+ case 'r':
+ frmdir = 1;
+ break;
+ default:
+ usage ();
+ break;
+ }
+ }
+
+ argc -= optind;
+ argv += optind;
+
+ if (argc != 1)
+ usage ();
+
+ path = argv[0];
+
+ if (logging_init () != 0)
+ exit (EXIT_FAILURE);
+
+ if (sanity_check (path, &dev) != 0)
+ exit (EXIT_FAILURE);
+
+ if (daemon (0, 0) != 0)
+ exit (EXIT_FAILURE);
+
+ if (umountd_async (path, dev, frmdir, timeout) != 0)
+ exit (EXIT_FAILURE);
+
+ return EXIT_SUCCESS;
+}
diff --git a/contrib/userspace-rcu/rculist-extra.h b/contrib/userspace-rcu/rculist-extra.h
new file mode 100644
index 00000000000..274cf9f9d7a
--- /dev/null
+++ b/contrib/userspace-rcu/rculist-extra.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2002 Free Software Foundation, Inc.
+ * (originally part of the GNU C Library)
+ * Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
+ *
+ * Copyright (C) 2009 Pierre-Marc Fournier
+ * Conversion to RCU list.
+ * Copyright (C) 2010 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef URCU_RCULIST_EXTRA_H
+#define URCU_RCULIST_EXTRA_H
+/* Copying this definition from liburcu-0.8 as liburcu-0.7 does not have this
+ * particular list api
+ */
+/* Add new element at the tail of the list. */
+
+static inline
+void cds_list_add_tail_rcu(struct cds_list_head *newp,
+ struct cds_list_head *head)
+{
+ newp->next = head;
+ newp->prev = head->prev;
+ rcu_assign_pointer(head->prev->next, newp);
+ head->prev = newp;
+}
+
+#endif
diff --git a/contrib/userspace-rcu/static-wfcqueue.h b/contrib/userspace-rcu/static-wfcqueue.h
new file mode 100644
index 00000000000..37d14ad674b
--- /dev/null
+++ b/contrib/userspace-rcu/static-wfcqueue.h
@@ -0,0 +1,685 @@
+#ifndef _URCU_WFCQUEUE_STATIC_H
+#define _URCU_WFCQUEUE_STATIC_H
+
+/*
+ * urcu/static/wfcqueue.h
+ *
+ * Userspace RCU library - Concurrent Queue with Wait-Free Enqueue/Blocking Dequeue
+ *
+ * TO BE INCLUDED ONLY IN LGPL-COMPATIBLE CODE. See urcu/wfcqueue.h for
+ * linking dynamically with the userspace rcu library.
+ *
+ * Copyright 2010-2012 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * Copyright 2011-2012 - Lai Jiangshan <laijs@cn.fujitsu.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Copied from userspace-rcu 0.10 because version 0.7 doesn't contain it. */
+
+#include <pthread.h>
+#include <assert.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <urcu/compiler.h>
+#include <urcu/uatomic.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Concurrent queue with wait-free enqueue/blocking dequeue.
+ *
+ * This queue has been designed and implemented collaboratively by
+ * Mathieu Desnoyers and Lai Jiangshan. Inspired from
+ * half-wait-free/half-blocking queue implementation done by Paul E.
+ * McKenney.
+ *
+ * Mutual exclusion of cds_wfcq_* / __cds_wfcq_* API
+ *
+ * Synchronization table:
+ *
+ * External synchronization techniques described in the API below is
+ * required between pairs marked with "X". No external synchronization
+ * required between pairs marked with "-".
+ *
+ * Legend:
+ * [1] cds_wfcq_enqueue
+ * [2] __cds_wfcq_splice (destination queue)
+ * [3] __cds_wfcq_dequeue
+ * [4] __cds_wfcq_splice (source queue)
+ * [5] __cds_wfcq_first
+ * [6] __cds_wfcq_next
+ *
+ * [1] [2] [3] [4] [5] [6]
+ * [1] - - - - - -
+ * [2] - - - - - -
+ * [3] - - X X X X
+ * [4] - - X - X X
+ * [5] - - X X - -
+ * [6] - - X X - -
+ *
+ * Mutual exclusion can be ensured by holding cds_wfcq_dequeue_lock().
+ *
+ * For convenience, cds_wfcq_dequeue_blocking() and
+ * cds_wfcq_splice_blocking() hold the dequeue lock.
+ *
+ * Besides locking, mutual exclusion of dequeue, splice and iteration
+ * can be ensured by performing all of those operations from a single
+ * thread, without requiring any lock.
+ */
+
+#define WFCQ_ADAPT_ATTEMPTS 10 /* Retry if being set */
+#define WFCQ_WAIT 10 /* Wait 10 ms if being set */
+
+/*
+ * cds_wfcq_node_init: initialize wait-free queue node.
+ */
+static inline void _cds_wfcq_node_init(struct cds_wfcq_node *node)
+{
+ node->next = NULL;
+}
+
+/*
+ * cds_wfcq_init: initialize wait-free queue (with lock). Pair with
+ * cds_wfcq_destroy().
+ */
+static inline void _cds_wfcq_init(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ int ret;
+
+ /* Set queue head and tail */
+ _cds_wfcq_node_init(&head->node);
+ tail->p = &head->node;
+ ret = pthread_mutex_init(&head->lock, NULL);
+ assert(!ret);
+}
+
+/*
+ * cds_wfcq_destroy: destroy wait-free queue (with lock). Pair with
+ * cds_wfcq_init().
+ */
+static inline void _cds_wfcq_destroy(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ int ret = pthread_mutex_destroy(&head->lock);
+ assert(!ret);
+}
+
+/*
+ * __cds_wfcq_init: initialize wait-free queue (without lock). Don't
+ * pair with any destroy function.
+ */
+static inline void ___cds_wfcq_init(struct __cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ /* Set queue head and tail */
+ _cds_wfcq_node_init(&head->node);
+ tail->p = &head->node;
+}
+
+/*
+ * cds_wfcq_empty: return whether wait-free queue is empty.
+ *
+ * No memory barrier is issued. No mutual exclusion is required.
+ *
+ * We perform the test on head->node.next to check if the queue is
+ * possibly empty, but we confirm this by checking if the tail pointer
+ * points to the head node because the tail pointer is the linearisation
+ * point of the enqueuers. Just checking the head next pointer could
+ * make a queue appear empty if an enqueuer is preempted for a long time
+ * between xchg() and setting the previous node's next pointer.
+ */
+static inline bool _cds_wfcq_empty(cds_wfcq_head_ptr_t u_head,
+ struct cds_wfcq_tail *tail)
+{
+ struct __cds_wfcq_head *head = u_head._h;
+ /*
+ * Queue is empty if no node is pointed by head->node.next nor
+ * tail->p. Even though the tail->p check is sufficient to find
+ * out of the queue is empty, we first check head->node.next as a
+ * common case to ensure that dequeuers do not frequently access
+ * enqueuer's tail->p cache line.
+ */
+ return CMM_LOAD_SHARED(head->node.next) == NULL
+ && CMM_LOAD_SHARED(tail->p) == &head->node;
+}
+
+static inline void _cds_wfcq_dequeue_lock(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ int ret;
+
+ ret = pthread_mutex_lock(&head->lock);
+ assert(!ret);
+}
+
+static inline void _cds_wfcq_dequeue_unlock(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ int ret;
+
+ ret = pthread_mutex_unlock(&head->lock);
+ assert(!ret);
+}
+
+static inline bool ___cds_wfcq_append(cds_wfcq_head_ptr_t u_head,
+ struct cds_wfcq_tail *tail,
+ struct cds_wfcq_node *new_head,
+ struct cds_wfcq_node *new_tail)
+{
+ struct __cds_wfcq_head *head = u_head._h;
+ struct cds_wfcq_node *old_tail;
+
+ /*
+ * Implicit memory barrier before uatomic_xchg() orders earlier
+ * stores to data structure containing node and setting
+ * node->next to NULL before publication.
+ */
+ old_tail = uatomic_xchg(&tail->p, new_tail);
+
+ /*
+ * Implicit memory barrier after uatomic_xchg() orders store to
+ * q->tail before store to old_tail->next.
+ *
+ * At this point, dequeuers see a NULL tail->p->next, which
+ * indicates that the queue is being appended to. The following
+ * store will append "node" to the queue from a dequeuer
+ * perspective.
+ */
+ CMM_STORE_SHARED(old_tail->next, new_head);
+ /*
+ * Return false if queue was empty prior to adding the node,
+ * else return true.
+ */
+ return old_tail != &head->node;
+}
+
+/*
+ * cds_wfcq_enqueue: enqueue a node into a wait-free queue.
+ *
+ * Issues a full memory barrier before enqueue. No mutual exclusion is
+ * required.
+ *
+ * Returns false if the queue was empty prior to adding the node.
+ * Returns true otherwise.
+ */
+static inline bool _cds_wfcq_enqueue(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail,
+ struct cds_wfcq_node *new_tail)
+{
+ return ___cds_wfcq_append(head, tail, new_tail, new_tail);
+}
+
+/*
+ * CDS_WFCQ_WAIT_SLEEP:
+ *
+ * By default, this sleeps for the given @msec milliseconds.
+ * This is a macro which LGPL users may #define themselves before
+ * including wfcqueue.h to override the default behavior (e.g.
+ * to log a warning or perform other background work).
+ */
+#ifndef CDS_WFCQ_WAIT_SLEEP
+#define CDS_WFCQ_WAIT_SLEEP(msec) ___cds_wfcq_wait_sleep(msec)
+#endif
+
+static inline void ___cds_wfcq_wait_sleep(int msec)
+{
+ (void) poll(NULL, 0, msec);
+}
+
+/*
+ * ___cds_wfcq_busy_wait: adaptative busy-wait.
+ *
+ * Returns 1 if nonblocking and needs to block, 0 otherwise.
+ */
+static inline bool
+___cds_wfcq_busy_wait(int *attempt, int blocking)
+{
+ if (!blocking)
+ return 1;
+ if (++(*attempt) >= WFCQ_ADAPT_ATTEMPTS) {
+ CDS_WFCQ_WAIT_SLEEP(WFCQ_WAIT); /* Wait for 10ms */
+ *attempt = 0;
+ } else {
+ caa_cpu_relax();
+ }
+ return 0;
+}
+
+/*
+ * Waiting for enqueuer to complete enqueue and return the next node.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_node_sync_next(struct cds_wfcq_node *node, int blocking)
+{
+ struct cds_wfcq_node *next;
+ int attempt = 0;
+
+ /*
+ * Adaptative busy-looping waiting for enqueuer to complete enqueue.
+ */
+ while ((next = CMM_LOAD_SHARED(node->next)) == NULL) {
+ if (___cds_wfcq_busy_wait(&attempt, blocking))
+ return CDS_WFCQ_WOULDBLOCK;
+ }
+
+ return next;
+}
+
+static inline struct cds_wfcq_node *
+___cds_wfcq_first(cds_wfcq_head_ptr_t u_head,
+ struct cds_wfcq_tail *tail,
+ int blocking)
+{
+ struct __cds_wfcq_head *head = u_head._h;
+ struct cds_wfcq_node *node;
+
+ if (_cds_wfcq_empty(__cds_wfcq_head_cast(head), tail))
+ return NULL;
+ node = ___cds_wfcq_node_sync_next(&head->node, blocking);
+ /* Load head->node.next before loading node's content */
+ cmm_smp_read_barrier_depends();
+ return node;
+}
+
+/*
+ * __cds_wfcq_first_blocking: get first node of a queue, without dequeuing.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Dequeue/splice/iteration mutual exclusion should be ensured by the
+ * caller.
+ *
+ * Used by for-like iteration macros in urcu/wfqueue.h:
+ * __cds_wfcq_for_each_blocking()
+ * __cds_wfcq_for_each_blocking_safe()
+ *
+ * Returns NULL if queue is empty, first node otherwise.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_first_blocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail)
+{
+ return ___cds_wfcq_first(head, tail, 1);
+}
+
+
+/*
+ * __cds_wfcq_first_nonblocking: get first node of a queue, without dequeuing.
+ *
+ * Same as __cds_wfcq_first_blocking, but returns CDS_WFCQ_WOULDBLOCK if
+ * it needs to block.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_first_nonblocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail)
+{
+ return ___cds_wfcq_first(head, tail, 0);
+}
+
+static inline struct cds_wfcq_node *
+___cds_wfcq_next(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail,
+ struct cds_wfcq_node *node,
+ int blocking)
+{
+ struct cds_wfcq_node *next;
+
+ /*
+ * Even though the following tail->p check is sufficient to find
+ * out if we reached the end of the queue, we first check
+ * node->next as a common case to ensure that iteration on nodes
+ * do not frequently access enqueuer's tail->p cache line.
+ */
+ if ((next = CMM_LOAD_SHARED(node->next)) == NULL) {
+ /* Load node->next before tail->p */
+ cmm_smp_rmb();
+ if (CMM_LOAD_SHARED(tail->p) == node)
+ return NULL;
+ next = ___cds_wfcq_node_sync_next(node, blocking);
+ }
+ /* Load node->next before loading next's content */
+ cmm_smp_read_barrier_depends();
+ return next;
+}
+
+/*
+ * __cds_wfcq_next_blocking: get next node of a queue, without dequeuing.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Dequeue/splice/iteration mutual exclusion should be ensured by the
+ * caller.
+ *
+ * Used by for-like iteration macros in urcu/wfqueue.h:
+ * __cds_wfcq_for_each_blocking()
+ * __cds_wfcq_for_each_blocking_safe()
+ *
+ * Returns NULL if reached end of queue, non-NULL next queue node
+ * otherwise.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_next_blocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail,
+ struct cds_wfcq_node *node)
+{
+ return ___cds_wfcq_next(head, tail, node, 1);
+}
+
+/*
+ * __cds_wfcq_next_blocking: get next node of a queue, without dequeuing.
+ *
+ * Same as __cds_wfcq_next_blocking, but returns CDS_WFCQ_WOULDBLOCK if
+ * it needs to block.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_next_nonblocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail,
+ struct cds_wfcq_node *node)
+{
+ return ___cds_wfcq_next(head, tail, node, 0);
+}
+
+static inline struct cds_wfcq_node *
+___cds_wfcq_dequeue_with_state(cds_wfcq_head_ptr_t u_head,
+ struct cds_wfcq_tail *tail,
+ int *state,
+ int blocking)
+{
+ struct __cds_wfcq_head *head = u_head._h;
+ struct cds_wfcq_node *node, *next;
+
+ if (state)
+ *state = 0;
+
+ if (_cds_wfcq_empty(__cds_wfcq_head_cast(head), tail)) {
+ return NULL;
+ }
+
+ node = ___cds_wfcq_node_sync_next(&head->node, blocking);
+ if (!blocking && node == CDS_WFCQ_WOULDBLOCK) {
+ return CDS_WFCQ_WOULDBLOCK;
+ }
+
+ if ((next = CMM_LOAD_SHARED(node->next)) == NULL) {
+ /*
+ * @node is probably the only node in the queue.
+ * Try to move the tail to &q->head.
+ * q->head.next is set to NULL here, and stays
+ * NULL if the cmpxchg succeeds. Should the
+ * cmpxchg fail due to a concurrent enqueue, the
+ * q->head.next will be set to the next node.
+ * The implicit memory barrier before
+ * uatomic_cmpxchg() orders load node->next
+ * before loading q->tail.
+ * The implicit memory barrier before uatomic_cmpxchg
+ * orders load q->head.next before loading node's
+ * content.
+ */
+ _cds_wfcq_node_init(&head->node);
+ if (uatomic_cmpxchg(&tail->p, node, &head->node) == node) {
+ if (state)
+ *state |= CDS_WFCQ_STATE_LAST;
+ return node;
+ }
+ next = ___cds_wfcq_node_sync_next(node, blocking);
+ /*
+ * In nonblocking mode, if we would need to block to
+ * get node's next, set the head next node pointer
+ * (currently NULL) back to its original value.
+ */
+ if (!blocking && next == CDS_WFCQ_WOULDBLOCK) {
+ head->node.next = node;
+ return CDS_WFCQ_WOULDBLOCK;
+ }
+ }
+
+ /*
+ * Move queue head forward.
+ */
+ head->node.next = next;
+
+ /* Load q->head.next before loading node's content */
+ cmm_smp_read_barrier_depends();
+ return node;
+}
+
+/*
+ * __cds_wfcq_dequeue_with_state_blocking: dequeue node from queue, with state.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * It is valid to reuse and free a dequeued node immediately.
+ * Dequeue/splice/iteration mutual exclusion should be ensured by the
+ * caller.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_dequeue_with_state_blocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail, int *state)
+{
+ return ___cds_wfcq_dequeue_with_state(head, tail, state, 1);
+}
+
+/*
+ * ___cds_wfcq_dequeue_blocking: dequeue node from queue.
+ *
+ * Same as __cds_wfcq_dequeue_with_state_blocking, but without saving
+ * state.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_dequeue_blocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail)
+{
+ return ___cds_wfcq_dequeue_with_state_blocking(head, tail, NULL);
+}
+
+/*
+ * __cds_wfcq_dequeue_with_state_nonblocking: dequeue node, with state.
+ *
+ * Same as __cds_wfcq_dequeue_blocking, but returns CDS_WFCQ_WOULDBLOCK
+ * if it needs to block.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_dequeue_with_state_nonblocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail, int *state)
+{
+ return ___cds_wfcq_dequeue_with_state(head, tail, state, 0);
+}
+
+/*
+ * ___cds_wfcq_dequeue_nonblocking: dequeue node from queue.
+ *
+ * Same as __cds_wfcq_dequeue_with_state_nonblocking, but without saving
+ * state.
+ */
+static inline struct cds_wfcq_node *
+___cds_wfcq_dequeue_nonblocking(cds_wfcq_head_ptr_t head,
+ struct cds_wfcq_tail *tail)
+{
+ return ___cds_wfcq_dequeue_with_state_nonblocking(head, tail, NULL);
+}
+
+/*
+ * __cds_wfcq_splice: enqueue all src_q nodes at the end of dest_q.
+ *
+ * Dequeue all nodes from src_q.
+ * dest_q must be already initialized.
+ * Mutual exclusion for src_q should be ensured by the caller as
+ * specified in the "Synchronisation table".
+ * Returns enum cds_wfcq_ret which indicates the state of the src or
+ * dest queue.
+ */
+static inline enum cds_wfcq_ret
+___cds_wfcq_splice(
+ cds_wfcq_head_ptr_t u_dest_q_head,
+ struct cds_wfcq_tail *dest_q_tail,
+ cds_wfcq_head_ptr_t u_src_q_head,
+ struct cds_wfcq_tail *src_q_tail,
+ int blocking)
+{
+ struct __cds_wfcq_head *dest_q_head = u_dest_q_head._h;
+ struct __cds_wfcq_head *src_q_head = u_src_q_head._h;
+ struct cds_wfcq_node *head, *tail;
+ int attempt = 0;
+
+ /*
+ * Initial emptiness check to speed up cases where queue is
+ * empty: only require loads to check if queue is empty.
+ */
+ if (_cds_wfcq_empty(__cds_wfcq_head_cast(src_q_head), src_q_tail))
+ return CDS_WFCQ_RET_SRC_EMPTY;
+
+ for (;;) {
+ /*
+ * Open-coded _cds_wfcq_empty() by testing result of
+ * uatomic_xchg, as well as tail pointer vs head node
+ * address.
+ */
+ head = uatomic_xchg(&src_q_head->node.next, NULL);
+ if (head)
+ break; /* non-empty */
+ if (CMM_LOAD_SHARED(src_q_tail->p) == &src_q_head->node)
+ return CDS_WFCQ_RET_SRC_EMPTY;
+ if (___cds_wfcq_busy_wait(&attempt, blocking))
+ return CDS_WFCQ_RET_WOULDBLOCK;
+ }
+
+ /*
+ * Memory barrier implied before uatomic_xchg() orders store to
+ * src_q->head before store to src_q->tail. This is required by
+ * concurrent enqueue on src_q, which exchanges the tail before
+ * updating the previous tail's next pointer.
+ */
+ tail = uatomic_xchg(&src_q_tail->p, &src_q_head->node);
+
+ /*
+ * Append the spliced content of src_q into dest_q. Does not
+ * require mutual exclusion on dest_q (wait-free).
+ */
+ if (___cds_wfcq_append(__cds_wfcq_head_cast(dest_q_head), dest_q_tail,
+ head, tail))
+ return CDS_WFCQ_RET_DEST_NON_EMPTY;
+ else
+ return CDS_WFCQ_RET_DEST_EMPTY;
+}
+
+/*
+ * __cds_wfcq_splice_blocking: enqueue all src_q nodes at the end of dest_q.
+ *
+ * Dequeue all nodes from src_q.
+ * dest_q must be already initialized.
+ * Mutual exclusion for src_q should be ensured by the caller as
+ * specified in the "Synchronisation table".
+ * Returns enum cds_wfcq_ret which indicates the state of the src or
+ * dest queue. Never returns CDS_WFCQ_RET_WOULDBLOCK.
+ */
+static inline enum cds_wfcq_ret
+___cds_wfcq_splice_blocking(
+ cds_wfcq_head_ptr_t dest_q_head,
+ struct cds_wfcq_tail *dest_q_tail,
+ cds_wfcq_head_ptr_t src_q_head,
+ struct cds_wfcq_tail *src_q_tail)
+{
+ return ___cds_wfcq_splice(dest_q_head, dest_q_tail,
+ src_q_head, src_q_tail, 1);
+}
+
+/*
+ * __cds_wfcq_splice_nonblocking: enqueue all src_q nodes at the end of dest_q.
+ *
+ * Same as __cds_wfcq_splice_blocking, but returns
+ * CDS_WFCQ_RET_WOULDBLOCK if it needs to block.
+ */
+static inline enum cds_wfcq_ret
+___cds_wfcq_splice_nonblocking(
+ cds_wfcq_head_ptr_t dest_q_head,
+ struct cds_wfcq_tail *dest_q_tail,
+ cds_wfcq_head_ptr_t src_q_head,
+ struct cds_wfcq_tail *src_q_tail)
+{
+ return ___cds_wfcq_splice(dest_q_head, dest_q_tail,
+ src_q_head, src_q_tail, 0);
+}
+
+/*
+ * cds_wfcq_dequeue_with_state_blocking: dequeue a node from a wait-free queue.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Mutual exclusion with cds_wfcq_splice_blocking and dequeue lock is
+ * ensured.
+ * It is valid to reuse and free a dequeued node immediately.
+ */
+static inline struct cds_wfcq_node *
+_cds_wfcq_dequeue_with_state_blocking(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail, int *state)
+{
+ struct cds_wfcq_node *retval;
+
+ _cds_wfcq_dequeue_lock(head, tail);
+ retval = ___cds_wfcq_dequeue_with_state_blocking(cds_wfcq_head_cast(head),
+ tail, state);
+ _cds_wfcq_dequeue_unlock(head, tail);
+ return retval;
+}
+
+/*
+ * cds_wfcq_dequeue_blocking: dequeue node from queue.
+ *
+ * Same as cds_wfcq_dequeue_blocking, but without saving state.
+ */
+static inline struct cds_wfcq_node *
+_cds_wfcq_dequeue_blocking(struct cds_wfcq_head *head,
+ struct cds_wfcq_tail *tail)
+{
+ return _cds_wfcq_dequeue_with_state_blocking(head, tail, NULL);
+}
+
+/*
+ * cds_wfcq_splice_blocking: enqueue all src_q nodes at the end of dest_q.
+ *
+ * Dequeue all nodes from src_q.
+ * dest_q must be already initialized.
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Mutual exclusion with cds_wfcq_dequeue_blocking and dequeue lock is
+ * ensured.
+ * Returns enum cds_wfcq_ret which indicates the state of the src or
+ * dest queue. Never returns CDS_WFCQ_RET_WOULDBLOCK.
+ */
+static inline enum cds_wfcq_ret
+_cds_wfcq_splice_blocking(
+ struct cds_wfcq_head *dest_q_head,
+ struct cds_wfcq_tail *dest_q_tail,
+ struct cds_wfcq_head *src_q_head,
+ struct cds_wfcq_tail *src_q_tail)
+{
+ enum cds_wfcq_ret ret;
+
+ _cds_wfcq_dequeue_lock(src_q_head, src_q_tail);
+ ret = ___cds_wfcq_splice_blocking(cds_wfcq_head_cast(dest_q_head), dest_q_tail,
+ cds_wfcq_head_cast(src_q_head), src_q_tail);
+ _cds_wfcq_dequeue_unlock(src_q_head, src_q_tail);
+ return ret;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _URCU_WFCQUEUE_STATIC_H */
diff --git a/contrib/userspace-rcu/static-wfstack.h b/contrib/userspace-rcu/static-wfstack.h
new file mode 100644
index 00000000000..29b81c3aac3
--- /dev/null
+++ b/contrib/userspace-rcu/static-wfstack.h
@@ -0,0 +1,455 @@
+#ifndef _URCU_STATIC_WFSTACK_H
+#define _URCU_STATIC_WFSTACK_H
+
+/*
+ * urcu/static/wfstack.h
+ *
+ * Userspace RCU library - Stack with with wait-free push, blocking traversal.
+ *
+ * TO BE INCLUDED ONLY IN LGPL-COMPATIBLE CODE. See urcu/wfstack.h for
+ * linking dynamically with the userspace rcu library.
+ *
+ * Copyright 2010-2012 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Adapted from userspace-rcu 0.10 because version 0.7 doesn't support a stack
+ * without mutex. */
+
+#include <pthread.h>
+#include <assert.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <urcu/compiler.h>
+#include <urcu/uatomic.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CDS_WFS_END ((void *) 0x1UL)
+#define CDS_WFS_ADAPT_ATTEMPTS 10 /* Retry if being set */
+#define CDS_WFS_WAIT 10 /* Wait 10 ms if being set */
+
+/*
+ * Stack with wait-free push, blocking traversal.
+ *
+ * Stack implementing push, pop, pop_all operations, as well as iterator
+ * on the stack head returned by pop_all.
+ *
+ * Wait-free operations: cds_wfs_push, __cds_wfs_pop_all, cds_wfs_empty,
+ * cds_wfs_first.
+ * Blocking operations: cds_wfs_pop, cds_wfs_pop_all, cds_wfs_next,
+ * iteration on stack head returned by pop_all.
+ *
+ * Synchronization table:
+ *
+ * External synchronization techniques described in the API below is
+ * required between pairs marked with "X". No external synchronization
+ * required between pairs marked with "-".
+ *
+ * cds_wfs_push __cds_wfs_pop __cds_wfs_pop_all
+ * cds_wfs_push - - -
+ * __cds_wfs_pop - X X
+ * __cds_wfs_pop_all - X -
+ *
+ * cds_wfs_pop and cds_wfs_pop_all use an internal mutex to provide
+ * synchronization.
+ */
+
+/*
+ * cds_wfs_node_init: initialize wait-free stack node.
+ */
+static inline
+void _cds_wfs_node_init(struct cds_wfs_node *node)
+{
+ node->next = NULL;
+}
+
+/*
+ * __cds_wfs_init: initialize wait-free stack. Don't pair with
+ * any destroy function.
+ */
+static inline void ___cds_wfs_init(struct __cds_wfs_stack *s)
+{
+ s->head = CDS_WFS_END;
+}
+
+/*
+ * cds_wfs_init: initialize wait-free stack. Pair with
+ * cds_wfs_destroy().
+ */
+static inline
+void _cds_wfs_init(struct cds_wfs_stack *s)
+{
+ int ret;
+
+ s->head = CDS_WFS_END;
+ ret = pthread_mutex_init(&s->lock, NULL);
+ assert(!ret);
+}
+
+/*
+ * cds_wfs_destroy: destroy wait-free stack. Pair with
+ * cds_wfs_init().
+ */
+static inline
+void _cds_wfs_destroy(struct cds_wfs_stack *s)
+{
+ int ret = pthread_mutex_destroy(&s->lock);
+ assert(!ret);
+}
+
+static inline bool ___cds_wfs_end(void *node)
+{
+ return node == CDS_WFS_END;
+}
+
+/*
+ * cds_wfs_empty: return whether wait-free stack is empty.
+ *
+ * No memory barrier is issued. No mutual exclusion is required.
+ */
+static inline bool _cds_wfs_empty(cds_wfs_stack_ptr_t u_stack)
+{
+ struct __cds_wfs_stack *s = u_stack._s;
+
+ return ___cds_wfs_end(CMM_LOAD_SHARED(s->head));
+}
+
+/*
+ * cds_wfs_push: push a node into the stack.
+ *
+ * Issues a full memory barrier before push. No mutual exclusion is
+ * required.
+ *
+ * Returns 0 if the stack was empty prior to adding the node.
+ * Returns non-zero otherwise.
+ */
+static inline
+int _cds_wfs_push(cds_wfs_stack_ptr_t u_stack, struct cds_wfs_node *node)
+{
+ struct __cds_wfs_stack *s = u_stack._s;
+ struct cds_wfs_head *old_head, *new_head;
+
+ assert(node->next == NULL);
+ new_head = caa_container_of(node, struct cds_wfs_head, node);
+ /*
+ * uatomic_xchg() implicit memory barrier orders earlier stores
+ * to node (setting it to NULL) before publication.
+ */
+ old_head = uatomic_xchg(&s->head, new_head);
+ /*
+ * At this point, dequeuers see a NULL node->next, they should
+ * busy-wait until node->next is set to old_head.
+ */
+ CMM_STORE_SHARED(node->next, &old_head->node);
+ return !___cds_wfs_end(old_head);
+}
+
+/*
+ * Waiting for push to complete enqueue and return the next node.
+ */
+static inline struct cds_wfs_node *
+___cds_wfs_node_sync_next(struct cds_wfs_node *node, int blocking)
+{
+ struct cds_wfs_node *next;
+ int attempt = 0;
+
+ /*
+ * Adaptative busy-looping waiting for push to complete.
+ */
+ while ((next = CMM_LOAD_SHARED(node->next)) == NULL) {
+ if (!blocking)
+ return CDS_WFS_WOULDBLOCK;
+ if (++attempt >= CDS_WFS_ADAPT_ATTEMPTS) {
+ (void) poll(NULL, 0, CDS_WFS_WAIT); /* Wait for 10ms */
+ attempt = 0;
+ } else {
+ caa_cpu_relax();
+ }
+ }
+
+ return next;
+}
+
+static inline
+struct cds_wfs_node *
+___cds_wfs_pop(cds_wfs_stack_ptr_t u_stack, int *state, int blocking)
+{
+ struct cds_wfs_head *head, *new_head;
+ struct cds_wfs_node *next;
+ struct __cds_wfs_stack *s = u_stack._s;
+
+ if (state)
+ *state = 0;
+ for (;;) {
+ head = CMM_LOAD_SHARED(s->head);
+ if (___cds_wfs_end(head)) {
+ return NULL;
+ }
+ next = ___cds_wfs_node_sync_next(&head->node, blocking);
+ if (!blocking && next == CDS_WFS_WOULDBLOCK) {
+ return CDS_WFS_WOULDBLOCK;
+ }
+ new_head = caa_container_of(next, struct cds_wfs_head, node);
+ if (uatomic_cmpxchg(&s->head, head, new_head) == head) {
+ if (state && ___cds_wfs_end(new_head))
+ *state |= CDS_WFS_STATE_LAST;
+ return &head->node;
+ }
+ if (!blocking) {
+ return CDS_WFS_WOULDBLOCK;
+ }
+ /* busy-loop if head changed under us */
+ }
+}
+
+/*
+ * __cds_wfs_pop_with_state_blocking: pop a node from the stack, with state.
+ *
+ * Returns NULL if stack is empty.
+ *
+ * __cds_wfs_pop_blocking needs to be synchronized using one of the
+ * following techniques:
+ *
+ * 1) Calling __cds_wfs_pop_blocking under rcu read lock critical
+ * section. The caller must wait for a grace period to pass before
+ * freeing the returned node or modifying the cds_wfs_node structure.
+ * 2) Using mutual exclusion (e.g. mutexes) to protect
+ * __cds_wfs_pop_blocking and __cds_wfs_pop_all callers.
+ * 3) Ensuring that only ONE thread can call __cds_wfs_pop_blocking()
+ * and __cds_wfs_pop_all(). (multi-provider/single-consumer scheme).
+ *
+ * "state" saves state flags atomically sampled with pop operation.
+ */
+static inline
+struct cds_wfs_node *
+___cds_wfs_pop_with_state_blocking(cds_wfs_stack_ptr_t u_stack, int *state)
+{
+ return ___cds_wfs_pop(u_stack, state, 1);
+}
+
+static inline
+struct cds_wfs_node *
+___cds_wfs_pop_blocking(cds_wfs_stack_ptr_t u_stack)
+{
+ return ___cds_wfs_pop_with_state_blocking(u_stack, NULL);
+}
+
+/*
+ * __cds_wfs_pop_with_state_nonblocking: pop a node from the stack.
+ *
+ * Same as __cds_wfs_pop_with_state_blocking, but returns
+ * CDS_WFS_WOULDBLOCK if it needs to block.
+ *
+ * "state" saves state flags atomically sampled with pop operation.
+ */
+static inline
+struct cds_wfs_node *
+___cds_wfs_pop_with_state_nonblocking(cds_wfs_stack_ptr_t u_stack, int *state)
+{
+ return ___cds_wfs_pop(u_stack, state, 0);
+}
+
+/*
+ * __cds_wfs_pop_nonblocking: pop a node from the stack.
+ *
+ * Same as __cds_wfs_pop_blocking, but returns CDS_WFS_WOULDBLOCK if
+ * it needs to block.
+ */
+static inline
+struct cds_wfs_node *
+___cds_wfs_pop_nonblocking(cds_wfs_stack_ptr_t u_stack)
+{
+ return ___cds_wfs_pop_with_state_nonblocking(u_stack, NULL);
+}
+
+/*
+ * __cds_wfs_pop_all: pop all nodes from a stack.
+ *
+ * __cds_wfs_pop_all does not require any synchronization with other
+ * push, nor with other __cds_wfs_pop_all, but requires synchronization
+ * matching the technique used to synchronize __cds_wfs_pop_blocking:
+ *
+ * 1) If __cds_wfs_pop_blocking is called under rcu read lock critical
+ * section, both __cds_wfs_pop_blocking and cds_wfs_pop_all callers
+ * must wait for a grace period to pass before freeing the returned
+ * node or modifying the cds_wfs_node structure. However, no RCU
+ * read-side critical section is needed around __cds_wfs_pop_all.
+ * 2) Using mutual exclusion (e.g. mutexes) to protect
+ * __cds_wfs_pop_blocking and __cds_wfs_pop_all callers.
+ * 3) Ensuring that only ONE thread can call __cds_wfs_pop_blocking()
+ * and __cds_wfs_pop_all(). (multi-provider/single-consumer scheme).
+ */
+static inline
+struct cds_wfs_head *
+___cds_wfs_pop_all(cds_wfs_stack_ptr_t u_stack)
+{
+ struct __cds_wfs_stack *s = u_stack._s;
+ struct cds_wfs_head *head;
+
+ /*
+ * Implicit memory barrier after uatomic_xchg() matches implicit
+ * memory barrier before uatomic_xchg() in cds_wfs_push. It
+ * ensures that all nodes of the returned list are consistent.
+ * There is no need to issue memory barriers when iterating on
+ * the returned list, because the full memory barrier issued
+ * prior to each uatomic_cmpxchg, which each write to head, are
+ * taking care to order writes to each node prior to the full
+ * memory barrier after this uatomic_xchg().
+ */
+ head = uatomic_xchg(&s->head, CDS_WFS_END);
+ if (___cds_wfs_end(head))
+ return NULL;
+ return head;
+}
+
+/*
+ * cds_wfs_pop_lock: lock stack pop-protection mutex.
+ */
+static inline void _cds_wfs_pop_lock(struct cds_wfs_stack *s)
+{
+ int ret;
+
+ ret = pthread_mutex_lock(&s->lock);
+ assert(!ret);
+}
+
+/*
+ * cds_wfs_pop_unlock: unlock stack pop-protection mutex.
+ */
+static inline void _cds_wfs_pop_unlock(struct cds_wfs_stack *s)
+{
+ int ret;
+
+ ret = pthread_mutex_unlock(&s->lock);
+ assert(!ret);
+}
+
+/*
+ * Call __cds_wfs_pop_with_state_blocking with an internal pop mutex held.
+ */
+static inline
+struct cds_wfs_node *
+_cds_wfs_pop_with_state_blocking(struct cds_wfs_stack *s, int *state)
+{
+ struct cds_wfs_node *retnode;
+
+ _cds_wfs_pop_lock(s);
+ retnode = ___cds_wfs_pop_with_state_blocking(s, state);
+ _cds_wfs_pop_unlock(s);
+ return retnode;
+}
+
+/*
+ * Call _cds_wfs_pop_with_state_blocking without saving any state.
+ */
+static inline
+struct cds_wfs_node *
+_cds_wfs_pop_blocking(struct cds_wfs_stack *s)
+{
+ return _cds_wfs_pop_with_state_blocking(s, NULL);
+}
+
+/*
+ * Call __cds_wfs_pop_all with an internal pop mutex held.
+ */
+static inline
+struct cds_wfs_head *
+_cds_wfs_pop_all_blocking(struct cds_wfs_stack *s)
+{
+ struct cds_wfs_head *rethead;
+
+ _cds_wfs_pop_lock(s);
+ rethead = ___cds_wfs_pop_all(s);
+ _cds_wfs_pop_unlock(s);
+ return rethead;
+}
+
+/*
+ * cds_wfs_first: get first node of a popped stack.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ *
+ * Used by for-like iteration macros in urcu/wfstack.h:
+ * cds_wfs_for_each_blocking()
+ * cds_wfs_for_each_blocking_safe()
+ *
+ * Returns NULL if popped stack is empty, top stack node otherwise.
+ */
+static inline struct cds_wfs_node *
+_cds_wfs_first(struct cds_wfs_head *head)
+{
+ if (___cds_wfs_end(head))
+ return NULL;
+ return &head->node;
+}
+
+static inline struct cds_wfs_node *
+___cds_wfs_next(struct cds_wfs_node *node, int blocking)
+{
+ struct cds_wfs_node *next;
+
+ next = ___cds_wfs_node_sync_next(node, blocking);
+ /*
+ * CDS_WFS_WOULDBLOCK != CSD_WFS_END, so we can check for end
+ * even if ___cds_wfs_node_sync_next returns CDS_WFS_WOULDBLOCK,
+ * and still return CDS_WFS_WOULDBLOCK.
+ */
+ if (___cds_wfs_end(next))
+ return NULL;
+ return next;
+}
+
+/*
+ * cds_wfs_next_blocking: get next node of a popped stack.
+ *
+ * Content written into the node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ *
+ * Used by for-like iteration macros in urcu/wfstack.h:
+ * cds_wfs_for_each_blocking()
+ * cds_wfs_for_each_blocking_safe()
+ *
+ * Returns NULL if reached end of popped stack, non-NULL next stack
+ * node otherwise.
+ */
+static inline struct cds_wfs_node *
+_cds_wfs_next_blocking(struct cds_wfs_node *node)
+{
+ return ___cds_wfs_next(node, 1);
+}
+
+
+/*
+ * cds_wfs_next_nonblocking: get next node of a popped stack.
+ *
+ * Same as cds_wfs_next_blocking, but returns CDS_WFS_WOULDBLOCK if it
+ * needs to block.
+ */
+static inline struct cds_wfs_node *
+_cds_wfs_next_nonblocking(struct cds_wfs_node *node)
+{
+ return ___cds_wfs_next(node, 0);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _URCU_STATIC_WFSTACK_H */
diff --git a/contrib/userspace-rcu/wfcqueue.h b/contrib/userspace-rcu/wfcqueue.h
new file mode 100644
index 00000000000..0292585ac79
--- /dev/null
+++ b/contrib/userspace-rcu/wfcqueue.h
@@ -0,0 +1,216 @@
+#ifndef _URCU_WFCQUEUE_H
+#define _URCU_WFCQUEUE_H
+
+/*
+ * urcu/wfcqueue.h
+ *
+ * Userspace RCU library - Concurrent Queue with Wait-Free Enqueue/Blocking Dequeue
+ *
+ * Copyright 2010-2012 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * Copyright 2011-2012 - Lai Jiangshan <laijs@cn.fujitsu.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Adapted from userspace-rcu 0.10 because version 0.7 doesn't contain it.
+ * The non-LGPL section has been removed. */
+
+#include <pthread.h>
+#include <assert.h>
+#include <stdbool.h>
+#include <urcu/compiler.h>
+#include <urcu/arch.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Concurrent queue with wait-free enqueue/blocking dequeue.
+ *
+ * This queue has been designed and implemented collaboratively by
+ * Mathieu Desnoyers and Lai Jiangshan. Inspired from
+ * half-wait-free/half-blocking queue implementation done by Paul E.
+ * McKenney.
+ */
+
+#define CDS_WFCQ_WOULDBLOCK ((struct cds_wfcq_node *) -1UL)
+
+enum cds_wfcq_ret {
+ CDS_WFCQ_RET_WOULDBLOCK = -1,
+ CDS_WFCQ_RET_DEST_EMPTY = 0,
+ CDS_WFCQ_RET_DEST_NON_EMPTY = 1,
+ CDS_WFCQ_RET_SRC_EMPTY = 2,
+};
+
+enum cds_wfcq_state {
+ CDS_WFCQ_STATE_LAST = (1U << 0),
+};
+
+struct cds_wfcq_node {
+ struct cds_wfcq_node *next;
+};
+
+/*
+ * Do not put head and tail on the same cache-line if concurrent
+ * enqueue/dequeue are expected from many CPUs. This eliminates
+ * false-sharing between enqueue and dequeue.
+ */
+struct __cds_wfcq_head {
+ struct cds_wfcq_node node;
+};
+
+struct cds_wfcq_head {
+ struct cds_wfcq_node node;
+ pthread_mutex_t lock;
+};
+
+#ifndef __cplusplus
+/*
+ * The transparent union allows calling functions that work on both
+ * struct cds_wfcq_head and struct __cds_wfcq_head on any of those two
+ * types.
+ */
+typedef union {
+ struct __cds_wfcq_head *_h;
+ struct cds_wfcq_head *h;
+} __attribute__((__transparent_union__)) cds_wfcq_head_ptr_t;
+
+/*
+ * This static inline is only present for compatibility with C++. It is
+ * effect-less in C.
+ */
+static inline struct __cds_wfcq_head *__cds_wfcq_head_cast(struct __cds_wfcq_head *head)
+{
+ return head;
+}
+
+/*
+ * This static inline is only present for compatibility with C++. It is
+ * effect-less in C.
+ */
+static inline struct cds_wfcq_head *cds_wfcq_head_cast(struct cds_wfcq_head *head)
+{
+ return head;
+}
+#else /* #ifndef __cplusplus */
+
+/* C++ ignores transparent union. */
+typedef union {
+ struct __cds_wfcq_head *_h;
+ struct cds_wfcq_head *h;
+} cds_wfcq_head_ptr_t;
+
+/* C++ ignores transparent union. Requires an explicit conversion. */
+static inline cds_wfcq_head_ptr_t __cds_wfcq_head_cast(struct __cds_wfcq_head *head)
+{
+ cds_wfcq_head_ptr_t ret = { ._h = head };
+ return ret;
+}
+/* C++ ignores transparent union. Requires an explicit conversion. */
+static inline cds_wfcq_head_ptr_t cds_wfcq_head_cast(struct cds_wfcq_head *head)
+{
+ cds_wfcq_head_ptr_t ret = { .h = head };
+ return ret;
+}
+#endif /* #else #ifndef __cplusplus */
+
+struct cds_wfcq_tail {
+ struct cds_wfcq_node *p;
+};
+
+#include "static-wfcqueue.h"
+
+#define cds_wfcq_node_init _cds_wfcq_node_init
+#define cds_wfcq_init _cds_wfcq_init
+#define __cds_wfcq_init ___cds_wfcq_init
+#define cds_wfcq_destroy _cds_wfcq_destroy
+#define cds_wfcq_empty _cds_wfcq_empty
+#define cds_wfcq_enqueue _cds_wfcq_enqueue
+
+/* Dequeue locking */
+#define cds_wfcq_dequeue_lock _cds_wfcq_dequeue_lock
+#define cds_wfcq_dequeue_unlock _cds_wfcq_dequeue_unlock
+
+/* Locking performed within cds_wfcq calls. */
+#define cds_wfcq_dequeue_blocking _cds_wfcq_dequeue_blocking
+#define cds_wfcq_dequeue_with_state_blocking \
+ _cds_wfcq_dequeue_with_state_blocking
+#define cds_wfcq_splice_blocking _cds_wfcq_splice_blocking
+#define cds_wfcq_first_blocking _cds_wfcq_first_blocking
+#define cds_wfcq_next_blocking _cds_wfcq_next_blocking
+
+/* Locking ensured by caller by holding cds_wfcq_dequeue_lock() */
+#define __cds_wfcq_dequeue_blocking ___cds_wfcq_dequeue_blocking
+#define __cds_wfcq_dequeue_with_state_blocking \
+ ___cds_wfcq_dequeue_with_state_blocking
+#define __cds_wfcq_splice_blocking ___cds_wfcq_splice_blocking
+#define __cds_wfcq_first_blocking ___cds_wfcq_first_blocking
+#define __cds_wfcq_next_blocking ___cds_wfcq_next_blocking
+
+/*
+ * Locking ensured by caller by holding cds_wfcq_dequeue_lock().
+ * Non-blocking: deque, first, next return CDS_WFCQ_WOULDBLOCK if they
+ * need to block. splice returns nonzero if it needs to block.
+ */
+#define __cds_wfcq_dequeue_nonblocking ___cds_wfcq_dequeue_nonblocking
+#define __cds_wfcq_dequeue_with_state_nonblocking \
+ ___cds_wfcq_dequeue_with_state_nonblocking
+#define __cds_wfcq_splice_nonblocking ___cds_wfcq_splice_nonblocking
+#define __cds_wfcq_first_nonblocking ___cds_wfcq_first_nonblocking
+#define __cds_wfcq_next_nonblocking ___cds_wfcq_next_nonblocking
+
+/*
+ * __cds_wfcq_for_each_blocking: Iterate over all nodes in a queue,
+ * without dequeuing them.
+ * @head: head of the queue (struct cds_wfcq_head or __cds_wfcq_head pointer).
+ * @tail: tail of the queue (struct cds_wfcq_tail pointer).
+ * @node: iterator on the queue (struct cds_wfcq_node pointer).
+ *
+ * Content written into each node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Dequeue/splice/iteration mutual exclusion should be ensured by the
+ * caller.
+ */
+#define __cds_wfcq_for_each_blocking(head, tail, node) \
+ for (node = __cds_wfcq_first_blocking(head, tail); \
+ node != NULL; \
+ node = __cds_wfcq_next_blocking(head, tail, node))
+
+/*
+ * __cds_wfcq_for_each_blocking_safe: Iterate over all nodes in a queue,
+ * without dequeuing them. Safe against deletion.
+ * @head: head of the queue (struct cds_wfcq_head or __cds_wfcq_head pointer).
+ * @tail: tail of the queue (struct cds_wfcq_tail pointer).
+ * @node: iterator on the queue (struct cds_wfcq_node pointer).
+ * @n: struct cds_wfcq_node pointer holding the next pointer (used
+ * internally).
+ *
+ * Content written into each node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ * Dequeue/splice/iteration mutual exclusion should be ensured by the
+ * caller.
+ */
+#define __cds_wfcq_for_each_blocking_safe(head, tail, node, n) \
+ for (node = __cds_wfcq_first_blocking(head, tail), \
+ n = (node ? __cds_wfcq_next_blocking(head, tail, node) : NULL); \
+ node != NULL; \
+ node = n, n = (node ? __cds_wfcq_next_blocking(head, tail, node) : NULL))
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _URCU_WFCQUEUE_H */
diff --git a/contrib/userspace-rcu/wfstack.h b/contrib/userspace-rcu/wfstack.h
new file mode 100644
index 00000000000..738fd1cfd33
--- /dev/null
+++ b/contrib/userspace-rcu/wfstack.h
@@ -0,0 +1,178 @@
+#ifndef _URCU_WFSTACK_H
+#define _URCU_WFSTACK_H
+
+/*
+ * urcu/wfstack.h
+ *
+ * Userspace RCU library - Stack with wait-free push, blocking traversal.
+ *
+ * Copyright 2010-2012 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Adapted from userspace-rcu 0.10 because version 0.7 doesn't support a stack
+ * without mutex. The non-LGPL section has been removed. */
+
+#include <pthread.h>
+#include <assert.h>
+#include <stdbool.h>
+#include <urcu/compiler.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Stack with wait-free push, blocking traversal.
+ *
+ * Stack implementing push, pop, pop_all operations, as well as iterator
+ * on the stack head returned by pop_all.
+ *
+ * Wait-free operations: cds_wfs_push, __cds_wfs_pop_all, cds_wfs_empty,
+ * cds_wfs_first.
+ * Blocking operations: cds_wfs_pop, cds_wfs_pop_all, cds_wfs_next,
+ * iteration on stack head returned by pop_all.
+ *
+ * Synchronization table:
+ *
+ * External synchronization techniques described in the API below is
+ * required between pairs marked with "X". No external synchronization
+ * required between pairs marked with "-".
+ *
+ * cds_wfs_push __cds_wfs_pop __cds_wfs_pop_all
+ * cds_wfs_push - - -
+ * __cds_wfs_pop - X X
+ * __cds_wfs_pop_all - X -
+ *
+ * cds_wfs_pop and cds_wfs_pop_all use an internal mutex to provide
+ * synchronization.
+ */
+
+#define CDS_WFS_WOULDBLOCK ((void *) -1UL)
+
+enum cds_wfs_state {
+ CDS_WFS_STATE_LAST = (1U << 0),
+};
+
+/*
+ * struct cds_wfs_node is returned by __cds_wfs_pop, and also used as
+ * iterator on stack. It is not safe to dereference the node next
+ * pointer when returned by __cds_wfs_pop_blocking.
+ */
+struct cds_wfs_node {
+ struct cds_wfs_node *next;
+};
+
+/*
+ * struct cds_wfs_head is returned by __cds_wfs_pop_all, and can be used
+ * to begin iteration on the stack. "node" needs to be the first field of
+ * cds_wfs_head, so the end-of-stack pointer value can be used for both
+ * types.
+ */
+struct cds_wfs_head {
+ struct cds_wfs_node node;
+};
+
+struct __cds_wfs_stack {
+ struct cds_wfs_head *head;
+};
+
+struct cds_wfs_stack {
+ struct cds_wfs_head *head;
+ pthread_mutex_t lock;
+};
+
+/*
+ * The transparent union allows calling functions that work on both
+ * struct cds_wfs_stack and struct __cds_wfs_stack on any of those two
+ * types.
+ */
+typedef union {
+ struct __cds_wfs_stack *_s;
+ struct cds_wfs_stack *s;
+} __attribute__((__transparent_union__)) cds_wfs_stack_ptr_t;
+
+#include "static-wfstack.h"
+
+#define cds_wfs_node_init _cds_wfs_node_init
+#define cds_wfs_init _cds_wfs_init
+#define cds_wfs_destroy _cds_wfs_destroy
+#define __cds_wfs_init ___cds_wfs_init
+#define cds_wfs_empty _cds_wfs_empty
+#define cds_wfs_push _cds_wfs_push
+
+/* Locking performed internally */
+#define cds_wfs_pop_blocking _cds_wfs_pop_blocking
+#define cds_wfs_pop_with_state_blocking _cds_wfs_pop_with_state_blocking
+#define cds_wfs_pop_all_blocking _cds_wfs_pop_all_blocking
+
+/*
+ * For iteration on cds_wfs_head returned by __cds_wfs_pop_all or
+ * cds_wfs_pop_all_blocking.
+ */
+#define cds_wfs_first _cds_wfs_first
+#define cds_wfs_next_blocking _cds_wfs_next_blocking
+#define cds_wfs_next_nonblocking _cds_wfs_next_nonblocking
+
+/* Pop locking with internal mutex */
+#define cds_wfs_pop_lock _cds_wfs_pop_lock
+#define cds_wfs_pop_unlock _cds_wfs_pop_unlock
+
+/* Synchronization ensured by the caller. See synchronization table. */
+#define __cds_wfs_pop_blocking ___cds_wfs_pop_blocking
+#define __cds_wfs_pop_with_state_blocking \
+ ___cds_wfs_pop_with_state_blocking
+#define __cds_wfs_pop_nonblocking ___cds_wfs_pop_nonblocking
+#define __cds_wfs_pop_with_state_nonblocking \
+ ___cds_wfs_pop_with_state_nonblocking
+#define __cds_wfs_pop_all ___cds_wfs_pop_all
+
+#ifdef __cplusplus
+}
+#endif
+
+/*
+ * cds_wfs_for_each_blocking: Iterate over all nodes returned by
+ * __cds_wfs_pop_all().
+ * @head: head of the queue (struct cds_wfs_head pointer).
+ * @node: iterator (struct cds_wfs_node pointer).
+ *
+ * Content written into each node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ */
+#define cds_wfs_for_each_blocking(head, node) \
+ for (node = cds_wfs_first(head); \
+ node != NULL; \
+ node = cds_wfs_next_blocking(node))
+
+/*
+ * cds_wfs_for_each_blocking_safe: Iterate over all nodes returned by
+ * __cds_wfs_pop_all(). Safe against deletion.
+ * @head: head of the queue (struct cds_wfs_head pointer).
+ * @node: iterator (struct cds_wfs_node pointer).
+ * @n: struct cds_wfs_node pointer holding the next pointer (used
+ * internally).
+ *
+ * Content written into each node before enqueue is guaranteed to be
+ * consistent, but no other memory ordering is ensured.
+ */
+#define cds_wfs_for_each_blocking_safe(head, node, n) \
+ for (node = cds_wfs_first(head), \
+ n = (node ? cds_wfs_next_blocking(node) : NULL); \
+ node != NULL; \
+ node = n, n = (node ? cds_wfs_next_blocking(node) : NULL))
+
+#endif /* _URCU_WFSTACK_H */
diff --git a/contrib/uuid/clear.c b/contrib/uuid/clear.c
deleted file mode 100644
index 2d91fee9399..00000000000
--- a/contrib/uuid/clear.c
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- * clear.c -- Clear a UUID
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include "string.h"
-
-#include "uuidP.h"
-
-void uuid_clear(uuid_t uu)
-{
- memset(uu, 0, 16);
-}
-
diff --git a/contrib/uuid/compare.c b/contrib/uuid/compare.c
deleted file mode 100644
index f28a72678cf..00000000000
--- a/contrib/uuid/compare.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * compare.c --- compare whether or not two UUID's are the same
- *
- * Returns 0 if the two UUID's are different, and 1 if they are the same.
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include "uuidP.h"
-#include <string.h>
-
-#define UUCMP(u1,u2) if (u1 != u2) return((u1 < u2) ? -1 : 1);
-
-int uuid_compare(const uuid_t uu1, const uuid_t uu2)
-{
- struct uuid uuid1, uuid2;
-
- uuid_unpack(uu1, &uuid1);
- uuid_unpack(uu2, &uuid2);
-
- UUCMP(uuid1.time_low, uuid2.time_low);
- UUCMP(uuid1.time_mid, uuid2.time_mid);
- UUCMP(uuid1.time_hi_and_version, uuid2.time_hi_and_version);
- UUCMP(uuid1.clock_seq, uuid2.clock_seq);
- return memcmp(uuid1.node, uuid2.node, 6);
-}
-
diff --git a/contrib/uuid/copy.c b/contrib/uuid/copy.c
deleted file mode 100644
index ead33aa26e8..00000000000
--- a/contrib/uuid/copy.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * copy.c --- copy UUIDs
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include "uuidP.h"
-
-void uuid_copy(uuid_t dst, const uuid_t src)
-{
- unsigned char *cp1;
- const unsigned char *cp2;
- int i;
-
- for (i=0, cp1 = dst, cp2 = src; i < 16; i++)
- *cp1++ = *cp2++;
-}
diff --git a/contrib/uuid/gen_uuid.c b/contrib/uuid/gen_uuid.c
deleted file mode 100644
index b3eda9de387..00000000000
--- a/contrib/uuid/gen_uuid.c
+++ /dev/null
@@ -1,679 +0,0 @@
-/*
- * gen_uuid.c --- generate a DCE-compatible uuid
- *
- * Copyright (C) 1996, 1997, 1998, 1999 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-/*
- * Force inclusion of SVID stuff since we need it if we're compiling in
- * gcc-wall wall mode
- */
-#define _SVID_SOURCE
-
-#include "config.h"
-#ifdef _WIN32
-#define _WIN32_WINNT 0x0500
-#include <windows.h>
-#define UUID MYUUID
-#endif
-#include <stdio.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#ifdef HAVE_STDLIB_H
-#include <stdlib.h>
-#endif
-#include <string.h>
-#include <fcntl.h>
-#include <errno.h>
-#include <sys/types.h>
-#ifdef HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif
-#include <sys/wait.h>
-#include <sys/stat.h>
-#ifdef HAVE_SYS_FILE_H
-#include <sys/file.h>
-#endif
-#ifdef HAVE_SYS_IOCTL_H
-#include <sys/ioctl.h>
-#endif
-#ifdef HAVE_SYS_SOCKET_H
-#include <sys/socket.h>
-#endif
-#ifdef HAVE_SYS_UN_H
-#include <sys/un.h>
-#endif
-#ifdef HAVE_SYS_SOCKIO_H
-#include <sys/sockio.h>
-#endif
-#ifdef HAVE_NET_IF_H
-#include <net/if.h>
-#endif
-#ifdef HAVE_NETINET_IN_H
-#include <netinet/in.h>
-#endif
-#ifdef HAVE_NET_IF_DL_H
-#include <net/if_dl.h>
-#endif
-#if defined(__linux__) && defined(HAVE_SYS_SYSCALL_H)
-#include <sys/syscall.h>
-#endif
-#ifdef HAVE_SYS_RESOURCE_H
-#include <sys/resource.h>
-#endif
-#include <limits.h>
-
-#include "uuidP.h"
-#include "uuidd.h"
-
-#ifdef HAVE_SRANDOM
-#define srand(x) srandom(x)
-#define rand() random()
-#endif
-
-#ifdef TLS
-#define THREAD_LOCAL static TLS
-#else
-#define THREAD_LOCAL static
-#endif
-
-#if defined(__linux__) && defined(__NR_gettid) && defined(HAVE_JRAND48)
-#define DO_JRAND_MIX
-THREAD_LOCAL unsigned short jrand_seed[3];
-#endif
-
-#ifndef OPEN_MAX
-#define OPEN_MAX 1024
-#endif
-
-#ifdef _WIN32
-static void gettimeofday (struct timeval *tv, void *dummy)
-{
- FILETIME ftime;
- uint64_t n;
-
- GetSystemTimeAsFileTime (&ftime);
- n = (((uint64_t) ftime.dwHighDateTime << 32)
- + (uint64_t) ftime.dwLowDateTime);
- if (n) {
- n /= 10;
- n -= ((369 * 365 + 89) * (uint64_t) 86400) * 1000000;
- }
-
- tv->tv_sec = n / 1000000;
- tv->tv_usec = n % 1000000;
-}
-
-static int getuid (void)
-{
- return 1;
-}
-#endif
-
-static int get_random_fd(void)
-{
- struct timeval tv;
- static int fd = -2;
- int i;
-
- if (fd == -2) {
- gettimeofday(&tv, 0);
-#ifndef _WIN32
- fd = open("/dev/urandom", O_RDONLY);
- if (fd == -1)
- fd = open("/dev/random", O_RDONLY | O_NONBLOCK);
- if (fd >= 0) {
- i = fcntl(fd, F_GETFD);
- if (i >= 0)
- fcntl(fd, F_SETFD, i | FD_CLOEXEC);
- }
-#endif
- srand((getpid() << 16) ^ getuid() ^ tv.tv_sec ^ tv.tv_usec);
-#ifdef DO_JRAND_MIX
- jrand_seed[0] = getpid() ^ (tv.tv_sec & 0xFFFF);
- jrand_seed[1] = getppid() ^ (tv.tv_usec & 0xFFFF);
- jrand_seed[2] = (tv.tv_sec ^ tv.tv_usec) >> 16;
-#endif
- }
- /* Crank the random number generator a few times */
- gettimeofday(&tv, 0);
- for (i = (tv.tv_sec ^ tv.tv_usec) & 0x1F; i > 0; i--)
- rand();
- return fd;
-}
-
-
-/*
- * Generate a series of random bytes. Use /dev/urandom if possible,
- * and if not, use srandom/random.
- */
-static void get_random_bytes(void *buf, int nbytes)
-{
- int i, n = nbytes, fd = get_random_fd();
- int lose_counter = 0;
- unsigned char *cp = (unsigned char *) buf;
-#ifdef DO_JRAND_MIX
- unsigned short tmp_seed[3];
-#endif
- if (fd >= 0) {
- while (n > 0) {
- i = read(fd, cp, n);
- if (i <= 0) {
- if (lose_counter++ > 16)
- break;
- continue;
- }
- n -= i;
- cp += i;
- lose_counter = 0;
- }
- }
-
- /*
- * We do this all the time, but this is the only source of
- * randomness if /dev/random/urandom is out to lunch.
- */
- for (cp = buf, i = 0; i < nbytes; i++)
- *cp++ ^= (rand() >> 7) & 0xFF;
-#ifdef DO_JRAND_MIX
- memcpy(tmp_seed, jrand_seed, sizeof(tmp_seed));
- jrand_seed[2] = jrand_seed[2] ^ syscall(__NR_gettid);
- for (cp = buf, i = 0; i < nbytes; i++)
- *cp++ ^= (jrand48(tmp_seed) >> 7) & 0xFF;
- memcpy(jrand_seed, tmp_seed,
- sizeof(jrand_seed)-sizeof(unsigned short));
-#endif
-
- return;
-}
-
-/*
- * Get the ethernet hardware address, if we can find it...
- *
- * XXX for a windows version, probably should use GetAdaptersInfo:
- * http://www.codeguru.com/cpp/i-n/network/networkinformation/article.php/c5451
- * commenting out get_node_id just to get gen_uuid to compile under windows
- * is not the right way to go!
- */
-static int get_node_id(unsigned char *node_id)
-{
-#ifdef HAVE_NET_IF_H
- int sd;
- struct ifreq ifr, *ifrp;
- struct ifconf ifc;
- char buf[1024];
- int n, i;
- unsigned char *a;
-#ifdef HAVE_NET_IF_DL_H
- struct sockaddr_dl *sdlp;
-#endif
-
-/*
- * BSD 4.4 defines the size of an ifreq to be
- * max(sizeof(ifreq), sizeof(ifreq.ifr_name)+ifreq.ifr_addr.sa_len
- * However, under earlier systems, sa_len isn't present, so the size is
- * just sizeof(struct ifreq)
- */
-#ifdef HAVE_SA_LEN
-#ifndef max
-#define max(a,b) ((a) > (b) ? (a) : (b))
-#endif
-#define ifreq_size(i) max(sizeof(struct ifreq),\
- sizeof((i).ifr_name)+(i).ifr_addr.sa_len)
-#else
-#define ifreq_size(i) sizeof(struct ifreq)
-#endif /* HAVE_SA_LEN*/
-
- sd = socket(AF_INET, SOCK_DGRAM, IPPROTO_IP);
- if (sd < 0) {
- return -1;
- }
- memset(buf, 0, sizeof(buf));
- ifc.ifc_len = sizeof(buf);
- ifc.ifc_buf = buf;
- if (ioctl (sd, SIOCGIFCONF, (char *)&ifc) < 0) {
- close(sd);
- return -1;
- }
- n = ifc.ifc_len;
- for (i = 0; i < n; i+= ifreq_size(*ifrp) ) {
- ifrp = (struct ifreq *)((char *) ifc.ifc_buf+i);
- strncpy(ifr.ifr_name, ifrp->ifr_name, IFNAMSIZ);
-#ifdef SIOCGIFHWADDR
- if (ioctl(sd, SIOCGIFHWADDR, &ifr) < 0)
- continue;
- a = (unsigned char *) &ifr.ifr_hwaddr.sa_data;
-#else
-#ifdef SIOCGENADDR
- if (ioctl(sd, SIOCGENADDR, &ifr) < 0)
- continue;
- a = (unsigned char *) ifr.ifr_enaddr;
-#else
-#ifdef HAVE_NET_IF_DL_H
- sdlp = (struct sockaddr_dl *) &ifrp->ifr_addr;
- if ((sdlp->sdl_family != AF_LINK) || (sdlp->sdl_alen != 6))
- continue;
- a = (unsigned char *) &sdlp->sdl_data[sdlp->sdl_nlen];
-#else
- /*
- * XXX we don't have a way of getting the hardware
- * address
- */
- close(sd);
- return 0;
-#endif /* HAVE_NET_IF_DL_H */
-#endif /* SIOCGENADDR */
-#endif /* SIOCGIFHWADDR */
- if (!a[0] && !a[1] && !a[2] && !a[3] && !a[4] && !a[5])
- continue;
- if (node_id) {
- memcpy(node_id, a, 6);
- close(sd);
- return 1;
- }
- }
- close(sd);
-#endif
- return 0;
-}
-
-/* Assume that the gettimeofday() has microsecond granularity */
-#define MAX_ADJUSTMENT 10
-
-static int get_clock(uint32_t *clock_high, uint32_t *clock_low,
- uint16_t *ret_clock_seq, int *num)
-{
- THREAD_LOCAL int adjustment = 0;
- THREAD_LOCAL struct timeval last = {0, 0};
- THREAD_LOCAL int state_fd = -2;
- THREAD_LOCAL FILE *state_f;
- THREAD_LOCAL uint16_t clock_seq;
- struct timeval tv;
- struct flock fl;
- uint64_t clock_reg;
- mode_t save_umask;
- int len;
-
- if (state_fd == -2) {
- save_umask = umask(0);
- state_fd = open("/var/lib/libuuid/clock.txt",
- O_RDWR|O_CREAT, 0660);
- (void) umask(save_umask);
- state_f = fdopen(state_fd, "r+");
- if (!state_f) {
- close(state_fd);
- state_fd = -1;
- }
- }
- fl.l_type = F_WRLCK;
- fl.l_whence = SEEK_SET;
- fl.l_start = 0;
- fl.l_len = 0;
- fl.l_pid = 0;
- if (state_fd >= 0) {
- rewind(state_f);
- while (fcntl(state_fd, F_SETLKW, &fl) < 0) {
- if ((errno == EAGAIN) || (errno == EINTR))
- continue;
- fclose(state_f);
- close(state_fd);
- state_fd = -1;
- break;
- }
- }
- if (state_fd >= 0) {
- unsigned int cl;
- unsigned long tv1, tv2;
- int a;
-
- if (fscanf(state_f, "clock: %04x tv: %lu %lu adj: %d\n",
- &cl, &tv1, &tv2, &a) == 4) {
- clock_seq = cl & 0x3FFF;
- last.tv_sec = tv1;
- last.tv_usec = tv2;
- adjustment = a;
- }
- }
-
- if ((last.tv_sec == 0) && (last.tv_usec == 0)) {
- get_random_bytes(&clock_seq, sizeof(clock_seq));
- clock_seq &= 0x3FFF;
- gettimeofday(&last, 0);
- last.tv_sec--;
- }
-
-try_again:
- gettimeofday(&tv, 0);
- if ((tv.tv_sec < last.tv_sec) ||
- ((tv.tv_sec == last.tv_sec) &&
- (tv.tv_usec < last.tv_usec))) {
- clock_seq = (clock_seq+1) & 0x3FFF;
- adjustment = 0;
- last = tv;
- } else if ((tv.tv_sec == last.tv_sec) &&
- (tv.tv_usec == last.tv_usec)) {
- if (adjustment >= MAX_ADJUSTMENT)
- goto try_again;
- adjustment++;
- } else {
- adjustment = 0;
- last = tv;
- }
-
- clock_reg = tv.tv_usec*10 + adjustment;
- clock_reg += ((uint64_t) tv.tv_sec)*10000000;
- clock_reg += (((uint64_t) 0x01B21DD2) << 32) + 0x13814000;
-
- if (num && (*num > 1)) {
- adjustment += *num - 1;
- last.tv_usec += adjustment / 10;
- adjustment = adjustment % 10;
- last.tv_sec += last.tv_usec / 1000000;
- last.tv_usec = last.tv_usec % 1000000;
- }
-
- if (state_fd > 0) {
- rewind(state_f);
- len = fprintf(state_f,
- "clock: %04x tv: %016lu %08lu adj: %08d\n",
- clock_seq, last.tv_sec, last.tv_usec, adjustment);
- fflush(state_f);
- if (ftruncate(state_fd, len) < 0) {
- fprintf(state_f, " \n");
- fflush(state_f);
- }
- rewind(state_f);
- fl.l_type = F_UNLCK;
- fcntl(state_fd, F_SETLK, &fl);
- }
-
- *clock_high = clock_reg >> 32;
- *clock_low = clock_reg;
- *ret_clock_seq = clock_seq;
- return 0;
-}
-
-#if defined(USE_UUIDD) && defined(HAVE_SYS_UN_H)
-static ssize_t read_all(int fd, char *buf, size_t count)
-{
- ssize_t ret;
- ssize_t c = 0;
- int tries = 0;
-
- memset(buf, 0, count);
- while (count > 0) {
- ret = read(fd, buf, count);
- if (ret <= 0) {
- if ((errno == EAGAIN || errno == EINTR || ret == 0) &&
- (tries++ < 5))
- continue;
- return c ? c : -1;
- }
- if (ret > 0)
- tries = 0;
- count -= ret;
- buf += ret;
- c += ret;
- }
- return c;
-}
-#endif
-
-/*
- * Close all file descriptors
- */
-#if defined(USE_UUIDD) && defined(HAVE_SYS_UN_H)
-static void close_all_fds(void)
-{
- int i, max;
-
-#if defined(HAVE_SYSCONF) && defined(_SC_OPEN_MAX)
- max = sysconf(_SC_OPEN_MAX);
-#elif defined(HAVE_GETDTABLESIZE)
- max = getdtablesize();
-#elif defined(HAVE_GETRLIMIT) && defined(RLIMIT_NOFILE)
- struct rlimit rl;
-
- getrlimit(RLIMIT_NOFILE, &rl);
- max = rl.rlim_cur;
-#else
- max = OPEN_MAX;
-#endif
-
- for (i=0; i < max; i++) {
- close(i);
- if (i <= 2)
- open("/dev/null", O_RDWR);
- }
-}
-#endif
-
-
-/*
- * Try using the uuidd daemon to generate the UUID
- *
- * Returns 0 on success, non-zero on failure.
- */
-static int get_uuid_via_daemon(int op, uuid_t out, int *num)
-{
-#if defined(USE_UUIDD) && defined(HAVE_SYS_UN_H)
- char op_buf[64];
- int op_len;
- int s;
- ssize_t ret;
- int32_t reply_len = 0, expected = 16;
- struct sockaddr_un srv_addr;
- struct stat st;
- pid_t pid;
- static const char *uuidd_path = UUIDD_PATH;
- static int access_ret = -2;
- static int start_attempts = 0;
-
- if ((s = socket(AF_UNIX, SOCK_STREAM, 0)) < 0)
- return -1;
-
- srv_addr.sun_family = AF_UNIX;
- strcpy(srv_addr.sun_path, UUIDD_SOCKET_PATH);
-
- if (connect(s, (const struct sockaddr *) &srv_addr,
- sizeof(struct sockaddr_un)) < 0) {
- if (access_ret == -2)
- access_ret = access(uuidd_path, X_OK);
- if (access_ret == 0)
- access_ret = stat(uuidd_path, &st);
- if (access_ret == 0 && (st.st_mode & (S_ISUID | S_ISGID)) == 0)
- access_ret = access(UUIDD_DIR, W_OK);
- if (access_ret == 0 && start_attempts++ < 5) {
- if ((pid = fork()) == 0) {
- close_all_fds();
- execl(uuidd_path, "uuidd", "-qT", "300",
- (char *) NULL);
- exit(1);
- }
- (void) waitpid(pid, 0, 0);
- if (connect(s, (const struct sockaddr *) &srv_addr,
- sizeof(struct sockaddr_un)) < 0)
- goto fail;
- } else
- goto fail;
- }
- op_buf[0] = op;
- op_len = 1;
- if (op == UUIDD_OP_BULK_TIME_UUID) {
- memcpy(op_buf+1, num, sizeof(*num));
- op_len += sizeof(*num);
- expected += sizeof(*num);
- }
-
- ret = write(s, op_buf, op_len);
- if (ret < 1)
- goto fail;
-
- ret = read_all(s, (char *) &reply_len, sizeof(reply_len));
- if (ret < 0)
- goto fail;
-
- if (reply_len != expected)
- goto fail;
-
- ret = read_all(s, op_buf, reply_len);
-
- if (op == UUIDD_OP_BULK_TIME_UUID)
- memcpy(op_buf+16, num, sizeof(int));
-
- memcpy(out, op_buf, 16);
-
- close(s);
- return ((ret == expected) ? 0 : -1);
-
-fail:
- close(s);
-#endif
- return -1;
-}
-
-void uuid__generate_time(uuid_t out, int *num)
-{
- static unsigned char node_id[6];
- static int has_init = 0;
- struct uuid uu;
- uint32_t clock_mid;
-
- if (!has_init) {
- if (get_node_id(node_id) <= 0) {
- get_random_bytes(node_id, 6);
- /*
- * Set multicast bit, to prevent conflicts
- * with IEEE 802 addresses obtained from
- * network cards
- */
- node_id[0] |= 0x01;
- }
- has_init = 1;
- }
- get_clock(&clock_mid, &uu.time_low, &uu.clock_seq, num);
- uu.clock_seq |= 0x8000;
- uu.time_mid = (uint16_t) clock_mid;
- uu.time_hi_and_version = ((clock_mid >> 16) & 0x0FFF) | 0x1000;
- memcpy(uu.node, node_id, 6);
- uuid_pack(&uu, out);
-}
-
-void uuid_generate_time(uuid_t out)
-{
-#ifdef TLS
- THREAD_LOCAL int num = 0;
- THREAD_LOCAL struct uuid uu;
- THREAD_LOCAL time_t last_time = 0;
- time_t now;
-
- if (num > 0) {
- now = time(0);
- if (now > last_time+1)
- num = 0;
- }
- if (num <= 0) {
- num = 1000;
- if (get_uuid_via_daemon(UUIDD_OP_BULK_TIME_UUID,
- out, &num) == 0) {
- last_time = time(0);
- uuid_unpack(out, &uu);
- num--;
- return;
- }
- num = 0;
- }
- if (num > 0) {
- uu.time_low++;
- if (uu.time_low == 0) {
- uu.time_mid++;
- if (uu.time_mid == 0)
- uu.time_hi_and_version++;
- }
- num--;
- uuid_pack(&uu, out);
- return;
- }
-#else
- if (get_uuid_via_daemon(UUIDD_OP_TIME_UUID, out, 0) == 0)
- return;
-#endif
-
- uuid__generate_time(out, 0);
-}
-
-
-void uuid__generate_random(uuid_t out, int *num)
-{
- uuid_t buf;
- struct uuid uu;
- int i, n;
-
- if (!num || !*num)
- n = 1;
- else
- n = *num;
-
- for (i = 0; i < n; i++) {
- get_random_bytes(buf, sizeof(buf));
- uuid_unpack(buf, &uu);
-
- uu.clock_seq = (uu.clock_seq & 0x3FFF) | 0x8000;
- uu.time_hi_and_version = (uu.time_hi_and_version & 0x0FFF)
- | 0x4000;
- uuid_pack(&uu, out);
- out += sizeof(uuid_t);
- }
-}
-
-void uuid_generate_random(uuid_t out)
-{
- int num = 1;
- /* No real reason to use the daemon for random uuid's -- yet */
-
- uuid__generate_random(out, &num);
-}
-
-
-/*
- * This is the generic front-end to uuid_generate_random and
- * uuid_generate_time. It uses uuid_generate_random only if
- * /dev/urandom is available, since otherwise we won't have
- * high-quality randomness.
- */
-void uuid_generate(uuid_t out)
-{
- if (get_random_fd() >= 0)
- uuid_generate_random(out);
- else
- uuid_generate_time(out);
-}
diff --git a/contrib/uuid/gen_uuid_nt.c b/contrib/uuid/gen_uuid_nt.c
deleted file mode 100644
index aa44bfd3d7d..00000000000
--- a/contrib/uuid/gen_uuid_nt.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * gen_uuid_nt.c -- Use NT api to generate uuid
- *
- * Written by Andrey Shedel (andreys@ns.cr.cyco.com)
- */
-
-
-#include "uuidP.h"
-
-#pragma warning(push,4)
-
-#pragma comment(lib, "ntdll.lib")
-
-//
-// Here is a nice example why it's not a good idea
-// to use native API in ordinary applications.
-// Number of parameters in function below was changed from 3 to 4
-// for NT5.
-//
-//
-// NTSYSAPI
-// NTSTATUS
-// NTAPI
-// NtAllocateUuids(
-// OUT PULONG p1,
-// OUT PULONG p2,
-// OUT PULONG p3,
-// OUT PUCHAR Seed // 6 bytes
-// );
-//
-//
-
-unsigned long
-__stdcall
-NtAllocateUuids(
- void* p1, // 8 bytes
- void* p2, // 4 bytes
- void* p3 // 4 bytes
- );
-
-typedef
-unsigned long
-(__stdcall*
-NtAllocateUuids_2000)(
- void* p1, // 8 bytes
- void* p2, // 4 bytes
- void* p3, // 4 bytes
- void* seed // 6 bytes
- );
-
-
-
-//
-// Nice, but instead of including ntddk.h ot winnt.h
-// I should define it here because they MISSED __stdcall in those headers.
-//
-
-__declspec(dllimport)
-struct _TEB*
-__stdcall
-NtCurrentTeb(void);
-
-
-//
-// The only way to get version information from the system is to examine
-// one stored in PEB. But it's pretty dangerouse because this value could
-// be altered in image header.
-//
-
-static
-int
-Nt5(void)
-{
- //return NtCuttentTeb()->Peb->OSMajorVersion >= 5;
- return (int)*(int*)((char*)(int)(*(int*)((char*)NtCurrentTeb() + 0x30)) + 0xA4) >= 5;
-}
-
-
-
-
-void uuid_generate(uuid_t out)
-{
- if(Nt5())
- {
- unsigned char seed[6];
- ((NtAllocateUuids_2000)NtAllocateUuids)(out, ((char*)out)+8, ((char*)out)+12, &seed[0] );
- }
- else
- {
- NtAllocateUuids(out, ((char*)out)+8, ((char*)out)+12);
- }
-}
diff --git a/contrib/uuid/isnull.c b/contrib/uuid/isnull.c
deleted file mode 100644
index 931e7e7dba2..00000000000
--- a/contrib/uuid/isnull.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * isnull.c --- Check whether or not the UUID is null
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include "uuidP.h"
-
-/* Returns 1 if the uuid is the NULL uuid */
-int uuid_is_null(const uuid_t uu)
-{
- const unsigned char *cp;
- int i;
-
- for (i=0, cp = uu; i < 16; i++)
- if (*cp++)
- return 0;
- return 1;
-}
-
diff --git a/contrib/uuid/pack.c b/contrib/uuid/pack.c
deleted file mode 100644
index 097516d2e2f..00000000000
--- a/contrib/uuid/pack.c
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Internal routine for packing UUID's
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include <string.h>
-#include "uuidP.h"
-
-void uuid_pack(const struct uuid *uu, uuid_t ptr)
-{
- uint32_t tmp;
- unsigned char *out = ptr;
-
- tmp = uu->time_low;
- out[3] = (unsigned char) tmp;
- tmp >>= 8;
- out[2] = (unsigned char) tmp;
- tmp >>= 8;
- out[1] = (unsigned char) tmp;
- tmp >>= 8;
- out[0] = (unsigned char) tmp;
-
- tmp = uu->time_mid;
- out[5] = (unsigned char) tmp;
- tmp >>= 8;
- out[4] = (unsigned char) tmp;
-
- tmp = uu->time_hi_and_version;
- out[7] = (unsigned char) tmp;
- tmp >>= 8;
- out[6] = (unsigned char) tmp;
-
- tmp = uu->clock_seq;
- out[9] = (unsigned char) tmp;
- tmp >>= 8;
- out[8] = (unsigned char) tmp;
-
- memcpy(out+10, uu->node, 6);
-}
-
diff --git a/contrib/uuid/parse.c b/contrib/uuid/parse.c
deleted file mode 100644
index 074383efae7..00000000000
--- a/contrib/uuid/parse.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * parse.c --- UUID parsing
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <ctype.h>
-#include <string.h>
-
-#include "uuidP.h"
-
-int uuid_parse(const char *in, uuid_t uu)
-{
- struct uuid uuid;
- int i;
- const char *cp;
- char buf[3];
-
- if (strlen(in) != 36)
- return -1;
- for (i=0, cp = in; i <= 36; i++,cp++) {
- if ((i == 8) || (i == 13) || (i == 18) ||
- (i == 23)) {
- if (*cp == '-')
- continue;
- else
- return -1;
- }
- if (i== 36)
- if (*cp == 0)
- continue;
- if (!isxdigit(*cp))
- return -1;
- }
- uuid.time_low = strtoul(in, NULL, 16);
- uuid.time_mid = strtoul(in+9, NULL, 16);
- uuid.time_hi_and_version = strtoul(in+14, NULL, 16);
- uuid.clock_seq = strtoul(in+19, NULL, 16);
- cp = in+24;
- buf[2] = 0;
- for (i=0; i < 6; i++) {
- buf[0] = *cp++;
- buf[1] = *cp++;
- uuid.node[i] = strtoul(buf, NULL, 16);
- }
-
- uuid_pack(&uuid, uu);
- return 0;
-}
diff --git a/contrib/uuid/tst_uuid.c b/contrib/uuid/tst_uuid.c
deleted file mode 100644
index e03138f7d18..00000000000
--- a/contrib/uuid/tst_uuid.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * tst_uuid.c --- test program from the UUID library
- *
- * Copyright (C) 1996, 1997, 1998 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#ifdef _WIN32
-#define _WIN32_WINNT 0x0500
-#include <windows.h>
-#define UUID MYUUID
-#endif
-
-#include <stdio.h>
-#include <stdlib.h>
-
-#include "uuid.h"
-
-static int test_uuid(const char * uuid, int isValid)
-{
- static const char * validStr[2] = {"invalid", "valid"};
- uuid_t uuidBits;
- int parsedOk;
-
- parsedOk = uuid_parse(uuid, uuidBits) == 0;
-
- printf("%s is %s", uuid, validStr[isValid]);
- if (parsedOk != isValid) {
- printf(" but uuid_parse says %s\n", validStr[parsedOk]);
- return 1;
- }
- printf(", OK\n");
- return 0;
-}
-
-#ifdef __GNUC__
-#define ATTR(x) __attribute__(x)
-#else
-#define ATTR(x)
-#endif
-
-int
-main(int argc ATTR((unused)) , char **argv ATTR((unused)))
-{
- uuid_t buf, tst;
- char str[100];
- struct timeval tv;
- time_t time_reg;
- unsigned char *cp;
- int i;
- int failed = 0;
- int type, variant;
-
- uuid_generate(buf);
- uuid_unparse(buf, str);
- printf("UUID generate = %s\n", str);
- printf("UUID: ");
- for (i=0, cp = (unsigned char *) &buf; i < 16; i++) {
- printf("%02x", *cp++);
- }
- printf("\n");
- type = uuid_type(buf); variant = uuid_variant(buf);
- printf("UUID type = %d, UUID variant = %d\n", type, variant);
- if (variant != UUID_VARIANT_DCE) {
- printf("Incorrect UUID Variant; was expecting DCE!\n");
- failed++;
- }
- printf("\n");
-
- uuid_generate_random(buf);
- uuid_unparse(buf, str);
- printf("UUID random string = %s\n", str);
- printf("UUID: ");
- for (i=0, cp = (unsigned char *) &buf; i < 16; i++) {
- printf("%02x", *cp++);
- }
- printf("\n");
- type = uuid_type(buf); variant = uuid_variant(buf);
- printf("UUID type = %d, UUID variant = %d\n", type, variant);
- if (variant != UUID_VARIANT_DCE) {
- printf("Incorrect UUID Variant; was expecting DCE!\n");
- failed++;
- }
- if (type != 4) {
- printf("Incorrect UUID type; was expecting "
- "4 (random type)!\n");
- failed++;
- }
- printf("\n");
-
- uuid_generate_time(buf);
- uuid_unparse(buf, str);
- printf("UUID string = %s\n", str);
- printf("UUID time: ");
- for (i=0, cp = (unsigned char *) &buf; i < 16; i++) {
- printf("%02x", *cp++);
- }
- printf("\n");
- type = uuid_type(buf); variant = uuid_variant(buf);
- printf("UUID type = %d, UUID variant = %d\n", type, variant);
- if (variant != UUID_VARIANT_DCE) {
- printf("Incorrect UUID Variant; was expecting DCE!\n");
- failed++;
- }
- if (type != 1) {
- printf("Incorrect UUID type; was expecting "
- "1 (time-based type)!\\n");
- failed++;
- }
- tv.tv_sec = 0;
- tv.tv_usec = 0;
- time_reg = uuid_time(buf, &tv);
- printf("UUID time is: (%ld, %ld): %s\n", tv.tv_sec, tv.tv_usec,
- ctime(&time_reg));
- uuid_parse(str, tst);
- if (!uuid_compare(buf, tst))
- printf("UUID parse and compare succeeded.\n");
- else {
- printf("UUID parse and compare failed!\n");
- failed++;
- }
- uuid_clear(tst);
- if (uuid_is_null(tst))
- printf("UUID clear and is null succeeded.\n");
- else {
- printf("UUID clear and is null failed!\n");
- failed++;
- }
- uuid_copy(buf, tst);
- if (!uuid_compare(buf, tst))
- printf("UUID copy and compare succeeded.\n");
- else {
- printf("UUID copy and compare failed!\n");
- failed++;
- }
- failed += test_uuid("84949cc5-4701-4a84-895b-354c584a981b", 1);
- failed += test_uuid("84949CC5-4701-4A84-895B-354C584A981B", 1);
- failed += test_uuid("84949cc5-4701-4a84-895b-354c584a981bc", 0);
- failed += test_uuid("84949cc5-4701-4a84-895b-354c584a981", 0);
- failed += test_uuid("84949cc5x4701-4a84-895b-354c584a981b", 0);
- failed += test_uuid("84949cc504701-4a84-895b-354c584a981b", 0);
- failed += test_uuid("84949cc5-470104a84-895b-354c584a981b", 0);
- failed += test_uuid("84949cc5-4701-4a840895b-354c584a981b", 0);
- failed += test_uuid("84949cc5-4701-4a84-895b0354c584a981b", 0);
- failed += test_uuid("g4949cc5-4701-4a84-895b-354c584a981b", 0);
- failed += test_uuid("84949cc5-4701-4a84-895b-354c584a981g", 0);
-
- if (failed) {
- printf("%d failures.\n", failed);
- exit(1);
- }
- return 0;
-}
diff --git a/contrib/uuid/unpack.c b/contrib/uuid/unpack.c
deleted file mode 100644
index beaaff3ca8a..00000000000
--- a/contrib/uuid/unpack.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Internal routine for unpacking UUID
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include <string.h>
-#include "uuidP.h"
-
-void uuid_unpack(const uuid_t in, struct uuid *uu)
-{
- const uint8_t *ptr = in;
- uint32_t tmp;
-
- tmp = *ptr++;
- tmp = (tmp << 8) | *ptr++;
- tmp = (tmp << 8) | *ptr++;
- tmp = (tmp << 8) | *ptr++;
- uu->time_low = tmp;
-
- tmp = *ptr++;
- tmp = (tmp << 8) | *ptr++;
- uu->time_mid = tmp;
-
- tmp = *ptr++;
- tmp = (tmp << 8) | *ptr++;
- uu->time_hi_and_version = tmp;
-
- tmp = *ptr++;
- tmp = (tmp << 8) | *ptr++;
- uu->clock_seq = tmp;
-
- memcpy(uu->node, ptr, 6);
-}
-
diff --git a/contrib/uuid/unparse.c b/contrib/uuid/unparse.c
deleted file mode 100644
index a95bbb04258..00000000000
--- a/contrib/uuid/unparse.c
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * unparse.c -- convert a UUID to string
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include <stdio.h>
-
-#include "uuidP.h"
-
-static const char *fmt_lower =
- "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x";
-
-static const char *fmt_upper =
- "%08X-%04X-%04X-%02X%02X-%02X%02X%02X%02X%02X%02X";
-
-#ifdef UUID_UNPARSE_DEFAULT_UPPER
-#define FMT_DEFAULT fmt_upper
-#else
-#define FMT_DEFAULT fmt_lower
-#endif
-
-static void uuid_unparse_x(const uuid_t uu, char *out, const char *fmt)
-{
- struct uuid uuid;
-
- uuid_unpack(uu, &uuid);
- sprintf(out, fmt,
- uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
- uuid.clock_seq >> 8, uuid.clock_seq & 0xFF,
- uuid.node[0], uuid.node[1], uuid.node[2],
- uuid.node[3], uuid.node[4], uuid.node[5]);
-}
-
-void uuid_unparse_lower(const uuid_t uu, char *out)
-{
- uuid_unparse_x(uu, out, fmt_lower);
-}
-
-void uuid_unparse_upper(const uuid_t uu, char *out)
-{
- uuid_unparse_x(uu, out, fmt_upper);
-}
-
-void uuid_unparse(const uuid_t uu, char *out)
-{
- uuid_unparse_x(uu, out, FMT_DEFAULT);
-}
diff --git a/contrib/uuid/uuid.h b/contrib/uuid/uuid.h
deleted file mode 100644
index ab006652fc0..00000000000
--- a/contrib/uuid/uuid.h
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Public include file for the UUID library
- *
- * Copyright (C) 1996, 1997, 1998 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#ifndef _UUID_UUID_H
-#define _UUID_UUID_H
-
-#include "config.h"
-#include <sys/types.h>
-#ifndef _WIN32
-#include <sys/time.h>
-#endif
-#include <time.h>
-
-typedef unsigned char uuid_t[16];
-
-/* UUID Variant definitions */
-#define UUID_VARIANT_NCS 0
-#define UUID_VARIANT_DCE 1
-#define UUID_VARIANT_MICROSOFT 2
-#define UUID_VARIANT_OTHER 3
-
-/* UUID Type definitions */
-#define UUID_TYPE_DCE_TIME 1
-#define UUID_TYPE_DCE_RANDOM 4
-
-/* Allow UUID constants to be defined */
-#ifdef __GNUC__
-#define UUID_DEFINE(name,u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12,u13,u14,u15) \
- static const uuid_t name __attribute__ ((unused)) = {u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12,u13,u14,u15}
-#else
-#define UUID_DEFINE(name,u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12,u13,u14,u15) \
- static const uuid_t name = {u0,u1,u2,u3,u4,u5,u6,u7,u8,u9,u10,u11,u12,u13,u14,u15}
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* clear.c */
-void uuid_clear(uuid_t uu);
-
-/* compare.c */
-int uuid_compare(const uuid_t uu1, const uuid_t uu2);
-
-/* copy.c */
-void uuid_copy(uuid_t dst, const uuid_t src);
-
-/* gen_uuid.c */
-void uuid_generate(uuid_t out);
-void uuid_generate_random(uuid_t out);
-void uuid_generate_time(uuid_t out);
-
-/* isnull.c */
-int uuid_is_null(const uuid_t uu);
-
-/* parse.c */
-int uuid_parse(const char *in, uuid_t uu);
-
-/* unparse.c */
-void uuid_unparse(const uuid_t uu, char *out);
-void uuid_unparse_lower(const uuid_t uu, char *out);
-void uuid_unparse_upper(const uuid_t uu, char *out);
-
-/* uuid_time.c */
-time_t uuid_time(const uuid_t uu, struct timeval *ret_tv);
-int uuid_type(const uuid_t uu);
-int uuid_variant(const uuid_t uu);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _UUID_UUID_H */
diff --git a/contrib/uuid/uuidP.h b/contrib/uuid/uuidP.h
deleted file mode 100644
index 9a2de6132fe..00000000000
--- a/contrib/uuid/uuidP.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * uuid.h -- private header file for uuids
- *
- * Copyright (C) 1996, 1997 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#include "uuid.h"
-#ifdef HAVE_INTTYPES_H
-#include <inttypes.h>
-#else
-#include "uuid_types.h"
-#endif
-#include <sys/types.h>
-
-
-/*
- * Offset between 15-Oct-1582 and 1-Jan-70
- */
-#define TIME_OFFSET_HIGH 0x01B21DD2
-#define TIME_OFFSET_LOW 0x13814000
-
-struct uuid {
- uint32_t time_low;
- uint16_t time_mid;
- uint16_t time_hi_and_version;
- uint16_t clock_seq;
- uint8_t node[6];
-};
-
-
-/*
- * prototypes
- */
-void uuid_pack(const struct uuid *uu, uuid_t ptr);
-void uuid_unpack(const uuid_t in, struct uuid *uu);
diff --git a/contrib/uuid/uuid_time.c b/contrib/uuid/uuid_time.c
deleted file mode 100644
index f25f5c90fe5..00000000000
--- a/contrib/uuid/uuid_time.c
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * uuid_time.c --- Interpret the time field from a uuid. This program
- * violates the UUID abstraction barrier by reaching into the guts
- * of a UUID and interpreting it.
- *
- * Copyright (C) 1998, 1999 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#ifdef _WIN32
-#define _WIN32_WINNT 0x0500
-#include <windows.h>
-#define UUID MYUUID
-#endif
-
-#include <stdio.h>
-#ifdef HAVE_UNISTD_H
-#include <unistd.h>
-#endif
-#include <stdlib.h>
-#include <sys/types.h>
-#ifdef HAVE_SYS_TIME_H
-#include <sys/time.h>
-#endif
-#include <time.h>
-
-#include "uuidP.h"
-
-time_t uuid_time(const uuid_t uu, struct timeval *ret_tv)
-{
- struct timeval tv;
- struct uuid uuid;
- uint32_t high;
- uint64_t clock_reg;
-
- uuid_unpack(uu, &uuid);
-
- high = uuid.time_mid | ((uuid.time_hi_and_version & 0xFFF) << 16);
- clock_reg = uuid.time_low | ((uint64_t) high << 32);
-
- clock_reg -= (((uint64_t) 0x01B21DD2) << 32) + 0x13814000;
- tv.tv_sec = clock_reg / 10000000;
- tv.tv_usec = (clock_reg % 10000000) / 10;
-
- if (ret_tv)
- *ret_tv = tv;
-
- return tv.tv_sec;
-}
-
-int uuid_type(const uuid_t uu)
-{
- struct uuid uuid;
-
- uuid_unpack(uu, &uuid);
- return ((uuid.time_hi_and_version >> 12) & 0xF);
-}
-
-int uuid_variant(const uuid_t uu)
-{
- struct uuid uuid;
- int var;
-
- uuid_unpack(uu, &uuid);
- var = uuid.clock_seq;
-
- if ((var & 0x8000) == 0)
- return UUID_VARIANT_NCS;
- if ((var & 0x4000) == 0)
- return UUID_VARIANT_DCE;
- if ((var & 0x2000) == 0)
- return UUID_VARIANT_MICROSOFT;
- return UUID_VARIANT_OTHER;
-}
-
-#ifdef DEBUG
-static const char *variant_string(int variant)
-{
- switch (variant) {
- case UUID_VARIANT_NCS:
- return "NCS";
- case UUID_VARIANT_DCE:
- return "DCE";
- case UUID_VARIANT_MICROSOFT:
- return "Microsoft";
- default:
- return "Other";
- }
-}
-
-
-int
-main(int argc, char **argv)
-{
- uuid_t buf;
- time_t time_reg;
- struct timeval tv;
- int type, variant;
-
- if (argc != 2) {
- fprintf(stderr, "Usage: %s uuid\n", argv[0]);
- exit(1);
- }
- if (uuid_parse(argv[1], buf)) {
- fprintf(stderr, "Invalid UUID: %s\n", argv[1]);
- exit(1);
- }
- variant = uuid_variant(buf);
- type = uuid_type(buf);
- time_reg = uuid_time(buf, &tv);
-
- printf("UUID variant is %d (%s)\n", variant, variant_string(variant));
- if (variant != UUID_VARIANT_DCE) {
- printf("Warning: This program only knows how to interpret "
- "DCE UUIDs.\n\tThe rest of the output is likely "
- "to be incorrect!!\n");
- }
- printf("UUID type is %d", type);
- switch (type) {
- case 1:
- printf(" (time based)\n");
- break;
- case 2:
- printf(" (DCE)\n");
- break;
- case 3:
- printf(" (name-based)\n");
- break;
- case 4:
- printf(" (random)\n");
- break;
- default:
- printf("\n");
- }
- if (type != 1) {
- printf("Warning: not a time-based UUID, so UUID time "
- "decoding will likely not work!\n");
- }
- printf("UUID time is: (%ld, %ld): %s\n", tv.tv_sec, tv.tv_usec,
- ctime(&time_reg));
-
- return 0;
-}
-#endif
diff --git a/contrib/uuid/uuid_types.h.in b/contrib/uuid/uuid_types.h.in
deleted file mode 100644
index f21ff4ee183..00000000000
--- a/contrib/uuid/uuid_types.h.in
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * If linux/types.h is already been included, assume it has defined
- * everything we need. (cross fingers) Other header files may have
- * also defined the types that we need.
- */
-#if (!defined(_STDINT_H) && !defined(_UUID_STDINT_H))
-#define _UUID_STDINT_H
-
-typedef unsigned char uint8_t;
-typedef signed char int8_t;
-
-#if (@SIZEOF_INT@ == 8)
-typedef int int64_t;
-typedef unsigned int uint64_t;
-#elif (@SIZEOF_LONG@ == 8)
-typedef long int64_t;
-typedef unsigned long uint64_t;
-#elif (@SIZEOF_LONG_LONG@ == 8)
-#if defined(__GNUC__)
-typedef __signed__ long long int64_t;
-#else
-typedef signed long long int64_t;
-#endif
-typedef unsigned long long uint64_t;
-#endif
-
-#if (@SIZEOF_INT@ == 2)
-typedef int int16_t;
-typedef unsigned int uint16_t;
-#elif (@SIZEOF_SHORT@ == 2)
-typedef short int16_t;
-typedef unsigned short uint16_t;
-#else
- ?==error: undefined 16 bit type
-#endif
-
-#if (@SIZEOF_INT@ == 4)
-typedef int int32_t;
-typedef unsigned int uint32_t;
-#elif (@SIZEOF_LONG@ == 4)
-typedef long int32_t;
-typedef unsigned long uint32_t;
-#elif (@SIZEOF_SHORT@ == 4)
-typedef short int32_t;
-typedef unsigned short uint32_t;
-#else
- ?== error: undefined 32 bit type
-#endif
-
-#endif
diff --git a/contrib/uuid/uuidd.h b/contrib/uuid/uuidd.h
deleted file mode 100644
index c71f4b78835..00000000000
--- a/contrib/uuid/uuidd.h
+++ /dev/null
@@ -1,54 +0,0 @@
-/*
- * Definitions used by the uuidd daemon
- *
- * Copyright (C) 2007 Theodore Ts'o.
- *
- * %Begin-Header%
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, and the entire permission notice in its entirety,
- * including the disclaimer of warranties.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. The name of the author may not be used to endorse or promote
- * products derived from this software without specific prior
- * written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
- * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ALL OF
- * WHICH ARE HEREBY DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
- * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF NOT ADVISED OF THE POSSIBILITY OF SUCH
- * DAMAGE.
- * %End-Header%
- */
-
-#ifndef _UUID_UUIDD_H
-#define _UUID_UUIDD_H
-
-#define UUIDD_DIR "/var/lib/libuuid"
-#define UUIDD_SOCKET_PATH UUIDD_DIR "/request"
-#define UUIDD_PIDFILE_PATH UUIDD_DIR "/uuidd.pid"
-#define UUIDD_PATH "/usr/sbin/uuidd"
-
-#define UUIDD_OP_GETPID 0
-#define UUIDD_OP_GET_MAXOP 1
-#define UUIDD_OP_TIME_UUID 2
-#define UUIDD_OP_RANDOM_UUID 3
-#define UUIDD_OP_BULK_TIME_UUID 4
-#define UUIDD_OP_BULK_RANDOM_UUID 5
-#define UUIDD_MAX_OP UUIDD_OP_BULK_RANDOM_UUID
-
-extern void uuid__generate_time(uuid_t out, int *num);
-extern void uuid__generate_random(uuid_t out, int *num);
-
-#endif /* _UUID_UUID_H */
diff --git a/contrib/xxhash/xxhash.c b/contrib/xxhash/xxhash.c
new file mode 100644
index 00000000000..56f80f8811d
--- /dev/null
+++ b/contrib/xxhash/xxhash.c
@@ -0,0 +1,1029 @@
+/*
+* xxHash - Fast Hash algorithm
+* Copyright (C) 2012-2016, Yann Collet
+*
+* BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+*
+* Redistribution and use in source and binary forms, with or without
+* modification, are permitted provided that the following conditions are
+* met:
+*
+* * Redistributions of source code must retain the above copyright
+* notice, this list of conditions and the following disclaimer.
+* * Redistributions in binary form must reproduce the above
+* copyright notice, this list of conditions and the following disclaimer
+* in the documentation and/or other materials provided with the
+* distribution.
+*
+* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*
+* You can contact the author at :
+* - xxHash homepage: http://www.xxhash.com
+* - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+
+/* *************************************
+* Tuning parameters
+***************************************/
+/*!XXH_FORCE_MEMORY_ACCESS :
+ * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
+ * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
+ * The below switch allow to select different access method for improved performance.
+ * Method 0 (default) : use `memcpy()`. Safe and portable.
+ * Method 1 : `__packed` statement. It depends on compiler extension (ie, not portable).
+ * This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
+ * Method 2 : direct access. This method doesn't depend on compiler but violate C standard.
+ * It can generate buggy code on targets which do not support unaligned memory accesses.
+ * But in some circumstances, it's the only known way to get the most performance (ie GCC + ARMv6)
+ * See http://stackoverflow.com/a/32095106/646947 for details.
+ * Prefer these methods in priority order (0 > 1 > 2)
+ */
+#ifndef XXH_FORCE_MEMORY_ACCESS /* can be defined externally, on command line for example */
+# if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+ || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
+ || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
+# define XXH_FORCE_MEMORY_ACCESS 2
+# elif (defined(__INTEL_COMPILER) && !defined(_WIN32)) || \
+ (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+ || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+ || defined(__ARM_ARCH_7S__) ))
+# define XXH_FORCE_MEMORY_ACCESS 1
+# endif
+#endif
+
+/*!XXH_ACCEPT_NULL_INPUT_POINTER :
+ * If input pointer is NULL, xxHash default behavior is to dereference it, triggering a segfault.
+ * When this macro is enabled, xxHash actively checks input for null pointer.
+ * It it is, result for null input pointers is the same as a null-length input.
+ */
+#ifndef XXH_ACCEPT_NULL_INPUT_POINTER /* can be defined externally */
+# define XXH_ACCEPT_NULL_INPUT_POINTER 0
+#endif
+
+/*!XXH_FORCE_NATIVE_FORMAT :
+ * By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
+ * Results are therefore identical for little-endian and big-endian CPU.
+ * This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
+ * Should endian-independence be of no importance for your application, you may set the #define below to 1,
+ * to improve speed for Big-endian CPU.
+ * This option has no impact on Little_Endian CPU.
+ */
+#ifndef XXH_FORCE_NATIVE_FORMAT /* can be defined externally */
+# define XXH_FORCE_NATIVE_FORMAT 0
+#endif
+
+/*!XXH_FORCE_ALIGN_CHECK :
+ * This is a minor performance trick, only useful with lots of very small keys.
+ * It means : check for aligned/unaligned input.
+ * The check costs one initial branch per hash;
+ * set it to 0 when the input is guaranteed to be aligned,
+ * or when alignment doesn't matter for performance.
+ */
+#ifndef XXH_FORCE_ALIGN_CHECK /* can be defined externally */
+# if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) || defined(_M_X64)
+# define XXH_FORCE_ALIGN_CHECK 0
+# else
+# define XXH_FORCE_ALIGN_CHECK 1
+# endif
+#endif
+
+
+/* *************************************
+* Includes & Memory related functions
+***************************************/
+/*! Modify the local functions below should you wish to use some other memory routines
+* for malloc(), free() */
+#include <stdlib.h>
+static void* XXH_malloc(size_t s) { return malloc(s); }
+static void XXH_free (void* p) { free(p); }
+/*! and for memcpy() */
+#include <string.h>
+static void* XXH_memcpy(void* dest, const void* src, size_t size) { return memcpy(dest,src,size); }
+
+#include <assert.h> /* assert */
+
+#define XXH_STATIC_LINKING_ONLY
+#include "xxhash.h"
+
+
+/* *************************************
+* Compiler Specific Options
+***************************************/
+#ifdef _MSC_VER /* Visual Studio */
+# pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
+# define FORCE_INLINE static __forceinline
+#else
+# if defined (__cplusplus) || defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# ifdef __GNUC__
+# define FORCE_INLINE static inline __attribute__((always_inline))
+# else
+# define FORCE_INLINE static inline
+# endif
+# else
+# define FORCE_INLINE static
+# endif /* __STDC_VERSION__ */
+#endif
+
+
+/* *************************************
+* Basic Types
+***************************************/
+#ifndef MEM_MODULE
+# if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+# else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+# endif
+#endif
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static U32 XXH_read32(const void* memPtr) { return *(const U32*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; } __attribute__((packed)) unalign;
+static U32 XXH_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
+
+#else
+
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+static U32 XXH_read32(const void* memPtr)
+{
+ U32 val;
+ memcpy(&val, memPtr, sizeof(val));
+ return val;
+}
+
+#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
+
+
+/* ****************************************
+* Compiler-specific Functions and Macros
+******************************************/
+#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
+
+/* Note : although _rotl exists for minGW (GCC under windows), performance seems poor */
+#if defined(_MSC_VER)
+# define XXH_rotl32(x,r) _rotl(x,r)
+# define XXH_rotl64(x,r) _rotl64(x,r)
+#else
+# define XXH_rotl32(x,r) ((x << r) | (x >> (32 - r)))
+# define XXH_rotl64(x,r) ((x << r) | (x >> (64 - r)))
+#endif
+
+#if defined(_MSC_VER) /* Visual Studio */
+# define XXH_swap32 _byteswap_ulong
+#elif GCC_VERSION >= 403
+# define XXH_swap32 __builtin_bswap32
+#else
+static U32 XXH_swap32 (U32 x)
+{
+ return ((x << 24) & 0xff000000 ) |
+ ((x << 8) & 0x00ff0000 ) |
+ ((x >> 8) & 0x0000ff00 ) |
+ ((x >> 24) & 0x000000ff );
+}
+#endif
+
+
+/* *************************************
+* Architecture Macros
+***************************************/
+typedef enum { XXH_bigEndian=0, XXH_littleEndian=1 } XXH_endianess;
+
+/* XXH_CPU_LITTLE_ENDIAN can be defined externally, for example on the compiler command line */
+#ifndef XXH_CPU_LITTLE_ENDIAN
+static int XXH_isLittleEndian(void)
+{
+ const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
+ return one.c[0];
+}
+# define XXH_CPU_LITTLE_ENDIAN XXH_isLittleEndian()
+#endif
+
+
+/* ***************************
+* Memory reads
+*****************************/
+typedef enum { XXH_aligned, XXH_unaligned } XXH_alignment;
+
+FORCE_INLINE U32 XXH_readLE32_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+ if (align==XXH_unaligned)
+ return endian==XXH_littleEndian ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
+ else
+ return endian==XXH_littleEndian ? *(const U32*)ptr : XXH_swap32(*(const U32*)ptr);
+}
+
+FORCE_INLINE U32 XXH_readLE32(const void* ptr, XXH_endianess endian)
+{
+ return XXH_readLE32_align(ptr, endian, XXH_unaligned);
+}
+
+static U32 XXH_readBE32(const void* ptr)
+{
+ return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
+}
+
+
+/* *************************************
+* Macros
+***************************************/
+#define XXH_STATIC_ASSERT(c) { enum { XXH_sa = 1/(int)(!!(c)) }; } /* use after variable declarations */
+XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
+
+
+/* *******************************************************************
+* 32-bit hash functions
+*********************************************************************/
+static const U32 PRIME32_1 = 2654435761U;
+static const U32 PRIME32_2 = 2246822519U;
+static const U32 PRIME32_3 = 3266489917U;
+static const U32 PRIME32_4 = 668265263U;
+static const U32 PRIME32_5 = 374761393U;
+
+static U32 XXH32_round(U32 seed, U32 input)
+{
+ seed += input * PRIME32_2;
+ seed = XXH_rotl32(seed, 13);
+ seed *= PRIME32_1;
+ return seed;
+}
+
+/* mix all bits */
+static U32 XXH32_avalanche(U32 h32)
+{
+ h32 ^= h32 >> 15;
+ h32 *= PRIME32_2;
+ h32 ^= h32 >> 13;
+ h32 *= PRIME32_3;
+ h32 ^= h32 >> 16;
+ return(h32);
+}
+
+#define XXH_get32bits(p) XXH_readLE32_align(p, endian, align)
+
+static U32
+XXH32_finalize(U32 h32, const void* ptr, size_t len,
+ XXH_endianess endian, XXH_alignment align)
+
+{
+ const BYTE* p = (const BYTE*)ptr;
+#define PROCESS1 \
+ h32 += (*p) * PRIME32_5; \
+ p++; \
+ h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
+
+#define PROCESS4 \
+ h32 += XXH_get32bits(p) * PRIME32_3; \
+ p+=4; \
+ h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
+
+ switch(len&15) /* or switch(bEnd - p) */
+ {
+ case 12: PROCESS4;
+ /* fallthrough */
+ case 8: PROCESS4;
+ /* fallthrough */
+ case 4: PROCESS4;
+ return XXH32_avalanche(h32);
+
+ case 13: PROCESS4;
+ /* fallthrough */
+ case 9: PROCESS4;
+ /* fallthrough */
+ case 5: PROCESS4;
+ PROCESS1;
+ return XXH32_avalanche(h32);
+
+ case 14: PROCESS4;
+ /* fallthrough */
+ case 10: PROCESS4;
+ /* fallthrough */
+ case 6: PROCESS4;
+ PROCESS1;
+ PROCESS1;
+ return XXH32_avalanche(h32);
+
+ case 15: PROCESS4;
+ /* fallthrough */
+ case 11: PROCESS4;
+ /* fallthrough */
+ case 7: PROCESS4;
+ /* fallthrough */
+ case 3: PROCESS1;
+ /* fallthrough */
+ case 2: PROCESS1;
+ /* fallthrough */
+ case 1: PROCESS1;
+ /* fallthrough */
+ case 0: return XXH32_avalanche(h32);
+ }
+ assert(0);
+ return h32; /* reaching this point is deemed impossible */
+}
+
+
+FORCE_INLINE U32
+XXH32_endian_align(const void* input, size_t len, U32 seed,
+ XXH_endianess endian, XXH_alignment align)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* bEnd = p + len;
+ U32 h32;
+
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+ if (p==NULL) {
+ len=0;
+ bEnd=p=(const BYTE*)(size_t)16;
+ }
+#endif
+
+ if (len>=16) {
+ const BYTE* const limit = bEnd - 15;
+ U32 v1 = seed + PRIME32_1 + PRIME32_2;
+ U32 v2 = seed + PRIME32_2;
+ U32 v3 = seed + 0;
+ U32 v4 = seed - PRIME32_1;
+
+ do {
+ v1 = XXH32_round(v1, XXH_get32bits(p)); p+=4;
+ v2 = XXH32_round(v2, XXH_get32bits(p)); p+=4;
+ v3 = XXH32_round(v3, XXH_get32bits(p)); p+=4;
+ v4 = XXH32_round(v4, XXH_get32bits(p)); p+=4;
+ } while (p < limit);
+
+ h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7)
+ + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
+ } else {
+ h32 = seed + PRIME32_5;
+ }
+
+ h32 += (U32)len;
+
+ return XXH32_finalize(h32, p, len&15, endian, align);
+}
+
+
+XXH_PUBLIC_API unsigned int XXH32 (const void* input, size_t len, unsigned int seed)
+{
+#if 0
+ /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+ XXH32_state_t state;
+ XXH32_reset(&state, seed);
+ XXH32_update(&state, input, len);
+ return XXH32_digest(&state);
+#else
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if (XXH_FORCE_ALIGN_CHECK) {
+ if ((((size_t)input) & 3) == 0) { /* Input is 4-bytes aligned, leverage the speed benefit */
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+ else
+ return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+ } }
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+ else
+ return XXH32_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+
+
+/*====== Hash streaming ======*/
+
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
+{
+ return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
+}
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
+{
+ XXH_free(statePtr);
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
+{
+ memcpy(dstState, srcState, sizeof(*dstState));
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, unsigned int seed)
+{
+ XXH32_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+ memset(&state, 0, sizeof(state));
+ state.v1 = seed + PRIME32_1 + PRIME32_2;
+ state.v2 = seed + PRIME32_2;
+ state.v3 = seed + 0;
+ state.v4 = seed - PRIME32_1;
+ /* do not write into reserved, planned to be removed in a future version */
+ memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
+ return XXH_OK;
+}
+
+
+FORCE_INLINE
+XXH_errorcode XXH32_update_endian (XXH32_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* const bEnd = p + len;
+
+ if (input==NULL)
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+ return XXH_OK;
+#else
+ return XXH_ERROR;
+#endif
+
+ state->total_len_32 += (unsigned)len;
+ state->large_len |= (len>=16) | (state->total_len_32>=16);
+
+ if (state->memsize + len < 16) { /* fill in tmp buffer */
+ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, len);
+ state->memsize += (unsigned)len;
+ return XXH_OK;
+ }
+
+ if (state->memsize) { /* some data left from previous update */
+ XXH_memcpy((BYTE*)(state->mem32) + state->memsize, input, 16-state->memsize);
+ { const U32* p32 = state->mem32;
+ state->v1 = XXH32_round(state->v1, XXH_readLE32(p32, endian)); p32++;
+ state->v2 = XXH32_round(state->v2, XXH_readLE32(p32, endian)); p32++;
+ state->v3 = XXH32_round(state->v3, XXH_readLE32(p32, endian)); p32++;
+ state->v4 = XXH32_round(state->v4, XXH_readLE32(p32, endian));
+ }
+ p += 16-state->memsize;
+ state->memsize = 0;
+ }
+
+ if (p <= bEnd-16) {
+ const BYTE* const limit = bEnd - 16;
+ U32 v1 = state->v1;
+ U32 v2 = state->v2;
+ U32 v3 = state->v3;
+ U32 v4 = state->v4;
+
+ do {
+ v1 = XXH32_round(v1, XXH_readLE32(p, endian)); p+=4;
+ v2 = XXH32_round(v2, XXH_readLE32(p, endian)); p+=4;
+ v3 = XXH32_round(v3, XXH_readLE32(p, endian)); p+=4;
+ v4 = XXH32_round(v4, XXH_readLE32(p, endian)); p+=4;
+ } while (p<=limit);
+
+ state->v1 = v1;
+ state->v2 = v2;
+ state->v3 = v3;
+ state->v4 = v4;
+ }
+
+ if (p < bEnd) {
+ XXH_memcpy(state->mem32, p, (size_t)(bEnd-p));
+ state->memsize = (unsigned)(bEnd-p);
+ }
+
+ return XXH_OK;
+}
+
+
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* state_in, const void* input, size_t len)
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_update_endian(state_in, input, len, XXH_littleEndian);
+ else
+ return XXH32_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+
+FORCE_INLINE U32
+XXH32_digest_endian (const XXH32_state_t* state, XXH_endianess endian)
+{
+ U32 h32;
+
+ if (state->large_len) {
+ h32 = XXH_rotl32(state->v1, 1)
+ + XXH_rotl32(state->v2, 7)
+ + XXH_rotl32(state->v3, 12)
+ + XXH_rotl32(state->v4, 18);
+ } else {
+ h32 = state->v3 /* == seed */ + PRIME32_5;
+ }
+
+ h32 += state->total_len_32;
+
+ return XXH32_finalize(h32, state->mem32, state->memsize, endian, XXH_aligned);
+}
+
+
+XXH_PUBLIC_API unsigned int XXH32_digest (const XXH32_state_t* state_in)
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH32_digest_endian(state_in, XXH_littleEndian);
+ else
+ return XXH32_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+/*====== Canonical representation ======*/
+
+/*! Default XXH result types are basic unsigned 32 and 64 bits.
+* The canonical representation follows human-readable write convention, aka big-endian (large digits first).
+* These functions allow transformation of hash result into and from its canonical format.
+* This way, hash values can be written into a file or buffer, remaining comparable across different systems.
+*/
+
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
+{
+ XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
+ if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
+ memcpy(dst, &hash, sizeof(*dst));
+}
+
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
+{
+ return XXH_readBE32(src);
+}
+
+
+#ifndef XXH_NO_LONG_LONG
+
+/* *******************************************************************
+* 64-bit hash functions
+*********************************************************************/
+
+/*====== Memory access ======*/
+
+#ifndef MEM_MODULE
+# define MEM_MODULE
+# if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+ typedef uint64_t U64;
+# else
+ /* if compiler doesn't support unsigned long long, replace by another 64-bit type */
+ typedef unsigned long long U64;
+# endif
+#endif
+
+
+#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
+
+/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
+static U64 XXH_read64(const void* memPtr) { return *(const U64*) memPtr; }
+
+#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
+
+/* __pack instructions are safer, but compiler specific, hence potentially problematic for some compilers */
+/* currently only defined for gcc and icc */
+typedef union { U32 u32; U64 u64; } __attribute__((packed)) unalign64;
+static U64 XXH_read64(const void* ptr) { return ((const unalign64*)ptr)->u64; }
+
+#else
+
+/* portable and safe solution. Generally efficient.
+ * see : http://stackoverflow.com/a/32095106/646947
+ */
+
+static U64 XXH_read64(const void* memPtr)
+{
+ U64 val;
+ memcpy(&val, memPtr, sizeof(val));
+ return val;
+}
+
+#endif /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
+
+#if defined(_MSC_VER) /* Visual Studio */
+# define XXH_swap64 _byteswap_uint64
+#elif GCC_VERSION >= 403
+# define XXH_swap64 __builtin_bswap64
+#else
+static U64 XXH_swap64 (U64 x)
+{
+ return ((x << 56) & 0xff00000000000000ULL) |
+ ((x << 40) & 0x00ff000000000000ULL) |
+ ((x << 24) & 0x0000ff0000000000ULL) |
+ ((x << 8) & 0x000000ff00000000ULL) |
+ ((x >> 8) & 0x00000000ff000000ULL) |
+ ((x >> 24) & 0x0000000000ff0000ULL) |
+ ((x >> 40) & 0x000000000000ff00ULL) |
+ ((x >> 56) & 0x00000000000000ffULL);
+}
+#endif
+
+FORCE_INLINE U64 XXH_readLE64_align(const void* ptr, XXH_endianess endian, XXH_alignment align)
+{
+ if (align==XXH_unaligned)
+ return endian==XXH_littleEndian ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
+ else
+ return endian==XXH_littleEndian ? *(const U64*)ptr : XXH_swap64(*(const U64*)ptr);
+}
+
+FORCE_INLINE U64 XXH_readLE64(const void* ptr, XXH_endianess endian)
+{
+ return XXH_readLE64_align(ptr, endian, XXH_unaligned);
+}
+
+static U64 XXH_readBE64(const void* ptr)
+{
+ return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
+}
+
+
+/*====== xxh64 ======*/
+
+static const U64 PRIME64_1 = 11400714785074694791ULL;
+static const U64 PRIME64_2 = 14029467366897019727ULL;
+static const U64 PRIME64_3 = 1609587929392839161ULL;
+static const U64 PRIME64_4 = 9650029242287828579ULL;
+static const U64 PRIME64_5 = 2870177450012600261ULL;
+
+static U64 XXH64_round(U64 acc, U64 input)
+{
+ acc += input * PRIME64_2;
+ acc = XXH_rotl64(acc, 31);
+ acc *= PRIME64_1;
+ return acc;
+}
+
+static U64 XXH64_mergeRound(U64 acc, U64 val)
+{
+ val = XXH64_round(0, val);
+ acc ^= val;
+ acc = acc * PRIME64_1 + PRIME64_4;
+ return acc;
+}
+
+static U64 XXH64_avalanche(U64 h64)
+{
+ h64 ^= h64 >> 33;
+ h64 *= PRIME64_2;
+ h64 ^= h64 >> 29;
+ h64 *= PRIME64_3;
+ h64 ^= h64 >> 32;
+ return h64;
+}
+
+
+#define XXH_get64bits(p) XXH_readLE64_align(p, endian, align)
+
+static U64
+XXH64_finalize(U64 h64, const void* ptr, size_t len,
+ XXH_endianess endian, XXH_alignment align)
+{
+ const BYTE* p = (const BYTE*)ptr;
+
+#define PROCESS1_64 \
+ h64 ^= (*p) * PRIME64_5; \
+ p++; \
+ h64 = XXH_rotl64(h64, 11) * PRIME64_1;
+
+#define PROCESS4_64 \
+ h64 ^= (U64)(XXH_get32bits(p)) * PRIME64_1; \
+ p+=4; \
+ h64 = XXH_rotl64(h64, 23) * PRIME64_2 + PRIME64_3;
+
+#define PROCESS8_64 { \
+ U64 const k1 = XXH64_round(0, XXH_get64bits(p)); \
+ p+=8; \
+ h64 ^= k1; \
+ h64 = XXH_rotl64(h64,27) * PRIME64_1 + PRIME64_4; \
+}
+
+ switch(len&31) {
+ case 24: PROCESS8_64;
+ /* fallthrough */
+ case 16: PROCESS8_64;
+ /* fallthrough */
+ case 8: PROCESS8_64;
+ return XXH64_avalanche(h64);
+
+ case 28: PROCESS8_64;
+ /* fallthrough */
+ case 20: PROCESS8_64;
+ /* fallthrough */
+ case 12: PROCESS8_64;
+ /* fallthrough */
+ case 4: PROCESS4_64;
+ return XXH64_avalanche(h64);
+
+ case 25: PROCESS8_64;
+ /* fallthrough */
+ case 17: PROCESS8_64;
+ /* fallthrough */
+ case 9: PROCESS8_64;
+ PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 29: PROCESS8_64;
+ /* fallthrough */
+ case 21: PROCESS8_64;
+ /* fallthrough */
+ case 13: PROCESS8_64;
+ /* fallthrough */
+ case 5: PROCESS4_64;
+ PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 26: PROCESS8_64;
+ /* fallthrough */
+ case 18: PROCESS8_64;
+ /* fallthrough */
+ case 10: PROCESS8_64;
+ PROCESS1_64;
+ PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 30: PROCESS8_64;
+ /* fallthrough */
+ case 22: PROCESS8_64;
+ /* fallthrough */
+ case 14: PROCESS8_64;
+ /* fallthrough */
+ case 6: PROCESS4_64;
+ PROCESS1_64;
+ PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 27: PROCESS8_64;
+ /* fallthrough */
+ case 19: PROCESS8_64;
+ /* fallthrough */
+ case 11: PROCESS8_64;
+ PROCESS1_64;
+ PROCESS1_64;
+ PROCESS1_64;
+ return XXH64_avalanche(h64);
+
+ case 31: PROCESS8_64;
+ /* fallthrough */
+ case 23: PROCESS8_64;
+ /* fallthrough */
+ case 15: PROCESS8_64;
+ /* fallthrough */
+ case 7: PROCESS4_64;
+ /* fallthrough */
+ case 3: PROCESS1_64;
+ /* fallthrough */
+ case 2: PROCESS1_64;
+ /* fallthrough */
+ case 1: PROCESS1_64;
+ /* fallthrough */
+ case 0: return XXH64_avalanche(h64);
+ }
+
+ /* impossible to reach */
+ assert(0);
+ return 0; /* unreachable, but some compilers complain without it */
+}
+
+FORCE_INLINE U64
+XXH64_endian_align(const void* input, size_t len, U64 seed,
+ XXH_endianess endian, XXH_alignment align)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* bEnd = p + len;
+ U64 h64;
+
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+ if (p==NULL) {
+ len=0;
+ bEnd=p=(const BYTE*)(size_t)32;
+ }
+#endif
+
+ if (len>=32) {
+ const BYTE* const limit = bEnd - 32;
+ U64 v1 = seed + PRIME64_1 + PRIME64_2;
+ U64 v2 = seed + PRIME64_2;
+ U64 v3 = seed + 0;
+ U64 v4 = seed - PRIME64_1;
+
+ do {
+ v1 = XXH64_round(v1, XXH_get64bits(p)); p+=8;
+ v2 = XXH64_round(v2, XXH_get64bits(p)); p+=8;
+ v3 = XXH64_round(v3, XXH_get64bits(p)); p+=8;
+ v4 = XXH64_round(v4, XXH_get64bits(p)); p+=8;
+ } while (p<=limit);
+
+ h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+ h64 = XXH64_mergeRound(h64, v1);
+ h64 = XXH64_mergeRound(h64, v2);
+ h64 = XXH64_mergeRound(h64, v3);
+ h64 = XXH64_mergeRound(h64, v4);
+
+ } else {
+ h64 = seed + PRIME64_5;
+ }
+
+ h64 += (U64) len;
+
+ return XXH64_finalize(h64, p, len, endian, align);
+}
+
+
+XXH_PUBLIC_API unsigned long long XXH64 (const void* input, size_t len, unsigned long long seed)
+{
+#if 0
+ /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
+ XXH64_state_t state;
+ XXH64_reset(&state, seed);
+ XXH64_update(&state, input, len);
+ return XXH64_digest(&state);
+#else
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if (XXH_FORCE_ALIGN_CHECK) {
+ if ((((size_t)input) & 7)==0) { /* Input is aligned, let's leverage the speed advantage */
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_aligned);
+ else
+ return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_aligned);
+ } }
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_endian_align(input, len, seed, XXH_littleEndian, XXH_unaligned);
+ else
+ return XXH64_endian_align(input, len, seed, XXH_bigEndian, XXH_unaligned);
+#endif
+}
+
+/*====== Hash Streaming ======*/
+
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
+{
+ return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
+}
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
+{
+ XXH_free(statePtr);
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dstState, const XXH64_state_t* srcState)
+{
+ memcpy(dstState, srcState, sizeof(*dstState));
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH64_state_t* statePtr, unsigned long long seed)
+{
+ XXH64_state_t state; /* using a local state to memcpy() in order to avoid strict-aliasing warnings */
+ memset(&state, 0, sizeof(state));
+ state.v1 = seed + PRIME64_1 + PRIME64_2;
+ state.v2 = seed + PRIME64_2;
+ state.v3 = seed + 0;
+ state.v4 = seed - PRIME64_1;
+ /* do not write into reserved, planned to be removed in a future version */
+ memcpy(statePtr, &state, sizeof(state) - sizeof(state.reserved));
+ return XXH_OK;
+}
+
+FORCE_INLINE
+XXH_errorcode XXH64_update_endian (XXH64_state_t* state, const void* input, size_t len, XXH_endianess endian)
+{
+ const BYTE* p = (const BYTE*)input;
+ const BYTE* const bEnd = p + len;
+
+ if (input==NULL)
+#if defined(XXH_ACCEPT_NULL_INPUT_POINTER) && (XXH_ACCEPT_NULL_INPUT_POINTER>=1)
+ return XXH_OK;
+#else
+ return XXH_ERROR;
+#endif
+
+ state->total_len += len;
+
+ if (state->memsize + len < 32) { /* fill in tmp buffer */
+ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, len);
+ state->memsize += (U32)len;
+ return XXH_OK;
+ }
+
+ if (state->memsize) { /* tmp buffer is full */
+ XXH_memcpy(((BYTE*)state->mem64) + state->memsize, input, 32-state->memsize);
+ state->v1 = XXH64_round(state->v1, XXH_readLE64(state->mem64+0, endian));
+ state->v2 = XXH64_round(state->v2, XXH_readLE64(state->mem64+1, endian));
+ state->v3 = XXH64_round(state->v3, XXH_readLE64(state->mem64+2, endian));
+ state->v4 = XXH64_round(state->v4, XXH_readLE64(state->mem64+3, endian));
+ p += 32-state->memsize;
+ state->memsize = 0;
+ }
+
+ if (p+32 <= bEnd) {
+ const BYTE* const limit = bEnd - 32;
+ U64 v1 = state->v1;
+ U64 v2 = state->v2;
+ U64 v3 = state->v3;
+ U64 v4 = state->v4;
+
+ do {
+ v1 = XXH64_round(v1, XXH_readLE64(p, endian)); p+=8;
+ v2 = XXH64_round(v2, XXH_readLE64(p, endian)); p+=8;
+ v3 = XXH64_round(v3, XXH_readLE64(p, endian)); p+=8;
+ v4 = XXH64_round(v4, XXH_readLE64(p, endian)); p+=8;
+ } while (p<=limit);
+
+ state->v1 = v1;
+ state->v2 = v2;
+ state->v3 = v3;
+ state->v4 = v4;
+ }
+
+ if (p < bEnd) {
+ XXH_memcpy(state->mem64, p, (size_t)(bEnd-p));
+ state->memsize = (unsigned)(bEnd-p);
+ }
+
+ return XXH_OK;
+}
+
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* state_in, const void* input, size_t len)
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_update_endian(state_in, input, len, XXH_littleEndian);
+ else
+ return XXH64_update_endian(state_in, input, len, XXH_bigEndian);
+}
+
+FORCE_INLINE U64 XXH64_digest_endian (const XXH64_state_t* state, XXH_endianess endian)
+{
+ U64 h64;
+
+ if (state->total_len >= 32) {
+ U64 const v1 = state->v1;
+ U64 const v2 = state->v2;
+ U64 const v3 = state->v3;
+ U64 const v4 = state->v4;
+
+ h64 = XXH_rotl64(v1, 1) + XXH_rotl64(v2, 7) + XXH_rotl64(v3, 12) + XXH_rotl64(v4, 18);
+ h64 = XXH64_mergeRound(h64, v1);
+ h64 = XXH64_mergeRound(h64, v2);
+ h64 = XXH64_mergeRound(h64, v3);
+ h64 = XXH64_mergeRound(h64, v4);
+ } else {
+ h64 = state->v3 /*seed*/ + PRIME64_5;
+ }
+
+ h64 += (U64) state->total_len;
+
+ return XXH64_finalize(h64, state->mem64, (size_t)state->total_len, endian, XXH_aligned);
+}
+
+XXH_PUBLIC_API unsigned long long XXH64_digest (const XXH64_state_t* state_in)
+{
+ XXH_endianess endian_detected = (XXH_endianess)XXH_CPU_LITTLE_ENDIAN;
+
+ if ((endian_detected==XXH_littleEndian) || XXH_FORCE_NATIVE_FORMAT)
+ return XXH64_digest_endian(state_in, XXH_littleEndian);
+ else
+ return XXH64_digest_endian(state_in, XXH_bigEndian);
+}
+
+
+/*====== Canonical representation ======*/
+
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash)
+{
+ XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
+ if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
+ memcpy(dst, &hash, sizeof(*dst));
+}
+
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src)
+{
+ return XXH_readBE64(src);
+}
+
+#endif /* XXH_NO_LONG_LONG */
diff --git a/contrib/xxhash/xxhash.h b/contrib/xxhash/xxhash.h
new file mode 100644
index 00000000000..d6bad943358
--- /dev/null
+++ b/contrib/xxhash/xxhash.h
@@ -0,0 +1,328 @@
+/*
+ xxHash - Extremely Fast Hash algorithm
+ Header File
+ Copyright (C) 2012-2016, Yann Collet.
+
+ BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are
+ met:
+
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+ copyright notice, this list of conditions and the following disclaimer
+ in the documentation and/or other materials provided with the
+ distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ You can contact the author at :
+ - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* Notice extracted from xxHash homepage :
+
+xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
+It also successfully passes all tests from the SMHasher suite.
+
+Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
+
+Name Speed Q.Score Author
+xxHash 5.4 GB/s 10
+CrapWow 3.2 GB/s 2 Andrew
+MumurHash 3a 2.7 GB/s 10 Austin Appleby
+SpookyHash 2.0 GB/s 10 Bob Jenkins
+SBox 1.4 GB/s 9 Bret Mulvey
+Lookup3 1.2 GB/s 9 Bob Jenkins
+SuperFastHash 1.2 GB/s 1 Paul Hsieh
+CityHash64 1.05 GB/s 10 Pike & Alakuijala
+FNV 0.55 GB/s 5 Fowler, Noll, Vo
+CRC32 0.43 GB/s 9
+MD5-32 0.33 GB/s 10 Ronald L. Rivest
+SHA1-32 0.28 GB/s 10
+
+Q.Score is a measure of quality of the hash function.
+It depends on successfully passing SMHasher test set.
+10 is a perfect score.
+
+A 64-bit version, named XXH64, is available since r35.
+It offers much better speed, but for 64-bit applications only.
+Name Speed on 64 bits Speed on 32 bits
+XXH64 13.8 GB/s 1.9 GB/s
+XXH32 6.8 GB/s 6.0 GB/s
+*/
+
+#ifndef XXHASH_H_5627135585666179
+#define XXHASH_H_5627135585666179 1
+
+#if defined (__cplusplus)
+extern "C" {
+#endif
+
+
+/* ****************************
+* Definitions
+******************************/
+#include <stddef.h> /* size_t */
+typedef enum { XXH_OK=0, XXH_ERROR } XXH_errorcode;
+
+
+/* ****************************
+ * API modifier
+ ******************************/
+/** XXH_INLINE_ALL (and XXH_PRIVATE_API)
+ * This is useful to include xxhash functions in `static` mode
+ * in order to inline them, and remove their symbol from the public list.
+ * Inlining can offer dramatic performance improvement on small keys.
+ * Methodology :
+ * #define XXH_INLINE_ALL
+ * #include "xxhash.h"
+ * `xxhash.c` is automatically included.
+ * It's not useful to compile and link it as a separate module.
+ */
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+# ifndef XXH_STATIC_LINKING_ONLY
+# define XXH_STATIC_LINKING_ONLY
+# endif
+# if defined(__GNUC__)
+# define XXH_PUBLIC_API static __inline __attribute__((unused))
+# elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
+# define XXH_PUBLIC_API static inline
+# elif defined(_MSC_VER)
+# define XXH_PUBLIC_API static __inline
+# else
+ /* this version may generate warnings for unused static functions */
+# define XXH_PUBLIC_API static
+# endif
+#else
+# define XXH_PUBLIC_API /* do nothing */
+#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
+
+/*! XXH_NAMESPACE, aka Namespace Emulation :
+ *
+ * If you want to include _and expose_ xxHash functions from within your own library,
+ * but also want to avoid symbol collisions with other libraries which may also include xxHash,
+ *
+ * you can use XXH_NAMESPACE, to automatically prefix any public symbol from xxhash library
+ * with the value of XXH_NAMESPACE (therefore, avoid NULL and numeric values).
+ *
+ * Note that no change is required within the calling program as long as it includes `xxhash.h` :
+ * regular symbol name will be automatically translated by this header.
+ */
+#ifdef XXH_NAMESPACE
+# define XXH_CAT(A,B) A##B
+# define XXH_NAME2(A,B) XXH_CAT(A,B)
+# define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
+# define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
+# define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
+# define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
+# define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
+# define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
+# define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
+# define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
+# define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
+# define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
+# define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
+# define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
+# define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
+# define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
+# define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
+# define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
+# define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
+# define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
+# define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
+#endif
+
+
+/* *************************************
+* Version
+***************************************/
+#define XXH_VERSION_MAJOR 0
+#define XXH_VERSION_MINOR 6
+#define XXH_VERSION_RELEASE 5
+#define XXH_VERSION_NUMBER (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
+XXH_PUBLIC_API unsigned XXH_versionNumber (void);
+
+
+/*-**********************************************************************
+* 32-bit hash
+************************************************************************/
+typedef unsigned int XXH32_hash_t;
+
+/*! XXH32() :
+ Calculate the 32-bit hash of sequence "length" bytes stored at memory address "input".
+ The memory between input & input+length must be valid (allocated and read-accessible).
+ "seed" can be used to alter the result predictably.
+ Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s */
+XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t length, unsigned int seed);
+
+/*====== Streaming ======*/
+typedef struct XXH32_state_s XXH32_state_t; /* incomplete type */
+XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void);
+XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr);
+XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
+
+XXH_PUBLIC_API XXH_errorcode XXH32_reset (XXH32_state_t* statePtr, unsigned int seed);
+XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
+
+/*
+ * Streaming functions generate the xxHash of an input provided in multiple segments.
+ * Note that, for small input, they are slower than single-call functions, due to state management.
+ * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
+ *
+ * XXH state must first be allocated, using XXH*_createState() .
+ *
+ * Start a new hash by initializing state with a seed, using XXH*_reset().
+ *
+ * Then, feed the hash state by calling XXH*_update() as many times as necessary.
+ * The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
+ *
+ * Finally, a hash value can be produced anytime, by using XXH*_digest().
+ * This function returns the nn-bits hash as an int or long long.
+ *
+ * It's still possible to continue inserting input into the hash state after a digest,
+ * and generate some new hashes later on, by calling again XXH*_digest().
+ *
+ * When done, free XXH state space if it was allocated dynamically.
+ */
+
+/*====== Canonical representation ======*/
+
+typedef struct { unsigned char digest[4]; } XXH32_canonical_t;
+XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
+XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
+
+/* Default result type for XXH functions are primitive unsigned 32 and 64 bits.
+ * The canonical representation uses human-readable write convention, aka big-endian (large digits first).
+ * These functions allow transformation of hash result into and from its canonical format.
+ * This way, hash values can be written into a file / memory, and remain comparable on different systems and programs.
+ */
+
+
+#ifndef XXH_NO_LONG_LONG
+/*-**********************************************************************
+* 64-bit hash
+************************************************************************/
+typedef unsigned long long XXH64_hash_t;
+
+/*! XXH64() :
+ Calculate the 64-bit hash of sequence of length "len" stored at memory address "input".
+ "seed" can be used to alter the result predictably.
+ This function runs faster on 64-bit systems, but slower on 32-bit systems (see benchmark).
+*/
+XXH_PUBLIC_API XXH64_hash_t XXH64 (const void* input, size_t length, unsigned long long seed);
+
+/*====== Streaming ======*/
+typedef struct XXH64_state_s XXH64_state_t; /* incomplete type */
+XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void);
+XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr);
+XXH_PUBLIC_API void XXH64_copyState(XXH64_state_t* dst_state, const XXH64_state_t* src_state);
+
+XXH_PUBLIC_API XXH_errorcode XXH64_reset (XXH64_state_t* statePtr, unsigned long long seed);
+XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH64_state_t* statePtr, const void* input, size_t length);
+XXH_PUBLIC_API XXH64_hash_t XXH64_digest (const XXH64_state_t* statePtr);
+
+/*====== Canonical representation ======*/
+typedef struct { unsigned char digest[8]; } XXH64_canonical_t;
+XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH64_canonical_t* dst, XXH64_hash_t hash);
+XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(const XXH64_canonical_t* src);
+#endif /* XXH_NO_LONG_LONG */
+
+
+
+#ifdef XXH_STATIC_LINKING_ONLY
+
+/* ================================================================================================
+ This section contains declarations which are not guaranteed to remain stable.
+ They may change in future versions, becoming incompatible with a different version of the library.
+ These declarations should only be used with static linking.
+ Never use them in association with dynamic linking !
+=================================================================================================== */
+
+/* These definitions are only present to allow
+ * static allocation of XXH state, on stack or in a struct for example.
+ * Never **ever** use members directly. */
+
+#if !defined (__VMS) \
+ && (defined (__cplusplus) \
+ || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
+# include <stdint.h>
+
+struct XXH32_state_s {
+ uint32_t total_len_32;
+ uint32_t large_len;
+ uint32_t v1;
+ uint32_t v2;
+ uint32_t v3;
+ uint32_t v4;
+ uint32_t mem32[4];
+ uint32_t memsize;
+ uint32_t reserved; /* never read nor write, might be removed in a future version */
+}; /* typedef'd to XXH32_state_t */
+
+struct XXH64_state_s {
+ uint64_t total_len;
+ uint64_t v1;
+ uint64_t v2;
+ uint64_t v3;
+ uint64_t v4;
+ uint64_t mem64[4];
+ uint32_t memsize;
+ uint32_t reserved[2]; /* never read nor write, might be removed in a future version */
+}; /* typedef'd to XXH64_state_t */
+
+# else
+
+struct XXH32_state_s {
+ unsigned total_len_32;
+ unsigned large_len;
+ unsigned v1;
+ unsigned v2;
+ unsigned v3;
+ unsigned v4;
+ unsigned mem32[4];
+ unsigned memsize;
+ unsigned reserved; /* never read nor write, might be removed in a future version */
+}; /* typedef'd to XXH32_state_t */
+
+# ifndef XXH_NO_LONG_LONG /* remove 64-bit support */
+struct XXH64_state_s {
+ unsigned long long total_len;
+ unsigned long long v1;
+ unsigned long long v2;
+ unsigned long long v3;
+ unsigned long long v4;
+ unsigned long long mem64[4];
+ unsigned memsize;
+ unsigned reserved[2]; /* never read nor write, might be removed in a future version */
+}; /* typedef'd to XXH64_state_t */
+# endif
+
+# endif
+
+
+#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
+# include "xxhash.c" /* include xxhash function bodies as `static`, for inlining */
+#endif
+
+#endif /* XXH_STATIC_LINKING_ONLY */
+
+
+#if defined (__cplusplus)
+}
+#endif
+
+#endif /* XXHASH_H_5627135585666179 */
diff --git a/contrib/xxhash/xxhsum.c b/contrib/xxhash/xxhsum.c
new file mode 100644
index 00000000000..69931f727f0
--- /dev/null
+++ b/contrib/xxhash/xxhsum.c
@@ -0,0 +1,1301 @@
+/*
+* xxhsum - Command line interface for xxhash algorithms
+* Copyright (C) Yann Collet 2012-2016
+*
+* GPL v2 License
+*
+* This program is free software; you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation; either version 2 of the License, or
+* (at your option) any later version.
+*
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License along
+* with this program; if not, write to the Free Software Foundation, Inc.,
+* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*
+* You can contact the author at :
+* - xxHash homepage : http://www.xxhash.com
+* - xxHash source repository : https://github.com/Cyan4973/xxHash
+*/
+
+/* xxhsum :
+ * Provides hash value of a file content, or a list of files, or stdin
+ * Display convention is Big Endian, for both 32 and 64 bits algorithms
+ */
+
+#ifndef XXHASH_C_2097394837
+#define XXHASH_C_2097394837
+
+/* ************************************
+ * Compiler Options
+ **************************************/
+/* MS Visual */
+#if defined(_MSC_VER) || defined(_WIN32)
+# define _CRT_SECURE_NO_WARNINGS /* removes visual warnings */
+#endif
+
+/* Under Linux at least, pull in the *64 commands */
+#ifndef _LARGEFILE64_SOURCE
+# define _LARGEFILE64_SOURCE
+#endif
+
+
+/* ************************************
+ * Includes
+ **************************************/
+#include <stdlib.h> /* malloc, calloc, free, exit */
+#include <stdio.h> /* fprintf, fopen, ftello64, fread, stdin, stdout, _fileno (when present) */
+#include <string.h> /* strcmp */
+#include <sys/types.h> /* stat, stat64, _stat64 */
+#include <sys/stat.h> /* stat, stat64, _stat64 */
+#include <time.h> /* clock_t, clock, CLOCKS_PER_SEC */
+#include <assert.h> /* assert */
+
+#define XXH_STATIC_LINKING_ONLY /* *_state_t */
+#include "xxhash.h"
+
+
+/* ************************************
+ * OS-Specific Includes
+ **************************************/
+#if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(_WIN32) || defined(__CYGWIN__)
+# include <fcntl.h> /* _O_BINARY */
+# include <io.h> /* _setmode, _isatty */
+# define SET_BINARY_MODE(file) _setmode(_fileno(file), _O_BINARY)
+# define IS_CONSOLE(stdStream) _isatty(_fileno(stdStream))
+#else
+# include <unistd.h> /* isatty, STDIN_FILENO */
+# define SET_BINARY_MODE(file)
+# define IS_CONSOLE(stdStream) isatty(STDIN_FILENO)
+#endif
+
+#if !defined(S_ISREG)
+# define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
+#endif
+
+
+/* ************************************
+* Basic Types
+**************************************/
+#ifndef MEM_MODULE
+# define MEM_MODULE
+# if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L /* C99 */
+# include <stdint.h>
+ typedef uint8_t BYTE;
+ typedef uint16_t U16;
+ typedef uint32_t U32;
+ typedef int32_t S32;
+ typedef uint64_t U64;
+# else
+ typedef unsigned char BYTE;
+ typedef unsigned short U16;
+ typedef unsigned int U32;
+ typedef signed int S32;
+ typedef unsigned long long U64;
+# endif
+#endif
+
+static unsigned BMK_isLittleEndian(void)
+{
+ const union { U32 u; BYTE c[4]; } one = { 1 }; /* don't use static : performance detrimental */
+ return one.c[0];
+}
+
+
+/* *************************************
+ * Constants
+ ***************************************/
+#define LIB_VERSION XXH_VERSION_MAJOR.XXH_VERSION_MINOR.XXH_VERSION_RELEASE
+#define QUOTE(str) #str
+#define EXPAND_AND_QUOTE(str) QUOTE(str)
+#define PROGRAM_VERSION EXPAND_AND_QUOTE(LIB_VERSION)
+static const int g_nbBits = (int)(sizeof(void*)*8);
+static const char g_lename[] = "little endian";
+static const char g_bename[] = "big endian";
+#define ENDIAN_NAME (BMK_isLittleEndian() ? g_lename : g_bename)
+static const char author[] = "Yann Collet";
+#define WELCOME_MESSAGE(exename) "%s %s (%i-bits %s), by %s \n", \
+ exename, PROGRAM_VERSION, g_nbBits, ENDIAN_NAME, author
+
+#define KB *( 1<<10)
+#define MB *( 1<<20)
+#define GB *(1U<<30)
+
+static size_t XXH_DEFAULT_SAMPLE_SIZE = 100 KB;
+#define NBLOOPS 3 /* Default number of benchmark iterations */
+#define TIMELOOP_S 1
+#define TIMELOOP (TIMELOOP_S * CLOCKS_PER_SEC) /* Minimum timing per iteration */
+#define XXHSUM32_DEFAULT_SEED 0 /* Default seed for algo_xxh32 */
+#define XXHSUM64_DEFAULT_SEED 0 /* Default seed for algo_xxh64 */
+
+#define MAX_MEM (2 GB - 64 MB)
+
+static const char stdinName[] = "-";
+typedef enum { algo_xxh32, algo_xxh64 } algoType;
+static const algoType g_defaultAlgo = algo_xxh64; /* required within main() & usage() */
+
+/* <16 hex char> <SPC> <SPC> <filename> <'\0'>
+ * '4096' is typical Linux PATH_MAX configuration. */
+#define DEFAULT_LINE_LENGTH (sizeof(XXH64_hash_t) * 2 + 2 + 4096 + 1)
+
+/* Maximum acceptable line length. */
+#define MAX_LINE_LENGTH (32 KB)
+
+
+/* ************************************
+ * Display macros
+ **************************************/
+#define DISPLAY(...) fprintf(stderr, __VA_ARGS__)
+#define DISPLAYRESULT(...) fprintf(stdout, __VA_ARGS__)
+#define DISPLAYLEVEL(l, ...) do { if (g_displayLevel>=l) DISPLAY(__VA_ARGS__); } while (0)
+static int g_displayLevel = 2;
+
+
+/* ************************************
+ * Local variables
+ **************************************/
+static U32 g_nbIterations = NBLOOPS;
+
+
+/* ************************************
+ * Benchmark Functions
+ **************************************/
+static clock_t BMK_clockSpan( clock_t start )
+{
+ return clock() - start; /* works even if overflow; Typical max span ~ 30 mn */
+}
+
+
+static size_t BMK_findMaxMem(U64 requiredMem)
+{
+ size_t const step = 64 MB;
+ void* testmem = NULL;
+
+ requiredMem = (((requiredMem >> 26) + 1) << 26);
+ requiredMem += 2*step;
+ if (requiredMem > MAX_MEM) requiredMem = MAX_MEM;
+
+ while (!testmem) {
+ if (requiredMem > step) requiredMem -= step;
+ else requiredMem >>= 1;
+ testmem = malloc ((size_t)requiredMem);
+ }
+ free (testmem);
+
+ /* keep some space available */
+ if (requiredMem > step) requiredMem -= step;
+ else requiredMem >>= 1;
+
+ return (size_t)requiredMem;
+}
+
+
+static U64 BMK_GetFileSize(const char* infilename)
+{
+ int r;
+#if defined(_MSC_VER)
+ struct _stat64 statbuf;
+ r = _stat64(infilename, &statbuf);
+#else
+ struct stat statbuf;
+ r = stat(infilename, &statbuf);
+#endif
+ if (r || !S_ISREG(statbuf.st_mode)) return 0; /* No good... */
+ return (U64)statbuf.st_size;
+}
+
+typedef U32 (*hashFunction)(const void* buffer, size_t bufferSize, U32 seed);
+
+static U32 localXXH32(const void* buffer, size_t bufferSize, U32 seed) { return XXH32(buffer, bufferSize, seed); }
+
+static U32 localXXH64(const void* buffer, size_t bufferSize, U32 seed) { return (U32)XXH64(buffer, bufferSize, seed); }
+
+static void BMK_benchHash(hashFunction h, const char* hName, const void* buffer, size_t bufferSize)
+{
+ U32 nbh_perIteration = ((300 MB) / (bufferSize+1)) + 1; /* first loop conservatively aims for 300 MB/s */
+ U32 iterationNb;
+ double fastestH = 100000000.;
+
+ DISPLAYLEVEL(2, "\r%70s\r", ""); /* Clean display line */
+ if (g_nbIterations<1) g_nbIterations=1;
+ for (iterationNb = 1; iterationNb <= g_nbIterations; iterationNb++) {
+ U32 r=0;
+ clock_t cStart;
+
+ DISPLAYLEVEL(2, "%1i-%-17.17s : %10u ->\r", iterationNb, hName, (U32)bufferSize);
+ cStart = clock();
+ while (clock() == cStart); /* starts clock() at its exact beginning */
+ cStart = clock();
+
+ { U32 i;
+ for (i=0; i<nbh_perIteration; i++)
+ r += h(buffer, bufferSize, i);
+ }
+ if (r==0) DISPLAYLEVEL(3,".\r"); /* do something with r to avoid compiler "optimizing" away hash function */
+ { double const timeS = ((double)BMK_clockSpan(cStart) / CLOCKS_PER_SEC) / nbh_perIteration;
+ if (timeS < fastestH) fastestH = timeS;
+ DISPLAYLEVEL(2, "%1i-%-17.17s : %10u -> %8.0f it/s (%7.1f MB/s) \r",
+ iterationNb, hName, (U32)bufferSize,
+ (double)1 / fastestH,
+ ((double)bufferSize / (1<<20)) / fastestH );
+ }
+ assert(fastestH > 1./2000000000); /* avoid U32 overflow */
+ nbh_perIteration = (U32)(1 / fastestH) + 1; /* adjust nbh_perIteration to last roughtly one second */
+ }
+ DISPLAYLEVEL(1, "%-19.19s : %10u -> %8.0f it/s (%7.1f MB/s) \n", hName, (U32)bufferSize,
+ (double)1 / fastestH,
+ ((double)bufferSize / (1<<20)) / fastestH);
+ if (g_displayLevel<1)
+ DISPLAYLEVEL(0, "%u, ", (U32)((double)1 / fastestH));
+}
+
+
+/* BMK_benchMem():
+ * specificTest : 0 == run all tests, 1+ run only specific test
+ * buffer : is supposed 8-bytes aligned (if malloc'ed, it should be)
+ * the real allocated size of buffer is supposed to be >= (bufferSize+3).
+ * @return : 0 on success, 1 if error (invalid mode selected) */
+static int BMK_benchMem(const void* buffer, size_t bufferSize, U32 specificTest)
+{
+ assert((((size_t)buffer) & 8) == 0); /* ensure alignment */
+
+ /* XXH32 bench */
+ if ((specificTest==0) | (specificTest==1))
+ BMK_benchHash(localXXH32, "XXH32", buffer, bufferSize);
+
+ /* Bench XXH32 on Unaligned input */
+ if ((specificTest==0) | (specificTest==2))
+ BMK_benchHash(localXXH32, "XXH32 unaligned", ((const char*)buffer)+1, bufferSize);
+
+ /* Bench XXH64 */
+ if ((specificTest==0) | (specificTest==3))
+ BMK_benchHash(localXXH64, "XXH64", buffer, bufferSize);
+
+ /* Bench XXH64 on Unaligned input */
+ if ((specificTest==0) | (specificTest==4))
+ BMK_benchHash(localXXH64, "XXH64 unaligned", ((const char*)buffer)+3, bufferSize);
+
+ if (specificTest > 4) {
+ DISPLAY("benchmark mode invalid \n");
+ return 1;
+ }
+ return 0;
+}
+
+
+static size_t BMK_selectBenchedSize(const char* fileName)
+{ U64 const inFileSize = BMK_GetFileSize(fileName);
+ size_t benchedSize = (size_t) BMK_findMaxMem(inFileSize);
+ if ((U64)benchedSize > inFileSize) benchedSize = (size_t)inFileSize;
+ if (benchedSize < inFileSize) {
+ DISPLAY("Not enough memory for '%s' full size; testing %i MB only...\n", fileName, (int)(benchedSize>>20));
+ }
+ return benchedSize;
+}
+
+
+static int BMK_benchFiles(const char** fileNamesTable, int nbFiles, U32 specificTest)
+{
+ int result = 0;
+ int fileIdx;
+
+ for (fileIdx=0; fileIdx<nbFiles; fileIdx++) {
+ const char* const inFileName = fileNamesTable[fileIdx];
+ FILE* const inFile = fopen( inFileName, "rb" );
+ size_t const benchedSize = BMK_selectBenchedSize(inFileName);
+ char* const buffer = (char*)calloc(benchedSize+16+3, 1);
+ void* const alignedBuffer = (buffer+15) - (((size_t)(buffer+15)) & 0xF); /* align on next 16 bytes */
+
+ /* Checks */
+ if ((inFile==NULL) || (inFileName==NULL)) {
+ DISPLAY("Pb opening %s\n", inFileName);
+ free(buffer);
+ return 11;
+ }
+ if(!buffer) {
+ DISPLAY("\nError: not enough memory!\n");
+ fclose(inFile);
+ return 12;
+ }
+
+ /* Fill input buffer */
+ DISPLAYLEVEL(1, "\rLoading %s... \n", inFileName);
+ { size_t const readSize = fread(alignedBuffer, 1, benchedSize, inFile);
+ fclose(inFile);
+ if(readSize != benchedSize) {
+ DISPLAY("\nError: problem reading file '%s' !! \n", inFileName);
+ free(buffer);
+ return 13;
+ } }
+
+ /* bench */
+ result |= BMK_benchMem(alignedBuffer, benchedSize, specificTest);
+
+ free(buffer);
+ }
+
+ return result;
+}
+
+
+
+static int BMK_benchInternal(size_t keySize, int specificTest)
+{
+ void* const buffer = calloc(keySize+16+3, 1);
+ void* const alignedBuffer = ((char*)buffer+15) - (((size_t)((char*)buffer+15)) & 0xF); /* align on next 16 bytes */
+ if(!buffer) {
+ DISPLAY("\nError: not enough memory!\n");
+ return 12;
+ }
+
+ /* bench */
+ DISPLAYLEVEL(1, "Sample of ");
+ if (keySize > 10 KB) {
+ DISPLAYLEVEL(1, "%u KB", (U32)(keySize >> 10));
+ } else {
+ DISPLAYLEVEL(1, "%u bytes", (U32)keySize);
+ }
+ DISPLAYLEVEL(1, "... \n");
+
+ { int const result = BMK_benchMem(alignedBuffer, keySize, specificTest);
+ free(buffer);
+ return result;
+ }
+}
+
+
+static void BMK_checkResult(U32 r1, U32 r2)
+{
+ static int nbTests = 1;
+ if (r1==r2) {
+ DISPLAYLEVEL(3, "\rTest%3i : %08X == %08X ok ", nbTests, r1, r2);
+ } else {
+ DISPLAY("\rERROR : Test%3i : %08X <> %08X !!!!! \n", nbTests, r1, r2);
+ exit(1);
+ }
+ nbTests++;
+}
+
+
+static void BMK_checkResult64(U64 r1, U64 r2)
+{
+ static int nbTests = 1;
+ if (r1!=r2) {
+ DISPLAY("\rERROR : Test%3i : 64-bit values non equals !!!!! \n", nbTests);
+ DISPLAY("\r %08X%08X != %08X%08X \n", (U32)(r1>>32), (U32)r1, (U32)(r2>>32), (U32)r2);
+ exit(1);
+ }
+ nbTests++;
+}
+
+
+static void BMK_testSequence64(void* sentence, size_t len, U64 seed, U64 Nresult)
+{
+ XXH64_state_t state;
+ U64 Dresult;
+ size_t pos;
+
+ Dresult = XXH64(sentence, len, seed);
+ BMK_checkResult64(Dresult, Nresult);
+
+ XXH64_reset(&state, seed);
+ XXH64_update(&state, sentence, len);
+ Dresult = XXH64_digest(&state);
+ BMK_checkResult64(Dresult, Nresult);
+
+ XXH64_reset(&state, seed);
+ for (pos=0; pos<len; pos++)
+ XXH64_update(&state, ((char*)sentence)+pos, 1);
+ Dresult = XXH64_digest(&state);
+ BMK_checkResult64(Dresult, Nresult);
+}
+
+
+static void BMK_testSequence(const void* sequence, size_t len, U32 seed, U32 Nresult)
+{
+ XXH32_state_t state;
+ U32 Dresult;
+ size_t pos;
+
+ Dresult = XXH32(sequence, len, seed);
+ BMK_checkResult(Dresult, Nresult);
+
+ XXH32_reset(&state, seed);
+ XXH32_update(&state, sequence, len);
+ Dresult = XXH32_digest(&state);
+ BMK_checkResult(Dresult, Nresult);
+
+ XXH32_reset(&state, seed);
+ for (pos=0; pos<len; pos++)
+ XXH32_update(&state, ((const char*)sequence)+pos, 1);
+ Dresult = XXH32_digest(&state);
+ BMK_checkResult(Dresult, Nresult);
+}
+
+
+#define SANITY_BUFFER_SIZE 101
+static void BMK_sanityCheck(void)
+{
+ static const U32 prime = 2654435761U;
+ BYTE sanityBuffer[SANITY_BUFFER_SIZE];
+ U32 byteGen = prime;
+
+ int i;
+ for (i=0; i<SANITY_BUFFER_SIZE; i++) {
+ sanityBuffer[i] = (BYTE)(byteGen>>24);
+ byteGen *= byteGen;
+ }
+
+ BMK_testSequence(NULL, 0, 0, 0x02CC5D05);
+ BMK_testSequence(NULL, 0, prime, 0x36B78AE7);
+ BMK_testSequence(sanityBuffer, 1, 0, 0xB85CBEE5);
+ BMK_testSequence(sanityBuffer, 1, prime, 0xD5845D64);
+ BMK_testSequence(sanityBuffer, 14, 0, 0xE5AA0AB4);
+ BMK_testSequence(sanityBuffer, 14, prime, 0x4481951D);
+ BMK_testSequence(sanityBuffer, SANITY_BUFFER_SIZE, 0, 0x1F1AA412);
+ BMK_testSequence(sanityBuffer, SANITY_BUFFER_SIZE, prime, 0x498EC8E2);
+
+ BMK_testSequence64(NULL , 0, 0, 0xEF46DB3751D8E999ULL);
+ BMK_testSequence64(NULL , 0, prime, 0xAC75FDA2929B17EFULL);
+ BMK_testSequence64(sanityBuffer, 1, 0, 0x4FCE394CC88952D8ULL);
+ BMK_testSequence64(sanityBuffer, 1, prime, 0x739840CB819FA723ULL);
+ BMK_testSequence64(sanityBuffer, 14, 0, 0xCFFA8DB881BC3A3DULL);
+ BMK_testSequence64(sanityBuffer, 14, prime, 0x5B9611585EFCC9CBULL);
+ BMK_testSequence64(sanityBuffer, SANITY_BUFFER_SIZE, 0, 0x0EAB543384F878ADULL);
+ BMK_testSequence64(sanityBuffer, SANITY_BUFFER_SIZE, prime, 0xCAA65939306F1E21ULL);
+
+ DISPLAYLEVEL(3, "\r%70s\r", ""); /* Clean display line */
+ DISPLAYLEVEL(3, "Sanity check -- all tests ok\n");
+}
+
+
+/* ********************************************************
+* File Hashing
+**********************************************************/
+
+static void BMK_display_LittleEndian(const void* ptr, size_t length)
+{
+ const BYTE* p = (const BYTE*)ptr;
+ size_t idx;
+ for (idx=length-1; idx<length; idx--) /* intentional underflow to negative to detect end */
+ DISPLAYRESULT("%02x", p[idx]);
+}
+
+static void BMK_display_BigEndian(const void* ptr, size_t length)
+{
+ const BYTE* p = (const BYTE*)ptr;
+ size_t idx;
+ for (idx=0; idx<length; idx++)
+ DISPLAYRESULT("%02x", p[idx]);
+}
+
+static void BMK_hashStream(void* xxhHashValue, const algoType hashType, FILE* inFile, void* buffer, size_t blockSize)
+{
+ XXH64_state_t state64;
+ XXH32_state_t state32;
+ size_t readSize;
+
+ /* Init */
+ XXH32_reset(&state32, XXHSUM32_DEFAULT_SEED);
+ XXH64_reset(&state64, XXHSUM64_DEFAULT_SEED);
+
+ /* Load file & update hash */
+ readSize = 1;
+ while (readSize) {
+ readSize = fread(buffer, 1, blockSize, inFile);
+ switch(hashType)
+ {
+ case algo_xxh32:
+ XXH32_update(&state32, buffer, readSize);
+ break;
+ case algo_xxh64:
+ XXH64_update(&state64, buffer, readSize);
+ break;
+ default:
+ break;
+ }
+ }
+
+ switch(hashType)
+ {
+ case algo_xxh32:
+ { U32 const h32 = XXH32_digest(&state32);
+ memcpy(xxhHashValue, &h32, sizeof(h32));
+ break;
+ }
+ case algo_xxh64:
+ { U64 const h64 = XXH64_digest(&state64);
+ memcpy(xxhHashValue, &h64, sizeof(h64));
+ break;
+ }
+ default:
+ break;
+ }
+}
+
+
+typedef enum { big_endian, little_endian} endianess;
+
+static int BMK_hash(const char* fileName,
+ const algoType hashType,
+ const endianess displayEndianess)
+{
+ FILE* inFile;
+ size_t const blockSize = 64 KB;
+ void* buffer;
+ U32 h32 = 0;
+ U64 h64 = 0;
+
+ /* Check file existence */
+ if (fileName == stdinName) {
+ inFile = stdin;
+ SET_BINARY_MODE(stdin);
+ }
+ else
+ inFile = fopen( fileName, "rb" );
+ if (inFile==NULL) {
+ DISPLAY( "Pb opening %s\n", fileName);
+ return 1;
+ }
+
+ /* Memory allocation & restrictions */
+ buffer = malloc(blockSize);
+ if(!buffer) {
+ DISPLAY("\nError: not enough memory!\n");
+ fclose(inFile);
+ return 1;
+ }
+
+ /* loading notification */
+ { const size_t fileNameSize = strlen(fileName);
+ const char* const fileNameEnd = fileName + fileNameSize;
+ const int maxInfoFilenameSize = (int)(fileNameSize > 30 ? 30 : fileNameSize);
+ int infoFilenameSize = 1;
+ while ((infoFilenameSize < maxInfoFilenameSize)
+ && (fileNameEnd[-1-infoFilenameSize] != '/')
+ && (fileNameEnd[-1-infoFilenameSize] != '\\') )
+ infoFilenameSize++;
+ DISPLAY("\rLoading %s... \r", fileNameEnd - infoFilenameSize);
+
+ /* Load file & update hash */
+ switch(hashType)
+ {
+ case algo_xxh32:
+ BMK_hashStream(&h32, algo_xxh32, inFile, buffer, blockSize);
+ break;
+ case algo_xxh64:
+ BMK_hashStream(&h64, algo_xxh64, inFile, buffer, blockSize);
+ break;
+ default:
+ break;
+ }
+
+ fclose(inFile);
+ free(buffer);
+ DISPLAY("%s \r", fileNameEnd - infoFilenameSize); /* erase line */
+ }
+
+ /* display Hash */
+ switch(hashType)
+ {
+ case algo_xxh32:
+ { XXH32_canonical_t hcbe32;
+ XXH32_canonicalFromHash(&hcbe32, h32);
+ displayEndianess==big_endian ?
+ BMK_display_BigEndian(&hcbe32, sizeof(hcbe32)) : BMK_display_LittleEndian(&hcbe32, sizeof(hcbe32));
+ DISPLAYRESULT(" %s\n", fileName);
+ break;
+ }
+ case algo_xxh64:
+ { XXH64_canonical_t hcbe64;
+ XXH64_canonicalFromHash(&hcbe64, h64);
+ displayEndianess==big_endian ?
+ BMK_display_BigEndian(&hcbe64, sizeof(hcbe64)) : BMK_display_LittleEndian(&hcbe64, sizeof(hcbe64));
+ DISPLAYRESULT(" %s\n", fileName);
+ break;
+ }
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+
+static int BMK_hashFiles(const char** fnList, int fnTotal,
+ algoType hashType, endianess displayEndianess)
+{
+ int fnNb;
+ int result = 0;
+
+ if (fnTotal==0)
+ return BMK_hash(stdinName, hashType, displayEndianess);
+
+ for (fnNb=0; fnNb<fnTotal; fnNb++)
+ result += BMK_hash(fnList[fnNb], hashType, displayEndianess);
+ DISPLAY("\r%70s\r", "");
+ return result;
+}
+
+
+typedef enum {
+ GetLine_ok,
+ GetLine_eof,
+ GetLine_exceedMaxLineLength,
+ GetLine_outOfMemory,
+} GetLineResult;
+
+typedef enum {
+ CanonicalFromString_ok,
+ CanonicalFromString_invalidFormat,
+} CanonicalFromStringResult;
+
+typedef enum {
+ ParseLine_ok,
+ ParseLine_invalidFormat,
+} ParseLineResult;
+
+typedef enum {
+ LineStatus_hashOk,
+ LineStatus_hashFailed,
+ LineStatus_failedToOpen,
+} LineStatus;
+
+typedef union {
+ XXH32_canonical_t xxh32;
+ XXH64_canonical_t xxh64;
+} Canonical;
+
+typedef struct {
+ Canonical canonical;
+ const char* filename;
+ int xxhBits; /* canonical type : 32:xxh32, 64:xxh64 */
+} ParsedLine;
+
+typedef struct {
+ unsigned long nProperlyFormattedLines;
+ unsigned long nImproperlyFormattedLines;
+ unsigned long nMismatchedChecksums;
+ unsigned long nOpenOrReadFailures;
+ unsigned long nMixedFormatLines;
+ int xxhBits;
+ int quit;
+} ParseFileReport;
+
+typedef struct {
+ const char* inFileName;
+ FILE* inFile;
+ int lineMax;
+ char* lineBuf;
+ size_t blockSize;
+ char* blockBuf;
+ int strictMode;
+ int statusOnly;
+ int warn;
+ int quiet;
+ ParseFileReport report;
+} ParseFileArg;
+
+
+/* Read line from stream.
+ Returns GetLine_ok, if it reads line successfully.
+ Returns GetLine_eof, if stream reaches EOF.
+ Returns GetLine_exceedMaxLineLength, if line length is longer than MAX_LINE_LENGTH.
+ Returns GetLine_outOfMemory, if line buffer memory allocation failed.
+ */
+static GetLineResult getLine(char** lineBuf, int* lineMax, FILE* inFile)
+{
+ GetLineResult result = GetLine_ok;
+ int len = 0;
+
+ if ((*lineBuf == NULL) || (*lineMax<1)) {
+ free(*lineBuf); /* in case it's != NULL */
+ *lineMax = 0;
+ *lineBuf = (char*)malloc(DEFAULT_LINE_LENGTH);
+ if(*lineBuf == NULL) return GetLine_outOfMemory;
+ *lineMax = DEFAULT_LINE_LENGTH;
+ }
+
+ for (;;) {
+ const int c = fgetc(inFile);
+ if (c == EOF) {
+ /* If we meet EOF before first character, returns GetLine_eof,
+ * otherwise GetLine_ok.
+ */
+ if (len == 0) result = GetLine_eof;
+ break;
+ }
+
+ /* Make enough space for len+1 (for final NUL) bytes. */
+ if (len+1 >= *lineMax) {
+ char* newLineBuf = NULL;
+ int newBufSize = *lineMax;
+
+ newBufSize += (newBufSize/2) + 1; /* x 1.5 */
+ if (newBufSize > MAX_LINE_LENGTH) newBufSize = MAX_LINE_LENGTH;
+ if (len+1 >= newBufSize) return GetLine_exceedMaxLineLength;
+
+ newLineBuf = (char*) realloc(*lineBuf, newBufSize);
+ if (newLineBuf == NULL) return GetLine_outOfMemory;
+
+ *lineBuf = newLineBuf;
+ *lineMax = newBufSize;
+ }
+
+ if (c == '\n') break;
+ (*lineBuf)[len++] = (char) c;
+ }
+
+ (*lineBuf)[len] = '\0';
+ return result;
+}
+
+
+/* Converts one hexadecimal character to integer.
+ * Returns -1, if given character is not hexadecimal.
+ */
+static int charToHex(char c)
+{
+ int result = -1;
+ if (c >= '0' && c <= '9') {
+ result = (int) (c - '0');
+ } else if (c >= 'A' && c <= 'F') {
+ result = (int) (c - 'A') + 0x0a;
+ } else if (c >= 'a' && c <= 'f') {
+ result = (int) (c - 'a') + 0x0a;
+ }
+ return result;
+}
+
+
+/* Converts XXH32 canonical hexadecimal string hashStr to big endian unsigned char array dst.
+ * Returns CANONICAL_FROM_STRING_INVALID_FORMAT, if hashStr is not well formatted.
+ * Returns CANONICAL_FROM_STRING_OK, if hashStr is parsed successfully.
+ */
+static CanonicalFromStringResult canonicalFromString(unsigned char* dst,
+ size_t dstSize,
+ const char* hashStr)
+{
+ size_t i;
+ for (i = 0; i < dstSize; ++i) {
+ int h0, h1;
+
+ h0 = charToHex(hashStr[i*2 + 0]);
+ if (h0 < 0) return CanonicalFromString_invalidFormat;
+
+ h1 = charToHex(hashStr[i*2 + 1]);
+ if (h1 < 0) return CanonicalFromString_invalidFormat;
+
+ dst[i] = (unsigned char) ((h0 << 4) | h1);
+ }
+ return CanonicalFromString_ok;
+}
+
+
+/* Parse single line of xxHash checksum file.
+ * Returns PARSE_LINE_ERROR_INVALID_FORMAT, if line is not well formatted.
+ * Returns PARSE_LINE_OK if line is parsed successfully.
+ * And members of parseLine will be filled by parsed values.
+ *
+ * - line must be ended with '\0'.
+ * - Since parsedLine.filename will point within given argument `line`,
+ * users must keep `line`s content during they are using parsedLine.
+ *
+ * Given xxHash checksum line should have the following format:
+ *
+ * <8 or 16 hexadecimal char> <space> <space> <filename...> <'\0'>
+ */
+static ParseLineResult parseLine(ParsedLine* parsedLine, const char* line)
+{
+ const char* const firstSpace = strchr(line, ' ');
+ const char* const secondSpace = firstSpace + 1;
+
+ parsedLine->filename = NULL;
+ parsedLine->xxhBits = 0;
+
+ if (firstSpace == NULL || *secondSpace != ' ') return ParseLine_invalidFormat;
+
+ switch (firstSpace - line)
+ {
+ case 8:
+ { XXH32_canonical_t* xxh32c = &parsedLine->canonical.xxh32;
+ if (canonicalFromString(xxh32c->digest, sizeof(xxh32c->digest), line)
+ != CanonicalFromString_ok) {
+ return ParseLine_invalidFormat;
+ }
+ parsedLine->xxhBits = 32;
+ break;
+ }
+
+ case 16:
+ { XXH64_canonical_t* xxh64c = &parsedLine->canonical.xxh64;
+ if (canonicalFromString(xxh64c->digest, sizeof(xxh64c->digest), line)
+ != CanonicalFromString_ok) {
+ return ParseLine_invalidFormat;
+ }
+ parsedLine->xxhBits = 64;
+ break;
+ }
+
+ default:
+ return ParseLine_invalidFormat;
+ break;
+ }
+
+ parsedLine->filename = secondSpace + 1;
+ return ParseLine_ok;
+}
+
+
+/*! Parse xxHash checksum file.
+ */
+static void parseFile1(ParseFileArg* parseFileArg)
+{
+ const char* const inFileName = parseFileArg->inFileName;
+ ParseFileReport* const report = &parseFileArg->report;
+
+ unsigned long lineNumber = 0;
+ memset(report, 0, sizeof(*report));
+
+ while (!report->quit) {
+ FILE* fp = NULL;
+ LineStatus lineStatus = LineStatus_hashFailed;
+ GetLineResult getLineResult;
+ ParsedLine parsedLine;
+ memset(&parsedLine, 0, sizeof(parsedLine));
+
+ lineNumber++;
+ if (lineNumber == 0) {
+ /* This is unlikely happen, but md5sum.c has this
+ * error check. */
+ DISPLAY("%s : too many checksum lines\n", inFileName);
+ report->quit = 1;
+ break;
+ }
+
+ getLineResult = getLine(&parseFileArg->lineBuf, &parseFileArg->lineMax,
+ parseFileArg->inFile);
+ if (getLineResult != GetLine_ok) {
+ if (getLineResult == GetLine_eof) break;
+
+ switch (getLineResult)
+ {
+ case GetLine_ok:
+ case GetLine_eof:
+ /* These cases never happen. See above getLineResult related "if"s.
+ They exist just for make gcc's -Wswitch-enum happy. */
+ break;
+
+ default:
+ DISPLAY("%s : %lu: unknown error\n", inFileName, lineNumber);
+ break;
+
+ case GetLine_exceedMaxLineLength:
+ DISPLAY("%s : %lu: too long line\n", inFileName, lineNumber);
+ break;
+
+ case GetLine_outOfMemory:
+ DISPLAY("%s : %lu: out of memory\n", inFileName, lineNumber);
+ break;
+ }
+ report->quit = 1;
+ break;
+ }
+
+ if (parseLine(&parsedLine, parseFileArg->lineBuf) != ParseLine_ok) {
+ report->nImproperlyFormattedLines++;
+ if (parseFileArg->warn) {
+ DISPLAY("%s : %lu: improperly formatted XXHASH checksum line\n"
+ , inFileName, lineNumber);
+ }
+ continue;
+ }
+
+ if (report->xxhBits != 0 && report->xxhBits != parsedLine.xxhBits) {
+ /* Don't accept xxh32/xxh64 mixed file */
+ report->nImproperlyFormattedLines++;
+ report->nMixedFormatLines++;
+ if (parseFileArg->warn) {
+ DISPLAY("%s : %lu: improperly formatted XXHASH checksum line (XXH32/64)\n"
+ , inFileName, lineNumber);
+ }
+ continue;
+ }
+
+ report->nProperlyFormattedLines++;
+ if (report->xxhBits == 0) {
+ report->xxhBits = parsedLine.xxhBits;
+ }
+
+ fp = fopen(parsedLine.filename, "rb");
+ if (fp == NULL) {
+ lineStatus = LineStatus_failedToOpen;
+ } else {
+ lineStatus = LineStatus_hashFailed;
+ switch (parsedLine.xxhBits)
+ {
+ case 32:
+ { XXH32_hash_t xxh;
+ BMK_hashStream(&xxh, algo_xxh32, fp, parseFileArg->blockBuf, parseFileArg->blockSize);
+ if (xxh == XXH32_hashFromCanonical(&parsedLine.canonical.xxh32)) {
+ lineStatus = LineStatus_hashOk;
+ } }
+ break;
+
+ case 64:
+ { XXH64_hash_t xxh;
+ BMK_hashStream(&xxh, algo_xxh64, fp, parseFileArg->blockBuf, parseFileArg->blockSize);
+ if (xxh == XXH64_hashFromCanonical(&parsedLine.canonical.xxh64)) {
+ lineStatus = LineStatus_hashOk;
+ } }
+ break;
+
+ default:
+ break;
+ }
+ fclose(fp);
+ }
+
+ switch (lineStatus)
+ {
+ default:
+ DISPLAY("%s : unknown error\n", inFileName);
+ report->quit = 1;
+ break;
+
+ case LineStatus_failedToOpen:
+ report->nOpenOrReadFailures++;
+ if (!parseFileArg->statusOnly) {
+ DISPLAYRESULT("%s : %lu: FAILED open or read %s\n"
+ , inFileName, lineNumber, parsedLine.filename);
+ }
+ break;
+
+ case LineStatus_hashOk:
+ case LineStatus_hashFailed:
+ { int b = 1;
+ if (lineStatus == LineStatus_hashOk) {
+ /* If --quiet is specified, don't display "OK" */
+ if (parseFileArg->quiet) b = 0;
+ } else {
+ report->nMismatchedChecksums++;
+ }
+
+ if (b && !parseFileArg->statusOnly) {
+ DISPLAYRESULT("%s: %s\n", parsedLine.filename
+ , lineStatus == LineStatus_hashOk ? "OK" : "FAILED");
+ } }
+ break;
+ }
+ } /* while (!report->quit) */
+}
+
+
+/* Parse xxHash checksum file.
+ * Returns 1, if all procedures were succeeded.
+ * Returns 0, if any procedures was failed.
+ *
+ * If strictMode != 0, return error code if any line is invalid.
+ * If statusOnly != 0, don't generate any output.
+ * If warn != 0, print a warning message to stderr.
+ * If quiet != 0, suppress "OK" line.
+ *
+ * "All procedures are succeeded" means:
+ * - Checksum file contains at least one line and less than SIZE_T_MAX lines.
+ * - All files are properly opened and read.
+ * - All hash values match with its content.
+ * - (strict mode) All lines in checksum file are consistent and well formatted.
+ *
+ */
+static int checkFile(const char* inFileName,
+ const endianess displayEndianess,
+ U32 strictMode,
+ U32 statusOnly,
+ U32 warn,
+ U32 quiet)
+{
+ int result = 0;
+ FILE* inFile = NULL;
+ ParseFileArg parseFileArgBody;
+ ParseFileArg* const parseFileArg = &parseFileArgBody;
+ ParseFileReport* const report = &parseFileArg->report;
+
+ if (displayEndianess != big_endian) {
+ /* Don't accept little endian */
+ DISPLAY( "Check file mode doesn't support little endian\n" );
+ return 0;
+ }
+
+ /* note : stdinName is special constant pointer. It is not a string. */
+ if (inFileName == stdinName) {
+ /* note : Since we expect text input for xxhash -c mode,
+ * Don't set binary mode for stdin */
+ inFile = stdin;
+ } else {
+ inFile = fopen( inFileName, "rt" );
+ }
+
+ if (inFile == NULL) {
+ DISPLAY( "Pb opening %s\n", inFileName);
+ return 0;
+ }
+
+ parseFileArg->inFileName = inFileName;
+ parseFileArg->inFile = inFile;
+ parseFileArg->lineMax = DEFAULT_LINE_LENGTH;
+ parseFileArg->lineBuf = (char*) malloc((size_t) parseFileArg->lineMax);
+ parseFileArg->blockSize = 64 * 1024;
+ parseFileArg->blockBuf = (char*) malloc(parseFileArg->blockSize);
+ parseFileArg->strictMode = strictMode;
+ parseFileArg->statusOnly = statusOnly;
+ parseFileArg->warn = warn;
+ parseFileArg->quiet = quiet;
+
+ parseFile1(parseFileArg);
+
+ free(parseFileArg->blockBuf);
+ free(parseFileArg->lineBuf);
+
+ if (inFile != stdin) fclose(inFile);
+
+ /* Show error/warning messages. All messages are copied from md5sum.c
+ */
+ if (report->nProperlyFormattedLines == 0) {
+ DISPLAY("%s: no properly formatted XXHASH checksum lines found\n", inFileName);
+ } else if (!statusOnly) {
+ if (report->nImproperlyFormattedLines) {
+ DISPLAYRESULT("%lu lines are improperly formatted\n"
+ , report->nImproperlyFormattedLines);
+ }
+ if (report->nOpenOrReadFailures) {
+ DISPLAYRESULT("%lu listed files could not be read\n"
+ , report->nOpenOrReadFailures);
+ }
+ if (report->nMismatchedChecksums) {
+ DISPLAYRESULT("%lu computed checksums did NOT match\n"
+ , report->nMismatchedChecksums);
+ } }
+
+ /* Result (exit) code logic is copied from
+ * gnu coreutils/src/md5sum.c digest_check() */
+ result = report->nProperlyFormattedLines != 0
+ && report->nMismatchedChecksums == 0
+ && report->nOpenOrReadFailures == 0
+ && (!strictMode || report->nImproperlyFormattedLines == 0)
+ && report->quit == 0;
+ return result;
+}
+
+
+static int checkFiles(const char** fnList, int fnTotal,
+ const endianess displayEndianess,
+ U32 strictMode,
+ U32 statusOnly,
+ U32 warn,
+ U32 quiet)
+{
+ int ok = 1;
+
+ /* Special case for stdinName "-",
+ * note: stdinName is not a string. It's special pointer. */
+ if (fnTotal==0) {
+ ok &= checkFile(stdinName, displayEndianess, strictMode, statusOnly, warn, quiet);
+ } else {
+ int fnNb;
+ for (fnNb=0; fnNb<fnTotal; fnNb++)
+ ok &= checkFile(fnList[fnNb], displayEndianess, strictMode, statusOnly, warn, quiet);
+ }
+ return ok ? 0 : 1;
+}
+
+
+/* ********************************************************
+* Main
+**********************************************************/
+
+static int usage(const char* exename)
+{
+ DISPLAY( WELCOME_MESSAGE(exename) );
+ DISPLAY( "Usage :\n");
+ DISPLAY( " %s [arg] [filenames]\n", exename);
+ DISPLAY( "When no filename provided, or - provided : use stdin as input\n");
+ DISPLAY( "Arguments :\n");
+ DISPLAY( " -H# : hash selection : 0=32bits, 1=64bits (default: %i)\n", (int)g_defaultAlgo);
+ DISPLAY( " -c : read xxHash sums from the [filenames] and check them\n");
+ DISPLAY( " -h : help \n");
+ return 0;
+}
+
+
+static int usage_advanced(const char* exename)
+{
+ usage(exename);
+ DISPLAY( "Advanced :\n");
+ DISPLAY( " --little-endian : hash printed using little endian convention (default: big endian)\n");
+ DISPLAY( " -V, --version : display version\n");
+ DISPLAY( " -h, --help : display long help and exit\n");
+ DISPLAY( " -b : benchmark mode \n");
+ DISPLAY( " -i# : number of iterations (benchmark mode; default %i)\n", g_nbIterations);
+ DISPLAY( "\n");
+ DISPLAY( "The following four options are useful only when verifying checksums (-c):\n");
+ DISPLAY( "--strict : don't print OK for each successfully verified file\n");
+ DISPLAY( "--status : don't output anything, status code shows success\n");
+ DISPLAY( "--quiet : exit non-zero for improperly formatted checksum lines\n");
+ DISPLAY( "--warn : warn about improperly formatted checksum lines\n");
+ return 0;
+}
+
+static int badusage(const char* exename)
+{
+ DISPLAY("Wrong parameters\n");
+ usage(exename);
+ return 1;
+}
+
+/*! readU32FromChar() :
+ @return : unsigned integer value read from input in `char` format,
+ 0 is no figure at *stringPtr position.
+ Interprets K, KB, KiB, M, MB and MiB suffix.
+ Modifies `*stringPtr`, advancing it to position where reading stopped.
+ Note : function result can overflow if digit string > MAX_UINT */
+static unsigned readU32FromChar(const char** stringPtr)
+{
+ unsigned result = 0;
+ while ((**stringPtr >='0') && (**stringPtr <='9'))
+ result *= 10, result += **stringPtr - '0', (*stringPtr)++ ;
+ if ((**stringPtr=='K') || (**stringPtr=='M')) {
+ result <<= 10;
+ if (**stringPtr=='M') result <<= 10;
+ (*stringPtr)++ ;
+ if (**stringPtr=='i') (*stringPtr)++;
+ if (**stringPtr=='B') (*stringPtr)++;
+ }
+ return result;
+}
+
+int main(int argc, const char** argv)
+{
+ int i, filenamesStart = 0;
+ const char* const exename = argv[0];
+ U32 benchmarkMode = 0;
+ U32 fileCheckMode = 0;
+ U32 strictMode = 0;
+ U32 statusOnly = 0;
+ U32 warn = 0;
+ U32 quiet = 0;
+ U32 specificTest = 0;
+ size_t keySize = XXH_DEFAULT_SAMPLE_SIZE;
+ algoType algo = g_defaultAlgo;
+ endianess displayEndianess = big_endian;
+
+ /* special case : xxh32sum default to 32 bits checksum */
+ if (strstr(exename, "xxh32sum") != NULL) algo = algo_xxh32;
+
+ for(i=1; i<argc; i++) {
+ const char* argument = argv[i];
+
+ if(!argument) continue; /* Protection, if argument empty */
+
+ if (!strcmp(argument, "--little-endian")) { displayEndianess = little_endian; continue; }
+ if (!strcmp(argument, "--check")) { fileCheckMode = 1; continue; }
+ if (!strcmp(argument, "--strict")) { strictMode = 1; continue; }
+ if (!strcmp(argument, "--status")) { statusOnly = 1; continue; }
+ if (!strcmp(argument, "--quiet")) { quiet = 1; continue; }
+ if (!strcmp(argument, "--warn")) { warn = 1; continue; }
+ if (!strcmp(argument, "--help")) { return usage_advanced(exename); }
+ if (!strcmp(argument, "--version")) { DISPLAY(WELCOME_MESSAGE(exename)); return 0; }
+
+ if (*argument!='-') {
+ if (filenamesStart==0) filenamesStart=i; /* only supports a continuous list of filenames */
+ continue;
+ }
+
+ /* command selection */
+ argument++; /* note : *argument=='-' */
+
+ while (*argument!=0) {
+ switch(*argument)
+ {
+ /* Display version */
+ case 'V':
+ DISPLAY(WELCOME_MESSAGE(exename)); return 0;
+
+ /* Display help on usage */
+ case 'h':
+ return usage_advanced(exename);
+
+ /* select hash algorithm */
+ case 'H':
+ algo = (algoType)(argument[1] - '0');
+ argument+=2;
+ break;
+
+ /* File check mode */
+ case 'c':
+ fileCheckMode=1;
+ argument++;
+ break;
+
+ /* Warning mode (file check mode only, alias of "--warning") */
+ case 'w':
+ warn=1;
+ argument++;
+ break;
+
+ /* Trigger benchmark mode */
+ case 'b':
+ argument++;
+ benchmarkMode = 1;
+ specificTest = readU32FromChar(&argument); /* select one specific test (hidden option) */
+ break;
+
+ /* Modify Nb Iterations (benchmark only) */
+ case 'i':
+ argument++;
+ g_nbIterations = readU32FromChar(&argument);
+ break;
+
+ /* Modify Block size (benchmark only) */
+ case 'B':
+ argument++;
+ keySize = readU32FromChar(&argument);
+ break;
+
+ /* Modify verbosity of benchmark output (hidden option) */
+ case 'q':
+ argument++;
+ g_displayLevel--;
+ break;
+
+ default:
+ return badusage(exename);
+ }
+ }
+ } /* for(i=1; i<argc; i++) */
+
+ /* Check benchmark mode */
+ if (benchmarkMode) {
+ DISPLAYLEVEL(2, WELCOME_MESSAGE(exename) );
+ BMK_sanityCheck();
+ if (filenamesStart==0) return BMK_benchInternal(keySize, specificTest);
+ return BMK_benchFiles(argv+filenamesStart, argc-filenamesStart, specificTest);
+ }
+
+ /* Check if input is defined as console; trigger an error in this case */
+ if ( (filenamesStart==0) && IS_CONSOLE(stdin) ) return badusage(exename);
+
+ if (filenamesStart==0) filenamesStart = argc;
+ if (fileCheckMode) {
+ return checkFiles(argv+filenamesStart, argc-filenamesStart,
+ displayEndianess, strictMode, statusOnly, warn, quiet);
+ } else {
+ return BMK_hashFiles(argv+filenamesStart, argc-filenamesStart, algo, displayEndianess);
+ }
+}
+
+#endif /* XXHASH_C_2097394837 */
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 5eea6618220..de68c20b4d7 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -1,9 +1,9 @@
-EXTRA_DIST = glusterfs.vol.sample glusterfsd.vol.sample glusterfs.8 mount.glusterfs.8\
- glusterd.vol gluster.8 glusterd.8 glusterfsd.8
+EXTRA_DIST = glusterfs.8 mount.glusterfs.8 gluster.8 \
+ glusterd.8 glusterfsd.8
-voldir = $(sysconfdir)/glusterfs
-vol_DATA = glusterd.vol
-
-man8_MANS = glusterfs.8 mount.glusterfs.8 gluster.8 glusterd.8 glusterfsd.8
+man8_MANS = glusterfs.8 mount.glusterfs.8 gluster.8
+if WITH_SERVER
+man8_MANS += glusterd.8 glusterfsd.8
+endif
CLEANFILES =
diff --git a/doc/README.md b/doc/README.md
new file mode 100644
index 00000000000..6aa28642ef4
--- /dev/null
+++ b/doc/README.md
@@ -0,0 +1,26 @@
+## Developer Guide
+
+Gluster's contributors can check about the internals by visiting [Developer Guide Section](developer-guide). While it is not 'comprehensive', it can help you to get started.
+
+Also while coding, keep [Coding Standard](developer-guide/coding-standard.md) in mind.
+
+When you are ready to commit the changes, make sure you meet our [Commit message standard](developer-guide/commit-guidelines.md).
+
+## Admin Guide ##
+
+The gluster administration guide is maintained at [github](https://github.com/gluster/glusterdocs). The browsable admin guide can be found [here](http://docs.gluster.org/en/latest/Administrator%20Guide/).
+
+The doc patch has to be sent against the above mentioned repository.
+
+## Features/Spec ##
+
+The Gluster features which are 'in progress' or implemented can be found at [github](https://github.com/gluster/glusterfs-specs).
+
+## Upgrade Guide ##
+
+The gluster upgrade guide is maintained at [github](https://github.com/gluster/glusterdocs). The browsable upgrade guide can be found [here](http://docs.gluster.org/en/latest/Upgrade-Guide)
+
+The doc patch has to be sent against the above mentioned repository.
+
+
+For more details about the docuemntation workflow please refer [this discussion](https://www.mail-archive.com/gluster-users@gluster.org/msg21168.html)
diff --git a/doc/admin-guide/en-US/Administration_Guide.ent b/doc/admin-guide/en-US/Administration_Guide.ent
deleted file mode 100644
index 3381b2bfec1..00000000000
--- a/doc/admin-guide/en-US/Administration_Guide.ent
+++ /dev/null
@@ -1,4 +0,0 @@
-<!ENTITY PRODUCT "Documentation">
-<!ENTITY BOOKID "Administration_Guide">
-<!ENTITY YEAR "2012">
-<!ENTITY HOLDER "Red Hat Inc">
diff --git a/doc/admin-guide/en-US/Administration_Guide.xml b/doc/admin-guide/en-US/Administration_Guide.xml
deleted file mode 100644
index 483855b1a02..00000000000
--- a/doc/admin-guide/en-US/Administration_Guide.xml
+++ /dev/null
@@ -1,27 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE book PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<book>
- <xi:include href="Book_Info.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="Preface.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="gfs_introduction.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_start_stop_daemon.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_console.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_storage_pools.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_setting_volumes.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_settingup_clients.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_managing_volumes.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_geo-replication.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_directory_Quota.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_monitoring_workload.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_ACLs.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_UFO.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_Hadoop.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_troubleshooting.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="admin_commandref.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="glossary.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="Revision_History.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
-</book>
-
diff --git a/doc/admin-guide/en-US/Author_Group.xml b/doc/admin-guide/en-US/Author_Group.xml
deleted file mode 100644
index f3fa3174037..00000000000
--- a/doc/admin-guide/en-US/Author_Group.xml
+++ /dev/null
@@ -1,17 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE authorgroup PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<authorgroup>
- <author>
- <firstname>Divya</firstname>
- <surname>Muntimadugu</surname>
- <affiliation>
- <orgname>Red Hat</orgname>
- <orgdiv>Engineering Content Services</orgdiv>
- </affiliation>
- <email>divya@redhat.com</email>
- </author>
-</authorgroup>
-
diff --git a/doc/admin-guide/en-US/Book_Info.xml b/doc/admin-guide/en-US/Book_Info.xml
deleted file mode 100644
index 6be6a7816ca..00000000000
--- a/doc/admin-guide/en-US/Book_Info.xml
+++ /dev/null
@@ -1,28 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE bookinfo PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<bookinfo id="book-Administration_Guide-Administration_Guide">
- <title>Administration Guide</title>
- <subtitle>Using Gluster File System <remark> Beta 3</remark> </subtitle>
- <productname>Gluster File System</productname>
- <productnumber>3.3</productnumber>
- <edition>1</edition>
- <pubsnumber>1</pubsnumber>
- <abstract>
- <para>
- This guide describes Gluster File System (GlusterFS) and provides information on how to configure, operate, and manage GlusterFS.
- </para>
- </abstract>
- <corpauthor>
- <inlinemediaobject>
- <imageobject>
- <imagedata fileref="Common_Content/images/title_logo.svg" format="SVG" />
- </imageobject>
- </inlinemediaobject>
- </corpauthor>
- <xi:include href="Common_Content/Legal_Notice.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
- <xi:include href="Author_Group.xml" xmlns:xi="http://www.w3.org/2001/XInclude" />
-</bookinfo>
-
diff --git a/doc/admin-guide/en-US/Chapter.xml b/doc/admin-guide/en-US/Chapter.xml
deleted file mode 100644
index 4a1cef872c8..00000000000
--- a/doc/admin-guide/en-US/Chapter.xml
+++ /dev/null
@@ -1,33 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-Test_Chapter">
- <title>Test Chapter</title>
- <para>
- This is a test paragraph
- </para>
- <section id="sect-Administration_Guide-Test_Chapter-Test_Section_1">
- <title>Test Section 1</title>
- <para>
- This is a test paragraph in a section
- </para>
- </section>
-
- <section id="sect-Administration_Guide-Test_Chapter-Test_Section_2">
- <title>Test Section 2</title>
- <para>
- This is a test paragraph in Section 2
- <orderedlist>
- <listitem>
- <para>
- listitem text
- </para>
- </listitem>
- </orderedlist>
- </para>
- </section>
-
-</chapter>
-
diff --git a/doc/admin-guide/en-US/Preface.xml b/doc/admin-guide/en-US/Preface.xml
deleted file mode 100644
index 320311906d0..00000000000
--- a/doc/admin-guide/en-US/Preface.xml
+++ /dev/null
@@ -1,24 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. -->
-<!DOCTYPE preface PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<preface id="pref-Administration_Guide-Preface">
- <title>Preface</title>
- <para>This guide describes how to configure, operate, and manage Gluster File System (GlusterFS).</para>
- <section>
- <title>Audience</title>
- <para>This guide is intended for Systems Administrators interested in configuring and managing GlusterFS.</para>
- <para>This guide assumes that you are familiar with the Linux operating system, concepts of File System, GlusterFS concepts, and GlusterFS Installation</para>
- </section>
- <section>
- <title>License</title>
- <para>The License information is available at <ulink url="http://www.redhat.com/licenses/rhel_rha_eula.html"/>.</para>
- </section>
- <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Common_Content/Conventions.xml"/>
- <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Feedback.xml">
- <xi:fallback xmlns:xi="http://www.w3.org/2001/XInclude"> <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="Common_Content/Feedback.xml"/>
- </xi:fallback>
- </xi:include>
-</preface>
diff --git a/doc/admin-guide/en-US/Revision_History.xml b/doc/admin-guide/en-US/Revision_History.xml
deleted file mode 100644
index 09320821fb0..00000000000
--- a/doc/admin-guide/en-US/Revision_History.xml
+++ /dev/null
@@ -1,27 +0,0 @@
-<?xml version='1.0' encoding='utf-8' ?>
-<!DOCTYPE appendix PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<appendix id="appe-Administration_Guide-Revision_History">
- <title>Revision History</title>
- <simpara>
- <revhistory>
- <revision>
- <revnumber>1-0</revnumber>
- <date>Thu Apr 5 2012</date>
- <author>
- <firstname>Divya</firstname>
- <surname>Muntimadugu</surname>
- <email>divya@redhat.com</email>
- </author>
- <revdescription>
- <simplelist>
- <member>Draft </member>
- </simplelist>
- </revdescription>
- </revision>
- </revhistory>
- </simpara>
-</appendix>
-
diff --git a/doc/admin-guide/en-US/admin_ACLs.xml b/doc/admin-guide/en-US/admin_ACLs.xml
deleted file mode 100644
index 156e52c17f2..00000000000
--- a/doc/admin-guide/en-US/admin_ACLs.xml
+++ /dev/null
@@ -1,206 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-ACLs">
- <title>POSIX Access Control Lists </title>
- <para>POSIX Access Control Lists (ACLs) allows you to assign different permissions for different users or
-groups even though they do not correspond to the original owner or the owning group.
- </para>
- <para>For example: User john creates a file but does not want to allow anyone to do anything with this
-file, except another user, antony (even though there are other users that belong to the group john).
-</para>
- <para>This means, in addition to the file owner, the file group, and others, additional users and groups can
-be granted or denied access by using POSIX ACLs.
-</para>
- <section id="sect-Administration_Guide-ACLs-Activating_ACLs">
- <title>Activating POSIX ACLs Support </title>
- <para>To use POSIX ACLs for a file or directory, the partition of the file or directory must be mounted with
-POSIX ACLs support.
-</para>
- <section id="sect-Administration_Guide-ACLs-Activating_ACLs-Server">
- <title>Activating POSIX ACLs Support on Sever </title>
- <para>To mount the backend export directories for POSIX ACLs support, use the following command:
-</para>
- <para><command># mount -o acl <replaceable>device-name</replaceable><replaceable>partition</replaceable></command>
-</para>
- <para>For example:
-</para>
- <para><command># mount -o acl /dev/sda1 /export1 </command></para>
- <para>Alternatively, if the partition is listed in the /etc/fstab file, add the following entry for the partition
-to include the POSIX ACLs option:
-</para>
- <para><command>LABEL=/work /export1 ext3 rw, acl 14 </command></para>
- </section>
- <section>
- <title>Activating POSIX ACLs Support on Client </title>
- <para>To mount the glusterfs volumes for POSIX ACLs support, use the following command:
-</para>
- <para><command># mount –t glusterfs -o acl <replaceable>severname:volume-id</replaceable><replaceable>mount point</replaceable></command>
-</para>
- <para>For example:
-</para>
- <para><command># mount -t glusterfs -o acl 198.192.198.234:glustervolume /mnt/gluster</command>
-</para>
- </section>
- </section>
- <section>
- <title>Setting POSIX ACLs </title>
- <para>You can set two types of POSIX ACLs, that is, access ACLs and default ACLs. You can use
-access ACLs to grant permission for a specific file or directory. You can use default ACLs only
-on a directory but if a file inside that directory does not have an ACLs, it inherits the permissions of
-the default ACLs of the directory.
-</para>
- <para>You can set ACLs for per user, per group, for users not in the user group for the file, and via the
-effective right mask.
-</para>
- <section>
- <title>Setting Access ACLs </title>
- <para>You can apply access ACLs to grant permission for both files and directories.
-</para>
- <para><emphasis role="bold">To set or modify Access ACLs</emphasis>
-</para>
- <para>You can set or modify access ACLs use the following command:
-</para>
- <para><command># setfacl –m <replaceable>entry type</replaceable> file </command></para>
- <para>The ACL entry types are the POSIX ACLs representations of owner, group, and other.
-</para>
- <para>Permissions must be a combination of the characters <command>r</command> (read), <command>w</command> (write), and <command>x</command> (execute). You must
-specify the ACL entry in the following format and can specify multiple entry types separated by
-commas.
-</para>
- <informaltable frame="all">
- <tgroup cols="2">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <thead>
- <row>
- <entry>ACL Entry</entry>
- <entry>Description</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>u:uid:&lt;permission&gt; </entry>
- <entry>Sets the access ACLs for a user. You can specify user name or UID </entry>
- </row>
- <row>
- <entry>g:gid:&lt;permission&gt; </entry>
- <entry>Sets the access ACLs for a group. You can specify group name or GID. </entry>
- </row>
- <row>
- <entry>m:&lt;permission&gt; </entry>
- <entry>Sets the effective rights mask. The mask is the combination of all access permissions of the owning group and all of the user and group entries. </entry>
- </row>
- <row>
- <entry>o:&lt;permission&gt; </entry>
- <entry>Sets the access ACLs for users other than the ones in the group for the file. </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
- <para>If a file or directory already has an POSIX ACLs, and the setfacl command is used, the additional
-permissions are added to the existing POSIX ACLs or the existing rule is modified.
-</para>
- <para>For example, to give read and write permissions to user antony:
-</para>
- <para><command># setfacl -m u:antony:rw /mnt/gluster/data/testfile </command></para>
- </section>
- <section>
- <title>Setting Default ACLs </title>
- <para>You can apply default ACLs only to directories. They determine the permissions of a file system
-objects that inherits from its parent directory when it is created.
-</para>
- <para>To set default ACLs
-</para>
- <para>You can set default ACLs for files and directories using the following command:
-</para>
- <para><command># setfacl –m –-set <replaceable>entry type directory</replaceable></command>
-</para>
- <para>For example, to set the default ACLs for the /data directory to read for users not in the user group:
-</para>
- <para><command># setfacl –m --set o::r /mnt/gluster/data </command></para>
- <para><note>
- <para>An access ACLs set for an individual file can override the default ACLs permissions.
-</para>
- </note></para>
- <para><emphasis role="bold">Effects of a Default ACLs </emphasis></para>
- <para>The following are the ways in which the permissions of a directory&apos;s default ACLs are passed to the
-files and subdirectories in it:
-</para>
- <itemizedlist>
- <listitem>
- <para>A subdirectory inherits the default ACLs of the parent directory both as its default ACLs and as an
-access ACLs.
-</para>
- </listitem>
- <listitem>
- <para>A file inherits the default ACLs as its access ACLs.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Retrieving POSIX ACLs </title>
- <para>You can view the existing POSIX ACLs for a file or directory.
-</para>
- <para><emphasis role="bold">To view existing POSIX ACLs </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>View the existing access ACLs of a file using the following command:
-</para>
- <para><command># getfacl <replaceable>path/filename</replaceable></command>
-</para>
- <para>For example, to view the existing POSIX ACLs for sample.jpg
-</para>
- <programlisting># getfacl /mnt/gluster/data/test/sample.jpg
-# owner: antony
-# group: antony
-user::rw-
-group::rw-
-other::r--</programlisting>
- </listitem>
- <listitem>
- <para>View the default ACLs of a directory using the following command:
-</para>
- <para><command># getfacl <replaceable>directory name</replaceable></command></para>
- <para>For example, to view the existing ACLs for /data/doc
-</para>
- <programlisting># getfacl /mnt/gluster/data/doc
-# owner: antony
-# group: antony
-user::rw-
-user:john:r--
-group::r--
-mask::r--
-other::r--
-default:user::rwx
-default:user:antony:rwx
-default:group::r-x
-default:mask::rwx
-default:other::r-x</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Removing POSIX ACLs </title>
- <para>To remove all the permissions for a user, groups, or others, use the following command:
-</para>
- <para><command># setfacl -x <replaceable>ACL entry type file</replaceable></command></para>
- <para>For example, to remove all permissions from the user antony:
-</para>
- <para><command># setfacl -x u:antony /mnt/gluster/data/test-file</command></para>
- </section>
- <section>
- <title>Samba and ACLs </title>
- <para>If you are using Samba to access GlusterFS FUSE mount, then POSIX ACLs are enabled by default.
-Samba has been compiled with the <command>--with-acl-support</command> option, so no special flags are required
-when accessing or mounting a Samba share.
-</para>
- </section>
- <section>
- <title>NFS and ACLs </title>
- <para>Currently we do not support ACLs configuration through NFS, i.e. setfacl and getfacl commands do
-not work. However, ACLs permissions set using Gluster Native Client applies on NFS mounts.
-</para>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_Hadoop.xml b/doc/admin-guide/en-US/admin_Hadoop.xml
deleted file mode 100644
index 08bac89615b..00000000000
--- a/doc/admin-guide/en-US/admin_Hadoop.xml
+++ /dev/null
@@ -1,244 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-Hadoop">
- <title>Managing Hadoop Compatible Storage </title>
- <para>GlusterFS provides compatibility for Apache Hadoop and it uses the standard file system
-APIs available in Hadoop to provide a new storage option for Hadoop deployments. Existing
-MapReduce based applications can use GlusterFS seamlessly. This new functionality opens up data
-within Hadoop deployments to any file-based or object-based application.
-
- </para>
- <section id="sect-Administration_Guide-Hadoop-Introduction-Architecture_Overview">
- <title>Architecture Overview </title>
- <para>The following diagram illustrates Hadoop integration with GlusterFS:
-<mediaobject>
- <imageobject>
- <imagedata fileref="images/Hadoop_Architecture.png"/>
- </imageobject>
- </mediaobject>
- </para>
- </section>
- <section id="sect-Administration_Guide-Hadoop-Introduction-Advantages">
- <title>Advantages </title>
- <para>
-The following are the advantages of Hadoop Compatible Storage with GlusterFS:
-
-
- </para>
- <itemizedlist>
- <listitem>
- <para>Provides simultaneous file-based and object-based access within Hadoop.
-</para>
- </listitem>
- <listitem>
- <para>Eliminates the centralized metadata server.
-</para>
- </listitem>
- <listitem>
- <para>Provides compatibility with MapReduce applications and rewrite is not required.
-</para>
- </listitem>
- <listitem>
- <para>Provides a fault tolerant file system.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Preparing to Install Hadoop Compatible Storage</title>
- <para>This section provides information on pre-requisites and list of dependencies that will be installed
-during installation of Hadoop compatible storage.
-
-</para>
- <section id="sect-Administration_Guide-Hadoop-Preparation">
- <title>Pre-requisites </title>
- <para>The following are the pre-requisites to install Hadoop Compatible
-Storage :
-
- </para>
- <itemizedlist>
- <listitem>
- <para>Hadoop 0.20.2 is installed, configured, and is running on all the machines in the cluster.
-</para>
- </listitem>
- <listitem>
- <para>Java Runtime Environment
-</para>
- </listitem>
- <listitem>
- <para>Maven (mandatory only if you are building the plugin from the source)
-</para>
- </listitem>
- <listitem>
- <para>JDK (mandatory only if you are building the plugin from the source)
-</para>
- </listitem>
- <listitem>
- <para>getfattr
-- command line utility</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Installing, and Configuring Hadoop Compatible Storage</title>
- <para>This section describes how to install and configure Hadoop Compatible Storage in your storage
-environment and verify that it is functioning correctly.
-
-</para>
- <orderedlist>
- <para>To install and configure Hadoop compatible storage:</para>
- <listitem>
- <para>Download <filename>glusterfs-hadoop-0.20.2-0.1.x86_64.rpm</filename> file to each server on your cluster. You can download the file from <ulink url="http://download.gluster.com/pub/gluster/glusterfs/qa-releases/3.3-beta-2/glusterfs-hadoop-0.20.2-0.1.x86_64.rpm"/>.
-
-</para>
- </listitem>
- <listitem>
- <para>To install Hadoop Compatible Storage on all servers in your cluster, run the following command:
-</para>
- <para><command># rpm –ivh --nodeps glusterfs-hadoop-0.20.2-0.1.x86_64.rpm</command>
-</para>
- <para>The following files will be extracted:
- </para>
- <itemizedlist>
- <listitem>
- <para>/usr/local/lib/glusterfs-<replaceable>Hadoop-version-gluster_plugin_version</replaceable>.jar </para>
- </listitem>
- <listitem>
- <para> /usr/local/lib/conf/core-site.xml</para>
- </listitem>
- </itemizedlist>
- </listitem>
- <listitem>
- <para>(Optional) To install Hadoop Compatible Storage in a different location, run the following
-command:
-</para>
- <para><command># rpm –ivh --nodeps –prefix /usr/local/glusterfs/hadoop glusterfs-hadoop- 0.20.2-0.1.x86_64.rpm</command>
-</para>
- </listitem>
- <listitem>
- <para>Edit the <filename>conf/core-site.xml</filename> file. The following is the sample <filename>conf/core-site.xml</filename> file:
-</para>
- <para><programlisting>&lt;configuration&gt;
- &lt;property&gt;
- &lt;name&gt;fs.glusterfs.impl&lt;/name&gt;
- &lt;value&gt;org.apache.hadoop.fs.glusterfs.Gluster FileSystem&lt;/value&gt;
-&lt;/property&gt;
-
-&lt;property&gt;
- &lt;name&gt;fs.default.name&lt;/name&gt;
- &lt;value&gt;glusterfs://fedora1:9000&lt;/value&gt;
-&lt;/property&gt;
-
-&lt;property&gt;
- &lt;name&gt;fs.glusterfs.volname&lt;/name&gt;
- &lt;value&gt;hadoopvol&lt;/value&gt;
-&lt;/property&gt;
-
-&lt;property&gt;
- &lt;name&gt;fs.glusterfs.mount&lt;/name&gt;
- &lt;value&gt;/mnt/glusterfs&lt;/value&gt;
-&lt;/property&gt;
-
-&lt;property&gt;
- &lt;name&gt;fs.glusterfs.server&lt;/name&gt;
- &lt;value&gt;fedora2&lt;/value&gt;
-&lt;/property&gt;
-
-&lt;property&gt;
- &lt;name&gt;quick.slave.io&lt;/name&gt;
- &lt;value&gt;Off&lt;/value&gt;
-&lt;/property&gt;
-&lt;/configuration&gt;
-</programlisting></para>
- <para>The following are the configurable fields:
-</para>
- <para><informaltable frame="none">
- <tgroup cols="3">
- <colspec colnum="1" colname="c0" colsep="0"/>
- <colspec colnum="2" colname="c1" colsep="0"/>
- <colspec colnum="3" colname="c2" colsep="0"/>
- <thead>
- <row>
- <entry>Property Name </entry>
- <entry>Default Value </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>fs.default.name </entry>
- <entry>glusterfs://fedora1:9000</entry>
- <entry>Any hostname in the cluster as the server and any port number. </entry>
- </row>
- <row>
- <entry>fs.glusterfs.volname </entry>
- <entry>hadoopvol </entry>
- <entry>GlusterFS volume to mount. </entry>
- </row>
- <row>
- <entry>fs.glusterfs.mount </entry>
- <entry>/mnt/glusterfs</entry>
- <entry>The directory used to fuse mount the volume.</entry>
- </row>
- <row>
- <entry>fs.glusterfs.server </entry>
- <entry>fedora2</entry>
- <entry>Any hostname or IP address on the cluster except the client/master. </entry>
- </row>
- <row>
- <entry>quick.slave.io </entry>
- <entry>Off </entry>
- <entry>Performance tunable option. If this option is set to On, the plugin will try to perform I/O directly from the disk file system (like ext3 or ext4) the file resides on. Hence read performance will improve and job would run faster. <note>
- <para>This option is not tested widely</para>
- </note></entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- </listitem>
- <listitem>
- <para>Create a soft link in Hadoop’s library and configuration directory for the downloaded files (in
-Step 3) using the following commands:
-</para>
- <para><command># ln -s <replaceable>&lt;target location&gt; &lt;source location</replaceable>&gt;</command>
-</para>
- <para>For example,
-</para>
- <para><command># ln –s /usr/local/lib/glusterfs-0.20.2-0.1.jar <replaceable>$HADOOP_HOME</replaceable>/lib/glusterfs-0.20.2-0.1.jar</command>
-</para>
- <para><command># ln –s /usr/local/lib/conf/core-site.xml <replaceable>$HADOOP_HOME</replaceable>/conf/core-site.xml </command></para>
- </listitem>
- <listitem>
- <para> (Optional) You can run the following command on Hadoop master to build the plugin and deploy
-it along with core-site.xml file, instead of repeating the above steps:
-</para>
- <para><command># build-deploy-jar.py -d <replaceable>$HADOOP_HOME</replaceable> -c </command></para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Starting and Stopping the Hadoop MapReduce Daemon</title>
- <para>To start and stop MapReduce daemon</para>
- <itemizedlist>
- <listitem>
- <para>To start MapReduce daemon manually, enter the following command:
-</para>
- <para><command># <replaceable>$HADOOP_HOME</replaceable>/bin/start-mapred.sh</command>
-</para>
- </listitem>
- <listitem>
- <para>To stop MapReduce daemon manually, enter the following command:
-</para>
- <para><command># <replaceable>$HADOOP_HOME</replaceable>/bin/stop-mapred.sh </command></para>
- </listitem>
- </itemizedlist>
- <para><note>
- <para>You must start Hadoop MapReduce daemon on all servers.
-</para>
- </note></para>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_UFO.xml b/doc/admin-guide/en-US/admin_UFO.xml
deleted file mode 100644
index 03be14dc9dc..00000000000
--- a/doc/admin-guide/en-US/admin_UFO.xml
+++ /dev/null
@@ -1,1588 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-UFO">
- <title>Managing Unified File and Object Storage</title>
- <para>Unified File and Object Storage (UFO) unifies NAS and object storage technology. It
-provides a system for data storage that enables users to access the same data, both as an object and as a
-file, thus simplifying management and controlling storage costs.
-
-</para>
- <para>Unified File and Object Storage is built upon Openstack&apos;s Object Storage Swift. Open Stack Object Storage allows users to store and retrieve files and content through a simple Web Service (REST: Representational State Transfer) interface as objects and GlusterFS, allows users to store and retrieve files using Native Fuse and NFS mounts. It uses GlusterFS as a backend file system for Open Stack Swift. It also leverages on Open Stack Swift&apos;s web interface for storing and retrieving files over the web combined with GlusterFS features like scalability and high availability, replication, elastic volume management for data management at disk level.</para>
- <para>Unified File and Object Storage technology enables enterprises to adopt and deploy
-cloud storage solutions. It allows users to access and modify data as objects from a
-REST interface along with the ability to access and modify files from NAS interfaces including NFS
-and CIFS. In addition to decreasing cost and making it faster and easier to access object data,
-it also delivers massive scalability, high availability and replication of object storage.
-Infrastructure as a Service (IaaS) providers can utilize GlusterFS Unified File and Object Storage technology to enable their own cloud
-storage service. Enterprises can use this technology to accelerate the process of preparing file-based
-applications for the cloud and simplify new application development for cloud computing
-environments.
-
-</para>
- <para>OpenStack Object Storage is scalable object storage system and it is not a traditional file system. You will not be able to mount this system like traditional SAN or NAS
-volumes and perform POSIX compliant operations. </para>
- <para><figure>
- <title>Unified File and Object Storage Architecture</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/UFO_Architecture.png"/>
- </imageobject>
- </mediaobject>
- </figure></para>
- <section>
- <title>Components of Object Storage</title>
- <para>The major components of Object Storage are:
- </para>
- <para><emphasis role="bold">Proxy Server</emphasis>
-
-</para>
- <para>All REST requests to the UFO are routed through the Proxy Server.
-
-
-</para>
- <para><emphasis role="bold">Objects and Containers </emphasis></para>
- <para>An object is the basic storage entity and any optional metadata that represents the data
-you store. When you upload data, the data is stored as-is (with no compression or encryption).
-
-</para>
- <para>A container is a storage compartment for your data and provides a way for you to organize
-your data. Containers can be visualized as directories in a Linux system. Data must be stored in a container and hence objects are created within a container.
-
-</para>
- <para>It implements objects as files and directories under the container. The object name is a &apos;/&apos; separated path and UFO maps it to directories until the last name in the path, which is marked as a file. With this approach, objects can be accessed as files and directories from native GlusterFS (FUSE) or NFS mounts by providing the &apos;/&apos; separated path.</para>
- <para><emphasis role="bold">Accounts and Account Servers</emphasis></para>
- <para>The OpenStack Object Storage system is designed to be used by many different storage
-consumers. Each user is associated with one or more accounts and must identify themselves using an authentication system. While authenticating, users must provide the name of the account for which the authentication is requested.
-
-</para>
- <para>UFO implements accounts as GlusterFS volumes. So, when a user is granted read/write permission on an account, it means that that user has access to all the data available on that GlusterFS volume.
-
-
-
-
-</para>
- <para><emphasis role="bold">Authentication and Access Permissions</emphasis>
-
-</para>
- <para>You must authenticate against an authentication service to receive OpenStack Object
-Storage connection parameters and an authentication token. The token must be passed
-in for all subsequent container or object operations. One authentication service that you
-can use as a middleware example is called <literal>tempauth</literal>.</para>
- <para>By default, each user has their own storage account and has full access to that
-account. Users must authenticate with their credentials as described above, but once
-authenticated they can manage containers and objects within that account. If a user wants to access the content from another account, they must have API access key or a session token provided by their authentication system.</para>
- </section>
- <section>
- <title>Advantages of using GlusterFS Unified File and Object Storage</title>
- <para>The following are the advantages of using GlusterFS UFO:</para>
- <itemizedlist>
- <listitem>
- <para>No limit on upload and download files sizes as compared to Open Stack Swift which limits the object size to 5GB.</para>
- </listitem>
- <listitem>
- <para>A unified view of data across NAS and Object Storage technologies.</para>
- </listitem>
- <listitem>
- <para>Using GlusterFS&apos;s UFO has other advantages like the following: </para>
- <para><itemizedlist>
- <listitem>
- <para>High availability</para>
- </listitem>
- <listitem>
- <para>Scalability</para>
- </listitem>
- <listitem>
- <para>Replication</para>
- </listitem>
- <listitem>
- <para>Elastic Volume management</para>
- </listitem>
- </itemizedlist></para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Preparing to Deploy Unified File and Object Storage</title>
- <para>This section provides information on pre-requisites and list of dependencies that will be installed
-during the installation of Unified File and Object Storage.
-</para>
- <section>
- <title>Pre-requisites </title>
- <para>GlusterFS&apos;s Unified File and Object Storage needs <literal>user_xattr</literal> support from the underlying disk file system.
-Use the following command to enable <literal>user_xattr</literal> for GlusterFS bricks backend:
-</para>
- <para><command># mount –o remount,user_xattr <replaceable>device name</replaceable></command></para>
- <para>For example,
-</para>
- <para><command># mount –o remount,user_xattr /dev/hda1 </command>
-</para>
- </section>
- <section>
- <title>Dependencies </title>
- <para>The following packages are installed on GlusterFS when you install Unified File and Object
-Storage:
-
-</para>
- <itemizedlist>
- <listitem>
- <para>curl
-
-
-
-
-
-
-
-
-
-
-
-
-
-</para>
- </listitem>
- <listitem>
- <para>memcached</para>
- </listitem>
- <listitem>
- <para>openssl</para>
- </listitem>
- <listitem>
- <para>xfsprogs</para>
- </listitem>
- <listitem>
- <para>python2.6</para>
- </listitem>
- <listitem>
- <para>pyxattr</para>
- </listitem>
- <listitem>
- <para>python-configobj
-</para>
- </listitem>
- <listitem>
- <para>python-setuptools
-
-</para>
- </listitem>
- <listitem>
- <para>python-simplejson
-
-</para>
- </listitem>
- <listitem>
- <para>python-webob
-
-</para>
- </listitem>
- <listitem>
- <para>python-eventlet
-
-</para>
- </listitem>
- <listitem>
- <para>python-greenlet
-
-</para>
- </listitem>
- <listitem>
- <para>python-pastedeploy
-
-</para>
- </listitem>
- <listitem>
- <para>python-netifaces
-</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Installing and Configuring Unified File and Object Storage</title>
- <para>This section provides instructions on how to install and configure Unified File and Object Storage in your storage
-environment.</para>
- <section id="chap-ation_Guide-Dir_Quota-Enable">
- <title>Installing Unified File and Object Storage</title>
- <para>To install Unified File and Object Storage:</para>
- <orderedlist>
- <listitem>
- <para>Download <filename>rhel_install.sh</filename> install script from <ulink url="http://download.gluster.com/pub/gluster/glusterfs/3.2/UFO/"/> .
-</para>
- </listitem>
- <listitem>
- <para>Run
- <filename>rhel_install.sh</filename> script using the following command:
-</para>
- <para><command># sh rhel_install.sh</command></para>
- </listitem>
- <listitem>
- <para>Download <filename>swift-1.4.5-1.noarch.rpm</filename> and <filename>swift-plugin-1.0.-1.el6.noarch.rpm</filename> files from <ulink url="http://download.gluster.com/pub/gluster/glusterfs/3.2/UFO/"/>.</para>
- </listitem>
- <listitem>
- <para>Install <filename>swift-1.4.5-1.noarch.rpm</filename> and <filename>swift-plugin-1.0.-1.el6.noarch.rpm</filename> using the following commands:</para>
- <para><command># rpm -ivh swift-1.4.5-1.noarch.rpm</command></para>
- <para><command># rpm -ivh swift-plugin-1.0.-1.el6.noarch.rpm</command></para>
- <para><note>
- <para>You must repeat the above steps on all the machines on which you want to install Unified File and Object Storage. If you install the Unified File and Object Storage on multiple servers, you can use a load balancer like pound, nginx, and so on to distribute the request across the machines.</para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Adding Users</title>
- <para>The authentication system allows the administrator to grant different levels of access to different users based on the requirement. The following are the types of user permissions:
- </para>
- <itemizedlist>
- <listitem>
- <para>admin user
- </para>
- </listitem>
- <listitem>
- <para>normal user</para>
- </listitem>
- </itemizedlist>
- <para>Admin user has read and write permissions on the account. By default, a normal user has no read or write permissions. A normal user can only authenticate itself to get a Auth-Token. Read or write permission are provided through ACLs by the admin users.</para>
- <para>Add a new user by adding the following entry in <filename>/etc/swift/proxy-server.conf</filename> file:</para>
- <para><command>user_&lt;account-name&gt;_&lt;user-name&gt; = &lt;password&gt; [.admin]</command></para>
- <para>For example, </para>
- <para><command>user_test_tester = testing .admin</command>
-</para>
- <note>
- <para>During installation, the installation script adds few sample users to the <filename>proxy-server.conf</filename> file. It is highly recommended that you remove all the default sample user entries from the configuration file.
-</para>
- </note>
- <para>For more information on setting ACLs, see <xref linkend="chap-Administration_Guide-Working_UFO-Setting_ACLs"/>.</para>
- </section>
- <section>
- <title>Configuring Proxy Server</title>
- <para>The Proxy Server is responsible for connecting to the rest of the OpenStack Object Storage architecture. For each request, it looks up the location of the account, container, or object in the ring and route the request accordingly. The public API is also exposed through the proxy server. When objects are streamed to or from an object server, they are streamed directly through the proxy server to or from the user – the proxy server does not spool them.
-</para>
- <para>The configurable options pertaining to proxy server are stored in <filename>/etc/swift/proxy-server.conf</filename>. The following is the sample <filename>proxy-server.conf</filename> file:</para>
- <para><programlisting>[app:proxy-server]
-use = egg:swift#proxy
-allow_account_management=true
-account_autocreate=true
-
-[filter:tempauth]
-use = egg:swift#tempauth user_admin_admin=admin.admin.reseller_admin
-user_test_tester=testing.admin
-user_test2_tester2=testing2.admin
-user_test_tester3=testing3
-
-[filter:healthcheck]
-use = egg:swift#healthcheck
-
-[filter:cache]
-use = egg:swift#memcache</programlisting></para>
- <para>By default, GlusterFS&apos;s Unified File and Object Storage is configured to support HTTP protocol and uses temporary authentication to authenticate the HTTP requests.</para>
- </section>
- <section>
- <title>Configuring Authentication System</title>
- <para>Proxy server must be configured to authenticate using <literal>
- <literal>tempauth</literal>
- </literal>. </para>
- </section>
- <section>
- <title>Configuring Proxy Server for HTTPS</title>
- <para>By default, proxy server only handles HTTP request. To configure the proxy server to process HTTPS requests, perform the following steps:</para>
- <orderedlist>
- <listitem>
- <para>Create self-signed cert for SSL using the following commands:</para>
- <para><programlisting>cd /etc/swift
-openssl req -new -x509 -nodes -out cert.crt -keyout cert.key</programlisting></para>
- </listitem>
- <listitem>
- <para>Add the following lines to <filename>/etc/swift/proxy-server.conf </filename>under <replaceable>[DEFAULT]</replaceable></para>
- <para><programlisting>bind_port = 443
- cert_file = /etc/swift/cert.crt
- key_file = /etc/swift/cert.key</programlisting></para>
- </listitem>
- <listitem>
- <para>Restart the servers using the following commands:</para>
- <para><programlisting>swift-init main stop
-swift-init main start</programlisting></para>
- </listitem>
- </orderedlist>
- <para>The following are the configurable options:
-</para>
- <table frame="all">
- <title>proxy-server.conf Default Options in the [DEFAULT] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>bind_ip </entry>
- <entry>0.0.0.0 </entry>
- <entry>IP Address for server to bind</entry>
- </row>
- <row>
- <entry>bind_port </entry>
- <entry>80 </entry>
- <entry>Port for server to bind </entry>
- </row>
- <row>
- <entry>swift_dir </entry>
- <entry>/etc/swift </entry>
- <entry>Swift configuration directory </entry>
- </row>
- <row>
- <entry>workers </entry>
- <entry>1</entry>
- <entry>Number of workers to fork </entry>
- </row>
- <row>
- <entry>user </entry>
- <entry>swift </entry>
- <entry>swift user</entry>
- </row>
- <row>
- <entry>cert_file </entry>
- <entry/>
- <entry>Path to the ssl .crt </entry>
- </row>
- <row>
- <entry>key_file </entry>
- <entry/>
- <entry>Path to the ssl .key </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <table frame="all">
- <title>proxy-server.conf Server Options in the [proxy-server] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>use </entry>
- <entry/>
- <entry>paste.deploy entry point for the container server. For most cases, this should be <literal>egg:swift#container</literal>. </entry>
- </row>
- <row>
- <entry>log_name </entry>
- <entry>proxy-server </entry>
- <entry>Label used when logging </entry>
- </row>
- <row>
- <entry>log_facility </entry>
- <entry>LOG_LOCAL0 </entry>
- <entry>Syslog log facility </entry>
- </row>
- <row>
- <entry>log_level </entry>
- <entry>INFO </entry>
- <entry>Log level </entry>
- </row>
- <row>
- <entry>log_headers </entry>
- <entry>True </entry>
- <entry>If True, log headers in each request </entry>
- </row>
- <row>
- <entry>recheck_account_existence </entry>
- <entry>60 </entry>
- <entry>Cache timeout in seconds to send memcached for account existence </entry>
- </row>
- <row>
- <entry>recheck_container_existence </entry>
- <entry>60 </entry>
- <entry>Cache timeout in seconds to send memcached for container existence </entry>
- </row>
- <row>
- <entry>object_chunk_size </entry>
- <entry>65536 </entry>
- <entry>Chunk size to read from object servers </entry>
- </row>
- <row>
- <entry>client_chunk_size </entry>
- <entry>65536 </entry>
- <entry>Chunk size to read from clients </entry>
- </row>
- <row>
- <entry>memcache_servers </entry>
- <entry>127.0.0.1:11211 </entry>
- <entry>Comma separated list of memcached servers ip:port </entry>
- </row>
- <row>
- <entry>node_timeout </entry>
- <entry>10 </entry>
- <entry>Request timeout to external services </entry>
- </row>
- <row>
- <entry>client_timeout </entry>
- <entry>60 </entry>
- <entry>Timeout to read one chunk from a client </entry>
- </row>
- <row>
- <entry>conn_timeout </entry>
- <entry>0.5 </entry>
- <entry>Connection timeout to external services </entry>
- </row>
- <row>
- <entry>error_suppression_interval </entry>
- <entry>60 </entry>
- <entry>Time in seconds that must elapse since the last error for a node to be considered no longer error limited </entry>
- </row>
- <row>
- <entry>error_suppression_limit </entry>
- <entry>10 </entry>
- <entry>Error count to consider a node error limited </entry>
- </row>
- <row>
- <entry>allow_account_management </entry>
- <entry>false </entry>
- <entry>Whether account <literal>PUT</literal>s and <literal>DELETE</literal>s are even callable </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- </section>
- <section>
- <title>Configuring Object Server</title>
- <para>The Object Server is a very simple blob storage server that can store, retrieve, and delete objects stored on local devices. Objects are stored as binary files on the file system with metadata stored in the file’s extended attributes (xattrs). This requires that the underlying file system choice for object servers support xattrs on files.
-
-</para>
- <para>The configurable options pertaining Object Server are stored in the file <filename>/etc/swift/object-server/1.conf</filename>. The following is the sample <filename>object-server/1.conf</filename> file:</para>
- <para><programlisting>[DEFAULT]
-devices = /srv/1/node
-mount_check = false
-bind_port = 6010
-user = root
-log_facility = LOG_LOCAL2
-
-[pipeline:main]
-pipeline = gluster object-server
-
-[app:object-server]
-use = egg:swift#object
-
-[filter:gluster]
-use = egg:swift#gluster
-
-[object-replicator]
-vm_test_mode = yes
-
-[object-updater]
-[object-auditor]</programlisting></para>
- <para>The following are the configurable options:
-</para>
- <table frame="all">
- <title>object-server.conf Default Options in the [DEFAULT] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>swift_dir </entry>
- <entry>/etc/swift </entry>
- <entry>Swift configuration directory </entry>
- </row>
- <row>
- <entry>devices </entry>
- <entry>/srv/node </entry>
- <entry>Mount parent directory where devices are mounted </entry>
- </row>
- <row>
- <entry>mount_check </entry>
- <entry>true </entry>
- <entry>Whether or not check if the devices are mounted to prevent accidentally writing to the root device </entry>
- </row>
- <row>
- <entry>bind_ip </entry>
- <entry>0.0.0.0 </entry>
- <entry>IP Address for server to bind</entry>
- </row>
- <row>
- <entry>bind_port </entry>
- <entry>6000 </entry>
- <entry>Port for server to bind</entry>
- </row>
- <row>
- <entry>workers </entry>
- <entry>1 </entry>
- <entry>Number of workers to fork </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <table frame="all">
- <title>object-server.conf Server Options in the [object-server] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>use </entry>
- <entry/>
- <entry>paste.deploy entry point for the object server. For most cases, this should be <literal>egg:swift#object</literal>. </entry>
- </row>
- <row>
- <entry>log_name </entry>
- <entry>object-server </entry>
- <entry>log name used when logging </entry>
- </row>
- <row>
- <entry>log_facility </entry>
- <entry>LOG_LOCAL0 </entry>
- <entry>Syslog log facility </entry>
- </row>
- <row>
- <entry>log_level </entry>
- <entry>INFO </entry>
- <entry>Logging level </entry>
- </row>
- <row>
- <entry>log_requests </entry>
- <entry>True </entry>
- <entry>Whether or not to log each request </entry>
- </row>
- <row>
- <entry>user </entry>
- <entry>swift </entry>
- <entry>swift user</entry>
- </row>
- <row>
- <entry>node_timeout </entry>
- <entry>3</entry>
- <entry>Request timeout to external services </entry>
- </row>
- <row>
- <entry>conn_timeout </entry>
- <entry>0.5</entry>
- <entry>Connection timeout to external services </entry>
- </row>
- <row>
- <entry>network_chunk_size </entry>
- <entry>65536 </entry>
- <entry>Size of chunks to read or write over the network </entry>
- </row>
- <row>
- <entry>disk_chunk_size </entry>
- <entry>65536 </entry>
- <entry>Size of chunks to read or write to disk </entry>
- </row>
- <row>
- <entry>max_upload_time </entry>
- <entry>65536 </entry>
- <entry>Maximum time allowed to upload an object </entry>
- </row>
- <row>
- <entry>slow </entry>
- <entry>0</entry>
- <entry>If &gt; 0, Minimum time in seconds for a <literal>PUT</literal> or <literal>DELETE</literal> request to complete </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- </section>
- <section>
- <title>Configuring Container Server</title>
- <para>The Container Server’s primary job is to handle listings of objects. The listing is done by querying the GlusterFS mount point with path. This query returns a list of all files and directories present under that container.
-</para>
- <para>The configurable options pertaining to container server are stored in <filename>/etc/swift/container-server/1.conf</filename> file. The following is the sample <filename>container-server/1.conf</filename> file:</para>
- <para><programlisting>[DEFAULT]
-devices = /srv/1/node
-mount_check = false
-bind_port = 6011
-user = root
-log_facility = LOG_LOCAL2
-
-[pipeline:main]
-pipeline = gluster container-server
-
-[app:container-server]
-use = egg:swift#container
-
-[filter:gluster]
-use = egg:swift#gluster
-
-[container-replicator]
-[container-updater]
-[container-auditor]</programlisting></para>
- <para>The following are the configurable options:</para>
- <table frame="all">
- <title>container-server.conf Default Options in the [DEFAULT] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>swift_dir </entry>
- <entry>/etc/swift </entry>
- <entry>Swift configuration directory </entry>
- </row>
- <row>
- <entry>devices </entry>
- <entry>/srv/node </entry>
- <entry>Mount parent directory where devices are mounted </entry>
- </row>
- <row>
- <entry>mount_check </entry>
- <entry>true </entry>
- <entry>Whether or not check if the devices are mounted to prevent accidentally writing to the root device </entry>
- </row>
- <row>
- <entry>bind_ip </entry>
- <entry>0.0.0.0 </entry>
- <entry>IP Address for server to bind</entry>
- </row>
- <row>
- <entry>bind_port </entry>
- <entry>6001 </entry>
- <entry>Port for server to bind</entry>
- </row>
- <row>
- <entry>workers </entry>
- <entry>1 </entry>
- <entry>Number of workers to fork </entry>
- </row>
- <row>
- <entry>user </entry>
- <entry>swift </entry>
- <entry>Swift user</entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <table frame="all">
- <title>container-server.conf Server Options in the [container-server] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>use </entry>
- <entry/>
- <entry>paste.deploy entry point for the container server. For most cases, this should be <literal>egg:swift#container</literal>. </entry>
- </row>
- <row>
- <entry>log_name </entry>
- <entry>container-server </entry>
- <entry>Label used when logging </entry>
- </row>
- <row>
- <entry>log_facility </entry>
- <entry>LOG_LOCAL0 </entry>
- <entry>Syslog log facility </entry>
- </row>
- <row>
- <entry>log_level </entry>
- <entry>INFO </entry>
- <entry>Logging level </entry>
- </row>
- <row>
- <entry>node_timeout </entry>
- <entry>3 </entry>
- <entry>Request timeout to external services </entry>
- </row>
- <row>
- <entry>conn_timeout </entry>
- <entry>0.5 </entry>
- <entry>Connection timeout to external services </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- </section>
- <section>
- <title>Configuring Account Server</title>
- <para>The Account Server is very similar to the Container Server, except that it is responsible for listing of containers rather than objects. In UFO, each gluster volume is an account.
-</para>
- <para>The configurable options pertaining to account server are stored in <filename>/etc/swift/account-server/1.conf</filename> file. The following is the sample <filename>account-server/1.conf</filename> file: </para>
- <para><programlisting>[DEFAULT]
-devices = /srv/1/node
-mount_check = false
-bind_port = 6012
-user = root
-log_facility = LOG_LOCAL2
-
-[pipeline:main]
-pipeline = gluster account-server
-
-[app:account-server]
-use = egg:swift#account
-
-[filter:gluster]
-use = egg:swift#gluster
-
-[account-replicator]
-vm_test_mode = yes
-
-[account-auditor]
-[account-reaper]</programlisting></para>
- <para>The following are the configurable options:</para>
- <table frame="all">
- <title>account-server.conf Default Options in the [DEFAULT] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>swift_dir </entry>
- <entry>/etc/swift </entry>
- <entry>Swift configuration directory </entry>
- </row>
- <row>
- <entry>devices </entry>
- <entry>/srv/node </entry>
- <entry>mount parent directory where devices are mounted </entry>
- </row>
- <row>
- <entry>mount_check </entry>
- <entry>true </entry>
- <entry>Whether or not check if the devices are mounted to prevent accidentally writing to the root device </entry>
- </row>
- <row>
- <entry>bind_ip </entry>
- <entry>0.0.0.0 </entry>
- <entry>IP Address for server to bind</entry>
- </row>
- <row>
- <entry>bind_port </entry>
- <entry>6002 </entry>
- <entry>Port for server to bind</entry>
- </row>
- <row>
- <entry>workers </entry>
- <entry>1 </entry>
- <entry>Number of workers to fork </entry>
- </row>
- <row>
- <entry>user </entry>
- <entry>swift </entry>
- <entry>Swift user</entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- <table frame="all">
- <title>account-server.conf Server Options in the [account-server] section </title>
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Option </entry>
- <entry>Default </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>use </entry>
- <entry/>
- <entry>paste.deploy entry point for the container server. For most cases, this should be <literal>egg:swift#container</literal>. </entry>
- </row>
- <row>
- <entry>log_name </entry>
- <entry>account-server </entry>
- <entry>Label used when logging </entry>
- </row>
- <row>
- <entry>log_facility </entry>
- <entry>LOG_LOCAL0 </entry>
- <entry>Syslog log facility </entry>
- </row>
- <row>
- <entry>log_level </entry>
- <entry>INFO </entry>
- <entry>Logging level </entry>
- </row>
- </tbody>
- </tgroup>
- </table>
- </section>
- <section>
- <title>Starting and Stopping Server</title>
- <para>You must start the server manually when system reboots and whenever you update/modify the configuration files.</para>
- <itemizedlist>
- <listitem>
- <para>To start the server, enter the following command:</para>
- <para><command># swift_init main start</command></para>
- </listitem>
- <listitem>
- <para>To stop the server, enter the following command:</para>
- <para><command># swift_init main stop</command></para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Working with Unified File and Object Storage</title>
- <para>This section describes the REST API for administering and managing Object Storage. All requests will
-be directed to the host and URL described in the <filename>X-Storage-URL HTTP</filename> header obtained during
-successful authentication.
-</para>
- <section>
- <title>Configuring Authenticated Access </title>
- <para>Authentication is the process of proving identity to the system. To use the REST interface, you must
-obtain an authorization token using GET method and supply it with v1.0 as the path.
-</para>
- <para>Each REST request against the Object Storage system requires the addition of a specific authorization
-token HTTP x-header, defined as X-Auth-Token. The storage URL and authentication token are
-returned in the headers of the response.
-</para>
- <itemizedlist>
- <listitem>
- <para>To authenticate, run the following command:
-</para>
- <programlisting>GET auth/v1.0 HTTP/1.1
-Host: &lt;auth URL&gt;
-X-Auth-User: &lt;account name&gt;:&lt;user name&gt;
-X-Auth-Key: &lt;user-Password&gt;</programlisting>
- <para>For example,
-</para>
- <programlisting>GET auth/v1.0 HTTP/1.1
-Host: auth.example.com
-X-Auth-User: test:tester
-X-Auth-Key: testing
-
-HTTP/1.1 200 OK
-X-Storage-Url: https:/example.storage.com:443/v1/AUTH_test
-X-Storage-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554
-X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554
-Content-Length: 0
-Date: Wed, 10 jul 2011 06:11:51 GMT</programlisting>
- <para>To authenticate access using cURL (for the above example), run the following
-command:
-</para>
- <programlisting>curl -v -H &apos;X-Storage-User: test:tester&apos; -H &apos;X-Storage-Pass:testing&apos; -k
-https://auth.example.com:443/auth/v1.0</programlisting>
- <para>The X-Auth-Url has to be parsed and used in the connection and request line of all subsequent
-requests to the server. In the example output, users connecting to server will send most
-container/object requests with a host header of example.storage.com and the request line&apos;s version
-and account as v1/AUTH_test.
-
-</para>
- </listitem>
- </itemizedlist>
- <note>
- <para>The authentication tokens are valid for a 24 hour period.
-</para>
- </note>
- </section>
- <section>
- <title>Working with Accounts </title>
- <para>This section describes the list of operations you can perform at the account level of the URL.
-</para>
- <section>
- <title>Displaying Container Information </title>
- <para>You can list the objects of a specific container, or all containers, as needed using GET command. You
-can use the following optional parameters with GET request to refine the results:
-</para>
- <para><informaltable frame="none">
- <tgroup cols="2">
- <colspec colnum="1" colname="c0" colsep="0"/>
- <colspec colnum="2" colname="c1" colsep="0"/>
- <thead>
- <row>
- <entry>Parameter </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>limit </entry>
- <entry>Limits the number of results to at most <emphasis role="italic">n</emphasis> value. </entry>
- </row>
- <row>
- <entry>marker </entry>
- <entry>Returns object names greater in value than the specified marker. </entry>
- </row>
- <row>
- <entry>format </entry>
- <entry>Specify either json or xml to return the respective serialized response. </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- <para><emphasis role="bold">To display container information </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>List all the containers of an account using the following command:
-</para>
- <para><programlisting>GET /&lt;apiversion&gt;/&lt;account&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting></para>
- <para>For example,
-</para>
- <programlisting>GET /v1/AUTH_test HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 200 Ok
-Date: Wed, 13 Jul 2011 16:32:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8
-Content-Length: 39
-
-songs
-movies
-documents
-reports</programlisting>
- </listitem>
- </itemizedlist>
- <para>To display container information using cURL (for the above example), run the following
-command:
-</para>
- <para><programlisting>curl -v -X GET -H &apos;X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test -k</programlisting></para>
- </section>
- <section>
- <title>Displaying Account Metadata Information </title>
- <para>You can issue HEAD command to the storage service to view the number of containers and the total
-bytes stored in the account.
-</para>
- <itemizedlist>
- <listitem>
- <para>To display containers and storage used, run the following command:
-</para>
- <programlisting>HEAD /&lt;apiversion&gt;/&lt;account&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting>
- <para>For example,
-</para>
- <programlisting>HEAD /v1/AUTH_test HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 16:52:21 GMT
-Server: Apache
-X-Account-Container-Count: 4
-X-Account-Total-Bytes-Used: 394792</programlisting>
- <para>To display account metadata information using cURL (for the above example), run the following
-command:
-</para>
- <programlisting>curl -v -X HEAD -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test -k</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Working with Containers </title>
- <para>This section describes the list of operations you can perform at the container level of the URL.
-</para>
- <section>
- <title> Creating Containers </title>
- <para>You can use PUT command to create containers. Containers are the storage folders for your data.
-The URL encoded name must be less than 256 bytes and cannot contain a forward slash &apos;/&apos; character.
-</para>
- <itemizedlist>
- <listitem>
- <para>To create a container, run the following command:
-</para>
- <programlisting>PUT /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/ HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting>
- <para>For example,
-</para>
- <programlisting>PUT /v1/AUTH_test/pictures/ HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-HTTP/1.1 201 Created
-
-Date: Wed, 13 Jul 2011 17:32:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8</programlisting>
- <para>To create container using cURL (for the above example), run the following command:
-</para>
- <programlisting>curl -v -X PUT -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/pictures -k</programlisting>
- <para>The status code of 201 (Created) indicates that you have successfully created the container. If a
-container with same is already existed, the status code of 202 is displayed.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Displaying Objects of a Container </title>
- <para>You can list the objects of a container using GET command. You can use the following optional
-parameters with GET request to refine the results:
-</para>
- <para><informaltable frame="none">
- <tgroup cols="2">
- <colspec colnum="1" colname="c0" colsep="0"/>
- <colspec colnum="2" colname="c1" colsep="0"/>
- <thead>
- <row>
- <entry>Parameter </entry>
- <entry>Description </entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>limit </entry>
- <entry>Limits the number of results to at most <emphasis role="italic">n</emphasis> value. </entry>
- </row>
- <row>
- <entry>marker </entry>
- <entry>Returns object names greater in value than the specified marker. </entry>
- </row>
- <row>
- <entry>prefix </entry>
- <entry>Displays the results limited to object names beginning with the substring x. beginning with the substring x. </entry>
- </row>
- <row>
- <entry>path </entry>
- <entry>Returns the object names nested in the pseudo path. </entry>
- </row>
- <row>
- <entry>format </entry>
- <entry>Specify either json or xml to return the respective serialized response. </entry>
- </row>
- <row>
- <entry>delimiter </entry>
- <entry>Returns all the object names nested in the container. </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- <para>To display objects of a container
-</para>
- <itemizedlist>
- <listitem>
- <para>List objects of a specific container using the following command:
-</para>
- </listitem>
- </itemizedlist>
- <programlisting>GET /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;[parm=value] HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting>
- <para>For example,
-</para>
- <programlisting>GET /v1/AUTH_test/images HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 200 Ok
-Date: Wed, 13 Jul 2011 15:42:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8
-Content-Length: 139
-
-sample file.jpg
-test-file.pdf
-You and Me.pdf
-Puddle of Mudd.mp3
-Test Reports.doc</programlisting>
- <para>To display objects of a container using cURL (for the above example), run the following
-command:
-</para>
- <programlisting>curl -v -X GET-H &apos;X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images -k</programlisting>
- </section>
- <section>
- <title>Displaying Container Metadata Information </title>
- <para>You can issue HEAD command to the storage service to view the number of objects in a container and
-the total bytes of all the objects stored in the container.
-</para>
- <itemizedlist>
- <listitem>
- <para>To display list of objects and storage used, run the following command:
-</para>
- <programlisting>HEAD /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting>
- <para>For example,</para>
- <programlisting>HEAD /v1/AUTH_test/images HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 19:52:21 GMT
-Server: Apache
-X-Account-Object-Count: 8
-X-Container-Bytes-Used: 472</programlisting>
- <para>To display list of objects and storage used in a container using cURL (for the above example), run
-the following command:
-</para>
- <programlisting>curl -v -X HEAD -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images -k</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Deleting Container </title>
- <para>You can use DELETE command to permanently delete containers. The container must be empty
-before it can be deleted.
-</para>
- <para>You can issue HEAD command to determine if it contains any objects.
-</para>
- <itemizedlist>
- <listitem>
- <para>To delete a container, run the following command:
-</para>
- <programlisting>DELETE /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/ HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;</programlisting>
- <para>For example,</para>
- <programlisting>DELETE /v1/AUTH_test/pictures HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 17:52:21 GMT
-Server: Apache
-Content-Length: 0
-Content-Type: text/plain; charset=UTF-8</programlisting>
- <para>To delete a container using cURL (for the above example), run the following command:
-</para>
- <programlisting>curl -v -X DELETE -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/pictures -k</programlisting>
- <para>The status code of 204 (No Content) indicates that you have successfully deleted the container. If
-that container does not exist, the status code 404 (Not Found) is displayed, and if the container is
-not empty, the status code 409 (Conflict) is displayed.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Updating Container Metadata </title>
- <para>You can update the metadata of container using POST operation, metadata keys should be prefixed
-with &apos;x-container-meta&apos;.
-</para>
- <itemizedlist>
- <listitem>
- <para>To update the metadata of the object, run the following command:
-</para>
- <programlisting>POST /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;Authentication-token-key&gt;
-X-Container-Meta-&lt;key&gt;: &lt;new value&gt;
-X-Container-Meta-&lt;key&gt;: &lt;new value&gt;</programlisting>
- <para>For example,
-</para>
- <para><programlisting>POST /v1/AUTH_test/images HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-X-Container-Meta-Zoo: Lion
-X-Container-Meta-Home: Dog
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 20:52:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To update the metadata of the object using cURL (for the above example), run the following
-command:
-</para>
- <para><programlisting>curl -v -X POST -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images -H &apos; X-Container-Meta-Zoo: Lion&apos; -H &apos;X-Container-Meta-Home: Dog&apos; -k</programlisting></para>
- <para>The status code of 204 (No Content) indicates the container&apos;s metadata is updated successfully. If
-that object does not exist, the status code 404 (Not Found) is displayed.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Working_UFO-Setting_ACLs">
- <title> Setting ACLs on Container </title>
- <para>You can set the container access control list by using POST command on container with <command>x- container-read</command> and<command> x-container-write</command> keys.
-</para>
- <para>The ACL format is <command>[item[,item...]]</command>. Each item can be a group name to give access to or a
-referrer designation to grant or deny based on the HTTP Referer header.
-</para>
- <para>The referrer designation format is:<command> .r:[-]value</command>.
-</para>
- <para>The .r can also be <command>.ref, .referer, </command>or .<command>referrer</command>; though it will be shortened to.r for
-decreased character count usage. The value can be <command>*</command> to specify any referrer host is allowed access. The leading minus sign (-)
-indicates referrer hosts that should be denied access.
-</para>
- <para>Examples of valid ACLs:
-</para>
- <para><programlisting>.r:*
-.r:*,bobs_account,sues_account:sue
-bobs_account,sues_account:sue</programlisting></para>
- <para>Examples of invalid ACLs:</para>
- <para><programlisting>.r:
-.r:-</programlisting></para>
- <para>By default, allowing read access via <command><command>.</command>r </command>will not allow listing objects in the container but allows
-retrieving objects from the container. To turn on listings, use the .<command>rlistings</command> directive. Also, <command>.r</command>
-designations are not allowed in headers whose names include the word write.
-</para>
- <para>For example, to set all the objects access rights to &quot;public‟ inside the container using cURL (for the
-above example), run the following command:
-</para>
- <para><programlisting>curl -v -X POST -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images
--H &apos;X-Container-Read: .r:*&apos; -k</programlisting></para>
- </section>
- </section>
- <section>
- <title> Working with Objects </title>
- <para>An object represents the data and any metadata for the files stored in the system. Through the REST
-interface, metadata for an object can be included by adding custom HTTP headers to the request
-and the data payload as the request body. Objects name should not exceed 1024 bytes after URL
-encoding.
-</para>
- <para>This section describes the list of operations you can perform at the object level of the URL.
-</para>
- <section>
- <title>Creating or Updating Object </title>
- <para>You can use PUT command to write or update an object&apos;s content and metadata.
-</para>
- <para>You can verify the data integrity by including an MD5checksum for the object&apos;s data in the ETag
-header. ETag header is optional and can be used to ensure that the object&apos;s contents are stored
-successfully in the storage system.
-</para>
- <para>You can assign custom metadata to objects by including additional HTTP headers on the PUT request.
-The objects created with custom metadata via HTTP headers are identified with the<command>X-Object- Meta</command>- prefix.
-</para>
- <itemizedlist>
- <listitem>
- <para>To create or update an object, run the following command:
-</para>
- <para><programlisting>PUT /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;
-ETag: da1e100dc9e7becc810986e37875ae38
-Content-Length: 342909
-X-Object-Meta-PIN: 2343</programlisting></para>
- <para>For example,</para>
- <para><programlisting>PUT /v1/AUTH_test/pictures/dog HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-ETag: da1e100dc9e7becc810986e37875ae38
-
-HTTP/1.1 201 Created
-Date: Wed, 13 Jul 2011 18:32:21 GMT
-Server: Apache
-ETag: da1e100dc9e7becc810986e37875ae38
-Content-Length: 0
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To create or update an object using cURL (for the above example), run the following command:
-</para>
- <para><programlisting>curl -v -X PUT -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/pictures/dog -H &apos;Content-
-Length: 0&apos; -k</programlisting></para>
- <para>The status code of 201 (Created) indicates that you have successfully created or updated the object.
-If there is a missing content-Length or Content-Type header in the request, the status code of 412
-(Length Required) is displayed. (Optionally) If the MD5 checksum of the data written to the storage
-system does not match the ETag value, the status code of 422 (Unprocessable Entity) is displayed.
-</para>
- </listitem>
- </itemizedlist>
- <section>
- <title>Chunked Transfer Encoding </title>
- <para>You can upload data without knowing the size of the data to be uploaded. You can do this by
-specifying an HTTP header of Transfer-Encoding: chunked and without using a Content-Length
-header.
-</para>
- <para>You can use this feature while doing a DB dump, piping the output through gzip, and then piping the
-data directly into Object Storage without having to buffer the data to disk to compute the file size.
-</para>
- <itemizedlist>
- <listitem>
- <para>To create or update an object, run the following command:
- </para>
- <para><programlisting>PUT /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;authentication-token-key&gt;
-Transfer-Encoding: chunked
-X-Object-Meta-PIN: 2343</programlisting></para>
- <para>For example,
-</para>
- <para><programlisting>PUT /v1/AUTH_test/pictures/cat HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-Transfer-Encoding: chunked
-X-Object-Meta-PIN: 2343
-19
-A bunch of data broken up
-D
-into chunks.
-0</programlisting>
-
-</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Copying Object </title>
- <para>You can copy object from one container to another or add a new object and then add reference to
-designate the source of the data from another container.
-</para>
- <para><emphasis role="bold">To copy object from one container to another </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>To add a new object and designate the source of the data from another container, run the
-following command:
-</para>
- <para><programlisting>COPY /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;sourceobject&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt; authentication-token-key&gt;
-Destination: /&lt;container&gt;/&lt;destinationobject&gt;</programlisting></para>
- <para>For example,
-</para>
- <para><programlisting>COPY /v1/AUTH_test/images/dogs HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-Destination: /photos/cats
-
-HTTP/1.1 201 Created
-Date: Wed, 13 Jul 2011 18:32:21 GMT
-Server: Apache
-Content-Length: 0
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To copy an object using cURL (for the above example), run the following command:
-</para>
- <para><programlisting>curl -v -X COPY -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos; -H &apos;Destination: /photos/cats&apos; -k https://example.storage.com:443/v1/AUTH_test/images/dogs</programlisting></para>
- <para>The status code of 201 (Created) indicates that you have successfully copied the object. If there is a
-missing content-Length or Content-Type header in the request, the status code of 412 (Length
-Required) is displayed.
-</para>
- <para>You can also use PUT command to copy object by using additional header <command>X-Copy-From: container/obj</command>.
-</para>
- </listitem>
- <listitem>
- <para>To use PUT command to copy an object, run the following command:
-</para>
- <para><programlisting>PUT /v1/AUTH_test/photos/cats HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-X-Copy-From: /images/dogs
-
-HTTP/1.1 201 Created
-Date: Wed, 13 Jul 2011 18:32:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To copy an object using cURL (for the above example), run the following command:
-</para>
- <para><programlisting>curl -v -X PUT -H &apos;X-Auth-Token: AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
--H &apos;X-Copy-From: /images/dogs&apos; –k
-https://example.storage.com:443/v1/AUTH_test/images/cats</programlisting></para>
- <para>The status code of 201 (Created) indicates that you have successfully copied the object.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Displaying Object Information </title>
- <para>You can issue GET command on an object to view the object data of the object.
-</para>
- <itemizedlist>
- <listitem>
- <para>To display the content of an object run the following command:</para>
- <para><programlisting>GET /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;Authentication-token-key&gt;</programlisting></para>
- <para>For example,
-</para>
- <para><programlisting>GET /v1/AUTH_test/images/cat HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 200 Ok
-Date: Wed, 13 Jul 2011 23:52:21 GMT
-Server: Apache
-Last-Modified: Thu, 14 Jul 2011 13:40:18 GMT
-ETag: 8a964ee2a5e88be344f36c22562a6486
-Content-Length: 534210
-[.........]</programlisting></para>
- <para>To display the content of an object using cURL (for the above example), run the following
-command:
-</para>
- <para><programlisting>curl -v -X GET -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images/cat -k</programlisting></para>
- <para>The status code of 200 (Ok) indicates the object‟s data is displayed successfully. If that object does
-not exist, the status code 404 (Not Found) is displayed.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Displaying Object Metadata </title>
- <para>You can issue HEAD command on an object to view the object metadata and other standard HTTP
-headers. You must send only authorization token as header.
-</para>
- <itemizedlist>
- <listitem>
- <para>To display the metadata of the object, run the following command:
-</para>
- </listitem>
- </itemizedlist>
- <para><programlisting>HEAD /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;Authentication-token-key&gt;</programlisting></para>
- <para>For example,
-</para>
- <para><programlisting>HEAD /v1/AUTH_test/images/cat HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 21:52:21 GMT
-Server: Apache
-Last-Modified: Thu, 14 Jul 2011 13:40:18 GMT
-ETag: 8a964ee2a5e88be344f36c22562a6486
-Content-Length: 512000
-Content-Type: text/plain; charset=UTF-8
-X-Object-Meta-House: Cat
-X-Object-Meta-Zoo: Cat
-X-Object-Meta-Home: Cat
-X-Object-Meta-Park: Cat</programlisting></para>
- <para>To display the metadata of the object using cURL (for the above example), run the following
-command:
-</para>
- <para><programlisting>curl -v -X HEAD -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images/cat -k</programlisting></para>
- <para>The status code of 204 (No Content) indicates the object‟s metadata is displayed successfully. If that
-object does not exist, the status code 404 (Not Found) is displayed.
-</para>
- </section>
- <section>
- <title>Updating Object Metadata </title>
- <para>You can issue POST command on an object name only to set or overwrite arbitrary key metadata. You
-cannot change the object‟s other headers such as Content-Type, ETag and others using POST
-operation. The POST command will delete all the existing metadata and replace it with the new
-arbitrary key metadata.
-</para>
- <para>You must prefix <emphasis role="bold">X-Object-Meta-</emphasis> to the key names.
-</para>
- <itemizedlist>
- <listitem>
- <para>To update the metadata of an object, run the following command:</para>
- <para><programlisting>POST /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;Authentication-token-key&gt;
-X-Object-Meta-&lt;key&gt;: &lt;new value&gt;
-X-Object-Meta-&lt;key&gt;: &lt;new value&gt;</programlisting>
-</para>
- <para>For example,
-</para>
- <para><programlisting>POST /v1/AUTH_test/images/cat HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-X-Object-Meta-Zoo: Lion
-X-Object-Meta-Home: Dog
-
-HTTP/1.1 202 Accepted
-Date: Wed, 13 Jul 2011 22:52:21 GMT
-Server: Apache
-Content-Length: 0
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To update the metadata of an object using cURL (for the above example), run the following
-command:
-</para>
- <para><programlisting>curl -v -X POST -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/images/cat -H &apos; X-Object-
-Meta-Zoo: Lion&apos; -H &apos;X-Object-Meta-Home: Dog&apos; -k</programlisting></para>
- <para>The status code of 202 (Accepted) indicates that you have successfully updated the object‟s
-metadata. If that object does not exist, the status code 404 (Not Found) is displayed.
-
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Deleting Object </title>
- <para>You can use DELETE command to permanently delete the object.
-</para>
- <para>The DELETE command on an object will be processed immediately and any subsequent operations
-like GET, HEAD, POST, or DELETE on the object will display 404 (Not Found) error.
-</para>
- <itemizedlist>
- <listitem>
- <para>To delete an object, run the following command:
-</para>
- <para><programlisting>DELETE /&lt;apiversion&gt;/&lt;account&gt;/&lt;container&gt;/&lt;object&gt; HTTP/1.1
-Host: &lt;storage URL&gt;
-X-Auth-Token: &lt;Authentication-token-key&gt;</programlisting></para>
- <para>For example,
-</para>
- <para><programlisting>DELETE /v1/AUTH_test/pictures/cat HTTP/1.1
-Host: example.storage.com
-X-Auth-Token: AUTH_tkd3ad38b087b49bbbac0494f7600a554
-
-HTTP/1.1 204 No Content
-Date: Wed, 13 Jul 2011 20:52:21 GMT
-Server: Apache
-Content-Type: text/plain; charset=UTF-8</programlisting></para>
- <para>To delete an object using cURL (for the above example), run the following command:
-</para>
- <para><programlisting>curl -v -X DELETE -H &apos;X-Auth-Token:
-AUTH_tkde3ad38b087b49bbbac0494f7600a554&apos;
-https://example.storage.com:443/v1/AUTH_test/pictures/cat -k</programlisting></para>
- <para>The status code of 204 (No Content) indicates that you have successfully deleted the object. If that
-object does not exist, the status code 404 (Not Found) is displayed.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_commandref.xml b/doc/admin-guide/en-US/admin_commandref.xml
deleted file mode 100644
index df4c78f4869..00000000000
--- a/doc/admin-guide/en-US/admin_commandref.xml
+++ /dev/null
@@ -1,334 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Com_Ref">
- <title>Command Reference </title>
- <para>This section describes the available commands and includes the
-following section:
-</para>
- <itemizedlist>
- <listitem>
- <para>gluster Command
-</para>
- <para>Gluster Console Manager (command line interpreter)
-</para>
- </listitem>
- <listitem>
- <para>glusterd Daemon
-</para>
- <para>Gluster elastic volume management daemon
-</para>
- </listitem>
- </itemizedlist>
- <section>
- <title>gluster Command </title>
- <para><emphasis role="bold">NAME</emphasis>
-</para>
- <para>gluster - Gluster Console Manager (command line interpreter)
-</para>
- <para><emphasis role="bold">SYNOPSIS</emphasis>
-</para>
- <para>To run the program and display the gluster prompt:
-</para>
- <para><emphasis role="bold">gluster</emphasis>
-</para>
- <para>To specify a command directly:
-gluster [COMMANDS] [OPTIONS]
-</para>
- <para><emphasis role="bold">DESCRIPTION</emphasis>
-</para>
- <para>The Gluster Console Manager is a command line utility for elastic volume management. You can run
-the gluster command on any export server. The command enables administrators to perform cloud
-operations such as creating, expanding, shrinking, rebalancing, and migrating volumes without
-needing to schedule server downtime.
-</para>
- <para><emphasis role="bold">COMMANDS</emphasis>
-</para>
- <para><informaltable frame="none">
- <tgroup cols="3">
- <colspec colnum="1" colname="c0" colsep="0"/>
- <colspec colnum="2" colname="cgen1" colsep="0"/>
- <colspec colnum="3" colname="c1" colsep="0"/>
- <thead>
- <row>
- <entry>Command</entry>
- <entry namest="cgen1" nameend="c1">Description</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Volume</emphasis>
- </entry>
- </row>
- <row>
- <entry>volume info [all | VOLNAME]</entry>
- <entry namest="cgen1" nameend="c1">Displays information about all volumes, or the specified volume.</entry>
- </row>
- <row>
- <entry>volume create NEW-VOLNAME [stripe COUNT] [replica COUNT] [transport tcp | rdma | tcp,rdma] NEW-BRICK ...</entry>
- <entry namest="cgen1" nameend="c1">Creates a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp).</entry>
- </row>
- <row>
- <entry>volume delete VOLNAME</entry>
- <entry namest="cgen1" nameend="c1">Deletes the specified volume.</entry>
- </row>
- <row>
- <entry>volume start VOLNAME </entry>
- <entry namest="cgen1" nameend="c1">Starts the specified volume.</entry>
- </row>
- <row>
- <entry>volume stop VOLNAME [force] </entry>
- <entry namest="cgen1" nameend="c1">Stops the specified volume. </entry>
- </row>
- <row>
- <entry>volume rename VOLNAME NEW-VOLNAME </entry>
- <entry namest="cgen1" nameend="c1">Renames the specified volume.</entry>
- </row>
- <row>
- <entry>volume help </entry>
- <entry namest="cgen1" nameend="c1">Displays help for the volume command.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Brick</emphasis>
- </entry>
- </row>
- <row>
- <entry>volume add-brick VOLNAME NEW-BRICK ... </entry>
- <entry namest="cgen1" nameend="c1">Adds the specified brick to the specified volume.</entry>
- </row>
- <row>
- <entry>volume replace-brick VOLNAME (BRICK NEW-BRICK) start | pause | abort | status </entry>
- <entry namest="cgen1" nameend="c1">Replaces the specified brick.</entry>
- </row>
- <row>
- <entry>volume remove-brick VOLNAME [(replica COUNT)|(stripe COUNT)] BRICK ... </entry>
- <entry namest="cgen1" nameend="c1">Removes the specified brick from the specified volume.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Rebalance</emphasis>
- </entry>
- </row>
- <row>
- <entry>volume rebalance VOLNAME start</entry>
- <entry namest="cgen1" nameend="c1">Starts rebalancing the specified volume.</entry>
- </row>
- <row>
- <entry>volume rebalance VOLNAME stop </entry>
- <entry namest="cgen1" nameend="c1">Stops rebalancing the specified volume. </entry>
- </row>
- <row>
- <entry>volume rebalance VOLNAME status </entry>
- <entry namest="cgen1" nameend="c1">Displays the rebalance status of the specified volume.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Log</emphasis>
- </entry>
- </row>
- <row>
- <entry>volume log filename VOLNAME [BRICK] DIRECTORY </entry>
- <entry namest="cgen1" nameend="c1">Sets the log directory for the corresponding volume/brick. </entry>
- </row>
- <row>
- <entry>volume log rotate VOLNAME [BRICK] </entry>
- <entry namest="cgen1" nameend="c1">Rotates the log file for corresponding volume/brick.</entry>
- </row>
- <row>
- <entry>volume log locate VOLNAME [BRICK] </entry>
- <entry namest="cgen1" nameend="c1">Locates the log file for corresponding volume/brick. </entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Peer</emphasis>
- </entry>
- </row>
- <row>
- <entry>peer probe HOSTNAME </entry>
- <entry namest="cgen1" nameend="c1">Probes the specified peer. </entry>
- </row>
- <row>
- <entry>peer detach HOSTNAME </entry>
- <entry namest="cgen1" nameend="c1">Detaches the specified peer. </entry>
- </row>
- <row>
- <entry>peer status </entry>
- <entry namest="cgen1" nameend="c1">Displays the status of peers. </entry>
- </row>
- <row>
- <entry>peer help </entry>
- <entry namest="cgen1" nameend="c1">Displays help for the peer command.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Geo-replication</emphasis>
- </entry>
- </row>
- <row>
- <entry>volume geo-replication MASTER SLAVE start</entry>
- <entry namest="cgen1" nameend="c1">
- <para>Start geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME.</para>
- <para>You can specify a local slave volume as :VOLUME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY.</para>
- </entry>
- </row>
- <row>
- <entry>volume geo-replication MASTER SLAVE stop</entry>
- <entry namest="cgen1" nameend="c1">
- <para>Stop geo-replication between the hosts specified by MASTER and SLAVE. You can specify a local master volume as :VOLNAME and a local master directory as /DIRECTORY/SUB-DIRECTORY.</para>
- <para>You can specify a local slave volume as :VOLNAME and a local slave directory as /DIRECTORY/SUB-DIRECTORY. You can specify a remote slave volume as DOMAIN::VOLNAME and a remote slave directory as DOMAIN:/DIRECTORY/SUB-DIRECTORY.
-</para>
- </entry>
- </row>
- <row>
- <entry morerows="10">volume geo-replication MASTER SLAVE config [options]</entry>
- <entry/>
- <entry>Configure geo-replication options between the hosts specified by MASTER and SLAVE. </entry>
- </row>
- <row>
- <entry>gluster-command COMMAND</entry>
- <entry>The path where the gluster command is installed.</entry>
- </row>
- <row>
- <entry>gluster-log-level LOGFILELEVEL</entry>
- <entry>The log level for gluster processes.</entry>
- </row>
- <row>
- <entry>log-file LOGFILE</entry>
- <entry>The path to the geo-replication log file.</entry>
- </row>
- <row>
- <entry>log-level LOGFILELEVEL</entry>
- <entry>The log level for geo-replication.</entry>
- </row>
- <row>
- <entry>remote-gsyncd COMMAND</entry>
- <entry>The path where the gsyncd binary is installed on the remote machine.</entry>
- </row>
- <row>
- <entry>ssh-command COMMAND</entry>
- <entry>The ssh command to use to connect to the remote machine (the default is ssh).</entry>
- </row>
- <row>
- <entry>rsync-command COMMAND</entry>
- <entry>The rsync command to use for synchronizing the files (the default is rsync).</entry>
- </row>
- <row>
- <entry>volume_id= UID</entry>
- <entry>The command to delete the existing master UID for the intermediate/slave node.</entry>
- </row>
- <row>
- <entry>timeout SECONDS</entry>
- <entry>The timeout period.</entry>
- </row>
- <row>
- <entry>sync-jobs N</entry>
- <entry>The number of simultaneous files/directories that can be synchronized.</entry>
- </row>
- <row>
- <entry/>
- <entry>ignore-deletes</entry>
- <entry>If this option is set to 1, a file deleted on master will not trigger a delete operation on the slave. Hence, the slave will remain as a superset of the master and can be used to recover the master in case of crash and/or accidental delete.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Other</emphasis>
- </entry>
- </row>
- <row>
- <entry>help</entry>
- <entry/>
- <entry>Display the command options.</entry>
- </row>
- <row>
- <entry>quit</entry>
- <entry/>
- <entry>Exit the gluster command line interface.</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- <para><emphasis role="bold">FILES</emphasis>
-
-</para>
- <para>/etc/glusterd/*
-</para>
- <para><emphasis role="bold">SEE ALSO </emphasis></para>
- <para>fusermount(1), mount.glusterfs(8), glusterfs-volgen(8), glusterfs(8), glusterd(8)</para>
- </section>
- <section>
- <title>glusterd Daemon </title>
- <para><emphasis role="bold">NAME</emphasis>
-</para>
- <para>glusterd - Gluster elastic volume management daemon</para>
- <para><emphasis role="bold">SYNOPSIS</emphasis>
-</para>
- <para>glusterd [OPTION...]
-</para>
- <para><emphasis role="bold">DESCRIPTION</emphasis>
-</para>
- <para>The glusterd daemon is used for elastic volume management. The daemon must be run on all export servers.
-</para>
- <para><emphasis role="bold">OPTIONS</emphasis>
-</para>
- <para><informaltable frame="none">
- <tgroup cols="2">
- <colspec colnum="1" colname="c0" colsep="0"/>
- <colspec colnum="2" colname="c1" colsep="0"/>
- <thead>
- <row>
- <entry>Option</entry>
- <entry>Description</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Basic</emphasis>
- </entry>
- </row>
- <row>
- <entry>-l=LOGFILE, --log-file=LOGFILE</entry>
- <entry>Files to use for logging (the default is /usr/local/var/log/glusterfs/glusterfs.log).</entry>
- </row>
- <row>
- <entry>-L=LOGLEVEL, --log-level=LOGLEVEL</entry>
- <entry>Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is INFO). </entry>
- </row>
- <row>
- <entry>--debug</entry>
- <entry>Runs the program in debug mode. This option sets --no-daemon, --log-level to DEBUG, and --log-file to console.</entry>
- </row>
- <row>
- <entry>-N, --no-daemon</entry>
- <entry>Runs the program in the foreground.</entry>
- </row>
- <row>
- <entry namest="c0" nameend="c1" align="left">
- <emphasis role="bold">Miscellaneous</emphasis>
- </entry>
- </row>
- <row>
- <entry>-?, --help</entry>
- <entry>Displays this help.</entry>
- </row>
- <row>
- <entry>--usage</entry>
- <entry>Displays a short usage message.</entry>
- </row>
- <row>
- <entry>-V, --version</entry>
- <entry>Prints the program version.</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable></para>
- <para><emphasis role="bold">FILES</emphasis>
-
-</para>
- <para>/etc/glusterd/*
-</para>
- <para><emphasis role="bold">SEE ALSO </emphasis></para>
- <para>fusermount(1), mount.glusterfs(8), glusterfs-volgen(8), glusterfs(8), gluster(8)</para>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_console.xml b/doc/admin-guide/en-US/admin_console.xml
deleted file mode 100644
index ebf273935ca..00000000000
--- a/doc/admin-guide/en-US/admin_console.xml
+++ /dev/null
@@ -1,28 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter>
- <title>Using the Gluster Console Manager – Command Line Utility</title>
- <para>The Gluster Console Manager is a single command line utility that simplifies configuration and management of your storage environment. The Gluster Console Manager is similar to the LVM (Logical Volume Manager) CLI or ZFS Command Line Interface, but across multiple storage servers. You can use the Gluster Console Manager online, while volumes are mounted and active. Gluster automatically synchronizes volume configuration information across all Gluster servers.</para>
- <para>Using the Gluster Console Manager, you can create new volumes, start volumes, and stop volumes, as required. You can also add bricks to volumes, remove bricks from existing volumes, as well as change translator settings, among other operations.</para>
- <para>You can also use the commands to create scripts for automation, as well as use the commands as an API to allow integration with third-party applications. </para>
- <para><emphasis role="bold">Running the Gluster Console Manager</emphasis></para>
- <para>You can run the Gluster Console Manager on any GlusterFS server either by invoking the commands or by running the Gluster CLI in interactive mode. You can also use the gluster command remotely using SSH. </para>
- <itemizedlist>
- <listitem>
- <para>To run commands directly: </para>
- <para><command> # gluster peer <replaceable>command</replaceable></command></para>
- <para>For example:</para>
- <para><command> # gluster peer status </command></para>
- </listitem>
- <listitem>
- <para>To run the Gluster Console Manager in interactive mode </para>
- <para><command># gluster</command></para>
- <para>You can execute gluster commands from the Console Manager prompt:</para>
- <para><command> gluster&gt; <replaceable>command</replaceable></command> </para>
- <para>For example, to view the status of the peer server:</para>
- <para># <command>gluster </command></para>
- <para><command>gluster &gt; peer status </command></para>
- <para>Display the status of the peer.</para>
- </listitem>
- </itemizedlist>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_directory_Quota.xml b/doc/admin-guide/en-US/admin_directory_Quota.xml
deleted file mode 100644
index 8a1012a6ac2..00000000000
--- a/doc/admin-guide/en-US/admin_directory_Quota.xml
+++ /dev/null
@@ -1,179 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Dir_Quota">
- <title>Managing Directory Quota </title>
- <para>Directory quotas in GlusterFS allow you to set limits on usage of disk space by directories or volumes.
-The storage administrators can control the disk space utilization at the directory and/or volume
-levels in GlusterFS by setting limits to allocatable disk space at any level in the volume and directory
-hierarchy. This is particularly useful in cloud deployments to facilitate utility billing model.
- </para>
- <para> <note>
- <para>For now, only Hard limit is supported. Here, the limit cannot be exceeded and attempts to use
-more disk space or inodes beyond the set limit will be denied.
-</para>
- </note></para>
- <para>System administrators can also monitor the resource utilization to limit the storage for the users
-depending on their role in the organization.
-</para>
- <para>You can set the quota at the following levels:
- </para>
- <itemizedlist>
- <listitem>
- <para>Directory level – limits the usage at the directory level
- </para>
- </listitem>
- <listitem>
- <para>Volume level – limits the usage at the volume level
- </para>
- </listitem>
- </itemizedlist>
- <note>
- <para>You can set the disk limit on the directory even if it is not created. The disk limit is enforced
-immediately after creating that directory. For more information on setting disk limit, see <xref linkend="chap-Administration_Guide-Dir_Quota-Set_Replace"/>.
-</para>
- </note>
- <section id="chap-Administration_Guide-Dir_Quota-Enable">
- <title>Enabling Quota </title>
- <para>You must enable Quota to set disk limits.
-</para>
- <para><emphasis role="bold">To enable quota</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Enable the quota using the following command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> enable </command></para>
- <para>For example, to enable quota on test-volume:
-</para>
- <programlisting># gluster volume quota test-volume enable
-Quota is enabled on /test-volume</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Dir_Quota-Disable">
- <title>Disabling Quota </title>
- <para>You can disable Quota, if needed.
-</para>
- <para><emphasis role="bold">To disable quota:</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Disable the quota using the following command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> disable </command></para>
- <para>For example, to disable quota translator on test-volume:
-</para>
- <programlisting># gluster volume quota test-volume disable
-Quota translator is disabled on /test-volume</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Dir_Quota-Set_Replace">
- <title>Setting or Replacing Disk Limit </title>
- <para>You can create new directories in your storage environment and set the disk limit or set disk limit for
-the existing directories. The directory name should be relative to the volume with the export
-directory/mount being treated as &quot;/&quot;.
-</para>
- <para><emphasis role="bold">To set or replace disk limit</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Set the disk limit using the following command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> limit-usage /<replaceable>directory</replaceable><replaceable>limit-value</replaceable></command></para>
- <para>For example, to set limit on data directory on test-volume where data is a directory under the
-export directory:
-</para>
- <programlisting># gluster volume quota test-volume limit-usage /data 10GB
-Usage limit has been set on /data</programlisting>
- <para><note>
- <para>In a multi-level directory hierarchy, the strictest disk limit will be considered for enforcement.
-</para>
- </note></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Dir_Quota-Display">
- <title>Displaying Disk Limit Information </title>
- <para>You can display disk limit information on all the directories on which the limit is set.
-</para>
- <para><emphasis role="bold">To display disk limit information</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Display disk limit information of all the directories on which limit is set, using the following
-command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> list</command>
-</para>
- <para>For example, to see the set disks limit on test-volume:
-</para>
- <programlisting># gluster volume quota test-volume list
-
-<emphasis role="underline">
- <emphasis role="underline"><emphasis role="underline">Path</emphasis>__________Limit______Set Size</emphasis>
- </emphasis>
-/Test/data 10 GB 6 GB
-/Test/data1 10 GB 4 GB</programlisting>
- </listitem>
- <listitem>
- <para>Display disk limit information on a particular directory on which limit is set, using the following
-command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> list <replaceable>/directory name</replaceable></command>
-</para>
- <para>For example, to see the set limit on /data directory of test-volume:</para>
- <programlisting># gluster volume quota test-volume list /data
-
-<emphasis role="underline"><emphasis role="underline">Path</emphasis>__________Limit______Set Size</emphasis>
-/Test/data 10 GB 6 GB</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Dir_Quota-Update">
- <title> Updating Memory Cache Size </title>
- <para>For performance reasons, quota caches the directory sizes on client. You can set timeout indicating
-the maximum valid duration of directory sizes in cache, from the time they are populated.
-</para>
- <para>For example: If there are multiple clients writing to a single directory, there are chances that some
-other client might write till the quota limit is exceeded. However, this new file-size may not get
-reflected in the client till size entry in cache has become stale because of timeout. If writes happen
-on this client during this duration, they are allowed even though they would lead to exceeding of
-quota-limits, since size in cache is not in sync with the actual size. When timeout happens, the size
-in cache is updated from servers and will be in sync and no further writes will be allowed. A timeout
-of zero will force fetching of directory sizes from server for every operation that modifies file data
-and will effectively disables directory size caching on client side.
-</para>
- <para><emphasis role="bold">To update the memory cache size</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Update the memory cache size using the following command:
-</para>
- <para><command># gluster volume set <replaceable>VOLNAME</replaceable> features.quota-timeout<replaceable> value</replaceable></command></para>
- <para>For example, to update the memory cache size for every 5 seconds on test-volume:
-</para>
- <programlisting># gluster volume set test-volume features.quota-timeout 5
-Set volume successful</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Dir_Quota-Remove">
- <title>Removing Disk Limit </title>
- <para>You can remove set disk limit, if you do not want quota anymore.
-</para>
- <para><emphasis role="bold">To remove disk limit </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Remove disk limit set on a particular directory using the following command:
-</para>
- <para><command># gluster volume quota <replaceable>VOLNAME</replaceable> remove <replaceable>/directory name</replaceable></command>
-</para>
- <para>For example, to remove the disk limit on /data directory of test-volume:
-</para>
- <programlisting># gluster volume quota test-volume remove /data
-Usage limit set on /data is removed</programlisting>
- </listitem>
- </itemizedlist>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_geo-replication.xml b/doc/admin-guide/en-US/admin_geo-replication.xml
deleted file mode 100644
index b546bb8da8c..00000000000
--- a/doc/admin-guide/en-US/admin_geo-replication.xml
+++ /dev/null
@@ -1,732 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Geo_Rep">
- <title>Managing Geo-replication</title>
- <para>Geo-replication provides a continuous, asynchronous, and incremental replication service from one site to another over Local Area Networks (LANs), Wide Area Network (WANs), and across the Internet. </para>
- <para>Geo-replication uses a master–slave model, whereby replication and mirroring occurs between the following partners:</para>
- <itemizedlist>
- <listitem>
- <para>Master – a GlusterFS volume </para>
- </listitem>
- <listitem>
- <para>Slave – a slave which can be of the following types: </para>
- <itemizedlist>
- <listitem>
- <para>A local directory which can be represented as file URL like <filename>file:///path/to/dir</filename>. You can use shortened form, for example, <filename> /path/to/dir</filename>.</para>
- </listitem>
- <listitem>
- <para>A GlusterFS Volume - Slave volume can be either a local volume like <filename>gluster://localhost:volname</filename> (shortened form - <filename>:volname</filename>) or a volume served by different host like <filename>gluster://host:volname</filename> (shortened form - <filename>host:volname</filename>).</para>
- </listitem>
- </itemizedlist>
- <note>
- <para> Both of the above types can be accessed remotely using SSH tunnel. To use SSH, add an SSH prefix to either a file URL or gluster type URL. For example, <literal> ssh://root@remote-host:/path/to/dir</literal> (shortened form - <literal>root@remote-host:/path/to/dir</literal>) or <literal>ssh://root@remote-host:gluster://localhost:volname</literal> (shortened from - <literal>root@remote-host::volname</literal>). </para>
- </note>
- </listitem>
- </itemizedlist>
- <para>This section introduces Geo-replication, illustrates the various deployment scenarios, and explains how to configure the system to provide replication and mirroring in your environment. </para>
- <section id="chap-Administration_Guide-Geo_Rep-Replicated_volumes">
- <title>Replicated Volumes vs Geo-replication</title>
- <para>The following table lists the difference between replicated volumes and geo-replication:</para>
- <informaltable frame="all">
- <tgroup cols="2">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <thead>
- <row>
- <entry>Replicated Volumes</entry>
- <entry>Geo-replication</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>Mirrors data across clusters</entry>
- <entry>Mirrors data across geographically distributed clusters </entry>
- </row>
- <row>
- <entry>Provides high-availability</entry>
- <entry>Ensures backing up of data for disaster recovery</entry>
- </row>
- <row>
- <entry>Synchronous replication (each and every file operation is sent across all the bricks)</entry>
- <entry>Asynchronous replication (checks for the changes in files periodically and syncs them on detecting differences) </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation">
- <title>Preparing to Deploy Geo-replication</title>
- <para>This section provides an overview of the Geo-replication deployment scenarios, describes how you can check the minimum system requirements, and explores common deployment scenarios.</para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_options"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_Overview"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Minimum_Reqs"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Environment"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Slave"/></para>
- </listitem>
- </itemizedlist>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_options">
- <title>Exploring Geo-replication Deployment Scenarios</title>
- <para>Geo-replication provides an incremental replication service over Local Area Networks (LANs), Wide Area Network (WANs), and across the Internet. This section illustrates the most common deployment scenarios for Geo-replication, including the following: </para>
- <itemizedlist>
- <listitem>
- <para>Geo-replication over LAN
-</para>
- </listitem>
- <listitem>
- <para>Geo-replication over WAN
-</para>
- </listitem>
- <listitem>
- <para>Geo-replication over the Internet</para>
- </listitem>
- <listitem>
- <para>Multi-site cascading Geo-replication</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">Geo-replication over LAN</emphasis></para>
- <para>You can configure Geo-replication to mirror data over a Local Area Network. </para>
- <mediaobject>
- <textobject>
- <phrase>Geo-replication over LAN</phrase>
- </textobject>
- <imageobject>
- <imagedata fileref="images/Geo-Rep_LAN.png"/>
- </imageobject>
- </mediaobject>
- <para><emphasis role="bold">Geo-replication over WAN</emphasis></para>
- <para>You can configure Geo-replication to replicate data over a Wide Area Network.</para>
- <mediaobject>
- <textobject>
- <phrase>
- <phrase>Geo-replication over WAN</phrase>
- </phrase>
- </textobject>
- <imageobject>
- <imagedata fileref="images/Geo-Rep_WAN.png"/>
- </imageobject>
- </mediaobject>
- <para><emphasis role="bold">Geo-replication over Internet</emphasis></para>
- <para>You can configure Geo-replication to mirror data over the Internet.</para>
- <mediaobject>
- <textobject>
- <phrase>
- <phrase>Geo-replication over Internet</phrase>
- </phrase>
- </textobject>
- <imageobject>
- <imagedata fileref="images/Geo-Rep03_Internet.png"/>
- </imageobject>
- </mediaobject>
- <para><emphasis role="bold">Multi-site cascading Geo-replication</emphasis> </para>
- <para>You can configure Geo-replication to mirror data in a cascading fashion across multiple sites. </para>
- <mediaobject>
- <textobject>
- <phrase>
- <phrase>Multi-site cascading Geo-replication </phrase>
- </phrase>
- </textobject>
- <imageobject>
- <imagedata fileref="images/Geo-Rep04_Cascading.png"/>
- </imageobject>
- </mediaobject>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_Overview">
- <title>Geo-replication Deployment Overview</title>
- <para>Deploying Geo-replication involves the following steps:</para>
- <orderedlist>
- <listitem>
- <para>Verify that your environment matches the minimum system requirement. For more information, see <xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Minimum_Reqs"/>.</para>
- </listitem>
- <listitem>
- <para>Determine the appropriate deployment scenario. For more information, see <xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Deployment_options"/>.</para>
- </listitem>
- <listitem>
- <para>Start Geo-replication on master and slave systems, as required. For more information, see <xref linkend="chap-Administration_Guide-Geo_Rep-Starting"/>.</para>
- </listitem>
- </orderedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation-Minimum_Reqs">
- <title>Checking Geo-replication Minimum Requirements</title>
- <para condition="gfs">Before deploying GlusterFS Geo-replication, verify that your systems match the minimum requirements. </para>
- <para condition="gfs">The following table outlines the minimum requirements for both master and slave nodes within your environment:</para>
- <informaltable frame="all" condition="gfs">
- <tgroup cols="3">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <thead>
- <row>
- <entry>Component</entry>
- <entry>Master</entry>
- <entry>Slave</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>Operating System </entry>
- <entry>GNU/Linux</entry>
- <entry>GNU/Linux</entry>
- </row>
- <row>
- <entry>Filesystem</entry>
- <entry>GlusterFS 3.2 or higher</entry>
- <entry>GlusterFS 3.2 or higher (GlusterFS needs to be installed, but does not need to be running), ext3, ext4, or XFS (any other POSIX compliant file system would work, but has not been tested extensively) </entry>
- </row>
- <row>
- <entry>Python </entry>
- <entry>Python 2.4 (with ctypes external module), or Python 2.5 (or higher)</entry>
- <entry>Python 2.4 (with ctypes external module), or Python 2.5 (or higher)</entry>
- </row>
- <row>
- <entry>Secure shell </entry>
- <entry>OpenSSH version 4.0 (or higher)</entry>
- <entry>SSH2-compliant daemon </entry>
- </row>
- <row>
- <entry>Remote synchronization</entry>
- <entry>rsync 3.0.7 or higher </entry>
- <entry>rsync 3.0.7 or higher </entry>
- </row>
- <row>
- <entry>FUSE </entry>
- <entry>GlusterFS supported versions </entry>
- <entry>GlusterFS supported versions </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Environment">
- <title>Setting Up the Environment for Geo-replication</title>
- <para><emphasis role="bold">Time Synchronization</emphasis> </para>
- <itemizedlist>
- <listitem>
- <para>On bricks of a geo-replication master volume, all the servers&apos; time must be uniform. You are recommended to set up NTP (Network Time Protocol) service to keep the bricks sync in time and avoid out-of-time sync effect.</para>
- <para>For example: In a Replicated volume where brick1 of the master is at 12.20 hrs and brick 2 of the master is at 12.10 hrs with 10 minutes time lag, all the changes in brick2 between this period may go unnoticed during synchronization of files with Slave.</para>
- <para>For more information on setting up NTP, see <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Enterprise_Linux/6/html/Migration_Planning_Guide/ch04s07.html"/>.</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To setup Geo-replication for SSH </emphasis></para>
- <para>Password-less login has to be set up between the host machine (where geo-replication Start command will be issued) and the remote machine (where slave process should be launched through SSH).</para>
- <orderedlist>
- <listitem>
- <para>On the node where geo-replication sessions are to be set up, run the following command:</para>
- <para><command># ssh-keygen -f /etc/glusterd/geo-replication/secret.pem</command>
-</para>
- <para>Press Enter twice to avoid passphrase.
-</para>
- </listitem>
- <listitem>
- <para>Run the following command on master for all the slave hosts: </para>
- <para><command># ssh-copy-id -i /etc/glusterd/geo-replication/secret.pem.pub <varname>user</varname>@<varname>slavehost</varname></command></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Slave">
- <title>Setting Up the Environment for a Secure Geo-replication Slave</title>
- <para>You can configure a secure slave using SSH so that master is granted a
-restricted access. With GlusterFS, you need not specify
-configuration parameters regarding the slave on the master-side
-configuration. For example, the master does not require the location of
-the rsync program on slave but the slave must ensure that rsync is in
-the PATH of the user which the master connects using SSH. The only
-information that master and slave have to negotiate are the slave-side
-user account, slave&apos;s resources that master uses as slave resources, and
-the master&apos;s public key. Secure access to the slave can be established
-using the following options:</para>
- <itemizedlist>
- <listitem>
- <para>Restricting Remote Command Execution</para>
- </listitem>
- <listitem>
- <para>Using <filename>Mountbroker</filename> for Slaves</para>
- </listitem>
- <listitem>
- <para>Using IP based Access Control</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">Backward Compatibility</emphasis> </para>
- <para>Your existing Ge-replication environment will work with GlusterFS,
-except for the following:</para>
- <itemizedlist>
- <listitem>
- <para>The process of secure reconfiguration affects only the glusterfs
-instance on slave. The changes are transparent to master with the
-exception that you may have to change the SSH target to an unprivileged
- account on slave.</para>
- </listitem>
- <listitem>
- <para>The following are the some exceptions where this might not work:</para>
- <para><itemizedlist>
- <listitem>
- <para>Geo-replication URLs which specify the slave resource when configuring master will include the following special characters: space, *, ?, [;</para>
- </listitem>
- <listitem>
- <para>Slave must have a running instance of glusterd, even if there is no
-gluster volume among the mounted slave resources (that is, file tree
-slaves are used exclusively) .</para>
- </listitem>
- </itemizedlist></para>
- </listitem>
- </itemizedlist>
- <section>
- <title>Restricting Remote Command Execution</title>
- <para>If you restrict remote command execution, then the Slave audits commands
-coming from the master and the commands related to the given
-geo-replication session is allowed. The Slave also provides access only
-to the files within the slave resource which can be read or manipulated
-by the Master.</para>
- <para>To restrict remote command execution:</para>
- <orderedlist>
- <listitem>
- <para>Identify the location of the gsyncd helper utility on Slave. This utility is installed in <filename>PREFIX/libexec/glusterfs/gsyncd</filename>, where PREFIX is a compile-time parameter of glusterfs. For example, <filename>--prefix=PREFIX</filename> to the configure script with the following common values<filename> /usr, /usr/local, and /opt/glusterfs/glusterfs_version</filename>.</para>
- </listitem>
- <listitem>
- <para>Ensure that command invoked from master to slave passed through the slave&apos;s gsyncd utility. </para>
- <para>You can use either of the following two options:</para>
- <itemizedlist>
- <listitem>
- <para>Set gsyncd with an absolute path as the shell for the account
-which the master connects through SSH. If you need to use a privileged
-account, then set it up by creating a new user with UID 0. </para>
- </listitem>
- <listitem>
- <para>Setup key authentication with command enforcement to gsyncd. You must prefix the copy of master&apos;s public key in the Slave account&apos;s <filename>authorized_keys</filename> file with the following command:</para>
- <para><filename>command=&lt;path to gsyncd&gt;</filename>. </para>
- <para>For example, <command>command=&quot;PREFIX/glusterfs/gsyncd&quot; ssh-rsa AAAAB3Nza....</command></para>
- </listitem>
- </itemizedlist>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Using Mountbroker for Slaves </title>
- <para><filename>mountbroker</filename> is a new service of glusterd. This service allows an
-unprivileged process to own a GlusterFS mount by registering a label
-(and DSL (Domain-specific language) options ) with glusterd through a
-glusterd volfile. Using CLI, you can send a mount request to glusterd to
-receive an alias (symlink) of the mounted volume.</para>
- <para>A request from the agent , the unprivileged slave agents use the
-mountbroker service of glusterd to set up an auxiliary gluster mount for
-the agent in a special environment which ensures that the agent is only
-allowed to access with special parameters that provide administrative
-level access to the particular volume.</para>
- <para><emphasis role="bold">To setup an auxiliary gluster mount for the agent</emphasis>:</para>
- <orderedlist>
- <listitem>
- <para>Create a new group. For example, <filename>geogroup</filename>.</para>
- </listitem>
- <listitem>
- <para>Create a unprivileged account. For example, <filename> geoaccount</filename>. Make it a member of <filename> geogroup</filename>.</para>
- </listitem>
- <listitem>
- <para>Create a new directory owned by root and with permissions <emphasis role="italic">0711.</emphasis> For example, create a create mountbroker-root directory <filename>/var/mountbroker-root</filename>.</para>
- </listitem>
- <listitem>
- <para>Add the following options to the glusterd volfile, assuming the name of the slave gluster volume as <filename>slavevol</filename>:</para>
- <para><command>option mountbroker-root /var/mountbroker-root </command></para>
- <para><command>option mountbroker-geo-replication.geoaccount slavevol</command></para>
- <para><command>option geo-replication-log-group geogroup</command></para>
- <para>If you are unable to locate the glusterd volfile at <filename>/etc/glusterfs/glusterd.vol</filename>, you can create a volfile containing both the default configuration and the above options and place it at <filename>/etc/glusterfs/</filename>. </para>
- <para>A sample glusterd volfile along with default options:</para>
- <para><screen>volume management
- type mgmt/glusterd
- option working-directory /etc/glusterd
- option transport-type socket,rdma
- option transport.socket.keepalive-time 10
- option transport.socket.keepalive-interval 2
- option transport.socket.read-fail-log off
-
- option mountbroker-root /var/mountbroker-root
- option mountbroker-geo-replication.geoaccount slavevol
- option geo-replication-log-group geogroup
-end-volume</screen></para>
- <para>If you host multiple slave volumes on Slave, you can repeat step 2. for each of them and add the following options to the <filename>volfile</filename>:</para>
- <para><screen>option mountbroker-geo-replication.geoaccount2 slavevol2
-option mountbroker-geo-replication.geoaccount3 slavevol3</screen></para>
- </listitem>
- <listitem>
- <para>Setup Master to access Slave as <filename>geoaccount@Slave</filename>.</para>
- <para>You can add multiple slave volumes within the same account (geoaccount) by providing comma-separated list (without spaces) as the argument of <command>mountbroker-geo-replication.geogroup</command>. You can also have multiple options of the form <command>mountbroker-geo-replication.*</command>. It is recommended to use one service account per Master machine. For example, if there are multiple slave volumes on Slave for the master machines Master1, Master2, and Master3, then create a dedicated service user on Slave for them by repeating Step 2. for each (like geogroup1, geogroup2, and geogroup3), and then add the following corresponding options to the volfile:
-</para>
- <para><command>option mountbroker-geo-replication.geoaccount1 slavevol11,slavevol12,slavevol13</command></para>
- <para><command>option mountbroker-geo-replication.geoaccount2 slavevol21,slavevol22</command></para>
- <para><command>option mountbroker-geo-replication.geoaccount3 slavevol31</command></para>
- <para>
-Now set up Master1 to ssh to geoaccount1@Slave, etc.
-</para>
- <para>You must restart glusterd after making changes in the configuration to effect the updates. </para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Using IP based Access Control</title>
- <para>You can use IP based access control method to provide access control for
-the slave resources using IP address. You can use method for both Slave
-and file tree slaves, but in the section, we are focusing on file tree
-slaves using this method.</para>
- <para>To set access control based on IP address for file tree slaves:</para>
- <orderedlist>
- <listitem>
- <para>Set a general restriction for accessibility of file tree resources:
-</para>
- <para><command># gluster volume geo-replication &apos;/*&apos; config allow-network ::1,127.0.0.1 </command></para>
- <para>This will refuse all requests for spawning slave agents except for
-requests initiated locally.</para>
- </listitem>
- <listitem>
- <para>If you want the to lease file tree at <filename>/data/slave-tree</filename> to Master, enter the following command:</para>
- <para><command># gluster volume geo-replication<varname> /data/slave-tree </varname>config allow-network <varname>MasterIP</varname></command></para>
- <para><varname>MasterIP</varname> is the IP address of Master. The slave agent spawn request from
-master will be accepted if it is executed at <filename>/data/slave-tree</filename>.</para>
- </listitem>
- </orderedlist>
- <para>If the Master side network configuration does not enable the Slave to
-recognize the exact IP address of Master, you can use CIDR notation to
-specify a subnet instead of a single IP address as MasterIP or even
-comma-separated lists of CIDR subnets.</para>
- <para>If you want to extend IP based access control to gluster slaves, use the following command:</para>
- <para><command># gluster volume geo-replication &apos;*&apos; config allow-network ::1,127.0.0.1</command></para>
- </section>
- </section>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Starting">
- <title>Starting Geo-replication</title>
- <para>This section describes how to configure and start Gluster Geo-replication in your storage environment, and verify that it is functioning correctly. </para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Start"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Verify"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Display"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Configure"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Stop"/></para>
- </listitem>
- </itemizedlist>
- <section id="chap-Administration_Guide-Geo_Rep-Starting-Start">
- <title>Starting Geo-replication</title>
- <para>To start Gluster Geo-replication </para>
- <itemizedlist>
- <listitem>
- <para>Start geo-replication between the hosts using the following command:
- </para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> start</command>
-</para>
- <para>For example:
-</para>
- <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir start
-Starting geo-replication session between Volume1
-example.com:/data/remote_dir has been successful</programlisting></para>
- <para><note>
- <para>You may need to configure the service before starting Gluster Geo-replication. For more information, see <xref linkend="chap-Administration_Guide-Geo_Rep-Starting-Configure"/>.</para>
- </note></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Starting-Verify">
- <title>Verifying Successful Deployment</title>
- <para>You can use the gluster command to verify the status of Gluster Geo-replication in your environment.</para>
- <para><emphasis role="bold">To verify the status Gluster Geo-replication</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Verify the status by issuing the following command on host:</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> status</command>
-</para>
- <para>For example:
-</para>
- <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir status</command>
-</para>
- <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir status
-
-MASTER SLAVE STATUS
-______ ______________________________ ____________
-Volume1 root@example.com:/data/remote_dir Starting....</programlisting>
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Starting-Display">
- <title>Displaying Geo-replication Status Information</title>
- <para>You can display status information about a specific geo-replication master session, or a particular master-slave session, or all geo-replication sessions, as needed.</para>
- <para><emphasis role="bold">To display geo-replication status information</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Display information of all geo-replication sessions using the following command:</para>
- <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir status
-
-MASTER SLAVE STATUS
-______ ______________________________ ____________
-Volume1 root@example.com:/data/remote_dir Starting....</programlisting></para>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>Display information of a particular master slave session using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> status</command>
-</para>
- <para>For example, to display information of Volume1 and example.com:/data/remote_dir
-</para>
- <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir status</command>
-</para>
- <para>The status of the geo-replication between Volume1 and example.com:/data/remote_dir is displayed.</para>
- </listitem>
- <listitem>
- <para>Display information of all geo-replication sessions belonging to a master</para>
- <para><command># gluster volume geo-replication MASTER status</command>
-</para>
- <para>For example, to display information of Volume1</para>
- <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir status
-
-MASTER SLAVE STATUS
-______ ______________________________ ____________
-Volume1 ssh://example.com:gluster://127.0.0.1:remove_volume OK
-
-Volume1 ssh://example.com:file:///data/remote_dir OK</programlisting></para>
- <para>The status of a session could be one of the following four:</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Starting</emphasis>: This is the initial phase of the Geo-replication session; it remains in this state for a minute, to make sure no abnormalities are present.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">OK</emphasis>: The geo-replication session is in a stable state.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Faulty</emphasis>: The geo-replication session has witnessed some abnormality and the situation has to be investigated further. For further information, see <xref linkend="chap-Administration_Guide-Troubleshooting"/> section.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">Corrupt</emphasis>: The monitor thread which is monitoring the geo-replication session has died. This situation should not occur normally, if it persists contact Red Hat Support<ulink url="www.redhat.com/support/"/>.</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Starting-Configure">
- <title>Configuring Geo-replication</title>
- <para>To configure Gluster Geo-replication </para>
- <itemizedlist>
- <listitem>
- <para>Use the following command at the Gluster command line:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> config [options]</command>
-</para>
- <para>For more information about the options, see <xref linkend="chap-Administration_Guide-Com_Ref"/>.
-</para>
- <para>For example:
-</para>
- <para>To view list of all option/value pair, use the following command:
-</para>
- <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir config</command>
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Starting-Stop">
- <title>Stopping Geo-replication</title>
- <para>You can use the gluster command to stop Gluster Geo-replication (syncing of data from Master to Slave) in your environment. </para>
- <para><emphasis role="bold">To stop Gluster Geo-replication</emphasis> </para>
- <itemizedlist>
- <listitem>
- <para>Stop geo-replication between the hosts using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> stop </command></para>
- <para>For example:
-</para>
- <para><programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir stop
-Stopping geo-replication session between Volume1 and
-example.com:/data/remote_dir has been successful</programlisting></para>
- <para>See <xref linkend="chap-Administration_Guide-Com_Ref"/> for more information about the gluster command.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Restoring_Data">
- <title>Restoring Data from the Slave</title>
- <para>You can restore data from the slave to the master volume, whenever the master volume becomes faulty for reasons like hardware failure.
-</para>
- <para>The example in this section assumes that you are using the Master Volume (Volume1) with the following configuration:
-</para>
- <para><programlisting>machine1# gluster volume info
-Type: Distribute
-Status: Started
-Number of Bricks: 2
-Transport-type: tcp
-Bricks:
-Brick1: machine1:/export/dir16
-Brick2: machine2:/export/dir16
-Options Reconfigured:
-geo-replication.indexing: on</programlisting></para>
- <para>The data is syncing from master volume (Volume1) to slave directory (example.com:/data/remote_dir). To view the status of this geo-replication session run the following command on Master: </para>
- <programlisting># gluster volume geo-replication Volume1 root@example.com:/data/remote_dir status
-
-MASTER SLAVE STATUS
-______ ______________________________ ____________
-Volume1 root@example.com:/data/remote_dir OK</programlisting>
- <para><emphasis role="bold">Before Failure</emphasis>
-</para>
- <para>Assume that the Master volume had 100 files and was mounted at /mnt/gluster on one of the client machines (client). Run the following command on Client machine to view the list of files:
-</para>
- <para><programlisting>client# ls /mnt/gluster | wc –l
-100</programlisting></para>
- <para>The slave directory (example.com) will have same data as in the master volume and same can be viewed by running the following command on slave:
-</para>
- <para><programlisting>example.com# ls /data/remote_dir/ | wc –l
-100</programlisting></para>
- <para><emphasis role="bold">After Failure</emphasis>
-</para>
- <para>If one of the bricks (machine2) fails, then the status of Geo-replication session is changed from &quot;OK&quot; to &quot;Faulty&quot;. To view the status of this geo-replication session run the following command on Master:
-</para>
- <programlisting># gluster volume geo-replication Volume1 root@example.com:/data/remote_dir status
-
-MASTER SLAVE STATUS
-______ ______________________________ ____________
-Volume1 root@example.com:/data/remote_dir Faulty</programlisting>
- <para>Machine2 is failed and now you can see discrepancy in number of files between master and slave. Few files will be missing from the master volume but they will be available only on slave as shown below.
-</para>
- <para>Run the following command on Client:
- </para>
- <para><programlisting>client # ls /mnt/gluster | wc –l
-52</programlisting></para>
- <para>Run the following command on slave (example.com):
-</para>
- <para><programlisting>Example.com# # ls /data/remote_dir/ | wc –l
-100</programlisting></para>
- <para><emphasis role="bold">To restore data from the slave machine</emphasis></para>
- <orderedlist>
- <listitem>
- <para>Stop all Master&apos;s geo-replication sessions using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> stop</command>
-</para>
- <para>For example:
-</para>
- <para><programlisting>machine1# gluster volume geo-replication Volume1
-example.com:/data/remote_dir stop
-
-Stopping geo-replication session between Volume1 &amp;
-example.com:/data/remote_dir has been successful</programlisting></para>
- <para><note>
- <para>Repeat <command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> stop </command>command on all active geo-replication sessions of master volume.</para>
- </note></para>
- </listitem>
- <listitem>
- <para>Replace the faulty brick in the master by using the following command:
-</para>
- <para><command># gluster volume replace-brick <replaceable>VOLNAME BRICK NEW-BRICK</replaceable> start</command>
-</para>
- <para>For example:
-</para>
- <para><programlisting>machine1# gluster volume replace-brick Volume1 machine2:/export/dir16 machine3:/export/dir16 start
-Replace-brick started successfully</programlisting></para>
- </listitem>
- <listitem>
- <para>Commit the migration of data using the following command:
-</para>
- <para><command># gluster volume replace-brick <replaceable>VOLNAME BRICK NEW-BRICK</replaceable> commit force </command></para>
- <para>For example:
-</para>
- <para><programlisting>machine1# gluster volume replace-brick Volume1 machine2:/export/dir16 machine3:/export/dir16 commit force
-Replace-brick commit successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Verify the migration of brick by viewing the volume info using the following command:
-</para>
- <para><command># gluster volume info <replaceable>VOLNAME</replaceable></command></para>
- <para>For example:
-</para>
- <para><programlisting>machine1# gluster volume info
-Volume Name: Volume1
-Type: Distribute
-Status: Started
-Number of Bricks: 2
-Transport-type: tcp
-Bricks:
-Brick1: machine1:/export/dir16
-Brick2: machine3:/export/dir16
-Options Reconfigured:
-geo-replication.indexing: on</programlisting></para>
- </listitem>
- <listitem>
- <para>Run rsync command manually to sync data from slave to master volume&apos;s client (mount point).
-</para>
- <para>For example:
-</para>
- <para><command>example.com# rsync -PavhS --xattrs --ignore-existing /data/remote_dir/ client:/mnt/gluster</command></para>
- <para>Verify that the data is synced by using the following command:
-</para>
- <para>On master volume, run the following command:
-</para>
- <para><programlisting>Client # ls | wc –l
-100</programlisting></para>
- <para>On the Slave run the following command:
-</para>
- <para><programlisting>example.com# ls /data/remote_dir/ | wc –l
-100</programlisting></para>
- <para>Now Master volume and Slave directory is synced.
-</para>
- </listitem>
- <listitem>
- <para>Restart geo-replication session from master to slave using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> start </command></para>
- <para>For example:
-</para>
- <para><programlisting>machine1# gluster volume geo-replication Volume1
-example.com:/data/remote_dir start
-Starting geo-replication session between Volume1 &amp;
-example.com:/data/remote_dir has been successful</programlisting></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="chap-Administration_Guide-Geo_Rep-Best_Practices">
- <title>Best Practices</title>
- <para><emphasis role="bold">Manually Setting Time </emphasis></para>
- <para>If you have to change the time on your bricks manually, then you must set uniform time on all bricks. This avoids the out-of-time sync issue described in <xref linkend="chap-Administration_Guide-Geo_Rep-Preparation-Settingup_Environment"/>. Setting time backward corrupts the geo-replication index, so the recommended way to set the time manually is:
-</para>
- <orderedlist>
- <listitem>
- <para>Stop geo-replication between the master and slave using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE</replaceable> sto</command>p
-</para>
- </listitem>
- <listitem>
- <para>Stop the geo-replication indexing using the following command:
-</para>
- <para><command># gluster volume set <replaceable>MASTER</replaceable> geo-replication.indexing of</command>f</para>
- </listitem>
- <listitem>
- <para>Set uniform time on
- all bricks.s</para>
- </listitem>
- <listitem>
- <para>Restart your geo-replication sessions by using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>MASTER SLAVE </replaceable>start </command></para>
- </listitem>
- </orderedlist>
- <para><emphasis role="bold">Running Geo-replication commands in one system</emphasis>
-</para>
- <para>It is advisable to run the geo-replication commands in one of the bricks in the trusted storage pool. This is because, the log files for the geo-replication session would be stored in the *Server* where the Geo-replication start is initiated. Hence it would be easier to locate the log-files when required.
-</para>
- <para><emphasis role="bold">Isolation </emphasis></para>
- <para>Geo-replication slave operation is not sandboxed as of now and is ran as a privileged service. So for the security reason, it is advised to create a sandbox environment (dedicated machine / dedicated virtual machine / chroot/container type solution) by the administrator to run the geo-replication slave in it. Enhancement in this regard will be available in follow-up minor release.
-</para>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_managing_volumes.xml b/doc/admin-guide/en-US/admin_managing_volumes.xml
deleted file mode 100644
index 0c4d2e922cf..00000000000
--- a/doc/admin-guide/en-US/admin_managing_volumes.xml
+++ /dev/null
@@ -1,735 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-Managing_Volumes">
- <title>Managing GlusterFS Volumes</title>
- <para>This section describes how to perform common GlusterFS management operations, including the following: </para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Expanding"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Shrinking"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Migrating"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Stop"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Delete"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Managing_Volumes-Self_heal"/></para>
- </listitem>
- </itemizedlist>
- <section id="sect-Administration_Guide-Managing_Volumes-Tuning">
- <title>Tuning Volume Options</title>
- <para>You can tune volume options, as needed, while the cluster is online and available. </para>
- <para><note>
- <para>Red Hat recommends you to set server.allow-insecure option to ON if there are too many bricks in each volume or if there are too many services which have already utilized all the privileged ports in the system. Turning this option ON allows ports to accept/reject messages from insecure ports. So, use this option only if your deployment requires it. </para>
- </note></para>
- <para>To tune volume options </para>
- <itemizedlist>
- <listitem>
- <para>Tune volume options using the following command:</para>
- <para><command># gluster volume set <replaceable>VOLNAME OPTION PARAMETER</replaceable></command></para>
- <para>For example, to specify the performance cache size for test-volume:</para>
- <para><programlisting># gluster volume set test-volume performance.cache-size 256MB
-Set volume successful</programlisting></para>
- <para>The following table lists the Volume options along with its description and default value: </para>
- <para><note>
- <para>The default options given here are subject to modification at any given time and may not be the same for all versions.</para>
- </note></para>
- <informaltable frame="all">
- <tgroup cols="4">
- <colspec colname="c1"/>
- <colspec colname="c2"/>
- <colspec colname="c3"/>
- <colspec colname="c4"/>
- <thead>
- <row>
- <entry>Option</entry>
- <entry>Description</entry>
- <entry>Default Value</entry>
- <entry>Available Options</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>auth.allow</entry>
- <entry>IP addresses of the clients which should be allowed to access the volume. </entry>
- <entry>* (allow all)</entry>
- <entry>Valid IP address which includes wild card patterns including *, such as 192.168.1.*</entry>
- </row>
- <row>
- <entry>auth.reject</entry>
- <entry>IP addresses of the clients which should be denied to access the volume. </entry>
- <entry>NONE (reject none) </entry>
- <entry>Valid IP address which includes wild card patterns including *, such as 192.168.2.*</entry>
- </row>
- <row>
- <entry>client.grace-timeout</entry>
- <entry>Specifies the duration for the lock state to be maintained on the client after a network disconnection.</entry>
- <entry>10 </entry>
- <entry>10 - 1800 secs</entry>
- </row>
- <row>
- <entry>cluster.self-heal-window-size</entry>
- <entry>Specifies the maximum number of blocks per file on which self-heal would happen simultaneously. </entry>
- <entry>16 </entry>
- <entry>0 - 1025 blocks</entry>
- </row>
- <row>
- <entry>cluster.data-self-heal-algorithm</entry>
- <entry>Specifies the type of self-heal. If you set the option as &quot;full&quot;, the entire file is copied from source to destinations. If the option is set to &quot;diff&quot; the file blocks that are not in sync are copied to destinations. Reset uses a heuristic model. If the file does not exist on one of the subvolumes, or a zero-byte file exists (created by entry self-heal) the entire content has to be copied anyway, so there is no benefit from using the &quot;diff&quot; algorithm. If the file size is about the same as page size, the entire file can be read and written with a few operations, which will be faster than &quot;diff&quot; which has to read checksums and then read and write. </entry>
- <entry>reset</entry>
- <entry>full | diff | reset</entry>
- </row>
- <row>
- <entry>cluster.min-free-disk</entry>
- <entry>Specifies the percentage of disk space that must be kept free. Might be useful for non-uniform bricks. </entry>
- <entry>10%</entry>
- <entry>Percentage of required minimum free disk space</entry>
- </row>
- <row>
- <entry>cluster.stripe-block-size</entry>
- <entry>Specifies the size of the stripe unit that will be read from or written to. </entry>
- <entry>128 KB (for all files)</entry>
- <entry>size in bytes</entry>
- </row>
- <row>
- <entry>cluster.self-heal-daemon</entry>
- <entry>Allows you to turn-off proactive self-heal on replicated volumes.</entry>
- <entry>on</entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>diagnostics.brick-log-level</entry>
- <entry>Changes the log-level of the bricks. </entry>
- <entry>INFO </entry>
- <entry>DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE</entry>
- </row>
- <row>
- <entry>diagnostics.client-log-level</entry>
- <entry>Changes the log-level of the clients. </entry>
- <entry>INFO </entry>
- <entry>DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE</entry>
- </row>
- <row>
- <entry>diagnostics.latency-measurement</entry>
- <entry>Statistics related to the latency of each operation would be tracked. </entry>
- <entry>off </entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>diagnostics.dump-fd-stats</entry>
- <entry>Statistics related to file-operations would be tracked.</entry>
- <entry>off </entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>feature.read-only</entry>
- <entry>Enables you to mount the entire volume as read-only for all the clients (including NFS clients) accessing it.</entry>
- <entry>off</entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>features.lock-heal</entry>
- <entry>Enables self-healing of locks when the network disconnects.</entry>
- <entry>on</entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>features.quota-timeout</entry>
- <entry>For performance reasons, quota caches the directory sizes on client. You can set timeout indicating the maximum duration of directory sizes in cache, from the time they are populated, during which they are considered valid. </entry>
- <entry>0</entry>
- <entry>0 - 3600 secs</entry>
- </row>
- <row>
- <entry>geo-replication.indexing</entry>
- <entry>Use this option to automatically sync the changes in the filesystem from Master to Slave.</entry>
- <entry>off </entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>network.frame-timeout</entry>
- <entry>The time frame after which the operation has to be declared as dead, if the server does not respond for a particular operation. </entry>
- <entry>1800 (30 mins) </entry>
- <entry>1800 secs</entry>
- </row>
- <row>
- <entry>network.ping-timeout</entry>
- <entry>The time duration for which the client waits to check if the server is responsive. When a ping timeout happens, there is a network disconnect between the client and server. All resources held by server on behalf of the client get cleaned up. When a reconnection happens, all resources will need to be re-acquired before the client can resume its operations on the server. Additionally, the locks will be acquired and the lock tables updated. <para>This reconnect is a very expensive operation and should be avoided.
-</para></entry>
- <entry>42 Secs</entry>
- <entry>42 Secs</entry>
- </row>
- <row>
- <entry>nfs.enable-ino32</entry>
- <entry>For 32-bit nfs clients or applications that do not support 64-bit inode numbers or large files, use this option from the CLI to make Gluster NFS return 32-bit inode numbers instead of 64-bit inode numbers. Applications that will benefit are those that were either: <para>* Built 32-bit and run on 32-bit machines.</para><para>* Built 32-bit on 64-bit systems.</para><para>* Built 64-bit but use a library built 32-bit, especially relevant for python and perl scripts.</para><para>Either of the conditions above can lead to application on Linux NFS clients failing with &quot;Invalid argument&quot; or &quot;Value too large for defined data type&quot; errors.</para></entry>
- <entry>off</entry>
- <entry>On | Off</entry>
- </row>
- <row>
- <entry>nfs.volume-access </entry>
- <entry>Set the access type for the specified sub-volume. </entry>
- <entry>read-write </entry>
- <entry>read-write|read-only </entry>
- </row>
- <row>
- <entry>nfs.trusted-write </entry>
- <entry>If there is an UNSTABLE write from the client, STABLE flag will be returned to force the client to not send a COMMIT request. <para>In some environments, combined with a replicated GlusterFS setup, this option can improve write performance. This flag allows users to trust Gluster replication logic to sync data to the disks and recover when required. COMMIT requests if received will be handled in a default manner by fsyncing. STABLE writes are still handled in a sync manner.</para></entry>
- <entry> off </entry>
- <entry>On | Off </entry>
- </row>
- <row>
- <entry>nfs.trusted-sync</entry>
- <entry> All writes and COMMIT requests are treated as async. This implies that no write requests are guaranteed to be on server disks when the write reply is received at the NFS client. Trusted sync includes trusted-write behavior. </entry>
- <entry>off </entry>
- <entry>On | Off </entry>
- </row>
- <row>
- <entry>nfs.export-dir </entry>
- <entry>By default, all sub-volumes of NFS are exported as individual exports. Now, this option allows you to export only the specified subdirectory or subdirectories in the volume. This option can also be used in conjunction with nfs3.export-volumes option to restrict exports only to the subdirectories specified through this option. You must provide an absolute path.</entry>
- <entry>Enabled for all sub directories.</entry>
- <entry>Enable | Disable </entry>
- </row>
- <row>
- <entry>nfs.export-volumes </entry>
- <entry>Enable/Disable exporting entire volumes, instead if used in conjunction with nfs3.export-dir, can allow setting up only subdirectories as exports. </entry>
- <entry>on</entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.rpc-auth-unix </entry>
- <entry>Enable/Disable the AUTH_UNIX authentication type. This option is enabled by default for better interoperability. However, you can disable it if required.</entry>
- <entry>on </entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.rpc-auth-null </entry>
- <entry>Enable/Disable the AUTH_NULL authentication type. It is not recommended to change the default value for this option. </entry>
- <entry>on </entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.rpc-auth-allow&lt;IP- Addresses&gt; </entry>
- <entry>Allow a comma separated list of addresses and/or hostnames to connect to the server. By default, all clients are disallowed. This allows you to define a general rule for all exported volumes.</entry>
- <entry>Reject All </entry>
- <entry>IP address or Host name </entry>
- </row>
- <row>
- <entry>nfs.rpc-auth-reject IP- Addresses </entry>
- <entry>Reject a comma separated list of addresses and/or hostnames from connecting to the server. By default, all connections are disallowed. This allows you to define a general rule for all exported volumes.</entry>
- <entry>Reject All </entry>
- <entry>IP address or Host name </entry>
- </row>
- <row>
- <entry>nfs.ports-insecure </entry>
- <entry>Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option. </entry>
- <entry>off</entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.addr-namelookup </entry>
- <entry>Turn-off name lookup for incoming client connections using this option. In some setups, the name server can take too long to reply to DNS queries resulting in timeouts of mount requests. Use this option to turn off name lookups during address authentication. Note, turning this off will prevent you from using hostnames in rpc-auth.addr.* filters. </entry>
- <entry>on </entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.register-with- portmap </entry>
- <entry>For systems that need to run multiple NFS servers, you need to prevent more than one from registering with portmap service. Use this option to turn off portmap registration for Gluster NFS. </entry>
- <entry>on </entry>
- <entry> On | Off </entry>
- </row>
- <row>
- <entry>nfs.port &lt;PORT- NUMBER&gt; </entry>
- <entry>Use this option on systems that need Gluster NFS to be associated with a non-default port number. </entry>
- <entry>38465- 38467 </entry>
- <entry/>
- </row>
- <row>
- <entry>nfs.disable</entry>
- <entry>Turn-off volume being exported by NFS</entry>
- <entry> off </entry>
- <entry>On | Off </entry>
- </row>
- <row>
- <entry>performance.write-behind-window-size </entry>
- <entry>Size of the per-file write-behind buffer.</entry>
- <entry>1 MB </entry>
- <entry>Write-behind cache size </entry>
- </row>
- <row>
- <entry>performance.io-thread-count </entry>
- <entry>The number of threads in IO threads translator. </entry>
- <entry>16</entry>
- <entry>0 - 65 </entry>
- </row>
- <row>
- <entry>performance.flush-behind </entry>
- <entry>If this option is set ON, instructs write-behind translator to perform flush in background, by returning success (or any errors, if any of previous writes were failed) to application even before flush is sent to backend filesystem. </entry>
- <entry>On </entry>
- <entry>On | Off </entry>
- </row>
- <row>
- <entry>performance.cache-max-file-size </entry>
- <entry>Sets the maximum file size cached by the io-cache translator. Can use the normal size descriptors of KB, MB, GB,TB or PB (for example, 6GB). Maximum size uint64. </entry>
- <entry>2 ^ 64 -1 bytes </entry>
- <entry>size in bytes </entry>
- </row>
- <row>
- <entry>performance.cache-min-file-size </entry>
- <entry> Sets the minimum file size cached by the io-cache translator. Values same as &quot;max&quot; above.</entry>
- <entry>0B</entry>
- <entry>size in bytes </entry>
- </row>
- <row>
- <entry>performance.cache-refresh-timeout </entry>
- <entry>The cached data for a file will be retained till &apos;cache-refresh-timeout&apos; seconds, after which data re-validation is performed. </entry>
- <entry>1 sec </entry>
- <entry>0 - 61 </entry>
- </row>
- <row>
- <entry>performance.cache-size </entry>
- <entry>Size of the read cache.</entry>
- <entry> 32 MB </entry>
- <entry>size in bytes </entry>
- </row>
- <row>
- <entry>server.allow-insecure </entry>
- <entry>Allow client connections from unprivileged ports. By default only privileged ports are allowed. This is a global setting in case insecure ports are to be enabled for all exports using a single option. </entry>
- <entry>on </entry>
- <entry>On | Off </entry>
- </row>
- <row>
- <entry>server.grace-timeout</entry>
- <entry>Specifies the duration for the lock state to be maintained on the server after a network disconnection.</entry>
- <entry>10</entry>
- <entry>10 - 1800 secs</entry>
- </row>
- <row>
- <entry>server.statedump-path </entry>
- <entry>Location of the state dump file. </entry>
- <entry>/tmp directory of the brick </entry>
- <entry>New directory path</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
- <para>You can view the changed volume options using the<command> # gluster volume info <replaceable>VOLNAME</replaceable></command> command. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Delete"/>.</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Expanding">
- <title>Expanding Volumes</title>
- <para>You can expand volumes, as needed, while the cluster is online and available. For example, you might want to add a brick to a distributed volume, thereby increasing the distribution and adding to the capacity of the GlusterFS volume. </para>
- <para>Similarly, you might want to add a group of bricks to a distributed replicated volume, increasing the capacity of the GlusterFS volume. </para>
- <para><note>
- <para>When expanding distributed replicated and distributed striped volumes, you need to add a number of bricks that is a multiple of the replica or stripe count. For example, to expand a distributed replicated volume with a replica count of 2, you need to add bricks in multiples of 2 (such as 4, 6, 8, etc.). </para>
- </note></para>
- <para><emphasis role="bold">To expand a volume</emphasis> </para>
- <orderedlist>
- <listitem>
- <para>On the first server in the cluster, probe the server to which you want to add the new brick using the following command:</para>
- <para><command># gluster peer probe <replaceable>HOSTNAME</replaceable></command></para>
- <para>For example:</para>
- <para><programlisting># gluster peer probe server4
-Probe successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Add the brick using the following command: </para>
- <para><command># gluster volume add-brick <replaceable>VOLNAME NEW-BRICK</replaceable></command></para>
- <para>For example:</para>
- <para><programlisting># gluster volume add-brick test-volume server4:/exp4
-Add Brick successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Check the volume information using the following command: </para>
- <para><command># gluster volume info </command></para>
- <para>The command displays information similar to the following:</para>
- <para><programlisting>Volume Name: test-volume
-Type: Distribute
-Status: Started
-Number of Bricks: 4
-Bricks:
-Brick1: server1:/exp1
-Brick2: server2:/exp2
-Brick3: server3:/exp3
-Brick4: server4:/exp4</programlisting></para>
- </listitem>
- <listitem>
- <para>Rebalance the volume to ensure that all files are distributed to the new brick.</para>
- <para>You can use the rebalance command as described in <xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing"/>.</para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Shrinking">
- <title>Shrinking Volumes</title>
- <para>You can shrink volumes, as needed, while the cluster is online and available. For example, you might need to remove a brick that has become inaccessible in a distributed volume due to hardware or network failure. </para>
- <para><note>
- <para>Data residing on the brick that you are removing will no longer be accessible at the Gluster mount point. Note however that only the configuration information is removed - you can continue to access the data directly from the brick, as necessary. </para>
- </note></para>
- <para>When shrinking distributed replicated and distributed striped volumes, you need to remove a number of bricks that is a multiple of the replica or stripe count. For example, to shrink a distributed striped volume with a stripe count of 2, you need to remove bricks in multiples of 2 (such as 4, 6, 8, etc.). In addition, the bricks you are trying to remove must be from the same sub-volume (the same replica or stripe set). </para>
- <para><emphasis role="bold">To shrink a volume</emphasis> </para>
- <orderedlist>
- <listitem>
- <para>Remove the brick using the following command:</para>
- <para><command># gluster volume remove-brick <varname>VOLNAME</varname><replaceable> BRICK</replaceable></command> <command>start</command></para>
- <para>For example, to remove server2:/exp2:</para>
- <para><programlisting># gluster volume remove-brick test-volume server2:/exp2
-
-Removing brick(s) can result in data loss. Do you want to Continue? (y/n)</programlisting></para>
- </listitem>
- <listitem>
- <para>Enter &quot;y&quot; to confirm the operation. The command displays the following message indicating that the remove brick operation is successfully started: </para>
- <para><programlisting>Remove Brick successful </programlisting></para>
- </listitem>
- <listitem>
- <para>(Optional) View the status of the remove brick operation using the following command:</para>
- <para><command># gluster volume remove-brick <varname>VOLNAME</varname><replaceable> BRICK</replaceable></command><command> status</command></para>
- <para>For example, to view the status of remove brick operation on server2:/exp2 brick:</para>
- <para><screen># gluster volume remove-brick test-volume server2:/exp2 status
- Node Rebalanced-files size scanned status
- --------- ---------------- ---- ------- -----------
-617c923e-6450-4065-8e33-865e28d9428f 34 340 162 in progress</screen></para>
- </listitem>
- <listitem>
- <para>Check the volume information using the following command: </para>
- <para><command># gluster volume info </command></para>
- <para>The command displays information similar to the following:</para>
- <para><programlisting># gluster volume info
-Volume Name: test-volume
-Type: Distribute
-Status: Started
-Number of Bricks: 3
-Bricks:
-Brick1: server1:/exp1
-Brick3: server3:/exp3
-Brick4: server4:/exp4</programlisting></para>
- </listitem>
- <listitem>
- <para>Rebalance the volume to ensure that all files are distributed to the new brick.</para>
- <para>You can use the rebalance command as described in <xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing"/>.</para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Migrating">
- <title>Migrating Volumes</title>
- <para>You can migrate the data from one brick to another, as needed, while the cluster is online and available. </para>
- <para><emphasis role="bold">To migrate a volume</emphasis> </para>
- <orderedlist>
- <listitem>
- <para>Make sure the new brick, server5 in this example, is successfully added to the cluster.</para>
- <para>For more information, see <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Migrate the data from one brick to another using the following command:</para>
- <para><command> # gluster volume replace-brick <code>VOLNAME</code><code> BRICK</code><code>NEW-BRICK</code> start</command></para>
- <para>For example, to migrate the data in server3:/exp3 to server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:exp5 start
-Replace brick start operation successful</programlisting></para>
- <para><note>
- <para>You need to have the FUSE package installed on the server on which you are running the replace-brick command for the command to work.</para>
- </note></para>
- </listitem>
- <listitem>
- <para>To pause the migration operation, if needed, use the following command: </para>
- <para><command># gluster volume replace-brick <varname>VOLNAME BRICK NEW-BRICK </varname> pause </command></para>
- <para>For example, to pause the data migration from server3:/exp3 to server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:exp5 pause
-Replace brick pause operation successful</programlisting></para>
- </listitem>
- <listitem>
- <para>To abort the migration operation, if needed, use the following command: </para>
- <para><command> # gluster volume replace-brick <varname>VOLNAME BRICK NEW-BRICK </varname>abort </command></para>
- <para>For example, to abort the data migration from server3:/exp3 to server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:exp5 abort
-Replace brick abort operation successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Check the status of the migration operation using the following command: </para>
- <para><command> # gluster volume replace-brick <varname>VOLNAME BRICK NEW-BRICK </varname>status </command></para>
- <para>For example, to check the data migration status from server3:/exp3 to server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 status
-Current File = /usr/src/linux-headers-2.6.31-14/block/Makefile
-Number of files migrated = 10567
-Migration complete</programlisting></para>
- <para>The status command shows the current file being migrated along with the current total number of files migrated. After completion of migration, it displays Migration complete.</para>
- </listitem>
- <listitem>
- <para>Commit the migration of data from one brick to another using the following command: </para>
- <para><command> # gluster volume replace-brick <varname>VOLNAME BRICK NEW-BRICK </varname>commit </command></para>
- <para>For example, to commit the data migration from server3:/exp3 to server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume replace-brick test-volume server3:/exp3 server5:/exp5 commit
-replace-brick commit successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Verify the migration of brick by viewing the volume info using the following command: </para>
- <para><command># gluster volume info <code>VOLNAME</code></command></para>
- <para>For example, to check the volume information of new brick server5:/exp5 in test-volume:</para>
- <para><programlisting># gluster volume info test-volume
-Volume Name: testvolume
-Type: Replicate
-Status: Started
-Number of Bricks: 4
-Transport-type: tcp
-Bricks:
-Brick1: server1:/exp1
-Brick2: server2:/exp2
-Brick3: server4:/exp4
-Brick4: server5:/exp5
-
-The new volume details are displayed.
-</programlisting></para>
- <para>The new volume details are displayed.</para>
- <para>In the above example, previously, there were bricks; 1,2,3, and 4 and now brick 3 is replaced by brick 5.</para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Rebalancing">
- <title>Rebalancing Volumes</title>
- <para>After expanding or shrinking a volume (using the add-brick and remove-brick commands respectively), you need to rebalance the data among the servers. New directories created after expanding or shrinking of the volume will be evenly distributed automatically. For all the existing directories, the distribution can be fixed by rebalancing the layout and/or data. </para>
- <para>This section describes how to rebalance GlusterFS volumes in your storage environment, using the following common scenarios: </para>
- <itemizedlist>
- <listitem>
- <para>Fix Layout - Fixes the layout changes so that the files can actually go to newly added nodes. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing-Fix_Layout"/>. </para>
- </listitem>
- <listitem>
- <para>Fix Layout and Migrate Data - Rebalances volume by fixing the layout changes and migrating the existing data. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Rebalancing-Fix_Migrate"/>.</para>
- </listitem>
- </itemizedlist>
- <section id="sect-Administration_Guide-Managing_Volumes-Rebalancing-Fix_Layout">
- <title>Rebalancing Volume to Fix Layout Changes</title>
- <para>Fixing the layout is necessary because the layout structure is static for a given directory. In a scenario where new bricks have been added to the existing volume, newly created files in existing directories will still be distributed only among the old bricks. The <command># gluster volume rebalance<varname> VOLNAME</varname> fix-layout start </command>command will fix the layout information so that the files can also go to newly added nodes. When this command is issued, all the file stat information which is already cached will get revalidated. </para>
- <para>A fix-layout rebalance will only fix the layout changes and does not migrate data. If you want to migrate the existing data, use<command># gluster volume rebalance <varname>VOLNAME</varname> start </command> command to rebalance data among the servers. </para>
- <para><emphasis role="bold">To rebalance a volume to fix layout changes</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Start the rebalance operation on any one of the server using the following command:</para>
- <para><command># gluster volume rebalance<varname> VOLNAME</varname> fix-layout start</command></para>
- <para>For example:</para>
- <para><programlisting># gluster volume rebalance test-volume fix-layout start
-Starting rebalance on volume test-volume has been successful</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Rebalancing-Fix_Migrate">
- <title>Rebalancing Volume to Fix Layout and Migrate Data</title>
- <para>After expanding or shrinking a volume (using the add-brick and remove-brick commands respectively), you need to rebalance the data among the servers. </para>
- <para><emphasis role="bold">To rebalance a volume to fix layout and migrate the existing data</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Start the rebalance operation on any one of the server using the following command:</para>
- <para><command># gluster volume rebalance<varname> VOLNAME</varname> start</command></para>
- <para>For example:</para>
- <para><programlisting># gluster volume rebalance test-volume start
-Starting rebalancing on volume test-volume has been successful</programlisting></para>
- </listitem>
- <listitem>
- <para>Start the migration operation forcefully on any one of the server using the following command:</para>
- <para><command># gluster volume rebalance<varname> VOLNAME</varname> start force</command></para>
- <para>For example:</para>
- <para><programlisting># gluster volume rebalance test-volume start force
-Starting rebalancing on volume test-volume has been successful</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Displaying Status of Rebalance Operation</title>
- <para>You can display the status information about rebalance volume operation, as needed. </para>
- <para><emphasis role="bold">To view status of rebalance volume</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Check the status of the rebalance operation, using the following command:</para>
- <para><command># gluster volume rebalance <replaceable>VOLNAME</replaceable> status</command></para>
- <para>For example:</para>
- <para><screen># gluster volume rebalance test-volume status
- Node Rebalanced-files size scanned status
- --------- ---------------- ---- ------- -----------
-617c923e-6450-4065-8e33-865e28d9428f 416 1463 312 in progress</screen></para>
- <para>The time to complete the rebalance operation depends on the number of files on the volume along with the corresponding file sizes. Continue checking the rebalance status, verifying that the number of files rebalanced or total files scanned keeps increasing.</para>
- <para>For example, running the status command again might display a result similar to the following:</para>
- <para><screen># gluster volume rebalance test-volume status
- Node Rebalanced-files size scanned status
- --------- ---------------- ---- ------- -----------
-617c923e-6450-4065-8e33-865e28d9428f 498 1783 378 in progress</screen></para>
- <para>The rebalance status displays the following when the rebalance is complete:</para>
- <para><screen># gluster volume rebalance test-volume status
- Node Rebalanced-files size scanned status
- --------- ---------------- ---- ------- -----------
-617c923e-6450-4065-8e33-865e28d9428f 502 1873 334 completed</screen></para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Stopping Rebalance Operation</title>
- <para>You can stop the rebalance operation, as needed.</para>
- <para><emphasis role="bold">To stop rebalance</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Stop the rebalance operation using the following command:</para>
- <para><command># gluster volume rebalance <replaceable>VOLNAME</replaceable> stop</command></para>
- <para>For example:</para>
- <para><screen># gluster volume rebalance test-volume stop
- Node Rebalanced-files size scanned status
- --------- ---------------- ---- ------- -----------
-617c923e-6450-4065-8e33-865e28d9428f 59 590 244 stopped
-Stopped rebalance process on volume test-volume </screen></para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Stop">
- <title>Stopping Volumes</title>
- <para>To stop a volume</para>
- <orderedlist>
- <listitem>
- <para>Stop the volume using the following command:
-
-</para>
- <para><command># gluster volume stop <varname>VOLNAME </varname></command></para>
- <para>For example, to stop test-volume:</para>
- <para><programlisting># gluster volume stop test-volume
-Stopping volume will make its data inaccessible. Do you want to continue? (y/n)
-</programlisting></para>
- </listitem>
- <listitem>
- <para>Enter <userinput>y</userinput> to confirm the operation. The output of the command displays the following:
-
-</para>
- <programlisting>Stopping volume test-volume has been successful</programlisting>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Delete">
- <title>Deleting Volumes</title>
- <para>To delete a volume </para>
- <orderedlist>
- <listitem>
- <para>Delete the volume using the following command:</para>
- <para><command># gluster volume delete <varname>VOLNAME</varname></command></para>
- <para>For example, to delete test-volume:</para>
- <para><programlisting># gluster volume delete test-volume
-Deleting volume will erase all information about the volume. Do you want to continue? (y/n)</programlisting></para>
- </listitem>
- <listitem>
- <para>Enter <userinput role="bold">y</userinput> to confirm the operation. The command displays the following:</para>
- <para><programlisting>Deleting volume test-volume has been successful</programlisting></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Managing_Volumes-Self_heal">
- <title>Triggering Self-Heal on Replicate</title>
- <para>In replicate module, previously you had to manually trigger a self-heal when a brick goes offline and comes back online, to bring all the replicas in sync. Now the pro-active self-heal daemon runs in the background, diagnoses issues and automatically initiates self-healing every 10 minutes on the files which requires<emphasis role="italic"> healing</emphasis>. </para>
- <para>You can view the list of files that need <emphasis role="italic">healing</emphasis>, the list of files which are currently/previously <emphasis role="italic">healed</emphasis>, list of files which are in split-brain state, and you can manually trigger self-heal on the entire volume or only on the files which need <emphasis role="italic">healing</emphasis>.</para>
- <itemizedlist>
- <listitem>
- <para>Trigger self-heal only on the files which requires <emphasis role="italic">healing</emphasis>:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command></para>
- <para>For example, to trigger self-heal on files which requires <emphasis role="italic">healing</emphasis> of test-volume:</para>
- <para><screen># gluster volume heal test-volume
-Heal operation on volume test-volume has been successful</screen></para>
- </listitem>
- <listitem>
- <para>Trigger self-heal on all the files of a volume:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>full</command></para>
- <para>For example, to trigger self-heal on all the files of of test-volume:</para>
- <para><screen># gluster volume heal test-volume full
-Heal operation on volume test-volume has been successful</screen></para>
- </listitem>
- <listitem>
- <para>View the list of files that needs <emphasis role="italic">healing</emphasis>:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>info</command></para>
- <para>For example, to view the list of files on test-volume that needs <emphasis role="italic">healing</emphasis>:</para>
- <para><screen># gluster volume heal test-volume info
-Brick <emphasis role="italic">server1</emphasis>:/gfs/test-volume_0
-Number of entries: 0
-
-Brick <emphasis role="italic">server2</emphasis>:/gfs/test-volume_1
-Number of entries: 101
-/95.txt
-/32.txt
-/66.txt
-/35.txt
-/18.txt
-/26.txt
-/47.txt
-/55.txt
-/85.txt
-...</screen></para>
- </listitem>
- <listitem>
- <para>View the list of files that are self-healed:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>info healed</command> </para>
- <para>For example, to view the list of files on test-volume that are self-healed:</para>
- <para><screen># gluster volume heal test-volume info healed
-Brick <emphasis role="italic">server1</emphasis>:/gfs/test-volume_0
-Number of entries: 0
-
-Brick <emphasis role="italic">server2</emphasis>:/gfs/test-volume_1
-Number of entries: 69
-/99.txt
-/93.txt
-/76.txt
-/11.txt
-/27.txt
-/64.txt
-/80.txt
-/19.txt
-/41.txt
-/29.txt
-/37.txt
-/46.txt
-...</screen></para>
- </listitem>
- <listitem>
- <para>View the list of files of a particular volume on which the self-heal failed:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>info failed</command> </para>
- <para>For example, to view the list of files of test-volume that are not self-healed:</para>
- <para><screen># gluster volume heal test-volume info failed
-Brick <emphasis role="italic">server1</emphasis>:/gfs/test-volume_0
-Number of entries: 0
-
-Brick server2:/gfs/test-volume_3
-Number of entries: 72
-/90.txt
-/95.txt
-/77.txt
-/71.txt
-/87.txt
-/24.txt
-...</screen></para>
- </listitem>
- <listitem>
- <para>View the list of files of a particular volume which are in split-brain state:</para>
- <para><command># gluster volume heal <replaceable>VOLNAME</replaceable></command> <command>info split-brain</command> </para>
- <para>For example, to view the list of files of test-volume which are in split-brain state:</para>
- <para><screen># gluster volume heal test-volume info split-brain
-Brick server1:/gfs/test-volume_2
-Number of entries: 12
-/83.txt
-/28.txt
-/69.txt
-...
-
-Brick <emphasis role="italic">server2</emphasis>:/gfs/test-volume_2
-Number of entries: 12
-/83.txt
-/28.txt
-/69.txt
-...</screen></para>
- </listitem>
- </itemizedlist>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_monitoring_workload.xml b/doc/admin-guide/en-US/admin_monitoring_workload.xml
deleted file mode 100644
index e85bc51d896..00000000000
--- a/doc/admin-guide/en-US/admin_monitoring_workload.xml
+++ /dev/null
@@ -1,878 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Monitor_Workload">
- <title>Monitoring your GlusterFS Workload</title>
- <para>You can monitor the GlusterFS volumes on different parameters. Monitoring volumes helps in capacity planning and performance tuning tasks of the GlusterFS volume. Using these information, you can identify and troubleshoot issues. </para>
- <para>You can use Volume Top and Profile commands to view the performance and identify bottlenecks/hotspots of each brick of a volume. This helps system administrators to get vital performance information whenever performance needs to be probed. </para>
- <para>You can also perform statedump of the brick processes and nfs server process of a volume, and also view volume status and volume information. </para>
- <section id="chap-Administration_Guide-Monitor_Workload-Profile">
- <title>Running GlusterFS Volume Profile Command</title>
- <para>GlusterFS Volume Profile command provides an interface to get the per-brick I/O information for each File Operation (FOP) of a volume. The per brick information helps in identifying bottlenecks in the storage system.
-</para>
- <para>This section describes how to run GlusterFS Volume Profile command by performing the following operations:
-</para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Profile-Start"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Profile-Display"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Profile-Stop"/></para>
- </listitem>
- </itemizedlist>
- <section id="chap-Administration_Guide-Monitor_Workload-Profile-Start">
- <title>Start Profiling</title>
- <para>You must start the Profiling to view the File Operation information for each brick.
-</para>
- <para><emphasis role="bold">To start profiling: </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Start profiling using the following command:
- </para>
- </listitem>
- </itemizedlist>
- <para><command># gluster volume profile <replaceable>VOLNAME</replaceable> start </command></para>
- <para>For example, to start profiling on test-volume:
-</para>
- <para><programlisting># gluster volume profile test-volume start
-Profiling started on test-volume</programlisting></para>
- <para>When profiling on the volume is started, the following additional options are displayed in the Volume Info:
-</para>
- <para><programlisting>diagnostics.count-fop-hits: on
-
-diagnostics.latency-measurement: on</programlisting></para>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Profile-Display">
- <title>Displaying the I/0 Information</title>
- <para>You can view the I/O information of each brick.
-</para>
- <para>To display I/O information:
-</para>
- <itemizedlist>
- <listitem>
- <para>Display the I/O information using the following command:
-</para>
- </listitem>
- </itemizedlist>
- <para><command># gluster volume profile <replaceable>VOLNAME</replaceable> info</command>
-
-</para>
- <para>For example, to see the I/O information on test-volume:
-
-</para>
- <para><programlisting># gluster volume profile test-volume info
-Brick: Test:/export/2
-Cumulative Stats:
-
-Block 1b+ 32b+ 64b+
-Size:
- Read: 0 0 0
- Write: 908 28 8
-
-Block 128b+ 256b+ 512b+
-Size:
- Read: 0 6 4
- Write: 5 23 16
-
-Block 1024b+ 2048b+ 4096b+
-Size:
- Read: 0 52 17
- Write: 15 120 846
-
-Block 8192b+ 16384b+ 32768b+
-Size:
- Read: 52 8 34
- Write: 234 134 286
-
-Block 65536b+ 131072b+
-Size:
- Read: 118 622
- Write: 1341 594
-
-
-%-latency Avg- Min- Max- calls Fop
- latency Latency Latency
-___________________________________________________________
-4.82 1132.28 21.00 800970.00 4575 WRITE
-5.70 156.47 9.00 665085.00 39163 READDIRP
-11.35 315.02 9.00 1433947.00 38698 LOOKUP
-11.88 1729.34 21.00 2569638.00 7382 FXATTROP
-47.35 104235.02 2485.00 7789367.00 488 FSYNC
-
-------------------
-
-------------------
-
-Duration : 335
-
-BytesRead : 94505058
-
-BytesWritten : 195571980</programlisting></para>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Profile-Stop">
- <title>Stop Profiling</title>
- <para>You can stop profiling the volume, if you do not need profiling information anymore.
-</para>
- <para><emphasis role="bold">To stop profiling</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>Stop profiling using the following command:
-</para>
- <para><command># gluster volume profile <replaceable>VOLNAME</replaceable> stop</command>
-</para>
- <para>For example, to stop profiling on test-volume:</para>
- <para><command># gluster volume profile <replaceable>test-volume</replaceable> stop</command> </para>
- <para><computeroutput>Profiling stopped on test-volume</computeroutput></para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top">
- <title> Running GlusterFS Volume TOP Command </title>
- <para>GlusterFS Volume Top command allows you to view the glusterfs bricks’ performance metrics like
-read, write, file open calls, file read calls, file write calls, directory open calls, and directory real
-calls. The top command displays up to 100 results.
-</para>
- <para>This section describes how to run and view the results for the following GlusterFS Top commands:
-</para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Open_FD_Count"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-File_Read"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-File_Write"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Open_Calls"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Read_Calls"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Read_Perf"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="chap-Administration_Guide-Monitor_Workload-Top-Write_Perf"/></para>
- </listitem>
- </itemizedlist>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-Open_FD_Count">
- <title>Viewing Open fd Count and Maximum fd Count </title>
- <para>You can view both current open fd count (list of files that are currently the most opened and the
-count) on the brick and the maximum open fd count (count of files that are the currently open and
-the count of maximum number of files opened at any given point of time, since the servers are up
-and running). If the brick name is not specified, then open fd metrics of all the bricks belonging to
-the volume will be displayed.
-</para>
- <para><emphasis role="bold">To view open fd count and maximum fd count: </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>View open fd count and maximum fd count using the following command:</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> open [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>]</command>
-</para>
- <para>For example, to view open fd count and maximum fd count on brick <replaceable>server:/export</replaceable> of <replaceable>test-volume</replaceable> and list top 10 open calls:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> open brick <replaceable>server:/export/</replaceable> list-cnt <replaceable>10</replaceable></command></para>
- <para><computeroutput>Brick: server:/export/dir1 </computeroutput></para>
- <para><computeroutput>Current open fd&apos;s: 34 Max open fd&apos;s: 209 </computeroutput><programlisting> ==========Open file stats========
-
-open file name
-call count
-
-2 /clients/client0/~dmtmp/PARADOX/
- COURSES.DB
-
-11 /clients/client0/~dmtmp/PARADOX/
- ENROLL.DB
-
-11 /clients/client0/~dmtmp/PARADOX/
- STUDENTS.DB
-
-10 /clients/client0/~dmtmp/PWRPNT/
- TIPS.PPT
-
-10 /clients/client0/~dmtmp/PWRPNT/
- PCBENCHM.PPT
-
-9 /clients/client7/~dmtmp/PARADOX/
- STUDENTS.DB
-
-9 /clients/client1/~dmtmp/PARADOX/
- STUDENTS.DB
-
-9 /clients/client2/~dmtmp/PARADOX/
- STUDENTS.DB
-
-9 /clients/client0/~dmtmp/PARADOX/
- STUDENTS.DB
-
-9 /clients/client8/~dmtmp/PARADOX/
- STUDENTS.DB</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-File_Read">
- <title>Viewing Highest File Read Calls </title>
- <para>You can view highest read calls on each brick. If brick name is not specified, then by default, list of
-100 files will be displayed.
-</para>
- <para><emphasis role="bold">To view highest file Read calls:</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>View highest file Read calls using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> read [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para>
- <para>For example, to view highest Read calls on brick server:/export of test-volume:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> read brick <replaceable>server:/export</replaceable> list-cnt <replaceable>10</replaceable></command></para>
- <para><computeroutput>Brick:</computeroutput> <replaceable>server:/export/dir1</replaceable><programlisting> ==========Read file stats========
-
-read filename
-call count
-
-116 /clients/client0/~dmtmp/SEED/LARGE.FIL
-
-64 /clients/client0/~dmtmp/SEED/MEDIUM.FIL
-
-54 /clients/client2/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client6/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client5/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client0/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client3/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client4/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client9/~dmtmp/SEED/LARGE.FIL
-
-54 /clients/client8/~dmtmp/SEED/LARGE.FIL</programlisting> </para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-File_Write">
- <title>Viewing Highest File Write Calls </title>
- <para>You can view list of files which has highest file write calls on each brick. If brick name is not
-specified, then by default, list of 100 files will be displayed.
-</para>
- <para><emphasis role="bold">To view highest file Write calls:</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>View highest file Write calls using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> write [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para>
- <para>For example, to view highest Write calls on brick <replaceable>server:/export</replaceable> of <replaceable>test-volume</replaceable>:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> write brick <replaceable>server:/export</replaceable> list-cnt <replaceable>10</replaceable></command></para>
- <para><code>Brick: server:/export/dir1 </code><programlisting> ==========Write file stats========
-write call count filename
-
-83 /clients/client0/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client7/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client1/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client2/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client0/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client8/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client5/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client4/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client6/~dmtmp/SEED/LARGE.FIL
-
-59 /clients/client3/~dmtmp/SEED/LARGE.FIL</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-Open_Calls">
- <title>Viewing Highest Open Calls on Directories </title>
- <para>You can view list of files which has highest open calls on directories of each brick. If brick name is
-not specified, then the metrics of all the bricks belonging to that volume will be displayed.
-</para>
- <para>To view list of open calls on each directory</para>
- <itemizedlist>
- <listitem>
- <para>View list of open calls on each directory using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> opendir [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para>
- <para>For example, to view open calls on brick server:/export/ of test-volume:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> opendir brick <replaceable>server:/export</replaceable> list-cnt <replaceable>10</replaceable></command></para>
- <para><code>Brick: server:/export/dir1 </code><programlisting> ==========Directory open stats========
-
-Opendir count directory name
-
-1001 /clients/client0/~dmtmp
-
-454 /clients/client8/~dmtmp
-
-454 /clients/client2/~dmtmp
-
-454 /clients/client6/~dmtmp
-
-454 /clients/client5/~dmtmp
-
-454 /clients/client9/~dmtmp
-
-443 /clients/client0/~dmtmp/PARADOX
-
-408 /clients/client1/~dmtmp
-
-408 /clients/client7/~dmtmp
-
-402 /clients/client4/~dmtmp</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-Read_Calls">
- <title>Viewing Highest Read Calls on Directory </title>
- <para>You can view list of files which has highest directory read calls on each brick. If brick name is not
-specified, then the metrics of all the bricks belonging to that volume will be displayed.
-</para>
- <para><emphasis role="bold">To view list of highest directory read calls on each brick</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>View list of highest directory read calls on each brick using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> readdir [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para>
- <para>For example, to view highest directory read calls on brick <replaceable>server:/export</replaceable> of <replaceable>test-volume</replaceable>:</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> readdir brick <replaceable>server:/export</replaceable> list-cnt <replaceable>10</replaceable></command> </para>
- <para><code>Brick: <replaceable>server:/export/dir1</replaceable></code><programlisting>==========Directory readdirp stats========
-
-readdirp count directory name
-
-1996 /clients/client0/~dmtmp
-
-1083 /clients/client0/~dmtmp/PARADOX
-
-904 /clients/client8/~dmtmp
-
-904 /clients/client2/~dmtmp
-
-904 /clients/client6/~dmtmp
-
-904 /clients/client5/~dmtmp
-
-904 /clients/client9/~dmtmp
-
-812 /clients/client1/~dmtmp
-
-812 /clients/client7/~dmtmp
-
-800 /clients/client4/~dmtmp</programlisting>
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-Read_Perf">
- <title>Viewing List of Read Performance on each Brick </title>
- <para>You can view the read throughput of files on each brick. If brick name is not specified, then the
-metrics of all the bricks belonging to that volume will be displayed. The output will be the read
-throughput.
-</para>
- <para><programlisting> ==========Read throughput file stats========
-
-read filename Time
-through
-put(MBp
-s)
-
-2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
- TRIDOTS.POT 15:38:36.894610
-2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
- PCBENCHM.PPT 15:38:39.815310
-2383.00 /clients/client2/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:52:53.631499
-
-2340.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:38:36.926198
-
-2299.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- LARGE.FIL 15:38:36.930445
-
-2259.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31
- COURSES.X04 15:38:40.549919
-
-2221.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31
- STUDENTS.VAL 15:52:53.298766
-
-2221.00 /clients/client3/~dmtmp/SEED/ -2011-01-31
- COURSES.DB 15:39:11.776780
-
-2184.00 /clients/client3/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:39:10.251764
-
-2184.00 /clients/client5/~dmtmp/WORD/ -2011-01-31
- BASEMACH.DOC 15:39:09.336572 </programlisting>This command will initiate a dd for the specified count and block size and measures the
-corresponding throughput.
-</para>
- <para><emphasis role="bold">To view list of read performance on each brick</emphasis>
-</para>
- <itemizedlist>
- <listitem>
- <para>View list of read performance on each brick using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> read-perf [bs <replaceable>blk-size</replaceable> count <replaceable>count</replaceable>] [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>]</command>
-</para>
- <para>For example, to view read performance on brick server:/export/ of test-volume, 256 block size
-of count 1, and list count 10:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> read-perf bs 256 count 1 brick <replaceable>server:/export/ </replaceable>list-cnt <replaceable>10</replaceable></command></para>
- <para><computeroutput>Brick: server:/export/dir1 256 bytes (256 B) copied, Throughput: 4.1 MB/s </computeroutput></para>
- <programlisting> ==========Read throughput file stats========
-
-read filename Time
-through
-put(MBp
-s)
-
-2912.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
- TRIDOTS.POT 15:38:36.896486
-
-2570.00 /clients/client0/~dmtmp/PWRPNT/ -2011-01-31
- PCBENCHM.PPT 15:38:39.815310
-
-2383.00 /clients/client2/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:52:53.631499
-
-2340.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:38:36.926198
-
-2299.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- LARGE.FIL 15:38:36.930445
-
-2259.00 /clients/client0/~dmtmp/PARADOX/ -2011-01-31
- COURSES.X04 15:38:40.549919
-
-2221.00 /clients/client9/~dmtmp/PARADOX/ -2011-01-31
- STUDENTS.VAL 15:52:53.298766
-
-2221.00 /clients/client8/~dmtmp/PARADOX/ -2011-01-31
- COURSES.DB 15:39:11.776780
-
-2184.00 /clients/client3/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:39:10.251764
-
-2184.00 /clients/client5/~dmtmp/WORD/ -2011-01-31
- BASEMACH.DOC 15:39:09.336572
- </programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section id="chap-Administration_Guide-Monitor_Workload-Top-Write_Perf">
- <title>Viewing List of Write Performance on each Brick </title>
- <para>You can view list of write throughput of files on each brick. If brick name is not specified, then the
-metrics of all the bricks belonging to that volume will be displayed. The output will be the write
-throughput.
-</para>
- <para>This command will initiate a dd for the specified count and block size and measures the
-corresponding throughput.
-To view list of write performance on each brick:
-</para>
- <itemizedlist>
- <listitem>
- <para>View list of write performance on each brick using the following command:
-</para>
- <para><command># gluster volume top <replaceable>VOLNAME</replaceable> write-perf [bs <replaceable>blk-size</replaceable> count <replaceable>count</replaceable>] [brick <replaceable>BRICK-NAME</replaceable>] [list-cnt <replaceable>cnt</replaceable>] </command></para>
- <para>For example, to view write performance on brick <replaceable>server:/export/</replaceable> of <replaceable>test-volume</replaceable>, 256 block size
-of count 1, and list count 10:
-</para>
- <para><command># gluster volume top <replaceable>test-volume</replaceable> write-perf bs 256 count 1 brick <replaceable>server:/export/ </replaceable>list-cnt <replaceable>10</replaceable></command></para>
- <para><code>Brick</code>: <replaceable>server:/export/dir1</replaceable>
-</para>
- <para><code>256 bytes (256 B) copied, Throughput: 2.8 MB/s </code><programlisting> ==========Write throughput file stats========
-
-write filename Time
-throughput
-(MBps)
-
-1170.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- SMALL.FIL 15:39:09.171494
-
-1008.00 /clients/client6/~dmtmp/SEED/ -2011-01-31
- LARGE.FIL 15:39:09.73189
-
-949.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:38:36.927426
-
-936.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- LARGE.FIL 15:38:36.933177
-897.00 /clients/client5/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:39:09.33628
-
-897.00 /clients/client6/~dmtmp/SEED/ -2011-01-31
- MEDIUM.FIL 15:39:09.27713
-
-885.00 /clients/client0/~dmtmp/SEED/ -2011-01-31
- SMALL.FIL 15:38:36.924271
-
-528.00 /clients/client5/~dmtmp/SEED/ -2011-01-31
- LARGE.FIL 15:39:09.81893
-
-516.00 /clients/client6/~dmtmp/ACCESS/ -2011-01-31
- FASTENER.MDB 15:39:01.797317
-</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section id="sect-Administration_Guide-Monitor_Workload-Displaying_Volume_Information">
- <title>Displaying Volume Information </title>
- <para>You can display information about a specific volume, or all volumes, as needed.</para>
- <para><emphasis role="bold">To display volume information </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Display information about a specific volume using the following command:</para>
- <para><command># gluster volume info </command><varname>VOLNAME</varname></para>
- <para>For example, to display information about test-volume:</para>
- <para><programlisting># gluster volume info test-volume
-Volume Name: test-volume
-Type: Distribute
-Status: Created
-Number of Bricks: 4
-Bricks:
-Brick1: server1:/exp1
-Brick2: server2:/exp2
-Brick3: server3:/exp3
-Brick4: server4:/exp4</programlisting></para>
- </listitem>
- <listitem>
- <para>Display information about all volumes using the following command:</para>
- <para><command># gluster volume info all</command></para>
- <para><programlisting># gluster volume info all
-
-Volume Name: test-volume
-Type: Distribute
-Status: Created
-Number of Bricks: 4
-Bricks:
-Brick1: server1:/exp1
-Brick2: server2:/exp2
-Brick3: server3:/exp3
-Brick4: server4:/exp4
-
-Volume Name: mirror
-Type: Distributed-Replicate
-Status: Started
-Number of Bricks: 2 X 2 = 4
-Bricks:
-Brick1: server1:/brick1
-Brick2: server2:/brick2
-Brick3: server3:/brick3
-Brick4: server4:/brick4
-
-Volume Name: Vol
-Type: Distribute
-Status: Started
-Number of Bricks: 1
-Bricks:
-Brick: server:/brick6
-
-</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-Monitor_Workload-Performing_Statedump">
- <title>Performing Statedump on a Volume </title>
- <para>Statedump is a mechanism through which you can get details of all internal variables and state of the glusterfs process at the time of issuing the command.You can perform statedumps of the brick processes and nfs server process of a volume using the statedump command. The following options can be used to determine what information is to be dumped:</para>
- <itemizedlist>
- <listitem>
- <para><emphasis role="bold">mem</emphasis> - Dumps the memory usage and memory pool details of the bricks.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">iobuf</emphasis> - Dumps iobuf details of the bricks.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">priv</emphasis> - Dumps private information of loaded translators.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">callpool</emphasis> - Dumps the pending calls of the volume.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">fd</emphasis> - Dumps the open fd tables of the volume.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">inode</emphasis> - Dumps the inode tables of the volume.</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To display volume statedump </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Display statedump of a volume or NFS server using the following command:</para>
- <para> <command># gluster volume statedump <replaceable>VOLNAME</replaceable> [nfs] [all|mem|iobuf|callpool|priv|fd|inode]</command></para>
- <para>For example, to display statedump of test-volume:</para>
- <para><programlisting># gluster volume statedump test-volume
-Volume statedump successful</programlisting></para>
- <para>The statedump files are created on the brick servers in the<filename> /tmp</filename> directory or in the directory set using <command>server.statedump-path</command> volume option. The naming convention of the dump file is <filename>&lt;brick-path&gt;.&lt;brick-pid&gt;.dump</filename>.</para>
- </listitem>
- <listitem>
- <para>By defult, the output of the statedump is stored at <filename> /tmp/&lt;brickname.PID.dump&gt;</filename> file on that particular server. Change the directory of the statedump file using the following command:</para>
- <para><command># gluster volume set <replaceable>VOLNAME</replaceable> server.statedump-path <replaceable>path</replaceable></command></para>
- <para>For example, to change the location of the statedump file of test-volume:</para>
- <para><programlisting># gluster volume set test-volume server.statedump-path /usr/local/var/log/glusterfs/dumps/
-Set volume successful</programlisting></para>
- <para>You can view the changed path of the statedump file using the following command:</para>
- <para><command># gluster volume info <replaceable>VOLNAME</replaceable></command></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-Monitor_Workload-Displaying_Volume_Status">
- <title>Displaying Volume Status </title>
- <para>You can display the status information about a specific volume, brick or all volumes, as needed. Status information can be used to understand the current status of the brick, nfs processes, and overall file system. Status information can also be used to monitor and debug the volume information. You can view status of the volume along with the following details:</para>
- <itemizedlist>
- <listitem>
- <para><emphasis role="bold">detail</emphasis> - Displays additional information about the bricks.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">clients</emphasis> - Displays the list of clients connected to the volume.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">mem</emphasis> - Displays the memory usage and memory pool details of the bricks.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">inode</emphasis> - Displays the inode tables of the volume.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">fd</emphasis> - Displays the open fd (file descriptors) tables of the volume.</para>
- </listitem>
- <listitem>
- <para><emphasis role="bold">callpool</emphasis> - Displays the pending calls of the volume.</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To display volume status </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Display information about a specific volume using the following command:</para>
- <para><command># gluster volume status [all|<replaceable>VOLNAME</replaceable> [<replaceable>BRICKNAME</replaceable>]] [detail|clients|mem|inode|fd|callpool]</command> </para>
- <para>For example, to display information about test-volume:</para>
- <para><programlisting># gluster volume status test-volume
-STATUS OF VOLUME: test-volume
-BRICK PORT ONLINE PID
---------------------------------------------------------
-arch:/export/1 24009 Y 22445
---------------------------------------------------------
-arch:/export/2 24010 Y 22450</programlisting></para>
- </listitem>
- <listitem>
- <para>Display information about all volumes using the following command:</para>
- <para><command># gluster volume status all</command>
-</para>
- <para><programlisting># gluster volume status all
-STATUS OF VOLUME: volume-test
-BRICK PORT ONLINE PID
---------------------------------------------------------
-arch:/export/4 24010 Y 22455
-
-STATUS OF VOLUME: test-volume
-BRICK PORT ONLINE PID
---------------------------------------------------------
-arch:/export/1 24009 Y 22445
---------------------------------------------------------
-arch:/export/2 24010 Y 22450</programlisting></para>
- </listitem>
- <listitem>
- <para>Display additional information about the bricks using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> detail</command>
-</para>
- <para>For example, to display additional information about the bricks of test-volume:</para>
- <para><programlisting># gluster volume status test-volume details
-STATUS OF VOLUME: test-volume
--------------------------------------------
-Brick : arch:/export/1
-Port : 24009
-Online : Y
-Pid : 16977
-File System : rootfs
-Device : rootfs
-Mount Options : rw
-Disk Space Free : 13.8GB
-Total Disk Space : 46.5GB
-Inode Size : N/A
-Inode Count : N/A
-Free Inodes : N/A
-
-Number of Bricks: 1
-Bricks:
-Brick: server:/brick6</programlisting></para>
- </listitem>
- <listitem>
- <para>Display the list of clients accessing the volumes using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> clients</command>
-</para>
- <para>For example, to display the list of clients connected to test-volume:</para>
- <para><programlisting># gluster volume status test-volume clients
-Brick : arch:/export/1
-Clients connected : 2
-Hostname Bytes Read BytesWritten
--------- --------- ------------
-127.0.0.1:1013 776 676
-127.0.0.1:1012 50440 51200</programlisting></para>
- </listitem>
- <listitem>
- <para>Display the memory usage and memory pool details of the bricks using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> mem</command>
-</para>
- <para>For example, to display the memory usage and memory pool details of the bricks of test-volume:</para>
- <screen>Memory status for volume : test-volume
-----------------------------------------------
-Brick : arch:/export/1
-Mallinfo
---------
-Arena : 434176
-Ordblks : 2
-Smblks : 0
-Hblks : 12
-Hblkhd : 40861696
-Usmblks : 0
-Fsmblks : 0
-Uordblks : 332416
-Fordblks : 101760
-Keepcost : 100400
-
-Mempool Stats
--------------
-Name HotCount ColdCount PaddedSizeof AllocCount MaxAlloc
----- -------- --------- ------------ ---------- --------
-test-volume-server:fd_t 0 16384 92 57 5
-test-volume-server:dentry_t 59 965 84 59 59
-test-volume-server:inode_t 60 964 148 60 60
-test-volume-server:rpcsvc_request_t 0 525 6372 351 2
-glusterfs:struct saved_frame 0 4096 124 2 2
-glusterfs:struct rpc_req 0 4096 2236 2 2
-glusterfs:rpcsvc_request_t 1 524 6372 2 1
-glusterfs:call_stub_t 0 1024 1220 288 1
-glusterfs:call_stack_t 0 8192 2084 290 2
-glusterfs:call_frame_t 0 16384 172 1728 6</screen>
- </listitem>
- <listitem>
- <para>Display the inode tables of the volume using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> inode</command>
-</para>
- <para>For example, to display the inode tables of the test-volume:</para>
- <para><programlisting># gluster volume status test-volume inode
-inode tables for volume test-volume
-----------------------------------------------
-Brick : arch:/export/1
-Active inodes:
-GFID Lookups Ref IA type
----- ------- --- -------
-6f3fe173-e07a-4209-abb6-484091d75499 1 9 2
-370d35d7-657e-44dc-bac4-d6dd800ec3d3 1 1 2
-
-LRU inodes:
-GFID Lookups Ref IA type
----- ------- --- -------
-80f98abe-cdcf-4c1d-b917-ae564cf55763 1 0 1
-3a58973d-d549-4ea6-9977-9aa218f233de 1 0 1
-2ce0197d-87a9-451b-9094-9baa38121155 1 0 2</programlisting></para>
- </listitem>
- <listitem>
- <para>Display the open fd tables of the volume using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> fd</command>
-</para>
- <para>For example, to display the open fd tables of the test-volume:</para>
- <para><screen># gluster volume status test-volume fd
-
-FD tables for volume test-volume
-----------------------------------------------
-Brick : arch:/export/1
-Connection 1:
-RefCount = 0 MaxFDs = 128 FirstFree = 4
-FD Entry PID RefCount Flags
--------- --- -------- -----
-0 26311 1 2
-1 26310 3 2
-2 26310 1 2
-3 26311 3 2
-
-Connection 2:
-RefCount = 0 MaxFDs = 128 FirstFree = 0
-No open fds
-
-Connection 3:
-RefCount = 0 MaxFDs = 128 FirstFree = 0
-No open fds</screen></para>
- </listitem>
- <listitem>
- <para>Display the pending calls of the volume using the following command:</para>
- <para><command># gluster volume status <replaceable>VOLNAME</replaceable> callpool</command>
-</para>
- <para>Each call has a call stack containing call frames.</para>
- <para>For example, to display the pending calls of test-volume:</para>
- <para><programlisting># gluster volume status test-volume
-
-Pending calls for volume test-volume
-----------------------------------------------
-Brick : arch:/export/1
-Pending calls: 2
-Call Stack1
- UID : 0
- GID : 0
- PID : 26338
- Unique : 192138
- Frames : 7
- Frame 1
- Ref Count = 1
- Translator = test-volume-server
- Completed = No
- Frame 2
- Ref Count = 0
- Translator = test-volume-posix
- Completed = No
- Parent = test-volume-access-control
- Wind From = default_fsync
- Wind To = FIRST_CHILD(this)-&gt;fops-&gt;fsync
- Frame 3
- Ref Count = 1
- Translator = test-volume-access-control
- Completed = No
- Parent = repl-locks
- Wind From = default_fsync
- Wind To = FIRST_CHILD(this)-&gt;fops-&gt;fsync
- Frame 4
- Ref Count = 1
- Translator = test-volume-locks
- Completed = No
- Parent = test-volume-io-threads
- Wind From = iot_fsync_wrapper
- Wind To = FIRST_CHILD (this)-&gt;fops-&gt;fsync
- Frame 5
- Ref Count = 1
- Translator = test-volume-io-threads
- Completed = No
- Parent = test-volume-marker
- Wind From = default_fsync
- Wind To = FIRST_CHILD(this)-&gt;fops-&gt;fsync
- Frame 6
- Ref Count = 1
- Translator = test-volume-marker
- Completed = No
- Parent = /export/1
- Wind From = io_stats_fsync
- Wind To = FIRST_CHILD(this)-&gt;fops-&gt;fsync
- Frame 7
- Ref Count = 1
- Translator = /export/1
- Completed = No
- Parent = test-volume-server
- Wind From = server_fsync_resume
- Wind To = bound_xl-&gt;fops-&gt;fsync</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_setting_volumes.xml b/doc/admin-guide/en-US/admin_setting_volumes.xml
deleted file mode 100644
index 6a8468d5f11..00000000000
--- a/doc/admin-guide/en-US/admin_setting_volumes.xml
+++ /dev/null
@@ -1,325 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-Setting_Volumes">
- <title>Setting up GlusterFS Server Volumes</title>
- <para>A volume is a logical collection of bricks where each brick is an export directory on a server in the trusted storage pool. Most of the gluster management operations are performed on the volume. </para>
- <para>To create a new volume in your storage environment, specify the bricks that comprise the volume. After you have created a new volume, you must start it before attempting to mount it. </para>
- <itemizedlist>
- <listitem>
- <para>Volumes of the following types can be created in your storage environment: </para>
- <itemizedlist>
- <listitem>
- <para>Distributed - Distributed volumes distributes files throughout the bricks in the volume. You can use distributed volumes where the requirement is to scale storage and the redundancy is either not important or is provided by other hardware/software layers. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Distributed"/> .</para>
- </listitem>
- <listitem>
- <para>Replicated – Replicated volumes replicates files across bricks in the volume. You can use replicated volumes in environments where high-availability and high-reliability are critical. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Replicated"/>.</para>
- </listitem>
- <listitem>
- <para>Striped – Striped volumes stripes data across bricks in the volume. For best results, you should use striped volumes only in high concurrency environments accessing very large files. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Striped"/>.</para>
- </listitem>
- <listitem>
- <para>Distributed Striped - Distributed striped volumes stripe data across two or more nodes in the cluster. You should use distributed striped volumes where the requirement is to scale storage and in high concurrency environments accessing very large files is critical. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Distributed_Striped"/>.</para>
- </listitem>
- <listitem>
- <para>Distributed Replicated - Distributed replicated volumes distributes files across replicated bricks in the volume. You can use distributed replicated volumes in environments where the requirement is to scale storage and high-reliability is critical. Distributed replicated volumes also offer improved read performance in most environments. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Distributed_Replicated"/>. </para>
- </listitem>
- <listitem>
- <para>Distributed Striped Replicated – Distributed striped replicated volumes distributes striped data across replicated bricks in the cluster. For best results, you should use distributed striped replicated volumes in highly concurrent environments where parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads. For more information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Distributed_Striped_Replicated"/>.
-</para>
- </listitem>
- <listitem>
- <para>Striped Replicated – Striped replicated volumes stripes data across replicated bricks in the cluster. For best results, you should use striped replicated volumes in highly concurrent environments where there is parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads. For more
-information, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Striped_Replicated"/>.</para>
- </listitem>
- </itemizedlist>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To create a new volume </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Create a new volume :</para>
- <para><command># gluster volume create<replaceable> NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable> | replica <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp, rdma] <replaceable>NEW-BRICK1 NEW-BRICK2 NEW-BRICK3...</replaceable></command></para>
- <para>For example, to create a volume called test-volume consisting of server3:/exp3 and server4:/exp4:</para>
- <para><programlisting># gluster volume create test-volume server3:/exp3 server4:/exp4
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- </listitem>
- </itemizedlist>
- <section id="sect-Administration_Guide-Setting_Volumes-Distributed">
- <title>Creating Distributed Volumes</title>
- <para>In a distributed volumes files are spread randomly across the bricks in the volume. Use distributed volumes where you need to scale storage and redundancy is either not important or is provided by other hardware/software layers. </para>
- <para><note>
- <para>Disk/server failure in distributed volumes can result in a serious loss of data because directory contents are spread randomly across the bricks in the volume. </para>
- </note></para>
- <figure>
- <title>Illustration of a Distributed Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Distributed_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a distributed volume</emphasis></para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create the distributed volume:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a distributed volume with four storage servers using tcp:</para>
- <para><programlisting># gluster volume create test-volume server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>(Optional) You can display the volume information:</para>
- <para><programlisting># gluster volume info
-Volume Name: test-volume
-Type: Distribute
-Status: Created
-Number of Bricks: 4
-Transport-type: tcp
-Bricks:
-Brick1: server1:/exp1
-Brick2: server2:/exp2
-Brick3: server3:/exp3
-Brick4: server4:/exp4</programlisting></para>
- <para>For example, to create a distributed volume with four storage servers over InfiniBand:</para>
- <para><programlisting># gluster volume create test-volume transport rdma server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Replicated">
- <title>Creating Replicated Volumes </title>
- <para>Replicated volumes create copies of files across multiple bricks in the volume. You can use replicated volumes in environments where high-availability and high-reliability are critical. </para>
- <para><note>
- <para>The number of bricks should be equal to of the replica count for a replicated volume.
-To protect against server and disk failures, it is recommended that the bricks of the volume are from different servers. </para>
- </note></para>
- <figure>
- <title>Illustration of a Replicated Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Replicated_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a replicated volume </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create the replicated volume:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [replica <replaceable>COUNT</replaceable>] [transport tcp | rdma tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a replicated volume with two storage servers:</para>
- <para><programlisting># gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Striped">
- <title>Creating Striped Volumes</title>
- <para>Striped volumes stripes data across bricks in the volume. For best results, you should use striped volumes only in high concurrency environments accessing very large files.</para>
- <para><note>
- <para>The number of bricks should be a equal to the stripe count for a striped volume. </para>
- </note></para>
- <figure>
- <title>Illustration of a Striped Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Striped_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a striped volume </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create the striped volume:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a striped volume across two storage servers:</para>
- <para><programlisting># gluster volume create test-volume stripe 2 transport tcp server1:/exp1 server2:/exp2
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Distributed_Striped">
- <title>Creating Distributed Striped Volumes </title>
- <para>Distributed striped volumes stripes files across two or more nodes in the cluster. For best results, you should use distributed striped volumes where the requirement is to scale storage and in high concurrency environments accessing very large files is critical.</para>
- <para><note>
- <para>The number of bricks should be a multiple of the stripe count for a distributed striped volume. </para>
- </note></para>
- <figure>
- <title>Illustration of a Distributed Striped Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Distributed_Striped_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a distributed striped volume </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create the distributed striped volume:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a distributed striped volume across eight storage servers:</para>
- <para><programlisting># gluster volume create test-volume stripe 4 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6 server7:/exp7 server8:/exp8
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Distributed_Replicated">
- <title>Creating Distributed Replicated Volumes </title>
- <para>Distributes files across replicated bricks in the volume. You can use distributed replicated volumes in environments where the requirement is to scale storage and high-reliability is critical. Distributed replicated volumes also offer improved read performance in most environments.</para>
- <para><note>
- <para>The number of bricks should be a multiple of the replica count for a distributed replicated volume. Also, the order in which bricks are specified has a great effect on data protection. Each replica_count consecutive bricks in the list you give will form a replica set, with all replica sets combined into a volume-wide distribute set. To make sure that replica-set members are not placed on the same node, list the first brick on every server, then the second brick on every server in the same order, and so on. </para>
- </note></para>
- <figure>
- <title>Illustration of a Distributed Replicated Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Distributed_Replicated_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a distributed replicated volume </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create the distributed replicated volume:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [replica <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, four node distributed (replicated) volume with a two-way mirror:
-</para>
- <para><programlisting># gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>For example, to create a six node distributed (replicated) volume with a two-way mirror:</para>
- <para><programlisting># gluster volume create test-volume replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Distributed_Striped_Replicated">
- <title>Creating Distributed Striped Replicated Volumes </title>
- <para>Distributed striped replicated volumes distributes striped data across replicated bricks in the cluster. For best results, you should use distributed striped replicated volumes in highly concurrent environments where parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads. </para>
- <para><note>
- <para>The number of bricks should be a multiples of number of stripe count and replica count for
-a distributed striped replicated volume.
- </para>
- </note></para>
- <para><emphasis role="bold">To create a distributed striped replicated volume</emphasis>
-</para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool as described earlier in <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create a distributed striped replicated volume using the following command:</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable>] [replica <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a distributed replicated striped volume across eight storage servers:
-</para>
- <para><programlisting># gluster volume create test-volume stripe 2 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6 server7:/exp7 server8:/exp8
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Striped_Replicated">
- <title>Creating Striped Replicated Volumes </title>
- <para>Striped replicated volumes stripes data across replicated bricks in the cluster. For best results, you should use striped replicated volumes in highly concurrent environments where there is parallel access of very large files and performance is critical. In this release, configuration of this volume type is supported only for Map Reduce workloads.</para>
- <para><note>
- <para>The number of bricks should be a multiple of the replicate count and stripe count for a
-striped replicated volume.
-</para>
- </note></para>
- <figure>
- <title>Illustration of a Striped Replicated Volume</title>
- <mediaobject>
- <imageobject>
- <imagedata fileref="images/Striped_Replicated_Volume.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para><emphasis role="bold">To create a striped replicated volume</emphasis>
-</para>
- <orderedlist>
- <listitem>
- <para>Create a trusted storage pool consisting of the storage servers that will comprise the volume.</para>
- <para>For more information, see <xref linkend="sect-Administration_Guide-Storage_Pools-Adding_Servers"/>.</para>
- </listitem>
- <listitem>
- <para>Create a striped replicated volume :</para>
- <para><command># gluster volume create <replaceable>NEW-VOLNAME</replaceable> [stripe <replaceable>COUNT</replaceable>] [replica <replaceable>COUNT</replaceable>] [transport tcp | rdma | tcp,rdma] <replaceable>NEW-BRICK...</replaceable></command></para>
- <para>For example, to create a striped replicated volume across four storage servers:
-
-</para>
- <para><programlisting># gluster volume create test-volume stripe 2 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>To create a striped replicated volume across six storage servers:
-</para>
- <para><programlisting># gluster volume create test-volume stripe 3 replica 2 transport tcp server1:/exp1 server2:/exp2 server3:/exp3 server4:/exp4 server5:/exp5 server6:/exp6
-Creation of test-volume has been successful
-Please start the volume to access data.</programlisting></para>
- <para>If the transport type is not specified, <emphasis role="italic"> tcp</emphasis> is used as the default. You can also set additional options if required, such as auth.allow or auth.reject. For more information, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/></para>
- <para><note>
- <para>Make sure you start your volumes before you try to mount them or else client operations after the mount will hang, see <xref linkend="sect-Administration_Guide-Setting_Volumes-Starting"/> for details. </para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-Setting_Volumes-Starting">
- <title>Starting Volumes </title>
- <para>You must start your volumes before you try to mount them. </para>
- <para><emphasis role="bold">To start a volume </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Start a volume:</para>
- <para><command># gluster volume start <replaceable>VOLNAME</replaceable></command></para>
- <para>For example, to start test-volume:</para>
- <para><programlisting># gluster volume start test-volume
-Starting test-volume has been successful</programlisting></para>
- </listitem>
- </itemizedlist>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_settingup_clients.xml b/doc/admin-guide/en-US/admin_settingup_clients.xml
deleted file mode 100644
index 22979acf477..00000000000
--- a/doc/admin-guide/en-US/admin_settingup_clients.xml
+++ /dev/null
@@ -1,511 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-GlusterFS_Client">
- <title>Accessing Data - Setting Up GlusterFS Client</title>
- <para>You can access gluster volumes in multiple ways. You can use Gluster Native Client method for high concurrency, performance and transparent failover in GNU/Linux clients. You can also use NFS v3 to access gluster volumes. Extensive testing has be done on GNU/Linux clients and NFS implementation in other operating system, such as FreeBSD, and Mac OS X, as well as Windows 7 (Professional and Up) and Windows Server 2003. Other NFS client implementations may work with gluster NFS server.</para>
- <para>You can use CIFS to access volumes when using Microsoft Windows as well as SAMBA clients. For this access method, Samba packages need to be present on the client side. </para>
- <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Native">
- <title>Gluster Native Client</title>
- <para>The Gluster Native Client is a FUSE-based client running in user space. Gluster Native Client is the recommended method for accessing volumes when high concurrency and high write performance is required.</para>
- <para>This section introduces the Gluster Native Client and explains how to install the software on client machines. This section also describes how to mount volumes on clients (both manually and automatically) and how to verify that the volume has mounted successfully. </para>
- <section>
- <title>Installing the Gluster Native Client</title>
- <para>Before you begin installing the Gluster Native Client, you need to verify that the FUSE module is loaded on the client and has access to the required modules as follows: </para>
- <orderedlist>
- <listitem>
- <para>Add the FUSE loadable kernel module (LKM) to the Linux kernel:</para>
- <para><command># modprobe fuse</command></para>
- </listitem>
- <listitem>
- <para>Verify that the FUSE module is loaded:</para>
- <para><command># dmesg | grep -i fuse </command></para>
- <para><command>fuse init (API version 7.13)</command></para>
- </listitem>
- </orderedlist>
- <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Native-RPM">
- <title>Installing on Red Hat Package Manager (RPM) Distributions </title>
- <para>To install Gluster Native Client on RPM distribution-based systems</para>
- <orderedlist>
- <listitem>
- <para>Install required prerequisites on the client using the following command:</para>
- <para><command>$ sudo yum -y install openssh-server wget fuse fuse-libs openib libibverbs</command></para>
- </listitem>
- <listitem>
- <para>Ensure that TCP and UDP ports 24007 and 24008 are open on all Gluster servers. Apart from these ports, you need to open one port for each brick starting from port 24009. For example: if you have five bricks, you need to have ports 24009 to 24013 open.</para>
- <para>You can use the following chains with iptables:</para>
- <para><code>$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24007:24008 -j ACCEPT </code></para>
- <para><code>$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24009:24014 -j ACCEPT</code></para>
- <para><note>
- <para>If you already have iptable chains, make sure that the above ACCEPT rules precede the DROP rules. This can be achieved by providing a lower rule number than the DROP rule.</para>
- </note></para>
- </listitem>
- <listitem>
- <para>Download the latest glusterfs, glusterfs-fuse, and glusterfs-rdma RPM files to each client. The glusterfs package contains the Gluster Native Client. The glusterfs-fuse package contains the FUSE translator required for mounting on client systems and the glusterfs-rdma packages contain OpenFabrics verbs RDMA module for Infiniband.</para>
- <para>You can download the software at <ulink url="http://bits.gluster.com/gluster/glusterfs/3.3.0qa30/x86_64/"/>.</para>
- </listitem>
- <listitem>
- <para>Install Gluster Native Client on the client.</para>
- <para><command>$ sudo rpm -i glusterfs-3.3.0qa30-1.x86_64.rpm </command></para>
- <para><command>$ sudo rpm -i glusterfs-fuse-3.3.0qa30-1.x86_64.rpm </command></para>
- <para><command>$ sudo rpm -i glusterfs-rdma-3.3.0qa30-1.x86_64.rpm</command></para>
- <para><note>
- <para>The RDMA module is only required when using Infiniband.</para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section condition="gfs">
- <title>Installing on Debian-based Distributions</title>
- <para>To install Gluster Native Client on Debian-based distributions</para>
- <orderedlist>
- <listitem>
- <para>Install OpenSSH Server on each client using the following command:</para>
- <para><command>$ sudo apt-get install openssh-server vim wget</command></para>
- </listitem>
- <listitem>
- <para>Download the latest GlusterFS .deb file and checksum to each client.</para>
- <para>You can download the software at <ulink url="http://www.gluster.org/download/"/>.</para>
- </listitem>
- <listitem>
- <para>For each .deb file, get the checksum (using the following command) and compare it against the checksum for that file in the md5sum file.</para>
- <para>
-<command>$ md5sum GlusterFS_DEB_file.deb </command></para>
- <para>The md5sum of the packages is available at: <ulink url="http://download.gluster.com/pub/gluster/glusterfs"/></para>
- </listitem>
- <listitem>
- <para>Uninstall GlusterFS v3.1 (or an earlier version) from the client using the following command:
-</para>
- <para><command>$ sudo dpkg -r glusterfs </command></para>
- <para>(Optional) Run <command>$ sudo dpkg -purge glusterfs </command>to purge the configuration files.</para>
- </listitem>
- <listitem>
- <para>Install Gluster Native Client on the client using the following command:
-</para>
- <para><command>$ sudo dpkg -i GlusterFS_DEB_file </command></para>
- <para>For example:
-</para>
- <para><command>$ sudo dpkg -i glusterfs-3.3.x.deb </command></para>
- </listitem>
- <listitem>
- <para>Ensure that TCP and UDP ports 24007 and 24008 are open on all Gluster servers. Apart from these ports, you need to open one port for each brick starting from port 24009. For example: if you have five bricks, you need to have ports 24009 to 24013 open.
-</para>
- <para>You can use the following chains with iptables:
-</para>
- <para><code>$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24007:24008 -j ACCEPT </code></para>
- <para><code>$ sudo iptables -A RH-Firewall-1-INPUT -m state --state NEW -m tcp -p tcp --dport 24009:24014 -j ACCEPT</code></para>
- <para><note>
- <para>If you already have iptable chains, make sure that the above ACCEPT rules precede the DROP rules. This can be achieved by providing a lower rule number than the DROP rule.</para>
- </note></para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Performing a Source Installation</title>
- <para>To build and install Gluster Native Client from the source code</para>
- <orderedlist>
- <listitem>
- <para>Create a new directory using the following commands:</para>
- <para><command># mkdir glusterfs </command></para>
- <para><command># cd glusterfs</command></para>
- </listitem>
- <listitem>
- <para>Download the source code.
-</para>
- <para>You can download the source at <ulink url="http://www.gluster.org/download/"/>.</para>
- </listitem>
- <listitem>
- <para>Extract the source code using the following command:
-</para>
- <para><command># tar -xvzf SOURCE-FILE </command></para>
- </listitem>
- <listitem>
- <para>Run the configuration utility using the following command:
-</para>
- <para><code># ./configure </code></para>
- <para><code>GlusterFS configure summary </code></para>
- <para><code>================== </code></para>
- <para><code>FUSE client : yes </code></para>
- <para><code>Infiniband verbs : yes </code></para>
- <para><code>epoll IO multiplex : yes </code></para>
- <para><code>argp-standalone : no </code></para>
- <para><code>fusermount : no </code></para>
- <para><code>readline : yes</code></para>
- <para>The configuration summary shows the components that will be built with Gluster Native Client.</para>
- </listitem>
- <listitem>
- <para>Build the Gluster Native Client software using the following commands:
-</para>
- <para><command># make </command></para>
- <para><command># make install</command></para>
- </listitem>
- <listitem>
- <para>Verify that the correct version of Gluster Native Client is installed, using the following command:
-</para>
- <para><command># glusterfs –-version</command></para>
- </listitem>
- </orderedlist>
- </section>
- </section>
- <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Mounting_Volumes">
- <title>Mounting Volumes</title>
- <para>After installing the Gluster Native Client, you need to mount Gluster volumes to access data. There are two methods you can choose: </para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Manuall"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Automatic"/></para>
- </listitem>
- </itemizedlist>
- <para>After mounting a volume, you can test the mounted volume using the procedure described in <xref linkend="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Native-Testing"/>. </para>
- <para><note>
- <para>Server names selected during creation of Volumes should be resolvable in the client machine. You can use appropriate /etc/hosts entries or DNS server to resolve server names to IP addresses. </para>
- </note></para>
- <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Manuall">
- <title>Manually Mounting Volumes</title>
- <para>To manually mount a Gluster volume </para>
- <itemizedlist>
- <listitem>
- <para>To mount a volume, use the following command:
-</para>
- <para><command># mount -t glusterfs HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR</command>
-</para>
- <para>For example:
-</para>
- <para><command># mount -t glusterfs server1:/test-volume /mnt/glusterfs</command></para>
- <note>
- <para>The server specified in the mount command is only used to fetch the gluster configuration volfile describing the volume name. Subsequently, the client will communicate directly with the servers mentioned in the volfile (which might not even include the one used for mount).
-
-</para>
- <para>If you see a usage message like &quot;Usage: mount.glusterfs&quot;, mount usually requires you to create a directory to be used as the mount point. Run &quot;mkdir /mnt/glusterfs&quot; before you attempt to run the mount command listed above.</para>
- </note>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">Mounting Options</emphasis></para>
- <para>You can specify the following options when using the <command>mount -t glusterfs</command> command. Note that you need to separate all options with commas.
-
-</para>
- <para>backupvolfile-server=server-name</para>
- <para>volfile-max-fetch-attempts=number of attempts</para>
- <para>log-level=loglevel
-</para>
- <para>log-file=logfile
-</para>
- <para>transport=transport-type
-</para>
- <para>direct-io-mode=[enable|disable]
-
-</para>
- <para>For example:
-</para>
- <para><code># mount -t glusterfs -o backupvolfile-server=volfile_server2 --volfile-max-fetch-attempts=2 log-level=WARNING,log-file=/var/log/gluster.log server1:/test-volume /mnt/glusterfs</code></para>
- <para>If <option>backupvolfile-server</option> option is added while mounting fuse client, when the first
-volfile server fails, then the server specified in <option>backupvolfile-server</option> option is used as volfile server to mount
-the client.</para>
- <para>In <code>--volfile-max-fetch-attempts=X</code> option, specify the number of attempts to fetch volume files while mounting a volume. This option is useful when you mount a server with multiple IP addresses or when round-robin DNS is configured for the server-name.. </para>
- </section>
- <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Automatic" dir="lro">
- <title>Automatically Mounting Volumes</title>
- <para>You can configure your system to automatically mount the Gluster volume each time your system starts. </para>
- <para>The server specified in the mount command is only used to fetch the gluster configuration volfile describing the volume name. Subsequently, the client will communicate directly with the servers mentioned in the volfile (which might not even include the one used for mount). </para>
- <para><emphasis role="bold">To automatically mount a Gluster volume</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>To mount a volume, edit the /etc/fstab file and add the following line:
-</para>
- <para><command>HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR glusterfs defaults,_netdev 0 0 </command></para>
- <para>For example:
-</para>
- <para><code>server1:/test-volume /mnt/glusterfs glusterfs defaults,_netdev 0 0</code></para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">Mounting Options </emphasis></para>
- <para>You can specify the following options when updating the /etc/fstab file. Note that you need to separate all options with commas.
-
-</para>
- <para>log-level=loglevel
-</para>
- <para>log-file=logfile
-</para>
- <para>transport=transport-type
-</para>
- <para>direct-io-mode=[enable|disable]
-
-</para>
- <para>For example:
-</para>
- <para><code>HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR glusterfs defaults,_netdev,log-level=WARNING,log-file=/var/log/gluster.log 0 0 </code></para>
- </section>
- <section id="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Native-Testing">
- <title>Testing Mounted Volumes</title>
- <para>To test mounted volumes</para>
- <itemizedlist>
- <listitem>
- <para>Use the following command:
-</para>
- <para><command># mount </command></para>
- <para>If the gluster volume was successfully mounted, the output of the mount command on the client will be similar to this example:
-
-</para>
- <para><code>server1:/test-volume on /mnt/glusterfs type fuse.glusterfs (rw,allow_other,default_permissions,max_read=131072</code></para>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>Use the following command:
-</para>
- <para><command># df</command>
-</para>
- <para>The output of df command on the client will display the aggregated storage space from all the bricks in a volume similar to this example:
-</para>
- <para><code># df -h /mnt/glusterfs Filesystem Size Used Avail Use% Mounted on server1:/test-volume 28T 22T 5.4T 82% /mnt/glusterfs</code></para>
- </listitem>
- <listitem>
- <para>Change to the directory and list the contents by entering the following:
-</para>
- <para><command># cd MOUNTDIR </command></para>
- <para><command># ls</command></para>
- </listitem>
- <listitem>
- <para>For example,</para>
- <para><code># cd /mnt/glusterfs </code></para>
- <para><code># ls</code></para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- </section>
- <section id="sect-Administration_Guide-GlusterFS_Client-NFS">
- <title>NFS</title>
- <para>You can use NFS v3 to access to gluster volumes. Extensive testing has be done on GNU/Linux clients and NFS implementation in other operating system, such as FreeBSD, and Mac OS X, as well as Windows 7 (Professional and Up), Windows Server 2003, and others, may work with gluster NFS server implementation. </para>
- <para>GlusterFS now includes network lock manager (NLM) v4. NLM enables applications on NFSv3 clients to do record locking on files on NFS server. It is started automatically whenever the NFS server is run.</para>
- <para condition="gfs">You must install nfs-common package on both servers and clients (only for Debian-based) distribution.</para>
- <para>This section describes how to use NFS to mount Gluster volumes (both manually and automatically) and how to verify that the volume has been mounted successfully. </para>
- <section>
- <title>Using NFS to Mount Volumes</title>
- <para>You can use either of the following methods to mount Gluster volumes: </para>
- <para><itemizedlist>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-NFS-Manual"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-NFS-Automatic"/></para>
- </listitem>
- </itemizedlist></para>
- <para condition="gfs"><emphasis role="bold">Prerequisite</emphasis>: Install nfs-common package on both servers and clients (only for Debian-based distribution), using the following command: </para>
- <para condition="gfs"><command>$ sudo aptitude install nfs-common </command></para>
- <para>After mounting a volume, you can test the mounted volume using the procedure described in <xref linkend="sect-Administration_Guide-GlusterFS_Client-NFS-Testing"/>. </para>
- <section id="sect-Administration_Guide-GlusterFS_Client-NFS-Manual">
- <title>Manually Mounting Volumes Using NFS </title>
- <para>To manually mount a Gluster volume using NFS </para>
- <itemizedlist>
- <listitem>
- <para>To mount a volume, use the following command:
-</para>
- <para><command># mount -t nfs -o vers=3 HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR</command>
-</para>
- <para>For example:</para>
- <para><command># mount -t nfs -o vers=3 server1:/test-volume /mnt/glusterfs</command></para>
- <para><note>
- <para> Gluster NFS server does not support UDP. If the NFS client you are using defaults to connecting using UDP, the following message appears:
-</para>
- <para><code>requested NFS version or transport protocol is not supported</code>. </para>
- </note></para>
- <para><emphasis role="bold">To connect using TCP</emphasis></para>
- </listitem>
- <listitem>
- <para>Add the following option to the mount command:
-</para>
- <para><command>-o mountproto=tcp </command></para>
- <para>For example:
-</para>
- <para><command># mount -o mountproto=tcp -t nfs server1:/test-volume /mnt/glusterfs</command></para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To mount Gluster NFS server from a Solaris client </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Use the following command:
-</para>
- <para><command># mount -o proto=tcp,vers=3 nfs://HOSTNAME-OR-IPADDRESS:38467/VOLNAME MOUNTDIR</command></para>
- <para>
-For example:</para>
- <para><command> # mount -o proto=tcp,vers=3 nfs://server1:38467/test-volume /mnt/glusterfs</command></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-GlusterFS_Client-NFS-Automatic">
- <title>Automatically Mounting Volumes Using NFS</title>
- <para>You can configure your system to automatically mount Gluster volumes using NFS each time the system starts.</para>
- <para><emphasis role="bold">To automatically mount a Gluster volume using NFS </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>To mount a volume, edit the /etc/fstab file and add the following line:</para>
- <para><command>HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR nfs defaults,_netdev,vers=3 0 0</command></para>
- <para>For example,</para>
- <para><command>server1:/test-volume /mnt/glusterfs nfs defaults,_netdev,vers=3 0 0</command></para>
- <note>
- <para>Gluster NFS server does not support UDP. If the NFS client you are using defaults to connecting using UDP, the following message appears: </para>
- <para><command>requested NFS version or transport protocol is not supported.</command></para>
- </note>
- <para/>
- <para>To connect using TCP </para>
- </listitem>
- <listitem>
- <para>Add the following entry in /etc/fstab file :</para>
- <para><command>HOSTNAME-OR-IPADDRESS:/VOLNAME MOUNTDIR nfs defaults,_netdev,mountproto=tcp 0 0</command></para>
- <para>For example,</para>
- <para><command>server1:/test-volume /mnt/glusterfs nfs defaults,_netdev,mountproto=tcp 0 0</command></para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">To automount NFS mounts</emphasis></para>
- <para>Gluster supports *nix standard method of automounting NFS mounts. Update the /etc/auto.master and /etc/auto.misc and restart the autofs service. After that, whenever a user or process attempts to access the directory it will be mounted in the background. </para>
- </section>
- <section id="sect-Administration_Guide-GlusterFS_Client-NFS-Testing">
- <title>Testing Volumes Mounted Using NFS</title>
- <para>You can confirm that Gluster directories are mounting successfully. </para>
- <para><emphasis role="bold">To test mounted volumes</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Use the mount command by entering the following:</para>
- <para><command># mount</command></para>
- <para>For example, the output of the mount command on the client will display an entry like the following:</para>
- <para><command>server1:/test-volume on /mnt/glusterfs type nfs (rw,vers=3,addr=server1)</command></para>
- </listitem>
- </itemizedlist>
- <itemizedlist>
- <listitem>
- <para>Use the df command by entering the following:</para>
- <para><command># df</command></para>
- <para>For example, the output of df command on the client will display the aggregated storage space from all the bricks in a volume.</para>
- <para><screen># df -h /mnt/glusterfs
-Filesystem Size Used Avail Use% Mounted on
-server1:/test-volume 28T 22T 5.4T 82% /mnt/glusterfs</screen></para>
- </listitem>
- <listitem>
- <para>Change to the directory and list the contents by entering the following:</para>
- <para><command># cd MOUNTDIR</command></para>
- <para><command># ls</command></para>
- <para>For example,</para>
- <para><command>
- <command># cd /mnt/glusterfs</command>
- </command></para>
- <para><command># ls</command></para>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- </section>
- <section id="sect-Administration_Guide-GlusterFS_Client-CIFS">
- <title>CIFS</title>
- <para>You can use CIFS to access to volumes when using Microsoft Windows as well as SAMBA clients. For this access method, Samba packages need to be present on the client side. You can export glusterfs mount point as the samba export, and then mount it using CIFS protocol.</para>
- <para>This section describes how to mount CIFS shares on Microsoft Windows-based clients (both manually and automatically) and how to verify that the volume has mounted successfully.</para>
- <para><note>
- <para> CIFS access using the Mac OS X Finder is not supported, however, you can use the Mac OS X command line to access Gluster volumes using CIFS.</para>
- </note></para>
- <section>
- <title>Using CIFS to Mount Volumes</title>
- <para>You can use either of the following methods to mount Gluster volumes: </para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-CIFS-Manual"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-GlusterFS_Client-CIFS-Automatic"/></para>
- </listitem>
- </itemizedlist>
- <para>After mounting a volume, you can test the mounted volume using the procedure described in <xref linkend="sect-Administration_Guide-GlusterFS_Client-CIFS-Testing"/>.</para>
- <para>You can also use Samba for exporting Gluster Volumes through CIFS protocol.</para>
- <section>
- <title>Exporting Gluster Volumes Through Samba</title>
- <para>We recommend you to use Samba for exporting Gluster volumes through the CIFS protocol. </para>
- <para><emphasis role="bold">To export volumes through CIFS protocol </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Mount a Gluster volume. For more information on mounting volumes, see <xref linkend="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Mounting_Volumes"/>.</para>
- </listitem>
- <listitem>
- <para>Setup Samba configuration to export the mount point of the Gluster volume.</para>
- <para>For example, if a Gluster volume is mounted on /mnt/gluster, you must edit smb.conf file to enable exporting this through CIFS. Open smb.conf file in an editor and add the following lines for a simple configuration:</para>
- <para>[glustertest]
- </para>
- <para> comment = For testing a Gluster volume exported through CIFS
- </para>
- <para> path = /mnt/glusterfs
- </para>
- <para> read only = no
- </para>
- <para> guest ok = yes</para>
- </listitem>
- </orderedlist>
- <para>Save the changes and start the smb service using your systems init scripts (/etc/init.d/smb [re]start).</para>
- <para><note>
- <para>To be able mount from any server in the trusted storage pool, you must repeat these steps on each Gluster node. For more advanced configurations, see Samba documentation. </para>
- </note></para>
- </section>
- <section id="sect-Administration_Guide-GlusterFS_Client-CIFS-Manual">
- <title>Manually Mounting Volumes Using CIFS </title>
- <para>You can manually mount Gluster volumes using CIFS on Microsoft Windows-based client machines. </para>
- <para><emphasis role="bold">To manually mount a Gluster volume using CIFS </emphasis></para>
- <orderedlist>
- <listitem>
- <para>Using Windows Explorer, choose <emphasis role="bold">Tools &gt; Map Network Drive…</emphasis> from the menu. The <emphasis role="bold">Map Network Drive </emphasis>window appears. </para>
- </listitem>
- <listitem>
- <para>Choose the drive letter using the <emphasis role="bold">Drive</emphasis> drop-down list. </para>
- </listitem>
- <listitem>
- <para>Click <emphasis role="bold">Browse</emphasis>, select the volume to map to the network drive, and click <emphasis role="bold">OK</emphasis>. </para>
- </listitem>
- <listitem>
- <para>Click <emphasis role="bold">Finish.</emphasis></para>
- </listitem>
- </orderedlist>
- <para>The network drive (mapped to the volume) appears in the Computer window.</para>
- <para><emphasis role="bold">Alternatively, to manually mount a Gluster volume using CIFS.</emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Click <emphasis role="bold">Start &gt; Run</emphasis> and enter the following:</para>
- <para><command>
- <code>\\SERVERNAME\VOLNAME</code>
- </command></para>
- <para>For example:</para>
- <para><command>
- <code>\\server1\test-volume</code>
- </command></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-GlusterFS_Client-CIFS-Automatic">
- <title>Automatically Mounting Volumes Using CIFS</title>
- <para>You can configure your system to automatically mount Gluster volumes using CIFS on Microsoft Windows-based clients each time the system starts.</para>
- <para><emphasis role="bold">To automatically mount a Gluster volume using CIFS</emphasis></para>
- <para>The network drive (mapped to the volume) appears in the Computer window and is reconnected each time the system starts.</para>
- <orderedlist>
- <listitem>
- <para>Using Windows Explorer, choose <emphasis role="bold">Tools &gt; Map Network Drive…</emphasis> from the menu. The <emphasis role="bold">Map Network Drive </emphasis>window appears. </para>
- </listitem>
- <listitem>
- <para>Choose the drive letter using the <emphasis role="bold">Drive</emphasis> drop-down list. </para>
- </listitem>
- <listitem>
- <para>Click <emphasis role="bold">Browse</emphasis>, select the volume to map to the network drive, and click <emphasis role="bold">OK</emphasis>. </para>
- </listitem>
- <listitem>
- <para>Click the <emphasis role="bold">Reconnect</emphasis> at logon checkbox.</para>
- </listitem>
- <listitem>
- <para>Click <emphasis role="bold">Finish.</emphasis></para>
- </listitem>
- </orderedlist>
- </section>
- <section id="sect-Administration_Guide-GlusterFS_Client-CIFS-Testing">
- <title>Testing Volumes Mounted Using CIFS</title>
- <para>You can confirm that Gluster directories are mounting successfully by navigating to the directory using Windows Explorer. </para>
- </section>
- </section>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_start_stop_daemon.xml b/doc/admin-guide/en-US/admin_start_stop_daemon.xml
deleted file mode 100644
index bdab0b8b608..00000000000
--- a/doc/admin-guide/en-US/admin_start_stop_daemon.xml
+++ /dev/null
@@ -1,56 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd" [
-<!ENTITY % BOOK_ENTITIES SYSTEM "Administration_Guide.ent">
-%BOOK_ENTITIES;
-]>
-<chapter id="chap-Administration_Guide-Start_Stop_Daemon">
- <title id="chap-Administration_Guide-Stop_Start_Daemon">Managing the glusterd Service</title>
- <para>After installing GlusterFS, you must start glusterd service. The glusterd service serves as the Gluster elastic volume manager, overseeing glusterfs processes, and co-ordinating dynamic volume operations, such as adding and removing volumes across multiple storage servers non-disruptively.</para>
- <para>This section describes how to start the glusterd service in the following ways: </para>
- <itemizedlist>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Start_Stop_Daemon-Manually"/></para>
- </listitem>
- <listitem>
- <para><xref linkend="sect-Administration_Guide-Start_Stop_Daemon-Automatically"/></para>
- </listitem>
- </itemizedlist>
- <note>
- <para>You must start glusterd on all GlusterFS servers.</para>
- </note>
- <section id="sect-Administration_Guide-Start_Stop_Daemon-Manually">
- <title>Starting and Stopping glusterd Manually</title>
- <para>This section describes how to start and stop glusterd manually</para>
- <itemizedlist>
- <listitem>
- <para>To start glusterd manually, enter the following command:</para>
- <para><command># /etc/init.d/glusterd start </command></para>
- </listitem>
- <listitem>
- <para>To stop glusterd manually, enter the following command: </para>
- <para><command># /etc/init.d/glusterd stop</command></para>
- </listitem>
- </itemizedlist>
- </section>
- <section id="sect-Administration_Guide-Start_Stop_Daemon-Automatically">
- <title>Starting glusterd Automatically</title>
- <para condition="gfs">This section describes how to configure the system to automatically start the glusterd service every time the system boots. </para>
- <para condition="appliance">To automatically start the glusterd service every time the system boots, enter the following from the command line: </para>
- <para condition="appliance"><command># chkconfig glusterd on </command></para>
- <section condition="gfs">
- <title condition="gfs">Red Hat-based Systems</title>
- <para>To configure Red Hat-based systems to automatically start the glusterd service every time the system boots, enter the following from the command line: </para>
- <para><command># chkconfig glusterd on </command></para>
- </section>
- <section condition="gfs">
- <title condition="gfs">Debian-based Systems</title>
- <para>To configure Debian-based systems to automatically start the glusterd service every time the system boots, enter the following from the command line:</para>
- <para><command># update-rc.d glusterd defaults</command></para>
- </section>
- <section condition="gfs">
- <title condition="gfs">Systems Other than Red Hat and Debain</title>
- <para>To configure systems other than Red Hat or Debian to automatically start the glusterd service every time the system boots, enter the following entry to the<emphasis role="italic"> /etc/rc.local</emphasis> file: </para>
- <para><command># echo &quot;glusterd&quot; &gt;&gt; /etc/rc.local </command></para>
- </section>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_storage_pools.xml b/doc/admin-guide/en-US/admin_storage_pools.xml
deleted file mode 100644
index 87b6320bd4b..00000000000
--- a/doc/admin-guide/en-US/admin_storage_pools.xml
+++ /dev/null
@@ -1,57 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Storage-pool">
- <title>Setting up Trusted Storage Pools</title>
- <para>Before you can configure a GlusterFS volume, you must create a trusted storage pool consisting of the storage servers that provides bricks to a volume. </para>
- <para>A storage pool is a trusted network of storage servers. When you start the first server, the storage pool consists of that server alone. To add additional storage servers to the storage pool, you can use the probe command from a storage server that is already trusted. </para>
- <para><note>
- <para>Do not self-probe the first server/localhost.</para>
- </note></para>
- <para>The GlusterFS service must be running on all storage servers that you want to add to the storage pool. See <xref linkend="chap-Administration_Guide-Start_Stop_Daemon"/> for more information.</para>
- <section id="sect-Administration_Guide-Storage_Pools-Adding_Servers">
- <title>Adding Servers to Trusted Storage Pool</title>
- <para>To create a trusted storage pool, add servers to the trusted storage pool</para>
- <orderedlist>
- <listitem>
- <para>The hostnames used to create the storage pool must be resolvable by DNS.</para>
- <para>To add a server to the storage pool:</para>
- <para><command># gluster peer probe <replaceable>server</replaceable></command></para>
- <para>For example, to create a trusted storage pool of four servers, add three servers to the storage pool from server1:</para>
- <para><programlisting># gluster peer probe server2
-Probe successful
-
-# gluster peer probe server3
-Probe successful
-
-# gluster peer probe server4
-Probe successful
-</programlisting></para>
- </listitem>
- <listitem>
- <para>Verify the peer status from the first server using the following commands:</para>
- <para><programlisting># gluster peer status
-Number of Peers: 3
-
-Hostname: server2
-Uuid: 5e987bda-16dd-43c2-835b-08b7d55e94e5
-State: Peer in Cluster (Connected)
-
-Hostname: server3
-Uuid: 1e0ca3aa-9ef7-4f66-8f15-cbc348f29ff7
-State: Peer in Cluster (Connected)
-
-Hostname: server4
-Uuid: 3e0caba-9df7-4f66-8e5d-cbc348f29ff7
-State: Peer in Cluster (Connected)</programlisting></para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Removing Servers from the Trusted Storage Pool</title>
- <para>To remove a server from the storage pool:</para>
- <para><command># gluster peer detach<replaceable> server</replaceable></command></para>
- <para> For example, to remove server4 from the trusted storage pool:</para>
- <para><programlisting># gluster peer detach server4
-Detach successful</programlisting></para>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/admin_troubleshooting.xml b/doc/admin-guide/en-US/admin_troubleshooting.xml
deleted file mode 100644
index dff182c5f1e..00000000000
--- a/doc/admin-guide/en-US/admin_troubleshooting.xml
+++ /dev/null
@@ -1,509 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter id="chap-Administration_Guide-Troubleshooting">
- <title>Troubleshooting GlusterFS </title>
- <para>This section describes how to manage GlusterFS logs and most common troubleshooting scenarios
-related to GlusterFS.
-</para>
- <section>
- <title>Managing GlusterFS Logs </title>
- <para>This section describes how to manage GlusterFS logs by performing the following operation:
-
-</para>
- <itemizedlist>
- <listitem>
- <para>Rotating Logs
-</para>
- </listitem>
- </itemizedlist>
- <section>
- <title>Rotating Logs </title>
- <para>Administrators can rotate the log file in a volume, as needed.
-</para>
- <para><emphasis role="bold">To rotate a log file </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Rotate the log file using the following command:
-</para>
- <para><command># gluster volume log rotate <replaceable>VOLNAME</replaceable></command></para>
- <para>For example, to rotate the log file on test-volume:
-</para>
- <programlisting># gluster volume log rotate test-volume
-log rotate successful
-</programlisting>
- <note>
- <para>When a log file is rotated, the contents of the current log file are moved to log-file-
-name.epoch-time-stamp.
-</para>
- </note>
- </listitem>
- </itemizedlist>
- </section>
- </section>
- <section>
- <title>Troubleshooting Geo-replication </title>
- <para>This section describes the most common troubleshooting scenarios related to GlusterFS Geo-replication.
-</para>
- <section>
- <title>Locating Log Files </title>
- <para>For every Geo-replication session, the following three log files are associated to it (four, if the slave is a
-gluster volume):
-</para>
- <itemizedlist>
- <listitem>
- <para>Master-log-file - log file for the process which monitors the Master volume
-</para>
- </listitem>
- <listitem>
- <para>Slave-log-file - log file for process which initiates the changes in slave
-</para>
- </listitem>
- <listitem>
- <para>Master-gluster-log-file - log file for the maintenance mount point that Geo-replication module
-uses to monitor the master volume
-</para>
- </listitem>
- <listitem>
- <para>Slave-gluster-log-file - is the slave&apos;s counterpart of it
-</para>
- </listitem>
- </itemizedlist>
- <para><emphasis role="bold">Master Log File</emphasis>
-</para>
- <para>To get the Master-log-file for geo-replication, use the following command:
-</para>
- <para><command>gluster volume geo-replication <code>MASTER SLAVE</code> config log-file</command>
-</para>
- <para>For example:
-</para>
- <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir config log-file </command></para>
- <para><emphasis role="bold">Slave Log File </emphasis></para>
- <para>To get the log file for Geo-replication on slave (glusterd must be running on slave machine), use the
-following commands:
-</para>
- <orderedlist>
- <listitem>
- <para>On master, run the following command:
-</para>
- <para><command># gluster volume geo-replication Volume1 example.com:/data/remote_dir config session-owner 5f6e5200-756f-11e0-a1f0-0800200c9a66 </command></para>
- <para>Displays the session owner details.
-</para>
- </listitem>
- <listitem>
- <para>On slave, run the following command:
-</para>
- <para><command># gluster volume geo-replication /data/remote_dir config log-file /var/log/gluster/${session-owner}:remote-mirror.log </command></para>
- </listitem>
- <listitem>
- <para>Replace the session owner details (output of Step 1) to the output of the Step 2 to get the
-location of the log file.
-</para>
- <para><command>/var/log/gluster/5f6e5200-756f-11e0-a1f0-0800200c9a66:remote-mirror.log</command>
-</para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>Rotating Geo-replication Logs</title>
- <para>Administrators can rotate the log file of a particular master-slave session, as needed.
-When you run geo-replication&apos;s <command> log-rotate</command> command, the log file
-is backed up with the current timestamp suffixed to the file
-name and signal is sent to gsyncd to start logging to a new
-log file.</para>
- <para><emphasis role="bold">To rotate a geo-replication log file </emphasis></para>
- <itemizedlist>
- <listitem>
- <para>Rotate log file for a particular master-slave session using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>master slave</replaceable> log-rotate</command>
-</para>
- <para>For example, to rotate the log file of master <filename>Volume1</filename> and slave <filename>example.com:/data/remote_dir</filename>
-:
-</para>
- <programlisting># gluster volume geo-replication Volume1 example.com:/data/remote_dir log rotate
-log rotate successful</programlisting>
- </listitem>
- <listitem>
- <para>Rotate log file for all sessions for a master volume using the following command:
-</para>
- <para><command># gluster volume geo-replication <replaceable>master</replaceable> log-rotate</command>
-</para>
- <para>For example, to rotate the log file of master <filename>Volume1</filename>:
-</para>
- <programlisting># gluster volume geo-replication Volume1 log rotate
-log rotate successful</programlisting>
- </listitem>
- <listitem>
- <para>Rotate log file for all sessions using the following command:
-</para>
- <para><command># gluster volume geo-replication log-rotate</command>
-</para>
- <para>For example, to rotate the log file for all sessions:</para>
- <programlisting># gluster volume geo-replication log rotate
-log rotate successful</programlisting>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Synchronization is not complete </title>
- <para><emphasis role="bold">Description</emphasis>: GlusterFS Geo-replication did not synchronize the data completely but still the geo-
-replication status displayed is OK.
-</para>
- <para><emphasis role="bold">Solution</emphasis>: You can enforce a full sync of the data by erasing the index and restarting GlusterFS Geo-
-replication. After restarting, GlusterFS Geo-replication begins synchronizing all the data. All files are compared using checksum, which can be a lengthy and high resource utilization operation on large
-data sets. If the error situation persists, contact Red Hat Support.
-</para>
- <para>For more information about erasing index, see <xref linkend="sect-Administration_Guide-Managing_Volumes-Tuning"/>.
-</para>
- </section>
- <section>
- <title>Issues in Data Synchronization </title>
- <para><emphasis role="bold">Description</emphasis>: Geo-replication display status as OK, but the files do not get synced, only
-directories and symlink gets synced with the following error message in the log:
-</para>
- <para><errortext>[2011-05-02 13:42:13.467644] E [master:288:regjob] GMaster: failed to sync ./some_file` </errortext></para>
- <para><emphasis role="bold">Solution</emphasis>: Geo-replication invokes rsync v3.0.0 or higher on the host and the remote machine. You must verify if
-you have installed the required version.
-</para>
- </section>
- <section>
- <title>Geo-replication status displays Faulty very often </title>
- <para><emphasis role="bold">Description</emphasis>: Geo-replication displays status as faulty very often with a backtrace similar to
-the following:
-</para>
- <para><errortext>2011-04-28 14:06:18.378859] E [syncdutils:131:log_raise_exception] &lt;top&gt;: FAIL: Traceback (most recent call last): File &quot;/usr/local/libexec/glusterfs/python/syncdaemon/syncdutils.py&quot;, line 152, in twraptf(*aa) File &quot;/usr/local/libexec/glusterfs/python/syncdaemon/repce.py&quot;, line 118, in listen rid, exc, res = recv(self.inf) File &quot;/usr/local/libexec/glusterfs/python/syncdaemon/repce.py&quot;, line 42, in recv return pickle.load(inf) EOFError </errortext></para>
- <para><emphasis role="bold">Solution</emphasis>: This error indicates that the RPC communication between the master gsyncd module and slave
-gsyncd module is broken and this can happen for various reasons. Check if it satisfies all the following
-pre-requisites:
-</para>
- <itemizedlist>
- <listitem>
- <para>Password-less SSH is set up properly between the host and the remote machine.
-</para>
- </listitem>
- <listitem>
- <para>If FUSE is installed in the machine, because geo-replication module mounts the GlusterFS volume
-using FUSE to sync data.
-</para>
- </listitem>
- <listitem>
- <para>If the <emphasis role="bold">Slave</emphasis> is a volume, check if that volume is started.
-</para>
- </listitem>
- <listitem>
- <para>If the Slave is a plain directory, verify if the directory has been created already with the
-required permissions.
-</para>
- </listitem>
- <listitem>
- <para>If GlusterFS 3.2 or higher is not installed in the default location (in Master) and has been prefixed to be
-installed in a custom location, configure the <command>gluster-command</command> for it to point to the exact
-location.
-</para>
- </listitem>
- <listitem>
- <para>If GlusterFS 3.2 or higher is not installed in the default location (in slave) and has been prefixed to be
-installed in a custom location, configure the <command>remote-gsyncd-command</command> for it to point to the
-exact place where gsyncd is located.
-</para>
- </listitem>
- </itemizedlist>
- </section>
- <section>
- <title>Intermediate Master goes to Faulty State </title>
- <para><emphasis role="bold">Description</emphasis>: In a cascading set-up, the intermediate master goes to faulty state with the following
-log:
-</para>
- <para><errortext>raise RuntimeError (&quot;aborting on uuid change from %s to %s&quot; % \ RuntimeError: aborting on uuid change from af07e07c-427f-4586-ab9f- 4bf7d299be81 to de6b5040-8f4e-4575-8831-c4f55bd41154 </errortext></para>
- <para><emphasis role="bold">Solution</emphasis>: In a cascading set-up the Intermediate master is loyal to the original primary master. The
-above log means that the geo-replication module has detected change in primary master.
-If this is the desired behavior, delete the config option volume-id in the session initiated from the
-intermediate master.
-</para>
- </section>
- </section>
- <section>
- <title>Troubleshooting POSIX ACLs </title>
- <para>This section describes the most common troubleshooting issues related to POSIX ACLs.
-</para>
- <section>
- <title>setfacl command fails with “setfacl: &lt;file or directory name&gt;: Operation not supported†error </title>
- <para>You may face this error when the backend file systems in one of the servers is not mounted with
-the &quot;-o acl&quot; option. The same can be confirmed by viewing the following error message in the log file
-of the server &quot;Posix access control list is not supported&quot;.
-</para>
- <para><emphasis role="bold">Solution</emphasis>: Remount the backend file system with &quot;-o acl&quot; option. For more information, see <xref linkend="sect-Administration_Guide-ACLs-Activating_ACLs-Server"/>.
-</para>
- </section>
- </section>
- <section>
- <title>Troubleshooting Hadoop Compatible Storage </title>
- <para>This section describes the most common troubleshooting issues related to Hadoop Compatible
-Storage.
-
- </para>
- <section id="sect-Administration_Guide-Troubleshooting-Test_Section_1">
- <title>Time Sync</title>
- <para>Running MapReduce job may throw exceptions if the time is out-of-sync on the hosts in the cluster.
-
- </para>
- <para><emphasis role="bold">Solution</emphasis>: Sync the time on all hosts using ntpd program.
-</para>
- </section>
- </section>
- <section>
- <title>Troubleshooting NFS </title>
- <para>This section describes the most common troubleshooting issues related to NFS .
-</para>
- <section>
- <title>mount command on NFS client fails with “RPC Error: Program not registered†</title>
- <para>Start portmap or rpcbind service on the NFS server.
-</para>
- <para>This error is encountered when the server has not started correctly.
-</para>
- <para>On most Linux distributions this is fixed by starting portmap:
-</para>
- <para><command>$ /etc/init.d/portmap start</command>
-</para>
- <para>On some distributions where portmap has been replaced by rpcbind, the following command is
-required:
-</para>
- <para><command>$ /etc/init.d/rpcbind start </command></para>
- <para>After starting portmap or rpcbind, gluster NFS server needs to be restarted.
-</para>
- </section>
- <section>
- <title>NFS server start-up fails with “Port is already in use†error in the log file.&quot; </title>
- <para>Another Gluster NFS server is running on the same machine.
-</para>
- <para>This error can arise in case there is already a Gluster NFS server running on the same machine.
-This situation can be confirmed from the log file, if the following error lines exist:
-</para>
- <para><screen>[2010-05-26 23:40:49] E [rpc-socket.c:126:rpcsvc_socket_listen] rpc-socket: binding socket failed:Address already in use
-[2010-05-26 23:40:49] E [rpc-socket.c:129:rpcsvc_socket_listen] rpc-socket: Port is already in use
-[2010-05-26 23:40:49] E [rpcsvc.c:2636:rpcsvc_stage_program_register] rpc-service: could not create listening connection
-[2010-05-26 23:40:49] E [rpcsvc.c:2675:rpcsvc_program_register] rpc-service: stage registration of program failed
-[2010-05-26 23:40:49] E [rpcsvc.c:2695:rpcsvc_program_register] rpc-service: Program registration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465
-[2010-05-26 23:40:49] E [nfs.c:125:nfs_init_versions] nfs: Program init failed
-[2010-05-26 23:40:49] C [nfs.c:531:notify] nfs: Failed to initialize protocols</screen></para>
- <para>To resolve this error one of the Gluster NFS servers will have to be shutdown. At this time,
-Gluster NFS server does not support running multiple NFS servers on the same machine.
-</para>
- </section>
- <section>
- <title>mount command fails with “rpc.statd†related error message </title>
- <para>If the mount command fails with the following error message:
-</para>
- <para><errortext>mount.nfs: rpc.statd is not running but is required for remote locking. mount.nfs: Either use &apos;-o nolock&apos; to keep locks local, or start statd. </errortext></para>
- <para><errortext>Start rpc.statd </errortext></para>
- <para>For NFS clients to mount the NFS server, rpc.statd service must be running on the clients. </para>
- <para>Start
-rpc.statd service by running the following command:
-</para>
- <para><command>$ rpc.statd </command></para>
- </section>
- <section>
- <title>mount command takes too long to finish. </title>
- <para>Start rpcbind service on the NFS client.
-</para>
- <para>The problem is that the rpcbind or portmap service is not running on the NFS client. The
-resolution for this is to start either of these services by running the following command:
-</para>
- <para><command>$ /etc/init.d/portmap start</command>
-</para>
- <para>On some distributions where portmap has been replaced by rpcbind, the following command is
-required:
-</para>
- <para><command>$ /etc/init.d/rpcbind start</command></para>
- </section>
- <section>
- <title>NFS server glusterfsd starts but initialization fails with “nfsrpc- service: portmap registration of program failed†error message in the log. </title>
- <para>NFS start-up can succeed but the initialization of the NFS service can still fail preventing clients
-from accessing the mount points. Such a situation can be confirmed from the following error
-messages in the log file:
-</para>
- <para><screen>[2010-05-26 23:33:47] E [rpcsvc.c:2598:rpcsvc_program_register_portmap] rpc-service: Could notregister with portmap
-[2010-05-26 23:33:47] E [rpcsvc.c:2682:rpcsvc_program_register] rpc-service: portmap registration of program failed
-[2010-05-26 23:33:47] E [rpcsvc.c:2695:rpcsvc_program_register] rpc-service: Program registration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465
-[2010-05-26 23:33:47] E [nfs.c:125:nfs_init_versions] nfs: Program init failed
-[2010-05-26 23:33:47] C [nfs.c:531:notify] nfs: Failed to initialize protocols
-[2010-05-26 23:33:49] E [rpcsvc.c:2614:rpcsvc_program_unregister_portmap] rpc-service: Could not unregister with portmap
-[2010-05-26 23:33:49] E [rpcsvc.c:2731:rpcsvc_program_unregister] rpc-service: portmap unregistration of program failed
-[2010-05-26 23:33:49] E [rpcsvc.c:2744:rpcsvc_program_unregister] rpc-service: Program unregistration failed: MOUNT3, Num: 100005, Ver: 3, Port: 38465</screen></para>
- <orderedlist>
- <listitem>
- <para>Start portmap or rpcbind service on the NFS server.
-</para>
- <para>On most Linux distributions, portmap can be started using the following command:
-</para>
- <para><command>$ /etc/init.d/portmap start </command></para>
- <para>On some distributions where portmap has been replaced by rpcbind, run the following command:
-</para>
- <para><command>$ /etc/init.d/rpcbind start </command></para>
- <para>After starting portmap or rpcbind, gluster NFS server needs to be restarted.
-</para>
- </listitem>
- <listitem>
- <para>Stop another NFS server running on the same machine.
-</para>
- <para>Such an error is also seen when there is another NFS server running on the same machine but it is
-not the Gluster NFS server. On Linux systems, this could be the kernel NFS server. Resolution
-involves stopping the other NFS server or not running the Gluster NFS server on the machine.
-Before stopping the kernel NFS server, ensure that no critical service depends on access to that
-NFS server&apos;s exports.
-</para>
- <para>On Linux, kernel NFS servers can be stopped by using either of the following commands
-depending on the distribution in use:
-</para>
- <para><command>$ /etc/init.d/nfs-kernel-server stop</command>
-</para>
- <para><command>$ /etc/init.d/nfs stop</command></para>
- </listitem>
- <listitem>
- <para>Restart Gluster NFS server.
-</para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>mount command fails with NFS server failed error. </title>
- <para>mount command fails with following error
-</para>
- <para><emphasis role="italic">mount: mount to NFS server &apos;10.1.10.11&apos; failed: timed out (retrying).</emphasis></para>
- <para>Perform one of the following to resolve this issue:
-</para>
- <orderedlist>
- <listitem>
- <para>Disable name lookup requests from NFS server to a DNS server.
-</para>
- <para>The NFS server attempts to authenticate NFS clients by performing a reverse DNS lookup to
-match hostnames in the volume file with the client IP addresses. There can be a situation where
-the NFS server either is not able to connect to the DNS server or the DNS server is taking too long
-to responsd to DNS request. These delays can result in delayed replies from the NFS server to the
-NFS client resulting in the timeout error seen above.
-</para>
- <para>NFS server provides a work-around that disables DNS requests, instead relying only on the client
-IP addresses for authentication. The following option can be added for successful mounting in
-such situations:
-</para>
- <para><command>option rpc-auth.addr.namelookup off </command></para>
- <para><note>
- <para>Note: Remember that disabling the NFS server forces authentication of clients to use only IP
-addresses and if the authentication rules in the volume file use hostnames, those authentication
-rules will fail and disallow mounting for those clients.
-</para>
- </note></para>
- <para>or</para>
- </listitem>
- <listitem>
- <para>NFS version used by the NFS client is other than version 3.
-</para>
- <para>Gluster NFS server supports version 3 of NFS protocol. In recent Linux kernels, the default NFS
-version has been changed from 3 to 4. It is possible that the client machine is unable to connect
-to the Gluster NFS server because it is using version 4 messages which are not understood by
-Gluster NFS server. The timeout can be resolved by forcing the NFS client to use version 3. The
-<emphasis role="bold">vers</emphasis> option to mount command is used for this purpose:
-</para>
- <para><command>$ mount <replaceable>nfsserver</replaceable><replaceable>:export</replaceable> -o vers=3 <replaceable>mount-point</replaceable></command>
-</para>
- </listitem>
- </orderedlist>
- </section>
- <section>
- <title>showmount fails with clnt_create: RPC: Unable to receive </title>
- <para>Check your firewall setting to open ports 111 for portmap requests/replies and Gluster NFS
-server requests/replies. Gluster NFS server operates over the following port numbers: 38465,
-38466, and 38467.
-</para>
- <para>For more information, see <xref linkend="sect-Administration_Guide-Test_Chapter-GlusterFS_Client-Native-RPM"/>.
-</para>
- </section>
- <section>
- <title>Application fails with &quot;Invalid argument&quot; or &quot;Value too large for defined data type&quot; error. </title>
- <para>These two errors generally happen for 32-bit nfs clients or applications that do not support 64-bit
-inode numbers or large files.
-Use the following option from the CLI to make Gluster NFS return 32-bit inode numbers instead:
-nfs.enable-ino32 &lt;on|off&gt;
-</para>
- <para>Applications that will benefit are those that were either:
-</para>
- <itemizedlist>
- <listitem>
- <para>built 32-bit and run on 32-bit machines such that they do not support large files by default</para>
- </listitem>
- <listitem>
- <para>built 32-bit on 64-bit systems
-</para>
- </listitem>
- </itemizedlist>
- <para>This option is disabled by default so NFS returns 64-bit inode numbers by default.
-</para>
- <para>Applications which can be rebuilt from source are recommended to rebuild using the following
-flag with gcc:</para>
- <para><command> -D_FILE_OFFSET_BITS=64</command>
-</para>
- </section>
- </section>
- <section>
- <title>Troubleshooting File Locks</title>
- <para>In GlusterFS 3.3 you can use <command>statedump</command> command to list the locks held on files. The statedump output also provides information on each lock with its range, basename, PID of the application holding the lock, and so on. You can analyze the output to know about the locks whose owner/application is no longer running or interested in that lock. After ensuring that the no application is using the file, you can clear the lock using the following <command>clear lock</command> command:</para>
- <para><command># <command>gluster volume clear-locks <replaceable>VOLNAME path</replaceable> kind {blocked | granted | all}{inode [range] | entry [basename] | posix [range]}</command></command></para>
- <para>For more information on performing <command>statedump</command>, see <xref linkend="sect-Administration_Guide-Monitor_Workload-Performing_Statedump"/></para>
- <para><emphasis role="bold">To identify locked file and clear locks</emphasis></para>
- <orderedlist>
- <listitem>
- <para>Perform statedump on the volume to view the files that are locked using the following command:</para>
- <para> <command># gluster volume statedump <replaceable>VOLNAME</replaceable> inode</command></para>
- <para>For example, to display statedump of test-volume:</para>
- <para><programlisting># gluster volume statedump test-volume
-Volume statedump successful</programlisting></para>
- <para>The statedump files are created on the brick servers in the<filename> /tmp</filename> directory or in the directory set using <command>server.statedump-path</command> volume option. The naming convention of the dump file is <filename>&lt;brick-path&gt;.&lt;brick-pid&gt;.dump</filename>.</para>
- <para>The following are the sample contents of the statedump file. It indicates that GlusterFS has entered into a state where there is an entry lock (entrylk) and an inode lock (inodelk). Ensure that those are stale locks and no resources own them. </para>
- <para><screen>[xlator.features.locks.vol-locks.inode]
-path=/
-mandatory=0
-entrylk-count=1
-lock-dump.domain.domain=vol-replicate-0
-xlator.feature.locks.lock-dump.domain.entrylk.entrylk[0](ACTIVE)=type=ENTRYLK_WRLCK on basename=file1, pid = 714782904, owner=ffffff2a3c7f0000, transport=0x20e0670, , granted at Mon Feb 27 16:01:01 2012
-
-conn.2.bound_xl./gfs/brick1.hashsize=14057
-conn.2.bound_xl./gfs/brick1.name=/gfs/brick1/inode
-conn.2.bound_xl./gfs/brick1.lru_limit=16384
-conn.2.bound_xl./gfs/brick1.active_size=2
-conn.2.bound_xl./gfs/brick1.lru_size=0
-conn.2.bound_xl./gfs/brick1.purge_size=0
-
-[conn.2.bound_xl./gfs/brick1.active.1]
-gfid=538a3d4a-01b0-4d03-9dc9-843cd8704d07
-nlookup=1
-ref=2
-ia_type=1
-[xlator.features.locks.vol-locks.inode]
-path=/file1
-mandatory=0
-inodelk-count=1
-lock-dump.domain.domain=vol-replicate-0
-inodelk.inodelk[0](ACTIVE)=type=WRITE, whence=0, start=0, len=0, pid = 714787072, owner=00ffff2a3c7f0000, transport=0x20e0670, , granted at Mon Feb 27 16:01:01 2012</screen></para>
- </listitem>
- <listitem>
- <para>Clear the lock using the following command:</para>
- <para><command># <command>gluster volume clear-locks <replaceable>VOLNAME path</replaceable> kind granted entry basename</command></command></para>
- <para>For example, to clear the entry lock on <filename>file1</filename> of test-volume:
-</para>
- <para><screen># gluster volume clear-locks test-volume / kind granted entry file1
-Volume clear-locks successful
-vol-locks: entry blocked locks=0 granted locks=1</screen></para>
- </listitem>
- <listitem>
- <para>Clear the inode lock using the following command:</para>
- <para><command># <command>gluster volume clear-locks <replaceable>VOLNAME path</replaceable> kind granted inode range </command></command></para>
- <para>For example, to clear the inode lock on <filename>file1</filename> of test-volume:
-</para>
- <para><screen># gluster volume clear-locks test-volume /file1 kind granted inode 0,0-0
-Volume clear-locks successful
-vol-locks: inode blocked locks=0 granted locks=1</screen></para>
- <para>You can perform statedump on test-volume again to verify that the above inode and entry locks are cleared.</para>
- </listitem>
- </orderedlist>
- </section>
-</chapter>
diff --git a/doc/admin-guide/en-US/gfs_introduction.xml b/doc/admin-guide/en-US/gfs_introduction.xml
deleted file mode 100644
index 5fd88730556..00000000000
--- a/doc/admin-guide/en-US/gfs_introduction.xml
+++ /dev/null
@@ -1,54 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter>
- <title>Introducing Gluster File System</title>
- <para>GlusterFS is an open source, clustered file system capable of scaling to several petabytes and handling thousands of clients. GlusterFS can be flexibly combined with commodity physical, virtual, and cloud resources to deliver highly available and performant enterprise storage at a fraction of the cost of traditional solutions.</para>
- <para>GlusterFS clusters together storage building blocks over Infiniband RDMA and/or TCP/IP interconnect, aggregating disk and memory resources and managing data in a single global namespace. GlusterFS is based on a stackable user space design, delivering exceptional performance for diverse workloads.
-</para>
- <figure>
- <title>Virtualized Cloud Environments</title>
- <mediaobject>
- <textobject>
- <phrase>Virtualized Cloud Environments</phrase>
- </textobject>
- <imageobject>
- <imagedata align="center" fileref="images/640px-GlusterFS_3.2_Architecture.png"/>
- </imageobject>
- </mediaobject>
- </figure>
- <para>GlusterFS is designed for today&apos;s high-performance, virtualized cloud environments. Unlike traditional data centers, cloud environments require multi-tenancy along with the ability to grow or shrink resources on demand. Enterprises can scale capacity, performance, and availability on demand, with no vendor lock-in, across on-premise, public cloud, and hybrid environments. </para>
- <para>GlusterFS is in production at thousands of enterprises spanning media, healthcare, government, education, web 2.0, and financial services. The following table lists the commercial offerings and its documentation location:
-</para>
- <informaltable frame="all">
- <tgroup cols="2">
- <colspec colname="c1" colwidth="16%"/>
- <colspec colname="c2" colwidth="84%"/>
- <thead>
- <row>
- <entry>Product</entry>
- <entry>Documentation Location</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>Red Hat Storage Software Appliance</entry>
- <entry>
- <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Storage_Software_Appliance/index.html"/>
- </entry>
- </row>
- <row>
- <entry>Red Hat Virtual Storage Appliance</entry>
- <entry>
- <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Virtual_Storage_Appliance/index.html"/>
- </entry>
- </row>
- <row>
- <entry>Red Hat Storage </entry>
- <entry>
- <ulink url="http://docs.redhat.com/docs/en-US/Red_Hat_Storage/index.html"/>
- </entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
-</chapter>
diff --git a/doc/admin-guide/en-US/glossary.xml b/doc/admin-guide/en-US/glossary.xml
deleted file mode 100644
index 8c314feaad4..00000000000
--- a/doc/admin-guide/en-US/glossary.xml
+++ /dev/null
@@ -1,126 +0,0 @@
-<?xml version='1.0' encoding='UTF-8'?>
-<!-- This document was created with Syntext Serna Free. --><!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.5//EN" "docbookV4.5/docbookx.dtd" []>
-<chapter>
- <title>Glossary</title>
- <glosslist>
- <glossentry>
- <glossterm>Brick</glossterm>
- <glossdef>
- <para>A Brick is the GlusterFS basic unit of storage, represented by an export directory on a server in the trusted storage pool. A Brick is expressed by combining a server with an export directory in the following format:</para>
- <para><code>SERVER:EXPORT</code></para>
- <para>For example:</para>
- <para><filename>myhostname:/exports/myexportdir/</filename></para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Cluster</glossterm>
- <glossdef>
- <para>A cluster is a group of linked computers, working together closely thus in many respects forming a single computer.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Distributed File System</glossterm>
- <glossdef>
- <para>A file system that allows multiple clients to concurrently access data over a computer network.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Filesystem</glossterm>
- <glossdef>
- <para>A method of storing and organizing computer files and their data. Essentially, it organizes these files into a database for the storage, organization, manipulation, and retrieval by the computer&apos;s operating system.</para>
- <para>Source: <ulink url="http://en.wikipedia.org/wiki/Filesystem">Wikipedia</ulink></para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>FUSE</glossterm>
- <glossdef>
- <para>Filesystem in Userspace (<acronym>FUSE</acronym>) is a loadable kernel module for Unix-like computer operating systems that lets non-privileged users create their own file systems without editing kernel code. This is achieved by running file system code in user space while the <acronym>FUSE</acronym> module provides only a &quot;bridge&quot; to the actual kernel interfaces.</para>
- <para>Source: <ulink url="http://en.wikipedia.org/wiki/Filesystem_in_Userspace">Wikipedia</ulink></para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Geo-Replication</glossterm>
- <glossdef>
- <para>Geo-replication provides a continuous, asynchronous, and incremental replication service from site to another over Local Area Networks (<acronym>LAN</acronym>), Wide Area Network (<acronym>WAN</acronym>), and across the Internet.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>glusterd</glossterm>
- <glossdef>
- <para>The Gluster management daemon that needs to run on all servers in the trusted storage pool.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Metadata</glossterm>
- <glossdef>
- <para>Metadata is data providing information about one or more other pieces of data.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Namespace</glossterm>
- <glossdef>
- <para>Namespace is an abstract container or environment created to hold a logical grouping of unique identifiers or symbols. Each Gluster volume exposes a single namespace as a POSIX mount point that contains every file in the cluster.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Open Source</glossterm>
- <glossdef>
- <para>Open source describes practices in production and development that promote access to the end product&apos;s source materials. Some consider open source a philosophy, others consider it a pragmatic methodology.</para>
- <para>Before the term open source became widely adopted, developers and producers used a variety of phrases to describe the concept; open source gained hold with the rise of the Internet, and the attendant need for massive retooling of the computing source code.</para>
- <para>Opening the source code enabled a self-enhancing diversity of production models, communication paths, and interactive communities. Subsequently, a new, three-word phrase &quot;open source software&quot; was born to describe the environment that the new copyright, licensing, domain, and consumer issues created.</para>
- <para>Source: <ulink url="http://en.wikipedia.org/wiki/Open_source">Wikipedia</ulink></para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Petabyte</glossterm>
- <glossdef>
- <para>A petabyte (derived from the SI prefix peta- ) is a unit of information equal to one quadrillion (short scale) bytes, or 1000 terabytes. The unit symbol for the petabyte is PB. The prefix peta- (P) indicates a power of 1000:</para>
- <para>1 PB = 1,000,000,000,000,000 B = 10005 B = 1015 B.</para>
- <para>The term &quot;pebibyte&quot; (<acronym>PiB</acronym>), using a binary prefix, is used for the corresponding power of 1024.</para>
- <para>Source: <ulink url="http://en.wikipedia.org/wiki/Petabyte">Wikipedia</ulink></para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>POSIX</glossterm>
- <glossdef>
- <para>Portable Operating System Interface (for Unix) is the name of a family of related standards specified by the IEEE to define the application programming interface (<acronym>API</acronym>), along with shell and utilities interfaces for software compatible with variants of the Unix operating system. Gluster exports a fully <acronym>POSIX</acronym> compliant file system.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>RAID</glossterm>
- <glossdef>
- <para>Redundant Array of Inexpensive Disks (<acronym>RAID</acronym>) is a technology that provides increased storage reliability through redundancy, combining multiple low-cost, less-reliable disk drives components into a logical unit where all drives in the array are interdependent.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>RRDNS</glossterm>
- <glossdef>
- <para>Round Robin Domain Name Service (<acronym>RRDNS</acronym>) is a method to distribute load across application servers. <acronym>RRDNS</acronym> is implemented by creating multiple A records with the same name and different IP addresses in the zone file of a DNS server.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Trusted Storage Pool</glossterm>
- <glossdef>
- <para>A storage pool is a trusted network of storage servers. When you start the first server, the storage pool consists of that server alone.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Userspace</glossterm>
- <glossdef>
- <para>Applications running in user space don’t directly interact with hardware, instead using the kernel to moderate access. Userspace applications are generally more portable than applications in kernel space. Gluster is a user space application.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Volfile</glossterm>
- <glossdef>
- <para>Volfile is a configuration file used by glusterfs process. Volfile will be usually located at <filename>/etc/glusterd/vols/VOLNAME</filename>.</para>
- </glossdef>
- </glossentry>
- <glossentry>
- <glossterm>Volume</glossterm>
- <glossdef>
- <para>A volume is a logical collection of bricks. Most of the gluster management operations happen on the volume.</para>
- </glossdef>
- </glossentry>
- </glosslist>
-</chapter>
diff --git a/doc/admin-guide/en-US/images/640px-GlusterFS_3.2_Architecture.png b/doc/admin-guide/en-US/images/640px-GlusterFS_3.2_Architecture.png
deleted file mode 100644
index 95f89ec8286..00000000000
--- a/doc/admin-guide/en-US/images/640px-GlusterFS_3.2_Architecture.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Distributed_Replicated_Volume.png b/doc/admin-guide/en-US/images/Distributed_Replicated_Volume.png
deleted file mode 100644
index dfc0a2c5600..00000000000
--- a/doc/admin-guide/en-US/images/Distributed_Replicated_Volume.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Distributed_Striped_Replicated_Volume.png b/doc/admin-guide/en-US/images/Distributed_Striped_Replicated_Volume.png
deleted file mode 100644
index d286fa99e94..00000000000
--- a/doc/admin-guide/en-US/images/Distributed_Striped_Replicated_Volume.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Distributed_Striped_Volume.png b/doc/admin-guide/en-US/images/Distributed_Striped_Volume.png
deleted file mode 100644
index 752fa982fa6..00000000000
--- a/doc/admin-guide/en-US/images/Distributed_Striped_Volume.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Distributed_Volume.png b/doc/admin-guide/en-US/images/Distributed_Volume.png
deleted file mode 100644
index 4386ca935b9..00000000000
--- a/doc/admin-guide/en-US/images/Distributed_Volume.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Geo-Rep03_Internet.png b/doc/admin-guide/en-US/images/Geo-Rep03_Internet.png
deleted file mode 100644
index 3cd0eaded02..00000000000
--- a/doc/admin-guide/en-US/images/Geo-Rep03_Internet.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Geo-Rep04_Cascading.png b/doc/admin-guide/en-US/images/Geo-Rep04_Cascading.png
deleted file mode 100644
index 54bf9f05cff..00000000000
--- a/doc/admin-guide/en-US/images/Geo-Rep04_Cascading.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Geo-Rep_LAN.png b/doc/admin-guide/en-US/images/Geo-Rep_LAN.png
deleted file mode 100644
index a74f6dbb50a..00000000000
--- a/doc/admin-guide/en-US/images/Geo-Rep_LAN.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Geo-Rep_WAN.png b/doc/admin-guide/en-US/images/Geo-Rep_WAN.png
deleted file mode 100644
index d72d72768bc..00000000000
--- a/doc/admin-guide/en-US/images/Geo-Rep_WAN.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/GlusterFS_3.2_Architecture.png b/doc/admin-guide/en-US/images/GlusterFS_3.2_Architecture.png
deleted file mode 100644
index b506db1f4e7..00000000000
--- a/doc/admin-guide/en-US/images/GlusterFS_3.2_Architecture.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Hadoop_Architecture.png b/doc/admin-guide/en-US/images/Hadoop_Architecture.png
deleted file mode 100644
index 8725bd330bb..00000000000
--- a/doc/admin-guide/en-US/images/Hadoop_Architecture.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Replicated_Volume.png b/doc/admin-guide/en-US/images/Replicated_Volume.png
deleted file mode 100644
index 135a63f345a..00000000000
--- a/doc/admin-guide/en-US/images/Replicated_Volume.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Striped_Replicated_Volume.png b/doc/admin-guide/en-US/images/Striped_Replicated_Volume.png
deleted file mode 100644
index ee88af73134..00000000000
--- a/doc/admin-guide/en-US/images/Striped_Replicated_Volume.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/Striped_Volume.png b/doc/admin-guide/en-US/images/Striped_Volume.png
deleted file mode 100644
index 63a84b242ab..00000000000
--- a/doc/admin-guide/en-US/images/Striped_Volume.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/UFO_Architecture.png b/doc/admin-guide/en-US/images/UFO_Architecture.png
deleted file mode 100644
index be85d7b2825..00000000000
--- a/doc/admin-guide/en-US/images/UFO_Architecture.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/VSA_Architecture.png b/doc/admin-guide/en-US/images/VSA_Architecture.png
deleted file mode 100644
index c3ab80cf3e8..00000000000
--- a/doc/admin-guide/en-US/images/VSA_Architecture.png
+++ /dev/null
Binary files differ
diff --git a/doc/admin-guide/en-US/images/arhitecture.png b/doc/admin-guide/en-US/images/arhitecture.png
deleted file mode 100644
index 4e5188bf8dd..00000000000
--- a/doc/admin-guide/en-US/images/arhitecture.png
+++ /dev/null
@@ -1,13 +0,0 @@
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml">
-<head>
-<title>HTTP Error 403</title>
-</head>
-<body>
-<h1>Error 403</h1>
-<p>We're sorry, but we could not fulfill your request for
-/community/documentation/index.php/Image:GlusterFS_3.2_Architecture.png on this server.</p>
-<p>An invalid request was received from your browser. This may be caused by a malfunctioning proxy server or browser privacy software.</p>
-<p>Your technical support key is: <strong>7ab5-0b6a-1756-6707</strong></p>
-<p>You can use this key to <a href="http://www.ioerror.us/bb2-support-key?key=7ab5-0b6a-1756-6707">fix this problem yourself</a>.</p>
-<p>If you are unable to fix the problem yourself, please contact <a href="mailto:webmaster+nospam@nospam.gluster.com">webmaster at gluster.com</a> and be sure to provide the technical support key shown above.</p>
diff --git a/doc/admin-guide/en-US/images/icon.svg b/doc/admin-guide/en-US/images/icon.svg
deleted file mode 100644
index b2f16d0f61d..00000000000
--- a/doc/admin-guide/en-US/images/icon.svg
+++ /dev/null
@@ -1,19 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?>
-<svg xmlns:svg="http://www.w3.org/2000/svg" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" version="1.0" width="32" height="32" id="svg3017">
- <defs id="defs3019">
- <linearGradient id="linearGradient2381">
- <stop id="stop2383" style="stop-color:#ffffff;stop-opacity:1" offset="0"/>
- <stop id="stop2385" style="stop-color:#ffffff;stop-opacity:0" offset="1"/>
- </linearGradient>
- <linearGradient x1="296.4996" y1="188.81061" x2="317.32471" y2="209.69398" id="linearGradient2371" xlink:href="#linearGradient2381" gradientUnits="userSpaceOnUse" gradientTransform="matrix(0.90776,0,0,0.90776,24.35648,49.24131)"/>
- </defs>
- <g transform="matrix(0.437808,-0.437808,0.437808,0.437808,-220.8237,43.55311)" id="g5089">
- <path d="m 8.4382985,-6.28125 c -0.6073916,0 -4.3132985,5.94886271 -4.3132985,8.25 l 0,26.71875 c 0,0.846384 0.5818159,1.125 1.15625,1.125 l 25.5625,0 c 0.632342,0 1.125001,-0.492658 1.125,-1.125 l 0,-5.21875 0.28125,0 c 0.49684,0 0.906249,-0.409411 0.90625,-0.90625 l 0,-27.9375 c 0,-0.4968398 -0.40941,-0.90625 -0.90625,-0.90625 l -23.8117015,0 z" transform="translate(282.8327,227.1903)" id="path5091" style="fill:#5c5c4f;stroke:#000000;stroke-width:3.23021388;stroke-miterlimit:4;stroke-dasharray:none"/>
- <rect width="27.85074" height="29.369793" rx="1.1414107" ry="1.1414107" x="286.96509" y="227.63805" id="rect5093" style="fill:#032c87"/>
- <path d="m 288.43262,225.43675 25.2418,0 0,29.3698 -26.37615,0.0241 1.13435,-29.39394 z" id="rect5095" style="fill:#ffffff"/>
- <path d="m 302.44536,251.73726 c 1.38691,7.85917 -0.69311,11.28365 -0.69311,11.28365 2.24384,-1.60762 3.96426,-3.47694 4.90522,-5.736 0.96708,2.19264 1.83294,4.42866 4.27443,5.98941 0,0 -1.59504,-7.2004 -1.71143,-11.53706 l -6.77511,0 z" id="path5097" style="fill:#a70000;fill-opacity:1;stroke-width:2"/>
- <rect width="25.241802" height="29.736675" rx="0.89682275" ry="0.89682275" x="290.73544" y="220.92249" id="rect5099" style="fill:#809cc9"/>
- <path d="m 576.47347,725.93939 6.37084,0.41502 0.4069,29.51809 c -1.89202,-1.31785 -6.85427,-3.7608 -8.26232,-1.68101 l 0,-26.76752 c 0,-0.82246 0.66212,-1.48458 1.48458,-1.48458 z" transform="matrix(0.499065,-0.866565,0,1,0,0)" id="rect5101" style="fill:#4573b3;fill-opacity:1"/>
- <path d="m 293.2599,221.89363 20.73918,0 c 0.45101,0 0.8141,0.3631 0.8141,0.81411 0.21547,6.32836 -19.36824,21.7635 -22.36739,17.59717 l 0,-17.59717 c 0,-0.45101 0.3631,-0.81411 0.81411,-0.81411 z" id="path5103" style="opacity:0.65536726;fill:url(#linearGradient2371);fill-opacity:1"/>
- </g>
-</svg>
diff --git a/doc/admin-guide/publican.cfg b/doc/admin-guide/publican.cfg
deleted file mode 100644
index e42fa1b3dc8..00000000000
--- a/doc/admin-guide/publican.cfg
+++ /dev/null
@@ -1,12 +0,0 @@
-# Config::Simple 4.59
-# Thu Apr 5 11:09:15 2012
-
-xml_lang: "en-US"
-type: Book
-brand: Gluster_Brand
-prod_url: http://www.gluster.org
-doc_url: http://www.gluster.com/community/documentation/index.php/Main_Page
-condition: gfs
-show_remarks: 1
-
-
diff --git a/doc/debugging/analyzing-regression-cores.md b/doc/debugging/analyzing-regression-cores.md
new file mode 100644
index 00000000000..5e10f41c6eb
--- /dev/null
+++ b/doc/debugging/analyzing-regression-cores.md
@@ -0,0 +1,54 @@
+# Analyzing Regression Cores
+This document explains how to analyze core-dumps obtained from regression machines, with examples.
+1. Download the core-tarball and extract it.
+2. `cd` into directory where the tarball is extracted.
+```
+[sh]# pwd
+/home/user/Downloads
+[sh]# ls
+build build-install-20150625_05_42_39.tar.bz2 lib64 usr
+```
+3. Determine the core file you need to examine. There can be more than one core file. You can list them from './build/install/cores' directory.
+```
+[sh]# ls build/install/cores/
+core.9341 liblist.txt liblist.txt.tmp
+```
+In case you are unsure which binary generated the core-file, executing 'file' command on it will help.
+```
+[sh]# file ./build/install/cores/core.9341
+./build/install/cores/core.9341: ELF 64-bit LSB core file x86-64, version 1 (SYSV), SVR4-style, from '/build/install/sbin/glusterfsd -s slave26.cloud.gluster.org --volfile-id patchy'
+```
+As seen, the core file was generated by glusterfsd binary, and path to it is provided (/build/install/sbin/glusterfsd).
+
+4. Now, run the following command on the core:
+```
+gdb -ex 'set sysroot ./' -ex 'core-file ./build/install/cores/core.xxx' <target, say ./build/install/sbin/glusterd>
+In this case,
+gdb -ex 'set sysroot ./' -ex 'core-file ./build/install/cores/core.9341' ./build/install/sbin/glusterfsd
+```
+5. You can cross check if all shared libraries are available and loaded by using 'info sharedlibrary' command from inside gdb.
+6. Once verified, usual gdb commands based on requirement can be used to debug the core.
+ `bt` or `backtrace` from gdb of core used in examples:
+```
+Core was generated by `/build/install/sbin/glusterfsd -s slave26.cloud.gluster.org --volfile-id patchy'.
+Program terminated with signal SIGABRT, Aborted.
+#0 0x00007f512a54e625 in raise () from ./lib64/libc.so.6
+(gdb) bt
+#0 0x00007f512a54e625 in raise () from ./lib64/libc.so.6
+#1 0x00007f512a54fe05 in abort () from ./lib64/libc.so.6
+#2 0x00007f512a54774e in __assert_fail_base () from ./lib64/libc.so.6
+#3 0x00007f512a547810 in __assert_fail () from ./lib64/libc.so.6
+#4 0x00007f512b9fc434 in __gf_free (free_ptr=0x7f50f4000e50) at /home/jenkins/root/workspace/rackspace-regression-2GB-triggered/libglusterfs/src/mem-pool.c:304
+#5 0x00007f512b9b6657 in loc_wipe (loc=0x7f510c20d1a0) at /home/jenkins/root/workspace/rackspace-regression-2GB-triggered/libglusterfs/src/xlator.c:685
+#6 0x00007f511cb8201d in mq_start_quota_txn_v2 (this=0x7f5118019b60, loc=0x7f510c20d2b8, ctx=0x7f50f4000bf0, contri=0x7f50f4000d60)
+ at /home/jenkins/root/workspace/rackspace-regression-2GB-triggered/xlators/features/marker/src/marker-quota.c:2921
+#7 0x00007f511cb82c55 in mq_initiate_quota_task (opaque=0x7f510c20d2b0) at /home/jenkins/root/workspace/rackspace-regression-2GB-triggered/xlators/features/marker/src/marker-quota.c:3199
+#8 0x00007f511cb81820 in mq_synctask (this=0x7f5118019b60, task=0x7f511cb829fa <mq_initiate_quota_task>, spawn=_gf_false, loc=0x7f510c20d430, dict=0x0, buf=0x0, contri=0)
+ at /home/jenkins/root/workspace/rackspace-regression-2GB-triggered/xlators/features/marker/src/marker-quota.c:2789
+#9 0x00007f511cb82f82 in mq_initiate_quota_blocking_txn (this=0x7f5118019b60, loc=0x7f510c20d430) at /home/jenkins/root/workspace/rackspace-regression-2GB-triggered/xlators/features/marker/src/marker-quota.c:3230
+#10 0x00007f511cb82844 in mq_reduce_parent_size_task (opaque=0x7f510c000df0) at /home/jenkins/root/workspace/rackspace-regression-2GB-triggered/xlators/features/marker/src/marker-quota.c:3117
+#11 0x00007f512ba0f9dc in synctask_wrap (old_task=0x7f510c0053e0) at /home/jenkins/root/workspace/rackspace-regression-2GB-triggered/libglusterfs/src/syncop.c:370
+#12 0x00007f512a55f8f0 in ?? () from ./lib64/libc.so.6
+#13 0x0000000000000000 in ?? ()
+(gdb)
+```
diff --git a/doc/debugging/gfid-to-path.md b/doc/debugging/gfid-to-path.md
new file mode 100644
index 00000000000..1917bf2cca1
--- /dev/null
+++ b/doc/debugging/gfid-to-path.md
@@ -0,0 +1,68 @@
+# Convert GFID to Path
+
+GlusterFS internal file identifier (GFID) is a uuid that is unique to each
+file across the entire cluster. This is analogous to inode number in a
+normal filesystem. The GFID of a file is stored in its xattr named
+`trusted.gfid`.
+
+#### Special mount using [gfid-access translator][1]:
+```
+mount -t glusterfs -o aux-gfid-mount vm1:test /mnt/testvol
+```
+
+Assuming, you have `GFID` of a file from changelog (or somewhere else).
+For trying this out, you can get `GFID` of a file from mountpoint:
+```
+getfattr -n glusterfs.gfid.string /mnt/testvol/dir/file
+```
+
+
+---
+### Get file path from GFID (Method 1):
+**(Lists hardlinks delimited by `:`, returns path as seen from mountpoint)**
+
+#### Turn on build-pgfid option
+```
+gluster volume set test build-pgfid on
+```
+Read virtual xattr `glusterfs.ancestry.path` which contains the file path
+```
+getfattr -n glusterfs.ancestry.path -e text /mnt/testvol/.gfid/<GFID>
+```
+
+**Example:**
+```
+[root@vm1 glusterfs]# ls -il /mnt/testvol/dir/
+total 1
+10610563327990022372 -rw-r--r--. 2 root root 3 Jul 17 18:05 file
+10610563327990022372 -rw-r--r--. 2 root root 3 Jul 17 18:05 file3
+
+[root@vm1 glusterfs]# getfattr -n glusterfs.gfid.string /mnt/testvol/dir/file
+getfattr: Removing leading '/' from absolute path names
+# file: mnt/testvol/dir/file
+glusterfs.gfid.string="11118443-1894-4273-9340-4b212fa1c0e4"
+
+[root@vm1 glusterfs]# getfattr -n glusterfs.ancestry.path -e text /mnt/testvol/.gfid/11118443-1894-4273-9340-4b212fa1c0e4
+getfattr: Removing leading '/' from absolute path names
+# file: mnt/testvol/.gfid/11118443-1894-4273-9340-4b212fa1c0e4
+glusterfs.ancestry.path="/dir/file:/dir/file3"
+```
+
+---
+### Get file path from GFID (Method 2):
+**(Does not list all hardlinks, returns backend brick path)**
+```
+getfattr -n trusted.glusterfs.pathinfo -e text /mnt/testvol/.gfid/<GFID>
+```
+
+**Example:**
+```
+[root@vm1 glusterfs]# getfattr -n trusted.glusterfs.pathinfo -e text /mnt/testvol/.gfid/11118443-1894-4273-9340-4b212fa1c0e4
+getfattr: Removing leading '/' from absolute path names
+# file: mnt/testvol/.gfid/11118443-1894-4273-9340-4b212fa1c0e4
+trusted.glusterfs.pathinfo="(<DISTRIBUTE:test-dht> <POSIX(/mnt/brick-test/b):vm1:/mnt/brick-test/b/dir//file3>)"
+```
+
+---
+#### References and links:
+[posix: placeholders for GFID to path conversion](http://review.gluster.org/5951)
diff --git a/doc/debugging/mem-alloc-list.md b/doc/debugging/mem-alloc-list.md
new file mode 100644
index 00000000000..1c68e65d323
--- /dev/null
+++ b/doc/debugging/mem-alloc-list.md
@@ -0,0 +1,19 @@
+## Viewing Memory Allocations
+
+While statedumps provide stats of the number of allocations, size etc for a
+particular mem type, there is no easy way to examine all the allocated objects of that type
+in memory.Being able to view this information could help with determining how an object is used,
+and if there are any memory leaks.
+
+The mem_acct_rec structures have been updated to include lists to which the allocated object is
+added. These can be examined in gdb using simple scripts.
+
+`gdb> plist xl->mem_acct.rec[$type]->obj_list`
+
+will print out the pointers of all allocations of $type.
+
+These changes are primarily targeted at developers and need to enabled
+at compile-time using `configure --enable-debug`.
+
+
+
diff --git a/doc/debugging/split-brain.md b/doc/debugging/split-brain.md
new file mode 100644
index 00000000000..6b122c40551
--- /dev/null
+++ b/doc/debugging/split-brain.md
@@ -0,0 +1,264 @@
+# Steps to recover from File split-brain
+This document contains steps to recover from a file split-brain.
+## Quick Start:
+### Step 1. Get the path of the file that is in split-brain:
+It can be obtained either by
+1. The command `gluster volume heal info split-brain`.
+2. Identify the files for which file operations performed from the client keep failing with Input/Output error.
+
+### Step 2. Close the applications that opened this file from the mount point.
+In case of VMs, they need to be powered-off.
+
+### Step 3. Decide on the correct copy:
+This is done by observing the afr changelog extended attributes of the file on
+the bricks using the getfattr command; then identifying the type of split-brain
+(data split-brain, metadata split-brain, entry split-brain or split-brain due to
+gfid-mismatch); and finally determining which of the bricks contains the 'good copy'
+of the file.
+```
+getfattr -d -m . -e hex <file-path-on-brick>
+```
+
+It is also possible that one brick might contain the correct data while the
+other might contain the correct metadata.
+
+### Step 4. Reset the relevant extended attribute on the brick(s) that contains the 'bad copy' of the file data/metadata using the setfattr command.
+```
+setfattr -n <attribute-name> -v <attribute-value> <file-path-on-brick>
+```
+
+### Step 5. Trigger self-heal on the file by performing lookup from the client:
+```
+ls -l <file-path-on-gluster-mount>
+```
+
+Detailed Instructions for steps 3 through 5:
+===========================================
+To understand how to resolve split-brain we need to know how to interpret the
+afr changelog extended attributes.
+
+Execute `getfattr -d -m . -e hex <file-path-on-brick>`
+
+Example:
+```
+[root@store3 ~]# getfattr -d -e hex -m. brick-a/file.txt
+\#file: brick-a/file.txt
+security.selinux=0x726f6f743a6f626a6563745f723a66696c655f743a733000
+trusted.afr.vol-client-2=0x000000000000000000000000
+trusted.afr.vol-client-3=0x000000000200000000000000
+trusted.gfid=0x307a5c9efddd4e7c96e94fd4bcdcbd1b
+```
+
+The extended attributes with `trusted.afr.<volname>-client-<subvolume-index>`
+are used by afr to maintain changelog of the file.The values of the
+`trusted.afr.<volname>-client-<subvolume-index>` are calculated by the glusterfs
+client (fuse or nfs-server) processes. When the glusterfs client modifies a file
+or directory, the client contacts each brick and updates the changelog extended
+attribute according to the response of the brick.
+
+`subvolume-index` is nothing but (brick number - 1) in
+`gluster volume info <volname>` output.
+
+Example:
+```
+[root@pranithk-laptop ~]# gluster volume info vol
+ Volume Name: vol
+ Type: Distributed-Replicate
+ Volume ID: 4f2d7849-fbd6-40a2-b346-d13420978a01
+ Status: Created
+ Number of Bricks: 4 x 2 = 8
+ Transport-type: tcp
+ Bricks:
+ brick-a: pranithk-laptop:/gfs/brick-a
+ brick-b: pranithk-laptop:/gfs/brick-b
+ brick-c: pranithk-laptop:/gfs/brick-c
+ brick-d: pranithk-laptop:/gfs/brick-d
+ brick-e: pranithk-laptop:/gfs/brick-e
+ brick-f: pranithk-laptop:/gfs/brick-f
+ brick-g: pranithk-laptop:/gfs/brick-g
+ brick-h: pranithk-laptop:/gfs/brick-h
+```
+
+In the example above:
+```
+Brick | Replica set | Brick subvolume index
+----------------------------------------------------------------------------
+-/gfs/brick-a | 0 | 0
+-/gfs/brick-b | 0 | 1
+-/gfs/brick-c | 1 | 2
+-/gfs/brick-d | 1 | 3
+-/gfs/brick-e | 2 | 4
+-/gfs/brick-f | 2 | 5
+-/gfs/brick-g | 3 | 6
+-/gfs/brick-h | 3 | 7
+```
+
+Each file in a brick maintains the changelog of itself and that of the files
+present in all the other bricks in it's replica set as seen by that brick.
+
+In the example volume given above, all files in brick-a will have 2 entries,
+one for itself and the other for the file present in it's replica pair, i.e.brick-b:
+```
+trusted.afr.vol-client-0=0x000000000000000000000000 -->changelog for itself (brick-a)
+trusted.afr.vol-client-1=0x000000000000000000000000 -->changelog for brick-b as seen by brick-a
+```
+Likewise, all files in brick-b will have:
+```
+trusted.afr.vol-client-0=0x000000000000000000000000 -->changelog for brick-a as seen by brick-b
+trusted.afr.vol-client-1=0x000000000000000000000000 -->changelog for itself (brick-b)
+```
+
+The same can be extended for other replica pairs.
+
+Interpreting Changelog (roughly pending operation count) Value:
+Each extended attribute has a value which is 24 hexa decimal digits.
+First 8 digits represent changelog of data. Second 8 digits represent changelog
+of metadata. Last 8 digits represent Changelog of directory entries.
+
+Pictorially representing the same, we have:
+```
+0x 000003d7 00000001 00000000
+ | | |
+ | | \_ changelog of directory entries
+ | \_ changelog of metadata
+ \ _ changelog of data
+```
+
+
+For Directories metadata and entry changelogs are valid.
+For regular files data and metadata changelogs are valid.
+For special files like device files etc metadata changelog is valid.
+When a file split-brain happens it could be either data split-brain or
+meta-data split-brain or both. When a split-brain happens the changelog of the
+file would be something like this:
+
+Example:(Lets consider both data, metadata split-brain on same file).
+```
+[root@pranithk-laptop vol]# getfattr -d -m . -e hex /gfs/brick-?/a
+getfattr: Removing leading '/' from absolute path names
+\#file: gfs/brick-a/a
+trusted.afr.vol-client-0=0x000000000000000000000000
+trusted.afr.vol-client-1=0x000003d70000000100000000
+trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
+\#file: gfs/brick-b/a
+trusted.afr.vol-client-0=0x000003b00000000100000000
+trusted.afr.vol-client-1=0x000000000000000000000000
+trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
+```
+
+### Observations:
+
+#### According to changelog extended attributes on file /gfs/brick-a/a:
+The first 8 digits of trusted.afr.vol-client-0 are all
+zeros (0x00000000................), and the first 8 digits of
+trusted.afr.vol-client-1 are not all zeros (0x000003d7................).
+So the changelog on /gfs/brick-a/a implies that some data operations succeeded
+on itself but failed on /gfs/brick-b/a.
+
+The second 8 digits of trusted.afr.vol-client-0 are
+all zeros (0x........00000000........), and the second 8 digits of
+trusted.afr.vol-client-1 are not all zeros (0x........00000001........).
+So the changelog on /gfs/brick-a/a implies that some metadata operations succeeded
+on itself but failed on /gfs/brick-b/a.
+
+#### According to Changelog extended attributes on file /gfs/brick-b/a:
+The first 8 digits of trusted.afr.vol-client-0 are not all
+zeros (0x000003b0................), and the first 8 digits of
+trusted.afr.vol-client-1 are all zeros (0x00000000................).
+So the changelog on /gfs/brick-b/a implies that some data operations succeeded
+on itself but failed on /gfs/brick-a/a.
+
+The second 8 digits of trusted.afr.vol-client-0 are not
+all zeros (0x........00000001........), and the second 8 digits of
+trusted.afr.vol-client-1 are all zeros (0x........00000000........).
+So the changelog on /gfs/brick-b/a implies that some metadata operations succeeded
+on itself but failed on /gfs/brick-a/a.
+
+Since both the copies have data, metadata changes that are not on the other
+file, it is in both data and metadata split-brain.
+
+Deciding on the correct copy:
+-----------------------------
+The user may have to inspect stat,getfattr output of the files to decide which
+metadata to retain and contents of the file to decide which data to retain.
+Continuing with the example above, lets say we want to retain the data
+of /gfs/brick-a/a and metadata of /gfs/brick-b/a.
+
+Resetting the relevant changelogs to resolve the split-brain:
+-------------------------------------------------------------
+For resolving data-split-brain:
+We need to change the changelog extended attributes on the files as if some data
+operations succeeded on /gfs/brick-a/a but failed on /gfs/brick-b/a. But
+/gfs/brick-b/a should NOT have any changelog which says some data operations
+succeeded on /gfs/brick-b/a but failed on /gfs/brick-a/a. We need to reset the
+data part of the changelog on trusted.afr.vol-client-0 of /gfs/brick-b/a.
+
+For resolving metadata-split-brain:
+We need to change the changelog extended attributes on the files as if some
+metadata operations succeeded on /gfs/brick-b/a but failed on /gfs/brick-a/a.
+But /gfs/brick-a/a should NOT have any changelog which says some metadata
+operations succeeded on /gfs/brick-a/a but failed on /gfs/brick-b/a.
+We need to reset metadata part of the changelog on
+trusted.afr.vol-client-1 of /gfs/brick-a/a
+
+So, the intended changes are:
+On /gfs/brick-b/a:
+For trusted.afr.vol-client-0
+0x000003b00000000100000000 to 0x000000000000000100000000
+(Note that the metadata part is still not all zeros)
+Hence execute
+`setfattr -n trusted.afr.vol-client-0 -v 0x000000000000000100000000 /gfs/brick-b/a`
+
+On /gfs/brick-a/a:
+For trusted.afr.vol-client-1
+0x0000000000000000ffffffff to 0x000003d70000000000000000
+(Note that the data part is still not all zeros)
+Hence execute
+`setfattr -n trusted.afr.vol-client-1 -v 0x000003d70000000000000000 /gfs/brick-a/a`
+
+Thus after the above operations are done, the changelogs look like this:
+```
+[root@pranithk-laptop vol]# getfattr -d -m . -e hex /gfs/brick-?/a
+getfattr: Removing leading '/' from absolute path names
+\#file: gfs/brick-a/a
+trusted.afr.vol-client-0=0x000000000000000000000000
+trusted.afr.vol-client-1=0x000003d70000000000000000
+trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
+
+\#file: gfs/brick-b/a
+trusted.afr.vol-client-0=0x000000000000000100000000
+trusted.afr.vol-client-1=0x000000000000000000000000
+trusted.gfid=0x80acdbd886524f6fbefa21fc356fed57
+```
+
+Triggering Self-heal:
+---------------------
+Perform `ls -l <file-path-on-gluster-mount>` to trigger healing.
+
+Fixing Directory entry split-brain:
+----------------------------------
+Afr has the ability to conservatively merge different entries in the directories
+when there is a split-brain on directory.
+If on one brick directory 'd' has entries '1', '2' and has entries '3', '4' on
+the other brick then afr will merge all of the entries in the directory to have
+'1', '2', '3', '4' entries in the same directory.
+(Note: this may result in deleted files to re-appear in case the split-brain
+happens because of deletion of files on the directory)
+Split-brain resolution needs human intervention when there is at least one entry
+which has same file name but different gfid in that directory.
+Example:
+On brick-a the directory has entries '1' (with gfid g1), '2' and on brick-b
+directory has entries '1' (with gfid g2) and '3'.
+These kinds of directory split-brains need human intervention to resolve.
+The user needs to remove either file '1' on brick-a or the file '1' on brick-b
+to resolve the split-brain. In addition, the corresponding gfid-link file also
+needs to be removed.The gfid-link files are present in the .glusterfs folder
+in the top-level directory of the brick. If the gfid of the file is
+0x307a5c9efddd4e7c96e94fd4bcdcbd1b (the trusted.gfid extended attribute got
+from the getfattr command earlier),the gfid-link file can be found at
+`/gfs/brick-a/.glusterfs/30/7a/307a5c9efddd4e7c96e94fd4bcdcbd1b`
+
+#### Word of caution:
+Before deleting the gfid-link, we have to ensure that there are no hard links
+to the file present on that brick. If hard-links exist,they must be deleted as
+well.
diff --git a/doc/debugging/statedump.md b/doc/debugging/statedump.md
new file mode 100644
index 00000000000..9dfdce15fad
--- /dev/null
+++ b/doc/debugging/statedump.md
@@ -0,0 +1,414 @@
+# Statedump
+Statedump is a file generated by glusterfs process with different data structure state which may contain the active inodes, fds, mempools, iobufs, memory allocation stats of different types of datastructures per xlator etc.
+
+## How to generate statedump
+We can find the directory where statedump files are created using `gluster --print-statedumpdir` command.
+Create that directory if not already present based on the type of installation.
+Lets call this directory `statedump-directory`.
+
+We can generate statedump using `kill -USR1 <pid-of-gluster-process>`.
+gluster-process is nothing but glusterd/glusterfs/glusterfsd process.
+
+There are also commands to generate statedumps for brick processes/nfs server/quotad
+
+For bricks:
+```
+gluster volume statedump <volname>
+```
+
+For nfs server:
+```
+gluster volume statedump <volname> nfs
+```
+
+For quotad:
+```
+gluster volume statedump <volname> quotad
+```
+
+For brick-processes files will be created in `statedump-directory` with name of the file as `hyphenated-brick-path.<pid>.dump.timestamp`. For all other processes it will be `glusterdump.<pid>.dump.timestamp`.
+
+For applications using libgfapi, `SIGUSR1` cannot be used, eg: smbd/libvirtd
+processes could have used the `SIGUSR1` signal already for other purposes.
+To generate statedump for the processes, using libgfapi, below command can be
+executed from one of the nodes in the gluster cluster to which the libgfapi
+application is connected to.
+```
+gluster volume statedump <volname> client <hostname>:<process id>
+```
+The statedumps can be found in the `statedump-directory`, the name of the
+statedumps being `glusterdump.<pid>.dump.timestamp`. For a process there can be
+multiple such files created depending on the number of times the volume is
+accessed by the process (related to the number of `glfs_init()` calls).
+
+## How to read statedump
+We shall see snippets of each type of statedump.
+
+First and last lines of the file have starting and ending time of writing the statedump file. Times will be in UTC timezone.
+
+mallinfo return status is printed in the following format. Please read man mallinfo for more information about what each field means.
+### Mallinfo
+```
+[mallinfo]
+mallinfo_arena=100020224 /* Non-mmapped space allocated (bytes) */
+mallinfo_ordblks=69467 /* Number of free chunks */
+mallinfo_smblks=449 /* Number of free fastbin blocks */
+mallinfo_hblks=13 /* Number of mmapped regions */
+mallinfo_hblkhd=20144128 /* Space allocated in mmapped regions (bytes) */
+mallinfo_usmblks=0 /* Maximum total allocated space (bytes) */
+mallinfo_fsmblks=39264 /* Space in freed fastbin blocks (bytes) */
+mallinfo_uordblks=96710112 /* Total allocated space (bytes) */
+mallinfo_fordblks=3310112 /* Total free space (bytes) */
+mallinfo_keepcost=133712 /* Top-most, releasable space (bytes) */
+```
+
+### Data structure allocation stats
+For every xlator data structure memory per translator loaded in the call-graph is displayed in the following format:
+
+For xlator with name: glusterfs
+```
+[global.glusterfs - Memory usage] #[global.xlator-name - Memory usage]
+num_types=119 #It shows the number of data types it is using
+```
+
+Now for each data-type it prints the memory usage.
+
+```
+[global.glusterfs - usage-type gf_common_mt_gf_timer_t memusage]
+#[global.xlator-name - usage-type <tag associated with the data-type> memusage]
+size=112 #num_allocs times the sizeof(data-type) i.e. num_allocs * sizeof (data-type)
+num_allocs=2 #Number of allocations of the data-type which are active at the time of taking statedump.
+max_size=168 #max_num_allocs times the sizeof(data-type) i.e. max_num_allocs * sizeof (data-type)
+max_num_allocs=3 #Maximum number of active allocations at any point in the life of the process.
+total_allocs=7 #Number of times this data is allocated in the life of the process.
+```
+
+### Mempools
+
+Mempools are optimization to reduce the number of allocations of a data type. If we create a mem-pool of lets say 1024 elements for a data-type, new elements will be allocated from heap using syscalls like calloc, only if all the 1024 elements in the pool are in active use.
+
+Memory pool allocated by each xlator is displayed in the following format:
+
+```
+[mempool] #Section name
+-----=-----
+pool-name=fuse:fd_t #pool-name=<xlator-name>:<data-type>
+hot-count=1 #number of mempool elements that are in active use. i.e. for this pool it is the number of 'fd_t' s in active use.
+cold-count=1023 #number of mempool elements that are not in use. If a new allocation is required it will be served from here until all the elements in the pool are in use i.e. cold-count becomes 0.
+padded_sizeof=108 #Each mempool element is padded with a doubly-linked-list + ptr of mempool + is-in-use info to operate the pool of elements, this size is the element-size after padding
+pool-misses=0 #Number of times the element had to be allocated from heap because all elements from the pool are in active use.
+alloc-count=314 #Number of times this type of data is allocated through out the life of this process. This may include pool-misses as well.
+max-alloc=3 #Maximum number of elements from the pool in active use at any point in the life of the process. This does *not* include pool-misses.
+cur-stdalloc=0 #Denotes the number of allocations made from heap once cold-count reaches 0, that are yet to be released via mem_put().
+max-stdalloc=0 #Maximum number of allocations from heap that are in active use at any point in the life of the process.
+```
+
+### Iobufs
+```
+[iobuf.global]
+iobuf_pool=0x1f0d970 #The memory pool for iobufs
+iobuf_pool.default_page_size=131072 #The default size of iobuf (if no iobuf size is specified the default size is allocated)
+#iobuf_arena: One arena represents a group of iobufs of a particular size
+iobuf_pool.arena_size=12976128 # The initial size of the iobuf pool (doesn't include the stdalloc'd memory or the newly added arenas)
+iobuf_pool.arena_cnt=8 #Total number of arenas in the pool
+iobuf_pool.request_misses=0 #The number of iobufs that were stdalloc'd (as they exceeded the default max page size provided by iobuf_pool).
+```
+
+There are 3 lists of arenas:
+
+1. Arena list: arenas allocated during iobuf pool creation and the arenas that are in use(active_cnt != 0) will be part of this list.
+2. Purge list: arenas that can be purged(no active iobufs, active_cnt == 0).
+3. Filled list: arenas without free iobufs.
+
+```
+[purge.1] #purge.<S.No.>
+purge.1.mem_base=0x7fc47b35f000 #The address of the arena structure
+purge.1.active_cnt=0 #The number of iobufs active in that arena
+purge.1.passive_cnt=1024 #The number of unused iobufs in the arena
+purge.1.alloc_cnt=22853 #Total allocs in this pool(number of times the iobuf was allocated from this arena)
+purge.1.max_active=7 #Max active iobufs from this arena, at any point in the life of this process.
+purge.1.page_size=128 #Size of all the iobufs in this arena.
+
+[arena.5] #arena.<S.No.>
+arena.5.mem_base=0x7fc47af1f000
+arena.5.active_cnt=0
+arena.5.passive_cnt=64
+arena.5.alloc_cnt=0
+arena.5.max_active=0
+arena.5.page_size=32768
+```
+
+If the active_cnt of any arena is non zero, then the statedump will also have the iobuf list.
+```
+[arena.6.active_iobuf.1] #arena.<S.No>.active_iobuf.<iobuf.S.No.>
+arena.6.active_iobuf.1.ref=1 #refcount of the iobuf
+arena.6.active_iobuf.1.ptr=0x7fdb921a9000 #address of the iobuf
+
+[arena.6.active_iobuf.2]
+arena.6.active_iobuf.2.ref=1
+arena.6.active_iobuf.2.ptr=0x7fdb92189000
+```
+
+At any given point in time if there are lots of filled arenas then that could be a sign of iobuf leaks.
+
+### Call stack
+All the fops received by gluster are handled using call-stacks. Call stack contains the information about uid/gid/pid etc of the process that is executing the fop. Each call-stack contains different call-frames per xlator which handles that fop.
+
+```
+[global.callpool.stack.3] #global.callpool.stack.<Serial-Number>
+stack=0x7fc47a44bbe0 #Stack address
+uid=0 #Uid of the process which is executing the fop
+gid=0 #Gid of the process which is executing the fop
+pid=6223 #Pid of the process which is executing the fop
+unique=2778 #Xlators like afr do copy_frame and perform the operation in a different stack, this id is useful to find out the stacks that are inter-related because of copy-frame
+lk-owner=0000000000000000 #Some of the fuse fops have lk-owner.
+op=LOOKUP #Fop
+type=1 #Type of the op i.e. FOP/MGMT-OP
+cnt=9 #Number of frames in this stack.
+```
+### Call-frame
+Each frame will have information about which xlator the frame belongs to, what is the function it wound to/from and will be unwind to. It also mentions if the unwind happened or not. If we observe hangs in the system and want to find out which xlator is causing it. Take a statedump and see what is the final xlator which is yet to be unwound.
+
+```
+[global.callpool.stack.3.frame.2]#global.callpool.stack.<stack-serial-number>.frame.<frame-serial-number>
+frame=0x7fc47a611dbc #Frame address
+ref_count=0 #Incremented at the time of wind and decremented at the time of unwind.
+translator=r2-client-1 #Xlator this frame belongs to
+complete=0 #if this value is 1 that means this frame is already unwound. 0 if it is yet to unwind.
+parent=r2-replicate-0 #Parent xlator of this frame
+wind_from=afr_lookup #Parent xlator function from which the wind happened
+wind_to=priv->children[i]->fops->lookup
+unwind_to=afr_lookup_cbk #Parent xlator function to which unwind happened
+```
+
+### History of operations in Fuse
+
+Fuse maintains history of operations that happened in fuse.
+
+```
+[xlator.mount.fuse.history]
+TIME=2014-07-09 16:44:57.523364
+message=[0] fuse_release: RELEASE(): 4590:, fd: 0x1fef0d8, gfid: 3afb4968-5100-478d-91e9-76264e634c9f
+
+TIME=2014-07-09 16:44:57.523373
+message=[0] send_fuse_err: Sending Success for operation 18 on inode 3afb4968-5100-478d-91e9-76264e634c9f
+
+TIME=2014-07-09 16:44:57.523394
+message=[0] fuse_getattr_resume: 4591, STAT, path: (/iozone.tmp), gfid: (3afb4968-5100-478d-91e9-76264e634c9f)
+```
+
+### Xlator configuration
+```
+[cluster/replicate.r2-replicate-0] #Xlator type, name information
+child_count=2 #Number of children to the xlator
+#Xlator specific configuration below
+child_up[0]=1
+pending_key[0]=trusted.afr.r2-client-0
+child_up[1]=1
+pending_key[1]=trusted.afr.r2-client-1
+data_self_heal=on
+metadata_self_heal=1
+entry_self_heal=1
+data_change_log=1
+metadata_change_log=1
+entry-change_log=1
+read_child=1
+favorite_child=-1
+wait_count=1
+```
+
+### Graph/inode table
+```
+[active graph - 1]
+
+conn.1.bound_xl./data/brick01a/homegfs.hashsize=14057
+conn.1.bound_xl./data/brick01a/homegfs.name=/data/brick01a/homegfs/inode
+conn.1.bound_xl./data/brick01a/homegfs.lru_limit=16384 #Least recently used size limit
+conn.1.bound_xl./data/brick01a/homegfs.active_size=690 #Number of inodes undergoing some kind of fop to be precise on which there is at least one ref.
+conn.1.bound_xl./data/brick01a/homegfs.lru_size=183 #Number of inodes present in lru list
+conn.1.bound_xl./data/brick01a/homegfs.purge_size=0 #Number of inodes present in purge list
+```
+
+### Inode
+```
+[conn.1.bound_xl./data/brick01a/homegfs.active.324] #324th inode in active inode list
+gfid=e6d337cf-97eb-44b3-9492-379ba3f6ad42 #Gfid of the inode
+nlookup=13 #Number of times lookups happened from the client or from fuse kernel
+fd-count=4 #Number of fds opened on the inode
+ref=11 #Number of refs taken on the inode
+ia_type=1 #Type of the inode. This should be changed to some string :-(
+Ref by xl:.patchy-md-cache=11 #Further this there will be a list of xlators, and the ref count taken by each of them on this inode at the time of statedump
+
+[conn.1.bound_xl./data/brick01a/homegfs.lru.1] #1st inode in lru list. Note that ref count is zero for these inodes.
+gfid=5114574e-69bc-412b-9e52-f13ff087c6fc
+nlookup=5
+fd-count=0
+ref=0
+ia_type=2
+Ref by xl:.fuse=1
+Ref by xl:.patchy-client-0=-1
+```
+### Inode context
+For each inode per xlator some context could be stored. This context can also be printed in the statedump. Here is the inode ctx of locks xlator
+```
+[xlator.features.locks.homegfs-locks.inode]
+path=/homegfs/users/dfrobins/gfstest/r4/SCRATCH/fort.5102 - path of the file
+mandatory=0
+inodelk-count=5 #Number of inode locks
+lock-dump.domain.domain=homegfs-replicate-0:self-heal #Domain name where self-heals take locks to prevent more than one heal on the same file
+inodelk.inodelk[0](ACTIVE)=type=WRITE, whence=0, start=0, len=0, pid = 18446744073709551615, owner=080b1ada117f0000, client=0xb7fc30, connection-id=compute-30-029.com-3505-2014/06/29-14:46:12:477358-homegfs-client-0-0-1, granted at Sun Jun 29 11:01:00 2014 #Active lock information
+
+inodelk.inodelk[1](BLOCKED)=type=WRITE, whence=0, start=0, len=0, pid = 18446744073709551615, owner=c0cb091a277f0000, client=0xad4f10, connection-id=gfs01a.com-4080-2014/06/29-14:41:36:917768-homegfs-client-0-0-0, blocked at Sun Jun 29 11:04:44 2014 #Blocked lock information
+
+lock-dump.domain.domain=homegfs-replicate-0:metadata #Domain name where metadata operations take locks to maintain replication consistency
+lock-dump.domain.domain=homegfs-replicate-0 #Domain name where entry/data operations take locks to maintain replication consistency
+inodelk.inodelk[0](ACTIVE)=type=WRITE, whence=0, start=11141120, len=131072, pid = 18446744073709551615, owner=080b1ada117f0000, client=0xb7fc30, connection-id=compute-30-029.com-3505-2014/06/29-14:46:12:477358-homegfs-client-0-0-1, granted at Sun Jun 29 11:10:36 2014 #Active lock information
+```
+
+## FAQ
+### How to debug Memory leaks using statedump?
+
+#### Using memory accounting feature:
+
+[Bug 1120151](https://bugzilla.redhat.com/show_bug.cgi?id=1120151) is one of the bugs which was debugged using statedump to see which data-structure is leaking. Here is the process used to find what the leak is using statedump. According to the bug the observation is that the process memory usage is increasing whenever one of the bricks is wiped in a replicate volume and a `full` self-heal is invoked to heal the contents. Statedump of the process is taken using `kill -USR1 <pid-of-gluster-self-heal-daemon>`.
+```
+grep -w num_allocs glusterdump.5225.dump.1405493251
+num_allocs=77078
+num_allocs=87070
+num_allocs=117376
+....
+
+grep hot-count glusterdump.5225.dump.1405493251
+hot-count=16384
+hot-count=16384
+hot-count=4095
+....
+
+Find the occurrences in the statedump file to figure out the tags.
+```
+grep of the statedump revealed too many allocations for the following data-types under replicate,
+
+1. gf_common_mt_asprintf
+2. gf_common_mt_char
+3. gf_common_mt_mem_pool.
+
+After checking afr-code for allocations with tag `gf_common_mt_char` found `data-self-heal` code path does not free one such allocated memory. `gf_common_mt_mem_pool` suggests that there is a leak in pool memory. `replicate-0:dict_t`, `glusterfs:data_t` and `glusterfs:data_pair_t` pools are using lot of memory, i.e. cold_count is `0` and too many allocations. Checking source code of dict.c revealed that `key` in `dict` is allocated with `gf_common_mt_char` i.e. `2.` tag and value is created using gf_asprintf which in-turn uses `gf_common_mt_asprintf` i.e. `1.`. Browsing the code for leak in self-heal code paths lead to a line which over-writes a variable with new dictionary even when it was already holding a reference to another dictionary. After fixing these leaks, ran the same test to verify that none of the `num_allocs` are increasing even after healing 10,000 files directory hierarchy in statedump of self-heal daemon.
+Please check this [patch](http://review.gluster.org/8316) for more info about the fix.
+
+#### Debugging leaks in memory pools:
+Statedump output of memory pools was used to test and verify the fixes to [Bug 1134221](https://bugzilla.redhat.com/show_bug.cgi?id=1134221). On code analysis, dict_t objects were found to be leaking (in terms of not being unref'd enough number of times, during name self-heal. The test involved creating 100 files on plain replicate volume, removing them from one of the brick's backend, and then triggering lookup on them from the mount point. Statedump of the mount process was taken before executing the test case and after it, after compiling glusterfs with -DDEBUG flags (to have cold count set to 0 by default).
+
+Statedump output of the fuse mount process before the test case was executed:
+
+```
+
+pool-name=glusterfs:dict_t
+hot-count=0
+cold-count=0
+padded_sizeof=140
+alloc-count=33
+max-alloc=0
+pool-misses=33
+cur-stdalloc=14
+max-stdalloc=18
+
+```
+Statedump output of the fuse mount process after the test case was executed:
+
+```
+
+pool-name=glusterfs:dict_t
+hot-count=0
+cold-count=0
+padded_sizeof=140
+alloc-count=2841
+max-alloc=0
+pool-misses=2841
+cur-stdalloc=214
+max-stdalloc=220
+
+```
+Here, with cold count being 0 by default, `cur-stdalloc` indicated the number of `dict_t` objects that were allocated in heap using `mem_get()`, and yet to be freed using `mem_put()` (refer to this [page](../developer-guide/datastructure-mem-pool.md) for more details on how mempool works). After the test case (name selfheal of 100 files), there was a rise in the cur-stdalloc value (from 14 to 214) for `dict_t`.
+
+After these leaks were fixed, glusterfs was again compiled with -DDEBUG flags, and the same steps were performed again and statedump was taken before and after executing the test case, of the mount. This was done to ascertain the validity of the fix. And the following are the results:
+
+Statedump output of the fuse mount process before executing the test case:
+
+```
+pool-name=glusterfs:dict_t
+hot-count=0
+cold-count=0
+padded_sizeof=140
+alloc-count=33
+max-alloc=0
+pool-misses=33
+cur-stdalloc=14
+max-stdalloc=18
+
+```
+Statedump output of the fuse mount process after executing the test case:
+
+```
+pool-name=glusterfs:dict_t
+hot-count=0
+cold-count=0
+padded_sizeof=140
+alloc-count=2837
+max-alloc=0
+pool-misses=2837
+cur-stdalloc=14
+max-stdalloc=119
+
+```
+The value of cur-stdalloc remained 14 before and after the test, indicating that the fix indeed does what it's supposed to do.
+
+### How to debug hangs because of frame-loss?
+[Bug 994959](https://bugzilla.redhat.com/show_bug.cgi?id=994959) is one of the bugs where statedump was helpful in finding where the frame was lost. Here is the process used to find where the hang is using statedump.
+When the hang was observed, statedumps are taken for all the processes. On mount's statedump the following stack is shown:
+```
+[global.callpool.stack.1.frame.1]
+ref_count=1
+translator=fuse
+complete=0
+
+[global.callpool.stack.1.frame.2]
+ref_count=0
+translator=r2-client-1
+complete=1 <<----- Client xlator completed the readdirp call and unwound to afr
+parent=r2-replicate-0
+wind_from=afr_do_readdir
+wind_to=children[call_child]->fops->readdirp
+unwind_from=client3_3_readdirp_cbk
+unwind_to=afr_readdirp_cbk
+
+[global.callpool.stack.1.frame.3]
+ref_count=0
+translator=r2-replicate-0
+complete=0 <<---- Afr xlator is not unwinding for some reason.
+parent=r2-dht
+wind_from=dht_do_readdir
+wind_to=xvol->fops->readdirp
+unwind_to=dht_readdirp_cbk
+
+[global.callpool.stack.1.frame.4]
+ref_count=1
+translator=r2-dht
+complete=0
+parent=r2-io-cache
+wind_from=ioc_readdirp
+wind_to=FIRST_CHILD(this)->fops->readdirp
+unwind_to=ioc_readdirp_cbk
+
+[global.callpool.stack.1.frame.5]
+ref_count=1
+translator=r2-io-cache
+complete=0
+parent=r2-quick-read
+wind_from=qr_readdirp
+wind_to=FIRST_CHILD (this)->fops->readdirp
+unwind_to=qr_readdirp_cbk
+
+```
+`unwind_to` shows that call was unwound to `afr_readdirp_cbk` from client xlator.
+Inspecting that function revealed that afr is not unwinding the stack when fop failed.
+Check this [patch](http://review.gluster.org/5531) for more info about the fix.
diff --git a/doc/developer-guide/Language-Bindings.md b/doc/developer-guide/Language-Bindings.md
new file mode 100644
index 00000000000..951f5fae2f6
--- /dev/null
+++ b/doc/developer-guide/Language-Bindings.md
@@ -0,0 +1,45 @@
+# Language Bindings
+GlusterFS 3.4 introduced the libgfapi client API for C programs. This
+page lists bindings to the libgfapi C library from other languages.
+
+Go
+--
+
+- [gogfapi](https://github.com/gluster/gogfapi) - Go language bindings
+ for libgfapi, aiming to provide an api consistent with the default
+ Go file apis.
+
+Java
+----
+
+- [libgfapi-jni](https://github.com/semiosis/libgfapi-jni/) - Low
+ level JNI binding for libgfapi
+- [glusterfs-java-filesystem](https://github.com/semiosis/glusterfs-java-filesystem)
+ - High level NIO.2 FileSystem Provider implementation for the Java
+ platform
+- [libgfapi-java-io](https://github.com/gluster/libgfapi-java-io) -
+ Java bindings for libgfapi, similar to java.io
+
+Python
+------
+
+- [libgfapi-python](https://github.com/gluster/libgfapi-python) -
+ Libgfapi bindings for Python
+
+Ruby
+----
+
+- [libgfapi-ruby](https://github.com/spajus/libgfapi-ruby) - Libgfapi
+ bindings for Ruby using FFI
+
+Rust
+----
+
+- [gfapi-sys](https://github.com/cholcombe973/Gfapi-sys) - Libgfapi
+ bindings for Rust using FFI
+
+Perl
+----
+
+- [libgfapi-perl](https://github.com/gluster/libgfapi-perl) - Libgfapi
+ bindings for Perl using FFI
diff --git a/doc/developer-guide/README.md b/doc/developer-guide/README.md
new file mode 100644
index 00000000000..aaf9c7476b0
--- /dev/null
+++ b/doc/developer-guide/README.md
@@ -0,0 +1,81 @@
+Developers
+==========
+
+### From GlusterDocumentation
+
+Contributing to the Gluster community
+-------------------------------------
+
+Are you itching to send in patches and participate as a developer in the
+Gluster community? Here are a number of starting points for getting
+involved. We don't require a signed contributor license agreement or
+copyright assignment, but we do require a "signed-off-by" line on each
+code check-in.
+
+- [License
+ Change](http://www.gluster.org/2012/05/glusterfs-license-change/) -
+ we recently changed the client library code to a dual license under
+ the GPL v2 and the LGPL v3 or later
+- [GlusterFS Coding Standards](./coding-standard.md)
+
+- If you are not sure of where to start, and what to do, we have a small
+ write-up on what you can pick. [Check it out](./options-to-contribute.md)
+
+
+Adding File operations
+----------------------
+
+- [Steps to be followed when adding a new FOP to GlusterFS ](./adding-fops.md)
+
+Automatic File Replication
+--------------------------
+
+- [Cluster/afr translator](./afr.md)
+- [History of Locking in AFR](./afr-locks-evolution.md)
+- [Self heal Daemon](./afr-self-heal-daemon.md)
+
+Data Structures
+---------------
+
+- [inode data structure](./datastructure-inode.md)
+- [iobuf data structure](./datastructure-iobuf.md)
+- [mem-pool data structure](./datastructure-mem-pool.md)
+
+Find the gfapi symbol versions [here](./gfapi-symbol-versions.md)
+
+Daemon Management Framework
+---------------------------
+
+- [How to introduce new daemons using daemon management framework](./daemon-management-framework.md)
+
+Translators
+-----------
+
+- [Performance/write-Behind Translator](./write-behind.md)
+- [Translator Development](./translator-development.md)
+- [Storage/posix Translator](./posix.md)
+
+
+Brick multiplex
+---------------
+
+- [Brick mux resource reduction](./brickmux-thread-reduction.md)
+
+Fuse
+----
+
+- [Interrupt handling](./fuse-interrupt.md)
+
+Testing/Debugging
+-----------------
+
+- [Unit Tests in GlusterFS](./unittest.md)
+- [Using the Gluster Test
+ Framework](./Using-Gluster-Test-Framework.md) - Step by
+ step instructions for running the Gluster Test Framework
+- [Coredump Analysis](../debugging/analyzing-regression-cores.md) - Steps to analize coredumps generated by regression machines.
+- [Identifying Resource Leaks](./identifying-resource-leaks.md)
+
+Release Process
+---------------
+- [Versioning](./versioning.md)
diff --git a/doc/developer-guide/Using-Gluster-Test-Framework.md b/doc/developer-guide/Using-Gluster-Test-Framework.md
new file mode 100644
index 00000000000..d2bb1c391da
--- /dev/null
+++ b/doc/developer-guide/Using-Gluster-Test-Framework.md
@@ -0,0 +1,271 @@
+# USing Gluster Test Framwork
+Description
+-----------
+
+The Gluster Test Framework, is a suite of scripts used for regression
+testing of Gluster.
+
+It runs well on RHEL and CentOS (possibly Fedora too, presently being
+tested), and is automatically run against every patch submitted to
+Gluster [for review](http://review.gluster.org).
+
+The Gluster Test Framework is part of the main Gluster code base, living
+under the "tests" subdirectory:
+
+ http://git.gluster.org/?p=glusterfs.git;a=summary
+
+WARNING
+-------
+
+Running the Gluster Test Framework deletes “/var/lib/glusterd/\*â€.
+
+**DO NOT run it on a server with any data.**
+
+Preparation steps for Ubuntu 14.04 LTS
+--------------------------------------
+
+​1. \# apt-get install dbench git libacl1-dev mock nfs-common
+nfs-kernel-server libtest-harness-perl libyajl-dev xfsprogs psmisc attr
+acl lvm2 rpm
+
+​2. \# apt-get install python-webob python-paste python-sphinx
+
+​3. \# apt-get install autoconf automake bison dos2unix flex libfuse-dev
+libaio-dev libibverbs-dev librdmacm-dev libtool libxml2-dev
+libxml2-utils liblvm2-dev make libssl-dev pkg-config libpython-dev
+python-eventlet python-netifaces python-simplejson python-pyxattr
+libreadline-dev tar
+
+​4) Install cmockery2 from github (https://github.com/lpabon/cmockery2)
+and compile and make install as in Readme
+
+5)
+
+ sudo groupadd mock
+ sudo useradd -g mock mock
+
+​6) mkdir /var/run/gluster
+
+**Note**: redhat-rpm-config package is not found in ubuntu
+
+Preparation steps for CentOS 7 (only)
+-------------------------------------
+
+​1. Install EPEL:
+
+ $ sudo yum install -y http://epel.mirror.net.in/epel/7/x86_64/e/epel-release-7-1.noarch.rpm
+
+​2. Install the CentOS 7.x dependencies:
+
+ $ sudo yum install -y --enablerepo=epel cmockery2-devel dbench git libacl-devel mock nfs-utils perl-Test-Harness yajl xfsprogs psmisc
+
+ $ sudo yum install -y --enablerepo=epel python-webob1.0 python-paste-deploy1.5 python-sphinx10 redhat-rpm-config
+
+==\> Despite below missing packages it worked for me
+
+ No package python-webob1.0 available.
+ No package python-paste-deploy1.5 available.
+ No package python-sphinx10 available.
+
+ $ sudo yum install -y --enablerepo=epel autoconf automake bison dos2unix flex fuse-devel libaio-devel libibverbs-devel \
+  librdmacm-devel libtool libxml2-devel lvm2-devel make openssl-devel pkgconfig \
+  python-devel python-eventlet python-netifaces python-paste-deploy \
+  python-simplejson python-sphinx python-webob pyxattr readline-devel rpm-build \
+  tar
+
+​3. Create the mock user
+
+ $ sudo useradd -g mock mock
+
+Preparation steps for CentOS 6.3+ (only)
+----------------------------------------
+
+​1. Install EPEL:
+
+ $ sudo yum install -y http://download.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
+
+​2. Install the CentOS 6.x dependencies:
+
+ $ sudo yum install -y --enablerepo=epel cmockery2-devel dbench git libacl-devel mock nfs-utils perl-Test-Harness yajl xfsprogs
+ $ sudo yum install -y --enablerepo=epel python-webob1.0 python-paste-deploy1.5 python-sphinx10 redhat-rpm-config
+ $ sudo yum install -y --enablerepo=epel autoconf automake bison dos2unix flex fuse-devel libaio-devel libibverbs-devel \
+   librdmacm-devel libtool libxml2-devel lvm2-devel make openssl-devel pkgconfig \
+   python-devel python-eventlet python-netifaces python-paste-deploy \
+   python-simplejson python-sphinx python-webob pyxattr readline-devel rpm-build \
+   tar
+
+​3. Create the mock user
+
+ $ sudo useradd -g mock mock
+
+Preparation steps for RHEL 6.3+ (only)
+--------------------------------------
+
+​1. Ensure you have the "Scalable Filesystem Support" group installed
+
+This provides the xfsprogs package, which is required by the test
+framework.
+
+​2. Install EPEL:
+
+ $ sudo yum install -y http://download.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm
+
+​3. Install the CentOS 6.x dependencies:
+
+ $ sudo yum install -y --enablerepo=epel cmockery2-devel dbench git libacl-devel mock nfs-utils yajl perl-Test-Harness
+ $ sudo yum install -y --enablerepo=rhel-6-server-optional-rpms python-webob1.0 python-paste-deploy1.5 python-sphinx10 redhat-rpm-config
+ $ sudo yum install -y --disablerepo=rhs* --enablerepo=*optional-rpms autoconf \
+   automake bison dos2unix flex fuse-devel libaio-devel libibverbs-devel \
+   librdmacm-devel libtool libxml2-devel lvm2-devel make openssl-devel pkgconfig \
+   python-devel python-eventlet python-netifaces python-paste-deploy \
+   python-simplejson python-sphinx python-webob pyxattr readline-devel rpm-build \
+   tar
+
+​4. Create the mock user
+
+ $ sudo useradd -g mock mock
+
+Preparation steps for Fedora 16-19 (only)
+-----------------------------------------
+
+**Still in development**
+
+​1. Install the Fedora dependencies:
+
+ $ sudo yum install -y attr cmockery2-devel dbench git mock nfs-utils perl-Test-Harness psmisc xfsprogs
+ $ sudo yum install -y python-webob1.0 python-paste-deploy1.5 python-sphinx10 redhat-rpm-config
+ $ sudo yum install -y autoconf automake bison dos2unix flex fuse-devel libaio-devel libibverbs-devel \
+   librdmacm-devel libtool libxml2-devel lvm2-devel make openssl-devel pkgconfig \
+   python-devel python-eventlet python-netifaces python-paste-deploy \
+   python-simplejson python-sphinx python-webob pyxattr readline-devel rpm-build \
+   tar
+
+​3. Create the mock user
+
+ $ sudo useradd -g mock mock
+
+Common steps
+------------
+
+​1. Ensure DNS for your server is working
+
+The Gluster Test Framework fails miserably if the full domain name for
+your server doesn't resolve back to itself.
+
+If you don't have a working DNS infrastructure in place, adding an entry
+for your server to its /etc/hosts file will work.
+
+​2. Install the version of Gluster you are testing
+
+Either install an existing set of rpms:
+
+ $ sudo yum install [your gluster rpms here]
+
+Or compile your own ones (fairly easy):
+
+ http://www.gluster.org/community/documentation/index.php/CompilingRPMS
+
+​3. Clone the GlusterFS git repository
+
+ $ git clone git://git.gluster.org/glusterfs
+ $ cd glusterfs
+
+Ensure mock can access the directory
+------------------------------------
+
+Some tests run as the user "mock". If the mock user can't access the
+tests subdirectory directory, these tests fail. (rpm.t is one such test)
+
+This is a known gotcha when the git repo is cloned to your home
+directory. Home directories generally don't have world readable
+permissions. You can fix this by adjusting your home directory
+permissions, or placing the git repo somewhere else (with access for the
+mock user).
+
+Running the tests
+-----------------
+
+The tests need to run as root, so they can mount volumes and manage
+gluster processes as needed.
+
+It's also best to run them directly as the root user, instead of through
+sudo. Strange things sporadicly happen (for me) when using the full test
+framework through sudo, that haven't happened (yet) when running
+directly as root. Hangs in dbench particularly, which are part of at
+least one test.
+
+ # ./run-tests.sh
+
+The test framework takes just over 45 minutes to run in a VM here (4
+cpu's assigned, 8GB ram, SSD storage). It may take significantly more or
+less time for you, depending on the hardware and software you're using.
+
+Showing debug information
+-------------------------
+
+To display verbose information while the tests are running, set the
+DEBUG environment variable to 1 prior to running the tests.
+
+ # DEBUG=1 ./run-tests.sh
+
+Log files
+---------
+
+Verbose output from the rpm.t test goes into "rpmbuild-mock.log",
+located in the same directory the test is run from.
+
+Reporting bugs
+--------------
+
+If you hit a bug when running the test framework, **please** create a
+bug report for it on Bugzilla so it gets fixed:
+
+ https://bugzilla.redhat.com/enter_bug.cgi?product=GlusterFS&component=tests
+
+Creating your own tests
+-----------------------
+
+The test scripts are written in bash, with their filenames ending in .t
+instead of .sh.
+
+When creating your own test scripts, create them in an appropriate
+subdirectory under "tests" (eg "bugs" or "features") and use descriptive
+names like "bug-XXXXXXX-checking-feature-X.t"
+
+Also include the "include.rc" file, which defines the test types and
+host/brick/volume defaults:
+
+ . $(dirname $0)/../include.rc
+
+There are 5 test types available at present, but feel free to add more
+if you need something that doesn't yet exist. The test types are
+explained in more detail below.
+
+Also essential is the "cleanup" command, which removes any existing
+Gluster configuration (**without backing it up**), and also kills any
+running gluster processes.
+
+There is a basic test template you can copy, named bug-000000.t in the
+bugs subdirectory:
+
+ $ cp bugs/bug-000000.t somedir/descriptive-name.t
+
+### TEST
+
+- Example of usage in basic/volume.t
+
+### TEST\_IN\_LOOP
+
+- Example of usage in basic/rpm.t
+
+### EXPECT
+
+- Example of usage in basic/volume.t
+
+### EXPECT\_WITHIN
+
+- Example of usage in basic/volume-status.t
+
+### EXPECT\_KEYWORD
+
+- Defined in include.rc, but seems to be unused?
diff --git a/doc/developer-guide/adding-fops.md b/doc/developer-guide/adding-fops.md
new file mode 100644
index 00000000000..3f72ed3e23a
--- /dev/null
+++ b/doc/developer-guide/adding-fops.md
@@ -0,0 +1,18 @@
+Adding a new FOP
+================
+
+Steps to be followed when adding a new FOP to GlusterFS:
+
+1. Edit `glusterfs.h` and add a `GF_FOP_*` constant.
+2. Edit `xlator.[ch]` and:
+ * add the new prototype for fop and callback.
+ * edit `xlator_fops` structure.
+3. Edit `xlator.c` and add to fill_defaults.
+4. Edit `protocol.h` and add struct necessary for the new FOP.
+5. Edit `defaults.[ch]` and provide default implementation.
+6. Edit `call-stub.[ch]` and provide stub implementation.
+7. Edit `common-utils.c` and add to gf_global_variable_init().
+8. Edit client-protocol and add your FOP.
+9. Edit server-protocol and add your FOP.
+10. Implement your FOP in any translator for which the default implementation
+ is not sufficient.
diff --git a/doc/developer-guide/afr-locks-evolution.md b/doc/developer-guide/afr-locks-evolution.md
new file mode 100644
index 00000000000..2dabbcfeb13
--- /dev/null
+++ b/doc/developer-guide/afr-locks-evolution.md
@@ -0,0 +1,91 @@
+History of locking in AFR
+--------------------------
+
+GlusterFS has **locks** translator which provides the following internal locking operations called `inodelk`, `entrylk` which are used by afr to achieve synchronization of operations on files or directories that conflict with each other.
+
+`Inodelk` gives the facility for translators in GlusterFS to obtain range (denoted by tuple with **offset**, **length**) locks in a given **domain** for an inode.
+Full file lock is denoted by the tuple (offset: `0`, length: `0`) i.e. length `0` is considered as infinity.
+
+`Entrylk` enables translators of GlusterFS to obtain locks on `name` in a given **domain** for an inode, typically a directory.
+
+The **locks** translator provides both *blocking* and *nonblocking* variants and of these locks.
+
+
+AFR makes use of locks xlator extensively:
+
+1)For FOPS (from clients)
+-----------------------
+* Data transactions take inode locks on data domain, Let's refer to this domain name as DATA_DOMAIN.
+
+ So locking for writes would be something like this:`inodelk(offset,length, DATA_DOMAIN)`
+ For truncating a file to zero, it would be `inodelk(0,0,DATA_DOMAIN)`
+
+* Metadata transactions (chown/chmod) also take inode locks but on a special range on metadata domain,
+ i.e.`(LLONG_MAX-1 , 0, METADATA_DOMAIN).`
+
+* Entry transactions (create, mkdir, rmdir,unlink, symlink, link,rename) take entrylk on `(name, parent inode)`.
+
+
+2)For self heal:
+-------------
+* For Metadata self-heal, it is the same. i.e.`inodelk(LLONG_MAX-1 , 0, METADATA_DOMAIN)`.
+* For Entry self-heal, it is `entrylk(NULL name, parent inode)`. Specifying NULL for the name takes full lock on the directory referred to by the inode.
+* For data self-heal, there is a bit of history as to how locks evolved:
+
+### Initial version (say version 1) :
+There was no concept of selfheal daemon (shd). Only client lookups triggered heals. so AFR always took `inodelk(0,0,DATA_DOMAIN)` for healing. The issue with this approach was that when heal was in progress, I/O from clients was blocked .
+
+### version 2:
+shd was introduced. We needed to allow I/O to go through when heal was going,provided the ranges did not overlap. To that extent, the following approach was adopted:
+
++ 1.shd takes (full inodelk in DATA_DOMAIN). Thus client FOPS are blocked and cannot modify changelog-xattrs
++ 2.shd inspects xattrs to determine source/sink
++ 3.shd takes a chunk inodelk(0-128kb) again in DATA_DOMAIN (locks xlator allows overlapping locks if lock owner is the same).
++ 4.unlock full lock
++ 5.heal
++ 6.take next chunk lock(129-256kb)
++ 7.unlock 1st chunk lock, heal the second chunk and so on.
+
+
+Thus after 4, any client FOP could write to regions that was not currently under heal. The exception was truncate (to size 0) because it needs full file lock and will always block because some chunk is always under lock by the shd until heal completes.
+
+Another issue was that 2 shds could run in parallel. Say SHD1 and SHD2 compete for step 1. Let SHD1 win. It proceeds and completes step 4. Now SHD2 also succeeds in step 1, continues all steps. Thus at the end both shds will decrement the changelog leading to negative values in it)
+
+### version 3
+To prevent parallel self heals, another domain was introduced, let us call it SELF_HEAL_DOMAIN. With this domain, the following approach was adopted and is **the approach currently in use**:
+
++ 1.shd takes (full inodelk on SELF_HEAL_DOMAIN)
++ 2.shd takes (full inodelk on DATA_DOMAIN)
++ 3.shd inspects xattrs to determine source/sink
++ 4.unlock full lock on DATA_DOMAIN
++ 5.take chunk lock(0-128kb) on DATA_DOMAIN
++ 6.heal
++ 7.take next chunk lock(129-256kb) on DATA_DOMAIN
++ 8.unlock 1st chunk lock, heal and so on.
++ 9.Finally release full lock on SELF_HEAL_DOMAIN
+
+Thus until one shd completes step 9, another shd cannot start step 1, solving the problem of simultaneous heals.
+Note that the issue of truncate (to zero) FOP hanging still remains.
+Also there are multiple network calls involved in this scheme. (lock,heal(ie read+write), unlock) per chunk. i.e 4 calls per chunk.
+
+### version 4 (ToDo)
+Some improvements that need to be made in version 3:
+* Reduce network calls using piggy backing.
+* After taking chunk lock and healing, we need to unlock the lock before locking the next chunk. This gives a window for any pending truncate FOPs to succeed. If truncate succeeds, the heal of next chunk will fail (read returns zero)
+and heal is stopped. *BUT* there is **yet another** issue:
+
+* shd does steps 1 to 4. Let's assume source is brick b1, sink is brick b2 . i.e xattrs are (0,1) and (0,0) on b1 and b2 respectively. Now before shd takes (0-128kb) lock, a client FOP takes it.
+It modifies data but the FOP succeeds only on brick 2. writev returns success, and the attrs now read (0,1) (1,0). SHD takes over and heals. It had observed (0,1),(0,0) earlier
+and thus goes ahead and copies stale 128Kb from brick 1 to brick2. Thus as far as application is concerned, `writev` returned success but bricks have stale data.
+What needs to be done is `writev` must return success only if it succeeded on atleast one source brick (brick b1 in this case). Otherwise The heal still happens in reverse direction but as far as the application is concerned, it received an error.
+
+### Note on lock **domains**
+We have used conceptual names in this document like DATA_DOMAIN/ METADATA_DOMAIN/ SELF_HEAL_DOMAIN. In the code, these are mapped to strings that are based on the AFR xlator name like so:
+
+DATA_DOMAIN --->"vol_name-replicate-n"
+
+METADATA_DOMAIN --->"vol_name-replicate-n:metadata"
+
+SELF_HEAL_DOMAIN -->"vol_name-replicate-n:self-heal"
+
+where vol_name is the name of the volume and 'n' is the replica subvolume index (starting from 0).
diff --git a/doc/developer-guide/afr-self-heal-daemon.md b/doc/developer-guide/afr-self-heal-daemon.md
new file mode 100644
index 00000000000..65940d420b7
--- /dev/null
+++ b/doc/developer-guide/afr-self-heal-daemon.md
@@ -0,0 +1,92 @@
+Self-Heal Daemon
+================
+The self-heal daemon (shd) is a glusterfs process that is responsible for healing files in a replicate/ disperse gluster volume.
+Every server (brick) node of the volume runs one instance of the shd. So even if one node contains replicate/ disperse bricks of
+multiple volumes, it would be healed by the same shd.
+
+This document only describes how the shd works for replicate (AFR) volumes.
+
+The shd is launched by glusterd when the volume starts (only if the volume includes a replicate configuration). The graph
+of the shd process in every node contains the following: The io-stats which is the top most xlator, its children being the
+replicate xlators (subvolumes) of *only* the bricks present in that particular node, and finally *all* the client xlators that are the children of the replicate xlators.
+
+The shd does two types of self-heal crawls: Index heal and Full heal. For both these types of crawls, the basic idea is the same:
+For each file encountered while crawling, perform metadata, data and entry heals under appropriate locks.
+* An overview of how each of these heals is performed is detailed in the 'Self-healing' section of *doc/features/afr-v1.md*
+* The different file locks which the shd takes for each of these heals is detailed in *doc/developer
+-guide/afr-locks-evolution.md*
+
+Metadata heal refers to healing extended attributes, mode and permissions of a file or directory.
+Data heal refers to healing the file contents.
+Entry self-heal refers to healing entries inside a directory.
+
+Index heal
+==========
+The index heal is done:
+ a) Every 600 seconds (can be changed via the `cluster.heal-timeout` volume option)
+ b) When it is explicitly triggered via the `gluster vol heal <VOLNAME>` command
+ c) Whenever a replica brick that was down comes back up.
+
+Only one heal can be in progress at one time, irrespective of reason why it was triggered. If another heal is triggered before the first one completes, it will be queued.
+Only one heal can be queued while the first one is running. If an Index heal is queued, it can be overridden by queuing a Full heal and not vice-versa. Also, before processing
+each entry in index heal, a check is made if a full heal is queued. If it is, then the index heal is aborted so that the full heal can proceed.
+
+In index heal, each shd reads the entries present inside .glusterfs/indices/xattrop/ folder and triggers heal on each entry with appropriate locks.
+The .glusterfs/indices/xattrop/ directory contains a base entry of the name "xattrop-<virtual-gfid-string>". All other entries are hardlinks to the base entry. The
+*names* of the hardlinks are the gfid strings of the files that may need heal.
+
+When a client (mount) performs an operation on the file, the index xlator present in each brick process adds the hardlinks in the pre-op phase of the FOP's transaction
+and removes it in post-op phase if the operation is successful. Thus if an entry is present inside the .glusterfs/indices/xattrop/ directory when there is no I/O
+happening on the file, it means the file needs healing (or atleast an examination if the brick crashed after the post-op completed but just before the removal of the hardlink).
+
+#### Index heal steps:
+<pre><code>
+In shd process of *each node* {
+ opendir +readdir (.glusterfs/indices/xattrop/)
+ for each entry inside it {
+ self_heal_entry() //Explained below.
+ }
+}
+</code></pre>
+
+<pre><code>
+self_heal_entry() {
+ Call syncop_lookup(replicae subvolume) which eventually does {
+ take appropriate locks
+ determine source and sinks from AFR changelog xattrs
+ perform whatever heal is needed (any of metadata, data and entry heal in that order)
+ clear changelog xattrs and hardlink inside .glusterfs/indices/xattrop/
+ }
+}
+</code></pre>
+
+Note:
+* If the gfid hardlink is present in the .glusterfs/indices/xattrop/ of both replica bricks, then each shd will try to heal the file but only one of them will be able to proceed due to the self-heal domain lock.
+
+* While processing entries inside .glusterfs/indices/xattrop/, if shd encounters an entry whose parent is yet to be healed, it will skip it and it will be picked up in the next crawl.
+
+* If a file is in data/ metadata split-brain, it will not be healed.
+
+* If a directory is in entry split-brain, a conservative merge will be performed, wherein after the merge, the entries of the directory will be a union of the entries in the replica pairs.
+
+Full heal
+=========
+A full heal is triggered by running `gluster vol heal <VOLNAME> full`. This command is usually run in disk replacement scenarios where the entire data is to be copied from one of the healthy bricks of the replica to the brick that was just replaced.
+
+Unlike the index heal which runs on the shd of every node in a replicate subvolume, the full heal is run only on the shd of one node per replicate subvolume: the node having the highest UUID.
+i.e In a 2x2 volume made of 4 nodes N1, N2, N3 and N4, If UUID of N1>N2 and UUID N4 >N3, then the full crawl is carried out by the shds of N1 and N4.(Node UUID can be found in `/var/lib/glusterd/glusterd.info`)
+
+The full heal steps are almost identical to the index heal, except the heal is performed on each replica starting from the root of the volume:
+<pre><code>
+In shd process of *highest UUID node per replica* {
+ opendir +readdir ("/")
+ for each entry inside it {
+ self_heal_entry()
+ if (entry == directory) {
+ /* Recurse*/
+ again opendir+readdir (directory) followed by self_heal_entry() of each entry.
+ }
+
+ }
+}
+</code></pre>
diff --git a/doc/developer-guide/afr.md b/doc/developer-guide/afr.md
new file mode 100644
index 00000000000..566573a4e26
--- /dev/null
+++ b/doc/developer-guide/afr.md
@@ -0,0 +1,191 @@
+cluster/afr translator
+======================
+
+Locking
+-------
+
+Before understanding replicate, one must understand two internal FOPs:
+
+### `GF_FILE_LK`
+
+This is exactly like `fcntl(2)` locking, except the locks are in a
+separate domain from locks held by applications.
+
+### `GF_DIR_LK (loc_t *loc, char *basename)`
+
+This allows one to lock a name under a directory. For example,
+to lock /mnt/glusterfs/foo, one would use the call:
+
+```
+GF_DIR_LK ({loc_t for "/mnt/glusterfs"}, "foo")
+```
+
+If one wishes to lock *all* the names under a particular directory,
+supply the basename argument as `NULL`.
+
+The locks can either be read locks or write locks; consult the
+function prototype for more details.
+
+Both these operations are implemented by the features/locks (earlier
+known as posix-locks) translator.
+
+Basic design
+------------
+
+All FOPs can be classified into four major groups:
+
+### inode-read
+
+Operations that read an inode's data (file contents) or metadata (perms, etc.).
+
+access, getxattr, fstat, readlink, readv, stat.
+
+### inode-write
+
+Operations that modify an inode's data or metadata.
+
+chmod, chown, truncate, writev, utimens.
+
+### dir-read
+
+Operations that read a directory's contents or metadata.
+
+readdir, getdents, checksum.
+
+### dir-write
+
+Operations that modify a directory's contents or metadata.
+
+create, link, mkdir, mknod, rename, rmdir, symlink, unlink.
+
+Some of these make a subgroup in that they modify *two* different entries:
+link, rename, symlink.
+
+### Others
+
+Other operations.
+
+flush, lookup, open, opendir, statfs.
+
+Algorithms
+----------
+
+Each of the four major groups has its own algorithm:
+
+### inode-read, dir-read
+
+1. Send a request to the first child that is up:
+ * if it fails:
+ * try the next available child
+ * if we have exhausted all children:
+ * return failure
+
+### inode-write
+
+ All operations are done in parallel unless specified otherwise.
+
+1. Send a ``GF_FILE_LK`` request on all children for a write lock on the
+ appropriate region
+ (for metadata operations: entire file (0, 0) for writev:
+ (offset, offset+size of buffer))
+ * If a lock request fails on a child:
+ * unlock all children
+ * try to acquire a blocking lock (`F_SETLKW`) on each child, serially.
+ If this fails (due to `ENOTCONN` or `EINVAL`):
+ Consider this child as dead for rest of transaction.
+2. Mark all children as "pending" on all (alive) children (see below for
+meaning of "pending").
+ * If it fails on any child:
+ * mark it as dead (in transaction local state).
+3. Perform operation on all (alive) children.
+ * If it fails on any child:
+ * mark it as dead (in transaction local state).
+4. Unmark all successful children as not "pending" on all nodes.
+5. Unlock region on all (alive) children.
+
+### dir-write
+
+ The algorithm for dir-write is same as above except instead of holding
+ `GF_FILE_LK` locks we hold a GF_DIR_LK lock on the name being operated upon.
+ In case of link-type calls, we hold locks on both the operand names.
+
+"pending"
+---------
+
+The "pending" number is like a journal entry. A pending entry is an
+array of 32-bit integers stored in network byte-order as the extended
+attribute of an inode (which can be a directory as well).
+
+There are three keys corresponding to three types of pending operations:
+
+### `AFR_METADATA_PENDING`
+
+There are some metadata operations pending on this inode (perms, ctime/mtime,
+xattr, etc.).
+
+### `AFR_DATA_PENDING`
+
+There is some data pending on this inode (writev).
+
+### `AFR_ENTRY_PENDING`
+
+There are some directory operations pending on this directory
+(create, unlink, etc.).
+
+Self heal
+---------
+
+* On lookup, gather extended attribute data:
+ * If entry is a regular file:
+ * If an entry is present on one child and not on others:
+ * create entry on others.
+ * If entries exist but have different metadata (perms, etc.):
+ * consider the entry with the highest `AFR_METADATA_PENDING` number as
+ definitive and replicate its attributes on children.
+ * If entry is a directory:
+ * Consider the entry with the highest `AFR_ENTRY_PENDING` number as
+ definitive and replicate its contents on all children.
+ * If any two entries have non-matching types (i.e., one is file and
+ other is directory):
+ * Announce to the user via log that a split-brain situation has been
+ detected, and do nothing.
+* On open, gather extended attribute data:
+ * Consider the file with the highest `AFR_DATA_PENDING` number as
+ the definitive one and replicate its contents on all other
+ children.
+
+During all self heal operations, appropriate locks must be held on all
+regions/entries being affected.
+
+Inode scaling
+-------------
+
+Inode scaling is necessary because if a situation arises where an inode number
+is returned for a directory (by lookup) which was previously the inode number
+of a file (as per FUSE's table), then FUSE gets horribly confused (consult a
+FUSE expert for more details).
+
+To avoid such a situation, we distribute the 64-bit inode space equally
+among all children of replicate.
+
+To illustrate:
+
+If c1, c2, c3 are children of replicate, they each get 1/3 of the available
+inode space:
+
+------------- -- -- -- -- -- -- -- -- -- -- -- ---
+Child: c1 c2 c3 c1 c2 c3 c1 c2 c3 c1 c2 ...
+Inode number: 1 2 3 4 5 6 7 8 9 10 11 ...
+------------- -- -- -- -- -- -- -- -- -- -- -- ---
+
+Thus, if lookup on c1 returns an inode number "2", it is scaled to "4"
+(which is the second inode number in c1's space).
+
+This way we ensure that there is never a collision of inode numbers from
+two different children.
+
+This reduction of inode space doesn't really reduce the usability of
+replicate since even if we assume replicate has 1024 children (which would be a
+highly unusual scenario), each child still has a 54-bit inode space:
+$2^{54} \sim 1.8 \times 10^{16}$, which is much larger than any real
+world requirement.
diff --git a/doc/developer-guide/brickmux-thread-reduction.md b/doc/developer-guide/brickmux-thread-reduction.md
new file mode 100644
index 00000000000..7d76e8ff579
--- /dev/null
+++ b/doc/developer-guide/brickmux-thread-reduction.md
@@ -0,0 +1,64 @@
+# Resource usage reduction in brick multiplexing
+
+Each brick is regresented with a graph of translators in a brick process.
+Each translator in the graph has its own set of threads and mem pools
+and other system resources allocations. Most of the times all these
+resources are not put to full use. Reducing the resource consumption
+of each brick is a problem in itself that needs to be addressed. The other
+aspect to it is, sharing of resources across brick graph, this becomes
+critical in brick multiplexing scenario. In this document we will be discussing
+only about the threads.
+
+If a brick mux process hosts 50 bricks there are atleast 600+ threads created
+in that process. Some of these are global threads that are shared by all the
+brick graphs, and others are per translator threads. The global threads like
+synctask threads, timer threads, sigwaiter, poller etc. are configurable and
+do not needs to be reduced. The per translator threads keeps growing as the
+number of bricks in the process increases. Each brick spawns atleast 10+
+threads:
+- io-threads
+- posix threads:
+ 1. Janitor
+ 2. Fsyncer
+ 3. Helper
+ 4. aio-thread
+- changelog and bitrot threads(even when the features are not enabled)
+
+## io-threads
+
+io-threads should be made global to the process, having 16+ threads for
+each brick does not make sense. But io-thread translator is loaded in
+the graph, and the position of io-thread translator decides from when
+the fops will be parallelised across threads. We cannot entirely move
+the io-threads to libglusterfs and say the multiplexing happens from
+the master translator or so. Hence, the io-thread orchestrator code
+is moved to libglusterfs, which ensures there is only one set of
+io-threads that is shared among the io-threads translator in each brick.
+This poses performance issues due to lock-contention in the io-threds
+layer. This also shall be addressed by having multiple locks instead of
+one global lock for io-threads.
+
+## Posix threads
+Most of the posix threads execute tasks in a timely manner, hence it can be
+replaced with a timer whose handler register a task to synctask framework, once
+the task is complete, the timer is registered again. With this we can eliminate
+the need of one thread for each task. The problem with using synctasks is
+the performance impact it will have due to make/swapcontext. For task that
+does not involve network wait, we need not do makecontext, instead the task
+function with arg can be stored and executed when a synctask thread is free.
+We need to implement an api in synctask to execute atomic tasks(no network wait)
+without the overhead of make/swapcontext. This will solve the performance
+impact associated with using synctask framework.
+
+And the other challenge, is to cancel all the tasks pending from a translator.
+This is important to cleanly detach brick. For this, we need to implement an
+api in synctask that can cancel all the tasks from a given translator.
+
+For future, this will be replced to use global thread-pool(once implemented).
+
+## Changelog and bitrot threads
+
+In the initial implementation, the threads are not created if the feature is
+not enabled. We need to share threads across changelog instances if we plan
+to enable these features in brick mux scenario.
+
diff --git a/doc/developer-guide/coding-standard.md b/doc/developer-guide/coding-standard.md
new file mode 100644
index 00000000000..031c6c0da99
--- /dev/null
+++ b/doc/developer-guide/coding-standard.md
@@ -0,0 +1,697 @@
+GlusterFS Coding Standards
+==========================
+
+Before you get started
+----------------------
+Before starting with other part of coding standard, install `clang-format`
+
+On Fedora:
+```
+$ dnf install clang
+```
+On debian/Ubuntu:
+```
+$ apt-get install clang
+```
+Once you are done with all the local changes, you need to run below set of commands,
+before submitting the patch for review.
+```
+$ git add $file # if any
+$ git commit -a -s -m "commit message"
+$ git show --pretty="format:" --name-only | grep -v "contrib/" | egrep "*\.[ch]$" | xargs clang-format -i
+$ git diff # see if there are any changes
+$ git commit -a --amend # get the format changes done
+$ ./submit-for-review.sh
+```
+
+
+Structure definitions should have a comment per member
+------------------------------------------------------
+
+Every member in a structure definition must have a comment about its purpose.
+The comment should be descriptive without being overly verbose. For pointer
+members, lifecycle concerns for the pointed-to object should be noted. For lock
+members, the relationship between the lock member and the other members it
+protects should be explicit.
+
+*Bad:*
+
+```
+gf_lock_t lock; /* lock */
+```
+
+*Good:*
+
+```
+DBTYPE access_mode; /* access mode for accessing
+ * the databases, can be
+ * DB_HASH, DB_BTREE
+ * (option access-mode <mode>)
+ */
+```
+
+Structure members should be aligned based on the padding requirements
+---------------------------------------------------------------------
+
+The compiler will make sure that structure members have optimum alignment,
+but at the expense of suboptimal padding. More important is to optimize the
+padding. The compiler won't do that for you.
+
+This also will help utilize the memory better
+
+*Bad:*
+```
+struct bad {
+ bool b; /* 0 */
+ /* 1..7 pad */
+ void *p; /* 8..15 */
+ char c; /* 16 */
+ char a[16]; /* 17..33 */
+ /* 34..39 pad */
+ int64_t ii; /* 40..47 */
+ int32_t i; /* 48..51 */
+ /* 52..55 pad */
+ int64_t iii; /* 56..63 */
+};
+```
+
+*Good:*
+```
+struct good {
+ int64_t ii; /* explicit 64-bit types */
+ void *p; /* may be 64- or 32-bit */
+ long l; /* may be 64- or 32-bit */
+ int i; /* 32-bit */
+ short s; /* 16-bit */
+ char c; /* 8-bit */
+ bool b; /* 8-bit */
+ char a[1024];
+);
+```
+Make sure the items with the most stringent alignment requirements will need
+to come earliest (ie, pointers and perhaps uint64_t etc), and those with less
+stringent alignment requirements at the end (uint16/uint8 and char). Also note
+that the long array (if any) should be at the end of the structure, regardless
+of the type.
+
+Also note, if your structure's overall size is crossing 1k-4k limit, it is
+recommended to mention the reason why the particular structure needs so much
+memory as a comment at the top.
+
+Use \_typename for struct tags and typename\_t for typedefs
+---------------------------------------------------------
+
+Being consistent here makes it possible to automate navigation from use of a
+type to its true definition (not just the typedef).
+
+*Bad:*
+
+```
+struct thing {...};
+struct thing_t {...};
+typedef struct _thing thing;
+```
+
+*Good:*
+
+```
+typedef struct _thing {...} thing_t;
+```
+
+No double underscores
+---------------------
+
+Identifiers beginning with double underscores are supposed to reserved for the
+compiler.
+
+http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1570.pdf
+
+When you need to define inner/outer functions, use a different prefix/suffix.
+
+*Bad:*
+
+```
+void __do_something (void);
+
+void
+do_something (void)
+{
+ LOCK ();
+ __do_something ();
+ UNLOCK ();
+}
+```
+
+*Good:*
+
+```
+void do_something_locked (void);
+```
+
+Only use safe pointers in initializers
+----------------------------------------------------------
+
+Some pointers, such as `this` in a fop function, can be assumed to be non-NULL.
+However, other parameters and further-derived values might be NULL.
+
+*Good:*
+
+```
+pid_t pid = frame->root->pid;
+```
+
+
+*Bad:*
+
+```
+data_t *my_data = dict_get (xdata, "fubar");
+```
+
+No giant stack allocations
+--------------------------
+
+Synctasks have small finite stacks. To avoid overflowing these stacks, avoid
+allocating any large data structures on the stack. Use dynamic allocation
+instead.
+
+*Bad:*
+
+```
+gf_boolean_t port_inuse[65536]; /* 256KB, this actually happened */
+```
+
+NOTE: Ideal is to limit the stack array to less than 256 bytes.
+
+
+Character array initializing
+----------------------------
+
+It is recommended to keep the character array initializing to empty string.
+
+*Good:*
+```
+char msg[1024] = "";
+```
+
+Not so much recommended, even though it means the same.
+
+```
+char msg[1024] = {0,};
+```
+
+We recommend above to structure initialization.
+
+
+
+Validate all arguments to a function
+------------------------------------
+
+All pointer arguments to a function must be checked for `NULL`.
+A macro named `GF_VALIDATE_OR_GOTO` (in `common-utils.h`)
+takes two arguments; if the first is `NULL`, it writes a log message and
+jumps to a label specified by the second aergument after setting errno
+appropriately. There are several variants of this function for more
+specific purposes, and their use is recommended.
+
+*Bad:*
+
+```
+/* top of function */
+ret = dict_get (xdata, ...)
+```
+
+*Good:*
+
+```
+/* top of function */
+GF_VALIDATE_OR_GOTO(xdata,out);
+ret = dict_get (xdata, ...)
+```
+
+Never rely on precedence of operators
+-------------------------------------
+
+Never write code that relies on the precedence of operators to execute
+correctly. Such code can be hard to read and someone else might not
+know the precedence of operators as accurately as you do. This includes
+precedence of increment/decrement vs. field/subscript. The only exceptions are
+arithmetic operators (which have had defined precedence since before computers
+even existed) and boolean negation.
+
+*Bad:*
+
+```
+if (op_ret == -1 && errno != ENOENT)
+++foo->bar /* incrementing foo, or incrementing foo->bar? */
+a && b || !c
+```
+
+*Good:*
+
+```
+if ((op_ret == -1) && (errno != ENOENT))
+(++foo)->bar
+++(foo->bar)
+(a && b) || !c
+a && (b || !c)
+```
+
+Use exactly matching types
+--------------------------
+
+Use a variable of the exact type declared in the manual to hold the
+return value of a function. Do not use an 'equivalent' type.
+
+
+*Bad:*
+
+```
+int len = strlen (path);
+```
+
+*Good:*
+
+```
+size_t len = strlen (path);
+```
+
+Avoid code such as `foo->bar->baz`; check every pointer
+-------------------------------------------------------------
+
+Do not write code that blindly follows a chain of pointer references. Any
+pointer in the chain may be `NULL` and thus cause a crash. Verify that each
+pointer is non-null before following it. Even if `foo->bar` has been checked
+and is known safe, repeating it can make code more verbose and less clear.
+
+This rule includes `[]` as well as `->` because both dereference pointers.
+
+*Bad:*
+
+```
+foo->bar->field1 = value1;
+xyz = foo->bar->field2 + foo->bar->field3 * foo->bar->field4;
+foo->bar[5].baz
+```
+
+*Good:*
+
+```
+my_bar = foo->bar;
+if (!my_bar) ... return;
+my_bar->field1 = value1;
+xyz = my_bar->field2 + my_bar->field3 * my_bar->field4;
+```
+
+Document unchecked return values
+----------------------------------------------------
+
+In general, return values should be checked. If a function is being called
+for its side effects and the return value really doesn't matter, an explicit
+cast to void is required (to keep static analyzers happy) and a comment is
+recommended.
+
+*Bad:*
+
+```
+close (fd);
+do_important_thing ();
+```
+
+*Good (or at least OK):*
+
+```
+(void) sleep (1);
+```
+
+Gracefully handle failure of malloc (and other allocation functions)
+--------------------------------------------------------------------
+
+GlusterFS should never crash or exit due to lack of memory. If a
+memory allocation fails, the call should be unwound and an error
+returned to the user.
+
+*Use result args and reserve the return value to indicate success or failure:*
+
+The return value of every functions must indicate success or failure (unless
+it is impossible for the function to fail --- e.g., boolean functions). If
+the function needs to return additional data, it must be returned using a
+result (pointer) argument.
+
+*Bad:*
+
+```
+int32_t dict_get_int32 (dict_t *this, char *key);
+```
+
+*Good:*
+
+```
+int dict_get_int32 (dict_t *this, char *key, int32_t *val);
+```
+
+Always use the 'n' versions of string functions
+-----------------------------------------------
+
+Unless impossible, use the length-limited versions of the string functions.
+
+*Bad:*
+
+```
+strcpy (entry_path, real_path);
+```
+
+*Good:*
+
+```
+strncpy (entry_path, real_path, entry_path_len);
+```
+
+Do not use memset prior to sprintf/snprintf/vsnprintf etc...
+------------------------------------------------------------
+snprintf(and other similar string functions) terminates the buffer with a
+'\0'(null character). Hence, there is no need to do a memset before using
+snprintf. (Of course you need to account one extra byte for the null character
+in your allocation).
+
+Note: Similarly if you are doing pre-memory allocation for the buffer, use
+GF_MALLOC instead of GF_CALLOC, since the later is bit costlier.
+
+*Bad:*
+
+```
+char buffer[x];
+memset (buffer, 0, x);
+bytes_read = snprintf (buffer, sizeof buffer, "bad standard");
+```
+
+*Good:*
+```
+char buffer[x];
+bytes_read = snprintf (buffer, sizeof (buffer), "good standard");
+```
+
+And it is always to good initialize the char array if the string is static.
+
+E.g.
+```
+char buffer[] = "good standard";
+```
+
+No dead or commented code
+-------------------------
+
+There must be no dead code (code to which control can never be passed) or
+commented out code in the codebase.
+
+Function length or Keep functions small
+---------------------------------------
+
+We live in the UNIX-world where modules do one thing and do it well.
+This rule should apply to our functions also. If a function is very long, try splitting it
+into many little helper functions. The question is, in a coding
+spree, how do we know a function is long and unreadable. One rule of
+thumb given by Linus Torvalds is that, a function should be broken-up
+if you have 4 or more levels of indentation going on for more than 3-4
+lines.
+
+*Example for a helper function:*
+```
+static int
+same_owner (posix_lock_t *l1, posix_lock_t *l2)
+{
+ return ((l1->client_pid == l2->client_pid) &&
+ (l1->transport == l2->transport));
+}
+```
+
+Define functions as static
+--------------------------
+
+Declare functions as static unless they're exposed via a module-level API for
+use from other modules.
+
+No nested functions
+-------------------
+
+Nested functions have proven unreliable, e.g. as callbacks in code that uses
+ucontext (green) threads,
+
+Use inline functions instead of macros whenever possible
+--------------------------------------------------------
+
+Inline functions enforce type safety; macros do not. Use macros only for things
+that explicitly need to be type-agnostic (e.g. cases where one might use
+generics or templates in other languages), or that use other preprocessor
+features such as `#` for stringification or `##` for token pasting. In general,
+"static inline" is the preferred form.
+
+Avoid copypasta
+---------------
+
+Code that is copied and then pasted into multiple functions often creates
+maintenance problems later, e.g. updating all but one instance for a subsequent
+change. If you find yourself copying the same "boilerplate" many places,
+consider refactoring to use helper functions (including inline) or macros, or
+code generation.
+
+Ensure function calls wrap around after 80-columns
+--------------------------------------------------
+
+Place remaining arguments on the next line if needed.
+
+Functions arguments and function definition
+-------------------------------------------
+
+Place all the arguments of a function definition on the same line
+until the line goes beyond 80-cols. Arguments that extend beyind
+80-cols should be placed on the next line.
+
+Style issues
+------------
+
+### Brace placement
+
+Use K&R/Linux style of brace placement for blocks.
+
+*Good:*
+
+```
+int some_function (...)
+{
+ if (...) {
+ /* ... */
+ } else if (...) {
+ /* ... */
+ } else {
+ /* ... */
+ }
+
+ do {
+ /* ... */
+ } while (cond);
+}
+```
+
+### Indentation
+
+Use *eight* spaces for indenting blocks. Ensure that your
+file contains only spaces and not tab characters. You can do this
+in Emacs by selecting the entire file (`C-x h`) and
+running `M-x untabify`.
+
+To make Emacs indent lines automatically by eight spaces, add this
+line to your `.emacs`:
+
+```
+(add-hook 'c-mode-hook (lambda () (c-set-style "linux")))
+```
+
+### Comments
+
+Write a comment before every function describing its purpose (one-line),
+its arguments, and its return value. Mention whether it is an internal
+function or an exported function.
+
+Write a comment before every structure describing its purpose, and
+write comments about each of its members.
+
+Follow the style shown below for comments, since such comments
+can then be automatically extracted by doxygen to generate
+documentation.
+
+*Good:*
+
+```
+/**
+* hash_name -hash function for filenames
+* @par: parent inode number
+* @name: basename of inode
+* @mod: number of buckets in the hashtable
+*
+* @return: success: bucket number
+* failure: -1
+*
+* Not for external use.
+*/
+```
+
+### Indicating critical sections
+
+To clearly show regions of code which execute with locks held, use
+the following format:
+
+```
+pthread_mutex_lock (&mutex);
+{
+ /* code */
+}
+pthread_mutex_unlock (&mutex);
+```
+
+### Always use braces
+
+Even around single statements.
+
+*Bad:*
+
+```
+if (condition) action ();
+
+if (condition)
+ action ();
+```
+
+*Good:*
+
+```
+if (condition) {
+ action ();
+}
+```
+
+### Avoid multi-line conditionals
+
+These can be hard to read and even harder to modify later. Predicate functions
+and helper variables are always better for maintainability.
+
+*Bad:*
+
+```
+if ((thing1 && other_complex_condition (thing1, lots, of, args))
+ || (!thing2 || even_more_complex_condition (thing2))
+ || all_sorts_of_stuff_with_thing3) {
+ return;
+}
+
+```
+
+*Better:*
+
+```
+thing1_ok = predicate1 (thing1, lots, of, args
+thing2_ok = predicate2 (thing2);
+thing3_ok = predicate3 (thing3);
+
+if (!thing1_ok || !thing2_ok || !thing3_ok) {
+ return;
+}
+```
+
+*Best:*
+
+```
+if (thing1 && other_complex_condition (thing1, lots, of, args)) {
+ return;
+}
+if (!thing2 || even_more_complex_condition (thing2)) {
+ /* Note potential for a different message here. */
+ return;
+}
+if (all_sorts_of_stuff_with_thing3) {
+ /* And here too. */
+ return;
+}
+```
+
+### Use 'const' liberally
+
+If a value isn't supposed/expected to change, there's no cost to adding a
+'const' keyword and it will help prevent violation of expectations.
+
+### Avoid global variables (including 'static' auto variables)
+Almost all state in Gluster is contextual and should be contained in the
+appropriate structure reflecting its scope (e.g. `call\_frame\_t`, `call\_stack\_t`,
+`xlator\_t`, `glusterfs\_ctx\_t`). With dynamic loading and graph switches in play,
+each global requires careful consideration of when it should be initialized or
+reinitialized, when it might _accidentally_ be reinitialized, when its value
+might become stale, and so on. A few global variables are needed to serve as
+'anchor points' for these structures, and more exceptions to the rule might be
+approved in the future, but new globals should not be added to the codebase
+without explicit approval.
+
+## A skeleton fop function
+
+This is the recommended template for any fop. In the beginning come the
+initializations. After that, the 'success' control flow should be linear. Any
+error conditions should cause a `goto` to a label at the end. By convention
+this is 'out' if there is only one such label, but a cascade of such labels is
+allowable to support multi-stage cleanup. At that point, the code should detect
+the error that has occurred and do appropriate cleanup.
+
+```
+int32_t
+sample_fop (call_frame_t *frame, xlator_t *this, ...)
+{
+ char * var1 = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ DIR * dir = NULL;
+ struct posix_fd * pfd = NULL;
+
+ VALIDATE_OR_GOTO (frame, out);
+ VALIDATE_OR_GOTO (this, out);
+
+ /* other validations */
+
+ dir = opendir (...);
+
+ if (dir == NULL) {
+ op_errno = errno;
+ gf_log (this->name, GF_LOG_ERROR,
+ "opendir failed on %s (%s)", loc->path,
+ strerror (op_errno));
+ goto out;
+ }
+
+ /* another system call */
+ if (...) {
+ op_errno = ENOMEM;
+ gf_log (this->name, GF_LOG_ERROR,
+ "out of memory :(");
+ goto out;
+ }
+
+ /* ... */
+
+ out:
+ if (op_ret == -1) {
+
+ /* check for all the cleanup that needs to be
+ done */
+
+ if (dir) {
+ closedir (dir);
+ dir = NULL;
+ }
+
+ if (pfd) {
+ FREE (pfd->path);
+ FREE (pfd);
+ pfd = NULL;
+ }
+ }
+
+ STACK_UNWIND (frame, op_ret, op_errno, fd);
+ return 0;
+}
+```
diff --git a/doc/developer-guide/commit-guidelines.md b/doc/developer-guide/commit-guidelines.md
new file mode 100644
index 00000000000..38bbe525cbd
--- /dev/null
+++ b/doc/developer-guide/commit-guidelines.md
@@ -0,0 +1,136 @@
+## Git Commit Good Practice
+
+The following document is based on experience doing code development, bug troubleshooting and code review across a number of projects using Git. The document is mostly borrowed from [Open Stack](https://wiki.openstack.org/wiki/GitCommitMessages), but made more meaningful in the context of GlusterFS project.
+
+This topic can be split into two areas of concern
+
+* The structured set/split of the code changes
+* The information provided in the commit message
+
+### Executive Summary
+The points and examples that will be raised in this document ought to clearly demonstrate the value in splitting up changes into a sequence of individual commits, and the importance in writing good commit messages to go along with them. If these guidelines were widely applied it would result in a significant improvement in the quality of the GlusterFS Git history. Both a carrot & stick will be required to effect changes. This document intends to be the carrot by alerting people to the benefits, while anyone doing Gerrit code review can act as the stick ;-P
+
+In other words, when reviewing a change in Gerrit:
+* Do not simply look at the correctness of the code.
+* Review the commit message itself and request improvements to its content.
+* Look out for commits which are mixing multiple logical changes and require the submitter to split them into separate commits.
+* Ensure whitespace changes are not mixed in with functional changes.
+* Ensure no-op code refactoring is done separately from functional changes.
+
+And so on.
+
+It might be mentioned that Gerrit's handling of patch series is not entirely perfect. Let that not become a valid reason to avoid creating patch series. The tools being used should be subservient to developers needs, and since they are open source they can be fixed / improved. Software source code is "read mostly, write occassionally" and thus the most important criteria is to improve the long term maintainability by the large pool of developers in the community, and not to sacrifice too much for the sake of the single author who may never touch the code again.
+
+And now the long detailed guidelines & examples of good & bad practice
+
+### Structural split of changes
+The cardinal rule for creating good commits is to ensure there is only one "logical change" per commit. There are many reasons why this is an important rule:
+
+* The smaller the amount of code being changed, the quicker & easier it is to review & identify potential flaws.
+* If a change is found to be flawed later, it may be necessary to revert the broken commit. This is much easier to do if there are not other unrelated code changes entangled with the original commit.
+* When troubleshooting problems using Git's bisect capability, small well defined changes will aid in isolating exactly where the code problem was introduced.
+* When browsing history using Git annotate/blame, small well defined changes also aid in isolating exactly where & why a piece of code came from.
+
+#### Things to avoid when creating commits
+With the above points in mind, there are some commonly encountered examples of bad things to avoid
+
+* Mixing whitespace changes with functional code changes.
+
+The whitespace changes will obscure the important functional changes, making it harder for a reviewer to correctly determine whether the change is correct. Solution: Create 2 commits, one with the whitespace changes, one with the functional changes. Typically the whitespace change would be done first, but that need not be a hard rule.
+
+* Mixing two unrelated functional changes.
+
+Again the reviewer will find it harder to identify flaws if two unrelated changes are mixed together. If it becomes necessary to later revert a broken commit, the two unrelated changes will need to be untangled, with further risk of bug creation.
+
+* Sending large new features in a single giant commit.
+
+It may well be the case that the code for a new feature is only useful when all of it is present. This does not, however, imply that the entire feature should be provided in a single commit. New features often entail refactoring existing code. It is highly desirable that any refactoring is done in commits which are separate from those implementing the new feature. This helps reviewers and test suites validate that the refactoring has no unintentional functional changes.
+
+Even the newly written code can often be split up into multiple pieces that can be independently reviewed. For example, changes which add new internal fops or library functions, can be in self-contained commits. Again this leads to easier code review. It also allows other developers to cherry-pick small parts of the work, if the entire new feature is not immediately ready for merge. This will encourage the author & reviewers to think about the generic library functions' design, and not simply pick a design that is easier for their currently chosen internal implementation.
+
+The basic rule to follow is
+
+If a code change can be split into a sequence of patches/commits, then it should be split. Less is not more. More is more.
+
+##### Examples of bad practice
+
+TODO: Pick glusterfs specific example.
+
+
+##### Examples of good practice
+
+
+### Information in commit messages
+As important as the content of the change, is the content of the commit message describing it. When writing a commit message there are some important things to remember
+
+* Do not assume the reviewer understands what the original problem was.
+
+When reading bug reports, after a number of back & forth comments, it is often as clear as mud, what the root cause problem is. The commit message should have a clear statement as to what the original problem is. The bug is merely interesting historical background on /how/ the problem was identified. It should be possible to review a proposed patch for correctness without needing to read the bug ticket.
+
+* Do not assume the reviewer has access to external web services/site.
+
+In 6 months time when someone is on a train/plane/coach/beach/pub troubleshooting a problem & browsing Git history, there is no guarantee they will have access to the online bug tracker, or online blueprint documents. The great step forward with distributed SCM is that you no longer need to be "online" to have access to all information about the code repository. The commit message should be totally self-contained, to maintain that benefit.
+
+* Do not assume the code is self-evident/self-documenting.
+
+What is self-evident to one person, might be clear as mud to another person. Always document what the original problem was and how it is being fixed, for any change except the most obvious typos, or whitespace only commits.
+
+* Describe why a change is being made.
+
+A common mistake is to just document how the code has been written, without describing /why/ the developer chose to do it that way. By all means describe the overall code structure, particularly for large changes, but more importantly describe the intent/motivation behind the changes.
+
+* Read the commit message to see if it hints at improved code structure.
+
+Often when describing a large commit message, it becomes obvious that a commit should have in fact been split into 2 or more parts. Don't be afraid to go back and rebase the change to split it up into separate commits.
+
+* Ensure sufficient information to decide whether to review.
+
+When Gerrit sends out email alerts for new patch submissions there is minimal information included, principally the commit message and the list of files changes. Given the high volume of patches, it is not reasonable to expect all reviewers to examine the patches in detail. The commit message must thus contain sufficient information to alert the potential reviewers to the fact that this is a patch they need to look at.
+
+* The first commit line is the most important.
+
+In Git commits the first line of the commit message has special significance. It is used as email subject line, git annotate messages, gitk viewer annotations, merge commit messages and many more places where space is at a premium. As well as summarizing the change itself, it should take care to detail what part of the code is affected. eg if it is 'afr', 'dht' or any translator. Or in some cases, it can be touching all these components, but the commit message can be 'coverity:', 'txn-framework:', 'new-fop: ', etc.
+
+* Describe any limitations of the current code.
+
+If the code being changed still has future scope for improvements, or any known limitations then mention these in the commit message. This demonstrates to the reviewer that the broader picture has been considered and what tradeoffs have been done in terms of short term goals vs. long term wishes.
+
+* Do not include patch set-specific comments.
+
+In other words, if you rebase your change please don't add "Patch set 2: rebased" to your commit message. That isn't going to be relevant once your change has merged. Please do make a note of that in Gerrit as a comment on your change, however. It helps reviewers know what changed between patch sets. This also applies to comments such as "Added unit tests", "Fixed localization problems", or any other such patch set to patch set changes that don't affect the overall intent of your commit.
+
+**The main rule to follow is:**
+
+The commit message must contain all the information required to fully understand & review the patch for correctness. Less is not more. More is more.
+
+
+#### Including external references
+
+The commit message is primarily targeted towards human interpretation, but there is always some metadata provided for machine use. In the case of GlusterFS this includes at least the 'Change-id', "bug"/"feature" ID references and "Signed-off-by" tag (generated by 'git commit -s').
+
+The 'Change-id' line is a unique hash describing the change, which is generated by a Git commit hook. This should not be changed when rebasing a commit following review feedback, since it is used by Gerrit, to track versions of a patch.
+
+The 'bug' line can reference a bug in a few ways. Gerrit creates a link to the bug when viewing the patch on review.gluster.org so that reviewers can quickly access the bug/issue on Bugzilla or Github.
+
+**Fixes: bz#1601166** -- use 'Fixes: bz#NNNNN' if the commit is intended to fully fix and close the bug being referenced.
+**Fixes: #411** -- use 'Fixes: #NNN' if the patch fixes the github issue completely.
+
+**Updates: bz#1193929** -- use 'Updates: bz#NNNN' if the commit is only a partial fix and more work is needed.
+**Updates: #175** -- use 'Updates: #NNNN' if the commit is only a partial fix and more work is needed for the feature completion.
+
+We encourage the use of `Co-Authored-By: name <name@example.com>` in commit messages to indicate people who worked on a particular patch. It's a convention for recognizing multiple authors, and our projects would encourage the stats tools to observe it when collecting statistics.
+
+### Summary of Git commit message structure
+
+* Provide a brief description of the change in the first line.
+* The first line should be limited to 50 characters and should not end with a period.
+
+* Insert a single blank line after the first line.
+
+* Provide a detailed description of the change in the following lines, breaking paragraphs where needed.
+
+* Subsequent lines should be wrapped at 72 characters.
+
+Put the 'Change-id', 'Fixes bz#NNNNN' and 'Signed-off-by: <>' lines at the very end.
+
+TODO: Add good examples
diff --git a/doc/developer-guide/daemon-management-framework.md b/doc/developer-guide/daemon-management-framework.md
new file mode 100644
index 00000000000..592192e665d
--- /dev/null
+++ b/doc/developer-guide/daemon-management-framework.md
@@ -0,0 +1,38 @@
+
+How to introduce new daemons using daemon management framework
+==============================================================
+Glusterd manages GlusterFS daemons providing services like NFS, Proactive
+self-heal, Quota, User servicable snapshots etc. Following are some of the
+aspects that come under daemon management.
+
+Data members & functions of different management objects
+
+- **Connection Management**
+ - unix domain sockets based channel for internal communication
+ - rpc connection for the communication
+ - frame timeout value for UDS
+ - Methods - notify
+ - init, connect, termination, disconnect APIs can be invoked using the
+ connection management object
+
+- **Process Management**
+ - Name of the process
+ - pidfile to detect if the daemon is running
+ - loggging directory, log file, volfile, volfileserver & volfileid
+ - init, stop APIs can be invoked using the process management object
+
+- **Service Management**
+ - connection object
+ - process object
+ - online status
+ - Methods - manager, start, stop which can be abstracted as a common methods
+ or specific to service requirements
+ - init API can be invoked using the service management object
+
+ The above structures defines the skeleton of the daemon management framework.
+ Introduction of new daemons in GlusterFS needs to inherit these properties. Any
+ requirement specific to a daemon needs to be implemented in its own service
+ (for eg : snapd defines its own type glusterd_snapdsvc_t using glusterd_svc_t
+ and snapd specific data). New daemons will need to have its own service specific
+ code written in glusterd-<feature>-svc.h{c} and need to reuse the existing
+ framework.
diff --git a/doc/developer-guide/datastructure-inode.md b/doc/developer-guide/datastructure-inode.md
new file mode 100644
index 00000000000..45d7a941e5f
--- /dev/null
+++ b/doc/developer-guide/datastructure-inode.md
@@ -0,0 +1,221 @@
+# Inode and dentry management in GlusterFS:
+
+## Background
+Filesystems internally refer to files and directories via inodes. Inodes
+are unique identifiers of the entities stored in a filesystem. Whenever an
+application has to operate on a file/directory (read/modify), the filesystem
+maps that file/directory to the right inode and start referring to that inode
+whenever an operation has to be performed on the file/directory.
+
+In GlusterFS a new inode gets created whenever a new file/directory is created
+OR when a successful lookup is done on a file/directory for the first time.
+Inodes in GlusterFS are maintained by the inode table which gets initiated when
+the filesystem daemon is started (both for the brick process as well as the
+mount process). Below are some important data structures for inode management.
+
+## Data-structure (inode-table)
+```
+struct _inode_table {
+ pthread_mutex_t lock;
+ size_t hashsize; /* bucket size of inode hash and dentry hash */
+ char *name; /* name of the inode table, just for gf_log() */
+ inode_t *root; /* root directory inode, with inode
+ number and gfid 1 */
+ xlator_t *xl; /* xlator to be called to do purge and
+ the xlator which maintains the inode table*/
+ uint32_t lru_limit; /* maximum LRU cache size */
+ struct list_head *inode_hash; /* buckets for inode hash table */
+ struct list_head *name_hash; /* buckets for dentry hash table */
+ struct list_head active; /* list of inodes currently active (in an fop) */
+ uint32_t active_size; /* count of inodes in active list */
+ struct list_head lru; /* list of inodes recently used.
+ lru.next most recent */
+ uint32_t lru_size; /* count of inodes in lru list */
+ struct list_head purge; /* list of inodes to be purged soon */
+ uint32_t purge_size; /* count of inodes in purge list */
+
+ struct mem_pool *inode_pool; /* memory pool for inodes */
+ struct mem_pool *dentry_pool; /* memory pool for dentrys */
+ struct mem_pool *fd_mem_pool; /* memory pool for fd_t */
+ int ctxcount; /* number of slots in inode->ctx */
+};
+```
+
+# Life-cycle
+```
+inode_table_new (size_t lru_limit, xlator_t *xl)
+```
+This is a function which allocates a new inode table. Usually the top xlators in
+the graph such as protocol/server (for bricks), fuse and nfs (for fuse and nfs
+mounts) and libgfapi do inode managements. Hence they are the ones which will
+allocate a new inode table by calling the above function.
+
+Each xlator graph in glusterfs maintains an inode table. So in fuse clients,
+whenever there is a graph change due to add brick/remove brick or
+addition/removal of some other xlators, a new graph is created which creates a
+new inode table.
+
+Thus an allocated inode table is destroyed only when the filesystem daemon is
+killed or unmounted.
+
+
+# what it contains.
+Inode table in glusterfs mainly contains a hash table for maintaining inodes.
+In general a file/directory is considered to be existing if there is a
+corresponding inode present in the inode table. If a inode for a file/directory
+cannot be found in the inode table, glusterfs tries to resolve it by sending a
+lookup on the entry for which the inode is needed. If lookup is successful, then
+a new inode correponding to the entry is added to the hash table present in the
+inode table. Thus an inode present in the hash-table means, its an existing
+file/directory within the filesystem. The inode table also contains the hash
+size of the hash table (as of now it is hard coded to 14057. The hash value of
+a inode is calculated using its gfid).
+
+Apart from the hash table, inode table also maintains 3 important list of inodes
+1. Active list:
+Active list contains all the active inodes (i.e inodes which are currently part
+of some fop).
+2. Lru list:
+Least recently used inodes list. A limit can be set for the size of the lru
+list. For bricks it is 16384 and for clients it is infinity.
+3. Purge list:
+List of all the inodes which have to be purged (i.e inodes which have to be
+deleted from the inode table due to unlink/rmdir/forget).
+
+And at last it also contains the mem-pool for allocating inodes, dentries so
+that frequent malloc/calloc and free of the data structures can be avoided.
+
+
+# Data structure (inode)
+```
+struct _inode {
+ inode_table_t *table; /* the table this inode belongs to */
+ uuid_t gfid; /* unique identifier of the inode */
+ gf_lock_t lock;
+ uint64_t nlookup;
+ uint32_t fd_count; /* Open fd count */
+ uint32_t ref; /* reference count on this inode */
+ ia_type_t ia_type; /* what kind of file */
+ struct list_head fd_list; /* list of open files on this inode */
+ struct list_head dentry_list; /* list of directory entries for this inode */
+ struct list_head hash; /* hash table pointers */
+ struct list_head list; /* active/lru/purge */
+
+ struct _inode_ctx *_ctx; /* place holder for keeping the
+ information about the inode by different xlators */
+};
+```
+As said above, inodes are internal way of identifying the files/directories. A
+inode uniquely represents a file/directory. A new inode is created whenever a
+create/mkdir/symlink/mknod operations are performed. Apart from that a new inode
+is created upon the successful fresh lookup of a file/directory. Say the
+filesystem contained some file "a" within root and the filesystem was
+unmounted. Now when glusterfs is mounted and some operation is perfomed on "/a",
+glusterfs tries to get the inode for the entry "a" with parent inode as
+root. But, since glusterfs just came up, it will not be able to find the inode
+for "a" and will send a lookup on "/a". If the lookup operation succeeds (i.e.
+the root of glusterfs contains an entry called "a"), then a new inode for "/a"
+is created and added to the inode table.
+
+Depending upon the situation, an inode can be in one of the 3 lists maintained
+by the inode table. If some fop is happening on the inode, then the inode will
+be present in the active inodes list maintained by the inode table. Active
+inodes are those inodes whose refcount is greater than zero. Whenever some
+operation comes on a file/directory, and the resolver tries to find the inode
+for it, it increments the refcount of the inode before returning the inode. The
+refcount of an inode can be incremented by calling the below function
+```
+inode_ref (inode_t *inode)
+```
+Any xlator which wants to operate on a inode as part of some fop (or wants the
+inode in the callback), should hold a ref on the inode.
+Once the fop is completed before sending the reply of the fop to the above
+layers , the inode has to be unrefed. When the refcount of an inode becomes
+zero, it is removed from the active inodes list and put into LRU list maintained
+by the inode table. Thus in short if some fop is happening on a file/directory,
+the corresponding inode will be in the active list or it will be in the LRU
+list.
+
+
+# Life Cycle
+
+A new inode is created whenever a new file/directory/symlink is created OR a
+successful lookup of an existing entry is done. The xlators which does inode
+management (as of now protocol/server, fuse, nfs, gfapi) will perform inode_link
+operation upon successful lookup or successful creation of a new entry.
+```
+inode_link (inode_t *inode, inode_t *parent, const char *name,
+ struct iatt *buf);
+```
+inode_link actually adds the inode to the inode table (to be precise it adds
+the inode to the hash table maintained by the inode table. The hash value is
+calculated based on the gfid). Copies the gfid to the inode (the gfid is
+present in the iatt structure). Creates a dentry with the new name.
+
+A inode is removed from the inode table and eventually destroyed when unlink
+or rmdir operation is performed on a file/directory, or the the lru limit of
+the inode table has been exceeded.
+
+# Data structure (dentry)
+```
+
+struct _dentry {
+ struct list_head inode_list; /* list of dentries of inode */
+ struct list_head hash; /* hash table pointers */
+ inode_t *inode; /* inode of this directory entry */
+ char *name; /* name of the directory entry */
+ inode_t *parent; /* directory of the entry */
+};
+```
+A dentry is the presence of an entry for a file/directory within its parent
+directory. A dentry usually points to the inode to which it belongs to. In
+glusterfs a dentry contains the following fields.
+1. a hook using which it can add itself to the list of
+the dentries maintained by the inode to which it points to.
+2. A hash table pointer.
+3. Pointer to the inode to which it belongs to.
+4. Name of the dentry
+5. Pointer to the inode of the parent directory in which the dentry is present
+
+A new dentry is created when a new file/directory/symlink is created or a hard
+link to an existing file is created.
+```
+__dentry_create (inode_t *inode, inode_t *parent, const char *name);
+```
+A dentry holds a refcount on the parent
+directory so that the parent inode is never removed from the active inode's list
+and put to the lru list (If the lru limit of the lru list is exceeded, there is
+a chance of parent inode being destroyed. To avoid it, the dentries hold a
+reference to the parent inode). A dentry is removed whenevern a unlink/rmdir
+is perfomed on a file/directory. Or when the lru limit has been exceeded, the
+oldest inodes are purged out of the inode table, during which all the dentries
+of the inode are removed.
+
+Whenever a unlink/rmdir comes on a file/directory, the corresponding inode
+should be removed from the inode table. So upon unlink/rmdir, the inode will
+be moved to the purge list maintained by the inode table and from there it is
+destroyed. To be more specific, if a inode has to be destroyed, its refcount
+and nlookup count both should become 0. For refcount to become 0, the inode
+should not be part of any fop (there should not be any open fds). Or if the
+inode belongs to a directory, then there should not be any fop happening on the
+directory and it should not contain any dentries within it. For nlookup count to
+become zero, a forget has to be sent on the inode with nlookup count set to 0 as
+an argument. For fuse clients, forget is sent by the kernel itself whenever a
+unlink/rmdir is performed. But for brick processes, upon unlink/rmdir, the
+protocol/server itself has to do inode_forget. Whenever the inode has to be
+deleted due to file removal or lru limit being exceeded the inode is retired
+(i.e. all the dentries of the inode are deleted and the inode is moved to the
+purge list maintained by the inode table), the nlookup count is set to 0 via
+inode_forget api. The inode table, then prunes all the inodes from the purge
+list by destroying the inode contexts maintained by each xlator.
+```
+unlinking of the dentry is done via inode_unlink;
+
+void
+inode_unlink (inode_t *inode, inode_t *parent, const char *name);
+```
+If the inode has multiple hard links, then the unlink operation performed by
+the application results just in the removal of the dentry with the name provided
+by the application. For the inode to be removed, all the dentries of the inode
+should be unlinked.
+
diff --git a/doc/developer-guide/datastructure-iobuf.md b/doc/developer-guide/datastructure-iobuf.md
new file mode 100644
index 00000000000..03604e3672c
--- /dev/null
+++ b/doc/developer-guide/datastructure-iobuf.md
@@ -0,0 +1,259 @@
+# Iobuf-pool
+## Datastructures
+### iobuf
+Short for IO Buffer. It is one allocatable unit for the consumers of the IOBUF
+API, each unit hosts @page_size(defined in arena structure) bytes of memory. As
+initial step of processing a fop, the IO buffer passed onto GlusterFS by the
+other applications (FUSE VFS/ Applications using gfapi) is copied into GlusterFS
+space i.e. iobufs. Hence Iobufs are mostly allocated/deallocated in Fuse, gfapi,
+protocol xlators, and also in performance xlators to cache the IO buffers etc.
+```
+struct iobuf {
+ union {
+ struct list_head list;
+ struct {
+ struct iobuf *next;
+ struct iobuf *prev;
+ };
+ };
+ struct iobuf_arena *iobuf_arena;
+
+ gf_lock_t lock; /* for ->ptr and ->ref */
+ int ref; /* 0 == passive, >0 == active */
+
+ void *ptr; /* usable memory region by the consumer */
+
+ void *free_ptr; /* in case of stdalloc, this is the
+ one to be freed not the *ptr */
+};
+```
+
+### iobref
+There may be need of multiple iobufs for a single fop, like in vectored read/write.
+Hence multiple iobufs(default 16) are encapsulated under one iobref.
+```
+struct iobref {
+ gf_lock_t lock;
+ int ref;
+ struct iobuf **iobrefs; /* list of iobufs */
+ int alloced; /* 16 by default, grows as required */
+ int used; /* number of iobufs added to this iobref */
+};
+```
+### iobuf_arenas
+One region of memory MMAPed from the operating system. Each region MMAPs
+@arena_size bytes of memory, and hosts @arena_size / @page_size IOBUFs.
+The same sized iobufs are grouped into one arena, for sanity of access.
+
+```
+struct iobuf_arena {
+ union {
+ struct list_head list;
+ struct {
+ struct iobuf_arena *next;
+ struct iobuf_arena *prev;
+ };
+ };
+
+ size_t page_size; /* size of all iobufs in this arena */
+ size_t arena_size; /* this is equal to
+ (iobuf_pool->arena_size / page_size)
+ * page_size */
+ size_t page_count;
+
+ struct iobuf_pool *iobuf_pool;
+
+ void *mem_base;
+ struct iobuf *iobufs; /* allocated iobufs list */
+
+ int active_cnt;
+ struct iobuf active; /* head node iobuf
+ (unused by itself) */
+ int passive_cnt;
+ struct iobuf passive; /* head node iobuf
+ (unused by itself) */
+ uint64_t alloc_cnt; /* total allocs in this pool */
+ int max_active; /* max active buffers at a given time */
+};
+
+```
+### iobuf_pool
+Pool of Iobufs. As there may be many Io buffers required by the filesystem,
+a pool of iobufs are preallocated and kept, if these preallocated ones are
+exhausted only then the standard malloc/free is called, thus improving the
+performance. Iobuf pool is generally one per process, allocated during
+glusterfs_ctx_t init (glusterfs_ctx_defaults_init), currently the preallocated
+iobuf pool memory is freed on process exit. Iobuf pool is globally accessible
+across GlusterFs, hence iobufs allocated by any xlator can be accessed by any
+other xlators(unless iobuf is not passed).
+```
+struct iobuf_pool {
+ pthread_mutex_t mutex;
+ size_t arena_size; /* size of memory region in
+ arena */
+ size_t default_page_size; /* default size of iobuf */
+
+ int arena_cnt;
+ struct list_head arenas[GF_VARIABLE_IOBUF_COUNT];
+ /* array of arenas. Each element of the array is a list of arenas
+ holding iobufs of particular page_size */
+
+ struct list_head filled[GF_VARIABLE_IOBUF_COUNT];
+ /* array of arenas without free iobufs */
+
+ struct list_head purge[GF_VARIABLE_IOBUF_COUNT];
+ /* array of of arenas which can be purged */
+
+ uint64_t request_misses; /* mostly the requests for higher
+ value of iobufs */
+};
+```
+~~~
+The default size of the iobuf_pool(as of yet):
+1024 iobufs of 128Bytes = 128KB
+512 iobufs of 512Bytes = 256KB
+512 iobufs of 2KB = 1MB
+128 iobufs of 8KB = 1MB
+64 iobufs of 32KB = 2MB
+32 iobufs of 128KB = 4MB
+8 iobufs of 256KB = 2MB
+2 iobufs of 1MB = 2MB
+Total ~13MB
+~~~
+As seen in the datastructure iobuf_pool has 3 arena lists.
+
+- arenas:
+The arenas allocated during iobuf_pool create, are part of this list. This list
+also contains arenas that are partially filled i.e. contain few active and few
+passive iobufs (passive_cnt !=0, active_cnt!=0 except for initially allocated
+arenas). There will be by default 8 arenas of the sizes mentioned above.
+- filled:
+If all the iobufs in the arena are filled(passive_cnt = 0), the arena is moved
+to the filled list. If any of the iobufs from the filled arena is iobuf_put,
+then the arena moves back to the 'arenas' list.
+- purge:
+If there are no active iobufs in the arena(active_cnt = 0), the arena is moved
+to purge list. iobuf_put() triggers destruction of the arenas in this list. The
+arenas in the purge list are destroyed only if there is atleast one arena in
+'arenas' list, that way there won't be spurious mmap/unmap of buffers.
+(e.g: If there is an arena (page_size=128KB, count=32) in purge list, this arena
+is destroyed(munmap) only if there is an arena in 'arenas' list with page_size=128KB).
+
+## APIs
+### iobuf_get
+
+```
+struct iobuf *iobuf_get (struct iobuf_pool *iobuf_pool);
+```
+Creates a new iobuf of the default page size(128KB hard coded as of yet).
+Also takes a reference(increments ref count), hence no need of doing it
+explicitly after getting iobuf.
+
+### iobuf_get2
+
+```
+struct iobuf * iobuf_get2 (struct iobuf_pool *iobuf_pool, size_t page_size);
+```
+Creates a new iobuf of a specified page size, if page_size=0 default page size
+is considered.
+```
+if (requested iobuf size > Max iobuf size in the pool(1MB as of yet))
+ {
+ Perform standard allocation(CALLOC) of the requested size and
+ add it to the list iobuf_pool->arenas[IOBUF_ARENA_MAX_INDEX].
+ }
+ else
+ {
+ -Round the page size to match the stndard sizes in iobuf pool.
+ (eg: if 3KB is requested, it is rounded to 8KB).
+ -Select the arena list corresponding to the rounded size
+ (eg: select 8KB arena)
+ If the selected arena has passive count > 0, then return the
+ iobuf from this arena, set the counters(passive/active/etc.)
+ appropriately.
+ else the arena is full, allocate new arena with rounded size
+ and standard page numbers and add to the arena list
+ (eg: 128 iobufs of 8KB is allocated).
+ }
+```
+Also takes a reference(increments ref count), hence no need of doing it
+explicitly after getting iobuf.
+
+### iobuf_ref
+
+```
+struct iobuf *iobuf_ref (struct iobuf *iobuf);
+```
+ Take a reference on the iobuf. If using an iobuf allocated by some other
+xlator/function/, its a good practice to take a reference so that iobuf is not
+deleted by the allocator.
+
+### iobuf_unref
+```
+void iobuf_unref (struct iobuf *iobuf);
+```
+Unreference the iobuf, if the ref count is zero iobuf is considered free.
+
+```
+ -Delete the iobuf, if allocated from standard alloc and return.
+ -set the active/passive count appropriately.
+ -if passive count > 0 then add the arena to 'arena' list.
+ -if active count = 0 then add the arena to 'purge' list.
+```
+Every iobuf_ref should have a corresponding iobuf_unref, and also every
+iobuf_get/2 should have a correspondning iobuf_unref.
+
+### iobref_new
+```
+struct iobref *iobref_new ();
+```
+Creates a new iobref structure and returns its pointer.
+
+### iobref_ref
+```
+struct iobref *iobref_ref (struct iobref *iobref);
+```
+Take a reference on the iobref.
+
+### iobref_unref
+```
+void iobref_unref (struct iobref *iobref);
+```
+Decrements the reference count of the iobref. If the ref count is 0, then unref
+all the iobufs(iobuf_unref) in the iobref, and destroy the iobref.
+
+### iobref_add
+```
+int iobref_add (struct iobref *iobref, struct iobuf *iobuf);
+```
+Adds the given iobuf into the iobref, it takes a ref on the iobuf before adding
+it, hence explicit iobuf_ref is not required if adding to the iobref.
+
+### iobref_merge
+```
+int iobref_merge (struct iobref *to, struct iobref *from);
+```
+Adds all the iobufs in the 'from' iobref to the 'to' iobref. Merge will not
+cause the delete of the 'from' iobref, therefore it will result in another ref
+on all the iobufs added to the 'to' iobref. Hence iobref_unref should be
+performed both on 'from' and 'to' iobrefs (performing iobref_unref only on 'to'
+will not free the iobufs and may result in leak).
+
+### iobref_clear
+```
+void iobref_clear (struct iobref *iobref);
+```
+Unreference all the iobufs in the iobref, and also unref the iobref.
+
+## Iobuf Leaks
+If all iobuf_refs/iobuf_new do not have correspondning iobuf_unref, then the
+iobufs are not freed and recurring execution of such code path may lead to huge
+memory leaks. The easiest way to identify if a memory leak is caused by iobufs
+is to take a statedump. If the statedump shows a lot of filled arenas then it is
+a sure sign of leak. Refer doc/debugging/statedump.md for more details.
+
+If iobufs are leaking, the next step is to find where the iobuf_unref went
+missing. There is no standard/easy way of debugging this, code reading and logs
+are the only ways. If there is a liberty to reproduce the memory leak at will,
+then logs(gf_callinginfo) in iobuf_ref/unref might help.
+TODO: A easier way to debug iobuf leaks.
diff --git a/doc/developer-guide/datastructure-mem-pool.md b/doc/developer-guide/datastructure-mem-pool.md
new file mode 100644
index 00000000000..225567cbf9f
--- /dev/null
+++ b/doc/developer-guide/datastructure-mem-pool.md
@@ -0,0 +1,124 @@
+# Mem-pool
+## Background
+There was a time when every fop in glusterfs used to incur cost of allocations/de-allocations for every stack wind/unwind between xlators because stack/frame/*_localt_t in every wind/unwind was allocated and de-allocated. Because of all these system calls in the fop path there was lot of latency and the worst part is that most of the times the number of frames/stacks active at any time wouldn't cross a threshold. So it was decided that this threshold number of frames/stacks would be allocated in the beginning of the process only once. Get one of them from the pool of stacks/frames whenever `STACK_WIND` is performed and put it back into the pool in `STACK_UNWIND`/`STACK_DESTROY` without incurring any extra system calls. The data structures are allocated only when threshold number of such items are in active use i.e. pool is in complete use.% increase in the performance once this was added to all the common data structures (inode/fd/dict etc) in xlators throughout the stack was tremendous.
+
+## Data structure
+```
+struct mem_pool {
+ struct list_head list; /*Each member in the mempool is element padded with a doubly-linked-list + ptr of mempool + is-in
+-use info. This list is used to add the element to the list of free members in the mem-pool*/
+ int hot_count;/*number of mempool elements that are in active use*/
+ int cold_count;/*number of mempool elements that are not in use. If a new allocation is required it
+will be served from here until all the elements in the pool are in use i.e. cold-count becomes 0.*/
+ gf_lock_t lock;/*synchronization mechanism*/
+ unsigned long padded_sizeof_type;/*Each mempool element is padded with a doubly-linked-list + ptr of mempool + is-in
+-use info to operate the pool of elements, this size is the element-size after padding*/
+ void *pool;/*Starting address of pool*/
+ void *pool_end;/*Ending address of pool*/
+/* If an element address is in the range between pool, pool_end addresses then it is alloced from the pool otherwise it is 'calloced' this is very useful for functions like 'mem_put'*/
+ int real_sizeof_type;/* size of just the element without any padding*/
+ uint64_t alloc_count; /*Number of times this type of data is allocated through out the life of this process. This may include calloced elements as well*/
+ uint64_t pool_misses; /*Number of times the element had to be allocated from heap because all elements from the pool are in active use.*/
+ int max_alloc; /*Maximum number of elements from the pool in active use at any point in the life of the process. This does *not* include calloced elements*/
+ int curr_stdalloc;/*Number of elements that are allocated from heap at the moment because the pool is in completed use. It should be '0' when pool is not in complete use*/
+ int max_stdalloc;/*Maximum number of allocations from heap after the pool is completely used that are in active use at any point in the life of the process.*/
+ char *name; /*Contains xlator-name:data-type as a string
+ struct list_head global_list;/*This is used to insert it into the global_list of mempools maintained in 'glusterfs-ctx'
+};
+```
+
+## Life-cycle
+```
+mem_pool_new (data_type, unsigned long count)
+
+This is a macro which expands to mem_pool_new_fn (sizeof (data_type), count, string-rep-of-data_type)
+
+struct mem_pool *
+mem_pool_new_fn (unsigned long sizeof_type, unsigned long count, char *name)
+
+Padded-element:
+ ----------------------------------------
+|list-ptr|mem-pool-address|in-use|Element|
+ ----------------------------------------
+ ```
+
+This function allocates the `mem-pool` structure and sets up the pool for use.
+`name` parameter above is the `string` containing type of the datatype. This `name` is appended to `xlator-name + ':'` so that it can be easily identified in things like statedump. `count` is the number of elements that need to be allocated. `sizeof_type` is the size of each element. Ideally `('sizeof_type'*'count')` should be the size of the total pool. But to manage the pool using `mem_get`/`mem_put` (will be explained after this section) each element needs to be padded in the front with a `('list', 'mem-pool-address', 'in_use')`. So the actual size of the pool it allocates will be `('padded_sizeof_type'*'count')`. Why these extra elements are needed will be evident after understanding how `mem_get` and `mem_put` are implemented. In this function it just initializes all the `list` structures in front of each element and adds them to the `mem_pool->list` which represent the list of `cold` elements which can be allocated whenever `mem_get` is called on this mem_pool. It remembers mem_pool's start and end addresses in `mem_pool->pool`, `mem_pool->pool_end` respectively. Initializes `mem_pool->cold_count` to `count` and `mem_pool->hot_count` to `0`. This mem-pool will be added to the list of `global_list` maintained in `glusterfs-ctx`
+
+
+```
+void* mem_get (struct mem_pool *mem_pool)
+
+Initial-list before mem-get
+----------------
+| Pool |
+| ----------- | ---------------------------------------- ----------------------------------------
+| | pool-list | |<---> |list-ptr|mem-pool-address|in-use|Element|<--->|list-ptr|mem-pool-address|in-use|Element|
+| ----------- | ---------------------------------------- ----------------------------------------
+----------------
+
+list after mem-get from the pool
+----------------
+| Pool |
+| ----------- | ----------------------------------------
+| | pool-list | |<--->|list-ptr|mem-pool-address|in-use|Element|
+| ----------- | ----------------------------------------
+----------------
+
+List when the pool is full:
+ ----------------
+| Pool | extra element that is allocated
+| ----------- | ----------------------------------------
+| | pool-list | | |list-ptr|mem-pool-address|in-use|Element|
+| ----------- | ----------------------------------------
+ ----------------
+```
+
+This function is similar to `malloc()` but it gives memory of type `element` of this pool. When this function is called it increments `mem_pool->alloc_count`, checks if there are any free elements in the pool that can be returned by inspecting `mem_pool->cold_count`. If `mem_pool->cold_count` is non-zero then it means there are elements in the pool which are not in active use. It deletes one element from the list of free elements and decrements `mem_pool->cold_count` and increments `mem_pool->hot_count` to indicate there is one more element in active use. Updates `mem_pool->max_alloc` accordingly. Sets `element->in_use` in the padded memory to `1`. Sets `element->mem_pool` address to this mem_pool also in the padded memory(It is useful for mem_put). Returns the address of the memory after the padded boundary to the caller of this function. In the cases where all the elements in the pool are in active use it `callocs` the element with padded size and sets mem_pool address in the padded memory. To indicate the pool-miss and give useful accounting information of the pool-usage it increments `mem_pool->pool_misses`, `mem_pool->curr_stdalloc`. Updates `mem_pool->max_stdalloc` accordingly.
+
+```
+void* mem_get0 (struct mem_pool *mem_pool)
+```
+Just like `calloc` is to `malloc`, `mem_get0` is to `mem_get`. It memsets the memory to all '0' before returning the element.
+
+
+```
+void mem_put (void *ptr)
+
+list before mem-put from the pool
+ ----------------
+| Pool |
+| ----------- | ----------------------------------------
+| | pool-list | |<--->|list-ptr|mem-pool-address|in-use|Element|
+| ----------- | ----------------------------------------
+ ----------------
+
+list after mem-put to the pool
+ ----------------
+| Pool |
+| ----------- | ---------------------------------------- ----------------------------------------
+| | pool-list | |<---> |list-ptr|mem-pool-address|in-use|Element|<--->|list-ptr|mem-pool-address|in-use|Element|
+| ----------- | ---------------------------------------- ----------------------------------------
+ ----------------
+
+If mem_put is putting an element not from pool then it is just freed so
+no change to the pool
+ ----------------
+| Pool |
+| ----------- |
+| | pool-list | |
+| ----------- |
+ ----------------
+```
+
+This function is similar to `free()`. Remember that ptr passed to this function is the address of the element, so this function gets the ptr to its head of the padding in front of it. If this memory falls in bettween `mem_pool->pool`, `mem_pool->pool_end` then the memory is part of the 'pool' memory that is allocated so it does some sanity checks to see if the memory is indeed head of the element by checking if `in_use` is set to `1`. It resets `in_use` to `0`. It gets the mem_pool address stored in the padded region and adds this element to the list of free elements. Decreases `mem_pool->hot_count` increases `mem_pool->cold_count`. In the case where padded-element address does not fall in the range of `mem_pool->pool`, `mem_pool->pool_end` it just frees the element and decreases `mem_pool->curr_stdalloc`.
+
+```
+void
+mem_pool_destroy (struct mem_pool *pool)
+```
+Deletes this pool from the `global_list` maintained by `glusterfs-ctx` and frees all the memory allocated in `mem_pool_new`.
+
+
+### How to pick pool-size
+This varies from work-load to work-load. Create the mem-pool with some random size and run the work-load. Take the statedump after the work-load is complete. In the statedump if `max_alloc` is always less than `cold_count` may be reduce the size of the pool closer to `max_alloc`. On the otherhand if there are lots of `pool-misses` then increase the `pool_size` by `max_stdalloc` to achieve better 'hit-rate' of the pool.
diff --git a/doc/developer-guide/dirops-transactions-in-dht.md b/doc/developer-guide/dirops-transactions-in-dht.md
new file mode 100644
index 00000000000..909a97001aa
--- /dev/null
+++ b/doc/developer-guide/dirops-transactions-in-dht.md
@@ -0,0 +1,273 @@
+# dirops transactions in dht
+Need for transactions during operations on directories arise from two
+basic design elements of DHT:
+
+ 1. A directory is created on all subvolumes of dht. Since glusterfs
+ associates each file-system object with an unique gfid, every
+ subvolume should have the same unique mapping of (path of directory,
+ gfid). To elaborate,
+ * Each subvolume should've same gfid associated with a path to
+ directory.
+ * A gfid should not be associated with more than one path in any
+ subvolume.
+
+ So, entire operations like mkdir, renamedir, rmdir and creation of
+ directories during self-heal need to be atomic in dht. In other words,
+ any of these operations shouldn't begin on an inode if one of them is
+ already in progress on the same inode, till it completes on all
+ subvolumes of dht. If not, more than one of these operations
+ happening in parallel can break any or all of the two requirements
+ listed above. This is referred in the rest of the document by the
+ name _Atomicity during namespace operations_.
+
+ 2. Each directory has an independent layout persisted on
+ subvolumes. Each subvolume contains only part of the layout relevant
+ to it. For performance reasons _and_ since _only_ dht has aggregated
+ view, this layout is cached in memory of client. To make sure dht
+ reads or modifies a single complete layout while parallel modifications of the layout are in progress, we need atomicity during layout modification and reading. This is referred in the rest of the document as _Atomicity during layout modification and reading_.
+
+Rest of the document explains how atomicity is achieved for each of
+the case above.
+
+**Atomicity during layout modification and reading**
+File operations a.k.a fops can be classified into two categories based on how they consume layout.
+
+ - Layout writer. Setting of layout during selfheal of a directory is
+ layout writer of _that_ directory.
+ - Layout reader.
+ * Any entry fop like create, unlink, rename, link, symlink,
+ unlink, mknod, rename, mkdir, rmdir, renamedir which needs layout of the parent directory. Each of these fops are readers of layout on parent directory.
+ * setting of layout during mkdir of a directory is considered as
+ a reader of the same directory's layout. The reason for this is that
+ only a parallel lookup on that directory can be a competing fop that modifies the layout (Other fops need gfid of the directory which can be got only after either lookup or mkdir completes). However, healing of layout is considered as a writer and a single writer blocks all readers.
+
+*Algorithm*
+Atomicity is achieved by locking on the inode of directory whose
+layout is being modified or read. The fop used is inodelk.
+ - Writer acquires blocking inodelk (directory-inode, write-lock) on
+ all subvolumes serially. The order of subvols in which they are
+ locked by different clients remains constant for a directory. If locking fails on any subvolume, layout modification is abandoned.
+ - Reader acquires an inodelk (directory-inode, read-lock) on _any_
+ one subvolume. If locking fails on a subvolume (say with
+ ESTALE/ENOTCONN error), locking can be tried on other subvolumes till
+ we get one lock. If we cannot get lock on at least one subvolume,
+ consistency of layout is not guaranteed. Based on the consistency
+ requirements of fops, they can be failed or continued.
+
+Reasons why writer has to lock on _all_ subvols:
+
+ - DHT don't have a metadata server and locking is implemented by brick. So, there is no well-defined subvol/brick that can be used as an arbitrator by different clients while acquiring locks.
+ - readers should acquire as minimum number of locks as possible. In
+ other words, the algorithm aims to have less synchronization cost to
+ readers.
+ - The subvolume to which a directory hashes could be used as a
+ lock server. However, in the case of an entry fop like create
+ (/a/b/c) where we want to block modification of layout of b for the
+ duration of create, we would be required to acquire lock on the
+ subvol to which /a/b hashes. To find out the hashed-subvol of
+ /a/b, we would need layout of /a. Note that how there is a dependency
+ of locking the layouts of ancestors all the way to root. So this
+ locking is not preferred. Also, note that only the immediate parent
+ inode is available in arguments of a fop like create.
+
+**Atomicity during namespace operations**
+
+ - We use locks on inode of parent directory in the namespace of
+ _"basename"_ during mkdir, rmdir, renamedir and directory
+ creation phase of self-heal. The exact fop we use is _entrylk
+ (parent-inode, "basename")_.
+ - refresh in-memory layout of parent-inode from values stored on backend
+ - _entrylk (parent-inode, "basename")_ is done on subvolume to which
+ _"basename" hashes_. So, this operation is a _reader_ of the
+ layout on _parent-inode_. Which means an _inodelk (parent-inode,
+ read-lock)_ has to be completed before _entrylk (parent-inode,
+ "basename")_ is issued. Both the locks have to be held till the
+ operation is tried on all subvolumes. If acquiring of any/all of
+ these locks fail, the operation should be failed.
+
+With the above details, algorithms for mkdir, rmdir, renamedir,
+self-heal of directory are explicitly detailed below.
+
+**Self-heal of a directory**
+
+ - creation of directories on subvolumes is done only during
+ _named-lookup_ of a directory as we need < parent-inode,
+ "basename" >.
+ - If a directory is missing on one or more subvolumes,
+ * acquire _inodelk (parent-inode, read-lock)_ on _any one_ of the
+ subvolumes.
+ * refresh the in-memory layout of parent-inode from values stored on backend
+ * acquire _entrylk (parent-inode, "basename")_ on the subvolume
+ to which _"basename"_ hashes.
+ * If any/all of the locks fail, self-heal is aborted.
+ * create directories on missing subvolumes.
+ * release _entrylk (parent-inode, "basename")_.
+ * release _inodelk (parent-inode, read-lock)_.
+
+ - If layout of a directory needs healing
+ * acquire _inodelk (directory-inode, write-lock)_ on _all_ the
+ subvolumes. If locking fails on any of the subvolumes,
+ self-heal is aborted. Blocking Locks are acquired serially across subvolumes in a _well-defined_ order which is _constant_ across all the healers of a directory. One order could be the order in which subvolumes are stored in the array _children_ of dht xlator.
+ * heal the layout.
+ * release _inodelk (directory-inode, write-lock)_ on _all_ the
+ subvolumes in parallel.
+ * Note that healing of layout can be done in both _named_ and
+ _nameless_ lookups of a directory as _only directory-inode_ is needed
+ for healing and it is available during both.
+
+**mkdir (parent-inode, "basename")**
+
+* while creating directory across subvolumes,
+
+ - acquire _inodelk (parent-inode, read-lock)_ on _any one_ of the
+ subvolumes.
+ - refresh in-memory layout of parent-inode from values stored on backend
+ - acquire _entrylk (parent-inode, "basename")_ on the subvolume to
+ which _"basename"_ hashes.
+ - If any/all of the above two locks fail, release the locks that
+ were acquired successfully and mkdir should be failed (as perceived by application).
+ - do _mkdir (parent-inode, "basename")_ on the subvolume to which
+ _"basename"_ hashes. If this mkdir fails, mkdir is failed.
+ - do _mkdir (parent-inode, "basename")_ on the remaining subvolumes.
+ - release _entrylk (parent-inode, "basename")_.
+ - release _inodelk (parent-inode, "read-lock")_.
+* while setting the layout of a directory,
+ - acquire _inodelk (directory-inode, read-lock)_ on _any one_ of the
+ subvolumes.
+ - If locking fails, cleanup the locks that were acquired
+ successfully and abort layout setting. Note that we'll have a
+ directory without a layout till a lookup happens on the
+ directory. This means entry operations within this directory fail
+ in this time window. We can also consider failing mkdir. The
+ problem of dealing with a directory without layout is out of the
+ scope of this document.
+ - set the layout on _directory-inode_.
+ - release _inodelk (directory-inode, read-lock)_.
+* Note that during layout setting we consider mkdir as a _reader_ not
+ _writer_, though it is setting the layout. Reasons are:
+ - Before any of other readers like create, link etc that operate on
+ this directory to happen, _gfid_ of this directory has to be
+ resolved. But _gfid_ is only available only if either of following
+ conditions are true:
+ * after mkdir is complete.
+ * a lookup on the same path happens parallel to in-progress
+ mkdir.
+
+ But, on completion of any of the above two operations, layout
+ will be healed. So, none of the _readers_ will happen on a
+ directory with partial layout.
+
+* Note that since we've an _entrylk (parent-inode, "basename")_ for
+ the entire duration of (attempting) creating directories, parallel
+ mkdirs will no longer contend on _mkdir_ on subvolume _to which "basename" hashes_. But instead, contend on _entrylk (parent-inode, "basename")_ on the subvolume _to which "basename" hashes_. So, we can attempt the _mkdir_ in _parallel_ on all subvolumes instead of two stage mkdir on hashed first and the rest of them in parallel later. However, we need to make sure that mkdir is successful on the subvolume _to which "basename" hashes_ for mkdir to be successful (as perceived by application). In the case of failed mkdir (as perceived by application), a cleanup should be performed on all the subvolumes before _entrylk (parent-inode, "basename")_ is released.
+
+**rmdir (parent-inode, "basename", directory-inode)**
+
+ - acquire _inodelk (parent-inode, read-lock)_ on _any one_
+ subvolume.
+ - refresh in-memory layout of parent-inode from values stored on backend
+ - acquire _entrylk (parent-inode, "basename")_ on the subvolume to
+ which _"basename" hashes_.
+ - If any/all of the above locks fail, rmdir is failed after cleanup
+ of the locks that were acquired successfully.
+ - do _rmdir (parent-inode, "basename")_ on the subvolumes to which
+ _"basename" doesn't hash to_.
+ * If successful, continue.
+ * Else,
+ * recreate directories on those subvolumes where rmdir
+ succeeded.
+ * heal the layout of _directory-inode_. Note that this will have
+ same synchronization requirements as discussed during layout
+ healing part of the section "Directoy self-heal" above.
+ * release _entrylk (parent-inode, "basename")_.
+ * release _inodelk (parent-inode, read-lock)_.
+ * fail _rmdir (parent-inode, "basename")_ to application.
+ - do _rmdir (parent-inode, "basename")_ on the subvolume to which
+ _"basename" hashes_.
+ - If successful, continue.
+ - Else, Go to the failure part of _rmdir (parent-inode, "basename")_
+ on subvolumes to which "basename" _doesn't hash to_.
+ - release _entrylk (parent-inode, "basename")_.
+ - release _inodelk (parent-inode, read-lock)_.
+ - return success to application.
+
+**renamedir (src-parent-inode, "src-basename", src-directory-inode, dst-parent-inode, "dst-basename", dst-directory-inode)**
+
+ - requirement is to prevent any operation in both _src-namespace_
+ and _dst-namespace_. So, we need to acquire locks on both
+ namespaces.We also need to have constant ordering while acquiring
+ locks during parallel renames of the form _rename (src, dst)_ and
+ _rename (dst, src)_ to prevent deadlocks. We can sort gfids of
+ _src-parent-inode_ and _dst-parent-inode_ and use that order to
+ acquire locks. For the sake of explanation lets say we ended up
+ with order of _src_ followed by _dst_.
+ - acquire _inodelk (src-parent-inode, read-lock)_.
+ - refresh in-memory layout of src-parent-inode from values stored on backend
+ - acquire _entrylk (src-parent-inode, "src-basename")_.
+ - acquire _inodelk (dst-parent-inode, read-lock)_.
+ - refresh in-memory layout of dst-parent-inode from values stored on backend
+ - acquire _entrylk (dst-parent-inode, "dst-basename")_.
+ - If acquiring any/all of the locks above fail,
+ * release the locks that were successfully acquired.
+ * fail the renamedir operation to application
+ * done
+ - do _renamedir ("src", "dst")_ on the subvolume _to which "dst-basename" hashes_.
+ * If failure, Goto point _If acquiring any/all of the locks above fail_.
+ * else, continue.
+ - do _renamedir ("src", "dst")_ on rest of the subvolumes.
+ * If there is any failure,
+ * revert the successful renames.
+ * Goto to point _If acquiring any/all of the locks above fail_.
+ * else,
+ - release all the locks acquired.
+ - return renamedir as success to application.
+
+**Some examples of races**
+This section gives concrete examples of races that can result in inconsistencies explained in the beginning of the document.
+
+Some assumptions are:
+
+* We consider an example distribute of three subvols s1, s2 and s3.
+* For examples of renamedir ("src", "dst"), _src_ hashes to s1 and _dst_ hashes to s2. _src_ and _dst_ are associated with _gfid-src_ and _gfid-dst_ respectively
+* For non renamedir examples, _dir_ is the name of directory and it hashes to s1.
+
+And the examples are:
+
+ - mkdir vs rmdir - inconsistency in namespace.
+ * mkdir ("dir", gfid1) is complete on s1
+ * rmdir is issued on same directory. Note that, since rmdir needs a gfid, a lookup should be complete before rmdir. lookup creates the directory on rest of the subvols as part of self-heal.
+ * rmdir (gfid1) deletes directory from all subvols.
+ * A new mkdir ("dir", gfid2) is issued. It is successful on s1 associating "dir" with gfid2.
+ * mkdir ("dir", gfid1) resumes and creates directory on s2 and s3 associating "dir" with gfid1.
+ * mkdir ("dir", gfid2) fails with EEXIST on s2 and s3. Since, EEXIST errors are ignored, mkdir is considered successful to application.
+ * In this example we have multiple inconsitencies
+ * "dir" is associated with gfid1 on s2, s3 and with gfid2 on s1
+ * Even if mkdir ("dir", gfid2) was not issued, we would've a case of a directory magically reappearing after a successful rmdir.
+ - lookup heal vs rmdir
+ * rmdir ("dir", gfid1) is issued. It is successful on s2 and s3 (non-hashed subvols for name "dir")
+ * lookup ("dir") is issued. Since directory is present on s1 yet, it is created on s2 and s3 associating with gfid1 as part of self-heal
+ * rmdir ("dir", gfid1) is complete on s1 and it is successful
+ * Another lookup ("dir") creates the directory on s1 too
+ * "dir" magically reappears after a successful rmdir
+ - lookup heal (src) vs renamedir ("src", "dst")
+ * renamedir ("src", "dst") complete on s2
+ * lookup ("src") recreates _src_ with _gfid-src_ on s2
+ * renamedir ("src", "dst") completes on s1, s3. After rename is complete path _dst_ will be associated with gfid _gfid-src_
+ * Another lookup ("src") recreates _src_ on subvols s1 and s3, associating it with gfid _gfid-src_
+ * Inconsistencies are
+ * after a successful renamedir ("src", "dst"), both src and dst exist
+ * Two directories - src and dst - are associated with same gfid. One common symptom is that some entries (of the earlier _src_ and current _dst_ directory) being missed out in readdir listing as the gfid handle might be pointing to the empty healed directory than the actual directory containing entries
+ - lookup heal (dst) vs renamedir ("src", "dst")
+ * dst exists and empty when renamdir started
+ * dst doesn't exist when renamedir started
+ - renamedir ("src", "dst") complete on s2 and s3
+ - lookup ("dst") creates _dst_ associating it with _gfid-src_ on s1
+ - An entry is created in _dst_ on either s1
+ - renamedir ("src", "dst") on s1 will result in a directory _dst/dst_ as _dst_ is no longer empty and _man 2 rename_ states that if _dst_ is not empty, _src_ is renamed _as a subdirectory of dst_
+ - A lookup ( _dst/dst_) creates _dst/dst_ on s2 and s3 associating with _gfid-src_ as part of self-heal
+ - Inconsistencies are:
+ * Two directories - _dst_ and _dst/dst_ - exist even though both of them didn't exist at the beginning of renamedir
+ * Both _dst_ and _dst/dst_ have same gfid - _gfid-src_. As observed earlier, symptom might be directory listing being incomplete
+ - mkdir (dst) vs renamedir ("src", "dst")
+ - rmdir (src) vs renamedir ("src", "dst")
+ - rmdir (dst) vs renamedir ("src", "dst")
diff --git a/doc/developer-guide/ec-implementation.md b/doc/developer-guide/ec-implementation.md
new file mode 100644
index 00000000000..77e62583caa
--- /dev/null
+++ b/doc/developer-guide/ec-implementation.md
@@ -0,0 +1,588 @@
+Erasure coding implementation
+=============================
+
+This document provides information about how [erasure code][1] has
+been implemented into ec translator. It describes the algorithm used
+and the optimizations made, but it doesn't contain a full description
+of the mathematical background needed to understand erasure coding in
+general. It either describes the other parts of ec not directly
+related to the encoding/decoding procedure, like synchronization or
+fop management.
+
+
+Introduction
+------------
+
+EC is based on [Reed-Solomon][2] erasure code. It's a very old code.
+It's not considered the best one nowadays, but is good enough and it's
+one of the few codes that is not covered by any patent and can be
+freely used.
+
+To define the Reed-Solomon code we use 3 parameters:
+
+ * __Key fragments (K)__
+ It represents the minimum number of healthy fragments that will be
+ needed to be able to recover the original data. Any subset of K
+ out of the total number of fragments will serve.
+
+ * __Redundancy fragments (R)__
+ It represents the number of extra fragments to compute for each
+ original data block. This value determines how many fragments can
+ be lost before being unable to recover the original data.
+
+ * __Fragment size (S)__
+ This determines the size of each fragment. The original data
+ block size is computed as S * K. Currently this values is fixed
+ to 512 bytes.
+
+ * __Total number of fragments (N = K + R)__
+ This isn't a real parameter but it will be useful to simplify
+ the following descriptions.
+
+From the point of view of the implementation, it only consists on
+matrix multiplications. There are two kinds of matrices to use for
+Reed-Solomon:
+
+ * __[Systematic][3]__
+ This kind of matrix has the particularity that K of the encoded
+ fragments are simply a copy of the original data, divided into K
+ pieces. Thus no real encoding needs to be done for them and only
+ the R redundancy fragments need to be computed.
+
+ This kind of matrices contain one KxK submatrix that is the
+ [identity matrix][4].
+
+ * __Non-systematic__
+ This kind of matrix doesn't contain an identity submatrix. This
+ means that all of the N fragments need to be encoded, requiring
+ more computation. On the other hand, these matrices have some nice
+ properties that allow faster implementations of some algorithms,
+ like the matrix inversion used to decode the data.
+
+ Another advantage of non-systematic matrices is that the decoding
+ time is constant, independently of how many fragments are lost,
+ while systematic approach can suffer from performance degradation
+ when one fragment is lost.
+
+All non-systematic matrices can be converted to systematic ones, but
+then we lose the good properties of the non-systematic. We have to
+choose betwee best peek performance (systematic) and performance
+stability (non-systematic).
+
+
+Encoding procedure
+------------------
+
+To encode a block of data we need a KxN matrix where each subset of K
+rows is [linearly independent][5]. In other words, the determinant of
+each KxK submatrix is not 0.
+
+There are some known ways to obtain this kind of matrices. EC uses a
+small variation of a matrix known as [Vandermonde Matrix][6] where
+each element of the matrix is defined as:
+
+ a(i, j) = i ^ (K - j)
+
+ where i is the row from 1 to N, and j is the column from 1 to K.
+
+This is exactly the Vandermonde Matrix but with the elements of each
+row in reverse order. This change is made to be able to implement a
+small optimization in the matrix multiplication.
+
+Once we have the matrix, we only need to compute the multiplication
+of this matrix by a vector composed of K elements of data coming from
+the original data block.
+
+ / \ / \
+ | 1 1 1 1 1 | / \ | a + b + c + d + e = t |
+ | 16 8 4 2 1 | | a | | 16a + 8b + 4c + 2d + e = u |
+ | 81 27 9 3 1 | | b | = | 81a + 27b + 9c + 3d + e = v |
+ | 256 64 16 4 1 | * | c | | 256a + 64b + 16c + 4d + e = w |
+ | 625 125 25 5 1 | | d | | 625a + 125b + 25c + 5d + e = x |
+ | 1296 216 36 6 1 | | e | | 1296a + 216b + 36c + 6d + e = y |
+ | 2401 343 49 7 1 | \ / | 2401a + 343b + 49c + 7d + e = z |
+ \ / \ /
+
+The optimization that can be done here is this:
+
+ 16a + 8b + 4c + 2d + e = 2(2(2(2a + b) + c) + d) + e
+
+So all the multiplications are always by the number of the row (2 in
+this case) and we don't need temporal storage for intermediate
+results:
+
+ a *= 2
+ a += b
+ a *= 2
+ a += c
+ a *= 2
+ a += d
+ a *= 2
+ a += e
+
+Once we have the result vector, each element is a fragment that needs
+to be stored in a separate place.
+
+
+Decoding procedure
+------------------
+
+To recover the data we need exactly K of the fragments. We need to
+know which K fragments we have (i.e. the original row number from
+which each fragment was calculated). Once we have this data we build
+a square KxK matrix composed by the rows corresponding to the given
+fragments and invert it.
+
+With the inverted matrix, we can recover the original data by
+multiplying it with the vector composed by the K fragments.
+
+In our previous example, if we consider that we have recovered
+fragments t, u, v, x and z, corresponding to rows 1, 2, 3, 5 and 7,
+we can build the following matrix:
+
+ / \
+ | 1 1 1 1 1 |
+ | 16 8 4 2 1 |
+ | 81 27 9 3 1 |
+ | 625 125 25 5 1 |
+ | 2401 343 49 7 1 |
+ \ /
+
+And invert it:
+
+ / \
+ | 1/48 -1/15 1/16 -1/48 1/240 |
+ | -17/48 16/15 -15/16 13/48 -11/240 |
+ | 101/48 -86/15 73/16 -53/48 41/240 |
+ | -247/48 176/15 -129/16 83/48 -61/240 |
+ | 35/8 -7 35/8 -7/8 1/8 |
+ \ /
+
+Multiplying it by the vector (t, u, v, x, z) we recover the original
+data (a, b, c, d, e):
+
+ / \ / \ / \
+ | 1/48 -1/15 1/16 -1/48 1/240 | | t | | a |
+ | -17/48 16/15 -15/16 13/48 -11/240 | | u | | b |
+ | 101/48 -86/15 73/16 -53/48 41/240 | * | v | = | c |
+ | -247/48 176/15 -129/16 83/48 -61/240 | | x | | d |
+ | 35/8 -7 35/8 -7/8 1/8 | | z | | e |
+ \ / \ / \ /
+
+
+Galois Field
+------------
+
+This encoding/decoding procedure is quite complex to compute using
+regular mathematical operations and it's not well suited for what
+we want to do (note that matrix elements can grow unboundly).
+
+To solve this problem, exactly the same procedure is done inside a
+[Galois Field][7] of characteristic 2, which is a finite field with
+some interesting properties that make it specially useful for fast
+operations using computers.
+
+There are two main differences when we use this specific Galois Field:
+
+ * __All regular additions are replaced by bitwise xor's__
+ For todays computers it's not really faster to execute an xor
+ compared to an addition, however replacing additions by xor's
+ inside a multiplication has many advantages (we will make use of
+ this to optimize the multiplication).
+
+ Another consequence of this change is that additions and
+ substractions are really the same xor operation.
+
+ * __The elements of the matrix are bounded__
+ The field uses a modulus that keep all possible elements inside
+ a delimited region, avoiding really big numbers and fixing the
+ number of bits needed to represent each value.
+
+ In the current implementation EC uses 8 bits per field element.
+
+It's very important to understand how multiplications are computed
+inside a Galois Field to be able to understand how has it been
+optimized.
+
+We'll start with a simple 'old school' multiplication but in base 2.
+For example, if we want to multiply 7 * 5 (111b * 101b in binary), we
+do the following:
+
+ 1 1 1 (= 7)
+ * 1 0 1 (= 5)
+ -----------
+ 1 1 1 (= 7)
+ + 0 0 0 (= 0)
+ + 1 1 1 (= 7)
+ -----------
+ 1 0 0 0 1 1 (= 35)
+
+This is quite simple. Note that the addition of the third column
+generates a carry that is propagated to all the other left columns.
+
+The next step is to define the modulus of the field. Suppose we use
+11 as the modulus. Then we convert the result into an element of the
+field by dividing by the modulus and taking the residue. We also use
+the 'old school' method in binary:
+
+
+ 1 0 0 0 1 1 (= 35) | 1 0 1 1 (= 11)
+ - 0 0 0 0 ----------------
+ --------- 0 1 1 (= 3)
+ 1 0 0 0 1
+ - 1 0 1 1
+ -----------
+ 0 0 1 1 0 1
+ - 1 0 1 1
+ -------------
+ 0 0 1 0 (= 2)
+
+So, 7 * 5 in a field with modulus 11 is 2. Note that the main
+objective in each iteration of the division is to make higher bits
+equal to 0 when possible (if it's not possible in one iteration, it
+will be zeroed on the next).
+
+If we do the same but changing additions with xors we get this:
+
+ 1 1 1 (= 7)
+ * 1 0 1 (= 5)
+ -----------
+ 1 1 1 (= 7)
+ x 0 0 0 (= 0)
+ x 1 1 1 (= 7)
+ -----------
+ 1 1 0 1 1 (= 27)
+
+In this case, the xor of the third column doesn't generate any carry.
+
+Now we need to divide by the modulus. We can also use 11 as the
+modulus since it still satisfies the needed conditions to work on a
+Galois Field of characteristic 2 with 3 bits:
+
+ 1 1 0 1 1 (= 27) | 1 0 1 1 (= 11)
+ x 1 0 1 1 ----------------
+ --------- 1 1 1 (= 7)
+ 0 1 1 0 1
+ x 1 0 1 1
+ -----------
+ 0 1 1 0 1
+ x 1 0 1 1
+ -------------
+ 0 1 1 0 (= 6)
+
+Note that, in this case, to make zero the higher bit we need to
+consider the result of the xor operation, not the addition operation.
+
+So, 7 * 5 in a Galois Field of 3 bits with modulus 11 is 6.
+
+
+Optimization
+------------
+
+To compute all these operations in a fast way some methods have been
+traditionally used. Maybe the most common is the [lookup table][8].
+
+The problem with this method is that it requires 3 lookups for each
+byte multiplication, greatly amplifying the needed memory bandwidth
+and making it difficult to take advantage of any SIMD support on the
+processor.
+
+What EC does to improve the performance is based on the following
+property (using the 3 bits Galois Field of the last example):
+
+ A * B mod N = (A * b{2} * 4 mod N) x
+ (A * b{1} * 2 mod N) x
+ (A * b{0} mod N)
+
+This is basically a rewrite of the steps made in the previous example
+to multiply two numbers but moving the modulus calculation inside each
+intermediate result. What we can see here is that each term of the
+xor can be zeroed if the corresponding bit of B is 0, so we can ignore
+that factor. If the bit is 1, we need to compute A multiplied by a
+power of two and take the residue of the division by the modulus. We
+can precompute these values:
+
+ A0 = A (we don't need to compute the modulus here)
+ A1 = A0 * 2 mod N
+ A2 = A1 * 2 mod N
+
+Having these values we only need to add those corresponding to bits
+set to 1 in B. Using our previous example:
+
+ A = 1 1 1 (= 7)
+ B = 1 0 1 (= 5)
+
+ A0 = 1 1 1 (= 7)
+ A1 = 1 1 1 * 1 0 mod 1 0 1 1 = 1 0 1 (= 5)
+ A2 = 1 0 1 * 1 0 mod 1 0 1 1 = 0 0 1 (= 1)
+
+ Since only bits 0 and 2 are 1 in B, we add A0 and A2:
+
+ A0 + A2 = 1 1 1 x 0 0 1 = 1 1 0 (= 6)
+
+If we carefully look at what we are doing when computing each Ax, we
+see that we do two basic things:
+
+ - Shift the original value one bit to the left
+ - If the highest bit is 1, xor with the modulus
+
+Let's write this in a detailed way (representing each bit):
+
+ Original value: a{2} a{1} a{0}
+ Shift 1 bit: a{2} a{1} a{0} 0
+
+ If a{2} is 0 we already have the result:
+ a{1} a{0} 0
+
+ If a{2} is 1 we need to xor with the modulus:
+ 1 a{1} a{0} 0 x 1 0 1 1 = a{1} (a{0} x 1) 1
+
+An important thing to see here is that if a{2} is 0, we can get the
+same result by xoring with all 0 instead of the modulus. For this
+reason we can rewrite the modulus as this:
+
+ Modulus: a{2} 0 a{2} a{2}
+
+This means that the modulus will be 0 0 0 0 is a{2} is 0, so the value
+won't change, and it will be 1 0 1 1 if a{2} is 1, giving the correct
+result. So, the computation is simply:
+
+ Original value: a{2} a{1} a{0}
+ Shift 1 bit: a{2} a{1} a{0} 0
+ Apply modulus: a{1} (a{0} x a{2}) a{2}
+
+We can compute all Ax using this method. We'll get this:
+
+ A0 = a{2} a{1} a{0}
+ A1 = a{1} (a{0} x a{2}) a{2}
+ A2 = (a{0} x a{2}) (a{1} x a{2}) a{1}
+
+Once we have all terms, we xor the ones corresponding to the bits set
+to 1 in B. In out example this will be A0 and A2:
+
+ Result: (a{2} x a{0} x a{2}) (a{1} x a{1} x a{2}) (a{0} x a{1})
+
+We can easily see that we can remove some redundant factors:
+
+ Result: a{0} a{2} (a{0} x a{1})
+
+This way we have come up with a simply set of equations to compute the
+multiplication of any number by 5. If A is 1 1 1 (= 7), the result
+must be 1 1 0 (= 6) using the equations, as we expected. If we try
+another numbe for A, like 0 1 0 (= 2), the result must be 0 0 1 (= 1).
+
+This seems a really fast way to compute the multiplication without
+using any table lookup. The problem is that this is only valid for
+B = 5. For other values of B another set of equations will be found.
+To solve this problem we can pregenerate the equations for all
+possible values of B. Since the Galois Field we use is small, this is
+feasible.
+
+One thing to be aware of is that, in general, two equations for
+different bits of the same B can share common subexpressions. This
+gives space for further optimizations to reduce the total number of
+xors used in the final equations for a given B. However this is not
+easy to find, since finding the smallest number of xors that give the
+correct result is an NP-Problem. For EC an exhaustive search has been
+made to find the best combinations for each possible value.
+
+
+Implementation
+--------------
+
+All this seems great from the hardware point of view, but implementing
+this using normal processor instructions is not so easy because we
+would need a lot of shifts, ands, xors and ors to move the bits of
+each number to the correct position to compute the equation and then
+another shift to put each bit back to its final place.
+
+For example, to implement the functions to multiply by 5, we would
+need something like this:
+
+ Bit 2: T2 = (A & 1) << 2
+ Bit 1: T1 = (A & 4) >> 1
+ Bit 0: T0 = ((A >> 1) x A) & 1
+ Result: T2 + T1 + T0
+
+This doesn't look good. So here we make a change in the way we get
+and process the data: instead of reading full numbers into variables
+and operate with them afterwards, we use a single independent variable
+for each bit of the number.
+
+Assume that we can read and write independent bits from memory (later
+we'll see how to solve this problem when this is not possible). In
+this case, the code would look something like this:
+
+ Bit 2: T2 = Mem[2]
+ Bit 1: T1 = Mem[1]
+ Bit 0: T0 = Mem[0]
+ Computation: T1 ^= T0
+ Store result: Mem[2] = T0
+ Mem[1] = T2
+ Mem[0] = T1
+
+Note that in this case we handle the final reordering of bits simply
+by storing the right variable to the right place, without any shifts,
+ands nor ors. In fact we only have memory loads, memory stores and
+xors. Note also that we can do all the computations directly using the
+variables themselves, without additional storage. This true for most
+of the values, but in some cases an additional one or two temporal
+variables will be needed to store intermediate results.
+
+The drawback of this approach is that additions, that are simply a
+xor of two numbers will need as many xors as bits are in each number.
+
+
+SIMD optimization
+-----------------
+
+So we have a good way to compute the multiplications, but even using
+this we'll need several operations for each byte of the original data.
+We can improve this by doing multiple multiplications using the same
+set of instructions.
+
+With the approach taken in the implementation section, we can see that
+in fact it's really easy to add SIMD support to this method. We only
+need to store in each variable one bit from multiple numbers. For
+example, when we load T2 from memory, instead of reading the bit 2 of
+the first number, we can read the bit 2 of the first, second, third,
+fourth, ... numbers. The same can be done when loading T1 and T0.
+
+Obviously this needs to have a special encoding of the numbers into
+memory to be able to do that in a single operation, but since we can
+choose whatever encoding we want for EC, we have chosen to have
+exactly that. We interpret the original data as a stream of bits, and
+we split it into subsequences of length L, each containing one bit of
+a number. Every S subsequences form a set of numbers of S bits that
+are encoded and decoded as a single group. This repeats for any
+remaining data.
+
+For example, in a simple case with L = 8 and S = 3, the original data
+would contain something like this (interpreted as a sequence of bits,
+offsets are also bit-based):
+
+ Offset 0: a{0} b{0} c{0} d{0} e{0} f{0} g{0} h{0}
+ Offset 8: a{1} b{1} c{1} d{1} e{1} f{1} g{1} h{1}
+ Offset 16: a{2} b{2} c{2} d{2} e{2} f{2} g{2} h{2}
+ Offset 24: i{0} j{0} k{0} l{0} m{0} n{0} o{0} p{0}
+ Offset 32: i{1} j{1} k{1} l{1} m{1} n{1} o{1} p{1}
+ Offset 40: i{2} j{2} k{2} l{2} m{2} n{2} o{2} p{2}
+
+Note: If the input file is not a multiple of S * L, 0-padding is done.
+
+Here we have 16 numbers encoded, from A to P. This way we can easily
+see that reading the first byte of the file will read all bits 0 of
+number A, B, C, D, E, F, G and H. The same happens with bits 1 and 2
+when we read the second and third bytes respectively. Using this
+encoding and the implementation described above, we can see that the
+same set of instructions will be computing the multiplication of 8
+numbers at the same time.
+
+This can be further improved if we use L = 64 with 64 bits variables
+on 64-bits processor. It's even faster if we use L = 128 using SSE
+registers or L = 256 using AVX registers on Intel processors.
+
+Currently EC uses L = 512 and S = 8. This means that numbers are
+packed in blocks of 512 bytes and gives space for even bigger
+processor registers up to 512 bits.
+
+
+Conclusions
+-----------
+
+This method requires a single variable/processor register for each
+bit. This can be challenging if we want to avoid additional memory
+accesses, even if we use modern processors that have many registers.
+However, the implementation we chose for the Vandermonde Matrix
+doesn't require temporary storage, so we don't need a full set of 8
+new registers (one for each bit) to store partial computations.
+Additionally, the computation of the multiplications requires, at
+most, 2 extra registers, but this is afordable.
+
+Xors are a really fast operation in modern processors. Intel CPU's
+can dispatch up to 3 xors per CPU cycle if there are no dependencies
+with ongoing previous instructions. Worst case is 1 xor per cycle. So,
+in some configurations, this method could be very near to the memory
+speed.
+
+Another interesting thing of this method is that all data it needs to
+operate is packed in small sequential blocks of memory, meaning that
+it can take advantage of the faster internal CPU caches.
+
+
+Results
+-------
+
+For the particular case of 8 bits, EC can compute each multiplication
+using 12.8 xors on average (without counting 0 and 1 that do not
+require any xor). Some numbers require less, like 2 that only requires
+3 xors.
+
+Having all this, we can check some numbers to see the performance of
+this method.
+
+Maybe the most interesting thing is the average number of xors needed
+to encode a single byte of data. To compute this we'll need to define
+some variables:
+
+ * K: Number of data fragments
+ * R: Number of redundancy fragments
+ * N: K + R
+ * B: Number of bits per number
+ * A: Average number of xors per number
+ * Z: Bits per CPU register (can be up to 256 for AVX registers)
+ * X: Average number of xors per CPU cycle
+ * L: Average cycles per load
+ * S: Average cycles per store
+ * G: Core speed in Hz
+
+_Total number of bytes processed for a single matrix multiplication_:
+
+ * __Read__: K * B * Z / 8
+ * __Written__: N * B * Z / 8
+
+_Total number of memory accesses_:
+
+ * __Loads__: K * B * N
+ * __Stores__: B * N
+
+> We need to read the same K * B * Z bits, in registers of Z bits, N
+> times, one for each row of the matrix. However the last N - 1 reads
+> could be made from the internal CPU caches if conditions are good.
+
+_Total number of operations_:
+
+ * __Additions__: (K - 1) * N
+ * __Multiplications__: K * N
+
+__Total number of xors__: B * (K - 1) * N + A * K * N =
+ N * ((A + B) * K - B)
+
+__Xors per byte__: 8 * N * ((A + B) * K - B) / (K * B * Z)
+
+__CPU cycles per byte__: 8 * N * ((A + B) * K - B) / (K * B * Z * X) +
+ 8 * L * N / Z + (loads)
+ 8 * S * N / (K * Z) (stores)
+
+__Bytes per second__: G / {CPU cycles per byte}
+
+Some xors per byte numbers for specific configurations (B=8):
+
+ Z=64 Z=128 Z=256
+ K=2/R=1 0.79 0.39 0.20
+ K=4/R=2 1.76 0.88 0.44
+ K=4/R=3 2.06 1.03 0.51
+ K=8/R=3 3.40 1.70 0.85
+ K=8/R=4 3.71 1.86 0.93
+ K=16/R=4 6.34 3.17 1.59
+
+
+
+[1]: https://en.wikipedia.org/wiki/Erasure_code
+[2]: https://en.wikipedia.org/wiki/Reed%E2%80%93Solomon_error_correction
+[3]: https://en.wikipedia.org/wiki/Systematic_code
+[4]: https://en.wikipedia.org/wiki/Identity_matrix
+[5]: https://en.wikipedia.org/wiki/Linear_independence
+[6]: https://en.wikipedia.org/wiki/Vandermonde_matrix
+[7]: https://en.wikipedia.org/wiki/Finite_field
+[8]: https://en.wikipedia.org/wiki/Finite_field_arithmetic#Implementation_tricks
diff --git a/doc/developer-guide/fuse-interrupt.md b/doc/developer-guide/fuse-interrupt.md
new file mode 100644
index 00000000000..ec991b81ec5
--- /dev/null
+++ b/doc/developer-guide/fuse-interrupt.md
@@ -0,0 +1,211 @@
+# Fuse interrupt handling
+
+## Conventions followed
+
+- *FUSE* refers to the "wire protocol" between kernel and userspace and
+ related specifications.
+- *fuse* refers to the kernel subsystem and also to the GlusterFs translator.
+
+## FUSE interrupt handling spec
+
+The [Linux kernel FUSE documentation](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/filesystems/fuse.txt?h=v4.18#n148)
+desrcibes how interrupt handling happens in fuse.
+
+## Interrupt handling in the fuse translator
+
+### Declarations
+
+This document describes the internal API in the fuse translator with which
+interrupt can be handled.
+
+The API being internal (to be used only in fuse-bridge.c; the functions are
+not exported to a header file).
+
+```
+enum fuse_interrupt_state {
+ /* ... */
+ INTERRUPT_SQUELCHED,
+ INTERRUPT_HANDLED,
+ /* ... */
+};
+typedef enum fuse_interrupt_state fuse_interrupt_state_t;
+struct fuse_interrupt_record;
+typedef struct fuse_interrupt_record fuse_interrupt_record_t;
+typedef void (*fuse_interrupt_handler_t)(xlator_t *this,
+ fuse_interrupt_record_t *);
+struct fuse_interrupt_record {
+ fuse_in_header_t fuse_in_header;
+ void *data;
+ /*
+ ...
+ */
+};
+
+fuse_interrupt_record_t *
+fuse_interrupt_record_new(fuse_in_header_t *finh,
+ fuse_interrupt_handler_t handler);
+
+void
+fuse_interrupt_record_insert(xlator_t *this, fuse_interrupt_record_t *fir);
+
+gf_boolean_t
+fuse_interrupt_finish_fop(call_frame_t *frame, xlator_t *this,
+ gf_boolean_t sync, void **datap);
+
+void
+fuse_interrupt_finish_interrupt(xlator_t *this, fuse_interrupt_record_t *fir,
+ fuse_interrupt_state_t intstat,
+ gf_boolean_t sync, void **datap);
+```
+
+The code demonstrates the usage of the API through `fuse_flush()`. (It's a
+dummy implementation only for demonstration purposes.) Flush is chosen
+because a `FLUSH` interrupt is easy to trigger (see
+*tests/features/interrupt.t*). Interrupt handling for flush is switched on
+by `--fuse-flush-handle-interrupt` (a hidden glusterfs command line flag).
+The implementation of flush interrupt is contained in the
+`fuse_flush_interrupt_handler()` function and blocks guarded by the
+
+```
+if (priv->flush_handle_interrupt) { ...
+```
+
+conditional (where `priv` is a `*fuse_private_t`).
+
+### Overview
+
+"Regular" fuse fops and interrupt handlers interact via a list containing
+interrupt records.
+
+If a fop wishes to have its interrupts handled, it needs to set up an
+interrupt record and insert it into the list; also when it's to finish
+(ie. in its "cbk" stage) it needs to delete the record from the list.
+
+If no interrupt happens, basically that's all to it - a list insertion
+and deletion.
+
+However, if an interrupt comes for the fop, the interrupt FUSE request
+will carry the data identifying an ongoing fop (that is, its `unique`),
+and based on that, the interrupt record will be looked up in the list, and
+the specific interrupt handler (a member of the interrupt record) will be
+called.
+
+Usually the fop needs to share some data with the interrupt handler to
+enable it to perform its task (also shared via the interrupt record).
+The interrupt API offers two approaches to manage shared data:
+- _Async or reference-counting strategy_: from the point on when the interrupt
+ record is inserted to the list, it's owned jointly by the regular fop and
+ the prospective interrupt handler. Both of them need to check before they
+ return if the other is still holding a reference; if not, then they are
+ responsible for reclaiming the shared data.
+- _Sync or borrow strategy_: the interrupt handler is considered a borrower
+ of the shared data. The interrupt handler should not reclaim the shared
+ data. The fop will wait for the interrupt handler to finish (ie., the borrow
+ to be returned), then it has to reclaim the shared data.
+
+The user of the interrupt API need to call the following functions to
+instrument this control flow:
+- `fuse_interrupt_record_insert()` in the fop to insert the interrupt record to
+ the list;
+- `fuse_interrupt_finish_fop()`in the fop (cbk) and
+- `fuse_interrupt_finish_interrupt()`in the interrupt handler
+
+to perform needed synchronization at the end their tenure. The data management
+strategies are implemented by the `fuse_interrupt_finish_*()` functions (which
+have an argument to specify which strategy to use); these routines take care
+of freeing the interrupt record itself, while the reclamation of the shared data
+is left to the API user.
+
+### Usage
+
+A given FUSE fop can be enabled to handle interrupts via the following
+steps:
+
+- Define a handler function (of type `fuse_interrupt_handler_t`).
+ It should implement the interrupt handling logic and in the end
+ call (directly or as async callback) `fuse_interrupt_finish_interrupt()`.
+ The `intstat` argument to `fuse_interrupt_finish_interrupt` should be
+ either `INTERRUPT_SQUELCHED` or `INTERRUPT_HANDLED`.
+ - `INTERRUPT_SQUELCHED` means that the interrupt could not be delivered
+ and the fop is going on uninterrupted.
+ - `INTERRUPT_HANDLED` means that the interrupt was actually handled. In
+ this case the fop will be answered from interrupt context with errno
+ `EINTR` (that is, the fop should not send a response to the kernel).
+
+ (the enum `fuse_interrupt_state` includes further members, which are reserved
+ for internal use).
+
+ We return to the `sync` and `datap` arguments later.
+- In the `fuse_<FOP>` function create an interrupt record using
+ `fuse_interrupt_record_new()`, passing the incoming `fuse_in_header` and
+ the above handler function to it.
+ - Arbitrary further data can be referred to via the `data` member of the
+ interrupt record that is to be passed on from fop context to
+ interrupt context.
+- When it's set up, pass the interrupt record to
+ `fuse_interrupt_record_insert()`.
+- In `fuse_<FOP>_cbk` call `fuse_interrupt_finish_fop()`.
+ - `fuse_interrupt_finish_fop()` returns a Boolean according to whether the
+ interrupt was handled. If it was, then the FUSE request is already
+ answered and the stack gets destroyed in `fuse_interrupt_finish_fop` so
+ `fuse_<FOP>_cbk()` can just return (zero). Otherwise follow the standard
+ cbk logic (answer the FUSE request and destroy the stack -- these are
+ typically accomplished by `fuse_err_cbk()`).
+- The last two argument of `fuse_interrupt_finish_fop()` and
+ `fuse_interrupt_finish_interrupt()` are `gf_boolean_t sync` and
+ `void **datap`.
+ - `sync` represents the strategy for freeing the interrupt record. The
+ interrupt handler and the fop handler are in race to get at the interrupt
+ record first (interrupt handler for purposes of doing the interrupt
+ handling, fop handler for purposes of deactivating the interrupt record
+ upon completion of the fop handling).
+ - If `sync` is true, then the fop handler will wait for the interrupt
+ handler to finish and it takes care of freeing.
+ - If `sync` is false, the loser of the above race will perform freeing.
+
+ Freeing is done within the respective interrupt finish routines, except
+ for the `data` field of the interrupt record; with respect to that, see
+ the discussion of the `datap` parameter below. The strategy has to be
+ consensual, that is, `fuse_interrupt_finish_fop()` and
+ `fuse_interrupt_finish_interrupt()` must pass the same value for `sync`.
+ If dismantling the resources associated with the interrupt record is
+ simple, `sync = _gf_false` is the suggested choice; `sync = _gf_true` can
+ be useful in the opposite case, when dismantling those resources would
+ be inconvenient to implement in two places or to enact in non-fop context.
+ - If `datap` is `NULL`, the `data` member of the interrupt record will be
+ freed within the interrupt finish routine. If it points to a valid
+ `void *` pointer, and if caller is doing the cleanup (see `sync` above),
+ then that pointer will be directed to the `data` member of the interrupt
+ record and it's up to the caller what it's doing with it.
+ - If `sync` is true, interrupt handler can use `datap = NULL`, and
+ fop handler will have `datap` point to a valid pointer.
+ - If `sync` is false, and handlers pass a pointer to a pointer for
+ `datap`, they should check if the pointed pointer is NULL before
+ attempting to deal with the data.
+
+### FUSE answer for the interrupted fop
+
+The kernel acknowledges a successful interruption for a given FUSE request
+if the filesystem daemon answers it with errno EINTR; upon that, the syscall
+which induced the request will be abruptly terminated with an interrupt, rather
+than returning a value.
+
+In glusterfs, this can be arranged in two ways.
+
+- If the interrupt handler wins the race for the interrupt record, ie.
+ `fuse_interrupt_finish_fop()` returns true to `fuse_<FOP>_cbk()`, then, as
+ said above, `fuse_<FOP>_cbk()` does not need to answer the FUSE request.
+ That's because then the interrupt handler will take care about answering
+ it (with errno EINTR).
+- If `fuse_interrupt_finish_fop()` returns false to `fuse_<FOP>_cbk()`, then
+ this return value does not inform the fop handler whether there was an interrupt
+ or not. This return value occurs both when fop handler won the race for the
+ interrupt record against the interrupt handler, and when there was no interrupt
+ at all.
+
+ However, the internal logic of the fop handler might detect from other
+ circumstances that an interrupt was delivered. For example, the fop handler
+ might be sleeping, waiting for some data to arrive, so that a premature
+ wakeup (with no data present) occurs if the interrupt handler intervenes. In
+ such cases it's the responsibility of the fop handler to reply the FUSE
+ request with errro EINTR.
diff --git a/doc/developer-guide/gfapi-symbol-versions.md b/doc/developer-guide/gfapi-symbol-versions.md
new file mode 100644
index 00000000000..e4f4fe9f052
--- /dev/null
+++ b/doc/developer-guide/gfapi-symbol-versions.md
@@ -0,0 +1,270 @@
+
+## Symbol Versions and SO_NAMEs
+
+ In general, adding new APIs to a shared library does not require that
+symbol versions be used or the the SO_NAME be "bumped." These actions
+are usually reserved for when a major change is introduced, e.g. many
+APIs change or a signficant change in the functionality occurs.
+
+ Over the normal lifetime of a When a new API is added, the library is
+recompiled, consumers of the new API are able to do so, and existing,
+legacy consumers of the original API continue as before. If by some
+chance an old copy of the library is installed on a system, it's unlikely
+that most applications will be affected. New applications that use the
+new API will incur a run-time error terminate.
+
+ Bumping the SO_NAME, i.e. changing the shared lib's file name, e.g.
+from libfoo.so.0 to libfoo.so.1, which also changes the ELF SO_NAME
+attribute inside the file, works a little differently. libfoo.so.0
+contains only the old APIs. libfoo.so.1 contains both the old and new
+APIs. Legacy software that was linked with libfoo.so.0 continues to work
+as libfoo.so.0 is usually left installed on the system. New software that
+uses the new APIs is linked with libfoo.so.1, and works as long as
+long as libfoo.so.1 is installed on the system. Accidentally (re)installing
+libfoo.so.0 doesn't break new software as long as reinstalling doesn't
+erase libfoo.so.1.
+
+ Using symbol versions is somewhere in the middle. The shared library
+file remains libfoo.so.0 forever. Legacy APIs may or may not have an
+associated symbol version. New APIs may or may not have an associated
+symbol version either. In general symbol versions are reserved for APIs
+that have changed. Either the function's signature has changed, i.e. the
+return time or the number of paramaters, and/or the parameter types have
+changed. Another reason for using symbol versions on an API is when the
+behaviour or functionality of the API changes dramatically. As with a
+library that doesn't use versioned symbols, old and new applications
+either find or don't find the versioned symbols they need. If the versioned
+symbol doesn't exist in the installed library, the application incurs a
+run-time error and terminates.
+
+ GlusterFS wanted to keep tight control over the APIs in libgfapi.
+Originally bumping the SO_NAME was considered, and GlusterFS-3.6.0 was
+released with libgfapi.so.7. Not only was "7" a mistake (it should have
+been "6"), but it was quickly pointed out that many dependent packages
+that use libgfapi would be forced to be recompiled/relinked. Thus no
+packages of 3.6.0 were ever released and 3.6.1 was quickly released with
+libgfapi.so.0, but with symbol versions. There's no strong technical
+reason for either; the APIs have not changed, only new APIs have been
+added. It's merely being done in anticipation that some APIs might change
+sometime in the future.
+
+ Enough about that now, let's get into the nitty gritty——
+
+## Adding new APIs
+
+### Adding a public API.
+
+ This is the default, and the easiest thing to do. Public APIs have
+declarations in either glfs.h, glfs-handles.h, or, at your discretion,
+in a new header file intended for consumption by other developers.
+
+Here's what you need to do to add a new public API:
+
++ Write the declaration, e.g. in glfs.h:
+
+```C
+ int glfs_dtrt (const char *volname, void *stuff) __THROW
+```
+
++ Write the definition, e.g. in glfs-dtrt.c:
+
+```C
+ int
+ pub_glfs_dtrt (const char *volname, void *stuff)
+ {
+ ...
+ return 0;
+ }
+```
+
++ Add the symbol version magic for ELF, gnu toolchain to the definition.
+
+ following the definition of your new function in glfs-dtrtops.c, add a
+ line like this:
+
+```C
+ GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_dtrt, 3.7.0)
+```
+
+ The whole thing should look like:
+
+```C
+ int
+ pub_glfs_dtrt (const char *volname, void *stuff)
+ {
+ ...
+ }
+ GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_dtrt, 3.7.0);
+```
+
+ In this example, 3.7.0 refers to the Version the symbol will first
+ appear in. There's nothing magic about it, it's just a string token.
+ The current versions we have are 3.4.0, 3.4.2, 3.5.0, 3.5.1, and 3.6.0.
+ They are to be considered locked or closed. You can not, must not add
+ any new APIs and use these versions. Most new APIs will use 3.7.0. If
+ you add a new API appearing in 3.6.2 (and mainline) then you would use
+ 3.6.2.
+
++ Add the symbol version magic for OS X to the declaration.
+
+ following the declaration in glfs.h, add a line like this:
+
+```C
+ GFAPI_PUBLIC(glfs_dtrt, 3.7.0)
+```
+
+ The whole thing should look like:
+
+```C
+ int glfs_dtrt (const char *volname, void *stuff) __THROW
+ GFAPI_PUBLIC(glfs_dtrt, 3.7.0);
+```
+
+ The version here must match the version associated with the definition.
+
++ Add the new API to the ELF, gnu toolchain link map file, gfapi.map
+
+ Most new public APIs will probably be added to a new section that
+ looks like this:
+
+```
+ GFAPI_3.7.0 {
+ global:
+ glfs_dtrt;
+ } GFAPI_PRIVATE_3.7.0;
+```
+
+ if you're adding your new API to, e.g. 3.6.2, it'll look like this:
+
+```
+ GFAPI_3.6.2 {
+ global:
+ glfs_dtrt;
+ } GFAPI_3.6.0;
+```
+
+ and you must change the
+```
+ GFAPI_PRIVATE_3.7.0 { ...} GFAPI_3.6.0;
+```
+ section to:
+```
+ GFAPI_PRIVATE_3.7.0 { ...} GFAPI_3.6.2;
+```
+
++ Add the new API to the OS X alias list file, gfapi.aliases.
+
+ Most new APIs will use a line that looks like this:
+
+```C
+ _pub_glfs_dtrt _glfs_dtrt$GFAPI_3.7.0
+```
+
+ if you're adding your new API to, e.g. 3.6.2, it'll look like this:
+
+```C
+ _pub_glfs_dtrt _glfs_dtrt$GFAPI_3.6.2
+```
+
+And that's it.
+
+
+### Adding a private API.
+
+ If you're thinking about adding a private API that isn't declared in
+one of the header files, then you should seriously rethink what you're
+doing and figure out how to put it in libglusterfs instead.
+
+If that hasn't convinced you, follow the instructions above, but use the
+_PRIVATE versions of macros, symbol versions, and aliases. If you're 1337
+enough to ignore this advice, then you're 1337 enough to figure out how
+to do it.
+
+
+## Changing an API.
+
+### Changing a public API.
+
+ There are two ways an API might change, 1) its signature has changed, or
+2) its new functionality or behavior is substantially different than the
+old. An APIs signature consists of the function return type, and the number
+and/or type of its parameters. E.g. the original API:
+
+```C
+ int glfs_dtrt (const char *volname, void *stuff);
+```
+
+and the changed API:
+
+```C
+ void *glfs_dtrt (const char *volname, glfs_t *ctx, void *stuff);
+```
+
+ One way to avoid a change like this, and which is preferable in many
+ways, is to leave the legacy glfs_dtrt() function alone, document it as
+deprecated, and simply add a new API, e.g. glfs_dtrt2(). Practically
+speaking, that's effectively what we'll be doing anyway, the difference
+is only that we'll use a versioned symbol to do it.
+
+ On the assumption that adding a new API is undesirable for some reason,
+perhaps the use of glfs_gnu() is just so pervasive that we really don't
+want to add glfs_gnu2().
+
++ change the declaration in glfs.h:
+
+```C
+ glfs_t *glfs_gnu (const char *volname, void *stuff) __THROW
+ GFAPI_PUBLIC(glfs_gnu, 3.7.0);
+````
+
+Note that there is only the single, new declaration.
+
++ change the old definition of glfs_gnu() in glfs.c:
+
+```C
+ struct glfs *
+ pub_glfs_gnu340 (const char * volname)
+ {
+ ...
+ }
+ GFAPI_SYMVER_PUBLIC(glfs_gnu340, glfs_gnu, 3.4.0);
+```
+
++ create the new definition of glfs_gnu in glfs.c:
+
+```C
+ struct glfs *
+ pub_glfs_gnu (const char * volname, void *stuff)
+ {
+ ...
+ }
+ GFAPI_SYMVER_PUBLIC_DEFAULT(glfs_gnu, 3.7.0);
+```
+
++ Add the new API to the ELF, gnu toolchain link map file, gfapi.map
+
+```
+ GFAPI_3.7.0 {
+ global:
+ glfs_gnu;
+ } GFAPI_PRIVATE_3.7.0;
+```
+
++ Update the OS X alias list file, gfapi.aliases, for both versions:
+
+Change the old line:
+```C
+ _pub_glfs_gnu _glfs_gnu$GFAPI_3.4.0
+```
+to:
+```C
+ _pub_glfs_gnu340 _glfs_gnu$GFAPI_3.4.0
+```
+
+Add a new line:
+```C
+ _pub_glfs_gnu _glfs_gnu$GFAPI_3.7.0
+```
+
++ Lastly, change all gfapi internal calls glfs_gnu to the new API.
+
diff --git a/doc/developer-guide/identifying-resource-leaks.md b/doc/developer-guide/identifying-resource-leaks.md
new file mode 100644
index 00000000000..950cae79b0a
--- /dev/null
+++ b/doc/developer-guide/identifying-resource-leaks.md
@@ -0,0 +1,200 @@
+# Identifying Resource Leaks
+
+Like most other pieces of software, GlusterFS is not perfect in how it manages
+its resources like memory, threads and the like. Gluster developers try hard to
+prevent leaking resources but releasing and unallocating the used structures.
+Unfortunately every now and then some resource leaks are unintentionally added.
+
+This document tries to explain a few helpful tricks to identify resource leaks
+so that they can be addressed.
+
+
+## Debug Builds
+
+There are certain techniques used in GlusterFS that make it difficult to use
+tools like Valgrind for memory leak detection. There are some build options
+that make it more practical to use Valgrind and other tools. When running
+Valgrind, it is important to have GlusterFS builds that contain the
+debuginfo/symbols. Some distributions (try to) strip the debuginfo to get
+smaller executables. Fedora and RHEL based distributions have sub-packages
+called ...-debuginfo that need to be installed for symbol resolving.
+
+
+### Memory Pools
+
+By using memory pools, there are no allocation/freeing of single structures
+needed. This improves performance, but also makes it impossible to track the
+allocation and freeing of srtuctures.
+
+It is possible to disable the use of memory pools, and use standard `malloc()`
+and `free()` functions provided by the C library. Valgrind is then able to
+track the allocated areas and verify if they have been free'd. In order to
+disable memory pools, the Gluster sources needs to be configured with the
+`--enable-debug` option:
+
+```shell
+./configure --enable-debug
+```
+
+When building RPMs, the `.spec` handles the `--with=debug` option too:
+
+```shell
+make dist
+rpmbuild -ta --with=debug glusterfs-....tar.gz
+```
+
+### Dynamically Loaded xlators
+
+Valgrind tracks the call chain of functions that do memory allocations. The
+addresses of the functions are stored and before Valgrind exits the addresses
+are resolved into human readable function names and offsets (line numbers in
+source files). Because Gluster loads xlators dynamically, and unloads then
+before exiting, Valgrind is not able to resolve the function addresses into
+symbols anymore. Whenever this happend, Valgrind shows `???` in the output,
+like
+
+```
+ ==25170== 344 bytes in 1 blocks are definitely lost in loss record 233 of 324
+ ==25170== at 0x4C29975: calloc (vg_replace_malloc.c:711)
+ ==25170== by 0x52C7C0B: __gf_calloc (mem-pool.c:117)
+ ==25170== by 0x12B0638A: ???
+ ==25170== by 0x528FCE6: __xlator_init (xlator.c:472)
+ ==25170== by 0x528FE16: xlator_init (xlator.c:498)
+ ...
+```
+
+These `???` can be prevented by not calling `dlclose()` for unloading the
+xlator. This will cause a small leak of the handle that was returned with
+`dlopen()`, but for improved debugging this can be acceptible. For this and
+other Valgrind features, a `--enable-valgrind` option is available to
+`./configure`. When GlusterFS is built with this option, Valgrind will be able
+to resolve the symbol names of the functions that do memory allocations inside
+xlators.
+
+```shell
+./configure --enable-valgrind
+```
+
+When building RPMs, the `.spec` handles the `--with=valgrind` option too:
+
+```shell
+make dist
+rpmbuild -ta --with=valgrind glusterfs-....tar.gz
+```
+
+## Running Valgrind against a single xlator
+
+Debugging a single xlator is not trivial. But there are some tools to make it
+easier. The `sink` xlator does not do any memory allocations itself, but
+contains just enough functionality to mount a volume with only the `sink`
+xlator. There is a little gfapi application under `tests/basic/gfapi/` in the
+GlusterFS sources that can be used to run only gfapi and the core GlusterFS
+infrastructure with the `sink` xlator. By extending the `.vol` file to load
+more xlators, each xlator can be debugged pretty much separately (as long as
+the xlators have no dependencies on each other). A basic Valgrind run with the
+suitable configure options looks like this:
+
+```shell
+./autogen.sh
+./configure --enable-debug --enable-valgrind
+make && make install
+cd tests/basic/gfapi/
+make gfapi-load-volfile
+valgrind ./gfapi-load-volfile sink.vol
+```
+
+Combined with other very useful options to Valgrind, the following execution
+shows many more useful details:
+
+```shell
+valgrind \
+ --fullpath-after= --leak-check=full --show-leak-kinds=all \
+ ./gfapi-load-volfile sink.vol
+```
+
+Note that the `--fullpath-after=` option is left empty, this makes Valgrind
+print the full path and filename that contains the functions:
+
+```
+==2450== 80 bytes in 1 blocks are definitely lost in loss record 8 of 60
+==2450== at 0x4C29975: calloc (/builddir/build/BUILD/valgrind-3.11.0/coregrind/m_replacemalloc/vg_replace_malloc.c:711)
+==2450== by 0x52C6F73: __gf_calloc (/usr/src/debug/glusterfs-3.11dev/libglusterfs/src/mem-pool.c:117)
+==2450== by 0x12F10CDA: init (/usr/src/debug/glusterfs-3.11dev/xlators/meta/src/meta.c:231)
+==2450== by 0x528EFD5: __xlator_init (/usr/src/debug/glusterfs-3.11dev/libglusterfs/src/xlator.c:472)
+==2450== by 0x528F105: xlator_init (/usr/src/debug/glusterfs-3.11dev/libglusterfs/src/xlator.c:498)
+==2450== by 0x52D9D8B: glusterfs_graph_init (/usr/src/debug/glusterfs-3.11dev/libglusterfs/src/graph.c:321)
+...
+```
+
+In the above example, the `init` function in `xlators/meta/src/meta.c` does a
+memory allocation on line 231. This memory is never free'd again, and hence
+Valgrind logs this call stack. When looking in the code, it seems that the
+allocation of `priv` is assigned to the `this->private` member of the
+`xlator_t` structure. Because the allocation is done in `init()`, free'ing is
+expected to happen in `fini()`. Both functions are shown below, with the
+inclusion of the empty `fini()`:
+
+
+```
+226 int
+227 init (xlator_t *this)
+228 {
+229 meta_priv_t *priv = NULL;
+230
+231 priv = GF_CALLOC (sizeof(*priv), 1, gf_meta_mt_priv_t);
+232 if (!priv)
+233 return -1;
+234
+235 GF_OPTION_INIT ("meta-dir-name", priv->meta_dir_name, str, out);
+236
+237 this->private = priv;
+238 out:
+239 return 0;
+240 }
+241
+242
+243 int
+244 fini (xlator_t *this)
+245 {
+246 return 0;
+247 }
+```
+
+In this case, the resource leak can be addressed by adding a single line to the
+`fini()` function:
+
+```
+243 int
+244 fini (xlator_t *this)
+245 {
+246 GF_FREE (this->private);
+247 return 0;
+248 }
+```
+
+Running the same Valgrind command and comparing the output will show that the
+memory leak in `xlators/meta/src/meta.c:init` is not reported anymore.
+
+### Running DRD, the Valgrind thread error detector
+
+When configuring GlusterFS with:
+
+```shell
+./configure --enable-valgrind
+```
+
+the default Valgrind tool (Memcheck) is enabled. But it's also possble to select
+one of Memcheck or DRD by using:
+
+```shell
+./configure --enable-valgrind=memcheck
+```
+
+or:
+
+```shell
+./configure --enable-valgrind=drd
+```
+
+respectively. When using DRD, it's recommended to consult
+https://valgrind.org/docs/manual/drd-manual.html before running.
diff --git a/doc/developer-guide/logging-guidelines.md b/doc/developer-guide/logging-guidelines.md
new file mode 100644
index 00000000000..0e6b2588535
--- /dev/null
+++ b/doc/developer-guide/logging-guidelines.md
@@ -0,0 +1,132 @@
+Guidelines on using the logging framework within a component
+============================================================
+Gluster library libglusterfs.so provides message logging abstractions that
+are intended to be used across all code/components within gluster.
+
+There could be potentially 2 major cases how the logging infrastructure is
+used,
+ - A new gluster service daemon or end point is created
+ - The service daemon infrastructure itself initlializes the logging
+ infrastructure (i.e calling gf_log_init and related set functions)
+ - See, glusterfsd.c:logging_init
+ - Alternatively there could be a case where an end point service (say
+ gfapi) may need to do the required initialization
+ - This document does not (yet?) cover guidelines for these cases. Best
+ bet would be to look at code in glusterfsd.c:logging_init (or equivalent)
+ in case a need arises and you reach this document.
+ - A new xlator or subcomponent is written as a part of the stack
+ - Primarily in this case, the consumer of the logging APIs would only
+ invoke an API to *log* a particular message at a certain severity
+ - This document elaborates on this use of the message logging framework
+ in this context
+
+There are 2 interfaces provided to log messages,
+1. The older gf_log* interface
+2. The newer gf_msg* interface
+
+1. Older _to_be_deprecated_ gf_log* interface
+ - Not much documentation is provided for this interface here, as these are
+ meant to be deprecated and all code should be moved to the newer gf_msg*
+ interface
+
+2. New gf_msg* interface
+ - The set of interfaces provided by gf_msg are,
+ - NOTE: It is best to consult logging.h for the latest interfaces, as
+ this document may get out of sync with the code
+
+ *gf_msg(dom, levl, errnum, msgid, fmt...)*
+ - Used predominantly to log a message above TRACE and DEBUG levels
+ - See further guidelines below
+
+ *gf_msg_debug(dom, errnum, fmt...)*
+ *gf_msg_trace(dom, errnum, fmt...)*
+ - Above are handy shortcuts for TRACE and DEBUG level messages
+ - See further guidelines below
+
+ *gf_msg_callingfn(dom, levl, errnum, msgid, fmt...)*
+ - Useful function when a backtrace to detect calling routines that
+ reached this execution point needs to be logged in addition to the
+ message
+ - This is very handy for framework libraries or code, as there could
+ be many callers to the same, and the real failure would need the call
+ stack to determine who the caller actually was
+ - A good example is the dict interfaces using this function to log who
+ called, when a particular error/warning needs to be displayed
+ - See further guidelines below
+
+ *gf_msg_plain(levl, fmt...)*
+ *gf_msg_plain_nomem(levl, msg)*
+ *gf_msg_vplain(levl, fmt, va)*
+ *gf_msg_backtrace_nomem*
+ - The above interfaces are provided to log messages without any typical
+ headers (like the time stamp, dom, errnum etc.). The primary users of
+ the above interfaces are, when printing the final graph, or printing
+ the configuration when a process is about dump core or abort, or
+ printing the backtrace when a process receives a critical signal
+ - These interfaces should not be used outside the scope of the users
+ above, unless you know what you are doing
+
+ *gf_msg_nomem(dom, levl, size)*
+ - Very crisp messages, throwing out file, function, line numbers, in
+ addition to the passed in size
+ - These are used in the memory allocation abstractions interfaces in
+ gluster, when the allocation fails (hence a no-mem log message, so that
+ further allocation is not attempted by the logging infrastructure)
+ - If you are contemplating using these, then you know why and how
+
+ - Guidelines for the various arguments to the interfaces
+ **dom**
+ - The domain from which this message appears, IOW for xlators it should
+ be the name of the xlator (as in this->name)
+
+ - The intention of this string is to distinguish from which
+ component/xlator the message is being printed
+
+ - This information can also be gleaned from the FILE:LINE:FUNCTION
+ information printed in the message anyway, but is retained to provide
+ backward compatability to the messages as seen by admins earlier
+
+ **levl**
+ - Consult logging.h:gf_loglevel_t for logging levels (they pretty much
+ map to syslog levels)
+
+ **errnum**
+ - errno should be passed in here, if available, 0 otherwise. This auto
+ enables the logging infrastructure to print (man 3) strerror form of the
+ same at the end of the message in a consistent format
+
+ - If any message is already adding the strerror as a parameter to the
+ message, it is suggested/encouraged to remove the same and pass it as
+ errnum
+
+ - The intention is that, if all messages did this using errnum and not
+ as a part of their own argument list, the output would look consistent
+ for the admin and cross component developers when reading logs
+
+ **msgid**
+ - This is a unique message ID per message (which now is more a message
+ ID per group of messages in the implementation)
+
+ - Rules for generating this ID can be found in dht-messages.h (it used
+ to be template-common-messages.h, but that template is not updated,
+ this comment should be changed once that is done) and glfs-message-id.h
+
+ - Every message that is *above* TRACE and DEBUG should get a message
+ ID, as this helps generating message catalogs that can help admins to
+ understand the context of messages better. Another intention is that
+ automated message parsers could detect a class of message IDs and send
+ out notifications on the same, rather than parse the message string
+ itself (which if it changes, the input to the parser has to change, etc.
+ and hence is better to retain message IDs)
+
+ - Ok so if intention is not yet clear, look at journald MESSAGE ID
+ motivation, as that coupled with ident/dom above is expected to provide
+ us with similar advantages
+
+ - Bottomline: Every message gets its own ID, in case a message is
+ *not* DEBUG or TRACE and still is developer centric, a generic message
+ ID per component *maybe* assigned to the same to provide ease of use
+ of the API
+
+ **fmt**
+ - As in format argument of (man 3) printf
diff --git a/doc/developer-guide/network_compression.md b/doc/developer-guide/network_compression.md
new file mode 100644
index 00000000000..1222a765276
--- /dev/null
+++ b/doc/developer-guide/network_compression.md
@@ -0,0 +1,71 @@
+# On-Wire Compression + Decompression
+
+The 'compression translator' compresses and decompresses data in-flight
+between client and bricks.
+
+### Working
+When a writev call occurs, the client compresses the data before sending it to
+brick. On the brick, compressed data is decompressed. Similarly, when a readv
+call occurs, the brick compresses the data before sending it to client. On the
+client, the compressed data is decompressed. Thus, the amount of data sent over
+the wire is minimized. Compression/Decompression is done using Zlib library.
+
+During normal operation, this is the format of data sent over wire:
+
+~~~
+<compressed-data> + trailer(8 bytes)
+~~~
+
+The trailer contains the CRC32 checksum and length of original uncompressed
+data. This is used for validation.
+
+### Usage
+
+Turning on compression xlator:
+
+~~~
+gluster volume set <vol_name> network.compression on
+~~~
+
+### Configurable parameters (optional)
+
+**Compression level**
+~~~
+gluster volume set <vol_name> network.compression.compression-level 8
+~~~
+
+~~~
+ 0 : no compression
+ 1 : best speed
+ 9 : best compression
+-1 : default compression
+~~~
+
+**Minimum file size**
+
+~~~
+gluster volume set <vol_name> network.compression.min-size 50
+~~~
+
+Data is compressed only when its size exceeds the above value in bytes.
+
+**Other paramaters**
+
+Other less frequently used parameters include `network.compression.mem-level`
+and `network.compression.window-size`. More details can about these options
+can be found by running `gluster volume set help` command.
+
+### Known Issues and Limitations
+
+* Compression translator cannot work with striped volumes.
+* Mount point hangs when writing a file with write-behind xlator turned on. To
+overcome this, turn off `performance.write-behind` entirely OR
+set`performance.strict-write-ordering` to on.
+* For glusterfs versions <= 3.5, compression traslator can ONLY work with pure
+distribute volumes. This limitation is caused by AFR not being able to
+propagate xdata. This issue has been fixed in glusterfs versions > 3.5
+
+### TODO
+Although zlib offers high compression ratio, it is very slow. We can make the
+translator pluggable to add support for other compression methods such as
+[lz4 compression](https://code.google.com/p/lz4/)
diff --git a/doc/developer-guide/options-to-contribute.md b/doc/developer-guide/options-to-contribute.md
new file mode 100644
index 00000000000..3f0d84e7645
--- /dev/null
+++ b/doc/developer-guide/options-to-contribute.md
@@ -0,0 +1,212 @@
+# A guide for contributors
+
+While you have gone through 'how to contribute' guides, if you are
+not sure what to work on, but really want to help the project, you
+have now landed on the right document :-)
+
+### Basic
+
+Instead of planning to fix **all** the below issues in one patch,
+we recommend you to have a a constant, continuous flow of improvements
+for the project. We recommend you to pick 1 file (or just few files) at
+a time to address below issues.
+Pick any `.c` (or `.h`) file, and you can send a patch which fixes **any**
+of the below themes. Ideally, fix all such occurrences in the file, even
+though, the reviewers would review even a single line change patch
+from you.
+
+1. Check for variable definitions, and if there is an array definition,
+which is very large at the top of the function, see if you can re-scope
+the variable to relevant sections (if it helps).
+
+Most of the time, some of these arrays may be used for 'error' handling,
+and it is possible to use them only in that scope.
+
+Reference: https://review.gluster.org/20846/
+
+
+2. Check for complete string initialization at the beginning of a function.
+Ideally, there is no reason to initialize a string. Fix it across the file.
+
+Example:
+
+`char new_path_name[PATH_MAX] = {0};` to `char new_path_name[PATH_MAX];`
+
+
+3. Change `calloc()` to `malloc()` wherever it makes sense.
+
+In a case of allocating a structures, where you expect certain (or most of)
+variables to be 0 (or NULL), it makes sense to use calloc(). But otherwise,
+there is an extra cost to `memset()` the whole object after allocating it.
+While it is not a significant improvement in performance, code which gets
+hit 1000s of times in a second, it would add some value.
+
+Reference: https://review.gluster.org/20878/
+
+
+4. You can consider using `snprintf()`, instead of `strncpy()` while dealing
+with strings.
+
+strncpy() won't null terminate if the dest buffer isn't big enough; snprintf()
+does. While most of the string operations in the code is on array, and larger
+size than required, strncpy() does an extra copy of 0s at the end of
+string till the size of the array. It makes sense to use `snprintf()`,
+which doesn't suffer from that behavior.
+
+Also check the return value from snprintf() for buffer overflow and handle
+accordingly
+
+Reference: https://review.gluster.org/20925/
+
+
+5. Now, pick a `.h` file, and see if a structure is very large, and see
+if re-aligning them as per [coding-standard](./coding-standard.md) gives any size benefit,
+if yes, go ahead and change it. Make sure you check all the structures
+in the file for similar pattern.
+
+Reference: [Check this section](https://github.com/gluster/glusterfs/blob/master/doc/developer-guide/coding-standard.md#structure-members-should-be-aligned-based-on-the-padding-requirements
+
+
+### If you are up for more :-)
+
+Good progress! Glad you are interested to know more. We are surely interested
+in next level of contributions from you!
+
+#### Coverity
+
+Visit [Coverity Dashboard](https://scan.coverity.com/projects/gluster-glusterfs?tab=overview).
+
+Now, if the number of defect is not 0, you have an opportunity to contribute.
+
+You get all the detail on why the particular defect is mentioned there, and
+most probable hint on how to fix it. Do it!
+
+Reference: https://review.gluster.org/21394/
+
+Use the same reference Id (789278) as the patch, so we can capture it is in
+single bugzilla.
+
+#### Clang-Scan
+
+Clang-Scan is a tool which scans the .c files and reports the possible issues,
+similar to coverity, but a different tool. Over the years we have seen, they
+both report very different set of issues, and hence there is a value in fixing it.
+
+GlusterFS project gets tested with clang-scan job every night, and the report is
+posted in the [job details page](https://build.gluster.org/job/clang-scan/lastCompletedBuild/clangScanBuildBugs/).
+As long as the number is not 0 in the report here, you have an opportunity to
+contribute! Similar to coverity dashboard, click on 'Details' to find out the
+reason behind that report, and send a patch.
+
+Reference: https://review.gluster.org/21025
+
+Again, you can use reference Id (1622665) for these patches!
+
+
+### I am good with programming, I would like to do more than above!
+
+#### Locked regions / Critical sections
+
+In the file you open, see if the lock is taken only to increment or decrement
+a flag, counter etc. If yes, then recommend you to convert it to ATOMIC locks.
+It is simple activity, but, if you know programing, you would know the benefit
+here.
+
+NOTE: There may not always a possibility to do this! You may have to check
+with developers first before going ahead.
+
+Reference: https://review.gluster.org/21221/
+
+
+#### ASan (address sanitizer)
+
+[The job](https://build.gluster.org/job/asan/) runs regression with asan builds,
+and you can also run glusterfs with asan on your workload to identify the leaks.
+If there are any leaks reported, feel free to check it, and send us patch.
+
+You can also run `valgrind` and let us know what it reports.
+
+Reference: https://review.gluster.org/21397
+
+
+#### Porting to different architecture
+
+This is something which we are not focusing right now, happy to collaborate!
+
+Reference: https://review.gluster.org/21276
+
+
+#### Fix 'TODO/FIXME' in codebase
+
+There are few cases of pending features, or pending validations, which are
+pending from sometime. You can pick them in the given file, and choose to
+fix it.
+
+
+### I don't know C, but I am interested to contribute in some way!
+
+You are most welcome! Our community is open for your contribution! First thing
+which comes to our mind is **documentation**. Next is, **testing** or validation.
+
+If you have some hardware, and want to run some performance comparisons with
+different version, or options, and help us to tune better is also a great help.
+
+
+#### Documentation
+
+1. We have some documentation in [glusterfs repo](../), go through these, and
+see if you can help us to keep up-to-date.
+
+2. The https://docs.gluster.org is powered by https://github.com/gluster/glusterdocs
+repo. You can check out the repo, and help in keeping that up-to-date.
+
+3. [Our website](https://gluster.org) is maintained by https://github.com/gluster/glusterweb
+repo. Help us to keep this up-to-date, and add content there.
+
+4. Write blogs about Gluster, and your experience, and make world know little
+more about Gluster, and your use-case, and how it helped to solve the problem.
+
+
+#### Testing
+
+1. There is a regression test suite in glusterfs, which runs with every patch, and is
+triggered by just running `./run-tests.sh` from the root of the project repo.
+
+You can add more test case to match your use-case, and send it as a patch, so you
+can make sure all future patches in glusterfs would keep your usecase intact.
+
+2. [Glusto-Tests](https://github.com/gluster/glusto-tests): This is another testing
+framework written for gluster, and makes use of clustered setup to test different
+use-cases, and helps to validate many bugs.
+
+
+#### Ansible
+
+Gluster Organization has rich set of ansible roles, which are actively maintained.
+Feel free to check them out here - https://github.com/gluster/gluster-ansible
+
+
+#### Monitoring
+
+We have prometheus repo, and are actively working on adding more metrics. Add what
+you need @ https://github.com/gluster/gluster-prometheus
+
+
+#### Health-Report
+
+This is a project, where at any given point in time, you want to run some set of
+commands locally, and get an output to analyze the status, it can be added.
+Contribute @ https://github.com/gluster/gluster-health-report
+
+
+### All these C/bash/python is old-school, I want something in containers.
+
+We have something for you too :-)
+
+Please visit our https://github.com/gluster/gcs repo for checking how you can help,
+and how gluster can help you in container world.
+
+
+### Note
+
+For any queries, best way is to contact us through mailing-list, <mailto:gluster-devel@gluster.org>
diff --git a/doc/developer-guide/posix.md b/doc/developer-guide/posix.md
new file mode 100644
index 00000000000..84c813e55a2
--- /dev/null
+++ b/doc/developer-guide/posix.md
@@ -0,0 +1,59 @@
+storage/posix translator
+========================
+
+Notes
+-----
+
+### `SET_FS_ID`
+
+This is so that all filesystem checks are done with the user's
+uid/gid and not GlusterFS's uid/gid.
+
+### `MAKE_REAL_PATH`
+
+This macro concatenates the base directory of the posix volume
+('option directory') with the given path.
+
+### `need_xattr` in lookup
+
+If this flag is passed, lookup returns a xattr dictionary that contains
+the file's create time, the file's contents, and the version number
+of the file.
+
+This is a hack to increase small file performance. If an application
+wants to read a small file, it can finish its job with just a lookup
+call instead of a lookup followed by read.
+
+### `getdents`/`setdents`
+
+These are used by unify to set and get directory entries.
+
+### `ALIGN_BUF`
+
+Macro to align an address to a page boundary (4K).
+
+### `priv->export_statfs`
+
+In some cases, two exported volumes may reside on the same
+partition on the server. Sending statvfs info for both
+the volumes will lead to erroneous df output at the client,
+since free space on the partition will be counted twice.
+
+In such cases, user can disable exporting statvfs info
+on one of the volumes by setting this option.
+
+### `xattrop`
+
+This fop is used by replicate to set version numbers on files.
+
+### `getxattr`/`setxattr` hack to read/write files
+
+A key, `GLUSTERFS_FILE_CONTENT_STRING`, is handled in a special way by
+`getxattr`/`setxattr`. A getxattr with the key will return the entire
+content of the file as the value. A `setxattr` with the key will write
+the value as the entire content of the file.
+
+### `posix_checksum`
+
+This calculates a simple XOR checksum on all entry names in a
+directory that is used by unify to compare directory contents.
diff --git a/doc/legacy/rpc-for-glusterfs.changes-done.txt b/doc/developer-guide/rpc-for-glusterfs.changes-done.txt
index 6bbbca78826..6bbbca78826 100644
--- a/doc/legacy/rpc-for-glusterfs.changes-done.txt
+++ b/doc/developer-guide/rpc-for-glusterfs.changes-done.txt
diff --git a/doc/developer-guide/rpc-for-glusterfs.new-versions.md b/doc/developer-guide/rpc-for-glusterfs.new-versions.md
new file mode 100644
index 00000000000..e3da5efa4a2
--- /dev/null
+++ b/doc/developer-guide/rpc-for-glusterfs.new-versions.md
@@ -0,0 +1,32 @@
+# GlusterFS RPC program versions
+
+## Compatibility
+
+RPC layer of glusterfs is implemented with possible changes over the protocol layers in mind. If there are any changes in the FOPs from what is assumed to be client side, and whats in serverside, they are to be added as a separate program table.
+
+### Program tables and Versions
+
+A given RPC program has a specific Task, and Version along with actors belonging to the program. In any of the programs, if a new actor is added, it is very important to define one more program with different version, and then keep both, if both are supported. Or else, it is important to handle the 'handshake' properly.
+
+#### Server details
+
+More info on RPC program is at `rpc/rpc-lib/src/rpcsvc.h` and check for structure `rpcsvc_actor_t` and `struct rpcsvc_program`. For usage, check `xlators/protocol/server/src/server-rpc-fops.c`
+
+#### Client details
+
+For details on client structures check `rpc/rpc-lib/src/rpc-clnt.h` for `rpc_clnt_procedure_t` and `rpc_clnt_program_t`. For usage, check `xlators/protocol/client/src/client-rpc-fops.c`
+
+## Protocol
+
+A protocol is what is agreed between two parties. In glusterfs, a RPC protocol is defined as .x file, which then gets converted to .c/.h file using `rpcgen`. There are different protocols defined for communication between `xlators/mgmt/glusterd <==> glusterfsd`, `gluster CLI <==> glusterd`, and `client-protocol <==> server-protocol`
+
+Once a protocol is defined and a release is made with that protocol, make sure no one changes it. Any edits to a given structure there should be a new version of the structure, and also it should get used in new actor, and thus new program version.
+
+## Server and Client Handshake
+
+When a client succeeds to establish a connect (it can be any transport, socket, ib-verbs or unix), client sends a dump (GF_DUMP_DUMP) request to server, which will respond back with all the supported versions of the server RPC (the supported programs which are registered with `rpcsvc_program_register()`).
+
+A client which expects certain programs to be present in server, it should be taking care of looking for it in the handshake methods, and take appropriate action depending on what to do next. In general a compatibility issue should be handled at handshake level itself, thus we can clearly let user/admin know of any 'in-compatibilities'.
+As a developer of GlusterFS protocol layer, one just has to make sure *never to make changes to existing program structures*, but they have to add new programs if required. New programs can have the same actors as present in existing, and also little more. Or it can even have same actor behave differently, take different parameter.
+
+If this is followed properly, there would be smooth upgrade / downgrade of versions. If not, technically, it is 100% guarantee of getting compatibility related issues.
diff --git a/doc/developer-guide/syncop.md b/doc/developer-guide/syncop.md
new file mode 100644
index 00000000000..bcc8bd08e01
--- /dev/null
+++ b/doc/developer-guide/syncop.md
@@ -0,0 +1,72 @@
+# syncop framework
+A coroutines-based, cooperative multi-tasking framework.
+
+## Topics
+
+- Glossary
+- Lifecycle of a synctask
+- Existing usage
+
+
+## Glossary
+
+### syncenv
+
+syncenv is an object that provides access to a pool of worker threads.
+synctasks execute in a syncenv.
+
+### synctask
+
+synctask can be informally defined as a pair of function pointers, namely _the
+call_ and _the callback_ (see syncop.h for more details).
+
+ synctask_fn_t - 'the call'
+ synctask_cbk_t - 'the callback'
+
+synctask has two modes of operation,
+
+1. The calling thread waits for the synctask to complete.
+2. The calling thread schedules the synctask and continues.
+
+synctask guarantees that the callback is called _after_ the call completes.
+
+### Lifecycle of a synctask
+
+A synctask could go into the following stages while in execution.
+
+- CREATED - On calling synctask_create/synctask_new.
+
+- RUNNABLE - synctask is queued in env->runq.
+
+- RUNNING - When one of syncenv's worker threads calls synctask_switch_to.
+
+- WAITING - When a synctask calls synctask_yield.
+
+- DONE - When a synctask has run to completion.
+
+
+ +-------------------------------+
+ | CREATED |
+ +-------------------------------+
+ |
+ | synctask_new/synctask_create
+ v
+ +-------------------------------+
+ | RUNNABLE (in env->runq) | <+
+ +-------------------------------+ |
+ | |
+ | synctask_switch_to |
+ v |
+ +------+ on task completion +-------------------------------+ |
+ | DONE | <-------------------- | RUNNING | | synctask_wake/wake
+ +------+ +-------------------------------+ |
+ | |
+ | synctask_yield/yield |
+ v |
+ +-------------------------------+ |
+ | WAITING (in env->waitq) | -+
+ +-------------------------------+
+
+Note: A synctask is not guaranteed to run on the same thread throughout its
+lifetime. Every time a synctask yields, it is possible for it to run on a
+different thread.
diff --git a/doc/developer-guide/thread-naming.md b/doc/developer-guide/thread-naming.md
new file mode 100644
index 00000000000..513140d4437
--- /dev/null
+++ b/doc/developer-guide/thread-naming.md
@@ -0,0 +1,104 @@
+Thread Naming
+================
+Gluster processes spawn many threads; some threads are created by libglusterfs
+library, while others are created by xlators. When gfapi library is used in an
+application, some threads belong to the application and some are spawned by
+gluster libraries. We also have features where n number of threads are spawned
+to act as worker threads for same operation.
+
+In all the above cases, it is useful to be able to determine the list of threads
+that exist in runtime. Naming threads when you create them is the easiest way to
+provide that information to kernel so that it can then be queried by any means.
+
+How to name threads
+-------------------
+We have two wrapper functions in libglusterfs for creating threads. They take
+name as an argument and set thread name after its creation.
+
+```C
+gf_thread_create (pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg, const char *name)
+```
+
+```C
+gf_thread_create_detached (pthread_t *thread,
+ void *(*start_routine)(void *), void *arg,
+ const char *name)
+```
+
+As max name length for a thread in POSIX is only 16 characters including the
+'\0' character, you have to be a little creative with naming. Also, it is
+important that all Gluster threads have common prefix. Considering these
+conditions, we have "glfs_" as prefix for all the threads created by these
+wrapper functions. It is responsibility of the owner of thread to provide the
+suffix part of the name. It does not have to be a descriptive name, as it has
+only 10 letters to work with. However, it should be unique enough such that it
+can be matched with a table which describes it.
+
+If n number of threads are spwaned to perform same function, it is must that the
+threads are numbered.
+
+Table of thread names
+---------------------
+Thread names don't have to be a descriptive; however, it should be unique enough
+such that it can be matched with a table below without ambiguity.
+
+- bdaio - block device aio
+- brfsscan - bit rot fs scanner
+- brhevent - bit rot event handler
+- brmon - bit rot monitor
+- brosign - bit rot one shot signer
+- brpobj - bit rot object processor
+- brsproc - bit rot scrubber
+- brssign - bit rot stub signer
+- brswrker - bit rot worker
+- clogc - changelog consumer
+- clogcbki - changelog callback invoker
+- clogd - changelog dispatcher
+- clogecon - changelog reverse connection
+- clogfsyn - changelog fsync
+- cloghcon - changelog history consumer
+- clogjan - changelog janitor
+- clogpoll - changelog poller
+- clogproc - changelog process
+- clogro - changelog rollover
+- ctrcomp - change time recorder compaction
+- dhtdf - dht defrag task
+- dhtdg - dht defrag start
+- dhtfcnt - dht rebalance file counter
+- ecshd - ec heal daemon
+- epollN - epoll thread
+- fdlwrker - fdl worker
+- fusenoti - fuse notify
+- fuseproc - fuse main thread
+- gdhooks - glusterd hooks
+- glfspoll - gfapi poller thread
+- idxwrker - index worker
+- iosdump - io stats dump
+- iotwr - io thread worker
+- jbrflush - jbr flush
+- leasercl - lease recall
+- memsweep - sweeper thread for mem pools
+- nfsauth - nfs auth
+- nfsnsm - nfs nsm
+- nfsudp - nfs udp mount
+- nlmmon - nfs nlm/nsm mon
+- posixaio - posix aio
+- posixfsy - posix fsync
+- posixhc - posix heal
+- posixjan - posix janitor
+- posixrsv - posix reserve
+- quiesce - quiesce dequeue
+- rdmaAsyn - rdma async event handler
+- rdmaehan - rdma completion handler
+- rdmarcom - rdma receive completion handler
+- rdmascom - rdma send completion handler
+- rpcsvcrh - rpcsvc request handler
+- scleanup - socket cleanup
+- shdheal - self heal daemon
+- sigwait - glusterfsd sigwaiter
+- spoller - socket poller
+- sprocN - syncop worker thread
+- tbfclock - token bucket filter token generator thread
+- timer - timer thread
+- upreaper - upcall reaper
diff --git a/doc/developer-guide/translator-development.md b/doc/developer-guide/translator-development.md
new file mode 100644
index 00000000000..f75935519f6
--- /dev/null
+++ b/doc/developer-guide/translator-development.md
@@ -0,0 +1,683 @@
+Translator development
+======================
+
+Setting the Stage
+-----------------
+
+This is the first post in a series that will explain some of the details of
+writing a GlusterFS translator, using some actual code to illustrate.
+
+Before we begin, a word about environments. GlusterFS is over 300K lines of
+code spread across a few hundred files. That's no Linux kernel or anything, but
+ you're still going to be navigating through a lot of code in every
+code-editing session, so some kind of cross-referencing is *essential*. I use
+cscope with the vim bindings, and if I couldn't do Crtl+G and such to jump
+between definitions all the time my productivity would be cut in half. You may
+prefer different tools, but as I go through these examples you'll need
+something functionally similar to follow on. OK, on with the show.
+
+The first thing you need to know is that translators are not just bags of
+functions and variables. They need to have a very definite internal structure
+so that the translator-loading code can figure out where all the pieces are.
+The way it does this is to use dlsym to look for specific names within your
+shared-object file, as follow (from `xlator.c`):
+
+```
+if (!(xl->fops = dlsym (handle, "fops"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(fops) on %s",
+ dlerror ());
+ goto out;
+}
+
+if (!(xl->cbks = dlsym (handle, "cbks"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(cbks) on %s",
+ dlerror ());
+ goto out;
+}
+
+if (!(xl->init = dlsym (handle, "init"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(init) on %s",
+ dlerror ());
+ goto out;
+}
+
+if (!(xl->fini = dlsym (handle, "fini"))) {
+ gf_log ("xlator", GF_LOG_WARNING, "dlsym(fini) on %s",
+ dlerror ());
+ goto out;
+}
+```
+
+In this example, `xl` is a pointer to the in-memory object for the translator
+we're loading. As you can see, it's looking up various symbols *by name* in the
+ shared object it just loaded, and storing pointers to those symbols. Some of
+them (e.g. init) are functions, while others (e.g. fops) are dispatch tables
+containing pointers to many functions. Together, these make up the translator's
+ public interface.
+
+Most of this glue or boilerplate can easily be found at the bottom of one of
+the source files that make up each translator. We're going to use the `rot-13`
+translator just for fun, so in this case you'd look in `rot-13.c` to see this:
+
+```
+struct xlator_fops fops = {
+ .readv = rot13_readv,
+ .writev = rot13_writev
+};
+
+struct xlator_cbks cbks = {
+};
+
+struct volume_options options[] = {
+{ .key = {"encrypt-write"},
+ .type = GF_OPTION_TYPE_BOOL
+},
+{ .key = {"decrypt-read"},
+ .type = GF_OPTION_TYPE_BOOL
+},
+{ .key = {NULL} },
+};
+```
+
+The `fops` table, defined in `xlator.h`, is one of the most important pieces.
+This table contains a pointer to each of the filesystem functions that your
+translator might implement -- `open`, `read`, `stat`, `chmod`, and so on. There
+are 82 such functions in all, but don't worry; any that you don't specify here
+will be see as null and filled with defaults from `defaults.c` when your
+translator is loaded. In this particular example, since `rot-13` is an
+exceptionally simple translator, we only fill in two entries for `readv` and
+`writev`.
+
+There are actually two other tables, also required to have predefined names,
+that are also used to find translator functions: `cbks` (which is empty in this
+ snippet) and `dumpops` (which is missing entirely). The first of these specify
+ entry points for when inodes are forgotten or file descriptors are released.
+In other words, they're destructors for objects in which your translator might
+ have an interest. Mostly you can ignore them, because the default behavior
+handles even the simpler cases of translator-specific inode/fd context
+automatically. However, if the context you attach is a complex structure
+requiring complex cleanup, you'll need to supply these functions. As for
+dumpops, that's just used if you want to provide functions to pretty-print
+various structures in logs. I've never used it myself, though I probably
+should. What's noteworthy here is that we don't even define dumpops. That's
+because all of the functions that might use these dispatch functions will check
+ for `xl->dumpops` being `NULL` before calling through it. This is in sharp
+contrast to the behavior for `fops` and `cbks`, which *must* be present. If
+they're not, translator loading will fail because these pointers are not
+checked every time and if they're `NULL` then we'll segfault. That's why we
+provide an empty definition for cbks; it's OK for the individual function
+pointers to be NULL, but not for the whole table to be absent.
+
+The last piece I'll cover today is options. As you can see, this is a table of
+translator-specific option names and some information about their types.
+GlusterFS actually provides a pretty rich set of types (`volume_option_type_t`
+in `options.`h) which includes paths, translator names, percentages, and times
+in addition to the obvious integers and strings. Also, the `volume_option_t`
+structure can include information about alternate names, min/max/default
+values, enumerated string values, and descriptions. We don't see any of these
+here, so let's take a quick look at some more complex examples from afr.c and
+then come back to `rot-13`.
+
+```
+{ .key = {"data-self-heal-algorithm"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "",
+ .description = "Select between \"full\", \"diff\". The "
+ "\"full\" algorithm copies the entire file from "
+ "source to sink. The \"diff\" algorithm copies to "
+ "sink only those blocks whose checksums don't match "
+ "with those of source.",
+ .value = { "diff", "full", "" }
+},
+{ .key = {"data-self-heal-window-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 1024,
+ .default_value = "1",
+ .description = "Maximum number blocks per file for which "
+ "self-heal process would be applied simultaneously."
+},
+```
+
+When your translator is loaded, all of this information is used to parse the
+options actually provided in the volfile, and then the result is turned into a
+dictionary and stored as `xl->options`. This dictionary is then processed by
+your init function, which you can see being looked up in the first code
+fragment above. We're only going to look at a small part of the `rot-13`'s
+init for now.
+
+```
+priv->decrypt_read = 1;
+priv->encrypt_write = 1;
+
+data = dict_get (this->options, "encrypt-write");
+if (data) {
+ if (gf_string2boolean (data->data, &priv->encrypt_write
+ == -1) {
+ gf_log (this->name, GF_LOG_ERROR,
+ "encrypt-write takes only boolean options");
+ return -1;
+ }
+}
+```
+
+What we can see here is that we're setting some defaults in our priv structure,
+then looking to see if an `encrypt-write` option was actually provided. If so,
+we convert and store it. This is a pretty classic use of dict_get to fetch a
+field from a dictionary, and of using one of many conversion functions in
+`common-utils.c` to convert `data->data` into something we can use.
+
+So far we've covered the basic of how a translator gets loaded, how we find its
+various parts, and how we process its options. In my next Translator 101 post,
+we'll go a little deeper into other things that init and its companion fini
+might do, and how some other fields in our `xlator_t` structure (commonly
+referred to as this) are commonly used.
+
+`init`, `fini`, and private context
+-----------------------------------
+
+In the previous Translator 101 post, we looked at some of the dispatch tables
+and options processing in a translator. This time we're going to cover the rest
+ of the "shell" of a translator -- i.e. the other global parts not specific to
+handling a particular request.
+
+Let's start by looking at the relationship between a translator and its shared
+library. At a first approximation, this is the relationship between an object
+and a class in just about any object-oriented programming language. The class
+defines behaviors, but has to be instantiated as an object to have any kind of
+existence. In our case the object is an `xlator_t`. Several of these might be
+created within the same daemon, sharing all of the same code through init/fini
+and dispatch tables, but sharing *no data*. You could implement shared data (as
+ static variables in your shared libraries) but that's strongly discouraged.
+Every function in your shared library will get an `xlator_t` as an argument,
+and should use it. This lack of class-level data is one of the points where
+the analogy to common OOP systems starts to break down. Another place is the
+complete lack of inheritance. Translators inherit behavior (code) from exactly
+one shared library -- looked up and loaded using the `type` field in a volfile
+`volume ... end-volume` block -- and that's it -- not even single inheritance,
+no subclasses or superclasses, no mixins or prototypes, just the relationship
+between an object and its class. With that in mind, let's turn to the init
+function that we just barely touched on last time.
+
+```
+int32_t
+init (xlator_t *this)
+{
+ data_t *data = NULL;
+ rot_13_private_t *priv = NULL;
+
+ if (!this->children || this->children->next) {
+ gf_log ("rot13", GF_LOG_ERROR,
+ "FATAL: rot13 should have exactly one child");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log (this->name, GF_LOG_WARNING,
+ "dangling volume. check volfile ");
+ }
+
+ priv = GF_CALLOC (sizeof (rot_13_private_t), 1, 0);
+ if (!priv)
+ return -1;
+```
+
+At the very top, we see the function signature -- we get a pointer to the
+`xlator_t` object that we're initializing, and we return an `int32_t` status.
+As with most functions in the translator API, this should be zero to indicate
+success. In this case it's safe to return -1 for failure, but watch out: in
+dispatch-table functions, the return value means the status of the *function
+call* rather than the *request*. A request error should be reflected as a
+callback with a non-zero `op_re`t value, but the dispatch function itself
+should still return zero. In fact, the handling of a non-zero return from a
+dispatch function is not all that robust (we recently had a bug report in
+HekaFS related to this) so it's something you should probably avoid
+altogether. This only underscores the difference between dispatch functions
+and `init`/`fini` functions, where non-zero returns *are* expected and handled
+logically by aborting the translator setup. We can see that down at the
+bottom, where we return -1 to indicate that we couldn't allocate our
+private-data area (more about that later).
+
+The first thing this init function does is check that the translator is being
+set up in the right kind of environment. Translators are called by parents and
+in turn call children. Some translators are "initial" translators that inject
+requests into the system from elsewhere -- e.g. mount/fuse injecting requests
+from the kernel, protocol/server injecting requests from the network. Those
+translators don't need parents, but `rot-13` does and so we check for that.
+Similarly, some translators are "final" translators that (from the perspective
+of the current process) terminate requests instead of passing them on -- e.g.
+`protocol/client` passing them to another node, `storage/posix` passing them to
+a local filesystem. Other translators "multiplex" between multiple children --
+ passing each parent request on to one (`cluster/dht`), some
+(`cluster/stripe`), or all (`cluster/afr`) of those children. `rot-13` fits
+into none of those categories either, so it checks that it has *exactly one*
+child. It might be more convenient or robust if translator shared libraries
+had standard variables describing these requirements, to be checked in a
+consistent way by the translator-loading infrastructure itself instead of by
+each separate init function, but this is the way translators work today.
+
+The last thing we see in this fragment is allocating our private data area.
+This can literally be anything we want; the infrastructure just provides the
+priv pointer as a convenience but takes no responsibility for how it's used. In
+ this case we're using `GF_CALLOC` to allocate our own `rot_13_private_t`
+structure. This gets us all the benefits of GlusterFS's memory-leak detection
+infrastructure, but the way we're calling it is not quite ideal. For one thing,
+ the first two arguments -- from `calloc(3)` -- are kind of reversed. For
+another, notice how the last argument is zero. That can actually be an
+enumerated value, to tell the GlusterFS allocator *what* type we're
+allocating. This can be very useful information for memory profiling and leak
+detection, so it's recommended that you follow the example of any
+x`xx-mem-types.h` file elsewhere in the source tree instead of just passing
+zero here (even though that works).
+
+To finish our tour of standard initialization/termination, let's look at the
+end of `init` and the beginning of `fini`:
+
+```
+ this->private = priv;
+ gf_log ("rot13", GF_LOG_DEBUG, "rot13 xlator loaded");
+ return 0;
+}
+
+void
+fini (xlator_t *this)
+{
+ rot_13_private_t *priv = this->private;
+
+ if (!priv)
+ return;
+ this->private = NULL;
+ GF_FREE (priv);
+```
+
+At the end of init we're just storing our private-data pointer in the `priv`
+field of our `xlator_t`, then returning zero to indicate that initialization
+succeeded. As is usually the case, our fini is even simpler. All it really has
+to do is `GF_FREE` our private-data pointer, which we do in a slightly
+roundabout way here. Notice how we don't even have a return value here, since
+there's nothing obvious and useful that the infrastructure could do if `fini`
+failed.
+
+That's practically everything we need to know to get our translator through
+loading, initialization, options processing, and termination. If we had defined
+ no dispatch functions, we could actually configure a daemon to use our
+translator and it would work as a basic pass-through from its parent to a
+single child. In the next post I'll cover how to build the translator and
+configure a daemon to use it, so that we can actually step through it in a
+debugger and see how it all fits together before we actually start adding
+functionality.
+
+This Time For Real
+------------------
+
+In the first two parts of this series, we learned how to write a basic
+translator skeleton that can get through loading, initialization, and option
+processing. This time we'll cover how to build that translator, configure a
+volume to use it, and run the glusterfs daemon in debug mode.
+
+Unfortunately, there's not much direct support for writing new translators. You
+can check out a GlusterFS tree and splice in your own translator directory, but
+ that's a bit painful because you'll have to update multiple makefiles plus a
+bunch of autoconf garbage. As part of the HekaFS project, I basically reverse
+engineered the truly necessary parts of the translator-building process and
+then pestered one of the Fedora glusterfs package maintainers (thanks
+daMaestro!) to add a `glusterfs-devel` package with the required headers. Since
+ then the complexity level in the HekaFS tree has crept back up a bit, but I
+still remember the simple method and still consider it the easiest way to get
+started on a new translator. For the sake of those not using Fedora, I'm going
+to describe a method that doesn't depend on that header package. What it does
+depend on is a GlusterFS source tree, much as you might have cloned from GitHub
+ or the Gluster review site. This tree doesn't have to be fully built, but you
+do need to run `autogen.sh` and configure in it. Then you can take the
+following simple makefile and put it in a directory with your actual source.
+
+```
+# Change these to match your source code.
+TARGET = rot-13.so
+OBJECTS = rot-13.o
+
+# Change these to match your environment.
+GLFS_SRC = /srv/glusterfs
+GLFS_LIB = /usr/lib64
+HOST_OS = GF_LINUX_HOST_OS
+
+# You shouldn't need to change anything below here.
+
+CFLAGS = -fPIC -Wall -O0 -g \
+ -DHAVE_CONFIG_H -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE \
+ -D$(HOST_OS) -I$(GLFS_SRC) -I$(GLFS_SRC)/contrib/uuid \
+ -I$(GLFS_SRC)/libglusterfs/src
+LDFLAGS = -shared -nostartfiles -L$(GLFS_LIB)
+LIBS = -lglusterfs -lpthread
+
+$(TARGET): $(OBJECTS)
+ $(CC) $(OBJECTS) $(LDFLAGS) -o $(TARGET) $(OBJECTS) $(LIBS)
+```
+
+Yes, it's still Linux-specific. Mea culpa. As you can see, we're sticking with
+the `rot-13` example, so you can just copy the files from
+`xlators/encryption/rot-13/src` in your GlusterFS tree to follow on. Type
+`make` and you should be rewarded with a nice little `.so` file.
+
+```
+xlator_example$ ls -l rot-13.so
+-rwxr-xr-x. 1 jeff jeff 40784 Nov 16 16:41 rot-13.so
+```
+
+Notice that we've built with optimization level zero and debugging symbols
+included, which would not typically be the case for a packaged version of
+GlusterFS. Let's put our version of `rot-13.so` into a slightly different file
+on our system, so that it doesn't stomp on the installed version (not that
+you'd ever want to use that anyway).
+
+```
+xlator_example# ls /usr/lib64/glusterfs/3git/xlator/encryption/
+crypt.so crypt.so.0 crypt.so.0.0.0 rot-13.so rot-13.so.0
+rot-13.so.0.0.0
+xlator_example# cp rot-13.so \
+ /usr/lib64/glusterfs/3git/xlator/encryption/my-rot-13.so
+```
+
+These paths represent the current Gluster filesystem layout, which is likely to
+be deprecated in favor of the Fedora layout; your paths may vary. At this point
+ we're ready to configure a volume using our new translator. To do that, I'm
+going to suggest something that's strongly discouraged except during
+development (the Gluster guys are going to hate me for this): write our own
+volfile. Here's just about the simplest volfile you'll ever see.
+
+```
+volume my-posix
+ type storage/posix
+ option directory /srv/export
+end-volume
+
+volume my-rot13
+ type encryption/my-rot-13
+ subvolumes my-posix
+end-volume
+```
+
+All we have here is a basic brick using `/srv/export` for its data, and then
+an instance of our translator layered on top -- no client or server is
+necessary for what we're doing, and the system will automatically push a
+mount/fuse translator on top if there's no server translator. To try this out,
+all we need is the following command (assuming the directories involved already
+ exist).
+
+```
+xlator_example$ glusterfs --debug -f my.vol /srv/import
+```
+
+You should be rewarded with a whole lot of log output, including the text of
+the volfile (this is very useful for debugging problems in the field). If you
+go to another window on the same machine, you can see that you have a new
+filesystem mounted.
+
+```
+~$ df /srv/import
+Filesystem 1K-blocks Used Available Use% Mounted on
+/srv/xlator_example/my.vol
+ 114506240 2706176 105983488 3% /srv/import
+```
+
+Just for fun, write something into a file in `/srv/import`, then look at the
+corresponding file in `/srv/export` to see it all `rot-13`'ed for you.
+
+```
+~$ echo hello > /srv/import/a_file
+~$ cat /srv/export/a_file
+uryyb
+```
+
+There you have it -- functionality you control, implemented easily, layered on
+top of local storage. Now you could start adding functionality -- real
+encryption, perhaps -- and inevitably having to debug it. You could do that the
+ old-school way, with `gf_log` (preferred) or even plain old `printf`, or you
+could run daemons under `gdb` instead. Alternatively, you could wait for the
+next Translator 101 post, where we'll be doing exactly that.
+
+Debugging a Translator
+----------------------
+
+Now that we've learned what a translator looks like and how to build one, it's
+time to run one and actually watch it work. The best way to do this is good
+old-fashioned `gdb`, as follows (using some of the examples from last time).
+
+```
+xlator_example# gdb glusterfs
+GNU gdb (GDB) Red Hat Enterprise Linux (7.2-50.el6)
+...
+(gdb) r --debug -f my.vol /srv/import
+Starting program: /usr/sbin/glusterfs --debug -f my.vol /srv/import
+...
+[2011-11-23 11:23:16.495516] I [fuse-bridge.c:2971:fuse_init]
+ 0-glusterfs-fuse: FUSE inited with protocol versions:
+ glusterfs 7.13 kernel 7.13
+```
+
+If you get to this point, your glusterfs client process is already running. You
+can go to another window to see the mountpoint, do file operations, etc.
+
+```
+~# df /srv/import
+Filesystem 1K-blocks Used Available Use% Mounted on
+/root/xlator_example/my.vol
+ 114506240 2643968 106045568 3% /srv/import
+~# ls /srv/import
+a_file
+~# cat /srv/import/a_file
+hello
+```
+
+Now let's interrupt the process and see where we are.
+
+```
+
+Program received signal SIGINT, Interrupt.
+0x0000003a0060b3dc in pthread_cond_wait@@GLIBC_2.3.2 ()
+ from /lib64/libpthread.so.0
+(gdb) info threads
+ 5 Thread 0x7fffeffff700 (LWP 27206) 0x0000003a002dd8c7
+ in readv ()
+ from /lib64/libc.so.6
+ 4 Thread 0x7ffff50e3700 (LWP 27205) 0x0000003a0060b75b
+ in pthread_cond_timedwait@@GLIBC_2.3.2 ()
+ from /lib64/libpthread.so.0
+ 3 Thread 0x7ffff5f02700 (LWP 27204) 0x0000003a0060b3dc
+ in pthread_cond_wait@@GLIBC_2.3.2 ()
+ from /lib64/libpthread.so.0
+ 2 Thread 0x7ffff6903700 (LWP 27203) 0x0000003a0060f245
+ in sigwait ()
+ from /lib64/libpthread.so.0
+* 1 Thread 0x7ffff7957700 (LWP 27196) 0x0000003a0060b3dc
+ in pthread_cond_wait@@GLIBC_2.3.2 ()
+ from /lib64/libpthread.so.0
+```
+
+Like any non-toy server, this one has multiple threads. What are they all
+doing? Honestly, even I don't know. Thread 1 turns out to be in
+`event_dispatch_epoll`, which means it's the one handling all of our network
+I/O. Note that with socket multi-threading patch this will change, with one
+thread in `socket_poller` per connection. Thread 2 is in `glusterfs_sigwaiter`
+which means signals will be isolated to that thread. Thread 3 is in
+`syncenv_task`, so it's a worker process for synchronous requests such as
+those used by the rebalance and repair code. Thread 4 is in
+`janitor_get_next_fd`, so it's waiting for a chance to close no-longer-needed
+file descriptors on the local filesystem. (I admit I had to look that one up,
+BTW.) Lastly, thread 5 is in `fuse_thread_proc`, so it's the one fetching
+requests from our FUSE interface. You'll often see many more threads than
+this, but it's a pretty good basic set. Now, let's set a breakpoint so we can
+actually watch a request.
+
+```
+(gdb) b rot13_writev
+Breakpoint 1 at 0x7ffff50e4f0b: file rot-13.c, line 119.
+(gdb) c
+Continuing.
+```
+
+At this point we go into our other window and do something that will involve a write.
+
+```
+~# echo goodbye > /srv/import/another_file
+(back to the first window)
+[Switching to Thread 0x7fffeffff700 (LWP 27206)]
+
+Breakpoint 1, rot13_writev (frame=0x7ffff6e4402c, this=0x638440,
+ fd=0x7ffff409802c, vector=0x7fffe8000cd8, count=1, offset=0,
+ iobref=0x7fffe8001070) at rot-13.c:119
+119 rot_13_private_t *priv = (rot_13_private_t *)this->private;
+```
+
+Remember how we built with debugging symbols enabled and no optimization? That
+will be pretty important for the next few steps. As you can see, we're in
+`rot13_writev`, with several parameters.
+
+* `frame` is our always-present frame pointer for this request. Also,
+ `frame->local` will point to any local data we created and attached to the
+ request ourselves.
+* `this` is a pointer to our instance of the `rot-13` translator. You can examine
+ it if you like to see the name, type, options, parent/children, inode table,
+ and other stuff associated with it.
+* `fd` is a pointer to a file-descriptor *object* (`fd_t`, not just a
+ file-descriptor index which is what most people use "fd" for). This in turn
+ points to an inode object (`inode_t`) and we can associate our own
+ `rot-13`-specific data with either of these.
+* `vector` and `count` together describe the data buffers for this write, which
+ we'll get to in a moment.
+* `offset` is the offset into the file at which we're writing.
+* `iobref` is a buffer-reference object, which is used to track the life cycle
+ of buffers containing read/write data. If you look closely, you'll notice that
+ `vector[0].iov_base` points to the same address as `iobref->iobrefs[0].ptr`, which
+ should give you some idea of the inter-relationships between vector and iobref.
+
+OK, now what about that `vector`? We can use it to examine the data being
+written, like this.
+
+```
+(gdb) p vector[0]
+$2 = {iov_base = 0x7ffff7936000, iov_len = 8}
+(gdb) x/s 0x7ffff7936000
+0x7ffff7936000: "goodbye\n"
+```
+
+It's not always safe to view this data as a string, because it might just as
+well be binary data, but since we're generating the write this time it's safe
+and convenient. With that knowledge, let's step through things a bit.
+
+```
+(gdb) s
+120 if (priv->encrypt_write)
+(gdb)
+121 rot13_iovec (vector, count);
+(gdb)
+rot13_iovec (vector=0x7fffe8000cd8, count=1) at rot-13.c:57
+57 for (i = 0; i < count; i++) {
+(gdb)
+58 rot13 (vector[i].iov_base, vector[i].iov_len);
+(gdb)
+rot13 (buf=0x7ffff7936000 "goodbye\n", len=8) at rot-13.c:45
+45 for (i = 0; i < len; i++) {
+(gdb)
+46 if (buf[i] >= 'a' && buf[i] <= 'z')
+(gdb)
+47 buf[i] = 'a' + ((buf[i] - 'a' + 13) % 26);
+```
+
+Here we've stepped into `rot13_iovec`, which iterates through our vector
+calling `rot13`, which in turn iterates through the characters in that chunk
+doing the `rot-13` operation if/as appropriate. This is pretty straightforward
+stuff, so let's skip to the next interesting bit.
+
+```
+(gdb) fin
+Run till exit from #0 rot13 (buf=0x7ffff7936000 "goodbye\n",
+ len=8) at rot-13.c:47
+rot13_iovec (vector=0x7fffe8000cd8, count=1) at rot-13.c:57
+57 for (i = 0; i < count; i++) {
+(gdb) fin
+Run till exit from #0 rot13_iovec (vector=0x7fffe8000cd8,
+ count=1) at rot-13.c:57
+rot13_writev (frame=0x7ffff6e4402c, this=0x638440,
+ fd=0x7ffff409802c, vector=0x7fffe8000cd8, count=1,
+ offset=0, iobref=0x7fffe8001070) at rot-13.c:123
+123 STACK_WIND (frame,
+(gdb) b 129
+Breakpoint 2 at 0x7ffff50e4f35: file rot-13.c, line 129.
+(gdb) b rot13_writev_cbk
+Breakpoint 3 at 0x7ffff50e4db3: file rot-13.c, line 106.
+(gdb) c
+```
+
+So we've set breakpoints on both the callback and the statement following the
+`STACK_WIND`. Which one will we hit first?
+
+```
+Breakpoint 3, rot13_writev_cbk (frame=0x7ffff6e4402c,
+ cookie=0x7ffff6e440d8, this=0x638440, op_ret=8, op_errno=0,
+ prebuf=0x7fffefffeca0, postbuf=0x7fffefffec30)
+ at rot-13.c:106
+106 STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno,
+ prebuf, postbuf);
+(gdb) bt
+#0 rot13_writev_cbk (frame=0x7ffff6e4402c,
+ cookie=0x7ffff6e440d8, this=0x638440, op_ret=8, op_errno=0,
+ prebuf=0x7fffefffeca0, postbuf=0x7fffefffec30)
+ at rot-13.c:106
+#1 0x00007ffff52f1b37 in posix_writev (frame=0x7ffff6e440d8,
+ this=<value optimized out>, fd=<value optimized out>,
+ vector=<value optimized out>, count=1,
+ offset=<value optimized out>, iobref=0x7fffe8001070)
+ at posix.c:2217
+#2 0x00007ffff50e513e in rot13_writev (frame=0x7ffff6e4402c,
+ this=0x638440, fd=0x7ffff409802c, vector=0x7fffe8000cd8,
+ count=1, offset=0, iobref=0x7fffe8001070) at rot-13.c:123
+```
+
+Surprise! We're in `rot13_writev_cbk` now, called (indirectly) while we're
+still in `rot13_writev` before `STACK_WIND` returns (still at rot-13.c:123). If
+ you did any request cleanup here, then you need to be careful about what you
+do in the remainder of `rot13_writev` because data may have been freed etc.
+It's tempting to say you should just do the cleanup in `rot13_writev` after
+the `STACK_WIND,` but that's not valid because it's also possible that some
+other translator returned without calling `STACK_UNWIND` -- i.e. before
+`rot13_writev` is called, so then it would be the one getting null-pointer
+errors instead. To put it another way, the callback and the return from
+`STACK_WIND` can occur in either order or even simultaneously on different
+threads. Even if you were to use reference counts, you'd have to make sure to
+use locking or atomic operations to avoid races, and it's not worth it. Unless
+you *really* understand the possible flows of control and know what you're
+doing, it's better to do cleanup in the callback and nothing after
+`STACK_WIND.`
+
+At this point all that's left is a `STACK_UNWIND` and a return. The
+`STACK_UNWIND` invokes our parent's completion callback, and in this case our
+parent is FUSE so at that point the VFS layer is notified of the write being
+complete. Finally, we return through several levels of normal function calls
+until we come back to fuse_thread_proc, which waits for the next request.
+
+So that's it. For extra fun, you might want to repeat this exercise by stepping
+through some other call -- stat or setxattr might be good choices -- but you'll
+ have to use a translator that actually implements those calls to see much
+that's interesting. Then you'll pretty much know everything I knew when I
+started writing my first for-real translators, and probably even a bit more. I
+hope you've enjoyed this series, or at least found it useful, and if you have
+any suggestions for other topics I should cover please let me know (via
+comments or email, IRC or Twitter).
+
+Other versions
+--------------
+
+Original author's site:
+
+ * [Translator 101 - Setting the Stage](http://pl.atyp.us/hekafs.org/index.php/2011/11/translator-101-class-1-setting-the-stage/)
+
+ * [Translator 101 - Init, Fini and Private Context](http://pl.atyp.us/hekafs.org/index.php/2011/11/translator-101-lesson-2-init-fini-and-private-context/)
+
+ * [Translator 101 - This Time for Real](http://pl.atyp.us/hekafs.org/index.php/2011/11/translator-101-lesson-3-this-time-for-real/)
+
+ * [Translator 101 - Debugging a Translator](http://pl.atyp.us/hekafs.org/index.php/2011/11/translator-101-lesson-4-debugging-a-translator/)
+
+Gluster community site:
+
+ * [Translators](https://docs.gluster.org/en/latest/Quick-Start-Guide/Architecture/#translators)
diff --git a/doc/developer-guide/unittest.md b/doc/developer-guide/unittest.md
new file mode 100644
index 00000000000..5c6c0a8a039
--- /dev/null
+++ b/doc/developer-guide/unittest.md
@@ -0,0 +1,228 @@
+# Unit Tests in GlusterFS
+
+## Overview
+[Art-of-unittesting][definitionofunittest] provides a good definition for unit tests. A good unit test is:
+
+* Able to be fully automated
+* Has full control over all the pieces running (Use mocks or stubs to achieve this isolation when needed)
+* Can be run in any order if part of many other tests
+* Runs in memory (no DB or File access, for example)
+* Consistently returns the same result (You always run the same test, so no random numbers, for example. save those for integration or range tests)
+* Runs fast
+* Tests a single logical concept in the system
+* Readable
+* Maintainable
+* Trustworthy (when you see its result, you don’t need to debug the code just to be sure)
+
+## cmocka
+GlusterFS unit test framework is based on [cmocka][]. cmocka provides
+developers with methods to isolate and test modules written in C language. It
+also provides integration with Jenkins by providing JUnit XML compliant unit
+test results.
+
+cmocka
+
+## Running Unit Tests
+To execute the unit tests, all you need is to type `make check`. Here is a step-by-step example assuming you just cloned a GlusterFS tree:
+
+```
+$ ./autogen.sh
+$ ./configure --enable-debug
+$ make check
+```
+
+Sample output:
+
+```
+PASS: mem_pool_unittest
+============================================================================
+Testsuite summary for glusterfs 3git
+============================================================================
+# TOTAL: 1
+# PASS: 1
+# SKIP: 0
+# XFAIL: 0
+# FAIL: 0
+# XPASS: 0
+# ERROR: 0
+============================================================================
+```
+
+In this example, `mem_pool_unittest` has multiple tests inside, but `make check` assumes that the program itself is the test, and that is why it only shows one test. Here is the output when we run `mem_pool_unittest` directly:
+
+```
+$ ./libglusterfs/src/mem_pool_unittest
+[==========] Running 10 test(s).
+[ RUN ] test_gf_mem_acct_enable_set
+Expected assertion data != ((void *)0) occurred
+[ OK ] test_gf_mem_acct_enable_set
+[ RUN ] test_gf_mem_set_acct_info_asserts
+Expected assertion xl != ((void *)0) occurred
+Expected assertion size > ((4 + sizeof (size_t) + sizeof (xlator_t *) + 4 + 8) + 8) occurred
+Expected assertion type <= xl->mem_acct.num_types occurred
+[ OK ] test_gf_mem_set_acct_info_asserts
+[ RUN ] test_gf_mem_set_acct_info_memory
+[ OK ] test_gf_mem_set_acct_info_memory
+[ RUN ] test_gf_calloc_default_calloc
+[ OK ] test_gf_calloc_default_calloc
+[ RUN ] test_gf_calloc_mem_acct_enabled
+[ OK ] test_gf_calloc_mem_acct_enabled
+[ RUN ] test_gf_malloc_default_malloc
+[ OK ] test_gf_malloc_default_malloc
+[ RUN ] test_gf_malloc_mem_acct_enabled
+[ OK ] test_gf_malloc_mem_acct_enabled
+[ RUN ] test_gf_realloc_default_realloc
+[ OK ] test_gf_realloc_default_realloc
+[ RUN ] test_gf_realloc_mem_acct_enabled
+[ OK ] test_gf_realloc_mem_acct_enabled
+[ RUN ] test_gf_realloc_ptr
+Expected assertion ((void *)0) != ptr occurred
+[ OK ] test_gf_realloc_ptr
+[==========] 10 test(s) run.
+[ PASSED ] 10 test(s).
+[ FAILED ] 0 test(s).
+[ REPORT ] Created libglusterfs_mem_pool_xunit.xml report
+```
+
+
+## Writing Unit Tests
+
+### Enhancing your C functions
+
+#### Programming by Contract
+Add the following to your C file:
+
+```c
+#include <cmocka_pbc.h>
+```
+
+```c
+/*
+ * Programming by Contract is a programming methodology
+ * which binds the caller and the function called to a
+ * contract. The contract is represented using Hoare Triple:
+ * {P} C {Q}
+ * where {P} is the precondition before executing command C,
+ * and {Q} is the postcondition.
+ *
+ * See also:
+ * http://en.wikipedia.org/wiki/Design_by_contract
+ * http://en.wikipedia.org/wiki/Hoare_logic
+ * http://dlang.org/dbc.html
+ */
+ #ifndef CMOCKERY_PBC_H_
+#define CMOCKERY_PBC_H_
+
+#if defined(UNIT_TESTING) || defined (DEBUG)
+
+#include <assert.h>
+
+/*
+ * Checks caller responsibility against contract
+ */
+#define REQUIRE(cond) assert(cond)
+
+/*
+ * Checks function reponsability against contract.
+ */
+#define ENSURE(cond) assert(cond)
+
+/*
+ * While REQUIRE and ENSURE apply to functions, INVARIANT
+ * applies to classes/structs. It ensures that intances
+ * of the class/struct are consistent. In other words,
+ * that the instance has not been corrupted.
+ */
+#define INVARIANT(invariant_fnc) do{ (invariant_fnc) } while (0);
+
+#else
+#define REQUIRE(cond) do { } while (0);
+#define ENSURE(cond) do { } while (0);
+#define INVARIANT(invariant_fnc) do{ } while (0);
+
+#endif /* defined(UNIT_TESTING) || defined (DEBUG) */
+#endif /* CMOCKERY_PBC_H_ */
+```
+
+##### Example
+This is an _extremely_ simple example:
+
+```c
+int divide (int n, int d)
+{
+ int ans;
+
+ REQUIRE(d != 0);
+
+ ans = n / d;
+
+ // As code is added to this function throughout its lifetime,
+ // ENSURE will assert that data will be returned
+ // according to the contract. Again this is an
+ // extremely simple example. :-D
+ ENSURE( ans == (n / d) );
+
+ return ans;
+}
+
+```
+
+##### Important Note
+`REQUIRE`, `ENSURE`, and `INVARIANT` are only available when `DEBUG` or `UNIT_TESTING` are set in the CFLAGS. You must pass `--enable-debug` to `./configure` to enable PBC on your non-unittest builds.
+
+#### Overriding functions
+Cmockery2 provides its own memory allocation functions which check for buffer overrun and memory leaks. The following header file must be included **last** to be able to override any of the memory allocation functions:
+
+```c
+#include <cmocka.h>
+```
+
+This file will only take effect with the `UNIT_TESTING` CFLAG is set.
+
+### Creating a unit test
+Once you identify the C file you would like to test, first create a `unittest` directory under the directory where the C file is located. This will isolate the unittests to a different directory.
+
+Next, you need to edit the `Makefile.am` file in the directory where your C file is located. Initialize the
+`Makefile.am` if it does not already have the following sections:
+
+```
+#### UNIT TESTS #####
+CLEANFILES += *.gcda *.gcno *_xunit.xml
+noinst_PROGRAMS =
+TESTS =
+```
+
+Now you can add the following for each of the unit tests that you would like to build:
+
+```
+### UNIT TEST xxx_unittest ###
+xxx_unittest_CPPFLAGS = $(xxx_CPPFLAGS)
+xxx_unittest_SOURCES = xxx.c \
+ unittest/xxx_unittest.c
+xxx_unittest_CFLAGS = $(UNITTEST_CFLAGS)
+xxx_unittest_LDFLAGS = $(UNITTEST_LDFLAGS)
+noinst_PROGRAMS += xxx_unittest
+TESTS += xxx_unittest
+```
+
+Where `xxx` is the name of your C file. For example, look at `libglusterfs/src/Makefile.am`.
+
+Copy the simple unit test from the [cmocka API][cmockaapi] to `unittest/xxx_unittest.c`. If you would like to see an example of a unit test, please refer to `libglusterfs/src/unittest/mem_pool_unittest.c`.
+
+#### Mocking
+You may see that the linker will complain about missing functions needed by the C file you would like to test. Identify the required functions, then place their stubs in a file called `unittest/xxx_mock.c`, then include this file in `Makefile.am` in `xxx_unittest_SOURCES`. This will allow you to you Cmockery2's mocking functions.
+
+#### Running the unit test
+You can type `make` in the directory where the C file is located. Once you built it and there are no errors, you can execute the test either by directly executing the program (in our example above it is called `xxx_unittest` ), or by running `make check`.
+
+#### Debugging
+Sometimes you may need to debug your unit test. To do that, you will have to point `gdb` to the binary which is located in the same directory as the source. For example, you can do the following from the root of the source tree to debug `mem_pool_unittest`:
+
+```
+$ gdb libglusterfs/src/mem_pool_unittest
+```
+
+
+[cmocka]: https://cmocka.org
+[definitionofunittest]: http://artofunittesting.com/definition-of-a-unit-test/
+[cmockapi]: https://api.cmocka.org
diff --git a/doc/developer-guide/versioning.md b/doc/developer-guide/versioning.md
new file mode 100644
index 00000000000..10c1511b79b
--- /dev/null
+++ b/doc/developer-guide/versioning.md
@@ -0,0 +1,44 @@
+Versioning
+==========
+
+### current
+
+The number of the current interface exported by the library. A current value
+of '1', means that you are calling the interface exported by this library
+interface 1.
+
+### revision
+
+The implementation number of the most recent interface exported by this library.
+In this case, a revision value of `0` means that this is the first
+implementation of the interface.
+
+If the next release of this library exports the same interface, but has a
+different implementation (perhaps some bugs have been fixed), the revision
+number will be higher, but current number will be the same. In that case, when
+given a choice, the library with the highest revision will always be used by
+the runtime loader.
+
+### age
+
+The number of previous additional interfaces supported by this library. If age
+were '2', then this library can be linked into executables which were built with
+a release of this library that exported the current interface number, current,
+or any of the previous two interfaces. By definition age must be less than or
+equal to current. At the outset, only the first ever interface is implemented,
+so age can only be `0'.
+
+For every release of the library `-version-info` argument needs to be set
+correctly depending on any interface changes you have made.
+
+This is quite straightforward when you understand what the three numbers mean:
+
+If you have changed any of the sources for this library, the revision number
+must be incremented. This is a new revision of the current interface. If the
+interface has changed, then current must be incremented, and revision reset
+to '0'.
+
+This is the first revision of a new interface. If the new interface is a
+superset of the previous interface (that is, if the previous interface has not
+been broken by the changes in this new release), then age must be incremented.
+This release is backwards compatible with the previous release.
diff --git a/doc/developer-guide/write-behind.md b/doc/developer-guide/write-behind.md
new file mode 100644
index 00000000000..0d78964fa20
--- /dev/null
+++ b/doc/developer-guide/write-behind.md
@@ -0,0 +1,56 @@
+performance/write-behind translator
+===================================
+
+Basic working
+--------------
+
+Write behind is basically a translator to lie to the application that the
+write-requests are finished, even before it is actually finished.
+
+On a regular translator tree without write-behind, control flow is like this:
+
+1. application makes a `write()` system call.
+2. VFS ==> FUSE ==> `/dev/fuse`.
+3. fuse-bridge initiates a glusterfs `writev()` call.
+4. `writev()` is `STACK_WIND()`ed up to client-protocol or storage translator.
+5. client-protocol, on receiving reply from server, starts `STACK_UNWIND()` towards the fuse-bridge.
+
+On a translator tree with write-behind, control flow is like this:
+
+1. application makes a `write()` system call.
+2. VFS ==> FUSE ==> `/dev/fuse`.
+3. fuse-bridge initiates a glusterfs `writev()` call.
+4. `writev()` is `STACK_WIND()`ed up to write-behind translator.
+5. write-behind adds the write buffer to its internal queue and does a `STACK_UNWIND()` towards the fuse-bridge.
+
+write call is completed in application's percepective. after
+`STACK_UNWIND()`ing towards the fuse-bridge, write-behind initiates a fresh
+writev() call to its child translator, whose replies will be consumed by
+write-behind itself. Write-behind _doesn't_ cache the write buffer, unless
+`option flush-behind on` is specified in volume specification file.
+
+Windowing
+---------
+
+With respect to write-behind, each write-buffer has three flags: `stack_wound`, `write_behind` and `got_reply`.
+
+* `stack_wound`: if set, indicates that write-behind has initiated `STACK_WIND()` towards child translator.
+* `write_behind`: if set, indicates that write-behind has done `STACK_UNWIND()` towards fuse-bridge.
+* `got_reply`: if set, indicates that write-behind has received reply from child translator for a `writev()` `STACK_WIND()`. a request will be destroyed by write-behind only if this flag is set.
+
+Currently pending write requests = aggregate size of requests with write_behind = 1 and got_reply = 0.
+
+window size limits the aggregate size of currently pending write requests. once
+the pending requests' size has reached the window size, write-behind blocks
+writev() calls from fuse-bridge. Blocking is only from application's
+perspective. Write-behind does `STACK_WIND()` to child translator
+straight-away, but hold behind the `STACK_UNWIND()` towards fuse-bridge.
+`STACK_UNWIND()` is done only once write-behind gets enough replies to
+accommodate for currently blocked request.
+
+Flush behind
+------------
+
+If `option flush-behind on` is specified in volume specification file, then
+write-behind sends aggregate write requests to child translator, instead of
+regular per request `STACK_WIND()`s.
diff --git a/doc/developer-guide/writing-a-cloudsync-plugin.md b/doc/developer-guide/writing-a-cloudsync-plugin.md
new file mode 100644
index 00000000000..907860aaed8
--- /dev/null
+++ b/doc/developer-guide/writing-a-cloudsync-plugin.md
@@ -0,0 +1,164 @@
+## How to write your Cloudsync Plugin
+
+### Background
+
+Cloudsync translator is part of the archival feature in Gluster. This translator
+does the retrieval/download part. Each cold file will be archived to a remote
+storage (public or private cloud). On future access to the file, it will be
+retrieved from the remote storage by Cloudsync translator. Each remote storage
+would need a unique plugin. Cloudsync translator will load this plugin and
+call the necessary plugin functions.
+
+Upload can be done by a script or program. There are some basic mandatory steps
+for uploading the data. There is a sample script for crawl and upload given at
+the end of this guide.
+
+### Necessary changes to create a plugin
+
+1. Define store_methods:
+
+* This structure is the container of basic functions that will be called by
+ cloudsync xlator.
+
+ typedef struct store_methodds {
+ int (*fop_download) (call_frame_t *frame, void *config);
+ /* return type should be the store config */
+ void *(*fop_init) (xlator_t *this);
+ int (*fop_reconfigure) (xlator_t *this, dict_t *options);
+ void (*fop_fini) (void *config);
+ } store_methods_t;
+
+
+ Member details:
+ fop_download:
+ This is the download function pointer.
+
+ frame: This will have the fd to write data downloaded from
+ cloud to GlusterFS.(frame->local->fd)
+
+ config: This is the plugin configuration variable.
+
+ Note: Structure cs_local_t has member dlfd and dloffset which
+ can be used to manage the writes to Glusterfs.
+ Include cloudsync-common.h to access these structures.
+
+ fop_init:
+ This is similar to xlator init. But here the return value is
+ the plugin configuration pointer. This pointer will be stored
+ in the cloudsync private object (priv->stores->config). And
+ the cloudsync private object can be accessed by "this->private"
+ where "this" is of type xlator_t.
+
+ fop_reconfigure:
+ This is similar to xlator_reconfigure.
+
+ fop_fini:
+ Free plugin resources.
+
+ Note: Store_methods_t is part of cs_private_t which in turn part of
+ xlator_t. Create a store_methods_t object named "store_ops" in
+ your plugin. For example
+
+ store_methods_t store_ops = {
+ .fop_download = aws_download_s3,
+ .fop_init = aws_init,
+ .fop_reconfigure = aws_reconfigure,
+ .fop_fini = aws_fini,
+ };
+
+
+2 - Making Cloudsync xlator aware of the plugin:
+
+ Add an entry in to the cs_plugin structure. For example
+ struct cs_plugin plugins[] = {
+ {
+ .name = "amazons3",
+ .library = "libamazons3.so",
+ .description = "amazon s3 store."
+ },
+
+ {.name = NULL},
+ };
+
+ Description about individual members:
+ name: name of the plugin
+ library: This is the shared object created. Cloudsync will load
+ this library during init.
+ description: Describe about the plugin.
+
+3- Makefile Changes in Cloudsync:
+
+ Add <plugin.la> to cloudsync_la_LIBADD variable.
+
+4 - Configure.ac changes:
+
+ In cloudsync section add the necessary dependency checks for
+ the plugin.
+
+5 - Export symbols:
+
+ Cloudsync needs "store_ops" to resolve all plugin functions.
+ Create a file <plugin>.sym and add write "store_ops" to it.
+
+
+### Sample script for upload
+This script assumes amazon s3 is the target cloud and bucket name is
+gluster-bucket. User can do necessary aws configuration using command
+"aws configure". Currently for amazons3 there are four gluster settings
+available.
+1- features.s3plugin-seckey -> s3 secret key
+2- features.s3plugin-keyid -> s3 key id
+3- features.s3plugin-bucketid -> bucketid
+4- features.s3plugin-hostname -> hostname e.g. s3.amazonaws.com
+
+Additionally set cloudsync storetype to amazons3.
+
+gluster v set <VOLNAME> cloudsync-storetype amazons3
+
+Now create a mount dedicated for this upload task.
+
+That covers necessary configurations needed.
+
+Below is the sample script for upload. The script will crawl directly on the
+brick and will upload those files which are not modified for last one month.
+It needs two arguments.
+1st arguement - Gluster Brick path
+2nd arguement - coldness that is how many days since the file was modified.
+3rd argument - dedicated gluster mount point created for uploading.
+
+Once the cloud setup is done, run the following script on individual bricks.
+Note: For an AFR volume, pick only the fully synchronized brick among the
+replica bricks.
+
+```
+target_folder=$1
+coldness=$2
+mnt=$3
+
+cd $target_folder
+for i in `find . -type f | grep -v "glusterfs" | sed 's/..//'`
+do
+ echo "processing $mnt/$i"
+
+ #check whether the file is already archived
+ getfattr -n trusted.glusterfs.cs.remote $i &> /dev/null
+ if [ $? -eq 0 ]
+ then
+ echo "file $mnt/$i is already archived"
+ else
+ #upload to cloud
+ aws s3 cp $mnt/$i s3://gluster-bucket/
+ mtime=`stat -c "%Y" $mnt/$i`
+
+ #post processing of upload
+ setfattr -n trusted.glusterfs.csou.complete -v $mtime $mnt/$i
+ if [ $? -ne 0 ]
+ then
+ echo "archiving of file $mnt/$i failed"
+ else
+ echo "archiving of file $mnt/$i succeeded"
+ fi
+
+ fi
+done
+```
diff --git a/doc/developer-guide/xlator-classification.md b/doc/developer-guide/xlator-classification.md
new file mode 100644
index 00000000000..6073df9375f
--- /dev/null
+++ b/doc/developer-guide/xlator-classification.md
@@ -0,0 +1,221 @@
+# xlator categories and expectations
+
+The purpose of the document is to define a category for various xlators
+and expectations around what each category means from a perspective of
+health and maintenance of a xlator.
+
+The need to do this is to ensure certain categories are kept in good
+health, and helps the community and contributors focus their efforts around the
+same.
+
+This document also provides implementation details for xlator developers to
+declare a category for any xlator.
+
+## Table of contents
+1. Audience
+2. Categories (and expectations of each category)
+3. Implementation and usage details
+
+## Audience
+
+This document is intended for the following community participants,
+- New xlator contributors
+- Existing xlator maintainers
+- Packaging and gluster management stack maintainers
+
+For a more user facing understanding it is recommended to read section (TBD)
+in the gluster documentation.
+
+## Categories
+1. Experimental (E)
+2. TechPreview (TP)
+3. Maintained (M)
+4. Deprecated (D)
+5. Obsolete (O)
+
+### Experimental (E)
+
+Developed in the experimental branch, for exploring new features. These xlators
+are NEVER packaged as a part of releases, interested users and contributors can
+build and work with these from sources. In the future, these maybe available as
+an package based on a weekly build of the same.
+
+#### Quality expectations
+- Compiles or passes smoke tests
+- Does not break nightly experimental regressions
+ - NOTE: If a nightly is broken, then all patches that were merged are reverted
+ till the errant patch is found and subsequently fixed
+
+### TechPreview (TP)
+
+Xlators in master or release branches that are not deemed fit to be in
+production deployments, but are feature complete to invite feedback and host
+user data.
+
+These xlators will be worked upon with priority by maintainers/authors who are
+involved in making them more stable than xlators in the Experimental/Deprecated/
+Obsolete categories.
+
+There is no guarantee that these xlators will move to the Maintained state, and
+may just get Obsoleted based on feedback, or other project goals or technical
+alternatives.
+
+#### Quality expectations
+- Same as Maintained, minus
+ - Performance, Scale, other(?)
+ - *TBD* *NOTE* Need inputs, Intention is all quality goals as in Maintained,
+ other than the list above (which for now has scale and performance)
+
+### Maintained (M)
+
+These xltors are part of the core Gluster functionality and are maintained
+actively. These are part of master and release branches and are higher in
+priority of maintainers and other interested contributors.
+
+#### Quality expectations
+
+NOTE: A short note on what each of these mean are added here, details to follow.
+
+NOTE: Out of the gate all of the following are not mandated, consider the
+following a desirable state to reach as we progress on each
+
+- Bug backlog: Actively address bug backlog
+- Enhancement backlog: Actively maintain outstanding enhancement backlog (need
+ not be acted on, but should be visible to all)
+- Review backlog: Actively keep this below desired counts and states
+- Static code health: Actively meet near-zero issues in this regard
+ - Coverity, spellcheck and other checks
+- Runtime code health: Actively meet defined coverage levels in this regard
+ - Coverage, others?
+ - Per-patch regressions
+ - Glusto runs
+ - Performance
+ - Scalability
+- Technical specifications: Implementation details should be documented and
+ updated at regular cadence (even per patch that change assumptions in
+ here)
+- User documentation: User facing details should be maintained to current
+ status in the documentation
+- Debuggability: Steps, tools, procedures should be documented and maintained
+ each release/patch as applicable
+- Troubleshooting: Steps, tools, procedures should be documented and maintained
+ each release/patch as applicable
+ - Steps/guides for self service
+ - Knowledge base for problems
+- Other common criteria that will apply: Required metrics/desired states to be
+ defined per criteria
+ - Monitoring, usability, statedump, and other such xlator expectations
+
+### Deprecated (D)
+
+Xlators on master or release branches that would be obsoleted and/or replaced
+with similar or other functionality in the next major release.
+
+#### Quality expectations
+- Retain status-quo when moved to this state, till it is moved to obsoleted
+- Provide migration steps if feature provided by the xlator is replaced with
+other xlators
+
+### Obsolete (O)
+
+Xlator/code still in tree, but not packaged or shipped or maintained in any
+form. This is noted as a category till the code is removed from the tree.
+
+These xlators and their corresponding code and test health will not be executed.
+
+#### Quality expectations
+- None
+
+## Implementation and usage details
+
+### How to specify an xlators category
+
+While defining 'xlator_api_t' structure for the corresponding xlator, add a
+flag like below:
+
+```
+diff --git a/xlators/performance/nl-cache/src/nl-cache.c b/xlators/performance/nl-cache/src/nl-cache.c
+index 0f0e53bac2..8267d6897c 100644
+--- a/xlators/performance/nl-cache/src/nl-cache.c
++++ b/xlators/performance/nl-cache/src/nl-cache.c
+@@ -869,4 +869,5 @@ xlator_api_t xlator_api = {
+ .cbks = &nlc_cbks,
+ .options = nlc_options,
+ .identifier = "nl-cache",
++ .category = GF_TECH_PREVIEW,
+ };
+diff --git a/xlators/performance/quick-read/src/quick-read.c b/xlators/performance/quick-read/src/quick-read.c
+index 8d39720e7f..235de27c19 100644
+--- a/xlators/performance/quick-read/src/quick-read.c
++++ b/xlators/performance/quick-read/src/quick-read.c
+@@ -1702,4 +1702,5 @@ xlator_api_t xlator_api = {
+ .cbks = &qr_cbks,
+ .options = qr_options,
+ .identifier = "quick-read",
++ .category = GF_MAINTAINED,
+ };
+```
+
+Similarly, if a particular option is in different state other than
+the xlator state, one can add the same flag in options structure too.
+
+```
+diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
+index 0e86e33d03..81996743d1 100644
+--- a/xlators/cluster/afr/src/afr.c
++++ b/xlators/cluster/afr/src/afr.c
+@@ -772,6 +772,7 @@ struct volume_options options[] = {
+ .description = "Maximum latency for shd halo replication in msec."
+ },
+ { .key = {"halo-enabled"},
++ .category = GF_TECH_PREVIEW,
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "False",
+
+```
+
+
+### User experience using the categories
+
+#### Ability to use a category
+
+This section details which category of xlators can be used when and specifics
+around when each category is enabled.
+
+1. Maintained category xlators can be used by default, this implies, volumes
+created with these xlators enabled will throw no warnings, or need no user
+intervention to use the xlator.
+
+2. Tech Preview category xlators needs cluster configuration changes to allow
+these xlatorss to be used in volumes, further, logs will contain a message
+stating TP xlators are in use. Without the cluster configured to allow TP
+xlators, volumes created or edited to use such xlators would result in errors.
+ - (TBD) Cluster configuration option
+ - (TBD) Warning message
+ - (TBD) Code mechanics on how this is achieved
+
+3. Deprecated category xlators can be used by default, but will throw a warning
+in the logs that such are in use and will be deprecated in the future.
+ - (TBD) Warning message
+
+4. Obsolete category xlators will not be packaged and hence cannot be used from
+release builds.
+
+5. Experimental category xlators will not be packaged and hence cannot be used
+from release builds, if running experimental (weekly or other such) builds,
+these will throw a warning in the logs stating experimental xlators are in use.
+ - (TBD) Warning message
+
+#### Ability to query xlator category
+
+(TBD) Need to provide the ability to query xlator categories, or list xlators
+and their respective categories.
+
+#### User facing changes
+
+User facing changes that are expected due to this change include the following,
+- Cluster wide option to enable TP xlators, or more generically a category
+level of xlators
+- Errors in commands that fail due to invalid categories
+- Warning messages in logs to denote certain categories of xlators are in use
+- (TBD) Ability to query xlators and their respective categories
diff --git a/doc/examples/legacy/Makefile.am b/doc/examples/legacy/Makefile.am
deleted file mode 100644
index 49c9701efb3..00000000000
--- a/doc/examples/legacy/Makefile.am
+++ /dev/null
@@ -1,8 +0,0 @@
-EXTRA = README replicate.vol stripe.vol protocol-client.vol protocol-server.vol posix-locks.vol trash.vol write-behind.vol io-threads.vol io-cache.vol read-ahead.vol filter.vol trace.vol
-EXTRA_DIST = $(EXTRA)
-
-docdir = $(datadir)/doc/$(PACKAGE_NAME)
-Examplesdir = $(docdir)/examples
-Examples_DATA = $(EXTRA)
-
-CLEANFILES =
diff --git a/doc/examples/legacy/README b/doc/examples/legacy/README
deleted file mode 100644
index 73275157160..00000000000
--- a/doc/examples/legacy/README
+++ /dev/null
@@ -1,13 +0,0 @@
-GlusterFS's translator feature is very flexible and there are quite a lot of
-ways one can configure their filesystem to behave like.
-
-Volume Specification is a way in which GlusterFS understands how it has to work,
-based on what is written there.
-
-Going through the following URLs may give you more idea about all these.
-
-* http://www.gluster.org/docs/index.php/GlusterFS
-* http://www.gluster.org/docs/index.php/GlusterFS_Volume_Specification
-* http://www.gluster.org/docs/index.php/GlusterFS_Translators
-
-Mail us any doubts, suggestions on 'gluster-devel(at)nongnu.org'
diff --git a/doc/examples/legacy/filter.vol b/doc/examples/legacy/filter.vol
deleted file mode 100644
index 59bb23ecf2c..00000000000
--- a/doc/examples/legacy/filter.vol
+++ /dev/null
@@ -1,23 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-## In normal clustered storage type, any of the cluster translators can come here.
-#
-# Definition of other clients
-#
-# Definition of cluster translator (may be unify, afr, or unify over afr)
-#
-
-### 'Filter' translator is used on client side (or server side according to needs). This traslator makes all the below translators, (or say volumes) as read-only. Hence if one wants a 'read-only' filesystem, using filter as the top most volume will make it really fast as the fops are returned from this level itself.
-
-volume filter-ro
- type features/filter
- option root-squashing enable
-# option completely-read-only yes
-# translate-uid 1-99=0
- subvolumes client
-end-volume
diff --git a/doc/examples/legacy/io-cache.vol b/doc/examples/legacy/io-cache.vol
deleted file mode 100644
index a71745017a2..00000000000
--- a/doc/examples/legacy/io-cache.vol
+++ /dev/null
@@ -1,31 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-## In normal clustered storage type, any of the cluster translators can come
-# here.
-#
-# Definition of other clients
-#
-# Definition of cluster translator (may be distribute, replicate, or distribute
-# over replicate)
-#
-
-### 'IO-Cache' translator is best used on client side when a filesystem has file
-# which are not modified frequently but read several times. For example, while
-# compiling a kernel, *.h files are read while compiling every *.c file, in
-# these case, io-cache translator comes very handy, as it keeps the whole file
-# content in the cache, and serves from the cache.
-# One can provide the priority of the cache too.
-
-volume ioc
- type performance/io-cache
- subvolumes client # In this example it is 'client' you may have to
- # change it according to your spec file.
- option cache-size 64MB # 32MB is default
- option force-revalidate-timeout 5 # 1second is default
- option priority *.html:2,*:1 # default is *:0
-end-volume
diff --git a/doc/examples/legacy/io-threads.vol b/doc/examples/legacy/io-threads.vol
deleted file mode 100644
index 236f5b8b188..00000000000
--- a/doc/examples/legacy/io-threads.vol
+++ /dev/null
@@ -1,22 +0,0 @@
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-### 'IO-threads' translator gives a threading behaviour to File I/O calls. All
-# other normal fops are having default behaviour. Loading this on server side
-# helps to reduce the contension of network. (Which is assumed as a GlusterFS
-# hang).
-
-volume iot
- type performance/io-threads
- subvolumes brick
- option thread-count 4 # default value is 1
-end-volume
-
-volume server
- type protocol/server
- subvolumes iot
- option transport-type tcp # For TCP/IP transport
- option auth.addr.iot.allow 192.168.*
-end-volume
diff --git a/doc/examples/legacy/posix-locks.vol b/doc/examples/legacy/posix-locks.vol
deleted file mode 100644
index 673afa3f8ae..00000000000
--- a/doc/examples/legacy/posix-locks.vol
+++ /dev/null
@@ -1,19 +0,0 @@
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-# 'Posix-locks' feature should be added on the server side.
-
-volume p-locks
- type features/posix-locks
- subvolumes brick
- option mandatory on
-end-volume
-
-volume server
- type protocol/server
- subvolumes p-locks
- option transport-type tcp
- option auth.addr.p-locks.allow 192.168.* # Allow access to "p-locks" volume
-end-volume
diff --git a/doc/examples/legacy/protocol-client.vol b/doc/examples/legacy/protocol-client.vol
deleted file mode 100644
index c34ef790d0e..00000000000
--- a/doc/examples/legacy/protocol-client.vol
+++ /dev/null
@@ -1,12 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
-# option transport.socket.remote-port 24016
-
-# option transport-type rdma # for Infiniband verbs transport
-# option transport.rdma.work-request-send-count 16
-# option transport.rdma.work-request-recv-count 16
-# option transport.rdma.remote-port 24016
- option remote-subvolume brick # name of the remote volume
-end-volume
diff --git a/doc/examples/legacy/protocol-server.vol b/doc/examples/legacy/protocol-server.vol
deleted file mode 100644
index 195e4965782..00000000000
--- a/doc/examples/legacy/protocol-server.vol
+++ /dev/null
@@ -1,21 +0,0 @@
-### Export volume "brick" with the contents of "/home/export" directory.
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
-# option transport.socket.listen-port 24016
-
-# option transport-type rdma
-# option transport.rdma.work-request-send-count 64
-# option transport.rdma.work-request-recv-count 64
-# option transport.rdma.listen-port 24016
-
-# option bind-address 192.168.1.10 # Default is to listen on all interfaces
- subvolumes brick
- option auth.addr.brick.allow 192.168.* # Allow access to "brick" volume
-end-volume
diff --git a/doc/examples/legacy/read-ahead.vol b/doc/examples/legacy/read-ahead.vol
deleted file mode 100644
index 9e4dba55627..00000000000
--- a/doc/examples/legacy/read-ahead.vol
+++ /dev/null
@@ -1,24 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-## In normal clustered storage type, any of the cluster translators can come here.
-#
-# Definition of other clients
-#
-# Definition of cluster translator (may be distribute, replicate, or distribute
-# over replicate)
-#
-
-# 'Read-Ahead' translator is best utilized on client side, as it prefetches
-# the file contents when the first read() call is issued.
-
-volume ra
- type performance/read-ahead
- subvolumes client
- option page-count 4 # default is 2
- option force-atime-update no # defalut is 'no'
-end-volume
diff --git a/doc/examples/legacy/replicate.vol b/doc/examples/legacy/replicate.vol
deleted file mode 100644
index 10626d46f05..00000000000
--- a/doc/examples/legacy/replicate.vol
+++ /dev/null
@@ -1,118 +0,0 @@
-### 'NOTE'
-# This file has both server spec and client spec to get an understanding of
-# replicate spec file. Hence can't be used as it is, as a GlusterFS spec file.
-# One need to seperate out server spec and client spec to get it working.
-
-#=========================================================================
-
-# **** server1 spec file ****
-
-### Export volume "brick" with the contents of "/home/export" directory.
-volume posix1
- type storage/posix # POSIX FS translator
- option directory /home/export1 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick1
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix1
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24016
- subvolumes brick1
- option auth.addr.brick1.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server2 spec file ****
-volume posix2
- type storage/posix # POSIX FS translator
- option directory /home/export2 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick2
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix2
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24017
- subvolumes brick2
- option auth.addr.brick2.allow * # Allow access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server3 spec file ****
-
-volume posix3
- type storage/posix # POSIX FS translator
- option directory /home/export3 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick3
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix3
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24018
- subvolumes brick3
- option auth.addr.brick3.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** Clustered Client config file ****
-
-### Add client feature and attach to remote subvolume of server1
-volume client1
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24016
- option remote-subvolume brick1 # name of the remote volume
-end-volume
-
-### Add client feature and attach to remote subvolume of server2
-volume client2
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24017
- option remote-subvolume brick2 # name of the remote volume
-end-volume
-
-volume client3
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24018
- option remote-subvolume brick3 # name of the remote volume
-end-volume
-
-## Add replicate feature.
-volume replicate
- type cluster/replicate
- subvolumes client1 client2 client3
-end-volume
diff --git a/doc/examples/legacy/stripe.vol b/doc/examples/legacy/stripe.vol
deleted file mode 100644
index 9524e8198bf..00000000000
--- a/doc/examples/legacy/stripe.vol
+++ /dev/null
@@ -1,120 +0,0 @@
-
-### 'NOTE'
-# This file has both server spec and client spec to get an understanding of
-# stripe's spec file. Hence can't be used as it is, as a GlusterFS spec file.
-# One need to seperate out server spec and client spec to get it working.
-
-#=========================================================================
-
-# **** server1 spec file ****
-
-### Export volume "brick" with the contents of "/home/export" directory.
-volume posix1
- type storage/posix # POSIX FS translator
- option directory /home/export1 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick1
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix1
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24016
- subvolumes brick1
- option auth.addr.brick1.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server2 spec file ****
-volume posix2
- type storage/posix # POSIX FS translator
- option directory /home/export2 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick2
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix2
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24017
- subvolumes brick2
- option auth.addr.brick2.allow * # Allow access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** server3 spec file ****
-
-volume posix3
- type storage/posix # POSIX FS translator
- option directory /home/export3 # Export this directory
-end-volume
-
-### Add POSIX record locking support to the storage brick
-volume brick3
- type features/posix-locks
- option mandatory on # enables mandatory locking on all files
- subvolumes posix3
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp # For TCP/IP transport
- option transport.socket.listen-port 24018
- subvolumes brick3
- option auth.addr.brick3.allow * # access to "brick" volume
-end-volume
-
-
-#=========================================================================
-
-# **** Clustered Client config file ****
-
-### Add client feature and attach to remote subvolume of server1
-volume client1
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24016
- option remote-subvolume brick1 # name of the remote volume
-end-volume
-
-### Add client feature and attach to remote subvolume of server2
-volume client2
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24017
- option remote-subvolume brick2 # name of the remote volume
-end-volume
-
-volume client3
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 127.0.0.1 # IP address of the remote brick
- option transport.socket.remote-port 24018
- option remote-subvolume brick3 # name of the remote volume
-end-volume
-
-## Add Stripe Feature.
-volume stripe
- type cluster/stripe
- subvolumes client1 client2 client3
- option block-size 1MB
-end-volume
diff --git a/doc/examples/legacy/trace.vol b/doc/examples/legacy/trace.vol
deleted file mode 100644
index 59830f26a9d..00000000000
--- a/doc/examples/legacy/trace.vol
+++ /dev/null
@@ -1,21 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-### 'Trace' translator is a very handy debug tool for GlusterFS, as it can be
-# loaded between any of the two volumes without changing the behaviour of the
-# filesystem.
-# On client side it can be the top most volume in spec (like now) to understand
-# what calls are made on FUSE filesystem, when a mounted filesystem is
-# accessed.
-
-volume trace
- type debug/trace
- subvolumes client
-end-volume
-
-# 'NOTE:' By loading 'debug/trace' translator, filesystem will be very slow as
-# it logs each and every calls to the log file.
diff --git a/doc/examples/legacy/trash.vol b/doc/examples/legacy/trash.vol
deleted file mode 100644
index 3fcf315af08..00000000000
--- a/doc/examples/legacy/trash.vol
+++ /dev/null
@@ -1,20 +0,0 @@
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-### 'Trash' translator is best used on server side as it just renames the
-# deleted file inside 'trash-dir', and it makes 4 seperate fops for one unlink
-# call.
-volume trashcan
- type features/trash
- subvolumes brick
- option trash-dir /.trashcan
-end-volume
-
-volume server
- type protocol/server
- subvolumes trashcan
- option transport-type tcp # For TCP/IP transport
- option auth.addr.brick.allow 192.168.* # Allow access to "brick" volume
-end-volume
diff --git a/doc/examples/legacy/write-behind.vol b/doc/examples/legacy/write-behind.vol
deleted file mode 100644
index 2b5ed413921..00000000000
--- a/doc/examples/legacy/write-behind.vol
+++ /dev/null
@@ -1,27 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp # for TCP/IP transport
- option remote-host 192.168.1.10 # IP address of the remote brick
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-## In normal clustered storage type, any of the cluster translators can come here.
-#
-# Definition of other clients
-#
-# Definition of cluster translator (may be unify, replicate, or unify over replicate)
-#
-
-
-# 'Write-behind' translator is a performance booster for write operation. Best
-# used on client side, as its main intension is to reduce the network latency
-# caused for each write operation.
-
-volume wb
- type performance/write-behind
- subvolumes client
- option flush-behind on # default value is 'off'
- option window-size 2MB
- option enable-O_SYNC no # default is no
- option disable-for-first-nbytes 128KB #default is 1
-end-volume
diff --git a/doc/features/ctime.md b/doc/features/ctime.md
new file mode 100644
index 00000000000..74a77abed4b
--- /dev/null
+++ b/doc/features/ctime.md
@@ -0,0 +1,68 @@
+# Consistent time attributes in gluster across replica/distribute
+
+
+#### Problem:
+Traditionally gluster has been using time attributes (ctime, atime, mtime) of files/dirs from bricks. The problem with this approach is that, it is not consisteant across replica and distribute bricks. And applications which depend on it breaks as replica might not always return time attributes from same brick.
+
+Tar especially gives "file changed as we read it" whenever it detects ctime differences when stat is served from different bricks. The way we have been trying to solve it is to serve the stat structures from same brick in afr, max-time in dht. But it doesn't avoid the problem completely. Because there is no way to change ctime at the moment(lutimes() only allows mtime, atime), there is little we can do to make sure ctimes match after self-heals/xattr updates/rebalance.
+
+#### Solution Proposed:
+Store time attribues (ctime, mtime, atime) as an xattr of the file. The xattr is updated based
+on the fop. If a filesystem fop changes only mtime and ctime, update only those in xattr for
+that file.
+
+#### Design Overview:
+1) As part of each fop, top layer will generate a time stamp and pass it to the down along
+ with other information
+ - This will bring a dependency for NTP synced clients along with servers
+ - There can be a diff in time if the fop stuck in the xlator for various reason,
+for ex: because of locks.
+
+ 2) On the server, posix layer stores the value in the memory (inode ctx) and will sync the data periodically to the disk as an extended attr
+ - Of course sync call also will force it. And fop comes for an inode which is not linked, we do the sync immediately.
+
+ 3) Each time when inodes are created or initialized it read the data from disk and store in inode ctx.
+
+ 4) Before setting to inode_ctx we compare the timestamp stored and the timestamp received, and only store if the stored value is lesser than the current value.
+
+ 5) So in best case data will be stored and retrieved from the memory. We replace the values in iatt with the values in inode_ctx.
+
+ 6) File ops that changes the parent directory attr time need to be consistent across all the distributed directories across the subvolumes. (for eg: a create call will change ctime and mtime of parent dir)
+
+ - This has to handle separately because we only send the fop to the hashed subvolume.
+ - We can asynchronously send the timeupdate setattr fop to the other subvoumes and change the values for parent directory if the file fops is successful on hashed subvolume.
+ - This will have a window where the times are inconsistent across dht subvolume (Please provide your suggestions)
+
+7) Currently we have couple of mount options for time attributes like noatime, relatime , nodiratime etc. But we are not explicitly handled those options even if it is given as mount option when gluster mount.
+
+
+#### Implementation Overview:
+This features involves changes in following xlators.
+ - utime xlator
+ - posix xlator
+
+##### utime xlator:
+This is a new client side xlator which does following tasks.
+
+1. It will generate a time stamp and passes it down in frame->root->ctime and over the network.
+2. Based on fop, it also decides the time attributes to be updated and this passed using "frame->root->flags"
+
+ Patches:
+ 1. https://review.gluster.org/#/c/19857/
+
+##### posix xlator:
+Following tasks are done in posix xlator:
+
+1. Provides APIs to set and get the xattr from backend. It also caches the xattr in inode context. During get, it updates time attributes stored in xattr into iatt structure.
+2. Based on the flags from utime xlator, relevant fops update the time attributes in the xattr.
+
+ Patches:
+ 1. https://review.gluster.org/#/c/19267/
+ 2. https://review.gluster.org/#/c/19795/
+ 3. https://review.gluster.org/#/c/19796/
+
+#### Pending Work:
+1. Handling of time related mount options (noatime, realatime,etc)
+2. flag based create (depending on flags in open, create behaviour might change)
+3. Changes in dht for direcotory sync acrosss multiple subvolumes
+4. readdirp stat need to be worked on.
diff --git a/doc/features/ganesha-ha.md b/doc/features/ganesha-ha.md
new file mode 100644
index 00000000000..4b226a22ccf
--- /dev/null
+++ b/doc/features/ganesha-ha.md
@@ -0,0 +1,43 @@
+# Overview of Ganesha HA Resource Agents in GlusterFS 3.7
+
+The ganesha_mon RA monitors its ganesha.nfsd daemon. While the
+daemon is running, it creates two attributes: ganesha-active and
+grace-active. When the daemon stops for any reason, the attributes
+are deleted. Deleting the ganesha-active attribute triggers the
+failover of the virtual IP (the IPaddr RA) to another node —
+according to constraint location rules — where ganesha.nfsd is
+still running.
+
+The ganesha_grace RA monitors the grace-active attribute. When
+the grace-active attibute is deleted, the ganesha_grace RA stops,
+and will not restart. This triggers pacemaker to invoke the notify
+action in the ganesha_grace RAs on the other nodes in the cluster;
+which send a DBUS message to their respective ganesha.nfsd.
+
+(N.B. grace-active is a bit of a misnomer. while the grace-active
+attribute exists, everything is normal and healthy. Deleting the
+attribute triggers putting the surviving ganesha.nfsds into GRACE.)
+
+To ensure that the remaining/surviving ganesha.nfsds are put into
+ NFS-GRACE before the IPaddr (virtual IP) fails over there is a
+short delay (sleep) between deleting the grace-active attribute
+and the ganesha-active attribute. To summarize, e.g. in a four
+node cluster:
+
+1. on node 2 ganesha_mon::monitor notices that ganesha.nfsd has died
+
+2. on node 2 ganesha_mon::monitor deletes its grace-active attribute
+
+3. on node 2 ganesha_grace::monitor notices that grace-active is gone
+and returns OCF_ERR_GENERIC, a.k.a. new error. When pacemaker tries
+to (re)start ganesha_grace, its start action will return
+OCF_NOT_RUNNING, a.k.a. known error, don't attempt further restarts.
+
+4. on nodes 1, 3, and 4, ganesha_grace::notify receives a post-stop
+notification indicating that node 2 is gone, and sends a DBUS message
+to its ganesha.nfsd, putting it into NFS-GRACE.
+
+5. on node 2 ganesha_mon::monitor waits a short period, then deletes
+its ganesha-active attribute. This triggers the IPaddr (virt IP)
+failover according to constraint location rules.
+
diff --git a/doc/gluster.8 b/doc/gluster.8
index 87c81208175..ba595edca15 100644
--- a/doc/gluster.8
+++ b/doc/gluster.8
@@ -1,18 +1,11 @@
-.\" Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+
+.\" Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
-.\" GlusterFS is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published
-.\" by the Free Software Foundation; either version 3 of the License,
-.\" or (at your option) any later version.
-.\"
-.\" GlusterFS is distributed in the hope that it will be useful, but
-.\" WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-.\" General Public License for more details.
-.\"
-.\" You should have received a copy of the GNU General Public License
-.\" along with this program. If not, see " <http://www.gnu.org/licenses/>.
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
.\"
.\"
.TH Gluster 8 "Gluster command line utility" "07 March 2011" "Gluster Inc."
@@ -23,15 +16,14 @@ gluster - Gluster Console Manager (command line utility)
.PP
To run the program and display gluster prompt:
.PP
-.B gluster
+.B gluster [--remote-host=<gluster_node>] [--mode=script] [--xml]
.PP
(or)
.PP
To specify a command directly:
.PP
.B gluster
-.I [commands] [options]
-
+.I [commands] [options] [--remote-host=<gluster_node>] [--mode=script] [--xml]
.SH DESCRIPTION
The Gluster Console Manager is a command line utility for elastic volume management. You can run the gluster command on any export server. The command enables administrators to perform cloud operations, such as creating, expanding, shrinking, rebalancing, and migrating volumes without needing to schedule server downtime.
.SH COMMANDS
@@ -43,7 +35,13 @@ The Gluster Console Manager is a command line utility for elastic volume managem
\fB\ volume info [all|<VOLNAME>] \fR
Display information about all volumes, or the specified volume.
.TP
-\fB\ volume create <NEW-VOLNAME> [stripe <COUNT>] [replica <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ... \fR
+\fB\ volume list \fR
+List all volumes in cluster
+.TP
+\fB\ volume status [all | <VOLNAME> [nfs|shd|<BRICK>|quotad]] [detail|clients|mem|inode|fd|callpool|tasks|client-list] \fR
+Display status of all or specified volume(s)/brick
+.TP
+\fB\ volume create <NEW-VOLNAME> [stripe <COUNT>] [[replica <COUNT> [arbiter <COUNT>]]|[replica 2 thin-arbiter 1]] [disperse [<COUNT>]] [disperse-data <COUNT>] [redundancy <COUNT>] [transport <tcp|rdma|tcp,rdma>] <NEW-BRICK> ... <TA-BRICK> \fR
Create a new volume of the specified type using the specified bricks and transport type (the default transport type is tcp).
To create a volume with both transports (tcp and rdma), give 'transport tcp,rdma' as an option.
.TP
@@ -56,12 +54,21 @@ Start the specified volume.
\fB\ volume stop <VOLNAME> [force] \fR
Stop the specified volume.
.TP
-\fB\ volume rename <VOLNAME> <NEW-VOLNAME> \fR
-Rename the specified volume.
-.TP
\fB\ volume set <VOLNAME> <OPTION> <PARAMETER> [<OPTION> <PARAMETER>] ... \fR
Set the volume options.
.TP
+\fB\ volume get <VOLNAME/all> <OPTION/all> \fR
+Get the value of the all options or given option for volume <VOLNAME> or all option. gluster volume get all all is to get all global options
+.TP
+\fB\ volume reset <VOLNAME> [option] [force] \fR
+Reset all the reconfigured options
+.TP
+\fB\ volume barrier <VOLNAME> {enable|disable} \fR
+Barrier/unbarrier file operations on a volume
+.TP
+\fB\ volume clear-locks <VOLNAME> <path> kind {blocked|granted|all}{inode [range]|entry [basename]|posix [range]} \fR
+Clear locks held on path
+.TP
\fB\ volume help \fR
Display help for the volume command.
.SS "Brick Commands"
@@ -78,7 +85,13 @@ If you remove the brick, the data stored in that brick will not be available. Yo
.B replace-brick
option.
.TP
-\fB\ volume rebalance-brick <VOLNAME>(<BRICK> <NEW-BRICK>) start \fR
+\fB\ volume reset-brick <VOLNAME> <SOURCE-BRICK> {{start} | {<NEW-BRICK> commit}} \fR
+Brings down or replaces the specified source brick with the new brick.
+.TP
+\fB\ volume replace-brick <VOLNAME> <SOURCE-BRICK> <NEW-BRICK> commit force \fR
+Replace the specified source brick with a new brick.
+.TP
+\fB\ volume rebalance <VOLNAME> start \fR
Start rebalancing the specified volume.
.TP
\fB\ volume rebalance <VOLNAME> stop \fR
@@ -86,9 +99,6 @@ Stop rebalancing the specified volume.
.TP
\fB\ volume rebalance <VOLNAME> status \fR
Display the rebalance status of the specified volume.
-.TP
-\fB\ volume replace-brick <VOLNAME> (<BRICK> <NEW-BRICK>) start|pause|abort|status|commit \fR
-Replace the specified brick.
.SS "Log Commands"
.TP
\fB\ volume log filename <VOLNAME> [BRICK] <DIRECTORY> \fB
@@ -99,10 +109,22 @@ Locate the log file for corresponding volume/brick.
.TP
\fB\ volume log rotate <VOLNAME> [BRICK] \fB
Rotate the log file for corresponding volume/brick.
+.TP
+\fB\ volume profile <VOLNAME> {start|info [peek|incremental [peek]|cumulative|clear]|stop} [nfs] \fR
+Profile operations on the volume. Once started, volume profile <volname> info provides cumulative statistics of the FOPs performed.
+.TP
+\fB\ volume top <VOLNAME> {open|read|write|opendir|readdir|clear} [nfs|brick <brick>] [list-cnt <value>] | {read-perf|write-perf} [bs <size> count <count>] [brick <brick>] [list-cnt <value>] \fR
+Generates a profile of a volume representing the performance and bottlenecks/hotspots of each brick.
+.TP
+\fB\ volume statedump <VOLNAME> [[nfs|quotad] [all|mem|iobuf|callpool|priv|fd|inode|history]... | [client <hostname:process-id>]] \fR
+Dumps the in memory state of the specified process or the bricks of the volume.
+.TP
+\fB\ volume sync <HOSTNAME> [all|<VOLNAME>] \fR
+Sync the volume information from a peer
.SS "Peer Commands"
.TP
\fB\ peer probe <HOSTNAME> \fR
-Probe the specified peer.
+Probe the specified peer. In case the <HOSTNAME> given belongs to an already probed peer, the peer probe command will add the hostname to the peer if required.
.TP
\fB\ peer detach <HOSTNAME> \fR
Detach the specified peer.
@@ -110,10 +132,216 @@ Detach the specified peer.
\fB\ peer status \fR
Display the status of peers.
.TP
+\fB\ pool list \fR
+List all the nodes in the pool (including localhost)
+.TP
\fB\ peer help \fR
Display help for the peer command.
+.SS "Quota Commands"
+.TP
+\fB\ volume quota <VOLNAME> enable \fR
+Enable quota on the specified volume. This will cause all the directories in the filesystem hierarchy to be accounted and updated thereafter on each operation in the the filesystem. To kick start this accounting, a crawl is done over the hierarchy with an auxiliary client.
+.TP
+\fB\ volume quota <VOLNAME> disable \fR
+Disable quota on the volume. This will disable enforcement and accounting in the filesystem. Any configured limits will be lost.
+.TP
+\fB\ volume quota <VOLNAME> limit-usage <PATH> <SIZE> [<PERCENT>] \fR
+Set a usage limit on the given path. Any previously set limit is overridden to the new value. The soft limit can optionally be specified (as a percentage of hard limit). If soft limit percentage is not provided the default soft limit value for the volume is used to decide the soft limit.
+.TP
+\fB\ volume quota <VOLNAME> limit-objects <PATH> <SIZE> [<PERCENT>] \fR
+Set an inode limit on the given path. Any previously set limit is overridden to the new value. The soft limit can optionally be specified (as a percentage of hard limit). If soft limit percentage is not provided the default soft limit value for the volume is used to decide the soft limit.
+.TP
+NOTE: valid units of SIZE are : B, KB, MB, GB, TB, PB. If no unit is specified, the unit defaults to bytes.
+.TP
+\fB\ volume quota <VOLNAME> remove <PATH> \fR
+Remove any usage limit configured on the specified directory. Note that if any limit is configured on the ancestors of this directory (previous directories along the path), they will still be honored and enforced.
+.TP
+\fB\ volume quota <VOLNAME> remove-objects <PATH> \fR
+Remove any inode limit configured on the specified directory. Note that if any limit is configured on the ancestors of this directory (previous directories along the path), they will still be honored and enforced.
+.TP
+\fB\ volume quota <VOLNAME> list <PATH> \fR
+Lists the usage and limits configured on directory(s). If a path is given only the limit that has been configured on the directory(if any) is displayed along with the directory's usage. If no path is given, usage and limits are displayed for all directories that has limits configured.
+.TP
+\fB\ volume quota <VOLNAME> list-objects <PATH> \fR
+Lists the inode usage and inode limits configured on directory(s). If a path is given only the limit that has been configured on the directory(if any) is displayed along with the directory's inode usage. If no path is given, usage and limits are displayed for all directories that has limits configured.
+.TP
+\fB\ volume quota <VOLNAME> default-soft-limit <PERCENT> \fR
+Set the percentage value for default soft limit for the volume.
+.TP
+\fB\ volume quota <VOLNAME> soft-timeout <TIME> \fR
+Set the soft timeout for the volume. The interval in which limits are retested before the soft limit is breached.
+.TP
+\fB\ volume quota <VOLNAME> hard-timeout <TIME> \fR
+Set the hard timeout for the volume. The interval in which limits are retested after the soft limit is breached.
+.TP
+\fB\ volume quota <VOLNAME> alert-time <TIME> \fR
+Set the frequency in which warning messages need to be logged (in the brick logs) once soft limit is breached.
+.TP
+\fB\ volume inode-quota <VOLNAME> enable/disable \fR
+Enable/disable inode-quota for <VOLNAME>
+.TP
+\fB\ volume quota help \fR
+Display help for volume quota commands
+.TP
+NOTE: valid units of time and their symbols are : hours(h/hr), minutes(m/min), seconds(s/sec), weeks(w/wk), Days(d/days).
+.SS "Geo-replication Commands"
+.TP
+\fI\ Note\fR: password-less ssh, from the master node (where these commands are executed) to the slave node <SLAVE_HOST>, is a prerequisite for the geo-replication commands.
+.TP
+\fB\ system:: execute gsec_create\fR
+Generates pem keys which are required for push-pem
+.TP
+\fB\ volume geo-replication <MASTER_VOL> <SLAVE_HOST>::<SLAVE_VOL> create [[ssh-port n][[no-verify]|[push-pem]]] [force]\fR
+Create a new geo-replication session from <MASTER_VOL> to <SLAVE_HOST> host machine having <SLAVE_VOL>.
+Use ssh-port n if custom SSH port is configured in slave nodes.
+Use no-verify if the rsa-keys of nodes in master volume is distributed to slave nodes through an external agent.
+Use push-pem to push the keys automatically.
+.TP
+\fB\ volume geo-replication <MASTER_VOL> <SLAVE_HOST>::<SLAVE_VOL> {start|stop} [force] \fR
+Start/stop the geo-replication session from <MASTER_VOL> to <SLAVE_HOST> host machine having <SLAVE_VOL>.
+.TP
+\fB\ volume geo-replication [<MASTER_VOL> [<SLAVE_HOST>::<SLAVE_VOL>]] status [detail] \fR
+Query status of the geo-replication session from <MASTER_VOL> to <SLAVE_HOST> host machine having <SLAVE_VOL>.
+.TP
+\fB\ volume geo-replication <MASTER_VOL> <SLAVE_HOST>::<SLAVE_VOL> {pause|resume} [force] \fR
+Pause/resume the geo-replication session from <MASTER_VOL> to <SLAVE_HOST> host machine having <SLAVE_VOL>.
+.TP
+\fB\ volume geo-replication <MASTER_VOL> <SLAVE_HOST>::<SLAVE_VOL> delete [reset-sync-time]\fR
+Delete the geo-replication session from <MASTER_VOL> to <SLAVE_HOST> host machine having <SLAVE_VOL>.
+Optionally you can also reset the sync time in case you need to resync the entire volume on session recreate.
+.TP
+\fB\ volume geo-replication <MASTER_VOL> <SLAVE_HOST>::<SLAVE_VOL> config [[!]<options> [<value>]] \fR
+View (when no option provided) or set configuration for this geo-replication session.
+Use "!<OPTION>" to reset option <OPTION> to default value.
+.SS "Bitrot Commands"
+.TP
+\fB\ volume bitrot <VOLNAME> {enable|disable} \fR
+Enable/disable bitrot for volume <VOLNAME>
+.TP
+\fB\ volume bitrot <VOLNAME> signing-time <time-in-secs> \fR
+Waiting time for an object after last fd is closed to start signing process.
+.TP
+\fB\ volume bitrot <VOLNAME> signer-threads <count> \fR
+Number of signing process threads. Usually set to number of available cores.
+.TP
+\fB\ volume bitrot <VOLNAME> scrub-throttle {lazy|normal|aggressive} \fR
+Scrub-throttle value is a measure of how fast or slow the scrubber scrubs the filesystem for volume <VOLNAME>
+.TP
+\fB\ volume bitrot <VOLNAME> scrub-frequency {hourly|daily|weekly|biweekly|monthly} \fR
+Scrub frequency for volume <VOLNAME>
+.TP
+\fB\ volume bitrot <VOLNAME> scrub {pause|resume|status|ondemand} \fR
+Pause/Resume scrub. Upon resume, scrubber continues where it left off. status option shows the statistics of scrubber. ondemand option starts the scrubbing immediately if the scrubber is not paused or already running.
+.TP
+\fB\ volume bitrot help \fR
+Display help for volume bitrot commands
+.TP
+.SS "Snapshot Commands"
+.TP
+\fB\ snapshot create <snapname> <volname> [no-timestamp] [description <description>] [force] \fR
+Creates a snapshot of a GlusterFS volume. User can provide a snap-name and a description to identify the snap. Snap will be created by appending timestamp in GMT. User can override this behaviour using "no-timestamp" option. The description cannot be more than 1024 characters. To be able to take a snapshot, volume should be present and it should be in started state.
+.TP
+\fB\ snapshot restore <snapname> \fR
+Restores an already taken snapshot of a GlusterFS volume. Snapshot restore is an offline activity therefore if the volume is online (in started state) then the restore operation will fail. Once the snapshot is restored it will not be available in the list of snapshots.
+.TP
+\fB\ snapshot clone <clonename> <snapname> \fR
+Create a clone of a snapshot volume, the resulting volume will be GlusterFS volume. User can provide a clone-name. To be able to take a clone, snapshot should be present and it should be in activated state.
+.TP
+\fB\ snapshot delete ( all | <snapname> | volume <volname> ) \fR
+If snapname is specified then mentioned snapshot is deleted. If volname is specified then all snapshots belonging to that particular volume is deleted. If keyword *all* is used then all snapshots belonging to the system is deleted.
+.TP
+\fB\ snapshot list [volname] \fR
+Lists all snapshots taken. If volname is provided, then only the snapshots belonging to that particular volume is listed.
+.TP
+\fB\ snapshot info [snapname | (volume <volname>)] \fR
+This command gives information such as snapshot name, snapshot UUID, time at which snapshot was created, and it lists down the snap-volume-name, number of snapshots already taken and number of snapshots still available for that particular volume, and the state of the snapshot. If snapname is specified then info of the mentioned snapshot is displayed. If volname is specified then info of all snapshots belonging to that volume is displayed. If both snapname and volname is not specified then info of all the snapshots present in the system are displayed.
+.TP
+\fB\ snapshot status [snapname | (volume <volname>)] \fR
+This command gives status of the snapshot. The details included are snapshot brick path, volume group(LVM details), status of the snapshot bricks, PID of the bricks, data percentage filled for that particular volume group to which the snapshots belong to, and total size of the logical volume.
+
+If snapname is specified then status of the mentioned snapshot is displayed. If volname is specified then status of all snapshots belonging to that volume is displayed. If both snapname and volname is not specified then status of all the snapshots present in the system are displayed.
+.TP
+\fB\ snapshot config [volname] ([snap-max-hard-limit <count>] [snap-max-soft-limit <percent>]) | ([auto-delete <enable|disable>]) | ([activate-on-create <enable|disable>])
+Displays and sets the snapshot config values.
+
+snapshot config without any keywords displays the snapshot config values of all volumes in the system. If volname is provided, then the snapshot config values of that volume is displayed.
+
+Snapshot config command along with keywords can be used to change the existing config values. If volname is provided then config value of that volume is changed, else it will set/change the system limit.
+
+snap-max-soft-limit and auto-delete are global options, that will be inherited by all volumes in the system and cannot be set to individual volumes.
+
+snap-max-hard-limit can be set globally, as well as per volume. The lowest limit between the global system limit and the volume specific limit, becomes the
+"Effective snap-max-hard-limit" for a volume.
+
+snap-max-soft-limit is a percentage value, which is applied on the "Effective snap-max-hard-limit" to get the "Effective snap-max-soft-limit".
+
+When auto-delete feature is enabled, then upon reaching the "Effective snap-max-soft-limit", with every successful snapshot creation, the oldest snapshot will be deleted.
+
+When auto-delete feature is disabled, then upon reaching the "Effective snap-max-soft-limit", the user gets a warning with every successful snapshot creation.
+
+When auto-delete feature is disabled, then upon reaching the "Effective snap-max-hard-limit", further snapshot creations will not be allowed.
+
+activate-on-create is disabled by default. If you enable activate-on-create, then further snapshot will be activated during the time of snapshot creation.
+.TP
+\fB\ snapshot activate <snapname> \fR
+Activates the mentioned snapshot.
+
+Note : By default the snapshot is activated during snapshot creation.
+.TP
+\fB\ snapshot deactivate <snapname> \fR
+Deactivates the mentioned snapshot.
+.TP
+\fB\ snapshot help \fR
+Display help for the snapshot commands.
+.SS "Self-heal Commands"
+.TP
+\fB\ volume heal <VOLNAME>\fR
+Triggers index self heal for the files that need healing.
+
+.TP
+\fB\ volume heal <VOLNAME> [enable | disable]\fR
+Enable/disable self-heal-daemon for volume <VOLNAME>.
+
+.TP
+\fB\ volume heal <VOLNAME> full\fR
+Triggers self heal on all the files.
+
+.TP
+\fB\ volume heal <VOLNAME> info \fR
+Lists the files that need healing.
+
+.TP
+\fB\ volume heal <VOLNAME> info split-brain \fR
+Lists the files which are in split-brain state.
+
+.TP
+\fB\ volume heal <VOLNAME> statistics \fR
+Lists the crawl statistics.
+
+.TP
+\fB\ volume heal <VOLNAME> statistics heal-count \fR
+Displays the count of files to be healed.
+
+.TP
+\fB\ volume heal <VOLNAME> statistics heal-count replica <HOSTNAME:BRICKNAME> \fR
+Displays the number of files to be healed from a particular replica subvolume to which the brick <HOSTNAME:BRICKNAME> belongs.
+
+.TP
+\fB\ volume heal <VOLNAME> split-brain bigger-file <FILE> \fR
+Performs healing of <FILE> which is in split-brain by choosing the bigger file in the replica as source.
+
+.TP
+\fB\ volume heal <VOLNAME> split-brain source-brick <HOSTNAME:BRICKNAME> \fR
+Selects <HOSTNAME:BRICKNAME> as the source for all the files that are in split-brain in that replica and heals them.
+
+.TP
+\fB\ volume heal <VOLNAME> split-brain source-brick <HOSTNAME:BRICKNAME> <FILE> \fR
+Selects the split-brained <FILE> present in <HOSTNAME:BRICKNAME> as source and completes heal.
.SS "Other Commands"
.TP
+\fB\ get-state [<daemon>] [[odir </path/to/output/dir/>] [file <filename>]] [detail|volumeoptions] \fR
+Get local state representation of mentioned daemon and store data in provided path information
+.TP
\fB\ help \fR
Display the command options.
.TP
@@ -121,7 +349,7 @@ Display the command options.
Exit the gluster command line interface.
.SH FILES
-/etc/glusterd/*
+/var/lib/glusterd/*
.SH SEE ALSO
.nf
\fBfusermount\fR(1), \fBmount.glusterfs\fR(8), \fBglusterfs\fR(8), \fBglusterd\fR(8)
diff --git a/doc/glusterd.8 b/doc/glusterd.8
index 267a7e00f48..e3768c78761 100644
--- a/doc/glusterd.8
+++ b/doc/glusterd.8
@@ -1,20 +1,11 @@
.\"
-.\" Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+.\" Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
-.\" GlusterFS is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published
-.\" by the Free Software Foundation; either version 3 of the License,
-.\" or (at your option) any later version.
-.\"
-.\" GlusterFS is distributed in the hope that it will be useful, but
-.\" WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-.\" General Public License for more details.
-.\"
-.\" You should have received a copy of the GNU General Public License
-.\" along with this program. If not, see
-.\" <http://www.gnu.org/licenses/>.
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
.\"
.\"
@@ -39,6 +30,9 @@ File to use for logging.
\fB\-L <LOGLEVEL>, \fB\-\-log\-level=<LOGLEVEL>\fR
Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is INFO).
.TP
+\fB\-\-localtime\-logging\fR
+Enable localtime log timestamps.
+.TP
\fB\-\-debug\fR
Run the program in debug mode. This option sets \fB\-\-no\-daemon\fR, \fB\-\-log\-level\fR to DEBUG
and \fB\-\-log\-file\fR to console.
@@ -59,7 +53,7 @@ Print the program version.
.PP
.SH FILES
-/etc/glusterd/*
+/var/lib/glusterd/*
.SH SEE ALSO
.nf
diff --git a/doc/glusterd.vol b/doc/glusterd.vol
deleted file mode 100644
index de17d8fd8f9..00000000000
--- a/doc/glusterd.vol
+++ /dev/null
@@ -1,8 +0,0 @@
-volume management
- type mgmt/glusterd
- option working-directory /var/lib/glusterd
- option transport-type socket,rdma
- option transport.socket.keepalive-time 10
- option transport.socket.keepalive-interval 2
- option transport.socket.read-fail-log off
-end-volume
diff --git a/doc/glusterfs.8 b/doc/glusterfs.8
index 37bf67d1821..3d359ea85e4 100644
--- a/doc/glusterfs.8
+++ b/doc/glusterfs.8
@@ -1,19 +1,10 @@
-.\" Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+.\" Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
-.\" GlusterFS is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published
-.\" by the Free Software Foundation; either version 3 of the License,
-.\" or (at your option) any later version.
-.\"
-.\" GlusterFS is distributed in the hope that it will be useful, but
-.\" WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-.\" General Public License for more details.
-.\"
-.\" You should have received a copy of the GNU General Public License
-.\" long with this program. If not, see
-.\" <http://www.gnu.org/licenses/>.
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
.\"
.\"
.\"
@@ -32,7 +23,8 @@ be made of any commodity hardware, such as x86-64 server with SATA-II RAID and
Infiniband HBA.
GlusterFS is fully POSIX compliant file system. On client side, it has dependency
-on FUSE package, on server side, it works seemlessly on different operating systems. Currently supported on GNU/Linux and Solaris.
+on FUSE package, on server side, it works seemlessly on different operating systems.
+Currently supported on GNU/Linux and Solaris.
.SH OPTIONS
@@ -40,33 +32,58 @@ on FUSE package, on server side, it works seemlessly on different operating syst
.PP
.TP
\fB\-f, \fB\-\-volfile=VOLUME-FILE\fR
-File to use as VOLUME-FILE (the default is /etc/glusterfs/glusterfs.vol).
+File to use as VOLUME-FILE.
.TP
\fB\-l, \fB\-\-log\-file=LOGFILE\fR
-File to use for logging.
+File to use for logging (the default is <INSTALL-DIR>/var/log/glusterfs/<MOUNT-POINT>.log).
.TP
\fB\-L, \fB\-\-log\-level=LOGLEVEL\fR
-Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is WARNING).
+Logging severity. Valid options are TRACE, DEBUG, INFO, WARNING, ERROR and CRITICAL (the default is INFO).
.TP
\fB\-s, \fB\-\-volfile\-server=SERVER\fR
Server to get the volume from. This option overrides \fB\-\-volfile \fR option.
+.TP
+\fB\-\-volfile\-max\-fetch\-attempts=MAX\-ATTEMPTS\fR
+Maximum number of connect attempts to server. This option should be provided with
+\fB\-\-volfile\-server\fR option (the default is 1).
.SS "Advanced options"
.PP
.TP
+\fB\-\-acl\fR
+Mount the filesystem with POSIX ACL support.
+.TP
+\fB\-\-localtime\-logging\fR
+Enable localtime log timestamps.
+.TP
\fB\-\-debug\fR
Run in debug mode. This option sets \fB\-\-no\-daemon\fR, \fB\-\-log\-level\fR to DEBUG,
and \fB\-\-log\-file\fR to console.
.TP
+\fB\-\-enable\-ino32=BOOL\fR
+Use 32-bit inodes when mounting to workaround application that doesn't support 64-bit inodes.
+.TP
+\fB\-\-fopen\-keep\-cache[=BOOL]\fR
+Do not purge the cache on file open (default: false).
+.TP
+\fB\-\-mac\-compat=BOOL\fR
+Provide stubs for attributes needed for seamless operation on Macs (the default is off).
+.TP
\fB\-N, \fB\-\-no\-daemon\fR
Run in the foreground.
.TP
-\fB\-\-read\-only\fR
-Make the file system read-only.
-.TP
\fB\-p, \fB\-\-pid\-file=PIDFILE\fR
File to use as PID file.
.TP
+\fB\-\-read\-only\fR
+Mount the file system in 'read-only' mode.
+.TP
+\fB\-\-selinux\fR
+Enable SELinux label (extended attributes) support on inodes.
+.TP
+\fB\-S, \fB\-\-socket\-file=SOCKFILE\fR
+File to use as unix-socket.
+.TP
\fB\-\-volfile\-id=KEY\fR
Key of the volume file to be fetched from the server.
.TP
@@ -74,26 +91,61 @@ Key of the volume file to be fetched from the server.
Port number of volfile server.
.TP
\fB\-\-volfile\-server\-transport=TRANSPORT\fR
-Transport type to get volume file from server (the default is socket).
+Transport type to get volume file from server (the default is tcp).
.TP
\fB\-\-volume\-name=VOLUME\-NAME\fR
Volume name to be used for MOUNT-POINT (the default is top most volume in VOLUME-FILE).
.TP
+\fB\-\-worm\fR
+Mount the filesystem in 'worm' mode.
+.TP
\fB\-\-xlator\-option=VOLUME\-NAME.OPTION=VALUE\fR
Add/Override a translator option for a volume with the specified value.
+.TP
+\fB\-\-subdir\-mount=SUBDIR\-MOUNT\-PATH\fR
+Mount subdirectory instead of the '/' of volume.
.SS "Fuse options"
.PP
.TP
+\fB\-\-attr\-times\-granularity=NANOSECONDS\fR
+Declare supported granularity of file attribute times (default is 0 which kernel handles as unspecified; valid real values are between 1 and 1000000000).
+.TP
\fB\-\-attribute\-timeout=SECONDS\fR
Set attribute timeout to SECONDS for inodes in fuse kernel module (the default is 1).
.TP
+\fB\-\-background\-qlen=N\fR
+Set fuse module's background queue length to N (the default is 64).
+.TP
+\fB\-\-congestion\-threshold=N\fR
+Set fuse module's congestion threshold to N (the default is 48).
+.TP
+\fB\-\-direct\-io\-mode=BOOL|auto\fR
+Specify fuse direct I/O strategy (the default is auto).
+.TP
+\fB\-\-dump-fuse=PATH\f\R
+Dump fuse traffic to PATH
+.TP
\fB\-\-entry\-timeout=SECONDS\fR
Set entry timeout to SECONDS in fuse kernel module (the default is 1).
.TP
-\fB\-\-direct\-io\-mode=BOOL\fR
-Enable/Disable the direct-I/O mode in fuse module (the default is enable).
+\fB\-\-gid\-timeout=SECONDS\fR
+Set auxiliary group list timeout to SECONDS for fuse translator (the default is 0).
+.TP
+\fB\-\-kernel-writeback-cache=BOOL\fR
+Enable fuse in-kernel writeback cache.
+.TP
+\fB\-\-negative\-timeout=SECONDS\fR
+Set negative timeout to SECONDS in fuse kernel module (the default is 0).
+.TP
+\fB\-\-auto\-invalidation=BOOL\fR
+controls whether fuse-kernel can auto-invalidate attribute, dentry and
+page-cache. Disable this only if same files/directories are not
+accessed across two different mounts concurrently [default: on].
+.TP
+\fB\-\-volfile-check\fR
+Enable strict volume file checking.
.SS "Miscellaneous Options"
.PP
@@ -110,7 +162,7 @@ Print the program version.
.PP
.SH FILES
-/etc/glusterfs/*.vol, /etc/glusterd/vols/*/*.vol
+/var/lib/glusterd/vols/*/*.vol
.SH EXAMPLES
mount a volume named foo on server bar with log level DEBUG on mount point
/mnt/foo
@@ -124,6 +176,6 @@ mount a volume named foo on server bar with log level DEBUG on mount point
.fi
.SH COPYRIGHT
.nf
-Copyright(c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+Copyright(c) 2006-2011 Red Hat, Inc. <http://www.redhat.com>
\fR
.fi
diff --git a/doc/glusterfs.vol.sample b/doc/glusterfs.vol.sample
deleted file mode 100644
index 977363b921b..00000000000
--- a/doc/glusterfs.vol.sample
+++ /dev/null
@@ -1,53 +0,0 @@
-### file: client-volume.vol.sample
-
-#####################################
-### GlusterFS Client Volume File ##
-#####################################
-
-#### CONFIG FILE RULES:
-### "#" is comment character.
-### - Config file is case sensitive
-### - Options within a volume block can be in any order.
-### - Spaces or tabs are used as delimitter within a line.
-### - Each option should end within a line.
-### - Missing or commented fields will assume default values.
-### - Blank/commented lines are allowed.
-### - Sub-volumes should already be defined above before referring.
-
-### Add client feature and attach to remote subvolume
-volume client
- type protocol/client
- option transport-type tcp
-# option transport-type unix
-# option transport-type ib-sdp
- option remote-host 127.0.0.1 # IP address of the remote brick
-# option transport.socket.remote-port 24016
-
-# option transport-type rdma
-# option transport.rdma.remote-port 24016
-# option transport.rdma.work-request-send-count 16
-# option transport.rdma.work-request-recv-count 16
-
- option remote-subvolume brick # name of the remote volume
-end-volume
-
-### Add readahead feature
-#volume readahead
-# type performance/read-ahead
-# option page-count 2 # cache per file = (page-count x page-size)
-# subvolumes client
-#end-volume
-
-### Add IO-Cache feature
-#volume iocache
-# type performance/io-cache
-# subvolumes readahead
-#end-volume
-
-### Add writeback feature
-#volume writeback
-# type performance/write-behind
-# option window-size 2MB
-# option flush-behind off
-# subvolumes iocache
-#end-volume
diff --git a/doc/glusterfsd.8 b/doc/glusterfsd.8
index 17d053a5c07..bc1de2a8c80 100644
--- a/doc/glusterfsd.8
+++ b/doc/glusterfsd.8
@@ -1,19 +1,10 @@
-.\" Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+.\" Copyright (c) 20088888888-2012 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
-.\" GlusterFS is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published
-.\" by the Free Software Foundation; either version 3 of the License,
-.\" or (at your option) any later version.
-.\"
-.\" GlusterFS is distributed in the hope that it will be useful, but
-.\" WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-.\" General Public License for more details.
-.\"
-.\" You should have received a copy of the GNU General Public License
-.\" long with this program. If not, see
-.\" <http://www.gnu.org/licenses/>.
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
.\"
.\"
.\"
@@ -60,6 +51,9 @@ Server to get the volume from. This option overrides \fB\-\-volfile option
.PP
.TP
+\fB\-\-localtime\-logging\fR
+Enable localtime log timestamps.
+.TP
\fB\-\-debug\fR
Run in debug mode. This option sets \fB\-\-no\-daemon\fR, \fB\-\-log\-level\fR to DEBUG
and \fB\-\-log\-file\fR to console
@@ -89,7 +83,7 @@ KEY of the volume file to be fetched from server
Port number of volfile server
.TP
\fB\-\-volfile\-server\-transport=TRANSPORT\fR
-Transport type to get volume file from server [default: socket]
+Transport type to get volume file from server [default: tcp]
.TP
\fB\-\-volume\-name=VOLUME\-NAME\fR
Volume name to be used for MOUNT-POINT [default: top most volume in
@@ -110,6 +104,14 @@ Set entry timeout to SECONDS in fuse kernel module [default: 1]
.TP
\fB\-\-direct\-io\-mode=BOOL\fR
Enable/Disable direct-io mode in fuse module [default: enable]
+.TP
+\fB\-\-resolve-gids\fR
+Resolve all auxiliary groups in fuse translator (max 32 otherwise)
+.TP
+\fB\-\-auto\-invalidation=BOOL\fR
+controls whether fuse-kernel can auto-invalidate attribute, dentry and
+page-cache. Disable this only if same files/directories are not
+accessed across two different mounts concurrently [default: on]
.SS "Miscellaneous Options"
.PP
@@ -131,7 +133,7 @@ Print program version
.SH EXAMPLES
Start a GlusterFS server on localhost with volume name foo
-glusterfsd \-s localhost \-\-volfile\-id foo.server.media-disk\-1 \-p /etc/glusterd/vols/foo/run/server\-media\-disk\-1.pid \-S /tmp/<uniqueid>.socket \-\-brick-name /media/disk\-1 \-l /var/log/glusterfs/bricks/media\-disk\-1.log \-\-brick\-port 24009 \-\-xlator\-option foo\-server.listen-port=24009
+glusterfsd \-s localhost \-\-volfile\-id foo.server.media-disk\-1 \-p /var/lib/glusterd/vols/foo/run/server\-media\-disk\-1.pid \-S /tmp/<uniqueid>.socket \-\-brick-name /media/disk\-1 \-l /var/log/glusterfs/bricks/media\-disk\-1.log \-\-brick\-port 24009 \-\-xlator\-option foo\-server.listen-port=24009
.SH SEE ALSO
.nf
diff --git a/doc/glusterfsd.vol.sample b/doc/glusterfsd.vol.sample
deleted file mode 100644
index ec2fd341ef0..00000000000
--- a/doc/glusterfsd.vol.sample
+++ /dev/null
@@ -1,44 +0,0 @@
-### file: server-volume.vol.sample
-
-#####################################
-### GlusterFS Server Volume File ##
-#####################################
-
-#### CONFIG FILE RULES:
-### "#" is comment character.
-### - Config file is case sensitive
-### - Options within a volume block can be in any order.
-### - Spaces or tabs are used as delimitter within a line.
-### - Multiple values to options will be : delimited.
-### - Each option should end within a line.
-### - Missing or commented fields will assume default values.
-### - Blank/commented lines are allowed.
-### - Sub-volumes should already be defined above before referring.
-
-### Export volume "brick" with the contents of "/home/export" directory.
-volume brick
- type storage/posix # POSIX FS translator
- option directory /home/export # Export this directory
-end-volume
-
-### Add network serving capability to above brick.
-volume server
- type protocol/server
- option transport-type tcp
-# option transport-type unix
-# option transport-type ib-sdp
-# option transport.socket.bind-address 192.168.1.10 # Default is to listen
- # on all interfaces
-# option transport.socket.listen-port 24016
-
-# option transport-type rdma
-# option transport.rdma.listen-port 24016
-# option transport.rdma.work-request-send-count 64
-# option transport.rdma.work-request-recv-count 64
-
- subvolumes brick
-# NOTE: Access to any volume through protocol/server is denied by
-# default. You need to explicitly grant access through # "auth"
-# option.
- option auth.addr.brick.allow * # Allow access to "brick" volume
-end-volume
diff --git a/doc/legacy/authentication.txt b/doc/legacy/authentication.txt
deleted file mode 100644
index 73cb21d73ba..00000000000
--- a/doc/legacy/authentication.txt
+++ /dev/null
@@ -1,112 +0,0 @@
-
-* Authentication is provided by two modules addr and login. Login based authentication uses username/password from client for authentication. Each module returns either ACCEPT, REJCET or DONT_CARE. DONT_CARE is returned if the input authentication information to the module is not concerned to its working. The theory behind authentication is that "none of the auth modules should return REJECT and atleast one of them should return ACCEPT"
-
-* Currently all the authentication related information is passed un-encrypted over the network from client to server.
-
-----------------------------------------------------------------------------------------------------
-* options provided in protocol/client:
- * for username/password based authentication:
- option username <username>
- option password <password>
- * client can have only one set of username/password
- * for addr based authentication:
- * no options required in protocol/client. Client has to bind to privileged port (port < 1024 ) which means the process in which protocol/client is loaded has to be run as root.
-
-----------------------------------------------------------------------------------------------------
-* options provided in protocol/server:
- * for username/password based authentication:
- option auth.login.<brick>.allow [comma seperated list of usernames using which clients can connect to volume <brick>]
- option auth.login.<username>.password <password> #specify password <password> for username <username>
- * for addr based authentication:
- option auth.addr.<brick>.allow [comma seperated list of ip-addresses/unix-paths from which clients are allowed to connect to volume <brick>]
- option auth.addr.<brick>.reject [comma seperated list of ip-addresses/unix-paths from which clients are not allowed to connect to volume <brick>]
- * negation operator '!' is used to invert the sense of matching.
- Eg., option auth.addr.brick.allow !a.b.c.d #do not allow client from a.b.c.d to connect to volume brick
- option auth.addr.brick.reject !w.x.y.z #allow client from w.x.y.z to connect to volume brick
- * wildcard '*' can be used to match any ip-address/unix-path
-
-----------------------------------------------------------------------------------------------------
-
-* Usecases:
-
-* username/password based authentication only
- protocol/client:
- option username foo
- option password foo-password
- option remote-subvolume foo-brick
-
- protocol/server:
- option auth.login.foo-brick.allow foo,who #,other users allowed to connect to foo-brick
- option auth.login.foo.password foo-password
- option auth.login.who.password who-password
-
- * in protocol/server, dont specify ip from which client is connecting in auth.addr.foo-brick.reject list
-
-****************************************************************************************************
-
-* ip based authentication only
- protocol/client:
- option remote-subvolume foo-brick
- * Client is connecting from a.b.c.d
-
- protocol/server:
- option auth.addr.foo-brick.allow a.b.c.d,e.f.g.h,i.j.k.l #, other ip addresses from which clients are allowed to connect to foo-brick
-
-****************************************************************************************************
-* ip and username/password based authentication
- * allow only "user foo from a.b.c.d"
- protocol/client:
- option username foo
- option password foo-password
- option remote-subvolume foo-brick
-
- protocol/server:
- option auth.login.foo-brick.allow foo
- option auth.login.foo.password foo-password
- option auth.addr.foo-brick.reject !a.b.c.d
-
- * allow only "user foo" from a.b.c.d i.e., only user foo is allowed from a.b.c.d, but anyone is allowed from ip addresses other than a.b.c.d
- protocol/client:
- option username foo
- option password foo-password
- option remote-subvolume foo-brick
-
- protocol/server:
- option auth.login.foo-brick.allow foo
- option auth.login.foo.password foo-password
- option auth.addr.foo-brick.allow !a.b.c.d
-
- * reject only "user shoo from a.b.c.d"
- protcol/client:
- option remote-subvolume shoo-brick
-
- protocol/server:
- # observe that no "option auth.login.shoo-brick.allow shoo" given
- # Also other users from a.b.c.d have to be explicitly allowed using auth.login.shoo-brick.allow ...
- option auth.addr.shoo-brick.allow !a.b.c.d
-
- * reject only "user shoo" from a.b.c.d i.e., user shoo from a.b.c.d has to be rejected.
- * same as reject only "user shoo from a.b.c.d" above, but rules have to be added whether to allow ip addresses (and users from those ips) other than a.b.c.d
-
-****************************************************************************************************
-
-* ip or username/password based authentication
-
- * allow user foo or clients from a.b.c.d
- protocol/client:
- option remote-subvolume foo-brick
-
- protocol/server:
- option auth.login.foo-brick.allow foo
- option auth.login.foo.password foo-password
- option auth.addr.foo-brick.allow a.b.c.d
-
- * reject user shoo or clients from a.b.c.d
- protocol/client:
- option remote-subvolume shoo-brick
-
- protocol/server:
- option auth.login.shoo-brick.allow <usernames other than shoo>
- #for each username mentioned in the above <usernames other than shoo> list, specify password as below
- option auth.login.<username other than shoo>.password password
- option auth.addr.shoo-brick.reject a.b.c.d
diff --git a/doc/legacy/booster.txt b/doc/legacy/booster.txt
deleted file mode 100644
index 051401a28fc..00000000000
--- a/doc/legacy/booster.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-Introduction
-============
-* booster is a LD_PRELOADable library which boosts read/write performance by bypassing fuse for
- read() and write() calls.
-
-Requirements
-============
-* fetch volfile from glusterfs.
-* identify whether multiple files are from the same mount point. If so, use only one context.
-
-Design
-======
-* for a getxattr, along with other attributes, fuse returns following attributes.
- * contents of client volume-file.
- * mount point.
-
-* LD_PRELOADed booster.so maintains an hash table storing mount-points and libglusterfsclient handles
- so that handles are reused for files from same mount point.
-
-* it also maintains a fdtable. fdtable maps the fd (integer) returned to application to fd (pointer to fd struct)
- used by libglusterfsclient. application is returned the same fd as the one returned from libc apis.
-
-* During fork, these tables are overwritten to enable creation of fresh glusterfs context in child.
-
-Working
-=======
-* application willing to use booster LD_PRELOADs booster.so which is a wrapper library implementing
- open, read and write.
-
-* application should specify the path to logfile through the environment variable GLFS_BOOSTER_LOGFILE. If
- not specified, logging is done to /dev/stderr.
-
-* open call does,
- * real_open on the file.
- * fgetxattr(fd).
- * store the volume-file content got in the dictionary to a temporary file.
- * look in the hashtable for the mount-point, if already present get the libglusterfsclient handle from the
- hashtable. Otherwise get a new handle from libglusterfsclient (be careful about mount point not present in
- the hashtable and multiple glusterfs_inits running simultaneously for the same mount-point there by using
- multiple handles for the same mount point).
- * real_close (fd).
- * delete temporary volume-volfile.
- * glusterfs_open (handle, path, mode).
- * store the fd returned by glusterfs_open in the fdtable at the same index as the fd returned by real_open.
- * return the index as fd.
-
-* read/write calls do,
- * get the libglusterfsclient fd from fdtable.
- * if found use glusterfs_read/glusterfs_write, else use real_read/real_write.
-
-* close call does,
- * remove the fd from the fdtable.
-
-* other calls use real_calls.
diff --git a/doc/legacy/coding-standard.pdf b/doc/legacy/coding-standard.pdf
deleted file mode 100644
index bc9cb56202c..00000000000
--- a/doc/legacy/coding-standard.pdf
+++ /dev/null
Binary files differ
diff --git a/doc/legacy/coding-standard.tex b/doc/legacy/coding-standard.tex
deleted file mode 100644
index abaedb69c3c..00000000000
--- a/doc/legacy/coding-standard.tex
+++ /dev/null
@@ -1,385 +0,0 @@
-\documentclass{article}[12pt]
-\usepackage{color}
-
-\begin{document}
-
-
-\hrule
-\begin{center}\textbf{\Large{GlusterFS Coding Standards}}\end{center}
-\begin{center}\textbf{\large{\textcolor{red}{Z} Research}}\end{center}
-\begin{center}{July 14, 2008}\end{center}
-\hrule
-
-\vspace{8ex}
-
-\section*{$\bullet$ Structure definitions should have a comment per member}
-
-Every member in a structure definition must have a comment about its
-purpose. The comment should be descriptive without being overly verbose.
-
-\vspace{2ex}
-\textsl{Bad}:
-
-\begin{verbatim}
- gf_lock_t lock; /* lock */
-\end{verbatim}
-
-\textsl{Good}:
-
-\begin{verbatim}
- DBTYPE access_mode; /* access mode for accessing
- * the databases, can be
- * DB_HASH, DB_BTREE
- * (option access-mode <mode>)
- */
-\end{verbatim}
-
-\section*{$\bullet$ Declare all variables at the beginning of the function}
-All local variables in a function must be declared immediately after the
-opening brace. This makes it easy to keep track of memory that needs to be freed
-during exit. It also helps debugging, since gdb cannot handle variables
-declared inside loops or other such blocks.
-
-\section*{$\bullet$ Always initialize local variables}
-Every local variable should be initialized to a sensible default value
-at the point of its declaration. All pointers should be initialized to NULL,
-and all integers should be zero or (if it makes sense) an error value.
-
-\vspace{2ex}
-
-\textsl{Good}:
-
-\begin{verbatim}
- int ret = 0;
- char *databuf = NULL;
- int _fd = -1;
-\end{verbatim}
-
-\section*{$\bullet$ Initialization should always be done with a constant value}
-Never use a non-constant expression as the initialization value for a variable.
-
-\vspace{2ex}
-
-\textsl{Bad}:
-
-\begin{verbatim}
- pid_t pid = frame->root->pid;
- char *databuf = malloc (1024);
-\end{verbatim}
-
-\section*{$\bullet$ Validate all arguments to a function}
-All pointer arguments to a function must be checked for \texttt{NULL}.
-A macro named \texttt{VALIDATE} (in \texttt{common-utils.h})
-takes one argument, and if it is \texttt{NULL}, writes a log message and
-jumps to a label called \texttt{err} after setting op\_ret and op\_errno
-appropriately. It is recommended to use this template.
-
-\vspace{2ex}
-
-\textsl{Good}:
-
-\begin{verbatim}
- VALIDATE(frame);
- VALIDATE(this);
- VALIDATE(inode);
-\end{verbatim}
-
-\section*{$\bullet$ Never rely on precedence of operators}
-Never write code that relies on the precedence of operators to execute
-correctly. Such code can be hard to read and someone else might not
-know the precedence of operators as accurately as you do.
-\vspace{2ex}
-
-\textsl{Bad}:
-
-\begin{verbatim}
- if (op_ret == -1 && errno != ENOENT)
-\end{verbatim}
-
-\textsl{Good}:
-
-\begin{verbatim}
- if ((op_ret == -1) && (errno != ENOENT))
-\end{verbatim}
-
-\section*{$\bullet$ Use exactly matching types}
-Use a variable of the exact type declared in the manual to hold the
-return value of a function. Do not use an ``equivalent'' type.
-
-\vspace{2ex}
-
-\textsl{Bad}:
-
-\begin{verbatim}
- int len = strlen (path);
-\end{verbatim}
-
-\textsl{Good}:
-
-\begin{verbatim}
- size_t len = strlen (path);
-\end{verbatim}
-
-\section*{$\bullet$ Never write code such as \texttt{foo->bar->baz}; check every pointer}
-Do not write code that blindly follows a chain of pointer
-references. Any pointer in the chain may be \texttt{NULL} and thus
-cause a crash. Verify that each pointer is non-null before following
-it.
-
-\section*{$\bullet$ Check return value of all functions and system calls}
-The return value of all system calls and API functions must be checked
-for success or failure.
-
-\vspace{2ex}
-\textsl{Bad}:
-
-\begin{verbatim}
- close (fd);
-\end{verbatim}
-
-\textsl{Good}:
-
-\begin{verbatim}
- op_ret = close (_fd);
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "close on file %s failed (%s)", real_path,
- strerror (errno));
- op_errno = errno;
- goto out;
- }
-\end{verbatim}
-
-
-\section*{$\bullet$ Gracefully handle failure of malloc}
-GlusterFS should never crash or exit due to lack of memory. If a
-memory allocation fails, the call should be unwound and an error
-returned to the user.
-
-\section*{$\bullet$ Use result args and reserve the return value to indicate success or failure}
-The return value of every functions must indicate success or failure (unless
-it is impossible for the function to fail --- e.g., boolean functions). If
-the function needs to return additional data, it must be returned using a
-result (pointer) argument.
-
-\vspace{2ex}
-\textsl{Bad}:
-
-\begin{verbatim}
- int32_t dict_get_int32 (dict_t *this, char *key);
-\end{verbatim}
-
-\textsl{Good}:
-
-\begin{verbatim}
- int dict_get_int32 (dict_t *this, char *key, int32_t *val);
-\end{verbatim}
-
-\section*{$\bullet$ Always use the `n' versions of string functions}
-Unless impossible, use the length-limited versions of the string functions.
-
-\vspace{2ex}
-\textsl{Bad}:
-
-\begin{verbatim}
- strcpy (entry_path, real_path);
-\end{verbatim}
-
-\textsl{Good}:
-
-\begin{verbatim}
- strncpy (entry_path, real_path, entry_path_len);
-\end{verbatim}
-
-\section*{$\bullet$ No dead or commented code}
-There must be no dead code (code to which control can never be passed) or
-commented out code in the codebase.
-
-\section*{$\bullet$ Only one unwind and return per function}
-There must be only one exit out of a function. \texttt{UNWIND} and return
-should happen at only point in the function.
-
-\section*{$\bullet$ Function length or Keep functions small}
-We live in the UNIX-world where modules do one thing and do it well.
-This rule should apply to our functions also. If a function is very long, try splitting it
-into many little helper functions. The question is, in a coding
-spree, how do we know a function is long and unreadable. One rule of
-thumb given by Linus Torvalds is that, a function should be broken-up
-if you have 4 or more levels of indentation going on for more than 3-4
-lines.
-
-\vspace{2ex}
-\textsl{Example for a helper function}:
-\begin{verbatim}
- static int
- same_owner (posix_lock_t *l1, posix_lock_t *l2)
- {
- return ((l1->client_pid == l2->client_pid) &&
- (l1->transport == l2->transport));
- }
-\end{verbatim}
-
-\section*{$\bullet$ Defining functions as static}
-Define internal functions as static only if you're
-very sure that there will not be a crash(..of any kind..) emanating in
-that function. If there is even a remote possibility, perhaps due to
-pointer derefering, etc, declare the function as non-static. This
-ensures that when a crash does happen, the function name shows up the
-in the back-trace generated by libc. However, doing so has potential
-for polluting the function namespace, so to avoid conflicts with other
-components in other parts, ensure that the function names are
-prepended with a prefix that identify the component to which it
-belongs. For eg. non-static functions in io-threads translator start
-with iot\_.
-
-\section*{$\bullet$ Ensure function calls wrap around after
-80-columns.}
-Place remaining arguments on the next line if needed.
-
-\section*{$\bullet$ Functions arguments and function definition}
-Place all the arguments of a function definition on the same line
-until the line goes beyond 80-cols. Arguments that extend beyind
-80-cols should be placed on the next line.
-
-\section*{Style issues}
-
-\subsection*{Brace placement}
-Use K\&R/Linux style of brace placement for blocks.
-
-\textsl{Example}:
-\begin{verbatim}
- int some_function (...)
- {
- if (...) {
- /* ... */
- } else if (...) {
- /* ... */
- } else {
- /* ... */
- }
-
- do {
- /* ... */
- } while (cond);
- }
-\end{verbatim}
-
-\subsection*{Indentation}
-Use \textbf{eight} spaces for indenting blocks. Ensure that your
-file contains only spaces and not tab characters. You can do this
-in Emacs by selecting the entire file (\texttt{C-x h}) and
-running \texttt{M-x untabify}.
-
-To make Emacs indent lines automatically by eight spaces, add this
-line to your \texttt{.emacs}:
-
-\begin{verbatim}
- (add-hook 'c-mode-hook (lambda () (c-set-style "linux")))
-\end{verbatim}
-
-\subsection*{Comments}
-Write a comment before every function describing its purpose (one-line),
-its arguments, and its return value. Mention whether it is an internal
-function or an exported function.
-
-Write a comment before every structure describing its purpose, and
-write comments about each of its members.
-
-Follow the style shown below for comments, since such comments
-can then be automatically extracted by doxygen to generate
-documentation.
-
-\textsl{Example}:
-\begin{verbatim}
-/**
- * hash_name -hash function for filenames
- * @par: parent inode number
- * @name: basename of inode
- * @mod: number of buckets in the hashtable
- *
- * @return: success: bucket number
- * failure: -1
- *
- * Not for external use.
- */
-\end{verbatim}
-
-\subsection*{Indicating critical sections}
-To clearly show regions of code which execute with locks held, use
-the following format:
-
-\begin{verbatim}
- pthread_mutex_lock (&mutex);
- {
- /* code */
- }
- pthread_mutex_unlock (&mutex);
-\end{verbatim}
-
-\section*{A skeleton fop function}
-This is the recommended template for any fop. In the beginning come
-the initializations. After that, the `success' control flow should be
-linear. Any error conditions should cause a \texttt{goto} to a single
-point, \texttt{out}. At that point, the code should detect the error
-that has occured and do appropriate cleanup.
-
-\begin{verbatim}
-int32_t
-sample_fop (call_frame_t *frame, xlator_t *this, ...)
-{
- char * var1 = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- DIR * dir = NULL;
- struct posix_fd * pfd = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
-
- /* other validations */
-
- dir = opendir (...);
-
- if (dir == NULL) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR,
- "opendir failed on %s (%s)", loc->path,
- strerror (op_errno));
- goto out;
- }
-
- /* another system call */
- if (...) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory :(");
- goto out;
- }
-
- /* ... */
-
- out:
- if (op_ret == -1) {
-
- /* check for all the cleanup that needs to be
- done */
-
- if (dir) {
- closedir (dir);
- dir = NULL;
- }
-
- if (pfd) {
- if (pfd->path)
- FREE (pfd->path);
- FREE (pfd);
- pfd = NULL;
- }
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, fd);
- return 0;
-}
-\end{verbatim}
-
-\end{document}
diff --git a/doc/legacy/errno.list.bsd.txt b/doc/legacy/errno.list.bsd.txt
deleted file mode 100644
index 350af25e4ab..00000000000
--- a/doc/legacy/errno.list.bsd.txt
+++ /dev/null
@@ -1,376 +0,0 @@
-/*-
- * Copyright (c) 1982, 1986, 1989, 1993
- * The Regents of the University of California. All rights reserved.
- * (c) UNIX System Laboratories, Inc.
- * All or some portions of this file are derived from material licensed
- * to the University of California by American Telephone and Telegraph
- * Co. or Unix System Laboratories, Inc. and are reproduced herein with
- * the permission of UNIX System Laboratories, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)errno.h 8.5 (Berkeley) 1/21/94
- * $FreeBSD: src/sys/sys/errno.h,v 1.28 2005/04/02 12:33:28 das Exp $
- */
-
-#ifndef _SYS_ERRNO_H_
-#define _SYS_ERRNO_H_
-
-#ifndef _KERNEL
-#include <sys/cdefs.h>
-__BEGIN_DECLS
-int * __error(void);
-__END_DECLS
-#define errno (* __error())
-#endif
-
-#define EPERM 1 /* Operation not permitted */
-#define ENOENT 2 /* No such file or directory */
-#define ESRCH 3 /* No such process */
-#define EINTR 4 /* Interrupted system call */
-#define EIO 5 /* Input/output error */
-#define ENXIO 6 /* Device not configured */
-#define E2BIG 7 /* Argument list too long */
-#define ENOEXEC 8 /* Exec format error */
-#define EBADF 9 /* Bad file descriptor */
-#define ECHILD 10 /* No child processes */
-#define EDEADLK 11 /* Resource deadlock avoided */
- /* 11 was EAGAIN */
-#define ENOMEM 12 /* Cannot allocate memory */
-#define EACCES 13 /* Permission denied */
-#define EFAULT 14 /* Bad address */
-#ifndef _POSIX_SOURCE
-#define ENOTBLK 15 /* Block device required */
-#endif
-#define EBUSY 16 /* Device busy */
-#define EEXIST 17 /* File exists */
-#define EXDEV 18 /* Cross-device link */
-#define ENODEV 19 /* Operation not supported by device */
-#define ENOTDIR 20 /* Not a directory */
-#define EISDIR 21 /* Is a directory */
-#define EINVAL 22 /* Invalid argument */
-#define ENFILE 23 /* Too many open files in system */
-#define EMFILE 24 /* Too many open files */
-#define ENOTTY 25 /* Inappropriate ioctl for device */
-#ifndef _POSIX_SOURCE
-#define ETXTBSY 26 /* Text file busy */
-#endif
-#define EFBIG 27 /* File too large */
-#define ENOSPC 28 /* No space left on device */
-#define ESPIPE 29 /* Illegal seek */
-#define EROFS 30 /* Read-only filesystem */
-#define EMLINK 31 /* Too many links */
-#define EPIPE 32 /* Broken pipe */
-
-/* math software */
-#define EDOM 33 /* Numerical argument out of domain */
-#define ERANGE 34 /* Result too large */
-
-/* non-blocking and interrupt i/o */
-#define EAGAIN 35 /* Resource temporarily unavailable */
-#ifndef _POSIX_SOURCE
-#define EWOULDBLOCK EAGAIN /* Operation would block */
-#define EINPROGRESS 36 /* Operation now in progress */
-#define EALREADY 37 /* Operation already in progress */
-
-/* ipc/network software -- argument errors */
-#define ENOTSOCK 38 /* Socket operation on non-socket */
-#define EDESTADDRREQ 39 /* Destination address required */
-#define EMSGSIZE 40 /* Message too long */
-#define EPROTOTYPE 41 /* Protocol wrong type for socket */
-#define ENOPROTOOPT 42 /* Protocol not available */
-#define EPROTONOSUPPORT 43 /* Protocol not supported */
-#define ESOCKTNOSUPPORT 44 /* Socket type not supported */
-#define EOPNOTSUPP 45 /* Operation not supported */
-#define ENOTSUP EOPNOTSUPP /* Operation not supported */
-#define EPFNOSUPPORT 46 /* Protocol family not supported */
-#define EAFNOSUPPORT 47 /* Address family not supported by protocol family */
-#define EADDRINUSE 48 /* Address already in use */
-#define EADDRNOTAVAIL 49 /* Can't assign requested address */
-
-/* ipc/network software -- operational errors */
-#define ENETDOWN 50 /* Network is down */
-#define ENETUNREACH 51 /* Network is unreachable */
-#define ENETRESET 52 /* Network dropped connection on reset */
-#define ECONNABORTED 53 /* Software caused connection abort */
-#define ECONNRESET 54 /* Connection reset by peer */
-#define ENOBUFS 55 /* No buffer space available */
-#define EISCONN 56 /* Socket is already connected */
-#define ENOTCONN 57 /* Socket is not connected */
-#define ESHUTDOWN 58 /* Can't send after socket shutdown */
-#define ETOOMANYREFS 59 /* Too many references: can't splice */
-#define ETIMEDOUT 60 /* Operation timed out */
-#define ECONNREFUSED 61 /* Connection refused */
-
-#define ELOOP 62 /* Too many levels of symbolic links */
-#endif /* _POSIX_SOURCE */
-#define ENAMETOOLONG 63 /* File name too long */
-
-/* should be rearranged */
-#ifndef _POSIX_SOURCE
-#define EHOSTDOWN 64 /* Host is down */
-#define EHOSTUNREACH 65 /* No route to host */
-#endif /* _POSIX_SOURCE */
-#define ENOTEMPTY 66 /* Directory not empty */
-
-/* quotas & mush */
-#ifndef _POSIX_SOURCE
-#define EPROCLIM 67 /* Too many processes */
-#define EUSERS 68 /* Too many users */
-#define EDQUOT 69 /* Disc quota exceeded */
-
-/* Network File System */
-#define ESTALE 70 /* Stale NFS file handle */
-#define EREMOTE 71 /* Too many levels of remote in path */
-#define EBADRPC 72 /* RPC struct is bad */
-#define ERPCMISMATCH 73 /* RPC version wrong */
-#define EPROGUNAVAIL 74 /* RPC prog. not avail */
-#define EPROGMISMATCH 75 /* Program version wrong */
-#define EPROCUNAVAIL 76 /* Bad procedure for program */
-#endif /* _POSIX_SOURCE */
-
-#define ENOLCK 77 /* No locks available */
-#define ENOSYS 78 /* Function not implemented */
-
-#ifndef _POSIX_SOURCE
-#define EFTYPE 79 /* Inappropriate file type or format */
-#define EAUTH 80 /* Authentication error */
-#define ENEEDAUTH 81 /* Need authenticator */
-#define EIDRM 82 /* Identifier removed */
-#define ENOMSG 83 /* No message of desired type */
-#define EOVERFLOW 84 /* Value too large to be stored in data type */
-#define ECANCELED 85 /* Operation canceled */
-#define EILSEQ 86 /* Illegal byte sequence */
-#define ENOATTR 87 /* Attribute not found */
-
-#define EDOOFUS 88 /* Programming error */
-#endif /* _POSIX_SOURCE */
-
-#define EBADMSG 89 /* Bad message */
-#define EMULTIHOP 90 /* Multihop attempted */
-#define ENOLINK 91 /* Link has been severed */
-#define EPROTO 92 /* Protocol error */
-
-#ifndef _POSIX_SOURCE
-#define ELAST 92 /* Must be equal largest errno */
-#endif /* _POSIX_SOURCE */
-
-#ifdef _KERNEL
-/* pseudo-errors returned inside kernel to modify return to process */
-#define ERESTART (-1) /* restart syscall */
-#define EJUSTRETURN (-2) /* don't modify regs, just return */
-#define ENOIOCTL (-3) /* ioctl not handled by this layer */
-#define EDIRIOCTL (-4) /* do direct ioctl in GEOM */
-#endif
-
-#endif
-/*-
- * Copyright (c) 1982, 1986, 1989, 1993
- * The Regents of the University of California. All rights reserved.
- * (c) UNIX System Laboratories, Inc.
- * All or some portions of this file are derived from material licensed
- * to the University of California by American Telephone and Telegraph
- * Co. or Unix System Laboratories, Inc. and are reproduced herein with
- * the permission of UNIX System Laboratories, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)errno.h 8.5 (Berkeley) 1/21/94
- * $FreeBSD: src/sys/sys/errno.h,v 1.28 2005/04/02 12:33:28 das Exp $
- */
-
-#ifndef _SYS_ERRNO_H_
-#define _SYS_ERRNO_H_
-
-#ifndef _KERNEL
-#include <sys/cdefs.h>
-__BEGIN_DECLS
-int * __error(void);
-__END_DECLS
-#define errno (* __error())
-#endif
-
-#define EPERM 1 /* Operation not permitted */
-#define ENOENT 2 /* No such file or directory */
-#define ESRCH 3 /* No such process */
-#define EINTR 4 /* Interrupted system call */
-#define EIO 5 /* Input/output error */
-#define ENXIO 6 /* Device not configured */
-#define E2BIG 7 /* Argument list too long */
-#define ENOEXEC 8 /* Exec format error */
-#define EBADF 9 /* Bad file descriptor */
-#define ECHILD 10 /* No child processes */
-#define EDEADLK 11 /* Resource deadlock avoided */
- /* 11 was EAGAIN */
-#define ENOMEM 12 /* Cannot allocate memory */
-#define EACCES 13 /* Permission denied */
-#define EFAULT 14 /* Bad address */
-#ifndef _POSIX_SOURCE
-#define ENOTBLK 15 /* Block device required */
-#endif
-#define EBUSY 16 /* Device busy */
-#define EEXIST 17 /* File exists */
-#define EXDEV 18 /* Cross-device link */
-#define ENODEV 19 /* Operation not supported by device */
-#define ENOTDIR 20 /* Not a directory */
-#define EISDIR 21 /* Is a directory */
-#define EINVAL 22 /* Invalid argument */
-#define ENFILE 23 /* Too many open files in system */
-#define EMFILE 24 /* Too many open files */
-#define ENOTTY 25 /* Inappropriate ioctl for device */
-#ifndef _POSIX_SOURCE
-#define ETXTBSY 26 /* Text file busy */
-#endif
-#define EFBIG 27 /* File too large */
-#define ENOSPC 28 /* No space left on device */
-#define ESPIPE 29 /* Illegal seek */
-#define EROFS 30 /* Read-only filesystem */
-#define EMLINK 31 /* Too many links */
-#define EPIPE 32 /* Broken pipe */
-
-/* math software */
-#define EDOM 33 /* Numerical argument out of domain */
-#define ERANGE 34 /* Result too large */
-
-/* non-blocking and interrupt i/o */
-#define EAGAIN 35 /* Resource temporarily unavailable */
-#ifndef _POSIX_SOURCE
-#define EWOULDBLOCK EAGAIN /* Operation would block */
-#define EINPROGRESS 36 /* Operation now in progress */
-#define EALREADY 37 /* Operation already in progress */
-
-/* ipc/network software -- argument errors */
-#define ENOTSOCK 38 /* Socket operation on non-socket */
-#define EDESTADDRREQ 39 /* Destination address required */
-#define EMSGSIZE 40 /* Message too long */
-#define EPROTOTYPE 41 /* Protocol wrong type for socket */
-#define ENOPROTOOPT 42 /* Protocol not available */
-#define EPROTONOSUPPORT 43 /* Protocol not supported */
-#define ESOCKTNOSUPPORT 44 /* Socket type not supported */
-#define EOPNOTSUPP 45 /* Operation not supported */
-#define ENOTSUP EOPNOTSUPP /* Operation not supported */
-#define EPFNOSUPPORT 46 /* Protocol family not supported */
-#define EAFNOSUPPORT 47 /* Address family not supported by protocol family */
-#define EADDRINUSE 48 /* Address already in use */
-#define EADDRNOTAVAIL 49 /* Can't assign requested address */
-
-/* ipc/network software -- operational errors */
-#define ENETDOWN 50 /* Network is down */
-#define ENETUNREACH 51 /* Network is unreachable */
-#define ENETRESET 52 /* Network dropped connection on reset */
-#define ECONNABORTED 53 /* Software caused connection abort */
-#define ECONNRESET 54 /* Connection reset by peer */
-#define ENOBUFS 55 /* No buffer space available */
-#define EISCONN 56 /* Socket is already connected */
-#define ENOTCONN 57 /* Socket is not connected */
-#define ESHUTDOWN 58 /* Can't send after socket shutdown */
-#define ETOOMANYREFS 59 /* Too many references: can't splice */
-#define ETIMEDOUT 60 /* Operation timed out */
-#define ECONNREFUSED 61 /* Connection refused */
-
-#define ELOOP 62 /* Too many levels of symbolic links */
-#endif /* _POSIX_SOURCE */
-#define ENAMETOOLONG 63 /* File name too long */
-
-/* should be rearranged */
-#ifndef _POSIX_SOURCE
-#define EHOSTDOWN 64 /* Host is down */
-#define EHOSTUNREACH 65 /* No route to host */
-#endif /* _POSIX_SOURCE */
-#define ENOTEMPTY 66 /* Directory not empty */
-
-/* quotas & mush */
-#ifndef _POSIX_SOURCE
-#define EPROCLIM 67 /* Too many processes */
-#define EUSERS 68 /* Too many users */
-#define EDQUOT 69 /* Disc quota exceeded */
-
-/* Network File System */
-#define ESTALE 70 /* Stale NFS file handle */
-#define EREMOTE 71 /* Too many levels of remote in path */
-#define EBADRPC 72 /* RPC struct is bad */
-#define ERPCMISMATCH 73 /* RPC version wrong */
-#define EPROGUNAVAIL 74 /* RPC prog. not avail */
-#define EPROGMISMATCH 75 /* Program version wrong */
-#define EPROCUNAVAIL 76 /* Bad procedure for program */
-#endif /* _POSIX_SOURCE */
-
-#define ENOLCK 77 /* No locks available */
-#define ENOSYS 78 /* Function not implemented */
-
-#ifndef _POSIX_SOURCE
-#define EFTYPE 79 /* Inappropriate file type or format */
-#define EAUTH 80 /* Authentication error */
-#define ENEEDAUTH 81 /* Need authenticator */
-#define EIDRM 82 /* Identifier removed */
-#define ENOMSG 83 /* No message of desired type */
-#define EOVERFLOW 84 /* Value too large to be stored in data type */
-#define ECANCELED 85 /* Operation canceled */
-#define EILSEQ 86 /* Illegal byte sequence */
-#define ENOATTR 87 /* Attribute not found */
-
-#define EDOOFUS 88 /* Programming error */
-#endif /* _POSIX_SOURCE */
-
-#define EBADMSG 89 /* Bad message */
-#define EMULTIHOP 90 /* Multihop attempted */
-#define ENOLINK 91 /* Link has been severed */
-#define EPROTO 92 /* Protocol error */
-
-#ifndef _POSIX_SOURCE
-#define ELAST 92 /* Must be equal largest errno */
-#endif /* _POSIX_SOURCE */
-
-#ifdef _KERNEL
-/* pseudo-errors returned inside kernel to modify return to process */
-#define ERESTART (-1) /* restart syscall */
-#define EJUSTRETURN (-2) /* don't modify regs, just return */
-#define ENOIOCTL (-3) /* ioctl not handled by this layer */
-#define EDIRIOCTL (-4) /* do direct ioctl in GEOM */
-#endif
-
-#endif
diff --git a/doc/legacy/errno.list.linux.txt b/doc/legacy/errno.list.linux.txt
deleted file mode 100644
index cc868644b36..00000000000
--- a/doc/legacy/errno.list.linux.txt
+++ /dev/null
@@ -1,1586 +0,0 @@
-#define ICONV_SUPPORTS_ERRNO 1
-#include <errno.h>
-/* Error constants. Linux specific version.
- Copyright (C) 1996, 1997, 1998, 1999, 2005 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-#ifdef _ERRNO_H
-
-# undef EDOM
-# undef EILSEQ
-# undef ERANGE
-# include <linux/errno.h>
-
-/* Linux has no ENOTSUP error code. */
-# define ENOTSUP EOPNOTSUPP
-
-/* Older Linux versions also had no ECANCELED error code. */
-# ifndef ECANCELED
-# define ECANCELED 125
-# endif
-
-/* Support for error codes to support robust mutexes was added later, too. */
-# ifndef EOWNERDEAD
-# define EOWNERDEAD 130
-# define ENOTRECOVERABLE 131
-# endif
-
-# ifndef __ASSEMBLER__
-/* Function to get address of global `errno' variable. */
-extern int *__errno_location (void) __THROW __attribute__ ((__const__));
-
-# if !defined _LIBC || defined _LIBC_REENTRANT
-/* When using threads, errno is a per-thread value. */
-# define errno (*__errno_location ())
-# endif
-# endif /* !__ASSEMBLER__ */
-#endif /* _ERRNO_H */
-
-#if !defined _ERRNO_H && defined __need_Emath
-/* This is ugly but the kernel header is not clean enough. We must
- define only the values EDOM, EILSEQ and ERANGE in case __need_Emath is
- defined. */
-# define EDOM 33 /* Math argument out of domain of function. */
-# define EILSEQ 84 /* Illegal byte sequence. */
-# define ERANGE 34 /* Math result not representable. */
-#endif /* !_ERRNO_H && __need_Emath */
-/* Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef APR_ERRNO_H
-#define APR_ERRNO_H
-
-/**
- * @file apr_errno.h
- * @brief APR Error Codes
- */
-
-#include "apr.h"
-
-#if APR_HAVE_ERRNO_H
-#include <errno.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/**
- * @defgroup apr_errno Error Codes
- * @ingroup APR
- * @{
- */
-
-/**
- * Type for specifying an error or status code.
- */
-typedef int apr_status_t;
-
-/**
- * Return a human readable string describing the specified error.
- * @param statcode The error code the get a string for.
- * @param buf A buffer to hold the error string.
- * @param bufsize Size of the buffer to hold the string.
- */
-APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
- apr_size_t bufsize);
-
-#if defined(DOXYGEN)
-/**
- * @def APR_FROM_OS_ERROR(os_err_type syserr)
- * Fold a platform specific error into an apr_status_t code.
- * @return apr_status_t
- * @param e The platform os error code.
- * @warning macro implementation; the syserr argument may be evaluated
- * multiple times.
- */
-#define APR_FROM_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e + APR_OS_START_SYSERR)
-
-/**
- * @def APR_TO_OS_ERROR(apr_status_t statcode)
- * @return os_err_type
- * Fold an apr_status_t code back to the native platform defined error.
- * @param e The apr_status_t folded platform os error code.
- * @warning macro implementation; the statcode argument may be evaluated
- * multiple times. If the statcode was not created by apr_get_os_error
- * or APR_FROM_OS_ERROR, the results are undefined.
- */
-#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
-
-/** @def apr_get_os_error()
- * @return apr_status_t the last platform error, folded into apr_status_t, on most platforms
- * @remark This retrieves errno, or calls a GetLastError() style function, and
- * folds it with APR_FROM_OS_ERROR. Some platforms (such as OS2) have no
- * such mechanism, so this call may be unsupported. Do NOT use this
- * call for socket errors from socket, send, recv etc!
- */
-
-/** @def apr_set_os_error(e)
- * Reset the last platform error, unfolded from an apr_status_t, on some platforms
- * @param e The OS error folded in a prior call to APR_FROM_OS_ERROR()
- * @warning This is a macro implementation; the statcode argument may be evaluated
- * multiple times. If the statcode was not created by apr_get_os_error
- * or APR_FROM_OS_ERROR, the results are undefined. This macro sets
- * errno, or calls a SetLastError() style function, unfolding statcode
- * with APR_TO_OS_ERROR. Some platforms (such as OS2) have no such
- * mechanism, so this call may be unsupported.
- */
-
-/** @def apr_get_netos_error()
- * Return the last socket error, folded into apr_status_t, on all platforms
- * @remark This retrieves errno or calls a GetLastSocketError() style function,
- * and folds it with APR_FROM_OS_ERROR.
- */
-
-/** @def apr_set_netos_error(e)
- * Reset the last socket error, unfolded from an apr_status_t
- * @param e The socket error folded in a prior call to APR_FROM_OS_ERROR()
- * @warning This is a macro implementation; the statcode argument may be evaluated
- * multiple times. If the statcode was not created by apr_get_os_error
- * or APR_FROM_OS_ERROR, the results are undefined. This macro sets
- * errno, or calls a WSASetLastError() style function, unfolding
- * socketcode with APR_TO_OS_ERROR.
- */
-
-#endif /* defined(DOXYGEN) */
-
-/**
- * APR_OS_START_ERROR is where the APR specific error values start.
- */
-#define APR_OS_START_ERROR 20000
-/**
- * APR_OS_ERRSPACE_SIZE is the maximum number of errors you can fit
- * into one of the error/status ranges below -- except for
- * APR_OS_START_USERERR, which see.
- */
-#define APR_OS_ERRSPACE_SIZE 50000
-/**
- * APR_OS_START_STATUS is where the APR specific status codes start.
- */
-#define APR_OS_START_STATUS (APR_OS_START_ERROR + APR_OS_ERRSPACE_SIZE)
-/**
- * APR_OS_START_USERERR are reserved for applications that use APR that
- * layer their own error codes along with APR's. Note that the
- * error immediately following this one is set ten times farther
- * away than usual, so that users of apr have a lot of room in
- * which to declare custom error codes.
- */
-#define APR_OS_START_USERERR (APR_OS_START_STATUS + APR_OS_ERRSPACE_SIZE)
-/**
- * APR_OS_START_USEERR is obsolete, defined for compatibility only.
- * Use APR_OS_START_USERERR instead.
- */
-#define APR_OS_START_USEERR APR_OS_START_USERERR
-/**
- * APR_OS_START_CANONERR is where APR versions of errno values are defined
- * on systems which don't have the corresponding errno.
- */
-#define APR_OS_START_CANONERR (APR_OS_START_USERERR \
- + (APR_OS_ERRSPACE_SIZE * 10))
-/**
- * APR_OS_START_EAIERR folds EAI_ error codes from getaddrinfo() into
- * apr_status_t values.
- */
-#define APR_OS_START_EAIERR (APR_OS_START_CANONERR + APR_OS_ERRSPACE_SIZE)
-/**
- * APR_OS_START_SYSERR folds platform-specific system error values into
- * apr_status_t values.
- */
-#define APR_OS_START_SYSERR (APR_OS_START_EAIERR + APR_OS_ERRSPACE_SIZE)
-
-/** no error. */
-#define APR_SUCCESS 0
-
-/**
- * @defgroup APR_Error APR Error Values
- * <PRE>
- * <b>APR ERROR VALUES</b>
- * APR_ENOSTAT APR was unable to perform a stat on the file
- * APR_ENOPOOL APR was not provided a pool with which to allocate memory
- * APR_EBADDATE APR was given an invalid date
- * APR_EINVALSOCK APR was given an invalid socket
- * APR_ENOPROC APR was not given a process structure
- * APR_ENOTIME APR was not given a time structure
- * APR_ENODIR APR was not given a directory structure
- * APR_ENOLOCK APR was not given a lock structure
- * APR_ENOPOLL APR was not given a poll structure
- * APR_ENOSOCKET APR was not given a socket
- * APR_ENOTHREAD APR was not given a thread structure
- * APR_ENOTHDKEY APR was not given a thread key structure
- * APR_ENOSHMAVAIL There is no more shared memory available
- * APR_EDSOOPEN APR was unable to open the dso object. For more
- * information call apr_dso_error().
- * APR_EGENERAL General failure (specific information not available)
- * APR_EBADIP The specified IP address is invalid
- * APR_EBADMASK The specified netmask is invalid
- * APR_ESYMNOTFOUND Could not find the requested symbol
- * </PRE>
- *
- * <PRE>
- * <b>APR STATUS VALUES</b>
- * APR_INCHILD Program is currently executing in the child
- * APR_INPARENT Program is currently executing in the parent
- * APR_DETACH The thread is detached
- * APR_NOTDETACH The thread is not detached
- * APR_CHILD_DONE The child has finished executing
- * APR_CHILD_NOTDONE The child has not finished executing
- * APR_TIMEUP The operation did not finish before the timeout
- * APR_INCOMPLETE The operation was incomplete although some processing
- * was performed and the results are partially valid
- * APR_BADCH Getopt found an option not in the option string
- * APR_BADARG Getopt found an option that is missing an argument
- * and an argument was specified in the option string
- * APR_EOF APR has encountered the end of the file
- * APR_NOTFOUND APR was unable to find the socket in the poll structure
- * APR_ANONYMOUS APR is using anonymous shared memory
- * APR_FILEBASED APR is using a file name as the key to the shared memory
- * APR_KEYBASED APR is using a shared key as the key to the shared memory
- * APR_EINIT Ininitalizer value. If no option has been found, but
- * the status variable requires a value, this should be used
- * APR_ENOTIMPL The APR function has not been implemented on this
- * platform, either because nobody has gotten to it yet,
- * or the function is impossible on this platform.
- * APR_EMISMATCH Two passwords do not match.
- * APR_EABSOLUTE The given path was absolute.
- * APR_ERELATIVE The given path was relative.
- * APR_EINCOMPLETE The given path was neither relative nor absolute.
- * APR_EABOVEROOT The given path was above the root path.
- * APR_EBUSY The given lock was busy.
- * APR_EPROC_UNKNOWN The given process wasn't recognized by APR
- * </PRE>
- * @{
- */
-/** @see APR_STATUS_IS_ENOSTAT */
-#define APR_ENOSTAT (APR_OS_START_ERROR + 1)
-/** @see APR_STATUS_IS_ENOPOOL */
-#define APR_ENOPOOL (APR_OS_START_ERROR + 2)
-/* empty slot: +3 */
-/** @see APR_STATUS_IS_EBADDATE */
-#define APR_EBADDATE (APR_OS_START_ERROR + 4)
-/** @see APR_STATUS_IS_EINVALSOCK */
-#define APR_EINVALSOCK (APR_OS_START_ERROR + 5)
-/** @see APR_STATUS_IS_ENOPROC */
-#define APR_ENOPROC (APR_OS_START_ERROR + 6)
-/** @see APR_STATUS_IS_ENOTIME */
-#define APR_ENOTIME (APR_OS_START_ERROR + 7)
-/** @see APR_STATUS_IS_ENODIR */
-#define APR_ENODIR (APR_OS_START_ERROR + 8)
-/** @see APR_STATUS_IS_ENOLOCK */
-#define APR_ENOLOCK (APR_OS_START_ERROR + 9)
-/** @see APR_STATUS_IS_ENOPOLL */
-#define APR_ENOPOLL (APR_OS_START_ERROR + 10)
-/** @see APR_STATUS_IS_ENOSOCKET */
-#define APR_ENOSOCKET (APR_OS_START_ERROR + 11)
-/** @see APR_STATUS_IS_ENOTHREAD */
-#define APR_ENOTHREAD (APR_OS_START_ERROR + 12)
-/** @see APR_STATUS_IS_ENOTHDKEY */
-#define APR_ENOTHDKEY (APR_OS_START_ERROR + 13)
-/** @see APR_STATUS_IS_EGENERAL */
-#define APR_EGENERAL (APR_OS_START_ERROR + 14)
-/** @see APR_STATUS_IS_ENOSHMAVAIL */
-#define APR_ENOSHMAVAIL (APR_OS_START_ERROR + 15)
-/** @see APR_STATUS_IS_EBADIP */
-#define APR_EBADIP (APR_OS_START_ERROR + 16)
-/** @see APR_STATUS_IS_EBADMASK */
-#define APR_EBADMASK (APR_OS_START_ERROR + 17)
-/* empty slot: +18 */
-/** @see APR_STATUS_IS_EDSOPEN */
-#define APR_EDSOOPEN (APR_OS_START_ERROR + 19)
-/** @see APR_STATUS_IS_EABSOLUTE */
-#define APR_EABSOLUTE (APR_OS_START_ERROR + 20)
-/** @see APR_STATUS_IS_ERELATIVE */
-#define APR_ERELATIVE (APR_OS_START_ERROR + 21)
-/** @see APR_STATUS_IS_EINCOMPLETE */
-#define APR_EINCOMPLETE (APR_OS_START_ERROR + 22)
-/** @see APR_STATUS_IS_EABOVEROOT */
-#define APR_EABOVEROOT (APR_OS_START_ERROR + 23)
-/** @see APR_STATUS_IS_EBADPATH */
-#define APR_EBADPATH (APR_OS_START_ERROR + 24)
-/** @see APR_STATUS_IS_EPATHWILD */
-#define APR_EPATHWILD (APR_OS_START_ERROR + 25)
-/** @see APR_STATUS_IS_ESYMNOTFOUND */
-#define APR_ESYMNOTFOUND (APR_OS_START_ERROR + 26)
-/** @see APR_STATUS_IS_EPROC_UNKNOWN */
-#define APR_EPROC_UNKNOWN (APR_OS_START_ERROR + 27)
-/** @see APR_STATUS_IS_ENOTENOUGHENTROPY */
-#define APR_ENOTENOUGHENTROPY (APR_OS_START_ERROR + 28)
-/** @} */
-
-/**
- * @defgroup APR_STATUS_IS Status Value Tests
- * @warning For any particular error condition, more than one of these tests
- * may match. This is because platform-specific error codes may not
- * always match the semantics of the POSIX codes these tests (and the
- * corresponding APR error codes) are named after. A notable example
- * are the APR_STATUS_IS_ENOENT and APR_STATUS_IS_ENOTDIR tests on
- * Win32 platforms. The programmer should always be aware of this and
- * adjust the order of the tests accordingly.
- * @{
- */
-/**
- * APR was unable to perform a stat on the file
- * @warning always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_ENOSTAT(s) ((s) == APR_ENOSTAT)
-/**
- * APR was not provided a pool with which to allocate memory
- * @warning always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_ENOPOOL(s) ((s) == APR_ENOPOOL)
-/** APR was given an invalid date */
-#define APR_STATUS_IS_EBADDATE(s) ((s) == APR_EBADDATE)
-/** APR was given an invalid socket */
-#define APR_STATUS_IS_EINVALSOCK(s) ((s) == APR_EINVALSOCK)
-/** APR was not given a process structure */
-#define APR_STATUS_IS_ENOPROC(s) ((s) == APR_ENOPROC)
-/** APR was not given a time structure */
-#define APR_STATUS_IS_ENOTIME(s) ((s) == APR_ENOTIME)
-/** APR was not given a directory structure */
-#define APR_STATUS_IS_ENODIR(s) ((s) == APR_ENODIR)
-/** APR was not given a lock structure */
-#define APR_STATUS_IS_ENOLOCK(s) ((s) == APR_ENOLOCK)
-/** APR was not given a poll structure */
-#define APR_STATUS_IS_ENOPOLL(s) ((s) == APR_ENOPOLL)
-/** APR was not given a socket */
-#define APR_STATUS_IS_ENOSOCKET(s) ((s) == APR_ENOSOCKET)
-/** APR was not given a thread structure */
-#define APR_STATUS_IS_ENOTHREAD(s) ((s) == APR_ENOTHREAD)
-/** APR was not given a thread key structure */
-#define APR_STATUS_IS_ENOTHDKEY(s) ((s) == APR_ENOTHDKEY)
-/** Generic Error which can not be put into another spot */
-#define APR_STATUS_IS_EGENERAL(s) ((s) == APR_EGENERAL)
-/** There is no more shared memory available */
-#define APR_STATUS_IS_ENOSHMAVAIL(s) ((s) == APR_ENOSHMAVAIL)
-/** The specified IP address is invalid */
-#define APR_STATUS_IS_EBADIP(s) ((s) == APR_EBADIP)
-/** The specified netmask is invalid */
-#define APR_STATUS_IS_EBADMASK(s) ((s) == APR_EBADMASK)
-/* empty slot: +18 */
-/**
- * APR was unable to open the dso object.
- * For more information call apr_dso_error().
- */
-#if defined(WIN32)
-#define APR_STATUS_IS_EDSOOPEN(s) ((s) == APR_EDSOOPEN \
- || APR_TO_OS_ERROR(s) == ERROR_MOD_NOT_FOUND)
-#else
-#define APR_STATUS_IS_EDSOOPEN(s) ((s) == APR_EDSOOPEN)
-#endif
-/** The given path was absolute. */
-#define APR_STATUS_IS_EABSOLUTE(s) ((s) == APR_EABSOLUTE)
-/** The given path was relative. */
-#define APR_STATUS_IS_ERELATIVE(s) ((s) == APR_ERELATIVE)
-/** The given path was neither relative nor absolute. */
-#define APR_STATUS_IS_EINCOMPLETE(s) ((s) == APR_EINCOMPLETE)
-/** The given path was above the root path. */
-#define APR_STATUS_IS_EABOVEROOT(s) ((s) == APR_EABOVEROOT)
-/** The given path was bad. */
-#define APR_STATUS_IS_EBADPATH(s) ((s) == APR_EBADPATH)
-/** The given path contained wildcards. */
-#define APR_STATUS_IS_EPATHWILD(s) ((s) == APR_EPATHWILD)
-/** Could not find the requested symbol.
- * For more information call apr_dso_error().
- */
-#if defined(WIN32)
-#define APR_STATUS_IS_ESYMNOTFOUND(s) ((s) == APR_ESYMNOTFOUND \
- || APR_TO_OS_ERROR(s) == ERROR_PROC_NOT_FOUND)
-#else
-#define APR_STATUS_IS_ESYMNOTFOUND(s) ((s) == APR_ESYMNOTFOUND)
-#endif
-/** The given process was not recognized by APR. */
-#define APR_STATUS_IS_EPROC_UNKNOWN(s) ((s) == APR_EPROC_UNKNOWN)
-
-/** APR could not gather enough entropy to continue. */
-#define APR_STATUS_IS_ENOTENOUGHENTROPY(s) ((s) == APR_ENOTENOUGHENTROPY)
-
-/** @} */
-
-/**
- * @addtogroup APR_Error
- * @{
- */
-/** @see APR_STATUS_IS_INCHILD */
-#define APR_INCHILD (APR_OS_START_STATUS + 1)
-/** @see APR_STATUS_IS_INPARENT */
-#define APR_INPARENT (APR_OS_START_STATUS + 2)
-/** @see APR_STATUS_IS_DETACH */
-#define APR_DETACH (APR_OS_START_STATUS + 3)
-/** @see APR_STATUS_IS_NOTDETACH */
-#define APR_NOTDETACH (APR_OS_START_STATUS + 4)
-/** @see APR_STATUS_IS_CHILD_DONE */
-#define APR_CHILD_DONE (APR_OS_START_STATUS + 5)
-/** @see APR_STATUS_IS_CHILD_NOTDONE */
-#define APR_CHILD_NOTDONE (APR_OS_START_STATUS + 6)
-/** @see APR_STATUS_IS_TIMEUP */
-#define APR_TIMEUP (APR_OS_START_STATUS + 7)
-/** @see APR_STATUS_IS_INCOMPLETE */
-#define APR_INCOMPLETE (APR_OS_START_STATUS + 8)
-/* empty slot: +9 */
-/* empty slot: +10 */
-/* empty slot: +11 */
-/** @see APR_STATUS_IS_BADCH */
-#define APR_BADCH (APR_OS_START_STATUS + 12)
-/** @see APR_STATUS_IS_BADARG */
-#define APR_BADARG (APR_OS_START_STATUS + 13)
-/** @see APR_STATUS_IS_EOF */
-#define APR_EOF (APR_OS_START_STATUS + 14)
-/** @see APR_STATUS_IS_NOTFOUND */
-#define APR_NOTFOUND (APR_OS_START_STATUS + 15)
-/* empty slot: +16 */
-/* empty slot: +17 */
-/* empty slot: +18 */
-/** @see APR_STATUS_IS_ANONYMOUS */
-#define APR_ANONYMOUS (APR_OS_START_STATUS + 19)
-/** @see APR_STATUS_IS_FILEBASED */
-#define APR_FILEBASED (APR_OS_START_STATUS + 20)
-/** @see APR_STATUS_IS_KEYBASED */
-#define APR_KEYBASED (APR_OS_START_STATUS + 21)
-/** @see APR_STATUS_IS_EINIT */
-#define APR_EINIT (APR_OS_START_STATUS + 22)
-/** @see APR_STATUS_IS_ENOTIMPL */
-#define APR_ENOTIMPL (APR_OS_START_STATUS + 23)
-/** @see APR_STATUS_IS_EMISMATCH */
-#define APR_EMISMATCH (APR_OS_START_STATUS + 24)
-/** @see APR_STATUS_IS_EBUSY */
-#define APR_EBUSY (APR_OS_START_STATUS + 25)
-/** @} */
-
-/**
- * @addtogroup APR_STATUS_IS
- * @{
- */
-/**
- * Program is currently executing in the child
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code */
-#define APR_STATUS_IS_INCHILD(s) ((s) == APR_INCHILD)
-/**
- * Program is currently executing in the parent
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_INPARENT(s) ((s) == APR_INPARENT)
-/**
- * The thread is detached
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_DETACH(s) ((s) == APR_DETACH)
-/**
- * The thread is not detached
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_NOTDETACH(s) ((s) == APR_NOTDETACH)
-/**
- * The child has finished executing
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_CHILD_DONE(s) ((s) == APR_CHILD_DONE)
-/**
- * The child has not finished executing
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_CHILD_NOTDONE(s) ((s) == APR_CHILD_NOTDONE)
-/**
- * The operation did not finish before the timeout
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP)
-/**
- * The operation was incomplete although some processing was performed
- * and the results are partially valid.
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_INCOMPLETE(s) ((s) == APR_INCOMPLETE)
-/* empty slot: +9 */
-/* empty slot: +10 */
-/* empty slot: +11 */
-/**
- * Getopt found an option not in the option string
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_BADCH(s) ((s) == APR_BADCH)
-/**
- * Getopt found an option not in the option string and an argument was
- * specified in the option string
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_BADARG(s) ((s) == APR_BADARG)
-/**
- * APR has encountered the end of the file
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_EOF(s) ((s) == APR_EOF)
-/**
- * APR was unable to find the socket in the poll structure
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_NOTFOUND(s) ((s) == APR_NOTFOUND)
-/* empty slot: +16 */
-/* empty slot: +17 */
-/* empty slot: +18 */
-/**
- * APR is using anonymous shared memory
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_ANONYMOUS(s) ((s) == APR_ANONYMOUS)
-/**
- * APR is using a file name as the key to the shared memory
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_FILEBASED(s) ((s) == APR_FILEBASED)
-/**
- * APR is using a shared key as the key to the shared memory
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_KEYBASED(s) ((s) == APR_KEYBASED)
-/**
- * Ininitalizer value. If no option has been found, but
- * the status variable requires a value, this should be used
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_EINIT(s) ((s) == APR_EINIT)
-/**
- * The APR function has not been implemented on this
- * platform, either because nobody has gotten to it yet,
- * or the function is impossible on this platform.
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_ENOTIMPL(s) ((s) == APR_ENOTIMPL)
-/**
- * Two passwords do not match.
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_EMISMATCH(s) ((s) == APR_EMISMATCH)
-/**
- * The given lock was busy
- * @warning always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_EBUSY(s) ((s) == APR_EBUSY)
-
-/** @} */
-
-/**
- * @addtogroup APR_Error APR Error Values
- * @{
- */
-/* APR CANONICAL ERROR VALUES */
-/** @see APR_STATUS_IS_EACCES */
-#ifdef EACCES
-#define APR_EACCES EACCES
-#else
-#define APR_EACCES (APR_OS_START_CANONERR + 1)
-#endif
-
-/** @see APR_STATUS_IS_EXIST */
-#ifdef EEXIST
-#define APR_EEXIST EEXIST
-#else
-#define APR_EEXIST (APR_OS_START_CANONERR + 2)
-#endif
-
-/** @see APR_STATUS_IS_ENAMETOOLONG */
-#ifdef ENAMETOOLONG
-#define APR_ENAMETOOLONG ENAMETOOLONG
-#else
-#define APR_ENAMETOOLONG (APR_OS_START_CANONERR + 3)
-#endif
-
-/** @see APR_STATUS_IS_ENOENT */
-#ifdef ENOENT
-#define APR_ENOENT ENOENT
-#else
-#define APR_ENOENT (APR_OS_START_CANONERR + 4)
-#endif
-
-/** @see APR_STATUS_IS_ENOTDIR */
-#ifdef ENOTDIR
-#define APR_ENOTDIR ENOTDIR
-#else
-#define APR_ENOTDIR (APR_OS_START_CANONERR + 5)
-#endif
-
-/** @see APR_STATUS_IS_ENOSPC */
-#ifdef ENOSPC
-#define APR_ENOSPC ENOSPC
-#else
-#define APR_ENOSPC (APR_OS_START_CANONERR + 6)
-#endif
-
-/** @see APR_STATUS_IS_ENOMEM */
-#ifdef ENOMEM
-#define APR_ENOMEM ENOMEM
-#else
-#define APR_ENOMEM (APR_OS_START_CANONERR + 7)
-#endif
-
-/** @see APR_STATUS_IS_EMFILE */
-#ifdef EMFILE
-#define APR_EMFILE EMFILE
-#else
-#define APR_EMFILE (APR_OS_START_CANONERR + 8)
-#endif
-
-/** @see APR_STATUS_IS_ENFILE */
-#ifdef ENFILE
-#define APR_ENFILE ENFILE
-#else
-#define APR_ENFILE (APR_OS_START_CANONERR + 9)
-#endif
-
-/** @see APR_STATUS_IS_EBADF */
-#ifdef EBADF
-#define APR_EBADF EBADF
-#else
-#define APR_EBADF (APR_OS_START_CANONERR + 10)
-#endif
-
-/** @see APR_STATUS_IS_EINVAL */
-#ifdef EINVAL
-#define APR_EINVAL EINVAL
-#else
-#define APR_EINVAL (APR_OS_START_CANONERR + 11)
-#endif
-
-/** @see APR_STATUS_IS_ESPIPE */
-#ifdef ESPIPE
-#define APR_ESPIPE ESPIPE
-#else
-#define APR_ESPIPE (APR_OS_START_CANONERR + 12)
-#endif
-
-/**
- * @see APR_STATUS_IS_EAGAIN
- * @warning use APR_STATUS_IS_EAGAIN instead of just testing this value
- */
-#ifdef EAGAIN
-#define APR_EAGAIN EAGAIN
-#elif defined(EWOULDBLOCK)
-#define APR_EAGAIN EWOULDBLOCK
-#else
-#define APR_EAGAIN (APR_OS_START_CANONERR + 13)
-#endif
-
-/** @see APR_STATUS_IS_EINTR */
-#ifdef EINTR
-#define APR_EINTR EINTR
-#else
-#define APR_EINTR (APR_OS_START_CANONERR + 14)
-#endif
-
-/** @see APR_STATUS_IS_ENOTSOCK */
-#ifdef ENOTSOCK
-#define APR_ENOTSOCK ENOTSOCK
-#else
-#define APR_ENOTSOCK (APR_OS_START_CANONERR + 15)
-#endif
-
-/** @see APR_STATUS_IS_ECONNREFUSED */
-#ifdef ECONNREFUSED
-#define APR_ECONNREFUSED ECONNREFUSED
-#else
-#define APR_ECONNREFUSED (APR_OS_START_CANONERR + 16)
-#endif
-
-/** @see APR_STATUS_IS_EINPROGRESS */
-#ifdef EINPROGRESS
-#define APR_EINPROGRESS EINPROGRESS
-#else
-#define APR_EINPROGRESS (APR_OS_START_CANONERR + 17)
-#endif
-
-/**
- * @see APR_STATUS_IS_ECONNABORTED
- * @warning use APR_STATUS_IS_ECONNABORTED instead of just testing this value
- */
-
-#ifdef ECONNABORTED
-#define APR_ECONNABORTED ECONNABORTED
-#else
-#define APR_ECONNABORTED (APR_OS_START_CANONERR + 18)
-#endif
-
-/** @see APR_STATUS_IS_ECONNRESET */
-#ifdef ECONNRESET
-#define APR_ECONNRESET ECONNRESET
-#else
-#define APR_ECONNRESET (APR_OS_START_CANONERR + 19)
-#endif
-
-/** @see APR_STATUS_IS_ETIMEDOUT
- * @deprecated */
-#ifdef ETIMEDOUT
-#define APR_ETIMEDOUT ETIMEDOUT
-#else
-#define APR_ETIMEDOUT (APR_OS_START_CANONERR + 20)
-#endif
-
-/** @see APR_STATUS_IS_EHOSTUNREACH */
-#ifdef EHOSTUNREACH
-#define APR_EHOSTUNREACH EHOSTUNREACH
-#else
-#define APR_EHOSTUNREACH (APR_OS_START_CANONERR + 21)
-#endif
-
-/** @see APR_STATUS_IS_ENETUNREACH */
-#ifdef ENETUNREACH
-#define APR_ENETUNREACH ENETUNREACH
-#else
-#define APR_ENETUNREACH (APR_OS_START_CANONERR + 22)
-#endif
-
-/** @see APR_STATUS_IS_EFTYPE */
-#ifdef EFTYPE
-#define APR_EFTYPE EFTYPE
-#else
-#define APR_EFTYPE (APR_OS_START_CANONERR + 23)
-#endif
-
-/** @see APR_STATUS_IS_EPIPE */
-#ifdef EPIPE
-#define APR_EPIPE EPIPE
-#else
-#define APR_EPIPE (APR_OS_START_CANONERR + 24)
-#endif
-
-/** @see APR_STATUS_IS_EXDEV */
-#ifdef EXDEV
-#define APR_EXDEV EXDEV
-#else
-#define APR_EXDEV (APR_OS_START_CANONERR + 25)
-#endif
-
-/** @see APR_STATUS_IS_ENOTEMPTY */
-#ifdef ENOTEMPTY
-#define APR_ENOTEMPTY ENOTEMPTY
-#else
-#define APR_ENOTEMPTY (APR_OS_START_CANONERR + 26)
-#endif
-
-/** @} */
-
-#if defined(OS2) && !defined(DOXYGEN)
-
-#define APR_FROM_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e + APR_OS_START_SYSERR)
-#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
-
-#define INCL_DOSERRORS
-#define INCL_DOS
-
-/* Leave these undefined.
- * OS2 doesn't rely on the errno concept.
- * The API calls always return a result codes which
- * should be filtered through APR_FROM_OS_ERROR().
- *
- * #define apr_get_os_error() (APR_FROM_OS_ERROR(GetLastError()))
- * #define apr_set_os_error(e) (SetLastError(APR_TO_OS_ERROR(e)))
- */
-
-/* A special case, only socket calls require this;
- */
-#define apr_get_netos_error() (APR_FROM_OS_ERROR(errno))
-#define apr_set_netos_error(e) (errno = APR_TO_OS_ERROR(e))
-
-/* And this needs to be greped away for good:
- */
-#define APR_OS2_STATUS(e) (APR_FROM_OS_ERROR(e))
-
-/* These can't sit in a private header, so in spite of the extra size,
- * they need to be made available here.
- */
-#define SOCBASEERR 10000
-#define SOCEPERM (SOCBASEERR+1) /* Not owner */
-#define SOCESRCH (SOCBASEERR+3) /* No such process */
-#define SOCEINTR (SOCBASEERR+4) /* Interrupted system call */
-#define SOCENXIO (SOCBASEERR+6) /* No such device or address */
-#define SOCEBADF (SOCBASEERR+9) /* Bad file number */
-#define SOCEACCES (SOCBASEERR+13) /* Permission denied */
-#define SOCEFAULT (SOCBASEERR+14) /* Bad address */
-#define SOCEINVAL (SOCBASEERR+22) /* Invalid argument */
-#define SOCEMFILE (SOCBASEERR+24) /* Too many open files */
-#define SOCEPIPE (SOCBASEERR+32) /* Broken pipe */
-#define SOCEOS2ERR (SOCBASEERR+100) /* OS/2 Error */
-#define SOCEWOULDBLOCK (SOCBASEERR+35) /* Operation would block */
-#define SOCEINPROGRESS (SOCBASEERR+36) /* Operation now in progress */
-#define SOCEALREADY (SOCBASEERR+37) /* Operation already in progress */
-#define SOCENOTSOCK (SOCBASEERR+38) /* Socket operation on non-socket */
-#define SOCEDESTADDRREQ (SOCBASEERR+39) /* Destination address required */
-#define SOCEMSGSIZE (SOCBASEERR+40) /* Message too long */
-#define SOCEPROTOTYPE (SOCBASEERR+41) /* Protocol wrong type for socket */
-#define SOCENOPROTOOPT (SOCBASEERR+42) /* Protocol not available */
-#define SOCEPROTONOSUPPORT (SOCBASEERR+43) /* Protocol not supported */
-#define SOCESOCKTNOSUPPORT (SOCBASEERR+44) /* Socket type not supported */
-#define SOCEOPNOTSUPP (SOCBASEERR+45) /* Operation not supported on socket */
-#define SOCEPFNOSUPPORT (SOCBASEERR+46) /* Protocol family not supported */
-#define SOCEAFNOSUPPORT (SOCBASEERR+47) /* Address family not supported by protocol family */
-#define SOCEADDRINUSE (SOCBASEERR+48) /* Address already in use */
-#define SOCEADDRNOTAVAIL (SOCBASEERR+49) /* Can't assign requested address */
-#define SOCENETDOWN (SOCBASEERR+50) /* Network is down */
-#define SOCENETUNREACH (SOCBASEERR+51) /* Network is unreachable */
-#define SOCENETRESET (SOCBASEERR+52) /* Network dropped connection on reset */
-#define SOCECONNABORTED (SOCBASEERR+53) /* Software caused connection abort */
-#define SOCECONNRESET (SOCBASEERR+54) /* Connection reset by peer */
-#define SOCENOBUFS (SOCBASEERR+55) /* No buffer space available */
-#define SOCEISCONN (SOCBASEERR+56) /* Socket is already connected */
-#define SOCENOTCONN (SOCBASEERR+57) /* Socket is not connected */
-#define SOCESHUTDOWN (SOCBASEERR+58) /* Can't send after socket shutdown */
-#define SOCETOOMANYREFS (SOCBASEERR+59) /* Too many references: can't splice */
-#define SOCETIMEDOUT (SOCBASEERR+60) /* Connection timed out */
-#define SOCECONNREFUSED (SOCBASEERR+61) /* Connection refused */
-#define SOCELOOP (SOCBASEERR+62) /* Too many levels of symbolic links */
-#define SOCENAMETOOLONG (SOCBASEERR+63) /* File name too long */
-#define SOCEHOSTDOWN (SOCBASEERR+64) /* Host is down */
-#define SOCEHOSTUNREACH (SOCBASEERR+65) /* No route to host */
-#define SOCENOTEMPTY (SOCBASEERR+66) /* Directory not empty */
-
-/* APR CANONICAL ERROR TESTS */
-#define APR_STATUS_IS_EACCES(s) ((s) == APR_EACCES \
- || (s) == APR_OS_START_SYSERR + ERROR_ACCESS_DENIED \
- || (s) == APR_OS_START_SYSERR + ERROR_SHARING_VIOLATION)
-#define APR_STATUS_IS_EEXIST(s) ((s) == APR_EEXIST \
- || (s) == APR_OS_START_SYSERR + ERROR_OPEN_FAILED \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_EXISTS \
- || (s) == APR_OS_START_SYSERR + ERROR_ALREADY_EXISTS \
- || (s) == APR_OS_START_SYSERR + ERROR_ACCESS_DENIED)
-#define APR_STATUS_IS_ENAMETOOLONG(s) ((s) == APR_ENAMETOOLONG \
- || (s) == APR_OS_START_SYSERR + ERROR_FILENAME_EXCED_RANGE \
- || (s) == APR_OS_START_SYSERR + SOCENAMETOOLONG)
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_PATH_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_MORE_FILES \
- || (s) == APR_OS_START_SYSERR + ERROR_OPEN_FAILED)
-#define APR_STATUS_IS_ENOTDIR(s) ((s) == APR_ENOTDIR)
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC \
- || (s) == APR_OS_START_SYSERR + ERROR_DISK_FULL)
-#define APR_STATUS_IS_ENOMEM(s) ((s) == APR_ENOMEM)
-#define APR_STATUS_IS_EMFILE(s) ((s) == APR_EMFILE \
- || (s) == APR_OS_START_SYSERR + ERROR_TOO_MANY_OPEN_FILES)
-#define APR_STATUS_IS_ENFILE(s) ((s) == APR_ENFILE)
-#define APR_STATUS_IS_EBADF(s) ((s) == APR_EBADF \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_HANDLE)
-#define APR_STATUS_IS_EINVAL(s) ((s) == APR_EINVAL \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_PARAMETER \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_FUNCTION)
-#define APR_STATUS_IS_ESPIPE(s) ((s) == APR_ESPIPE \
- || (s) == APR_OS_START_SYSERR + ERROR_NEGATIVE_SEEK)
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_DATA \
- || (s) == APR_OS_START_SYSERR + SOCEWOULDBLOCK \
- || (s) == APR_OS_START_SYSERR + ERROR_LOCK_VIOLATION)
-#define APR_STATUS_IS_EINTR(s) ((s) == APR_EINTR \
- || (s) == APR_OS_START_SYSERR + SOCEINTR)
-#define APR_STATUS_IS_ENOTSOCK(s) ((s) == APR_ENOTSOCK \
- || (s) == APR_OS_START_SYSERR + SOCENOTSOCK)
-#define APR_STATUS_IS_ECONNREFUSED(s) ((s) == APR_ECONNREFUSED \
- || (s) == APR_OS_START_SYSERR + SOCECONNREFUSED)
-#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS \
- || (s) == APR_OS_START_SYSERR + SOCEINPROGRESS)
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED \
- || (s) == APR_OS_START_SYSERR + SOCECONNABORTED)
-#define APR_STATUS_IS_ECONNRESET(s) ((s) == APR_ECONNRESET \
- || (s) == APR_OS_START_SYSERR + SOCECONNRESET)
-/* XXX deprecated */
-#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + SOCETIMEDOUT)
-#undef APR_STATUS_IS_TIMEUP
-#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP \
- || (s) == APR_OS_START_SYSERR + SOCETIMEDOUT)
-#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH \
- || (s) == APR_OS_START_SYSERR + SOCEHOSTUNREACH)
-#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH \
- || (s) == APR_OS_START_SYSERR + SOCENETUNREACH)
-#define APR_STATUS_IS_EFTYPE(s) ((s) == APR_EFTYPE)
-#define APR_STATUS_IS_EPIPE(s) ((s) == APR_EPIPE \
- || (s) == APR_OS_START_SYSERR + ERROR_BROKEN_PIPE \
- || (s) == APR_OS_START_SYSERR + SOCEPIPE)
-#define APR_STATUS_IS_EXDEV(s) ((s) == APR_EXDEV \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_SAME_DEVICE)
-#define APR_STATUS_IS_ENOTEMPTY(s) ((s) == APR_ENOTEMPTY \
- || (s) == APR_OS_START_SYSERR + ERROR_DIR_NOT_EMPTY \
- || (s) == APR_OS_START_SYSERR + ERROR_ACCESS_DENIED)
-
-/*
- Sorry, too tired to wrap this up for OS2... feel free to
- fit the following into their best matches.
-
- { ERROR_NO_SIGNAL_SENT, ESRCH },
- { SOCEALREADY, EALREADY },
- { SOCEDESTADDRREQ, EDESTADDRREQ },
- { SOCEMSGSIZE, EMSGSIZE },
- { SOCEPROTOTYPE, EPROTOTYPE },
- { SOCENOPROTOOPT, ENOPROTOOPT },
- { SOCEPROTONOSUPPORT, EPROTONOSUPPORT },
- { SOCESOCKTNOSUPPORT, ESOCKTNOSUPPORT },
- { SOCEOPNOTSUPP, EOPNOTSUPP },
- { SOCEPFNOSUPPORT, EPFNOSUPPORT },
- { SOCEAFNOSUPPORT, EAFNOSUPPORT },
- { SOCEADDRINUSE, EADDRINUSE },
- { SOCEADDRNOTAVAIL, EADDRNOTAVAIL },
- { SOCENETDOWN, ENETDOWN },
- { SOCENETRESET, ENETRESET },
- { SOCENOBUFS, ENOBUFS },
- { SOCEISCONN, EISCONN },
- { SOCENOTCONN, ENOTCONN },
- { SOCESHUTDOWN, ESHUTDOWN },
- { SOCETOOMANYREFS, ETOOMANYREFS },
- { SOCELOOP, ELOOP },
- { SOCEHOSTDOWN, EHOSTDOWN },
- { SOCENOTEMPTY, ENOTEMPTY },
- { SOCEPIPE, EPIPE }
-*/
-
-#elif defined(WIN32) && !defined(DOXYGEN) /* !defined(OS2) */
-
-#define APR_FROM_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e + APR_OS_START_SYSERR)
-#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
-
-#define apr_get_os_error() (APR_FROM_OS_ERROR(GetLastError()))
-#define apr_set_os_error(e) (SetLastError(APR_TO_OS_ERROR(e)))
-
-/* A special case, only socket calls require this:
- */
-#define apr_get_netos_error() (APR_FROM_OS_ERROR(WSAGetLastError()))
-#define apr_set_netos_error(e) (WSASetLastError(APR_TO_OS_ERROR(e)))
-
-/* APR CANONICAL ERROR TESTS */
-#define APR_STATUS_IS_EACCES(s) ((s) == APR_EACCES \
- || (s) == APR_OS_START_SYSERR + ERROR_ACCESS_DENIED \
- || (s) == APR_OS_START_SYSERR + ERROR_CANNOT_MAKE \
- || (s) == APR_OS_START_SYSERR + ERROR_CURRENT_DIRECTORY \
- || (s) == APR_OS_START_SYSERR + ERROR_DRIVE_LOCKED \
- || (s) == APR_OS_START_SYSERR + ERROR_FAIL_I24 \
- || (s) == APR_OS_START_SYSERR + ERROR_LOCK_VIOLATION \
- || (s) == APR_OS_START_SYSERR + ERROR_LOCK_FAILED \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_LOCKED \
- || (s) == APR_OS_START_SYSERR + ERROR_NETWORK_ACCESS_DENIED \
- || (s) == APR_OS_START_SYSERR + ERROR_SHARING_VIOLATION)
-#define APR_STATUS_IS_EEXIST(s) ((s) == APR_EEXIST \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_EXISTS \
- || (s) == APR_OS_START_SYSERR + ERROR_ALREADY_EXISTS)
-#define APR_STATUS_IS_ENAMETOOLONG(s) ((s) == APR_ENAMETOOLONG \
- || (s) == APR_OS_START_SYSERR + ERROR_FILENAME_EXCED_RANGE \
- || (s) == APR_OS_START_SYSERR + WSAENAMETOOLONG)
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_PATH_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_OPEN_FAILED \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_MORE_FILES)
-#define APR_STATUS_IS_ENOTDIR(s) ((s) == APR_ENOTDIR \
- || (s) == APR_OS_START_SYSERR + ERROR_PATH_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_NETPATH \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_NET_NAME \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_PATHNAME \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_DRIVE)
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC \
- || (s) == APR_OS_START_SYSERR + ERROR_DISK_FULL)
-#define APR_STATUS_IS_ENOMEM(s) ((s) == APR_ENOMEM \
- || (s) == APR_OS_START_SYSERR + ERROR_ARENA_TRASHED \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_ENOUGH_MEMORY \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_BLOCK \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_ENOUGH_QUOTA \
- || (s) == APR_OS_START_SYSERR + ERROR_OUTOFMEMORY)
-#define APR_STATUS_IS_EMFILE(s) ((s) == APR_EMFILE \
- || (s) == APR_OS_START_SYSERR + ERROR_TOO_MANY_OPEN_FILES)
-#define APR_STATUS_IS_ENFILE(s) ((s) == APR_ENFILE)
-#define APR_STATUS_IS_EBADF(s) ((s) == APR_EBADF \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_HANDLE \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_TARGET_HANDLE)
-#define APR_STATUS_IS_EINVAL(s) ((s) == APR_EINVAL \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_ACCESS \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_DATA \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_FUNCTION \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_HANDLE \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_PARAMETER \
- || (s) == APR_OS_START_SYSERR + ERROR_NEGATIVE_SEEK)
-#define APR_STATUS_IS_ESPIPE(s) ((s) == APR_ESPIPE \
- || (s) == APR_OS_START_SYSERR + ERROR_SEEK_ON_DEVICE \
- || (s) == APR_OS_START_SYSERR + ERROR_NEGATIVE_SEEK)
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_DATA \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_PROC_SLOTS \
- || (s) == APR_OS_START_SYSERR + ERROR_NESTING_NOT_ALLOWED \
- || (s) == APR_OS_START_SYSERR + ERROR_MAX_THRDS_REACHED \
- || (s) == APR_OS_START_SYSERR + ERROR_LOCK_VIOLATION \
- || (s) == APR_OS_START_SYSERR + WSAEWOULDBLOCK)
-#define APR_STATUS_IS_EINTR(s) ((s) == APR_EINTR \
- || (s) == APR_OS_START_SYSERR + WSAEINTR)
-#define APR_STATUS_IS_ENOTSOCK(s) ((s) == APR_ENOTSOCK \
- || (s) == APR_OS_START_SYSERR + WSAENOTSOCK)
-#define APR_STATUS_IS_ECONNREFUSED(s) ((s) == APR_ECONNREFUSED \
- || (s) == APR_OS_START_SYSERR + WSAECONNREFUSED)
-#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS \
- || (s) == APR_OS_START_SYSERR + WSAEINPROGRESS)
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED \
- || (s) == APR_OS_START_SYSERR + WSAECONNABORTED)
-#define APR_STATUS_IS_ECONNRESET(s) ((s) == APR_ECONNRESET \
- || (s) == APR_OS_START_SYSERR + ERROR_NETNAME_DELETED \
- || (s) == APR_OS_START_SYSERR + WSAECONNRESET)
-/* XXX deprecated */
-#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WSAETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WAIT_TIMEOUT)
-#undef APR_STATUS_IS_TIMEUP
-#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP \
- || (s) == APR_OS_START_SYSERR + WSAETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WAIT_TIMEOUT)
-#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH \
- || (s) == APR_OS_START_SYSERR + WSAEHOSTUNREACH)
-#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH \
- || (s) == APR_OS_START_SYSERR + WSAENETUNREACH)
-#define APR_STATUS_IS_EFTYPE(s) ((s) == APR_EFTYPE \
- || (s) == APR_OS_START_SYSERR + ERROR_EXE_MACHINE_TYPE_MISMATCH \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_DLL \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_MODULETYPE \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_EXE_FORMAT \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_EXE_SIGNATURE \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_CORRUPT \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_FORMAT)
-#define APR_STATUS_IS_EPIPE(s) ((s) == APR_EPIPE \
- || (s) == APR_OS_START_SYSERR + ERROR_BROKEN_PIPE)
-#define APR_STATUS_IS_EXDEV(s) ((s) == APR_EXDEV \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_SAME_DEVICE)
-#define APR_STATUS_IS_ENOTEMPTY(s) ((s) == APR_ENOTEMPTY \
- || (s) == APR_OS_START_SYSERR + ERROR_DIR_NOT_EMPTY)
-
-#elif defined(NETWARE) && defined(USE_WINSOCK) && !defined(DOXYGEN) /* !defined(OS2) && !defined(WIN32) */
-
-#define APR_FROM_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e + APR_OS_START_SYSERR)
-#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
-
-#define apr_get_os_error() (errno)
-#define apr_set_os_error(e) (errno = (e))
-
-/* A special case, only socket calls require this: */
-#define apr_get_netos_error() (APR_FROM_OS_ERROR(WSAGetLastError()))
-#define apr_set_netos_error(e) (WSASetLastError(APR_TO_OS_ERROR(e)))
-
-/* APR CANONICAL ERROR TESTS */
-#define APR_STATUS_IS_EACCES(s) ((s) == APR_EACCES)
-#define APR_STATUS_IS_EEXIST(s) ((s) == APR_EEXIST)
-#define APR_STATUS_IS_ENAMETOOLONG(s) ((s) == APR_ENAMETOOLONG)
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT)
-#define APR_STATUS_IS_ENOTDIR(s) ((s) == APR_ENOTDIR)
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC)
-#define APR_STATUS_IS_ENOMEM(s) ((s) == APR_ENOMEM)
-#define APR_STATUS_IS_EMFILE(s) ((s) == APR_EMFILE)
-#define APR_STATUS_IS_ENFILE(s) ((s) == APR_ENFILE)
-#define APR_STATUS_IS_EBADF(s) ((s) == APR_EBADF)
-#define APR_STATUS_IS_EINVAL(s) ((s) == APR_EINVAL)
-#define APR_STATUS_IS_ESPIPE(s) ((s) == APR_ESPIPE)
-
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN \
- || (s) == EWOULDBLOCK \
- || (s) == APR_OS_START_SYSERR + WSAEWOULDBLOCK)
-#define APR_STATUS_IS_EINTR(s) ((s) == APR_EINTR \
- || (s) == APR_OS_START_SYSERR + WSAEINTR)
-#define APR_STATUS_IS_ENOTSOCK(s) ((s) == APR_ENOTSOCK \
- || (s) == APR_OS_START_SYSERR + WSAENOTSOCK)
-#define APR_STATUS_IS_ECONNREFUSED(s) ((s) == APR_ECONNREFUSED \
- || (s) == APR_OS_START_SYSERR + WSAECONNREFUSED)
-#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS \
- || (s) == APR_OS_START_SYSERR + WSAEINPROGRESS)
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED \
- || (s) == APR_OS_START_SYSERR + WSAECONNABORTED)
-#define APR_STATUS_IS_ECONNRESET(s) ((s) == APR_ECONNRESET \
- || (s) == APR_OS_START_SYSERR + WSAECONNRESET)
-/* XXX deprecated */
-#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WSAETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WAIT_TIMEOUT)
-#undef APR_STATUS_IS_TIMEUP
-#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP \
- || (s) == APR_OS_START_SYSERR + WSAETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WAIT_TIMEOUT)
-#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH \
- || (s) == APR_OS_START_SYSERR + WSAEHOSTUNREACH)
-#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH \
- || (s) == APR_OS_START_SYSERR + WSAENETUNREACH)
-#define APR_STATUS_IS_ENETDOWN(s) ((s) == APR_OS_START_SYSERR + WSAENETDOWN)
-#define APR_STATUS_IS_EFTYPE(s) ((s) == APR_EFTYPE)
-#define APR_STATUS_IS_EPIPE(s) ((s) == APR_EPIPE)
-#define APR_STATUS_IS_EXDEV(s) ((s) == APR_EXDEV)
-#define APR_STATUS_IS_ENOTEMPTY(s) ((s) == APR_ENOTEMPTY)
-
-#else /* !defined(NETWARE) && !defined(OS2) && !defined(WIN32) */
-
-/*
- * os error codes are clib error codes
- */
-#define APR_FROM_OS_ERROR(e) (e)
-#define APR_TO_OS_ERROR(e) (e)
-
-#define apr_get_os_error() (errno)
-#define apr_set_os_error(e) (errno = (e))
-
-/* A special case, only socket calls require this:
- */
-#define apr_get_netos_error() (errno)
-#define apr_set_netos_error(e) (errno = (e))
-
-/**
- * @addtogroup APR_STATUS_IS
- * @{
- */
-
-/** permission denied */
-#define APR_STATUS_IS_EACCES(s) ((s) == APR_EACCES)
-/** file exists */
-#define APR_STATUS_IS_EEXIST(s) ((s) == APR_EEXIST)
-/** path name is too long */
-#define APR_STATUS_IS_ENAMETOOLONG(s) ((s) == APR_ENAMETOOLONG)
-/**
- * no such file or directory
- * @remark
- * EMVSCATLG can be returned by the automounter on z/OS for
- * paths which do not exist.
- */
-#ifdef EMVSCATLG
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT \
- || (s) == EMVSCATLG)
-#else
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT)
-#endif
-/** not a directory */
-#define APR_STATUS_IS_ENOTDIR(s) ((s) == APR_ENOTDIR)
-/** no space left on device */
-#ifdef EDQUOT
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC \
- || (s) == EDQUOT)
-#else
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC)
-#endif
-/** not enough memory */
-#define APR_STATUS_IS_ENOMEM(s) ((s) == APR_ENOMEM)
-/** too many open files */
-#define APR_STATUS_IS_EMFILE(s) ((s) == APR_EMFILE)
-/** file table overflow */
-#define APR_STATUS_IS_ENFILE(s) ((s) == APR_ENFILE)
-/** bad file # */
-#define APR_STATUS_IS_EBADF(s) ((s) == APR_EBADF)
-/** invalid argument */
-#define APR_STATUS_IS_EINVAL(s) ((s) == APR_EINVAL)
-/** illegal seek */
-#define APR_STATUS_IS_ESPIPE(s) ((s) == APR_ESPIPE)
-
-/** operation would block */
-#if !defined(EWOULDBLOCK) || !defined(EAGAIN)
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN)
-#elif (EWOULDBLOCK == EAGAIN)
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN)
-#else
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN \
- || (s) == EWOULDBLOCK)
-#endif
-
-/** interrupted system call */
-#define APR_STATUS_IS_EINTR(s) ((s) == APR_EINTR)
-/** socket operation on a non-socket */
-#define APR_STATUS_IS_ENOTSOCK(s) ((s) == APR_ENOTSOCK)
-/** Connection Refused */
-#define APR_STATUS_IS_ECONNREFUSED(s) ((s) == APR_ECONNREFUSED)
-/** operation now in progress */
-#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS)
-
-/**
- * Software caused connection abort
- * @remark
- * EPROTO on certain older kernels really means ECONNABORTED, so we need to
- * ignore it for them. See discussion in new-httpd archives nh.9701 & nh.9603
- *
- * There is potentially a bug in Solaris 2.x x<6, and other boxes that
- * implement tcp sockets in userland (i.e. on top of STREAMS). On these
- * systems, EPROTO can actually result in a fatal loop. See PR#981 for
- * example. It's hard to handle both uses of EPROTO.
- */
-#ifdef EPROTO
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED \
- || (s) == EPROTO)
-#else
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED)
-#endif
-
-/** Connection Reset by peer */
-#define APR_STATUS_IS_ECONNRESET(s) ((s) == APR_ECONNRESET)
-/** Operation timed out
- * @deprecated */
-#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT)
-/** no route to host */
-#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH)
-/** network is unreachable */
-#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH)
-/** inappropiate file type or format */
-#define APR_STATUS_IS_EFTYPE(s) ((s) == APR_EFTYPE)
-/** broken pipe */
-#define APR_STATUS_IS_EPIPE(s) ((s) == APR_EPIPE)
-/** cross device link */
-#define APR_STATUS_IS_EXDEV(s) ((s) == APR_EXDEV)
-/** Directory Not Empty */
-#define APR_STATUS_IS_ENOTEMPTY(s) ((s) == APR_ENOTEMPTY || \
- (s) == APR_EEXIST)
-/** @} */
-
-#endif /* !defined(NETWARE) && !defined(OS2) && !defined(WIN32) */
-
-/** @} */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ! APR_ERRNO_H */
-#ifndef _LINUX_ERRNO_H
-#define _LINUX_ERRNO_H
-
-#include <asm/errno.h>
-
-#ifdef __KERNEL__
-
-/* Should never be seen by user programs */
-#define ERESTARTSYS 512
-#define ERESTARTNOINTR 513
-#define ERESTARTNOHAND 514 /* restart if no handler.. */
-#define ENOIOCTLCMD 515 /* No ioctl command */
-#define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */
-
-/* Defined for the NFSv3 protocol */
-#define EBADHANDLE 521 /* Illegal NFS file handle */
-#define ENOTSYNC 522 /* Update synchronization mismatch */
-#define EBADCOOKIE 523 /* Cookie is stale */
-#define ENOTSUPP 524 /* Operation is not supported */
-#define ETOOSMALL 525 /* Buffer or request is too small */
-#define ESERVERFAULT 526 /* An untranslatable error occurred */
-#define EBADTYPE 527 /* Type not supported by server */
-#define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */
-#define EIOCBQUEUED 529 /* iocb queued, will get completion event */
-#define EIOCBRETRY 530 /* iocb queued, will trigger a retry */
-
-#endif
-
-#endif
-// Copyright (c) 1994 James Clark
-// See the file COPYING for copying permission.
-
-#ifndef ErrnoMessageArg_INCLUDED
-#define ErrnoMessageArg_INCLUDED 1
-
-#include "MessageArg.h"
-#include "rtti.h"
-
-#ifdef SP_NAMESPACE
-namespace SP_NAMESPACE {
-#endif
-
-class SP_API ErrnoMessageArg : public OtherMessageArg {
- RTTI_CLASS
-public:
- ErrnoMessageArg(int errnum) : errno_(errnum) { }
- MessageArg *copy() const;
- // errno might be a macro so we must use a different name
- int errnum() const;
-private:
- int errno_;
-};
-
-inline
-int ErrnoMessageArg::errnum() const
-{
- return errno_;
-}
-
-#ifdef SP_NAMESPACE
-}
-#endif
-
-#endif /* not ErrnoMessageArg_INCLUDED */
-/* Copyright (C) 1991,92,93,94,95,96,97,2002 Free Software Foundation, Inc.
- This file is part of the GNU C Library.
-
- The GNU C Library is free software; you can redistribute it and/or
- modify it under the terms of the GNU Lesser General Public
- License as published by the Free Software Foundation; either
- version 2.1 of the License, or (at your option) any later version.
-
- The GNU C Library is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- Lesser General Public License for more details.
-
- You should have received a copy of the GNU Lesser General Public
- License along with the GNU C Library; if not, write to the Free
- Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
- 02111-1307 USA. */
-
-/*
- * ISO C99 Standard: 7.5 Errors <errno.h>
- */
-
-#ifndef _ERRNO_H
-
-/* The includer defined __need_Emath if he wants only the definitions
- of EDOM and ERANGE, and not everything else. */
-#ifndef __need_Emath
-# define _ERRNO_H 1
-# include <features.h>
-#endif
-
-__BEGIN_DECLS
-
-/* Get the error number constants from the system-specific file.
- This file will test __need_Emath and _ERRNO_H. */
-#include <bits/errno.h>
-#undef __need_Emath
-
-#ifdef _ERRNO_H
-
-/* Declare the `errno' variable, unless it's defined as a macro by
- bits/errno.h. This is the case in GNU, where it is a per-thread
- variable. This redeclaration using the macro still works, but it
- will be a function declaration without a prototype and may trigger
- a -Wstrict-prototypes warning. */
-#ifndef errno
-extern int errno;
-#endif
-
-#ifdef __USE_GNU
-
-/* The full and simple forms of the name with which the program was
- invoked. These variables are set up automatically at startup based on
- the value of ARGV[0] (this works only if you use GNU ld). */
-extern char *program_invocation_name, *program_invocation_short_name;
-#endif /* __USE_GNU */
-#endif /* _ERRNO_H */
-
-__END_DECLS
-
-#endif /* _ERRNO_H */
-
-/* The Hurd <bits/errno.h> defines `error_t' as an enumerated type so
- that printing `error_t' values in the debugger shows the names. We
- might need this definition sometimes even if this file was included
- before. */
-#if defined __USE_GNU || defined __need_error_t
-# ifndef __error_t_defined
-typedef int error_t;
-# define __error_t_defined 1
-# endif
-# undef __need_error_t
-#endif
-#ifndef _I386_ERRNO_H
-#define _I386_ERRNO_H
-
-#include <asm-generic/errno.h>
-
-#endif
-#ifndef _ASM_GENERIC_ERRNO_BASE_H
-#define _ASM_GENERIC_ERRNO_BASE_H
-
-#define EPERM 1 /* Operation not permitted */
-#define ENOENT 2 /* No such file or directory */
-#define ESRCH 3 /* No such process */
-#define EINTR 4 /* Interrupted system call */
-#define EIO 5 /* I/O error */
-#define ENXIO 6 /* No such device or address */
-#define E2BIG 7 /* Argument list too long */
-#define ENOEXEC 8 /* Exec format error */
-#define EBADF 9 /* Bad file number */
-#define ECHILD 10 /* No child processes */
-#define EAGAIN 11 /* Try again */
-#define ENOMEM 12 /* Out of memory */
-#define EACCES 13 /* Permission denied */
-#define EFAULT 14 /* Bad address */
-#define ENOTBLK 15 /* Block device required */
-#define EBUSY 16 /* Device or resource busy */
-#define EEXIST 17 /* File exists */
-#define EXDEV 18 /* Cross-device link */
-#define ENODEV 19 /* No such device */
-#define ENOTDIR 20 /* Not a directory */
-#define EISDIR 21 /* Is a directory */
-#define EINVAL 22 /* Invalid argument */
-#define ENFILE 23 /* File table overflow */
-#define EMFILE 24 /* Too many open files */
-#define ENOTTY 25 /* Not a typewriter */
-#define ETXTBSY 26 /* Text file busy */
-#define EFBIG 27 /* File too large */
-#define ENOSPC 28 /* No space left on device */
-#define ESPIPE 29 /* Illegal seek */
-#define EROFS 30 /* Read-only file system */
-#define EMLINK 31 /* Too many links */
-#define EPIPE 32 /* Broken pipe */
-#define EDOM 33 /* Math argument out of domain of func */
-#define ERANGE 34 /* Math result not representable */
-
-#endif
-#ifndef _ASM_GENERIC_ERRNO_H
-#define _ASM_GENERIC_ERRNO_H
-
-#include <asm-generic/errno-base.h>
-
-#define EDEADLK 35 /* Resource deadlock would occur */
-#define ENAMETOOLONG 36 /* File name too long */
-#define ENOLCK 37 /* No record locks available */
-#define ENOSYS 38 /* Function not implemented */
-#define ENOTEMPTY 39 /* Directory not empty */
-#define ELOOP 40 /* Too many symbolic links encountered */
-#define EWOULDBLOCK EAGAIN /* Operation would block */
-#define ENOMSG 42 /* No message of desired type */
-#define EIDRM 43 /* Identifier removed */
-#define ECHRNG 44 /* Channel number out of range */
-#define EL2NSYNC 45 /* Level 2 not synchronized */
-#define EL3HLT 46 /* Level 3 halted */
-#define EL3RST 47 /* Level 3 reset */
-#define ELNRNG 48 /* Link number out of range */
-#define EUNATCH 49 /* Protocol driver not attached */
-#define ENOCSI 50 /* No CSI structure available */
-#define EL2HLT 51 /* Level 2 halted */
-#define EBADE 52 /* Invalid exchange */
-#define EBADR 53 /* Invalid request descriptor */
-#define EXFULL 54 /* Exchange full */
-#define ENOANO 55 /* No anode */
-#define EBADRQC 56 /* Invalid request code */
-#define EBADSLT 57 /* Invalid slot */
-
-#define EDEADLOCK EDEADLK
-
-#define EBFONT 59 /* Bad font file format */
-#define ENOSTR 60 /* Device not a stream */
-#define ENODATA 61 /* No data available */
-#define ETIME 62 /* Timer expired */
-#define ENOSR 63 /* Out of streams resources */
-#define ENONET 64 /* Machine is not on the network */
-#define ENOPKG 65 /* Package not installed */
-#define EREMOTE 66 /* Object is remote */
-#define ENOLINK 67 /* Link has been severed */
-#define EADV 68 /* Advertise error */
-#define ESRMNT 69 /* Srmount error */
-#define ECOMM 70 /* Communication error on send */
-#define EPROTO 71 /* Protocol error */
-#define EMULTIHOP 72 /* Multihop attempted */
-#define EDOTDOT 73 /* RFS specific error */
-#define EBADMSG 74 /* Not a data message */
-#define EOVERFLOW 75 /* Value too large for defined data type */
-#define ENOTUNIQ 76 /* Name not unique on network */
-#define EBADFD 77 /* File descriptor in bad state */
-#define EREMCHG 78 /* Remote address changed */
-#define ELIBACC 79 /* Can not access a needed shared library */
-#define ELIBBAD 80 /* Accessing a corrupted shared library */
-#define ELIBSCN 81 /* .lib section in a.out corrupted */
-#define ELIBMAX 82 /* Attempting to link in too many shared libraries */
-#define ELIBEXEC 83 /* Cannot exec a shared library directly */
-#define EILSEQ 84 /* Illegal byte sequence */
-#define ERESTART 85 /* Interrupted system call should be restarted */
-#define ESTRPIPE 86 /* Streams pipe error */
-#define EUSERS 87 /* Too many users */
-#define ENOTSOCK 88 /* Socket operation on non-socket */
-#define EDESTADDRREQ 89 /* Destination address required */
-#define EMSGSIZE 90 /* Message too long */
-#define EPROTOTYPE 91 /* Protocol wrong type for socket */
-#define ENOPROTOOPT 92 /* Protocol not available */
-#define EPROTONOSUPPORT 93 /* Protocol not supported */
-#define ESOCKTNOSUPPORT 94 /* Socket type not supported */
-#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */
-#define EPFNOSUPPORT 96 /* Protocol family not supported */
-#define EAFNOSUPPORT 97 /* Address family not supported by protocol */
-#define EADDRINUSE 98 /* Address already in use */
-#define EADDRNOTAVAIL 99 /* Cannot assign requested address */
-#define ENETDOWN 100 /* Network is down */
-#define ENETUNREACH 101 /* Network is unreachable */
-#define ENETRESET 102 /* Network dropped connection because of reset */
-#define ECONNABORTED 103 /* Software caused connection abort */
-#define ECONNRESET 104 /* Connection reset by peer */
-#define ENOBUFS 105 /* No buffer space available */
-#define EISCONN 106 /* Transport endpoint is already connected */
-#define ENOTCONN 107 /* Transport endpoint is not connected */
-#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */
-#define ETOOMANYREFS 109 /* Too many references: cannot splice */
-#define ETIMEDOUT 110 /* Connection timed out */
-#define ECONNREFUSED 111 /* Connection refused */
-#define EHOSTDOWN 112 /* Host is down */
-#define EHOSTUNREACH 113 /* No route to host */
-#define EALREADY 114 /* Operation already in progress */
-#define EINPROGRESS 115 /* Operation now in progress */
-#define ESTALE 116 /* Stale NFS file handle */
-#define EUCLEAN 117 /* Structure needs cleaning */
-#define ENOTNAM 118 /* Not a XENIX named type file */
-#define ENAVAIL 119 /* No XENIX semaphores available */
-#define EISNAM 120 /* Is a named type file */
-#define EREMOTEIO 121 /* Remote I/O error */
-#define EDQUOT 122 /* Quota exceeded */
-
-#define ENOMEDIUM 123 /* No medium found */
-#define EMEDIUMTYPE 124 /* Wrong medium type */
-#define ECANCELED 125 /* Operation Canceled */
-#define ENOKEY 126 /* Required key not available */
-#define EKEYEXPIRED 127 /* Key has expired */
-#define EKEYREVOKED 128 /* Key has been revoked */
-#define EKEYREJECTED 129 /* Key was rejected by service */
-
-/* for robust mutexes */
-#define EOWNERDEAD 130 /* Owner died */
-#define ENOTRECOVERABLE 131 /* State not recoverable */
-
-#endif
diff --git a/doc/legacy/errno.list.macosx.txt b/doc/legacy/errno.list.macosx.txt
deleted file mode 100644
index 4954e03d855..00000000000
--- a/doc/legacy/errno.list.macosx.txt
+++ /dev/null
@@ -1,1513 +0,0 @@
-/* Copyright 2000-2005 The Apache Software Foundation or its licensors, as
- * applicable.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef APR_ERRNO_H
-#define APR_ERRNO_H
-
-/**
- * @file apr_errno.h
- * @brief APR Error Codes
- */
-
-#include "apr.h"
-
-#if APR_HAVE_ERRNO_H
-#include <errno.h>
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif /* __cplusplus */
-
-/**
- * @defgroup apr_errno Error Codes
- * @ingroup APR
- * @{
- */
-
-/**
- * Type for specifying an error or status code.
- */
-typedef int apr_status_t;
-
-/**
- * Return a human readable string describing the specified error.
- * @param statcode The error code the get a string for.
- * @param buf A buffer to hold the error string.
- * @param bufsize Size of the buffer to hold the string.
- */
-APR_DECLARE(char *) apr_strerror(apr_status_t statcode, char *buf,
- apr_size_t bufsize);
-
-#if defined(DOXYGEN)
-/**
- * @def APR_FROM_OS_ERROR(os_err_type syserr)
- * Fold a platform specific error into an apr_status_t code.
- * @return apr_status_t
- * @param e The platform os error code.
- * @warning macro implementation; the syserr argument may be evaluated
- * multiple times.
- */
-#define APR_FROM_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e + APR_OS_START_SYSERR)
-
-/**
- * @def APR_TO_OS_ERROR(apr_status_t statcode)
- * @return os_err_type
- * Fold an apr_status_t code back to the native platform defined error.
- * @param e The apr_status_t folded platform os error code.
- * @warning macro implementation; the statcode argument may be evaluated
- * multiple times. If the statcode was not created by apr_get_os_error
- * or APR_FROM_OS_ERROR, the results are undefined.
- */
-#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
-
-/** @def apr_get_os_error()
- * @return apr_status_t the last platform error, folded into apr_status_t, on most platforms
- * @remark This retrieves errno, or calls a GetLastError() style function, and
- * folds it with APR_FROM_OS_ERROR. Some platforms (such as OS2) have no
- * such mechanism, so this call may be unsupported. Do NOT use this
- * call for socket errors from socket, send, recv etc!
- */
-
-/** @def apr_set_os_error(e)
- * Reset the last platform error, unfolded from an apr_status_t, on some platforms
- * @param e The OS error folded in a prior call to APR_FROM_OS_ERROR()
- * @warning This is a macro implementation; the statcode argument may be evaluated
- * multiple times. If the statcode was not created by apr_get_os_error
- * or APR_FROM_OS_ERROR, the results are undefined. This macro sets
- * errno, or calls a SetLastError() style function, unfolding statcode
- * with APR_TO_OS_ERROR. Some platforms (such as OS2) have no such
- * mechanism, so this call may be unsupported.
- */
-
-/** @def apr_get_netos_error()
- * Return the last socket error, folded into apr_status_t, on all platforms
- * @remark This retrieves errno or calls a GetLastSocketError() style function,
- * and folds it with APR_FROM_OS_ERROR.
- */
-
-/** @def apr_set_netos_error(e)
- * Reset the last socket error, unfolded from an apr_status_t
- * @param e The socket error folded in a prior call to APR_FROM_OS_ERROR()
- * @warning This is a macro implementation; the statcode argument may be evaluated
- * multiple times. If the statcode was not created by apr_get_os_error
- * or APR_FROM_OS_ERROR, the results are undefined. This macro sets
- * errno, or calls a WSASetLastError() style function, unfolding
- * socketcode with APR_TO_OS_ERROR.
- */
-
-#endif /* defined(DOXYGEN) */
-
-/**
- * APR_OS_START_ERROR is where the APR specific error values start.
- */
-#define APR_OS_START_ERROR 20000
-/**
- * APR_OS_ERRSPACE_SIZE is the maximum number of errors you can fit
- * into one of the error/status ranges below -- except for
- * APR_OS_START_USERERR, which see.
- */
-#define APR_OS_ERRSPACE_SIZE 50000
-/**
- * APR_OS_START_STATUS is where the APR specific status codes start.
- */
-#define APR_OS_START_STATUS (APR_OS_START_ERROR + APR_OS_ERRSPACE_SIZE)
-/**
- * APR_OS_START_USERERR are reserved for applications that use APR that
- * layer their own error codes along with APR's. Note that the
- * error immediately following this one is set ten times farther
- * away than usual, so that users of apr have a lot of room in
- * which to declare custom error codes.
- */
-#define APR_OS_START_USERERR (APR_OS_START_STATUS + APR_OS_ERRSPACE_SIZE)
-/**
- * APR_OS_START_USEERR is obsolete, defined for compatibility only.
- * Use APR_OS_START_USERERR instead.
- */
-#define APR_OS_START_USEERR APR_OS_START_USERERR
-/**
- * APR_OS_START_CANONERR is where APR versions of errno values are defined
- * on systems which don't have the corresponding errno.
- */
-#define APR_OS_START_CANONERR (APR_OS_START_USERERR \
- + (APR_OS_ERRSPACE_SIZE * 10))
-/**
- * APR_OS_START_EAIERR folds EAI_ error codes from getaddrinfo() into
- * apr_status_t values.
- */
-#define APR_OS_START_EAIERR (APR_OS_START_CANONERR + APR_OS_ERRSPACE_SIZE)
-/**
- * APR_OS_START_SYSERR folds platform-specific system error values into
- * apr_status_t values.
- */
-#define APR_OS_START_SYSERR (APR_OS_START_EAIERR + APR_OS_ERRSPACE_SIZE)
-
-/** no error. */
-#define APR_SUCCESS 0
-
-/**
- * @defgroup APR_Error APR Error Values
- * <PRE>
- * <b>APR ERROR VALUES</b>
- * APR_ENOSTAT APR was unable to perform a stat on the file
- * APR_ENOPOOL APR was not provided a pool with which to allocate memory
- * APR_EBADDATE APR was given an invalid date
- * APR_EINVALSOCK APR was given an invalid socket
- * APR_ENOPROC APR was not given a process structure
- * APR_ENOTIME APR was not given a time structure
- * APR_ENODIR APR was not given a directory structure
- * APR_ENOLOCK APR was not given a lock structure
- * APR_ENOPOLL APR was not given a poll structure
- * APR_ENOSOCKET APR was not given a socket
- * APR_ENOTHREAD APR was not given a thread structure
- * APR_ENOTHDKEY APR was not given a thread key structure
- * APR_ENOSHMAVAIL There is no more shared memory available
- * APR_EDSOOPEN APR was unable to open the dso object. For more
- * information call apr_dso_error().
- * APR_EGENERAL General failure (specific information not available)
- * APR_EBADIP The specified IP address is invalid
- * APR_EBADMASK The specified netmask is invalid
- * APR_ESYMNOTFOUND Could not find the requested symbol
- * </PRE>
- *
- * <PRE>
- * <b>APR STATUS VALUES</b>
- * APR_INCHILD Program is currently executing in the child
- * APR_INPARENT Program is currently executing in the parent
- * APR_DETACH The thread is detached
- * APR_NOTDETACH The thread is not detached
- * APR_CHILD_DONE The child has finished executing
- * APR_CHILD_NOTDONE The child has not finished executing
- * APR_TIMEUP The operation did not finish before the timeout
- * APR_INCOMPLETE The operation was incomplete although some processing
- * was performed and the results are partially valid
- * APR_BADCH Getopt found an option not in the option string
- * APR_BADARG Getopt found an option that is missing an argument
- * and an argument was specified in the option string
- * APR_EOF APR has encountered the end of the file
- * APR_NOTFOUND APR was unable to find the socket in the poll structure
- * APR_ANONYMOUS APR is using anonymous shared memory
- * APR_FILEBASED APR is using a file name as the key to the shared memory
- * APR_KEYBASED APR is using a shared key as the key to the shared memory
- * APR_EINIT Ininitalizer value. If no option has been found, but
- * the status variable requires a value, this should be used
- * APR_ENOTIMPL The APR function has not been implemented on this
- * platform, either because nobody has gotten to it yet,
- * or the function is impossible on this platform.
- * APR_EMISMATCH Two passwords do not match.
- * APR_EABSOLUTE The given path was absolute.
- * APR_ERELATIVE The given path was relative.
- * APR_EINCOMPLETE The given path was neither relative nor absolute.
- * APR_EABOVEROOT The given path was above the root path.
- * APR_EBUSY The given lock was busy.
- * APR_EPROC_UNKNOWN The given process wasn't recognized by APR
- * </PRE>
- * @{
- */
-/** @see APR_STATUS_IS_ENOSTAT */
-#define APR_ENOSTAT (APR_OS_START_ERROR + 1)
-/** @see APR_STATUS_IS_ENOPOOL */
-#define APR_ENOPOOL (APR_OS_START_ERROR + 2)
-/* empty slot: +3 */
-/** @see APR_STATUS_IS_EBADDATE */
-#define APR_EBADDATE (APR_OS_START_ERROR + 4)
-/** @see APR_STATUS_IS_EINVALSOCK */
-#define APR_EINVALSOCK (APR_OS_START_ERROR + 5)
-/** @see APR_STATUS_IS_ENOPROC */
-#define APR_ENOPROC (APR_OS_START_ERROR + 6)
-/** @see APR_STATUS_IS_ENOTIME */
-#define APR_ENOTIME (APR_OS_START_ERROR + 7)
-/** @see APR_STATUS_IS_ENODIR */
-#define APR_ENODIR (APR_OS_START_ERROR + 8)
-/** @see APR_STATUS_IS_ENOLOCK */
-#define APR_ENOLOCK (APR_OS_START_ERROR + 9)
-/** @see APR_STATUS_IS_ENOPOLL */
-#define APR_ENOPOLL (APR_OS_START_ERROR + 10)
-/** @see APR_STATUS_IS_ENOSOCKET */
-#define APR_ENOSOCKET (APR_OS_START_ERROR + 11)
-/** @see APR_STATUS_IS_ENOTHREAD */
-#define APR_ENOTHREAD (APR_OS_START_ERROR + 12)
-/** @see APR_STATUS_IS_ENOTHDKEY */
-#define APR_ENOTHDKEY (APR_OS_START_ERROR + 13)
-/** @see APR_STATUS_IS_EGENERAL */
-#define APR_EGENERAL (APR_OS_START_ERROR + 14)
-/** @see APR_STATUS_IS_ENOSHMAVAIL */
-#define APR_ENOSHMAVAIL (APR_OS_START_ERROR + 15)
-/** @see APR_STATUS_IS_EBADIP */
-#define APR_EBADIP (APR_OS_START_ERROR + 16)
-/** @see APR_STATUS_IS_EBADMASK */
-#define APR_EBADMASK (APR_OS_START_ERROR + 17)
-/* empty slot: +18 */
-/** @see APR_STATUS_IS_EDSOPEN */
-#define APR_EDSOOPEN (APR_OS_START_ERROR + 19)
-/** @see APR_STATUS_IS_EABSOLUTE */
-#define APR_EABSOLUTE (APR_OS_START_ERROR + 20)
-/** @see APR_STATUS_IS_ERELATIVE */
-#define APR_ERELATIVE (APR_OS_START_ERROR + 21)
-/** @see APR_STATUS_IS_EINCOMPLETE */
-#define APR_EINCOMPLETE (APR_OS_START_ERROR + 22)
-/** @see APR_STATUS_IS_EABOVEROOT */
-#define APR_EABOVEROOT (APR_OS_START_ERROR + 23)
-/** @see APR_STATUS_IS_EBADPATH */
-#define APR_EBADPATH (APR_OS_START_ERROR + 24)
-/** @see APR_STATUS_IS_EPATHWILD */
-#define APR_EPATHWILD (APR_OS_START_ERROR + 25)
-/** @see APR_STATUS_IS_ESYMNOTFOUND */
-#define APR_ESYMNOTFOUND (APR_OS_START_ERROR + 26)
-/** @see APR_STATUS_IS_EPROC_UNKNOWN */
-#define APR_EPROC_UNKNOWN (APR_OS_START_ERROR + 27)
-/** @see APR_STATUS_IS_ENOTENOUGHENTROPY */
-#define APR_ENOTENOUGHENTROPY (APR_OS_START_ERROR + 28)
-/** @} */
-
-/**
- * @defgroup APR_STATUS_IS Status Value Tests
- * @warning For any particular error condition, more than one of these tests
- * may match. This is because platform-specific error codes may not
- * always match the semantics of the POSIX codes these tests (and the
- * corresponding APR error codes) are named after. A notable example
- * are the APR_STATUS_IS_ENOENT and APR_STATUS_IS_ENOTDIR tests on
- * Win32 platforms. The programmer should always be aware of this and
- * adjust the order of the tests accordingly.
- * @{
- */
-/**
- * APR was unable to perform a stat on the file
- * @warning always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_ENOSTAT(s) ((s) == APR_ENOSTAT)
-/**
- * APR was not provided a pool with which to allocate memory
- * @warning always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_ENOPOOL(s) ((s) == APR_ENOPOOL)
-/** APR was given an invalid date */
-#define APR_STATUS_IS_EBADDATE(s) ((s) == APR_EBADDATE)
-/** APR was given an invalid socket */
-#define APR_STATUS_IS_EINVALSOCK(s) ((s) == APR_EINVALSOCK)
-/** APR was not given a process structure */
-#define APR_STATUS_IS_ENOPROC(s) ((s) == APR_ENOPROC)
-/** APR was not given a time structure */
-#define APR_STATUS_IS_ENOTIME(s) ((s) == APR_ENOTIME)
-/** APR was not given a directory structure */
-#define APR_STATUS_IS_ENODIR(s) ((s) == APR_ENODIR)
-/** APR was not given a lock structure */
-#define APR_STATUS_IS_ENOLOCK(s) ((s) == APR_ENOLOCK)
-/** APR was not given a poll structure */
-#define APR_STATUS_IS_ENOPOLL(s) ((s) == APR_ENOPOLL)
-/** APR was not given a socket */
-#define APR_STATUS_IS_ENOSOCKET(s) ((s) == APR_ENOSOCKET)
-/** APR was not given a thread structure */
-#define APR_STATUS_IS_ENOTHREAD(s) ((s) == APR_ENOTHREAD)
-/** APR was not given a thread key structure */
-#define APR_STATUS_IS_ENOTHDKEY(s) ((s) == APR_ENOTHDKEY)
-/** Generic Error which can not be put into another spot */
-#define APR_STATUS_IS_EGENERAL(s) ((s) == APR_EGENERAL)
-/** There is no more shared memory available */
-#define APR_STATUS_IS_ENOSHMAVAIL(s) ((s) == APR_ENOSHMAVAIL)
-/** The specified IP address is invalid */
-#define APR_STATUS_IS_EBADIP(s) ((s) == APR_EBADIP)
-/** The specified netmask is invalid */
-#define APR_STATUS_IS_EBADMASK(s) ((s) == APR_EBADMASK)
-/* empty slot: +18 */
-/**
- * APR was unable to open the dso object.
- * For more information call apr_dso_error().
- */
-#if defined(WIN32)
-#define APR_STATUS_IS_EDSOOPEN(s) ((s) == APR_EDSOOPEN \
- || APR_TO_OS_ERROR(s) == ERROR_MOD_NOT_FOUND)
-#else
-#define APR_STATUS_IS_EDSOOPEN(s) ((s) == APR_EDSOOPEN)
-#endif
-/** The given path was absolute. */
-#define APR_STATUS_IS_EABSOLUTE(s) ((s) == APR_EABSOLUTE)
-/** The given path was relative. */
-#define APR_STATUS_IS_ERELATIVE(s) ((s) == APR_ERELATIVE)
-/** The given path was neither relative nor absolute. */
-#define APR_STATUS_IS_EINCOMPLETE(s) ((s) == APR_EINCOMPLETE)
-/** The given path was above the root path. */
-#define APR_STATUS_IS_EABOVEROOT(s) ((s) == APR_EABOVEROOT)
-/** The given path was bad. */
-#define APR_STATUS_IS_EBADPATH(s) ((s) == APR_EBADPATH)
-/** The given path contained wildcards. */
-#define APR_STATUS_IS_EPATHWILD(s) ((s) == APR_EPATHWILD)
-/** Could not find the requested symbol.
- * For more information call apr_dso_error().
- */
-#if defined(WIN32)
-#define APR_STATUS_IS_ESYMNOTFOUND(s) ((s) == APR_ESYMNOTFOUND \
- || APR_TO_OS_ERROR(s) == ERROR_PROC_NOT_FOUND)
-#else
-#define APR_STATUS_IS_ESYMNOTFOUND(s) ((s) == APR_ESYMNOTFOUND)
-#endif
-/** The given process was not recognized by APR. */
-#define APR_STATUS_IS_EPROC_UNKNOWN(s) ((s) == APR_EPROC_UNKNOWN)
-
-/** APR could not gather enough entropy to continue. */
-#define APR_STATUS_IS_ENOTENOUGHENTROPY(s) ((s) == APR_ENOTENOUGHENTROPY)
-
-/** @} */
-
-/**
- * @addtogroup APR_Error
- * @{
- */
-/** @see APR_STATUS_IS_INCHILD */
-#define APR_INCHILD (APR_OS_START_STATUS + 1)
-/** @see APR_STATUS_IS_INPARENT */
-#define APR_INPARENT (APR_OS_START_STATUS + 2)
-/** @see APR_STATUS_IS_DETACH */
-#define APR_DETACH (APR_OS_START_STATUS + 3)
-/** @see APR_STATUS_IS_NOTDETACH */
-#define APR_NOTDETACH (APR_OS_START_STATUS + 4)
-/** @see APR_STATUS_IS_CHILD_DONE */
-#define APR_CHILD_DONE (APR_OS_START_STATUS + 5)
-/** @see APR_STATUS_IS_CHILD_NOTDONE */
-#define APR_CHILD_NOTDONE (APR_OS_START_STATUS + 6)
-/** @see APR_STATUS_IS_TIMEUP */
-#define APR_TIMEUP (APR_OS_START_STATUS + 7)
-/** @see APR_STATUS_IS_INCOMPLETE */
-#define APR_INCOMPLETE (APR_OS_START_STATUS + 8)
-/* empty slot: +9 */
-/* empty slot: +10 */
-/* empty slot: +11 */
-/** @see APR_STATUS_IS_BADCH */
-#define APR_BADCH (APR_OS_START_STATUS + 12)
-/** @see APR_STATUS_IS_BADARG */
-#define APR_BADARG (APR_OS_START_STATUS + 13)
-/** @see APR_STATUS_IS_EOF */
-#define APR_EOF (APR_OS_START_STATUS + 14)
-/** @see APR_STATUS_IS_NOTFOUND */
-#define APR_NOTFOUND (APR_OS_START_STATUS + 15)
-/* empty slot: +16 */
-/* empty slot: +17 */
-/* empty slot: +18 */
-/** @see APR_STATUS_IS_ANONYMOUS */
-#define APR_ANONYMOUS (APR_OS_START_STATUS + 19)
-/** @see APR_STATUS_IS_FILEBASED */
-#define APR_FILEBASED (APR_OS_START_STATUS + 20)
-/** @see APR_STATUS_IS_KEYBASED */
-#define APR_KEYBASED (APR_OS_START_STATUS + 21)
-/** @see APR_STATUS_IS_EINIT */
-#define APR_EINIT (APR_OS_START_STATUS + 22)
-/** @see APR_STATUS_IS_ENOTIMPL */
-#define APR_ENOTIMPL (APR_OS_START_STATUS + 23)
-/** @see APR_STATUS_IS_EMISMATCH */
-#define APR_EMISMATCH (APR_OS_START_STATUS + 24)
-/** @see APR_STATUS_IS_EBUSY */
-#define APR_EBUSY (APR_OS_START_STATUS + 25)
-/** @} */
-
-/**
- * @addtogroup APR_STATUS_IS
- * @{
- */
-/**
- * Program is currently executing in the child
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code */
-#define APR_STATUS_IS_INCHILD(s) ((s) == APR_INCHILD)
-/**
- * Program is currently executing in the parent
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_INPARENT(s) ((s) == APR_INPARENT)
-/**
- * The thread is detached
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_DETACH(s) ((s) == APR_DETACH)
-/**
- * The thread is not detached
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_NOTDETACH(s) ((s) == APR_NOTDETACH)
-/**
- * The child has finished executing
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_CHILD_DONE(s) ((s) == APR_CHILD_DONE)
-/**
- * The child has not finished executing
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_CHILD_NOTDONE(s) ((s) == APR_CHILD_NOTDONE)
-/**
- * The operation did not finish before the timeout
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP)
-/**
- * The operation was incomplete although some processing was performed
- * and the results are partially valid.
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_INCOMPLETE(s) ((s) == APR_INCOMPLETE)
-/* empty slot: +9 */
-/* empty slot: +10 */
-/* empty slot: +11 */
-/**
- * Getopt found an option not in the option string
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_BADCH(s) ((s) == APR_BADCH)
-/**
- * Getopt found an option not in the option string and an argument was
- * specified in the option string
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_BADARG(s) ((s) == APR_BADARG)
-/**
- * APR has encountered the end of the file
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_EOF(s) ((s) == APR_EOF)
-/**
- * APR was unable to find the socket in the poll structure
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_NOTFOUND(s) ((s) == APR_NOTFOUND)
-/* empty slot: +16 */
-/* empty slot: +17 */
-/* empty slot: +18 */
-/**
- * APR is using anonymous shared memory
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_ANONYMOUS(s) ((s) == APR_ANONYMOUS)
-/**
- * APR is using a file name as the key to the shared memory
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_FILEBASED(s) ((s) == APR_FILEBASED)
-/**
- * APR is using a shared key as the key to the shared memory
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_KEYBASED(s) ((s) == APR_KEYBASED)
-/**
- * Ininitalizer value. If no option has been found, but
- * the status variable requires a value, this should be used
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_EINIT(s) ((s) == APR_EINIT)
-/**
- * The APR function has not been implemented on this
- * platform, either because nobody has gotten to it yet,
- * or the function is impossible on this platform.
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_ENOTIMPL(s) ((s) == APR_ENOTIMPL)
-/**
- * Two passwords do not match.
- * @warning
- * always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_EMISMATCH(s) ((s) == APR_EMISMATCH)
-/**
- * The given lock was busy
- * @warning always use this test, as platform-specific variances may meet this
- * more than one error code
- */
-#define APR_STATUS_IS_EBUSY(s) ((s) == APR_EBUSY)
-
-/** @} */
-
-/**
- * @addtogroup APR_Error APR Error Values
- * @{
- */
-/* APR CANONICAL ERROR VALUES */
-/** @see APR_STATUS_IS_EACCES */
-#ifdef EACCES
-#define APR_EACCES EACCES
-#else
-#define APR_EACCES (APR_OS_START_CANONERR + 1)
-#endif
-
-/** @see APR_STATUS_IS_EXIST */
-#ifdef EEXIST
-#define APR_EEXIST EEXIST
-#else
-#define APR_EEXIST (APR_OS_START_CANONERR + 2)
-#endif
-
-/** @see APR_STATUS_IS_ENAMETOOLONG */
-#ifdef ENAMETOOLONG
-#define APR_ENAMETOOLONG ENAMETOOLONG
-#else
-#define APR_ENAMETOOLONG (APR_OS_START_CANONERR + 3)
-#endif
-
-/** @see APR_STATUS_IS_ENOENT */
-#ifdef ENOENT
-#define APR_ENOENT ENOENT
-#else
-#define APR_ENOENT (APR_OS_START_CANONERR + 4)
-#endif
-
-/** @see APR_STATUS_IS_ENOTDIR */
-#ifdef ENOTDIR
-#define APR_ENOTDIR ENOTDIR
-#else
-#define APR_ENOTDIR (APR_OS_START_CANONERR + 5)
-#endif
-
-/** @see APR_STATUS_IS_ENOSPC */
-#ifdef ENOSPC
-#define APR_ENOSPC ENOSPC
-#else
-#define APR_ENOSPC (APR_OS_START_CANONERR + 6)
-#endif
-
-/** @see APR_STATUS_IS_ENOMEM */
-#ifdef ENOMEM
-#define APR_ENOMEM ENOMEM
-#else
-#define APR_ENOMEM (APR_OS_START_CANONERR + 7)
-#endif
-
-/** @see APR_STATUS_IS_EMFILE */
-#ifdef EMFILE
-#define APR_EMFILE EMFILE
-#else
-#define APR_EMFILE (APR_OS_START_CANONERR + 8)
-#endif
-
-/** @see APR_STATUS_IS_ENFILE */
-#ifdef ENFILE
-#define APR_ENFILE ENFILE
-#else
-#define APR_ENFILE (APR_OS_START_CANONERR + 9)
-#endif
-
-/** @see APR_STATUS_IS_EBADF */
-#ifdef EBADF
-#define APR_EBADF EBADF
-#else
-#define APR_EBADF (APR_OS_START_CANONERR + 10)
-#endif
-
-/** @see APR_STATUS_IS_EINVAL */
-#ifdef EINVAL
-#define APR_EINVAL EINVAL
-#else
-#define APR_EINVAL (APR_OS_START_CANONERR + 11)
-#endif
-
-/** @see APR_STATUS_IS_ESPIPE */
-#ifdef ESPIPE
-#define APR_ESPIPE ESPIPE
-#else
-#define APR_ESPIPE (APR_OS_START_CANONERR + 12)
-#endif
-
-/**
- * @see APR_STATUS_IS_EAGAIN
- * @warning use APR_STATUS_IS_EAGAIN instead of just testing this value
- */
-#ifdef EAGAIN
-#define APR_EAGAIN EAGAIN
-#elif defined(EWOULDBLOCK)
-#define APR_EAGAIN EWOULDBLOCK
-#else
-#define APR_EAGAIN (APR_OS_START_CANONERR + 13)
-#endif
-
-/** @see APR_STATUS_IS_EINTR */
-#ifdef EINTR
-#define APR_EINTR EINTR
-#else
-#define APR_EINTR (APR_OS_START_CANONERR + 14)
-#endif
-
-/** @see APR_STATUS_IS_ENOTSOCK */
-#ifdef ENOTSOCK
-#define APR_ENOTSOCK ENOTSOCK
-#else
-#define APR_ENOTSOCK (APR_OS_START_CANONERR + 15)
-#endif
-
-/** @see APR_STATUS_IS_ECONNREFUSED */
-#ifdef ECONNREFUSED
-#define APR_ECONNREFUSED ECONNREFUSED
-#else
-#define APR_ECONNREFUSED (APR_OS_START_CANONERR + 16)
-#endif
-
-/** @see APR_STATUS_IS_EINPROGRESS */
-#ifdef EINPROGRESS
-#define APR_EINPROGRESS EINPROGRESS
-#else
-#define APR_EINPROGRESS (APR_OS_START_CANONERR + 17)
-#endif
-
-/**
- * @see APR_STATUS_IS_ECONNABORTED
- * @warning use APR_STATUS_IS_ECONNABORTED instead of just testing this value
- */
-
-#ifdef ECONNABORTED
-#define APR_ECONNABORTED ECONNABORTED
-#else
-#define APR_ECONNABORTED (APR_OS_START_CANONERR + 18)
-#endif
-
-/** @see APR_STATUS_IS_ECONNRESET */
-#ifdef ECONNRESET
-#define APR_ECONNRESET ECONNRESET
-#else
-#define APR_ECONNRESET (APR_OS_START_CANONERR + 19)
-#endif
-
-/** @see APR_STATUS_IS_ETIMEDOUT
- * @deprecated */
-#ifdef ETIMEDOUT
-#define APR_ETIMEDOUT ETIMEDOUT
-#else
-#define APR_ETIMEDOUT (APR_OS_START_CANONERR + 20)
-#endif
-
-/** @see APR_STATUS_IS_EHOSTUNREACH */
-#ifdef EHOSTUNREACH
-#define APR_EHOSTUNREACH EHOSTUNREACH
-#else
-#define APR_EHOSTUNREACH (APR_OS_START_CANONERR + 21)
-#endif
-
-/** @see APR_STATUS_IS_ENETUNREACH */
-#ifdef ENETUNREACH
-#define APR_ENETUNREACH ENETUNREACH
-#else
-#define APR_ENETUNREACH (APR_OS_START_CANONERR + 22)
-#endif
-
-/** @see APR_STATUS_IS_EFTYPE */
-#ifdef EFTYPE
-#define APR_EFTYPE EFTYPE
-#else
-#define APR_EFTYPE (APR_OS_START_CANONERR + 23)
-#endif
-
-/** @see APR_STATUS_IS_EPIPE */
-#ifdef EPIPE
-#define APR_EPIPE EPIPE
-#else
-#define APR_EPIPE (APR_OS_START_CANONERR + 24)
-#endif
-
-/** @see APR_STATUS_IS_EXDEV */
-#ifdef EXDEV
-#define APR_EXDEV EXDEV
-#else
-#define APR_EXDEV (APR_OS_START_CANONERR + 25)
-#endif
-
-/** @see APR_STATUS_IS_ENOTEMPTY */
-#ifdef ENOTEMPTY
-#define APR_ENOTEMPTY ENOTEMPTY
-#else
-#define APR_ENOTEMPTY (APR_OS_START_CANONERR + 26)
-#endif
-
-/** @} */
-
-#if defined(OS2) && !defined(DOXYGEN)
-
-#define APR_FROM_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e + APR_OS_START_SYSERR)
-#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
-
-#define INCL_DOSERRORS
-#define INCL_DOS
-
-/* Leave these undefined.
- * OS2 doesn't rely on the errno concept.
- * The API calls always return a result codes which
- * should be filtered through APR_FROM_OS_ERROR().
- *
- * #define apr_get_os_error() (APR_FROM_OS_ERROR(GetLastError()))
- * #define apr_set_os_error(e) (SetLastError(APR_TO_OS_ERROR(e)))
- */
-
-/* A special case, only socket calls require this;
- */
-#define apr_get_netos_error() (APR_FROM_OS_ERROR(errno))
-#define apr_set_netos_error(e) (errno = APR_TO_OS_ERROR(e))
-
-/* And this needs to be greped away for good:
- */
-#define APR_OS2_STATUS(e) (APR_FROM_OS_ERROR(e))
-
-/* These can't sit in a private header, so in spite of the extra size,
- * they need to be made available here.
- */
-#define SOCBASEERR 10000
-#define SOCEPERM (SOCBASEERR+1) /* Not owner */
-#define SOCESRCH (SOCBASEERR+3) /* No such process */
-#define SOCEINTR (SOCBASEERR+4) /* Interrupted system call */
-#define SOCENXIO (SOCBASEERR+6) /* No such device or address */
-#define SOCEBADF (SOCBASEERR+9) /* Bad file number */
-#define SOCEACCES (SOCBASEERR+13) /* Permission denied */
-#define SOCEFAULT (SOCBASEERR+14) /* Bad address */
-#define SOCEINVAL (SOCBASEERR+22) /* Invalid argument */
-#define SOCEMFILE (SOCBASEERR+24) /* Too many open files */
-#define SOCEPIPE (SOCBASEERR+32) /* Broken pipe */
-#define SOCEOS2ERR (SOCBASEERR+100) /* OS/2 Error */
-#define SOCEWOULDBLOCK (SOCBASEERR+35) /* Operation would block */
-#define SOCEINPROGRESS (SOCBASEERR+36) /* Operation now in progress */
-#define SOCEALREADY (SOCBASEERR+37) /* Operation already in progress */
-#define SOCENOTSOCK (SOCBASEERR+38) /* Socket operation on non-socket */
-#define SOCEDESTADDRREQ (SOCBASEERR+39) /* Destination address required */
-#define SOCEMSGSIZE (SOCBASEERR+40) /* Message too long */
-#define SOCEPROTOTYPE (SOCBASEERR+41) /* Protocol wrong type for socket */
-#define SOCENOPROTOOPT (SOCBASEERR+42) /* Protocol not available */
-#define SOCEPROTONOSUPPORT (SOCBASEERR+43) /* Protocol not supported */
-#define SOCESOCKTNOSUPPORT (SOCBASEERR+44) /* Socket type not supported */
-#define SOCEOPNOTSUPP (SOCBASEERR+45) /* Operation not supported on socket */
-#define SOCEPFNOSUPPORT (SOCBASEERR+46) /* Protocol family not supported */
-#define SOCEAFNOSUPPORT (SOCBASEERR+47) /* Address family not supported by protocol family */
-#define SOCEADDRINUSE (SOCBASEERR+48) /* Address already in use */
-#define SOCEADDRNOTAVAIL (SOCBASEERR+49) /* Can't assign requested address */
-#define SOCENETDOWN (SOCBASEERR+50) /* Network is down */
-#define SOCENETUNREACH (SOCBASEERR+51) /* Network is unreachable */
-#define SOCENETRESET (SOCBASEERR+52) /* Network dropped connection on reset */
-#define SOCECONNABORTED (SOCBASEERR+53) /* Software caused connection abort */
-#define SOCECONNRESET (SOCBASEERR+54) /* Connection reset by peer */
-#define SOCENOBUFS (SOCBASEERR+55) /* No buffer space available */
-#define SOCEISCONN (SOCBASEERR+56) /* Socket is already connected */
-#define SOCENOTCONN (SOCBASEERR+57) /* Socket is not connected */
-#define SOCESHUTDOWN (SOCBASEERR+58) /* Can't send after socket shutdown */
-#define SOCETOOMANYREFS (SOCBASEERR+59) /* Too many references: can't splice */
-#define SOCETIMEDOUT (SOCBASEERR+60) /* Connection timed out */
-#define SOCECONNREFUSED (SOCBASEERR+61) /* Connection refused */
-#define SOCELOOP (SOCBASEERR+62) /* Too many levels of symbolic links */
-#define SOCENAMETOOLONG (SOCBASEERR+63) /* File name too long */
-#define SOCEHOSTDOWN (SOCBASEERR+64) /* Host is down */
-#define SOCEHOSTUNREACH (SOCBASEERR+65) /* No route to host */
-#define SOCENOTEMPTY (SOCBASEERR+66) /* Directory not empty */
-
-/* APR CANONICAL ERROR TESTS */
-#define APR_STATUS_IS_EACCES(s) ((s) == APR_EACCES \
- || (s) == APR_OS_START_SYSERR + ERROR_ACCESS_DENIED \
- || (s) == APR_OS_START_SYSERR + ERROR_SHARING_VIOLATION)
-#define APR_STATUS_IS_EEXIST(s) ((s) == APR_EEXIST \
- || (s) == APR_OS_START_SYSERR + ERROR_OPEN_FAILED \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_EXISTS \
- || (s) == APR_OS_START_SYSERR + ERROR_ALREADY_EXISTS \
- || (s) == APR_OS_START_SYSERR + ERROR_ACCESS_DENIED)
-#define APR_STATUS_IS_ENAMETOOLONG(s) ((s) == APR_ENAMETOOLONG \
- || (s) == APR_OS_START_SYSERR + ERROR_FILENAME_EXCED_RANGE \
- || (s) == APR_OS_START_SYSERR + SOCENAMETOOLONG)
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_PATH_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_MORE_FILES \
- || (s) == APR_OS_START_SYSERR + ERROR_OPEN_FAILED)
-#define APR_STATUS_IS_ENOTDIR(s) ((s) == APR_ENOTDIR)
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC \
- || (s) == APR_OS_START_SYSERR + ERROR_DISK_FULL)
-#define APR_STATUS_IS_ENOMEM(s) ((s) == APR_ENOMEM)
-#define APR_STATUS_IS_EMFILE(s) ((s) == APR_EMFILE \
- || (s) == APR_OS_START_SYSERR + ERROR_TOO_MANY_OPEN_FILES)
-#define APR_STATUS_IS_ENFILE(s) ((s) == APR_ENFILE)
-#define APR_STATUS_IS_EBADF(s) ((s) == APR_EBADF \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_HANDLE)
-#define APR_STATUS_IS_EINVAL(s) ((s) == APR_EINVAL \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_PARAMETER \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_FUNCTION)
-#define APR_STATUS_IS_ESPIPE(s) ((s) == APR_ESPIPE \
- || (s) == APR_OS_START_SYSERR + ERROR_NEGATIVE_SEEK)
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_DATA \
- || (s) == APR_OS_START_SYSERR + SOCEWOULDBLOCK \
- || (s) == APR_OS_START_SYSERR + ERROR_LOCK_VIOLATION)
-#define APR_STATUS_IS_EINTR(s) ((s) == APR_EINTR \
- || (s) == APR_OS_START_SYSERR + SOCEINTR)
-#define APR_STATUS_IS_ENOTSOCK(s) ((s) == APR_ENOTSOCK \
- || (s) == APR_OS_START_SYSERR + SOCENOTSOCK)
-#define APR_STATUS_IS_ECONNREFUSED(s) ((s) == APR_ECONNREFUSED \
- || (s) == APR_OS_START_SYSERR + SOCECONNREFUSED)
-#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS \
- || (s) == APR_OS_START_SYSERR + SOCEINPROGRESS)
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED \
- || (s) == APR_OS_START_SYSERR + SOCECONNABORTED)
-#define APR_STATUS_IS_ECONNRESET(s) ((s) == APR_ECONNRESET \
- || (s) == APR_OS_START_SYSERR + SOCECONNRESET)
-/* XXX deprecated */
-#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + SOCETIMEDOUT)
-#undef APR_STATUS_IS_TIMEUP
-#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP \
- || (s) == APR_OS_START_SYSERR + SOCETIMEDOUT)
-#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH \
- || (s) == APR_OS_START_SYSERR + SOCEHOSTUNREACH)
-#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH \
- || (s) == APR_OS_START_SYSERR + SOCENETUNREACH)
-#define APR_STATUS_IS_EFTYPE(s) ((s) == APR_EFTYPE)
-#define APR_STATUS_IS_EPIPE(s) ((s) == APR_EPIPE \
- || (s) == APR_OS_START_SYSERR + ERROR_BROKEN_PIPE \
- || (s) == APR_OS_START_SYSERR + SOCEPIPE)
-#define APR_STATUS_IS_EXDEV(s) ((s) == APR_EXDEV \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_SAME_DEVICE)
-#define APR_STATUS_IS_ENOTEMPTY(s) ((s) == APR_ENOTEMPTY \
- || (s) == APR_OS_START_SYSERR + ERROR_DIR_NOT_EMPTY \
- || (s) == APR_OS_START_SYSERR + ERROR_ACCESS_DENIED)
-
-/*
- Sorry, too tired to wrap this up for OS2... feel free to
- fit the following into their best matches.
-
- { ERROR_NO_SIGNAL_SENT, ESRCH },
- { SOCEALREADY, EALREADY },
- { SOCEDESTADDRREQ, EDESTADDRREQ },
- { SOCEMSGSIZE, EMSGSIZE },
- { SOCEPROTOTYPE, EPROTOTYPE },
- { SOCENOPROTOOPT, ENOPROTOOPT },
- { SOCEPROTONOSUPPORT, EPROTONOSUPPORT },
- { SOCESOCKTNOSUPPORT, ESOCKTNOSUPPORT },
- { SOCEOPNOTSUPP, EOPNOTSUPP },
- { SOCEPFNOSUPPORT, EPFNOSUPPORT },
- { SOCEAFNOSUPPORT, EAFNOSUPPORT },
- { SOCEADDRINUSE, EADDRINUSE },
- { SOCEADDRNOTAVAIL, EADDRNOTAVAIL },
- { SOCENETDOWN, ENETDOWN },
- { SOCENETRESET, ENETRESET },
- { SOCENOBUFS, ENOBUFS },
- { SOCEISCONN, EISCONN },
- { SOCENOTCONN, ENOTCONN },
- { SOCESHUTDOWN, ESHUTDOWN },
- { SOCETOOMANYREFS, ETOOMANYREFS },
- { SOCELOOP, ELOOP },
- { SOCEHOSTDOWN, EHOSTDOWN },
- { SOCENOTEMPTY, ENOTEMPTY },
- { SOCEPIPE, EPIPE }
-*/
-
-#elif defined(WIN32) && !defined(DOXYGEN) /* !defined(OS2) */
-
-#define APR_FROM_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e + APR_OS_START_SYSERR)
-#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
-
-#define apr_get_os_error() (APR_FROM_OS_ERROR(GetLastError()))
-#define apr_set_os_error(e) (SetLastError(APR_TO_OS_ERROR(e)))
-
-/* A special case, only socket calls require this:
- */
-#define apr_get_netos_error() (APR_FROM_OS_ERROR(WSAGetLastError()))
-#define apr_set_netos_error(e) (WSASetLastError(APR_TO_OS_ERROR(e)))
-
-/* APR CANONICAL ERROR TESTS */
-#define APR_STATUS_IS_EACCES(s) ((s) == APR_EACCES \
- || (s) == APR_OS_START_SYSERR + ERROR_ACCESS_DENIED \
- || (s) == APR_OS_START_SYSERR + ERROR_CANNOT_MAKE \
- || (s) == APR_OS_START_SYSERR + ERROR_CURRENT_DIRECTORY \
- || (s) == APR_OS_START_SYSERR + ERROR_DRIVE_LOCKED \
- || (s) == APR_OS_START_SYSERR + ERROR_FAIL_I24 \
- || (s) == APR_OS_START_SYSERR + ERROR_LOCK_VIOLATION \
- || (s) == APR_OS_START_SYSERR + ERROR_LOCK_FAILED \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_LOCKED \
- || (s) == APR_OS_START_SYSERR + ERROR_NETWORK_ACCESS_DENIED \
- || (s) == APR_OS_START_SYSERR + ERROR_SHARING_VIOLATION)
-#define APR_STATUS_IS_EEXIST(s) ((s) == APR_EEXIST \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_EXISTS \
- || (s) == APR_OS_START_SYSERR + ERROR_ALREADY_EXISTS)
-#define APR_STATUS_IS_ENAMETOOLONG(s) ((s) == APR_ENAMETOOLONG \
- || (s) == APR_OS_START_SYSERR + ERROR_FILENAME_EXCED_RANGE \
- || (s) == APR_OS_START_SYSERR + WSAENAMETOOLONG)
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_PATH_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_OPEN_FAILED \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_MORE_FILES)
-#define APR_STATUS_IS_ENOTDIR(s) ((s) == APR_ENOTDIR \
- || (s) == APR_OS_START_SYSERR + ERROR_PATH_NOT_FOUND \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_NETPATH \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_NET_NAME \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_PATHNAME \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_DRIVE)
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC \
- || (s) == APR_OS_START_SYSERR + ERROR_DISK_FULL)
-#define APR_STATUS_IS_ENOMEM(s) ((s) == APR_ENOMEM \
- || (s) == APR_OS_START_SYSERR + ERROR_ARENA_TRASHED \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_ENOUGH_MEMORY \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_BLOCK \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_ENOUGH_QUOTA \
- || (s) == APR_OS_START_SYSERR + ERROR_OUTOFMEMORY)
-#define APR_STATUS_IS_EMFILE(s) ((s) == APR_EMFILE \
- || (s) == APR_OS_START_SYSERR + ERROR_TOO_MANY_OPEN_FILES)
-#define APR_STATUS_IS_ENFILE(s) ((s) == APR_ENFILE)
-#define APR_STATUS_IS_EBADF(s) ((s) == APR_EBADF \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_HANDLE \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_TARGET_HANDLE)
-#define APR_STATUS_IS_EINVAL(s) ((s) == APR_EINVAL \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_ACCESS \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_DATA \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_FUNCTION \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_HANDLE \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_PARAMETER \
- || (s) == APR_OS_START_SYSERR + ERROR_NEGATIVE_SEEK)
-#define APR_STATUS_IS_ESPIPE(s) ((s) == APR_ESPIPE \
- || (s) == APR_OS_START_SYSERR + ERROR_SEEK_ON_DEVICE \
- || (s) == APR_OS_START_SYSERR + ERROR_NEGATIVE_SEEK)
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_DATA \
- || (s) == APR_OS_START_SYSERR + ERROR_NO_PROC_SLOTS \
- || (s) == APR_OS_START_SYSERR + ERROR_NESTING_NOT_ALLOWED \
- || (s) == APR_OS_START_SYSERR + ERROR_MAX_THRDS_REACHED \
- || (s) == APR_OS_START_SYSERR + ERROR_LOCK_VIOLATION \
- || (s) == APR_OS_START_SYSERR + WSAEWOULDBLOCK)
-#define APR_STATUS_IS_EINTR(s) ((s) == APR_EINTR \
- || (s) == APR_OS_START_SYSERR + WSAEINTR)
-#define APR_STATUS_IS_ENOTSOCK(s) ((s) == APR_ENOTSOCK \
- || (s) == APR_OS_START_SYSERR + WSAENOTSOCK)
-#define APR_STATUS_IS_ECONNREFUSED(s) ((s) == APR_ECONNREFUSED \
- || (s) == APR_OS_START_SYSERR + WSAECONNREFUSED)
-#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS \
- || (s) == APR_OS_START_SYSERR + WSAEINPROGRESS)
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED \
- || (s) == APR_OS_START_SYSERR + WSAECONNABORTED)
-#define APR_STATUS_IS_ECONNRESET(s) ((s) == APR_ECONNRESET \
- || (s) == APR_OS_START_SYSERR + ERROR_NETNAME_DELETED \
- || (s) == APR_OS_START_SYSERR + WSAECONNRESET)
-/* XXX deprecated */
-#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WSAETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WAIT_TIMEOUT)
-#undef APR_STATUS_IS_TIMEUP
-#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP \
- || (s) == APR_OS_START_SYSERR + WSAETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WAIT_TIMEOUT)
-#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH \
- || (s) == APR_OS_START_SYSERR + WSAEHOSTUNREACH)
-#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH \
- || (s) == APR_OS_START_SYSERR + WSAENETUNREACH)
-#define APR_STATUS_IS_EFTYPE(s) ((s) == APR_EFTYPE \
- || (s) == APR_OS_START_SYSERR + ERROR_EXE_MACHINE_TYPE_MISMATCH \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_DLL \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_MODULETYPE \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_EXE_FORMAT \
- || (s) == APR_OS_START_SYSERR + ERROR_INVALID_EXE_SIGNATURE \
- || (s) == APR_OS_START_SYSERR + ERROR_FILE_CORRUPT \
- || (s) == APR_OS_START_SYSERR + ERROR_BAD_FORMAT)
-#define APR_STATUS_IS_EPIPE(s) ((s) == APR_EPIPE \
- || (s) == APR_OS_START_SYSERR + ERROR_BROKEN_PIPE)
-#define APR_STATUS_IS_EXDEV(s) ((s) == APR_EXDEV \
- || (s) == APR_OS_START_SYSERR + ERROR_NOT_SAME_DEVICE)
-#define APR_STATUS_IS_ENOTEMPTY(s) ((s) == APR_ENOTEMPTY \
- || (s) == APR_OS_START_SYSERR + ERROR_DIR_NOT_EMPTY)
-
-#elif defined(NETWARE) && defined(USE_WINSOCK) && !defined(DOXYGEN) /* !defined(OS2) && !defined(WIN32) */
-
-#define APR_FROM_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e + APR_OS_START_SYSERR)
-#define APR_TO_OS_ERROR(e) (e == 0 ? APR_SUCCESS : e - APR_OS_START_SYSERR)
-
-#define apr_get_os_error() (errno)
-#define apr_set_os_error(e) (errno = (e))
-
-/* A special case, only socket calls require this: */
-#define apr_get_netos_error() (APR_FROM_OS_ERROR(WSAGetLastError()))
-#define apr_set_netos_error(e) (WSASetLastError(APR_TO_OS_ERROR(e)))
-
-/* APR CANONICAL ERROR TESTS */
-#define APR_STATUS_IS_EACCES(s) ((s) == APR_EACCES)
-#define APR_STATUS_IS_EEXIST(s) ((s) == APR_EEXIST)
-#define APR_STATUS_IS_ENAMETOOLONG(s) ((s) == APR_ENAMETOOLONG)
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT)
-#define APR_STATUS_IS_ENOTDIR(s) ((s) == APR_ENOTDIR)
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC)
-#define APR_STATUS_IS_ENOMEM(s) ((s) == APR_ENOMEM)
-#define APR_STATUS_IS_EMFILE(s) ((s) == APR_EMFILE)
-#define APR_STATUS_IS_ENFILE(s) ((s) == APR_ENFILE)
-#define APR_STATUS_IS_EBADF(s) ((s) == APR_EBADF)
-#define APR_STATUS_IS_EINVAL(s) ((s) == APR_EINVAL)
-#define APR_STATUS_IS_ESPIPE(s) ((s) == APR_ESPIPE)
-
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN \
- || (s) == EWOULDBLOCK \
- || (s) == APR_OS_START_SYSERR + WSAEWOULDBLOCK)
-#define APR_STATUS_IS_EINTR(s) ((s) == APR_EINTR \
- || (s) == APR_OS_START_SYSERR + WSAEINTR)
-#define APR_STATUS_IS_ENOTSOCK(s) ((s) == APR_ENOTSOCK \
- || (s) == APR_OS_START_SYSERR + WSAENOTSOCK)
-#define APR_STATUS_IS_ECONNREFUSED(s) ((s) == APR_ECONNREFUSED \
- || (s) == APR_OS_START_SYSERR + WSAECONNREFUSED)
-#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS \
- || (s) == APR_OS_START_SYSERR + WSAEINPROGRESS)
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED \
- || (s) == APR_OS_START_SYSERR + WSAECONNABORTED)
-#define APR_STATUS_IS_ECONNRESET(s) ((s) == APR_ECONNRESET \
- || (s) == APR_OS_START_SYSERR + WSAECONNRESET)
-/* XXX deprecated */
-#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WSAETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WAIT_TIMEOUT)
-#undef APR_STATUS_IS_TIMEUP
-#define APR_STATUS_IS_TIMEUP(s) ((s) == APR_TIMEUP \
- || (s) == APR_OS_START_SYSERR + WSAETIMEDOUT \
- || (s) == APR_OS_START_SYSERR + WAIT_TIMEOUT)
-#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH \
- || (s) == APR_OS_START_SYSERR + WSAEHOSTUNREACH)
-#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH \
- || (s) == APR_OS_START_SYSERR + WSAENETUNREACH)
-#define APR_STATUS_IS_ENETDOWN(s) ((s) == APR_OS_START_SYSERR + WSAENETDOWN)
-#define APR_STATUS_IS_EFTYPE(s) ((s) == APR_EFTYPE)
-#define APR_STATUS_IS_EPIPE(s) ((s) == APR_EPIPE)
-#define APR_STATUS_IS_EXDEV(s) ((s) == APR_EXDEV)
-#define APR_STATUS_IS_ENOTEMPTY(s) ((s) == APR_ENOTEMPTY)
-
-#else /* !defined(NETWARE) && !defined(OS2) && !defined(WIN32) */
-
-/*
- * os error codes are clib error codes
- */
-#define APR_FROM_OS_ERROR(e) (e)
-#define APR_TO_OS_ERROR(e) (e)
-
-#define apr_get_os_error() (errno)
-#define apr_set_os_error(e) (errno = (e))
-
-/* A special case, only socket calls require this:
- */
-#define apr_get_netos_error() (errno)
-#define apr_set_netos_error(e) (errno = (e))
-
-/**
- * @addtogroup APR_STATUS_IS
- * @{
- */
-
-/** permission denied */
-#define APR_STATUS_IS_EACCES(s) ((s) == APR_EACCES)
-/** file exists */
-#define APR_STATUS_IS_EEXIST(s) ((s) == APR_EEXIST)
-/** path name is too long */
-#define APR_STATUS_IS_ENAMETOOLONG(s) ((s) == APR_ENAMETOOLONG)
-/**
- * no such file or directory
- * @remark
- * EMVSCATLG can be returned by the automounter on z/OS for
- * paths which do not exist.
- */
-#ifdef EMVSCATLG
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT \
- || (s) == EMVSCATLG)
-#else
-#define APR_STATUS_IS_ENOENT(s) ((s) == APR_ENOENT)
-#endif
-/** not a directory */
-#define APR_STATUS_IS_ENOTDIR(s) ((s) == APR_ENOTDIR)
-/** no space left on device */
-#ifdef EDQUOT
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC \
- || (s) == EDQUOT)
-#else
-#define APR_STATUS_IS_ENOSPC(s) ((s) == APR_ENOSPC)
-#endif
-/** not enough memory */
-#define APR_STATUS_IS_ENOMEM(s) ((s) == APR_ENOMEM)
-/** too many open files */
-#define APR_STATUS_IS_EMFILE(s) ((s) == APR_EMFILE)
-/** file table overflow */
-#define APR_STATUS_IS_ENFILE(s) ((s) == APR_ENFILE)
-/** bad file # */
-#define APR_STATUS_IS_EBADF(s) ((s) == APR_EBADF)
-/** invalid argument */
-#define APR_STATUS_IS_EINVAL(s) ((s) == APR_EINVAL)
-/** illegal seek */
-#define APR_STATUS_IS_ESPIPE(s) ((s) == APR_ESPIPE)
-
-/** operation would block */
-#if !defined(EWOULDBLOCK) || !defined(EAGAIN)
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN)
-#elif (EWOULDBLOCK == EAGAIN)
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN)
-#else
-#define APR_STATUS_IS_EAGAIN(s) ((s) == APR_EAGAIN \
- || (s) == EWOULDBLOCK)
-#endif
-
-/** interrupted system call */
-#define APR_STATUS_IS_EINTR(s) ((s) == APR_EINTR)
-/** socket operation on a non-socket */
-#define APR_STATUS_IS_ENOTSOCK(s) ((s) == APR_ENOTSOCK)
-/** Connection Refused */
-#define APR_STATUS_IS_ECONNREFUSED(s) ((s) == APR_ECONNREFUSED)
-/** operation now in progress */
-#define APR_STATUS_IS_EINPROGRESS(s) ((s) == APR_EINPROGRESS)
-
-/**
- * Software caused connection abort
- * @remark
- * EPROTO on certain older kernels really means ECONNABORTED, so we need to
- * ignore it for them. See discussion in new-httpd archives nh.9701 & nh.9603
- *
- * There is potentially a bug in Solaris 2.x x<6, and other boxes that
- * implement tcp sockets in userland (i.e. on top of STREAMS). On these
- * systems, EPROTO can actually result in a fatal loop. See PR#981 for
- * example. It's hard to handle both uses of EPROTO.
- */
-#ifdef EPROTO
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED \
- || (s) == EPROTO)
-#else
-#define APR_STATUS_IS_ECONNABORTED(s) ((s) == APR_ECONNABORTED)
-#endif
-
-/** Connection Reset by peer */
-#define APR_STATUS_IS_ECONNRESET(s) ((s) == APR_ECONNRESET)
-/** Operation timed out
- * @deprecated */
-#define APR_STATUS_IS_ETIMEDOUT(s) ((s) == APR_ETIMEDOUT)
-/** no route to host */
-#define APR_STATUS_IS_EHOSTUNREACH(s) ((s) == APR_EHOSTUNREACH)
-/** network is unreachable */
-#define APR_STATUS_IS_ENETUNREACH(s) ((s) == APR_ENETUNREACH)
-/** inappropiate file type or format */
-#define APR_STATUS_IS_EFTYPE(s) ((s) == APR_EFTYPE)
-/** broken pipe */
-#define APR_STATUS_IS_EPIPE(s) ((s) == APR_EPIPE)
-/** cross device link */
-#define APR_STATUS_IS_EXDEV(s) ((s) == APR_EXDEV)
-/** Directory Not Empty */
-#define APR_STATUS_IS_ENOTEMPTY(s) ((s) == APR_ENOTEMPTY || \
- (s) == APR_EEXIST)
-/** @} */
-
-#endif /* !defined(NETWARE) && !defined(OS2) && !defined(WIN32) */
-
-/** @} */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* ! APR_ERRNO_H */
-/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this
- * file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_LICENSE_HEADER_END@
- */
-#include <sys/errno.h>
-
-
-/*
- * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-/* Copyright (c) 1995 NeXT Computer, Inc. All Rights Reserved */
-/*
- * Copyright (c) 1982, 1986, 1989, 1993
- * The Regents of the University of California. All rights reserved.
- * (c) UNIX System Laboratories, Inc.
- * All or some portions of this file are derived from material licensed
- * to the University of California by American Telephone and Telegraph
- * Co. or Unix System Laboratories, Inc. and are reproduced herein with
- * the permission of UNIX System Laboratories, Inc.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- * must display the following acknowledgement:
- * This product includes software developed by the University of
- * California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * @(#)errno.h 8.5 (Berkeley) 1/21/94
- */
-
-#ifndef _SYS_ERRNO_H_
-#define _SYS_ERRNO_H_
-
-#include <sys/cdefs.h>
-__BEGIN_DECLS
-extern int * __error(void);
-#define errno (*__error())
-__END_DECLS
-
-/*
- * Error codes
- */
-
-#define EPERM 1 /* Operation not permitted */
-#define ENOENT 2 /* No such file or directory */
-#define ESRCH 3 /* No such process */
-#define EINTR 4 /* Interrupted system call */
-#define EIO 5 /* Input/output error */
-#define ENXIO 6 /* Device not configured */
-#define E2BIG 7 /* Argument list too long */
-#define ENOEXEC 8 /* Exec format error */
-#define EBADF 9 /* Bad file descriptor */
-#define ECHILD 10 /* No child processes */
-#define EDEADLK 11 /* Resource deadlock avoided */
- /* 11 was EAGAIN */
-#define ENOMEM 12 /* Cannot allocate memory */
-#define EACCES 13 /* Permission denied */
-#define EFAULT 14 /* Bad address */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define ENOTBLK 15 /* Block device required */
-#endif
-#define EBUSY 16 /* Device / Resource busy */
-#define EEXIST 17 /* File exists */
-#define EXDEV 18 /* Cross-device link */
-#define ENODEV 19 /* Operation not supported by device */
-#define ENOTDIR 20 /* Not a directory */
-#define EISDIR 21 /* Is a directory */
-#define EINVAL 22 /* Invalid argument */
-#define ENFILE 23 /* Too many open files in system */
-#define EMFILE 24 /* Too many open files */
-#define ENOTTY 25 /* Inappropriate ioctl for device */
-#define ETXTBSY 26 /* Text file busy */
-#define EFBIG 27 /* File too large */
-#define ENOSPC 28 /* No space left on device */
-#define ESPIPE 29 /* Illegal seek */
-#define EROFS 30 /* Read-only file system */
-#define EMLINK 31 /* Too many links */
-#define EPIPE 32 /* Broken pipe */
-
-/* math software */
-#define EDOM 33 /* Numerical argument out of domain */
-#define ERANGE 34 /* Result too large */
-
-/* non-blocking and interrupt i/o */
-#define EAGAIN 35 /* Resource temporarily unavailable */
-#define EWOULDBLOCK EAGAIN /* Operation would block */
-#define EINPROGRESS 36 /* Operation now in progress */
-#define EALREADY 37 /* Operation already in progress */
-
-/* ipc/network software -- argument errors */
-#define ENOTSOCK 38 /* Socket operation on non-socket */
-#define EDESTADDRREQ 39 /* Destination address required */
-#define EMSGSIZE 40 /* Message too long */
-#define EPROTOTYPE 41 /* Protocol wrong type for socket */
-#define ENOPROTOOPT 42 /* Protocol not available */
-#define EPROTONOSUPPORT 43 /* Protocol not supported */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define ESOCKTNOSUPPORT 44 /* Socket type not supported */
-#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
-#define ENOTSUP 45 /* Operation not supported */
-#if !__DARWIN_UNIX03 && !defined(KERNEL)
-/*
- * This is the same for binary and source copmpatability, unless compiling
- * the kernel itself, or compiling __DARWIN_UNIX03; if compiling for the
- * kernel, the correct value will be returned. If compiling non-POSIX
- * source, the kernel return value will be converted by a stub in libc, and
- * if compiling source with __DARWIN_UNIX03, the conversion in libc is not
- * done, and the caller gets the expected (discrete) value.
- */
-#define EOPNOTSUPP ENOTSUP /* Operation not supported on socket */
-#endif /* !__DARWIN_UNIX03 && !KERNEL */
-
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define EPFNOSUPPORT 46 /* Protocol family not supported */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-#define EAFNOSUPPORT 47 /* Address family not supported by protocol family */
-#define EADDRINUSE 48 /* Address already in use */
-#define EADDRNOTAVAIL 49 /* Can't assign requested address */
-
-/* ipc/network software -- operational errors */
-#define ENETDOWN 50 /* Network is down */
-#define ENETUNREACH 51 /* Network is unreachable */
-#define ENETRESET 52 /* Network dropped connection on reset */
-#define ECONNABORTED 53 /* Software caused connection abort */
-#define ECONNRESET 54 /* Connection reset by peer */
-#define ENOBUFS 55 /* No buffer space available */
-#define EISCONN 56 /* Socket is already connected */
-#define ENOTCONN 57 /* Socket is not connected */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define ESHUTDOWN 58 /* Can't send after socket shutdown */
-#define ETOOMANYREFS 59 /* Too many references: can't splice */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-#define ETIMEDOUT 60 /* Operation timed out */
-#define ECONNREFUSED 61 /* Connection refused */
-
-#define ELOOP 62 /* Too many levels of symbolic links */
-#define ENAMETOOLONG 63 /* File name too long */
-
-/* should be rearranged */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define EHOSTDOWN 64 /* Host is down */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-#define EHOSTUNREACH 65 /* No route to host */
-#define ENOTEMPTY 66 /* Directory not empty */
-
-/* quotas & mush */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define EPROCLIM 67 /* Too many processes */
-#define EUSERS 68 /* Too many users */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-#define EDQUOT 69 /* Disc quota exceeded */
-
-/* Network File System */
-#define ESTALE 70 /* Stale NFS file handle */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define EREMOTE 71 /* Too many levels of remote in path */
-#define EBADRPC 72 /* RPC struct is bad */
-#define ERPCMISMATCH 73 /* RPC version wrong */
-#define EPROGUNAVAIL 74 /* RPC prog. not avail */
-#define EPROGMISMATCH 75 /* Program version wrong */
-#define EPROCUNAVAIL 76 /* Bad procedure for program */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-
-#define ENOLCK 77 /* No locks available */
-#define ENOSYS 78 /* Function not implemented */
-
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define EFTYPE 79 /* Inappropriate file type or format */
-#define EAUTH 80 /* Authentication error */
-#define ENEEDAUTH 81 /* Need authenticator */
-
-/* Intelligent device errors */
-#define EPWROFF 82 /* Device power is off */
-#define EDEVERR 83 /* Device error, e.g. paper out */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-
-#define EOVERFLOW 84 /* Value too large to be stored in data type */
-
-/* Program loading errors */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define EBADEXEC 85 /* Bad executable */
-#define EBADARCH 86 /* Bad CPU type in executable */
-#define ESHLIBVERS 87 /* Shared library version mismatch */
-#define EBADMACHO 88 /* Malformed Macho file */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-
-#define ECANCELED 89 /* Operation canceled */
-
-#define EIDRM 90 /* Identifier removed */
-#define ENOMSG 91 /* No message of desired type */
-#define EILSEQ 92 /* Illegal byte sequence */
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define ENOATTR 93 /* Attribute not found */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-
-#define EBADMSG 94 /* Bad message */
-#define EMULTIHOP 95 /* Reserved */
-#define ENODATA 96 /* No message available on STREAM */
-#define ENOLINK 97 /* Reserved */
-#define ENOSR 98 /* No STREAM resources */
-#define ENOSTR 99 /* Not a STREAM */
-#define EPROTO 100 /* Protocol error */
-#define ETIME 101 /* STREAM ioctl timeout */
-
-#if __DARWIN_UNIX03 || defined(KERNEL)
-/* This value is only discrete when compiling __DARWIN_UNIX03, or KERNEL */
-#define EOPNOTSUPP 102 /* Operation not supported on socket */
-#endif /* __DARWIN_UNIX03 || KERNEL */
-
-#define ENOPOLICY 103 /* No such policy registered */
-
-#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
-#define ELAST 103 /* Must be equal largest errno */
-#endif /* (_POSIX_C_SOURCE && !_DARWIN_C_SOURCE) */
-
-#endif /* _SYS_ERRNO_H_ */
diff --git a/doc/legacy/errno.list.solaris.txt b/doc/legacy/errno.list.solaris.txt
deleted file mode 100644
index 23601e9d374..00000000000
--- a/doc/legacy/errno.list.solaris.txt
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * CDDL HEADER START
- *
- * The contents of this file are subject to the terms of the
- * Common Development and Distribution License, Version 1.0 only
- * (the "License"). You may not use this file except in compliance
- * with the License.
- *
- * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
- * See the License for the specific language governing permissions
- * and limitations under the License.
- *
- * When distributing Covered Code, include this CDDL HEADER in each
- * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
- * If applicable, add the following below this CDDL HEADER, with the
- * fields enclosed by brackets "[]" replaced with your own identifying
- * information: Portions Copyright [yyyy] [name of copyright owner]
- *
- * CDDL HEADER END
- */
-/*
- * Copyright 2000 Sun Microsystems, Inc. All rights reserved.
- * Use is subject to license terms.
- */
-
-/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
-/* All Rights Reserved */
-
-/*
- * University Copyright- Copyright (c) 1982, 1986, 1988
- * The Regents of the University of California
- * All Rights Reserved
- *
- * University Acknowledgment- Portions of this document are derived from
- * software developed by the University of California, Berkeley, and its
- * contributors.
- */
-
-#ifndef _SYS_ERRNO_H
-#define _SYS_ERRNO_H
-
-#pragma ident "@(#)errno.h 1.22 05/06/08 SMI"
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * Error codes
- */
-
-#define EPERM 1 /* Not super-user */
-#define ENOENT 2 /* No such file or directory */
-#define ESRCH 3 /* No such process */
-#define EINTR 4 /* interrupted system call */
-#define EIO 5 /* I/O error */
-#define ENXIO 6 /* No such device or address */
-#define E2BIG 7 /* Arg list too long */
-#define ENOEXEC 8 /* Exec format error */
-#define EBADF 9 /* Bad file number */
-#define ECHILD 10 /* No children */
-#define EAGAIN 11 /* Resource temporarily unavailable */
-#define ENOMEM 12 /* Not enough core */
-#define EACCES 13 /* Permission denied */
-#define EFAULT 14 /* Bad address */
-#define ENOTBLK 15 /* Block device required */
-#define EBUSY 16 /* Mount device busy */
-#define EEXIST 17 /* File exists */
-#define EXDEV 18 /* Cross-device link */
-#define ENODEV 19 /* No such device */
-#define ENOTDIR 20 /* Not a directory */
-#define EISDIR 21 /* Is a directory */
-#define EINVAL 22 /* Invalid argument */
-#define ENFILE 23 /* File table overflow */
-#define EMFILE 24 /* Too many open files */
-#define ENOTTY 25 /* Inappropriate ioctl for device */
-#define ETXTBSY 26 /* Text file busy */
-#define EFBIG 27 /* File too large */
-#define ENOSPC 28 /* No space left on device */
-#define ESPIPE 29 /* Illegal seek */
-#define EROFS 30 /* Read only file system */
-#define EMLINK 31 /* Too many links */
-#define EPIPE 32 /* Broken pipe */
-#define EDOM 33 /* Math arg out of domain of func */
-#define ERANGE 34 /* Math result not representable */
-#define ENOMSG 35 /* No message of desired type */
-#define EIDRM 36 /* Identifier removed */
-#define ECHRNG 37 /* Channel number out of range */
-#define EL2NSYNC 38 /* Level 2 not synchronized */
-#define EL3HLT 39 /* Level 3 halted */
-#define EL3RST 40 /* Level 3 reset */
-#define ELNRNG 41 /* Link number out of range */
-#define EUNATCH 42 /* Protocol driver not attached */
-#define ENOCSI 43 /* No CSI structure available */
-#define EL2HLT 44 /* Level 2 halted */
-#define EDEADLK 45 /* Deadlock condition. */
-#define ENOLCK 46 /* No record locks available. */
-#define ECANCELED 47 /* Operation canceled */
-#define ENOTSUP 48 /* Operation not supported */
-
-/* Filesystem Quotas */
-#define EDQUOT 49 /* Disc quota exceeded */
-
-/* Convergent Error Returns */
-#define EBADE 50 /* invalid exchange */
-#define EBADR 51 /* invalid request descriptor */
-#define EXFULL 52 /* exchange full */
-#define ENOANO 53 /* no anode */
-#define EBADRQC 54 /* invalid request code */
-#define EBADSLT 55 /* invalid slot */
-#define EDEADLOCK 56 /* file locking deadlock error */
-
-#define EBFONT 57 /* bad font file fmt */
-
-/* Interprocess Robust Locks */
-#define EOWNERDEAD 58 /* process died with the lock */
-#define ENOTRECOVERABLE 59 /* lock is not recoverable */
-
-/* stream problems */
-#define ENOSTR 60 /* Device not a stream */
-#define ENODATA 61 /* no data (for no delay io) */
-#define ETIME 62 /* timer expired */
-#define ENOSR 63 /* out of streams resources */
-
-#define ENONET 64 /* Machine is not on the network */
-#define ENOPKG 65 /* Package not installed */
-#define EREMOTE 66 /* The object is remote */
-#define ENOLINK 67 /* the link has been severed */
-#define EADV 68 /* advertise error */
-#define ESRMNT 69 /* srmount error */
-
-#define ECOMM 70 /* Communication error on send */
-#define EPROTO 71 /* Protocol error */
-
-/* Interprocess Robust Locks */
-#define ELOCKUNMAPPED 72 /* locked lock was unmapped */
-
-#define ENOTACTIVE 73 /* Facility is not active */
-#define EMULTIHOP 74 /* multihop attempted */
-#define EBADMSG 77 /* trying to read unreadable message */
-#define ENAMETOOLONG 78 /* path name is too long */
-#define EOVERFLOW 79 /* value too large to be stored in data type */
-#define ENOTUNIQ 80 /* given log. name not unique */
-#define EBADFD 81 /* f.d. invalid for this operation */
-#define EREMCHG 82 /* Remote address changed */
-
-/* shared library problems */
-#define ELIBACC 83 /* Can't access a needed shared lib. */
-#define ELIBBAD 84 /* Accessing a corrupted shared lib. */
-#define ELIBSCN 85 /* .lib section in a.out corrupted. */
-#define ELIBMAX 86 /* Attempting to link in too many libs. */
-#define ELIBEXEC 87 /* Attempting to exec a shared library. */
-#define EILSEQ 88 /* Illegal byte sequence. */
-#define ENOSYS 89 /* Unsupported file system operation */
-#define ELOOP 90 /* Symbolic link loop */
-#define ERESTART 91 /* Restartable system call */
-#define ESTRPIPE 92 /* if pipe/FIFO, don't sleep in stream head */
-#define ENOTEMPTY 93 /* directory not empty */
-#define EUSERS 94 /* Too many users (for UFS) */
-
-/* BSD Networking Software */
- /* argument errors */
-#define ENOTSOCK 95 /* Socket operation on non-socket */
-#define EDESTADDRREQ 96 /* Destination address required */
-#define EMSGSIZE 97 /* Message too long */
-#define EPROTOTYPE 98 /* Protocol wrong type for socket */
-#define ENOPROTOOPT 99 /* Protocol not available */
-#define EPROTONOSUPPORT 120 /* Protocol not supported */
-#define ESOCKTNOSUPPORT 121 /* Socket type not supported */
-#define EOPNOTSUPP 122 /* Operation not supported on socket */
-#define EPFNOSUPPORT 123 /* Protocol family not supported */
-#define EAFNOSUPPORT 124 /* Address family not supported by */
- /* protocol family */
-#define EADDRINUSE 125 /* Address already in use */
-#define EADDRNOTAVAIL 126 /* Can't assign requested address */
- /* operational errors */
-#define ENETDOWN 127 /* Network is down */
-#define ENETUNREACH 128 /* Network is unreachable */
-#define ENETRESET 129 /* Network dropped connection because */
- /* of reset */
-#define ECONNABORTED 130 /* Software caused connection abort */
-#define ECONNRESET 131 /* Connection reset by peer */
-#define ENOBUFS 132 /* No buffer space available */
-#define EISCONN 133 /* Socket is already connected */
-#define ENOTCONN 134 /* Socket is not connected */
-/* XENIX has 135 - 142 */
-#define ESHUTDOWN 143 /* Can't send after socket shutdown */
-#define ETOOMANYREFS 144 /* Too many references: can't splice */
-#define ETIMEDOUT 145 /* Connection timed out */
-#define ECONNREFUSED 146 /* Connection refused */
-#define EHOSTDOWN 147 /* Host is down */
-#define EHOSTUNREACH 148 /* No route to host */
-#define EWOULDBLOCK EAGAIN
-#define EALREADY 149 /* operation already in progress */
-#define EINPROGRESS 150 /* operation now in progress */
-
-/* SUN Network File System */
-#define ESTALE 151 /* Stale NFS file handle */
-
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* _SYS_ERRNO_H */
diff --git a/doc/legacy/get_put_api_using_xattr.txt b/doc/legacy/get_put_api_using_xattr.txt
deleted file mode 100644
index 243f9f1aec2..00000000000
--- a/doc/legacy/get_put_api_using_xattr.txt
+++ /dev/null
@@ -1,22 +0,0 @@
-GlusterFS get/put API interface provided through extended attributes:
-
-API usage:
- int put(dirpath/filename, data): setfattr -n glusterfs.file.<filename> -v <data> <dirpath>
- void *get(dirpath/filename): getfattr -n glusterfs.file.<filename> <dirpath>
-
-
-internals:
-* unify handling setxattr/getxattr
- - setxattr
- unify's setxattr forwards setxattr call to all the child nodes with XATTR_REPLACE flag, except namespace. setxattr will succeeds only on the child node on which the file already exists. if the setxattr operation fails on all child nodes, it indicates that the file does not already exist on any of the child nodes. unify follows the same rules as it follows for create, but using setxattr call itself with XATTR_CREATE flag. unify sends a setxattr to namespace first, with zero length data. if namespace setxattr succeeds, unify schedules setxattr to one of the child nodes.
-
- - getxattr
- unify's getxattr forwards getxattr call to all the child nodes. wait for completion of operation on all the child nodes, and returns success if getxattr succeeded one child node.
-
-* posix handling setxattr/getxattr
- - setxattr
- posix setxattr does a open with O_CREAT|O_TRUNC on the <path>/<name>, writes value of the setxattr as data into the file and closes the file. when data is null, posix setxattr avoids doing write. file is closed after write.
-
- - getxattr
- posix getxattr does open with O_RDONLY on the <path>/<name>, reads the complete content of the file. file is closed after read.
-
diff --git a/doc/legacy/hacker-guide/Makefile.am b/doc/legacy/hacker-guide/Makefile.am
deleted file mode 100644
index 65c92ac235e..00000000000
--- a/doc/legacy/hacker-guide/Makefile.am
+++ /dev/null
@@ -1,8 +0,0 @@
-EXTRA_DIST = replicate.txt bdb.txt posix.txt call-stub.txt write-behind.txt
-
-#EXTRA_DIST = hacker-guide.tex afr.txt bdb.txt posix.txt call-stub.txt write-behind.txt
-#hacker_guidedir = $(docdir)
-#hacker_guide_DATA = hacker-guide.pdf
-
-#hacker-guide.pdf: $(EXTRA_DIST)
-# pdflatex $(srcdir)/hacker-guide.tex
diff --git a/doc/legacy/hacker-guide/adding-fops.txt b/doc/legacy/hacker-guide/adding-fops.txt
deleted file mode 100644
index e70dbbdc829..00000000000
--- a/doc/legacy/hacker-guide/adding-fops.txt
+++ /dev/null
@@ -1,33 +0,0 @@
- HOW TO ADD A NEW FOP TO GlusterFS
- =================================
-
-Steps to be followed when adding a new FOP to GlusterFS:
-
-1. Edit glusterfs.h and add a GF_FOP_* constant.
-
-2. Edit xlator.[ch] and:
- 2a. add the new prototype for fop and callback.
- 2b. edit xlator_fops structure.
-
-3. Edit xlator.c and add to fill_defaults.
-
-4. Edit protocol.h and add struct necessary for the new FOP.
-
-5. Edit defaults.[ch] and provide default implementation.
-
-6. Edit call-stub.[ch] and provide stub implementation.
-
-7. Edit common-utils.c and add to gf_global_variable_init().
-
-8. Edit client-protocol and add your FOP.
-
-9. Edit server-protocol and add your FOP.
-
-10. Implement your FOP in any translator for which the default implementation
- is not sufficient.
-
-==========================================
-Last updated: Mon Oct 27 21:35:49 IST 2008
-
-Author: Vikas Gorur <vikas@gluster.com>
-==========================================
diff --git a/doc/legacy/hacker-guide/bdb.txt b/doc/legacy/hacker-guide/bdb.txt
deleted file mode 100644
index 1a80be813f6..00000000000
--- a/doc/legacy/hacker-guide/bdb.txt
+++ /dev/null
@@ -1,70 +0,0 @@
-
-* How does file translates to key/value pair?
----------------------------------------------
-
- in bdb a file is identified by key (obtained by taking basename() of the path of
-the file) and file contents are stored as value corresponding to the key in database
-file (defaults to glusterfs_storage.db under dirname() directory).
-
-* symlinks, directories
------------------------
-
- symlinks and directories are stored as is.
-
-* db (database) files
----------------------
-
- every directory, including root directory, contains a database file called
-glusterfs_storage.db. all the regular files contained in the directory are stored
-as key/value pair inside the glusterfs_storage.db.
-
-* internal data cache
----------------------
-
- db does not provide a way to find out the size of the value corresponding to a key.
-so, bdb makes DB->get() call for key and takes the length of the value returned.
-since DB->get() also returns file contents for key, bdb maintains an internal cache and
-stores the file contents in the cache.
- every directory maintains a seperate cache.
-
-* inode number transformation
------------------------------
-
- bdb allocates a inode number to each file and directory on its own. bdb maintains a
-global counter and increments it after allocating inode number for each file
-(regular, symlink or directory). NOTE: bdb does not guarantee persistent inode numbers.
-
-* checkpoint thread
--------------------
-
- bdb creates a checkpoint thread at the time of init(). checkpoint thread does a
-periodic checkpoint on the DB_ENV. checkpoint is the mechanism, provided by db, to
-forcefully commit the logged transactions to the storage.
-
-NOTES ABOUT FOPS:
------------------
-
-lookup() -
- 1> do lstat() on the path, if lstat fails, we assume that the file being looked up
- is either a regular file or doesn't exist.
- 2> lookup in the DB of parent directory for key corresponding to path. if key exists,
- return key, with.
- NOTE: 'struct stat' stat()ed from DB file is used as a container for 'struct stat'
- of the regular file. st_ino, st_size, st_blocks are updated with file's values.
-
-readv() -
- 1> do a lookup in bctx cache. if successful, return the requested data from cache.
- 2> if cache missed, do a DB->get() the entire file content and insert to cache.
-
-writev():
- 1> flush any cached content of this file.
- 2> do a DB->put(), with DB_DBT_PARTIAL flag.
- NOTE: DB_DBT_PARTIAL is used to do partial update of a value in DB.
-
-readdir():
- 1> regular readdir() in a loop, and vomit all DB_ENV log files and DB files that
- we encounter.
- 2> if the readdir() buffer still has space, open a DB cursor and do a sequential
- DBC->get() to fill the reaadir buffer.
-
-
diff --git a/doc/legacy/hacker-guide/call-stub.txt b/doc/legacy/hacker-guide/call-stub.txt
deleted file mode 100644
index 021037a3512..00000000000
--- a/doc/legacy/hacker-guide/call-stub.txt
+++ /dev/null
@@ -1,1033 +0,0 @@
-creating a call stub and pausing a call
----------------------------------------
-libglusterfs provides seperate API to pause each of the fop. parameters to each API is
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
- NOTE: @fn should exactly take the same type and number of parameters that
- the corresponding regular fop takes.
-rest will be the regular parameters to corresponding fop.
-
-NOTE: @frame can never be NULL. fop_<operation>_stub() fails with errno
- set to EINVAL, if @frame is NULL. also wherever @loc is applicable,
- @loc cannot be NULL.
-
-refer to individual stub creation API to know about call-stub creation's behaviour with
-specific parameters.
-
-here is the list of stub creation APIs for xlator fops.
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@need_xattr - flag to specify if xattr should be returned or not.
-call_stub_t *
-fop_lookup_stub (call_frame_t *frame,
- fop_lookup_t fn,
- loc_t *loc,
- int32_t need_xattr);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-call_stub_t *
-fop_stat_stub (call_frame_t *frame,
- fop_stat_t fn,
- loc_t *loc);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to lk fop.
- NOTE: @fd is stored with a fd_ref().
-call_stub_t *
-fop_fstat_stub (call_frame_t *frame,
- fop_fstat_t fn,
- fd_t *fd);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to @loc->inode and
- @loc->parent, if not NULL. also @loc->path will be copied to a different location.
-@mode - mode parameter to chmod.
-call_stub_t *
-fop_chmod_stub (call_frame_t *frame,
- fop_chmod_t fn,
- loc_t *loc,
- mode_t mode);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to lk fop.
- NOTE: @fd is stored with a fd_ref().
-@mode - mode parameter for fchmod fop.
-call_stub_t *
-fop_fchmod_stub (call_frame_t *frame,
- fop_fchmod_t fn,
- fd_t *fd,
- mode_t mode);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to @loc->inode and
- @loc->parent, if not NULL. also @loc->path will be copied to a different location.
-@uid - uid parameter to chown.
-@gid - gid parameter to chown.
-call_stub_t *
-fop_chown_stub (call_frame_t *frame,
- fop_chown_t fn,
- loc_t *loc,
- uid_t uid,
- gid_t gid);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to lk fop.
- NOTE: @fd is stored with a fd_ref().
-@uid - uid parameter to fchown.
-@gid - gid parameter to fchown.
-call_stub_t *
-fop_fchown_stub (call_frame_t *frame,
- fop_fchown_t fn,
- fd_t *fd,
- uid_t uid,
- gid_t gid);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location, if not NULL.
-@off - offset parameter to truncate fop.
-call_stub_t *
-fop_truncate_stub (call_frame_t *frame,
- fop_truncate_t fn,
- loc_t *loc,
- off_t off);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to lk fop.
- NOTE: @fd is stored with a fd_ref().
-@off - offset parameter to ftruncate fop.
-call_stub_t *
-fop_ftruncate_stub (call_frame_t *frame,
- fop_ftruncate_t fn,
- fd_t *fd,
- off_t off);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@tv - tv parameter to utimens fop.
-call_stub_t *
-fop_utimens_stub (call_frame_t *frame,
- fop_utimens_t fn,
- loc_t *loc,
- struct timespec tv[2]);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@mask - mask parameter for access fop.
-call_stub_t *
-fop_access_stub (call_frame_t *frame,
- fop_access_t fn,
- loc_t *loc,
- int32_t mask);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@size - size parameter to readlink fop.
-call_stub_t *
-fop_readlink_stub (call_frame_t *frame,
- fop_readlink_t fn,
- loc_t *loc,
- size_t size);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@mode - mode parameter to mknod fop.
-@rdev - rdev parameter to mknod fop.
-call_stub_t *
-fop_mknod_stub (call_frame_t *frame,
- fop_mknod_t fn,
- loc_t *loc,
- mode_t mode,
- dev_t rdev);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@mode - mode parameter to mkdir fop.
-call_stub_t *
-fop_mkdir_stub (call_frame_t *frame,
- fop_mkdir_t fn,
- loc_t *loc,
- mode_t mode);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-call_stub_t *
-fop_unlink_stub (call_frame_t *frame,
- fop_unlink_t fn,
- loc_t *loc);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-call_stub_t *
-fop_rmdir_stub (call_frame_t *frame,
- fop_rmdir_t fn,
- loc_t *loc);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@linkname - linkname parameter to symlink fop.
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-call_stub_t *
-fop_symlink_stub (call_frame_t *frame,
- fop_symlink_t fn,
- const char *linkname,
- loc_t *loc);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@oldloc - pointer to location structure.
- NOTE: @oldloc will be copied to a different location, with inode_ref() to
- @oldloc->inode and @oldloc->parent, if not NULL. also @oldloc->path will
- be copied to a different location, if not NULL.
-@newloc - pointer to location structure.
- NOTE: @newloc will be copied to a different location, with inode_ref() to
- @newloc->inode and @newloc->parent, if not NULL. also @newloc->path will
- be copied to a different location, if not NULL.
-call_stub_t *
-fop_rename_stub (call_frame_t *frame,
- fop_rename_t fn,
- loc_t *oldloc,
- loc_t *newloc);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@newpath - newpath parameter to link fop.
-call_stub_t *
-fop_link_stub (call_frame_t *frame,
- fop_link_t fn,
- loc_t *oldloc,
- const char *newpath);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@flags - flags parameter to create fop.
-@mode - mode parameter to create fop.
-@fd - file descriptor parameter to create fop.
- NOTE: @fd is stored with a fd_ref().
-call_stub_t *
-fop_create_stub (call_frame_t *frame,
- fop_create_t fn,
- loc_t *loc,
- int32_t flags,
- mode_t mode, fd_t *fd);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@flags - flags parameter to open fop.
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-call_stub_t *
-fop_open_stub (call_frame_t *frame,
- fop_open_t fn,
- loc_t *loc,
- int32_t flags,
- fd_t *fd);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to lk fop.
- NOTE: @fd is stored with a fd_ref().
-@size - size parameter to readv fop.
-@off - offset parameter to readv fop.
-call_stub_t *
-fop_readv_stub (call_frame_t *frame,
- fop_readv_t fn,
- fd_t *fd,
- size_t size,
- off_t off);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to lk fop.
- NOTE: @fd is stored with a fd_ref().
-@vector - vector parameter to writev fop.
- NOTE: @vector is iov_dup()ed while creating stub. and frame->root->req_refs
- dictionary is dict_ref()ed.
-@count - count parameter to writev fop.
-@off - off parameter to writev fop.
-call_stub_t *
-fop_writev_stub (call_frame_t *frame,
- fop_writev_t fn,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to flush fop.
- NOTE: @fd is stored with a fd_ref().
-call_stub_t *
-fop_flush_stub (call_frame_t *frame,
- fop_flush_t fn,
- fd_t *fd);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to lk fop.
- NOTE: @fd is stored with a fd_ref().
-@datasync - datasync parameter to fsync fop.
-call_stub_t *
-fop_fsync_stub (call_frame_t *frame,
- fop_fsync_t fn,
- fd_t *fd,
- int32_t datasync);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to @loc->inode and
- @loc->parent, if not NULL. also @loc->path will be copied to a different location.
-@fd - file descriptor parameter to opendir fop.
- NOTE: @fd is stored with a fd_ref().
-call_stub_t *
-fop_opendir_stub (call_frame_t *frame,
- fop_opendir_t fn,
- loc_t *loc,
- fd_t *fd);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to getdents fop.
- NOTE: @fd is stored with a fd_ref().
-@size - size parameter to getdents fop.
-@off - off parameter to getdents fop.
-@flags - flags parameter to getdents fop.
-call_stub_t *
-fop_getdents_stub (call_frame_t *frame,
- fop_getdents_t fn,
- fd_t *fd,
- size_t size,
- off_t off,
- int32_t flag);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to setdents fop.
- NOTE: @fd is stored with a fd_ref().
-@flags - flags parameter to setdents fop.
-@entries - entries parameter to setdents fop.
-call_stub_t *
-fop_setdents_stub (call_frame_t *frame,
- fop_setdents_t fn,
- fd_t *fd,
- int32_t flags,
- dir_entry_t *entries,
- int32_t count);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to setdents fop.
- NOTE: @fd is stored with a fd_ref().
-@datasync - datasync parameter to fsyncdir fop.
-call_stub_t *
-fop_fsyncdir_stub (call_frame_t *frame,
- fop_fsyncdir_t fn,
- fd_t *fd,
- int32_t datasync);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-call_stub_t *
-fop_statfs_stub (call_frame_t *frame,
- fop_statfs_t fn,
- loc_t *loc);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@dict - dict parameter to setxattr fop.
- NOTE: stub creation procedure stores @dict pointer with dict_ref() to it.
-call_stub_t *
-fop_setxattr_stub (call_frame_t *frame,
- fop_setxattr_t fn,
- loc_t *loc,
- dict_t *dict,
- int32_t flags);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@name - name parameter to getxattr fop.
-call_stub_t *
-fop_getxattr_stub (call_frame_t *frame,
- fop_getxattr_t fn,
- loc_t *loc,
- const char *name);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@name - name parameter to removexattr fop.
- NOTE: name string will be copied to a different location while creating stub.
-call_stub_t *
-fop_removexattr_stub (call_frame_t *frame,
- fop_removexattr_t fn,
- loc_t *loc,
- const char *name);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to lk fop.
- NOTE: @fd is stored with a fd_ref().
-@cmd - command parameter to lk fop.
-@lock - lock parameter to lk fop.
- NOTE: lock will be copied to a different location while creating stub.
-call_stub_t *
-fop_lk_stub (call_frame_t *frame,
- fop_lk_t fn,
- fd_t *fd,
- int32_t cmd,
- struct flock *lock);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - fd parameter to gf_lk fop.
- NOTE: @fd is fd_ref()ed while creating stub, if not NULL.
-@cmd - cmd parameter to gf_lk fop.
-@lock - lock paramater to gf_lk fop.
- NOTE: @lock is copied to a different memory location while creating
- stub.
-call_stub_t *
-fop_gf_lk_stub (call_frame_t *frame,
- fop_gf_lk_t fn,
- fd_t *fd,
- int32_t cmd,
- struct flock *lock);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@fd - file descriptor parameter to readdir fop.
- NOTE: @fd is stored with a fd_ref().
-@size - size parameter to readdir fop.
-@off - offset parameter to readdir fop.
-call_stub_t *
-fop_readdir_stub (call_frame_t *frame,
- fop_readdir_t fn,
- fd_t *fd,
- size_t size,
- off_t off);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@loc - pointer to location structure.
- NOTE: @loc will be copied to a different location, with inode_ref() to
- @loc->inode and @loc->parent, if not NULL. also @loc->path will be
- copied to a different location.
-@flags - flags parameter to checksum fop.
-call_stub_t *
-fop_checksum_stub (call_frame_t *frame,
- fop_checksum_t fn,
- loc_t *loc,
- int32_t flags);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@inode - inode parameter to @fn.
- NOTE: @inode pointer is stored with a inode_ref().
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-@dict - dict parameter to @fn.
- NOTE: @dict pointer is stored with dict_ref().
-call_stub_t *
-fop_lookup_cbk_stub (call_frame_t *frame,
- fop_lookup_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf,
- dict_t *dict);
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_stat_cbk_stub (call_frame_t *frame,
- fop_stat_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_fstat_cbk_stub (call_frame_t *frame,
- fop_fstat_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_chmod_cbk_stub (call_frame_t *frame,
- fop_chmod_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_fchmod_cbk_stub (call_frame_t *frame,
- fop_fchmod_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_chown_cbk_stub (call_frame_t *frame,
- fop_chown_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_fchown_cbk_stub (call_frame_t *frame,
- fop_fchown_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_truncate_cbk_stub (call_frame_t *frame,
- fop_truncate_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_ftruncate_cbk_stub (call_frame_t *frame,
- fop_ftruncate_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_utimens_cbk_stub (call_frame_t *frame,
- fop_utimens_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_access_cbk_stub (call_frame_t *frame,
- fop_access_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@path - path parameter to @fn.
- NOTE: @path is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_readlink_cbk_stub (call_frame_t *frame,
- fop_readlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- const char *path);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@inode - inode parameter to @fn.
- NOTE: @inode pointer is stored with a inode_ref().
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_mknod_cbk_stub (call_frame_t *frame,
- fop_mknod_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@inode - inode parameter to @fn.
- NOTE: @inode pointer is stored with a inode_ref().
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_mkdir_cbk_stub (call_frame_t *frame,
- fop_mkdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_unlink_cbk_stub (call_frame_t *frame,
- fop_unlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_rmdir_cbk_stub (call_frame_t *frame,
- fop_rmdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@inode - inode parameter to @fn.
- NOTE: @inode pointer is stored with a inode_ref().
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_symlink_cbk_stub (call_frame_t *frame,
- fop_symlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_rename_cbk_stub (call_frame_t *frame,
- fop_rename_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@inode - inode parameter to @fn.
- NOTE: @inode pointer is stored with a inode_ref().
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_link_cbk_stub (call_frame_t *frame,
- fop_link_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct stat *buf);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@fd - fd parameter to @fn.
- NOTE: @fd pointer is stored with a fd_ref().
-@inode - inode parameter to @fn.
- NOTE: @inode pointer is stored with a inode_ref().
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_create_cbk_stub (call_frame_t *frame,
- fop_create_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct stat *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@fd - fd parameter to @fn.
- NOTE: @fd pointer is stored with a fd_ref().
-call_stub_t *
-fop_open_cbk_stub (call_frame_t *frame,
- fop_open_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@vector - vector parameter to @fn.
- NOTE: @vector is copied to a different memory location, if not NULL. also
- frame->root->rsp_refs is dict_ref()ed.
-@stbuf - stbuf parameter to @fn.
- NOTE: @stbuf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_readv_cbk_stub (call_frame_t *frame,
- fop_readv_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct stat *stbuf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@stbuf - stbuf parameter to @fn.
- NOTE: @stbuf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_writev_cbk_stub (call_frame_t *frame,
- fop_writev_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *stbuf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_flush_cbk_stub (call_frame_t *frame,
- fop_flush_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_fsync_cbk_stub (call_frame_t *frame,
- fop_fsync_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@fd - fd parameter to @fn.
- NOTE: @fd pointer is stored with a fd_ref().
-call_stub_t *
-fop_opendir_cbk_stub (call_frame_t *frame,
- fop_opendir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@entries - entries parameter to @fn.
-@count - count parameter to @fn.
-call_stub_t *
-fop_getdents_cbk_stub (call_frame_t *frame,
- fop_getdents_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_setdents_cbk_stub (call_frame_t *frame,
- fop_setdents_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_fsyncdir_cbk_stub (call_frame_t *frame,
- fop_fsyncdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@buf - buf parameter to @fn.
- NOTE: @buf is copied to a different memory location, if not NULL.
-call_stub_t *
-fop_statfs_cbk_stub (call_frame_t *frame,
- fop_statfs_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_setxattr_cbk_stub (call_frame_t *frame,
- fop_setxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@value - value dictionary parameter to @fn.
- NOTE: @value pointer is stored with a dict_ref().
-call_stub_t *
-fop_getxattr_cbk_stub (call_frame_t *frame,
- fop_getxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *value);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-call_stub_t *
-fop_removexattr_cbk_stub (call_frame_t *frame,
- fop_removexattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@lock - lock parameter to @fn.
- NOTE: @lock is copied to a different memory location while creating
- stub.
-call_stub_t *
-fop_lk_cbk_stub (call_frame_t *frame,
- fop_lk_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock);
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@lock - lock parameter to @fn.
- NOTE: @lock is copied to a different memory location while creating
- stub.
-call_stub_t *
-fop_gf_lk_cbk_stub (call_frame_t *frame,
- fop_gf_lk_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct flock *lock);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@entries - entries parameter to @fn.
-call_stub_t *
-fop_readdir_cbk_stub (call_frame_t *frame,
- fop_readdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries);
-
-
-@frame - call frame which has to be used to resume the call at call_resume().
-@fn - procedure to call during call_resume().
-@op_ret - op_ret parameter to @fn.
-@op_errno - op_errno parameter to @fn.
-@file_checksum - file_checksum parameter to @fn.
- NOTE: file_checksum will be copied to a different memory location
- while creating stub.
-@dir_checksum - dir_checksum parameter to @fn.
- NOTE: file_checksum will be copied to a different memory location
- while creating stub.
-call_stub_t *
-fop_checksum_cbk_stub (call_frame_t *frame,
- fop_checksum_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum);
-
-resuming a call:
----------------
- call can be resumed using call stub through call_resume API.
-
- void call_resume (call_stub_t *stub);
-
- stub - call stub created during pausing a call.
-
- NOTE: call_resume() will decrease reference count of any fd_t, dict_t and inode_t that it finds
- in stub->args.<operation>.<fd_t-or-inode_t-or-dict_t>. so, if any fd_t, dict_t or
- inode_t pointers are assigned at stub->args.<operation>.<fd_t-or-inode_t-or-dict_t> after
- fop_<operation>_stub() call, they must be <fd_t-or-inode_t-or-dict_t>_ref()ed.
-
- call_resume does not STACK_DESTROY() for any fop.
-
- if stub->fn is NULL, call_resume does STACK_WIND() or STACK_UNWIND() using the stub->frame.
-
- return - call resume fails only if stub is NULL. call resume fails with errno set to EINVAL.
diff --git a/doc/legacy/hacker-guide/hacker-guide.tex b/doc/legacy/hacker-guide/hacker-guide.tex
deleted file mode 100644
index 11101e7a87a..00000000000
--- a/doc/legacy/hacker-guide/hacker-guide.tex
+++ /dev/null
@@ -1,309 +0,0 @@
-\documentclass{book}[12pt]
-\usepackage{graphicx}
-% \usepackage{fancyhdr}
-
-% \pagestyle{fancy}
-\begin{document}
-
-% \headheight 117pt
-% \rhead{\includegraphics{zr-logo.eps}}
-
-\author{Gluster}
-\title{GlusterFS 1.3 Hacker's Guide}
-\date{June 1, 2007}
-
-\maketitle
-\frontmatter
-\tableofcontents
-
-\mainmatter
-\chapter{Introduction}
-
-\section{Coding guidelines}
-GlusterFS uses Git for version control. To get the latest source do:
-\begin{verbatim}
- $ git clone git://git.gluster.com/glusterfs.git glusterfs
-\end{verbatim}
-\noindent
-GlusterFS follows the GNU coding
-standards\footnote{http://www.gnu.org/prep/standards\_toc.html} for the
-most part.
-
-\chapter{Major components}
-\section{libglusterfs}
-\texttt{libglusterfs} contains supporting code used by all the other components.
-The important files here are:
-
-\texttt{dict.c}: This is an implementation of a serializable dictionary type. It is
-used by the protocol code to send requests and replies. It is also used to pass options
-to translators.
-
-\texttt{logging.c}: This is a thread-safe logging library. The log messages go to a
-file (default \texttt{/usr/local/var/log/glusterfs/*}).
-
-\texttt{protocol.c}: This file implements the GlusterFS on-the-wire
-protocol. The protocol itself is a simple ASCII protocol, designed to
-be easy to parse and be human readable.
-
-A sample GlusterFS protocol block looks like this:
-\begin{verbatim}
- Block Start header
- 0000000000000023 callid
- 00000001 type
- 00000016 op
- xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx human-readable name
- 00000000000000000000000000000ac3 block size
- <...> block
- Block End
-\end{verbatim}
-
-\texttt{stack.h}: This file defines the \texttt{STACK\_WIND} and
-\texttt{STACK\_UNWIND} macros which are used to implement the parallel
-stack that is maintained for inter-xlator calls. See the \textsl{Taking control
-of the stack} section below for more details.
-
-\texttt{spec.y}: This contains the Yacc grammar for the GlusterFS
-specification file, and the parsing code.
-
-
-Draw diagrams of trees
-Two rules:
-(1) directory structure is same
-(2) file can exist only on one node
-
-\section{glusterfs-fuse}
-\section{glusterfsd}
-\section{transport}
-\section{scheduler}
-\section{xlator}
-
-\chapter{xlators}
-\section{Taking control of the stack}
-One can think of STACK\_WIND/UNWIND as a very specific RPC mechanism.
-
-% \includegraphics{stack.eps}
-
-\section{Overview of xlators}
-
-\flushleft{\LARGE\texttt{cluster/}}
-\vskip 2ex
-\flushleft{\Large\texttt{afr}}
-\vskip 2ex
-\flushleft{\Large\texttt{stripe}}
-\vskip 2ex
-\flushleft{\Large\texttt{unify}}
-
-\vskip 4ex
-\flushleft{\LARGE\texttt{debug/}}
-\vskip 2ex
-\flushleft{\Large\texttt{trace}}
-\vskip 2ex
-The trace xlator simply logs all fops and mops, and passes them through to its child.
-
-\vskip 4ex
-\flushleft{\LARGE\texttt{features/}}
-\flushleft{\Large\texttt{posix-locks}}
-\vskip 2ex
-This xlator implements \textsc{posix} record locking semantics over
-any kind of storage.
-
-\vskip 4ex
-\flushleft{\LARGE\texttt{performance/}}
-
-\flushleft{\Large\texttt{io-threads}}
-\vskip 2ex
-\flushleft{\Large\texttt{read-ahead}}
-\vskip 2ex
-\flushleft{\Large\texttt{stat-prefetch}}
-\vskip 2ex
-\flushleft{\Large\texttt{write-behind}}
-\vskip 2ex
-
-\vskip 4ex
-\flushleft{\LARGE\texttt{protocol/}}
-\vskip 2ex
-
-\flushleft{\Large\texttt{client}}
-\vskip 2ex
-
-\flushleft{\Large\texttt{server}}
-\vskip 2ex
-
-\vskip 4ex
-\flushleft{\LARGE\texttt{storage/}}
-\flushleft{\Large\texttt{posix}}
-\vskip 2ex
-The \texttt{posix} xlator is the one which actually makes calls to the
-on-disk filesystem. Currently this is the only storage xlator available. However,
-plans to develop other storage xlators, such as one for Amazon's S3 service, are
-on the roadmap.
-
-\chapter{Writing a simple xlator}
-\noindent
-In this section we're going to write a rot13 xlator. ``Rot13'' is a
-simple substitution cipher which obscures a text by replacing each
-letter with the letter thirteen places down the alphabet. So `a' (0)
-would become `n' (12), `b' would be 'm', and so on. Rot13 applied to
-a piece of ciphertext yields the plaintext again, because rot13 is its
-own inverse, since:
-
-\[
-x_c = x + 13\; (mod\; 26)
-\]
-\[
-x_c + 13\; (mod\; 26) = x + 13 + 13\; (mod\; 26) = x
-\]
-
-First we include the requisite headers.
-
-\begin{verbatim}
-#include <ctype.h>
-#include <sys/uio.h>
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-
-/*
- * This is a rot13 ``encryption'' xlator. It rot13's data when
- * writing to disk and rot13's it back when reading it.
- * This xlator is meant as an example, not for production
- * use ;) (hence no error-checking)
- */
-
-\end{verbatim}
-
-Then we write the rot13 function itself. For simplicity, we only transform lower case
-letters. Any other byte is passed through as it is.
-
-\begin{verbatim}
-/* We only handle lower case letters for simplicity */
-static void
-rot13 (char *buf, int len)
-{
- int i;
- for (i = 0; i < len; i++) {
- if (isalpha (buf[i]))
- buf[i] = (buf[i] - 'a' + 13) % 26;
- else if (buf[i] <= 26)
- buf[i] = (buf[i] + 13) % 26 + 'a';
- }
-}
-\end{verbatim}
-
-Next comes a utility function whose purpose will be clear after looking at the code
-below.
-
-\begin{verbatim}
-static void
-rot13_iovec (struct iovec *vector, int count)
-{
- int i;
- for (i = 0; i < count; i++) {
- rot13 (vector[i].iov_base, vector[i].iov_len);
- }
-}
-\end{verbatim}
-
-\begin{verbatim}
-static int32_t
-rot13_readv_cbk (call_frame_t *frame,
- call_frame_t *prev_frame,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count)
-{
- rot13_iovec (vector, count);
-
- STACK_UNWIND (frame, op_ret, op_errno, vector, count);
- return 0;
-}
-
-static int32_t
-rot13_readv (call_frame_t *frame,
- xlator_t *this,
- dict_t *ctx,
- size_t size,
- off_t offset)
-{
- STACK_WIND (frame,
- rot13_readv_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readv,
- ctx, size, offset);
- return 0;
-}
-
-static int32_t
-rot13_writev_cbk (call_frame_t *frame,
- call_frame_t *prev_frame,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-static int32_t
-rot13_writev (call_frame_t *frame,
- xlator_t *this,
- dict_t *ctx,
- struct iovec *vector,
- int32_t count,
- off_t offset)
-{
- rot13_iovec (vector, count);
-
- STACK_WIND (frame,
- rot13_writev_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->writev,
- ctx, vector, count, offset);
- return 0;
-}
-
-\end{verbatim}
-
-Every xlator must define two functions and two external symbols. The functions are
-\texttt{init} and \texttt{fini}, and the symbols are \texttt{fops} and \texttt{mops}.
-The \texttt{init} function is called when the xlator is loaded by GlusterFS, and
-contains code for the xlator to initialize itself. Note that if an xlator is present
-multiple times in the spec tree, the \texttt{init} function will be called each time
-the xlator is loaded.
-
-\begin{verbatim}
-int32_t
-init (xlator_t *this)
-{
- if (!this->children) {
- gf_log ("rot13", GF_LOG_ERROR,
- "FATAL: rot13 should have exactly one child");
- return -1;
- }
-
- gf_log ("rot13", GF_LOG_DEBUG, "rot13 xlator loaded");
- return 0;
-}
-\end{verbatim}
-
-\begin{verbatim}
-
-void
-fini (xlator_t *this)
-{
- return;
-}
-
-struct xlator_fops fops = {
- .readv = rot13_readv,
- .writev = rot13_writev
-};
-
-
-\end{verbatim}
-
-\end{document}
-
diff --git a/doc/legacy/hacker-guide/lock-ahead.txt b/doc/legacy/hacker-guide/lock-ahead.txt
deleted file mode 100644
index 70aa452d3de..00000000000
--- a/doc/legacy/hacker-guide/lock-ahead.txt
+++ /dev/null
@@ -1,80 +0,0 @@
- Lock-ahead translator
- ---------------------
-
-The objective of the lock-ahead translator is to speculatively
-hold locks (inodelk and entrylk) on the universal set (0 - infinity
-in case of inodelk and all basenames in case of entrylk) even
-when a lock is requested only on a subset, in anticipation that
-further locks will be requested within the same universal set.
-
-So, for example, when cluster/replicate locks a region before
-writing to it, lock-ahead would instead lock the entire file.
-On further writes, lock-ahead can immediately return success for
-the lock requests, since the entire file has been previously locked.
-
-To avoid starvation of other clients/mountpoints, we employ a
-notify mechanism, described below.
-
-typedef struct {
- struct list_head subset_locks;
-} la_universal_lock_t;
-
-Universal lock structure is stored in the inode context.
-
-typedef struct {
- enum {LOCK_AHEAD_ENTRYLK, LOCK_AHEAD_FENTRYLK,
- LOCK_AHEAD_INODELK, LOCK_AHEAD_FINODELK};
-
- union {
- fd_t *fd;
- loc_t loc;
- };
-
- off_t l_start;
- off_t l_len;
-
- const char *basename;
-
- struct list_head universal_lock;
-} la_subset_lock_t;
-
-
-fops implemented:
-
-* inodelk/finodelk/entrylk/fentrylk:
-
-lock:
- if universal lock held:
- add subset to it (save loc_t or fd) and return success
- else:
- send lock-notify fop
- hold universal lock and return
- (set inode context, add subset to it, save loc_t or fd)
-
- if this fails:
- forward the lock request
-
-unlock:
- if subset exists in universal lock:
- delete subset lock from list
- else:
- forward it
-
-* release:
- hold subset locks (each subset lock using the saved loc_t or fd)
- and release universal lock
-
-* lock-notify (on unwind) (new fop)
- hold subset locks and release universal lock
-
-
-lock-notify in locks translator:
-
-if a subset lock in entrylk/inodelk cannot be satisfied
-because of a universal lock held by someone else:
- unwind the lock-notify fop
-
-==============================================
-$ Last updated: Tue Feb 17 11:31:18 IST 2009 $
-$ Author: Vikas Gorur <vikas@gluster.com> $
-==============================================
diff --git a/doc/legacy/hacker-guide/posix.txt b/doc/legacy/hacker-guide/posix.txt
deleted file mode 100644
index 7958af2ea7d..00000000000
--- a/doc/legacy/hacker-guide/posix.txt
+++ /dev/null
@@ -1,59 +0,0 @@
----------------
-* storage/posix
----------------
-
-- SET_FS_ID
-
- This is so that all filesystem checks are done with the user's
- uid/gid and not GlusterFS's uid/gid.
-
-- MAKE_REAL_PATH
-
- This macro concatenates the base directory of the posix volume
- ('option directory') with the given path.
-
-- need_xattr in lookup
-
- If this flag is passed, lookup returns a xattr dictionary that contains
- the file's create time, the file's contents, and the version number
- of the file.
-
- This is a hack to increase small file performance. If an application
- wants to read a small file, it can finish its job with just a lookup
- call instead of a lookup followed by read.
-
-- getdents/setdents
-
- These are used by unify to set and get directory entries.
-
-- ALIGN_BUF
-
- Macro to align an address to a page boundary (4K).
-
-- priv->export_statfs
-
- In some cases, two exported volumes may reside on the same
- partition on the server. Sending statvfs info for both
- the volumes will lead to erroneous df output at the client,
- since free space on the partition will be counted twice.
-
- In such cases, user can disable exporting statvfs info
- on one of the volumes by setting this option.
-
-- xattrop
-
- This fop is used by replicate to set version numbers on files.
-
-- getxattr/setxattr hack to read/write files
-
- A key, GLUSTERFS_FILE_CONTENT_STRING, is handled in a special way by
- getxattr/setxattr. A getxattr with the key will return the entire
- content of the file as the value. A setxattr with the key will write
- the value as the entire content of the file.
-
-- posix_checksum
-
- This calculates a simple XOR checksum on all entry names in a
- directory that is used by unify to compare directory contents.
-
-
diff --git a/doc/legacy/hacker-guide/replicate.txt b/doc/legacy/hacker-guide/replicate.txt
deleted file mode 100644
index 133c72afa91..00000000000
--- a/doc/legacy/hacker-guide/replicate.txt
+++ /dev/null
@@ -1,206 +0,0 @@
----------------
-* cluster/replicate
----------------
-
-Before understanding replicate, one must understand two internal FOPs:
-
-GF_FILE_LK:
- This is exactly like fcntl(2) locking, except the locks are in a
- separate domain from locks held by applications.
-
-GF_DIR_LK (loc_t *loc, char *basename):
- This allows one to lock a name under a directory. For example,
- to lock /mnt/glusterfs/foo, one would use the call:
-
- GF_DIR_LK ({loc_t for "/mnt/glusterfs"}, "foo")
-
- If one wishes to lock *all* the names under a particular directory,
- supply the basename argument as NULL.
-
- The locks can either be read locks or write locks; consult the
- function prototype for more details.
-
-Both these operations are implemented by the features/locks (earlier
-known as posix-locks) translator.
-
---------------
-* Basic design
---------------
-
-All FOPs can be classified into four major groups:
-
- - inode-read
- Operations that read an inode's data (file contents) or metadata (perms, etc.).
-
- access, getxattr, fstat, readlink, readv, stat.
-
- - inode-write
- Operations that modify an inode's data or metadata.
-
- chmod, chown, truncate, writev, utimens.
-
- - dir-read
- Operations that read a directory's contents or metadata.
-
- readdir, getdents, checksum.
-
- - dir-write
- Operations that modify a directory's contents or metadata.
-
- create, link, mkdir, mknod, rename, rmdir, symlink, unlink.
-
- Some of these make a subgroup in that they modify *two* different entries:
- link, rename, symlink.
-
- - Others
- Other operations.
-
- flush, lookup, open, opendir, statfs.
-
-------------
-* Algorithms
-------------
-
-Each of the four major groups has its own algorithm:
-
- ----------------------
- - inode-read, dir-read
- ----------------------
-
- = Send a request to the first child that is up:
- - if it fails:
- try the next available child
- - if we have exhausted all children:
- return failure
-
- -------------
- - inode-write
- -------------
-
- All operations are done in parallel unless specified otherwise.
-
- (1) Send a GF_FILE_LK request on all children for a write lock on
- the appropriate region
- (for metadata operations: entire file (0, 0)
- for writev: (offset, offset+size of buffer))
-
- - If a lock request fails on a child:
- unlock all children
- try to acquire a blocking lock (F_SETLKW) on each child, serially.
-
- If this fails (due to ENOTCONN or EINVAL):
- Consider this child as dead for rest of transaction.
-
- (2) Mark all children as "pending" on all (alive) children
- (see below for meaning of "pending").
-
- - If it fails on any child:
- mark it as dead (in transaction local state).
-
- (3) Perform operation on all (alive) children.
-
- - If it fails on any child:
- mark it as dead (in transaction local state).
-
- (4) Unmark all successful children as not "pending" on all nodes.
-
- (5) Unlock region on all (alive) children.
-
- -----------
- - dir-write
- -----------
-
- The algorithm for dir-write is same as above except instead of holding
- GF_FILE_LK locks we hold a GF_DIR_LK lock on the name being operated upon.
- In case of link-type calls, we hold locks on both the operand names.
-
------------
-* "pending"
------------
-
- The "pending" number is like a journal entry. A pending entry is an
- array of 32-bit integers stored in network byte-order as the extended
- attribute of an inode (which can be a directory as well).
-
- There are three keys corresponding to three types of pending operations:
-
- - AFR_METADATA_PENDING
- There are some metadata operations pending on this inode (perms, ctime/mtime,
- xattr, etc.).
-
- - AFR_DATA_PENDING
- There is some data pending on this inode (writev).
-
- - AFR_ENTRY_PENDING
- There are some directory operations pending on this directory
- (create, unlink, etc.).
-
------------
-* Self heal
------------
-
- - On lookup, gather extended attribute data:
- - If entry is a regular file:
- - If an entry is present on one child and not on others:
- - create entry on others.
- - If entries exist but have different metadata (perms, etc.):
- - consider the entry with the highest AFR_METADATA_PENDING number as
- definitive and replicate its attributes on children.
-
- - If entry is a directory:
- - Consider the entry with the higest AFR_ENTRY_PENDING number as
- definitive and replicate its contents on all children.
-
- - If any two entries have non-matching types (i.e., one is file and
- other is directory):
- - Announce to the user via log that a split-brain situation has been
- detected, and do nothing.
-
- - On open, gather extended attribute data:
- - Consider the file with the highest AFR_DATA_PENDING number as
- the definitive one and replicate its contents on all other
- children.
-
- During all self heal operations, appropriate locks must be held on all
- regions/entries being affected.
-
----------------
-* Inode scaling
----------------
-
-Inode scaling is necessary because if a situation arises where:
- - An inode number is returned for a directory (by lookup) which was
- previously the inode number of a file (as per FUSE's table), then
- FUSE gets horribly confused (consult a FUSE expert for more details).
-
-To avoid such a situation, we distribute the 64-bit inode space equally
-among all children of replicate.
-
-To illustrate:
-
-If c1, c2, c3 are children of replicate, they each get 1/3 of the available
-inode space:
-
-Child: c1 c2 c3 c1 c2 c3 c1 c2 c3 c1 c2 ...
-Inode number: 1 2 3 4 5 6 7 8 9 10 11 ...
-
-Thus, if lookup on c1 returns an inode number "2", it is scaled to "4"
-(which is the second inode number in c1's space).
-
-This way we ensure that there is never a collision of inode numbers from
-two different children.
-
-This reduction of inode space doesn't really reduce the usability of
-replicate since even if we assume replicate has 1024 children (which would be a
-highly unusual scenario), each child still has a 54-bit inode space.
-
-2^54 ~ 1.8 * 10^16
-
-which is much larger than any real world requirement.
-
-
-==============================================
-$ Last updated: Sun Oct 12 23:17:01 IST 2008 $
-$ Author: Vikas Gorur <vikas@gluster.com> $
-==============================================
-
diff --git a/doc/legacy/hacker-guide/write-behind.txt b/doc/legacy/hacker-guide/write-behind.txt
deleted file mode 100644
index 50b7d2a1d07..00000000000
--- a/doc/legacy/hacker-guide/write-behind.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-basic working
---------------
-
- write behind is basically a translator to lie to the application that the write-requests are finished, even before it is actually finished.
-
- on a regular translator tree without write-behind, control flow is like this:
-
- 1. application makes a write() system call.
- 2. VFS ==> FUSE ==> /dev/fuse.
- 3. fuse-bridge initiates a glusterfs writev() call.
- 4. writev() is STACK_WIND()ed upto client-protocol or storage translator.
- 5. client-protocol, on receiving reply from server, starts STACK_UNWIND() towards the fuse-bridge.
-
- on a translator tree with write-behind, control flow is like this:
-
- 1. application makes a write() system call.
- 2. VFS ==> FUSE ==> /dev/fuse.
- 3. fuse-bridge initiates a glusterfs writev() call.
- 4. writev() is STACK_WIND()ed upto write-behind translator.
- 5. write-behind adds the write buffer to its internal queue and does a STACK_UNWIND() towards the fuse-bridge.
-
- write call is completed in application's percepective. after STACK_UNWIND()ing towards the fuse-bridge, write-behind initiates a fresh writev() call to its child translator, whose replies will be consumed by write-behind itself. write-behind _doesn't_ cache the write buffer, unless 'option flush-behind on' is specified in volume specification file.
-
-windowing
----------
-
- write respect to write-behind, each write-buffer has three flags: 'stack_wound', 'write_behind' and 'got_reply'.
-
- stack_wound: if set, indicates that write-behind has initiated STACK_WIND() towards child translator.
-
- write_behind: if set, indicates that write-behind has done STACK_UNWIND() towards fuse-bridge.
-
- got_reply: if set, indicates that write-behind has received reply from child translator for a writev() STACK_WIND(). a request will be destroyed by write-behind only if this flag is set.
-
- currently pending write requests = aggregate size of requests with write_behind = 1 and got_reply = 0.
-
- window size limits the aggregate size of currently pending write requests. once the pending requests' size has reached the window size, write-behind blocks writev() calls from fuse-bridge.
- blocking is only from application's perspective. write-behind does STACK_WIND() to child translator straight-away, but hold behind the STACK_UNWIND() towards fuse-bridge. STACK_UNWIND() is done only once write-behind gets enough replies to accomodate for currently blocked request.
-
-flush behind
-------------
-
- if 'option flush-behind on' is specified in volume specification file, then write-behind sends aggregate write requests to child translator, instead of regular per request STACK_WIND()s.
-
-
diff --git a/doc/legacy/handling-options.txt b/doc/legacy/handling-options.txt
deleted file mode 100644
index 9a3b2510acb..00000000000
--- a/doc/legacy/handling-options.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-
-How to add a new option to a given volume ?
-===========================================
-
-* Add a entry in 'struct volume_options options[]' with your key, what is
- the type of the 'key', etc.
-
-* The 'key' and corresponding 'value' given for the same by user are validated
- before calling init() of the translator/transport/scheduler/auth-module.
-
-* Once the complete init() is successful, user will get a warning if he has
- given a 'key' which is not defined in these modules.
-
diff --git a/doc/legacy/mac-related-xattrs.txt b/doc/legacy/mac-related-xattrs.txt
deleted file mode 100644
index 92bb2ceef2d..00000000000
--- a/doc/legacy/mac-related-xattrs.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-
-This document is intended to briefly explain how the Extended Attributes on
-Darwin 10.5.x releases works
-----
-
-On Darwin other than all the normal filesystem operations, 'Finder' (like
-Explorer in Windows but a little more) keeps its information in two extended
-attributes named 'com.apple.FinderInfo' and 'com.apple.ResourceFork'. If these
-xattrs are not implemented the filesystem won't be shown on Finder, and if they
-are not implemented properly there may be issues when some of the file operations
-are done through GUI of Finder. But when a filesystem is used over mountpoint in a
-terminal, everything is fine and these xattrs are not required.
-
-Currently the way these xattrs are implemented is simple. All the xattr calls
-(getxattr, setxattr, listxattr, removexattr) are passed down to underlaying filesystem,
-most of the cases when exported FS is on MacOS X itself, these keys are supported, hence
-the fops succeed. But in the case of using exports of different OS on Darwin the issue is
-extended attribute prefix like 'com.apple.' may not be supported, hence the problem with
-Finder. To solve this issue, GlusterFS returns virtual default values to these keys, which
-works fine on most of the cases.
-
diff --git a/doc/legacy/porting_guide.txt b/doc/legacy/porting_guide.txt
deleted file mode 100644
index 5705cd96461..00000000000
--- a/doc/legacy/porting_guide.txt
+++ /dev/null
@@ -1,45 +0,0 @@
- GlusterFS Porting Guide
- -----------------------
-
-* General setup
-
-The configure script will detect the target platform for the build.
-All platform-specific CFLAGS, macro definitions should be done
-in configure.ac
-
-Platform-specific code can be written like this:
-
-#ifdef GF_DARWIN_HOST_OS
- /* some code specific to Darwin */
-#endif
-
-* Coding guidelines
-
-In general, avoid glibc extensions. For example, nested functions don't work
-on Mac OS X. It is best to stick to C99.
-
-When using library calls and system calls, pay attention to the
-portability notes. As far as possible stick to POSIX-specified behavior.
-Do not use anything expressly permitted by the specification. For example,
-some fields in structures may be present only on certain platforms. Avoid
-use of such things.
-
-Do not pass values of constants such as F_*, O_*, errno values, etc. across
-platforms.
-
-Please refer compat-errno.h for more details about errno handling inside
-glusterfs for cross platform.
-
-* Specific issues
-
-- The argp library is available only on Linux through glibc, but for other
- platforms glusterfs has already included argp-standalone library which will
- statically linked during the glusterfs build.
-
-- Extended attribute calls (setxattr, listxattr, etc.) have differing prototypes
- on different platforms. See compat.h for macro definitions to resolve this, also
- read out the specific extended attribute documentation for your platforms.
-
-------------------------------------------
-Last revised: Thu Feb 28 13:58:07 IST 2008
-------------------------------------------
diff --git a/doc/legacy/replicate.lyx b/doc/legacy/replicate.lyx
deleted file mode 100644
index 58ba6b2e00e..00000000000
--- a/doc/legacy/replicate.lyx
+++ /dev/null
@@ -1,797 +0,0 @@
-#LyX 1.4.2 created this file. For more info see http://www.lyx.org/
-\lyxformat 245
-\begin_document
-\begin_header
-\textclass article
-\language english
-\inputencoding auto
-\fontscheme default
-\graphics default
-\paperfontsize default
-\spacing single
-\papersize default
-\use_geometry false
-\use_amsmath 1
-\cite_engine basic
-\use_bibtopic false
-\paperorientation portrait
-\secnumdepth 3
-\tocdepth 3
-\paragraph_separation skip
-\defskip medskip
-\quotes_language english
-\papercolumns 1
-\papersides 1
-\paperpagestyle default
-\tracking_changes false
-\output_changes false
-\end_header
-
-\begin_body
-
-\begin_layout Title
-
-\size larger
-Automatic File Replication (replicate) in GlusterFS
-\end_layout
-
-\begin_layout Author
-Vikas Gorur
-\family typewriter
-\size larger
-<vikas@gluster.com>
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Standard
-
-
-\backslash
-hrule
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Section*
-Overview
-\end_layout
-
-\begin_layout Standard
-This document describes the design and usage of the replicate translator in GlusterFS.
- This document is valid for the 1.4.x releases, and not earlier ones.
-\end_layout
-
-\begin_layout Standard
-The replicate translator of GlusterFS aims to keep identical copies of a file
- on all its subvolumes, as far as possible.
- It tries to do this by performing all filesystem mutation operations (writing
- data, creating files, changing ownership, etc.) on all its subvolumes in
- such a way that if an operation succeeds on atleast one subvolume, all
- other subvolumes can later be brought up to date.
-\end_layout
-
-\begin_layout Standard
-In the rest of the document the terms
-\begin_inset Quotes eld
-\end_inset
-
-subvolume
-\begin_inset Quotes erd
-\end_inset
-
- and
-\begin_inset Quotes eld
-\end_inset
-
-server
-\begin_inset Quotes erd
-\end_inset
-
- are used interchangeably, trusting that it will cause no confusion to the
- reader.
-\end_layout
-
-\begin_layout Section*
-Usage
-\end_layout
-
-\begin_layout Standard
-A sample volume declaration for replicate looks like this:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Standard
-
-
-\backslash
-begin{verbatim}
-\end_layout
-
-\begin_layout Standard
-
-volume replicate
-\end_layout
-
-\begin_layout Standard
-
- type cluster/replicate
-\end_layout
-
-\begin_layout Standard
-
- # options, see below for description
-\end_layout
-
-\begin_layout Standard
-
- subvolumes brick1 brick2
-\end_layout
-
-\begin_layout Standard
-
-end-volume
-\end_layout
-
-\begin_layout Standard
-
-
-\backslash
-end{verbatim}
-\end_layout
-
-\begin_layout Standard
-
-\end_layout
-
-\begin_layout Standard
-
-\end_layout
-
-\begin_layout Standard
-
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-This defines an replicate volume with two subvolumes, brick1, and brick2.
- For replicate to work properly, it is essential that its subvolumes support
-\series bold
-extended attributes
-\series default
-.
- This means that you should choose a backend filesystem that supports extended
- attributes, like XFS, ReiserFS, or Ext3.
-\end_layout
-
-\begin_layout Standard
-The storage volumes used as backend for replicate
-\emph on
-must
-\emph default
- have a posix-locks volume loaded above them.
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Standard
-
-
-\backslash
-begin{verbatim}
-\end_layout
-
-\begin_layout Standard
-
-volume brick1
-\end_layout
-
-\begin_layout Standard
-
- type features/posix-locks
-\end_layout
-
-\begin_layout Standard
-
- subvolumes brick1-ds
-\end_layout
-
-\begin_layout Standard
-
-end-volume
-\end_layout
-
-\begin_layout Standard
-
-
-\backslash
-end{verbatim}
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Section*
-Design
-\end_layout
-
-\begin_layout Subsection*
-Read algorithm
-\end_layout
-
-\begin_layout Standard
-All operations that do not modify the file or directory are sent to all
- the subvolumes and the first successful reply is returned to the application.
-\end_layout
-
-\begin_layout Standard
-The read() system call (reading data from a file) is an exception.
- For read() calls, replicate tries to do load balancing by sending all reads from
- a particular file to a particular server.
-\end_layout
-
-\begin_layout Standard
-The read algorithm is also affected by the option read-subvolume; see below
- for details.
-\end_layout
-
-\begin_layout Subsection*
-Classes of file operations
-\end_layout
-
-\begin_layout Standard
-replicate divides all filesystem write operations into three classes:
-\end_layout
-
-\begin_layout Itemize
-
-\series bold
-data:
-\series default
-Operations that modify the contents of a file (write, truncate).
-\end_layout
-
-\begin_layout Itemize
-
-\series bold
-metadata:
-\series default
-Operations that modify attributes of a file or directory (permissions, ownership
-, etc.).
-\end_layout
-
-\begin_layout Itemize
-
-\series bold
-entry:
-\series default
-Operations that create or delete directory entries (mkdir, create, rename,
- rmdir, unlink, etc.).
-\end_layout
-
-\begin_layout Subsection*
-Locking and Change Log
-\end_layout
-
-\begin_layout Standard
-To ensure consistency across subvolumes, replicate holds a lock whenever a modificatio
-n is being made to a file or directory.
- By default, replicate considers the first subvolume as the sole lock server.
- However, the number of lock servers can be increased upto the total number
- of subvolumes.
-\end_layout
-
-\begin_layout Standard
-The change log is a set of extended attributes associated with files and
- directories that replicate maintains.
- The change log keeps track of the changes made to files and directories
- (data, metadata, entry) so that the self-heal algorithm knows which copy
- of a file or directory is the most recent one.
-\end_layout
-
-\begin_layout Subsection*
-Write algorithm
-\end_layout
-
-\begin_layout Standard
-The algorithm for all write operations (data, metadata, entry) is:
-\end_layout
-
-\begin_layout Enumerate
-Lock the file (or directory) on all of the lock servers (see options below).
-\end_layout
-
-\begin_layout Enumerate
-Write change log entries on all servers.
-\end_layout
-
-\begin_layout Enumerate
-Perform the operation.
-\end_layout
-
-\begin_layout Enumerate
-Erase change log entries.
-\end_layout
-
-\begin_layout Enumerate
-Unlock the file (or directory) on all of the lock servers.
-\end_layout
-
-\begin_layout Standard
-The above algorithm is a simplified version intended for general users.
- Please refer to the source code for the full details.
-\end_layout
-
-\begin_layout Subsection*
-Self-Heal
-\end_layout
-
-\begin_layout Standard
-replicate automatically tries to fix any inconsistencies it detects among different
- copies of a file.
- It uses information in the change log to determine which copy is the
-\begin_inset Quotes eld
-\end_inset
-
-correct
-\begin_inset Quotes erd
-\end_inset
-
- version.
-\end_layout
-
-\begin_layout Standard
-Self-heal is triggered when a file or directory is first
-\begin_inset Quotes eld
-\end_inset
-
-accessed
-\begin_inset Quotes erd
-\end_inset
-
-, that is, the first time any operation is attempted on it.
- The self-heal algorithm does the following things:
-\end_layout
-
-\begin_layout Standard
-If the entry being accessed is a directory:
-\end_layout
-
-\begin_layout Itemize
-The contents of the
-\begin_inset Quotes eld
-\end_inset
-
-correct
-\begin_inset Quotes erd
-\end_inset
-
- version is replicated on all subvolumes, by deleting entries and creating
- entries as necessary.
-\end_layout
-
-\begin_layout Standard
-If the entry being accessed is a file:
-\end_layout
-
-\begin_layout Itemize
-If the file does not exist on some subvolumes, it is created.
-\end_layout
-
-\begin_layout Itemize
-If there is a mismatch in the size of the file, or ownership, or permission,
- it is fixed.
-\end_layout
-
-\begin_layout Itemize
-If the change log indicates that some copies need updating, they are updated.
-\end_layout
-
-\begin_layout Subsection*
-Split-brain
-\end_layout
-
-\begin_layout Standard
-It may happen that one replicate client can access only some of the servers in
- a cluster and another replicate client can access the remaining servers.
- Or it may happen that in a cluster of two servers, one server goes down
- and comes back up, but the other goes down immediately.
- Both these scenarios result in a
-\begin_inset Quotes eld
-\end_inset
-
-split-brain
-\begin_inset Quotes erd
-\end_inset
-
-.
-\end_layout
-
-\begin_layout Standard
-In a split-brain situation, there will be two or more copies of a file,
- all of which are
-\begin_inset Quotes eld
-\end_inset
-
-correct
-\begin_inset Quotes erd
-\end_inset
-
- in some sense.
- replicate without manual intervention has no way of knowing what to do, since
- it cannot consider any single copy as definitive, nor does it know of any
- meaningful way to merge the copies.
-\end_layout
-
-\begin_layout Standard
-If replicate detects that a split-brain has happened on a file, it disallows opening
- of that file.
- You will have to manually resolve the conflict by deleting all but one
- copy of the file.
- Alternatively you can set an automatic split-brain resolution policy by
- using the `favorite-child' option (see below).
-\end_layout
-
-\begin_layout Section*
-Translator Options
-\end_layout
-
-\begin_layout Standard
-replicate accepts the following options:
-\end_layout
-
-\begin_layout Subsection*
-read-subvolume (default: none)
-\end_layout
-
-\begin_layout Standard
-The value of this option must be the name of a subvolume.
- If given, all read operations are sent to only the specified subvolume,
- instead of being balanced across all subvolumes.
-\end_layout
-
-\begin_layout Subsection*
-favorite-child (default: none)
-\end_layout
-
-\begin_layout Standard
-The value of this option must be the name of a subvolume.
- If given, the specified subvolume will be preferentially used in resolving
- conflicts (
-\begin_inset Quotes eld
-\end_inset
-
-split-brain
-\begin_inset Quotes erd
-\end_inset
-
-).
- This means if a discrepancy is noticed in the attributes or content of
- a file, the copy on the `favorite-child' will be considered the definitive
- version and its contents will
-\emph on
-overwrite
-\emph default
-the contents of all other copies.
- Use this option with caution! It is possible to
-\emph on
-lose data
-\emph default
- with this option.
- If you are in doubt, do not specify this option.
-\end_layout
-
-\begin_layout Subsection*
-Self-heal options
-\end_layout
-
-\begin_layout Standard
-Setting any of these options to
-\begin_inset Quotes eld
-\end_inset
-
-off
-\begin_inset Quotes erd
-\end_inset
-
- prevents that kind of self-heal from being done on a file or directory.
- For example, if metadata self-heal is turned off, permissions and ownership
- are no longer fixed automatically.
-\end_layout
-
-\begin_layout Subsubsection*
-data-self-heal (default: on)
-\end_layout
-
-\begin_layout Standard
-Enable/disable self-healing of file contents.
-\end_layout
-
-\begin_layout Subsubsection*
-metadata-self-heal (default: off)
-\end_layout
-
-\begin_layout Standard
-Enable/disable self-healing of metadata (permissions, ownership, modification
- times).
-\end_layout
-
-\begin_layout Subsubsection*
-entry-self-heal (default: on)
-\end_layout
-
-\begin_layout Standard
-Enable/disable self-healing of directory entries.
-\end_layout
-
-\begin_layout Subsection*
-Change Log options
-\end_layout
-
-\begin_layout Standard
-If any of these options is turned off, it disables writing of change log
- entries for that class of file operations.
- That is, steps 2 and 4 of the write algorithm (see above) are not done.
- Note that if the change log is not written, the self-heal algorithm cannot
- determine the
-\begin_inset Quotes eld
-\end_inset
-
-correct
-\begin_inset Quotes erd
-\end_inset
-
- version of a file and hence self-heal will only be able to fix
-\begin_inset Quotes eld
-\end_inset
-
-obviously
-\begin_inset Quotes erd
-\end_inset
-
- wrong things (such as a file existing on only one node).
-\end_layout
-
-\begin_layout Subsubsection*
-data-change-log (default: on)
-\end_layout
-
-\begin_layout Standard
-Enable/disable writing of change log for data operations.
-\end_layout
-
-\begin_layout Subsubsection*
-metadata-change-log (default: on)
-\end_layout
-
-\begin_layout Standard
-Enable/disable writing of change log for metadata operations.
-\end_layout
-
-\begin_layout Subsubsection*
-entry-change-log (default: on)
-\end_layout
-
-\begin_layout Standard
-Enable/disable writing of change log for entry operations.
-\end_layout
-
-\begin_layout Subsection*
-Locking options
-\end_layout
-
-\begin_layout Standard
-These options let you specify the number of lock servers to use for each
- class of file operations.
- The default values are satisfactory in most cases.
- If you are extra paranoid, you may want to increase the values.
- However, be very cautious if you set the data- or entry- lock server counts
- to zero, since this can result in
-\emph on
-lost data.
-
-\emph default
- For example, if you set the data-lock-server-count to zero, and two application
-s write to the same region of a file, there is a possibility that none of
- your servers will have all the data.
- In other words, the copies will be
-\emph on
-inconsistent
-\emph default
-, and
-\emph on
-incomplete
-\emph default
-.
- Do not set data- and entry- lock server counts to zero unless you absolutely
- know what you are doing and agree to not hold GlusterFS responsible for
- any lost data.
-\end_layout
-
-\begin_layout Subsubsection*
-data-lock-server-count (default: 1)
-\end_layout
-
-\begin_layout Standard
-Number of lock servers to use for data operations.
-\end_layout
-
-\begin_layout Subsubsection*
-metadata-lock-server-count (default: 0)
-\end_layout
-
-\begin_layout Standard
-Number of lock servers to use for metadata operations.
-\end_layout
-
-\begin_layout Subsubsection*
-entry-lock-server-count (default: 1)
-\end_layout
-
-\begin_layout Standard
-Number of lock servers to use for entry operations.
-\end_layout
-
-\begin_layout Section*
-Known Issues
-\end_layout
-
-\begin_layout Subsection*
-Self-heal of file with more than one link (hard links):
-\end_layout
-
-\begin_layout Standard
-Consider two servers, A and B.
- Assume A is down, and the user creates a file `new' as a hard link to a
- file `old'.
- When A comes back up, replicate will see that the file `new' does not exist on
- A, and self-heal will create the file and copy the contents from B.
- However, now on server A the file `new' is not a link to the file `old'
- but an entirely different file.
-\end_layout
-
-\begin_layout Standard
-We know of no easy way to fix this problem, but we will try to fix it in
- forthcoming releases.
-\end_layout
-
-\begin_layout Subsection*
-File re-opening after a server comes back up:
-\end_layout
-
-\begin_layout Standard
-If a server A goes down and comes back up, any files which were opened while
- A was down and are still open will not have their writes replicated on
- A.
- In other words, data replication only happens on those servers which were
- alive when the file was opened.
-\end_layout
-
-\begin_layout Standard
-This is a rather tricky issue but we hope to fix it very soon.
-\end_layout
-
-\begin_layout Section*
-Frequently Asked Questions
-\end_layout
-
-\begin_layout Subsection*
-1.
- How can I force self-heal to happen?
-\end_layout
-
-\begin_layout Standard
-You can force self-heal to happen on your cluster by running a script or
- a command that accesses every file.
- A simple way to do it would be:
-\end_layout
-
-\begin_layout Standard
-\begin_inset ERT
-status open
-
-\begin_layout Standard
-
-\end_layout
-
-\begin_layout Standard
-
-
-\backslash
-begin{verbatim}
-\end_layout
-
-\begin_layout Standard
-
-$ ls -lR
-\end_layout
-
-\begin_layout Standard
-
-
-\backslash
-end{verbatim}
-\end_layout
-
-\begin_layout Standard
-
-\end_layout
-
-\end_inset
-
-
-\end_layout
-
-\begin_layout Standard
-Run the command in all directories which you want to forcibly self-heal.
-\end_layout
-
-\begin_layout Subsection*
-2.
- Which backend filesystem should I use for replicate?
-\end_layout
-
-\begin_layout Standard
-You can use any backend filesystem that supports extended attributes.
- We know of users successfully using XFR, ReiserFS, and Ext3.
-\end_layout
-
-\begin_layout Subsection*
-3.
- What can I do to improve replicate performance?
-\end_layout
-
-\begin_layout Standard
-Try loading performance translators such as io-threads, write-behind, io-cache,
- and read-ahead depending on your workload.
- If you are willing to sacrifice correctness in corner cases, you can experiment
- with the lock-server-count and the change-log options (see above).
- As warned earlier, be very careful!
-\end_layout
-
-\begin_layout Subsection*
-4.
- How can I selectively replicate files?
-\end_layout
-
-\begin_layout Standard
-There is no support for selective replication in replicate itself.
- You can achieve selective replication by loading the unify translator over
- replicate, and using the switch scheduler.
- Configure unify with two subvolumes, one of them being replicate.
- Using the switch scheduler, schedule all files for which you need replication
- to the replicate subvolume.
- Consult unify and switch documentation for more details.
-\end_layout
-
-\begin_layout Section*
-Contact
-\end_layout
-
-\begin_layout Standard
-If you need more assistance on replicate, contact us on the mailing list <gluster-user
-s@gluster.org> (visit gluster.org for details on how to subscribe).
-\end_layout
-
-\begin_layout Standard
-Send you comments and suggestions about this document to <vikas@gluster.com>.
-\end_layout
-
-\end_body
-\end_document
diff --git a/doc/legacy/replicate.pdf b/doc/legacy/replicate.pdf
deleted file mode 100644
index b7212af2b4e..00000000000
--- a/doc/legacy/replicate.pdf
+++ /dev/null
Binary files differ
diff --git a/doc/legacy/solaris-related-xattrs.txt b/doc/legacy/solaris-related-xattrs.txt
deleted file mode 100644
index 3a4643948c0..00000000000
--- a/doc/legacy/solaris-related-xattrs.txt
+++ /dev/null
@@ -1,44 +0,0 @@
- Solaris Extended Attributes
-
-In solaris extended attributes are logically supported as files
-within the filesystem. The file system is therefore augmented
-with an orthogonal namespace of file attributes. Attribute values
-are accessed by file descriptors obtained through a special attribute
-interface. This type of logical view of "attributes as files" allows
-the leveraging of existing file system interface functionality to
-support the construction, deletion and manipulation of attributes.
-
-But as we have tested through this functionality provided by Solaris
-we have come accross two major issues as written below.
-
-1. Symlink XATTR_NOFOLLOW not present for creating extended attributes
- directly on the symlinks like other platforms Linux,MAC-OSX,BSD etc.
- An implementation is present for O_NOFOLLOW for "openat()" call sets
- up errno ELOOP whenever encountered with a symlink and also another
- implementation AT_SYMLINK_NOFOLLOW which is not present for calls like
- "attropen(), openat()"
-
- a snippet of test code which helped us understand this behaviour
- --------------------------------------
- attrfd = attropen (path, key,
- flags|AT_SYMLINK_NOFOLLOW|O_CREAT|O_WRONLY|O_NOFOLLOW, 0777);
- if (attrfd >= 0) {
- ftruncate (attrfd, 0);
- ret = write (attrfd, value, size);
- close (attrfd);
- } else {
- fprintf (stderr, "Couldn't set extended attribute for %s (%d)\n",
- path, errno);
- }
- --------------------------------------
-
-2. Extended attribute support for special files like device files, fifo files
- is not supported under solaris.
-
-Apart from these glitches almost everything regarding porting functionality
-for extended attribute calls has been properly implemented in compat.c
-with writing wrapper around functions over
-"attropen()", "openat()", "unlinkat()"
-
-
-
diff --git a/doc/legacy/stat-prefetch-design.txt b/doc/legacy/stat-prefetch-design.txt
deleted file mode 100644
index 68ed423d3dd..00000000000
--- a/doc/legacy/stat-prefetch-design.txt
+++ /dev/null
@@ -1,154 +0,0 @@
-what is stat-prefetch?
-======================
-It is a translator which caches the dentries read in readdir. This dentry
-list is stored in the context of fd. Later when lookup happens on
-[parent-inode, basename (path)] combination, this list is searched for the
-basename. The dentry thus searched is used to fill up the stat corresponding
-to path being looked upon, thereby short-cutting lookup calls. This cache is
-preserved till closedir is called on the fd. The purpose of this translator
-is to optimize operations like 'ls -l', where a readdir is followed by
-lookup (stat) calls on each directory entry.
-
-1. stat-prefetch harnesses the efficiency of short lookup calls
- (saves network roundtrip time for lookup calls from being accounted to
- the stat call).
-2. To maintain the correctness, it does lookup-behind - lookup is winded to
- underlying translators after it is unwound to upper translators.
- lookup-behind is necessary as inode gets populated in server inode table
- only in lookup-cbk and also because various translators store their
- contexts in inode contexts during lookup calls.
-
-fops to be implemented:
-=======================
-* lookup
- 1. check the dentry cache stored in context of fds opened by the same process
- on parent inode for basename. If found unwind with cached stat, else wind
- the lookup call to underlying translators.
- 2. stat is stored in the context of inode if the path being looked upon
- happens to be directory. This stat will be used to fill postparent stat
- when lookup happens on any of the directory contents.
-
-* readdir
- 1. cache the direntries returned in readdir_cbk in the context of fd.
- 2. if the readdir is happening on non-expected offsets (means a seekdir/rewinddir
- has happened), cache has to be flushed.
- 3. delete the entry corresponding to basename of path on which fd is opened
- from cache stored in parent.
-
-* chmod/fchmod
- delete the entry corresponding to basename from cache stored in context of
- fds opened on parent inode, since these calls change st_mode and st_ctime of
- stat.
-
-* chown/fchown
- delete the entry corresponding to basename from cache stored in context of
- fds opened on parent inode, since these calls change st_uid/st_gid and
- st_ctime of stat.
-
-* truncate/ftruncate
- delete the entry corresponding to basename from cache stored in context of
- fds opened on parent inode, since these calls change st_size/st_mtime of stat.
-
-* utimens
- delete the entry corresponding to basename from cache stored in context of
- fds opened on parent inode, since this call changes st_atime/st_mtime of stat.
-
-* readlink
- delete the entry corresponding to basename from cache stored in context of fds
- opened on parent inode, since this call changes st_atime of stat.
-
-* unlink
- 1. delete the entry corresponding to basename from cache stored in context of
- fds, opened on parent directory containing the file being unlinked.
- 2. delete the entry corresponding to basename of parent directory from cache
- of grand-parent.
-
-* rmdir
- 1. delete the entry corresponding to basename from cache stored in context of
- fds opened on parent inode.
- 2. remove the entire cache from all fds opened on inode corresponding to
- directory being removed.
- 3. delete the entry correspondig to basename of parent from cache stored in
- grand-parent.
-
-* readv
- delete the entry corresponding to basename from cache stored in context of fds
- opened on parent inode, since readv changes st_atime of file.
-
-* writev
- delete the entry corresponding to basename from cache stored in context of fds
- opened on parent inode, since writev can possibly change st_size and definitely
- changes st_mtime of file.
-
-* fsync
- there is a confusion here as to whether fsync updates mtime/ctimes. Disk based
- filesystems (atleast ext2) just writes the times stored in inode to disk
- during fsync and not the time at which fsync is being done. But in glusterfs,
- a translator like write-behind actually sends writes during fsync which will
- change mtime/ctime. Hence stat-prefetch implements fsync to delete the entry
- corresponding to basename from cache stored in context of fds opened on parent
- inode.
-
-* rename
- 1. remove entry corresponding to oldname from cache stored in fd contexts of
- oldparent.
- 2. remove entry corresponding to newname from cache stored in fd contexts of
- newparent.
- 3. remove entry corresponding to oldparent from cache stored in
- old-grand-parent, since removing oldname changes st_mtime and st_ctime
- of oldparent stat.
- 4. remove entry corresponding to newparent from cache stored in
- new-grand-parent, since adding newname changes st_mtime and st_ctime
- of newparent stat.
- 5. if oldname happens to be a directory, remove entire cache from all fds
- opened on it.
-
-* create/mknod/mkdir/symlink/link
- delete entry corresponding to basename of parent directory in which these
- operations are happening, from cache stored in context of fds opened on
- grand-parent, since adding a new entry to a directory changes st_mtime
- and st_ctime of parent directory.
-
-* setxattr/removexattr
- delete the entry corresponding to basename from cache stored in context of
- fds opened on parent inode, since setxattr changes st_ctime of file.
-
-* setdents
- 1. remove entry corresponding to basename of path on which fd is opened from
- cache stored in context of fds opened on parent.
- 2. for each of the entry in the direntry list, delete from cache stored in
- context of fd, the entry corresponding to basename of path being passed.
-
-* getdents
- 1. remove entry corresponding to basename of path on which fd is opened from
- cache stored in parent, since getdents changes st_atime.
- 2. remove entries corresponding to symbolic links from cache, since readlink
- would've changed st_atime.
-
-* checksum
- delete the entry corresponding to basename from cache stored in context of
- fds opened on parent inode, since st_atime is changed during this call.
-
-* xattrop/fxattrop
- delete the entry corresponding to basename from cache stored in context of fds
- opened on parent inode, since these calls modify st_ctime of file.
-
-callbacks to be implemented:
-============================
-* releasedir
- free the context stored in fd.
-
-* forget
- dree the stat if the inode corresponds to a directory.
-
-limitations:
-============
-* since a readdir does not return extended attributes of file, if need_xattr is
- set, short-cutting of lookup does not happen and lookup is passed to
- underlying translators.
-
-* posix_readdir does not check whether the dentries are spanning across multiple
- mount points. Hence it is not transforming inode numbers in stat buffers if
- posix is configured to allow export directory spanning on multiple mountpoints.
- This is a bug which needs to be fixed. posix_readdir should treat dentries the
- same way as if lookup is happening on dentries.
diff --git a/doc/legacy/translator-options.txt b/doc/legacy/translator-options.txt
deleted file mode 100644
index 3422c058a5d..00000000000
--- a/doc/legacy/translator-options.txt
+++ /dev/null
@@ -1,224 +0,0 @@
-mount/fuse:
- * direct-io-mode GF_OPTION_TYPE_BOOL on|off|yes|no
- * mount-point (mountpoint) GF_OPTION_TYPE_PATH <any-posix-valid-path>
- * attribute-timeout GF_OPTION_TYPE_DOUBLE 0.0
- * entry-timeout GF_OPTION_TYPE_DOUBLE 0.0
-
-protocol/server:
- * transport-type GF_OPTION_TYPE_STR tcp|socket|ib-verbs|unix|ib-sdp|
- tcp/client|ib-verbs/client
- * volume-filename.* GF_OPTION_TYPE_PATH
- * inode-lru-limit GF_OPTION_TYPE_INT 0-(1 * GF_UNIT_MB)
- * client-volume-filename GF_OPTION_TYPE_PATH
-
-protocol/client:
- * username GF_OPTION_TYPE_ANY
- * password GF_OPTION_TYPE_ANY
- * transport-type GF_OPTION_TYPE_STR tcp|socket|ib-verbs|unix|ib-sdp|
- tcp/client|ib-verbs/client
- * remote-host GF_OPTION_TYPE_ANY
- * remote-subvolume GF_OPTION_TYPE_ANY
- * transport-timeout GF_OPTION_TYPE_TIME 5-1013
-
-cluster/replicate:
- * read-subvolume GF_OPTION_TYPE_XLATOR
- * favorite-child GF_OPTION_TYPE_XLATOR
- * data-self-heal GF_OPTION_TYPE_BOOL
- * metadata-self-heal GF_OPTION_TYPE_BOOL
- * entry-self-heal GF_OPTION_TYPE_BOOL
- * data-change-log GF_OPTION_TYPE_BOOL
- * metadata-change-log GF_OPTION_TYPE_BOOL
- * entry-change-log GF_OPTION_TYPE_BOOL
- * data-lock-server-count GF_OPTION_TYPE_INT 0
- * metadata-lock-server-count GF_OPTION_TYPE_INT 0
- * entry-lock-server-count GF_OPTION_TYPE_INT 0
-
-cluster/distribute:
- * lookup-unhashed GF_OPTION_TYPE_BOOL
-
-cluster/unify:
- * namespace GF_OPTION_TYPE_XLATOR
- * scheduler GF_OPTION_TYPE_STR alu|rr|random|nufa|switch
- * self-heal GF_OPTION_TYPE_STR foreground|background|off
- * optimist GF_OPTION_TYPE_BOOL
-
-cluster/nufa:
- local-volume-name GF_OPTION_TYPE_XLATOR
-
-cluster/stripe:
- * block-size GF_OPTION_TYPE_ANY
- * use-xattr GF_OPTION_TYPE_BOOL
-
-debug/trace:
- * include-ops (include) GF_OPTION_TYPE_STR
- * exclude-ops (exclude) GF_OPTION_TYPE_STR
-
-encryption/rot-13:
- * encrypt-write GF_OPTION_TYPE_BOOL
- * decrypt-read GF_OPTION_TYPE_BOOL
-
-features/path-convertor:
- * start-offset GF_OPTION_TYPE_INT 0-4095
- * end-offset GF_OPTION_TYPE_INT 1-4096
- * replace-with GF_OPTION_TYPE_ANY
-
-features/trash:
- * trash-dir GF_OPTION_TYPE_PATH
-
-features/locks:
- * mandatory-locks (mandatory) GF_OPTION_TYPE_BOOL
-
-features/filter:
- * root-squashing GF_OPTION_TYPE_BOOL
- * read-only GF_OPTION_TYPE_BOOL
- * fixed-uid GF_OPTION_TYPE_INT
- * fixed-gid GF_OPTION_TYPE_INT
- * translate-uid GF_OPTION_TYPE_ANY
- * translate-gid GF_OPTION_TYPE_ANY
- * filter-uid GF_OPTION_TYPE_ANY
- * filter-gid GF_OPTION_TYPE_ANY
-
-features/quota:
- * min-free-disk-limit GF_OPTION_TYPE_PERCENT
- * refresh-interval GF_OPTION_TYPE_TIME
- * disk-usage-limit GF_OPTION_TYPE_SIZET
-
-storage/posix:
- * o-direct GF_OPTION_TYPE_BOOL
- * directory GF_OPTION_TYPE_PATH
- * export-statfs-size GF_OPTION_TYPE_BOOL
- * mandate-attribute GF_OPTION_TYPE_BOOL
-
-storage/bdb:
- * directory GF_OPTION_TYPE_PATH
- * logdir GF_OPTION_TYPE_PATH
- * errfile GF_OPTION_TYPE_PATH
- * dir-mode GF_OPTION_TYPE_ANY
- * file-mode GF_OPTION_TYPE_ANY
- * page-size GF_OPTION_TYPE_SIZET
- * lru-limit GF_OPTION_TYPE_INT
- * lock-timeout GF_OPTION_TYPE_TIME
- * checkpoint-timeout GF_OPTION_TYPE_TIME
- * transaction-timeout GF_OPTION_TYPE_TIME
- * mode GF_OPTION_TYPE_BOOL
- * access-mode GF_OPTION_TYPE_STR
-
-performance/read-ahead:
- * force-atime-update GF_OPTION_TYPE_BOOL
- * page-size GF_OPTION_TYPE_SIZET (64 * GF_UNIT_KB)-(2 * GF_UNIT_MB)
- * page-count GF_OPTION_TYPE_INT 1-16
-
-performance/write-behind:
- * flush-behind GF_OPTION_TYPE_BOOL
- * aggregate-size GF_OPTION_TYPE_SIZET (128 * GF_UNIT_KB)-(4 * GF_UNIT_MB)
- * window-size GF_OPTION_TYPE_SIZET (512 * GF_UNIT_KB)-(1 * GF_UNIT_GB)
- * enable-O_SYNC GF_OPTION_TYPE_BOOL
- * disable-for-first-nbytes GF_OPTION_TYPE_SIZET 1 - (1 * GF_UNIT_MB)
-
-performance/symlink-cache:
-
-performance/io-threads:
- * thread-count GF_OPTION_TYPE_INT 1-32
-
-performance/io-cache:
- * priority GF_OPTION_TYPE_ANY
- * cache-timeout (force-revalidate-timeout) GF_OPTION_TYPE_INT 0-60
- * page-size GF_OPTION_TYPE_SIZET (16 * GF_UNIT_KB)-(4 * GF_UNIT_MB)
- * cache-size GF_OPTION_TYPE_SIZET (4 * GF_UNIT_MB)-(6 * GF_UNIT_GB)
-
-performance/quick-read:
- * cache-timeout GF_OPTION_TYPE_INT 1-60
- * max-file-size GF_OPTION_TYPE_SIZET 0-(1000 * GF_UNIT_KB)
-
-auth:
-- addr:
- * auth.addr.*.allow GF_OPTION_TYPE_ANY
- * auth.addr.*.reject GF_OPTION_TYPE_ANY
-
-- login:
- * auth.login.*.allow GF_OPTION_TYPE_ANY
- * auth.login.*.password GF_OPTION_TYPE_ANY
-
-scheduler/alu:
- * scheduler.alu.order (alu.order)
- GF_OPTION_TYPE_ANY
- * scheduler.alu.disk-usage.entry-threshold (alu.disk-usage.entry-threshold)
- GF_OPTION_TYPE_SIZET
- * scheduler.alu.disk-usage.exit-threshold (alu.disk-usage.exit-threshold)
- GF_OPTION_TYPE_SIZET
- * scheduler.alu.write-usage.entry-threshold (alu.write-usage.entry-threshold)
- GF_OPTION_TYPE_SIZET
- * scheduler.alu.write-usage.exit-threshold (alu.write-usage.exit-threshold)
- GF_OPTION_TYPE_SIZET
- * scheduler.alu.read-usage.entry-threshold (alu.read-usage.entry-threshold)
- GF_OPTION_TYPE_SIZET
- * scheduler.alu.read-usage.exit-threshold (alu.read-usage.exit-threshold)
- GF_OPTION_TYPE_SIZET
- * scheduler.alu.open-files-usage.entry-threshold (alu.open-files-usage.entry-threshold)
- GF_OPTION_TYPE_INT
- * scheduler.alu.open-files-usage.exit-threshold (alu.open-files-usage.exit-threshold)
- GF_OPTION_TYPE_INT
- * scheduler.read-only-subvolumes (alu.read-only-subvolumes)
- GF_OPTION_TYPE_ANY
- * scheduler.refresh-interval (alu.refresh-interval)
- GF_OPTION_TYPE_TIME
- * scheduler.limits.min-free-disk (alu.limits.min-free-disk)
- GF_OPTION_TYPE_PERCENT
- * scheduler.alu.stat-refresh.num-file-create (alu.stat-refresh.num-file-create)
- GF_OPTION_TYPE_INT
-
-scheduler/nufa:
- * scheduler.refresh-interval (nufa.refresh-interval)
- GF_OPTION_TYPE_TIME
- * scheduler.limits.min-free-disk (nufa.limits.min-free-disk)
- GF_OPTION_TYPE_PERCENT
- * scheduler.local-volume-name (nufa.local-volume-name)
- GF_OPTION_TYPE_XLATOR
-
-scheduler/random:
- * scheduler.refresh-interval (random.refresh-interval) GF_OPTION_TYPE_TIME
- * scheduler.limits.min-free-disk (random.limits.min-free-disk) GF_OPTION_TYPE_PERCENT
-
-scheduler/rr:
- * scheduler.refresh-interval (rr.refresh-interval) GF_OPTION_TYPE_TIME
- * scheduler.limits.min-free-disk (rr.limits.min-free-disk) GF_OPTION_TYPE_PERCENT
- * scheduler.read-only-subvolumes (rr.read-only-subvolumes) GF_OPTION_TYPE_ANY
-
-scheduler/switch:
- * scheduler.read-only-subvolumes (switch.read-only-subvolumes) GF_OPTION_TYPE_ANY
- * scheduler.local-volume-name (switch.nufa.local-volume-name) GF_OPTION_TYPE_XLATOR
- * scheduler.switch.case (switch.case) GF_OPTION_TYPE_ANY
-
-transport/ib-verbs:
- * transport.ib-verbs.port (ib-verbs-port) GF_OPTION_TYPE_INT 1-4
- check the option by 'ibv_devinfo'
- * transport.ib-verbs.mtu (ib-verbs-mtu) GF_OPTION_TYPE_INT
- * transport.ib-verbs.device-name (ib-verbs-device-name) GF_OPTION_TYPE_ANY,
- check by 'ibv_devinfo'
- * transport.ib-verbs.work-request-send-size (ib-verbs-work-request-send-size)
- GF_OPTION_TYPE_INT,
- * transport.ib-verbs.work-request-recv-size (ib-verbs-work-request-recv-size)
- GF_OPTION_TYPE_INT
- * transport.ib-verbs.work-request-send-count (ib-verbs-work-request-send-count)
- GF_OPTION_TYPE_INT
- * transport.ib-verbs.work-request-recv-count (ib-verbs-work-request-recv-count)
- GF_OPTION_TYPE_INT
- * remote-port (transport.remote-port,transport.ib-verbs.remote-port)
- GF_OPTION_TYPE_INT
- * transport.ib-verbs.listen-port GF_OPTION_TYPE_INT
- * transport.ib-verbs.connect-path (connect-path) GF_OPTION_TYPE_ANY
- * transport.ib-verbs.bind-path (bind-path) GF_OPTION_TYPE_ANY
- * transport.ib-verbs.listen-path (listen-path) GF_OPTION_TYPE_ANY
- * transport.address-family (address-family) GF_OPTION_TYPE_STR inet|inet6|inet/inet6|
- inet6/inet|unix|inet-sdp
-
-transport/socket:
- * transport.remote-port (remote-port,transport.socket.remote-port) GF_OPTION_TYPE_INT
- * transport.socket.listen-port (listen-port) GF_OPTION_TYPE_INT
- * transport.socket.bind-address (bind-address) GF_OPTION_TYPE_ANY
- * transport.socket.connect-path (connect-path) GF_OPTION_TYPE_ANY
- * transport.socket.bind-path (bind-path) GF_OPTION_TYPE_ANY
- * transport.socket.listen-path (listen-path) GF_OPTION_TYPE_ANY
- * transport.address-family (address-family) GF_OPTION_TYPE_STR inet|inet6|
- inet/inet6|inet6/inet|
- unix|inet-sdp
diff --git a/doc/mount.glusterfs.8 b/doc/mount.glusterfs.8
index 10e1d59f055..ce16e9e40b7 100644
--- a/doc/mount.glusterfs.8
+++ b/doc/mount.glusterfs.8
@@ -1,37 +1,31 @@
-.\" Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+.\" Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
.\" This file is part of GlusterFS.
.\"
-.\" GlusterFS is free software; you can redistribute it and/or modify
-.\" it under the terms of the GNU General Public License as published
-.\" by the Free Software Foundation; either version 3 of the License,
-.\" or (at your option) any later version.
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
.\"
-.\" GlusterFS is distributed in the hope that it will be useful, but
-.\" WITHOUT ANY WARRANTY; without even the implied warranty of
-.\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-.\" General Public License for more details.
.\"
-.\" You should have received a copy of the GNU General Public License
-.\" long with this program. If not, see
-.\" <http://www.gnu.org/licenses/>.
.\"
-.\"
-.\"
-.TH GlusterFS 8 "Cluster Filesystem" "18 March 2010" "Gluster Inc."
+.TH GlusterFS 8 "Cluster Filesystem" "14 September 2013" "Red Hat, Inc."
.SH NAME
-mount.glusterfs - script to mount native GlusterFS volume
+.B mount.glusterfs - script to mount native GlusterFS volume
.SH SYNOPSIS
-.B mount -t glusterfs
-.I [-o <options>] <volumeserver>:<volumeid> <mountpoint>
+.B mount -t glusterfs [-o <options>] <volumeserver>:/<volume>[/<subdir>]
+.B <mountpoint>
+.TP
+.B mount -t glusterfs [-o <options>] <server1>,<server2>,
+.B <server3>,..<serverN>:/<volname>[/<subdir>] <mount_point>
.TP
-.B mount -t glusterfs
-.I [-o <options>] <path/to/volumefile> <mountpoint>
+.TP
+.B mount -t glusterfs [-o <options>] <path/to/volumefile> <mountpoint>
.PP
.SH DESCRIPTION
This tool is part of \fBglusterfs\fR(8) package, which is used to mount using
GlusterFS native binary.
-\fBmount.glusterfs\fR is meant to be used by the mount(8) command for mounting
+\fBmount.glusterfs\fR is meant to be used by the mount(8) command for mounting
native GlusterFS client. This subcommand, however, can also be used as a
standalone command with limited functionality.
@@ -47,41 +41,137 @@ File to use for logging [default:/var/log/glusterfs/glusterfs.log]
Logging severity. Valid options are TRACE, DEBUG, WARNING, ERROR, CRITICAL
INFO and NONE [default: INFO]
.TP
+\fBacl
+Mount the filesystem with POSIX ACL support
+.TP
+\fBfopen\-keep\-cache[=BOOL]
+Do not purge the cache on file open (default: false)
+.TP
+\fBworm
+Mount the filesystem in 'worm' mode
+.TP
+\fBaux\-gfid\-mount
+Enable access to filesystem through gfid directly
+.TP
\fBro\fR
Mount the filesystem read-only
+.TP
+\fBenable\-ino32=\fRBOOL
+Use 32-bit inodes when mounting to workaround broken applications that don't
+support 64-bit inodes
+.TP
+\fBmem\-accounting
+Enable internal memory accounting
+.TP
+\fBcapability
+Enable file capability setting and retrival
+.TP
+\fBthin-client
+Enables thin mount and connects via gfproxyd daemon
+
.PP
.SS "Advanced options"
.PP
.TP
-\fBvolfile\-id=\fRKEY
-Volume key or name of the volume file to be fetched from server
+\fBattribute\-timeout=\fRSECONDS
+Set attribute timeout to SECONDS for inodes in fuse kernel module [default: 1]
+.TP
+\fBentry\-timeout=\fRSECONDS
+Set entry timeout to SECONDS in fuse kernel module [default: 1]
+.TP
+\fBbackground\-qlen=\fRN
+Set fuse module's background queue length to N [default: 64]
.TP
-\fBtransport=\fRTRANSPORT-TYPE
-Transport type to get volume file from server [default: socket]
+\fBgid\-timeout=\fRSECONDS
+Set auxiliary group list timeout to SECONDS for fuse translator [default: 0]
+.TP
+\fBnegative\-timeout=\fRSECONDS
+Set negative timeout to SECONDS in fuse kernel module [default: 0]
.TP
\fBvolume\-name=\fRVOLUME-NAME
Volume name to be used for MOUNT-POINT [default: top most volume in
VOLUME-FILE]
.TP
-\fBdirect\-io\-mode=\fRdisable
-Disable direct I/O mode in fuse kernel module
+\fBdirect\-io\-mode=\fRBOOL|auto
+Specify fuse direct I/O strategy [default: auto]
+.TP
+\fBcongestion\-threshold=\fRN
+Set fuse module's congestion threshold to N [default: 48]
+.TP
+\fsubdir\-mount=\fRN
+Set the subdirectory mount option [default: NULL, ie, no subdirectory mount]
+.TP
+.TP
+\fBbackup\-volfile\-servers=\fRSERVERLIST
+Provide list of backup volfile servers in the following format [default: None]
+
+\fB$ mount \-t glusterfs \-obackup\-volfile\-servers=<server2>:\fR
+\fB <server3>:...:<serverN> <server1>:/<volname> <mount_point>\fR
+
+.TP
+.TP
+\fBbackupvolfile\-server=\fRSERVER
+Provide list of backup volfile servers in the following format [default: None]
+
+\fB $ mount \-t glusterfs \-obackupvolfile\-server=<server2>
+\fB <server1>:/<volname> <mount_point>
+
+.TP
+.TP
+\fBfetch-attempts=\fRN
+\fBDeprecated\fR option - placed here for backward compatibility [default: 1]
+.TP
+.TP
+\fBlru-limit=\fRN
+Set fuse module's limit for number of inodes kept in LRU list to N [default: 65536]
+.TP
+.TP
+\fBinvalidate-limit=\fRN
+Suspend fuse invalidations implied by 'lru-limit' if number of outstanding
+invalidations reaches N
+.TP
+.TP
+\fBbackground-qlen=\fRN
+Set fuse module's background queue length to N [default: 64]
+.TP
+\fBno\-root\-squash=\fRBOOL
+disable root squashing for the trusted client [default: off]
+.TP
+\fBroot\-squash=\fRBOOL
+enable root squashing for the trusted client [default: on]
+.TP
+\fBuse\-readdirp=\fRBOOL
+Use readdirp() mode in fuse kernel module [default: on]
+.TP
+\fBdump\-fuse=\fRPATH
+Dump fuse traffic to PATH
+.TP
+\fBkernel\-writeback\-cache=\fRBOOL
+Enable fuse in-kernel writeback cache [default: off]
+.TP
+\fBattr\-times\-granularity=\fRNS
+Declare supported granularity of file attribute [default: 0]
.TP
+\fBauto\-invalidation=\fRBOOL
+controls whether fuse-kernel can auto-invalidate attribute, dentry and
+page-cache. Disable this only if same files/directories are not
+accessed across two different mounts concurrently [default: on]
.PP
.SH FILES
.TP
.I /etc/fstab
A typical GlusterFS entry in /etc/fstab looks like below
-server1.gluster.com:mirror /mnt/mirror glusterfs log-file=/var/log/mirror.vol,ro,defaults 0 0
+\fBserver1:/mirror /mnt/mirror glusterfs log-file=/var/log/mirror.log,acl 0 0\fR
.TP
-.I /etc/mtab
-An example entry of a GlusterFS mountpoint in /etc/mtab looks like below
+.I /proc/mounts
+An example entry of a GlusterFS mountpoint in /proc/mounts looks like below
-mirror.vol /mnt/glusterfs fuse.glusterfs rw,allow_other,default_permissions,max_read=131072 0 0
+\fBserver1:/mirror /mnt/glusterfs fuse.glusterfs rw,allow_other,default_permissions,max_read=131072 0 0\fR
.SH SEE ALSO
\fBglusterfs\fR(8), \fBmount\fR(8), \fBgluster\fR(8)
.SH COPYRIGHT
-Copyright(c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+Copyright(c) 2006-2013 Red Hat, Inc. <http://www.redhat.com>
diff --git a/doc/qa/legacy/qa-client.vol b/doc/qa/legacy/qa-client.vol
deleted file mode 100644
index bcf242347d3..00000000000
--- a/doc/qa/legacy/qa-client.vol
+++ /dev/null
@@ -1,170 +0,0 @@
-# This spec file should be used for testing before any release
-#
-
-# 1st client
-volume client1
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
-# option transport.ib-verbs.work-request-send-size 131072
-# option transport.ib-verbs.work-request-send-count 64
-# option transport.ib-verbs.work-request-recv-size 131072
-# option transport.ib-verbs.work-request-recv-count 64
- option remote-host 127.0.0.1
- option remote-subvolume ra1
-end-volume
-
-# 2nd client
-volume client2
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra2
-end-volume
-
-# 3rd client
-volume client3
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra3
-end-volume
-
-# 4th client
-volume client4
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra4
-end-volume
-
-# 5th client
-volume client5
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra5
-end-volume
-
-# 6th client
-volume client6
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra6
-end-volume
-
-# 7th client
-volume client7
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra7
-end-volume
-
-# 8th client
-volume client8
- type protocol/client
- option transport-type tcp # for TCP/IP transport
-# option transport-type ib-sdp # for Infiniband transport
-# option transport-type ib-verbs # for ib-verbs transport
- option remote-host 127.0.0.1
- option remote-subvolume ra8
-end-volume
-
-# 1st Stripe (client1 client2)
-volume stripe1
- type cluster/stripe
- subvolumes client1 client2
- option block-size 128KB # all striped in 128kB block
-end-volume
-
-# 2st Stripe (client3 client4)
-volume stripe2
- type cluster/stripe
- subvolumes client3 client4
- option block-size 128KB # all striped in 128kB block
-end-volume
-
-# 3st Stripe (client5 client6)
-volume stripe3
- type cluster/stripe
- subvolumes client5 client6
- option block-size 128KB # all striped in 128kB block
-end-volume
-
-# 4st Stripe (client7 client8)
-volume stripe4
- type cluster/stripe
- subvolumes client7 client8
- option block-size 128KB # all striped in 128kB block
-end-volume
-
-
-# 1st replicate
-volume replicate1
- type cluster/replicate
- subvolumes stripe1 stripe2
-end-volume
-
-# 2nd replicate
-volume replicate2
- type cluster/replicate
- subvolumes stripe3 stripe4
-end-volume
-
-volume ns
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option remote-subvolume brick-ns
-end-volume
-
-# Unify
-volume unify0
- type cluster/unify
- subvolumes replicate1 replicate2
-# subvolumes stripe1 stripe3
- option namespace ns
- option scheduler rr # random # alu # nufa
- option rr.limits.min-free-disk 1GB
-# option alu.order x
-# option alu.x.entry-threshold
-# option alu.x.exit-threshold
-end-volume
-
-
-# ==== Performance Translators ====
-# The default options for performance translators should be the best for 90+% of the cases
-volume iot
- type performance/io-threads
- subvolumes unify0
-end-volume
-
-volume wb
- type performance/write-behind
- subvolumes iot
-end-volume
-
-volume ioc
- type performance/io-cache
- subvolumes wb
-end-volume
-
-volume ra
- type performance/read-ahead
- subvolumes ioc
-end-volume
diff --git a/doc/qa/legacy/qa-high-avail-client.vol b/doc/qa/legacy/qa-high-avail-client.vol
deleted file mode 100644
index 69cb8dd30f1..00000000000
--- a/doc/qa/legacy/qa-high-avail-client.vol
+++ /dev/null
@@ -1,17 +0,0 @@
-volume client
- type protocol/client
- option transport-type tcp
- option remote-host localhost
- option transport.socket.remote-port 7001
- option remote-subvolume server1-iot
-end-volume
-
-volume ra
- type performance/read-ahead
- subvolumes client
-end-volume
-
-volume wb
- type performance/write-behind
- subvolumes ra
-end-volume
diff --git a/doc/qa/legacy/qa-high-avail-server.vol b/doc/qa/legacy/qa-high-avail-server.vol
deleted file mode 100644
index 784e8d208e4..00000000000
--- a/doc/qa/legacy/qa-high-avail-server.vol
+++ /dev/null
@@ -1,344 +0,0 @@
-
-# -- server 1 --
-volume server1-posix1
- type storage/posix
- option directory /tmp/ha-export1/
-end-volume
-
-volume server1-ns1
- type storage/posix
- option directory /tmp/ha-export-ns1/
-end-volume
-
-volume server1-client2
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7002
- option remote-subvolume server2-posix2
-end-volume
-
-volume server1-ns2
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7002
- option remote-subvolume server2-ns2
-end-volume
-
-volume server1-client3
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7003
- option remote-subvolume server3-posix3
-end-volume
-
-volume server1-ns3
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7003
- option remote-subvolume server3-ns3
-end-volume
-
-volume server1-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server1-posix1
-end-volume
-
-
-volume server1-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server1-client2
-end-volume
-
-volume server1-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server1-client3
-end-volume
-
-volume server1-ns-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server1-ns1
-end-volume
-
-volume server1-ns-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server1-ns2
-end-volume
-
-volume server1-ns-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server1-ns3
-end-volume
-
-volume server1-ns-replicate
- type cluster/replicate
- subvolumes server1-ns-io1 server1-ns-io2 server1-ns-io3
-end-volume
-
-volume server1-storage-replicate
- type cluster/replicate
- subvolumes server1-io1 server1-io2 server1-io3
-end-volume
-
-volume server1-unify
- type cluster/unify
- #option self-heal off
- subvolumes server1-storage-replicate
- option namespace server1-ns-replicate
- option scheduler rr
-end-volume
-
-volume server1-iot
- type performance/io-threads
- option thread-count 8
- subvolumes server1-unify
-end-volume
-
-volume server1
- type protocol/server
- option transport-type tcp
- subvolumes server1-iot
- option transport.socket.listen-port 7001
- option auth.addr.server1-posix1.allow *
- option auth.addr.server1-ns1.allow *
- option auth.addr.server1-iot.allow *
-end-volume
-
-
-# == Server2 ==
-volume server2-client1
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7001
- option remote-subvolume server1-posix1
-end-volume
-
-volume server2-ns1
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7001
- option remote-subvolume server1-ns1
-end-volume
-
-volume server2-posix2
- type storage/posix
- option directory /tmp/ha-export2/
-end-volume
-
-volume server2-ns2
- type storage/posix
- option directory /tmp/ha-export-ns2/
-end-volume
-
-volume server2-client3
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7003
- option remote-subvolume server3-posix3
-end-volume
-
-volume server2-ns3
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7003
- option remote-subvolume server3-ns3
-end-volume
-
-volume server2-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server2-client1
-end-volume
-
-
-volume server2-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server2-posix2
-end-volume
-
-volume server2-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server2-client3
-end-volume
-
-volume server2-ns-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server2-ns1
-end-volume
-
-volume server2-ns-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server2-ns2
-end-volume
-
-volume server2-ns-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server2-ns3
-end-volume
-
-volume server2-ns-replicate
- type cluster/replicate
- subvolumes server2-ns-io1 server2-ns-io2 server2-ns-io3
-end-volume
-
-volume server2-storage-replicate
- type cluster/replicate
- subvolumes server2-io2 server2-io3 server2-io1
-end-volume
-
-volume server2-unify
- type cluster/unify
- option self-heal off
- subvolumes server2-storage-replicate
- option namespace server2-ns-replicate
- option scheduler rr
-end-volume
-
-volume server2-iot
- type performance/io-threads
- option thread-count 8
- subvolumes server2-unify
-end-volume
-
-volume server2
- type protocol/server
- option transport-type tcp
- subvolumes server2-iot
- option transport.socket.listen-port 7002
- option auth.addr.server2-posix2.allow *
- option auth.addr.server2-ns2.allow *
- option auth.addr.server2-iot.allow *
-end-volume
-
-# == server 3 ==
-volume server3-client1
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7001
- option remote-subvolume server1-posix1
-end-volume
-
-volume server3-ns1
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7001
- option remote-subvolume server1-ns1
-end-volume
-
-volume server3-client2
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7002
- option remote-subvolume server2-posix2
-end-volume
-
-volume server3-ns2
- type protocol/client
- option transport-type tcp
- option remote-host 127.0.0.1
- option transport.socket.remote-port 7002
- option remote-subvolume server2-ns2
-end-volume
-
-volume server3-posix3
- type storage/posix
- option directory /tmp/ha-export3/
-end-volume
-
-volume server3-ns3
- type storage/posix
- option directory /tmp/ha-export-ns3/
-end-volume
-
-volume server3-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server3-client1
-end-volume
-
-
-volume server3-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server3-client2
-end-volume
-
-volume server3-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server3-posix3
-end-volume
-
-volume server3-ns-io1
- type performance/io-threads
- option thread-count 8
- subvolumes server3-ns1
-end-volume
-
-volume server3-ns-io2
- type performance/io-threads
- option thread-count 8
- subvolumes server3-ns2
-end-volume
-
-volume server3-ns-io3
- type performance/io-threads
- option thread-count 8
- subvolumes server3-ns3
-end-volume
-
-volume server3-ns-replicate
- type cluster/replicate
- subvolumes server3-ns-io1 server3-ns-io2 server3-ns-io3
-end-volume
-
-volume server3-storage-replicate
- type cluster/replicate
- subvolumes server3-io3 server3-io2 server3-io1
-end-volume
-
-volume server3-unify
- type cluster/unify
- option self-heal off
- subvolumes server3-storage-replicate
- option namespace server3-ns-replicate
- option scheduler rr
-end-volume
-
-volume server3-iot
- type performance/io-threads
- option thread-count 8
- subvolumes server3-unify
-end-volume
-
-volume server3
- type protocol/server
- option transport-type tcp
- subvolumes server3-iot
- option transport.socket.listen-port 7003
- option auth.addr.server3-posix3.allow *
- option auth.addr.server3-ns3.allow *
- option auth.addr.server3-iot.allow *
-end-volume
-
diff --git a/doc/qa/legacy/qa-server.vol b/doc/qa/legacy/qa-server.vol
deleted file mode 100644
index d948f701f1b..00000000000
--- a/doc/qa/legacy/qa-server.vol
+++ /dev/null
@@ -1,284 +0,0 @@
-# This spec file should be used for testing before any release
-#
-
-# Namespace posix
-volume brick-ns
- type storage/posix # POSIX FS translator
- option directory /tmp/export-ns # Export this directory
-end-volume
-
-# 1st server
-
-volume brick1
- type storage/posix # POSIX FS translator
- option directory /tmp/export1 # Export this directory
-end-volume
-
-# == Posix-Locks ==
- volume plocks1
- type features/posix-locks
-# option mandatory on
- subvolumes brick1
- end-volume
-
-volume iot1
- type performance/io-threads
- subvolumes plocks1 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb1
- type performance/write-behind
- subvolumes iot1
-# option <key> <value>
-end-volume
-
-volume ra1
- type performance/read-ahead
- subvolumes wb1
-# option <key> <value>
-end-volume
-
-volume brick2
- type storage/posix # POSIX FS translator
- option directory /tmp/export2 # Export this directory
-end-volume
-
-# == TrashCan Translator ==
-# volume trash2
-# type features/trash
-# option trash-dir /.trashcan
-# subvolumes brick2
-# end-volume
-
-# == Posix-Locks ==
-volume plocks2
- type features/posix-locks
-# option <something> <something>
- subvolumes brick2
-end-volume
-
-volume iot2
- type performance/io-threads
- subvolumes plocks2 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb2
- type performance/write-behind
- subvolumes iot2
-# option <key> <value>
-end-volume
-
-volume ra2
- type performance/read-ahead
- subvolumes wb2
-# option <key> <value>
-end-volume
-
-volume brick3
- type storage/posix # POSIX FS translator
- option directory /tmp/export3 # Export this directory
-end-volume
-
-# == TrashCan Translator ==
-# volume trash3
-# type features/trash
-# option trash-dir /.trashcan
-# subvolumes brick3
-# end-volume
-
-# == Posix-Locks ==
-volume plocks3
- type features/posix-locks
-# option <something> <something>
- subvolumes brick3
-end-volume
-
-volume iot3
- type performance/io-threads
- subvolumes plocks3 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb3
- type performance/write-behind
- subvolumes iot3
-# option <key> <value>
-end-volume
-
-volume ra3
- type performance/read-ahead
- subvolumes wb3
-# option <key> <value>
-end-volume
-
-volume brick4
- type storage/posix # POSIX FS translator
- option directory /tmp/export4 # Export this directory
-end-volume
-
-# == Posix-Locks ==
-volume plocks4
- type features/posix-locks
-# option <something> <something>
- subvolumes brick4
-end-volume
-
-volume iot4
- type performance/io-threads
- subvolumes plocks4 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb4
- type performance/write-behind
- subvolumes iot4
-# option <key> <value>
-end-volume
-
-volume ra4
- type performance/read-ahead
- subvolumes wb4
-# option <key> <value>
-end-volume
-
-volume brick5
- type storage/posix # POSIX FS translator
- option directory /tmp/export5 # Export this directory
-end-volume
-
-
-# == Posix-Locks ==
-volume plocks5
- type features/posix-locks
-# option <something> <something>
- subvolumes brick5
-end-volume
-
-volume iot5
- type performance/io-threads
- subvolumes plocks5 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb5
- type performance/write-behind
- subvolumes iot5
-# option <key> <value>
-end-volume
-
-volume ra5
- type performance/read-ahead
- subvolumes wb5
-# option <key> <value>
-end-volume
-
-volume brick6
- type storage/posix # POSIX FS translator
- option directory /tmp/export6 # Export this directory
-end-volume
-
-# == Posix-Locks ==
-volume plocks6
- type features/posix-locks
-# option <something> <something>
- subvolumes brick6
-end-volume
-
-volume iot6
- type performance/io-threads
- subvolumes plocks6 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb6
- type performance/write-behind
- subvolumes iot6
-# option <key> <value>
-end-volume
-
-volume ra6
- type performance/read-ahead
- subvolumes wb6
-# option <key> <value>
-end-volume
-
-volume brick7
- type storage/posix # POSIX FS translator
- option directory /tmp/export7 # Export this directory
-end-volume
-
-# == Posix-Locks ==
-volume plocks7
- type features/posix-locks
-# option <something> <something>
- subvolumes brick7
-end-volume
-
-volume iot7
- type performance/io-threads
- subvolumes plocks7 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb7
- type performance/write-behind
- subvolumes iot7
-# option <key> <value>
-end-volume
-
-volume ra7
- type performance/read-ahead
- subvolumes wb7
-# option <key> <value>
-end-volume
-
-volume brick8
- type storage/posix # POSIX FS translator
- option directory /tmp/export8 # Export this directory
-end-volume
-
-# == Posix-Locks ==
-volume plocks8
- type features/posix-locks
-# option <something> <something>
- subvolumes brick8
-end-volume
-
-volume iot8
- type performance/io-threads
- subvolumes plocks8 # change properly if above commented volumes needs to be included
-# option <key> <value>
-end-volume
-
-volume wb8
- type performance/write-behind
- subvolumes iot8
-# option <key> <value>
-end-volume
-
-volume ra8
- type performance/read-ahead
- subvolumes wb8
-# option <key> <value>
-end-volume
-
-volume server8
- type protocol/server
- subvolumes ra8 ra1 ra2 ra3 ra4 ra5 ra6 ra7 brick-ns
- option transport-type tcp # For TCP/IP transport
-# option transport-type ib-sdp # For Infiniband transport
-# option transport-type ib-verbs # For ib-verbs transport
- option client-volume-filename /examples/qa-client.vol
- option auth.addr.ra1.allow * # Allow access to "stat8" volume
- option auth.addr.ra2.allow * # Allow access to "stat8" volume
- option auth.addr.ra3.allow * # Allow access to "stat8" volume
- option auth.addr.ra4.allow * # Allow access to "stat8" volume
- option auth.addr.ra5.allow * # Allow access to "stat8" volume
- option auth.addr.ra6.allow * # Allow access to "stat8" volume
- option auth.addr.ra7.allow * # Allow access to "stat8" volume
- option auth.addr.ra8.allow * # Allow access to "stat8" volume
- option auth.addr.brick-ns.allow * # Allow access to "stat8" volume
-end-volume
-
diff --git a/doc/user-guide/legacy/Makefile.am b/doc/user-guide/legacy/Makefile.am
deleted file mode 100644
index b2caabaa2f3..00000000000
--- a/doc/user-guide/legacy/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-info_TEXINFOS = user-guide.texi
-CLEANFILES = *~
-DISTCLEANFILES = .deps/*.P *.info *vti
diff --git a/doc/user-guide/legacy/advanced-stripe.odg b/doc/user-guide/legacy/advanced-stripe.odg
deleted file mode 100644
index 7686d7091b2..00000000000
--- a/doc/user-guide/legacy/advanced-stripe.odg
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/advanced-stripe.pdf b/doc/user-guide/legacy/advanced-stripe.pdf
deleted file mode 100644
index ec8b03dcfbb..00000000000
--- a/doc/user-guide/legacy/advanced-stripe.pdf
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/colonO-icon.jpg b/doc/user-guide/legacy/colonO-icon.jpg
deleted file mode 100644
index 3e66f7a2775..00000000000
--- a/doc/user-guide/legacy/colonO-icon.jpg
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/fdl.texi b/doc/user-guide/legacy/fdl.texi
deleted file mode 100644
index e33c687cdfb..00000000000
--- a/doc/user-guide/legacy/fdl.texi
+++ /dev/null
@@ -1,454 +0,0 @@
-
-@c @node GNU Free Documentation License
-@c @appendixsec GNU Free Documentation License
-
-@cindex FDL, GNU Free Documentation License
-@center Version 1.2, November 2002
-
-@display
-Copyright @copyright{} 2000,2001,2002 Free Software Foundation, Inc.
-59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
-
-Everyone is permitted to copy and distribute verbatim copies
-of this license document, but changing it is not allowed.
-@end display
-
-@enumerate 0
-@item
-PREAMBLE
-
-The purpose of this License is to make a manual, textbook, or other
-functional and useful document @dfn{free} in the sense of freedom: to
-assure everyone the effective freedom to copy and redistribute it,
-with or without modifying it, either commercially or noncommercially.
-Secondarily, this License preserves for the author and publisher a way
-to get credit for their work, while not being considered responsible
-for modifications made by others.
-
-This License is a kind of ``copyleft'', which means that derivative
-works of the document must themselves be free in the same sense. It
-complements the GNU General Public License, which is a copyleft
-license designed for free software.
-
-We have designed this License in order to use it for manuals for free
-software, because free software needs free documentation: a free
-program should come with manuals providing the same freedoms that the
-software does. But this License is not limited to software manuals;
-it can be used for any textual work, regardless of subject matter or
-whether it is published as a printed book. We recommend this License
-principally for works whose purpose is instruction or reference.
-
-@item
-APPLICABILITY AND DEFINITIONS
-
-This License applies to any manual or other work, in any medium, that
-contains a notice placed by the copyright holder saying it can be
-distributed under the terms of this License. Such a notice grants a
-world-wide, royalty-free license, unlimited in duration, to use that
-work under the conditions stated herein. The ``Document'', below,
-refers to any such manual or work. Any member of the public is a
-licensee, and is addressed as ``you''. You accept the license if you
-copy, modify or distribute the work in a way requiring permission
-under copyright law.
-
-A ``Modified Version'' of the Document means any work containing the
-Document or a portion of it, either copied verbatim, or with
-modifications and/or translated into another language.
-
-A ``Secondary Section'' is a named appendix or a front-matter section
-of the Document that deals exclusively with the relationship of the
-publishers or authors of the Document to the Document's overall
-subject (or to related matters) and contains nothing that could fall
-directly within that overall subject. (Thus, if the Document is in
-part a textbook of mathematics, a Secondary Section may not explain
-any mathematics.) The relationship could be a matter of historical
-connection with the subject or with related matters, or of legal,
-commercial, philosophical, ethical or political position regarding
-them.
-
-The ``Invariant Sections'' are certain Secondary Sections whose titles
-are designated, as being those of Invariant Sections, in the notice
-that says that the Document is released under this License. If a
-section does not fit the above definition of Secondary then it is not
-allowed to be designated as Invariant. The Document may contain zero
-Invariant Sections. If the Document does not identify any Invariant
-Sections then there are none.
-
-The ``Cover Texts'' are certain short passages of text that are listed,
-as Front-Cover Texts or Back-Cover Texts, in the notice that says that
-the Document is released under this License. A Front-Cover Text may
-be at most 5 words, and a Back-Cover Text may be at most 25 words.
-
-A ``Transparent'' copy of the Document means a machine-readable copy,
-represented in a format whose specification is available to the
-general public, that is suitable for revising the document
-straightforwardly with generic text editors or (for images composed of
-pixels) generic paint programs or (for drawings) some widely available
-drawing editor, and that is suitable for input to text formatters or
-for automatic translation to a variety of formats suitable for input
-to text formatters. A copy made in an otherwise Transparent file
-format whose markup, or absence of markup, has been arranged to thwart
-or discourage subsequent modification by readers is not Transparent.
-An image format is not Transparent if used for any substantial amount
-of text. A copy that is not ``Transparent'' is called ``Opaque''.
-
-Examples of suitable formats for Transparent copies include plain
-@sc{ascii} without markup, Texinfo input format, La@TeX{} input
-format, @acronym{SGML} or @acronym{XML} using a publicly available
-@acronym{DTD}, and standard-conforming simple @acronym{HTML},
-PostScript or @acronym{PDF} designed for human modification. Examples
-of transparent image formats include @acronym{PNG}, @acronym{XCF} and
-@acronym{JPG}. Opaque formats include proprietary formats that can be
-read and edited only by proprietary word processors, @acronym{SGML} or
-@acronym{XML} for which the @acronym{DTD} and/or processing tools are
-not generally available, and the machine-generated @acronym{HTML},
-PostScript or @acronym{PDF} produced by some word processors for
-output purposes only.
-
-The ``Title Page'' means, for a printed book, the title page itself,
-plus such following pages as are needed to hold, legibly, the material
-this License requires to appear in the title page. For works in
-formats which do not have any title page as such, ``Title Page'' means
-the text near the most prominent appearance of the work's title,
-preceding the beginning of the body of the text.
-
-A section ``Entitled XYZ'' means a named subunit of the Document whose
-title either is precisely XYZ or contains XYZ in parentheses following
-text that translates XYZ in another language. (Here XYZ stands for a
-specific section name mentioned below, such as ``Acknowledgements'',
-``Dedications'', ``Endorsements'', or ``History''.) To ``Preserve the Title''
-of such a section when you modify the Document means that it remains a
-section ``Entitled XYZ'' according to this definition.
-
-The Document may include Warranty Disclaimers next to the notice which
-states that this License applies to the Document. These Warranty
-Disclaimers are considered to be included by reference in this
-License, but only as regards disclaiming warranties: any other
-implication that these Warranty Disclaimers may have is void and has
-no effect on the meaning of this License.
-
-@item
-VERBATIM COPYING
-
-You may copy and distribute the Document in any medium, either
-commercially or noncommercially, provided that this License, the
-copyright notices, and the license notice saying this License applies
-to the Document are reproduced in all copies, and that you add no other
-conditions whatsoever to those of this License. You may not use
-technical measures to obstruct or control the reading or further
-copying of the copies you make or distribute. However, you may accept
-compensation in exchange for copies. If you distribute a large enough
-number of copies you must also follow the conditions in section 3.
-
-You may also lend copies, under the same conditions stated above, and
-you may publicly display copies.
-
-@item
-COPYING IN QUANTITY
-
-If you publish printed copies (or copies in media that commonly have
-printed covers) of the Document, numbering more than 100, and the
-Document's license notice requires Cover Texts, you must enclose the
-copies in covers that carry, clearly and legibly, all these Cover
-Texts: Front-Cover Texts on the front cover, and Back-Cover Texts on
-the back cover. Both covers must also clearly and legibly identify
-you as the publisher of these copies. The front cover must present
-the full title with all words of the title equally prominent and
-visible. You may add other material on the covers in addition.
-Copying with changes limited to the covers, as long as they preserve
-the title of the Document and satisfy these conditions, can be treated
-as verbatim copying in other respects.
-
-If the required texts for either cover are too voluminous to fit
-legibly, you should put the first ones listed (as many as fit
-reasonably) on the actual cover, and continue the rest onto adjacent
-pages.
-
-If you publish or distribute Opaque copies of the Document numbering
-more than 100, you must either include a machine-readable Transparent
-copy along with each Opaque copy, or state in or with each Opaque copy
-a computer-network location from which the general network-using
-public has access to download using public-standard network protocols
-a complete Transparent copy of the Document, free of added material.
-If you use the latter option, you must take reasonably prudent steps,
-when you begin distribution of Opaque copies in quantity, to ensure
-that this Transparent copy will remain thus accessible at the stated
-location until at least one year after the last time you distribute an
-Opaque copy (directly or through your agents or retailers) of that
-edition to the public.
-
-It is requested, but not required, that you contact the authors of the
-Document well before redistributing any large number of copies, to give
-them a chance to provide you with an updated version of the Document.
-
-@item
-MODIFICATIONS
-
-You may copy and distribute a Modified Version of the Document under
-the conditions of sections 2 and 3 above, provided that you release
-the Modified Version under precisely this License, with the Modified
-Version filling the role of the Document, thus licensing distribution
-and modification of the Modified Version to whoever possesses a copy
-of it. In addition, you must do these things in the Modified Version:
-
-@enumerate A
-@item
-Use in the Title Page (and on the covers, if any) a title distinct
-from that of the Document, and from those of previous versions
-(which should, if there were any, be listed in the History section
-of the Document). You may use the same title as a previous version
-if the original publisher of that version gives permission.
-
-@item
-List on the Title Page, as authors, one or more persons or entities
-responsible for authorship of the modifications in the Modified
-Version, together with at least five of the principal authors of the
-Document (all of its principal authors, if it has fewer than five),
-unless they release you from this requirement.
-
-@item
-State on the Title page the name of the publisher of the
-Modified Version, as the publisher.
-
-@item
-Preserve all the copyright notices of the Document.
-
-@item
-Add an appropriate copyright notice for your modifications
-adjacent to the other copyright notices.
-
-@item
-Include, immediately after the copyright notices, a license notice
-giving the public permission to use the Modified Version under the
-terms of this License, in the form shown in the Addendum below.
-
-@item
-Preserve in that license notice the full lists of Invariant Sections
-and required Cover Texts given in the Document's license notice.
-
-@item
-Include an unaltered copy of this License.
-
-@item
-Preserve the section Entitled ``History'', Preserve its Title, and add
-to it an item stating at least the title, year, new authors, and
-publisher of the Modified Version as given on the Title Page. If
-there is no section Entitled ``History'' in the Document, create one
-stating the title, year, authors, and publisher of the Document as
-given on its Title Page, then add an item describing the Modified
-Version as stated in the previous sentence.
-
-@item
-Preserve the network location, if any, given in the Document for
-public access to a Transparent copy of the Document, and likewise
-the network locations given in the Document for previous versions
-it was based on. These may be placed in the ``History'' section.
-You may omit a network location for a work that was published at
-least four years before the Document itself, or if the original
-publisher of the version it refers to gives permission.
-
-@item
-For any section Entitled ``Acknowledgements'' or ``Dedications'', Preserve
-the Title of the section, and preserve in the section all the
-substance and tone of each of the contributor acknowledgements and/or
-dedications given therein.
-
-@item
-Preserve all the Invariant Sections of the Document,
-unaltered in their text and in their titles. Section numbers
-or the equivalent are not considered part of the section titles.
-
-@item
-Delete any section Entitled ``Endorsements''. Such a section
-may not be included in the Modified Version.
-
-@item
-Do not retitle any existing section to be Entitled ``Endorsements'' or
-to conflict in title with any Invariant Section.
-
-@item
-Preserve any Warranty Disclaimers.
-@end enumerate
-
-If the Modified Version includes new front-matter sections or
-appendices that qualify as Secondary Sections and contain no material
-copied from the Document, you may at your option designate some or all
-of these sections as invariant. To do this, add their titles to the
-list of Invariant Sections in the Modified Version's license notice.
-These titles must be distinct from any other section titles.
-
-You may add a section Entitled ``Endorsements'', provided it contains
-nothing but endorsements of your Modified Version by various
-parties---for example, statements of peer review or that the text has
-been approved by an organization as the authoritative definition of a
-standard.
-
-You may add a passage of up to five words as a Front-Cover Text, and a
-passage of up to 25 words as a Back-Cover Text, to the end of the list
-of Cover Texts in the Modified Version. Only one passage of
-Front-Cover Text and one of Back-Cover Text may be added by (or
-through arrangements made by) any one entity. If the Document already
-includes a cover text for the same cover, previously added by you or
-by arrangement made by the same entity you are acting on behalf of,
-you may not add another; but you may replace the old one, on explicit
-permission from the previous publisher that added the old one.
-
-The author(s) and publisher(s) of the Document do not by this License
-give permission to use their names for publicity for or to assert or
-imply endorsement of any Modified Version.
-
-@item
-COMBINING DOCUMENTS
-
-You may combine the Document with other documents released under this
-License, under the terms defined in section 4 above for modified
-versions, provided that you include in the combination all of the
-Invariant Sections of all of the original documents, unmodified, and
-list them all as Invariant Sections of your combined work in its
-license notice, and that you preserve all their Warranty Disclaimers.
-
-The combined work need only contain one copy of this License, and
-multiple identical Invariant Sections may be replaced with a single
-copy. If there are multiple Invariant Sections with the same name but
-different contents, make the title of each such section unique by
-adding at the end of it, in parentheses, the name of the original
-author or publisher of that section if known, or else a unique number.
-Make the same adjustment to the section titles in the list of
-Invariant Sections in the license notice of the combined work.
-
-In the combination, you must combine any sections Entitled ``History''
-in the various original documents, forming one section Entitled
-``History''; likewise combine any sections Entitled ``Acknowledgements'',
-and any sections Entitled ``Dedications''. You must delete all
-sections Entitled ``Endorsements.''
-
-@item
-COLLECTIONS OF DOCUMENTS
-
-You may make a collection consisting of the Document and other documents
-released under this License, and replace the individual copies of this
-License in the various documents with a single copy that is included in
-the collection, provided that you follow the rules of this License for
-verbatim copying of each of the documents in all other respects.
-
-You may extract a single document from such a collection, and distribute
-it individually under this License, provided you insert a copy of this
-License into the extracted document, and follow this License in all
-other respects regarding verbatim copying of that document.
-
-@item
-AGGREGATION WITH INDEPENDENT WORKS
-
-A compilation of the Document or its derivatives with other separate
-and independent documents or works, in or on a volume of a storage or
-distribution medium, is called an ``aggregate'' if the copyright
-resulting from the compilation is not used to limit the legal rights
-of the compilation's users beyond what the individual works permit.
-When the Document is included in an aggregate, this License does not
-apply to the other works in the aggregate which are not themselves
-derivative works of the Document.
-
-If the Cover Text requirement of section 3 is applicable to these
-copies of the Document, then if the Document is less than one half of
-the entire aggregate, the Document's Cover Texts may be placed on
-covers that bracket the Document within the aggregate, or the
-electronic equivalent of covers if the Document is in electronic form.
-Otherwise they must appear on printed covers that bracket the whole
-aggregate.
-
-@item
-TRANSLATION
-
-Translation is considered a kind of modification, so you may
-distribute translations of the Document under the terms of section 4.
-Replacing Invariant Sections with translations requires special
-permission from their copyright holders, but you may include
-translations of some or all Invariant Sections in addition to the
-original versions of these Invariant Sections. You may include a
-translation of this License, and all the license notices in the
-Document, and any Warranty Disclaimers, provided that you also include
-the original English version of this License and the original versions
-of those notices and disclaimers. In case of a disagreement between
-the translation and the original version of this License or a notice
-or disclaimer, the original version will prevail.
-
-If a section in the Document is Entitled ``Acknowledgements'',
-``Dedications'', or ``History'', the requirement (section 4) to Preserve
-its Title (section 1) will typically require changing the actual
-title.
-
-@item
-TERMINATION
-
-You may not copy, modify, sublicense, or distribute the Document except
-as expressly provided for under this License. Any other attempt to
-copy, modify, sublicense or distribute the Document is void, and will
-automatically terminate your rights under this License. However,
-parties who have received copies, or rights, from you under this
-License will not have their licenses terminated so long as such
-parties remain in full compliance.
-
-@item
-FUTURE REVISIONS OF THIS LICENSE
-
-The Free Software Foundation may publish new, revised versions
-of the GNU Free Documentation License from time to time. Such new
-versions will be similar in spirit to the present version, but may
-differ in detail to address new problems or concerns. See
-@uref{http://www.gnu.org/copyleft/}.
-
-Each version of the License is given a distinguishing version number.
-If the Document specifies that a particular numbered version of this
-License ``or any later version'' applies to it, you have the option of
-following the terms and conditions either of that specified version or
-of any later version that has been published (not as a draft) by the
-Free Software Foundation. If the Document does not specify a version
-number of this License, you may choose any version ever published (not
-as a draft) by the Free Software Foundation.
-@end enumerate
-
-@page
-@c @appendixsubsec ADDENDUM: How to use this License for your
-@c documents
-@subsection ADDENDUM: How to use this License for your documents
-
-To use this License in a document you have written, include a copy of
-the License in the document and put the following copyright and
-license notices just after the title page:
-
-@smallexample
-@group
- Copyright (C) @var{year} @var{your name}.
- Permission is granted to copy, distribute and/or modify this document
- under the terms of the GNU Free Documentation License, Version 1.2
- or any later version published by the Free Software Foundation;
- with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
- Texts. A copy of the license is included in the section entitled ``GNU
- Free Documentation License''.
-@end group
-@end smallexample
-
-If you have Invariant Sections, Front-Cover Texts and Back-Cover Texts,
-replace the ``with...Texts.'' line with this:
-
-@smallexample
-@group
- with the Invariant Sections being @var{list their titles}, with
- the Front-Cover Texts being @var{list}, and with the Back-Cover Texts
- being @var{list}.
-@end group
-@end smallexample
-
-If you have Invariant Sections without Cover Texts, or some other
-combination of the three, merge those two alternatives to suit the
-situation.
-
-If your document contains nontrivial examples of program code, we
-recommend releasing these examples in parallel under your choice of
-free software license, such as the GNU General Public License,
-to permit their use in free software.
-
-@c Local Variables:
-@c ispell-local-pdict: "ispell-dict"
-@c End:
-
diff --git a/doc/user-guide/legacy/fuse.odg b/doc/user-guide/legacy/fuse.odg
deleted file mode 100644
index 61bd103c78b..00000000000
--- a/doc/user-guide/legacy/fuse.odg
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/fuse.pdf b/doc/user-guide/legacy/fuse.pdf
deleted file mode 100644
index a7d13faff56..00000000000
--- a/doc/user-guide/legacy/fuse.pdf
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/ha.odg b/doc/user-guide/legacy/ha.odg
deleted file mode 100644
index e4b8b72d08b..00000000000
--- a/doc/user-guide/legacy/ha.odg
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/ha.pdf b/doc/user-guide/legacy/ha.pdf
deleted file mode 100644
index e372c0ab03e..00000000000
--- a/doc/user-guide/legacy/ha.pdf
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/stripe.odg b/doc/user-guide/legacy/stripe.odg
deleted file mode 100644
index 79441bf1452..00000000000
--- a/doc/user-guide/legacy/stripe.odg
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/stripe.pdf b/doc/user-guide/legacy/stripe.pdf
deleted file mode 100644
index b94446feb56..00000000000
--- a/doc/user-guide/legacy/stripe.pdf
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/unify.odg b/doc/user-guide/legacy/unify.odg
deleted file mode 100644
index ccaa9bf16f9..00000000000
--- a/doc/user-guide/legacy/unify.odg
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/unify.pdf b/doc/user-guide/legacy/unify.pdf
deleted file mode 100644
index c22027f66e7..00000000000
--- a/doc/user-guide/legacy/unify.pdf
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/user-guide.info b/doc/user-guide/legacy/user-guide.info
deleted file mode 100644
index 6a49d078d64..00000000000
--- a/doc/user-guide/legacy/user-guide.info
+++ /dev/null
@@ -1,2697 +0,0 @@
-This is ../../../doc/user-guide/user-guide.info, produced by makeinfo version 4.13 from ../../../doc/user-guide/user-guide.texi.
-
-START-INFO-DIR-ENTRY
-* GlusterFS: (user-guide). GlusterFS distributed filesystem user guide
-END-INFO-DIR-ENTRY
-
- This is the user manual for GlusterFS 2.0.
-
- Copyright (c) 2007-2011 Gluster, Inc. Permission is granted to
-copy, distribute and/or modify this document under the terms of the GNU
-Free Documentation License, Version 1.2 or any later version published
-by the Free Software Foundation; with no Invariant Sections, no
-Front-Cover Texts, and no Back-Cover Texts. A copy of the license is
-included in the chapter entitled "GNU Free Documentation License".
-
-
-File: user-guide.info, Node: Top, Next: Acknowledgements, Up: (dir)
-
-GlusterFS 2.0 User Guide
-************************
-
-This is the user manual for GlusterFS 2.0.
-
- Copyright (c) 2007-2011 Gluster, Inc. Permission is granted to
-copy, distribute and/or modify this document under the terms of the GNU
-Free Documentation License, Version 1.2 or any later version published
-by the Free Software Foundation; with no Invariant Sections, no
-Front-Cover Texts, and no Back-Cover Texts. A copy of the license is
-included in the chapter entitled "GNU Free Documentation License".
-
-* Menu:
-
-* Acknowledgements::
-* Introduction::
-* Installation and Invocation::
-* Concepts::
-* Translators::
-* Usage Scenarios::
-* Troubleshooting::
-* GNU Free Documentation Licence::
-* Index::
-
- --- The Detailed Node Listing ---
-
-Installation and Invocation
-
-* Pre requisites::
-* Getting GlusterFS::
-* Building::
-* Running GlusterFS::
-* A Tutorial Introduction::
-
-Running GlusterFS
-
-* Server::
-* Client::
-
-Concepts
-
-* Filesystems in Userspace::
-* Translator::
-* Volume specification file::
-
-Translators
-
-* Storage Translators::
-* Client and Server Translators::
-* Clustering Translators::
-* Performance Translators::
-* Features Translators::
-
-Storage Translators
-
-* POSIX::
-
-Client and Server Translators
-
-* Transport modules::
-* Client protocol::
-* Server protocol::
-
-Clustering Translators
-
-* Unify::
-* Replicate::
-* Stripe::
-
-Performance Translators
-
-* Read Ahead::
-* Write Behind::
-* IO Threads::
-* IO Cache::
-
-Features Translators
-
-* POSIX Locks::
-* Fixed ID::
-
-Miscellaneous Translators
-
-* ROT-13::
-* Trace::
-
-
-File: user-guide.info, Node: Acknowledgements, Next: Introduction, Prev: Top, Up: Top
-
-Acknowledgements
-****************
-
-GlusterFS continues to be a wonderful and enriching experience for all
-of us involved.
-
- GlusterFS development would not have been possible at this pace if
-not for our enthusiastic users. People from around the world have
-helped us with bug reports, performance numbers, and feature
-suggestions. A huge thanks to them all.
-
- Matthew Paine - for RPMs & general enthu
-
- Leonardo Rodrigues de Mello - for DEBs
-
- Julian Perez & Adam D'Auria - for multi-server tutorial
-
- Paul England - for HA spec
-
- Brent Nelson - for many bug reports
-
- Jacques Mattheij - for Europe mirror.
-
- Patrick Negri - for TCP non-blocking connect.
- http://gluster.org/core-team.php (<list-hacking@gluster.com>)
- Gluster
-
-
-File: user-guide.info, Node: Introduction, Next: Installation and Invocation, Prev: Acknowledgements, Up: Top
-
-1 Introduction
-**************
-
-GlusterFS is a distributed filesystem. It works at the file level, not
-block level.
-
- A network filesystem is one which allows us to access remote files. A
-distributed filesystem is one that stores data on multiple machines and
-makes them all appear to be a part of the same filesystem.
-
- Need for distributed filesystems
-
- * Scalability: A distributed filesystem allows us to store more data
- than what can be stored on a single machine.
-
- * Redundancy: We might want to replicate crucial data on to several
- machines.
-
- * Uniform access: One can mount a remote volume (for example your
- home directory) from any machine and access the same data.
-
-1.1 Contacting us
-=================
-
-You can reach us through the mailing list *gluster-devel*
-(<gluster-devel@nongnu.org>).
-
- You can also find many of the developers on IRC, on the `#gluster'
-channel on Freenode (<irc.freenode.net>).
-
- The GlusterFS documentation wiki is also useful:
-<http://gluster.org/docs/index.php/GlusterFS>
-
- For commercial support, you can contact Gluster at:
-
- 3194 Winding Vista Common
- Fremont, CA 94539
- USA.
-
- Phone: +1 (510) 354 6801
- Toll free: +1 (888) 813 6309
- Fax: +1 (510) 372 0604
-
- You can also email us at <support@gluster.com>.
-
-
-File: user-guide.info, Node: Installation and Invocation, Next: Concepts, Prev: Introduction, Up: Top
-
-2 Installation and Invocation
-*****************************
-
-* Menu:
-
-* Pre requisites::
-* Getting GlusterFS::
-* Building::
-* Running GlusterFS::
-* A Tutorial Introduction::
-
-
-File: user-guide.info, Node: Pre requisites, Next: Getting GlusterFS, Up: Installation and Invocation
-
-2.1 Pre requisites
-==================
-
-Before installing GlusterFS make sure you have the following components
-installed.
-
-2.1.1 FUSE
-----------
-
-You'll need FUSE version 2.6.0 or higher to use GlusterFS. You can omit
-installing FUSE if you want to build _only_ the server. Note that you
-won't be able to mount a GlusterFS filesystem on a machine that does
-not have FUSE installed.
-
- FUSE can be downloaded from: <http://fuse.sourceforge.net/>
-
- To get the best performance from GlusterFS, however, it is
-recommended that you use our patched version of FUSE. See Patched FUSE
-for details.
-
-2.1.2 Patched FUSE
-------------------
-
-The GlusterFS project maintains a patched version of FUSE meant to be
-used with GlusterFS. The patches increase GlusterFS performance. It is
-recommended that all users use the patched FUSE.
-
- The patched FUSE tarball can be downloaded from:
-
- <ftp://ftp.gluster.com/pub/gluster/glusterfs/fuse/>
-
- The specific changes made to FUSE are:
-
- * The communication channel size between FUSE kernel module and
- GlusterFS has been increased to 1MB, permitting large reads and
- writes to be sent in bigger chunks.
-
- * The kernel's read-ahead boundry has been extended upto 1MB.
-
- * Block size returned in the `stat()'/`fstat()' calls tuned to 1MB,
- to make cp and similar commands perform I/O using that block size.
-
- * `flock()' locking support has been added (although some rework in
- GlusterFS is needed for perfect compliance).
-
-2.1.3 libibverbs (optional)
----------------------------
-
-This is only needed if you want GlusterFS to use InfiniBand as the
-interconnect mechanism between server and client. You can get it from:
-
- <http://www.openfabrics.org/downloads.htm>.
-
-2.1.4 Bison and Flex
---------------------
-
-These should be already installed on most Linux systems. If not, use
-your distribution's normal software installation procedures to install
-them. Make sure you install the relevant developer packages also.
-
-
-File: user-guide.info, Node: Getting GlusterFS, Next: Building, Prev: Pre requisites, Up: Installation and Invocation
-
-2.2 Getting GlusterFS
-=====================
-
-There are many ways to get hold of GlusterFS. For a production
-deployment, the recommended method is to download the latest release
-tarball. Release tarballs are available at:
-<http://gluster.org/download.php>.
-
- If you want the bleeding edge development source, you can get them
-from the GNU Arch(1) repository. First you must install GNU Arch
-itself. Then register the GlusterFS archive by doing:
-
- $ tla register-archive http://arch.sv.gnu.org/archives/gluster
-
- Now you can check out the source itself:
-
- $ tla get -A gluster@sv.gnu.org glusterfs--mainline--3.0
-
- ---------- Footnotes ----------
-
- (1) <http://www.gnu.org/software/gnu-arch/>
-
-
-File: user-guide.info, Node: Building, Next: Running GlusterFS, Prev: Getting GlusterFS, Up: Installation and Invocation
-
-2.3 Building
-============
-
-You can skip this section if you're installing from RPMs or DEBs.
-
- GlusterFS uses the Autotools mechanism to build. As such, the
-procedure is straight-forward. First, change into the GlusterFS source
-directory.
-
- $ cd glusterfs-<version>
-
- If you checked out the source from the Arch repository, you'll need
-to run `./autogen.sh' first. Note that you'll need to have Autoconf and
-Automake installed for this.
-
- Run `configure'.
-
- $ ./configure
-
- The configure script accepts the following options:
-
-`--disable-ibverbs'
- Disable the InfiniBand transport mechanism.
-
-`--disable-fuse-client'
- Disable the FUSE client.
-
-`--disable-server'
- Disable building of the GlusterFS server.
-
-`--disable-bdb'
- Disable building of Berkeley DB based storage translator.
-
-`--disable-mod_glusterfs'
- Disable building of Apache/lighttpd glusterfs plugins.
-
-`--disable-epoll'
- Use poll instead of epoll.
-
-`--disable-libglusterfsclient'
- Disable building of libglusterfsclient
-
-
- Build and install GlusterFS.
-
- # make install
-
- The binaries (`glusterfsd' and `glusterfs') will be by default
-installed in `/usr/local/sbin/'. Translator, scheduler, and transport
-shared libraries will be installed in
-`/usr/local/lib/glusterfs/<version>/'. Sample volume specification
-files will be in `/usr/local/etc/glusterfs/'. This document itself can
-be found in `/usr/local/share/doc/glusterfs/'. If you passed the
-`--prefix' argument to the configure script, then replace `/usr/local'
-in the preceding paths with the prefix.
-
-
-File: user-guide.info, Node: Running GlusterFS, Next: A Tutorial Introduction, Prev: Building, Up: Installation and Invocation
-
-2.4 Running GlusterFS
-=====================
-
-* Menu:
-
-* Server::
-* Client::
-
-
-File: user-guide.info, Node: Server, Next: Client, Up: Running GlusterFS
-
-2.4.1 Server
-------------
-
-The GlusterFS server is necessary to export storage volumes to remote
-clients (See *note Server protocol:: for more info). This section
-documents the invocation of the GlusterFS server program and all the
-command-line options accepted by it.
-
- Basic Options
-
-`-f, --volfile=<path>'
- Use the volume file as the volume specification.
-
-`-s, --volfile-server=<hostname>'
- Server to get volume file from. This option overrides -volfile
- option.
-
-`-l, --log-file=<path>'
- Specify the path for the log file.
-
-`-L, --log-level=<level>'
- Set the log level for the server. Log level should be one of DEBUG,
- WARNING, ERROR, CRITICAL, or NONE.
-
- Advanced Options
-
-`--debug'
- Run in debug mode. This option sets -no-daemon, -log-level to
- DEBUG and -log-file to console.
-
-`-N, --no-daemon'
- Run glusterfsd as a foreground process.
-
-`-p, --pid-file=<path>'
- Path for the PID file.
-
-`--volfile-id=<key>'
- 'key' of the volfile to be fetched from server.
-
-`--volfile-server-port=<port-number>'
- Listening port number of volfile server.
-
-`--volfile-server-transport=[socket|ib-verbs]'
- Transport type to get volfile from server. [default: `socket']
-
-`--xlator-options=<volume-name.option=value>'
- Add/override a translator option for a volume with specified value.
-
- Miscellaneous Options
-
-`-?, --help'
- Show this help text.
-
-`--usage'
- Display a short usage message.
-
-`-V, --version'
- Show version information.
-
-
-File: user-guide.info, Node: Client, Prev: Server, Up: Running GlusterFS
-
-2.4.2 Client
-------------
-
-The GlusterFS client process is necessary to access remote storage
-volumes and mount them locally using FUSE. This section documents the
-invocation of the client process and all its command-line arguments.
-
- # glusterfs [options] <mountpoint>
-
- The `mountpoint' is the directory where you want the GlusterFS
-filesystem to appear. Example:
-
- # glusterfs -f /usr/local/etc/glusterfs-client.vol /mnt
-
- The command-line options are detailed below.
-
- Basic Options
-
-`-f, --volfile=<path>'
- Use the volume file as the volume specification.
-
-`-s, --volfile-server=<hostname>'
- Server to get volume file from. This option overrides -volfile
- option.
-
-`-l, --log-file=<path>'
- Specify the path for the log file.
-
-`-L, --log-level=<level>'
- Set the log level for the server. Log level should be one of DEBUG,
- WARNING, ERROR, CRITICAL, or NONE.
-
- Advanced Options
-
-`--debug'
- Run in debug mode. This option sets -no-daemon, -log-level to
- DEBUG and -log-file to console.
-
-`-N, --no-daemon'
- Run `glusterfs' as a foreground process.
-
-`-p, --pid-file=<path>'
- Path for the PID file.
-
-`--volfile-id=<key>'
- 'key' of the volfile to be fetched from server.
-
-`--volfile-server-port=<port-number>'
- Listening port number of volfile server.
-
-`--volfile-server-transport=[socket|ib-verbs]'
- Transport type to get volfile from server. [default: `socket']
-
-`--xlator-options=<volume-name.option=value>'
- Add/override a translator option for a volume with specified value.
-
-`--volume-name=<volume name>'
- Volume name in client spec to use. Defaults to the root volume.
-
- FUSE Options
-
-`--attribute-timeout=<n>'
- Attribute timeout for inodes in the kernel, in seconds. Defaults
- to 1 second.
-
-`--disable-direct-io-mode'
- Disable direct I/O mode in FUSE kernel module.
-
-`-e, --entry-timeout=<n>'
- Entry timeout for directory entries in the kernel, in seconds.
- Defaults to 1 second.
-
- Missellaneous Options
-
-`-?, --help'
- Show this help information.
-
-`-V, --version'
- Show version information.
-
-
-File: user-guide.info, Node: A Tutorial Introduction, Prev: Running GlusterFS, Up: Installation and Invocation
-
-2.5 A Tutorial Introduction
-===========================
-
-This section will show you how to quickly get GlusterFS up and running.
-We'll configure GlusterFS as a simple network filesystem, with one
-server and one client. In this mode of usage, GlusterFS can serve as a
-replacement for NFS.
-
- We'll make use of two machines; call them _server_ and _client_ (If
-you don't want to setup two machines, just run everything that follows
-on the same machine). In the examples that follow, the shell prompts
-will use these names to clarify the machine on which the command is
-being run. For example, a command that should be run on the server will
-be shown with the prompt:
-
- [root@server]#
-
- Our goal is to make a directory on the _server_ (say, `/export')
-accessible to the _client_.
-
- First of all, get GlusterFS installed on both the machines, as
-described in the previous sections. Make sure you have the FUSE kernel
-module loaded. You can ensure this by running:
-
- [root@server]# modprobe fuse
-
- Before we can run the GlusterFS client or server programs, we need
-to write two files called _volume specifications_ (equivalently refered
-to as _volfiles_). The volfile describes the _translator tree_ on a
-node. The next chapter will explain the concepts of `translator' and
-`volume specification' in detail. For now, just assume that the volfile
-is like an NFS `/etc/export' file.
-
- On the server, create a text file somewhere (we'll assume the path
-`/tmp/glusterfsd.vol') with the following contents.
-
- volume colon-o
- type storage/posix
- option directory /export
- end-volume
-
- volume server
- type protocol/server
- subvolumes colon-o
- option transport-type tcp
- option auth.addr.colon-o.allow *
- end-volume
-
- A brief explanation of the file's contents. The first section
-defines a storage volume, named "colon-o" (the volume names are
-arbitrary), which exports the `/export' directory. The second section
-defines options for the translator which will make the storage volume
-accessible remotely. It specifies `colon-o' as a subvolume. This
-defines the _translator tree_, about which more will be said in the
-next chapter. The two options specify that the TCP protocol is to be
-used (as opposed to InfiniBand, for example), and that access to the
-storage volume is to be provided to clients with any IP address at all.
-If you wanted to restrict access to this server to only your subnet for
-example, you'd specify something like `192.168.1.*' in the second
-option line.
-
- On the client machine, create the following text file (again, we'll
-assume the path to be `/tmp/glusterfs-client.vol'). Replace
-_server-ip-address_ with the IP address of your server machine. If you
-are doing all this on a single machine, use `127.0.0.1'.
-
- volume client
- type protocol/client
- option transport-type tcp
- option remote-host _server-ip-address_
- option remote-subvolume colon-o
- end-volume
-
- Now we need to start both the server and client programs. To start
-the server:
-
- [root@server]# glusterfsd -f /tmp/glusterfs-server.vol
-
- To start the client:
-
- [root@client]# glusterfs -f /tmp/glusterfs-client.vol /mnt/glusterfs
-
- You should now be able to see the files under the server's `/export'
-directory in the `/mnt/glusterfs' directory on the client. That's it;
-GlusterFS is now working as a network file system.
-
-
-File: user-guide.info, Node: Concepts, Next: Translators, Prev: Installation and Invocation, Up: Top
-
-3 Concepts
-**********
-
-* Menu:
-
-* Filesystems in Userspace::
-* Translator::
-* Volume specification file::
-
-
-File: user-guide.info, Node: Filesystems in Userspace, Next: Translator, Up: Concepts
-
-3.1 Filesystems in Userspace
-============================
-
-A filesystem is usually implemented in kernel space. Kernel space
-development is much harder than userspace development. FUSE is a kernel
-module/library that allows us to write a filesystem completely in
-userspace.
-
- FUSE consists of a kernel module which interacts with the userspace
-implementation using a device file `/dev/fuse'. When a process makes a
-syscall on a FUSE filesystem, VFS hands the request to the FUSE module,
-which writes the request to `/dev/fuse'. The userspace implementation
-polls `/dev/fuse', and when a request arrives, processes it and writes
-the result back to `/dev/fuse'. The kernel then reads from the device
-file and returns the result to the user process.
-
- In case of GlusterFS, the userspace program is the GlusterFS client.
-The control flow is shown in the diagram below. The GlusterFS client
-services the request by sending it to the server, which in turn hands
-it to the local POSIX filesystem.
-
-
- Fig 1. Control flow in GlusterFS
-
-
-File: user-guide.info, Node: Translator, Next: Volume specification file, Prev: Filesystems in Userspace, Up: Concepts
-
-3.2 Translator
-==============
-
-The _translator_ is the most important concept in GlusterFS. In fact,
-GlusterFS is nothing but a collection of translators working together,
-forming a translator _tree_.
-
- The idea of a translator is perhaps best understood using an
-analogy. Consider the VFS in the Linux kernel. The VFS abstracts the
-various filesystem implementations (such as EXT3, ReiserFS, XFS, etc.)
-supported by the kernel. When an application calls the kernel to
-perform an operation on a file, the kernel passes the request on to the
-appropriate filesystem implementation.
-
- For example, let's say there are two partitions on a Linux machine:
-`/', which is an EXT3 partition, and `/usr', which is a ReiserFS
-partition. Now if an application wants to open a file called, say,
-`/etc/fstab', then the kernel will internally pass the request to the
-EXT3 implementation. If on the other hand, an application wants to
-read a file called `/usr/src/linux/CREDITS', then the kernel will call
-upon the ReiserFS implementation to do the job.
-
- The "filesystem implementation" objects are analogous to GlusterFS
-translators. A GlusterFS translator implements all the filesystem
-operations. Whereas in VFS there is a two-level tree (with the kernel
-at the root and all the filesystem implementation as its children), in
-GlusterFS there exists a more elaborate tree structure.
-
- We can now define translators more precisely. A GlusterFS translator
-is a shared object (`.so') that implements every filesystem call.
-GlusterFS translators can be arranged in an arbitrary tree structure
-(subject to constraints imposed by the translators). When GlusterFS
-receives a filesystem call, it passes it on to the translator at the
-root of the translator tree. The root translator may in turn pass it on
-to any or all of its children, and so on, until the leaf nodes are
-reached. The result of a filesystem call is communicated in the reverse
-fashion, from the leaf nodes up to the root node, and then on to the
-application.
-
- So what might a translator tree look like?
-
-
- Fig 2. A sample translator tree
-
- The diagram depicts three servers and one GlusterFS client. It is
-important to note that conceptually, the translator tree spans machine
-boundaries. Thus, the client machine in the diagram, `10.0.0.1', can
-access the aggregated storage of the filesystems on the server machines
-`10.0.0.2', `10.0.0.3', and `10.0.0.4'. The translator diagram will
-make more sense once you've read the next chapter and understood the
-functions of the various translators.
-
-
-File: user-guide.info, Node: Volume specification file, Prev: Translator, Up: Concepts
-
-3.3 Volume specification file
-=============================
-
-The volume specification file describes the translator tree for both the
-server and client programs.
-
- A volume specification file is a sequence of volume definitions.
-The syntax of a volume definition is explained below:
-
- *volume* _volume-name_
- *type* _translator-name_
- *option* _option-name_ _option-value_
- ...
- *subvolumes* _subvolume1_ _subvolume2_ ...
- *end-volume*
-
- ...
-
-_volume-name_
- An identifier for the volume. This is just a human-readable name,
- and can contain any alphanumeric character. For instance,
- "storage-1", "colon-o", or "forty-two".
-
-_translator-name_
- Name of one of the available translators. Example:
- `protocol/client', `cluster/unify'.
-
-_option-name_
- Name of a valid option for the translator.
-
-_option-value_
- Value for the option. Everything following the "option" keyword to
- the end of the line is considered the value; it is up to the
- translator to parse it.
-
-_subvolume1_, _subvolume2_, ...
- Volume names of sub-volumes. The sub-volumes must already have
- been defined earlier in the file.
-
- There are a few rules you must follow when writing a volume
-specification file:
-
- * Everything following a ``#'' is considered a comment and is
- ignored. Blank lines are also ignored.
-
- * All names and keywords are case-sensitive.
-
- * The order of options inside a volume definition does not matter.
-
- * An option value may not span multiple lines.
-
- * If an option is not specified, it will assume its default value.
-
- * A sub-volume must have already been defined before it can be
- referenced. This means you have to write the specification file
- "bottom-up", starting from the leaf nodes of the translator tree
- and moving up to the root.
-
- A simple example volume specification file is shown below:
-
- # This is a comment line
- volume client
- type protocol/client
- option transport-type tcp
- option remote-host localhost # Also a comment
- option remote-subvolume brick
- # The subvolumes line may be absent
- end-volume
-
- volume iot
- type performance/io-threads
- option thread-count 4
- subvolumes client
- end-volume
-
- volume wb
- type performance/write-behind
- subvolumes iot
- end-volume
-
-
-File: user-guide.info, Node: Translators, Next: Usage Scenarios, Prev: Concepts, Up: Top
-
-4 Translators
-*************
-
-* Menu:
-
-* Storage Translators::
-* Client and Server Translators::
-* Clustering Translators::
-* Performance Translators::
-* Features Translators::
-* Miscellaneous Translators::
-
- This chapter documents all the available GlusterFS translators in
-detail. Each translator section will show its name (for example,
-`cluster/unify'), briefly describe its purpose and workings, and list
-every option accepted by that translator and their meaning.
-
-
-File: user-guide.info, Node: Storage Translators, Next: Client and Server Translators, Up: Translators
-
-4.1 Storage Translators
-=======================
-
-The storage translators form the "backend" for GlusterFS. Currently,
-the only available storage translator is the POSIX translator, which
-stores files on a normal POSIX filesystem. A pleasant consequence of
-this is that your data will still be accessible if GlusterFS crashes or
-cannot be started.
-
- Other storage backends are planned for the future. One of the
-possibilities is an Amazon S3 translator. Amazon S3 is an unlimited
-online storage service accessible through a web services API. The S3
-translator will allow you to access the storage as a normal POSIX
-filesystem. (1)
-
-* Menu:
-
-* POSIX::
-* BDB::
-
- ---------- Footnotes ----------
-
- (1) Some more discussion about this can be found at:
-
-http://developer.amazonwebservices.com/connect/message.jspa?messageID=52873
-
-
-File: user-guide.info, Node: POSIX, Next: BDB, Up: Storage Translators
-
-4.1.1 POSIX
------------
-
- type storage/posix
-
- The `posix' translator uses a normal POSIX filesystem as its
-"backend" to actually store files and directories. This can be any
-filesystem that supports extended attributes (EXT3, ReiserFS, XFS,
-...). Extended attributes are used by some translators to store
-metadata, for example, by the replicate and stripe translators. See
-*note Replicate:: and *note Stripe::, respectively for details.
-
-`directory <path>'
- The directory on the local filesystem which is to be used for
- storage.
-
-
-File: user-guide.info, Node: BDB, Prev: POSIX, Up: Storage Translators
-
-4.1.2 BDB
----------
-
- type storage/bdb
-
- The `BDB' translator uses a Berkeley DB database as its "backend" to
-actually store files as key-value pair in the database and directories
-as regular POSIX directories. Note that BDB does not provide extended
-attribute support for regular files. Do not use BDB as storage
-translator while using any translator that demands extended attributes
-on "backend".
-
-`directory <path>'
- The directory on the local filesystem which is to be used for
- storage.
-
-`mode [cache|persistent] (cache)'
- When BDB is run in `cache' mode, recovery of back-end is not
- completely guaranteed. `persistent' guarantees that BDB can
- recover back-end from Berkeley DB even if GlusterFS crashes.
-
-`errfile <path>'
- The path of the file to be used as `errfile' for Berkeley DB to
- report detailed error messages, if any. Note that all the contents
- of this file will be written by Berkeley DB, not GlusterFS.
-
-`logdir <path>'
-
-
-File: user-guide.info, Node: Client and Server Translators, Next: Clustering Translators, Prev: Storage Translators, Up: Translators
-
-4.2 Client and Server Translators
-=================================
-
-The client and server translator enable GlusterFS to export a
-translator tree over the network or access a remote GlusterFS server.
-These two translators implement GlusterFS's network protocol.
-
-* Menu:
-
-* Transport modules::
-* Client protocol::
-* Server protocol::
-
-
-File: user-guide.info, Node: Transport modules, Next: Client protocol, Up: Client and Server Translators
-
-4.2.1 Transport modules
------------------------
-
-The client and server translators are capable of using any of the
-pluggable transport modules. Currently available transport modules are
-`tcp', which uses a TCP connection between client and server to
-communicate; `ib-sdp', which uses a TCP connection over InfiniBand, and
-`ibverbs', which uses high-speed InfiniBand connections.
-
- Each transport module comes in two different versions, one to be
-used on the server side and the other on the client side.
-
-4.2.1.1 TCP
-...........
-
-The TCP transport module uses a TCP/IP connection between the server
-and the client.
-
- option transport-type tcp
-
- The TCP client module accepts the following options:
-
-`non-blocking-connect [no|off|on|yes] (on)'
- Whether to make the connection attempt asynchronous.
-
-`remote-port <n> (24007)'
- Server port to connect to.
-
-`remote-host <hostname> *'
- Hostname or IP address of the server. If the host name resolves to
- multiple IP addresses, all of them will be tried in a round-robin
- fashion. This feature can be used to implement fail-over.
-
- The TCP server module accepts the following options:
-
-`bind-address <address> (0.0.0.0)'
- The local interface on which the server should listen to requests.
- Default is to listen on all interfaces.
-
-`listen-port <n> (24007)'
- The local port to listen on.
-
-4.2.1.2 IB-SDP
-..............
-
- option transport-type ib-sdp
-
- kernel implements socket interface for ib hardware. SDP is over
-ib-verbs. This module accepts the same options as `tcp'
-
-4.2.1.3 ibverbs
-...............
-
- option transport-type tcp
-
- InfiniBand is a scalable switched fabric interconnect mechanism
-primarily used in high-performance computing. InfiniBand can deliver
-data throughput of the order of 10 Gbit/s, with latencies of 4-5 ms.
-
- The `ib-verbs' transport accesses the InfiniBand hardware through
-the "verbs" API, which is the lowest level of software access possible
-and which gives the highest performance. On InfiniBand hardware, it is
-always best to use `ib-verbs'. Use `ib-sdp' only if you cannot get
-`ib-verbs' working for some reason.
-
- The `ib-verbs' client module accepts the following options:
-
-`non-blocking-connect [no|off|on|yes] (on)'
- Whether to make the connection attempt asynchronous.
-
-`remote-port <n> (24007)'
- Server port to connect to.
-
-`remote-host <hostname> *'
- Hostname or IP address of the server. If the host name resolves to
- multiple IP addresses, all of them will be tried in a round-robin
- fashion. This feature can be used to implement fail-over.
-
- The `ib-verbs' server module accepts the following options:
-
-`bind-address <address> (0.0.0.0)'
- The local interface on which the server should listen to requests.
- Default is to listen on all interfaces.
-
-`listen-port <n> (24007)'
- The local port to listen on.
-
- The following options are common to both the client and server
-modules:
-
- If you are familiar with InfiniBand jargon, the mode is used by
-GlusterFS is "reliable connection-oriented channel transfer".
-
-`ib-verbs-work-request-send-count <n> (64)'
- Length of the send queue in datagrams. [Reason to
- increase/decrease?]
-
-`ib-verbs-work-request-recv-count <n> (64)'
- Length of the receive queue in datagrams. [Reason to
- increase/decrease?]
-
-`ib-verbs-work-request-send-size <size> (128KB)'
- Size of each datagram that is sent. [Reason to increase/decrease?]
-
-`ib-verbs-work-request-recv-size <size> (128KB)'
- Size of each datagram that is received. [Reason to
- increase/decrease?]
-
-`ib-verbs-port <n> (1)'
- Port number for ib-verbs.
-
-`ib-verbs-mtu [256|512|1024|2048|4096] (2048)'
- The Maximum Transmission Unit [Reason to increase/decrease?]
-
-`ib-verbs-device-name <device-name> (first device in the list)'
- InfiniBand device to be used.
-
- For maximum performance, you should ensure that the send/receive
-counts on both the client and server are the same.
-
- ib-verbs is preferred over ib-sdp.
-
-
-File: user-guide.info, Node: Client protocol, Next: Server protocol, Prev: Transport modules, Up: Client and Server Translators
-
-4.2.2 Client
-------------
-
- type procotol/client
-
- The client translator enables the GlusterFS client to access a
-remote server's translator tree.
-
-`transport-type [tcp,ib-sdp,ib-verbs] (tcp)'
- The transport type to use. You should use the client versions of
- all the transport modules (`tcp', `ib-sdp', `ib-verbs').
-
-`remote-subvolume <volume_name> *'
- The name of the volume on the remote host to attach to. Note that
- this is _not_ the name of the `protocol/server' volume on the
- server. It should be any volume under the server.
-
-`transport-timeout <n> (120- seconds)'
- Inactivity timeout. If a reply is expected and no activity takes
- place on the connection within this time, the transport connection
- will be broken, and a new connection will be attempted.
-
-
-File: user-guide.info, Node: Server protocol, Prev: Client protocol, Up: Client and Server Translators
-
-4.2.3 Server
-------------
-
- type protocol/server
-
- The server translator exports a translator tree and makes it
-accessible to remote GlusterFS clients.
-
-`client-volume-filename <path> (<CONFDIR>/glusterfs-client.vol)'
- The volume specification file to use for the client. This is the
- file the client will receive when it is invoked with the
- `--server' option (*note Client::).
-
-`transport-type [tcp,ib-verbs,ib-sdp] (tcp)'
- The transport to use. You should use the server versions of all
- the transport modules (`tcp', `ib-sdp', `ib-verbs').
-
-`auth.addr.<volume name>.allow <IP address wildcard pattern>'
- IP addresses of the clients that are allowed to attach to the
- specified volume. This can be a wildcard. For example, a wildcard
- of the form `192.168.*.*' allows any host in the `192.168.x.x'
- subnet to connect to the server.
-
-
-
-File: user-guide.info, Node: Clustering Translators, Next: Performance Translators, Prev: Client and Server Translators, Up: Translators
-
-4.3 Clustering Translators
-==========================
-
-The clustering translators are the most important GlusterFS
-translators, since it is these that make GlusterFS a cluster
-filesystem. These translators together enable GlusterFS to access an
-arbitrarily large amount of storage, and provide RAID-like redundancy
-and distribution over the entire cluster.
-
- There are three clustering translators: *unify*, *replicate*, and
-*stripe*. The unify translator aggregates storage from many server
-nodes. The replicate translator provides file replication. The stripe
-translator allows a file to be spread across many server nodes. The
-following sections look at each of these translators in detail.
-
-* Menu:
-
-* Unify::
-* Replicate::
-* Stripe::
-
-
-File: user-guide.info, Node: Unify, Next: Replicate, Up: Clustering Translators
-
-4.3.1 Unify
------------
-
- type cluster/unify
-
- The unify translator presents a `unified' view of all its
-sub-volumes. That is, it makes the union of all its sub-volumes appear
-as a single volume. It is the unify translator that gives GlusterFS the
-ability to access an arbitrarily large amount of storage.
-
- For unify to work correctly, certain invariants need to be
-maintained across the entire network. These are:
-
- * The directory structure of all the sub-volumes must be identical.
-
- * A particular file can exist on only one of the sub-volumes.
- Phrasing it in another way, a pathname such as
- `/home/calvin/homework.txt') is unique across the entire cluster.
-
-
-
-Looking at the second requirement, you might wonder how one can
-accomplish storing redundant copies of a file, if no file can exist
-multiple times. To answer, we must remember that these invariants are
-from _unify's perspective_. A translator such as replicate at a lower
-level in the translator tree than unify may subvert this picture.
-
- The first invariant might seem quite tedious to ensure. We shall see
-later that this is not so, since unify's _self-heal_ mechanism takes
-care of maintaining it.
-
- The second invariant implies that unify needs some way to decide
-which file goes where. Unify makes use of _scheduler_ modules for this
-purpose.
-
- When a file needs to be created, unify's scheduler decides upon the
-sub-volume to be used to store the file. There are many schedulers
-available, each using a different algorithm and suitable for different
-purposes.
-
- The various schedulers are described in detail in the sections that
-follow.
-
-4.3.1.1 ALU
-...........
-
- option scheduler alu
-
- ALU stands for "Adaptive Least Usage". It is the most advanced
-scheduler available in GlusterFS. It balances the load across volumes
-taking several factors in account. It adapts itself to changing I/O
-patterns according to its configuration. When properly configured, it
-can eliminate the need for regular tuning of the filesystem to keep
-volume load nicely balanced.
-
- The ALU scheduler is composed of multiple least-usage
-sub-schedulers. Each sub-scheduler keeps track of a certain type of
-load, for each of the sub-volumes, getting statistics from the
-sub-volumes themselves. The sub-schedulers are these:
-
- * disk-usage: The used and free disk space on the volume.
-
- * read-usage: The amount of reading done from this volume.
-
- * write-usage: The amount of writing done to this volume.
-
- * open-files-usage: The number of files currently open from this
- volume.
-
- * disk-speed-usage: The speed at which the disks are spinning. This
- is a constant value and therefore not very useful.
-
- The ALU scheduler needs to know which of these sub-schedulers to use,
-and in which order to evaluate them. This is done through the `option
-alu.order' configuration directive.
-
- Each sub-scheduler needs to know two things: when to kick in (the
-entry-threshold), and how long to stay in control (the exit-threshold).
-For example: when unifying three disks of 100GB, keeping an exact
-balance of disk-usage is not necesary. Instead, there could be a 1GB
-margin, which can be used to nicely balance other factors, such as
-read-usage. The disk-usage scheduler can be told to kick in only when a
-certain threshold of discrepancy is passed, such as 1GB. When it
-assumes control under this condition, it will write all subsequent data
-to the least-used volume. If it is doing so, it is unwise to stop right
-after the values are below the entry-threshold again, since that would
-make it very likely that the situation will occur again very soon. Such
-a situation would cause the ALU to spend most of its time disk-usage
-scheduling, which is unfair to the other sub-schedulers. The
-exit-threshold therefore defines the amount of data that needs to be
-written to the least-used disk, before control is relinquished again.
-
- In addition to the sub-schedulers, the ALU scheduler also has
-"limits" options. These can stop the creation of new files on a volume
-once values drop below a certain threshold. For example, setting
-`option alu.limits.min-free-disk 5GB' will stop the scheduling of files
-to volumes that have less than 5GB of free disk space, leaving the
-files on that disk some room to grow.
-
- The actual values you assign to the thresholds for sub-schedulers and
-limits depend on your situation. If you have fast-growing files, you'll
-want to stop file-creation on a disk much earlier than when hardly any
-of your files are growing. If you care less about disk-usage balance
-than about read-usage balance, you'll want a bigger disk-usage
-scheduler entry-threshold and a smaller read-usage scheduler
-entry-threshold.
-
- For thresholds defining a size, values specifying "KB", "MB" and "GB"
-are allowed. For example: `option alu.limits.min-free-disk 5GB'.
-
-`alu.order <order> * ("disk-usage:write-usage:read-usage:open-files-usage:disk-speed")'
-
-`alu.disk-usage.entry-threshold <size> (1GB)'
-
-`alu.disk-usage.exit-threshold <size> (512MB)'
-
-`alu.write-usage.entry-threshold <%> (25)'
-
-`alu.write-usage.exit-threshold <%> (5)'
-
-`alu.read-usage.entry-threshold <%> (25)'
-
-`alu.read-usage.exit-threshold <%> (5)'
-
-`alu.open-files-usage.entry-threshold <n> (1000)'
-
-`alu.open-files-usage.exit-threshold <n> (100)'
-
-`alu.limits.min-free-disk <%>'
-
-`alu.limits.max-open-files <n>'
-
-4.3.1.2 Round Robin (RR)
-........................
-
- option scheduler rr
-
- Round-Robin (RR) scheduler creates files in a round-robin fashion.
-Each client will have its own round-robin loop. When your files are
-mostly similar in size and I/O access pattern, this scheduler is a good
-choice. RR scheduler checks for free disk space on the server before
-scheduling, so you can know when to add another server node. The
-default value of min-free-disk is 5% and is checked on file creation
-calls, with atleast 10 seconds (by default) elapsing between two checks.
-
- Options:
-`rr.limits.min-free-disk <%> (5)'
- Minimum free disk space a node must have for RR to schedule a file
- to it.
-
-`rr.refresh-interval <t> (10 seconds)'
- Time between two successive free disk space checks.
-
-4.3.1.3 Random
-..............
-
- option scheduler random
-
- The random scheduler schedules file creation randomly among its
-child nodes. Like the round-robin scheduler, it also checks for a
-minimum amount of free disk space before scheduling a file to a node.
-
-`random.limits.min-free-disk <%> (5)'
- Minimum free disk space a node must have for random to schedule a
- file to it.
-
-`random.refresh-interval <t> (10 seconds)'
- Time between two successive free disk space checks.
-
-4.3.1.4 NUFA
-............
-
- option scheduler nufa
-
- It is common in many GlusterFS computing environments for all
-deployed machines to act as both servers and clients. For example, a
-research lab may have 40 workstations each with its own storage. All of
-these workstations might act as servers exporting a volume as well as
-clients accessing the entire cluster's storage. In such a situation,
-it makes sense to store locally created files on the local workstation
-itself (assuming files are accessed most by the workstation that
-created them). The Non-Uniform File Allocation (NUFA) scheduler
-accomplishes that.
-
- NUFA gives the local system first priority for file creation over
-other nodes. If the local volume does not have more free disk space
-than a specified amount (5% by default) then NUFA schedules files among
-the other child volumes in a round-robin fashion.
-
- NUFA is named after the similar strategy used for memory access,
-NUMA(1).
-
-`nufa.limits.min-free-disk <%> (5)'
- Minimum disk space that must be free (local or remote) for NUFA to
- schedule a file to it.
-
-`nufa.refresh-interval <t> (10 seconds)'
- Time between two successive free disk space checks.
-
-`nufa.local-volume-name <volume>'
- The name of the volume corresponding to the local system. This
- volume must be one of the children of the unify volume. This
- option is mandatory.
-
-4.3.1.5 Namespace
-.................
-
-Namespace volume needed because: - persistent inode numbers. - file
-exists even when node is down.
-
- namespace files are simply touched. on every lookup it is checked.
-
-`namespace <volume> *'
- Name of the namespace volume (which should be one of the unify
- volume's children).
-
-`self-heal [on|off] (on)'
- Enable/disable self-heal. Unless you know what you are doing, do
- not disable self-heal.
-
-4.3.1.6 Self Heal
-.................
-
-* When a 'lookup()/stat()' call is made on directory for the first
-time, a self-heal call is made, which checks for the consistancy of its
-child nodes. If an entry is present in storage node, but not in
-namespace, that entry is created in namespace, and vica-versa. There is
-an writedir() API introduced which is used for the same. It also checks
-for permissions, and uid/gid consistencies.
-
- * This check is also done when an server goes down and comes up.
-
- * If one starts with an empty namespace export, but has data in
-storage nodes, a 'find .>/dev/null' or 'ls -lR >/dev/null' should help
-to build namespace in one shot. Even otherwise, namespace is built on
-demand when a file is looked up for the first time.
-
- NOTE: There are some issues (Kernel 'Oops' msgs) seen with
-fuse-2.6.3, when someone deletes namespace in backend, when glusterfs is
-running. But with fuse-2.6.5, this issue is not there.
-
- ---------- Footnotes ----------
-
- (1) Non-Uniform Memory Access:
-<http://en.wikipedia.org/wiki/Non-Uniform_Memory_Access>
-
-
-File: user-guide.info, Node: Replicate, Next: Stripe, Prev: Unify, Up: Clustering Translators
-
-4.3.2 Replicate (formerly AFR)
-------------------------------
-
- type cluster/replicate
-
- Replicate provides RAID-1 like functionality for GlusterFS.
-Replicate replicates files and directories across the subvolumes. Hence
-if Replicate has four subvolumes, there will be four copies of all
-files and directories. Replicate provides high-availability, i.e., in
-case one of the subvolumes go down (e. g. server crash, network
-disconnection) Replicate will still service the requests using the
-redundant copies.
-
- Replicate also provides self-heal functionality, i.e., in case the
-crashed servers come up, the outdated files and directories will be
-updated with the latest versions. Replicate uses extended attributes of
-the backend file system to track the versioning of files and
-directories and provide the self-heal feature.
-
- volume replicate-example
- type cluster/replicate
- subvolumes brick1 brick2 brick3
- end-volume
-
- This sample configuration will replicate all directories and files on
-brick1, brick2 and brick3.
-
- All the read operations happen from the first alive child. If all the
-three sub-volumes are up, reads will be done from brick1; if brick1 is
-down read will be done from brick2. In case read() was being done on
-brick1 and it goes down, replicate transparently falls back to brick2.
-
- The next release of GlusterFS will add the following features:
- * Ability to specify the sub-volume from which read operations are
- to be done (this will help users who have one of the sub-volumes
- as a local storage volume).
-
- * Allow scheduling of read operations amongst the sub-volumes in a
- round-robin fashion.
-
- The order of the subvolumes list should be same across all the
-'replicate's as they will be used for locking purposes.
-
-4.3.2.1 Self Heal
-.................
-
-Replicate has self-heal feature, which updates the outdated file and
-directory copies by the most recent versions. For example consider the
-following config:
-
- volume replicate-example
- type cluster/replicate
- subvolumes brick1 brick2
- end-volume
-
-4.3.2.2 File self-heal
-......................
-
-Now if we create a file foo.txt on replicate-example, the file will be
-created on brick1 and brick2. The file will have two extended
-attributes associated with it in the backend filesystem. One is
-trusted.afr.createtime and the other is trusted.afr.version. The
-trusted.afr.createtime xattr has the create time (in terms of seconds
-since epoch) and trusted.afr.version is a number that is incremented
-each time a file is modified. This increment happens during close
-(incase any write was done before close).
-
- If brick1 goes down, we edit foo.txt the version gets incremented.
-Now the brick1 comes back up, when we open() on foo.txt replicate will
-check if their versions are same. If they are not same, the outdated
-copy is replaced by the latest copy and its version is updated. After
-the sync the open() proceeds in the usual manner and the application
-calling open() can continue on its access to the file.
-
- If brick1 goes down, we delete foo.txt and create a file with the
-same name again i.e foo.txt. Now brick1 comes back up, clearly there is
-a chance that the version on brick1 being more than the version on
-brick2, this is where createtime extended attribute helps in deciding
-which the outdated copy is. Hence we need to consider both createtime
-and version to decide on the latest copy.
-
- The version attribute is incremented during the close() call. Version
-will not be incremented in case there was no write() done. In case the
-fd that the close() gets was got by create() call, we also create the
-createtime extended attribute.
-
-4.3.2.3 Directory self-heal
-...........................
-
-Suppose brick1 goes down, we delete foo.txt, brick1 comes back up, now
-we should not create foo.txt on brick2 but we should delete foo.txt on
-brick1. We handle this situation by having the createtime and version
-attribute on the directory similar to the file. when lookup() is done
-on the directory, we compare the createtime/version attributes of the
-copies and see which files needs to be deleted and delete those files
-and update the extended attributes of the outdated directory copy.
-Each time a directory is modified (a file or a subdirectory is created
-or deleted inside the directory) and one of the subvols is down, we
-increment the directory's version.
-
- lookup() is a call initiated by the kernel on a file or directory
-just before any access to that file or directory. In glusterfs, by
-default, lookup() will not be called in case it was called in the past
-one second on that particular file or directory.
-
- The extended attributes can be seen in the backend filesystem using
-the `getfattr' command. (`getfattr -n trusted.afr.version <file>')
-
-`debug [on|off] (off)'
-
-`self-heal [on|off] (on)'
-
-`replicate <pattern> (*:1)'
-
-`lock-node <child_volume> (first child is used by default)'
-
-
-File: user-guide.info, Node: Stripe, Prev: Replicate, Up: Clustering Translators
-
-4.3.3 Stripe
-------------
-
- type cluster/stripe
-
- The stripe translator distributes the contents of a file over its
-sub-volumes. It does this by creating a file equal in size to the
-total size of the file on each of its sub-volumes. It then writes only
-a part of the file to each sub-volume, leaving the rest of it empty.
-These empty regions are called `holes' in Unix terminology. The holes
-do not consume any disk space.
-
- The diagram below makes this clear.
-
-
-
-You can configure stripe so that only filenames matching a pattern are
-striped. You can also configure the size of the data to be stored on
-each sub-volume.
-
-`block-size <pattern>:<size> (*:0 no striping)'
- Distribute files matching `<pattern>' over the sub-volumes,
- storing at least `<size>' on each sub-volume. For example,
-
- option block-size *.mpg:1M
-
- distributes all files ending in `.mpg', storing at least 1 MB on
- each sub-volume.
-
- Any number of `block-size' option lines may be present, specifying
- different sizes for different file name patterns.
-
-
-File: user-guide.info, Node: Performance Translators, Next: Features Translators, Prev: Clustering Translators, Up: Translators
-
-4.4 Performance Translators
-===========================
-
-* Menu:
-
-* Read Ahead::
-* Write Behind::
-* IO Threads::
-* IO Cache::
-* Booster::
-
-
-File: user-guide.info, Node: Read Ahead, Next: Write Behind, Up: Performance Translators
-
-4.4.1 Read Ahead
-----------------
-
- type performance/read-ahead
-
- The read-ahead translator pre-fetches data in advance on every read.
-This benefits applications that mostly process files in sequential
-order, since the next block of data will already be available by the
-time the application is done with the current one.
-
- Additionally, the read-ahead translator also behaves as a
-read-aggregator. Many small read operations are combined and issued as
-fewer, larger read requests to the server.
-
- Read-ahead deals in "pages" as the unit of data fetched. The page
-size is configurable, as is the "page count", which is the number of
-pages that are pre-fetched.
-
- Read-ahead is best used with InfiniBand (using the ib-verbs
-transport). On FastEthernet and Gigabit Ethernet networks, GlusterFS
-can achieve the link-maximum throughput even without read-ahead, making
-it quite superflous.
-
- Note that read-ahead only happens if the reads are perfectly
-sequential. If your application accesses data in a random fashion,
-using read-ahead might actually lead to a performance loss, since
-read-ahead will pointlessly fetch pages which won't be used by the
-application.
-
- Options:
-`page-size <n> (256KB)'
- The unit of data that is pre-fetched.
-
-`page-count <n> (2)'
- The number of pages that are pre-fetched.
-
-`force-atime-update [on|off|yes|no] (off|no)'
- Whether to force an access time (atime) update on the file on
- every read. Without this, the atime will be slightly imprecise, as
- it will reflect the time when the read-ahead translator read the
- data, not when the application actually read it.
-
-
-File: user-guide.info, Node: Write Behind, Next: IO Threads, Prev: Read Ahead, Up: Performance Translators
-
-4.4.2 Write Behind
-------------------
-
- type performance/write-behind
-
- The write-behind translator improves the latency of a write
-operation. It does this by relegating the write operation to the
-background and returning to the application even as the write is in
-progress. Using the write-behind translator, successive write requests
-can be pipelined. This mode of write-behind operation is best used on
-the client side, to enable decreased write latency for the application.
-
- The write-behind translator can also aggregate write requests. If the
-`aggregate-size' option is specified, then successive writes upto that
-size are accumulated and written in a single operation. This mode of
-operation is best used on the server side, as this will decrease the
-disk's head movement when multiple files are being written to in
-parallel.
-
- The `aggregate-size' option has a default value of 128KB. Although
-this works well for most users, you should always experiment with
-different values to determine the one that will deliver maximum
-performance. This is because the performance of write-behind depends on
-your interconnect, size of RAM, and the work load.
-
-`aggregate-size <n> (128KB)'
- Amount of data to accumulate before doing a write
-
-`flush-behind [on|yes|off|no] (off|no)'
-
-
-File: user-guide.info, Node: IO Threads, Next: IO Cache, Prev: Write Behind, Up: Performance Translators
-
-4.4.3 IO Threads
-----------------
-
- type performance/io-threads
-
- The IO threads translator is intended to increase the responsiveness
-of the server to metadata operations by doing file I/O (read, write) in
-a background thread. Since the GlusterFS server is single-threaded,
-using the IO threads translator can significantly improve performance.
-This translator is best used on the server side, loaded just below the
-server protocol translator.
-
- IO threads operates by handing out read and write requests to a
-separate thread. The total number of threads in existence at a time is
-constant, and configurable.
-
-`thread-count <n> (1)'
- Number of threads to use.
-
-
-File: user-guide.info, Node: IO Cache, Next: Booster, Prev: IO Threads, Up: Performance Translators
-
-4.4.4 IO Cache
---------------
-
- type performance/io-cache
-
- The IO cache translator caches data that has been read. This is
-useful if many applications read the same data multiple times, and if
-reads are much more frequent than writes (for example, IO caching may be
-useful in a web hosting environment, where most clients will simply
-read some files and only a few will write to them).
-
- The IO cache translator reads data from its child in `page-size'
-chunks. It caches data upto `cache-size' bytes. The cache is
-maintained as a prioritized least-recently-used (LRU) list, with
-priorities determined by user-specified patterns to match filenames.
-
- When the IO cache translator detects a write operation, the cache
-for that file is flushed.
-
- The IO cache translator periodically verifies the consistency of
-cached data, using the modification times on the files. The
-verification timeout is configurable.
-
-`page-size <n> (128KB)'
- Size of a page.
-
-`cache-size (n) (32MB)'
- Total amount of data to be cached.
-
-`force-revalidate-timeout <n> (1)'
- Timeout to force a cache consistency verification, in seconds.
-
-`priority <pattern> (*:0)'
- Filename patterns listed in order of priority.
-
-
-File: user-guide.info, Node: Booster, Prev: IO Cache, Up: Performance Translators
-
-4.4.5 Booster
--------------
-
- type performance/booster
-
- The booster translator gives applications a faster path to
-communicate read and write requests to GlusterFS. Normally, all
-requests to GlusterFS from applications go through FUSE, as indicated
-in *note Filesystems in Userspace::. Using the booster translator in
-conjunction with the GlusterFS booster shared library, an application
-can bypass the FUSE path and send read/write requests directly to the
-GlusterFS client process.
-
- The booster mechanism consists of two parts: the booster translator,
-and the booster shared library. The booster translator is meant to be
-loaded on the client side, usually at the root of the translator tree.
-The booster shared library should be `LD_PRELOAD'ed with the
-application.
-
- The booster translator when loaded opens a Unix domain socket and
-listens for read/write requests on it. The booster shared library
-intercepts read and write system calls and sends the requests to the
-GlusterFS process directly using the Unix domain socket, bypassing FUSE.
-This leads to superior performance.
-
- Once you've loaded the booster translator in your volume
-specification file, you can start your application as:
-
- $ LD_PRELOAD=/usr/local/bin/glusterfs-booster.so your_app
-
- The booster translator accepts no options.
-
-
-File: user-guide.info, Node: Features Translators, Next: Miscellaneous Translators, Prev: Performance Translators, Up: Translators
-
-4.5 Features Translators
-========================
-
-* Menu:
-
-* POSIX Locks::
-* Fixed ID::
-
-
-File: user-guide.info, Node: POSIX Locks, Next: Fixed ID, Up: Features Translators
-
-4.5.1 POSIX Locks
------------------
-
- type features/posix-locks
-
- This translator provides storage independent POSIX record locking
-support (`fcntl' locking). Typically you'll want to load this on the
-server side, just above the POSIX storage translator. Using this
-translator you can get both advisory locking and mandatory locking
-support. It also handles `flock()' locks properly.
-
- Caveat: Consider a file that does not have its mandatory locking bits
-(+setgid, -group execution) turned on. Assume that this file is now
-opened by a process on a client that has the write-behind xlator
-loaded. The write-behind xlator does not cache anything for files which
-have mandatory locking enabled, to avoid incoherence. Let's say that
-mandatory locking is now enabled on this file through another client.
-The former client will not know about this change, and write-behind may
-erroneously report a write as being successful when in fact it would
-fail due to the region it is writing to being locked.
-
- There seems to be no easy way to fix this. To work around this
-problem, it is recommended that you never enable the mandatory bits on
-a file while it is open.
-
-`mandatory [on|off] (on)'
- Turns mandatory locking on.
-
-
-File: user-guide.info, Node: Fixed ID, Prev: POSIX Locks, Up: Features Translators
-
-4.5.2 Fixed ID
---------------
-
- type features/fixed-id
-
- The fixed ID translator makes all filesystem requests from the client
-to appear to be coming from a fixed, specified UID/GID, regardless of
-which user actually initiated the request.
-
-`fixed-uid <n> [if not set, not used]'
- The UID to send to the server
-
-`fixed-gid <n> [if not set, not used]'
- The GID to send to the server
-
-
-File: user-guide.info, Node: Miscellaneous Translators, Prev: Features Translators, Up: Translators
-
-4.6 Miscellaneous Translators
-=============================
-
-* Menu:
-
-* ROT-13::
-* Trace::
-
-
-File: user-guide.info, Node: ROT-13, Next: Trace, Up: Miscellaneous Translators
-
-4.6.1 ROT-13
-------------
-
- type encryption/rot-13
-
- ROT-13 is a toy translator that can "encrypt" and "decrypt" file
-contents using the ROT-13 algorithm. ROT-13 is a trivial algorithm that
-rotates each alphabet by thirteen places. Thus, 'A' becomes 'N', 'B'
-becomes 'O', and 'Z' becomes 'M'.
-
- It goes without saying that you shouldn't use this translator if you
-need _real_ encryption (a future release of GlusterFS will have real
-encryption translators).
-
-`encrypt-write [on|off] (on)'
- Whether to encrypt on write
-
-`decrypt-read [on|off] (on)'
- Whether to decrypt on read
-
-
-File: user-guide.info, Node: Trace, Prev: ROT-13, Up: Miscellaneous Translators
-
-4.6.2 Trace
------------
-
- type debug/trace
-
- The trace translator is intended for debugging purposes. When
-loaded, it logs all the system calls received by the server or client
-(wherever trace is loaded), their arguments, and the results. You must
-use a GlusterFS log level of DEBUG (See *note Running GlusterFS::) for
-trace to work.
-
- Sample trace output (lines have been wrapped for readability):
- 2007-10-30 00:08:58 D [trace.c:1579:trace_opendir] trace: callid: 68
- (*this=0x8059e40, loc=0x8091984 {path=/iozone3_283, inode=0x8091f00},
- fd=0x8091d50)
-
- 2007-10-30 00:08:58 D [trace.c:630:trace_opendir_cbk] trace:
- (*this=0x8059e40, op_ret=4, op_errno=1, fd=0x8091d50)
-
- 2007-10-30 00:08:58 D [trace.c:1602:trace_readdir] trace: callid: 69
- (*this=0x8059e40, size=4096, offset=0 fd=0x8091d50)
-
- 2007-10-30 00:08:58 D [trace.c:215:trace_readdir_cbk] trace:
- (*this=0x8059e40, op_ret=0, op_errno=0, count=4)
-
- 2007-10-30 00:08:58 D [trace.c:1624:trace_closedir] trace: callid: 71
- (*this=0x8059e40, *fd=0x8091d50)
-
- 2007-10-30 00:08:58 D [trace.c:809:trace_closedir_cbk] trace:
- (*this=0x8059e40, op_ret=0, op_errno=1)
-
-
-File: user-guide.info, Node: Usage Scenarios, Next: Troubleshooting, Prev: Translators, Up: Top
-
-5 Usage Scenarios
-*****************
-
-5.1 Advanced Striping
-=====================
-
-This section is based on the Advanced Striping tutorial written by
-Anand Avati on the GlusterFS wiki (1).
-
-5.1.1 Mixed Storage Requirements
---------------------------------
-
-There are two ways of scheduling the I/O. One at file level (using
-unify translator) and other at block level (using stripe translator).
-Striped I/O is good for files that are potentially large and require
-high parallel throughput (for example, a single file of 400GB being
-accessed by 100s and 1000s of systems simultaneously and randomly). For
-most of the cases, file level scheduling works best.
-
- In the real world, it is desirable to mix file level and block level
-scheduling on a single storage volume. Alternatively users can choose
-to have two separate volumes and hence two mount points, but the
-applications may demand a single storage system to host both.
-
- This document explains how to mix file level scheduling with stripe.
-
-5.1.2 Configuration Brief
--------------------------
-
-This setup demonstrates how users can configure unify translator with
-appropriate I/O scheduler for file level scheduling and strip for only
-matching patterns. This way, GlusterFS chooses appropriate I/O profile
-and knows how to efficiently handle both the types of data.
-
- A simple technique to achieve this effect is to create a stripe set
-of unify and stripe blocks, where unify is the first sub-volume. Files
-that do not match the stripe policy passed on to first unify sub-volume
-and inturn scheduled arcoss the cluster using its file level I/O
-scheduler.
-
- 5.1.3 Preparing GlusterFS Envoronment
--------------------------------------
-
-Create the directories /export/namespace, /export/unify and
-/export/stripe on all the storage bricks.
-
- Place the following server and client volume spec file under
-/etc/glusterfs (or appropriate installed path) and replace the IP
-addresses / access control fields to match your environment.
-
- ## file: /etc/glusterfs/glusterfsd.vol
- volume posix-unify
- type storage/posix
- option directory /export/for-unify
- end-volume
-
- volume posix-stripe
- type storage/posix
- option directory /export/for-stripe
- end-volume
-
- volume posix-namespace
- type storage/posix
- option directory /export/for-namespace
- end-volume
-
- volume server
- type protocol/server
- option transport-type tcp
- option auth.addr.posix-unify.allow 192.168.1.*
- option auth.addr.posix-stripe.allow 192.168.1.*
- option auth.addr.posix-namespace.allow 192.168.1.*
- subvolumes posix-unify posix-stripe posix-namespace
- end-volume
-
- ## file: /etc/glusterfs/glusterfs.vol
- volume client-namespace
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.1
- option remote-subvolume posix-namespace
- end-volume
-
- volume client-unify-1
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.1
- option remote-subvolume posix-unify
- end-volume
-
- volume client-unify-2
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.2
- option remote-subvolume posix-unify
- end-volume
-
- volume client-unify-3
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.3
- option remote-subvolume posix-unify
- end-volume
-
- volume client-unify-4
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.4
- option remote-subvolume posix-unify
- end-volume
-
- volume client-stripe-1
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.1
- option remote-subvolume posix-stripe
- end-volume
-
- volume client-stripe-2
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.2
- option remote-subvolume posix-stripe
- end-volume
-
- volume client-stripe-3
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.3
- option remote-subvolume posix-stripe
- end-volume
-
- volume client-stripe-4
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.4
- option remote-subvolume posix-stripe
- end-volume
-
- volume unify
- type cluster/unify
- option scheduler rr
- subvolumes cluster-unify-1 cluster-unify-2 cluster-unify-3 cluster-unify-4
- end-volume
-
- volume stripe
- type cluster/stripe
- option block-size *.img:2MB # All files ending with .img are striped with 2MB stripe block size.
- subvolumes unify cluster-stripe-1 cluster-stripe-2 cluster-stripe-3 cluster-stripe-4
- end-volume
-
- Bring up the Storage
-
- Starting GlusterFS Server: If you have installed through binary
-package, you can start the service through init.d startup script. If
-not:
-
- [root@server]# glusterfsd
-
- Mounting GlusterFS Volumes:
-
- [root@client]# glusterfs -s [BRICK-IP-ADDRESS] /mnt/cluster
-
- Improving upon this Setup
-
- Infiniband Verbs RDMA transport is much faster than TCP/IP GigE
-transport.
-
- Use of performance translators such as read-ahead, write-behind,
-io-cache, io-threads, booster is recommended.
-
- Replace round-robin (rr) scheduler with ALU to handle more dynamic
-storage environments.
-
- ---------- Footnotes ----------
-
- (1)
-http://gluster.org/docs/index.php/Mixing_Striped_and_Regular_Files
-
-
-File: user-guide.info, Node: Troubleshooting, Next: GNU Free Documentation Licence, Prev: Usage Scenarios, Up: Top
-
-6 Troubleshooting
-*****************
-
-This chapter is a general troubleshooting guide to GlusterFS. It lists
-common GlusterFS server and client error messages, debugging hints, and
-concludes with the suggested procedure to report bugs in GlusterFS.
-
-6.1 GlusterFS error messages
-============================
-
-6.1.1 Server errors
--------------------
-
- glusterfsd: FATAL: could not open specfile:
- '/etc/glusterfs/glusterfsd.vol'
-
- The GlusterFS server expects the volume specification file to be at
-`/etc/glusterfs/glusterfsd.vol'. The example specification file will be
-installed as `/etc/glusterfs/glusterfsd.vol.sample'. You need to edit
-it and rename it, or provide a different specification file using the
-`--spec-file' command line option (See *note Server::).
-
- gf_log_init: failed to open logfile "/usr/var/log/glusterfs/glusterfsd.log"
- (Permission denied)
-
- You don't have permission to create files in the
-`/usr/var/log/glusterfs' directory. Make sure you are running GlusterFS
-as root. Alternatively, specify a different path for the log file using
-the `--log-file' option (See *note Server::).
-
-6.1.2 Client errors
--------------------
-
- fusermount: failed to access mountpoint /mnt:
- Transport endpoint is not connected
-
- A previous failed (or hung) mount of GlusterFS is preventing it from
-being mounted again in the same location. The fix is to do:
-
- # umount /mnt
-
- and try mounting again.
-
- *"Transport endpoint is not connected".*
-
- If you get this error when you try a command such as `ls' or `cat',
-it means the GlusterFS mount did not succeed. Try running GlusterFS in
-`DEBUG' logging level and study the log messages to discover the cause.
-
- *"Connect to server failed", "SERVER-ADDRESS: Connection refused".*
-
- GluserFS Server is not running or dead. Check your network
-connections and firewall settings. To check if the server is reachable,
-try:
-
- telnet IP-ADDRESS 24007
-
- If the server is accessible, your `telnet' command should connect and
-block. If not you will see an error message such as `telnet: Unable to
-connect to remote host: Connection refused'. 24007 is the default
-GlusterFS port. If you have changed it, then use the corresponding port
-instead.
-
- gf_log_init: failed to open logfile "/usr/var/log/glusterfs/glusterfs.log"
- (Permission denied)
-
- You don't have permission to create files in the
-`/usr/var/log/glusterfs' directory. Make sure you are running GlusterFS
-as root. Alternatively, specify a different path for the log file using
-the `--log-file' option (See *note Client::).
-
-6.2 FUSE error messages
-=======================
-
-`modprobe fuse' fails with: "Unknown symbol in module, or unknown
-parameter".
-
- If you are using fuse-2.6.x on Redhat Enterprise Linux Work Station 4
-and Advanced Server 4 with 2.6.9-42.ELlargesmp, 2.6.9-42.ELsmp,
-2.6.9-42.EL kernels and get this error while loading FUSE kernel
-module, you need to apply the following patch.
-
- For fuse-2.6.2:
-
-<http://ftp.gluster.com/pub/gluster/glusterfs/fuse/fuse-2.6.2-rhel-build.patch>
-
- For fuse-2.6.3:
-
-<http://ftp.gluster.com/pub/gluster/glusterfs/fuse/fuse-2.6.3-rhel-build.patch>
-
-6.3 AppArmour and GlusterFS
-===========================
-
-Under OpenSuSE GNU/Linux, the AppArmour security feature does not allow
-GlusterFS to create temporary files or network socket connections even
-while running as root. You will see error messages like `Unable to open
-log file: Operation not permitted' or `Connection refused'. Disabling
-AppArmour using YaST or properly configuring AppArmour to recognize
-`glusterfsd' or `glusterfs'/`fusermount' should solve the problem.
-
-6.4 Reporting a bug
-===================
-
-If you encounter a bug in GlusterFS, please follow the below guidelines
-when you report it to the mailing list. Be sure to report it! User
-feedback is crucial to the health of the project and we value it highly.
-
-6.4.1 General instructions
---------------------------
-
-When running GlusterFS in a non-production environment, be sure to
-build it with the following command:
-
- $ make CFLAGS='-g -O0 -DDEBUG'
-
- This includes debugging information which will be helpful in getting
-backtraces (see below) and also disable optimization. Enabling
-optimization can result in incorrect line numbers being reported to gdb.
-
-6.4.2 Volume specification files
---------------------------------
-
-Attach all relevant server and client spec files you were using when
-you encountered the bug. Also tell us details of your setup, i.e., how
-many clients and how many servers.
-
-6.4.3 Log files
----------------
-
-Set the loglevel of your client and server programs to DEBUG (by
-passing the -L DEBUG option) and attach the log files with your bug
-report. Obviously, if only the client is failing (for example), you
-only need to send us the client log file.
-
-6.4.4 Backtrace
----------------
-
-If GlusterFS has encountered a segmentation fault or has crashed for
-some other reason, include the backtrace with the bug report. You can
-get the backtrace using the following procedure.
-
- Run the GlusterFS client or server inside gdb.
-
- $ gdb ./glusterfs
- (gdb) set args -f client.spec -N -l/path/to/log/file -LDEBUG /mnt/point
- (gdb) run
-
- Now when the process segfaults, you can get the backtrace by typing:
-
- (gdb) bt
-
- If the GlusterFS process has crashed and dumped a core file (you can
-find this in / if running as a daemon and in the current directory
-otherwise), you can do:
-
- $ gdb /path/to/glusterfs /path/to/core.<pid>
-
- and then get the backtrace.
-
- If the GlusterFS server or client seems to be hung, then you can get
-the backtrace by attaching gdb to the process. First get the `PID' of
-the process (using ps), and then do:
-
- $ gdb ./glusterfs <pid>
-
- Press Ctrl-C to interrupt the process and then generate the
-backtrace.
-
-6.4.5 Reproducing the bug
--------------------------
-
-If the bug is reproducible, please include the steps necessary to do
-so. If the bug is not reproducible, send us the bug report anyway.
-
-6.4.6 Other information
------------------------
-
-If you think it is relevant, send us also the version of FUSE you're
-using, the kernel version, platform.
-
-
-File: user-guide.info, Node: GNU Free Documentation Licence, Next: Index, Prev: Troubleshooting, Up: Top
-
-Appendix A GNU Free Documentation Licence
-*****************************************
-
- Version 1.2, November 2002
-
- Copyright (C) 2000,2001,2002 Free Software Foundation, Inc.
- 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA
-
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
- 0. PREAMBLE
-
- The purpose of this License is to make a manual, textbook, or other
- functional and useful document "free" in the sense of freedom: to
- assure everyone the effective freedom to copy and redistribute it,
- with or without modifying it, either commercially or
- noncommercially. Secondarily, this License preserves for the
- author and publisher a way to get credit for their work, while not
- being considered responsible for modifications made by others.
-
- This License is a kind of "copyleft", which means that derivative
- works of the document must themselves be free in the same sense.
- It complements the GNU General Public License, which is a copyleft
- license designed for free software.
-
- We have designed this License in order to use it for manuals for
- free software, because free software needs free documentation: a
- free program should come with manuals providing the same freedoms
- that the software does. But this License is not limited to
- software manuals; it can be used for any textual work, regardless
- of subject matter or whether it is published as a printed book.
- We recommend this License principally for works whose purpose is
- instruction or reference.
-
- 1. APPLICABILITY AND DEFINITIONS
-
- This License applies to any manual or other work, in any medium,
- that contains a notice placed by the copyright holder saying it
- can be distributed under the terms of this License. Such a notice
- grants a world-wide, royalty-free license, unlimited in duration,
- to use that work under the conditions stated herein. The
- "Document", below, refers to any such manual or work. Any member
- of the public is a licensee, and is addressed as "you". You
- accept the license if you copy, modify or distribute the work in a
- way requiring permission under copyright law.
-
- A "Modified Version" of the Document means any work containing the
- Document or a portion of it, either copied verbatim, or with
- modifications and/or translated into another language.
-
- A "Secondary Section" is a named appendix or a front-matter section
- of the Document that deals exclusively with the relationship of the
- publishers or authors of the Document to the Document's overall
- subject (or to related matters) and contains nothing that could
- fall directly within that overall subject. (Thus, if the Document
- is in part a textbook of mathematics, a Secondary Section may not
- explain any mathematics.) The relationship could be a matter of
- historical connection with the subject or with related matters, or
- of legal, commercial, philosophical, ethical or political position
- regarding them.
-
- The "Invariant Sections" are certain Secondary Sections whose
- titles are designated, as being those of Invariant Sections, in
- the notice that says that the Document is released under this
- License. If a section does not fit the above definition of
- Secondary then it is not allowed to be designated as Invariant.
- The Document may contain zero Invariant Sections. If the Document
- does not identify any Invariant Sections then there are none.
-
- The "Cover Texts" are certain short passages of text that are
- listed, as Front-Cover Texts or Back-Cover Texts, in the notice
- that says that the Document is released under this License. A
- Front-Cover Text may be at most 5 words, and a Back-Cover Text may
- be at most 25 words.
-
- A "Transparent" copy of the Document means a machine-readable copy,
- represented in a format whose specification is available to the
- general public, that is suitable for revising the document
- straightforwardly with generic text editors or (for images
- composed of pixels) generic paint programs or (for drawings) some
- widely available drawing editor, and that is suitable for input to
- text formatters or for automatic translation to a variety of
- formats suitable for input to text formatters. A copy made in an
- otherwise Transparent file format whose markup, or absence of
- markup, has been arranged to thwart or discourage subsequent
- modification by readers is not Transparent. An image format is
- not Transparent if used for any substantial amount of text. A
- copy that is not "Transparent" is called "Opaque".
-
- Examples of suitable formats for Transparent copies include plain
- ASCII without markup, Texinfo input format, LaTeX input format,
- SGML or XML using a publicly available DTD, and
- standard-conforming simple HTML, PostScript or PDF designed for
- human modification. Examples of transparent image formats include
- PNG, XCF and JPG. Opaque formats include proprietary formats that
- can be read and edited only by proprietary word processors, SGML or
- XML for which the DTD and/or processing tools are not generally
- available, and the machine-generated HTML, PostScript or PDF
- produced by some word processors for output purposes only.
-
- The "Title Page" means, for a printed book, the title page itself,
- plus such following pages as are needed to hold, legibly, the
- material this License requires to appear in the title page. For
- works in formats which do not have any title page as such, "Title
- Page" means the text near the most prominent appearance of the
- work's title, preceding the beginning of the body of the text.
-
- A section "Entitled XYZ" means a named subunit of the Document
- whose title either is precisely XYZ or contains XYZ in parentheses
- following text that translates XYZ in another language. (Here XYZ
- stands for a specific section name mentioned below, such as
- "Acknowledgements", "Dedications", "Endorsements", or "History".)
- To "Preserve the Title" of such a section when you modify the
- Document means that it remains a section "Entitled XYZ" according
- to this definition.
-
- The Document may include Warranty Disclaimers next to the notice
- which states that this License applies to the Document. These
- Warranty Disclaimers are considered to be included by reference in
- this License, but only as regards disclaiming warranties: any other
- implication that these Warranty Disclaimers may have is void and
- has no effect on the meaning of this License.
-
- 2. VERBATIM COPYING
-
- You may copy and distribute the Document in any medium, either
- commercially or noncommercially, provided that this License, the
- copyright notices, and the license notice saying this License
- applies to the Document are reproduced in all copies, and that you
- add no other conditions whatsoever to those of this License. You
- may not use technical measures to obstruct or control the reading
- or further copying of the copies you make or distribute. However,
- you may accept compensation in exchange for copies. If you
- distribute a large enough number of copies you must also follow
- the conditions in section 3.
-
- You may also lend copies, under the same conditions stated above,
- and you may publicly display copies.
-
- 3. COPYING IN QUANTITY
-
- If you publish printed copies (or copies in media that commonly
- have printed covers) of the Document, numbering more than 100, and
- the Document's license notice requires Cover Texts, you must
- enclose the copies in covers that carry, clearly and legibly, all
- these Cover Texts: Front-Cover Texts on the front cover, and
- Back-Cover Texts on the back cover. Both covers must also clearly
- and legibly identify you as the publisher of these copies. The
- front cover must present the full title with all words of the
- title equally prominent and visible. You may add other material
- on the covers in addition. Copying with changes limited to the
- covers, as long as they preserve the title of the Document and
- satisfy these conditions, can be treated as verbatim copying in
- other respects.
-
- If the required texts for either cover are too voluminous to fit
- legibly, you should put the first ones listed (as many as fit
- reasonably) on the actual cover, and continue the rest onto
- adjacent pages.
-
- If you publish or distribute Opaque copies of the Document
- numbering more than 100, you must either include a
- machine-readable Transparent copy along with each Opaque copy, or
- state in or with each Opaque copy a computer-network location from
- which the general network-using public has access to download
- using public-standard network protocols a complete Transparent
- copy of the Document, free of added material. If you use the
- latter option, you must take reasonably prudent steps, when you
- begin distribution of Opaque copies in quantity, to ensure that
- this Transparent copy will remain thus accessible at the stated
- location until at least one year after the last time you
- distribute an Opaque copy (directly or through your agents or
- retailers) of that edition to the public.
-
- It is requested, but not required, that you contact the authors of
- the Document well before redistributing any large number of
- copies, to give them a chance to provide you with an updated
- version of the Document.
-
- 4. MODIFICATIONS
-
- You may copy and distribute a Modified Version of the Document
- under the conditions of sections 2 and 3 above, provided that you
- release the Modified Version under precisely this License, with
- the Modified Version filling the role of the Document, thus
- licensing distribution and modification of the Modified Version to
- whoever possesses a copy of it. In addition, you must do these
- things in the Modified Version:
-
- A. Use in the Title Page (and on the covers, if any) a title
- distinct from that of the Document, and from those of
- previous versions (which should, if there were any, be listed
- in the History section of the Document). You may use the
- same title as a previous version if the original publisher of
- that version gives permission.
-
- B. List on the Title Page, as authors, one or more persons or
- entities responsible for authorship of the modifications in
- the Modified Version, together with at least five of the
- principal authors of the Document (all of its principal
- authors, if it has fewer than five), unless they release you
- from this requirement.
-
- C. State on the Title page the name of the publisher of the
- Modified Version, as the publisher.
-
- D. Preserve all the copyright notices of the Document.
-
- E. Add an appropriate copyright notice for your modifications
- adjacent to the other copyright notices.
-
- F. Include, immediately after the copyright notices, a license
- notice giving the public permission to use the Modified
- Version under the terms of this License, in the form shown in
- the Addendum below.
-
- G. Preserve in that license notice the full lists of Invariant
- Sections and required Cover Texts given in the Document's
- license notice.
-
- H. Include an unaltered copy of this License.
-
- I. Preserve the section Entitled "History", Preserve its Title,
- and add to it an item stating at least the title, year, new
- authors, and publisher of the Modified Version as given on
- the Title Page. If there is no section Entitled "History" in
- the Document, create one stating the title, year, authors,
- and publisher of the Document as given on its Title Page,
- then add an item describing the Modified Version as stated in
- the previous sentence.
-
- J. Preserve the network location, if any, given in the Document
- for public access to a Transparent copy of the Document, and
- likewise the network locations given in the Document for
- previous versions it was based on. These may be placed in
- the "History" section. You may omit a network location for a
- work that was published at least four years before the
- Document itself, or if the original publisher of the version
- it refers to gives permission.
-
- K. For any section Entitled "Acknowledgements" or "Dedications",
- Preserve the Title of the section, and preserve in the
- section all the substance and tone of each of the contributor
- acknowledgements and/or dedications given therein.
-
- L. Preserve all the Invariant Sections of the Document,
- unaltered in their text and in their titles. Section numbers
- or the equivalent are not considered part of the section
- titles.
-
- M. Delete any section Entitled "Endorsements". Such a section
- may not be included in the Modified Version.
-
- N. Do not retitle any existing section to be Entitled
- "Endorsements" or to conflict in title with any Invariant
- Section.
-
- O. Preserve any Warranty Disclaimers.
-
- If the Modified Version includes new front-matter sections or
- appendices that qualify as Secondary Sections and contain no
- material copied from the Document, you may at your option
- designate some or all of these sections as invariant. To do this,
- add their titles to the list of Invariant Sections in the Modified
- Version's license notice. These titles must be distinct from any
- other section titles.
-
- You may add a section Entitled "Endorsements", provided it contains
- nothing but endorsements of your Modified Version by various
- parties--for example, statements of peer review or that the text
- has been approved by an organization as the authoritative
- definition of a standard.
-
- You may add a passage of up to five words as a Front-Cover Text,
- and a passage of up to 25 words as a Back-Cover Text, to the end
- of the list of Cover Texts in the Modified Version. Only one
- passage of Front-Cover Text and one of Back-Cover Text may be
- added by (or through arrangements made by) any one entity. If the
- Document already includes a cover text for the same cover,
- previously added by you or by arrangement made by the same entity
- you are acting on behalf of, you may not add another; but you may
- replace the old one, on explicit permission from the previous
- publisher that added the old one.
-
- The author(s) and publisher(s) of the Document do not by this
- License give permission to use their names for publicity for or to
- assert or imply endorsement of any Modified Version.
-
- 5. COMBINING DOCUMENTS
-
- You may combine the Document with other documents released under
- this License, under the terms defined in section 4 above for
- modified versions, provided that you include in the combination
- all of the Invariant Sections of all of the original documents,
- unmodified, and list them all as Invariant Sections of your
- combined work in its license notice, and that you preserve all
- their Warranty Disclaimers.
-
- The combined work need only contain one copy of this License, and
- multiple identical Invariant Sections may be replaced with a single
- copy. If there are multiple Invariant Sections with the same name
- but different contents, make the title of each such section unique
- by adding at the end of it, in parentheses, the name of the
- original author or publisher of that section if known, or else a
- unique number. Make the same adjustment to the section titles in
- the list of Invariant Sections in the license notice of the
- combined work.
-
- In the combination, you must combine any sections Entitled
- "History" in the various original documents, forming one section
- Entitled "History"; likewise combine any sections Entitled
- "Acknowledgements", and any sections Entitled "Dedications". You
- must delete all sections Entitled "Endorsements."
-
- 6. COLLECTIONS OF DOCUMENTS
-
- You may make a collection consisting of the Document and other
- documents released under this License, and replace the individual
- copies of this License in the various documents with a single copy
- that is included in the collection, provided that you follow the
- rules of this License for verbatim copying of each of the
- documents in all other respects.
-
- You may extract a single document from such a collection, and
- distribute it individually under this License, provided you insert
- a copy of this License into the extracted document, and follow
- this License in all other respects regarding verbatim copying of
- that document.
-
- 7. AGGREGATION WITH INDEPENDENT WORKS
-
- A compilation of the Document or its derivatives with other
- separate and independent documents or works, in or on a volume of
- a storage or distribution medium, is called an "aggregate" if the
- copyright resulting from the compilation is not used to limit the
- legal rights of the compilation's users beyond what the individual
- works permit. When the Document is included in an aggregate, this
- License does not apply to the other works in the aggregate which
- are not themselves derivative works of the Document.
-
- If the Cover Text requirement of section 3 is applicable to these
- copies of the Document, then if the Document is less than one half
- of the entire aggregate, the Document's Cover Texts may be placed
- on covers that bracket the Document within the aggregate, or the
- electronic equivalent of covers if the Document is in electronic
- form. Otherwise they must appear on printed covers that bracket
- the whole aggregate.
-
- 8. TRANSLATION
-
- Translation is considered a kind of modification, so you may
- distribute translations of the Document under the terms of section
- 4. Replacing Invariant Sections with translations requires special
- permission from their copyright holders, but you may include
- translations of some or all Invariant Sections in addition to the
- original versions of these Invariant Sections. You may include a
- translation of this License, and all the license notices in the
- Document, and any Warranty Disclaimers, provided that you also
- include the original English version of this License and the
- original versions of those notices and disclaimers. In case of a
- disagreement between the translation and the original version of
- this License or a notice or disclaimer, the original version will
- prevail.
-
- If a section in the Document is Entitled "Acknowledgements",
- "Dedications", or "History", the requirement (section 4) to
- Preserve its Title (section 1) will typically require changing the
- actual title.
-
- 9. TERMINATION
-
- You may not copy, modify, sublicense, or distribute the Document
- except as expressly provided for under this License. Any other
- attempt to copy, modify, sublicense or distribute the Document is
- void, and will automatically terminate your rights under this
- License. However, parties who have received copies, or rights,
- from you under this License will not have their licenses
- terminated so long as such parties remain in full compliance.
-
- 10. FUTURE REVISIONS OF THIS LICENSE
-
- The Free Software Foundation may publish new, revised versions of
- the GNU Free Documentation License from time to time. Such new
- versions will be similar in spirit to the present version, but may
- differ in detail to address new problems or concerns. See
- `http://www.gnu.org/copyleft/'.
-
- Each version of the License is given a distinguishing version
- number. If the Document specifies that a particular numbered
- version of this License "or any later version" applies to it, you
- have the option of following the terms and conditions either of
- that specified version or of any later version that has been
- published (not as a draft) by the Free Software Foundation. If
- the Document does not specify a version number of this License,
- you may choose any version ever published (not as a draft) by the
- Free Software Foundation.
-
-A.0.1 ADDENDUM: How to use this License for your documents
-----------------------------------------------------------
-
-To use this License in a document you have written, include a copy of
-the License in the document and put the following copyright and license
-notices just after the title page:
-
- Copyright (C) YEAR YOUR NAME.
- Permission is granted to copy, distribute and/or modify this document
- under the terms of the GNU Free Documentation License, Version 1.2
- or any later version published by the Free Software Foundation;
- with no Invariant Sections, no Front-Cover Texts, and no Back-Cover
- Texts. A copy of the license is included in the section entitled ``GNU
- Free Documentation License''.
-
- If you have Invariant Sections, Front-Cover Texts and Back-Cover
-Texts, replace the "with...Texts." line with this:
-
- with the Invariant Sections being LIST THEIR TITLES, with
- the Front-Cover Texts being LIST, and with the Back-Cover Texts
- being LIST.
-
- If you have Invariant Sections without Cover Texts, or some other
-combination of the three, merge those two alternatives to suit the
-situation.
-
- If your document contains nontrivial examples of program code, we
-recommend releasing these examples in parallel under your choice of
-free software license, such as the GNU General Public License, to
-permit their use in free software.
-
-
-File: user-guide.info, Node: Index, Prev: GNU Free Documentation Licence, Up: Top
-
-Index
-*****
-
-
-* Menu:
-
-* alu (scheduler): Unify. (line 49)
-* AppArmour: Troubleshooting. (line 96)
-* arch: Getting GlusterFS. (line 6)
-* booster: Booster. (line 6)
-* commercial support: Introduction. (line 36)
-* DNS round robin: Transport modules. (line 29)
-* fcntl: POSIX Locks. (line 6)
-* FDL, GNU Free Documentation License: GNU Free Documentation Licence.
- (line 6)
-* fixed-id (translator): Fixed ID. (line 6)
-* GlusterFS client: Client. (line 6)
-* GlusterFS mailing list: Introduction. (line 28)
-* GlusterFS server: Server. (line 6)
-* infiniband transport: Transport modules. (line 58)
-* InfiniBand, installation: Pre requisites. (line 51)
-* io-cache (translator): IO Cache. (line 6)
-* io-threads (translator): IO Threads. (line 6)
-* IRC channel, #gluster: Introduction. (line 31)
-* libibverbs: Pre requisites. (line 51)
-* namespace: Unify. (line 207)
-* nufa (scheduler): Unify. (line 175)
-* OpenSuSE: Troubleshooting. (line 96)
-* posix-locks (translator): POSIX Locks. (line 6)
-* random (scheduler): Unify. (line 159)
-* read-ahead (translator): Read Ahead. (line 6)
-* record locking: POSIX Locks. (line 6)
-* Redhat Enterprise Linux: Troubleshooting. (line 78)
-* Replicate: Replicate. (line 6)
-* rot-13 (translator): ROT-13. (line 6)
-* rr (scheduler): Unify. (line 138)
-* scheduler (unify): Unify. (line 6)
-* self heal (replicate): Replicate. (line 46)
-* self heal (unify): Unify. (line 223)
-* stripe (translator): Stripe. (line 6)
-* trace (translator): Trace. (line 6)
-* unify (translator): Unify. (line 6)
-* unify invariants: Unify. (line 16)
-* write-behind (translator): Write Behind. (line 6)
-* Gluster, Inc.: Introduction. (line 36)
-
-
-
-Tag Table:
-Node: Top704
-Node: Acknowledgements2304
-Node: Introduction3214
-Node: Installation and Invocation4649
-Node: Pre requisites4933
-Node: Getting GlusterFS7023
-Ref: Getting GlusterFS-Footnote-17809
-Node: Building7857
-Node: Running GlusterFS9559
-Node: Server9770
-Node: Client11358
-Node: A Tutorial Introduction13564
-Node: Concepts17101
-Node: Filesystems in Userspace17316
-Node: Translator18457
-Node: Volume specification file21160
-Node: Translators23632
-Node: Storage Translators24201
-Ref: Storage Translators-Footnote-125008
-Node: POSIX25142
-Node: BDB25765
-Node: Client and Server Translators26822
-Node: Transport modules27298
-Node: Client protocol31445
-Node: Server protocol32384
-Node: Clustering Translators33373
-Node: Unify34260
-Ref: Unify-Footnote-143859
-Node: Replicate43951
-Node: Stripe49006
-Node: Performance Translators50164
-Node: Read Ahead50438
-Node: Write Behind52170
-Node: IO Threads53579
-Node: IO Cache54367
-Node: Booster55691
-Node: Features Translators57105
-Node: POSIX Locks57333
-Node: Fixed ID58650
-Node: Miscellaneous Translators59136
-Node: ROT-1359334
-Node: Trace60013
-Node: Usage Scenarios61282
-Ref: Usage Scenarios-Footnote-167215
-Node: Troubleshooting67290
-Node: GNU Free Documentation Licence73638
-Node: Index96087
-
-End Tag Table
diff --git a/doc/user-guide/legacy/user-guide.pdf b/doc/user-guide/legacy/user-guide.pdf
deleted file mode 100644
index ed7bd2a9907..00000000000
--- a/doc/user-guide/legacy/user-guide.pdf
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/user-guide.texi b/doc/user-guide/legacy/user-guide.texi
deleted file mode 100644
index 2d51da022cf..00000000000
--- a/doc/user-guide/legacy/user-guide.texi
+++ /dev/null
@@ -1,2246 +0,0 @@
-\input texinfo
-@setfilename user-guide.info
-@settitle GlusterFS 2.0 User Guide
-@afourpaper
-
-@direntry
-* GlusterFS: (user-guide). GlusterFS distributed filesystem user guide
-@end direntry
-
-@copying
-This is the user manual for GlusterFS 2.0.
-
-Copyright @copyright{} 2007-2011 @email{@b{Gluster}} , Inc. Permission is granted to
-copy, distribute and/or modify this document under the terms of the
-@acronym{GNU} Free Documentation License, Version 1.2 or any later
-version published by the Free Software Foundation; with no Invariant
-Sections, no Front-Cover Texts, and no Back-Cover Texts. A copy of the
-license is included in the chapter entitled ``@acronym{GNU} Free
-Documentation License''.
-@end copying
-
-@titlepage
-@title GlusterFS 2.0 User Guide [DRAFT]
-@subtitle January 15, 2008
-@author http://gluster.org/core-team.php
-@author @email{@b{Gluster}}
-@page
-@vskip 0pt plus 1filll
-@insertcopying
-@end titlepage
-
-@c Info stuff
-@ifnottex
-@node Top
-@top GlusterFS 2.0 User Guide
-
-@insertcopying
-@menu
-* Acknowledgements::
-* Introduction::
-* Installation and Invocation::
-* Concepts::
-* Translators::
-* Usage Scenarios::
-* Troubleshooting::
-* GNU Free Documentation Licence::
-* Index::
-
-@detailmenu
- --- The Detailed Node Listing ---
-
-Installation and Invocation
-
-* Pre requisites::
-* Getting GlusterFS::
-* Building::
-* Running GlusterFS::
-* A Tutorial Introduction::
-
-Running GlusterFS
-
-* Server::
-* Client::
-
-Concepts
-
-* Filesystems in Userspace::
-* Translator::
-* Volume specification file::
-
-Translators
-
-* Storage Translators::
-* Client and Server Translators::
-* Clustering Translators::
-* Performance Translators::
-* Features Translators::
-
-Storage Translators
-
-* POSIX::
-
-Client and Server Translators
-
-* Transport modules::
-* Client protocol::
-* Server protocol::
-
-Clustering Translators
-
-* Unify::
-* Replicate::
-* Stripe::
-
-Performance Translators
-
-* Read Ahead::
-* Write Behind::
-* IO Threads::
-* IO Cache::
-
-Features Translators
-
-* POSIX Locks::
-* Fixed ID::
-
-Miscellaneous Translators
-
-* ROT-13::
-* Trace::
-
-@end detailmenu
-@end menu
-
-@end ifnottex
-@c Info stuff end
-
-@contents
-
-@node Acknowledgements
-@unnumbered Acknowledgements
-GlusterFS continues to be a wonderful and enriching experience for all
-of us involved.
-
-GlusterFS development would not have been possible at this pace if
-not for our enthusiastic users. People from around the world have
-helped us with bug reports, performance numbers, and feature suggestions.
-A huge thanks to them all.
-
-Matthew Paine - for RPMs & general enthu
-
-Leonardo Rodrigues de Mello - for DEBs
-
-Julian Perez & Adam D'Auria - for multi-server tutorial
-
-Paul England - for HA spec
-
-Brent Nelson - for many bug reports
-
-Jacques Mattheij - for Europe mirror.
-
-Patrick Negri - for TCP non-blocking connect.
-@flushright
-http://gluster.org/core-team.php (@email{list-hacking@@gluster.com})
-@email{@b{Gluster}}
-@end flushright
-
-@node Introduction
-@chapter Introduction
-
-GlusterFS is a distributed filesystem. It works at the file level,
-not block level.
-
-A network filesystem is one which allows us to access remote files. A
-distributed filesystem is one that stores data on multiple machines
-and makes them all appear to be a part of the same filesystem.
-
-Need for distributed filesystems
-
-@itemize @bullet
-@item Scalability: A distributed filesystem allows us to store more data than what can be stored on a single machine.
-
-@item Redundancy: We might want to replicate crucial data on to several machines.
-
-@item Uniform access: One can mount a remote volume (for example your home directory) from any machine and access the same data.
-@end itemize
-
-@section Contacting us
-You can reach us through the mailing list @strong{gluster-devel}
-(@email{gluster-devel@@nongnu.org}).
-@cindex GlusterFS mailing list
-
-You can also find many of the developers on @acronym{IRC}, on the @code{#gluster}
-channel on Freenode (@indicateurl{irc.freenode.net}).
-@cindex IRC channel, #gluster
-
-The GlusterFS documentation wiki is also useful: @*
-@indicateurl{http://gluster.org/docs/index.php/GlusterFS}
-
-For commercial support, you can contact @email{@b{Gluster}} at:
-@cindex commercial support
-@cindex Gluster, Inc.
-
-@display
-3194 Winding Vista Common
-Fremont, CA 94539
-USA.
-
-Phone: +1 (510) 354 6801
-Toll free: +1 (888) 813 6309
-Fax: +1 (510) 372 0604
-@end display
-
-You can also email us at @email{support@@gluster.com}.
-
-@node Installation and Invocation
-@chapter Installation and Invocation
-
-@menu
-* Pre requisites::
-* Getting GlusterFS::
-* Building::
-* Running GlusterFS::
-* A Tutorial Introduction::
-@end menu
-
-@node Pre requisites
-@section Pre requisites
-
-Before installing GlusterFS make sure you have the
-following components installed.
-
-@subsection @acronym{FUSE}
-GlusterFS has now built-in support for the @acronym{FUSE} protocol.
-You need a kernel with @acronym{FUSE} support to mount GlusterFS.
-You do not need the @acronym{FUSE} package (library and utilities),
-but be aware of the following issues:
-
-@itemize
-@item If you want unprivileged users to be able to mount GlusterFS filesystems,
-you need a recent version of the @command{fusermount} utility. You already have
-it if you have @acronym{FUSE} version 2.7.0 or higher installed; if that's not
-the case, one will be compiled along with GlusterFS if you pass
-@command{--enable-fusermount} to the @command{configure} script. @item You
-need to ensure @acronym{FUSE} support is configured properly on your system. In
-details:
-@itemize
-@item If your kernel has @acronym{FUSE} as a loadable module, make sure it's
-loaded.
-@item Create @command{/dev/fuse} (major 10, minor 229) either by means of udev
-rules or by hand.
-@item Optionally, if you want runtime control over your @acronym{FUSE} mounts,
-mount the fusectl auxiliary filesystem:
-
-@example
-# mount -t fusectl none /sys/fs/fuse/connections
-@end example
-@end itemize
-
-The @acronym{FUSE} packages shipped by the various distributions usually take care
-about these things, so the easiest way to get the above tasks handled is still
-installing the @acronym{FUSE} package(s).
-@end itemize
-
-To get the best performance from GlusterFS,it is recommended that you use
-our patched version of the @acronym{FUSE} kernel module. See Patched FUSE for details.
-
-@subsection Patched FUSE
-
-The GlusterFS project maintains a patched version of @acronym{FUSE} meant to be used
-with GlusterFS. The patches increase GlusterFS performance. It is recommended that
-all users use the patched @acronym{FUSE}.
-
-The patched @acronym{FUSE} tarball can be downloaded from:
-
-@indicateurl{ftp://ftp.gluster.com/pub/gluster/glusterfs/fuse/}
-
-The specific changes made to @acronym{FUSE} are:
-
-@itemize
-@item The communication channel size between @acronym{FUSE} kernel module and GlusterFS has been increased to 1MB, permitting large reads and writes to be sent in bigger chunks.
-
-@item The kernel's read-ahead boundry has been extended upto 1MB.
-
-@item Block size returned in the @command{stat()}/@command{fstat()} calls tuned to 1MB, to make cp and similar commands perform I/O using that block size.
-
-@item @command{flock()} locking support has been added (although some rework in GlusterFS is needed for perfect compliance).
-@end itemize
-
-@subsection libibverbs (optional)
-@cindex InfiniBand, installation
-@cindex libibverbs
-This is only needed if you want GlusterFS to use InfiniBand as the
-interconnect mechanism between server and client. You can get it from:
-
-@indicateurl{http://www.openfabrics.org/downloads.htm}.
-
-@subsection Bison and Flex
-These should be already installed on most Linux systems. If not, use your distribution's
-normal software installation procedures to install them. Make sure you install the
-relevant developer packages also.
-
-@node Getting GlusterFS
-@section Getting GlusterFS
-@cindex arch
-There are many ways to get hold of GlusterFS. For a production deployment,
-the recommended method is to download the latest release tarball.
-Release tarballs are available at: @indicateurl{http://gluster.org/download.php}.
-
-If you want the bleeding edge development source, you can get them
-from the Git
-@footnote{@indicateurl{http://git-scm.com}}
-repository. First you must install Git itself. Then
-you can check out the source
-
-@example
-$ git clone git://git.sv.gnu.org/gluster.git glusterfs
-@end example
-
-@node Building
-@section Building
-You can skip this section if you're installing from @acronym{RPM}s
-or @acronym{DEB}s.
-
-GlusterFS uses the Autotools mechanism to build. As such, the procedure
-is straight-forward. First, change into the GlusterFS source directory.
-
-@example
-$ cd glusterfs-<version>
-@end example
-
-If you checked out the source from the Arch repository, you'll need
-to run @command{./autogen.sh} first. Note that you'll need to have
-Autoconf and Automake installed for this.
-
-Run @command{configure}.
-
-@example
-$ ./configure
-@end example
-
-The configure script accepts the following options:
-
-@cartouche
-@table @code
-
-@item --disable-ibverbs
-Disable the InfiniBand transport mechanism.
-
-@item --disable-fuse-client
-Disable the @acronym{FUSE} client.
-
-@item --disable-server
-Disable building of the GlusterFS server.
-
-@item --disable-bdb
-Disable building of Berkeley DB based storage translator.
-
-@item --disable-mod_glusterfs
-Disable building of Apache/lighttpd glusterfs plugins.
-
-@item --disable-epoll
-Use poll instead of epoll.
-
-@item --disable-libglusterfsclient
-Disable building of libglusterfsclient
-
-@item --enable-fusermount
-Build fusermount
-
-@end table
-@end cartouche
-
-Build and install GlusterFS.
-
-@example
-# make install
-@end example
-
-The binaries (@command{glusterfsd} and @command{glusterfs}) will be by
-default installed in @command{/usr/local/sbin/}. Translator,
-scheduler, and transport shared libraries will be installed in
-@command{/usr/local/lib/glusterfs/<version>/}. Sample volume
-specification files will be in @command{/usr/local/etc/glusterfs/}.
-This document itself can be found in
-@command{/usr/local/share/doc/glusterfs/}. If you passed the @command{--prefix}
-argument to the configure script, then replace @command{/usr/local} in the preceding
-paths with the prefix.
-
-@node Running GlusterFS
-@section Running GlusterFS
-
-@menu
-* Server::
-* Client::
-@end menu
-
-@node Server
-@subsection Server
-@cindex GlusterFS server
-
-The GlusterFS server is necessary to export storage volumes to remote clients
-(See @ref{Server protocol} for more info). This section documents the invocation
-of the GlusterFS server program and all the command-line options accepted by it.
-
-@cartouche
-@table @code
-Basic Options
-@item -f, --volfile=<path>
- Use the volume file as the volume specification.
-
-@item -s, --volfile-server=<hostname>
- Server to get volume file from. This option overrides --volfile option.
-
-@item -l, --log-file=<path>
- Specify the path for the log file.
-
-@item -L, --log-level=<level>
- Set the log level for the server. Log level should be one of @acronym{DEBUG},
-@acronym{WARNING}, @acronym{ERROR}, @acronym{CRITICAL}, or @acronym{NONE}.
-
-Advanced Options
-@item --debug
- Run in debug mode. This option sets --no-daemon, --log-level to DEBUG and
- --log-file to console.
-
-@item -N, --no-daemon
- Run glusterfsd as a foreground process.
-
-@item -p, --pid-file=<path>
- Path for the @acronym{PID} file.
-
-@item --volfile-id=<key>
- 'key' of the volfile to be fetched from server.
-
-@item --volfile-server-port=<port-number>
- Listening port number of volfile server.
-
-@item --volfile-server-transport=[socket|ib-verbs]
- Transport type to get volfile from server. [default: @command{socket}]
-
-@item --xlator-options=<volume-name.option=value>
- Add/override a translator option for a volume with specified value.
-
-Miscellaneous Options
-@item -?, --help
- Show this help text.
-
-@item --usage
- Display a short usage message.
-
-@item -V, --version
- Show version information.
-@end table
-@end cartouche
-
-@node Client
-@subsection Client
-@cindex GlusterFS client
-
-The GlusterFS client process is necessary to access remote storage volumes and
-mount them locally using @acronym{FUSE}. This section documents the invocation of the
-client process and all its command-line arguments.
-
-@example
- # glusterfs [options] <mountpoint>
-@end example
-
-The @command{mountpoint} is the directory where you want the GlusterFS
-filesystem to appear. Example:
-
-@example
- # glusterfs -f /usr/local/etc/glusterfs-client.vol /mnt
-@end example
-
-The command-line options are detailed below.
-
-@tex
-\vfill
-@end tex
-@page
-
-@cartouche
-@table @code
-
-Basic Options
-@item -f, --volfile=<path>
- Use the volume file as the volume specification.
-
-@item -s, --volfile-server=<hostname>
- Server to get volume file from. This option overrides --volfile option.
-
-@item -l, --log-file=<path>
- Specify the path for the log file.
-
-@item -L, --log-level=<level>
- Set the log level for the server. Log level should be one of @acronym{DEBUG},
-@acronym{WARNING}, @acronym{ERROR}, @acronym{CRITICAL}, or @acronym{NONE}.
-
-Advanced Options
-@item --debug
- Run in debug mode. This option sets --no-daemon, --log-level to DEBUG and
- --log-file to console.
-
-@item -N, --no-daemon
- Run @command{glusterfs} as a foreground process.
-
-@item -p, --pid-file=<path>
- Path for the @acronym{PID} file.
-
-@item --volfile-id=<key>
- 'key' of the volfile to be fetched from server.
-
-@item --volfile-server-port=<port-number>
- Listening port number of volfile server.
-
-@item --volfile-server-transport=[socket|ib-verbs]
- Transport type to get volfile from server. [default: @command{socket}]
-
-@item --xlator-options=<volume-name.option=value>
- Add/override a translator option for a volume with specified value.
-
-@item --volume-name=<volume name>
- Volume name in client spec to use. Defaults to the root volume.
-
-@acronym{FUSE} Options
-@item --attribute-timeout=<n>
- Attribute timeout for inodes in the kernel, in seconds. Defaults to 1 second.
-
-@item --disable-direct-io-mode
- Disable direct @acronym{I/O} mode in @acronym{FUSE} kernel module. This is set
- automatically if kernel supports big writes (>= 2.6.26).
-
-@item -e, --entry-timeout=<n>
- Entry timeout for directory entries in the kernel, in seconds.
- Defaults to 1 second.
-
-Missellaneous Options
-@item -?, --help
- Show this help information.
-
-@item -V, --version
- Show version information.
-@end table
-@end cartouche
-
-@node A Tutorial Introduction
-@section A Tutorial Introduction
-
-This section will show you how to quickly get GlusterFS up and running. We'll
-configure GlusterFS as a simple network filesystem, with one server and one client.
-In this mode of usage, GlusterFS can serve as a replacement for NFS.
-
-We'll make use of two machines; call them @emph{server} and
-@emph{client} (If you don't want to setup two machines, just run
-everything that follows on the same machine). In the examples that
-follow, the shell prompts will use these names to clarify the machine
-on which the command is being run. For example, a command that should
-be run on the server will be shown with the prompt:
-
-@example
-[root@@server]#
-@end example
-
-Our goal is to make a directory on the @emph{server} (say, @command{/export})
-accessible to the @emph{client}.
-
-First of all, get GlusterFS installed on both the machines, as described in the
-previous sections. Make sure you have the @acronym{FUSE} kernel module loaded. You
-can ensure this by running:
-
-@example
-[root@@server]# modprobe fuse
-@end example
-
-Before we can run the GlusterFS client or server programs, we need to write
-two files called @emph{volume specifications} (equivalently refered to as @emph{volfiles}).
-The volfile describes the @emph{translator tree} on a node. The next chapter will
-explain the concepts of `translator' and `volume specification' in detail. For now,
-just assume that the volfile is like an NFS @command{/etc/export} file.
-
-On the server, create a text file somewhere (we'll assume the path
-@command{/tmp/glusterfsd.vol}) with the following contents.
-
-@cartouche
-@example
-volume colon-o
- type storage/posix
- option directory /export
-end-volume
-
-volume server
- type protocol/server
- subvolumes colon-o
- option transport-type tcp
- option auth.addr.colon-o.allow *
-end-volume
-@end example
-@end cartouche
-
-A brief explanation of the file's contents. The first section defines a storage
-volume, named ``colon-o'' (the volume names are arbitrary), which exports the
-@command{/export} directory. The second section defines options for the translator
-which will make the storage volume accessible remotely. It specifies @command{colon-o} as
-a subvolume. This defines the @emph{translator tree}, about which more will be said
-in the next chapter. The two options specify that the @acronym{TCP} protocol is to be
-used (as opposed to InfiniBand, for example), and that access to the storage volume
-is to be provided to clients with any @acronym{IP} address at all. If you wanted to
-restrict access to this server to only your subnet for example, you'd specify
-something like @command{192.168.1.*} in the second option line.
-
-On the client machine, create the following text file (again, we'll assume
-the path to be @command{/tmp/glusterfs-client.vol}). Replace
-@emph{server-ip-address} with the @acronym{IP} address of your server machine. If you
-are doing all this on a single machine, use @command{127.0.0.1}.
-
-@cartouche
-@example
-volume client
- type protocol/client
- option transport-type tcp
- option remote-host @emph{server-ip-address}
- option remote-subvolume colon-o
-end-volume
-@end example
-@end cartouche
-
-Now we need to start both the server and client programs. To start the server:
-
-@example
-[root@@server]# glusterfsd -f /tmp/glusterfs-server.vol
-@end example
-
-To start the client:
-
-@example
-[root@@client]# glusterfs -f /tmp/glusterfs-client.vol /mnt/glusterfs
-@end example
-
-You should now be able to see the files under the server's @command{/export} directory
-in the @command{/mnt/glusterfs} directory on the client. That's it; GlusterFS is now
-working as a network file system.
-
-@node Concepts
-@chapter Concepts
-
-@menu
-* Filesystems in Userspace::
-* Translator::
-* Volume specification file::
-@end menu
-
-@node Filesystems in Userspace
-@section Filesystems in Userspace
-
-A filesystem is usually implemented in kernel space. Kernel space
-development is much harder than userspace development. @acronym{FUSE}
-is a kernel module/library that allows us to write a filesystem
-completely in userspace.
-
-@acronym{FUSE} consists of a kernel module which interacts with the userspace
-implementation using a device file @code{/dev/fuse}. When a process
-makes a syscall on a @acronym{FUSE} filesystem, @acronym{VFS} hands the request to the
-@acronym{FUSE} module, which writes the request to @code{/dev/fuse}. The
-userspace implementation polls @code{/dev/fuse}, and when a request arrives,
-processes it and writes the result back to @code{/dev/fuse}. The kernel then
-reads from the device file and returns the result to the user process.
-
-In case of GlusterFS, the userspace program is the GlusterFS client.
-The control flow is shown in the diagram below. The GlusterFS client
-services the request by sending it to the server, which in turn
-hands it to the local @acronym{POSIX} filesystem.
-
-@center @image{fuse,44pc,,,.pdf}
-@center Fig 1. Control flow in GlusterFS
-
-@node Translator
-@section Translator
-
-The @emph{translator} is the most important concept in GlusterFS. In
-fact, GlusterFS is nothing but a collection of translators working
-together, forming a translator @emph{tree}.
-
-The idea of a translator is perhaps best understood using an
-analogy. Consider the @acronym{VFS} in the Linux kernel. The
-@acronym{VFS} abstracts the various filesystem implementations (such
-as @acronym{EXT3}, ReiserFS, @acronym{XFS}, etc.) supported by the
-kernel. When an application calls the kernel to perform an operation
-on a file, the kernel passes the request on to the appropriate
-filesystem implementation.
-
-For example, let's say there are two partitions on a Linux machine:
-@command{/}, which is an @acronym{EXT3} partition, and @command{/usr},
-which is a ReiserFS partition. Now if an application wants to open a
-file called, say, @command{/etc/fstab}, then the kernel will
-internally pass the request to the @acronym{EXT3} implementation. If
-on the other hand, an application wants to read a file called
-@command{/usr/src/linux/CREDITS}, then the kernel will call upon the
-ReiserFS implementation to do the job.
-
-The ``filesystem implementation'' objects are analogous to GlusterFS
-translators. A GlusterFS translator implements all the filesystem
-operations. Whereas in @acronym{VFS} there is a two-level tree (with
-the kernel at the root and all the filesystem implementation as its
-children), in GlusterFS there exists a more elaborate tree structure.
-
-We can now define translators more precisely. A GlusterFS translator
-is a shared object (@command{.so}) that implements every filesystem
-call. GlusterFS translators can be arranged in an arbitrary tree
-structure (subject to constraints imposed by the translators). When
-GlusterFS receives a filesystem call, it passes it on to the
-translator at the root of the translator tree. The root translator may
-in turn pass it on to any or all of its children, and so on, until the
-leaf nodes are reached. The result of a filesystem call is
-communicated in the reverse fashion, from the leaf nodes up to the
-root node, and then on to the application.
-
-So what might a translator tree look like?
-
-@tex
-\vfill
-@end tex
-@page
-
-@center @image{xlator,44pc,,,.pdf}
-@center Fig 2. A sample translator tree
-
-The diagram depicts three servers and one GlusterFS client. It is important
-to note that conceptually, the translator tree spans machine boundaries.
-Thus, the client machine in the diagram, @command{10.0.0.1}, can access
-the aggregated storage of the filesystems on the server machines @command{10.0.0.2},
-@command{10.0.0.3}, and @command{10.0.0.4}. The translator diagram will make more
-sense once you've read the next chapter and understood the functions of the
-various translators.
-
-@node Volume specification file
-@section Volume specification file
-The volume specification file describes the translator tree for both the
-server and client programs.
-
-A volume specification file is a sequence of volume definitions.
-The syntax of a volume definition is explained below:
-
-@cartouche
-@example
-@strong{volume} @emph{volume-name}
- @strong{type} @emph{translator-name}
- @strong{option} @emph{option-name} @emph{option-value}
- @dots{}
- @strong{subvolumes} @emph{subvolume1} @emph{subvolume2} @dots{}
-@strong{end-volume}
-@end example
-
-@dots{}
-@end cartouche
-
-@table @asis
-@item @emph{volume-name}
- An identifier for the volume. This is just a human-readable name,
-and can contain any alphanumeric character. For instance, ``storage-1'', ``colon-o'',
-or ``forty-two''.
-
-@item @emph{translator-name}
- Name of one of the available translators. Example: @command{protocol/client},
-@command{cluster/unify}.
-
-@item @emph{option-name}
- Name of a valid option for the translator.
-
-@item @emph{option-value}
- Value for the option. Everything following the ``option'' keyword to the end of the
-line is considered the value; it is up to the translator to parse it.
-
-@item @emph{subvolume1}, @emph{subvolume2}, @dots{}
- Volume names of sub-volumes. The sub-volumes must already have been defined earlier
-in the file.
-@end table
-
-There are a few rules you must follow when writing a volume specification file:
-
-@itemize
-@item Everything following a `@command{#}' is considered a comment and is ignored. Blank lines are also ignored.
-@item All names and keywords are case-sensitive.
-@item The order of options inside a volume definition does not matter.
-@item An option value may not span multiple lines.
-@item If an option is not specified, it will assume its default value.
-@item A sub-volume must have already been defined before it can be referenced. This means you have to write the specification file ``bottom-up'', starting from the leaf nodes of the translator tree and moving up to the root.
-@end itemize
-
-A simple example volume specification file is shown below:
-
-@cartouche
-@example
-# This is a comment line
-volume client
- type protocol/client
- option transport-type tcp
- option remote-host localhost # Also a comment
- option remote-subvolume brick
-# The subvolumes line may be absent
-end-volume
-
-volume iot
- type performance/io-threads
- option thread-count 4
- subvolumes client
-end-volume
-
-volume wb
- type performance/write-behind
- subvolumes iot
-end-volume
-@end example
-@end cartouche
-
-@node Translators
-@chapter Translators
-
-@menu
-* Storage Translators::
-* Client and Server Translators::
-* Clustering Translators::
-* Performance Translators::
-* Features Translators::
-* Miscellaneous Translators::
-@end menu
-
-This chapter documents all the available GlusterFS translators in detail.
-Each translator section will show its name (for example, @command{cluster/unify}),
-briefly describe its purpose and workings, and list every option accepted by
-that translator and their meaning.
-
-@node Storage Translators
-@section Storage Translators
-
-The storage translators form the ``backend'' for GlusterFS. Currently,
-the only available storage translator is the @acronym{POSIX}
-translator, which stores files on a normal @acronym{POSIX}
-filesystem. A pleasant consequence of this is that your data will
-still be accessible if GlusterFS crashes or cannot be started.
-
-Other storage backends are planned for the future. One of the possibilities is an
-Amazon S3 translator. Amazon S3 is an unlimited online storage service accessible
-through a web services @acronym{API}. The S3 translator will allow you to access
-the storage as a normal @acronym{POSIX} filesystem.
-@footnote{Some more discussion about this can be found at:
-
-http://developer.amazonwebservices.com/connect/message.jspa?messageID=52873}
-
-@menu
-* POSIX::
-* BDB::
-@end menu
-
-@node POSIX
-@subsection POSIX
-@example
-type storage/posix
-@end example
-
-The @command{posix} translator uses a normal @acronym{POSIX}
-filesystem as its ``backend'' to actually store files and
-directories. This can be any filesystem that supports extended
-attributes (@acronym{EXT3}, ReiserFS, @acronym{XFS}, ...). Extended
-attributes are used by some translators to store metadata, for
-example, by the replicate and stripe translators. See
-@ref{Replicate} and @ref{Stripe}, respectively for details.
-
-@cartouche
-@table @code
-@item directory <path>
-The directory on the local filesystem which is to be used for storage.
-@end table
-@end cartouche
-
-@node BDB
-@subsection BDB
-@example
-type storage/bdb
-@end example
-
-The @command{BDB} translator uses a @acronym{Berkeley DB} database as its
-``backend'' to actually store files as key-value pair in the database and
-directories as regular @acronym{POSIX} directories. Note that @acronym{BDB}
-does not provide extended attribute support for regular files. Do not use
-@acronym{BDB} as storage translator while using any translator that demands
-extended attributes on ``backend''.
-
-@cartouche
-@table @code
-@item directory <path>
-The directory on the local filesystem which is to be used for storage.
-@item mode [cache|persistent] (cache)
-When @acronym{BDB} is run in @command{cache} mode, recovery of back-end is not completely
-guaranteed. @command{persistent} guarantees that @acronym{BDB} can recover back-end from
-@acronym{Berkeley DB} even if GlusterFS crashes.
-@item errfile <path>
-The path of the file to be used as @command{errfile} for @acronym{Berkeley DB} to report
-detailed error messages, if any. Note that all the contents of this file will be written
-by @acronym{Berkeley DB}, not GlusterFS.
-@item logdir <path>
-
-
-@end table
-@end cartouche
-
-@node Client and Server Translators, Clustering Translators, Storage Translators, Translators
-@section Client and Server Translators
-
-The client and server translator enable GlusterFS to export a
-translator tree over the network or access a remote GlusterFS
-server. These two translators implement GlusterFS's network protocol.
-
-@menu
-* Transport modules::
-* Client protocol::
-* Server protocol::
-@end menu
-
-@node Transport modules
-@subsection Transport modules
-The client and server translators are capable of using any of the
-pluggable transport modules. Currently available transport modules are
-@command{tcp}, which uses a @acronym{TCP} connection between client
-and server to communicate; @command{ib-sdp}, which uses a
-@acronym{TCP} connection over InfiniBand, and @command{ibverbs}, which
-uses high-speed InfiniBand connections.
-
-Each transport module comes in two different versions, one to be used on
-the server side and the other on the client side.
-
-@subsubsection TCP
-
-The @acronym{TCP} transport module uses a @acronym{TCP/IP} connection between
-the server and the client.
-
-@example
- option transport-type tcp
-@end example
-
-The @acronym{TCP} client module accepts the following options:
-
-@cartouche
-@table @code
-@item non-blocking-connect [no|off|on|yes] (on)
-Whether to make the connection attempt asynchronous.
-@item remote-port <n> (24007)
-Server port to connect to.
-@cindex DNS round robin
-@item remote-host <hostname> *
-Hostname or @acronym{IP} address of the server. If the host name resolves to
-multiple IP addresses, all of them will be tried in a round-robin fashion. This
-feature can be used to implement fail-over.
-@end table
-@end cartouche
-
-The @acronym{TCP} server module accepts the following options:
-
-@cartouche
-@table @code
-@item bind-address <address> (0.0.0.0)
-The local interface on which the server should listen to requests. Default is to
-listen on all interfaces.
-@item listen-port <n> (24007)
-The local port to listen on.
-@end table
-@end cartouche
-
-@subsubsection IB-SDP
-@example
- option transport-type ib-sdp
-@end example
-
-kernel implements socket interface for ib hardware. SDP is over ib-verbs.
-This module accepts the same options as @command{tcp}
-
-@subsubsection ibverbs
-
-@example
- option transport-type tcp
-@end example
-
-@cindex infiniband transport
-
-InfiniBand is a scalable switched fabric interconnect mechanism
-primarily used in high-performance computing. InfiniBand can deliver
-data throughput of the order of 10 Gbit/s, with latencies of 4-5 ms.
-
-The @command{ib-verbs} transport accesses the InfiniBand hardware through
-the ``verbs'' @acronym{API}, which is the lowest level of software access possible
-and which gives the highest performance. On InfiniBand hardware, it is always
-best to use @command{ib-verbs}. Use @command{ib-sdp} only if you cannot get
-@command{ib-verbs} working for some reason.
-
-The @command{ib-verbs} client module accepts the following options:
-
-@cartouche
-@table @code
-@item non-blocking-connect [no|off|on|yes] (on)
-Whether to make the connection attempt asynchronous.
-@item remote-port <n> (24007)
-Server port to connect to.
-@cindex DNS round robin
-@item remote-host <hostname> *
-Hostname or @acronym{IP} address of the server. If the host name resolves to
-multiple IP addresses, all of them will be tried in a round-robin fashion. This
-feature can be used to implement fail-over.
-@end table
-@end cartouche
-
-The @command{ib-verbs} server module accepts the following options:
-
-@cartouche
-@table @code
-@item bind-address <address> (0.0.0.0)
-The local interface on which the server should listen to requests. Default is to
-listen on all interfaces.
-@item listen-port <n> (24007)
-The local port to listen on.
-@end table
-@end cartouche
-
-The following options are common to both the client and server modules:
-
-If you are familiar with InfiniBand jargon,
-the mode is used by GlusterFS is ``reliable connection-oriented channel transfer''.
-
-@cartouche
-@table @code
-@item ib-verbs-work-request-send-count <n> (64)
-Length of the send queue in datagrams. [Reason to increase/decrease?]
-
-@item ib-verbs-work-request-recv-count <n> (64)
-Length of the receive queue in datagrams. [Reason to increase/decrease?]
-
-@item ib-verbs-work-request-send-size <size> (128KB)
-Size of each datagram that is sent. [Reason to increase/decrease?]
-
-@item ib-verbs-work-request-recv-size <size> (128KB)
-Size of each datagram that is received. [Reason to increase/decrease?]
-
-@item ib-verbs-port <n> (1)
-Port number for ib-verbs.
-
-@item ib-verbs-mtu [256|512|1024|2048|4096] (2048)
-The Maximum Transmission Unit [Reason to increase/decrease?]
-
-@item ib-verbs-device-name <device-name> (first device in the list)
-InfiniBand device to be used.
-@end table
-@end cartouche
-
-For maximum performance, you should ensure that the send/receive counts on both
-the client and server are the same.
-
-ib-verbs is preferred over ib-sdp.
-
-@node Client protocol
-@subsection Client
-@example
-type procotol/client
-@end example
-
-The client translator enables the GlusterFS client to access a remote server's
-translator tree.
-
-@cartouche
-@table @code
-
-@item transport-type [tcp,ib-sdp,ib-verbs] (tcp)
-The transport type to use. You should use the client versions of all the
-transport modules (@command{tcp}, @command{ib-sdp},
-@command{ib-verbs}).
-@item remote-subvolume <volume_name> *
-The name of the volume on the remote host to attach to. Note that
-this is @emph{not} the name of the @command{protocol/server} volume on the
-server. It should be any volume under the server.
-@item transport-timeout <n> (120- seconds)
-Inactivity timeout. If a reply is expected and no activity takes place
-on the connection within this time, the transport connection will be
-broken, and a new connection will be attempted.
-@end table
-@end cartouche
-
-@node Server protocol
-@subsection Server
-@example
-type protocol/server
-@end example
-
-The server translator exports a translator tree and makes it accessible to
-remote GlusterFS clients.
-
-@cartouche
-@table @code
-@item client-volume-filename <path> (<CONFDIR>/glusterfs-client.vol)
-The volume specification file to use for the client. This is the file the
-client will receive when it is invoked with the @command{--server} option
-(@ref{Client}).
-
-@item transport-type [tcp,ib-verbs,ib-sdp] (tcp)
-The transport to use. You should use the server versions of all the transport
-modules (@command{tcp}, @command{ib-sdp}, @command{ib-verbs}).
-
-@item auth.addr.<volume name>.allow <IP address wildcard pattern>
-IP addresses of the clients that are allowed to attach to the specified volume.
-This can be a wildcard. For example, a wildcard of the form @command{192.168.*.*}
-allows any host in the @command{192.168.x.x} subnet to connect to the server.
-
-@end table
-@end cartouche
-
-@node Clustering Translators
-@section Clustering Translators
-
-The clustering translators are the most important GlusterFS
-translators, since it is these that make GlusterFS a cluster
-filesystem. These translators together enable GlusterFS to access an
-arbitrarily large amount of storage, and provide @acronym{RAID}-like
-redundancy and distribution over the entire cluster.
-
-There are three clustering translators: @strong{unify}, @strong{replicate},
-and @strong{stripe}. The unify translator aggregates storage from
-many server nodes. The replicate translator provides file replication. The stripe
-translator allows a file to be spread across many server nodes. The following sections
-look at each of these translators in detail.
-
-@menu
-* Unify::
-* Replicate::
-* Stripe::
-@end menu
-
-@node Unify
-@subsection Unify
-@cindex unify (translator)
-@cindex scheduler (unify)
-@example
-type cluster/unify
-@end example
-
-The unify translator presents a `unified' view of all its sub-volumes. That is,
-it makes the union of all its sub-volumes appear as a single volume. It is the
-unify translator that gives GlusterFS the ability to access an arbitrarily
-large amount of storage.
-
-For unify to work correctly, certain invariants need to be maintained across
-the entire network. These are:
-
-@cindex unify invariants
-@itemize
-@item The directory structure of all the sub-volumes must be identical.
-@item A particular file can exist on only one of the sub-volumes. Phrasing it in another way, a pathname such as @command{/home/calvin/homework.txt}) is unique across the entire cluster.
-@end itemize
-
-@tex
-\vfill
-@end tex
-@page
-
-@center @image{unify,44pc,,,.pdf}
-
-Looking at the second requirement, you might wonder how one can
-accomplish storing redundant copies of a file, if no file can exist
-multiple times. To answer, we must remember that these invariants are
-from @emph{unify's perspective}. A translator such as replicate at a lower
-level in the translator tree than unify may subvert this picture.
-
-The first invariant might seem quite tedious to ensure. We shall see
-later that this is not so, since unify's @emph{self-heal} mechanism
-takes care of maintaining it.
-
-The second invariant implies that unify needs some way to decide which file goes where.
-Unify makes use of @emph{scheduler} modules for this purpose.
-
-When a file needs to be created, unify's scheduler decides upon the
-sub-volume to be used to store the file. There are many schedulers
-available, each using a different algorithm and suitable for different
-purposes.
-
-The various schedulers are described in detail in the sections that follow.
-
-@subsubsection ALU
-@cindex alu (scheduler)
-
-@example
- option scheduler alu
-@end example
-
-ALU stands for "Adaptive Least Usage". It is the most advanced
-scheduler available in GlusterFS. It balances the load across volumes
-taking several factors in account. It adapts itself to changing I/O
-patterns according to its configuration. When properly configured, it
-can eliminate the need for regular tuning of the filesystem to keep
-volume load nicely balanced.
-
-The ALU scheduler is composed of multiple least-usage
-sub-schedulers. Each sub-scheduler keeps track of a certain type of
-load, for each of the sub-volumes, getting statistics from
-the sub-volumes themselves. The sub-schedulers are these:
-
-@itemize
-@item disk-usage: The used and free disk space on the volume.
-
-@item read-usage: The amount of reading done from this volume.
-
-@item write-usage: The amount of writing done to this volume.
-
-@item open-files-usage: The number of files currently open from this volume.
-
-@item disk-speed-usage: The speed at which the disks are spinning. This is a constant value and therefore not very useful.
-@end itemize
-
-The ALU scheduler needs to know which of these sub-schedulers to use,
-and in which order to evaluate them. This is done through the
-@command{option alu.order} configuration directive.
-
-Each sub-scheduler needs to know two things: when to kick in (the
-entry-threshold), and how long to stay in control (the
-exit-threshold). For example: when unifying three disks of 100GB,
-keeping an exact balance of disk-usage is not necesary. Instead, there
-could be a 1GB margin, which can be used to nicely balance other
-factors, such as read-usage. The disk-usage scheduler can be told to
-kick in only when a certain threshold of discrepancy is passed, such
-as 1GB. When it assumes control under this condition, it will write
-all subsequent data to the least-used volume. If it is doing so, it is
-unwise to stop right after the values are below the entry-threshold
-again, since that would make it very likely that the situation will
-occur again very soon. Such a situation would cause the ALU to spend
-most of its time disk-usage scheduling, which is unfair to the other
-sub-schedulers. The exit-threshold therefore defines the amount of
-data that needs to be written to the least-used disk, before control
-is relinquished again.
-
-In addition to the sub-schedulers, the ALU scheduler also has "limits"
-options. These can stop the creation of new files on a volume once
-values drop below a certain threshold. For example, setting
-@command{option alu.limits.min-free-disk 5GB} will stop the scheduling
-of files to volumes that have less than 5GB of free disk space,
-leaving the files on that disk some room to grow.
-
-The actual values you assign to the thresholds for sub-schedulers and
-limits depend on your situation. If you have fast-growing files,
-you'll want to stop file-creation on a disk much earlier than when
-hardly any of your files are growing. If you care less about
-disk-usage balance than about read-usage balance, you'll want a bigger
-disk-usage scheduler entry-threshold and a smaller read-usage
-scheduler entry-threshold.
-
-For thresholds defining a size, values specifying "KB", "MB" and "GB"
-are allowed. For example: @command{option alu.limits.min-free-disk 5GB}.
-
-@cartouche
-@table @code
-@item alu.order <order> * ("disk-usage:write-usage:read-usage:open-files-usage:disk-speed")
-@item alu.disk-usage.entry-threshold <size> (1GB)
-@item alu.disk-usage.exit-threshold <size> (512MB)
-@item alu.write-usage.entry-threshold <%> (25)
-@item alu.write-usage.exit-threshold <%> (5)
-@item alu.read-usage.entry-threshold <%> (25)
-@item alu.read-usage.exit-threshold <%> (5)
-@item alu.open-files-usage.entry-threshold <n> (1000)
-@item alu.open-files-usage.exit-threshold <n> (100)
-@item alu.limits.min-free-disk <%>
-@item alu.limits.max-open-files <n>
-@end table
-@end cartouche
-
-@subsubsection Round Robin (RR)
-@cindex rr (scheduler)
-
-@example
- option scheduler rr
-@end example
-
-Round-Robin (RR) scheduler creates files in a round-robin
-fashion. Each client will have its own round-robin loop. When your
-files are mostly similar in size and I/O access pattern, this
-scheduler is a good choice. RR scheduler checks for free disk space
-on the server before scheduling, so you can know when to add
-another server node. The default value of min-free-disk is 5% and is
-checked on file creation calls, with atleast 10 seconds (by default)
-elapsing between two checks.
-
-Options:
-@cartouche
-@table @code
-@item rr.limits.min-free-disk <%> (5)
-Minimum free disk space a node must have for RR to schedule a file to it.
-@item rr.refresh-interval <t> (10 seconds)
-Time between two successive free disk space checks.
-@end table
-@end cartouche
-
-@subsubsection Random
-@cindex random (scheduler)
-
-@example
- option scheduler random
-@end example
-
-The random scheduler schedules file creation randomly among its child nodes.
-Like the round-robin scheduler, it also checks for a minimum amount of free disk
-space before scheduling a file to a node.
-
-@cartouche
-@table @code
-@item random.limits.min-free-disk <%> (5)
-Minimum free disk space a node must have for random to schedule a file to it.
-@item random.refresh-interval <t> (10 seconds)
-Time between two successive free disk space checks.
-@end table
-@end cartouche
-
-@subsubsection NUFA
-@cindex nufa (scheduler)
-
-@example
- option scheduler nufa
-@end example
-
-It is common in many GlusterFS computing environments for all deployed
-machines to act as both servers and clients. For example, a
-research lab may have 40 workstations each with its own storage. All
-of these workstations might act as servers exporting a volume as well
-as clients accessing the entire cluster's storage. In such a
-situation, it makes sense to store locally created files on the local
-workstation itself (assuming files are accessed most by the
-workstation that created them). The Non-Uniform File Allocation (@acronym{NUFA})
-scheduler accomplishes that.
-
-@acronym{NUFA} gives the local system first priority for file creation
-over other nodes. If the local volume does not have more free disk space
-than a specified amount (5% by default) then @acronym{NUFA} schedules files
-among the other child volumes in a round-robin fashion.
-
-@acronym{NUFA} is named after the similar strategy used for memory access,
-@acronym{NUMA}@footnote{Non-Uniform Memory Access:
-@indicateurl{http://en.wikipedia.org/wiki/Non-Uniform_Memory_Access}}.
-
-@cartouche
-@table @code
-@item nufa.limits.min-free-disk <%> (5)
-Minimum disk space that must be free (local or remote) for @acronym{NUFA} to schedule a
-file to it.
-@item nufa.refresh-interval <t> (10 seconds)
-Time between two successive free disk space checks.
-@item nufa.local-volume-name <volume>
-The name of the volume corresponding to the local system. This volume must be
-one of the children of the unify volume. This option is mandatory.
-@end table
-@end cartouche
-
-@cindex namespace
-@subsubsection Namespace
-Namespace volume needed because:
- - persistent inode numbers.
- - file exists even when node is down.
-
-namespace files are simply touched. on every lookup it is checked.
-
-@cartouche
-@table @code
-@item namespace <volume> *
-Name of the namespace volume (which should be one of the unify volume's children).
-@item self-heal [on|off] (on)
-Enable/disable self-heal. Unless you know what you are doing, do not disable self-heal.
-@end table
-@end cartouche
-
-@cindex self heal (unify)
-@subsubsection Self Heal
- * When a 'lookup()/stat()' call is made on directory for the first
-time, a self-heal call is made, which checks for the consistancy of
-its child nodes. If an entry is present in storage node, but not in
-namespace, that entry is created in namespace, and vica-versa. There
-is an writedir() API introduced which is used for the same. It also
-checks for permissions, and uid/gid consistencies.
-
- * This check is also done when an server goes down and comes up.
-
- * If one starts with an empty namespace export, but has data in
-storage nodes, a 'find .>/dev/null' or 'ls -lR >/dev/null' should help
-to build namespace in one shot. Even otherwise, namespace is built on
-demand when a file is looked up for the first time.
-
-NOTE: There are some issues (Kernel 'Oops' msgs) seen with fuse-2.6.3,
-when someone deletes namespace in backend, when glusterfs is
-running. But with fuse-2.6.5, this issue is not there.
-
-@node Replicate
-@subsection Replicate (formerly AFR)
-@cindex Replicate
-@example
-type cluster/replicate
-@end example
-
-Replicate provides @acronym{RAID}-1 like functionality for
-GlusterFS. Replicate replicates files and directories across the
-subvolumes. Hence if Replicate has four subvolumes, there will be
-four copies of all files and directories. Replicate provides
-high-availability, i.e., in case one of the subvolumes go down
-(e. g. server crash, network disconnection) Replicate will still
-service the requests using the redundant copies.
-
-Replicate also provides self-heal functionality, i.e., in case the
-crashed servers come up, the outdated files and directories will be
-updated with the latest versions. Replicate uses extended
-attributes of the backend file system to track the versioning of files
-and directories and provide the self-heal feature.
-
-@example
-volume replicate-example
- type cluster/replicate
- subvolumes brick1 brick2 brick3
-end-volume
-@end example
-
-This sample configuration will replicate all directories and files on
-brick1, brick2 and brick3.
-
-All the read operations happen from the first alive child. If all the
-three sub-volumes are up, reads will be done from brick1; if brick1 is
-down read will be done from brick2. In case read() was being done on
-brick1 and it goes down, replicate transparently falls back to
-brick2.
-
-The next release of GlusterFS will add the following features:
-@itemize
-@item Ability to specify the sub-volume from which read operations are to be done (this will help users who have one of the sub-volumes as a local storage volume).
-@item Allow scheduling of read operations amongst the sub-volumes in a round-robin fashion.
-@end itemize
-
-The order of the subvolumes list should be same across all the 'replicate's as
-they will be used for locking purposes.
-
-@cindex self heal (replicate)
-@subsubsection Self Heal
-Replicate has self-heal feature, which updates the outdated file and
-directory copies by the most recent versions. For example consider the
-following config:
-
-@example
-volume replicate-example
- type cluster/replicate
- subvolumes brick1 brick2
-end-volume
-@end example
-
-@subsubsection File self-heal
-
-Now if we create a file foo.txt on replicate-example, the file will be created
-on brick1 and brick2. The file will have two extended attributes associated
-with it in the backend filesystem. One is trusted.afr.createtime and the
-other is trusted.afr.version. The trusted.afr.createtime xattr has the
-create time (in terms of seconds since epoch) and trusted.afr.version
-is a number that is incremented each time a file is modified. This increment
-happens during close (incase any write was done before close).
-
-If brick1 goes down, we edit foo.txt the version gets incremented. Now
-the brick1 comes back up, when we open() on foo.txt replicate will check if
-their versions are same. If they are not same, the outdated copy is
-replaced by the latest copy and its version is updated. After the sync
-the open() proceeds in the usual manner and the application calling open()
-can continue on its access to the file.
-
-If brick1 goes down, we delete foo.txt and create a file with the same
-name again i.e foo.txt. Now brick1 comes back up, clearly there is a
-chance that the version on brick1 being more than the version on brick2,
-this is where createtime extended attribute helps in deciding which
-the outdated copy is. Hence we need to consider both createtime and
-version to decide on the latest copy.
-
-The version attribute is incremented during the close() call. Version
-will not be incremented in case there was no write() done. In case the
-fd that the close() gets was got by create() call, we also create
-the createtime extended attribute.
-
-@subsubsection Directory self-heal
-
-Suppose brick1 goes down, we delete foo.txt, brick1 comes back up, now
-we should not create foo.txt on brick2 but we should delete foo.txt
-on brick1. We handle this situation by having the createtime and version
-attribute on the directory similar to the file. when lookup() is done
-on the directory, we compare the createtime/version attributes of the
-copies and see which files needs to be deleted and delete those files
-and update the extended attributes of the outdated directory copy.
-Each time a directory is modified (a file or a subdirectory is created
-or deleted inside the directory) and one of the subvols is down, we
-increment the directory's version.
-
-lookup() is a call initiated by the kernel on a file or directory
-just before any access to that file or directory. In glusterfs, by
-default, lookup() will not be called in case it was called in the
-past one second on that particular file or directory.
-
-The extended attributes can be seen in the backend filesystem using
-the @command{getfattr} command. (@command{getfattr -n trusted.afr.version <file>})
-
-@cartouche
-@table @code
-@item debug [on|off] (off)
-@item self-heal [on|off] (on)
-@item replicate <pattern> (*:1)
-@item lock-node <child_volume> (first child is used by default)
-@end table
-@end cartouche
-
-@node Stripe
-@subsection Stripe
-@cindex stripe (translator)
-@example
-type cluster/stripe
-@end example
-
-The stripe translator distributes the contents of a file over its
-sub-volumes. It does this by creating a file equal in size to the
-total size of the file on each of its sub-volumes. It then writes only
-a part of the file to each sub-volume, leaving the rest of it empty.
-These empty regions are called `holes' in Unix terminology. The holes
-do not consume any disk space.
-
-The diagram below makes this clear.
-
-@center @image{stripe,44pc,,,.pdf}
-
-You can configure stripe so that only filenames matching a pattern
-are striped. You can also configure the size of the data to be stored
-on each sub-volume.
-
-@cartouche
-@table @code
-@item block-size <pattern>:<size> (*:0 no striping)
-Distribute files matching @command{<pattern>} over the sub-volumes,
-storing at least @command{<size>} on each sub-volume. For example,
-
-@example
- option block-size *.mpg:1M
-@end example
-
-distributes all files ending in @command{.mpg}, storing at least 1 MB on
-each sub-volume.
-
-Any number of @command{block-size} option lines may be present, specifying
-different sizes for different file name patterns.
-@end table
-@end cartouche
-
-@node Performance Translators
-@section Performance Translators
-
-@menu
-* Read Ahead::
-* Write Behind::
-* IO Threads::
-* IO Cache::
-* Booster::
-@end menu
-
-@node Read Ahead
-@subsection Read Ahead
-@cindex read-ahead (translator)
-@example
-type performance/read-ahead
-@end example
-
-The read-ahead translator pre-fetches data in advance on every read.
-This benefits applications that mostly process files in sequential order,
-since the next block of data will already be available by the time the
-application is done with the current one.
-
-Additionally, the read-ahead translator also behaves as a read-aggregator.
-Many small read operations are combined and issued as fewer, larger read
-requests to the server.
-
-Read-ahead deals in ``pages'' as the unit of data fetched. The page size
-is configurable, as is the ``page count'', which is the number of pages
-that are pre-fetched.
-
-Read-ahead is best used with InfiniBand (using the ib-verbs transport).
-On FastEthernet and Gigabit Ethernet networks,
-GlusterFS can achieve the link-maximum throughput even without
-read-ahead, making it quite superflous.
-
-Note that read-ahead only happens if the reads are perfectly
-sequential. If your application accesses data in a random fashion,
-using read-ahead might actually lead to a performance loss, since
-read-ahead will pointlessly fetch pages which won't be used by the
-application.
-
-@cartouche
-Options:
-@table @code
-@item page-size <n> (256KB)
-The unit of data that is pre-fetched.
-@item page-count <n> (2)
-The number of pages that are pre-fetched.
-@item force-atime-update [on|off|yes|no] (off|no)
-Whether to force an access time (atime) update on the file on every read. Without
-this, the atime will be slightly imprecise, as it will reflect the time when
-the read-ahead translator read the data, not when the application actually read it.
-@end table
-@end cartouche
-
-@node Write Behind
-@subsection Write Behind
-@cindex write-behind (translator)
-@example
-type performance/write-behind
-@end example
-
-The write-behind translator improves the latency of a write operation.
-It does this by relegating the write operation to the background and
-returning to the application even as the write is in progress. Using the
-write-behind translator, successive write requests can be pipelined.
-This mode of write-behind operation is best used on the client side, to
-enable decreased write latency for the application.
-
-The write-behind translator can also aggregate write requests. If the
-@command{aggregate-size} option is specified, then successive writes upto that
-size are accumulated and written in a single operation. This mode of operation
-is best used on the server side, as this will decrease the disk's head movement
-when multiple files are being written to in parallel.
-
-The @command{aggregate-size} option has a default value of 128KB. Although
-this works well for most users, you should always experiment with different values
-to determine the one that will deliver maximum performance. This is because the
-performance of write-behind depends on your interconnect, size of RAM, and the
-work load.
-
-@cartouche
-@table @code
-@item aggregate-size <n> (128KB)
-Amount of data to accumulate before doing a write
-@item flush-behind [on|yes|off|no] (off|no)
-
-@end table
-@end cartouche
-
-@node IO Threads
-@subsection IO Threads
-@cindex io-threads (translator)
-@example
-type performance/io-threads
-@end example
-
-The IO threads translator is intended to increase the responsiveness
-of the server to metadata operations by doing file I/O (read, write)
-in a background thread. Since the GlusterFS server is
-single-threaded, using the IO threads translator can significantly
-improve performance. This translator is best used on the server side,
-loaded just below the server protocol translator.
-
-IO threads operates by handing out read and write requests to a separate thread.
-The total number of threads in existence at a time is constant, and configurable.
-
-@cartouche
-@table @code
-@item thread-count <n> (1)
-Number of threads to use.
-@end table
-@end cartouche
-
-@node IO Cache
-@subsection IO Cache
-@cindex io-cache (translator)
-@example
-type performance/io-cache
-@end example
-
-The IO cache translator caches data that has been read. This is useful
-if many applications read the same data multiple times, and if reads
-are much more frequent than writes (for example, IO caching may be
-useful in a web hosting environment, where most clients will simply
-read some files and only a few will write to them).
-
-The IO cache translator reads data from its child in @command{page-size} chunks.
-It caches data upto @command{cache-size} bytes. The cache is maintained as
-a prioritized least-recently-used (@acronym{LRU}) list, with priorities determined
-by user-specified patterns to match filenames.
-
-When the IO cache translator detects a write operation, the
-cache for that file is flushed.
-
-The IO cache translator periodically verifies the consistency of
-cached data, using the modification times on the files. The verification timeout
-is configurable.
-
-@cartouche
-@table @code
-@item page-size <n> (128KB)
-Size of a page.
-@item cache-size (n) (32MB)
-Total amount of data to be cached.
-@item force-revalidate-timeout <n> (1)
-Timeout to force a cache consistency verification, in seconds.
-@item priority <pattern> (*:0)
-Filename patterns listed in order of priority.
-@end table
-@end cartouche
-
-@node Booster
-@subsection Booster
-@cindex booster
-@example
- type performance/booster
-@end example
-
-The booster translator gives applications a faster path to communicate
-read and write requests to GlusterFS. Normally, all requests to GlusterFS from
-applications go through FUSE, as indicated in @ref{Filesystems in Userspace}.
-Using the booster translator in conjunction with the GlusterFS booster shared
-library, an application can bypass the FUSE path and send read/write requests
-directly to the GlusterFS client process.
-
-The booster mechanism consists of two parts: the booster translator,
-and the booster shared library. The booster translator is meant to be
-loaded on the client side, usually at the root of the translator tree.
-The booster shared library should be @command{LD_PRELOAD}ed with the
-application.
-
-The booster translator when loaded opens a Unix domain socket and
-listens for read/write requests on it. The booster shared library
-intercepts read and write system calls and sends the requests to the
-GlusterFS process directly using the Unix domain socket, bypassing FUSE.
-This leads to superior performance.
-
-Once you've loaded the booster translator in your volume specification file, you
-can start your application as:
-
-@example
- $ LD_PRELOAD=/usr/local/bin/glusterfs-booster.so your_app
-@end example
-
-The booster translator accepts no options.
-
-@node Features Translators
-@section Features Translators
-
-@menu
-* POSIX Locks::
-* Fixed ID::
-@end menu
-
-@node POSIX Locks
-@subsection POSIX Locks
-@cindex record locking
-@cindex fcntl
-@cindex posix-locks (translator)
-@example
-type features/posix-locks
-@end example
-
-This translator provides storage independent POSIX record locking
-support (@command{fcntl} locking). Typically you'll want to load this on the
-server side, just above the @acronym{POSIX} storage translator. Using this
-translator you can get both advisory locking and mandatory locking
-support. It also handles @command{flock()} locks properly.
-
-Caveat: Consider a file that does not have its mandatory locking bits
-(+setgid, -group execution) turned on. Assume that this file is now
-opened by a process on a client that has the write-behind xlator
-loaded. The write-behind xlator does not cache anything for files
-which have mandatory locking enabled, to avoid incoherence. Let's say
-that mandatory locking is now enabled on this file through another
-client. The former client will not know about this change, and
-write-behind may erroneously report a write as being successful when
-in fact it would fail due to the region it is writing to being locked.
-
-There seems to be no easy way to fix this. To work around this
-problem, it is recommended that you never enable the mandatory bits on
-a file while it is open.
-
-@cartouche
-@table @code
-@item mandatory [on|off] (on)
-Turns mandatory locking on.
-@end table
-@end cartouche
-
-@node Fixed ID
-@subsection Fixed ID
-@cindex fixed-id (translator)
-@example
-type features/fixed-id
-@end example
-
-The fixed ID translator makes all filesystem requests from the client
-to appear to be coming from a fixed, specified
-@acronym{UID}/@acronym{GID}, regardless of which user actually
-initiated the request.
-
-@cartouche
-@table @code
-@item fixed-uid <n> [if not set, not used]
-The @acronym{UID} to send to the server
-@item fixed-gid <n> [if not set, not used]
-The @acronym{GID} to send to the server
-@end table
-@end cartouche
-
-@node Miscellaneous Translators
-@section Miscellaneous Translators
-
-@menu
-* ROT-13::
-* Trace::
-@end menu
-
-@node ROT-13
-@subsection ROT-13
-@cindex rot-13 (translator)
-@example
-type encryption/rot-13
-@end example
-
-@acronym{ROT-13} is a toy translator that can ``encrypt'' and ``decrypt'' file
-contents using the @acronym{ROT-13} algorithm. @acronym{ROT-13} is a trivial
-algorithm that rotates each alphabet by thirteen places. Thus, 'A' becomes 'N',
-'B' becomes 'O', and 'Z' becomes 'M'.
-
-It goes without saying that you shouldn't use this translator if you need
-@emph{real} encryption (a future release of GlusterFS will have real encryption
-translators).
-
-@cartouche
-@table @code
-@item encrypt-write [on|off] (on)
-Whether to encrypt on write
-@item decrypt-read [on|off] (on)
-Whether to decrypt on read
-@end table
-@end cartouche
-
-@node Trace
-@subsection Trace
-@cindex trace (translator)
-@example
-type debug/trace
-@end example
-
-The trace translator is intended for debugging purposes. When loaded, it
-logs all the system calls received by the server or client (wherever
-trace is loaded), their arguments, and the results. You must use a GlusterFS log
-level of DEBUG (See @ref{Running GlusterFS}) for trace to work.
-
-Sample trace output (lines have been wrapped for readability):
-@cartouche
-@example
-2007-10-30 00:08:58 D [trace.c:1579:trace_opendir] trace: callid: 68
-(*this=0x8059e40, loc=0x8091984 @{path=/iozone3_283, inode=0x8091f00@},
- fd=0x8091d50)
-
-2007-10-30 00:08:58 D [trace.c:630:trace_opendir_cbk] trace:
-(*this=0x8059e40, op_ret=4, op_errno=1, fd=0x8091d50)
-
-2007-10-30 00:08:58 D [trace.c:1602:trace_readdir] trace: callid: 69
-(*this=0x8059e40, size=4096, offset=0 fd=0x8091d50)
-
-2007-10-30 00:08:58 D [trace.c:215:trace_readdir_cbk] trace:
-(*this=0x8059e40, op_ret=0, op_errno=0, count=4)
-
-2007-10-30 00:08:58 D [trace.c:1624:trace_closedir] trace: callid: 71
-(*this=0x8059e40, *fd=0x8091d50)
-
-2007-10-30 00:08:58 D [trace.c:809:trace_closedir_cbk] trace:
-(*this=0x8059e40, op_ret=0, op_errno=1)
-@end example
-@end cartouche
-
-@node Usage Scenarios
-@chapter Usage Scenarios
-
-@section Advanced Striping
-
-This section is based on the Advanced Striping tutorial written by
-Anand Avati on the GlusterFS wiki
-@footnote{http://gluster.org/docs/index.php/Mixing_Striped_and_Regular_Files}.
-
-@subsection Mixed Storage Requirements
-
-There are two ways of scheduling the I/O. One at file level (using
-unify translator) and other at block level (using stripe
-translator). Striped I/O is good for files that are potentially large
-and require high parallel throughput (for example, a single file of
-400GB being accessed by 100s and 1000s of systems simultaneously and
-randomly). For most of the cases, file level scheduling works best.
-
-In the real world, it is desirable to mix file level and block level
-scheduling on a single storage volume. Alternatively users can choose
-to have two separate volumes and hence two mount points, but the
-applications may demand a single storage system to host both.
-
-This document explains how to mix file level scheduling with stripe.
-
-@subsection Configuration Brief
-
-This setup demonstrates how users can configure unify translator with
-appropriate I/O scheduler for file level scheduling and strip for only
-matching patterns. This way, GlusterFS chooses appropriate I/O profile
-and knows how to efficiently handle both the types of data.
-
-A simple technique to achieve this effect is to create a stripe set of
-unify and stripe blocks, where unify is the first sub-volume. Files
-that do not match the stripe policy passed on to first unify
-sub-volume and inturn scheduled arcoss the cluster using its file
-level I/O scheduler.
-
-@image{advanced-stripe,44pc,,,.pdf}
-
-@subsection Preparing GlusterFS Envoronment
-
-Create the directories /export/namespace, /export/unify and
-/export/stripe on all the storage bricks.
-
- Place the following server and client volume spec file under
-/etc/glusterfs (or appropriate installed path) and replace the IP
-addresses / access control fields to match your environment.
-
-@cartouche
-@example
- ## file: /etc/glusterfs/glusterfsd.vol
- volume posix-unify
- type storage/posix
- option directory /export/for-unify
- end-volume
-
- volume posix-stripe
- type storage/posix
- option directory /export/for-stripe
- end-volume
-
- volume posix-namespace
- type storage/posix
- option directory /export/for-namespace
- end-volume
-
- volume server
- type protocol/server
- option transport-type tcp
- option auth.addr.posix-unify.allow 192.168.1.*
- option auth.addr.posix-stripe.allow 192.168.1.*
- option auth.addr.posix-namespace.allow 192.168.1.*
- subvolumes posix-unify posix-stripe posix-namespace
- end-volume
-@end example
-@end cartouche
-
-@cartouche
-@example
- ## file: /etc/glusterfs/glusterfs.vol
- volume client-namespace
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.1
- option remote-subvolume posix-namespace
- end-volume
-
- volume client-unify-1
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.1
- option remote-subvolume posix-unify
- end-volume
-
- volume client-unify-2
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.2
- option remote-subvolume posix-unify
- end-volume
-
- volume client-unify-3
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.3
- option remote-subvolume posix-unify
- end-volume
-
- volume client-unify-4
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.4
- option remote-subvolume posix-unify
- end-volume
-
- volume client-stripe-1
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.1
- option remote-subvolume posix-stripe
- end-volume
-
- volume client-stripe-2
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.2
- option remote-subvolume posix-stripe
- end-volume
-
- volume client-stripe-3
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.3
- option remote-subvolume posix-stripe
- end-volume
-
- volume client-stripe-4
- type protocol/client
- option transport-type tcp
- option remote-host 192.168.1.4
- option remote-subvolume posix-stripe
- end-volume
-
- volume unify
- type cluster/unify
- option scheduler rr
- subvolumes cluster-unify-1 cluster-unify-2 cluster-unify-3 cluster-unify-4
- end-volume
-
- volume stripe
- type cluster/stripe
- option block-size *.img:2MB # All files ending with .img are striped with 2MB stripe block size.
- subvolumes unify cluster-stripe-1 cluster-stripe-2 cluster-stripe-3 cluster-stripe-4
- end-volume
-@end example
-@end cartouche
-
-
-Bring up the Storage
-
-Starting GlusterFS Server: If you have installed through binary
-package, you can start the service through init.d startup script. If
-not:
-
-@example
-[root@@server]# glusterfsd
-@end example
-
-Mounting GlusterFS Volumes:
-
-@example
-[root@@client]# glusterfs -s [BRICK-IP-ADDRESS] /mnt/cluster
-@end example
-
-Improving upon this Setup
-
-Infiniband Verbs RDMA transport is much faster than TCP/IP GigE
-transport.
-
-Use of performance translators such as read-ahead, write-behind,
-io-cache, io-threads, booster is recommended.
-
-Replace round-robin (rr) scheduler with ALU to handle more dynamic
-storage environments.
-
-@node Troubleshooting
-@chapter Troubleshooting
-
-This chapter is a general troubleshooting guide to GlusterFS. It lists
-common GlusterFS server and client error messages, debugging hints, and
-concludes with the suggested procedure to report bugs in GlusterFS.
-
-@section GlusterFS error messages
-
-@subsection Server errors
-
-@example
-glusterfsd: FATAL: could not open specfile:
-'/etc/glusterfs/glusterfsd.vol'
-@end example
-
-The GlusterFS server expects the volume specification file to be
-at @command{/etc/glusterfs/glusterfsd.vol}. The example
-specification file will be installed as
-@command{/etc/glusterfs/glusterfsd.vol.sample}. You need to edit
-it and rename it, or provide a different specification file using
-the @command{--spec-file} command line option (See @ref{Server}).
-
-@vskip 4ex
-
-@example
-gf_log_init: failed to open logfile "/usr/var/log/glusterfs/glusterfsd.log"
- (Permission denied)
-@end example
-
-You don't have permission to create files in the
-@command{/usr/var/log/glusterfs} directory. Make sure you are running
-GlusterFS as root. Alternatively, specify a different path for the log
-file using the @command{--log-file} option (See @ref{Server}).
-
-@subsection Client errors
-
-@example
-fusermount: failed to access mountpoint /mnt:
- Transport endpoint is not connected
-@end example
-
-A previous failed (or hung) mount of GlusterFS is preventing it from being
-mounted again in the same location. The fix is to do:
-
-@example
-# umount /mnt
-@end example
-
-and try mounting again.
-
-@vskip 4ex
-
-@strong{``Transport endpoint is not connected''.}
-
-If you get this error when you try a command such as @command{ls} or @command{cat},
-it means the GlusterFS mount did not succeed. Try running GlusterFS in @command{DEBUG}
-logging level and study the log messages to discover the cause.
-
-@vskip 4ex
-
-@strong{``Connect to server failed'', ``SERVER-ADDRESS: Connection refused''.}
-
-GluserFS Server is not running or dead. Check your network
-connections and firewall settings. To check if the server is reachable,
-try:
-
-@example
-telnet IP-ADDRESS 24007
-@end example
-
-If the server is accessible, your `telnet' command should connect and
-block. If not you will see an error message such as @command{telnet: Unable to
-connect to remote host: Connection refused}. 24007 is the default
-GlusterFS port. If you have changed it, then use the corresponding
-port instead.
-
-@vskip 4ex
-
-@example
-gf_log_init: failed to open logfile "/usr/var/log/glusterfs/glusterfs.log"
- (Permission denied)
-@end example
-
-You don't have permission to create files in the
-@command{/usr/var/log/glusterfs} directory. Make sure you are running
-GlusterFS as root. Alternatively, specify a different path for the log
-file using the @command{--log-file} option (See @ref{Client}).
-
-@section FUSE error messages
-@command{modprobe fuse} fails with: ``Unknown symbol in module, or unknown parameter''.
-@cindex Redhat Enterprise Linux
-
-If you are using fuse-2.6.x on Redhat Enterprise Linux Work Station 4
-and Advanced Server 4 with 2.6.9-42.ELlargesmp, 2.6.9-42.ELsmp,
-2.6.9-42.EL kernels and get this error while loading @acronym{FUSE} kernel
-module, you need to apply the following patch.
-
-For fuse-2.6.2:
-
-@indicateurl{http://ftp.gluster.com/pub/gluster/glusterfs/fuse/fuse-2.6.2-rhel-build.patch}
-
-For fuse-2.6.3:
-
-@indicateurl{http://ftp.gluster.com/pub/gluster/glusterfs/fuse/fuse-2.6.3-rhel-build.patch}
-
-@section AppArmour and GlusterFS
-@cindex AppArmour
-@cindex OpenSuSE
-Under OpenSuSE GNU/Linux, the AppArmour security feature does not
-allow GlusterFS to create temporary files or network socket
-connections even while running as root. You will see error messages
-like `Unable to open log file: Operation not permitted' or `Connection
-refused'. Disabling AppArmour using YaST or properly configuring
-AppArmour to recognize @command{glusterfsd} or @command{glusterfs}/@command{fusermount}
-should solve the problem.
-
-@section Reporting a bug
-
-If you encounter a bug in GlusterFS, please follow the below
-guidelines when you report it to the mailing list. Be sure to report
-it! User feedback is crucial to the health of the project and we value
-it highly.
-
-@subsection General instructions
-
-When running GlusterFS in a non-production environment, be sure to
-build it with the following command:
-
-@example
- $ make CFLAGS='-g -O0 -DDEBUG'
-@end example
-
-This includes debugging information which will be helpful in getting
-backtraces (see below) and also disable optimization. Enabling
-optimization can result in incorrect line numbers being reported to
-gdb.
-
-@subsection Volume specification files
-
-Attach all relevant server and client spec files you were using when
-you encountered the bug. Also tell us details of your setup, i.e., how
-many clients and how many servers.
-
-@subsection Log files
-
-Set the loglevel of your client and server programs to @acronym{DEBUG} (by
-passing the -L @acronym{DEBUG} option) and attach the log files with your bug
-report. Obviously, if only the client is failing (for example), you
-only need to send us the client log file.
-
-@subsection Backtrace
-
-If GlusterFS has encountered a segmentation fault or has crashed for
-some other reason, include the backtrace with the bug report. You can
-get the backtrace using the following procedure.
-
-Run the GlusterFS client or server inside gdb.
-
-@example
- $ gdb ./glusterfs
- (gdb) set args -f client.spec -N -l/path/to/log/file -LDEBUG /mnt/point
- (gdb) run
-@end example
-
-Now when the process segfaults, you can get the backtrace by typing:
-
-@example
- (gdb) bt
-@end example
-
-If the GlusterFS process has crashed and dumped a core file (you can
-find this in / if running as a daemon and in the current directory
-otherwise), you can do:
-
-@example
- $ gdb /path/to/glusterfs /path/to/core.<pid>
-@end example
-
-and then get the backtrace.
-
-If the GlusterFS server or client seems to be hung, then you can get
-the backtrace by attaching gdb to the process. First get the @command{PID} of
-the process (using ps), and then do:
-
-@example
- $ gdb ./glusterfs <pid>
-@end example
-
-Press Ctrl-C to interrupt the process and then generate the backtrace.
-
-@subsection Reproducing the bug
-
-If the bug is reproducible, please include the steps necessary to do
-so. If the bug is not reproducible, send us the bug report anyway.
-
-@subsection Other information
-
-If you think it is relevant, send us also the version of @acronym{FUSE} you're
-using, the kernel version, platform.
-
-@node GNU Free Documentation Licence
-@appendix GNU Free Documentation Licence
-@include fdl.texi
-
-@node Index
-@unnumbered Index
-@printindex cp
-
-@bye
diff --git a/doc/user-guide/legacy/xlator.odg b/doc/user-guide/legacy/xlator.odg
deleted file mode 100644
index 179a65f6e26..00000000000
--- a/doc/user-guide/legacy/xlator.odg
+++ /dev/null
Binary files differ
diff --git a/doc/user-guide/legacy/xlator.pdf b/doc/user-guide/legacy/xlator.pdf
deleted file mode 100644
index a07e14d67d2..00000000000
--- a/doc/user-guide/legacy/xlator.pdf
+++ /dev/null
Binary files differ
diff --git a/events/Makefile.am b/events/Makefile.am
new file mode 100644
index 00000000000..264bb742a80
--- /dev/null
+++ b/events/Makefile.am
@@ -0,0 +1,8 @@
+SUBDIRS = src tools
+EXTRA_DIST = eventskeygen.py
+noinst_PYTHON = eventskeygen.py
+
+if BUILD_EVENTS
+install-data-hook:
+ $(INSTALL) -d -m 755 $(DESTDIR)@GLUSTERD_WORKDIR@/events
+endif
diff --git a/events/eventskeygen.py b/events/eventskeygen.py
new file mode 100644
index 00000000000..e28ebe9b7e6
--- /dev/null
+++ b/events/eventskeygen.py
@@ -0,0 +1,243 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import os
+import sys
+
+GLUSTER_SRC_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+eventtypes_h = os.path.join(GLUSTER_SRC_ROOT, "libglusterfs/src/eventtypes.h")
+eventtypes_py = os.path.join(GLUSTER_SRC_ROOT, "events/src/eventtypes.py")
+
+gen_header_type = sys.argv[1]
+
+# When adding new keys add it to the END
+keys = (
+ # user driven events
+ #peer and volume management events
+ "EVENT_PEER_ATTACH",
+ "EVENT_PEER_DETACH",
+ "EVENT_VOLUME_CREATE",
+ "EVENT_VOLUME_START",
+ "EVENT_VOLUME_STOP",
+ "EVENT_VOLUME_DELETE",
+ "EVENT_VOLUME_SET",
+ "EVENT_VOLUME_RESET",
+ "EVENT_BRICK_RESET_START",
+ "EVENT_BRICK_RESET_COMMIT",
+ "EVENT_BRICK_REPLACE",
+
+ #geo-rep events
+ "EVENT_GEOREP_CREATE",
+ "EVENT_GEOREP_START",
+ "EVENT_GEOREP_STOP",
+ "EVENT_GEOREP_PAUSE",
+ "EVENT_GEOREP_RESUME",
+ "EVENT_GEOREP_DELETE",
+ "EVENT_GEOREP_CONFIG_SET",
+ "EVENT_GEOREP_CONFIG_RESET",
+
+ #bitrot events
+ "EVENT_BITROT_ENABLE",
+ "EVENT_BITROT_DISABLE",
+ "EVENT_BITROT_SCRUB_THROTTLE",
+ "EVENT_BITROT_SCRUB_FREQ",
+ "EVENT_BITROT_SCRUB_OPTION",
+ "EVENT_BITROT_SCRUB_ONDEMAND",
+
+ #quota events
+ "EVENT_QUOTA_ENABLE",
+ "EVENT_QUOTA_DISABLE",
+ "EVENT_QUOTA_SET_USAGE_LIMIT",
+ "EVENT_QUOTA_SET_OBJECTS_LIMIT",
+ "EVENT_QUOTA_REMOVE_USAGE_LIMIT",
+ "EVENT_QUOTA_REMOVE_OBJECTS_LIMIT",
+ "EVENT_QUOTA_ALERT_TIME",
+ "EVENT_QUOTA_SOFT_TIMEOUT",
+ "EVENT_QUOTA_HARD_TIMEOUT",
+ "EVENT_QUOTA_DEFAULT_SOFT_LIMIT",
+
+ #snapshot events
+ "EVENT_SNAPSHOT_CREATED",
+ "EVENT_SNAPSHOT_CREATE_FAILED",
+ "EVENT_SNAPSHOT_ACTIVATED",
+ "EVENT_SNAPSHOT_ACTIVATE_FAILED",
+ "EVENT_SNAPSHOT_DEACTIVATED",
+ "EVENT_SNAPSHOT_DEACTIVATE_FAILED",
+ "EVENT_SNAPSHOT_SOFT_LIMIT_REACHED",
+ "EVENT_SNAPSHOT_HARD_LIMIT_REACHED",
+ "EVENT_SNAPSHOT_RESTORED",
+ "EVENT_SNAPSHOT_RESTORE_FAILED",
+ "EVENT_SNAPSHOT_DELETED",
+ "EVENT_SNAPSHOT_DELETE_FAILED",
+ "EVENT_SNAPSHOT_CLONED",
+ "EVENT_SNAPSHOT_CLONE_FAILED",
+ "EVENT_SNAPSHOT_CONFIG_UPDATED",
+ "EVENT_SNAPSHOT_CONFIG_UPDATE_FAILED",
+ "EVENT_SNAPSHOT_SCHEDULER_INITIALISED",
+ "EVENT_SNAPSHOT_SCHEDULER_INIT_FAILED",
+ "EVENT_SNAPSHOT_SCHEDULER_ENABLED",
+ "EVENT_SNAPSHOT_SCHEDULER_ENABLE_FAILED",
+ "EVENT_SNAPSHOT_SCHEDULER_DISABLED",
+ "EVENT_SNAPSHOT_SCHEDULER_DISABLE_FAILED",
+ "EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_ADDED",
+ "EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_ADD_FAILED",
+ "EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_EDITED",
+ "EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_EDIT_FAILED",
+ "EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_DELETED",
+ "EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_DELETE_FAILED",
+
+ #async events
+ #glusterd events
+ "EVENT_SVC_MANAGER_FAILED",
+ "EVENT_SVC_RECONFIGURE_FAILED",
+ "EVENT_SVC_CONNECTED",
+ "EVENT_SVC_DISCONNECTED",
+ "EVENT_PEER_STORE_FAILURE",
+ "EVENT_PEER_RPC_CREATE_FAILED",
+ "EVENT_PEER_REJECT",
+ "EVENT_PEER_CONNECT",
+ "EVENT_PEER_DISCONNECT",
+ "EVENT_PEER_NOT_FOUND",
+ "EVENT_UNKNOWN_PEER",
+ "EVENT_BRICK_START_FAILED",
+ "EVENT_BRICK_STOP_FAILED",
+ "EVENT_BRICK_DISCONNECTED",
+ "EVENT_BRICK_CONNECTED",
+ "EVENT_BRICKPATH_RESOLVE_FAILED",
+ "EVENT_NOTIFY_UNKNOWN_OP",
+ "EVENT_QUORUM_LOST",
+ "EVENT_QUORUM_REGAINED",
+ "EVENT_REBALANCE_START_FAILED",
+ "EVENT_REBALANCE_STATUS_UPDATE_FAILED",
+ "EVENT_IMPORT_QUOTA_CONF_FAILED",
+ "EVENT_IMPORT_VOLUME_FAILED",
+ "EVENT_IMPORT_BRICK_FAILED",
+ "EVENT_COMPARE_FRIEND_VOLUME_FAILED",
+ "EVENT_NFS_GANESHA_EXPORT_FAILED",
+ #ec events
+ "EVENT_EC_MIN_BRICKS_NOT_UP",
+ "EVENT_EC_MIN_BRICKS_UP",
+ #georep async events
+ "EVENT_GEOREP_FAULTY",
+ "EVENT_GEOREP_CHECKPOINT_COMPLETED",
+ "EVENT_GEOREP_ACTIVE",
+ "EVENT_GEOREP_PASSIVE",
+
+ #quota async events
+ "EVENT_QUOTA_CROSSED_SOFT_LIMIT",
+ #bitrot async events
+ "EVENT_BITROT_BAD_FILE",
+ #protocol-server events
+ "EVENT_CLIENT_CONNECT",
+ "EVENT_CLIENT_AUTH_REJECT",
+ "EVENT_CLIENT_DISCONNECT",
+ #posix events
+ "EVENT_POSIX_SAME_GFID",
+ "EVENT_POSIX_ALREADY_PART_OF_VOLUME",
+ "EVENT_POSIX_BRICK_NOT_IN_VOLUME",
+ "EVENT_POSIX_BRICK_VERIFICATION_FAILED",
+ "EVENT_POSIX_ACL_NOT_SUPPORTED",
+ "EVENT_POSIX_HEALTH_CHECK_FAILED",
+ #afr events
+ "EVENT_AFR_QUORUM_MET",
+ "EVENT_AFR_QUORUM_FAIL",
+ "EVENT_AFR_SUBVOL_UP",
+ "EVENT_AFR_SUBVOLS_DOWN",
+ "EVENT_AFR_SPLIT_BRAIN",
+
+ #tier events
+ "EVENT_TIER_ATTACH",
+ "EVENT_TIER_ATTACH_FORCE",
+ "EVENT_TIER_DETACH_START",
+ "EVENT_TIER_DETACH_STOP",
+ "EVENT_TIER_DETACH_COMMIT",
+ "EVENT_TIER_DETACH_FORCE",
+ "EVENT_TIER_PAUSE",
+ "EVENT_TIER_RESUME",
+ "EVENT_TIER_WATERMARK_HI",
+ "EVENT_TIER_WATERMARK_DROPPED_TO_MID",
+ "EVENT_TIER_WATERMARK_RAISED_TO_MID",
+ "EVENT_TIER_WATERMARK_DROPPED_TO_LOW",
+
+ #dht events
+ #add/remove brick events
+ "EVENT_VOLUME_ADD_BRICK",
+ "EVENT_VOLUME_ADD_BRICK_FAILED",
+ "EVENT_VOLUME_REMOVE_BRICK_START",
+ "EVENT_VOLUME_REMOVE_BRICK_START_FAILED",
+ "EVENT_VOLUME_REMOVE_BRICK_COMMIT",
+ "EVENT_VOLUME_REMOVE_BRICK_COMMIT_FAILED",
+ "EVENT_VOLUME_REMOVE_BRICK_STOP",
+ "EVENT_VOLUME_REMOVE_BRICK_STOP_FAILED",
+ "EVENT_VOLUME_REMOVE_BRICK_FORCE",
+ "EVENT_VOLUME_REMOVE_BRICK_FORCE_FAILED",
+ "EVENT_VOLUME_REMOVE_BRICK_FAILED",
+
+ #rebalance events
+ "EVENT_VOLUME_REBALANCE_START",
+ "EVENT_VOLUME_REBALANCE_STOP",
+ "EVENT_VOLUME_REBALANCE_FAILED",
+ "EVENT_VOLUME_REBALANCE_COMPLETE",
+
+ #tier events
+ "EVENT_TIER_START",
+ "EVENT_TIER_START_FORCE",
+
+ #brick/inodes events
+ "EVENT_DHT_DISK_USAGE",
+ "EVENT_DHT_INODES_USAGE",
+)
+
+LAST_EVENT = "EVENT_LAST"
+
+ERRORS = (
+ "EVENT_SEND_OK",
+ "EVENT_ERROR_INVALID_INPUTS",
+ "EVENT_ERROR_SOCKET",
+ "EVENT_ERROR_CONNECT",
+ "EVENT_ERROR_SEND",
+ "EVENT_ERROR_RESOLVE",
+ "EVENT_ERROR_MSG_FORMAT",
+)
+
+if gen_header_type == "C_HEADER":
+ # Generate eventtypes.h
+ with open(eventtypes_h, "w") as f:
+ f.write("#ifndef __EVENTTYPES_H__\n")
+ f.write("#define __EVENTTYPES_H__\n\n")
+ f.write("typedef enum {\n")
+ for k in ERRORS:
+ f.write(" {0},\n".format(k))
+ f.write("} event_errors_t;\n")
+
+ f.write("\n")
+
+ f.write("typedef enum {\n")
+ for k in keys:
+ f.write(" {0},\n".format(k))
+
+ f.write(" {0}\n".format(LAST_EVENT))
+ f.write("} eventtypes_t;\n")
+ f.write("\n#endif /* __EVENTTYPES_H__ */\n")
+
+if gen_header_type == "PY_HEADER":
+ # Generate eventtypes.py
+ with open(eventtypes_py, "w") as f:
+ f.write("# -*- coding: utf-8 -*-\n")
+ f.write("all_events = [\n")
+ for ev in keys:
+ f.write(' "{0}",\n'.format(ev))
+
+ f.write("]\n\n")
+
+ for idx, ev in enumerate(keys):
+ f.write("{0} = {1}\n".format(ev.replace("EVENT_", ""), idx))
diff --git a/events/src/Makefile.am b/events/src/Makefile.am
new file mode 100644
index 00000000000..3b229691897
--- /dev/null
+++ b/events/src/Makefile.am
@@ -0,0 +1,41 @@
+noinst_PYTHON = $(top_srcdir)/events/eventskeygen.py
+EXTRA_DIST = glustereventsd.py __init__.py eventsapiconf.py.in \
+ handlers.py utils.py peer_eventsapi.py eventsconfig.json gf_event.py
+
+BUILT_SOURCES = eventtypes.py
+CLEANFILES = eventtypes.py
+
+eventsdir = $(GLUSTERFS_LIBEXECDIR)/gfevents
+if BUILD_EVENTS
+events_PYTHON = __init__.py gf_event.py eventsapiconf.py eventtypes.py \
+ utils.py
+endif
+# this does not work, see the Makefile.am in the root for a workaround
+#nodist_events_PYTHON = eventtypes.py
+
+eventtypes.py: $(top_srcdir)/events/eventskeygen.py
+ $(PYTHON) $(top_srcdir)/events/eventskeygen.py PY_HEADER
+
+if BUILD_EVENTS
+eventspeerscriptdir = $(GLUSTERFS_LIBEXECDIR)
+eventsconfdir = $(sysconfdir)/glusterfs
+eventsconf_DATA = eventsconfig.json
+
+events_PYTHON += handlers.py
+events_SCRIPTS = glustereventsd.py
+eventspeerscript_SCRIPTS = peer_eventsapi.py
+
+install-exec-hook:
+ $(mkdir_p) $(DESTDIR)$(sbindir)
+ rm -f $(DESTDIR)$(sbindir)/glustereventsd
+ ln -s $(GLUSTERFS_LIBEXECDIR)/gfevents/glustereventsd.py \
+ $(DESTDIR)$(sbindir)/glustereventsd
+ rm -f $(DESTDIR)$(sbindir)/gluster-eventsapi
+ ln -s $(GLUSTERFS_LIBEXECDIR)/peer_eventsapi.py \
+ $(DESTDIR)$(sbindir)/gluster-eventsapi
+
+uninstall-hook:
+ rm -f $(DESTDIR)$(sbindir)/glustereventsd
+ rm -f $(DESTDIR)$(sbindir)/gluster-eventsapi
+
+endif
diff --git a/events/src/__init__.py b/events/src/__init__.py
new file mode 100644
index 00000000000..f27c53a4df4
--- /dev/null
+++ b/events/src/__init__.py
@@ -0,0 +1,10 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
diff --git a/events/src/eventsapiconf.py.in b/events/src/eventsapiconf.py.in
new file mode 100644
index 00000000000..700093bee60
--- /dev/null
+++ b/events/src/eventsapiconf.py.in
@@ -0,0 +1,59 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import subprocess
+glusterd_workdir = None
+
+# Methods
+def get_glusterd_workdir():
+ global glusterd_workdir
+ if glusterd_workdir is not None:
+ return glusterd_workdir
+ proc = subprocess.Popen(["gluster", "system::", "getwd"],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ universal_newlines = True)
+ out, err = proc.communicate()
+ if proc.returncode == 0:
+ glusterd_workdir = out.strip()
+ else:
+ glusterd_workdir = "@GLUSTERD_WORKDIR@"
+ return glusterd_workdir
+
+SERVER_ADDRESS = "0.0.0.0"
+SERVER_ADDRESSv4 = "0.0.0.0"
+SERVER_ADDRESSv6 = "::1"
+DEFAULT_CONFIG_FILE = "@SYSCONF_DIR@/glusterfs/eventsconfig.json"
+CUSTOM_CONFIG_FILE_TO_SYNC = "/events/config.json"
+CUSTOM_CONFIG_FILE = get_glusterd_workdir() + CUSTOM_CONFIG_FILE_TO_SYNC
+WEBHOOKS_FILE_TO_SYNC = "/events/webhooks.json"
+WEBHOOKS_FILE = get_glusterd_workdir() + WEBHOOKS_FILE_TO_SYNC
+LOG_FILE = "@localstatedir@/log/glusterfs/events.log"
+EVENTSD = "glustereventsd"
+CONFIG_KEYS = ["log-level", "port", "disable-events-log"]
+BOOL_CONFIGS = ["disable-events-log"]
+INT_CONFIGS = ["port"]
+RESTART_CONFIGS = ["port"]
+EVENTS_ENABLED = @EVENTS_ENABLED@
+UUID_FILE = get_glusterd_workdir() + "/glusterd.info"
+PID_FILE = "@localstatedir@/run/glustereventsd.pid"
+AUTO_BOOL_ATTRIBUTES = ["force", "push-pem", "no-verify"]
+AUTO_INT_ATTRIBUTES = ["ssh-port"]
+CERTS_DIR = get_glusterd_workdir() + "/events"
+
+# Errors
+ERROR_SAME_CONFIG = 2
+ERROR_ALL_NODES_STATUS_NOT_OK = 3
+ERROR_PARTIAL_SUCCESS = 4
+ERROR_WEBHOOK_ALREADY_EXISTS = 5
+ERROR_WEBHOOK_NOT_EXISTS = 6
+ERROR_INVALID_CONFIG = 7
+ERROR_WEBHOOK_SYNC_FAILED = 8
+ERROR_CONFIG_SYNC_FAILED = 9
diff --git a/events/src/eventsconfig.json b/events/src/eventsconfig.json
new file mode 100644
index 00000000000..89e5b9c1d68
--- /dev/null
+++ b/events/src/eventsconfig.json
@@ -0,0 +1,5 @@
+{
+ "log-level": "INFO",
+ "port": 24009,
+ "disable-events-log": false
+}
diff --git a/events/src/gf_event.py b/events/src/gf_event.py
new file mode 100644
index 00000000000..260b0d9aa48
--- /dev/null
+++ b/events/src/gf_event.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import socket
+import time
+
+from gfevents.eventsapiconf import SERVER_ADDRESS, EVENTS_ENABLED
+from gfevents.eventtypes import all_events
+
+from gfevents.utils import logger, setup_logger, get_config
+
+# Run this when this lib loads
+setup_logger()
+
+
+def gf_event(event_type, **kwargs):
+ if EVENTS_ENABLED == 0:
+ return
+
+ if not isinstance(event_type, int) or event_type >= len(all_events):
+ logger.error("Invalid Event Type: {0}".format(event_type))
+ return
+
+ try:
+ client = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
+ except socket.error as e:
+ logger.error("Unable to connect to events Server: {0}".format(e))
+ return
+
+ port = get_config("port")
+ if port is None:
+ logger.error("Unable to get eventsd port details")
+ return
+
+ # Convert key value args into KEY1=VALUE1;KEY2=VALUE2;..
+ msg = ""
+ for k, v in kwargs.items():
+ msg += "{0}={1};".format(k, v)
+
+ # <TIMESTAMP> <EVENT_TYPE> <MSG>
+ msg = "{0} {1} {2}".format(int(time.time()), event_type, msg.strip(";")).encode()
+
+ try:
+ sent = client.sendto(msg, (SERVER_ADDRESS, port))
+ assert sent == len(msg)
+ except socket.error as e:
+ logger.error("Unable to Send message: {0}".format(e))
+ except AssertionError:
+ logger.error("Unable to send message. Sent: {0}, Actual: {1}".format(
+ sent, len(msg)))
+ finally:
+ client.close()
diff --git a/events/src/glustereventsd.py b/events/src/glustereventsd.py
new file mode 100644
index 00000000000..341a3b60947
--- /dev/null
+++ b/events/src/glustereventsd.py
@@ -0,0 +1,159 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from __future__ import print_function
+import sys
+import signal
+import threading
+try:
+ import socketserver
+except ImportError:
+ import SocketServer as socketserver
+import socket
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+
+from eventtypes import all_events
+import handlers
+import utils
+from eventsapiconf import SERVER_ADDRESSv4, SERVER_ADDRESSv6, PID_FILE
+from eventsapiconf import AUTO_BOOL_ATTRIBUTES, AUTO_INT_ATTRIBUTES
+from utils import logger, PidFile, PidFileLockFailed, boolify
+
+# Subclass so that specifically IPv4 packets are captured
+class UDPServerv4(socketserver.ThreadingUDPServer):
+ address_family = socket.AF_INET
+
+# Subclass so that specifically IPv6 packets are captured
+class UDPServerv6(socketserver.ThreadingUDPServer):
+ address_family = socket.AF_INET6
+
+class GlusterEventsRequestHandler(socketserver.BaseRequestHandler):
+
+ def handle(self):
+ data = self.request[0].strip()
+ if sys.version_info >= (3,):
+ data = self.request[0].strip().decode("utf-8")
+
+ logger.debug("EVENT: {0} from {1}".format(repr(data),
+ self.client_address[0]))
+ try:
+ # Event Format <TIMESTAMP> <TYPE> <DETAIL>
+ ts, key, value = data.split(" ", 2)
+ except ValueError:
+ logger.warn("Invalid Event Format {0}".format(data))
+ return
+
+ data_dict = {}
+ try:
+ # Format key=value;key=value
+ data_dict = dict(x.split('=') for x in value.split(';'))
+ except ValueError:
+ logger.warn("Unable to parse Event {0}".format(data))
+ return
+
+ for k, v in data_dict.items():
+ try:
+ if k in AUTO_BOOL_ATTRIBUTES:
+ data_dict[k] = boolify(v)
+ if k in AUTO_INT_ATTRIBUTES:
+ data_dict[k] = int(v)
+ except ValueError:
+ # Auto Conversion failed, Retain the old value
+ continue
+
+ try:
+ # Event Type to Function Map, Received event data will be in
+ # the form <TIMESTAMP> <TYPE> <DETAIL>, Get Event name for the
+ # received Type/Key and construct a function name starting with
+ # handle_ For example: handle_event_volume_create
+ func_name = "handle_" + all_events[int(key)].lower()
+ except IndexError:
+ # This type of Event is not handled?
+ logger.warn("Unhandled Event: {0}".format(key))
+ func_name = None
+
+ if func_name is not None:
+ # Get function from handlers module
+ func = getattr(handlers, func_name, None)
+ # If func is None, then handler unimplemented for that event.
+ if func is not None:
+ func(ts, int(key), data_dict)
+ else:
+ # Generic handler, broadcast whatever received
+ handlers.generic_handler(ts, int(key), data_dict)
+
+
+def signal_handler_sigusr2(sig, frame):
+ utils.load_all()
+ utils.restart_webhook_pool()
+
+
+def UDP_server_thread(sock):
+ sock.serve_forever()
+
+
+def init_event_server():
+ utils.setup_logger()
+ utils.load_all()
+ utils.init_webhook_pool()
+
+ port = utils.get_config("port")
+ if port is None:
+ sys.stderr.write("Unable to get Port details from Config\n")
+ sys.exit(1)
+
+ # Creating the Eventing Server, UDP Server for IPv4 packets
+ try:
+ serverv4 = UDPServerv4((SERVER_ADDRESSv4, port),
+ GlusterEventsRequestHandler)
+ except socket.error as e:
+ sys.stderr.write("Failed to start Eventsd for IPv4: {0}\n".format(e))
+ sys.exit(1)
+ # Creating the Eventing Server, UDP Server for IPv6 packets
+ try:
+ serverv6 = UDPServerv6((SERVER_ADDRESSv6, port),
+ GlusterEventsRequestHandler)
+ except socket.error as e:
+ sys.stderr.write("Failed to start Eventsd for IPv6: {0}\n".format(e))
+ sys.exit(1)
+ server_thread1 = threading.Thread(target=UDP_server_thread,
+ args=(serverv4,))
+ server_thread2 = threading.Thread(target=UDP_server_thread,
+ args=(serverv6,))
+ server_thread1.start()
+ server_thread2.start()
+
+
+def get_args():
+ parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+ description=__doc__)
+ parser.add_argument("-p", "--pid-file", help="PID File",
+ default=PID_FILE)
+
+ return parser.parse_args()
+
+
+def main():
+ args = get_args()
+ try:
+ with PidFile(args.pid_file):
+ init_event_server()
+ except PidFileLockFailed as e:
+ sys.stderr.write("Failed to get lock for pid file({0}): {1}".format(
+ args.pid_file, e))
+ except KeyboardInterrupt:
+ sys.exit(1)
+
+
+if __name__ == "__main__":
+ signal.signal(signal.SIGUSR2, signal_handler_sigusr2)
+ main()
diff --git a/events/src/handlers.py b/events/src/handlers.py
new file mode 100644
index 00000000000..7746d488bf3
--- /dev/null
+++ b/events/src/handlers.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import utils
+
+
+def generic_handler(ts, key, data):
+ """
+ Generic handler to broadcast message to all peers, custom handlers
+ can be created by func name handler_<event_name>
+ Ex: handle_event_volume_create(ts, key, data)
+ """
+ utils.publish(ts, key, data)
+
+
+def handle_event_volume_set(ts, key, data):
+ """
+ Received data will have all the options as one string, split into
+ list of options. "key1,value1,key2,value2" into
+ [[key1, value1], [key2, value2]]
+ """
+ opts = data.get("options", "").strip(",").split(",")
+ data["options"] = []
+ for i, opt in enumerate(opts):
+ if i % 2 == 0:
+ # Add new array with key
+ data["options"].append([opt])
+ else:
+ # Add to the last added array
+ data["options"][-1].append(opt)
+
+ utils.publish(ts, key, data)
diff --git a/events/src/peer_eventsapi.py b/events/src/peer_eventsapi.py
new file mode 100644
index 00000000000..4d2e5f35b1c
--- /dev/null
+++ b/events/src/peer_eventsapi.py
@@ -0,0 +1,669 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from __future__ import print_function
+import os
+import json
+from errno import EEXIST
+import fcntl
+from errno import EACCES, EAGAIN
+import signal
+import sys
+import time
+
+import requests
+from prettytable import PrettyTable
+
+from gluster.cliutils import (Cmd, node_output_ok, node_output_notok,
+ sync_file_to_peers, GlusterCmdException,
+ output_error, execute_in_peers, runcli,
+ set_common_args_func)
+from gfevents.utils import LockedOpen, get_jwt_token, save_https_cert
+
+from gfevents.eventsapiconf import (WEBHOOKS_FILE_TO_SYNC,
+ WEBHOOKS_FILE,
+ DEFAULT_CONFIG_FILE,
+ CUSTOM_CONFIG_FILE,
+ CUSTOM_CONFIG_FILE_TO_SYNC,
+ EVENTSD,
+ CONFIG_KEYS,
+ BOOL_CONFIGS,
+ INT_CONFIGS,
+ PID_FILE,
+ RESTART_CONFIGS,
+ ERROR_INVALID_CONFIG,
+ ERROR_WEBHOOK_NOT_EXISTS,
+ ERROR_CONFIG_SYNC_FAILED,
+ ERROR_WEBHOOK_ALREADY_EXISTS,
+ ERROR_PARTIAL_SUCCESS,
+ ERROR_ALL_NODES_STATUS_NOT_OK,
+ ERROR_SAME_CONFIG,
+ ERROR_WEBHOOK_SYNC_FAILED,
+ CERTS_DIR)
+
+
+def handle_output_error(err, errcode=1, json_output=False):
+ if json_output:
+ print (json.dumps({
+ "output": "",
+ "error": err
+ }))
+ sys.exit(errcode)
+ else:
+ output_error(err, errcode)
+
+
+def file_content_overwrite(fname, data):
+ with open(fname + ".tmp", "w") as f:
+ f.write(json.dumps(data))
+
+ os.rename(fname + ".tmp", fname)
+
+
+def create_custom_config_file_if_not_exists(args):
+ try:
+ config_dir = os.path.dirname(CUSTOM_CONFIG_FILE)
+ mkdirp(config_dir)
+ except OSError as e:
+ handle_output_error("Failed to create dir %s: %s" % (config_dir, e),
+ json_output=args.json)
+
+ if not os.path.exists(CUSTOM_CONFIG_FILE):
+ with open(CUSTOM_CONFIG_FILE, "w") as f:
+ f.write("{}")
+
+
+def create_webhooks_file_if_not_exists(args):
+ try:
+ webhooks_dir = os.path.dirname(WEBHOOKS_FILE)
+ mkdirp(webhooks_dir)
+ except OSError as e:
+ handle_output_error("Failed to create dir %s: %s" % (webhooks_dir, e),
+ json_output=args.json)
+
+ if not os.path.exists(WEBHOOKS_FILE):
+ with open(WEBHOOKS_FILE, "w") as f:
+ f.write("{}")
+
+
+def boolify(value):
+ val = False
+ if value.lower() in ["enabled", "true", "on", "yes"]:
+ val = True
+ return val
+
+
+def mkdirp(path, exit_on_err=False, logger=None):
+ """
+ Try creating required directory structure
+ ignore EEXIST and raise exception for rest of the errors.
+ Print error in stderr and exit
+ """
+ try:
+ os.makedirs(path)
+ except OSError as e:
+ if e.errno != EEXIST or not os.path.isdir(path):
+ raise
+
+
+def is_active():
+ state = False
+ try:
+ with open(PID_FILE, "a+") as f:
+ fcntl.flock(f.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+ state = False
+ except (IOError, OSError) as e:
+ if e.errno in (EACCES, EAGAIN):
+ # cannot grab. so, process still running..move on
+ state = True
+ else:
+ state = False
+ return state
+
+
+def reload_service():
+ pid = None
+ if is_active():
+ with open(PID_FILE) as f:
+ try:
+ pid = int(f.read().strip())
+ except ValueError:
+ pid = None
+ if pid is not None:
+ os.kill(pid, signal.SIGUSR2)
+
+ return (0, "", "")
+
+
+def rows_to_json(json_out, column_name, rows):
+ num_ok_rows = 0
+ for row in rows:
+ num_ok_rows += 1 if row.ok else 0
+ json_out.append({
+ "node": row.hostname,
+ "node_status": "UP" if row.node_up else "DOWN",
+ column_name: "OK" if row.ok else "NOT OK",
+ "error": row.error
+ })
+ return num_ok_rows
+
+
+def rows_to_table(table, rows):
+ num_ok_rows = 0
+ for row in rows:
+ num_ok_rows += 1 if row.ok else 0
+ table.add_row([row.hostname,
+ "UP" if row.node_up else "DOWN",
+ "OK" if row.ok else "NOT OK: {0}".format(
+ row.error)])
+ return num_ok_rows
+
+
+def sync_to_peers(args):
+ if os.path.exists(WEBHOOKS_FILE):
+ try:
+ sync_file_to_peers(WEBHOOKS_FILE_TO_SYNC)
+ except GlusterCmdException as e:
+ # Print stdout if stderr is empty
+ errmsg = e.message[2] if e.message[2] else e.message[1]
+ handle_output_error("Failed to sync Webhooks file: [Error: {0}]"
+ "{1}".format(e.message[0], errmsg),
+ errcode=ERROR_WEBHOOK_SYNC_FAILED,
+ json_output=args.json)
+
+ if os.path.exists(CUSTOM_CONFIG_FILE):
+ try:
+ sync_file_to_peers(CUSTOM_CONFIG_FILE_TO_SYNC)
+ except GlusterCmdException as e:
+ # Print stdout if stderr is empty
+ errmsg = e.message[2] if e.message[2] else e.message[1]
+ handle_output_error("Failed to sync Config file: [Error: {0}]"
+ "{1}".format(e.message[0], errmsg),
+ errcode=ERROR_CONFIG_SYNC_FAILED,
+ json_output=args.json)
+
+ out = execute_in_peers("node-reload")
+ if not args.json:
+ table = PrettyTable(["NODE", "NODE STATUS", "SYNC STATUS"])
+ table.align["NODE STATUS"] = "r"
+ table.align["SYNC STATUS"] = "r"
+
+ json_out = []
+ if args.json:
+ num_ok_rows = rows_to_json(json_out, "sync_status", out)
+ else:
+ num_ok_rows = rows_to_table(table, out)
+
+ ret = 0
+ if num_ok_rows == 0:
+ ret = ERROR_ALL_NODES_STATUS_NOT_OK
+ elif num_ok_rows != len(out):
+ ret = ERROR_PARTIAL_SUCCESS
+
+ if args.json:
+ print (json.dumps({
+ "output": json_out,
+ "error": ""
+ }))
+ else:
+ print (table)
+
+ # If sync status is not ok for any node set error code as partial success
+ sys.exit(ret)
+
+
+def node_output_handle(resp):
+ rc, out, err = resp
+ if rc == 0:
+ node_output_ok(out)
+ else:
+ node_output_notok(err)
+
+
+def action_handle(action, json_output=False):
+ out = execute_in_peers("node-" + action)
+ column_name = action.upper()
+ if action == "status":
+ column_name = EVENTSD.upper()
+
+ if not json_output:
+ table = PrettyTable(["NODE", "NODE STATUS", column_name + " STATUS"])
+ table.align["NODE STATUS"] = "r"
+ table.align[column_name + " STATUS"] = "r"
+
+ json_out = []
+ if json_output:
+ rows_to_json(json_out, column_name.lower() + "_status", out)
+ else:
+ rows_to_table(table, out)
+
+ return json_out if json_output else table
+
+
+class NodeReload(Cmd):
+ name = "node-reload"
+
+ def run(self, args):
+ node_output_handle(reload_service())
+
+
+class ReloadCmd(Cmd):
+ name = "reload"
+
+ def run(self, args):
+ out = action_handle("reload", args.json)
+ if args.json:
+ print (json.dumps({
+ "output": out,
+ "error": ""
+ }))
+ else:
+ print (out)
+
+
+class NodeStatus(Cmd):
+ name = "node-status"
+
+ def run(self, args):
+ node_output_ok("UP" if is_active() else "DOWN")
+
+
+class StatusCmd(Cmd):
+ name = "status"
+
+ def run(self, args):
+ webhooks = {}
+ if os.path.exists(WEBHOOKS_FILE):
+ webhooks = json.load(open(WEBHOOKS_FILE))
+
+ json_out = {"webhooks": [], "data": []}
+ if args.json:
+ json_out["webhooks"] = webhooks.keys()
+ else:
+ print ("Webhooks: " + ("" if webhooks else "None"))
+ for w in webhooks:
+ print (w)
+
+ print ()
+
+ out = action_handle("status", args.json)
+ if args.json:
+ json_out["data"] = out
+ print (json.dumps({
+ "output": json_out,
+ "error": ""
+ }))
+ else:
+ print (out)
+
+
+class WebhookAddCmd(Cmd):
+ name = "webhook-add"
+
+ def args(self, parser):
+ parser.add_argument("url", help="URL of Webhook")
+ parser.add_argument("--bearer_token", "-t", help="Bearer Token",
+ default="")
+ parser.add_argument("--secret", "-s",
+ help="Secret to add JWT Bearer Token", default="")
+
+ def run(self, args):
+ create_webhooks_file_if_not_exists(args)
+
+ with LockedOpen(WEBHOOKS_FILE, 'r+'):
+ data = json.load(open(WEBHOOKS_FILE))
+ if data.get(args.url, None) is not None:
+ handle_output_error("Webhook already exists",
+ errcode=ERROR_WEBHOOK_ALREADY_EXISTS,
+ json_output=args.json)
+
+ data[args.url] = {"token": args.bearer_token,
+ "secret": args.secret}
+ file_content_overwrite(WEBHOOKS_FILE, data)
+
+ sync_to_peers(args)
+
+
+class WebhookModCmd(Cmd):
+ name = "webhook-mod"
+
+ def args(self, parser):
+ parser.add_argument("url", help="URL of Webhook")
+ parser.add_argument("--bearer_token", "-t", help="Bearer Token",
+ default="")
+ parser.add_argument("--secret", "-s",
+ help="Secret to add JWT Bearer Token", default="")
+
+ def run(self, args):
+ create_webhooks_file_if_not_exists(args)
+
+ with LockedOpen(WEBHOOKS_FILE, 'r+'):
+ data = json.load(open(WEBHOOKS_FILE))
+ if data.get(args.url, None) is None:
+ handle_output_error("Webhook does not exists",
+ errcode=ERROR_WEBHOOK_NOT_EXISTS,
+ json_output=args.json)
+
+ if isinstance(data[args.url], str):
+ data[args.url]["token"] = data[args.url]
+
+ if args.bearer_token != "":
+ data[args.url]["token"] = args.bearer_token
+
+ if args.secret != "":
+ data[args.url]["secret"] = args.secret
+
+ file_content_overwrite(WEBHOOKS_FILE, data)
+
+ sync_to_peers(args)
+
+
+class WebhookDelCmd(Cmd):
+ name = "webhook-del"
+
+ def args(self, parser):
+ parser.add_argument("url", help="URL of Webhook")
+
+ def run(self, args):
+ create_webhooks_file_if_not_exists(args)
+
+ with LockedOpen(WEBHOOKS_FILE, 'r+'):
+ data = json.load(open(WEBHOOKS_FILE))
+ if data.get(args.url, None) is None:
+ handle_output_error("Webhook does not exists",
+ errcode=ERROR_WEBHOOK_NOT_EXISTS,
+ json_output=args.json)
+
+ del data[args.url]
+ file_content_overwrite(WEBHOOKS_FILE, data)
+
+ sync_to_peers(args)
+
+
+class NodeWebhookTestCmd(Cmd):
+ name = "node-webhook-test"
+
+ def args(self, parser):
+ parser.add_argument("url")
+ parser.add_argument("bearer_token")
+ parser.add_argument("secret")
+
+ def run(self, args):
+ http_headers = {}
+ hashval = ""
+ if args.bearer_token != ".":
+ hashval = args.bearer_token
+
+ if args.secret != ".":
+ hashval = get_jwt_token(args.secret, "TEST", int(time.time()))
+
+ if hashval:
+ http_headers["Authorization"] = "Bearer " + hashval
+
+ urldata = requests.utils.urlparse(args.url)
+ parts = urldata.netloc.split(":")
+ domain = parts[0]
+ # Default https port if not specified
+ port = 443
+ if len(parts) == 2:
+ port = int(parts[1])
+
+ cert_path = os.path.join(CERTS_DIR, args.url.replace("/", "_").strip())
+ verify = True
+ while True:
+ try:
+ resp = requests.post(args.url, headers=http_headers,
+ verify=verify)
+ # Successful webhook push
+ break
+ except requests.exceptions.SSLError as e:
+ # If verify is equal to cert path, but still failed with
+ # SSLError, Looks like some issue with custom downloaded
+ # certificate, Try with verify = false
+ if verify == cert_path:
+ verify = False
+ continue
+
+ # If verify is instance of bool and True, then custom cert
+ # is required, download the cert and retry
+ try:
+ save_https_cert(domain, port, cert_path)
+ verify = cert_path
+ except Exception:
+ verify = False
+
+ # Done with collecting cert, continue
+ continue
+ except Exception as e:
+ node_output_notok("{0}".format(e))
+ break
+
+ if resp.status_code != 200:
+ node_output_notok("{0}".format(resp.status_code))
+
+ node_output_ok()
+
+
+class WebhookTestCmd(Cmd):
+ name = "webhook-test"
+
+ def args(self, parser):
+ parser.add_argument("url", help="URL of Webhook")
+ parser.add_argument("--bearer_token", "-t", help="Bearer Token")
+ parser.add_argument("--secret", "-s",
+ help="Secret to generate Bearer Token")
+
+ def run(self, args):
+ url = args.url
+ bearer_token = args.bearer_token
+ secret = args.secret
+
+ if not args.url:
+ url = "."
+ if not args.bearer_token:
+ bearer_token = "."
+ if not args.secret:
+ secret = "."
+
+ out = execute_in_peers("node-webhook-test", [url, bearer_token,
+ secret])
+
+ if not args.json:
+ table = PrettyTable(["NODE", "NODE STATUS", "WEBHOOK STATUS"])
+ table.align["NODE STATUS"] = "r"
+ table.align["WEBHOOK STATUS"] = "r"
+
+ num_ok_rows = 0
+ json_out = []
+ if args.json:
+ num_ok_rows = rows_to_json(json_out, "webhook_status", out)
+ else:
+ num_ok_rows = rows_to_table(table, out)
+
+ ret = 0
+ if num_ok_rows == 0:
+ ret = ERROR_ALL_NODES_STATUS_NOT_OK
+ elif num_ok_rows != len(out):
+ ret = ERROR_PARTIAL_SUCCESS
+
+ if args.json:
+ print (json.dumps({
+ "output": json_out,
+ "error": ""
+ }))
+ else:
+ print (table)
+
+ sys.exit(ret)
+
+
+class ConfigGetCmd(Cmd):
+ name = "config-get"
+
+ def args(self, parser):
+ parser.add_argument("--name", help="Config Name")
+
+ def run(self, args):
+ data = json.load(open(DEFAULT_CONFIG_FILE))
+ if os.path.exists(CUSTOM_CONFIG_FILE):
+ data.update(json.load(open(CUSTOM_CONFIG_FILE)))
+
+ if args.name is not None and args.name not in CONFIG_KEYS:
+ handle_output_error("Invalid Config item",
+ errcode=ERROR_INVALID_CONFIG,
+ json_output=args.json)
+
+ if args.json:
+ json_out = {}
+ if args.name is None:
+ json_out = data
+ else:
+ json_out[args.name] = data[args.name]
+
+ print (json.dumps({
+ "output": json_out,
+ "error": ""
+ }))
+ else:
+ table = PrettyTable(["NAME", "VALUE"])
+ if args.name is None:
+ for k, v in data.items():
+ table.add_row([k, v])
+ else:
+ table.add_row([args.name, data[args.name]])
+
+ print (table)
+
+
+def read_file_content_json(fname):
+ content = "{}"
+ with open(fname) as f:
+ content = f.read()
+ if content.strip() == "":
+ content = "{}"
+
+ return json.loads(content)
+
+
+class ConfigSetCmd(Cmd):
+ name = "config-set"
+
+ def args(self, parser):
+ parser.add_argument("name", help="Config Name")
+ parser.add_argument("value", help="Config Value")
+
+ def run(self, args):
+ if args.name not in CONFIG_KEYS:
+ handle_output_error("Invalid Config item",
+ errcode=ERROR_INVALID_CONFIG,
+ json_output=args.json)
+
+ create_custom_config_file_if_not_exists(args)
+
+ with LockedOpen(CUSTOM_CONFIG_FILE, 'r+'):
+ data = json.load(open(DEFAULT_CONFIG_FILE))
+ if os.path.exists(CUSTOM_CONFIG_FILE):
+ config_json = read_file_content_json(CUSTOM_CONFIG_FILE)
+ data.update(config_json)
+
+ # Do Nothing if same as previous value
+ if data[args.name] == args.value:
+ handle_output_error("Config value not changed. Same config",
+ errcode=ERROR_SAME_CONFIG,
+ json_output=args.json)
+
+ # TODO: Validate Value
+ new_data = read_file_content_json(CUSTOM_CONFIG_FILE)
+
+ v = args.value
+ if args.name in BOOL_CONFIGS:
+ v = boolify(args.value)
+
+ if args.name in INT_CONFIGS:
+ v = int(args.value)
+
+ new_data[args.name] = v
+ file_content_overwrite(CUSTOM_CONFIG_FILE, new_data)
+
+ # If any value changed which requires restart of REST server
+ restart = False
+ if args.name in RESTART_CONFIGS:
+ restart = True
+
+ if restart:
+ print ("\nRestart glustereventsd in all nodes")
+
+ sync_to_peers(args)
+
+
+class ConfigResetCmd(Cmd):
+ name = "config-reset"
+
+ def args(self, parser):
+ parser.add_argument("name", help="Config Name or all")
+
+ def run(self, args):
+ create_custom_config_file_if_not_exists(args)
+
+ with LockedOpen(CUSTOM_CONFIG_FILE, 'r+'):
+ changed_keys = []
+ data = {}
+ if os.path.exists(CUSTOM_CONFIG_FILE):
+ data = read_file_content_json(CUSTOM_CONFIG_FILE)
+
+ # If No data available in custom config or, the specific config
+ # item is not available in custom config
+ if not data or \
+ (args.name != "all" and data.get(args.name, None) is None):
+ handle_output_error("Config value not reset. Already "
+ "set to default value",
+ errcode=ERROR_SAME_CONFIG,
+ json_output=args.json)
+
+ if args.name.lower() == "all":
+ for k, v in data.items():
+ changed_keys.append(k)
+
+ # Reset all keys
+ file_content_overwrite(CUSTOM_CONFIG_FILE, {})
+ else:
+ changed_keys.append(args.name)
+ del data[args.name]
+ file_content_overwrite(CUSTOM_CONFIG_FILE, data)
+
+ # If any value changed which requires restart of REST server
+ restart = False
+ for key in changed_keys:
+ if key in RESTART_CONFIGS:
+ restart = True
+ break
+
+ if restart:
+ print ("\nRestart glustereventsd in all nodes")
+
+ sync_to_peers(args)
+
+
+class SyncCmd(Cmd):
+ name = "sync"
+
+ def run(self, args):
+ sync_to_peers(args)
+
+
+def common_args(parser):
+ parser.add_argument("--json", help="JSON Output", action="store_true")
+
+
+if __name__ == "__main__":
+ set_common_args_func(common_args)
+ runcli()
diff --git a/events/src/utils.py b/events/src/utils.py
new file mode 100644
index 00000000000..6d4e0791a2b
--- /dev/null
+++ b/events/src/utils.py
@@ -0,0 +1,445 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import sys
+import json
+import os
+import logging
+import logging.handlers
+import fcntl
+from errno import EBADF
+from threading import Thread
+import multiprocessing
+try:
+ from queue import Queue
+except ImportError:
+ from Queue import Queue
+from datetime import datetime, timedelta
+import base64
+import hmac
+from hashlib import sha256
+from calendar import timegm
+
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+from gfevents.eventsapiconf import (LOG_FILE,
+ WEBHOOKS_FILE,
+ DEFAULT_CONFIG_FILE,
+ CUSTOM_CONFIG_FILE,
+ UUID_FILE,
+ CERTS_DIR)
+from gfevents import eventtypes
+
+
+# Webhooks list
+_webhooks = {}
+_webhooks_file_mtime = 0
+# Default Log Level
+_log_level = "INFO"
+# Config Object
+_config = {}
+
+# Init Logger instance
+logger = logging.getLogger(__name__)
+NodeID = None
+webhooks_pool = None
+
+
+def boolify(value):
+ value = str(value)
+ if value.lower() in ["1", "on", "true", "yes"]:
+ return True
+ else:
+ return False
+
+
+def log_event(data):
+ # Log all published events unless it is disabled
+ if not _config.get("disable-events-log", False):
+ logger.info(repr(data))
+
+
+def get_node_uuid():
+ val = None
+ with open(UUID_FILE) as f:
+ for line in f:
+ if line.startswith("UUID="):
+ val = line.strip().split("=")[-1]
+ break
+ return val
+
+
+def get_config(key, default_value=None):
+ if not _config:
+ load_config()
+ return _config.get(key, default_value)
+
+
+def get_event_type_name(idx):
+ """
+ Returns Event Type text from the index. For example, VOLUME_CREATE
+ """
+ return eventtypes.all_events[idx].replace("EVENT_", "")
+
+
+def setup_logger():
+ """
+ Logging initialization, Log level by default will be INFO, once config
+ file is read, respective log_level will be set.
+ """
+ global logger
+ logger.setLevel(logging.INFO)
+
+ # create the logging file handler
+ fh = logging.handlers.WatchedFileHandler(LOG_FILE)
+
+ formatter = logging.Formatter("[%(asctime)s] %(levelname)s "
+ "[%(module)s - %(lineno)s:%(funcName)s] "
+ "- %(message)s")
+
+ fh.setFormatter(formatter)
+
+ # add handler to logger object
+ logger.addHandler(fh)
+
+
+def load_config():
+ """
+ Load/Reload the config from REST Config files. This function will
+ be triggered during init and when SIGUSR2.
+ """
+ global _config
+ _config = {}
+ if os.path.exists(DEFAULT_CONFIG_FILE):
+ _config = json.load(open(DEFAULT_CONFIG_FILE))
+ if os.path.exists(CUSTOM_CONFIG_FILE):
+ _config.update(json.load(open(CUSTOM_CONFIG_FILE)))
+
+
+def load_log_level():
+ """
+ Reads log_level from Config file and sets accordingly. This function will
+ be triggered during init and when SIGUSR2.
+ """
+ global logger, _log_level
+ new_log_level = _config.get("log-level", "INFO")
+ if _log_level != new_log_level:
+ logger.setLevel(getattr(logging, new_log_level.upper()))
+ _log_level = new_log_level.upper()
+
+
+def load_webhooks():
+ """
+ Load/Reload the webhooks list. This function will
+ be triggered during init and when SIGUSR2.
+ """
+ global _webhooks, _webhooks_file_mtime
+ _webhooks = {}
+ if os.path.exists(WEBHOOKS_FILE):
+ _webhooks = json.load(open(WEBHOOKS_FILE))
+ st = os.lstat(WEBHOOKS_FILE)
+ _webhooks_file_mtime = st.st_mtime
+
+
+def load_all():
+ """
+ Wrapper function to call all load/reload functions. This function will
+ be triggered during init and when SIGUSR2.
+ """
+ load_config()
+ load_webhooks()
+ load_log_level()
+
+
+def publish(ts, event_key, data):
+ global NodeID
+ if NodeID is None:
+ NodeID = get_node_uuid()
+
+ autoload_webhooks()
+
+ message = {
+ "nodeid": NodeID,
+ "ts": int(ts),
+ "event": get_event_type_name(event_key),
+ "message": data
+ }
+
+ log_event(message)
+
+ if _webhooks:
+ plugin_webhook(message)
+ else:
+ # TODO: Default action?
+ pass
+
+
+def autoload_webhooks():
+ global _webhooks_file_mtime
+ try:
+ st = os.lstat(WEBHOOKS_FILE)
+ except OSError:
+ st = None
+
+ if st is not None:
+ # If Stat is available and mtime is not matching with
+ # previously recorded mtime, reload the webhooks file
+ if st.st_mtime != _webhooks_file_mtime:
+ load_webhooks()
+
+
+def base64_urlencode(inp):
+ return base64.urlsafe_b64encode(inp).replace("=", "").strip()
+
+
+def get_jwt_token(secret, event_type, event_ts, jwt_expiry_time_seconds=60):
+ exp = datetime.utcnow() + timedelta(seconds=jwt_expiry_time_seconds)
+ payload = {
+ "exp": timegm(exp.utctimetuple()),
+ "iss": "gluster",
+ "sub": event_type,
+ "iat": event_ts
+ }
+ header = '{"alg":"HS256","typ":"JWT"}'
+ payload = json.dumps(payload, separators=(',', ':'), sort_keys=True)
+ msg = base64_urlencode(header) + "." + base64_urlencode(payload)
+ return "%s.%s" % (
+ msg,
+ base64_urlencode(hmac.HMAC(str(secret), msg, sha256).digest())
+ )
+
+
+def save_https_cert(domain, port, cert_path):
+ import ssl
+
+ # Cert file already available for this URL
+ if os.path.exists(cert_path):
+ return
+
+ cert_data = ssl.get_server_certificate((domain, port))
+ with open(cert_path, "w") as f:
+ f.write(cert_data)
+
+
+def publish_to_webhook(url, token, secret, message_queue):
+ # Import requests here since not used in any other place
+ import requests
+
+ http_headers = {"Content-Type": "application/json"}
+ urldata = requests.utils.urlparse(url)
+ parts = urldata.netloc.split(":")
+ domain = parts[0]
+ # Default https port if not specified
+ port = 443
+ if len(parts) == 2:
+ port = int(parts[1])
+
+ cert_path = os.path.join(CERTS_DIR, url.replace("/", "_").strip())
+
+ while True:
+ hashval = ""
+ event_type, event_ts, message_json = message_queue.get()
+ if token != "" and token is not None:
+ hashval = token
+
+ if secret != "" and secret is not None:
+ hashval = get_jwt_token(secret, event_type, event_ts)
+
+ if hashval:
+ http_headers["Authorization"] = "Bearer " + hashval
+
+ verify = True
+ while True:
+ try:
+ resp = requests.post(url, headers=http_headers,
+ data=message_json,
+ verify=verify)
+ # Successful webhook push
+ message_queue.task_done()
+ if resp.status_code != 200:
+ logger.warn("Event push failed to URL: {url}, "
+ "Event: {event}, "
+ "Status Code: {status_code}".format(
+ url=url,
+ event=message_json,
+ status_code=resp.status_code))
+ break
+ except requests.exceptions.SSLError as e:
+ # If verify is equal to cert path, but still failed with
+ # SSLError, Looks like some issue with custom downloaded
+ # certificate, Try with verify = false
+ if verify == cert_path:
+ logger.warn("Event push failed with certificate, "
+ "ignoring verification url={0} "
+ "Error={1}".format(url, e))
+ verify = False
+ continue
+
+ # If verify is instance of bool and True, then custom cert
+ # is required, download the cert and retry
+ try:
+ save_https_cert(domain, port, cert_path)
+ verify = cert_path
+ except Exception as ex:
+ verify = False
+ logger.warn("Unable to get Server certificate, "
+ "ignoring verification url={0} "
+ "Error={1}".format(url, ex))
+
+ # Done with collecting cert, continue
+ continue
+ except Exception as e:
+ logger.warn("Event push failed to URL: {url}, "
+ "Event: {event}, "
+ "Status: {error}".format(
+ url=url,
+ event=message_json,
+ error=e))
+ message_queue.task_done()
+ break
+
+
+def plugin_webhook(message):
+ message_json = json.dumps(message, sort_keys=True)
+ logger.debug("EVENT: {0}".format(message_json))
+ webhooks_pool.send(message["event"], message["ts"], message_json)
+
+
+class LockedOpen(object):
+
+ def __init__(self, filename, *args, **kwargs):
+ self.filename = filename
+ self.open_args = args
+ self.open_kwargs = kwargs
+ self.fileobj = None
+
+ def __enter__(self):
+ """
+ If two processes compete to update a file, The first process
+ gets the lock and the second process is blocked in the fcntl.flock()
+ call. When first process replaces the file and releases the lock,
+ the already open file descriptor in the second process now points
+ to a "ghost" file(not reachable by any path name) with old contents.
+ To avoid that conflict, check the fd already opened is same or
+ not. Open new one if not same
+ """
+ f = open(self.filename, *self.open_args, **self.open_kwargs)
+ while True:
+ fcntl.flock(f, fcntl.LOCK_EX)
+ fnew = open(self.filename, *self.open_args, **self.open_kwargs)
+ if os.path.sameopenfile(f.fileno(), fnew.fileno()):
+ fnew.close()
+ break
+ else:
+ f.close()
+ f = fnew
+ self.fileobj = f
+ return f
+
+ def __exit__(self, _exc_type, _exc_value, _traceback):
+ self.fileobj.close()
+
+
+class PidFileLockFailed(Exception):
+ pass
+
+
+class PidFile(object):
+ def __init__(self, filename):
+ self.filename = filename
+ self.pid = os.getpid()
+ self.fh = None
+
+ def cleanup(self, remove_file=True):
+ try:
+ if self.fh is not None:
+ self.fh.close()
+ except IOError as exc:
+ if exc.errno != EBADF:
+ raise
+ finally:
+ if os.path.isfile(self.filename) and remove_file:
+ os.remove(self.filename)
+
+ def __enter__(self):
+ self.fh = open(self.filename, 'a+')
+ try:
+ fcntl.flock(self.fh.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
+ except IOError as exc:
+ self.cleanup(remove_file=False)
+ raise PidFileLockFailed(exc)
+
+ self.fh.seek(0)
+ self.fh.truncate()
+ self.fh.write("%d\n" % self.pid)
+ self.fh.flush()
+ self.fh.seek(0)
+ return self
+
+ def __exit__(self, _exc_type, _exc_value, _traceback):
+ self.cleanup()
+
+
+def webhook_monitor(proc_queue, webhooks):
+ queues = {}
+ for url, data in webhooks.items():
+ if isinstance(data, str):
+ token = data
+ secret = None
+ else:
+ token = data["token"]
+ secret = data["secret"]
+
+ queues[url] = Queue()
+ t = Thread(target=publish_to_webhook, args=(url, token, secret,
+ queues[url]))
+ t.start()
+
+ # Get the message sent to Process queue and distribute to all thread queues
+ while True:
+ message = proc_queue.get()
+ for _, q in queues.items():
+ q.put(message)
+
+
+class WebhookThreadPool(object):
+ def start(self):
+ # Separate process to emit messages to webhooks
+ # which maintains one thread per webhook. Separate
+ # process is required since on reload we need to stop
+ # and start the thread pool. In Python Threads can't be stopped
+ # so terminate the process and start again. Note: In transit
+ # events will be missed during reload
+ self.queue = multiprocessing.Queue()
+ self.proc = multiprocessing.Process(target=webhook_monitor,
+ args=(self.queue, _webhooks))
+ self.proc.start()
+
+ def restart(self):
+ # In transit messages are skipped, since webhooks monitor process
+ # is terminated.
+ self.proc.terminate()
+ self.start()
+
+ def send(self, event_type, event_ts, message):
+ self.queue.put((event_type, event_ts, message))
+
+
+def init_webhook_pool():
+ global webhooks_pool
+ webhooks_pool = WebhookThreadPool()
+ webhooks_pool.start()
+
+
+def restart_webhook_pool():
+ global webhooks_pool
+ if webhooks_pool is not None:
+ webhooks_pool.restart()
diff --git a/events/tools/Makefile.am b/events/tools/Makefile.am
new file mode 100644
index 00000000000..fb6770cf070
--- /dev/null
+++ b/events/tools/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_DIST = eventsdash.py
+
+if BUILD_EVENTS
+scriptsdir = $(datadir)/glusterfs/scripts
+scripts_SCRIPTS = eventsdash.py
+endif
diff --git a/events/tools/eventsdash.py b/events/tools/eventsdash.py
new file mode 100644
index 00000000000..6479ea59da6
--- /dev/null
+++ b/events/tools/eventsdash.py
@@ -0,0 +1,75 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from __future__ import print_function
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+import logging
+from datetime import datetime
+
+from flask import Flask, request
+
+app = Flask(__name__)
+app.logger.disabled = True
+log = logging.getLogger('werkzeug')
+log.disabled = True
+
+
+def human_time(ts):
+ return datetime.fromtimestamp(float(ts)).strftime("%Y-%m-%d %H:%M:%S")
+
+
+@app.route("/")
+def home():
+ return "OK"
+
+
+@app.route("/listen", methods=["POST"])
+def listen():
+ data = request.json
+ if data is None:
+ return "OK"
+
+ message = []
+ for k, v in data.get("message", {}).items():
+ message.append("{0}={1}".format(k, v))
+
+ print(("{0:20s} {1:20s} {2:36} {3}".format(
+ human_time(data.get("ts")),
+ data.get("event"),
+ data.get("nodeid"),
+ " ".join(message))))
+
+ return "OK"
+
+
+def main():
+ parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+ description=__doc__)
+ parser.add_argument("--port", type=int, help="Port", default=9000)
+ parser.add_argument("--debug", help="Run Server in debug mode",
+ action="store_true")
+ args = parser.parse_args()
+
+ print(("{0:20s} {1:20s} {2:36} {3}".format(
+ "TIMESTAMP", "EVENT", "NODE ID", "MESSAGE"
+ )))
+ print(("{0:20s} {1:20s} {2:36} {3}".format(
+ "-"*20, "-"*20, "-"*36, "-"*20
+ )))
+ if args.debug:
+ app.debug = True
+
+ app.run(host="0.0.0.0", port=args.port)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/extras/LinuxRPM/Makefile.am b/extras/LinuxRPM/Makefile.am
new file mode 100644
index 00000000000..f02853798c0
--- /dev/null
+++ b/extras/LinuxRPM/Makefile.am
@@ -0,0 +1,55 @@
+
+GFS_TAR = ../../glusterfs-$(VERSION).tar.gz
+
+.PHONY: all
+
+all:
+ @echo "To build RPMS run 'make glusterrpms'"
+
+.PHONY: glusterrpms glusterrpms_without_autogen
+.PHONY: autogen prep srcrpm testsrpm clean
+
+glusterrpms: autogen glusterrpms_without_autogen
+
+glusterrpms_without_autogen: prep srcrpm rpms
+ -rm -rf rpmbuild
+
+autogen:
+ cd ../.. && \
+ rm -rf autom4te.cache && \
+ ./autogen.sh && \
+ ./configure --enable-gnfs --with-previous-options
+
+prep:
+ $(MAKE) -C ../.. dist;
+ -mkdir -p rpmbuild/BUILD
+ -mkdir -p rpmbuild/SPECS
+ -mkdir -p rpmbuild/RPMS
+ -mkdir -p rpmbuild/SRPMS
+ -mkdir -p rpmbuild/SOURCES
+ -rm -rf rpmbuild/SOURCES/*
+ cp ../../*.tar.gz ./rpmbuild/SOURCES
+ cp ../../glusterfs.spec ./rpmbuild/SPECS
+
+srcrpm:
+ rpmbuild --define '_topdir $(shell pwd)/rpmbuild' -bs rpmbuild/SPECS/glusterfs.spec
+ mv rpmbuild/SRPMS/* .
+
+rpms:
+ rpmbuild --define '_topdir $(shell pwd)/rpmbuild' --with gnfs -bb rpmbuild/SPECS/glusterfs.spec
+ mv rpmbuild/RPMS/*/* .
+
+# EPEL-5 does not like new versions of rpmbuild and requires some
+# _source_* defines
+
+testsrpm: prep
+ rpmbuild --define '_topdir $(shell pwd)/rpmbuild' \
+ --define '_source_payload w9.gzdio' \
+ --define '_source_filedigest_algorithm 1' \
+ -bs rpmbuild/SPECS/glusterfs.spec
+ mv rpmbuild/SRPMS/* ../..
+ -rm -rf rpmbuild
+
+clean:
+ -rm -rf rpmbuild
+ -rm -f *.rpm
diff --git a/extras/LinuxRPM/make_glusterrpms b/extras/LinuxRPM/make_glusterrpms
new file mode 100755
index 00000000000..3156af97870
--- /dev/null
+++ b/extras/LinuxRPM/make_glusterrpms
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+(
+ cd $(realpath $(dirname $0))/../.. || exit 1
+ rm -rf autom4te.cache
+ ./autogen.sh || exit 1
+ ./configure --with-previous-options || exit 1
+)
+make -C $(realpath $(dirname $0)) glusterrpms_without_autogen
diff --git a/extras/MacOSX/Portfile b/extras/MacOSX/Portfile
deleted file mode 100644
index 1262a44daea..00000000000
--- a/extras/MacOSX/Portfile
+++ /dev/null
@@ -1,26 +0,0 @@
-# $Id$
-
-PortSystem 1.0
-
-name glusterfs
-version 2.0.0rc8
-categories fuse
-maintainers amar@gluster.com
-description GlusterFS
-long_description GlusterFS is a cluster file system, flexible to tune it for your needs.
-homepage http://www.gluster.org/
-platforms darwin
-master_sites http://ftp.gluster.com/pub/gluster/glusterfs/2.0/2.0.0
-
-configure.args --disable-bdb
-checksums md5 33c2d02344d4fab422e80cfb637e0b48
-
-post-destroot {
- file mkdir ${destroot}/Library/LaunchDaemons/
- file copy ${worksrcpath}/extras/glusterfs-server.plist \
- ${destroot}/Library/LaunchDaemons/com.gluster.glusterfs.plist
-
- file mkdir ${destroot}/sbin/
- file copy ${worksrcpath}/xlators/mount/fuse/utils/mount_glusterfs \
- ${destroot}/sbin/
-} \ No newline at end of file
diff --git a/extras/MacOSX/README.MacOSX b/extras/MacOSX/README.MacOSX
deleted file mode 100644
index ec7abc2cc78..00000000000
--- a/extras/MacOSX/README.MacOSX
+++ /dev/null
@@ -1,84 +0,0 @@
-
-Mostly the usage is over OS-X.
-
-Important links
-
-GlusterFS
-=========
-* http://www.gluster.org
-* http://gluster.org/docs/index.php/GlusterFS
-* http://gluster.org/docs/index.php/GlusterFS_on_MAC_OS_X
-
-MacFUSE
-=======
-* http://code.google.com/p/macfuse/
-* http://code.google.com/p/macfuse/wiki/FAQ
-
-
-Important steps:
-================
-
-Make sure that there is no previous installation of MacFUSE exists in the
-system. Run,
-
- bash# sudo /Library/Filesystems/fusefs.fs/Support/uninstall-macfuse-core.sh
-
-After this, install MacFUSE (mostly through .dmg available in macfuse homepage
-or if Gluster Inc provides any custom built .dmg)
-
-Make sure the .dmg of glusterfs is built against the installed MacFUSE version
-(if not, any operations over mountpoint gives EIO ie, Input/Output Error). If
-glusterfs tarball is used then compile it only after the MacFUSE installation
-is complete.
-
-To make an entry in /etc/fstab for glusterfs mount, use 'vifs' command
-
- bash# sudo vifs
-
-after the entry is added in /etc/fstab, it can be mounted by 'mount' command.
-
-To start the server process one can use the 'launchd' mechanism. Follow below
-steps after installation
-
- bash# launchctl load /Library/LaunchDaemons/com.gluster.glusterfs.plist
-
-No need to run the command if the machine reboots, as it will be loaded
-automatically by launchd process.
-
-Now copy the server volume file in the proper path
-
- bash# sudo vi /opt/local/etc/glusterfs/server.vol
-
-NOTE: (If glusterfs is installed in different path other than '/opt/local'
- update the volume file at the corresponding path, and also need to
- update the /Library/LaunchDaemons/com.gluster.glusterfs.plist with
- the proper path)
-
-Once the volume file is updated, administrator can start the server process by
-running,
-
- bash# launchctl start com.gluster.glusterfs
-
-and stop like
-
- bash# launchctl stop com.gluster.glusterfs
-
-NOTE: To start the process by default when the loaded, one need to add the
-following lines to .plist file
- -----
- <key>KeepAlive</key>
- <true/>
- -----
-
-
-
-
-Install using .dmg in Terminal
-=================================
-
- bash# hdiutil attach <package>.dmg
- bash# cd /Volumes/<package>/
- bash# installer -pkg <package>.pkg -installer /
- bash# cd
- bash# hdiutil detach /Volumes/<package>/
-
diff --git a/extras/Makefile.am b/extras/Makefile.am
index 0c260de92a5..983f014cca6 100644
--- a/extras/Makefile.am
+++ b/extras/Makefile.am
@@ -1,11 +1,77 @@
+addonexecdir = $(GLUSTERFS_LIBEXECDIR)
+addonexec_SCRIPTS =
+if WITH_SERVER
+addonexec_SCRIPTS += peer_add_secret_pub
+if USE_SYSTEMD
+addonexec_SCRIPTS += mount-shared-storage.sh
+endif
+endif
-docdir = $(datadir)/doc/glusterfs/
-EditorModedir = $(docdir)/
+EditorModedir = $(docdir)
EditorMode_DATA = glusterfs-mode.el glusterfs.vim
-SUBDIRS = init.d benchmarking hook-scripts
+SUBDIRS = init.d systemd benchmarking hook-scripts $(OCF_SUBDIR) LinuxRPM \
+ $(GEOREP_EXTRAS_SUBDIR) snap_scheduler firewalld cliutils python \
+ ganesha
-EXTRA_DIST = specgen.scm MacOSX/Portfile glusterfs-mode.el glusterfs.vim \
- migrate-unify-to-distribute.sh backend-xattr-sanitize.sh \
- backend-cleanup.sh disk_usage_sync.sh quota-remove-xattr.sh \
- quota-metadata-cleanup.sh glusterfs-logrotate
+confdir = $(sysconfdir)/glusterfs
+if WITH_SERVER
+conf_DATA = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.conf \
+ logger.conf.example glusterfs-georep-logrotate group-virt.example \
+ group-metadata-cache group-gluster-block group-nl-cache \
+ group-db-workload group-distributed-virt group-samba
+endif
+
+voldir = $(sysconfdir)/glusterfs
+vol_DATA = thin-arbiter/thin-arbiter.vol
+if WITH_SERVER
+vol_DATA += glusterd.vol
+endif
+
+scriptsdir = $(datadir)/glusterfs/scripts
+scripts_SCRIPTS = thin-arbiter/setup-thin-arbiter.sh
+if WITH_SERVER
+scripts_SCRIPTS += post-upgrade-script-for-quota.sh \
+ pre-upgrade-script-for-quota.sh stop-all-gluster-processes.sh
+if USE_SYSTEMD
+scripts_SCRIPTS += control-cpu-load.sh
+scripts_SCRIPTS += control-mem.sh
+endif
+endif
+
+EXTRA_DIST = glusterfs-logrotate gluster-rsyslog-7.2.conf gluster-rsyslog-5.8.conf \
+ logger.conf.example glusterfs-georep-logrotate group-virt.example \
+ group-metadata-cache group-gluster-block group-nl-cache \
+ group-db-workload group-samba specgen.scm glusterfs-mode.el glusterfs.vim \
+ migrate-unify-to-distribute.sh backend-xattr-sanitize.sh \
+ backend-cleanup.sh disk_usage_sync.sh clear_xattrs.sh \
+ glusterd-sysconfig glusterd.vol post-upgrade-script-for-quota.sh \
+ pre-upgrade-script-for-quota.sh command-completion/gluster.bash \
+ command-completion/Makefile command-completion/README \
+ stop-all-gluster-processes.sh clang-checker.sh mount-shared-storage.sh \
+ control-cpu-load.sh control-mem.sh group-distributed-virt \
+ thin-arbiter/thin-arbiter.vol thin-arbiter/setup-thin-arbiter.sh
+
+if WITH_SERVER
+install-data-local:
+ if [ -n "$(tmpfilesdir)" ]; then \
+ $(mkdir_p) $(DESTDIR)$(tmpfilesdir); \
+ $(INSTALL_DATA) run-gluster.tmpfiles \
+ $(DESTDIR)$(tmpfilesdir)/gluster.conf; \
+ fi
+ $(mkdir_p) $(DESTDIR)$(GLUSTERD_WORKDIR)/groups
+ $(INSTALL_DATA) $(top_srcdir)/extras/group-virt.example \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/virt
+ $(INSTALL_DATA) $(top_srcdir)/extras/group-metadata-cache \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/metadata-cache
+ $(INSTALL_DATA) $(top_srcdir)/extras/group-gluster-block \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/gluster-block
+ $(INSTALL_DATA) $(top_srcdir)/extras/group-nl-cache \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/nl-cache
+ $(INSTALL_DATA) $(top_srcdir)/extras/group-db-workload \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/db-workload
+ $(INSTALL_DATA) $(top_srcdir)/extras/group-distributed-virt \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/distributed-virt
+ $(INSTALL_DATA) $(top_srcdir)/extras/group-samba \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/groups/samba
+endif
diff --git a/extras/Ubuntu/README.Ubuntu b/extras/Ubuntu/README.Ubuntu
index 0c5b7828d12..890da3ca614 100644
--- a/extras/Ubuntu/README.Ubuntu
+++ b/extras/Ubuntu/README.Ubuntu
@@ -1,5 +1,6 @@
Bug 765014 - Mounting from localhost in fstab fails at boot on ubuntu
-(https://bugzilla.redhat.com/show_bug.cgi?id=765014)
+(original bug: https://bugzilla.redhat.com/show_bug.cgi?id=765014)
+(updated in: https://bugzilla.redhat.com/show_bug.cgi?id=1047007)
(https://bugs.launchpad.net/ubuntu/+source/glusterfs/+bug/876648)
Ubuntu uses upstart instead of init to bootstrap the system and it has a unique
@@ -10,15 +11,16 @@ and the volume is mounted from localhost, the mount fails at boot time. To
correct this we need to launch glusterd using upstart and block the glusterfs
mounting event until glusterd is started.
-The glusterd.conf file contains the necessary configuration for upstart to
-manage the glusterd service. It should be placed in /etc/init/glusterd.conf
+The glusterfs-server.conf file contains the necessary configuration for upstart to
+manage the glusterd service. It should be placed in /etc/init/glusterfs-server.conf
on Ubuntu systems, and then the old initscript /etc/init.d/glusterd can be
removed. An additional upstart job, mounting-glusterfs.conf, is also required
-to block mounting glusterfs volumes until the glusterd service is available.
+to block mounting glusterfs volumes until the network interfaces are available.
Both of these upstart jobs need to be placed in /etc/init to resolve the issue.
-Starting with Ubuntu 12.04, Precise Pangolin, these upstart jobs will be
-included with the glusterfs-server package in the Ubuntu repository.
+Starting with Ubuntu 14.04, Trusty Tahr, these upstart jobs will be included
+with the glusterfs-server and glusterfs-client packages in the Ubuntu
+universe repository.
This affects all versions of glusterfs on the Ubuntu platform since at least
10.04, Lucid Lynx.
diff --git a/extras/Ubuntu/glusterd.conf b/extras/Ubuntu/glusterfs-server.conf
index aa99502b0a7..aa99502b0a7 100644
--- a/extras/Ubuntu/glusterd.conf
+++ b/extras/Ubuntu/glusterfs-server.conf
diff --git a/extras/Ubuntu/mounting-glusterfs.conf b/extras/Ubuntu/mounting-glusterfs.conf
index 3c59c0f635e..786ef16df04 100644
--- a/extras/Ubuntu/mounting-glusterfs.conf
+++ b/extras/Ubuntu/mounting-glusterfs.conf
@@ -1,7 +1,6 @@
author "Louis Zuckerman <me@louiszuckerman.com>"
-description "Block the mounting event for glusterfs filesystems until glusterd is running"
+description "Block the mounting event for glusterfs filesystems until the network interfaces are running"
start on mounting TYPE=glusterfs
task
-exec start wait-for-state WAIT_FOR=glusterd WAITER=mounting-glusterfs
-
+exec start wait-for-state WAIT_FOR=static-network-up WAITER=mounting-glusterfs
diff --git a/extras/backend-cleanup.sh b/extras/backend-cleanup.sh
index dc504860b73..cb0a1d8871f 100644
--- a/extras/backend-cleanup.sh
+++ b/extras/backend-cleanup.sh
@@ -17,7 +17,7 @@ export_directory="/export/glusterfs"
clean_dir()
{
# Clean the 'link' files on backend
- find "${export_directory}" -type f -perm +01000 -exec rm -v '{}' \;
+ find "${export_directory}" -type f -perm /01000 -exec rm -v '{}' \;
}
main()
diff --git a/extras/benchmarking/Makefile.am b/extras/benchmarking/Makefile.am
index 04cc06182ab..bfcc59277a5 100644
--- a/extras/benchmarking/Makefile.am
+++ b/extras/benchmarking/Makefile.am
@@ -1,7 +1,5 @@
-docdir = $(datadir)/doc/$(PACKAGE_NAME)/benchmarking
-
-benchmarkingdir = $(docdir)
+benchmarkingdir = $(docdir)/benchmarking
benchmarking_DATA = rdd.c glfs-bm.c README launch-script.sh local-script.sh
diff --git a/extras/benchmarking/glfs-bm.c b/extras/benchmarking/glfs-bm.c
index 035d055dfcd..f7f5873f84d 100644
--- a/extras/benchmarking/glfs-bm.c
+++ b/extras/benchmarking/glfs-bm.c
@@ -1,21 +1,12 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#define _GNU_SOURCE
#define __USE_FILE_OFFSET64
#define _FILE_OFFSET_BITS 64
@@ -34,365 +25,338 @@
#include <sys/time.h>
struct state {
- char need_op_write:1;
- char need_op_read:1;
+ char need_op_write : 1;
+ char need_op_read : 1;
- char need_iface_fileio:1;
- char need_iface_xattr:1;
+ char need_iface_fileio : 1;
+ char need_iface_xattr : 1;
- char need_mode_posix:1;
+ char need_mode_posix : 1;
- char prefix[512];
- long int count;
+ char prefix[512];
+ long int count;
- size_t block_size;
+ size_t block_size;
- char *specfile;
+ char *specfile;
- long int io_size;
+ long int io_size;
};
-
-#define MEASURE(func, arg) measure (func, #func, arg)
-
+#define MEASURE(func, arg) measure(func, #func, arg)
void
-tv_difference (struct timeval *tv_stop,
- struct timeval *tv_start,
- struct timeval *tv_diff)
+tv_difference(struct timeval *tv_stop, struct timeval *tv_start,
+ struct timeval *tv_diff)
{
- if (tv_stop->tv_usec < tv_start->tv_usec) {
- tv_diff->tv_usec = (tv_stop->tv_usec + 1000000) - tv_start->tv_usec;
- tv_diff->tv_sec = (tv_stop->tv_sec - 1 - tv_start->tv_sec);
- } else {
- tv_diff->tv_usec = tv_stop->tv_usec - tv_start->tv_usec;
- tv_diff->tv_sec = tv_stop->tv_sec - tv_start->tv_sec;
- }
+ if (tv_stop->tv_usec < tv_start->tv_usec) {
+ tv_diff->tv_usec = (tv_stop->tv_usec + 1000000) - tv_start->tv_usec;
+ tv_diff->tv_sec = (tv_stop->tv_sec - 1 - tv_start->tv_sec);
+ } else {
+ tv_diff->tv_usec = tv_stop->tv_usec - tv_start->tv_usec;
+ tv_diff->tv_sec = tv_stop->tv_sec - tv_start->tv_sec;
+ }
}
-
void
-measure (int (*func)(struct state *state),
- char *func_name, struct state *state)
+measure(int (*func)(struct state *state), char *func_name, struct state *state)
{
- struct timeval tv_start, tv_stop, tv_diff;
- state->io_size = 0;
- long int count;
+ struct timeval tv_start, tv_stop, tv_diff;
+ state->io_size = 0;
+ long int count;
- gettimeofday (&tv_start, NULL);
- count = func (state);
- gettimeofday (&tv_stop, NULL);
+ gettimeofday(&tv_start, NULL);
+ count = func(state);
+ gettimeofday(&tv_stop, NULL);
- tv_difference (&tv_stop, &tv_start, &tv_diff);
+ tv_difference(&tv_stop, &tv_start, &tv_diff);
- fprintf (stdout, "%s: count=%ld, size=%ld, time=%ld:%ld\n",
- func_name, count, state->io_size,
- tv_diff.tv_sec, tv_diff.tv_usec);
+ fprintf(stdout, "%s: count=%ld, size=%ld, time=%ld:%ld\n", func_name, count,
+ state->io_size, tv_diff.tv_sec, tv_diff.tv_usec);
}
-
static error_t
-parse_opts (int key, char *arg,
- struct argp_state *_state)
+parse_opts(int key, char *arg, struct argp_state *_state)
{
- struct state *state = _state->input;
+ struct state *state = _state->input;
- switch (key)
- {
+ switch (key) {
case 'o':
- if (strcasecmp (arg, "read") == 0) {
- state->need_op_write = 0;
- state->need_op_read = 1;
- } else if (strcasecmp (arg, "write") == 0) {
- state->need_op_write = 1;
- state->need_op_read = 0;
- } else if (strcasecmp (arg, "both") == 0) {
- state->need_op_write = 1;
- state->need_op_read = 1;
- } else {
- fprintf (stderr, "unknown op: %s\n", arg);
- return -1;
- }
- break;
+ if (strcasecmp(arg, "read") == 0) {
+ state->need_op_write = 0;
+ state->need_op_read = 1;
+ } else if (strcasecmp(arg, "write") == 0) {
+ state->need_op_write = 1;
+ state->need_op_read = 0;
+ } else if (strcasecmp(arg, "both") == 0) {
+ state->need_op_write = 1;
+ state->need_op_read = 1;
+ } else {
+ fprintf(stderr, "unknown op: %s\n", arg);
+ return -1;
+ }
+ break;
case 'i':
- if (strcasecmp (arg, "fileio") == 0) {
- state->need_iface_fileio = 1;
- state->need_iface_xattr = 0;
- } else if (strcasecmp (arg, "xattr") == 0) {
- state->need_iface_fileio = 0;
- state->need_iface_xattr = 1;
- } else if (strcasecmp (arg, "both") == 0) {
- state->need_iface_fileio = 1;
- state->need_iface_xattr = 1;
- } else {
- fprintf (stderr, "unknown interface: %s\n", arg);
- return -1;
- }
- break;
- case 'b':
- {
- size_t block_size = atoi (arg);
- if (!block_size) {
- fprintf (stderr, "incorrect size: %s\n", arg);
- return -1;
- }
- state->block_size = block_size;
- }
- break;
+ if (strcasecmp(arg, "fileio") == 0) {
+ state->need_iface_fileio = 1;
+ state->need_iface_xattr = 0;
+ } else if (strcasecmp(arg, "xattr") == 0) {
+ state->need_iface_fileio = 0;
+ state->need_iface_xattr = 1;
+ } else if (strcasecmp(arg, "both") == 0) {
+ state->need_iface_fileio = 1;
+ state->need_iface_xattr = 1;
+ } else {
+ fprintf(stderr, "unknown interface: %s\n", arg);
+ return -1;
+ }
+ break;
+ case 'b': {
+ size_t block_size = atoi(arg);
+ if (!block_size) {
+ fprintf(stderr, "incorrect size: %s\n", arg);
+ return -1;
+ }
+ state->block_size = block_size;
+ } break;
case 's':
- state->specfile = strdup (arg);
- break;
+ state->specfile = strdup(arg);
+ break;
case 'p':
- fprintf (stderr, "using prefix: %s\n", arg);
- strncpy (state->prefix, arg, 512);
- break;
- case 'c':
- {
- long count = atol (arg);
- if (!count) {
- fprintf (stderr, "incorrect count: %s\n", arg);
- return -1;
- }
- state->count = count;
- }
- break;
+ fprintf(stderr, "using prefix: %s\n", arg);
+ strncpy(state->prefix, arg, 512);
+ break;
+ case 'c': {
+ long count = atol(arg);
+ if (!count) {
+ fprintf(stderr, "incorrect count: %s\n", arg);
+ return -1;
+ }
+ state->count = count;
+ } break;
case ARGP_KEY_NO_ARGS:
- break;
+ break;
case ARGP_KEY_ARG:
- break;
- }
+ break;
+ }
- return 0;
+ return 0;
}
int
-do_mode_posix_iface_fileio_write (struct state *state)
+do_mode_posix_iface_fileio_write(struct state *state)
{
- long int i;
- int ret = -1;
- char block[state->block_size];
-
- for (i=0; i<state->count; i++) {
- int fd = -1;
- char filename[512];
-
- sprintf (filename, "%s.%06ld", state->prefix, i);
-
- fd = open (filename, O_CREAT|O_WRONLY, 00600);
- if (fd == -1) {
- fprintf (stderr, "open(%s) => %s\n", filename, strerror (errno));
- break;
- }
- ret = write (fd, block, state->block_size);
- if (ret != state->block_size) {
- fprintf (stderr, "write (%s) => %d/%s\n", filename, ret,
- strerror (errno));
- close (fd);
- break;
- }
- close (fd);
- state->io_size += ret;
+ long int i;
+ int ret = -1;
+ char block[state->block_size];
+
+ for (i = 0; i < state->count; i++) {
+ int fd = -1;
+ char filename[512];
+
+ sprintf(filename, "%s.%06ld", state->prefix, i);
+
+ fd = open(filename, O_CREAT | O_WRONLY, 00600);
+ if (fd == -1) {
+ fprintf(stderr, "open(%s) => %s\n", filename, strerror(errno));
+ break;
}
+ ret = write(fd, block, state->block_size);
+ if (ret != state->block_size) {
+ fprintf(stderr, "write (%s) => %d/%s\n", filename, ret,
+ strerror(errno));
+ close(fd);
+ break;
+ }
+ close(fd);
+ state->io_size += ret;
+ }
- return i;
+ return i;
}
-
int
-do_mode_posix_iface_fileio_read (struct state *state)
+do_mode_posix_iface_fileio_read(struct state *state)
{
- long int i;
- int ret = -1;
- char block[state->block_size];
-
- for (i=0; i<state->count; i++) {
- int fd = -1;
- char filename[512];
-
- sprintf (filename, "%s.%06ld", state->prefix, i);
-
- fd = open (filename, O_RDONLY);
- if (fd == -1) {
- fprintf (stderr, "open(%s) => %s\n", filename, strerror (errno));
- break;
- }
- ret = read (fd, block, state->block_size);
- if (ret == -1) {
- fprintf (stderr, "read(%s) => %d/%s\n", filename, ret, strerror (errno));
- close (fd);
- break;
- }
- close (fd);
- state->io_size += ret;
+ long int i;
+ int ret = -1;
+ char block[state->block_size];
+
+ for (i = 0; i < state->count; i++) {
+ int fd = -1;
+ char filename[512];
+
+ sprintf(filename, "%s.%06ld", state->prefix, i);
+
+ fd = open(filename, O_RDONLY);
+ if (fd == -1) {
+ fprintf(stderr, "open(%s) => %s\n", filename, strerror(errno));
+ break;
+ }
+ ret = read(fd, block, state->block_size);
+ if (ret == -1) {
+ fprintf(stderr, "read(%s) => %d/%s\n", filename, ret,
+ strerror(errno));
+ close(fd);
+ break;
}
+ close(fd);
+ state->io_size += ret;
+ }
- return i;
+ return i;
}
-
int
-do_mode_posix_iface_fileio (struct state *state)
+do_mode_posix_iface_fileio(struct state *state)
{
- if (state->need_op_write)
- MEASURE (do_mode_posix_iface_fileio_write, state);
+ if (state->need_op_write)
+ MEASURE(do_mode_posix_iface_fileio_write, state);
- if (state->need_op_read)
- MEASURE (do_mode_posix_iface_fileio_read, state);
+ if (state->need_op_read)
+ MEASURE(do_mode_posix_iface_fileio_read, state);
- return 0;
+ return 0;
}
-
int
-do_mode_posix_iface_xattr_write (struct state *state)
+do_mode_posix_iface_xattr_write(struct state *state)
{
- long int i;
- int ret = -1;
- char block[state->block_size];
- char *dname = NULL, *dirc = NULL;
- char *bname = NULL, *basec = NULL;
-
- dirc = strdup (state->prefix);
- basec = strdup (state->prefix);
- dname = dirname (dirc);
- bname = basename (basec);
-
- for (i=0; i<state->count; i++) {
- char key[512];
-
- sprintf (key, "glusterfs.file.%s.%06ld", bname, i);
-
- ret = lsetxattr (dname, key, block, state->block_size, 0);
-
- if (ret != 0) {
- fprintf (stderr, "lsetxattr (%s, %s, %p) => %s\n",
- dname, key, block, strerror (errno));
- break;
- }
- state->io_size += state->block_size;
+ long int i;
+ int ret = -1;
+ char block[state->block_size];
+ char *dname = NULL, *dirc = NULL;
+ char *bname = NULL, *basec = NULL;
+
+ dirc = strdup(state->prefix);
+ basec = strdup(state->prefix);
+ dname = dirname(dirc);
+ bname = basename(basec);
+
+ for (i = 0; i < state->count; i++) {
+ char key[512];
+
+ sprintf(key, "glusterfs.file.%s.%06ld", bname, i);
+
+ ret = lsetxattr(dname, key, block, state->block_size, 0);
+
+ if (ret != 0) {
+ fprintf(stderr, "lsetxattr (%s, %s, %p) => %s\n", dname, key, block,
+ strerror(errno));
+ break;
}
+ state->io_size += state->block_size;
+ }
- free (dirc);
- free (basec);
+ free(dirc);
+ free(basec);
- return i;
+ return i;
}
-
int
-do_mode_posix_iface_xattr_read (struct state *state)
+do_mode_posix_iface_xattr_read(struct state *state)
{
- long int i;
- int ret = -1;
- char block[state->block_size];
- char *dname = NULL, *dirc = NULL;
- char *bname = NULL, *basec = NULL;
-
- dirc = strdup (state->prefix);
- basec = strdup (state->prefix);
- dname = dirname (dirc);
- bname = basename (basec);
-
- for (i=0; i<state->count; i++) {
- char key[512];
-
- sprintf (key, "glusterfs.file.%s.%06ld", bname, i);
-
- ret = lgetxattr (dname, key, block, state->block_size);
-
- if (ret < 0) {
- fprintf (stderr, "lgetxattr (%s, %s, %p) => %s\n",
- dname, key, block, strerror (errno));
- break;
- }
- state->io_size += ret;
+ long int i;
+ int ret = -1;
+ char block[state->block_size];
+ char *dname = NULL, *dirc = NULL;
+ char *bname = NULL, *basec = NULL;
+
+ dirc = strdup(state->prefix);
+ basec = strdup(state->prefix);
+ dname = dirname(dirc);
+ bname = basename(basec);
+
+ for (i = 0; i < state->count; i++) {
+ char key[512];
+
+ sprintf(key, "glusterfs.file.%s.%06ld", bname, i);
+
+ ret = lgetxattr(dname, key, block, state->block_size);
+
+ if (ret < 0) {
+ fprintf(stderr, "lgetxattr (%s, %s, %p) => %s\n", dname, key, block,
+ strerror(errno));
+ break;
}
+ state->io_size += ret;
+ }
- return i;
+ return i;
}
-
int
-do_mode_posix_iface_xattr (struct state *state)
+do_mode_posix_iface_xattr(struct state *state)
{
- if (state->need_op_write)
- MEASURE (do_mode_posix_iface_xattr_write, state);
+ if (state->need_op_write)
+ MEASURE(do_mode_posix_iface_xattr_write, state);
- if (state->need_op_read)
- MEASURE (do_mode_posix_iface_xattr_read, state);
+ if (state->need_op_read)
+ MEASURE(do_mode_posix_iface_xattr_read, state);
- return 0;
+ return 0;
}
int
-do_mode_posix (struct state *state)
+do_mode_posix(struct state *state)
{
- if (state->need_iface_fileio)
- do_mode_posix_iface_fileio (state);
+ if (state->need_iface_fileio)
+ do_mode_posix_iface_fileio(state);
- if (state->need_iface_xattr)
- do_mode_posix_iface_xattr (state);
+ if (state->need_iface_xattr)
+ do_mode_posix_iface_xattr(state);
- return 0;
+ return 0;
}
-
int
-do_actions (struct state *state)
+do_actions(struct state *state)
{
- if (state->need_mode_posix)
- do_mode_posix (state);
+ if (state->need_mode_posix)
+ do_mode_posix(state);
- return 0;
+ return 0;
}
static struct argp_option options[] = {
- {"op", 'o', "OPERATIONS", 0,
- "WRITE|READ|BOTH - defaults to BOTH"},
- {"iface", 'i', "INTERFACE", 0,
- "FILEIO|XATTR|BOTH - defaults to FILEIO"},
- {"block", 'b', "BLOCKSIZE", 0,
- "<NUM> - defaults to 4096"},
- {"specfile", 's', "SPECFILE", 0,
- "absolute path to specfile"},
- {"prefix", 'p', "PREFIX", 0,
- "filename prefix"},
- {"count", 'c', "COUNT", 0,
- "number of files"},
- {0, 0, 0, 0, 0}
-};
+ {"op", 'o', "OPERATIONS", 0, "WRITE|READ|BOTH - defaults to BOTH"},
+ {"iface", 'i', "INTERFACE", 0, "FILEIO|XATTR|BOTH - defaults to FILEIO"},
+ {"block", 'b', "BLOCKSIZE", 0, "<NUM> - defaults to 4096"},
+ {"specfile", 's', "SPECFILE", 0, "absolute path to specfile"},
+ {"prefix", 'p', "PREFIX", 0, "filename prefix"},
+ {"count", 'c', "COUNT", 0, "number of files"},
+ {0, 0, 0, 0, 0}};
-static struct argp argp = {
- options,
- parse_opts,
- "tool",
- "tool to benchmark small file performance"
-};
+static struct argp argp = {options, parse_opts, "tool",
+ "tool to benchmark small file performance"};
int
-main (int argc, char *argv[])
+main(int argc, char *argv[])
{
- struct state state = {0, };
+ struct state state = {
+ 0,
+ };
- state.need_op_write = 1;
- state.need_op_read = 1;
+ state.need_op_write = 1;
+ state.need_op_read = 1;
- state.need_iface_fileio = 1;
- state.need_iface_xattr = 0;
+ state.need_iface_fileio = 1;
+ state.need_iface_xattr = 0;
- state.need_mode_posix = 1;
+ state.need_mode_posix = 1;
- state.block_size = 4096;
+ state.block_size = 4096;
- strcpy (state.prefix, "tmpfile");
- state.count = 1048576;
+ strcpy(state.prefix, "tmpfile");
+ state.count = 1048576;
- if (argp_parse (&argp, argc, argv, 0, 0, &state) != 0) {
- fprintf (stderr, "argp_parse() failed\n");
- return 1;
- }
+ if (argp_parse(&argp, argc, argv, 0, 0, &state) != 0) {
+ fprintf(stderr, "argp_parse() failed\n");
+ return 1;
+ }
- do_actions (&state);
+ do_actions(&state);
- return 0;
+ return 0;
}
diff --git a/extras/benchmarking/rdd.c b/extras/benchmarking/rdd.c
index b06333370ae..efc9d342a37 100644
--- a/extras/benchmarking/rdd.c
+++ b/extras/benchmarking/rdd.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <sys/stat.h>
#include <sys/types.h>
@@ -30,633 +20,586 @@
#define TWO_POWER(power) (2UL << (power))
-#define RDD_INTEGER_VALUE ((TWO_POWER ((sizeof (int) * 8))) - 1)
+#define RDD_INTEGER_VALUE ((TWO_POWER((sizeof(int) * 8))) - 1)
#ifndef UNIX_PATH_MAX
#define UNIX_PATH_MAX 108
#endif
#define UNIT_KB 1024ULL
-#define UNIT_MB UNIT_KB*1024ULL
-#define UNIT_GB UNIT_MB*1024ULL
-#define UNIT_TB UNIT_GB*1024ULL
-#define UNIT_PB UNIT_TB*1024ULL
+#define UNIT_MB UNIT_KB * 1024ULL
+#define UNIT_GB UNIT_MB * 1024ULL
+#define UNIT_TB UNIT_GB * 1024ULL
+#define UNIT_PB UNIT_TB * 1024ULL
-#define UNIT_KB_STRING "KB"
-#define UNIT_MB_STRING "MB"
-#define UNIT_GB_STRING "GB"
-#define UNIT_TB_STRING "TB"
-#define UNIT_PB_STRING "PB"
+#define UNIT_KB_STRING "KB"
+#define UNIT_MB_STRING "MB"
+#define UNIT_GB_STRING "GB"
+#define UNIT_TB_STRING "TB"
+#define UNIT_PB_STRING "PB"
struct rdd_file {
- char path[UNIX_PATH_MAX];
- struct stat st;
- int fd;
+ char path[UNIX_PATH_MAX];
+ struct stat st;
+ int fd;
};
struct rdd_config {
- long iters;
- long max_ops_per_seq;
- size_t max_bs;
- size_t min_bs;
- int thread_count;
- pthread_t *threads;
- pthread_barrier_t barrier;
- pthread_mutex_t lock;
- struct rdd_file in_file;
- struct rdd_file out_file;
- ssize_t file_size;
+ long iters;
+ long max_ops_per_seq;
+ size_t max_bs;
+ size_t min_bs;
+ int thread_count;
+ pthread_t *threads;
+ pthread_barrier_t barrier;
+ pthread_mutex_t lock;
+ struct rdd_file in_file;
+ struct rdd_file out_file;
+ ssize_t file_size;
};
static struct rdd_config rdd_config;
enum rdd_keys {
- RDD_MIN_BS_KEY = 1,
- RDD_MAX_BS_KEY,
+ RDD_MIN_BS_KEY = 1,
+ RDD_MAX_BS_KEY,
};
static error_t
-rdd_parse_opts (int key, char *arg,
- struct argp_state *_state)
+rdd_parse_opts(int key, char *arg, struct argp_state *_state)
{
- switch (key) {
- case 'o':
- {
- int len = 0;
- len = strlen (arg);
- if (len > UNIX_PATH_MAX) {
- fprintf (stderr, "output file name too long (%s)\n",
- arg);
- return -1;
- }
-
- strncpy (rdd_config.out_file.path, arg, len);
- }
- break;
-
- case 'i':
- {
- int len = 0;
- len = strlen (arg);
- if (len > UNIX_PATH_MAX) {
- fprintf (stderr, "input file name too long (%s)\n",
- arg);
- return -1;
- }
-
- strncpy (rdd_config.in_file.path, arg, len);
- rdd_config.in_file.path[len] = '\0';
- }
- break;
-
- case 'f':
- {
- char *tmp = NULL;
- unsigned long long fs = 0;
- if (string2bytesize (arg, &fs) == -1) {
- fprintf (stderr, "invalid argument for file size "
- "(%s)\n", arg);
- return -1;
- }
-
- rdd_config.file_size = fs;
- }
- break;
-
- case RDD_MIN_BS_KEY:
- {
- char *tmp = NULL;
- long bs = 0;
- bs = strtol (arg, &tmp, 10);
- if ((bs == LONG_MAX) || (bs == LONG_MIN) || (tmp && *tmp)) {
- fprintf (stderr, "invalid argument for minimum block"
- "size (%s)\n", arg);
- return -1;
- }
-
- rdd_config.min_bs = bs;
- }
- break;
-
- case RDD_MAX_BS_KEY:
- {
- char *tmp = NULL;
- long bs = 0;
- bs = strtol (arg, &tmp, 10);
- if ((bs == LONG_MAX) || (bs == LONG_MIN) || (tmp && *tmp)) {
- fprintf (stderr, "invalid argument for maximum block"
- "size (%s)\n", arg);
- return -1;
- }
-
- rdd_config.max_bs = bs;
- }
- break;
-
- case 'r':
- {
- char *tmp = NULL;
- long iters = 0;
- iters = strtol (arg, &tmp, 10);
- if ((iters == LONG_MAX) ||
- (iters == LONG_MIN) ||
- (tmp && *tmp)) {
- fprintf (stderr, "invalid argument for iterations"
- "(%s)\n", arg);
- return -1;
- }
-
- rdd_config.iters = iters;
- }
- break;
-
- case 'm':
- {
- char *tmp = NULL;
- long max_ops = 0;
- max_ops = strtol (arg, &tmp, 10);
- if ((max_ops == LONG_MAX) ||
- (max_ops == LONG_MIN) ||
- (tmp && *tmp)) {
- fprintf (stderr, "invalid argument for max-ops"
- "(%s)\n", arg);
- return -1;
- }
+ switch (key) {
+ case 'o': {
+ int len = 0;
+ len = strlen(arg);
+ if (len > UNIX_PATH_MAX) {
+ fprintf(stderr, "output file name too long (%s)\n", arg);
+ return -1;
+ }
- rdd_config.max_ops_per_seq = max_ops;
- }
- break;
+ strncpy(rdd_config.out_file.path, arg, len);
+ } break;
- case 't':
- {
- char *tmp = NULL;
- long threads = 0;
- threads = strtol (arg, &tmp, 10);
- if ((threads == LONG_MAX) ||
- (threads == LONG_MIN) ||
- (tmp && *tmp)) {
- fprintf (stderr, "invalid argument for thread count"
- "(%s)\n", arg);
- return -1;
- }
+ case 'i': {
+ int len = 0;
+ len = strlen(arg);
+ if (len > UNIX_PATH_MAX) {
+ fprintf(stderr, "input file name too long (%s)\n", arg);
+ return -1;
+ }
+
+ strncpy(rdd_config.in_file.path, arg, len);
+ rdd_config.in_file.path[len] = '\0';
+ } break;
+
+ case 'f': {
+ char *tmp = NULL;
+ unsigned long long fs = 0;
+ if (string2bytesize(arg, &fs) == -1) {
+ fprintf(stderr,
+ "invalid argument for file size "
+ "(%s)\n",
+ arg);
+ return -1;
+ }
+
+ rdd_config.file_size = fs;
+ } break;
+
+ case RDD_MIN_BS_KEY: {
+ char *tmp = NULL;
+ long bs = 0;
+ bs = strtol(arg, &tmp, 10);
+ if ((bs == LONG_MAX) || (bs == LONG_MIN) || (tmp && *tmp)) {
+ fprintf(stderr,
+ "invalid argument for minimum block"
+ "size (%s)\n",
+ arg);
+ return -1;
+ }
+
+ rdd_config.min_bs = bs;
+ } break;
+
+ case RDD_MAX_BS_KEY: {
+ char *tmp = NULL;
+ long bs = 0;
+ bs = strtol(arg, &tmp, 10);
+ if ((bs == LONG_MAX) || (bs == LONG_MIN) || (tmp && *tmp)) {
+ fprintf(stderr,
+ "invalid argument for maximum block"
+ "size (%s)\n",
+ arg);
+ return -1;
+ }
+
+ rdd_config.max_bs = bs;
+ } break;
+
+ case 'r': {
+ char *tmp = NULL;
+ long iters = 0;
+ iters = strtol(arg, &tmp, 10);
+ if ((iters == LONG_MAX) || (iters == LONG_MIN) || (tmp && *tmp)) {
+ fprintf(stderr,
+ "invalid argument for iterations"
+ "(%s)\n",
+ arg);
+ return -1;
+ }
+
+ rdd_config.iters = iters;
+ } break;
+
+ case 'm': {
+ char *tmp = NULL;
+ long max_ops = 0;
+ max_ops = strtol(arg, &tmp, 10);
+ if ((max_ops == LONG_MAX) || (max_ops == LONG_MIN) ||
+ (tmp && *tmp)) {
+ fprintf(stderr,
+ "invalid argument for max-ops"
+ "(%s)\n",
+ arg);
+ return -1;
+ }
+
+ rdd_config.max_ops_per_seq = max_ops;
+ } break;
+
+ case 't': {
+ char *tmp = NULL;
+ long threads = 0;
+ threads = strtol(arg, &tmp, 10);
+ if ((threads == LONG_MAX) || (threads == LONG_MIN) ||
+ (tmp && *tmp)) {
+ fprintf(stderr,
+ "invalid argument for thread count"
+ "(%s)\n",
+ arg);
+ return -1;
+ }
- rdd_config.thread_count = threads;
- }
- break;
+ rdd_config.thread_count = threads;
+ } break;
case ARGP_KEY_NO_ARGS:
- break;
+ break;
case ARGP_KEY_ARG:
- break;
+ break;
case ARGP_KEY_END:
- if (_state->argc == 1) {
- argp_usage (_state);
- }
-
- }
+ if (_state->argc == 1) {
+ argp_usage(_state);
+ }
+ }
- return 0;
+ return 0;
}
int
-string2bytesize (const char *str, unsigned long long *n)
+string2bytesize(const char *str, unsigned long long *n)
{
- unsigned long long value = 0ULL;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
-
- if (str == NULL || n == NULL)
- {
- errno = EINVAL;
- return -1;
+ unsigned long long value = 0ULL;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace(*s)) {
+ continue;
}
-
- for (s = str; *s != '\0'; s++)
- {
- if (isspace (*s))
- {
- continue;
- }
- if (*s == '-')
- {
- return -1;
- }
- break;
+ if (*s == '-') {
+ return -1;
}
-
- old_errno = errno;
- errno = 0;
- value = strtoull (str, &tail, 10);
-
- if (errno == ERANGE || errno == EINVAL)
- {
- return -1;
- }
-
- if (errno == 0)
- {
- errno = old_errno;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtoull(str, &tail, 10);
+
+ if (errno == ERANGE || errno == EINVAL) {
+ return -1;
+ }
+
+ if (errno == 0) {
+ errno = old_errno;
+ }
+
+ if (tail[0] != '\0') {
+ if (strcasecmp(tail, UNIT_KB_STRING) == 0) {
+ value *= UNIT_KB;
+ } else if (strcasecmp(tail, UNIT_MB_STRING) == 0) {
+ value *= UNIT_MB;
+ } else if (strcasecmp(tail, UNIT_GB_STRING) == 0) {
+ value *= UNIT_GB;
+ } else if (strcasecmp(tail, UNIT_TB_STRING) == 0) {
+ value *= UNIT_TB;
+ } else if (strcasecmp(tail, UNIT_PB_STRING) == 0) {
+ value *= UNIT_PB;
}
- if (tail[0] != '\0')
- {
- if (strcasecmp (tail, UNIT_KB_STRING) == 0)
- {
- value *= UNIT_KB;
- }
- else if (strcasecmp (tail, UNIT_MB_STRING) == 0)
- {
- value *= UNIT_MB;
- }
- else if (strcasecmp (tail, UNIT_GB_STRING) == 0)
- {
- value *= UNIT_GB;
- }
- else if (strcasecmp (tail, UNIT_TB_STRING) == 0)
- {
- value *= UNIT_TB;
- }
- else if (strcasecmp (tail, UNIT_PB_STRING) == 0)
- {
- value *= UNIT_PB;
- }
-
- else
- {
- return -1;
- }
+ else {
+ return -1;
}
+ }
- *n = value;
+ *n = value;
- return 0;
+ return 0;
}
static struct argp_option rdd_options[] = {
- {"if", 'i', "INPUT_FILE", 0, "input-file"},
- {"of", 'o', "OUTPUT_FILE", 0, "output-file"},
- {"threads", 't', "COUNT", 0, "number of threads to spawn (defaults to 2)"},
- {"min-bs", RDD_MIN_BS_KEY, "MIN_BLOCK_SIZE", 0,
- "Minimum block size in bytes (defaults to 1024)"},
- {"max-bs", RDD_MAX_BS_KEY, "MAX_BLOCK_SIZE", 0,
- "Maximum block size in bytes (defaults to 4096)"},
- {"iters", 'r', "ITERS", 0,
- "Number of read-write sequences (defaults to 1000000)"},
- {"max-ops", 'm', "MAXOPS", 0,
- "maximum number of read-writes to be performed in a sequence (defaults to 1)"},
- {"file-size", 'f', "FILESIZE", 0,
- "the size of the file which will be created and upon it I/O will be done"
- " (defaults to 100MB"},
- {0, 0, 0, 0, 0}
-};
+ {"if", 'i', "INPUT_FILE", 0, "input-file"},
+ {"of", 'o', "OUTPUT_FILE", 0, "output-file"},
+ {"threads", 't', "COUNT", 0, "number of threads to spawn (defaults to 2)"},
+ {"min-bs", RDD_MIN_BS_KEY, "MIN_BLOCK_SIZE", 0,
+ "Minimum block size in bytes (defaults to 1024)"},
+ {"max-bs", RDD_MAX_BS_KEY, "MAX_BLOCK_SIZE", 0,
+ "Maximum block size in bytes (defaults to 4096)"},
+ {"iters", 'r', "ITERS", 0,
+ "Number of read-write sequences (defaults to 1000000)"},
+ {"max-ops", 'm', "MAXOPS", 0,
+ "maximum number of read-writes to be performed in a sequence (defaults to "
+ "1)"},
+ {"file-size", 'f', "FILESIZE", 0,
+ "the size of the file which will be created and upon it I/O will be done"
+ " (defaults to 100MB"},
+ {0, 0, 0, 0, 0}};
static struct argp argp = {
- rdd_options,
- rdd_parse_opts,
- "",
- "random dd - tool to do a sequence of random block-sized continuous"
- "read writes starting at a random offset"
-};
-
+ rdd_options, rdd_parse_opts, "",
+ "random dd - tool to do a sequence of random block-sized continuous"
+ "read writes starting at a random offset"};
static void
-rdd_default_config (void)
+rdd_default_config(void)
{
- char *tmp_path = "rdd.in";
-
- rdd_config.thread_count = 2;
- rdd_config.iters = 1000000;
- rdd_config.max_bs = 4096;
- rdd_config.min_bs = 1024;
- rdd_config.in_file.fd = rdd_config.out_file.fd = -1;
- rdd_config.max_ops_per_seq = 1;
- strncpy (rdd_config.in_file.path, tmp_path, strlen (tmp_path));
- rdd_config.file_size = 104857600;
-
- return;
+ char *tmp_path = "rdd.in";
+
+ rdd_config.thread_count = 2;
+ rdd_config.iters = 1000000;
+ rdd_config.max_bs = 4096;
+ rdd_config.min_bs = 1024;
+ rdd_config.in_file.fd = rdd_config.out_file.fd = -1;
+ rdd_config.max_ops_per_seq = 1;
+ strncpy(rdd_config.in_file.path, tmp_path, strlen(tmp_path));
+ rdd_config.file_size = 104857600;
+
+ return;
}
-
static char
-rdd_valid_config (void)
+rdd_valid_config(void)
{
- char ret = 1;
- int fd = -1;
-
- fd = open (rdd_config.in_file.path, O_RDONLY);
- if (fd == -1 && (errno != ENOENT)) {
- fprintf (stderr, "open: (%s)", strerror (errno));
- ret = 0;
- goto out;
- }
- close (fd);
+ char ret = 1;
+ int fd = -1;
- if (rdd_config.min_bs > rdd_config.max_bs) {
- fprintf (stderr, "minimum blocksize %ld is greater than the "
- "maximum blocksize %ld", rdd_config.min_bs,
- rdd_config.max_bs);
- ret = 0;
- goto out;
- }
+ fd = open(rdd_config.in_file.path, O_RDONLY);
+ if (fd == -1 && (errno != ENOENT)) {
+ fprintf(stderr, "open: (%s)", strerror(errno));
+ ret = 0;
+ goto out;
+ }
+ close(fd);
+
+ if (rdd_config.min_bs > rdd_config.max_bs) {
+ fprintf(stderr,
+ "minimum blocksize %ld is greater than the "
+ "maximum blocksize %ld",
+ rdd_config.min_bs, rdd_config.max_bs);
+ ret = 0;
+ goto out;
+ }
- if (strlen (rdd_config.out_file.path) == 0) {
- sprintf (rdd_config.out_file.path, "%s.rddout",
- rdd_config.in_file.path);
- }
+ if (strlen(rdd_config.out_file.path) == 0) {
+ sprintf(rdd_config.out_file.path, "%s.rddout", rdd_config.in_file.path);
+ }
out:
- return ret;
+ return ret;
}
-
static void *
-rdd_read_write (void *arg)
+rdd_read_write(void *arg)
{
- int i = 0, ret = 0;
- size_t bs = 0;
- off_t offset = 0;
- long rand = 0;
- long max_ops = 0;
- char *buf = NULL;
-
- buf = calloc (1, rdd_config.max_bs);
- if (!buf) {
- fprintf (stderr, "calloc failed (%s)\n", strerror (errno));
+ int i = 0, ret = 0;
+ size_t bs = 0;
+ off_t offset = 0;
+ long rand = 0;
+ long max_ops = 0;
+ char *buf = NULL;
+
+ buf = calloc(1, rdd_config.max_bs);
+ if (!buf) {
+ fprintf(stderr, "calloc failed (%s)\n", strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < rdd_config.iters; i++) {
+ pthread_mutex_lock(&rdd_config.lock);
+ {
+ int bytes = 0;
+ rand = random();
+
+ if (rdd_config.min_bs == rdd_config.max_bs) {
+ bs = rdd_config.max_bs;
+ } else {
+ bs = rdd_config.min_bs +
+ (rand % (rdd_config.max_bs - rdd_config.min_bs));
+ }
+
+ offset = rand % rdd_config.in_file.st.st_size;
+ max_ops = rand % rdd_config.max_ops_per_seq;
+ if (!max_ops) {
+ max_ops++;
+ }
+
+ ret = lseek(rdd_config.in_file.fd, offset, SEEK_SET);
+ if (ret != offset) {
+ fprintf(stderr, "lseek failed (%s)\n", strerror(errno));
ret = -1;
- goto out;
- }
+ goto unlock;
+ }
- for (i = 0; i < rdd_config.iters; i++)
- {
- pthread_mutex_lock (&rdd_config.lock);
- {
- int bytes = 0;
- rand = random ();
-
- if (rdd_config.min_bs == rdd_config.max_bs) {
- bs = rdd_config.max_bs;
- } else {
- bs = rdd_config.min_bs +
- (rand %
- (rdd_config.max_bs -
- rdd_config.min_bs));
- }
-
- offset = rand % rdd_config.in_file.st.st_size;
- max_ops = rand % rdd_config.max_ops_per_seq;
- if (!max_ops) {
- max_ops ++;
- }
-
- ret = lseek (rdd_config.in_file.fd, offset, SEEK_SET);
- if (ret != offset) {
- fprintf (stderr, "lseek failed (%s)\n",
- strerror (errno));
- ret = -1;
- goto unlock;
- }
-
- ret = lseek (rdd_config.out_file.fd, offset, SEEK_SET);
- if (ret != offset) {
- fprintf (stderr, "lseek failed (%s)\n",
- strerror (errno));
- ret = -1;
- goto unlock;
- }
-
- while (max_ops--)
- {
- bytes = read (rdd_config.in_file.fd, buf, bs);
- if (!bytes) {
- break;
- }
-
- if (bytes == -1) {
- fprintf (stderr, "read failed (%s)\n",
- strerror (errno));
- ret = -1;
- goto unlock;
- }
-
- if (write (rdd_config.out_file.fd, buf, bytes)
- != bytes) {
- fprintf (stderr, "write failed (%s)\n",
- strerror (errno));
- ret = -1;
- goto unlock;
- }
- }
+ ret = lseek(rdd_config.out_file.fd, offset, SEEK_SET);
+ if (ret != offset) {
+ fprintf(stderr, "lseek failed (%s)\n", strerror(errno));
+ ret = -1;
+ goto unlock;
+ }
+
+ while (max_ops--) {
+ bytes = read(rdd_config.in_file.fd, buf, bs);
+ if (!bytes) {
+ break;
}
- unlock:
- pthread_mutex_unlock (&rdd_config.lock);
- if (ret == -1) {
- goto out;
+
+ if (bytes == -1) {
+ fprintf(stderr, "read failed (%s)\n", strerror(errno));
+ ret = -1;
+ goto unlock;
+ }
+
+ if (write(rdd_config.out_file.fd, buf, bytes) != bytes) {
+ fprintf(stderr, "write failed (%s)\n", strerror(errno));
+ ret = -1;
+ goto unlock;
}
- ret = 0;
+ }
}
+ unlock:
+ pthread_mutex_unlock(&rdd_config.lock);
+ if (ret == -1) {
+ goto out;
+ }
+ ret = 0;
+ }
out:
- free (buf);
- pthread_barrier_wait (&rdd_config.barrier);
+ free(buf);
+ pthread_barrier_wait(&rdd_config.barrier);
- return NULL;
+ return NULL;
}
static void
-cleanup (void)
+cleanup(void)
{
- close (rdd_config.in_file.fd);
- close (rdd_config.out_file.fd);
- rdd_config.in_file.fd = rdd_config.out_file.fd = -1;
+ close(rdd_config.in_file.fd);
+ close(rdd_config.out_file.fd);
+ rdd_config.in_file.fd = rdd_config.out_file.fd = -1;
}
static int
-check_and_create (void)
+check_and_create(void)
{
- int ret = -1;
- char buf[4096] = {0,};
- struct stat stbuf = {0,};
- int fd[2] = {-1,};
- size_t total_size = -1;
-
- total_size = rdd_config.file_size;
-
- ret = stat (rdd_config.in_file.path, &stbuf);
- if (ret == -1 && (errno != ENOENT))
+ int ret = -1;
+ char buf[4096] = {
+ 0,
+ };
+ struct stat stbuf = {
+ 0,
+ };
+ int fd[2] = {
+ -1,
+ };
+ size_t total_size = -1;
+
+ total_size = rdd_config.file_size;
+
+ ret = stat(rdd_config.in_file.path, &stbuf);
+ if (ret == -1 && (errno != ENOENT))
+ goto out;
+
+ fd[1] = open(rdd_config.in_file.path, O_CREAT | O_WRONLY | O_TRUNC);
+ if (fd[1] == -1)
+ goto out;
+
+ fd[0] = open("/dev/urandom", O_RDONLY);
+ if (fd[0] == -1)
+ goto out;
+
+ while (total_size > 0) {
+ if (total_size >= 4096) {
+ ret = read(fd[0], buf, 4096);
+ if (ret == -1)
goto out;
-
- fd[1] = open (rdd_config.in_file.path, O_CREAT | O_WRONLY | O_TRUNC);
- if (fd[1] == -1)
+ ret = write(fd[1], buf, 4096);
+ if (ret == -1)
goto out;
-
- fd[0] = open ("/dev/urandom", O_RDONLY);
- if (fd[0] == -1)
+ total_size = total_size - 4096;
+ } else {
+ ret = read(fd[0], buf, total_size);
+ if (ret == -1)
goto out;
-
- while (total_size > 0) {
- if (total_size >= 4096) {
- ret = read (fd[0], buf, 4096);
- if (ret == -1)
- goto out;
- ret = write (fd[1], buf, 4096);
- if (ret == -1)
- goto out;
- total_size = total_size - 4096;
- } else {
- ret = read (fd[0], buf, total_size);
- if (ret == -1)
- goto out;
- ret = write (fd[1], buf, total_size);
- if (ret == -1)
- goto out;
- total_size = total_size - total_size;
- }
-
+ ret = write(fd[1], buf, total_size);
+ if (ret == -1)
+ goto out;
+ total_size = total_size - total_size;
}
+ }
- ret = 0;
+ ret = 0;
out:
- if (fd[0] > 0)
- close (fd[0]);
- if (fd[1] > 0)
- close (fd[1]);
- return ret;
+ if (fd[0] > 0)
+ close(fd[0]);
+ if (fd[1] > 0)
+ close(fd[1]);
+ return ret;
}
static int
-rdd_spawn_threads (void)
+rdd_spawn_threads(void)
{
- int i = 0, ret = -1, fd = -1;
- char buf[4096];
-
- ret = check_and_create ();
- if (ret == -1)
- goto out;
-
- fd = open (rdd_config.in_file.path, O_RDONLY);
- if (fd < 0) {
- fprintf (stderr, "cannot open %s (%s)\n",
- rdd_config.in_file.path, strerror (errno));
- ret = -1;
- goto out;
- }
- ret = fstat (fd, &rdd_config.in_file.st);
- if (ret != 0) {
- close (fd);
- fprintf (stderr, "cannot stat %s (%s)\n",
- rdd_config.in_file.path, strerror (errno));
- ret = -1;
- goto out;
- }
- rdd_config.in_file.fd = fd;
-
- fd = open (rdd_config.out_file.path, O_WRONLY | O_CREAT | O_TRUNC,
- S_IRWXU | S_IROTH);
- if (fd < 0) {
- close (rdd_config.in_file.fd);
- rdd_config.in_file.fd = -1;
- fprintf (stderr, "cannot open %s (%s)\n",
- rdd_config.out_file.path, strerror (errno));
- ret = -1;
- goto out;
- }
- rdd_config.out_file.fd = fd;
-
- while ((ret = read (rdd_config.in_file.fd, buf, 4096)) > 0) {
- if (write (rdd_config.out_file.fd, buf, ret) != ret) {
- fprintf (stderr, "write failed (%s)\n",
- strerror (errno));
- cleanup ();
- ret = -1;
- goto out;
- }
- }
-
- rdd_config.threads = calloc (rdd_config.thread_count,
- sizeof (pthread_t));
- if (rdd_config.threads == NULL) {
- fprintf (stderr, "calloc() failed (%s)\n", strerror (errno));
-
- ret = -1;
- cleanup ();
- goto out;
- }
-
- ret = pthread_barrier_init (&rdd_config.barrier, NULL,
- rdd_config.thread_count + 1);
- if (ret != 0) {
- fprintf (stderr, "pthread_barrier_init() failed (%s)\n",
- strerror (ret));
-
- free (rdd_config.threads);
- cleanup ();
- ret = -1;
- goto out;
+ int i = 0, ret = -1, fd = -1;
+ char buf[4096];
+
+ ret = check_and_create();
+ if (ret == -1)
+ goto out;
+
+ fd = open(rdd_config.in_file.path, O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "cannot open %s (%s)\n", rdd_config.in_file.path,
+ strerror(errno));
+ ret = -1;
+ goto out;
+ }
+ ret = fstat(fd, &rdd_config.in_file.st);
+ if (ret != 0) {
+ close(fd);
+ fprintf(stderr, "cannot stat %s (%s)\n", rdd_config.in_file.path,
+ strerror(errno));
+ ret = -1;
+ goto out;
+ }
+ rdd_config.in_file.fd = fd;
+
+ fd = open(rdd_config.out_file.path, O_WRONLY | O_CREAT | O_TRUNC,
+ S_IRWXU | S_IROTH);
+ if (fd < 0) {
+ close(rdd_config.in_file.fd);
+ rdd_config.in_file.fd = -1;
+ fprintf(stderr, "cannot open %s (%s)\n", rdd_config.out_file.path,
+ strerror(errno));
+ ret = -1;
+ goto out;
+ }
+ rdd_config.out_file.fd = fd;
+
+ while ((ret = read(rdd_config.in_file.fd, buf, 4096)) > 0) {
+ if (write(rdd_config.out_file.fd, buf, ret) != ret) {
+ fprintf(stderr, "write failed (%s)\n", strerror(errno));
+ cleanup();
+ ret = -1;
+ goto out;
}
-
- ret = pthread_mutex_init (&rdd_config.lock, NULL);
+ }
+
+ rdd_config.threads = calloc(rdd_config.thread_count, sizeof(pthread_t));
+ if (rdd_config.threads == NULL) {
+ fprintf(stderr, "calloc() failed (%s)\n", strerror(errno));
+
+ ret = -1;
+ cleanup();
+ goto out;
+ }
+
+ ret = pthread_barrier_init(&rdd_config.barrier, NULL,
+ rdd_config.thread_count + 1);
+ if (ret != 0) {
+ fprintf(stderr, "pthread_barrier_init() failed (%s)\n", strerror(ret));
+
+ free(rdd_config.threads);
+ cleanup();
+ ret = -1;
+ goto out;
+ }
+
+ ret = pthread_mutex_init(&rdd_config.lock, NULL);
+ if (ret != 0) {
+ fprintf(stderr, "pthread_mutex_init() failed (%s)\n", strerror(ret));
+
+ free(rdd_config.threads);
+ pthread_barrier_destroy(&rdd_config.barrier);
+ cleanup();
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < rdd_config.thread_count; i++) {
+ ret = pthread_create(&rdd_config.threads[i], NULL, rdd_read_write,
+ NULL);
if (ret != 0) {
- fprintf (stderr, "pthread_mutex_init() failed (%s)\n",
- strerror (ret));
-
- free (rdd_config.threads);
- pthread_barrier_destroy (&rdd_config.barrier);
- cleanup ();
- ret = -1;
- goto out;
- }
-
- for (i = 0; i < rdd_config.thread_count; i++)
- {
- ret = pthread_create (&rdd_config.threads[i], NULL,
- rdd_read_write, NULL);
- if (ret != 0) {
- fprintf (stderr, "pthread_create failed (%s)\n",
- strerror (errno));
- exit (1);
- }
+ fprintf(stderr, "pthread_create failed (%s)\n", strerror(errno));
+ exit(1);
}
+ }
out:
- return ret;
+ return ret;
}
static void
-rdd_wait_for_completion (void)
+rdd_wait_for_completion(void)
{
- pthread_barrier_wait (&rdd_config.barrier);
+ pthread_barrier_wait(&rdd_config.barrier);
}
-
int
-main (int argc, char *argv[])
+main(int argc, char *argv[])
{
- int ret = -1;
+ int ret = -1;
- rdd_default_config ();
+ rdd_default_config();
- ret = argp_parse (&argp, argc, argv, 0, 0, NULL);
- if (ret != 0) {
- ret = -1;
- fprintf (stderr, "%s: argp_parse() failed\n", argv[0]);
- goto err;
- }
+ ret = argp_parse(&argp, argc, argv, 0, 0, NULL);
+ if (ret != 0) {
+ ret = -1;
+ fprintf(stderr, "%s: argp_parse() failed\n", argv[0]);
+ goto err;
+ }
- if (!rdd_valid_config ()) {
- ret = -1;
- fprintf (stderr, "%s: configuration validation failed\n",
- argv[0]);
- goto err;
- }
+ if (!rdd_valid_config()) {
+ ret = -1;
+ fprintf(stderr, "%s: configuration validation failed\n", argv[0]);
+ goto err;
+ }
- ret = rdd_spawn_threads ();
- if (ret != 0) {
- fprintf (stderr, "%s: spawning threads failed\n", argv[0]);
- goto err;
- }
+ ret = rdd_spawn_threads();
+ if (ret != 0) {
+ fprintf(stderr, "%s: spawning threads failed\n", argv[0]);
+ goto err;
+ }
- rdd_wait_for_completion ();
+ rdd_wait_for_completion();
err:
- return ret;
+ return ret;
}
diff --git a/extras/check_goto.pl b/extras/check_goto.pl
new file mode 100755
index 00000000000..fa71bfc6683
--- /dev/null
+++ b/extras/check_goto.pl
@@ -0,0 +1,45 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+
+my @ignore_labels = qw (TODO retry fetch_data again try_again sp_state_read_proghdr redo disabled_loop fd_alloc_try_again);
+my @ignore_files = qw (y.tab.c lex.c);
+my @c_files;
+my $line;
+my @labels;
+my $in_comments;
+
+{
+ local $" = "|";
+ my $cmd = "find . -type f -name '*.c' | grep -vE '(@ignore_files)'";
+ @c_files = `$cmd`;
+}
+
+foreach my $file (@c_files) {
+ chomp ($file);
+ open FD, $file or die ("Failed to read file $file: $!");
+ @labels = ();
+ $in_comments = 0;
+ while ($line = <FD>) {
+ chomp ($line);
+
+ next if $line =~ /^\s*(#|\/\/)/;
+ $in_comments = 1 if ($line =~ /\/\*/);
+ $in_comments = 0 if ($line =~ /\*\//);
+
+ next if $in_comments;
+ if ($line =~ /^\s*(([a-zA-Z]|_)\w*)\s*:/) {
+ push (@labels, $1) unless grep (/$1/, @ignore_labels);
+ }
+ @labels = () if $line =~ /^}/;
+
+ next unless @labels;
+ if ($line =~ /^\s*goto\s*(\w+)/) {
+ print "$file:$.: $line\n" if grep /^$1$/, @labels;
+ }
+ }
+
+ close FD;
+}
+
diff --git a/extras/clang-checker.sh b/extras/clang-checker.sh
new file mode 100755
index 00000000000..4909d3adfcd
--- /dev/null
+++ b/extras/clang-checker.sh
@@ -0,0 +1,301 @@
+#!/usr/bin/env bash
+#*******************************************************************************
+# *
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com> *
+# This file is part of GlusterFS. *
+# *
+# This file is licensed to you under your choice of the GNU Lesser *
+# General Public License, version 3 or any later version (LGPLv3 or *
+# later), or the GNU General Public License, version 2 (GPLv2), in all *
+# cases as published by the Free Software Foundation. *
+#------------------------------------------------------------------------------*
+# *
+# clang-checker.sh: This script runs clang static analyzer using 'scan-build' *
+# a perl wrapper. After you commit your patch i.e. right *
+# before executing rfc.sh in order to push the patch to *
+# repository, it is recommended that you execute *
+# clang-checker.sh to perform static analysis inorder to *
+# check if there are any possible bugs in the code. *
+# *
+# This script performs the static analysis with and *
+# without HEAD commit, it runs the analyzer only in the *
+# directory where changes have been made and finally diff's *
+# the number of bugs using both cases (i.e. with and *
+# without your commit) and gives a summary, which explain's *
+# about the eligibility of your patch. *
+# *
+# Usage: $ cd $PATH_TO_GLUSTERFS *
+# $ extras/clang-checker.sh (or) $ make clang-check *
+# *
+# Author: Prasanna Kumar Kalever <prasanna.kalever@redhat.com> *
+# *
+#*******************************************************************************
+
+REPORTS_DIR=$(pwd)
+BASELINE_DIR=${REPORTS_DIR}/baseline
+BRESULTS_DIR=${BASELINE_DIR}/results
+BBACKUP_DIR=${BASELINE_DIR}/backup
+TARGET_DIR=${REPORTS_DIR}/target
+TRESULTS_DIR=${TARGET_DIR}/results
+TBACKUP_DIR=${TARGET_DIR}/backup
+
+declare -A DICT_B
+declare -A DICT_T
+declare -A ARR
+declare -A FILES
+
+function identify_changes () {
+ MODIFIED_DATA=$(git show --name-status --oneline | tail -n +2)
+ FLAG=0
+ for i in ${MODIFIED_DATA}; do
+ if [ $FLAG -eq 1 ]; then
+ ARR+="$(dirname $i) ";
+ FLAG=0;
+ fi
+ if [ $i = 'M' ] || [ $i = 'A' ]; then
+ FLAG=1;
+ fi
+ done
+
+ MODIFIED_DIR=$(echo "${ARR[@]}" | tr ' ' '\n' | sort -u | tr '\n' ' ')
+ for i in $MODIFIED_DIR; do
+ # run only in directories which has Makefile
+ if [ $(find ./$i -iname "makefile*" | wc -c) -gt 0 ]; then
+ # skip 'doc' and '.'(top) directory
+ if [ "xx$i" != "xxdoc" ] && [ "xx$i" != "xx." ]; then
+ FILES+="$i "
+ fi
+ fi
+ done
+ if [ -z $FILES ]; then
+ echo "Probably no changes made to 'c' files"
+ exit;
+ fi
+}
+
+function check_prerequisites () {
+ if ! type "clang" 2> /dev/null; then
+ echo -e "\ntry after installing clang and scan-build..."
+ echo "useful info at http://clang-analyzer.llvm.org/installation.html\n"
+ echo -e "hint: 'dnf -y install clang-analyzer.noarch'\n"
+ exit 1;
+ elif ! type "scan-build" 2> /dev/null; then
+ echo -e "\ntry after installing scan-build..."
+ echo "useful info at http://clang-analyzer.llvm.org/installation.html"
+ echo -e "hint: 'dnf -y install clang-analyzer.noarch'\n"
+ exit 1;
+ fi
+}
+
+function force_terminate () {
+ echo -e "\nreceived a signal to force terminate ..\n"
+ git am --abort 2> /dev/null
+ git am ${PATCH_NAME}
+ rm -f ${REPORTS_DIR}/${PATCH_NAME}
+ exit 1;
+}
+
+function run_scanbuild () {
+ local CLANG=$(which clang)
+ local SCAN_BUILD=$(which scan-build)
+ local ORIG_COMMIT=$(git rev-parse --verify HEAD^)
+ PATCH_NAME=$(git format-patch $ORIG_COMMIT)
+
+ echo -e "\n| Performing clang analysis on:" \
+ "$(git log --pretty=format:"%h - '%s' by %an" -1) ... |\n"
+ echo -e "Changes are identified in '${FILES[@]}' directorie[s]\n"
+
+ if [ -d "${BRESULTS_DIR}" ]; then
+ mkdir -p ${BBACKUP_DIR} ${TBACKUP_DIR}
+ mv ${BRESULTS_DIR} \
+ ${BBACKUP_DIR}/results_$(ls -l ${BBACKUP_DIR} | wc -l)
+ mv ${TRESULTS_DIR} \
+ ${TBACKUP_DIR}/results_$(ls -l ${TBACKUP_DIR} | wc -l)
+ fi
+ for DIR in ${FILES[@]}; do
+ mkdir -p ${BRESULTS_DIR}/$(echo ${DIR} | sed 's/\//_/g')
+ mkdir -p ${TRESULTS_DIR}/$(echo ${DIR} | sed 's/\//_/g')
+ done
+ # get nproc info
+ case $(uname -s) in
+ 'Linux')
+ local NPROC=$(getconf _NPROCESSORS_ONLN)
+ ;;
+ 'NetBSD')
+ local NPROC=$(getconf NPROCESSORS_ONLN)
+ ;;
+ esac
+
+ trap force_terminate INT TERM QUIT EXIT
+
+ git reset --hard HEAD^
+
+ # build complete source code for sake of dependencies
+ echo -e "\n# make -j${NPROC} ..."
+ make -j${NPROC} 1>/dev/null
+
+ for DIR in ${FILES[@]}; do
+ if [ $(find ./$i -iname "makefile*" | wc -c) -gt 0 ]; then
+ make clean -C ${DIR} 1>/dev/null
+ echo -e "\n| Analyzing ${DIR} without commit ... |\n"
+ # run only in directory where changes are made
+ ${SCAN_BUILD} -o ${BRESULTS_DIR}/$(echo ${DIR} | sed 's/\//_/g') \
+ --use-analyzer=${CLANG} make -j${NPROC} -C ${DIR}
+ fi
+ done
+
+ echo -e "\n| Analyzing without commit complete ... |\n"
+
+ git am ${PATCH_NAME}
+ trap - INT TERM QUIT EXIT
+
+ # In case commit has changes to configure stuff ?
+ echo -e "\n# make clean ..."
+ make clean 1>/dev/null
+ echo -e "\n# ./autogen.sh && ./configure --with-previous-options ..."
+ ${REPORTS_DIR}/autogen.sh 2>/dev/null
+ ${REPORTS_DIR}/configure --with-previous-options 1>/dev/null
+ echo -e "\n# make -j${NPROC} ..."
+ make -j${NPROC} 1>/dev/null
+
+ for DIR in ${FILES[@]}; do
+ if [ $(find ./$i -iname "makefile*" | wc -c) -gt 0 ]; then
+ make clean -C ${DIR} 1>/dev/null
+ echo -e "\n| Analyzing ${DIR} with commit ... |\n"
+ # run only in directory where changes are made
+ ${SCAN_BUILD} -o ${TRESULTS_DIR}/$(echo ${DIR} | sed 's/\//_/g') \
+ --use-analyzer=${CLANG} make -j${NPROC} -C ${DIR}
+ fi
+ done
+
+ echo -e "\n| Analyzing with commit complete ... |\n"
+
+ rm -f ${REPORTS_DIR}/${PATCH_NAME}
+}
+
+function count_for_baseline () {
+ for DIR in ${FILES[@]}; do
+ HTMLS_DIR=${BRESULTS_DIR}/$(echo ${DIR} |
+ sed 's/\//_/g')/$(ls ${BRESULTS_DIR}/$(echo ${DIR} |
+ sed 's/\//_/g')/);
+
+ local NAMES_OF_BUGS_B=$(grep -n "SUMM_DESC" ${HTMLS_DIR}/index.html |
+ cut -d"<" -f3 | cut -d">" -f2 |
+ sed 's/[^a-zA-Z0]/_/g' | tr '\n' ' ')
+ local NO_OF_BUGS_B=$(grep -n "SUMM_DESC" ${HTMLS_DIR}/index.html |
+ cut -d"<" -f5 | cut -d">" -f2 | tr '\n' ' ')
+ local count_B=0;
+
+ read -a BUG_NAME_B <<<$NAMES_OF_BUGS_B
+ read -a BUG_COUNT_B <<<$NO_OF_BUGS_B
+ for i in ${BUG_NAME_B[@]};
+ do
+ if [ ! -z ${DICT_B[$i]} ]; then
+ DICT_B[$i]=$(expr ${BUG_COUNT_B[count_B]} + ${DICT_B[$i]});
+ else
+ DICT_B+=([$i]=${BUG_COUNT_B[count_B]});
+ fi
+ count_B=$(expr $count_B + 1)
+ done
+ done
+
+ echo -e "\nBASELINE BUGS LIST (before applying patch):"
+ echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+ for key_B in ${!DICT_B[@]}; do
+ echo "${key_B} --> ${DICT_B[${key_B}]}" | sed 's/_/ /g' | tr -s ' '
+ done
+}
+
+function count_for_target () {
+ for DIR in ${FILES[@]}; do
+ HTMLS_DIR=${TRESULTS_DIR}/$(echo ${DIR} |
+ sed 's/\//_/g')/$(ls ${TRESULTS_DIR}/$(echo ${DIR} |
+ sed 's/\//_/g')/);
+
+ local NAME_OF_BUGS_T=$(grep -n "SUMM_DESC" ${HTMLS_DIR}/index.html |
+ cut -d"<" -f3 | cut -d">" -f2 |
+ sed 's/[^a-zA-Z0]/_/g'| tr '\n' ' ')
+ local NO_OF_BUGS_T=$(grep -n "SUMM_DESC" ${HTMLS_DIR}/index.html |
+ cut -d"<" -f5 | cut -d">" -f2 | tr '\n' ' ')
+ local count_T=0;
+
+ read -a BUG_NAME_T <<<$NAME_OF_BUGS_T
+ read -a BUG_COUNT_T <<<$NO_OF_BUGS_T
+
+ for i in ${BUG_NAME_T[@]};
+ do
+ if [ ! -z ${DICT_T[$i]} ]; then
+ DICT_T[$i]=$(expr ${BUG_COUNT_T[count_T]} + ${DICT_T[$i]});
+ else
+ DICT_T+=([$i]=${BUG_COUNT_T[count_T]});
+ fi
+ count_T=$(expr $count_T + 1)
+ done
+ done
+
+ echo -e "\nTARGET BUGS LIST (after applying patch):"
+ echo "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+ for key_T in ${!DICT_T[@]}; do
+ echo "${key_T} --> ${DICT_T[${key_T}]}" | sed 's/_/ /g' | tr -s ' '
+ done
+}
+
+function array_contains () {
+ local SEEKING=$1; shift
+ local IN=1
+ for ELEMENT; do
+ if [[ $ELEMENT == $SEEKING ]]; then
+ IN=0
+ break
+ fi
+ done
+ return $IN
+}
+
+function main () {
+ echo -e "\n================ Clang analyzer in progress ================\n"
+ check_prerequisites
+ identify_changes
+ run_scanbuild
+ clear
+ count_for_baseline
+ count_for_target
+ echo -e "\nSUMMARY OF CLANG-ANALYZER:"
+ echo "~~~~~~~~~~~~~~~~~~~~~~~~~~"
+
+ FLAG=0
+ for BUG in ${!DICT_T[@]}; do
+ array_contains $BUG "${!DICT_B[@]}"
+ if [ $? -eq 1 ]; then
+ echo "New ${DICT_T[${BUG}]} Bug[s] introduced: $(echo $BUG |
+ sed 's/_/ /g' |
+ tr -s ' ')"
+ FLAG=1
+ else
+ if [ ${BUG} != "All_Bugs" ]; then
+ if [ ${DICT_B[${BUG}]} -lt \
+ ${DICT_T[${BUG}]} ]; then
+ echo "Extra $(expr ${DICT_T[${BUG}]} - \
+ ${DICT_B[${BUG}]}) Bug[s] Introduced in: $(echo $BUG |
+ sed 's/_/ /g' | tr -s ' ')"
+ FLAG=1
+ fi
+ fi
+ fi
+ done
+
+ echo
+ if [ $FLAG -eq 0 ]; then
+ echo -e "Patch Value given by Clang analyzer '+1'\n"
+ else
+ echo -e "Patch Value given by Clang analyzer '-1'\n"
+ fi
+ echo -e "\nExplore complete results at:"
+ find ${BRESULTS_DIR}/ -iname "index.html"
+ find ${TRESULTS_DIR}/ -iname "index.html"
+ echo -e "\n================= Done with Clang Analysis =================\n"
+
+ exit ${FLAG}
+}
+
+main
diff --git a/extras/cliutils/Makefile.am b/extras/cliutils/Makefile.am
new file mode 100644
index 00000000000..7039703e275
--- /dev/null
+++ b/extras/cliutils/Makefile.am
@@ -0,0 +1,4 @@
+EXTRA_DIST= cliutils.py __init__.py
+
+cliutilsdir = @BUILD_PYTHON_SITE_PACKAGES@/gluster/cliutils
+cliutils_PYTHON = cliutils.py __init__.py
diff --git a/extras/cliutils/README.md b/extras/cliutils/README.md
new file mode 100644
index 00000000000..309beb1ca25
--- /dev/null
+++ b/extras/cliutils/README.md
@@ -0,0 +1,233 @@
+# CLI utility for creating Cluster aware CLI tools for Gluster
+cliutils is a Python library which provides wrapper around `gluster system::
+execute` command to extend the functionalities of Gluster.
+
+Example use cases:
+- Start a service in all peer nodes of Cluster
+- Collect the status of a service from all peer nodes
+- Collect the config values from each peer nodes and display latest
+ config based on version.
+- Copy a file present in GLUSTERD_WORKDIR from one peer node to all
+ other peer nodes.(Geo-replication create push-pem is using this to
+ distribute the SSH public keys from all master nodes to all slave
+ nodes)
+- Generate pem keys in all peer nodes and collect all the public keys
+ to one place(Geo-replication gsec_create is doing this)
+- Provide Config sync CLIs for new features like `gluster-eventsapi`,
+ `gluster-restapi`, `gluster-mountbroker` etc.
+
+## Introduction
+
+If a executable file present in `$GLUSTER_LIBEXEC` directory in all
+peer nodes(Filename startswith `peer_`) then it can be executed by
+running `gluster system:: execute` command from any one peer node.
+
+- This command will not copy any executables to peer nodes, Script
+ should exist in all peer nodes to use this infrastructure. Raises
+ error in case script not exists in any one of the peer node.
+- Filename should start with `peer_` and should exist in
+ `$GLUSTER_LIBEXEC` directory.
+- This command can not be called from outside the cluster.
+
+To understand the functionality, create a executable file `peer_hello`
+under $GLUSTER_LIBEXEC directory and copy to all peer nodes.
+
+ #!/usr/bin/env bash
+ echo "Hello from $(gluster system:: uuid get)"
+
+Now run the following command from any one gluster node,
+
+ gluster system:: execute hello
+
+**Note:** Gluster will not copy the executable script to all nodes,
+ copy `peer_hello` script to all peer nodes to use `gluster system::
+ execute` infrastructure.
+
+It will run `peer_hello` executable in all peer nodes and shows the
+output from each node(Below example shows output from my two nodes
+cluster)
+
+ Hello from UUID: e7a3c5c8-e7ad-47ad-aa9c-c13907c4da84
+ Hello from UUID: c680fc0a-01f9-4c93-a062-df91cc02e40f
+
+## cliutils
+A Python wrapper around `gluster system:: execute` command is created
+to address the following issues
+
+- If a node is down in the cluster, `system:: execute` just skips it
+ and runs only in up nodes.
+- `system:: execute` commands are not user friendly
+- It captures only stdout, so handling errors is tricky.
+
+**Advantages of cliutils:**
+
+- Single executable file will act as node component as well as User CLI.
+- `execute_in_peers` utility function will merge the `gluster system::
+ execute` output with `gluster peer status` to identify offline nodes.
+- Easy CLI Arguments handling.
+- If node component returns non zero return value then, `gluster
+ system:: execute` will fail to aggregate the output from other
+ nodes. `node_output_ok` or `node_output_notok` utility functions
+ returns zero both in case of success or error, but returns json
+ with ok: true or ok:false respectively.
+- Easy to iterate on the node outputs.
+- Better error handling - Geo-rep CLIs `gluster system:: execute
+ mountbroker`, `gluster system:: execute gsec_create` and `gluster
+ system:: add_secret_pub` are suffering from error handling. These
+ tools are not notifying user if any failures during execute or if a node
+ is down during execute.
+
+### Hello World
+Create a file in `$LIBEXEC/glusterfs/peer_message.py` with following
+content.
+
+ #!/usr/bin/python3
+ from gluster.cliutils import Cmd, runcli, execute_in_peers, node_output_ok
+
+ class NodeHello(Cmd):
+ name = "node-hello"
+
+ def run(self, args):
+ node_output_ok("Hello")
+
+ class Hello(Cmd):
+ name = "hello"
+
+ def run(self, args):
+ out = execute_in_peers("node-hello")
+ for row in out:
+ print ("{0} from {1}".format(row.output, row.hostname))
+
+ if __name__ == "__main__":
+ runcli()
+
+When we run `python peer_message.py`, it will have two subcommands,
+"node-hello" and "hello". This file should be copied to
+`$LIBEXEC/glusterfs` directory in all peer nodes. User will call
+subcommand "hello" from any one peer node, which internally call
+`gluster system:: execute message.py node-hello`(This runs in all peer
+nodes and collect the outputs)
+
+For node component do not print the output directly, use
+`node_output_ok` or `node_output_notok` functions. `node_output_ok`
+additionally collects the node UUID and prints in JSON
+format. `execute_in_peers` function will collect this output and
+merges with `peers list` so that we don't miss the node information if
+that node is offline.
+
+If you observed already, function `args` is optional, if you don't
+have arguments then no need to create a function. When we run the
+file, we will have two subcommands. For example,
+
+ python peer_message.py hello
+ python peer_message.py node-hello
+
+First subcommand calls second subcommand in all peer nodes. Basically
+`execute_in_peers(NAME, ARGS)` will be converted into
+
+ CMD_NAME = FILENAME without "peers_"
+ gluster system:: execute <CMD_NAME> <SUBCOMMAND> <ARGS>
+
+In our example,
+
+ filename = "peer_message.py"
+ cmd_name = "message.py"
+ gluster system:: execute ${cmd_name} node-hello
+
+Now create symlink in `/usr/bin` or `/usr/sbin` directory depending on
+the usecase.(Optional step for usability)
+
+ ln -s /usr/libexec/glusterfs/peer_message.py /usr/bin/gluster-message
+
+Now users can use `gluster-message` instead of calling
+`/usr/libexec/glusterfs/peer_message.py`
+
+ gluster-message hello
+
+### Showing CLI output as Table
+
+Following example uses prettytable library, which can be installed
+using `pip install prettytable` or `dnf install python-prettytable`
+
+ #!/usr/bin/python3
+ from prettytable import PrettyTable
+ from gluster.cliutils import Cmd, runcli, execute_in_peers, node_output_ok
+
+ class NodeHello(Cmd):
+ name = "node-hello"
+
+ def run(self, args):
+ node_output_ok("Hello")
+
+ class Hello(Cmd):
+ name = "hello"
+
+ def run(self, args):
+ out = execute_in_peers("node-hello")
+ # Initialize the CLI table
+ table = PrettyTable(["ID", "NODE", "NODE STATUS", "MESSAGE"])
+ table.align["NODE STATUS"] = "r"
+ for row in out:
+ table.add_row([row.nodeid,
+ row.hostname,
+ "UP" if row.node_up else "DOWN",
+ row.output if row.ok else row.error])
+
+ print table
+
+ if __name__ == "__main__":
+ runcli()
+
+
+Example output,
+
+ +--------------------------------------+-----------+-------------+---------+
+ | ID | NODE | NODE STATUS | MESSAGE |
+ +--------------------------------------+-----------+-------------+---------+
+ | e7a3c5c8-e7ad-47ad-aa9c-c13907c4da84 | localhost | UP | Hello |
+ | bb57a4c4-86eb-4af5-865d-932148c2759b | vm2 | UP | Hello |
+ | f69b918f-1ffa-4fe5-b554-ee10f051294e | vm3 | DOWN | N/A |
+ +--------------------------------------+-----------+-------------+---------+
+
+## How to package in Gluster
+If the project is created in `$GLUSTER_SRC/tools/message`
+
+Add "message" to SUBDIRS list in `$GLUSTER_SRC/tools/Makefile.am`
+
+and then create a `Makefile.am` in `$GLUSTER_SRC/tools/message`
+directory with following content.
+
+ EXTRA_DIST = peer_message.py
+
+ peertoolsdir = $(libexecdir)/glusterfs/
+ peertools_SCRIPTS = peer_message.py
+
+ install-exec-hook:
+ $(mkdir_p) $(DESTDIR)$(bindir)
+ rm -f $(DESTDIR)$(bindir)/gluster-message
+ ln -s $(libexecdir)/glusterfs/peer_message.py \
+ $(DESTDIR)$(bindir)/gluster-message
+
+ uninstall-hook:
+ rm -f $(DESTDIR)$(bindir)/gluster-message
+
+Thats all. Add following files in `glusterfs.spec.in` if packaging is
+required.(Under `%files` section)
+
+ %{_libexecdir}/glusterfs/peer_message.py*
+ %{_bindir}/gluster-message
+
+## Who is using cliutils
+- gluster-mountbroker http://review.gluster.org/14544
+- gluster-eventsapi http://review.gluster.org/14248
+- gluster-georep-sshkey http://review.gluster.org/14732
+- gluster-restapi https://github.com/gluster/restapi
+
+## Limitations/TODOs
+- Not yet possible to create CLI without any subcommand, For example
+ `gluster-message` without any arguments
+- Hiding node subcommands in `--help`(`gluster-message --help` will
+ show all subcommands including node subcommands)
+- Only positional arguments supported for node arguments, Optional
+ arguments can be used for other commands.
+- API documentation
diff --git a/extras/cliutils/__init__.py b/extras/cliutils/__init__.py
new file mode 100644
index 00000000000..8765cc85099
--- /dev/null
+++ b/extras/cliutils/__init__.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+# Reexporting the utility funcs and classes
+from .cliutils import (runcli,
+ sync_file_to_peers,
+ execute_in_peers,
+ execute,
+ node_output_ok,
+ node_output_notok,
+ output_error,
+ oknotok,
+ yesno,
+ get_node_uuid,
+ Cmd,
+ GlusterCmdException,
+ set_common_args_func)
+
+
+# This will be useful when `from cliutils import *`
+__all__ = ["runcli",
+ "sync_file_to_peers",
+ "execute_in_peers",
+ "execute",
+ "node_output_ok",
+ "node_output_notok",
+ "output_error",
+ "oknotok",
+ "yesno",
+ "get_node_uuid",
+ "Cmd",
+ "GlusterCmdException",
+ "set_common_args_func"]
diff --git a/extras/cliutils/cliutils.py b/extras/cliutils/cliutils.py
new file mode 100644
index 00000000000..55fbaf56704
--- /dev/null
+++ b/extras/cliutils/cliutils.py
@@ -0,0 +1,237 @@
+# -*- coding: utf-8 -*-
+from __future__ import print_function
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+import inspect
+import subprocess
+import os
+import xml.etree.cElementTree as etree
+import json
+import sys
+
+MY_UUID = None
+parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+ description=__doc__)
+subparsers = parser.add_subparsers(dest="mode")
+
+subcommands = {}
+cache_data = {}
+ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError
+_common_args_func = lambda p: True
+
+
+class GlusterCmdException(Exception):
+ def __init__(self, message):
+ self.message = message
+ try:
+ # Python 3
+ super().__init__(message)
+ except TypeError:
+ # Python 2
+ super(GlusterCmdException, self).__init__(message)
+
+
+def get_node_uuid():
+ # Caches the Node UUID in global variable,
+ # Executes gluster system:: uuid get command only if
+ # calling this function for first time
+ global MY_UUID
+ if MY_UUID is not None:
+ return MY_UUID
+
+ cmd = ["gluster", "system::", "uuid", "get", "--xml"]
+ rc, out, err = execute(cmd)
+
+ if rc != 0:
+ return None
+
+ tree = etree.fromstring(out)
+ uuid_el = tree.find("uuidGenerate/uuid")
+ MY_UUID = uuid_el.text
+ return MY_UUID
+
+
+def yesno(flag):
+ return "Yes" if flag else "No"
+
+
+def oknotok(flag):
+ return "OK" if flag else "NOT OK"
+
+
+def output_error(message, errcode=1):
+ print (message, file=sys.stderr)
+ sys.exit(errcode)
+
+
+def node_output_ok(message=""):
+ # Prints Success JSON output and exits with returncode zero
+ out = {"ok": True, "nodeid": get_node_uuid(), "output": message}
+ print (json.dumps(out))
+ sys.exit(0)
+
+
+def node_output_notok(message):
+ # Prints Error JSON output and exits with returncode zero
+ out = {"ok": False, "nodeid": get_node_uuid(), "error": message}
+ print (json.dumps(out))
+ sys.exit(0)
+
+
+def execute(cmd):
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
+ universal_newlines=True)
+ out, err = p.communicate()
+ return p.returncode, out, err
+
+
+def get_pool_list():
+ cmd = ["gluster", "--mode=script", "pool", "list", "--xml"]
+ rc, out, err = execute(cmd)
+ if rc != 0:
+ output_error("Failed to get Pool Info: {0}".format(err))
+
+ tree = etree.fromstring(out)
+
+ pool = []
+ try:
+ for p in tree.findall('peerStatus/peer'):
+ pool.append({"nodeid": p.find("uuid").text,
+ "hostname": p.find("hostname").text,
+ "connected": (True if p.find("connected").text == "1"
+ else False)})
+ except (ParseError, AttributeError, ValueError) as e:
+ output_error("Failed to parse Pool Info: {0}".format(e))
+
+ return pool
+
+
+class NodeOutput(object):
+ def __init__(self, **kwargs):
+ self.nodeid = kwargs.get("nodeid", "")
+ self.hostname = kwargs.get("hostname", "")
+ self.node_up = kwargs.get("node_up", False)
+ self.ok = kwargs.get("ok", False)
+ self.output = kwargs.get("output", "N/A")
+ self.error = kwargs.get("error", "N/A")
+
+
+def execute_in_peers(name, args=[]):
+ # Get the file name of Caller function, If the file name is peer_example.py
+ # then Gluster peer command will be gluster system:: execute example.py
+ # Command name is without peer_
+ frame = inspect.stack()[1]
+ module = inspect.getmodule(frame[0])
+ actual_file = module.__file__
+ # If file is symlink then find actual file
+ if os.path.islink(actual_file):
+ actual_file = os.readlink(actual_file)
+
+ # Get the name of file without peer_
+ cmd_name = os.path.basename(actual_file).replace("peer_", "")
+ cmd = ["gluster", "system::", "execute", cmd_name, name] + args
+ rc, out, err = execute(cmd)
+ if rc != 0:
+ raise GlusterCmdException((rc, out, err, " ".join(cmd)))
+
+ out = out.strip().splitlines()
+
+ # JSON decode each line and construct one object with node id as key
+ all_nodes_data = {}
+ for node_data in out:
+ data = json.loads(node_data)
+ all_nodes_data[data["nodeid"]] = {
+ "nodeid": data.get("nodeid"),
+ "ok": data.get("ok"),
+ "output": data.get("output", ""),
+ "error": data.get("error", "")}
+
+ # gluster pool list
+ pool_list = get_pool_list()
+
+ data_out = []
+ # Iterate pool_list and merge all_nodes_data collected above
+ # If a peer node is down then set node_up = False
+ for p in pool_list:
+ p_data = all_nodes_data.get(p.get("nodeid"), None)
+ row_data = NodeOutput(node_up=False,
+ hostname=p.get("hostname"),
+ nodeid=p.get("nodeid"),
+ ok=False)
+
+ if p_data is not None:
+ # Node is UP
+ row_data.node_up = True
+ row_data.ok = p_data.get("ok")
+ row_data.output = p_data.get("output")
+ row_data.error = p_data.get("error")
+
+ data_out.append(row_data)
+
+ return data_out
+
+
+def sync_file_to_peers(fname):
+ # Copy file from current node to all peer nodes, fname
+ # is path after GLUSTERD_WORKDIR
+ cmd = ["gluster", "system::", "copy", "file", fname]
+ rc, out, err = execute(cmd)
+ if rc != 0:
+ raise GlusterCmdException((rc, out, err))
+
+
+class Cmd(object):
+ name = ""
+
+ def run(self, args):
+ # Must required method. Raise NotImplementedError if derived class
+ # not implemented this method
+ raise NotImplementedError("\"run(self, args)\" method is "
+ "not implemented by \"{0}\"".format(
+ self.__class__.__name__))
+
+
+def runcli():
+ # Get list of Classes derived from class "Cmd" and create
+ # a subcommand as specified in the Class name. Call the args
+ # method by passing subcommand parser, Derived class can add
+ # arguments to the subcommand parser.
+ metavar_data = []
+ for c in Cmd.__subclasses__():
+ cls = c()
+ if getattr(cls, "name", "") == "":
+ raise NotImplementedError("\"name\" is not added "
+ "to \"{0}\"".format(
+ cls.__class__.__name__))
+
+ # Do not show in help message if subcommand starts with node-
+ if not cls.name.startswith("node-"):
+ metavar_data.append(cls.name)
+
+ p = subparsers.add_parser(cls.name)
+ args_func = getattr(cls, "args", None)
+ if args_func is not None:
+ args_func(p)
+
+ # Apply common args if any
+ _common_args_func(p)
+
+ # A dict to save subcommands, key is name of the subcommand
+ subcommands[cls.name] = cls
+
+ # Hide node commands in Help message
+ subparsers.metavar = "{" + ",".join(metavar_data) + "}"
+
+ # Get all parsed arguments
+ args = parser.parse_args()
+
+ # Get the subcommand to execute
+ cls = subcommands.get(args.mode, None)
+
+ # Run
+ if cls is not None:
+ cls.run(args)
+
+
+def set_common_args_func(func):
+ global _common_args_func
+ _common_args_func = func
diff --git a/extras/collect-system-stats.sh b/extras/collect-system-stats.sh
new file mode 100755
index 00000000000..865e70bbc11
--- /dev/null
+++ b/extras/collect-system-stats.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+################################################################################
+# Usage: collect-system-stats.sh <delay-in-seconds>
+# This script starts sar/top/iostat/vmstat processes which collect system stats
+# with the interval <delay-in-seconds> given as argument to the script. When
+# the script is stopped either by entering any input or Ctrl+C the list of
+# files where output is captured will be printed on the screen which can be
+# observed to find any problems/bottlenecks.
+###############################################################################
+
+function stop_processes {
+ echo "Stopping the monitoring processes"
+ echo "sar pid:$sar_pid", "top pid: $top_pid", "iostat pid: $iostat_pid", "vmstat pid: $vmstat_pid"
+ kill "$sar_pid" "$top_pid" "$iostat_pid" "$vmstat_pid"
+ echo "Files created: ${timestamp}-network.out, ${timestamp}-top.out, ${timestamp}-iostat.out, ${timestamp}-vmstat.out"
+}
+
+function check_dependent_commands_exist()
+{
+ declare -a arr=("sar" "top" "iostat" "vmstat")
+ for i in "${arr[@]}"
+ do
+ if ! command -v "$i" > /dev/null 2>&1
+ then
+ echo "ERROR: '$i' command is not found"
+ exit 1
+ fi
+ done
+
+}
+
+case "$1" in
+ ''|*[!0-9]*) echo "Usage: $0 <delay-between-successive-metrics-collection-in-seconds>"; exit 1 ;;
+ *) interval="$1" ;;
+esac
+
+timestamp=$(date +"%s")
+
+check_dependent_commands_exist
+sar -n DEV "$interval" > "${timestamp}"-network.out &
+sar_pid="$!"
+top -bHd "$interval" > "${timestamp}"-top.out &
+top_pid="$!"
+iostat -Ntkdx "$interval" > "${timestamp}"-iostat.out &
+iostat_pid="$!"
+vmstat -t "$interval" > "${timestamp}"-vmstat.out &
+vmstat_pid="$!"
+echo "Started sar, vmstat, iostat, top for collecting stats"
+
+
+trap stop_processes EXIT
+read -r -p "Press anything and ENTER to exit";
diff --git a/extras/command-completion/Makefile b/extras/command-completion/Makefile
new file mode 100644
index 00000000000..06cbfe0672f
--- /dev/null
+++ b/extras/command-completion/Makefile
@@ -0,0 +1,6 @@
+install:
+ mkdir -p /etc/bash_completion.d
+ cp gluster.bash /etc/bash_completion.d/gluster
+
+uninstall:
+ rm -f /etc/bash_completion.d/gluster
diff --git a/extras/command-completion/README b/extras/command-completion/README
new file mode 100644
index 00000000000..0acba38168f
--- /dev/null
+++ b/extras/command-completion/README
@@ -0,0 +1,5 @@
+This file is not moved to /etc/bash_completion.d/ with the source installation.
+Please execute make install explicity from here to move this file to
+/etc/bash_completion.d
+
+Similarly, use make uninstall to remove it from /etc/bash_completion.d
diff --git a/extras/command-completion/gluster.bash b/extras/command-completion/gluster.bash
new file mode 100644
index 00000000000..73d16098875
--- /dev/null
+++ b/extras/command-completion/gluster.bash
@@ -0,0 +1,492 @@
+#!/bin/bash
+
+if pidof glusterd > /dev/null 2>&1; then
+ GLUSTER_SET_OPTIONS="
+ $(for token in `gluster volume set help 2>/dev/null | grep "^Option:" | cut -d ' ' -f 2`
+ do
+ echo "{$token},"
+ done)
+ "
+ GLUSTER_RESET_OPTIONS="$GLUSTER_SET_OPTIONS"
+fi
+
+GLUSTER_TOP_SUBOPTIONS1="
+ {nfs},
+ {brick},
+ {list-cnt}
+"
+GLUSTER_TOP_SUBOPTIONS2="
+ {bs
+ {__SIZE
+ {count}
+ }
+ },
+ {brick},
+ {list-cnt}
+"
+GLUSTER_TOP_OPTIONS="
+ {open
+ [ $GLUSTER_TOP_SUBOPTIONS1 ]
+ },
+ {read
+ [ $GLUSTER_TOP_SUBOPTIONS1 ]
+ },
+ {write
+ [ $GLUSTER_TOP_SUBOPTIONS1 ]
+ },
+ {opendir
+ [ $GLUSTER_TOP_SUBOPTIONS1 ]
+ },
+ {readdir
+ [ $GLUSTER_TOP_SUBOPTIONS1 ]
+ },
+ {clear
+ [ $GLUSTER_TOP_SUBOPTIONS1 ]
+ },
+ {read-perf
+ [ $GLUSTER_TOP_SUBOPTIONS2 ]
+ },
+ {write-perf
+ [ $GLUSTER_TOP_SUBOPTIONS2 ]
+ }
+"
+
+GLUSTER_QUOTA_OPTIONS="
+ {enable},
+ {disable},
+ {list},
+ {remove},
+ {default-soft-limit},
+ {limit-usage},
+ {alert-time},
+ {soft-timeout},
+ {hard-timeout}
+"
+
+GLUSTER_PROFILE_OPTIONS="
+ {start},
+ {info [
+ {peek},
+ {incremental
+ {peek}
+ },
+ {cumulative},
+ {clear},
+ ]
+ },
+ {stop}
+"
+
+GLUSTER_BARRIER_OPTIONS="
+ {enable},
+ {disable}
+"
+
+GLUSTER_GEO_REPLICATION_SUBOPTIONS="
+"
+GLUSTER_GEO_REPLICATION_OPTIONS="
+ {__VOLNAME [
+ {__SLAVEURL [
+ {create [
+ {push-pem
+ {force}
+ },
+ {force}
+ ]
+ },
+ {start {force} },
+ {status {detail} },
+ {config},
+ {pause {force} },
+ {resume {force} },
+ {stop {force} },
+ {delete {force} }
+ ]
+ },
+ {status}
+ ]
+ },
+ {status}
+"
+
+GLUSTER_VOLUME_OPTIONS="
+ {volume [
+ {add-brick
+ {__VOLNAME}
+ },
+ {barrier
+ {__VOLNAME
+ [ $GLUSTER_BARRIER_OPTIONS ]
+ }
+ },
+ {clear-locks
+ {__VOLNAME}
+ },
+ {create},
+ {delete
+ {__VOLNAME}
+ },
+ {geo-replication
+ [ $GLUSTER_GEO_REPLICATION_OPTIONS ]
+ },
+ {heal
+ {__VOLNAME}
+ },
+ {help},
+ {info
+ {__VOLNAME}
+ },
+ {list},
+ {log
+ {__VOLNAME}
+ },
+ {profile
+ {__VOLNAME
+ [ $GLUSTER_PROFILE_OPTIONS ]
+ }
+ },
+ {quota
+ {__VOLNAME
+ [ $GLUSTER_QUOTA_OPTIONS ]
+ }
+ },
+ {rebalance
+ {__VOLNAME}
+ },
+ {remove-brick
+ {__VOLNAME}
+ },
+ {replace-brick
+ {__VOLNAME}
+ },
+ {reset
+ {__VOLNAME
+ [ $GLUSTER_RESET_OPTIONS ]
+ }
+ },
+ {set
+ {__VOLNAME
+ [ $GLUSTER_SET_OPTIONS ]
+ }
+ },
+ {start
+ {__VOLNAME
+ {force}
+ }
+ },
+ {statedump
+ {__VOLNAME}
+ },
+ {status
+ {__VOLNAME}
+ },
+ {stop
+ {__VOLNAME
+ {force}
+ }
+ },
+ {sync
+ {__HOSTNAME}
+ },
+ {top
+ {__VOLNAME
+ [ $GLUSTER_TOP_OPTIONS ]
+ }
+ }
+ ]
+ }
+"
+
+GLUSTER_COMMAND_TREE="
+{gluster [
+ $GLUSTER_VOLUME_OPTIONS ,
+ {peer [
+ {probe
+ {__HOSTNAME}
+ },
+ {detach
+ {__HOSTNAME
+ {force}
+ }
+ },
+ {status}
+ ]
+ },
+ {pool
+ {list}
+ },
+ {help}
+ ]
+}"
+
+__SIZE ()
+{
+ return 0
+}
+
+__SLAVEURL ()
+{
+ return 0
+}
+
+__HOSTNAME ()
+{
+ local zero=0
+ local ret=0
+ local cur_word="$2"
+
+ if [ "$1" == "X" ]; then
+ return
+
+ elif [ "$1" == "match" ]; then
+ return 0
+
+ elif [ "$1" == "complete" ]; then
+ COMPREPLY=($(compgen -A hostname -- $cur_word))
+ fi
+ return 0
+}
+
+__VOLNAME ()
+{
+ local zero=0
+ local ret=0
+ local cur_word="$2"
+ local list=""
+
+ if [ "X$1" == "X" ]; then
+ return
+
+ elif [ "$1" == "match" ]; then
+ return 0
+
+ elif [ "$1" == "complete" ]; then
+ if ! pidof glusterd > /dev/null 2>&1; then
+ list='';
+
+ else
+ list=`gluster volume list 2> /dev/null`
+ fi
+
+ else
+ return 0
+ fi
+
+ COMPREPLY=($(compgen -W "$list" -- $cur_word))
+ return 0
+}
+
+_gluster_throw () {
+#echo $1 >&2
+ COMPREPLY=''
+ exit
+}
+
+declare GLUSTER_FINAL_LIST=''
+declare GLUSTER_LIST=''
+declare -i GLUSTER_TOP=0
+_gluster_push () {
+ GLUSTER_TOP=$((GLUSTER_TOP + 1))
+ return $GLUSTER_TOP
+}
+_gluster_pop () {
+ GLUSTER_TOP=$((GLUSTER_TOP - 1))
+ return $GLUSTER_TOP
+}
+
+_gluster_goto_end ()
+{
+ local prev_top=$1
+ local top=$1
+ local token=''
+
+ while [ $top -ge $prev_top ]; do
+ read -r token
+ case $token in
+ '{' | '[')
+ _gluster_push
+ top=$?
+ ;;
+ '}' | ']')
+ _gluster_pop
+ top=$?
+ ;;
+ esac
+ done
+
+ return
+}
+
+_gluster_form_list ()
+{
+ local token=''
+ local top=0
+ local comma=''
+ local cur_word="$1"
+
+ read -r token
+ case $token in
+ ']')
+ ;;
+ '{')
+ _gluster_push
+ top=$?
+ read -r key
+ if [ "X$cur_word" == "X" -o "${cur_word:0:1}" == "${key:0:1}" -o "${key:0:1}" == "_" ]; then
+ GLUSTER_LIST="$GLUSTER_LIST $key"
+ fi
+
+ _gluster_goto_end $top
+ read -r comma
+ if [ "$comma" == "," ]; then
+ _gluster_form_list $cur_word
+ fi
+ ;;
+ *)
+ _gluster_throw "Expected '{' but received $token"
+ ;;
+ esac
+
+ return
+}
+
+_gluster_goto_child ()
+{
+ local match_string="$1"
+ local token=''
+ local top=0
+ local comma=''
+
+ read -r token
+ case $token in
+ '{')
+ _gluster_push
+ top=$?
+ ;;
+ *)
+ _gluster_throw "Expected '{' but received $token"
+ ;;
+ esac
+
+ read -r token
+ case `echo $token` in
+ '[' | ']' | '{' | '}')
+ _gluster_throw "Expected string but received $token"
+ ;;
+ _*)
+ $token "match" $match_string
+ ret=$?
+ if [ $ret -eq 0 ]; then
+ return
+ else
+ _gluster_goto_end $top
+
+ read -r comma
+ if [ "$comma" == "," ]; then
+ _gluster_goto_child $match_string
+ fi
+ fi
+ ;;
+
+ "$match_string")
+ return
+ ;;
+ *)
+ _gluster_goto_end $top
+
+ read -r comma
+ if [ "$comma" == "," ]; then
+ _gluster_goto_child $match_string
+ fi
+ ;;
+ esac
+
+ return
+}
+
+_gluster_does_match ()
+{
+ local token="$1"
+ local key="$2"
+
+ if [ "${token:0:1}" == "_" ]; then
+ $token $2
+ return $?
+ fi
+
+ [ "$token" == "$key" ] && return 0
+
+ return 1
+}
+
+_gluster_parse ()
+{
+ local i=0
+ local token=''
+ local tmp_token=''
+ local word=''
+
+ while [ $i -lt $COMP_CWORD ]; do
+ read -r token
+ case $token in
+ '[')
+ _gluster_push
+ _gluster_goto_child ${COMP_WORDS[$i]}
+ ;;
+ '{')
+ _gluster_push
+ read -r tmp_token
+ _gluster_does_match $tmp_token ${COMP_WORDS[$i]}
+ if [ $? -ne 0 ]; then
+ _gluster_throw "No match"
+ fi
+ ;;
+ esac
+ i=$((i+1))
+ done
+
+ read -r token
+ if [ "$token" == '[' ]; then
+ _gluster_push
+ _gluster_form_list ${COMP_WORDS[COMP_CWORD]}
+
+ elif [ "$token" == '{' ]; then
+ read -r tmp_token
+ GLUSTER_LIST="$tmp_token"
+ fi
+
+ echo $GLUSTER_LIST
+}
+
+_gluster_handle_list ()
+{
+ local list="${!1}"
+ local cur_word=$2
+ local count=0
+ local i=0
+
+ for i in `echo $list`; do
+ count=$((count + 1))
+ done
+
+ if [ $count -eq 1 ] && [ "${i:0:1}" == "_" ]; then
+ $i "complete" $cur_word
+ else
+ COMPREPLY=($(compgen -W "$list" -- $cur_word))
+ fi
+ return
+}
+
+_gluster_completion ()
+{
+ GLUSTER_FINAL_LIST=`echo $GLUSTER_COMMAND_TREE | \
+ egrep -ao --color=never "([A-Za-z0-9_.-]+)|[[:space:]]+|." | \
+ egrep -v --color=never "^[[:space:]]*$" | \
+ _gluster_parse`
+
+ ARG="GLUSTER_FINAL_LIST"
+ _gluster_handle_list $ARG ${COMP_WORDS[COMP_CWORD]}
+ return
+}
+
+complete -F _gluster_completion gluster
diff --git a/extras/control-cpu-load.sh b/extras/control-cpu-load.sh
new file mode 100755
index 00000000000..52dcf62fd9f
--- /dev/null
+++ b/extras/control-cpu-load.sh
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+USAGE="This script provides a utility to control CPU utilization for any
+gluster daemon.In this, we use cgroup framework to configure CPU quota
+for a process(like selfheal daemon). Before running this script, make
+sure that daemon is running.Every time daemon restarts, it is required
+to rerun this command to set CPU quota on new daemon process id.
+User can enter any value between 10 to 100 for CPU quota.
+Recommended value of quota period is 25. 25 means, kernel will allocate
+25 ms period to this group of tasks in every 100 ms period. This 25ms
+could be considered as the maximum percentage of CPU quota daemon can take.
+This value will be reflected on CPU usage of "top" command.If provided pid
+is the only process and no other process is in competition to get CPU, more
+ than 25% could be allocated to daemon to speed up the process."
+
+if [ $# -ge 1 ]; then
+ case $1 in
+ -h|--help) echo " " "$USAGE" | sed -r -e 's/^[ ]+//g'
+ exit 0;
+ ;;
+ *) echo "Please Provide correct input for script."
+ echo "For help correct options are -h or --help."
+ exit 1;
+ ;;
+ esac
+fi
+
+DIR_EXIST=0
+LOC="/sys/fs/cgroup/cpu,cpuacct/system.slice/glusterd.service"
+echo "Enter gluster daemon pid for which you want to control CPU."
+read daemon_pid
+
+if expr ${daemon_pid} + 0 > /dev/null 2>&1 ;then
+ CHECK_PID=$(pgrep -f gluster | grep ${daemon_pid})
+ if [ -z "${CHECK_PID}" ]; then
+ echo "No daemon is running or pid ${daemon_pid} does not match."
+ echo "with running gluster processes."
+ exit 1
+ fi
+else
+ echo "Entered daemon_pid is not numeric so Rerun the script."
+ exit 1
+fi
+
+
+if [ -f ${LOC}/tasks ];then
+ CHECK_CGROUP=$(grep ${daemon_pid} ${LOC}/tasks)
+ if [ ${CHECK_CGROUP} ]; then
+ echo "pid ${daemon_pid} is attached with glusterd.service cgroup."
+ fi
+fi
+
+cgroup_name=cgroup_gluster_${daemon_pid}
+if [ -f ${LOC}/${cgroup_name}/tasks ]; then
+ CHECK_CGROUP=$(grep ${daemon_pid} ${LOC}/${cgroup_name}/tasks)
+ if [ ${CHECK_CGROUP} ]; then
+ val=`cat ${LOC}/${cgroup_name}/cpu.cfs_quota_us`
+ qval=$((val / 1000))
+ echo "pid ${daemon_pid} is already attached ${cgroup_name} with quota value ${qval}."
+ echo "Press n if you don't want to reassign ${daemon_pid} with new quota value."
+ DIR_EXIST=1
+ else
+ echo "pid ${daemon_pid} is not attached with ${cgroup_name}."
+ fi
+fi
+
+read -p "If you want to continue the script to attach ${daemon_pid} with new ${cgroup_name} cgroup Press (y/n)?" choice
+case "$choice" in
+ y|Y ) echo "yes";;
+ n|N ) echo "no";exit;;
+ * ) echo "invalid";exit;;
+esac
+
+systemctl set-property glusterd.service CPUShares=1024
+
+if [ ${DIR_EXIST} -eq 0 ];then
+ echo "Creating child cgroup directory '${cgroup_name} cgroup' for glusterd.service."
+ mkdir -p ${LOC}/${cgroup_name}
+ if [ ! -f ${LOC}/${cgroup_name}/tasks ];then
+ echo "Not able to create ${cgroup_name} directory so exit."
+ exit 1
+ fi
+fi
+
+echo "Enter quota value in range [10,100]: "
+
+read quota_value
+if expr ${quota_value} + 0 > /dev/null 2>&1 ;then
+ if [ ${quota_value} -lt 10 ] || [ ${quota_value} -gt 100 ]; then
+ echo "Entered quota value is not correct,it should be in the range ."
+ echo "10-100. Ideal value is 25."
+ echo "Rerun the sript with correct value."
+ exit 1
+ else
+ echo "Entered quota value is $quota_value"
+ fi
+else
+ echo "Entered quota value is not numeric so Rerun the script."
+ exit 1
+fi
+
+quota_value=$((quota_value * 1000))
+echo "Setting $quota_value to cpu.cfs_quota_us for gluster_cgroup."
+echo ${quota_value} > ${LOC}/${cgroup_name}/cpu.cfs_quota_us
+
+if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then
+ for thid in `ps -T -p ${daemon_pid} | grep -v SPID | awk -F " " '{print $2}'`;
+ do
+ echo ${thid} > ${LOC}/${cgroup_name}/tasks ;
+ done
+ if cat /proc/${daemon_pid}/cgroup | grep -w ${cgroup_name} > /dev/null; then
+ echo "Tasks are attached successfully specific to ${daemon_pid} to ${cgroup_name}."
+ else
+ echo "Tasks are not attached successfully."
+ fi
+fi
diff --git a/extras/control-mem.sh b/extras/control-mem.sh
new file mode 100755
index 00000000000..91b36f8107a
--- /dev/null
+++ b/extras/control-mem.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+
+USAGE="This commands provides a utility to control MEMORY utilization for any
+gluster daemon.In this, we use cgroup framework to configure MEMORY limit for
+a process. Before running this script, make sure that daemon is running.Every
+time daemon restarts, it is required to rerun this command to set memory limit
+(in bytes) on new daemon process id.User can enter any value between 100
+(in Mega bytes) to 8000000000000 for Memory limit in Mega bytes.
+Memory limit value is depends on how much maximum memory user wants to restrict
+for specific daemon process.If a process will try to consume memore more than
+configured value then cgroup will hang/sleep this task and to resume the task
+rerun the script with new increase memory limit value ."
+
+if [ $# -ge 1 ]; then
+ case $1 in
+ -h|--help) echo " " "$USAGE" | sed -r -e 's/^[ ]+//g'
+ exit 0;
+ ;;
+ *) echo "Please Provide correct input for script."
+ echo "For help correct options are -h of --help."
+ exit 1;
+ ;;
+ esac
+fi
+
+DIR_EXIST=0
+LOC="/sys/fs/cgroup/memory/system.slice/glusterd.service"
+echo "Enter Any gluster daemon pid for that you want to control MEMORY."
+read daemon_pid
+
+if expr ${daemon_pid} + 0 > /dev/null 2>&1 ;then
+ CHECK_PID=$(pgrep -f gluster | grep ${daemon_pid})
+ if [ -z "${CHECK_PID}" ]; then
+ echo "No daemon is running or pid ${daemon_pid} does not match."
+ echo "with running gluster processes."
+ exit 1
+ fi
+else
+ echo "Entered daemon_pid is not numeric so Rerun the script."
+ exit 1
+fi
+
+
+if [ -f ${LOC}/tasks ]; then
+ CHECK_CGROUP=$(grep ${daemon_pid} ${LOC}/tasks)
+ if [ ${CHECK_CGROUP} ] ;then
+ echo "pid ${daemon_pid} is attached with default glusterd.service cgroup."
+ fi
+fi
+
+cgroup_name=cgroup_gluster_${daemon_pid}
+if [ -f ${LOC}/${cgroup_name}/tasks ];then
+ CHECK_CGROUP=$(grep ${daemon_pid} ${LOC}/${cgroup_name}/tasks)
+ if [ ${CHECK_CGROUP} ]; then
+ val=`cat ${LOC}/${cgroup_name}/memory.limit_in_bytes`
+ mval=$((val / 1024 / 1024))
+ echo "pid ${daemon_pid} is already attached ${cgroup_name} with mem value ${mval}."
+ echo "Press n if you don't want to reassign ${daemon_pid} with new mem value."
+ DIR_EXIST=1
+ else
+ echo "pid ${daemon_pid} is not attached with ${cgroup_name}."
+ fi
+fi
+
+read -p "If you want to continue the script to attach daeomon with new cgroup. Press (y/n)?" choice
+case "$choice" in
+ y|Y ) echo "yes";;
+ n|N ) echo "no";exit;;
+ * ) echo "invalid";exit;;
+esac
+
+systemctl set-property glusterd.service CPUShares=1024
+
+if [ ${DIR_EXIST} -eq 0 ];then
+ echo "Creating child cgroup directory '${cgroup_name} cgroup' for glusterd.service."
+ mkdir -p ${LOC}/${cgroup_name}
+ if [ ! -f ${LOC}/${cgroup_name}/tasks ];then
+ echo "Not able to create ${LOC}/${cgroup_name} directory so exit."
+ exit 1
+ fi
+fi
+
+echo "Enter Memory value in Mega bytes [100,8000000000000]: "
+
+read mem_value
+if expr ${mem_value} + 0 > /dev/null 2>&1 ;then
+ if [ ${mem_value} -lt 100 ] || [ ${mem_value} -gt 8000000000000 ]; then
+ echo "Entered memory value is not correct,it should be in the range ."
+ echo "100-8000000000000, Rerun the script with correct value ."
+ exit 1
+ else
+ echo "Entered memory limit value is ${mem_value}."
+ fi
+else
+ echo "Entered memory value is not numeric so Rerun the script."
+ exit 1
+fi
+
+mem_value=$(($mem_value * 1024 * 1024))
+if [ ${DIR_EXIST} -eq 0 ];then
+ echo "Setting ${mem_value} to memory.limit_in_bytes for ${LOC}/${cgroup_name}."
+ echo ${mem_value} > ${LOC}/${cgroup_name}/memory.limit_in_bytes
+ #Set memory value to memory.memsw.limit_in_bytes
+ echo ${mem_value} > ${LOC}/${cgroup_name}/memory.memsw.limit_in_bytes
+ # disable oom_control so that kernel will not send kill signal to the
+ # task once limit has reached
+ echo 1 > ${LOC}/${cgroup_name}/memory.oom_control
+else
+ #Increase mem_value to memory.memsw.limit_in_bytes
+ echo ${mem_value} > ${LOC}/${cgroup_name}/memory.memsw.limit_in_bytes
+ echo "Increase ${mem_value} to memory.limit_in_bytes for ${LOC}/${cgroup_name}."
+ echo ${mem_value} > ${LOC}/${cgroup_name}/memory.limit_in_bytes
+ # disable oom_control so that kernel will not send kill signal to the
+ # task once limit has reached
+ echo 1 > ${LOC}/${cgroup_name}/memory.oom_control
+fi
+
+if ps -T -p ${daemon_pid} | grep gluster > /dev/null; then
+ for thid in `ps -T -p ${daemon_pid} | grep -v SPID | awk -F " " '{print $2}'`;
+ do
+ echo ${thid} > ${LOC}/${cgroup_name}/tasks ;
+ done
+ if cat /proc/${daemon_pid}/cgroup | grep -iw ${cgroup_name} > /dev/null; then
+ echo "Tasks are attached successfully specific to ${daemon_pid} to ${cgroup_name}."
+ else
+ echo "Tasks are not attached successfully."
+ fi
+fi
diff --git a/extras/create_new_xlator/README.md b/extras/create_new_xlator/README.md
new file mode 100644
index 00000000000..fdc1ba00812
--- /dev/null
+++ b/extras/create_new_xlator/README.md
@@ -0,0 +1,24 @@
+####This document explains how to create a template for your new xlator.
+
+`$ python ./generate_xlator.py <XLATOR_DIRECTORY> <XLATOR_NAME> <FOP_PREFIX>`
+ * XLATOR_DIRECTORY: Directory path where the new xlator folder will reside
+ * XLATOR_NAME: Name of the xlator you wish to create
+ * FOP_PREFIX: This is the fop prefix that you wish to prefix every fop definition in your xlator, fop prefix is generally different than xlator name, if the xlator name is too long.
+
+Eg: `python ./generate_xlator.py /home/u1/glusterfs/xlators/features compression cmpr`
+This command will create the following files with some initial contents like copyright, fops definition etc.
+Note that there shouldn't be a "/" specified at the end of the <XLATOR_DIRECTORY>
+ `* /home/u1/glusterfs/xlators/features/compression/Makefile.am
+ * /home/u1/glusterfs/xlators/features/compression/src/Makefile.am
+ * /home/u1/glusterfs/xlators/features/compression/src/compression.c
+ * /home/u1/glusterfs/xlators/features/compression/src/compression.h
+ * /home/u1/glusterfs/xlators/features/compression/src/compression-mem-types.h
+ * /home/u1/glusterfs/xlators/features/compression/src/compression-messages.h`
+
+By default all the fops and functions are generated, if you wish to not implement certain fops and functions, comment those lines (by adding '#' at the start of the line) in libglusterfs/src/generate_xlator.py
+
+Few other manual steps required to get the new xlator completely functional:
+* Change configure.ac
+* Change `<XLATOR_DIRECTORY>/Makefile.am` to include the new xlator directory.
+ Eg: `/home/u1/glusterfs/xlators/features/Makefile.am`
+* Change vol file or glusterd volgen to include the new xlator in volfile
diff --git a/extras/create_new_xlator/generate_xlator.py b/extras/create_new_xlator/generate_xlator.py
new file mode 100755
index 00000000000..983868c04db
--- /dev/null
+++ b/extras/create_new_xlator/generate_xlator.py
@@ -0,0 +1,208 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+import os
+import re
+import sys
+import string
+import time
+path = os.path.abspath(os.path.dirname(__file__)) + '/../../libglusterfs/src'
+sys.path.append(path)
+from generator import ops, xlator_cbks, xlator_dumpops
+
+MAKEFILE_FMT = """
+xlator_LTLIBRARIES = @XL_NAME@.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/@XL_TYPE@
+@XL_NAME_NO_HYPHEN@_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+@XL_NAME_NO_HYPHEN@_la_SOURCES = @XL_NAME@.c
+@XL_NAME_NO_HYPHEN@_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+noinst_HEADERS = @XL_NAME@.h @XL_NAME@-mem-types.h @XL_NAME@-messages.h
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
+CLEANFILES =
+"""
+
+fop_subs = {}
+cbk_subs = {}
+fn_subs = {}
+
+
+def get_error_arg(type_str):
+ if type_str.find(" *") != -1:
+ return "NULL"
+ return "-1"
+
+
+def get_param(names, types):
+ # Convert two separate tuples to one of (name, type) sub-tuples.
+ as_tuples = list(zip(types, names))
+ # Convert each sub-tuple into a "type name" string.
+ as_strings = [' '.join(item) for item in as_tuples]
+ # Join all of those into one big string.
+ return ',\n\t'.join(as_strings)
+
+
+def generate(tmpl, name, table):
+ w_arg_names = [a[1] for a in table[name] if a[0] == 'fop-arg']
+ w_arg_types = [a[2] for a in table[name] if a[0] == 'fop-arg']
+ u_arg_names = [a[1] for a in table[name] if a[0] == 'cbk-arg']
+ u_arg_types = [a[2] for a in table[name] if a[0] == 'cbk-arg']
+ fn_arg_names = [a[1] for a in table[name] if a[0] == 'fn-arg']
+ fn_arg_types = [a[2] for a in table[name] if a[0] == 'fn-arg']
+ ret_type = [a[1] for a in table[name] if a[0] == 'ret-val']
+ ret_var = [a[2] for a in table[name] if a[0] == 'ret-val']
+
+ sdict = {}
+ #Parameters are (t1, var1), (t2, var2)...
+ #Args are (var1, var2,...)
+ sdict["@WIND_ARGS@"] = ', '.join(w_arg_names)
+ sdict["@UNWIND_ARGS@"] = ', '.join(u_arg_names)
+ sdict["@ERROR_ARGS@"] = ', '.join(list(map(get_error_arg, u_arg_types)))
+ sdict["@WIND_PARAMS@"] = get_param(w_arg_names, w_arg_types)
+ sdict["@UNWIND_PARAMS@"] = get_param(u_arg_names, u_arg_types)
+ sdict["@FUNC_PARAMS@"] = get_param(fn_arg_names, fn_arg_types)
+ sdict["@NAME@"] = name
+ sdict["@FOP_PREFIX@"] = fop_prefix
+ sdict["@RET_TYPE@"] = ''.join(ret_type)
+ sdict["@RET_VAR@"] = ''.join(ret_var)
+
+ for old, new in sdict.items():
+ tmpl = tmpl.replace(old, new)
+ # TBD: reindent/reformat the result for maximum readability.
+ return tmpl
+
+
+def gen_xlator():
+ xl = open(src_dir_path+"/"+xl_name+".c", 'w+')
+
+ print(COPYRIGHT, file=xl)
+ print(fragments["INCLUDE_IN_SRC_FILE"].replace("@XL_NAME@",
+ xl_name), file=xl)
+
+ #Generate cbks and fops
+ for fop in ops:
+ print(generate(fragments["CBK_TEMPLATE"], fop, ops), file=xl)
+ print(generate(fragments["FOP_TEMPLATE"], fop, ops), file=xl)
+
+ for cbk in xlator_cbks:
+ print(generate(fragments["FUNC_TEMPLATE"], cbk,
+ xlator_cbks), file=xl)
+
+ for dops in xlator_dumpops:
+ print(generate(fragments["FUNC_TEMPLATE"], dops,
+ xlator_dumpops), file=xl)
+
+ #Generate fop table
+ print("struct xlator_fops fops = {", file=xl)
+ for fop in ops:
+ print(" .{0:20} = {1}_{2},".format(fop, fop_prefix, fop), file=xl)
+ print("};", file=xl)
+
+ #Generate xlator_cbks table
+ print("struct xlator_cbks cbks = {", file=xl)
+ for cbk in xlator_cbks:
+ print(" .{0:20} = {1}_{2},".format(cbk, fop_prefix, cbk), file=xl)
+ print("};", file=xl)
+
+ #Generate xlator_dumpops table
+ print("struct xlator_dumpops dumpops = {", file=xl)
+ for dops in xlator_dumpops:
+ print(" .{0:20} = {1}_{2},".format(dops, fop_prefix, dops), file=xl)
+ print("};", file=xl)
+
+ xlator_methods = fragments["XLATOR_METHODS"].replace("@XL_NAME@", xl_name)
+ xlator_methods = xlator_methods.replace("@FOP_PREFIX@", fop_prefix)
+ print(xlator_methods, file=xl)
+
+ xl.close()
+
+
+def create_dir_struct():
+ if not os.path.exists(dir_path+"/src"):
+ os.makedirs(dir_path+"/src")
+
+
+def gen_header_files():
+ upname = xl_name_no_hyphen.upper()
+ h = open(src_dir_path+"/"+xl_name+".h", 'w+')
+ print(COPYRIGHT, file=h)
+ txt = fragments["HEADER_FMT"].replace("@HFL_NAME@", upname)
+ txt = txt.replace("@XL_NAME@", xl_name)
+ print(txt, file=h)
+ h.close()
+
+ h = open(src_dir_path+"/"+xl_name+"-mem-types.h", 'w+')
+ print(COPYRIGHT, file=h)
+ txt = fragments["MEM_HEADER_FMT"].replace("@HFL_NAME@", upname+"_MEM_TYPES")
+ txt = txt.replace("@FOP_PREFIX@", fop_prefix)
+ print(txt, file=h)
+ h.close()
+
+ h = open(src_dir_path+"/"+xl_name+"-messages.h", 'w+')
+ print(COPYRIGHT, file=h)
+ txt = fragments["MSG_HEADER_FMT"].replace("@HFL_NAME@", upname+"_MESSAGES")
+ txt = txt.replace("@FOP_PREFIX@", fop_prefix.upper())
+ print(txt, file=h)
+ h.close()
+
+
+def gen_makefiles():
+ m = open(dir_path+"/Makefile.am", 'w+')
+ print("SUBDIRS = src\n\nCLEANFILES =", file=m)
+ m.close()
+
+ m = open(src_dir_path+"/Makefile.am", 'w+')
+ txt = MAKEFILE_FMT.replace("@XL_NAME@", xl_name)
+ txt = txt.replace("@XL_NAME_NO_HYPHEN@", xl_name_no_hyphen)
+ txt = txt.replace("@XL_TYPE@", xlator_type)
+ print(txt, file=m)
+ m.close()
+
+def get_copyright ():
+ return fragments["CP"].replace("@CURRENT_YEAR@",
+ time.strftime("%Y"))
+
+def load_fragments ():
+ pragma_re = re.compile('pragma fragment (.*)')
+ cur_symbol = None
+ cur_value = ""
+ result = {}
+ basepath = os.path.abspath(os.path.dirname(__file__))
+ fragpath = basepath + "/new-xlator.c.tmpl"
+ for line in open(fragpath, "r").readlines():
+ m = pragma_re.search(line)
+ if m:
+ if cur_symbol:
+ result[cur_symbol] = cur_value
+ cur_symbol = m.group(1)
+ cur_value = ""
+ else:
+ cur_value += line
+ if cur_symbol:
+ result[cur_symbol] = cur_value
+ return result
+
+if __name__ == '__main__':
+
+ if len(sys.argv) < 3:
+ print("USAGE: ./gen_xlator <XLATOR_DIR> <XLATOR_NAME> <FOP_PREFIX>")
+ sys.exit(0)
+
+ xl_name = sys.argv[2]
+ xl_name_no_hyphen = xl_name.replace("-", "_")
+ if sys.argv[1].endswith('/'):
+ dir_path = sys.argv[1] + xl_name
+ else:
+ dir_path = sys.argv[1] + "/" + xl_name
+ xlator_type = os.path.basename(sys.argv[1])
+ fop_prefix = sys.argv[3]
+ src_dir_path = dir_path + "/src"
+
+ fragments = load_fragments()
+
+ COPYRIGHT = get_copyright()
+ create_dir_struct()
+ gen_xlator()
+ gen_header_files()
+ gen_makefiles()
diff --git a/extras/create_new_xlator/new-xlator.c.tmpl b/extras/create_new_xlator/new-xlator.c.tmpl
new file mode 100644
index 00000000000..fe9735bfcf1
--- /dev/null
+++ b/extras/create_new_xlator/new-xlator.c.tmpl
@@ -0,0 +1,151 @@
+#pragma fragment CBK_TEMPLATE
+int32_t @FOP_PREFIX@_@NAME@_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, @UNWIND_PARAMS@)
+{
+ STACK_UNWIND_STRICT(@NAME@, frame, op_ret, op_errno, @UNWIND_ARGS@);
+ return 0;
+}
+
+#pragma fragment COMMENT
+If you are generating the leaf xlators, remove the STACK_WIND and replace the
+ @ERROR_ARGS@ to @UNWIND_ARGS@ if necessary
+
+#pragma fragment FOP_TEMPLATE
+ int32_t @FOP_PREFIX@_@NAME@(call_frame_t *frame, xlator_t *this, @WIND_PARAMS@)
+{
+ STACK_WIND(frame, @FOP_PREFIX@_@NAME@_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->@NAME@, @WIND_ARGS@);
+ return 0;
+err:
+ STACK_UNWIND_STRICT(@NAME@, frame, -1, errno, @ERROR_ARGS@);
+ return 0;
+}
+
+#pragma fragment FUNC_TEMPLATE
+@RET_TYPE@ @FOP_PREFIX@_@NAME@(@FUNC_PARAMS@)
+{
+ return @RET_VAR@;
+}
+
+#pragma fragment CP
+/*
+ * Copyright (c) @CURRENT_YEAR@ Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#pragma fragment INCLUDE_IN_SRC_FILE
+#include "@XL_NAME@.h"
+
+#pragma fragment XLATOR_METHODS
+
+static int32_t @FOP_PREFIX@_init(xlator_t *this)
+{
+ return 0;
+}
+
+static void @FOP_PREFIX@_fini(xlator_t *this)
+{
+ return;
+}
+
+static int32_t @FOP_PREFIX@_reconfigure(xlator_t *this, dict_t *dict)
+{
+ return 0;
+}
+
+static int @FOP_PREFIX@_notify(xlator_t *this, int event, void *data, ...)
+{
+ return default_notify(this, event, data);
+}
+
+static int32_t @FOP_PREFIX@_mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ ret = xlator_mem_acct_init(this, gf_@FOP_PREFIX@_mt_end + 1);
+ return ret;
+}
+
+static int32_t @FOP_PREFIX@_dump_metrics(xlator_t *this, int fd)
+{
+ return 0;
+}
+
+struct volume_options @FOP_PREFIX@_options[] = {
+ /*{ .key = {""},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "",
+ .op_version = {GD_OP_VERSION_},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {""},
+ .description = "",
+ .category = GF_EXPERIMENTAL,
+ },
+ { .key = {NULL} },
+ */
+};
+
+xlator_api_t xlator_api = {
+ .init = @FOP_PREFIX@_init,
+ .fini = @FOP_PREFIX@_fini,
+ .notify = @FOP_PREFIX@_notify,
+ .reconfigure = @FOP_PREFIX@_reconfigure,
+ .mem_acct_init = @FOP_PREFIX@_mem_acct_init,
+ .dump_metrics = @FOP_PREFIX@_dump_metrics,
+ .op_version = {GD_OP_VERSION_},
+ .dumpops = &@FOP_PREFIX@_dumpops,
+ .fops = &@FOP_PREFIX@_fops,
+ .cbks = &@FOP_PREFIX @_cbks,
+ .options = @FOP_PREFIX@_options,
+ .identifier = "@XL_NAME@",
+ .category = GF_EXPERIMENTAL,
+};
+#pragma fragment HEADER_FMT
+#ifndef __ @HFL_NAME@_H__
+#define __ @HFL_NAME@_H__
+
+#include "@XL_NAME@-mem-types.h"
+#include "@XL_NAME@-messages.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#endif /* __@HFL_NAME@_H__ */
+
+#pragma fragment MEM_HEADER_FMT
+#ifndef __ @HFL_NAME@_H__
+#define __ @HFL_NAME@_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_mdc_mem_types_ {
+ gf_@FOP_PREFIX@_mt_ = gf_common_mt_end + 1,
+ gf_@FOP_PREFIX@_mt_end
+};
+
+#endif /* __@HFL_NAME@_H__ */
+
+#pragma fragment MSG_HEADER_FMT
+#ifndef __@HFL_NAME@_H__
+#define __@HFL_NAME@_H__
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(@FOP_PREFIX@, @FOP_PREFIX@_MSG_NO_MEMORY);
+
+#endif /* __@HFL_NAME@_H__ */
diff --git a/extras/devel-tools/devel-vagrant/Vagrantfile b/extras/devel-tools/devel-vagrant/Vagrantfile
new file mode 100644
index 00000000000..78dc29bdc68
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/Vagrantfile
@@ -0,0 +1,165 @@
+# -*- mode: ruby -*-
+# vi: set ft=ruby :
+#
+# Author: Rajesh Joseph (rjoseph@redhat.com)
+# Author: Christopher Blum (cblum@redhat.com)
+#
+
+
+#Variables
+box_name = "gluster-dev-fedora"
+box_url = "http://download.gluster.org/pub/gluster/glusterfs/vagrant/gluster-dev-fedora/boxes/gluster-dev-fedora.json"
+node_count = 0
+disk_count = -1
+node_name = "Node"
+ipbase="192.168.99."
+source_path = "/source/glusterfs"
+target_path = "/mnt/src"
+
+if ARGV[0] == "up"
+ environment = open('vagrant_env.conf', 'w')
+
+ print "\n\e[1;37mEnter Node (or VM) Count? Default: 1 \e[32m"
+ while node_count < 1 or node_count > 99
+ node_count = $stdin.gets.strip.to_i
+ if node_count == 0 # The user pressed enter without input or we cannot parse the input to a number
+ node_count = 1
+ elsif node_count < 1
+ print "\e[31mWe need at least 1 VM ;) Try again \e[32m"
+ elsif node_count > 99
+ print "\e[31mWe don't support more than 99 VMs - Try again \e[32m"
+ end
+ end
+
+ print "\e[1;37mEnter per Node Disc (Brick) Count? Default: 2 \e[32m"
+
+ while disk_count < 1
+ disk_count = $stdin.gets.strip.to_i
+ if disk_count == 0 # The user pressed enter without input or we cannot parse the input to a number
+ disk_count = 2
+ elsif disk_count < 1
+ print "\e[31mWe need at least 1 disk ;) Try again \e[32m"
+ end
+ end
+
+ print "\e[1;37mEnter GlusterFS source location? Default: \"#{source_path}\" : \e[32m"
+ tmploc = $stdin.gets.strip.to_s
+ if tmploc != ""
+ source_path = "#{tmploc}"
+ end
+
+ environment.puts("# BEWARE: Do NOT modify ANY settings in here or your vagrant environment will be messed up")
+ environment.puts(node_count.to_s)
+ environment.puts(disk_count.to_s)
+ environment.puts(source_path)
+
+ print "\e[32m\nOK I will provision #{node_count} VMs for you and each one will have #{disk_count} disks for bricks\e[37m\n\n"
+ system "sleep 1"
+else # So that we destroy and can connect to all VMs...
+ environment = open('vagrant_env.conf', 'r')
+
+ environment.readline # Skip the comment on top
+ node_count = environment.readline.to_i
+ disk_count = environment.readline.to_i
+ source_path = environment.readline.gsub(/\s+/, "")
+
+ if ARGV[0] != "ssh-config"
+ puts "Detected settings from previous vagrant up:"
+ puts " We deployed #{node_count} VMs with each #{disk_count} disks"
+ puts ""
+ end
+end
+
+environment.close
+
+$ansivar = Hash.new{ |hash,key| hash[key] = [] }
+$devnamecreated = false
+
+#
+# Function to create and attach disks
+#
+def attachDisks(numDisk, provider)
+ suffix = "bcdefghijklmn".split("")
+ for i in 1..numDisk.to_i
+ devname = "vd" + (suffix[i-1]).to_s
+ if $devnamecreated == false
+ $ansivar["device"].push "#{devname}"
+ end
+ provider.storage :file,
+ :size => '1G',
+ :device => "vd" + (suffix[i-1]).to_s,
+ :type => "qcow2",
+ :bus => "virtio",
+ :cache => "default"
+ end
+ $devnamecreated = true
+end
+
+
+$ansivar["src_path"].push "#{source_path}"
+$ansivar["trg_path"].push "#{target_path}"
+
+groups = Hash.new{ |hash,key| hash[key] = [] }
+
+groups["origin"].push "#{node_name}1"
+groups["all"].push "#{node_name}1"
+
+(2..node_count).each do |num|
+ $ansivar["peer_nodes"].push "#{node_name}#{num}"
+ groups["all"].push "#{node_name}#{num}"
+end
+
+hostsFile = "\n"
+(1..node_count).each do |num|
+ hostsFile += "#{ipbase}#{( 100 + num).to_s} #{node_name}#{num.to_s}\n"
+end
+
+Vagrant.configure("2") do |config|
+ (1..node_count).each do |num|
+ config.vm.define "#{node_name}#{num}" do |node|
+ ip_addr = "#{ipbase}#{(100 + num).to_s}"
+ node.vm.network "private_network", ip: "#{ip_addr}"
+ node.vm.box = box_name
+ node.vm.box_url = box_url
+ node.vm.hostname = "#{node_name}#{num}"
+ node.ssh.insert_key = false
+ node.vm.synced_folder "#{source_path}", "#{target_path}", type: "nfs"
+
+ # Define basic config for VM, memory, cpu, storage pool
+ node.vm.provider "libvirt" do |virt|
+ virt.storage_pool_name = "default"
+ virt.memory = 1024
+ virt.cpus = 1
+
+ attachDisks( disk_count, virt )
+ end
+
+ node.vm.post_up_message = "\e[37mBuilding of this VM is finished \n"
+ "You can access it now with: \n"
+ "vagrant ssh #{node_name}#{num.to_s}\n\n"
+ "#{target_path} directory in VM #{node_name}#{num.to_s}"
+ "is synced with Host machine. \nSo any changes done in this"
+ "directory will be reflected in the host machine as well\n"
+ "Beware of this when you delete content from this directory\e[32m"
+
+ node.vm.provision :shell, path: "bootstrap.sh"
+
+ node.vm.provision "shell", inline: <<-SHELL
+ echo '#{hostsFile}' | sudo tee -a /etc/hosts
+ SHELL
+
+ if num == node_count
+ # Let's provision
+ node.vm.provision "ansible" do |setup|
+ setup.verbose = "v"
+ setup.playbook = "ansible/setup.yml"
+ setup.limit = "all"
+ setup.sudo = "true"
+ setup.groups = groups
+ setup.extra_vars = $ansivar
+ end
+ end
+
+ end
+ end
+end
diff --git a/extras/devel-tools/devel-vagrant/ansible/roles/cluster/tasks/main.yml b/extras/devel-tools/devel-vagrant/ansible/roles/cluster/tasks/main.yml
new file mode 100644
index 00000000000..3306c7a3dc2
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/ansible/roles/cluster/tasks/main.yml
@@ -0,0 +1,5 @@
+---
+- name: gluster peer probe
+ shell: gluster peer probe {{ item }}
+ with_items: "{{ peer_nodes | default([]) }}"
+
diff --git a/extras/devel-tools/devel-vagrant/ansible/roles/compile-gluster/tasks/main.yml b/extras/devel-tools/devel-vagrant/ansible/roles/compile-gluster/tasks/main.yml
new file mode 100644
index 00000000000..6ee258c7780
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/ansible/roles/compile-gluster/tasks/main.yml
@@ -0,0 +1,29 @@
+---
+- name: autogen.sh
+ shell: chdir={{ item }} ./autogen.sh
+ with_items: "{{ trg_path }}"
+
+- name: configure
+ shell: chdir={{ item }} CFLAGS="-g -O0 -Werror -Wall -Wno-error=cpp -Wno-error=maybe-uninitialized" \
+ ./configure \
+ --prefix=/usr \
+ --exec-prefix=/usr \
+ --bindir=/usr/bin \
+ --sbindir=/usr/sbin \
+ --sysconfdir=/etc \
+ --datadir=/usr/share \
+ --includedir=/usr/include \
+ --libdir=/usr/lib64 \
+ --libexecdir=/usr/libexec \
+ --localstatedir=/var \
+ --sharedstatedir=/var/lib \
+ --mandir=/usr/share/man \
+ --infodir=/usr/share/info \
+ --libdir=/usr/lib64 \
+ --enable-debug
+ with_items: "{{ trg_path }}"
+
+- name: make install
+ shell: chdir={{ item }} make install
+ with_items: "{{ trg_path }}"
+
diff --git a/extras/devel-tools/devel-vagrant/ansible/roles/install-pkgs/tasks/main.yml b/extras/devel-tools/devel-vagrant/ansible/roles/install-pkgs/tasks/main.yml
new file mode 100644
index 00000000000..3944054dd25
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/ansible/roles/install-pkgs/tasks/main.yml
@@ -0,0 +1,72 @@
+---
+- name: install deltarpm
+ dnf: name=deltarpm state=present
+
+- name: update system
+ shell: dnf update -y
+
+- name: install other packages
+ dnf: name={{ item }} state=present
+ with_items:
+ - attr
+ - autoconf
+ - automake
+ - bison
+ - cifs-utils
+ - cscope
+ - ctags
+ - dbench
+ - dos2unix
+ - e2fsprogs
+ - findutils
+ - flex
+ - fuse-devel
+ - fuse-libs
+ - gcc
+ - gdb
+ - git
+ - glib2-devel
+ - hostname
+ - libacl-devel
+ - libaio-devel
+ - libattr-devel
+ - libibverbs-devel
+ - librdmacm-devel
+ - libtool
+ - libxml2-devel
+ - lvm2-devel
+ - make
+ - man-db
+ - mock
+ - net-tools
+ - nfs-utils
+ - openssh-server
+ - openssl-devel
+ - perl-Test-Harness
+ - pkgconfig
+ - procps-ng
+ - psmisc
+ - python-devel
+ - python-eventlet
+ - python-netifaces
+ - python-paste-deploy
+ - python-setuptools
+ - python-simplejson
+ - python-sphinx
+ - python-webob
+ - pyxattr
+ - readline-devel
+ - rpm-build
+ - screen
+ - strace
+ - supervisor
+ - systemtap-sdt-devel
+ - sqlite-devel
+ - samba*
+ - userspace-rcu-devel
+ - vim
+ - wget
+ - which
+ - xfsprogs
+ - yajl-devel
+
diff --git a/extras/devel-tools/devel-vagrant/ansible/roles/iptables/tasks/main.yml b/extras/devel-tools/devel-vagrant/ansible/roles/iptables/tasks/main.yml
new file mode 100644
index 00000000000..768cb0e8668
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/ansible/roles/iptables/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: disable iptables, need to add specific rules later
+ shell: iptables -F
diff --git a/extras/devel-tools/devel-vagrant/ansible/roles/prepare-brick/tasks/main.yml b/extras/devel-tools/devel-vagrant/ansible/roles/prepare-brick/tasks/main.yml
new file mode 100644
index 00000000000..a3a6c463468
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/ansible/roles/prepare-brick/tasks/main.yml
@@ -0,0 +1,30 @@
+---
+- name: Create physical device
+ shell: pvcreate /dev/{{ item }}
+ with_items: "{{ device }}"
+
+- name: Create volume group
+ shell: vgcreate vg{{ item }} /dev/{{ item }}
+ with_items: "{{ device }}"
+
+- name: Create thin pool
+ shell: lvcreate -L 950M -T vg{{ item }}/thinpool
+ with_items: "{{ device }}"
+
+- name: Create thin volume
+ shell: lvcreate -V900M -T vg{{ item }}/thinpool -n thinp1
+ with_items: "{{ device }}"
+
+- name: Format backend
+ filesystem: fstype=xfs dev=/dev/vg{{ item }}/thinp1
+ with_items: "{{ device }}"
+
+- name: Create mount directory
+ file: path=/bricks/br{{ item }} state=directory recurse=yes
+ with_items: "{{ device }}"
+
+- name: Add entry to fstab and mount
+ mount: name=/bricks/br{{ item }} src=/dev/vg{{ item }}/thinp1 fstype=xfs state=mounted
+ with_items: "{{ device }}"
+
+
diff --git a/extras/devel-tools/devel-vagrant/ansible/roles/selinux/tasks/main.yml b/extras/devel-tools/devel-vagrant/ansible/roles/selinux/tasks/main.yml
new file mode 100644
index 00000000000..c9ba9618428
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/ansible/roles/selinux/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: Allow gfapi in Samba to bind to other ports than well known smb ports
+ seboolean: name=samba_load_libgfapi state=yes persistent=yes
diff --git a/extras/devel-tools/devel-vagrant/ansible/roles/service/tasks/main.yml b/extras/devel-tools/devel-vagrant/ansible/roles/service/tasks/main.yml
new file mode 100644
index 00000000000..ef78a125ae8
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/ansible/roles/service/tasks/main.yml
@@ -0,0 +1,21 @@
+---
+- name: disable kernel nfs
+ service: name=nfs-server enabled=no
+
+- name: stop kernel nfs
+ service: name=nfs-server state=stopped
+
+- name: enable rpcbind
+ service: name=rpcbind enabled=yes
+
+- name: start rpcbind
+ service: name=rpcbind state=started
+
+- name: enable glusterd
+ service: name=glusterd enabled=yes
+
+- name: start glusterd
+ service: name=glusterd state=started
+
+
+
diff --git a/extras/devel-tools/devel-vagrant/ansible/setup.yml b/extras/devel-tools/devel-vagrant/ansible/setup.yml
new file mode 100644
index 00000000000..c26bd7d6051
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/ansible/setup.yml
@@ -0,0 +1,23 @@
+---
+- hosts: all
+ become: yes
+ become_method: sudo
+ roles:
+ - install-pkgs
+ - prepare-brick
+ - selinux
+ - iptables
+
+- hosts: all
+ become: yes
+ become_method: sudo
+ serial: 1
+ roles:
+ - compile-gluster
+ - service
+
+- hosts: origin
+ become: yes
+ become_method: sudo
+ roles:
+ - cluster
diff --git a/extras/devel-tools/devel-vagrant/bootstrap.sh b/extras/devel-tools/devel-vagrant/bootstrap.sh
new file mode 100644
index 00000000000..bbf9fa2c063
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/bootstrap.sh
@@ -0,0 +1,3 @@
+dnf install -y nfs-utils
+dnf install -y vim
+dnf install -y python2 python2-dnf libselinux-python libsemanage-python
diff --git a/extras/devel-tools/devel-vagrant/up.sh b/extras/devel-tools/devel-vagrant/up.sh
new file mode 100755
index 00000000000..35cbe79d835
--- /dev/null
+++ b/extras/devel-tools/devel-vagrant/up.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+vagrant up --no-provision $@
+vagrant provision
diff --git a/extras/devel-tools/gdb_macros b/extras/devel-tools/gdb_macros
new file mode 100644
index 00000000000..aae4ccb3b37
--- /dev/null
+++ b/extras/devel-tools/gdb_macros
@@ -0,0 +1,84 @@
+
+#
+# gdb_macros is a gdb script file which can assist
+# developer in debugging. This script provides
+# following functions for debugging gluster processes
+# effectively:
+#
+# pdict : This function will iterate through all the
+# dictionary (dict_t) members and print the
+# key-value pair.
+#
+# plist : This function will print address of each member
+# of gluster list (list_head).
+#
+# gdb script should be loaded in gdb before using these
+# functions. gdb script can be loaded on-demand or on gdb
+# start-up. Use the following command on gdb prompt for
+# loading it on-demand.
+# source <script filename>
+# e.g.
+# (gdb) source /mnt/gdb_macros
+#
+# To automatically load gdb script on startup use .gdbinit
+# file. This file is automatically loaded by gdb on start.
+# Edit (or create) ~/.gdbinit file and add the following
+# entry:
+# source /mnt/gdb_macros
+#
+
+
+
+# This is an internal helper function
+define _print_dict_data
+ set $data = ($arg0)
+ set $len = $data->len - 1
+ set $i = 0
+ set $ishex = 0
+ while ($i < $len)
+ set $ch = $data->data [$i]
+
+ # Print only alpha-numeric values as char
+ # and remaining as hex value. This is done
+ # in this way because using %s screws up
+ # the display if the string has non-displayable
+ # characters
+ if ($ishex == 0) && ($ch > 31) && ($ch < 127)
+ printf "%c", $ch
+ else
+ printf "%x", ($ch & 0xFF)
+ set $ishex = 1
+ end
+ set $i = $i + 1
+ end
+end
+
+
+define pdict
+ set $cnt = 1
+ set $temp = *($arg0->members)
+ while ($temp)
+ printf "[%d](%s::",$cnt, $temp->key
+ _print_dict_data ($temp)->value
+ printf ")\n"
+ set $temp = $temp->next
+ set $cnt = $cnt + 1
+ end
+ printf "Done\n"
+end
+
+
+define plist
+ set $node = &($arg0)
+ set $head = $node
+
+ printf "[0x%lx]", $node
+ set $node = $node->next
+
+ while ($node != $head)
+ printf "--> [0x%lx]", $node
+ set $node = $node->next
+ end
+ printf "\n"
+end
+
diff --git a/extras/devel-tools/print-backtrace.sh b/extras/devel-tools/print-backtrace.sh
new file mode 100755
index 00000000000..33fbae288bc
--- /dev/null
+++ b/extras/devel-tools/print-backtrace.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# sample unresolved backtrace lines picked up from a brick log that should go
+# into a backtrace file eg. bt-file.txt:
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x3ec81)[0x7fe4bc271c81]
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x3eecd)[0x7fe4bc271ecd]
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x404cb)[0x7fe4bc2734cb]
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x3d2b6)[0x7fe4bc2702b6]
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x3d323)[0x7fe4bc270323]
+#
+# following is the output of the script for the above backtrace lines:
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x3ec81)[0x7fe4bc271c81] __afr_selfheal_data_finalize_source inlined at /usr/src/debug/glusterfs-3.8.4/xlators/cluster/afr/src/afr-self-heal-data.c:684 in __afr_selfheal_data_prepare /usr/src/debug/glusterfs-3.8.4/xlators/cluster/afr/src/afr-self-heal-data.c:603
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x3eecd)[0x7fe4bc271ecd] __afr_selfheal_data /usr/src/debug/glusterfs-3.8.4/xlators/cluster/afr/src/afr-self-heal-data.c:740
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x404cb)[0x7fe4bc2734cb] afr_selfheal_data /usr/src/debug/glusterfs-3.8.4/xlators/cluster/afr/src/afr-self-heal-data.c:883
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x3d2b6)[0x7fe4bc2702b6] afr_selfheal_do /usr/src/debug/glusterfs-3.8.4/xlators/cluster/afr/src/afr-self-heal-common.c:1968
+# /usr/lib64/glusterfs/3.8.4/xlator/cluster/replicate.so(+0x3d323)[0x7fe4bc270323] afr_selfheal /usr/src/debug/glusterfs-3.8.4/xlators/cluster/afr/src/afr-self-heal-common.c:2015
+#
+# Usage with debuginfo RPM:
+# print-backtrace.sh $HOME/Downloads/glusterfs-debuginfo-3.8.4-10.el7.x86_64.rpm bt-file.txt
+#
+# Usage with source install:
+# print-packtrace.sh none bt-file.txt
+
+function version_compare() { test $(echo $1|awk -F '.' '{print $1 $2 $3}') -gt $(echo $2|awk -F '.' '{print $1 $2 $3}'); }
+
+function Usage()
+{
+ echo -e "Usage:\n\t$0 { none | <debuginfo-rpm> } <backtrace-file>"
+ echo "none: implies we don't have a debuginfo rpm but want to resolve"
+ echo " against a source install which already has the debuginfo"
+ echo " NOTE: in this case you should have configured the build"
+ echo " with --enable-debug and the linker options should"
+ echo " have the option -rdynamic"
+}
+
+debuginfo_rpm=$1
+backtrace_file=$2
+
+if [ ! $debuginfo_rpm ] || [ ! $backtrace_file ]; then
+ Usage
+ exit 1
+fi
+
+if [ $debuginfo_rpm != "none" ]; then
+ if [ ! -f $debuginfo_rpm ]; then
+ echo "no such rpm file: $debuginfo_rpm"
+ exit 1
+ fi
+fi
+
+if [ ! -f $backtrace_file ]; then
+ echo "no such backtrace file: $backtrace_file"
+ exit 1
+fi
+
+if [ "$debuginfo_rpm" != "none" ]; then
+ if ! file $debuginfo_rpm | grep RPM >/dev/null 2>&1 ; then
+ echo "file does not look like an rpm: $debuginfo_rpm"
+ exit 1
+ fi
+fi
+
+cpio_version=$(cpio --version|grep cpio|cut -f 2 -d ')'|sed -e 's/^[[:space:]]*//')
+rpm_name=""
+debuginfo_path=""
+debuginfo_extension=""
+
+if [ $debuginfo_rpm != "none" ]; then
+ # extract the gluster debuginfo rpm to resolve the symbols against
+ rpm_name=$(basename $debuginfo_rpm '.rpm')
+ if [ -d $rpm_name ]; then
+ echo "directory already exists: $rpm_name"
+ echo "please remove/move it and reattempt"
+ exit 1
+ fi
+ mkdir -p $rpm_name
+ if version_compare $cpio_version "2.11"; then
+ rpm2cpio $debuginfo_rpm | cpio --quiet --extract --make-directories --preserve-modification-time --directory=$rpm_name
+ ret=$?
+ else
+ current_dir="$PWD"
+ cd $rpm_name
+ rpm2cpio $debuginfo_rpm | cpio --quiet --extract --make-directories --preserve-modification-time
+ ret=$?
+ cd $current_dir
+ fi
+ if [ $ret -eq 1 ]; then
+ echo "failed to extract rpm $debuginfo_rpm to $PWD/$rpm_name directory"
+ rm -rf $rpm_name
+ exit 1
+ fi
+ debuginfo_path="$PWD/$rpm_name/usr/lib/debug"
+ debuginfo_extension=".debug"
+else
+ debuginfo_path=""
+ debuginfo_extension=""
+fi
+
+# NOTE: backtrace file should contain only the lines which need to be resolved
+for bt in $(cat $backtrace_file)
+do
+ libname=$(echo $bt | cut -f 1 -d '(')
+ addr=$(echo $bt | cut -f 2 -d '(' | cut -f 1 -d ')')
+ libpath=${debuginfo_path}${libname}${debuginfo_extension}
+ if [ ! -f $libpath ]; then
+ continue
+ fi
+ newbt=( $(eu-addr2line --functions --exe=$libpath $addr) )
+ echo "$bt ${newbt[*]}"
+done
+
+# remove the temporary directory
+if [ -d $rpm_name ]; then
+ rm -rf $rpm_name
+fi
+
diff --git a/extras/devel-tools/strace-brick.sh b/extras/devel-tools/strace-brick.sh
new file mode 100755
index 00000000000..a140729111c
--- /dev/null
+++ b/extras/devel-tools/strace-brick.sh
@@ -0,0 +1,55 @@
+#!/bin/bash
+# Usage:
+# nice -n -19 strace-brick.sh glusterfsd 50
+
+brick_process_name=$1
+min_watch_cpu=$2
+if [ ! $brick_process_name ]; then
+ brick_process_name=glusterfsd
+fi
+
+if [ ! $min_watch_cpu ]; then
+ min_watch_cpu=50
+fi
+
+echo "min_watch_cpu: $min_watch_cpu"
+
+break=false
+
+while ! $break;
+do
+ mypids=( $(pgrep $brick_process_name) )
+ echo "mypids: ${mypids[*]}"
+
+ pid_args=$(echo ${mypids[*]} | sed -e 's/ / -p /g;s/^/-p /')
+ echo "pid_args: $pid_args"
+
+ pcpu=( $(ps $pid_args -o pcpu -h ) )
+ echo "pcpu: ${pcpu[*]}"
+
+ wait_longer=false
+
+ for i in $( seq 0 $((${#pcpu[*]} - 1)) )
+ do
+ echo "i: $i"
+ echo "mypids[$i]: ${mypids[$i]}"
+
+ int_pcpu=$(echo ${pcpu[$i]} | cut -f 1 -d '.')
+ echo "int_pcpu: $int_pcpu"
+ if [ ! $int_pcpu ] || [ ! $min_watch_cpu ]; then
+ break=true
+ echo "breaking"
+ fi
+ if [ $int_pcpu -ge $min_watch_cpu ]; then
+ wait_longer=true
+ mydirname="${brick_process_name}-${mypids[$i]}-$(date --utc +'%Y%m%d-%H%M%S.%N')"
+ $(mkdir $mydirname && cd $mydirname && timeout --kill-after=5 --signal=KILL 60 nice -n -19 strace -p ${mypids[$i]} -ff -tt -T -o $brick_process_name) &
+ fi
+ done
+
+ if $wait_longer; then
+ sleep 90
+ else
+ sleep 15
+ fi
+done
diff --git a/extras/distributed-testing/README b/extras/distributed-testing/README
new file mode 100644
index 00000000000..928d943f211
--- /dev/null
+++ b/extras/distributed-testing/README
@@ -0,0 +1,28 @@
+PROBLEM
+
+The testing methodology of Gluster is extremely slow. It takes a very long time (6+ hrs) to run the basic tests on a single machine. It takes about 20+ hours to run code analysis version of tests like valgrind, asan, tsan etc.
+
+SOLUTION
+
+The fundamental problem is that the tests cannot be parallelized on a single machine. The natural solution is to run these tests on a cluster of machines. In a nutshell, apply map-reduce to run unit tests.
+
+WORK @ Facebook
+
+At Facebook we have applied the map-reduce approach to testing and have observed 10X improvements.
+
+The solution supports the following
+
+Distribute tests across machines, collect results/logs
+Share worker pool across different testers
+Try failure 3 times on 3 different machines before calling it a failure
+Support running asan, valgrind, asan-noleaks
+Self management of worker pools. The clients will manage the worker pool including version update, no manual maintenance required
+WORK
+
+Port the code from gluster-fb-3.8 to gluster master
+
+HOW TO RUN
+
+./extras/distributed-testing/distributed-test.sh --hosts '<h1> <h2> <h3>'
+
+All hosts should have no password for ssh via root. This can be achieved with keys setup on the client and the server machines.
diff --git a/extras/distributed-testing/distributed-test-build-env b/extras/distributed-testing/distributed-test-build-env
new file mode 100644
index 00000000000..cd68ff717da
--- /dev/null
+++ b/extras/distributed-testing/distributed-test-build-env
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+GF_CONF_OPTS="--localstatedir=/var --sysconfdir /var/lib --prefix /usr --libdir /usr/lib64 \
+ --enable-bd-xlator=yes --enable-debug --enable-gnfs"
+
+if [ -x /usr/lib/rpm/redhat/dist.sh ]; then
+ REDHAT_MAJOR=$(/usr/lib/rpm/redhat/dist.sh --distnum)
+else
+ REDHAT_MAJOR=0
+fi
+
+ASAN_ENABLED=${ASAN_ENABLED:=0}
+if [ "$ASAN_ENABLED" -eq "1" ]; then
+ GF_CONF_OPTS="$GF_CONF_OPTS --with-asan"
+fi
+
+GF_CONF_OPTS="$GF_CONF_OPTS --with-systemd"
+export GF_CONF_OPTS
+
+export CFLAGS="-O0 -ggdb -fPIC -Wall"
diff --git a/extras/distributed-testing/distributed-test-build.sh b/extras/distributed-testing/distributed-test-build.sh
new file mode 100755
index 00000000000..e8910d8425c
--- /dev/null
+++ b/extras/distributed-testing/distributed-test-build.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+set -e
+
+EXTRA_CONFIGURE_ARGS="$@"
+ASAN_REQUESTED=false
+for arg in $EXTRA_CONFIGURE_ARGS; do
+ if [ $arg == "--with-asan" ]; then
+ echo "Requested ASAN, cleaning build first."
+ make -j distclean || true
+ touch .with_asan
+ ASAN_REQUESTED=true
+ fi
+done
+
+if [ $ASAN_REQUESTED == false ]; then
+ if [ -f .with_asan ]; then
+ echo "Previous build was with ASAN, cleaning build first."
+ make -j distclean || true
+ rm -v .with_asan
+ fi
+fi
+
+source extras/distributed-testing/distributed-test-build-env
+./autogen.sh
+./configure $GF_CONF_OPTS $EXTRA_CONFIGURE_ARGS
+make -j
diff --git a/extras/distributed-testing/distributed-test-env b/extras/distributed-testing/distributed-test-env
new file mode 100644
index 00000000000..36fdd82e5dd
--- /dev/null
+++ b/extras/distributed-testing/distributed-test-env
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+SMOKE_TESTS="\
+ tests/basic/*.t\
+ tests/basic/afr/*.t\
+ tests/basic/distribute/*.t\
+ tests/bugs/fb*.t\
+ tests/features/brick-min-free-space.t\
+"
+
+KNOWN_FLAKY_TESTS="\
+"
+
+BROKEN_TESTS="\
+ tests/features/lock_revocation.t\
+ tests/features/recon.t\
+ tests/features/fdl-overflow.t\
+ tests/features/fdl.t\
+ tests/features/ipc.t\
+ tests/bugs/distribute/bug-1247563.t\
+ tests/bugs/distribute/bug-1543279.t\
+ tests/bugs/distribute/bug-1066798.t\
+ tests/bugs/ec/bug-1304988.t\
+ tests/bugs/unclassified/bug-1357397.t\
+ tests/bugs/quota/bug-1235182.t\
+ tests/bugs/fuse/bug-1309462.t\
+ tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t\
+ tests/bugs/stripe/bug-1002207.t\
+ tests/bugs/stripe/bug-1111454.t\
+ tests/bugs/snapshot/bug-1140162-file-snapshot-features-encrypt-opts-validation.t\
+ tests/bugs/write-behind/bug-1279730.t\
+ tests/bugs/gfapi/bug-1093594.t\
+ tests/bugs/replicate/bug-1473026.t\
+ tests/bugs/replicate/bug-802417.t\
+ tests/basic/inode-leak.t\
+ tests/basic/distribute/force-migration.t\
+ tests/basic/ec/heal-info.t\
+ tests/basic/ec/ec-seek.t\
+ tests/basic/jbr/jbr-volgen.t\
+ tests/basic/jbr/jbr.t\
+ tests/basic/afr/tarissue.t\
+ tests/basic/tier/tierd_check.t\
+ tests/basic/gfapi/bug1291259.t\
+"
+
+SMOKE_TESTS=$(echo $SMOKE_TESTS | tr -s ' ' ' ')
+KNOWN_FLAKY_TESTS=$(echo $KNOWN_FLAKY_TESTS | tr -s ' ' ' ')
+BROKEN_TESTS=$(echo $BROKEN_TESTS | tr -s ' ' ' ')
diff --git a/extras/distributed-testing/distributed-test-runner.py b/extras/distributed-testing/distributed-test-runner.py
new file mode 100755
index 00000000000..5a07e2feab1
--- /dev/null
+++ b/extras/distributed-testing/distributed-test-runner.py
@@ -0,0 +1,859 @@
+#!/usr/bin/python3
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import unicode_literals
+from __future__ import print_function
+import re
+import sys
+import fcntl
+import base64
+import threading
+import socket
+import os
+import shlex
+import argparse
+import subprocess
+import time
+import SimpleXMLRPCServer
+import xmlrpclib
+import md5
+import httplib
+import uuid
+
+DEFAULT_PORT = 9999
+TEST_TIMEOUT_S = 15 * 60
+CLIENT_CONNECT_TIMEOUT_S = 10
+CLIENT_TIMEOUT_S = 60
+PATCH_FILE_UID = str(uuid.uuid4())
+SSH_TIMEOUT_S = 10
+MAX_ATTEMPTS = 3
+ADDRESS_FAMILY = 'IPv4'
+
+
+def socket_instance(address_family):
+ if address_family.upper() == 'ipv4'.upper():
+ return socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ elif address_family.upper() == 'ipv6'.upper():
+ return socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
+ else:
+ Log.error("Invalid IP address family")
+ sys.exit(1)
+
+
+def patch_file():
+ return "/tmp/%s-patch.tar.gz" % PATCH_FILE_UID
+
+# ..............................................................................
+# SimpleXMLRPCServer IPvX Wrapper
+# ..............................................................................
+
+
+class GeneralXMLRPCServer(SimpleXMLRPCServer.SimpleXMLRPCServer):
+ def __init__(self, addr):
+ SimpleXMLRPCServer.SimpleXMLRPCServer.__init__(self, addr)
+
+ def server_bind(self):
+ if self.socket:
+ self.socket.close()
+ self.socket = socket_instance(args.address_family)
+ self.socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+ SimpleXMLRPCServer.SimpleXMLRPCServer.server_bind(self)
+
+
+class HTTPConnection(httplib.HTTPConnection):
+ def __init__(self, host):
+ self.host = host
+ httplib.HTTPConnection.__init__(self, host)
+
+ def connect(self):
+ old_timeout = socket.getdefaulttimeout()
+ self.sock = socket.create_connection((self.host, self.port),
+ timeout=CLIENT_CONNECT_TIMEOUT_S)
+ self.sock.settimeout(old_timeout)
+
+
+class IPTransport(xmlrpclib.Transport):
+ def __init__(self, *args, **kwargs):
+ xmlrpclib.Transport.__init__(self, *args, **kwargs)
+
+ def make_connection(self, host):
+ return HTTPConnection(host)
+
+
+# ..............................................................................
+# Common
+# ..............................................................................
+
+
+class Timer:
+ def __init__(self):
+ self.start = time.time()
+
+ def elapsed_s(self):
+ return int(time.time() - self.start)
+
+ def reset(self):
+ ret = self.elapsed_s()
+ self.start = time.time()
+ return ret
+
+
+def encode(buf):
+ return base64.b16encode(buf)
+
+
+def decode(buf):
+ return base64.b16decode(buf)
+
+
+def get_file_content(path):
+ with open(path, "r") as f:
+ return f.read()
+
+
+def write_to_file(path, data):
+ with open(path, "w") as f:
+ f.write(data)
+
+
+def failsafe(fn, args=()):
+ try:
+ return (True, fn(*args))
+ except (xmlrpclib.Fault, xmlrpclib.ProtocolError, xmlrpclib.ResponseError,
+ Exception) as err:
+ Log.debug(str(err))
+ return (False, None)
+
+
+class LogLevel:
+ DEBUG = 2
+ ERROR = 1
+ CLI = 0
+
+
+class Log:
+ LOGLEVEL = LogLevel.ERROR
+
+ @staticmethod
+ def _normalize(msg):
+ return msg[:100]
+
+ @staticmethod
+ def debug(msg):
+ if Log.LOGLEVEL >= LogLevel.DEBUG:
+ sys.stdout.write("<debug> %s\n" % Log._normalize(msg))
+ sys.stdout.flush()
+
+ @staticmethod
+ def error(msg):
+ sys.stderr.write("<error> %s\n" % Log._normalize(msg))
+
+ @staticmethod
+ def header(msg):
+ sys.stderr.write("* %s *\n" % Log._normalize(msg))
+
+ @staticmethod
+ def cli(msg):
+ sys.stderr.write("%s\n" % msg)
+
+
+class Shell:
+ def __init__(self, cwd=None, logpath=None):
+ self.cwd = cwd
+ self.shell = True
+ self.redirect = open(os.devnull if not logpath else logpath, "wr+")
+
+ def __del__(self):
+ self.redirect.close()
+
+ def cd(self, cwd):
+ Log.debug("cd %s" % cwd)
+ self.cwd = cwd
+
+ def truncate(self):
+ self.redirect.truncate(0)
+
+ def read_logs(self):
+ self.redirect.seek(0)
+ return self.redirect.read()
+
+ def check_call(self, cmd):
+ status = self.call(cmd)
+ if status:
+ raise Exception("Error running command %s. status=%s"
+ % (cmd, status))
+
+ def call(self, cmd):
+ if isinstance(cmd, list):
+ return self._calls(cmd)
+
+ return self._call(cmd)
+
+ def ssh(self, hostname, cmd, id_rsa=None):
+ flags = "" if not id_rsa else "-i " + id_rsa
+ return self.call("timeout %s ssh %s root@%s \"%s\"" %
+ (SSH_TIMEOUT_S, flags, hostname, cmd))
+
+ def scp(self, hostname, src, dest, id_rsa=None):
+ flags = "" if not id_rsa else "-i " + id_rsa
+ return self.call("timeout %s scp %s %s root@%s:%s" %
+ (SSH_TIMEOUT_S, flags, src, hostname, dest))
+
+ def output(self, cmd, cwd=None):
+ Log.debug("%s> %s" % (cwd, cmd))
+ return subprocess.check_output(shlex.split(cmd), cwd=self.cwd)
+
+ def _calls(self, cmds):
+ Log.debug("Running commands. %s" % cmds)
+ for c in cmds:
+ status = self.call(c)
+ if status:
+ Log.error("Commands failed with %s" % status)
+ return status
+ return 0
+
+ def _call(self, cmd):
+ if not self.shell:
+ cmd = shlex.split(cmd)
+
+ Log.debug("%s> %s" % (self.cwd, cmd))
+
+ status = subprocess.call(cmd, cwd=self.cwd, shell=self.shell,
+ stdout=self.redirect, stderr=self.redirect)
+
+ Log.debug("return %s" % status)
+ return status
+
+
+# ..............................................................................
+# Server role
+# ..............................................................................
+
+class TestServer:
+ def __init__(self, port, scratchdir):
+ self.port = port
+ self.scratchdir = scratchdir
+ self.shell = Shell()
+ self.rpc = None
+ self.pidf = None
+
+ self.shell.check_call("mkdir -p %s" % self.scratchdir)
+ self._process_lock()
+
+ def __del__(self):
+ if self.pidf:
+ self.pidf.close()
+
+ def init(self):
+ Log.debug("Starting xmlrpc server on port %s" % self.port)
+ self.rpc = GeneralXMLRPCServer(("", self.port))
+ self.rpc.register_instance(Handlers(self.scratchdir))
+
+ def serve(self):
+ (status, _) = failsafe(self.rpc.serve_forever)
+ Log.cli("== End ==")
+
+ def _process_lock(self):
+ pid_filename = os.path.basename(__file__).replace("/", "-")
+ pid_filepath = "%s/%s.pid" % (self.scratchdir, pid_filename)
+ self.pidf = open(pid_filepath, "w")
+ try:
+ fcntl.lockf(self.pidf, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ # We have the lock, kick anybody listening on this port
+ self.shell.call("kill $(lsof -t -i:%s)" % self.port)
+ except IOError:
+ Log.error("Another process instance is running")
+ sys.exit(0)
+
+#
+# Server Handler
+#
+
+
+handler_lock = threading.Lock()
+handler_serving_since = Timer()
+
+
+def synchronized(func):
+ def decorator(*args, **kws):
+ handler_lock.acquire()
+ h = args[0]
+ try:
+ h.shell.truncate()
+ ret = func(*args, **kws)
+ return ret
+ except Exception() as err:
+ Log.error(str(err))
+ Log.error(decode(h._log_content()))
+ raise
+ finally:
+ handler_lock.release()
+ handler_serving_since.reset()
+
+ return decorator
+
+
+class Handlers:
+ def __init__(self, scratchdir):
+ self.client_id = None
+ self.scratchdir = scratchdir
+ self.gluster_root = "%s/glusterfs" % self.scratchdir
+ self.shell = Shell(logpath="%s/test-handlers.log" % self.scratchdir)
+
+ def hello(self, id):
+ if not handler_lock.acquire(False):
+ return False
+ try:
+ return self._hello_locked(id)
+ finally:
+ handler_lock.release()
+
+ def _hello_locked(self, id):
+ if handler_serving_since.elapsed_s() > CLIENT_TIMEOUT_S:
+ Log.debug("Disconnected client %s" % self.client_id)
+ self.client_id = None
+
+ if not self.client_id:
+ self.client_id = id
+ handler_serving_since.reset()
+ return True
+
+ return (id == self.client_id)
+
+ @synchronized
+ def ping(self, id=None):
+ if id:
+ return id == self.client_id
+ return True
+
+ @synchronized
+ def bye(self, id):
+ assert id == self.client_id
+ self.client_id = None
+ handler_serving_since.reset()
+ return True
+
+ @synchronized
+ def cleanup(self, id):
+ assert id == self.client_id
+ self.shell.cd(self.gluster_root)
+ self.shell.check_call("PATH=.:$PATH; sudo ./clean_gfs_devserver.sh")
+ return True
+
+ @synchronized
+ def copy(self, id, name, content):
+ with open("%s/%s" % (self.scratchdir, name), "w+") as f:
+ f.write(decode(content))
+ return True
+
+ @synchronized
+ def copygzip(self, id, content):
+ assert id == self.client_id
+ gzipfile = "%s/tmp.tar.gz" % self.scratchdir
+ tarfile = "%s/tmp.tar" % self.scratchdir
+ self.shell.check_call("rm -f %s" % gzipfile)
+ self.shell.check_call("rm -f %s" % tarfile)
+ write_to_file(gzipfile, decode(content))
+
+ self.shell.cd(self.scratchdir)
+ self.shell.check_call("rm -r -f %s" % self.gluster_root)
+ self.shell.check_call("mkdir -p %s" % self.gluster_root)
+
+ self.shell.cd(self.gluster_root)
+ cmds = [
+ "gunzip -f -q %s" % gzipfile,
+ "tar -xvf %s" % tarfile
+ ]
+ return self.shell.call(cmds) == 0
+
+ @synchronized
+ def build(self, id, asan=False):
+ assert id == self.client_id
+ self.shell.cd(self.gluster_root)
+ self.shell.call("make clean")
+ env = "ASAN_ENABLED=1" if asan else ""
+ return self.shell.call(
+ "%s ./extras/distributed-testing/distributed-test-build.sh" % env) == 0
+
+ @synchronized
+ def install(self, id):
+ assert id == self.client_id
+ self.shell.cd(self.gluster_root)
+ return self.shell.call("make install") == 0
+
+ @synchronized
+ def prove(self, id, test, timeout, valgrind="no", asan_noleaks=True):
+ assert id == self.client_id
+ self.shell.cd(self.gluster_root)
+ env = "DEBUG=1 "
+ if valgrind == "memcheck" or valgrind == "yes":
+ cmd = "valgrind"
+ cmd += " --tool=memcheck --leak-check=full --track-origins=yes"
+ cmd += " --show-leak-kinds=all -v prove -v"
+ elif valgrind == "drd":
+ cmd = "valgrind"
+ cmd += " --tool=drd -v prove -v"
+ elif asan_noleaks:
+ cmd = "prove -v"
+ env += "ASAN_OPTIONS=detect_leaks=0 "
+ else:
+ cmd = "prove -v"
+
+ status = self.shell.call(
+ "%s timeout %s %s %s" % (env, timeout, cmd, test))
+
+ if status != 0:
+ return (False, self._log_content())
+ return (True, "")
+
+ def _log_content(self):
+ return encode(self.shell.read_logs())
+
+# ..............................................................................
+# Cli role
+# ..............................................................................
+
+
+class RPCConnection((threading.Thread)):
+ def __init__(self, host, port, path, cb):
+ threading.Thread.__init__(self)
+ self.host = host
+ self.port = port
+ self.path = path
+ self.shell = Shell()
+ self.cb = cb
+ self.stop = False
+ self.proxy = None
+ self.logid = "%s:%s" % (self.host, self.port)
+
+ def connect(self):
+ (status, ret) = failsafe(self._connect)
+ return (status and ret)
+
+ def _connect(self):
+ url = "http://%s:%s" % (self.host, self.port)
+ self.proxy = xmlrpclib.ServerProxy(url, transport=IPTransport())
+ return self.proxy.hello(self.cb.id)
+
+ def disconnect(self):
+ self.stop = True
+
+ def ping(self):
+ return self.proxy.ping()
+
+ def init(self):
+ return self._copy() and self._compile_and_install()
+
+ def run(self):
+ (status, ret) = failsafe(self.init)
+ if not status:
+ self.cb.note_lost_connection(self)
+ return
+ elif not ret:
+ self.cb.note_setup_failed(self)
+ return
+
+ while not self.stop:
+ (status, ret) = failsafe(self._run)
+ if not status or not ret:
+ self.cb.note_lost_connection(self)
+ break
+ time.sleep(0)
+
+ failsafe(self.proxy.bye, (self.cb.id,))
+ Log.debug("%s connection thread stopped" % self.host)
+
+ def _run(self):
+ test = self.cb.next_test()
+ (status, _) = failsafe(self._execute_next, (test,))
+ if not status:
+ self.cb.note_retry(test)
+ return False
+ return True
+
+ def _execute_next(self, test):
+ if not test:
+ time.sleep(1)
+ return
+
+ (status, error) = self.proxy.prove(self.cb.id, test,
+ self.cb.test_timeout,
+ self.cb.valgrind,
+ self.cb.asan_noleaks)
+ if status:
+ self.cb.note_done(test)
+ else:
+ self.cb.note_error(test, error)
+
+ def _compile_and_install(self):
+ Log.debug("<%s> Build " % self.logid)
+ asan = self.cb.asan or self.cb.asan_noleaks
+ return (self.proxy.build(self.cb.id, asan) and
+ self.proxy.install(self.cb.id))
+
+ def _copy(self):
+ return self._copy_gzip()
+
+ def _copy_gzip(self):
+ Log.cli("<%s> copying and compiling %s to remote" %
+ (self.logid, self.path))
+ data = encode(get_file_content(patch_file()))
+ Log.debug("GZIP size = %s B" % len(data))
+ return self.proxy.copygzip(self.cb.id, data)
+
+
+class RPCConnectionPool:
+ def __init__(self, gluster_path, hosts, n, id_rsa):
+ self.gluster_path = gluster_path
+ self.hosts = hosts
+ self.conns = []
+ self.faulty = []
+ self.n = int(len(hosts) / 2) + 1 if not n else n
+ self.id_rsa = id_rsa
+ self.stop = False
+ self.scanner = threading.Thread(target=self._scan_hosts_loop)
+ self.kicker = threading.Thread(target=self._kick_hosts_loop)
+ self.shell = Shell()
+ self.since_start = Timer()
+
+ self.shell.check_call("rm -f %s" % patch_file())
+ self.shell.check_call("tar -zcvf %s ." % patch_file())
+ self.id = md5.new(get_file_content(patch_file())).hexdigest()
+ Log.cli("client UID %s" % self.id)
+ Log.cli("patch UID %s" % PATCH_FILE_UID)
+
+ def __del__(self):
+ self.shell.check_call("rm -f %s" % patch_file())
+
+ def pool_status(self):
+ elapsed_m = int(self.since_start.elapsed_s() / 60)
+ return "%s/%s connected, %smin elapsed" % (len(self.conns), self.n,
+ elapsed_m)
+
+ def connect(self):
+ Log.debug("Starting scanner")
+ self.scanner.start()
+ self.kicker.start()
+
+ def disconnect(self):
+ self.stop = True
+ for conn in self.conns:
+ conn.disconnect()
+
+ def note_lost_connection(self, conn):
+ Log.cli("lost connection to %s" % conn.host)
+ self.conns.remove(conn)
+ self.hosts.append((conn.host, conn.port))
+
+ def note_setup_failed(self, conn):
+ Log.error("Setup failed on %s:%s" % (conn.host, conn.port))
+ self.conns.remove(conn)
+ self.faulty.append((conn.host, conn.port))
+
+ def _scan_hosts_loop(self):
+ Log.debug("Scanner thread started")
+ while not self.stop:
+ failsafe(self._scan_hosts)
+ time.sleep(5)
+
+ def _scan_hosts(self):
+ if len(self.hosts) == 0 and len(self.conns) == 0:
+ Log.error("no more hosts available to loadbalance")
+ sys.exit(1)
+
+ for (host, port) in self.hosts:
+ if (len(self.conns) >= self.n) or self.stop:
+ break
+ self._scan_host(host, port)
+
+ def _scan_host(self, host, port):
+ Log.debug("scanning %s:%s" % (host, port))
+ c = RPCConnection(host, port, self.gluster_path, self)
+ (status, result) = failsafe(c.connect)
+ if status and result:
+ self.hosts.remove((host, port))
+ Log.debug("Connected to %s:%s" % (host, port))
+ self.conns.append(c)
+ c.start()
+ Log.debug("%s / %s connected" % (len(self.conns), self.n))
+ else:
+ Log.debug("Failed to connect to %s:%s" % (host, port))
+
+ def _kick_hosts_loop(self):
+ Log.debug("Kick thread started")
+ while not self.stop:
+ time.sleep(10)
+ failsafe(self._kick_hosts)
+
+ Log.debug("Kick thread stopped")
+
+ def _is_pingable(self, host, port):
+ c = RPCConnection(host, port, self.gluster_path, self)
+ failsafe(c.connect)
+ (status, result) = failsafe(c.ping)
+ return status and result
+
+ def _kick_hosts(self):
+ # Do not kick hosts if we have the optimal number of connections
+ if (len(self.conns) >= self.n) or self.stop:
+ Log.debug("Skip kicking hosts")
+ return
+
+ # Check and if dead kick all hosts
+ for (host, port) in self.hosts:
+ if self.stop:
+ Log.debug("Break kicking hosts")
+ break
+
+ if self._is_pingable(host, port):
+ Log.debug("Host=%s is alive. Won't kick" % host)
+ continue
+
+ Log.debug("Kicking %s" % host)
+ mypath = sys.argv[0]
+ myname = os.path.basename(mypath)
+ destpath = "/tmp/%s" % myname
+ sh = Shell()
+ sh.scp(host, mypath, destpath, self.id_rsa)
+ sh.ssh(host, "nohup %s --server &>> %s.log &" %
+ (destpath, destpath), self.id_rsa)
+
+ def join(self):
+ self.scanner.join()
+ self.kicker.join()
+ for c in self.conns:
+ c.join()
+
+
+# ..............................................................................
+# test role
+# ..............................................................................
+
+class TestRunner(RPCConnectionPool):
+ def __init__(self, gluster_path, hosts, n, tests, flaky_tests, valgrind,
+ asan, asan_noleaks, id_rsa, test_timeout):
+ RPCConnectionPool.__init__(self, gluster_path, self._parse_hosts(hosts),
+ n, id_rsa)
+ self.flaky_tests = flaky_tests.split(" ")
+ self.pending = []
+ self.done = []
+ self.error = []
+ self.retry = {}
+ self.error_logs = []
+ self.stats_timer = Timer()
+ self.valgrind = valgrind
+ self.asan = asan
+ self.asan_noleaks = asan_noleaks
+ self.test_timeout = test_timeout
+
+ self.tests = self._get_tests(tests)
+
+ Log.debug("tests: %s" % self.tests)
+
+ def _get_tests(self, tests):
+ if not tests or tests == "all":
+ return self._not_flaky(self._all())
+ elif tests == "flaky":
+ return self.flaky_tests
+ else:
+ return self._not_flaky(tests.strip().split(" "))
+
+ def run(self):
+ self.connect()
+ self.join()
+ return len(self.error)
+
+ def _pretty_print(self, data):
+ if isinstance(data, list):
+ str = ""
+ for i in data:
+ str = "%s %s" % (str, i)
+ return str
+ return "%s" % data
+
+ def print_result(self):
+ Log.cli("== RESULTS ==")
+ Log.cli("SUCCESS : %s" % len(self.done))
+ Log.cli("ERRORS : %s" % len(self.error))
+ Log.cli("== ERRORS ==")
+ Log.cli(self._pretty_print(self.error))
+ Log.cli("== LOGS ==")
+ Log.cli(self._pretty_print(self.error_logs))
+ Log.cli("== END ==")
+
+ def next_test(self):
+ if len(self.tests):
+ test = self.tests.pop()
+ self.pending.append(test)
+ return test
+
+ if not len(self.pending):
+ self.disconnect()
+
+ return None
+
+ def _pct_completed(self):
+ total = len(self.tests) + len(self.pending) + len(self.done)
+ total += len(self.error)
+ completed = len(self.done) + len(self.error)
+ return 0 if not total else int(completed / total * 100)
+
+ def note_done(self, test):
+ Log.cli("%s PASS (%s%% done) (%s)" % (test, self._pct_completed(),
+ self.pool_status()))
+ self.pending.remove(test)
+ self.done.append(test)
+ if test in self.retry:
+ del self.retry[test]
+
+ def note_error(self, test, errstr):
+ Log.cli("%s FAIL" % test)
+ self.pending.remove(test)
+ if test not in self.retry:
+ self.retry[test] = 1
+
+ if errstr:
+ path = "%s/%s-%s.log" % ("/tmp", test.replace("/", "-"),
+ self.retry[test])
+ failsafe(write_to_file, (path, decode(errstr),))
+ self.error_logs.append(path)
+
+ if self.retry[test] < MAX_ATTEMPTS:
+ self.retry[test] += 1
+ Log.debug("retry test %s attempt %s" % (test, self.retry[test]))
+ self.tests.append(test)
+ else:
+ Log.debug("giveup attempt test %s" % test)
+ del self.retry[test]
+ self.error.append(test)
+
+ def note_retry(self, test):
+ Log.cli("retry %s on another host" % test)
+ self.pending.remove(test)
+ self.tests.append(test)
+
+ #
+ # test classifications
+ #
+ def _all(self):
+ return self._list_tests(["tests"], recursive=True)
+
+ def _not_flaky(self, tests):
+ for t in self.flaky_tests:
+ if t in tests:
+ tests.remove(t)
+ return tests
+
+ def _list_tests(self, prefixes, recursive=False, ignore_ifnotexist=False):
+ tests = []
+ for prefix in prefixes:
+ real_path = "%s/%s" % (self.gluster_path, prefix)
+ if not os.path.exists(real_path) and ignore_ifnotexist:
+ continue
+ for f in os.listdir(real_path):
+ if os.path.isdir(real_path + "/" + f):
+ if recursive:
+ tests += self._list_tests([prefix + "/" + f], recursive)
+ else:
+ if re.match(r".*\.t$", f):
+ tests += [prefix + "/" + f]
+ return tests
+
+ def _parse_hosts(self, hosts):
+ ret = []
+ for h in args.hosts.split(" "):
+ ret.append((h, DEFAULT_PORT))
+ Log.debug(ret)
+ return ret
+
+# ..............................................................................
+# Roles entry point
+# ..............................................................................
+
+
+def run_as_server(args):
+ if not args.server_path:
+ Log.error("please provide server path")
+ return 1
+
+ server = TestServer(args.port, args.server_path)
+ server.init()
+ server.serve()
+ return 0
+
+
+def run_as_tester(args):
+ Log.header("GLUSTER TEST CLI")
+
+ Log.debug("args = %s" % args)
+
+ tests = TestRunner(args.gluster_path, args.hosts, args.n, args.tests,
+ args.flaky_tests, valgrind=args.valgrind,
+ asan=args.asan, asan_noleaks=args.asan_noleaks,
+ id_rsa=args.id_rsa, test_timeout=args.test_timeout)
+ result = tests.run()
+ tests.print_result()
+ return result
+
+# ..............................................................................
+# main
+# ..............................................................................
+
+
+def main(args):
+ if args.v:
+ Log.LOGLEVEL = LogLevel.DEBUG
+
+ if args.server and args.tester:
+ Log.error("Invalid arguments. More than one role specified")
+ sys.exit(1)
+
+ if args.server:
+ sys.exit(run_as_server(args))
+ elif args.tester:
+ sys.exit(run_as_tester(args))
+ else:
+ Log.error("please specify a mode for CI")
+ parser.print_help()
+ sys.exit(1)
+
+
+parser = argparse.ArgumentParser(description="Gluster CI")
+
+# server role
+parser.add_argument("--server", help="start server", action="store_true")
+parser.add_argument("--server_path", help="server scratch space",
+ default="/tmp/gluster-test")
+parser.add_argument("--host", help="server address to listen", default="")
+parser.add_argument("--port", help="server port to listen",
+ type=int, default=DEFAULT_PORT)
+# test role
+parser.add_argument("--tester", help="start tester", action="store_true")
+parser.add_argument("--valgrind[=memcheck,drd]",
+ help="run tests with valgrind tool 'memcheck' or 'drd'",
+ default="no")
+parser.add_argument("--asan", help="test with asan enabled",
+ action="store_true")
+parser.add_argument("--asan-noleaks", help="test with asan but no mem leaks",
+ action="store_true")
+parser.add_argument("--tests", help="all/flaky/list of tests", default=None)
+parser.add_argument("--flaky_tests", help="list of flaky tests", default=None)
+parser.add_argument("--n", help="max number of machines to use", type=int,
+ default=0)
+parser.add_argument("--hosts", help="list of worker machines")
+parser.add_argument("--gluster_path", help="gluster path to test",
+ default=os.getcwd())
+parser.add_argument("--id-rsa", help="private key to use for ssh",
+ default=None)
+parser.add_argument("--test-timeout",
+ help="test timeout in sec (default 15min)",
+ default=TEST_TIMEOUT_S)
+# general
+parser.add_argument("-v", help="verbose", action="store_true")
+parser.add_argument("--address_family", help="IPv6 or IPv4 to use",
+ default=ADDRESS_FAMILY)
+
+args = parser.parse_args()
+
+main(args)
diff --git a/extras/distributed-testing/distributed-test.sh b/extras/distributed-testing/distributed-test.sh
new file mode 100755
index 00000000000..8f1e0310f33
--- /dev/null
+++ b/extras/distributed-testing/distributed-test.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+source ./extras/distributed-testing/distributed-test-env
+
+N=0
+TESTS='all'
+FLAKY=$KNOWN_FLAKY_TESTS
+BROKEN=$BROKEN_TESTS
+TEST_TIMEOUT_S=900
+ADDRESS_FAMILY='IPv4'
+
+FLAGS=""
+
+function print_env {
+ echo "Settings:"
+ echo "N=$N"
+ echo -e "-------\nHOSTS\n$HOSTS\n-------"
+ echo -e "TESTS\n$TESTS\n-------"
+ echo -e "SKIP\n$FLAKY $BROKEN\n-------"
+ echo -e "TEST_TIMEOUT_S=$TEST_TIMEOUT_S s\n"
+}
+
+function cleanup {
+ rm -f /tmp/test*.log
+}
+
+function usage {
+ echo "Usage: $0 [-h or --help] [-v or --verbose]
+ [--all] [--flaky] [--smoke] [--broken]
+ [--valgrind] [--asan] [--asan-noleaks]
+ [--hosts <hosts>] [-n <parallelism>]
+ [--tests <tests>]
+ [--id-rsa <ssh private key>]
+ [--address_family <IPv4 or IPv6>]
+ "
+}
+
+function parse_args () {
+ args=`getopt \
+ -o hvn: \
+ --long help,verbose,address_family:,valgrind,asan,asan-noleaks,all,\
+smoke,flaky,broken,hosts:,tests:,id-rsa:,test-timeout: \
+ -n 'fb-remote-test.sh' -- "$@"`
+
+ if [ $? != 0 ]; then
+ echo "Error parsing getopt"
+ exit 1
+ fi
+
+ eval set -- "$args"
+
+ while true; do
+ case "$1" in
+ -h | --help) usage ; exit 1 ;;
+ -v | --verbose) FLAGS="$FLAGS -v" ; shift ;;
+ --address_family) ADDRESS_FAMILY=$2; shift 2 ;;
+ --valgrind) FLAGS="$FLAGS --valgrind" ; shift ;;
+ --asan-noleaks) FLAGS="$FLAGS --asan-noleaks"; shift ;;
+ --asan) FLAGS="$FLAGS --asan" ; shift ;;
+ --hosts) HOSTS=$2; shift 2 ;;
+ --tests) TESTS=$2; FLAKY= ; BROKEN= ; shift 2 ;;
+ --test-timeout) TEST_TIMEOUT_S=$2; shift 2 ;;
+ --all) TESTS='all' ; shift 1 ;;
+ --flaky) TESTS=$FLAKY; FLAKY= ; shift 1 ;;
+ --smoke) TESTS=$SMOKE_TESTS; shift 1 ;;
+ --broken) TESTS=$BROKEN_TESTS; FLAKY= ; BROKEN= ; shift 1 ;;
+ --id-rsa) FLAGS="$FLAGS --id-rsa $2" ; shift 2 ;;
+ -n) N=$2; shift 2 ;;
+ *) break ;;
+ esac
+ done
+ run_tests_args="$@"
+}
+
+function main {
+ parse_args "$@"
+
+ if [ -z "$HOSTS" ]; then
+ echo "Please provide hosts to run the tests in"
+ exit -1
+ fi
+
+ print_env
+
+ cleanup
+
+ "extras/distributed-testing/distributed-test-runner.py" $FLAGS --tester \
+ --n "$N" --hosts "$HOSTS" --tests "$TESTS" \
+ --flaky_tests "$FLAKY $BROKEN" --test-timeout "$TEST_TIMEOUT_S" \
+ --address_family "$ADDRESS_FAMILY"
+
+ exit $?
+}
+
+main "$@"
diff --git a/extras/ec-heal-script/README.md b/extras/ec-heal-script/README.md
new file mode 100644
index 00000000000..aaefd6681f6
--- /dev/null
+++ b/extras/ec-heal-script/README.md
@@ -0,0 +1,69 @@
+# gluster-heal-scripts
+Scripts to correct extended attributes of fragments of files to make them healble.
+
+Following are the guidelines/suggestions to use these scripts.
+
+1 - Passwordless ssh should be setup for all the nodes of the cluster.
+
+2 - Scripts should be executed from one of these nodes.
+
+3 - Make sure NO "IO" is going on for the files for which we are running
+these two scripts.
+
+4 - There should be no heal going on for the file for which xattrs are being
+set by correct_pending_heals.sh. Disable the self heal while running this script.
+
+5 - All the bricks of the volume should be UP to identify good and bad fragments
+and to decide if an entry is healble or not.
+
+6 - If correct_pending_heals.sh is stopped in the middle while it was processing
+healble entries, it is suggested to re-run gfid_needing_heal_parallel.sh to create
+latest list of healble and non healble entries and "potential_heal" "can_not_heal" files.
+
+7 - Based on the number of entries, these files might take time to get and set the
+stats and xattrs of entries.
+
+8 - A backup of the fragments will be taken on <brick path>/.glusterfs/correct_pending_heals
+ directory with a file name same as gfid.
+
+9 - Once the correctness of the file gets verified by user, these backup should be removed.
+
+10 - Make sure we have enough space on bricks to take these backups.
+
+11 - At the end this will create two files -
+ 1 - modified_and_backedup_files - Contains list of files which have been modified and should be healed.
+ 2 - can_not_heal - Contains list of files which can not be healed.
+
+12 - It is suggested that the integrity of the data of files, which were modified and healed,
+ should be checked by the user.
+
+
+Usage:
+
+Following are the sequence of steps to use these scripts -
+
+1 - ./gfid_needing_heal_parallel.sh <volume name>
+
+ Execute gfid_needing_heal_parallel.sh with volume name to create list of files which could
+ be healed and can not be healed. It creates "potential_heal" and "can_not_heal" files.
+ During execution, it also displays the list of files on consol with the verdict.
+
+2 - ./correct_pending_heals.sh
+
+ Execute correct_pending_heals.sh without any argument. This script processes entries present
+ in "heal" file. It asks user to enter how many files we want to process in one attempt.
+ Once the count is provided, this script will fetch the entries one by one from "potential_heal" file and takes necessary action.
+ If at this point also a file can not be healed, it will be pushed to "can_not_heal" file.
+ If a file can be healed, this script will modify the xattrs of that file fragments and create an entry in "modified_and_backedup_files" file
+
+3 - At the end, all the entries of "potential_heal" will be processed and based on the processing only two files will be left.
+
+ 1 - modified_and_backedup_files - Contains list of files which have been modified and should be healed.
+ 2 - can_not_heal - Contains list of files which can not be healed.
+
+Logs and other files -
+
+1 - modified_and_backedup_files - It contains all the files which could be healed and the location of backup of each fragments.
+2 - can_not_heal - It contains all the files which can not be healed.
+3 - potential_heal - List of files which could be healed and should be processed by "correct_pending_heals.sh"
+4 - /var/log/glusterfs/ec-heal-script.log - It contains logs of both the files.
diff --git a/extras/ec-heal-script/correct_pending_heals.sh b/extras/ec-heal-script/correct_pending_heals.sh
new file mode 100755
index 00000000000..c9f19dd7c89
--- /dev/null
+++ b/extras/ec-heal-script/correct_pending_heals.sh
@@ -0,0 +1,415 @@
+#!/bin/bash
+# Copyright (c) 2019-2020 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+# This script finally resets the xattrs of all the fragments of a file
+# which can be healed as per gfid_needing_heal_parallel.sh.
+# gfid_needing_heal_parallel.sh will produce two files, potential_heal and can_not_heal.
+# This script takes potential_heal as input and resets xattrs of all the fragments
+# of those files present in this file and which could be healed as per
+# trusted.ec.size xattar of the file else it will place the entry in can_not_heal
+# file. Those entries which must be healed will be place in must_heal file
+# after setting xattrs so that user can track those files.
+
+
+MOD_BACKUP_FILES="modified_and_backedup_files"
+CAN_NOT_HEAL="can_not_heal"
+LOG_DIR="/var/log/glusterfs"
+LOG_FILE="$LOG_DIR/ec-heal-script.log"
+LINE_SEP="==================================================="
+
+function heal_log()
+{
+ echo "$1" >> "$LOG_FILE"
+}
+
+function desc ()
+{
+ echo ""
+ echo "This script finally resets the xattrs of all the fragments of a file
+which can be healed as per gfid_needing_heal_parallel.sh.
+gfid_needing_heal_parallel.sh will produce two files, potential_heal and can_not_heal.
+This script takes potential_heal as input and resets xattrs of all the fragments
+of those files present in this file and which could be healed as per
+trusted.ec.size xattar of the file else it will place the entry in can_not_heal
+file. Those entries which must be healed will be place in must_heal file
+after setting xattrs so that user can track those files."
+}
+
+function _init ()
+{
+ if [ $# -ne 0 ]
+ then
+ echo "usage: $0"
+ desc
+ exit 2
+ fi
+
+ if [ ! -f "potential_heal" ]
+ then
+ echo "Nothing to correct. File "potential_heal" does not exist"
+ echo ""
+ desc
+ exit 2
+ fi
+}
+
+function total_file_size_in_hex()
+{
+ local frag_size=$1
+ local size=0
+ local hex_size=""
+
+ size=$((frag_size * 4))
+ hex_size=$(printf '0x%016x' $size)
+ echo "$hex_size"
+}
+
+function backup_file_fragment()
+{
+ local file_host=$1
+ local file_entry=$2
+ local gfid_actual_paths=$3
+ local brick_root=""
+ local temp=""
+ local backup_dir=""
+ local cmd=""
+ local gfid=""
+
+ brick_root=$(echo "$file_entry" | cut -d "#" -f 1)
+ temp=$(echo "$(basename "$BASH_SOURCE")" | cut -d '.' -f 1)
+ backup_dir=$(echo "${brick_root}/.glusterfs/${temp}")
+ file_entry=${file_entry//#}
+
+ gfid=$(echo "${gfid_actual_paths}" | cut -d '|' -f 1 | cut -d '/' -f 5)
+ echo "${file_host}:${backup_dir}/${gfid}" >> "$MOD_BACKUP_FILES"
+
+ cmd="mkdir -p ${backup_dir} && yes | cp -af ${file_entry} ${backup_dir}/${gfid} 2>/dev/null"
+ ssh -n "${file_host}" "${cmd}"
+}
+
+function set_frag_xattr ()
+{
+ local file_host=$1
+ local file_entry=$2
+ local good=$3
+ local cmd1=""
+ local cmd2=""
+ local cmd=""
+ local version="0x00000000000000010000000000000001"
+ local dirty="0x00000000000000010000000000000001"
+
+ if [[ $good -eq 0 ]]
+ then
+ version="0x00000000000000000000000000000000"
+ fi
+
+ cmd1=" setfattr -n trusted.ec.version -v ${version} ${file_entry} &&"
+ cmd2=" setfattr -n trusted.ec.dirty -v ${dirty} ${file_entry}"
+ cmd=${cmd1}${cmd2}
+ ssh -n "${file_host}" "${cmd}"
+}
+
+function set_version_dirty_xattr ()
+{
+ local file_paths=$1
+ local good=$2
+ local gfid_actual_paths=$3
+ local file_entry=""
+ local file_host=""
+ local bpath=""
+
+ for bpath in ${file_paths//,/ }
+ do
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+ backup_file_fragment "$file_host" "$file_entry" "$gfid_actual_paths"
+ file_entry=${file_entry//#}
+ set_frag_xattr "$file_host" "$file_entry" "$good"
+ done
+}
+
+function match_size_xattr_quorum ()
+{
+ local file_paths=$1
+ local file_entry=""
+ local file_host=""
+ local cmd=""
+ local size_xattr=""
+ local bpath=""
+ declare -A xattr_count
+
+ for bpath in ${file_paths//,/ }
+ do
+ size_xattr=""
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+ file_entry=${file_entry//#}
+
+ cmd="getfattr -n trusted.ec.size -d -e hex ${file_entry} 2>/dev/null | grep -w "trusted.ec.size" | cut -d '=' -f 2"
+ size_xattr=$(ssh -n "${file_host}" "${cmd}")
+ if [[ -n $size_xattr ]]
+ then
+ count=$((xattr_count["$size_xattr"] + 1))
+ xattr_count["$size_xattr"]=${count}
+ if [[ $count -ge 4 ]]
+ then
+ echo "${size_xattr}"
+ return
+ fi
+ fi
+ done
+ echo "False"
+}
+
+function match_version_xattr ()
+{
+ local file_paths=$1
+ local file_entry=""
+ local file_host=""
+ local cmd=""
+ local version=""
+ local bpath=""
+ declare -A ver_count
+
+ for bpath in ${file_paths//,/ }
+ do
+ version=""
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+ file_entry=${file_entry//#}
+
+ cmd="getfattr -n trusted.ec.version -d -e hex ${file_entry} 2>/dev/null | grep -w "trusted.ec.version" | cut -d '=' -f 2"
+ version=$(ssh -n "${file_host}" "${cmd}")
+ ver_count["$version"]=$((ver_count["$version"] + 1))
+ done
+ for key in "${ver_count[@]}"
+ do
+ if [[ $key -ge 4 ]]
+ then
+ echo "True"
+ return
+ else
+ echo "False"
+ return
+ fi
+ done
+}
+
+function match_stat_size_with_xattr ()
+{
+ local bpath=$1
+ local size=$2
+ local file_stat=$3
+ local xattr=$4
+ local file_entry=""
+ local file_host=""
+ local cmd=""
+ local stat_output=""
+ local hex_size=""
+
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+
+ file_entry=${file_entry//#}
+ cmd="stat --format=%F:%B:%s $file_entry 2>/dev/null"
+ stat_output=$(ssh -n "${file_host}" "${cmd}")
+ echo "$stat_output" | grep -w "${file_stat}" > /dev/null
+
+ if [[ $? -eq 0 ]]
+ then
+ cmd="getfattr -n trusted.ec.size -d -e hex ${file_entry} 2>/dev/null | grep -w "trusted.ec.size" | cut -d '=' -f 2"
+ hex_size=$(ssh -n "${file_host}" "${cmd}")
+
+ if [[ -z $hex_size || "$hex_size" != "$xattr" ]]
+ then
+ echo "False"
+ return
+ fi
+ size_diff=$(printf '%d' $(( size - hex_size )))
+ if [[ $size_diff -gt 2047 ]]
+ then
+ echo "False"
+ return
+ else
+ echo "True"
+ return
+ fi
+ else
+ echo "False"
+ return
+ fi
+}
+
+function find_file_paths ()
+{
+ local bpath=$1
+ local file_entry=""
+ local file_host=""
+ local cmd=""
+ local brick_root=""
+ local gfid=""
+ local actual_path=""
+ local gfid_path=""
+
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+ brick_root=$(echo "$file_entry" | cut -d "#" -f 1)
+
+ gfid=$(echo "${file_entry}" | grep ".glusterfs")
+ if [[ -n "$gfid" ]]
+ then
+ gfid_path=$(echo "$file_entry" | cut -d "#" -f 2)
+ file_entry=${file_entry//#}
+ cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep -v '.glusterfs' "
+ actual_path=$(ssh -n "${file_host}" "${cmd}")
+ #removing absolute path so that user can refer this from mount point
+ actual_path=${actual_path#"$brick_root"}
+ else
+ actual_path=$(echo "$file_entry" | cut -d "#" -f 2)
+ file_entry=${file_entry//#}
+ cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep '.glusterfs' "
+ gfid_path=$(ssh -n "${file_host}" "${cmd}")
+ gfid_path=${gfid_path#"$brick_root"}
+ fi
+
+ echo "${gfid_path}|${actual_path}"
+}
+
+function log_can_not_heal ()
+{
+ local gfid_actual_paths=$1
+ local file_paths=$2
+ file_paths=${file_paths//#}
+
+ echo "${LINE_SEP}" >> "$CAN_NOT_HEAL"
+ echo "Can Not Heal : $(echo "$gfid_actual_paths" | cut -d '|' -f 2)" >> "$CAN_NOT_HEAL"
+ for bpath in ${file_paths//,/ }
+ do
+ echo "${bpath}" >> "$CAN_NOT_HEAL"
+ done
+}
+
+function check_all_frag_and_set_xattr ()
+{
+ local file_paths=$1
+ local total_size=$2
+ local file_stat=$3
+ local bpath=""
+ local healthy_count=0
+ local match="False"
+ local matching_bricks=""
+ local bad_bricks=""
+ local gfid_actual_paths=""
+
+ for bpath in ${file_paths//,/ }
+ do
+ if [[ -n "$gfid_actual_paths" ]]
+ then
+ break
+ fi
+ gfid_actual_paths=$(find_file_paths "$bpath")
+ done
+
+ match=$(match_size_xattr_quorum "$file_paths")
+
+# echo "${match} : $bpath" >> "$MOD_BACKUP_FILES"
+
+ if [[ "$match" != "False" ]]
+ then
+ xattr="$match"
+ for bpath in ${file_paths//,/ }
+ do
+ match="False"
+ match=$(match_stat_size_with_xattr "$bpath" "$total_size" "$file_stat" "$xattr")
+ if [[ "$match" == "True" ]]
+ then
+ matching_bricks="${bpath},${matching_bricks}"
+ healthy_count=$((healthy_count + 1))
+ else
+ bad_bricks="${bpath},${bad_bricks}"
+ fi
+ done
+ fi
+
+ if [[ $healthy_count -ge 4 ]]
+ then
+ match="True"
+ echo "${LINE_SEP}" >> "$MOD_BACKUP_FILES"
+ echo "Modified : $(echo "$gfid_actual_paths" | cut -d '|' -f 2)" >> "$MOD_BACKUP_FILES"
+ set_version_dirty_xattr "$matching_bricks" 1 "$gfid_actual_paths"
+ set_version_dirty_xattr "$bad_bricks" 0 "$gfid_actual_paths"
+ else
+ log_can_not_heal "$gfid_actual_paths" "${file_paths}"
+ fi
+
+ echo "$match"
+}
+function set_xattr()
+{
+ local count=$1
+ local heal_entry=""
+ local file_stat=""
+ local frag_size=""
+ local total_size=""
+ local file_paths=""
+ local num=""
+ local can_heal_count=0
+
+ heal_log "Started $(basename $BASH_SOURCE) on $(date) "
+
+ while read -r heal_entry
+ do
+ heal_log "$LINE_SEP"
+ heal_log "${heal_entry}"
+
+ file_stat=$(echo "$heal_entry" | cut -d "|" -f 1)
+ frag_size=$(echo "$file_stat" | rev | cut -d ":" -f 1 | rev)
+ total_size="$(total_file_size_in_hex "$frag_size")"
+ file_paths=$(echo "$heal_entry" | cut -d "|" -f 2)
+ match=$(check_all_frag_and_set_xattr "$file_paths" "$total_size" "$file_stat")
+ if [[ "$match" == "True" ]]
+ then
+ can_heal_count=$((can_heal_count + 1))
+ fi
+
+ sed -i '1d' potential_heal
+ count=$((count - 1))
+ if [ $count == 0 ]
+ then
+ num=$(cat potential_heal | wc -l)
+ heal_log "$LINE_SEP"
+ heal_log "${1} : Processed"
+ heal_log "${can_heal_count} : Modified to Heal"
+ heal_log "$((${1} - can_heal_count)) : Moved to can_not_heal."
+ heal_log "${num} : Pending as Potential Heal"
+ exit 0
+ fi
+
+ done < potential_heal
+}
+
+function main ()
+{
+ local count=0
+
+ read -p "Number of files to correct: [choose between 1-1000] (0 for All):" count
+ if [[ $count -lt 0 || $count -gt 1000 ]]
+ then
+ echo "Provide correct value:"
+ exit 2
+ fi
+
+ if [[ $count -eq 0 ]]
+ then
+ count=$(cat potential_heal | wc -l)
+ fi
+ set_xattr "$count"
+}
+
+_init "$@" && main "$@"
diff --git a/extras/ec-heal-script/gfid_needing_heal_parallel.sh b/extras/ec-heal-script/gfid_needing_heal_parallel.sh
new file mode 100755
index 00000000000..d7f53c97c33
--- /dev/null
+++ b/extras/ec-heal-script/gfid_needing_heal_parallel.sh
@@ -0,0 +1,278 @@
+#!/bin/bash
+# Copyright (c) 2019-2020 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+# This script provides a list of all the files which can be healed or not healed.
+# It also generates two files, potential_heal and can_not_heal, which contains the information
+# of all theose files. These files could be used by correct_pending_heals.sh to correct
+# the fragmnets so that files could be healed by shd.
+
+CAN_NOT_HEAL="can_not_heal"
+CAN_HEAL="potential_heal"
+LINE_SEP="==================================================="
+LOG_DIR="/var/log/glusterfs"
+LOG_FILE="$LOG_DIR/ec-heal-script.log"
+
+function heal_log()
+{
+ echo "$1" >> "$LOG_FILE"
+}
+
+function _init ()
+{
+ if [ $# -ne 1 ]; then
+ echo "usage: $0 <gluster volume name>";
+ echo "This script provides a list of all the files which can be healed or not healed.
+It also generates two files, potential_heal and can_not_heal, which contains the information
+of all theose files. These files could be used by correct_pending_heals.sh to correct
+the fragmnets so that files could be healed by shd."
+ exit 2;
+ fi
+
+ volume=$1;
+}
+
+function get_pending_entries ()
+{
+ local volume_name=$1
+
+ gluster volume heal "$volume_name" info | grep -v ":/" | grep -v "Number of entries" | grep -v "Status:" | sort -u | sed '/^$/d'
+}
+
+function get_entry_path_on_brick()
+{
+ local path="$1"
+ local gfid_string=""
+ if [[ "${path:0:1}" == "/" ]];
+ then
+ echo "$path"
+ else
+ gfid_string="$(echo "$path" | cut -f2 -d':' | cut -f1 -d '>')"
+ echo "/.glusterfs/${gfid_string:0:2}/${gfid_string:2:2}/$gfid_string"
+ fi
+}
+
+function run_command_on_server()
+{
+ local subvolume="$1"
+ local host="$2"
+ local cmd="$3"
+ local output
+ output=$(ssh -n "${host}" "${cmd}")
+ if [ -n "$output" ]
+ then
+ echo "$subvolume:$output"
+ fi
+}
+
+function get_entry_path_all_bricks ()
+{
+ local entry="$1"
+ local bricks="$2"
+ local cmd=""
+ for brick in $bricks
+ do
+ echo "${brick}#$(get_entry_path_on_brick "$entry")"
+ done | tr '\n' ','
+}
+
+function get_stat_for_entry_from_all_bricks ()
+{
+ local entry="$1"
+ local bricks="$2"
+ local subvolume=0
+ local host=""
+ local bpath=""
+ local cmd=""
+
+ for brick in $bricks
+ do
+ if [[ "$((subvolume % 6))" == "0" ]]
+ then
+ subvolume=$((subvolume+1))
+ fi
+ host=$(echo "$brick" | cut -f1 -d':')
+ bpath=$(echo "$brick" | cut -f2 -d':')
+
+ cmd="stat --format=%F:%B:%s $bpath$(get_entry_path_on_brick "$entry") 2>/dev/null"
+ run_command_on_server "$subvolume" "${host}" "${cmd}" &
+ done | sort | uniq -c | sort -rnk1
+}
+
+function get_bricks_from_volume()
+{
+ local v=$1
+ gluster volume info "$v" | grep -E "^Brick[0-9][0-9]*:" | cut -f2- -d':'
+}
+
+function print_entry_gfid()
+{
+ local host="$1"
+ local dirpath="$2"
+ local entry="$3"
+ local gfid
+ gfid="$(ssh -n "${host}" "getfattr -d -m. -e hex $dirpath/$entry 2>/dev/null | grep trusted.gfid=|cut -f2 -d'='")"
+ echo "$entry" - "$gfid"
+}
+
+function print_brick_directory_info()
+{
+ local h="$1"
+ local dirpath="$2"
+ while read -r e
+ do
+ print_entry_gfid "${h}" "${dirpath}" "${e}"
+ done < <(ssh -n "${h}" "ls $dirpath 2>/dev/null")
+}
+
+function print_directory_info()
+{
+ local entry="$1"
+ local bricks="$2"
+ local h
+ local b
+ local gfid
+ for brick in $bricks;
+ do
+ h="$(echo "$brick" | cut -f1 -d':')"
+ b="$(echo "$brick" | cut -f2 -d':')"
+ dirpath="$b$(get_entry_path_on_brick "$entry")"
+ print_brick_directory_info "${h}" "${dirpath}" &
+ done | sort | uniq -c
+}
+
+function print_entries_needing_heal()
+{
+ local quorum=0
+ local entry="$1"
+ local bricks="$2"
+ while read -r line
+ do
+ quorum=$(echo "$line" | awk '{print $1}')
+ if [[ "$quorum" -lt 4 ]]
+ then
+ echo "$line - Not in Quorum"
+ else
+ echo "$line - In Quorum"
+ fi
+ done < <(print_directory_info "$entry" "$bricks")
+}
+
+function find_file_paths ()
+{
+ local bpath=$1
+ local file_entry=""
+ local file_host=""
+ local cmd=""
+ local brick_root=""
+ local gfid=""
+ local actual_path=""
+ local gfid_path=""
+
+ file_host=$(echo "$bpath" | cut -d ":" -f 1)
+ file_entry=$(echo "$bpath" | cut -d ":" -f 2)
+ brick_root=$(echo "$file_entry" | cut -d "#" -f 1)
+
+ gfid=$(echo "${file_entry}" | grep ".glusterfs")
+
+ if [[ -n "$gfid" ]]
+ then
+ gfid_path=$(echo "$file_entry" | cut -d "#" -f 2)
+ file_entry=${file_entry//#}
+ cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep -v '.glusterfs' "
+ actual_path=$(ssh -n "${file_host}" "${cmd}")
+ #removing absolute path so that user can refer this from mount point
+ actual_path=${actual_path#"$brick_root"}
+ else
+ actual_path=$(echo "$file_entry" | cut -d "#" -f 2)
+ file_entry=${file_entry//#}
+ cmd="find -L '$brick_root' -samefile '$file_entry' 2>/dev/null | grep '.glusterfs' "
+ gfid_path=$(ssh -n "${file_host}" "${cmd}")
+ gfid_path=${gfid_path#"$brick_root"}
+ fi
+
+ echo "${gfid_path}|${actual_path}"
+}
+
+function log_can_not_heal ()
+{
+ local gfid_actual_paths=$1
+ local file_paths=$2
+ file_paths=${file_paths//#}
+
+ echo "${LINE_SEP}" >> "$CAN_NOT_HEAL"
+ echo "Can Not Heal : $(echo "$gfid_actual_paths" | cut -d '|' -f 2)" >> "$CAN_NOT_HEAL"
+ for bpath in ${file_paths//,/ }
+ do
+ echo "${bpath}" >> "$CAN_NOT_HEAL"
+ done
+}
+
+function main ()
+{
+ local bricks=""
+ local quorum=0
+ local stat_info=""
+ local file_type=""
+ local gfid_actual_paths=""
+ local bpath=""
+ local file_paths=""
+ local good=0
+ local bad=0
+ bricks=$(get_bricks_from_volume "$volume")
+ rm -f "$CAN_HEAL"
+ rm -f "$CAN_NOT_HEAL"
+ mkdir "$LOG_DIR" -p
+
+ heal_log "Started $(basename "$BASH_SOURCE") on $(date) "
+ while read -r heal_entry
+ do
+ heal_log "------------------------------------------------------------------"
+ heal_log "$heal_entry"
+
+ gfid_actual_paths=""
+ file_paths="$(get_entry_path_all_bricks "$heal_entry" "$bricks")"
+ stat_info="$(get_stat_for_entry_from_all_bricks "$heal_entry" "$bricks")"
+ heal_log "$stat_info"
+
+ quorum=$(echo "$stat_info" | head -1 | awk '{print $1}')
+ good_stat=$(echo "$stat_info" | head -1 | awk '{print $3}')
+ file_type="$(echo "$stat_info" | head -1 | cut -f2 -d':')"
+ if [[ "$file_type" == "directory" ]]
+ then
+ print_entries_needing_heal "$heal_entry" "$bricks"
+ else
+ if [[ "$quorum" -ge 4 ]]
+ then
+ good=$((good + 1))
+ heal_log "Verdict: Healable"
+
+ echo "${good_stat}|$file_paths" >> "$CAN_HEAL"
+ else
+ bad=$((bad + 1))
+ heal_log "Verdict: Not Healable"
+ for bpath in ${file_paths//,/ }
+ do
+ if [[ -z "$gfid_actual_paths" ]]
+ then
+ gfid_actual_paths=$(find_file_paths "$bpath")
+ else
+ break
+ fi
+ done
+ log_can_not_heal "$gfid_actual_paths" "${file_paths}"
+ fi
+ fi
+ done < <(get_pending_entries "$volume")
+ heal_log "========================================="
+ heal_log "Total number of potential heal : ${good}"
+ heal_log "Total number of can not heal : ${bad}"
+ heal_log "========================================="
+}
+
+_init "$@" && main "$@"
diff --git a/extras/failed-tests.py b/extras/failed-tests.py
new file mode 100755
index 00000000000..f7f110246b5
--- /dev/null
+++ b/extras/failed-tests.py
@@ -0,0 +1,180 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+import blessings
+import requests
+from requests.packages.urllib3.exceptions import InsecureRequestWarning
+import re
+import argparse
+from collections import defaultdict
+from datetime import timedelta, datetime
+from pystache import render
+
+# This tool goes though the Gluster regression links and checks for failures
+
+BASE = 'https://build.gluster.org'
+TERM = blessings.Terminal()
+MAX_BUILDS = 1000
+summary = defaultdict(list)
+VERBOSE = None
+
+
+def process_failure(url, node):
+ text = requests.get(url, verify=False).text
+ accum = []
+ for t in text.split('\n'):
+ if t.find("Result: FAIL") != -1:
+ for t2 in accum:
+ if VERBOSE:
+ print(t2.encode('utf-8'))
+ if t2.find("Wstat") != -1:
+ test_case = re.search('\./tests/.*\.t', t2)
+ if test_case:
+ summary[test_case.group()].append((url, node))
+ accum = []
+ elif t.find("cur_cores=/") != -1:
+ summary["core"].append([t.split("/")[1]])
+ summary["core"].append(url)
+ else:
+ accum.append(t)
+
+
+def print_summary(failed_builds, total_builds, html=False):
+ # All the templates
+ count = [
+ '{{failed}} of {{total}} regressions failed',
+ '<p><b>{{failed}}</b> of <b>{{total}}</b> regressions failed</p>'
+ ]
+ regression_link = [
+ '\tRegression Link: {{link}}\n'
+ '\tNode: {{node}}',
+ '<p>&emsp;Regression Link: {{link}}</p>'
+ '<p>&emsp;Node: {{node}}</p>'
+ ]
+ component = [
+ '\tComponent: {{comp}}',
+ '<p>&emsp;Component: {{comp}}</p>'
+ ]
+ failure_count = [
+ ''.join([
+ TERM.red,
+ '{{test}} ; Failed {{count}} times',
+ TERM.normal
+ ]),
+ (
+ '<p><font color="red"><b>{{test}};</b> Failed <b>{{count}}'
+ '</b> times</font></p>'
+ )
+ ]
+
+ template = 0
+ if html:
+ template = 1
+ print(render(
+ count[template],
+ {'failed': failed_builds, 'total': total_builds}
+ ))
+ for k, v in summary.items():
+ if k == 'core':
+ print(''.join([TERM.red, "Found cores:", TERM.normal]))
+ for comp, link in zip(v[::2], v[1::2]):
+ print(render(component[template], {'comp': comp}))
+ print(render(
+ regression_link[template],
+ {'link': link[0], 'node': link[1]}
+ ))
+ else:
+ print(render(failure_count[template], {'test': k, 'count': len(v)}))
+ for link in v:
+ print(render(
+ regression_link[template],
+ {'link': link[0], 'node': link[1]}
+ ))
+
+
+def get_summary(cut_off_date, reg_link):
+ '''
+ Get links to the failed jobs
+ '''
+ success_count = 0
+ failure_count = 0
+ for page in range(0, MAX_BUILDS, 100):
+ build_info = requests.get(''.join([
+ BASE,
+ reg_link,
+ 'api/json?depth=1&tree=allBuilds'
+ '[url,result,timestamp,builtOn]',
+ '{{{0},{1}}}'.format(page, page+100)
+ ]), verify=False).json()
+ for build in build_info.get('allBuilds'):
+ if datetime.fromtimestamp(build['timestamp']/1000) < cut_off_date:
+ # stop when timestamp older than cut off date
+ return failure_count, failure_count + success_count
+ if build['result'] in [None, 'SUCCESS']:
+ # pass when build is a success or ongoing
+ success_count += 1
+ continue
+ if VERBOSE:
+ print(''.join([
+ TERM.red,
+ 'FAILURE on {0}'.format(build['url']),
+ TERM.normal
+ ]))
+ url = ''.join([build['url'], 'consoleText'])
+ failure_count += 1
+ process_failure(url, build['builtOn'])
+ return failure_count, failure_count + success_count
+
+
+def main(num_days, regression_link, html_report):
+ cut_off_date = datetime.today() - timedelta(days=num_days)
+ failure = 0
+ total = 0
+ for reg in regression_link:
+ if reg == 'centos':
+ reg_link = '/job/centos6-regression/'
+ elif reg == 'netbsd':
+ reg_link = '/job/netbsd7-regression/'
+ else:
+ reg_link = reg
+ counts = get_summary(cut_off_date, reg_link)
+ failure += counts[0]
+ total += counts[1]
+ print_summary(failure, total, html_report)
+
+
+if __name__ == '__main__':
+ requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
+ parser = argparse.ArgumentParser()
+ parser.add_argument("get-summary")
+ parser.add_argument(
+ "last_no_of_days",
+ default=1,
+ type=int,
+ help="Regression summary of last number of days"
+ )
+ parser.add_argument(
+ "regression_link",
+ default="centos",
+ nargs='+',
+ help="\"centos\" | \"netbsd\" | any other regression link"
+ )
+ parser.add_argument(
+ "--verbose",
+ default=False,
+ action="store_true",
+ help="Print a detailed report of each test case that is failed"
+ )
+ parser.add_argument(
+ "--html-report",
+ default=False,
+ action="store_true",
+ help="Print a brief report of failed regressions in html format"
+ )
+ args = parser.parse_args()
+ VERBOSE = args.verbose
+ main(
+ num_days=args.last_no_of_days,
+ regression_link=args.regression_link,
+ html_report=args.html_report
+ )
diff --git a/extras/firewalld/Makefile.am b/extras/firewalld/Makefile.am
new file mode 100644
index 00000000000..530881fb8eb
--- /dev/null
+++ b/extras/firewalld/Makefile.am
@@ -0,0 +1,8 @@
+EXTRA_DIST = glusterfs.xml
+
+if USE_FIREWALLD
+if WITH_SERVER
+staticdir = /usr/lib/firewalld/services/
+static_DATA = glusterfs.xml
+endif
+endif
diff --git a/extras/firewalld/glusterfs.xml b/extras/firewalld/glusterfs.xml
new file mode 100644
index 00000000000..7e176442f5b
--- /dev/null
+++ b/extras/firewalld/glusterfs.xml
@@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="utf-8"?>
+<service>
+<short>glusterfs-static</short>
+<description>Default ports for gluster-distributed storage</description>
+<port protocol="tcp" port="24007"/> <!--For glusterd -->
+<port protocol="tcp" port="24008"/> <!--For glusterd RDMA port management -->
+<port protocol="tcp" port="24009"/> <!--For glustereventsd -->
+<port protocol="tcp" port="38465"/> <!--Gluster NFS service -->
+<port protocol="tcp" port="38466"/> <!--Gluster NFS service -->
+<port protocol="tcp" port="38467"/> <!--Gluster NFS service -->
+<port protocol="tcp" port="38468"/> <!--Gluster NFS service -->
+<port protocol="tcp" port="38469"/> <!--Gluster NFS service -->
+<port protocol="tcp" port="49152-49664"/> <!--512 ports for bricks -->
+</service>
diff --git a/extras/ganesha/Makefile.am b/extras/ganesha/Makefile.am
new file mode 100644
index 00000000000..9eaa401b6c8
--- /dev/null
+++ b/extras/ganesha/Makefile.am
@@ -0,0 +1,2 @@
+SUBDIRS = scripts config ocf
+CLEANFILES =
diff --git a/extras/ganesha/config/Makefile.am b/extras/ganesha/config/Makefile.am
new file mode 100644
index 00000000000..c729273096e
--- /dev/null
+++ b/extras/ganesha/config/Makefile.am
@@ -0,0 +1,4 @@
+EXTRA_DIST= ganesha-ha.conf.sample
+
+confdir = $(sysconfdir)/ganesha
+conf_DATA = ganesha-ha.conf.sample
diff --git a/extras/ganesha/config/ganesha-ha.conf.sample b/extras/ganesha/config/ganesha-ha.conf.sample
new file mode 100644
index 00000000000..c22892bde56
--- /dev/null
+++ b/extras/ganesha/config/ganesha-ha.conf.sample
@@ -0,0 +1,19 @@
+# Name of the HA cluster created.
+# must be unique within the subnet
+HA_NAME="ganesha-ha-360"
+#
+# N.B. you may use short names or long names; you may not use IP addrs.
+# Once you select one, stay with it as it will be mildly unpleasant to
+# clean up if you switch later on. Ensure that all names - short and/or
+# long - are in DNS or /etc/hosts on all machines in the cluster.
+#
+# The subset of nodes of the Gluster Trusted Pool that form the ganesha
+# HA cluster. Hostname is specified.
+HA_CLUSTER_NODES="server1,server2,..."
+#HA_CLUSTER_NODES="server1.lab.redhat.com,server2.lab.redhat.com,..."
+#
+# Virtual IPs for each of the nodes specified above.
+VIP_server1="10.0.2.1"
+VIP_server2="10.0.2.2"
+#VIP_server1_lab_redhat_com="10.0.2.1"
+#VIP_server2_lab_redhat_com="10.0.2.2"
diff --git a/extras/ganesha/ocf/Makefile.am b/extras/ganesha/ocf/Makefile.am
new file mode 100644
index 00000000000..990a609f254
--- /dev/null
+++ b/extras/ganesha/ocf/Makefile.am
@@ -0,0 +1,11 @@
+EXTRA_DIST= ganesha_grace ganesha_mon ganesha_nfsd
+
+# The root of the OCF resource agent hierarchy
+# Per the OCF standard, it's always "lib",
+# not "lib64" (even on 64-bit platforms).
+ocfdir = $(prefix)/lib/ocf
+
+# The provider directory
+radir = $(ocfdir)/resource.d/heartbeat
+
+ra_SCRIPTS = ganesha_grace ganesha_mon ganesha_nfsd
diff --git a/extras/ganesha/ocf/ganesha_grace b/extras/ganesha/ocf/ganesha_grace
new file mode 100644
index 00000000000..825f7164597
--- /dev/null
+++ b/extras/ganesha/ocf/ganesha_grace
@@ -0,0 +1,221 @@
+#!/bin/bash
+#
+# Copyright (c) 2014 Anand Subramanian anands@redhat.com
+# Copyright (c) 2015 Red Hat Inc.
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+#
+
+# Initialization:
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+if [ -n "$OCF_DEBUG_LIBRARY" ]; then
+ . $OCF_DEBUG_LIBRARY
+else
+ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+fi
+
+OCF_RESKEY_grace_active_default="grace-active"
+: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}}
+
+ganesha_meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="ganesha_grace">
+<version>1.0</version>
+
+<longdesc lang="en">
+This Linux-specific resource agent acts as a dummy
+resource agent for nfs-ganesha.
+</longdesc>
+
+<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc>
+
+<parameters>
+<parameter name="grace_active">
+<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc>
+<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc>
+<content type="string" default="grace-active" />
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="40s" />
+<action name="stop" timeout="40s" />
+<action name="status" timeout="20s" interval="60s" />
+<action name="monitor" depth="0" timeout="10s" interval="5s" />
+<action name="notify" timeout="10s" />
+<action name="meta-data" timeout="20s" />
+</actions>
+</resource-agent>
+END
+
+return ${OCF_SUCCESS}
+}
+
+ganesha_grace_usage() {
+ echo "ganesha.nfsd USAGE"
+}
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+ meta-data) ganesha_meta_data
+ exit ${OCF_SUCCESS}
+ ;;
+ usage|help) ganesha_usage
+ exit ${OCF_SUCCESS}
+ ;;
+ *)
+ ;;
+esac
+
+ganesha_grace_start()
+{
+ local rc=${OCF_ERR_GENERIC}
+ local host=$(hostname -s)
+
+ ocf_log debug "ganesha_grace_start()"
+ # give ganesha_mon RA a chance to set the crm_attr first
+ # I mislike the sleep, but it's not clear that looping
+ # with a small sleep is necessarily better
+ # start has a 40sec timeout, so a 5sec sleep here is okay
+ sleep 5
+ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
+ if [ $? -ne 0 ]; then
+ host=$(hostname)
+ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null )
+ if [ $? -ne 0 ]; then
+ ocf_log info "grace start: crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
+ fi
+ fi
+
+ # Three possibilities:
+ # 1. There is no attribute at all and attr_updater returns
+ # a zero length string. This happens when
+ # ganesha_mon::monitor hasn't run at least once to set
+ # the attribute. The assumption here is that the system
+ # is coming up. We pretend, for now, that the node is
+ # healthy, to allow the system to continue coming up.
+ # It will cure itself in a few seconds
+ # 2. There is an attribute, and it has the value "1"; this
+ # node is healthy.
+ # 3. There is an attribute, but it has no value or the value
+ # "0"; this node is not healthy.
+
+ # case 1
+ if [[ -z "${attr}" ]]; then
+ return ${OCF_SUCCESS}
+ fi
+
+ # case 2
+ if [[ "${attr}" = *"value=1" ]]; then
+ return ${OCF_SUCCESS}
+ fi
+
+ # case 3
+ return ${OCF_NOT_RUNNING}
+}
+
+ganesha_grace_stop()
+{
+
+ ocf_log debug "ganesha_grace_stop()"
+ return ${OCF_SUCCESS}
+}
+
+ganesha_grace_notify()
+{
+ # since this is a clone RA we should only ever see pre-start
+ # or post-stop
+ mode="${OCF_RESKEY_CRM_meta_notify_type}-${OCF_RESKEY_CRM_meta_notify_operation}"
+ case "${mode}" in
+ pre-start | post-stop)
+ dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname}
+ if [ $? -ne 0 ]; then
+ ocf_log info "dbus-send --print-reply --system --dest=org.ganesha.nfsd /org/ganesha/nfsd/admin org.ganesha.nfsd.admin.grace string:${OCF_RESKEY_CRM_meta_notify_stop_uname} failed"
+ fi
+ ;;
+ esac
+
+ return ${OCF_SUCCESS}
+}
+
+ganesha_grace_monitor()
+{
+ local host=$(hostname -s)
+
+ ocf_log debug "monitor"
+
+ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
+ if [ $? -ne 0 ]; then
+ host=$(hostname)
+ attr=$(crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} 2> /dev/null)
+ if [ $? -ne 0 ]; then
+ ocf_log info "crm_attribute --query --node=${host} --name=${OCF_RESKEY_grace_active} failed"
+ fi
+ fi
+
+ # if there is no attribute (yet), maybe it's because
+ # this RA started before ganesha_mon (nfs-mon) has had
+ # chance to create it. In which case we'll pretend
+ # everything is okay this time around
+ if [[ -z "${attr}" ]]; then
+ return ${OCF_SUCCESS}
+ fi
+
+ if [[ "${attr}" = *"value=1" ]]; then
+ return ${OCF_SUCCESS}
+ fi
+
+ return ${OCF_NOT_RUNNING}
+}
+
+ganesha_grace_validate()
+{
+ return ${OCF_SUCCESS}
+}
+
+ganesha_grace_validate
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+start) ganesha_grace_start
+ ;;
+stop) ganesha_grace_stop
+ ;;
+status|monitor) ganesha_grace_monitor
+ ;;
+notify) ganesha_grace_notify
+ ;;
+*) ganesha_grace_usage
+ exit ${OCF_ERR_UNIMPLEMENTED}
+ ;;
+esac
+
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc"
+exit $rc
diff --git a/extras/ganesha/ocf/ganesha_mon b/extras/ganesha/ocf/ganesha_mon
new file mode 100644
index 00000000000..2b4a9d6da84
--- /dev/null
+++ b/extras/ganesha/ocf/ganesha_mon
@@ -0,0 +1,234 @@
+#!/bin/bash
+#
+# Copyright (c) 2014 Anand Subramanian anands@redhat.com
+# Copyright (c) 2015 Red Hat Inc.
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+#
+
+# Initialization:
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+if [ -n "${OCF_DEBUG_LIBRARY}" ]; then
+ . ${OCF_DEBUG_LIBRARY}
+else
+ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+fi
+
+# Defaults
+OCF_RESKEY_ganesha_active_default="ganesha-active"
+OCF_RESKEY_grace_active_default="grace-active"
+OCF_RESKEY_grace_delay_default="5"
+
+: ${OCF_RESKEY_ganesha_active=${OCF_RESKEY_ganesha_active_default}}
+: ${OCF_RESKEY_grace_active=${OCF_RESKEY_grace_active_default}}
+: ${OCF_RESKEY_grace_delay=${OCF_RESKEY_grace_delay_default}}
+
+ganesha_meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="ganesha_mon">
+<version>1.0</version>
+
+<longdesc lang="en">
+This Linux-specific resource agent acts as a dummy
+resource agent for nfs-ganesha.
+</longdesc>
+
+<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc>
+
+<parameters>
+<parameter name="ganesha_active">
+<longdesc lang="en">NFS-Ganesha daemon active attribute</longdesc>
+<shortdesc lang="en">NFS-Ganesha daemon active attribute</shortdesc>
+<content type="string" default="ganesha-active" />
+</parameter>
+<parameter name="grace_active">
+<longdesc lang="en">NFS-Ganesha grace active attribute</longdesc>
+<shortdesc lang="en">NFS-Ganesha grace active attribute</shortdesc>
+<content type="string" default="grace-active" />
+</parameter>
+<parameter name="grace_delay">
+<longdesc lang="en">
+NFS-Ganesha grace delay.
+When changing this, adjust the ganesha_grace RA's monitor interval to match.
+</longdesc>
+<shortdesc lang="en">NFS-Ganesha grace delay</shortdesc>
+<content type="string" default="5" />
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="40s" />
+<action name="stop" timeout="40s" />
+<action name="status" timeout="20s" interval="60s" />
+<action name="monitor" depth="0" timeout="10s" interval="10s" />
+<action name="meta-data" timeout="20s" />
+</actions>
+</resource-agent>
+END
+
+return ${OCF_SUCCESS}
+}
+
+ganesha_mon_usage() {
+ echo "ganesha.nfsd USAGE"
+}
+
+# Make sure meta-data and usage always succeed
+case ${__OCF_ACTION} in
+ meta-data) ganesha_meta_data
+ exit ${OCF_SUCCESS}
+ ;;
+ usage|help) ganesha_usage
+ exit ${OCF_SUCCESS}
+ ;;
+ *)
+ ;;
+esac
+
+ganesha_mon_start()
+{
+ ocf_log debug "ganesha_mon_start"
+ ganesha_mon_monitor
+ return $OCF_SUCCESS
+}
+
+ganesha_mon_stop()
+{
+ ocf_log debug "ganesha_mon_stop"
+ return $OCF_SUCCESS
+}
+
+ganesha_mon_monitor()
+{
+ local host=$(hostname -s)
+ local pid_file="/var/run/ganesha.pid"
+ local rhel6_pid_file="/var/run/ganesha.nfsd.pid"
+ local proc_pid="/proc/"
+
+ # RHEL6 /etc/init.d/nfs-ganesha adds -p /var/run/ganesha.nfsd.pid
+ # RHEL7 systemd does not. Would be nice if all distros used the
+ # same pid file.
+ if [ -e ${rhel6_pid_file} ]; then
+ pid_file=${rhel6_pid_file}
+ fi
+ if [ -e ${pid_file} ]; then
+ proc_pid="${proc_pid}$(cat ${pid_file})"
+ fi
+
+ if [ "x${proc_pid}" != "x/proc/" -a -d ${proc_pid} ]; then
+
+ attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1
+ if [ $? -ne 0 ]; then
+ ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_ganesha_active} -v 1 failed"
+ fi
+
+ # ganesha_grace (nfs-grace) RA follows grace-active attr
+ # w/ constraint location
+ attrd_updater -n ${OCF_RESKEY_grace_active} -v 1
+ if [ $? -ne 0 ]; then
+ ocf_log info "warning: attrd_updater -n ${OCF_RESKEY_grace_active} -v 1 failed"
+ fi
+
+ # ganesha_mon (nfs-mon) and ganesha_grace (nfs-grace)
+ # track grace-active crm_attr (attr != crm_attr)
+ # we can't just use the attr as there's no way to query
+ # its value in RHEL6 pacemaker
+
+ crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null
+ if [ $? -ne 0 ]; then
+ host=$(hostname)
+ crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 2> /dev/null
+ if [ $? -ne 0 ]; then
+ ocf_log info "mon monitor warning: crm_attribute --node=${host} --lifetime=forever --name=${OCF_RESKEY_grace_active} --update=1 failed"
+ fi
+ fi
+
+ return ${OCF_SUCCESS}
+ fi
+
+ # VIP fail-over is triggered by clearing the
+ # ganesha-active node attribute on this node.
+ #
+ # Meanwhile the ganesha_grace notify() runs when its
+ # nfs-grace resource is disabled on a node; which
+ # is triggered by clearing the grace-active attribute
+ # on this node.
+ #
+ # We need to allow time for it to run and put
+ # the remaining ganesha.nfsds into grace before
+ # initiating the VIP fail-over.
+
+ attrd_updater -D -n ${OCF_RESKEY_grace_active}
+ if [ $? -ne 0 ]; then
+ ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_grace_active} failed"
+ fi
+
+ host=$(hostname -s)
+ crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null
+ if [ $? -ne 0 ]; then
+ host=$(hostname)
+ crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 2> /dev/null
+ if [ $? -ne 0 ]; then
+ ocf_log info "mon monitor warning: crm_attribute --node=${host} --name=${OCF_RESKEY_grace_active} --update=0 failed"
+ fi
+ fi
+
+ sleep ${OCF_RESKEY_grace_delay}
+
+ attrd_updater -D -n ${OCF_RESKEY_ganesha_active}
+ if [ $? -ne 0 ]; then
+ ocf_log info "warning: attrd_updater -D -n ${OCF_RESKEY_ganesha_active} failed"
+ fi
+
+ return ${OCF_SUCCESS}
+}
+
+ganesha_mon_validate()
+{
+ return ${OCF_SUCCESS}
+}
+
+ganesha_mon_validate
+
+# Translate each action into the appropriate function call
+case ${__OCF_ACTION} in
+start) ganesha_mon_start
+ ;;
+stop) ganesha_mon_stop
+ ;;
+status|monitor) ganesha_mon_monitor
+ ;;
+*) ganesha_mon_usage
+ exit ${OCF_ERR_UNIMPLEMENTED}
+ ;;
+esac
+
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc"
+exit $rc
diff --git a/extras/ganesha/ocf/ganesha_nfsd b/extras/ganesha/ocf/ganesha_nfsd
new file mode 100644
index 00000000000..f91e8b6b8f7
--- /dev/null
+++ b/extras/ganesha/ocf/ganesha_nfsd
@@ -0,0 +1,167 @@
+#!/bin/bash
+#
+# Copyright (c) 2014 Anand Subramanian anands@redhat.com
+# Copyright (c) 2015 Red Hat Inc.
+# All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of version 2 of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+#
+# Further, this software is distributed without any warranty that it is
+# free of the rightful claim of any third person regarding infringement
+# or the like. Any license provided herein, whether implied or
+# otherwise, applies only to this software file. Patent licenses, if
+# any, provided herein do not apply to combinations of this program with
+# other software, or any other product whatsoever.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc., 59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
+#
+#
+
+# Initialization:
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+if [ -n "${OCF_DEBUG_LIBRARY}" ]; then
+ . ${OCF_DEBUG_LIBRARY}
+else
+ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+ . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+fi
+
+OCF_RESKEY_ha_vol_mnt_default="/run/gluster/shared_storage"
+: ${OCF_RESKEY_ha_vol_mnt=${OCF_RESKEY_ha_vol_mnt_default}}
+
+ganesha_meta_data() {
+ cat <<END
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="ganesha_nfsd">
+<version>1.0</version>
+
+<longdesc lang="en">
+This Linux-specific resource agent acts as a dummy
+resource agent for nfs-ganesha.
+</longdesc>
+
+<shortdesc lang="en">Manages the user-space nfs-ganesha NFS server</shortdesc>
+
+<parameters>
+<parameter name="ha_vol_mnt">
+<longdesc lang="en">HA State Volume Mount Point</longdesc>
+<shortdesc lang="en">HA_State Volume Mount Point</shortdesc>
+<content type="string" default="" />
+</parameter>
+</parameters>
+
+<actions>
+<action name="start" timeout="5s" />
+<action name="stop" timeout="5s" />
+<action name="status" depth="0" timeout="5s" interval="0" />
+<action name="monitor" depth="0" timeout="5s" interval="0" />
+<action name="meta-data" timeout="20s" />
+</actions>
+</resource-agent>
+END
+
+return ${OCF_SUCCESS}
+}
+
+ganesha_nfsd_usage() {
+ echo "ganesha.nfsd USAGE"
+}
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+ meta-data) ganesha_meta_data
+ exit ${OCF_SUCCESS}
+ ;;
+ usage|help) ganesha_usage
+ exit ${OCF_SUCCESS}
+ ;;
+ *)
+ ;;
+esac
+
+ganesha_nfsd_start()
+{
+ local long_host=$(hostname)
+
+ if [[ -d /var/lib/nfs ]]; then
+ mv /var/lib/nfs /var/lib/nfs.backup
+ if [ $? -ne 0 ]; then
+ ocf_log notice "mv /var/lib/nfs /var/lib/nfs.backup failed"
+ fi
+ ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs
+ if [ $? -ne 0 ]; then
+ ocf_log notice "ln -s ${OCF_RESKEY_ha_vol_mnt}/nfs-ganesha/${long_host}/nfs /var/lib/nfs failed"
+ fi
+ fi
+
+ return ${OCF_SUCCESS}
+}
+
+ganesha_nfsd_stop()
+{
+
+ if [ -L /var/lib/nfs -a -d /var/lib/nfs.backup ]; then
+ rm -f /var/lib/nfs
+ if [ $? -ne 0 ]; then
+ ocf_log notice "rm -f /var/lib/nfs failed"
+ fi
+ mv /var/lib/nfs.backup /var/lib/nfs
+ if [ $? -ne 0 ]; then
+ ocf_log notice "mv /var/lib/nfs.backup /var/lib/nfs failed"
+ fi
+ fi
+
+ return ${OCF_SUCCESS}
+}
+
+ganesha_nfsd_monitor()
+{
+ # pacemaker checks to see if RA is already running before starting it.
+ # if we return success, then it's presumed it's already running and
+ # doesn't need to be started, i.e. invoke the start action.
+ # return something other than success to make pacemaker invoke the
+ # start action
+ if [[ -L /var/lib/nfs ]]; then
+ return ${OCF_SUCCESS}
+ fi
+ return ${OCF_NOT_RUNNING}
+}
+
+ganesha_nfsd_validate()
+{
+ return ${OCF_SUCCESS}
+}
+
+ganesha_nfsd_validate
+
+# ocf_log notice "ganesha_nfsd ${OCF_RESOURCE_INSTANCE} $__OCF_ACTION"
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+start) ganesha_nfsd_start
+ ;;
+stop) ganesha_nfsd_stop
+ ;;
+status|monitor) ganesha_nfsd_monitor
+ ;;
+*) ganesha_nfsd_usage
+ exit ${OCF_ERR_UNIMPLEMENTED}
+ ;;
+esac
+
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} ${__OCF_ACTION} returned $rc"
+exit $rc
diff --git a/extras/ganesha/scripts/Makefile.am b/extras/ganesha/scripts/Makefile.am
new file mode 100644
index 00000000000..7e345fd5f19
--- /dev/null
+++ b/extras/ganesha/scripts/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_DIST= create-export-ganesha.sh generate-epoch.py dbus-send.sh \
+ ganesha-ha.sh
+
+scriptsdir = $(libexecdir)/ganesha
+scripts_SCRIPTS = create-export-ganesha.sh dbus-send.sh generate-epoch.py \
+ ganesha-ha.sh
diff --git a/extras/ganesha/scripts/create-export-ganesha.sh b/extras/ganesha/scripts/create-export-ganesha.sh
new file mode 100755
index 00000000000..3040e8138b0
--- /dev/null
+++ b/extras/ganesha/scripts/create-export-ganesha.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+#This script is called by glusterd when the user
+#tries to export a volume via NFS-Ganesha.
+#An export file specific to a volume
+#is created in GANESHA_DIR/exports.
+
+# Try loading the config from any of the distro
+# specific configuration locations
+if [ -f /etc/sysconfig/ganesha ]
+ then
+ . /etc/sysconfig/ganesha
+fi
+if [ -f /etc/conf.d/ganesha ]
+ then
+ . /etc/conf.d/ganesha
+fi
+if [ -f /etc/default/ganesha ]
+ then
+ . /etc/default/ganesha
+fi
+
+GANESHA_DIR=${1%/}
+OPTION=$2
+VOL=$3
+CONF=$GANESHA_DIR"/ganesha.conf"
+declare -i EXPORT_ID
+
+function check_cmd_status()
+{
+ if [ "$1" != "0" ]
+ then
+ rm -rf $GANESHA_DIR/exports/export.$VOL.conf
+ sed -i /$VOL.conf/d $CONF
+ exit 1
+ fi
+}
+
+
+if [ ! -d "$GANESHA_DIR/exports" ];
+ then
+ mkdir $GANESHA_DIR/exports
+ check_cmd_status `echo $?`
+fi
+
+function write_conf()
+{
+echo -e "# WARNING : Using Gluster CLI will overwrite manual
+# changes made to this file. To avoid it, edit the
+# file and run ganesha-ha.sh --refresh-config."
+
+echo "EXPORT{"
+echo " Export_Id = 2;"
+echo " Path = \"/$VOL\";"
+echo " FSAL {"
+echo " name = "GLUSTER";"
+echo " hostname=\"localhost\";"
+echo " volume=\"$VOL\";"
+echo " }"
+echo " Access_type = RW;"
+echo " Disable_ACL = true;"
+echo ' Squash="No_root_squash";'
+echo " Pseudo=\"/$VOL\";"
+echo ' Protocols = "3", "4" ;'
+echo ' Transports = "UDP","TCP";'
+echo ' SecType = "sys";'
+echo ' Security_Label = False;'
+echo " }"
+}
+if [ "$OPTION" = "on" ];
+then
+ if ! (cat $CONF | grep $VOL.conf\"$ )
+ then
+ write_conf $@ > $GANESHA_DIR/exports/export.$VOL.conf
+ echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF
+ count=`ls -l $GANESHA_DIR/exports/*.conf | wc -l`
+ if [ "$count" = "1" ] ; then
+ EXPORT_ID=2
+ else
+ EXPORT_ID=`cat $GANESHA_DIR/.export_added`
+ check_cmd_status `echo $?`
+ EXPORT_ID=EXPORT_ID+1
+ sed -i s/Export_Id.*/"Export_Id= $EXPORT_ID ;"/ \
+ $GANESHA_DIR/exports/export.$VOL.conf
+ check_cmd_status `echo $?`
+ fi
+ echo $EXPORT_ID > $GANESHA_DIR/.export_added
+ fi
+else
+ rm -rf $GANESHA_DIR/exports/export.$VOL.conf
+ sed -i /$VOL.conf/d $CONF
+fi
diff --git a/extras/ganesha/scripts/dbus-send.sh b/extras/ganesha/scripts/dbus-send.sh
new file mode 100755
index 00000000000..9d613a0e7ad
--- /dev/null
+++ b/extras/ganesha/scripts/dbus-send.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Try loading the config from any of the distro
+# specific configuration locations
+if [ -f /etc/sysconfig/ganesha ]
+ then
+ . /etc/sysconfig/ganesha
+fi
+if [ -f /etc/conf.d/ganesha ]
+ then
+ . /etc/conf.d/ganesha
+fi
+if [ -f /etc/default/ganesha ]
+ then
+ . /etc/default/ganesha
+fi
+
+GANESHA_DIR=${1%/}
+OPTION=$2
+VOL=$3
+CONF=$GANESHA_DIR"/ganesha.conf"
+
+function check_cmd_status()
+{
+ if [ "$1" != "0" ]
+ then
+ logger "dynamic export failed on node :${hostname -s}"
+ fi
+}
+
+#This function keeps track of export IDs and increments it with every new entry
+function dynamic_export_add()
+{
+ dbus-send --system \
+--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
+org.ganesha.nfsd.exportmgr.AddExport string:$GANESHA_DIR/exports/export.$VOL.conf \
+string:"EXPORT(Path=/$VOL)"
+ check_cmd_status `echo $?`
+}
+
+#This function removes an export dynamically(uses the export_id of the export)
+function dynamic_export_remove()
+{
+ # Below bash fetch all the export from ShowExport command and search
+ # export entry based on path and then get its export entry.
+ # There are two possiblities for path, either entire volume will be
+ # exported or subdir. It handles both cases. But it remove only first
+ # entry from the list based on assumption that entry exported via cli
+ # has lowest export id value
+ removed_id=$(dbus-send --type=method_call --print-reply --system \
+ --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
+ org.ganesha.nfsd.exportmgr.ShowExports | grep -B 1 -we \
+ "/"$VOL -e "/"$VOL"/" | grep uint16 | awk '{print $2}' \
+ | head -1)
+
+ dbus-send --print-reply --system \
+--dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
+org.ganesha.nfsd.exportmgr.RemoveExport uint16:$removed_id
+ check_cmd_status `echo $?`
+}
+
+if [ "$OPTION" = "on" ];
+then
+ dynamic_export_add $@
+fi
+
+if [ "$OPTION" = "off" ];
+then
+ dynamic_export_remove $@
+fi
diff --git a/extras/ganesha/scripts/ganesha-ha.sh b/extras/ganesha/scripts/ganesha-ha.sh
new file mode 100644
index 00000000000..9790a719e10
--- /dev/null
+++ b/extras/ganesha/scripts/ganesha-ha.sh
@@ -0,0 +1,1199 @@
+#!/bin/bash
+
+# Copyright 2015-2016 Red Hat Inc. All Rights Reserved
+#
+# Pacemaker+Corosync High Availability for NFS-Ganesha
+#
+# setup, teardown, add, delete, refresh-config, and status
+#
+# Each participating node in the cluster is assigned a virtual IP (VIP)
+# which fails over to another node when its associated ganesha.nfsd dies
+# for any reason. After the VIP is moved to another node all the
+# ganesha.nfsds are send a signal using DBUS to put them into NFS GRACE.
+#
+# There are six resource agent types used: ganesha_mon, ganesha_grace,
+# ganesha_nfsd, IPaddr, and Dummy. ganesha_mon is used to monitor the
+# ganesha.nfsd. ganesha_grace is used to send the DBUS signal to put
+# the remaining ganesha.nfsds into grace. ganesha_nfsd is used to start
+# and stop the ganesha.nfsd during setup and teardown. IPaddr manages
+# the VIP. A Dummy resource named $hostname-trigger_ip-1 is used to
+# ensure that the NFS GRACE DBUS signal is sent after the VIP moves to
+# the new host.
+
+GANESHA_HA_SH=$(realpath $0)
+HA_NUM_SERVERS=0
+HA_SERVERS=""
+HA_VOL_NAME="gluster_shared_storage"
+HA_VOL_MNT="/run/gluster/shared_storage"
+HA_CONFDIR=$HA_VOL_MNT"/nfs-ganesha"
+SERVICE_MAN="DISTRO_NOT_FOUND"
+
+# rhel, fedora id, version
+ID=""
+VERSION_ID=""
+
+PCS9OR10_PCS_CNAME_OPTION=""
+PCS9OR10_PCS_CLONE_OPTION="clone"
+SECRET_PEM="/var/lib/glusterd/nfs/secret.pem"
+
+# UNBLOCK RA uses shared_storage which may become unavailable
+# during any of the nodes reboot. Hence increase timeout value.
+PORTBLOCK_UNBLOCK_TIMEOUT="60s"
+
+# Try loading the config from any of the distro
+# specific configuration locations
+if [ -f /etc/sysconfig/ganesha ]
+ then
+ . /etc/sysconfig/ganesha
+fi
+if [ -f /etc/conf.d/ganesha ]
+ then
+ . /etc/conf.d/ganesha
+fi
+if [ -f /etc/default/ganesha ]
+ then
+ . /etc/default/ganesha
+fi
+
+GANESHA_CONF=
+
+function find_rhel7_conf
+{
+ while [[ $# > 0 ]]
+ do
+ key="$1"
+ case $key in
+ -f)
+ CONFFILE="$2"
+ break;
+ ;;
+ *)
+ ;;
+ esac
+ shift
+ done
+}
+
+if [ -z ${CONFFILE} ]
+ then
+ find_rhel7_conf ${OPTIONS}
+
+fi
+
+GANESHA_CONF=${CONFFILE:-/etc/ganesha/ganesha.conf}
+
+usage() {
+
+ echo "Usage : add|delete|refresh-config|status"
+ echo "Add-node : ganesha-ha.sh --add <HA_CONF_DIR> \
+<NODE-HOSTNAME> <NODE-VIP>"
+ echo "Delete-node: ganesha-ha.sh --delete <HA_CONF_DIR> \
+<NODE-HOSTNAME>"
+ echo "Refresh-config : ganesha-ha.sh --refresh-config <HA_CONFDIR> \
+<volume>"
+ echo "Status : ganesha-ha.sh --status <HA_CONFDIR>"
+}
+
+determine_service_manager () {
+
+ if [ -e "/bin/systemctl" ];
+ then
+ SERVICE_MAN="/bin/systemctl"
+ elif [ -e "/sbin/invoke-rc.d" ];
+ then
+ SERVICE_MAN="/sbin/invoke-rc.d"
+ elif [ -e "/sbin/service" ];
+ then
+ SERVICE_MAN="/sbin/service"
+ fi
+ if [[ "${SERVICE_MAN}X" == "DISTRO_NOT_FOUNDX" ]]
+ then
+ logger "Service manager not recognized, exiting"
+ exit 1
+ fi
+}
+
+manage_service ()
+{
+ local action=${1}
+ local new_node=${2}
+ local option=
+
+ if [[ "${action}" == "start" ]]; then
+ option="yes"
+ else
+ option="no"
+ fi
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${new_node} "${GANESHA_HA_SH} --setup-ganesha-conf-files $HA_CONFDIR $option"
+
+ if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]
+ then
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${new_node} "${SERVICE_MAN} ${action} nfs-ganesha"
+ else
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${new_node} "${SERVICE_MAN} nfs-ganesha ${action}"
+ fi
+}
+
+
+check_cluster_exists()
+{
+ local name=${1}
+ local cluster_name=""
+
+ if [ -e /var/run/corosync.pid ]; then
+ cluster_name=$(pcs status | grep "Cluster name:" | cut -d ' ' -f 3)
+ if [[ "${cluster_name}X" == "${name}X" ]]; then
+ logger "$name already exists, exiting"
+ exit 0
+ fi
+ fi
+}
+
+
+determine_servers()
+{
+ local cmd=${1}
+ local num_servers=0
+ local tmp_ifs=${IFS}
+ local ha_servers=""
+
+ if [ "${cmd}X" != "setupX" -a "${cmd}X" != "statusX" ]; then
+ ha_servers=$(pcs status | grep "Online:" | grep -o '\[.*\]' | sed -e 's/\[//' | sed -e 's/\]//')
+ IFS=$' '
+ for server in ${ha_servers} ; do
+ num_servers=$(expr ${num_servers} + 1)
+ done
+ IFS=${tmp_ifs}
+ HA_NUM_SERVERS=${num_servers}
+ HA_SERVERS="${ha_servers}"
+ else
+ IFS=$','
+ for server in ${HA_CLUSTER_NODES} ; do
+ num_servers=$(expr ${num_servers} + 1)
+ done
+ IFS=${tmp_ifs}
+ HA_NUM_SERVERS=${num_servers}
+ HA_SERVERS="${HA_CLUSTER_NODES//,/ }"
+ fi
+}
+
+stop_ganesha_all()
+{
+ local serverlist=${1}
+ for node in ${serverlist} ; do
+ manage_service "stop" ${node}
+ done
+}
+
+setup_cluster()
+{
+ local name=${1}
+ local num_servers=${2}
+ local servers=${3}
+ local unclean=""
+ local quorum_policy="stop"
+
+ logger "setting up cluster ${name} with the following ${servers}"
+
+ # pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} ${servers}
+ pcs cluster setup --force ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers}
+ if [ $? -ne 0 ]; then
+ logger "pcs cluster setup ${PCS9OR10_PCS_CNAME_OPTION} ${name} --enable ${servers} failed, shutting down ganesha and bailing out"
+ #set up failed stop all ganesha process and clean up symlinks in cluster
+ stop_ganesha_all "${servers}"
+ exit 1;
+ fi
+
+ # pcs cluster auth ${servers}
+ pcs cluster auth
+ if [ $? -ne 0 ]; then
+ logger "pcs cluster auth failed"
+ fi
+
+ pcs cluster start --all
+ if [ $? -ne 0 ]; then
+ logger "pcs cluster start failed"
+ exit 1;
+ fi
+
+ sleep 1
+ # wait for the cluster to elect a DC before querying or writing
+ # to the CIB. BZ 1334092
+ crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
+ while [ $? -ne 0 ]; do
+ crmadmin --dc_lookup --timeout=5000 > /dev/null 2>&1
+ done
+
+ unclean=$(pcs status | grep -u "UNCLEAN")
+ while [[ "${unclean}X" == "UNCLEANX" ]]; do
+ sleep 1
+ unclean=$(pcs status | grep -u "UNCLEAN")
+ done
+ sleep 1
+
+ if [ ${num_servers} -lt 3 ]; then
+ quorum_policy="ignore"
+ fi
+ pcs property set no-quorum-policy=${quorum_policy}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed"
+ fi
+
+ pcs property set stonith-enabled=false
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs property set stonith-enabled=false failed"
+ fi
+}
+
+
+setup_finalize_ha()
+{
+ local cibfile=${1}
+ local stopped=""
+
+ stopped=$(pcs status | grep -u "Stopped")
+ while [[ "${stopped}X" == "StoppedX" ]]; do
+ sleep 1
+ stopped=$(pcs status | grep -u "Stopped")
+ done
+}
+
+
+refresh_config ()
+{
+ local short_host=$(hostname -s)
+ local VOL=${1}
+ local HA_CONFDIR=${2}
+ local short_host=$(hostname -s)
+
+ local export_id=$(grep ^[[:space:]]*Export_Id $HA_CONFDIR/exports/export.$VOL.conf |\
+ awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]')
+
+
+ if [ -e ${SECRET_PEM} ]; then
+ while [[ ${3} ]]; do
+ current_host=`echo ${3} | cut -d "." -f 1`
+ if [[ ${short_host} != ${current_host} ]]; then
+ output=$(ssh -oPasswordAuthentication=no \
+-oStrictHostKeyChecking=no -i ${SECRET_PEM} root@${current_host} \
+"dbus-send --print-reply --system --dest=org.ganesha.nfsd \
+/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \
+string:$HA_CONFDIR/exports/export.$VOL.conf \
+string:\"EXPORT(Export_Id=$export_id)\" 2>&1")
+ ret=$?
+ logger <<< "${output}"
+ if [ ${ret} -ne 0 ]; then
+ echo "Refresh-config failed on ${current_host}. Please check logs on ${current_host}"
+ else
+ echo "Refresh-config completed on ${current_host}."
+ fi
+
+ fi
+ shift
+ done
+ else
+ echo "Error: refresh-config failed. Passwordless ssh is not enabled."
+ exit 1
+ fi
+
+ # Run the same command on the localhost,
+ output=$(dbus-send --print-reply --system --dest=org.ganesha.nfsd \
+/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.UpdateExport \
+string:$HA_CONFDIR/exports/export.$VOL.conf \
+string:"EXPORT(Export_Id=$export_id)" 2>&1)
+ ret=$?
+ logger <<< "${output}"
+ if [ ${ret} -ne 0 ] ; then
+ echo "Refresh-config failed on localhost."
+ else
+ echo "Success: refresh-config completed."
+ fi
+}
+
+
+teardown_cluster()
+{
+ local name=${1}
+
+ for server in ${HA_SERVERS} ; do
+ if [[ ${HA_CLUSTER_NODES} != *${server}* ]]; then
+ logger "info: ${server} is not in config, removing"
+
+ pcs cluster stop ${server} --force
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster stop ${server} failed"
+ fi
+
+ pcs cluster node remove ${server}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster node remove ${server} failed"
+ fi
+ fi
+ done
+
+ # BZ 1193433 - pcs doesn't reload cluster.conf after modification
+ # after teardown completes, a subsequent setup will appear to have
+ # 'remembered' the deleted node. You can work around this by
+ # issuing another `pcs cluster node remove $node`,
+ # `crm_node -f -R $server`, or
+ # `cibadmin --delete --xml-text '<node id="$server"
+ # uname="$server"/>'
+
+ pcs cluster stop --all
+ if [ $? -ne 0 ]; then
+ logger "warning pcs cluster stop --all failed"
+ fi
+
+ pcs cluster destroy
+ if [ $? -ne 0 ]; then
+ logger "error pcs cluster destroy failed"
+ exit 1
+ fi
+}
+
+
+cleanup_ganesha_config ()
+{
+ rm -f /etc/corosync/corosync.conf
+ rm -rf /etc/cluster/cluster.conf*
+ rm -rf /var/lib/pacemaker/cib/*
+ sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $HA_CONFDIR/ganesha.conf
+}
+
+do_create_virt_ip_constraints()
+{
+ local cibfile=${1}; shift
+ local primary=${1}; shift
+ local weight="1000"
+
+ # first a constraint location rule that says the VIP must be where
+ # there's a ganesha.nfsd running
+ pcs -f ${cibfile} constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint location ${primary}-group rule score=-INFINITY ganesha-active ne 1 failed"
+ fi
+
+ # then a set of constraint location prefers to set the prefered order
+ # for where a VIP should move
+ while [[ ${1} ]]; do
+ pcs -f ${cibfile} constraint location ${primary}-group prefers ${1}=${weight}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint location ${primary}-group prefers ${1}=${weight} failed"
+ fi
+ weight=$(expr ${weight} + 1000)
+ shift
+ done
+ # and finally set the highest preference for the VIP to its home node
+ # default weight when created is/was 100.
+ # on Fedora setting appears to be additive, so to get the desired
+ # value we adjust the weight
+ # weight=$(expr ${weight} - 100)
+ pcs -f ${cibfile} constraint location ${primary}-group prefers ${primary}=${weight}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint location ${primary}-group prefers ${primary}=${weight} failed"
+ fi
+}
+
+
+wrap_create_virt_ip_constraints()
+{
+ local cibfile=${1}; shift
+ local primary=${1}; shift
+ local head=""
+ local tail=""
+
+ # build a list of peers, e.g. for a four node cluster, for node1,
+ # the result is "node2 node3 node4"; for node2, "node3 node4 node1"
+ # and so on.
+ while [[ ${1} ]]; do
+ if [[ ${1} == ${primary} ]]; then
+ shift
+ while [[ ${1} ]]; do
+ tail=${tail}" "${1}
+ shift
+ done
+ else
+ head=${head}" "${1}
+ fi
+ shift
+ done
+ do_create_virt_ip_constraints ${cibfile} ${primary} ${tail} ${head}
+}
+
+
+create_virt_ip_constraints()
+{
+ local cibfile=${1}; shift
+
+ while [[ ${1} ]]; do
+ wrap_create_virt_ip_constraints ${cibfile} ${1} ${HA_SERVERS}
+ shift
+ done
+}
+
+
+setup_create_resources()
+{
+ local cibfile=$(mktemp -u)
+
+ # fixup /var/lib/nfs
+ logger "pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}"
+ pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource create nfs_setup ocf:heartbeat:ganesha_nfsd ha_vol_mnt=${HA_VOL_MNT} ${PCS9OR10_PCS_CLONE_OPTION} failed"
+ fi
+
+ pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource create nfs-mon ocf:heartbeat:ganesha_mon ${PCS9OR10_PCS_CLONE_OPTION} failed"
+ fi
+
+ # see comment in (/usr/lib/ocf/resource.d/heartbeat/ganesha_grace
+ # start method. Allow time for ganesha_mon to start and set the
+ # ganesha-active crm_attribute
+ sleep 5
+
+ pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} notify=true
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource create nfs-grace ocf:heartbeat:ganesha_grace ${PCS9OR10_PCS_CLONE_OPTION} failed"
+ fi
+
+ pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint location nfs-grace-clone rule score=-INFINITY grace-active ne 1"
+ fi
+
+ pcs cluster cib ${cibfile}
+
+ while [[ ${1} ]]; do
+
+ # this is variable indirection
+ # from a nvs like 'VIP_host1=10.7.6.5' or 'VIP_host1="10.7.6.5"'
+ # (or VIP_host-1=..., or VIP_host-1.my.domain.name=...)
+ # a variable 'clean_name' is created (e.g. w/ value 'VIP_host_1')
+ # and a clean nvs (e.g. w/ value 'VIP_host_1="10_7_6_5"')
+ # after the `eval ${clean_nvs}` there is a variable VIP_host_1
+ # with the value '10_7_6_5', and the following \$$ magic to
+ # reference it, i.e. `eval tmp_ipaddr=\$${clean_name}` gives us
+ # ${tmp_ipaddr} with 10_7_6_5 and then convert the _s back to .s
+ # to give us ipaddr="10.7.6.5". whew!
+ name="VIP_${1}"
+ clean_name=${name//[-.]/_}
+ nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf)
+ clean_nvs=${nvs//[-.]/_}
+ eval ${clean_nvs}
+ eval tmp_ipaddr=\$${clean_name}
+ ipaddr=${tmp_ipaddr//_/.}
+
+ pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \
+ portno=2049 action=block ip=${ipaddr} --group ${1}-group
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-nfs_block failed"
+ fi
+ pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+ cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+ cidr_netmask=32 op monitor interval=15s failed"
+ fi
+
+ pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed"
+ fi
+
+ pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \
+ portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \
+ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \
+ op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \
+ op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT}
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-nfs_unblock failed"
+ fi
+
+
+ shift
+ done
+
+ create_virt_ip_constraints ${cibfile} ${HA_SERVERS}
+
+ pcs cluster cib-push ${cibfile}
+ if [ $? -ne 0 ]; then
+ logger "warning pcs cluster cib-push ${cibfile} failed"
+ fi
+ rm -f ${cibfile}
+}
+
+
+teardown_resources()
+{
+ # local mntpt=$(grep ha-vol-mnt ${HA_CONFIG_FILE} | cut -d = -f 2)
+
+ # restore /var/lib/nfs
+ logger "notice: pcs resource delete nfs_setup-clone"
+ pcs resource delete nfs_setup-clone
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource delete nfs_setup-clone failed"
+ fi
+
+ # delete -clone resource agents
+ # in particular delete the ganesha monitor so we don't try to
+ # trigger anything when we shut down ganesha next.
+ pcs resource delete nfs-mon-clone
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource delete nfs-mon-clone failed"
+ fi
+
+ pcs resource delete nfs-grace-clone
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource delete nfs-grace-clone failed"
+ fi
+
+ while [[ ${1} ]]; do
+ pcs resource delete ${1}-group
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs resource delete ${1}-group failed"
+ fi
+ shift
+ done
+
+}
+
+
+recreate_resources()
+{
+ local cibfile=${1}; shift
+
+ while [[ ${1} ]]; do
+ # this is variable indirection
+ # see the comment on the same a few lines up
+ name="VIP_${1}"
+ clean_name=${name//[-.]/_}
+ nvs=$(grep "^${name}=" ${HA_CONFDIR}/ganesha-ha.conf)
+ clean_nvs=${nvs//[-.]/_}
+ eval ${clean_nvs}
+ eval tmp_ipaddr=\$${clean_name}
+ ipaddr=${tmp_ipaddr//_/.}
+
+ pcs -f ${cibfile} resource create ${1}-nfs_block ocf:heartbeat:portblock protocol=tcp \
+ portno=2049 action=block ip=${ipaddr} --group ${1}-group
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-nfs_block failed"
+ fi
+ pcs -f ${cibfile} resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+ cidr_netmask=32 op monitor interval=15s --group ${1}-group --after ${1}-nfs_block
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-cluster_ip-1 ocf:heartbeat:IPaddr ip=${ipaddr} \
+ cidr_netmask=32 op monitor interval=15s failed"
+ fi
+
+ pcs -f ${cibfile} constraint order nfs-grace-clone then ${1}-cluster_ip-1
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint order nfs-grace-clone then ${1}-cluster_ip-1 failed"
+ fi
+
+ pcs -f ${cibfile} resource create ${1}-nfs_unblock ocf:heartbeat:portblock protocol=tcp \
+ portno=2049 action=unblock ip=${ipaddr} reset_local_on_unblock_stop=true \
+ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${1}-group --after ${1}-cluster_ip-1 \
+ op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} \
+ op monitor interval=10s timeout=${PORTBLOCK_UNBLOCK_TIMEOUT}
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${1}-nfs_unblock failed"
+ fi
+
+ shift
+ done
+}
+
+
+addnode_recreate_resources()
+{
+ local cibfile=${1}; shift
+ local add_node=${1}; shift
+ local add_vip=${1}; shift
+
+ recreate_resources ${cibfile} ${HA_SERVERS}
+
+ pcs -f ${cibfile} resource create ${add_node}-nfs_block ocf:heartbeat:portblock \
+ protocol=tcp portno=2049 action=block ip=${add_vip} --group ${add_node}-group
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${add_node}-nfs_block failed"
+ fi
+ pcs -f ${cibfile} resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \
+ ip=${add_vip} cidr_netmask=32 op monitor interval=15s --group ${add_node}-group \
+ --after ${add_node}-nfs_block
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${add_node}-cluster_ip-1 ocf:heartbeat:IPaddr \
+ ip=${add_vip} cidr_netmask=32 op monitor interval=15s failed"
+ fi
+
+ pcs -f ${cibfile} constraint order nfs-grace-clone then ${add_node}-cluster_ip-1
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs constraint order nfs-grace-clone then ${add_node}-cluster_ip-1 failed"
+ fi
+ pcs -f ${cibfile} resource create ${add_node}-nfs_unblock ocf:heartbeat:portblock \
+ protocol=tcp portno=2049 action=unblock ip=${add_vip} reset_local_on_unblock_stop=true \
+ tickle_dir=${HA_VOL_MNT}/nfs-ganesha/tickle_dir/ --group ${add_node}-group --after \
+ ${add_node}-cluster_ip-1 op stop timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op start \
+ timeout=${PORTBLOCK_UNBLOCK_TIMEOUT} op monitor interval=10s \
+ timeout=${PORTBLOCK_UNBLOCK_TIMEOUT}
+ if [ $? -ne 0 ]; then
+ logger "warning pcs resource create ${add_node}-nfs_unblock failed"
+ fi
+}
+
+
+clear_resources()
+{
+ local cibfile=${1}; shift
+
+ while [[ ${1} ]]; do
+ pcs -f ${cibfile} resource delete ${1}-group
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs -f ${cibfile} resource delete ${1}-group"
+ fi
+
+ shift
+ done
+}
+
+
+addnode_create_resources()
+{
+ local add_node=${1}; shift
+ local add_vip=${1}; shift
+ local cibfile=$(mktemp -u)
+
+ # start HA on the new node
+ pcs cluster start ${add_node}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster start ${add_node} failed"
+ fi
+
+ pcs cluster cib ${cibfile}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster cib ${cibfile} failed"
+ fi
+
+ # delete all the -cluster_ip-1 resources, clearing
+ # their constraints, then create them again so we can
+ # recompute their constraints
+ clear_resources ${cibfile} ${HA_SERVERS}
+ addnode_recreate_resources ${cibfile} ${add_node} ${add_vip}
+
+ HA_SERVERS="${HA_SERVERS} ${add_node}"
+ create_virt_ip_constraints ${cibfile} ${HA_SERVERS}
+
+ pcs cluster cib-push ${cibfile}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster cib-push ${cibfile} failed"
+ fi
+ rm -f ${cibfile}
+}
+
+
+deletenode_delete_resources()
+{
+ local node=${1}; shift
+ local ha_servers=$(echo "${HA_SERVERS}" | sed s/${node}//)
+ local cibfile=$(mktemp -u)
+
+ pcs cluster cib ${cibfile}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster cib ${cibfile} failed"
+ fi
+
+ # delete all the -cluster_ip-1 and -trigger_ip-1 resources,
+ # clearing their constraints, then create them again so we can
+ # recompute their constraints
+ clear_resources ${cibfile} ${HA_SERVERS}
+ recreate_resources ${cibfile} ${ha_servers}
+ HA_SERVERS=$(echo "${ha_servers}" | sed -e "s/ / /")
+
+ create_virt_ip_constraints ${cibfile} ${HA_SERVERS}
+
+ pcs cluster cib-push ${cibfile}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster cib-push ${cibfile} failed"
+ fi
+ rm -f ${cibfile}
+
+}
+
+
+deletenode_update_haconfig()
+{
+ local name="VIP_${1}"
+ local clean_name=${name//[-.]/_}
+
+ ha_servers=$(echo ${HA_SERVERS} | sed -e "s/ /,/")
+ sed -i -e "s/^HA_CLUSTER_NODES=.*$/HA_CLUSTER_NODES=\"${ha_servers// /,}\"/" -e "s/^${name}=.*$//" -e "/^$/d" ${HA_CONFDIR}/ganesha-ha.conf
+}
+
+
+setup_state_volume()
+{
+ local mnt=${HA_VOL_MNT}
+ local longname=""
+ local shortname=""
+ local dname=""
+ local dirname=""
+
+ longname=$(hostname)
+ dname=${longname#$(hostname -s)}
+
+ while [[ ${1} ]]; do
+
+ if [[ ${1} == *${dname} ]]; then
+ dirname=${1}
+ else
+ dirname=${1}${dname}
+ fi
+
+ if [ ! -d ${mnt}/nfs-ganesha/tickle_dir ]; then
+ mkdir ${mnt}/nfs-ganesha/tickle_dir
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/state
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state
+ fi
+ for server in ${HA_SERVERS} ; do
+ if [[ ${server} != ${dirname} ]]; then
+ ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server}
+ ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server}
+ fi
+ done
+ shift
+ done
+
+}
+
+
+enable_pacemaker()
+{
+ while [[ ${1} ]]; do
+ if [[ "${SERVICE_MAN}" == "/bin/systemctl" ]]; then
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${1} "${SERVICE_MAN} enable pacemaker"
+ else
+ ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i \
+${SECRET_PEM} root@${1} "${SERVICE_MAN} pacemaker enable"
+ fi
+ shift
+ done
+}
+
+
+addnode_state_volume()
+{
+ local newnode=${1}; shift
+ local mnt=${HA_VOL_MNT}
+ local longname=""
+ local dname=""
+ local dirname=""
+
+ longname=$(hostname)
+ dname=${longname#$(hostname -s)}
+
+ if [[ ${newnode} == *${dname} ]]; then
+ dirname=${newnode}
+ else
+ dirname=${newnode}${dname}
+ fi
+
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname} ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/state
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/state
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4recov
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/v4old
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm
+ fi
+ if [ ! -d ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak ]; then
+ mkdir ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
+ chown rpcuser:rpcuser ${mnt}/nfs-ganesha/${dirname}/nfs/statd/sm.bak
+ fi
+ if [ ! -e ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state ]; then
+ touch ${mnt}/nfs-ganesha/${dirname}/nfs/statd/state
+ fi
+
+ for server in ${HA_SERVERS} ; do
+
+ if [[ ${server} != ${dirname} ]]; then
+ ln -s ${mnt}/nfs-ganesha/${server}/nfs/ganesha ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha/${server}
+ ln -s ${mnt}/nfs-ganesha/${server}/nfs/statd ${mnt}/nfs-ganesha/${dirname}/nfs/statd/${server}
+
+ ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/ganesha ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname}
+ ln -s ${mnt}/nfs-ganesha/${dirname}/nfs/statd ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname}
+ fi
+ done
+
+}
+
+
+delnode_state_volume()
+{
+ local delnode=${1}; shift
+ local mnt=${HA_VOL_MNT}
+ local longname=""
+ local dname=""
+ local dirname=""
+
+ longname=$(hostname)
+ dname=${longname#$(hostname -s)}
+
+ if [[ ${delnode} == *${dname} ]]; then
+ dirname=${delnode}
+ else
+ dirname=${delnode}${dname}
+ fi
+
+ rm -rf ${mnt}/nfs-ganesha/${dirname}
+
+ for server in ${HA_SERVERS} ; do
+ if [[ ${server} != ${dirname} ]]; then
+ rm -f ${mnt}/nfs-ganesha/${server}/nfs/ganesha/${dirname}
+ rm -f ${mnt}/nfs-ganesha/${server}/nfs/statd/${dirname}
+ fi
+ done
+}
+
+
+status()
+{
+ local scratch=$(mktemp)
+ local regex_str="^${1}-cluster_ip-1"
+ local healthy=0
+ local index=1
+ local nodes
+
+ # change tabs to spaces, strip leading spaces, including any
+ # new '*' at the beginning of a line introduced in pcs-0.10.x
+ pcs status | sed -e "s/\t/ /g" -e "s/^[ ]*\*//" -e "s/^[ ]*//" > ${scratch}
+
+ nodes[0]=${1}; shift
+
+ # make a regex of the configured nodes
+ # and initalize the nodes array for later
+ while [[ ${1} ]]; do
+
+ regex_str="${regex_str}|^${1}-cluster_ip-1"
+ nodes[${index}]=${1}
+ ((index++))
+ shift
+ done
+
+ # print the nodes that are expected to be online
+ grep -E "Online:" ${scratch}
+
+ echo
+
+ # print the VIPs and which node they are on
+ grep -E "${regex_str}" < ${scratch} | cut -d ' ' -f 1,4
+
+ echo
+
+ # check if the VIP and port block/unblock RAs are on the expected nodes
+ for n in ${nodes[*]}; do
+
+ grep -E -x "${n}-nfs_block \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch}
+ result=$?
+ ((healthy+=${result}))
+ grep -E -x "${n}-cluster_ip-1 \(ocf::heartbeat:IPaddr\): Started ${n}" > /dev/null 2>&1 ${scratch}
+ result=$?
+ ((healthy+=${result}))
+ grep -E -x "${n}-nfs_unblock \(ocf::heartbeat:portblock\): Started ${n}" > /dev/null 2>&1 ${scratch}
+ result=$?
+ ((healthy+=${result}))
+ done
+
+ grep -E "\):\ Stopped|FAILED" > /dev/null 2>&1 ${scratch}
+ result=$?
+
+ if [ ${result} -eq 0 ]; then
+ echo "Cluster HA Status: BAD"
+ elif [ ${healthy} -eq 0 ]; then
+ echo "Cluster HA Status: HEALTHY"
+ else
+ echo "Cluster HA Status: FAILOVER"
+ fi
+
+ rm -f ${scratch}
+}
+
+create_ganesha_conf_file()
+{
+ if [[ "$1" == "yes" ]];
+ then
+ if [ -e $GANESHA_CONF ];
+ then
+ rm -rf $GANESHA_CONF
+ fi
+ # The symlink /etc/ganesha/ganesha.conf need to be
+ # created using ganesha conf file mentioned in the
+ # shared storage. Every node will only have this
+ # link and actual file will stored in shared storage,
+ # so that ganesha conf editing of ganesha conf will
+ # be easy as well as it become more consistent.
+
+ ln -s $HA_CONFDIR/ganesha.conf $GANESHA_CONF
+ else
+ # Restoring previous file
+ rm -rf $GANESHA_CONF
+ cp $HA_CONFDIR/ganesha.conf $GANESHA_CONF
+ sed -r -i -e '/^%include[[:space:]]+".+\.conf"$/d' $GANESHA_CONF
+ fi
+}
+
+set_quorum_policy()
+{
+ local quorum_policy="stop"
+ local num_servers=${1}
+
+ if [ ${num_servers} -lt 3 ]; then
+ quorum_policy="ignore"
+ fi
+ pcs property set no-quorum-policy=${quorum_policy}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs property set no-quorum-policy=${quorum_policy} failed"
+ fi
+}
+
+main()
+{
+
+ local cmd=${1}; shift
+ if [[ ${cmd} == *help ]]; then
+ usage
+ exit 0
+ fi
+
+ if (selinuxenabled) ;then
+ semanage boolean -m gluster_use_execmem --on
+ fi
+
+ local osid=""
+
+ osid=$(grep ^ID= /etc/os-release)
+ eval $(echo ${osid} | grep -F ID=)
+ osid=$(grep ^VERSION_ID= /etc/os-release)
+ eval $(echo ${osid} | grep -F VERSION_ID=)
+
+ HA_CONFDIR=${1%/}; shift
+ local ha_conf=${HA_CONFDIR}/ganesha-ha.conf
+ local node=""
+ local vip=""
+
+ # ignore any comment lines
+ cfgline=$(grep ^HA_NAME= ${ha_conf})
+ eval $(echo ${cfgline} | grep -F HA_NAME=)
+ cfgline=$(grep ^HA_CLUSTER_NODES= ${ha_conf})
+ eval $(echo ${cfgline} | grep -F HA_CLUSTER_NODES=)
+
+ case "${cmd}" in
+
+ setup | --setup)
+ logger "setting up ${HA_NAME}"
+
+ check_cluster_exists ${HA_NAME}
+
+ determine_servers "setup"
+
+ # Fedora 29+ and rhel/centos 8 has PCS-0.10.x
+ # default is pcs-0.10.x options but check for
+ # rhel/centos 7 (pcs-0.9.x) and adjust accordingly
+ if [[ ! ${ID} =~ {rhel,centos} ]]; then
+ if [[ ${VERSION_ID} == 7.* ]]; then
+ PCS9OR10_PCS_CNAME_OPTION="--name"
+ PCS9OR10_PCS_CLONE_OPTION="--clone"
+ fi
+ fi
+
+ if [[ "${HA_NUM_SERVERS}X" != "1X" ]]; then
+
+ determine_service_manager
+
+ setup_cluster ${HA_NAME} ${HA_NUM_SERVERS} "${HA_SERVERS}"
+
+ setup_create_resources ${HA_SERVERS}
+
+ setup_finalize_ha
+
+ setup_state_volume ${HA_SERVERS}
+
+ enable_pacemaker ${HA_SERVERS}
+
+ else
+
+ logger "insufficient servers for HA, aborting"
+ fi
+ ;;
+
+ teardown | --teardown)
+ logger "tearing down ${HA_NAME}"
+
+ determine_servers "teardown"
+
+ teardown_resources ${HA_SERVERS}
+
+ teardown_cluster ${HA_NAME}
+
+ cleanup_ganesha_config ${HA_CONFDIR}
+ ;;
+
+ cleanup | --cleanup)
+ cleanup_ganesha_config ${HA_CONFDIR}
+ ;;
+
+ add | --add)
+ node=${1}; shift
+ vip=${1}; shift
+
+ logger "adding ${node} with ${vip} to ${HA_NAME}"
+
+ determine_service_manager
+
+ manage_service "start" ${node}
+
+ determine_servers "add"
+
+ pcs cluster node add ${node}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster node add ${node} failed"
+ fi
+
+ addnode_create_resources ${node} ${vip}
+ # Subsequent add-node recreates resources for all the nodes
+ # that already exist in the cluster. The nodes are picked up
+ # from the entries in the ganesha-ha.conf file. Adding the
+ # newly added node to the file so that the resources specfic
+ # to this node is correctly recreated in the future.
+ clean_node=${node//[-.]/_}
+ echo "VIP_${node}=\"${vip}\"" >> ${HA_CONFDIR}/ganesha-ha.conf
+
+ NEW_NODES="$HA_CLUSTER_NODES,${node}"
+
+ sed -i s/HA_CLUSTER_NODES.*/"HA_CLUSTER_NODES=\"$NEW_NODES\""/ \
+$HA_CONFDIR/ganesha-ha.conf
+
+ addnode_state_volume ${node}
+
+ # addnode_create_resources() already appended ${node} to
+ # HA_SERVERS, so only need to increment HA_NUM_SERVERS
+ # and set quorum policy
+ HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} + 1)
+ set_quorum_policy ${HA_NUM_SERVERS}
+ ;;
+
+ delete | --delete)
+ node=${1}; shift
+
+ logger "deleting ${node} from ${HA_NAME}"
+
+ determine_servers "delete"
+
+ deletenode_delete_resources ${node}
+
+ pcs cluster node remove ${node}
+ if [ $? -ne 0 ]; then
+ logger "warning: pcs cluster node remove ${node} failed"
+ fi
+
+ deletenode_update_haconfig ${node}
+
+ delnode_state_volume ${node}
+
+ determine_service_manager
+
+ manage_service "stop" ${node}
+
+ HA_NUM_SERVERS=$(expr ${HA_NUM_SERVERS} - 1)
+ set_quorum_policy ${HA_NUM_SERVERS}
+ ;;
+
+ status | --status)
+ determine_servers "status"
+
+ status ${HA_SERVERS}
+ ;;
+
+ refresh-config | --refresh-config)
+ VOL=${1}
+
+ determine_servers "refresh-config"
+
+ refresh_config ${VOL} ${HA_CONFDIR} ${HA_SERVERS}
+ ;;
+
+ setup-ganesha-conf-files | --setup-ganesha-conf-files)
+
+ create_ganesha_conf_file ${1}
+ ;;
+
+ *)
+ # setup and teardown are not intended to be used by a
+ # casual user
+ usage
+ logger "Usage: ganesha-ha.sh add|delete|status"
+ ;;
+
+ esac
+
+ if (selinuxenabled) ;then
+ semanage boolean -m gluster_use_execmem --off
+ fi
+}
+
+main $*
diff --git a/extras/ganesha/scripts/generate-epoch.py b/extras/ganesha/scripts/generate-epoch.py
new file mode 100755
index 00000000000..77af014bab9
--- /dev/null
+++ b/extras/ganesha/scripts/generate-epoch.py
@@ -0,0 +1,48 @@
+#!/usr/bin/python3
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+# Generates unique epoch value on each gluster node to be used by
+# nfs-ganesha service on that node.
+#
+# Configure 'EPOCH_EXEC' option to this script path in
+# '/etc/sysconfig/ganesha' file used by nfs-ganesha service.
+#
+# Construct epoch as follows -
+# first 32-bit contains the now() time
+# rest 32-bit value contains the local glusterd node uuid
+
+import time
+import binascii
+
+# Calculate the now() time into a 64-bit integer value
+def epoch_now():
+ epoch_time = int(time.mktime(time.localtime())) << 32
+ return epoch_time
+
+# Read glusterd UUID and extract first 32-bit of it
+def epoch_uuid():
+ file_name = '/var/lib/glusterd/glusterd.info'
+
+ for line in open(file_name):
+ if "UUID" in line:
+ glusterd_uuid = line.split('=')[1].strip()
+
+ uuid_bin = binascii.unhexlify(glusterd_uuid.replace("-",""))
+
+ epoch_uuid = int(binascii.hexlify(uuid_bin), 32) & 0xFFFF0000
+ return epoch_uuid
+
+# Construct epoch as follows -
+# first 32-bit contains the now() time
+# rest 32-bit value contains the local glusterd node uuid
+epoch = (epoch_now() | epoch_uuid())
+print((str(epoch)))
+
+exit(0)
diff --git a/extras/generate-xdr-files.sh b/extras/generate-xdr-files.sh
deleted file mode 100755
index e52321cd362..00000000000
--- a/extras/generate-xdr-files.sh
+++ /dev/null
@@ -1,107 +0,0 @@
-#!/bin/sh
-
-_init ()
-{
- xfile="$1";
- # TODO: check the validity of .x file
-
- cfile="${1%.x}.c";
- hfile="${1%.x}.h";
-
- tmp_cfile="$1.c";
-
- tmp1_hfile="$1.h.tmp";
- tmp1_cfile="$1.c.tmp";
-
-}
-
-append_licence_header ()
-{
- src_file=$1;
- dst_file=$2;
-
- cat >$dst_file <<EOF
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-EOF
-
- cat $src_file >> $dst_file;
-
-}
-
-main ()
-{
- if [ $# -ne 1 ]; then
- echo "wrong number of arguments given"
- echo " $0 <XDR-definition-file>.x"
- exit 1;
- fi
-
-
- echo -n "writing the XDR routine file ($tmp_cfile) ... ";
- rm -f $tmp_cfile;
- rpcgen -c -o $tmp_cfile $xfile;
-
- # get rid of warnings in xdr .c file due to "unused variable 'buf'"
- sed -i -e 's:buf;$:buf;\
- buf = NULL;:' $tmp_cfile;
-
- sed -i '/int i;/d' $tmp_cfile;
-
- echo "OK";
-
- # no need for a temporary file here as there are no changes from glusterfs
- echo -n "writing the XDR header file ($hfile) ... ";
- rm -f $hfile;
- rpcgen -h -o $hfile $xfile;
-
- # the '#ifdef' part of file should be fixed
- sed -i -e 's/-/_/g' $hfile;
-
- echo "OK";
-
- echo -n "writing licence header to the generated files ... ";
- # Write header to temp file and append generated file
- append_licence_header $hfile $tmp1_hfile;
- append_licence_header $tmp_cfile $tmp1_cfile;
- echo "OK"
-
- # now move the destination file to actual original file
- echo -n "updating existing files ... ";
- mv $tmp1_hfile $hfile;
- mv $tmp1_cfile $cfile;
-
- # remove unwanted temporary files (if any)
- rm -f $tmp_cfile $tmp1_cfile $tmp1_hfile
-
- echo "OK"
-
-}
-
-_init "$@" && main "$@";
diff --git a/extras/geo-rep/Makefile.am b/extras/geo-rep/Makefile.am
new file mode 100644
index 00000000000..09eff308ac4
--- /dev/null
+++ b/extras/geo-rep/Makefile.am
@@ -0,0 +1,16 @@
+scriptsdir = $(libexecdir)/glusterfs/scripts
+scripts_SCRIPTS = gsync-upgrade.sh generate-gfid-file.sh get-gfid.sh \
+ slave-upgrade.sh schedule_georep.py
+
+scripts_PROGRAMS = gsync-sync-gfid
+gsync_sync_gfid_CFLAGS = $(GF_CFLAGS) -Wall -I$(top_srcdir)/libglusterfs/src
+gsync_sync_gfid_LDFLAGS = $(GF_LDFLAGS)
+gsync_sync_gfid_LDADD = $(GF_LDADD) $(top_builddir)/libglusterfs/src/libglusterfs.la
+gsync_sync_gfid_SOURCES = gsync-sync-gfid.c
+gsync_sync_gfid_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+EXTRA_DIST = gsync-sync-gfid.c gsync-upgrade.sh generate-gfid-file.sh \
+ get-gfid.sh slave-upgrade.sh schedule_georep.py.in
+
+CLEANFILES = schedule_georep.py
diff --git a/extras/geo-rep/generate-gfid-file.sh b/extras/geo-rep/generate-gfid-file.sh
new file mode 100644
index 00000000000..14f104b986d
--- /dev/null
+++ b/extras/geo-rep/generate-gfid-file.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+#Usage: generate-gfid-file.sh <master-volfile-server:master-volume> <path-to-get-gfid.sh> <output-file> [dirs-list-file]
+
+function get_gfids()
+{
+ GET_GFID_CMD=$1
+ OUTPUT_FILE=$2
+ DIR_PATH=$3
+ find "$DIR_PATH" -exec $GET_GFID_CMD {} \; >> $OUTPUT_FILE
+}
+
+function mount_client()
+{
+ local T; # temporary mount
+ local i; # inode number
+
+ VOLFILE_SERVER=$1;
+ VOLUME=$2;
+ GFID_CMD=$3;
+ OUTPUT=$4;
+
+ T=$(mktemp -d -t ${0##*/}.XXXXXX);
+
+ glusterfs -s $VOLFILE_SERVER --volfile-id $VOLUME $T;
+
+ i=$(stat -c '%i' $T);
+
+ [ "x$i" = "x1" ] || fatal "could not mount volume $MASTER on $T";
+
+ cd $T;
+ rm -f $OUTPUT;
+ touch $OUTPUT;
+
+ if [ "$DIRS_FILE" = "." ]
+ then
+ get_gfids $GFID_CMD $OUTPUT "."
+ else
+ while read line
+ do
+ get_gfids $GFID_CMD $OUTPUT "$line"
+ done < $DIRS_FILE
+ fi;
+
+ cd -;
+
+ umount $T || fatal "could not umount $MASTER from $T";
+
+ rmdir $T || warn "rmdir of $T failed";
+}
+
+
+function main()
+{
+ SLAVE=$1
+ GET_GFID_CMD=$2
+ OUTPUT=$3
+
+ VOLFILE_SERVER=`echo $SLAVE | sed -e 's/\(.*\):.*/\1/'`
+ VOLUME_NAME=`echo $SLAVE | sed -e 's/.*:\(.*\)/\1/'`
+
+ if [ "$#" -lt 4 ]
+ then
+ DIRS_FILE="."
+ else
+ DIRS_FILE=$4
+ fi
+ mount_client $VOLFILE_SERVER $VOLUME_NAME $GET_GFID_CMD $OUTPUT $DIRS_FILE
+}
+
+main "$@";
diff --git a/extras/geo-rep/get-gfid.sh b/extras/geo-rep/get-gfid.sh
new file mode 100755
index 00000000000..a4d609b0bc5
--- /dev/null
+++ b/extras/geo-rep/get-gfid.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+ATTR_STR=`getfattr -h $1 -n glusterfs.gfid.string`
+GLFS_PATH=`echo $ATTR_STR | sed -e 's/# file: \(.*\) glusterfs.gfid.string*/\1/g'`
+GFID=`echo $ATTR_STR | sed -e 's/.*glusterfs.gfid.string="\(.*\)"/\1/g'`
+
+echo "$GFID $GLFS_PATH"
diff --git a/extras/geo-rep/gsync-sync-gfid.c b/extras/geo-rep/gsync-sync-gfid.c
new file mode 100644
index 00000000000..47dca0413e9
--- /dev/null
+++ b/extras/geo-rep/gsync-sync-gfid.c
@@ -0,0 +1,109 @@
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <limits.h>
+#include <sys/types.h>
+#include <libgen.h>
+#include <ctype.h>
+#include <stdlib.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/syscall.h>
+
+#ifndef UUID_CANONICAL_FORM_LEN
+#define UUID_CANONICAL_FORM_LEN 36
+#endif
+
+#ifndef GF_FUSE_AUX_GFID_HEAL
+#define GF_FUSE_AUX_GFID_HEAL "glusterfs.gfid.heal"
+#endif
+
+#define GLFS_LINE_MAX (PATH_MAX + (2 * UUID_CANONICAL_FORM_LEN))
+
+int
+main(int argc, char *argv[])
+{
+ char *file = NULL;
+ char *tmp = NULL;
+ char *tmp1 = NULL;
+ char *parent_dir = NULL;
+ char *gfid = NULL;
+ char *bname = NULL;
+ int ret = -1;
+ int len = 0;
+ FILE *fp = NULL;
+ char line[GLFS_LINE_MAX] = {
+ 0,
+ };
+ char *path = NULL;
+ void *blob = NULL;
+ void *tmp_blob = NULL;
+
+ if (argc != 2) {
+ /* each line in the file has the following format
+ * uuid-in-canonical-form path-relative-to-gluster-mount.
+ * Both uuid and relative path are from master mount.
+ */
+ fprintf(stderr, "usage: %s <file-of-paths-to-be-synced>\n", argv[0]);
+ goto out;
+ }
+
+ file = argv[1];
+
+ fp = fopen(file, "r");
+ if (fp == NULL) {
+ fprintf(stderr, "cannot open %s for reading (%s)\n", file,
+ strerror(errno));
+ goto out;
+ }
+
+ while (fgets(line, GLFS_LINE_MAX, fp) != NULL) {
+ tmp = line;
+ path = gfid = line;
+
+ path += UUID_CANONICAL_FORM_LEN + 1;
+
+ while (isspace(*path))
+ path++;
+
+ len = strlen(line);
+ if ((len < GLFS_LINE_MAX) && (line[len - 1] == '\n'))
+ line[len - 1] = '\0';
+
+ line[UUID_CANONICAL_FORM_LEN] = '\0';
+
+ tmp = strdup(path);
+ tmp1 = strdup(path);
+ parent_dir = dirname(tmp);
+ bname = basename(tmp1);
+
+ /* gfid + '\0' + bname + '\0' */
+ len = UUID_CANONICAL_FORM_LEN + 1 + strlen(bname) + 1;
+
+ blob = malloc(len);
+
+ memcpy(blob, gfid, UUID_CANONICAL_FORM_LEN);
+
+ tmp_blob = blob + UUID_CANONICAL_FORM_LEN + 1;
+
+ memcpy(tmp_blob, bname, strlen(bname));
+
+ ret = sys_lsetxattr(parent_dir, GF_FUSE_AUX_GFID_HEAL, blob, len, 0);
+ if (ret < 0) {
+ fprintf(stderr, "setxattr on %s/%s failed (%s)\n", parent_dir,
+ bname, strerror(errno));
+ }
+ memset(line, 0, GLFS_LINE_MAX);
+
+ free(blob);
+ free(tmp);
+ free(tmp1);
+ blob = NULL;
+ }
+
+ ret = 0;
+out:
+ if (fp)
+ fclose(fp);
+ return ret;
+}
diff --git a/extras/geo-rep/gsync-upgrade.sh b/extras/geo-rep/gsync-upgrade.sh
new file mode 100644
index 00000000000..0f73a33884b
--- /dev/null
+++ b/extras/geo-rep/gsync-upgrade.sh
@@ -0,0 +1,123 @@
+#!/bin/bash
+#usage: gsync-upgrade.sh <slave-volfile-server:slave-volume> <gfid-file>
+# <path-to-gsync-sync-gfid> <ssh-identity-file>
+#<slave-volfile-server>: a machine on which gluster cli can fetch slave volume info.
+# slave-volfile-server defaults to localhost.
+#
+#<gfid-file>: a file containing paths and their associated gfids
+# on master. The paths are relative to master mount point
+# (not absolute). An example extract of <gfid-file> can be,
+#
+# <extract>
+# 22114455-57c5-46e9-a783-c40f83a72b09 /dir
+# 25772386-3eb8-4550-a802-c3fdc938ca80 /dir/file
+# </extract>
+#
+#<ssh-identity-file>: file from which the identity (private key) for public key authentication is read.
+
+SLAVE_MOUNT='/tmp/glfs_slave'
+
+function SSH()
+{
+ HOST=$1
+ SSHKEY=$2
+
+ shift 2
+
+ ssh -qi $SSHKEY \
+ -oPasswordAuthentication=no \
+ -oStrictHostKeyChecking=no \
+ "$HOST" "$@";
+}
+
+function get_bricks()
+{
+ SSHKEY=$3
+
+ SSH $1 $SSHKEY "gluster volume info $2" | grep -E 'Brick[0-9]+' | sed -e 's/[^:]*:\(.*\)/\1/g'
+}
+
+function cleanup_brick()
+{
+ HOST=$1
+ BRICK=$2
+ SSHKEY=$3
+
+ # TODO: write a C program to receive a list of files and does cleanup on
+ # them instead of spawning a new setfattr process for each file if
+ # performance is bad.
+ SSH -i $SSHKEY $HOST "rm -rf $BRICK/.glusterfs/* && find $BRICK -exec setfattr -x trusted.gfid {} \;"
+}
+
+function cleanup_slave()
+{
+ SSHKEY=$2
+
+ VOLFILE_SERVER=`echo $1 | sed -e 's/\(.*\):.*/\1/'`
+ VOLUME_NAME=`echo $1 | sed -e 's/.*:\(.*\)/\1/'`
+
+ BRICKS=`get_bricks $VOLFILE_SERVER $VOLUME_NAME $SSHKEY`
+
+ for i in $BRICKS; do
+ HOST=`echo $i | sed -e 's/\(.*\):.*/\1/'`
+ BRICK=`echo $i | sed -e 's/.*:\(.*\)/\1/'`
+ cleanup_brick $HOST $BRICK $SSHKEY
+ done
+
+ SSH -i $SSHKEY $VOLFILE_SERVER "gluster --mode=script volume stop $VOLUME_NAME; gluster volume start $VOLUME_NAME";
+
+}
+
+function mount_client()
+{
+ local T; # temporary mount
+ local i; # inode number
+ GFID_FILE=$3
+ SYNC_CMD=$4
+
+ T=$(mktemp -d -t ${0##*/}.XXXXXX);
+
+ glusterfs --aux-gfid-mount -s $1 --volfile-id $2 $T;
+
+ i=$(stat -c '%i' $T);
+
+ [ "x$i" = "x1" ] || fatal "could not mount volume $MASTER on $T";
+
+ cd $T;
+
+ $SYNC_CMD $GFID_FILE
+
+ cd -;
+
+ umount -l $T || fatal "could not umount $MASTER from $T";
+
+ rmdir $T || warn "rmdir of $T failed";
+}
+
+function sync_gfids()
+{
+ SLAVE=$1
+ GFID_FILE=$2
+
+ SLAVE_VOLFILE_SERVER=`echo $SLAVE | sed -e 's/\(.*\):.*/\1/'`
+ SLAVE_VOLUME_NAME=`echo $SLAVE | sed -e 's/.*:\(.*\)/\1/'`
+
+ if [ "x$SLAVE_VOLFILE_SERVER" = "x" ]; then
+ SLAVE_VOLFILE_SERVER="localhost"
+ fi
+
+ mount_client $SLAVE_VOLFILE_SERVER $SLAVE_VOLUME_NAME $GFID_FILE $3
+}
+
+function upgrade()
+{
+ SLAVE=$1
+ GFID_FILE=$2
+ SYNC_CMD=$3
+ SSHKEY=$4
+
+ cleanup_slave $SLAVE $SSHKEY
+ sync_gfids $SLAVE $GFID_FILE $SYNC_CMD
+}
+
+upgrade "$@"
diff --git a/extras/geo-rep/schedule_georep.py.in b/extras/geo-rep/schedule_georep.py.in
new file mode 100644
index 00000000000..48b2b507060
--- /dev/null
+++ b/extras/geo-rep/schedule_georep.py.in
@@ -0,0 +1,492 @@
+#!/usr/bin/python3
+"""
+Schedule Geo-replication
+------------------------
+A tool to run Geo-replication when required. This can be used to
+schedule the Geo-replication to run once in a day using
+
+ # Run daily at 08:30pm
+ 30 20 * * * root python /usr/share/glusterfs/scripts/schedule_georep.py \\
+ --no-color gv1 fvm1 gv2 >> /var/log/glusterfs/schedule_georep.log 2>&1
+
+This tool does the following,
+
+1. Stop Geo-replication if Started
+2. Start Geo-replication
+3. Set Checkpoint
+4. Check the Status and see Checkpoint is Complete.(LOOP)
+5. If checkpoint complete, Stop Geo-replication
+
+Usage:
+
+ python /usr/share/glusterfs/scripts/schedule_georep.py <MASTERVOL> \\
+ <SLAVEHOST> <SLAVEVOL>
+
+For example,
+
+ python /usr/share/glusterfs/scripts/schedule_georep.py gv1 fvm1 gv2
+
+"""
+import subprocess
+import time
+import xml.etree.cElementTree as etree
+import sys
+from contextlib import contextmanager
+import tempfile
+import os
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+
+ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError
+cache_data = {}
+
+SESSION_MOUNT_LOG_FILE = ("/var/log/glusterfs/geo-replication"
+ "/schedule_georep.mount.log")
+
+USE_CLI_COLOR = True
+mnt_list = []
+
+class GlusterBadXmlFormat(Exception):
+ """
+ Exception class for XML Parse Errors
+ """
+ pass
+
+
+def output_notok(msg, err="", exitcode=1):
+ if USE_CLI_COLOR:
+ out = "\033[31m[NOT OK]\033[0m {0}\n{1}\n"
+ else:
+ out = "[NOT OK] {0}\n{1}\n"
+ sys.stderr.write(out.format(msg, err))
+ sys.exit(exitcode)
+
+
+def output_warning(msg):
+ if USE_CLI_COLOR:
+ out = "\033[33m[ WARN]\033[0m {0}\n"
+ else:
+ out = "[ WARN] {0}\n"
+ sys.stderr.write(out.format(msg))
+
+
+def output_ok(msg):
+ if USE_CLI_COLOR:
+ out = "\033[32m[ OK]\033[0m {0}\n"
+ else:
+ out = "[ OK] {0}\n"
+ sys.stderr.write(out.format(msg))
+
+
+def execute(cmd, success_msg="", failure_msg="", exitcode=-1):
+ """
+ Generic wrapper to execute the CLI commands. Returns Output if success.
+ On success it can print message in stdout if specified.
+ On failure, exits after writing to stderr.
+ """
+ p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
+ out, err = p.communicate()
+ if p.returncode == 0:
+ if success_msg:
+ output_ok(success_msg)
+ return out
+ else:
+ if exitcode == 0:
+ return
+ err_msg = err if err else out
+ output_notok(failure_msg, err=err_msg, exitcode=exitcode)
+
+
+def cache_output_with_args(func):
+ """
+ Decorator function to remember the output of any function
+ """
+ def wrapper(*args, **kwargs):
+ global cache_data
+ key = "_".join([func.func_name] + list(args))
+ if cache_data.get(key, None) is None:
+ cache_data[key] = func(*args, **kwargs)
+
+ return cache_data[key]
+ return wrapper
+
+
+def cleanup(hostname, volname, mnt):
+ """
+ Unmount the Volume and Remove the temporary directory
+ """
+ execute(["umount", "-l", mnt],
+ failure_msg="Unable to Unmount Gluster Volume "
+ "{0}:{1}(Mounted at {2})".format(hostname, volname, mnt))
+ execute(["rmdir", mnt],
+ failure_msg="Unable to Remove temp directory "
+ "{0}".format(mnt), exitcode=0)
+
+
+@contextmanager
+def glustermount(hostname, volname):
+ """
+ Context manager for Mounting Gluster Volume
+ Use as
+ with glustermount(HOSTNAME, VOLNAME) as MNT:
+ # Do your stuff
+ Automatically unmounts it in case of Exceptions/out of context
+ """
+ mnt = tempfile.mkdtemp(prefix="georepsetup_")
+ mnt_list.append(mnt)
+ execute(["@SBIN_DIR@/glusterfs",
+ "--volfile-server", hostname,
+ "--volfile-id", volname,
+ "-l", SESSION_MOUNT_LOG_FILE,
+ mnt],
+ failure_msg="Unable to Mount Gluster Volume "
+ "{0}:{1}".format(hostname, volname))
+ if os.path.ismount(mnt):
+ yield mnt
+ else:
+ output_notok("Unable to Mount Gluster Volume "
+ "{0}:{1}".format(hostname, volname))
+ cleanup(hostname, volname, mnt)
+
+
+@cache_output_with_args
+def get_bricks(volname):
+ """
+ Returns Bricks list, caches the Bricks list for a volume once
+ parsed.
+ """
+ value = []
+ cmd = ["@SBIN_DIR@/gluster", "volume", "info", volname, "--xml"]
+ info = execute(cmd)
+ try:
+ tree = etree.fromstring(info)
+ volume_el = tree.find('volInfo/volumes/volume')
+ for b in volume_el.findall('bricks/brick'):
+ value.append({"name": b.find("name").text,
+ "hostUuid": b.find("hostUuid").text})
+ except ParseError:
+ raise GlusterBadXmlFormat("Bad XML Format: %s" % " ".join(cmd))
+
+ return value
+
+
+def get_georep_status(mastervol, slave):
+ session_keys = set()
+ out = {}
+ cmd = ["@SBIN_DIR@/gluster", "volume", "geo-replication"]
+ if mastervol is not None:
+ cmd += [mastervol]
+ if slave:
+ cmd += [slave]
+
+ cmd += ["status", "--xml"]
+ info = execute(cmd)
+
+ try:
+ tree = etree.fromstring(info)
+ # Get All Sessions
+ for volume_el in tree.findall("geoRep/volume"):
+ sessions_el = volume_el.find("sessions")
+ # Master Volume name if multiple Volumes
+ mvol = volume_el.find("name").text
+
+ # For each session, collect the details
+ for session in sessions_el.findall("session"):
+ session_slave = "{0}:{1}".format(mvol, session.find(
+ "session_slave").text)
+ session_keys.add(session_slave)
+ out[session_slave] = {}
+
+ for pair in session.findall('pair'):
+ master_brick = "{0}:{1}".format(
+ pair.find("master_node").text,
+ pair.find("master_brick").text
+ )
+
+ out[session_slave][master_brick] = {
+ "mastervol": mvol,
+ "slavevol": pair.find("slave").text.split("::")[-1],
+ "master_node": pair.find("master_node").text,
+ "master_brick": pair.find("master_brick").text,
+ "slave_user": pair.find("slave_user").text,
+ "slave": pair.find("slave").text,
+ "slave_node": pair.find("slave_node").text,
+ "status": pair.find("status").text,
+ "crawl_status": pair.find("crawl_status").text,
+ "entry": pair.find("entry").text,
+ "data": pair.find("data").text,
+ "meta": pair.find("meta").text,
+ "failures": pair.find("failures").text,
+ "checkpoint_completed": pair.find(
+ "checkpoint_completed").text,
+ "master_node_uuid": pair.find("master_node_uuid").text,
+ "last_synced": pair.find("last_synced").text,
+ "checkpoint_time": pair.find("checkpoint_time").text,
+ "checkpoint_completion_time":
+ pair.find("checkpoint_completion_time").text
+ }
+ except ParseError:
+ raise GlusterBadXmlFormat("Bad XML Format: %s" % " ".join(cmd))
+
+ return session_keys, out
+
+
+def get_offline_status(volname, brick, node_uuid, slave):
+ node, brick = brick.split(":")
+ if "@" not in slave:
+ slave_user = "root"
+ else:
+ slave_user, _ = slave.split("@")
+
+ return {
+ "mastervol": volname,
+ "slavevol": slave.split("::")[-1],
+ "master_node": node,
+ "master_brick": brick,
+ "slave_user": slave_user,
+ "slave": slave,
+ "slave_node": "N/A",
+ "status": "Offline",
+ "crawl_status": "N/A",
+ "entry": "N/A",
+ "data": "N/A",
+ "meta": "N/A",
+ "failures": "N/A",
+ "checkpoint_completed": "N/A",
+ "master_node_uuid": node_uuid,
+ "last_synced": "N/A",
+ "checkpoint_time": "N/A",
+ "checkpoint_completion_time": "N/A"
+ }
+
+
+def get(mastervol=None, slave=None):
+ """
+ This function gets list of Bricks of Master Volume and collects
+ respective Geo-rep status. Output will be always ordered as the
+ bricks list in Master Volume. If Geo-rep status is not available
+ for any brick then it updates OFFLINE status.
+ """
+ out = []
+ session_keys, gstatus = get_georep_status(mastervol, slave)
+
+ for session in session_keys:
+ mvol, _, slave = session.split(":", 2)
+ slave = slave.replace("ssh://", "")
+ master_bricks = get_bricks(mvol)
+ out.append([])
+ for brick in master_bricks:
+ bname = brick["name"]
+ if gstatus.get(session) and gstatus[session].get(bname, None):
+ out[-1].append(gstatus[session][bname])
+ else:
+ out[-1].append(
+ get_offline_status(mvol, bname, brick["hostUuid"], slave))
+
+ return out
+
+
+def get_summary(mastervol, slave_url):
+ """
+ Wrapper function around Geo-rep Status and Gluster Volume Info
+ This combines the output from Bricks list and Geo-rep Status.
+ If a Master Brick node is down or Status is faulty then increments
+ the faulty counter. It also collects the checkpoint status from all
+ workers and compares with Number of Bricks.
+ """
+ down_rows = []
+ faulty_rows = []
+ out = []
+
+ status_data = get(mastervol, slave_url)
+
+ for session in status_data:
+ session_name = ""
+ summary = {
+ "active": 0,
+ "passive": 0,
+ "faulty": 0,
+ "initializing": 0,
+ "stopped": 0,
+ "created": 0,
+ "offline": 0,
+ "paused": 0,
+ "workers": 0,
+ "completed_checkpoints": 0,
+ "checkpoint": False,
+ "checkpoints_ok": False,
+ "ok": False
+ }
+
+ for row in session:
+ summary[row["status"].replace("...", "").lower()] += 1
+ summary["workers"] += 1
+ if row["checkpoint_completed"] == "Yes":
+ summary["completed_checkpoints"] += 1
+
+ session_name = "{0}=>{1}".format(
+ row["mastervol"],
+ row["slave"].replace("ssh://", "")
+ )
+
+ if row["status"] == "Faulty":
+ faulty_rows.append("{0}:{1}".format(row["master_node"],
+ row["master_brick"]))
+
+ if row["status"] == "Offline":
+ down_rows.append("{0}:{1}".format(row["master_node"],
+ row["master_brick"]))
+
+ if summary["active"] == summary["completed_checkpoints"] and \
+ summary["faulty"] == 0 and summary["offline"] == 0:
+ summary["checkpoints_ok"] = True
+
+ if summary["faulty"] == 0 and summary["offline"] == 0:
+ summary["ok"] = True
+
+ if session_name != "":
+ out.append([session_name, summary, faulty_rows, down_rows])
+
+ return out
+
+
+def touch_mount_root(mastervol):
+ # Create a Mount and Touch the Mount point root,
+ # Hack to make sure some event available after
+ # setting Checkpoint. Without this there is a chance of
+ # Checkpoint never completes.
+ with glustermount("localhost", mastervol) as mnt:
+ execute(["touch", mnt])
+
+
+def main(args):
+ turns = 1
+
+ # Stop Force
+ cmd = ["@SBIN_DIR@/gluster", "volume", "geo-replication", args.mastervol,
+ "%s::%s" % (args.slave, args.slavevol), "stop", "force"]
+ execute(cmd)
+ output_ok("Stopped Geo-replication")
+
+ # Set Checkpoint to NOW
+ cmd = ["@SBIN_DIR@/gluster", "volume", "geo-replication", args.mastervol,
+ "%s::%s" % (args.slave, args.slavevol), "config", "checkpoint",
+ "now"]
+ execute(cmd)
+ output_ok("Set Checkpoint")
+
+ # Start the Geo-replication
+ cmd = ["@SBIN_DIR@/gluster", "volume", "geo-replication", args.mastervol,
+ "%s::%s" % (args.slave, args.slavevol), "start"]
+ execute(cmd)
+ output_ok("Started Geo-replication and watching Status for "
+ "Checkpoint completion")
+
+ start_time = int(time.time())
+ duration = 0
+
+ # Sleep till Geo-rep initializes
+ time.sleep(60)
+
+ touch_mount_root(args.mastervol)
+
+ slave_url = "{0}::{1}".format(args.slave, args.slavevol)
+
+ # Loop to Check the Geo-replication Status and Checkpoint
+ # If All Status OK and all Checkpoints complete,
+ # Stop the Geo-replication and Log the Completeness
+ while True:
+ session_summary = get_summary(args.mastervol,
+ slave_url)
+ if len(session_summary) == 0:
+ # If Status command fails with another transaction error
+ # or any other error. Gluster cmd still produces XML output
+ # with different message
+ output_warning("Unable to get Geo-replication Status")
+ else:
+ session_name, summary, faulty_rows, down_rows = session_summary[0]
+ chkpt_status = "COMPLETE" if summary["checkpoints_ok"] else \
+ "NOT COMPLETE"
+ ok_status = "OK" if summary["ok"] else "NOT OK"
+
+ if summary["ok"]:
+ output_ok("All Checkpoints {1}, "
+ "All status {2} (Turns {0:>3})".format(
+ turns, chkpt_status, ok_status))
+ else:
+ output_warning("All Checkpoints {1}, "
+ "All status {2} (Turns {0:>3})".format(
+ turns, chkpt_status, ok_status))
+
+ output_warning("Geo-rep workers Faulty/Offline, "
+ "Faulty: {0} Offline: {1}".format(
+ repr(faulty_rows),
+ repr(down_rows)))
+
+ if summary["checkpoints_ok"]:
+ output_ok("Stopping Geo-replication session now")
+ cmd = ["@SBIN_DIR@/gluster", "volume", "geo-replication",
+ args.mastervol,
+ "%s::%s" % (args.slave, args.slavevol), "stop"]
+ execute(cmd)
+ break
+ else:
+ # If Checkpoint is not complete after a iteration means brick
+ # was down and came online now. SETATTR on mount is not
+ # recorded, So again issue touch on mount root So that
+ # Stime will increase and Checkpoint will complete.
+ touch_mount_root(args.mastervol)
+
+ # Increment the turns and Sleep for 10 sec
+ turns += 1
+ duration = int(time.time()) - start_time
+ if args.timeout > 0 and duration > (args.timeout * 60):
+ cmd = ["@SBIN_DIR@/gluster", "volume", "geo-replication",
+ args.mastervol,
+ "%s::%s" % (args.slave, args.slavevol), "stop", "force"]
+ execute(cmd)
+ output_notok("Timed out, Stopping Geo-replication("
+ "Duration: {0}sec)".format(duration))
+
+ time.sleep(args.interval)
+
+ for mnt in mnt_list:
+ execute(["rmdir", mnt],
+ failure_msg="Unable to Remove temp directory "
+ "{0}".format(mnt), exitcode=0)
+
+if __name__ == "__main__":
+ parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+ description=__doc__)
+ parser.add_argument("mastervol", help="Master Volume Name")
+ parser.add_argument("slave",
+ help="Slave hostname "
+ "(<username>@SLAVEHOST or SLAVEHOST)",
+ metavar="SLAVE")
+ parser.add_argument("slavevol", help="Slave Volume Name")
+ parser.add_argument("--interval", help="Interval in Seconds. "
+ "Wait time before each status check",
+ type=int, default=10)
+ parser.add_argument("--timeout", help="Timeout in minutes. Script will "
+ "stop Geo-replication if Checkpoint is not complete "
+ "in the specified timeout time", type=int,
+ default=0)
+ parser.add_argument("--no-color", help="Don't use Color in CLI output",
+ action="store_true")
+ args = parser.parse_args()
+ if args.no_color:
+ USE_CLI_COLOR = False
+ try:
+ # Check for session existence
+ cmd = ["@SBIN_DIR@/gluster", "volume", "geo-replication",
+ args.mastervol, "%s::%s" % (args.slave, args.slavevol), "status"]
+ execute(cmd)
+ main(args)
+ except KeyboardInterrupt:
+ for mnt in mnt_list:
+ execute(["umount", "-l", mnt],
+ failure_msg="Unable to Unmount Gluster Volume "
+ "Mounted at {0}".format(mnt), exitcode=0)
+ execute(["rmdir", mnt],
+ failure_msg="Unable to Remove temp directory "
+ "{0}".format(mnt), exitcode=0)
+ output_notok("Exiting...")
diff --git a/extras/geo-rep/slave-upgrade.sh b/extras/geo-rep/slave-upgrade.sh
new file mode 100644
index 00000000000..3a37f8e3579
--- /dev/null
+++ b/extras/geo-rep/slave-upgrade.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+#usage: slave-upgrade.sh <volfile-server:volname> <gfid-file>
+# <path-to-gsync-sync-gfid>
+#<slave-volfile-server>: a machine on which gluster cli can fetch slave volume info.
+# slave-volfile-server defaults to localhost.
+#
+#<gfid-file>: a file containing paths and their associated gfids
+# on master. The paths are relative to master mount point
+# (not absolute). An example extract of <gfid-file> can be,
+#
+# <extract>
+# 22114455-57c5-46e9-a783-c40f83a72b09 /dir
+# 25772386-3eb8-4550-a802-c3fdc938ca80 /dir/file
+# </extract>
+
+function get_bricks()
+{
+ gluster volume info $1 | grep -E 'Brick[0-9]+' | sed -e 's/[^:]*:\(.*\)/\1/g'
+}
+
+function cleanup_brick()
+{
+ HOST=$1
+ BRICK=$2
+
+ # TODO: write a C program to receive a list of files and does cleanup on
+ # them instead of spawning a new setfattr process for each file if
+ # performance is bad.
+ ssh $HOST "rm -rf $BRICK/.glusterfs/* && find $BRICK -exec setfattr -x trusted.gfid {} \; 2>/dev/null"
+}
+
+function cleanup_slave()
+{
+ VOLUME_NAME=`echo $1 | sed -e 's/.*:\(.*\)/\1/'`
+
+ BRICKS=`get_bricks $VOLUME_NAME`
+
+ for i in $BRICKS; do
+ HOST=`echo $i | sed -e 's/\(.*\):.*/\1/'`
+ BRICK=`echo $i | sed -e 's/.*:\(.*\)/\1/'`
+ cleanup_brick $HOST $BRICK
+ done
+
+ # Now restart the volume
+ gluster --mode=script volume stop $VOLUME_NAME;
+ gluster volume start $VOLUME_NAME;
+}
+
+function mount_client()
+{
+ local T; # temporary mount
+ local i; # inode number
+
+ VOLUME_NAME=$2;
+ GFID_FILE=$3
+ SYNC_CMD=$4
+
+ T=$(mktemp -d -t ${0##*/}.XXXXXX);
+
+ glusterfs --aux-gfid-mount -s $1 --volfile-id $VOLUME_NAME $T;
+
+ i=$(stat -c '%i' $T);
+
+ cd $T;
+
+ $SYNC_CMD $GFID_FILE
+
+ cd -;
+
+ umount $T || fatal "could not umount $MASTER from $T";
+
+ rmdir $T || warn "rmdir of $T failed";
+}
+
+function sync_gfids()
+{
+ SLAVE=$1
+ GFID_FILE=$2
+ SYNC_CMD=$3
+
+ SLAVE_VOLFILE_SERVER=`echo $SLAVE | sed -e 's/\(.*\):.*/\1/'`
+ SLAVE_VOLUME_NAME=`echo $SLAVE | sed -e 's/.*:\(.*\)/\1/'`
+
+ if [ "x$SLAVE_VOLFILE_SERVER" = "x" ]; then
+ SLAVE_VOLFILE_SERVER="localhost"
+ fi
+
+ mount_client $SLAVE_VOLFILE_SERVER $SLAVE_VOLUME_NAME $GFID_FILE $SYNC_CMD
+}
+
+function upgrade()
+{
+ SLAVE=$1
+ GFID_FILE=$2
+ SYNC_CMD=$3
+
+ cleanup_slave $SLAVE
+
+ sync_gfids $SLAVE $GFID_FILE $SYNC_CMD
+}
+
+upgrade "$@"
diff --git a/extras/gfid-to-dirname.sh b/extras/gfid-to-dirname.sh
new file mode 100755
index 00000000000..fd359fab58a
--- /dev/null
+++ b/extras/gfid-to-dirname.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+function read_symlink()
+{
+ DOT_GLUSTERFS_PATH=$BRICK_PATH/.glusterfs
+ gfid_string=$1
+ symlink_path="$DOT_GLUSTERFS_PATH/${gfid_string:0:2}/${gfid_string:2:2}/$gfid_string"
+ #remove trailing '/'
+ symlink_path=${symlink_path%/}
+ linkname=$(readlink $symlink_path)
+ if [ $? -ne 0 ]; then
+ echo "readlink of $symlink_path returned an error." >&2
+ exit -1
+ fi
+ echo $linkname
+}
+
+main()
+{
+ if [ $# -lt 2 ] ;then
+ echo "Usage: $0 <brick-path> <gfid-string-of-directory>"
+ echo "Example: $0 /bricks/brick1 1b835012-1ae5-4f0d-9db4-64de574d891c"
+ exit -1
+ fi
+
+ BRICK_PATH=$1
+ name=$(read_symlink $2)
+ if [ $? -ne 0 ]; then
+ exit -1
+ fi
+
+ while [ ${name:12:36} != "00000000-0000-0000-0000-000000000001" ]
+ do
+ LOCATION=`basename $name`/$LOCATION
+ GFID_STRING=${name:12:36}
+ name=$(read_symlink $GFID_STRING)
+ if [ $? -ne 0 ]; then
+ exit -1
+ fi
+ done
+
+ LOCATION=`basename $name`/$LOCATION
+ echo "Location of the directory corresponding to gfid:$2 is $BRICK_PATH/$LOCATION"
+}
+
+main "$@"
diff --git a/extras/git-branch-diff.py b/extras/git-branch-diff.py
new file mode 100755
index 00000000000..382513e069e
--- /dev/null
+++ b/extras/git-branch-diff.py
@@ -0,0 +1,285 @@
+#!/bin/python2
+
+"""
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+"""
+
+"""
+ ABOUT:
+ This script helps in visualizing backported and missed commits between two
+ different branches, tags or commit ranges. In the list of missed commits,
+ it will help you identify patches which are posted for reviews on gerrit server.
+
+ USAGE:
+ $ ./extras/git-branch-diff.py --help
+ usage: git-branch-diff.py [-h] [-s SOURCE] -t TARGET [-a AUTHOR] [-p PATH]
+ [-o OPTIONS]
+
+ git wrapper to diff local or remote branches/tags/commit-ranges
+
+ optional arguments:
+ -h, --help show this help message and exit
+ -s SOURCE, --source SOURCE
+ source pattern, it could be a branch, tag or a commit
+ range
+ -t TARGET, --target TARGET
+ target pattern, it could be a branch, tag or a commit
+ range
+ -a AUTHOR, --author AUTHOR
+ default: git config name/email, to provide multiple
+ specify comma separated values
+ -p PATH, --path PATH show source and target diff w.r.t given path, to
+ provide multiple specify space in between them
+ -o OPTIONS, --options OPTIONS
+ add other git options such as --after=<>, --before=<>
+ etc. experts use;
+
+ SAMPLE EXECUTIONS:
+ $ ./extras/git-branch-diff.py -t origin/release-3.8
+
+ $ ./extras/git-branch-diff.py -s local_branch -t origin/release-3.7
+
+ $ ./extras/git-branch-diff.py -s 4517bf8..e66add8 -t origin/release-3.7
+ $ ./extras/git-branch-diff.py -s HEAD..c4efd39 -t origin/release-3.7
+
+ $ ./extras/git-branch-diff.py -t v3.7.11 --author="author@redhat.com"
+ $ ./extras/git-branch-diff.py -t v3.7.11 --author="authorX, authorY, authorZ"
+
+ $ ./extras/git-branch-diff.py -t origin/release-3.8 --path="xlators/"
+ $ ./extras/git-branch-diff.py -t origin/release-3.8 --path="./xlators ./rpc"
+
+ $ ./extras/git-branch-diff.py -t origin/release-3.6 --author="*"
+ $ ./extras/git-branch-diff.py -t origin/release-3.6 --author="All"
+ $ ./extras/git-branch-diff.py -t origin/release-3.6 --author="Null"
+
+ $ ./extras/git-branch-diff.py -t v3.7.11 --options="--after=2015-03-01 \
+ --before=2016-01-30"
+
+ DECLARATION:
+ While backporting commit to another branch only subject of the patch may
+ remain unchanged, all others such as commit message, commit Id, change Id,
+ bug Id, may be changed. This script works by taking commit subject as the
+ key value for comparing two git branches, which can be local or remote.
+
+ Note: This script may ignore commits which have altered their commit subjects
+ while backporting patches. Also this script doesn't have any intelligence to
+ detect squashed commits.
+
+ AUTHOR:
+ Prasanna Kumar Kalever <prasanna.kalever@redhat.com>
+"""
+
+from __future__ import print_function
+import os
+import sys
+import argparse
+import commands
+import subprocess
+import requests
+
+class GitBranchDiff:
+ def __init__ (self):
+ " color symbols"
+ self.tick = u'\033[1;32m[ \u2714 ]\033[0m'
+ self.cross = u'\033[1;31m[ \u2716 ]\033[0m'
+ self.green_set = u'\033[1;34m'
+ self.yello_set = u'\033[4;33m'
+ self.color_unset = '\033[0m'
+
+ self.parse_cmd_args()
+
+ " replace default values with actual values from command args"
+ self.g_author = self.argsdict['author']
+ self.s_pattern = self.argsdict['source']
+ self.t_pattern = self.argsdict['target']
+ self.r_path = self.argsdict['path']
+ self.options = ' '.join(self.argsdict['options'])
+
+ self.gerrit_server = "http://review.gluster.org"
+
+ def check_dir_exist (self, os_path):
+ " checks whether given path exist"
+ path_list = os_path.split()
+ for path in path_list:
+ if not os.path.exists(path):
+ raise argparse.ArgumentTypeError("'%s' path %s is not valid"
+ %(os_path, path))
+ return os_path
+
+ def check_pattern_exist (self):
+ " defend to check given branch[s] exit"
+ status_sbr, op = commands.getstatusoutput('git log ' +
+ self.s_pattern)
+ status_tbr, op = commands.getstatusoutput('git log ' +
+ self.t_pattern)
+ if status_sbr != 0:
+ print("Error: --source=" + self.s_pattern + " doesn't exit\n")
+ self.parser.print_help()
+ exit(status_sbr)
+ elif status_tbr != 0:
+ print("Error: --target=" + self.t_pattern + " doesn't exit\n")
+ self.parser.print_help()
+ exit(status_tbr)
+
+ def check_author_exist (self):
+ " defend to check given author exist, format in case of multiple"
+ contrib_list = ['', '*', 'all', 'All', 'ALL', 'null', 'Null', 'NULL']
+ if self.g_author in contrib_list:
+ self.g_author = ""
+ else:
+ ide_list = self.g_author.split(',')
+ for ide in ide_list:
+ cmd4 = 'git log ' + self.s_pattern + ' --author=' + ide
+ c_list = subprocess.check_output(cmd4, shell = True)
+ if len(c_list) is 0:
+ print("Error: --author=%s doesn't exit" %self.g_author)
+ print("see '%s --help'" %__file__)
+ exit(1)
+ if len(ide_list) > 1:
+ self.g_author = "\|".join(ide_list)
+
+ def connected_to_gerrit (self):
+ "check if gerrit server is reachable"
+ try:
+ r = requests.get(self.gerrit_server, timeout=3)
+ return True
+ except requests.Timeout as err:
+ " request timed out"
+ print("Warning: failed to get list of open review commits on " \
+ "gerrit.\n" \
+ "hint: Request timed out! gerrit server could possibly " \
+ "slow ...\n")
+ return False
+ except requests.RequestException as err:
+ " handle other errors"
+ print("Warning: failed to get list of open review commits on " \
+ "gerrit\n" \
+ "hint: check with internet connection ...\n")
+ return False
+
+ def parse_cmd_args (self):
+ " command line parser"
+ author = subprocess.check_output('git config user.email',
+ shell = True).rstrip('\n')
+ source = "remotes/origin/master"
+ options = [' --pretty=format:"%h %s" ']
+ path = subprocess.check_output('git rev-parse --show-toplevel',
+ shell = True).rstrip('\n')
+ self.parser = argparse.ArgumentParser(description = 'git wrapper to '
+ 'diff local or remote branches/'
+ 'tags/commit-ranges')
+ self.parser.add_argument('-s',
+ '--source',
+ help = 'source pattern, it could be a branch,'
+ ' tag or a commit range',
+ default = source,
+ dest = 'source')
+ self.parser.add_argument('-t',
+ '--target',
+ help = 'target pattern, it could be a branch,'
+ ' tag or a commit range',
+ required = True,
+ dest = 'target')
+ self.parser.add_argument('-a',
+ '--author',
+ help = 'default: git config name/email, '
+ 'to provide multiple specify comma'
+ ' separated values',
+ default = author,
+ dest = 'author')
+ self.parser.add_argument('-p',
+ '--path',
+ type = self.check_dir_exist,
+ help = 'show source and target diff w.r.t '
+ 'given path, to provide multiple '
+ 'specify space in between them',
+ default = path,
+ dest = 'path')
+ self.parser.add_argument('-o',
+ '--options',
+ help = 'add other git options such as '
+ '--after=<>, --before=<> etc. '
+ 'experts use;',
+ default = options,
+ dest = 'options',
+ action='append')
+ self.argsdict = vars(self.parser.parse_args())
+
+ def print_output (self):
+ " display the result list"
+ print("\n------------------------------------------------------------\n")
+ print(self.tick + " Successfully Backported changes:")
+ print(' {' + 'from: ' + self.s_pattern + \
+ ' to: '+ self.t_pattern + '}\n')
+ for key, value in self.s_dict.items():
+ if value in self.t_dict.itervalues():
+ print("[%s%s%s] %s" %(self.yello_set,
+ key,
+ self.color_unset,
+ value))
+ print("\n------------------------------------------------------------\n")
+ print(self.cross + " Missing patches in " + self.t_pattern + ':\n')
+ if self.connected_to_gerrit():
+ cmd3 = "git review -r origin -l"
+ review_list = subprocess.check_output(cmd3, shell = True).split('\n')
+ else:
+ review_list = []
+
+ for key, value in self.s_dict.items():
+ if value not in self.t_dict.itervalues():
+ if any(value in s for s in review_list):
+ print("[%s%s%s] %s %s(under review)%s" %(self.yello_set,
+ key,
+ self.color_unset,
+ value,
+ self.green_set,
+ self.color_unset))
+ else:
+ print("[%s%s%s] %s" %(self.yello_set,
+ key,
+ self.color_unset,
+ value))
+ print("\n------------------------------------------------------------\n")
+
+ def main (self):
+ self.check_pattern_exist()
+ self.check_author_exist()
+
+ " actual git commands"
+ cmd1 = 'git log' + self.options + ' ' + self.s_pattern + \
+ ' --author=\'' + self.g_author + '\' ' + self.r_path
+
+ " could be backported by anybody so --author doesn't apply here"
+ cmd2 = 'git log' + self.options + ' ' + self.t_pattern + \
+ ' ' + self.r_path
+
+ s_list = subprocess.check_output(cmd1, shell = True).split('\n')
+ t_list = subprocess.check_output(cmd2, shell = True)
+
+ if len(t_list) is 0:
+ print("No commits in the target: %s" %self.t_pattern)
+ print("see '%s --help'" %__file__)
+ exit()
+ else:
+ t_list = t_list.split('\n')
+
+ self.s_dict = dict()
+ self.t_dict = dict()
+
+ for item in s_list:
+ self.s_dict.update(dict([item.split(' ', 1)]))
+ for item in t_list:
+ self.t_dict.update(dict([item.split(' ', 1)]))
+
+ self.print_output()
+
+
+if __name__ == '__main__':
+ run = GitBranchDiff()
+ run.main()
diff --git a/extras/gluster-rsyslog-5.8.conf b/extras/gluster-rsyslog-5.8.conf
new file mode 100644
index 00000000000..2519999bcac
--- /dev/null
+++ b/extras/gluster-rsyslog-5.8.conf
@@ -0,0 +1,51 @@
+##### gluster.conf #####
+
+#
+## If you want to log every message to the log file instead of
+## intelligently suppressing repeated messages, set off to
+## RepeatedMsgReduction. This change requires rsyslog restart
+## (eg. run 'service rsyslog restart')
+#
+#$RepeatedMsgReduction off
+$RepeatedMsgReduction on
+
+#
+## The mmcount module provides the capability to count log messages by
+## severity or json property of given app-name. The count value is added
+## into the log message as json property named '$msgid'
+#
+$ModLoad mmcount
+$mmcountKey gf_code # start counting value of gf_code
+
+$template Glusterfsd_dynLogFile,"/var/log/glusterfs/bricks/%app-name%.log"
+$template Gluster_dynLogFile,"/var/log/glusterfs/%app-name%.log"
+
+$template GLFS_Template,"%msgid%/%syslogfacility-text:::uppercase%/%syslogseverity-text:::uppercase% [%TIMESTAMP:::date-rfc3339%] %msg:::sp-if-no-1st-sp%%msg:::drop-last-lf%\n"
+
+#
+## Pass logs to mmcount if app-name is 'gluster'
+#
+if $app-name contains 'gluster' then :mmcount:
+
+if $app-name contains 'glusterfsd' then ?Glusterfsd_dynLogFile;GLFS_Template
+if $app-name contains 'gluster' and not ( $app-name contains 'glusterfsd' ) then ?Gluster_dynLogFile;GLFS_Template
+
+#
+## Sample configuration to send a email alert for every 50th mmcount
+#
+#$ModLoad ommail
+#$ActionMailSMTPServer smtp.example.com
+#$ActionMailFrom rsyslog@example.com
+#$ActionMailTo glusteradmin@example.com
+#$template mailSubject,"50th message of gf_code=9999 on %hostname%"
+#$template mailBody,"RSYSLOG Alert\r\nmsg='%msg%'"
+#$ActionMailSubject mailSubject
+#$ActionExecOnlyOnceEveryInterval 30
+#if $app-name == 'glusterfsd' and $msgid != 0 and $msgid % 50 == 0 \
+#then :ommail:;RSYSLOG_SyslogProtocol23Format
+#
+
+#
+## discard logs where app-name is 'gluster' as we processed already
+#
+if $app-name contains 'gluster' then ~
diff --git a/extras/gluster-rsyslog-7.2.conf b/extras/gluster-rsyslog-7.2.conf
new file mode 100644
index 00000000000..8b2841543f4
--- /dev/null
+++ b/extras/gluster-rsyslog-7.2.conf
@@ -0,0 +1,76 @@
+##### gluster.conf #####
+#
+## If you want to log every message to the log file instead of
+## intelligently suppressing repeated messages, set off to
+## RepeatedMsgReduction. This change requires rsyslog restart
+## (eg. run 'service rsyslog restart')
+#
+#$RepeatedMsgReduction off
+$RepeatedMsgReduction on
+
+$ModLoad mmjsonparse
+*.* :mmjsonparse:
+
+#
+## The mmcount module provides the capability to count log messages by
+## severity or json property of given app-name. The count value is added
+## into the log message as json property named 'mmcount'
+##
+## More info at http://www.rsyslog.com/doc/mmcount.html
+#
+#module(load="mmcount")
+#action(type="mmcount" appname="glusterd" key="!gf_code") # count each value of gf_code of appname glusterd
+#action(type="mmcount" appname="glusterfsd" key="!gf_code") # count each value of gf_code of appname glusterfsd
+#action(type="mmcount" appname="glusterfs" key="!gf_code") # count each value of gf_code of appname glusterfs
+
+template (name="Glusterfsd_dynLogFile" type="string" string="/var/log/glusterfs/bricks/%app-name%.log")
+template (name="Gluster_dynLogFile" type="string" string="/var/log/glusterfs/%app-name%.log")
+
+template(name="GLFS_template" type="list") {
+ property(name="$!mmcount")
+ constant(value="/")
+ property(name="syslogfacility-text" caseConversion="upper")
+ constant(value="/")
+ property(name="syslogseverity-text" caseConversion="upper")
+ constant(value=" ")
+ constant(value="[")
+ property(name="timereported" dateFormat="rfc3339")
+ constant(value="] ")
+ constant(value="[")
+ property(name="$!gf_code")
+ constant(value="] ")
+ constant(value="[")
+ property(name="$!gf_message")
+ constant(value="] ")
+ property(name="$!msg")
+ constant(value="\n")
+}
+
+if $app-name contains 'glusterfsd' then {
+ action(type="omfile"
+ DynaFile="Glusterfsd_dynLogFile"
+ Template="GLFS_template")
+ stop
+}
+
+if $app-name contains 'gluster' then {
+ action(type="omfile"
+ DynaFile="Gluster_dynLogFile"
+ Template="GLFS_template")
+ stop
+}
+
+#
+## send email for every 50th mmcount
+#$ModLoad ommail
+#if $app-name == 'glusterfsd' and $!mmcount <> 0 and $!mmcount % 50 == 0 then {
+# $ActionMailSMTPServer smtp.example.com
+# $ActionMailFrom rsyslog@example.com
+# $ActionMailTo glusteradmin@example.com
+# $template mailSubject,"50th message of gf_code=9999 on %hostname%"
+# $template mailBody,"RSYSLOG Alert\r\nmsg='%msg%'"
+# $ActionMailSubject mailSubject
+# $ActionExecOnlyOnceEveryInterval 30
+# :ommail:;RSYSLOG_SyslogProtocol23Format
+#}
+#
diff --git a/extras/glusterd-sysconfig b/extras/glusterd-sysconfig
new file mode 100644
index 00000000000..8237c571104
--- /dev/null
+++ b/extras/glusterd-sysconfig
@@ -0,0 +1,6 @@
+## Set custom log file and log level (bellow are defaults)
+# LOG_FILE='/var/log/glusterfs/glusterd.log'
+# LOG_LEVEL='INFO'
+
+## Set custom options for glusterd
+# GLUSTERD_OPTIONS=''
diff --git a/extras/glusterd.vol.in b/extras/glusterd.vol.in
new file mode 100644
index 00000000000..5d7bad0e4c8
--- /dev/null
+++ b/extras/glusterd.vol.in
@@ -0,0 +1,15 @@
+volume management
+ type mgmt/glusterd
+ option working-directory @GLUSTERD_WORKDIR@
+ option transport-type socket
+ option transport.socket.keepalive-time 10
+ option transport.socket.keepalive-interval 2
+ option transport.socket.read-fail-log off
+ option transport.socket.listen-port 24007
+ option ping-timeout 0
+ option event-threads 1
+# option lock-timer 180
+# option transport.address-family inet6
+# option base-port 49152
+ option max-port 60999
+end-volume
diff --git a/extras/glusterfs-georep-logrotate b/extras/glusterfs-georep-logrotate
new file mode 100644
index 00000000000..3e7ecf373a1
--- /dev/null
+++ b/extras/glusterfs-georep-logrotate
@@ -0,0 +1,61 @@
+/var/log/glusterfs/geo-replication/*/*.log {
+ sharedscripts
+ weekly
+ maxsize 10M
+ minsize 100k
+
+ # 6 months of logs are good enough
+ rotate 26
+
+ missingok
+ compress
+ delaycompress
+ notifempty
+ postrotate
+ for pid in `ps -aef | grep glusterfs | egrep "\-\-aux-gfid-mount" | awk '{print $2}'`; do
+ /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true
+ done
+ endscript
+}
+
+
+/var/log/glusterfs/geo-replication-slaves/*.log {
+ sharedscripts
+ weekly
+ maxsize 10M
+ minsize 100k
+
+ # 6 months of logs are good enough
+ rotate 26
+
+ missingok
+ compress
+ delaycompress
+ notifempty
+ postrotate
+ for pid in `ps -aef | grep glusterfs | egrep "\-\-aux-gfid-mount" | awk '{print $2}'`; do
+ /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true
+ done
+ endscript
+}
+
+
+/var/log/glusterfs/geo-replication-slaves/*/*.log {
+ sharedscripts
+ weekly
+ maxsize 10M
+ minsize 100k
+
+ # 6 months of logs are good enough
+ rotate 26
+
+ missingok
+ compress
+ delaycompress
+ notifempty
+ postrotate
+ for pid in `ps -aef | grep glusterfs | egrep "\-\-aux-gfid-mount" | awk '{print $2}'`; do
+ /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true
+ done
+ endscript
+}
diff --git a/extras/glusterfs-georep-upgrade.py b/extras/glusterfs-georep-upgrade.py
new file mode 100755
index 00000000000..634576058d6
--- /dev/null
+++ b/extras/glusterfs-georep-upgrade.py
@@ -0,0 +1,77 @@
+#!/usr/bin/python3
+"""
+
+Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
+This file is part of GlusterFS.
+
+This file is licensed to you under your choice of the GNU Lesser
+General Public License, version 3 or any later version (LGPLv3 or
+later), or the GNU General Public License, version 2 (GPLv2), in all
+cases as published by the Free Software Foundation.
+
+"""
+
+import argparse
+import errno
+import os, sys
+import shutil
+from datetime import datetime
+
+def find_htime_path(brick_path):
+ dirs = []
+ htime_dir = os.path.join(brick_path, '.glusterfs/changelogs/htime')
+ for file in os.listdir(htime_dir):
+ if os.path.isfile(os.path.join(htime_dir,file)) and file.startswith("HTIME"):
+ dirs.append(os.path.join(htime_dir, file))
+ else:
+ raise FileNotFoundError("%s unavailable" % (os.path.join(htime_dir, file)))
+ return dirs
+
+def modify_htime_file(brick_path):
+ htime_file_path_list = find_htime_path(brick_path)
+
+ for htime_file_path in htime_file_path_list:
+ changelog_path = os.path.join(brick_path, '.glusterfs/changelogs')
+ temp_htime_path = os.path.join(changelog_path, 'htime/temp_htime_file')
+ with open(htime_file_path, 'r') as htime_file, open(temp_htime_path, 'w') as temp_htime_file:
+ #extract epoch times from htime file
+ paths = htime_file.read().split("\x00")
+
+ for pth in paths:
+ epoch_no = pth.split(".")[-1]
+ changelog = os.path.basename(pth)
+ #convert epoch time to year, month and day
+ if epoch_no != '':
+ date=(datetime.fromtimestamp(float(int(epoch_no))).strftime("%Y/%m/%d"))
+ #update paths in temp htime file
+ temp_htime_file.write("%s/%s/%s\x00" % (changelog_path, date, changelog))
+ #create directory in the format year/month/days
+ path = os.path.join(changelog_path, date)
+
+ if changelog.startswith("CHANGELOG."):
+ try:
+ os.makedirs(path, mode = 0o600);
+ except OSError as exc:
+ if exc.errno == errno.EEXIST:
+ pass
+ else:
+ raise
+
+ #copy existing changelogs to new directory structure, delete old changelog files
+ shutil.copyfile(pth, os.path.join(path, changelog))
+ os.remove(pth)
+
+ #rename temp_htime_file with htime file
+ os.rename(htime_file_path, os.path.join('%s.bak'%htime_file_path))
+ os.rename(temp_htime_path, htime_file_path)
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser()
+ parser.add_argument('brick_path', help="This upgrade script, which is to be run on\
+ server side, takes brick path as the argument, \
+ updates paths inside htime file and alters the directory structure \
+ above the changelog files inorder to support new optimised format \
+ of the directory structure as per \
+ https://review.gluster.org/#/c/glusterfs/+/23733/")
+ args = parser.parse_args()
+ modify_htime_file(args.brick_path)
diff --git a/extras/glusterfs-logrotate b/extras/glusterfs-logrotate
index 373ec2e0b92..6ba6ef18e9f 100644
--- a/extras/glusterfs-logrotate
+++ b/extras/glusterfs-logrotate
@@ -1,17 +1,17 @@
-# perform the log rotate every week
-weekly
-# keep the backup of 52 weeks
-rotate 52
-missingok
-
-# compress the logs, but from the .2 onwards
-compress
-delaycompress
-notifempty
-
# Rotate client logs
/var/log/glusterfs/*.log {
sharedscripts
+ weekly
+ maxsize 10M
+ minsize 100k
+
+# 6 months of logs are good enough
+ rotate 26
+
+ missingok
+ compress
+ delaycompress
+ notifempty
postrotate
/usr/bin/killall -HUP glusterfs > /dev/null 2>&1 || true
/usr/bin/killall -HUP glusterd > /dev/null 2>&1 || true
@@ -21,7 +21,48 @@ notifempty
# Rotate server logs
/var/log/glusterfs/bricks/*.log {
sharedscripts
+ weekly
+ maxsize 10M
+ minsize 100k
+
+# 6 months of logs are good enough
+ rotate 26
+
+ missingok
+ compress
+ delaycompress
+ notifempty
postrotate
/usr/bin/killall -HUP glusterfsd > /dev/null 2>&1 || true
endscript
}
+
+/var/log/glusterfs/samples/*.samp {
+ daily
+ rotate 3
+ sharedscripts
+ missingok
+ compress
+ delaycompress
+}
+
+# Rotate snapd log
+/var/log/glusterfs/snaps/*/*.log {
+ sharedscripts
+ weekly
+ maxsize 10M
+ minsize 100k
+
+ # 6 months of logs are good enough
+ rotate 26
+
+ missingok
+ compress
+ delaycompress
+ notifempty
+ postrotate
+ for pid in `ps -aef | grep glusterfs | egrep "snapd" | awk '{print $2}'`; do
+ /usr/bin/kill -HUP $pid > /dev/null 2>&1 || true
+ done
+ endscript
+}
diff --git a/extras/glusterfs-mode.el b/extras/glusterfs-mode.el
index d4f6dc568b6..a9ed2335ab3 100644
--- a/extras/glusterfs-mode.el
+++ b/extras/glusterfs-mode.el
@@ -1,112 +1,113 @@
-;;; Copyright (C) 2007-2011 Gluster Inc. <http://www.gluster.com>
-;;;
-;;; This program is free software; you can redistribute it and/or modify
-;;; it under the terms of the GNU General Public License as published by
-;;; the Free Software Foundation; either version 2 of the License, or
-;;; (at your option) any later version.
-;;;
-;;; This program is distributed in the hope that it will be useful,
-;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
-;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-;;; GNU General Public License for more details.
-;;;
-;;; You should have received a copy of the GNU General Public License
-;;; along with this program; if not, write to the Free Software
-;;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
-;;;
-
-(defvar glusterfs-mode-hook nil)
-
-;; (defvar glusterfs-mode-map
-;; (let ((glusterfs-mode-map (make-keymap)))
-;; (define-key glusterfs-mode-map "\C-j" 'newline-and-indent)
-;; glusterfs-mode-map)
-;; "Keymap for WPDL major mode")
-
-(add-to-list 'auto-mode-alist '("\\.vol\\'" . glusterfs-mode))
-
-(defconst glusterfs-font-lock-keywords-1
- (list
- ; "cluster/{unify,afr,stripe}"
- ; "performance/{io-cache,io-threads,write-behind,read-ahead,stat-prefetch}"
- ; "protocol/{client/server}"
- ; "features/{trash,posix-locks,fixed-id,filter}"
- ; "stroage/posix"
- ; "encryption/rot-13"
- ; "debug/trace"
- '("\\<\\(cluster/\\(unify\\|afr\\|replicate\\|stripe\\|ha\\|dht\\|distribute\\)\\|\\performance/\\(io-\\(cache\\|threads\\)\\|write-behind\\|read-ahead\\|symlink-cache\\)\\|protocol/\\(server\\|client\\)\\|features/\\(trash\\|posix-locks\\|locks\\|path-converter\\|filter\\)\\|storage/\\(posix\\|bdb\\)\\|encryption/rot-13\\|debug/trace\\)\\>" . font-lock-keyword-face))
-"Additional Keywords to highlight in GlusterFS mode.")
-
-(defconst glusterfs-font-lock-keywords-2
- (append glusterfs-font-lock-keywords-1
- (list
- ; "replicate" "namespace" "scheduler" "remote-subvolume" "remote-host"
- ; "auth.addr" "block-size" "remote-port" "listen-port" "transport-type"
- ; "limits.min-free-disk" "directory"
- ; TODO: add all the keys here.
- '("\\<\\(inode-lru-limit\\|replicate\\|namespace\\|scheduler\\|username\\|password\\|allow\\|reject\\|block-size\\|listen-port\\|transport-type\\|transport-timeout\\|directory\\|page-size\\|page-count\\|aggregate-size\\|non-blocking-io\\|client-volume-filename\\|bind-address\\|self-heal\\|read-only-subvolumes\\|read-subvolume\\|thread-count\\|cache-size\\|window-size\\|force-revalidate-timeout\\|priority\\|include\\|exclude\\|remote-\\(host\\|subvolume\\|port\\)\\|auth.\\(addr\\|login\\)\\|limits.\\(min-disk-free\\|transaction-size\\|ib-verbs-\\(work-request-\\(send-\\|recv-\\(count\\|size\\)\\)\\|port\\|mtu\\|device-name\\)\\)\\)\ \\>" . font-lock-constant-face)))
- "option keys in GlusterFS mode.")
-
-(defconst glusterfs-font-lock-keywords-3
- (append glusterfs-font-lock-keywords-2
- (list
- ; "option" "volume" "end-volume" "subvolumes" "type"
- '("\\<\\(option\ \\|volume\ \\|subvolumes\ \\|type\ \\|end-volume\\)\\>" . font-lock-builtin-face)))
- ;'((regexp-opt (" option " "^volume " "^end-volume" "subvolumes " " type ") t) . font-lock-builtin-face))
- "Minimal highlighting expressions for GlusterFS mode.")
-
-
-(defvar glusterfs-font-lock-keywords glusterfs-font-lock-keywords-3
- "Default highlighting expressions for GlusterFS mode.")
-
-(defvar glusterfs-mode-syntax-table
- (let ((glusterfs-mode-syntax-table (make-syntax-table)))
- (modify-syntax-entry ?\# "<" glusterfs-mode-syntax-table)
- (modify-syntax-entry ?* ". 23" glusterfs-mode-syntax-table)
- (modify-syntax-entry ?\n ">#" glusterfs-mode-syntax-table)
- glusterfs-mode-syntax-table)
- "Syntax table for glusterfs-mode")
-
-;; TODO: add an indentation table
-
-(defun glusterfs-indent-line ()
- "Indent current line as GlusterFS code"
- (interactive)
- (beginning-of-line)
- (if (bobp)
- (indent-line-to 0) ; First line is always non-indented
- (let ((not-indented t) cur-indent)
- (if (looking-at "^[ \t]*volume\ ")
- (progn
- (save-excursion
- (forward-line -1)
- (setq not-indented nil)
- (setq cur-indent 0))))
- (if (looking-at "^[ \t]*end-volume")
- (progn
- (save-excursion
- (forward-line -1)
- (setq cur-indent 0))
- (if (< cur-indent 0) ; We can't indent past the left margin
- (setq cur-indent 0)))
- (save-excursion
- (while not-indented ; Iterate backwards until we find an indentation hint
- (progn
- (setq cur-indent 2) ; Do the actual indenting
- (setq not-indented nil)))))
- (if cur-indent
- (indent-line-to cur-indent)
- (indent-line-to 0)))))
-
-(defun glusterfs-mode ()
- (interactive)
- (kill-all-local-variables)
- ;; (use-local-map glusterfs-mode-map)
- (set-syntax-table glusterfs-mode-syntax-table)
- (set (make-local-variable 'indent-line-function) 'glusterfs-indent-line)
- (set (make-local-variable 'font-lock-defaults) '(glusterfs-font-lock-keywords))
- (setq major-mode 'glusterfs-mode)
- (setq mode-name "GlusterFS")
- (run-hooks 'glusterfs-mode-hook))
-
-(provide 'glusterfs-mode)
+;;; Copyright (C) 2007-2017 Red Hat, Inc. <http://www.redhat.com>
+;;; Copyright (C) 2007-2011 Gluster Inc. <http://www.gluster.com>
+;;;
+;;; This program is free software; you can redistribute it and/or
+;;; modify it under the terms of the GNU General Public License
+;;; as published by the Free Software Foundation; either version 2
+;;; of the License, or (at your option) any later version.
+;;;
+;;; This program is distributed in the hope that it will be useful,
+;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;;; GNU General Public License for more details.
+;;;
+;;; You should have received a copy of the GNU General Public License
+;;; along with this program; if not, write to the Free Software
+;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+;;;
+
+(defvar glusterfs-mode-hook nil)
+
+;; (defvar glusterfs-mode-map
+;; (let ((glusterfs-mode-map (make-keymap)))
+;; (define-key glusterfs-mode-map "\C-j" 'newline-and-indent)
+;; glusterfs-mode-map)
+;; "Keymap for WPDL major mode")
+
+(add-to-list 'auto-mode-alist '("\\.vol\\'" . glusterfs-mode))
+
+(defconst glusterfs-font-lock-keywords-1
+ (list
+ ; "cluster/{unify,afr,stripe}"
+ ; "performance/{io-cache,io-threads,write-behind,read-ahead,stat-prefetch}"
+ ; "protocol/{client/server}"
+ ; "features/{trash,posix-locks,fixed-id,filter}"
+ ; "storage/posix"
+ ; "encryption/rot-13"
+ ; "debug/trace"
+ '("\\<\\(cluster/\\(unify\\|afr\\|replicate\\|stripe\\|ha\\|dht\\|distribute\\)\\|\\performance/\\(io-\\(cache\\|threads\\)\\|write-behind\\|read-ahead\\|symlink-cache\\)\\|protocol/\\(server\\|client\\)\\|features/\\(trash\\|posix-locks\\|locks\\|path-converter\\|filter\\)\\|storage/\\(posix\\|bdb\\)\\|encryption/rot-13\\|debug/trace\\)\\>" . font-lock-keyword-face))
+"Additional Keywords to highlight in GlusterFS mode.")
+
+(defconst glusterfs-font-lock-keywords-2
+ (append glusterfs-font-lock-keywords-1
+ (list
+ ; "replicate" "namespace" "scheduler" "remote-subvolume" "remote-host"
+ ; "auth.addr" "block-size" "remote-port" "listen-port" "transport-type"
+ ; "limits.min-free-disk" "directory"
+ ; TODO: add all the keys here.
+ '("\\<\\(inode-lru-limit\\|replicate\\|namespace\\|scheduler\\|username\\|password\\|allow\\|reject\\|block-size\\|listen-port\\|transport-type\\|transport-timeout\\|directory\\|page-size\\|page-count\\|aggregate-size\\|non-blocking-io\\|client-volume-filename\\|bind-address\\|self-heal\\|read-only-subvolumes\\|read-subvolume\\|thread-count\\|cache-size\\|window-size\\|force-revalidate-timeout\\|priority\\|include\\|exclude\\|remote-\\(host\\|subvolume\\|port\\)\\|auth.\\(addr\\|login\\)\\|limits.\\(min-disk-free\\|transaction-size\\|ib-verbs-\\(work-request-\\(send-\\|recv-\\(count\\|size\\)\\)\\|port\\|mtu\\|device-name\\)\\)\\)\ \\>" . font-lock-constant-face)))
+ "option keys in GlusterFS mode.")
+
+(defconst glusterfs-font-lock-keywords-3
+ (append glusterfs-font-lock-keywords-2
+ (list
+ ; "option" "volume" "end-volume" "subvolumes" "type"
+ '("\\<\\(option\ \\|volume\ \\|subvolumes\ \\|type\ \\|end-volume\\)\\>" . font-lock-builtin-face)))
+ ;'((regexp-opt (" option " "^volume " "^end-volume" "subvolumes " " type ") t) . font-lock-builtin-face))
+ "Minimal highlighting expressions for GlusterFS mode.")
+
+
+(defvar glusterfs-font-lock-keywords glusterfs-font-lock-keywords-3
+ "Default highlighting expressions for GlusterFS mode.")
+
+(defvar glusterfs-mode-syntax-table
+ (let ((glusterfs-mode-syntax-table (make-syntax-table)))
+ (modify-syntax-entry ?\# "<" glusterfs-mode-syntax-table)
+ (modify-syntax-entry ?* ". 23" glusterfs-mode-syntax-table)
+ (modify-syntax-entry ?\n ">#" glusterfs-mode-syntax-table)
+ glusterfs-mode-syntax-table)
+ "Syntax table for glusterfs-mode")
+
+;; TODO: add an indentation table
+
+(defun glusterfs-indent-line ()
+ "Indent current line as GlusterFS code"
+ (interactive)
+ (beginning-of-line)
+ (if (bobp)
+ (indent-line-to 0) ; First line is always non-indented
+ (let ((not-indented t) cur-indent)
+ (if (looking-at "^[ \t]*volume\ ")
+ (progn
+ (save-excursion
+ (forward-line -1)
+ (setq not-indented nil)
+ (setq cur-indent 0))))
+ (if (looking-at "^[ \t]*end-volume")
+ (progn
+ (save-excursion
+ (forward-line -1)
+ (setq cur-indent 0))
+ (if (< cur-indent 0) ; We can't indent past the left margin
+ (setq cur-indent 0)))
+ (save-excursion
+ (while not-indented ; Iterate backwards until we find an indentation hint
+ (progn
+ (setq cur-indent 2) ; Do the actual indenting
+ (setq not-indented nil)))))
+ (if cur-indent
+ (indent-line-to cur-indent)
+ (indent-line-to 0)))))
+
+(defun glusterfs-mode ()
+ (interactive)
+ (kill-all-local-variables)
+ ;; (use-local-map glusterfs-mode-map)
+ (set-syntax-table glusterfs-mode-syntax-table)
+ (set (make-local-variable 'indent-line-function) 'glusterfs-indent-line)
+ (set (make-local-variable 'font-lock-defaults) '(glusterfs-font-lock-keywords))
+ (setq major-mode 'glusterfs-mode)
+ (setq mode-name "GlusterFS")
+ (run-hooks 'glusterfs-mode-hook))
+
+(provide 'glusterfs-mode)
diff --git a/extras/glusterfs.vim b/extras/glusterfs.vim
index 62102c1ee1b..899cc65511c 100644
--- a/extras/glusterfs.vim
+++ b/extras/glusterfs.vim
@@ -1,20 +1,11 @@
" glusterfs.vim: GNU Vim Syntax file for GlusterFS .vol specification
-" Copyright (c) 2017-2011 Gluster, Inc. <http://www.gluster.com>
-" This file is part of GlusterFS.
+" Copyright (c) 2007777777Red Hat, Inc. <http://www.redhat.com>
+" This file is part of GlusterFS.
"
-" GlusterFS is free software; you can redistribute it and/or modify
-" it under the terms of the GNU General Public License as published
-" by the Free Software Foundation; either version 3 of the License,
-" or (at your option) any later version.
-"
-" GlusterFS is distributed in the hope that it will be useful, but
-" WITHOUT ANY WARRANTY; without even the implied warranty of
-" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-" General Public License for more details.
-"
-" You should have received a copy of the GNU General Public License
-" along with this program. If not, see
-" <http://www.gnu.org/licenses/>.
+" This file is licensed to you under your choice of the GNU Lesser
+" General Public License, version 3 or any later version (LGPLv3 or
+" later), or the GNU General Public License, version 2 (GPLv2), in all
+" cases as published by the Free Software Foundation.
"
" Last Modified: Wed Aug 1 00:47:10 IST 2007
" Version: 0.8
diff --git a/extras/gnfs-loganalyse.py b/extras/gnfs-loganalyse.py
index 6b24e5a7192..6341d007188 100644..100755
--- a/extras/gnfs-loganalyse.py
+++ b/extras/gnfs-loganalyse.py
@@ -1,23 +1,16 @@
#!/bin/python
"""
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
"""
+from __future__ import print_function
import os
import string
import sys
@@ -80,7 +73,7 @@ class NFSRequest:
self.replygfid = tokens [gfididx + 1].strip(",")
def dump (self):
- print "ReqLine: " + str(self.calllinecount) + " TimeStamp: " + self.timestamp + ", XID: " + self.xid + " " + self.op + " ARGS: " + self.opdata + " RepLine: " + str(self.replylinecount) + " " + self.replydata
+ print("ReqLine: " + str(self.calllinecount) + " TimeStamp: " + self.timestamp + ", XID: " + self.xid + " " + self.op + " ARGS: " + self.opdata + " RepLine: " + str(self.replylinecount) + " " + self.replydata)
class NFSLogAnalyzer:
@@ -157,7 +150,7 @@ class NFSLogAnalyzer:
return
rcount = len (self.xid_request_map.keys ())
orphancount = len (self.orphan_replies.keys ())
- print "Requests: " + str(rcount) + ", Orphans: " + str(orphancount)
+ print("Requests: " + str(rcount) + ", Orphans: " + str(orphancount))
def dump (self):
self.getStats ()
diff --git a/extras/group-db-workload b/extras/group-db-workload
new file mode 100644
index 00000000000..9334d6fb942
--- /dev/null
+++ b/extras/group-db-workload
@@ -0,0 +1,12 @@
+performance.open-behind=on
+performance.write-behind=off
+performance.stat-prefetch=off
+performance.quick-read=off
+performance.strict-o-direct=on
+performance.read-ahead=off
+performance.io-cache=off
+performance.readdir-ahead=off
+performance.client-io-threads=on
+server.event-threads=4
+client.event-threads=4
+performance.read-after-open=yes
diff --git a/extras/group-distributed-virt b/extras/group-distributed-virt
new file mode 100644
index 00000000000..a960b76c694
--- /dev/null
+++ b/extras/group-distributed-virt
@@ -0,0 +1,10 @@
+performance.quick-read=off
+performance.read-ahead=off
+performance.io-cache=off
+performance.low-prio-threads=32
+network.remote-dio=enable
+features.shard=on
+user.cifs=off
+client.event-threads=4
+server.event-threads=4
+performance.client-io-threads=on
diff --git a/extras/group-gluster-block b/extras/group-gluster-block
new file mode 100644
index 00000000000..1e398019e6b
--- /dev/null
+++ b/extras/group-gluster-block
@@ -0,0 +1,27 @@
+performance.quick-read=off
+performance.read-ahead=off
+performance.io-cache=off
+performance.stat-prefetch=off
+performance.open-behind=off
+performance.readdir-ahead=off
+performance.strict-o-direct=on
+performance.client-io-threads=on
+performance.io-thread-count=32
+performance.high-prio-threads=32
+performance.normal-prio-threads=32
+performance.low-prio-threads=32
+performance.least-prio-threads=4
+client.event-threads=8
+server.event-threads=8
+network.remote-dio=disable
+cluster.eager-lock=enable
+cluster.quorum-type=auto
+cluster.data-self-heal-algorithm=full
+cluster.locking-scheme=granular
+cluster.shd-max-threads=8
+cluster.shd-wait-qlength=10000
+features.shard=on
+features.shard-block-size=64MB
+user.cifs=off
+server.allow-insecure=on
+cluster.choose-local=off
diff --git a/extras/group-metadata-cache b/extras/group-metadata-cache
new file mode 100644
index 00000000000..b890b288fc7
--- /dev/null
+++ b/extras/group-metadata-cache
@@ -0,0 +1,6 @@
+features.cache-invalidation=on
+features.cache-invalidation-timeout=600
+performance.stat-prefetch=on
+performance.cache-invalidation=on
+performance.md-cache-timeout=600
+network.inode-lru-limit=200000
diff --git a/extras/group-nl-cache b/extras/group-nl-cache
new file mode 100644
index 00000000000..897807e8933
--- /dev/null
+++ b/extras/group-nl-cache
@@ -0,0 +1,5 @@
+features.cache-invalidation=on
+features.cache-invalidation-timeout=600
+performance.nl-cache=on
+performance.nl-cache-timeout=600
+network.inode-lru-limit=200000
diff --git a/extras/group-samba b/extras/group-samba
new file mode 100644
index 00000000000..eeee6e06031
--- /dev/null
+++ b/extras/group-samba
@@ -0,0 +1,11 @@
+features.cache-invalidation=on
+features.cache-invalidation-timeout=600
+performance.cache-samba-metadata=on
+performance.stat-prefetch=on
+performance.cache-invalidation=on
+performance.md-cache-timeout=600
+network.inode-lru-limit=200000
+performance.nl-cache=on
+performance.nl-cache-timeout=600
+performance.readdir-ahead=on
+performance.parallel-readdir=on
diff --git a/extras/group-virt.example b/extras/group-virt.example
new file mode 100644
index 00000000000..cc37c98a25c
--- /dev/null
+++ b/extras/group-virt.example
@@ -0,0 +1,24 @@
+performance.quick-read=off
+performance.read-ahead=off
+performance.io-cache=off
+performance.low-prio-threads=32
+network.remote-dio=disable
+performance.strict-o-direct=on
+cluster.eager-lock=enable
+cluster.quorum-type=auto
+cluster.server-quorum-type=server
+cluster.data-self-heal-algorithm=full
+cluster.locking-scheme=granular
+cluster.shd-max-threads=8
+cluster.shd-wait-qlength=10000
+features.shard=on
+user.cifs=off
+cluster.choose-local=off
+client.event-threads=4
+server.event-threads=4
+performance.client-io-threads=on
+network.ping-timeout=20
+server.tcp-user-timeout=20
+server.keepalive-time=10
+server.keepalive-interval=2
+server.keepalive-count=5
diff --git a/extras/hook-scripts/Makefile.am b/extras/hook-scripts/Makefile.am
index 5c6249de707..26059d7dbb9 100644
--- a/extras/hook-scripts/Makefile.am
+++ b/extras/hook-scripts/Makefile.am
@@ -1 +1,7 @@
-EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh S30samba-stop.sh
+EXTRA_DIST = S40ufo-stop.py S56glusterd-geo-rep-create-post.sh
+SUBDIRS = add-brick create delete set start stop reset
+
+scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/gsync-create/post/
+if USE_GEOREP
+scripts_SCRIPTS = S56glusterd-geo-rep-create-post.sh
+endif
diff --git a/extras/hook-scripts/S29CTDBsetup.sh b/extras/hook-scripts/S29CTDBsetup.sh
deleted file mode 100644
index e256be1f3ea..00000000000
--- a/extras/hook-scripts/S29CTDBsetup.sh
+++ /dev/null
@@ -1,69 +0,0 @@
-#! /bin/bash
-#non-portable - RHS-2.0 only
-# - The script mounts the 'meta-vol' on start 'event' on a known
-# directory (eg. /gluster/lock)
-# - Adds the necessary configuration changes for ctdb in smb.conf and
-# restarts smb service.
-# - P.S: There are other 'tasks' that need to be done outside this script
-# to get CTDB based failover up and running.
-
-SMB_CONF=/etc/samba/smb.conf
-
-CTDB_MNT=/gluster/lock
-PROGNAME="ctdb"
-OPTSPEC="volname:"
-VOL=
-# $META is the volume that will be used by CTDB as a shared filesystem.
-# It is not desirable to use this volume for storing 'data' as well.
-# META is set to 'all' (viz. a keyword and hence not a legal volume name)
-# to prevent the script from running for volumes it was not intended.
-# User needs to set META to the volume that serves CTDB lockfile.
-META="all"
-
-function sighup_samba () {
- pid=`cat /var/run/smbd.pid`
- if [ $pid != " " ]
- then
- kill -HUP $pid;
- else
- /etc/init.d/smb start
- fi
-}
-
-function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
- eval set -- "$ARGS"
-
- while true; do
- case $1 in
- --volname)
- shift
- VOL=$1
- ;;
-
- *)
- shift
- break
- ;;
-
- esac
-
- shift
- done
-}
-
-function add_glusterfs_ctdb_options () {
- PAT="Share Definitions"
- GLUSTER_CTDB_CONFIG="# ctdb config for glusterfs\n\tclustering = yes\n\tidmap backend = tdb2\n\tprivate dir = "$CTDB_MNT"\n"
-
- sed -i /"$PAT"/i\ "$GLUSTER_CTDB_CONFIG" $SMB_CONF
-}
-
-parse_args $@
-if [ "$META" = "$VOL" ]
-then
- add_glusterfs_ctdb_options
- sighup_samba
- mount -t glusterfs `hostname`:$VOL "$CTDB_MNT" &
-fi
-
diff --git a/extras/hook-scripts/S30samba-start.sh b/extras/hook-scripts/S30samba-start.sh
deleted file mode 100755
index a42bb07eba8..00000000000
--- a/extras/hook-scripts/S30samba-start.sh
+++ /dev/null
@@ -1,62 +0,0 @@
-#!/bin/bash
-#Need to be copied to hooks/<HOOKS_VER>/start/post
-
-PROGNAME="Ssamba-start"
-OPTSPEC="volname:,mnt:"
-VOL=
-#FIXME: glusterd hook interface will eventually provide mntpt prefix as
-# command line arg
-MNT_PRE="/mnt/samba"
-
-function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
- eval set -- "$ARGS"
-
- while true; do
- case $1 in
- --volname)
- shift
- VOL=$1
- ;;
- --mnt)
- shift
- MNT_PRE=$1
- ;;
- *)
- shift
- break
- ;;
- esac
- shift
- done
-}
-
-function add_samba_export () {
- volname=$1
- mnt_pre=$2
- mkdir -p $mnt_pre/$volname && \
- printf "\n[gluster-$volname]\ncomment=For samba export of volume $volname\npath=$mnt_pre/$volname\nread only=no\nguest ok=yes\n" >> /etc/samba/smb.conf
-}
-
-function mount_volume () {
- volname=$1
- mnt_pre=$2
- #Mount shouldn't block on glusterd to fetch volfile, hence the 'bg'
- mount -t glusterfs `hostname`:$volname $mnt_pre/$volname &
-}
-
-function sighup_samba () {
- pid=`cat /var/run/smbd.pid`
- if [ $pid != "" ]
- then
- kill -HUP $pid;
- else
- /etc/init.d/smb start
- fi
-}
-
-
-parse_args $@
-add_samba_export $VOL $MNT_PRE
-mount_volume $VOL $MNT_PRE
-sighup_samba
diff --git a/extras/hook-scripts/S30samba-stop.sh b/extras/hook-scripts/S30samba-stop.sh
deleted file mode 100755
index 0e483bff835..00000000000
--- a/extras/hook-scripts/S30samba-stop.sh
+++ /dev/null
@@ -1,60 +0,0 @@
-#! /bin/bash
-#Need to be copied to hooks/<HOOKS_VER>/stop/post
-
-PROGNAME="Ssamba-stop"
-OPTSPEC="volname:,mnt:"
-VOL=
-#FIXME: gluster will eventually pass mnt prefix as command line argument
-MNT_PRE="/mnt/samba"
-
-function parse_args () {
- ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
- eval set -- "$ARGS"
-
- while true; do
- case $1 in
- --volname)
- shift
- VOL=$1
- ;;
- --mnt)
- shift
- MNT_PRE=$1
- echo $1
- ;;
- *)
- shift
- break
- ;;
- esac
- shift
- done
-}
-
-function del_samba_export () {
- volname=$1
- cp /etc/samba/smb.conf /tmp/smb.conf
- sed -i "/gluster-$volname/,/^$/d" /tmp/smb.conf &&\
- mv /tmp/smb.conf /etc/samba/smb.conf
-}
-
-function umount_volume () {
- volname=$1
- mnt_pre=$2
- umount -l $mnt_pre/$volname
-}
-
-function sighup_samba () {
- pid=`cat /var/run/smbd.pid`
- if [ $pid != "" ]
- then
- kill -HUP $pid;
- else
- /etc/init.d/smb start
- fi
-}
-
-parse_args $@
-del_samba_export $VOL
-umount_volume $VOL $MNT_PRE
-sighup_samba
diff --git a/extras/hook-scripts/S40ufo-stop.py b/extras/hook-scripts/S40ufo-stop.py
new file mode 100755
index 00000000000..2c79eb1d54a
--- /dev/null
+++ b/extras/hook-scripts/S40ufo-stop.py
@@ -0,0 +1,24 @@
+#!/usr/bin/python3
+
+import os
+from optparse import OptionParser
+
+if __name__ == '__main__':
+ # check if swift is installed
+ try:
+ from gluster.swift.common.Glusterfs import get_mnt_point, unmount
+ except ImportError:
+ import sys
+ sys.exit("Openstack Swift does not appear to be installed properly");
+
+ op = OptionParser(usage="%prog [options...]")
+ op.add_option('--volname', dest='vol', type=str)
+ op.add_option('--last', dest='last', type=str)
+ (opts, args) = op.parse_args()
+
+
+ mnt_point = get_mnt_point(opts.vol)
+ if mnt_point:
+ unmount(mnt_point)
+ else:
+ sys.exit("get_mnt_point returned none for mount point")
diff --git a/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh b/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh
new file mode 100755
index 00000000000..7d6052315bb
--- /dev/null
+++ b/extras/hook-scripts/S56glusterd-geo-rep-create-post.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+#key_val_pair is the arguments passed to the script in the form of
+#key value pair
+
+key_val_pair1=`echo $2 | cut -d ',' -f 1`
+key_val_pair2=`echo $2 | cut -d ',' -f 2`
+key_val_pair3=`echo $2 | cut -d ',' -f 3`
+key_val_pair4=`echo $2 | cut -d ',' -f 4`
+key_val_pair5=`echo $2 | cut -d ',' -f 5`
+key_val_pair6=`echo $2 | cut -d ',' -f 6`
+
+mastervol=`echo $1 | cut -d '=' -f 2`
+if [ "$mastervol" == "" ]; then
+ exit;
+fi
+
+key=`echo $key_val_pair1 | cut -d '=' -f 1`
+val=`echo $key_val_pair1 | cut -d '=' -f 2`
+if [ "$key" != "is_push_pem" ]; then
+ exit;
+fi
+if [ "$val" != '1' ]; then
+ exit;
+fi
+
+key=`echo $key_val_pair2 | cut -d '=' -f 1`
+val=`echo $key_val_pair2 | cut -d '=' -f 2`
+if [ "$key" != "pub_file" ]; then
+ exit;
+fi
+if [ "$val" == "" ]; then
+ exit;
+fi
+
+pub_file=`echo $val`
+pub_file_bname="$(basename $pub_file)"
+pub_file_dname="$(dirname $pub_file)"
+pub_file_tmp=`echo $val`_tmp
+
+key=`echo $key_val_pair3 | cut -d '=' -f 1`
+val=`echo $key_val_pair3 | cut -d '=' -f 2`
+if [ "$key" != "slave_user" ]; then
+ exit;
+fi
+if [ "$val" == "" ]; then
+ exit;
+fi
+slave_user=`echo $val`
+
+key=`echo $key_val_pair4 | cut -d '=' -f 1`
+val=`echo $key_val_pair4 | cut -d '=' -f 2`
+if [ "$key" != "slave_ip" ]; then
+ exit;
+fi
+if [ "$val" == "" ]; then
+ exit;
+fi
+slave_ip=`echo $val`
+
+key=`echo $key_val_pair5 | cut -d '=' -f 1`
+val=`echo $key_val_pair5 | cut -d '=' -f 2`
+if [ "$key" != "slave_vol" ]; then
+ exit;
+fi
+if [ "$val" == "" ]; then
+ exit;
+fi
+slavevol=`echo $val`
+
+key=`echo $key_val_pair6 | cut -d '=' -f 1`
+val=`echo $key_val_pair6 | cut -d '=' -f 2`
+if [ "$key" != "ssh_port" ]; then
+ exit;
+fi
+if [ "$val" == "" ]; then
+ exit;
+fi
+SSH_PORT=`echo $val`
+SSH_OPT="-oPasswordAuthentication=no -oStrictHostKeyChecking=no"
+
+if [ -f $pub_file ]; then
+ # For a non-root user copy the pub file to the user's home directory
+ # For a root user copy the pub files to priv_dir->geo-rep.
+ if [ "$slave_user" != "root" ]; then
+ slave_user_home_dir=`ssh -p ${SSH_PORT} ${SSH_OPT} $slave_user@$slave_ip "getent passwd $slave_user | cut -d ':' -f 6"`
+ scp -P ${SSH_PORT} ${SSH_OPT} $pub_file $slave_user@$slave_ip:$slave_user_home_dir/common_secret.pem.pub_tmp
+ ssh -p ${SSH_PORT} ${SSH_OPT} $slave_user@$slave_ip "mv $slave_user_home_dir/common_secret.pem.pub_tmp $slave_user_home_dir/${mastervol}_${slavevol}_common_secret.pem.pub"
+ else
+ if [[ -z "${GR_SSH_IDENTITY_KEY}" ]]; then
+ scp -P ${SSH_PORT} ${SSH_OPT} $pub_file $slave_ip:$pub_file_tmp
+ ssh -p ${SSH_PORT} ${SSH_OPT} $slave_ip "mv $pub_file_tmp ${pub_file_dname}/${mastervol}_${slavevol}_${pub_file_bname}"
+ ssh -p ${SSH_PORT} ${SSH_OPT} $slave_ip "gluster system:: copy file /geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > /dev/null"
+ ssh -p ${SSH_PORT} ${SSH_OPT} $slave_ip "gluster system:: execute add_secret_pub root geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > /dev/null"
+ ssh -p ${SSH_PORT} ${SSH_OPT} $slave_ip "gluster vol set ${slavevol} features.read-only on"
+ else
+ scp -P ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $pub_file $slave_ip:$pub_file_tmp
+ ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $slave_ip "mv $pub_file_tmp ${pub_file_dname}/${mastervol}_${slavevol}_${pub_file_bname}"
+ ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $slave_ip "gluster system:: copy file /geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > /dev/null"
+ ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $slave_ip "gluster system:: execute add_secret_pub root geo-replication/${mastervol}_${slavevol}_common_secret.pem.pub > /dev/null"
+ ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} ${SSH_OPT} $slave_ip "gluster vol set ${slavevol} features.read-only on"
+ fi
+ fi
+fi
diff --git a/extras/hook-scripts/add-brick/Makefile.am b/extras/hook-scripts/add-brick/Makefile.am
new file mode 100644
index 00000000000..6e2701e909e
--- /dev/null
+++ b/extras/hook-scripts/add-brick/Makefile.am
@@ -0,0 +1,2 @@
+SUBDIRS = post pre
+CLEANFILES =
diff --git a/extras/hook-scripts/add-brick/post/Makefile.am b/extras/hook-scripts/add-brick/post/Makefile.am
new file mode 100644
index 00000000000..9b236df096d
--- /dev/null
+++ b/extras/hook-scripts/add-brick/post/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_DIST = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh
+
+hookdir = $(GLUSTERD_WORKDIR)/hooks/1/add-brick/post/
+if WITH_SERVER
+hook_SCRIPTS = disabled-quota-root-xattr-heal.sh S10selinux-label-brick.sh S13create-subdir-mounts.sh
+endif
diff --git a/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh
new file mode 100755
index 00000000000..4a17c993a77
--- /dev/null
+++ b/extras/hook-scripts/add-brick/post/S10selinux-label-brick.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+#
+# Install to hooks/<HOOKS_VER>/add-brick/post
+#
+# Add an SELinux file context for each brick using the glusterd_brick_t type.
+# This ensures that the brick is relabeled correctly on an SELinux restart or
+# restore. Subsequently, run a restore on the brick path to set the selinux
+# labels.
+#
+###
+
+PROGNAME="Sselinux"
+OPTSPEC="volname:,version:,gd-workdir:,volume-op:"
+VOL=
+
+parse_args () {
+ ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
+ eval set -- "${ARGS}"
+
+ while true; do
+ case ${1} in
+ --volname)
+ shift
+ VOL=${1}
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ --version)
+ shift
+ ;;
+ --volume-op)
+ shift
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+ done
+}
+
+set_brick_labels()
+{
+ local volname="${1}"
+ local fctx
+ local list=()
+
+ fctx="$(semanage fcontext --list -C)"
+
+ # wait for new brick path to be updated under
+ # ${GLUSTERD_WORKDIR}/vols/${volname}/bricks/
+ sleep 5
+
+ # grab the path for each local brick
+ brickpath="${GLUSTERD_WORKDIR}/vols/${volname}/bricks/"
+ brickdirs=$(
+ find "${brickpath}" -type f -exec grep '^path=' {} \; | \
+ cut -d= -f 2 | \
+ sort -u
+ )
+
+ # create a list of bricks for which custom SELinux
+ # label doesn't exist
+ for b in ${brickdirs}; do
+ pattern="${b}(/.*)?"
+ echo "${fctx}" | grep "^${pattern}\s" >/dev/null
+ if [[ $? -ne 0 ]]; then
+ list+=("${pattern}")
+ fi
+ done
+
+ # Add a file context for each brick path in the list and associate with the
+ # glusterd_brick_t SELinux type.
+ for p in ${list[@]}
+ do
+ semanage fcontext --add -t glusterd_brick_t -r s0 "${p}"
+ done
+
+ # Set the labels for which SELinux label was added above
+ for b in ${brickdirs}
+ do
+ echo "${list[@]}" | grep "${b}" >/dev/null
+ if [[ $? -eq 0 ]]; then
+ restorecon -R "${b}"
+ fi
+ done
+}
+
+SELINUX_STATE=$(which getenforce && getenforce)
+[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
+
+parse_args "$@"
+[ -z "${VOL}" ] && exit 1
+
+set_brick_labels "${VOL}"
+
+exit 0
diff --git a/extras/hook-scripts/add-brick/post/S13create-subdir-mounts.sh b/extras/hook-scripts/add-brick/post/S13create-subdir-mounts.sh
new file mode 100755
index 00000000000..1a6923ee7aa
--- /dev/null
+++ b/extras/hook-scripts/add-brick/post/S13create-subdir-mounts.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+##---------------------------------------------------------------------------
+## This script runs the self-heal of the directories which are expected to
+## be present as they are mounted as subdirectory mounts.
+##---------------------------------------------------------------------------
+
+MOUNT_DIR=`mktemp -d -t ${0##*/}.XXXXXX`;
+OPTSPEC="volname:,version:,gd-workdir:,volume-op:"
+PROGNAME="add-brick-create-subdir"
+VOL_NAME=test
+GLUSTERD_WORKDIR="/var/lib/glusterd"
+
+cleanup_mountpoint ()
+{
+ umount -f $MOUNT_DIR;
+ if [ 0 -ne $? ]
+ then
+ return $?
+ fi
+
+ rmdir $MOUNT_DIR;
+ if [ 0 -ne $? ]
+ then
+ return $?
+ fi
+}
+
+##------------------------------------------
+## Parse the arguments
+##------------------------------------------
+ARGS=$(getopt -l $OPTSPEC -name $PROGNAME $@)
+eval set -- "$ARGS"
+
+while true;
+do
+ case $1 in
+ --volname)
+ shift
+ VOL_NAME=$1
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ --version)
+ shift
+ ;;
+ --volume-op)
+ shift
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+done
+
+## See if we have any subdirs to be healed before going further
+subdirs=$(grep 'auth.allow' ${GLUSTERD_WORKDIR}/vols/${VOL_NAME}/info | cut -f2 -d'=' | tr ',' '\n' | cut -f1 -d'(');
+
+if [ -z ${subdirs} ]; then
+ rmdir $MOUNT_DIR;
+ exit 0;
+fi
+
+##----------------------------------------
+## Mount the volume in temp directory.
+## -----------------------------------
+glusterfs -s localhost --volfile-id=$VOL_NAME --client-pid=-50 $MOUNT_DIR;
+if [ 0 -ne $? ]
+then
+ exit $?;
+fi
+
+## -----------------------------------
+# Do the 'stat' on all the directory for now. Ideal fix is to look at subdir
+# list from 'auth.allow' option and only stat them.
+for subdir in ${subdirs}
+do
+ stat ${MOUNT_DIR}/${subdir} > /dev/null;
+done
+
+## Clean up and exit
+cleanup_mountpoint;
diff --git a/extras/hook-scripts/add-brick/post/disabled-quota-root-xattr-heal.sh b/extras/hook-scripts/add-brick/post/disabled-quota-root-xattr-heal.sh
new file mode 100755
index 00000000000..ca17a903549
--- /dev/null
+++ b/extras/hook-scripts/add-brick/post/disabled-quota-root-xattr-heal.sh
@@ -0,0 +1,145 @@
+#!/bin/sh
+
+##---------------------------------------------------------------------------
+## This script updates the 'limit-set' xattr on the newly added node. Please
+## refer hook-scripts/add-brick/pre/S28Quota-root-xattr-heal.sh for the complete
+## description.
+## Do the following only if limit configured on root.
+## 1. Do an auxiliary mount.
+## 2. Get 'limit-set' xattr on root
+## 3. Set xattrs with the same value on the root.
+## 4. Disable itself
+##---------------------------------------------------------------------------
+
+QUOTA_LIMIT_XATTR="trusted.glusterfs.quota.limit-set"
+QUOTA_OBJECT_LIMIT_XATTR="trusted.glusterfs.quota.limit-objects"
+MOUNT_DIR=$(mktemp -d -t "${0##*/}.XXXXXX");
+OPTSPEC="volname:,version:,gd-workdir:,volume-op:"
+PROGNAME="Quota-xattr-heal-add-brick"
+VOL_NAME=
+VERSION=
+VOLUME_OP=
+GLUSTERD_WORKDIR=
+ENABLED_NAME_PREFIX="S28"
+ENABLED_NAME="Quota-root-xattr-heal.sh"
+
+THIS_SCRIPT=$(echo "${0}" | awk -F'/' '{print $NF}')
+
+cleanup_mountpoint ()
+{
+
+ if umount -f "${MOUNT_DIR}"; then
+ return $?
+ fi
+
+ if rmdir "${MOUNT_DIR}"; then
+ return $?
+ fi
+}
+
+disable_and_exit ()
+{
+ if [ -e "${ENABLED_STATE}" ]
+ then
+ unlink "${ENABLED_STATE}";
+ exit $?
+ fi
+
+ exit 0
+}
+
+get_and_set_xattr ()
+{
+ XATTR=$1
+
+ VALUE=$(getfattr -n "${XATTR}" -e hex --absolute-names "${MOUNT_DIR}" 2>&1)
+ RET=$?
+ if [ 0 -eq ${RET} ]; then
+ VALUE=$(echo "${VALUE}" | grep "${XATTR}" | awk -F'=' '{print $NF}')
+ setfattr -n "${XATTR}" -v "${VALUE}" "${MOUNT_DIR}";
+ RET=$?
+ else
+ if echo "${VALUE}" | grep -iq "No such attribute" ; then
+ RET=0
+ fi
+ fi
+
+ return ${RET};
+}
+
+##------------------------------------------
+## Parse the arguments
+##------------------------------------------
+ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
+eval set -- "$ARGS"
+
+while true;
+do
+ case $1 in
+ --volname)
+ shift
+ VOL_NAME=$1
+ ;;
+ --version)
+ shift
+ VERSION=$1
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ --volume-op)
+ shift
+ VOLUME_OP=$1
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+done
+##----------------------------------------
+
+# Avoid long lines
+ENABLED_STATE_1="${GLUSTERD_WORKDIR}/hooks/${VERSION}/${VOLUME_OP}/"
+ENABLED_STATE_2="post/${ENABLED_NAME_PREFIX}${VOL_NAME}-${ENABLED_NAME}"
+ENABLED_STATE="${ENABLED_STATE_1}${ENABLED_STATE_2}"
+
+if [ "${THIS_SCRIPT}" != *"${VOL_NAME}"* ]; then
+ exit 0
+fi
+
+## Is quota enabled?
+FLAG=$(grep "^features.quota=" "${GLUSTERD_WORKDIR}/vols/${VOL_NAME}/info" \
+| awk -F'=' '{print $NF}');
+if [ "${FLAG}" != "on" ]
+then
+ disable_and_exit
+fi
+
+## -----------------------------------
+## Mount the volume in temp directory.
+## -----------------------------------
+# Avoid long lines
+CMD_1="glusterfs -s localhost"
+CMD_2="--volfile-id=${VOL_NAME} client-pid=-42 ${MOUNT_DIR}"
+CMD="${CMD_1}${CMD_2}"
+
+if ${CMD}
+then
+ exit $?;
+fi
+## -----------------------------------
+
+RET1=$(get_and_set_xattr "${QUOTA_LIMIT_XATTR}")
+RET2=$(get_and_set_xattr "${QUOTA_OBJECT_LIMIT_XATTR}")
+
+## Clean up and exit
+cleanup_mountpoint;
+
+if [ "${RET1}" -ne 0 ] || [ "${RET2}" -ne 0 ]; then
+ exit 1
+fi
+
+disable_and_exit;
diff --git a/extras/hook-scripts/add-brick/pre/Makefile.am b/extras/hook-scripts/add-brick/pre/Makefile.am
new file mode 100644
index 00000000000..3288581aa57
--- /dev/null
+++ b/extras/hook-scripts/add-brick/pre/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_DIST = S28Quota-enable-root-xattr-heal.sh
+
+hookdir = $(GLUSTERD_WORKDIR)/hooks/1/add-brick/pre/
+if WITH_SERVER
+hook_SCRIPTS = S28Quota-enable-root-xattr-heal.sh
+endif
diff --git a/extras/hook-scripts/add-brick/pre/S28Quota-enable-root-xattr-heal.sh b/extras/hook-scripts/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
new file mode 100755
index 00000000000..27e85231f45
--- /dev/null
+++ b/extras/hook-scripts/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
@@ -0,0 +1,101 @@
+#!/bin/sh
+
+###############################################################################
+## ----------------------------------------------------------------------------
+## The scripts
+## I. add-brick/pre/S28Quota-root-xattr-heal.sh (itself)
+## II. add-brick/post/disabled-root-xattr-heal.sh AND
+## collectively archieves the job of healing the 'limit-set' xattr upon
+## add-brick to the gluster volume.
+##
+## This script is the 'controlling' script. Upon add-brick this script enables
+## the corresponding script based on the status of the volume.
+## If volume is started - enable add-brick/post script
+## else - enable start/post script.
+##
+## The enabling and disabling of a script is based on the glusterd's logic,
+## that it only runs the scripts which starts its name with 'S'. So,
+## Enable - symlink the file to 'S'*.
+## Disable- unlink symlink
+## ----------------------------------------------------------------------------
+###############################################################################
+
+OPTSPEC="volname:,version:,gd-workdir:,volume-op:"
+PROGNAME="Quota-xattr-heal-add-brick-pre"
+VOL_NAME=
+GLUSTERD_WORKDIR=
+VOLUME_OP=
+VERSION=
+ENABLED_NAME_PREFIX="S28"
+ENABLED_NAME="Quota-root-xattr-heal.sh"
+DISABLED_NAME="disabled-quota-root-xattr-heal.sh"
+
+activate ()
+{
+ ln -sf $DISABLED_STATE $1;
+}
+
+##------------------------------------------
+## Parse the arguments
+##------------------------------------------
+ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
+eval set -- "$ARGS"
+
+while true;
+do
+ case $1 in
+ --volname)
+ shift
+ VOL_NAME=$1
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ --volume-op)
+ shift
+ VOLUME_OP=$1
+ ;;
+ --version)
+ shift
+ VERSION=$1
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+done
+##----------------------------------------
+
+DISABLED_STATE="$GLUSTERD_WORKDIR/hooks/$VERSION/add-brick/post/$DISABLED_NAME"
+ENABLED_STATE_START="$GLUSTERD_WORKDIR/hooks/$VERSION/start/post/""$ENABLED_NAME_PREFIX$VOL_NAME""-""$ENABLED_NAME"
+ENABLED_STATE_ADD_BRICK="$GLUSTERD_WORKDIR/hooks/$VERSION/add-brick/post/""$ENABLED_NAME_PREFIX""$VOL_NAME""-""$ENABLED_NAME";
+
+## Why to proceed if the required script itself is not present?
+ls $DISABLED_STATE;
+if [ 0 -ne $? ]
+then
+ exit $?;
+fi
+
+## Is quota enabled?
+FLAG=`cat $GLUSTERD_WORKDIR/vols/$VOL_NAME/info | grep "^features.quota=" \
+ | awk -F'=' '{print $NF}'`;
+if [ "$FLAG" != "on" ]
+then
+ exit $EXIT_SUCCESS;
+fi
+
+## Is volume started?
+FLAG=`cat $GLUSTERD_WORKDIR/vols/$VOL_NAME/info | grep "^status=" \
+ | awk -F'=' '{print $NF}'`;
+if [ "$FLAG" != "1" ]
+then
+ activate $ENABLED_STATE_START;
+ exit $?
+fi
+
+activate $ENABLED_STATE_ADD_BRICK;
+exit $?
diff --git a/extras/hook-scripts/create/Makefile.am b/extras/hook-scripts/create/Makefile.am
new file mode 100644
index 00000000000..b083a9145d6
--- /dev/null
+++ b/extras/hook-scripts/create/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = post
diff --git a/extras/hook-scripts/create/post/Makefile.am b/extras/hook-scripts/create/post/Makefile.am
new file mode 100644
index 00000000000..fd1892e9589
--- /dev/null
+++ b/extras/hook-scripts/create/post/Makefile.am
@@ -0,0 +1,8 @@
+EXTRA_DIST = S10selinux-label-brick.sh
+
+scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/create/post/
+if WITH_SERVER
+if USE_SELINUX
+scripts_SCRIPTS = S10selinux-label-brick.sh
+endif
+endif
diff --git a/extras/hook-scripts/create/post/S10selinux-label-brick.sh b/extras/hook-scripts/create/post/S10selinux-label-brick.sh
new file mode 100755
index 00000000000..f9b4b1a57e3
--- /dev/null
+++ b/extras/hook-scripts/create/post/S10selinux-label-brick.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# Install to hooks/<HOOKS_VER>/create/post
+#
+# Add an SELinux file context for each brick using the glusterd_brick_t type.
+# This ensures that the brick is relabeled correctly on an SELinux restart or
+# restore. Subsequently, run a restore on the brick path to set the selinux
+# labels.
+#
+###
+
+PROGNAME="Sselinux"
+OPTSPEC="volname:"
+VOL=
+
+parse_args () {
+ ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
+ eval set -- "${ARGS}"
+
+ while true; do
+ case ${1} in
+ --volname)
+ shift
+ VOL=${1}
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+ done
+}
+
+set_brick_labels()
+{
+ volname="${1}"
+
+ # grab the path for each local brick
+ brickpath="/var/lib/glusterd/vols/${volname}/bricks/"
+ brickdirs=$(
+ find "${brickpath}" -type f -exec grep '^path=' {} \; | \
+ cut -d= -f 2 | \
+ sort -u
+ )
+
+ for b in ${brickdirs}; do
+ # Add a file context for each brick path and associate with the
+ # glusterd_brick_t SELinux type.
+ pattern="${b}(/.*)?"
+ semanage fcontext --add -t glusterd_brick_t -r s0 "${pattern}"
+ # Set the labels on the new brick path.
+ restorecon -R "${b}"
+ done
+}
+
+SELINUX_STATE=$(which getenforce && getenforce)
+[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
+
+parse_args "$@"
+[ -z "${VOL}" ] && exit 1
+
+set_brick_labels "${VOL}"
+
+exit 0
diff --git a/extras/hook-scripts/delete/Makefile.am b/extras/hook-scripts/delete/Makefile.am
new file mode 100644
index 00000000000..c98a05d9205
--- /dev/null
+++ b/extras/hook-scripts/delete/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = pre
diff --git a/extras/hook-scripts/delete/pre/Makefile.am b/extras/hook-scripts/delete/pre/Makefile.am
new file mode 100644
index 00000000000..4fbfbe7311f
--- /dev/null
+++ b/extras/hook-scripts/delete/pre/Makefile.am
@@ -0,0 +1,8 @@
+EXTRA_DIST = S10selinux-del-fcontext.sh
+
+scriptsdir = $(GLUSTERD_WORKDIR)/hooks/1/delete/pre/
+if WITH_SERVER
+if USE_SELINUX
+scripts_SCRIPTS = S10selinux-del-fcontext.sh
+endif
+endif
diff --git a/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
new file mode 100755
index 00000000000..056b52afe76
--- /dev/null
+++ b/extras/hook-scripts/delete/pre/S10selinux-del-fcontext.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+#
+# Install to hooks/<HOOKS_VER>/delete/pre
+#
+# Delete the file context associated with the brick path on volume deletion. The
+# associated file context was added during volume creation.
+#
+# We do not explicitly relabel the brick, as this could be time consuming and
+# unnecessary.
+#
+###
+
+PROGNAME="Sselinux"
+OPTSPEC="volname:"
+VOL=
+
+function parse_args () {
+ ARGS=$(getopt -o '' -l ${OPTSPEC} -n ${PROGNAME} -- "$@")
+ eval set -- "${ARGS}"
+
+ while true; do
+ case ${1} in
+ --volname)
+ shift
+ VOL=${1}
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+ done
+}
+
+function delete_brick_fcontext()
+{
+ local volname=$1
+ local fctx
+ local list=()
+
+ fctx="$(semanage fcontext --list -C)"
+ # grab the path for each local brick
+ brickpath="/var/lib/glusterd/vols/${volname}/bricks/"
+ brickdirs=$(find "${brickpath}" -type f -exec grep '^path=' {} \; | \
+ cut -d= -f 2 | sort -u)
+ for b in ${brickdirs}
+ do
+ pattern="${b}(/.*)?"
+ echo "${fctx}" | grep "^${pattern}\s" >/dev/null
+ if [[ $? -eq 0 ]]; then
+ list+=("${pattern}")
+ fi
+ done
+ if [[ ${#list[@]} -gt 0 ]]; then
+ printf 'fcontext --delete %s\n' "${list[@]}" | semanage -i -
+ fi
+ for b in ${brickdirs}
+ do
+ restorecon -R "${b}"
+ done
+}
+
+SELINUX_STATE=$(which getenforce && getenforce)
+[ "${SELINUX_STATE}" = 'Disabled' ] && exit 0
+
+parse_args "$@"
+[ -z "${VOL}" ] && exit 1
+
+delete_brick_fcontext "${VOL}"
+
+# failure to delete the fcontext is not fatal
+exit 0
diff --git a/extras/hook-scripts/reset/Makefile.am b/extras/hook-scripts/reset/Makefile.am
new file mode 100644
index 00000000000..6e2701e909e
--- /dev/null
+++ b/extras/hook-scripts/reset/Makefile.am
@@ -0,0 +1,2 @@
+SUBDIRS = post pre
+CLEANFILES =
diff --git a/extras/hook-scripts/reset/post/Makefile.am b/extras/hook-scripts/reset/post/Makefile.am
new file mode 100644
index 00000000000..1b336ac1a85
--- /dev/null
+++ b/extras/hook-scripts/reset/post/Makefile.am
@@ -0,0 +1 @@
+EXTRA_DIST =
diff --git a/extras/hook-scripts/reset/pre/Makefile.am b/extras/hook-scripts/reset/pre/Makefile.am
new file mode 100644
index 00000000000..1b336ac1a85
--- /dev/null
+++ b/extras/hook-scripts/reset/pre/Makefile.am
@@ -0,0 +1 @@
+EXTRA_DIST =
diff --git a/extras/hook-scripts/set/Makefile.am b/extras/hook-scripts/set/Makefile.am
new file mode 100644
index 00000000000..1fcade4b07f
--- /dev/null
+++ b/extras/hook-scripts/set/Makefile.am
@@ -0,0 +1,2 @@
+SUBDIRS = post
+CLEANFILES =
diff --git a/extras/hook-scripts/set/post/Makefile.am b/extras/hook-scripts/set/post/Makefile.am
new file mode 100644
index 00000000000..506a25a8666
--- /dev/null
+++ b/extras/hook-scripts/set/post/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_DIST = S30samba-set.sh S32gluster_enable_shared_storage.sh
+
+hookdir = $(GLUSTERD_WORKDIR)/hooks/1/set/post/
+if WITH_SERVER
+hook_SCRIPTS = $(EXTRA_DIST)
+endif
diff --git a/extras/hook-scripts/set/post/S30samba-set.sh b/extras/hook-scripts/set/post/S30samba-set.sh
new file mode 100755
index 00000000000..854f131f6c8
--- /dev/null
+++ b/extras/hook-scripts/set/post/S30samba-set.sh
@@ -0,0 +1,161 @@
+#!/bin/bash
+
+#Need to be copied to hooks/<HOOKS_VER>/set/post/
+
+#TODO: All gluster and samba paths are assumed for fedora like systems.
+#Some efforts are required to make it work on other distros.
+
+#The preferred way of creating a smb share of a gluster volume has changed.
+#The old method was to create a fuse mount of the volume and share the mount
+#point through samba.
+#
+#New method eliminates the requirement of fuse mount and changes in fstab.
+#glusterfs_vfs plugin for samba makes call to libgfapi to access the volume.
+#
+#This hook script enables user to enable or disable smb share by volume set
+#option. Keys "user.cifs" and "user.smb" both are valid, but user.smb is
+#preferred.
+
+
+PROGNAME="Ssamba-set"
+OPTSPEC="volname:,gd-workdir:"
+VOL=
+CONFIGFILE=
+LOGFILEBASE=
+PIDDIR=
+GLUSTERD_WORKDIR=
+USERSMB_SET=""
+USERCIFS_SET=""
+
+function parse_args () {
+ ARGS=$(getopt -o 'o:' -l $OPTSPEC -n $PROGNAME -- "$@")
+ eval set -- "$ARGS"
+
+ while true; do
+ case $1 in
+ --volname)
+ shift
+ VOL=$1
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ --)
+ shift
+ break
+ ;;
+ -o)
+ shift
+ read key value < <(echo "$1" | tr "=" " ")
+ case "$key" in
+ "user.cifs")
+ USERCIFS_SET="YES"
+ ;;
+ "user.smb")
+ USERSMB_SET="YES"
+ ;;
+ *)
+ ;;
+ esac
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+ done
+}
+
+function find_config_info () {
+ cmdout=`smbd -b | grep smb.conf`
+ if [ $? -ne 0 ]; then
+ echo "Samba is not installed"
+ exit 1
+ fi
+ CONFIGFILE=`echo $cmdout | awk '{print $2}'`
+ PIDDIR=`smbd -b | grep PIDDIR | awk '{print $2}'`
+ LOGFILEBASE=`smbd -b | grep 'LOGFILEBASE' | awk '{print $2}'`
+}
+
+function add_samba_share () {
+ volname=$1
+ STRING="\n[gluster-$volname]\n"
+ STRING+="comment = For samba share of volume $volname\n"
+ STRING+="vfs objects = glusterfs\n"
+ STRING+="glusterfs:volume = $volname\n"
+ STRING+="glusterfs:logfile = $LOGFILEBASE/glusterfs-$volname.%%M.log\n"
+ STRING+="glusterfs:loglevel = 7\n"
+ STRING+="path = /\n"
+ STRING+="read only = no\n"
+ STRING+="kernel share modes = no\n"
+ printf "$STRING" >> ${CONFIGFILE}
+}
+
+function sighup_samba () {
+ pid=`cat ${PIDDIR}/smbd.pid`
+ if [ "x$pid" != "x" ]
+ then
+ kill -HUP "$pid";
+ else
+ service smb condrestart
+ fi
+}
+
+function deactivate_samba_share () {
+ volname=$1
+ sed -i -e '/^\[gluster-'"$volname"'\]/{ :a' -e 'n; /available = no/H; /^$/!{$!ba;}; x; /./!{ s/^/available = no/; $!{G;x}; $H; }; s/.*//; x; };' ${CONFIGFILE}
+}
+
+function is_volume_started () {
+ volname=$1
+ echo "$(grep status $GLUSTERD_WORKDIR/vols/"$volname"/info |\
+ cut -d"=" -f2)"
+}
+
+function get_smb () {
+ volname=$1
+ uservalue=
+
+ usercifsvalue=$(grep user.cifs $GLUSTERD_WORKDIR/vols/"$volname"/info |\
+ cut -d"=" -f2)
+ usersmbvalue=$(grep user.smb $GLUSTERD_WORKDIR/vols/"$volname"/info |\
+ cut -d"=" -f2)
+
+ if [ -n "$usercifsvalue" ]; then
+ if [ "$usercifsvalue" = "disable" ] || [ "$usercifsvalue" = "off" ]; then
+ uservalue="disable"
+ fi
+ fi
+
+ if [ -n "$usersmbvalue" ]; then
+ if [ "$usersmbvalue" = "disable" ] || [ "$usersmbvalue" = "off" ]; then
+ uservalue="disable"
+ fi
+ fi
+
+ echo "$uservalue"
+}
+
+parse_args "$@"
+if [ "0" = "$(is_volume_started "$VOL")" ]; then
+ exit 0
+fi
+
+
+if [ "$USERCIFS_SET" = "YES" ] || [ "$USERSMB_SET" = "YES" ]; then
+ #Find smb.conf, smbd pid directory and smbd logfile path
+ find_config_info
+
+ if [ "$(get_smb "$VOL")" = "disable" ]; then
+ deactivate_samba_share $VOL
+ else
+ if ! grep --quiet "\[gluster-$VOL\]" ${CONFIGFILE} ; then
+ add_samba_share $VOL
+ else
+ sed -i '/\[gluster-'"$VOL"'\]/,/^$/!b;/available = no/d' ${CONFIGFILE}
+ fi
+ fi
+ sighup_samba
+fi
diff --git a/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
new file mode 100755
index 00000000000..1f2564b44ff
--- /dev/null
+++ b/extras/hook-scripts/set/post/S32gluster_enable_shared_storage.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+
+key=`echo $3 | cut -d '=' -f 1`
+val=`echo $3 | cut -d '=' -f 2`
+if [ "$key" != "cluster.enable-shared-storage" ] && [ "$key" != "enable-shared-storage" ]; then
+ exit;
+fi
+if [ "$val" != 'enable' ]; then
+ if [ "$val" != 'disable' ]; then
+ exit;
+ fi
+fi
+
+option=$val
+
+key_val_pair1=`echo $4 | cut -d ',' -f 1`
+key_val_pair2=`echo $4 | cut -d ',' -f 2`
+
+key=`echo $key_val_pair1 | cut -d '=' -f 1`
+val=`echo $key_val_pair1 | cut -d '=' -f 2`
+if [ "$key" != "is_originator" ]; then
+ exit;
+fi
+is_originator=$val;
+
+key=`echo $key_val_pair2 | cut -d '=' -f 1`
+val=`echo $key_val_pair2 | cut -d '=' -f 2`
+if [ "$key" != "local_node_hostname" ]; then
+ exit;
+fi
+local_node_hostname=$val;
+
+# Read gluster peer status to find the peers
+# which are in 'Peer in Cluster' mode and
+# are connected.
+
+number_of_connected_peers=0
+while read -r line
+do
+ # Already got two connected peers. Including the current node
+ # we have 3 peers which is enough to create a shared storage
+ # with replica 3
+ if [ "$number_of_connected_peers" == "2" ]; then
+ break;
+ fi
+
+ key=`echo $line | cut -d ':' -f 1`
+ if [ "$key" == "Hostname" ]; then
+ hostname=`echo $line | cut -d ':' -f 2 | xargs`
+ fi
+
+ if [ "$key" == "State" ]; then
+ peer_state=`echo $line | cut -d ':' -f 2 | cut -d '(' -f 1 | xargs`
+ conn_state=`echo $line | cut -d '(' -f 2 | cut -d ')' -f 1 | xargs`
+
+ if [ "$peer_state" == "Peer in Cluster" ]; then
+ if [ "$conn_state" == "Connected" ]; then
+ ((number_of_connected_peers++))
+ connected_peer[$number_of_connected_peers]=$hostname
+ fi
+ fi
+ fi
+
+done < <(gluster peer status)
+
+# Include current node in connected peer list
+((number_of_connected_peers++))
+connected_peer[$number_of_connected_peers]=$local_node_hostname
+
+# forming the create vol command
+create_cmd="gluster --mode=script --wignore volume create \
+ gluster_shared_storage replica $number_of_connected_peers"
+
+# Adding the brick names in the command
+for i in "${connected_peer[@]}"
+do
+ create_cmd=$create_cmd" "$i:"$GLUSTERD_WORKDIR"/ss_brick
+done
+
+if [ "$option" == "disable" ]; then
+ # Unmount the volume on all the nodes
+ umount /run/gluster/shared_storage
+ cat /etc/fstab | grep -v "gluster_shared_storage /run/gluster/shared_storage/" > /run/gluster/fstab.tmp
+ mv /run/gluster/fstab.tmp /etc/fstab
+fi
+
+if [ "$is_originator" == 1 ]; then
+ if [ "$option" == "enable" ]; then
+ # Create and start the volume
+ $create_cmd
+ gluster --mode=script --wignore volume start gluster_shared_storage
+ fi
+
+ if [ "$option" == "disable" ]; then
+ # Stop and delete the volume
+ gluster --mode=script --wignore volume stop gluster_shared_storage
+ gluster --mode=script --wignore volume delete gluster_shared_storage
+ fi
+fi
+
+function check_volume_status()
+{
+ status=`gluster volume info gluster_shared_storage | grep Status | cut -d ':' -f 2 | xargs`
+ echo $status
+}
+
+key=`echo $5 | cut -d '=' -f 1`
+val=`echo $5 | cut -d '=' -f 2`
+if [ "$key" == "transport.address-family" ]; then
+ mount_cmd="mount -t glusterfs -o xlator-option=transport.address-family=inet6 \
+ $local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage"
+else
+ mount_cmd="mount -t glusterfs $local_node_hostname:/gluster_shared_storage \
+ /run/gluster/shared_storage"
+fi
+
+if [ "$option" == "enable" ]; then
+ retry=0;
+ # Wait for volume to start before mounting
+ status=$(check_volume_status)
+ while [ "$status" != "Started" ]; do
+ sleep 5;
+ ((retry++))
+ if [ "$retry" == 3 ]; then
+ break;
+ fi
+ status=$(check_volume_status)
+ done
+ # Mount the volume on all the nodes
+ umount /run/gluster/shared_storage
+ mkdir -p /run/gluster/shared_storage
+ $mount_cmd
+ cp /etc/fstab /run/gluster/fstab.tmp
+ echo "$local_node_hostname:/gluster_shared_storage /run/gluster/shared_storage/ glusterfs defaults 0 0" >> /run/gluster/fstab.tmp
+ mv /run/gluster/fstab.tmp /etc/fstab
+fi
diff --git a/extras/hook-scripts/start/Makefile.am b/extras/hook-scripts/start/Makefile.am
new file mode 100644
index 00000000000..1fcade4b07f
--- /dev/null
+++ b/extras/hook-scripts/start/Makefile.am
@@ -0,0 +1,2 @@
+SUBDIRS = post
+CLEANFILES =
diff --git a/extras/hook-scripts/start/post/Makefile.am b/extras/hook-scripts/start/post/Makefile.am
new file mode 100644
index 00000000000..792019d3c9f
--- /dev/null
+++ b/extras/hook-scripts/start/post/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_DIST = S29CTDBsetup.sh S30samba-start.sh S31ganesha-start.sh
+
+hookdir = $(GLUSTERD_WORKDIR)/hooks/1/start/post/
+if WITH_SERVER
+hook_SCRIPTS = $(EXTRA_DIST)
+endif
diff --git a/extras/hook-scripts/start/post/S29CTDBsetup.sh b/extras/hook-scripts/start/post/S29CTDBsetup.sh
new file mode 100755
index 00000000000..69a0d89a3eb
--- /dev/null
+++ b/extras/hook-scripts/start/post/S29CTDBsetup.sh
@@ -0,0 +1,84 @@
+#! /bin/bash
+#
+# - The script mounts the 'meta-vol' on start 'event' on a known
+# directory (eg. /gluster/lock)
+# - P.S: There are other 'tasks' that need to be done outside this script
+# to get CTDB based failover up and running.
+
+CTDB_MNT=/gluster/lock
+# Make sure ping-timeout is not default for CTDB volume
+PING_TIMEOUT_SECS=10
+PROGNAME="ctdb"
+OPTSPEC="volname:,gd-workdir:,version:,volume-op:,first:"
+HOSTNAME=`hostname`
+MNTOPTS="_netdev,transport=tcp,xlator-option=*client*.ping-timeout=${PING_TIMEOUT_SECS}"
+VOL=
+GLUSTERD_WORKDIR=
+VERSION=
+VOLUME_OP=
+FIRST=
+# $META is the volume that will be used by CTDB as a shared filesystem.
+# It is not desirable to use this volume for storing 'data' as well.
+# META is set to 'all' (viz. a keyword and hence not a legal volume name)
+# to prevent the script from running for volumes it was not intended.
+# User needs to set META to the volume that serves CTDB lockfile.
+META="all"
+
+function parse_args () {
+ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
+ eval set -- "$ARGS"
+
+ while true; do
+ case $1 in
+ --volname)
+ shift
+ VOL=$1
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ --version)
+ shift
+ VERSION=$1
+ ;;
+ --volume-op)
+ shift
+ VOLUME_OP=$1
+ ;;
+ --first)
+ shift
+ FIRST=$1
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+
+ shift
+ done
+}
+
+function add_fstab_entry () {
+ volname=$1
+ mntpt=$2
+ mntopts="${MNTOPTS}"
+
+ mntent="${HOSTNAME}:/${volname} ${mntpt} glusterfs ${mntopts} 0 0"
+ exists=`grep "${mntpt}" /etc/fstab`
+ if [ "$exists" == "" ]
+ then
+ echo "${mntent}" >> /etc/fstab
+ fi
+}
+
+parse_args "$@"
+if [ "$META" = "$VOL" ]
+then
+ mkdir -p $CTDB_MNT
+ sleep 5
+ mount -t glusterfs -o${MNTOPTS} ${HOSTNAME}:/$VOL "$CTDB_MNT" && \
+ add_fstab_entry $VOL $CTDB_MNT
+ chkconfig ctdb on
+fi
diff --git a/extras/hook-scripts/start/post/S30samba-start.sh b/extras/hook-scripts/start/post/S30samba-start.sh
new file mode 100755
index 00000000000..cac0cbf1464
--- /dev/null
+++ b/extras/hook-scripts/start/post/S30samba-start.sh
@@ -0,0 +1,145 @@
+#!/bin/bash
+
+#Need to be copied to hooks/<HOOKS_VER>/start/post
+
+#TODO: All gluster and samba paths are assumed for fedora like systems.
+#Some efforts are required to make it work on other distros.
+
+#The preferred way of creating a smb share of a gluster volume has changed.
+#The old method was to create a fuse mount of the volume and share the mount
+#point through samba.
+#
+#New method eliminates the requirement of fuse mount and changes in fstab.
+#glusterfs_vfs plugin for samba makes call to libgfapi to access the volume.
+#
+#This hook script automagically creates shares for volume on every volume start
+#event by adding the entries in smb.conf file and sending SIGHUP to samba.
+#
+#In smb.conf:
+#glusterfs vfs plugin has to be specified as required vfs object.
+#Path value is relative to the root of gluster volume;"/" signifies complete
+#volume.
+
+PROGNAME="Ssamba-start"
+OPTSPEC="volname:,gd-workdir:,version:,volume-op:,first:"
+VOL=
+CONFIGFILE=
+LOGFILEBASE=
+PIDDIR=
+GLUSTERD_WORKDIR=
+VERSION=
+VOLUME_OP=
+FIRST=
+
+function parse_args () {
+ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
+ eval set -- "$ARGS"
+
+ while true; do
+ case $1 in
+ --volname)
+ shift
+ VOL=$1
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ --version)
+ shift
+ VERSION=$1
+ ;;
+ --volume-op)
+ shift
+ VOLUME_OP=$1
+ ;;
+ --first)
+ shift
+ FIRST=$1
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+
+ shift
+ done
+}
+
+function find_config_info () {
+ cmdout=$(smbd -b 2> /dev/null)
+ CONFIGFILE=$(echo "$cmdout" | grep CONFIGFILE | awk '{print $2}')
+ if [ -z "$CONFIGFILE" ]; then
+ echo "Samba is not installed"
+ exit 1
+ fi
+ PIDDIR=$(echo "$cmdout" | grep PIDDIR | awk '{print $2}')
+ LOGFILEBASE=$(echo "$cmdout" | grep 'LOGFILEBASE' | awk '{print $2}')
+}
+
+function add_samba_share () {
+ volname=$1
+ STRING="\n[gluster-$volname]\n"
+ STRING+="comment = For samba share of volume $volname\n"
+ STRING+="vfs objects = glusterfs\n"
+ STRING+="glusterfs:volume = $volname\n"
+ STRING+="glusterfs:logfile = $LOGFILEBASE/glusterfs-$volname.%%M.log\n"
+ STRING+="glusterfs:loglevel = 7\n"
+ STRING+="path = /\n"
+ STRING+="read only = no\n"
+ STRING+="kernel share modes = no\n"
+ printf "$STRING" >> "${CONFIGFILE}"
+}
+
+function sighup_samba () {
+ pid=$(cat "${PIDDIR}/smbd.pid" 2> /dev/null)
+ if [ "x$pid" != "x" ]
+ then
+ kill -HUP "$pid";
+ else
+ service smb condrestart
+ fi
+}
+
+function get_smb () {
+ volname=$1
+ uservalue=
+
+ usercifsvalue=$(grep user.cifs "$GLUSTERD_WORKDIR"/vols/"$volname"/info |\
+ cut -d"=" -f2)
+ usersmbvalue=$(grep user.smb "$GLUSTERD_WORKDIR"/vols/"$volname"/info |\
+ cut -d"=" -f2)
+
+ if [ -n "$usercifsvalue" ]; then
+ if [ "$usercifsvalue" = "enable" ] || [ "$usercifsvalue" = "on" ]; then
+ uservalue="enable"
+ fi
+ fi
+
+ if [ -n "$usersmbvalue" ]; then
+ if [ "$usersmbvalue" = "enable" ] || [ "$usersmbvalue" = "on" ]; then
+ uservalue="enable"
+ fi
+ fi
+
+ echo "$uservalue"
+}
+
+parse_args "$@"
+
+value=$(get_smb "$VOL")
+
+if [ -z "$value" ] || [ "$value" != "enable" ]; then
+ exit 0
+fi
+
+#Find smb.conf, smbd pid directory and smbd logfile path
+find_config_info
+
+if ! grep --quiet "\[gluster-$VOL\]" "${CONFIGFILE}" ; then
+ add_samba_share "$VOL"
+else
+ sed -i '/\[gluster-'"$VOL"'\]/,/^$/!b;/available = no/d' "${CONFIGFILE}"
+fi
+sighup_samba
diff --git a/extras/hook-scripts/start/post/S31ganesha-start.sh b/extras/hook-scripts/start/post/S31ganesha-start.sh
new file mode 100755
index 00000000000..7ad6f23ad06
--- /dev/null
+++ b/extras/hook-scripts/start/post/S31ganesha-start.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+PROGNAME="Sganesha-start"
+OPTSPEC="volname:,gd-workdir:"
+VOL=
+declare -i EXPORT_ID
+ganesha_key="ganesha.enable"
+GANESHA_DIR="/run/gluster/shared_storage/nfs-ganesha"
+CONF1="$GANESHA_DIR/ganesha.conf"
+GLUSTERD_WORKDIR=
+
+function parse_args ()
+{
+ ARGS=$(getopt -l $OPTSPEC -o "o" -name $PROGNAME $@)
+ eval set -- "$ARGS"
+
+ while true; do
+ case $1 in
+ --volname)
+ shift
+ VOL=$1
+ ;;
+ --gd-workdir)
+ shift
+ GLUSTERD_WORKDIR=$1
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+ done
+}
+
+
+
+#This function generates a new export entry as export.volume_name.conf
+function write_conf()
+{
+echo -e "# WARNING : Using Gluster CLI will overwrite manual
+# changes made to this file. To avoid it, edit the
+# file, copy it over to all the NFS-Ganesha nodes
+# and run ganesha-ha.sh --refresh-config."
+
+echo "EXPORT{"
+echo " Export_Id = 2;"
+echo " Path = \"/$VOL\";"
+echo " FSAL {"
+echo " name = \"GLUSTER\";"
+echo " hostname=\"localhost\";"
+echo " volume=\"$VOL\";"
+echo " }"
+echo " Access_type = RW;"
+echo " Disable_ACL = true;"
+echo " Squash=\"No_root_squash\";"
+echo " Pseudo=\"/$VOL\";"
+echo " Protocols = \"3\", \"4\" ;"
+echo " Transports = \"UDP\",\"TCP\";"
+echo " SecType = \"sys\";"
+echo "}"
+}
+
+#It adds the export dynamically by sending dbus signals
+function export_add()
+{
+ dbus-send --print-reply --system --dest=org.ganesha.nfsd \
+/org/ganesha/nfsd/ExportMgr org.ganesha.nfsd.exportmgr.AddExport \
+string:$GANESHA_DIR/exports/export.$VOL.conf string:"EXPORT(Export_Id=$EXPORT_ID)"
+
+}
+
+# based on src/scripts/ganeshactl/Ganesha/export_mgr.py
+function is_exported()
+{
+ local volume="${1}"
+
+ dbus-send --type=method_call --print-reply --system \
+ --dest=org.ganesha.nfsd /org/ganesha/nfsd/ExportMgr \
+ org.ganesha.nfsd.exportmgr.ShowExports \
+ | grep -w -q "/${volume}"
+
+ return $?
+}
+
+# Check the info file (contains the volume options) to see if Ganesha is
+# enabled for this volume.
+function ganesha_enabled()
+{
+ local volume="${1}"
+ local info_file="${GLUSTERD_WORKDIR}/vols/${VOL}/info"
+ local enabled="off"
+
+ enabled=$(grep -w ${ganesha_key} ${info_file} | cut -d"=" -f2)
+
+ [ "${enabled}" == "on" ]
+
+ return $?
+}
+
+parse_args $@
+
+if ganesha_enabled ${VOL} && ! is_exported ${VOL}
+then
+ if [ ! -e ${GANESHA_DIR}/exports/export.${VOL}.conf ]
+ then
+ #Remove export entry from nfs-ganesha.conf
+ sed -i /$VOL.conf/d $CONF1
+ write_conf ${VOL} > ${GANESHA_DIR}/exports/export.${VOL}.conf
+ EXPORT_ID=`cat $GANESHA_DIR/.export_added`
+ EXPORT_ID=EXPORT_ID+1
+ echo $EXPORT_ID > $GANESHA_DIR/.export_added
+ sed -i s/Export_Id.*/"Export_Id=$EXPORT_ID;"/ \
+ $GANESHA_DIR/exports/export.$VOL.conf
+ echo "%include \"$GANESHA_DIR/exports/export.$VOL.conf\"" >> $CONF1
+ else
+ EXPORT_ID=$(grep ^[[:space:]]*Export_Id $GANESHA_DIR/exports/export.$VOL.conf |\
+ awk -F"[=,;]" '{print $2}' | tr -d '[[:space:]]')
+ fi
+ export_add $VOL
+fi
+
+exit 0
diff --git a/extras/hook-scripts/stop/Makefile.am b/extras/hook-scripts/stop/Makefile.am
new file mode 100644
index 00000000000..e2ac8e2740b
--- /dev/null
+++ b/extras/hook-scripts/stop/Makefile.am
@@ -0,0 +1,2 @@
+SUBDIRS = pre
+CLEANFILES =
diff --git a/extras/hook-scripts/stop/pre/Makefile.am b/extras/hook-scripts/stop/pre/Makefile.am
new file mode 100644
index 00000000000..9e8d1565e93
--- /dev/null
+++ b/extras/hook-scripts/stop/pre/Makefile.am
@@ -0,0 +1,6 @@
+EXTRA_DIST = S29CTDB-teardown.sh S30samba-stop.sh
+
+hookdir = $(GLUSTERD_WORKDIR)/hooks/1/stop/pre/
+if WITH_SERVER
+hook_SCRIPTS = $(EXTRA_DIST)
+endif
diff --git a/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh b/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh
new file mode 100755
index 00000000000..0975a00f18d
--- /dev/null
+++ b/extras/hook-scripts/stop/pre/S29CTDB-teardown.sh
@@ -0,0 +1,62 @@
+#! /bin/bash
+
+CTDB_MNT=/gluster/lock
+PROGNAME="ctdb"
+OPTSPEC="volname:,last:"
+VOL=
+LAST=
+# $META is the volume that will be used by CTDB as a shared filesystem.
+# It is not desirable to use this volume for storing 'data' as well.
+# META is set to 'all' (viz. a keyword and hence not a legal volume name)
+# to prevent the script from running for volumes it was not intended.
+# User needs to set META to the volume that serves CTDB lockfile.
+META="all"
+
+function parse_args () {
+ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
+ eval set -- "$ARGS"
+
+ while true; do
+ case $1 in
+ --volname)
+ shift
+ VOL=$1
+ ;;
+ --last)
+ shift
+ LAST=$1
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+ shift
+ done
+}
+
+
+function remove_fstab_entry () {
+ mntpt=$1
+ fstab="/etc/fstab"
+ exists=`grep "$mntpt" ${fstab}`
+ esc_mntpt=$(echo -e $mntpt | sed 's/\//\\\//g')
+ if [ "$exists" != " " ]
+ then
+ sed -i /"$esc_mntpt"/d $fstab
+ exists=`grep "$mntpt" ${fstab}`
+ if [ "$exists" != " " ]
+ then
+ echo "fstab entry cannot be removed for unknown reason"
+ exit 1
+ fi
+ fi
+}
+
+parse_args "$@"
+if [ "$META" = "$VOL" ]
+then
+ umount "$CTDB_MNT"
+ chkconfig ctdb off
+ remove_fstab_entry $CTDB_MNT
+fi
diff --git a/extras/hook-scripts/stop/pre/S30samba-stop.sh b/extras/hook-scripts/stop/pre/S30samba-stop.sh
new file mode 100755
index 00000000000..ea799381d62
--- /dev/null
+++ b/extras/hook-scripts/stop/pre/S30samba-stop.sh
@@ -0,0 +1,77 @@
+#! /bin/bash
+
+#Need to be copied to hooks/<HOOKS_VER>/stop/pre
+
+#TODO: All gluster and samba paths are assumed for fedora like systems.
+#Some efforts are required to make it work on other distros.
+
+#The preferred way of creating a smb share of a gluster volume has changed.
+#The old method was to create a fuse mount of the volume and share the mount
+#point through samba.
+#
+#New method eliminates the requirement of fuse mount and changes in fstab.
+#glusterfs_vfs plugin for samba makes call to libgfapi to access the volume.
+#
+#This hook script automagically removes shares for volume on every volume stop
+#event by removing the volume related entries(if any) in smb.conf file.
+
+PROGNAME="Ssamba-stop"
+OPTSPEC="volname:,last:"
+VOL=
+CONFIGFILE=
+PIDDIR=
+LAST=
+
+function parse_args () {
+ ARGS=$(getopt -o '' -l $OPTSPEC -n $PROGNAME -- "$@")
+ eval set -- "$ARGS"
+
+ while true; do
+ case $1 in
+ --volname)
+ shift
+ VOL=$1
+ ;;
+ --last)
+ shift
+ LAST=$1
+ ;;
+ *)
+ shift
+ break
+ ;;
+ esac
+
+ shift
+ done
+}
+
+function find_config_info () {
+ cmdout=`smbd -b | grep smb.conf`
+ if [ $? -ne 0 ];then
+ echo "Samba is not installed"
+ exit 1
+ fi
+ CONFIGFILE=`echo $cmdout | awk '{print $2}'`
+ PIDDIR=`smbd -b | grep PIDDIR | awk '{print $2}'`
+}
+
+function deactivate_samba_share () {
+ volname=$1
+ sed -i -e '/^\[gluster-'"$volname"'\]/{ :a' -e 'n; /available = no/H; /^$/!{$!ba;}; x; /./!{ s/^/available = no/; $!{G;x}; $H; }; s/.*//; x; };' ${CONFIGFILE}
+}
+
+function sighup_samba () {
+ pid=`cat ${PIDDIR}/smbd.pid`
+ if [ "x$pid" != "x" ]
+ then
+ kill -HUP $pid;
+ else
+ service smb condrestart
+ fi
+}
+
+parse_args "$@"
+find_config_info
+deactivate_samba_share $VOL
+sighup_samba
diff --git a/extras/identify-hangs.sh b/extras/identify-hangs.sh
new file mode 100755
index 00000000000..ebc6bf144aa
--- /dev/null
+++ b/extras/identify-hangs.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+function get_statedump_fnames_without_timestamps
+{
+ ls | grep -E "[.]dump[.][0-9][0-9]*" | cut -f1-3 -d'.' | sort -u
+}
+
+function get_non_uniq_fields
+{
+ local statedump_fname_prefix=$1
+ print_stack_lkowner_unique_in_one_line "$statedump_fname_prefix" | sort | uniq -c | grep -vE "^\s*1 " | awk '{$1="repeats="$1; print $0}'
+}
+
+function print_stack_lkowner_unique_in_one_line
+{
+ local statedump_fname_prefix=$1
+ sed -e '/./{H;$!d;}' -e 'x;/unique=/!d;/stack=/!d;/lk-owner=/!d;/pid=/!d;' "${statedump_fname_prefix}"* | grep -E "(stack|lk-owner|unique|pid)=" | paste -d " " - - - -
+}
+
+function get_stacks_that_appear_in_multiple_statedumps
+{
+ #If a stack with same 'unique/lk-owner/stack' appears in multiple statedumps
+ #print the stack
+ local statedump_fname_prefix=$1
+ while read -r non_uniq_stack;
+ do
+ if [ -z "$printed" ];
+ then
+ printed="1"
+ fi
+ echo "$statedump_fname_prefix" "$non_uniq_stack"
+ done < <(get_non_uniq_fields "$statedump_fname_prefix")
+}
+
+statedumpdir=${1}
+if [ -z "$statedumpdir" ];
+then
+ echo "Usage: $0 <statedump-dir>"
+ exit 1
+fi
+
+if [ ! -d "$statedumpdir" ];
+then
+ echo "$statedumpdir: Is not a directory"
+ echo "Usage: $0 <statedump-dir>"
+ exit 1
+fi
+
+cd "$statedumpdir" || exit 1
+for statedump_fname_prefix in $(get_statedump_fnames_without_timestamps);
+do
+ get_stacks_that_appear_in_multiple_statedumps "$statedump_fname_prefix"
+done | column -t
+echo "NOTE: stacks with lk-owner=\"\"/lk-owner=0000000000000000/unique=0 may not be hung frames and need further inspection" >&2
diff --git a/extras/init.d/Makefile.am b/extras/init.d/Makefile.am
index 66715f4314a..8d8cc69571a 100644
--- a/extras/init.d/Makefile.am
+++ b/extras/init.d/Makefile.am
@@ -1,19 +1,32 @@
-EXTRA_DIST = glusterd-Debian glusterd-Redhat glusterd-SuSE glusterd.plist
+EXTRA_DIST = glusterd-Debian glusterd-FreeBSD glusterd-Redhat \
+ glusterd-SuSE glusterd.plist glustereventsd-FreeBSD \
+ glustereventsd-Redhat glustereventsd-Debian
-CLEANFILES =
+CLEANFILES =
-initdir = @initdir@
-launchddir = @launchddir@
+INIT_DIR = @initdir@
+SYSTEMD_DIR = @systemddir@
+LAUNCHD_DIR = @launchddir@
-$(GF_DISTRIBUTION):
- $(mkdir_p) $(DESTDIR)$(initdir)
- $(INSTALL_PROGRAM) glusterd-$(GF_DISTRIBUTION) $(DESTDIR)$(initdir)/glusterd
+$(GF_DISTRIBUTION):
+if WITH_SERVER
+ @if [ ! -d $(SYSTEMD_DIR) ]; then \
+ $(mkdir_p) $(DESTDIR)$(INIT_DIR); \
+ $(INSTALL_PROGRAM) glusterd-$(GF_DISTRIBUTION) $(DESTDIR)$(INIT_DIR)/glusterd; \
+ fi
+endif
+if BUILD_EVENTS
+ @if [ ! -d $(SYSTEMD_DIR) ]; then \
+ $(mkdir_p) $(DESTDIR)$(INIT_DIR); \
+ $(INSTALL_PROGRAM) glustereventsd-$(GF_DISTRIBUTION) $(DESTDIR)$(INIT_DIR)/glustereventsd; \
+ fi
+endif
install-exec-local: $(GF_DISTRIBUTION)
-install-data-local:
+install-data-local:
if GF_DARWIN_HOST_OS
- $(mkdir_p) $(DESTDIR)$(launchddir)
- $(INSTALL_PROGRAM) glusterd.plist $(DESTDIR)$(launchddir)/com.gluster.glusterd.plist
+ $(mkdir_p) $(DESTDIR)$(LAUNCHD_DIR)
+ $(INSTALL_PROGRAM) glusterd.plist $(DESTDIR)$(LAUNCHD_DIR)/org.gluster.glusterd.plist
endif
diff --git a/extras/init.d/glusterd-FreeBSD.in b/extras/init.d/glusterd-FreeBSD.in
new file mode 100644
index 00000000000..21c3da72624
--- /dev/null
+++ b/extras/init.d/glusterd-FreeBSD.in
@@ -0,0 +1,24 @@
+#!/bin/sh
+#
+# $FreeBSD$
+#
+
+# PROVIDE: glusterd
+
+. /etc/rc.subr
+
+name="glusterd"
+rcvar=`set_rcvar`
+command=@prefix@/sbin/${name}
+pidfile="/var/run/${name}.pid"
+glusterd_flags="-p /var/run/${name}.pid"
+start_precmd="glusterd_prestart"
+
+glusterd_prestart()
+{
+ mkdir -p @GLUSTERD_WORKDIR@ /var/log/glusterfs
+ return 0
+}
+
+load_rc_config $name
+run_rc_command "$1"
diff --git a/extras/init.d/glusterd-Redhat.in b/extras/init.d/glusterd-Redhat.in
index 18f3debc4ca..94801fe31a5 100755
--- a/extras/init.d/glusterd-Redhat.in
+++ b/extras/init.d/glusterd-Redhat.in
@@ -1,76 +1,143 @@
#!/bin/bash
#
-# chkconfig: 35 20 80
-# description: Gluster File System service for volume management
+# glusterd Startup script for the glusterfs server
#
+# chkconfig: - 20 80
+# description: Clustered file-system server
-# Get function from functions library
+### BEGIN INIT INFO
+# Provides: glusterd
+# Required-Start: $local_fs $network
+# Required-Stop: $local_fs $network
+# Should-Start:
+# Should-Stop:
+# Default-Start: 2 3 4 5
+# Default-Stop: 0 1 6
+# Short-Description: glusterfs server
+# Description: Clustered file-system server
+### END INIT INFO
+#
+
+# Source function library.
. /etc/rc.d/init.d/functions
BASE=glusterd
-PIDFILE=/var/run/$BASE.pid
+
+# Fedora File System Layout dictates /run
+[ -e /run ] && RUNDIR="/run"
+PIDFILE="${RUNDIR:-/var/run}/${BASE}.pid"
+
PID=`test -f $PIDFILE && cat $PIDFILE`
+
+# Overwriteable from sysconfig
+LOG_LEVEL=''
+LOG_FILE=''
+GLUSTERD_OPTIONS=''
+GLUSTERD_NOFILE='65536'
+
+[ -f /etc/sysconfig/${BASE} ] && . /etc/sysconfig/${BASE}
+
+[ ! -z $LOG_LEVEL ] && GLUSTERD_OPTIONS="${GLUSTERD_OPTIONS} --log-level ${LOG_LEVEL}"
+[ ! -z $LOG_FILE ] && GLUSTERD_OPTIONS="${GLUSTERD_OPTIONS} --log-file ${LOG_FILE}"
+
GLUSTERFSD=glusterfsd
GLUSTERFS=glusterfs
GLUSTERD_BIN=@prefix@/sbin/$BASE
-GLUSTERD_OPTS="--pid-file=$PIDFILE"
+GLUSTERD_OPTS="--pid-file=$PIDFILE ${GLUSTERD_OPTIONS}"
GLUSTERD="$GLUSTERD_BIN $GLUSTERD_OPTS"
RETVAL=0
+LOCKFILE=/var/lock/subsys/${BASE}
+
# Start the service $BASE
start()
{
- pidofproc -p $PIDFILE $GLUSTERD_BIN &> /dev/null
- status=$?
- if [ $status -eq 0 ]; then
+ if pidofproc -p $PIDFILE $GLUSTERD_BIN &> /dev/null; then
echo "glusterd service is already running with pid $PID"
- exit 1
+ return 0
else
+ ulimit -n $GLUSTERD_NOFILE
echo -n $"Starting $BASE:"
daemon $GLUSTERD
RETVAL=$?
echo
- [ $RETVAL -ne 0 ] && exit $RETVAL
+ [ $RETVAL -eq 0 ] && touch $LOCKFILE
+ return $RETVAL
fi
-
}
# Stop the service $BASE
stop()
{
echo -n $"Stopping $BASE:"
- pidofproc -p $PIDFILE $GLUSTERD_BIN &> /dev/null
- status=$?
- if [ $status -eq 0 ]; then
+ if pidofproc -p $PIDFILE $GLUSTERD_BIN &> /dev/null; then
killproc -p $PIDFILE $BASE
- [ -w $PIDFILE ] && rm -f $PIDFILE
else
killproc $BASE
fi
-
+ RETVAL=$?
echo
+ [ $RETVAL -eq 0 ] && rm -f $LOCKFILE
+ return $RETVAL
+}
+
+restart()
+{
+ stop
+ start
+}
+reload()
+{
+ restart
+}
+
+force_reload()
+{
+ restart
+}
+
+rh_status()
+{
+ status $BASE
+}
+
+rh_status_q()
+{
+ rh_status &>/dev/null
}
### service arguments ###
case $1 in
start)
- start
+ rh_status_q && exit 0
+ $1
;;
stop)
- stop
+ rh_status_q || exit 0
+ $1
+ ;;
+ restart)
+ $1
+ ;;
+ reload)
+ rh_status_q || exit 7
+ $1
+ ;;
+ force-reload)
+ force_reload
;;
status)
- status $BASE
+ rh_status
;;
- restart)
- $0 stop
- $0 start
+ condrestart|try-restart)
+ rh_status_q || exit 0
+ restart
;;
*)
- echo $"Usage: $0 {start|stop|status|restart}."
+ echo $"Usage: $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload}"
exit 1
esac
-exit 0
+exit $?
diff --git a/extras/init.d/glusterd-SuSE.in b/extras/init.d/glusterd-SuSE.in
index 16cf8de6a13..6259bab00b6 100755
--- a/extras/init.d/glusterd-SuSE.in
+++ b/extras/init.d/glusterd-SuSE.in
@@ -2,8 +2,8 @@
#
### BEGIN INIT INFO
# Provides: glusterd
-# Required-Start: $local_fs $network
-# Required-Stop:
+# Required-Start: $remote_fs $network
+# Required-Stop: $remote_fs $network
# Default-Start: 3 5
# Default-Stop:
# Short-Description: Gluster File System service for volume management
@@ -61,13 +61,17 @@ case $1 in
fi
rc_status -v
;;
+ reload)
+ rc_failed 3
+ rc_status -v
+ ;;
restart)
$0 stop
$0 start
rc_status
;;
*)
- echo $"Usage: $0 {start|stop|status|restart}."
+ echo $"Usage: $0 {start|stop|status|reload|restart}."
exit 1
esac
diff --git a/extras/init.d/glusterd.plist.in b/extras/init.d/glusterd.plist.in
index 7385fa486df..3ee9f60c5c5 100644
--- a/extras/init.d/glusterd.plist.in
+++ b/extras/init.d/glusterd.plist.in
@@ -3,7 +3,7 @@
<plist version="1.0">
<dict>
<key>Label</key>
- <string>com.gluster.glusterd</string>
+ <string>org.gluster.glusterd</string>
<key>ProgramArguments</key>
<array>
<string>@prefix@/sbin/glusterd</string>
diff --git a/extras/init.d/glustereventsd-Debian.in b/extras/init.d/glustereventsd-Debian.in
new file mode 100644
index 00000000000..6eebdb2b8d8
--- /dev/null
+++ b/extras/init.d/glustereventsd-Debian.in
@@ -0,0 +1,91 @@
+#!/bin/sh
+### BEGIN INIT INFO
+# Provides: glustereventsd
+# Required-Start: $local_fs $network
+# Required-Stop: $local_fs $network
+# Default-Start: 2 3 4 5
+# Default-Stop: 0 1 6
+# Short-Description: Gluster Events Server
+# Description: Gluster Events Server
+### END INIT INFO
+
+# Author: Chris AtLee <chris@atlee.ca>
+# Patched by: Matthias Albert < matthias@linux4experts.de>
+
+PATH=/sbin:/usr/sbin:/bin:/usr/bin
+NAME=glustereventsd
+SCRIPTNAME=/etc/init.d/$NAME
+DAEMON=@prefix@/sbin/$NAME
+PIDFILE=/var/run/$NAME.pid
+GLUSTEREVENTSD_OPTS=""
+PID=`test -f $PIDFILE && cat $PIDFILE`
+
+
+# Gracefully exit if the package has been removed.
+test -x $DAEMON || exit 0
+
+# Load the VERBOSE setting and other rcS variables
+. /lib/init/vars.sh
+
+# Define LSB log_* functions.
+. /lib/lsb/init-functions
+
+
+do_start()
+{
+ pidofproc -p $PIDFILE $DAEMON >/dev/null
+ status=$?
+ if [ $status -eq 0 ]; then
+ log_success_msg "glustereventsd service is already running with pid $PID"
+ else
+ log_daemon_msg "Starting glustereventsd service" "glustereventsd"
+ start-stop-daemon --start --quiet --oknodo --pidfile $PIDFILE --startas $DAEMON -- -p $PIDFILE $GLUSTEREVENTSD_OPTS
+ log_end_msg $?
+ start_daemon -p $PIDFILE $DAEMON -f $CONFIGFILE
+ return $?
+ fi
+}
+
+do_stop()
+{
+ log_daemon_msg "Stopping glustereventsd service" "glustereventsd"
+ start-stop-daemon --stop --quiet --oknodo --pidfile $PIDFILE
+ log_end_msg $?
+ rm -f $PIDFILE
+ killproc -p $PIDFILE $DAEMON
+ return $?
+}
+
+do_status()
+{
+ pidofproc -p $PIDFILE $DAEMON >/dev/null
+ status=$?
+ if [ $status -eq 0 ]; then
+ log_success_msg "glustereventsd service is running with pid $PID"
+ else
+ log_failure_msg "glustereventsd service is not running."
+ fi
+ exit $status
+}
+
+case "$1" in
+ start)
+ do_start
+ ;;
+ stop)
+ do_stop
+ ;;
+ status)
+ do_status;
+ ;;
+ restart|force-reload)
+ do_stop
+ sleep 2
+ do_start
+ ;;
+ *)
+ echo "Usage: $SCRIPTNAME {start|stop|status|restart|force-reload}" >&2
+ exit 3
+ ;;
+esac
+
diff --git a/extras/init.d/glustereventsd-FreeBSD.in b/extras/init.d/glustereventsd-FreeBSD.in
new file mode 100644
index 00000000000..2e8303ec6c6
--- /dev/null
+++ b/extras/init.d/glustereventsd-FreeBSD.in
@@ -0,0 +1,19 @@
+#!/bin/sh
+#
+# $FreeBSD$
+#
+
+# PROVIDE: glustereventsd
+
+. /etc/rc.subr
+
+name="glustereventsd"
+rcvar=`set_rcvar`
+command=@prefix@/sbin/${name}
+command_interpreter=/usr/local/bin/python
+pidfile="/var/run/${name}.pid"
+glustereventsd_flags="-p /var/run/${name}.pid"
+start_cmd="/usr/sbin/daemon $command ${glustereventsd_flags}"
+
+load_rc_config $name
+run_rc_command "$1"
diff --git a/extras/init.d/glustereventsd-Redhat.in b/extras/init.d/glustereventsd-Redhat.in
new file mode 100644
index 00000000000..d23ce4c244f
--- /dev/null
+++ b/extras/init.d/glustereventsd-Redhat.in
@@ -0,0 +1,129 @@
+#!/bin/bash
+#
+# glustereventsd Startup script for the glusterfs Events server
+#
+# chkconfig: - 20 80
+# description: Gluster Events Server
+
+### BEGIN INIT INFO
+# Provides: glustereventsd
+# Required-Start: $local_fs $network
+# Required-Stop: $local_fs $network
+# Should-Start:
+# Should-Stop:
+# Default-Start: 2 3 4 5
+# Default-Stop: 0 1 6
+# Short-Description: glusterfs Events server
+# Description: GlusterFS Events Server
+### END INIT INFO
+#
+
+# Source function library.
+. /etc/rc.d/init.d/functions
+
+BASE=glustereventsd
+
+# Fedora File System Layout dictates /run
+[ -e /run ] && RUNDIR="/run"
+PIDFILE="${RUNDIR:-/var/run}/${BASE}.pid"
+
+PID=`test -f $PIDFILE && cat $PIDFILE`
+
+GLUSTEREVENTSD_BIN=@prefix@/sbin/$BASE
+GLUSTEREVENTSD_OPTS="--pid-file=$PIDFILE"
+GLUSTEREVENTSD="$GLUSTEREVENTSD_BIN $GLUSTEREVENTSD_OPTS"
+RETVAL=0
+
+LOCKFILE=/var/lock/subsys/${BASE}
+
+# Start the service $BASE
+start()
+{
+ if pidofproc -p $PIDFILE $GLUSTEREVENTSD_BIN &> /dev/null; then
+ echo "glustereventsd service is already running with pid $PID"
+ return 0
+ else
+ echo -n $"Starting $BASE:"
+ daemon $GLUSTEREVENTSD &
+ RETVAL=$?
+ echo
+ [ $RETVAL -eq 0 ] && touch $LOCKFILE
+ return $RETVAL
+ fi
+}
+
+# Stop the service $BASE
+stop()
+{
+ echo -n $"Stopping $BASE:"
+ if pidofproc -p $PIDFILE $GLUSTEREVENTSD_BIN &> /dev/null; then
+ killproc -p $PIDFILE $BASE
+ else
+ killproc $BASE
+ fi
+ RETVAL=$?
+ echo
+ [ $RETVAL -eq 0 ] && rm -f $LOCKFILE
+ return $RETVAL
+}
+
+restart()
+{
+ stop
+ start
+}
+
+reload()
+{
+ restart
+}
+
+force_reload()
+{
+ restart
+}
+
+rh_status()
+{
+ status $BASE
+}
+
+rh_status_q()
+{
+ rh_status &>/dev/null
+}
+
+
+### service arguments ###
+case $1 in
+ start)
+ rh_status_q && exit 0
+ $1
+ ;;
+ stop)
+ rh_status_q || exit 0
+ $1
+ ;;
+ restart)
+ $1
+ ;;
+ reload)
+ rh_status_q || exit 7
+ $1
+ ;;
+ force-reload)
+ force_reload
+ ;;
+ status)
+ rh_status
+ ;;
+ condrestart|try-restart)
+ rh_status_q || exit 0
+ restart
+ ;;
+ *)
+ echo $"Usage: $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload}"
+ exit 1
+esac
+
+exit $?
diff --git a/extras/logger.conf.example b/extras/logger.conf.example
new file mode 100644
index 00000000000..248be5bda69
--- /dev/null
+++ b/extras/logger.conf.example
@@ -0,0 +1,13 @@
+#
+# Sample logger.conf file to configure enhanced Logging in GlusterFS
+#
+# To enable enhanced logging capabilities,
+#
+# 1. rename this file to /etc/glusterfs/logger.conf
+#
+# 2. rename /etc/rsyslog.d/gluster.conf.example to
+# /etc/rsyslog.d/gluster.conf
+#
+# This change requires restart of all gluster services/volumes and
+# rsyslog.
+#
diff --git a/extras/mount-shared-storage.sh b/extras/mount-shared-storage.sh
new file mode 100755
index 00000000000..cc40e13c3e3
--- /dev/null
+++ b/extras/mount-shared-storage.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+#Post reboot there is a chance in which mounting of shared storage will fail
+#This will impact starting of features like NFS-Ganesha. So this script will
+#try to mount the shared storage if it fails
+
+exitStatus=0
+
+while IFS= read -r glm
+do
+ IFS=$' \t' read -r -a arr <<< "$glm"
+
+ #Validate storage type is glusterfs
+ if [ "${arr[2]}" == "glusterfs" ]
+ then
+
+ #check whether shared storage is mounted
+ #if it is mounted then mountpoint -q will return a 0 success code
+ if mountpoint -q "${arr[1]}"
+ then
+ echo "${arr[1]} is already mounted"
+ continue
+ fi
+
+ mount -t glusterfs -o "${arr[3]}" "${arr[0]}" "${arr[1]}"
+ #wait for few seconds
+ sleep 10
+
+ #recheck mount got succeed
+ if mountpoint -q "${arr[1]}"
+ then
+ echo "${arr[1]} has been mounted"
+ continue
+ else
+ echo "${arr[1]} failed to mount"
+ exitStatus=1
+ fi
+ fi
+done <<< "$(sed '/^#/ d' </etc/fstab | grep 'glusterfs')"
+exit $exitStatus
diff --git a/extras/ocf/Makefile.am b/extras/ocf/Makefile.am
new file mode 100644
index 00000000000..c49a835fbca
--- /dev/null
+++ b/extras/ocf/Makefile.am
@@ -0,0 +1,11 @@
+EXTRA_DIST = glusterd.in volume.in
+
+# The root of the OCF resource agent hierarchy
+# Per the OCF standard, it's always "lib",
+# not "lib64" (even on 64-bit platforms).
+ocfdir = $(prefix)/lib/ocf
+
+# The ceph provider directory
+radir = $(ocfdir)/resource.d/$(PACKAGE_NAME)
+
+ra_SCRIPTS = glusterd volume
diff --git a/extras/ocf/glusterd.in b/extras/ocf/glusterd.in
new file mode 100755
index 00000000000..c119a285d32
--- /dev/null
+++ b/extras/ocf/glusterd.in
@@ -0,0 +1,212 @@
+#!/bin/sh
+#
+# glusterd
+#
+# Description: Manages a glusterd server as a (typically cloned)
+# HA resource
+#
+# Authors: Florian Haas (hastexo Professional Services GmbH)
+#
+# License: GNU General Public License (GPL)
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Convenience variables
+# When sysconfdir and localstatedir aren't passed in as
+# configure flags, they're defined in terms of prefix
+prefix=@prefix@
+#######################################################################
+
+
+OCF_RESKEY_binary_default="glusterd"
+OCF_RESKEY_pid_default="@localstatedir@/run/glusterd.pid"
+OCF_RESKEY_socket_default=""
+OCF_RESKEY_additional_parameters_default=""
+
+: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
+: ${OCF_RESKEY_pid=${OCF_RESKEY_pid_default}}
+
+glusterd_meta_data() {
+ cat <<EOF
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="glusterd" version="0.1">
+ <version>0.1</version>
+ <longdesc lang="en">
+ </longdesc>
+ <shortdesc lang="en">Manages a Gluster server</shortdesc>
+ <parameters>
+ <parameter name="binary">
+ <longdesc lang="en">
+ Name of the glusterd executable. Specify a full absolute
+ path if the binary is not in your \$PATH.
+ </longdesc>
+ <shortdesc lang="en">glusterd executable</shortdesc>
+ <content type="string" default="$OCF_RESKEY_binary_default"/>
+ </parameter>
+ <parameter name="pid">
+ <longdesc lang="en">
+ Path to the glusterd PID file.
+ </longdesc>
+ <shortdesc lang="en">PID file</shortdesc>
+ <content type="string" default="$OCF_RESKEY_pid_default"/>
+ </parameter>
+ <parameter name="socket">
+ <longdesc lang="en">
+ Path to the glusterd UNIX socket file. If unspecified,
+ glusterd will not listen on any socket.
+ </longdesc>
+ <shortdesc lang="en">Socket file</shortdesc>
+ <content type="string"/>
+ </parameter>
+ </parameters>
+ <actions>
+ <action name="start" timeout="20" />
+ <action name="stop" timeout="20" />
+ <action name="monitor" timeout="20" interval="10" />
+ <action name="reload" timeout="20" />
+ <action name="meta-data" timeout="5" />
+ <action name="validate-all" timeout="20" />
+ </actions>
+</resource-agent>
+EOF
+
+}
+
+glusterd_start() {
+ local glusterd_options
+ # exit immediately if configuration is not valid
+ glusterd_validate_all || exit $?
+
+ # if resource is already running, bail out early
+ if glusterd_monitor; then
+ ocf_log info "Resource is already running"
+ return $OCF_SUCCESS
+ fi
+
+ # actually start up the resource here (make sure to immediately
+ # exit with an $OCF_ERR_ error code if anything goes seriously
+ # wrong)
+ glusterd_options="-p $OCF_RESKEY_pid"
+ if [ -n "$OCF_RESKEY_socket" ]; then
+ glusterd_options="$glusterd_options -S $OCF_RESKEY_socket"
+ fi
+ if [ -n "$OCF_RESKEY_additional_parameters" ]; then
+ glusterd_options="$glusterd_options $OCF_RESKEY_additional_parameters"
+ fi
+
+ ocf_run $OCF_RESKEY_binary $glusterd_options || exit $OCF_ERR_GENERIC
+
+ # After the resource has been started, check whether it started up
+ # correctly. If the resource starts asynchronously, the agent may
+ # spin on the monitor function here -- if the resource does not
+ # start up within the defined timeout, the cluster manager will
+ # consider the start action failed
+ while ! glusterd_monitor; do
+ ocf_log debug "Resource has not started yet, waiting"
+ sleep 1
+ done
+
+ # only return $OCF_SUCCESS if _everything_ succeeded as expected
+ return $OCF_SUCCESS
+}
+
+glusterd_stop() {
+ local rc
+ local pid
+
+ # exit immediately if configuration is not valid
+ glusterd_validate_all || exit $?
+
+ glusterd_monitor
+ rc=$?
+ case "$rc" in
+ "$OCF_SUCCESS")
+ # Currently running. Normal, expected behavior.
+ ocf_log debug "Resource is currently running"
+ ;;
+ "$OCF_NOT_RUNNING")
+ # Currently not running. Nothing to do.
+ ocf_log info "Resource is already stopped"
+ return $OCF_SUCCESS
+ ;;
+ esac
+
+ # actually shut down the resource here (make sure to immediately
+ # exit with an $OCF_ERR_ error code if anything goes seriously
+ # wrong)
+ pid=`cat $OCF_RESKEY_pid`
+ ocf_run kill -s TERM $pid || exit OCF_ERR_GENERIC
+
+ # After the resource has been stopped, check whether it shut down
+ # correctly. If the resource stops asynchronously, the agent may
+ # spin on the monitor function here -- if the resource does not
+ # shut down within the defined timeout, the cluster manager will
+ # consider the stop action failed
+ while glusterd_monitor; do
+ ocf_log debug "Resource has not stopped yet, waiting"
+ sleep 1
+ done
+
+ # only return $OCF_SUCCESS if _everything_ succeeded as expected
+ return $OCF_SUCCESS
+
+}
+
+glusterd_monitor() {
+ local pid
+
+ [ -e $OCF_RESKEY_pid ] || return $OCF_NOT_RUNNING
+
+ pid=`cat $OCF_RESKEY_pid`
+ ocf_run kill -s 0 $pid || return $OCF_NOT_RUNNING
+
+ ocf_log debug "$OCF_RESKEY_binary running with PID $pid"
+ return $OCF_SUCCESS
+}
+
+glusterd_validate_all() {
+ # Test for required binaries
+ check_binary $OCF_RESKEY_binary
+
+ return $OCF_SUCCESS
+}
+
+
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+meta-data) glusterd_meta_data
+ exit $OCF_SUCCESS
+ ;;
+usage|help) glusterd_usage
+ exit $OCF_SUCCESS
+ ;;
+esac
+
+# Anything other than meta-data and usage must pass validation
+glusterd_validate_all || exit $?
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+start) glusterd_start;;
+stop) glusterd_stop;;
+status|monitor) glusterd_monitor;;
+reload) ocf_log info "Reloading..."
+ glusterd_start
+ ;;
+validate-all) ;;
+notify) exit $OCF_SUCCESS;;
+*) glusterd_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc"
+exit $rc
diff --git a/extras/ocf/volume.in b/extras/ocf/volume.in
new file mode 100755
index 00000000000..76cc649e55f
--- /dev/null
+++ b/extras/ocf/volume.in
@@ -0,0 +1,276 @@
+#!/bin/sh
+#
+# glusterd
+#
+# Description: Manages a glusterd server as a (typically cloned)
+# HA resource
+#
+# Authors: Florian Haas (hastexo Professional Services GmbH)
+# Jiri Lunacek (Hosting90 Systems s.r.o.)
+#
+# License: GNU General Public License (GPL)
+
+#######################################################################
+# Initialization:
+
+: ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
+. ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
+
+# Convenience variables
+# When sysconfdir and localstatedir aren't passed in as
+# configure flags, they're defined in terms of prefix
+prefix=@prefix@
+SHORTHOSTNAME=`hostname -s`
+#######################################################################
+
+OCF_RESKEY_binary_default="gluster"
+
+: ${OCF_RESKEY_binary=${OCF_RESKEY_binary_default}}
+
+volume_meta_data() {
+ cat <<EOF
+<?xml version="1.0"?>
+<!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd">
+<resource-agent name="volume" version="0.1">
+ <version>0.1</version>
+ <longdesc lang="en">
+Manages a GlusterFS volume and monitors its bricks. When a resource of
+this type is configured as a clone (as is commonly the case), then it
+must have clone ordering enabled.
+ </longdesc>
+ <shortdesc lang="en">Manages a GlusterFS volume</shortdesc>
+ <parameters>
+ <parameter name="volname" required="1">
+ <longdesc lang="en">
+ The name of the volume to manage.
+ </longdesc>
+ <shortdesc lang="en">volume name</shortdesc>
+ <content type="string"/>
+ </parameter>
+ <parameter name="binary">
+ <longdesc lang="en">
+ Name of the gluster executable. Specify a full absolute
+ path if the binary is not in your \$PATH.
+ </longdesc>
+ <shortdesc lang="en">gluster executable</shortdesc>
+ <content type="string" default="$OCF_RESKEY_binary_default"/>
+ </parameter>
+ <parameter name="peer_map">
+ <longdesc lang="en">
+ Mapping of hostname - peer name in the gluster cluster
+ in format hostname1:peername1,hostname2:peername2,...
+ </longdesc>
+ <shortdesc lang="en">gluster peer map</shortdesc>
+ <content type="string" default=""/>
+ </parameter>
+ </parameters>
+ <actions>
+ <action name="start" timeout="20" />
+ <action name="stop" timeout="20" />
+ <action name="monitor" timeout="20" interval="10" />
+ <action name="reload" timeout="20" />
+ <action name="meta-data" timeout="5" />
+ <action name="validate-all" timeout="20" />
+ </actions>
+</resource-agent>
+EOF
+
+}
+
+if [ -n "${OCF_RESKEY_peer_map}" ]; then
+ SHORTHOSTNAME=`echo "${OCF_RESKEY_peer_map}" | egrep -o "$SHORTHOSTNAME\:[^,]+" | awk -F: '{print $2}'`
+fi
+
+volume_getdir() {
+ local voldir
+ voldir="@GLUSTERD_WORKDIR@/vols/${OCF_RESKEY_volname}"
+
+ [ -d ${voldir} ] || return 1
+
+ echo "${voldir}"
+ return 0
+}
+
+volume_getpid_dir() {
+ local volpid_dir
+ volpid_dir="/var/run/gluster/vols/${OCF_RESKEY_volname}"
+
+ [ -d ${volpid_dir} ] || return 1
+
+ echo "${volpid_dir}"
+ return 0
+}
+
+volume_getbricks() {
+ local infofile
+ local voldir
+ voldir=`volume_getdir`
+ infofile="${voldir}/info"
+
+ [ -e ${infofile} ] || return 1
+
+ echo "`sed -n -e "s/^brick-.\+=${SHORTHOSTNAME}://p" < ${infofile}`"
+ return 0
+}
+
+volume_getpids() {
+ local bricks
+ local pidfile
+ local infofile
+ local volpid_dir
+
+ volpid_dir=`volume_getpid_dir`
+ bricks=`volume_getbricks`
+
+ if [ -z "$bricks" ]; then
+ return 1
+ fi
+
+ for brick in ${bricks}; do
+ pidfile="${volpid_dir}/${SHORTHOSTNAME}${brick}.pid"
+ [ -e $pidfile ] || return 1
+ cat $pidfile
+ done
+
+ return 0
+}
+
+volume_start() {
+ local volume_options
+
+ # exit immediately if configuration is not valid
+ volume_validate_all || exit $?
+
+ # if resource is already running, bail out early
+ if volume_monitor; then
+ ocf_log info "Resource is already running"
+ return $OCF_SUCCESS
+ fi
+
+ # actually start up the resource here
+ ocf_run "$OCF_RESKEY_binary" \
+ volume start "$OCF_RESKEY_volname" force || exit $OCF_ERR_GENERIC
+
+ # After the resource has been started, check whether it started up
+ # correctly. If the resource starts asynchronously, the agent may
+ # spin on the monitor function here -- if the resource does not
+ # start up within the defined timeout, the cluster manager will
+ # consider the start action failed
+ while ! volume_monitor; do
+ ocf_log debug "Resource has not started yet, waiting"
+ sleep 1
+ done
+
+ # only return $OCF_SUCCESS if _everything_ succeeded as expected
+ return $OCF_SUCCESS
+}
+
+volume_stop() {
+ local rc
+ local pid
+
+ # exit immediately if configuration is not valid
+ volume_validate_all || exit $?
+
+ volume_monitor
+ rc=$?
+ case "$rc" in
+ "$OCF_SUCCESS")
+ # Currently running. Normal, expected behavior.
+ ocf_log debug "Resource is currently running"
+ ;;
+ "$OCF_NOT_RUNNING")
+ # Currently not running. Nothing to do.
+ ocf_log info "Resource is already stopped"
+ return $OCF_SUCCESS
+ ;;
+ esac
+
+ # actually shut down the resource here (make sure to immediately
+ # exit with an $OCF_ERR_ error code if anything goes seriously
+ # wrong)
+ pids=`volume_getpids`
+ for pid in $pids; do
+ ocf_run kill -s TERM $pid
+ done
+
+ # After the resource has been stopped, check whether it shut down
+ # correctly. If the resource stops asynchronously, the agent may
+ # spin on the monitor function here -- if the resource does not
+ # shut down within the defined timeout, the cluster manager will
+ # consider the stop action failed
+ while volume_monitor; do
+ ocf_log debug "Resource has not stopped yet, waiting"
+ sleep 1
+ done
+
+ # only return $OCF_SUCCESS if _everything_ succeeded as expected
+ return $OCF_SUCCESS
+
+}
+
+volume_monitor() {
+ local pid
+
+ pids=`volume_getpids` || return $OCF_NOT_RUNNING
+
+ for pid in $pids; do
+ ocf_run kill -s 0 $pid || return $OCF_NOT_RUNNING
+ done
+
+ ocf_log debug "Local bricks for volume ${OCF_RESKEY_volname} running with PIDs $pids"
+ return $OCF_SUCCESS
+}
+
+volume_validate_all() {
+ # Test for configuration errors first
+ if [ -z "${OCF_RESKEY_volname}" ]; then
+ ocf_log err 'Missing required parameter "volname"'
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ # Test for required binaries
+ check_binary $OCF_RESKEY_binary
+
+ if [ -z "$SHORTHOSTNAME" ]; then
+ ocf_log err 'Unable to get host in node map'
+ return $OCF_ERR_CONFIGURED
+ fi
+
+ return $OCF_SUCCESS
+}
+
+
+
+# Make sure meta-data and usage always succeed
+case $__OCF_ACTION in
+meta-data) volume_meta_data
+ exit $OCF_SUCCESS
+ ;;
+usage|help) volume_usage
+ exit $OCF_SUCCESS
+ ;;
+esac
+
+# Anything other than meta-data and usage must pass validation
+volume_validate_all || exit $?
+
+# Translate each action into the appropriate function call
+case $__OCF_ACTION in
+start) volume_start;;
+stop) volume_stop;;
+status|monitor) volume_monitor;;
+reload) ocf_log info "Reloading..."
+ volume_start
+ ;;
+validate-all) ;;
+notify) exit $OCF_SUCCESS;;
+*) volume_usage
+ exit $OCF_ERR_UNIMPLEMENTED
+ ;;
+esac
+rc=$?
+
+# The resource agent may optionally log a debug message
+ocf_log debug "${OCF_RESOURCE_INSTANCE} $__OCF_ACTION returned $rc"
+exit $rc
diff --git a/extras/peer_add_secret_pub.in b/extras/peer_add_secret_pub.in
new file mode 100644
index 00000000000..c9674af353d
--- /dev/null
+++ b/extras/peer_add_secret_pub.in
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+user=$1
+pub_file=$2
+
+if [ "$user" == "" ]; then
+ echo "Invalid User";
+ exit 1;
+fi
+
+if [ "$pub_file" == "" ]; then
+ echo "Invalid pub file";
+ exit 1;
+fi
+
+home_dir=`getent passwd $user | cut -d ':' -f 6`;
+
+if [ "$home_dir" == "" ]; then
+ echo "Invalid home dir";
+ exit 1;
+fi
+
+authorized_keys_file=$(cat /etc/ssh/sshd_config | \
+ grep -e "^AuthorizedKeysFile" | \
+ awk '{print $2}' | tail -1);
+
+# If not set, use default location
+if [ "x$authorized_keys_file" == "x" ]; then
+ authorized_keys_file="%h/.ssh/authorized_keys"
+fi
+
+# If default location
+if [ "$authorized_keys_file" == ".ssh/authorized_keys" ]; then
+ authorized_keys_file="%h/$authorized_keys_file"
+fi
+
+# Replace %u with user name (ex: /etc/ssh/keys/%u/authorized_keys)
+authorized_keys_file="${authorized_keys_file//%u/$user}";
+
+# Replace %h with home dir (ex: %h/.ssh/authorized_keys)
+authorized_keys_file="${authorized_keys_file//%h/$home_dir}";
+ssh_dir=$(dirname $authorized_keys_file);
+
+if [ ! -d $ssh_dir ]; then
+ mkdir $ssh_dir;
+ chmod 700 $ssh_dir;
+ chown $user: $ssh_dir;
+fi
+
+if [ ! -d $authorized_keys_file ]; then
+ touch $authorized_keys_file;
+ chmod 600 $authorized_keys_file;
+ chown $user: $authorized_keys_file;
+fi
+
+# Restore SELinux security contexts. This is required
+# for passwdless SSH to work.
+
+if type restorecon >/dev/null 2>&1; then
+ restorecon -F $ssh_dir $authorized_keys_file;
+fi
+
+# Add to authorized_keys file only if not exists already
+while read line
+do
+ grep -Fxq "$line" $authorized_keys_file;
+ [ $? -ne 0 ] && echo "$line" >> $authorized_keys_file;
+done < "$GLUSTERD_WORKDIR"/$pub_file;
+
+exit 0;
diff --git a/extras/post-upgrade-script-for-quota.sh b/extras/post-upgrade-script-for-quota.sh
new file mode 100755
index 00000000000..15652429056
--- /dev/null
+++ b/extras/post-upgrade-script-for-quota.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+#The post-upgrade script must be executed after all the nodes in the cluster
+#have upgraded.
+#Also, all the clients accessing the given volume must also be upgraded
+#before the script is run.
+#Make sure glusterd and the brick processes are running on all nodes in the
+#cluster post upgrade.
+#Execute this script on the node where the pre-upgrade script for quota was run.
+
+VOL=$1;
+
+BACKUP_DIR=/var/tmp/glusterfs/quota-config-backup
+
+function set_limits {
+ local var=$(gluster volume info $1 | grep 'features.quota'| cut -d" " -f2);
+
+ if [ -z "$var" ] || [ "$var" = "off" ]; then
+ if [ $2 -eq '0' ]; then
+ echo "Volume $1 does not have quota enabled. " \
+ "Exiting ..."
+ exit 1
+ fi
+ else
+ gluster volume set $1 default-soft-limit 80%
+ if [ $? -ne '0' ]; then
+ echo "Post-upgrade process failed." \
+ "Please address the error and run " \
+ "post-upgrade-script.sh on volume $VOL again."
+ exit 1
+ fi
+
+ gluster volume start $1 force
+ sleep 3;
+
+ local path_array=( $(cat $BACKUP_DIR/vol_$1 | tail -n +3 | awk '{print $1}') )
+ local limit_array=( $(cat $BACKUP_DIR/vol_$1 | tail -n +3 | awk '{print $2}') )
+ local len=${#path_array[@]}
+
+ for ((j=0; j<$len; j++))
+ do
+ gluster volume quota $1 limit-usage ${path_array[$j]} ${limit_array[j]};
+ if [ $? -eq 0 ]; then
+ echo "Setting limit (${limit_array[j]}) on " \
+ "path ${path_array[$j]} has been " \
+ "successful"
+ fi
+ done
+ fi;
+}
+
+if [ -z $1 ]; then
+ echo "Please provide volume name or 'all'";
+ exit 1;
+fi
+
+if [ "$1" = "all" ]; then
+ for VOL in `gluster volume list`;
+ do
+ set_limits $VOL '1';
+ done
+
+else
+ set_limits $1 '0';
+fi
diff --git a/extras/pre-upgrade-script-for-quota.sh b/extras/pre-upgrade-script-for-quota.sh
new file mode 100755
index 00000000000..9ff15f3b307
--- /dev/null
+++ b/extras/pre-upgrade-script-for-quota.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+#Make sure glusterd and the brick processes are running on all nodes in the
+#cluster.
+#This script must be run prior to upgradation, that too on
+#only one of the nodes in the cluster.
+
+BACKUP_DIR=/var/tmp/glusterfs/quota-config-backup
+
+mkdir -p $BACKUP_DIR
+for i in `gluster volume list`; do
+ var=$(gluster volume info $i | grep 'features.quota'| cut -d" " -f2);
+ if [ -z "$var" ] || [ "$var" = "off" ]; then
+ continue
+ else
+ gluster volume quota $i list > $BACKUP_DIR/vol_$i;
+ fi;
+done
diff --git a/extras/profiler/glusterfs-profiler b/extras/profiler/glusterfs-profiler
index 042eadcf2df..aaafd088648 100755
--- a/extras/profiler/glusterfs-profiler
+++ b/extras/profiler/glusterfs-profiler
@@ -1,22 +1,15 @@
-#!/usr/bin/env python
+#!/usr/bin/python3
-# texttable - module for creating simple ASCII tables
-# Copyright (C) 2003-2010 Gerome Fournier <jefke(at)free.fr>
-#
-# This library is free software; you can redistribute it and/or
-# modify it under the terms of the GNU Lesser General Public
-# License as published by the Free Software Foundation; either
-# version 2.1 of the License, or (at your option) any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# Lesser General Public License for more details.
+# Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
#
-# You should have received a copy of the GNU Lesser General Public
-# License along with this library; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+# texttable - module for creating simple ASCII tables
# Incorporated from texttable.py downloaded from
# http://jefke.free.fr/stuff/python/texttable/texttable-0.7.0.tar.gz
@@ -298,7 +291,7 @@ class Texttable:
s = "%s%s%s" % (horiz, [horiz, self._char_corner][self._has_vlines()],
horiz)
# build the line
- l = string.join([horiz*n for n in self._width], s)
+ l = s.join([horiz*n for n in self._width])
# add border if needed
if self._has_border():
l = "%s%s%s%s%s\n" % (self._char_corner, horiz, l, horiz,
diff --git a/extras/python/Makefile.am b/extras/python/Makefile.am
new file mode 100644
index 00000000000..7d81fa0319b
--- /dev/null
+++ b/extras/python/Makefile.am
@@ -0,0 +1,7 @@
+if HAVE_PYTHON
+# Install __init__.py into the Python site-packages area
+pypkgdir = @BUILD_PYTHON_SITE_PACKAGES@/gluster
+pypkg_PYTHON = __init__.py
+endif
+
+EXTRA_DIST = __init__.py
diff --git a/extras/python/__init__.py b/extras/python/__init__.py
new file mode 100644
index 00000000000..3ad9513f40e
--- /dev/null
+++ b/extras/python/__init__.py
@@ -0,0 +1,2 @@
+from pkgutil import extend_path
+__path__ = extend_path(__path__, __name__)
diff --git a/extras/quota-metadata-cleanup.sh b/extras/quota-metadata-cleanup.sh
deleted file mode 100755
index 37ad8bc3137..00000000000
--- a/extras/quota-metadata-cleanup.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-
-# This script is used to cleanup xattrs setup by quota-translator in (a)
-# backend directory(ies). It takes a single or list of backend directories
-# as argument(s).
-
-usage ()
-{
- echo >&2 "usage: $0 <list-of-backend-directories>"
-}
-
-main ()
-{
- [ $# -lt 1 ] && usage
-
- INSTALL_DIR=`dirname $0`
-
- for i in $@; do
- find $i -exec $INSTALL_DIR/quota-remove-xattr.sh '{}' \;
- done
-
-}
-
-main $@
diff --git a/extras/quota-remove-xattr.sh b/extras/quota-remove-xattr.sh
deleted file mode 100755
index 7191f9bd443..00000000000
--- a/extras/quota-remove-xattr.sh
+++ /dev/null
@@ -1,24 +0,0 @@
-#!/bin/bash
-
-# This script is used to remove xattrs set by quota translator on a path.
-# It is generally invoked from quota-metadata-cleanup.sh, but can
-# also be used stand-alone.
-
-usage ()
-{
- echo >&2 "usage: $0 <path>"
-}
-
-main ()
-{
- [ $# -ne 1 ] && usage $0
-
- XATTR_KEY_VALUE_PAIRS=`getfattr -h -d -m 'trusted.glusterfs.quota' $1 2>/dev/null | sed -e '/^# file/d'`
-
- for i in $XATTR_KEY_VALUE_PAIRS; do
- XATTR_KEY=`echo $i | sed -e 's/\([^=]*\).*/\1/g'`
- setfattr -h -x $XATTR_KEY $1
- done
-}
-
-main $@
diff --git a/extras/contri-add.sh b/extras/quota/contri-add.sh
index 7db5edd5d20..7db5edd5d20 100755
--- a/extras/contri-add.sh
+++ b/extras/quota/contri-add.sh
diff --git a/extras/quota/log_accounting.sh b/extras/quota/log_accounting.sh
new file mode 100755
index 00000000000..e2dd87b84d7
--- /dev/null
+++ b/extras/quota/log_accounting.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+# The script does an accounting of all directories using command 'du' and
+# using gluster. We can then compare the two to identify accounting mismatch
+# THere can be minor mismatch because gluster only accounts for the size of
+# files. Direcotries can take up upto 4kB space on FS per directory. THis
+# size is accounted by du and not by gluster. However the difference would
+# not be significant.
+
+mountpoint=$1
+volname=$2
+
+usage ()
+{
+ echo >&2 "usage: $0 <mountpoint> <volume name>"
+ exit
+}
+
+[ $# -lt 2 ] && usage
+
+cd $mountpoint
+du -h | head -n -1 | tr -d '.' |awk '{ for (i = 2; i <= NF; i++) { printf("%s ", $i);} print "" }' > /tmp/gluster_quota_1
+cat /tmp/gluster_quota_1 | sed 's/ $//' | sed 's/ /\\ /g' | sed 's/(/\\(/g' | sed 's/)/\\)/g' |xargs gluster v quota $volname list > /tmp/gluster_quota_2
+du -h | head -n -1 |awk '{ for (i = 2; i <= NF; i++) { printf("%s %s", $i, $1);} print "" }' | tr -d '.' > /tmp/gluster_quota_3
+cat /tmp/gluster_quota_2 /tmp/gluster_quota_3 | sort > /tmp/gluster_quota_4
+find . -type d > /tmp/gluster_quota_5
+tar -cvf /tmp/gluster_quota_files.tar /tmp/gluster_quota_*
diff --git a/extras/quota/quota_fsck.py b/extras/quota/quota_fsck.py
new file mode 100755
index 00000000000..e62f7fc52a3
--- /dev/null
+++ b/extras/quota/quota_fsck.py
@@ -0,0 +1,377 @@
+#!/usr/bin/python3
+# The following script enables, Detecting, Reporting and Fixing
+# anomalies in quota accounting. Run this script with -h option
+# for further details.
+
+'''
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+'''
+from __future__ import print_function
+import os, sys, re
+from stat import *
+import subprocess
+import argparse
+import xattr
+
+aggr_size = {}
+verbose_mode = False
+mnt_path = None
+brick_path = None
+obj_fix_count = 0
+file_count = 0
+dir_count = 0
+
+#CONSTANTS
+KB = 1024
+MB = 1048576
+GB = 1048576 * 1024
+TB = 1048576 * 1048576
+
+QUOTA_VERBOSE = 0
+QUOTA_META_ABSENT = 1
+QUOTA_SIZE_MISMATCH = 2
+
+IS_DIRTY ='0x3100'
+IS_CLEAN ='0x3000'
+
+
+epilog_msg='''
+ The script attempts to find any gluster accounting issues in the
+ filesystem at the given subtree. The script crawls the given
+ subdirectory tree doing a stat for all files and compares the
+ size reported by gluster quota with the size reported by stat
+ calls. Any mismatch is reported. In addition integrity of marker
+ xattrs are verified.
+ '''
+
+def print_msg(log_type, path, xattr_dict = {}, stbuf = "", dir_size = None):
+ if log_type == QUOTA_VERBOSE:
+ print('%-24s %-60s\nxattr_values: %s\n%s\n' % ("Verbose", path, xattr_dict, stbuf))
+ elif log_type == QUOTA_META_ABSENT:
+ print('%-24s %-60s\n%s\n' % ("Quota-Meta Absent", path, xattr_dict))
+ elif log_type == QUOTA_SIZE_MISMATCH:
+ print("mismatch")
+ if dir_size is not None:
+ print('%24s %60s %12s %12s' % ("Size Mismatch", path,
+ xattr_dict, dir_size))
+ else:
+ print('%-24s %-60s %-12s %-12s' % ("Size Mismatch", path, xattr_dict,
+ stbuf.st_size))
+
+def size_differs_lot(s1, s2):
+ '''
+ There could be minor accounting differences between the stat based
+ accounting and gluster accounting. To avoid these from throwing lot
+ of false positives in our logs. using a threshold of 1M for now.
+ TODO: For a deeply nested directory, at higher levels in hierarchy
+ differences may not be significant, hence this check needs to be improved.
+ '''
+ if abs(s1-s2) > 0:
+ return True
+ else:
+ return False
+
+def fix_hardlink_accounting(curr_dict, accounted_dict, curr_size):
+ '''
+ Hard links are messy.. we have to account them for their parent
+ directory. But, stop accounting at the most common ancestor.
+ Eg:
+ say we have 3 hardlinks : /d1/d2/h1, /d1/d3/h2 and /d1/h3
+
+ suppose we encounter the hard links h1 first , then h2 and then h3.
+ while accounting for h1, we account the size until root(d2->d1->/)
+ while accounting for h2, we need to account only till d3. (as d1
+ and / are accounted for this inode).
+ while accounting for h3 we should not account at all.. as all
+ its ancestors are already accounted for same inode.
+
+ curr_dict : dict of hardlinks that were seen and
+ accounted by the current iteration.
+ accounted_dict : dict of hardlinks that has already been
+ accounted for.
+
+ size : size of the object as accounted by the
+ curr_iteration.
+
+ Return vale:
+ curr_size : size reduced by hardlink sizes for those
+ hardlinks that has already been accounted
+ in current subtree.
+ Also delete the duplicate link from curr_dict.
+ '''
+
+ dual_accounted_links = set(curr_dict.keys()) & set(accounted_dict.keys())
+ for link in dual_accounted_links:
+ curr_size = curr_size - curr_dict[link]
+ del curr_dict[link]
+ return curr_size
+
+
+def fix_xattr(file_name, mark_dirty):
+ global obj_fix_count
+ global mnt_path
+
+ if mnt_path is None:
+ return
+ if mark_dirty:
+ print("MARKING DIRTY: " + file_name)
+ out = subprocess.check_output (["/usr/bin/setfattr", "-n",
+ "trusted.glusterfs.quota.dirty",
+ "-v", IS_DIRTY, file_name])
+ rel_path = os.path.relpath(file_name, brick_path)
+ print("stat on " + mnt_path + "/" + rel_path)
+ stbuf = os.lstat(mnt_path + "/" + rel_path)
+
+ obj_fix_count += 1
+
+def get_quota_xattr_brick(dpath):
+ out = subprocess.check_output (["/usr/bin/getfattr", "--no-dereference",
+ "-d", "-m.", "-e", "hex", dpath])
+ pairs = out.splitlines()
+
+ '''
+ Sample output to be parsed:
+ [root@dhcp35-100 mnt]# getfattr -d -m. -e hex /export/b1/B0/d14/d13/
+ # file: export/b1/B0/d14/d13/
+ security.selinux=0x756e636f6e66696e65645f753a6f626a6563745f723a7573725f743a733000
+ trusted.gfid=0xbae5e0d2d05043de9fd851d91ecf63e8
+ trusted.glusterfs.dht=0x000000010000000000000000ffffffff
+ trusted.glusterfs.dht.mds=0x00000000
+ trusted.glusterfs.quota.6a7675a3-b85a-40c5-830b-de9229d702ce.contri.39=0x00000000000000000000000000000000000000000000000e
+ trusted.glusterfs.quota.dirty=0x3000
+ trusted.glusterfs.quota.size.39=0x00000000000000000000000000000000000000000000000e
+ '''
+
+ '''
+ xattr_dict dictionary holds quota related xattrs
+ eg:
+ '''
+
+ xattr_dict = {}
+ xattr_dict['parents'] = {}
+
+ for xattr in pairs[1:]:
+ xattr = xattr.decode("utf-8")
+ xattr_key = xattr.split("=")[0]
+ if xattr_key == "":
+ # skip any empty lines
+ continue
+ elif not re.search("quota", xattr_key):
+ # skip all non quota xattr.
+ continue
+
+ xattr_value = xattr.split("=")[1]
+ if re.search("contri", xattr_key):
+
+ xattr_version = xattr_key.split(".")[5]
+ if 'version' not in xattr_dict:
+ xattr_dict['version'] = xattr_version
+ else:
+ if xattr_version != xattr_dict['version']:
+ print("Multiple xattr version found")
+
+
+ cur_parent = xattr_key.split(".")[3]
+ if cur_parent not in xattr_dict['parents']:
+ xattr_dict['parents'][cur_parent] = {}
+
+ contri_dict = xattr_dict['parents'][cur_parent]
+ if len(xattr_value) == 34:
+ # 34 bytes implies file contri xattr
+ # contri format =0x< 16bytes file size><16bytes file count>
+ # size is obtained in iatt, file count = 1, dir count=0
+ contri_dict['contri_size'] = int(xattr_value[2:18], 16)
+ contri_dict['contri_file_count'] = int(xattr_value[18:34], 16)
+ contri_dict['contri_dir_count'] = 0
+ else:
+ # This is a directory contri.
+ contri_dict['contri_size'] = int(xattr_value[2:18], 16)
+ contri_dict['contri_file_count'] = int(xattr_value[18:34], 16)
+ contri_dict['contri_dir_count'] = int(xattr_value[34:], 16)
+
+ elif re.search("size", xattr_key):
+ xattr_dict['size'] = int(xattr_value[2:18], 16)
+ xattr_dict['file_count'] = int(xattr_value[18:34], 16)
+ xattr_dict['dir_count'] = int(xattr_value[34:], 16)
+ elif re.search("dirty", xattr_key):
+ if xattr_value == IS_CLEAN:
+ xattr_dict['dirty'] = False
+ elif xattr_value == IS_DIRTY:
+ xattr_dict['dirty'] = True
+ elif re.search("limit_objects", xattr_key):
+ xattr_dict['limit_objects'] = int(xattr_value[2:18], 16)
+ elif re.search("limit_set", xattr_key):
+ xattr_dict['limit_set'] = int(xattr_value[2:18], 16)
+
+ return xattr_dict
+
+def verify_file_xattr(path, stbuf = None):
+
+ global file_count
+ file_count += 1
+
+ if stbuf is None:
+ stbuf = os.lstat(path)
+
+ xattr_dict = get_quota_xattr_brick(path)
+
+ for parent in xattr_dict['parents']:
+ contri_dict = xattr_dict['parents'][parent]
+
+ if 'contri_size' not in contri_dict or \
+ 'contri_file_count' not in contri_dict or \
+ 'contri_dir_count' not in contri_dict:
+ print_msg(QUOTA_META_ABSENT, path, xattr_dict, stbuf)
+ fix_xattr(path, False)
+ return
+ elif size_differs_lot(contri_dict['contri_size'], stbuf.st_size):
+ print_msg(QUOTA_SIZE_MISMATCH, path, xattr_dict, stbuf)
+ fix_xattr(path, False)
+ return
+
+ if verbose_mode is True:
+ print_msg(QUOTA_VERBOSE, path, xattr_dict, stbuf)
+
+
+def verify_dir_xattr(path, dir_size):
+
+ global dir_count
+ dir_count += 1
+ xattr_dict = get_quota_xattr_brick(path)
+
+ stbuf = os.lstat(path)
+
+ for parent in xattr_dict['parents']:
+ contri_dict = xattr_dict['parents'][parent]
+
+ if 'size' not in xattr_dict or 'contri_size' not in contri_dict:
+ print_msg(QUOTA_META_ABSENT, path)
+ fix_xattr(path, True)
+ return
+ elif size_differs_lot(dir_size, xattr_dict['size']) or \
+ size_differs_lot(contri_dict['contri_size'], xattr_dict['size']):
+ print_msg(QUOTA_SIZE_MISMATCH, path, xattr_dict, stbuf, dir_size)
+ fix_xattr(path, True)
+ return
+
+ if verbose_mode is True:
+ print_msg("VERBOSE", path, xattr_dict, stbuf, dir_size)
+
+
+def walktree(t_dir, hard_link_dict):
+ '''recursively descend the directory tree rooted at dir,
+ aggregating the size
+ t_dir : directory to walk over.
+ hard_link_dict : dict of inodes with multiple hard_links under t_dir
+ '''
+ global aggr_size
+ aggr_size[t_dir] = 0
+
+ for entry in os.listdir(t_dir):
+ pathname = os.path.join(t_dir, entry)
+ stbuf = os.lstat(pathname)
+ if S_ISDIR(stbuf.st_mode):
+ # It's a directory, recurse into it
+ if entry == '.glusterfs':
+ print("skipping " + pathname)
+ continue
+ descendent_hardlinks = {}
+ subtree_size = walktree(pathname, descendent_hardlinks)
+
+ subtree_size = fix_hardlink_accounting(descendent_hardlinks,
+ hard_link_dict,
+ subtree_size)
+
+ aggr_size[t_dir] = aggr_size[t_dir] + subtree_size
+
+ elif S_ISREG(stbuf.st_mode) or S_ISLNK(stbuf.st_mode):
+ # Even a symbolic link file may have multiple hardlinks.
+
+ file_size = stbuf.st_size
+ if stbuf.st_nlink > 2:
+ # send a single element dict to check if file is accounted.
+ file_size = fix_hardlink_accounting({stbuf.st_ino:stbuf.st_size},
+ hard_link_dict,
+ stbuf.st_size)
+
+ if file_size == 0:
+ print_msg("HARD_LINK (skipped)", pathname, "",
+ stbuf)
+ else:
+ print_msg("HARD_LINK (accounted)", pathname, "",
+ stbuf)
+ hard_link_dict[stbuf.st_ino] = stbuf.st_size
+
+ if t_dir in aggr_size:
+ aggr_size[t_dir] = aggr_size[t_dir] + file_size
+ else:
+ aggr_size[t_dir] = file_size
+ verify_file_xattr(pathname, stbuf)
+
+ else:
+ # Unknown file type, print a message
+ print('Skipping %s, due to file mode' % (pathname))
+
+ if t_dir not in aggr_size:
+ aggr_size[t_dir] = 0
+
+ verify_dir_xattr(t_dir, aggr_size[t_dir])
+ # du also accounts for t_directory sizes
+ # aggr_size[t_dir] += 4096
+
+ #cleanup
+ ret = aggr_size[t_dir]
+ del aggr_size[t_dir]
+ return ret
+
+
+if __name__ == '__main__':
+
+ parser = argparse.ArgumentParser(description='Diagnose quota accounting issues.', epilog=epilog_msg)
+ parser.add_argument('brick_path', nargs=1,
+ help='The brick path (or any descendent sub-directory of brick path)',
+ )
+ parser.add_argument('--full-logs', dest='verbose', action='store_true',
+ help='''
+ log all the xattr values and stat values reported
+ for analysis. [CAUTION: This can give lot of output
+ depending on FS depth. So one has to make sure enough
+ disk space exists if redirecting to file]
+ '''
+ )
+ parser.add_argument('--fix-issues', metavar='mount_path', dest='mnt', action='store',
+ help='''
+ fix accounting issues where the xattr values disagree
+ with stat sizes reported by gluster. A mount is also
+ required for this option to be used.
+ [CAUTION: This will directly modify backend xattr]
+ '''
+ )
+ parser.add_argument('--sub-dir', metavar='sub_dir', dest='sub_dir', action='store',
+ help='''
+ limit the crawling and accounting verification/correction
+ to a specific subdirectory.
+ '''
+ )
+
+ args = parser.parse_args()
+ verbose_mode = args.verbose
+ brick_path = args.brick_path[0]
+ sub_dir = args.sub_dir
+ mnt_path = args.mnt
+ hard_link_dict = {}
+ if sub_dir is not None:
+ walktree(os.path.join(brick_path, sub_dir), hard_link_dict)
+ else:
+ walktree(brick_path, hard_link_dict)
+
+ print("Files verified : " + str(file_count))
+ print("Directories verified : " + str(dir_count))
+ if mnt_path is not None:
+ print("Objects Fixed : " + str(obj_fix_count))
diff --git a/extras/quota/xattr_analysis.py b/extras/quota/xattr_analysis.py
new file mode 100755
index 00000000000..7bd7d96374c
--- /dev/null
+++ b/extras/quota/xattr_analysis.py
@@ -0,0 +1,73 @@
+#!/usr/bin/python3
+# Below script has two purposes
+# 1. Display xattr of entire FS tree in a human readable form
+# 2. Display all the directory where contri and size mismatch.
+# (If there are any directory with contri and size mismatch that are not dirty
+# then that highlights a propagation issue)
+# The script takes only one input LOG _FILE generated from the command,
+# find <brick_path> | xargs getfattr -d -m. -e hex > log_gluster_xattr
+
+from __future__ import print_function
+import re
+import subprocess
+import sys
+from hurry.filesize import size
+
+if len(sys.argv) < 2:
+ sys.exit('Usage: %s log_gluster_xattr \n'
+ 'to generate log_gluster_xattr use: \n'
+ 'find <brick_path> | xargs getfattr -d -m. -e hex > log_gluster_xattr'
+ % sys.argv[0])
+LOG_FILE=sys.argv[1]
+
+def get_quota_xattr_brick():
+ out = subprocess.check_output (["/usr/bin/cat", LOG_FILE])
+ pairs = out.splitlines()
+
+ xdict = {}
+ mismatch_size = [('====contri_size===', '====size====')]
+ for xattr in pairs:
+ k = xattr.split("=")[0]
+ if re.search("# file:", k):
+ print(xdict)
+ filename=k
+ print("=====" + filename + "=======")
+ xdict = {}
+ elif k is "":
+ pass
+ else:
+ print(xattr)
+ v = xattr.split("=")[1]
+ if re.search("contri", k):
+ if len(v) == 34:
+ # for files size is obtained in iatt, file count should be 1, dir count=0
+ xdict['contri_file_count'] = int(v[18:34], 16)
+ xdict['contri_dir_count'] = 0
+ else:
+ xdict['contri_size'] = size(int(v[2:18], 16))
+ xdict['contri_file_count'] = int(v[18:34], 16)
+ xdict['contri_dir_count'] = int(v[34:], 16)
+ elif re.search("size", k):
+ xdict['size'] = size(int(v[2:18], 16))
+ xdict['file_count'] = int(v[18:34], 16)
+ xdict['dir_count'] = int(v[34:], 16)
+ elif re.search("dirty", k):
+ if v == '0x3000':
+ xdict['dirty'] = False
+ elif v == '0x3100':
+ xdict['dirty'] = True
+ elif re.search("limit_objects", k):
+ xdict['limit_objects'] = int(v[2:18], 16)
+ elif re.search("limit_set", k):
+ xdict['limit_set'] = size(int(v[2:18], 16))
+
+ if 'size' in xdict and 'contri_size' in xdict and xdict['size'] != xdict['contri_size']:
+ mismatch_size.append((xdict['contri_size'], xdict['size'], filename))
+
+ for values in mismatch_size:
+ print(values)
+
+
+if __name__ == '__main__':
+ get_quota_xattr_brick()
+
diff --git a/extras/rebalance.py b/extras/rebalance.py
new file mode 100755
index 00000000000..37c68ebbb42
--- /dev/null
+++ b/extras/rebalance.py
@@ -0,0 +1,309 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+
+import atexit
+import copy
+import optparse
+import os
+import pipes
+import shutil
+import string
+import subprocess
+import sys
+import tempfile
+import volfilter
+import platform
+
+# It's just more convenient to have named fields.
+class Brick:
+ def __init__ (self, path, name):
+ self.path = path
+ self.sv_name = name
+ self.size = 0
+ self.curr_size = 0
+ self.good_size = 0
+ def set_size (self, size):
+ self.size = size
+ def set_range (self, rs, re):
+ self.r_start = rs
+ self.r_end = re
+ self.curr_size = self.r_end - self.r_start + 1
+ def __repr__ (self):
+ value = self.path[:]
+ value += "(%d," % self.size
+ if self.curr_size:
+ value += "0x%x,0x%x)" % (self.r_start, self.r_end)
+ else:
+ value += "-)"
+ return value
+
+def get_bricks (host, vol):
+ t = pipes.Template()
+ t.prepend("gluster --remote-host=%s system getspec %s"%(host, vol), ".-")
+ return t.open(None, "r")
+
+def generate_stanza (vf, all_xlators, cur_subvol):
+ sv_list = []
+ for sv in cur_subvol.subvols:
+ generate_stanza(vf, all_xlators, sv)
+ sv_list.append(sv.name)
+ vf.write("volume %s\n" % cur_subvol.name)
+ vf.write(" type %s\n" % cur_subvol.type)
+ for kvpair in cur_subvol.opts.items():
+ vf.write(" option %s %s\n" % kvpair)
+ if sv_list:
+ vf.write(" subvolumes %s\n" % ''.join(sv_list))
+ vf.write("end-volume\n\n")
+
+
+def mount_brick (localpath, all_xlators, dht_subvol):
+
+ # Generate a volfile.
+ vf_name = localpath + ".vol"
+ vf = open(vf_name, "w")
+ generate_stanza(vf, all_xlators, dht_subvol)
+ vf.flush()
+ vf.close()
+
+ # Create a brick directory and mount the brick there.
+ os.mkdir(localpath)
+ subprocess.call(["glusterfs", "-f", vf_name, localpath])
+
+# We use the command-line tools because there's no getxattr support in the
+# Python standard library (which is ridiculous IMO). Adding the xattr package
+# from PyPI would create a new and difficult dependency because the bits to
+# satisfy it don't seem to exist in Fedora. We already expect the command-line
+# tools to be there, so it's safer just to rely on them.
+#
+# We might have to revisit this if we get as far as actually issuing millions
+# of setxattr requests. Even then, it might be better to do that part with a C
+# program which has only a build-time dependency.
+def get_range (brick):
+ t = pipes.Template()
+ cmd = "getfattr -e hex -n trusted.glusterfs.dht %s 2> /dev/null"
+ t.prepend(cmd%brick, ".-")
+ t.append("grep ^trusted.glusterfs.dht=", "--")
+ f = t.open(None, "r")
+ try:
+ value = f.readline().rstrip().split('=')[1][2:]
+ except:
+ print("could not get layout for %s (might be OK)" % brick)
+ return None
+ v_start = int("0x"+value[16:24], 16)
+ v_end = int("0x"+value[24:32], 16)
+ return (v_start, v_end)
+
+def calc_sizes (bricks, total):
+ leftover = 1 << 32
+ for b in bricks:
+ if b.size:
+ b.good_size = (b.size << 32) / total
+ leftover -= b.good_size
+ else:
+ b.good_size = 0
+ if leftover:
+ # Add the leftover to an old brick if we can.
+ for b in bricks:
+ if b.good_size:
+ b.good_size += leftover
+ break
+ else:
+ # Fine, just add it wherever.
+ bricks[0].good_size += leftover
+
+# Normalization means sorting the bricks by r_start and (b) ensuring that there
+# are no gaps.
+def normalize (in_bricks):
+ out_bricks = []
+ curr_hash = 0
+ used = 0
+ while curr_hash < (1<<32):
+ curr_best = None
+ for b in in_bricks:
+ if b.r_start == curr_hash:
+ used += 1
+ out_bricks.append(b)
+ in_bricks.remove(b)
+ curr_hash = b.r_end + 1
+ break
+ else:
+ print("gap found at 0x%08x" % curr_hash)
+ sys.exit(1)
+ return out_bricks + in_bricks, used
+
+def get_score (bricks):
+ score = 0
+ curr_hash = 0
+ for b in bricks:
+ if not b.curr_size:
+ curr_hash += b.good_size
+ continue
+ new_start = curr_hash
+ curr_hash += b.good_size
+ new_end = curr_hash - 1
+ if new_start > b.r_start:
+ max_start = new_start
+ else:
+ max_start = b.r_start
+ if new_end < b.r_end:
+ min_end = new_end
+ else:
+ min_end = b.r_end
+ if max_start <= min_end:
+ score += (min_end - max_start + 1)
+ return score
+
+if __name__ == "__main__":
+
+ my_usage = "%prog [options] server volume [directory]"
+ parser = optparse.OptionParser(usage=my_usage)
+ parser.add_option("-f", "--free-space", dest="free_space",
+ default=False, action="store_true",
+ help="use free space instead of total space")
+ parser.add_option("-l", "--leave-mounted", dest="leave_mounted",
+ default=False, action="store_true",
+ help="leave subvolumes mounted")
+ parser.add_option("-v", "--verbose", dest="verbose",
+ default=False, action="store_true",
+ help="verbose output")
+ options, args = parser.parse_args()
+
+ if len(args) == 3:
+ fix_dir = args[2]
+ else:
+ if len(args) != 2:
+ parser.print_help()
+ sys.exit(1)
+ fix_dir = None
+ hostname, volname = args[:2]
+
+ # Make sure stuff gets cleaned up, even if there are exceptions.
+ orig_dir = os.getcwd()
+ work_dir = tempfile.mkdtemp()
+ bricks = []
+ def cleanup_workdir ():
+ os.chdir(orig_dir)
+ if options.verbose:
+ print("Cleaning up %s" % work_dir)
+ for b in bricks:
+ subprocess.call(["umount", b.path])
+ shutil.rmtree(work_dir)
+ if not options.leave_mounted:
+ atexit.register(cleanup_workdir)
+ os.chdir(work_dir)
+
+ # Mount each brick individually, so we can issue brick-specific calls.
+ if options.verbose:
+ print("Mounting subvolumes...")
+ index = 0
+ volfile_pipe = get_bricks(hostname, volname)
+ all_xlators, last_xlator = volfilter.load(volfile_pipe)
+ for dht_vol in all_xlators.itervalues():
+ if dht_vol.type == "cluster/distribute":
+ break
+ else:
+ print("no DHT volume found")
+ sys.exit(1)
+ for sv in dht_vol.subvols:
+ #print "found subvol %s" % sv.name
+ lpath = "%s/brick%s" % (work_dir, index)
+ index += 1
+ mount_brick(lpath, all_xlators, sv)
+ bricks.append(Brick(lpath, sv.name))
+ if index == 0:
+ print("no bricks")
+ sys.exit(1)
+
+ # Collect all of the sizes.
+ if options.verbose:
+ print("Collecting information...")
+ total = 0
+ for b in bricks:
+ info = os.statvfs(b.path)
+ # On FreeBSD f_bsize (info[0]) contains the optimal I/O size,
+ # not the block size as it's found on Linux. In this case we
+ # use f_frsize (info[1]).
+ if platform.system() == 'FreeBSD':
+ bsize = info[1]
+ else:
+ bsize = info[0]
+ # We want a standard unit even if different bricks use
+ # different block sizes. The size is chosen to avoid overflows
+ # for very large bricks with very small block sizes, but also
+ # accommodate filesystems which use very large block sizes to
+ # cheat on benchmarks.
+ blocksper100mb = 104857600 / bsize
+ if options.free_space:
+ size = info[3] / blocksper100mb
+ else:
+ size = info[2] / blocksper100mb
+ if size <= 0:
+ print("brick %s has invalid size %d" % (b.path, size))
+ sys.exit(1)
+ b.set_size(size)
+ total += size
+
+ # Collect all of the layout information.
+ for b in bricks:
+ hash_range = get_range(b.path)
+ if hash_range is not None:
+ rs, re = hash_range
+ if rs > re:
+ print("%s has backwards hash range" % b.path)
+ sys.exit(1)
+ b.set_range(hash_range[0], hash_range[1])
+
+ if options.verbose:
+ print("Calculating new layouts...")
+ calc_sizes(bricks, total)
+ bricks, used = normalize(bricks)
+
+ # We can't afford O(n!) here, but O(n^2) should be OK and the result
+ # should be almost as good.
+ while used < len(bricks):
+ best_place = used
+ best_score = get_score(bricks)
+ for i in range(used):
+ new_bricks = bricks[:]
+ del new_bricks[used]
+ new_bricks.insert(i, bricks[used])
+ new_score = get_score(new_bricks)
+ if new_score > best_score:
+ best_place = i
+ best_score = new_score
+ if best_place != used:
+ nb = bricks[used]
+ del bricks[used]
+ bricks.insert(best_place, nb)
+ used += 1
+
+ # Finalize whatever we decided on.
+ curr_hash = 0
+ for b in bricks:
+ b.r_start = curr_hash
+ curr_hash += b.good_size
+ b.r_end = curr_hash - 1
+
+ print("Here are the xattr values for your size-weighted layout:")
+ for b in bricks:
+ print(" %s: 0x0000000200000000%08x%08x" % (
+ b.sv_name, b.r_start, b.r_end))
+
+ if fix_dir:
+ if options.verbose:
+ print("Fixing layout for %s" % fix_dir)
+ for b in bricks:
+ value = "0x0000000200000000%08x%08x" % (
+ b.r_start, b.r_end)
+ path = "%s/%s" % (b.path, fix_dir)
+ cmd = "setfattr -n trusted.glusterfs.dht -v %s %s" % (
+ value, path)
+ print(cmd)
+
+ if options.leave_mounted:
+ print("The following subvolumes are still mounted:")
+ for b in bricks:
+ print("%s on %s" % (b.sv_name, b.path))
+ print("Don't forget to clean up when you're done.")
+
diff --git a/extras/run-gluster.tmpfiles.in b/extras/run-gluster.tmpfiles.in
new file mode 100644
index 00000000000..329f2dde6db
--- /dev/null
+++ b/extras/run-gluster.tmpfiles.in
@@ -0,0 +1,2 @@
+# hardcoding /run for now, should be detected while building from source?
+d /run/gluster 0775 gluster gluster -
diff --git a/extras/snap_scheduler/Makefile.am b/extras/snap_scheduler/Makefile.am
new file mode 100644
index 00000000000..782f139016f
--- /dev/null
+++ b/extras/snap_scheduler/Makefile.am
@@ -0,0 +1,9 @@
+snap_schedulerdir = $(sbindir)/
+
+if WITH_SERVER
+snap_scheduler_SCRIPTS = gcron.py snap_scheduler.py conf.py
+endif
+
+EXTRA_DIST = gcron.py snap_scheduler.py conf.py
+
+CLEANFILES =
diff --git a/extras/snap_scheduler/README.md b/extras/snap_scheduler/README.md
new file mode 100644
index 00000000000..1316bb76469
--- /dev/null
+++ b/extras/snap_scheduler/README.md
@@ -0,0 +1,125 @@
+Snapshot Scheduler
+==============================
+
+SUMMARY
+-------
+
+GlusterFS volume snapshot provides point-in-time copy of a GlusterFS volume. Currently, GlusterFS volume snapshots can be easily scheduled by setting up cron jobs on one of the nodes in the GlusterFS trusted storage pool. This has a single point failure (SPOF), as scheduled jobs can be missed if the node running the cron jobs dies.
+
+We can avoid the SPOF by distributing the cron jobs to all nodes of the trusted storage pool.
+
+DETAILED DESCRIPTION
+--------------------
+
+The solution to the above problems involves the usage of:
+
+* A shared storage - This can be any shared storage (another gluster volume, a NFS mount, etc.) that will be used to share the schedule configuration and will help in the coordination of the jobs.
+* An agent - This agent will perform the actual snapshot commands, instead of cron. It will contain the logic to perform coordinated snapshots.
+* A helper script - This script will allow the user to initialise the scheduler on the local node, enable/disable scheduling, add/edit/list/delete snapshot schedules.
+* cronie - It is the default cron daemon shipped with RHEL. It invokes the agent at the appropriate intervals as mentioned by the user to perform the snapshot operation on the volume as mentioned by the user in the schedule.
+
+INITIAL SETUP
+-------------
+
+The administrator needs to create a shared storage that can be available to nodes across the cluster. A GlusterFS volume can also be used for the same. It is preferable that the *shared volume* be a replicate volume to avoid SPOF.
+
+Once the shared storage is created, it should be mounted on all nodes in the trusted storage pool which will be participating in the scheduling. The location where the shared_storage should be mounted (/var/run/gluster/snaps/shared_storage) in these nodes is fixed and is not configurable. Each node participating in the scheduling then needs to perform an initialisation of the snapshot scheduler by invoking the following:
+
+snap_scheduler.py init
+
+NOTE: This command needs to be run on all the nodes participating in the scheduling
+
+HELPER SCRIPT
+-------------
+
+The helper script(snap_scheduler.py) will initialise the scheduler on the local node, enable/disable scheduling, add/edit/list/delete snapshot schedules.
+
+a) snap_scheduler.py init
+
+This command initialises the snap_scheduler and interfaces it with the crond running on the local node. This is the first step, before executing any scheduling related commands from a node.
+
+NOTE: The helper script needs to be run with this option on all the nodes participating in the scheduling. Other options of the helper script can be run independently from any node, where initialisation has been successfully completed.
+
+b) snap_scheduler.py enable
+
+The snap scheduler is disabled by default after initialisation. This command enables the snap scheduler.
+
+c) snap_scheduler.py disable
+
+This command disables the snap scheduler.
+
+d) snap_scheduler.py status
+
+This command displays the current status(Enabled/Disabled) of the snap scheduler.
+
+e) snap_scheduler.py add "Job Name" "Schedule" "Volume Name"
+
+This command adds a new snapshot schedule. All the arguments must be provided within double-quotes(""). It takes three arguments:
+
+-> Job Name: This name uniquely identifies this particular schedule, and can be used to reference this schedule for future events like edit/delete. If a schedule already exists for the specified Job Name, the add command will fail.
+
+-> Schedule: The schedules are accepted in the format crond understands:-
+
+Example of job definition:
+.---------------- minute (0 - 59)
+| .------------- hour (0 - 23)
+| | .---------- day of month (1 - 31)
+| | | .------- month (1 - 12) OR jan,feb,mar,apr ...
+| | | | .---- day of week (0 - 6) (Sunday=0 or 7) OR sun,mon,tue,wed,thu,fri,sat
+| | | | |
+* * * * * user-name command to be executed
+Although we accept all valid cron schedules, currently we support granularity of snapshot schedules to a maximum of half-hourly snapshots.
+
+-> Volume Name: The name of the volume on which the scheduled snapshot operation will be performed.
+
+f) snap_scheduler.py edit "Job Name" "Schedule" "Volume Name"
+
+This command edits an existing snapshot schedule. It takes the same three arguments that the add option takes. All the arguments must be provided within double-quotes(""). If a schedule does not exists for the specified Job Name, the edit command will fail.
+
+g) snap_scheduler.py delete "Job Name"
+
+This command deletes an existing snapshot schedule. It takes the job name of the schedule as argument. The argument must be provided within double-quotes(""). If a schedule does not exists for the specified Job Name, the delete command will fail.
+
+h) snap_scheduler.py list
+
+This command lists the existing snapshot schedules in the following manner: Pseudocode:
+
+# snap_scheduler.py list
+JOB_NAME SCHEDULE OPERATION VOLUME NAME
+--------------------------------------------------------------------
+Job0 * * * * * Snapshot Create test_vol
+
+THE AGENT
+---------
+
+The snapshots scheduled with the help of the helper script, are read by crond which then invokes the agent(gcron.py) at the scheduled intervals to perform the snapshot operations on the specified volumes. It then performs the scheduled snapshots using the following algorithm to coordinate.
+
+start_time = get current time
+lock_file = job_name passed as an argument
+vol_name = volume name psased as an argument
+try POSIX locking the $lock_file
+ if lock is obtained, then
+ mod_time = Get modification time of $entry
+ if $mod_time < $start_time, then
+ Take snapshot of $entry.name (Volume name)
+ if snapshot failed, then
+ log the failure
+ Update modification time of $entry to current time
+ unlock the $entry
+
+The coordination with other scripts running on other nodes, is handled by the use of POSIX locks. All the instances of the script will attempt to lock the lock_file which is essentialy an empty file with the job name, and one which gets the lock will take the snapshot.
+
+To prevent redoing a done task, the script will make use of the mtime attribute of the entry. At the beginning execution, the script would have saved its start time. Once the script obtains the lock on the lock_file, before taking the snapshot, it compares the mtime of the entry with the start time. The snapshot will only be taken if the mtime is smaller than start time. Once the snapshot command completes, the script will update the mtime of the lock_file to the current time before unlocking.
+
+If a snapshot command fails, the script will log the failure (in syslog) and continue with its operation. It will not attempt to retry the failed snapshot in the current schedule, but will attempt it again in the next schedules. It is left to the administrator to monitor the logs and decide what to do after a failure.
+
+ASSUMPTIONS AND LIMITATIONS
+---------------------------
+
+It is assumed that all nodes in the have their times synced using NTP or any other mechanism. This is a hard requirement for this feature to work.
+
+The administrator needs to have python2.7 or higher installed, as well as the argparse module installed, to be able to use the helper script(snap_scheduler.py).
+
+There is a latency of one minute, between providing a command by the helper script and that command taking effect. Hence, currently we do not support snapshot schedules with per minute granularity.
+
+The administrator can however leverage the scheduler to schedule snapshots with granularity of half-hourly/hourly/daily/weekly/monthly/yearly periodic intervals. They can also schedule snapshots, which are customised mentioning which minute of the hour, which day of the week, which week of the month, and which month of the year, they want to schedule the snapshot operation.
diff --git a/extras/snap_scheduler/conf.py.in b/extras/snap_scheduler/conf.py.in
new file mode 100644
index 00000000000..6dcca0534a7
--- /dev/null
+++ b/extras/snap_scheduler/conf.py.in
@@ -0,0 +1,11 @@
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+GLUSTERFS_LIBEXECDIR = '@GLUSTERFS_LIBEXECDIR@'
diff --git a/extras/snap_scheduler/gcron.py b/extras/snap_scheduler/gcron.py
new file mode 100755
index 00000000000..0e4df77d481
--- /dev/null
+++ b/extras/snap_scheduler/gcron.py
@@ -0,0 +1,190 @@
+#!/usr/bin/python3
+#
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+from __future__ import print_function
+import subprocess
+import os
+import os.path
+import sys
+import time
+import logging
+import logging.handlers
+import fcntl
+
+
+GCRON_TASKS = "/run/gluster/shared_storage/snaps/glusterfs_snap_cron_tasks"
+GCRON_CROND_TASK = "/etc/cron.d/glusterfs_snap_cron_tasks"
+GCRON_RELOAD_FLAG = "/var/run/gluster/crond_task_reload_flag"
+LOCK_FILE_DIR = "/run/gluster/shared_storage/snaps/lock_files/"
+log = logging.getLogger("gcron-logger")
+start_time = 0.0
+
+
+def initLogger(script_name):
+ log.setLevel(logging.DEBUG)
+ logFormat = "[%(asctime)s %(filename)s:%(lineno)s %(funcName)s] "\
+ "%(levelname)s %(message)s"
+ formatter = logging.Formatter(logFormat)
+
+ sh = logging.handlers.SysLogHandler()
+ sh.setLevel(logging.ERROR)
+ sh.setFormatter(formatter)
+
+ process = subprocess.Popen(["gluster", "--print-logdir"],
+ stdout=subprocess.PIPE,
+ universal_newlines=True)
+ out, err = process.communicate()
+ if process.returncode == 0:
+ logfile = os.path.join(out.strip(), script_name[:-3]+".log")
+
+ fh = logging.FileHandler(logfile)
+ fh.setLevel(logging.DEBUG)
+ fh.setFormatter(formatter)
+
+ log.addHandler(sh)
+ log.addHandler(fh)
+
+
+def takeSnap(volname="", snapname=""):
+ success = True
+ if volname == "":
+ log.debug("No volname given")
+ return False
+ if snapname == "":
+ log.debug("No snapname given")
+ return False
+
+ cli = ["gluster",
+ "snapshot",
+ "create",
+ snapname,
+ volname]
+ log.debug("Running command '%s'", " ".join(cli))
+
+ p = subprocess.Popen(cli, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ out, err = p.communicate()
+ rv = p.returncode
+
+ log.debug("Command '%s' returned '%d'", " ".join(cli), rv)
+
+ if rv:
+ log.error("Snapshot of %s failed", volname)
+ log.error("Command output:")
+ log.error(err)
+ success = False
+ else:
+ log.info("Snapshot of %s successful", volname)
+
+ return success
+
+
+def doJob(name, lockFile, jobFunc, volname):
+ success = True
+ try:
+ f = os.open(lockFile, os.O_CREAT | os.O_RDWR | os.O_NONBLOCK)
+ try:
+ fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ mtime = os.path.getmtime(lockFile)
+ global start_time
+ log.debug("%s last modified at %s", lockFile, time.ctime(mtime))
+ if mtime < start_time:
+ log.debug("Processing job %s", name)
+ if jobFunc(volname, name):
+ log.info("Job %s succeeded", name)
+ else:
+ log.error("Job %s failed", name)
+ success = False
+ os.utime(lockFile, None)
+ else:
+ log.info("Job %s has been processed already", name)
+ fcntl.flock(f, fcntl.LOCK_UN)
+ except (OSError, IOError):
+ log.info("Job %s is being processed by another agent", name)
+ os.close(f)
+ except (OSError, IOError) as e:
+ log.debug("Failed to open lock file %s : %s", lockFile, e)
+ log.error("Failed to process job %s", name)
+ success = False
+
+ return success
+
+
+def main():
+ script_name = os.path.basename(__file__)
+ initLogger(script_name)
+ global start_time
+ if sys.argv[1] == "--update":
+ if not os.path.exists(GCRON_TASKS):
+ # Create a flag in /var/run/gluster which indicates that this
+ # node doesn't have access to GCRON_TASKS right now, so that
+ # when the mount is available and GCRON_TASKS is available
+ # the flag will tell this routine to reload GCRON_CROND_TASK
+ try:
+ f = os.open(GCRON_RELOAD_FLAG,
+ os.O_CREAT | os.O_NONBLOCK, 0o644)
+ os.close(f)
+ except OSError as e:
+ if errno != EEXIST:
+ log.error("Failed to create %s : %s",
+ GCRON_RELOAD_FLAG, e)
+ output("Failed to create %s. Error: %s"
+ % (GCRON_RELOAD_FLAG, e))
+ return
+
+ if not os.path.exists(GCRON_CROND_TASK):
+ return
+
+ # As GCRON_TASKS exists now, we should check if GCRON_RELOAD_FLAG
+ # also exists. If so we should touch GCRON_CROND_TASK and remove
+ # the GCRON_RELOAD_FLAG
+ if os.path.exists(GCRON_RELOAD_FLAG):
+ try:
+ os.remove(GCRON_RELOAD_FLAG);
+ process = subprocess.Popen(["touch", "-h", GCRON_CROND_TASK],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ out, err = process.communicate()
+ if process.returncode != 0:
+ log.error("Failed to touch %s. Error: %s.",
+ GCRON_CROND_TASK, err)
+ except (IOError, OSError) as e:
+ log.error("Failed to touch %s. Error: %s.",
+ GCRON_CROND_TASK, e)
+ return
+ if os.lstat(GCRON_TASKS).st_mtime > \
+ os.lstat(GCRON_CROND_TASK).st_mtime:
+ try:
+ process = subprocess.Popen(["touch", "-h", GCRON_CROND_TASK],
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ out, err = process.communicate()
+ if process.returncode != 0:
+ log.error("Failed to touch %s. Error: %s.",
+ GCRON_CROND_TASK, err)
+ except IOError as e:
+ log.error("Failed to touch %s. Error: %s.",
+ GCRON_CROND_TASK, e)
+ return
+
+ volname = sys.argv[1]
+ jobname = sys.argv[2]
+ locking_file = os.path.join(LOCK_FILE_DIR, jobname)
+ log.debug("locking_file = %s", locking_file)
+ log.debug("volname = %s", volname)
+ log.debug("jobname = %s", jobname)
+
+ start_time = int(time.time())
+
+ doJob("Scheduled-" + jobname + "-" + volname, locking_file, takeSnap, volname)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/extras/snap_scheduler/snap_scheduler.py b/extras/snap_scheduler/snap_scheduler.py
new file mode 100755
index 00000000000..e8fcc449a9b
--- /dev/null
+++ b/extras/snap_scheduler/snap_scheduler.py
@@ -0,0 +1,941 @@
+#!/usr/bin/python3
+#
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+from __future__ import print_function
+import subprocess
+import os
+import os.path
+import logging
+import argparse
+import fcntl
+import logging.handlers
+import sys
+import shutil
+from errno import EEXIST
+from conf import GLUSTERFS_LIBEXECDIR
+sys.path.insert(1, GLUSTERFS_LIBEXECDIR)
+
+EVENTS_ENABLED = True
+try:
+ from events.eventtypes import SNAPSHOT_SCHEDULER_INITIALISED \
+ as EVENT_SNAPSHOT_SCHEDULER_INITIALISED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_INIT_FAILED \
+ as EVENT_SNAPSHOT_SCHEDULER_INIT_FAILED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_DISABLED \
+ as EVENT_SNAPSHOT_SCHEDULER_DISABLED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_DISABLE_FAILED \
+ as EVENT_SNAPSHOT_SCHEDULER_DISABLE_FAILED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_ENABLED \
+ as EVENT_SNAPSHOT_SCHEDULER_ENABLED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_ENABLE_FAILED \
+ as EVENT_SNAPSHOT_SCHEDULER_ENABLE_FAILED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_SCHEDULE_ADDED \
+ as EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_ADDED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_SCHEDULE_ADD_FAILED \
+ as EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_ADD_FAILED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_SCHEDULE_DELETED \
+ as EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_DELETED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_SCHEDULE_DELETE_FAILED \
+ as EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_DELETE_FAILED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_SCHEDULE_EDITED \
+ as EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_EDITED
+ from events.eventtypes import SNAPSHOT_SCHEDULER_SCHEDULE_EDIT_FAILED \
+ as EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_EDIT_FAILED
+except ImportError:
+ # Events APIs not installed, dummy eventtypes with None
+ EVENTS_ENABLED = False
+ EVENT_SNAPSHOT_SCHEDULER_INITIALISED = None
+ EVENT_SNAPSHOT_SCHEDULER_INIT_FAILED = None
+ EVENT_SNAPSHOT_SCHEDULER_DISABLED = None
+ EVENT_SNAPSHOT_SCHEDULER_DISABLE_FAILED = None
+ EVENT_SNAPSHOT_SCHEDULER_ENABLED = None
+ EVENT_SNAPSHOT_SCHEDULER_ENABLE_FAILED = None
+ EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_ADDED = None
+ EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_ADD_FAILED = None
+ EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_DELETED = None
+ EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_DELETE_FAILED = None
+ EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_EDITED = None
+ EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_EDIT_FAILED = None
+
+SCRIPT_NAME = "snap_scheduler"
+scheduler_enabled = False
+log = logging.getLogger(SCRIPT_NAME)
+SHARED_STORAGE_DIR="/run/gluster/shared_storage"
+GCRON_DISABLED = SHARED_STORAGE_DIR+"/snaps/gcron_disabled"
+GCRON_ENABLED = SHARED_STORAGE_DIR+"/snaps/gcron_enabled"
+GCRON_TASKS = SHARED_STORAGE_DIR+"/snaps/glusterfs_snap_cron_tasks"
+GCRON_CROND_TASK = "/etc/cron.d/glusterfs_snap_cron_tasks"
+LOCK_FILE_DIR = SHARED_STORAGE_DIR+"/snaps/lock_files/"
+LOCK_FILE = LOCK_FILE_DIR+"lock_file"
+TMP_FILE = SHARED_STORAGE_DIR+"/snaps/tmp_file"
+GCRON_UPDATE_TASK = "/etc/cron.d/gcron_update_task"
+CURRENT_SCHEDULER = SHARED_STORAGE_DIR+"/snaps/current_scheduler"
+tasks = {}
+longest_field = 12
+current_scheduler = ""
+
+INTERNAL_ERROR = 2
+SHARED_STORAGE_DIR_DOESNT_EXIST = 3
+SHARED_STORAGE_NOT_MOUNTED = 4
+ANOTHER_TRANSACTION_IN_PROGRESS = 5
+INIT_FAILED = 6
+SCHEDULING_ALREADY_DISABLED = 7
+SCHEDULING_ALREADY_ENABLED = 8
+NODE_NOT_INITIALISED = 9
+ANOTHER_SCHEDULER_ACTIVE = 10
+JOB_ALREADY_EXISTS = 11
+JOB_NOT_FOUND = 12
+INVALID_JOBNAME = 13
+INVALID_VOLNAME = 14
+INVALID_SCHEDULE = 15
+INVALID_ARG = 16
+VOLUME_DOES_NOT_EXIST = 17
+
+def print_error (error_num):
+ if error_num == INTERNAL_ERROR:
+ return "Internal Error"
+ elif error_num == SHARED_STORAGE_DIR_DOESNT_EXIST:
+ return "The shared storage directory ("+SHARED_STORAGE_DIR+")" \
+ " does not exist."
+ elif error_num == SHARED_STORAGE_NOT_MOUNTED:
+ return "The shared storage directory ("+SHARED_STORAGE_DIR+")" \
+ " is not mounted."
+ elif error_num == ANOTHER_TRANSACTION_IN_PROGRESS:
+ return "Another transaction is in progress."
+ elif error_num == INIT_FAILED:
+ return "Initialisation failed."
+ elif error_num == SCHEDULING_ALREADY_DISABLED:
+ return "Snapshot scheduler is already disabled."
+ elif error_num == SCHEDULING_ALREADY_ENABLED:
+ return "Snapshot scheduler is already enabled."
+ elif error_num == NODE_NOT_INITIALISED:
+ return "The node is not initialised."
+ elif error_num == ANOTHER_SCHEDULER_ACTIVE:
+ return "Another scheduler is active."
+ elif error_num == JOB_ALREADY_EXISTS:
+ return "The job already exists."
+ elif error_num == JOB_NOT_FOUND:
+ return "The job cannot be found."
+ elif error_num == INVALID_JOBNAME:
+ return "The job name is invalid."
+ elif error_num == INVALID_VOLNAME:
+ return "The volume name is invalid."
+ elif error_num == INVALID_SCHEDULE:
+ return "The schedule is invalid."
+ elif error_num == INVALID_ARG:
+ return "The argument is invalid."
+ elif error_num == VOLUME_DOES_NOT_EXIST:
+ return "The volume does not exist."
+
+def output(msg):
+ print("%s: %s" % (SCRIPT_NAME, msg))
+
+
+def initLogger():
+ log.setLevel(logging.DEBUG)
+ logFormat = "[%(asctime)s %(filename)s:%(lineno)s %(funcName)s] "\
+ "%(levelname)s %(message)s"
+ formatter = logging.Formatter(logFormat)
+
+ sh = logging.handlers.SysLogHandler()
+ sh.setLevel(logging.ERROR)
+ sh.setFormatter(formatter)
+
+ process = subprocess.Popen(["gluster", "--print-logdir"],
+ stdout=subprocess.PIPE, universal_newlines=True)
+ logfile = os.path.join(process.stdout.read()[:-1], SCRIPT_NAME + ".log")
+
+ fh = logging.FileHandler(logfile)
+ fh.setLevel(logging.DEBUG)
+ fh.setFormatter(formatter)
+
+ log.addHandler(sh)
+ log.addHandler(fh)
+
+
+def scheduler_status():
+ ret = INTERNAL_ERROR
+ global scheduler_enabled
+ try:
+ f = os.path.realpath(GCRON_TASKS)
+ if f != os.path.realpath(GCRON_ENABLED) or not os.path.exists(GCRON_ENABLED):
+ log.info("Snapshot scheduler is currently disabled.")
+ scheduler_enabled = False
+ else:
+ log.info("Snapshot scheduler is currently enabled.")
+ scheduler_enabled = True
+ ret = 0
+ except:
+ log.error("Failed to enable snapshot scheduling. Error: "
+ "Failed to check the status of %s.", GCRON_DISABLED)
+
+ return ret
+
+def enable_scheduler():
+ ret = scheduler_status()
+ if ret == 0:
+ if not scheduler_enabled:
+
+ # Check if another scheduler is active.
+ ret = get_current_scheduler()
+ if ret == 0:
+ if (current_scheduler != "none"):
+ print_str = "Failed to enable snapshot scheduling. " \
+ "Error: Another scheduler is active."
+ log.error(print_str)
+ output(print_str)
+ ret = ANOTHER_SCHEDULER_ACTIVE
+ return ret
+ else:
+ print_str = "Failed to get current scheduler info."
+ log.error(print_str)
+ output(print_str)
+ return ret
+
+ log.info("Enabling snapshot scheduler.")
+ try:
+ if os.path.exists(GCRON_DISABLED):
+ os.remove(GCRON_DISABLED)
+ if os.path.lexists(GCRON_TASKS):
+ os.remove(GCRON_TASKS)
+ try:
+ f = os.open(GCRON_ENABLED, os.O_CREAT | os.O_NONBLOCK,
+ 0o644)
+ os.close(f)
+ except OSError as e:
+ log.error("Failed to open %s. Error: %s.",
+ GCRON_ENABLED, e)
+ ret = INTERNAL_ERROR
+ return ret
+ os.symlink(GCRON_ENABLED, GCRON_TASKS)
+ update_current_scheduler("cli")
+ log.info("Snapshot scheduling is enabled")
+ output("Snapshot scheduling is enabled")
+ ret = 0
+ except OSError as e:
+ print_str = ("Failed to enable snapshot scheduling."
+ "Error: {{}}" + e)
+ log.error(print_str)
+ output(print_str)
+ ret = INTERNAL_ERROR
+ else:
+ print_str = "Failed to enable snapshot scheduling. " \
+ "Error: Snapshot scheduling is already enabled."
+ log.error(print_str)
+ output(print_str)
+ ret = SCHEDULING_ALREADY_ENABLED
+ else:
+ print_str = "Failed to enable snapshot scheduling. " \
+ "Error: Failed to check scheduler status."
+ log.error(print_str)
+ output(print_str)
+
+ return ret
+
+
+def disable_scheduler():
+ ret = scheduler_status()
+ if ret == 0:
+ if scheduler_enabled:
+ log.info("Disabling snapshot scheduler.")
+ try:
+ # Check if another scheduler is active. If not, then
+ # update current scheduler to "none". Else do nothing.
+ ret = get_current_scheduler()
+ if ret == 0:
+ if (current_scheduler == "cli"):
+ update_current_scheduler("none")
+ else:
+ print_str = "Failed to disable snapshot scheduling. " \
+ "Error: Failed to get current scheduler info."
+ log.error(print_str)
+ output(print_str)
+ return ret
+
+ if os.path.exists(GCRON_DISABLED):
+ os.remove(GCRON_DISABLED)
+ if os.path.lexists(GCRON_TASKS):
+ os.remove(GCRON_TASKS)
+ f = os.open(GCRON_DISABLED, os.O_CREAT, 0o644)
+ os.close(f)
+ os.symlink(GCRON_DISABLED, GCRON_TASKS)
+ log.info("Snapshot scheduling is disabled")
+ output("Snapshot scheduling is disabled")
+ ret = 0
+ except OSError as e:
+ print_str = ("Failed to disable snapshot scheduling. Error: "
+ + e)
+ log.error(print_str)
+ output(print_str)
+ ret = INTERNAL_ERROR
+ else:
+ print_str = "Failed to disable scheduling. " \
+ "Error: Snapshot scheduling is already disabled."
+ log.error(print_str)
+ output(print_str)
+ ret = SCHEDULING_ALREADY_DISABLED
+ else:
+ print_str = "Failed to disable snapshot scheduling. " \
+ "Error: Failed to check scheduler status."
+ log.error(print_str)
+ output(print_str)
+ ret = INTERNAL_ERROR
+
+ return ret
+
+
+def load_tasks_from_file():
+ global tasks
+ global longest_field
+ try:
+ with open(GCRON_ENABLED, 'r') as f:
+ for line in f:
+ line = line.rstrip('\n')
+ if not line:
+ break
+ line = line.split("gcron.py")
+ schedule = line[0].split("root")[0].rstrip(' ')
+ line = line[1].split(" ")
+ volname = line[1]
+ jobname = line[2]
+ longest_field = max(longest_field, len(jobname), len(volname),
+ len(schedule))
+ tasks[jobname] = schedule+":"+volname
+ f.close()
+ ret = 0
+ except IOError as e:
+ log.error("Failed to open %s. Error: %s.", GCRON_ENABLED, e)
+ ret = INTERNAL_ERROR
+
+ return ret
+
+
+def get_current_scheduler():
+ global current_scheduler
+ try:
+ with open(CURRENT_SCHEDULER, 'r') as f:
+ current_scheduler = f.readline().rstrip('\n')
+ f.close()
+ ret = 0
+ except IOError as e:
+ log.error("Failed to open %s. Error: %s.", CURRENT_SCHEDULER, e)
+ ret = INTERNAL_ERROR
+
+ return ret
+
+
+def list_schedules():
+ log.info("Listing snapshot schedules.")
+ ret = load_tasks_from_file()
+ if ret == 0:
+ if len(tasks) == 0:
+ output("No snapshots scheduled")
+ else:
+ jobname = "JOB_NAME".ljust(longest_field+5)
+ schedule = "SCHEDULE".ljust(longest_field+5)
+ operation = "OPERATION".ljust(longest_field+5)
+ volname = "VOLUME NAME".ljust(longest_field+5)
+ hyphens = "".ljust((longest_field+5) * 4, '-')
+ print(jobname+schedule+operation+volname)
+ print(hyphens)
+ for key in sorted(tasks):
+ jobname = key.ljust(longest_field+5)
+ schedule = tasks[key].split(":")[0].ljust(
+ longest_field + 5)
+ volname = tasks[key].split(":")[1].ljust(
+ longest_field + 5)
+ operation = "Snapshot Create".ljust(longest_field+5)
+ print(jobname+schedule+operation+volname)
+ ret = 0
+ else:
+ print_str = "Failed to list snapshot schedules. " \
+ "Error: Failed to load tasks from "+GCRON_ENABLED
+ log.error(print_str)
+ output(print_str)
+
+ return ret
+
+
+def write_tasks_to_file():
+ try:
+ with open(TMP_FILE, "w", 0o644) as f:
+ # If tasks is empty, just create an empty tmp file
+ if len(tasks) != 0:
+ for key in sorted(tasks):
+ jobname = key
+ schedule = tasks[key].split(":")[0]
+ volname = tasks[key].split(":")[1]
+ f.write("%s root PATH=$PATH:/usr/local/sbin:/usr/sbin "
+ "gcron.py %s %s\n" % (schedule, volname, jobname))
+ f.write("\n")
+ f.flush()
+ os.fsync(f.fileno())
+ f.close()
+ except IOError as e:
+ log.error("Failed to open %s. Error: %s.", TMP_FILE, e)
+ ret = INTERNAL_ERROR
+ return ret
+
+ shutil.move(TMP_FILE, GCRON_ENABLED)
+ ret = 0
+
+ return ret
+
+def update_current_scheduler(data):
+ try:
+ with open(TMP_FILE, "w", 0o644) as f:
+ f.write("%s" % data)
+ f.flush()
+ os.fsync(f.fileno())
+ f.close()
+ except IOError as e:
+ log.error("Failed to open %s. Error: %s.", TMP_FILE, e)
+ ret = INTERNAL_ERROR
+ return ret
+
+ shutil.move(TMP_FILE, CURRENT_SCHEDULER)
+ ret = 0
+
+ return ret
+
+
+def isVolumePresent(volname):
+ success = False
+ if volname == "":
+ log.debug("No volname given")
+ return success
+
+ cli = ["gluster",
+ "volume",
+ "info",
+ volname]
+ log.debug("Running command '%s'", " ".join(cli))
+
+ p = subprocess.Popen(cli, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ out, err = p.communicate()
+ rv = p.returncode
+
+ log.debug("Command '%s' returned '%d'", " ".join(cli), rv)
+
+ if rv:
+ log.error("Command output:")
+ log.error(err)
+ else:
+ success = True;
+
+ return success
+
+
+def add_schedules(jobname, schedule, volname):
+ log.info("Adding snapshot schedules.")
+ ret = load_tasks_from_file()
+ if ret == 0:
+ if jobname in tasks:
+ print_str = ("%s already exists in schedule. Use "
+ "'edit' to modify %s" % (jobname, jobname))
+ log.error(print_str)
+ output(print_str)
+ ret = JOB_ALREADY_EXISTS
+ else:
+ if not isVolumePresent(volname):
+ print_str = ("Volume %s does not exist. Create %s and retry." %
+ (volname, volname))
+ log.error(print_str)
+ output(print_str)
+ ret = VOLUME_DOES_NOT_EXIST
+ else:
+ tasks[jobname] = schedule + ":" + volname
+ ret = write_tasks_to_file()
+ if ret == 0:
+ # Create a LOCK_FILE for the job
+ job_lockfile = LOCK_FILE_DIR + jobname
+ try:
+ f = os.open(job_lockfile, os.O_CREAT | os.O_NONBLOCK,
+ 0o644)
+ os.close(f)
+ except OSError as e:
+ log.error("Failed to open %s. Error: %s.",
+ job_lockfile, e)
+ ret = INTERNAL_ERROR
+ return ret
+ log.info("Successfully added snapshot schedule %s" %
+ jobname)
+ output("Successfully added snapshot schedule")
+ ret = 0
+ else:
+ print_str = "Failed to add snapshot schedule. " \
+ "Error: Failed to load tasks from "+GCRON_ENABLED
+ log.error(print_str)
+ output(print_str)
+
+ return ret
+
+
+def delete_schedules(jobname):
+ log.info("Delete snapshot schedules.")
+ ret = load_tasks_from_file()
+ if ret == 0:
+ if jobname in tasks:
+ del tasks[jobname]
+ ret = write_tasks_to_file()
+ if ret == 0:
+ # Delete the LOCK_FILE for the job
+ job_lockfile = LOCK_FILE_DIR+jobname
+ try:
+ os.remove(job_lockfile)
+ except OSError as e:
+ log.error("Failed to open %s. Error: %s.",
+ job_lockfile, e)
+ ret = INTERNAL_ERROR
+ return ret
+ log.info("Successfully deleted snapshot schedule %s"
+ % jobname)
+ output("Successfully deleted snapshot schedule")
+ ret = 0
+ else:
+ print_str = ("Failed to delete %s. Error: No such "
+ "job scheduled" % jobname)
+ log.error(print_str)
+ output(print_str)
+ ret = JOB_NOT_FOUND
+ else:
+ print_str = "Failed to delete snapshot schedule. " \
+ "Error: Failed to load tasks from "+GCRON_ENABLED
+ log.error(print_str)
+ output(print_str)
+
+ return ret
+
+
+def edit_schedules(jobname, schedule, volname):
+ log.info("Editing snapshot schedules.")
+ ret = load_tasks_from_file()
+ if ret == 0:
+ if jobname in tasks:
+ if not isVolumePresent(volname):
+ print_str = ("Volume %s does not exist. Create %s and retry." %
+ (volname, volname))
+ log.error(print_str)
+ output(print_str)
+ ret = VOLUME_DOES_NOT_EXIST
+ else:
+ tasks[jobname] = schedule+":"+volname
+ ret = write_tasks_to_file()
+ if ret == 0:
+ log.info("Successfully edited snapshot schedule %s" %
+ jobname)
+ output("Successfully edited snapshot schedule")
+ else:
+ print_str = ("Failed to edit %s. Error: No such "
+ "job scheduled" % jobname)
+ log.error(print_str)
+ output(print_str)
+ ret = JOB_NOT_FOUND
+ else:
+ print_str = "Failed to edit snapshot schedule. " \
+ "Error: Failed to load tasks from "+GCRON_ENABLED
+ log.error(print_str)
+ output(print_str)
+
+ return ret
+
+def get_bool_val():
+ getsebool_cli = ["getsebool",
+ "-a"]
+ p1 = subprocess.Popen(getsebool_cli, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+
+ grep_cmd = ["grep",
+ "cron_system_cronjob_use_shares"]
+ p2 = subprocess.Popen(grep_cmd, stdin=p1.stdout,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+
+ p1.stdout.close()
+ output, err = p2.communicate()
+ rv = p2.returncode
+
+ if rv:
+ log.error("Command output:")
+ log.error(err)
+ return -1
+
+ bool_val = output.split()[2]
+ log.debug("Bool value = '%s'", bool_val)
+
+ return bool_val
+
+def get_selinux_status():
+ getenforce_cli = ["getenforce"]
+ log.debug("Running command '%s'", " ".join(getenforce_cli))
+
+ try:
+ p1 = subprocess.Popen(getenforce_cli, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ except OSError as oserr:
+ log.error("Failed to run the command \"getenforce\". Error: %s" %\
+ oserr)
+ return -1
+
+ output, err = p1.communicate()
+ rv = p1.returncode
+
+ if rv:
+ log.error("Command output:")
+ log.error(err)
+ return -1
+ else:
+ selinux_status=output.rstrip()
+ log.debug("selinux status: %s", selinux_status)
+
+ return selinux_status
+
+def set_cronjob_user_share():
+ selinux_status = get_selinux_status()
+ if (selinux_status == -1):
+ log.error("Failed to get selinux status")
+ return -1
+ elif (selinux_status == "Disabled"):
+ return 0
+
+ bool_val = get_bool_val()
+ # In case of a failure (where the boolean value is not)
+ # present in the system, we should not proceed further
+ # We should only proceed when the value is "off"
+ if (bool_val == -1 or bool_val != "off"):
+ return 0
+
+ setsebool_cli = ["setsebool", "-P",
+ "cron_system_cronjob_use_shares",
+ "on"]
+ log.debug("Running command '%s'", " ".join(setsebool_cli))
+
+ p1 = subprocess.Popen(setsebool_cli, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+
+ output, err = p1.communicate()
+ rv = p1.returncode
+
+ if rv:
+ log.error("Command output:")
+ log.error(err)
+ return rv
+
+ bool_val = get_bool_val()
+ if (bool_val == "on"):
+ return 0
+ else:
+ # In case of an error or if boolean is not on
+ # we return a failure here
+ return -1
+
+def initialise_scheduler():
+ ret = set_cronjob_user_share()
+ if ret:
+ log.error("Failed to set selinux boolean "
+ "cron_system_cronjob_use_shares to 'on'")
+ return ret
+
+ try:
+ with open(TMP_FILE, "w+", 0o644) as f:
+ updater = ("* * * * * root PATH=$PATH:/usr/local/sbin:"
+ "/usr/sbin gcron.py --update\n")
+ f.write("%s\n" % updater)
+ f.flush()
+ os.fsync(f.fileno())
+ f.close()
+ except IOError as e:
+ log.error("Failed to open %s. Error: %s.", TMP_FILE, e)
+ ret = INIT_FAILED
+ return ret
+
+ shutil.move(TMP_FILE, GCRON_UPDATE_TASK)
+
+ if not os.path.lexists(GCRON_TASKS):
+ try:
+ f = open(GCRON_TASKS, "w", 0o644)
+ f.close()
+ except IOError as e:
+ log.error("Failed to open %s. Error: %s.", GCRON_TASKS, e)
+ ret = INIT_FAILED
+ return ret
+
+ if os.path.lexists(GCRON_CROND_TASK):
+ os.remove(GCRON_CROND_TASK)
+
+ os.symlink(GCRON_TASKS, GCRON_CROND_TASK)
+
+ log.info("Successfully initialised snapshot scheduler for this node")
+ output("Successfully initialised snapshot scheduler for this node")
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_INITIALISED, status="Success")
+
+ ret = 0
+ return ret
+
+
+def syntax_checker(args):
+ if hasattr(args, 'jobname'):
+ if (len(args.jobname.split()) != 1):
+ output("Invalid Jobname. Jobname should not be empty and should not contain \" \" character.")
+ ret = INVALID_JOBNAME
+ return ret
+ args.jobname=args.jobname.strip()
+
+ if hasattr(args, 'volname'):
+ if (len(args.volname.split()) != 1):
+ output("Invalid Volname. Volname should not be empty and should not contain \" \" character.")
+ ret = INVALID_VOLNAME
+ return ret
+ args.volname=args.volname.strip()
+
+ if hasattr(args, 'schedule'):
+ if (len(args.schedule.split()) != 5):
+ output("Invalid Schedule. Please refer to the following for adding a valid cron schedule")
+ print ("* * * * *")
+ print ("| | | | |")
+ print ("| | | | +---- Day of the Week (range: 1-7, 1 standing for Monday)")
+ print ("| | | +------ Month of the Year (range: 1-12)")
+ print ("| | +-------- Day of the Month (range: 1-31)")
+ print ("| +---------- Hour (range: 0-23)")
+ print ("+------------ Minute (range: 0-59)")
+ ret = INVALID_SCHEDULE
+ return ret
+
+ ret = 0
+ return ret
+
+
+def perform_operation(args):
+ if not os.path.exists(CURRENT_SCHEDULER):
+ update_current_scheduler("none")
+
+ # Initialise snapshot scheduler on local node
+ if args.action == "init":
+ ret = initialise_scheduler()
+ if ret != 0:
+ output("Failed to initialise snapshot scheduling")
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_INIT_FAILED,
+ error=print_error(ret))
+ return ret
+
+ # Disable snapshot scheduler
+ if args.action == "disable_force":
+ ret = disable_scheduler()
+ if ret == 0:
+ subprocess.Popen(["touch", "-h", GCRON_TASKS])
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_DISABLED,
+ status="Successfully Disabled")
+ else:
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_DISABLE_FAILED,
+ error=print_error(ret))
+ return ret
+
+ # Check if the symlink to GCRON_TASKS is properly set in the shared storage
+ if (not os.path.lexists(GCRON_UPDATE_TASK) or
+ not os.path.lexists(GCRON_CROND_TASK) or
+ os.readlink(GCRON_CROND_TASK) != GCRON_TASKS):
+ print_str = ("Please run 'snap_scheduler.py' init to initialise "
+ "the snap scheduler for the local node.")
+ log.error(print_str)
+ output(print_str)
+ ret = NODE_NOT_INITIALISED
+ return ret
+
+ # Check status of snapshot scheduler.
+ if args.action == "status":
+ ret = scheduler_status()
+ if ret == 0:
+ if scheduler_enabled:
+ output("Snapshot scheduling status: Enabled")
+ else:
+ output("Snapshot scheduling status: Disabled")
+ else:
+ output("Failed to check status of snapshot scheduler")
+ return ret
+
+ # Enable snapshot scheduler
+ if args.action == "enable":
+ ret = enable_scheduler()
+ if ret == 0:
+ subprocess.Popen(["touch", "-h", GCRON_TASKS])
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_ENABLED,
+ status="Successfully Enabled")
+ else:
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_ENABLE_FAILED,
+ error=print_error(ret))
+ return ret
+
+ # Disable snapshot scheduler
+ if args.action == "disable":
+ ret = disable_scheduler()
+ if ret == 0:
+ subprocess.Popen(["touch", "-h", GCRON_TASKS])
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_DISABLED,
+ status="Successfully Disabled")
+ else:
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_DISABLE_FAILED,
+ error=print_error(ret))
+ return ret
+
+ # List snapshot schedules
+ if args.action == "list":
+ ret = list_schedules()
+ return ret
+
+ # Add snapshot schedules
+ if args.action == "add":
+ ret = syntax_checker(args)
+ if ret != 0:
+ return ret
+ ret = add_schedules(args.jobname, args.schedule, args.volname)
+ if ret == 0:
+ subprocess.Popen(["touch", "-h", GCRON_TASKS])
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_ADDED,
+ status="Successfully added job "+args.jobname)
+ else:
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_ADD_FAILED,
+ status="Failed to add job "+args.jobname,
+ error=print_error(ret))
+ return ret
+
+ # Delete snapshot schedules
+ if args.action == "delete":
+ ret = syntax_checker(args)
+ if ret != 0:
+ return ret
+ ret = delete_schedules(args.jobname)
+ if ret == 0:
+ subprocess.Popen(["touch", "-h", GCRON_TASKS])
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_DELETED,
+ status="Successfully deleted job "+args.jobname)
+ else:
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_DELETE_FAILED,
+ status="Failed to delete job "+args.jobname,
+ error=print_error(ret))
+ return ret
+
+ # Edit snapshot schedules
+ if args.action == "edit":
+ ret = syntax_checker(args)
+ if ret != 0:
+ return ret
+ ret = edit_schedules(args.jobname, args.schedule, args.volname)
+ if ret == 0:
+ subprocess.Popen(["touch", "-h", GCRON_TASKS])
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_EDITED,
+ status="Successfully edited job "+args.jobname)
+ else:
+ gf_event (EVENT_SNAPSHOT_SCHEDULER_SCHEDULE_EDIT_FAILED,
+ status="Failed to edit job "+args.jobname,
+ error=print_error(ret))
+ return ret
+
+ ret = INVALID_ARG
+ return ret
+
+def gf_event(event_type, **kwargs):
+ if EVENTS_ENABLED:
+ from events.gf_event import gf_event as gfevent
+ gfevent(event_type, **kwargs)
+
+
+def main(argv):
+ initLogger()
+ ret = -1
+ parser = argparse.ArgumentParser()
+ subparsers = parser.add_subparsers(dest="action",
+ metavar=('{init, status, enable,'
+ ' disable, list, add,'
+ ' delete, edit}'))
+ subparsers.add_parser('init',
+ help="Initialise the node for snapshot scheduling")
+
+ subparsers.add_parser("status",
+ help="Check if snapshot scheduling is "
+ "enabled or disabled")
+ subparsers.add_parser("enable",
+ help="Enable snapshot scheduling")
+ subparsers.add_parser("disable",
+ help="Disable snapshot scheduling")
+ subparsers.add_parser("disable_force")
+ subparsers.add_parser("list",
+ help="List snapshot schedules")
+ parser_add = subparsers.add_parser("add",
+ help="Add snapshot schedules")
+ parser_add.add_argument("jobname", help="Job Name")
+ parser_add.add_argument("schedule", help="Schedule")
+ parser_add.add_argument("volname", help="Volume Name")
+
+ parser_delete = subparsers.add_parser("delete",
+ help="Delete snapshot schedules")
+ parser_delete.add_argument("jobname", help="Job Name")
+ parser_edit = subparsers.add_parser("edit",
+ help="Edit snapshot schedules")
+ parser_edit.add_argument("jobname", help="Job Name")
+ parser_edit.add_argument("schedule", help="Schedule")
+ parser_edit.add_argument("volname", help="Volume Name")
+
+ args = parser.parse_args(argv)
+
+ if not os.path.exists(SHARED_STORAGE_DIR):
+ output("Failed: "+SHARED_STORAGE_DIR+" does not exist.")
+ return SHARED_STORAGE_DIR_DOESNT_EXIST
+
+ if not os.path.ismount(SHARED_STORAGE_DIR):
+ output("Failed: Shared storage is not mounted at "+SHARED_STORAGE_DIR)
+ return SHARED_STORAGE_NOT_MOUNTED
+
+ if not os.path.exists(SHARED_STORAGE_DIR+"/snaps/"):
+ try:
+ os.makedirs(SHARED_STORAGE_DIR+"/snaps/")
+ except OSError as e:
+ if errno != EEXIST:
+ log.error("Failed to create %s : %s", SHARED_STORAGE_DIR+"/snaps/", e)
+ output("Failed to create %s. Error: %s"
+ % (SHARED_STORAGE_DIR+"/snaps/", e))
+ return INTERNAL_ERROR
+
+ if not os.path.exists(GCRON_ENABLED):
+ f = os.open(GCRON_ENABLED, os.O_CREAT | os.O_NONBLOCK, 0o644)
+ os.close(f)
+
+ if not os.path.exists(LOCK_FILE_DIR):
+ try:
+ os.makedirs(LOCK_FILE_DIR)
+ except OSError as e:
+ if errno != EEXIST:
+ log.error("Failed to create %s : %s", LOCK_FILE_DIR, e)
+ output("Failed to create %s. Error: %s"
+ % (LOCK_FILE_DIR, e))
+ return INTERNAL_ERROR
+
+ try:
+ f = os.open(LOCK_FILE, os.O_CREAT | os.O_RDWR | os.O_NONBLOCK, 0o644)
+ try:
+ fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ ret = perform_operation(args)
+ fcntl.flock(f, fcntl.LOCK_UN)
+ except IOError:
+ log.info("%s is being processed by another agent.", LOCK_FILE)
+ output("Another snap_scheduler command is running. "
+ "Please try again after some time.")
+ return ANOTHER_TRANSACTION_IN_PROGRESS
+ os.close(f)
+ except OSError as e:
+ log.error("Failed to open %s : %s", LOCK_FILE, e)
+ output("Failed to open %s. Error: %s" % (LOCK_FILE, e))
+ return INTERNAL_ERROR
+
+ return ret
+
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv[1:]))
diff --git a/extras/statedumpparse.rb b/extras/statedumpparse.rb
new file mode 100755
index 00000000000..1aff43377db
--- /dev/null
+++ b/extras/statedumpparse.rb
@@ -0,0 +1,208 @@
+#!/usr/bin/env ruby
+
+require 'time'
+require 'optparse'
+
+unless Array.instance_methods.include? :to_h
+ class Array
+ def to_h
+ h = {}
+ each { |k,v| h[k]=v }
+ h
+ end
+ end
+end
+
+# statedump.c:gf_proc_dump_mempool_info uses a five-dash record separator,
+# client.c:client_fd_lk_ctx_dump uses a six-dash record separator.
+ARRSEP = /^(-{5,6}=-{5,6})?$/
+HEAD = /^\[(.*)\]$/
+INPUT_FORMATS = %w[statedump json]
+
+format = 'json'
+input_format = 'statedump'
+tz = '+0000'
+memstat_select,memstat_reject = //,/\Z./
+OptionParser.new do |op|
+ op.banner << " [<] <STATEDUMP>"
+ op.on("-f", "--format=F", "json/yaml/memstat(-[plain|human|json])") { |s| format = s }
+ op.on("--input-format=F", INPUT_FORMATS.join(?/)) { |s| input_format = s }
+ op.on("--timezone=T",
+ "time zone to apply to zoneless timestamps [default UTC]") { |s| tz = s }
+ op.on("--memstat-select=RX", "memstat: select memory types matching RX") { |s|
+ memstat_select = Regexp.new s
+ }
+ op.on("--memstat-reject=RX", "memstat: reject memory types matching RX") { |s|
+ memstat_reject = Regexp.new s
+ }
+end.parse!
+
+
+if format =~ /\Amemstat(?:-(.*))?/
+ memstat_type = $1 || 'plain'
+ unless %w[plain human json].include? memstat_type
+ raise "unknown memstat type #{memstat_type.dump}"
+ end
+ format = 'memstat'
+end
+
+repr, logsep = case format
+when 'yaml'
+ require 'yaml'
+
+ [proc { |e| e.to_yaml }, "\n"]
+when 'json', 'memstat'
+ require 'json'
+
+ [proc { |e| e.to_json }, " "]
+else
+ raise "unkonwn format '#{format}'"
+end
+formatter = proc { |e| puts repr.call(e) }
+
+INPUT_FORMATS.include? input_format or raise "unkwown input format '#{input_format}'"
+
+dumpinfo = {}
+
+# parse a statedump entry
+elem_cbk = proc { |s,&cbk|
+ arraylike = false
+ s.grep(/\S/).empty? and next
+ head = nil
+ while s.last =~ /^\s*$/
+ s.pop
+ end
+ body = catch { |misc2|
+ s[0] =~ HEAD ? (head = $1) : (throw misc2)
+ body = [[]]
+ s[1..-1].each { |l|
+ if l =~ ARRSEP
+ arraylike = true
+ body << []
+ next
+ end
+ body.last << l
+ }
+
+ body.reject(&:empty?).map { |e|
+ ea = e.map { |l|
+ k,v = l.split("=",2)
+ m = /\A(0|-?[1-9]\d*)(\.\d+)?\Z/.match v
+ [k, m ? (m[2] ? Float(v) : Integer(v)) : v]
+ }
+ begin
+ ea.to_h
+ rescue
+ throw misc2
+ end
+ }
+ }
+
+ if body
+ cbk.call [head, arraylike ? body : (body.empty? ? {} : body[0])]
+ else
+ STDERR.puts ["WARNING: failed to parse record:", repr.call(s)].join(logsep)
+ end
+}
+
+# aggregator routine
+aggr = case format
+when 'memstat'
+ meminfo = {}
+ # commit memory-related entries to meminfo
+ proc { |k,r|
+ case k
+ when /memusage/
+ (meminfo["GF_MALLOC"]||={})[k] ||= r["size"] if k =~ memstat_select and k !~ memstat_reject
+ when "mempool"
+ r.each {|e|
+ kk = "mempool:#{e['pool-name']}"
+ (meminfo["mempool"]||={})[kk] ||= e["size"] if kk =~ memstat_select and kk !~ memstat_reject
+ }
+ end
+ }
+else
+ # just format data, don't actually aggregate anything
+ proc { |pair| formatter.call pair }
+end
+
+# processing the data
+case input_format
+when 'statedump'
+ acc = []
+ $<.each { |l|
+ l = l.strip
+ if l =~ /^(DUMP-(?:START|END)-TIME):\s+(.*)/
+ dumpinfo["_meta"]||={}
+ (dumpinfo["_meta"]["date"]||={})[$1] = Time.parse([$2, tz].join " ")
+ next
+ end
+
+ if l =~ HEAD
+ elem_cbk.call(acc, &aggr)
+ acc = [l]
+ next
+ end
+
+ acc << l
+ }
+ elem_cbk.call(acc, &aggr)
+when 'json'
+ $<.each { |l|
+ r = JSON.load l
+ case r
+ when Array
+ aggr[r]
+ when Hash
+ dumpinfo.merge! r
+ end
+ }
+end
+
+# final actions: output aggregated data
+case format
+when 'memstat'
+ ma = meminfo.values.map(&:to_a).inject(:+)
+ totals = meminfo.map { |coll,h| [coll, h.values.inject(:+)] }.to_h
+ tt = ma.transpose[1].inject(:+)
+
+ summary_sep,showm = case memstat_type
+ when 'json'
+ ["", proc { |k,v| puts({type: k, value: v}.to_json) }]
+ when 'plain', 'human'
+ # human-friendly number representation
+ hr = proc { |n|
+ qa = %w[B kB MB GB]
+ q = ((1...qa.size).find {|i| n < (1 << i*10)} || qa.size) - 1
+ "%.2f%s" % [n.to_f / (1 << q*10), qa[q]]
+ }
+
+ templ = "%{val} %{key}"
+ tft = proc { |t| t }
+ nft = if memstat_type == 'human'
+ nw = [ma.transpose[1], totals.values, tt].flatten.map{|n| hr[n].size}.max
+ proc { |n|
+ hn = hr[n]
+ " " * (nw - hn.size) + hn
+ }
+ else
+ nw = tt.to_s.size
+ proc { |n| "%#{nw}d" % n }
+ end
+ ## Alternative template, key first:
+ # templ = "%{key} %{val}"
+ # tw = ma.transpose[0].map(&:size).max
+ # tft = proc { |t| t + " " * [tw - t.size, 0].max }
+ # nft = (memstat_type == 'human') ? hr : proc { |n| n }
+ ["\n", proc { |k,v| puts templ % {key: tft[k], val: nft[v]} }]
+ else
+ raise 'this should be impossible'
+ end
+
+ ma.sort_by { |k,v| v }.each(&showm)
+ print summary_sep
+ totals.each { |coll,t| showm.call "Total #{coll}", t }
+ showm.call "TOTAL", tt
+else
+ formatter.call dumpinfo
+end
diff --git a/extras/stop-all-gluster-processes.sh b/extras/stop-all-gluster-processes.sh
new file mode 100755
index 00000000000..710aaf5fd3c
--- /dev/null
+++ b/extras/stop-all-gluster-processes.sh
@@ -0,0 +1,193 @@
+#!/bin/bash
+#
+# Kill all the processes/services except glusterd
+#
+# Usage: ./extras/stop-all-gluster-processes.sh [-g] [-h]
+# options:
+# -g Terminate in graceful mode
+# -h Show this message, then exit
+#
+# eg:
+# 1. ./extras/stop-all-gluster-processes.sh
+# 2. ./extras/stop-all-gluster-processes.sh -g
+#
+# By default, this script executes in force mode, i.e. all of brick, gsyncd
+# and other glustershd services/processes are killed without checking for
+# ongoing tasks such as geo-rep, self-heal, rebalance and etc. which may lead
+# to inconsistency after the node is brought back.
+#
+# On specifying '-g' option this script works in graceful mode, to maintain
+# data consistency the script fails with a valid exit code incase if any of
+# the gluster processes are busy in doing their jobs.
+#
+# The author of page [1] proposes user-defined exit codes to the range 64 - 113
+# Find the better explanation behind the choice in the link
+#
+# The exit code returned by stop-all-gluster-processes.sh:
+# 0 No errors/Success
+# 64 Rebalance is in progress
+# 65 Self-Heal is in progress
+# 66 Tier daemon running on this node
+# 127 option not found
+#
+# [1] http://www.tldp.org/LDP/abs/html/exitcodes.html
+
+
+# global
+errors=0
+
+# find the mounts and return their pids
+get_mount_pids()
+{
+ local opts
+ local pid
+
+ for opts in $(grep -w fuse.glusterfs /proc/mounts| awk '{print $1":/"$2}');
+ do
+ IFS=' ' read -r -a volinfo <<< $(echo "${opts}" | sed 's/:\// /g')
+ pid+="$(ps -Ao pid,args | grep -w "volfile-server=${volinfo[0]}" |
+ grep -w "volfile-id=/${volinfo[1]}" | grep -w "${volinfo[2]}" |
+ awk '{print $1}') "
+ done
+ echo "${pid}"
+}
+
+# handle mount processes i.e. 'glusterfs'
+kill_mounts()
+{
+ local signal=${1}
+ local pid
+
+ for pid in $(get_mount_pids);
+ do
+ echo "sending SIG${signal} to mount process with pid: ${pid}";
+ kill -${signal} ${pid};
+ done
+}
+
+# handle brick processes and node services
+kill_bricks_and_services()
+{
+ local signal=${1}
+ local pidfile
+ local pid
+
+ for pidfile in $(find /var/run/gluster/ -name '*.pid');
+ do
+ local pid=$(cat ${pidfile});
+ echo "sending SIG${signal} to pid: ${pid}";
+ kill -${signal} ${pid};
+ done
+}
+
+# for geo-replication, only 'monitor' has pid file written, other
+# processes are not having a pid file, so get it through 'ps' and
+# handle these processes
+kill_georep_gsync()
+{
+ local signal=${1}
+
+ # FIXME: add strick/better check
+ local gsyncpid=$(ps -Ao pid,args | grep gluster | grep gsync |
+ awk '{print $1}');
+ if [ -n "${gsyncpid}" ]
+ then
+ echo "sending SIG${signal} to geo-rep gsync process ${gsyncpid}";
+ kill -${signal} ${gsyncpid} || errors=$((${errors} + 1));
+ fi
+}
+
+# check if all processes are ready to die
+check_background_tasks()
+{
+ volumes=$(gluster vol list)
+ quit=0
+ for volname in ${volumes};
+ do
+ # tiering
+ if [[ $(gluster volume tier ${volname} status 2> /dev/null |
+ grep "localhost" | grep -c "in progress") -gt 0 ]]
+ then
+ quit=66
+ break;
+ fi
+
+ # rebalance
+ if [[ $(gluster volume rebalance ${volname} status 2> /dev/null |
+ grep -c "in progress") -gt 0 ]]
+ then
+ quit=64
+ break;
+ fi
+
+ # self heal
+ if [[ $(gluster volume heal ${volname} info | grep "Number of entries" |
+ awk '{ sum+=$4} END {print sum}') -gt 0 ]];
+ then
+ quit=65
+ break;
+ fi
+
+ # geo-rep, snapshot and quota doesn't need grace checks,
+ # as they ensures the consistancy on force kills
+ done
+
+ echo ${quit}
+}
+
+usage()
+{
+ cat <<EOM
+Usage: $0 [-g] [-h]
+ options:
+ -g Terminate in graceful mode
+ -h Show this message, then exit
+
+eg:
+ 1. $0
+ 2. $0 -g
+EOM
+}
+
+main()
+{
+ while getopts "gh" opt; do
+ case $opt in
+ g)
+ # graceful mode
+ quit=$(check_background_tasks)
+ if [[ ${quit} -ne 0 ]]
+ then
+ exit ${quit};
+ fi
+ # else safe to kill
+ ;;
+ h)
+ usage
+ exit 0;
+ ;;
+ *)
+ usage
+ exit 127;
+ ;;
+ esac
+ done
+ # remove all the options that have been parsed by getopts
+ shift $((OPTIND-1))
+
+ kill_mounts TERM
+ kill_georep_gsync TERM
+ kill_bricks_and_services TERM
+
+ sleep 5;
+ echo ""
+
+ # still not Terminated? let's pass SIGKILL
+ kill_mounts KILL
+ kill_georep_gsync KILL
+ kill_bricks_and_services KILL
+
+ exit ${errors};
+}
+
+main "$@"
diff --git a/extras/stripe-merge.c b/extras/stripe-merge.c
index 32768badd36..e013a6e6e8a 100644
--- a/extras/stripe-merge.c
+++ b/extras/stripe-merge.c
@@ -28,7 +28,7 @@
#include <stdint.h>
#include <errno.h>
#include <string.h>
-#include <attr/xattr.h>
+#include <sys/xattr.h>
#include <fnmatch.h>
#define ATTRNAME_STRIPE_INDEX "trusted.*.stripe-index"
@@ -40,33 +40,33 @@
#define INVALID_MODE UINT32_MAX
struct file_stripe_info {
- int stripe_count;
- int stripe_size;
- int coalesce;
- mode_t mode;
- int fd[0];
+ int stripe_count;
+ int stripe_size;
+ int coalesce;
+ mode_t mode;
+ int fd[0];
};
-static int close_files(struct file_stripe_info *);
+static int
+close_files(struct file_stripe_info *);
-static struct
-file_stripe_info *alloc_file_stripe_info(int count)
+static struct file_stripe_info *
+alloc_file_stripe_info(int count)
{
- int i;
- struct file_stripe_info *finfo;
+ int i;
+ struct file_stripe_info *finfo;
- finfo = calloc(1, sizeof(struct file_stripe_info) +
- (sizeof(int) * count));
- if (!finfo)
- return NULL;
+ finfo = calloc(1, sizeof(struct file_stripe_info) + (sizeof(int) * count));
+ if (!finfo)
+ return NULL;
- for (i = 0; i < count; i++)
- finfo->fd[i] = INVALID_FD;
+ for (i = 0; i < count; i++)
+ finfo->fd[i] = INVALID_FD;
- finfo->mode = INVALID_MODE;
- finfo->coalesce = INVALID_FD;
+ finfo->mode = INVALID_MODE;
+ finfo->coalesce = INVALID_FD;
- return finfo;
+ return finfo;
}
/*
@@ -77,39 +77,39 @@ file_stripe_info *alloc_file_stripe_info(int count)
static int
get_stripe_attr_name(const char *path, const char *pattern, char **attrname)
{
- char attrbuf[4096];
- char *ptr, *match = NULL;
- int len, r, match_count = 0;
-
- if (!path || !pattern || !attrname)
- return -1;
-
- len = listxattr(path, attrbuf, sizeof(attrbuf));
- if (len < 0)
- return len;
-
- ptr = attrbuf;
- while (ptr) {
- r = fnmatch(pattern, ptr, 0);
- if (!r) {
- if (!match)
- match = ptr;
- match_count++;
- } else if (r != FNM_NOMATCH) {
- return -1;
- }
-
- len -= strlen(ptr) + 1;
- if (len > 0)
- ptr += strlen(ptr) + 1;
- else
- ptr = NULL;
- }
-
- if (match)
- *attrname = strdup(match);
-
- return match_count;
+ char attrbuf[4096];
+ char *ptr, *match = NULL;
+ int len, r, match_count = 0;
+
+ if (!path || !pattern || !attrname)
+ return -1;
+
+ len = listxattr(path, attrbuf, sizeof(attrbuf));
+ if (len < 0)
+ return len;
+
+ ptr = attrbuf;
+ while (ptr) {
+ r = fnmatch(pattern, ptr, 0);
+ if (!r) {
+ if (!match)
+ match = ptr;
+ match_count++;
+ } else if (r != FNM_NOMATCH) {
+ return -1;
+ }
+
+ len -= strlen(ptr) + 1;
+ if (len > 0)
+ ptr += strlen(ptr) + 1;
+ else
+ ptr = NULL;
+ }
+
+ if (match)
+ *attrname = strdup(match);
+
+ return match_count;
}
/*
@@ -118,19 +118,19 @@ get_stripe_attr_name(const char *path, const char *pattern, char **attrname)
static int
get_stripe_attr_val(const char *path, const char *attr, int *val)
{
- char attrbuf[4096];
- int len;
+ char attrbuf[4096];
+ int len;
- if (!path || !attr || !val)
- return -1;
+ if (!path || !attr || !val)
+ return -1;
- len = getxattr(path, attr, attrbuf, sizeof(attrbuf));
- if (len < 0)
- return len;
+ len = getxattr(path, attr, attrbuf, sizeof(attrbuf));
+ if (len < 0)
+ return len;
- *val = atoi(attrbuf);
+ *val = atoi(attrbuf);
- return 0;
+ return 0;
}
/*
@@ -145,29 +145,31 @@ get_stripe_attr_val(const char *path, const char *attr, int *val)
static int
get_attr(const char *path, const char *pattern, char **buf, int *val)
{
- int count = 1;
-
- if (!buf)
- return -1;
-
- if (!*buf) {
- count = get_stripe_attr_name(path, pattern, buf);
- if (count > 1) {
- /* pattern isn't good enough */
- fprintf(stderr, "ERROR: duplicate attributes found "
- "matching pattern: %s\n", pattern);
- free(*buf);
- *buf = NULL;
- return count;
- } else if (count < 1) {
- return count;
- }
- }
-
- if (get_stripe_attr_val(path, *buf, val) < 0)
- return -1;
-
- return count;
+ int count = 1;
+
+ if (!buf)
+ return -1;
+
+ if (!*buf) {
+ count = get_stripe_attr_name(path, pattern, buf);
+ if (count > 1) {
+ /* pattern isn't good enough */
+ fprintf(stderr,
+ "ERROR: duplicate attributes found "
+ "matching pattern: %s\n",
+ pattern);
+ free(*buf);
+ *buf = NULL;
+ return count;
+ } else if (count < 1) {
+ return count;
+ }
+ }
+
+ if (get_stripe_attr_val(path, *buf, val) < 0)
+ return -1;
+
+ return count;
}
/*
@@ -178,168 +180,168 @@ get_attr(const char *path, const char *pattern, char **buf, int *val)
* print a warning if any files are missing. We proceed without error in the
* latter case to support partial recovery.
*/
-static struct
-file_stripe_info *validate_and_open_files(char *paths[], int count)
+static struct file_stripe_info *
+validate_and_open_files(char *paths[], int count)
{
- int i, val, tmp;
- struct stat sbuf;
- char *stripe_count_attr = NULL;
- char *stripe_size_attr = NULL;
- char *stripe_index_attr = NULL;
- char *stripe_coalesce_attr = NULL;
- struct file_stripe_info *finfo = NULL;
-
- for (i = 0; i < count; i++) {
- if (!paths[i])
- goto err;
-
- /*
- * Check the stripe count first so we can allocate the info
- * struct with the appropriate number of fds.
- */
- if (get_attr(paths[i], ATTRNAME_STRIPE_COUNT,
- &stripe_count_attr, &val) != 1) {
- fprintf(stderr, "ERROR: %s: attribute: '%s'\n",
- paths[i], ATTRNAME_STRIPE_COUNT);
- goto err;
- }
- if (!finfo) {
- finfo = alloc_file_stripe_info(val);
- if (!finfo)
- goto err;
-
- if (val != count)
- fprintf(stderr, "WARNING: %s: stripe-count "
- "(%d) != file count (%d). Result may "
- "be incomplete.\n", paths[i], val,
- count);
-
- finfo->stripe_count = val;
- } else if (val != finfo->stripe_count) {
- fprintf(stderr, "ERROR %s: invalid stripe count: %d "
- "(expected %d)\n", paths[i], val,
- finfo->stripe_count);
- goto err;
- }
-
- /*
- * Get and validate the chunk size.
- */
- if (get_attr(paths[i], ATTRNAME_STRIPE_SIZE, &stripe_size_attr,
- &val) != 1) {
- fprintf(stderr, "ERROR: %s: attribute: '%s'\n",
- paths[i], ATTRNAME_STRIPE_SIZE);
- goto err;
- }
-
- if (!finfo->stripe_size) {
- finfo->stripe_size = val;
- } else if (val != finfo->stripe_size) {
- fprintf(stderr, "ERROR: %s: invalid stripe size: %d "
- "(expected %d)\n", paths[i], val,
- finfo->stripe_size);
- goto err;
- }
-
- /*
- * stripe-coalesce is a backward compatible attribute. If the
- * attribute does not exist, assume a value of zero for the
- * traditional stripe format.
- */
- tmp = get_attr(paths[i], ATTRNAME_STRIPE_COALESCE,
- &stripe_coalesce_attr, &val);
- if (!tmp) {
- val = 0;
- } else if (tmp != 1) {
- fprintf(stderr, "ERROR: %s: attribute: '%s'\n",
- paths[i], ATTRNAME_STRIPE_COALESCE);
- goto err;
- }
-
- if (finfo->coalesce == INVALID_FD) {
- finfo->coalesce = val;
- } else if (val != finfo->coalesce) {
- fprintf(stderr, "ERROR: %s: invalid coalesce flag\n",
- paths[i]);
- goto err;
- }
-
- /*
- * Get/validate the stripe index and open the file in the
- * appropriate fd slot.
- */
- if (get_attr(paths[i], ATTRNAME_STRIPE_INDEX,
- &stripe_index_attr, &val) != 1) {
- fprintf(stderr, "ERROR: %s: attribute: '%s'\n",
- paths[i], ATTRNAME_STRIPE_INDEX);
- goto err;
- }
- if (finfo->fd[val] != INVALID_FD) {
- fprintf(stderr, "ERROR: %s: duplicate stripe index: "
- "%d\n", paths[i], val);
- goto err;
- }
-
- finfo->fd[val] = open(paths[i], O_RDONLY);
- if (finfo->fd[val] < 0)
- goto err;
-
- /*
- * Get the creation mode for the file.
- */
- if (fstat(finfo->fd[val], &sbuf) < 0)
- goto err;
- if (finfo->mode == INVALID_MODE) {
- finfo->mode = sbuf.st_mode;
- } else if (sbuf.st_mode != finfo->mode) {
- fprintf(stderr, "ERROR: %s: invalid mode\n", paths[i]);
- goto err;
- }
- }
-
- free(stripe_count_attr);
- free(stripe_size_attr);
- free(stripe_index_attr);
- free(stripe_coalesce_attr);
-
- return finfo;
+ int i, val, tmp;
+ struct stat sbuf;
+ char *stripe_count_attr = NULL;
+ char *stripe_size_attr = NULL;
+ char *stripe_index_attr = NULL;
+ char *stripe_coalesce_attr = NULL;
+ struct file_stripe_info *finfo = NULL;
+
+ for (i = 0; i < count; i++) {
+ if (!paths[i])
+ goto err;
+
+ /*
+ * Check the stripe count first so we can allocate the info
+ * struct with the appropriate number of fds.
+ */
+ if (get_attr(paths[i], ATTRNAME_STRIPE_COUNT, &stripe_count_attr,
+ &val) != 1) {
+ fprintf(stderr, "ERROR: %s: attribute: '%s'\n", paths[i],
+ ATTRNAME_STRIPE_COUNT);
+ goto err;
+ }
+ if (!finfo) {
+ finfo = alloc_file_stripe_info(val);
+ if (!finfo)
+ goto err;
+
+ if (val != count)
+ fprintf(stderr,
+ "WARNING: %s: stripe-count "
+ "(%d) != file count (%d). Result may "
+ "be incomplete.\n",
+ paths[i], val, count);
+
+ finfo->stripe_count = val;
+ } else if (val != finfo->stripe_count) {
+ fprintf(stderr,
+ "ERROR %s: invalid stripe count: %d "
+ "(expected %d)\n",
+ paths[i], val, finfo->stripe_count);
+ goto err;
+ }
+
+ /*
+ * Get and validate the chunk size.
+ */
+ if (get_attr(paths[i], ATTRNAME_STRIPE_SIZE, &stripe_size_attr, &val) !=
+ 1) {
+ fprintf(stderr, "ERROR: %s: attribute: '%s'\n", paths[i],
+ ATTRNAME_STRIPE_SIZE);
+ goto err;
+ }
+
+ if (!finfo->stripe_size) {
+ finfo->stripe_size = val;
+ } else if (val != finfo->stripe_size) {
+ fprintf(stderr,
+ "ERROR: %s: invalid stripe size: %d "
+ "(expected %d)\n",
+ paths[i], val, finfo->stripe_size);
+ goto err;
+ }
+
+ /*
+ * stripe-coalesce is a backward compatible attribute. If the
+ * attribute does not exist, assume a value of zero for the
+ * traditional stripe format.
+ */
+ tmp = get_attr(paths[i], ATTRNAME_STRIPE_COALESCE,
+ &stripe_coalesce_attr, &val);
+ if (!tmp) {
+ val = 0;
+ } else if (tmp != 1) {
+ fprintf(stderr, "ERROR: %s: attribute: '%s'\n", paths[i],
+ ATTRNAME_STRIPE_COALESCE);
+ goto err;
+ }
+
+ if (finfo->coalesce == INVALID_FD) {
+ finfo->coalesce = val;
+ } else if (val != finfo->coalesce) {
+ fprintf(stderr, "ERROR: %s: invalid coalesce flag\n", paths[i]);
+ goto err;
+ }
+
+ /*
+ * Get/validate the stripe index and open the file in the
+ * appropriate fd slot.
+ */
+ if (get_attr(paths[i], ATTRNAME_STRIPE_INDEX, &stripe_index_attr,
+ &val) != 1) {
+ fprintf(stderr, "ERROR: %s: attribute: '%s'\n", paths[i],
+ ATTRNAME_STRIPE_INDEX);
+ goto err;
+ }
+ if (finfo->fd[val] != INVALID_FD) {
+ fprintf(stderr,
+ "ERROR: %s: duplicate stripe index: "
+ "%d\n",
+ paths[i], val);
+ goto err;
+ }
+
+ finfo->fd[val] = open(paths[i], O_RDONLY);
+ if (finfo->fd[val] < 0)
+ goto err;
+
+ /*
+ * Get the creation mode for the file.
+ */
+ if (fstat(finfo->fd[val], &sbuf) < 0)
+ goto err;
+ if (finfo->mode == INVALID_MODE) {
+ finfo->mode = sbuf.st_mode;
+ } else if (sbuf.st_mode != finfo->mode) {
+ fprintf(stderr, "ERROR: %s: invalid mode\n", paths[i]);
+ goto err;
+ }
+ }
+
+ free(stripe_count_attr);
+ free(stripe_size_attr);
+ free(stripe_index_attr);
+ free(stripe_coalesce_attr);
+
+ return finfo;
err:
- if (stripe_count_attr)
- free(stripe_count_attr);
- if (stripe_size_attr)
- free(stripe_size_attr);
- if (stripe_index_attr)
- free(stripe_index_attr);
- if (stripe_coalesce_attr)
- free(stripe_coalesce_attr);
-
- if (finfo) {
- close_files(finfo);
- free(finfo);
- }
-
- return NULL;
+ free(stripe_count_attr);
+ free(stripe_size_attr);
+ free(stripe_index_attr);
+ free(stripe_coalesce_attr);
+
+ if (finfo) {
+ close_files(finfo);
+ free(finfo);
+ }
+
+ return NULL;
}
static int
close_files(struct file_stripe_info *finfo)
{
- int i, ret;
+ int i, ret;
- if (!finfo)
- return -1;
+ if (!finfo)
+ return -1;
- for (i = 0; i < finfo->stripe_count; i++) {
- if (finfo->fd[i] == INVALID_FD)
- continue;
+ for (i = 0; i < finfo->stripe_count; i++) {
+ if (finfo->fd[i] == INVALID_FD)
+ continue;
- ret = close(finfo->fd[i]);
- if (ret < 0)
- return ret;
- }
+ ret = close(finfo->fd[i]);
+ if (ret < 0)
+ return ret;
+ }
- return ret;
+ return ret;
}
/*
@@ -355,43 +357,43 @@ close_files(struct file_stripe_info *finfo)
static int
generate_file_coalesce(int target, struct file_stripe_info *finfo)
{
- char *buf;
- int ret = 0;
- int r, w, i;
-
- buf = malloc(finfo->stripe_size);
- if (!buf)
- return -1;
-
- i = 0;
- while (1) {
- if (finfo->fd[i] == INVALID_FD) {
- if (lseek(target, finfo->stripe_size, SEEK_CUR) < 0)
- break;
-
- i = (i + 1) % finfo->stripe_count;
- continue;
- }
-
- r = read(finfo->fd[i], buf, finfo->stripe_size);
- if (r < 0) {
- ret = r;
- break;
- }
- if (!r)
- break;
-
- w = write(target, buf, r);
- if (w < 0) {
- ret = w;
- break;
- }
-
- i = (i + 1) % finfo->stripe_count;
- }
-
- free(buf);
- return ret;
+ char *buf;
+ int ret = 0;
+ int r, w, i;
+
+ buf = malloc(finfo->stripe_size);
+ if (!buf)
+ return -1;
+
+ i = 0;
+ while (1) {
+ if (finfo->fd[i] == INVALID_FD) {
+ if (lseek(target, finfo->stripe_size, SEEK_CUR) < 0)
+ break;
+
+ i = (i + 1) % finfo->stripe_count;
+ continue;
+ }
+
+ r = read(finfo->fd[i], buf, finfo->stripe_size);
+ if (r < 0) {
+ ret = r;
+ break;
+ }
+ if (!r)
+ break;
+
+ w = write(target, buf, r);
+ if (w < 0) {
+ ret = w;
+ break;
+ }
+
+ i = (i + 1) % finfo->stripe_count;
+ }
+
+ free(buf);
+ return ret;
}
/*
@@ -402,97 +404,100 @@ generate_file_coalesce(int target, struct file_stripe_info *finfo)
static int
generate_file_traditional(int target, struct file_stripe_info *finfo)
{
- int i, j, max_ret, ret;
- char buf[finfo->stripe_count][4096];
-
- do {
- char newbuf[4096] = {0, };
-
- max_ret = 0;
- for (i = 0; i < finfo->stripe_count; i++) {
- memset(buf[i], 0, 4096);
- ret = read(finfo->fd[i], buf[i], 4096);
- if (ret > max_ret)
- max_ret = ret;
- }
- for (i = 0; i < max_ret; i++)
- for (j = 0; j < finfo->stripe_count; j++)
- newbuf[i] |= buf[j][i];
- write(target, newbuf, max_ret);
- } while (max_ret);
-
- return 0;
+ int i, j, max_ret, ret;
+ char buf[finfo->stripe_count][4096];
+
+ do {
+ char newbuf[4096] = {
+ 0,
+ };
+
+ max_ret = 0;
+ for (i = 0; i < finfo->stripe_count; i++) {
+ memset(buf[i], 0, 4096);
+ ret = read(finfo->fd[i], buf[i], 4096);
+ if (ret > max_ret)
+ max_ret = ret;
+ }
+ for (i = 0; i < max_ret; i++)
+ for (j = 0; j < finfo->stripe_count; j++)
+ newbuf[i] |= buf[j][i];
+ write(target, newbuf, max_ret);
+ } while (max_ret);
+
+ return 0;
}
static int
generate_file(int target, struct file_stripe_info *finfo)
{
- if (finfo->coalesce)
- return generate_file_coalesce(target, finfo);
+ if (finfo->coalesce)
+ return generate_file_coalesce(target, finfo);
- return generate_file_traditional(target, finfo);
+ return generate_file_traditional(target, finfo);
}
static void
usage(char *name)
{
- fprintf(stderr, "Usage: %s [-o <outputfile>] <inputfile1> "
- "<inputfile2> ...\n", name);
+ fprintf(stderr,
+ "Usage: %s [-o <outputfile>] <inputfile1> "
+ "<inputfile2> ...\n",
+ name);
}
int
main(int argc, char *argv[])
{
- int file_count, opt;
- char *opath = NULL;
- int targetfd;
- struct file_stripe_info *finfo;
-
- while ((opt = getopt(argc, argv, "o:")) != -1) {
- switch (opt) {
- case 'o':
- opath = optarg;
- break;
- default:
- usage(argv[0]);
- return -1;
- }
- }
-
- file_count = argc - optind;
-
- if (!opath || !file_count) {
- usage(argv[0]);
- return -1;
- }
-
- finfo = validate_and_open_files(&argv[optind], file_count);
- if (!finfo)
- goto err;
-
- targetfd = open(opath, O_RDWR|O_CREAT, finfo->mode);
- if (targetfd < 0)
- goto err;
-
- if (generate_file(targetfd, finfo) < 0)
- goto err;
-
- if (fsync(targetfd) < 0)
- fprintf(stderr, "ERROR: %s\n", strerror(errno));
- if (close(targetfd) < 0)
- fprintf(stderr, "ERROR: %s\n", strerror(errno));
-
- close_files(finfo);
- free(finfo);
-
- return 0;
+ int file_count, opt;
+ char *opath = NULL;
+ int targetfd;
+ struct file_stripe_info *finfo;
+
+ while ((opt = getopt(argc, argv, "o:")) != -1) {
+ switch (opt) {
+ case 'o':
+ opath = optarg;
+ break;
+ default:
+ usage(argv[0]);
+ return -1;
+ }
+ }
+
+ file_count = argc - optind;
+
+ if (!opath || !file_count) {
+ usage(argv[0]);
+ return -1;
+ }
+
+ finfo = validate_and_open_files(&argv[optind], file_count);
+ if (!finfo)
+ goto err;
+
+ targetfd = open(opath, O_RDWR | O_CREAT, finfo->mode);
+ if (targetfd < 0)
+ goto err;
+
+ if (generate_file(targetfd, finfo) < 0)
+ goto err;
+
+ if (fsync(targetfd) < 0)
+ fprintf(stderr, "ERROR: %s\n", strerror(errno));
+ if (close(targetfd) < 0)
+ fprintf(stderr, "ERROR: %s\n", strerror(errno));
+
+ close_files(finfo);
+ free(finfo);
+
+ return 0;
err:
- if (finfo) {
- close_files(finfo);
- free(finfo);
- }
+ if (finfo) {
+ close_files(finfo);
+ free(finfo);
+ }
- return -1;
+ return -1;
}
-
diff --git a/extras/systemd/Makefile.am b/extras/systemd/Makefile.am
new file mode 100644
index 00000000000..61446a9b84a
--- /dev/null
+++ b/extras/systemd/Makefile.am
@@ -0,0 +1,17 @@
+CLEANFILES = glusterd.service glustereventsd.service glusterfssharedstorage.service gluster-ta-volume.service
+EXTRA_DIST = glusterd.service.in glustereventsd.service.in glusterfssharedstorage.service.in gluster-ta-volume.service.in
+
+if USE_SYSTEMD
+systemd_DATA = gluster-ta-volume.service
+endif
+
+if WITH_SERVER
+if USE_SYSTEMD
+# systemddir is already defined through configure.ac
+systemd_DATA += glusterd.service glusterfssharedstorage.service
+
+if BUILD_EVENTS
+systemd_DATA += glustereventsd.service
+endif
+endif
+endif
diff --git a/extras/systemd/gluster-ta-volume.service.in b/extras/systemd/gluster-ta-volume.service.in
new file mode 100644
index 00000000000..2802bca05bf
--- /dev/null
+++ b/extras/systemd/gluster-ta-volume.service.in
@@ -0,0 +1,13 @@
+[Unit]
+Description=GlusterFS, Thin-arbiter process to maintain quorum for replica volume
+After=network.target
+
+[Service]
+Environment="LOG_LEVEL=WARNING"
+ExecStart=@prefix@/sbin/glusterfsd -N --volfile-id ta -f @GLUSTERD_WORKDIR@/thin-arbiter/thin-arbiter.vol --brick-port 24007 --xlator-option ta-server.transport.socket.listen-port=24007 -LWARNING
+Restart=always
+KillMode=process
+SuccessExitStatus=15
+
+[Install]
+WantedBy=multi-user.target
diff --git a/extras/systemd/glusterd.service.in b/extras/systemd/glusterd.service.in
new file mode 100644
index 00000000000..abb0d82911f
--- /dev/null
+++ b/extras/systemd/glusterd.service.in
@@ -0,0 +1,26 @@
+[Unit]
+Description=GlusterFS, a clustered file-system server
+Documentation=man:glusterd(8)
+StartLimitBurst=6
+StartLimitIntervalSec=3600
+Requires=@RPCBIND_SERVICE@
+After=network.target @RPCBIND_SERVICE@
+Before=network-online.target
+
+[Service]
+Type=forking
+PIDFile=@localstatedir@/run/glusterd.pid
+LimitNOFILE=65536
+Environment="LOG_LEVEL=INFO"
+EnvironmentFile=-@SYSCONF_DIR@/sysconfig/glusterd
+ExecStart=@prefix@/sbin/glusterd -p @localstatedir@/run/glusterd.pid --log-level $LOG_LEVEL $GLUSTERD_OPTIONS
+KillMode=process
+TimeoutSec=300
+SuccessExitStatus=15
+Restart=on-abnormal
+RestartSec=60
+StartLimitBurst=6
+StartLimitInterval=3600
+
+[Install]
+WantedBy=multi-user.target
diff --git a/extras/systemd/glustereventsd.service.in b/extras/systemd/glustereventsd.service.in
new file mode 100644
index 00000000000..f80b78199f6
--- /dev/null
+++ b/extras/systemd/glustereventsd.service.in
@@ -0,0 +1,16 @@
+[Unit]
+Description=Gluster Events Notifier
+After=network.target
+Documentation=man:glustereventsd(8)
+
+
+[Service]
+Environment=PYTHONPATH=@BUILD_PYTHON_SITE_PACKAGES_EXPANDED@:$PYTHONPATH
+Type=simple
+ExecStart=@SBIN_DIR@/glustereventsd --pid-file @localstatedir@/run/glustereventsd.pid
+ExecReload=/bin/kill -SIGUSR2 $MAINPID
+KillMode=control-group
+PIDFile=@localstatedir@/run/glustereventsd.pid
+
+[Install]
+WantedBy=multi-user.target
diff --git a/extras/systemd/glusterfssharedstorage.service.in b/extras/systemd/glusterfssharedstorage.service.in
new file mode 100644
index 00000000000..723ff49afb7
--- /dev/null
+++ b/extras/systemd/glusterfssharedstorage.service.in
@@ -0,0 +1,13 @@
+[Unit]
+Description=Mount glusterfs sharedstorage
+Requires=glusterd.service remote-fs-pre.target local-fs.target
+
+[Service]
+Type=forking
+ExecStart=@GLUSTERFS_LIBEXECDIR@/mount-shared-storage.sh
+Restart=on-failure
+RestartSec=3
+RestartForceExitStatus=1
+
+[Install]
+WantedBy=multi-user.target
diff --git a/extras/test/bug-920583.t b/extras/test/bug-920583.t
new file mode 100755
index 00000000000..eedbb800ac0
--- /dev/null
+++ b/extras/test/bug-920583.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+##Copy this file to tests/bugs before running run.sh (cp extras/test/bug-920583.t tests/bugs/)
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+logdir=`gluster --print-logdir`
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 replica 2 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+function log-file-name()
+{
+ logfilename=$M0".log"
+ echo ${logfilename:1} | tr / -
+}
+
+log_file=$logdir"/"`log-file-name`
+
+lookup_unhashed_count=`grep "adding option 'lookup-unhashed'" $log_file | wc -l`
+no_child_down_count=`grep "adding option 'assert-no-child-down'" $log_file | wc -l`
+mount -t glusterfs $H0:/$V0 $M0 -o "xlator-option=*dht.assert-no-child-down=yes,xlator-option=*dht.lookup-unhashed=yes"
+touch $M0/file1;
+
+new_lookup_unhashed_count=`grep "adding option 'lookup-unhashed'" $log_file | wc -l`
+new_no_child_down_count=`grep "adding option 'assert-no-child-down'" $log_file | wc -l`
+EXPECT "1" expr $new_lookup_unhashed_count - $lookup_unhashed_count
+EXPECT "1" expr $new_no_child_down_count - $no_child_down_count
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/extras/test/gluster_commands.sh b/extras/test/gluster_commands.sh
index e1e396020af..cb2a55fd5b5 100755
--- a/extras/test/gluster_commands.sh
+++ b/extras/test/gluster_commands.sh
@@ -1,21 +1,13 @@
#!/bin/bash
-# Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
-# This file is part of GlusterFS.
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
# This script tests the basics gluster cli commands.
@@ -54,13 +46,13 @@ gluster volume start vol
gluster volume info
sleep 1
mount -t glusterfs `hostname`:vol /mnt/client
-sleep 1
+sleep 1
df -h
echo "adding-brick......."
gluster volume add-brick vol `hostname`:/exports/exp2
gluster volume info
-sleep 1
+sleep 1
umount /mnt/client
mount -t glusterfs `hostname`:vol /mnt/client
df -h
@@ -102,7 +94,7 @@ sleep 1
echo "removing brick......."
gluster --mode=script volume remove-brick vol `hostname`:/exports/exp2
gluster volume info
-sleep 1
+sleep 1
df -h
sleep 1
@@ -127,7 +119,7 @@ sleep 1
echo "starting replicate volume......"
gluster volume start mirror
gluster volume info
-sleep 1
+sleep 1
mount -t glusterfs `hostname`:mirror /mnt/client
sleep 1
df -h
@@ -136,7 +128,7 @@ sleep 1
echo "adding-brick......."
gluster volume add-brick mirror `hostname`:/exports/exp3 `hostname`:/exports/exp4
gluster volume info
-sleep 1
+sleep 1
df -h
sleep 1
@@ -178,14 +170,14 @@ sleep 1
echo "removeing-brick....."
gluster --mode=script volume remove-brick mirror `hostname`:/exports/exp3 `hostname`:/exports/exp4
gluster volume info
-sleep 1
+sleep 1
df -h
sleep 1
echo "stopping replicate volume....."
gluster --mode=script volume stop mirror
gluster volume info
-sleep 1
+sleep 1
umount /mnt/client
df -h
@@ -206,14 +198,14 @@ gluster volume start str
gluster volume info
sleep 1
mount -t glusterfs `hostname`:str /mnt/client
-sleep 1
+sleep 1
df -h
sleep 1
echo "adding brick...."
gluster volume add-brick str `hostname`:/exports/exp3 `hostname`:/exports/exp4
gluster volume info
-sleep 1
+sleep 1
df -h
sleep 1
diff --git a/extras/test/ld-preload-test/README b/extras/test/ld-preload-test/README
index 725b94023c8..df80003844b 100644
--- a/extras/test/ld-preload-test/README
+++ b/extras/test/ld-preload-test/README
@@ -32,9 +32,9 @@ Instructions
$ make
(We've tested the tool on Debian and CentOS. If there are build errors or
-warnings, please do report them to glusterfs-devel@nongnu.org.)
+warnings, please do report them to gluster-devel@gluster.org.)
2. Run the test.
$ ./test-preload.sh > preload.log
-3. Mail the log to glusterfs-devel@nongnu.org.
+3. Mail the log to gluster-devel@gluster.org.
diff --git a/extras/test/ld-preload-test/ld-preload-lib.c b/extras/test/ld-preload-test/ld-preload-lib.c
index e17305a4a1d..d120c053a69 100644
--- a/extras/test/ld-preload-test/ld-preload-lib.c
+++ b/extras/test/ld-preload-test/ld-preload-lib.c
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
/* LD PRELOAD'able library
* A very simple library that intercepts booster supported system calls
* and prints a log message to stdout.
@@ -44,594 +34,582 @@
#include <fcntl.h>
#include <sys/stat.h>
#include <dirent.h>
-#include <attr/xattr.h>
+#include <sys/xattr.h>
#include <sys/sendfile.h>
/* Err number that is assigned to errno so that test application can
* verify that the function was intercepted correctly.
*/
-#define PRELOAD_ERRNO_VERF 6449
-#define set_errno() (errno = PRELOAD_ERRNO_VERF)
+#define PRELOAD_ERRNO_VERF 6449
+#define set_errno() (errno = PRELOAD_ERRNO_VERF)
-inline void
-intercept (char *call, int tabs)
+void
+intercept(char *call, int tabs)
{
- while (tabs > 0) {
- fprintf (stdout, "\t");
- --tabs;
- }
+ while (tabs > 0) {
+ fprintf(stdout, "\t");
+ --tabs;
+ }
- fprintf (stdout, "Intercepted by %s", call);
+ fprintf(stdout, "Intercepted by %s", call);
}
int
-creat64 (const char *pathname, mode_t mode)
+creat64(const char *pathname, mode_t mode)
{
- intercept ("creat64", 2);
- set_errno ();
- return -1;
+ intercept("creat64", 2);
+ set_errno();
+ return -1;
}
int
-creat (const char *pathname, mode_t mode)
+creat(const char *pathname, mode_t mode)
{
- intercept ("creat", 2);
- set_errno ();
- return -1;
+ intercept("creat", 2);
+ set_errno();
+ return -1;
}
-
int
-close (int fd)
+close(int fd)
{
- intercept ("close", 2);
- set_errno ();
- return -1;
+ intercept("close", 2);
+ set_errno();
+ return -1;
}
int
-open64 (const char *pathname, int flags, ...)
+open64(const char *pathname, int flags, ...)
{
- intercept ("open64", 2);
- set_errno ();
- return -1;
+ intercept("open64", 2);
+ set_errno();
+ return -1;
}
-
int
-open (const char *pathname, int flags, ...)
+open(const char *pathname, int flags, ...)
{
- intercept ("open", 2);
- set_errno ();
- return -1;
+ intercept("open", 2);
+ set_errno();
+ return -1;
}
ssize_t
-read (int fd, void *buf, size_t count)
+read(int fd, void *buf, size_t count)
{
- intercept ("read", 2);
- set_errno ();
- return -1;
+ intercept("read", 2);
+ set_errno();
+ return -1;
}
ssize_t
-readv (int fd, const struct iovec *vector, int count)
+readv(int fd, const struct iovec *vector, int count)
{
- intercept ("readv", 2);
- set_errno ();
- return -1;
+ intercept("readv", 2);
+ set_errno();
+ return -1;
}
ssize_t
-pread (int fd, void *buf, size_t count, unsigned long offset)
+pread(int fd, void *buf, size_t count, unsigned long offset)
{
- intercept ("pread", 2);
- set_errno ();
- return -1;
+ intercept("pread", 2);
+ set_errno();
+ return -1;
}
-
ssize_t
-pread64 (int fd, void *buf, size_t count, uint64_t offset)
+pread64(int fd, void *buf, size_t count, uint64_t offset)
{
- intercept ("pread64", 2);
- set_errno ();
- return -1;
+ intercept("pread64", 2);
+ set_errno();
+ return -1;
}
ssize_t
-write (int fd, const void *buf, size_t count)
+write(int fd, const void *buf, size_t count)
{
- intercept ("write", 2);
- set_errno ();
- return -1;
+ intercept("write", 2);
+ set_errno();
+ return -1;
}
ssize_t
-writev (int fd, const struct iovec *vector, int count)
+writev(int fd, const struct iovec *vector, int count)
{
- intercept ("writev", 2);
- set_errno ();
- return -1;
+ intercept("writev", 2);
+ set_errno();
+ return -1;
}
ssize_t
-pwrite (int fd, const void *buf, size_t count, unsigned long offset)
+pwrite(int fd, const void *buf, size_t count, unsigned long offset)
{
- intercept ("pwrite", 2);
- set_errno ();
- return -1;
+ intercept("pwrite", 2);
+ set_errno();
+ return -1;
}
ssize_t
-pwrite64 (int fd, const void *buf, size_t count, uint64_t offset)
+pwrite64(int fd, const void *buf, size_t count, uint64_t offset)
{
- intercept ("pwrite64", 2);
- set_errno ();
- return -1;
+ intercept("pwrite64", 2);
+ set_errno();
+ return -1;
}
-
off_t
-lseek (int fildes, unsigned long offset, int whence)
+lseek(int fildes, unsigned long offset, int whence)
{
- intercept ("lseek", 2);
- set_errno ();
- return -1;
+ intercept("lseek", 2);
+ set_errno();
+ return -1;
}
off_t
-lseek64 (int fildes, uint64_t offset, int whence)
+lseek64(int fildes, uint64_t offset, int whence)
{
- intercept ("lseek64", 2);
- set_errno ();
- return -1;
+ intercept("lseek64", 2);
+ set_errno();
+ return -1;
}
-
int
-dup (int fd)
+dup(int fd)
{
- intercept ("dup", 2);
- set_errno ();
- return -1;
+ intercept("dup", 2);
+ set_errno();
+ return -1;
}
int
-dup2 (int oldfd, int newfd)
+dup2(int oldfd, int newfd)
{
- intercept ("dup2", 2);
- set_errno ();
- return -1;
+ intercept("dup2", 2);
+ set_errno();
+ return -1;
}
int
-mkdir (const char *pathname, mode_t mode)
+mkdir(const char *pathname, mode_t mode)
{
- intercept ("mkdir", 2);
- set_errno ();
- return -1;
+ intercept("mkdir", 2);
+ set_errno();
+ return -1;
}
int
-rmdir (const char *pathname)
+rmdir(const char *pathname)
{
- intercept ("rmdir", 2);
- set_errno ();
- return -1;
+ intercept("rmdir", 2);
+ set_errno();
+ return -1;
}
int
-chmod (const char *pathname, mode_t mode)
+chmod(const char *pathname, mode_t mode)
{
- intercept ("chmod", 2);
- set_errno ();
- return -1;
+ intercept("chmod", 2);
+ set_errno();
+ return -1;
}
int
-chown (const char *pathname, uid_t owner, gid_t group)
+chown(const char *pathname, uid_t owner, gid_t group)
{
- intercept ("chown", 2);
- set_errno ();
- return -1;
+ intercept("chown", 2);
+ set_errno();
+ return -1;
}
int
-fchmod (int fd, mode_t mode)
+fchmod(int fd, mode_t mode)
{
- intercept ("fchmod", 2);
- set_errno ();
- return -1;
+ intercept("fchmod", 2);
+ set_errno();
+ return -1;
}
int
-fchown (int fd, uid_t uid, gid_t gid)
+fchown(int fd, uid_t uid, gid_t gid)
{
- intercept ("fchown", 2);
- set_errno ();
- return -1;
+ intercept("fchown", 2);
+ set_errno();
+ return -1;
}
-int fsync (int fd)
+int
+fsync(int fd)
{
- intercept ("fsync", 2);
- set_errno ();
- return -1;
+ intercept("fsync", 2);
+ set_errno();
+ return -1;
}
-
int
-ftruncate (int fd, off_t length)
+ftruncate(int fd, off_t length)
{
- intercept ("ftruncate", 1);
- set_errno ();
- return -1;
+ intercept("ftruncate", 1);
+ set_errno();
+ return -1;
}
-
int
-ftruncate64 (int fd, off_t length)
+ftruncate64(int fd, off_t length)
{
- intercept ("ftruncate64", 1);
- set_errno ();
- return -1;
+ intercept("ftruncate64", 1);
+ set_errno();
+ return -1;
}
int
-link (const char *oldpath, const char *newname)
+link(const char *oldpath, const char *newname)
{
- intercept ("link", 2);
- set_errno ();
- return -1;
+ intercept("link", 2);
+ set_errno();
+ return -1;
}
int
-rename (const char *oldpath, const char *newpath)
+rename(const char *oldpath, const char *newpath)
{
- intercept ("rename", 2);
- set_errno ();
- return -1;
+ intercept("rename", 2);
+ set_errno();
+ return -1;
}
int
-utimes (const char *path, const struct timeval times[2])
+utimes(const char *path, const struct timeval times[2])
{
- intercept ("utimes", 2);
- set_errno ();
- return -1;
+ intercept("utimes", 2);
+ set_errno();
+ return -1;
}
int
-utime (const char *path, const struct utimbuf *buf)
+futimes(int fd, const struct timeval times[2])
{
- intercept ("utime", 2);
- set_errno ();
- return -1;
+ intercept("futimes", 2);
+ set_errno();
+ return -1;
}
-
int
-mknod (const char *path, mode_t mode, dev_t dev)
+utime(const char *path, const struct utimbuf *buf)
{
- intercept ("mknod", 2);
- set_errno ();
- return -1;
+ intercept("utime", 2);
+ set_errno();
+ return -1;
}
int
-__xmknod (int ver, const char *path, mode_t mode, dev_t *dev)
+mknod(const char *path, mode_t mode, dev_t dev)
{
- intercept ("__xmknod", 2);
- set_errno ();
- return -1;
+ intercept("mknod", 2);
+ set_errno();
+ return -1;
}
int
-mkfifo (const char *path, mode_t mode)
+__xmknod(int ver, const char *path, mode_t mode, dev_t *dev)
{
- intercept ("mkfifo", 2);
- set_errno ();
- return -1;
+ intercept("__xmknod", 2);
+ set_errno();
+ return -1;
}
int
-unlink (const char *path)
+mkfifo(const char *path, mode_t mode)
{
- intercept ("unlink", 2);
- set_errno ();
- return -1;
+ intercept("mkfifo", 2);
+ set_errno();
+ return -1;
}
-
int
-symlink (const char *oldpath, const char *newpath)
+unlink(const char *path)
{
- intercept ("symlink", 2);
- set_errno ();
- return -1;
+ intercept("unlink", 2);
+ set_errno();
+ return -1;
}
int
-readlink (const char *path, char *buf, size_t bufsize)
+symlink(const char *oldpath, const char *newpath)
{
- intercept ("readlink", 1);
- set_errno ();
- return -1;
+ intercept("symlink", 2);
+ set_errno();
+ return -1;
}
+int
+readlink(const char *path, char *buf, size_t bufsize)
+{
+ intercept("readlink", 1);
+ set_errno();
+ return -1;
+}
char *
-realpath (const char *path, char *resolved)
+realpath(const char *path, char *resolved)
{
- intercept ("realpath", 1);
- set_errno ();
- return NULL;
+ intercept("realpath", 1);
+ set_errno();
+ return NULL;
}
-
DIR *
-opendir (const char *path)
+opendir(const char *path)
{
- intercept ("opendir", 2);
- set_errno ();
- return NULL;
+ intercept("opendir", 2);
+ set_errno();
+ return NULL;
}
-
struct dirent *
-readdir (DIR *dir)
+readdir(DIR *dir)
{
- intercept ("readdir\t", 2);
- set_errno ();
- return NULL;
+ intercept("readdir\t", 2);
+ set_errno();
+ return NULL;
}
struct dirent *
-readdir64 (DIR *dir)
+readdir64(DIR *dir)
{
- intercept ("readdir64", 2);
- set_errno ();
- return NULL;
+ intercept("readdir64", 2);
+ set_errno();
+ return NULL;
}
-
int
-readdir_r (DIR *dir, struct dirent *entry, struct dirent **result)
+readdir_r(DIR *dir, struct dirent *entry, struct dirent **result)
{
- intercept ("readdir_r", 1);
- set_errno ();
- return -1;
+ intercept("readdir_r", 1);
+ set_errno();
+ return -1;
}
int
-readdir64_r (DIR *dir, struct dirent *entry, struct dirent **result)
+readdir64_r(DIR *dir, struct dirent *entry, struct dirent **result)
{
- intercept ("readdir64_r", 1);
- set_errno ();
- return -1;
+ intercept("readdir64_r", 1);
+ set_errno();
+ return -1;
}
-
int
-closedir (DIR *dh)
+closedir(DIR *dh)
{
- intercept ("closedir", 1);
- set_errno ();
- return -1;
+ intercept("closedir", 1);
+ set_errno();
+ return -1;
}
int
-__xstat (int ver, const char *path, struct stat *buf)
+__xstat(int ver, const char *path, struct stat *buf)
{
- intercept ("__xstat\t", 2);
- set_errno ();
- return -1;
+ intercept("__xstat\t", 2);
+ set_errno();
+ return -1;
}
-
int
-__xstat64 (int ver, const char *path, struct stat *buf)
+__xstat64(int ver, const char *path, struct stat *buf)
{
- intercept ("__xstat64", 2);
- set_errno ();
- return -1;
+ intercept("__xstat64", 2);
+ set_errno();
+ return -1;
}
int
-stat (const char *path, struct stat *buf)
+stat(const char *path, struct stat *buf)
{
- intercept ("stat", 2);
- set_errno ();
- return -1;
+ intercept("stat", 2);
+ set_errno();
+ return -1;
}
int
-stat64 (const char *path, struct stat *buf)
+stat64(const char *path, struct stat *buf)
{
- intercept ("stat64", 2);
- set_errno ();
- return -1;
+ intercept("stat64", 2);
+ set_errno();
+ return -1;
}
int
-__fxstat (int ver, int fd, struct stat *buf)
+__fxstat(int ver, int fd, struct stat *buf)
{
- intercept ("__fxstat\t", 2);
- set_errno ();
- return -1;
+ intercept("__fxstat\t", 2);
+ set_errno();
+ return -1;
}
-
int
-__fxstat64 (int ver, int fd, struct stat *buf)
+__fxstat64(int ver, int fd, struct stat *buf)
{
- intercept ("__fxstat64", 2);
- set_errno ();
- return -1;
+ intercept("__fxstat64", 2);
+ set_errno();
+ return -1;
}
int
-fstat (int fd, struct stat *buf)
+fstat(int fd, struct stat *buf)
{
- intercept ("fstat", 2);
- set_errno ();
- return -1;
+ intercept("fstat", 2);
+ set_errno();
+ return -1;
}
int
-fstat64 (int fd , struct stat *buf)
+fstat64(int fd, struct stat *buf)
{
- intercept ("fstat64", 2);
- set_errno ();
- return -1;
+ intercept("fstat64", 2);
+ set_errno();
+ return -1;
}
int
-__lxstat (int ver, const char *path, struct stat *buf)
+__lxstat(int ver, const char *path, struct stat *buf)
{
- intercept ("__lxstat\t", 2);
- set_errno ();
- return -1;
+ intercept("__lxstat\t", 2);
+ set_errno();
+ return -1;
}
int
-__lxstat64 (int ver, const char *path, struct stat *buf)
+__lxstat64(int ver, const char *path, struct stat *buf)
{
- intercept ("__lxstat64", 2);
- set_errno ();
- return -1;
+ intercept("__lxstat64", 2);
+ set_errno();
+ return -1;
}
int
-lstat (const char *path, struct stat *buf)
+lstat(const char *path, struct stat *buf)
{
- intercept ("lstat", 2);
- set_errno ();
- return -1;
+ intercept("lstat", 2);
+ set_errno();
+ return -1;
}
int
-lstat64 (const char *path, struct stat *buf)
+lstat64(const char *path, struct stat *buf)
{
- intercept ("lstat64", 2);
- set_errno ();
- return -1;
+ intercept("lstat64", 2);
+ set_errno();
+ return -1;
}
int
-statfs (const char *path, struct statfs *buf)
+statfs(const char *path, struct statfs *buf)
{
- intercept ("statfs", 2);
- set_errno ();
- return -1;
+ intercept("statfs", 2);
+ set_errno();
+ return -1;
}
-
int
-statfs64 (const char *path, struct statfs *buf)
+statfs64(const char *path, struct statfs *buf)
{
- intercept ("statfs64", 2);
- set_errno ();
- return -1;
+ intercept("statfs64", 2);
+ set_errno();
+ return -1;
}
int
-statvfs (const char *path, struct statvfs *buf)
+statvfs(const char *path, struct statvfs *buf)
{
- intercept ("statvfs\t", 2);
- set_errno ();
- return -1;
+ intercept("statvfs\t", 2);
+ set_errno();
+ return -1;
}
-
int
-statvfs64 (const char *path, struct statvfs *buf)
+statvfs64(const char *path, struct statvfs *buf)
{
- intercept ("statvfs64", 2);
- set_errno ();
- return -1;
+ intercept("statvfs64", 2);
+ set_errno();
+ return -1;
}
ssize_t
-getxattr (const char *path, const char *name, void *value, size_t size)
+getxattr(const char *path, const char *name, void *value, size_t size)
{
- intercept ("getxattr", 1);
- set_errno ();
- return -1;
+ intercept("getxattr", 1);
+ set_errno();
+ return -1;
}
ssize_t
-lgetxattr (const char *path, const char *name, void *value, size_t size)
+lgetxattr(const char *path, const char *name, void *value, size_t size)
{
- intercept ("lgetxattr", 1);
- set_errno ();
- return -1;
+ intercept("lgetxattr", 1);
+ set_errno();
+ return -1;
}
-
int
-remove (const char* path)
+remove(const char *path)
{
- intercept ("remove", 2);
- set_errno ();
- return -1;
+ intercept("remove", 2);
+ set_errno();
+ return -1;
}
int
-lchown (const char *path, uid_t owner, gid_t group)
+lchown(const char *path, uid_t owner, gid_t group)
{
- intercept ("lchown", 2);
- set_errno ();
- return -1;
+ intercept("lchown", 2);
+ set_errno();
+ return -1;
}
void
-rewinddir (DIR *dirp)
+rewinddir(DIR *dirp)
{
- intercept ("rewinddir", 1);
- set_errno ();
- return;
+ intercept("rewinddir", 1);
+ set_errno();
+ return;
}
void
-seekdir (DIR *dirp, off_t offset)
+seekdir(DIR *dirp, off_t offset)
{
- intercept ("seekdir", 2);
- set_errno ();
- return;
+ intercept("seekdir", 2);
+ set_errno();
+ return;
}
off_t
-telldir (DIR *dirp)
+telldir(DIR *dirp)
{
- intercept ("telldir", 2);
- set_errno ();
- return -1;
+ intercept("telldir", 2);
+ set_errno();
+ return -1;
}
ssize_t
-sendfile (int out_fd, int in_fd, off_t *offset, size_t count)
+sendfile(int out_fd, int in_fd, off_t *offset, size_t count)
{
- intercept ("sendfile\t", 1);
- set_errno ();
- return -1;
+ intercept("sendfile\t", 1);
+ set_errno();
+ return -1;
}
ssize_t
-sendfile64 (int out_fd, int in_fd, off_t *offset, size_t count)
+sendfile64(int out_fd, int in_fd, off_t *offset, size_t count)
{
- intercept ("sendfile64", 1);
- set_errno ();
- return -1;
+ intercept("sendfile64", 1);
+ set_errno();
+ return -1;
}
-
int
-fcntl (int fd, int cmd, ...)
+fcntl(int fd, int cmd, ...)
{
- intercept ("fcntl", 2);
- set_errno ();
- return -1;
+ intercept("fcntl", 2);
+ set_errno();
+ return -1;
}
-
diff --git a/extras/test/ld-preload-test/ld-preload-test.c b/extras/test/ld-preload-test/ld-preload-test.c
index 55dd98805b4..54dde8c7d54 100644
--- a/extras/test/ld-preload-test/ld-preload-test.c
+++ b/extras/test/ld-preload-test/ld-preload-test.c
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
/*
* LD PRELOAD Test Tool
*
@@ -56,322 +46,313 @@
#include <sys/uio.h>
#include <utime.h>
#include <sys/time.h>
-#include <attr/xattr.h>
+#include <sys/xattr.h>
#include <sys/sendfile.h>
-
-#define PRELOAD_ERRNO_VERF 6449
-inline void
+#define PRELOAD_ERRNO_VERF 6449
+void
check_err(int ret, char *call, int tabs)
{
- while (tabs > 0) {
- fprintf (stdout, "\t");
- --tabs;
- }
- if (ret != -1) {
- fprintf (stdout, "Not intercepted: %s\n", call);
- return;
- }
-
- if (errno != PRELOAD_ERRNO_VERF) {
- fprintf (stdout, "Not intercepted: %s: err: %s\n", call,
- strerror (errno));
- return;
- }
+ while (tabs > 0) {
+ fprintf(stdout, "\t");
+ --tabs;
+ }
+ if (ret != -1) {
+ fprintf(stdout, "Not intercepted: %s\n", call);
+ return;
+ }
- fprintf (stdout, "Intercept verified: %s\n", call);
+ if (errno != PRELOAD_ERRNO_VERF) {
+ fprintf(stdout, "Not intercepted: %s: err: %s\n", call,
+ strerror(errno));
return;
+ }
+
+ fprintf(stdout, "Intercept verified: %s\n", call);
+ return;
}
void
-usage (FILE *fp)
+usage(FILE *fp)
{
- fprintf (fp, "Usage: ld-preload-test <Options>\n");
- fprintf (fp, "Options\n");
- fprintf (fp, "\t--path\t\tPathname is used as the file/directory"
- " created for the test.\n");
-
+ fprintf(fp, "Usage: ld-preload-test <Options>\n");
+ fprintf(fp, "Options\n");
+ fprintf(fp,
+ "\t--path\t\tPathname is used as the file/directory"
+ " created for the test.\n");
}
-
int
-run_file_tests (char *testfile)
+run_file_tests(char *testfile)
{
- int ret = -1;
- struct stat buf;
+ int ret = -1;
+ struct stat buf;
- assert (testfile);
- fprintf (stdout, "Testing creat");
- ret = creat (testfile, S_IRWXU);
- check_err (ret, "creat", 2);
+ assert(testfile);
+ fprintf(stdout, "Testing creat");
+ ret = creat(testfile, S_IRWXU);
+ check_err(ret, "creat", 2);
- fprintf (stdout, "Testing close");
- ret = close (ret);
- check_err (ret, "close", 2);
+ fprintf(stdout, "Testing close");
+ ret = close(ret);
+ check_err(ret, "close", 2);
- fprintf (stdout, "Testing open");
- ret = open (testfile, O_RDONLY);
- check_err (ret, "open", 2);
+ fprintf(stdout, "Testing open");
+ ret = open(testfile, O_RDONLY);
+ check_err(ret, "open", 2);
- fprintf (stdout, "Testing read");
- ret = read (0, NULL, 0);
- check_err (ret, "read", 2);
+ fprintf(stdout, "Testing read");
+ ret = read(0, NULL, 0);
+ check_err(ret, "read", 2);
- fprintf (stdout, "Testing readv");
- ret = readv (0, NULL, 0);
- check_err (ret, "readv", 2);
+ fprintf(stdout, "Testing readv");
+ ret = readv(0, NULL, 0);
+ check_err(ret, "readv", 2);
- fprintf (stdout, "Testing pread");
- ret = pread (0, NULL, 0, 0);
- check_err (ret, "pread", 2);
+ fprintf(stdout, "Testing pread");
+ ret = pread(0, NULL, 0, 0);
+ check_err(ret, "pread", 2);
- fprintf (stdout, "Testing write");
- ret = write (0, NULL, 0);
- check_err (ret, "write", 2);
+ fprintf(stdout, "Testing write");
+ ret = write(0, NULL, 0);
+ check_err(ret, "write", 2);
- fprintf (stdout, "Testing writev");
- ret = writev (0, NULL, 0);
- check_err (ret, "writev", 2);
+ fprintf(stdout, "Testing writev");
+ ret = writev(0, NULL, 0);
+ check_err(ret, "writev", 2);
- fprintf (stdout, "Testing pwrite");
- ret = pwrite (0, NULL, 0, 0);
- check_err (ret, "pwrite", 2);
+ fprintf(stdout, "Testing pwrite");
+ ret = pwrite(0, NULL, 0, 0);
+ check_err(ret, "pwrite", 2);
- fprintf (stdout, "Testing lseek");
- ret = lseek (0, 0, 0);
- check_err (ret, "lseek", 2);
+ fprintf(stdout, "Testing lseek");
+ ret = lseek(0, 0, 0);
+ check_err(ret, "lseek", 2);
- fprintf (stdout, "Testing dup");
- ret = dup (0);
- check_err (ret, "dup", 2);
+ fprintf(stdout, "Testing dup");
+ ret = dup(0);
+ check_err(ret, "dup", 2);
- fprintf (stdout, "Testing dup2");
- ret = dup2 (0, 0);
- check_err (ret, "dup2", 2);
+ fprintf(stdout, "Testing dup2");
+ ret = dup2(0, 0);
+ check_err(ret, "dup2", 2);
- fprintf (stdout, "Testing fchmod");
- ret = fchmod (0, 0);
- check_err (ret, "fchmod", 2);
+ fprintf(stdout, "Testing fchmod");
+ ret = fchmod(0, 0);
+ check_err(ret, "fchmod", 2);
- fprintf (stdout, "Testing fchown");
- ret = fchown (0, 0, 0);
- check_err (ret, "fchown", 2);
+ fprintf(stdout, "Testing fchown");
+ ret = fchown(0, 0, 0);
+ check_err(ret, "fchown", 2);
- fprintf (stdout, "Testing fsync");
- ret = fsync (0);
- check_err (ret, "fsync", 2);
+ fprintf(stdout, "Testing fsync");
+ ret = fsync(0);
+ check_err(ret, "fsync", 2);
- fprintf (stdout, "Testing ftruncate");
- ret = ftruncate (0, 0);
- check_err (ret, "ftruncate", 1);
+ fprintf(stdout, "Testing ftruncate");
+ ret = ftruncate(0, 0);
+ check_err(ret, "ftruncate", 1);
- fprintf (stdout, "Testing fstat");
- ret = fstat (0, &buf);
- check_err (ret, "fstat", 1);
+ fprintf(stdout, "Testing fstat");
+ ret = fstat(0, &buf);
+ check_err(ret, "fstat", 1);
- fprintf (stdout, "Testing sendfile");
- ret = sendfile (0, 0, NULL, 0);
- check_err (ret, "sendfile", 1);
+ fprintf(stdout, "Testing sendfile");
+ ret = sendfile(0, 0, NULL, 0);
+ check_err(ret, "sendfile", 1);
- fprintf (stdout, "Testing fcntl");
- ret = fcntl (0, 0, NULL);
- check_err (ret, "fcntl", 2);
+ fprintf(stdout, "Testing fcntl");
+ ret = fcntl(0, 0, NULL);
+ check_err(ret, "fcntl", 2);
- fprintf (stdout, "Testing close");
- ret = close (ret);
- check_err (ret, "close", 2);
+ fprintf(stdout, "Testing close");
+ ret = close(ret);
+ check_err(ret, "close", 2);
- fprintf (stdout, "Testing remove");
- ret = remove (testfile);
- check_err (ret, "remove", 2);
+ fprintf(stdout, "Testing remove");
+ ret = remove(testfile);
+ check_err(ret, "remove", 2);
- return ret;
+ return ret;
}
-
int
-run_attr_tests (char *testfile)
+run_attr_tests(char *testfile)
{
- int ret = -1;
- char *res = NULL;
- struct stat buf;
- struct statfs sbuf;
- struct statvfs svbuf;
-
- assert (testfile);
-
- fprintf (stdout, "Testing chmod");
- ret = chmod (testfile, 0);
- check_err (ret, "chmod", 2);
-
- fprintf (stdout, "Testing chown");
- ret = chown (testfile, 0, 0);
- check_err (ret, "chown", 2);
-
- fprintf (stdout, "Testing link");
- ret = link (testfile, testfile);
- check_err (ret, "link", 2);
-
- fprintf (stdout, "Testing rename");
- ret = rename (testfile, testfile);
- check_err (ret, "rename", 2);
-
- fprintf (stdout, "Testing utimes");
- ret = utimes (testfile, NULL);
- check_err (ret, "utimes", 2);
-
- fprintf (stdout, "Testing utime");
- ret = utime (testfile, NULL);
- check_err (ret, "utime", 2);
-
- fprintf (stdout, "Testing unlink");
- ret = unlink (testfile);
- check_err (ret, "unlink", 2);
-
- fprintf (stdout, "Testing symlink");
- ret = symlink (testfile, testfile);
- check_err (ret, "symlink", 2);
-
- fprintf (stdout, "Testing readlink");
- ret = readlink (testfile, testfile, 0);
- check_err (ret, "readlink", 2);
-
- fprintf (stdout, "Testing realpath");
- ret = 0;
- res = realpath ((const char *)testfile, testfile);
- if (!res)
- ret = -1;
- check_err (ret, "realpath", 2);
-
- fprintf (stdout, "Testing stat");
- ret = stat (testfile, &buf);
- check_err (ret, "stat", 1);
-
- fprintf (stdout, "Testing lstat");
- ret = lstat (testfile, &buf);
- check_err (ret, "lstat", 1);
-
- fprintf (stdout, "Testing statfs");
- ret = statfs (testfile, &sbuf);
- check_err (ret, "statfs", 2);
-
- fprintf (stdout, "Testing statvfs");
- ret = statvfs (testfile, &svbuf);
- check_err (ret, "statvfs", 1);
-
- fprintf (stdout, "Testing getxattr");
- ret = getxattr (testfile, NULL, NULL, 0);
- check_err (ret, "getxattr", 2);
-
- fprintf (stdout, "Testing lgetxattr");
- ret = lgetxattr (testfile, NULL, NULL, 0);
- check_err (ret, "lgetxattr", 1);
-
- fprintf (stdout, "Testing lchown");
- ret = lchown (testfile, 0, 0);
- check_err (ret, "lchown", 2);
- return 0;
+ int ret = -1;
+ char *res = NULL;
+ struct stat buf;
+ struct statfs sbuf;
+ struct statvfs svbuf;
+
+ assert(testfile);
+
+ fprintf(stdout, "Testing chmod");
+ ret = chmod(testfile, 0);
+ check_err(ret, "chmod", 2);
+
+ fprintf(stdout, "Testing chown");
+ ret = chown(testfile, 0, 0);
+ check_err(ret, "chown", 2);
+
+ fprintf(stdout, "Testing link");
+ ret = link(testfile, testfile);
+ check_err(ret, "link", 2);
+
+ fprintf(stdout, "Testing rename");
+ ret = rename(testfile, testfile);
+ check_err(ret, "rename", 2);
+
+ fprintf(stdout, "Testing utimes");
+ ret = utimes(testfile, NULL);
+ check_err(ret, "utimes", 2);
+
+ fprintf(stdout, "Testing utime");
+ ret = utime(testfile, NULL);
+ check_err(ret, "utime", 2);
+
+ fprintf(stdout, "Testing unlink");
+ ret = unlink(testfile);
+ check_err(ret, "unlink", 2);
+
+ fprintf(stdout, "Testing symlink");
+ ret = symlink(testfile, testfile);
+ check_err(ret, "symlink", 2);
+
+ fprintf(stdout, "Testing readlink");
+ ret = readlink(testfile, testfile, 0);
+ check_err(ret, "readlink", 2);
+
+ fprintf(stdout, "Testing realpath");
+ ret = 0;
+ res = realpath((const char *)testfile, testfile);
+ if (!res)
+ ret = -1;
+ check_err(ret, "realpath", 2);
+
+ fprintf(stdout, "Testing stat");
+ ret = stat(testfile, &buf);
+ check_err(ret, "stat", 1);
+
+ fprintf(stdout, "Testing lstat");
+ ret = lstat(testfile, &buf);
+ check_err(ret, "lstat", 1);
+
+ fprintf(stdout, "Testing statfs");
+ ret = statfs(testfile, &sbuf);
+ check_err(ret, "statfs", 2);
+
+ fprintf(stdout, "Testing statvfs");
+ ret = statvfs(testfile, &svbuf);
+ check_err(ret, "statvfs", 1);
+
+ fprintf(stdout, "Testing getxattr");
+ ret = getxattr(testfile, NULL, NULL, 0);
+ check_err(ret, "getxattr", 2);
+
+ fprintf(stdout, "Testing lgetxattr");
+ ret = lgetxattr(testfile, NULL, NULL, 0);
+ check_err(ret, "lgetxattr", 1);
+
+ fprintf(stdout, "Testing lchown");
+ ret = lchown(testfile, 0, 0);
+ check_err(ret, "lchown", 2);
+ return 0;
}
-
int
-run_dev_tests (char *testfile)
+run_dev_tests(char *testfile)
{
- int ret = -1;
+ int ret = -1;
- assert (testfile);
+ assert(testfile);
- fprintf (stdout, "Testing mknod");
- ret = mknod (testfile, 0, 0);
- check_err (ret, "mknod", 2);
+ fprintf(stdout, "Testing mknod");
+ ret = mknod(testfile, 0, 0);
+ check_err(ret, "mknod", 2);
- fprintf (stdout, "Testing mkfifo");
- ret = mkfifo (testfile, 0);
- check_err (ret, "mkfifo", 2);
- return 0;
+ fprintf(stdout, "Testing mkfifo");
+ ret = mkfifo(testfile, 0);
+ check_err(ret, "mkfifo", 2);
+ return 0;
}
int
-run_dir_tests (char *testpath)
+run_dir_tests(char *testpath)
{
- int ret = -1;
- DIR *dh = NULL;
- struct dirent *dire = NULL;
-
- assert (testpath);
-
- fprintf (stdout, "Testing mkdir");
- ret = mkdir (testpath, 0);
- check_err (ret, "mkdir", 2);
-
- fprintf (stdout, "Testing rmdir");
- ret = rmdir (testpath);
- check_err (ret, "rmdir", 2);
-
- fprintf (stdout, "Testing opendir");
- ret = 0;
- dh = opendir (testpath);
- if (!dh)
- ret = -1;
- check_err (ret, "opendir", 2);
-
- fprintf (stdout, "Testing readdir");
- ret = 0;
- dire = readdir (dh);
- if (!dire)
- ret = -1;
- check_err (ret, "readdir", 1);
-
- fprintf (stdout, "Testing readdir_r");
- ret = readdir_r (dh, dire, &dire);
- check_err (ret, "readdir_r", 1);
-
- fprintf (stdout, "Testing rewinddir");
- rewinddir (dh);
- check_err (-1, "rewinddir", 1);
-
- fprintf (stdout, "Testing seekdir");
- seekdir (dh, 0);
- check_err (-1, "seekdir", 2);
-
- fprintf (stdout, "Testing telldir");
- ret = telldir (dh);
- check_err (ret, "telldir", 2);
-
- fprintf (stdout, "Testing closedir");
- ret = closedir (dh);
- check_err (ret, "closedir", 2);
- return 0;
+ int ret = -1;
+ DIR *dh = NULL;
+ struct dirent *dire = NULL;
+
+ assert(testpath);
+
+ fprintf(stdout, "Testing mkdir");
+ ret = mkdir(testpath, 0);
+ check_err(ret, "mkdir", 2);
+
+ fprintf(stdout, "Testing rmdir");
+ ret = rmdir(testpath);
+ check_err(ret, "rmdir", 2);
+
+ fprintf(stdout, "Testing opendir");
+ ret = 0;
+ dh = opendir(testpath);
+ if (!dh)
+ ret = -1;
+ check_err(ret, "opendir", 2);
+
+ fprintf(stdout, "Testing readdir");
+ ret = 0;
+ dire = readdir(dh);
+ if (!dire)
+ ret = -1;
+ check_err(ret, "readdir", 1);
+
+ fprintf(stdout, "Testing readdir_r");
+ ret = readdir_r(dh, dire, &dire);
+ check_err(ret, "readdir_r", 1);
+
+ fprintf(stdout, "Testing rewinddir");
+ rewinddir(dh);
+ check_err(-1, "rewinddir", 1);
+
+ fprintf(stdout, "Testing seekdir");
+ seekdir(dh, 0);
+ check_err(-1, "seekdir", 2);
+
+ fprintf(stdout, "Testing telldir");
+ ret = telldir(dh);
+ check_err(ret, "telldir", 2);
+
+ fprintf(stdout, "Testing closedir");
+ ret = closedir(dh);
+ check_err(ret, "closedir", 2);
+ return 0;
}
-
-
int
-main (int argc, char *argv[])
+main(int argc, char *argv[])
{
- char *testpath = NULL;
- int x = 0;
-
- for (;x < argc; ++x) {
- if (strcmp (argv[x], "--path") == 0) {
- testpath = argv[x+1];
- continue;
- }
+ char *testpath = NULL;
+ int x = 0;
+ for (; x < argc; ++x) {
+ if (strcmp(argv[x], "--path") == 0) {
+ testpath = argv[x + 1];
+ continue;
}
+ }
- if (!testpath) {
- fprintf (stderr, "--path not specified\n");
- usage (stderr);
- return -1;
- }
+ if (!testpath) {
+ fprintf(stderr, "--path not specified\n");
+ usage(stderr);
+ return -1;
+ }
- run_file_tests (testpath);
- run_dir_tests (testpath);
- run_attr_tests (testpath);
- run_dev_tests (testpath);
+ run_file_tests(testpath);
+ run_dir_tests(testpath);
+ run_attr_tests(testpath);
+ run_dev_tests(testpath);
- return 0;
+ return 0;
}
-
-
diff --git a/extras/test/open-fd-tests.c b/extras/test/open-fd-tests.c
index 4184079d043..509952b4180 100644
--- a/extras/test/open-fd-tests.c
+++ b/extras/test/open-fd-tests.c
@@ -4,61 +4,64 @@
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
-#include <attr/xattr.h>
+#include <sys/xattr.h>
#include <errno.h>
#include <string.h>
int
-main (int argc, char *argv[])
+main(int argc, char *argv[])
{
- int ret = -1;
- int fd = 0;
- char *filename = NULL;
- int loop = 0;
- struct stat stbuf = {0,};
- char string[1024] = {0,};
+ int ret = -1;
+ int fd = 0;
+ char *filename = NULL;
+ int loop = 0;
+ struct stat stbuf = {
+ 0,
+ };
+ char string[1024] = {
+ 0,
+ };
- if (argc > 1)
- filename = argv[1];
+ if (argc > 1)
+ filename = argv[1];
- if (!filename)
- filename = "temp-fd-test-file";
+ if (!filename)
+ filename = "temp-fd-test-file";
- fd = open (filename, O_RDWR|O_CREAT|O_TRUNC);
- if (fd < 0) {
- fd = 0;
- fprintf (stderr, "open failed : %s\n", strerror (errno));
- goto out;
- }
-
- while (loop < 1000) {
- /* Use it as a mechanism to test time delays */
- memset (string, 0, 1024);
- scanf ("%s", string);
+ fd = open(filename, O_RDWR | O_CREAT | O_TRUNC);
+ if (fd < 0) {
+ fd = 0;
+ fprintf(stderr, "open failed : %s\n", strerror(errno));
+ goto out;
+ }
- ret = write (fd, string, strlen (string));
- if (ret != strlen (string)) {
- fprintf (stderr, "write failed : %s (%s %d)\n",
- strerror (errno), string, loop);
- goto out;
- }
+ while (loop < 1000) {
+ /* Use it as a mechanism to test time delays */
+ memset(string, 0, 1024);
+ scanf("%s", string);
- ret = write (fd, "\n", 1);
- if (ret != 1) {
- fprintf (stderr, "write failed : %s (%d)\n",
- strerror (errno), loop);
- goto out;
- }
+ ret = write(fd, string, strlen(string));
+ if (ret != strlen(string)) {
+ fprintf(stderr, "write failed : %s (%s %d)\n", strerror(errno),
+ string, loop);
+ goto out;
+ }
- loop++;
+ ret = write(fd, "\n", 1);
+ if (ret != 1) {
+ fprintf(stderr, "write failed : %s (%d)\n", strerror(errno), loop);
+ goto out;
}
- fprintf (stdout, "finishing the test after %d loops\n", loop);
+ loop++;
+ }
+
+ fprintf(stdout, "finishing the test after %d loops\n", loop);
- ret = 0;
+ ret = 0;
out:
- if (fd)
- close (fd);
+ if (fd)
+ close(fd);
- return ret;
+ return ret;
}
diff --git a/extras/test/run.sh b/extras/test/run.sh
index 2440af237a5..4b3839cf679 100755
--- a/extras/test/run.sh
+++ b/extras/test/run.sh
@@ -1,21 +1,12 @@
#!/bin/sh
-# Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
-# This file is part of GlusterFS.
-
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
# Running gluster sanity test which starts glusterd and runs gluster commands, and exit at the first failure.
$PWD/gluster_commands.sh
diff --git a/extras/test/stop_glusterd.sh b/extras/test/stop_glusterd.sh
index a84689beb48..a2db13f4241 100755
--- a/extras/test/stop_glusterd.sh
+++ b/extras/test/stop_glusterd.sh
@@ -1,21 +1,12 @@
#!/bin/bash
-# Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
-# This file is part of GlusterFS.
-
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
+# Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
#This script stops the glusterd running on the machine. Helpful for gluster sanity script
diff --git a/extras/test/test-ffop.c b/extras/test/test-ffop.c
index 2c47ab0045b..1d9c125db67 100644
--- a/extras/test/test-ffop.c
+++ b/extras/test/test-ffop.c
@@ -3,103 +3,918 @@
#include <unistd.h>
#include <sys/types.h>
#include <sys/stat.h>
-#include <attr/xattr.h>
+#include <sys/xattr.h>
#include <errno.h>
#include <string.h>
+#include <dirent.h>
int
-main (int argc, char *argv[])
+fd_based_fops_1(char *filename); // for fd based fops after unlink
+int
+fd_based_fops_2(char *filename); // for fd based fops before unlink
+int
+dup_fd_based_fops(char *filename); // fops based on fd after dup
+int
+path_based_fops(char *filename); // for fops based on path
+int
+dir_based_fops(char *filename); // for fops which operate on directory
+int
+link_based_fops(
+ char *filename); // for fops which operate in link files (symlinks)
+int
+test_open_modes(
+ char *filename); // to test open syscall with open modes available.
+int
+generic_open_read_write(
+ char *filename,
+ int flag); // generic function which does open write and read.
+
+int
+main(int argc, char *argv[])
{
- int ret = -1;
- int fd = 0;
- char *filename = NULL;
- struct stat stbuf = {0,};
-
- if (argc > 1)
- filename = argv[1];
-
- if (!filename)
- filename = "temp-xattr-test-file";
-
- fd = open (filename, O_RDWR|O_CREAT);
- if (fd < 0) {
- fd = 0;
- fprintf (stderr, "open failed : %s\n", strerror (errno));
- goto out;
- }
+ int ret = -1;
+ char filename[255] = {
+ 0,
+ };
- ret = unlink (filename);
- if (ret < 0) {
- fprintf (stderr, "unlink failed : %s\n", strerror (errno));
- goto out;
- }
+ if (argc > 1)
+ strcpy(filename, argv[1]);
+ else
+ strcpy(filename, "temp-xattr-test-file");
- ret = ftruncate (fd, 0);
- if (ret < 0) {
- fprintf (stderr, "ftruncate failed : %s\n", strerror (errno));
- goto out;
- }
+ ret = fd_based_fops_1(strcat(filename, "_1"));
+ if (ret < 0)
+ fprintf(stderr, "fd based file operation 1 failed\n");
+ else
+ fprintf(stdout, "fd based file operation 1 passed\n");
- ret = fstat (fd, &stbuf);
- if (ret < 0) {
- fprintf (stderr, "fstat failed : %s\n", strerror (errno));
- goto out;
- }
+ ret = fd_based_fops_2(strcat(filename, "_2"));
+ if (ret < 0)
+ fprintf(stderr, "fd based file operation 2 failed\n");
+ else
+ fprintf(stdout, "fd based file operation 2 passed\n");
- ret = fchmod (fd, 0640);
- if (ret < 0) {
- fprintf (stderr, "fchmod failed : %s\n", strerror (errno));
- goto out;
- }
+ ret = dup_fd_based_fops(strcat(filename, "_3"));
+ if (ret < 0)
+ fprintf(stderr, "dup fd based file operation failed\n");
+ else
+ fprintf(stdout, "dup fd based file operation passed\n");
- ret = fchown (fd, 10001, 10001);
- if (ret < 0) {
- fprintf (stderr, "fchown failed : %s\n", strerror (errno));
- goto out;
- }
+ ret = path_based_fops(strcat(filename, "_4"));
+ if (ret < 0)
+ fprintf(stderr, "path based file operation failed\n");
+ else
+ fprintf(stdout, "path based file operation passed\n");
- ret = fsync (fd);
- if (ret < 0) {
- fprintf (stderr, "fsync failed : %s\n", strerror (errno));
- goto out;
- }
+ ret = dir_based_fops(strcat(filename, "_5"));
+ if (ret < 0)
+ fprintf(stderr, "directory based file operation failed\n");
+ else
+ fprintf(stdout, "directory based file operation passed\n");
- ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0);
- if (ret < 0) {
- fprintf (stderr, "fsetxattr failed : %s\n", strerror (errno));
- goto out;
- }
+ ret = link_based_fops(strcat(filename, "_5"));
+ if (ret < 0)
+ fprintf(stderr, "link based file operation failed\n");
+ else
+ fprintf(stdout, "link based file operation passed\n");
- ret = fdatasync (fd);
- if (ret < 0) {
- fprintf (stderr, "fdatasync failed : %s\n", strerror (errno));
- goto out;
- }
+ ret = test_open_modes(strcat(filename, "_5"));
+ if (ret < 0)
+ fprintf(stderr, "testing modes of 'open' call failed\n");
+ else
+ fprintf(stdout, "testing modes of 'open' call passed\n");
+
+out:
+ return ret;
+}
+
+int
+fd_based_fops_1(char *filename)
+{
+ int fd = 0;
+ int ret = -1;
+ struct stat stbuf = {
+ 0,
+ };
+ char wstr[50] = {
+ 0,
+ };
+ char rstr[50] = {
+ 0,
+ };
+
+ fd = open(filename, O_RDWR | O_CREAT);
+ if (fd < 0) {
+ fd = 0;
+ fprintf(stderr, "open failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = unlink(filename);
+ if (ret < 0) {
+ fprintf(stderr, "unlink failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ strcpy(wstr, "This is my string\n");
+ ret = write(fd, wstr, strlen(wstr));
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = lseek(fd, 0, SEEK_SET);
+ if (ret < 0) {
+ fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = read(fd, rstr, strlen(wstr));
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "read failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = memcmp(rstr, wstr, strlen(wstr));
+ if (ret != 0) {
+ ret = -1;
+ fprintf(stderr, "read returning junk\n");
+ goto out;
+ }
+
+ ret = ftruncate(fd, 0);
+ if (ret < 0) {
+ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fstat(fd, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fchmod(fd, 0640);
+ if (ret < 0) {
+ fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fchown(fd, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "fchown failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fsync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fdatasync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = flistxattr(fd, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fremovexattr(fd, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (fd)
+ close(fd);
+
+ return ret;
+}
+
+int
+fd_based_fops_2(char *filename)
+{
+ int fd = 0;
+ int ret = -1;
+ struct stat stbuf = {
+ 0,
+ };
+ char wstr[50] = {
+ 0,
+ };
+ char rstr[50] = {
+ 0,
+ };
+
+ fd = open(filename, O_RDWR | O_CREAT);
+ if (fd < 0) {
+ fd = 0;
+ fprintf(stderr, "open failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = ftruncate(fd, 0);
+
+ if (ret < 0) {
+ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ strcpy(wstr, "This is my second string\n");
+ ret = write(fd, wstr, strlen(wstr));
+ if (ret < 0) {
+ ret = -1;
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+ if (ret < 0) {
+ fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = read(fd, rstr, strlen(wstr));
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "read failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = memcmp(rstr, wstr, strlen(wstr));
+ if (ret != 0) {
+ ret = -1;
+ fprintf(stderr, "read returning junk\n");
+ goto out;
+ }
+
+ ret = fstat(fd, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fchmod(fd, 0640);
+ if (ret < 0) {
+ fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fchown(fd, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "fchown failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fsync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fdatasync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = flistxattr(fd, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fremovexattr(fd, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+out:
+ if (fd)
+ close(fd);
+ unlink(filename);
+
+ return ret;
+}
+
+int
+path_based_fops(char *filename)
+{
+ int ret = -1;
+ int fd = 0;
+ struct stat stbuf = {
+ 0,
+ };
+ char newfilename[255] = {
+ 0,
+ };
+
+ fd = creat(filename, 0644);
+ if (fd < 0) {
+ fprintf(stderr, "creat failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = truncate(filename, 0);
+ if (ret < 0) {
+ fprintf(stderr, "truncate failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = stat(filename, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "stat failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = chmod(filename, 0640);
+ if (ret < 0) {
+ fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = chown(filename, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "chown failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "setxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = listxattr(filename, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "listxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = getxattr(filename, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "getxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = removexattr(filename, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "removexattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = access(filename, R_OK | W_OK);
+ if (ret < 0) {
+ fprintf(stderr, "access failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ strcpy(newfilename, filename);
+ strcat(newfilename, "_new");
+ ret = rename(filename, newfilename);
+ if (ret < 0) {
+ fprintf(stderr, "rename failed: %s\n", strerror(errno));
+ goto out;
+ }
+ unlink(newfilename);
+
+out:
+ if (fd)
+ close(fd);
+
+ unlink(filename);
+ return ret;
+}
- ret = flistxattr (fd, NULL, 0);
- if (ret <= 0) {
- ret = -1;
- fprintf (stderr, "flistxattr failed : %s\n", strerror (errno));
- goto out;
+int
+dup_fd_based_fops(char *filename)
+{
+ int fd = 0;
+ int newfd = 0;
+ int ret = -1;
+ struct stat stbuf = {
+ 0,
+ };
+ char wstr[50] = {
+ 0,
+ };
+ char rstr[50] = {
+ 0,
+ };
+
+ fd = open(filename, O_RDWR | O_CREAT);
+ if (fd < 0) {
+ fd = 0;
+ fprintf(stderr, "open failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ newfd = dup(fd);
+ if (newfd < 0) {
+ ret = -1;
+ fprintf(stderr, "dup failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ close(fd);
+
+ strcpy(wstr, "This is my string\n");
+ ret = write(newfd, wstr, strlen(wstr));
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = lseek(newfd, 0, SEEK_SET);
+ if (ret < 0) {
+ fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = read(newfd, rstr, strlen(wstr));
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "read failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = memcmp(rstr, wstr, strlen(wstr));
+ if (ret != 0) {
+ ret = -1;
+ fprintf(stderr, "read returning junk\n");
+ goto out;
+ }
+
+ ret = ftruncate(newfd, 0);
+ if (ret < 0) {
+ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fstat(newfd, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fchmod(newfd, 0640);
+ if (ret < 0) {
+ fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fchown(newfd, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "fchown failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fsync(newfd);
+ if (ret < 0) {
+ fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fdatasync(newfd);
+ if (ret < 0) {
+ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = flistxattr(newfd, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = fremovexattr(newfd, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (newfd)
+ close(newfd);
+ ret = unlink(filename);
+ if (ret < 0)
+ fprintf(stderr, "unlink failed : %s\n", strerror(errno));
+
+ return ret;
+}
+
+int
+dir_based_fops(char *dirname)
+{
+ int ret = -1;
+ DIR *dp = NULL;
+ char buff[255] = {
+ 0,
+ };
+ struct dirent *dbuff = {
+ 0,
+ };
+ struct stat stbuff = {
+ 0,
+ };
+ char newdname[255] = {
+ 0,
+ };
+ char *cwd = NULL;
+
+ ret = mkdir(dirname, 0755);
+ if (ret < 0) {
+ fprintf(stderr, "mkdir failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ dp = opendir(dirname);
+ if (dp == NULL) {
+ fprintf(stderr, "opendir failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ dbuff = readdir(dp);
+ if (NULL == dbuff) {
+ fprintf(stderr, "readdir failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = closedir(dp);
+ if (ret < 0) {
+ fprintf(stderr, "closedir failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = stat(dirname, &stbuff);
+ if (ret < 0) {
+ fprintf(stderr, "stat failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = chmod(dirname, 0744);
+ if (ret < 0) {
+ fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = chown(dirname, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "setxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = listxattr(dirname, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "listxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = getxattr(dirname, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "getxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = removexattr(dirname, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "removexattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ strcpy(newdname, dirname);
+ strcat(newdname, "/../");
+ ret = chdir(newdname);
+ if (ret < 0) {
+ fprintf(stderr, "chdir failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ cwd = getcwd(buff, 255);
+ if (NULL == cwd) {
+ fprintf(stderr, "getcwd failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ strcpy(newdname, dirname);
+ strcat(newdname, "new");
+ ret = rename(dirname, newdname);
+ if (ret < 0) {
+ fprintf(stderr, "rename failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = rmdir(newdname);
+ if (ret < 0) {
+ fprintf(stderr, "rmdir failed: %s\n", strerror(errno));
+ return ret;
+ }
+
+out:
+ rmdir(dirname);
+ return ret;
+}
+
+int
+link_based_fops(char *filename)
+{
+ int ret = -1;
+ int fd = 0;
+ char newname[255] = {
+ 0,
+ };
+ char linkname[255] = {
+ 0,
+ };
+ struct stat lstbuf = {
+ 0,
+ };
+
+ fd = creat(filename, 0644);
+ if (fd < 0) {
+ fd = 0;
+ fprintf(stderr, "creat failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ strcpy(newname, filename);
+ strcat(newname, "_hlink");
+ ret = link(filename, newname);
+ if (ret < 0) {
+ fprintf(stderr, "link failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = unlink(filename);
+ if (ret < 0) {
+ fprintf(stderr, "unlink failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ strcpy(linkname, filename);
+ strcat(linkname, "_slink");
+ ret = symlink(newname, linkname);
+ if (ret < 0) {
+ fprintf(stderr, "symlink failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = lstat(linkname, &lstbuf);
+ if (ret < 0) {
+ fprintf(stderr, "lstbuf failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = lchown(linkname, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "lchown failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = llistxattr(linkname, NULL, 0);
+ if (ret < 0) {
+ ret = -1;
+ fprintf(stderr, "llistxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0);
+ if (ret < 0) {
+ ret = -1;
+ fprintf(stderr, "lgetxattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = lremovexattr(linkname, "trusted.lxattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+out:
+ if (fd)
+ close(fd);
+ unlink(linkname);
+ unlink(newname);
+}
+
+int
+test_open_modes(char *filename)
+{
+ int ret = -1;
+
+ ret = generic_open_read_write(filename, O_CREAT | O_WRONLY);
+ if (3 != ret) {
+ fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n");
+ goto out;
+ }
+
+ ret = generic_open_read_write(filename, O_CREAT | O_RDWR);
+ if (ret != 0) {
+ fprintf(stderr, "flag O_CREAT|O_RDWR failed\n");
+ goto out;
+ }
+
+ ret = generic_open_read_write(filename, O_CREAT | O_RDONLY);
+ if (ret != 0) {
+ fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n");
+ goto out;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_WRONLY);
+ if (3 != ret) {
+ fprintf(stderr, "flag O_WRONLY failed\n");
+ goto out;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_RDWR);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_RDWR failed\n");
+ goto out;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_RDONLY);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_RDONLY failed\n");
+ goto out;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY);
+ if (3 != ret) {
+ fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n");
+ goto out;
+ }
+
+#if 0 /* undefined behaviour, unable to reliably test */
+ ret = creat (filename, 0644);
+ close (ret);
+ ret = generic_open_read_write (filename, O_TRUNC|O_RDONLY);
+ if (0 != ret) {
+ fprintf (stderr, "flag O_TRUNC|O_RDONLY failed\n");
+ goto out;
}
+#endif
+
+ ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n");
+ goto out;
+ }
- ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0);
- if (ret <= 0) {
- ret = -1;
- fprintf (stderr, "fgetxattr failed : %s\n", strerror (errno));
- goto out;
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_CREAT | O_EXCL);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_CREAT|O_EXCL failed\n");
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int
+generic_open_read_write(char *filename, int flag)
+{
+ int fd = 0;
+ int ret = -1;
+ char wstring[50] = {
+ 0,
+ };
+ char rstring[50] = {
+ 0,
+ };
+
+ fd = open(filename, flag);
+ if (fd < 0) {
+ if (flag == O_CREAT | O_EXCL && errno == EEXIST) {
+ unlink(filename);
+ return 0;
+ } else {
+ fd = 0;
+ fprintf(stderr, "open failed: %s\n", strerror(errno));
+ return 1;
}
+ }
- ret = fremovexattr (fd, "trusted.xattr-test");
- if (ret < 0) {
- fprintf (stderr, "fremovexattr failed : %s\n", strerror (errno));
- goto out;
+ strcpy(wstring, "My string to write\n");
+ ret = write(fd, wstring, strlen(wstring));
+ if (ret <= 0) {
+ if (errno != EBADF) {
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ close(fd);
+ unlink(filename);
+ return 2;
}
+ }
- ret = 0;
-out:
- if (fd)
- close (fd);
+ ret = lseek(fd, 0, SEEK_SET);
+ if (ret < 0) {
+ close(fd);
+ unlink(filename);
+ return 4;
+ }
- return ret;
+ ret = read(fd, rstring, strlen(wstring));
+ if (ret < 0) {
+ close(fd);
+ unlink(filename);
+ return 3;
+ }
+
+ /* Compare the rstring with wstring. But we do not want to return
+ * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or
+ * O_TRUNC|O_RDONLY. Because in that case we are not writing
+ * anything to the file.*/
+
+ ret = memcmp(wstring, rstring, strlen(wstring));
+ if (0 != ret && !(flag == O_CREAT | O_RDONLY || flag == O_RDONLY ||
+ flag == O_TRUNC | O_RDONLY)) {
+ fprintf(stderr, "read is returning junk\n");
+ close(fd);
+ unlink(filename);
+ return 4;
+ }
+
+ close(fd);
+ unlink(filename);
+ return 0;
}
diff --git a/extras/thin-arbiter/setup-thin-arbiter.sh b/extras/thin-arbiter/setup-thin-arbiter.sh
new file mode 100755
index 00000000000..0681b30ef3f
--- /dev/null
+++ b/extras/thin-arbiter/setup-thin-arbiter.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+# Copyright (c) 2018-2019 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+
+# This tool has been developed to setup thin-arbiter process on a node.
+# Seting up a thin arbiter process involves following files -
+# 1 - thin-arbiter.vol
+# Thin-arbiter (TA) process will use the graph in this file to load the
+# required translators.
+# 2 - gluster-ta-volume.service (generated by gluster-ta-volume.service.in)
+# TA process would be running as systemd service.
+#
+# TA process uses a location to save TA id files for every subvolume.
+# This location can be taken as input from user. Once provided and the
+# TA process is started on a node, it can not be changed using this
+# script or by any other mean. The same location should be used in
+# the gluster CLI when creating thin-arbiter volumes.
+
+MYPATH=`dirname $0`
+
+volloc="/var/lib/glusterd/thin-arbiter"
+mkdir -p $volloc
+
+if [ -f /etc/glusterfs/thin-arbiter.vol ]; then
+ volfile=/etc/glusterfs/thin-arbiter.vol
+else
+ volfile=$MYPATH/thin-arbiter.vol
+fi
+
+tafile="$volloc/thin-arbiter.vol"
+
+
+help () {
+ echo " "
+ echo ' This tool helps to setup thin-arbiter (TA) process on a node.
+ TA process uses a location to save TA id files for every subvolume.
+ This location can be taken as input from user. Once provided and the
+ TA process is started on a node, it can not be changed using this script
+ or by any other mean. The same location should be used in gluster CLI
+ when creating thin-arbiter volumes.
+
+ usage: setup-thin-arbiter.sh [-s] [-h]
+ options:
+ -s - Setup thin-arbiter file path and start process
+ -h - Show this help message and exit
+'
+}
+
+volfile_set_brick_path () {
+ while read -r line
+ do
+ dir=`echo "$line" | cut -d' ' -f 2`
+ if [ "$dir" = "directory" ]; then
+ bpath=`echo "$line" | cut -d' ' -f 3`
+ sed -i -- 's?'$bpath'?'$1'?g' $tafile
+ return
+ fi
+ done < $tafile
+}
+
+check_ta_proc () {
+ pro=`ps aux | grep thin-arbiter.vol | grep "volfile-id"`
+ if [ "${pro}" = '' ]; then
+ echo ""
+ else
+ curr_loc=`cat $volloc/thin-arbiter.vol | grep option | grep directory`
+ loc=`echo "${curr_loc##* }"`
+ echo "******************************************************"
+ echo "Error:"
+ echo "Thin-arbiter process is running with thin-arbiter path = $loc"
+ echo "Can not change TA path on this host now."
+ echo "$pro"
+ echo "******************************************************"
+ exit 1
+ fi
+}
+
+getpath () {
+ check_ta_proc
+ echo "******************************************************"
+ echo "User will be required to enter a path/folder for arbiter volume."
+ echo "Please note that this path will be used for ALL VOLUMES using this"
+ echo "node to host thin-arbiter. After setting, if a volume"
+ echo "has been created using this host and path then path for"
+ echo "thin-arbiter can not be changed "
+ echo "******************************************************"
+ echo " "
+ while true;
+ do
+ echo -n "Enter brick path for thin arbiter volumes: "
+ echo " "
+ read tapath
+ if [ "${tapath}" = '' ]; then
+ echo "Please enter valid path"
+ continue
+ else
+ echo "Entered brick path : $tapath "
+ echo "Please note that this brick path will be used for ALL"
+ echo "VOLUMES using this node to host thin-arbiter brick"
+ echo -n "Want to continue? (y/N): "
+ echo " "
+ read cont
+
+ if [ "${cont}" = 'N' ] || [ "${cont}" = 'n' ]; then
+ exit 0
+ else
+ break
+ fi
+ fi
+ done
+}
+
+setup () {
+ getpath
+ mkdir -p $tapath/.glusterfs/indices
+ if [ -d $tapath/.glusterfs/indices ]; then
+ echo " "
+ else
+ echo "Could not create $tapath/.glusterfs/indices directory, check provided ta path."
+ exit 1
+ fi
+
+ cp -f --backup --suffix=_old $volfile $volloc/thin-arbiter.vol
+ volfile_set_brick_path "$tapath"
+
+ echo "Directory path to be used for thin-arbiter volume is: $tapath"
+ echo " "
+ echo "========================================================"
+
+ if [ -f /usr/lib/systemd/system/gluster-ta-volume.service ]; then
+ echo "Starting thin-arbiter process"
+ else
+ cp $MYPATH/../systemd/gluster-ta-volume.service /etc/systemd/system/
+ echo "Starting thin-arbiter process"
+ chmod 0644 /etc/systemd/system/gluster-ta-volume.service
+ fi
+
+ systemctl daemon-reload
+ systemctl enable gluster-ta-volume
+ systemctl stop gluster-ta-volume
+ systemctl start gluster-ta-volume
+
+ if [ $? == 0 ]; then
+ echo "thin-arbiter process has been setup and running"
+ else
+ echo "Failed to setup thin arbiter"
+ exit 1
+ fi
+
+}
+
+main()
+{
+
+ if [ "$#" -ne 1 ]; then
+ help
+ exit 0
+ fi
+
+ while getopts "sh" opt; do
+ case $opt in
+ h)
+ help
+ exit 0
+ ;;
+ s)
+ setup
+ exit 0
+ ;;
+ *)
+ help
+ exit 0
+ ;;
+ esac
+ done
+}
+
+main "$@"
diff --git a/extras/thin-arbiter/thin-arbiter.vol b/extras/thin-arbiter/thin-arbiter.vol
new file mode 100644
index 00000000000..c76babc7b3c
--- /dev/null
+++ b/extras/thin-arbiter/thin-arbiter.vol
@@ -0,0 +1,57 @@
+volume ta-posix
+ type storage/posix
+ option directory /mnt/thin-arbiter
+end-volume
+
+volume ta-thin-arbiter
+ type features/thin-arbiter
+ subvolumes ta-posix
+end-volume
+
+volume ta-locks
+ type features/locks
+ option notify-contention yes
+ subvolumes ta-thin-arbiter
+end-volume
+
+volume ta-upcall
+ type features/upcall
+ option cache-invalidation off
+ subvolumes ta-locks
+end-volume
+
+volume ta-io-threads
+ type performance/io-threads
+ subvolumes ta-upcall
+end-volume
+
+volume ta-index
+ type features/index
+ option xattrop-pending-watchlist trusted.afr.ta-
+ option xattrop-dirty-watchlist trusted.afr.dirty
+ option index-base /mnt/thin-arbiter/.glusterfs/indices
+ subvolumes ta-io-threads
+end-volume
+
+volume /mnt/thin-arbiter
+ type debug/io-stats
+ option count-fop-hits off
+ option latency-measurement off
+ option unique-id /mnt/thin-arbiter
+ subvolumes ta-index
+end-volume
+
+volume ta-server
+ type protocol/server
+ option transport.listen-backlog 10
+ option transport.socket.keepalive-count 9
+ option transport.socket.keepalive-interval 2
+ option transport.socket.keepalive-time 20
+ option transport.tcp-user-timeout 0
+ option transport.socket.keepalive 1
+ option auth.addr./mnt/thin-arbiter.allow *
+ option auth-path /mnt/thin-arbiter
+ option transport.address-family inet
+ option transport-type tcp
+ subvolumes /mnt/thin-arbiter
+end-volume
diff --git a/extras/volfilter.py b/extras/volfilter.py
new file mode 100644
index 00000000000..5558a1beff4
--- /dev/null
+++ b/extras/volfilter.py
@@ -0,0 +1,168 @@
+# Copyright (c) 2010-2011 Red Hat, Inc.
+#
+# This file is part of HekaFS.
+#
+# HekaFS is free software: you can redistribute it and/or modify it under the
+# terms of the GNU General Public License, version 3, as published by the Free
+# Software Foundation.
+#
+# HekaFS is distributed in the hope that it will be useful, but WITHOUT ANY
+# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
+# A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License * along
+# with HekaFS. If not, see <http://www.gnu.org/licenses/>.
+
+from __future__ import print_function
+import copy
+import string
+import sys
+import types
+
+good_xlators = [
+ "cluster/afr",
+ "cluster/dht",
+ "cluster/distribute",
+ "cluster/replicate",
+ "cluster/stripe",
+ "debug/io-stats",
+ "features/access-control",
+ "features/locks",
+ "features/marker",
+ "features/uidmap",
+ "performance/io-threads",
+ "protocol/client",
+ "protocol/server",
+ "storage/posix",
+]
+
+def copy_stack (old_xl, suffix, recursive=False):
+ if recursive:
+ new_name = old_xl.name + "-" + suffix
+ else:
+ new_name = suffix
+ new_xl = Translator(new_name)
+ new_xl.type = old_xl.type
+ # The results with normal assignment here are . . . amusing.
+ new_xl.opts = copy.deepcopy(old_xl.opts)
+ for sv in old_xl.subvols:
+ new_xl.subvols.append(copy_stack(sv, suffix, True))
+ # Patch up the path at the bottom.
+ if new_xl.type == "storage/posix":
+ new_xl.opts["directory"] += ("/" + suffix)
+ return new_xl
+
+def cleanup (parent, graph):
+ if parent.type in good_xlators:
+ # Temporary fix so that HekaFS volumes can use the
+ # SSL-enabled multi-threaded socket transport.
+ if parent.type == "protocol/server":
+ parent.type = "protocol/server2"
+ parent.opts["transport-type"] = "ssl"
+ elif parent.type == "protocol/client":
+ parent.type = "protocol/client2"
+ parent.opts["transport-type"] = "ssl"
+ sv = []
+ for child in parent.subvols:
+ sv.append(cleanup(child, graph))
+ parent.subvols = sv
+ else:
+ parent = cleanup(parent.subvols[0], graph)
+ return parent
+
+class Translator:
+ def __init__ (self, name):
+ self.name = name
+ self.type = ""
+ self.opts = {}
+ self.subvols = []
+ self.dumped = False
+ def __repr__ (self):
+ return "<Translator %s>" % self.name
+
+def load (path):
+ # If it's a string, open it; otherwise, assume it's already a
+ # file-like object (most notably from urllib*).
+ if type(path) in (str,):
+ fp = file(path, "r")
+ else:
+ fp = path
+ all_xlators = {}
+ xlator = None
+ last_xlator = None
+ while True:
+ text = fp.readline()
+ if text == "":
+ break
+ text = text.split()
+ if not len(text):
+ continue
+ if text[0] == "volume":
+ if xlator:
+ raise RuntimeError("nested volume definition")
+ xlator = Translator(text[1])
+ continue
+ if not xlator:
+ raise RuntimeError("text outside volume definition")
+ if text[0] == "type":
+ xlator.type = text[1]
+ continue
+ if text[0] == "option":
+ xlator.opts[text[1]] = ''.join(text[2:])
+ continue
+ if text[0] == "subvolumes":
+ for sv in text[1:]:
+ xlator.subvols.append(all_xlators[sv])
+ continue
+ if text[0] == "end-volume":
+ all_xlators[xlator.name] = xlator
+ last_xlator = xlator
+ xlator = None
+ continue
+ raise RuntimeError("unrecognized keyword %s" % text[0])
+ if xlator:
+ raise RuntimeError("unclosed volume definition")
+ return all_xlators, last_xlator
+
+def generate (graph, last, stream=sys.stdout):
+ for sv in last.subvols:
+ if not sv.dumped:
+ generate(graph, sv, stream)
+ print("", file=stream)
+ sv.dumped = True
+ print("volume %s" % last.name, file=stream)
+ print(" type %s" % last.type, file=stream)
+ for k, v in last.opts.items():
+ print(" option %s %s" % (k, v), file=stream)
+ if last.subvols:
+ print(" subvolumes %s" % ''.join(
+ [ sv.name for sv in last.subvols ]), file=stream)
+ print("end-volume", file=stream)
+
+def push_filter (graph, old_xl, filt_type, opts={}):
+ suffix = "-" + old_xl.type.split("/")[1]
+ if len(old_xl.name) > len(suffix):
+ if old_xl.name[-len(suffix):] == suffix:
+ old_xl.name = old_xl.name[:-len(suffix)]
+ new_xl = Translator(old_xl.name+suffix)
+ new_xl.type = old_xl.type
+ new_xl.opts = old_xl.opts
+ new_xl.subvols = old_xl.subvols
+ graph[new_xl.name] = new_xl
+ old_xl.name += ("-" + filt_type.split("/")[1])
+ old_xl.type = filt_type
+ old_xl.opts = opts
+ old_xl.subvols = [new_xl]
+ graph[old_xl.name] = old_xl
+
+def delete (graph, victim):
+ if len(victim.subvols) != 1:
+ raise RuntimeError("attempt to delete non-unary translator")
+ for xl in graph.itervalues():
+ while xl.subvols.count(victim):
+ i = xl.subvols.index(victim)
+ xl.subvols[i] = victim.subvols[0]
+
+if __name__ == "__main__":
+ graph, last = load(sys.argv[1])
+ generate(graph, last)
diff --git a/extras/who-wrote-glusterfs/gitdm.aliases b/extras/who-wrote-glusterfs/gitdm.aliases
new file mode 100644
index 00000000000..901c12418e3
--- /dev/null
+++ b/extras/who-wrote-glusterfs/gitdm.aliases
@@ -0,0 +1,58 @@
+#
+# This is the email aliases file, mapping secondary addresses onto a single,
+# canonical address. This file should probably match the contents of .mailmap
+# in the root of the git repository.
+#
+# Format: <alias> <real>
+
+amar@gluster.com amarts@redhat.com
+amar@del.gluster.com amarts@redhat.com
+avati@amp.gluster.com avati@redhat.com
+avati@blackhole.gluster.com avati@redhat.com
+avati@dev.gluster.com avati@redhat.com
+avati@gluster.com avati@redhat.com
+wheelear@gmail.com awheeler@redhat.com
+anush@gluster.com ashetty@redhat.com
+csaba@gluster.com csaba@redhat.com
+csaba@lowlife.hu csaba@redhat.com
+csaba@zresearch.com csaba@redhat.com
+gd@samba.org gd@redhat.com
+harsha@gluster.com fharshav@redhat.com
+harsha@zresearch.com fharshav@redhat.com
+harsha@dev.gluster.com fharshav@redhat.com
+harsha@harshavardhana.net fharshav@redhat.com
+jclift@redhat.com jclift@gluster.org
+kkeithle@linux.keithley.org kkeithle@redhat.com
+kkeithle@f16node1.kkeithle.usersys.redhat.com kkeithle@redhat.com
+kaushal@gluster.com kaushal@redhat.com
+kaushikbv@gluster.com kbudiger@redhat.com
+krishna@gluster.com ksriniva@redhat.com
+krishna@zresearch.com ksriniva@redhat.com
+krishna@guest-laptop ksriniva@redhat.com
+kp@gluster.com kparthas@redhat.com
+me@louiszuckerman.com louiszuckerman@gmail.com
+msvbhat@gmail.com vbhat@redhat.com
+nullpai@gmail.com ppai@redhat.com
+vishwanath@gluster.com vbhat@redhat.com
+obnox@samba.org madam@redhat.com
+oleksandr@natalenko.name o.natalenko@lanet.ua
+patrick@puiterwijk.org puiterwijk@fedoraproject.org
+pavan@dev.gluster.com pavan@gluster.com
+zaitcev@yahoo.com zaitcev@kotori.zaitcev.us
+pranithk@gluster.com pkarampu@redhat.com
+raghavendrabhat@gluster.com raghavendra@redhat.com
+raghavendra@gluster.com rgowdapp@redhat.com
+raghavendra@zresearch.com rgowdapp@redhat.com
+rahulcssjce@gmail.com rahulcs@redhat.com
+rajesh@gluster.com rajesh@redhat.com
+rajesh.amaravathi@gmail.com rajesh@redhat.com
+root@ravi2.(none) ravishankar@redhat.com
+sabansal@localhost.localdomain sabansal@redhat.com
+shehjart@zresearch.com shehjart@gluster.com
+venky@gluster.com vshankar@redhat.com
+vijay@gluster.com vbellur@redhat.com
+vijay@dev.gluster.com vbellur@redhat.com
+vijaykumar.koppad@gmail.com vkoppad@redhat.com
+vikas@zresearch.com vikas@gluster.com
+shishirng@gluster.com sgowda@redhat.com
+potatogim@potatogim.net potatogim@gluesys.com
diff --git a/extras/who-wrote-glusterfs/gitdm.config b/extras/who-wrote-glusterfs/gitdm.config
new file mode 100644
index 00000000000..e1ff2bd5bda
--- /dev/null
+++ b/extras/who-wrote-glusterfs/gitdm.config
@@ -0,0 +1,8 @@
+#
+# This is the gitdm configuration file for GlusterFS.
+# See the gitdm.config in the gitdm repositofy for additional options and
+# comments.
+#
+
+EmailAliases gitdm.aliases
+EmailMap gitdm.domain-map
diff --git a/extras/who-wrote-glusterfs/gitdm.domain-map b/extras/who-wrote-glusterfs/gitdm.domain-map
new file mode 100644
index 00000000000..7cd2bbd605b
--- /dev/null
+++ b/extras/who-wrote-glusterfs/gitdm.domain-map
@@ -0,0 +1,29 @@
+#
+# Here is a set of mappings of domain names onto employer names.
+#
+active.by ActiveCloud
+appeartv.com Appear TV
+cern.ch CERN
+cmss.chinamobile.com China Mobile(Suzhou) Software Technology
+datalab.es DataLab S.L.
+fb.com Facebook
+fedoraproject.org Fedora Project
+gluster.com Red Hat
+gmail.com (personal contributions)
+gooddata.com GoodData
+hastexo.com hastexo
+horde.com (personal contributions)
+ibm.com IBM
+io.com IO
+lanet.ua Lanet Network
+linbit.com LINBIT
+nectec.or.th NECTEC
+netbsd.org NetBSD
+netdirect.ca Net Direct
+nokia.com Nokia
+redhat.com Red Hat
+stepping-stone.ch stepping stone GmbH
+xtaotech.com XTAO Co.
+yahoo.in (personal contributions)
+zresearch.com Red Hat
+gluesys.com Gluesys
diff --git a/extras/who-wrote-glusterfs/who-wrote-glusterfs.sh b/extras/who-wrote-glusterfs/who-wrote-glusterfs.sh
new file mode 100755
index 00000000000..70d5964c576
--- /dev/null
+++ b/extras/who-wrote-glusterfs/who-wrote-glusterfs.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+#
+# Gather statistics on "Who wrote GlusterFS". The idea comes from the excellent
+# articles on http://lwn.net/ named "Who wrote <linux-version>?".
+#
+# gitdm comes from git://git.lwn.net/gitdm.git by Jonathan Corbet.
+#
+# Confguration files used:
+# - gitdm.config: main configuration file, pointing to the others
+# - gitdm.aliases: merge users with different emailaddresses into one
+# - gitdm.domain-map: map domain names from emailaddresses to companies
+#
+
+DIRNAME=$(dirname $0)
+
+GITDM_REPO=git://git.lwn.net/gitdm.git
+GITDM_DIR=${DIRNAME}/gitdm
+GITDM_CMD="python ${GITDM_DIR}/gitdm"
+
+error()
+{
+ local ret=${?}
+ printf "${@}\n" > /dev/stderr
+ return ${ret}
+}
+
+check_gitdm()
+{
+ if [ ! -e "${GITDM_DIR}/gitdm" ]
+ then
+ git clone --quiet ${GITDM_REPO} ${DIRNAME}/gitdm
+ fi
+}
+
+# The first argument is the revision-range (see 'git rev-list --help').
+# REV can be empty, and the statistics will be calculated over the whole
+# current branch.
+REV=${1}
+shift
+# all remaining options are passed to gitdm, see the gitdm script for an
+# explanation of the accepted options.
+GITDM_OPTS=${@}
+
+if ! check_gitdm
+then
+ error "Could not find 'gitdm', exiting..."
+ exit 1
+fi
+
+git log --numstat -M ${REV} | ${GITDM_CMD} -b ${DIRNAME} -n ${GITDM_OPTS}
diff --git a/geo-replication/Makefile.am b/geo-replication/Makefile.am
new file mode 100644
index 00000000000..591b23d0eaf
--- /dev/null
+++ b/geo-replication/Makefile.am
@@ -0,0 +1,8 @@
+SUBDIRS = syncdaemon src
+
+CLEANFILES =
+
+EXTRA_DIST = gsyncd.conf.in
+
+gsyncdconfdir = $(sysconfdir)/glusterfs/
+gsyncdconf_DATA = gsyncd.conf
diff --git a/geo-replication/gsyncd.conf.in b/geo-replication/gsyncd.conf.in
new file mode 100644
index 00000000000..9688c79fab7
--- /dev/null
+++ b/geo-replication/gsyncd.conf.in
@@ -0,0 +1,349 @@
+[__meta__]
+version = 4.0
+
+[master-bricks]
+configurable=false
+
+[slave-bricks]
+configurable=false
+
+[master-volume-id]
+configurable=false
+
+[slave-volume-id]
+configurable=false
+
+[master-replica-count]
+configurable=false
+type=int
+value=1
+
+[master-disperse-count]
+configurable=false
+type=int
+value=1
+
+[master-distribution-count]
+configurable=false
+type=int
+value=1
+
+[glusterd-workdir]
+value = @GLUSTERD_WORKDIR@
+
+[gluster-logdir]
+value = /var/log/glusterfs
+
+[gluster-rundir]
+value = /var/run/gluster
+
+[gsyncd-miscdir]
+value = /var/lib/misc/gluster/gsyncd
+
+[stime-xattr-prefix]
+value=
+
+[checkpoint]
+value=0
+help=Set Checkpoint
+validation=unixtime
+type=int
+
+[gluster-cli-options]
+value=
+help=Gluster CLI Options
+
+[pid-file]
+value=${gluster_rundir}/gsyncd-${master}-${primary_slave_host}-${slavevol}.pid
+configurable=false
+template = true
+help=PID file path
+
+[state-file]
+value=${glusterd_workdir}/geo-replication/${master}_${primary_slave_host}_${slavevol}/monitor.status
+configurable=false
+template=true
+help=Status File path
+
+[georep-session-working-dir]
+value=${glusterd_workdir}/geo-replication/${master}_${primary_slave_host}_${slavevol}/
+template=true
+help=Session Working directory
+configurable=false
+
+[access-mount]
+value=false
+type=bool
+validation=bool
+help=Do not lazy unmount the master volume. This allows admin to access the mount for debugging.
+
+[slave-access-mount]
+value=false
+type=bool
+validation=bool
+help=Do not lazy unmount the slave volume. This allows admin to access the mount for debugging.
+
+[isolated-slaves]
+value=
+help=List of Slave nodes which are isolated
+
+[changelog-batch-size]
+# Max size of Changelogs to process per batch, Changelogs Processing is
+# not limited by the number of changelogs but instead based on
+# size of the changelog file, One sample changelog file size was 145408
+# with ~1000 CREATE and ~1000 DATA. 5 such files in one batch is 727040
+# If geo-rep worker crashes while processing a batch, it has to retry only
+# that batch since stime will get updated after each batch.
+value=727040
+help=Max size of Changelogs to process per batch.
+type=int
+
+[slave-timeout]
+value=120
+type=int
+help=Timeout in seconds for Slave Gsyncd. If no activity from master for this timeout, Slave gsyncd will be disconnected. Set Timeout to zero to skip this check.
+
+[connection-timeout]
+value=60
+type=int
+help=Timeout for mounts
+
+[replica-failover-interval]
+value=1
+type=int
+help=Minimum time interval in seconds for passive worker to become Active
+
+[changelog-archive-format]
+value=%Y%m
+help=Processed changelogs will be archived in working directory. Pattern for archive file
+
+[use-meta-volume]
+value=false
+type=bool
+help=Use this to set Active Passive mode to meta-volume.
+
+[meta-volume-mnt]
+value=/run/gluster/shared_storage
+help=Meta Volume or Shared Volume mount path
+
+[allow-network]
+value=
+
+[change-interval]
+value=5
+type=int
+
+[sync-method]
+value=rsync
+help=Sync method for data sync. Available methods are tar over ssh and rsync. Default is rsync.
+validation=choice
+allowed_values=tarssh,rsync
+
+[remote-gsyncd]
+value =
+help=If SSH keys are not secured with gsyncd prefix then use this configuration to set the actual path of gsyncd(Usually /usr/libexec/glusterfs/gsyncd)
+
+[gluster-command-dir]
+value=@SBIN_DIR@
+help=Directory where Gluster binaries exist on master
+
+[slave-gluster-command-dir]
+value=@SBIN_DIR@
+help=Directory where Gluster binaries exist on slave
+
+[gluster-params]
+value = aux-gfid-mount acl
+help=Parameters for Gluster Geo-rep mount in Master
+
+[slave-gluster-params]
+value = aux-gfid-mount acl
+help=Parameters for Gluster Geo-rep mount in Slave
+
+[ignore-deletes]
+value = false
+type=bool
+help=Do not sync deletes in Slave
+
+[special-sync-mode]
+# tunables for failover/failback mechanism:
+# None - gsyncd behaves as normal
+# blind - gsyncd works with xtime pairs to identify
+# candidates for synchronization
+# wrapup - same as normal mode but does not assign
+# xtimes to orphaned files
+# see crawl() for usage of the above tunables
+value =
+help=
+
+[gfid-conflict-resolution]
+value = true
+validation=bool
+type=bool
+help=Disables automatic gfid conflict resolution while syncing
+
+[working-dir]
+value = ${gsyncd_miscdir}/${master}_${primary_slave_host}_${slavevol}/
+template=true
+configurable=false
+help=Working directory for storing Changelogs
+
+[change-detector]
+value=changelog
+help=Change detector
+validation=choice
+allowed_values=changelog,xsync
+
+[cli-log-file]
+value=${gluster_logdir}/geo-replication/cli.log
+template=true
+configurable=false
+
+[cli-log-level]
+value=INFO
+help=Set CLI Log Level
+validation=choice
+allowed_values=ERROR,INFO,WARNING,DEBUG
+
+[log-file]
+value=${gluster_logdir}/geo-replication/${master}_${primary_slave_host}_${slavevol}/gsyncd.log
+configurable=false
+template=true
+
+[changelog-log-file]
+value=${gluster_logdir}/geo-replication/${master}_${primary_slave_host}_${slavevol}/changes-${local_id}.log
+configurable=false
+template=true
+
+[gluster-log-file]
+value=${gluster_logdir}/geo-replication/${master}_${primary_slave_host}_${slavevol}/mnt-${local_id}.log
+template=true
+configurable=false
+
+[slave-log-file]
+value=${gluster_logdir}/geo-replication-slaves/${master}_${primary_slave_host}_${slavevol}/gsyncd.log
+template=true
+configurable=false
+
+[slave-gluster-log-file]
+value=${gluster_logdir}/geo-replication-slaves/${master}_${primary_slave_host}_${slavevol}/mnt-${master_node}-${master_brick_id}.log
+template=true
+configurable=false
+
+[slave-gluster-log-file-mbr]
+value=${gluster_logdir}/geo-replication-slaves/${master}_${primary_slave_host}_${slavevol}/mnt-mbr-${master_node}-${master_brick_id}.log
+template=true
+configurable=false
+
+[log-level]
+value=INFO
+help=Set Log Level
+validation=choice
+allowed_values=ERROR,INFO,WARNING,DEBUG
+
+[gluster-log-level]
+value=INFO
+help=Set Gluster mount Log Level
+validation=choice
+allowed_values=ERROR,INFO,WARNING,DEBUG
+
+[changelog-log-level]
+value=INFO
+help=Set Changelog Log Level
+validation=choice
+allowed_values=ERROR,INFO,WARNING,DEBUG
+
+[slave-log-level]
+value=INFO
+help=Set Slave Gsyncd Log Level
+validation=choice
+allowed_values=ERROR,INFO,WARNING,DEBUG
+
+[slave-gluster-log-level]
+value=INFO
+help=Set Slave Gluster mount Log Level
+validation=choice
+allowed_values=ERROR,INFO,WARNING,DEBUG
+
+[ssh-port]
+value=22
+validation=minmax
+min=1
+max=65535
+help=Set SSH port
+type=int
+
+[ssh-command]
+value=ssh
+help=Set ssh binary path
+validation=execpath
+
+[tar-command]
+value=tar
+help=Set tar command path
+validation=execpath
+
+[ssh-options]
+value = -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i ${glusterd_workdir}/geo-replication/secret.pem
+template=true
+
+[ssh-options-tar]
+value = -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i ${glusterd_workdir}/geo-replication/tar_ssh.pem
+template=true
+
+[gluster-command]
+value=gluster
+help=Set gluster binary path
+validation=execpath
+
+[sync-jobs]
+value=3
+help=Number of Syncer jobs
+validation=minmax
+min=1
+max=100
+type=int
+
+[rsync-command]
+value=rsync
+help=Set rsync command path
+validation=execpath
+
+[rsync-options]
+value=
+
+[rsync-ssh-options]
+value=
+
+[rsync-opt-ignore-missing-args]
+value=true
+type=bool
+
+[rsync-opt-existing]
+value=true
+type=bool
+
+[log-rsync-performance]
+value=false
+help=Log Rsync performance
+validation=bool
+type=bool
+
+[use-rsync-xattrs]
+value=false
+type=bool
+
+[sync-xattrs]
+value=true
+type=bool
+
+[sync-acls]
+value=true
+type=bool
+
+[max-rsync-retries]
+value=10
+type=int
+
+[state_socket_unencoded]
+# Unused, For backward compatibility
+value=
diff --git a/geo-replication/setup.py b/geo-replication/setup.py
new file mode 100644
index 00000000000..0eae469d2d6
--- /dev/null
+++ b/geo-replication/setup.py
@@ -0,0 +1,32 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+"""
+This setup.py only used to run tests, since geo-replication will
+be installed in /usr/local/libexec/glusterfs or /usr/libexec/glusterfs
+"""
+from setuptools import setup
+
+name = 'syncdaemon'
+
+setup(
+ name=name,
+ version="",
+ description='GlusterFS Geo Replication',
+ license='GPLV2 and LGPLV3+',
+ author='Red Hat, Inc.',
+ author_email='gluster-devel@gluster.org',
+ url='http://www.gluster.org',
+ packages=[name, ],
+ test_suite='nose.collector',
+ install_requires=[],
+ scripts=[],
+ entry_points={},
+)
diff --git a/geo-replication/src/Makefile.am b/geo-replication/src/Makefile.am
new file mode 100644
index 00000000000..9937a0bd026
--- /dev/null
+++ b/geo-replication/src/Makefile.am
@@ -0,0 +1,48 @@
+gsyncddir = $(GLUSTERFS_LIBEXECDIR)
+
+gsyncd_SCRIPTS = gverify.sh peer_gsec_create \
+ set_geo_rep_pem_keys.sh peer_mountbroker peer_mountbroker.py \
+ peer_georep-sshkey.py
+
+# peer_gsec_create and peer_add_secret_pub are not added to
+# EXTRA_DIST as it's derived from a .in file
+EXTRA_DIST = gverify.sh set_geo_rep_pem_keys.sh peer_mountbroker.py.in \
+ peer_georep-sshkey.py.in
+
+gsyncd_PROGRAMS = gsyncd
+
+gsyncd_SOURCES = gsyncd.c procdiggy.c
+
+gsyncd_LDADD = $(GF_LDADD) $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+gsyncd_LDFLAGS = $(GF_LDFLAGS)
+
+noinst_HEADERS = procdiggy.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -DGSYNCD_PREFIX=\"$(GLUSTERFS_LIBEXECDIR)\" -DUSE_LIBGLUSTERFS \
+ -DSBIN_DIR=\"$(sbindir)\" -DPYTHON=\"$(PYTHON)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
+$(top_builddir)/libglusterfs/src/libglusterfs.la:
+ $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
+
+
+install-exec-hook:
+ $(mkdir_p) $(DESTDIR)$(sbindir)
+ rm -f $(DESTDIR)$(sbindir)/gluster-mountbroker
+ ln -s $(GLUSTERFS_LIBEXECDIR)/peer_mountbroker.py \
+ $(DESTDIR)$(sbindir)/gluster-mountbroker
+
+ rm -f $(DESTDIR)$(sbindir)/gluster-georep-sshkey
+ ln -s $(GLUSTERFS_LIBEXECDIR)/peer_georep-sshkey.py \
+ $(DESTDIR)$(sbindir)/gluster-georep-sshkey
+
+
+uninstall-hook:
+ rm -f $(DESTDIR)$(sbindir)/gluster-mountbroker
+ rm -f $(DESTDIR)$(sbindir)/gluster-georep-sshkey
diff --git a/geo-replication/src/gsyncd.c b/geo-replication/src/gsyncd.c
new file mode 100644
index 00000000000..b5aeec5bf33
--- /dev/null
+++ b/geo-replication/src/gsyncd.c
@@ -0,0 +1,402 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/compat.h>
+#include <glusterfs/syscall.h>
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/param.h> /* for PATH_MAX */
+
+/* NOTE (USE_LIBGLUSTERFS):
+ * ------------------------
+ * When USE_LIBGLUSTERFS debugging sumbol is passed; perform
+ * glusterfs translator like initialization so that glusterfs
+ * globals, contexts are valid when glustefs api's are invoked.
+ * We unconditionally pass then while building gsyncd binary.
+ */
+#ifdef USE_LIBGLUSTERFS
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/globals.h>
+#include <glusterfs/defaults.h>
+#endif
+
+#include <glusterfs/common-utils.h>
+#include <glusterfs/run.h>
+#include "procdiggy.h"
+
+#define _GLUSTERD_CALLED_ "_GLUSTERD_CALLED_"
+#define _GSYNCD_DISPATCHED_ "_GSYNCD_DISPATCHED_"
+#define GSYNCD_CONF_TEMPLATE "geo-replication/gsyncd_template.conf"
+#define GSYNCD_PY "gsyncd.py"
+#define RSYNC "rsync"
+
+int restricted = 0;
+
+static int
+duplexpand(void **buf, size_t tsiz, size_t *len)
+{
+ size_t osiz = tsiz * *len;
+ char *p = realloc(*buf, osiz << 1);
+ if (!p) {
+ return -1;
+ }
+
+ memset(p + osiz, 0, osiz);
+ *buf = p;
+ *len <<= 1;
+
+ return 0;
+}
+
+static int
+str2argv(char *str, char ***argv)
+{
+ char *p = NULL;
+ char *savetok = NULL;
+ char *temp = NULL;
+ char *temp1 = NULL;
+ int argc = 0;
+ size_t argv_len = 32;
+ int ret = 0;
+ int i = 0;
+
+ assert(str);
+ temp = str = strdup(str);
+ if (!str)
+ goto error;
+
+ *argv = calloc(argv_len, sizeof(**argv));
+ if (!*argv)
+ goto error;
+
+ while ((p = strtok_r(str, " ", &savetok))) {
+ str = NULL;
+
+ argc++;
+ if (argc == argv_len) {
+ ret = duplexpand((void *)argv, sizeof(**argv), &argv_len);
+ if (ret == -1)
+ goto error;
+ }
+ temp1 = strdup(p);
+ if (!temp1)
+ goto error;
+ (*argv)[argc - 1] = temp1;
+ }
+
+ free(temp);
+ return argc;
+
+error:
+ fprintf(stderr, "out of memory\n");
+ free(temp);
+ for (i = 0; i < argc - 1; i++)
+ free((*argv)[i]);
+ free(*argv);
+ return -1;
+}
+
+static int
+invoke_gsyncd(int argc, char **argv)
+{
+ int i = 0;
+ int j = 0;
+ char *nargv[argc + 4];
+ char *python = NULL;
+
+ if (chdir("/") == -1)
+ goto error;
+
+ j = 0;
+ python = getenv("PYTHON");
+ if (!python)
+ python = PYTHON;
+ nargv[j++] = python;
+ nargv[j++] = GSYNCD_PREFIX "/python/syncdaemon/" GSYNCD_PY;
+ for (i = 1; i < argc; i++)
+ nargv[j++] = argv[i];
+
+ nargv[j++] = NULL;
+
+ execvp(python, nargv);
+
+ fprintf(stderr, "exec of '%s' failed\n", python);
+ return 127;
+
+error:
+ fprintf(stderr, "gsyncd initializaion failed\n");
+ return 1;
+}
+
+static int
+find_gsyncd(pid_t pid, pid_t ppid, char *name, void *data)
+{
+ char buf[NAME_MAX * 2] = {
+ 0,
+ };
+ char path[PATH_MAX] = {
+ 0,
+ };
+ char *p = NULL;
+ int zeros = 0;
+ int ret = 0;
+ int fd = -1;
+ pid_t *pida = (pid_t *)data;
+
+ if (ppid != pida[0])
+ return 0;
+
+ snprintf(path, sizeof path, PROC "/%d/cmdline", pid);
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return 0;
+ ret = sys_read(fd, buf, sizeof(buf));
+ sys_close(fd);
+ if (ret == -1)
+ return 0;
+ for (zeros = 0, p = buf; zeros < 2 && p < buf + ret; p++)
+ zeros += !*p;
+
+ ret = 0;
+ switch (zeros) {
+ case 2:
+ if ((strcmp(basename(buf), basename(PYTHON)) ||
+ strcmp(basename(buf + strlen(buf) + 1), GSYNCD_PY)) == 0) {
+ ret = 1;
+ break;
+ }
+ /* fallthrough */
+ case 1:
+ if (strcmp(basename(buf), GSYNCD_PY) == 0)
+ ret = 1;
+ }
+
+ if (ret == 1) {
+ if (pida[1] != -1) {
+ fprintf(stderr, GSYNCD_PY " sibling is not unique");
+ return -1;
+ }
+ pida[1] = pid;
+ }
+
+ return 0;
+}
+
+static int
+invoke_rsync(int argc, char **argv)
+{
+ int i = 0;
+ char path[PATH_MAX] = {
+ 0,
+ };
+ pid_t pid = -1;
+ pid_t ppid = -1;
+ pid_t pida[] = {-1, -1};
+ char *name = NULL;
+ char buf[PATH_MAX + 1] = {
+ 0,
+ };
+ int ret = 0;
+
+ assert(argv[argc] == NULL);
+
+ if (argc < 2 || strcmp(argv[1], "--server") != 0)
+ goto error;
+
+ for (i = 2; i < argc && argv[i][0] == '-'; i++)
+ ;
+
+ if (!(i == argc - 2 && strcmp(argv[i], ".") == 0 &&
+ argv[i + 1][0] == '/')) {
+ fprintf(stderr, "need an rsync invocation without protected args\n");
+ goto error;
+ }
+
+ /* look up sshd we are spawned from */
+ for (pid = getpid();; pid = ppid) {
+ ppid = pidinfo(pid, &name);
+ if (ppid < 0) {
+ fprintf(stderr, "sshd ancestor not found\n");
+ goto error;
+ }
+ if (strcmp(name, "sshd") == 0) {
+ GF_FREE(name);
+ break;
+ }
+ GF_FREE(name);
+ }
+ /* look up "ssh-sibling" gsyncd */
+ pida[0] = pid;
+ ret = prociter(find_gsyncd, pida);
+ if (ret == -1 || pida[1] == -1) {
+ fprintf(stderr, "gsyncd sibling not found\n");
+ goto error;
+ }
+ /* check if rsync target matches gsyncd target */
+ snprintf(path, sizeof path, PROC "/%d/cwd", pida[1]);
+ ret = sys_readlink(path, buf, sizeof(buf));
+ if (ret == -1 || ret == sizeof(buf))
+ goto error;
+ if (strcmp(argv[argc - 1], "/") == 0 /* root dir cannot be a target */ ||
+ (strcmp(argv[argc - 1], path) /* match against gluster target */ &&
+ strcmp(argv[argc - 1], buf) /* match against file target */) != 0) {
+ fprintf(stderr, "rsync target does not match " GEOREP " session\n");
+ goto error;
+ }
+
+ argv[0] = RSYNC;
+
+ execvp(RSYNC, argv);
+
+ fprintf(stderr, "exec of " RSYNC " failed\n");
+ return 127;
+
+error:
+ fprintf(stderr, "disallowed " RSYNC " invocation\n");
+ return 1;
+}
+
+static int
+invoke_gluster(int argc, char **argv)
+{
+ int i = 0;
+ int j = 0;
+ int optsover = 0;
+ char *ov = NULL;
+
+ for (i = 1; i < argc; i++) {
+ ov = strtail(argv[i], "--");
+ if (ov && !optsover) {
+ if (*ov == '\0')
+ optsover = 1;
+ continue;
+ }
+ switch (++j) {
+ case 1:
+ if (strcmp(argv[i], "volume") != 0)
+ goto error;
+ break;
+ case 2:
+ if (strcmp(argv[i], "info") != 0)
+ goto error;
+ break;
+ case 3:
+ break;
+ default:
+ goto error;
+ }
+ }
+
+ argv[0] = "gluster";
+ execvp(SBIN_DIR "/gluster", argv);
+ fprintf(stderr, "exec of gluster failed\n");
+ return 127;
+
+error:
+ fprintf(stderr, "disallowed gluster invocation\n");
+ return 1;
+}
+
+struct invocable {
+ char *name;
+ int (*invoker)(int argc, char **argv);
+};
+
+struct invocable invocables[] = {{"rsync", invoke_rsync},
+ {"gsyncd", invoke_gsyncd},
+ {"gluster", invoke_gluster},
+ {NULL, NULL}};
+
+int
+main(int argc, char **argv)
+{
+ int ret = -1;
+ char *evas = NULL;
+ struct invocable *i = NULL;
+ char *b = NULL;
+ char *sargv = NULL;
+ int j = 0;
+
+#ifdef USE_LIBGLUSTERFS
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = glusterfs_ctx_new();
+ if (!ctx)
+ return ENOMEM;
+
+ if (glusterfs_globals_init(ctx))
+ return 1;
+
+ THIS->ctx = ctx;
+ ret = default_mem_acct_init(THIS);
+ if (ret) {
+ fprintf(stderr, "internal error: mem accounting failed\n");
+ return 1;
+ }
+#endif
+
+ evas = getenv(_GLUSTERD_CALLED_);
+ if (evas && strcmp(evas, "1") == 0)
+ /* OK, we know glusterd called us, no need to look for further config
+ *...although this conclusion should not inherit to our children
+ */
+ unsetenv(_GLUSTERD_CALLED_);
+ else {
+ /* we regard all gsyncd invocations unsafe
+ * that do not come from glusterd and
+ * therefore restrict it
+ */
+ restricted = 1;
+
+ if (!getenv(_GSYNCD_DISPATCHED_)) {
+ evas = getenv("SSH_ORIGINAL_COMMAND");
+ if (evas)
+ sargv = evas;
+ else {
+ evas = getenv("SHELL");
+ if (evas && strcmp(basename(evas), "gsyncd") == 0 &&
+ argc == 3 && strcmp(argv[1], "-c") == 0)
+ sargv = argv[2];
+ }
+ }
+ }
+
+ if (!(sargv && restricted))
+ return invoke_gsyncd(argc, argv);
+
+ argc = str2argv(sargv, &argv);
+
+ if (argc == -1) {
+ fprintf(stderr, "internal error\n");
+ return 1;
+ }
+
+ if (setenv(_GSYNCD_DISPATCHED_, "1", 1) == -1) {
+ fprintf(stderr, "internal error\n");
+ goto out;
+ }
+
+ b = basename(argv[0]);
+ for (i = invocables; i->name; i++) {
+ if (strcmp(b, i->name) == 0)
+ return i->invoker(argc, argv);
+ }
+
+ fprintf(stderr, "invoking %s in restricted SSH session is not allowed\n",
+ b);
+
+out:
+ for (j = 1; j < argc; j++)
+ free(argv[j]);
+ free(argv);
+ return 1;
+}
diff --git a/geo-replication/src/gverify.sh b/geo-replication/src/gverify.sh
new file mode 100755
index 00000000000..f5f70d245e0
--- /dev/null
+++ b/geo-replication/src/gverify.sh
@@ -0,0 +1,276 @@
+#!/bin/bash
+
+# Script to verify the Master and Slave Gluster compatibility.
+# To use ./gverify <master volume> <slave user> <slave host> <slave volume> <ssh port> <log file>
+# Returns 0 if master and slave compatible.
+
+# Considering buffer_size 100MB
+BUFFER_SIZE=104857600;
+SSH_PORT=$5;
+master_log_file=`gluster --print-logdir`/geo-replication/gverify-mastermnt.log
+slave_log_file=`gluster --print-logdir`/geo-replication/gverify-slavemnt.log
+
+function SSHM()
+{
+ if [[ -z "${GR_SSH_IDENTITY_KEY}" ]]; then
+ ssh -p ${SSH_PORT} -q \
+ -oPasswordAuthentication=no \
+ -oStrictHostKeyChecking=no \
+ -oControlMaster=yes \
+ "$@";
+ else
+ ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -q \
+ -oPasswordAuthentication=no \
+ -oStrictHostKeyChecking=no \
+ -oControlMaster=yes \
+ "$@";
+ fi
+}
+
+function get_inode_num()
+{
+ local os
+ case `uname -s` in
+ NetBSD) os="NetBSD";;
+ Linux) os="Linux";;
+ *) os="Default";;
+ esac
+
+ if [[ "X$os" = "XNetBSD" ]]; then
+ echo $(stat -f "%i" "$1")
+ else
+ echo $(stat -c "%i" "$1")
+ fi
+}
+
+function umount_lazy()
+{
+ local os
+ case `uname -s` in
+ NetBSD) os="NetBSD";;
+ Linux) os="Linux";;
+ *) os="Default";;
+ esac
+
+ if [[ "X$os" = "XNetBSD" ]]; then
+ umount -f -R "$1"
+ else
+ umount -l "$1"
+ fi;
+}
+
+function disk_usage()
+{
+ local os
+ case `uname -s` in
+ NetBSD) os="NetBSD";;
+ Linux) os="Linux";;
+ *) os="Default";;
+ esac
+
+ if [[ "X$os" = "XNetBSD" ]]; then
+ echo $(df -P "$1" | tail -1)
+ else
+ echo $(df -P -B1 "$1" | tail -1)
+ fi;
+
+}
+
+function cmd_slave()
+{
+ local cmd_line;
+ cmd_line=$(cat <<EOF
+function do_verify() {
+ver=\$(gluster --version | head -1 | cut -f2 -d " ");
+echo \$ver;
+};
+source /etc/profile && do_verify;
+EOF
+);
+
+echo $cmd_line;
+}
+
+function master_stats()
+{
+ MASTERVOL=$1;
+ local inet6=$2;
+ local d;
+ local i;
+ local disk_size;
+ local used_size;
+ local ver;
+ local m_status;
+
+ d=$(mktemp -d -t ${0##*/}.XXXXXX 2>/dev/null);
+ if [ "$inet6" = "inet6" ]; then
+ glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --xlator-option="transport.address-family=inet6" --volfile-id $MASTERVOL -l $master_log_file $d;
+ else
+ glusterfs -s localhost --xlator-option="*dht.lookup-unhashed=off" --volfile-id $MASTERVOL -l $master_log_file $d;
+ fi
+
+ i=$(get_inode_num $d);
+ if [[ "$i" -ne "1" ]]; then
+ echo 0:0;
+ exit 1;
+ fi;
+ cd $d;
+ disk_size=$(disk_usage $d | awk "{print \$2}");
+ used_size=$(disk_usage $d | awk "{print \$3}");
+ umount_lazy $d;
+ rmdir $d;
+ ver=$(gluster --version | head -1 | cut -f2 -d " ");
+ m_status=$(echo "$disk_size:$used_size:$ver");
+ echo $m_status
+}
+
+
+function slave_stats()
+{
+ SLAVEUSER=$1;
+ SLAVEHOST=$2;
+ SLAVEVOL=$3;
+ local inet6=$4;
+ local cmd_line;
+ local ver;
+ local status;
+
+ d=$(mktemp -d -t ${0##*/}.XXXXXX 2>/dev/null);
+ if [ "$inet6" = "inet6" ]; then
+ glusterfs --xlator-option="*dht.lookup-unhashed=off" --xlator-option="transport.address-family=inet6" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d;
+ else
+ glusterfs --xlator-option="*dht.lookup-unhashed=off" --volfile-server $SLAVEHOST --volfile-id $SLAVEVOL -l $slave_log_file $d;
+ fi
+
+ i=$(get_inode_num $d);
+ if [[ "$i" -ne "1" ]]; then
+ echo 0:0;
+ exit 1;
+ fi;
+ cd $d;
+ disk_size=$(disk_usage $d | awk "{print \$2}");
+ used_size=$(disk_usage $d | awk "{print \$3}");
+ no_of_files=$(find $d -maxdepth 1 -path "$d/.trashcan" -prune -o -path "$d" -o -print0 -quit);
+ umount_lazy $d;
+ rmdir $d;
+
+ cmd_line=$(cmd_slave);
+ ver=`SSHM $SLAVEUSER@$SLAVEHOST bash -c "'$cmd_line'"`;
+ status=$disk_size:$used_size:$ver:$no_of_files;
+ echo $status
+}
+
+function ping_host ()
+{
+ ### Use bash internal socket support
+ {
+ exec 100<>/dev/tcp/$1/$2
+ if [ $? -ne '0' ]; then
+ return 1;
+ else
+ exec 100>&-
+ return 0;
+ fi
+ } 1>&2 2>/dev/null
+}
+
+function main()
+{
+ log_file=$6
+ > $log_file
+
+ inet6=$7
+ local cmd_line
+ local ver
+
+ # Use FORCE_BLOCKER flag in the error message to differentiate
+ # between the errors which the force command should bypass
+
+ # Test tcp connection to port 22, this is necessary since `ping`
+ # does not work on all environments where 'ssh' is allowed but
+ # ICMP is filterd
+
+ ping_host $3 ${SSH_PORT}
+
+ if [ $? -ne 0 ]; then
+ echo "FORCE_BLOCKER|$3 not reachable." > $log_file
+ exit 1;
+ fi;
+
+ if [[ -z "${GR_SSH_IDENTITY_KEY}" ]]; then
+ ssh -p ${SSH_PORT} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "echo Testing_Passwordless_SSH";
+ else
+ ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 "echo Testing_Passwordless_SSH";
+ fi
+
+ if [ $? -ne 0 ]; then
+ echo "FORCE_BLOCKER|Passwordless ssh login has not been setup with $3 for user $2." > $log_file
+ exit 1;
+ fi;
+
+ cmd_line=$(cmd_slave);
+ if [[ -z "${GR_SSH_IDENTITY_KEY}" ]]; then
+ ver=$(ssh -p ${SSH_PORT} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 bash -c "'$cmd_line'")
+ else
+ ver=$(ssh -p ${SSH_PORT} -i ${GR_SSH_IDENTITY_KEY} -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $2@$3 bash -c "'$cmd_line'")
+ fi
+
+ if [ -z "$ver" ]; then
+ echo "FORCE_BLOCKER|gluster command not found on $3 for user $2." > $log_file
+ exit 1;
+ fi;
+
+ ERRORS=0;
+ master_data=$(master_stats $1 ${inet6});
+ slave_data=$(slave_stats $2 $3 $4 ${inet6});
+ master_disk_size=$(echo $master_data | cut -f1 -d':');
+ slave_disk_size=$(echo $slave_data | cut -f1 -d':');
+ master_used_size=$(echo $master_data | cut -f2 -d':');
+ slave_used_size=$(echo $slave_data | cut -f2 -d':');
+ master_version=$(echo $master_data | cut -f3 -d':');
+ slave_version=$(echo $slave_data | cut -f3 -d':');
+ slave_no_of_files=$(echo $slave_data | cut -f4 -d':');
+
+ if [[ "x$master_disk_size" = "x" || "x$master_version" = "x" || "$master_disk_size" -eq "0" ]]; then
+ echo "FORCE_BLOCKER|Unable to mount and fetch master volume details. Please check the log: $master_log_file" > $log_file;
+ exit 1;
+ fi;
+
+ if [[ "x$slave_disk_size" = "x" || "x$slave_version" = "x" || "$slave_disk_size" -eq "0" ]]; then
+ echo "FORCE_BLOCKER|Unable to mount and fetch slave volume details. Please check the log: $slave_log_file" > $log_file;
+ exit 1;
+ fi;
+
+ # The above checks are mandatory and force command should be blocked
+ # if they fail. The checks below can be bypassed if force option is
+ # provided hence no FORCE_BLOCKER flag.
+
+ if [ "$slave_disk_size" -lt "$master_disk_size" ]; then
+ echo "Total disk size of master is greater than disk size of slave." >> $log_file;
+ ERRORS=$(($ERRORS + 1));
+ fi
+
+ effective_master_used_size=$(( $master_used_size + $BUFFER_SIZE ))
+ slave_available_size=$(( $slave_disk_size - $slave_used_size ))
+ master_available_size=$(( $master_disk_size - $effective_master_used_size ));
+
+ if [ "$slave_available_size" -lt "$master_available_size" ]; then
+ echo "Total available size of master is greater than available size of slave" >> $log_file;
+ ERRORS=$(($ERRORS + 1));
+ fi
+
+ if [ ! -z $slave_no_of_files ]; then
+ echo "$3::$4 is not empty. Please delete existing files in $3::$4 and retry, or use force to continue without deleting the existing files." >> $log_file;
+ ERRORS=$(($ERRORS + 1));
+ fi;
+
+ if [[ $master_version != $slave_version ]]; then
+ echo "Gluster version mismatch between master and slave. Master version: $master_version Slave version: $slave_version" >> $log_file;
+ ERRORS=$(($ERRORS + 1));
+ fi;
+
+ exit $ERRORS;
+}
+
+
+main "$@";
diff --git a/geo-replication/src/peer_georep-sshkey.py.in b/geo-replication/src/peer_georep-sshkey.py.in
new file mode 100644
index 00000000000..58696e9a616
--- /dev/null
+++ b/geo-replication/src/peer_georep-sshkey.py.in
@@ -0,0 +1,116 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+"""
+Usage:
+ gluster-georep-sshkey generate
+ or
+ gluster-georep-sshkey generate --no-prefix
+
+Generates two SSH keys(one for gsyncd access and other for tar) in all
+peer nodes and collects the public keys to the local node where it is
+initiated. Adds `command=` prefix to common_secret.pem.pub if `--no-prefix`
+argument is not passed.
+"""
+import os
+import glob
+
+from gluster.cliutils import (node_output_ok, execute, execute_in_peers,
+ Cmd, runcli)
+from prettytable import PrettyTable
+
+
+SECRET_PEM = "@GLUSTERD_WORKDIR@/geo-replication/secret.pem"
+TAR_SSH_PEM = "@GLUSTERD_WORKDIR@/geo-replication/tar_ssh.pem"
+GSYNCD_CMD = 'command="@GLUSTERFS_LIBEXECDIR@/gsyncd" '
+TAR_CMD = 'command="tar ${SSH_ORIGINAL_COMMAND#* }" '
+COMMON_SECRET_FILE = "@GLUSTERD_WORKDIR@/geo-replication/common_secret.pem.pub"
+
+
+class NodeGenCmd(Cmd):
+ name = "node-generate"
+
+ def args(self, parser):
+ parser.add_argument("no_prefix")
+
+ def run(self, args):
+ # Regenerate if secret.pem.pub not exists
+ if not os.path.exists(SECRET_PEM + ".pub"):
+ # Cleanup old files
+ for f in glob.glob(SECRET_PEM + "*"):
+ os.remove(f)
+
+ execute(["ssh-keygen", "-N", "", "-f", SECRET_PEM])
+
+ # Regenerate if ssh_tar.pem.pub not exists
+ if not os.path.exists(TAR_SSH_PEM + ".pub"):
+ # Cleanup old files
+ for f in glob.glob(TAR_SSH_PEM + "*"):
+ os.remove(f)
+
+ execute(["ssh-keygen", "-N", "", "-f", TAR_SSH_PEM])
+
+ # Add required prefixes if prefix is not "container"
+ prefix_secret_pem_pub = ""
+ prefix_tar_ssh_pem_pub = ""
+ if args.no_prefix != "no-prefix":
+ prefix_secret_pem_pub = GSYNCD_CMD
+ prefix_tar_ssh_pem_pub = TAR_CMD
+
+ data = {"default_pub": "", "tar_pub": ""}
+ with open(SECRET_PEM + ".pub") as f:
+ data["default_pub"] = prefix_secret_pem_pub + f.read().strip()
+
+ with open(TAR_SSH_PEM + ".pub") as f:
+ data["tar_pub"] = prefix_tar_ssh_pem_pub + f.read().strip()
+
+ node_output_ok(data)
+
+
+def color_status(value):
+ if value in ["UP", "OK"]:
+ return "green"
+ return "red"
+
+
+class GenCmd(Cmd):
+ name = "generate"
+
+ def args(self, parser):
+ parser.add_argument("--no-prefix", help="Do not use prefix in "
+ "generated pub keys", action="store_true")
+
+ def run(self, args):
+ prefix = "no-prefix" if args.no_prefix else "."
+ out = execute_in_peers("node-generate", [prefix])
+
+ common_secrets = []
+ table = PrettyTable(["NODE", "NODE STATUS", "KEYGEN STATUS"])
+ table.align["NODE STATUS"] = "r"
+ table.align["KEYGEN STATUS"] = "r"
+ for p in out:
+ if p.ok:
+ common_secrets.append(p.output["default_pub"])
+ common_secrets.append(p.output["tar_pub"])
+
+ table.add_row([p.hostname,
+ "UP" if p.node_up else "DOWN",
+ "OK" if p.ok else "NOT OK: {0}".format(
+ p.error)])
+
+ with open(COMMON_SECRET_FILE, "w") as f:
+ f.write("\n".join(common_secrets) + "\n")
+
+ print (table)
+
+
+if __name__ == "__main__":
+ runcli()
diff --git a/geo-replication/src/peer_gsec_create.in b/geo-replication/src/peer_gsec_create.in
new file mode 100755
index 00000000000..6d4a4847013
--- /dev/null
+++ b/geo-replication/src/peer_gsec_create.in
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libexecdir=@libexecdir@
+
+if [ ! -f "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub ]; then
+ \rm -rf "$GLUSTERD_WORKDIR"/geo-replication/secret.pem*
+ ssh-keygen -N '' -f "$GLUSTERD_WORKDIR"/geo-replication/secret.pem > /dev/null
+fi
+
+if [ ! -f "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub ]; then
+ \rm -rf "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem*
+ ssh-keygen -N '' -f "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem > /dev/null
+fi
+
+if [ "Xcontainer" = "X$1" ]; then
+ output1=`cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub`
+ output2=`cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub`
+else
+ output1=`echo command=\"${libexecdir}/glusterfs/gsyncd\" ""``cat "$GLUSTERD_WORKDIR"/geo-replication/secret.pem.pub`
+ output2=`echo command=\"tar \$\{SSH_ORIGINAL_COMMAND#* \}\" ""``cat "$GLUSTERD_WORKDIR"/geo-replication/tar_ssh.pem.pub`
+fi
+echo -e "$output1\n$output2"
diff --git a/geo-replication/src/peer_mountbroker.in b/geo-replication/src/peer_mountbroker.in
new file mode 100644
index 00000000000..8ecf38ded41
--- /dev/null
+++ b/geo-replication/src/peer_mountbroker.in
@@ -0,0 +1,211 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+
+import os
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+import json
+import sys
+
+PROG_DESCRIPTION = """
+GlusterFS Mountbroker user management
+"""
+
+args = None
+
+
+def ok(message=""):
+ if (not args and "-j" in sys.argv) or (args and args.json):
+ print(json.dumps({"ok": True, "message": message}))
+ else:
+ if message:
+ print(message)
+
+ sys.exit(0)
+
+
+def notok(message=""):
+ if (not args and "-j" in sys.argv) or (args and args.json):
+ print(json.dumps({"ok": False, "message": message}))
+ else:
+ print("error: %s" % message)
+
+ # Always return zero due to limitation while executing
+ # as `gluster system:: execute`
+ sys.exit(0)
+
+
+class NoStdErrParser(ArgumentParser):
+ """
+ with gluster system:: execute, stderr gives
+ "Unable to end. Error : Bad file descriptor" error,
+ so deriving new class, prints error message and
+ exits with zero.
+ """
+ def error(self, message):
+ notok(message)
+
+
+class MountbrokerUserMgmt(object):
+ def __init__(self, volfile):
+ self.volfile = volfile
+ self._options = {}
+ self.commented_lines = []
+ self._parse()
+
+ def _parse(self):
+ with open(self.volfile, "r") as f:
+ for line in f:
+ line = line.strip()
+ if line.startswith("option "):
+ key, value = line.split(" ")[1:]
+ self._options[key] = value
+ if line.startswith("#"):
+ self.commented_lines.append(line)
+
+ def _get_write_data(self):
+ op = "volume management\n"
+ op += " type mgmt/glusterd\n"
+ for k, v in self._options.items():
+ op += " option %s %s\n" % (k, v)
+ for line in self.commented_lines:
+ op += " %s\n" % line
+ op += "end-volume"
+ return op
+
+ def save(self):
+ with open(self.volfile + "_tmp", "w") as f:
+ f.write(self._get_write_data())
+ f.flush()
+ os.fsync(f.fileno())
+ os.rename(self.volfile + "_tmp", self.volfile)
+
+ def set_opt(self, key, value):
+ self._options[key] = value.strip()
+
+ def remove_opt(self, key):
+ if key in self._options:
+ del(self._options[key])
+
+ def add_user(self, user, volumes):
+ vols = set()
+ for k, v in self._options.items():
+ if k.startswith("mountbroker-geo-replication.") \
+ and user == k.split(".")[-1]:
+ vols.update(v.split(","))
+
+ vols.update(volumes)
+ self.set_opt("mountbroker-geo-replication.%s" % user,
+ ",".join(vols))
+
+ def remove_volume(self, user, volumes):
+ vols = set()
+ for k, v in self._options.items():
+ if k.startswith("mountbroker-geo-replication.") \
+ and user == k.split(".")[-1]:
+ vols.update(v.split(","))
+
+ for v1 in volumes:
+ vols.discard(v1)
+
+ if vols:
+ self.set_opt("mountbroker-geo-replication.%s" % user,
+ ",".join(vols))
+ else:
+ self.remove_opt("mountbroker-geo-replication.%s" % user)
+
+ def remove_user(self, user):
+ self.remove_opt("mountbroker-geo-replication.%s" % user)
+
+ def info(self):
+ data = {"users": []}
+
+ for k, v in self._options.items():
+ if k.startswith("mountbroker-geo-replication."):
+ data["users"].append(
+ {"name": k.split(".")[-1], "volumes": v.split(",")}
+ )
+ else:
+ data[k] = v
+
+ return data
+
+
+def format_info(data):
+ op = "%s %s\n" % ("Option".ljust(50), "Value".ljust(50))
+ op += ("-" * 101) + "\n"
+ for key, value in data.items():
+ if key != "users":
+ op += "%s %s\n" % (key.ljust(50), value)
+
+ op += "\nUsers: %s\n" % ("None" if not data["users"] else "")
+ for user in data["users"]:
+ op += "%s: %s\n" % (user["name"], ", ".join(user["volumes"]))
+ op += "\n\n"
+ return op
+
+
+def _get_args():
+ parser = NoStdErrParser(formatter_class=RawDescriptionHelpFormatter,
+ description=PROG_DESCRIPTION)
+
+ parser.add_argument('-j', dest="json", help="JSON output",
+ action="store_true")
+ subparsers = parser.add_subparsers(title='subcommands', dest='cmd')
+ parser_useradd = subparsers.add_parser('user')
+ parser_userdel = subparsers.add_parser('userdel')
+ parser_volumedel = subparsers.add_parser('volumedel')
+ subparsers.add_parser('info')
+ parser_opt = subparsers.add_parser('opt')
+ parser_optdel = subparsers.add_parser('optdel')
+
+ parser_useradd.add_argument('username', help="Username", type=str)
+ parser_useradd.add_argument('volumes', type=str, default='',
+ help="Volumes list. ',' separated")
+
+ parser_volumedel.add_argument('username', help="Username", type=str)
+ parser_volumedel.add_argument('volumes', type=str, default='',
+ help="Volumes list. ',' separated")
+
+ parser_userdel.add_argument('username', help="Username", type=str)
+
+ parser_opt.add_argument('opt_name', help="Name", type=str)
+ parser_opt.add_argument('opt_value', help="Value", type=str)
+
+ parser_optdel.add_argument('opt_name', help="Name", type=str)
+
+ return parser.parse_args()
+
+
+def main():
+ global args
+ args = _get_args()
+
+ m = MountbrokerUserMgmt("@GLUSTERD_VOLFILE@")
+
+ if args.cmd == "opt":
+ m.set_opt(args.opt_name, args.opt_value)
+ elif args.cmd == "optdel":
+ m.remove_opt(args.opt_name)
+ elif args.cmd == "userdel":
+ m.remove_user(args.username)
+ elif args.cmd == "user":
+ volumes = [v.strip() for v in args.volumes.split(",")
+ if v.strip() != ""]
+ m.add_user(args.username, volumes)
+ elif args.cmd == "volumedel":
+ volumes = [v.strip() for v in args.volumes.split(",")
+ if v.strip() != ""]
+ m.remove_volume(args.username, volumes)
+ elif args.cmd == "info":
+ info = m.info()
+ if not args.json:
+ info = format_info(info)
+ ok(info)
+
+ if args.cmd != "info":
+ m.save()
+ ok()
+
+if __name__ == "__main__":
+ main()
diff --git a/geo-replication/src/peer_mountbroker.py.in b/geo-replication/src/peer_mountbroker.py.in
new file mode 100644
index 00000000000..40b90ffc560
--- /dev/null
+++ b/geo-replication/src/peer_mountbroker.py.in
@@ -0,0 +1,401 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+
+import os
+from errno import EEXIST, ENOENT
+
+from gluster.cliutils import (execute, Cmd, node_output_ok,
+ node_output_notok, execute_in_peers,
+ runcli, oknotok)
+from prettytable import PrettyTable
+
+LOG_DIR = "@localstatedir@/log/glusterfs/geo-replication-slaves"
+CLI_LOG = "@localstatedir@/log/glusterfs/cli.log"
+GEOREP_DIR = "@GLUSTERD_WORKDIR@/geo-replication"
+GLUSTERD_VOLFILE = "@GLUSTERD_VOLFILE@"
+
+
+class MountbrokerUserMgmt(object):
+ def __init__(self, volfile):
+ self.volfile = volfile
+ self._options = {}
+ self.commented_lines = []
+ self.user_volumes = {}
+ self._parse()
+
+ def _parse(self):
+ """ Example glusterd.vol
+ volume management
+ type mgmt/glusterd
+ option working-directory /var/lib/glusterd
+ option transport-type socket,rdma
+ option transport.socket.keepalive-time 10
+ option transport.socket.keepalive-interval 2
+ option transport.socket.read-fail-log off
+ option rpc-auth-allow-insecure on
+ option ping-timeout 0
+ option event-threads 1
+ # option base-port 49152
+ option mountbroker-root /var/mountbroker-root
+ option mountbroker-geo-replication.user1 vol1,vol2,vol3
+ option geo-replication-log-group geogroup
+ option rpc-auth-allow-insecure on
+ end-volume
+ """
+ with open(self.volfile, "r") as f:
+ for line in f:
+ line = line.strip()
+ if line.startswith("option "):
+ key, value = line.split()[1:]
+ self._options[key] = value
+ if line.startswith("#"):
+ self.commented_lines.append(line)
+
+ for k, v in self._options.items():
+ if k.startswith("mountbroker-geo-replication."):
+ user = k.split(".")[-1]
+ self.user_volumes[user] = set(v.split(","))
+
+ def get_group(self):
+ return self._options.get("geo-replication-log-group", None)
+
+ def _get_write_data(self):
+ op = "volume management\n"
+ op += " type mgmt/glusterd\n"
+ for k, v in self._options.items():
+ if k.startswith("mountbroker-geo-replication."):
+ # Users will be added seperately
+ continue
+
+ op += " option %s %s\n" % (k, v)
+
+ for k, v in self.user_volumes.items():
+ if v:
+ op += (" option mountbroker-geo-replication."
+ "%s %s\n" % (k, ",".join(v)))
+
+ for line in self.commented_lines:
+ op += " %s\n" % line
+
+ op += "end-volume"
+ return op
+
+ def save(self):
+ with open(self.volfile + "_tmp", "w") as f:
+ f.write(self._get_write_data())
+ f.flush()
+ os.fsync(f.fileno())
+ os.rename(self.volfile + "_tmp", self.volfile)
+
+ def set_mount_root_and_group(self, mnt_root, group):
+ self._options["mountbroker-root"] = mnt_root
+ self._options["geo-replication-log-group"] = group
+
+ def add(self, volume, user):
+ user_volumes = self.user_volumes.get(user, None)
+
+ if user_volumes is not None and volume in user_volumes:
+ # User and Volume already exists
+ return
+
+ if user_volumes is None:
+ # User not exists
+ self.user_volumes[user] = set()
+
+ self.user_volumes[user].add(volume)
+
+ def remove(self, volume=None, user=None):
+ if user is not None:
+ if volume is None:
+ self.user_volumes[user] = set()
+ else:
+ try:
+ self.user_volumes.get(user, set()).remove(volume)
+ except KeyError:
+ pass
+ else:
+ if volume is None:
+ return
+
+ for k, v in self.user_volumes.items():
+ try:
+ self.user_volumes[k].remove(volume)
+ except KeyError:
+ pass
+
+ def info(self):
+ # Convert Volumes set into Volumes list
+ users = {}
+ for k, v in self.user_volumes.items():
+ users[k] = list(v)
+
+ data = {
+ "mountbroker-root": self._options.get("mountbroker-root", "None"),
+ "geo-replication-log-group": self._options.get(
+ "geo-replication-log-group", ""),
+ "users": users
+ }
+
+ return data
+
+
+class NodeSetup(Cmd):
+ # Test if group exists using `getent group <grp>`
+ # and then group add using `groupadd <grp>`
+ # chgrp -R <grp> /var/log/glusterfs/geo-replication-slaves
+ # chgrp -R <grp> /var/lib/glusterd/geo-replication
+ # chmod -R 770 /var/log/glusterfs/geo-replication-slaves
+ # chmod 770 /var/lib/glusterd/geo-replication
+ # mkdir -p <mnt_root>
+ # chmod 0711 <mnt_root>
+ # If selinux,
+ # semanage fcontext -a -e /home /var/mountbroker-root
+ # restorecon -Rv /var/mountbroker-root
+ name = "node-setup"
+
+ def args(self, parser):
+ parser.add_argument("mount_root")
+ parser.add_argument("group")
+
+ def run(self, args):
+ m = MountbrokerUserMgmt(GLUSTERD_VOLFILE)
+
+ try:
+ os.makedirs(args.mount_root)
+ except OSError as e:
+ if e.errno == EEXIST:
+ pass
+ else:
+ node_output_notok("Unable to Create {0}".format(
+ args.mount_root))
+
+ execute(["chmod", "0711", args.mount_root])
+ try:
+ execute(["semanage", "fcontext", "-a", "-e",
+ "/home", args.mount_root])
+ except OSError as e:
+ if e.errno == ENOENT:
+ pass
+ else:
+ node_output_notok(
+ "Unable to run semanage: {0}".format(e))
+
+ try:
+ execute(["restorecon", "-Rv", args.mount_root])
+ except OSError as e:
+ if e.errno == ENOENT:
+ pass
+ else:
+ node_output_notok(
+ "Unable to run restorecon: {0}".format(e))
+
+ rc, out, err = execute(["getent", "group", args.group])
+ if rc != 0:
+ node_output_notok("User Group not exists")
+
+ execute(["chgrp", "-R", args.group, GEOREP_DIR])
+ execute(["chgrp", "-R", args.group, LOG_DIR])
+ execute(["chgrp", args.group, CLI_LOG])
+ execute(["chmod", "770", GEOREP_DIR])
+ execute(["find", LOG_DIR, "-type", "d", "-exec", "chmod", "770", "{}",
+ "+"])
+ execute(["find", LOG_DIR, "-type", "f", "-exec", "chmod", "660", "{}",
+ "+"])
+ execute(["chmod", "660", CLI_LOG])
+
+ m.set_mount_root_and_group(args.mount_root, args.group)
+ m.save()
+
+ node_output_ok()
+
+
+def color_status(value):
+ if value.lower() in ("up", "ok", "yes"):
+ return "green"
+ else:
+ return "red"
+
+
+class CliSetup(Cmd):
+ # gluster-mountbroker setup <MOUNT_ROOT> <GROUP>
+ name = "setup"
+
+ def args(self, parser):
+ parser.add_argument("mount_root",
+ help="Path to the mountbroker-root directory.")
+ parser.add_argument("group",
+ help="Group to be used for setup.")
+
+ def run(self, args):
+ out = execute_in_peers("node-setup", [args.mount_root,
+ args.group])
+ table = PrettyTable(["NODE", "NODE STATUS", "SETUP STATUS"])
+ table.align["NODE STATUS"] = "r"
+ table.align["SETUP STATUS"] = "r"
+ for p in out:
+ table.add_row([p.hostname,
+ "UP" if p.node_up else "DOWN",
+ "OK" if p.ok else "NOT OK: {0}".format(
+ p.error)])
+
+ print(table)
+
+
+class NodeStatus(Cmd):
+ # Check if Group exists
+ # Check if user exists
+ # Check directory permission /var/log/glusterfs/geo-replication-slaves
+ # and /var/lib/glusterd/geo-replication
+ # Check mount root and its permissions
+ # Check glusterd.vol file for user, group, dir existance
+ name = "node-status"
+
+ def run(self, args):
+ m = MountbrokerUserMgmt(GLUSTERD_VOLFILE)
+ data = m.info()
+ data["group_exists"] = False
+ data["path_exists"] = False
+
+ rc, out, err = execute(["getent", "group",
+ data["geo-replication-log-group"]])
+
+ if rc == 0:
+ data["group_exists"] = True
+
+ if os.path.exists(data["mountbroker-root"]):
+ data["path_exists"] = True
+
+ node_output_ok(data)
+
+
+class CliStatus(Cmd):
+ # gluster-mountbroker status
+ name = "status"
+
+ def run(self, args):
+ out = execute_in_peers("node-status")
+ table = PrettyTable(["NODE", "NODE STATUS", "MOUNT ROOT",
+ "GROUP", "USERS"])
+ table.align["NODE STATUS"] = "r"
+
+ for p in out:
+ node_data = p.output
+ if node_data == "" or node_data == "N/A":
+ node_data = {}
+
+ users_row_data = ""
+ for k, v in node_data.get("users", {}).items():
+ users_row_data += "{0}({1}) ".format(k, ", ".join(v))
+
+ if not users_row_data:
+ users_row_data = "None"
+
+ mount_root = node_data.get("mountbroker-root", "None")
+ if mount_root != "None":
+ mount_root += "({0})".format(oknotok(
+ node_data.get("path_exists", False)))
+
+ grp = node_data.get("geo-replication-log-group", "None")
+ if grp != "None":
+ grp += "({0})".format(oknotok(
+ node_data.get("group_exists", False)))
+
+ table.add_row([p.hostname,
+ "UP" if p.node_up else "DOWN",
+ mount_root,
+ grp,
+ users_row_data])
+
+ print(table)
+
+
+class NodeAdd(Cmd):
+ # useradd -m -g <grp> <usr>
+ # useradd to glusterd.vol
+ name = "node-add"
+
+ def args(self, parser):
+ parser.add_argument("volume")
+ parser.add_argument("user")
+
+ def run(self, args):
+ m = MountbrokerUserMgmt(GLUSTERD_VOLFILE)
+ grp = m.get_group()
+ if grp is None:
+ node_output_notok("Group is not available")
+
+ m.add(args.volume, args.user)
+ m.save()
+ node_output_ok()
+
+
+class CliAdd(Cmd):
+ # gluster-mountbroker add <VOLUME> <USER>
+ name = "add"
+
+ def args(self, parser):
+ parser.add_argument("volume",
+ help="Volume to be added.")
+ parser.add_argument("user",
+ help="User for which volume is to be added.")
+
+ def run(self, args):
+ out = execute_in_peers("node-add", [args.volume,
+ args.user])
+ table = PrettyTable(["NODE", "NODE STATUS", "ADD STATUS"])
+ table.align["NODE STATUS"] = "r"
+ table.align["ADD STATUS"] = "r"
+
+ for p in out:
+ table.add_row([p.hostname,
+ "UP" if p.node_up else "DOWN",
+ "OK" if p.ok else "NOT OK: {0}".format(
+ p.error)])
+
+ print(table)
+
+
+class NodeRemove(Cmd):
+ # userremove from glusterd.vol file
+ name = "node-remove"
+
+ def args(self, parser):
+ parser.add_argument("volume")
+ parser.add_argument("user")
+
+ def run(self, args):
+ m = MountbrokerUserMgmt(GLUSTERD_VOLFILE)
+ volume = None if args.volume == "." else args.volume
+ user = None if args.user == "." else args.user
+ m.remove(volume=volume, user=user)
+ m.save()
+ node_output_ok()
+
+
+class CliRemove(Cmd):
+ # gluster-mountbroker remove --volume <VOLUME> --user <USER>
+ name = "remove"
+
+ def args(self, parser):
+ parser.add_argument("--volume", default=".", help="Volume to be removed.")
+ parser.add_argument("--user", default=".",
+ help="User for which volume has to be removed.")
+
+ def run(self, args):
+ out = execute_in_peers("node-remove", [args.volume,
+ args.user])
+ table = PrettyTable(["NODE", "NODE STATUS", "REMOVE STATUS"])
+ table.align["NODE STATUS"] = "r"
+ table.align["REMOVE STATUS"] = "r"
+
+ for p in out:
+ table.add_row([p.hostname,
+ "UP" if p.node_up else "DOWN",
+ "OK" if p.ok else "NOT OK: {0}".format(
+ p.error)])
+
+ print(table)
+
+if __name__ == "__main__":
+ runcli()
diff --git a/geo-replication/src/procdiggy.c b/geo-replication/src/procdiggy.c
new file mode 100644
index 00000000000..8068ef79a42
--- /dev/null
+++ b/geo-replication/src/procdiggy.c
@@ -0,0 +1,136 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#include <sys/param.h> /* for PATH_MAX */
+
+#include <glusterfs/common-utils.h>
+#include <glusterfs/syscall.h>
+#include "procdiggy.h"
+
+pid_t
+pidinfo(pid_t pid, char **name)
+{
+ char buf[NAME_MAX * 2] = {
+ 0,
+ };
+ FILE *f = NULL;
+ char path[PATH_MAX] = {
+ 0,
+ };
+ char *p = NULL;
+ int ret = 0;
+ pid_t lpid = -1;
+
+ if (name)
+ *name = NULL;
+
+ snprintf(path, sizeof path, PROC "/%d/status", pid);
+
+ f = fopen(path, "r");
+ if (!f)
+ return -1;
+
+ for (;;) {
+ size_t len;
+ memset(buf, 0, sizeof(buf));
+ if (fgets(buf, sizeof(buf), f) == NULL || (len = strlen(buf)) == 0 ||
+ buf[len - 1] != '\n') {
+ lpid = -1;
+ goto out;
+ }
+ buf[len - 1] = '\0';
+
+ if (name && !*name) {
+ p = strtail(buf, "Name:");
+ if (p) {
+ while (isspace(*++p))
+ ;
+ *name = gf_strdup(p);
+ if (!*name) {
+ lpid = -2;
+ goto out;
+ }
+ continue;
+ }
+ }
+
+ p = strtail(buf, "PPid:");
+ if (p)
+ break;
+ }
+
+ while (isspace(*++p))
+ ;
+ ret = gf_string2int(p, &lpid);
+ if (ret == -1)
+ lpid = -1;
+
+out:
+ fclose(f);
+ if (lpid == -1 && name && *name)
+ GF_FREE(*name);
+ if (lpid == -2)
+ fprintf(stderr, "out of memory\n");
+ return lpid;
+}
+
+int
+prociter(int (*proch)(pid_t pid, pid_t ppid, char *tmpname, void *data),
+ void *data)
+{
+ char *name = NULL;
+ DIR *d = NULL;
+ struct dirent *de = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ pid_t pid = -1;
+ pid_t ppid = -1;
+ int ret = 0;
+
+ d = sys_opendir(PROC);
+ if (!d)
+ return -1;
+
+ for (;;) {
+ errno = 0;
+ de = sys_readdir(d, scratch);
+ if (!de || errno != 0)
+ break;
+
+ if (gf_string2int(de->d_name, &pid) != -1 && pid >= 0) {
+ ppid = pidinfo(pid, &name);
+ switch (ppid) {
+ case -1:
+ continue;
+ case -2:
+ break;
+ }
+ ret = proch(pid, ppid, name, data);
+ GF_FREE(name);
+ if (ret)
+ break;
+ }
+ }
+ sys_closedir(d);
+ if (!de && errno) {
+ fprintf(stderr, "failed to traverse " PROC " (%s)\n", strerror(errno));
+ ret = -1;
+ }
+
+ return ret;
+}
diff --git a/geo-replication/src/procdiggy.h b/geo-replication/src/procdiggy.h
new file mode 100644
index 00000000000..e17ccd31c89
--- /dev/null
+++ b/geo-replication/src/procdiggy.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifdef __NetBSD__
+#include <sys/syslimits.h>
+#endif /* __NetBSD__ */
+
+#define PROC "/proc"
+
+pid_t
+pidinfo(pid_t pid, char **name);
+
+int
+prociter(int (*proch)(pid_t pid, pid_t ppid, char *name, void *data),
+ void *data);
diff --git a/geo-replication/src/set_geo_rep_pem_keys.sh b/geo-replication/src/set_geo_rep_pem_keys.sh
new file mode 100755
index 00000000000..8a43fa39d1f
--- /dev/null
+++ b/geo-replication/src/set_geo_rep_pem_keys.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# Script to copy the pem keys from the user's home directory
+# to $GLUSTERD_WORKDIR/geo-replication and then copy
+# the keys to other nodes in the cluster and add them to the
+# respective authorized keys. The script takes as argument the
+# user name and assumes that the user will be present in all
+# the nodes in the cluster. Not to be used for root user
+
+function main()
+{
+ user=$1
+ master_vol=$2
+ slave_vol=$3
+ GLUSTERD_WORKDIR=$(gluster system:: getwd)
+
+ if [ "$user" == "" ]; then
+ echo "Please enter the user's name"
+ exit 1;
+ fi
+
+ if [ "$master_vol" == "" ]; then
+ echo "Invalid master volume name"
+ exit 1;
+ fi
+
+ if [ "$slave_vol" == "" ]; then
+ echo "Invalid slave volume name"
+ exit 1;
+ fi
+
+ COMMON_SECRET_PEM_PUB=${master_vol}_${slave_vol}_common_secret.pem.pub
+
+ if [ "$user" == "root" ]; then
+ echo "This script is not needed for root"
+ exit 1;
+ fi
+
+ home_dir=`getent passwd $user | cut -d ':' -f 6`;
+
+ if [ "$home_dir" == "" ]; then
+ echo "No user $user found"
+ exit 1;
+ fi
+
+ if [ -f $home_dir/${COMMON_SECRET_PEM_PUB} ]; then
+ cp $home_dir/${COMMON_SECRET_PEM_PUB} ${GLUSTERD_WORKDIR}/geo-replication/
+ gluster system:: copy file /geo-replication/${COMMON_SECRET_PEM_PUB}
+ gluster system:: execute add_secret_pub $user geo-replication/${master_vol}_${slave_vol}_common_secret.pem.pub
+ gluster vol set ${slave_vol} features.read-only on
+ else
+ echo "$home_dir/common_secret.pem.pub not present. Please run geo-replication command on master with push-pem option to generate the file"
+ exit 1;
+ fi
+ exit 0;
+}
+
+main "$@";
diff --git a/geo-replication/syncdaemon/Makefile.am b/geo-replication/syncdaemon/Makefile.am
new file mode 100644
index 00000000000..d70e3368faf
--- /dev/null
+++ b/geo-replication/syncdaemon/Makefile.am
@@ -0,0 +1,8 @@
+syncdaemondir = $(GLUSTERFS_LIBEXECDIR)/python/syncdaemon
+
+syncdaemon_PYTHON = rconf.py gsyncd.py __init__.py master.py README.md repce.py \
+ resource.py syncdutils.py monitor.py libcxattr.py gsyncdconfig.py \
+ libgfchangelog.py gsyncdstatus.py conf.py logutils.py \
+ subcmds.py argsupgrade.py py2py3.py
+
+CLEANFILES =
diff --git a/xlators/features/marker/utils/syncdaemon/README.md b/geo-replication/syncdaemon/README.md
index d45006932d1..5ab785ae669 100644
--- a/xlators/features/marker/utils/syncdaemon/README.md
+++ b/geo-replication/syncdaemon/README.md
@@ -11,15 +11,14 @@ Requirements are categorized according to this.
* Python >= 2.5, or 2.4 with Ctypes (see below) (both)
* OpenSSH >= 4.0 (master) / SSH2 compliant sshd (eg. openssh) (slave)
* rsync (both)
-* glusterfs with marker support (master); glusterfs (optional on slave)
-* FUSE; for supported versions consult glusterfs
+* glusterfs: with marker and changelog support (master & slave);
+* FUSE: glusterfs fuse module with auxiliary gfid based access support
INSTALLATION
------------
-As of now, the supported way of operation is running from the source directory.
+As of now, the supported way of operation is running from the source directory or using the RPMs given.
-If you use Python 2.4.x, you need to install the [Ctypes module](http://python.net/crew/theller/ctypes/).
CONFIGURATION
-------------
@@ -39,41 +38,18 @@ The config file format matches the following syntax:
<option2>: <value2>
# comment
-By default (unless specified by the option `-c`), gsyncd looks for config file at _conf/gsyncd.conf_
+By default (unless specified by the option `-c`), gsyncd looks for config file at _conf/gsyncd_template.conf_
in the source tree.
USAGE
-----
-gsyncd is a utilitly for continous mirroring, ie. it mirrors master to slave incrementally.
-Assume we have a gluster volume _pop_ at localhost. We try to set up the following mirrors
-for it with gysncd:
+gsyncd is a utilitly for continuous mirroring, ie. it mirrors master to slave incrementally.
+Assume we have a gluster volume _pop_ at localhost. We try to set up the mirroring for volume
+_pop_ using gsyncd for gluster volume _moz_ on remote machine/cluster @ example.com. The
+respective gsyncd invocations are (demoing some syntax sugaring):
-1. _/data/mirror_
-2. local gluster volume _yow_
-3. _/data/far_mirror_ at example.com
-4. gluster volume _moz_ at example.com
-
-The respective gsyncd invocations are (demoing some syntax sugaring):
-
-1.
-
- gsyncd.py gluster://localhost:pop file:///data/mirror
-
- or short form
-
- gsyncd.py :pop /data/mirror
-
-2. `gsyncd :pop :yow`
-3.
-
- gsyncd.py :pop ssh://example.com:/data/far_mirror
-
- or short form
-
- gsyncd.py :pop example.com:/data/far_mirror
-
-4. `gsyncd.py :pop example.com::moz`
+`gsyncd.py :pop example.com::moz`
gsyncd has to be available on both sides; it's location on the remote side has to be specified
via the "--remote-gsyncd" option (or "remote-gsyncd" config file parameter). (This option can also be
diff --git a/xlators/features/marker/utils/syncdaemon/__codecheck.py b/geo-replication/syncdaemon/__codecheck.py
index e3386afba8b..9437147f7d9 100644
--- a/xlators/features/marker/utils/syncdaemon/__codecheck.py
+++ b/geo-replication/syncdaemon/__codecheck.py
@@ -1,10 +1,21 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from __future__ import print_function
import os
import os.path
import sys
import tempfile
import shutil
-ipd = tempfile.mkdtemp(prefix = 'codecheck-aux')
+ipd = tempfile.mkdtemp(prefix='codecheck-aux')
try:
# add a fake ipaddr module, we don't want to
@@ -25,7 +36,7 @@ class IPNetwork(list):
if f[-3:] != '.py' or f[0] == '_':
continue
m = f[:-3]
- sys.stdout.write('importing %s ...' % m)
+ sys.stdout.write('importing %s ...' % m)
__import__(m)
print(' OK.')
@@ -33,8 +44,9 @@ class IPNetwork(list):
sys.argv = sys.argv[:1] + a
gsyncd = sys.modules['gsyncd']
- for a in [['--help'], ['--version'], ['--canonicalize-escape-url', '/foo']]:
- print('>>> invoking program with args: %s' % ' '.join(a))
+ for a in [['--help'], ['--version'],
+ ['--canonicalize-escape-url', '/foo']]:
+ print(('>>> invoking program with args: %s' % ' '.join(a)))
pid = os.fork()
if not pid:
sys_argv_set(a)
diff --git a/geo-replication/syncdaemon/__init__.py b/geo-replication/syncdaemon/__init__.py
new file mode 100644
index 00000000000..b4648b69645
--- /dev/null
+++ b/geo-replication/syncdaemon/__init__.py
@@ -0,0 +1,9 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
diff --git a/geo-replication/syncdaemon/argsupgrade.py b/geo-replication/syncdaemon/argsupgrade.py
new file mode 100644
index 00000000000..7af40633ef8
--- /dev/null
+++ b/geo-replication/syncdaemon/argsupgrade.py
@@ -0,0 +1,359 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+# Converts old style args into new style args
+
+from __future__ import print_function
+import sys
+from argparse import ArgumentParser
+import socket
+import os
+
+from syncdutils import GsyncdError
+from conf import GLUSTERD_WORKDIR
+
+
+def gethostbyname(hnam):
+ """gethostbyname wrapper"""
+ try:
+ return socket.gethostbyname(hnam)
+ except socket.gaierror:
+ ex = sys.exc_info()[1]
+ raise GsyncdError("failed to resolve %s: %s" %
+ (hnam, ex.strerror))
+
+
+def slave_url(urldata):
+ urldata = urldata.replace("ssh://", "")
+ host, vol = urldata.split("::")
+ vol = vol.split(":")[0]
+ return "%s::%s" % (host, vol)
+
+
+def init_gsyncd_template_conf():
+ path = GLUSTERD_WORKDIR + "/geo-replication/gsyncd_template.conf"
+ dname = os.path.dirname(path)
+ if not os.path.exists(dname):
+ try:
+ os.mkdir(dname)
+ except OSError:
+ pass
+
+ if not os.path.exists(path):
+ fd = os.open(path, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+
+
+def init_gsyncd_session_conf(master, slave):
+ slave = slave_url(slave)
+ master = master.strip(":")
+ slavehost, slavevol = slave.split("::")
+ slavehost = slavehost.split("@")[-1]
+
+ # Session Config File
+ path = "%s/geo-replication/%s_%s_%s/gsyncd.conf" % (
+ GLUSTERD_WORKDIR, master, slavehost, slavevol)
+
+ if os.path.exists(os.path.dirname(path)) and not os.path.exists(path):
+ fd = os.open(path, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+
+
+def init_gsyncd_conf(path):
+ dname = os.path.dirname(path)
+ if not os.path.exists(dname):
+ try:
+ os.mkdir(dname)
+ except OSError:
+ pass
+
+ if os.path.exists(dname) and not os.path.exists(path):
+ fd = os.open(path, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+
+
+def upgrade():
+ # Create dummy template conf(empty), hack to avoid glusterd
+ # fail when it does stat to check the existence.
+ init_gsyncd_template_conf()
+
+ inet6 = False
+ if "--inet6" in sys.argv:
+ inet6 = True
+
+ if "--monitor" in sys.argv:
+ # python gsyncd.py --path=/bricks/b1
+ # --monitor -c gsyncd.conf
+ # --iprefix=/var :gv1
+ # --glusterd-uuid=f26ac7a8-eb1b-4ea7-959c-80b27d3e43d0
+ # f241::gv2
+ p = ArgumentParser()
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("--glusterd-uuid")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ p.add_argument("--path", action="append")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ # Overwrite the sys.argv after rearrange
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+ sys.argv = [
+ sys.argv[0],
+ "monitor",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ "--local-node-id",
+ pargs.glusterd_uuid
+ ]
+ elif "--status-get" in sys.argv:
+ # -c gsyncd.conf --iprefix=/var :gv1 f241::gv2
+ # --status-get --path /bricks/b1
+ p = ArgumentParser()
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("-c")
+ p.add_argument("--path")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ sys.argv = [
+ sys.argv[0],
+ "status",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ "--local-path",
+ pargs.path
+ ]
+ elif "--canonicalize-url" in sys.argv:
+ # This can accept multiple URLs and converts each URL to the
+ # format ssh://USER@IP:gluster://127.0.0.1:VOLUME
+ # This format not used in gsyncd, but added for glusterd compatibility
+ p = ArgumentParser()
+ p.add_argument("--canonicalize-url", nargs="+")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ for url in pargs.canonicalize_url:
+ host, vol = url.split("::")
+ host = host.replace("ssh://", "")
+ remote_addr = host
+ if "@" not in remote_addr:
+ remote_addr = "root@" + remote_addr
+
+ user, hname = remote_addr.split("@")
+
+ if not inet6:
+ hname = gethostbyname(hname)
+
+ print(("ssh://%s@%s:gluster://127.0.0.1:%s" % (
+ user, hname, vol)))
+
+ sys.exit(0)
+ elif "--normalize-url" in sys.argv:
+ # Adds schema prefix as ssh://
+ # This format not used in gsyncd, but added for glusterd compatibility
+ p = ArgumentParser()
+ p.add_argument("--normalize-url")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+ print(("ssh://%s" % slave_url(pargs.normalize_url)))
+ sys.exit(0)
+ elif "--config-get-all" in sys.argv:
+ # -c gsyncd.conf --iprefix=/var :gv1 f241::gv2 --config-get-all
+ p = ArgumentParser()
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ sys.argv = [
+ sys.argv[0],
+ "config-get",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ "--show-defaults",
+ "--use-underscore"
+ ]
+ elif "--verify" in sys.argv and "spawning" in sys.argv:
+ # Just checks that able to spawn gsyncd or not
+ sys.exit(0)
+ elif "--slavevoluuid-get" in sys.argv:
+ # --slavevoluuid-get f241::gv2
+ p = ArgumentParser()
+ p.add_argument("--slavevoluuid-get")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+ host, vol = pargs.slavevoluuid_get.split("::")
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "voluuidget",
+ host,
+ vol
+ ]
+ elif "--config-set-rx" in sys.argv:
+ # Not required since default conf is not generated
+ # and custom conf generated only when required
+ # -c gsyncd.conf --config-set-rx remote-gsyncd
+ # /usr/local/libexec/glusterfs/gsyncd . .
+ # Touch the gsyncd.conf file and create session
+ # directory if required
+ p = ArgumentParser()
+ p.add_argument("-c", dest="config_file")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ # If not template conf then it is trying to create
+ # session config, create a empty file instead
+ if pargs.config_file.endswith("gsyncd.conf"):
+ init_gsyncd_conf(pargs.config_file)
+ sys.exit(0)
+ elif "--create" in sys.argv:
+ # To update monitor status file
+ # --create Created -c gsyncd.conf
+ # --iprefix=/var :gv1 f241::gv2
+ p = ArgumentParser()
+ p.add_argument("--create")
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "monitor-status",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ pargs.create
+ ]
+ elif "--config-get" in sys.argv:
+ # -c gsyncd.conf --iprefix=/var :gv1 f241::gv2 --config-get pid-file
+ p = ArgumentParser()
+ p.add_argument("--config-get")
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "config-get",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ "--only-value",
+ "--show-defaults",
+ "--name",
+ pargs.config_get.replace("_", "-")
+ ]
+ elif "--config-set" in sys.argv:
+ # ignore session-owner
+ if "session-owner" in sys.argv:
+ sys.exit(0)
+
+ # --path=/bricks/b1 -c gsyncd.conf :gv1 f241::gv2
+ # --config-set log_level DEBUG
+ p = ArgumentParser()
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("--config-set", action='store_true')
+ p.add_argument("name")
+ p.add_argument("--value")
+ p.add_argument("-c")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "config-set",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ "--name=%s" % pargs.name,
+ "--value=%s" % pargs.value
+ ]
+ elif "--config-check" in sys.argv:
+ # --config-check georep_session_working_dir
+ p = ArgumentParser()
+ p.add_argument("--config-check")
+ p.add_argument("-c")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "config-check",
+ pargs.config_check.replace("_", "-")
+ ]
+ elif "--config-del" in sys.argv:
+ # -c gsyncd.conf --iprefix=/var :gv1 f241::gv2 --config-del log_level
+ p = ArgumentParser()
+ p.add_argument("--config-del")
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("-c")
+ p.add_argument("--iprefix")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "config-reset",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave),
+ pargs.config_del.replace("_", "-")
+ ]
+ elif "--delete" in sys.argv:
+ # --delete -c gsyncd.conf --iprefix=/var
+ # --path-list=--path=/bricks/b1 :gv1 f241::gv2
+ p = ArgumentParser()
+ p.add_argument("--reset-sync-time", action="store_true")
+ p.add_argument("--path-list")
+ p.add_argument("master")
+ p.add_argument("slave")
+ p.add_argument("--iprefix")
+ p.add_argument("-c")
+ pargs = p.parse_known_args(sys.argv[1:])[0]
+
+ init_gsyncd_session_conf(pargs.master, pargs.slave)
+
+ paths = pargs.path_list.split("--path=")
+ paths = ["--path=%s" % x.strip() for x in paths if x.strip() != ""]
+
+ # Modified sys.argv
+ sys.argv = [
+ sys.argv[0],
+ "delete",
+ pargs.master.strip(":"),
+ slave_url(pargs.slave)
+ ]
+ sys.argv += paths
+
+ if pargs.reset_sync_time:
+ sys.argv.append("--reset-sync-time")
+
+ if inet6:
+ # Add `--inet6` as first argument
+ sys.argv = [sys.argv[0], "--inet6"] + sys.argv[1:]
diff --git a/geo-replication/syncdaemon/conf.py.in b/geo-replication/syncdaemon/conf.py.in
new file mode 100644
index 00000000000..2042fa9cdfb
--- /dev/null
+++ b/geo-replication/syncdaemon/conf.py.in
@@ -0,0 +1,17 @@
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+GLUSTERFS_LIBEXECDIR = '@GLUSTERFS_LIBEXECDIR@'
+GLUSTERD_WORKDIR = "@GLUSTERD_WORKDIR@"
+
+LOCALSTATEDIR = "@localstatedir@"
+UUID_FILE = "@GLUSTERD_WORKDIR@/glusterd.info"
+GLUSTERFS_CONFDIR = "@SYSCONF_DIR@/glusterfs"
+GCONF_VERSION = 4.0
diff --git a/geo-replication/syncdaemon/gsyncd.py b/geo-replication/syncdaemon/gsyncd.py
new file mode 100644
index 00000000000..257ed72c6ae
--- /dev/null
+++ b/geo-replication/syncdaemon/gsyncd.py
@@ -0,0 +1,325 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from argparse import ArgumentParser
+import time
+import os
+from errno import EEXIST
+import sys
+import logging
+
+from logutils import setup_logging
+import gsyncdconfig as gconf
+from rconf import rconf
+import subcmds
+from conf import GLUSTERD_WORKDIR, GLUSTERFS_CONFDIR, GCONF_VERSION
+from syncdutils import (set_term_handler, finalize, lf,
+ log_raise_exception, FreeObject, escape)
+import argsupgrade
+
+
+GSYNCD_VERSION = "gsyncd.py %s.0" % GCONF_VERSION
+
+
+def main():
+ rconf.starttime = time.time()
+
+ # If old Glusterd sends commands in old format, below function
+ # converts the sys.argv to new format. This conversion is added
+ # temporarily for backward compatibility. This can be removed
+ # once integrated with Glusterd2
+ # This modifies sys.argv globally, so rest of the code works as usual
+ argsupgrade.upgrade()
+
+ # Default argparse version handler prints to stderr, which is fixed in
+ # 3.x series but not in 2.x, using custom parser to fix this issue
+ if "--version" in sys.argv:
+ print(GSYNCD_VERSION)
+ sys.exit(0)
+
+ parser = ArgumentParser()
+ parser.add_argument("--inet6", action="store_true")
+ sp = parser.add_subparsers(dest="subcmd")
+
+ # Monitor Status File update
+ p = sp.add_parser("monitor-status")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave details user@host::vol format")
+ p.add_argument("status", help="Update Monitor Status")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+
+ # Monitor
+ p = sp.add_parser("monitor")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave details user@host::vol format")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--pause-on-start",
+ action="store_true",
+ help="Start with Paused state")
+ p.add_argument("--local-node-id", help="Local Node ID")
+ p.add_argument("--debug", action="store_true")
+ p.add_argument("--use-gconf-volinfo", action="store_true")
+
+ # Worker
+ p = sp.add_parser("worker")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave details user@host::vol format")
+ p.add_argument("--local-path", help="Local Brick Path")
+ p.add_argument("--feedback-fd", type=int,
+ help="feedback fd between monitor and worker")
+ p.add_argument("--local-node", help="Local master node")
+ p.add_argument("--local-node-id", help="Local Node ID")
+ p.add_argument("--subvol-num", type=int, help="Subvolume number")
+ p.add_argument("--is-hottier", action="store_true",
+ help="Is this brick part of hot tier")
+ p.add_argument("--resource-remote",
+ help="Remote node to connect to Slave Volume")
+ p.add_argument("--resource-remote-id",
+ help="Remote node ID to connect to Slave Volume")
+ p.add_argument("--slave-id", help="Slave Volume ID")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+
+ # Slave
+ p = sp.add_parser("slave")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave details user@host::vol format")
+ p.add_argument("--session-owner")
+ p.add_argument("--master-brick",
+ help="Master brick which is connected to the Slave")
+ p.add_argument("--master-node",
+ help="Master node which is connected to the Slave")
+ p.add_argument("--master-node-id",
+ help="Master node ID which is connected to the Slave")
+ p.add_argument("--local-node", help="Local Slave node")
+ p.add_argument("--local-node-id", help="Local Slave ID")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+
+ # All configurations which are configured via "slave-" options
+ # DO NOT add default values for these configurations, default values
+ # will be picked from template config file
+ p.add_argument("--slave-timeout", type=int,
+ help="Timeout to end gsyncd at Slave side")
+ p.add_argument("--use-rsync-xattrs", action="store_true")
+ p.add_argument("--slave-log-level", help="Slave Gsyncd Log level")
+ p.add_argument("--slave-gluster-log-level",
+ help="Slave Gluster mount Log level")
+ p.add_argument("--slave-gluster-command-dir",
+ help="Directory where Gluster binaries exist on slave")
+ p.add_argument("--slave-access-mount", action="store_true",
+ help="Do not lazy umount the slave volume")
+ p.add_argument("--master-dist-count", type=int,
+ help="Master Distribution count")
+
+ # Status
+ p = sp.add_parser("status")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--local-path", help="Local Brick Path")
+ p.add_argument("--debug", action="store_true")
+ p.add_argument("--json", action="store_true")
+
+ # Config-check
+ p = sp.add_parser("config-check")
+ p.add_argument("name", help="Config Name")
+ p.add_argument("--value", help="Config Value")
+ p.add_argument("--debug", action="store_true")
+
+ # Config-get
+ p = sp.add_parser("config-get")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave")
+ p.add_argument("--name", help="Config Name")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+ p.add_argument("--show-defaults", action="store_true")
+ p.add_argument("--only-value", action="store_true")
+ p.add_argument("--use-underscore", action="store_true")
+ p.add_argument("--json", action="store_true")
+
+ # Config-set
+ p = sp.add_parser("config-set")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave")
+ p.add_argument("-n", "--name", help="Config Name")
+ p.add_argument("-v", "--value", help="Config Value")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+
+ # Config-reset
+ p = sp.add_parser("config-reset")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave")
+ p.add_argument("name", help="Config Name")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument("--debug", action="store_true")
+
+ # voluuidget
+ p = sp.add_parser("voluuidget")
+ p.add_argument("host", help="Hostname")
+ p.add_argument("volname", help="Volume Name")
+ p.add_argument("--debug", action="store_true")
+
+ # Delete
+ p = sp.add_parser("delete")
+ p.add_argument("master", help="Master Volume Name")
+ p.add_argument("slave", help="Slave")
+ p.add_argument("-c", "--config-file", help="Config File")
+ p.add_argument('--path', dest='paths', action="append")
+ p.add_argument("--reset-sync-time", action="store_true",
+ help="Reset Sync Time")
+ p.add_argument("--debug", action="store_true")
+
+ # Parse arguments
+ args = parser.parse_args()
+
+ # Extra template values, All arguments are already part of template
+ # variables, use this for adding extra variables
+ extra_tmpl_args = {}
+
+ # Add First/Primary Slave host, user and volume
+ if getattr(args, "slave", None) is not None:
+ hostdata, slavevol = args.slave.split("::")
+ hostdata = hostdata.split("@")
+ slavehost = hostdata[-1]
+ slaveuser = "root"
+ if len(hostdata) == 2:
+ slaveuser = hostdata[0]
+ extra_tmpl_args["primary_slave_host"] = slavehost
+ extra_tmpl_args["slaveuser"] = slaveuser
+ extra_tmpl_args["slavevol"] = slavevol
+
+ # Add Bricks encoded path
+ if getattr(args, "local_path", None) is not None:
+ extra_tmpl_args["local_id"] = escape(args.local_path)
+
+ # Add Master Bricks encoded path(For Slave)
+ if getattr(args, "master_brick", None) is not None:
+ extra_tmpl_args["master_brick_id"] = escape(args.master_brick)
+
+ # Load configurations
+ config_file = getattr(args, "config_file", None)
+
+ # Subcmd accepts config file argument but not passed
+ # Set default path for config file in that case
+ # If an subcmd accepts config file then it also accepts
+ # master and Slave arguments.
+ if config_file is None and hasattr(args, "config_file") \
+ and args.subcmd != "slave":
+ config_file = "%s/geo-replication/%s_%s_%s/gsyncd.conf" % (
+ GLUSTERD_WORKDIR,
+ args.master,
+ extra_tmpl_args["primary_slave_host"],
+ extra_tmpl_args["slavevol"])
+
+ # If Config file path not exists, log error and continue using default conf
+ config_file_error_msg = None
+ if config_file is not None and not os.path.exists(config_file):
+ # Logging not yet initialized, create the error message to
+ # log later and reset the config_file to None
+ config_file_error_msg = lf(
+ "Session config file not exists, using the default config",
+ path=config_file)
+ config_file = None
+
+ rconf.config_file = config_file
+
+ # Override gconf values from argument values only if it is slave gsyncd
+ override_from_args = False
+ if args.subcmd == "slave":
+ override_from_args = True
+
+ if config_file is not None and \
+ args.subcmd in ["monitor", "config-get", "config-set", "config-reset"]:
+ ret = gconf.is_config_file_old(config_file, args.master, extra_tmpl_args["slavevol"])
+ if ret is not None:
+ gconf.config_upgrade(config_file, ret)
+
+ # Load Config file
+ gconf.load(GLUSTERFS_CONFDIR + "/gsyncd.conf",
+ config_file,
+ vars(args),
+ extra_tmpl_args,
+ override_from_args)
+
+ # Default label to print in log file
+ label = args.subcmd
+ if args.subcmd in ("worker"):
+ # If Worker, then add brick path also to label
+ label = "%s %s" % (args.subcmd, args.local_path)
+ elif args.subcmd == "slave":
+ # If Slave add Master node and Brick details
+ label = "%s %s%s" % (args.subcmd, args.master_node, args.master_brick)
+
+ # Setup Logger
+ # Default log file
+ log_file = gconf.get("cli-log-file")
+ log_level = gconf.get("cli-log-level")
+ if getattr(args, "master", None) is not None and \
+ getattr(args, "slave", None) is not None:
+ log_file = gconf.get("log-file")
+ log_level = gconf.get("log-level")
+
+ # Use different log file location for Slave log file
+ if args.subcmd == "slave":
+ log_file = gconf.get("slave-log-file")
+ log_level = gconf.get("slave-log-level")
+
+ if args.debug:
+ log_file = "-"
+ log_level = "DEBUG"
+
+ # Create Logdir if not exists
+ try:
+ if log_file != "-":
+ os.mkdir(os.path.dirname(log_file))
+ except OSError as e:
+ if e.errno != EEXIST:
+ raise
+
+ setup_logging(
+ log_file=log_file,
+ level=log_level,
+ label=label
+ )
+
+ if config_file_error_msg is not None:
+ logging.warn(config_file_error_msg)
+
+ # Log message for loaded config file
+ if config_file is not None:
+ logging.debug(lf("Using session config file", path=config_file))
+
+ set_term_handler()
+ excont = FreeObject(exval=0)
+
+ # Gets the function name based on the input argument. For example
+ # if subcommand passed as argument is monitor then it looks for
+ # function with name "subcmd_monitor" in subcmds file
+ func = getattr(subcmds, "subcmd_" + args.subcmd.replace("-", "_"), None)
+
+ try:
+ try:
+ if func is not None:
+ rconf.args = args
+ func(args)
+ except:
+ log_raise_exception(excont)
+ finally:
+ finalize(exval=excont.exval)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/geo-replication/syncdaemon/gsyncdconfig.py b/geo-replication/syncdaemon/gsyncdconfig.py
new file mode 100644
index 00000000000..8848071997a
--- /dev/null
+++ b/geo-replication/syncdaemon/gsyncdconfig.py
@@ -0,0 +1,485 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+try:
+ from ConfigParser import RawConfigParser, NoSectionError
+except ImportError:
+ from configparser import RawConfigParser, NoSectionError
+import os
+import shutil
+from string import Template
+from datetime import datetime
+from threading import Lock
+
+
+# Global object which can be used in other modules
+# once load_config is called
+_gconf = {}
+
+
+class GconfNotConfigurable(Exception):
+ pass
+
+
+class GconfInvalidValue(Exception):
+ pass
+
+
+class Gconf(object):
+ def __init__(self, default_conf_file, custom_conf_file=None,
+ args={}, extra_tmpl_args={}, override_from_args=False):
+ self.lock = Lock()
+ self.default_conf_file = default_conf_file
+ self.custom_conf_file = custom_conf_file
+ self.tmp_conf_file = None
+ self.gconf = {}
+ self.gconfdata = {}
+ self.gconf_typecast = {}
+ self.template_conf = []
+ self.non_configurable_configs = []
+ self.prev_mtime = 0
+ if custom_conf_file is not None:
+ self.tmp_conf_file = custom_conf_file + ".tmp"
+
+ self.session_conf_items = []
+ self.args = args
+ self.extra_tmpl_args = extra_tmpl_args
+ self.override_from_args = override_from_args
+ # Store default values only if overwritten, Only for JSON/CLI output
+ self.default_values = {}
+ self._load()
+
+ def _tmpl_substitute(self):
+ tmpl_values = {}
+ for k, v in self.gconf.items():
+ tmpl_values[k.replace("-", "_")] = v
+
+ # override the config file values with the one user passed
+ for k, v in self.args.items():
+ # override the existing value only if set by user
+ if v is not None:
+ tmpl_values[k] = v
+
+ for k, v in self.extra_tmpl_args.items():
+ tmpl_values[k] = v
+
+ for k, v in self.gconf.items():
+ if k in self.template_conf and \
+ (isinstance(v, str) or isinstance(v, unicode)):
+ self.gconf[k] = Template(v).safe_substitute(tmpl_values)
+
+ def _do_typecast(self):
+ for k, v in self.gconf.items():
+ cast_func = globals().get(
+ "to_" + self.gconf_typecast.get(k, "string"), None)
+ if cast_func is not None:
+ self.gconf[k] = cast_func(v)
+ if self.default_values.get(k, None) is not None:
+ self.default_values[k] = cast_func(v)
+
+ def reset(self, name):
+ # If custom conf file is not set then it is only read only configs
+ if self.custom_conf_file is None:
+ raise GconfNotConfigurable()
+
+ # If a config can not be modified
+ if name != "all" and not self._is_configurable(name):
+ raise GconfNotConfigurable()
+
+ cnf = RawConfigParser()
+ with open(self.custom_conf_file) as f:
+ cnf.readfp(f)
+
+ # Nothing to Reset, Not configured
+ if name != "all":
+ if not cnf.has_option("vars", name):
+ return True
+
+ # Remove option from custom conf file
+ cnf.remove_option("vars", name)
+ else:
+ # Remove and add empty section, do not disturb if config file
+ # already has any other section
+ try:
+ cnf.remove_section("vars")
+ except NoSectionError:
+ pass
+
+ cnf.add_section("vars")
+
+ with open(self.tmp_conf_file, "w") as fw:
+ cnf.write(fw)
+
+ os.rename(self.tmp_conf_file, self.custom_conf_file)
+
+ self.reload()
+
+ return True
+
+ def set(self, name, value):
+ if self.custom_conf_file is None:
+ raise GconfNotConfigurable()
+
+ if not self._is_configurable(name):
+ raise GconfNotConfigurable()
+
+ if not self._is_valid_value(name, value):
+ raise GconfInvalidValue()
+
+ curr_val = self.gconf.get(name, None)
+ if curr_val == value:
+ return True
+
+ cnf = RawConfigParser()
+ with open(self.custom_conf_file) as f:
+ cnf.readfp(f)
+
+ if not cnf.has_section("vars"):
+ cnf.add_section("vars")
+
+ cnf.set("vars", name, value)
+ with open(self.tmp_conf_file, "w") as fw:
+ cnf.write(fw)
+
+ os.rename(self.tmp_conf_file, self.custom_conf_file)
+
+ self.reload()
+
+ return True
+
+ def check(self, name, value=None, with_conffile=True):
+ if with_conffile and self.custom_conf_file is None:
+ raise GconfNotConfigurable()
+
+ if not self._is_configurable(name):
+ raise GconfNotConfigurable()
+
+ if value is not None and not self._is_valid_value(name, value):
+ raise GconfInvalidValue()
+
+
+ def _load_with_lock(self):
+ with self.lock:
+ self._load()
+
+ def _load(self):
+ self.gconf = {}
+ self.template_conf = []
+ self.gconf_typecast = {}
+ self.non_configurable_configs = []
+ self.session_conf_items = []
+ self.default_values = {}
+
+ conf = RawConfigParser()
+ # Default Template config file
+ with open(self.default_conf_file) as f:
+ conf.readfp(f)
+
+ # Custom Config file
+ if self.custom_conf_file is not None:
+ with open(self.custom_conf_file) as f:
+ conf.readfp(f)
+
+ # Get version from default conf file
+ self.version = conf.get("__meta__", "version")
+
+ # Populate default values
+ for sect in conf.sections():
+ if sect in ["__meta__", "vars"]:
+ continue
+
+ # Collect list of available options with help details
+ self.gconfdata[sect] = {}
+ for k, v in conf.items(sect):
+ self.gconfdata[sect][k] = v.strip()
+
+ # Collect the Type cast information
+ if conf.has_option(sect, "type"):
+ self.gconf_typecast[sect] = conf.get(sect, "type")
+
+ # Prepare list of configurable conf
+ if conf.has_option(sect, "configurable"):
+ if conf.get(sect, "configurable").lower() == "false":
+ self.non_configurable_configs.append(sect)
+
+ # if it is a template conf value which needs to be substituted
+ if conf.has_option(sect, "template"):
+ if conf.get(sect, "template").lower().strip() == "true":
+ self.template_conf.append(sect)
+
+ # Set default values
+ if conf.has_option(sect, "value"):
+ self.gconf[sect] = conf.get(sect, "value").strip()
+
+ # Load the custom conf elements and overwrite
+ if conf.has_section("vars"):
+ for k, v in conf.items("vars"):
+ self.session_conf_items.append(k)
+ self.default_values[k] = self.gconf.get(k, "")
+ self.gconf[k] = v.strip()
+
+ # Overwrite the Slave configurations which are sent as
+ # arguments to gsyncd slave
+ if self.override_from_args:
+ for k, v in self.args.items():
+ k = k.replace("_", "-")
+ if k.startswith("slave-") and k in self.gconf:
+ self.gconf[k] = v
+
+ self._tmpl_substitute()
+ self._do_typecast()
+
+ def reload(self, with_lock=True):
+ if self._is_config_changed():
+ if with_lock:
+ self._load_with_lock()
+ else:
+ self._load()
+
+ def get(self, name, default_value=None, with_lock=True):
+ if with_lock:
+ with self.lock:
+ return self.gconf.get(name, default_value)
+ else:
+ return self.gconf.get(name, default_value)
+
+ def getall(self, show_defaults=False, show_non_configurable=False):
+ cnf = {}
+ if not show_defaults:
+ for k in self.session_conf_items:
+ if k not in self.non_configurable_configs:
+ dv = self.default_values.get(k, "")
+ cnf[k] = {
+ "value": self.get(k),
+ "default": dv,
+ "configurable": True,
+ "modified": False if dv == "" else True
+ }
+ return cnf
+
+ # Show all configs including defaults
+ for k, v in self.gconf.items():
+ configurable = False if k in self.non_configurable_configs \
+ else True
+ dv = self.default_values.get(k, "")
+ modified = False if dv == "" else True
+ if show_non_configurable:
+ cnf[k] = {
+ "value": v,
+ "default": dv,
+ "configurable": configurable,
+ "modified": modified
+ }
+ else:
+ if k not in self.non_configurable_configs:
+ cnf[k] = {
+ "value": v,
+ "default": dv,
+ "configurable": configurable,
+ "modified": modified
+ }
+
+ return cnf
+
+ def getr(self, name, default_value=None):
+ with self.lock:
+ self.reload(with_lock=False)
+ return self.get(name, default_value, with_lock=False)
+
+ def get_help(self, name=None):
+ pass
+
+ def _is_configurable(self, name):
+ item = self.gconfdata.get(name, None)
+ if item is None:
+ return False
+
+ return item.get("configurable", True)
+
+ def _is_valid_value(self, name, value):
+ item = self.gconfdata.get(name, None)
+ if item is None:
+ return False
+
+ # If validation func not defined
+ if item.get("validation", None) is None:
+ return True
+
+ # minmax validation
+ if item["validation"] == "minmax":
+ return validate_minmax(value, item["min"], item["max"])
+
+ if item["validation"] == "choice":
+ return validate_choice(value, item["allowed_values"])
+
+ if item["validation"] == "bool":
+ return validate_bool(value)
+
+ if item["validation"] == "execpath":
+ return validate_execpath(value)
+
+ if item["validation"] == "unixtime":
+ return validate_unixtime(value)
+
+ if item["validation"] == "int":
+ return validate_int(value)
+
+ return False
+
+ def _is_config_changed(self):
+ if self.custom_conf_file is not None and \
+ os.path.exists(self.custom_conf_file):
+ st = os.lstat(self.custom_conf_file)
+ if st.st_mtime > self.prev_mtime:
+ self.prev_mtime = st.st_mtime
+ return True
+
+ return False
+
+def is_config_file_old(config_file, mastervol, slavevol):
+ cnf = RawConfigParser()
+ cnf.read(config_file)
+ session_section = "peers %s %s" % (mastervol, slavevol)
+ try:
+ return dict(cnf.items(session_section))
+ except NoSectionError:
+ return None
+
+def config_upgrade(config_file, ret):
+ config_file_backup = os.path.join(os.path.dirname(config_file), "gsyncd.conf.bkp")
+
+ #copy old config file in a backup file
+ shutil.copyfile(config_file, config_file_backup)
+
+ #write a new config file
+ config = RawConfigParser()
+ config.add_section('vars')
+
+ for key, value in ret.items():
+ #handle option name changes
+ if key == "use_tarssh":
+ new_key = "sync-method"
+ if value == "true":
+ new_value = "tarssh"
+ else:
+ new_value = "rsync"
+ config.set('vars', new_key, new_value)
+ elif key == "timeout":
+ new_key = "slave-timeout"
+ config.set('vars', new_key, value)
+ #for changes like: ignore_deletes to ignore-deletes
+ else:
+ new_key = key.replace("_", "-")
+ config.set('vars', new_key, value)
+
+ with open(config_file, 'w') as configfile:
+ config.write(configfile)
+
+
+def validate_int(value):
+ try:
+ _ = int(value)
+ return True
+ except ValueError:
+ return False
+
+
+def validate_unixtime(value):
+ try:
+ y = datetime.fromtimestamp(int(value)).strftime("%Y")
+ if y == "1970":
+ return False
+
+ return True
+ except ValueError:
+ return False
+
+
+def validate_minmax(value, minval, maxval):
+ try:
+ value = int(value)
+ minval = int(minval)
+ maxval = int(maxval)
+ return value >= minval and value <= maxval
+ except ValueError:
+ return False
+
+
+def validate_choice(value, allowed_values):
+ allowed_values = allowed_values.split(",")
+ allowed_values = [v.strip() for v in allowed_values]
+
+ return value in allowed_values
+
+
+def validate_bool(value):
+ return value in ["true", "false"]
+
+
+def validate_execpath(value):
+ return os.path.isfile(value) and os.access(value, os.X_OK)
+
+
+def validate_filepath(value):
+ return os.path.isfile(value)
+
+
+def validate_path(value):
+ return os.path.exists(value)
+
+
+def to_int(value):
+ return int(value)
+
+
+def to_float(value):
+ return float(value)
+
+
+def to_bool(value):
+ if isinstance(value, bool):
+ return value
+ return True if value in ["true", "True"] else False
+
+
+def get(name, default_value=None):
+ return _gconf.get(name, default_value)
+
+
+def getall(show_defaults=False, show_non_configurable=False):
+ return _gconf.getall(show_defaults=show_defaults,
+ show_non_configurable=show_non_configurable)
+
+
+def getr(name, default_value=None):
+ return _gconf.getr(name, default_value)
+
+
+def load(default_conf, custom_conf=None, args={}, extra_tmpl_args={},
+ override_from_args=False):
+ global _gconf
+ _gconf = Gconf(default_conf, custom_conf, args, extra_tmpl_args,
+ override_from_args)
+
+
+def setconfig(name, value):
+ global _gconf
+ _gconf.set(name, value)
+
+
+def resetconfig(name):
+ global _gconf
+ _gconf.reset(name)
+
+
+def check(name, value=None, with_conffile=True):
+ global _gconf
+ _gconf.check(name, value=value, with_conffile=with_conffile)
diff --git a/geo-replication/syncdaemon/gsyncdstatus.py b/geo-replication/syncdaemon/gsyncdstatus.py
new file mode 100644
index 00000000000..1a655ff8887
--- /dev/null
+++ b/geo-replication/syncdaemon/gsyncdstatus.py
@@ -0,0 +1,419 @@
+#!/usr/bin/python3
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from __future__ import print_function
+import fcntl
+import os
+import tempfile
+try:
+ import urllib.parse as urllib
+except ImportError:
+ import urllib
+import json
+import time
+from datetime import datetime
+from errno import EACCES, EAGAIN, ENOENT
+import logging
+
+from syncdutils import (EVENT_GEOREP_ACTIVE, EVENT_GEOREP_PASSIVE, gf_event,
+ EVENT_GEOREP_CHECKPOINT_COMPLETED, lf)
+
+DEFAULT_STATUS = "N/A"
+MONITOR_STATUS = ("Created", "Started", "Paused", "Stopped")
+STATUS_VALUES = (DEFAULT_STATUS,
+ "Initializing...",
+ "Active",
+ "Passive",
+ "Faulty")
+
+CRAWL_STATUS_VALUES = (DEFAULT_STATUS,
+ "Hybrid Crawl",
+ "History Crawl",
+ "Changelog Crawl")
+
+
+def human_time(ts):
+ try:
+ return datetime.fromtimestamp(float(ts)).strftime("%Y-%m-%d %H:%M:%S")
+ except ValueError:
+ return DEFAULT_STATUS
+
+
+def human_time_utc(ts):
+ try:
+ return datetime.utcfromtimestamp(
+ float(ts)).strftime("%Y-%m-%d %H:%M:%S")
+ except ValueError:
+ return DEFAULT_STATUS
+
+
+def get_default_values():
+ return {
+ "slave_node": DEFAULT_STATUS,
+ "worker_status": DEFAULT_STATUS,
+ "last_synced": 0,
+ "last_synced_entry": 0,
+ "crawl_status": DEFAULT_STATUS,
+ "entry": 0,
+ "data": 0,
+ "meta": 0,
+ "failures": 0,
+ "checkpoint_completed": DEFAULT_STATUS,
+ "checkpoint_time": 0,
+ "checkpoint_completion_time": 0}
+
+
+class LockedOpen(object):
+
+ def __init__(self, filename, *args, **kwargs):
+ self.filename = filename
+ self.open_args = args
+ self.open_kwargs = kwargs
+ self.fileobj = None
+
+ def __enter__(self):
+ """
+ If two processes compete to update a file, The first process
+ gets the lock and the second process is blocked in the fcntl.flock()
+ call. When first process replaces the file and releases the lock,
+ the already open file descriptor in the second process now points
+ to a "ghost" file(not reachable by any path name) with old contents.
+ To avoid that conflict, check the fd already opened is same or
+ not. Open new one if not same
+ """
+ f = open(self.filename, *self.open_args, **self.open_kwargs)
+ while True:
+ fcntl.flock(f, fcntl.LOCK_EX)
+ fnew = open(self.filename, *self.open_args, **self.open_kwargs)
+ if os.path.sameopenfile(f.fileno(), fnew.fileno()):
+ fnew.close()
+ break
+ else:
+ f.close()
+ f = fnew
+ self.fileobj = f
+ return f
+
+ def __exit__(self, _exc_type, _exc_value, _traceback):
+ fcntl.flock(self.fileobj, fcntl.LOCK_UN)
+ self.fileobj.close()
+
+
+def set_monitor_status(status_file, status):
+ fd = os.open(status_file, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+ with LockedOpen(status_file, 'r+'):
+ with tempfile.NamedTemporaryFile('w', dir=os.path.dirname(status_file),
+ delete=False) as tf:
+ tf.write(status)
+ tempname = tf.name
+
+ os.rename(tempname, status_file)
+ dirfd = os.open(os.path.dirname(os.path.abspath(status_file)),
+ os.O_DIRECTORY)
+ os.fsync(dirfd)
+ os.close(dirfd)
+
+
+class GeorepStatus(object):
+ def __init__(self, monitor_status_file, master_node, brick, master_node_id,
+ master, slave, monitor_pid_file=None):
+ self.master = master
+ slv_data = slave.split("::")
+ self.slave_host = slv_data[0]
+ self.slave_volume = slv_data[1].split(":")[0] # Remove Slave UUID
+ self.work_dir = os.path.dirname(monitor_status_file)
+ self.monitor_status_file = monitor_status_file
+ self.filename = os.path.join(self.work_dir,
+ "brick_%s.status"
+ % urllib.quote_plus(brick))
+
+ fd = os.open(self.filename, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+ fd = os.open(self.monitor_status_file, os.O_CREAT | os.O_RDWR)
+ os.close(fd)
+ self.master_node = master_node
+ self.master_node_id = master_node_id
+ self.brick = brick
+ self.default_values = get_default_values()
+ self.monitor_pid_file = monitor_pid_file
+
+ def send_event(self, event_type, **kwargs):
+ gf_event(event_type,
+ master_volume=self.master,
+ master_node=self.master_node,
+ master_node_id=self.master_node_id,
+ slave_host=self.slave_host,
+ slave_volume=self.slave_volume,
+ brick_path=self.brick,
+ **kwargs)
+
+ def _update(self, mergerfunc):
+ data = self.default_values
+ with LockedOpen(self.filename, 'r+') as f:
+ try:
+ data.update(json.load(f))
+ except ValueError:
+ pass
+
+ data = mergerfunc(data)
+ # If Data is not changed by merger func
+ if not data:
+ return False
+
+ with tempfile.NamedTemporaryFile(
+ 'w',
+ dir=os.path.dirname(self.filename),
+ delete=False) as tf:
+ tf.write(data)
+ tempname = tf.name
+
+ os.rename(tempname, self.filename)
+ dirfd = os.open(os.path.dirname(os.path.abspath(self.filename)),
+ os.O_DIRECTORY)
+ os.fsync(dirfd)
+ os.close(dirfd)
+ return True
+
+ def reset_on_worker_start(self):
+ def merger(data):
+ data["slave_node"] = DEFAULT_STATUS
+ data["crawl_status"] = DEFAULT_STATUS
+ data["entry"] = 0
+ data["data"] = 0
+ data["meta"] = 0
+ return json.dumps(data)
+
+ self._update(merger)
+
+ def set_field(self, key, value):
+ def merger(data):
+ # Current data and prev data is same
+ if data[key] == value:
+ return {}
+
+ data[key] = value
+ return json.dumps(data)
+
+ return self._update(merger)
+
+ def trigger_gf_event_checkpoint_completion(self, checkpoint_time,
+ checkpoint_completion_time):
+ self.send_event(EVENT_GEOREP_CHECKPOINT_COMPLETED,
+ checkpoint_time=checkpoint_time,
+ checkpoint_completion_time=checkpoint_completion_time)
+
+ def set_last_synced(self, value, checkpoint_time):
+ def merger(data):
+ data["last_synced"] = value[0]
+
+ # If checkpoint is not set or reset
+ # or if last set checkpoint is changed
+ if checkpoint_time == 0 or \
+ checkpoint_time != data["checkpoint_time"]:
+ data["checkpoint_time"] = 0
+ data["checkpoint_completion_time"] = 0
+ data["checkpoint_completed"] = "No"
+
+ # If checkpoint is completed and not marked as completed
+ # previously then update the checkpoint completed time
+ if checkpoint_time > 0 and checkpoint_time <= value[0]:
+ if data["checkpoint_completed"] == "No":
+ curr_time = int(time.time())
+ data["checkpoint_time"] = checkpoint_time
+ data["checkpoint_completion_time"] = curr_time
+ data["checkpoint_completed"] = "Yes"
+ logging.info(lf("Checkpoint completed",
+ checkpoint_time=human_time_utc(
+ checkpoint_time),
+ completion_time=human_time_utc(curr_time)))
+ self.trigger_gf_event_checkpoint_completion(
+ checkpoint_time, curr_time)
+
+ return json.dumps(data)
+
+ self._update(merger)
+
+ def set_worker_status(self, status):
+ if self.set_field("worker_status", status):
+ logging.info(lf("Worker Status Change",
+ status=status))
+
+ def set_worker_crawl_status(self, status):
+ if self.set_field("crawl_status", status):
+ logging.info(lf("Crawl Status Change",
+ status=status))
+
+ def set_slave_node(self, slave_node):
+ def merger(data):
+ data["slave_node"] = slave_node
+ return json.dumps(data)
+
+ self._update(merger)
+
+ def inc_value(self, key, value):
+ def merger(data):
+ data[key] = data.get(key, 0) + value
+ return json.dumps(data)
+
+ self._update(merger)
+
+ def dec_value(self, key, value):
+ def merger(data):
+ data[key] = data.get(key, 0) - value
+ if data[key] < 0:
+ data[key] = 0
+ return json.dumps(data)
+
+ self._update(merger)
+
+ def set_active(self):
+ if self.set_field("worker_status", "Active"):
+ logging.info(lf("Worker Status Change",
+ status="Active"))
+ self.send_event(EVENT_GEOREP_ACTIVE)
+
+ def set_passive(self):
+ if self.set_field("worker_status", "Passive"):
+ logging.info(lf("Worker Status Change",
+ status="Passive"))
+ self.send_event(EVENT_GEOREP_PASSIVE)
+
+ def get_monitor_status(self):
+ data = ""
+ with open(self.monitor_status_file, "r") as f:
+ data = f.read().strip()
+ return data
+
+ def get_status(self, checkpoint_time=0):
+ """
+ Monitor Status ---> Created Started Paused Stopped
+ ----------------------------------------------------------------------
+ slave_node N/A VALUE VALUE N/A
+ status Created VALUE Paused Stopped
+ last_synced N/A VALUE VALUE VALUE
+ last_synced_entry N/A VALUE VALUE VALUE
+ crawl_status N/A VALUE N/A N/A
+ entry N/A VALUE N/A N/A
+ data N/A VALUE N/A N/A
+ meta N/A VALUE N/A N/A
+ failures N/A VALUE VALUE VALUE
+ checkpoint_completed N/A VALUE VALUE VALUE
+ checkpoint_time N/A VALUE VALUE VALUE
+ checkpoint_completed_time N/A VALUE VALUE VALUE
+ """
+ data = self.default_values
+ with open(self.filename) as f:
+ try:
+ data.update(json.load(f))
+ except ValueError:
+ pass
+ monitor_status = self.get_monitor_status()
+
+ # Verifying whether monitor process running and adjusting status
+ if monitor_status in ["Started", "Paused"]:
+ try:
+ with open(self.monitor_pid_file, "r+") as f:
+ fcntl.lockf(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ monitor_status = "Stopped"
+ except (IOError, OSError) as e:
+ # If pid file not exists, either monitor died or Geo-rep
+ # not even started once
+ if e.errno == ENOENT:
+ monitor_status = "Stopped"
+ elif e.errno in (EACCES, EAGAIN):
+ # cannot grab. so, monitor process still running..move on
+ pass
+ else:
+ raise
+
+ if monitor_status in ["Created", "Paused", "Stopped"]:
+ data["worker_status"] = monitor_status
+
+ if monitor_status == "":
+ data["worker_status"] = "Stopped"
+
+ # Checkpoint adjustments
+ if checkpoint_time == 0:
+ data["checkpoint_completed"] = DEFAULT_STATUS
+ data["checkpoint_time"] = DEFAULT_STATUS
+ data["checkpoint_completion_time"] = DEFAULT_STATUS
+ else:
+ if checkpoint_time != data["checkpoint_time"]:
+ if checkpoint_time <= data["last_synced"]:
+ data["checkpoint_completed"] = "Yes"
+ data["checkpoint_time"] = checkpoint_time
+ data["checkpoint_completion_time"] = data["last_synced"]
+ else:
+ data["checkpoint_completed"] = "No"
+ data["checkpoint_time"] = checkpoint_time
+ data["checkpoint_completion_time"] = DEFAULT_STATUS
+
+ if data["checkpoint_time"] not in [0, DEFAULT_STATUS]:
+ chkpt_time = data["checkpoint_time"]
+ data["checkpoint_time"] = human_time(chkpt_time)
+ data["checkpoint_time_utc"] = human_time_utc(chkpt_time)
+
+ if data["checkpoint_completion_time"] not in [0, DEFAULT_STATUS]:
+ chkpt_completion_time = data["checkpoint_completion_time"]
+ data["checkpoint_completion_time"] = human_time(
+ chkpt_completion_time)
+ data["checkpoint_completion_time_utc"] = human_time_utc(
+ chkpt_completion_time)
+
+ if data["last_synced"] == 0:
+ data["last_synced"] = DEFAULT_STATUS
+ data["last_synced_utc"] = DEFAULT_STATUS
+ else:
+ last_synced = data["last_synced"]
+ data["last_synced"] = human_time(last_synced)
+ data["last_synced_utc"] = human_time_utc(last_synced)
+
+ if data["worker_status"] != "Active":
+ data["last_synced"] = DEFAULT_STATUS
+ data["last_synced_utc"] = DEFAULT_STATUS
+ data["crawl_status"] = DEFAULT_STATUS
+ data["entry"] = DEFAULT_STATUS
+ data["data"] = DEFAULT_STATUS
+ data["meta"] = DEFAULT_STATUS
+ data["failures"] = DEFAULT_STATUS
+ data["checkpoint_completed"] = DEFAULT_STATUS
+ data["checkpoint_time"] = DEFAULT_STATUS
+ data["checkpoint_completed_time"] = DEFAULT_STATUS
+ data["checkpoint_time_utc"] = DEFAULT_STATUS
+ data["checkpoint_completion_time_utc"] = DEFAULT_STATUS
+
+ if data["worker_status"] not in ["Active", "Passive"]:
+ data["slave_node"] = DEFAULT_STATUS
+
+ if data.get("last_synced_utc", 0) == 0:
+ data["last_synced_utc"] = DEFAULT_STATUS
+
+ if data.get("checkpoint_completion_time_utc", 0) == 0:
+ data["checkpoint_completion_time_utc"] = DEFAULT_STATUS
+
+ if data.get("checkpoint_time_utc", 0) == 0:
+ data["checkpoint_time_utc"] = DEFAULT_STATUS
+
+ return data
+
+ def print_status(self, checkpoint_time=0, json_output=False):
+ status_out = self.get_status(checkpoint_time)
+ if json_output:
+ out = {}
+ # Convert all values as string
+ for k, v in status_out.items():
+ out[k] = str(v)
+ print(json.dumps(out))
+ return
+
+ for key, value in status_out.items():
+ print(("%s: %s" % (key, value)))
diff --git a/geo-replication/syncdaemon/libcxattr.py b/geo-replication/syncdaemon/libcxattr.py
new file mode 100644
index 00000000000..e6406c36bd7
--- /dev/null
+++ b/geo-replication/syncdaemon/libcxattr.py
@@ -0,0 +1,112 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import os
+from ctypes import CDLL, get_errno
+from py2py3 import (bytearray_to_str, gr_create_string_buffer,
+ gr_query_xattr, gr_lsetxattr, gr_lremovexattr)
+
+
+class Xattr(object):
+
+ """singleton that wraps the extended attributes system
+ interface for python using ctypes
+
+ Just implement it to the degree we need it, in particular
+ - we need just the l*xattr variants, ie. we never want symlinks to be
+ followed
+ - don't need size discovery for getxattr, as we always know the exact
+ sizes we expect
+ """
+
+ libc = CDLL("libc.so.6", use_errno=True)
+
+ @classmethod
+ def geterrno(cls):
+ return get_errno()
+
+ @classmethod
+ def raise_oserr(cls):
+ errn = cls.geterrno()
+ raise OSError(errn, os.strerror(errn))
+
+ @classmethod
+ def _query_xattr(cls, path, siz, syscall, *a):
+ if siz:
+ buf = gr_create_string_buffer(siz)
+ else:
+ buf = None
+ ret = getattr(cls.libc, syscall)(*((path,) + a + (buf, siz)))
+ if ret == -1:
+ cls.raise_oserr()
+ if siz:
+ # py2 and py3 compatibility. Convert bytes array
+ # to string
+ result = bytearray_to_str(buf.raw)
+ return result[:ret]
+ else:
+ return ret
+
+ @classmethod
+ def lgetxattr(cls, path, attr, siz=0):
+ return gr_query_xattr(cls, path, siz, 'lgetxattr', attr)
+
+ @classmethod
+ def lgetxattr_buf(cls, path, attr):
+ """lgetxattr variant with size discovery"""
+ size = cls.lgetxattr(path, attr)
+ if size == -1:
+ cls.raise_oserr()
+ if size == 0:
+ return ''
+ return cls.lgetxattr(path, attr, size)
+
+ @classmethod
+ def llistxattr(cls, path, siz=0):
+ ret = gr_query_xattr(cls, path, siz, 'llistxattr')
+ if isinstance(ret, str):
+ ret = ret.strip('\0')
+ ret = ret.split('\0') if ret else []
+ return ret
+
+ @classmethod
+ def lsetxattr(cls, path, attr, val):
+ ret = gr_lsetxattr(cls, path, attr, val)
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def lremovexattr(cls, path, attr):
+ ret = gr_lremovexattr(cls, path, attr)
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def llistxattr_buf(cls, path):
+ """listxattr variant with size discovery"""
+ try:
+ # Assuming no more than 100 xattrs in a file/directory and
+ # each xattr key length will be less than 256 bytes
+ # llistxattr will be called with bigger size so that
+ # listxattr will not fail with ERANGE. OSError will be
+ # raised if fails even with the large size specified.
+ size = 256 * 100
+ return cls.llistxattr(path, size)
+ except OSError:
+ # If fixed length failed for getting list of xattrs then
+ # use the llistxattr call to get the size and use that
+ # size to get the list of xattrs.
+ size = cls.llistxattr(path)
+ if size == -1:
+ cls.raise_oserr()
+ if size == 0:
+ return []
+
+ return cls.llistxattr(path, size)
diff --git a/geo-replication/syncdaemon/libgfchangelog.py b/geo-replication/syncdaemon/libgfchangelog.py
new file mode 100644
index 00000000000..a3bda7282c0
--- /dev/null
+++ b/geo-replication/syncdaemon/libgfchangelog.py
@@ -0,0 +1,143 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import os
+from ctypes import CDLL, RTLD_GLOBAL, get_errno, byref, c_ulong
+from ctypes.util import find_library
+from syncdutils import ChangelogException, ChangelogHistoryNotAvailable
+from py2py3 import (gr_cl_history_changelog, gr_cl_done,
+ gr_create_string_buffer, gr_cl_register,
+ gr_cl_history_done, bytearray_to_str)
+
+
+libgfc = CDLL(
+ find_library("gfchangelog"),
+ mode=RTLD_GLOBAL,
+ use_errno=True
+)
+
+
+def _raise_changelog_err():
+ errn = get_errno()
+ raise ChangelogException(errn, os.strerror(errn))
+
+
+def _init():
+ if libgfc.gf_changelog_init(None) == -1:
+ _raise_changelog_err()
+
+
+def register(brick, path, log_file, log_level, retries=0):
+ _init()
+
+ ret = gr_cl_register(libgfc, brick, path, log_file, log_level, retries)
+
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def scan():
+ ret = libgfc.gf_changelog_scan()
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def startfresh():
+ ret = libgfc.gf_changelog_start_fresh()
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def getchanges():
+ def clsort(cfile):
+ return cfile.split('.')[-1]
+
+ changes = []
+ buf = gr_create_string_buffer(4096)
+ call = libgfc.gf_changelog_next_change
+
+ while True:
+ ret = call(buf, 4096)
+ if ret in (0, -1):
+ break
+
+ # py2 and py3 compatibility
+ result = bytearray_to_str(buf.raw[:ret - 1])
+ changes.append(result)
+
+ if ret == -1:
+ _raise_changelog_err()
+
+ # cleanup tracker
+ startfresh()
+
+ return sorted(changes, key=clsort)
+
+
+def done(clfile):
+ ret = gr_cl_done(libgfc, clfile)
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def history_scan():
+ ret = libgfc.gf_history_changelog_scan()
+ if ret == -1:
+ _raise_changelog_err()
+
+ return ret
+
+
+def history_changelog(changelog_path, start, end, num_parallel):
+ actual_end = c_ulong()
+ ret = gr_cl_history_changelog(libgfc, changelog_path, start, end,
+ num_parallel, byref(actual_end))
+ if ret == -1:
+ _raise_changelog_err()
+
+ if ret == -2:
+ raise ChangelogHistoryNotAvailable()
+
+ return (ret, actual_end.value)
+
+
+def history_startfresh():
+ ret = libgfc.gf_history_changelog_start_fresh()
+ if ret == -1:
+ _raise_changelog_err()
+
+
+def history_getchanges():
+ def clsort(cfile):
+ return cfile.split('.')[-1]
+
+ changes = []
+ buf = gr_create_string_buffer(4096)
+ call = libgfc.gf_history_changelog_next_change
+
+ while True:
+ ret = call(buf, 4096)
+ if ret in (0, -1):
+ break
+
+ # py2 and py3 compatibility
+ result = bytearray_to_str(buf.raw[:ret - 1])
+ changes.append(result)
+
+ if ret == -1:
+ _raise_changelog_err()
+
+ return sorted(changes, key=clsort)
+
+
+def history_done(clfile):
+ ret = gr_cl_history_done(libgfc, clfile)
+ if ret == -1:
+ _raise_changelog_err()
diff --git a/geo-replication/syncdaemon/logutils.py b/geo-replication/syncdaemon/logutils.py
new file mode 100644
index 00000000000..01ae7852f23
--- /dev/null
+++ b/geo-replication/syncdaemon/logutils.py
@@ -0,0 +1,77 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import logging
+from logging import Logger, handlers
+import sys
+import time
+
+
+class GLogger(Logger):
+
+ """Logger customizations for gsyncd.
+
+ It implements a log format similar to that of glusterfs.
+ """
+
+ def makeRecord(self, name, level, *a):
+ rv = Logger.makeRecord(self, name, level, *a)
+ rv.nsecs = (rv.created - int(rv.created)) * 1000000
+ fr = sys._getframe(4)
+ callee = fr.f_locals.get('self')
+ if callee:
+ ctx = str(type(callee)).split("'")[1].split('.')[-1]
+ else:
+ ctx = '<top>'
+ if not hasattr(rv, 'funcName'):
+ rv.funcName = fr.f_code.co_name
+ rv.lvlnam = logging.getLevelName(level)[0]
+ rv.ctx = ctx
+ return rv
+
+
+LOGFMT = ("[%(asctime)s.%(nsecs)d] %(lvlnam)s [%(module)s{0}"
+ ":%(lineno)s:%(funcName)s] %(ctx)s: %(message)s")
+
+
+def setup_logging(level="INFO", label="", log_file=""):
+ if label:
+ label = "(" + label + ")"
+
+ filename = None
+ stream = None
+ if log_file:
+ if log_file in ('-', '/dev/stderr'):
+ stream = sys.stderr
+ elif log_file == '/dev/stdout':
+ stream = sys.stdout
+ else:
+ filename = log_file
+
+ datefmt = "%Y-%m-%d %H:%M:%S"
+ fmt = LOGFMT.format(label)
+ logging.root = GLogger("root", level)
+ logging.setLoggerClass(GLogger)
+ logging.Formatter.converter = time.gmtime # Log in GMT/UTC time
+ logging.getLogger().handlers = []
+ logging.getLogger().setLevel(level)
+
+ if filename is not None:
+ logging_handler = handlers.WatchedFileHandler(filename)
+ formatter = logging.Formatter(fmt=fmt,
+ datefmt=datefmt)
+ logging_handler.setFormatter(formatter)
+ logging.getLogger().addHandler(logging_handler)
+ else:
+ logging.basicConfig(stream=stream,
+ format=fmt,
+ datefmt=datefmt,
+ level=level)
diff --git a/geo-replication/syncdaemon/master.py b/geo-replication/syncdaemon/master.py
new file mode 100644
index 00000000000..9501aeae6b5
--- /dev/null
+++ b/geo-replication/syncdaemon/master.py
@@ -0,0 +1,2020 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import os
+import sys
+import time
+import stat
+import logging
+import fcntl
+import string
+import errno
+import tarfile
+from errno import ENOENT, ENODATA, EEXIST, EACCES, EAGAIN, ESTALE, EINTR
+from threading import Condition, Lock
+from datetime import datetime
+
+import gsyncdconfig as gconf
+import libgfchangelog
+from rconf import rconf
+from syncdutils import (Thread, GsyncdError, escape_space_newline,
+ unescape_space_newline, gauxpfx, escape,
+ lstat, errno_wrap, FreeObject, lf, matching_disk_gfid,
+ NoStimeAvailable, PartialHistoryAvailable,
+ host_brick_split)
+
+URXTIME = (-1, 0)
+
+# Default rollover time set in changelog translator
+# changelog rollover time is hardcoded here to avoid the
+# xsync usage when crawling switch happens from history
+# to changelog. If rollover time increased in translator
+# then geo-rep can enter into xsync crawl after history
+# crawl before starting live changelog crawl.
+CHANGELOG_ROLLOVER_TIME = 15
+
+# Utility functions to help us to get to closer proximity
+# of the DRY principle (no, don't look for elevated or
+# perspectivistic things here)
+
+
+def _xtime_now():
+ t = time.time()
+ sec = int(t)
+ nsec = int((t - sec) * 1000000)
+ return (sec, nsec)
+
+
+def _volinfo_hook_relax_foreign(self):
+ volinfo_sys = self.get_sys_volinfo()
+ fgn_vi = volinfo_sys[self.KFGN]
+ if fgn_vi:
+ expiry = fgn_vi['timeout'] - int(time.time()) + 1
+ logging.info(lf('foreign volume info found, waiting for expiry',
+ expiry=expiry))
+ time.sleep(expiry)
+ volinfo_sys = self.get_sys_volinfo()
+ return volinfo_sys
+
+
+def edct(op, **ed):
+ dct = {}
+ dct['op'] = op
+ # This is used in automatic gfid conflict resolution.
+ # When marked True, it's skipped during re-processing.
+ dct['skip_entry'] = False
+ for k in ed:
+ if k == 'stat':
+ st = ed[k]
+ dst = dct['stat'] = {}
+ if st:
+ dst['uid'] = st.st_uid
+ dst['gid'] = st.st_gid
+ dst['mode'] = st.st_mode
+ dst['atime'] = st.st_atime
+ dst['mtime'] = st.st_mtime
+ else:
+ dct[k] = ed[k]
+ return dct
+
+
+# The API!
+
+def gmaster_builder(excrawl=None):
+ """produce the GMaster class variant corresponding
+ to sync mode"""
+ this = sys.modules[__name__]
+ modemixin = gconf.get("special-sync-mode")
+ if not modemixin:
+ modemixin = 'normal'
+
+ if gconf.get("change-detector") == 'xsync':
+ changemixin = 'xsync'
+ elif excrawl:
+ changemixin = excrawl
+ else:
+ changemixin = gconf.get("change-detector")
+
+ logging.debug(lf('setting up change detection mode',
+ mode=changemixin))
+ modemixin = getattr(this, modemixin.capitalize() + 'Mixin')
+ crawlmixin = getattr(this, 'GMaster' + changemixin.capitalize() + 'Mixin')
+
+ if gconf.get("use-rsync-xattrs"):
+ sendmarkmixin = SendmarkRsyncMixin
+ else:
+ sendmarkmixin = SendmarkNormalMixin
+
+ if gconf.get("ignore-deletes"):
+ purgemixin = PurgeNoopMixin
+ else:
+ purgemixin = PurgeNormalMixin
+
+ if gconf.get("sync-method") == "tarssh":
+ syncengine = TarSSHEngine
+ else:
+ syncengine = RsyncEngine
+
+ class _GMaster(crawlmixin, modemixin, sendmarkmixin,
+ purgemixin, syncengine):
+ pass
+
+ return _GMaster
+
+
+# Mixin classes that implement the data format
+# and logic particularities of the certain
+# sync modes
+
+class NormalMixin(object):
+
+ """normal geo-rep behavior"""
+
+ minus_infinity = URXTIME
+
+ # following staticmethods ideally would be
+ # methods of an xtime object (in particular,
+ # implementing the hooks needed for comparison
+ # operators), but at this point we don't yet
+ # have a dedicated xtime class
+
+ @staticmethod
+ def serialize_xtime(xt):
+ return "%d.%d" % tuple(xt)
+
+ @staticmethod
+ def deserialize_xtime(xt):
+ return tuple(int(x) for x in xt.split("."))
+
+ @staticmethod
+ def native_xtime(xt):
+ return xt
+
+ @staticmethod
+ def xtime_geq(xt0, xt1):
+ return xt0 >= xt1
+
+ def make_xtime_opts(self, is_master, opts):
+ if 'create' not in opts:
+ opts['create'] = is_master
+ if 'default_xtime' not in opts:
+ opts['default_xtime'] = URXTIME
+
+ def xtime_low(self, rsc, path, **opts):
+ if rsc == self.master:
+ xt = rsc.server.xtime(path, self.uuid)
+ else:
+ xt = rsc.server.stime(path, self.uuid)
+ if isinstance(xt, int) and xt == ENODATA:
+ xt = rsc.server.xtime(path, self.uuid)
+ if not isinstance(xt, int):
+ self.slave.server.set_stime(path, self.uuid, xt)
+ if isinstance(xt, int) and xt != ENODATA:
+ return xt
+ if xt == ENODATA or xt < self.volmark:
+ if opts['create']:
+ xt = _xtime_now()
+ rsc.server.aggregated.set_xtime(path, self.uuid, xt)
+ else:
+ zero_zero = (0, 0)
+ if xt != zero_zero:
+ xt = opts['default_xtime']
+ return xt
+
+ def keepalive_payload_hook(self, timo, gap):
+ # first grab a reference as self.volinfo
+ # can be changed in main thread
+ vi = self.volinfo
+ if vi:
+ # then have a private copy which we can mod
+ vi = vi.copy()
+ vi['timeout'] = int(time.time()) + timo
+ else:
+ # send keep-alive more frequently to
+ # avoid a delay in announcing our volume info
+ # to slave if it becomes established in the
+ # meantime
+ gap = min(10, gap)
+ return (vi, gap)
+
+ def volinfo_hook(self):
+ return self.get_sys_volinfo()
+
+ def xtime_reversion_hook(self, path, xtl, xtr):
+ if xtr > xtl:
+ raise GsyncdError("timestamp corruption for " + path)
+
+ def need_sync(self, e, xte, xtrd):
+ return xte > xtrd
+
+ def set_slave_xtime(self, path, mark):
+ self.slave.server.set_stime(path, self.uuid, mark)
+ # self.slave.server.set_xtime_remote(path, self.uuid, mark)
+
+
+class PartialMixin(NormalMixin):
+
+ """a variant tuned towards operation with a master
+ that has partial info of the slave (brick typically)"""
+
+ def xtime_reversion_hook(self, path, xtl, xtr):
+ pass
+
+
+class RecoverMixin(NormalMixin):
+
+ """a variant that differs from normal in terms
+ of ignoring non-indexed files"""
+
+ @staticmethod
+ def make_xtime_opts(is_master, opts):
+ if 'create' not in opts:
+ opts['create'] = False
+ if 'default_xtime' not in opts:
+ opts['default_xtime'] = URXTIME
+
+ def keepalive_payload_hook(self, timo, gap):
+ return (None, gap)
+
+ def volinfo_hook(self):
+ return _volinfo_hook_relax_foreign(self)
+
+# Further mixins for certain tunable behaviors
+
+
+class SendmarkNormalMixin(object):
+
+ def sendmark_regular(self, *a, **kw):
+ return self.sendmark(*a, **kw)
+
+
+class SendmarkRsyncMixin(object):
+
+ def sendmark_regular(self, *a, **kw):
+ pass
+
+
+class PurgeNormalMixin(object):
+
+ def purge_missing(self, path, names):
+ self.slave.server.purge(path, names)
+
+
+class PurgeNoopMixin(object):
+
+ def purge_missing(self, path, names):
+ pass
+
+
+class TarSSHEngine(object):
+
+ """Sync engine that uses tar(1) piped over ssh(1)
+ for data transfers. Good for lots of small files.
+ """
+
+ def a_syncdata(self, files):
+ logging.debug(lf("Files", files=files))
+
+ for f in files:
+ pb = self.syncer.add(f)
+
+ def regjob(se, xte, pb):
+ rv = pb.wait()
+ if rv[0]:
+ logging.debug(lf('synced', file=se))
+ return True
+ else:
+ # stat check for file presence
+ st = lstat(se)
+ if isinstance(st, int):
+ # file got unlinked in the interim
+ self.unlinked_gfids.add(se)
+ return True
+
+ self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, f, None, pb)
+
+ def syncdata_wait(self):
+ if self.wait(self.FLAT_DIR_HIERARCHY, None):
+ return True
+
+ def syncdata(self, files):
+ self.a_syncdata(files)
+ self.syncdata_wait()
+
+
+class RsyncEngine(object):
+
+ """Sync engine that uses rsync(1) for data transfers"""
+
+ def a_syncdata(self, files):
+ logging.debug(lf("files", files=files))
+
+ for f in files:
+ logging.debug(lf('candidate for syncing', file=f))
+ pb = self.syncer.add(f)
+
+ def regjob(se, xte, pb):
+ rv = pb.wait()
+ if rv[0]:
+ logging.debug(lf('synced', file=se))
+ return True
+ else:
+ # stat to check if the file exist
+ st = lstat(se)
+ if isinstance(st, int):
+ # file got unlinked in the interim
+ self.unlinked_gfids.add(se)
+ return True
+
+ self.add_job(self.FLAT_DIR_HIERARCHY, 'reg', regjob, f, None, pb)
+
+ def syncdata_wait(self):
+ if self.wait(self.FLAT_DIR_HIERARCHY, None):
+ return True
+
+ def syncdata(self, files):
+ self.a_syncdata(files)
+ self.syncdata_wait()
+
+
+class GMasterCommon(object):
+
+ """abstract class impementling master role"""
+
+ KFGN = 0
+ KNAT = 1
+
+ def get_sys_volinfo(self):
+ """query volume marks on fs root
+
+ err out on multiple foreign masters
+ """
+ fgn_vis, nat_vi = (
+ self.master.server.aggregated.foreign_volume_infos(),
+ self.master.server.aggregated.native_volume_info())
+ fgn_vi = None
+ if fgn_vis:
+ if len(fgn_vis) > 1:
+ raise GsyncdError("cannot work with multiple foreign masters")
+ fgn_vi = fgn_vis[0]
+ return fgn_vi, nat_vi
+
+ @property
+ def uuid(self):
+ if self.volinfo:
+ return self.volinfo['uuid']
+
+ @property
+ def volmark(self):
+ if self.volinfo:
+ return self.volinfo['volume_mark']
+
+ def get_entry_stime(self):
+ data = self.slave.server.entry_stime(".", self.uuid)
+ if isinstance(data, int):
+ data = None
+ return data
+
+ def get_data_stime(self):
+ data = self.slave.server.stime(".", self.uuid)
+ if isinstance(data, int):
+ data = None
+ return data
+
+ def xtime(self, path, *a, **opts):
+ """get amended xtime
+
+ as of amending, we can create missing xtime, or
+ determine a valid value if what we get is expired
+ (as of the volume mark expiry); way of amendig
+ depends on @opts and on subject of query (master
+ or slave).
+ """
+ if a:
+ rsc = a[0]
+ else:
+ rsc = self.master
+ self.make_xtime_opts(rsc == self.master, opts)
+ return self.xtime_low(rsc, path, **opts)
+
+ def __init__(self, master, slave):
+ self.master = master
+ self.slave = slave
+ self.jobtab = {}
+ if gconf.get("sync-method") == "tarssh":
+ self.syncer = Syncer(slave, self.slave.tarssh, [2])
+ else:
+ # partial transfer (cf. rsync(1)), that's normal
+ self.syncer = Syncer(slave, self.slave.rsync, [23, 24])
+ # crawls vs. turns:
+ # - self.crawls is simply the number of crawl() invocations on root
+ # - one turn is a maximal consecutive sequence of crawls so that each
+ # crawl in it detects a change to be synced
+ # - self.turns is the number of turns since start
+ # - self.total_turns is a limit so that if self.turns reaches it, then
+ # we exit (for diagnostic purposes)
+ # so, eg., if the master fs changes unceasingly, self.turns will remain
+ # 0.
+ self.crawls = 0
+ self.turns = 0
+ self.total_turns = rconf.turns
+ self.crawl_start = datetime.now()
+ self.lastreport = {'crawls': 0, 'turns': 0, 'time': 0}
+ self.start = None
+ self.change_seen = None
+ # the actual volinfo we make use of
+ self.volinfo = None
+ self.terminate = False
+ self.sleep_interval = 1
+ self.unlinked_gfids = set()
+
+ def init_keep_alive(cls):
+ """start the keep-alive thread """
+ timo = gconf.get("slave-timeout", 0)
+ if timo > 0:
+ def keep_alive():
+ while True:
+ vi, gap = cls.keepalive_payload_hook(timo, timo * 0.5)
+ cls.slave.server.keep_alive(vi)
+ time.sleep(gap)
+ t = Thread(target=keep_alive)
+ t.start()
+
+ def mgmt_lock(self):
+
+ """Take management volume lock """
+ if rconf.mgmt_lock_fd:
+ try:
+ fcntl.lockf(rconf.mgmt_lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ return True
+ except:
+ ex = sys.exc_info()[1]
+ if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
+ return False
+ raise
+
+ fd = None
+ bname = str(self.uuid) + "_" + rconf.args.slave_id + "_subvol_" \
+ + str(rconf.args.subvol_num) + ".lock"
+ mgmt_lock_dir = os.path.join(gconf.get("meta-volume-mnt"), "geo-rep")
+ path = os.path.join(mgmt_lock_dir, bname)
+ logging.debug(lf("lock file path", path=path))
+ try:
+ fd = os.open(path, os.O_CREAT | os.O_RDWR)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == ENOENT:
+ logging.info("Creating geo-rep directory in meta volume...")
+ try:
+ os.makedirs(mgmt_lock_dir)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == EEXIST:
+ pass
+ else:
+ raise
+ fd = os.open(path, os.O_CREAT | os.O_RDWR)
+ else:
+ raise
+ try:
+ fcntl.lockf(fd, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ # Save latest FD for future use
+ rconf.mgmt_lock_fd = fd
+ except:
+ ex = sys.exc_info()[1]
+ if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
+ # cannot grab, it's taken
+ rconf.mgmt_lock_fd = fd
+ return False
+ raise
+
+ return True
+
+ def should_crawl(self):
+ if not gconf.get("use-meta-volume"):
+ return rconf.args.local_node_id in self.master.server.node_uuid()
+
+ if not os.path.ismount(gconf.get("meta-volume-mnt")):
+ logging.error("Meta-volume is not mounted. Worker Exiting...")
+ sys.exit(1)
+ return self.mgmt_lock()
+
+ def register(self):
+ self.register()
+
+ def crawlwrap(self, oneshot=False, register_time=None):
+ if oneshot:
+ # it's important to do this during the oneshot crawl as
+ # for a passive gsyncd (ie. in a replicate scenario)
+ # the keepalive thread would keep the connection alive.
+ self.init_keep_alive()
+
+ # If crawlwrap is called when partial history available,
+ # then it sets register_time which is the time when geo-rep
+ # worker registered to changelog consumption. Since nsec is
+ # not considered in register time, there are chances of skipping
+ # changes detection in xsync crawl. This limit will be reset when
+ # crawlwrap is called again.
+ self.live_changelog_start_time = None
+ if register_time:
+ self.live_changelog_start_time = (register_time, 0)
+
+ # no need to maintain volinfo state machine.
+ # in a cascading setup, each geo-replication session is
+ # independent (ie. 'volume-mark' and 'xtime' are not
+ # propagated). This is because the slave's xtime is now
+ # stored on the master itself. 'volume-mark' just identifies
+ # that we are in a cascading setup and need to enable
+ # 'geo-replication.ignore-pid-check' option.
+ volinfo_sys = self.volinfo_hook()
+ self.volinfo = volinfo_sys[self.KNAT]
+ inter_master = volinfo_sys[self.KFGN]
+ logging.debug("%s master with volume id %s ..." %
+ (inter_master and "intermediate" or "primary",
+ self.uuid))
+ rconf.volume_id = self.uuid
+ if self.volinfo:
+ if self.volinfo['retval']:
+ logging.warn(lf("master cluster's info may not be valid",
+ error=self.volinfo['retval']))
+ else:
+ raise GsyncdError("master volinfo unavailable")
+ self.lastreport['time'] = time.time()
+
+ t0 = time.time()
+ crawl = self.should_crawl()
+ while not self.terminate:
+ if self.start:
+ logging.debug("... crawl #%d done, took %.6f seconds" %
+ (self.crawls, time.time() - self.start))
+ self.start = time.time()
+ should_display_info = self.start - self.lastreport['time'] >= 60
+ if should_display_info:
+ logging.debug("%d crawls, %d turns",
+ self.crawls - self.lastreport['crawls'],
+ self.turns - self.lastreport['turns'])
+ self.lastreport.update(crawls=self.crawls,
+ turns=self.turns,
+ time=self.start)
+ t1 = time.time()
+ if int(t1 - t0) >= gconf.get("replica-failover-interval"):
+ crawl = self.should_crawl()
+ t0 = t1
+ self.update_worker_remote_node()
+ if not crawl:
+ self.status.set_passive()
+ # bring up _this_ brick to the cluster stime
+ # which is min of cluster (but max of the replicas)
+ brick_stime = self.xtime('.', self.slave)
+ cluster_stime = self.master.server.aggregated.stime_mnt(
+ '.', '.'.join([str(self.uuid), rconf.args.slave_id]))
+ logging.debug(lf("Crawl info",
+ cluster_stime=cluster_stime,
+ brick_stime=brick_stime))
+
+ if not isinstance(cluster_stime, int):
+ if brick_stime < cluster_stime:
+ self.slave.server.set_stime(
+ self.FLAT_DIR_HIERARCHY, self.uuid, cluster_stime)
+ self.upd_stime(cluster_stime)
+ # Purge all changelogs available in processing dir
+ # less than cluster_stime
+ proc_dir = os.path.join(self.tempdir,
+ ".processing")
+
+ if os.path.exists(proc_dir):
+ to_purge = [f for f in os.listdir(proc_dir)
+ if (f.startswith("CHANGELOG.") and
+ int(f.split('.')[-1]) <
+ cluster_stime[0])]
+ for f in to_purge:
+ os.remove(os.path.join(proc_dir, f))
+
+ time.sleep(5)
+ continue
+
+ self.status.set_active()
+ self.crawl()
+
+ if oneshot:
+ return
+ time.sleep(self.sleep_interval)
+
+ @staticmethod
+ def humantime(*tpair):
+ """format xtime-like (sec, nsec) pair to human readable format"""
+ ts = datetime.fromtimestamp(float('.'.join(str(n) for n in tpair))).\
+ strftime("%Y-%m-%d %H:%M:%S")
+ if len(tpair) > 1:
+ ts += '.' + str(tpair[1])
+ return ts
+
+ def _crawl_time_format(self, crawl_time):
+ # Ex: 5 years, 4 days, 20:23:10
+ years, days = divmod(crawl_time.days, 365.25)
+ years = int(years)
+ days = int(days)
+
+ date = ""
+ m, s = divmod(crawl_time.seconds, 60)
+ h, m = divmod(m, 60)
+
+ if years != 0:
+ date += "%s %s " % (years, "year" if years == 1 else "years")
+ if days != 0:
+ date += "%s %s " % (days, "day" if days == 1 else "days")
+
+ date += "%s:%s:%s" % (string.zfill(h, 2),
+ string.zfill(m, 2), string.zfill(s, 2))
+ return date
+
+ def add_job(self, path, label, job, *a, **kw):
+ """insert @job function to job table at @path with @label"""
+ if self.jobtab.get(path) is None:
+ self.jobtab[path] = []
+ self.jobtab[path].append((label, a, lambda: job(*a, **kw)))
+
+ def add_failjob(self, path, label):
+ """invoke .add_job with a job that does nothing just fails"""
+ logging.debug('salvaged: ' + label)
+ self.add_job(path, label, lambda: False)
+
+ def wait(self, path, *args):
+ """perform jobs registered for @path
+
+ Reset jobtab entry for @path,
+ determine success as the conjunction of
+ success of all the jobs. In case of
+ success, call .sendmark on @path
+ """
+ jobs = self.jobtab.pop(path, [])
+ succeed = True
+ for j in jobs:
+ ret = j[-1]()
+ if not ret:
+ succeed = False
+ if succeed and not args[0] is None:
+ self.sendmark(path, *args)
+ return succeed
+
+ def sendmark(self, path, mark, adct=None):
+ """update slave side xtime for @path to master side xtime
+
+ also can send a setattr payload (see Server.setattr).
+ """
+ if adct:
+ self.slave.server.setattr(path, adct)
+ self.set_slave_xtime(path, mark)
+
+
+class XCrawlMetadata(object):
+ def __init__(self, st_uid, st_gid, st_mode, st_atime, st_mtime):
+ self.st_uid = int(st_uid)
+ self.st_gid = int(st_gid)
+ self.st_mode = int(st_mode)
+ self.st_atime = float(st_atime)
+ self.st_mtime = float(st_mtime)
+
+
+class GMasterChangelogMixin(GMasterCommon):
+
+ """ changelog based change detection and syncing """
+
+ # index for change type and entry
+ IDX_START = 0
+ IDX_END = 2
+ UNLINK_ENTRY = 2
+
+ POS_GFID = 0
+ POS_TYPE = 1
+ POS_ENTRY1 = -1
+
+ TYPE_META = "M "
+ TYPE_GFID = "D "
+ TYPE_ENTRY = "E "
+
+ MAX_EF_RETRIES = 10
+ MAX_OE_RETRIES = 10
+
+ # flat directory hierarchy for gfid based access
+ FLAT_DIR_HIERARCHY = '.'
+
+ CHANGELOG_CONN_RETRIES = 5
+
+ def init_fop_batch_stats(self):
+ self.batch_stats = {
+ "CREATE": 0,
+ "MKNOD": 0,
+ "UNLINK": 0,
+ "MKDIR": 0,
+ "RMDIR": 0,
+ "LINK": 0,
+ "SYMLINK": 0,
+ "RENAME": 0,
+ "SETATTR": 0,
+ "SETXATTR": 0,
+ "XATTROP": 0,
+ "DATA": 0,
+ "ENTRY_SYNC_TIME": 0,
+ "META_SYNC_TIME": 0,
+ "DATA_START_TIME": 0
+ }
+
+ def update_fop_batch_stats(self, ty):
+ if ty in ['FSETXATTR']:
+ ty = 'SETXATTR'
+ self.batch_stats[ty] = self.batch_stats.get(ty, 0) + 1
+
+ def archive_and_purge_changelogs(self, changelogs):
+ # Creates tar file instead of tar.gz, since changelogs will
+ # be appended to existing tar. archive name is
+ # archive_<YEAR><MONTH>.tar
+ archive_name = "archive_%s.tar" % datetime.today().strftime(
+ gconf.get("changelog-archive-format"))
+
+ try:
+ tar = tarfile.open(os.path.join(self.processed_changelogs_dir,
+ archive_name),
+ "a")
+ except tarfile.ReadError:
+ tar = tarfile.open(os.path.join(self.processed_changelogs_dir,
+ archive_name),
+ "w")
+
+ for f in changelogs:
+ try:
+ f = os.path.basename(f)
+ tar.add(os.path.join(self.processed_changelogs_dir, f),
+ arcname=os.path.basename(f))
+ except:
+ exc = sys.exc_info()[1]
+ if ((isinstance(exc, OSError) or
+ isinstance(exc, IOError)) and exc.errno == ENOENT):
+ continue
+ else:
+ tar.close()
+ raise
+ tar.close()
+
+ for f in changelogs:
+ try:
+ f = os.path.basename(f)
+ os.remove(os.path.join(self.processed_changelogs_dir, f))
+ except OSError as e:
+ if e.errno == errno.ENOENT:
+ continue
+ else:
+ raise
+
+ def setup_working_dir(self):
+ workdir = os.path.join(gconf.get("working-dir"),
+ escape(rconf.args.local_path))
+ logging.debug('changelog working dir %s' % workdir)
+ return workdir
+
+ def log_failures(self, failures, entry_key, gfid_prefix, log_prefix):
+ num_failures = 0
+ for failure in failures:
+ st = lstat(os.path.join(gfid_prefix, failure[0][entry_key]))
+ if not isinstance(st, int):
+ num_failures += 1
+ logging.error(lf('%s FAILED' % log_prefix,
+ data=failure))
+ if failure[0]['op'] == 'MKDIR':
+ raise GsyncdError("The above directory failed to sync."
+ " Please fix it to proceed further.")
+
+ self.status.inc_value("failures", num_failures)
+
+ def fix_possible_entry_failures(self, failures, retry_count, entries):
+ pfx = gauxpfx()
+ fix_entry_ops = []
+ failures1 = []
+ remove_gfids = set()
+ for failure in failures:
+ if failure[2]['name_mismatch']:
+ pbname = failure[2]['slave_entry']
+ elif failure[2]['dst']:
+ pbname = failure[0]['entry1']
+ else:
+ pbname = failure[0]['entry']
+
+ op = failure[0]['op']
+ # name exists but gfid is different
+ if failure[2]['gfid_mismatch'] or failure[2]['name_mismatch']:
+ slave_gfid = failure[2]['slave_gfid']
+ st = lstat(os.path.join(pfx, slave_gfid))
+ # Takes care of scenarios with no hardlinks
+ if isinstance(st, int) and st == ENOENT:
+ logging.debug(lf('Entry not present on master. Fixing gfid '
+ 'mismatch in slave. Deleting the entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ # Add deletion to fix_entry_ops list
+ if failure[2]['slave_isdir']:
+ fix_entry_ops.append(
+ edct('RMDIR',
+ gfid=failure[2]['slave_gfid'],
+ entry=pbname))
+ else:
+ fix_entry_ops.append(
+ edct('UNLINK',
+ gfid=failure[2]['slave_gfid'],
+ entry=pbname))
+ remove_gfids.add(slave_gfid)
+ if op in ['RENAME']:
+ # If renamed gfid doesn't exists on master, remove
+ # rename entry and unlink src on slave
+ st = lstat(os.path.join(pfx, failure[0]['gfid']))
+ if isinstance(st, int) and st == ENOENT:
+ logging.debug("Unlink source %s" % repr(failure))
+ remove_gfids.add(failure[0]['gfid'])
+ fix_entry_ops.append(
+ edct('UNLINK',
+ gfid=failure[0]['gfid'],
+ entry=failure[0]['entry']))
+ # Takes care of scenarios of hardlinks/renames on master
+ elif not isinstance(st, int):
+ if matching_disk_gfid(slave_gfid, pbname):
+ # Safe to ignore the failure as master contains same
+ # file with same gfid. Remove entry from entries list
+ logging.debug(lf('Fixing gfid mismatch in slave. '
+ ' Safe to ignore, take out entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ remove_gfids.add(failure[0]['gfid'])
+ if op == 'RENAME':
+ fix_entry_ops.append(
+ edct('UNLINK',
+ gfid=failure[0]['gfid'],
+ entry=failure[0]['entry']))
+ # The file exists on master but with different name.
+ # Probably renamed and got missed during xsync crawl.
+ elif failure[2]['slave_isdir']:
+ realpath = os.readlink(os.path.join(
+ rconf.args.local_path,
+ ".glusterfs",
+ slave_gfid[0:2],
+ slave_gfid[2:4],
+ slave_gfid))
+ dst_entry = os.path.join(pfx, realpath.split('/')[-2],
+ realpath.split('/')[-1])
+ src_entry = pbname
+ logging.debug(lf('Fixing dir name/gfid mismatch in '
+ 'slave', retry_count=retry_count,
+ entry=repr(failure)))
+ if src_entry == dst_entry:
+ # Safe to ignore the failure as master contains
+ # same directory as in slave with same gfid.
+ # Remove the failure entry from entries list
+ logging.debug(lf('Fixing dir name/gfid mismatch'
+ ' in slave. Safe to ignore, '
+ 'take out entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ try:
+ entries.remove(failure[0])
+ except ValueError:
+ pass
+ else:
+ rename_dict = edct('RENAME', gfid=slave_gfid,
+ entry=src_entry,
+ entry1=dst_entry, stat=st,
+ link=None)
+ logging.debug(lf('Fixing dir name/gfid mismatch'
+ ' in slave. Renaming',
+ retry_count=retry_count,
+ entry=repr(rename_dict)))
+ fix_entry_ops.append(rename_dict)
+ else:
+ # A hardlink file exists with different name or
+ # renamed file exists and we are sure from
+ # matching_disk_gfid check that the entry doesn't
+ # exist with same gfid so we can safely delete on slave
+ logging.debug(lf('Fixing file gfid mismatch in slave. '
+ 'Hardlink/Rename Case. Deleting entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ fix_entry_ops.append(
+ edct('UNLINK',
+ gfid=failure[2]['slave_gfid'],
+ entry=pbname))
+ elif failure[1] == ENOENT:
+ if op in ['RENAME']:
+ pbname = failure[0]['entry1']
+ else:
+ pbname = failure[0]['entry']
+
+ pargfid = pbname.split('/')[1]
+ st = lstat(os.path.join(pfx, pargfid))
+ # Safe to ignore the failure as master doesn't contain
+ # parent directory.
+ if isinstance(st, int):
+ logging.debug(lf('Fixing ENOENT error in slave. Parent '
+ 'does not exist on master. Safe to '
+ 'ignore, take out entry',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ try:
+ entries.remove(failure[0])
+ except ValueError:
+ pass
+ else:
+ logging.debug(lf('Fixing ENOENT error in slave. Create '
+ 'parent directory on slave.',
+ retry_count=retry_count,
+ entry=repr(failure)))
+ realpath = os.readlink(os.path.join(rconf.args.local_path,
+ ".glusterfs",
+ pargfid[0:2],
+ pargfid[2:4],
+ pargfid))
+ dir_entry = os.path.join(pfx, realpath.split('/')[-2],
+ realpath.split('/')[-1])
+ fix_entry_ops.append(
+ edct('MKDIR', gfid=pargfid, entry=dir_entry,
+ mode=st.st_mode, uid=st.st_uid, gid=st.st_gid))
+
+ logging.debug("remove_gfids: %s" % repr(remove_gfids))
+ if remove_gfids:
+ for e in entries:
+ if e['op'] in ['MKDIR', 'MKNOD', 'CREATE', 'RENAME'] \
+ and e['gfid'] in remove_gfids:
+ logging.debug("Removed entry op from retrial list: entry: %s" % repr(e))
+ e['skip_entry'] = True
+
+ if fix_entry_ops:
+ # Process deletions of entries whose gfids are mismatched
+ failures1 = self.slave.server.entry_ops(fix_entry_ops)
+
+ return (failures1, fix_entry_ops)
+
+ def handle_entry_failures(self, failures, entries):
+ retries = 0
+ pending_failures = False
+ failures1 = []
+ failures2 = []
+ entry_ops1 = []
+ entry_ops2 = []
+
+ if failures:
+ pending_failures = True
+ failures1 = failures
+ entry_ops1 = entries
+
+ while pending_failures and retries < self.MAX_EF_RETRIES:
+ retries += 1
+ (failures2, entry_ops2) = self.fix_possible_entry_failures(
+ failures1, retries, entry_ops1)
+ if not failures2:
+ pending_failures = False
+ logging.info(lf('Successfully fixed entry ops with gfid '
+ 'mismatch', retry_count=retries))
+ else:
+ pending_failures = True
+ failures1 = failures2
+ entry_ops1 = entry_ops2
+
+ if pending_failures:
+ for failure in failures1:
+ logging.error("Failed to fix entry ops %s", repr(failure))
+
+ def process_change(self, change, done, retry):
+ pfx = gauxpfx()
+ clist = []
+ entries = []
+ meta_gfid = set()
+ datas = set()
+
+ change_ts = change.split(".")[-1]
+
+ # Ignore entry ops which are already processed in Changelog modes
+ ignore_entry_ops = False
+ entry_stime = None
+ data_stime = None
+ if self.name in ["live_changelog", "history_changelog"]:
+ entry_stime = self.get_entry_stime()
+ data_stime = self.get_data_stime()
+
+ if entry_stime is not None and data_stime is not None:
+ # if entry_stime is not None but data_stime > entry_stime
+ # This situation is caused by the stime update of Passive worker
+ # Consider data_stime in this case.
+ if data_stime[0] > entry_stime[0]:
+ entry_stime = data_stime
+
+ # Compare the entry_stime with changelog file suffix
+ # if changelog time is less than entry_stime then ignore
+ if int(change_ts) <= entry_stime[0]:
+ ignore_entry_ops = True
+
+ try:
+ f = open(change, "r")
+ clist = f.readlines()
+ f.close()
+ except IOError:
+ raise
+
+ for e in clist:
+ e = e.strip()
+ et = e[self.IDX_START:self.IDX_END] # entry type
+ ec = e[self.IDX_END:].split(' ') # rest of the bits
+
+ # skip ENTRY operation if hot tier brick
+ if self.name == 'live_changelog' or \
+ self.name == 'history_changelog':
+ if rconf.args.is_hottier and et == self.TYPE_ENTRY:
+ logging.debug(lf('skip ENTRY op if hot tier brick',
+ op=ec[self.POS_TYPE]))
+ continue
+
+ # Data and Meta operations are decided while parsing
+ # UNLINK/RMDIR/MKNOD except that case ignore all the other
+ # entry ops if ignore_entry_ops is True.
+ # UNLINK/RMDIR/MKNOD entry_ops are ignored in the end
+ if ignore_entry_ops and et == self.TYPE_ENTRY and \
+ ec[self.POS_TYPE] not in ["UNLINK", "RMDIR", "MKNOD"]:
+ continue
+
+ if et == self.TYPE_ENTRY:
+ # extract information according to the type of
+ # the entry operation. create(), mkdir() and mknod()
+ # have mode, uid, gid information in the changelog
+ # itself, so no need to stat()...
+ ty = ec[self.POS_TYPE]
+
+ self.update_fop_batch_stats(ec[self.POS_TYPE])
+
+ # PARGFID/BNAME
+ en = unescape_space_newline(
+ os.path.join(pfx, ec[self.POS_ENTRY1]))
+ # GFID of the entry
+ gfid = ec[self.POS_GFID]
+
+ if ty in ['UNLINK', 'RMDIR']:
+ # The index of PARGFID/BNAME for UNLINK, RMDIR
+ # is no more the last index. It varies based on
+ # changelog.capture-del-path is enabled or not.
+ en = unescape_space_newline(
+ os.path.join(pfx, ec[self.UNLINK_ENTRY]))
+
+ # Remove from DATA list, so that rsync will
+ # not fail
+ pt = os.path.join(pfx, ec[0])
+ st = lstat(pt)
+ if pt in datas and isinstance(st, int):
+ # file got unlinked, May be historical Changelog
+ datas.remove(pt)
+
+ if ty in ['RMDIR'] and not isinstance(st, int):
+ logging.info(lf('Ignoring rmdir. Directory present in '
+ 'master', gfid=gfid, pgfid_bname=en))
+ continue
+
+ if not gconf.get("ignore-deletes"):
+ if not ignore_entry_ops:
+ entries.append(edct(ty, gfid=gfid, entry=en))
+ elif ty in ['CREATE', 'MKDIR', 'MKNOD']:
+ # Special case: record mknod as link
+ if ty in ['MKNOD']:
+ mode = int(ec[2])
+ if mode & 0o1000:
+ # Avoid stat'ing the file as it
+ # may be deleted in the interim
+ st = FreeObject(st_mode=int(ec[2]),
+ st_uid=int(ec[3]),
+ st_gid=int(ec[4]),
+ st_atime=0,
+ st_mtime=0)
+
+ # So, it may be deleted, but still we are
+ # append LINK? Because, the file will be
+ # CREATED if source not exists.
+ entries.append(edct('LINK', stat=st, entry=en,
+ gfid=gfid))
+
+ # Here, we have the assumption that only
+ # tier-gfid.linkto causes this mknod. Add data
+ datas.add(os.path.join(pfx, ec[0]))
+ continue
+
+ # stat info. present in the changelog itself
+ entries.append(edct(ty, gfid=gfid, entry=en,
+ mode=int(ec[2]),
+ uid=int(ec[3]), gid=int(ec[4])))
+ elif ty == "RENAME":
+ go = os.path.join(pfx, gfid)
+ st = lstat(go)
+ if isinstance(st, int):
+ st = {}
+
+ rl = None
+ if st and stat.S_ISLNK(st.st_mode):
+ rl = errno_wrap(os.readlink, [en], [ENOENT],
+ [ESTALE, EINTR])
+ if isinstance(rl, int):
+ rl = None
+
+ e1 = unescape_space_newline(
+ os.path.join(pfx, ec[self.POS_ENTRY1 - 1]))
+ entries.append(edct(ty, gfid=gfid, entry=e1, entry1=en,
+ stat=st, link=rl))
+ # If src doesn't exist while doing rename, destination
+ # is created. If data is not followed by rename, this
+ # remains zero byte file on slave. Hence add data entry
+ # for renames
+ datas.add(os.path.join(pfx, gfid))
+ else:
+ # stat() to get mode and other information
+ if not matching_disk_gfid(gfid, en):
+ logging.debug(lf('Ignoring entry, purged in the '
+ 'interim', file=en, gfid=gfid))
+ continue
+
+ go = os.path.join(pfx, gfid)
+ st = lstat(go)
+ if isinstance(st, int):
+ logging.debug(lf('Ignoring entry, purged in the '
+ 'interim', file=en, gfid=gfid))
+ continue
+
+ if ty == 'LINK':
+ rl = None
+ if st and stat.S_ISLNK(st.st_mode):
+ rl = errno_wrap(os.readlink, [en], [ENOENT],
+ [ESTALE, EINTR])
+ if isinstance(rl, int):
+ rl = None
+ entries.append(edct(ty, stat=st, entry=en, gfid=gfid,
+ link=rl))
+ # If src doesn't exist while doing link, destination
+ # is created based on file type. If data is not
+ # followed by link, this remains zero byte file on
+ # slave. Hence add data entry for links
+ if rl is None:
+ datas.add(os.path.join(pfx, gfid))
+ elif ty == 'SYMLINK':
+ rl = errno_wrap(os.readlink, [en], [ENOENT],
+ [ESTALE, EINTR])
+ if isinstance(rl, int):
+ continue
+
+ entries.append(
+ edct(ty, stat=st, entry=en, gfid=gfid, link=rl))
+ else:
+ logging.warn(lf('ignoring op',
+ gfid=gfid,
+ type=ty))
+ elif et == self.TYPE_GFID:
+ # If self.unlinked_gfids is available, then that means it is
+ # retrying the changelog second time. Do not add the GFID's
+ # to rsync job if failed previously but unlinked in master
+ if self.unlinked_gfids and \
+ os.path.join(pfx, ec[0]) in self.unlinked_gfids:
+ logging.debug("ignoring data, since file purged interim")
+ else:
+ datas.add(os.path.join(pfx, ec[0]))
+ elif et == self.TYPE_META:
+ self.update_fop_batch_stats(ec[self.POS_TYPE])
+ if ec[1] == 'SETATTR': # only setattr's for now...
+ if len(ec) == 5:
+ # In xsync crawl, we already have stat data
+ # avoid doing stat again
+ meta_gfid.add((os.path.join(pfx, ec[0]),
+ XCrawlMetadata(st_uid=ec[2],
+ st_gid=ec[3],
+ st_mode=ec[4],
+ st_atime=ec[5],
+ st_mtime=ec[6])))
+ else:
+ meta_gfid.add((os.path.join(pfx, ec[0]), ))
+ elif ec[1] in ['SETXATTR', 'XATTROP', 'FXATTROP']:
+ # To sync xattr/acls use rsync/tar, --xattrs and --acls
+ # switch to rsync and tar
+ if not gconf.get("sync-method") == "tarssh" and \
+ (gconf.get("sync-xattrs") or gconf.get("sync-acls")):
+ datas.add(os.path.join(pfx, ec[0]))
+ else:
+ logging.warn(lf('got invalid fop type',
+ type=et))
+ logging.debug('entries: %s' % repr(entries))
+
+ # Increment counters for Status
+ self.files_in_batch += len(datas)
+ self.status.inc_value("data", len(datas))
+
+ self.batch_stats["DATA"] += self.files_in_batch - \
+ self.batch_stats["SETXATTR"] - \
+ self.batch_stats["XATTROP"]
+
+ entry_start_time = time.time()
+ # sync namespace
+ if entries and not ignore_entry_ops:
+ # Increment counters for Status
+ self.status.inc_value("entry", len(entries))
+
+ failures = self.slave.server.entry_ops(entries)
+
+ if gconf.get("gfid-conflict-resolution"):
+ count = 0
+ if failures:
+ logging.info(lf('Entry ops failed with gfid mismatch',
+ count=len(failures)))
+ while failures and count < self.MAX_OE_RETRIES:
+ count += 1
+ self.handle_entry_failures(failures, entries)
+ logging.info(lf('Retry original entries', count=count))
+ failures = self.slave.server.entry_ops(entries)
+ if not failures:
+ logging.info("Successfully fixed all entry ops with "
+ "gfid mismatch")
+ break
+
+ self.log_failures(failures, 'gfid', gauxpfx(), 'ENTRY')
+ self.status.dec_value("entry", len(entries))
+
+ # Update Entry stime in Brick Root only in case of Changelog mode
+ if self.name in ["live_changelog", "history_changelog"]:
+ entry_stime_to_update = (int(change_ts) - 1, 0)
+ self.upd_entry_stime(entry_stime_to_update)
+ self.status.set_field("last_synced_entry",
+ entry_stime_to_update[0])
+
+ self.batch_stats["ENTRY_SYNC_TIME"] += time.time() - entry_start_time
+
+ if ignore_entry_ops:
+ # Book keeping, to show in logs the range of Changelogs skipped
+ self.num_skipped_entry_changelogs += 1
+ if self.skipped_entry_changelogs_first is None:
+ self.skipped_entry_changelogs_first = change_ts
+
+ self.skipped_entry_changelogs_last = change_ts
+
+ meta_start_time = time.time()
+ # sync metadata
+ if meta_gfid:
+ meta_entries = []
+ for go in meta_gfid:
+ if len(go) > 1:
+ st = go[1]
+ else:
+ st = lstat(go[0])
+ if isinstance(st, int):
+ logging.debug(lf('file got purged in the interim',
+ file=go[0]))
+ continue
+ meta_entries.append(edct('META', go=go[0], stat=st))
+ if meta_entries:
+ self.status.inc_value("meta", len(meta_entries))
+ failures = self.slave.server.meta_ops(meta_entries)
+ self.log_failures(failures, 'go', '', 'META')
+ self.status.dec_value("meta", len(meta_entries))
+
+ self.batch_stats["META_SYNC_TIME"] += time.time() - meta_start_time
+
+ if self.batch_stats["DATA_START_TIME"] == 0:
+ self.batch_stats["DATA_START_TIME"] = time.time()
+
+ # sync data
+ if datas:
+ self.a_syncdata(datas)
+ self.datas_in_batch.update(datas)
+
+ def process(self, changes, done=1):
+ tries = 0
+ retry = False
+ self.unlinked_gfids = set()
+ self.files_in_batch = 0
+ self.datas_in_batch = set()
+ # Error log disabled till the last round
+ self.syncer.disable_errorlog()
+ self.skipped_entry_changelogs_first = None
+ self.skipped_entry_changelogs_last = None
+ self.num_skipped_entry_changelogs = 0
+ self.batch_start_time = time.time()
+ self.init_fop_batch_stats()
+
+ while True:
+ # first, fire all changelog transfers in parallel. entry and
+ # metadata are performed synchronously, therefore in serial.
+ # However at the end of each changelog, data is synchronized
+ # with syncdata_async() - which means it is serial w.r.t
+ # entries/metadata of that changelog but happens in parallel
+ # with data of other changelogs.
+
+ if retry:
+ if tries == (gconf.get("max-rsync-retries") - 1):
+ # Enable Error logging if it is last retry
+ self.syncer.enable_errorlog()
+
+ # Remove Unlinked GFIDs from Queue
+ for unlinked_gfid in self.unlinked_gfids:
+ if unlinked_gfid in self.datas_in_batch:
+ self.datas_in_batch.remove(unlinked_gfid)
+
+ # Retry only Sync. Do not retry entry ops
+ if self.datas_in_batch:
+ self.a_syncdata(self.datas_in_batch)
+ else:
+ for change in changes:
+ logging.debug(lf('processing change',
+ changelog=change))
+ self.process_change(change, done, retry)
+ if not retry:
+ # number of changelogs processed in the batch
+ self.turns += 1
+
+ # Now we wait for all the data transfers fired off in the above
+ # step to complete. Note that this is not ideal either. Ideally
+ # we want to trigger the entry/meta-data transfer of the next
+ # batch while waiting for the data transfer of the current batch
+ # to finish.
+
+ # Note that the reason to wait for the data transfer (vs doing it
+ # completely in the background and call the changelog_done()
+ # asynchronously) is because this waiting acts as a "backpressure"
+ # and prevents a spiraling increase of wait stubs from consuming
+ # unbounded memory and resources.
+
+ # update the slave's time with the timestamp of the _last_
+ # changelog file time suffix. Since, the changelog prefix time
+ # is the time when the changelog was rolled over, introduce a
+ # tolerance of 1 second to counter the small delta b/w the
+ # marker update and gettimeofday().
+ # NOTE: this is only for changelog mode, not xsync.
+
+ # @change is the last changelog (therefore max time for this batch)
+ if self.syncdata_wait():
+ self.unlinked_gfids = set()
+ if done:
+ xtl = (int(change.split('.')[-1]) - 1, 0)
+ self.upd_stime(xtl)
+ list(map(self.changelog_done_func, changes))
+ self.archive_and_purge_changelogs(changes)
+
+ # Reset Data counter after sync
+ self.status.dec_value("data", self.files_in_batch)
+ self.files_in_batch = 0
+ self.datas_in_batch = set()
+ break
+
+ # We do not know which changelog transfer failed, retry everything.
+ retry = True
+ tries += 1
+ if tries == gconf.get("max-rsync-retries"):
+ logging.error(lf('changelogs could not be processed '
+ 'completely - moving on...',
+ files=list(map(os.path.basename, changes))))
+
+ # Reset data counter on failure
+ self.status.dec_value("data", self.files_in_batch)
+ self.files_in_batch = 0
+ self.datas_in_batch = set()
+
+ if done:
+ xtl = (int(change.split('.')[-1]) - 1, 0)
+ self.upd_stime(xtl)
+ list(map(self.changelog_done_func, changes))
+ self.archive_and_purge_changelogs(changes)
+ break
+ # it's either entry_ops() or Rsync that failed to do it's
+ # job. Mostly it's entry_ops() [which currently has a problem
+ # of failing to create an entry but failing to return an errno]
+ # Therefore we do not know if it's either Rsync or the freaking
+ # entry_ops() that failed... so we retry the _whole_ changelog
+ # again.
+ # TODO: remove entry retries when it's gets fixed.
+ logging.warn(lf('incomplete sync, retrying changelogs',
+ files=list(map(os.path.basename, changes))))
+
+ # Reset the Data counter before Retry
+ self.status.dec_value("data", self.files_in_batch)
+ self.files_in_batch = 0
+ self.init_fop_batch_stats()
+ time.sleep(0.5)
+
+ # Log the Skipped Entry ops range if any
+ if self.skipped_entry_changelogs_first is not None and \
+ self.skipped_entry_changelogs_last is not None:
+ logging.info(lf("Skipping already processed entry ops",
+ from_changelog=self.skipped_entry_changelogs_first,
+ to_changelog=self.skipped_entry_changelogs_last,
+ num_changelogs=self.num_skipped_entry_changelogs))
+
+ # Log Current batch details
+ if changes:
+ logging.info(
+ lf("Entry Time Taken",
+ UNL=self.batch_stats["UNLINK"],
+ RMD=self.batch_stats["RMDIR"],
+ CRE=self.batch_stats["CREATE"],
+ MKN=self.batch_stats["MKNOD"],
+ MKD=self.batch_stats["MKDIR"],
+ REN=self.batch_stats["RENAME"],
+ LIN=self.batch_stats["LINK"],
+ SYM=self.batch_stats["SYMLINK"],
+ duration="%.4f" % self.batch_stats["ENTRY_SYNC_TIME"]))
+
+ logging.info(
+ lf("Data/Metadata Time Taken",
+ SETA=self.batch_stats["SETATTR"],
+ meta_duration="%.4f" % self.batch_stats["META_SYNC_TIME"],
+ SETX=self.batch_stats["SETXATTR"],
+ XATT=self.batch_stats["XATTROP"],
+ DATA=self.batch_stats["DATA"],
+ data_duration="%.4f" % (
+ time.time() - self.batch_stats["DATA_START_TIME"])))
+
+ logging.info(
+ lf("Batch Completed",
+ mode=self.name,
+ duration="%.4f" % (time.time() - self.batch_start_time),
+ changelog_start=changes[0].split(".")[-1],
+ changelog_end=changes[-1].split(".")[-1],
+ num_changelogs=len(changes),
+ stime=self.get_data_stime(),
+ entry_stime=self.get_entry_stime()))
+
+ def upd_entry_stime(self, stime):
+ self.slave.server.set_entry_stime(self.FLAT_DIR_HIERARCHY,
+ self.uuid,
+ stime)
+
+ def upd_stime(self, stime, path=None):
+ if not path:
+ path = self.FLAT_DIR_HIERARCHY
+ if not stime == URXTIME:
+ self.sendmark(path, stime)
+
+ # Update last_synced_time in status file based on stime
+ # only update stime if stime xattr set to Brick root
+ if path == self.FLAT_DIR_HIERARCHY:
+ chkpt_time = gconf.getr("checkpoint")
+ checkpoint_time = 0
+ if chkpt_time is not None:
+ checkpoint_time = int(chkpt_time)
+
+ self.status.set_last_synced(stime, checkpoint_time)
+
+ def update_worker_remote_node(self):
+ node = rconf.args.resource_remote
+ node_data = node.split("@")
+ node = node_data[-1]
+ remote_node_ip, _ = host_brick_split(node)
+ self.status.set_slave_node(remote_node_ip)
+
+ def changelogs_batch_process(self, changes):
+ changelogs_batches = []
+ current_size = 0
+ for c in changes:
+ si = os.lstat(c).st_size
+ if (si + current_size) > gconf.get("changelog-batch-size"):
+ # Create new batch if single Changelog file greater than
+ # Max Size! or current batch size exceeds Max size
+ changelogs_batches.append([c])
+ current_size = si
+ else:
+ # Append to last batch, if No batches available Create one
+ current_size += si
+ if not changelogs_batches:
+ changelogs_batches.append([c])
+ else:
+ changelogs_batches[-1].append(c)
+
+ for batch in changelogs_batches:
+ logging.debug(lf('processing changes',
+ batch=batch))
+ self.process(batch)
+
+ def crawl(self):
+ self.status.set_worker_crawl_status("Changelog Crawl")
+ changes = []
+ # get stime (from the brick) and purge changelogs
+ # that are _historical_ to that time.
+ data_stime = self.get_data_stime()
+
+ libgfchangelog.scan()
+ self.crawls += 1
+ changes = libgfchangelog.getchanges()
+ if changes:
+ if data_stime:
+ logging.info(lf("slave's time",
+ stime=data_stime))
+ processed = [x for x in changes
+ if int(x.split('.')[-1]) < data_stime[0]]
+ for pr in processed:
+ logging.debug(
+ lf('skipping already processed change',
+ changelog=os.path.basename(pr)))
+ self.changelog_done_func(pr)
+ changes.remove(pr)
+ self.archive_and_purge_changelogs(processed)
+
+ self.changelogs_batch_process(changes)
+
+ def register(self, register_time, status):
+ self.sleep_interval = gconf.get("change-interval")
+ self.changelog_done_func = libgfchangelog.done
+ self.tempdir = self.setup_working_dir()
+ self.processed_changelogs_dir = os.path.join(self.tempdir,
+ ".processed")
+ self.name = "live_changelog"
+ self.status = status
+
+
+class GMasterChangeloghistoryMixin(GMasterChangelogMixin):
+ def register(self, register_time, status):
+ self.changelog_register_time = register_time
+ self.history_crawl_start_time = register_time
+ self.changelog_done_func = libgfchangelog.history_done
+ self.history_turns = 0
+ self.tempdir = self.setup_working_dir()
+ self.processed_changelogs_dir = os.path.join(self.tempdir,
+ ".history/.processed")
+ self.name = "history_changelog"
+ self.status = status
+
+ def crawl(self):
+ self.history_turns += 1
+ self.status.set_worker_crawl_status("History Crawl")
+ data_stime = self.get_data_stime()
+
+ end_time = int(time.time())
+
+ #as start of historical crawl marks Geo-rep worker restart
+ if gconf.get("ignore-deletes"):
+ logging.info(lf('ignore-deletes config option is set',
+ stime=data_stime))
+
+ logging.info(lf('starting history crawl',
+ turns=self.history_turns,
+ stime=data_stime,
+ etime=end_time,
+ entry_stime=self.get_entry_stime()))
+
+ if not data_stime or data_stime == URXTIME:
+ raise NoStimeAvailable()
+
+ # Changelogs backend path is hardcoded as
+ # <BRICK_PATH>/.glusterfs/changelogs, if user configured to different
+ # location then consuming history will not work(Known issue as of now)
+ changelog_path = os.path.join(rconf.args.local_path,
+ ".glusterfs/changelogs")
+ ret, actual_end = libgfchangelog.history_changelog(
+ changelog_path,
+ data_stime[0],
+ end_time,
+ gconf.get("sync-jobs"))
+
+ # scan followed by getchanges till scan returns zero.
+ # history_scan() is blocking call, till it gets the number
+ # of changelogs to process. Returns zero when no changelogs
+ # to be processed. returns positive value as number of changelogs
+ # to be processed, which will be fetched using
+ # history_getchanges()
+ while libgfchangelog.history_scan() > 0:
+ self.crawls += 1
+
+ changes = libgfchangelog.history_getchanges()
+ if changes:
+ if data_stime:
+ logging.info(lf("slave's time",
+ stime=data_stime))
+ processed = [x for x in changes
+ if int(x.split('.')[-1]) < data_stime[0]]
+ for pr in processed:
+ logging.debug(lf('skipping already processed change',
+ changelog=os.path.basename(pr)))
+ self.changelog_done_func(pr)
+ changes.remove(pr)
+
+ self.changelogs_batch_process(changes)
+
+ history_turn_time = int(time.time()) - self.history_crawl_start_time
+
+ logging.info(lf('finished history crawl',
+ endtime=actual_end,
+ stime=self.get_data_stime(),
+ entry_stime=self.get_entry_stime()))
+
+ # If TS returned from history_changelog is < register_time
+ # then FS crawl may be required, since history is only available
+ # till TS returned from history_changelog
+ if actual_end < self.changelog_register_time:
+ if self.history_turns < 2:
+ sleep_time = 1
+ if history_turn_time < CHANGELOG_ROLLOVER_TIME:
+ sleep_time = CHANGELOG_ROLLOVER_TIME - history_turn_time
+ time.sleep(sleep_time)
+ self.history_crawl_start_time = int(time.time())
+ self.crawl()
+ else:
+ # This exception will be caught in resource.py and
+ # fallback to xsync for the small gap.
+ raise PartialHistoryAvailable(str(actual_end))
+
+
+class GMasterXsyncMixin(GMasterChangelogMixin):
+
+ """
+ This crawl needs to be xtime based (as of now
+ it's not. this is because we generate CHANGELOG
+ file during each crawl which is then processed
+ by process_change()).
+ For now it's used as a one-shot initial sync
+ mechanism and only syncs directories, regular
+ files, hardlinks and symlinks.
+ """
+
+ XSYNC_MAX_ENTRIES = 1 << 13
+
+ def register(self, register_time=None, status=None):
+ self.status = status
+ self.counter = 0
+ self.comlist = []
+ self.stimes = []
+ self.sleep_interval = 60
+ self.tempdir = self.setup_working_dir()
+ logging.info(lf('Working dir',
+ path=self.tempdir))
+ self.tempdir = os.path.join(self.tempdir, 'xsync')
+ self.processed_changelogs_dir = self.tempdir
+ self.name = "xsync"
+ try:
+ os.makedirs(self.tempdir)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == EEXIST and os.path.isdir(self.tempdir):
+ pass
+ else:
+ raise
+ # Purge stale unprocessed xsync changelogs
+ for f in os.listdir(self.tempdir):
+ if f.startswith("XSYNC-CHANGELOG"):
+ os.remove(os.path.join(self.tempdir, f))
+
+
+ def crawl(self):
+ """
+ event dispatcher thread
+
+ this thread dispatches either changelog or synchronizes stime.
+ additionally terminates itself on receiving a 'finale' event
+ """
+ def Xsyncer():
+ self.Xcrawl()
+ t = Thread(target=Xsyncer)
+ t.start()
+ logging.info(lf('starting hybrid crawl',
+ stime=self.get_data_stime()))
+ self.status.set_worker_crawl_status("Hybrid Crawl")
+ while True:
+ try:
+ item = self.comlist.pop(0)
+ if item[0] == 'finale':
+ logging.info(lf('finished hybrid crawl',
+ stime=self.get_data_stime()))
+ break
+ elif item[0] == 'xsync':
+ logging.info(lf('processing xsync changelog',
+ path=item[1]))
+ self.process([item[1]], 0)
+ self.archive_and_purge_changelogs([item[1]])
+ elif item[0] == 'stime':
+ logging.debug(lf('setting slave time',
+ time=item[1]))
+ self.upd_stime(item[1][1], item[1][0])
+ else:
+ logging.warn(lf('unknown tuple in comlist',
+ entry=item))
+ except IndexError:
+ time.sleep(1)
+
+ def write_entry_change(self, prefix, data=[]):
+ if not getattr(self, "fh", None):
+ self.open()
+
+ self.fh.write("%s %s\n" % (prefix, ' '.join(data)))
+
+ def open(self):
+ try:
+ self.xsync_change = os.path.join(
+ self.tempdir, 'XSYNC-CHANGELOG.' + str(int(time.time())))
+ self.fh = open(self.xsync_change, 'w')
+ except IOError:
+ raise
+
+ def close(self):
+ if getattr(self, "fh", None):
+ self.fh.flush()
+ os.fsync(self.fh.fileno())
+ self.fh.close()
+ self.fh = None
+
+ def fname(self):
+ return self.xsync_change
+
+ def put(self, mark, item):
+ self.comlist.append((mark, item))
+
+ def sync_xsync(self, last):
+ """schedule a processing of changelog"""
+ self.close()
+ if self.counter > 0:
+ self.put('xsync', self.fname())
+ self.counter = 0
+ if not last:
+ time.sleep(1) # make sure changelogs are 1 second apart
+
+ def sync_stime(self, stime=None, last=False):
+ """schedule a stime synchronization"""
+ if stime:
+ self.put('stime', stime)
+ if last:
+ self.put('finale', None)
+
+ def sync_done(self, stime=[], last=False):
+ self.sync_xsync(last)
+ if stime:
+ # Send last as True only for last stime entry
+ for st in stime[:-1]:
+ self.sync_stime(st, False)
+
+ if stime and stime[-1]:
+ self.sync_stime(stime[-1], last)
+
+ def is_sticky(self, path, mo):
+ """check for DHTs linkto sticky bit file"""
+ sticky = False
+ if mo & 0o1000:
+ sticky = self.master.server.linkto_check(path)
+ return sticky
+
+ def Xcrawl(self, path='.', xtr_root=None):
+ """
+ generate a CHANGELOG file consumable by process_change.
+
+ slave's xtime (stime) is _cached_ for comparisons across
+ the filesystem tree, but set after directory synchronization.
+ """
+ if path == '.':
+ self.crawls += 1
+ if not xtr_root:
+ # get the root stime and use it for all comparisons
+ xtr_root = self.xtime('.', self.slave)
+ if isinstance(xtr_root, int):
+ if xtr_root != ENOENT:
+ logging.warn(lf("slave cluster not returning the "
+ "xtime for root",
+ error=xtr_root))
+ xtr_root = self.minus_infinity
+ xtl = self.xtime(path)
+ if isinstance(xtl, int):
+ logging.warn("master cluster's xtime not found")
+ xtr = self.xtime(path, self.slave)
+ if isinstance(xtr, int):
+ if xtr != ENOENT:
+ logging.warn(lf("slave cluster not returning the "
+ "xtime for dir",
+ path=path,
+ error=xtr))
+ xtr = self.minus_infinity
+ xtr = max(xtr, xtr_root)
+ zero_zero = (0, 0)
+ if xtr_root == zero_zero:
+ xtr = self.minus_infinity
+ if not self.need_sync(path, xtl, xtr):
+ if path == '.':
+ self.sync_done([(path, xtl)], True)
+ return
+ self.xtime_reversion_hook(path, xtl, xtr)
+ logging.debug("entering " + path)
+ dem = self.master.server.entries(path)
+ pargfid = self.master.server.gfid(path)
+ if isinstance(pargfid, int):
+ logging.warn(lf('skipping directory',
+ path=path))
+ for e in dem:
+ bname = e
+ e = os.path.join(path, e)
+ xte = self.xtime(e)
+ if isinstance(xte, int):
+ logging.warn(lf("irregular xtime",
+ path=e,
+ error=errno.errorcode[xte]))
+ continue
+ if not self.need_sync(e, xte, xtr):
+ continue
+ st = self.master.server.lstat(e)
+ if isinstance(st, int):
+ logging.warn(lf('got purged in the interim',
+ path=e))
+ continue
+ if self.is_sticky(e, st.st_mode):
+ logging.debug(lf('ignoring sticky bit file',
+ path=e))
+ continue
+ gfid = self.master.server.gfid(e)
+ if isinstance(gfid, int):
+ logging.warn(lf('skipping entry',
+ path=e))
+ continue
+ mo = st.st_mode
+ self.counter += 1 if ((stat.S_ISDIR(mo) or
+ stat.S_ISLNK(mo) or
+ stat.S_ISREG(mo))) else 0
+ if self.counter == self.XSYNC_MAX_ENTRIES:
+ self.sync_done(self.stimes, False)
+ self.stimes = []
+ if stat.S_ISDIR(mo):
+ self.write_entry_change("E",
+ [gfid, 'MKDIR', str(mo),
+ str(0), str(0), escape_space_newline(
+ os.path.join(pargfid, bname))])
+ self.write_entry_change("M", [gfid, "SETATTR", str(st.st_uid),
+ str(st.st_gid), str(st.st_mode),
+ str(st.st_atime),
+ str(st.st_mtime)])
+ self.Xcrawl(e, xtr_root)
+ stime_to_update = xte
+ # Live Changelog Start time indicates that from that time
+ # onwards Live changelogs are available. If we update stime
+ # greater than live_changelog_start time then Geo-rep will
+ # skip those changelogs as already processed. But Xsync
+ # actually failed to sync the deletes and Renames. Update
+ # stime as min(Live_changelogs_time, Actual_stime) When it
+ # switches to Changelog mode, it syncs Deletes and Renames.
+ if self.live_changelog_start_time:
+ stime_to_update = min(self.live_changelog_start_time, xte)
+ self.stimes.append((e, stime_to_update))
+ elif stat.S_ISLNK(mo):
+ self.write_entry_change(
+ "E", [gfid, 'SYMLINK', escape_space_newline(
+ os.path.join(pargfid, bname))])
+ elif stat.S_ISREG(mo):
+ nlink = st.st_nlink
+ nlink -= 1 # fixup backend stat link count
+ # if a file has a hardlink, create a Changelog entry as
+ # 'LINK' so the slave side will decide if to create the
+ # new entry, or to create link.
+ if nlink == 1:
+ self.write_entry_change("E",
+ [gfid, 'MKNOD', str(mo),
+ str(0), str(0),
+ escape_space_newline(
+ os.path.join(
+ pargfid, bname))])
+ else:
+ self.write_entry_change(
+ "E", [gfid, 'LINK', escape_space_newline(
+ os.path.join(pargfid, bname))])
+ self.write_entry_change("D", [gfid])
+ if path == '.':
+ stime_to_update = xtl
+ if self.live_changelog_start_time:
+ stime_to_update = min(self.live_changelog_start_time, xtl)
+ self.stimes.append((path, stime_to_update))
+ self.sync_done(self.stimes, True)
+
+
+class BoxClosedErr(Exception):
+ pass
+
+
+class PostBox(list):
+
+ """synchronized collection for storing things thought of as "requests" """
+
+ def __init__(self, *a):
+ list.__init__(self, *a)
+ # too bad Python stdlib does not have read/write locks...
+ # it would suffivce to grab the lock in .append as reader, in .close as
+ # writer
+ self.lever = Condition()
+ self.open = True
+ self.done = False
+
+ def wait(self):
+ """wait on requests to be processed"""
+ self.lever.acquire()
+ if not self.done:
+ self.lever.wait()
+ self.lever.release()
+ return self.result
+
+ def wakeup(self, data):
+ """wake up requestors with the result"""
+ self.result = data
+ self.lever.acquire()
+ self.done = True
+ self.lever.notifyAll()
+ self.lever.release()
+
+ def append(self, e):
+ """post a request"""
+ self.lever.acquire()
+ if not self.open:
+ raise BoxClosedErr
+ list.append(self, e)
+ self.lever.release()
+
+ def close(self):
+ """prohibit the posting of further requests"""
+ self.lever.acquire()
+ self.open = False
+ self.lever.release()
+
+
+class Syncer(object):
+
+ """a staged queue to relay rsync requests to rsync workers
+
+ By "staged queue" its meant that when a consumer comes to the
+ queue, it takes _all_ entries, leaving the queue empty.
+ (I don't know if there is an official term for this pattern.)
+
+ The queue uses a PostBox to accumulate incoming items.
+ When a consumer (rsync worker) comes, a new PostBox is
+ set up and the old one is passed on to the consumer.
+
+ Instead of the simplistic scheme of having one big lock
+ which synchronizes both the addition of new items and
+ PostBox exchanges, use a separate lock to arbitrate consumers,
+ and rely on PostBox's synchronization mechanisms take
+ care about additions.
+
+ There is a corner case racy situation, producers vs. consumers,
+ which is not handled by this scheme: namely, when the PostBox
+ exchange occurs in between being passed to the producer for posting
+ and the post placement. But that's what Postbox.close is for:
+ such a posting will find the PostBox closed, in which case
+ the producer can re-try posting against the actual PostBox of
+ the queue.
+
+ To aid accumlation of items in the PostBoxen before grabbed
+ by an rsync worker, the worker goes to sleep a bit after
+ each completed syncjob.
+ """
+
+ def __init__(self, slave, sync_engine, resilient_errnos=[]):
+ """spawn worker threads"""
+ self.log_err = False
+ self.slave = slave
+ self.lock = Lock()
+ self.pb = PostBox()
+ self.sync_engine = sync_engine
+ self.errnos_ok = resilient_errnos
+ for i in range(gconf.get("sync-jobs")):
+ t = Thread(target=self.syncjob, args=(i + 1, ))
+ t.start()
+
+ def syncjob(self, job_id):
+ """the life of a worker"""
+ while True:
+ pb = None
+ while True:
+ self.lock.acquire()
+ if self.pb:
+ pb, self.pb = self.pb, PostBox()
+ self.lock.release()
+ if pb:
+ break
+ time.sleep(0.5)
+ pb.close()
+ start = time.time()
+ po = self.sync_engine(pb, self.log_err)
+ logging.info(lf("Sync Time Taken",
+ job=job_id,
+ num_files=len(pb),
+ return_code=po.returncode,
+ duration="%.4f" % (time.time() - start)))
+
+ if po.returncode == 0:
+ ret = (True, 0)
+ elif po.returncode in self.errnos_ok:
+ ret = (False, po.returncode)
+ else:
+ po.errfail()
+ pb.wakeup(ret)
+
+ def add(self, e):
+ while True:
+ pb = self.pb
+ try:
+ pb.append(e)
+ return pb
+ except BoxClosedErr:
+ pass
+
+ def enable_errorlog(self):
+ self.log_err = True
+
+ def disable_errorlog(self):
+ self.log_err = False
diff --git a/geo-replication/syncdaemon/monitor.py b/geo-replication/syncdaemon/monitor.py
new file mode 100644
index 00000000000..6aa7b9dfc99
--- /dev/null
+++ b/geo-replication/syncdaemon/monitor.py
@@ -0,0 +1,395 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import os
+import sys
+import time
+import signal
+import logging
+import xml.etree.ElementTree as XET
+from threading import Lock
+from errno import ECHILD, ESRCH
+import random
+
+from resource import SSH
+import gsyncdconfig as gconf
+import libgfchangelog
+from rconf import rconf
+from syncdutils import (select, waitpid, errno_wrap, lf, grabpidfile,
+ set_term_handler, GsyncdError,
+ Thread, finalize, Volinfo, VolinfoFromGconf,
+ gf_event, EVENT_GEOREP_FAULTY, get_up_nodes,
+ unshare_propagation_supported)
+from gsyncdstatus import GeorepStatus, set_monitor_status
+import py2py3
+from py2py3 import pipe
+
+ParseError = XET.ParseError if hasattr(XET, 'ParseError') else SyntaxError
+
+
+def get_subvol_num(brick_idx, vol, hot):
+ tier = vol.is_tier()
+ disperse_count = vol.disperse_count(tier, hot)
+ replica_count = vol.replica_count(tier, hot)
+ distribute_count = vol.distribution_count(tier, hot)
+ gconf.setconfig("master-distribution-count", distribute_count)
+
+ if (tier and not hot):
+ brick_idx = brick_idx - vol.get_hot_bricks_count(tier)
+
+ subvol_size = disperse_count if disperse_count > 0 else replica_count
+ cnt = int((brick_idx + 1) / subvol_size)
+ rem = (brick_idx + 1) % subvol_size
+ if rem > 0:
+ cnt = cnt + 1
+
+ if (tier and hot):
+ return "hot_" + str(cnt)
+ elif (tier and not hot):
+ return "cold_" + str(cnt)
+ else:
+ return str(cnt)
+
+
+class Monitor(object):
+
+ """class which spawns and manages gsyncd workers"""
+
+ ST_INIT = 'Initializing...'
+ ST_STARTED = 'Started'
+ ST_STABLE = 'Active'
+ ST_FAULTY = 'Faulty'
+ ST_INCON = 'inconsistent'
+ _ST_ORD = [ST_STABLE, ST_INIT, ST_FAULTY, ST_INCON]
+
+ def __init__(self):
+ self.lock = Lock()
+ self.state = {}
+ self.status = {}
+
+ @staticmethod
+ def terminate():
+ # relax one SIGTERM by setting a handler that sets back
+ # standard handler
+ set_term_handler(lambda *a: set_term_handler())
+ # give a chance to graceful exit
+ errno_wrap(os.kill, [-os.getpid(), signal.SIGTERM], [ESRCH])
+
+ def monitor(self, w, argv, cpids, slave_vol, slave_host, master,
+ suuid, slavenodes):
+ """the monitor loop
+
+ Basic logic is a blantantly simple blunt heuristics:
+ if spawned client survives 60 secs, it's considered OK.
+ This servers us pretty well as it's not vulneralbe to
+ any kind of irregular behavior of the child...
+
+ ... well, except for one: if children is hung up on
+ waiting for some event, it can survive aeons, still
+ will be defunct. So we tweak the above logic to
+ expect the worker to send us a signal within 60 secs
+ (in the form of closing its end of a pipe). The worker
+ does this when it's done with the setup stage
+ ready to enter the service loop (note it's the setup
+ stage which is vulnerable to hangs -- the full
+ blown worker blows up on EPIPE if the net goes down,
+ due to the keep-alive thread)
+ """
+ if not self.status.get(w[0]['dir'], None):
+ self.status[w[0]['dir']] = GeorepStatus(gconf.get("state-file"),
+ w[0]['host'],
+ w[0]['dir'],
+ w[0]['uuid'],
+ master,
+ "%s::%s" % (slave_host,
+ slave_vol))
+ ret = 0
+
+ def nwait(p, o=0):
+ try:
+ p2, r = waitpid(p, o)
+ if not p2:
+ return
+ return r
+ except OSError as e:
+ # no child process, this happens if the child process
+ # already died and has been cleaned up
+ if e.errno == ECHILD:
+ return -1
+ else:
+ raise
+
+ def exit_signalled(s):
+ """ child terminated due to receipt of SIGUSR1 """
+ return (os.WIFSIGNALED(s) and (os.WTERMSIG(s) == signal.SIGUSR1))
+
+ def exit_status(s):
+ if os.WIFEXITED(s):
+ return os.WEXITSTATUS(s)
+ return 1
+
+ conn_timeout = gconf.get("connection-timeout")
+ while ret in (0, 1):
+ remote_user, remote_host = w[1][0].split("@")
+ remote_id = w[1][1]
+ # Check the status of the connected slave node
+ # If the connected slave node is down then try to connect to
+ # different up node.
+ current_slave_host = remote_host
+ slave_up_hosts = get_up_nodes(slavenodes, gconf.get("ssh-port"))
+
+ if (current_slave_host, remote_id) not in slave_up_hosts:
+ if len(slave_up_hosts) > 0:
+ remote_new = random.choice(slave_up_hosts)
+ remote_host = "%s@%s" % (remote_user, remote_new[0])
+ remote_id = remote_new[1]
+
+ # Spawn the worker in lock to avoid fd leak
+ self.lock.acquire()
+
+ self.status[w[0]['dir']].set_worker_status(self.ST_INIT)
+ logging.info(lf('starting gsyncd worker',
+ brick=w[0]['dir'],
+ slave_node=remote_host))
+
+ pr, pw = pipe()
+ cpid = os.fork()
+ if cpid == 0:
+ os.close(pr)
+
+ args_to_worker = argv + [
+ 'worker',
+ rconf.args.master,
+ rconf.args.slave,
+ '--feedback-fd', str(pw),
+ '--local-path', w[0]['dir'],
+ '--local-node', w[0]['host'],
+ '--local-node-id', w[0]['uuid'],
+ '--slave-id', suuid,
+ '--subvol-num', str(w[2]),
+ '--resource-remote', remote_host,
+ '--resource-remote-id', remote_id
+ ]
+
+ if rconf.args.config_file is not None:
+ args_to_worker += ['-c', rconf.args.config_file]
+
+ if w[3]:
+ args_to_worker.append("--is-hottier")
+
+ if rconf.args.debug:
+ args_to_worker.append("--debug")
+
+ access_mount = gconf.get("access-mount")
+ if access_mount:
+ os.execv(sys.executable, args_to_worker)
+ else:
+ if unshare_propagation_supported():
+ logging.debug("Worker would mount volume privately")
+ unshare_cmd = ['unshare', '-m', '--propagation',
+ 'private']
+ cmd = unshare_cmd + args_to_worker
+ os.execvp("unshare", cmd)
+ else:
+ logging.debug("Mount is not private. It would be lazy"
+ " umounted")
+ os.execv(sys.executable, args_to_worker)
+
+ cpids.add(cpid)
+ os.close(pw)
+
+ self.lock.release()
+
+ t0 = time.time()
+ so = select((pr,), (), (), conn_timeout)[0]
+ os.close(pr)
+
+ if so:
+ ret = nwait(cpid, os.WNOHANG)
+
+ if ret is not None:
+ logging.info(lf("worker died before establishing "
+ "connection",
+ brick=w[0]['dir']))
+ else:
+ logging.debug("worker(%s) connected" % w[0]['dir'])
+ while time.time() < t0 + conn_timeout:
+ ret = nwait(cpid, os.WNOHANG)
+
+ if ret is not None:
+ logging.info(lf("worker died in startup phase",
+ brick=w[0]['dir']))
+ break
+
+ time.sleep(1)
+ else:
+ logging.info(
+ lf("Worker not confirmed after wait, aborting it. "
+ "Gsyncd invocation on remote slave via SSH or "
+ "gluster master mount might have hung. Please "
+ "check the above logs for exact issue and check "
+ "master or slave volume for errors. Restarting "
+ "master/slave volume accordingly might help.",
+ brick=w[0]['dir'],
+ timeout=conn_timeout))
+ errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
+ ret = nwait(cpid)
+ if ret is None:
+ ret = nwait(cpid)
+ if exit_signalled(ret):
+ ret = 0
+ else:
+ ret = exit_status(ret)
+ if ret in (0, 1):
+ self.status[w[0]['dir']].set_worker_status(self.ST_FAULTY)
+ gf_event(EVENT_GEOREP_FAULTY,
+ master_volume=master.volume,
+ master_node=w[0]['host'],
+ master_node_id=w[0]['uuid'],
+ slave_host=slave_host,
+ slave_volume=slave_vol,
+ current_slave_host=current_slave_host,
+ brick_path=w[0]['dir'])
+ time.sleep(10)
+ self.status[w[0]['dir']].set_worker_status(self.ST_INCON)
+ return ret
+
+ def multiplex(self, wspx, suuid, slave_vol, slave_host, master, slavenodes):
+ argv = [os.path.basename(sys.executable), sys.argv[0]]
+
+ cpids = set()
+ ta = []
+ for wx in wspx:
+ def wmon(w):
+ cpid, _ = self.monitor(w, argv, cpids, slave_vol,
+ slave_host, master, suuid, slavenodes)
+ time.sleep(1)
+ self.lock.acquire()
+ for cpid in cpids:
+ errno_wrap(os.kill, [cpid, signal.SIGKILL], [ESRCH])
+ self.lock.release()
+ finalize(exval=1)
+ t = Thread(target=wmon, args=[wx])
+ t.start()
+ ta.append(t)
+
+ # monitor status was being updated in each monitor thread. It
+ # should not be done as it can cause deadlock for a worker start.
+ # set_monitor_status uses flock to synchronize multple instances
+ # updating the file. Since each monitor thread forks worker,
+ # these processes can hold the reference to fd of status
+ # file causing deadlock to workers which starts later as flock
+ # will not be release until all references to same fd is closed.
+ # It will also cause fd leaks.
+
+ self.lock.acquire()
+ set_monitor_status(gconf.get("state-file"), self.ST_STARTED)
+ self.lock.release()
+ for t in ta:
+ t.join()
+
+
+def distribute(master, slave):
+ if rconf.args.use_gconf_volinfo:
+ mvol = VolinfoFromGconf(master.volume, master=True)
+ else:
+ mvol = Volinfo(master.volume, master.host, master=True)
+ logging.debug('master bricks: ' + repr(mvol.bricks))
+ prelude = []
+ slave_host = None
+ slave_vol = None
+
+ prelude = [gconf.get("ssh-command")] + \
+ gconf.get("ssh-options").split() + \
+ ["-p", str(gconf.get("ssh-port"))] + \
+ [slave.remote_addr]
+
+ logging.debug('slave SSH gateway: ' + slave.remote_addr)
+
+ if rconf.args.use_gconf_volinfo:
+ svol = VolinfoFromGconf(slave.volume, master=False)
+ else:
+ svol = Volinfo(slave.volume, "localhost", prelude, master=False)
+
+ sbricks = svol.bricks
+ suuid = svol.uuid
+ slave_host = slave.remote_addr.split('@')[-1]
+ slave_vol = slave.volume
+
+ # save this xattr for the session delete command
+ old_stime_xattr_prefix = gconf.get("stime-xattr-prefix", None)
+ new_stime_xattr_prefix = "trusted.glusterfs." + mvol.uuid + "." + \
+ svol.uuid
+ if not old_stime_xattr_prefix or \
+ old_stime_xattr_prefix != new_stime_xattr_prefix:
+ gconf.setconfig("stime-xattr-prefix", new_stime_xattr_prefix)
+
+ logging.debug('slave bricks: ' + repr(sbricks))
+
+ slavenodes = set((b['host'], b["uuid"]) for b in sbricks)
+ rap = SSH.parse_ssh_address(slave)
+ slaves = [(rap['user'] + '@' + h[0], h[1]) for h in slavenodes]
+
+ workerspex = []
+ for idx, brick in enumerate(mvol.bricks):
+ if rconf.args.local_node_id == brick['uuid']:
+ is_hot = mvol.is_hot(":".join([brick['host'], brick['dir']]))
+ workerspex.append((brick,
+ slaves[idx % len(slaves)],
+ get_subvol_num(idx, mvol, is_hot),
+ is_hot))
+ logging.debug('worker specs: ' + repr(workerspex))
+ return workerspex, suuid, slave_vol, slave_host, master, slavenodes
+
+
+def monitor(local, remote):
+ # Check if gsyncd restarted in pause state. If
+ # yes, send SIGSTOP to negative of monitor pid
+ # to go back to pause state.
+ if rconf.args.pause_on_start:
+ errno_wrap(os.kill, [-os.getpid(), signal.SIGSTOP], [ESRCH])
+
+ """oh yeah, actually Monitor is used as singleton, too"""
+ return Monitor().multiplex(*distribute(local, remote))
+
+
+def startup(go_daemon=True):
+ """set up logging, pidfile grabbing, daemonization"""
+ pid_file = gconf.get("pid-file")
+ if not grabpidfile():
+ sys.stderr.write("pidfile is taken, exiting.\n")
+ sys.exit(2)
+ rconf.pid_file_owned = True
+
+ if not go_daemon:
+ return
+
+ x, y = pipe()
+ cpid = os.fork()
+ if cpid:
+ os.close(x)
+ sys.exit()
+ os.close(y)
+ os.setsid()
+ dn = os.open(os.devnull, os.O_RDWR)
+ for f in (sys.stdin, sys.stdout, sys.stderr):
+ os.dup2(dn, f.fileno())
+
+ if not grabpidfile(pid_file + '.tmp'):
+ raise GsyncdError("cannot grab temporary pidfile")
+
+ os.rename(pid_file + '.tmp', pid_file)
+
+ # wait for parent to terminate
+ # so we can start up with
+ # no messing from the dirty
+ # ol' bustard
+ select((x,), (), ())
+ os.close(x)
diff --git a/geo-replication/syncdaemon/py2py3.py b/geo-replication/syncdaemon/py2py3.py
new file mode 100644
index 00000000000..f9c76e1b50a
--- /dev/null
+++ b/geo-replication/syncdaemon/py2py3.py
@@ -0,0 +1,184 @@
+#
+# Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+# All python2/python3 compatibility routines
+
+import sys
+import os
+import stat
+import struct
+from syncdutils import umask
+from ctypes import create_string_buffer
+
+if sys.version_info >= (3,):
+ def pipe():
+ (r, w) = os.pipe()
+ os.set_inheritable(r, True)
+ os.set_inheritable(w, True)
+ return (r, w)
+
+ # Raw conversion of bytearray to string. Used in the cases where
+ # buffer is created by create_string_buffer which is a 8-bit char
+ # array and passed to syscalls to fetch results. Using encode/decode
+ # doesn't work as it converts to string altering the size.
+ def bytearray_to_str(byte_arr):
+ return ''.join([chr(b) for b in byte_arr])
+
+ # Raw conversion of string to bytes. This is required to convert
+ # back the string into bytearray(c char array) to use in struc
+ # pack/unpacking. Again encode/decode can't be used as it
+ # converts it alters size.
+ def str_to_bytearray(string):
+ return bytes([ord(c) for c in string])
+
+ def gr_create_string_buffer(size):
+ return create_string_buffer(b'\0', size)
+
+ def gr_query_xattr(cls, path, size, syscall, attr=None):
+ if attr:
+ return cls._query_xattr(path.encode(), size, syscall,
+ attr.encode())
+ else:
+ return cls._query_xattr(path.encode(), size, syscall)
+
+ def gr_lsetxattr(cls, path, attr, val):
+ return cls.libc.lsetxattr(path.encode(), attr.encode(), val,
+ len(val), 0)
+
+ def gr_lremovexattr(cls, path, attr):
+ return cls.libc.lremovexattr(path.encode(), attr.encode())
+
+ def gr_cl_register(libgfapi, brick, path, log_file, log_level, retries):
+ return libgfapi.gf_changelog_register(brick.encode(),
+ path.encode(),
+ log_file.encode(),
+ log_level, retries)
+
+ def gr_cl_done(libgfapi, clfile):
+ return libgfapi.gf_changelog_done(clfile.encode())
+
+ def gr_cl_history_changelog(libgfapi, changelog_path, start, end, num_parallel,
+ actual_end):
+ return libgfapi.gf_history_changelog(changelog_path.encode(),
+ start, end, num_parallel,
+ actual_end)
+
+ def gr_cl_history_done(libgfapi, clfile):
+ return libgfapi.gf_history_changelog_done(clfile.encode())
+
+ # regular file
+
+ def entry_pack_reg(cls, gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(cls._fmt_mknod(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), 0, umask())
+
+ def entry_pack_reg_stat(cls, gf, bn, st):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mknod(blen),
+ st['uid'], st['gid'],
+ gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), 0, umask())
+ # mkdir
+
+ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(cls._fmt_mkdir(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), umask())
+ # symlink
+
+ def entry_pack_symlink(cls, gf, bn, lnk, st):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ lnk_encoded = lnk.encode()
+ llen = len(lnk_encoded)
+ return struct.pack(cls._fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf.encode(), st['mode'], bn_encoded,
+ lnk_encoded)
+else:
+ def pipe():
+ (r, w) = os.pipe()
+ return (r, w)
+
+ # Raw conversion of bytearray to string
+ def bytearray_to_str(byte_arr):
+ return byte_arr
+
+ # Raw conversion of string to bytearray
+ def str_to_bytearray(string):
+ return string
+
+ def gr_create_string_buffer(size):
+ return create_string_buffer('\0', size)
+
+ def gr_query_xattr(cls, path, size, syscall, attr=None):
+ if attr:
+ return cls._query_xattr(path, size, syscall, attr)
+ else:
+ return cls._query_xattr(path, size, syscall)
+
+ def gr_lsetxattr(cls, path, attr, val):
+ return cls.libc.lsetxattr(path, attr, val, len(val), 0)
+
+ def gr_lremovexattr(cls, path, attr):
+ return cls.libc.lremovexattr(path, attr)
+
+ def gr_cl_register(libgfapi, brick, path, log_file, log_level, retries):
+ return libgfapi.gf_changelog_register(brick, path, log_file,
+ log_level, retries)
+
+ def gr_cl_done(libgfapi, clfile):
+ return libgfapi.gf_changelog_done(clfile)
+
+ def gr_cl_history_changelog(libgfapi, changelog_path, start, end, num_parallel,
+ actual_end):
+ return libgfapi.gf_history_changelog(changelog_path, start, end,
+ num_parallel, actual_end)
+
+ def gr_cl_history_done(libgfapi, clfile):
+ return libgfapi.gf_history_changelog_done(clfile)
+
+ # regular file
+
+ def entry_pack_reg(cls, gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(cls._fmt_mknod(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+
+ def entry_pack_reg_stat(cls, gf, bn, st):
+ blen = len(bn)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mknod(blen),
+ st['uid'], st['gid'],
+ gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+ # mkdir
+
+ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(cls._fmt_mkdir(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), umask())
+ # symlink
+
+ def entry_pack_symlink(cls, gf, bn, lnk, st):
+ blen = len(bn)
+ llen = len(lnk)
+ return struct.pack(cls._fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf, st['mode'], bn, lnk)
diff --git a/geo-replication/syncdaemon/rconf.py b/geo-replication/syncdaemon/rconf.py
new file mode 100644
index 00000000000..ff716ee4d6d
--- /dev/null
+++ b/geo-replication/syncdaemon/rconf.py
@@ -0,0 +1,31 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+
+class RConf(object):
+
+ """singleton class to store runtime globals
+ shared between gsyncd modules"""
+
+ ssh_ctl_dir = None
+ ssh_ctl_args = None
+ cpid = None
+ pid_file_owned = False
+ log_exit = False
+ permanent_handles = []
+ log_metadata = {}
+ mgmt_lock_fd = None
+ args = None
+ turns = 0
+ mountbroker = False
+ mount_point = None
+ mbr_umount_cmd = []
+
+rconf = RConf()
diff --git a/xlators/features/marker/utils/syncdaemon/repce.py b/geo-replication/syncdaemon/repce.py
index 755fb61df48..c622afa6373 100644
--- a/xlators/features/marker/utils/syncdaemon/repce.py
+++ b/geo-replication/syncdaemon/repce.py
@@ -1,32 +1,40 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
import os
import sys
import time
import logging
from threading import Condition
try:
- import thread
-except ImportError:
- # py 3
import _thread as thread
-try:
- from Queue import Queue
except ImportError:
- # py 3
+ import thread
+try:
from queue import Queue
+except ImportError:
+ from Queue import Queue
try:
import cPickle as pickle
except ImportError:
- # py 3
import pickle
-from syncdutils import Thread, select
+from syncdutils import Thread, select, lf
-pickle_proto = -1
+pickle_proto = 2
repce_version = 1.0
+
def ioparse(i, o):
if isinstance(i, int):
- i = os.fdopen(i)
+ i = os.fdopen(i, 'rb')
# rely on duck typing for recognizing
# streams as that works uniformly
# in py2 and py3
@@ -34,6 +42,7 @@ def ioparse(i, o):
o = o.fileno()
return (i, o)
+
def send(out, *args):
"""pickle args and write out wholly in one syscall
@@ -43,12 +52,21 @@ def send(out, *args):
"""
os.write(out, pickle.dumps(args, pickle_proto))
+
def recv(inf):
- """load an object from input stream"""
- return pickle.load(inf)
+ """load an object from input stream
+ python2 and python3 compatibility, inf is sys.stdin
+ and is opened as text stream by default. Hence using the
+ buffer attribute in python3
+ """
+ if hasattr(inf, "buffer"):
+ return pickle.load(inf.buffer)
+ else:
+ return pickle.load(inf)
class RepceServer(object):
+
"""RePCe is Hungarian for canola, http://hu.wikipedia.org/wiki/Repce
... also our homebrewed RPC backend where the transport layer is
@@ -95,16 +113,17 @@ class RepceServer(object):
if rmeth == '__repce_version__':
res = repce_version
else:
- try:
- res = getattr(self.obj, rmeth)(*in_data[2:])
- except:
- res = sys.exc_info()[1]
- exc = True
- logging.exception("call failed: ")
+ try:
+ res = getattr(self.obj, rmeth)(*in_data[2:])
+ except:
+ res = sys.exc_info()[1]
+ exc = True
+ logging.exception("call failed: ")
send(self.out, rid, exc, res)
class RepceJob(object):
+
"""class representing message status we can use
for waiting on reply"""
@@ -137,6 +156,7 @@ class RepceJob(object):
class RepceClient(object):
+
"""RePCe is Hungarian for canola, http://hu.wikipedia.org/wiki/Repce
... also our homebrewed RPC backend where the transport layer is
@@ -148,7 +168,7 @@ class RepceClient(object):
def __init__(self, i, o):
self.inf, self.out = ioparse(i, o)
self.jtab = {}
- t = Thread(target = self.listen)
+ t = Thread(target=self.listen)
t.start()
def listen(self):
@@ -177,25 +197,33 @@ class RepceClient(object):
return rjob
def __call__(self, meth, *args):
- """RePCe client is callabe, calling it implements a synchronous remote call
+ """RePCe client is callabe, calling it implements a synchronous
+ remote call.
- We do a .push with a cbk which does a wakeup upon receiving anwser, then wait
- on the RepceJob.
+ We do a .push with a cbk which does a wakeup upon receiving answer,
+ then wait on the RepceJob.
"""
- rjob = self.push(meth, *args, **{'cbk': lambda rj, res: rj.wakeup(res)})
+ rjob = self.push(
+ meth, *args, **{'cbk': lambda rj, res: rj.wakeup(res)})
exc, res = rjob.wait()
if exc:
- logging.error('call %s (%s) failed on peer with %s' % (repr(rjob), meth, str(type(res).__name__)))
+ logging.error(lf('call failed',
+ call=repr(rjob),
+ method=meth,
+ error=str(type(res).__name__)))
raise res
logging.debug("call %s %s -> %s" % (repr(rjob), meth, repr(res)))
return res
class mprx(object):
- """method proxy, standard trick to implement rubyesque method_missing
- in Python
- A class is a closure factory, you know what I mean, or go read some SICP.
+ """method proxy, standard trick to implement rubyesque
+ method_missing in Python
+
+ A class is a closure factory, you know what I mean, or go read
+ some SICP.
"""
+
def __init__(self, ins, meth):
self.ins = ins
self.meth = meth
diff --git a/geo-replication/syncdaemon/resource.py b/geo-replication/syncdaemon/resource.py
new file mode 100644
index 00000000000..f12c7ceaa36
--- /dev/null
+++ b/geo-replication/syncdaemon/resource.py
@@ -0,0 +1,1583 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import re
+import os
+import sys
+import stat
+import time
+import fcntl
+import types
+import struct
+import logging
+import tempfile
+import subprocess
+from errno import (EEXIST, ENOENT, ENODATA, ENOTDIR, ELOOP, EACCES,
+ EISDIR, ENOTEMPTY, ESTALE, EINVAL, EBUSY, EPERM)
+import errno
+
+from rconf import rconf
+import gsyncdconfig as gconf
+import libgfchangelog
+
+import repce
+from repce import RepceServer, RepceClient
+from master import gmaster_builder
+import syncdutils
+from syncdutils import (GsyncdError, select, privileged, funcode,
+ entry2pb, gauxpfx, errno_wrap, lstat,
+ NoStimeAvailable, PartialHistoryAvailable,
+ ChangelogException, ChangelogHistoryNotAvailable,
+ get_changelog_log_level, get_rsync_version,
+ GX_GFID_CANONICAL_LEN,
+ gf_mount_ready, lf, Popen, sup,
+ Xattr, matching_disk_gfid, get_gfid_from_mnt,
+ unshare_propagation_supported, get_slv_dir_path)
+from gsyncdstatus import GeorepStatus
+from py2py3 import (pipe, str_to_bytearray, entry_pack_reg,
+ entry_pack_reg_stat, entry_pack_mkdir,
+ entry_pack_symlink)
+
+
+ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP')
+
+slv_volume = None
+slv_host = None
+
+
+class Server(object):
+
+ """singleton implemening those filesystem access primitives
+ which are needed for geo-replication functionality
+
+ (Singleton in the sense it's a class which has only static
+ and classmethods and is used directly, without instantiation.)
+ """
+
+ GX_NSPACE_PFX = (privileged() and "trusted" or "system")
+ GX_NSPACE = GX_NSPACE_PFX + ".glusterfs"
+ NTV_FMTSTR = "!" + "B" * 19 + "II"
+ FRGN_XTRA_FMT = "I"
+ FRGN_FMTSTR = NTV_FMTSTR + FRGN_XTRA_FMT
+
+ # for backend gfid fetch, do not use GX_NSPACE_PFX
+ GFID_XATTR = 'trusted.gfid'
+ GFID_FMTSTR = "!" + "B" * 16
+
+ local_path = ''
+
+ @classmethod
+ def _fmt_mknod(cls, l):
+ return "!II%dsI%dsIII" % (GX_GFID_CANONICAL_LEN, l + 1)
+
+ @classmethod
+ def _fmt_mkdir(cls, l):
+ return "!II%dsI%dsII" % (GX_GFID_CANONICAL_LEN, l + 1)
+
+ @classmethod
+ def _fmt_symlink(cls, l1, l2):
+ return "!II%dsI%ds%ds" % (GX_GFID_CANONICAL_LEN, l1 + 1, l2 + 1)
+
+ def _pathguard(f):
+ """decorator method that checks
+ the path argument of the decorated
+ functions to make sure it does not
+ point out of the managed tree
+ """
+
+ fc = funcode(f)
+ pi = list(fc.co_varnames).index('path')
+
+ def ff(*args):
+ path = args[pi]
+ ps = path.split('/')
+ if path[0] == '/' or '..' in ps:
+ raise ValueError('unsafe path')
+ args = list(args)
+ args[pi] = os.path.join(args[0].local_path, path)
+ return f(*args)
+ return ff
+
+ @classmethod
+ @_pathguard
+ def entries(cls, path):
+ """directory entries in an array"""
+ # prevent symlinks being followed
+ if not stat.S_ISDIR(os.lstat(path).st_mode):
+ raise OSError(ENOTDIR, os.strerror(ENOTDIR))
+ return os.listdir(path)
+
+ @classmethod
+ @_pathguard
+ def lstat(cls, path):
+ try:
+ return os.lstat(path)
+ except (IOError, OSError):
+ ex = sys.exc_info()[1]
+ if ex.errno == ENOENT:
+ return ex.errno
+ else:
+ raise
+
+ @classmethod
+ @_pathguard
+ def linkto_check(cls, path):
+ try:
+ return not (
+ Xattr.lgetxattr_buf(path,
+ 'trusted.glusterfs.dht.linkto') == '')
+ except (IOError, OSError):
+ ex = sys.exc_info()[1]
+ if ex.errno in (ENOENT, ENODATA):
+ return False
+ else:
+ raise
+
+ @classmethod
+ @_pathguard
+ def gfid(cls, path):
+ buf = errno_wrap(Xattr.lgetxattr, [path, cls.GFID_XATTR, 16],
+ [ENOENT], [ESTALE, ENODATA])
+ if buf == ENOENT:
+ return buf
+ else:
+ buf = str_to_bytearray(buf)
+ m = re.match('(.{8})(.{4})(.{4})(.{4})(.{12})', "".join(
+ ['%02x' % x for x in struct.unpack(cls.GFID_FMTSTR, buf)]))
+ return '-'.join(m.groups())
+
+ @classmethod
+ @_pathguard
+ def purge(cls, path, entries=None):
+ """force-delete subtrees
+
+ If @entries is not specified, delete
+ the whole subtree under @path (including
+ @path).
+
+ Otherwise, @entries should be a
+ a sequence of children of @path, and
+ the effect is identical with a joint
+ @entries-less purge on them, ie.
+
+ for e in entries:
+ cls.purge(os.path.join(path, e))
+ """
+ me_also = entries is None
+ if not entries:
+ try:
+ # if it's a symlink, prevent
+ # following it
+ try:
+ os.unlink(path)
+ return
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == EISDIR:
+ entries = os.listdir(path)
+ else:
+ raise
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in (ENOTDIR, ENOENT, ELOOP):
+ try:
+ os.unlink(path)
+ return
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == ENOENT:
+ return
+ raise
+ else:
+ raise
+ for e in entries:
+ cls.purge(os.path.join(path, e))
+ if me_also:
+ os.rmdir(path)
+
+ @classmethod
+ @_pathguard
+ def _create(cls, path, ctor):
+ """path creation backend routine"""
+ try:
+ ctor(path)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno == EEXIST:
+ cls.purge(path)
+ return ctor(path)
+ raise
+
+ @classmethod
+ @_pathguard
+ def mkdir(cls, path):
+ cls._create(path, os.mkdir)
+
+ @classmethod
+ @_pathguard
+ def symlink(cls, lnk, path):
+ cls._create(path, lambda p: os.symlink(lnk, p))
+
+ @classmethod
+ @_pathguard
+ def xtime(cls, path, uuid):
+ """query xtime extended attribute
+
+ Return xtime of @path for @uuid as a pair of integers.
+ "Normal" errors due to non-existent @path or extended attribute
+ are tolerated and errno is returned in such a case.
+ """
+
+ try:
+ val = Xattr.lgetxattr(path,
+ '.'.join([cls.GX_NSPACE, uuid, 'xtime']),
+ 8)
+ val = str_to_bytearray(val)
+ return struct.unpack('!II', val)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in (ENOENT, ENODATA, ENOTDIR):
+ return ex.errno
+ else:
+ raise
+
+ @classmethod
+ @_pathguard
+ def stime_mnt(cls, path, uuid):
+ """query xtime extended attribute
+
+ Return xtime of @path for @uuid as a pair of integers.
+ "Normal" errors due to non-existent @path or extended attribute
+ are tolerated and errno is returned in such a case.
+ """
+
+ try:
+ val = Xattr.lgetxattr(path,
+ '.'.join([cls.GX_NSPACE, uuid, 'stime']),
+ 8)
+ val = str_to_bytearray(val)
+ return struct.unpack('!II', val)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in (ENOENT, ENODATA, ENOTDIR):
+ return ex.errno
+ else:
+ raise
+
+ @classmethod
+ @_pathguard
+ def stime(cls, path, uuid):
+ """query xtime extended attribute
+
+ Return xtime of @path for @uuid as a pair of integers.
+ "Normal" errors due to non-existent @path or extended attribute
+ are tolerated and errno is returned in such a case.
+ """
+
+ try:
+ val = Xattr.lgetxattr(path,
+ '.'.join([cls.GX_NSPACE, uuid, 'stime']),
+ 8)
+ val = str_to_bytearray(val)
+ return struct.unpack('!II', val)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in (ENOENT, ENODATA, ENOTDIR):
+ return ex.errno
+ else:
+ raise
+
+ @classmethod
+ @_pathguard
+ def entry_stime(cls, path, uuid):
+ """
+ entry_stime xattr to reduce the number of retry of Entry changes when
+ Geo-rep worker crashes and restarts. entry_stime is updated after
+ processing every changelog file. On failure and restart, worker only
+ have to reprocess the last changelog for Entry ops.
+ Xattr Key: <PFX>.<MASTERVOL_UUID>.<SLAVEVOL_UUID>.entry_stime
+ """
+ try:
+ val = Xattr.lgetxattr(path,
+ '.'.join([cls.GX_NSPACE, uuid,
+ 'entry_stime']),
+ 8)
+ val = str_to_bytearray(val)
+ return struct.unpack('!II', val)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in (ENOENT, ENODATA, ENOTDIR):
+ return ex.errno
+ else:
+ raise
+
+ @classmethod
+ def node_uuid(cls, path='.'):
+ try:
+ uuid_l = Xattr.lgetxattr_buf(
+ path, '.'.join([cls.GX_NSPACE, 'node-uuid']))
+ return uuid_l[:-1].split(' ')
+ except OSError:
+ raise
+
+ @classmethod
+ @_pathguard
+ def set_stime(cls, path, uuid, mark):
+ """set @mark as stime for @uuid on @path"""
+ errno_wrap(Xattr.lsetxattr,
+ [path,
+ '.'.join([cls.GX_NSPACE, uuid, 'stime']),
+ struct.pack('!II', *mark)],
+ [ENOENT],
+ [ESTALE, EINVAL])
+
+ @classmethod
+ @_pathguard
+ def set_entry_stime(cls, path, uuid, mark):
+ """set @mark as stime for @uuid on @path"""
+ errno_wrap(Xattr.lsetxattr,
+ [path,
+ '.'.join([cls.GX_NSPACE, uuid, 'entry_stime']),
+ struct.pack('!II', *mark)],
+ [ENOENT],
+ [ESTALE, EINVAL])
+
+ @classmethod
+ @_pathguard
+ def set_xtime(cls, path, uuid, mark):
+ """set @mark as xtime for @uuid on @path"""
+ errno_wrap(Xattr.lsetxattr,
+ [path,
+ '.'.join([cls.GX_NSPACE, uuid, 'xtime']),
+ struct.pack('!II', *mark)],
+ [ENOENT],
+ [ESTALE, EINVAL])
+
+ @classmethod
+ @_pathguard
+ def set_xtime_remote(cls, path, uuid, mark):
+ """
+ set @mark as xtime for @uuid on @path
+ the difference b/w this and set_xtime() being
+ set_xtime() being overloaded to set the xtime
+ on the brick (this method sets xtime on the
+ remote slave)
+ """
+ Xattr.lsetxattr(
+ path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']),
+ struct.pack('!II', *mark))
+
+ @classmethod
+ def entry_ops(cls, entries):
+ pfx = gauxpfx()
+ logging.debug('entries: %s' % repr(entries))
+ dist_count = rconf.args.master_dist_count
+
+ def entry_purge(op, entry, gfid, e, uid, gid):
+ # This is an extremely racy code and needs to be fixed ASAP.
+ # The GFID check here is to be sure that the pargfid/bname
+ # to be purged is the GFID gotten from the changelog.
+ # (a stat(changelog_gfid) would also be valid here)
+ # The race here is between the GFID check and the purge.
+
+ # If the entry or the gfid of the file to be deleted is not present
+ # on slave, we can ignore the unlink/rmdir
+ if isinstance(lstat(entry), int) or \
+ isinstance(lstat(os.path.join(pfx, gfid)), int):
+ return
+
+ if not matching_disk_gfid(gfid, entry):
+ collect_failure(e, EEXIST, uid, gid)
+ return
+
+ if op == 'UNLINK':
+ er = errno_wrap(os.unlink, [entry], [ENOENT, ESTALE], [EBUSY])
+ # EISDIR is safe error, ignore. This can only happen when
+ # unlink is sent from master while fixing gfid conflicts.
+ if er != EISDIR:
+ return er
+
+ elif op == 'RMDIR':
+ er = errno_wrap(os.rmdir, [entry], [ENOENT, ESTALE,
+ ENOTEMPTY], [EBUSY])
+ if er == ENOTEMPTY:
+ return er
+
+ def collect_failure(e, cmd_ret, uid, gid, dst=False):
+ slv_entry_info = {}
+ slv_entry_info['gfid_mismatch'] = False
+ slv_entry_info['name_mismatch'] = False
+ slv_entry_info['dst'] = dst
+ slv_entry_info['slave_isdir'] = False
+ slv_entry_info['slave_name'] = None
+ slv_entry_info['slave_gfid'] = None
+ # We do this for failing fops on Slave
+ # Master should be logging this
+ if cmd_ret is None:
+ return False
+
+ if e.get("stat", {}):
+ # Copy actual UID/GID value back to entry stat
+ e['stat']['uid'] = uid
+ e['stat']['gid'] = gid
+
+ if cmd_ret in [EEXIST, ESTALE]:
+ if dst:
+ en = e['entry1']
+ else:
+ en = e['entry']
+ disk_gfid = get_gfid_from_mnt(en)
+ if isinstance(disk_gfid, str) and \
+ e['gfid'] != disk_gfid:
+ slv_entry_info['gfid_mismatch'] = True
+ st = lstat(en)
+ if not isinstance(st, int):
+ if st and stat.S_ISDIR(st.st_mode):
+ slv_entry_info['slave_isdir'] = True
+ dir_name = get_slv_dir_path(slv_host, slv_volume,
+ disk_gfid)
+ slv_entry_info['slave_name'] = dir_name
+ else:
+ slv_entry_info['slave_isdir'] = False
+ slv_entry_info['slave_gfid'] = disk_gfid
+ failures.append((e, cmd_ret, slv_entry_info))
+ else:
+ return False
+ else:
+ failures.append((e, cmd_ret, slv_entry_info))
+
+ return True
+
+ failures = []
+
+ def recursive_rmdir(gfid, entry, path):
+ """disk_gfid check added for original path for which
+ recursive_delete is called. This disk gfid check executed
+ before every Unlink/Rmdir. If disk gfid is not matching
+ with GFID from Changelog, that means other worker
+ deleted the directory. Even if the subdir/file present,
+ it belongs to different parent. Exit without performing
+ further deletes.
+ """
+ if not matching_disk_gfid(gfid, entry):
+ return
+
+ names = []
+ names = errno_wrap(os.listdir, [path], [ENOENT], [ESTALE, ENOTSUP])
+ if isinstance(names, int):
+ return
+
+ for name in names:
+ fullname = os.path.join(path, name)
+ if not matching_disk_gfid(gfid, entry):
+ return
+ er = errno_wrap(os.remove, [fullname], [ENOENT, ESTALE,
+ EISDIR], [EBUSY])
+
+ if er == EISDIR:
+ recursive_rmdir(gfid, entry, fullname)
+
+ if not matching_disk_gfid(gfid, entry):
+ return
+
+ errno_wrap(os.rmdir, [path], [ENOENT, ESTALE], [EBUSY])
+
+ def rename_with_disk_gfid_confirmation(gfid, entry, en, uid, gid):
+ if not matching_disk_gfid(gfid, entry):
+ logging.error(lf("RENAME ignored: source entry does not match "
+ "with on-disk gfid",
+ source=entry,
+ gfid=gfid,
+ disk_gfid=get_gfid_from_mnt(entry),
+ target=en))
+ collect_failure(e, EEXIST, uid, gid)
+ return
+
+ cmd_ret = errno_wrap(os.rename,
+ [entry, en],
+ [ENOENT, EEXIST], [ESTALE, EBUSY])
+ collect_failure(e, cmd_ret, uid, gid)
+
+ for e in entries:
+ blob = None
+ op = e['op']
+ gfid = e['gfid']
+ entry = e['entry']
+ uid = 0
+ gid = 0
+
+ # Skip entry processing if it's marked true during gfid
+ # conflict resolution
+ if e['skip_entry']:
+ continue
+
+ if e.get("stat", {}):
+ # Copy UID/GID value and then reset to zero. Copied UID/GID
+ # will be used to run chown once entry is created.
+ uid = e['stat']['uid']
+ gid = e['stat']['gid']
+ e['stat']['uid'] = 0
+ e['stat']['gid'] = 0
+
+ (pg, bname) = entry2pb(entry)
+ if op in ['RMDIR', 'UNLINK']:
+ # Try once, if rmdir failed with ENOTEMPTY
+ # then delete recursively.
+ er = entry_purge(op, entry, gfid, e, uid, gid)
+ if isinstance(er, int):
+ if er == ENOTEMPTY and op == 'RMDIR':
+ # Retry if ENOTEMPTY, ESTALE
+ er1 = errno_wrap(recursive_rmdir,
+ [gfid, entry,
+ os.path.join(pg, bname)],
+ [], [ENOTEMPTY, ESTALE, ENODATA])
+ if not isinstance(er1, int):
+ logging.debug("Removed %s => %s/%s recursively" %
+ (gfid, pg, bname))
+ else:
+ logging.warn(lf("Recursive remove failed",
+ gfid=gfid,
+ pgfid=pg,
+ bname=bname,
+ error=os.strerror(er1)))
+ else:
+ logging.warn(lf("Failed to remove",
+ gfid=gfid,
+ pgfid=pg,
+ bname=bname,
+ error=os.strerror(er)))
+ elif op in ['CREATE', 'MKNOD']:
+ slink = os.path.join(pfx, gfid)
+ st = lstat(slink)
+ # don't create multiple entries with same gfid
+ if isinstance(st, int):
+ blob = entry_pack_reg(cls, gfid, bname,
+ e['mode'], e['uid'], e['gid'])
+ # Self healed hardlinks are recorded as MKNOD.
+ # So if the gfid already exists, it should be
+ # processed as hard link not mknod.
+ elif op in ['MKNOD']:
+ cmd_ret = errno_wrap(os.link,
+ [slink, entry],
+ [ENOENT, EEXIST], [ESTALE])
+ collect_failure(e, cmd_ret, uid, gid)
+ elif op == 'MKDIR':
+ en = e['entry']
+ slink = os.path.join(pfx, gfid)
+ st = lstat(slink)
+ # don't create multiple entries with same gfid
+ if isinstance(st, int):
+ blob = entry_pack_mkdir(cls, gfid, bname,
+ e['mode'], e['uid'], e['gid'])
+ elif (isinstance(lstat(en), int) or
+ not matching_disk_gfid(gfid, en)):
+ # If gfid of a directory exists on slave but path based
+ # create is getting EEXIST. This means the directory is
+ # renamed in master but recorded as MKDIR during hybrid
+ # crawl. Get the directory path by reading the backend
+ # symlink and trying to rename to new name as said by
+ # master.
+ logging.info(lf("Special case: rename on mkdir",
+ gfid=gfid, entry=repr(entry)))
+ src_entry = get_slv_dir_path(slv_host, slv_volume, gfid)
+ if src_entry is None:
+ collect_failure(e, ENOENT, uid, gid)
+ if src_entry is not None and src_entry != entry:
+ slv_entry_info = {}
+ slv_entry_info['gfid_mismatch'] = False
+ slv_entry_info['name_mismatch'] = True
+ slv_entry_info['dst'] = False
+ slv_entry_info['slave_isdir'] = True
+ slv_entry_info['slave_gfid'] = gfid
+ slv_entry_info['slave_entry'] = src_entry
+
+ failures.append((e, EEXIST, slv_entry_info))
+ elif op == 'LINK':
+ slink = os.path.join(pfx, gfid)
+ st = lstat(slink)
+ if isinstance(st, int):
+ (pg, bname) = entry2pb(entry)
+ if stat.S_ISREG(e['stat']['mode']):
+ blob = entry_pack_reg_stat(cls, gfid, bname, e['stat'])
+ elif stat.S_ISLNK(e['stat']['mode']):
+ blob = entry_pack_symlink(cls, gfid, bname, e['link'],
+ e['stat'])
+ else:
+ cmd_ret = errno_wrap(os.link,
+ [slink, entry],
+ [ENOENT, EEXIST], [ESTALE])
+ collect_failure(e, cmd_ret, uid, gid)
+ elif op == 'SYMLINK':
+ en = e['entry']
+ st = lstat(entry)
+ if isinstance(st, int):
+ blob = entry_pack_symlink(cls, gfid, bname, e['link'],
+ e['stat'])
+ elif not matching_disk_gfid(gfid, en):
+ collect_failure(e, EEXIST, uid, gid)
+ elif op == 'RENAME':
+ en = e['entry1']
+ # The matching disk gfid check validates two things
+ # 1. Validates name is present, return false otherwise
+ # 2. Validates gfid is same, returns false otherwise
+ # So both validations are necessary to decide src doesn't
+ # exist. We can't rely on only gfid stat as hardlink could
+ # be present and we can't rely only on name as name could
+ # exist with different gfid.
+ if not matching_disk_gfid(gfid, entry):
+ if e['stat'] and not stat.S_ISDIR(e['stat']['mode']):
+ if stat.S_ISLNK(e['stat']['mode']):
+ # src is not present, so don't sync symlink as
+ # we don't know target. It's ok to ignore. If
+ # it's unliked, it's fine. If it's renamed to
+ # something else, it will be synced then.
+ if e['link'] is not None:
+ st1 = lstat(en)
+ if isinstance(st1, int):
+ (pg, bname) = entry2pb(en)
+ blob = entry_pack_symlink(cls, gfid, bname,
+ e['link'],
+ e['stat'])
+ elif not matching_disk_gfid(gfid, en):
+ collect_failure(e, EEXIST, uid, gid, True)
+ else:
+ slink = os.path.join(pfx, gfid)
+ st = lstat(slink)
+ # don't create multiple entries with same gfid
+ if isinstance(st, int):
+ (pg, bname) = entry2pb(en)
+ blob = entry_pack_reg_stat(cls, gfid, bname,
+ e['stat'])
+ else:
+ cmd_ret = errno_wrap(os.link, [slink, en],
+ [ENOENT, EEXIST], [ESTALE])
+ collect_failure(e, cmd_ret, uid, gid)
+ else:
+ st = lstat(entry)
+ st1 = lstat(en)
+ if isinstance(st1, int):
+ rename_with_disk_gfid_confirmation(gfid, entry, en,
+ uid, gid)
+ else:
+ if st.st_ino == st1.st_ino:
+ # we have a hard link, we can now unlink source
+ try:
+ errno_wrap(os.unlink, [entry],
+ [ENOENT, ESTALE], [EBUSY])
+ except OSError as e:
+ if e.errno == EISDIR:
+ try:
+ errno_wrap(os.rmdir, [entry],
+ [ENOENT, ESTALE], [EBUSY])
+ except OSError as e:
+ if e.errno == ENOTEMPTY:
+ logging.error(
+ lf("Directory Rename failed. "
+ "Both Old and New"
+ " directories exists",
+ old=entry,
+ new=en))
+ else:
+ raise
+ else:
+ raise
+ elif not matching_disk_gfid(gfid, en) and dist_count > 1:
+ collect_failure(e, EEXIST, uid, gid, True)
+ else:
+ # We are here which means matching_disk_gfid for
+ # both source and destination has returned false
+ # and distribution count for master vol is greater
+ # then one. Which basically says both the source and
+ # destination exist and not hardlinks.
+ # So we are safe to go ahead with rename here.
+ rename_with_disk_gfid_confirmation(gfid, entry, en,
+ uid, gid)
+ if blob:
+ cmd_ret = errno_wrap(Xattr.lsetxattr,
+ [pg, 'glusterfs.gfid.newfile', blob],
+ [EEXIST, ENOENT, ESTALE],
+ [ESTALE, EINVAL, EBUSY])
+ collect_failure(e, cmd_ret, uid, gid)
+
+ # If UID/GID is different than zero that means we are trying
+ # create Entry with different UID/GID. Create Entry with
+ # UID:0 and GID:0, and then call chown to set UID/GID
+ if uid != 0 or gid != 0:
+ path = os.path.join(pfx, gfid)
+ cmd_ret = errno_wrap(os.lchown, [path, uid, gid], [ENOENT],
+ [ESTALE, EINVAL])
+ collect_failure(e, cmd_ret, uid, gid)
+
+ return failures
+
+ @classmethod
+ def meta_ops(cls, meta_entries):
+ logging.debug('Meta-entries: %s' % repr(meta_entries))
+ failures = []
+ for e in meta_entries:
+ mode = e['stat']['mode']
+ uid = e['stat']['uid']
+ gid = e['stat']['gid']
+ atime = e['stat']['atime']
+ mtime = e['stat']['mtime']
+ go = e['go']
+ # Linux doesn't support chmod on symlink itself.
+ # It is always applied to the target file. So
+ # changelog would record target file's gfid
+ # and we are good. But 'chown' is supported on
+ # symlink file. So changelog would record symlink
+ # gfid in such cases. Since we do 'chown' 'chmod'
+ # 'utime' for each gfid recorded for metadata, and
+ # we know from changelog the metadata is on symlink's
+ # gfid or target file's gfid, we should be doing
+ # 'lchown' 'lchmod' 'utime with no-deference' blindly.
+ # But since 'lchmod' and 'utime with no de-reference' is
+ # not supported in python3, we have to rely on 'chmod'
+ # and 'utime with de-reference'. Hence avoiding 'chmod'
+ # and 'utime' if it's symlink file.
+
+ is_symlink = False
+ cmd_ret = errno_wrap(os.lchown, [go, uid, gid], [ENOENT],
+ [ESTALE, EINVAL])
+ if isinstance(cmd_ret, int):
+ continue
+
+ is_symlink = os.path.islink(go)
+
+ if not is_symlink:
+ cmd_ret = errno_wrap(os.chmod, [go, mode],
+ [ENOENT, EACCES, EPERM], [ESTALE, EINVAL])
+ if isinstance(cmd_ret, int):
+ failures.append((e, cmd_ret, "chmod"))
+
+ cmd_ret = errno_wrap(os.utime, [go, (atime, mtime)],
+ [ENOENT, EACCES, EPERM], [ESTALE, EINVAL])
+ if isinstance(cmd_ret, int):
+ failures.append((e, cmd_ret, "utime"))
+ return failures
+
+ @classmethod
+ @_pathguard
+ def setattr(cls, path, adct):
+ """set file attributes
+
+ @adct is a dict, where 'own', 'mode' and 'times'
+ keys are looked for and values used to perform
+ chown, chmod or utimes on @path.
+ """
+ own = adct.get('own')
+ if own:
+ os.lchown(path, *own)
+ mode = adct.get('mode')
+ if mode:
+ os.chmod(path, stat.S_IMODE(mode))
+ times = adct.get('times')
+ if times:
+ os.utime(path, times)
+
+ @staticmethod
+ def pid():
+ return os.getpid()
+
+ last_keep_alive = 0
+
+ @classmethod
+ def keep_alive(cls, dct):
+ """process keepalive messages.
+
+ Return keep-alive counter (number of received keep-alive
+ messages).
+
+ Now the "keep-alive" message can also have a payload which is
+ used to set a foreign volume-mark on the underlying file system.
+ """
+ if dct:
+ key = '.'.join([cls.GX_NSPACE, 'volume-mark', dct['uuid']])
+ val = struct.pack(cls.FRGN_FMTSTR,
+ *(dct['version'] +
+ tuple(int(x, 16)
+ for x in re.findall('(?:[\da-f]){2}',
+ dct['uuid'])) +
+ (dct['retval'],) + dct['volume_mark'][0:2] + (
+ dct['timeout'],)))
+ Xattr.lsetxattr('.', key, val)
+ cls.last_keep_alive += 1
+ return cls.last_keep_alive
+
+ @staticmethod
+ def version():
+ """version used in handshake"""
+ return 1.0
+
+
+class Mounter(object):
+
+ """Abstract base class for mounter backends"""
+
+ def __init__(self, params):
+ self.params = params
+ self.mntpt = None
+ self.umount_cmd = []
+
+ @classmethod
+ def get_glusterprog(cls):
+ gluster_cmd_dir = gconf.get("gluster-command-dir")
+ if rconf.args.subcmd == "slave":
+ gluster_cmd_dir = gconf.get("slave-gluster-command-dir")
+ return os.path.join(gluster_cmd_dir, cls.glusterprog)
+
+ def umount_l(self, d):
+ """perform lazy umount"""
+ po = Popen(self.make_umount_argv(d), stderr=subprocess.PIPE,
+ universal_newlines=True)
+ po.wait()
+ return po
+
+ @classmethod
+ def make_umount_argv(cls, d):
+ raise NotImplementedError
+
+ def make_mount_argv(self, label=None):
+ raise NotImplementedError
+
+ def cleanup_mntpt(self, *a):
+ pass
+
+ def handle_mounter(self, po):
+ po.wait()
+
+ def inhibit(self, label):
+ """inhibit a gluster filesystem
+
+ Mount glusterfs over a temporary mountpoint,
+ change into the mount, and lazy unmount the
+ filesystem.
+ """
+ mpi, mpo = pipe()
+ mh = Popen.fork()
+ if mh:
+ # Parent
+ os.close(mpi)
+ fcntl.fcntl(mpo, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+ d = None
+ margv = self.make_mount_argv(label)
+ if self.mntpt:
+ # mntpt is determined pre-mount
+ d = self.mntpt
+ mnt_msg = d + '\0'
+ encoded_msg = mnt_msg.encode()
+ os.write(mpo, encoded_msg)
+ po = Popen(margv, **self.mountkw)
+ self.handle_mounter(po)
+ po.terminate_geterr()
+ logging.debug('auxiliary glusterfs mount in place')
+ if not d:
+ # mntpt is determined during mount
+ d = self.mntpt
+ mnt_msg = d + '\0'
+ encoded_msg = mnt_msg.encode()
+ os.write(mpo, encoded_msg)
+ encoded_msg = 'M'.encode()
+ os.write(mpo, encoded_msg)
+ t = syncdutils.Thread(target=lambda: os.chdir(d))
+ t.start()
+ tlim = rconf.starttime + gconf.get("connection-timeout")
+ while True:
+ if not t.isAlive():
+ break
+
+ if time.time() >= tlim:
+ syncdutils.finalize(exval=1)
+ time.sleep(1)
+ os.close(mpo)
+ _, rv = syncdutils.waitpid(mh, 0)
+ if rv:
+ rv = (os.WIFEXITED(rv) and os.WEXITSTATUS(rv) or 0) - \
+ (os.WIFSIGNALED(rv) and os.WTERMSIG(rv) or 0)
+ logging.warn(lf('stale mount possibly left behind',
+ path=d))
+ raise GsyncdError("cleaning up temp mountpoint %s "
+ "failed with status %d" %
+ (d, rv))
+ else:
+ rv = 0
+ try:
+ os.setsid()
+ os.close(mpo)
+ mntdata = ''
+ while True:
+ c = os.read(mpi, 1)
+ c = c.decode()
+ if not c:
+ break
+ mntdata += c
+ if mntdata:
+ mounted = False
+ if mntdata[-1] == 'M':
+ mntdata = mntdata[:-1]
+ assert(mntdata)
+ mounted = True
+ assert(mntdata[-1] == '\0')
+ mntpt = mntdata[:-1]
+ assert(mntpt)
+
+ umount_master = False
+ umount_slave = False
+ if rconf.args.subcmd == "worker" \
+ and not unshare_propagation_supported() \
+ and not gconf.get("access-mount"):
+ umount_master = True
+ if rconf.args.subcmd == "slave" \
+ and not gconf.get("slave-access-mount"):
+ umount_slave = True
+
+ if mounted and (umount_master or umount_slave):
+ po = self.umount_l(mntpt)
+ po.terminate_geterr(fail_on_err=False)
+ if po.returncode != 0:
+ po.errlog()
+ rv = po.returncode
+ logging.debug("Lazy umount done: %s" % mntpt)
+ if umount_master or umount_slave:
+ self.cleanup_mntpt(mntpt)
+ except:
+ logging.exception('mount cleanup failure:')
+ rv = 200
+ os._exit(rv)
+
+ #Polling the dht.subvol.status value.
+ RETRIES = 10
+ while not gf_mount_ready():
+ if RETRIES < 0:
+ logging.error('Subvols are not up')
+ break
+ RETRIES -= 1
+ time.sleep(0.2)
+
+ logging.debug('auxiliary glusterfs mount prepared')
+
+
+class DirectMounter(Mounter):
+
+ """mounter backend which calls mount(8), umount(8) directly"""
+
+ mountkw = {'stderr': subprocess.PIPE, 'universal_newlines': True}
+ glusterprog = 'glusterfs'
+
+ @staticmethod
+ def make_umount_argv(d):
+ return ['umount', '-l', d]
+
+ def make_mount_argv(self, label=None):
+ self.mntpt = tempfile.mkdtemp(prefix='gsyncd-aux-mount-')
+ rconf.mount_point = self.mntpt
+ return [self.get_glusterprog()] + \
+ ['--' + p for p in self.params] + [self.mntpt]
+
+ def cleanup_mntpt(self, mntpt=None):
+ if not mntpt:
+ mntpt = self.mntpt
+ errno_wrap(os.rmdir, [mntpt], [ENOENT, EBUSY])
+
+
+class MountbrokerMounter(Mounter):
+
+ """mounter backend using the mountbroker gluster service"""
+
+ mountkw = {'stderr': subprocess.PIPE, 'stdout': subprocess.PIPE,
+ 'universal_newlines': True}
+ glusterprog = 'gluster'
+
+ @classmethod
+ def make_cli_argv(cls):
+ return [cls.get_glusterprog()] + ['--remote-host=localhost'] + \
+ gconf.get("gluster-cli-options").split() + ['system::']
+
+ @classmethod
+ def make_umount_argv(cls, d):
+ return cls.make_cli_argv() + ['umount', d, 'lazy']
+
+ def make_mount_argv(self, label):
+ return self.make_cli_argv() + \
+ ['mount', label, 'user-map-root=' +
+ syncdutils.getusername()] + self.params
+
+ def handle_mounter(self, po):
+ self.mntpt = po.stdout.readline()[:-1]
+ rconf.mount_point = self.mntpt
+ rconf.mountbroker = True
+ self.umount_cmd = self.make_cli_argv() + ['umount']
+ rconf.mbr_umount_cmd = self.umount_cmd
+ po.stdout.close()
+ sup(self, po)
+ if po.returncode != 0:
+ # if cli terminated with error due to being
+ # refused by glusterd, what it put
+ # out on stdout is a diagnostic message
+ logging.error(lf('glusterd answered', mnt=self.mntpt))
+
+
+class GLUSTERServer(Server):
+
+ "server enhancements for a glusterfs backend"""
+
+ @classmethod
+ def _attr_unpack_dict(cls, xattr, extra_fields=''):
+ """generic volume mark fetching/parsing backed"""
+ fmt_string = cls.NTV_FMTSTR + extra_fields
+ buf = Xattr.lgetxattr('.', xattr, struct.calcsize(fmt_string))
+ buf = str_to_bytearray(buf)
+ vm = struct.unpack(fmt_string, buf)
+ m = re.match(
+ '(.{8})(.{4})(.{4})(.{4})(.{12})',
+ "".join(['%02x' % x for x in vm[2:18]]))
+ uuid = '-'.join(m.groups())
+ volinfo = {'version': vm[0:2],
+ 'uuid': uuid,
+ 'retval': vm[18],
+ 'volume_mark': vm[19:21],
+ }
+ if extra_fields:
+ return volinfo, vm[-len(extra_fields):]
+ else:
+ return volinfo
+
+ @classmethod
+ def foreign_volume_infos(cls):
+ """return list of valid (not expired) foreign volume marks"""
+ dict_list = []
+ xattr_list = Xattr.llistxattr_buf('.')
+ for ele in xattr_list:
+ if ele.find('.'.join([cls.GX_NSPACE, 'volume-mark', ''])) == 0:
+ d, x = cls._attr_unpack_dict(ele, cls.FRGN_XTRA_FMT)
+ now = int(time.time())
+ if x[0] > now:
+ logging.debug("volinfo[%s] expires: %d "
+ "(%d sec later)" %
+ (d['uuid'], x[0], x[0] - now))
+ d['timeout'] = x[0]
+ dict_list.append(d)
+ else:
+ try:
+ Xattr.lremovexattr('.', ele)
+ except OSError:
+ pass
+ return dict_list
+
+ @classmethod
+ def native_volume_info(cls):
+ """get the native volume mark of the underlying gluster volume"""
+ try:
+ return cls._attr_unpack_dict('.'.join([cls.GX_NSPACE,
+ 'volume-mark']))
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno != ENODATA:
+ raise
+
+
+class GLUSTER(object):
+
+ """scheme class for gluster:// urls
+
+ can be used to represent a gluster slave server
+ on slave side, or interface to a remote gluster
+ slave on master side, or to represent master
+ (slave-ish features come from the mixins, master
+ functionality is outsourced to GMaster from master)
+ """
+ server = GLUSTERServer
+
+ def __init__(self, host, volume):
+ self.path = "%s:%s" % (host, volume)
+ self.host = host
+ self.volume = volume
+
+ global slv_volume
+ global slv_host
+ slv_volume = self.volume
+ slv_host = self.host
+
+ def connect(self):
+ """inhibit the resource beyond
+
+ Choose mounting backend (direct or mountbroker),
+ set up glusterfs parameters and perform the mount
+ with given backend
+ """
+
+ logging.info("Mounting gluster volume locally...")
+ t0 = time.time()
+ label = gconf.get('mountbroker', None)
+ if not label and not privileged():
+ label = syncdutils.getusername()
+ mounter = label and MountbrokerMounter or DirectMounter
+
+ log_file = gconf.get("gluster-log-file")
+ if rconf.args.subcmd == "slave":
+ log_file = gconf.get("slave-gluster-log-file")
+
+ log_level = gconf.get("gluster-log-level")
+ if rconf.args.subcmd == "slave":
+ log_level = gconf.get("slave-gluster-log-level")
+
+ params = gconf.get("gluster-params").split() + \
+ ['log-level=' + log_level] + \
+ ['log-file=' + log_file, 'volfile-server=' + self.host] + \
+ ['volfile-id=' + self.volume, 'client-pid=-1']
+
+ self.mounter = mounter(params)
+ self.mounter.inhibit(label)
+ logging.info(lf("Mounted gluster volume",
+ duration="%.4f" % (time.time() - t0)))
+
+ def gmaster_instantiate_tuple(self, slave):
+ """return a tuple of the 'one shot' and the 'main crawl'
+ class instance"""
+ return (gmaster_builder('xsync')(self, slave),
+ gmaster_builder()(self, slave),
+ gmaster_builder('changeloghistory')(self, slave))
+
+ def service_loop(self, slave=None):
+ """enter service loop
+
+ - if slave given, instantiate GMaster and
+ pass control to that instance, which implements
+ master behavior
+ - else do that's what's inherited
+ """
+ if rconf.args.subcmd == "slave":
+ if gconf.get("use-rsync-xattrs") and not privileged():
+ raise GsyncdError(
+ "using rsync for extended attributes is not supported")
+
+ repce = RepceServer(
+ self.server, sys.stdin, sys.stdout, gconf.get("sync-jobs"))
+ t = syncdutils.Thread(target=lambda: (repce.service_loop(),
+ syncdutils.finalize()))
+ t.start()
+ logging.info("slave listening")
+ if gconf.get("slave-timeout") and gconf.get("slave-timeout") > 0:
+ while True:
+ lp = self.server.last_keep_alive
+ time.sleep(gconf.get("slave-timeout"))
+ if lp == self.server.last_keep_alive:
+ logging.info(
+ lf("connection inactive, stopping",
+ timeout=gconf.get("slave-timeout")))
+ break
+ else:
+ select((), (), ())
+
+ return
+
+ class brickserver(Server):
+ local_path = rconf.args.local_path
+ aggregated = self.server
+
+ @classmethod
+ def entries(cls, path):
+ e = super(brickserver, cls).entries(path)
+ # on the brick don't mess with /.glusterfs
+ if path == '.':
+ try:
+ e.remove('.glusterfs')
+ e.remove('.trashcan')
+ except ValueError:
+ pass
+ return e
+
+ @classmethod
+ def lstat(cls, e):
+ """ path based backend stat """
+ return super(brickserver, cls).lstat(e)
+
+ @classmethod
+ def gfid(cls, e):
+ """ path based backend gfid fetch """
+ return super(brickserver, cls).gfid(e)
+
+ @classmethod
+ def linkto_check(cls, e):
+ return super(brickserver, cls).linkto_check(e)
+
+ # define {,set_}xtime in slave, thus preempting
+ # the call to remote, so that it takes data from
+ # the local brick
+ slave.server.xtime = types.MethodType(
+ lambda _self, path, uuid: (
+ brickserver.xtime(path,
+ uuid + '.' + rconf.args.slave_id)
+ ),
+ slave.server)
+ slave.server.stime = types.MethodType(
+ lambda _self, path, uuid: (
+ brickserver.stime(path,
+ uuid + '.' + rconf.args.slave_id)
+ ),
+ slave.server)
+ slave.server.entry_stime = types.MethodType(
+ lambda _self, path, uuid: (
+ brickserver.entry_stime(
+ path,
+ uuid + '.' + rconf.args.slave_id)
+ ),
+ slave.server)
+ slave.server.set_stime = types.MethodType(
+ lambda _self, path, uuid, mark: (
+ brickserver.set_stime(path,
+ uuid + '.' + rconf.args.slave_id,
+ mark)
+ ),
+ slave.server)
+ slave.server.set_entry_stime = types.MethodType(
+ lambda _self, path, uuid, mark: (
+ brickserver.set_entry_stime(
+ path,
+ uuid + '.' + rconf.args.slave_id,
+ mark)
+ ),
+ slave.server)
+
+ (g1, g2, g3) = self.gmaster_instantiate_tuple(slave)
+ g1.master.server = brickserver
+ g2.master.server = brickserver
+ g3.master.server = brickserver
+
+ # bad bad bad: bad way to do things like this
+ # need to make this elegant
+ # register the crawlers and start crawling
+ # g1 ==> Xsync, g2 ==> config.change_detector(changelog by default)
+ # g3 ==> changelog History
+ status = GeorepStatus(gconf.get("state-file"),
+ rconf.args.local_node,
+ rconf.args.local_path,
+ rconf.args.local_node_id,
+ rconf.args.master,
+ rconf.args.slave)
+ status.reset_on_worker_start()
+
+ try:
+ workdir = g2.setup_working_dir()
+ # Register only when change_detector is not set to
+ # xsync, else agent will generate changelog files
+ # in .processing directory of working dir
+ if gconf.get("change-detector") != 'xsync':
+ # register with the changelog library
+ # 9 == log level (DEBUG)
+ # 5 == connection retries
+ libgfchangelog.register(rconf.args.local_path,
+ workdir,
+ gconf.get("changelog-log-file"),
+ get_changelog_log_level(
+ gconf.get("changelog-log-level")),
+ g2.CHANGELOG_CONN_RETRIES)
+
+ register_time = int(time.time())
+ g2.register(register_time, status)
+ g3.register(register_time, status)
+ except ChangelogException as e:
+ logging.error(lf("Changelog register failed", error=e))
+ sys.exit(1)
+
+ g1.register(status=status)
+ logging.info(lf("Register time",
+ time=register_time))
+ # oneshot: Try to use changelog history api, if not
+ # available switch to FS crawl
+ # Note: if config.change_detector is xsync then
+ # it will not use changelog history api
+ try:
+ g3.crawlwrap(oneshot=True)
+ except PartialHistoryAvailable as e:
+ logging.info(lf('Partial history available, using xsync crawl'
+ ' after consuming history',
+ till=e))
+ g1.crawlwrap(oneshot=True, register_time=register_time)
+ except ChangelogHistoryNotAvailable:
+ logging.info('Changelog history not available, using xsync')
+ g1.crawlwrap(oneshot=True, register_time=register_time)
+ except NoStimeAvailable:
+ logging.info('No stime available, using xsync crawl')
+ g1.crawlwrap(oneshot=True, register_time=register_time)
+ except ChangelogException as e:
+ logging.error(lf("Changelog History Crawl failed",
+ error=e))
+ sys.exit(1)
+
+ try:
+ g2.crawlwrap()
+ except ChangelogException as e:
+ logging.error(lf("Changelog crawl failed", error=e))
+ sys.exit(1)
+
+
+class SSH(object):
+
+ """scheme class for ssh:// urls
+
+ interface to remote slave on master side
+ implementing an ssh based proxy
+ """
+
+ def __init__(self, host, volume):
+ self.remote_addr = host
+ self.volume = volume
+
+ @staticmethod
+ def parse_ssh_address(self):
+ m = re.match('([^@]+)@(.+)', self.remote_addr)
+ if m:
+ u, h = m.groups()
+ else:
+ u, h = syncdutils.getusername(), self.remote_addr
+ self.remotehost = h
+ return {'user': u, 'host': h}
+
+ def start_fd_client(self, i, o):
+ """set up RePCe client, handshake with server
+
+ It's cut out as a separate method to let
+ subclasses hook into client startup
+ """
+ self.server = RepceClient(i, o)
+ rv = self.server.__version__()
+ exrv = {'proto': repce.repce_version, 'object': Server.version()}
+ da0 = (rv, exrv)
+ da1 = ({}, {})
+ for i in range(2):
+ for k, v in da0[i].items():
+ da1[i][k] = int(v)
+ if da1[0] != da1[1]:
+ raise GsyncdError(
+ "RePCe major version mismatch: local %s, remote %s" %
+ (exrv, rv))
+ slavepath = "/proc/%d/cwd" % self.server.pid()
+ self.slaveurl = ':'.join([self.remote_addr, slavepath])
+
+ def connect_remote(self):
+ """connect to inner slave url through outer ssh url
+
+ Wrap the connecting utility in ssh.
+
+ Much care is put into daemonizing: in that case
+ ssh is started before daemonization, but
+ RePCe client is to be created after that (as ssh
+ interactive password auth would be defeated by
+ a daemonized ssh, while client should be present
+ only in the final process). In that case the action
+ is taken apart to two parts, this method is ivoked
+ once pre-daemon, once post-daemon. Use @go_daemon
+ to deiced what part to perform.
+
+ [NB. ATM gluster product does not makes use of interactive
+ authentication.]
+ """
+ syncdutils.setup_ssh_ctl(tempfile.mkdtemp(prefix='gsyncd-aux-ssh-'),
+ self.remote_addr,
+ self.volume)
+
+ logging.info("Initializing SSH connection between master and slave...")
+ t0 = time.time()
+
+ extra_opts = []
+ remote_gsyncd = gconf.get("remote-gsyncd")
+ if remote_gsyncd == "":
+ remote_gsyncd = "/nonexistent/gsyncd"
+
+ if gconf.get("use-rsync-xattrs"):
+ extra_opts.append('--use-rsync-xattrs')
+
+ args_to_slave = [gconf.get("ssh-command")] + \
+ gconf.get("ssh-options").split() + \
+ ["-p", str(gconf.get("ssh-port"))] + \
+ rconf.ssh_ctl_args + [self.remote_addr] + \
+ [remote_gsyncd, "slave"] + \
+ extra_opts + \
+ [rconf.args.master, rconf.args.slave] + \
+ [
+ '--master-node', rconf.args.local_node,
+ '--master-node-id', rconf.args.local_node_id,
+ '--master-brick', rconf.args.local_path,
+ '--local-node', rconf.args.resource_remote,
+ '--local-node-id', rconf.args.resource_remote_id] + \
+ [
+ # Add all config arguments here, slave gsyncd will not use
+ # config file in slave side, so all overriding options should
+ # be sent as arguments
+ '--slave-timeout', str(gconf.get("slave-timeout")),
+ '--slave-log-level', gconf.get("slave-log-level"),
+ '--slave-gluster-log-level',
+ gconf.get("slave-gluster-log-level"),
+ '--slave-gluster-command-dir',
+ gconf.get("slave-gluster-command-dir"),
+ '--master-dist-count',
+ str(gconf.get("master-distribution-count"))]
+
+ if gconf.get("slave-access-mount"):
+ args_to_slave.append('--slave-access-mount')
+
+ if rconf.args.debug:
+ args_to_slave.append('--debug')
+
+ po = Popen(args_to_slave,
+ stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ rconf.transport = po
+ self.start_fd_client(po.stdout, po.stdin)
+ logging.info(lf("SSH connection between master and slave established.",
+ duration="%.4f" % (time.time() - t0)))
+
+ def rsync(self, files, *args, **kw):
+ """invoke rsync"""
+ if not files:
+ raise GsyncdError("no files to sync")
+ logging.debug("files: " + ", ".join(files))
+
+ extra_rsync_flags = []
+ # Performance flag, --ignore-missing-args, if rsync version is
+ # greater than 3.1.0 then include this flag.
+ if gconf.get("rsync-opt-ignore-missing-args") and \
+ get_rsync_version(gconf.get("rsync-command")) >= "3.1.0":
+ extra_rsync_flags = ["--ignore-missing-args"]
+
+ rsync_ssh_opts = [gconf.get("ssh-command")] + \
+ gconf.get("ssh-options").split() + \
+ ["-p", str(gconf.get("ssh-port"))] + \
+ rconf.ssh_ctl_args + \
+ gconf.get("rsync-ssh-options").split()
+
+ argv = [
+ gconf.get("rsync-command"),
+ '-aR0',
+ '--inplace',
+ '--files-from=-',
+ '--super',
+ '--stats',
+ '--numeric-ids',
+ '--no-implied-dirs'
+ ]
+
+ if gconf.get("rsync-opt-existing"):
+ argv += ["--existing"]
+
+ if gconf.get("sync-xattrs"):
+ argv += ['--xattrs']
+
+ if gconf.get("sync-acls"):
+ argv += ['--acls']
+
+ argv = argv + \
+ gconf.get("rsync-options").split() + \
+ extra_rsync_flags + ['.'] + \
+ ["-e", " ".join(rsync_ssh_opts)] + \
+ [self.slaveurl]
+
+ log_rsync_performance = gconf.getr("log-rsync-performance", False)
+
+ if log_rsync_performance:
+ # use stdout=PIPE only when log_rsync_performance enabled
+ # Else rsync will write to stdout and nobody is there
+ # to consume. If PIPE is full rsync hangs.
+ po = Popen(argv, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE, universal_newlines=True)
+ else:
+ po = Popen(argv, stdin=subprocess.PIPE, stderr=subprocess.PIPE,
+ universal_newlines=True)
+
+ for f in files:
+ po.stdin.write(f)
+ po.stdin.write('\0')
+
+ stdout, stderr = po.communicate()
+
+ if kw.get("log_err", False):
+ for errline in stderr.strip().split("\n")[:-1]:
+ logging.error(lf("SYNC Error",
+ sync_engine="Rsync",
+ error=errline))
+
+ if log_rsync_performance:
+ rsync_msg = []
+ for line in stdout.split("\n"):
+ if line.startswith("Number of files:") or \
+ line.startswith("Number of regular files transferred:") or \
+ line.startswith("Total file size:") or \
+ line.startswith("Total transferred file size:") or \
+ line.startswith("Literal data:") or \
+ line.startswith("Matched data:") or \
+ line.startswith("Total bytes sent:") or \
+ line.startswith("Total bytes received:") or \
+ line.startswith("sent "):
+ rsync_msg.append(line)
+ logging.info(lf("rsync performance",
+ data=", ".join(rsync_msg)))
+
+ return po
+
+ def tarssh(self, files, log_err=False):
+ """invoke tar+ssh
+ -z (compress) can be use if needed, but omitting it now
+ as it results in weird error (tar+ssh errors out (errcode: 2)
+ """
+ if not files:
+ raise GsyncdError("no files to sync")
+ logging.debug("files: " + ", ".join(files))
+ (host, rdir) = self.slaveurl.split(':')
+
+ tar_cmd = ["tar"] + \
+ ["--sparse", "-cf", "-", "--files-from", "-"]
+ ssh_cmd = gconf.get("ssh-command").split() + \
+ gconf.get("ssh-options-tar").split() + \
+ ["-p", str(gconf.get("ssh-port"))] + \
+ [host, "tar"] + \
+ ["--overwrite", "-xf", "-", "-C", rdir]
+ p0 = Popen(tar_cmd, stdout=subprocess.PIPE,
+ stdin=subprocess.PIPE, stderr=subprocess.PIPE,
+ universal_newlines=True)
+ p1 = Popen(ssh_cmd, stdin=p0.stdout, stderr=subprocess.PIPE,
+ universal_newlines=True)
+ for f in files:
+ p0.stdin.write(f)
+ p0.stdin.write('\n')
+
+ p0.stdin.close()
+ p0.stdout.close() # Allow p0 to receive a SIGPIPE if p1 exits.
+
+ # stdin and stdout of p0 is already closed, Reset to None and
+ # wait for child process to complete
+ p0.stdin = None
+ p0.stdout = None
+
+ def wait_for_tar(p0):
+ _, stderr = p0.communicate()
+ if log_err:
+ for errline in stderr.strip().split("\n")[:-1]:
+ if "No such file or directory" not in errline:
+ logging.error(lf("SYNC Error",
+ sync_engine="Tarssh",
+ error=errline))
+
+ t = syncdutils.Thread(target=wait_for_tar, args=(p0, ))
+ # wait for tar to terminate, collecting any errors, further
+ # waiting for transfer to complete
+ t.start()
+
+ # wait for ssh process
+ _, stderr1 = p1.communicate()
+ t.join()
+
+ if log_err:
+ for errline in stderr1.strip().split("\n")[:-1]:
+ logging.error(lf("SYNC Error",
+ sync_engine="Tarssh",
+ error=errline))
+
+ return p1
diff --git a/geo-replication/syncdaemon/subcmds.py b/geo-replication/syncdaemon/subcmds.py
new file mode 100644
index 00000000000..b8508532e30
--- /dev/null
+++ b/geo-replication/syncdaemon/subcmds.py
@@ -0,0 +1,335 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from __future__ import print_function
+from syncdutils import lf
+import logging
+import gsyncdconfig as gconf
+
+
+ERROR_CONFIG_INVALID = 2
+ERROR_CONFIG_INVALID_VALUE = 3
+ERROR_CONFIG_NOT_CONFIGURABLE = 4
+
+
+def subcmd_monitor_status(args):
+ from gsyncdstatus import set_monitor_status
+ from rconf import rconf
+
+ set_monitor_status(gconf.get("state-file"), args.status)
+ rconf.log_exit = False
+ logging.info(lf("Monitor Status Change", status=args.status))
+
+
+def subcmd_status(args):
+ from gsyncdstatus import GeorepStatus
+
+ master_name = args.master.replace(":", "")
+ slave_data = args.slave.replace("ssh://", "")
+
+ brick_status = GeorepStatus(gconf.get("state-file"),
+ "",
+ args.local_path,
+ "",
+ master_name,
+ slave_data,
+ gconf.get("pid-file"))
+ checkpoint_time = gconf.get("checkpoint", 0)
+ brick_status.print_status(checkpoint_time=checkpoint_time,
+ json_output=args.json)
+
+
+def subcmd_monitor(args):
+ import monitor
+ from resource import GLUSTER, SSH, Popen
+ go_daemon = False if args.debug else True
+
+ monitor.startup(go_daemon)
+ Popen.init_errhandler()
+ local = GLUSTER("localhost", args.master)
+ slavehost, slavevol = args.slave.split("::")
+ remote = SSH(slavehost, slavevol)
+ return monitor.monitor(local, remote)
+
+
+def subcmd_verify_spawning(args):
+ logging.info("Able to spawn gsyncd.py")
+
+
+def subcmd_worker(args):
+ import os
+ import fcntl
+
+ from resource import GLUSTER, SSH, Popen
+
+ Popen.init_errhandler()
+ fcntl.fcntl(args.feedback_fd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
+ local = GLUSTER("localhost", args.master)
+ slave_url, slavevol = args.slave.split("::")
+ if "@" not in slave_url:
+ slavehost = args.resource_remote
+ else:
+ slavehost = "%s@%s" % (slave_url.split("@")[0], args.resource_remote)
+ remote = SSH(slavehost, slavevol)
+ remote.connect_remote()
+ local.connect()
+ logging.info("Worker spawn successful. Acknowledging back to monitor")
+ os.close(args.feedback_fd)
+ local.service_loop(remote)
+
+
+def subcmd_slave(args):
+ from resource import GLUSTER, Popen
+
+ Popen.init_errhandler()
+ slavevol = args.slave.split("::")[-1]
+ local = GLUSTER("localhost", slavevol)
+
+ local.connect()
+ local.service_loop()
+
+
+def subcmd_voluuidget(args):
+ from subprocess import Popen, PIPE
+ import xml.etree.ElementTree as XET
+
+ ParseError = XET.ParseError if hasattr(XET, 'ParseError') else SyntaxError
+
+ cmd = ['gluster', '--xml', '--remote-host=' + args.host,
+ 'volume', 'info', args.volname]
+
+ if args.inet6:
+ cmd.append("--inet6")
+
+ po = Popen(cmd, bufsize=0,
+ stdin=None, stdout=PIPE, stderr=PIPE,
+ universal_newlines=True)
+
+ vix, err = po.communicate()
+ if po.returncode != 0:
+ logging.info(lf("Volume info failed, unable to get "
+ "volume uuid of slavevol, "
+ "returning empty string",
+ slavevol=args.volname,
+ slavehost=args.host,
+ error=po.returncode))
+ return ""
+ vi = XET.fromstring(vix)
+ if vi.find('opRet').text != '0':
+ logging.info(lf("Unable to get volume uuid of slavevol, "
+ "returning empty string",
+ slavevol=args.volname,
+ slavehost=args.host,
+ error=vi.find('opErrstr').text))
+ return ""
+
+ try:
+ voluuid = vi.find("volInfo/volumes/volume/id").text
+ except (ParseError, AttributeError, ValueError) as e:
+ logging.info(lf("Parsing failed to volume uuid of slavevol, "
+ "returning empty string",
+ slavevol=args.volname,
+ slavehost=args.host,
+ error=e))
+ voluuid = ""
+
+ print(voluuid)
+
+
+def _unlink(path):
+ import os
+ from errno import ENOENT
+ from syncdutils import GsyncdError
+ import sys
+
+ try:
+ os.unlink(path)
+ except (OSError, IOError):
+ if sys.exc_info()[1].errno == ENOENT:
+ pass
+ else:
+ raise GsyncdError('Unlink error: %s' % path)
+
+
+def subcmd_delete(args):
+ import logging
+ import shutil
+ import glob
+ import sys
+ from errno import ENOENT, ENODATA
+ import struct
+
+ from syncdutils import GsyncdError, Xattr, errno_wrap
+ import gsyncdconfig as gconf
+
+ logging.info('geo-replication delete')
+ # remove the stime xattr from all the brick paths so that
+ # a re-create of a session will start sync all over again
+ stime_xattr_prefix = gconf.get('stime-xattr-prefix', None)
+
+ # Delete pid file, status file, socket file
+ cleanup_paths = []
+ cleanup_paths.append(gconf.get("pid-file"))
+
+ # Cleanup Session dir
+ try:
+ shutil.rmtree(gconf.get("georep-session-working-dir"))
+ except (IOError, OSError):
+ if sys.exc_info()[1].errno == ENOENT:
+ pass
+ else:
+ raise GsyncdError(
+ 'Error while removing working dir: %s' %
+ gconf.get("georep-session-working-dir"))
+
+ # Cleanup changelog working dirs
+ try:
+ shutil.rmtree(gconf.get("working-dir"))
+ except (IOError, OSError):
+ if sys.exc_info()[1].errno == ENOENT:
+ pass
+ else:
+ raise GsyncdError(
+ 'Error while removing working dir: %s' %
+ gconf.get("working-dir"))
+
+ for path in cleanup_paths:
+ # To delete temp files
+ for f in glob.glob(path + "*"):
+ _unlink(f)
+
+ if args.reset_sync_time and stime_xattr_prefix:
+ for p in args.paths:
+ if p != "":
+ # set stime to (0,0) to trigger full volume content resync
+ # to slave on session recreation
+ # look at master.py::Xcrawl hint: zero_zero
+ errno_wrap(Xattr.lsetxattr,
+ (p, stime_xattr_prefix + ".stime",
+ struct.pack("!II", 0, 0)),
+ [ENOENT, ENODATA])
+ errno_wrap(Xattr.lremovexattr,
+ (p, stime_xattr_prefix + ".entry_stime"),
+ [ENOENT, ENODATA])
+
+ return
+
+
+def print_config(name, value, only_value=False, use_underscore=False):
+ val = value
+ if isinstance(value, bool):
+ val = str(value).lower()
+
+ if only_value:
+ print(val)
+ else:
+ if use_underscore:
+ name = name.replace("-", "_")
+
+ print(("%s:%s" % (name, val)))
+
+
+def config_name_format(val):
+ return val.replace("_", "-")
+
+
+def subcmd_config_get(args):
+ import sys
+ import json
+
+ all_config = gconf.getall(show_defaults=args.show_defaults,
+ show_non_configurable=True)
+ if args.name is not None:
+ val = all_config.get(config_name_format(args.name), None)
+ if val is None:
+ sys.stderr.write("Invalid config name \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_INVALID)
+
+ print_config(args.name, val["value"], only_value=args.only_value,
+ use_underscore=args.use_underscore)
+ return
+
+ if args.json:
+ out = []
+ # Convert all values as string
+ for k in sorted(all_config):
+ v = all_config[k]
+ out.append({
+ "name": k,
+ "value": str(v["value"]),
+ "default": str(v["default"]),
+ "configurable": v["configurable"],
+ "modified": v["modified"]
+ })
+
+ print((json.dumps(out)))
+ return
+
+ for k in sorted(all_config):
+ print_config(k, all_config[k]["value"],
+ use_underscore=args.use_underscore)
+
+
+def subcmd_config_check(args):
+ import sys
+
+ try:
+ gconf.check(config_name_format(args.name), value=args.value,
+ with_conffile=False)
+ except gconf.GconfNotConfigurable:
+ cnf_val = gconf.get(config_name_format(args.name), None)
+ if cnf_val is None:
+ sys.stderr.write("Invalid config name \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_INVALID)
+
+ # Not configurable
+ sys.stderr.write("Not configurable \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_NOT_CONFIGURABLE)
+ except gconf.GconfInvalidValue:
+ sys.stderr.write("Invalid config value \"%s=%s\"\n" % (args.name,
+ args.value))
+ sys.exit(ERROR_CONFIG_INVALID_VALUE)
+
+
+def subcmd_config_set(args):
+ import sys
+
+ try:
+ gconf.setconfig(config_name_format(args.name), args.value)
+ except gconf.GconfNotConfigurable:
+ cnf_val = gconf.get(config_name_format(args.name), None)
+ if cnf_val is None:
+ sys.stderr.write("Invalid config name \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_INVALID)
+
+ # Not configurable
+ sys.stderr.write("Not configurable \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_NOT_CONFIGURABLE)
+ except gconf.GconfInvalidValue:
+ sys.stderr.write("Invalid config value \"%s=%s\"\n" % (args.name,
+ args.value))
+ sys.exit(ERROR_CONFIG_INVALID_VALUE)
+
+
+def subcmd_config_reset(args):
+ import sys
+
+ try:
+ gconf.resetconfig(config_name_format(args.name))
+ except gconf.GconfNotConfigurable:
+ cnf_val = gconf.get(config_name_format(args.name), None)
+ if cnf_val is None:
+ sys.stderr.write("Invalid config name \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_INVALID)
+
+ # Not configurable
+ sys.stderr.write("Not configurable \"%s\"\n" % args.name)
+ sys.exit(ERROR_CONFIG_NOT_CONFIGURABLE)
diff --git a/geo-replication/syncdaemon/syncdutils.py b/geo-replication/syncdaemon/syncdutils.py
new file mode 100644
index 00000000000..a3df103e76c
--- /dev/null
+++ b/geo-replication/syncdaemon/syncdutils.py
@@ -0,0 +1,1115 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import os
+import sys
+import pwd
+import time
+import fcntl
+import shutil
+import logging
+import errno
+import threading
+import subprocess
+import socket
+from subprocess import PIPE
+from threading import Lock, Thread as baseThread
+from errno import (EACCES, EAGAIN, EPIPE, ENOTCONN, ENOMEM, ECONNABORTED,
+ EINTR, ENOENT, ESTALE, EBUSY, ENODATA, errorcode, EIO)
+from signal import signal, SIGTERM
+import select as oselect
+from os import waitpid as owaitpid
+import xml.etree.ElementTree as XET
+from select import error as SelectError
+try:
+ from cPickle import PickleError
+except ImportError:
+ from pickle import PickleError
+
+from conf import GLUSTERFS_LIBEXECDIR, UUID_FILE
+sys.path.insert(1, GLUSTERFS_LIBEXECDIR)
+EVENTS_ENABLED = True
+try:
+ from gfevents.eventtypes import GEOREP_FAULTY as EVENT_GEOREP_FAULTY
+ from gfevents.eventtypes import GEOREP_ACTIVE as EVENT_GEOREP_ACTIVE
+ from gfevents.eventtypes import GEOREP_PASSIVE as EVENT_GEOREP_PASSIVE
+ from gfevents.eventtypes import GEOREP_CHECKPOINT_COMPLETED \
+ as EVENT_GEOREP_CHECKPOINT_COMPLETED
+except ImportError:
+ # Events APIs not installed, dummy eventtypes with None
+ EVENTS_ENABLED = False
+ EVENT_GEOREP_FAULTY = None
+ EVENT_GEOREP_ACTIVE = None
+ EVENT_GEOREP_PASSIVE = None
+ EVENT_GEOREP_CHECKPOINT_COMPLETED = None
+
+import gsyncdconfig as gconf
+from rconf import rconf
+
+from hashlib import sha256 as sha256
+
+ENOTSUP = getattr(errno, 'ENOTSUP', 'EOPNOTSUPP')
+
+# auxiliary gfid based access prefix
+_CL_AUX_GFID_PFX = ".gfid/"
+ROOT_GFID = "00000000-0000-0000-0000-000000000001"
+GF_OP_RETRIES = 10
+
+GX_GFID_CANONICAL_LEN = 37 # canonical gfid len + '\0'
+
+NodeID = None
+rsync_version = None
+unshare_mnt_propagation = None
+slv_bricks = None
+SPACE_ESCAPE_CHAR = "%20"
+NEWLINE_ESCAPE_CHAR = "%0A"
+PERCENTAGE_ESCAPE_CHAR = "%25"
+
+final_lock = Lock()
+
+def sup(x, *a, **kw):
+ """a rubyesque "super" for python ;)
+
+ invoke caller method in parent class with given args.
+ """
+ return getattr(super(type(x), x),
+ sys._getframe(1).f_code.co_name)(*a, **kw)
+
+
+def escape(s):
+ """the chosen flavor of string escaping, used all over
+ to turn whatever data to creatable representation"""
+ return s.replace("/", "-").strip("-")
+
+
+def escape_space_newline(s):
+ return s.replace("%", PERCENTAGE_ESCAPE_CHAR)\
+ .replace(" ", SPACE_ESCAPE_CHAR)\
+ .replace("\n", NEWLINE_ESCAPE_CHAR)
+
+
+def unescape_space_newline(s):
+ return s.replace(SPACE_ESCAPE_CHAR, " ")\
+ .replace(NEWLINE_ESCAPE_CHAR, "\n")\
+ .replace(PERCENTAGE_ESCAPE_CHAR, "%")
+
+# gf_mount_ready() returns 1 if all subvols are up, else 0
+def gf_mount_ready():
+ ret = errno_wrap(Xattr.lgetxattr,
+ ['.', 'dht.subvol.status', 16],
+ [ENOENT, ENOTSUP, ENODATA], [ENOMEM])
+
+ if isinstance(ret, int):
+ logging.error("failed to get the xattr value")
+ return 1
+ ret = ret.rstrip('\x00')
+ if ret == "1":
+ return 1
+ return 0
+
+def norm(s):
+ if s:
+ return s.replace('-', '_')
+
+
+def update_file(path, updater, merger=lambda f: True):
+ """update a file in a transaction-like manner"""
+
+ fr = fw = None
+ try:
+ fd = os.open(path, os.O_CREAT | os.O_RDWR)
+ try:
+ fr = os.fdopen(fd, 'r+b')
+ except:
+ os.close(fd)
+ raise
+ fcntl.lockf(fr, fcntl.LOCK_EX)
+ if not merger(fr):
+ return
+
+ tmpp = path + '.tmp.' + str(os.getpid())
+ fd = os.open(tmpp, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
+ try:
+ fw = os.fdopen(fd, 'wb', 0)
+ except:
+ os.close(fd)
+ raise
+ updater(fw)
+ os.fsync(fd)
+ os.rename(tmpp, path)
+ finally:
+ for fx in (fr, fw):
+ if fx:
+ fx.close()
+
+
+def create_manifest(fname, content):
+ """
+ Create manifest file for SSH Control Path
+ """
+ fd = None
+ try:
+ fd = os.open(fname, os.O_CREAT | os.O_RDWR)
+ try:
+ os.write(fd, content)
+ except:
+ os.close(fd)
+ raise
+ finally:
+ if fd is not None:
+ os.close(fd)
+
+
+def setup_ssh_ctl(ctld, remote_addr, resource_url):
+ """
+ Setup GConf ssh control path parameters
+ """
+ rconf.ssh_ctl_dir = ctld
+ content = "SLAVE_HOST=%s\nSLAVE_RESOURCE_URL=%s" % (remote_addr,
+ resource_url)
+ encoded_content = content.encode()
+ content_sha256 = sha256hex(encoded_content)
+ """
+ The length of ctl_path for ssh connection should not be > 108.
+ ssh fails with ctl_path too long if it is so. But when rsync
+ is piped to ssh, it is not taking > 90. Hence using first 32
+ bytes of hash. Hash collision doesn't matter as only one sock
+ file is created per directory.
+ """
+ content_sha256 = content_sha256[:32]
+ fname = os.path.join(rconf.ssh_ctl_dir,
+ "%s.mft" % content_sha256)
+
+ create_manifest(fname, encoded_content)
+ ssh_ctl_path = os.path.join(rconf.ssh_ctl_dir,
+ "%s.sock" % content_sha256)
+ rconf.ssh_ctl_args = ["-oControlMaster=auto", "-S", ssh_ctl_path]
+
+
+def grabfile(fname, content=None):
+ """open @fname + contest for its fcntl lock
+
+ @content: if given, set the file content to it
+ """
+ # damn those messy open() mode codes
+ fd = os.open(fname, os.O_CREAT | os.O_RDWR)
+ f = os.fdopen(fd, 'r+')
+ try:
+ fcntl.lockf(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
+ except:
+ ex = sys.exc_info()[1]
+ f.close()
+ if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
+ # cannot grab, it's taken
+ return
+ raise
+ if content:
+ try:
+ f.truncate()
+ f.write(content)
+ f.flush()
+ except:
+ f.close()
+ raise
+ rconf.permanent_handles.append(f)
+ return f
+
+
+def grabpidfile(fname=None, setpid=True):
+ """.grabfile customization for pid files"""
+ if not fname:
+ fname = gconf.get("pid-file")
+ content = None
+ if setpid:
+ content = str(os.getpid()) + '\n'
+ return grabfile(fname, content=content)
+
+
+def finalize(*args, **kwargs):
+ """all those messy final steps we go trough upon termination
+
+ Do away with pidfile, ssh control dir and logging.
+ """
+
+ final_lock.acquire()
+ if gconf.get('pid_file'):
+ rm_pidf = rconf.pid_file_owned
+ if rconf.cpid:
+ # exit path from parent branch of daemonization
+ rm_pidf = False
+ while True:
+ f = grabpidfile(setpid=False)
+ if not f:
+ # child has already taken over pidfile
+ break
+ if os.waitpid(rconf.cpid, os.WNOHANG)[0] == rconf.cpid:
+ # child has terminated
+ rm_pidf = True
+ break
+ time.sleep(0.1)
+ if rm_pidf:
+ try:
+ os.unlink(rconf.pid_file)
+ except:
+ ex = sys.exc_info()[1]
+ if ex.errno == ENOENT:
+ pass
+ else:
+ raise
+ if rconf.ssh_ctl_dir and not rconf.cpid:
+ def handle_rm_error(func, path, exc_info):
+ if exc_info[1].errno == ENOENT:
+ return
+ raise exc_info[1]
+
+ shutil.rmtree(rconf.ssh_ctl_dir, onerror=handle_rm_error)
+
+ """ Unmount if not done """
+ if rconf.mount_point:
+ if rconf.mountbroker:
+ umount_cmd = rconf.mbr_umount_cmd + [rconf.mount_point, 'lazy']
+ else:
+ umount_cmd = ['umount', '-l', rconf.mount_point]
+ p0 = subprocess.Popen(umount_cmd, stderr=subprocess.PIPE,
+ universal_newlines=True)
+ _, errdata = p0.communicate()
+ if p0.returncode == 0:
+ try:
+ os.rmdir(rconf.mount_point)
+ except OSError:
+ pass
+ else:
+ pass
+
+ if rconf.log_exit:
+ logging.info("exiting.")
+ sys.stdout.flush()
+ sys.stderr.flush()
+ os._exit(kwargs.get('exval', 0))
+
+
+def log_raise_exception(excont):
+ """top-level exception handler
+
+ Try to some fancy things to cover up we face with an error.
+ Translate some weird sounding but well understood exceptions
+ into human-friendly lingo
+ """
+
+ is_filelog = False
+ for h in logging.getLogger().handlers:
+ fno = getattr(getattr(h, 'stream', None), 'fileno', None)
+ if fno and not os.isatty(fno()):
+ is_filelog = True
+
+ exc = sys.exc_info()[1]
+ if isinstance(exc, SystemExit):
+ excont.exval = exc.code or 0
+ raise
+ else:
+ logtag = None
+ if isinstance(exc, GsyncdError):
+ if is_filelog:
+ logging.error(exc.args[0])
+ sys.stderr.write('failure: ' + exc.args[0] + '\n')
+ elif isinstance(exc, PickleError) or isinstance(exc, EOFError) or \
+ ((isinstance(exc, OSError) or isinstance(exc, IOError)) and
+ exc.errno == EPIPE):
+ logging.error('connection to peer is broken')
+ if hasattr(rconf, 'transport'):
+ rconf.transport.wait()
+ if rconf.transport.returncode == 127:
+ logging.error("getting \"No such file or directory\""
+ "errors is most likely due to "
+ "MISCONFIGURATION, please remove all "
+ "the public keys added by geo-replication "
+ "from authorized_keys file in slave nodes "
+ "and run Geo-replication create "
+ "command again.")
+ logging.error("If `gsec_create container` was used, then "
+ "run `gluster volume geo-replication "
+ "<MASTERVOL> [<SLAVEUSER>@]<SLAVEHOST>::"
+ "<SLAVEVOL> config remote-gsyncd "
+ "<GSYNCD_PATH> (Example GSYNCD_PATH: "
+ "`/usr/libexec/glusterfs/gsyncd`)")
+ rconf.transport.terminate_geterr()
+ elif isinstance(exc, OSError) and exc.errno in (ENOTCONN,
+ ECONNABORTED):
+ logging.error(lf('Gluster Mount process exited',
+ error=errorcode[exc.errno]))
+ elif isinstance(exc, OSError) and exc.errno == EIO:
+ logging.error("Getting \"Input/Output error\" "
+ "is most likely due to "
+ "a. Brick is down or "
+ "b. Split brain issue.")
+ logging.error("This is expected as per design to "
+ "keep the consistency of the file system. "
+ "Once the above issue is resolved "
+ "geo-replication would automatically "
+ "proceed further.")
+ logtag = "FAIL"
+ else:
+ logtag = "FAIL"
+ if not logtag and logging.getLogger().isEnabledFor(logging.DEBUG):
+ logtag = "FULL EXCEPTION TRACE"
+ if logtag:
+ logging.exception(logtag + ": ")
+ sys.stderr.write("failed with %s: %s.\n" % (type(exc).__name__, exc))
+ excont.exval = 1
+ sys.exit(excont.exval)
+
+
+class FreeObject(object):
+
+ """wildcard class for which any attribute can be set"""
+
+ def __init__(self, **kw):
+ for k, v in kw.items():
+ setattr(self, k, v)
+
+
+class Thread(baseThread):
+
+ """thread class flavor for gsyncd
+
+ - always a daemon thread
+ - force exit for whole program if thread
+ function coughs up an exception
+ """
+
+ def __init__(self, *args, **kwargs):
+ tf = kwargs.get('target')
+ if tf:
+ def twrap(*aargs):
+ excont = FreeObject(exval=0)
+ try:
+ tf(*aargs)
+ except:
+ try:
+ log_raise_exception(excont)
+ finally:
+ finalize(exval=excont.exval)
+ kwargs['target'] = twrap
+ baseThread.__init__(self, *args, **kwargs)
+ self.setDaemon(True)
+
+
+class GsyncdError(Exception):
+ pass
+
+
+class _MetaXattr(object):
+
+ """singleton class, a lazy wrapper around the
+ libcxattr module
+
+ libcxattr (a heavy import due to ctypes) is
+ loaded only when when the single
+ instance is tried to be used.
+
+ This reduces runtime for those invocations
+ which do not need filesystem manipulation
+ (eg. for config, url parsing)
+ """
+
+ def __getattr__(self, meth):
+ from libcxattr import Xattr as LXattr
+ xmeth = [m for m in dir(LXattr) if m[0] != '_']
+ if meth not in xmeth:
+ return
+ for m in xmeth:
+ setattr(self, m, getattr(LXattr, m))
+ return getattr(self, meth)
+
+
+Xattr = _MetaXattr()
+
+
+def getusername(uid=None):
+ if uid is None:
+ uid = os.geteuid()
+ return pwd.getpwuid(uid).pw_name
+
+
+def privileged():
+ return os.geteuid() == 0
+
+
+def boolify(s):
+ """
+ Generic string to boolean converter
+
+ return
+ - Quick return if string 's' is of type bool
+ - True if it's in true_list
+ - False if it's in false_list
+ - Warn if it's not present in either and return False
+ """
+ true_list = ['true', 'yes', '1', 'on']
+ false_list = ['false', 'no', '0', 'off']
+
+ if isinstance(s, bool):
+ return s
+
+ rv = False
+ lstr = s.lower()
+ if lstr in true_list:
+ rv = True
+ elif lstr not in false_list:
+ logging.warn(lf("Unknown string in \"string to boolean\" conversion, "
+ "defaulting to False",
+ str=s))
+
+ return rv
+
+
+def eintr_wrap(func, exc, *args):
+ """
+ wrapper around syscalls resilient to interrupt caused
+ by signals
+ """
+ while True:
+ try:
+ return func(*args)
+ except exc:
+ ex = sys.exc_info()[1]
+ if not ex.args[0] == EINTR:
+ raise
+
+
+def select(*args):
+ return eintr_wrap(oselect.select, oselect.error, *args)
+
+
+def waitpid(*args):
+ return eintr_wrap(owaitpid, OSError, *args)
+
+
+def term_handler_default_hook(signum, frame):
+ finalize(signum, frame, exval=1)
+
+
+def set_term_handler(hook=term_handler_default_hook):
+ signal(SIGTERM, hook)
+
+
+def get_node_uuid():
+ global NodeID
+ if NodeID is not None:
+ return NodeID
+
+ NodeID = ""
+ with open(UUID_FILE) as f:
+ for line in f:
+ if line.startswith("UUID="):
+ NodeID = line.strip().split("=")[-1]
+ break
+
+ if NodeID == "":
+ raise GsyncdError("Failed to get Host UUID from %s" % UUID_FILE)
+ return NodeID
+
+
+def is_host_local(host_id):
+ return host_id == get_node_uuid()
+
+
+def funcode(f):
+ fc = getattr(f, 'func_code', None)
+ if not fc:
+ # python 3
+ fc = f.__code__
+ return fc
+
+
+def memoize(f):
+ fc = funcode(f)
+ fn = fc.co_name
+
+ def ff(self, *a, **kw):
+ rv = getattr(self, '_' + fn, None)
+ if rv is None:
+ rv = f(self, *a, **kw)
+ setattr(self, '_' + fn, rv)
+ return rv
+ return ff
+
+
+def umask():
+ return os.umask(0)
+
+
+def entry2pb(e):
+ return e.rsplit('/', 1)
+
+
+def gauxpfx():
+ return _CL_AUX_GFID_PFX
+
+
+def sha256hex(s):
+ return sha256(s).hexdigest()
+
+
+def selfkill(sig=SIGTERM):
+ os.kill(os.getpid(), sig)
+
+
+def errno_wrap(call, arg=[], errnos=[], retry_errnos=[]):
+ """ wrapper around calls resilient to errnos.
+ """
+ nr_tries = 0
+ while True:
+ try:
+ return call(*arg)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if ex.errno in errnos:
+ return ex.errno
+ if ex.errno not in retry_errnos:
+ raise
+ nr_tries += 1
+ if nr_tries == GF_OP_RETRIES:
+ # probably a screwed state, cannot do much...
+ logging.warn(lf('reached maximum retries',
+ args=repr(arg),
+ error=ex))
+ raise
+ time.sleep(0.250) # retry the call
+
+
+def lstat(e):
+ return errno_wrap(os.lstat, [e], [ENOENT], [ESTALE, EBUSY])
+
+def get_gfid_from_mnt(gfidpath):
+ return errno_wrap(Xattr.lgetxattr,
+ [gfidpath, 'glusterfs.gfid.string',
+ GX_GFID_CANONICAL_LEN], [ENOENT], [ESTALE])
+
+
+def matching_disk_gfid(gfid, entry):
+ disk_gfid = get_gfid_from_mnt(entry)
+ if isinstance(disk_gfid, int):
+ return False
+
+ if not gfid == disk_gfid:
+ return False
+
+ return True
+
+
+class NoStimeAvailable(Exception):
+ pass
+
+
+class PartialHistoryAvailable(Exception):
+ pass
+
+
+class ChangelogHistoryNotAvailable(Exception):
+ pass
+
+
+class ChangelogException(OSError):
+ pass
+
+
+def gf_event(event_type, **kwargs):
+ if EVENTS_ENABLED:
+ from gfevents.gf_event import gf_event as gfevent
+ gfevent(event_type, **kwargs)
+
+
+class GlusterLogLevel(object):
+ NONE = 0
+ EMERG = 1
+ ALERT = 2
+ CRITICAL = 3
+ ERROR = 4
+ WARNING = 5
+ NOTICE = 6
+ INFO = 7
+ DEBUG = 8
+ TRACE = 9
+
+
+def get_changelog_log_level(lvl):
+ return getattr(GlusterLogLevel, lvl, GlusterLogLevel.INFO)
+
+
+def get_master_and_slave_data_from_args(args):
+ master_name = None
+ slave_data = None
+ for arg in args:
+ if arg.startswith(":"):
+ master_name = arg.replace(":", "")
+ if "::" in arg:
+ slave_data = arg.replace("ssh://", "")
+
+ return (master_name, slave_data)
+
+def unshare_propagation_supported():
+ global unshare_mnt_propagation
+ if unshare_mnt_propagation is not None:
+ return unshare_mnt_propagation
+
+ unshare_mnt_propagation = False
+ p = subprocess.Popen(["unshare", "--help"],
+ stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ universal_newlines=True)
+ out, err = p.communicate()
+ if p.returncode == 0:
+ if "propagation" in out:
+ unshare_mnt_propagation = True
+
+ return unshare_mnt_propagation
+
+
+def get_rsync_version(rsync_cmd):
+ global rsync_version
+ if rsync_version is not None:
+ return rsync_version
+
+ rsync_version = "0"
+ p = subprocess.Popen([rsync_cmd, "--version"],
+ stderr=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ universal_newlines=True)
+ out, err = p.communicate()
+ if p.returncode == 0:
+ rsync_version = out.split(" ", 4)[3]
+
+ return rsync_version
+
+
+def get_slv_dir_path(slv_host, slv_volume, gfid):
+ global slv_bricks
+
+ dir_path = ENOENT
+ pfx = gauxpfx()
+
+ if not slv_bricks:
+ slv_info = Volinfo(slv_volume, slv_host, master=False)
+ slv_bricks = slv_info.bricks
+ # Result of readlink would be of format as below.
+ # readlink = "../../pgfid[0:2]/pgfid[2:4]/pgfid/basename"
+ for brick in slv_bricks:
+ dir_path = errno_wrap(os.path.join,
+ [brick['dir'],
+ ".glusterfs", gfid[0:2],
+ gfid[2:4],
+ gfid], [ENOENT], [ESTALE])
+ if dir_path != ENOENT:
+ try:
+ realpath = errno_wrap(os.readlink, [dir_path],
+ [ENOENT], [ESTALE])
+ if not isinstance(realpath, int):
+ realpath_parts = realpath.split('/')
+ pargfid = realpath_parts[-2]
+ basename = realpath_parts[-1]
+ dir_entry = os.path.join(pfx, pargfid, basename)
+ return dir_entry
+ except OSError:
+ # .gfid/GFID
+ gfidpath = unescape_space_newline(os.path.join(pfx, gfid))
+ realpath = errno_wrap(Xattr.lgetxattr_buf,
+ [gfidpath, 'glusterfs.gfid2path'], [ENOENT], [ESTALE])
+ if not isinstance(realpath, int):
+ basename = os.path.basename(realpath).rstrip('\x00')
+ dirpath = os.path.dirname(realpath)
+ if dirpath == "/":
+ pargfid = ROOT_GFID
+ else:
+ dirpath = dirpath.strip("/")
+ pargfid = get_gfid_from_mnt(dirpath)
+ if isinstance(pargfid, int):
+ return None
+ dir_entry = os.path.join(pfx, pargfid, basename)
+ return dir_entry
+
+ return None
+
+
+def lf(event, **kwargs):
+ """
+ Log Format helper function, log messages can be
+ easily modified to structured log format.
+ lf("Config Change", sync_jobs=4, brick=/bricks/b1) will be
+ converted as "Config Change [{brick=/bricks/b1}, {sync_jobs=4}]"
+ """
+ msgparts = []
+ for k, v in kwargs.items():
+ msgparts.append("{%s=%s}" % (k, v))
+ return "%s [%s]" % (event, ", ".join(msgparts))
+
+
+class Popen(subprocess.Popen):
+
+ """customized subclass of subprocess.Popen with a ring
+ buffer for children error output"""
+
+ @classmethod
+ def init_errhandler(cls):
+ """start the thread which handles children's error output"""
+ cls.errstore = {}
+
+ def tailer():
+ while True:
+ errstore = cls.errstore.copy()
+ try:
+ poe, _, _ = select(
+ [po.stderr for po in errstore], [], [], 1)
+ except (ValueError, SelectError):
+ # stderr is already closed wait for some time before
+ # checking next error
+ time.sleep(0.5)
+ continue
+ for po in errstore:
+ if po.stderr not in poe:
+ continue
+ po.lock.acquire()
+ try:
+ if po.on_death_row:
+ continue
+ la = errstore[po]
+ try:
+ fd = po.stderr.fileno()
+ except ValueError: # file is already closed
+ time.sleep(0.5)
+ continue
+
+ try:
+ l = os.read(fd, 1024)
+ except OSError:
+ time.sleep(0.5)
+ continue
+
+ if not l:
+ continue
+ tots = len(l)
+ for lx in la:
+ tots += len(lx)
+ while tots > 1 << 20 and la:
+ tots -= len(la.pop(0))
+ la.append(l)
+ finally:
+ po.lock.release()
+ t = Thread(target=tailer)
+ t.start()
+ cls.errhandler = t
+
+ @classmethod
+ def fork(cls):
+ """fork wrapper that restarts errhandler thread in child"""
+ pid = os.fork()
+ if not pid:
+ cls.init_errhandler()
+ return pid
+
+ def __init__(self, args, *a, **kw):
+ """customizations for subprocess.Popen instantiation
+
+ - 'close_fds' is taken to be the default
+ - if child's stderr is chosen to be managed,
+ register it with the error handler thread
+ """
+ self.args = args
+ if 'close_fds' not in kw:
+ kw['close_fds'] = True
+ self.lock = threading.Lock()
+ self.on_death_row = False
+ self.elines = []
+ try:
+ sup(self, args, *a, **kw)
+ except:
+ ex = sys.exc_info()[1]
+ if not isinstance(ex, OSError):
+ raise
+ raise GsyncdError("""execution of "%s" failed with %s (%s)""" %
+ (args[0], errno.errorcode[ex.errno],
+ os.strerror(ex.errno)))
+ if kw.get('stderr') == subprocess.PIPE:
+ assert(getattr(self, 'errhandler', None))
+ self.errstore[self] = []
+
+ def errlog(self):
+ """make a log about child's failure event"""
+ logging.error(lf("command returned error",
+ cmd=" ".join(self.args),
+ error=self.returncode))
+ lp = ''
+
+ def logerr(l):
+ logging.error(self.args[0] + "> " + l)
+ for l in self.elines:
+ ls = l.split('\n')
+ ls[0] = lp + ls[0]
+ lp = ls.pop()
+ for ll in ls:
+ logerr(ll)
+ if lp:
+ logerr(lp)
+
+ def errfail(self):
+ """fail nicely if child did not terminate with success"""
+ self.errlog()
+ finalize(exval=1)
+
+ def terminate_geterr(self, fail_on_err=True):
+ """kill child, finalize stderr harvesting (unregister
+ from errhandler, set up .elines), fail on error if
+ asked for
+ """
+ self.lock.acquire()
+ try:
+ self.on_death_row = True
+ finally:
+ self.lock.release()
+ elines = self.errstore.pop(self)
+ if self.poll() is None:
+ self.terminate()
+ if self.poll() is None:
+ time.sleep(0.1)
+ self.kill()
+ self.wait()
+ while True:
+ if not select([self.stderr], [], [], 0.1)[0]:
+ break
+ b = os.read(self.stderr.fileno(), 1024)
+ if b:
+ elines.append(b.decode())
+ else:
+ break
+ self.stderr.close()
+ self.elines = elines
+ if fail_on_err and self.returncode != 0:
+ self.errfail()
+
+
+def host_brick_split(value):
+ """
+ IPv6 compatible way to split and get the host
+ and brick information. Example inputs:
+ node1.example.com:/exports/bricks/brick1/brick
+ fe80::af0f:df82:844f:ef66%utun0:/exports/bricks/brick1/brick
+ """
+ parts = value.split(":")
+ brick = parts[-1]
+ hostparts = parts[0:-1]
+ return (":".join(hostparts), brick)
+
+
+class Volinfo(object):
+
+ def __init__(self, vol, host='localhost', prelude=[], master=True):
+ if master:
+ gluster_cmd_dir = gconf.get("gluster-command-dir")
+ else:
+ gluster_cmd_dir = gconf.get("slave-gluster-command-dir")
+
+ gluster_cmd = os.path.join(gluster_cmd_dir, 'gluster')
+ po = Popen(prelude + [gluster_cmd, '--xml', '--remote-host=' + host,
+ 'volume', 'info', vol],
+ stdout=PIPE, stderr=PIPE, universal_newlines=True)
+ vix = po.stdout.read()
+ po.wait()
+ po.terminate_geterr()
+ vi = XET.fromstring(vix)
+ if vi.find('opRet').text != '0':
+ if prelude:
+ via = '(via %s) ' % prelude.join(' ')
+ else:
+ via = ' '
+ raise GsyncdError('getting volume info of %s%s '
+ 'failed with errorcode %s' %
+ (vol, via, vi.find('opErrno').text))
+ self.tree = vi
+ self.volume = vol
+ self.host = host
+
+ def get(self, elem):
+ return self.tree.findall('.//' + elem)
+
+ def is_tier(self):
+ return (self.get('typeStr')[0].text == 'Tier')
+
+ def is_hot(self, brickpath):
+ logging.debug('brickpath: ' + repr(brickpath))
+ return brickpath in self.hot_bricks
+
+ @property
+ @memoize
+ def bricks(self):
+ def bparse(b):
+ host, dirp = host_brick_split(b.find("name").text)
+ return {'host': host, 'dir': dirp, 'uuid': b.find("hostUuid").text}
+ return [bparse(b) for b in self.get('brick')]
+
+ @property
+ @memoize
+ def uuid(self):
+ ids = self.get('id')
+ if len(ids) != 1:
+ raise GsyncdError("volume info of %s obtained from %s: "
+ "ambiguous uuid" % (self.volume, self.host))
+ return ids[0].text
+
+ def replica_count(self, tier, hot):
+ if (tier and hot):
+ return int(self.get('hotBricks/hotreplicaCount')[0].text)
+ elif (tier and not hot):
+ return int(self.get('coldBricks/coldreplicaCount')[0].text)
+ else:
+ return int(self.get('replicaCount')[0].text)
+
+ def disperse_count(self, tier, hot):
+ if (tier and hot):
+ # Tiering doesn't support disperse volume as hot brick,
+ # hence no xml output, so returning 0. In case, if it's
+ # supported later, we should change here.
+ return 0
+ elif (tier and not hot):
+ return int(self.get('coldBricks/colddisperseCount')[0].text)
+ else:
+ return int(self.get('disperseCount')[0].text)
+
+ def distribution_count(self, tier, hot):
+ if (tier and hot):
+ return int(self.get('hotBricks/hotdistCount')[0].text)
+ elif (tier and not hot):
+ return int(self.get('coldBricks/colddistCount')[0].text)
+ else:
+ return int(self.get('distCount')[0].text)
+
+ @property
+ @memoize
+ def hot_bricks(self):
+ return [b.text for b in self.get('hotBricks/brick')]
+
+ def get_hot_bricks_count(self, tier):
+ if (tier):
+ return int(self.get('hotBricks/hotbrickCount')[0].text)
+ else:
+ return 0
+
+
+class VolinfoFromGconf(object):
+ # Glusterd will generate following config items before Geo-rep start
+ # So that Geo-rep need not run gluster commands from inside
+ # Volinfo object API/interface kept as is so that caller need not
+ # change anything except calling this instead of Volinfo()
+ #
+ # master-bricks=
+ # master-bricks=NODEID:HOSTNAME:PATH,..
+ # slave-bricks=NODEID:HOSTNAME,..
+ # master-volume-id=
+ # slave-volume-id=
+ # master-replica-count=
+ # master-disperse_count=
+ def __init__(self, vol, host='localhost', master=True):
+ self.volume = vol
+ self.host = host
+ self.master = master
+
+ def is_tier(self):
+ return False
+
+ def is_hot(self, brickpath):
+ return False
+
+ def is_uuid(self, value):
+ try:
+ uuid.UUID(value)
+ return True
+ except ValueError:
+ return False
+
+ def possible_path(self, value):
+ return "/" in value
+
+ @property
+ @memoize
+ def bricks(self):
+ pfx = "master-" if self.master else "slave-"
+ bricks_data = gconf.get(pfx + "bricks")
+ if bricks_data is None:
+ return []
+
+ bricks_data = bricks_data.split(",")
+ bricks_data = [b.strip() for b in bricks_data]
+ out = []
+ for b in bricks_data:
+ parts = b.split(":")
+ b_uuid = None
+ if self.is_uuid(parts[0]):
+ b_uuid = parts[0]
+ # Set all parts except first
+ parts = parts[1:]
+
+ if self.possible_path(parts[-1]):
+ bpath = parts[-1]
+ # Set all parts except last
+ parts = parts[0:-1]
+
+ out.append({
+ "host": ":".join(parts), # if remaining parts are IPv6 name
+ "dir": bpath,
+ "uuid": b_uuid
+ })
+
+ return out
+
+ @property
+ @memoize
+ def uuid(self):
+ if self.master:
+ return gconf.get("master-volume-id")
+ else:
+ return gconf.get("slave-volume-id")
+
+ def replica_count(self, tier, hot):
+ return gconf.get("master-replica-count")
+
+ def disperse_count(self, tier, hot):
+ return gconf.get("master-disperse-count")
+
+ def distribution_count(self, tier, hot):
+ return gconf.get("master-distribution-count")
+
+ @property
+ @memoize
+ def hot_bricks(self):
+ return []
+
+ def get_hot_bricks_count(self, tier):
+ return 0
+
+
+def can_ssh(host, port=22):
+ s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+ try:
+ s.connect((host, port))
+ flag = True
+ except socket.error:
+ flag = False
+
+ s.close()
+ return flag
+
+
+def get_up_nodes(hosts, port):
+ # List of hosts with Hostname/IP and UUID
+ up_nodes = []
+ for h in hosts:
+ if can_ssh(h[0], port):
+ up_nodes.append(h)
+
+ return up_nodes
diff --git a/geo-replication/test-requirements.txt b/geo-replication/test-requirements.txt
new file mode 100644
index 00000000000..d6165640d3f
--- /dev/null
+++ b/geo-replication/test-requirements.txt
@@ -0,0 +1,7 @@
+# Hacking already pins down pep8, pyflakes and flake8
+flake8
+coverage
+nose
+nosexcover
+nosehtmloutput
+mock>=0.8.0
diff --git a/geo-replication/tests/__init__.py b/geo-replication/tests/__init__.py
new file mode 100644
index 00000000000..b4648b69645
--- /dev/null
+++ b/geo-replication/tests/__init__.py
@@ -0,0 +1,9 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
diff --git a/geo-replication/tests/unit/__init__.py b/geo-replication/tests/unit/__init__.py
new file mode 100644
index 00000000000..b4648b69645
--- /dev/null
+++ b/geo-replication/tests/unit/__init__.py
@@ -0,0 +1,9 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
diff --git a/geo-replication/tests/unit/test_gsyncdstatus.py b/geo-replication/tests/unit/test_gsyncdstatus.py
new file mode 100755
index 00000000000..9c1aa2ad4ad
--- /dev/null
+++ b/geo-replication/tests/unit/test_gsyncdstatus.py
@@ -0,0 +1,193 @@
+#!/usr/bin/python3
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import unittest
+import os
+import urllib
+
+from syncdaemon.gstatus import (GeorepStatus, set_monitor_status,
+ get_default_values,
+ MONITOR_STATUS, DEFAULT_STATUS,
+ STATUS_VALUES, CRAWL_STATUS_VALUES,
+ human_time, human_time_utc)
+
+
+class GeorepStatusTestCase(unittest.TestCase):
+ @classmethod
+ def setUpClass(cls):
+ cls.work_dir = os.path.dirname(os.path.abspath(__file__))
+ cls.monitor_status_file = os.path.join(cls.work_dir, "monitor.status")
+ cls.brick = "/exports/bricks/b1"
+ cls.status = GeorepStatus(cls.monitor_status_file, cls.brick)
+ cls.statusfile = os.path.join(cls.work_dir,
+ "brick_%s.status"
+ % urllib.quote_plus(cls.brick))
+
+ @classmethod
+ def tearDownClass(cls):
+ os.remove(cls.statusfile)
+ os.remove(cls.monitor_status_file)
+
+ def _filter_dict(self, inp, keys):
+ op = {}
+ for k in keys:
+ op[k] = inp.get(k, None)
+ return op
+
+ def test_monitor_status_file_created(self):
+ self.assertTrue(os.path.exists(self.monitor_status_file))
+
+ def test_status_file_created(self):
+ self.assertTrue(os.path.exists(self.statusfile))
+
+ def test_set_monitor_status(self):
+ for st in MONITOR_STATUS:
+ set_monitor_status(self.monitor_status_file, st)
+ self.assertTrue(self.status.get_monitor_status(), st)
+
+ def test_default_values_test(self):
+ self.assertTrue(get_default_values(), {
+ "slave_node": DEFAULT_STATUS,
+ "worker_status": DEFAULT_STATUS,
+ "last_synced": 0,
+ "last_synced_utc": 0,
+ "crawl_status": DEFAULT_STATUS,
+ "entry": 0,
+ "data": 0,
+ "metadata": 0,
+ "failures": 0,
+ "checkpoint_completed": False,
+ "checkpoint_time": 0,
+ "checkpoint_time_utc": 0,
+ "checkpoint_completion_time": 0,
+ "checkpoint_completion_time_utc": 0
+ })
+
+ def test_human_time(self):
+ self.assertTrue(human_time(1429174398), "2015-04-16 14:23:18")
+
+ def test_human_time_utc(self):
+ self.assertTrue(human_time_utc(1429174398), "2015-04-16 08:53:18")
+
+ def test_invalid_human_time(self):
+ self.assertTrue(human_time(142917439), DEFAULT_STATUS)
+ self.assertTrue(human_time("abcdef"), DEFAULT_STATUS)
+
+ def test_invalid_human_time_utc(self):
+ self.assertTrue(human_time_utc(142917439), DEFAULT_STATUS)
+ self.assertTrue(human_time_utc("abcdef"), DEFAULT_STATUS)
+
+ def test_worker_status(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ for st in STATUS_VALUES:
+ self.status.set_worker_status(st)
+ self.assertTrue(self.status.get_status()["worker_status"], st)
+
+ def test_crawl_status(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ self.status.set_active()
+ for st in CRAWL_STATUS_VALUES:
+ self.status.set_worker_crawl_status(st)
+ self.assertTrue(self.status.get_status()["crawl_status"], st)
+
+ def test_slave_node(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ self.status.set_active()
+ self.status.set_slave_node("fvm2")
+ self.assertTrue(self.status.get_status()["slave_node"], "fvm2")
+
+ self.status.set_worker_status("Passive")
+ self.status.set_slave_node("fvm2")
+ self.assertTrue(self.status.get_status()["slave_node"], "fvm2")
+
+ def test_active_worker_status(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ self.status.set_active()
+ self.assertTrue(self.status.get_status()["worker_status"], "Active")
+
+ def test_passive_worker_status(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ self.status.set_passive()
+ self.assertTrue(self.status.get_status()["worker_status"], "Passive")
+
+ def test_set_field(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ self.status.set_active()
+ self.status.set_field("entry", 42)
+ self.assertTrue(self.status.get_status()["entry"], 42)
+
+ def test_inc_value(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ self.status.set_active()
+ self.status.set_field("entry", 0)
+ self.status.inc_value("entry", 2)
+ self.assertTrue(self.status.get_status()["entry"], 2)
+
+ self.status.set_field("data", 0)
+ self.status.inc_value("data", 2)
+ self.assertTrue(self.status.get_status()["data"], 2)
+
+ self.status.set_field("meta", 0)
+ self.status.inc_value("meta", 2)
+ self.assertTrue(self.status.get_status()["meta"], 2)
+
+ self.status.set_field("failures", 0)
+ self.status.inc_value("failures", 2)
+ self.assertTrue(self.status.get_status()["failures"], 2)
+
+ def test_dec_value(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ self.status.set_active()
+
+ self.status.set_field("entry", 4)
+ self.status.inc_value("entry", 2)
+ self.assertTrue(self.status.get_status()["entry"], 2)
+
+ self.status.set_field("data", 4)
+ self.status.inc_value("data", 2)
+ self.assertTrue(self.status.get_status()["data"], 2)
+
+ self.status.set_field("meta", 4)
+ self.status.inc_value("meta", 2)
+ self.assertTrue(self.status.get_status()["meta"], 2)
+
+ self.status.set_field("failures", 4)
+ self.status.inc_value("failures", 2)
+ self.assertTrue(self.status.get_status()["failures"], 2)
+
+ def test_worker_status_when_monitor_status_created(self):
+ set_monitor_status(self.monitor_status_file, "Created")
+ for st in STATUS_VALUES:
+ self.status.set_worker_status(st)
+ self.assertTrue(self.status.get_status()["worker_status"],
+ "Created")
+
+ def test_worker_status_when_monitor_status_paused(self):
+ set_monitor_status(self.monitor_status_file, "Paused")
+ for st in STATUS_VALUES:
+ self.status.set_worker_status(st)
+ self.assertTrue(self.status.get_status()["worker_status"],
+ "Paused")
+
+ def test_worker_status_when_monitor_status_stopped(self):
+ set_monitor_status(self.monitor_status_file, "Stopped")
+ for st in STATUS_VALUES:
+ self.status.set_worker_status(st)
+ self.assertTrue(self.status.get_status()["worker_status"],
+ "Stopped")
+
+ def test_status_when_worker_status_active(self):
+ set_monitor_status(self.monitor_status_file, "Started")
+ self.status.set_active()
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/geo-replication/tests/unit/test_syncdutils.py b/geo-replication/tests/unit/test_syncdutils.py
new file mode 100644
index 00000000000..ff537ab2660
--- /dev/null
+++ b/geo-replication/tests/unit/test_syncdutils.py
@@ -0,0 +1,29 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import unittest
+
+from syncdaemon import syncdutils
+
+
+class SyncdutilsTestCase(unittest.TestCase):
+ def setUp(self):
+ pass
+
+ def tearDown(self):
+ pass
+
+ def test_escape(self):
+ self.assertEqual(syncdutils.escape("http://gluster.org"),
+ "http%3A%2F%2Fgluster.org")
+
+ def test_unescape(self):
+ self.assertEqual(syncdutils.unescape("http%3A%2F%2Fgluster.org"),
+ "http://gluster.org")
diff --git a/geo-replication/tox.ini b/geo-replication/tox.ini
new file mode 100644
index 00000000000..57ff086b947
--- /dev/null
+++ b/geo-replication/tox.ini
@@ -0,0 +1,32 @@
+[tox]
+envlist = py26,py27,pep8
+
+[testenv]
+whitelist_externals=bash
+setenv = VIRTUAL_ENV={envdir}
+deps =
+ --download-cache={homedir}/.pipcache
+ -r{toxinidir}/test-requirements.txt
+changedir = {toxinidir}/tests/unit
+commands = nosetests -v --exe --with-xunit --with-coverage --cover-package syncdaemon --cover-erase --cover-xml --cover-html --cover-branches --with-html-output {posargs}
+
+[tox:jenkins]
+downloadcache = ~/cache/pip
+
+[testenv:pep8]
+changedir = {toxinidir}
+commands =
+ flake8
+ flake8 syncdaemon tests
+
+[testenv:cover]
+setenv = NOSE_WITH_COVERAGE=1
+
+[testenv:venv]
+commands = {posargs}
+
+[flake8]
+ignore = H
+builtins = _
+exclude = .venv,.tox,dist,doc,tests,*egg
+show-source = True \ No newline at end of file
diff --git a/geo-replication/unittests.sh b/geo-replication/unittests.sh
new file mode 100644
index 00000000000..d5dbd00bd4c
--- /dev/null
+++ b/geo-replication/unittests.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+cd $(dirname $0)/tests/unit
+nosetests -v --exe --with-coverage --cover-package \
+ syncdaemon --cover-erase --cover-html --cover-branches $@
+
+saved_status=$?
+rm -f .coverage
+exit $saved_status
diff --git a/glusterfs-api.pc.in b/glusterfs-api.pc.in
new file mode 100644
index 00000000000..4a2edb7bf07
--- /dev/null
+++ b/glusterfs-api.pc.in
@@ -0,0 +1,12 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: glusterfs-api
+Description: GlusterFS API
+/* This is the API version, NOT package version */
+Version: @GFAPI_VERSION@
+Requires: @PKGCONFIG_UUID@
+Libs: -L${libdir} @GFAPI_LIBS@ -lgfapi -lglusterfs -lgfrpc -lgfxdr
+Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 -DUSE_POSIX_ACLS=@USE_POSIX_ACLS@
diff --git a/glusterfs-hadoop/0.20.2/conf/core-site.xml b/glusterfs-hadoop/0.20.2/conf/core-site.xml
deleted file mode 100644
index d7f75fca733..00000000000
--- a/glusterfs-hadoop/0.20.2/conf/core-site.xml
+++ /dev/null
@@ -1,38 +0,0 @@
-<?xml version="1.0"?>
-<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
-
-<!-- Put site-specific property overrides in this file. -->
-
-<configuration>
-
- <property>
- <name>fs.glusterfs.impl</name>
- <value>org.apache.hadoop.fs.glusterfs.GlusterFileSystem</value>
- </property>
-
- <property>
- <name>fs.default.name</name>
- <value>glusterfs://192.168.1.36:9000</value>
- </property>
-
- <property>
- <name>fs.glusterfs.volname</name>
- <value>volume-dist-rep</value>
- </property>
-
- <property>
- <name>fs.glusterfs.mount</name>
- <value>/mnt/glusterfs</value>
- </property>
-
- <property>
- <name>fs.glusterfs.server</name>
- <value>192.168.1.36</value>
- </property>
-
- <property>
- <name>quick.slave.io</name>
- <value>Off</value>
- </property>
-
-</configuration>
diff --git a/glusterfs-hadoop/0.20.2/pom.xml b/glusterfs-hadoop/0.20.2/pom.xml
deleted file mode 100644
index fe661d40fdc..00000000000
--- a/glusterfs-hadoop/0.20.2/pom.xml
+++ /dev/null
@@ -1,36 +0,0 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
- <modelVersion>4.0.0</modelVersion>
- <groupId>org.apache.hadoop.fs.glusterfs</groupId>
- <artifactId>glusterfs</artifactId>
- <packaging>jar</packaging>
- <version>0.20.2-0.1</version>
- <name>glusterfs</name>
- <url>http://maven.apache.org</url>
- <dependencies>
- <dependency>
- <groupId>junit</groupId>
- <artifactId>junit</artifactId>
- <version>3.8.1</version>
- <scope>test</scope>
- </dependency>
- <dependency>
- <groupId>org.apache.hadoop</groupId>
- <artifactId>hadoop-core</artifactId>
- <version>0.20.2</version>
- </dependency>
- </dependencies>
- <build>
- <plugins>
- <plugin>
- <groupId>org.apache.maven.plugins</groupId>
- <artifactId>maven-compiler-plugin</artifactId>
- <version>2.3.2</version>
- <configuration>
- <source>1.5</source>
- <target>1.5</target>
- </configuration>
- </plugin>
- </plugins>
- </build>
-</project>
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickClass.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickClass.java
deleted file mode 100644
index e633b8aae80..00000000000
--- a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickClass.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/**
- *
- * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- * This file is part of GlusterFS.
- *
- * Licensed under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- * implied. See the License for the specific language governing
- * permissions and limitations under the License.
- *
- */
-
-package org.apache.hadoop.fs.glusterfs;
-
-import java.io.*;
-
-public class GlusterFSBrickClass {
- String host;
- String exportedFile;
- long start;
- long end;
- boolean isChunked;
- int stripeSize; // Stripe size in bytes
- int nrStripes; // number of stripes
- int switchCount; // for SR, DSR - number of replicas of each stripe
- // -1 for others
-
- public GlusterFSBrickClass (String brick, long start, long len, boolean flag,
- int stripeSize, int nrStripes, int switchCount)
- throws IOException {
- this.host = brick2host(brick);
- this.exportedFile = brick2file(brick);
- this.start = start;
- this.end = start + len;
- this.isChunked = flag;
- this.stripeSize = stripeSize;
- this.nrStripes = nrStripes;
- this.switchCount = switchCount;
- }
-
- public boolean isChunked () {
- return isChunked;
- }
-
- public String brickIsLocal(String hostname) {
- String path = null;
- File f = null;
- if (host.equals(hostname))
- path = exportedFile;
-
- return path;
- }
-
- public int[] getBrickNumberInTree(long start, int len) {
- long end = len;
- int startNodeInTree = ((int) (start / stripeSize)) % nrStripes;
- int endNodeInTree = ((int) ((start + len) / stripeSize)) % nrStripes;
-
- if (startNodeInTree != endNodeInTree) {
- end = (start - (start % stripeSize)) + stripeSize;
- end -= start;
- }
-
- return new int[] {startNodeInTree, endNodeInTree, (int) end};
- }
-
- public boolean brickHasFilePart(int nodeInTree, int nodeLoc) {
- if (switchCount == -1)
- return (nodeInTree == nodeLoc);
-
- nodeInTree *= switchCount;
- for (int i = nodeInTree; i < (nodeInTree + switchCount); i++) {
- if (i == nodeLoc)
- return true;
- }
-
- return false;
- }
-
- public String brick2host (String brick)
- throws IOException {
- String[] hf = null;
-
- hf = brick.split(":");
- if (hf.length != 2)
- throw new IOException("Error getting hostname from brick");
-
- return hf[0];
- }
-
- public String brick2file (String brick)
- throws IOException {
- String[] hf = null;
-
- hf = brick.split(":");
- if (hf.length != 2)
- throw new IOException("Error getting hostname from brick");
-
- return hf[1];
- }
-
-}
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickRepl.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickRepl.java
deleted file mode 100644
index 11454e6368a..00000000000
--- a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickRepl.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/**
- *
- * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- * This file is part of GlusterFS.
- *
- * Licensed under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- * implied. See the License for the specific language governing
- * permissions and limitations under the License.
- *
- */
-
-package org.apache.hadoop.fs.glusterfs;
-
-import java.io.*;
-
-public class GlusterFSBrickRepl {
- private String[] replHost;
- private long start;
- private long len;
- private int cnt;
-
- GlusterFSBrickRepl(int replCount, long start, long len) {
- this.replHost = new String[replCount];
- this.start = start;
- this.len = len;
- this.cnt = 0;
- }
-
- public void addHost (String host) {
- this.replHost[cnt++] = host;
- }
-
- public String[] getReplHosts () {
- return this.replHost;
- }
-
- public long getStartLen () {
- return this.start;
- }
-
- public long getOffLen () {
- return this.len;
- }
-}
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSXattr.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSXattr.java
deleted file mode 100644
index 455dda97e9c..00000000000
--- a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSXattr.java
+++ /dev/null
@@ -1,472 +0,0 @@
-/**
- *
- * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- * This file is part of GlusterFS.
- *
- * Licensed under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- * implied. See the License for the specific language governing
- * permissions and limitations under the License.
- *
- */
-
-package org.apache.hadoop.fs.glusterfs;
-
-import java.net.*;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-import java.io.*;
-import java.util.HashMap;
-import java.util.TreeMap;
-import java.util.ArrayList;
-import java.util.Iterator;
-
-import org.apache.hadoop.fs.BlockLocation;
-
-public class GlusterFSXattr {
-
- public enum LAYOUT { D, S, R, DS, DR, SR, DSR }
- public enum CMD { GET_HINTS, GET_REPLICATION, GET_BLOCK_SIZE, CHECK_FOR_QUICK_IO }
-
- private static String hostname;
-
- public GlusterFSXattr() { }
-
- public static String brick2host (String brick)
- throws IOException {
- String[] hf = null;
-
- hf = brick.split(":");
- if (hf.length != 2) {
- System.out.println("brick not of format hostname:path");
- throw new IOException("Error getting hostname from brick");
- }
-
- return hf[0];
- }
-
- public static String brick2file (String brick)
- throws IOException {
- String[] hf = null;
-
- hf = brick.split(":");
- if (hf.length != 2) {
- System.out.println("brick not of format hostname:path");
- throw new IOException("Error getting hostname from brick");
- }
-
- return hf[1];
- }
-
- public static BlockLocation[] getPathInfo (String filename, long start, long len)
- throws IOException {
- HashMap<String, ArrayList<String>> vol = null;
- HashMap<String, Integer> meta = new HashMap<String, Integer>();
-
- vol = execGetFattr(filename, meta, CMD.GET_HINTS);
-
- return getHints(vol, meta, start, len, null);
- }
-
- public static long getBlockSize (String filename)
- throws IOException {
- HashMap<String, ArrayList<String>> vol = null;
- HashMap<String, Integer> meta = new HashMap<String, Integer>();
-
- vol = execGetFattr(filename, meta, CMD.GET_BLOCK_SIZE);
-
- if (!meta.containsKey("block-size"))
- return 0;
-
- return (long) meta.get("block-size");
-
- }
-
- public static short getReplication (String filename)
- throws IOException {
- HashMap<String, ArrayList<String>> vol = null;
- HashMap<String, Integer> meta = new HashMap<String, Integer>();
-
- vol = execGetFattr(filename, meta, CMD.GET_REPLICATION);
-
- return (short) getReplicationFromLayout(vol, meta);
-
- }
-
- public static TreeMap<Integer, GlusterFSBrickClass> quickIOPossible (String filename, long start,
- long len)
- throws IOException {
- String realpath = null;
- HashMap<String, ArrayList<String>> vol = null;
- HashMap<String, Integer> meta = new HashMap<String, Integer>();
- TreeMap<Integer, GlusterFSBrickClass> hnts = new TreeMap<Integer, GlusterFSBrickClass>();
-
- vol = execGetFattr(filename, meta, CMD.GET_HINTS);
- getHints(vol, meta, start, len, hnts);
-
- if (hnts.size() == 0)
- return null; // BOOM !!
-
- // DEBUG - dump hnts here
- return hnts;
- }
-
- public static HashMap<String, ArrayList<String>> execGetFattr (String filename,
- HashMap<String, Integer> meta,
- CMD cmd)
- throws IOException {
- Process p = null;
- BufferedReader brInput = null;
- String s = null;
- String cmdOut = null;
- String getfattrCmd = null;
- String xlator = null;
- String enclosingXl = null;
- String enclosingXlVol = null;
- String key = null;
- String layout = "";
- int rcount = 0;
- int scount = 0;
- int dcount = 0;
- int count = 0;
-
- HashMap<String, ArrayList<String>> vol = new HashMap<String, ArrayList<String>>();
-
- getfattrCmd = "getfattr -m . -n trusted.glusterfs.pathinfo " + filename;
-
- p = Runtime.getRuntime().exec(getfattrCmd);
- brInput = new BufferedReader(new InputStreamReader(p.getInputStream()));
-
- cmdOut = "";
- while ( (s = brInput.readLine()) != null )
- cmdOut += s;
-
- /**
- * TODO: Use a single regex for extracting posix paths as well
- * as xlator counts for layout matching.
- */
-
- Pattern pattern = Pattern.compile("<(.*?)[:\\(](.*?)>");
- Matcher matcher = pattern.matcher(cmdOut);
-
- Pattern p_px = Pattern.compile(".*?:(.*)");
- Matcher m_px;
- String gibberish_path;
-
- s = null;
- while (matcher.find()) {
- xlator = matcher.group(1);
- if (xlator.equalsIgnoreCase("posix")) {
- if (enclosingXl.equalsIgnoreCase("replicate"))
- count = rcount;
- else if (enclosingXl.equalsIgnoreCase("stripe"))
- count = scount;
- else if (enclosingXl.equalsIgnoreCase("distribute"))
- count = dcount;
- else
- throw new IOException("Unknown Translator: " + enclosingXl);
-
- key = enclosingXl + "-" + count;
-
- if (vol.get(key) == null)
- vol.put(key, new ArrayList<String>());
-
- gibberish_path = matcher.group(2);
-
- /* extract posix path from the gibberish string */
- m_px = p_px.matcher(gibberish_path);
- if (!m_px.find())
- throw new IOException("Cannot extract posix path");
-
- vol.get(key).add(m_px.group(1));
- continue;
- }
-
- enclosingXl = xlator;
- enclosingXlVol = matcher.group(2);
-
- if (xlator.equalsIgnoreCase("replicate"))
- if (rcount++ != 0)
- continue;
-
- if (xlator.equalsIgnoreCase("stripe")) {
- if (scount++ != 0)
- continue;
-
-
- Pattern ps = Pattern.compile("\\[(\\d+)\\]");
- Matcher ms = ps.matcher(enclosingXlVol);
-
- if (ms.find()) {
- if (((cmd == CMD.GET_BLOCK_SIZE) || (cmd == CMD.GET_HINTS))
- && (meta != null))
- meta.put("block-size", Integer.parseInt(ms.group(1)));
- } else
- throw new IOException("Cannot get stripe size");
- }
-
- if (xlator.equalsIgnoreCase("distribute"))
- if (dcount++ != 0)
- continue;
-
- layout += xlator.substring(0, 1);
- }
-
- if ((dcount == 0) && (scount == 0) && (rcount == 0))
- throw new IOException("Cannot get layout");
-
- if (meta != null) {
- meta.put("dcount", dcount);
- meta.put("scount", scount);
- meta.put("rcount", rcount);
- }
-
- vol.put("layout", new ArrayList<String>(1));
- vol.get("layout").add(layout);
-
- return vol;
- }
-
- static BlockLocation[] getHints (HashMap<String, ArrayList<String>> vol,
- HashMap<String, Integer> meta,
- long start, long len,
- TreeMap<Integer, GlusterFSBrickClass> hnts)
- throws IOException {
- String brick = null;
- String key = null;
- boolean done = false;
- int i = 0;
- int counter = 0;
- int stripeSize = 0;
- long stripeStart = 0;
- long stripeEnd = 0;
- int nrAllocs = 0;
- int allocCtr = 0;
- BlockLocation[] result = null;
- ArrayList<String> brickList = null;
- ArrayList<String> stripedBricks = null;
- Iterator<String> it = null;
-
- String[] blks = null;
- GlusterFSBrickRepl[] repl = null;
- int dcount, scount, rcount;
-
- LAYOUT l = LAYOUT.valueOf(vol.get("layout").get(0));
- dcount = meta.get("dcount");
- scount = meta.get("scount");
- rcount = meta.get("rcount");
-
- switch (l) {
- case D:
- key = "DISTRIBUTE-" + dcount;
- brick = vol.get(key).get(0);
-
- if (hnts == null) {
- result = new BlockLocation[1];
- result[0] = new BlockLocation(null, new String[] {brick2host(brick)}, start, len);
- } else
- hnts.put(0, new GlusterFSBrickClass(brick, start, len, false, -1, -1, -1));
- break;
-
- case R:
- case DR:
- /* just the name says it's striped - the volume isn't */
- stripedBricks = new ArrayList<String>();
-
- for (i = 1; i <= rcount; i++) {
- key = "REPLICATE-" + i;
- brickList = vol.get(key);
- it = brickList.iterator();
- while (it.hasNext()) {
- stripedBricks.add(it.next());
- }
- }
-
- nrAllocs = stripedBricks.size();
- if (hnts == null) {
- result = new BlockLocation[1];
- blks = new String[nrAllocs];
- }
-
- for (i = 0; i < nrAllocs; i++) {
- if (hnts == null)
- blks[i] = brick2host(stripedBricks.get(i));
- else
- hnts.put(i, new GlusterFSBrickClass(stripedBricks.get(i), start, len, false, -1, -1, -1));
- }
-
- if (hnts == null)
- result[0] = new BlockLocation(null, blks, start, len);
-
- break;
-
- case SR:
- case DSR:
- int rsize = 0;
- ArrayList<ArrayList<String>> replicas = new ArrayList<ArrayList<String>>();
-
- stripedBricks = new ArrayList<String>();
-
- if (rcount == 0)
- throw new IOException("got replicated volume with replication count 0");
-
- for (i = 1; i <= rcount; i++) {
- key = "REPLICATE-" + i;
- brickList = vol.get(key);
- it = brickList.iterator();
- replicas.add(i - 1, new ArrayList<String>());
- while (it.hasNext()) {
- replicas.get(i - 1).add(it.next());
- }
- }
-
- stripeSize = meta.get("block-size");
-
- nrAllocs = (int) (((len - start) / stripeSize) + 1);
- if (hnts == null) {
- result = new BlockLocation[nrAllocs];
- repl = new GlusterFSBrickRepl[nrAllocs];
- }
-
- // starting stripe position
- counter = (int) ((start / stripeSize) % rcount);
- stripeStart = start;
-
- key = null;
- int currAlloc = 0;
- boolean hntsDone = false;
- while ((stripeStart < len) && !done) {
- stripeEnd = (stripeStart - (stripeStart % stripeSize)) + stripeSize - 1;
- if (stripeEnd > start + len) {
- stripeEnd = start + len - 1;
- done = true;
- }
-
- rsize = replicas.get(counter).size();
-
- if (hnts == null)
- repl[allocCtr] = new GlusterFSBrickRepl(rsize, stripeStart, (stripeEnd - stripeStart));
-
- for (i = 0; i < rsize; i++) {
- brick = replicas.get(counter).get(i);
- currAlloc = (allocCtr * rsize) + i;
-
- if (hnts == null)
- repl[allocCtr].addHost(brick2host(brick));
- else
- if (currAlloc <= (rsize * rcount) - 1) {
- hnts.put(currAlloc, new GlusterFSBrickClass(brick, stripeStart,
- (stripeEnd - stripeStart),
- true, stripeSize, rcount, rsize));
- } else
- hntsDone = true;
- }
-
- if (hntsDone)
- break;
-
- stripeStart = stripeEnd + 1;
-
- allocCtr++;
- counter++;
-
- if (counter >= replicas.size())
- counter = 0;
- }
-
- if (hnts == null)
- for (int k = 0; k < nrAllocs; k++)
- result[k] = new BlockLocation(null, repl[k].getReplHosts(), repl[k].getStartLen(), repl[k].getOffLen());
-
- break;
-
- case S:
- case DS:
- if (scount == 0)
- throw new IOException("got striped volume with stripe count 0");
-
- stripedBricks = new ArrayList<String>();
- stripeSize = meta.get("block-size");
-
- key = "STRIPE-" + scount;
- brickList = vol.get(key);
- it = brickList.iterator();
- while (it.hasNext()) {
- stripedBricks.add(it.next());
- }
-
- nrAllocs = (int) ((len - start) / stripeSize) + 1;
- if (hnts == null)
- result = new BlockLocation[nrAllocs];
-
- // starting stripe position
- counter = (int) ((start / stripeSize) % stripedBricks.size());
- stripeStart = start;
-
- key = null;
- while ((stripeStart < len) && !done) {
- brick = stripedBricks.get(counter);
-
- stripeEnd = (stripeStart - (stripeStart % stripeSize)) + stripeSize - 1;
- if (stripeEnd > start + len) {
- stripeEnd = start + len - 1;
- done = true;
- }
-
- if (hnts == null)
- result[allocCtr] = new BlockLocation(null, new String[] {brick2host(brick)},
- stripeStart, (stripeEnd - stripeStart));
- else
- if (allocCtr <= stripedBricks.size()) {
- hnts.put(allocCtr, new GlusterFSBrickClass(brick, stripeStart, (stripeEnd - stripeStart),
- true, stripeSize, stripedBricks.size(), -1));
- } else
- break;
-
- stripeStart = stripeEnd + 1;
-
- counter++;
- allocCtr++;
-
- if (counter >= stripedBricks.size())
- counter = 0;
- }
-
- break;
- }
-
- return result;
- }
-
- /* TODO: use meta{dcount,scount,rcount} for checking */
- public static int getReplicationFromLayout (HashMap<String, ArrayList<String>> vol,
- HashMap<String, Integer> meta)
- throws IOException {
- int replication = 0;
- LAYOUT l = LAYOUT.valueOf(vol.get("layout").get(0));
-
- switch (l) {
- case D:
- case S:
- case DS:
- replication = 1;
- break;
-
- case R:
- case DR:
- case SR:
- case DSR:
- final String key = "REPLICATION-1";
- replication = vol.get(key).size();
- }
-
- return replication;
- }
-}
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEInputStream.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEInputStream.java
deleted file mode 100644
index e92237aeea2..00000000000
--- a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEInputStream.java
+++ /dev/null
@@ -1,205 +0,0 @@
-/**
- *
- * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- * This file is part of GlusterFS.
- *
- * Licensed under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- * implied. See the License for the specific language governing
- * permissions and limitations under the License.
- *
- */
-
-package org.apache.hadoop.fs.glusterfs;
-
-import java.io.*;
-import java.util.TreeMap;
-
-import org.apache.hadoop.fs.FSInputStream;
-import org.apache.hadoop.fs.FileSystem;
-
-
-public class GlusterFUSEInputStream extends FSInputStream {
- File f;
- boolean lastActive;
- long pos;
- boolean closed;
- String thisHost;
- RandomAccessFile fuseInputStream;
- RandomAccessFile fsInputStream;
- GlusterFSBrickClass thisBrick;
- int nodeLocation;
- TreeMap<Integer, GlusterFSBrickClass> hnts;
-
- public GlusterFUSEInputStream (File f, TreeMap<Integer, GlusterFSBrickClass> hnts,
- String hostname) throws IOException {
- this.f = f;
- this.pos = 0;
- this.closed = false;
- this.hnts = hnts;
- this.thisHost = hostname;
- this.fsInputStream = null;
- this.fuseInputStream = new RandomAccessFile(f.getPath(), "r");
-
- this.lastActive = true; // true == FUSE, false == backed file
-
- String directFilePath = null;
- if (this.hnts != null) {
- directFilePath = findLocalFile(f.getPath(), this.hnts);
- if (directFilePath != null) {
- this.fsInputStream = new RandomAccessFile(directFilePath, "r");
- this.lastActive = !this.lastActive;
- }
- }
- }
-
- public String findLocalFile (String path, TreeMap<Integer, GlusterFSBrickClass> hnts) {
- int i = 0;
- String actFilePath = null;
- GlusterFSBrickClass gfsBrick = null;
-
- gfsBrick = hnts.get(0);
-
- /* do a linear search for the matching host not worrying
- about file stripes */
- for (i = 0; i < hnts.size(); i++) {
- gfsBrick = hnts.get(i);
- actFilePath = gfsBrick.brickIsLocal(this.thisHost);
- if (actFilePath != null) {
- this.thisBrick = gfsBrick;
- this.nodeLocation = i;
- break;
- }
- }
-
- return actFilePath;
- }
-
- public long getPos () throws IOException {
- return pos;
- }
-
- public synchronized int available () throws IOException {
- return (int) ((f.length()) - getPos());
- }
-
- public void seek (long pos) throws IOException {
- fuseInputStream.seek(pos);
- if (fsInputStream != null)
- fsInputStream.seek(pos);
- }
-
- public boolean seekToNewSource (long pos) throws IOException {
- return false;
- }
-
- public RandomAccessFile chooseStream (long start, int[] nlen)
- throws IOException {
- GlusterFSBrickClass gfsBrick = null;
- RandomAccessFile in = fuseInputStream;
- boolean oldActiveStream = lastActive;
- lastActive = true;
-
- if ((hnts != null) && (fsInputStream != null)) {
- gfsBrick = hnts.get(0);
- if (!gfsBrick.isChunked()) {
- in = fsInputStream;
- lastActive = false;
- } else {
- // find the current location in the tree and the amount of data it can serve
- int[] nodeInTree = thisBrick.getBrickNumberInTree(start, nlen[0]);
-
- // does this node hold the byte ranges we have been requested for ?
- if ((nodeInTree[2] != 0) && thisBrick.brickHasFilePart(nodeInTree[0], nodeLocation)) {
- in = fsInputStream;
- nlen[0] = nodeInTree[2]; // the amount of data that can be read from the stripe
- lastActive = false;
- }
- }
- }
-
- return in;
- }
-
- public synchronized int read () throws IOException {
- int byteRead = 0;
- RandomAccessFile in = null;
-
- if (closed)
- throw new IOException("Stream Closed.");
-
- int[] nlen = { 1 };
-
- in = chooseStream(getPos(), nlen);
-
- byteRead = in.read();
- if (byteRead >= 0) {
- pos++;
- syncStreams(1);
- }
-
- return byteRead;
- }
-
- public synchronized int read (byte buff[], int off, int len) throws
- IOException {
- int result = 0;
- RandomAccessFile in = null;
-
- if (closed)
- throw new IOException("Stream Closed.");
-
- int[] nlen = {len}; // hack to make len mutable
- in = chooseStream(pos, nlen);
-
- result = in.read(buff, off, nlen[0]);
- if (result > 0) {
- pos += result;
- syncStreams(result);
- }
-
- return result;
- }
-
- /**
- * TODO: use seek() insted of skipBytes(); skipBytes does I/O
- */
- public void syncStreams (int bytes) throws IOException {
- if ((hnts != null) && (hnts.get(0).isChunked()) && (fsInputStream != null))
- if (!this.lastActive)
- fuseInputStream.skipBytes(bytes);
- else
- fsInputStream.skipBytes(bytes);
- }
-
- public synchronized void close () throws IOException {
- if (closed)
- throw new IOException("Stream closed.");
-
- super.close();
- if (fsInputStream != null)
- fsInputStream.close();
- fuseInputStream.close();
-
- closed = true;
- }
-
- // Not supported - mark () and reset ()
-
- public boolean markSupported () {
- return false;
- }
-
- public void mark (int readLimit) {}
-
- public void reset () throws IOException {
- throw new IOException("Mark/Reset not supported.");
- }
-}
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEOutputStream.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEOutputStream.java
deleted file mode 100644
index 5192a0a56cb..00000000000
--- a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEOutputStream.java
+++ /dev/null
@@ -1,86 +0,0 @@
-/**
- *
- * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- * This file is part of GlusterFS.
- *
- * Licensed under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- * implied. See the License for the specific language governing
- * permissions and limitations under the License.
- *
- */
-
-package org.apache.hadoop.fs.glusterfs;
-
-import java.io.*;
-
-import org.apache.hadoop.fs.FSOutputSummer;
-import org.apache.hadoop.fs.FileSystem;
-
-public class GlusterFUSEOutputStream extends OutputStream {
- File f;
- long pos;
- boolean closed;
- OutputStream fuseOutputStream;
-
- public GlusterFUSEOutputStream (String file, boolean append) throws
- IOException {
- this.f = new File(file); /* not needed ? */
- this.pos = 0;
- this.fuseOutputStream = new FileOutputStream(file, append);
- this.closed = false;
- }
-
- public long getPos () throws IOException {
- return pos;
- }
-
- public void write (int v) throws IOException {
- if (closed)
- throw new IOException("Stream closed.");
-
- byte[] b = new byte[1];
- b[0] = (byte) v;
-
- write(b, 0, 1);
- }
-
- public void write (byte b[]) throws IOException {
- if (closed)
- throw new IOException("Stream closed.");
-
- fuseOutputStream.write(b, 0, b.length);
- pos += (long) b.length;
- }
-
- public void write (byte b[], int off, int len) throws IOException {
- if (closed)
- throw new IOException("Stream closed.");
-
- fuseOutputStream.write(b, off, len);
- pos += (long) len;
- }
-
- public void flush () throws IOException {
- if (closed)
- throw new IOException("Stream closed.");
-
- fuseOutputStream.flush();
- }
-
- public void close () throws IOException {
- if (closed)
- throw new IOException("Stream closed.");
-
- flush();
- fuseOutputStream.close();
- closed = true;
- }
-}
diff --git a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFileSystem.java b/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFileSystem.java
deleted file mode 100644
index b0501cced27..00000000000
--- a/glusterfs-hadoop/0.20.2/src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFileSystem.java
+++ /dev/null
@@ -1,492 +0,0 @@
-/**
- *
- * Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- * This file is part of GlusterFS.
- *
- * Licensed under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- * implied. See the License for the specific language governing
- * permissions and limitations under the License.
- *
- */
-
-/**
- * Implements the Hadoop FileSystem Interface to allow applications to store
- * files on GlusterFS and run Map/Reduce jobs on the data.
- */
-
-package org.apache.hadoop.fs.glusterfs;
-
-import java.io.*;
-import java.net.*;
-
-import java.util.regex.*;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FSDataInputStream;
-import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.BlockLocation;
-import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hadoop.util.Progressable;
-
-import java.util.TreeMap;
-
-/*
- * This package provides interface for hadoop jobs (incl. Map/Reduce)
- * to access files in GlusterFS backed file system via FUSE mount
- */
-public class GlusterFileSystem extends FileSystem {
-
- private FileSystem glusterFs = null;
- private URI uri = null;
- private Path workingDir = null;
- private String glusterMount = null;
- private boolean mounted = false;
-
- /* for quick IO */
- private boolean quickSlaveIO = false;
-
- /* extended attribute class */
- private GlusterFSXattr xattr = null;
-
- /* hostname of this machine */
- private static String hostname;
-
- public GlusterFileSystem () {
-
- }
-
- public URI getUri () {
- return uri;
- }
-
- public boolean FUSEMount (String volname, String server, String mount)
- throws IOException, InterruptedException {
- boolean ret = true;
- int retVal = 0;
- Process p = null;
- String s = null;
- String mountCmd = null;
-
- mountCmd = "mount -t glusterfs " + server + ":" + "/" + volname + " " + mount;
-
- try {
- p = Runtime.getRuntime().exec(mountCmd);
-
- retVal = p.waitFor();
- if (retVal != 0)
- ret = false;
-
- } catch (IOException e) {
- System.out.println ("Problem mounting FUSE mount on: " + mount);
- e.printStackTrace();
- System.exit(-1);
- }
-
- return ret;
- }
-
- public void initialize (URI uri, Configuration conf) throws IOException {
- boolean ret = false;
- String volName = null;
- String remoteGFSServer = null;
- String needQuickRead = null;
-
- if (this.mounted)
- return;
-
- System.out.println("Initializing GlusterFS");
-
- try {
- volName = conf.get("fs.glusterfs.volname", "");
- glusterMount = conf.get("fs.glusterfs.mount", "");
- remoteGFSServer = conf.get("fs.glusterfs.server", "");
- needQuickRead = conf.get("quick.slave.io", "");
-
- /*
- * bail out if we do not have enough information to do a FUSE
- * mount
- */
- if ( (volName.length() == 0) || (remoteGFSServer.length() == 0) ||
- (glusterMount.length() == 0) )
- System.exit (-1);
-
- ret = FUSEMount(volName, remoteGFSServer, glusterMount);
- if (!ret) {
- System.out.println("Failed to initialize GlusterFS");
- System.exit(-1);
- }
-
- if ((needQuickRead.length() != 0)
- && (needQuickRead.equalsIgnoreCase("yes")
- || needQuickRead.equalsIgnoreCase("on")
- || needQuickRead.equals("1")))
- this.quickSlaveIO = true;
-
- this.mounted = true;
- this.glusterFs = FileSystem.getLocal(conf);
- this.workingDir = new Path(glusterMount);
- this.uri = URI.create(uri.getScheme() + "://" + uri.getAuthority());
-
- this.xattr = new GlusterFSXattr();
-
- InetAddress addr = InetAddress.getLocalHost();
- this.hostname = addr.getHostName();
-
- setConf(conf);
-
- } catch (Exception e) {
- e.printStackTrace();
- System.out.println("Unable to initialize GlusterFS");
- System.exit(-1);
- }
- }
-
- @Deprecated
- public String getName () {
- return getUri().toString();
- }
-
- public Path getWorkingDirectory () {
- return this.workingDir;
- }
-
- public Path getHomeDirectory () {
- return this.workingDir;
- }
-
- public Path makeAbsolute (Path path) {
- String pth = path.toUri().getPath();
- if (pth.startsWith(workingDir.toUri().getPath())) {
- return path;
- }
-
- return new Path(workingDir + "/" + pth);
- }
-
- public void setWorkingDirectory (Path dir) {
- this.workingDir = makeAbsolute(dir);
- }
-
- public boolean exists (Path path) throws IOException {
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
-
- return f.exists();
- }
-
- public boolean mkdirs (Path path, FsPermission permission
- ) throws IOException {
- boolean created = false;
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
-
- if (f.exists()) {
- System.out.println("Directory " + f.getPath() + " already exist");
- return true;
- }
-
- return f.mkdirs();
- }
-
- @Deprecated
- public boolean isDirectory (Path path) throws IOException {
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
-
- return f.isDirectory();
- }
-
- public boolean isFile (Path path) throws IOException {
- return !isDirectory(path);
- }
-
- public Path[] listPaths (Path path) throws IOException {
- Path absolute = makeAbsolute(path);
- File f = new File (absolute.toUri().getPath());
- String relPath = path.toUri().getPath();
- String[] fileList = null;
- Path[] filePath = null;
- int fileCnt = 0;
-
- fileList = f.list();
-
- filePath = new Path[fileList.length];
-
- for (; fileCnt < fileList.length; fileCnt++) {
- filePath[fileCnt] = new Path(relPath + "/" + fileList[fileCnt]);
- }
-
- return filePath;
- }
-
- public FileStatus[] listStatus (Path path) throws IOException {
- int fileCnt = 0;
- Path absolute = makeAbsolute(path);
- String relpath = path.toUri().getPath();
- String[] strFileList = null;
- FileStatus[] fileStatus = null;
- File f = new File(absolute.toUri().getPath());
-
- if (!f.exists()) {
- return null;
- }
-
- if (f.isFile())
- return new FileStatus[] {
- getFileStatus(path)
- };
-
- if (relpath.charAt(relpath.length() - 1) != '/')
- relpath += "/";
-
- strFileList = f.list();
-
- fileStatus = new FileStatus[strFileList.length];
-
- for (; fileCnt < strFileList.length; fileCnt++) {
- fileStatus[fileCnt] = getFileStatusFromFileString(relpath + strFileList[fileCnt]);
- }
-
- return fileStatus;
- }
-
- public FileStatus getFileStatusFromFileString (String path)
- throws IOException {
- Path nPath = new Path(path);
- return getFileStatus(nPath);
- }
-
- public FileStatus getFileStatus (Path path) throws IOException {
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
-
- if (!f.exists ())
- throw new FileNotFoundException("File " + f.getPath() + " does not exist.");
-
- if (f.isDirectory ())
- return new FileStatus(0, true, 1, 0, f.lastModified(), path.makeQualified(this));
- else
- return new FileStatus(f.length(), false, 0, getDefaultBlockSize(),
- f.lastModified(), path.makeQualified(this));
-
- }
-
- /*
- * creates a new file in glusterfs namespace. internally the file
- * descriptor is an instance of OutputStream class.
- */
- public FSDataOutputStream create (Path path, FsPermission permission,
- boolean overwrite, int bufferSize,
- short replication, long blockSize,
- Progressable progress)
- throws IOException {
- Path absolute = makeAbsolute(path);
- Path parent = null;
- File f = null;
- File fParent = null;
- FSDataOutputStream glusterFileStream = null;
-
- f = new File(absolute.toUri().getPath());
-
- if (f.exists ()) {
- if (overwrite)
- f.delete ();
- else
- throw new IOException(f.getPath() + " already exist");
- }
-
- parent = path.getParent();
- fParent = new File ((makeAbsolute(parent)).toUri().getPath());
- if ((parent != null) && (fParent != null) && (!fParent.exists()))
- if (!fParent.mkdirs())
- throw new IOException("cannot create parent directory: " + fParent.getPath());
-
- glusterFileStream = new FSDataOutputStream(new GlusterFUSEOutputStream
- (f.getPath(), false));
-
- return glusterFileStream;
- }
-
- /*
- * open the file in read mode (internally the file descriptor is an
- * instance of InputStream class).
- *
- * if quick read mode is set then read the file by by-passing FUSE
- * if we are on same slave where the file exist
- */
- public FSDataInputStream open (Path path) throws IOException {
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
- FSDataInputStream glusterFileStream = null;
- TreeMap<Integer, GlusterFSBrickClass> hnts = null;
-
- if (!f.exists())
- throw new IOException("File " + f.getPath() + " does not exist.");
-
- if (quickSlaveIO)
- hnts = xattr.quickIOPossible(f.getPath(), 0, f.length());
-
- glusterFileStream = new FSDataInputStream(new GlusterFUSEInputStream(f, hnts, hostname));
- return glusterFileStream;
- }
-
- public FSDataInputStream open (Path path, int bufferSize) throws IOException {
- return open(path);
- }
-
- public FSDataOutputStream append (Path f, int bufferSize, Progressable progress)
- throws IOException {
- throw new IOException ("append not supported (as yet).");
- }
-
- public boolean rename (Path src, Path dst) throws IOException {
- Path absoluteSrc = makeAbsolute(src);
- Path absoluteDst = makeAbsolute(dst);
-
- File fSrc = new File(absoluteSrc.toUri().getPath());
- File fDst = new File(absoluteDst.toUri().getPath());
-
- if (fDst.isDirectory()) {
- fDst = null;
- String newPath = absoluteDst.toUri().getPath() + "/" + fSrc.getName();
- fDst = new File(newPath);
- }
- return fSrc.renameTo(fDst);
- }
-
- @Deprecated
- public boolean delete (Path path) throws IOException {
- return delete(path, true);
- }
-
- public boolean delete (Path path, boolean recursive) throws IOException {
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
-
- if (f.isFile())
- return f.delete();
-
- FileStatus[] dirEntries = listStatus(absolute);
- if ((!recursive) && (dirEntries != null) && (dirEntries.length != 0))
- throw new IOException ("Directory " + path.toString() + " is not empty");
-
- if (dirEntries != null)
- for (int i = 0; i < dirEntries.length; i++)
- delete(new Path(absolute, dirEntries[i].getPath()), recursive);
-
- return f.delete();
- }
-
- @Deprecated
- public long getLength (Path path) throws IOException {
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
-
- if (!f.exists())
- throw new IOException(f.getPath() + " does not exist.");
-
- return f.length();
- }
-
- @Deprecated
- public short getReplication (Path path) throws IOException {
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
-
- if (!f.exists())
- throw new IOException(f.getPath() + " does not exist.");
-
- return xattr.getReplication(f.getPath());
- }
-
- public short getDefaultReplication (Path path) throws IOException {
- return getReplication(path);
- }
-
- public boolean setReplication (Path path, short replication)
- throws IOException {
- return true;
- }
-
- public long getBlockSize (Path path) throws IOException {
- long blkSz;
- Path absolute = makeAbsolute(path);
- File f = new File(absolute.toUri().getPath());
-
- blkSz = xattr.getBlockSize(f.getPath());
- if (blkSz == 0)
- blkSz = getLength(path);
-
- return blkSz;
- }
-
- public long getDefaultBlockSize () {
- return 1 << 26; /* default's from hdfs, kfs */
- }
-
- @Deprecated
- public void lock (Path path, boolean shared) throws IOException {
- }
-
- @Deprecated
- public void release (Path path) throws IOException {
- }
-
- public BlockLocation[] getFileBlockLocations (FileStatus file, long start, long len)
- throws IOException {
-
- Path absolute = makeAbsolute(file.getPath());
- File f = new File(absolute.toUri().getPath());
- BlockLocation[] result = null;
-
- if (file == null)
- return null;
-
- result = xattr.getPathInfo(f.getPath(), start, len);
- if (result == null) {
- System.out.println("Problem getting destination host for file "
- + f.getPath());
- return null;
- }
-
- return result;
- }
-
- // getFileBlockLocations (FileStatus, long, long) is called by hadoop
- public BlockLocation[] getFileBlockLocations (Path p, long start, long len)
- throws IOException {
- return null;
- }
-
- public void copyFromLocalFile (boolean delSrc, Path src, Path dst)
- throws IOException {
- FileUtil.copy(glusterFs, src, this, dst, delSrc, getConf());
- }
-
- public void copyToLocalFile (boolean delSrc, Path src, Path dst)
- throws IOException {
- FileUtil.copy(this, src, glusterFs, dst, delSrc, getConf());
- }
-
- public Path startLocalOutput (Path fsOutputFile, Path tmpLocalFile)
- throws IOException {
- return tmpLocalFile;
- }
-
- public void completeLocalOutput (Path fsOutputFile, Path tmpLocalFile)
- throws IOException {
- moveFromLocalFile(tmpLocalFile, fsOutputFile);
- }
-}
diff --git a/glusterfs-hadoop/0.20.2/src/test/java/org/apache/hadoop/fs/glusterfs/AppTest.java b/glusterfs-hadoop/0.20.2/src/test/java/org/apache/hadoop/fs/glusterfs/AppTest.java
deleted file mode 100644
index 21e188c52bc..00000000000
--- a/glusterfs-hadoop/0.20.2/src/test/java/org/apache/hadoop/fs/glusterfs/AppTest.java
+++ /dev/null
@@ -1,38 +0,0 @@
-package org.apache.hadoop.fs.glusterfs;
-
-import junit.framework.Test;
-import junit.framework.TestCase;
-import junit.framework.TestSuite;
-
-/**
- * Unit test for simple App.
- */
-public class AppTest
- extends TestCase
-{
- /**
- * Create the test case
- *
- * @param testName name of the test case
- */
- public AppTest( String testName )
- {
- super( testName );
- }
-
- /**
- * @return the suite of tests being tested
- */
- public static Test suite()
- {
- return new TestSuite( AppTest.class );
- }
-
- /**
- * Rigourous Test :-)
- */
- public void testApp()
- {
- assertTrue( true );
- }
-}
diff --git a/glusterfs-hadoop/0.20.2/tools/build-deploy-jar.py b/glusterfs-hadoop/0.20.2/tools/build-deploy-jar.py
deleted file mode 100755
index c20e53b39b2..00000000000
--- a/glusterfs-hadoop/0.20.2/tools/build-deploy-jar.py
+++ /dev/null
@@ -1,212 +0,0 @@
-#!/usr/bin/python
-
-##
- #
- # Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- # This file is part of GlusterFS.
- #
- # Licensed under the Apache License, Version 2.0
- # (the "License"); you may not use this file except in compliance with
- # the License. You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- # implied. See the License for the specific language governing
- # permissions and limitations under the License.
- #
- ##
-
-import getopt
-import glob
-import sys, os
-import shutil
-import subprocess, shlex
-
-def usage():
- print "usage: python build-deploy-jar.py [-b/--build] -d/--dir <hadoop-home> [-c/--core] [-m/--mapred] [-h/--henv]"
-
-def addSlash(s):
- if not (s[-1] == '/'):
- s = s + '/'
-
- return s
-
-def whereis(program):
- abspath = None
- for path in (os.environ.get('PATH', '')).split(':'):
- abspath = os.path.join(path, program)
- if os.path.exists(abspath) and not os.path.isdir(abspath):
- return abspath
-
- return None
-
-def getLatestJar(targetdir):
- glusterfsJar = glob.glob(targetdir + "*.jar")
- if len(glusterfsJar) == 0:
- print "No GlusterFS jar file found in %s ... exiting" % (targetdir)
- return None
-
- # pick up the latest jar file - just in case ...
- stat = latestJar = None
- ctime = 0
-
- for jar in glusterfsJar:
- stat = os.stat(jar)
- if stat.st_ctime > ctime:
- latestJar = jar
- ctime = stat.st_ctime
-
- return latestJar
-
-# build the glusterfs hadoop plugin using maven
-def build_jar(targetdir):
- location = whereis('mvn')
-
- if location == None:
- print "Cannot find maven to build glusterfs hadoop jar"
- print "please install maven or if it's already installed then fix your PATH environ"
- return None
-
- # do a clean packaging
- if os.path.exists(targetdir) and os.path.isdir(targetdir):
- print "Cleaning up directories ... [ " + targetdir + " ]"
- shutil.rmtree(targetdir)
-
- print "Building glusterfs jar ..."
- process = subprocess.Popen(['package'], shell=True,
- executable=location, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-
- process.wait()
- if not process.returncode == 0:
- print "Building glusterfs jar failed ... exiting"
- return None
-
- latestJar = getLatestJar(targetdir)
- return latestJar
-
-def rcopy(f, host, libdir):
- print " * doing remote copy to host %s" % (host)
- scpCmd = "scp %s %s:%s" % (f, host, libdir)
-
- os.system(scpCmd);
-
-def deployInSlave(f, confdir, libdir, cc, cm, he):
- slavefile = confdir + "slaves"
-
- ccFile = confdir + "core-site.xml"
- cmFile = confdir + "mapred-site.xml"
- heFile = confdir + "hadoop-env.sh"
-
- sf = open(slavefile, 'r')
- for host in sf:
- host = host.rstrip('\n')
- print " >>> Deploying %s on %s ..." % (os.path.basename(f), host)
- rcopy(f, host, libdir)
-
- if cc:
- print " >>> Deploying [%s] on %s ..." % (os.path.basename(ccFile), host)
- rcopy(ccFile, host, confdir)
-
- if cm:
- print " >>> Deploying [%s] on %s ..." % (os.path.basename(cmFile), host)
- rcopy(cmFile, host, confdir)
-
- if he:
- print " >>> Deploying [%s] on %s ..." % (os.path.basename(heFile), host)
- rcopy(heFile, host, confdir);
-
- print "<<< Done\n"
-
- sf.close()
-
-def deployInMaster(f, confdir, libdir):
- import socket
- masterfile = confdir + "masters"
-
- mf = open(masterfile, 'r')
- for host in mf:
- host = host.rstrip('\n')
- print " >>> Deploying %s on %s ..." % (os.path.basename(f), host)
- h = host
- try:
- socket.inet_aton(host)
- h = socket.getfqdn(host)
- except socket.error:
- pass
-
- if h == socket.gethostname() or h == 'localhost':
- # local cp
- print " * doing local copy"
- shutil.copy(f, libdir)
- else:
- # scp the file
- rcopy(f, h, libdir)
-
- print "<<< Done\n"
-
- mf.close()
-
-if __name__ == '__main__':
- opt = args = []
- try:
- opt, args = getopt.getopt(sys.argv[1:], "bd:cmh", ["build", "dir=", "core", "mapred", "henv"]);
- except getopt.GetoptError, err:
- print str(err)
- usage()
- sys.exit(1)
-
- needbuild = hadoop_dir = copyCore = copyMapred = copyHadoopEnv = None
-
- for k, v in opt:
- if k in ("-b", "--build"):
- needbuild = True
- elif k in ("-d", "--dir"):
- hadoop_dir = v
- elif k in ("-c", "--core"):
- copyCore = True
- elif k in ("-m", "--mapred"):
- copyMapred = True
- elif k in ("-h", "--henv"):
- copyHadoopEnv = True
- else:
- pass
-
- if hadoop_dir == None:
- print 'hadoop directory missing'
- usage()
- sys.exit(1)
-
- os.chdir(os.path.dirname(sys.argv[0]) + '/..')
- targetdir = './target/'
-
- if needbuild:
- jar = build_jar(targetdir)
- if jar == None:
- sys.exit(1)
- else:
- jar = getLatestJar(targetdir)
- if jar == None:
- print "Maybe you want to build it ? with -b option"
- sys.exit(1)
-
- print ""
- print "*** Deploying %s *** " % (jar)
-
- # copy jar to local hadoop distribution (master)
- hadoop_home = addSlash(hadoop_dir)
- if not (os.path.exists(hadoop_home) and os.path.isdir(hadoop_home)):
- print "path " + hadoop_home + " does not exist or is not adiretory";
- sys.exit(1);
-
- hadoop_conf = hadoop_home + "conf/"
- hadoop_lib = hadoop_home + "lib/"
-
- print " >>> Scanning hadoop master file for host(s) to deploy"
- deployInMaster(jar, hadoop_conf, hadoop_lib)
-
- print ""
- print " >>> Scanning hadoop slave file for host(s) to deploy"
- deployInSlave(jar, hadoop_conf, hadoop_lib, copyCore, copyMapred, copyHadoopEnv)
diff --git a/glusterfs-hadoop/COPYING b/glusterfs-hadoop/COPYING
deleted file mode 100644
index d6456956733..00000000000
--- a/glusterfs-hadoop/COPYING
+++ /dev/null
@@ -1,202 +0,0 @@
-
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
diff --git a/glusterfs-hadoop/README b/glusterfs-hadoop/README
deleted file mode 100644
index 3026f11c035..00000000000
--- a/glusterfs-hadoop/README
+++ /dev/null
@@ -1,182 +0,0 @@
-GlusterFS Hadoop Plugin
-=======================
-
-INTRODUCTION
-------------
-
-This document describes how to use GlusterFS (http://www.gluster.org/) as a backing store with Hadoop.
-
-
-REQUIREMENTS
-------------
-
-* Supported OS is GNU/Linux
-* GlusterFS and Hadoop installed on all machines in the cluster
-* Java Runtime Environment (JRE)
-* Maven (needed if you are building the plugin from source)
-* JDK (needed if you are building the plugin from source)
-
-NOTE: Plugin relies on two *nix command line utilities to function properly. They are:
-
-* mount: Used to mount GlusterFS volumes.
-* getfattr: Used to fetch Extended Attributes of a file
-
-Make sure they are installed on all hosts in the cluster and their locations are in $PATH
-environment variable.
-
-
-INSTALLATION
-------------
-
-** NOTE: Example below is for Hadoop version 0.20.2 ($GLUSTER_HOME/hdfs/0.20.2) **
-
-* Building the plugin from source [Maven (http://maven.apache.org/) and JDK is required to build the plugin]
-
- Change to glusterfs-hadoop directory in the GlusterFS source tree and build the plugin.
-
- # cd $GLUSTER_HOME/hdfs/0.20.2
- # mvn package
-
- On a successful build the plugin will be present in the `target` directory.
- (NOTE: version number will be a part of the plugin)
-
- # ls target/
- classes glusterfs-0.20.2-0.1.jar maven-archiver surefire-reports test-classes
- ^^^^^^^^^^^^^^^^^^
-
- Copy the plugin to lib/ directory in your $HADOOP_HOME dir.
-
- # cp target/glusterfs-0.20.2-0.1.jar $HADOOP_HOME/lib
-
- Copy the sample configuration file that ships with this source (conf/core-site.xml) to conf
- directory in your $HADOOP_HOME dir.
-
- # cp conf/core-site.xml $HADOOP_HOME/conf
-
-* Installing the plugin from RPM
-
- See the plugin documentation for installing from RPM.
-
-
-CLUSTER INSTALLATION
---------------------
-
- In case it is tedious to do the above steps(s) on all hosts in the cluster; use the build-and-deploy.py script to
- build the plugin in one place and deploy it (along with the configuration file on all other hosts).
-
- This should be run on the host which is that hadoop master [Job Tracker].
-
-* STEPS (You would have done Step 1 and 2 anyway while deploying Hadoop)
-
- 1. Edit conf/slaves file in your hadoop distribution; one line for each slave.
- 2. Setup password-less ssh b/w hadoop master and slave(s).
- 3. Edit conf/core-site.xml with all glusterfs related configurations (see CONFIGURATION)
- 4. Run the following
- # cd $GLUSTER_HOME/hdfs/0.20.2/tools
- # python ./build-and-deploy.py -b -d /path/to/hadoop/home -c
-
- This will build the plugin and copy it (and the config file) to all slaves (mentioned in $HADOOP_HOME/conf/slaves).
-
- Script options:
- -b : build the plugin
- -d : location of hadoop directory
- -c : deploy core-site.xml
- -m : deploy mapred-site.xml
- -h : deploy hadoop-env.sh
-
-
-CONFIGURATION
--------------
-
- All plugin configuration is done in a single XML file (core-site.xml) with <name><value> tags in each <property>
- block.
-
- Brief explanation of the tunables and the values they accept (change them where-ever needed) are mentioned below
-
- name: fs.glusterfs.impl
- value: org.apache.hadoop.fs.glusterfs.GlusterFileSystem
-
- The default FileSystem API to use (there is little reason to modify this).
-
- name: fs.default.name
- value: glusterfs://server:port
-
- The default name that hadoop uses to represent file as a URI (typically a server:port tuple). Use any host
- in the cluster as the server and any port number. This option has to be in server:port format for hadoop
- to create file URI; but is not used by plugin.
-
- name: fs.glusterfs.volname
- value: volume-dist-rep
-
- The volume to mount.
-
-
- name: fs.glusterfs.mount
- value: /mnt/glusterfs
-
- This is the directory that the plugin will use to mount (FUSE mount) the volume.
-
- name: fs.glusterfs.server
- value: 192.168.1.36, hackme.zugzug.org
-
- To mount a volume the plugin needs to know the hostname or the IP of a GlusterFS server in the cluster.
- Mention it here.
-
- name: quick.slave.io
- value: [On/Off], [Yes/No], [1/0]
-
- NOTE: This option is not tested as of now.
-
- This is a performance tunable option. Hadoop schedules jobs to hosts that contain the file data part. The job
- then does I/O on the file (via FUSE in case of GlusterFS). When this option is set, the plugin will try to
- do I/O directly from the backed filesystem (ext3, ext4 etc..) the file resides on. Hence read performance
- will improve and job would run faster.
-
-
-USAGE
------
-
- Once configured, start Hadoop Map/Reduce daemons
-
- # cd $HADOOP_HOME
- # ./bin/start-mapred.sh
-
- If the map/reduce job/task trackers are up, all I/O will be done to GlusterFS.
-
-
-FOR HACKERS
------------
-
-* Source Layout
-
-** version specific: hdfs/<version> **
-./src
-./src/main
-./src/main/java
-./src/main/java/org
-./src/main/java/org/apache
-./src/main/java/org/apache/hadoop
-./src/main/java/org/apache/hadoop/fs
-./src/main/java/org/apache/hadoop/fs/glusterfs
-./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickClass.java
-./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSXattr.java <--- Fetch/Parse Extended Attributes of a file
-./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEInputStream.java <--- Input Stream (instantiated during open() calls; quick read from backed FS)
-./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFSBrickRepl.java
-./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFUSEOutputStream.java <--- Output Stream (instantiated during creat() calls)
-./src/main/java/org/apache/hadoop/fs/glusterfs/GlusterFileSystem.java <--- Entry Point for the plugin (extends Hadoop FileSystem class)
-./src/test
-./src/test/java
-./src/test/java/org
-./src/test/java/org/apache
-./src/test/java/org/apache/hadoop
-./src/test/java/org/apache/hadoop/fs
-./src/test/java/org/apache/hadoop/fs/glusterfs
-./src/test/java/org/apache/hadoop/fs/glusterfs/AppTest.java <--- Your test cases go here (if any :-))
-./tools/build-deploy-jar.py <--- Build and Deployment Script
-./conf
-./conf/core-site.xml <--- Sample configuration file
-./pom.xml <--- build XML file (used by maven)
-
-** toplevel: hdfs/ **
-./COPYING <--- License
-./README <--- This file
diff --git a/glusterfs.spec.in b/glusterfs.spec.in
index e607c811e65..b6d63146e14 100644
--- a/glusterfs.spec.in
+++ b/glusterfs.spec.in
@@ -1,375 +1,2049 @@
-# if you make changes, the it is advised to increment this number, and provide
-# a descriptive suffix to identify who owns or what the change represents
-# e.g. release_version 2.MSW
-%global release 1%{?dist}
-%global _sharedstatedir /var/lib
+%global _hardened_build 1
+%global _for_fedora_koji_builds 0
-# if you wish to compile an rpm without rdma support, compile like this...
-# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without rdma
-%{?_without_rdma:%global _without_rdma --disable-ibverbs}
+# uncomment and add '%' to use the prereltag for pre-releases
+# %%global prereltag qa3
-# No RDMA Support on x390(x)
-%ifarch s390 s390x
-%global _without_rdma --disable-ibverbs
+##-----------------------------------------------------------------------------
+## All argument definitions should be placed here and keep them sorted
+##
+
+# asan
+# if you wish to compile an rpm with address sanitizer...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with asan
+%{?_with_asan:%global _with_asan --enable-asan}
+
+%if ( 0%{?rhel} && 0%{?rhel} < 7 )
+%global _with_asan %{nil}
%endif
+# cmocka
+# if you wish to compile an rpm with cmocka unit testing...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with cmocka
+%{?_with_cmocka:%global _with_cmocka --enable-cmocka}
+
+# debug
+# if you wish to compile an rpm with debugging...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with debug
+%{?_with_debug:%global _with_debug --enable-debug}
+
+# epoll
# if you wish to compile an rpm without epoll...
# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without epoll
%{?_without_epoll:%global _without_epoll --disable-epoll}
-# if you wish to compile an rpm with fusermount...
-# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with fusermount
-%{?_with_fusermount:%global _with_fusermount --enable-fusermount}
-
-%global version @PACKAGE_VERSION@
-%if "%{version}" >= "3.2"
-%global _can_georeplicate 1
+# fusermount
+# if you wish to compile an rpm without fusermount...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without fusermount
+%{?_without_fusermount:%global _without_fusermount --disable-fusermount}
+# geo-rep
# if you wish to compile an rpm without geo-replication support, compile like this...
# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without georeplication
-%{?_without_georeplication:%global _without_georeplication --disable-geo-replication}
-%endif
-
-Summary: Cluster File System
-Name: @PACKAGE_NAME@
-Version: %{version}
-Release: %{release}
-License: GPLv2 or LGPLv3+
-Group: System Environment/Base
-Vendor: Gluster Inc
-Packager: @PACKAGE_BUGREPORT@
-URL: http://www.gluster.org/docs/index.php/GlusterFS
-Source0: @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz
-BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
-Requires(post): /sbin/chkconfig
-Requires(preun): /sbin/service, /sbin/chkconfig
-Requires(postun): /sbin/service
+%{?_without_georeplication:%global _without_georeplication --disable-georeplication}
+
+# gnfs
+# if you wish to compile an rpm with the legacy gNFS server xlator
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with gnfs
+%{?_with_gnfs:%global _with_gnfs --enable-gnfs}
+
+# ipv6default
+# if you wish to compile an rpm with IPv6 default...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with ipv6default
+%{?_with_ipv6default:%global _with_ipv6default --with-ipv6-default}
+
+# libtirpc
+# if you wish to compile an rpm without TIRPC (i.e. use legacy glibc rpc)
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without libtirpc
+%{?_without_libtirpc:%global _without_libtirpc --without-libtirpc}
+
+# Do not use libtirpc on EL6, it does not have xdr_uint64_t() and xdr_uint32_t
+# Do not use libtirpc on EL7, it does not have xdr_sizeof()
+%if ( 0%{?rhel} && 0%{?rhel} <= 7 )
+%global _without_libtirpc --without-libtirpc
+%endif
+
+
+# ocf
+# if you wish to compile an rpm without the OCF resource agents...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without ocf
+%{?_without_ocf:%global _without_ocf --without-ocf}
+
+# server
+# if you wish to build rpms without server components, compile like this
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without server
+%{?_without_server:%global _without_server --without-server}
+
+# disable server components forcefully as rhel <= 6
+%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
+%global _without_server --without-server
+%endif
+
+# syslog
+# if you wish to build rpms without syslog logging, compile like this
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --without syslog
+%{?_without_syslog:%global _without_syslog --disable-syslog}
+
+# disable syslog forcefully as rhel <= 6 doesn't have rsyslog or rsyslog-mmcount
+# Fedora deprecated syslog, see
+# https://fedoraproject.org/wiki/Changes/NoDefaultSyslog
+# (And what about RHEL7?)
+%if ( 0%{?fedora} && 0%{?fedora} >= 20 ) || ( 0%{?rhel} && 0%{?rhel} <= 6 )
+%global _without_syslog --disable-syslog
+%endif
+
+# tsan
+# if you wish to compile an rpm with thread sanitizer...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with tsan
+%{?_with_tsan:%global _with_tsan --enable-tsan}
+
+%if ( 0%{?rhel} && 0%{?rhel} < 7 )
+%global _with_tsan %{nil}
+%endif
+
+# valgrind
+# if you wish to compile an rpm to run all processes under valgrind...
+# rpmbuild -ta @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz --with valgrind
+%{?_with_valgrind:%global _with_valgrind --enable-valgrind}
+
+##-----------------------------------------------------------------------------
+## All %%global definitions should be placed here and keep them sorted
+##
+
+# selinux booleans whose defalut value needs modification
+# these booleans will be consumed by "%%selinux_set_booleans" macro.
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+%global selinuxbooleans rsync_full_access=1 rsync_client=1
+%endif
+
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+%global _with_systemd true
+%endif
+
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 7 )
+%global _with_firewalld --enable-firewalld
+%endif
-BuildRequires: bison flex
-BuildRequires: gcc make automake libtool
-BuildRequires: ncurses-devel readline-devel openssl-devel
-BuildRequires: libxml2-devel
-BuildRequires: python-ctypes
-%if 0%{?suse_version}
-BuildRequires: python-devel
+%if 0%{?_tmpfilesdir:1}
+%global _with_tmpfilesdir --with-tmpfilesdir=%{_tmpfilesdir}
+%else
+%global _with_tmpfilesdir --without-tmpfilesdir
+%endif
+
+# without server should also disable some server-only components
+%if 0%{?_without_server:1}
+%global _without_events --disable-events
+%global _without_georeplication --disable-georeplication
+%global _with_gnfs %{nil}
+%global _without_ocf --without-ocf
+%endif
+
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 7 )
+%global _usepython3 1
+%global _pythonver 3
+%else
+%global _usepython3 0
+%global _pythonver 2
+%endif
+
+# From https://fedoraproject.org/wiki/Packaging:Python#Macros
+%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
+%{!?python2_sitelib: %global python2_sitelib %(python2 -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")}
+%{!?python2_sitearch: %global python2_sitearch %(python2 -c "from distutils.sysconfig import get_python_lib; print(get_python_lib(1))")}
+%global _rundir %{_localstatedir}/run
+%endif
+
+%if ( 0%{?_with_systemd:1} )
+%global service_start() /bin/systemctl --quiet start %1.service || : \
+%{nil}
+%global service_stop() /bin/systemctl --quiet stop %1.service || :\
+%{nil}
+%global service_install() install -D -p -m 0644 %1.service %{buildroot}%2 \
+%{nil}
+# can't seem to make a generic macro that works
+%global glusterd_svcfile %{_unitdir}/glusterd.service
+%global glusterfsd_svcfile %{_unitdir}/glusterfsd.service
+%global glusterta_svcfile %{_unitdir}/gluster-ta-volume.service
+%global glustereventsd_svcfile %{_unitdir}/glustereventsd.service
+%global glusterfssharedstorage_svcfile %{_unitdir}/glusterfssharedstorage.service
+%else
+%global systemd_post() /sbin/chkconfig --add %1 >/dev/null 2>&1 || : \
+%{nil}
+%global systemd_preun() /sbin/chkconfig --del %1 >/dev/null 2>&1 || : \
+%{nil}
+%global systemd_postun_with_restart() /sbin/service %1 condrestart >/dev/null 2>&1 || : \
+%{nil}
+%global service_start() /sbin/service %1 start >/dev/null 2>&1 || : \
+%{nil}
+%global service_stop() /sbin/service %1 stop >/dev/null 2>&1 || : \
+%{nil}
+%global service_install() install -D -p -m 0755 %1.init %{buildroot}%2 \
+%{nil}
+# can't seem to make a generic macro that works
+%global glusterd_svcfile %{_sysconfdir}/init.d/glusterd
+%global glusterfsd_svcfile %{_sysconfdir}/init.d/glusterfsd
+%global glustereventsd_svcfile %{_sysconfdir}/init.d/glustereventsd
+%endif
+
+%{!?_pkgdocdir: %global _pkgdocdir %{_docdir}/%{name}-%{version}}
+
+# We do not want to generate useless provides and requires for xlator
+# .so files to be set for glusterfs packages.
+# Filter all generated:
+#
+# TODO: RHEL5 does not have a convenient solution
+%if ( 0%{?rhel} == 6 )
+# filter_setup exists in RHEL6 only
+%filter_provides_in %{_libdir}/glusterfs/%{version}/
+%global __filter_from_req %{?__filter_from_req} | grep -v -P '^(?!lib).*\.so.*$'
+%filter_setup
+%else
+# modern rpm and current Fedora do not generate requires when the
+# provides are filtered
+%global __provides_exclude_from ^%{_libdir}/glusterfs/%{version}/.*$
+%endif
+
+
+##-----------------------------------------------------------------------------
+## All package definitions should be placed here in alphabetical order
+##
+Summary: Distributed File System
+%if ( 0%{_for_fedora_koji_builds} )
+Name: glusterfs
+Version: 3.8.0
+Release: 0.1%{?prereltag:.%{prereltag}}%{?dist}
+%else
+Name: @PACKAGE_NAME@
+Version: @PACKAGE_VERSION@
+Release: 0.@PACKAGE_RELEASE@%{?dist}
+%endif
+License: GPLv2 or LGPLv3+
+URL: http://docs.gluster.org/
+%if ( 0%{_for_fedora_koji_builds} )
+Source0: http://bits.gluster.org/pub/gluster/glusterfs/src/glusterfs-%{version}%{?prereltag}.tar.gz
+Source1: glusterd.sysconfig
+Source2: glusterfsd.sysconfig
+Source7: glusterfsd.service
+Source8: glusterfsd.init
+%else
+Source0: @PACKAGE_NAME@-@PACKAGE_VERSION@.tar.gz
+%endif
+
+BuildRoot: %(mktemp -ud %{_tmppath}/%{name}-%{version}-%{release}-XXXXXX)
+
+Requires(pre): shadow-utils
+%if ( 0%{?_with_systemd:1} )
+BuildRequires: systemd
+%endif
+
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Requires: libgfrpc0%{?_isa} = %{version}-%{release}
+Requires: libgfxdr0%{?_isa} = %{version}-%{release}
+%if ( 0%{?_with_systemd:1} )
+%{?systemd_requires}
+%endif
+%if 0%{?_with_asan:1} && !( 0%{?rhel} && 0%{?rhel} < 7 )
+BuildRequires: libasan
+%endif
+%if 0%{?_with_tsan:1} && !( 0%{?rhel} && 0%{?rhel} < 7 )
+BuildRequires: libtsan
+%endif
+BuildRequires: bison flex
+BuildRequires: gcc make libtool
+BuildRequires: ncurses-devel readline-devel
+BuildRequires: libxml2-devel openssl-devel
+BuildRequires: libaio-devel libacl-devel
+BuildRequires: python%{_pythonver}-devel
+%if ( 0%{?rhel} && 0%{?rhel} < 8 )
+BuildRequires: python-ctypes
+%endif
+%if ( 0%{?_with_ipv6default:1} ) || ( 0%{!?_without_libtirpc:1} )
+BuildRequires: libtirpc-devel
+%endif
+%if ( 0%{?fedora} && 0%{?fedora} > 27 ) || ( 0%{?rhel} && 0%{?rhel} > 7 )
+BuildRequires: rpcgen
+%endif
+BuildRequires: userspace-rcu-devel >= 0.7
+%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
+BuildRequires: automake
+%endif
+BuildRequires: libuuid-devel
+%if ( 0%{?_with_cmocka:1} )
+BuildRequires: libcmocka-devel >= 1.0.1
+%endif
+%if ( 0%{!?_without_georeplication:1} )
+BuildRequires: libattr-devel
+%endif
+
+%if (0%{?_with_firewalld:1})
+BuildRequires: firewalld
%endif
-Requires: openssl
-Obsoletes: %{name}-libs <= 2.0.0
Obsoletes: %{name}-common < %{version}-%{release}
Obsoletes: %{name}-core < %{version}-%{release}
-Provides: %{name}-libs = %{version}-%{release}
+Obsoletes: %{name}-ganesha
+Obsoletes: %{name}-rdma < %{version}-%{release}
+%if ( 0%{!?_with_gnfs:1} )
+Obsoletes: %{name}-gnfs < %{version}-%{release}
+%endif
Provides: %{name}-common = %{version}-%{release}
Provides: %{name}-core = %{version}-%{release}
%description
-GlusterFS is a clustered file-system capable of scaling to several
-peta-bytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file system in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in userspace and easily manageable.
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package includes the glusterfs binary, the glusterfsd daemon and the
+libglusterfs and glusterfs translator modules common to both GlusterFS server
+and client framework.
+
+%package cli
+Summary: GlusterFS CLI
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Requires: libglusterd0%{?_isa} = %{version}-%{release}
+
+%description cli
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the GlusterFS CLI application and its man page
+
+%package cloudsync-plugins
+Summary: Cloudsync Plugins
+BuildRequires: libcurl-devel
+
+%description cloudsync-plugins
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides cloudsync plugins for archival feature.
+
+%package extra-xlators
+Summary: Extra Gluster filesystem Translators
+# We need python-gluster rpm for gluster module's __init__.py in Python
+# site-packages area
+Requires: python%{_pythonver}-gluster = %{version}-%{release}
+Requires: python%{_pythonver}
+
+%description extra-xlators
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides extra filesystem Translators, such as Glupy,
+for GlusterFS.
+
+%package fuse
+Summary: Fuse client
+BuildRequires: fuse-devel
+Requires: attr
+Requires: psmisc
-This package includes libraries and utility scripts.
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
-%if 0%{!?_without_rdma:1}
-%package rdma
-Summary: GlusterFS rdma support for ib-verbs
-License: GPLv2 or LGPLv3+
-Group: Applications/File
-BuildRequires: libibverbs-devel
+Obsoletes: %{name}-client < %{version}-%{release}
+Provides: %{name}-client = %{version}-%{release}
+
+%description fuse
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides support to FUSE based clients and inlcudes the
+glusterfs(d) binary.
+
+%if ( 0%{!?_without_server:1} )
+%package ganesha
+Summary: NFS-Ganesha configuration
+Group: Applications/File
+
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+Requires: nfs-ganesha-selinux >= 2.7.6
+Requires: nfs-ganesha-gluster >= 2.7.6
+Requires: pcs >= 0.10.0
+Requires: resource-agents >= 4.2.0
+Requires: dbus
+
+%if ( 0%{?rhel} && 0%{?rhel} == 6 )
+Requires: cman, pacemaker, corosync
+%endif
+
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} > 5 )
+# we need portblock resource-agent in 3.9.5 and later.
+Requires: net-tools
+%endif
-Requires: %{name} = %{version}-%{release}
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%if ( 0%{?rhel} && 0%{?rhel} < 8 )
+Requires: selinux-policy >= 3.13.1-160
+Requires(post): policycoreutils-python
+Requires(postun): policycoreutils-python
+%else
+Requires(post): policycoreutils-python-utils
+Requires(postun): policycoreutils-python-utils
+%endif
+%endif
-%description rdma
-GlusterFS is a clustered file-system capable of scaling to several
-peta-bytes. It aggregates various storage bricks over Infiniband RDMA
+%description ganesha
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over Infiniband RDMA
or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file system in
+system. GlusterFS is one of the most sophisticated file systems in
terms of features and extensibility. It borrows a powerful concept
called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in userspace and easily manageable.
+is in user space and easily manageable.
-This package provides support to ib-verbs library.
+This package provides the configuration and related files for using
+NFS-Ganesha as the NFS server using GlusterFS
%endif
-%if 0%{?_can_georeplicate}
-%if 0%{!?_without_georeplication:1}
+%if ( 0%{!?_without_georeplication:1} )
%package geo-replication
-Summary: GlusterFS Geo-replication
-License: GPLv3+
-Group: Applications/File
-Requires: %{name} = %{version}-%{release} , python-ctypes , rsync >= 3.0.0
+Summary: GlusterFS Geo-replication
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+Requires: python%{_pythonver}
+Requires: python%{_pythonver}-prettytable
+Requires: python%{_pythonver}-gluster = %{version}-%{release}
+
+Requires: rsync
+Requires: util-linux
+# required for setting selinux bools
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+Requires(post): policycoreutils-python-utils
+Requires(postun): policycoreutils-python-utils
+Requires: selinux-policy-targeted
+Requires(post): selinux-policy-targeted
+BuildRequires: selinux-policy-devel
+%endif
%description geo-replication
-GlusterFS is a clustered file-system capable of scaling to several
-peta-bytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file system in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in userspace and easily manageable.
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
This package provides support to geo-replication.
%endif
-%endif
-
-%package fuse
-Summary: GlusterFS Fuse client
-License: GPLv2 or LGPLv3+
-Group: Applications/File
-Requires: %{name} >= %{version}-%{release}
+%if ( 0%{?_with_gnfs:1} )
+%package gnfs
+Summary: GlusterFS gNFS server
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
+Requires: nfs-utils
+
+%description gnfs
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the glusterfs legacy gNFS server xlator
+%endif
-Obsoletes: %{name}-client < %{version}-%{release}
-Provides: %{name}-client = %{version}-%{release}
+%package -n libglusterfs0
+Summary: GlusterFS libglusterfs library
+Requires: libgfrpc0%{?_isa} = %{version}-%{release}
+Requires: libgfxdr0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-libs <= %{version}-%{release}
+Provides: %{name}-libs = %{version}-%{release}
-%description fuse
-GlusterFS is a clustered file-system capable of scaling to several
-peta-bytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file system in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in userspace and easily manageable.
+%description -n libglusterfs0
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the base libglusterfs library
+
+%package -n libglusterfs-devel
+Summary: GlusterFS libglusterfs library
+Requires: libgfrpc-devel%{?_isa} = %{version}-%{release}
+Requires: libgfxdr-devel%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-devel <= %{version}-%{release}
+Provides: %{name}-devel = %{version}-%{release}
+
+%description -n libglusterfs-devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides libglusterfs.so and the gluster C header files.
+
+%package -n libgfapi0
+Summary: GlusterFS api library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-api <= %{version}-%{release}
+Provides: %{name}-api = %{version}-%{release}
+
+%description -n libgfapi0
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the glusterfs libgfapi library.
+
+%package -n libgfapi-devel
+Summary: Development Libraries
+Requires: libglusterfs-devel%{?_isa} = %{version}-%{release}
+Requires: libacl-devel
+Obsoletes: %{name}-api-devel <= %{version}-%{release}
+Provides: %{name}-api-devel = %{version}-%{release}
+
+%description -n libgfapi-devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides libgfapi.so and the api C header files.
+
+%package -n libgfchangelog0
+Summary: GlusterFS libchangelog library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-libs <= %{version}-%{release}
+
+%description -n libgfchangelog0
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the libgfchangelog library
+
+%package -n libgfchangelog-devel
+Summary: GlusterFS libchangelog library
+Requires: libglusterfs-devel%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-devel <= %{version}-%{release}
+
+%description -n libgfchangelog-devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides libgfchangelog.so and changelog C header files.
+
+%package -n libgfrpc0
+Summary: GlusterFS libgfrpc0 library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-libs <= %{version}-%{release}
+
+%description -n libgfrpc0
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the libgfrpc library
+
+%package -n libgfrpc-devel
+Summary: GlusterFS libgfrpc library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-devel <= %{version}-%{release}
+
+%description -n libgfrpc-devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides libgfrpc.so and rpc C header files.
+
+%package -n libgfxdr0
+Summary: GlusterFS libgfxdr0 library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-libs <= %{version}-%{release}
+
+%description -n libgfxdr0
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the libgfxdr library
+
+%package -n libgfxdr-devel
+Summary: GlusterFS libgfxdr library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-devel <= %{version}-%{release}
+
+%description -n libgfxdr-devel
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides libgfxdr.so.
+
+%package -n libglusterd0
+Summary: GlusterFS libglusterd library
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Obsoletes: %{name}-libs <= %{version}-%{release}
+
+%description -n libglusterd0
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the libglusterd library
+
+%package -n python%{_pythonver}-gluster
+Summary: GlusterFS python library
+Requires: python%{_pythonver}
+%if ( ! %{_usepython3} )
+%{?python_provide:%python_provide python-gluster}
+Provides: python-gluster = %{version}-%{release}
+Obsoletes: python-gluster < 3.10
+%endif
-This package provides support to FUSE based clients.
+%description -n python%{_pythonver}-gluster
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package contains the python modules of GlusterFS and own gluster
+namespace.
+
+%package regression-tests
+Summary: Development Tools
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-fuse%{?_isa} = %{version}-%{release}
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+## thin provisioning support
+Requires: lvm2 >= 2.02.89
+Requires: perl(App::Prove) perl(Test::Harness) gcc util-linux-ng
+Requires: python%{_pythonver}
+Requires: attr dbench file git libacl-devel net-tools
+Requires: nfs-utils xfsprogs yajl psmisc bc
+
+%description regression-tests
+The Gluster Test Framework, is a suite of scripts used for
+regression testing of Gluster.
+
+%if ( 0%{!?_without_ocf:1} )
+%package resource-agents
+Summary: OCF Resource Agents for GlusterFS
+License: GPLv3+
+BuildArch: noarch
+# this Group handling comes from the Fedora resource-agents package
+# for glusterd
+Requires: %{name}-server = %{version}-%{release}
+# depending on the distribution, we need pacemaker or resource-agents
+Requires: %{_prefix}/lib/ocf/resource.d
+
+%description resource-agents
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the resource agents which plug glusterd into
+Open Cluster Framework (OCF) compliant cluster resource managers,
+like Pacemaker.
+%endif
+%if ( 0%{!?_without_server:1} )
%package server
Summary: Clustered file-system server
-License: GPLv3+
-Group: System Environment/Daemons
-Requires: %{name} = %{version}-%{release}
-Requires: %{name}-fuse = %{version}-%{release}
-Requires: openssl
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-cli%{?_isa} = %{version}-%{release}
+Requires: libglusterfs0%{?_isa} = %{version}-%{release}
+Requires: libgfchangelog0%{?_isa} = %{version}-%{release}
+%if ( 0%{?fedora} && 0%{?fedora} >= 30 || ( 0%{?rhel} && 0%{?rhel} >= 8 ) )
+Requires: glusterfs-selinux >= 0.1.0-2
+%endif
+# some daemons (like quota) use a fuse-mount, glusterfsd is part of -fuse
+Requires: %{name}-fuse%{?_isa} = %{version}-%{release}
+# self-heal daemon, rebalance, nfs-server etc. are actually clients
+Requires: libgfapi0%{?_isa} = %{version}-%{release}
+Requires: %{name}-client-xlators%{?_isa} = %{version}-%{release}
+# lvm2 for snapshot, and nfs-utils and rpcbind/portmap for gnfs server
+Requires: lvm2
+%if ( 0%{?_with_systemd:1} )
+%{?systemd_requires}
+%else
+Requires(post): /sbin/chkconfig
+Requires(preun): /sbin/service
+Requires(preun): /sbin/chkconfig
+Requires(postun): /sbin/service
+%endif
+%if (0%{?_with_firewalld:1})
+# we install firewalld rules, so we need to have the directory owned
+%if ( 0%{!?rhel} )
+# not on RHEL because firewalld-filesystem appeared in 7.3
+# when EL7 rpm gets weak dependencies we can add a Suggests:
+Requires: firewalld-filesystem
+%endif
+%endif
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+Requires: rpcbind
+%else
+Requires: portmap
+%endif
+%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
+Requires: python-argparse
+%endif
+%if ( 0%{?fedora} && 0%{?fedora} > 27 ) || ( 0%{?rhel} && 0%{?rhel} > 7 )
+Requires: python%{_pythonver}-pyxattr
+%else
+Requires: pyxattr
+%endif
+%if (0%{?_with_valgrind:1})
+Requires: valgrind
+%endif
%description server
-GlusterFS is a clustered file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
-
-This package provides the glusterfs server daemon and translators that
-are loaded on the server.
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the glusterfs server daemon.
+%endif
-%package devel
-Summary: Development Libraries
-License: GPLv2 or LGPLv3+
-Group: Development/Libraries
-Requires: %{name} = %{version}-%{release}
+%package thin-arbiter
+Summary: GlusterFS thin-arbiter module
+Requires: %{name}%{?_isa} = %{version}-%{release}
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+
+%description thin-arbiter
+This package provides a tie-breaker functionality to GlusterFS
+replicate volume. It includes translators required to provide the
+functionality, and also few other scripts required for getting the setup done.
+
+This package provides the glusterfs thin-arbiter translator.
+
+%package client-xlators
+Summary: GlusterFS client-side translators
+
+%description client-xlators
+GlusterFS is a distributed file-system capable of scaling to several
+petabytes. It aggregates various storage bricks over TCP/IP interconnect
+into one large parallel network filesystem. GlusterFS is one of the
+most sophisticated file systems in terms of features and extensibility.
+It borrows a powerful concept called Translators from GNU Hurd kernel.
+Much of the code in GlusterFS is in user space and easily manageable.
+
+This package provides the translators needed on any GlusterFS client.
+
+%if ( 0%{!?_without_events:1} )
+%package events
+Summary: GlusterFS Events
+Requires: %{name}-server%{?_isa} = %{version}-%{release}
+Requires: python%{_pythonver} python%{_pythonver}-prettytable
+Requires: python%{_pythonver}-gluster = %{version}-%{release}
+%if ( 0%{?rhel} && 0%{?rhel} < 8 )
+Requires: python-requests
+%else
+Requires: python%{_pythonver}-requests
+%endif
+%if ( 0%{?rhel} && 0%{?rhel} < 7 )
+Requires: python-argparse
+%endif
+%if ( 0%{?_with_systemd:1} )
+%{?systemd_requires}
+%endif
-%description devel
-GlusterFS is a clustered file-system capable of scaling to several
-petabytes. It aggregates various storage bricks over Infiniband RDMA
-or TCP/IP interconnect into one large parallel network file
-system. GlusterFS is one of the most sophisticated file systems in
-terms of features and extensibility. It borrows a powerful concept
-called Translators from GNU Hurd kernel. Much of the code in GlusterFS
-is in user space and easily manageable.
+%description events
+GlusterFS Events
-This package provides the development libraries.
+%endif
%prep
-%setup -q -n %{name}-%{version}
+%setup -q -n %{name}-%{version}%{?prereltag}
+%if ( ! %{_usepython3} )
+echo "fixing python shebangs..."
+for f in api events extras geo-replication libglusterfs tools xlators; do
+find $f -type f -exec sed -i 's|/usr/bin/python3|/usr/bin/python2|' {} \;
+done
+%endif
%build
+
+# RHEL6 and earlier need to manually replace config.guess and config.sub
+%if ( 0%{?rhel} && 0%{?rhel} <= 6 )
./autogen.sh
-%configure %{?_without_rdma} %{?_without_epoll} %{?_with_fusermount} %{?_without_georeplication}
+%endif
-# Remove rpath
-sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|g' libtool
-sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|g' libtool
+%configure \
+ %{?_with_asan} \
+ %{?_with_cmocka} \
+ %{?_with_debug} \
+ %{?_with_firewalld} \
+ %{?_with_gnfs} \
+ %{?_with_tmpfilesdir} \
+ %{?_with_tsan} \
+ %{?_with_valgrind} \
+ %{?_without_epoll} \
+ %{?_without_events} \
+ %{?_without_fusermount} \
+ %{?_without_georeplication} \
+ %{?_without_ocf} \
+ %{?_without_server} \
+ %{?_without_syslog} \
+ %{?_with_ipv6default} \
+ %{?_without_libtirpc}
+
+# fix hardening and remove rpath in shlibs
+%if ( 0%{?fedora} && 0%{?fedora} > 17 ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+sed -i 's| \\\$compiler_flags |&\\\$LDFLAGS |' libtool
+%endif
+sed -i 's|^hardcode_libdir_flag_spec=.*|hardcode_libdir_flag_spec=""|' libtool
+sed -i 's|^runpath_var=LD_RUN_PATH|runpath_var=DIE_RPATH_DIE|' libtool
-%{__make} %{?_smp_mflags}
+make %{?_smp_mflags}
+%check
+make check
%install
-%{__rm} -rf %{buildroot}
-%{__make} install DESTDIR=%{buildroot}
-# Install include directory
-%{__mkdir_p} %{buildroot}%{_includedir}/glusterfs
-%{__install} -p -m 0644 libglusterfs/src/*.h \
- %{buildroot}%{_includedir}/glusterfs/
-%{__install} -p -m 0644 contrib/uuid/*.h \
- %{buildroot}%{_includedir}/glusterfs/
-# Following needed by hekafs multi-tenant translator
-%{__mkdir_p} %{buildroot}%{_includedir}/glusterfs/rpc
-%{__install} -p -m 0644 rpc/rpc-lib/src/*.h \
- %{buildroot}%{_includedir}/glusterfs/rpc/
-%{__install} -p -m 0644 rpc/xdr/src/*.h \
- %{buildroot}%{_includedir}/glusterfs/rpc/
-%{__mkdir_p} %{buildroot}%{_includedir}/glusterfs/server
-%{__install} -p -m 0644 xlators/protocol/server/src/*.h \
- %{buildroot}%{_includedir}/glusterfs/server/
+rm -rf %{buildroot}
+make install DESTDIR=%{buildroot}
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{_for_fedora_koji_builds} )
+install -D -p -m 0644 %{SOURCE1} \
+ %{buildroot}%{_sysconfdir}/sysconfig/glusterd
+install -D -p -m 0644 %{SOURCE2} \
+ %{buildroot}%{_sysconfdir}/sysconfig/glusterfsd
+%else
+install -D -p -m 0644 extras/glusterd-sysconfig \
+ %{buildroot}%{_sysconfdir}/sysconfig/glusterd
+%endif
+%endif
+mkdir -p %{buildroot}%{_localstatedir}/log/glusterd
+mkdir -p %{buildroot}%{_localstatedir}/log/glusterfs
+mkdir -p %{buildroot}%{_localstatedir}/log/glusterfsd
+mkdir -p %{buildroot}%{_rundir}/gluster
# Remove unwanted files from all the shared libraries
find %{buildroot}%{_libdir} -name '*.a' -delete
find %{buildroot}%{_libdir} -name '*.la' -delete
-# Remove installed docs, we include them ourselves as %%doc
-%{__rm} -rf %{buildroot}%{_datadir}/doc/glusterfs/
+# Remove installed docs, the ones we want are included by %%doc, in
+# /usr/share/doc/glusterfs or /usr/share/doc/glusterfs-x.y.z depending
+# on the distribution
+%if ( 0%{?fedora} && 0%{?fedora} > 19 ) || ( 0%{?rhel} && 0%{?rhel} > 6 )
+rm -rf %{buildroot}%{_pkgdocdir}/*
+%else
+rm -rf %{buildroot}%{_defaultdocdir}/%{name}
+mkdir -p %{buildroot}%{_pkgdocdir}
+%endif
+head -50 ChangeLog > ChangeLog.head && mv ChangeLog.head ChangeLog
+cat << EOM >> ChangeLog
+
+More commit messages for this ChangeLog can be found at
+https://forge.gluster.org/glusterfs-core/glusterfs/commits/v%{version}%{?prereltag}
+EOM
-# Rename the samples, so we can include them as %%config
-#for file in %{buildroot}%{_sysconfdir}/glusterfs/*.sample; do
-# %{__mv} ${file} `dirname ${file}`/`basename ${file} .sample`
-#done
+# Remove benchmarking and other unpackaged files
+# make install always puts these in %%{_defaultdocdir}/%%{name} so don't
+# use %%{_pkgdocdir}; that will be wrong on later Fedora distributions
+rm -rf %{buildroot}%{_defaultdocdir}/%{name}/benchmarking
+rm -f %{buildroot}%{_defaultdocdir}/%{name}/glusterfs-mode.el
+rm -f %{buildroot}%{_defaultdocdir}/%{name}/glusterfs.vim
+%if ( 0%{!?_without_server:1} )
# Create working directory
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd
# Update configuration file to /var/lib working directory
sed -i 's|option working-directory /etc/glusterd|option working-directory %{_sharedstatedir}/glusterd|g' \
%{buildroot}%{_sysconfdir}/glusterfs/glusterd.vol
+%endif
-# Following needed by the hooks interface
-%{__mkdir_p} %{buildroot}%{_sharedstatedir}/glusterd/hook-scripts
-%{__install} -p -m 0644 extras/hook-scripts/*.sh \
- %{buildroot}%{_sharedstatedir}/glusterd/hook-scripts/
+# Install glusterfsd .service or init.d file
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{_for_fedora_koji_builds} )
+%service_install glusterfsd %{glusterfsd_svcfile}
+%endif
+%endif
-# Clean up the examples we want to include as %%doc
-#%{__cp} -a doc/examples examples
-#%{__rm} -f examples/Makefile*
+install -D -p -m 0644 extras/glusterfs-logrotate \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs
-%clean
-%{__rm} -rf %{buildroot}
+# ganesha ghosts
+%if ( 0%{!?_without_server:1} )
+mkdir -p %{buildroot}%{_sysconfdir}/ganesha
+touch %{buildroot}%{_sysconfdir}/ganesha/ganesha-ha.conf
+mkdir -p %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/
+touch %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha.conf
+touch %{buildroot}%{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha-ha.conf
+%endif
-%post
-/sbin/ldconfig
+%if ( 0%{!?_without_georeplication:1} )
+# geo-rep ghosts
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/geo-replication
+touch %{buildroot}%{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf
+install -D -p -m 0644 extras/glusterfs-georep-logrotate \
+ %{buildroot}%{_sysconfdir}/logrotate.d/glusterfs-georep
+%endif
-# Copy the 'glusterfs-logrotate' file at the right place
-if [ -d /etc/logrotate.d ]; then
- cp %{_docdir}/%{name}-%{version}/glusterfs-logrotate /etc/logrotate.d/glusterfs
-fi
+%if ( 0%{!?_without_server:1} )
+# the rest of the ghosts
+touch %{buildroot}%{_sharedstatedir}/glusterd/glusterd.info
+touch %{buildroot}%{_sharedstatedir}/glusterd/options
+subdirs=(add-brick create copy-file delete gsync-create remove-brick reset set start stop)
+for dir in ${subdirs[@]}; do
+ mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/hooks/1/"$dir"/{pre,post}
+done
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/glustershd
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/peers
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/vols
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/nfs/run
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/bitd
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/quotad
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/scrub
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/snaps
+mkdir -p %{buildroot}%{_sharedstatedir}/glusterd/ss_brick
+touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/nfs-server.vol
+touch %{buildroot}%{_sharedstatedir}/glusterd/nfs/run/nfs.pid
+%endif
-%postun
-/sbin/ldconfig
+find ./tests ./run-tests.sh -type f | cpio -pd %{buildroot}%{_prefix}/share/glusterfs
-%files
-%defattr(-,root,root)
-%doc AUTHORS ChangeLog COPYING-GPLV2 COPYING-LGPLV3 INSTALL NEWS README THANKS
-%doc extras/glusterfs-logrotate
-%{_libdir}/glusterfs
-%{_libdir}/*.so.*
-%{_sbindir}/glusterfs*
-%{_mandir}/man8/*gluster*.8*
-%dir %{_localstatedir}/log/glusterfs
-%if 0%{!?_without_rdma:1}
-%exclude %{_libdir}/glusterfs/%{version}/rpc-transport/rdma*
-%endif
-%exclude %{_libdir}/glusterfs/%{version}/xlator/mount/fuse*
-%exclude %{_libdir}/glusterfs/%{version}/xlator/storage*
-%exclude %{_libdir}/glusterfs/%{version}/xlator/features/posix*
-%exclude %{_libdir}/glusterfs/%{version}/xlator/protocol/server*
-%exclude %{_libdir}/glusterfs/%{version}/xlator/mgmt*
-%exclude %{_libdir}/glusterfs/%{version}/xlator/nfs*
+## Install bash completion for cli
+install -p -m 0744 -D extras/command-completion/gluster.bash \
+ %{buildroot}%{_sysconfdir}/bash_completion.d/gluster
+
+%clean
+rm -rf %{buildroot}
-%if 0%{!?_without_rdma:1}
-%files rdma
-%defattr(-,root,root)
-%{_libdir}/glusterfs/%{version}/rpc-transport/rdma*
+##-----------------------------------------------------------------------------
+## All %%post should be placed here and keep them sorted
+##
+%post
+/sbin/ldconfig
+%if ( 0%{!?_without_syslog:1} )
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+%systemd_postun_with_restart rsyslog
+%endif
%endif
+exit 0
-%if 0%{?_can_georeplicate}
-%if 0%{!?_without_georeplication:1}
-%post geo-replication
-#restart glusterd.
-%{_sysconfdir}/init.d/glusterd restart &> /dev/null
+%if ( 0%{!?_without_events:1} )
+%post events
+%systemd_post glustereventsd
%endif
-%if 0%{!?_without_georeplication:1}
-%files geo-replication
-%defattr(-,root,root)
-%{_libexecdir}/glusterfs/gsyncd
-%{_libexecdir}/glusterfs/python/syncdaemon/*
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%post ganesha
+semanage boolean -m ganesha_use_fusefs --on
+exit 0
%endif
%endif
-%files fuse
-%defattr(-,root,root)
-%{_libdir}/glusterfs/%{version}/xlator/mount/fuse*
-%{_mandir}/man8/mount.glusterfs.8*
-/sbin/mount.glusterfs
-%if 0%{?_with_fusermount:1}
-%{_bindir}/fusermount-glusterfs
+%if ( 0%{!?_without_georeplication:1} )
+%post geo-replication
+%if ( 0%{?rhel} && 0%{?rhel} >= 8 )
+%selinux_set_booleans %{selinuxbooleans}
+%endif
+if [ $1 -ge 1 ]; then
+ %systemd_postun_with_restart glusterd
+fi
+exit 0
%endif
+%post -n libglusterfs0
+/sbin/ldconfig
+
+%post -n libgfapi0
+/sbin/ldconfig
+
+%post -n libgfchangelog0
+/sbin/ldconfig
+
+%post -n libgfrpc0
+/sbin/ldconfig
+
+%post -n libgfxdr0
+/sbin/ldconfig
+
+%post -n libglusterd0
+/sbin/ldconfig
+
+%if ( 0%{!?_without_server:1} )
%post server
-/sbin/chkconfig --add glusterd
+# Legacy server
+%systemd_post glusterd
+%if ( 0%{_for_fedora_koji_builds} )
+%systemd_post glusterfsd
+%endif
+# ".cmd_log_history" is renamed to "cmd_history.log" in GlusterFS-3.7 .
+# While upgrading glusterfs-server package form GlusterFS version <= 3.6 to
+# GlusterFS version 3.7, ".cmd_log_history" should be renamed to
+# "cmd_history.log" to retain cli command history contents.
+if [ -f %{_localstatedir}/log/glusterfs/.cmd_log_history ]; then
+ mv %{_localstatedir}/log/glusterfs/.cmd_log_history \
+ %{_localstatedir}/log/glusterfs/cmd_history.log
+fi
-# Move legacy sysconf files to the correct sysconfdir
-if [ -d /etc/glusterd ]; then
- mkdir -p /var/lib
- mv /etc/glusterd /var/lib/
- ln -sf /var/lib/glusterd /etc/glusterd
+# Genuine Fedora (and EPEL) builds never put gluster files in /etc; if
+# there are any files in /etc from a prior gluster.org install, move them
+# to /var/lib. (N.B. Starting with 3.3.0 all gluster files are in /var/lib
+# in gluster.org RPMs.) Be careful to copy them on the off chance that
+# /etc and /var/lib are on separate file systems
+if [ -d /etc/glusterd -a ! -h %{_sharedstatedir}/glusterd ]; then
+ mkdir -p %{_sharedstatedir}/glusterd
+ cp -a /etc/glusterd %{_sharedstatedir}/glusterd
+ rm -rf /etc/glusterd
+ ln -sf %{_sharedstatedir}/glusterd /etc/glusterd
fi
-if [ -d /var/lib/glusterd/vols ]; then
- # Rename old volfiles in an RPM-standard way. These aren't actually
- # considered package config files, so %config doesn't work for them.
- for file in $(find /var/lib/glusterd/vols -name '*.vol'); do
+# Rename old volfiles in an RPM-standard way. These aren't actually
+# considered package config files, so %%config doesn't work for them.
+if [ -d %{_sharedstatedir}/glusterd/vols ]; then
+ for file in $(find %{_sharedstatedir}/glusterd/vols -name '*.vol'); do
newfile=${file}.rpmsave
echo "warning: ${file} saved as ${newfile}"
cp ${file} ${newfile}
done
fi
+# add marker translator
+# but first make certain that there are no old libs around to bite us
+# BZ 834847
+if [ -e /etc/ld.so.conf.d/glusterfs.conf ]; then
+ rm -f /etc/ld.so.conf.d/glusterfs.conf
+ /sbin/ldconfig
+fi
+
+%if (0%{?_with_firewalld:1})
+ %firewalld_reload
+%endif
+
pidof -c -o %PPID -x glusterd &> /dev/null
if [ $? -eq 0 ]; then
kill -9 `pgrep -f gsyncd.py` &> /dev/null
- killall glusterd &> /dev/null
- #add marker translator
+ killall --wait glusterd &> /dev/null
glusterd --xlator-option *.upgrade=on -N
- glusterd
+
+ #Cleaning leftover glusterd socket file which is created by glusterd in
+ #rpm_script_t context.
+ rm -f %{_rundir}/glusterd.socket
+
+ # glusterd _was_ running, we killed it, it exited after *.upgrade=on,
+ # so start it again
+ %service_start glusterd
else
glusterd --xlator-option *.upgrade=on -N
+
+ #Cleaning leftover glusterd socket file which is created by glusterd in
+ #rpm_script_t context.
+ rm -f %{_rundir}/glusterd.socket
+fi
+exit 0
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%pre should be placed here and keep them sorted
+##
+%pre
+getent group gluster > /dev/null || groupadd -r gluster
+getent passwd gluster > /dev/null || useradd -r -g gluster -d %{_rundir}/gluster -s /sbin/nologin -c "GlusterFS daemons" gluster
+exit 0
+
+##-----------------------------------------------------------------------------
+## All %%preun should be placed here and keep them sorted
+##
+%if ( 0%{!?_without_events:1} )
+%preun events
+if [ $1 -eq 0 ]; then
+ if [ -f %glustereventsd_svcfile ]; then
+ %service_stop glustereventsd
+ %systemd_preun glustereventsd
+ fi
fi
+exit 0
+%endif
+%if ( 0%{!?_without_server:1} )
%preun server
if [ $1 -eq 0 ]; then
- /sbin/service glusterd stop &>/dev/null || :
- /sbin/chkconfig --del glusterd
+ if [ -f %glusterfsd_svcfile ]; then
+ %service_stop glusterfsd
+ fi
+ %service_stop glusterd
+ if [ -f %glusterfsd_svcfile ]; then
+ %systemd_preun glusterfsd
+ fi
+ %systemd_preun glusterd
fi
if [ $1 -ge 1 ]; then
- /sbin/service glusterd condrestart &>/dev/null || :
+ if [ -f %glusterfsd_svcfile ]; then
+ %systemd_postun_with_restart glusterfsd
+ fi
+ %systemd_postun_with_restart glusterd
fi
+exit 0
+%endif
-# Legacy server
+%preun thin-arbiter
if [ $1 -eq 0 ]; then
- /sbin/service glusterfsd stop &>/dev/null || :
- /sbin/chkconfig --del glusterfsd
-fi
-if [ $1 -ge 1 ]; then
- /sbin/service glusterfsd condrestart &>/dev/null || :
+ if [ -f %glusterta_svcfile ]; then
+ %service_stop gluster-ta-volume
+ %systemd_preun gluster-ta-volume
+ fi
fi
+##-----------------------------------------------------------------------------
+## All %%postun should be placed here and keep them sorted
+##
+%postun
+%if ( 0%{!?_without_syslog:1} )
+%if ( 0%{?fedora} ) || ( 0%{?rhel} && 0%{?rhel} >= 6 )
+%systemd_postun_with_restart rsyslog
+%endif
+%endif
+
+%if ( 0%{!?_without_server:1} )
+%postun server
+%if (0%{?_with_firewalld:1})
+ %firewalld_reload
+%endif
+exit 0
+%endif
+
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%postun ganesha
+semanage boolean -m ganesha_use_fusefs --off
+exit 0
+%endif
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%trigger should be placed here and keep them sorted
+##
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%trigger ganesha -- selinux-policy-targeted
+semanage boolean -m ganesha_use_fusefs --on
+exit 0
+%endif
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%triggerun should be placed here and keep them sorted
+##
+%if ( 0%{!?_without_server:1} )
+%if ( 0%{?fedora} && 0%{?fedora} > 25 || ( 0%{?rhel} && 0%{?rhel} > 6 ) )
+%triggerun ganesha -- selinux-policy-targeted
+semanage boolean -m ganesha_use_fusefs --off
+exit 0
+%endif
+%endif
+
+##-----------------------------------------------------------------------------
+## All %%files should be placed here and keep them grouped
+##
+%files
+%doc ChangeLog COPYING-GPLV2 COPYING-LGPLV3 INSTALL README.md THANKS COMMITMENT
+%{_mandir}/man8/*gluster*.8*
+%if ( 0%{!?_without_server:1} )
+%exclude %{_mandir}/man8/gluster.8*
+%endif
+%dir %{_localstatedir}/log/glusterfs
+%if 0%{?!_without_server:1}
+%dir %{_datadir}/glusterfs
+%dir %{_datadir}/glusterfs/scripts
+ %{_datadir}/glusterfs/scripts/post-upgrade-script-for-quota.sh
+ %{_datadir}/glusterfs/scripts/pre-upgrade-script-for-quota.sh
+%endif
+# xlators that are needed on the client- and on the server-side
+%dir %{_libdir}/glusterfs
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/auth
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/auth/addr.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/auth/login.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/rpc-transport/socket.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/error-gen.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/delay-gen.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/io-stats.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/sink.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/debug/trace.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/access-control.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/barrier.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/cdc.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/changelog.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/utime.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/gfid-access.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/namespace.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/read-only.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/shard.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/snapview-client.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/worm.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/cloudsync.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/meta.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/io-cache.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/io-threads.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/md-cache.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/open-behind.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/quick-read.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/read-ahead.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/readdir-ahead.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/stat-prefetch.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/write-behind.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/performance/nl-cache.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/system
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/system/posix-acl.so
+%dir %attr(0775,gluster,gluster) %{_rundir}/gluster
+%if 0%{?_tmpfilesdir:1} && 0%{!?_without_server:1}
+%{_tmpfilesdir}/gluster.conf
+%endif
+
+%if ( 0%{?_without_server:1} )
+#exclude ganesha related files
+%exclude %{_sysconfdir}/ganesha/ganesha-ha.conf.sample
+%exclude %{_libexecdir}/ganesha/*
+%exclude %{_prefix}/lib/ocf/resource.d/heartbeat/*
+%endif
+
+%files cli
+%{_sbindir}/gluster
+%{_mandir}/man8/gluster.8*
+%{_sysconfdir}/bash_completion.d/gluster
+
+%files cloudsync-plugins
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins/cloudsyncs3.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/cloudsync-plugins/cloudsynccvlt.so
+
+%files -n libglusterfs-devel
+%dir %{_includedir}/glusterfs
+ %{_includedir}/glusterfs/*.h
+ %{_includedir}/glusterfs/server/*.h
+%{_libdir}/libglusterfs.so
+
+%files -n libgfapi-devel
+%dir %{_includedir}/glusterfs/api
+ %{_includedir}/glusterfs/api/*.h
+%{_libdir}/libgfapi.so
+%{_libdir}/pkgconfig/glusterfs-api.pc
+
+
+%files -n libgfchangelog-devel
+%dir %{_includedir}/glusterfs/gfchangelog
+ %{_includedir}/glusterfs/gfchangelog/*.h
+%{_libdir}/libgfchangelog.so
+%{_libdir}/pkgconfig/libgfchangelog.pc
+
+%files -n libgfrpc-devel
+%dir %{_includedir}/glusterfs/rpc
+ %{_includedir}/glusterfs/rpc/*.h
+%{_libdir}/libgfrpc.so
+
+%files -n libgfxdr-devel
+%{_libdir}/libgfxdr.so
+
+%files client-xlators
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/cluster
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/cluster/*.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/client.so
+
+%files extra-xlators
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quiesce.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/playground/template.so
+
+%files fuse
+# glusterfs is a symlink to glusterfsd, -server depends on -fuse.
+%{_sbindir}/glusterfs
+%{_sbindir}/glusterfsd
+%config(noreplace) %{_sysconfdir}/logrotate.d/glusterfs
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/fuse.so
+/sbin/mount.glusterfs
+%if ( 0%{!?_without_fusermount:1} )
+%{_bindir}/fusermount-glusterfs
+%endif
+
+%if ( 0%{?_with_gnfs:1} && 0%{!?_without_server:1} )
+%files gnfs
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs/server.so
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs
+%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/nfs-server.vol
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/nfs/run
+%ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/nfs/run/nfs.pid
+%endif
+
+%files thin-arbiter
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/thin-arbiter.so
+%dir %{_datadir}/glusterfs/scripts
+ %{_datadir}/glusterfs/scripts/setup-thin-arbiter.sh
+%config %{_sysconfdir}/glusterfs/thin-arbiter.vol
+
+%if ( 0%{?_with_systemd:1} )
+%{_unitdir}/gluster-ta-volume.service
+%endif
+
+%if ( 0%{!?_without_georeplication:1} )
+%files geo-replication
+%config(noreplace) %{_sysconfdir}/logrotate.d/glusterfs-georep
+
+%{_sbindir}/gfind_missing_files
+%{_sbindir}/gluster-mountbroker
+%dir %{_libexecdir}/glusterfs
+%dir %{_libexecdir}/glusterfs/python
+%dir %{_libexecdir}/glusterfs/python/syncdaemon
+ %{_libexecdir}/glusterfs/gsyncd
+ %{_libexecdir}/glusterfs/python/syncdaemon/*
+%dir %{_libexecdir}/glusterfs/scripts
+ %{_libexecdir}/glusterfs/scripts/get-gfid.sh
+ %{_libexecdir}/glusterfs/scripts/slave-upgrade.sh
+ %{_libexecdir}/glusterfs/scripts/gsync-upgrade.sh
+ %{_libexecdir}/glusterfs/scripts/generate-gfid-file.sh
+ %{_libexecdir}/glusterfs/scripts/gsync-sync-gfid
+ %{_libexecdir}/glusterfs/scripts/schedule_georep.py*
+ %{_libexecdir}/glusterfs/gverify.sh
+ %{_libexecdir}/glusterfs/set_geo_rep_pem_keys.sh
+ %{_libexecdir}/glusterfs/peer_gsec_create
+ %{_libexecdir}/glusterfs/peer_mountbroker
+ %{_libexecdir}/glusterfs/peer_mountbroker.py*
+ %{_libexecdir}/glusterfs/gfind_missing_files
+ %{_libexecdir}/glusterfs/peer_georep-sshkey.py*
+%{_sbindir}/gluster-georep-sshkey
+
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/geo-replication
+%ghost %attr(0644,-,-) %{_sharedstatedir}/glusterd/geo-replication/gsyncd_template.conf
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create/post/S56glusterd-geo-rep-create-post.sh
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/gsync-create/pre
+
+%endif
+
+%files -n libglusterfs0
+%{_libdir}/libglusterfs.so.*
+
+%files -n libgfapi0
+%{_libdir}/libgfapi.so.*
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mount/api.so
+
+%files -n libgfchangelog0
+%{_libdir}/libgfchangelog.so.*
+
+%files -n libgfrpc0
+%{_libdir}/libgfrpc.so.*
+
+%files -n libgfxdr0
+%{_libdir}/libgfxdr.so.*
+
+%files -n libglusterd0
+%{_libdir}/libglusterd.so.*
+%exclude %{_libdir}/libglusterd.so
+
+%files -n python%{_pythonver}-gluster
+# introducing glusterfs module in site packages.
+# so that all other gluster submodules can reside in the same namespace.
+%if ( %{_usepython3} )
+%dir %{python3_sitelib}/gluster
+ %{python3_sitelib}/gluster/__init__.*
+ %{python3_sitelib}/gluster/__pycache__
+ %{python3_sitelib}/gluster/cliutils
+%else
+%dir %{python2_sitelib}/gluster
+ %{python2_sitelib}/gluster/__init__.*
+ %{python2_sitelib}/gluster/cliutils
+%endif
+
+%files regression-tests
+%dir %{_datadir}/glusterfs
+ %{_datadir}/glusterfs/run-tests.sh
+ %{_datadir}/glusterfs/tests
+%exclude %{_datadir}/glusterfs/tests/vagrant
+
+%if ( 0%{!?_without_server:1} )
+%files ganesha
+%dir %{_libexecdir}/ganesha
+%{_sysconfdir}/ganesha/ganesha-ha.conf.sample
+%{_libexecdir}/ganesha/*
+%{_prefix}/lib/ocf/resource.d/heartbeat/*
+%{_sharedstatedir}/glusterd/hooks/1/start/post/S31ganesha-start.sh
+%ghost %attr(0644,-,-) %config(noreplace) %{_sysconfdir}/ganesha/ganesha-ha.conf
+%ghost %dir %attr(0755,-,-) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha
+%ghost %attr(0644,-,-) %config(noreplace) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha.conf
+%ghost %attr(0644,-,-) %config(noreplace) %{_localstatedir}/run/gluster/shared_storage/nfs-ganesha/ganesha-ha.conf
+%endif
+
+%if ( 0%{!?_without_ocf:1} )
+%files resource-agents
+# /usr/lib is the standard for OCF, also on x86_64
+%{_prefix}/lib/ocf/resource.d/glusterfs
+%endif
+
+%if ( 0%{!?_without_server:1} )
%files server
-%defattr(-,root,root,-)
-#%doc examples/ doc/glusterfs*.vol.sample
+%doc extras/clear_xattrs.sh
+# sysconf
%config(noreplace) %{_sysconfdir}/glusterfs
-%{_sharedstatedir}/glusterd
-%{_sysconfdir}/init.d/glusterd
-%{_sbindir}/gluster
+%exclude %{_sysconfdir}/glusterfs/thin-arbiter.vol
+%exclude %{_sysconfdir}/glusterfs/eventsconfig.json
+%exclude %{_sharedstatedir}/glusterd/nfs/nfs-server.vol
+%exclude %{_sharedstatedir}/glusterd/nfs/run/nfs.pid
+%if ( 0%{?_with_gnfs:1} )
+%exclude %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/nfs/*
+%endif
+%config(noreplace) %{_sysconfdir}/sysconfig/glusterd
+%if ( 0%{_for_fedora_koji_builds} )
+%config(noreplace) %{_sysconfdir}/sysconfig/glusterfsd
+%endif
+
+# init files
+%glusterd_svcfile
+%if ( 0%{_for_fedora_koji_builds} )
+%glusterfsd_svcfile
+%endif
+%if ( 0%{?_with_systemd:1} )
+%glusterfssharedstorage_svcfile
+%endif
+
+# binaries
%{_sbindir}/glusterd
-%{_libdir}/glusterfs/%{version}/xlator/storage*
-%{_libdir}/glusterfs/%{version}/xlator/features/posix*
-%{_libdir}/glusterfs/%{version}/xlator/protocol/server*
-%{_libdir}/glusterfs/%{version}/xlator/mgmt*
-%{_libdir}/glusterfs/%{version}/xlator/nfs*
-
-%files devel
-%defattr(-,root,root,-)
-%{_includedir}/glusterfs
-%exclude %{_includedir}/glusterfs/y.tab.h
-%{_libdir}/*.so
+%{_libexecdir}/glusterfs/glfsheal
+%{_sbindir}/gf_attach
+%{_sbindir}/gluster-setgfid2path
+# {_sbindir}/glusterfsd is the actual binary, but glusterfs (client) is a
+# symlink. The binary itself (and symlink) are part of the glusterfs-fuse
+# package, because glusterfs-server depends on that anyway.
+
+# Manpages
+%{_mandir}/man8/gluster-setgfid2path.8*
+
+# xlators
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/arbiter.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bit-rot.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/bitrot-stub.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/sdfs.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/index.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/locks.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/posix*
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/snapview-server.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/marker.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/quota*
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/selinux.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/trash.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/upcall.so
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/features/leases.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/mgmt/glusterd.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/protocol/server.so
+%dir %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage
+ %{_libdir}/glusterfs/%{version}%{?prereltag}/xlator/storage/posix.so
+
+# snap_scheduler
+%{_sbindir}/snap_scheduler.py
+%{_sbindir}/gcron.py
+%{_sbindir}/conf.py
+
+# /var/lib/glusterd, e.g. hookscripts, etc.
+%ghost %attr(0644,-,-) %config(noreplace) %{_sharedstatedir}/glusterd/glusterd.info
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/bitd
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/groups
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/virt
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/metadata-cache
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/gluster-block
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/nl-cache
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/db-workload
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/distributed-virt
+ %attr(0644,-,-) %{_sharedstatedir}/glusterd/groups/samba
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glusterfind
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glusterfind/.keys
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/glustershd
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/disabled-quota-root-xattr-heal.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S10selinux-label-brick.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/post/S13create-subdir-mounts.sh
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/add-brick/pre/S28Quota-enable-root-xattr-heal.sh
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/post/S10selinux-label-brick.sh
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/create/pre
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/copy-file/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/post
+ %{_sharedstatedir}/glusterd/hooks/1/delete/post/S57glusterfind-delete-post
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/delete/pre/S10selinux-del-fcontext.sh
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/remove-brick/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/reset
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/reset/post
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/reset/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/post/S30samba-set.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/post/S32gluster_enable_shared_storage.sh
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/set/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S29CTDBsetup.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/post/S30samba-start.sh
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/start/pre
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/post
+ %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre/S30samba-stop.sh
+ %attr(0755,-,-) %{_sharedstatedir}/glusterd/hooks/1/stop/pre/S29CTDB-teardown.sh
+%config(noreplace) %ghost %attr(0600,-,-) %{_sharedstatedir}/glusterd/options
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/peers
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/quotad
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/scrub
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/snaps
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/ss_brick
+%ghost %dir %attr(0755,-,-) %{_sharedstatedir}/glusterd/vols
+
+# Extra utility script
+%dir %{_libexecdir}/glusterfs
+%dir %{_datadir}/glusterfs/scripts
+ %{_datadir}/glusterfs/scripts/stop-all-gluster-processes.sh
+%if ( 0%{?_with_systemd:1} )
+ %{_libexecdir}/glusterfs/mount-shared-storage.sh
+ %{_datadir}/glusterfs/scripts/control-cpu-load.sh
+ %{_datadir}/glusterfs/scripts/control-mem.sh
+%endif
+
+# Incrementalapi
+ %{_libexecdir}/glusterfs/glusterfind
+%{_bindir}/glusterfind
+ %{_libexecdir}/glusterfs/peer_add_secret_pub
+
+%if ( 0%{?_with_firewalld:1} )
+%{_prefix}/lib/firewalld/services/glusterfs.xml
+%endif
+# end of server files
+%endif
+
+# Events
+%if ( 0%{!?_without_events:1} )
+%files events
+%config(noreplace) %{_sysconfdir}/glusterfs/eventsconfig.json
+%dir %{_sharedstatedir}/glusterd
+%dir %{_sharedstatedir}/glusterd/events
+%dir %{_libexecdir}/glusterfs
+ %{_libexecdir}/glusterfs/gfevents
+ %{_libexecdir}/glusterfs/peer_eventsapi.py*
+%{_sbindir}/glustereventsd
+%{_sbindir}/gluster-eventsapi
+%{_datadir}/glusterfs/scripts/eventsdash.py*
+%if ( 0%{?_with_systemd:1} )
+%{_unitdir}/glustereventsd.service
+%else
+%{_sysconfdir}/init.d/glustereventsd
+%endif
+%endif
%changelog
+* Thu May 14 2020 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- refactor, common practice, Issue #1126
+
+* Mon May 11 2020 Sunny Kumar <sunkumar@redhat.com>
+- added requires policycoreutils-python-utils on rhel8 for geo-replication
+
+* Wed Oct 9 2019 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- remove leftover bd xlator cruft
+
+* Fri Aug 23 2019 Shwetha K Acharya <sacharya@redhat.com>
+- removed {name}-ufs from Obsoletes
+- added "< version" for obsoletes {name}-gnfs and {name}-rdma
+
+* Mon Jul 15 2019 Jiffin Tony Thottan <jthottan@redhat.com>
+- Adding ganesha ha bits back in gluster repository
+
+* Fri Jul 12 2019 Amar Tumballi <amarts@redhat.com>
+- Remove rdma package, and mark older rdma package as 'Obsoletes'
+
+* Fri Jun 14 2019 Niels de Vos <ndevos@redhat.com>
+- always build glusterfs-cli to allow monitoring/managing from clients
+
+* Wed Mar 6 2019 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- remove unneeded ldconfig in scriptlets
+- reported by Igor Gnatenko in Fedora
+- https://src.fedoraproject.org/rpms/glusterfs/pull-request/5
+
+* Mon Mar 4 2019 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- s390x has RDMA, since around Fedora 27 and in RHEL7 since June 2016.
+
+* Tue Feb 26 2019 Ashish Pandey <aspandey@redhat.com>
+- Add thin-arbiter package
+
+* Sun Feb 24 2019 Aravinda VK <avishwan@redhat.com>
+- Renamed events package to gfevents
+
+* Thu Feb 21 2019 Jiffin Tony Thottan <jthottan@redhat.com>
+- Obsoleting gluster-gnfs package
+
+* Wed Nov 28 2018 Krutika Dhananjay <kdhananj@redhat.com>
+- Install /var/lib/glusterd/groups/distributed-virt by default
+
+* Tue Nov 13 2018 Niels de Vos <ndevos@redhat.com>
+- Add an option to build with ThreadSanitizer (TSAN)
+
+* Fri Sep 7 2018 Niels de Vos <ndevos@redhat.com>
+- Add an option to build with address sanitizer (ASAN)
+
+* Sun Jul 29 2018 Niels de Vos <ndevos@redhat.com>
+- Disable building glusterfs-resource-agents on el6 (#1609551)
+
+* Thu Feb 22 2018 Kotresh HR <khiremat@redhat.com>
+- Added util-linux as dependency to georeplication rpm (#1544382)
+
+* Thu Feb 1 2018 Niels de Vos <ndevos@redhat.com>
+- Add '--without server' option to facilitate el6 builds (#1074947)
+
+* Wed Jan 24 2018 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- python-ctypes no long exists, now in python stdlib (#1538258)
+
+* Thu Jan 18 2018 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Fedora 28 glibc has removed rpc headers and rpcgen, use libtirpc
+
+* Mon Dec 25 2017 Niels de Vos <ndevos@redhat.com>
+- Fedora 28 has renamed pyxattr
+
+* Wed Sep 27 2017 Mohit Agrawal <moagrawa@redhat.com>
+- Added control-cpu-load.sh and control-mem.sh scripts to glusterfs-server section(#1496335)
+
+* Tue Aug 22 2017 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- libibverbs-devel, librdmacm-devel -> rdma-core-devel #1483995
+
+* Thu Jul 20 2017 Aravinda VK <avishwan@redhat.com>
+- Added new tool/binary to set the gfid2path xattr on files
+
+* Thu Jul 13 2017 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- various directories not owned by any package
+
+* Fri Jun 16 2017 Jiffin Tony Thottan <jthottan@redhat.com>
+- Add glusterfssharedstorage.service systemd file
+
+* Fri Jun 9 2017 Poornima G <pgurusid@redhat.com>
+- Install /var/lib/glusterd/groups/nl-cache by default
+
+* Wed May 10 2017 Pranith Kumar K <pkarampu@redhat.com>
+- Install /var/lib/glusterd/groups/gluster-block by default
+
+* Thu Apr 27 2017 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- gnfs in an optional subpackage
+
+* Wed Apr 26 2017 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- /var/run/gluster owner gluster:gluster(0775) for qemu(gfapi)
+ statedumps (#1445569)
+
+* Mon Apr 24 2017 Jiffin Tony Thottan <jhottan@redhat.com>
+- Install SELinux hook scripts that manage contexts for bricks (#1047975)
+
+* Thu Apr 20 2017 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- firewalld-filesystem -> firewalld (#1443959)
+
+* Thu Apr 13 2017 Niels de Vos <ndevos@redhat.com>
+- the -regression-tests sub-package needs "bc" for some tests (#1442145)
+
+* Mon Mar 20 2017 Niels de Vos <ndevos@redhat.com>
+- Drop dependency on psmisc, pkill is used instead of killall (#1197308)
+
+* Thu Feb 16 2017 Niels de Vos <ndevos@redhat.com>
+- Obsolete and Provide python-gluster for upgrading from glusterfs < 3.10
+
+* Wed Feb 1 2017 Poornima G <pgurusid@redhat.com>
+- Install /var/lib/glusterd/groups/metadata-cache by default
+
+* Wed Jan 18 2017 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- python2 (versus python3) cleanup (#1414902)
+
+* Fri Jan 13 2017 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- switch to storhaug HA
+
+* Fri Jan 6 2017 Niels de Vos <ndevos@redhat.com>
+- use macro provided by firewalld-filesystem to reload firewalld
+
+* Thu Nov 24 2016 Jiffin Tony Thottan <jhottan@redhat.com>
+- remove S31ganesha-reset.sh from hooks (#1397795)
+
+* Thu Sep 22 2016 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- python-ctypes no long exists, now in python stdlib (#1378436)
+
+* Wed Sep 14 2016 Aravinda VK <avishwan@redhat.com>
+- Changed attribute of eventsconfig.json file as same as other configs (#1375532)
+
+* Thu Sep 08 2016 Aravinda VK <avishwan@redhat.com>
+- Added init script for glustereventsd (#1365395)
+
+* Wed Aug 31 2016 Avra Sengupta <asengupt@redhat.com>
+- Added conf.py for snap scheduler
+
+* Wed Aug 31 2016 Aravinda VK <avishwan@redhat.com>
+- Added new Geo-replication utility "gluster-georep-sshkey" (#1356508)
+
+* Thu Aug 25 2016 Aravinda VK <avishwan@redhat.com>
+- Added gluster-mountbroker utility for geo-rep mountbroker setup (#1343333)
+
+* Mon Aug 22 2016 Milind Changire <mchangir@redhat.com>
+- Add psmisc as dependency for glusterfs-fuse for killall command (#1367665)
+
+* Thu Aug 4 2016 Jiffin Tony Thottan <jthottan@redhat.com>
+- Remove ganesha.so from client xlators
+
+* Sun Jul 31 2016 Soumya Koduri <skoduri@redhat.com>
+- Add dependency on portblock resource agent for ganesha package (#1354439)
+
+* Mon Jul 18 2016 Aravinda VK <avishwan@redhat.com>
+- Added new subpackage events(glusterfs-events) (#1334044)
+
+* Fri Jul 15 2016 Aravinda VK <avishwan@redhat.com>
+- Removed ".py" extension from symlink(S57glusterfind-delete-post)(#1356868)
+
+* Thu Jul 14 2016 Aravinda VK <avishwan@redhat.com>
+- Removed extra packaging line of cliutils(python-gluster)(#1342356)
+
+* Mon Jul 11 2016 Aravinda VK <avishwan@redhat.com>
+- Added Python subpackage "cliutils" under gluster
+
+* Tue May 31 2016 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- broken brp-python-bytecompile in RHEL7 results in installed
+ but unpackaged files.
+
+* Fri May 6 2016 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- additional dirs and files in /var/lib/glusterd/... (#1326410)
+
+* Tue Apr 26 2016 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- %%postun libs w/o firewalld on RHEL6 (#1330583)
+
+* Tue Apr 12 2016 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- additional dirs and files in /var/lib/glusterd/... (#1326410)
+
+* Mon Mar 7 2016 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- %%pre, %%post etc. scriptlet cleanup, ... -p /sbin/ldconfig (#1315024)
+
+* Fri Jan 22 2016 Aravinda VK <avishwan@redhat.com>
+- Added schedule_georep.py script to the glusterfs-geo-replication (#1300956)
+
+* Sat Jan 16 2016 Niels de Vos <ndevos@redhat.com>
+- glusterfs-server depends on -api (#1296931)
+
+* Sun Jan 10 2016 Niels de Vos <ndevos@redhat.com>
+- build system got fixed so that special glupy build is not needed anymore
+
+* Mon Dec 28 2015 Niels de Vos <ndevos@redhat.com>
+- hook scripts in glusterfs-ganesha use dbus-send, add dependency (#1294446)
+
+* Tue Dec 22 2015 Niels de Vos <ndevos@redhat.com>
+- move hook scripts for nfs-ganesha to the -ganesha sub-package
+- use standard 'make' installation for the hook scripts (#1174765)
+
+* Tue Sep 1 2015 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- erroneous ghost of ../hooks/1/delete causes install failure (#1258975)
+
+* Tue Aug 25 2015 Anand Nekkunti <anekkunt@redhat.com>
+- adding glusterfs-firewalld service (#1253967)
+
+* Tue Aug 18 2015 Niels de Vos <ndevos@redhat.com>
+- Include missing directories for glusterfind hooks scripts (#1225465)
+
+* Mon Jun 15 2015 Niels de Vos <ndevos@redhat.com>
+- Replace hook script S31ganesha-set.sh by S31ganesha-start.sh (#1231738)
+
+* Fri Jun 12 2015 Aravinda VK <avishwan@redhat.com>
+- Added rsync as dependency to georeplication rpm (#1231205)
+
+* Tue Jun 02 2015 Aravinda VK <avishwan@redhat.com>
+- Added post hook for volume delete as part of glusterfind (#1225465)
+
+* Wed May 27 2015 Aravinda VK <avishwan@redhat.com>
+- Added stop-all-gluster-processes.sh in glusterfs-server section (#1204641)
+
+* Wed May 20 2015 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- python-gluster should be 'noarch' (#1219954)
+
+* Wed May 20 2015 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- move libgf{db,changelog}.pc from -api-devel to -devel (#1223385)
+
+* Wed May 20 2015 Anand Nekkunti <anekkunt@redhat.com>
+- glusterd.socket file cleanup during post run upgrade (#1210404)
+
+* Tue May 19 2015 Avra Sengupta <asengupt@redhat.com>
+- Added S32gluster_enable_shared_storage.sh as volume set hook script (#1222013)
+
+* Mon May 18 2015 Milind Changire <mchangir@redhat.com>
+- Move file peer_add_secret_pub to the server RPM to support glusterfind (#1221544)
+* Sun May 17 2015 Niels de Vos <ndevos@redhat.com>
+- Fix building on RHEL-5 based distributions (#1222317)
+
+* Tue May 05 2015 Niels de Vos <ndevos@redhat.com>
+- Introduce glusterfs-client-xlators to reduce dependencies (#1195947)
+
+* Wed Apr 15 2015 Humble Chirammal <hchiramm@redhat.com>
+- Introducing python-gluster package to own gluster namespace in sitelib (#1211848)
+
+* Sat Mar 28 2015 Mohammed Rafi KC <rkavunga@redhat.com>
+- Add dependency for librdmacm version >= 1.0.15 (#1206744)
+
+* Tue Mar 24 2015 Niels de Vos <ndevos@redhat.com>
+- move libgfdb (with sqlite dependency) to -server subpackage (#1194753)
+
+* Tue Mar 17 2015 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- glusterfs-ganesha sub-package
+
+* Thu Mar 12 2015 Kotresh H R <khiremat@redhat.com>
+- gfind_missing_files tool is included (#1187140)
+
+* Tue Mar 03 2015 Aravinda VK <avishwan@redhat.com>
+- Included glusterfind files as part of server package.
+
+* Sun Mar 1 2015 Avra Sengupta <asengupt@redhat.com>
+- Added installation of snap-scheduler
+
+* Thu Feb 26 2015 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- enable cmocka unittest support only when asked for (#1067059)
+
+* Tue Feb 24 2015 Niels de Vos <ndevos@redhat.com>
+- POSIX ACL conversion needs BuildRequires libacl-devel (#1185654)
+
+* Wed Feb 18 2015 Andreas Schneider <asn@redhat.com>
+- Change cmockery2 to cmocka.
+
+* Wed Feb 18 2015 Kaushal M <kaushal@redhat.com>
+- add userspace-rcu as a requirement
+
+* Fri Feb 13 2015 Gaurav Kumar Garg <ggarg@redhat.com>
+- .cmd_log_history file should be renamed to cmd_history.log post
+ upgrade (#1165996)
+
+* Fri Jan 30 2015 Nandaja Varma <nvarma@redhat.com>
+- remove checks for rpmbuild/mock from run-tests.sh (#178008)
+
+* Fri Jan 16 2015 Niels de Vos <ndevos@redhat.com>
+- add support for /run/gluster through a tmpfiles.d config file (#1182934)
+
+* Tue Jan 6 2015 Aravinda VK<avishwan@redhat.com>
+- Added new libexec script for mountbroker user management (peer_mountbroker)
+
+* Fri Dec 12 2014 Niels de Vos <ndevos@redhat.com>
+- do not package all /usr/share/glusterfs/* files in regression-tests (#1169005)
+
+* Fri Sep 26 2014 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- smarter logic in %%post server (#1146426)
+
+* Wed Sep 24 2014 Balamurugan Arumugam <barumuga@redhat.com>
+- remove /sbin/ldconfig as interpreter (#1145989)
+
+* Fri Sep 5 2014 Lalatendu Mohanty <lmohanty@redhat.com>
+- Changed the description as "GlusterFS a distributed filesystem"
+
+* Tue Aug 5 2014 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- logrotate files (#1126832)
+
+* Wed Jul 16 2014 Luis Pabon <lpabon@redhat.com>
+- Added cmockery2 dependency
+
+* Fri Jun 27 2014 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- killall --wait in %%post server, (#1113543)
+
+* Thu Jun 19 2014 Humble Chirammal <hchiramm@redhat.com>
+- Added dynamic loading of fuse module with glusterfs-fuse package installation in el5.
+
+* Thu Jun 12 2014 Varun Shastry <vshastry@redhat.com>
+- Add bash completion config to the cli package
+
+* Tue Jun 03 2014 Vikhyat Umrao <vumrao@redhat.com>
+- add nfs-utils package dependency for server package (#1065654)
+
+* Thu May 22 2014 Poornima G <pgurusid@redhat.com>
+- Rename old hookscripts in an RPM-standard way.
+
+* Tue May 20 2014 Niels de Vos <ndevos@redhat.com>
+- Almost drop calling ./autogen.sh
+
+* Fri Apr 25 2014 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora spec (#1091408, #1091392)
+
+* Fri Apr 25 2014 Arumugam Balamurugan <barumuga@redhat.com>
+- fix RHEL 7 build failure "Installed (but unpackaged) file(s) found" (#1058188)
+
+* Wed Apr 02 2014 Arumugam Balamurugan <barumuga@redhat.com>
+- cleanup to rearrange spec file elements
+
+* Wed Apr 02 2014 Arumugam Balamurugan <barumuga@redhat.com>
+- add version/release dynamically (#1074919)
+
+* Thu Mar 27 2014 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- attr dependency (#1184626)
+
+* Wed Mar 26 2014 Poornima G <pgurusid@redhat.com>
+- Include the hook scripts of add-brick, volume start, stop and set
+
+* Wed Feb 26 2014 Niels de Vos <ndevos@redhat.com>
+- Drop glusterfs-devel dependency from glusterfs-api (#1065750)
+
+* Wed Feb 19 2014 Justin Clift <justin@gluster.org>
+- Rename gluster.py to glupy.py to avoid namespace conflict (#1018619)
+- Move the main Glupy files into glusterfs-extra-xlators rpm
+- Move the Glupy Translator examples into glusterfs-devel rpm
+
+* Thu Feb 06 2014 Aravinda VK <avishwan@redhat.com>
+- Include geo-replication upgrade scripts and hook scripts.
+
+* Wed Jan 15 2014 Niels de Vos <ndevos@redhat.com>
+- Install /var/lib/glusterd/groups/virt by default
+
+* Sat Jan 4 2014 Niels de Vos <ndevos@redhat.com>
+- The main glusterfs package should not provide glusterfs-libs (#1048489)
+
+* Tue Dec 10 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec 3.5.0-0.1.qa3
+
+* Fri Oct 11 2013 Harshavardhana <fharshav@redhat.com>
+- Add '_sharedstatedir' macro to `/var/lib` on <= RHEL5 (#1003184)
+
+* Wed Oct 9 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec 3.4.1-2+
+
+* Wed Oct 9 2013 Niels de Vos <ndevos@redhat.com>
+- glusterfs-api-devel requires glusterfs-devel (#1016938, #1017094)
+
+* Mon Sep 30 2013 Niels de Vos <ndevos@redhat.com>
+- Package gfapi.py into the Python site-packages path (#1005146)
+
+* Tue Sep 17 2013 Harshavardhana <fharshav@redhat.com>
+- Provide a new package called "glusterfs-regression-tests" for standalone
+ regression testing.
+
+* Thu Aug 22 2013 Niels de Vos <ndevos@redhat.com>
+- Correct the day/date for some entries in this changelog (#1000019)
+
+* Wed Aug 7 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec
+- add Requires
+- add -cli subpackage,
+- fix other minor differences with Fedora glusterfs.spec
+
+* Tue Jul 30 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec, add glusterfs-libs RPM for oVirt/qemu-kvm
+
+* Thu Jul 25 2013 Csaba Henk <csaba@redhat.com>
+- Added peer_add_secret_pub and peer_gsec_create to %%{_libexecdir}/glusterfs
+
+* Thu Jul 25 2013 Aravinda VK <avishwan@redhat.com>
+- Added gverify.sh to %%{_libexecdir}/glusterfs directory.
+
+* Thu Jul 25 2013 Harshavardhana <fharshav@redhat.com>
+- Allow to build with '--without bd' to disable 'bd' xlator
+
+* Thu Jun 27 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- fix the hardening fix for shlibs, use %%sed macro, shorter ChangeLog
+
+* Wed Jun 26 2013 Niels de Vos <ndevos@redhat.com>
+- move the mount/api xlator to glusterfs-api
+
+* Fri Jun 7 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec, remove G4S/UFO and Swift
+
+* Mon Mar 4 2013 Niels de Vos <ndevos@redhat.com>
+- Package /var/run/gluster so that statedumps can be created
+
+* Wed Feb 6 2013 Kaleb S. KEITHLEY <kkeithle@redhat.com>
+- Sync with Fedora glusterfs.spec
+
+* Tue Dec 11 2012 Filip Pytloun <filip.pytloun@gooddata.com>
+- add sysconfig file
+
+* Thu Oct 25 2012 Niels de Vos <ndevos@redhat.com>
+- Add a sub-package for the OCF resource agents
+
+* Wed Sep 05 2012 Niels de Vos <ndevos@redhat.com>
+- Don't use python-ctypes on SLES (from Jörg Petersen)
+
+* Tue Jul 10 2012 Niels de Vos <ndevos@redhat.com>
+- Include extras/clear_xattrs.sh in the glusterfs-server sub-package
+
+* Thu Jun 07 2012 Niels de Vos <ndevos@redhat.com>
+- Mark /var/lib/glusterd as owned by glusterfs, subdirs belong to -server
+
* Wed May 9 2012 Kaleb S. KEITHLEY <kkeithle[at]redhat.com>
- Add BuildRequires: libxml2-devel so that configure will DTRT on for
- Fedora's Koji build system
@@ -389,7 +2063,7 @@ fi
- Fixed version reporting 3.2git
- Added nfs init script (disabled by default)
-* Fri Sep 1 2011 Joe Julian <me@joejulian.name> - 3.2.3-1
+* Thu Sep 1 2011 Joe Julian <me@joejulian.name> - 3.2.3-1
- Update to 3.2.3
* Tue Jul 19 2011 Joe Julian <me@joejulian.name> - 3.2.2-3
@@ -404,13 +2078,13 @@ fi
* Wed Jul 13 2011 Joe Julian <me@joejulian.name> - 3.2.1-2
- fix hardcoded path to gsyncd in source to match the actual file location
-* Mon Jun 21 2011 Joe Julian <me@joejulian.name> - 3.2.1
+* Tue Jun 21 2011 Joe Julian <me@joejulian.name> - 3.2.1
- Update to 3.2.1
* Mon Jun 20 2011 Joe Julian <me@joejulian.name> - 3.1.5
- Update to 3.1.5
-* Mon May 31 2011 Joe Julian <me@joejulian.name> - 3.1.5-qa1.4
+* Tue May 31 2011 Joe Julian <me@joejulian.name> - 3.1.5-qa1.4
- Current git
* Sun May 29 2011 Joe Julian <me@joejulian.name> - 3.1.5-qa1.2
@@ -423,7 +2097,7 @@ fi
- Add patch to remove forced 64 bit compile
- Obsolete glusterfs-core to allow for upgrading from gluster packaging
-* Sun Mar 19 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.3-1
+* Sat Mar 19 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.3-1
- Update to 3.1.3
- Merge in more upstream SPEC changes
- Remove patches from GlusterFS bugzilla #2309 and #2311
@@ -432,7 +2106,7 @@ fi
* Sun Feb 06 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.2-3
- Add back in legacy SPEC elements to support older branches
-* Tue Feb 03 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.2-2
+* Thu Feb 03 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.2-2
- Add patches from CloudFS project
* Tue Jan 25 2011 Jonathan Steffan <jsteffan@fedoraproject.org> - 3.1.2-1
@@ -468,7 +2142,7 @@ fi
* Sat Jan 2 2010 Jonathan Steffan <jsteffan@fedoraproject.org> - 2.0.9-1
- Update to 2.0.9
-* Sat Nov 8 2009 Jonathan Steffan <jsteffan@fedoraproject.org> - 2.0.8-1
+* Sun Nov 8 2009 Jonathan Steffan <jsteffan@fedoraproject.org> - 2.0.8-1
- Update to 2.0.8
- Remove install of glusterfs-volgen, it's properly added to
automake upstream now
@@ -536,10 +2210,10 @@ fi
* Fri May 9 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-1
- Update to 1.3.8 final.
-* Tue Apr 23 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.10
+* Wed Apr 23 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.10
- Include short patch to include fixes from latest TLA 751.
-* Mon Apr 22 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.9
+* Tue Apr 22 2008 Matthias Saou <http://freshrpms.net/> 1.3.8-0.9
- Update to 1.3.8pre6.
- Include glusterfs binary in both the client and server packages, now that
glusterfsd is a symlink to it instead of a separate binary.
@@ -583,7 +2257,7 @@ fi
* Wed Nov 21 2007 Matthias Saou <http://freshrpms.net/> 1.3.7-1
- Major spec file cleanup.
-- Add misssing %%clean section.
+- Add missing %%clean section.
- Fix ldconfig calls (weren't set for the proper sub-package).
* Sat Aug 4 2007 Matt Paine <matt@mattsoftware.com> - 1.3.pre7
diff --git a/glusterfsd/src/Makefile.am b/glusterfsd/src/Makefile.am
index 17d7a4a8139..a0a778158d8 100644
--- a/glusterfsd/src/Makefile.am
+++ b/glusterfsd/src/Makefile.am
@@ -1,19 +1,38 @@
sbin_PROGRAMS = glusterfsd
+if WITH_SERVER
+sbin_PROGRAMS += glusterfsd gf_attach
+endif
glusterfsd_SOURCES = glusterfsd.c glusterfsd-mgmt.c
glusterfsd_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
$(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
- $(top_builddir)/rpc/xdr/src/libgfxdr.la \
- $(GF_LDADD)
-glusterfsd_LDFLAGS = $(GF_LDFLAGS) $(GF_GLUSTERFS_LDFLAGS)
-noinst_HEADERS = glusterfsd.h glusterfsd-mem-types.h
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la $(GF_LDADD) $(LIB_DL)
+glusterfsd_LDFLAGS = $(GF_LDFLAGS)
-AM_CFLAGS = -fPIC -Wall -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)\
+gf_attach_SOURCES = gf_attach.c
+gf_attach_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/api/src/libgfapi.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la
+gf_attach_LDFLAGS = $(GF_LDFLAGS)
+
+noinst_HEADERS = glusterfsd.h glusterfsd-mem-types.h glusterfsd-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
-I$(top_srcdir)/libglusterfs/src -DDATADIR=\"$(localstatedir)\" \
-DCONFDIR=\"$(sysconfdir)/glusterfs\" $(GF_GLUSTERFS_CFLAGS) \
- -I$(top_srcdir)/rpc/rpc-lib/src -I$(top_srcdir)/rpc/xdr/src
+ -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \
+ -DLIBEXECDIR=\"$(GLUSTERFS_LIBEXECDIR)\"\
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src \
+ -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/xlators/nfs/server/src \
+ -I$(top_srcdir)/xlators/protocol/server/src \
+ -I$(top_srcdir)/api/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
-CLEANFILES =
+CLEANFILES =
$(top_builddir)/libglusterfs/src/libglusterfs.la:
$(MAKE) -C $(top_builddir)/libglusterfs/src/ all
@@ -24,8 +43,12 @@ uninstall-local:
install-data-local:
$(INSTALL) -d -m 755 $(DESTDIR)$(localstatedir)/run
+ $(INSTALL) -d -m 755 $(DESTDIR)$(localstatedir)/run/gluster
$(INSTALL) -d -m 755 $(DESTDIR)$(localstatedir)/log/glusterfs
+ $(INSTALL) -d -m 755 $(DESTDIR)$(sbindir)
rm -f $(DESTDIR)$(sbindir)/glusterfs
- rm -f $(DESTDIR)$(sbindir)/glusterd
ln -s glusterfsd $(DESTDIR)$(sbindir)/glusterfs
+if WITH_SERVER
+ rm -f $(DESTDIR)$(sbindir)/glusterd
ln -s glusterfsd $(DESTDIR)$(sbindir)/glusterd
+endif
diff --git a/glusterfsd/src/gf_attach.c b/glusterfsd/src/gf_attach.c
new file mode 100644
index 00000000000..c553b0b1f61
--- /dev/null
+++ b/glusterfsd/src/gf_attach.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <glusterfs/glusterfs.h>
+#include "glfs-internal.h"
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "xdr-generic.h"
+#include "glusterd1-xdr.h"
+
+/* In seconds */
+#define CONNECT_TIMEOUT 60
+#define REPLY_TIMEOUT 120
+
+int done = 0;
+int rpc_status;
+
+pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+
+struct rpc_clnt_procedure gf_attach_actors[GLUSTERD_BRICK_MAXVALUE] = {
+ [GLUSTERD_BRICK_NULL] = {"NULL", NULL},
+ [GLUSTERD_BRICK_OP] = {"BRICK_OP", NULL},
+};
+
+struct rpc_clnt_program gf_attach_prog = {
+ .progname = "brick operations",
+ .prognum = GD_BRICK_PROGRAM,
+ .progver = GD_BRICK_VERSION,
+ .proctable = gf_attach_actors,
+ .numproc = GLUSTERD_BRICK_MAXVALUE,
+};
+
+int32_t
+my_callback(struct rpc_req *req, struct iovec *iov, int count, void *frame)
+{
+ pthread_mutex_lock(&mutex);
+ rpc_status = req->rpc_status;
+ done = 1;
+ /* Signal main thread which is the only waiter */
+ pthread_cond_signal(&cond);
+ pthread_mutex_unlock(&mutex);
+ return 0;
+}
+
+/* copied from gd_syncop_submit_request */
+int
+send_brick_req(xlator_t *this, struct rpc_clnt *rpc, char *path, int op)
+{
+ int ret = -1;
+ struct timespec ts;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec iov = {
+ 0,
+ };
+ ssize_t req_size = 0;
+ call_frame_t *frame = NULL;
+ gd1_mgmt_brick_op_req brick_req;
+ void *req = &brick_req;
+
+ brick_req.op = op;
+ brick_req.name = path;
+ brick_req.input.input_val = NULL;
+ brick_req.input.input_len = 0;
+ brick_req.dict.dict_val = NULL;
+ brick_req.dict.dict_len = 0;
+
+ req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req);
+ iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size);
+ if (!iobuf)
+ goto out;
+
+ iobref = iobref_new();
+ if (!iobref)
+ goto out;
+
+ iobref_add(iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize(iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic(iov, req, (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret == -1)
+ goto out;
+
+ iov.iov_len = ret;
+
+ /* Wait for connection */
+ timespec_now_realtime(&ts);
+ ts.tv_sec += CONNECT_TIMEOUT;
+ pthread_mutex_lock(&rpc->conn.lock);
+ {
+ while (!rpc->conn.connected)
+ if (pthread_cond_timedwait(&rpc->conn.cond, &rpc->conn.lock, &ts) ==
+ ETIMEDOUT) {
+ fprintf(stderr, "timeout waiting for RPC connection\n");
+ pthread_mutex_unlock(&rpc->conn.lock);
+ return EXIT_FAILURE;
+ }
+ }
+ pthread_mutex_unlock(&rpc->conn.lock);
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Send the msg */
+ ret = rpc_clnt_submit(rpc, &gf_attach_prog, op, my_callback, &iov, 1, NULL,
+ 0, iobref, frame, NULL, 0, NULL, 0, NULL);
+ if (!ret) {
+ /* OK, wait for callback */
+ timespec_now_realtime(&ts);
+ ts.tv_sec += REPLY_TIMEOUT;
+ pthread_mutex_lock(&mutex);
+ {
+ while (!done)
+ if (pthread_cond_timedwait(&cond, &mutex, &ts) == ETIMEDOUT) {
+ fprintf(stderr, "timeout waiting for RPC reply\n");
+ pthread_mutex_unlock(&mutex);
+ return EXIT_FAILURE;
+ }
+ }
+ pthread_mutex_unlock(&mutex);
+ }
+
+out:
+
+ iobref_unref(iobref);
+ iobuf_unref(iobuf);
+ if (frame)
+ STACK_DESTROY(frame->root);
+
+ if (rpc_status != 0) {
+ fprintf(stderr, "got error %d on RPC\n", rpc_status);
+ return EXIT_FAILURE;
+ }
+
+ printf("OK\n");
+ return EXIT_SUCCESS;
+}
+
+int
+usage(char *prog)
+{
+ fprintf(stderr, "Usage: %s uds_path volfile_path (to attach)\n", prog);
+ fprintf(stderr, " %s -d uds_path brick_path (to detach)\n", prog);
+
+ return EXIT_FAILURE;
+}
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs;
+ struct rpc_clnt *rpc;
+ dict_t *options;
+ int ret;
+ int op = GLUSTERD_BRICK_ATTACH;
+
+ for (;;) {
+ switch (getopt(argc, argv, "d")) {
+ case 'd':
+ op = GLUSTERD_BRICK_TERMINATE;
+ break;
+ case -1:
+ goto done_parsing;
+ default:
+ return usage(argv[0]);
+ }
+ }
+done_parsing:
+ if (optind != (argc - 2)) {
+ return usage(argv[0]);
+ }
+
+ fs = glfs_new("gf-attach");
+ if (!fs) {
+ fprintf(stderr, "glfs_new failed\n");
+ return EXIT_FAILURE;
+ }
+
+ (void)glfs_set_logging(fs, "/dev/stderr", 7);
+ /*
+ * This will actually fail because we haven't defined a volume, but
+ * it will do enough initialization to get us going.
+ */
+ (void)glfs_init(fs);
+
+ options = dict_new();
+ if (!options) {
+ return EXIT_FAILURE;
+ }
+ ret = dict_set_str(options, "transport-type", "socket");
+ if (ret != 0) {
+ fprintf(stderr, "failed to set transport type\n");
+ return EXIT_FAILURE;
+ }
+ ret = dict_set_str(options, "transport.address-family", "unix");
+ if (ret != 0) {
+ fprintf(stderr, "failed to set address family\n");
+ return EXIT_FAILURE;
+ }
+ ret = dict_set_str(options, "transport.socket.connect-path", argv[optind]);
+ if (ret != 0) {
+ fprintf(stderr, "failed to set connect path\n");
+ return EXIT_FAILURE;
+ }
+
+ rpc = rpc_clnt_new(options, fs->ctx->master, "gf-attach-rpc", 0);
+ if (!rpc) {
+ fprintf(stderr, "rpc_clnt_new failed\n");
+ return EXIT_FAILURE;
+ }
+
+ if (rpc_clnt_register_notify(rpc, NULL, NULL) != 0) {
+ fprintf(stderr, "rpc_clnt_register_notify failed\n");
+ return EXIT_FAILURE;
+ }
+
+ if (rpc_clnt_start(rpc) != 0) {
+ fprintf(stderr, "rpc_clnt_start failed\n");
+ return EXIT_FAILURE;
+ }
+
+ return send_brick_req(fs->ctx->master, rpc, argv[optind + 1], op);
+}
diff --git a/glusterfsd/src/glusterfsd-mem-types.h b/glusterfsd/src/glusterfsd-mem-types.h
index a28a7b2e31b..e59b558deb0 100644
--- a/glusterfsd/src/glusterfsd-mem-types.h
+++ b/glusterfsd/src/glusterfsd-mem-types.h
@@ -1,37 +1,27 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __GLUSTERFSD_MEM_TYPES_H__
#define __GLUSTERFSD_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
#define GF_MEM_TYPE_START (gf_common_mt_end + 1)
enum gfd_mem_types_ {
- gfd_mt_xlator_list_t = GF_MEM_TYPE_START,
- gfd_mt_xlator_t,
- gfd_mt_xlator_cmdline_option_t,
- gfd_mt_char,
- gfd_mt_call_pool_t,
- gfd_mt_vol_top_priv_t,
- gfd_mt_end
+ gfd_mt_xlator_list_t = GF_MEM_TYPE_START,
+ gfd_mt_xlator_t,
+ gfd_mt_server_cmdline_t,
+ gfd_mt_xlator_cmdline_option_t,
+ gfd_mt_char,
+ gfd_mt_call_pool_t,
+ gfd_mt_end
};
#endif
diff --git a/glusterfsd/src/glusterfsd-messages.h b/glusterfsd/src/glusterfsd-messages.h
new file mode 100644
index 00000000000..0cdbffa71ea
--- /dev/null
+++ b/glusterfsd/src/glusterfsd-messages.h
@@ -0,0 +1,93 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERFSD_MESSAGES_H_
+#define _GLUSTERFSD_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(
+ GLUSTERFSD, glusterfsd_msg_1, glusterfsd_msg_2, glusterfsd_msg_3,
+ glusterfsd_msg_4, glusterfsd_msg_5, glusterfsd_msg_6, glusterfsd_msg_7,
+ glusterfsd_msg_8, glusterfsd_msg_9, glusterfsd_msg_10, glusterfsd_msg_11,
+ glusterfsd_msg_12, glusterfsd_msg_13, glusterfsd_msg_14, glusterfsd_msg_15,
+ glusterfsd_msg_16, glusterfsd_msg_17, glusterfsd_msg_18, glusterfsd_msg_19,
+ glusterfsd_msg_20, glusterfsd_msg_21, glusterfsd_msg_22, glusterfsd_msg_23,
+ glusterfsd_msg_24, glusterfsd_msg_25, glusterfsd_msg_26, glusterfsd_msg_27,
+ glusterfsd_msg_28, glusterfsd_msg_29, glusterfsd_msg_30, glusterfsd_msg_31,
+ glusterfsd_msg_32, glusterfsd_msg_33, glusterfsd_msg_34, glusterfsd_msg_35,
+ glusterfsd_msg_36, glusterfsd_msg_37, glusterfsd_msg_38, glusterfsd_msg_39,
+ glusterfsd_msg_40, glusterfsd_msg_41, glusterfsd_msg_42, glusterfsd_msg_43,
+ glusterfsd_msg_029, glusterfsd_msg_041, glusterfsd_msg_042);
+
+#define glusterfsd_msg_1_STR "Could not create absolute mountpoint path"
+#define glusterfsd_msg_2_STR "Could not get current working directory"
+#define glusterfsd_msg_4_STR "failed to set mount-point to options dictionary"
+#define glusterfsd_msg_3_STR "failed to set dict value for key"
+#define glusterfsd_msg_5_STR "failed to set disable for key"
+#define glusterfsd_msg_6_STR "failed to set enable for key"
+#define glusterfsd_msg_7_STR \
+ "Not a client process, not performing mount operation"
+#define glusterfsd_msg_8_STR "MOUNT_POINT initialization failed"
+#define glusterfsd_msg_9_STR "loading volume file failed"
+#define glusterfsd_msg_10_STR "xlator option is invalid"
+#define glusterfsd_msg_11_STR "Fetching the volume file from server..."
+#define glusterfsd_msg_12_STR "volume initialization failed"
+#define glusterfsd_msg_34_STR "memory init failed"
+#define glusterfsd_msg_13_STR "ERROR: glusterfs uuid generation failed"
+#define glusterfsd_msg_14_STR "ERROR: glusterfs pool creation failed"
+#define glusterfsd_msg_15_STR \
+ "ERROR: '--volfile-id' is mandatory if '-s' OR '--volfile-server' option " \
+ "is given"
+#define glusterfsd_msg_16_STR "ERROR: parsing the volfile failed"
+#define glusterfsd_msg_33_STR \
+ "obsolete option '--volfile-max-fecth-attempts or fetch-attempts' was " \
+ "provided"
+#define glusterfsd_msg_17_STR "pidfile open failed"
+#define glusterfsd_msg_18_STR "pidfile lock failed"
+#define glusterfsd_msg_20_STR "pidfile truncation failed"
+#define glusterfsd_msg_21_STR "pidfile write failed"
+#define glusterfsd_msg_22_STR "failed to exeute pthread_sigmask"
+#define glusterfsd_msg_23_STR "failed to create pthread"
+#define glusterfsd_msg_24_STR "daemonization failed"
+#define glusterfsd_msg_25_STR "mount failed"
+#define glusterfsd_msg_26_STR "failed to construct the graph"
+#define glusterfsd_msg_27_STR "fuse xlator cannot be specified in volume file"
+#define glusterfsd_msg_28_STR "Cannot reach volume specification file"
+#define glusterfsd_msg_29_STR "ERROR: glusterfsd context not initialized"
+#define glusterfsd_msg_43_STR \
+ "command line argument --brick-mux is valid only for brick process"
+#define glusterfsd_msg_029_STR "failed to create command line string"
+#define glusterfsd_msg_30_STR "Started running version"
+#define glusterfsd_msg_31_STR "Could not create new sync-environment"
+#define glusterfsd_msg_40_STR "No change in volfile, countinuing"
+#define glusterfsd_msg_39_STR "Unable to create/delete temporary file"
+#define glusterfsd_msg_38_STR \
+ "Not processing brick-op since volume graph is not yet active"
+#define glusterfsd_msg_35_STR "rpc req buffer unserialization failed"
+#define glusterfsd_msg_36_STR "problem in xlator loading"
+#define glusterfsd_msg_37_STR "failed to get dict value"
+#define glusterfsd_msg_41_STR "received attach request for volfile"
+#define glusterfsd_msg_42_STR "failed to unserialize xdata to dictionary"
+#define glusterfsd_msg_041_STR "can't detach. flie not found"
+#define glusterfsd_msg_042_STR \
+ "couldnot detach old graph. Aborting the reconfiguration operation"
+
+#endif /* !_GLUSTERFSD_MESSAGES_H_ */
diff --git a/glusterfsd/src/glusterfsd-mgmt.c b/glusterfsd/src/glusterfsd-mgmt.c
index 6122a76a36e..eaf6796e4c3 100644
--- a/glusterfsd/src/glusterfsd-mgmt.c
+++ b/glusterfsd/src/glusterfsd-mgmt.c
@@ -1,2175 +1,3055 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <stdlib.h>
#include <signal.h>
-#include <pthread.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif /* _CONFIG_H */
-#include "glusterfs.h"
-#include "stack.h"
-#include "dict.h"
-#include "event.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/gf-event.h>
+#include <glusterfs/defaults.h>
#include "rpc-clnt.h"
#include "protocol-common.h"
+#include "glusterfsd-messages.h"
#include "glusterfs3.h"
#include "portmap-xdr.h"
#include "xdr-generic.h"
#include "glusterfsd.h"
-#include "glusterfsd-mem-types.h"
#include "rpcsvc.h"
#include "cli1-xdr.h"
-#include "statedump.h"
-#include "syncop.h"
+#include <glusterfs/statedump.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/monitoring.h>
+#include "server.h"
+
+static gf_boolean_t is_mgmt_rpc_reconnect = _gf_false;
+int need_emancipate = 0;
+
+int
+glusterfs_mgmt_pmap_signin(glusterfs_ctx_t *ctx);
+int
+glusterfs_volfile_fetch(glusterfs_ctx_t *ctx);
+int
+glusterfs_process_volfp(glusterfs_ctx_t *ctx, FILE *fp);
+int
+emancipate(glusterfs_ctx_t *ctx, int ret);
+int
+glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+ char *volfile_id, char *checksum,
+ dict_t *dict);
+int
+glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ gf_volfile_t *volfile_obj, char *checksum,
+ dict_t *dict);
+int
+glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+ char *volfile_id, char *checksum,
+ dict_t *dict);
+int
+glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj);
-static char is_mgmt_rpc_reconnect;
+gf_boolean_t
+mgmt_is_multiplexed_daemon(char *name);
-int glusterfs_mgmt_pmap_signin (glusterfs_ctx_t *ctx);
-int glusterfs_volfile_fetch (glusterfs_ctx_t *ctx);
-int glusterfs_process_volfp (glusterfs_ctx_t *ctx, FILE *fp);
-int glusterfs_graph_unknown_options (glusterfs_graph_t *graph);
+static int
+glusterfs_volume_top_perf(const char *brick_path, dict_t *dict,
+ gf_boolean_t write_test);
int
-mgmt_cbk_spec (void *data)
+mgmt_cbk_spec(struct rpc_clnt *rpc, void *mydata, void *data)
{
- glusterfs_ctx_t *ctx = NULL;
+ glusterfs_ctx_t *ctx = NULL;
- ctx = glusterfs_ctx_get ();
- gf_log ("mgmt", GF_LOG_INFO, "Volume file changed");
+ ctx = glusterfsd_ctx;
+ gf_log("mgmt", GF_LOG_INFO, "Volume file changed");
- glusterfs_volfile_fetch (ctx);
- return 0;
+ glusterfs_volfile_fetch(ctx);
+ return 0;
}
-
int
-mgmt_cbk_event (void *data)
-{
- return 0;
-}
-struct iobuf *
-glusterfs_serialize_reply (rpcsvc_request_t *req, void *arg,
- struct iovec *outmsg, xdrproc_t xdrproc)
+mgmt_process_volfile(const char *volfile, ssize_t size, char *volfile_id,
+ dict_t *dict)
{
- struct iobuf *iob = NULL;
- ssize_t retlen = -1;
- ssize_t xdr_size = 0;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+ FILE *tmpfp = NULL;
+ gf_volfile_t *volfile_obj = NULL;
+ gf_volfile_t *volfile_tmp = NULL;
+ char sha256_hash[SHA256_DIGEST_LENGTH] = {
+ 0,
+ };
+ int tmp_fd = -1;
+ char template[] = "/tmp/glfs.volfile.XXXXXX";
+
+ glusterfs_compute_sha256((const unsigned char *)volfile, size, sha256_hash);
+ ctx = THIS->ctx;
+ LOCK(&ctx->volfile_lock);
+ {
+ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list)
+ {
+ if (!strcmp(volfile_id, volfile_obj->vol_id)) {
+ if (!memcmp(sha256_hash, volfile_obj->volfile_checksum,
+ sizeof(volfile_obj->volfile_checksum))) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_smsg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_40,
+ NULL);
+ goto out;
+ }
+ volfile_tmp = volfile_obj;
+ break;
+ }
+ }
- /* First, get the io buffer into which the reply in arg will
- * be serialized.
- */
- xdr_size = xdr_sizeof (xdrproc, arg);
- iob = iobuf_get2 (req->svc->ctx->iobuf_pool, xdr_size);
- if (!iob) {
- gf_log (THIS->name, GF_LOG_ERROR, "Failed to get iobuf");
- goto ret;
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode */
+ tmp_fd = mkstemp(template);
+ if (-1 == tmp_fd) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, glusterfsd_msg_39,
+ "create template=%s", template, NULL);
+ ret = -1;
+ goto out;
}
- iobuf_to_iovec (iob, outmsg);
- /* Use the given serializer to translate the give C structure in arg
- * to XDR format which will be written into the buffer in outmsg.
+ /* Calling unlink so that when the file is closed or program
+ * terminates the temporary file is deleted.
*/
- /* retlen is used to received the error since size_t is unsigned and we
- * need -1 for error notification during encoding.
- */
- retlen = xdr_serialize_generic (*outmsg, arg, xdrproc);
- if (retlen == -1) {
- gf_log (THIS->name, GF_LOG_ERROR, "Failed to encode message");
- goto ret;
+ ret = sys_unlink(template);
+ if (ret < 0) {
+ gf_smsg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_39,
+ "delete template=%s", template, NULL);
+ ret = 0;
}
- outmsg->iov_len = retlen;
-ret:
- if (retlen == -1) {
- iob = NULL;
+ tmpfp = fdopen(tmp_fd, "w+b");
+ if (!tmpfp) {
+ ret = -1;
+ goto unlock;
+ }
+
+ fwrite(volfile, size, 1, tmpfp);
+ fflush(tmpfp);
+ if (ferror(tmpfp)) {
+ ret = -1;
+ goto unlock;
}
- return iob;
+ if (!volfile_tmp) {
+ /* There is no checksum in the list, which means simple attach
+ * the volfile
+ */
+ ret = glusterfs_process_svc_attach_volfp(ctx, tmpfp, volfile_id,
+ sha256_hash, dict);
+ goto unlock;
+ }
+ ret = glusterfs_mux_volfile_reconfigure(tmpfp, ctx, volfile_obj,
+ sha256_hash, dict);
+ if (ret < 0) {
+ gf_msg_debug("glusterfsd-mgmt", EINVAL, "Reconfigure failed !!");
+ }
+ }
+unlock:
+ UNLOCK(&ctx->volfile_lock);
+out:
+ if (tmpfp)
+ fclose(tmpfp);
+ else if (tmp_fd != -1)
+ sys_close(tmp_fd);
+ return ret;
}
int
-glusterfs_submit_reply (rpcsvc_request_t *req, void *arg,
- struct iovec *payload, int payloadcount,
- struct iobref *iobref, xdrproc_t xdrproc)
+mgmt_cbk_event(struct rpc_clnt *rpc, void *mydata, void *data)
{
- struct iobuf *iob = NULL;
- int ret = -1;
- struct iovec rsp = {0,};
- char new_iobref = 0;
+ return 0;
+}
- if (!req) {
- GF_ASSERT (req);
- goto out;
- }
+struct iobuf *
+glusterfs_serialize_reply(rpcsvc_request_t *req, void *arg,
+ struct iovec *outmsg, xdrproc_t xdrproc)
+{
+ struct iobuf *iob = NULL;
+ ssize_t retlen = -1;
+ ssize_t xdr_size = 0;
+
+ /* First, get the io buffer into which the reply in arg will
+ * be serialized.
+ */
+ xdr_size = xdr_sizeof(xdrproc, arg);
+ iob = iobuf_get2(req->svc->ctx->iobuf_pool, xdr_size);
+ if (!iob) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to get iobuf");
+ goto ret;
+ }
+
+ iobuf_to_iovec(iob, outmsg);
+ /* Use the given serializer to translate the give C structure in arg
+ * to XDR format which will be written into the buffer in outmsg.
+ */
+ /* retlen is used to received the error since size_t is unsigned and we
+ * need -1 for error notification during encoding.
+ */
+ retlen = xdr_serialize_generic(*outmsg, arg, xdrproc);
+ if (retlen == -1) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to encode message");
+ GF_FREE(iob);
+ goto ret;
+ }
+
+ outmsg->iov_len = retlen;
+ret:
+ if (retlen == -1) {
+ iob = NULL;
+ }
- if (!iobref) {
- iobref = iobref_new ();
- if (!iobref) {
- gf_log (THIS->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
+ return iob;
+}
- new_iobref = 1;
+int
+glusterfs_submit_reply(rpcsvc_request_t *req, void *arg, struct iovec *payload,
+ int payloadcount, struct iobref *iobref,
+ xdrproc_t xdrproc)
+{
+ struct iobuf *iob = NULL;
+ int ret = -1;
+ struct iovec rsp = {
+ 0,
+ };
+ char new_iobref = 0;
+
+ if (!req) {
+ GF_ASSERT(req);
+ goto out;
+ }
+
+ if (!iobref) {
+ iobref = iobref_new();
+ if (!iobref) {
+ gf_log(THIS->name, GF_LOG_ERROR, "out of memory");
+ goto out;
}
- iob = glusterfs_serialize_reply (req, arg, &rsp, xdrproc);
- if (!iob) {
- gf_log_callingfn (THIS->name, GF_LOG_ERROR, "Failed to serialize reply");
- goto out;
- }
+ new_iobref = 1;
+ }
- iobref_add (iobref, iob);
+ iob = glusterfs_serialize_reply(req, arg, &rsp, xdrproc);
+ if (!iob) {
+ gf_log_callingfn(THIS->name, GF_LOG_ERROR, "Failed to serialize reply");
+ } else {
+ iobref_add(iobref, iob);
+ }
- ret = rpcsvc_submit_generic (req, &rsp, 1, payload, payloadcount,
- iobref);
+ ret = rpcsvc_submit_generic(req, &rsp, 1, payload, payloadcount, iobref);
- /* Now that we've done our job of handing the message to the RPC layer
- * we can safely unref the iob in the hope that RPC layer must have
- * ref'ed the iob on receiving into the txlist.
- */
- if (ret == -1) {
- gf_log (THIS->name, GF_LOG_ERROR, "Reply submission failed");
- goto out;
- }
+ /* Now that we've done our job of handing the message to the RPC layer
+ * we can safely unref the iob in the hope that RPC layer must have
+ * ref'ed the iob on receiving into the txlist.
+ */
+ if (ret == -1) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Reply submission failed");
+ goto out;
+ }
- ret = 0;
+ ret = 0;
out:
- if (iob)
- iobuf_unref (iob);
+ if (iob)
+ iobuf_unref(iob);
- if (new_iobref && iobref)
- iobref_unref (iobref);
+ if (new_iobref && iobref)
+ iobref_unref(iobref);
- return ret;
+ return ret;
}
int
-glusterfs_terminate_response_send (rpcsvc_request_t *req, int op_ret)
+glusterfs_terminate_response_send(rpcsvc_request_t *req, int op_ret)
{
- gd1_mgmt_brick_op_rsp rsp = {0,};
- dict_t *dict = NULL;
- int ret = 0;
-
- rsp.op_ret = op_ret;
- rsp.op_errno = 0;
- rsp.op_errstr = "";
- dict = dict_new ();
-
- if (dict)
- ret = dict_allocate_and_serialize (dict, &rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
-
+ gd1_mgmt_brick_op_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ int ret = 0;
+
+ rsp.op_ret = op_ret;
+ rsp.op_errno = 0;
+ rsp.op_errstr = "";
+ dict = dict_new();
+
+ if (dict)
+ ret = dict_allocate_and_serialize(dict, &rsp.output.output_val,
+ &rsp.output.output_len);
+
+ if (ret == 0)
+ ret = glusterfs_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
- if (ret == 0)
- ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ GF_FREE(rsp.output.output_val);
+ if (dict)
+ dict_unref(dict);
+ return ret;
+}
- if (rsp.output.output_val)
- GF_FREE (rsp.output.output_val);
- if (dict)
- dict_unref (dict);
- return ret;
+int
+glusterfs_handle_terminate(rpcsvc_request_t *req)
+{
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ ssize_t ret;
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_t *top = NULL;
+ xlator_t *victim = NULL;
+ xlator_t *tvictim = NULL;
+ xlator_list_t **trav_p = NULL;
+ gf_boolean_t lockflag = _gf_false;
+ gf_boolean_t still_bricks_attached = _gf_false;
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ return -1;
+ }
+ ctx = glusterfsd_ctx;
+
+ LOCK(&ctx->volfile_lock);
+ {
+ /* Find the xlator_list_t that points to our victim. */
+ if (glusterfsd_ctx->active) {
+ top = glusterfsd_ctx->active->first;
+ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ victim = (*trav_p)->xlator;
+ if (!victim->cleanup_starting &&
+ strcmp(victim->name, xlator_req.name) == 0) {
+ break;
+ }
+ }
+ }
+
+ if (!top)
+ goto err;
+ }
+ if (!*trav_p) {
+ gf_log(THIS->name, GF_LOG_ERROR, "can't terminate %s - not found",
+ xlator_req.name);
+ /*
+ * Used to be -ENOENT. However, the caller asked us to
+ * make sure it's down and if it's already down that's
+ * good enough.
+ */
+ glusterfs_terminate_response_send(req, 0);
+ goto err;
+ }
+
+ glusterfs_terminate_response_send(req, 0);
+ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ tvictim = (*trav_p)->xlator;
+ if (!tvictim->cleanup_starting &&
+ !strcmp(tvictim->name, xlator_req.name)) {
+ continue;
+ }
+ if (!tvictim->cleanup_starting) {
+ still_bricks_attached = _gf_true;
+ break;
+ }
+ }
+ if (!still_bricks_attached) {
+ gf_log(THIS->name, GF_LOG_INFO,
+ "terminating after loss of last child %s", xlator_req.name);
+ rpc_clnt_mgmt_pmap_signout(glusterfsd_ctx, xlator_req.name);
+ kill(getpid(), SIGTERM);
+ } else {
+ /* TODO cleanup sequence needs to be done properly for
+ Quota and Changelog
+ */
+ if (victim->cleanup_starting)
+ goto err;
+
+ rpc_clnt_mgmt_pmap_signout(glusterfsd_ctx, xlator_req.name);
+ victim->cleanup_starting = 1;
+
+ UNLOCK(&ctx->volfile_lock);
+ lockflag = _gf_true;
+
+ gf_log(THIS->name, GF_LOG_INFO,
+ "detaching not-only"
+ " child %s",
+ xlator_req.name);
+ top->notify(top, GF_EVENT_CLEANUP, victim);
+ }
+err:
+ if (!lockflag)
+ UNLOCK(&ctx->volfile_lock);
+ if (xlator_req.input.input_val)
+ free(xlator_req.input.input_val);
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
+ free(xlator_req.name);
+ xlator_req.name = NULL;
+ return 0;
}
int
-glusterfs_handle_terminate (rpcsvc_request_t *req)
+glusterfs_translator_info_response_send(rpcsvc_request_t *req, int ret,
+ char *msg, dict_t *output)
{
+ gd1_mgmt_brick_op_rsp rsp = {
+ 0,
+ };
+ gf_boolean_t free_ptr = _gf_false;
+ GF_ASSERT(req);
+
+ rsp.op_ret = ret;
+ rsp.op_errno = 0;
+ if (ret && msg && msg[0])
+ rsp.op_errstr = msg;
+ else
+ rsp.op_errstr = "";
- glusterfs_terminate_response_send (req, 0);
- cleanup_and_exit (SIGTERM);
- return 0;
+ ret = -1;
+ if (output) {
+ ret = dict_allocate_and_serialize(output, &rsp.output.output_val,
+ &rsp.output.output_len);
+ }
+ if (!ret)
+ free_ptr = _gf_true;
+
+ glusterfs_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ ret = 0;
+ if (free_ptr)
+ GF_FREE(rsp.output.output_val);
+ return ret;
}
int
-glusterfs_translator_info_response_send (rpcsvc_request_t *req, int ret,
- char *msg, dict_t *output)
+glusterfs_xlator_op_response_send(rpcsvc_request_t *req, int op_ret, char *msg,
+ dict_t *output)
{
- gd1_mgmt_brick_op_rsp rsp = {0,};
- gf_boolean_t free_ptr = _gf_false;
- GF_ASSERT (req);
+ gd1_mgmt_brick_op_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ gf_boolean_t free_ptr = _gf_false;
+ GF_ASSERT(req);
+
+ rsp.op_ret = op_ret;
+ rsp.op_errno = 0;
+ if (op_ret && msg && msg[0])
+ rsp.op_errstr = msg;
+ else
+ rsp.op_errstr = "";
- rsp.op_ret = ret;
- rsp.op_errno = 0;
- if (ret && msg && msg[0])
- rsp.op_errstr = msg;
- else
- rsp.op_errstr = "";
+ if (output) {
+ ret = dict_allocate_and_serialize(output, &rsp.output.output_val,
+ &rsp.output.output_len);
+ }
+ if (!ret)
+ free_ptr = _gf_true;
- ret = -1;
- if (output) {
- ret = dict_allocate_and_serialize (output,
- &rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
- }
- if (!ret)
- free_ptr = _gf_true;
+ ret = glusterfs_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
- ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
- if (free_ptr)
- GF_FREE (rsp.output.output_val);
- return ret;
+ if (free_ptr)
+ GF_FREE(rsp.output.output_val);
+
+ return ret;
}
int
-glusterfs_handle_translator_info_get_cont (gfd_vol_top_priv_t *priv)
+glusterfs_handle_translator_info_get(rpcsvc_request_t *req)
{
- int ret = -1;
- xlator_t *any = NULL;
- xlator_t *xlator = NULL;
- glusterfs_graph_t *active = NULL;
- glusterfs_ctx_t *ctx = NULL;
- char msg[2048] = {0,};
- dict_t *output = NULL;
- dict_t *dict = NULL;
-
- GF_ASSERT (priv);
-
- dict = dict_new ();
- ret = dict_unserialize (priv->xlator_req.input.input_val,
- priv->xlator_req.input.input_len, &dict);
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to unserialize dict");
- goto cont;
- }
- ret = dict_set_double (dict, "time", priv->time);
- if (ret)
- goto cont;
- ret = dict_set_double (dict, "throughput", priv->throughput);
- if (ret)
- goto cont;
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ gf1_cli_top_op top_op = 0;
+ xlator_t *any = NULL;
+ xlator_t *xlator = NULL;
+ glusterfs_graph_t *active = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ char msg[2048] = {
+ 0,
+ };
+ dict_t *output = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ dict = dict_new();
+ ret = dict_unserialize(xlator_req.input.input_val,
+ xlator_req.input.input_len, &dict);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, "top-op", (int32_t *)&top_op);
+ if (ret)
+ goto cont;
+ if (GF_CLI_TOP_READ_PERF == top_op) {
+ ret = glusterfs_volume_top_perf(xlator_req.name, dict, _gf_false);
+ } else if (GF_CLI_TOP_WRITE_PERF == top_op) {
+ ret = glusterfs_volume_top_perf(xlator_req.name, dict, _gf_true);
+ }
cont:
- ctx = glusterfs_ctx_get ();
- GF_ASSERT (ctx);
- active = ctx->active;
- any = active->first;
+ ctx = glusterfsd_ctx;
+ GF_ASSERT(ctx);
+ active = ctx->active;
+ if (active == NULL) {
+ gf_log(THIS->name, GF_LOG_ERROR, "ctx->active returned NULL");
+ ret = -1;
+ goto out;
+ }
+ any = active->first;
- xlator = xlator_search_by_name (any, priv->xlator_req.name);
+ xlator = get_xlator_by_name(any, xlator_req.name);
+ if (!xlator) {
+ ret = -1;
+ snprintf(msg, sizeof(msg), "xlator %s is not loaded", xlator_req.name);
+ goto out;
+ }
+
+ if (strcmp(xlator->type, "debug/io-stats")) {
+ xlator = get_xlator_by_type(xlator, "debug/io-stats");
if (!xlator) {
- snprintf (msg, sizeof (msg), "xlator %s is not loaded",
- priv->xlator_req.name);
- goto out;
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "xlator-type debug/io-stats is not loaded");
+ goto out;
}
+ }
- output = dict_new ();
- ret = xlator->notify (xlator, GF_EVENT_TRANSLATOR_INFO, dict, output);
+ output = dict_new();
+ ret = xlator->notify(xlator, GF_EVENT_TRANSLATOR_INFO, dict, output);
out:
- ret = glusterfs_translator_info_response_send (priv->req, ret,
- msg, output);
-
- if (priv->xlator_req.name)
- free (priv->xlator_req.name);
- if (priv->xlator_req.input.input_val)
- free (priv->xlator_req.input.input_val);
- if (dict)
- dict_unref (dict);
- if (output)
- dict_unref (output);
- GF_FREE (priv);
-
- return ret;
+ ret = glusterfs_translator_info_response_send(req, ret, msg, output);
+
+ free(xlator_req.name);
+ free(xlator_req.input.input_val);
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
+ if (output)
+ dict_unref(output);
+ if (dict)
+ dict_unref(dict);
+ return ret;
}
-int
-glusterfs_xlator_op_response_send (rpcsvc_request_t *req, int op_ret,
- char *msg, dict_t *output)
+static int
+glusterfs_volume_top_perf(const char *brick_path, dict_t *dict,
+ gf_boolean_t write_test)
{
- gd1_mgmt_brick_op_rsp rsp = {0,};
- int ret = -1;
- gf_boolean_t free_ptr = _gf_false;
- GF_ASSERT (req);
-
- rsp.op_ret = op_ret;
- rsp.op_errno = 0;
- if (op_ret && msg && msg[0])
- rsp.op_errstr = msg;
- else
- rsp.op_errstr = "";
-
- if (output) {
- ret = dict_allocate_and_serialize (output,
- &rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
- }
- if (!ret)
- free_ptr = _gf_true;
-
- ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
-
- if (free_ptr)
- GF_FREE (rsp.output.output_val);
+ int32_t fd = -1;
+ int32_t output_fd = -1;
+ char export_path[PATH_MAX] = {
+ 0,
+ };
+ char *buf = NULL;
+ int32_t iter = 0;
+ int32_t ret = -1;
+ uint64_t total_blks = 0;
+ uint32_t blk_size;
+ uint32_t blk_count;
+ double throughput = 0;
+ double time = 0;
+ struct timeval begin, end = {
+ 0,
+ };
+
+ GF_ASSERT(brick_path);
+
+ ret = dict_get_uint32(dict, "blk-size", &blk_size);
+ if (ret)
+ goto out;
+ ret = dict_get_uint32(dict, "blk-cnt", &blk_count);
+ if (ret)
+ goto out;
+
+ if (!(blk_size > 0) || !(blk_count > 0))
+ goto out;
+
+ buf = GF_CALLOC(1, blk_size * sizeof(*buf), gf_common_mt_char);
+ if (!buf) {
+ ret = -1;
+ gf_log("glusterd", GF_LOG_ERROR, "Could not allocate memory");
+ goto out;
+ }
+
+ snprintf(export_path, sizeof(export_path), "%s/%s", brick_path,
+ ".gf-tmp-stats-perf");
+ fd = open(export_path, O_CREAT | O_RDWR, S_IRWXU);
+ if (-1 == fd) {
+ ret = -1;
+ gf_log("glusterd", GF_LOG_ERROR, "Could not open tmp file");
+ goto out;
+ }
+
+ gettimeofday(&begin, NULL);
+ for (iter = 0; iter < blk_count; iter++) {
+ ret = sys_write(fd, buf, blk_size);
+ if (ret != blk_size) {
+ ret = -1;
+ goto out;
+ }
+ total_blks += ret;
+ }
+ gettimeofday(&end, NULL);
+ if (total_blks != ((uint64_t)blk_size * blk_count)) {
+ gf_log("glusterd", GF_LOG_WARNING, "Error in write");
+ ret = -1;
+ goto out;
+ }
+
+ time = gf_tvdiff(&begin, &end);
+ throughput = total_blks / time;
+ gf_log("glusterd", GF_LOG_INFO,
+ "Throughput %.2f Mbps time %.2f secs "
+ "bytes written %" PRId64,
+ throughput, time, total_blks);
+
+ /* if it's a write test, we are done. Otherwise, we continue to the read
+ * part */
+ if (write_test == _gf_true) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = sys_fsync(fd);
+ if (ret) {
+ gf_log("glusterd", GF_LOG_ERROR, "could not flush cache");
+ goto out;
+ }
+ ret = sys_lseek(fd, 0L, 0);
+ if (ret != 0) {
+ gf_log("glusterd", GF_LOG_ERROR, "could not seek back to start");
+ ret = -1;
+ goto out;
+ }
- return ret;
+ output_fd = open("/dev/null", O_RDWR);
+ if (-1 == output_fd) {
+ ret = -1;
+ gf_log("glusterd", GF_LOG_ERROR, "Could not open output file");
+ goto out;
+ }
+
+ total_blks = 0;
+
+ gettimeofday(&begin, NULL);
+ for (iter = 0; iter < blk_count; iter++) {
+ ret = sys_read(fd, buf, blk_size);
+ if (ret != blk_size) {
+ ret = -1;
+ goto out;
+ }
+ ret = sys_write(output_fd, buf, blk_size);
+ if (ret != blk_size) {
+ ret = -1;
+ goto out;
+ }
+ total_blks += ret;
+ }
+ gettimeofday(&end, NULL);
+ if (total_blks != ((uint64_t)blk_size * blk_count)) {
+ ret = -1;
+ gf_log("glusterd", GF_LOG_WARNING, "Error in read");
+ goto out;
+ }
+
+ time = gf_tvdiff(&begin, &end);
+ throughput = total_blks / time;
+ gf_log("glusterd", GF_LOG_INFO,
+ "Throughput %.2f Mbps time %.2f secs "
+ "bytes read %" PRId64,
+ throughput, time, total_blks);
+ ret = 0;
+out:
+ if (fd >= 0)
+ sys_close(fd);
+ if (output_fd >= 0)
+ sys_close(output_fd);
+ GF_FREE(buf);
+ sys_unlink(export_path);
+ if (ret == 0) {
+ ret = dict_set_double(dict, "time", time);
+ if (ret)
+ goto end;
+ ret = dict_set_double(dict, "throughput", throughput);
+ if (ret)
+ goto end;
+ }
+end:
+ return ret;
}
int
-glusterfs_handle_translator_info_get (rpcsvc_request_t *req)
+glusterfs_handle_translator_op(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- gd1_mgmt_brick_op_req xlator_req = {0,};
- dict_t *dict = NULL;
- xlator_t *this = NULL;
- gf1_cli_top_op top_op = 0;
- uint32_t blk_size = 0;
- uint32_t blk_count = 0;
- gfd_vol_top_priv_t *priv = NULL;
- pthread_t tid = -1;
-
- GF_ASSERT (req);
- this = THIS;
- GF_ASSERT (this);
-
- if (!xdr_to_generic (req->msg[0], &xlator_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
- dict = dict_new ();
-
- ret = dict_unserialize (xlator_req.input.input_val,
- xlator_req.input.input_len,
- &dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- }
+ int32_t ret = -1;
+ int32_t op_ret = 0;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ dict_t *input = NULL;
+ xlator_t *xlator = NULL;
+ xlator_t *any = NULL;
+ dict_t *output = NULL;
+ char key[32] = {0};
+ int len;
+ char *xname = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *this = NULL;
+ int i = 0;
+ int count = 0;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ active = ctx->active;
+ if (!active) {
+ ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, EAGAIN, glusterfsd_msg_38,
+ "brick-op_no.=%d", xlator_req.op, NULL);
+ goto out;
+ }
+ any = active->first;
+ input = dict_new();
+ ret = dict_unserialize(xlator_req.input.input_val,
+ xlator_req.input.input_len, &input);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ } else {
+ input->extra_stdfree = xlator_req.input.input_val;
+ }
+
+ ret = dict_get_int32(input, "count", &count);
+
+ output = dict_new();
+ if (!output) {
+ ret = -1;
+ goto out;
+ }
- priv = GF_MALLOC (sizeof (gfd_vol_top_priv_t), gfd_mt_vol_top_priv_t);
- if (!priv) {
- gf_log ("glusterd", GF_LOG_ERROR, "failed to allocate memory");
- goto out;
- }
- priv->xlator_req = xlator_req;
- priv->req = req;
-
- ret = dict_get_int32 (dict, "top-op", (int32_t *)&top_op);
- if ((!ret) && (GF_CLI_TOP_READ_PERF == top_op ||
- GF_CLI_TOP_WRITE_PERF == top_op)) {
- ret = dict_get_uint32 (dict, "blk-size", &blk_size);
- if (ret)
- goto cont;
- ret = dict_get_uint32 (dict, "blk-cnt", &blk_count);
- if (ret)
- goto cont;
- priv->blk_size = blk_size;
- priv->blk_count = blk_count;
- if (GF_CLI_TOP_READ_PERF == top_op) {
- ret = pthread_create (&tid, NULL,
- glusterfs_volume_top_read_perf,
- priv);
- } else if ( GF_CLI_TOP_WRITE_PERF == top_op) {
- ret = pthread_create (&tid, NULL,
- glusterfs_volume_top_write_perf,
- priv);
- }
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "Thread create failed");
- goto cont;
- }
- gf_log ("glusterd", GF_LOG_DEBUG, "Created new thread with "
- "tid %u", (unsigned int)tid);
- goto out;
+ for (i = 0; i < count; i++) {
+ len = snprintf(key, sizeof(key), "xl-%d", i);
+ ret = dict_get_strn(input, key, len, &xname);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Couldn't get "
+ "xlator %s ",
+ key);
+ goto out;
}
-cont:
- priv->throughput = 0;
- priv->time = 0;
- ret = glusterfs_handle_translator_info_get_cont (priv);
+ xlator = xlator_search_by_name(any, xname);
+ if (!xlator) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "xlator %s is not "
+ "loaded",
+ xname);
+ goto out;
+ }
+ }
+ for (i = 0; i < count; i++) {
+ len = snprintf(key, sizeof(key), "xl-%d", i);
+ ret = dict_get_strn(input, key, len, &xname);
+ xlator = xlator_search_by_name(any, xname);
+ XLATOR_NOTIFY(ret, xlator, GF_EVENT_TRANSLATOR_OP, input, output);
+ /* If notify fails for an xlator we need to capture it but
+ * continue with the loop. */
+ if (ret)
+ op_ret = -1;
+ }
+ ret = op_ret;
out:
- if (dict)
- dict_unref (dict);
- return ret;
+ glusterfs_xlator_op_response_send(req, ret, "", output);
+ if (input)
+ dict_unref(input);
+ if (output)
+ dict_unref(output);
+ free(xlator_req.name); // malloced by xdr
+
+ return 0;
}
-void *
-glusterfs_volume_top_write_perf (void *args)
+int
+glusterfs_handle_bitrot(rpcsvc_request_t *req)
{
- int32_t fd = -1;
- int32_t input_fd = -1;
- char export_path[PATH_MAX];
- char *buf = NULL;
- uint32_t blk_size = 0;
- uint32_t blk_count = 0;
- int32_t iter = 0;
- int32_t ret = -1;
- uint64_t total_blks = 0;
- struct timeval begin, end = {0,};
- double throughput = 0;
- double time = 0;
- gfd_vol_top_priv_t *priv = NULL;
-
- GF_ASSERT (args);
- priv = (gfd_vol_top_priv_t *)args;
-
- blk_size = priv->blk_size;
- blk_count = priv->blk_count;
-
- snprintf (export_path, sizeof (export_path), "%s/%s",
- priv->xlator_req.name, ".gf-tmp-stats-perf");
-
- fd = open (export_path, O_CREAT|O_RDWR, S_IRWXU);
- if (-1 == fd) {
- ret = -1;
- gf_log ("glusterd", GF_LOG_ERROR, "Could not open tmp file");
- goto out;
- }
-
- buf = GF_MALLOC (blk_size * sizeof(*buf), gf_common_mt_char);
- if (!buf) {
- ret = -1;
- goto out;
- }
-
- input_fd = open ("/dev/zero", O_RDONLY);
- if (-1 == input_fd) {
- ret = -1;
- gf_log ("glusterd",GF_LOG_ERROR, "Unable to open input file");
- goto out;
- }
-
- gettimeofday (&begin, NULL);
- for (iter = 0; iter < blk_count; iter++) {
- ret = read (input_fd, buf, blk_size);
- if (ret != blk_size) {
- ret = -1;
- goto out;
- }
- ret = write (fd, buf, blk_size);
- if (ret != blk_size) {
- ret = -1;
- goto out;
- }
- total_blks += ret;
- }
- ret = 0;
- if (total_blks != ((uint64_t)blk_size * blk_count)) {
- gf_log ("glusterd", GF_LOG_WARNING, "Error in write");
- ret = -1;
- goto out;
- }
-
- gettimeofday (&end, NULL);
- time = (end.tv_sec - begin.tv_sec) * 1e6
- + (end.tv_usec - begin.tv_usec);
- throughput = total_blks / time;
- gf_log ("glusterd", GF_LOG_INFO, "Throughput %.2f Mbps time %.2f secs "
- "bytes written %"PRId64, throughput, time, total_blks);
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ dict_t *input = NULL;
+ dict_t *output = NULL;
+ xlator_t *any = NULL;
+ xlator_t *this = NULL;
+ xlator_t *xlator = NULL;
+ char msg[2048] = {
+ 0,
+ };
+ char xname[1024] = {
+ 0,
+ };
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ char *scrub_opt = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+
+ if (ret < 0) {
+ /*failed to decode msg;*/
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT(ctx);
+
+ active = ctx->active;
+ if (!active) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ any = active->first;
+
+ input = dict_new();
+ if (!input)
+ goto out;
+
+ ret = dict_unserialize(xlator_req.input.input_val,
+ xlator_req.input.input_len, &input);
+
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, glusterfsd_msg_35, NULL);
+ goto out;
+ }
+
+ /* Send scrubber request to bitrot xlator */
+ snprintf(xname, sizeof(xname), "%s-bit-rot-0", xlator_req.name);
+ xlator = xlator_search_by_name(any, xname);
+ if (!xlator) {
+ snprintf(msg, sizeof(msg), "xlator %s is not loaded", xname);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, glusterfsd_msg_36, NULL);
+ goto out;
+ }
+
+ output = dict_new();
+ if (!output) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_get_str(input, "scrub-value", &scrub_opt);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Failed to get scrub value");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, glusterfsd_msg_37, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ if (!strncmp(scrub_opt, "status", SLEN("status"))) {
+ ret = xlator->notify(xlator, GF_EVENT_SCRUB_STATUS, input, output);
+ } else if (!strncmp(scrub_opt, "ondemand", SLEN("ondemand"))) {
+ ret = xlator->notify(xlator, GF_EVENT_SCRUB_ONDEMAND, input, output);
+ if (ret == -2) {
+ snprintf(msg, sizeof(msg),
+ "Scrubber is in "
+ "Pause/Inactive/Running state");
+ ret = -1;
+ goto out;
+ }
+ }
out:
- priv->throughput = throughput;
- priv->time = time;
-
- if (fd >= 0)
- close (fd);
- if (input_fd >= 0)
- close (input_fd);
- if (buf)
- GF_FREE (buf);
- unlink (export_path);
-
- (void)glusterfs_handle_translator_info_get_cont (priv);
-
- return NULL;
+ glusterfs_translator_info_response_send(req, ret, msg, output);
+
+ if (input)
+ dict_unref(input);
+ free(xlator_req.input.input_val); /*malloced by xdr*/
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
+ if (output)
+ dict_unref(output);
+ free(xlator_req.name);
+
+ return 0;
}
-void *
-glusterfs_volume_top_read_perf (void *args)
+int
+glusterfs_handle_attach(rpcsvc_request_t *req)
{
- int32_t fd = -1;
- int32_t input_fd = -1;
- int32_t output_fd = -1;
- char export_path[PATH_MAX];
- char *buf = NULL;
- uint32_t blk_size = 0;
- uint32_t blk_count = 0;
- int32_t iter = 0;
- int32_t ret = -1;
- uint64_t total_blks = 0;
- struct timeval begin, end = {0,};
- double throughput = 0;
- double time = 0;
- gfd_vol_top_priv_t *priv = NULL;
-
- GF_ASSERT (args);
- priv = (gfd_vol_top_priv_t *)args;
-
- blk_size = priv->blk_size;
- blk_count = priv->blk_count;
-
- snprintf (export_path, sizeof (export_path), "%s/%s",
- priv->xlator_req.name, ".gf-tmp-stats-perf");
- fd = open (export_path, O_CREAT|O_RDWR, S_IRWXU);
- if (-1 == fd) {
- ret = -1;
- gf_log ("glusterd", GF_LOG_ERROR, "Could not open tmp file");
- goto out;
- }
-
- buf = GF_MALLOC (blk_size * sizeof(*buf), gf_common_mt_char);
- if (!buf) {
- ret = -1;
- gf_log ("glusterd", GF_LOG_ERROR, "Could not allocate memory");
- goto out;
- }
-
- input_fd = open ("/dev/zero", O_RDONLY);
- if (-1 == input_fd) {
- ret = -1;
- gf_log ("glusterd", GF_LOG_ERROR, "Could not open input file");
- goto out;
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ xlator_t *nextchild = NULL;
+ glusterfs_graph_t *newgraph = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_t *srv_xl = NULL;
+ server_conf_t *srv_conf = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ctx = this->ctx;
+ if (!ctx->cmd_args.volfile_id) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "No volfile-id provided, erroring out");
+ return -1;
+ }
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+
+ if (ret < 0) {
+ /*failed to decode msg;*/
+ req->rpc_err = GARBAGE_ARGS;
+ return -1;
+ }
+ ret = 0;
+
+ if (!this->ctx->active) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "got attach for %s but no active graph", xlator_req.name);
+ goto post_unlock;
+ }
+
+ gf_log(this->name, GF_LOG_INFO, "got attach for %s", xlator_req.name);
+
+ LOCK(&ctx->volfile_lock);
+ {
+ ret = glusterfs_graph_attach(this->ctx->active, xlator_req.name,
+ &newgraph);
+ if (!ret && (newgraph && newgraph->first)) {
+ nextchild = newgraph->first;
+ ret = xlator_notify(nextchild, GF_EVENT_PARENT_UP, nextchild);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, LG_MSG_EVENT_NOTIFY_FAILED,
+ "event=ParentUp", "name=%s", nextchild->name, NULL);
+ goto unlock;
+ }
+ /* we need a protocol/server xlator as
+ * nextchild
+ */
+ srv_xl = this->ctx->active->first;
+ srv_conf = (server_conf_t *)srv_xl->private;
+ rpcsvc_autoscale_threads(this->ctx, srv_conf->rpc, 1);
}
-
- output_fd = open ("/dev/null", O_RDWR);
- if (-1 == output_fd) {
- ret = -1;
- gf_log ("glusterd", GF_LOG_ERROR, "Could not open output file");
- goto out;
- }
-
- for (iter = 0; iter < blk_count; iter++) {
- ret = read (input_fd, buf, blk_size);
- if (ret != blk_size) {
- ret = -1;
- goto out;
- }
- ret = write (fd, buf, blk_size);
- if (ret != blk_size) {
- ret = -1;
- goto out;
- }
- }
-
- ret = fsync (fd);
if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "could not flush cache");
- goto out;
- }
- ret = lseek (fd, 0L, 0);
- if (ret != 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "could not seek back to start");
- ret = -1;
- goto out;
- }
- gettimeofday (&begin, NULL);
- for (iter = 0; iter < blk_count; iter++) {
- ret = read (fd, buf, blk_size);
- if (ret != blk_size) {
- ret = -1;
- goto out;
- }
- ret = write (output_fd, buf, blk_size);
- if (ret != blk_size) {
- ret = -1;
- goto out;
- }
- total_blks += ret;
- }
- ret = 0;
- if (total_blks != ((uint64_t)blk_size * blk_count)) {
- ret = -1;
- gf_log ("glusterd", GF_LOG_WARNING, "Error in read");
- goto out;
+ ret = -1;
}
+ ret = glusterfs_translator_info_response_send(req, ret, NULL, NULL);
+ if (ret) {
+ /* Response sent back to glusterd, req is already destroyed. So
+ * resetting the ret to 0. Otherwise another response will be
+ * send from rpcsvc_check_and_reply_error. Which will lead to
+ * double resource leak.
+ */
+ ret = 0;
+ }
+ unlock:
+ UNLOCK(&ctx->volfile_lock);
+ }
+post_unlock:
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
+ free(xlator_req.input.input_val);
+ free(xlator_req.name);
+
+ return ret;
+}
- gettimeofday (&end, NULL);
- time = (end.tv_sec - begin.tv_sec) * 1e6
- + (end.tv_usec - begin.tv_usec);
- throughput = total_blks / time;
- gf_log ("glusterd", GF_LOG_INFO, "Throughput %.2f Mbps time %.2f secs "
- "bytes read %"PRId64, throughput, time, total_blks);
-
+int
+glusterfs_handle_svc_attach(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ dict_t *dict = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+
+ if (ret < 0) {
+ /*failed to decode msg;*/
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_smsg(THIS->name, GF_LOG_INFO, 0, glusterfsd_msg_41, "volfile-id=%s",
+ xlator_req.name, NULL);
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_unserialize(xlator_req.dict.dict_val, xlator_req.dict.dict_len,
+ &dict);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL, glusterfsd_msg_42, NULL);
+ goto out;
+ }
+ dict->extra_stdfree = xlator_req.dict.dict_val;
+
+ ret = 0;
+
+ ret = mgmt_process_volfile(xlator_req.input.input_val,
+ xlator_req.input.input_len, xlator_req.name,
+ dict);
out:
- priv->throughput = throughput;
- priv->time = time;
-
- if (fd >= 0)
- close (fd);
- if (input_fd >= 0)
- close (input_fd);
- if (output_fd >= 0)
- close (output_fd);
- if (buf)
- GF_FREE (buf);
- unlink (export_path);
-
- (void)glusterfs_handle_translator_info_get_cont (priv);
-
- return NULL;
+ if (dict)
+ dict_unref(dict);
+ if (xlator_req.input.input_val)
+ free(xlator_req.input.input_val);
+ if (xlator_req.name)
+ free(xlator_req.name);
+ glusterfs_translator_info_response_send(req, ret, NULL, NULL);
+ return 0;
}
int
-glusterfs_handle_translator_op (void *data)
+glusterfs_handle_svc_detach(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- gd1_mgmt_brick_op_req xlator_req = {0,};
- dict_t *input = NULL;
- xlator_t *xlator = NULL;
- xlator_t *any = NULL;
- dict_t *output = NULL;
- char key[2048] = {0};
- char *xname = NULL;
- glusterfs_ctx_t *ctx = NULL;
- glusterfs_graph_t *active = NULL;
- xlator_t *this = NULL;
- int i = 0;
- int count = 0;
- rpcsvc_request_t *req = data;
-
- GF_ASSERT (req);
- this = THIS;
- GF_ASSERT (this);
-
- if (!xdr_to_generic (req->msg[0], &xlator_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- ctx = glusterfs_ctx_get ();
- active = ctx->active;
- any = active->first;
- input = dict_new ();
- ret = dict_unserialize (xlator_req.input.input_val,
- xlator_req.input.input_len,
- &input);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- } else {
- input->extra_stdfree = xlator_req.input.input_val;
- }
-
- ret = dict_get_int32 (input, "count", &count);
-
- output = dict_new ();
- if (!output) {
- ret = -1;
- goto out;
- }
-
- for (i = 0; i < count; i++) {
- snprintf (key, sizeof (key), "xl-%d", i);
- ret = dict_get_str (input, key, &xname);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Couldn't get "
- "xlator %s ", key);
- goto out;
- }
- xlator = xlator_search_by_name (any, xname);
- if (!xlator) {
- gf_log (this->name, GF_LOG_ERROR, "xlator %s is not "
- "loaded", xname);
- goto out;
- }
- }
- for (i = 0; i < count; i++) {
- snprintf (key, sizeof (key), "xl-%d", i);
- ret = dict_get_str (input, key, &xname);
- xlator = xlator_search_by_name (any, xname);
- XLATOR_NOTIFY (xlator, GF_EVENT_TRANSLATOR_OP, input, output);
- if (ret)
- break;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ ssize_t ret;
+ gf_volfile_t *volfile_obj = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ gf_volfile_t *volfile_tmp = NULL;
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ return -1;
+ }
+ ctx = glusterfsd_ctx;
+
+ LOCK(&ctx->volfile_lock);
+ {
+ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list)
+ {
+ if (!strcmp(xlator_req.name, volfile_obj->vol_id)) {
+ volfile_tmp = volfile_obj;
+ break;
+ }
+ }
+
+ if (!volfile_tmp) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, glusterfsd_msg_041, "name=%s",
+ xlator_req.name, NULL);
+ /*
+ * Used to be -ENOENT. However, the caller asked us to
+ * make sure it's down and if it's already down that's
+ * good enough.
+ */
+ ret = 0;
+ goto out;
+ }
+ /* coverity[ORDER_REVERSAL] */
+ ret = glusterfs_process_svc_detach(ctx, volfile_tmp);
+ if (ret) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_smsg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL, glusterfsd_msg_042,
+ NULL);
+ goto out;
}
+ }
+ UNLOCK(&ctx->volfile_lock);
out:
- glusterfs_xlator_op_response_send (req, ret, "", output);
- if (input)
- dict_unref (input);
- if (output)
- dict_unref (output);
- if (xlator_req.name)
- free (xlator_req.name); //malloced by xdr
+ glusterfs_terminate_response_send(req, ret);
+ free(xlator_req.name);
+ xlator_req.name = NULL;
- return 0;
+ return 0;
}
-
int
-glusterfs_handle_defrag (rpcsvc_request_t *req)
+glusterfs_handle_dump_metrics(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- gd1_mgmt_brick_op_req xlator_req = {0,};
- dict_t *dict = NULL;
- xlator_t *xlator = NULL;
- xlator_t *any = NULL;
- dict_t *output = NULL;
- char msg[2048] = {0};
- glusterfs_ctx_t *ctx = NULL;
- glusterfs_graph_t *active = NULL;
- xlator_t *this = NULL;
-
- GF_ASSERT (req);
- this = THIS;
- GF_ASSERT (this);
-
- ctx = glusterfs_ctx_get ();
- GF_ASSERT (ctx);
-
- active = ctx->active;
- if (!active) {
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ char *filepath = NULL;
+ int fd = -1;
+ struct stat statbuf = {
+ 0,
+ };
+ char *msg = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+
+ if (ret < 0) {
+ /*failed to decode msg;*/
+ req->rpc_err = GARBAGE_ARGS;
+ return -1;
+ }
+ ret = -1;
+ ctx = this->ctx;
+
+ /* Infra for monitoring */
+ filepath = gf_monitor_metrics(ctx);
+ if (!filepath)
+ goto out;
+
+ fd = sys_open(filepath, O_RDONLY, 0);
+ if (fd < 0)
+ goto out;
+
+ if (sys_fstat(fd, &statbuf) < 0)
+ goto out;
+
+ if (statbuf.st_size > GF_UNIT_MB) {
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, LG_MSG_NO_MEMORY,
+ "reconsider logic (%" PRId64 ")", statbuf.st_size, NULL);
+ }
+ msg = GF_CALLOC(1, (statbuf.st_size + 1), gf_common_mt_char);
+ if (!msg)
+ goto out;
+
+ ret = sys_read(fd, msg, statbuf.st_size);
+ if (ret < 0)
+ goto out;
+
+ /* Send all the data in errstr, instead of dictionary for now */
+ glusterfs_translator_info_response_send(req, 0, msg, NULL);
+
+ ret = 0;
+out:
+ if (fd >= 0)
+ sys_close(fd);
- any = active->first;
- if (!xdr_to_generic (req->msg[0], &xlator_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
- dict = dict_new ();
- if (!dict)
- goto out;
+ GF_FREE(msg);
+ GF_FREE(filepath);
+ if (xlator_req.input.input_val)
+ free(xlator_req.input.input_val);
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
- ret = dict_unserialize (xlator_req.input.input_val,
- xlator_req.input.input_len,
- &dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- }
- xlator = xlator_search_by_name (any, xlator_req.name);
- if (!xlator) {
- snprintf (msg, sizeof (msg), "xlator %s is not loaded",
- xlator_req.name);
- goto out;
- }
+ return ret;
+}
- output = dict_new ();
- if (!output) {
- ret = -1;
- goto out;
- }
+int
+glusterfs_handle_defrag(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ xlator_t *xlator = NULL;
+ xlator_t *any = NULL;
+ dict_t *output = NULL;
+ char msg[2048] = {0};
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT(ctx);
+
+ active = ctx->active;
+ if (!active) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ any = active->first;
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize(xlator_req.input.input_val,
+ xlator_req.input.input_len, &dict);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+ xlator = xlator_search_by_name(any, xlator_req.name);
+ if (!xlator) {
+ snprintf(msg, sizeof(msg), "xlator %s is not loaded", xlator_req.name);
+ goto out;
+ }
+
+ output = dict_new();
+ if (!output) {
+ ret = -1;
+ goto out;
+ }
- ret = xlator->notify (xlator, GF_EVENT_VOLUME_DEFRAG, dict, output);
+ ret = xlator->notify(xlator, GF_EVENT_VOLUME_DEFRAG, dict, output);
- ret = glusterfs_translator_info_response_send (req, ret,
- msg, output);
+ ret = glusterfs_translator_info_response_send(req, ret, msg, output);
out:
- if (dict)
- dict_unref (dict);
- if (xlator_req.input.input_val)
- free (xlator_req.input.input_val); // malloced by xdr
- if (output)
- dict_unref (output);
- if (xlator_req.name)
- free (xlator_req.name); //malloced by xdr
-
- return ret;
-
+ if (dict)
+ dict_unref(dict);
+ free(xlator_req.input.input_val); // malloced by xdr
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
+ if (output)
+ dict_unref(output);
+ free(xlator_req.name); // malloced by xdr
+
+ return ret;
}
int
-glusterfs_handle_brick_status (rpcsvc_request_t *req)
+glusterfs_handle_brick_status(rpcsvc_request_t *req)
{
- int ret = -1;
- gd1_mgmt_brick_op_req brick_req = {0,};
- gd1_mgmt_brick_op_rsp rsp = {0,};
- glusterfs_ctx_t *ctx = NULL;
- glusterfs_graph_t *active = NULL;
- xlator_t *this = NULL;
- xlator_t *any = NULL;
- xlator_t *xlator = NULL;
- dict_t *dict = NULL;
- dict_t *output = NULL;
- char *volname = NULL;
- char *xname = NULL;
- uint32_t cmd = 0;
- char *msg = NULL;
-
- GF_ASSERT (req);
- this = THIS;
- GF_ASSERT (this);
-
- if (!xdr_to_generic (req->msg[0], &brick_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- dict = dict_new ();
- ret = dict_unserialize (brick_req.input.input_val,
- brick_req.input.input_len, &dict);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to unserialize "
- "req-buffer to dictionary");
- goto out;
- }
-
- ret = dict_get_uint32 (dict, "cmd", &cmd);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Couldn't get status op");
- goto out;
- }
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Couldn't get volname");
- goto out;
- }
-
- ctx = glusterfs_ctx_get ();
- GF_ASSERT (ctx);
- active = ctx->active;
- any = active->first;
-
- ret = gf_asprintf (&xname, "%s-server", volname);
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR, "Out of memory");
- goto out;
- }
-
- xlator = xlator_search_by_name (any, xname);
- if (!xlator) {
- gf_log (this->name, GF_LOG_ERROR, "xlator %s is not loaded",
- xname);
- ret = -1;
- goto out;
- }
-
-
- output = dict_new ();
- switch (cmd & GF_CLI_STATUS_MASK) {
- case GF_CLI_STATUS_MEM:
- ret = 0;
- gf_proc_dump_mem_info_to_dict (output);
- gf_proc_dump_mempool_info_to_dict (ctx, output);
- break;
-
- case GF_CLI_STATUS_CLIENTS:
- ret = xlator->dumpops->priv_to_dict (xlator, output);
- break;
-
- case GF_CLI_STATUS_INODE:
- ret = xlator->dumpops->inode_to_dict (xlator, output);
- break;
+ int ret = -1;
+ gd1_mgmt_brick_op_req brick_req = {
+ 0,
+ };
+ gd1_mgmt_brick_op_rsp rsp = {
+ 0,
+ };
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *this = NULL;
+ xlator_t *server_xl = NULL;
+ xlator_t *brick_xl = NULL;
+ dict_t *dict = NULL;
+ dict_t *output = NULL;
+ uint32_t cmd = 0;
+ char *msg = NULL;
+ char *brickname = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &brick_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ dict = dict_new();
+ ret = dict_unserialize(brick_req.input.input_val, brick_req.input.input_len,
+ &dict);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to unserialize "
+ "req-buffer to dictionary");
+ goto out;
+ }
+
+ ret = dict_get_uint32(dict, "cmd", &cmd);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Couldn't get status op");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "brick-name", &brickname);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Couldn't get brickname from"
+ " dict");
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ if (ctx == NULL) {
+ gf_log(this->name, GF_LOG_ERROR, "ctx returned NULL");
+ ret = -1;
+ goto out;
+ }
+ if (ctx->active == NULL) {
+ gf_log(this->name, GF_LOG_ERROR, "ctx->active returned NULL");
+ ret = -1;
+ goto out;
+ }
+ active = ctx->active;
+ if (ctx->active->first == NULL) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "ctx->active->first "
+ "returned NULL");
+ ret = -1;
+ goto out;
+ }
+ server_xl = active->first;
- case GF_CLI_STATUS_FD:
- ret = xlator->dumpops->fd_to_dict (xlator, output);
- break;
+ brick_xl = get_xlator_by_name(server_xl, brickname);
+ if (!brick_xl) {
+ gf_log(this->name, GF_LOG_ERROR, "xlator is not loaded");
+ ret = -1;
+ goto out;
+ }
+
+ output = dict_new();
+ switch (cmd & GF_CLI_STATUS_MASK) {
+ case GF_CLI_STATUS_MEM:
+ ret = 0;
+ gf_proc_dump_mem_info_to_dict(output);
+ gf_proc_dump_mempool_info_to_dict(ctx, output);
+ break;
+
+ case GF_CLI_STATUS_CLIENTS:
+ case GF_CLI_STATUS_CLIENT_LIST:
+ ret = server_xl->dumpops->priv_to_dict(server_xl, output,
+ brickname);
+ break;
+
+ case GF_CLI_STATUS_INODE:
+ ret = server_xl->dumpops->inode_to_dict(brick_xl, output);
+ break;
+
+ case GF_CLI_STATUS_FD:
+ ret = server_xl->dumpops->fd_to_dict(brick_xl, output);
+ break;
+
+ case GF_CLI_STATUS_CALLPOOL:
+ ret = 0;
+ gf_proc_dump_pending_frames_to_dict(ctx->pool, output);
+ break;
- case GF_CLI_STATUS_CALLPOOL:
- ret = 0;
- gf_proc_dump_pending_frames_to_dict (ctx->pool, output);
- break;
+ default:
+ ret = -1;
+ msg = gf_strdup("Unknown status op");
+ break;
+ }
+ rsp.op_ret = ret;
+ rsp.op_errno = 0;
+ if (ret && msg)
+ rsp.op_errstr = msg;
+ else
+ rsp.op_errstr = "";
- default:
- ret = -1;
- msg = gf_strdup ("Unknown status op");
- break;
- }
- rsp.op_ret = ret;
- rsp.op_errno = 0;
- if (ret && msg)
- rsp.op_errstr = msg;
- else
- rsp.op_errstr = "";
-
- ret = dict_allocate_and_serialize (output, &rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to serialize output dict to rsp");
- goto out;
- }
+ ret = dict_allocate_and_serialize(output, &rsp.output.output_val,
+ &rsp.output.output_len);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to serialize output dict to rsp");
+ goto out;
+ }
- ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ glusterfs_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ ret = 0;
out:
- if (dict)
- dict_unref (dict);
- if (output)
- dict_unref (output);
- if (brick_req.input.input_val)
- free (brick_req.input.input_val);
- if (xname)
- GF_FREE (xname);
- if (msg)
- GF_FREE (msg);
- if (rsp.output.output_val)
- GF_FREE (rsp.output.output_val);
-
- return ret;
-}
-
-static int
-glusterfs_command_done (int ret, call_frame_t *sync_frame, void *data)
-{
- STACK_DESTROY (sync_frame->root);
- return 0;
+ if (dict)
+ dict_unref(dict);
+ if (output)
+ dict_unref(output);
+ free(brick_req.input.input_val);
+ if (brick_req.dict.dict_val)
+ free(brick_req.dict.dict_val);
+ free(brick_req.name);
+ GF_FREE(msg);
+ GF_FREE(rsp.output.output_val);
+
+ return ret;
}
int
-glusterfs_handle_node_status (rpcsvc_request_t *req)
+glusterfs_handle_node_status(rpcsvc_request_t *req)
{
- int ret = -1;
- gd1_mgmt_brick_op_req node_req = {0,};
- gd1_mgmt_brick_op_rsp rsp = {0,};
- glusterfs_ctx_t *ctx = NULL;
- glusterfs_graph_t *active = NULL;
- xlator_t *any = NULL;
- xlator_t *node = NULL;
- xlator_t *subvol = NULL;
- dict_t *dict = NULL;
- dict_t *output = NULL;
- char *volname = NULL;
- char *node_name = NULL;
- char *subvol_name = NULL;
- uint32_t cmd = 0;
- char *msg = NULL;
-
- GF_ASSERT (req);
-
- if (!xdr_to_generic (req->msg[0], &node_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
+ int ret = -1;
+ gd1_mgmt_brick_op_req node_req = {
+ 0,
+ };
+ gd1_mgmt_brick_op_rsp rsp = {
+ 0,
+ };
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *any = NULL;
+ xlator_t *node = NULL;
+ xlator_t *subvol = NULL;
+ dict_t *dict = NULL;
+ dict_t *output = NULL;
+ char *volname = NULL;
+ char *node_name = NULL;
+ char *subvol_name = NULL;
+ uint32_t cmd = 0;
+ char *msg = NULL;
+
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &node_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ dict = dict_new();
+ ret = dict_unserialize(node_req.input.input_val, node_req.input.input_len,
+ &dict);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to unserialize "
+ "req buffer to dictionary");
+ goto out;
+ }
+
+ ret = dict_get_uint32(dict, "cmd", &cmd);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Couldn't get status op");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Couldn't get volname");
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT(ctx);
+ active = ctx->active;
+ if (active == NULL) {
+ gf_log(THIS->name, GF_LOG_ERROR, "ctx->active returned NULL");
+ ret = -1;
+ goto out;
+ }
+ any = active->first;
+
+ if ((cmd & GF_CLI_STATUS_SHD) != 0)
+ ret = gf_asprintf(&node_name, "%s", "glustershd");
+#ifdef BUILD_GNFS
+ else if ((cmd & GF_CLI_STATUS_NFS) != 0)
+ ret = gf_asprintf(&node_name, "%s", "nfs-server");
+#endif
+ else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0)
+ ret = gf_asprintf(&node_name, "%s", "quotad");
+ else if ((cmd & GF_CLI_STATUS_BITD) != 0)
+ ret = gf_asprintf(&node_name, "%s", "bitd");
+ else if ((cmd & GF_CLI_STATUS_SCRUB) != 0)
+ ret = gf_asprintf(&node_name, "%s", "scrubber");
+
+ else {
+ ret = -1;
+ goto out;
+ }
+ if (ret == -1) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to set node xlator name");
+ goto out;
+ }
+
+ node = xlator_search_by_name(any, node_name);
+ if (!node) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_ERROR, "%s xlator is not loaded", node_name);
+ goto out;
+ }
+
+ if ((cmd & GF_CLI_STATUS_NFS) != 0)
+ ret = gf_asprintf(&subvol_name, "%s", volname);
+ else if ((cmd & GF_CLI_STATUS_SHD) != 0)
+ ret = gf_asprintf(&subvol_name, "%s-replicate-0", volname);
+ else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0)
+ ret = gf_asprintf(&subvol_name, "%s", volname);
+ else if ((cmd & GF_CLI_STATUS_BITD) != 0)
+ ret = gf_asprintf(&subvol_name, "%s", volname);
+ else if ((cmd & GF_CLI_STATUS_SCRUB) != 0)
+ ret = gf_asprintf(&subvol_name, "%s", volname);
+ else {
+ ret = -1;
+ goto out;
+ }
+ if (ret == -1) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to set node xlator name");
+ goto out;
+ }
+
+ subvol = xlator_search_by_name(node, subvol_name);
+ if (!subvol) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_ERROR, "%s xlator is not loaded",
+ subvol_name);
+ goto out;
+ }
+
+ output = dict_new();
+ switch (cmd & GF_CLI_STATUS_MASK) {
+ case GF_CLI_STATUS_MEM:
+ ret = 0;
+ gf_proc_dump_mem_info_to_dict(output);
+ gf_proc_dump_mempool_info_to_dict(ctx, output);
+ break;
+
+ case GF_CLI_STATUS_CLIENTS:
+ // clients not available for SHD
+ if ((cmd & GF_CLI_STATUS_SHD) != 0)
+ break;
- dict = dict_new ();
- ret = dict_unserialize (node_req.input.input_val,
- node_req.input.input_len, &dict);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "Failed to unserialize "
- "req buffer to dictionary");
+ ret = dict_set_str(output, "volname", volname);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Error setting volname to dict");
goto out;
- }
+ }
+ ret = node->dumpops->priv_to_dict(node, output, NULL);
+ break;
- ret = dict_get_uint32 (dict, "cmd", &cmd);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get status op");
- goto out;
- }
+ case GF_CLI_STATUS_INODE:
+ ret = 0;
+ inode_table_dump_to_dict(subvol->itable, "conn0", output);
+ ret = dict_set_int32(output, "conncount", 1);
+ break;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get volname");
- goto out;
- }
+ case GF_CLI_STATUS_FD:
+ // cannot find fd-tables in nfs-server graph
+ // TODO: finish once found
+ break;
- ctx = glusterfs_ctx_get ();
- GF_ASSERT (ctx);
- active = ctx->active;
- any = active->first;
+ case GF_CLI_STATUS_CALLPOOL:
+ ret = 0;
+ gf_proc_dump_pending_frames_to_dict(ctx->pool, output);
+ break;
- if ((cmd & GF_CLI_STATUS_NFS) != 0)
- ret = gf_asprintf (&node_name, "%s", "nfs-server");
- else if ((cmd & GF_CLI_STATUS_SHD) != 0)
- ret = gf_asprintf (&node_name, "%s", "glustershd");
- else {
- ret = -1;
- goto out;
- }
- if (ret == -1) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to set node xlator name");
- goto out;
- }
-
- node = xlator_search_by_name (any, node_name);
- if (!node) {
- ret = -1;
- gf_log (THIS->name, GF_LOG_ERROR, "%s xlator is not loaded",
- node_name);
- goto out;
- }
+ default:
+ ret = -1;
+ msg = gf_strdup("Unknown status op");
+ gf_log(THIS->name, GF_LOG_ERROR, "%s", msg);
+ break;
+ }
+ rsp.op_ret = ret;
+ rsp.op_errno = 0;
+ if (ret && msg)
+ rsp.op_errstr = msg;
+ else
+ rsp.op_errstr = "";
- if ((cmd & GF_CLI_STATUS_NFS) != 0)
- ret = gf_asprintf (&subvol_name, "%s", volname);
- else if ((cmd & GF_CLI_STATUS_SHD) != 0)
- ret = gf_asprintf (&subvol_name, "%s-replicate-0", volname);
- else {
- ret = -1;
- goto out;
- }
- if (ret == -1) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to set node xlator name");
- goto out;
- }
+ ret = dict_allocate_and_serialize(output, &rsp.output.output_val,
+ &rsp.output.output_len);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to serialize output dict to rsp");
+ goto out;
+ }
- subvol = xlator_search_by_name (node, subvol_name);
- if (!subvol) {
- ret = -1;
- gf_log (THIS->name, GF_LOG_ERROR, "%s xlator is not loaded",
- subvol_name);
- goto out;
- }
-
- output = dict_new ();
- switch (cmd & GF_CLI_STATUS_MASK) {
- case GF_CLI_STATUS_MEM:
- ret = 0;
- gf_proc_dump_mem_info_to_dict (output);
- gf_proc_dump_mempool_info_to_dict (ctx, output);
- break;
-
- case GF_CLI_STATUS_CLIENTS:
- // clients not availbale for SHD
- if ((cmd & GF_CLI_STATUS_SHD) != 0)
- break;
-
- ret = dict_set_str (output, "volname", volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Error setting volname to dict");
- goto out;
- }
- ret = node->dumpops->priv_to_dict (node, output);
- break;
-
- case GF_CLI_STATUS_INODE:
- ret = 0;
- inode_table_dump_to_dict (subvol->itable, "conn0",
- output);
- ret = dict_set_int32 (output, "conncount", 1);
- break;
-
- case GF_CLI_STATUS_FD:
- // cannot find fd-tables in nfs-server graph
- // TODO: finish once found
- break;
-
- case GF_CLI_STATUS_CALLPOOL:
- ret = 0;
- gf_proc_dump_pending_frames_to_dict (ctx->pool, output);
- break;
-
- default:
- ret = -1;
- msg = gf_strdup ("Unknown status op");
- gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
- break;
- }
- rsp.op_ret = ret;
- rsp.op_errno = 0;
- if (ret && msg)
- rsp.op_errstr = msg;
- else
- rsp.op_errstr = "";
-
- ret = dict_allocate_and_serialize (output, &rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to serialize output dict to rsp");
- goto out;
- }
-
- ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ glusterfs_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ ret = 0;
out:
- if (dict)
- dict_unref (dict);
- if (node_req.input.input_val)
- free (node_req.input.input_val);
- if (msg)
- GF_FREE (msg);
- if (rsp.output.output_val)
- GF_FREE (rsp.output.output_val);
- if (node_name)
- GF_FREE (node_name);
- if (subvol_name)
- GF_FREE (subvol_name);
-
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (dict)
+ dict_unref(dict);
+ free(node_req.input.input_val);
+ if (node_req.dict.dict_val)
+ free(node_req.dict.dict_val);
+ GF_FREE(msg);
+ GF_FREE(rsp.output.output_val);
+ GF_FREE(node_name);
+ GF_FREE(subvol_name);
+
+ gf_log(THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-glusterfs_handle_nfs_profile (rpcsvc_request_t *req)
+glusterfs_handle_nfs_profile(rpcsvc_request_t *req)
{
- int ret = -1;
- gd1_mgmt_brick_op_req nfs_req = {0,};
- gd1_mgmt_brick_op_rsp rsp = {0,};
- dict_t *dict = NULL;
- glusterfs_ctx_t *ctx = NULL;
- glusterfs_graph_t *active = NULL;
- xlator_t *any = NULL;
- xlator_t *nfs = NULL;
- xlator_t *subvol = NULL;
- char *volname = NULL;
- dict_t *output = NULL;
-
- GF_ASSERT (req);
-
- if (!xdr_to_generic (req->msg[0], &nfs_req,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req)) {
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
+ int ret = -1;
+ gd1_mgmt_brick_op_req nfs_req = {
+ 0,
+ };
+ gd1_mgmt_brick_op_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *any = NULL;
+ xlator_t *nfs = NULL;
+ xlator_t *subvol = NULL;
+ char *volname = NULL;
+ dict_t *output = NULL;
+
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &nfs_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ dict = dict_new();
+ ret = dict_unserialize(nfs_req.input.input_val, nfs_req.input.input_len,
+ &dict);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to "
+ "unserialize req-buffer to dict");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Couldn't get volname");
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT(ctx);
+
+ active = ctx->active;
+ if (active == NULL) {
+ gf_log(THIS->name, GF_LOG_ERROR, "ctx->active returned NULL");
+ ret = -1;
+ goto out;
+ }
+ any = active->first;
+
+ // is this needed?
+ // are problems possible by searching for subvol directly from "any"?
+ nfs = xlator_search_by_name(any, "nfs-server");
+ if (!nfs) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "xlator nfs-server is "
+ "not loaded");
+ goto out;
+ }
+
+ subvol = xlator_search_by_name(nfs, volname);
+ if (!subvol) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_ERROR, "xlator %s is no loaded", volname);
+ goto out;
+ }
- dict = dict_new ();
- ret = dict_unserialize (nfs_req.input.input_val,
- nfs_req.input.input_len, &dict);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "Failed to "
- "unserialize req-buffer to dict");
- goto out;
- }
+ output = dict_new();
+ ret = subvol->notify(subvol, GF_EVENT_TRANSLATOR_INFO, dict, output);
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get volname");
- goto out;
- }
+ rsp.op_ret = ret;
+ rsp.op_errno = 0;
+ rsp.op_errstr = "";
- ctx = glusterfs_ctx_get ();
- GF_ASSERT (ctx);
+ ret = dict_allocate_and_serialize(output, &rsp.output.output_val,
+ &rsp.output.output_len);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to serialize output dict to rsp");
+ goto out;
+ }
- active = ctx->active;
- any = active->first;
+ glusterfs_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ ret = 0;
- // is this needed?
- // are problems possible by searching for subvol directly from "any"?
- nfs = xlator_search_by_name (any, "nfs-server");
- if (!nfs) {
- ret = -1;
- gf_log (THIS->name, GF_LOG_ERROR, "xlator nfs-server is "
- "not loaded");
- goto out;
- }
+out:
+ free(nfs_req.input.input_val);
+ if (nfs_req.dict.dict_val)
+ free(nfs_req.dict.dict_val);
+ if (dict)
+ dict_unref(dict);
+ if (output)
+ dict_unref(output);
+ GF_FREE(rsp.output.output_val);
+
+ gf_log(THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
+}
- subvol = xlator_search_by_name (nfs, volname);
- if (!subvol) {
- ret = -1;
- gf_log (THIS->name, GF_LOG_ERROR, "xlator %s is no loaded",
- volname);
- goto out;
- }
+int
+glusterfs_handle_volume_barrier_op(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_brick_op_req xlator_req = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ xlator_t *xlator = NULL;
+ xlator_t *any = NULL;
+ dict_t *output = NULL;
+ char msg[2048] = {0};
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT(ctx);
+
+ active = ctx->active;
+ if (!active) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ any = active->first;
+ ret = xdr_to_generic(req->msg[0], &xlator_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize(xlator_req.input.input_val,
+ xlator_req.input.input_len, &dict);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+ xlator = xlator_search_by_name(any, xlator_req.name);
+ if (!xlator) {
+ snprintf(msg, sizeof(msg), "xlator %s is not loaded", xlator_req.name);
+ goto out;
+ }
+
+ output = dict_new();
+ if (!output) {
+ ret = -1;
+ goto out;
+ }
- output = dict_new ();
- ret = subvol->notify (subvol, GF_EVENT_TRANSLATOR_INFO, dict, output);
+ ret = xlator->notify(xlator, GF_EVENT_VOLUME_BARRIER_OP, dict, output);
- rsp.op_ret = ret;
- rsp.op_errno = 0;
- rsp.op_errstr = "";
+ ret = glusterfs_translator_info_response_send(req, ret, msg, output);
+out:
+ if (dict)
+ dict_unref(dict);
+ free(xlator_req.input.input_val); // malloced by xdr
+ if (xlator_req.dict.dict_val)
+ free(xlator_req.dict.dict_val);
+ if (output)
+ dict_unref(output);
+ free(xlator_req.name); // malloced by xdr
+
+ return ret;
+}
- ret = dict_allocate_and_serialize (output, &rsp.output.output_val,
- (size_t *)&rsp.output.output_len);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to serialize output dict to rsp");
- goto out;
+int
+glusterfs_handle_barrier(rpcsvc_request_t *req)
+{
+ int ret = -1;
+ gd1_mgmt_brick_op_req brick_req = {
+ 0,
+ };
+ gd1_mgmt_brick_op_rsp brick_rsp = {
+ 0,
+ };
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *top = NULL;
+ xlator_t *xlator = NULL;
+ xlator_t *old_THIS = NULL;
+ dict_t *dict = NULL;
+ gf_boolean_t barrier = _gf_true;
+ xlator_list_t *trav;
+
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &brick_req,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ ctx = glusterfsd_ctx;
+ GF_ASSERT(ctx);
+ active = ctx->active;
+ if (active == NULL) {
+ gf_log(THIS->name, GF_LOG_ERROR, "ctx->active returned NULL");
+ ret = -1;
+ goto out;
+ }
+ top = active->first;
+
+ for (trav = top->children; trav; trav = trav->next) {
+ if (strcmp(trav->xlator->name, brick_req.name) == 0) {
+ break;
}
+ }
+ if (!trav) {
+ ret = -1;
+ goto out;
+ }
+ top = trav->xlator;
- ret = glusterfs_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(brick_req.input.input_val, brick_req.input.input_len,
+ &dict);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to unserialize "
+ "request dictionary");
+ goto out;
+ }
+
+ brick_rsp.op_ret = 0;
+ brick_rsp.op_errstr = ""; // initing to prevent serilaztion failures
+ old_THIS = THIS;
+
+ /* Send barrier request to the barrier xlator */
+ xlator = get_xlator_by_type(top, "features/barrier");
+ if (!xlator) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_ERROR, "%s xlator is not loaded",
+ "features/barrier");
+ goto out;
+ }
+
+ THIS = xlator;
+ // TODO: Extend this to accept return of errnos
+ ret = xlator->notify(xlator, GF_EVENT_TRANSLATOR_OP, dict);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "barrier notify failed");
+ brick_rsp.op_ret = ret;
+ brick_rsp.op_errstr = gf_strdup(
+ "Failed to reconfigure "
+ "barrier.");
+ /* This is to invoke changelog-barrier disable if barrier
+ * disable fails and don't invoke if barrier enable fails.
+ */
+ barrier = dict_get_str_boolean(dict, "barrier", _gf_true);
+ if (barrier)
+ goto submit_reply;
+ }
+
+ /* Reset THIS so that we have it correct in case of an error below
+ */
+ THIS = old_THIS;
+
+ /* Send barrier request to changelog as well */
+ xlator = get_xlator_by_type(top, "features/changelog");
+ if (!xlator) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_ERROR, "%s xlator is not loaded",
+ "features/changelog");
+ goto out;
+ }
+
+ THIS = xlator;
+ ret = xlator->notify(xlator, GF_EVENT_TRANSLATOR_OP, dict);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR, "changelog notify failed");
+ brick_rsp.op_ret = ret;
+ brick_rsp.op_errstr = gf_strdup("changelog notify failed");
+ goto submit_reply;
+ }
+
+submit_reply:
+ THIS = old_THIS;
+
+ ret = glusterfs_submit_reply(req, &brick_rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
out:
- if (nfs_req.input.input_val)
- free (nfs_req.input.input_val);
- if (dict)
- dict_unref (dict);
- if (output)
- dict_unref (output);
- if (rsp.output.output_val)
- GF_FREE (rsp.output.output_val);
-
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (dict)
+ dict_unref(dict);
+ free(brick_req.input.input_val);
+ if (brick_req.dict.dict_val)
+ free(brick_req.dict.dict_val);
+ gf_log(THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ return ret;
}
int
-glusterfs_handle_rpc_msg (rpcsvc_request_t *req)
+glusterfs_handle_rpc_msg(rpcsvc_request_t *req)
{
- int ret = -1;
- xlator_t *this = THIS;
- call_frame_t *frame = NULL;
-
- GF_ASSERT (this);
- switch (req->procnum) {
- case GLUSTERD_BRICK_TERMINATE:
- ret = glusterfs_handle_terminate (req);
- break;
- case GLUSTERD_BRICK_XLATOR_INFO:
- ret = glusterfs_handle_translator_info_get (req);
- break;
- case GLUSTERD_BRICK_XLATOR_OP:
- frame = create_frame (this, this->ctx->pool);
- if (!frame)
- goto out;
- ret = synctask_new (this->ctx->env,
- glusterfs_handle_translator_op,
- glusterfs_command_done, frame, req);
- break;
- case GLUSTERD_BRICK_STATUS:
- ret = glusterfs_handle_brick_status (req);
- break;
- case GLUSTERD_BRICK_XLATOR_DEFRAG:
- ret = glusterfs_handle_defrag (req);
- break;
- case GLUSTERD_NODE_PROFILE:
- ret = glusterfs_handle_nfs_profile (req);
- break;
- case GLUSTERD_NODE_STATUS:
- ret = glusterfs_handle_node_status (req);
- default:
- break;
- }
-out:
- return ret;
+ int ret = -1;
+ /* for now, nothing */
+ return ret;
}
-rpcclnt_cb_actor_t gluster_cbk_actors[] = {
- [GF_CBK_FETCHSPEC] = {"FETCHSPEC", GF_CBK_FETCHSPEC, mgmt_cbk_spec },
- [GF_CBK_EVENT_NOTIFY] = {"EVENTNOTIFY", GF_CBK_EVENT_NOTIFY,
- mgmt_cbk_event},
+static rpcclnt_cb_actor_t mgmt_cbk_actors[GF_CBK_MAXVALUE] = {
+ [GF_CBK_FETCHSPEC] = {"FETCHSPEC", mgmt_cbk_spec, GF_CBK_FETCHSPEC},
+ [GF_CBK_EVENT_NOTIFY] = {"EVENTNOTIFY", mgmt_cbk_event,
+ GF_CBK_EVENT_NOTIFY},
+ [GF_CBK_STATEDUMP] = {"STATEDUMP", mgmt_cbk_event, GF_CBK_STATEDUMP},
};
-
-struct rpcclnt_cb_program mgmt_cbk_prog = {
- .progname = "GlusterFS Callback",
- .prognum = GLUSTER_CBK_PROGRAM,
- .progver = GLUSTER_CBK_VERSION,
- .actors = gluster_cbk_actors,
- .numactors = GF_CBK_MAXVALUE,
+static struct rpcclnt_cb_program mgmt_cbk_prog = {
+ .progname = "GlusterFS Callback",
+ .prognum = GLUSTER_CBK_PROGRAM,
+ .progver = GLUSTER_CBK_VERSION,
+ .actors = mgmt_cbk_actors,
+ .numactors = GF_CBK_MAXVALUE,
};
-char *clnt_pmap_procs[GF_PMAP_MAXVALUE] = {
- [GF_PMAP_NULL] = "NULL",
- [GF_PMAP_PORTBYBRICK] = "PORTBYBRICK",
- [GF_PMAP_BRICKBYPORT] = "BRICKBYPORT",
- [GF_PMAP_SIGNIN] = "SIGNIN",
- [GF_PMAP_SIGNOUT] = "SIGNOUT",
- [GF_PMAP_SIGNUP] = "SIGNUP",
+static char *clnt_pmap_procs[GF_PMAP_MAXVALUE] = {
+ [GF_PMAP_NULL] = "NULL",
+ [GF_PMAP_PORTBYBRICK] = "PORTBYBRICK",
+ [GF_PMAP_BRICKBYPORT] = "BRICKBYPORT",
+ [GF_PMAP_SIGNIN] = "SIGNIN",
+ [GF_PMAP_SIGNOUT] = "SIGNOUT",
+ [GF_PMAP_SIGNUP] = "SIGNUP", /* DEPRECATED - DON'T USE! */
};
-
-rpc_clnt_prog_t clnt_pmap_prog = {
- .progname = "Gluster Portmap",
- .prognum = GLUSTER_PMAP_PROGRAM,
- .progver = GLUSTER_PMAP_VERSION,
- .procnames = clnt_pmap_procs,
+static rpc_clnt_prog_t clnt_pmap_prog = {
+ .progname = "Gluster Portmap",
+ .prognum = GLUSTER_PMAP_PROGRAM,
+ .progver = GLUSTER_PMAP_VERSION,
+ .procnames = clnt_pmap_procs,
};
-char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
- [GF_HNDSK_NULL] = "NULL",
- [GF_HNDSK_SETVOLUME] = "SETVOLUME",
- [GF_HNDSK_GETSPEC] = "GETSPEC",
- [GF_HNDSK_PING] = "PING",
- [GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY",
+static char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
+ [GF_HNDSK_NULL] = "NULL",
+ [GF_HNDSK_SETVOLUME] = "SETVOLUME",
+ [GF_HNDSK_GETSPEC] = "GETSPEC",
+ [GF_HNDSK_PING] = "PING",
+ [GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY",
};
-rpc_clnt_prog_t clnt_handshake_prog = {
- .progname = "GlusterFS Handshake",
- .prognum = GLUSTER_HNDSK_PROGRAM,
- .progver = GLUSTER_HNDSK_VERSION,
- .procnames = clnt_handshake_procs,
+static rpc_clnt_prog_t clnt_handshake_prog = {
+ .progname = "GlusterFS Handshake",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
+ .procnames = clnt_handshake_procs,
};
-rpcsvc_actor_t glusterfs_actors[] = {
- [GLUSTERD_BRICK_NULL] = { "NULL", GLUSTERD_BRICK_NULL, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_BRICK_TERMINATE] = { "TERMINATE", GLUSTERD_BRICK_TERMINATE, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_BRICK_XLATOR_INFO] = { "TRANSLATOR INFO", GLUSTERD_BRICK_XLATOR_INFO, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_BRICK_XLATOR_OP] = { "TRANSLATOR OP", GLUSTERD_BRICK_XLATOR_OP, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_BRICK_STATUS] = {"STATUS", GLUSTERD_BRICK_STATUS, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_BRICK_XLATOR_DEFRAG] = { "TRANSLATOR DEFRAG", GLUSTERD_BRICK_XLATOR_DEFRAG, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_NODE_PROFILE] = {"NFS PROFILE", GLUSTERD_NODE_PROFILE, glusterfs_handle_rpc_msg, NULL, NULL, 0},
- [GLUSTERD_NODE_STATUS] = {"NFS STATUS", GLUSTERD_NODE_STATUS, glusterfs_handle_rpc_msg, NULL, NULL, 0}
+static rpcsvc_actor_t glusterfs_actors[GLUSTERD_BRICK_MAXVALUE] = {
+ [GLUSTERD_BRICK_NULL] = {"NULL", glusterfs_handle_rpc_msg, NULL,
+ GLUSTERD_BRICK_NULL, DRC_NA, 0},
+ [GLUSTERD_BRICK_TERMINATE] = {"TERMINATE", glusterfs_handle_terminate, NULL,
+ GLUSTERD_BRICK_TERMINATE, DRC_NA, 0},
+ [GLUSTERD_BRICK_XLATOR_INFO] = {"TRANSLATOR INFO",
+ glusterfs_handle_translator_info_get, NULL,
+ GLUSTERD_BRICK_XLATOR_INFO, DRC_NA, 0},
+ [GLUSTERD_BRICK_XLATOR_OP] = {"TRANSLATOR OP",
+ glusterfs_handle_translator_op, NULL,
+ GLUSTERD_BRICK_XLATOR_OP, DRC_NA, 0},
+ [GLUSTERD_BRICK_STATUS] = {"STATUS", glusterfs_handle_brick_status, NULL,
+ GLUSTERD_BRICK_STATUS, DRC_NA, 0},
+ [GLUSTERD_BRICK_XLATOR_DEFRAG] = {"TRANSLATOR DEFRAG",
+ glusterfs_handle_defrag, NULL,
+ GLUSTERD_BRICK_XLATOR_DEFRAG, DRC_NA, 0},
+ [GLUSTERD_NODE_PROFILE] = {"NFS PROFILE", glusterfs_handle_nfs_profile,
+ NULL, GLUSTERD_NODE_PROFILE, DRC_NA, 0},
+ [GLUSTERD_NODE_STATUS] = {"NFS STATUS", glusterfs_handle_node_status, NULL,
+ GLUSTERD_NODE_STATUS, DRC_NA, 0},
+ [GLUSTERD_VOLUME_BARRIER_OP] = {"VOLUME BARRIER OP",
+ glusterfs_handle_volume_barrier_op, NULL,
+ GLUSTERD_VOLUME_BARRIER_OP, DRC_NA, 0},
+ [GLUSTERD_BRICK_BARRIER] = {"BARRIER", glusterfs_handle_barrier, NULL,
+ GLUSTERD_BRICK_BARRIER, DRC_NA, 0},
+ [GLUSTERD_NODE_BITROT] = {"BITROT", glusterfs_handle_bitrot, NULL,
+ GLUSTERD_NODE_BITROT, DRC_NA, 0},
+ [GLUSTERD_BRICK_ATTACH] = {"ATTACH", glusterfs_handle_attach, NULL,
+ GLUSTERD_BRICK_ATTACH, DRC_NA, 0},
+
+ [GLUSTERD_DUMP_METRICS] = {"DUMP METRICS", glusterfs_handle_dump_metrics,
+ NULL, GLUSTERD_DUMP_METRICS, DRC_NA, 0},
+
+ [GLUSTERD_SVC_ATTACH] = {"ATTACH CLIENT", glusterfs_handle_svc_attach, NULL,
+ GLUSTERD_SVC_ATTACH, DRC_NA, 0},
+
+ [GLUSTERD_SVC_DETACH] = {"DETACH CLIENT", glusterfs_handle_svc_detach, NULL,
+ GLUSTERD_SVC_DETACH, DRC_NA, 0},
+
};
-struct rpcsvc_program glusterfs_mop_prog = {
- .progname = "Gluster Brick operations",
- .prognum = GD_BRICK_PROGRAM,
- .progver = GD_BRICK_VERSION,
- .actors = glusterfs_actors,
- .numactors = GLUSTERD_BRICK_MAXVALUE,
+static struct rpcsvc_program glusterfs_mop_prog = {
+ .progname = "Gluster Brick operations",
+ .prognum = GD_BRICK_PROGRAM,
+ .progver = GD_BRICK_VERSION,
+ .actors = glusterfs_actors,
+ .numactors = GLUSTERD_BRICK_MAXVALUE,
+ .synctask = _gf_true,
};
int
-mgmt_submit_request (void *req, call_frame_t *frame,
- glusterfs_ctx_t *ctx,
- rpc_clnt_prog_t *prog, int procnum,
- fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+mgmt_submit_request(void *req, call_frame_t *frame, glusterfs_ctx_t *ctx,
+ rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn,
+ xdrproc_t xdrproc)
{
- int ret = -1;
- int count = 0;
- struct iovec iov = {0, };
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- ssize_t xdr_size = 0;
-
- iobref = iobref_new ();
- if (!iobref) {
- goto out;
- }
-
- if (req) {
- xdr_size = xdr_sizeof (xdrproc, req);
-
- iobuf = iobuf_get2 (ctx->iobuf_pool, xdr_size);
- if (!iobuf) {
- goto out;
- };
-
- iobref_add (iobref, iobuf);
-
- iov.iov_base = iobuf->ptr;
- iov.iov_len = iobuf_pagesize (iobuf);
-
- /* Create the xdr payload */
- ret = xdr_serialize_generic (iov, req, xdrproc);
- if (ret == -1) {
- gf_log (THIS->name, GF_LOG_WARNING, "failed to create XDR payload");
- goto out;
- }
- iov.iov_len = ret;
- count = 1;
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {
+ 0,
+ };
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ ssize_t xdr_size = 0;
+
+ iobref = iobref_new();
+ if (!iobref) {
+ goto out;
+ }
+
+ if (req) {
+ xdr_size = xdr_sizeof(xdrproc, req);
+
+ iobuf = iobuf_get2(ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ iobref_add(iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize(iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic(iov, req, xdrproc);
+ if (ret == -1) {
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to create XDR payload");
+ goto out;
}
+ iov.iov_len = ret;
+ count = 1;
+ }
- /* Send the msg */
- ret = rpc_clnt_submit (ctx->mgmt, prog, procnum, cbkfn,
- &iov, count,
- NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL);
+ /* Send the msg */
+ ret = rpc_clnt_submit(ctx->mgmt, prog, procnum, cbkfn, &iov, count, NULL, 0,
+ iobref, frame, NULL, 0, NULL, 0, NULL);
out:
- if (iobref)
- iobref_unref (iobref);
+ if (iobref)
+ iobref_unref(iobref);
- if (iobuf)
- iobuf_unref (iobuf);
- return ret;
+ if (iobuf)
+ iobuf_unref(iobuf);
+ return ret;
}
-
-/* XXX: move these into @ctx */
-static char oldvolfile[131072];
-static int oldvollen = 0;
-
-static int
-xlator_equal_rec (xlator_t *xl1, xlator_t *xl2)
+int
+mgmt_getspec_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- xlator_list_t *trav1 = NULL;
- xlator_list_t *trav2 = NULL;
- int ret = 0;
-
- if (xl1 == NULL || xl2 == NULL) {
- gf_log ("xlator", GF_LOG_DEBUG, "invalid argument");
- return -1;
- }
-
- trav1 = xl1->children;
- trav2 = xl2->children;
+ gf_getspec_rsp rsp = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0, locked = 0;
+ ssize_t size = 0;
+ FILE *tmpfp = NULL;
+ char *volfile_id = NULL;
+ gf_volfile_t *volfile_obj = NULL;
+ gf_volfile_t *volfile_tmp = NULL;
+ char sha256_hash[SHA256_DIGEST_LENGTH] = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ char *servers_list = NULL;
+ int tmp_fd = -1;
+ char template[] = "/tmp/glfs.volfile.XXXXXX";
+
+ frame = myframe;
+ ctx = frame->this->ctx;
+
+ if (-1 == req->rpc_status) {
+ ret = -1;
+ goto out;
+ }
- while (trav1 && trav2) {
- ret = xlator_equal_rec (trav1->xlator, trav2->xlator);
- if (ret) {
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "xlators children not equal");
- goto out;
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "XDR decoding error");
+ ret = -1;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "failed to get the 'volume file' from server");
+ ret = rsp.op_errno;
+ goto out;
+ }
+
+ if (!rsp.xdata.xdata_len) {
+ goto volfile;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.xdata.xdata_val, rsp.xdata.xdata_len, &dict);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "failed to unserialize xdata to dictionary");
+ goto out;
+ }
+ dict->extra_stdfree = rsp.xdata.xdata_val;
+
+ ret = dict_get_str(dict, "servers-list", &servers_list);
+ if (ret) {
+ /* Server list is set by glusterd at the time of getspec */
+ ret = dict_get_str(dict, GLUSTERD_BRICK_SERVERS, &servers_list);
+ if (ret)
+ goto volfile;
+ }
+
+ gf_log(frame->this->name, GF_LOG_INFO,
+ "Received list of available volfile servers: %s", servers_list);
+
+ ret = gf_process_getspec_servers_list(&ctx->cmd_args, servers_list);
+ if (ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "Failed (%s) to process servers list: %s", strerror(errno),
+ servers_list);
+ }
+
+volfile:
+ size = rsp.op_ret;
+ volfile_id = frame->local;
+ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) {
+ ret = mgmt_process_volfile((const char *)rsp.spec, size, volfile_id,
+ dict);
+ goto post_graph_mgmt;
+ }
+
+ ret = 0;
+ glusterfs_compute_sha256((const unsigned char *)rsp.spec, size,
+ sha256_hash);
+
+ LOCK(&ctx->volfile_lock);
+ {
+ locked = 1;
+
+ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list)
+ {
+ if (!strcmp(volfile_id, volfile_obj->vol_id)) {
+ if (!memcmp(sha256_hash, volfile_obj->volfile_checksum,
+ sizeof(volfile_obj->volfile_checksum))) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_log(frame->this->name, GF_LOG_INFO,
+ "No change in volfile,"
+ "continuing");
+ goto post_unlock;
}
-
- trav1 = trav1->next;
- trav2 = trav2->next;
+ volfile_tmp = volfile_obj;
+ break;
+ }
}
- if (trav1 || trav2) {
- ret = -1;
- goto out;
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode */
+ tmp_fd = mkstemp(template);
+ if (-1 == tmp_fd) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_smsg(frame->this->name, GF_LOG_ERROR, 0, glusterfsd_msg_39,
+ "create template=%s", template, NULL);
+ ret = -1;
+ goto post_unlock;
}
- if (strcmp (xl1->name, xl2->name)) {
- ret = -1;
- goto out;
+ /* Calling unlink so that when the file is closed or program
+ * terminates the temporary file is deleted.
+ */
+ ret = sys_unlink(template);
+ if (ret < 0) {
+ gf_smsg(frame->this->name, GF_LOG_INFO, 0, glusterfsd_msg_39,
+ "delete template=%s", template, NULL);
+ ret = 0;
}
-out :
- return ret;
-}
-
-static gf_boolean_t
-is_graph_topology_equal (glusterfs_graph_t *graph1,
- glusterfs_graph_t *graph2)
-{
- xlator_t *trav1 = NULL;
- xlator_t *trav2 = NULL;
- gf_boolean_t ret = _gf_true;
-
- trav1 = graph1->first;
- trav2 = graph2->first;
-
- ret = xlator_equal_rec (trav1, trav2);
- if (ret) {
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "graphs are not equal");
- ret = _gf_false;
- goto out;
+ tmpfp = fdopen(tmp_fd, "w+b");
+ if (!tmpfp) {
+ ret = -1;
+ goto out;
}
- ret = _gf_true;
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "graphs are equal");
-
-out:
- return ret;
-}
-
-/* Function has 3types of return value 0, -ve , 1
- * return 0 =======> reconfiguration of options has succeeded
- * return 1 =======> the graph has to be reconstructed and all the xlators should be inited
- * return -1(or -ve) =======> Some Internal Error occurred during the operation
- */
-static int
-glusterfs_volfile_reconfigure (FILE *newvolfile_fp)
-{
- glusterfs_graph_t *oldvolfile_graph = NULL;
- glusterfs_graph_t *newvolfile_graph = NULL;
- FILE *oldvolfile_fp = NULL;
- glusterfs_ctx_t *ctx = NULL;
-
- int ret = -1;
-
- oldvolfile_fp = tmpfile ();
- if (!oldvolfile_fp)
- goto out;
-
- if (!oldvollen) {
- ret = 1; // Has to call INIT for the whole graph
- goto out;
+ fwrite(rsp.spec, size, 1, tmpfp);
+ fflush(tmpfp);
+ if (ferror(tmpfp)) {
+ ret = -1;
+ goto out;
}
- fwrite (oldvolfile, oldvollen, 1, oldvolfile_fp);
- fflush (oldvolfile_fp);
+ /* Check if only options have changed. No need to reload the
+ * volfile if topology hasn't changed.
+ * glusterfs_volfile_reconfigure returns 3 possible return states
+ * return 0 =======> reconfiguration of options has succeeded
+ * return 1 =======> the graph has to be reconstructed and all
+ * the xlators should be inited return -1(or -ve) =======> Some Internal
+ * Error occurred during the operation
+ */
- oldvolfile_graph = glusterfs_graph_construct (oldvolfile_fp);
- if (!oldvolfile_graph) {
- goto out;
+ ret = glusterfs_volfile_reconfigure(tmpfp, ctx);
+ if (ret == 0) {
+ gf_log("glusterfsd-mgmt", GF_LOG_DEBUG,
+ "No need to re-load volfile, reconfigure done");
+ if (!volfile_tmp) {
+ ret = -1;
+ UNLOCK(&ctx->volfile_lock);
+ gf_log("mgmt", GF_LOG_ERROR,
+ "Graph reconfigure succeeded with out having "
+ "checksum.");
+ goto post_unlock;
+ }
+ memcpy(volfile_tmp->volfile_checksum, sha256_hash,
+ sizeof(volfile_tmp->volfile_checksum));
+ goto out;
}
- newvolfile_graph = glusterfs_graph_construct (newvolfile_fp);
- if (!newvolfile_graph) {
- goto out;
+ if (ret < 0) {
+ UNLOCK(&ctx->volfile_lock);
+ gf_log("glusterfsd-mgmt", GF_LOG_DEBUG, "Reconfigure failed !!");
+ goto post_unlock;
}
- if (!is_graph_topology_equal (oldvolfile_graph,
- newvolfile_graph)) {
+ ret = glusterfs_process_volfp(ctx, tmpfp);
+ /* tmpfp closed */
+ tmpfp = NULL;
+ tmp_fd = -1;
+ if (ret)
+ goto out;
- ret = 1;
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "Graph topology not equal(should call INIT)");
+ if (!volfile_tmp) {
+ volfile_tmp = GF_CALLOC(1, sizeof(gf_volfile_t),
+ gf_common_volfile_t);
+ if (!volfile_tmp) {
+ ret = -1;
goto out;
- }
-
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "Only options have changed in the new "
- "graph");
+ }
- ctx = glusterfs_ctx_get ();
-
- if (!ctx) {
- gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
- "glusterfs_ctx_get() returned NULL");
- goto out;
+ INIT_LIST_HEAD(&volfile_tmp->volfile_list);
+ volfile_tmp->graph = ctx->active;
+ list_add(&volfile_tmp->volfile_list, &ctx->volfile_list);
+ snprintf(volfile_tmp->vol_id, sizeof(volfile_tmp->vol_id), "%s",
+ volfile_id);
}
+ memcpy(volfile_tmp->volfile_checksum, sha256_hash,
+ sizeof(volfile_tmp->volfile_checksum));
+ }
+ UNLOCK(&ctx->volfile_lock);
- oldvolfile_graph = ctx->active;
-
- if (!oldvolfile_graph) {
- gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
- "glusterfs_ctx->active is NULL");
- goto out;
- }
+ locked = 0;
- /* */
- ret = glusterfs_graph_reconfigure (oldvolfile_graph,
- newvolfile_graph);
- if (ret) {
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "Could not reconfigure new options in old graph");
- goto out;
- }
+post_graph_mgmt:
+ if (!is_mgmt_rpc_reconnect) {
+ need_emancipate = 1;
+ glusterfs_mgmt_pmap_signin(ctx);
+ is_mgmt_rpc_reconnect = _gf_true;
+ }
- ret = 0;
out:
- if (oldvolfile_fp)
- fclose (oldvolfile_fp);
- return ret;
+ if (locked)
+ UNLOCK(&ctx->volfile_lock);
+post_unlock:
+ GF_FREE(frame->local);
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+ free(rsp.spec);
+
+ if (dict)
+ dict_unref(dict);
+
+ // Stop if server is running at an unsupported op-version
+ if (ENOTSUP == ret) {
+ gf_log("mgmt", GF_LOG_ERROR,
+ "Server is operating at an "
+ "op-version which is not supported");
+ cleanup_and_exit(0);
+ }
+
+ if (ret && ctx && !ctx->active) {
+ /* Do it only for the first time */
+ /* Failed to get the volume file, something wrong,
+ restart the process */
+ gf_log("mgmt", GF_LOG_ERROR, "failed to fetch volume file (key:%s)",
+ ctx->cmd_args.volfile_id);
+ cleanup_and_exit(0);
+ }
+
+ if (tmpfp)
+ fclose(tmpfp);
+ else if (tmp_fd != -1)
+ sys_close(tmp_fd);
+
+ return 0;
}
-int
-mgmt_getspec_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
+static int
+glusterfs_volfile_fetch_one(glusterfs_ctx_t *ctx, char *volfile_id)
{
- gf_getspec_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- glusterfs_ctx_t *ctx = NULL;
- int ret = 0;
- ssize_t size = 0;
- FILE *tmpfp = NULL;
-
- frame = myframe;
- ctx = frame->this->ctx;
-
- if (-1 == req->rpc_status) {
- ret = -1;
- goto out;
- }
-
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error");
- ret = -1;
- goto out;
- }
-
- if (-1 == rsp.op_ret) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "failed to get the 'volume file' from server");
- ret = -1;
- goto out;
- }
-
- ret = 0;
- size = rsp.op_ret;
-
- if (size == oldvollen && (memcmp (oldvolfile, rsp.spec, size) == 0)) {
- gf_log (frame->this->name, GF_LOG_INFO,
- "No change in volfile, continuing");
- goto out;
- }
-
- tmpfp = tmpfile ();
- if (!tmpfp) {
- ret = -1;
- goto out;
- }
-
- fwrite (rsp.spec, size, 1, tmpfp);
- fflush (tmpfp);
-
- /* Check if only options have changed. No need to reload the
- * volfile if topology hasn't changed.
- * glusterfs_volfile_reconfigure returns 3 possible return states
- * return 0 =======> reconfiguration of options has succeeded
- * return 1 =======> the graph has to be reconstructed and all the xlators should be inited
- * return -1(or -ve) =======> Some Internal Error occurred during the operation
- */
-
- ret = glusterfs_volfile_reconfigure (tmpfp);
- if (ret == 0) {
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG,
- "No need to re-load volfile, reconfigure done");
- oldvollen = size;
- memcpy (oldvolfile, rsp.spec, size);
- goto out;
- }
+ cmd_args_t *cmd_args = NULL;
+ gf_getspec_req req = {
+ 0,
+ };
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+
+ cmd_args = &ctx->cmd_args;
+ if (!volfile_id) {
+ volfile_id = ctx->cmd_args.volfile_id;
+ if (!volfile_id) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "No volfile-id provided, erroring out");
+ return -1;
+ }
+ }
+
+ frame = create_frame(THIS, ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ req.key = volfile_id;
+ req.flags = 0;
+ /*
+ * We are only storing one variable in local, hence using the same
+ * variable. If multiple local variable is required, create a struct.
+ */
+ frame->local = gf_strdup(volfile_id);
+ if (!frame->local) {
+ ret = -1;
+ goto out;
+ }
- if (ret < 0) {
- gf_log ("glusterfsd-mgmt", GF_LOG_DEBUG, "Reconfigure failed !!");
- goto out;
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ // Set the supported min and max op-versions, so glusterd can make a
+ // decision
+ ret = dict_set_int32(dict, "min-op-version", GD_OP_VERSION_MIN);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to set min-op-version"
+ " in request dict");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, "max-op-version", GD_OP_VERSION_MAX);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to set max-op-version"
+ " in request dict");
+ goto out;
+ }
+
+ /* Ask for a list of volfile (glusterd2 only) servers */
+ if (GF_CLIENT_PROCESS == ctx->process_mode) {
+ req.flags = req.flags | GF_GETSPEC_FLAG_SERVERS_LIST;
+ }
+
+ if (cmd_args->brick_name) {
+ ret = dict_set_dynstr_with_alloc(dict, "brick_name",
+ cmd_args->brick_name);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "Failed to set brick_name in request dict");
+ goto out;
}
+ }
- ret = glusterfs_process_volfp (ctx, tmpfp);
- /* tmpfp closed */
- tmpfp = NULL;
- if (ret)
- goto out;
+ ret = dict_allocate_and_serialize(dict, &req.xdata.xdata_val,
+ &req.xdata.xdata_len);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Failed to serialize dictionary");
+ goto out;
+ }
- oldvollen = size;
- memcpy (oldvolfile, rsp.spec, size);
- if (!is_mgmt_rpc_reconnect) {
- glusterfs_mgmt_pmap_signin (ctx);
- is_mgmt_rpc_reconnect = 1;
- }
+ ret = mgmt_submit_request(&req, frame, ctx, &clnt_handshake_prog,
+ GF_HNDSK_GETSPEC, mgmt_getspec_cbk,
+ (xdrproc_t)xdr_gf_getspec_req);
out:
- STACK_DESTROY (frame->root);
-
- if (rsp.spec)
- free (rsp.spec);
-
- if (ret && ctx && !ctx->active) {
- /* Do it only for the first time */
- /* Failed to get the volume file, something wrong,
- restart the process */
- gf_log ("mgmt", GF_LOG_ERROR,
- "failed to fetch volume file (key:%s)",
- ctx->cmd_args.volfile_id);
- cleanup_and_exit (0);
- }
-
- if (tmpfp)
- fclose (tmpfp);
+ GF_FREE(req.xdata.xdata_val);
+ if (dict)
+ dict_unref(dict);
+ if (ret && frame) {
+ /* Free the frame->local fast, because we have not used memget
+ */
+ GF_FREE(frame->local);
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+ }
- return 0;
+ return ret;
}
-
int
-glusterfs_volfile_fetch (glusterfs_ctx_t *ctx)
+glusterfs_volfile_fetch(glusterfs_ctx_t *ctx)
{
- cmd_args_t *cmd_args = NULL;
- gf_getspec_req req = {0, };
- int ret = 0;
- call_frame_t *frame = NULL;
-
- cmd_args = &ctx->cmd_args;
-
- frame = create_frame (THIS, ctx->pool);
-
- req.key = cmd_args->volfile_id;
- req.flags = 0;
+ xlator_t *server_xl = NULL;
+ xlator_list_t *trav;
+ gf_volfile_t *volfile_obj = NULL;
+ int ret = 0;
+
+ LOCK(&ctx->volfile_lock);
+ {
+ if (ctx->active &&
+ mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name)) {
+ list_for_each_entry(volfile_obj, &ctx->volfile_list, volfile_list)
+ {
+ ret |= glusterfs_volfile_fetch_one(ctx, volfile_obj->vol_id);
+ }
+ UNLOCK(&ctx->volfile_lock);
+ return ret;
+ }
+
+ if (ctx->active) {
+ server_xl = ctx->active->first;
+ if (strcmp(server_xl->type, "protocol/server") != 0) {
+ server_xl = NULL;
+ }
+ }
+ if (!server_xl) {
+ /* Startup (ctx->active not set) or non-server. */
+ UNLOCK(&ctx->volfile_lock);
+ return glusterfs_volfile_fetch_one(ctx, ctx->cmd_args.volfile_id);
+ }
- ret = mgmt_submit_request (&req, frame, ctx, &clnt_handshake_prog,
- GF_HNDSK_GETSPEC, mgmt_getspec_cbk,
- (xdrproc_t)xdr_gf_getspec_req);
- return ret;
+ ret = 0;
+ for (trav = server_xl->children; trav; trav = trav->next) {
+ ret |= glusterfs_volfile_fetch_one(ctx, trav->xlator->volfile_id);
+ }
+ }
+ UNLOCK(&ctx->volfile_lock);
+ return ret;
}
int32_t
-mgmt_event_notify_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
+mgmt_event_notify_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gf_event_notify_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- glusterfs_ctx_t *ctx = NULL;
- int ret = 0;
+ gf_event_notify_rsp rsp = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ int ret = 0;
- frame = myframe;
- ctx = frame->this->ctx;
+ frame = myframe;
- if (-1 == req->rpc_status) {
- ret = -1;
- goto out;
- }
+ if (-1 == req->rpc_status) {
+ ret = -1;
+ goto out;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_event_notify_rsp);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error");
- ret = -1;
- goto out;
- }
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_event_notify_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "XDR decoding error");
+ ret = -1;
+ goto out;
+ }
- if (-1 == rsp.op_ret) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "failed to get the rsp from server");
- ret = -1;
- goto out;
- }
+ if (-1 == rsp.op_ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "failed to get the rsp from server");
+ ret = -1;
+ goto out;
+ }
out:
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val); //malloced by xdr
- return ret;
-
+ free(rsp.dict.dict_val); // malloced by xdr
+ return ret;
}
int32_t
-glusterfs_rebalance_event_notify_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+glusterfs_rebalance_event_notify_cbk(struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
{
- gf_event_notify_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- glusterfs_ctx_t *ctx = NULL;
- int ret = 0;
+ gf_event_notify_rsp rsp = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ int ret = 0;
- frame = myframe;
- ctx = frame->this->ctx;
+ frame = myframe;
- if (-1 == req->rpc_status) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "failed to get the rsp from server");
- ret = -1;
- goto out;
- }
+ if (-1 == req->rpc_status) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "failed to get the rsp from server");
+ ret = -1;
+ goto out;
+ }
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_event_notify_rsp);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_ERROR, "XDR decoding error");
- ret = -1;
- goto out;
- }
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_event_notify_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "XDR decoding error");
+ ret = -1;
+ goto out;
+ }
- if (-1 == rsp.op_ret) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "Recieved error (%s) from server",
- strerror (rsp.op_errno));
- ret = -1;
- goto out;
- }
+ if (-1 == rsp.op_ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "Received error (%s) from server", strerror(rsp.op_errno));
+ ret = -1;
+ goto out;
+ }
out:
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val); //malloced by xdr
- return ret;
+ free(rsp.dict.dict_val); // malloced by xdr
+
+ if (frame) {
+ STACK_DESTROY(frame->root);
+ }
+ return ret;
}
int32_t
-glusterfs_rebalance_event_notify (dict_t *dict)
+glusterfs_rebalance_event_notify(dict_t *dict)
{
- glusterfs_ctx_t *ctx = NULL;
- gf_event_notify_req req = {0,};
- int32_t ret = -1;
- cmd_args_t *cmd_args = NULL;
- call_frame_t *frame = NULL;
-
- ctx = glusterfs_ctx_get ();
- cmd_args = &ctx->cmd_args;
+ glusterfs_ctx_t *ctx = NULL;
+ gf_event_notify_req req = {
+ 0,
+ };
+ int32_t ret = -1;
+ cmd_args_t *cmd_args = NULL;
+ call_frame_t *frame = NULL;
- frame = create_frame (THIS, ctx->pool);
+ ctx = glusterfsd_ctx;
+ cmd_args = &ctx->cmd_args;
- req.op = GF_EN_DEFRAG_STATUS;
+ frame = create_frame(THIS, ctx->pool);
- if (dict) {
- ret = dict_set_str (dict, "volname", cmd_args->volfile_id);
- if (ret)
- gf_log ("", GF_LOG_ERROR, "failed to set volname");
+ req.op = GF_EN_DEFRAG_STATUS;
- ret = dict_allocate_and_serialize (dict, &req.dict.dict_val,
- (size_t *)&req.dict.dict_len);
+ if (dict) {
+ ret = dict_set_str(dict, "volname", cmd_args->volfile_id);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "failed to set volname");
}
+ ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_log("", GF_LOG_ERROR, "failed to serialize dict");
+ }
+ }
- ret = mgmt_submit_request (&req, frame, ctx, &clnt_handshake_prog,
- GF_HNDSK_EVENT_NOTIFY,
- glusterfs_rebalance_event_notify_cbk,
- (xdrproc_t)xdr_gf_event_notify_req);
-
- if (req.dict.dict_val)
- GF_FREE (req.dict.dict_val);
+ ret = mgmt_submit_request(&req, frame, ctx, &clnt_handshake_prog,
+ GF_HNDSK_EVENT_NOTIFY,
+ glusterfs_rebalance_event_notify_cbk,
+ (xdrproc_t)xdr_gf_event_notify_req);
- STACK_DESTROY (frame->root);
- return ret;
+ GF_FREE(req.dict.dict_val);
+ return ret;
}
static int
-mgmt_rpc_notify (struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
- void *data)
+mgmt_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data)
{
- xlator_t *this = NULL;
- cmd_args_t *cmd_args = NULL;
- glusterfs_ctx_t *ctx = NULL;
- int ret = 0;
-
- this = mydata;
- ctx = this->ctx;
- cmd_args = &ctx->cmd_args;
- switch (event) {
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+ server_cmdline_t *server = NULL;
+ rpc_transport_t *rpc_trans = NULL;
+ int need_term = 0;
+ int emval = 0;
+ static int log_ctr1;
+ static int log_ctr2;
+ struct dnscache6 *dnscache = NULL;
+
+ this = mydata;
+ rpc_trans = rpc->conn.trans;
+ ctx = this->ctx;
+
+ switch (event) {
case RPC_CLNT_DISCONNECT:
+ if (rpc_trans->connect_failed) {
+ GF_LOG_OCCASIONALLY(log_ctr1, "glusterfsd-mgmt", GF_LOG_ERROR,
+ "failed to connect to remote-"
+ "host: %s",
+ ctx->cmd_args.volfile_server);
+ } else {
+ GF_LOG_OCCASIONALLY(log_ctr1, "glusterfsd-mgmt", GF_LOG_INFO,
+ "disconnected from remote-"
+ "host: %s",
+ ctx->cmd_args.volfile_server);
+ }
+
+ if (!rpc->disabled) {
+ /*
+ * Check if dnscache is exhausted for current server
+ * and continue until cache is exhausted
+ */
+ dnscache = rpc_trans->dnscache;
+ if (dnscache && dnscache->next) {
+ break;
+ }
+ }
+ server = ctx->cmd_args.curr_server;
+ if (server->list.next == &ctx->cmd_args.volfile_servers) {
+ if (!ctx->active) {
+ need_term = 1;
+ }
+ emval = ENOTCONN;
+ GF_LOG_OCCASIONALLY(log_ctr2, "glusterfsd-mgmt", GF_LOG_INFO,
+ "Exhausted all volfile servers");
+ break;
+ }
+ server = list_entry(server->list.next, typeof(*server), list);
+ ctx->cmd_args.curr_server = server;
+ ctx->cmd_args.volfile_server = server->volfile_server;
+
+ ret = dict_set_str(rpc_trans->options, "remote-host",
+ server->volfile_server);
+ if (ret != 0) {
+ gf_log("glusterfsd-mgmt", GF_LOG_ERROR,
+ "failed to set remote-host: %s", server->volfile_server);
if (!ctx->active) {
- cmd_args->max_connect_attempts--;
- gf_log ("glusterfsd-mgmt", GF_LOG_ERROR,
- "failed to connect with remote-host: %s",
- strerror (errno));
- gf_log ("glusterfsd-mgmt", GF_LOG_INFO,
- "%d connect attempts left",
- cmd_args->max_connect_attempts);
- if (0 >= cmd_args->max_connect_attempts)
- cleanup_and_exit (1);
+ need_term = 1;
}
+ emval = ENOTCONN;
break;
+ }
+ gf_log("glusterfsd-mgmt", GF_LOG_INFO,
+ "connecting to next volfile server %s",
+ server->volfile_server);
+ break;
case RPC_CLNT_CONNECT:
- rpc_clnt_set_connected (&((struct rpc_clnt*)ctx->mgmt)->conn);
-
- ret = glusterfs_volfile_fetch (ctx);
- if (ret && ctx && (ctx->active == NULL)) {
- /* Do it only for the first time */
- /* Exit the process.. there is some wrong options */
- gf_log ("mgmt", GF_LOG_ERROR,
- "failed to fetch volume file (key:%s)",
- ctx->cmd_args.volfile_id);
- cleanup_and_exit (0);
+ ret = glusterfs_volfile_fetch(ctx);
+ if (ret) {
+ emval = ret;
+ if (!ctx->active) {
+ need_term = 1;
+ gf_log("glusterfsd-mgmt", GF_LOG_ERROR,
+ "failed to fetch volume file (key:%s)",
+ ctx->cmd_args.volfile_id);
+ break;
}
+ }
- if (is_mgmt_rpc_reconnect)
- glusterfs_mgmt_pmap_signin (ctx);
- break;
+ if (is_mgmt_rpc_reconnect)
+ glusterfs_mgmt_pmap_signin(ctx);
+
+ break;
default:
- break;
- }
+ break;
+ }
- return 0;
+ if (need_term) {
+ emancipate(ctx, emval);
+ cleanup_and_exit(1);
+ }
+
+ return 0;
}
int
-glusterfs_rpcsvc_notify (rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
- void *data)
+glusterfs_rpcsvc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
+ void *data)
{
- if (!xl || !data) {
- goto out;
- }
+ if (!xl || !data) {
+ goto out;
+ }
- switch (event) {
- case RPCSVC_EVENT_ACCEPT:
- {
- break;
+ switch (event) {
+ case RPCSVC_EVENT_ACCEPT: {
+ break;
}
- case RPCSVC_EVENT_DISCONNECT:
- {
- break;
+ case RPCSVC_EVENT_DISCONNECT: {
+ break;
}
default:
- break;
- }
+ break;
+ }
out:
- return 0;
+ return 0;
}
int
-glusterfs_listener_init (glusterfs_ctx_t *ctx)
+glusterfs_listener_init(glusterfs_ctx_t *ctx)
{
- cmd_args_t *cmd_args = NULL;
- rpcsvc_t *rpc = NULL;
- dict_t *options = NULL;
- int ret = -1;
+ cmd_args_t *cmd_args = NULL;
+ rpcsvc_t *rpc = NULL;
+ dict_t *options = NULL;
+ int ret = -1;
- cmd_args = &ctx->cmd_args;
+ cmd_args = &ctx->cmd_args;
- if (ctx->listener)
- return 0;
+ if (ctx->listener)
+ return 0;
- if (!cmd_args->sock_file)
- return 0;
+ if (!cmd_args->sock_file)
+ return 0;
- ret = rpcsvc_transport_unix_options_build (&options,
- cmd_args->sock_file);
- if (ret)
- goto out;
+ options = dict_new();
+ if (!options)
+ goto out;
- rpc = rpcsvc_init (THIS, ctx, options, 8);
- if (rpc == NULL) {
- goto out;
- }
+ ret = rpcsvc_transport_unix_options_build(options, cmd_args->sock_file);
+ if (ret)
+ goto out;
- ret = rpcsvc_register_notify (rpc, glusterfs_rpcsvc_notify, THIS);
- if (ret) {
- goto out;
- }
+ rpc = rpcsvc_init(THIS, ctx, options, 8);
+ if (rpc == NULL) {
+ goto out;
+ }
- ret = rpcsvc_create_listeners (rpc, options, "glusterfsd");
- if (ret < 1) {
- ret = -1;
- goto out;
- }
+ ret = rpcsvc_register_notify(rpc, glusterfs_rpcsvc_notify, THIS);
+ if (ret) {
+ goto out;
+ }
- ret = rpcsvc_program_register (rpc, &glusterfs_mop_prog);
- if (ret) {
- goto out;
- }
+ ret = rpcsvc_create_listeners(rpc, options, "glusterfsd");
+ if (ret < 1) {
+ goto out;
+ }
+
+ ret = rpcsvc_program_register(rpc, &glusterfs_mop_prog, _gf_false);
+ if (ret) {
+ goto out;
+ }
- ctx->listener = rpc;
+ ctx->listener = rpc;
out:
- return ret;
+ if (options)
+ dict_unref(options);
+ return ret;
}
int
-glusterfs_listener_stop (glusterfs_ctx_t *ctx)
+glusterfs_mgmt_notify(int32_t op, void *data, ...)
{
- cmd_args_t *cmd_args = NULL;
- rpcsvc_t *rpc = NULL;
- rpcsvc_listener_t *listener = NULL;
- rpcsvc_listener_t *next = NULL;
- int ret = 0;
- xlator_t *this = NULL;
-
- GF_ASSERT (ctx);
-
- rpc = ctx->listener;
- ctx->listener = NULL;
+ int ret = 0;
+ switch (op) {
+ case GF_EN_DEFRAG_STATUS:
+ ret = glusterfs_rebalance_event_notify((dict_t *)data);
+ break;
- (void) rpcsvc_program_unregister(rpc, &glusterfs_mop_prog);
-
- list_for_each_entry_safe (listener, next, &rpc->listeners, list) {
- rpcsvc_listener_destroy (listener);
- }
-
- (void) rpcsvc_unregister_notify (rpc, glusterfs_rpcsvc_notify, THIS);
-
- GF_FREE (rpc);
-
- cmd_args = &ctx->cmd_args;
- if (cmd_args->sock_file) {
- ret = unlink (cmd_args->sock_file);
- if (ret && (ENOENT == errno)) {
- ret = 0;
- }
- }
+ default:
+ gf_log("", GF_LOG_ERROR, "Invalid op");
+ break;
+ }
- if (ret) {
- this = THIS;
- gf_log (this->name, GF_LOG_ERROR, "Failed to unlink linstener "
- "socket %s, error: %s", cmd_args->sock_file,
- strerror (errno));
- }
- return ret;
+ return ret;
}
int
-glusterfs_mgmt_init (glusterfs_ctx_t *ctx)
+glusterfs_mgmt_init(glusterfs_ctx_t *ctx)
{
- cmd_args_t *cmd_args = NULL;
- struct rpc_clnt *rpc = NULL;
- dict_t *options = NULL;
- int ret = -1;
- int port = GF_DEFAULT_BASE_PORT;
- char *host = NULL;
-
- cmd_args = &ctx->cmd_args;
-
- if (ctx->mgmt)
- return 0;
+ cmd_args_t *cmd_args = NULL;
+ struct rpc_clnt *rpc = NULL;
+ dict_t *options = NULL;
+ int ret = -1;
+ int port = GF_DEFAULT_BASE_PORT;
+ char *host = NULL;
+ xlator_cmdline_option_t *opt = NULL;
+
+ cmd_args = &ctx->cmd_args;
+ GF_VALIDATE_OR_GOTO(THIS->name, cmd_args->volfile_server, out);
+
+ if (ctx->mgmt)
+ return 0;
- if (cmd_args->volfile_server_port)
- port = cmd_args->volfile_server_port;
+ options = dict_new();
+ if (!options)
+ goto out;
- host = "localhost";
- if (cmd_args->volfile_server)
- host = cmd_args->volfile_server;
+ LOCK_INIT(&ctx->volfile_lock);
- ret = rpc_transport_inet_options_build (&options, host, port);
- if (ret)
- goto out;
+ if (cmd_args->volfile_server_port)
+ port = cmd_args->volfile_server_port;
- rpc = rpc_clnt_new (options, THIS->ctx, THIS->name, 8);
- if (!rpc) {
- ret = -1;
- gf_log (THIS->name, GF_LOG_WARNING, "failed to create rpc clnt");
- goto out;
- }
+ host = cmd_args->volfile_server;
- ret = rpc_clnt_register_notify (rpc, mgmt_rpc_notify, THIS);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "failed to register notify function");
- goto out;
- }
+ if (cmd_args->volfile_server_transport &&
+ !strcmp(cmd_args->volfile_server_transport, "unix")) {
+ ret = rpc_transport_unix_options_build(options, host, 0);
+ } else {
+ opt = find_xlator_option_in_cmd_args_t("address-family", cmd_args);
+ ret = rpc_transport_inet_options_build(options, host, port,
+ (opt ? opt->value : NULL));
+ }
+ if (ret)
+ goto out;
- ret = rpcclnt_cbk_program_register (rpc, &mgmt_cbk_prog);
+ /* Explicitly turn on encrypted transport. */
+ if (ctx->secure_mgmt) {
+ ret = dict_set_dynstr_with_alloc(options,
+ "transport.socket.ssl-enabled", "yes");
if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "failed to register callback function");
- goto out;
- }
-
- /* This value should be set before doing the 'rpc_clnt_start()' as
- the notify function uses this variable */
- ctx->mgmt = rpc;
-
- ret = rpc_clnt_start (rpc);
-out:
- return ret;
-}
-
-static int
-mgmt_pmap_signin2_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- pmap_signin_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- int ret = 0;
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "failed to set 'transport.socket.ssl-enabled' "
+ "in options dict");
+ goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_pmap_signin_rsp);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_ERROR, "XDR decode error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
+ ctx->ssl_cert_depth = glusterfs_read_secure_access_file();
+ }
- if (-1 == rsp.op_ret) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "failed to register the port with glusterd");
- goto out;
- }
+ rpc = rpc_clnt_new(options, THIS, THIS->name, 8);
+ if (!rpc) {
+ ret = -1;
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to create rpc clnt");
+ goto out;
+ }
+
+ ret = rpc_clnt_register_notify(rpc, mgmt_rpc_notify, THIS);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "failed to register notify function");
+ goto out;
+ }
+
+ ret = rpcclnt_cbk_program_register(rpc, &mgmt_cbk_prog, THIS);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "failed to register callback function");
+ goto out;
+ }
+
+ ctx->notify = glusterfs_mgmt_notify;
+
+ /* This value should be set before doing the 'rpc_clnt_start()' as
+ the notify function uses this variable */
+ ctx->mgmt = rpc;
+
+ ret = rpc_clnt_start(rpc);
out:
- STACK_DESTROY (frame->root);
- return 0;
-
+ if (options)
+ dict_unref(options);
+ return ret;
}
static int
-mgmt_pmap_signin_cbk (struct rpc_req *req, struct iovec *iov, int count,
+mgmt_pmap_signin2_cbk(struct rpc_req *req, struct iovec *iov, int count,
void *myframe)
{
- pmap_signin_rsp rsp = {0,};
- call_frame_t *frame = NULL;
- int ret = 0;
- pmap_signin_req pmap_req = {0, };
- cmd_args_t *cmd_args = NULL;
- glusterfs_ctx_t *ctx = NULL;
- char brick_name[PATH_MAX] = {0,};
-
- frame = myframe;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_pmap_signin_rsp);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_ERROR, "XDR decode error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 == rsp.op_ret) {
- gf_log (frame->this->name, GF_LOG_ERROR,
- "failed to register the port with glusterd");
- goto out;
- }
+ pmap_signin_rsp rsp = {
+ 0,
+ };
+ glusterfs_ctx_t *ctx = NULL;
+ call_frame_t *frame = NULL;
+ int ret = 0;
- ctx = glusterfs_ctx_get ();
- cmd_args = &ctx->cmd_args;
+ ctx = glusterfsd_ctx;
+ frame = myframe;
- if (!cmd_args->brick_port2) {
- /* We are done with signin process */
- goto out;
- }
-
- snprintf (brick_name, PATH_MAX, "%s.rdma", cmd_args->brick_name);
- pmap_req.port = cmd_args->brick_port2;
- pmap_req.brick = brick_name;
-
- ret = mgmt_submit_request (&pmap_req, frame, ctx, &clnt_pmap_prog,
- GF_PMAP_SIGNIN, mgmt_pmap_signin2_cbk,
- (xdrproc_t)xdr_pmap_signin_req);
- if (ret)
- goto out;
-
- return 0;
+ if (-1 == req->rpc_status) {
+ ret = -1;
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_pmap_signin_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "XDR decode error");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "failed to register the port with glusterd");
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
out:
+ if (need_emancipate)
+ emancipate(ctx, ret);
- STACK_DESTROY (frame->root);
- return 0;
+ STACK_DESTROY(frame->root);
+ return 0;
}
-int
-glusterfs_mgmt_pmap_signin (glusterfs_ctx_t *ctx)
+static int
+mgmt_pmap_signin_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- call_frame_t *frame = NULL;
- pmap_signin_req req = {0, };
- int ret = -1;
- cmd_args_t *cmd_args = NULL;
+ pmap_signin_rsp rsp = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ int ret = 0;
+ int emancipate_ret = -1;
+ pmap_signin_req pmap_req = {
+ 0,
+ };
+ cmd_args_t *cmd_args = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ char brick_name[PATH_MAX] = {
+ 0,
+ };
+
+ frame = myframe;
+ ctx = glusterfsd_ctx;
+ cmd_args = &ctx->cmd_args;
+
+ if (-1 == req->rpc_status) {
+ ret = -1;
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_pmap_signin_rsp);
+ if (ret < 0) {
+ gf_log(frame->this->name, GF_LOG_ERROR, "XDR decode error");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log(frame->this->name, GF_LOG_ERROR,
+ "failed to register the port with glusterd");
+ ret = -1;
+ goto out;
+ }
- frame = create_frame (THIS, ctx->pool);
- cmd_args = &ctx->cmd_args;
+ if (!cmd_args->brick_port2) {
+ /* We are done with signin process */
+ emancipate_ret = 0;
+ goto out;
+ }
- if (!cmd_args->brick_port || !cmd_args->brick_name) {
- gf_log ("fsd-mgmt", GF_LOG_DEBUG,
- "portmapper signin arguments not given");
- goto out;
- }
+ snprintf(brick_name, PATH_MAX, "%s.rdma", cmd_args->brick_name);
+ pmap_req.port = cmd_args->brick_port2;
+ pmap_req.brick = brick_name;
- req.port = cmd_args->brick_port;
- req.brick = cmd_args->brick_name;
+ ret = mgmt_submit_request(&pmap_req, frame, ctx, &clnt_pmap_prog,
+ GF_PMAP_SIGNIN, mgmt_pmap_signin2_cbk,
+ (xdrproc_t)xdr_pmap_signin_req);
+ if (ret)
+ goto out;
- ret = mgmt_submit_request (&req, frame, ctx, &clnt_pmap_prog,
- GF_PMAP_SIGNIN, mgmt_pmap_signin_cbk,
- (xdrproc_t)xdr_pmap_signin_req);
+ return 0;
out:
- return ret;
-}
+ if (need_emancipate && (ret < 0 || !cmd_args->brick_port2))
+ emancipate(ctx, emancipate_ret);
-
-static int
-mgmt_pmap_signout_cbk (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe)
-{
- pmap_signout_rsp rsp = {0,};
- int ret = 0;
- glusterfs_ctx_t *ctx = NULL;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- ctx = glusterfs_ctx_get ();
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_pmap_signout_rsp);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "XDR decoding failed");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- if (-1 == rsp.op_ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to register the port with glusterd");
- goto out;
- }
-out:
- return 0;
+ STACK_DESTROY(frame->root);
+ return 0;
}
-
int
-glusterfs_mgmt_pmap_signout (glusterfs_ctx_t *ctx)
+glusterfs_mgmt_pmap_signin(glusterfs_ctx_t *ctx)
{
- int ret = 0;
- pmap_signout_req req = {0, };
- call_frame_t *frame = NULL;
- cmd_args_t *cmd_args = NULL;
-
- frame = create_frame (THIS, ctx->pool);
- cmd_args = &ctx->cmd_args;
-
- if (!cmd_args->brick_port || !cmd_args->brick_name) {
- gf_log ("fsd-mgmt", GF_LOG_DEBUG,
- "portmapper signout arguments not given");
- goto out;
- }
-
- req.port = cmd_args->brick_port;
- req.brick = cmd_args->brick_name;
+ call_frame_t *frame = NULL;
+ xlator_list_t **trav_p;
+ xlator_t *top;
+ pmap_signin_req req = {
+ 0,
+ };
+ int ret = -1;
+ int emancipate_ret = -1;
+ cmd_args_t *cmd_args = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args->brick_port || !cmd_args->brick_name) {
+ gf_log("fsd-mgmt", GF_LOG_DEBUG,
+ "portmapper signin arguments not given");
+ emancipate_ret = 0;
+ goto out;
+ }
+
+ req.port = cmd_args->brick_port;
+ req.pid = (int)getpid(); /* only glusterd2 consumes this */
+
+ if (ctx->active) {
+ top = ctx->active->first;
+ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ frame = create_frame(THIS, ctx->pool);
+ req.brick = (*trav_p)->xlator->name;
+ ret = mgmt_submit_request(&req, frame, ctx, &clnt_pmap_prog,
+ GF_PMAP_SIGNIN, mgmt_pmap_signin_cbk,
+ (xdrproc_t)xdr_pmap_signin_req);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "failed to send sign in request; brick = %s", req.brick);
+ }
+ }
+ }
+
+ /* unfortunately, the caller doesn't care about the returned value */
- ret = mgmt_submit_request (&req, frame, ctx, &clnt_pmap_prog,
- GF_PMAP_SIGNOUT, mgmt_pmap_signout_cbk,
- (xdrproc_t)xdr_pmap_signout_req);
out:
- return ret;
+ if (need_emancipate && ret < 0)
+ emancipate(ctx, emancipate_ret);
+ return ret;
}
diff --git a/glusterfsd/src/glusterfsd.c b/glusterfsd/src/glusterfsd.c
index 433cabef111..dae41f33fef 100644
--- a/glusterfsd/src/glusterfsd.c
+++ b/glusterfsd/src/glusterfsd.c
@@ -1,22 +1,12 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
@@ -29,6 +19,7 @@
#include <netdb.h>
#include <signal.h>
#include <libgen.h>
+#include <dlfcn.h>
#include <sys/utsname.h>
@@ -41,1685 +32,2707 @@
#include <errno.h>
#include <pwd.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
+#ifdef GF_LINUX_HOST_OS
+#ifdef HAVE_LINUX_OOM_H
+#include <linux/oom.h>
+#else
+#define OOM_SCORE_ADJ_MIN (-1000)
+#define OOM_SCORE_ADJ_MAX 1000
+#define OOM_DISABLE (-17)
+#define OOM_ADJUST_MAX 15
+#endif
#endif
#ifdef HAVE_MALLOC_H
#include <malloc.h>
#endif
-#ifdef HAVE_MALLOC_STATS
-#ifdef DEBUG
-#include <mcheck.h>
-#endif
-#endif
-
-#include "xlator.h"
-#include "glusterfs.h"
-#include "compat.h"
-#include "logging.h"
-#include "dict.h"
-#include "list.h"
-#include "timer.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/logging.h>
+#include "glusterfsd-messages.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/list.h>
+#include <glusterfs/timer.h>
#include "glusterfsd.h"
-#include "stack.h"
-#include "revision.h"
-#include "common-utils.h"
-#include "event.h"
-#include "globals.h"
-#include "statedump.h"
-#include "latency.h"
+#include <glusterfs/revision.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/gf-event.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/latency.h>
#include "glusterfsd-mem-types.h"
-#include "syscall.h"
-#include "call-stub.h"
+#include <glusterfs/syscall.h>
+#include <glusterfs/call-stub.h>
#include <fnmatch.h>
#include "rpc-clnt.h"
-#include "syncop.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/client_t.h>
+#include "netgroups.h"
+#include "exports.h"
+#include <glusterfs/monitoring.h>
-#include "daemon.h"
-
-/* process mode definitions */
-#define GF_SERVER_PROCESS 0
-#define GF_CLIENT_PROCESS 1
-#define GF_GLUSTERD_PROCESS 2
+#include <glusterfs/daemon.h>
/* using argp for command line parsing */
static char gf_doc[] = "";
-static char argp_doc[] = "--volfile-server=SERVER [MOUNT-POINT]\n" \
- "--volfile=VOLFILE [MOUNT-POINT]";
-const char *argp_program_version = "" \
- PACKAGE_NAME" "PACKAGE_VERSION" built on "__DATE__" "__TIME__ \
- "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION "\n" \
- "Copyright (c) 2006-2011 Gluster Inc. " \
- "<http://www.gluster.com>\n" \
- "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n" \
- "You may redistribute copies of GlusterFS under the terms of "\
- "the GNU General Public License.";
+static char argp_doc[] =
+ "--volfile-server=SERVER [MOUNT-POINT]\n"
+ "--volfile=VOLFILE [MOUNT-POINT]";
+const char *argp_program_version =
+ "" PACKAGE_NAME " " PACKAGE_VERSION
+ "\nRepository revision: " GLUSTERFS_REPOSITORY_REVISION
+ "\n"
+ "Copyright (c) 2006-2016 Red Hat, Inc. "
+ "<https://www.gluster.org/>\n"
+ "GlusterFS comes with ABSOLUTELY NO WARRANTY.\n"
+ "It is licensed to you under your choice of the GNU Lesser\n"
+ "General Public License, version 3 or any later version (LGPLv3\n"
+ "or later), or the GNU General Public License, version 2 (GPLv2),\n"
+ "in all cases as published by the Free Software Foundation.";
const char *argp_program_bug_address = "<" PACKAGE_BUGREPORT ">";
-static error_t parse_opts (int32_t key, char *arg, struct argp_state *_state);
+static error_t
+parse_opts(int32_t key, char *arg, struct argp_state *_state);
static struct argp_option gf_options[] = {
- {0, 0, 0, 0, "Basic options:"},
- {"volfile-server", ARGP_VOLFILE_SERVER_KEY, "SERVER", 0,
- "Server to get the volume file from. This option overrides "
- "--volfile option"},
- {"volfile-max-fetch-attempts", ARGP_VOLFILE_MAX_FETCH_ATTEMPTS,
- "MAX-ATTEMPTS", 0, "Maximum number of connect attempts to server. "
- "This option should be provided with --volfile-server option"
- "[default: 1]"},
- {"volfile", ARGP_VOLUME_FILE_KEY, "VOLFILE", 0,
- "File to use as VOLUME_FILE"},
- {"spec-file", ARGP_VOLUME_FILE_KEY, "VOLFILE", OPTION_HIDDEN,
- "File to use as VOLUME FILE"},
-
- {"log-level", ARGP_LOG_LEVEL_KEY, "LOGLEVEL", 0,
- "Logging severity. Valid options are DEBUG, INFO, WARNING, ERROR, "
- "CRITICAL and NONE [default: INFO]"},
- {"log-file", ARGP_LOG_FILE_KEY, "LOGFILE", 0,
- "File to use for logging [default: "
- DEFAULT_LOG_FILE_DIRECTORY "/" PACKAGE_NAME ".log" "]"},
-
- {0, 0, 0, 0, "Advanced Options:"},
- {"volfile-server-port", ARGP_VOLFILE_SERVER_PORT_KEY, "PORT", 0,
- "Listening port number of volfile server"},
- {"volfile-server-transport", ARGP_VOLFILE_SERVER_TRANSPORT_KEY,
- "TRANSPORT", 0,
- "Transport type to get volfile from server [default: socket]"},
- {"volfile-id", ARGP_VOLFILE_ID_KEY, "KEY", 0,
- "'key' of the volfile to be fetched from server"},
- {"pid-file", ARGP_PID_FILE_KEY, "PIDFILE", 0,
- "File to use as pid file"},
- {"socket-file", ARGP_SOCK_FILE_KEY, "SOCKFILE", 0,
- "File to use as unix-socket"},
- {"no-daemon", ARGP_NO_DAEMON_KEY, 0, 0,
- "Run in foreground"},
- {"run-id", ARGP_RUN_ID_KEY, "RUN-ID", OPTION_HIDDEN,
- "Run ID for the process, used by scripts to keep track of process "
- "they started, defaults to none"},
- {"debug", ARGP_DEBUG_KEY, 0, 0,
- "Run in debug mode. This option sets --no-daemon, --log-level "
- "to DEBUG and --log-file to console"},
- {"volume-name", ARGP_VOLUME_NAME_KEY, "XLATOR-NAME", 0,
- "Translator name to be used for MOUNT-POINT [default: top most volume "
- "definition in VOLFILE]"},
- {"xlator-option", ARGP_XLATOR_OPTION_KEY,"XLATOR-NAME.OPTION=VALUE", 0,
- "Add/override an option for a translator in volume file with specified"
- " value"},
- {"read-only", ARGP_READ_ONLY_KEY, 0, 0,
- "Mount the filesystem in 'read-only' mode"},
- {"acl", ARGP_ACL_KEY, 0, 0,
- "Mount the filesystem with POSIX ACL support"},
- {"selinux", ARGP_SELINUX_KEY, 0, 0,
- "Enable SELinux label (extened attributes) support on inodes"},
- {"worm", ARGP_WORM_KEY, 0, 0,
- "Mount the filesystem in 'worm' mode"},
- {"mac-compat", ARGP_MAC_COMPAT_KEY, "BOOL", OPTION_ARG_OPTIONAL,
- "Provide stubs for attributes needed for seamless operation on Macs "
+ {0, 0, 0, 0, "Basic options:"},
+ {"volfile-server", ARGP_VOLFILE_SERVER_KEY, "SERVER", 0,
+ "Server to get the volume file from. Unix domain socket path when "
+ "transport type 'unix'. This option overrides --volfile option"},
+ {"volfile", ARGP_VOLUME_FILE_KEY, "VOLFILE", 0,
+ "File to use as VOLUME_FILE"},
+ {"spec-file", ARGP_VOLUME_FILE_KEY, "VOLFILE", OPTION_HIDDEN,
+ "File to use as VOLUME FILE"},
+
+ {"log-level", ARGP_LOG_LEVEL_KEY, "LOGLEVEL", 0,
+ "Logging severity. Valid options are DEBUG, INFO, WARNING, ERROR, "
+ "CRITICAL, TRACE and NONE [default: INFO]"},
+ {"log-file", ARGP_LOG_FILE_KEY, "LOGFILE", 0,
+ "File to use for logging [default: " DEFAULT_LOG_FILE_DIRECTORY
+ "/" PACKAGE_NAME ".log"
+ "]"},
+ {"logger", ARGP_LOGGER, "LOGGER", 0,
+ "Set which logging sub-system to "
+ "log to, valid options are: gluster-log and syslog, "
+ "[default: \"gluster-log\"]"},
+ {"log-format", ARGP_LOG_FORMAT, "LOG-FORMAT", 0,
+ "Set log format, valid"
+ " options are: no-msg-id and with-msg-id, [default: \"with-msg-id\"]"},
+ {"log-buf-size", ARGP_LOG_BUF_SIZE, "LOG-BUF-SIZE", 0,
+ "Set logging "
+ "buffer size, [default: 5]"},
+ {"log-flush-timeout", ARGP_LOG_FLUSH_TIMEOUT, "LOG-FLUSH-TIMEOUT", 0,
+ "Set log flush timeout, [default: 2 minutes]"},
+
+ {0, 0, 0, 0, "Advanced Options:"},
+ {"volfile-server-port", ARGP_VOLFILE_SERVER_PORT_KEY, "PORT", 0,
+ "Listening port number of volfile server"},
+ {"volfile-server-transport", ARGP_VOLFILE_SERVER_TRANSPORT_KEY, "TRANSPORT",
+ 0, "Transport type to get volfile from server [default: socket]"},
+ {"volfile-id", ARGP_VOLFILE_ID_KEY, "KEY", 0,
+ "'key' of the volfile to be fetched from server"},
+ {"pid-file", ARGP_PID_FILE_KEY, "PIDFILE", 0, "File to use as pid file"},
+ {"socket-file", ARGP_SOCK_FILE_KEY, "SOCKFILE", 0,
+ "File to use as unix-socket"},
+ {"no-daemon", ARGP_NO_DAEMON_KEY, 0, 0, "Run in foreground"},
+ {"run-id", ARGP_RUN_ID_KEY, "RUN-ID", OPTION_HIDDEN,
+ "Run ID for the process, used by scripts to keep track of process "
+ "they started, defaults to none"},
+ {"debug", ARGP_DEBUG_KEY, 0, 0,
+ "Run in debug mode. This option sets --no-daemon, --log-level "
+ "to DEBUG and --log-file to console"},
+ {"volume-name", ARGP_VOLUME_NAME_KEY, "XLATOR-NAME", 0,
+ "Translator name to be used for MOUNT-POINT [default: top most volume "
+ "definition in VOLFILE]"},
+ {"xlator-option", ARGP_XLATOR_OPTION_KEY, "XLATOR-NAME.OPTION=VALUE", 0,
+ "Add/override an option for a translator in volume file with specified"
+ " value"},
+ {"read-only", ARGP_READ_ONLY_KEY, 0, 0,
+ "Mount the filesystem in 'read-only' mode"},
+ {"acl", ARGP_ACL_KEY, 0, 0, "Mount the filesystem with POSIX ACL support"},
+ {"selinux", ARGP_SELINUX_KEY, 0, 0,
+ "Enable SELinux label (extended attributes) support on inodes"},
+ {"capability", ARGP_CAPABILITY_KEY, 0, 0,
+ "Enable Capability (extended attributes) support on inodes"},
+ {"subdir-mount", ARGP_SUBDIR_MOUNT_KEY, "SUBDIR-PATH", 0,
+ "Mount subdirectory given [default: NULL]"},
+
+ {"print-netgroups", ARGP_PRINT_NETGROUPS, "NETGROUP-FILE", 0,
+ "Validate the netgroups file and print it out"},
+ {"print-exports", ARGP_PRINT_EXPORTS, "EXPORTS-FILE", 0,
+ "Validate the exports file and print it out"},
+ {"print-xlatordir", ARGP_PRINT_XLATORDIR_KEY, 0, OPTION_ARG_OPTIONAL,
+ "Print xlator directory path"},
+ {"print-statedumpdir", ARGP_PRINT_STATEDUMPDIR_KEY, 0, OPTION_ARG_OPTIONAL,
+ "Print directory path in which statedumps shall be generated"},
+ {"print-logdir", ARGP_PRINT_LOGDIR_KEY, 0, OPTION_ARG_OPTIONAL,
+ "Print path of default log directory"},
+ {"print-libexecdir", ARGP_PRINT_LIBEXECDIR_KEY, 0, OPTION_ARG_OPTIONAL,
+ "Print path of default libexec directory"},
+
+ {"volfile-max-fetch-attempts", ARGP_VOLFILE_MAX_FETCH_ATTEMPTS, "0",
+ OPTION_HIDDEN, "Maximum number of attempts to fetch the volfile"},
+ {"aux-gfid-mount", ARGP_AUX_GFID_MOUNT_KEY, 0, 0,
+ "Enable access to filesystem through gfid directly"},
+ {"enable-ino32", ARGP_INODE32_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Use 32-bit inodes when mounting to workaround broken applications"
+ "that don't support 64-bit inodes"},
+ {"worm", ARGP_WORM_KEY, 0, 0, "Mount the filesystem in 'worm' mode"},
+ {"mac-compat", ARGP_MAC_COMPAT_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Provide stubs for attributes needed for seamless operation on Macs "
#ifdef GF_DARWIN_HOST_OS
- "[default: \"on\" on client side, else \"off\"]"
+ "[default: \"on\" on client side, else \"off\"]"
#else
- "[default: \"off\"]"
+ "[default: \"off\"]"
#endif
- },
- {"brick-name", ARGP_BRICK_NAME_KEY, "BRICK-NAME", OPTION_HIDDEN,
- "Brick name to be registered with Gluster portmapper" },
- {"brick-port", ARGP_BRICK_PORT_KEY, "BRICK-PORT", OPTION_HIDDEN,
- "Brick Port to be registered with Gluster portmapper" },
-
- {0, 0, 0, 0, "Fuse options:"},
- {"direct-io-mode", ARGP_DIRECT_IO_MODE_KEY, "BOOL", OPTION_ARG_OPTIONAL,
- "Use direct I/O mode in fuse kernel module"
- " [default: \"off\" if big writes are supported, else "
- "\"on\" for fds not opened with O_RDONLY]"},
- {"entry-timeout", ARGP_ENTRY_TIMEOUT_KEY, "SECONDS", 0,
- "Set entry timeout to SECONDS in fuse kernel module [default: 1]"},
- {"attribute-timeout", ARGP_ATTRIBUTE_TIMEOUT_KEY, "SECONDS", 0,
- "Set attribute timeout to SECONDS for inodes in fuse kernel module "
- "[default: 1]"},
- {"client-pid", ARGP_CLIENT_PID_KEY, "PID", OPTION_HIDDEN,
- "client will authenticate itself with process id PID to server"},
- {"user-map-root", ARGP_USER_MAP_ROOT_KEY, "USER", OPTION_HIDDEN,
- "replace USER with root in messages"},
- {"dump-fuse", ARGP_DUMP_FUSE_KEY, "PATH", 0,
- "Dump fuse traffic to PATH"},
- {"volfile-check", ARGP_VOLFILE_CHECK_KEY, 0, 0,
- "Enable strict volume file checking"},
- {"mem-accounting", ARGP_MEM_ACCOUNTING_KEY, 0, OPTION_HIDDEN,
- "Enable internal memory accounting"},
- {0, 0, 0, 0, "Miscellaneous Options:"},
- {0, }
-};
-
-
-static struct argp argp = { gf_options, parse_opts, argp_doc, gf_doc };
-
-int glusterfs_pidfile_cleanup (glusterfs_ctx_t *ctx);
-int glusterfs_volumes_init (glusterfs_ctx_t *ctx);
-int glusterfs_mgmt_init (glusterfs_ctx_t *ctx);
-int glusterfs_listener_init (glusterfs_ctx_t *ctx);
-int glusterfs_listener_stop (glusterfs_ctx_t *ctx);
+ },
+ {"brick-name", ARGP_BRICK_NAME_KEY, "BRICK-NAME", OPTION_HIDDEN,
+ "Brick name to be registered with Gluster portmapper"},
+ {"brick-port", ARGP_BRICK_PORT_KEY, "BRICK-PORT", OPTION_HIDDEN,
+ "Brick Port to be registered with Gluster portmapper"},
+ {"fopen-keep-cache", ARGP_FOPEN_KEEP_CACHE_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Do not purge the cache on file open [default: false]"},
+ {"global-timer-wheel", ARGP_GLOBAL_TIMER_WHEEL, "BOOL", OPTION_ARG_OPTIONAL,
+ "Instantiate process global timer-wheel"},
+ {"thin-client", ARGP_THIN_CLIENT_KEY, 0, 0,
+ "Enables thin mount and connects via gfproxyd daemon"},
+ {"global-threading", ARGP_GLOBAL_THREADING_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Use the global thread pool instead of io-threads"},
+ {0, 0, 0, 0, "Fuse options:"},
+ {"direct-io-mode", ARGP_DIRECT_IO_MODE_KEY, "BOOL|auto",
+ OPTION_ARG_OPTIONAL, "Specify direct I/O strategy [default: \"auto\"]"},
+ {"entry-timeout", ARGP_ENTRY_TIMEOUT_KEY, "SECONDS", 0,
+ "Set entry timeout to SECONDS in fuse kernel module [default: 1]"},
+ {"negative-timeout", ARGP_NEGATIVE_TIMEOUT_KEY, "SECONDS", 0,
+ "Set negative timeout to SECONDS in fuse kernel module [default: 0]"},
+ {"attribute-timeout", ARGP_ATTRIBUTE_TIMEOUT_KEY, "SECONDS", 0,
+ "Set attribute timeout to SECONDS for inodes in fuse kernel module "
+ "[default: 1]"},
+ {"gid-timeout", ARGP_GID_TIMEOUT_KEY, "SECONDS", 0,
+ "Set auxiliary group list timeout to SECONDS for fuse translator "
+ "[default: 300]"},
+ {"resolve-gids", ARGP_RESOLVE_GIDS_KEY, 0, 0,
+ "Resolve all auxiliary groups in fuse translator (max 32 otherwise)"},
+ {"lru-limit", ARGP_FUSE_LRU_LIMIT_KEY, "N", 0,
+ "Set fuse module's limit for number of inodes kept in LRU list to N "
+ "[default: 65536]"},
+ {"invalidate-limit", ARGP_FUSE_INVALIDATE_LIMIT_KEY, "N", 0,
+ "Suspend inode invalidations implied by 'lru-limit' if the number of "
+ "outstanding invalidations reaches N"},
+ {"background-qlen", ARGP_FUSE_BACKGROUND_QLEN_KEY, "N", 0,
+ "Set fuse module's background queue length to N "
+ "[default: 64]"},
+ {"congestion-threshold", ARGP_FUSE_CONGESTION_THRESHOLD_KEY, "N", 0,
+ "Set fuse module's congestion threshold to N "
+ "[default: 48]"},
+#ifdef GF_LINUX_HOST_OS
+ {"oom-score-adj", ARGP_OOM_SCORE_ADJ_KEY, "INTEGER", 0,
+ "Set oom_score_adj value for process"
+ "[default: 0]"},
+#endif
+ {"client-pid", ARGP_CLIENT_PID_KEY, "PID", OPTION_HIDDEN,
+ "client will authenticate itself with process id PID to server"},
+ {"no-root-squash", ARGP_FUSE_NO_ROOT_SQUASH_KEY, "BOOL",
+ OPTION_ARG_OPTIONAL,
+ "disable/enable root squashing for the trusted "
+ "client"},
+ {"user-map-root", ARGP_USER_MAP_ROOT_KEY, "USER", OPTION_HIDDEN,
+ "replace USER with root in messages"},
+ {"dump-fuse", ARGP_DUMP_FUSE_KEY, "PATH", 0, "Dump fuse traffic to PATH"},
+ {"volfile-check", ARGP_VOLFILE_CHECK_KEY, 0, 0,
+ "Enable strict volume file checking"},
+ {"no-mem-accounting", ARGP_MEM_ACCOUNTING_KEY, 0, OPTION_HIDDEN,
+ "disable internal memory accounting"},
+ {"fuse-mountopts", ARGP_FUSE_MOUNTOPTS_KEY, "OPTIONS", OPTION_HIDDEN,
+ "Extra mount options to pass to FUSE"},
+ {"use-readdirp", ARGP_FUSE_USE_READDIRP_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Use readdirp mode in fuse kernel module"
+ " [default: \"yes\"]"},
+ {"secure-mgmt", ARGP_SECURE_MGMT_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "Override default for secure (SSL) management connections"},
+ {"localtime-logging", ARGP_LOCALTIME_LOGGING_KEY, 0, 0,
+ "Enable localtime logging"},
+ {"process-name", ARGP_PROCESS_NAME_KEY, "PROCESS-NAME", OPTION_HIDDEN,
+ "option to specify the process type"},
+ {"event-history", ARGP_FUSE_EVENT_HISTORY_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "disable/enable fuse event-history"},
+ {"reader-thread-count", ARGP_READER_THREAD_COUNT_KEY, "INTEGER",
+ OPTION_ARG_OPTIONAL, "set fuse reader thread count"},
+ {"kernel-writeback-cache", ARGP_KERNEL_WRITEBACK_CACHE_KEY, "BOOL",
+ OPTION_ARG_OPTIONAL, "enable fuse in-kernel writeback cache"},
+ {"attr-times-granularity", ARGP_ATTR_TIMES_GRANULARITY_KEY, "NS",
+ OPTION_ARG_OPTIONAL,
+ "declare supported granularity of file attribute"
+ " times in nanoseconds"},
+ {"fuse-flush-handle-interrupt", ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY,
+ "BOOL", OPTION_ARG_OPTIONAL | OPTION_HIDDEN,
+ "handle interrupt in fuse FLUSH handler"},
+ {"auto-invalidation", ARGP_FUSE_AUTO_INVAL_KEY, "BOOL", OPTION_ARG_OPTIONAL,
+ "controls whether fuse-kernel can auto-invalidate "
+ "attribute, dentry and page-cache. "
+ "Disable this only if same files/directories are not accessed across "
+ "two different mounts concurrently [default: \"on\"]"},
+ {"fuse-dev-eperm-ratelimit-ns", ARGP_FUSE_DEV_EPERM_RATELIMIT_NS_KEY,
+ "OPTIONS", OPTION_HIDDEN,
+ "rate limit reading from fuse device upon EPERM failure"},
+ {"brick-mux", ARGP_BRICK_MUX_KEY, 0, 0, "Enable brick mux. "},
+ {0, 0, 0, 0, "Miscellaneous Options:"},
+ {
+ 0,
+ }};
+
+static struct argp argp = {gf_options, parse_opts, argp_doc, gf_doc};
int
-create_fuse_mount (glusterfs_ctx_t *ctx)
-{
- int ret = 0;
- cmd_args_t *cmd_args = NULL;
- xlator_t *master = NULL;
- char *mount_point = NULL;
- char cwd[PATH_MAX] = {0,};
-
- cmd_args = &ctx->cmd_args;
-
- if (!cmd_args->mount_point) {
- gf_log ("", GF_LOG_TRACE,
- "mount point not found, not a client process");
- return 0;
- }
-
- if (ctx->process_mode != GF_CLIENT_PROCESS) {
- gf_log("glusterfsd", GF_LOG_ERROR,
- "Not a client process, not performing mount operation");
- return -1;
- }
-
- master = GF_CALLOC (1, sizeof (*master),
- gfd_mt_xlator_t);
- if (!master)
- goto err;
-
- master->name = gf_strdup ("fuse");
- if (!master->name)
- goto err;
-
- if (xlator_set_type (master, "mount/fuse") == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "MOUNT-POINT %s initialization failed",
- cmd_args->mount_point);
- goto err;
- }
-
- master->ctx = ctx;
- master->options = get_new_dict ();
- if (!master->options)
- goto err;
+glusterfs_pidfile_cleanup(glusterfs_ctx_t *ctx);
+int
+glusterfs_volumes_init(glusterfs_ctx_t *ctx);
+int
+glusterfs_mgmt_init(glusterfs_ctx_t *ctx);
+int
+glusterfs_listener_init(glusterfs_ctx_t *ctx);
- /* Check if mount-point is absolute path,
- * if not convert to absolute path by concating with CWD
- */
- if (cmd_args->mount_point[0] != '/') {
- if (getcwd (cwd, PATH_MAX) != NULL) {
- ret = gf_asprintf (&mount_point, "%s/%s", cwd,
- cmd_args->mount_point);
- if (ret == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "Could not create absolute mountpoint "
- "path");
- goto err;
- }
- } else {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "Could not get current working directory");
- goto err;
- }
- } else
- mount_point = gf_strdup (cmd_args->mount_point);
+#define DICT_SET_VAL(method, dict, key, val, msgid) \
+ if (method(dict, key, val)) { \
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, msgid, "key=%s", key); \
+ goto err; \
+ }
- ret = dict_set_dynstr (master->options, ZR_MOUNTPOINT_OPT, mount_point);
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set mount-point to options dictionary");
+static int
+set_fuse_mount_options(glusterfs_ctx_t *ctx, dict_t *options)
+{
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
+ char *mount_point = NULL;
+ char cwd[PATH_MAX] = {
+ 0,
+ };
+
+ cmd_args = &ctx->cmd_args;
+
+ /* Check if mount-point is absolute path,
+ * if not convert to absolute path by concatenating with CWD
+ */
+ if (cmd_args->mount_point[0] != '/') {
+ if (getcwd(cwd, PATH_MAX) != NULL) {
+ ret = gf_asprintf(&mount_point, "%s/%s", cwd,
+ cmd_args->mount_point);
+ if (ret == -1) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_1,
+ "gf_asprintf failed", NULL);
goto err;
- }
-
- if (cmd_args->fuse_attribute_timeout >= 0) {
- ret = dict_set_double (master->options, ZR_ATTR_TIMEOUT_OPT,
- cmd_args->fuse_attribute_timeout);
-
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key %s",
- ZR_ATTR_TIMEOUT_OPT);
- goto err;
- }
- }
-
- if (cmd_args->fuse_entry_timeout >= 0) {
- ret = dict_set_double (master->options, ZR_ENTRY_TIMEOUT_OPT,
- cmd_args->fuse_entry_timeout);
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key %s",
- ZR_ENTRY_TIMEOUT_OPT);
- goto err;
- }
- }
-
- if (cmd_args->client_pid_set) {
- ret = dict_set_int32 (master->options, "client-pid",
- cmd_args->client_pid);
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key %s",
- "client-pid");
- goto err;
- }
- }
-
- if (cmd_args->uid_map_root) {
- ret = dict_set_int32 (master->options, "uid-map-root",
- cmd_args->uid_map_root);
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key %s",
- "uid-map-root");
- goto err;
- }
- }
-
- if (cmd_args->volfile_check) {
- ret = dict_set_int32 (master->options, ZR_STRICT_VOLFILE_CHECK,
- cmd_args->volfile_check);
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key %s",
- ZR_STRICT_VOLFILE_CHECK);
- goto err;
- }
- }
-
- if (cmd_args->dump_fuse) {
- ret = dict_set_static_ptr (master->options, ZR_DUMP_FUSE,
- cmd_args->dump_fuse);
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key %s",
- ZR_DUMP_FUSE);
- goto err;
- }
- }
-
- if (cmd_args->acl) {
- ret = dict_set_static_ptr (master->options, "acl", "on");
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key acl");
- goto err;
- }
- }
-
- if (cmd_args->selinux) {
- ret = dict_set_static_ptr (master->options, "selinux", "on");
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key selinux");
- goto err;
- }
- }
-
- if (cmd_args->read_only) {
- ret = dict_set_static_ptr (master->options, "read-only", "on");
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key read-only");
- goto err;
- }
- }
-
- switch (cmd_args->fuse_direct_io_mode) {
+ }
+ } else {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_2,
+ "getcwd failed", NULL);
+ goto err;
+ }
+
+ } else {
+ mount_point = gf_strdup(cmd_args->mount_point);
+ }
+ DICT_SET_VAL(dict_set_dynstr_sizen, options, ZR_MOUNTPOINT_OPT, mount_point,
+ glusterfsd_msg_3);
+
+ if (cmd_args->fuse_attribute_timeout >= 0) {
+ DICT_SET_VAL(dict_set_double, options, ZR_ATTR_TIMEOUT_OPT,
+ cmd_args->fuse_attribute_timeout, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->fuse_entry_timeout >= 0) {
+ DICT_SET_VAL(dict_set_double, options, ZR_ENTRY_TIMEOUT_OPT,
+ cmd_args->fuse_entry_timeout, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->fuse_negative_timeout >= 0) {
+ DICT_SET_VAL(dict_set_double, options, ZR_NEGATIVE_TIMEOUT_OPT,
+ cmd_args->fuse_negative_timeout, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->client_pid_set) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, "client-pid",
+ cmd_args->client_pid, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->uid_map_root) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, "uid-map-root",
+ cmd_args->uid_map_root, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->volfile_check) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, ZR_STRICT_VOLFILE_CHECK,
+ cmd_args->volfile_check, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->dump_fuse) {
+ DICT_SET_VAL(dict_set_static_ptr, options, ZR_DUMP_FUSE,
+ cmd_args->dump_fuse, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->acl) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "acl", "on",
+ glusterfsd_msg_3);
+ }
+
+ if (cmd_args->selinux) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "selinux", "on",
+ glusterfsd_msg_3);
+ }
+
+ if (cmd_args->capability) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "capability", "on",
+ glusterfsd_msg_3);
+ }
+
+ if (cmd_args->aux_gfid_mount) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "virtual-gfid-access", "on",
+ glusterfsd_msg_3);
+ }
+
+ if (cmd_args->enable_ino32) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "enable-ino32", "on",
+ glusterfsd_msg_3);
+ }
+
+ if (cmd_args->read_only) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "read-only", "on",
+ glusterfsd_msg_3);
+ }
+
+ switch (cmd_args->fopen_keep_cache) {
+ case GF_OPTION_ENABLE:
+
+ DICT_SET_VAL(dict_set_static_ptr, options, "fopen-keep-cache", "on",
+ glusterfsd_msg_3);
+ break;
+ case GF_OPTION_DISABLE:
+ DICT_SET_VAL(dict_set_static_ptr, options, "fopen-keep-cache",
+ "off", glusterfsd_msg_3);
+ break;
+ default:
+ gf_msg_debug("glusterfsd", 0, "fopen-keep-cache mode %d",
+ cmd_args->fopen_keep_cache);
+ break;
+ }
+
+ if (cmd_args->gid_timeout_set) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, "gid-timeout",
+ cmd_args->gid_timeout, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->resolve_gids) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "resolve-gids", "on",
+ glusterfsd_msg_3);
+ }
+
+ if (cmd_args->lru_limit >= 0) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, "lru-limit",
+ cmd_args->lru_limit, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->invalidate_limit >= 0) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, "invalidate-limit",
+ cmd_args->invalidate_limit, glusterfsd_msg_3);
+ }
+
+ if (cmd_args->background_qlen) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, "background-qlen",
+ cmd_args->background_qlen, glusterfsd_msg_3);
+ }
+ if (cmd_args->congestion_threshold) {
+ DICT_SET_VAL(dict_set_int32_sizen, options, "congestion-threshold",
+ cmd_args->congestion_threshold, glusterfsd_msg_3);
+ }
+
+ switch (cmd_args->fuse_direct_io_mode) {
case GF_OPTION_DISABLE: /* disable */
- ret = dict_set_static_ptr (master->options, ZR_DIRECT_IO_OPT,
- "disable");
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set 'disable' for key %s",
- ZR_DIRECT_IO_OPT);
- goto err;
- }
- break;
+ DICT_SET_VAL(dict_set_static_ptr, options, ZR_DIRECT_IO_OPT,
+ "disable", glusterfsd_msg_3);
+ break;
case GF_OPTION_ENABLE: /* enable */
- ret = dict_set_static_ptr (master->options, ZR_DIRECT_IO_OPT,
- "enable");
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set 'enable' for key %s",
- ZR_DIRECT_IO_OPT);
- goto err;
- }
- break;
- case GF_OPTION_DEFERRED: /* default */
+ DICT_SET_VAL(dict_set_static_ptr, options, ZR_DIRECT_IO_OPT,
+ "enable", glusterfsd_msg_3);
+ break;
default:
- gf_log ("", GF_LOG_DEBUG, "fuse direct io type %d",
- cmd_args->fuse_direct_io_mode);
- break;
- }
+ gf_msg_debug("glusterfsd", 0, "fuse direct io type %d",
+ cmd_args->fuse_direct_io_mode);
+ break;
+ }
- if (!cmd_args->no_daemon_mode) {
- ret = dict_set_static_ptr (master->options, "sync-to-mount",
- "enable");
- if (ret < 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "failed to set dict value for key sync-mtab");
- goto err;
- }
- }
+ switch (cmd_args->no_root_squash) {
+ case GF_OPTION_ENABLE: /* enable */
+ DICT_SET_VAL(dict_set_static_ptr, options, "no-root-squash",
+ "enable", glusterfsd_msg_3);
+ break;
+ default:
+ DICT_SET_VAL(dict_set_static_ptr, options, "no-root-squash",
+ "disable", glusterfsd_msg_3);
+ gf_msg_debug("glusterfsd", 0, "fuse no-root-squash mode %d",
+ cmd_args->no_root_squash);
+ break;
+ }
+
+ if (!cmd_args->no_daemon_mode) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "sync-to-mount", "enable",
+ glusterfsd_msg_3);
+ }
+
+ if (cmd_args->use_readdirp) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "use-readdirp",
+ cmd_args->use_readdirp, glusterfsd_msg_3);
+ }
+ if (cmd_args->event_history) {
+ ret = dict_set_str(options, "event-history", cmd_args->event_history);
+ DICT_SET_VAL(dict_set_static_ptr, options, "event-history",
+ cmd_args->event_history, glusterfsd_msg_3);
+ }
+ if (cmd_args->thin_client) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "thin-client", "on",
+ glusterfsd_msg_3);
+ }
+ if (cmd_args->reader_thread_count) {
+ DICT_SET_VAL(dict_set_uint32, options, "reader-thread-count",
+ cmd_args->reader_thread_count, glusterfsd_msg_3);
+ }
+
+ DICT_SET_VAL(dict_set_uint32, options, "auto-invalidation",
+ cmd_args->fuse_auto_inval, glusterfsd_msg_3);
+
+ switch (cmd_args->kernel_writeback_cache) {
+ case GF_OPTION_ENABLE:
+ DICT_SET_VAL(dict_set_static_ptr, options, "kernel-writeback-cache",
+ "on", glusterfsd_msg_3);
+ break;
+ case GF_OPTION_DISABLE:
+ DICT_SET_VAL(dict_set_static_ptr, options, "kernel-writeback-cache",
+ "off", glusterfsd_msg_3);
+ break;
+ default:
+ gf_msg_debug("glusterfsd", 0, "kernel-writeback-cache mode %d",
+ cmd_args->kernel_writeback_cache);
+ break;
+ }
+ if (cmd_args->attr_times_granularity) {
+ DICT_SET_VAL(dict_set_uint32, options, "attr-times-granularity",
+ cmd_args->attr_times_granularity, glusterfsd_msg_3);
+ }
+ switch (cmd_args->fuse_flush_handle_interrupt) {
+ case GF_OPTION_ENABLE:
+ DICT_SET_VAL(dict_set_static_ptr, options, "flush-handle-interrupt",
+ "on", glusterfsd_msg_3);
+ break;
+ case GF_OPTION_DISABLE:
+ DICT_SET_VAL(dict_set_static_ptr, options, "flush-handle-interrupt",
+ "off", glusterfsd_msg_3);
+ break;
+ default:
+ gf_msg_debug("glusterfsd", 0, "fuse-flush-handle-interrupt mode %d",
+ cmd_args->fuse_flush_handle_interrupt);
+ break;
+ }
+ if (cmd_args->global_threading) {
+ DICT_SET_VAL(dict_set_static_ptr, options, "global-threading", "on",
+ glusterfsd_msg_3);
+ }
+ if (cmd_args->fuse_dev_eperm_ratelimit_ns) {
+ DICT_SET_VAL(dict_set_uint32, options, "fuse-dev-eperm-ratelimit-ns",
+ cmd_args->fuse_dev_eperm_ratelimit_ns, glusterfsd_msg_3);
+ }
+
+ ret = 0;
+err:
+ return ret;
+}
- ret = xlator_init (master);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG, "failed to initialize fuse translator");
- goto err;
+int
+create_fuse_mount(glusterfs_ctx_t *ctx)
+{
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
+ xlator_t *master = NULL;
+
+ cmd_args = &ctx->cmd_args;
+ if (!cmd_args->mount_point) {
+ gf_msg_trace("glusterfsd", 0,
+ "mount point not found, not a client process");
+ return 0;
+ }
+
+ if (ctx->process_mode != GF_CLIENT_PROCESS) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_7, NULL);
+ return -1;
+ }
+
+ master = GF_CALLOC(1, sizeof(*master), gfd_mt_xlator_t);
+ if (!master)
+ goto err;
+
+ master->name = gf_strdup("fuse");
+ if (!master->name)
+ goto err;
+
+ if (xlator_set_type(master, "mount/fuse") == -1) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_8,
+ "MOUNT-POINT=%s", cmd_args->mount_point, NULL);
+ goto err;
+ }
+
+ master->ctx = ctx;
+ master->options = dict_new();
+ if (!master->options)
+ goto err;
+
+ ret = set_fuse_mount_options(ctx, master->options);
+ if (ret)
+ goto err;
+
+ if (cmd_args->fuse_mountopts) {
+ ret = dict_set_static_ptr(master->options, ZR_FUSE_MOUNTOPTS,
+ cmd_args->fuse_mountopts);
+ if (ret < 0) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_3,
+ ZR_FUSE_MOUNTOPTS, NULL);
+ goto err;
}
+ }
- ctx->master = master;
+ ret = xlator_init(master);
+ if (ret) {
+ gf_msg_debug("glusterfsd", 0, "failed to initialize fuse translator");
+ goto err;
+ }
- return 0;
+ ctx->master = master;
+
+ return 0;
err:
- if (master) {
- xlator_destroy (master);
- }
+ if (master) {
+ xlator_destroy(master);
+ }
- return -1;
+ return 1;
}
-
static FILE *
-get_volfp (glusterfs_ctx_t *ctx)
+get_volfp(glusterfs_ctx_t *ctx)
{
- int ret = 0;
- cmd_args_t *cmd_args = NULL;
- FILE *specfp = NULL;
- struct stat statbuf;
+ cmd_args_t *cmd_args = NULL;
+ FILE *specfp = NULL;
- cmd_args = &ctx->cmd_args;
+ cmd_args = &ctx->cmd_args;
- ret = sys_lstat (cmd_args->volfile, &statbuf);
- if (ret == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "%s: %s", cmd_args->volfile, strerror (errno));
- return NULL;
- }
+ if ((specfp = fopen(cmd_args->volfile, "r")) == NULL) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_9,
+ "volume_file=%s", cmd_args->volfile, NULL);
+ return NULL;
+ }
- if ((specfp = fopen (cmd_args->volfile, "r")) == NULL) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "volume file %s: %s",
- cmd_args->volfile,
- strerror (errno));
- return NULL;
- }
+ gf_msg_debug("glusterfsd", 0, "loading volume file %s", cmd_args->volfile);
- gf_log ("glusterfsd", GF_LOG_DEBUG,
- "loading volume file %s", cmd_args->volfile);
+ return specfp;
+}
- return specfp;
+static int
+gf_remember_backup_volfile_server(char *arg)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = -1;
+
+ ctx = glusterfsd_ctx;
+ if (!ctx)
+ goto out;
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args)
+ goto out;
+
+ ret = gf_set_volfile_server_common(
+ cmd_args, arg, GF_DEFAULT_VOLFILE_TRANSPORT, GF_DEFAULT_BASE_PORT);
+ if (ret) {
+ gf_log("glusterfs", GF_LOG_ERROR, "failed to set volfile server: %s",
+ strerror(errno));
+ }
+out:
+ return ret;
}
static int
-gf_remember_xlator_option (struct list_head *options, char *arg)
+gf_remember_xlator_option(char *arg)
{
- glusterfs_ctx_t *ctx = NULL;
- cmd_args_t *cmd_args = NULL;
- xlator_cmdline_option_t *option = NULL;
- int ret = -1;
- char *dot = NULL;
- char *equals = NULL;
-
- ctx = glusterfs_ctx_get ();
- cmd_args = &ctx->cmd_args;
-
- option = GF_CALLOC (1, sizeof (xlator_cmdline_option_t),
- gfd_mt_xlator_cmdline_option_t);
- if (!option)
- goto out;
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
+ xlator_cmdline_option_t *option = NULL;
+ int ret = -1;
+ char *dot = NULL;
+ char *equals = NULL;
- INIT_LIST_HEAD (&option->cmd_args);
+ ctx = glusterfsd_ctx;
+ cmd_args = &ctx->cmd_args;
- dot = strchr (arg, '.');
- if (!dot) {
- gf_log ("", GF_LOG_WARNING,
- "xlator option %s is invalid", arg);
- goto out;
- }
+ option = GF_CALLOC(1, sizeof(xlator_cmdline_option_t),
+ gfd_mt_xlator_cmdline_option_t);
+ if (!option)
+ goto out;
- option->volume = GF_CALLOC ((dot - arg) + 1, sizeof (char),
- gfd_mt_char);
- if (!option->volume)
- goto out;
+ INIT_LIST_HEAD(&option->cmd_args);
- strncpy (option->volume, arg, (dot - arg));
+ dot = strchr(arg, '.');
+ if (!dot) {
+ gf_smsg("", GF_LOG_WARNING, 0, glusterfsd_msg_10, "arg=%s", arg, NULL);
+ goto out;
+ }
- equals = strchr (arg, '=');
- if (!equals) {
- gf_log ("", GF_LOG_WARNING,
- "xlator option %s is invalid", arg);
- goto out;
- }
+ option->volume = GF_MALLOC((dot - arg) + 1, gfd_mt_char);
+ if (!option->volume)
+ goto out;
- option->key = GF_CALLOC ((equals - dot) + 1, sizeof (char),
- gfd_mt_char);
- if (!option->key)
- goto out;
+ strncpy(option->volume, arg, (dot - arg));
+ option->volume[(dot - arg)] = '\0';
- strncpy (option->key, dot + 1, (equals - dot - 1));
+ equals = strchr(arg, '=');
+ if (!equals) {
+ gf_smsg("", GF_LOG_WARNING, 0, glusterfsd_msg_10, "arg=%s", arg, NULL);
+ goto out;
+ }
- if (!*(equals + 1)) {
- gf_log ("", GF_LOG_WARNING,
- "xlator option %s is invalid", arg);
- goto out;
- }
+ option->key = GF_MALLOC((equals - dot) + 1, gfd_mt_char);
+ if (!option->key)
+ goto out;
- option->value = gf_strdup (equals + 1);
+ strncpy(option->key, dot + 1, (equals - dot - 1));
+ option->key[(equals - dot - 1)] = '\0';
- list_add (&option->cmd_args, &cmd_args->xlator_options);
+ if (!*(equals + 1)) {
+ gf_smsg("", GF_LOG_WARNING, 0, glusterfsd_msg_10, "arg=%s", arg, NULL);
+ goto out;
+ }
- ret = 0;
+ option->value = gf_strdup(equals + 1);
+
+ list_add(&option->cmd_args, &cmd_args->xlator_options);
+
+ ret = 0;
out:
- if (ret == -1) {
- if (option) {
- if (option->volume)
- GF_FREE (option->volume);
- if (option->key)
- GF_FREE (option->key);
- if (option->value)
- GF_FREE (option->value);
-
- GF_FREE (option);
- }
+ if (ret == -1) {
+ if (option) {
+ GF_FREE(option->volume);
+ GF_FREE(option->key);
+ GF_FREE(option->value);
+
+ GF_FREE(option);
}
+ }
- return ret;
+ return ret;
}
+#ifdef GF_LINUX_HOST_OS
+static struct oom_api_info {
+ char *oom_api_file;
+ int32_t oom_min;
+ int32_t oom_max;
+} oom_api_info[] = {
+ {"/proc/self/oom_score_adj", OOM_SCORE_ADJ_MIN, OOM_SCORE_ADJ_MAX},
+ {"/proc/self/oom_adj", OOM_DISABLE, OOM_ADJUST_MAX},
+ {NULL, 0, 0}};
+
+static struct oom_api_info *
+get_oom_api_info(void)
+{
+ struct oom_api_info *api = NULL;
+
+ for (api = oom_api_info; api->oom_api_file; api++) {
+ if (sys_access(api->oom_api_file, F_OK) != -1) {
+ return api;
+ }
+ }
+ return NULL;
+}
+#endif
static error_t
-parse_opts (int key, char *arg, struct argp_state *state)
+parse_opts(int key, char *arg, struct argp_state *state)
{
- glusterfs_ctx_t *ctx = NULL;
- cmd_args_t *cmd_args = NULL;
- uint32_t n = 0;
- double d = 0.0;
- gf_boolean_t b = _gf_false;
- char *pwd = NULL;
- char tmp_buf[2048] = {0,};
- char *tmp_str = NULL;
- char *port_str = NULL;
- struct passwd *pw = NULL;
-
- cmd_args = state->input;
-
- switch (key) {
- case ARGP_VOLFILE_SERVER_KEY:
- cmd_args->volfile_server = gf_strdup (arg);
- break;
+ cmd_args_t *cmd_args = NULL;
+ uint32_t n = 0;
+#ifdef GF_LINUX_HOST_OS
+ int32_t k = 0;
+ struct oom_api_info *api = NULL;
+#endif
+ double d = 0.0;
+ gf_boolean_t b = _gf_false;
+ char *pwd = NULL;
+ char *tmp_str = NULL;
+ char *port_str = NULL;
+ struct passwd *pw = NULL;
+ int ret = 0;
- case ARGP_VOLFILE_MAX_FETCH_ATTEMPTS:
- n = 0;
+ cmd_args = state->input;
- if (gf_string2uint_base10 (arg, &n) == 0) {
- cmd_args->max_connect_attempts = n;
- break;
- }
+ switch (key) {
+ case ARGP_VOLFILE_SERVER_KEY:
+ gf_remember_backup_volfile_server(arg);
- argp_failure (state, -1, 0,
- "Invalid limit on connect attempts %s", arg);
- break;
+ break;
case ARGP_READ_ONLY_KEY:
- cmd_args->read_only = 1;
- break;
+ cmd_args->read_only = 1;
+ break;
case ARGP_ACL_KEY:
- cmd_args->acl = 1;
- break;
+ cmd_args->acl = 1;
+ gf_remember_xlator_option("*-md-cache.cache-posix-acl=true");
+ break;
case ARGP_SELINUX_KEY:
- cmd_args->selinux = 1;
- break;
+ cmd_args->selinux = 1;
+ gf_remember_xlator_option("*-md-cache.cache-selinux=true");
+ break;
+
+ case ARGP_CAPABILITY_KEY:
+ cmd_args->capability = 1;
+ break;
+
+ case ARGP_AUX_GFID_MOUNT_KEY:
+ cmd_args->aux_gfid_mount = 1;
+ break;
+
+ case ARGP_INODE32_KEY:
+ cmd_args->enable_ino32 = 1;
+ break;
case ARGP_WORM_KEY:
- cmd_args->worm = 1;
- break;
+ cmd_args->worm = 1;
+ break;
- case ARGP_MAC_COMPAT_KEY:
- if (!arg)
- arg = "on";
+ case ARGP_PRINT_NETGROUPS:
+ cmd_args->print_netgroups = arg;
+ break;
- if (gf_string2boolean (arg, &b) == 0) {
- cmd_args->mac_compat = b;
+ case ARGP_PRINT_EXPORTS:
+ cmd_args->print_exports = arg;
+ break;
- break;
- }
+ case ARGP_PRINT_XLATORDIR_KEY:
+ cmd_args->print_xlatordir = _gf_true;
+ break;
+
+ case ARGP_PRINT_STATEDUMPDIR_KEY:
+ cmd_args->print_statedumpdir = _gf_true;
+ break;
+
+ case ARGP_PRINT_LOGDIR_KEY:
+ cmd_args->print_logdir = _gf_true;
+ break;
+
+ case ARGP_PRINT_LIBEXECDIR_KEY:
+ cmd_args->print_libexecdir = _gf_true;
+ break;
+
+ case ARGP_MAC_COMPAT_KEY:
+ if (!arg)
+ arg = "on";
+
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->mac_compat = b;
- argp_failure (state, -1, 0,
- "invalid value \"%s\" for mac-compat", arg);
break;
+ }
+
+ argp_failure(state, -1, 0, "invalid value \"%s\" for mac-compat",
+ arg);
+ break;
case ARGP_VOLUME_FILE_KEY:
- if (cmd_args->volfile)
- GF_FREE (cmd_args->volfile);
-
- if (arg[0] != '/') {
- pwd = getcwd (NULL, PATH_MAX);
- if (!pwd) {
- argp_failure (state, -1, errno,
- "getcwd failed with error no %d",
- errno);
- break;
- }
- snprintf (tmp_buf, 1024, "%s/%s", pwd, arg);
- cmd_args->volfile = gf_strdup (tmp_buf);
- free (pwd);
- } else {
- cmd_args->volfile = gf_strdup (arg);
+ GF_FREE(cmd_args->volfile);
+
+ if (arg[0] != '/') {
+ pwd = getcwd(NULL, PATH_MAX);
+ if (!pwd) {
+ argp_failure(state, -1, errno,
+ "getcwd failed with error no %d", errno);
+ break;
}
+ char tmp_buf[1024];
+ snprintf(tmp_buf, sizeof(tmp_buf), "%s/%s", pwd, arg);
+ cmd_args->volfile = gf_strdup(tmp_buf);
+ free(pwd);
+ } else {
+ cmd_args->volfile = gf_strdup(arg);
+ }
- break;
+ break;
case ARGP_LOG_LEVEL_KEY:
- if (strcasecmp (arg, ARGP_LOG_LEVEL_NONE_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_NONE;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_CRITICAL_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_CRITICAL;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_ERROR_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_ERROR;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_WARNING_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_WARNING;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_INFO_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_INFO;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_DEBUG_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_DEBUG;
- break;
- }
- if (strcasecmp (arg, ARGP_LOG_LEVEL_TRACE_OPTION) == 0) {
- cmd_args->log_level = GF_LOG_TRACE;
- break;
- }
-
- argp_failure (state, -1, 0, "unknown log level %s", arg);
+ if (strcasecmp(arg, ARGP_LOG_LEVEL_NONE_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_NONE;
break;
+ }
+ if (strcasecmp(arg, ARGP_LOG_LEVEL_CRITICAL_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_CRITICAL;
+ break;
+ }
+ if (strcasecmp(arg, ARGP_LOG_LEVEL_ERROR_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_ERROR;
+ break;
+ }
+ if (strcasecmp(arg, ARGP_LOG_LEVEL_WARNING_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_WARNING;
+ break;
+ }
+ if (strcasecmp(arg, ARGP_LOG_LEVEL_INFO_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_INFO;
+ break;
+ }
+ if (strcasecmp(arg, ARGP_LOG_LEVEL_DEBUG_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_DEBUG;
+ break;
+ }
+ if (strcasecmp(arg, ARGP_LOG_LEVEL_TRACE_OPTION) == 0) {
+ cmd_args->log_level = GF_LOG_TRACE;
+ break;
+ }
+
+ argp_failure(state, -1, 0, "unknown log level %s", arg);
+ break;
case ARGP_LOG_FILE_KEY:
- cmd_args->log_file = gf_strdup (arg);
- break;
+ cmd_args->log_file = gf_strdup(arg);
+ break;
case ARGP_VOLFILE_SERVER_PORT_KEY:
- n = 0;
-
- if (gf_string2uint_base10 (arg, &n) == 0) {
- cmd_args->volfile_server_port = n;
- break;
- }
+ n = 0;
- argp_failure (state, -1, 0,
- "unknown volfile server port %s", arg);
+ if (gf_string2uint_base10(arg, &n) == 0) {
+ cmd_args->volfile_server_port = n;
break;
+ }
+
+ argp_failure(state, -1, 0, "unknown volfile server port %s", arg);
+ break;
case ARGP_VOLFILE_SERVER_TRANSPORT_KEY:
- cmd_args->volfile_server_transport = gf_strdup (arg);
- break;
+ cmd_args->volfile_server_transport = gf_strdup(arg);
+ break;
case ARGP_VOLFILE_ID_KEY:
- cmd_args->volfile_id = gf_strdup (arg);
- break;
+ cmd_args->volfile_id = gf_strdup(arg);
+ break;
+
+ case ARGP_THIN_CLIENT_KEY:
+ cmd_args->thin_client = _gf_true;
+ break;
+
+ case ARGP_BRICK_MUX_KEY:
+ cmd_args->brick_mux = _gf_true;
+ break;
case ARGP_PID_FILE_KEY:
- cmd_args->pid_file = gf_strdup (arg);
- break;
+ cmd_args->pid_file = gf_strdup(arg);
+ break;
case ARGP_SOCK_FILE_KEY:
- cmd_args->sock_file = gf_strdup (arg);
- break;
+ cmd_args->sock_file = gf_strdup(arg);
+ break;
case ARGP_NO_DAEMON_KEY:
- cmd_args->no_daemon_mode = ENABLE_NO_DAEMON_MODE;
- break;
+ cmd_args->no_daemon_mode = ENABLE_NO_DAEMON_MODE;
+ break;
case ARGP_RUN_ID_KEY:
- cmd_args->run_id = gf_strdup (arg);
- break;
+ cmd_args->run_id = gf_strdup(arg);
+ break;
case ARGP_DEBUG_KEY:
- cmd_args->debug_mode = ENABLE_DEBUG_MODE;
- break;
+ cmd_args->debug_mode = ENABLE_DEBUG_MODE;
+ break;
+ case ARGP_VOLFILE_MAX_FETCH_ATTEMPTS:
+ cmd_args->max_connect_attempts = 1;
+ break;
case ARGP_DIRECT_IO_MODE_KEY:
- if (!arg)
- arg = "on";
+ if (!arg)
+ arg = "on";
- if (gf_string2boolean (arg, &b) == 0) {
- cmd_args->fuse_direct_io_mode = b;
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->fuse_direct_io_mode = b;
- break;
- }
+ break;
+ }
- argp_failure (state, -1, 0,
- "unknown direct I/O mode setting \"%s\"", arg);
+ if (strcmp(arg, "auto") == 0)
break;
+ argp_failure(state, -1, 0, "unknown direct I/O mode setting \"%s\"",
+ arg);
+ break;
+
+ case ARGP_FUSE_NO_ROOT_SQUASH_KEY:
+ cmd_args->no_root_squash = _gf_true;
+ break;
+
case ARGP_ENTRY_TIMEOUT_KEY:
- d = 0.0;
+ d = 0.0;
- gf_string2double (arg, &d);
- if (!(d < 0.0)) {
- cmd_args->fuse_entry_timeout = d;
- break;
- }
+ gf_string2double(arg, &d);
+ if (!(d < 0.0)) {
+ cmd_args->fuse_entry_timeout = d;
+ break;
+ }
- argp_failure (state, -1, 0, "unknown entry timeout %s", arg);
+ argp_failure(state, -1, 0, "unknown entry timeout %s", arg);
+ break;
+
+ case ARGP_NEGATIVE_TIMEOUT_KEY:
+ d = 0.0;
+
+ ret = gf_string2double(arg, &d);
+ if ((ret == 0) && !(d < 0.0)) {
+ cmd_args->fuse_negative_timeout = d;
break;
+ }
- case ARGP_ATTRIBUTE_TIMEOUT_KEY:
- d = 0.0;
+ argp_failure(state, -1, 0, "unknown negative timeout %s", arg);
+ break;
- gf_string2double (arg, &d);
- if (!(d < 0.0)) {
- cmd_args->fuse_attribute_timeout = d;
- break;
- }
+ case ARGP_ATTRIBUTE_TIMEOUT_KEY:
+ d = 0.0;
- argp_failure (state, -1, 0,
- "unknown attribute timeout %s", arg);
+ gf_string2double(arg, &d);
+ if (!(d < 0.0)) {
+ cmd_args->fuse_attribute_timeout = d;
break;
+ }
- case ARGP_CLIENT_PID_KEY:
- if (gf_string2int (arg, &cmd_args->client_pid) == 0) {
- cmd_args->client_pid_set = 1;
- break;
- }
+ argp_failure(state, -1, 0, "unknown attribute timeout %s", arg);
+ break;
- argp_failure (state, -1, 0,
- "unknown client pid %s", arg);
+ case ARGP_CLIENT_PID_KEY:
+ if (gf_string2int(arg, &cmd_args->client_pid) == 0) {
+ cmd_args->client_pid_set = 1;
break;
+ }
+
+ argp_failure(state, -1, 0, "unknown client pid %s", arg);
+ break;
case ARGP_USER_MAP_ROOT_KEY:
- pw = getpwnam (arg);
- if (pw)
- cmd_args->uid_map_root = pw->pw_uid;
- else
- argp_failure (state, -1, 0,
- "user %s does not exist", arg);
- break;
+ pw = getpwnam(arg);
+ if (pw)
+ cmd_args->uid_map_root = pw->pw_uid;
+ else
+ argp_failure(state, -1, 0, "user %s does not exist", arg);
+ break;
case ARGP_VOLFILE_CHECK_KEY:
- cmd_args->volfile_check = 1;
- break;
+ cmd_args->volfile_check = 1;
+ break;
case ARGP_VOLUME_NAME_KEY:
- cmd_args->volume_name = gf_strdup (arg);
- break;
+ cmd_args->volume_name = gf_strdup(arg);
+ break;
case ARGP_XLATOR_OPTION_KEY:
- if (gf_remember_xlator_option (&cmd_args->xlator_options, arg))
- argp_failure (state, -1, 0, "invalid xlator option %s", arg);
+ if (gf_remember_xlator_option(arg))
+ argp_failure(state, -1, 0, "invalid xlator option %s", arg);
- break;
+ break;
case ARGP_KEY_NO_ARGS:
- break;
+ break;
case ARGP_KEY_ARG:
- if (state->arg_num >= 1)
- argp_usage (state);
-
- cmd_args->mount_point = gf_strdup (arg);
- break;
+ if (state->arg_num >= 1)
+ argp_usage(state);
+ cmd_args->mount_point = gf_strdup(arg);
+ break;
case ARGP_DUMP_FUSE_KEY:
- cmd_args->dump_fuse = gf_strdup (arg);
- break;
+ cmd_args->dump_fuse = gf_strdup(arg);
+ break;
case ARGP_BRICK_NAME_KEY:
- cmd_args->brick_name = gf_strdup (arg);
- break;
+ cmd_args->brick_name = gf_strdup(arg);
+ break;
case ARGP_BRICK_PORT_KEY:
- n = 0;
-
- port_str = strtok_r (arg, ",", &tmp_str);
- if (gf_string2uint_base10 (port_str, &n) == 0) {
- cmd_args->brick_port = n;
- port_str = strtok_r (NULL, ",", &tmp_str);
- if (port_str) {
- if (gf_string2uint_base10 (port_str, &n) == 0)
- cmd_args->brick_port2 = n;
- break;
-
- argp_failure (state, -1, 0,
- "wrong brick (listen) port %s", arg);
+ n = 0;
+
+ if (arg != NULL) {
+ port_str = strtok_r(arg, ",", &tmp_str);
+ if (gf_string2uint_base10(port_str, &n) == 0) {
+ cmd_args->brick_port = n;
+ port_str = strtok_r(NULL, ",", &tmp_str);
+ if (port_str) {
+ if (gf_string2uint_base10(port_str, &n) == 0) {
+ cmd_args->brick_port2 = n;
+ break;
}
- break;
+ argp_failure(state, -1, 0,
+ "wrong brick (listen) port %s", arg);
+ }
+ break;
}
+ }
- argp_failure (state, -1, 0,
- "unknown brick (listen) port %s", arg);
- break;
+ argp_failure(state, -1, 0, "unknown brick (listen) port %s", arg);
+ break;
case ARGP_MEM_ACCOUNTING_KEY:
- /* TODO: it should have got handled much earlier */
- ctx = glusterfs_ctx_get ();
- ctx->mem_accounting = 1;
- break;
- }
-
- return 0;
-}
+ /* TODO: it should have got handled much earlier */
+ // gf_mem_acct_enable_set (THIS->ctx);
+ break;
+ case ARGP_FOPEN_KEEP_CACHE_KEY:
+ if (!arg)
+ arg = "on";
-void
-cleanup_and_exit (int signum)
-{
- glusterfs_ctx_t *ctx = NULL;
- xlator_t *trav = NULL;
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->fopen_keep_cache = b;
- ctx = glusterfs_ctx_get ();
+ break;
+ }
- if (!ctx)
- return;
+ argp_failure(state, -1, 0, "unknown cache setting \"%s\"", arg);
- gf_log_callingfn ("", GF_LOG_WARNING,
- "received signum (%d), shutting down", signum);
+ break;
- if (ctx->cleanup_started)
- return;
+ case ARGP_GLOBAL_TIMER_WHEEL:
+ cmd_args->global_timer_wheel = 1;
+ break;
- ctx->cleanup_started = 1;
- glusterfs_mgmt_pmap_signout (ctx);
- if (ctx->listener) {
- (void) glusterfs_listener_stop (ctx);
- }
+ case ARGP_GID_TIMEOUT_KEY:
+ if (!gf_string2int(arg, &cmd_args->gid_timeout)) {
+ cmd_args->gid_timeout_set = _gf_true;
+ break;
+ }
- /* Call fini() of FUSE xlator first:
- * so there are no more requests coming and
- * 'umount' of mount point is done properly */
- trav = ctx->master;
- if (trav && trav->fini) {
- THIS = trav;
- trav->fini (trav);
- }
+ argp_failure(state, -1, 0, "unknown group list timeout %s", arg);
+ break;
- glusterfs_pidfile_cleanup (ctx);
+ case ARGP_RESOLVE_GIDS_KEY:
+ cmd_args->resolve_gids = 1;
+ break;
- exit (0);
-#if 0
- /* TODO: Properly do cleanup_and_exit(), with synchronization */
- if (ctx->mgmt) {
- /* cleanup the saved-frames before last unref */
- rpc_clnt_connection_cleanup (&ctx->mgmt->conn);
- rpc_clnt_unref (ctx->mgmt);
- }
+ case ARGP_FUSE_LRU_LIMIT_KEY:
+ if (!gf_string2int32(arg, &cmd_args->lru_limit))
+ break;
- /* call fini() of each xlator */
- trav = NULL;
- if (ctx->active)
- trav = ctx->active->top;
- while (trav) {
- if (trav->fini) {
- THIS = trav;
- trav->fini (trav);
- }
- trav = trav->next;
- }
-#endif
-}
+ argp_failure(state, -1, 0, "unknown LRU limit option %s", arg);
+ break;
+ case ARGP_FUSE_INVALIDATE_LIMIT_KEY:
+ if (!gf_string2int32(arg, &cmd_args->invalidate_limit))
+ break;
-static void
-reincarnate (int signum)
-{
- int ret = 0;
- glusterfs_ctx_t *ctx = NULL;
- cmd_args_t *cmd_args = NULL;
+ argp_failure(state, -1, 0, "unknown invalidate limit option %s",
+ arg);
+ break;
- ctx = glusterfs_ctx_get ();
- cmd_args = &ctx->cmd_args;
+ case ARGP_FUSE_BACKGROUND_QLEN_KEY:
+ if (!gf_string2int(arg, &cmd_args->background_qlen))
+ break;
- if (cmd_args->volfile_server) {
- gf_log ("glusterfsd", GF_LOG_INFO,
- "Fetching the volume file from server...");
- ret = glusterfs_volfile_fetch (ctx);
- } else {
- gf_log ("glusterfsd", GF_LOG_DEBUG,
- "Not reloading volume specification file on SIGHUP");
- }
+ argp_failure(state, -1, 0, "unknown background qlen option %s",
+ arg);
+ break;
+ case ARGP_FUSE_CONGESTION_THRESHOLD_KEY:
+ if (!gf_string2int(arg, &cmd_args->congestion_threshold))
+ break;
- /* Also, SIGHUP should do logrotate */
- gf_log_logrotate (1);
+ argp_failure(state, -1, 0, "unknown congestion threshold option %s",
+ arg);
+ break;
- if (ret < 0)
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "volume initialization failed.");
+#ifdef GF_LINUX_HOST_OS
+ case ARGP_OOM_SCORE_ADJ_KEY:
+ k = 0;
- return;
-}
+ api = get_oom_api_info();
+ if (!api)
+ goto no_oom_api;
+ if (gf_string2int(arg, &k) == 0 && k >= api->oom_min &&
+ k <= api->oom_max) {
+ cmd_args->oom_score_adj = gf_strdup(arg);
+ break;
+ }
-static char *
-generate_uuid ()
-{
- char tmp_str[1024] = {0,};
- char hostname[256] = {0,};
- struct timeval tv = {0,};
- char now_str[32];
-
- if (gettimeofday (&tv, NULL) == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "gettimeofday: failed %s",
- strerror (errno));
- }
+ argp_failure(state, -1, 0, "unknown oom_score_adj value %s", arg);
- if (gethostname (hostname, sizeof hostname) == -1) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "gethostname: failed %s",
- strerror (errno));
- }
+ no_oom_api:
+ break;
+#endif
- gf_time_fmt (now_str, sizeof now_str, tv.tv_sec, gf_timefmt_Ymd_T);
- snprintf (tmp_str, sizeof tmp_str, "%s-%d-%s:%" GF_PRI_SUSECONDS,
- hostname, getpid(), now_str, tv.tv_usec);
+ case ARGP_FUSE_MOUNTOPTS_KEY:
+ cmd_args->fuse_mountopts = gf_strdup(arg);
+ break;
- return gf_strdup (tmp_str);
-}
+ case ARGP_FUSE_USE_READDIRP_KEY:
+ if (!arg)
+ arg = "yes";
+ if (gf_string2boolean(arg, &b) == 0) {
+ if (b) {
+ cmd_args->use_readdirp = "yes";
+ } else {
+ cmd_args->use_readdirp = "no";
+ }
-static uint8_t
-gf_get_process_mode (char *exec_name)
-{
- char *dup_execname = NULL, *base = NULL;
- uint8_t ret = 0;
+ break;
+ }
+
+ argp_failure(state, -1, 0, "unknown use-readdirp setting \"%s\"",
+ arg);
+ break;
+
+ case ARGP_LOGGER:
+ if (strcasecmp(arg, GF_LOGGER_GLUSTER_LOG) == 0)
+ cmd_args->logger = gf_logger_glusterlog;
+ else if (strcasecmp(arg, GF_LOGGER_SYSLOG) == 0)
+ cmd_args->logger = gf_logger_syslog;
+ else
+ argp_failure(state, -1, 0, "unknown logger %s", arg);
+
+ break;
+
+ case ARGP_LOG_FORMAT:
+ if (strcasecmp(arg, GF_LOG_FORMAT_NO_MSG_ID) == 0)
+ cmd_args->log_format = gf_logformat_traditional;
+ else if (strcasecmp(arg, GF_LOG_FORMAT_WITH_MSG_ID) == 0)
+ cmd_args->log_format = gf_logformat_withmsgid;
+ else
+ argp_failure(state, -1, 0, "unknown log format %s", arg);
+
+ break;
+
+ case ARGP_LOG_BUF_SIZE:
+ if (gf_string2uint32(arg, &cmd_args->log_buf_size)) {
+ argp_failure(state, -1, 0, "unknown log buf size option %s",
+ arg);
+ } else if (cmd_args->log_buf_size > GF_LOG_LRU_BUFSIZE_MAX) {
+ argp_failure(state, -1, 0,
+ "Invalid log buf size %s. "
+ "Valid range: [" GF_LOG_LRU_BUFSIZE_MIN_STR
+ "," GF_LOG_LRU_BUFSIZE_MAX_STR "]",
+ arg);
+ }
+
+ break;
+
+ case ARGP_LOG_FLUSH_TIMEOUT:
+ if (gf_string2uint32(arg, &cmd_args->log_flush_timeout)) {
+ argp_failure(state, -1, 0,
+ "unknown log flush timeout option %s", arg);
+ } else if ((cmd_args->log_flush_timeout <
+ GF_LOG_FLUSH_TIMEOUT_MIN) ||
+ (cmd_args->log_flush_timeout >
+ GF_LOG_FLUSH_TIMEOUT_MAX)) {
+ argp_failure(state, -1, 0,
+ "Invalid log flush timeout %s. "
+ "Valid range: [" GF_LOG_FLUSH_TIMEOUT_MIN_STR
+ "," GF_LOG_FLUSH_TIMEOUT_MAX_STR "]",
+ arg);
+ }
+
+ break;
+
+ case ARGP_SECURE_MGMT_KEY:
+ if (!arg)
+ arg = "yes";
+
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->secure_mgmt = b ? 1 : 0;
+ break;
+ }
+
+ argp_failure(state, -1, 0, "unknown secure-mgmt setting \"%s\"",
+ arg);
+ break;
+
+ case ARGP_LOCALTIME_LOGGING_KEY:
+ cmd_args->localtime_logging = 1;
+ break;
+ case ARGP_PROCESS_NAME_KEY:
+ cmd_args->process_name = gf_strdup(arg);
+ break;
+ case ARGP_SUBDIR_MOUNT_KEY:
+ if (arg[0] != '/') {
+ argp_failure(state, -1, 0, "expect '/%s', provided just \"%s\"",
+ arg, arg);
+ break;
+ }
+ cmd_args->subdir_mount = gf_strdup(arg);
+ break;
+ case ARGP_FUSE_EVENT_HISTORY_KEY:
+ if (!arg)
+ arg = "no";
+
+ if (gf_string2boolean(arg, &b) == 0) {
+ if (b) {
+ cmd_args->event_history = "yes";
+ } else {
+ cmd_args->event_history = "no";
+ }
- dup_execname = gf_strdup (exec_name);
- base = basename (dup_execname);
+ break;
+ }
+
+ argp_failure(state, -1, 0, "unknown event-history setting \"%s\"",
+ arg);
+ break;
+ case ARGP_READER_THREAD_COUNT_KEY:
+ if (gf_string2uint32(arg, &cmd_args->reader_thread_count)) {
+ argp_failure(state, -1, 0,
+ "unknown reader thread count option %s", arg);
+ } else if ((cmd_args->reader_thread_count < 1) ||
+ (cmd_args->reader_thread_count > 64)) {
+ argp_failure(state, -1, 0,
+ "Invalid reader thread count %s. "
+ "Valid range: [\"1, 64\"]",
+ arg);
+ }
+
+ break;
+
+ case ARGP_KERNEL_WRITEBACK_CACHE_KEY:
+ if (!arg)
+ arg = "yes";
+
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->kernel_writeback_cache = b;
- if (!strncmp (base, "glusterfsd", 10)) {
- ret = GF_SERVER_PROCESS;
- } else if (!strncmp (base, "glusterd", 8)) {
- ret = GF_GLUSTERD_PROCESS;
- } else {
- ret = GF_CLIENT_PROCESS;
- }
+ break;
+ }
+
+ argp_failure(state, -1, 0,
+ "unknown kernel writeback cache setting \"%s\"", arg);
+ break;
+ case ARGP_ATTR_TIMES_GRANULARITY_KEY:
+ if (gf_string2uint32(arg, &cmd_args->attr_times_granularity)) {
+ argp_failure(state, -1, 0,
+ "unknown attribute times granularity option %s",
+ arg);
+ } else if (cmd_args->attr_times_granularity > 1000000000) {
+ argp_failure(state, -1, 0,
+ "Invalid attribute times granularity value %s. "
+ "Valid range: [\"0, 1000000000\"]",
+ arg);
+ }
+
+ break;
+
+ case ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY:
+ if (!arg)
+ arg = "yes";
+
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->fuse_flush_handle_interrupt = b;
- GF_FREE (dup_execname);
+ break;
+ }
- return ret;
-}
+ argp_failure(state, -1, 0,
+ "unknown fuse flush handle interrupt setting \"%s\"",
+ arg);
+ break;
+ case ARGP_FUSE_AUTO_INVAL_KEY:
+ if (!arg)
+ arg = "yes";
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->fuse_auto_inval = b;
+ break;
+ }
-static int
-set_log_file_path (cmd_args_t *cmd_args)
-{
- int i = 0;
- int j = 0;
- int ret = 0;
- int port = 0;
- char *tmp_ptr = NULL;
- char tmp_str[1024] = {0,};
-
- if (cmd_args->mount_point) {
- j = 0;
- i = 0;
- if (cmd_args->mount_point[0] == '/')
- i = 1;
- for (; i < strlen (cmd_args->mount_point); i++,j++) {
- tmp_str[j] = cmd_args->mount_point[i];
- if (cmd_args->mount_point[i] == '/')
- tmp_str[j] = '-';
- }
+ break;
- ret = gf_asprintf (&cmd_args->log_file,
- DEFAULT_LOG_FILE_DIRECTORY "/%s.log",
- tmp_str);
- goto done;
- }
+ case ARGP_GLOBAL_THREADING_KEY:
+ if (!arg || (*arg == 0)) {
+ arg = "yes";
+ }
- if (cmd_args->volfile) {
- j = 0;
- i = 0;
- if (cmd_args->volfile[0] == '/')
- i = 1;
- for (; i < strlen (cmd_args->volfile); i++,j++) {
- tmp_str[j] = cmd_args->volfile[i];
- if (cmd_args->volfile[i] == '/')
- tmp_str[j] = '-';
- }
- ret = gf_asprintf (&cmd_args->log_file,
- DEFAULT_LOG_FILE_DIRECTORY "/%s.log",
- tmp_str);
- goto done;
- }
+ if (gf_string2boolean(arg, &b) == 0) {
+ cmd_args->global_threading = b;
+ break;
+ }
+
+ argp_failure(state, -1, 0,
+ "Invalid value for global threading \"%s\"", arg);
+ break;
+
+ case ARGP_FUSE_DEV_EPERM_RATELIMIT_NS_KEY:
+ if (gf_string2uint32(arg, &cmd_args->fuse_dev_eperm_ratelimit_ns)) {
+ argp_failure(state, -1, 0,
+ "Non-numerical value for "
+ "'fuse-dev-eperm-ratelimit-ns' option %s",
+ arg);
+ } else if (cmd_args->fuse_dev_eperm_ratelimit_ns > 1000000000) {
+ argp_failure(state, -1, 0,
+ "Invalid 'fuse-dev-eperm-ratelimit-ns' value %s. "
+ "Valid range: [\"0, 1000000000\"]",
+ arg);
+ }
+
+ break;
+ }
+ return 0;
+}
- if (cmd_args->volfile_server) {
- port = 1;
- tmp_ptr = "default";
+gf_boolean_t
+should_call_fini(glusterfs_ctx_t *ctx, xlator_t *trav)
+{
+ /* There's nothing to call, so the other checks don't matter. */
+ if (!trav->fini) {
+ return _gf_false;
+ }
- if (cmd_args->volfile_server_port)
- port = cmd_args->volfile_server_port;
- if (cmd_args->volfile_id)
- tmp_ptr = cmd_args->volfile_id;
+ /* This preserves previous behavior in glusterd. */
+ if (ctx->process_mode == GF_GLUSTERD_PROCESS) {
+ return _gf_true;
+ }
- ret = gf_asprintf (&cmd_args->log_file,
- DEFAULT_LOG_FILE_DIRECTORY "/%s-%s-%d.log",
- cmd_args->volfile_server, tmp_ptr, port);
- }
-done:
- return ret;
+ return _gf_false;
}
-
-static int
-glusterfs_ctx_defaults_init (glusterfs_ctx_t *ctx)
+void
+cleanup_and_exit(int signum)
{
- cmd_args_t *cmd_args = NULL;
- struct rlimit lim = {0, };
- call_pool_t *pool = NULL;
- int ret = -1;
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_t *trav = NULL;
+ xlator_t *top;
+ xlator_t *victim;
+ xlator_list_t **trav_p;
- xlator_mem_acct_init (THIS, gfd_mt_end);
+ ctx = glusterfsd_ctx;
- ctx->process_uuid = generate_uuid ();
- if (!ctx->process_uuid) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs uuid generation failed");
- goto out;
- }
+ if (!ctx)
+ return;
- ctx->page_size = 128 * GF_UNIT_KB;
+ /* To take or not to take the mutex here and in the other
+ * signal handler - gf_print_trace() - is the big question here.
+ *
+ * Taking mutex in signal handler would mean that if the process
+ * receives a fatal signal while another thread is holding
+ * ctx->log.log_buf_lock to perhaps log a message in _gf_msg_internal(),
+ * the offending thread hangs on the mutex lock forever without letting
+ * the process exit.
+ *
+ * On the other hand. not taking the mutex in signal handler would cause
+ * it to modify the lru_list of buffered log messages in a racy manner,
+ * corrupt the list and potentially give rise to an unending
+ * cascade of SIGSEGVs and other re-entrancy issues.
+ */
+
+ gf_log_disable_suppression_before_exit(ctx);
+
+ gf_msg_callingfn("", GF_LOG_WARNING, 0, glusterfsd_msg_32,
+ "received signum (%d), shutting down", signum);
+
+ if (ctx->cleanup_started)
+ return;
+ pthread_mutex_lock(&ctx->cleanup_lock);
+ {
+ ctx->cleanup_started = 1;
- ctx->iobuf_pool = iobuf_pool_new ();
- if (!ctx->iobuf_pool) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs iobuf pool creation failed");
- goto out;
+ /* signout should be sent to all the bricks in case brick mux is enabled
+ * and multiple brick instances are attached to this process
+ */
+ if (ctx->active) {
+ top = ctx->active->first;
+ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ victim = (*trav_p)->xlator;
+ rpc_clnt_mgmt_pmap_signout(ctx, victim->name);
+ }
+ } else {
+ rpc_clnt_mgmt_pmap_signout(ctx, NULL);
}
- ctx->event_pool = event_pool_new (DEFAULT_EVENT_POOL_SIZE);
- if (!ctx->event_pool) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs event pool creation failed");
- goto out;
- }
+ /* below part is a racy code where the rpcsvc object is freed.
+ * But in another thread (epoll thread), upon poll error in the
+ * socket the transports are cleaned up where again rpcsvc object
+ * is accessed (which is already freed by the below function).
+ * Since the process is about to be killed don't execute the function
+ * below.
+ */
+ /* if (ctx->listener) { */
+ /* (void) glusterfs_listener_stop (ctx); */
+ /* } */
- pool = GF_CALLOC (1, sizeof (call_pool_t),
- gfd_mt_call_pool_t);
- if (!pool) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs call pool creation failed");
- goto out;
+ /* Call fini() of FUSE xlator first:
+ * so there are no more requests coming and
+ * 'umount' of mount point is done properly */
+ trav = ctx->master;
+ if (trav && trav->fini) {
+ THIS = trav;
+ trav->fini(trav);
}
- /* frame_mem_pool size 112 * 4k */
- pool->frame_mem_pool = mem_pool_new (call_frame_t, 4096);
- if (!pool->frame_mem_pool) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs frame pool creation failed");
- goto out;
- }
- /* stack_mem_pool size 256 * 1024 */
- pool->stack_mem_pool = mem_pool_new (call_stack_t, 1024);
- if (!pool->stack_mem_pool) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs stack pool creation failed");
- goto out;
- }
+ glusterfs_pidfile_cleanup(ctx);
- ctx->stub_mem_pool = mem_pool_new (call_stub_t, 1024);
- if (!ctx->stub_mem_pool) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs stub pool creation failed");
- goto out;
+#if 0
+ /* TODO: Properly do cleanup_and_exit(), with synchronization */
+ if (ctx->mgmt) {
+ /* cleanup the saved-frames before last unref */
+ rpc_clnt_connection_cleanup (&ctx->mgmt->conn);
+ rpc_clnt_unref (ctx->mgmt);
}
+#endif
- ctx->dict_pool = mem_pool_new (dict_t, GF_MEMPOOL_COUNT_OF_DICT_T);
- if (!ctx->dict_pool)
- return -1;
+ trav = NULL;
- ctx->dict_pair_pool = mem_pool_new (data_pair_t,
- GF_MEMPOOL_COUNT_OF_DATA_PAIR_T);
- if (!ctx->dict_pair_pool)
- return -1;
+ /* previously we were releasing the cleanup mutex lock before the
+ process exit. As we are releasing the cleanup mutex lock, before
+ the process can exit some other thread which is blocked on
+ cleanup mutex lock is acquiring the cleanup mutex lock and
+ trying to acquire some resources which are already freed as a
+ part of cleanup. To avoid this, we are exiting the process without
+ releasing the cleanup mutex lock. This will not cause any lock
+ related issues as the process which acquired the lock is going down
+ */
+ /* NOTE: Only the least significant 8 bits i.e (signum & 255)
+ will be available to parent process on calling exit() */
+ exit(abs(signum));
+ }
+}
- ctx->dict_data_pool = mem_pool_new (data_t, GF_MEMPOOL_COUNT_OF_DATA_T);
- if (!ctx->dict_data_pool)
- return -1;
+static void
+reincarnate(int signum)
+{
+ int ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
- INIT_LIST_HEAD (&pool->all_frames);
- LOCK_INIT (&pool->lock);
- ctx->pool = pool;
+ ctx = glusterfsd_ctx;
+ cmd_args = &ctx->cmd_args;
- pthread_mutex_init (&(ctx->lock), NULL);
+ gf_msg_trace("gluster", 0, "received reincarnate request (sig:HUP)");
- cmd_args = &ctx->cmd_args;
+ if (cmd_args->volfile_server) {
+ gf_smsg("glusterfsd", GF_LOG_INFO, 0, glusterfsd_msg_11, NULL);
+ ret = glusterfs_volfile_fetch(ctx);
+ }
- /* parsing command line arguments */
- cmd_args->log_level = DEFAULT_LOG_LEVEL;
+ /* Also, SIGHUP should do logrotate */
+ gf_log_logrotate(1);
- cmd_args->mac_compat = GF_OPTION_DISABLE;
-#ifdef GF_DARWIN_HOST_OS
- /* On Darwin machines, O_APPEND is not handled,
- * which may corrupt the data
- */
- cmd_args->fuse_direct_io_mode = GF_OPTION_DISABLE;
-#else
- cmd_args->fuse_direct_io_mode = GF_OPTION_DEFERRED;
-#endif
- cmd_args->fuse_attribute_timeout = -1;
- cmd_args->fuse_entry_timeout = -1;
+ if (ret < 0)
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_12, NULL);
- INIT_LIST_HEAD (&cmd_args->xlator_options);
+ return;
+}
- lim.rlim_cur = RLIM_INFINITY;
- lim.rlim_max = RLIM_INFINITY;
- setrlimit (RLIMIT_CORE, &lim);
+void
+emancipate(glusterfs_ctx_t *ctx, int ret)
+{
+ /* break free from the parent */
+ if (ctx->daemon_pipe[1] != -1) {
+ sys_write(ctx->daemon_pipe[1], (void *)&ret, sizeof(ret));
+ sys_close(ctx->daemon_pipe[1]);
+ ctx->daemon_pipe[1] = -1;
+ }
+}
- ret = 0;
-out:
+static uint8_t
+gf_get_process_mode(char *exec_name)
+{
+ char *dup_execname = NULL, *base = NULL;
+ uint8_t ret = 0;
- if (ret && pool) {
+ dup_execname = gf_strdup(exec_name);
+ base = basename(dup_execname);
- if (pool->frame_mem_pool)
- mem_pool_destroy (pool->frame_mem_pool);
+ if (!strncmp(base, "glusterfsd", 10)) {
+ ret = GF_SERVER_PROCESS;
+ } else if (!strncmp(base, "glusterd", 8)) {
+ ret = GF_GLUSTERD_PROCESS;
+ } else {
+ ret = GF_CLIENT_PROCESS;
+ }
- if (pool->stack_mem_pool)
- mem_pool_destroy (pool->stack_mem_pool);
+ GF_FREE(dup_execname);
- GF_FREE (pool);
- }
+ return ret;
+}
- if (ret && ctx) {
- if (ctx->stub_mem_pool)
- mem_pool_destroy (ctx->stub_mem_pool);
+static int
+glusterfs_ctx_defaults_init(glusterfs_ctx_t *ctx)
+{
+ cmd_args_t *cmd_args = NULL;
+ struct rlimit lim = {
+ 0,
+ };
+ int ret = -1;
- if (ctx->dict_pool)
- mem_pool_destroy (ctx->dict_pool);
+ if (!ctx)
+ return ret;
- if (ctx->dict_data_pool)
- mem_pool_destroy (ctx->dict_data_pool);
+ ret = xlator_mem_acct_init(THIS, gfd_mt_end);
+ if (ret != 0) {
+ gf_smsg(THIS->name, GF_LOG_CRITICAL, 0, glusterfsd_msg_34, NULL);
+ return ret;
+ }
+
+ /* reset ret to -1 so that we don't need to explicitly
+ * set it in all error paths before "goto err"
+ */
+ ret = -1;
+
+ /* monitoring should be enabled by default */
+ ctx->measure_latency = true;
+
+ ctx->process_uuid = generate_glusterfs_ctx_id();
+ if (!ctx->process_uuid) {
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_13, NULL);
+ goto out;
+ }
+
+ ctx->page_size = 128 * GF_UNIT_KB;
+
+ ctx->iobuf_pool = iobuf_pool_new();
+ if (!ctx->iobuf_pool) {
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "iobuf", NULL);
+ goto out;
+ }
+
+ ctx->event_pool = gf_event_pool_new(DEFAULT_EVENT_POOL_SIZE,
+ STARTING_EVENT_THREADS);
+ if (!ctx->event_pool) {
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "event", NULL);
+ goto out;
+ }
+
+ ctx->pool = GF_CALLOC(1, sizeof(call_pool_t), gfd_mt_call_pool_t);
+ if (!ctx->pool) {
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "call", NULL);
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&ctx->pool->all_frames);
+ LOCK_INIT(&ctx->pool->lock);
+
+ /* frame_mem_pool size 112 * 4k */
+ ctx->pool->frame_mem_pool = mem_pool_new(call_frame_t, 4096);
+ if (!ctx->pool->frame_mem_pool) {
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "frame", NULL);
+ goto out;
+ }
+ /* stack_mem_pool size 256 * 1024 */
+ ctx->pool->stack_mem_pool = mem_pool_new(call_stack_t, 1024);
+ if (!ctx->pool->stack_mem_pool) {
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "stack", NULL);
+ goto out;
+ }
+
+ ctx->stub_mem_pool = mem_pool_new(call_stub_t, 1024);
+ if (!ctx->stub_mem_pool) {
+ gf_smsg("", GF_LOG_CRITICAL, 0, glusterfsd_msg_14, "stub", NULL);
+ goto out;
+ }
+
+ ctx->dict_pool = mem_pool_new(dict_t, GF_MEMPOOL_COUNT_OF_DICT_T);
+ if (!ctx->dict_pool)
+ goto out;
+
+ ctx->dict_pair_pool = mem_pool_new(data_pair_t,
+ GF_MEMPOOL_COUNT_OF_DATA_PAIR_T);
+ if (!ctx->dict_pair_pool)
+ goto out;
+
+ ctx->dict_data_pool = mem_pool_new(data_t, GF_MEMPOOL_COUNT_OF_DATA_T);
+ if (!ctx->dict_data_pool)
+ goto out;
+
+ ctx->logbuf_pool = mem_pool_new(log_buf_t, GF_MEMPOOL_COUNT_OF_LRU_BUF_T);
+ if (!ctx->logbuf_pool)
+ goto out;
+
+ pthread_mutex_init(&ctx->notify_lock, NULL);
+ pthread_mutex_init(&ctx->cleanup_lock, NULL);
+ pthread_cond_init(&ctx->notify_cond, NULL);
+
+ ctx->clienttable = gf_clienttable_alloc();
+ if (!ctx->clienttable)
+ goto out;
+
+ cmd_args = &ctx->cmd_args;
+
+ /* parsing command line arguments */
+ cmd_args->log_level = DEFAULT_LOG_LEVEL;
+ cmd_args->logger = gf_logger_glusterlog;
+ cmd_args->log_format = gf_logformat_withmsgid;
+ cmd_args->log_buf_size = GF_LOG_LRU_BUFSIZE_DEFAULT;
+ cmd_args->log_flush_timeout = GF_LOG_FLUSH_TIMEOUT_DEFAULT;
+
+ cmd_args->mac_compat = GF_OPTION_DISABLE;
+#ifdef GF_DARWIN_HOST_OS
+ /* On Darwin machines, O_APPEND is not handled,
+ * which may corrupt the data
+ */
+ cmd_args->fuse_direct_io_mode = GF_OPTION_DISABLE;
+#else
+ cmd_args->fuse_direct_io_mode = GF_OPTION_DEFERRED;
+#endif
+ cmd_args->fuse_attribute_timeout = -1;
+ cmd_args->fuse_entry_timeout = -1;
+ cmd_args->fopen_keep_cache = GF_OPTION_DEFERRED;
+ cmd_args->kernel_writeback_cache = GF_OPTION_DEFERRED;
+ cmd_args->fuse_flush_handle_interrupt = GF_OPTION_DEFERRED;
+
+ if (ctx->mem_acct_enable)
+ cmd_args->mem_acct = 1;
+
+ INIT_LIST_HEAD(&cmd_args->xlator_options);
+ INIT_LIST_HEAD(&cmd_args->volfile_servers);
+ ctx->pxl_count = 0;
+ pthread_mutex_init(&ctx->fd_lock, NULL);
+ pthread_cond_init(&ctx->fd_cond, NULL);
+ INIT_LIST_HEAD(&ctx->janitor_fds);
+
+ lim.rlim_cur = RLIM_INFINITY;
+ lim.rlim_max = RLIM_INFINITY;
+ setrlimit(RLIMIT_CORE, &lim);
+
+ ret = 0;
+out:
- if (ctx->dict_pair_pool)
- mem_pool_destroy (ctx->dict_pair_pool);
+ if (ret) {
+ if (ctx->pool) {
+ mem_pool_destroy(ctx->pool->frame_mem_pool);
+ mem_pool_destroy(ctx->pool->stack_mem_pool);
}
+ GF_FREE(ctx->pool);
+ mem_pool_destroy(ctx->stub_mem_pool);
+ mem_pool_destroy(ctx->dict_pool);
+ mem_pool_destroy(ctx->dict_data_pool);
+ mem_pool_destroy(ctx->dict_pair_pool);
+ mem_pool_destroy(ctx->logbuf_pool);
+ }
- return ret;
+ return ret;
}
static int
-logging_init (glusterfs_ctx_t *ctx)
+logging_init(glusterfs_ctx_t *ctx, const char *progpath)
{
- cmd_args_t *cmd_args = NULL;
- int ret = 0;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
- cmd_args = &ctx->cmd_args;
+ cmd_args = &ctx->cmd_args;
- if (cmd_args->log_file == NULL) {
- ret = set_log_file_path (cmd_args);
- if (ret == -1) {
- fprintf (stderr, "ERROR: failed to set the log file path\n");
- return -1;
- }
+ if (cmd_args->log_file == NULL) {
+ ret = gf_set_log_file_path(cmd_args, ctx);
+ if (ret == -1) {
+ fprintf(stderr,
+ "ERROR: failed to set the log file "
+ "path\n");
+ return -1;
}
+ }
- if (gf_log_init (cmd_args->log_file) == -1) {
- fprintf (stderr, "ERROR: failed to open logfile %s\n",
- cmd_args->log_file);
- return -1;
+ if (cmd_args->log_ident == NULL) {
+ ret = gf_set_log_ident(cmd_args);
+ if (ret == -1) {
+ fprintf(stderr,
+ "ERROR: failed to set the log "
+ "identity\n");
+ return -1;
}
+ }
- gf_log_set_loglevel (cmd_args->log_level);
+ /* finish log set parameters before init */
+ gf_log_set_loglevel(ctx, cmd_args->log_level);
- return 0;
+ gf_log_set_localtime(cmd_args->localtime_logging);
+
+ gf_log_set_logger(cmd_args->logger);
+
+ gf_log_set_logformat(cmd_args->log_format);
+
+ gf_log_set_log_buf_size(cmd_args->log_buf_size);
+
+ gf_log_set_log_flush_timeout(cmd_args->log_flush_timeout);
+
+ if (gf_log_init(ctx, cmd_args->log_file, cmd_args->log_ident) == -1) {
+ fprintf(stderr, "ERROR: failed to open logfile %s\n",
+ cmd_args->log_file);
+ return -1;
+ }
+
+ /* At this point, all the logging related parameters are initialised
+ * except for the log flush timer, which will be injected post fork(2)
+ * in daemonize() . During this time, any log message that is logged
+ * will be kept buffered. And if the list that holds these messages
+ * overflows, then the same lru policy is used to drive out the least
+ * recently used message and displace it with the message just logged.
+ */
+
+ return 0;
}
void
-gf_check_and_set_mem_acct (int argc, char *argv[], glusterfs_ctx_t *ctx)
+gf_check_and_set_mem_acct(int argc, char *argv[])
{
- int i = 0;
- for (i = 0; i < argc; i++) {
- if (strcmp (argv[i], "--mem-accounting") == 0) {
- ctx->mem_accounting = 1;
- break;
- }
+ int i = 0;
+
+ for (i = 0; i < argc; i++) {
+ if (strcmp(argv[i], "--no-mem-accounting") == 0) {
+ gf_global_mem_acct_enable_set(0);
+ break;
}
+ }
}
+/**
+ * print_exports_file - Print out & verify the syntax
+ * of the exports file specified
+ * in the parameter.
+ *
+ * @exports_file : Path of the exports file to print & verify
+ *
+ * @return : success: 0 when successfully parsed
+ * failure: 1 when failed to parse one or more lines
+ * -1 when other critical errors (dlopen () etc)
+ * Critical errors are treated differently than parse errors. Critical
+ * errors terminate the program immediately here and print out different
+ * error messages. Hence there are different return values.
+ */
int
-parse_cmdline (int argc, char *argv[], glusterfs_ctx_t *ctx)
+print_exports_file(const char *exports_file)
{
- int process_mode = 0;
- int ret = 0;
- struct stat stbuf = {0, };
- char timestr[32];
- char tmp_logfile[1024] = { 0 };
- char *tmp_logfile_dyn = NULL;
- char *tmp_logfilebase = NULL;
- cmd_args_t *cmd_args = NULL;
+ void *libhandle = NULL;
+ char *libpathfull = NULL;
+ struct exports_file *file = NULL;
+ int ret = 0;
+
+ int (*exp_file_parse)(const char *filepath, struct exports_file **expfile,
+ struct mount3_state *ms) = NULL;
+ void (*exp_file_print)(const struct exports_file *file) = NULL;
+ void (*exp_file_deinit)(struct exports_file * ptr) = NULL;
+
+ /* XLATORDIR passed through a -D flag to GCC */
+ ret = gf_asprintf(&libpathfull, "%s/%s/server.so", XLATORDIR, "nfs");
+ if (ret < 0) {
+ gf_log("glusterfs", GF_LOG_CRITICAL, "asprintf () failed.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Load up the library */
+ libhandle = dlopen(libpathfull, RTLD_NOW);
+ if (!libhandle) {
+ gf_log("glusterfs", GF_LOG_CRITICAL,
+ "Error loading NFS server library : "
+ "%s\n",
+ dlerror());
+ ret = -1;
+ goto out;
+ }
+
+ /* Load up the function */
+ exp_file_parse = dlsym(libhandle, "exp_file_parse");
+ if (!exp_file_parse) {
+ gf_log("glusterfs", GF_LOG_CRITICAL,
+ "Error finding function exp_file_parse "
+ "in symbol.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Parse the file */
+ ret = exp_file_parse(exports_file, &file, NULL);
+ if (ret < 0) {
+ ret = 1; /* This means we failed to parse */
+ goto out;
+ }
+
+ /* Load up the function */
+ exp_file_print = dlsym(libhandle, "exp_file_print");
+ if (!exp_file_print) {
+ gf_log("glusterfs", GF_LOG_CRITICAL,
+ "Error finding function exp_file_print in symbol.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Print it out to screen */
+ exp_file_print(file);
+
+ /* Load up the function */
+ exp_file_deinit = dlsym(libhandle, "exp_file_deinit");
+ if (!exp_file_deinit) {
+ gf_log("glusterfs", GF_LOG_CRITICAL,
+ "Error finding function exp_file_deinit in lib.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Free the file */
+ exp_file_deinit(file);
- cmd_args = &ctx->cmd_args;
+out:
+ if (libhandle)
+ dlclose(libhandle);
+ GF_FREE(libpathfull);
+ return ret;
+}
- argp_parse (&argp, argc, argv, ARGP_IN_ORDER, NULL, cmd_args);
+/**
+ * print_netgroups_file - Print out & verify the syntax
+ * of the netgroups file specified
+ * in the parameter.
+ *
+ * @netgroups_file : Path of the netgroups file to print & verify
+ * @return : success: 0 when successfully parsed
+ * failure: 1 when failed to parse one more more lines
+ * -1 when other critical errors (dlopen () etc)
+ *
+ * We have multiple returns here because for critical errors, we abort
+ * operations immediately and exit. For example, if we can't load the
+ * NFS server library, then we have a real bad problem so we don't continue.
+ * Or if we cannot allocate anymore memory, we don't want to continue. Also,
+ * we want to print out a different error messages based on the ret value.
+ */
+int
+print_netgroups_file(const char *netgroups_file)
+{
+ void *libhandle = NULL;
+ char *libpathfull = NULL;
+ struct netgroups_file *file = NULL;
+ int ret = 0;
+
+ struct netgroups_file *(*ng_file_parse)(const char *file_path) = NULL;
+ void (*ng_file_print)(const struct netgroups_file *file) = NULL;
+ void (*ng_file_deinit)(struct netgroups_file * ptr) = NULL;
+
+ /* XLATORDIR passed through a -D flag to GCC */
+ ret = gf_asprintf(&libpathfull, "%s/%s/server.so", XLATORDIR, "nfs");
+ if (ret < 0) {
+ gf_log("glusterfs", GF_LOG_CRITICAL, "asprintf () failed.");
+ ret = -1;
+ goto out;
+ }
+ /* Load up the library */
+ libhandle = dlopen(libpathfull, RTLD_NOW);
+ if (!libhandle) {
+ gf_log("glusterfs", GF_LOG_CRITICAL,
+ "Error loading NFS server library : %s\n", dlerror());
+ ret = -1;
+ goto out;
+ }
+
+ /* Load up the function */
+ ng_file_parse = dlsym(libhandle, "ng_file_parse");
+ if (!ng_file_parse) {
+ gf_log("glusterfs", GF_LOG_CRITICAL,
+ "Error finding function ng_file_parse in symbol.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Parse the file */
+ file = ng_file_parse(netgroups_file);
+ if (!file) {
+ ret = 1; /* This means we failed to parse */
+ goto out;
+ }
+
+ /* Load up the function */
+ ng_file_print = dlsym(libhandle, "ng_file_print");
+ if (!ng_file_print) {
+ gf_log("glusterfs", GF_LOG_CRITICAL,
+ "Error finding function ng_file_print in symbol.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Print it out to screen */
+ ng_file_print(file);
+
+ /* Load up the function */
+ ng_file_deinit = dlsym(libhandle, "ng_file_deinit");
+ if (!ng_file_deinit) {
+ gf_log("glusterfs", GF_LOG_CRITICAL,
+ "Error finding function ng_file_deinit in lib.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Free the file */
+ ng_file_deinit(file);
- if (ENABLE_DEBUG_MODE == cmd_args->debug_mode) {
- cmd_args->log_level = GF_LOG_DEBUG;
- cmd_args->log_file = "/dev/stderr";
- cmd_args->no_daemon_mode = ENABLE_NO_DAEMON_MODE;
- }
+out:
+ if (libhandle)
+ dlclose(libhandle);
+ GF_FREE(libpathfull);
+ return ret;
+}
- process_mode = gf_get_process_mode (argv[0]);
- ctx->process_mode = process_mode;
+int
+parse_cmdline(int argc, char *argv[], glusterfs_ctx_t *ctx)
+{
+ int process_mode = 0;
+ int ret = 0;
+ struct stat stbuf = {
+ 0,
+ };
+ char timestr[GF_TIMESTR_SIZE];
+ char tmp_logfile[1024] = {0};
+ char *tmp_logfile_dyn = NULL;
+ char *tmp_logfilebase = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int len = 0;
+ char *thin_volfileid = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ /* Do this before argp_parse so it can be overridden. */
+ if (sys_access(SECURE_ACCESS_FILE, F_OK) == 0) {
+ cmd_args->secure_mgmt = 1;
+ ctx->ssl_cert_depth = glusterfs_read_secure_access_file();
+ }
+
+ /* Need to set lru_limit to below 0 to indicate there was nothing
+ specified. This is needed as 0 is a valid option, and may not be
+ default value. */
+ cmd_args->lru_limit = -1;
+
+ argp_parse(&argp, argc, argv, ARGP_IN_ORDER, NULL, cmd_args);
+
+ if (cmd_args->print_xlatordir || cmd_args->print_statedumpdir ||
+ cmd_args->print_logdir || cmd_args->print_libexecdir) {
+ /* Just print, nothing else to do */
+ goto out;
+ }
+
+ if (cmd_args->print_netgroups) {
+ /* When this option is set we don't want to do anything else
+ * except for printing & verifying the netgroups file.
+ */
+ ret = 0;
+ goto out;
+ }
- /* Make sure after the parsing cli, if '--volfile-server' option is
- given, then '--volfile-id' is mandatory */
- if (cmd_args->volfile_server && !cmd_args->volfile_id) {
- gf_log ("glusterfs", GF_LOG_CRITICAL,
- "ERROR: '--volfile-id' is mandatory if '-s' OR "
- "'--volfile-server' option is given");
- ret = -1;
+ if (cmd_args->print_exports) {
+ /* When this option is set we don't want to do anything else
+ * except for printing & verifying the exports file.
+ */
+ ret = 0;
+ goto out;
+ }
+
+ ctx->secure_mgmt = cmd_args->secure_mgmt;
+
+ if (ENABLE_DEBUG_MODE == cmd_args->debug_mode) {
+ cmd_args->log_level = GF_LOG_DEBUG;
+ cmd_args->log_file = gf_strdup("/dev/stderr");
+ cmd_args->no_daemon_mode = ENABLE_NO_DAEMON_MODE;
+ }
+
+ process_mode = gf_get_process_mode(argv[0]);
+ ctx->process_mode = process_mode;
+
+ if (cmd_args->process_name) {
+ ctx->cmd_args.process_name = cmd_args->process_name;
+ }
+ /* Make sure after the parsing cli, if '--volfile-server' option is
+ given, then '--volfile-id' is mandatory */
+ if (cmd_args->volfile_server && !cmd_args->volfile_id) {
+ gf_smsg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_15, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ if ((cmd_args->volfile_server == NULL) && (cmd_args->volfile == NULL)) {
+ if (process_mode == GF_SERVER_PROCESS)
+ cmd_args->volfile = gf_strdup(DEFAULT_SERVER_VOLFILE);
+ else if (process_mode == GF_GLUSTERD_PROCESS)
+ cmd_args->volfile = gf_strdup(DEFAULT_GLUSTERD_VOLFILE);
+ else
+ cmd_args->volfile = gf_strdup(DEFAULT_CLIENT_VOLFILE);
+
+ /* Check if the volfile exists, if not give usage output
+ and exit */
+ ret = sys_stat(cmd_args->volfile, &stbuf);
+ if (ret) {
+ gf_smsg("glusterfs", GF_LOG_CRITICAL, errno, glusterfsd_msg_16,
+ NULL);
+ /* argp_usage (argp.) */
+ fprintf(stderr, "USAGE: %s [options] [mountpoint]\n", argv[0]);
+ goto out;
+ }
+ }
+
+ if (cmd_args->thin_client) {
+ len = strlen(cmd_args->volfile_id) + SLEN("gfproxy-client/");
+ thin_volfileid = GF_MALLOC(len + 1, gf_common_mt_char);
+ snprintf(thin_volfileid, len + 1, "gfproxy-client/%s",
+ cmd_args->volfile_id);
+ GF_FREE(cmd_args->volfile_id);
+ cmd_args->volfile_id = thin_volfileid;
+ }
+
+ if (cmd_args->run_id) {
+ ret = sys_lstat(cmd_args->log_file, &stbuf);
+ /* If its /dev/null, or /dev/stdout, /dev/stderr,
+ * let it use the same, no need to alter
+ */
+ if (((ret == 0) &&
+ (S_ISREG(stbuf.st_mode) || S_ISLNK(stbuf.st_mode))) ||
+ (ret == -1)) {
+ /* Have separate logfile per run. */
+ gf_time_fmt(timestr, sizeof timestr, gf_time(), gf_timefmt_FT);
+ sprintf(tmp_logfile, "%s.%s.%d", cmd_args->log_file, timestr,
+ getpid());
+
+ /* Create symlink to actual log file */
+ sys_unlink(cmd_args->log_file);
+
+ tmp_logfile_dyn = gf_strdup(tmp_logfile);
+ tmp_logfilebase = basename(tmp_logfile_dyn);
+ ret = sys_symlink(tmp_logfilebase, cmd_args->log_file);
+ if (ret == -1) {
+ fprintf(stderr, "ERROR: symlink of logfile failed\n");
goto out;
- }
-
- if ((cmd_args->volfile_server == NULL)
- && (cmd_args->volfile == NULL)) {
- if (process_mode == GF_SERVER_PROCESS)
- cmd_args->volfile = gf_strdup (DEFAULT_SERVER_VOLFILE);
- else if (process_mode == GF_GLUSTERD_PROCESS)
- cmd_args->volfile = gf_strdup (DEFAULT_GLUSTERD_VOLFILE);
- else
- cmd_args->volfile = gf_strdup (DEFAULT_CLIENT_VOLFILE);
-
- /* Check if the volfile exists, if not give usage output
- and exit */
- ret = stat (cmd_args->volfile, &stbuf);
- if (ret) {
- gf_log ("glusterfs", GF_LOG_CRITICAL,
- "ERROR: parsing the volfile failed (%s)\n",
- strerror (errno));
- /* argp_usage (argp.) */
- fprintf (stderr, "USAGE: %s [options] [mountpoint]\n",
- argv[0]);
- goto out;
- }
- }
-
- if (cmd_args->run_id) {
- ret = sys_lstat (cmd_args->log_file, &stbuf);
- /* If its /dev/null, or /dev/stdout, /dev/stderr,
- * let it use the same, no need to alter
- */
- if (((ret == 0) &&
- (S_ISREG (stbuf.st_mode) || S_ISLNK (stbuf.st_mode))) ||
- (ret == -1)) {
- /* Have separate logfile per run */
- gf_time_fmt (timestr, sizeof timestr, time (NULL),
- gf_timefmt_FT);
- sprintf (tmp_logfile, "%s.%s.%d",
- cmd_args->log_file, timestr, getpid ());
-
- /* Create symlink to actual log file */
- sys_unlink (cmd_args->log_file);
-
- tmp_logfile_dyn = gf_strdup (tmp_logfile);
- tmp_logfilebase = basename (tmp_logfile_dyn);
- ret = sys_symlink (tmp_logfilebase,
- cmd_args->log_file);
- if (ret == -1) {
- fprintf (stderr, "ERROR: symlink of logfile failed\n");
- goto out;
- }
+ }
- GF_FREE (cmd_args->log_file);
- cmd_args->log_file = gf_strdup (tmp_logfile);
+ GF_FREE(cmd_args->log_file);
+ cmd_args->log_file = gf_strdup(tmp_logfile);
- GF_FREE (tmp_logfile_dyn);
- }
+ GF_FREE(tmp_logfile_dyn);
}
+ }
+
+ /*
+ This option was made obsolete but parsing it for backward
+ compatibility with third party applications
+ */
+ if (cmd_args->max_connect_attempts) {
+ gf_smsg("glusterfs", GF_LOG_WARNING, 0, glusterfsd_msg_33, NULL);
+ }
#ifdef GF_DARWIN_HOST_OS
- if (cmd_args->mount_point)
- cmd_args->mac_compat = GF_OPTION_DEFERRED;
+ if (cmd_args->mount_point)
+ cmd_args->mac_compat = GF_OPTION_DEFERRED;
#endif
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-glusterfs_pidfile_setup (glusterfs_ctx_t *ctx)
+glusterfs_pidfile_setup(glusterfs_ctx_t *ctx)
{
- cmd_args_t *cmd_args = NULL;
- int ret = -1;
- FILE *pidfp = NULL;
-
- cmd_args = &ctx->cmd_args;
+ cmd_args_t *cmd_args = NULL;
+ int ret = -1;
+ FILE *pidfp = NULL;
- if (!cmd_args->pid_file)
- return 0;
-
- pidfp = fopen (cmd_args->pid_file, "a+");
- if (!pidfp) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "pidfile %s error (%s)",
- cmd_args->pid_file, strerror (errno));
- goto out;
- }
+ cmd_args = &ctx->cmd_args;
- ret = lockf (fileno (pidfp), F_TLOCK, 0);
- if (ret) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "pidfile %s lock error (%s)",
- cmd_args->pid_file, strerror (errno));
- goto out;
- }
+ if (!cmd_args->pid_file)
+ return 0;
- gf_log ("glusterfsd", GF_LOG_TRACE,
- "pidfile %s lock acquired",
- cmd_args->pid_file);
+ pidfp = fopen(cmd_args->pid_file, "a+");
+ if (!pidfp) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_17,
+ "pidfile=%s", cmd_args->pid_file, NULL);
+ goto out;
+ }
- ret = lockf (fileno (pidfp), F_ULOCK, 0);
- if (ret) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "pidfile %s unlock error (%s)",
- cmd_args->pid_file, strerror (errno));
- goto out;
- }
+ ctx->pidfp = pidfp;
- ctx->pidfp = pidfp;
-
- ret = 0;
+ ret = 0;
out:
- if (ret && pidfp)
- fclose (pidfp);
- return ret;
+ return ret;
}
-
int
-glusterfs_pidfile_cleanup (glusterfs_ctx_t *ctx)
+glusterfs_pidfile_cleanup(glusterfs_ctx_t *ctx)
{
- cmd_args_t *cmd_args = NULL;
+ cmd_args_t *cmd_args = NULL;
- cmd_args = &ctx->cmd_args;
+ cmd_args = &ctx->cmd_args;
- if (!ctx->pidfp)
- return 0;
+ if (!ctx->pidfp)
+ return 0;
- gf_log ("glusterfsd", GF_LOG_TRACE,
- "pidfile %s cleanup",
- cmd_args->pid_file);
+ gf_msg_trace("glusterfsd", 0, "pidfile %s cleanup", cmd_args->pid_file);
- if (ctx->cmd_args.pid_file) {
- unlink (ctx->cmd_args.pid_file);
- ctx->cmd_args.pid_file = NULL;
- }
+ if (ctx->cmd_args.pid_file) {
+ GF_FREE(ctx->cmd_args.pid_file);
+ ctx->cmd_args.pid_file = NULL;
+ }
- lockf (fileno (ctx->pidfp), F_ULOCK, 0);
- fclose (ctx->pidfp);
- ctx->pidfp = NULL;
+ lockf(fileno(ctx->pidfp), F_ULOCK, 0);
+ fclose(ctx->pidfp);
+ ctx->pidfp = NULL;
- return 0;
+ return 0;
}
int
-glusterfs_pidfile_update (glusterfs_ctx_t *ctx)
+glusterfs_pidfile_update(glusterfs_ctx_t *ctx, pid_t pid)
{
- cmd_args_t *cmd_args = NULL;
- int ret = 0;
- FILE *pidfp = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+ FILE *pidfp = NULL;
- cmd_args = &ctx->cmd_args;
+ cmd_args = &ctx->cmd_args;
- pidfp = ctx->pidfp;
- if (!pidfp)
- return 0;
+ pidfp = ctx->pidfp;
+ if (!pidfp)
+ return 0;
- ret = lockf (fileno (pidfp), F_TLOCK, 0);
- if (ret) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "pidfile %s lock failed",
- cmd_args->pid_file);
- return ret;
- }
+ ret = lockf(fileno(pidfp), F_TLOCK, 0);
+ if (ret) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_18,
+ "pidfile=%s", cmd_args->pid_file, NULL);
+ return ret;
+ }
- ret = ftruncate (fileno (pidfp), 0);
- if (ret) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "pidfile %s truncation failed",
- cmd_args->pid_file);
- return ret;
- }
+ ret = sys_ftruncate(fileno(pidfp), 0);
+ if (ret) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_20,
+ "pidfile=%s", cmd_args->pid_file, NULL);
+ return ret;
+ }
- ret = fprintf (pidfp, "%d\n", getpid ());
- if (ret <= 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "pidfile %s write failed",
- cmd_args->pid_file);
- return ret;
- }
+ ret = fprintf(pidfp, "%d\n", pid);
+ if (ret <= 0) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_21,
+ "pidfile=%s", cmd_args->pid_file, NULL);
+ return ret;
+ }
- ret = fflush (pidfp);
- if (ret) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "pidfile %s write failed",
- cmd_args->pid_file);
- return ret;
- }
+ ret = fflush(pidfp);
+ if (ret) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, errno, glusterfsd_msg_21,
+ "pidfile=%s", cmd_args->pid_file, NULL);
+ return ret;
+ }
- gf_log ("glusterfsd", GF_LOG_DEBUG,
- "pidfile %s updated with pid %d",
- cmd_args->pid_file, getpid ());
+ gf_msg_debug("glusterfsd", 0, "pidfile %s updated with pid %d",
+ cmd_args->pid_file, pid);
- return 0;
+ return 0;
}
-
void *
-glusterfs_sigwaiter (void *arg)
+glusterfs_sigwaiter(void *arg)
{
- sigset_t set;
- int ret = 0;
- int sig = 0;
+ sigset_t set;
+ int ret = 0;
+ int sig = 0;
+ char *file = NULL;
+
+ sigemptyset(&set);
+ sigaddset(&set, SIGINT); /* cleanup_and_exit */
+ sigaddset(&set, SIGTERM); /* cleanup_and_exit */
+ sigaddset(&set, SIGHUP); /* reincarnate */
+ sigaddset(&set, SIGUSR1); /* gf_proc_dump_info */
+ sigaddset(&set, SIGUSR2);
+
+ for (;;) {
+ ret = sigwait(&set, &sig);
+ if (ret)
+ continue;
+
+ switch (sig) {
+ case SIGINT:
+ case SIGTERM:
+ cleanup_and_exit(sig);
+ break;
+ case SIGHUP:
+ reincarnate(sig);
+ break;
+ case SIGUSR1:
+ gf_proc_dump_info(sig, glusterfsd_ctx);
+ break;
+ case SIGUSR2:
+ file = gf_monitor_metrics(glusterfsd_ctx);
+ /* Nothing needed to be done here */
+ GF_FREE(file);
- sigemptyset (&set);
- sigaddset (&set, SIGINT); /* cleanup_and_exit */
- sigaddset (&set, SIGTERM); /* cleanup_and_exit */
- sigaddset (&set, SIGHUP); /* reincarnate */
- sigaddset (&set, SIGUSR1); /* gf_proc_dump_info */
- sigaddset (&set, SIGUSR2); /* gf_latency_toggle */
+ break;
+ default:
- for (;;) {
- ret = sigwait (&set, &sig);
- if (ret)
- continue;
-
-
- switch (sig) {
- case SIGINT:
- case SIGTERM:
- cleanup_and_exit (sig);
- break;
- case SIGHUP:
- reincarnate (sig);
- break;
- case SIGUSR1:
- gf_proc_dump_info (sig);
- break;
- case SIGUSR2:
- gf_latency_toggle (sig);
- break;
- default:
-
- break;
- }
+ break;
}
+ }
- return NULL;
+ return NULL;
}
+void
+glusterfsd_print_trace(int signum)
+{
+ gf_print_trace(signum, glusterfsd_ctx);
+}
int
-glusterfs_signals_setup (glusterfs_ctx_t *ctx)
+glusterfs_signals_setup(glusterfs_ctx_t *ctx)
{
- sigset_t set;
- int ret = 0;
-
- sigemptyset (&set);
-
- /* common setting for all threads */
- signal (SIGSEGV, gf_print_trace);
- signal (SIGABRT, gf_print_trace);
- signal (SIGILL, gf_print_trace);
- signal (SIGTRAP, gf_print_trace);
- signal (SIGFPE, gf_print_trace);
- signal (SIGBUS, gf_print_trace);
- signal (SIGINT, cleanup_and_exit);
- signal (SIGPIPE, SIG_IGN);
-
- /* block these signals from non-sigwaiter threads */
- sigaddset (&set, SIGTERM); /* cleanup_and_exit */
- sigaddset (&set, SIGHUP); /* reincarnate */
- sigaddset (&set, SIGUSR1); /* gf_proc_dump_info */
- sigaddset (&set, SIGUSR2); /* gf_latency_toggle */
-
- ret = pthread_sigmask (SIG_BLOCK, &set, NULL);
- if (ret) {
- gf_log ("", GF_LOG_WARNING,
- "failed to execute pthread_signmask %s",
- strerror (errno));
- return ret;
- }
-
- ret = pthread_create (&ctx->sigwaiter, NULL, glusterfs_sigwaiter,
- (void *) &set);
- if (ret) {
- /*
- TODO:
- fallback to signals getting handled by other threads.
- setup the signal handlers
- */
- gf_log ("", GF_LOG_WARNING,
- "failed to create pthread %s",
- strerror (errno));
- return ret;
- }
-
+ sigset_t set;
+ int ret = 0;
+
+ sigemptyset(&set);
+
+ /* common setting for all threads */
+ signal(SIGSEGV, glusterfsd_print_trace);
+ signal(SIGABRT, glusterfsd_print_trace);
+ signal(SIGILL, glusterfsd_print_trace);
+ signal(SIGTRAP, glusterfsd_print_trace);
+ signal(SIGFPE, glusterfsd_print_trace);
+ signal(SIGBUS, glusterfsd_print_trace);
+ signal(SIGINT, cleanup_and_exit);
+ signal(SIGPIPE, SIG_IGN);
+
+ /* block these signals from non-sigwaiter threads */
+ sigaddset(&set, SIGTERM); /* cleanup_and_exit */
+ sigaddset(&set, SIGHUP); /* reincarnate */
+ sigaddset(&set, SIGUSR1); /* gf_proc_dump_info */
+ sigaddset(&set, SIGUSR2);
+
+ /* Signals needed for asynchronous framework. */
+ sigaddset(&set, GF_ASYNC_SIGQUEUE);
+ sigaddset(&set, GF_ASYNC_SIGCTRL);
+
+ ret = pthread_sigmask(SIG_BLOCK, &set, NULL);
+ if (ret) {
+ gf_smsg("glusterfsd", GF_LOG_WARNING, errno, glusterfsd_msg_22, NULL);
return ret;
-}
+ }
+
+ ret = gf_thread_create(&ctx->sigwaiter, NULL, glusterfs_sigwaiter,
+ (void *)&set, "sigwait");
+ if (ret) {
+ /*
+ TODO:
+ fallback to signals getting handled by other threads.
+ setup the signal handlers
+ */
+ gf_smsg("glusterfsd", GF_LOG_WARNING, errno, glusterfsd_msg_23, NULL);
+ return ret;
+ }
+ return ret;
+}
int
-daemonize (glusterfs_ctx_t *ctx)
+daemonize(glusterfs_ctx_t *ctx)
{
- int ret = -1;
- cmd_args_t *cmd_args = NULL;
- int cstatus = 0;
-
- cmd_args = &ctx->cmd_args;
-
- ret = glusterfs_pidfile_setup (ctx);
- if (ret)
- goto out;
-
- if (cmd_args->no_daemon_mode)
- goto postfork;
-
- if (cmd_args->debug_mode)
- goto postfork;
-
- ret = os_daemon_return (0, 0);
- switch (ret) {
+ int ret = -1;
+ cmd_args_t *cmd_args = NULL;
+ int cstatus = 0;
+ int err = 1;
+ int child_pid = 0;
+
+ cmd_args = &ctx->cmd_args;
+
+ ret = glusterfs_pidfile_setup(ctx);
+ if (ret)
+ goto out;
+
+ if (cmd_args->no_daemon_mode) {
+ goto postfork;
+ }
+
+ if (cmd_args->debug_mode)
+ goto postfork;
+
+ ret = pipe(ctx->daemon_pipe);
+ if (ret) {
+ /* If pipe() fails, retain daemon_pipe[] = {-1, -1}
+ and parent will just not wait for child status
+ */
+ ctx->daemon_pipe[0] = -1;
+ ctx->daemon_pipe[1] = -1;
+ }
+
+ ret = os_daemon_return(0, 0);
+ switch (ret) {
case -1:
- gf_log ("daemonize", GF_LOG_ERROR,
- "Daemonization failed: %s", strerror(errno));
- goto out;
+ if (ctx->daemon_pipe[0] != -1) {
+ sys_close(ctx->daemon_pipe[0]);
+ sys_close(ctx->daemon_pipe[1]);
+ }
+
+ gf_smsg("daemonize", GF_LOG_ERROR, errno, glusterfsd_msg_24, NULL);
+ goto out;
case 0:
- break;
+ /* child */
+ /* close read */
+ sys_close(ctx->daemon_pipe[0]);
+ break;
default:
- if (ctx->mnt_pid > 0) {
- ret = waitpid (ctx->mnt_pid, &cstatus, 0);
- if (!(ret == ctx->mnt_pid && cstatus == 0)) {
- gf_log ("daemonize", GF_LOG_ERROR,
- "mount failed");
- exit (1);
- }
+ /* parent */
+ /* close write */
+ child_pid = ret;
+ sys_close(ctx->daemon_pipe[1]);
+
+ if (ctx->mnt_pid > 0) {
+ ret = waitpid(ctx->mnt_pid, &cstatus, 0);
+ if (!(ret == ctx->mnt_pid)) {
+ if (WIFEXITED(cstatus)) {
+ err = WEXITSTATUS(cstatus);
+ } else {
+ err = cstatus;
+ }
+ gf_smsg("daemonize", GF_LOG_ERROR, 0, glusterfsd_msg_25,
+ NULL);
+ exit(err);
}
- _exit (0);
- }
+ }
+ sys_read(ctx->daemon_pipe[0], (void *)&err, sizeof(err));
+ /* NOTE: Only the least significant 8 bits i.e (err & 255)
+ will be available to parent process on calling exit() */
+ if (err)
+ _exit(abs(err));
+
+ /* Update pid in parent only for glusterd process */
+ if (ctx->process_mode == GF_GLUSTERD_PROCESS) {
+ ret = glusterfs_pidfile_update(ctx, child_pid);
+ if (ret)
+ exit(1);
+ }
+ _exit(0);
+ }
postfork:
- ret = glusterfs_pidfile_update (ctx);
+ /* Update pid in child either process_mode is not belong to glusterd
+ or process is spawned in no daemon mode
+ */
+ if ((ctx->process_mode != GF_GLUSTERD_PROCESS) ||
+ (cmd_args->no_daemon_mode)) {
+ ret = glusterfs_pidfile_update(ctx, getpid());
if (ret)
- goto out;
+ goto out;
+ }
+ gf_log("glusterfs", GF_LOG_INFO, "Pid of current running process is %d",
+ getpid());
+ ret = gf_log_inject_timer_event(ctx);
- glusterfs_signals_setup (ctx);
+ glusterfs_signals_setup(ctx);
out:
- return ret;
+ return ret;
}
+#ifdef GF_LINUX_HOST_OS
+static int
+set_oom_score_adj(glusterfs_ctx_t *ctx)
+{
+ int ret = -1;
+ cmd_args_t *cmd_args = NULL;
+ int fd = -1;
+ size_t oom_score_len = 0;
+ struct oom_api_info *api = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args->oom_score_adj)
+ goto success;
+
+ api = get_oom_api_info();
+ if (!api)
+ goto out;
+
+ fd = open(api->oom_api_file, O_WRONLY);
+ if (fd < 0)
+ goto out;
+
+ oom_score_len = strlen(cmd_args->oom_score_adj);
+ if (sys_write(fd, cmd_args->oom_score_adj, oom_score_len) !=
+ oom_score_len) {
+ sys_close(fd);
+ goto out;
+ }
+
+ if (sys_close(fd) < 0)
+ goto out;
+
+success:
+ ret = 0;
+
+out:
+ return ret;
+}
+#endif
int
-glusterfs_process_volfp (glusterfs_ctx_t *ctx, FILE *fp)
+glusterfs_process_volfp(glusterfs_ctx_t *ctx, FILE *fp)
{
- glusterfs_graph_t *graph = NULL;
- int ret = -1;
- xlator_t *trav = NULL;
+ glusterfs_graph_t *graph = NULL;
+ int ret = -1;
+ xlator_t *trav = NULL;
- graph = glusterfs_graph_construct (fp);
- if (!graph) {
- gf_log ("", GF_LOG_ERROR, "failed to construct the graph");
- goto out;
- }
+ if (!ctx)
+ return -1;
- for (trav = graph->first; trav; trav = trav->next) {
- if (strcmp (trav->type, "mount/fuse") == 0) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "fuse xlator cannot be specified "
- "in volume file");
- goto out;
- }
- }
+ graph = glusterfs_graph_construct(fp);
+ if (!graph) {
+ gf_smsg("", GF_LOG_ERROR, 0, glusterfsd_msg_26, NULL);
+ goto out;
+ }
- ret = glusterfs_graph_prepare (graph, ctx);
- if (ret) {
- glusterfs_graph_destroy (graph);
- goto out;
+ for (trav = graph->first; trav; trav = trav->next) {
+ if (strcmp(trav->type, "mount/fuse") == 0) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_27, NULL);
+ goto out;
}
+ }
- ret = glusterfs_graph_activate (graph, ctx);
+ xlator_t *xl = graph->first;
+ if (xl && (strcmp(xl->type, "protocol/server") == 0)) {
+ (void)copy_opts_to_child(xl, FIRST_CHILD(xl), "*auth*");
+ }
- if (ret) {
- glusterfs_graph_destroy (graph);
- goto out;
- }
+ ret = glusterfs_graph_prepare(graph, ctx, ctx->cmd_args.volume_name);
+ if (ret) {
+ goto out;
+ }
- gf_log_volume_file (fp);
+ ret = glusterfs_graph_activate(graph, ctx);
- ret = 0;
+ if (ret) {
+ goto out;
+ }
+
+ gf_log_dump_graph(fp, graph);
+
+ ret = 0;
out:
- if (fp)
- fclose (fp);
+ if (fp)
+ fclose(fp);
+
+ if (ret) {
+ /* TODO This code makes to generic for all graphs
+ client as well as servers.For now it destroys
+ graph only for server-side xlators not for client-side
+ xlators, before destroying a graph call xlator fini for
+ xlators those call xlator_init to avoid leak
+ */
+ if (graph) {
+ xl = graph->first;
+ if ((ctx->active != graph) &&
+ (xl && !strcmp(xl->type, "protocol/server"))) {
+ /* Take dict ref for every graph xlator to avoid dict leak
+ at the time of graph destroying
+ */
+ glusterfs_graph_fini(graph);
+ glusterfs_graph_destroy(graph);
+ }
+ }
- if (ret && !ctx->active) {
- /* there is some error in setting up the first graph itself */
- cleanup_and_exit (0);
+ /* there is some error in setting up the first graph itself */
+ if (!ctx->active) {
+ emancipate(ctx, ret);
+ cleanup_and_exit(ret);
}
+ }
- return ret;
+ return ret;
}
-
int
-glusterfs_volumes_init (glusterfs_ctx_t *ctx)
+glusterfs_volumes_init(glusterfs_ctx_t *ctx)
{
- FILE *fp = NULL;
- cmd_args_t *cmd_args = NULL;
- int ret = 0;
+ FILE *fp = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
- cmd_args = &ctx->cmd_args;
+ cmd_args = &ctx->cmd_args;
- if (cmd_args->sock_file) {
- ret = glusterfs_listener_init (ctx);
- if (ret)
- goto out;
- }
+ if (cmd_args->sock_file) {
+ ret = glusterfs_listener_init(ctx);
+ if (ret)
+ goto out;
+ }
- if (cmd_args->volfile_server) {
- ret = glusterfs_mgmt_init (ctx);
- goto out;
- }
+ if (cmd_args->volfile_server) {
+ ret = glusterfs_mgmt_init(ctx);
+ /* return, do not emancipate() yet */
+ return ret;
+ }
- fp = get_volfp (ctx);
+ fp = get_volfp(ctx);
- if (!fp) {
- gf_log ("glusterfsd", GF_LOG_ERROR,
- "Cannot reach volume specification file");
- ret = -1;
- goto out;
- }
+ if (!fp) {
+ gf_smsg("glusterfsd", GF_LOG_ERROR, 0, glusterfsd_msg_28, NULL);
+ ret = -1;
+ goto out;
+ }
- ret = glusterfs_process_volfp (ctx, fp);
- if (ret)
- goto out;
+ ret = glusterfs_process_volfp(ctx, fp);
+ if (ret)
+ goto out;
out:
- return ret;
+ emancipate(ctx, ret);
+ return ret;
}
+/* This is the only legal global pointer */
+glusterfs_ctx_t *glusterfsd_ctx;
int
-main (int argc, char *argv[])
+main(int argc, char *argv[])
{
- glusterfs_ctx_t *ctx = NULL;
- int ret = -1;
-
- ret = glusterfs_globals_init ();
- if (ret)
- return ret;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = -1;
+ char cmdlinestr[PATH_MAX] = {
+ 0,
+ };
+ cmd_args_t *cmd = NULL;
+
+ gf_check_and_set_mem_acct(argc, argv);
+
+ ctx = glusterfs_ctx_new();
+ if (!ctx) {
+ gf_smsg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_29, NULL);
+ return ENOMEM;
+ }
+ glusterfsd_ctx = ctx;
+
+ ret = glusterfs_globals_init(ctx);
+ if (ret)
+ return ret;
- ctx = glusterfs_ctx_get ();
- if (!ctx) {
- gf_log ("glusterfs", GF_LOG_CRITICAL,
- "ERROR: glusterfs context not initialized");
- return ENOMEM;
- }
-#ifndef DEBUG
- /* Enable memory accounting on the fly based on argument */
- gf_check_and_set_mem_acct (argc, argv, ctx);
-#endif
- ret = glusterfs_ctx_defaults_init (ctx);
- if (ret)
+ THIS->ctx = ctx;
+
+ ret = glusterfs_ctx_defaults_init(ctx);
+ if (ret)
+ goto out;
+
+ ret = parse_cmdline(argc, argv, ctx);
+ if (ret)
+ goto out;
+ cmd = &ctx->cmd_args;
+
+ if (cmd->print_xlatordir) {
+ /* XLATORDIR passed through a -D flag to GCC */
+ printf("%s\n", XLATORDIR);
+ goto out;
+ }
+
+ if (cmd->print_statedumpdir) {
+ printf("%s\n", DEFAULT_VAR_RUN_DIRECTORY);
+ goto out;
+ }
+
+ if (cmd->print_logdir) {
+ printf("%s\n", DEFAULT_LOG_FILE_DIRECTORY);
+ goto out;
+ }
+
+ if (cmd->print_libexecdir) {
+ printf("%s\n", LIBEXECDIR);
+ goto out;
+ }
+
+ if (cmd->print_netgroups) {
+ /* If this option is set we want to print & verify the file,
+ * set the return value (exit code in this case) and exit.
+ */
+ ret = print_netgroups_file(cmd->print_netgroups);
+ goto out;
+ }
+
+ if (cmd->print_exports) {
+ /* If this option is set we want to print & verify the file,
+ * set the return value (exit code in this case)
+ * and exit.
+ */
+ ret = print_exports_file(cmd->print_exports);
+ goto out;
+ }
+
+ ret = logging_init(ctx, argv[0]);
+ if (ret)
+ goto out;
+
+ /* set brick_mux mode only for server process */
+ if ((ctx->process_mode != GF_SERVER_PROCESS) && cmd->brick_mux) {
+ gf_smsg("glusterfs", GF_LOG_CRITICAL, 0, glusterfsd_msg_43, NULL);
+ goto out;
+ }
+
+ /* log the version of glusterfs running here along with the actual
+ command line options. */
+ {
+ int i = 0;
+ int pos = 0;
+ int len = snprintf(cmdlinestr, sizeof(cmdlinestr), "%s", argv[0]);
+ for (i = 1; (i < argc) && (len > 0); i++) {
+ pos += len;
+ len = snprintf(cmdlinestr + pos, sizeof(cmdlinestr) - pos, " %s",
+ argv[i]);
+ if ((len <= 0) || (len >= (sizeof(cmdlinestr) - pos))) {
+ gf_smsg("glusterfs", GF_LOG_ERROR, 0, glusterfsd_msg_029, NULL);
+ ret = -1;
goto out;
+ }
+ }
+ gf_smsg(argv[0], GF_LOG_INFO, 0, glusterfsd_msg_30, "arg=%s", argv[0],
+ "version=%s", PACKAGE_VERSION, "cmdlinestr=%s", cmdlinestr,
+ NULL);
- ret = parse_cmdline (argc, argv, ctx);
- if (ret)
- goto out;
+ ctx->cmdlinestr = gf_strdup(cmdlinestr);
+ }
- ret = logging_init (ctx);
- if (ret)
- goto out;
+ gf_proc_dump_init();
- /* log the version of glusterfs running here */
- gf_log (argv[0], GF_LOG_INFO,
- "Started running %s version %s",
- argv[0], PACKAGE_VERSION);
+ ret = create_fuse_mount(ctx);
+ if (ret)
+ goto out;
- gf_proc_dump_init();
+ ret = daemonize(ctx);
+ if (ret)
+ goto out;
- ret = create_fuse_mount (ctx);
- if (ret)
- goto out;
+ /*
+ * If we do this before daemonize, the pool-sweeper thread dies with
+ * the parent, but we want to do it as soon as possible after that in
+ * case something else depends on pool allocations.
+ */
+ mem_pools_init();
- ret = daemonize (ctx);
- if (ret)
- goto out;
+ ret = gf_async_init(ctx);
+ if (ret < 0) {
+ goto out;
+ }
- ctx->env = syncenv_new (0);
- if (!ctx->env) {
- gf_log ("", GF_LOG_ERROR,
- "Could not create new sync-environment");
- goto out;
- }
+#ifdef GF_LINUX_HOST_OS
+ ret = set_oom_score_adj(ctx);
+ if (ret)
+ goto out;
+#endif
- ret = glusterfs_volumes_init (ctx);
- if (ret)
- goto out;
+ ctx->env = syncenv_new(0, 0, 0);
+ if (!ctx->env) {
+ gf_smsg("", GF_LOG_ERROR, 0, glusterfsd_msg_31, NULL);
+ goto out;
+ }
- ret = event_dispatch (ctx->event_pool);
+ /* do this _after_ daemonize() */
+ if (!glusterfs_ctx_tw_get(ctx)) {
+ ret = -1;
+ goto out;
+ }
-out:
-// glusterfs_ctx_destroy (ctx);
+ ret = glusterfs_volumes_init(ctx);
+ if (ret)
+ goto out;
- return ret;
+ ret = gf_event_dispatch(ctx->event_pool);
+
+out:
+ // glusterfs_ctx_destroy (ctx);
+ gf_async_fini();
+ return ret;
}
diff --git a/glusterfsd/src/glusterfsd.h b/glusterfsd/src/glusterfsd.h
index 8ec121954bb..4e1413caa70 100644
--- a/glusterfsd/src/glusterfsd.h
+++ b/glusterfsd/src/glusterfsd.h
@@ -1,110 +1,142 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __GLUSTERFSD_H__
#define __GLUSTERFSD_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
#include "rpcsvc.h"
#include "glusterd1-xdr.h"
-#define DEFAULT_GLUSTERD_VOLFILE CONFDIR "/glusterd.vol"
-#define DEFAULT_CLIENT_VOLFILE CONFDIR "/glusterfs.vol"
-#define DEFAULT_SERVER_VOLFILE CONFDIR "/glusterfsd.vol"
-#define DEFAULT_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
-#define DEFAULT_LOG_LEVEL GF_LOG_INFO
+#define DEFAULT_GLUSTERD_VOLFILE CONFDIR "/glusterd.vol"
+#define DEFAULT_CLIENT_VOLFILE CONFDIR "/glusterfs.vol"
+#define DEFAULT_SERVER_VOLFILE CONFDIR "/glusterfsd.vol"
-#define DEFAULT_EVENT_POOL_SIZE 16384
+#define DEFAULT_EVENT_POOL_SIZE 16384
-#define ARGP_LOG_LEVEL_NONE_OPTION "NONE"
-#define ARGP_LOG_LEVEL_TRACE_OPTION "TRACE"
-#define ARGP_LOG_LEVEL_CRITICAL_OPTION "CRITICAL"
-#define ARGP_LOG_LEVEL_ERROR_OPTION "ERROR"
-#define ARGP_LOG_LEVEL_WARNING_OPTION "WARNING"
-#define ARGP_LOG_LEVEL_INFO_OPTION "INFO"
-#define ARGP_LOG_LEVEL_DEBUG_OPTION "DEBUG"
+#define ARGP_LOG_LEVEL_NONE_OPTION "NONE"
+#define ARGP_LOG_LEVEL_TRACE_OPTION "TRACE"
+#define ARGP_LOG_LEVEL_CRITICAL_OPTION "CRITICAL"
+#define ARGP_LOG_LEVEL_ERROR_OPTION "ERROR"
+#define ARGP_LOG_LEVEL_WARNING_OPTION "WARNING"
+#define ARGP_LOG_LEVEL_INFO_OPTION "INFO"
+#define ARGP_LOG_LEVEL_DEBUG_OPTION "DEBUG"
-#define ENABLE_NO_DAEMON_MODE 1
-#define ENABLE_DEBUG_MODE 1
+#define ENABLE_NO_DAEMON_MODE 1
+#define ENABLE_DEBUG_MODE 1
-#define GF_MEMPOOL_COUNT_OF_DICT_T 4096
+#define GF_MEMPOOL_COUNT_OF_DICT_T 4096
/* Considering 4 key/value pairs in a dictionary on an average */
-#define GF_MEMPOOL_COUNT_OF_DATA_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
-#define GF_MEMPOOL_COUNT_OF_DATA_PAIR_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
+#define GF_MEMPOOL_COUNT_OF_DATA_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
+#define GF_MEMPOOL_COUNT_OF_DATA_PAIR_T (GF_MEMPOOL_COUNT_OF_DICT_T * 4)
+
+#define GF_MEMPOOL_COUNT_OF_LRU_BUF_T 256
enum argp_option_keys {
- ARGP_VOLFILE_SERVER_KEY = 's',
- ARGP_VOLUME_FILE_KEY = 'f',
- ARGP_LOG_LEVEL_KEY = 'L',
- ARGP_LOG_FILE_KEY = 'l',
- ARGP_VOLFILE_SERVER_PORT_KEY = 131,
- ARGP_VOLFILE_SERVER_TRANSPORT_KEY = 132,
- ARGP_PID_FILE_KEY = 'p',
- ARGP_SOCK_FILE_KEY = 'S',
- ARGP_NO_DAEMON_KEY = 'N',
- ARGP_RUN_ID_KEY = 'r',
- ARGP_DEBUG_KEY = 133,
- ARGP_ENTRY_TIMEOUT_KEY = 135,
- ARGP_ATTRIBUTE_TIMEOUT_KEY = 136,
- ARGP_VOLUME_NAME_KEY = 137,
- ARGP_XLATOR_OPTION_KEY = 138,
- ARGP_DIRECT_IO_MODE_KEY = 139,
+ ARGP_VOLFILE_SERVER_KEY = 's',
+ ARGP_VOLUME_FILE_KEY = 'f',
+ ARGP_LOG_LEVEL_KEY = 'L',
+ ARGP_LOG_FILE_KEY = 'l',
+ ARGP_VOLFILE_SERVER_PORT_KEY = 131,
+ ARGP_VOLFILE_SERVER_TRANSPORT_KEY = 132,
+ ARGP_PID_FILE_KEY = 'p',
+ ARGP_SOCK_FILE_KEY = 'S',
+ ARGP_NO_DAEMON_KEY = 'N',
+ ARGP_RUN_ID_KEY = 'r',
+ ARGP_PRINT_NETGROUPS = 'n',
+ ARGP_PRINT_EXPORTS = 'e',
+ ARGP_DEBUG_KEY = 133,
+ ARGP_NEGATIVE_TIMEOUT_KEY = 134,
+ ARGP_ENTRY_TIMEOUT_KEY = 135,
+ ARGP_ATTRIBUTE_TIMEOUT_KEY = 136,
+ ARGP_VOLUME_NAME_KEY = 137,
+ ARGP_XLATOR_OPTION_KEY = 138,
+ ARGP_DIRECT_IO_MODE_KEY = 139,
#ifdef GF_DARWIN_HOST_OS
- ARGP_NON_LOCAL_KEY = 140,
+ ARGP_NON_LOCAL_KEY = 140,
#endif /* DARWIN */
- ARGP_VOLFILE_ID_KEY = 143,
- ARGP_VOLFILE_CHECK_KEY = 144,
- ARGP_VOLFILE_MAX_FETCH_ATTEMPTS = 145,
- ARGP_LOG_SERVER_KEY = 146,
- ARGP_LOG_SERVER_PORT_KEY = 147,
- ARGP_READ_ONLY_KEY = 148,
- ARGP_MAC_COMPAT_KEY = 149,
- ARGP_DUMP_FUSE_KEY = 150,
- ARGP_BRICK_NAME_KEY = 151,
- ARGP_BRICK_PORT_KEY = 152,
- ARGP_CLIENT_PID_KEY = 153,
- ARGP_ACL_KEY = 154,
- ARGP_WORM_KEY = 155,
- ARGP_USER_MAP_ROOT_KEY = 156,
- ARGP_MEM_ACCOUNTING_KEY = 157,
- ARGP_SELINUX_KEY = 158,
+ ARGP_VOLFILE_ID_KEY = 143,
+ ARGP_VOLFILE_CHECK_KEY = 144,
+ ARGP_VOLFILE_MAX_FETCH_ATTEMPTS = 145,
+ ARGP_LOG_SERVER_KEY = 146,
+ ARGP_LOG_SERVER_PORT_KEY = 147,
+ ARGP_READ_ONLY_KEY = 148,
+ ARGP_MAC_COMPAT_KEY = 149,
+ ARGP_DUMP_FUSE_KEY = 150,
+ ARGP_BRICK_NAME_KEY = 151,
+ ARGP_BRICK_PORT_KEY = 152,
+ ARGP_CLIENT_PID_KEY = 153,
+ ARGP_ACL_KEY = 154,
+ ARGP_WORM_KEY = 155,
+ ARGP_USER_MAP_ROOT_KEY = 156,
+ ARGP_MEM_ACCOUNTING_KEY = 157,
+ ARGP_SELINUX_KEY = 158,
+ ARGP_FOPEN_KEEP_CACHE_KEY = 159,
+ ARGP_GID_TIMEOUT_KEY = 160,
+ ARGP_FUSE_BACKGROUND_QLEN_KEY = 161,
+ ARGP_FUSE_CONGESTION_THRESHOLD_KEY = 162,
+ ARGP_INODE32_KEY = 163,
+ ARGP_FUSE_MOUNTOPTS_KEY = 164,
+ ARGP_FUSE_USE_READDIRP_KEY = 165,
+ ARGP_AUX_GFID_MOUNT_KEY = 166,
+ ARGP_FUSE_NO_ROOT_SQUASH_KEY = 167,
+ ARGP_LOGGER = 168,
+ ARGP_LOG_FORMAT = 169,
+ ARGP_LOG_BUF_SIZE = 170,
+ ARGP_LOG_FLUSH_TIMEOUT = 171,
+ ARGP_SECURE_MGMT_KEY = 172,
+ ARGP_GLOBAL_TIMER_WHEEL = 173,
+ ARGP_RESOLVE_GIDS_KEY = 174,
+ ARGP_CAPABILITY_KEY = 175,
+#ifdef GF_LINUX_HOST_OS
+ ARGP_OOM_SCORE_ADJ_KEY = 176,
+#endif
+ ARGP_LOCALTIME_LOGGING_KEY = 177,
+ ARGP_SUBDIR_MOUNT_KEY = 178,
+ ARGP_PROCESS_NAME_KEY = 179,
+ ARGP_FUSE_EVENT_HISTORY_KEY = 180,
+ ARGP_THIN_CLIENT_KEY = 181,
+ ARGP_READER_THREAD_COUNT_KEY = 182,
+ ARGP_PRINT_XLATORDIR_KEY = 183,
+ ARGP_PRINT_STATEDUMPDIR_KEY = 184,
+ ARGP_PRINT_LOGDIR_KEY = 185,
+ ARGP_KERNEL_WRITEBACK_CACHE_KEY = 186,
+ ARGP_ATTR_TIMES_GRANULARITY_KEY = 187,
+ ARGP_PRINT_LIBEXECDIR_KEY = 188,
+ ARGP_FUSE_FLUSH_HANDLE_INTERRUPT_KEY = 189,
+ ARGP_FUSE_LRU_LIMIT_KEY = 190,
+ ARGP_FUSE_AUTO_INVAL_KEY = 191,
+ ARGP_GLOBAL_THREADING_KEY = 192,
+ ARGP_BRICK_MUX_KEY = 193,
+ ARGP_FUSE_DEV_EPERM_RATELIMIT_NS_KEY = 194,
+ ARGP_FUSE_INVALIDATE_LIMIT_KEY = 195,
};
-struct _gfd_vol_top_priv_t {
- rpcsvc_request_t *req;
- gd1_mgmt_brick_op_req xlator_req;
- uint32_t blk_count;
- uint32_t blk_size;
- double throughput;
- double time;
- int32_t ret;
+struct _gfd_vol_top_priv {
+ rpcsvc_request_t *req;
+ gd1_mgmt_brick_op_req xlator_req;
+ uint32_t blk_count;
+ uint32_t blk_size;
+ double throughput;
+ double time;
+ int32_t ret;
};
-typedef struct _gfd_vol_top_priv_t gfd_vol_top_priv_t;
+typedef struct _gfd_vol_top_priv gfd_vol_top_priv_t;
+
+int
+glusterfs_mgmt_pmap_signin(glusterfs_ctx_t *ctx);
+int
+glusterfs_volfile_fetch(glusterfs_ctx_t *ctx);
+void
+cleanup_and_exit(int signum);
-int glusterfs_mgmt_pmap_signout (glusterfs_ctx_t *ctx);
-int glusterfs_mgmt_pmap_signin (glusterfs_ctx_t *ctx);
-int glusterfs_volfile_fetch (glusterfs_ctx_t *ctx);
-void cleanup_and_exit (int signum);
+void
+xlator_mem_cleanup(xlator_t *this);
-void *glusterfs_volume_top_read_perf (void *args);
-void *glusterfs_volume_top_write_perf (void *args);
+extern glusterfs_ctx_t *glusterfsd_ctx;
#endif /* __GLUSTERFSD_H__ */
diff --git a/scheduler/nufa/Makefile.am b/heal/Makefile.am
index d471a3f9243..a985f42a877 100644
--- a/scheduler/nufa/Makefile.am
+++ b/heal/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/heal/src/Makefile.am b/heal/src/Makefile.am
new file mode 100644
index 00000000000..aa18d3eff88
--- /dev/null
+++ b/heal/src/Makefile.am
@@ -0,0 +1,29 @@
+if WITH_SERVER
+scriptdir = $(GLUSTERFS_LIBEXECDIR)
+script_PROGRAMS = glfsheal
+endif
+
+glfsheal_SOURCES = glfs-heal.c
+
+glfsheal_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(GF_LDADD)\
+ $(RLLIBS) $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/api/src/libgfapi.la \
+ $(XML_LIBS) $(GFAPI_LIBS) $(UUID_LIBS)
+
+glfsheal_LDFLAGS = $(GF_LDFLAGS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/xlators/lib/src\
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src\
+ -I$(top_srcdir)/rpc/xdr/src\
+ -I$(top_builddir)/rpc/xdr/src\
+ -I$(top_srcdir)/api/src\
+ -DDATADIR=\"$(localstatedir)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS) $(XML_CFLAGS)
+
+CLEANFILES =
+
+$(top_builddir)/libglusterfs/src/libglusterfs.la:
+ $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
diff --git a/heal/src/glfs-heal.c b/heal/src/glfs-heal.c
new file mode 100644
index 00000000000..bf4b47f8760
--- /dev/null
+++ b/heal/src/glfs-heal.c
@@ -0,0 +1,1793 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include "glfs.h"
+#include "glfs-internal.h"
+#include "protocol-common.h"
+#include <glusterfs/syscall.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
+#include <string.h>
+#include <glusterfs/glusterfs.h>
+#include <libgen.h>
+
+#if (HAVE_LIB_XML)
+#include <libxml/encoding.h>
+#include <libxml/xmlwriter.h>
+
+xmlTextWriterPtr glfsh_writer;
+xmlDocPtr glfsh_doc = NULL;
+#endif
+
+#define XML_RET_CHECK_AND_GOTO(ret, label) \
+ do { \
+ if (ret < 0) { \
+ ret = -1; \
+ goto label; \
+ } else \
+ ret = 0; \
+ } while (0)
+
+#define MODE_XML (1 << 0)
+#define MODE_NO_LOG (1 << 1)
+
+typedef struct num_entries {
+ uint64_t num_entries;
+ uint64_t pending_entries;
+ uint64_t spb_entries;
+ uint64_t possibly_healing_entries;
+} num_entries_t;
+
+typedef int (*print_status)(dict_t *, char *, uuid_t, num_entries_t *,
+ gf_boolean_t flag);
+
+int
+glfsh_heal_splitbrain_file(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ char *file, dict_t *xattr_req);
+
+typedef struct glfs_info {
+ int (*init)(void);
+ int (*print_brick_from_xl)(xlator_t *xl, loc_t *rootloc);
+ int (*print_heal_op_status)(int ret, uint64_t num_entries, char *fmt_str);
+ int (*print_heal_op_summary)(int ret, num_entries_t *num_entries);
+ int (*print_heal_status)(char *path, uuid_t gfid, char *status);
+ int (*print_spb_status)(char *path, uuid_t gfid, char *status);
+ int (*end)(int op_ret, char *op_errstr);
+} glfsh_info_t;
+
+glfsh_info_t *glfsh_output = NULL;
+int32_t is_xml;
+
+#define DEFAULT_HEAL_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
+#define USAGE_STR \
+ "Usage: %s <VOLNAME> [bigger-file <FILE> | " \
+ "latest-mtime <FILE> | " \
+ "source-brick <HOSTNAME:BRICKNAME> [<FILE>] | " \
+ "split-brain-info | info-summary] [glusterd-sock <FILE>" \
+ "]\n"
+
+typedef enum {
+ GLFSH_MODE_CONTINUE_ON_ERROR = 1,
+ GLFSH_MODE_EXIT_ON_FIRST_FAILURE,
+} glfsh_fail_mode_t;
+
+int
+glfsh_init()
+{
+ return 0;
+}
+
+int
+glfsh_end_op_granular_entry_heal(int op_ret, char *op_errstr)
+{
+ /* If error string is available, give it higher precedence.*/
+
+ if (op_errstr) {
+ printf("%s\n", op_errstr);
+ } else if (op_ret < 0) {
+ if (op_ret == -EAGAIN)
+ printf(
+ "One or more entries need heal. Please execute "
+ "the command again after there are no entries "
+ "to be healed\n");
+ else if (op_ret == -ENOTCONN)
+ printf(
+ "One or more bricks could be down. Please "
+ "execute the command again after bringing all "
+ "bricks online and finishing any pending "
+ "heals\n");
+ else
+ printf(
+ "Command failed - %s. Please check the logs for"
+ " more details\n",
+ strerror(-op_ret));
+ }
+ return 0;
+}
+
+int
+glfsh_end(int op_ret, char *op_errstr)
+{
+ if (op_errstr)
+ printf("%s\n", op_errstr);
+ return 0;
+}
+
+int
+glfsh_print_hr_spb_status(char *path, uuid_t gfid, char *status)
+{
+ printf("%s\n", path);
+ fflush(stdout);
+ return 0;
+}
+
+int
+glfsh_no_print_hr_status(char *path, uuid_t gfid, char *status)
+{
+ return 0;
+}
+
+int
+glfsh_print_hr_heal_status(char *path, uuid_t gfid, char *status)
+{
+ printf("%s%s\n", path, status);
+ fflush(stdout);
+ return 0;
+}
+
+#if (HAVE_LIB_XML)
+
+int
+glfsh_xml_init()
+{
+ int ret = -1;
+ glfsh_writer = xmlNewTextWriterDoc(&glfsh_doc, 0);
+ if (glfsh_writer == NULL) {
+ return -1;
+ }
+
+ ret = xmlTextWriterStartDocument(glfsh_writer, "1.0", "UTF-8", "yes");
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+
+ /* <cliOutput> */
+ ret = xmlTextWriterStartElement(glfsh_writer, (xmlChar *)"cliOutput");
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+
+ /* <healInfo> */
+ ret = xmlTextWriterStartElement(glfsh_writer, (xmlChar *)"healInfo");
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+ /* <bricks> */
+ ret = xmlTextWriterStartElement(glfsh_writer, (xmlChar *)"bricks");
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+ xmlTextWriterFlush(glfsh_writer);
+xml_out:
+ return ret;
+}
+
+int
+glfsh_xml_end(int op_ret, char *op_errstr)
+{
+ int ret = -1;
+ int op_errno = 0;
+ gf_boolean_t alloc = _gf_false;
+
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ if (op_errstr == NULL) {
+ op_errstr = gf_strdup(strerror(op_errno));
+ alloc = _gf_true;
+ }
+ } else {
+ op_errstr = NULL;
+ }
+
+ /* </bricks> */
+ ret = xmlTextWriterEndElement(glfsh_writer);
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+
+ /* </healInfo> */
+ ret = xmlTextWriterEndElement(glfsh_writer);
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+
+ ret = xmlTextWriterWriteFormatElement(glfsh_writer, (xmlChar *)"opRet",
+ "%d", op_ret);
+
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+
+ ret = xmlTextWriterWriteFormatElement(glfsh_writer, (xmlChar *)"opErrno",
+ "%d", op_errno);
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+
+ if (op_errstr)
+ ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"opErrstr", "%s", op_errstr);
+ else
+ ret = xmlTextWriterWriteFormatElement(glfsh_writer,
+ (xmlChar *)"opErrstr", "%s", "");
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+ ret = xmlTextWriterEndDocument(glfsh_writer);
+ XML_RET_CHECK_AND_GOTO(ret, xml_out);
+
+ /* Dump xml document to stdout and pretty format it */
+ xmlSaveFormatFileEnc("-", glfsh_doc, "UTF-8", 1);
+
+ xmlFreeTextWriter(glfsh_writer);
+ xmlFreeDoc(glfsh_doc);
+xml_out:
+ if (alloc)
+ GF_FREE(op_errstr);
+ return ret;
+}
+
+int
+glfsh_print_xml_heal_op_status(int ret, uint64_t num_entries, char *fmt_str)
+{
+ int x_ret = 0;
+ if (ret < 0 && num_entries == 0) {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"status", "%s", strerror(-ret));
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ if (fmt_str) {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"numberOfEntries", "-");
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ }
+ goto out;
+ } else if (ret == 0) {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"status", "%s", "Connected");
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ }
+
+ if (ret < 0) {
+ if (fmt_str) {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"status",
+ "Failed to process entries completely. "
+ "(%s)%s %" PRIu64 "",
+ strerror(-ret), fmt_str, num_entries);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ }
+ } else {
+ if (fmt_str) {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"numberOfEntries", "%" PRIu64 "",
+ num_entries);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ }
+ }
+out:
+ if (x_ret >= 0) {
+ x_ret = xmlTextWriterEndElement(glfsh_writer);
+ if (x_ret >= 0) {
+ xmlTextWriterFlush(glfsh_writer);
+ x_ret = 0;
+ } else {
+ x_ret = -1;
+ }
+ }
+ return x_ret;
+}
+
+int
+glfsh_print_xml_heal_op_summary(int ret, num_entries_t *num_entries)
+{
+ int x_ret = 0;
+
+ if (ret < 0 && num_entries == 0) {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"status", "%s", strerror(-ret));
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"totalNumberOfEntries", "-");
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"numberOfEntriesInHealPending", "-");
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"numberOfEntriesInSplitBrain", "-");
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"numberOfEntriesPossiblyHealing", "-");
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ goto out;
+ } else if (ret == 0) {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"status", "%s", "Connected");
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ }
+
+ if (ret < 0) {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"status",
+ "Failed to process entries"
+ " completely. "
+ "(%s)totalNumberOfEntries%" PRIu64 "",
+ strerror(-ret), num_entries->num_entries);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ } else {
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"totalNumberOfEntries", "%" PRIu64 "",
+ num_entries->num_entries);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"numberOfEntriesInHealPending",
+ "%" PRIu64 "", num_entries->pending_entries);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"numberOfEntriesInSplitBrain",
+ "%" PRIu64 "", num_entries->spb_entries);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"numberOfEntriesPossiblyHealing",
+ "%" PRIu64 "", num_entries->possibly_healing_entries);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ }
+out:
+ if (x_ret >= 0) {
+ x_ret = xmlTextWriterEndElement(glfsh_writer);
+ }
+ return x_ret;
+}
+
+int
+glfsh_print_xml_file_status(char *path, uuid_t gfid, char *status)
+{
+ int x_ret = 0;
+
+ x_ret = xmlTextWriterStartElement(glfsh_writer, (xmlChar *)"file");
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatAttribute(glfsh_writer, (xmlChar *)"gfid",
+ "%s", uuid_utoa(gfid));
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterWriteFormatString(glfsh_writer, "%s", path);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ x_ret = xmlTextWriterEndElement(glfsh_writer);
+ XML_RET_CHECK_AND_GOTO(x_ret, out);
+ xmlTextWriterFlush(glfsh_writer);
+out:
+ return x_ret;
+}
+
+int
+glfsh_print_xml_brick_from_xl(xlator_t *xl, loc_t *rootloc)
+{
+ char *remote_host = NULL;
+ char *remote_subvol = NULL;
+ char *uuid = NULL;
+ int ret = 0;
+ int x_ret = 0;
+
+ ret = dict_get_str(xl->options, "remote-host", &remote_host);
+ if (ret < 0)
+ goto print;
+
+ ret = dict_get_str(xl->options, "remote-subvolume", &remote_subvol);
+ if (ret < 0)
+ goto print;
+ ret = syncop_getxattr(xl, rootloc, &xl->options, GF_XATTR_NODE_UUID_KEY,
+ NULL, NULL);
+ if (ret < 0)
+ goto print;
+
+ ret = dict_get_str(xl->options, GF_XATTR_NODE_UUID_KEY, &uuid);
+ if (ret < 0)
+ goto print;
+print:
+
+ x_ret = xmlTextWriterStartElement(glfsh_writer, (xmlChar *)"brick");
+ XML_RET_CHECK_AND_GOTO(x_ret, xml_out);
+ x_ret = xmlTextWriterWriteFormatAttribute(
+ glfsh_writer, (xmlChar *)"hostUuid", "%s", uuid ? uuid : "-");
+ XML_RET_CHECK_AND_GOTO(x_ret, xml_out);
+
+ x_ret = xmlTextWriterWriteFormatElement(
+ glfsh_writer, (xmlChar *)"name", "%s:%s",
+ remote_host ? remote_host : "-", remote_subvol ? remote_subvol : "-");
+ XML_RET_CHECK_AND_GOTO(x_ret, xml_out);
+ xmlTextWriterFlush(glfsh_writer);
+xml_out:
+ return ret;
+}
+#endif
+
+int
+glfsh_link_inode_update_loc(loc_t *loc, struct iatt *iattr)
+{
+ inode_t *link_inode = NULL;
+ int ret = -1;
+
+ link_inode = inode_link(loc->inode, NULL, NULL, iattr);
+ if (link_inode == NULL)
+ goto out;
+
+ inode_unref(loc->inode);
+ loc->inode = link_inode;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glfsh_no_print_hr_heal_op_status(int ret, uint64_t num_entries, char *fmt_str)
+{
+ return 0;
+}
+
+int
+glfsh_print_hr_heal_op_summary(int ret, num_entries_t *num_entries)
+{
+ if (ret < 0 && num_entries->num_entries == 0) {
+ printf("Status: %s\n", strerror(-ret));
+ printf("Total Number of entries: -\n");
+ printf("Number of entries in heal pending: -\n");
+ printf("Number of entries in split-brain: -\n");
+ printf("Number of entries possibly healing: -\n");
+ goto out;
+ } else if (ret == 0) {
+ printf("Status: Connected\n");
+ }
+
+ if (ret < 0) {
+ printf(
+ "Status: Failed to process entries completely. "
+ "(%s)\nTotal Number of entries: %" PRIu64 "\n",
+ strerror(-ret), num_entries->num_entries);
+ } else {
+ printf("Total Number of entries: %" PRIu64 "\n",
+ num_entries->num_entries);
+ printf("Number of entries in heal pending: %" PRIu64 "\n",
+ num_entries->pending_entries);
+ printf("Number of entries in split-brain: %" PRIu64 "\n",
+ num_entries->spb_entries);
+ printf("Number of entries possibly healing: %" PRIu64 "\n",
+ num_entries->possibly_healing_entries);
+ }
+out:
+ printf("\n");
+ fflush(stdout);
+ return 0;
+}
+
+int
+glfsh_print_hr_heal_op_status(int ret, uint64_t num_entries, char *fmt_str)
+{
+ if (ret < 0 && num_entries == 0) {
+ printf("Status: %s\n", strerror(-ret));
+ if (fmt_str)
+ printf("%s -\n", fmt_str);
+ goto out;
+ } else if (ret == 0) {
+ printf("Status: Connected\n");
+ }
+
+ if (ret < 0) {
+ if (fmt_str)
+ printf(
+ "Status: Failed to process entries completely. "
+ "(%s)\n%s %" PRIu64 "\n",
+ strerror(-ret), fmt_str, num_entries);
+ } else {
+ if (fmt_str)
+ printf("%s %" PRIu64 "\n", fmt_str, num_entries);
+ }
+out:
+ printf("\n");
+ return 0;
+}
+
+int
+glfsh_print_info_summary(int ret, num_entries_t *num_entries)
+{
+ return glfsh_output->print_heal_op_summary(ret, num_entries);
+}
+
+int
+glfsh_print_heal_op_status(int ret, uint64_t num_entries,
+ gf_xl_afr_op_t heal_op)
+{
+ char *fmt_str = NULL;
+
+ if (heal_op == GF_SHD_OP_INDEX_SUMMARY)
+ fmt_str = "Number of entries:";
+ else if (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES)
+ fmt_str = "Number of entries in split-brain:";
+ else if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK)
+ fmt_str = "Number of healed entries:";
+
+ return glfsh_output->print_heal_op_status(ret, num_entries, fmt_str);
+}
+
+int
+glfsh_get_index_dir_loc(loc_t *rootloc, xlator_t *xl, loc_t *dirloc,
+ int32_t *op_errno, char *vgfid)
+{
+ void *index_gfid = NULL;
+ int ret = 0;
+ dict_t *xattr = NULL;
+ struct iatt iattr = {0};
+ struct iatt parent = {0};
+
+ ret = syncop_getxattr(xl, rootloc, &xattr, vgfid, NULL, NULL);
+ if (ret < 0) {
+ *op_errno = -ret;
+ goto out;
+ }
+
+ ret = dict_get_ptr(xattr, vgfid, &index_gfid);
+ if (ret < 0) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_uuid_copy(dirloc->gfid, index_gfid);
+ dirloc->path = "";
+ dirloc->inode = inode_new(rootloc->inode->table);
+ ret = syncop_lookup(xl, dirloc, &iattr, &parent, NULL, NULL);
+ dirloc->path = NULL;
+ if (ret < 0) {
+ *op_errno = -ret;
+ goto out;
+ }
+ ret = glfsh_link_inode_update_loc(dirloc, &iattr);
+ if (ret)
+ goto out;
+
+ ret = glfs_loc_touchup(dirloc);
+ if (ret < 0) {
+ *op_errno = errno;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (xattr)
+ dict_unref(xattr);
+ return ret;
+}
+
+static xlator_t *
+_get_ancestor(xlator_t *xl, gf_xl_afr_op_t heal_op)
+{
+ static char *replica_xl[] = {"cluster/replicate", NULL};
+ static char *heal_xls[] = {"cluster/replicate", "cluster/disperse", NULL};
+ char **ancestors = NULL;
+
+ if (heal_op == GF_SHD_OP_INDEX_SUMMARY || heal_op == GF_SHD_OP_HEAL_SUMMARY)
+ ancestors = heal_xls;
+ else
+ ancestors = replica_xl;
+
+ if (!xl || !xl->parents)
+ return NULL;
+
+ while (xl->parents) {
+ xl = xl->parents->xlator;
+ if (!xl)
+ break;
+ if (gf_get_index_by_elem(ancestors, xl->type) != -1)
+ return xl;
+ }
+
+ return NULL;
+}
+
+int
+glfsh_index_purge(xlator_t *subvol, inode_t *inode, char *name)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = 0;
+
+ loc.parent = inode_ref(inode);
+ loc.name = name;
+
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
+glfsh_print_summary_status(dict_t *dict, char *path, uuid_t gfid,
+ num_entries_t *num_entries, gf_boolean_t flag)
+{
+ int ret = 0;
+ char *value = NULL;
+
+ ret = dict_get_str(dict, "heal-info", &value);
+ if (ret)
+ goto out;
+
+ if ((!strcmp(value, "heal")) || (!strcmp(value, "heal-pending"))) {
+ (num_entries->pending_entries)++;
+ } else if ((!strcmp(value, "split-brain")) ||
+ (!strcmp(value, "split-brain-pending"))) {
+ (num_entries->spb_entries)++;
+ } else if ((!strcmp(value, "possibly-healing-pending")) ||
+ (!strcmp(value, "possibly-healing"))) {
+ (num_entries->possibly_healing_entries)++;
+ } else {
+ goto out;
+ }
+ (num_entries->num_entries)++;
+out:
+ return ret;
+}
+
+int
+glfsh_print_spb_status(dict_t *dict, char *path, uuid_t gfid,
+ num_entries_t *num_entries, gf_boolean_t flag)
+{
+ int ret = 0;
+ gf_boolean_t pending = _gf_false;
+ gf_boolean_t split_b = _gf_false;
+ char *value = NULL;
+ char gfid_str[64] = {0};
+
+ ret = dict_get_str(dict, "heal-info", &value);
+ if (ret)
+ return 0;
+
+ if (!strcmp(value, "split-brain")) {
+ split_b = _gf_true;
+ } else if (!strcmp(value, "split-brain-pending")) {
+ split_b = _gf_true;
+ pending = _gf_true;
+ }
+ /* Consider the entry only iff :
+ * 1) The dir being processed is not indices/dirty, indicated by
+ * flag == _gf_false
+ * 2) The dir being processed is indices/dirty but the entry also
+ * exists in indices/xattrop dir and has already been processed.
+ */
+ if (split_b) {
+ if (!flag || (flag && !pending)) {
+ (num_entries->num_entries)++;
+ glfsh_output->print_spb_status(
+ path ? path : uuid_utoa_r(gfid, gfid_str), gfid, NULL);
+ }
+ }
+ return 0;
+}
+
+int
+glfsh_print_heal_status(dict_t *dict, char *path, uuid_t gfid,
+ num_entries_t *num_entries, gf_boolean_t ignore_dirty)
+{
+ int ret = 0;
+ gf_boolean_t pending = _gf_false;
+ char *status = NULL;
+ char *value = NULL;
+ char gfid_str[64] = {0};
+
+ ret = dict_get_str(dict, "heal-info", &value);
+ if (ret || (!strcmp(value, "no-heal")))
+ return 0;
+
+ if (!strcmp(value, "heal")) {
+ ret = gf_asprintf(&status, " ");
+ if (ret < 0)
+ goto out;
+ } else if (!strcmp(value, "possibly-healing")) {
+ ret = gf_asprintf(&status, " - Possibly undergoing heal");
+ if (ret < 0)
+ goto out;
+ } else if (!strcmp(value, "split-brain")) {
+ ret = gf_asprintf(&status, " - Is in split-brain");
+ if (ret < 0)
+ goto out;
+ } else if (!strcmp(value, "heal-pending")) {
+ pending = _gf_true;
+ ret = gf_asprintf(&status, " ");
+ if (ret < 0)
+ goto out;
+ } else if (!strcmp(value, "split-brain-pending")) {
+ pending = _gf_true;
+ ret = gf_asprintf(&status, " - Is in split-brain");
+ if (ret < 0)
+ goto out;
+ } else if (!strcmp(value, "possibly-healing-pending")) {
+ pending = _gf_true;
+ ret = gf_asprintf(&status, " - Possibly undergoing heal");
+ if (ret < 0)
+ goto out;
+ }
+out:
+ /* If ignore_dirty is set, it means indices/dirty directory is
+ * being processed. Ignore the entry if it also exists in
+ * indices/xattrop.
+ * Boolean pending is set to true if the entry also exists in
+ * indices/xattrop directory.
+ */
+ if (ignore_dirty) {
+ if (pending) {
+ GF_FREE(status);
+ status = NULL;
+ return 0;
+ }
+ }
+ if (ret == -1)
+ status = NULL;
+
+ (num_entries->num_entries)++;
+ glfsh_output->print_heal_status(path ? path : uuid_utoa_r(gfid, gfid_str),
+ gfid, status ? status : "");
+
+ GF_FREE(status);
+ return 0;
+}
+
+int
+glfsh_heal_status_boolean(dict_t *dict, char *path, uuid_t gfid,
+ num_entries_t *num_entries, gf_boolean_t ignore_dirty)
+{
+ int ret = 0;
+ char *value = NULL;
+
+ ret = dict_get_str(dict, "heal-info", &value);
+ if ((!ret) && (!strcmp(value, "no-heal")))
+ return 0;
+ else
+ return -1;
+}
+
+static void
+glfsh_heal_entries(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ gf_dirent_t *entries, uint64_t *offset,
+ num_entries_t *num_entries, dict_t *xattr_req)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ int ret = 0;
+ char file[64] = {0};
+
+ list_for_each_entry_safe(entry, tmp, &entries->list, list)
+ {
+ *offset = entry->d_off;
+ if ((strcmp(entry->d_name, ".") == 0) ||
+ (strcmp(entry->d_name, "..") == 0))
+ continue;
+ snprintf(file, sizeof(file), "gfid:%s", entry->d_name);
+ ret = glfsh_heal_splitbrain_file(fs, top_subvol, rootloc, file,
+ xattr_req);
+ if (ret)
+ continue;
+ (num_entries->num_entries)++;
+ }
+}
+
+static int
+glfsh_process_entries(xlator_t *xl, fd_t *fd, gf_dirent_t *entries,
+ uint64_t *offset, num_entries_t *num_entries,
+ print_status glfsh_print_status,
+ gf_boolean_t ignore_dirty, glfsh_fail_mode_t mode)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ int ret = 0;
+ int print_status = 0;
+ char *path = NULL;
+ uuid_t gfid = {0};
+ xlator_t *this = NULL;
+ dict_t *dict = NULL;
+ loc_t loc = {
+ 0,
+ };
+ this = THIS;
+
+ list_for_each_entry_safe(entry, tmp, &entries->list, list)
+ {
+ *offset = entry->d_off;
+ if ((strcmp(entry->d_name, ".") == 0) ||
+ (strcmp(entry->d_name, "..") == 0))
+ continue;
+
+ if (dict) {
+ dict_unref(dict);
+ dict = NULL;
+ }
+ gf_uuid_clear(gfid);
+ GF_FREE(path);
+ path = NULL;
+
+ gf_uuid_parse(entry->d_name, gfid);
+ gf_uuid_copy(loc.gfid, gfid);
+ ret = syncop_getxattr(this, &loc, &dict, GF_HEAL_INFO, NULL, NULL);
+ if (ret) {
+ if ((mode != GLFSH_MODE_CONTINUE_ON_ERROR) && (ret == -ENOTCONN))
+ goto out;
+ else
+ continue;
+ }
+
+ ret = syncop_gfid_to_path(this->itable, xl, gfid, &path);
+
+ if (ret == -ENOENT || ret == -ESTALE) {
+ glfsh_index_purge(xl, fd->inode, entry->d_name);
+ ret = 0;
+ continue;
+ }
+ if (dict) {
+ print_status = glfsh_print_status(dict, path, gfid, num_entries,
+ ignore_dirty);
+ if ((print_status) && (mode != GLFSH_MODE_CONTINUE_ON_ERROR)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+ }
+ }
+ ret = 0;
+out:
+ GF_FREE(path);
+ if (dict) {
+ dict_unref(dict);
+ dict = NULL;
+ }
+ return ret;
+}
+
+static int
+glfsh_crawl_directory(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ xlator_t *readdir_xl, fd_t *fd, loc_t *loc,
+ dict_t *xattr_req, num_entries_t *num_entries,
+ gf_boolean_t ignore)
+{
+ int ret = 0;
+ int heal_op = -1;
+ uint64_t offset = 0;
+ gf_dirent_t entries;
+ gf_boolean_t free_entries = _gf_false;
+ glfsh_fail_mode_t mode = GLFSH_MODE_CONTINUE_ON_ERROR;
+
+ INIT_LIST_HEAD(&entries.list);
+ ret = dict_get_int32(xattr_req, "heal-op", &heal_op);
+ if (ret)
+ return ret;
+
+ if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)
+ mode = GLFSH_MODE_EXIT_ON_FIRST_FAILURE;
+
+ while (1) {
+ ret = syncop_readdir(readdir_xl, fd, 131072, offset, &entries, NULL,
+ NULL);
+ if (ret <= 0)
+ break;
+ ret = 0;
+ free_entries = _gf_true;
+
+ if (list_empty(&entries.list))
+ goto out;
+
+ if (heal_op == GF_SHD_OP_INDEX_SUMMARY) {
+ ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset,
+ num_entries, glfsh_print_heal_status,
+ ignore, mode);
+ if (ret < 0)
+ goto out;
+ } else if (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES) {
+ ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset,
+ num_entries, glfsh_print_spb_status,
+ ignore, mode);
+ if (ret < 0)
+ goto out;
+ } else if (heal_op == GF_SHD_OP_HEAL_SUMMARY) {
+ ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset,
+ num_entries, glfsh_print_summary_status,
+ ignore, mode);
+ if (ret < 0)
+ goto out;
+ } else if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) {
+ glfsh_heal_entries(fs, top_subvol, rootloc, &entries, &offset,
+ num_entries, xattr_req);
+ } else if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) {
+ ret = glfsh_process_entries(readdir_xl, fd, &entries, &offset,
+ num_entries, glfsh_heal_status_boolean,
+ ignore, mode);
+ if (ret < 0)
+ goto out;
+ }
+ gf_dirent_free(&entries);
+ free_entries = _gf_false;
+ }
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free(&entries);
+ return ret;
+}
+
+static int
+glfsh_no_print_brick_from_xl(xlator_t *xl, loc_t *rootloc)
+{
+ return 0;
+}
+
+static int
+glfsh_print_brick_from_xl(xlator_t *xl, loc_t *rootloc)
+{
+ char *remote_host = NULL;
+ char *remote_subvol = NULL;
+ int ret = 0;
+
+ ret = dict_get_str(xl->options, "remote-host", &remote_host);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(xl->options, "remote-subvolume", &remote_subvol);
+ if (ret < 0)
+ goto out;
+out:
+ if (ret < 0)
+ printf("Brick - Not able to get brick information\n");
+ else
+ printf("Brick %s:%s\n", remote_host, remote_subvol);
+ return ret;
+}
+
+int
+glfsh_print_pending_heals_type(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ xlator_t *xl, gf_xl_afr_op_t heal_op,
+ dict_t *xattr_req, char *vgfid,
+ num_entries_t *num_entries)
+{
+ int ret = 0;
+ loc_t dirloc = {0};
+ fd_t *fd = NULL;
+ int32_t op_errno = 0;
+ gf_boolean_t ignore = _gf_false;
+
+ if (!strcmp(vgfid, GF_XATTROP_DIRTY_GFID))
+ ignore = _gf_true;
+
+ ret = glfsh_get_index_dir_loc(rootloc, xl, &dirloc, &op_errno, vgfid);
+ if (ret < 0) {
+ if (op_errno == ESTALE || op_errno == ENOENT || op_errno == ENOTSUP)
+ ret = 0;
+ else
+ ret = -op_errno;
+ goto out;
+ }
+
+ ret = syncop_dirfd(xl, &dirloc, &fd, GF_CLIENT_PID_GLFS_HEAL);
+ if (ret)
+ goto out;
+
+ ret = glfsh_crawl_directory(fs, top_subvol, rootloc, xl, fd, &dirloc,
+ xattr_req, num_entries, ignore);
+ if (fd)
+ fd_unref(fd);
+out:
+ loc_wipe(&dirloc);
+ return ret;
+}
+
+int
+glfsh_print_pending_heals(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ xlator_t *xl, gf_xl_afr_op_t heal_op,
+ gf_boolean_t is_parent_replicate)
+{
+ int ret = 0;
+ num_entries_t num_entries = {
+ 0,
+ };
+ num_entries_t total = {
+ 0,
+ };
+
+ dict_t *xattr_req = NULL;
+
+ xattr_req = dict_new();
+ if (!xattr_req)
+ goto out;
+ ret = dict_set_int32(xattr_req, "heal-op", heal_op);
+ if (ret)
+ goto out;
+
+ if ((!is_parent_replicate) &&
+ ((heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE) ||
+ (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE))) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = glfsh_output->print_brick_from_xl(xl, rootloc);
+ if (ret < 0)
+ goto out;
+
+ ret = glfsh_print_pending_heals_type(fs, top_subvol, rootloc, xl, heal_op,
+ xattr_req, GF_XATTROP_INDEX_GFID,
+ &num_entries);
+
+ if (ret < 0 && heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)
+ goto out;
+
+ total.num_entries += num_entries.num_entries;
+ total.pending_entries += num_entries.pending_entries;
+ total.spb_entries += num_entries.spb_entries;
+ total.possibly_healing_entries += num_entries.possibly_healing_entries;
+ num_entries.num_entries = 0;
+ num_entries.pending_entries = 0;
+ num_entries.spb_entries = 0;
+ num_entries.possibly_healing_entries = 0;
+ if (ret == -ENOTCONN)
+ goto out;
+
+ if (is_parent_replicate) {
+ ret = glfsh_print_pending_heals_type(
+ fs, top_subvol, rootloc, xl, heal_op, xattr_req,
+ GF_XATTROP_DIRTY_GFID, &num_entries);
+ total.num_entries += num_entries.num_entries;
+ total.pending_entries += num_entries.pending_entries;
+ total.spb_entries += num_entries.spb_entries;
+ total.possibly_healing_entries += num_entries.possibly_healing_entries;
+ }
+out:
+ if (xattr_req)
+ dict_unref(xattr_req);
+ if (heal_op == GF_SHD_OP_HEAL_SUMMARY) {
+ glfsh_print_info_summary(ret, &total);
+ } else {
+ glfsh_print_heal_op_status(ret, total.num_entries, heal_op);
+ }
+ return ret;
+}
+
+static int
+glfsh_set_heal_options(glfs_t *fs, gf_xl_afr_op_t heal_op)
+{
+ int ret = 0;
+
+ ret = glfs_set_xlator_option(fs, "*-replicate-*",
+ "background-self-heal-count", "0");
+ if (ret)
+ goto out;
+
+ ret = glfs_set_xlator_option(fs, "*-replicate-*", "halo-enabled", "off");
+ if (ret)
+ goto out;
+
+ if ((heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE) &&
+ (heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) &&
+ (heal_op != GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME))
+ return 0;
+ ret = glfs_set_xlator_option(fs, "*-replicate-*", "data-self-heal", "on");
+ if (ret)
+ goto out;
+
+ ret = glfs_set_xlator_option(fs, "*-replicate-*", "metadata-self-heal",
+ "on");
+ if (ret)
+ goto out;
+
+ ret = glfs_set_xlator_option(fs, "*-replicate-*", "entry-self-heal", "on");
+
+out:
+ return ret;
+}
+
+static int
+glfsh_validate_volume(xlator_t *xl, gf_xl_afr_op_t heal_op)
+{
+ xlator_t *heal_xl = NULL;
+ int ret = -1;
+
+ while (xl->next)
+ xl = xl->next;
+
+ while (xl) {
+ if (strcmp(xl->type, "protocol/client") == 0) {
+ heal_xl = _get_ancestor(xl, heal_op);
+ if (heal_xl) {
+ ret = 0;
+ break;
+ }
+ }
+
+ xl = xl->prev;
+ }
+
+ return ret;
+}
+
+static xlator_t *
+_brick_path_to_client_xlator(xlator_t *top_subvol, char *hostname,
+ char *brickpath)
+{
+ int ret = 0;
+ xlator_t *xl = NULL;
+ char *remote_host = NULL;
+ char *remote_subvol = NULL;
+
+ xl = top_subvol;
+
+ while (xl->next)
+ xl = xl->next;
+
+ while (xl) {
+ if (!strcmp(xl->type, "protocol/client")) {
+ ret = dict_get_str(xl->options, "remote-host", &remote_host);
+ if (ret < 0)
+ goto out;
+ ret = dict_get_str(xl->options, "remote-subvolume", &remote_subvol);
+ if (ret < 0)
+ goto out;
+ if (!strcmp(hostname, remote_host) &&
+ !strcmp(brickpath, remote_subvol))
+ return xl;
+ }
+ xl = xl->prev;
+ }
+
+out:
+ return NULL;
+}
+
+int
+glfsh_gather_heal_info(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ gf_xl_afr_op_t heal_op)
+{
+ int ret = 0;
+ xlator_t *xl = NULL;
+ xlator_t *heal_xl = NULL;
+ xlator_t *old_THIS = NULL;
+
+ xl = top_subvol;
+ while (xl->next)
+ xl = xl->next;
+ while (xl) {
+ if (strcmp(xl->type, "protocol/client") == 0 &&
+ !strstr(xl->name, "-ta-")) {
+ heal_xl = _get_ancestor(xl, heal_op);
+ if (heal_xl) {
+ old_THIS = THIS;
+ THIS = heal_xl;
+ ret = glfsh_print_pending_heals(
+ fs, top_subvol, rootloc, xl, heal_op,
+ !strcmp(heal_xl->type, "cluster/replicate"));
+ THIS = old_THIS;
+
+ if ((ret < 0) &&
+ (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE))
+ goto out;
+ }
+ }
+
+ xl = xl->prev;
+ }
+
+out:
+ if (heal_op != GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)
+ ret = 0;
+
+ return ret;
+}
+
+int
+_validate_directory(dict_t *xattr_req, char *file)
+{
+ int heal_op = -1;
+ int ret = 0;
+
+ ret = dict_get_int32(xattr_req, "heal-op", &heal_op);
+ if (ret)
+ return ret;
+
+ if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE) {
+ printf("'bigger-file' not a valid option for directories.\n");
+ ret = -1;
+ } else if (heal_op == GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK) {
+ printf(
+ "'source-brick' option used on a directory (%s). "
+ "Performing conservative merge.\n",
+ file);
+ }
+
+ return ret;
+}
+
+int
+glfsh_heal_splitbrain_file(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ char *file, dict_t *xattr_req)
+{
+ int ret = -1;
+ int reval = 0;
+ loc_t loc = {
+ 0,
+ };
+ char *path = NULL;
+ char *path1 = NULL;
+ char *path2 = NULL;
+ char *filename = NULL;
+ char *filename1 = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ xlator_t *xl = top_subvol;
+ dict_t *xattr_rsp = NULL;
+ char *sh_fail_msg = NULL;
+ char *gfid_heal_msg = NULL;
+ int32_t op_errno = 0;
+ gf_boolean_t flag = _gf_false;
+
+ if (!strncmp(file, "gfid:", 5)) {
+ filename = gf_strdup(file);
+ if (!filename) {
+ printf("Error allocating memory to filename\n");
+ goto out;
+ }
+ path = strtok(filename, ":");
+ path = strtok(NULL, ";");
+ gf_uuid_parse(path, loc.gfid);
+ loc.path = gf_strdup(uuid_utoa(loc.gfid));
+ if (!loc.path) {
+ printf("Error allocating memory to path\n");
+ goto out;
+ }
+ loc.inode = inode_new(rootloc->inode->table);
+ if (!loc.inode) {
+ printf("Error getting inode\n");
+ goto out;
+ }
+ ret = syncop_lookup(xl, &loc, &iatt, 0, xattr_req, &xattr_rsp);
+ if (ret) {
+ op_errno = -ret;
+ printf("Lookup failed on %s:%s.\n", file, strerror(op_errno));
+ goto out;
+ }
+ } else {
+ if (file[0] != '/') {
+ printf(
+ "<FILE> must be absolute path w.r.t. the "
+ "volume, starting with '/'\n");
+ ret = -1;
+ goto out;
+ }
+ path1 = gf_strdup(file);
+ if (!path1) {
+ printf("Error allocating memory to path\n");
+ ret = -1;
+ goto out;
+ }
+ path2 = gf_strdup(file);
+ if (!path2) {
+ printf("Error allocating memory to path\n");
+ ret = -1;
+ goto out;
+ }
+ path = dirname(path1);
+ filename1 = basename(path2);
+ retry1:
+ ret = glfs_resolve(fs, xl, path, &loc, &iatt, reval);
+ ESTALE_RETRY(ret, errno, reval, &loc, retry1);
+ if (ret) {
+ printf("Lookup failed on %s:%s\n", path, strerror(errno));
+ goto out;
+ }
+ GF_FREE((char *)loc.path);
+ loc.path = gf_strdup(file);
+ if (!loc.path) {
+ printf("Error allocating memory for path\n");
+ ret = -1;
+ goto out;
+ }
+ loc.parent = inode_unref(loc.parent);
+ loc.parent = inode_ref(loc.inode);
+ loc.inode = inode_unref(loc.inode);
+ loc.inode = inode_new(rootloc->inode->table);
+ if (!loc.inode) {
+ printf("Error getting inode\n");
+ ret = -1;
+ goto out;
+ }
+ loc.name = filename1;
+ gf_uuid_copy(loc.pargfid, loc.gfid);
+ gf_uuid_clear(loc.gfid);
+
+ ret = syncop_lookup(xl, &loc, &iatt, 0, xattr_req, &xattr_rsp);
+ if (ret) {
+ op_errno = -ret;
+ printf("Lookup failed on %s:%s.\n", file, strerror(op_errno));
+ flag = _gf_true;
+ }
+
+ ret = dict_get_str(xattr_rsp, "gfid-heal-msg", &gfid_heal_msg);
+ if (!ret) {
+ printf("%s for file %s\n", gfid_heal_msg, file);
+ loc_wipe(&loc);
+ goto out;
+ }
+ if (flag)
+ goto out;
+
+ reval = 0;
+ loc_wipe(&loc);
+ memset(&iatt, 0, sizeof(iatt));
+
+ retry2:
+ ret = glfs_resolve(fs, xl, file, &loc, &iatt, reval);
+ ESTALE_RETRY(ret, errno, reval, &loc, retry2);
+ if (ret) {
+ printf("Lookup failed on %s:%s\n", file, strerror(errno));
+ goto out;
+ }
+ }
+
+ if (iatt.ia_type == IA_IFDIR) {
+ ret = _validate_directory(xattr_req, file);
+ if (ret)
+ goto out;
+ }
+ ret = syncop_getxattr(xl, &loc, &xattr_rsp, GF_AFR_HEAL_SBRAIN, xattr_req,
+ NULL);
+ if (ret) {
+ op_errno = -ret;
+ printf("Healing %s failed:%s.\n", file, strerror(op_errno));
+ goto out;
+ }
+ ret = dict_get_str(xattr_rsp, "sh-fail-msg", &sh_fail_msg);
+ if (!ret) {
+ printf("Healing %s failed: %s.\n", file, sh_fail_msg);
+ ret = -1;
+ goto out;
+ }
+ printf("Healed %s.\n", file);
+ ret = 0;
+out:
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+ if (path1)
+ GF_FREE(path1);
+ if (path2)
+ GF_FREE(path2);
+ if (filename)
+ GF_FREE(filename);
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
+glfsh_heal_from_brick_type(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ char *hostname, char *brickpath, xlator_t *client,
+ dict_t *xattr_req, char *vgfid,
+ num_entries_t *num_entries)
+{
+ fd_t *fd = NULL;
+ loc_t dirloc = {0};
+ int32_t op_errno = 0;
+ int ret = -1;
+
+ ret = glfsh_get_index_dir_loc(rootloc, client, &dirloc, &op_errno, vgfid);
+ if (ret < 0) {
+ if (op_errno == ESTALE || op_errno == ENOENT)
+ ret = 0;
+ else
+ ret = -op_errno;
+ goto out;
+ }
+
+ ret = syncop_dirfd(client, &dirloc, &fd, GF_CLIENT_PID_GLFS_HEAL);
+ if (ret)
+ goto out;
+ ret = glfsh_crawl_directory(fs, top_subvol, rootloc, client, fd, &dirloc,
+ xattr_req, num_entries, _gf_false);
+ if (fd)
+ fd_unref(fd);
+out:
+ loc_wipe(&dirloc);
+ return ret;
+}
+
+int
+glfsh_heal_from_brick(glfs_t *fs, xlator_t *top_subvol, loc_t *rootloc,
+ char *hostname, char *brickpath, char *file)
+{
+ int ret = -1;
+ dict_t *xattr_req = NULL;
+ xlator_t *client = NULL;
+ num_entries_t num_entries = {
+ 0,
+ };
+ num_entries_t total = {
+ 0,
+ };
+
+ xattr_req = dict_new();
+ if (!xattr_req)
+ goto out;
+ ret = dict_set_int32(xattr_req, "heal-op",
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+ if (ret)
+ goto out;
+ client = _brick_path_to_client_xlator(top_subvol, hostname, brickpath);
+ if (!client) {
+ printf("\"%s:%s\"- No such brick available in the volume.\n", hostname,
+ brickpath);
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str(xattr_req, "child-name", client->name);
+ if (ret)
+ goto out;
+ if (file)
+ ret = glfsh_heal_splitbrain_file(fs, top_subvol, rootloc, file,
+ xattr_req);
+ else {
+ ret = glfsh_heal_from_brick_type(fs, top_subvol, rootloc, hostname,
+ brickpath, client, xattr_req,
+ GF_XATTROP_INDEX_GFID, &num_entries);
+ total.num_entries += num_entries.num_entries;
+ num_entries.num_entries = 0;
+ if (ret == -ENOTCONN)
+ goto out;
+
+ ret = glfsh_heal_from_brick_type(fs, top_subvol, rootloc, hostname,
+ brickpath, client, xattr_req,
+ GF_XATTROP_DIRTY_GFID, &num_entries);
+ total.num_entries += num_entries.num_entries;
+ if (ret < 0)
+ goto out;
+ }
+out:
+ if (xattr_req)
+ dict_unref(xattr_req);
+ if (!file)
+ glfsh_print_heal_op_status(ret, total.num_entries,
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+
+ return ret;
+}
+
+int
+glfsh_heal_from_bigger_file_or_mtime(glfs_t *fs, xlator_t *top_subvol,
+ loc_t *rootloc, char *file,
+ gf_xl_afr_op_t heal_op)
+{
+ int ret = -1;
+ dict_t *xattr_req = NULL;
+
+ xattr_req = dict_new();
+ if (!xattr_req)
+ goto out;
+ ret = dict_set_int32(xattr_req, "heal-op", heal_op);
+ if (ret)
+ goto out;
+ ret = glfsh_heal_splitbrain_file(fs, top_subvol, rootloc, file, xattr_req);
+out:
+ if (xattr_req)
+ dict_unref(xattr_req);
+ return ret;
+}
+
+static void
+cleanup(glfs_t *fs)
+{
+ if (!fs)
+ return;
+#if 0
+ /* glfs fini path is still racy and crashing the program. Since
+ * this program any way has to die, we are not going to call fini
+ * in the released versions. i.e. final builds. For all
+ * internal testing lets enable this so that glfs_fini code
+ * path becomes stable. */
+ glfs_fini (fs);
+#endif
+}
+
+glfsh_info_t glfsh_human_readable = {
+ .init = glfsh_init,
+ .print_brick_from_xl = glfsh_print_brick_from_xl,
+ .print_heal_op_status = glfsh_print_hr_heal_op_status,
+ .print_heal_op_summary = glfsh_print_hr_heal_op_summary,
+ .print_heal_status = glfsh_print_hr_heal_status,
+ .print_spb_status = glfsh_print_hr_spb_status,
+ .end = glfsh_end};
+
+glfsh_info_t glfsh_no_print = {
+ .init = glfsh_init,
+ .print_brick_from_xl = glfsh_no_print_brick_from_xl,
+ .print_heal_op_status = glfsh_no_print_hr_heal_op_status,
+ .print_heal_status = glfsh_no_print_hr_status,
+ .print_spb_status = glfsh_no_print_hr_status,
+ .end = glfsh_end_op_granular_entry_heal};
+
+#if (HAVE_LIB_XML)
+glfsh_info_t glfsh_xml_output = {
+ .init = glfsh_xml_init,
+ .print_brick_from_xl = glfsh_print_xml_brick_from_xl,
+ .print_heal_op_status = glfsh_print_xml_heal_op_status,
+ .print_heal_op_summary = glfsh_print_xml_heal_op_summary,
+ .print_heal_status = glfsh_print_xml_file_status,
+ .print_spb_status = glfsh_print_xml_file_status,
+ .end = glfsh_xml_end};
+#endif
+
+static void
+parse_flags(int *argc, char **argv, int *flags)
+{
+ int i = 0;
+ char *opt = NULL;
+ int count = 0;
+
+ for (i = 0; i < *argc; i++) {
+ opt = strtail(argv[i], "--");
+ if (!opt)
+ continue;
+ if (strcmp(opt, "nolog") == 0) {
+ *flags |= MODE_NO_LOG;
+ count++;
+ } else if (strcmp(opt, "xml") == 0) {
+ *flags |= MODE_XML;
+ count++;
+ }
+ }
+ *argc = *argc - count;
+}
+
+int
+main(int argc, char **argv)
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ char *volname = NULL;
+ xlator_t *top_subvol = NULL;
+ loc_t rootloc = {0};
+ char logfilepath[PATH_MAX] = {0};
+ char *hostname = NULL;
+ char *path = NULL;
+ char *file = NULL;
+ char *op_errstr = NULL;
+ char *socket_filepath = NULL;
+ gf_xl_afr_op_t heal_op = -1;
+ gf_loglevel_t log_level = GF_LOG_INFO;
+ int flags = 0;
+
+ if (argc < 2) {
+ printf(USAGE_STR, argv[0]);
+ ret = -1;
+ goto out;
+ } else if (argc >= 4) {
+ if (!strcmp(argv[argc - 2], "glusterd-sock")) {
+ socket_filepath = argv[argc - 1];
+ argc = argc - 2;
+ }
+ }
+ volname = argv[1];
+
+ parse_flags(&argc, argv, &flags);
+ if (flags & MODE_NO_LOG)
+ log_level = GF_LOG_NONE;
+ if (flags & MODE_XML)
+ is_xml = 1;
+
+ switch (argc) {
+ case 2:
+ heal_op = GF_SHD_OP_INDEX_SUMMARY;
+ break;
+ case 3:
+ if (!strcmp(argv[2], "split-brain-info")) {
+ heal_op = GF_SHD_OP_SPLIT_BRAIN_FILES;
+ } else if (!strcmp(argv[2], "granular-entry-heal-op")) {
+ heal_op = GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE;
+ } else if (!strcmp(argv[2], "info-summary")) {
+ heal_op = GF_SHD_OP_HEAL_SUMMARY;
+ } else {
+ printf(USAGE_STR, argv[0]);
+ ret = -1;
+ goto out;
+ }
+ break;
+ case 4:
+ if (!strcmp(argv[2], "bigger-file")) {
+ heal_op = GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE;
+ file = argv[3];
+ } else if (!strcmp(argv[2], "latest-mtime")) {
+ heal_op = GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME;
+ file = argv[3];
+ } else if (!strcmp(argv[2], "source-brick")) {
+ heal_op = GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK;
+ hostname = strtok(argv[3], ":");
+ path = strtok(NULL, ":");
+ } else {
+ printf(USAGE_STR, argv[0]);
+ ret = -1;
+ goto out;
+ }
+ break;
+ case 5:
+ if (!strcmp(argv[2], "source-brick")) {
+ heal_op = GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK;
+ hostname = strtok(argv[3], ":");
+ path = strtok(NULL, ":");
+ file = argv[4];
+ } else {
+ printf(USAGE_STR, argv[0]);
+ ret = -1;
+ goto out;
+ }
+ break;
+ default:
+ printf(USAGE_STR, argv[0]);
+ ret = -1;
+ goto out;
+ }
+
+ glfsh_output = &glfsh_human_readable;
+ if (is_xml) {
+#if (HAVE_LIB_XML)
+ if ((heal_op == GF_SHD_OP_INDEX_SUMMARY) ||
+ (heal_op == GF_SHD_OP_SPLIT_BRAIN_FILES) ||
+ (heal_op == GF_SHD_OP_HEAL_SUMMARY)) {
+ glfsh_output = &glfsh_xml_output;
+ } else {
+ printf(USAGE_STR, argv[0]);
+ ret = -1;
+ goto out;
+ }
+#else
+ /*No point doing anything, just fail the command*/
+ exit(EXIT_FAILURE);
+#endif
+ }
+
+ if (heal_op == GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE)
+ glfsh_output = &glfsh_no_print;
+
+ ret = glfsh_output->init();
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ ret = -errno;
+ gf_asprintf(&op_errstr, "Not able to initialize volume '%s'", volname);
+ goto out;
+ }
+
+ if (sys_access(SECURE_ACCESS_FILE, F_OK) == 0) {
+ fs->ctx->secure_mgmt = 1;
+ fs->ctx->ssl_cert_depth = glusterfs_read_secure_access_file();
+ }
+ if (socket_filepath != NULL) {
+ ret = glfs_set_volfile_server(fs, "unix", socket_filepath, 0);
+ } else {
+ ret = glfs_set_volfile_server(fs, "unix", DEFAULT_GLUSTERD_SOCKFILE, 0);
+ }
+ if (ret) {
+ ret = -errno;
+ gf_asprintf(&op_errstr,
+ "Setting the volfile server failed, "
+ "%s",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = glfsh_set_heal_options(fs, heal_op);
+ if (ret) {
+ printf("Setting xlator heal options failed, %s\n", strerror(errno));
+ goto out;
+ }
+ snprintf(logfilepath, sizeof(logfilepath),
+ DEFAULT_HEAL_LOG_FILE_DIRECTORY "/glfsheal-%s.log", volname);
+ ret = glfs_set_logging(fs, logfilepath, log_level);
+ if (ret < 0) {
+ ret = -errno;
+ gf_asprintf(&op_errstr,
+ "Failed to set the log file path, "
+ "%s",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = glfs_setfspid(fs, GF_CLIENT_PID_GLFS_HEAL);
+ if (ret) {
+ printf("Setting client pid failed, %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ ret = -errno;
+ if (errno == ENOENT) {
+ gf_asprintf(&op_errstr, "Volume %s does not exist", volname);
+ } else {
+ gf_asprintf(&op_errstr,
+ "%s: Not able to fetch "
+ "volfile from glusterd",
+ volname);
+ }
+ goto out;
+ }
+
+ top_subvol = glfs_active_subvol(fs);
+ if (!top_subvol) {
+ ret = -errno;
+ if (errno == ENOTCONN) {
+ gf_asprintf(&op_errstr,
+ "Volume %s is not started "
+ "(Or) All the bricks are not "
+ "running.",
+ volname);
+ } else {
+ gf_asprintf(&op_errstr,
+ "%s: Not able to mount the "
+ "volume, %s",
+ volname, strerror(errno));
+ }
+ goto out;
+ }
+
+ char *var_str = (heal_op == GF_SHD_OP_INDEX_SUMMARY ||
+ heal_op == GF_SHD_OP_HEAL_SUMMARY)
+ ? "replicate/disperse"
+ : "replicate";
+
+ ret = glfsh_validate_volume(top_subvol, heal_op);
+ if (ret < 0) {
+ ret = -EINVAL;
+ gf_asprintf(&op_errstr,
+ "This command is supported "
+ "for only volumes of %s type. Volume %s "
+ "is not of type %s",
+ var_str, volname, var_str);
+ goto out;
+ }
+ rootloc.inode = inode_ref(top_subvol->itable->root);
+ ret = glfs_loc_touchup(&rootloc);
+ if (ret < 0) {
+ ret = -errno;
+ goto out;
+ }
+
+ switch (heal_op) {
+ case GF_SHD_OP_INDEX_SUMMARY:
+ case GF_SHD_OP_SPLIT_BRAIN_FILES:
+ case GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE:
+ case GF_SHD_OP_HEAL_SUMMARY:
+ ret = glfsh_gather_heal_info(fs, top_subvol, &rootloc, heal_op);
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:
+ ret = glfsh_heal_from_bigger_file_or_mtime(fs, top_subvol, &rootloc,
+ file, heal_op);
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+ ret = glfsh_heal_from_brick(fs, top_subvol, &rootloc, hostname,
+ path, file);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ glfsh_output->end(ret, NULL);
+ if (ret < 0)
+ ret = -ret;
+ loc_wipe(&rootloc);
+ glfs_subvol_done(fs, top_subvol);
+ cleanup(fs);
+
+ return ret;
+out:
+ if (fs && top_subvol)
+ glfs_subvol_done(fs, top_subvol);
+ loc_wipe(&rootloc);
+ cleanup(fs);
+ if (glfsh_output)
+ glfsh_output->end(ret, op_errstr);
+ if (op_errstr)
+ GF_FREE(op_errstr);
+ return ret;
+}
diff --git a/libgfchangelog.pc.in b/libgfchangelog.pc.in
new file mode 100644
index 00000000000..79eac2ad2d3
--- /dev/null
+++ b/libgfchangelog.pc.in
@@ -0,0 +1,12 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+
+Name: libgfchangelog
+Description: GlusterFS Changelog Consumer Library
+Version: @LIBGFCHANGELOG_VERSION@
+Requires: @PKGCONFIG_UUID@
+Libs: -L${libdir} -lgfchangelog -lglusterfs
+Cflags: -I${includedir} -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64
diff --git a/mod_glusterfs/apache/2.2/Makefile.am b/libglusterd/Makefile.am
index d471a3f9243..a985f42a877 100644
--- a/mod_glusterfs/apache/2.2/Makefile.am
+++ b/libglusterd/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/libglusterd/src/Makefile.am b/libglusterd/src/Makefile.am
new file mode 100644
index 00000000000..684d2bac96b
--- /dev/null
+++ b/libglusterd/src/Makefile.am
@@ -0,0 +1,31 @@
+libglusterd_la_CFLAGS = $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
+ -DDATADIR=\"$(localstatedir)\"
+
+libglusterd_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \
+ -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \
+ -DXLATORPARENTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)\" \
+ -DXXH_NAMESPACE=GF_ -D__USE_LARGEFILE64 \
+ -I$(CONTRIBDIR)/rbtree \
+ -I$(CONTRIBDIR)/libexecinfo ${ARGP_STANDALONE_CPPFLAGS} \
+ -DSBIN_DIR=\"$(sbindir)\" -I$(CONTRIBDIR)/timer-wheel \
+ -I$(CONTRIBDIR)/xxhash \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src/
+
+libglusterd_la_LIBADD = $(ZLIB_LIBS) $(MATH_LIB) $(UUID_LIBS)
+libglusterd_la_LDFLAGS = -version-info $(LIBGLUSTERFS_LT_VERSION) $(GF_LDFLAGS) \
+ -export-symbols $(top_srcdir)/libglusterd/src/libglusterd.sym
+
+lib_LTLIBRARIES = libglusterd.la
+
+libglusterd_la_SOURCES = gd-common-utils.c
+
+libglusterd_la_HEADERS = gd-common-utils.h
+
+libglusterd_ladir = $(includedir)/glusterfs
+
+noinst_HEADERS = gd-common-utils.h
+
+EXTRA_DIST = libglusterd.sym
+
+CLEANFILES =
diff --git a/libglusterd/src/gd-common-utils.c b/libglusterd/src/gd-common-utils.c
new file mode 100644
index 00000000000..243fab215e6
--- /dev/null
+++ b/libglusterd/src/gd-common-utils.c
@@ -0,0 +1,78 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "gd-common-utils.h"
+#include "cli1-xdr.h"
+
+int
+get_vol_type(int type, int dist_count, int brick_count)
+{
+ if ((type != GF_CLUSTER_TYPE_TIER) && (type > 0) &&
+ (dist_count < brick_count))
+ type = type + GF_CLUSTER_TYPE_MAX - 1;
+
+ return type;
+}
+
+char *
+get_struct_variable(int mem_num, gf_gsync_status_t *sts_val)
+{
+ switch (mem_num) {
+ case 0:
+ return (sts_val->node);
+ case 1:
+ return (sts_val->master);
+ case 2:
+ return (sts_val->brick);
+ case 3:
+ return (sts_val->slave_user);
+ case 4:
+ return (sts_val->slave);
+ case 5:
+ return (sts_val->slave_node);
+ case 6:
+ return (sts_val->worker_status);
+ case 7:
+ return (sts_val->crawl_status);
+ case 8:
+ return (sts_val->last_synced);
+ case 9:
+ return (sts_val->entry);
+ case 10:
+ return (sts_val->data);
+ case 11:
+ return (sts_val->meta);
+ case 12:
+ return (sts_val->failures);
+ case 13:
+ return (sts_val->checkpoint_time);
+ case 14:
+ return (sts_val->checkpoint_completed);
+ case 15:
+ return (sts_val->checkpoint_completion_time);
+ case 16:
+ return (sts_val->brick_host_uuid);
+ case 17:
+ return (sts_val->last_synced_utc);
+ case 18:
+ return (sts_val->checkpoint_time_utc);
+ case 19:
+ return (sts_val->checkpoint_completion_time_utc);
+ case 20:
+ return (sts_val->slavekey);
+ case 21:
+ return (sts_val->session_slave);
+ default:
+ goto out;
+ }
+
+out:
+ return NULL;
+}
diff --git a/libglusterd/src/gd-common-utils.h b/libglusterd/src/gd-common-utils.h
new file mode 100644
index 00000000000..b9bb4f956fe
--- /dev/null
+++ b/libglusterd/src/gd-common-utils.h
@@ -0,0 +1,28 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GD_COMMON_UTILS_H
+#define _GD_COMMON_UTILS_H
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <limits.h>
+#include <stddef.h>
+
+#include "protocol-common.h"
+#include "rpcsvc.h"
+
+int
+get_vol_type(int type, int dist_count, int brick_count);
+
+char *
+get_struct_variable(int mem_num, gf_gsync_status_t *sts_val);
+
+#endif /* _GD_COMMON_UTILS_H */
diff --git a/libglusterd/src/libglusterd.sym b/libglusterd/src/libglusterd.sym
new file mode 100644
index 00000000000..45969a87c12
--- /dev/null
+++ b/libglusterd/src/libglusterd.sym
@@ -0,0 +1,2 @@
+get_vol_type
+get_struct_variable
diff --git a/libglusterfs/src/Makefile.am b/libglusterfs/src/Makefile.am
index 2dab8735c96..385e8ef4600 100644
--- a/libglusterfs/src/Makefile.am
+++ b/libglusterfs/src/Makefile.am
@@ -1,50 +1,116 @@
-libglusterfs_la_CFLAGS = -fPIC -Wall -g -shared -nostartfiles $(GF_CFLAGS) \
- $(GF_DARWIN_LIBGLUSTERFS_CFLAGS)
+noinst_PYTHON = generator.py gen-defaults.py $(top_srcdir)/events/eventskeygen.py
-libglusterfs_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 \
- -D_GNU_SOURCE -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \
- -D$(GF_HOST_OS) -I$(CONTRIBDIR)/rbtree \
- -DSCHEDULERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler\"
+libglusterfs_la_CFLAGS = $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
+ -DDATADIR=\"$(localstatedir)\"
-libglusterfs_la_LIBADD = @LEXLIB@
+libglusterfs_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \
+ -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \
+ -DXLATORPARENTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)\" \
+ -DXXH_NAMESPACE=GF_ -D__USE_LARGEFILE64 \
+ -I$(CONTRIBDIR)/rbtree \
+ -I$(CONTRIBDIR)/libexecinfo ${ARGP_STANDALONE_CPPFLAGS} \
+ -DSBIN_DIR=\"$(sbindir)\" -I$(CONTRIBDIR)/timer-wheel \
+ -I$(CONTRIBDIR)/xxhash
+
+libglusterfs_la_LIBADD = $(ZLIB_LIBS) $(MATH_LIB) $(UUID_LIBS) $(LIB_DL) \
+ $(URCU_LIBS) $(URCU_CDS_LIBS)
+libglusterfs_la_LDFLAGS = -version-info $(LIBGLUSTERFS_LT_VERSION) $(GF_LDFLAGS) \
+ -export-symbols $(top_srcdir)/libglusterfs/src/libglusterfs.sym
lib_LTLIBRARIES = libglusterfs.la
+libgfchangelogdir = $(includedir)/glusterfs/gfchangelog
CONTRIB_BUILDDIR = $(top_builddir)/contrib
libglusterfs_la_SOURCES = dict.c xlator.c logging.c \
- hashfn.c defaults.c common-utils.c timer.c inode.c call-stub.c \
+ hashfn.c common-utils.c timer.c inode.c call-stub.c \
compat.c fd.c compat-errno.c event.c mem-pool.c gf-dirent.c syscall.c \
- iobuf.c globals.c statedump.c stack.c checksum.c daemon.c \
- $(CONTRIBDIR)/rbtree/rb.c rbthash.c latency.c \
- graph.c $(CONTRIBDIR)/uuid/clear.c $(CONTRIBDIR)/uuid/copy.c \
- $(CONTRIBDIR)/uuid/gen_uuid.c $(CONTRIBDIR)/uuid/pack.c \
- $(CONTRIBDIR)/uuid/parse.c $(CONTRIBDIR)/uuid/unparse.c \
- $(CONTRIBDIR)/uuid/uuid_time.c $(CONTRIBDIR)/uuid/compare.c \
- $(CONTRIBDIR)/uuid/isnull.c $(CONTRIBDIR)/uuid/unpack.c syncop.c \
- graph-print.c trie.c run.c options.c fd-lk.c circ-buff.c event-history.c
-
-nodist_libglusterfs_la_SOURCES = y.tab.c graph.lex.c
-
-BUILT_SOURCES = graph.lex.c
-
-noinst_HEADERS = common-utils.h defaults.h dict.h glusterfs.h hashfn.h \
- logging.h xlator.h stack.h timer.h list.h inode.h call-stub.h compat.h \
- fd.h revision.h compat-errno.h event.h mem-pool.h byte-order.h \
- gf-dirent.h locking.h syscall.h iobuf.h globals.h statedump.h \
- checksum.h daemon.h $(CONTRIBDIR)/rbtree/rb.h \
- rbthash.h iatt.h latency.h mem-types.h $(CONTRIBDIR)/uuid/uuidd.h \
- $(CONTRIBDIR)/uuid/uuid.h $(CONTRIBDIR)/uuid/uuidP.h \
- $(CONTRIB_BUILDDIR)/uuid/uuid_types.h syncop.h graph-utils.h trie.h run.h \
- options.h lkowner.h fd-lk.h circ-buff.h event-history.h
-
-EXTRA_DIST = graph.l graph.y
+ iobuf.c globals.c statedump.c stack.c checksum.c daemon.c timespec.c \
+ $(CONTRIBDIR)/rbtree/rb.c rbthash.c store.c latency.c \
+ graph.c syncop.c graph-print.c trie.c run.c options.c fd-lk.c \
+ circ-buff.c event-history.c gidcache.c ctx.c client_t.c event-poll.c \
+ event-epoll.c syncop-utils.c cluster-syncop.c refcount.c \
+ $(CONTRIBDIR)/libgen/basename_r.c \
+ $(CONTRIBDIR)/libgen/dirname_r.c \
+ strfd.c parse-utils.c $(CONTRIBDIR)/mount/mntent.c \
+ $(CONTRIBDIR)/libexecinfo/execinfo.c quota-common-utils.c rot-buffs.c \
+ $(CONTRIBDIR)/timer-wheel/timer-wheel.c \
+ $(CONTRIBDIR)/timer-wheel/find_last_bit.c default-args.c locking.c \
+ $(CONTRIBDIR)/xxhash/xxhash.c \
+ throttle-tbf.c monitoring.c async.c
+
+nodist_libglusterfs_la_SOURCES = y.tab.c graph.lex.c defaults.c
+nodist_libglusterfs_la_HEADERS = y.tab.h
+
+BUILT_SOURCES = graph.lex.c defaults.c eventtypes.h
+
+libglusterfs_la_HEADERS = glusterfs/common-utils.h glusterfs/defaults.h \
+ glusterfs/default-args.h glusterfs/dict.h glusterfs/glusterfs.h \
+ glusterfs/hashfn.h glusterfs/timespec.h glusterfs/logging.h \
+ glusterfs/xlator.h glusterfs/stack.h glusterfs/timer.h glusterfs/list.h \
+ glusterfs/inode.h glusterfs/call-stub.h glusterfs/compat.h glusterfs/fd.h \
+ glusterfs/revision.h glusterfs/compat-errno.h glusterfs/gf-event.h \
+ glusterfs/mem-pool.h glusterfs/byte-order.h glusterfs/gf-dirent.h \
+ glusterfs/locking.h glusterfs/syscall.h glusterfs/iobuf.h \
+ glusterfs/globals.h glusterfs/statedump.h glusterfs/checksum.h \
+ glusterfs/daemon.h glusterfs/store.h glusterfs/rbthash.h glusterfs/iatt.h \
+ glusterfs/latency.h glusterfs/mem-types.h glusterfs/syncop.h \
+ glusterfs/cluster-syncop.h glusterfs/graph-utils.h glusterfs/trie.h \
+ glusterfs/refcount.h glusterfs/run.h glusterfs/options.h \
+ glusterfs/lkowner.h glusterfs/fd-lk.h glusterfs/circ-buff.h \
+ glusterfs/event-history.h glusterfs/gidcache.h glusterfs/client_t.h \
+ glusterfs/glusterfs-acl.h glusterfs/glfs-message-id.h \
+ glusterfs/template-component-messages.h glusterfs/strfd.h \
+ glusterfs/syncop-utils.h glusterfs/parse-utils.h \
+ glusterfs/libglusterfs-messages.h glusterfs/lvm-defaults.h \
+ glusterfs/quota-common-utils.h glusterfs/rot-buffs.h \
+ glusterfs/compat-uuid.h glusterfs/upcall-utils.h glusterfs/throttle-tbf.h \
+ glusterfs/events.h glusterfs/atomic.h glusterfs/monitoring.h \
+ glusterfs/async.h glusterfs/glusterfs-fops.h
+
+libglusterfs_ladir = $(includedir)/glusterfs
+
+noinst_HEADERS = unittest/unittest.h \
+ $(CONTRIBDIR)/rbtree/rb.h \
+ $(CONTRIBDIR)/mount/mntent_compat.h \
+ $(CONTRIBDIR)/libexecinfo/execinfo_compat.h \
+ $(CONTRIBDIR)/timer-wheel/timer-wheel.h \
+ $(CONTRIBDIR)/xxhash/xxhash.h \
+ $(CONTRIBDIR)/userspace-rcu/wfcqueue.h \
+ $(CONTRIBDIR)/userspace-rcu/wfstack.h \
+ $(CONTRIBDIR)/userspace-rcu/static-wfcqueue.h \
+ $(CONTRIBDIR)/userspace-rcu/static-wfstack.h
+
+eventtypes.h: $(top_srcdir)/events/eventskeygen.py
+ $(PYTHON) $(top_srcdir)/events/eventskeygen.py C_HEADER
+
+if BUILD_EVENTS
+libglusterfs_la_SOURCES += events.c
+endif
+
+libgfchangelog_HEADERS = changelog.h
+
+EXTRA_DIST = graph.l graph.y defaults-tmpl.c libglusterfs.sym
graph.lex.c: graph.l y.tab.h
- $(LEX) -t $(srcdir)/graph.l > $@
+ $(LEX) -Pgraphyy -t $(srcdir)/graph.l > $@
+y.tab.c: y.tab.h
y.tab.h: graph.y
- $(YACC) -d $(srcdir)/graph.y
+ $(YACC) -p graphyy -d $(srcdir)/graph.y
+
+defaults.c: defaults-tmpl.c generator.py gen-defaults.py
+ $(PYTHON) $(srcdir)/gen-defaults.py $(srcdir)/defaults-tmpl.c > $@
+
+CLEANFILES = $(nodist_libglusterfs_la_SOURCES) \
+ $(nodist_libglusterfs_la_HEADERS) *.pyc
+
+if UNITTEST
+CLEANFILES += *.gcda *.gcno *_xunit.xml
+noinst_PROGRAMS =
+TESTS =
+endif
-CLEANFILES = graph.lex.c y.tab.c y.tab.h
-CONFIG_CLEAN_FILES = $(CONTRIB_BUILDDIR)/uuid/uuid_types.h
+if BUILD_EVENTS
+CLEANFILES += eventtypes.h
+endif
diff --git a/libglusterfs/src/async.c b/libglusterfs/src/async.c
new file mode 100644
index 00000000000..1d6cfa374b6
--- /dev/null
+++ b/libglusterfs/src/async.c
@@ -0,0 +1,720 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc <https://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* To implement an efficient thread pool with minimum contention we have used
+ * the following ideas:
+ *
+ * - The queue of jobs has been implemented using a Wait-Free queue provided
+ * by the userspace-rcu library. This queue requires a mutex when multiple
+ * consumers can be extracting items from it concurrently, but the locked
+ * region is very small, which minimizes the chances of contention. To
+ * further minimize contention, the number of active worker threads that
+ * are accessing the queue is dynamically adjusted so that we always have
+ * the minimum required amount of workers contending for the queue. Adding
+ * new items can be done with a single atomic operation, without locks.
+ *
+ * - All queue management operations, like creating more threads, enabling
+ * sleeping ones, etc. are done by a single thread. This makes it possible
+ * to manage all scaling related information and workers lists without
+ * locks. This functionality is implemented as a role that can be assigned
+ * to any of the worker threads, which avoids that some lengthy operations
+ * could interfere with this task.
+ *
+ * - Management is based on signals. We used signals for management tasks to
+ * avoid multiple system calls for each request (with signals we can wait
+ * for multiple events and get some additional data for each request in a
+ * single call, instead of first polling and then reading).
+ *
+ * TODO: There are some other changes that can take advantage of this new
+ * thread pool.
+ *
+ * - Use this thread pool as the core threading model for synctasks. I
+ * think this would improve synctask performance because I think we
+ * currently have some contention there for some workloads.
+ *
+ * - Implement a per thread timer that will allow adding and removing
+ * timers without using mutexes.
+ *
+ * - Integrate with userspace-rcu library in QSBR mode, allowing
+ * other portions of code to be implemented using RCU-based
+ * structures with a extremely fast read side without contention.
+ *
+ * - Integrate I/O into the thread pool so that the thread pool is
+ * able to efficiently manage all loads and scale dynamically. This
+ * could make it possible to minimize context switching when serving
+ * requests from fuse or network.
+ *
+ * - Dynamically scale the number of workers based on system load.
+ * This will make it possible to reduce contention when system is
+ * heavily loaded, improving performance under these circumstances
+ * (or minimizing performance loss). This will also make it possible
+ * that gluster can coexist with other processes that also consume
+ * CPU, with minimal interference from each other.
+ */
+
+#include <unistd.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include "glusterfs/list.h"
+#include "glusterfs/mem-types.h"
+#include "glusterfs/async.h"
+
+/* These macros wrap a simple system/library call to check the returned error
+ * and log a message in case of failure. */
+#define GF_ASYNC_CHECK(_func, _args...) \
+ ({ \
+ int32_t __async_error = -_func(_args); \
+ if (caa_unlikely(__async_error != 0)) { \
+ gf_async_error(__async_error, #_func "() failed."); \
+ } \
+ __async_error; \
+ })
+
+#define GF_ASYNC_CHECK_ERRNO(_func, _args...) \
+ ({ \
+ int32_t __async_error = _func(_args); \
+ if (caa_unlikely(__async_error < 0)) { \
+ __async_error = -errno; \
+ gf_async_error(__async_error, #_func "() failed."); \
+ } \
+ __async_error; \
+ })
+
+/* These macros are used when, based on POSIX documentation, the function
+ * should never fail under the conditions we are using it. So any unexpected
+ * error will be handled as a fatal event. It probably means a critical bug
+ * or memory corruption. In both cases we consider that stopping the process
+ * is safer (otherwise it could cause more corruption with unknown effects
+ * that could be worse). */
+#define GF_ASYNC_CANTFAIL(_func, _args...) \
+ do { \
+ int32_t __async_error = -_func(_args); \
+ if (caa_unlikely(__async_error != 0)) { \
+ gf_async_fatal(__async_error, #_func "() failed"); \
+ } \
+ } while (0)
+
+#define GF_ASYNC_CANTFAIL_ERRNO(_func, _args...) \
+ ({ \
+ int32_t __async_error = _func(_args); \
+ if (caa_unlikely(__async_error < 0)) { \
+ __async_error = -errno; \
+ gf_async_fatal(__async_error, #_func "() failed"); \
+ } \
+ __async_error; \
+ })
+
+/* TODO: for now we allocate a static array of workers. There's an issue if we
+ * try to use dynamic memory since these workers are initialized very
+ * early in the process startup and it seems that sometimes not all is
+ * ready to use dynamic memory. */
+static gf_async_worker_t gf_async_workers[GF_ASYNC_MAX_THREADS];
+
+/* This is the only global variable needed to manage the entire framework. */
+gf_async_control_t gf_async_ctrl = {};
+
+static __thread gf_async_worker_t *gf_async_current_worker = NULL;
+
+/* The main function of the worker threads. */
+static void *
+gf_async_worker(void *arg);
+
+static void
+gf_async_sync_init(void)
+{
+ GF_ASYNC_CANTFAIL(pthread_barrier_init, &gf_async_ctrl.sync, NULL, 2);
+}
+
+static void
+gf_async_sync_now(void)
+{
+ int32_t ret;
+
+ ret = pthread_barrier_wait(&gf_async_ctrl.sync);
+ if (ret == PTHREAD_BARRIER_SERIAL_THREAD) {
+ GF_ASYNC_CANTFAIL(pthread_barrier_destroy, &gf_async_ctrl.sync);
+ ret = 0;
+ }
+ if (caa_unlikely(ret != 0)) {
+ gf_async_fatal(-ret, "pthread_barrier_wait() failed");
+ }
+}
+
+static void
+gf_async_sigmask_empty(sigset_t *mask)
+{
+ GF_ASYNC_CANTFAIL_ERRNO(sigemptyset, mask);
+}
+
+static void
+gf_async_sigmask_add(sigset_t *mask, int32_t signal)
+{
+ GF_ASYNC_CANTFAIL_ERRNO(sigaddset, mask, signal);
+}
+
+static void
+gf_async_sigmask_set(int32_t mode, sigset_t *mask, sigset_t *old)
+{
+ GF_ASYNC_CANTFAIL(pthread_sigmask, mode, mask, old);
+}
+
+static void
+gf_async_sigaction(int32_t signum, const struct sigaction *action,
+ struct sigaction *old)
+{
+ GF_ASYNC_CANTFAIL_ERRNO(sigaction, signum, action, old);
+}
+
+static int32_t
+gf_async_sigwait(sigset_t *set)
+{
+ int32_t ret, signum;
+
+ do {
+ ret = sigwait(set, &signum);
+ } while (caa_unlikely((ret < 0) && (errno == EINTR)));
+
+ if (caa_unlikely(ret < 0)) {
+ ret = -errno;
+ gf_async_fatal(ret, "sigwait() failed");
+ }
+
+ return signum;
+}
+
+static int32_t
+gf_async_sigtimedwait(sigset_t *set, struct timespec *timeout)
+{
+ int32_t ret;
+
+ do {
+ ret = sigtimedwait(set, NULL, timeout);
+ } while (caa_unlikely((ret < 0) && (errno == EINTR)));
+ if (caa_unlikely(ret < 0)) {
+ ret = -errno;
+ /* EAGAIN means that the timeout has expired, so we allow this error.
+ * Any other error shouldn't happen. */
+ if (caa_unlikely(ret != -EAGAIN)) {
+ gf_async_fatal(ret, "sigtimedwait() failed");
+ }
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static void
+gf_async_sigbroadcast(int32_t signum)
+{
+ GF_ASYNC_CANTFAIL_ERRNO(kill, gf_async_ctrl.pid, signum);
+}
+
+static void
+gf_async_signal_handler(int32_t signum)
+{
+ /* We should never handle a signal in this function. */
+ gf_async_fatal(-EBUSY,
+ "Unexpected processing of signal %d through a handler.",
+ signum);
+}
+
+static void
+gf_async_signal_setup(void)
+{
+ struct sigaction action;
+
+ /* We configure all related signals so that we can detect threads using an
+ * invalid signal mask that doesn't block our critical signal. */
+ memset(&action, 0, sizeof(action));
+ action.sa_handler = gf_async_signal_handler;
+
+ gf_async_sigaction(GF_ASYNC_SIGCTRL, &action, &gf_async_ctrl.handler_ctrl);
+
+ gf_async_sigaction(GF_ASYNC_SIGQUEUE, &action,
+ &gf_async_ctrl.handler_queue);
+}
+
+static void
+gf_async_signal_restore(void)
+{
+ /* Handlers we have previously changed are restored back to their original
+ * value. */
+
+ if (gf_async_ctrl.handler_ctrl.sa_handler != gf_async_signal_handler) {
+ gf_async_sigaction(GF_ASYNC_SIGCTRL, &gf_async_ctrl.handler_ctrl, NULL);
+ }
+
+ if (gf_async_ctrl.handler_queue.sa_handler != gf_async_signal_handler) {
+ gf_async_sigaction(GF_ASYNC_SIGQUEUE, &gf_async_ctrl.handler_queue,
+ NULL);
+ }
+}
+
+static void
+gf_async_signal_flush(void)
+{
+ struct timespec delay;
+
+ delay.tv_sec = 0;
+ delay.tv_nsec = 0;
+
+ /* We read all pending signals so that they don't trigger once the signal
+ * mask of some thread is changed. */
+ while (gf_async_sigtimedwait(&gf_async_ctrl.sigmask_ctrl, &delay) > 0) {
+ }
+ while (gf_async_sigtimedwait(&gf_async_ctrl.sigmask_queue, &delay) > 0) {
+ }
+}
+
+static int32_t
+gf_async_thread_create(pthread_t *thread, int32_t id, void *data)
+{
+ int32_t ret;
+
+ ret = gf_thread_create(thread, NULL, gf_async_worker, data,
+ GF_ASYNC_THREAD_NAME "%u", id);
+ if (caa_unlikely(ret < 0)) {
+ /* TODO: gf_thread_create() should return a more specific error
+ * code. */
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void
+gf_async_thread_wait(pthread_t thread)
+{
+ /* TODO: this is a blocking call executed inside one of the workers of the
+ * thread pool. This is bad, but this is only executed once we have
+ * received a notification from the thread that it's terminating, so
+ * this should return almost immediately. However, to be more robust
+ * it would be better to use pthread_timedjoin_np() (or even a call
+ * to pthread_tryjoin_np() followed by a delayed recheck if it
+ * fails), but they are not portable. We should see how to do this
+ * in other platforms. */
+ GF_ASYNC_CANTFAIL(pthread_join, thread, NULL);
+}
+
+static int32_t
+gf_async_worker_create(void)
+{
+ struct cds_wfs_node *node;
+ gf_async_worker_t *worker;
+ uint32_t counts, running, max;
+ int32_t ret;
+
+ node = __cds_wfs_pop_blocking(&gf_async_ctrl.available);
+ if (caa_unlikely(node == NULL)) {
+ /* There are no more available workers. We have all threads running. */
+ return 1;
+ }
+ cds_wfs_node_init(node);
+
+ ret = 1;
+
+ counts = uatomic_read(&gf_async_ctrl.counts);
+ max = uatomic_read(&gf_async_ctrl.max_threads);
+ running = GF_ASYNC_COUNT_RUNNING(counts);
+ if (running < max) {
+ uatomic_add(&gf_async_ctrl.counts, GF_ASYNC_COUNTS(1, 0));
+
+ worker = caa_container_of(node, gf_async_worker_t, stack);
+
+ ret = gf_async_thread_create(&worker->thread, worker->id, worker);
+ if (caa_likely(ret >= 0)) {
+ return 0;
+ }
+
+ uatomic_add(&gf_async_ctrl.counts, GF_ASYNC_COUNTS(-1, 0));
+ }
+
+ cds_wfs_push(&gf_async_ctrl.available, node);
+
+ return ret;
+}
+
+static void
+gf_async_worker_enable(void)
+{
+ /* This will wake one of the spare workers. If all workers are busy now,
+ * the signal will be queued so that the first one that completes its
+ * work will become the leader. */
+ gf_async_sigbroadcast(GF_ASYNC_SIGCTRL);
+
+ /* We have consumed a spare worker. We create another one for future
+ * needs. */
+ gf_async_worker_create();
+}
+
+static void
+gf_async_worker_wait(void)
+{
+ int32_t signum;
+
+ signum = gf_async_sigwait(&gf_async_ctrl.sigmask_ctrl);
+ if (caa_unlikely(signum != GF_ASYNC_SIGCTRL)) {
+ gf_async_fatal(-EINVAL, "Worker received an unexpected signal (%d)",
+ signum);
+ }
+}
+
+static void
+gf_async_leader_wait(void)
+{
+ int32_t signum;
+
+ signum = gf_async_sigwait(&gf_async_ctrl.sigmask_queue);
+ if (caa_unlikely(signum != GF_ASYNC_SIGQUEUE)) {
+ gf_async_fatal(-EINVAL, "Leader received an unexpected signal (%d)",
+ signum);
+ }
+}
+
+static void
+gf_async_run(struct cds_wfcq_node *node)
+{
+ gf_async_t *async;
+
+ /* We've just got work from the queue. Process it. */
+ async = caa_container_of(node, gf_async_t, queue);
+ /* TODO: remove dependency from THIS and xl. */
+ THIS = async->xl;
+ async->cbk(async->xl, async);
+}
+
+static void
+gf_async_worker_run(void)
+{
+ struct cds_wfcq_node *node;
+
+ do {
+ /* We keep executing jobs from the queue while it's not empty. Note
+ * that while we do this, we are ignoring any stop request. That's
+ * fine, since we need to process our own 'join' messages to fully
+ * terminate all threads. Note that normal jobs should have already
+ * completed once a stop request is received. */
+ node = cds_wfcq_dequeue_blocking(&gf_async_ctrl.queue.head,
+ &gf_async_ctrl.queue.tail);
+ if (node != NULL) {
+ gf_async_run(node);
+ }
+ } while (node != NULL);
+
+ /* TODO: I've tried to keep the worker looking at the queue for some small
+ * amount of time in a busy loop to see if more jobs come soon. With
+ * this I attempted to avoid the overhead of signal management if
+ * jobs come fast enough. However experimental results seem to
+ * indicate that doing this, CPU utilization grows and performance
+ * is actually reduced. We need to see if that's because I used bad
+ * parameters or it's really better to do it as it's done now. */
+}
+
+static void
+gf_async_leader_run(void)
+{
+ struct cds_wfcq_node *node;
+
+ node = cds_wfcq_dequeue_blocking(&gf_async_ctrl.queue.head,
+ &gf_async_ctrl.queue.tail);
+ while (caa_unlikely(node == NULL)) {
+ gf_async_leader_wait();
+
+ node = cds_wfcq_dequeue_blocking(&gf_async_ctrl.queue.head,
+ &gf_async_ctrl.queue.tail);
+ }
+
+ /* Activate the next available worker thread. It will become the new
+ * leader. */
+ gf_async_worker_enable();
+
+ gf_async_run(node);
+}
+
+static uint32_t
+gf_async_stop_check(gf_async_worker_t *worker)
+{
+ uint32_t counts, old, running, max;
+
+ /* First we check if we should stop without doing any costly atomic
+ * operation. */
+ old = uatomic_read(&gf_async_ctrl.counts);
+ max = uatomic_read(&gf_async_ctrl.max_threads);
+ running = GF_ASYNC_COUNT_RUNNING(old);
+ while (running > max) {
+ /* There are too many threads. We try to stop the current worker. */
+ counts = uatomic_cmpxchg(&gf_async_ctrl.counts, old,
+ old + GF_ASYNC_COUNTS(-1, 1));
+ if (old != counts) {
+ /* Another thread has just updated the counts. We need to retry. */
+ old = counts;
+ running = GF_ASYNC_COUNT_RUNNING(old);
+
+ continue;
+ }
+
+ running--;
+ worker->running = false;
+ }
+
+ return running;
+}
+
+static void
+gf_async_stop_all(xlator_t *xl, gf_async_t *async)
+{
+ if (gf_async_stop_check(gf_async_current_worker) > 0) {
+ /* There are more workers running. We propagate the stop request to
+ * them. */
+ gf_async(async, xl, gf_async_stop_all);
+ }
+}
+
+static void
+gf_async_join(xlator_t *xl, gf_async_t *async)
+{
+ gf_async_worker_t *worker;
+
+ worker = caa_container_of(async, gf_async_worker_t, async);
+
+ gf_async_thread_wait(worker->thread);
+
+ cds_wfs_push(&gf_async_ctrl.available, &worker->stack);
+}
+
+static void
+gf_async_terminate(gf_async_worker_t *worker)
+{
+ uint32_t counts;
+
+ counts = uatomic_add_return(&gf_async_ctrl.counts, GF_ASYNC_COUNTS(0, -1));
+ if (counts == 0) {
+ /* This is the termination of the last worker thread. We need to
+ * synchronize the main thread that is waiting for all workers to
+ * finish. */
+ gf_async_ctrl.sync_thread = worker->thread;
+
+ gf_async_sync_now();
+ } else {
+ /* Force someone else to join this thread to release resources. */
+ gf_async(&worker->async, THIS, gf_async_join);
+ }
+}
+
+static void *
+gf_async_worker(void *arg)
+{
+ gf_async_worker_t *worker;
+
+ worker = (gf_async_worker_t *)arg;
+ gf_async_current_worker = worker;
+
+ worker->running = true;
+ do {
+ /* This thread does nothing until someone enables it to become a
+ * leader. */
+ gf_async_worker_wait();
+
+ /* This thread is now a leader. It will process jobs from the queue
+ * and, if necessary, enable another worker and transfer leadership
+ * to it. */
+ gf_async_leader_run();
+
+ /* This thread is not a leader anymore. It will continue processing
+ * queued jobs until it becomes empty. */
+ gf_async_worker_run();
+
+ /* Stop the current thread if there are too many threads running. */
+ gf_async_stop_check(worker);
+ } while (worker->running);
+
+ gf_async_terminate(worker);
+
+ return NULL;
+}
+
+static void
+gf_async_cleanup(void)
+{
+ /* We do some basic initialization of the global variable 'gf_async_ctrl'
+ * so that it's put into a relatively consistent state. */
+
+ gf_async_ctrl.enabled = false;
+
+ gf_async_ctrl.pid = 0;
+ gf_async_sigmask_empty(&gf_async_ctrl.sigmask_ctrl);
+ gf_async_sigmask_empty(&gf_async_ctrl.sigmask_queue);
+
+ /* This is used to later detect if the handler of these signals have been
+ * changed or not. */
+ gf_async_ctrl.handler_ctrl.sa_handler = gf_async_signal_handler;
+ gf_async_ctrl.handler_queue.sa_handler = gf_async_signal_handler;
+
+ gf_async_ctrl.table = NULL;
+ gf_async_ctrl.max_threads = 0;
+ gf_async_ctrl.counts = 0;
+}
+
+void
+gf_async_fini(void)
+{
+ gf_async_t async;
+
+ if (uatomic_read(&gf_async_ctrl.counts) != 0) {
+ /* We ensure that all threads will quit on the next check. */
+ gf_async_ctrl.max_threads = 0;
+
+ /* Send the stop request to the thread pool. This will cause the
+ * execution of gf_async_stop_all() by one of the worker threads which,
+ * eventually, will terminate all worker threads. */
+ gf_async(&async, THIS, gf_async_stop_all);
+
+ /* We synchronize here with the last thread. */
+ gf_async_sync_now();
+
+ /* We have just synchronized with the latest thread. Now just wait for
+ * it to terminate. */
+ gf_async_thread_wait(gf_async_ctrl.sync_thread);
+
+ gf_async_signal_flush();
+ }
+
+ gf_async_signal_restore();
+
+ gf_async_cleanup();
+}
+
+void
+gf_async_adjust_threads(int32_t threads)
+{
+ if (threads == 0) {
+ /* By default we allow a maximum of 2 * #cores worker threads. This
+ * value is to try to accommodate threads that will do some I/O. Having
+ * more threads than cores we can keep CPU busy even if some threads
+ * are blocked for I/O. In the most efficient case, we can have #cores
+ * computing threads and #cores blocked threads on I/O. However this is
+ * hard to achieve because we can end with more than #cores computing
+ * threads, which won't provide a real benefit and will increase
+ * contention.
+ *
+ * TODO: implement a more intelligent dynamic maximum based on CPU
+ * usage and/or system load. */
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * 2;
+ if (threads < 0) {
+ /* If we can't get the current number of processors, we pick a
+ * random number. */
+ threads = 16;
+ }
+ }
+ if (threads > GF_ASYNC_MAX_THREADS) {
+ threads = GF_ASYNC_MAX_THREADS;
+ }
+ uatomic_set(&gf_async_ctrl.max_threads, threads);
+}
+
+int32_t
+gf_async_init(glusterfs_ctx_t *ctx)
+{
+ sigset_t set;
+ gf_async_worker_t *worker;
+ uint32_t i;
+ int32_t ret;
+ bool running;
+
+ gf_async_cleanup();
+
+ if (!ctx->cmd_args.global_threading ||
+ (ctx->process_mode == GF_GLUSTERD_PROCESS)) {
+ return 0;
+ }
+
+ /* At the init time, the maximum number of threads has not yet been
+ * configured. We use a small starting value that will be layer dynamically
+ * adjusted when ctx->config.max_threads is updated. */
+ gf_async_adjust_threads(GF_ASYNC_SPARE_THREADS + 1);
+
+ gf_async_ctrl.pid = getpid();
+
+ __cds_wfs_init(&gf_async_ctrl.available);
+ cds_wfcq_init(&gf_async_ctrl.queue.head, &gf_async_ctrl.queue.tail);
+
+ gf_async_sync_init();
+
+ /* TODO: it would be cleaner to use dynamic memory, but at this point some
+ * memory management resources are not yet initialized. */
+ gf_async_ctrl.table = gf_async_workers;
+
+ /* We keep all workers in a stack. It will be used when a new thread needs
+ * to be created. */
+ for (i = GF_ASYNC_MAX_THREADS; i > 0; i--) {
+ worker = &gf_async_ctrl.table[i - 1];
+
+ worker->id = i - 1;
+ cds_wfs_node_init(&worker->stack);
+ cds_wfs_push(&gf_async_ctrl.available, &worker->stack);
+ }
+
+ /* Prepare the signal mask for regular workers and the leader. */
+ gf_async_sigmask_add(&gf_async_ctrl.sigmask_ctrl, GF_ASYNC_SIGCTRL);
+ gf_async_sigmask_add(&gf_async_ctrl.sigmask_queue, GF_ASYNC_SIGQUEUE);
+
+ /* TODO: this is needed to block our special signals in the current thread
+ * and all children that it starts. It would be cleaner to do it when
+ * signals are initialized, but there doesn't seem to be a unique
+ * place to do that, so for now we do it here. */
+ gf_async_sigmask_empty(&set);
+ gf_async_sigmask_add(&set, GF_ASYNC_SIGCTRL);
+ gf_async_sigmask_add(&set, GF_ASYNC_SIGQUEUE);
+ gf_async_sigmask_set(SIG_BLOCK, &set, NULL);
+
+ /* Configure the signal handlers. This is mostly for safety, not really
+ * needed, but it doesn't hurt. Note that the caller must ensure that the
+ * signals we need to run are already blocked in any thread already
+ * started. Otherwise this won't work. */
+ gf_async_signal_setup();
+
+ running = false;
+
+ /* We start the spare workers + 1 for the leader. */
+ for (i = 0; i < GF_ASYNC_SPARE_THREADS; i++) {
+ ret = gf_async_worker_create();
+ if (caa_unlikely(ret < 0)) {
+ /* This is the initial start up so we enforce that the spare
+ * threads are created. If this fails at the beginning, it's very
+ * unlikely that the async workers could do its job, so we abort
+ * the initialization. */
+ goto out;
+ }
+
+ /* Once the first thread is started, we can enable it to become the
+ * initial leader. */
+ if ((ret == 0) && !running) {
+ running = true;
+ gf_async_worker_enable();
+ }
+ }
+
+ if (caa_unlikely(!running)) {
+ gf_async_fatal(-ENOMEM, "No worker thread has started");
+ }
+
+ gf_async_ctrl.enabled = true;
+
+ ret = 0;
+
+out:
+ if (ret < 0) {
+ gf_async_error(ret, "Unable to initialize the thread pool.");
+ gf_async_fini();
+ }
+
+ return ret;
+}
diff --git a/libglusterfs/src/byte-order.h b/libglusterfs/src/byte-order.h
deleted file mode 100644
index 4101db2c71d..00000000000
--- a/libglusterfs/src/byte-order.h
+++ /dev/null
@@ -1,301 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _BYTE_ORDER_H
-#define _BYTE_ORDER_H
-
-#include <inttypes.h>
-
-#define LS1 0x00ffU
-#define MS1 0xff00U
-#define LS2 0x0000ffffU
-#define MS2 0xffff0000U
-#define LS4 0x00000000ffffffffULL
-#define MS4 0xffffffff00000000ULL
-
-
-static uint16_t (*hton16) (uint16_t);
-static uint32_t (*hton32) (uint32_t);
-static uint64_t (*hton64) (uint64_t);
-
-#define ntoh16 hton16
-#define ntoh32 hton32
-#define ntoh64 hton64
-
-static uint16_t (*htole16) (uint16_t);
-static uint32_t (*htole32) (uint32_t);
-static uint64_t (*htole64) (uint64_t);
-
-#define letoh16 htole16
-#define letoh32 htole32
-#define letoh64 htole64
-
-static uint16_t (*htobe16) (uint16_t);
-static uint32_t (*htobe32) (uint32_t);
-static uint64_t (*htobe64) (uint64_t);
-
-#define betoh16 htobe16
-#define betoh32 htobe32
-#define betoh64 htobe64
-
-
-#define do_swap2(x) (((x&LS1) << 8)|(((x&MS1) >> 8)))
-#define do_swap4(x) ((do_swap2(x&LS2) << 16)|(do_swap2((x&MS2) >> 16)))
-#define do_swap8(x) ((do_swap4(x&LS4) << 32)|(do_swap4((x&MS4) >> 32)))
-
-
-static inline uint16_t
-__swap16 (uint16_t x)
-{
- return do_swap2(x);
-}
-
-
-static inline uint32_t
-__swap32 (uint32_t x)
-{
- return do_swap4(x);
-}
-
-
-static inline uint64_t
-__swap64 (uint64_t x)
-{
- return do_swap8(x);
-}
-
-
-static inline uint16_t
-__noswap16 (uint16_t x)
-{
- return x;
-}
-
-
-static inline uint32_t
-__noswap32 (uint32_t x)
-{
- return x;
-}
-
-
-static inline uint64_t
-__noswap64 (uint64_t x)
-{
- return x;
-}
-
-
-static inline uint16_t
-__byte_order_n16 (uint16_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- hton16 = __swap16;
- hton32 = __swap32;
- hton64 = __swap64;
- } else {
- /* cpu is be */
- hton16 = __noswap16;
- hton32 = __noswap32;
- hton64 = __noswap64;
- }
-
- return hton16 (i);
-}
-
-
-static inline uint32_t
-__byte_order_n32 (uint32_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- hton16 = __swap16;
- hton32 = __swap32;
- hton64 = __swap64;
- } else {
- /* cpu is be */
- hton16 = __noswap16;
- hton32 = __noswap32;
- hton64 = __noswap64;
- }
-
- return hton32 (i);
-}
-
-
-static inline uint64_t
-__byte_order_n64 (uint64_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- hton16 = __swap16;
- hton32 = __swap32;
- hton64 = __swap64;
- } else {
- /* cpu is be */
- hton16 = __noswap16;
- hton32 = __noswap32;
- hton64 = __noswap64;
- }
-
- return hton64 (i);
-}
-
-
-static uint16_t (*hton16) (uint16_t) = __byte_order_n16;
-static uint32_t (*hton32) (uint32_t) = __byte_order_n32;
-static uint64_t (*hton64) (uint64_t) = __byte_order_n64;
-
-
-static inline uint16_t
-__byte_order_le16 (uint16_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- htole16 = __noswap16;
- htole32 = __noswap32;
- htole64 = __noswap64;
- } else {
- /* cpu is be */
- htole16 = __swap16;
- htole32 = __swap32;
- htole64 = __swap64;
- }
-
- return htole16 (i);
-}
-
-
-static inline uint32_t
-__byte_order_le32 (uint32_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- htole16 = __noswap16;
- htole32 = __noswap32;
- htole64 = __noswap64;
- } else {
- /* cpu is be */
- htole16 = __swap16;
- htole32 = __swap32;
- htole64 = __swap64;
- }
-
- return htole32 (i);
-}
-
-
-static inline uint64_t
-__byte_order_le64 (uint64_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- htole16 = __noswap16;
- htole32 = __noswap32;
- htole64 = __noswap64;
- } else {
- /* cpu is be */
- htole16 = __swap16;
- htole32 = __swap32;
- htole64 = __swap64;
- }
-
- return htole64 (i);
-}
-
-
-static uint16_t (*htole16) (uint16_t) = __byte_order_le16;
-static uint32_t (*htole32) (uint32_t) = __byte_order_le32;
-static uint64_t (*htole64) (uint64_t) = __byte_order_le64;
-
-
-static inline uint16_t
-__byte_order_be16 (uint16_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- htobe16 = __swap16;
- htobe32 = __swap32;
- htobe64 = __swap64;
- } else {
- /* cpu is be */
- htobe16 = __noswap16;
- htobe32 = __noswap32;
- htobe64 = __noswap64;
- }
-
- return htobe16 (i);
-}
-
-
-static inline uint32_t
-__byte_order_be32 (uint32_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- htobe16 = __swap16;
- htobe32 = __swap32;
- htobe64 = __swap64;
- } else {
- /* cpu is be */
- htobe16 = __noswap16;
- htobe32 = __noswap32;
- htobe64 = __noswap64;
- }
-
- return htobe32 (i);
-}
-
-
-static inline uint64_t
-__byte_order_be64 (uint64_t i)
-{
- uint32_t num = 1;
-
- if (((char *)(&num))[0] == 1) {
- /* cpu is le */
- htobe16 = __swap16;
- htobe32 = __swap32;
- htobe64 = __swap64;
- } else {
- /* cpu is be */
- htobe16 = __noswap16;
- htobe32 = __noswap32;
- htobe64 = __noswap64;
- }
-
- return htobe64 (i);
-}
-
-
-static uint16_t (*htobe16) (uint16_t) = __byte_order_be16;
-static uint32_t (*htobe32) (uint32_t) = __byte_order_be32;
-static uint64_t (*htobe64) (uint64_t) = __byte_order_be64;
-
-
-
-#endif /* _BYTE_ORDER_H */
diff --git a/libglusterfs/src/call-stub.c b/libglusterfs/src/call-stub.c
index 85a1aaa7b5c..ee84f08acd4 100644
--- a/libglusterfs/src/call-stub.c
+++ b/libglusterfs/src/call-stub.c
@@ -8,4153 +8,2460 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <openssl/md5.h>
#include <inttypes.h>
-#include "call-stub.h"
-#include "mem-types.h"
-
+#include "glusterfs/call-stub.h"
+#include "glusterfs/mem-types.h"
+#include "glusterfs/libglusterfs-messages.h"
static call_stub_t *
-stub_new (call_frame_t *frame,
- char wind,
- glusterfs_fop_t fop)
+stub_new(call_frame_t *frame, const char wind, const glusterfs_fop_t fop)
{
- call_stub_t *new = NULL;
+ call_stub_t *new = NULL;
+
+ GF_VALIDATE_OR_GOTO("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ new = mem_get0(frame->this->ctx->stub_mem_pool);
+ GF_VALIDATE_OR_GOTO("call-stub", new, out);
- new = mem_get0 (frame->this->ctx->stub_mem_pool);
- GF_VALIDATE_OR_GOTO ("call-stub", new, out);
+ new->frame = frame;
+ new->wind = wind;
+ new->fop = fop;
+ new->stub_mem_pool = frame->this->ctx->stub_mem_pool;
+ INIT_LIST_HEAD(&new->list);
- new->frame = frame;
- new->wind = wind;
- new->fop = fop;
- new->stub_mem_pool = frame->this->ctx->stub_mem_pool;
- INIT_LIST_HEAD (&new->list);
+ INIT_LIST_HEAD(&new->args_cbk.entries);
out:
- return new;
+ return new;
}
-
call_stub_t *
-fop_lookup_stub (call_frame_t *frame,
- fop_lookup_t fn,
- loc_t *loc,
- dict_t *xdata)
+fop_lookup_stub(call_frame_t *frame, fop_lookup_t fn, loc_t *loc, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_LOOKUP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.lookup.fn = fn;
-
- loc_copy (&stub->args.lookup.loc, loc);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_LOOKUP);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.lookup = fn;
+ args_lookup_store(&stub->args, loc, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_lookup_cbk_stub (call_frame_t *frame,
- fop_lookup_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *xdata,
- struct iatt *postparent)
+fop_lookup_cbk_stub(call_frame_t *frame, fop_lookup_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_LOOKUP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.lookup_cbk.fn = fn;
- stub->args.lookup_cbk.op_ret = op_ret;
- stub->args.lookup_cbk.op_errno = op_errno;
- if (inode)
- stub->args.lookup_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.lookup_cbk.buf = *buf;
- if (postparent)
- stub->args.lookup_cbk.postparent = *postparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_LOOKUP);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.lookup = fn;
+ args_lookup_cbk_store(&stub->args_cbk, op_ret, op_errno, inode, buf, xdata,
+ postparent);
out:
- return stub;
+ return stub;
}
-
-
call_stub_t *
-fop_stat_stub (call_frame_t *frame,
- fop_stat_t fn,
- loc_t *loc, dict_t *xdata)
+fop_stat_stub(call_frame_t *frame, fop_stat_t fn, loc_t *loc, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_STAT);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.stat.fn = fn;
- loc_copy (&stub->args.stat.loc, loc);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_STAT);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.stat = fn;
+ args_stat_store(&stub->args, loc, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_stat_cbk_stub (call_frame_t *frame,
- fop_stat_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf, dict_t *xdata)
+fop_stat_cbk_stub(call_frame_t *frame, fop_stat_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_STAT);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.stat_cbk.fn = fn;
- stub->args.stat_cbk.op_ret = op_ret;
- stub->args.stat_cbk.op_errno = op_errno;
- if (op_ret == 0)
- stub->args.stat_cbk.buf = *buf;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_STAT);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.stat = fn;
+ args_stat_cbk_store(&stub->args_cbk, op_ret, op_errno, buf, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fstat_stub (call_frame_t *frame,
- fop_fstat_t fn,
- fd_t *fd, dict_t *xdata)
+fop_fstat_stub(call_frame_t *frame, fop_fstat_t fn, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new(frame, 1, GF_FOP_FSTAT);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub = stub_new (frame, 1, GF_FOP_FSTAT);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn.fstat = fn;
+ args_fstat_store(&stub->args, fd, xdata);
+out:
+ return stub;
+}
- stub->args.fstat.fn = fn;
+call_stub_t *
+fop_fstat_cbk_stub(call_frame_t *frame, fop_fstat_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (fd)
- stub->args.fstat.fd = fd_ref (fd);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_FSTAT);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.fstat = fn;
+ args_fstat_cbk_store(&stub->args_cbk, op_ret, op_errno, buf, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fstat_cbk_stub (call_frame_t *frame,
- fop_fstat_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf, dict_t *xdata)
+fop_truncate_stub(call_frame_t *frame, fop_truncate_t fn, loc_t *loc, off_t off,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub = stub_new (frame, 0, GF_FOP_FSTAT);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.fstat_cbk.fn = fn;
- stub->args.fstat_cbk.op_ret = op_ret;
- stub->args.fstat_cbk.op_errno = op_errno;
- if (buf)
- stub->args.fstat_cbk.buf = *buf;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_TRUNCATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.truncate = fn;
+ args_truncate_store(&stub->args, loc, off, xdata);
out:
- return stub;
+ return stub;
}
+call_stub_t *
+fop_truncate_cbk_stub(call_frame_t *frame, fop_truncate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
-/* truncate */
+ stub = stub_new(frame, 0, GF_FOP_TRUNCATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+
+ stub->fn_cbk.truncate = fn;
+ args_truncate_cbk_store(&stub->args_cbk, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+out:
+ return stub;
+}
call_stub_t *
-fop_truncate_stub (call_frame_t *frame,
- fop_truncate_t fn,
- loc_t *loc,
- off_t off, dict_t *xdata)
+fop_ftruncate_stub(call_frame_t *frame, fop_ftruncate_t fn, fd_t *fd, off_t off,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_TRUNCATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 1, GF_FOP_FTRUNCATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.truncate.fn = fn;
- loc_copy (&stub->args.truncate.loc, loc);
- stub->args.truncate.off = off;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn.ftruncate = fn;
+ args_ftruncate_store(&stub->args, fd, off, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_truncate_cbk_stub (call_frame_t *frame,
- fop_truncate_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
+fop_ftruncate_cbk_stub(call_frame_t *frame, fop_ftruncate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_TRUNCATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 0, GF_FOP_FTRUNCATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.truncate_cbk.fn = fn;
- stub->args.truncate_cbk.op_ret = op_ret;
- stub->args.truncate_cbk.op_errno = op_errno;
- if (prebuf)
- stub->args.truncate_cbk.prebuf = *prebuf;
- if (postbuf)
- stub->args.truncate_cbk.postbuf = *postbuf;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn_cbk.ftruncate = fn;
+ args_ftruncate_cbk_store(&stub->args_cbk, op_ret, op_errno, prebuf, postbuf,
+ xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_ftruncate_stub (call_frame_t *frame,
- fop_ftruncate_t fn,
- fd_t *fd,
- off_t off, dict_t *xdata)
+fop_access_stub(call_frame_t *frame, fop_access_t fn, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_FTRUNCATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.ftruncate.fn = fn;
- if (fd)
- stub->args.ftruncate.fd = fd_ref (fd);
-
- stub->args.ftruncate.off = off;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_ACCESS);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.access = fn;
+ args_access_store(&stub->args, loc, mask, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_ftruncate_cbk_stub (call_frame_t *frame,
- fop_ftruncate_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+fop_access_cbk_stub(call_frame_t *frame, fop_access_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_FTRUNCATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.ftruncate_cbk.fn = fn;
- stub->args.ftruncate_cbk.op_ret = op_ret;
- stub->args.ftruncate_cbk.op_errno = op_errno;
- if (prebuf)
- stub->args.ftruncate_cbk.prebuf = *prebuf;
- if (postbuf)
- stub->args.ftruncate_cbk.postbuf = *postbuf;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_ACCESS);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.access = fn;
+ args_access_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_access_stub (call_frame_t *frame,
- fop_access_t fn,
- loc_t *loc,
- int32_t mask, dict_t *xdata)
+fop_readlink_stub(call_frame_t *frame, fop_readlink_t fn, loc_t *loc,
+ size_t size, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_ACCESS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.access.fn = fn;
- loc_copy (&stub->args.access.loc, loc);
- stub->args.access.mask = mask;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_READLINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.readlink = fn;
+ args_readlink_store(&stub->args, loc, size, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_access_cbk_stub (call_frame_t *frame,
- fop_access_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_readlink_cbk_stub(call_frame_t *frame, fop_readlink_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *stbuf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_ACCESS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.access_cbk.fn = fn;
- stub->args.access_cbk.op_ret = op_ret;
- stub->args.access_cbk.op_errno = op_errno;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_READLINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.readlink = fn;
+ args_readlink_cbk_store(&stub->args_cbk, op_ret, op_errno, path, stbuf,
+ xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_readlink_stub (call_frame_t *frame,
- fop_readlink_t fn,
- loc_t *loc,
- size_t size, dict_t *xdata)
+fop_mknod_stub(call_frame_t *frame, fop_mknod_t fn, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_READLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.readlink.fn = fn;
- loc_copy (&stub->args.readlink.loc, loc);
- stub->args.readlink.size = size;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_MKNOD);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.mknod = fn;
+ args_mknod_store(&stub->args, loc, mode, rdev, umask, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_readlink_cbk_stub (call_frame_t *frame,
- fop_readlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *sbuf, dict_t *xdata)
+fop_mknod_cbk_stub(call_frame_t *frame, fop_mknod_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_READLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.readlink_cbk.fn = fn;
- stub->args.readlink_cbk.op_ret = op_ret;
- stub->args.readlink_cbk.op_errno = op_errno;
- if (path)
- stub->args.readlink_cbk.buf = gf_strdup (path);
- if (sbuf)
- stub->args.readlink_cbk.sbuf = *sbuf;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_MKNOD);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.mknod = fn;
+ args_mknod_cbk_store(&stub->args_cbk, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_mknod_stub (call_frame_t *frame, fop_mknod_t fn, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata)
+fop_mkdir_stub(call_frame_t *frame, fop_mkdir_t fn, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_MKNOD);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.mknod.fn = fn;
- loc_copy (&stub->args.mknod.loc, loc);
- stub->args.mknod.mode = mode;
- stub->args.mknod.rdev = rdev;
- stub->args.mknod.umask = umask;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_MKDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.mkdir = fn;
+ args_mkdir_store(&stub->args, loc, mode, umask, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_mknod_cbk_stub (call_frame_t *frame,
- fop_mknod_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_mkdir_cbk_stub(call_frame_t *frame, fop_mkdir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_MKNOD);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.mknod_cbk.fn = fn;
- stub->args.mknod_cbk.op_ret = op_ret;
- stub->args.mknod_cbk.op_errno = op_errno;
- if (inode)
- stub->args.mknod_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.mknod_cbk.buf = *buf;
- if (preparent)
- stub->args.mknod_cbk.preparent = *preparent;
- if (postparent)
- stub->args.mknod_cbk.postparent = *postparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ call_stub_t *stub = NULL;
+
+ stub = stub_new(frame, 0, GF_FOP_MKDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.mkdir = fn;
+ args_mkdir_cbk_store(&stub->args_cbk, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_mkdir_stub (call_frame_t *frame, fop_mkdir_t fn,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
+fop_unlink_stub(call_frame_t *frame, fop_unlink_t fn, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_MKDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.mkdir.fn = fn;
- loc_copy (&stub->args.mkdir.loc, loc);
- stub->args.mkdir.mode = mode;
- stub->args.mkdir.umask = umask;
+ stub = stub_new(frame, 1, GF_FOP_UNLINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn.unlink = fn;
+ args_unlink_store(&stub->args, loc, xflag, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_mkdir_cbk_stub (call_frame_t *frame,
- fop_mkdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
+fop_unlink_cbk_stub(call_frame_t *frame, fop_unlink_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_MKDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.mkdir_cbk.fn = fn;
- stub->args.mkdir_cbk.op_ret = op_ret;
- stub->args.mkdir_cbk.op_errno = op_errno;
- if (inode)
- stub->args.mkdir_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.mkdir_cbk.buf = *buf;
- if (preparent)
- stub->args.mkdir_cbk.preparent = *preparent;
- if (postparent)
- stub->args.mkdir_cbk.postparent = *postparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ call_stub_t *stub = NULL;
+
+ stub = stub_new(frame, 0, GF_FOP_UNLINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.unlink = fn;
+ args_unlink_cbk_store(&stub->args_cbk, op_ret, op_errno, preparent,
+ postparent, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_unlink_stub (call_frame_t *frame,
- fop_unlink_t fn,
- loc_t *loc, int xflag, dict_t *xdata)
+fop_rmdir_stub(call_frame_t *frame, fop_rmdir_t fn, loc_t *loc, int flags,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub = stub_new (frame, 1, GF_FOP_UNLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 1, GF_FOP_RMDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.unlink.fn = fn;
- loc_copy (&stub->args.unlink.loc, loc);
- stub->args.unlink.xflag = xflag;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn.rmdir = fn;
+ args_rmdir_store(&stub->args, loc, flags, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_unlink_cbk_stub (call_frame_t *frame,
- fop_unlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_rmdir_cbk_stub(call_frame_t *frame, fop_rmdir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_UNLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- stub->args.unlink_cbk.fn = fn;
- stub->args.unlink_cbk.op_ret = op_ret;
- stub->args.unlink_cbk.op_errno = op_errno;
- if (preparent)
- stub->args.unlink_cbk.preparent = *preparent;
- if (postparent)
- stub->args.unlink_cbk.postparent = *postparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_RMDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.rmdir = fn;
+ args_rmdir_cbk_store(&stub->args_cbk, op_ret, op_errno, preparent,
+ postparent, xdata);
out:
- return stub;
+ return stub;
}
-
-
call_stub_t *
-fop_rmdir_stub (call_frame_t *frame, fop_rmdir_t fn,
- loc_t *loc, int flags, dict_t *xdata)
+fop_symlink_stub(call_frame_t *frame, fop_symlink_t fn, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_RMDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO("call-stub", linkname, out);
- stub->args.rmdir.fn = fn;
- loc_copy (&stub->args.rmdir.loc, loc);
- stub->args.rmdir.flags = flags;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_SYMLINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.symlink = fn;
+ args_symlink_store(&stub->args, linkname, loc, umask, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_rmdir_cbk_stub (call_frame_t *frame,
- fop_rmdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_symlink_cbk_stub(call_frame_t *frame, fop_symlink_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_RMDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.rmdir_cbk.fn = fn;
- stub->args.rmdir_cbk.op_ret = op_ret;
- stub->args.rmdir_cbk.op_errno = op_errno;
- if (preparent)
- stub->args.rmdir_cbk.preparent = *preparent;
- if (postparent)
- stub->args.rmdir_cbk.postparent = *postparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_SYMLINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.symlink = fn;
+ args_symlink_cbk_store(&stub->args_cbk, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_symlink_stub (call_frame_t *frame, fop_symlink_t fn,
- const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata)
+fop_rename_stub(call_frame_t *frame, fop_rename_t fn, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- GF_VALIDATE_OR_GOTO ("call-stub", linkname, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_SYMLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", oldloc, out);
+ GF_VALIDATE_OR_GOTO("call-stub", newloc, out);
- stub->args.symlink.fn = fn;
- stub->args.symlink.linkname = gf_strdup (linkname);
- stub->args.symlink.umask = umask;
- loc_copy (&stub->args.symlink.loc, loc);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_RENAME);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.rename = fn;
+ args_rename_store(&stub->args, oldloc, newloc, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_symlink_cbk_stub (call_frame_t *frame,
- fop_symlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_rename_cbk_stub(call_frame_t *frame, fop_rename_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_SYMLINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.symlink_cbk.fn = fn;
- stub->args.symlink_cbk.op_ret = op_ret;
- stub->args.symlink_cbk.op_errno = op_errno;
- if (inode)
- stub->args.symlink_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.symlink_cbk.buf = *buf;
- if (preparent)
- stub->args.symlink_cbk.preparent = *preparent;
- if (postparent)
- stub->args.symlink_cbk.postparent = *postparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ call_stub_t *stub = NULL;
+
+ stub = stub_new(frame, 0, GF_FOP_RENAME);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.rename = fn;
+ args_rename_cbk_store(&stub->args_cbk, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_rename_stub (call_frame_t *frame,
- fop_rename_t fn,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
+fop_link_stub(call_frame_t *frame, fop_link_t fn, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", oldloc, out);
- GF_VALIDATE_OR_GOTO ("call-stub", newloc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_RENAME);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", oldloc, out);
+ GF_VALIDATE_OR_GOTO("call-stub", newloc, out);
- stub->args.rename.fn = fn;
- loc_copy (&stub->args.rename.old, oldloc);
- loc_copy (&stub->args.rename.new, newloc);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_LINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.link = fn;
+ args_link_store(&stub->args, oldloc, newloc, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_rename_cbk_stub (call_frame_t *frame,
- fop_rename_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent, dict_t *xdata)
+fop_link_cbk_stub(call_frame_t *frame, fop_link_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_RENAME);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.rename_cbk.fn = fn;
- stub->args.rename_cbk.op_ret = op_ret;
- stub->args.rename_cbk.op_errno = op_errno;
- if (buf)
- stub->args.rename_cbk.buf = *buf;
- if (preoldparent)
- stub->args.rename_cbk.preoldparent = *preoldparent;
- if (postoldparent)
- stub->args.rename_cbk.postoldparent = *postoldparent;
- if (prenewparent)
- stub->args.rename_cbk.prenewparent = *prenewparent;
- if (postnewparent)
- stub->args.rename_cbk.postnewparent = *postnewparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ call_stub_t *stub = NULL;
+
+ stub = stub_new(frame, 0, GF_FOP_LINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.link = fn;
+ args_link_cbk_store(&stub->args_cbk, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_link_stub (call_frame_t *frame,
- fop_link_t fn,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
+fop_create_stub(call_frame_t *frame, fop_create_t fn, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", oldloc, out);
- GF_VALIDATE_OR_GOTO ("call-stub", newloc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_LINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.link.fn = fn;
- loc_copy (&stub->args.link.oldloc, oldloc);
- loc_copy (&stub->args.link.newloc, newloc);
-
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_CREATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.create = fn;
+ args_create_store(&stub->args, loc, flags, mode, umask, fd, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_link_cbk_stub (call_frame_t *frame,
- fop_link_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_create_cbk_stub(call_frame_t *frame, fop_create_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_LINK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.link_cbk.fn = fn;
- stub->args.link_cbk.op_ret = op_ret;
- stub->args.link_cbk.op_errno = op_errno;
- if (inode)
- stub->args.link_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.link_cbk.buf = *buf;
- if (preparent)
- stub->args.link_cbk.preparent = *preparent;
- if (postparent)
- stub->args.link_cbk.postparent = *postparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ call_stub_t *stub = NULL;
+
+ stub = stub_new(frame, 0, GF_FOP_CREATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.create = fn;
+ args_create_cbk_store(&stub->args_cbk, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_create_stub (call_frame_t *frame, fop_create_t fn,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata)
+fop_open_stub(call_frame_t *frame, fop_open_t fn, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_CREATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.create.fn = fn;
- loc_copy (&stub->args.create.loc, loc);
- stub->args.create.flags = flags;
- stub->args.create.mode = mode;
- stub->args.create.umask = umask;
- if (fd)
- stub->args.create.fd = fd_ref (fd);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_OPEN);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.open = fn;
+ args_open_store(&stub->args, loc, flags, fd, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_create_cbk_stub (call_frame_t *frame,
- fop_create_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+fop_open_cbk_stub(call_frame_t *frame, fop_open_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_CREATE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.create_cbk.fn = fn;
- stub->args.create_cbk.op_ret = op_ret;
- stub->args.create_cbk.op_errno = op_errno;
- if (fd)
- stub->args.create_cbk.fd = fd_ref (fd);
- if (inode)
- stub->args.create_cbk.inode = inode_ref (inode);
- if (buf)
- stub->args.create_cbk.buf = *buf;
- if (preparent)
- stub->args.create_cbk.preparent = *preparent;
- if (postparent)
- stub->args.create_cbk.postparent = *postparent;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ call_stub_t *stub = NULL;
+
+ stub = stub_new(frame, 0, GF_FOP_OPEN);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.open = fn;
+ args_open_cbk_store(&stub->args_cbk, op_ret, op_errno, fd, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_open_stub (call_frame_t *frame,
- fop_open_t fn,
- loc_t *loc,
- int32_t flags, fd_t *fd,
- dict_t *xdata)
+fop_readv_stub(call_frame_t *frame, fop_readv_t fn, fd_t *fd, size_t size,
+ off_t off, uint32_t flags, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
-
- stub = stub_new (frame, 1, GF_FOP_OPEN);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- stub->args.open.fn = fn;
- loc_copy (&stub->args.open.loc, loc);
- stub->args.open.flags = flags;
- if (fd)
- stub->args.open.fd = fd_ref (fd);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_READ);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.readv = fn;
+ args_readv_store(&stub->args, fd, size, off, flags, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_open_cbk_stub (call_frame_t *frame,
- fop_open_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd, dict_t *xdata)
+fop_readv_cbk_stub(call_frame_t *frame, fop_readv_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_OPEN);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.open_cbk.fn = fn;
- stub->args.open_cbk.op_ret = op_ret;
- stub->args.open_cbk.op_errno = op_errno;
- if (fd)
- stub->args.open_cbk.fd = fd_ref (fd);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_READ);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.readv = fn;
+ args_readv_cbk_store(&stub->args_cbk, op_ret, op_errno, vector, count,
+ stbuf, iobref, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_readv_stub (call_frame_t *frame,
- fop_readv_t fn,
- fd_t *fd,
- size_t size,
- off_t off, uint32_t flags, dict_t *xdata)
+fop_writev_stub(call_frame_t *frame, fop_writev_t fn, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO("call-stub", vector, out);
- stub = stub_new (frame, 1, GF_FOP_READ);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.readv.fn = fn;
- if (fd)
- stub->args.readv.fd = fd_ref (fd);
- stub->args.readv.size = size;
- stub->args.readv.off = off;
- stub->args.readv.flags = flags;
-
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_WRITE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.writev = fn;
+ args_writev_store(&stub->args, fd, vector, count, off, flags, iobref,
+ xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_readv_cbk_stub (call_frame_t *frame,
- fop_readv_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref, dict_t *xdata)
+fop_writev_cbk_stub(call_frame_t *frame, fop_writev_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_READ);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- stub->args.readv_cbk.fn = fn;
- stub->args.readv_cbk.op_ret = op_ret;
- stub->args.readv_cbk.op_errno = op_errno;
- if (op_ret >= 0) {
- stub->args.readv_cbk.vector = iov_dup (vector, count);
- stub->args.readv_cbk.count = count;
- stub->args.readv_cbk.stbuf = *stbuf;
- stub->args.readv_cbk.iobref = iobref_ref (iobref);
- }
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_WRITE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.writev = fn;
+ args_writev_cbk_store(&stub->args_cbk, op_ret, op_errno, prebuf, postbuf,
+ xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_writev_stub (call_frame_t *frame,
- fop_writev_t fn,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
+fop_flush_stub(call_frame_t *frame, fop_flush_t fn, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", vector, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_WRITE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.writev.fn = fn;
- if (fd)
- stub->args.writev.fd = fd_ref (fd);
- stub->args.writev.vector = iov_dup (vector, count);
- stub->args.writev.count = count;
- stub->args.writev.off = off;
- stub->args.writev.flags = flags;
- stub->args.writev.iobref = iobref_ref (iobref);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_FLUSH);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.flush = fn;
+ args_flush_store(&stub->args, fd, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_writev_cbk_stub (call_frame_t *frame,
- fop_writev_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+fop_flush_cbk_stub(call_frame_t *frame, fop_flush_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_WRITE);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.writev_cbk.fn = fn;
- stub->args.writev_cbk.op_ret = op_ret;
- stub->args.writev_cbk.op_errno = op_errno;
- if (op_ret >= 0)
- stub->args.writev_cbk.postbuf = *postbuf;
- if (prebuf)
- stub->args.writev_cbk.prebuf = *prebuf;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_FLUSH);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.flush = fn;
+ args_flush_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
-
-
call_stub_t *
-fop_flush_stub (call_frame_t *frame,
- fop_flush_t fn,
- fd_t *fd, dict_t *xdata)
+fop_fsync_stub(call_frame_t *frame, fop_fsync_t fn, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new(frame, 1, GF_FOP_FSYNC);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub = stub_new (frame, 1, GF_FOP_FLUSH);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn.fsync = fn;
+ args_fsync_store(&stub->args, fd, datasync, xdata);
+out:
+ return stub;
+}
- stub->args.flush.fn = fn;
- if (fd)
- stub->args.flush.fd = fd_ref (fd);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+call_stub_t *
+fop_fsync_cbk_stub(call_frame_t *frame, fop_fsync_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ stub = stub_new(frame, 0, GF_FOP_FSYNC);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+
+ stub->fn_cbk.fsync = fn;
+ args_fsync_cbk_store(&stub->args_cbk, op_ret, op_errno, prebuf, postbuf,
+ xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_flush_cbk_stub (call_frame_t *frame,
- fop_flush_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_opendir_stub(call_frame_t *frame, fop_opendir_t fn, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub = stub_new (frame, 0, GF_FOP_FLUSH);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.flush_cbk.fn = fn;
- stub->args.flush_cbk.op_ret = op_ret;
- stub->args.flush_cbk.op_errno = op_errno;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_OPENDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.opendir = fn;
+ args_opendir_store(&stub->args, loc, fd, xdata);
out:
- return stub;
+ return stub;
}
+call_stub_t *
+fop_opendir_cbk_stub(call_frame_t *frame, fop_opendir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ stub = stub_new(frame, 0, GF_FOP_OPENDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.opendir = fn;
+ args_opendir_cbk_store(&stub->args_cbk, op_ret, op_errno, fd, xdata);
+out:
+ return stub;
+}
call_stub_t *
-fop_fsync_stub (call_frame_t *frame,
- fop_fsync_t fn,
- fd_t *fd,
- int32_t datasync, dict_t *xdata)
+fop_fsyncdir_stub(call_frame_t *frame, fop_fsyncdir_t fn, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_FSYNC);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.fsync.fn = fn;
- if (fd)
- stub->args.fsync.fd = fd_ref (fd);
- stub->args.fsync.datasync = datasync;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_FSYNCDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.fsyncdir = fn;
+ args_fsyncdir_store(&stub->args, fd, datasync, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fsync_cbk_stub (call_frame_t *frame,
- fop_fsync_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+fop_fsyncdir_cbk_stub(call_frame_t *frame, fop_fsyncdir_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_FSYNC);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.fsync_cbk.fn = fn;
- stub->args.fsync_cbk.op_ret = op_ret;
- stub->args.fsync_cbk.op_errno = op_errno;
- if (prebuf)
- stub->args.fsync_cbk.prebuf = *prebuf;
- if (postbuf)
- stub->args.fsync_cbk.postbuf = *postbuf;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_FSYNCDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.fsyncdir = fn;
+ args_fsyncdir_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_opendir_stub (call_frame_t *frame,
- fop_opendir_t fn,
- loc_t *loc, fd_t *fd, dict_t *xdata)
+fop_statfs_stub(call_frame_t *frame, fop_statfs_t fn, loc_t *loc, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_OPENDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.opendir.fn = fn;
- loc_copy (&stub->args.opendir.loc, loc);
- if (fd)
- stub->args.opendir.fd = fd_ref (fd);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_STATFS);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.statfs = fn;
+ args_statfs_store(&stub->args, loc, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_opendir_cbk_stub (call_frame_t *frame,
- fop_opendir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd, dict_t *xdata)
+fop_statfs_cbk_stub(call_frame_t *frame, fop_statfs_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct statvfs *buf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_OPENDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.opendir_cbk.fn = fn;
- stub->args.opendir_cbk.op_ret = op_ret;
- stub->args.opendir_cbk.op_errno = op_errno;
-
- if (fd)
- stub->args.opendir_cbk.fd = fd_ref (fd);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_STATFS);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.statfs = fn;
+ args_statfs_cbk_store(&stub->args_cbk, op_ret, op_errno, buf, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fsyncdir_stub (call_frame_t *frame,
- fop_fsyncdir_t fn,
- fd_t *fd,
- int32_t datasync, dict_t *xdata)
+fop_setxattr_stub(call_frame_t *frame, fop_setxattr_t fn, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_FSYNCDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.fsyncdir.fn = fn;
- if (fd)
- stub->args.fsyncdir.fd = fd_ref (fd);
- stub->args.fsyncdir.datasync = datasync;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_SETXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.setxattr = fn;
+ args_setxattr_store(&stub->args, loc, dict, flags, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fsyncdir_cbk_stub (call_frame_t *frame,
- fop_fsyncdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_setxattr_cbk_stub(call_frame_t *frame, fop_setxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_FSYNCDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- stub->args.fsyncdir_cbk.fn = fn;
- stub->args.fsyncdir_cbk.op_ret = op_ret;
- stub->args.fsyncdir_cbk.op_errno = op_errno;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_SETXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.setxattr = fn;
+ args_setxattr_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_statfs_stub (call_frame_t *frame,
- fop_statfs_t fn,
- loc_t *loc, dict_t *xdata)
+fop_getxattr_stub(call_frame_t *frame, fop_getxattr_t fn, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_STATFS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
- stub->args.statfs.fn = fn;
- loc_copy (&stub->args.statfs.loc, loc);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_GETXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.getxattr = fn;
+ args_getxattr_store(&stub->args, loc, name, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_statfs_cbk_stub (call_frame_t *frame,
- fop_statfs_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf, dict_t *xdata)
+fop_getxattr_cbk_stub(call_frame_t *frame, fop_getxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_STATFS);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.statfs_cbk.fn = fn;
- stub->args.statfs_cbk.op_ret = op_ret;
- stub->args.statfs_cbk.op_errno = op_errno;
- if (op_ret == 0)
- stub->args.statfs_cbk.buf = *buf;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_GETXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.getxattr = fn;
+ args_getxattr_cbk_store(&stub->args_cbk, op_ret, op_errno, dict, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_setxattr_stub (call_frame_t *frame,
- fop_setxattr_t fn,
- loc_t *loc,
- dict_t *dict,
- int32_t flags, dict_t *xdata)
+fop_fsetxattr_stub(call_frame_t *frame, fop_fsetxattr_t fn, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_SETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fd, out);
- stub->args.setxattr.fn = fn;
- loc_copy (&stub->args.setxattr.loc, loc);
- /* TODO */
- if (dict)
- stub->args.setxattr.dict = dict_ref (dict);
- stub->args.setxattr.flags = flags;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_FSETXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.fsetxattr = fn;
+ args_fsetxattr_store(&stub->args, fd, dict, flags, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_setxattr_cbk_stub (call_frame_t *frame,
- fop_setxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_fsetxattr_cbk_stub(call_frame_t *frame, fop_fsetxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_SETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.setxattr_cbk.fn = fn;
- stub->args.setxattr_cbk.op_ret = op_ret;
- stub->args.setxattr_cbk.op_errno = op_errno;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_FSETXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.fsetxattr = fn;
+ args_fsetxattr_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_getxattr_stub (call_frame_t *frame,
- fop_getxattr_t fn,
- loc_t *loc,
+fop_fgetxattr_stub(call_frame_t *frame, fop_fgetxattr_t fn, fd_t *fd,
const char *name, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
-
- stub = stub_new (frame, 1, GF_FOP_GETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- stub->args.getxattr.fn = fn;
- loc_copy (&stub->args.getxattr.loc, loc);
+ GF_VALIDATE_OR_GOTO("call-stub", fd, out);
- if (name)
- stub->args.getxattr.name = gf_strdup (name);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_FGETXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.fgetxattr = fn;
+ args_fgetxattr_store(&stub->args, fd, name, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_getxattr_cbk_stub (call_frame_t *frame,
- fop_getxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+fop_fgetxattr_cbk_stub(call_frame_t *frame, fop_fgetxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_GETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.getxattr_cbk.fn = fn;
- stub->args.getxattr_cbk.op_ret = op_ret;
- stub->args.getxattr_cbk.op_errno = op_errno;
- /* TODO */
- if (dict)
- stub->args.getxattr_cbk.dict = dict_ref (dict);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_GETXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.fgetxattr = fn;
+ args_fgetxattr_cbk_store(&stub->args_cbk, op_ret, op_errno, dict, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fsetxattr_stub (call_frame_t *frame,
- fop_fsetxattr_t fn,
- fd_t *fd,
- dict_t *dict,
- int32_t flags, dict_t *xdata)
+fop_removexattr_stub(call_frame_t *frame, fop_removexattr_t fn, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", fd, out);
+ GF_VALIDATE_OR_GOTO("call-stub", loc, out);
+ GF_VALIDATE_OR_GOTO("call-stub", name, out);
- stub = stub_new (frame, 1, GF_FOP_FSETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.fsetxattr.fn = fn;
- stub->args.fsetxattr.fd = fd_ref (fd);
-
- /* TODO */
- if (dict)
- stub->args.fsetxattr.dict = dict_ref (dict);
- stub->args.fsetxattr.flags = flags;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_REMOVEXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.removexattr = fn;
+ args_removexattr_store(&stub->args, loc, name, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fsetxattr_cbk_stub (call_frame_t *frame,
- fop_fsetxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_removexattr_cbk_stub(call_frame_t *frame, fop_removexattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_FSETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.fsetxattr_cbk.fn = fn;
- stub->args.fsetxattr_cbk.op_ret = op_ret;
- stub->args.fsetxattr_cbk.op_errno = op_errno;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_REMOVEXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.removexattr = fn;
+ args_removexattr_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fgetxattr_stub (call_frame_t *frame,
- fop_fgetxattr_t fn,
- fd_t *fd,
- const char *name, dict_t *xdata)
+fop_fremovexattr_stub(call_frame_t *frame, fop_fremovexattr_t fn, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", fd, out);
-
- stub = stub_new (frame, 1, GF_FOP_FGETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- stub->args.fgetxattr.fn = fn;
- stub->args.fgetxattr.fd = fd_ref (fd);
+ GF_VALIDATE_OR_GOTO("call-stub", fd, out);
+ GF_VALIDATE_OR_GOTO("call-stub", name, out);
- if (name)
- stub->args.fgetxattr.name = gf_strdup (name);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_FREMOVEXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.fremovexattr = fn;
+ args_fremovexattr_store(&stub->args, fd, name, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fgetxattr_cbk_stub (call_frame_t *frame,
- fop_fgetxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+fop_fremovexattr_cbk_stub(call_frame_t *frame, fop_fremovexattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_GETXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.fgetxattr_cbk.fn = fn;
- stub->args.fgetxattr_cbk.op_ret = op_ret;
- stub->args.fgetxattr_cbk.op_errno = op_errno;
+ call_stub_t *stub = NULL;
- /* TODO */
- if (dict)
- stub->args.fgetxattr_cbk.dict = dict_ref (dict);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_FREMOVEXATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.fremovexattr = fn;
+ args_fremovexattr_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_removexattr_stub (call_frame_t *frame,
- fop_removexattr_t fn,
- loc_t *loc,
- const char *name, dict_t *xdata)
+fop_lk_stub(call_frame_t *frame, fop_lk_t fn, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", loc, out);
- GF_VALIDATE_OR_GOTO ("call-stub", name, out);
+ GF_VALIDATE_OR_GOTO("call-stub", lock, out);
- stub = stub_new (frame, 1, GF_FOP_REMOVEXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.removexattr.fn = fn;
- loc_copy (&stub->args.removexattr.loc, loc);
- stub->args.removexattr.name = gf_strdup (name);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_LK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.lk = fn;
+ args_lk_store(&stub->args, fd, cmd, lock, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_removexattr_cbk_stub (call_frame_t *frame,
- fop_removexattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_lk_cbk_stub(call_frame_t *frame, fop_lk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_REMOVEXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.removexattr_cbk.fn = fn;
- stub->args.removexattr_cbk.op_ret = op_ret;
- stub->args.removexattr_cbk.op_errno = op_errno;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_LK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.lk = fn;
+ args_lk_cbk_store(&stub->args_cbk, op_ret, op_errno, lock, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fremovexattr_stub (call_frame_t *frame,
- fop_fremovexattr_t fn,
- fd_t *fd,
- const char *name, dict_t *xdata)
+fop_inodelk_stub(call_frame_t *frame, fop_inodelk_t fn, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", fd, out);
- GF_VALIDATE_OR_GOTO ("call-stub", name, out);
+ GF_VALIDATE_OR_GOTO("call-stub", lock, out);
- stub = stub_new (frame, 1, GF_FOP_FREMOVEXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.fremovexattr.fn = fn;
- stub->args.fremovexattr.fd = fd_ref (fd);
- stub->args.fremovexattr.name = gf_strdup (name);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_INODELK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.inodelk = fn;
+ args_inodelk_store(&stub->args, volume, loc, cmd, lock, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fremovexattr_cbk_stub (call_frame_t *frame,
- fop_fremovexattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_inodelk_cbk_stub(call_frame_t *frame, fop_inodelk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_FREMOVEXATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.fremovexattr_cbk.fn = fn;
- stub->args.fremovexattr_cbk.op_ret = op_ret;
- stub->args.fremovexattr_cbk.op_errno = op_errno;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_INODELK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.inodelk = fn;
+ args_inodelk_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_lk_stub (call_frame_t *frame,
- fop_lk_t fn,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
+fop_finodelk_stub(call_frame_t *frame, fop_finodelk_t fn, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", lock, out);
+ GF_VALIDATE_OR_GOTO("call-stub", lock, out);
- stub = stub_new (frame, 1, GF_FOP_LK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 1, GF_FOP_FINODELK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.lk.fn = fn;
- if (fd)
- stub->args.lk.fd = fd_ref (fd);
- stub->args.lk.cmd = cmd;
- stub->args.lk.lock = *lock;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn.finodelk = fn;
+ args_finodelk_store(&stub->args, volume, fd, cmd, lock, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_lk_cbk_stub (call_frame_t *frame,
- fop_lk_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *lock, dict_t *xdata)
+fop_finodelk_cbk_stub(call_frame_t *frame, fop_inodelk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_LK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.lk_cbk.fn = fn;
- stub->args.lk_cbk.op_ret = op_ret;
- stub->args.lk_cbk.op_errno = op_errno;
- if (op_ret == 0)
- stub->args.lk_cbk.lock = *lock;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_FINODELK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.finodelk = fn;
+ args_finodelk_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_inodelk_stub (call_frame_t *frame, fop_inodelk_t fn,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
+fop_entrylk_stub(call_frame_t *frame, fop_entrylk_t fn, const char *volume,
+ loc_t *loc, const char *name, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", lock, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_INODELK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 1, GF_FOP_ENTRYLK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.inodelk.fn = fn;
-
- if (volume)
- stub->args.inodelk.volume = gf_strdup (volume);
-
- loc_copy (&stub->args.inodelk.loc, loc);
- stub->args.inodelk.cmd = cmd;
- stub->args.inodelk.lock = *lock;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn.entrylk = fn;
+ args_entrylk_store(&stub->args, volume, loc, name, cmd, type, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_inodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+fop_entrylk_cbk_stub(call_frame_t *frame, fop_entrylk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new(frame, 0, GF_FOP_ENTRYLK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub = stub_new (frame, 0, GF_FOP_INODELK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn_cbk.entrylk = fn;
+ args_entrylk_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
+out:
+ return stub;
+}
- stub->args.inodelk_cbk.fn = fn;
- stub->args.inodelk_cbk.op_ret = op_ret;
- stub->args.inodelk_cbk.op_errno = op_errno;
+call_stub_t *
+fop_fentrylk_stub(call_frame_t *frame, fop_fentrylk_t fn, const char *volume,
+ fd_t *fd, const char *name, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_FENTRYLK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.fentrylk = fn;
+ args_fentrylk_store(&stub->args, volume, fd, name, cmd, type, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_finodelk_stub (call_frame_t *frame, fop_finodelk_t fn,
- const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
+fop_fentrylk_cbk_stub(call_frame_t *frame, fop_fentrylk_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", lock, out);
+ stub = stub_new(frame, 0, GF_FOP_FENTRYLK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub = stub_new (frame, 1, GF_FOP_FINODELK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn_cbk.fentrylk = fn;
+ args_fentrylk_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
+out:
+ return stub;
+}
- stub->args.finodelk.fn = fn;
+call_stub_t *
+fop_readdirp_cbk_stub(call_frame_t *frame, fop_readdirp_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (fd)
- stub->args.finodelk.fd = fd_ref (fd);
+ stub = stub_new(frame, 0, GF_FOP_READDIRP);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- if (volume)
- stub->args.finodelk.volume = gf_strdup (volume);
+ stub->fn_cbk.readdirp = fn;
+ args_readdirp_cbk_store(&stub->args_cbk, op_ret, op_errno, entries, xdata);
+out:
+ return stub;
+}
- stub->args.finodelk.cmd = cmd;
- stub->args.finodelk.lock = *lock;
+call_stub_t *
+fop_readdir_cbk_stub(call_frame_t *frame, fop_readdir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_READDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.readdir = fn;
+ args_readdir_cbk_store(&stub->args_cbk, op_ret, op_errno, entries, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_finodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+fop_readdir_stub(call_frame_t *frame, fop_readdir_t fn, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new(frame, 1, GF_FOP_READDIR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub = stub_new (frame, 0, GF_FOP_FINODELK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn.readdir = fn;
+ args_readdir_store(&stub->args, fd, size, off, xdata);
+out:
+ return stub;
+}
- stub->args.finodelk_cbk.fn = fn;
- stub->args.finodelk_cbk.op_ret = op_ret;
- stub->args.finodelk_cbk.op_errno = op_errno;
+call_stub_t *
+fop_readdirp_stub(call_frame_t *frame, fop_readdirp_t fn, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_READDIRP);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.readdirp = fn;
+ args_readdirp_store(&stub->args, fd, size, off, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_entrylk_stub (call_frame_t *frame, fop_entrylk_t fn,
- const char *volume, loc_t *loc, const char *name,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+fop_rchecksum_stub(call_frame_t *frame, fop_rchecksum_t fn, fd_t *fd,
+ off_t offset, int32_t len, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 1, GF_FOP_ENTRYLK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- stub->args.entrylk.fn = fn;
+ GF_VALIDATE_OR_GOTO("call-stub", fd, out);
- if (volume)
- stub->args.entrylk.volume = gf_strdup (volume);
+ stub = stub_new(frame, 1, GF_FOP_RCHECKSUM);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- loc_copy (&stub->args.entrylk.loc, loc);
+ stub->fn.rchecksum = fn;
+ args_rchecksum_store(&stub->args, fd, offset, len, xdata);
+out:
+ return stub;
+}
- stub->args.entrylk.cmd = cmd;
- stub->args.entrylk.type = type;
- if (name)
- stub->args.entrylk.name = gf_strdup (name);
+call_stub_t *
+fop_rchecksum_cbk_stub(call_frame_t *frame, fop_rchecksum_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_RCHECKSUM);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.rchecksum = fn;
+ args_rchecksum_cbk_store(&stub->args_cbk, op_ret, op_errno, weak_checksum,
+ strong_checksum, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_entrylk_cbk_stub (call_frame_t *frame, fop_entrylk_cbk_t fn,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+fop_xattrop_cbk_stub(call_frame_t *frame, fop_xattrop_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new(frame, 0, GF_FOP_XATTROP);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub = stub_new (frame, 0, GF_FOP_ENTRYLK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn_cbk.xattrop = fn;
+ args_xattrop_cbk_store(&stub->args_cbk, op_ret, op_errno, xattr, xdata);
+out:
+ return stub;
+}
- stub->args.entrylk_cbk.fn = fn;
- stub->args.entrylk_cbk.op_ret = op_ret;
- stub->args.entrylk_cbk.op_errno = op_errno;
+call_stub_t *
+fop_fxattrop_cbk_stub(call_frame_t *frame, fop_fxattrop_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_FXATTROP);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.fxattrop = fn;
+ args_xattrop_cbk_store(&stub->args_cbk, op_ret, op_errno, xattr, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fentrylk_stub (call_frame_t *frame, fop_fentrylk_t fn,
- const char *volume, fd_t *fd, const char *name,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+fop_xattrop_stub(call_frame_t *frame, fop_xattrop_t fn, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO("call-stub", xattr, out);
- stub = stub_new (frame, 1, GF_FOP_FENTRYLK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 1, GF_FOP_XATTROP);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.fentrylk.fn = fn;
+ stub->fn.xattrop = fn;
+ args_xattrop_store(&stub->args, loc, optype, xattr, xdata);
+out:
+ return stub;
+}
- if (volume)
- stub->args.fentrylk.volume = gf_strdup (volume);
+call_stub_t *
+fop_fxattrop_stub(call_frame_t *frame, fop_fxattrop_t fn, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (fd)
- stub->args.fentrylk.fd = fd_ref (fd);
- stub->args.fentrylk.cmd = cmd;
- stub->args.fentrylk.type = type;
- if (name)
- stub->args.fentrylk.name = gf_strdup (name);
+ GF_VALIDATE_OR_GOTO("call-stub", xattr, out);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_FXATTROP);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.fxattrop = fn;
+ args_fxattrop_store(&stub->args, fd, optype, xattr, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fentrylk_cbk_stub (call_frame_t *frame, fop_fentrylk_cbk_t fn,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+fop_setattr_cbk_stub(call_frame_t *frame, fop_setattr_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new(frame, 0, GF_FOP_SETATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub = stub_new (frame, 0, GF_FOP_FENTRYLK);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn_cbk.setattr = fn;
+ args_setattr_cbk_store(&stub->args_cbk, op_ret, op_errno, statpre, statpost,
+ xdata);
+out:
+ return stub;
+}
- stub->args.fentrylk_cbk.fn = fn;
- stub->args.fentrylk_cbk.op_ret = op_ret;
- stub->args.fentrylk_cbk.op_errno = op_errno;
+call_stub_t *
+fop_fsetattr_cbk_stub(call_frame_t *frame, fop_setattr_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_FSETATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.fsetattr = fn;
+ args_fsetattr_cbk_store(&stub->args_cbk, op_ret, op_errno, statpre,
+ statpost, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_readdirp_cbk_stub (call_frame_t *frame,
- fop_readdirp_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
+fop_setattr_stub(call_frame_t *frame, fop_setattr_t fn, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- gf_dirent_t *stub_entry = NULL, *entry = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_READDIRP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.readdirp_cbk.fn = fn;
- stub->args.readdirp_cbk.op_ret = op_ret;
- stub->args.readdirp_cbk.op_errno = op_errno;
- INIT_LIST_HEAD (&stub->args.readdirp_cbk.entries.list);
-
- /* This check must come after the init of head above
- * so we're sure the list is empty for list_empty.
- */
- GF_VALIDATE_OR_GOTO ("call-stub", entries, out);
-
- if (op_ret > 0) {
- list_for_each_entry (entry, &entries->list, list) {
- stub_entry = gf_dirent_for_name (entry->d_name);
- if (!stub_entry)
- goto out;
- stub_entry->d_off = entry->d_off;
- stub_entry->d_ino = entry->d_ino;
- stub_entry->d_stat = entry->d_stat;
- list_add_tail (&stub_entry->list,
- &stub->args.readdirp_cbk.entries.list);
- }
- }
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
+
+ stub = stub_new(frame, 1, GF_FOP_SETATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.setattr = fn;
+ args_setattr_store(&stub->args, loc, stbuf, valid, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_readdir_cbk_stub (call_frame_t *frame,
- fop_readdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
+fop_fsetattr_stub(call_frame_t *frame, fop_fsetattr_t fn, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- gf_dirent_t *stub_entry = NULL, *entry = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
-
- stub = stub_new (frame, 0, GF_FOP_READDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
-
- stub->args.readdir_cbk.fn = fn;
- stub->args.readdir_cbk.op_ret = op_ret;
- stub->args.readdir_cbk.op_errno = op_errno;
- INIT_LIST_HEAD (&stub->args.readdir_cbk.entries.list);
-
- /* This check must come after the init of head above
- * so we're sure the list is empty for list_empty.
- */
- GF_VALIDATE_OR_GOTO ("call-stub", entries, out);
-
- if (op_ret > 0) {
- list_for_each_entry (entry, &entries->list, list) {
- stub_entry = gf_dirent_for_name (entry->d_name);
- if (!stub_entry)
- goto out;
- stub_entry->d_off = entry->d_off;
- stub_entry->d_ino = entry->d_ino;
-
- list_add_tail (&stub_entry->list,
- &stub->args.readdir_cbk.entries.list);
- }
- }
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
+ stub = stub_new(frame, 1, GF_FOP_FSETATTR);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+
+ stub->fn.fsetattr = fn;
+ args_fsetattr_store(&stub->args, fd, stbuf, valid, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_readdir_stub (call_frame_t *frame,
- fop_readdir_t fn,
- fd_t *fd,
- size_t size,
- off_t off, dict_t *xdata)
+fop_fallocate_cbk_stub(call_frame_t *frame, fop_fallocate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- stub = stub_new (frame, 1, GF_FOP_READDIR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- stub->args.readdir.fn = fn;
- stub->args.readdir.fd = fd_ref (fd);
- stub->args.readdir.size = size;
- stub->args.readdir.off = off;
+ stub = stub_new(frame, 0, GF_FOP_FALLOCATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn_cbk.fallocate = fn;
+ args_fallocate_cbk_store(&stub->args_cbk, op_ret, op_errno, statpre,
+ statpost, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_readdirp_stub (call_frame_t *frame,
- fop_readdirp_t fn,
- fd_t *fd,
- size_t size,
- off_t off,
- dict_t *xdata)
+fop_fallocate_stub(call_frame_t *frame, fop_fallocate_t fn, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_READDIRP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- stub->args.readdirp.fn = fn;
- stub->args.readdirp.fd = fd_ref (fd);
- stub->args.readdirp.size = size;
- stub->args.readdirp.off = off;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_FALLOCATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.fallocate = fn;
+ args_fallocate_store(&stub->args, fd, mode, offset, len, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_rchecksum_stub (call_frame_t *frame,
- fop_rchecksum_t fn,
- fd_t *fd, off_t offset,
- int32_t len, dict_t *xdata)
+fop_discard_cbk_stub(call_frame_t *frame, fop_discard_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", fd, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_RCHECKSUM);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 0, GF_FOP_DISCARD);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.rchecksum.fn = fn;
- stub->args.rchecksum.fd = fd_ref (fd);
- stub->args.rchecksum.offset = offset;
- stub->args.rchecksum.len = len;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn_cbk.discard = fn;
+ args_discard_cbk_store(&stub->args_cbk, op_ret, op_errno, statpre, statpost,
+ xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_rchecksum_cbk_stub (call_frame_t *frame,
- fop_rchecksum_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- uint32_t weak_checksum,
- uint8_t *strong_checksum, dict_t *xdata)
+fop_discard_stub(call_frame_t *frame, fop_discard_t fn, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ stub = stub_new(frame, 1, GF_FOP_DISCARD);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub = stub_new (frame, 0, GF_FOP_RCHECKSUM);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub->fn.discard = fn;
+ args_discard_store(&stub->args, fd, offset, len, xdata);
+out:
+ return stub;
+}
- stub->args.rchecksum_cbk.fn = fn;
- stub->args.rchecksum_cbk.op_ret = op_ret;
- stub->args.rchecksum_cbk.op_errno = op_errno;
+call_stub_t *
+fop_zerofill_cbk_stub(call_frame_t *frame, fop_zerofill_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (op_ret >= 0)
- {
- stub->args.rchecksum_cbk.weak_checksum =
- weak_checksum;
+ stub = stub_new(frame, 0, GF_FOP_ZEROFILL);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.rchecksum_cbk.strong_checksum =
- memdup (strong_checksum, MD5_DIGEST_LENGTH);
- }
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn_cbk.zerofill = fn;
+ args_zerofill_cbk_store(&stub->args_cbk, op_ret, op_errno, statpre,
+ statpost, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_xattrop_cbk_stub (call_frame_t *frame,
- fop_xattrop_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+fop_zerofill_stub(call_frame_t *frame, fop_zerofill_t fn, fd_t *fd,
+ off_t offset, off_t len, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_XATTROP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- stub->args.xattrop_cbk.fn = fn;
- stub->args.xattrop_cbk.op_ret = op_ret;
- stub->args.xattrop_cbk.op_errno = op_errno;
-
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 1, GF_FOP_ZEROFILL);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn.zerofill = fn;
+ args_zerofill_store(&stub->args, fd, offset, len, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_fxattrop_cbk_stub (call_frame_t *frame,
- fop_fxattrop_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *xattr, dict_t *xdata)
+fop_ipc_cbk_stub(call_frame_t *frame, fop_ipc_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_FXATTROP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 0, GF_FOP_IPC);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.fxattrop_cbk.fn = fn;
- stub->args.fxattrop_cbk.op_ret = op_ret;
- stub->args.fxattrop_cbk.op_errno = op_errno;
- if (xattr)
- stub->args.fxattrop_cbk.xattr = dict_ref (xattr);
-
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn_cbk.ipc = fn;
+ args_ipc_cbk_store(&stub->args_cbk, op_ret, op_errno, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_xattrop_stub (call_frame_t *frame,
- fop_xattrop_t fn,
- loc_t *loc,
- gf_xattrop_flags_t optype,
- dict_t *xattr, dict_t *xdata)
+fop_ipc_stub(call_frame_t *frame, fop_ipc_t fn, int32_t op, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", xattr, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- stub = stub_new (frame, 1, GF_FOP_XATTROP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 1, GF_FOP_IPC);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.xattrop.fn = fn;
-
- loc_copy (&stub->args.xattrop.loc, loc);
+ stub->fn.ipc = fn;
+ args_ipc_store(&stub->args, op, xdata);
+out:
+ return stub;
+}
- stub->args.xattrop.optype = optype;
- stub->args.xattrop.xattr = dict_ref (xattr);
+call_stub_t *
+fop_lease_cbk_stub(call_frame_t *frame, fop_lease_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct gf_lease *lease, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub = stub_new(frame, 0, GF_FOP_LEASE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+ stub->fn_cbk.lease = fn;
+ args_lease_cbk_store(&stub->args_cbk, op_ret, op_errno, lease, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fxattrop_stub (call_frame_t *frame,
- fop_fxattrop_t fn,
- fd_t *fd,
- gf_xattrop_flags_t optype,
- dict_t *xattr, dict_t *xdata)
+fop_lease_stub(call_frame_t *frame, fop_lease_t fn, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", xattr, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
+ GF_VALIDATE_OR_GOTO("call-stub", lease, out);
- stub = stub_new (frame, 1, GF_FOP_FXATTROP);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 1, GF_FOP_LEASE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.fxattrop.fn = fn;
+ stub->fn.lease = fn;
+ args_lease_store(&stub->args, loc, lease, xdata);
+out:
+ return stub;
+}
- stub->args.fxattrop.fd = fd_ref (fd);
+call_stub_t *
+fop_seek_cbk_stub(call_frame_t *frame, fop_seek_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, off_t offset, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- stub->args.fxattrop.optype = optype;
- stub->args.fxattrop.xattr = dict_ref (xattr);
+ stub = stub_new(frame, 0, GF_FOP_SEEK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn_cbk.seek = fn;
+ args_seek_cbk_store(&stub->args_cbk, op_ret, op_errno, offset, xdata);
out:
- return stub;
+ return stub;
}
-
call_stub_t *
-fop_setattr_cbk_stub (call_frame_t *frame,
- fop_setattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+fop_seek_stub(call_frame_t *frame, fop_seek_t fn, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- stub = stub_new (frame, 0, GF_FOP_SETATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ stub = stub_new(frame, 1, GF_FOP_SEEK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.setattr_cbk.fn = fn;
+ stub->fn.seek = fn;
+ args_seek_store(&stub->args, fd, offset, what, xdata);
+out:
+ return stub;
+}
- stub->args.setattr_cbk.op_ret = op_ret;
- stub->args.setattr_cbk.op_errno = op_errno;
+call_stub_t *
+fop_getactivelk_cbk_stub(call_frame_t *frame, fop_getactivelk_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ lock_migration_info_t *lmi, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- if (statpre)
- stub->args.setattr_cbk.statpre = *statpre;
- if (statpost)
- stub->args.setattr_cbk.statpost = *statpost;
+ stub = stub_new(frame, 0, GF_FOP_GETACTIVELK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ stub->fn_cbk.getactivelk = fn;
+ args_getactivelk_cbk_store(&stub->args_cbk, op_ret, op_errno, lmi, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fsetattr_cbk_stub (call_frame_t *frame,
- fop_setattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+fop_getactivelk_stub(call_frame_t *frame, fop_getactivelk_t fn, loc_t *loc,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 0, GF_FOP_FSETATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- stub->args.fsetattr_cbk.fn = fn;
+ stub = stub_new(frame, 1, GF_FOP_GETACTIVELK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.fsetattr_cbk.op_ret = op_ret;
- stub->args.fsetattr_cbk.op_errno = op_errno;
+ stub->fn.getactivelk = fn;
- if (statpre)
- stub->args.setattr_cbk.statpre = *statpre;
- if (statpost)
- stub->args.fsetattr_cbk.statpost = *statpost;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ loc_copy(&stub->args.loc, loc);
+ if (xdata)
+ stub->args.xdata = dict_ref(xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_setattr_stub (call_frame_t *frame,
- fop_setattr_t fn,
- loc_t *loc,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
+fop_setactivelk_cbk_stub(call_frame_t *frame, fop_setactivelk_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
+
+ stub = stub_new(frame, 0, GF_FOP_SETACTIVELK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
+
+ stub->fn_cbk.setactivelk = fn;
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", fn, out);
+ if (xdata)
+ stub->args.xdata = dict_ref(xdata);
- stub = stub_new (frame, 1, GF_FOP_SETATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+out:
+ return stub;
+}
- stub->args.setattr.fn = fn;
+call_stub_t *
+fop_setactivelk_stub(call_frame_t *frame, fop_setactivelk_t fn, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- loc_copy (&stub->args.setattr.loc, loc);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- if (stbuf)
- stub->args.setattr.stbuf = *stbuf;
+ stub = stub_new(frame, 1, GF_FOP_SETACTIVELK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- stub->args.setattr.valid = valid;
+ stub->fn.setactivelk = fn;
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ args_setactivelk_store(&stub->args, loc, locklist, xdata);
out:
- return stub;
+ return stub;
}
call_stub_t *
-fop_fsetattr_stub (call_frame_t *frame,
- fop_fsetattr_t fn,
- fd_t *fd,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
+fop_copy_file_range_stub(call_frame_t *frame, fop_copy_file_range_t fn,
+ fd_t *fd_in, off64_t off_in, fd_t *fd_out,
+ off64_t off_out, size_t len, uint32_t flags,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- GF_VALIDATE_OR_GOTO ("call-stub", frame, out);
- GF_VALIDATE_OR_GOTO ("call-stub", fn, out);
+ call_stub_t *stub = NULL;
- stub = stub_new (frame, 1, GF_FOP_FSETATTR);
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- stub->args.fsetattr.fn = fn;
+ stub = stub_new(frame, 1, GF_FOP_COPY_FILE_RANGE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- if (fd)
- stub->args.fsetattr.fd = fd_ref (fd);
+ stub->fn.copy_file_range = fn;
- if (stbuf)
- stub->args.fsetattr.stbuf = *stbuf;
-
- stub->args.fsetattr.valid = valid;
-
- if (xdata)
- stub->xdata = dict_ref (xdata);
+ args_copy_file_range_store(&stub->args, fd_in, off_in, fd_out, off_out, len,
+ flags, xdata);
out:
- return stub;
+ return stub;
}
-static void
-call_resume_wind (call_stub_t *stub)
+call_stub_t *
+fop_copy_file_range_cbk_stub(call_frame_t *frame, fop_copy_file_range_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata)
{
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ call_stub_t *stub = NULL;
- switch (stub->fop) {
- case GF_FOP_OPEN:
- {
- stub->args.open.fn (stub->frame,
- stub->frame->this,
- &stub->args.open.loc,
- stub->args.open.flags, stub->args.open.fd,
- stub->xdata);
- break;
- }
- case GF_FOP_CREATE:
- {
- stub->args.create.fn (stub->frame,
- stub->frame->this,
- &stub->args.create.loc,
- stub->args.create.flags,
- stub->args.create.mode,
- stub->args.create.umask,
- stub->args.create.fd,
- stub->xdata);
- break;
- }
- case GF_FOP_STAT:
- {
- stub->args.stat.fn (stub->frame,
- stub->frame->this,
- &stub->args.stat.loc, stub->xdata);
- break;
- }
- case GF_FOP_READLINK:
- {
- stub->args.readlink.fn (stub->frame,
- stub->frame->this,
- &stub->args.readlink.loc,
- stub->args.readlink.size, stub->xdata);
- break;
- }
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- case GF_FOP_MKNOD:
- {
- stub->args.mknod.fn (stub->frame, stub->frame->this,
- &stub->args.mknod.loc,
- stub->args.mknod.mode,
- stub->args.mknod.rdev,
- stub->args.mknod.umask, stub->xdata);
- }
- break;
+ stub = stub_new(frame, 0, GF_FOP_COPY_FILE_RANGE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_MKDIR:
- {
- stub->args.mkdir.fn (stub->frame, stub->frame->this,
- &stub->args.mkdir.loc,
- stub->args.mkdir.mode,
- stub->args.mkdir.umask, stub->xdata);
- }
- break;
+ stub->fn_cbk.copy_file_range = fn;
+ args_copy_file_range_cbk_store(&stub->args_cbk, op_ret, op_errno, stbuf,
+ prebuf_dst, postbuf_dst, xdata);
- case GF_FOP_UNLINK:
- {
- stub->args.unlink.fn (stub->frame,
- stub->frame->this,
- &stub->args.unlink.loc,
- stub->args.unlink.xflag, stub->xdata);
- }
- break;
+out:
+ return stub;
+}
- case GF_FOP_RMDIR:
- {
- stub->args.rmdir.fn (stub->frame, stub->frame->this,
- &stub->args.rmdir.loc,
- stub->args.rmdir.flags, stub->xdata);
- }
- break;
+call_stub_t *
+fop_put_stub(call_frame_t *frame, fop_put_t fn, loc_t *loc, mode_t mode,
+ mode_t umask, uint32_t flags, struct iovec *vector, int32_t count,
+ off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- case GF_FOP_SYMLINK:
- {
- stub->args.symlink.fn (stub->frame,
- stub->frame->this,
- stub->args.symlink.linkname,
- &stub->args.symlink.loc,
- stub->args.symlink.umask, stub->xdata);
- }
- break;
+ GF_VALIDATE_OR_GOTO("call-stub", vector, out);
- case GF_FOP_RENAME:
- {
- stub->args.rename.fn (stub->frame,
- stub->frame->this,
- &stub->args.rename.old,
- &stub->args.rename.new, stub->xdata);
- }
- break;
+ stub = stub_new(frame, 1, GF_FOP_PUT);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_LINK:
- {
- stub->args.link.fn (stub->frame,
- stub->frame->this,
- &stub->args.link.oldloc,
- &stub->args.link.newloc, stub->xdata);
- }
- break;
+ stub->fn.put = fn;
+ args_put_store(&stub->args, loc, mode, umask, flags, vector, count, offset,
+ iobref, xattr, xdata);
+out:
+ return stub;
+}
- case GF_FOP_TRUNCATE:
- {
- stub->args.truncate.fn (stub->frame,
- stub->frame->this,
- &stub->args.truncate.loc,
- stub->args.truncate.off, stub->xdata);
- break;
- }
+call_stub_t *
+fop_put_cbk_stub(call_frame_t *frame, fop_put_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- case GF_FOP_READ:
- {
- stub->args.readv.fn (stub->frame,
- stub->frame->this,
- stub->args.readv.fd,
- stub->args.readv.size,
- stub->args.readv.off,
- stub->args.readv.flags, stub->xdata);
- break;
- }
+ stub = stub_new(frame, 0, GF_FOP_PUT);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_WRITE:
- {
- stub->args.writev.fn (stub->frame,
- stub->frame->this,
- stub->args.writev.fd,
- stub->args.writev.vector,
- stub->args.writev.count,
- stub->args.writev.off,
- stub->args.writev.flags,
- stub->args.writev.iobref, stub->xdata);
- break;
- }
+ stub->fn_cbk.put = fn;
+ args_put_cbk_store(&stub->args_cbk, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+out:
+ return stub;
+}
- case GF_FOP_STATFS:
- {
- stub->args.statfs.fn (stub->frame,
- stub->frame->this,
- &stub->args.statfs.loc, stub->xdata);
- break;
- }
- case GF_FOP_FLUSH:
- {
- stub->args.flush.fn (stub->frame,
- stub->frame->this,
- stub->args.flush.fd, stub->xdata);
- break;
- }
+call_stub_t *
+fop_icreate_stub(call_frame_t *frame, fop_icreate_t fn, loc_t *loc, mode_t mode,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- case GF_FOP_FSYNC:
- {
- stub->args.fsync.fn (stub->frame,
- stub->frame->this,
- stub->args.fsync.fd,
- stub->args.fsync.datasync, stub->xdata);
- break;
- }
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- case GF_FOP_SETXATTR:
- {
- stub->args.setxattr.fn (stub->frame,
- stub->frame->this,
- &stub->args.setxattr.loc,
- stub->args.setxattr.dict,
- stub->args.setxattr.flags, stub->xdata);
- break;
- }
+ stub = stub_new(frame, 1, GF_FOP_ICREATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_GETXATTR:
- {
- stub->args.getxattr.fn (stub->frame,
- stub->frame->this,
- &stub->args.getxattr.loc,
- stub->args.getxattr.name, stub->xdata);
- break;
- }
+ stub->fn.icreate = fn;
- case GF_FOP_FSETXATTR:
- {
- stub->args.fsetxattr.fn (stub->frame,
- stub->frame->this,
- stub->args.fsetxattr.fd,
- stub->args.fsetxattr.dict,
- stub->args.fsetxattr.flags, stub->xdata);
- break;
- }
+ stub->args.mode = mode;
+ if (loc)
+ loc_copy(&stub->args.loc, loc);
+ if (xdata)
+ stub->args.xdata = dict_ref(xdata);
- case GF_FOP_FGETXATTR:
- {
- stub->args.fgetxattr.fn (stub->frame,
- stub->frame->this,
- stub->args.fgetxattr.fd,
- stub->args.fgetxattr.name, stub->xdata);
- break;
- }
+out:
+ return stub;
+}
- case GF_FOP_REMOVEXATTR:
- {
- stub->args.removexattr.fn (stub->frame,
- stub->frame->this,
- &stub->args.removexattr.loc,
- stub->args.removexattr.name, stub->xdata);
- break;
- }
+static void
+args_icreate_store_cbk(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (inode)
+ args->inode = inode_ref(inode);
+ if (buf)
+ args->stat = *buf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+}
- case GF_FOP_FREMOVEXATTR:
- {
- stub->args.fremovexattr.fn (stub->frame,
- stub->frame->this,
- stub->args.fremovexattr.fd,
- stub->args.fremovexattr.name, stub->xdata);
- break;
- }
+call_stub_t *
+fop_icreate_cbk_stub(call_frame_t *frame, fop_icreate_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- case GF_FOP_OPENDIR:
- {
- stub->args.opendir.fn (stub->frame,
- stub->frame->this,
- &stub->args.opendir.loc,
- stub->args.opendir.fd, stub->xdata);
- break;
- }
+ stub = stub_new(frame, 0, GF_FOP_ICREATE);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_FSYNCDIR:
- {
- stub->args.fsyncdir.fn (stub->frame,
- stub->frame->this,
- stub->args.fsyncdir.fd,
- stub->args.fsyncdir.datasync, stub->xdata);
- break;
- }
+ stub->fn_cbk.icreate = fn;
+ args_icreate_store_cbk(&stub->args_cbk, op_ret, op_errno, inode, buf,
+ xdata);
- case GF_FOP_ACCESS:
- {
- stub->args.access.fn (stub->frame,
- stub->frame->this,
- &stub->args.access.loc,
- stub->args.access.mask, stub->xdata);
- break;
- }
+out:
+ return stub;
+}
- case GF_FOP_FTRUNCATE:
- {
- stub->args.ftruncate.fn (stub->frame,
- stub->frame->this,
- stub->args.ftruncate.fd,
- stub->args.ftruncate.off, stub->xdata);
- break;
- }
+call_stub_t *
+fop_namelink_stub(call_frame_t *frame, fop_namelink_t fn, loc_t *loc,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- case GF_FOP_FSTAT:
- {
- stub->args.fstat.fn (stub->frame,
- stub->frame->this,
- stub->args.fstat.fd, stub->xdata);
- break;
- }
+ GF_VALIDATE_OR_GOTO("call-stub", fn, out);
- case GF_FOP_LK:
- {
- stub->args.lk.fn (stub->frame,
- stub->frame->this,
- stub->args.lk.fd,
- stub->args.lk.cmd,
- &stub->args.lk.lock, stub->xdata);
- break;
- }
+ stub = stub_new(frame, 1, GF_FOP_NAMELINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_INODELK:
- {
- stub->args.inodelk.fn (stub->frame,
- stub->frame->this,
- stub->args.inodelk.volume,
- &stub->args.inodelk.loc,
- stub->args.inodelk.cmd,
- &stub->args.inodelk.lock, stub->xdata);
- break;
- }
+ stub->fn.namelink = fn;
- case GF_FOP_FINODELK:
- {
- stub->args.finodelk.fn (stub->frame,
- stub->frame->this,
- stub->args.finodelk.volume,
- stub->args.finodelk.fd,
- stub->args.finodelk.cmd,
- &stub->args.finodelk.lock, stub->xdata);
- break;
- }
+ if (loc)
+ loc_copy(&stub->args.loc, loc);
+ if (xdata)
+ stub->args.xdata = dict_ref(xdata);
- case GF_FOP_ENTRYLK:
- {
- stub->args.entrylk.fn (stub->frame,
- stub->frame->this,
- stub->args.entrylk.volume,
- &stub->args.entrylk.loc,
- stub->args.entrylk.name,
- stub->args.entrylk.cmd,
- stub->args.entrylk.type, stub->xdata);
- break;
- }
+out:
+ return stub;
+}
- case GF_FOP_FENTRYLK:
- {
- stub->args.fentrylk.fn (stub->frame,
- stub->frame->this,
- stub->args.fentrylk.volume,
- stub->args.fentrylk.fd,
- stub->args.fentrylk.name,
- stub->args.fentrylk.cmd,
- stub->args.fentrylk.type, stub->xdata);
- break;
- }
-
- break;
+static void
+args_namelink_store_cbk(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
- case GF_FOP_LOOKUP:
- {
- stub->args.lookup.fn (stub->frame,
- stub->frame->this,
- &stub->args.lookup.loc,
- stub->xdata);
- break;
- }
+ if (prebuf)
+ args->prestat = *prebuf;
+ if (postbuf)
+ args->poststat = *postbuf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+}
- case GF_FOP_RCHECKSUM:
- {
- stub->args.rchecksum.fn (stub->frame,
- stub->frame->this,
- stub->args.rchecksum.fd,
- stub->args.rchecksum.offset,
- stub->args.rchecksum.len, stub->xdata);
- break;
- }
+call_stub_t *
+fop_namelink_cbk_stub(call_frame_t *frame, fop_namelink_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
- case GF_FOP_READDIR:
- {
- stub->args.readdir.fn (stub->frame,
- stub->frame->this,
- stub->args.readdir.fd,
- stub->args.readdir.size,
- stub->args.readdir.off, stub->xdata);
- break;
- }
+ stub = stub_new(frame, 0, GF_FOP_NAMELINK);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_READDIRP:
- {
- stub->args.readdirp.fn (stub->frame,
- stub->frame->this,
- stub->args.readdirp.fd,
- stub->args.readdirp.size,
- stub->args.readdirp.off,
- stub->xdata);
- break;
- }
+ stub->fn_cbk.namelink = fn;
+ args_namelink_store_cbk(&stub->args_cbk, op_ret, op_errno, prebuf, postbuf,
+ xdata);
- case GF_FOP_XATTROP:
- {
- stub->args.xattrop.fn (stub->frame,
- stub->frame->this,
- &stub->args.xattrop.loc,
- stub->args.xattrop.optype,
- stub->args.xattrop.xattr, stub->xdata);
-
- break;
- }
- case GF_FOP_FXATTROP:
- {
- stub->args.fxattrop.fn (stub->frame,
- stub->frame->this,
- stub->args.fxattrop.fd,
- stub->args.fxattrop.optype,
- stub->args.fxattrop.xattr, stub->xdata);
-
- break;
- }
- case GF_FOP_SETATTR:
- {
- stub->args.setattr.fn (stub->frame,
- stub->frame->this,
- &stub->args.setattr.loc,
- &stub->args.setattr.stbuf,
- stub->args.setattr.valid, stub->xdata);
- break;
- }
- case GF_FOP_FSETATTR:
- {
- stub->args.fsetattr.fn (stub->frame,
- stub->frame->this,
- stub->args.fsetattr.fd,
- &stub->args.fsetattr.stbuf,
- stub->args.fsetattr.valid, stub->xdata);
- break;
- }
- default:
- {
- gf_log_callingfn ("call-stub", GF_LOG_ERROR,
- "Invalid value of FOP (%d)",
- stub->fop);
- break;
- }
-
- }
out:
- return;
+ return stub;
}
-
-
-static void
-call_resume_unwind (call_stub_t *stub)
+void
+call_resume_wind(call_stub_t *stub)
{
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- switch (stub->fop) {
+ switch (stub->fop) {
case GF_FOP_OPEN:
- {
- if (!stub->args.open_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.open_cbk.op_ret,
- stub->args.open_cbk.op_errno,
- stub->args.open_cbk.fd, stub->xdata);
- else
- stub->args.open_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.open_cbk.op_ret,
- stub->args.open_cbk.op_errno,
- stub->args.open_cbk.fd, stub->xdata);
- break;
- }
-
+ stub->fn.open(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.flags, stub->args.fd, stub->args.xdata);
+ break;
case GF_FOP_CREATE:
- {
- if (!stub->args.create_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.create_cbk.op_ret,
- stub->args.create_cbk.op_errno,
- stub->args.create_cbk.fd,
- stub->args.create_cbk.inode,
- &stub->args.create_cbk.buf,
- &stub->args.create_cbk.preparent,
- &stub->args.create_cbk.postparent, stub->xdata);
- else
- stub->args.create_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.create_cbk.op_ret,
- stub->args.create_cbk.op_errno,
- stub->args.create_cbk.fd,
- stub->args.create_cbk.inode,
- &stub->args.create_cbk.buf,
- &stub->args.create_cbk.preparent,
- &stub->args.create_cbk.postparent, stub->xdata);
-
- break;
- }
-
+ stub->fn.create(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.flags, stub->args.mode, stub->args.umask,
+ stub->args.fd, stub->args.xdata);
+ break;
case GF_FOP_STAT:
- {
- if (!stub->args.stat_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.stat_cbk.op_ret,
- stub->args.stat_cbk.op_errno,
- &stub->args.stat_cbk.buf, stub->xdata);
- else
- stub->args.stat_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.stat_cbk.op_ret,
- stub->args.stat_cbk.op_errno,
- &stub->args.stat_cbk.buf, stub->xdata);
-
- break;
- }
-
+ stub->fn.stat(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.xdata);
+ break;
case GF_FOP_READLINK:
- {
- if (!stub->args.readlink_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.readlink_cbk.op_ret,
- stub->args.readlink_cbk.op_errno,
- stub->args.readlink_cbk.buf,
- &stub->args.readlink_cbk.sbuf, stub->xdata);
- else
- stub->args.readlink_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.readlink_cbk.op_ret,
- stub->args.readlink_cbk.op_errno,
- stub->args.readlink_cbk.buf,
- &stub->args.readlink_cbk.sbuf, stub->xdata);
-
- break;
- }
-
+ stub->fn.readlink(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.size, stub->args.xdata);
+ break;
case GF_FOP_MKNOD:
- {
- if (!stub->args.mknod_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.mknod_cbk.op_ret,
- stub->args.mknod_cbk.op_errno,
- stub->args.mknod_cbk.inode,
- &stub->args.mknod_cbk.buf,
- &stub->args.mknod_cbk.preparent,
- &stub->args.mknod_cbk.postparent, stub->xdata);
- else
- stub->args.mknod_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.mknod_cbk.op_ret,
- stub->args.mknod_cbk.op_errno,
- stub->args.mknod_cbk.inode,
- &stub->args.mknod_cbk.buf,
- &stub->args.mknod_cbk.preparent,
- &stub->args.mknod_cbk.postparent, stub->xdata);
- break;
- }
-
+ stub->fn.mknod(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.mode, stub->args.rdev, stub->args.umask,
+ stub->args.xdata);
+ break;
case GF_FOP_MKDIR:
- {
- if (!stub->args.mkdir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.mkdir_cbk.op_ret,
- stub->args.mkdir_cbk.op_errno,
- stub->args.mkdir_cbk.inode,
- &stub->args.mkdir_cbk.buf,
- &stub->args.mkdir_cbk.preparent,
- &stub->args.mkdir_cbk.postparent, stub->xdata);
- else
- stub->args.mkdir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.mkdir_cbk.op_ret,
- stub->args.mkdir_cbk.op_errno,
- stub->args.mkdir_cbk.inode,
- &stub->args.mkdir_cbk.buf,
- &stub->args.mkdir_cbk.preparent,
- &stub->args.mkdir_cbk.postparent, stub->xdata);
-
- if (stub->args.mkdir_cbk.inode)
- inode_unref (stub->args.mkdir_cbk.inode);
-
- break;
- }
-
+ stub->fn.mkdir(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.mode, stub->args.umask, stub->args.xdata);
+ break;
case GF_FOP_UNLINK:
- {
- if (!stub->args.unlink_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.unlink_cbk.op_ret,
- stub->args.unlink_cbk.op_errno,
- &stub->args.unlink_cbk.preparent,
- &stub->args.unlink_cbk.postparent, stub->xdata);
- else
- stub->args.unlink_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.unlink_cbk.op_ret,
- stub->args.unlink_cbk.op_errno,
- &stub->args.unlink_cbk.preparent,
- &stub->args.unlink_cbk.postparent, stub->xdata);
- break;
- }
-
+ stub->fn.unlink(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.xflag, stub->args.xdata);
+ break;
case GF_FOP_RMDIR:
- {
- if (!stub->args.rmdir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.rmdir_cbk.op_ret,
- stub->args.rmdir_cbk.op_errno,
- &stub->args.rmdir_cbk.preparent,
- &stub->args.rmdir_cbk.postparent, stub->xdata);
- else
- stub->args.rmdir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.rmdir_cbk.op_ret,
- stub->args.rmdir_cbk.op_errno,
- &stub->args.rmdir_cbk.preparent,
- &stub->args.rmdir_cbk.postparent, stub->xdata);
- break;
- }
-
+ stub->fn.rmdir(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.flags, stub->args.xdata);
+ break;
case GF_FOP_SYMLINK:
- {
- if (!stub->args.symlink_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.symlink_cbk.op_ret,
- stub->args.symlink_cbk.op_errno,
- stub->args.symlink_cbk.inode,
- &stub->args.symlink_cbk.buf,
- &stub->args.symlink_cbk.preparent,
- &stub->args.symlink_cbk.postparent, stub->xdata);
- else
- stub->args.symlink_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.symlink_cbk.op_ret,
- stub->args.symlink_cbk.op_errno,
- stub->args.symlink_cbk.inode,
- &stub->args.symlink_cbk.buf,
- &stub->args.symlink_cbk.preparent,
- &stub->args.symlink_cbk.postparent, stub->xdata);
- }
- break;
-
+ stub->fn.symlink(stub->frame, stub->frame->this,
+ stub->args.linkname, &stub->args.loc,
+ stub->args.umask, stub->args.xdata);
+ break;
case GF_FOP_RENAME:
- {
- if (!stub->args.rename_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.rename_cbk.op_ret,
- stub->args.rename_cbk.op_errno,
- &stub->args.rename_cbk.buf,
- &stub->args.rename_cbk.preoldparent,
- &stub->args.rename_cbk.postoldparent,
- &stub->args.rename_cbk.prenewparent,
- &stub->args.rename_cbk.postnewparent, stub->xdata);
- else
- stub->args.rename_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.rename_cbk.op_ret,
- stub->args.rename_cbk.op_errno,
- &stub->args.rename_cbk.buf,
- &stub->args.rename_cbk.preoldparent,
- &stub->args.rename_cbk.postoldparent,
- &stub->args.rename_cbk.prenewparent,
- &stub->args.rename_cbk.postnewparent, stub->xdata);
- break;
- }
-
+ stub->fn.rename(stub->frame, stub->frame->this, &stub->args.loc,
+ &stub->args.loc2, stub->args.xdata);
+ break;
case GF_FOP_LINK:
- {
- if (!stub->args.link_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.link_cbk.op_ret,
- stub->args.link_cbk.op_errno,
- stub->args.link_cbk.inode,
- &stub->args.link_cbk.buf, stub->xdata);
- else
- stub->args.link_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.link_cbk.op_ret,
- stub->args.link_cbk.op_errno,
- stub->args.link_cbk.inode,
- &stub->args.link_cbk.buf,
- &stub->args.link_cbk.preparent,
- &stub->args.link_cbk.postparent, stub->xdata);
- break;
- }
-
+ stub->fn.link(stub->frame, stub->frame->this, &stub->args.loc,
+ &stub->args.loc2, stub->args.xdata);
+ break;
case GF_FOP_TRUNCATE:
- {
- if (!stub->args.truncate_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.truncate_cbk.op_ret,
- stub->args.truncate_cbk.op_errno,
- &stub->args.truncate_cbk.prebuf,
- &stub->args.truncate_cbk.postbuf, stub->xdata);
- else
- stub->args.truncate_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.truncate_cbk.op_ret,
- stub->args.truncate_cbk.op_errno,
- &stub->args.truncate_cbk.prebuf,
- &stub->args.truncate_cbk.postbuf, stub->xdata);
- break;
- }
-
+ stub->fn.truncate(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.offset, stub->args.xdata);
+ break;
case GF_FOP_READ:
- {
- if (!stub->args.readv_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.readv_cbk.op_ret,
- stub->args.readv_cbk.op_errno,
- stub->args.readv_cbk.vector,
- stub->args.readv_cbk.count,
- &stub->args.readv_cbk.stbuf,
- stub->args.readv_cbk.iobref, stub->xdata);
- else
- stub->args.readv_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.readv_cbk.op_ret,
- stub->args.readv_cbk.op_errno,
- stub->args.readv_cbk.vector,
- stub->args.readv_cbk.count,
- &stub->args.readv_cbk.stbuf,
- stub->args.readv_cbk.iobref, stub->xdata);
- }
- break;
-
+ stub->fn.readv(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.size, stub->args.offset, stub->args.flags,
+ stub->args.xdata);
+ break;
case GF_FOP_WRITE:
- {
- if (!stub->args.writev_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.writev_cbk.op_ret,
- stub->args.writev_cbk.op_errno,
- &stub->args.writev_cbk.prebuf,
- &stub->args.writev_cbk.postbuf, stub->xdata);
- else
- stub->args.writev_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.writev_cbk.op_ret,
- stub->args.writev_cbk.op_errno,
- &stub->args.writev_cbk.prebuf,
- &stub->args.writev_cbk.postbuf, stub->xdata);
- break;
- }
-
+ stub->fn.writev(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.vector, stub->args.count,
+ stub->args.offset, stub->args.flags,
+ stub->args.iobref, stub->args.xdata);
+ break;
case GF_FOP_STATFS:
- {
- if (!stub->args.statfs_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.statfs_cbk.op_ret,
- stub->args.statfs_cbk.op_errno,
- &(stub->args.statfs_cbk.buf), stub->xdata);
- else
- stub->args.statfs_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.statfs_cbk.op_ret,
- stub->args.statfs_cbk.op_errno,
- &(stub->args.statfs_cbk.buf), stub->xdata);
- }
- break;
-
+ stub->fn.statfs(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.xdata);
+ break;
case GF_FOP_FLUSH:
- {
- if (!stub->args.flush_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.flush_cbk.op_ret,
- stub->args.flush_cbk.op_errno, stub->xdata);
- else
- stub->args.flush_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.flush_cbk.op_ret,
- stub->args.flush_cbk.op_errno, stub->xdata);
-
- break;
- }
-
+ stub->fn.flush(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.xdata);
+ break;
case GF_FOP_FSYNC:
- {
- if (!stub->args.fsync_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fsync_cbk.op_ret,
- stub->args.fsync_cbk.op_errno,
- &stub->args.fsync_cbk.prebuf,
- &stub->args.fsync_cbk.postbuf, stub->xdata);
- else
- stub->args.fsync_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fsync_cbk.op_ret,
- stub->args.fsync_cbk.op_errno,
- &stub->args.fsync_cbk.prebuf,
- &stub->args.fsync_cbk.postbuf, stub->xdata);
- break;
- }
-
+ stub->fn.fsync(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.datasync, stub->args.xdata);
+ break;
case GF_FOP_SETXATTR:
- {
- if (!stub->args.setxattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.setxattr_cbk.op_ret,
- stub->args.setxattr_cbk.op_errno, stub->xdata);
-
- else
- stub->args.setxattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.setxattr_cbk.op_ret,
- stub->args.setxattr_cbk.op_errno, stub->xdata);
-
- break;
- }
-
+ stub->fn.setxattr(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.xattr, stub->args.flags,
+ stub->args.xdata);
+ break;
case GF_FOP_GETXATTR:
- {
- if (!stub->args.getxattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.getxattr_cbk.op_ret,
- stub->args.getxattr_cbk.op_errno,
- stub->args.getxattr_cbk.dict, stub->xdata);
- else
- stub->args.getxattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.getxattr_cbk.op_ret,
- stub->args.getxattr_cbk.op_errno,
- stub->args.getxattr_cbk.dict, stub->xdata);
- break;
- }
-
+ stub->fn.getxattr(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.name, stub->args.xdata);
+ break;
case GF_FOP_FSETXATTR:
- {
- if (!stub->args.fsetxattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fsetxattr_cbk.op_ret,
- stub->args.fsetxattr_cbk.op_errno, stub->xdata);
-
- else
- stub->args.fsetxattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fsetxattr_cbk.op_ret,
- stub->args.fsetxattr_cbk.op_errno, stub->xdata);
-
- break;
- }
-
+ stub->fn.fsetxattr(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.xattr, stub->args.flags,
+ stub->args.xdata);
+ break;
case GF_FOP_FGETXATTR:
- {
- if (!stub->args.fgetxattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fgetxattr_cbk.op_ret,
- stub->args.fgetxattr_cbk.op_errno,
- stub->args.fgetxattr_cbk.dict, stub->xdata);
- else
- stub->args.fgetxattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fgetxattr_cbk.op_ret,
- stub->args.fgetxattr_cbk.op_errno,
- stub->args.fgetxattr_cbk.dict, stub->xdata);
- break;
- }
-
+ stub->fn.fgetxattr(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.name, stub->args.xdata);
+ break;
case GF_FOP_REMOVEXATTR:
- {
- if (!stub->args.removexattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.removexattr_cbk.op_ret,
- stub->args.removexattr_cbk.op_errno, stub->xdata);
- else
- stub->args.removexattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.removexattr_cbk.op_ret,
- stub->args.removexattr_cbk.op_errno, stub->xdata);
-
- break;
- }
-
+ stub->fn.removexattr(stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.name,
+ stub->args.xdata);
+ break;
case GF_FOP_FREMOVEXATTR:
- {
- if (!stub->args.fremovexattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fremovexattr_cbk.op_ret,
- stub->args.fremovexattr_cbk.op_errno, stub->xdata);
- else
- stub->args.fremovexattr_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fremovexattr_cbk.op_ret,
- stub->args.fremovexattr_cbk.op_errno, stub->xdata);
-
- break;
- }
-
+ stub->fn.fremovexattr(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.name, stub->args.xdata);
+ break;
case GF_FOP_OPENDIR:
- {
- if (!stub->args.opendir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.opendir_cbk.op_ret,
- stub->args.opendir_cbk.op_errno,
- stub->args.opendir_cbk.fd, stub->xdata);
- else
- stub->args.opendir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.opendir_cbk.op_ret,
- stub->args.opendir_cbk.op_errno,
- stub->args.opendir_cbk.fd, stub->xdata);
- break;
- }
-
+ stub->fn.opendir(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.fd, stub->args.xdata);
+ break;
case GF_FOP_FSYNCDIR:
- {
- if (!stub->args.fsyncdir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fsyncdir_cbk.op_ret,
- stub->args.fsyncdir_cbk.op_errno, stub->xdata);
- else
- stub->args.fsyncdir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fsyncdir_cbk.op_ret,
- stub->args.fsyncdir_cbk.op_errno, stub->xdata);
- break;
- }
-
+ stub->fn.fsyncdir(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.datasync, stub->args.xdata);
+ break;
case GF_FOP_ACCESS:
- {
- if (!stub->args.access_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.access_cbk.op_ret,
- stub->args.access_cbk.op_errno, stub->xdata);
- else
- stub->args.access_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.access_cbk.op_ret,
- stub->args.access_cbk.op_errno, stub->xdata);
-
- break;
- }
+ stub->fn.access(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.mask, stub->args.xdata);
+ break;
case GF_FOP_FTRUNCATE:
- {
- if (!stub->args.ftruncate_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.ftruncate_cbk.op_ret,
- stub->args.ftruncate_cbk.op_errno,
- &stub->args.ftruncate_cbk.prebuf,
- &stub->args.ftruncate_cbk.postbuf, stub->xdata);
- else
- stub->args.ftruncate_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.ftruncate_cbk.op_ret,
- stub->args.ftruncate_cbk.op_errno,
- &stub->args.ftruncate_cbk.prebuf,
- &stub->args.ftruncate_cbk.postbuf, stub->xdata);
- break;
- }
-
+ stub->fn.ftruncate(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.offset, stub->args.xdata);
+ break;
case GF_FOP_FSTAT:
- {
- if (!stub->args.fstat_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fstat_cbk.op_ret,
- stub->args.fstat_cbk.op_errno,
- &stub->args.fstat_cbk.buf, stub->xdata);
- else
- stub->args.fstat_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fstat_cbk.op_ret,
- stub->args.fstat_cbk.op_errno,
- &stub->args.fstat_cbk.buf, stub->xdata);
-
- break;
- }
-
+ stub->fn.fstat(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.xdata);
+ break;
case GF_FOP_LK:
- {
- if (!stub->args.lk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.lk_cbk.op_ret,
- stub->args.lk_cbk.op_errno,
- &stub->args.lk_cbk.lock, stub->xdata);
- else
- stub->args.lk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.lk_cbk.op_ret,
- stub->args.lk_cbk.op_errno,
- &stub->args.lk_cbk.lock, stub->xdata);
- break;
- }
-
+ stub->fn.lk(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.cmd, &stub->args.lock, stub->args.xdata);
+ break;
case GF_FOP_INODELK:
- {
- if (!stub->args.inodelk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.inodelk_cbk.op_ret,
- stub->args.inodelk_cbk.op_errno, stub->xdata);
-
- else
- stub->args.inodelk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.inodelk_cbk.op_ret,
- stub->args.inodelk_cbk.op_errno, stub->xdata);
- break;
- }
-
+ stub->fn.inodelk(stub->frame, stub->frame->this, stub->args.volume,
+ &stub->args.loc, stub->args.cmd, &stub->args.lock,
+ stub->args.xdata);
+ break;
case GF_FOP_FINODELK:
- {
- if (!stub->args.finodelk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.finodelk_cbk.op_ret,
- stub->args.finodelk_cbk.op_errno, stub->xdata);
-
- else
- stub->args.finodelk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.finodelk_cbk.op_ret,
- stub->args.finodelk_cbk.op_errno, stub->xdata);
- break;
- }
-
+ stub->fn.finodelk(stub->frame, stub->frame->this, stub->args.volume,
+ stub->args.fd, stub->args.cmd, &stub->args.lock,
+ stub->args.xdata);
+ break;
case GF_FOP_ENTRYLK:
- {
- if (!stub->args.entrylk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.entrylk_cbk.op_ret,
- stub->args.entrylk_cbk.op_errno, stub->xdata);
-
- else
- stub->args.entrylk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.entrylk_cbk.op_ret,
- stub->args.entrylk_cbk.op_errno, stub->xdata);
- break;
- }
-
+ stub->fn.entrylk(stub->frame, stub->frame->this, stub->args.volume,
+ &stub->args.loc, stub->args.name,
+ stub->args.entrylkcmd, stub->args.entrylktype,
+ stub->args.xdata);
+ break;
case GF_FOP_FENTRYLK:
- {
- if (!stub->args.fentrylk_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fentrylk_cbk.op_ret,
- stub->args.fentrylk_cbk.op_errno, stub->xdata);
-
- else
- stub->args.fentrylk_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fentrylk_cbk.op_ret,
- stub->args.fentrylk_cbk.op_errno, stub->xdata);
- break;
- }
-
+ stub->fn.fentrylk(stub->frame, stub->frame->this, stub->args.volume,
+ stub->args.fd, stub->args.name,
+ stub->args.entrylkcmd, stub->args.entrylktype,
+ stub->args.xdata);
+ break;
case GF_FOP_LOOKUP:
- {
- if (!stub->args.lookup_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.lookup_cbk.op_ret,
- stub->args.lookup_cbk.op_errno,
- stub->args.lookup_cbk.inode,
- &stub->args.lookup_cbk.buf,
- stub->xdata,
- &stub->args.lookup_cbk.postparent);
- else
- stub->args.lookup_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.lookup_cbk.op_ret,
- stub->args.lookup_cbk.op_errno,
- stub->args.lookup_cbk.inode,
- &stub->args.lookup_cbk.buf,
- stub->xdata,
- &stub->args.lookup_cbk.postparent);
- /* FIXME NULL should not be passed */
-
- if (stub->args.lookup_cbk.inode)
- inode_unref (stub->args.lookup_cbk.inode);
-
- break;
- }
-
+ stub->fn.lookup(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.xdata);
+ break;
case GF_FOP_RCHECKSUM:
- {
- if (!stub->args.rchecksum_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.rchecksum_cbk.op_ret,
- stub->args.rchecksum_cbk.op_errno,
- stub->args.rchecksum_cbk.weak_checksum,
- stub->args.rchecksum_cbk.strong_checksum, stub->xdata);
- else
- stub->args.rchecksum_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.rchecksum_cbk.op_ret,
- stub->args.rchecksum_cbk.op_errno,
- stub->args.rchecksum_cbk.weak_checksum,
- stub->args.rchecksum_cbk.strong_checksum, stub->xdata);
-
- if (stub->args.rchecksum_cbk.op_ret >= 0)
- GF_FREE (stub->args.rchecksum_cbk.strong_checksum);
-
- break;
- }
-
+ stub->fn.rchecksum(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.offset, stub->args.size,
+ stub->args.xdata);
+ break;
case GF_FOP_READDIR:
- {
- if (!stub->args.readdir_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.readdir_cbk.op_ret,
- stub->args.readdir_cbk.op_errno,
- &stub->args.readdir_cbk.entries, stub->xdata);
- else
- stub->args.readdir_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.readdir_cbk.op_ret,
- stub->args.readdir_cbk.op_errno,
- &stub->args.readdir_cbk.entries, stub->xdata);
-
- if (stub->args.readdir_cbk.op_ret > 0)
- gf_dirent_free (&stub->args.readdir_cbk.entries);
-
- break;
- }
-
+ stub->fn.readdir(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.size, stub->args.offset,
+ stub->args.xdata);
+ break;
case GF_FOP_READDIRP:
- {
- if (!stub->args.readdirp_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.readdirp_cbk.op_ret,
- stub->args.readdirp_cbk.op_errno,
- &stub->args.readdirp_cbk.entries, stub->xdata);
- else
- stub->args.readdirp_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.readdirp_cbk.op_ret,
- stub->args.readdirp_cbk.op_errno,
- &stub->args.readdirp_cbk.entries, stub->xdata);
-
- if (stub->args.readdirp_cbk.op_ret > 0)
- gf_dirent_free (&stub->args.readdirp_cbk.entries);
-
- break;
- }
-
+ stub->fn.readdirp(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.size, stub->args.offset,
+ stub->args.xdata);
+ break;
case GF_FOP_XATTROP:
- {
- if (!stub->args.xattrop_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.xattrop_cbk.op_ret,
- stub->args.xattrop_cbk.op_errno, stub->xdata);
- else
- stub->args.xattrop_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.xattrop_cbk.op_ret,
- stub->args.xattrop_cbk.op_errno,
- stub->args.xattrop_cbk.xattr, stub->xdata);
-
- if (stub->args.xattrop_cbk.xattr)
- dict_unref (stub->args.xattrop_cbk.xattr);
-
- break;
- }
+ stub->fn.xattrop(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.optype, stub->args.xattr,
+ stub->args.xdata);
+ break;
case GF_FOP_FXATTROP:
- {
- if (!stub->args.fxattrop_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fxattrop_cbk.op_ret,
- stub->args.fxattrop_cbk.op_errno, stub->xdata);
- else
- stub->args.fxattrop_cbk.fn (stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fxattrop_cbk.op_ret,
- stub->args.fxattrop_cbk.op_errno,
- stub->args.fxattrop_cbk.xattr, stub->xdata);
-
- if (stub->args.fxattrop_cbk.xattr)
- dict_unref (stub->args.fxattrop_cbk.xattr);
-
- break;
- }
+ stub->fn.fxattrop(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.optype, stub->args.xattr,
+ stub->args.xdata);
+ break;
case GF_FOP_SETATTR:
- {
- if (!stub->args.setattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.setattr_cbk.op_ret,
- stub->args.setattr_cbk.op_errno,
- &stub->args.setattr_cbk.statpre,
- &stub->args.setattr_cbk.statpost, stub->xdata);
- else
- stub->args.setattr_cbk.fn (
- stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.setattr_cbk.op_ret,
- stub->args.setattr_cbk.op_errno,
- &stub->args.setattr_cbk.statpre,
- &stub->args.setattr_cbk.statpost, stub->xdata);
- break;
- }
+ stub->fn.setattr(stub->frame, stub->frame->this, &stub->args.loc,
+ &stub->args.stat, stub->args.valid,
+ stub->args.xdata);
+ break;
case GF_FOP_FSETATTR:
- {
- if (!stub->args.fsetattr_cbk.fn)
- STACK_UNWIND (stub->frame,
- stub->args.fsetattr_cbk.op_ret,
- stub->args.fsetattr_cbk.op_errno,
- &stub->args.fsetattr_cbk.statpre,
- &stub->args.fsetattr_cbk.statpost, stub->xdata);
- else
- stub->args.fsetattr_cbk.fn (
- stub->frame,
- stub->frame->cookie,
- stub->frame->this,
- stub->args.fsetattr_cbk.op_ret,
- stub->args.fsetattr_cbk.op_errno,
- &stub->args.fsetattr_cbk.statpre,
- &stub->args.fsetattr_cbk.statpost, stub->xdata);
- break;
- }
- default:
- {
- gf_log_callingfn ("call-stub", GF_LOG_ERROR,
- "Invalid value of FOP (%d)",
- stub->fop);
- break;
- }
- }
-out:
- return;
-}
+ stub->fn.fsetattr(stub->frame, stub->frame->this, stub->args.fd,
+ &stub->args.stat, stub->args.valid,
+ stub->args.xdata);
+ break;
+ case GF_FOP_FALLOCATE:
+ stub->fn.fallocate(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.flags, stub->args.offset,
+ stub->args.size, stub->args.xdata);
+ break;
+ case GF_FOP_DISCARD:
+ stub->fn.discard(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.offset, stub->args.size,
+ stub->args.xdata);
+ break;
+ case GF_FOP_ZEROFILL:
+ stub->fn.zerofill(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.offset, stub->args.size,
+ stub->args.xdata);
+ break;
+ case GF_FOP_IPC:
+ stub->fn.ipc(stub->frame, stub->frame->this, stub->args.cmd,
+ stub->args.xdata);
+ break;
+ case GF_FOP_SEEK:
+ stub->fn.seek(stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.offset, stub->args.what, stub->args.xdata);
+ break;
+ case GF_FOP_LEASE:
+ stub->fn.lease(stub->frame, stub->frame->this, &stub->args.loc,
+ &stub->args.lease, stub->args.xdata);
+ break;
+
+ case GF_FOP_GETACTIVELK:
+ stub->fn.getactivelk(stub->frame, stub->frame->this,
+ &stub->args.loc, stub->args.xdata);
+ break;
+
+ case GF_FOP_SETACTIVELK:
+ stub->fn.setactivelk(stub->frame, stub->frame->this,
+ &stub->args.loc, &stub->args.locklist,
+ stub->args.xdata);
+ break;
+
+ case GF_FOP_PUT:
+ stub->fn.put(stub->frame, stub->frame->this, &stub->args.loc,
+ stub->args.mode, stub->args.umask, stub->args.flags,
+ stub->args.vector, stub->args.count, stub->args.offset,
+ stub->args.iobref, stub->args.xattr, stub->args.xdata);
+ break;
+
+ case GF_FOP_COPY_FILE_RANGE:
+ stub->fn.copy_file_range(
+ stub->frame, stub->frame->this, stub->args.fd,
+ stub->args.off_in, stub->args.fd_dst, stub->args.off_out,
+ stub->args.size, stub->args.flags, stub->args.xdata);
+ break;
+ default:
+ gf_msg_callingfn("call-stub", GF_LOG_ERROR, EINVAL,
+ LG_MSG_INVALID_ENTRY,
+ "Invalid value of FOP"
+ " (%d)",
+ stub->fop);
+ break;
+ }
+out:
+ return;
+}
+
+#define STUB_UNWIND(stb, fop, args...) \
+ do { \
+ if (stb->fn_cbk.fop) \
+ stb->fn_cbk.fop(stb->frame, stb->frame->cookie, stb->frame->this, \
+ stb->args_cbk.op_ret, stb->args_cbk.op_errno, \
+ args); \
+ else \
+ STACK_UNWIND_STRICT(fop, stb->frame, stb->args_cbk.op_ret, \
+ stb->args_cbk.op_errno, args); \
+ } while (0)
static void
-call_stub_destroy_wind (call_stub_t *stub)
+call_resume_unwind(call_stub_t *stub)
{
- if (stub->xdata)
- dict_unref (stub->xdata);
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- switch (stub->fop) {
+ switch (stub->fop) {
case GF_FOP_OPEN:
- {
- loc_wipe (&stub->args.open.loc);
- if (stub->args.open.fd)
- fd_unref (stub->args.open.fd);
- break;
- }
+ STUB_UNWIND(stub, open, stub->args_cbk.fd, stub->args_cbk.xdata);
+ break;
case GF_FOP_CREATE:
- {
- loc_wipe (&stub->args.create.loc);
- if (stub->args.create.fd)
- fd_unref (stub->args.create.fd);
- break;
- }
+ STUB_UNWIND(stub, create, stub->args_cbk.fd, stub->args_cbk.inode,
+ &stub->args_cbk.stat, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
case GF_FOP_STAT:
- {
- loc_wipe (&stub->args.stat.loc);
- break;
- }
+ STUB_UNWIND(stub, stat, &stub->args_cbk.stat, stub->args_cbk.xdata);
+ break;
case GF_FOP_READLINK:
- {
- loc_wipe (&stub->args.readlink.loc);
- break;
- }
-
+ STUB_UNWIND(stub, readlink, stub->args_cbk.buf,
+ &stub->args_cbk.stat, stub->args.xdata);
+ break;
case GF_FOP_MKNOD:
- {
- loc_wipe (&stub->args.mknod.loc);
- }
- break;
-
+ STUB_UNWIND(stub, mknod, stub->args_cbk.inode, &stub->args_cbk.stat,
+ &stub->args_cbk.preparent, &stub->args_cbk.postparent,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_MKDIR:
- {
- loc_wipe (&stub->args.mkdir.loc);
- }
- break;
-
+ STUB_UNWIND(stub, mkdir, stub->args_cbk.inode, &stub->args_cbk.stat,
+ &stub->args_cbk.preparent, &stub->args_cbk.postparent,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_UNLINK:
- {
- loc_wipe (&stub->args.unlink.loc);
- }
- break;
-
+ STUB_UNWIND(stub, unlink, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
case GF_FOP_RMDIR:
- {
- loc_wipe (&stub->args.rmdir.loc);
- }
- break;
-
+ STUB_UNWIND(stub, rmdir, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
case GF_FOP_SYMLINK:
- {
- GF_FREE ((char *)stub->args.symlink.linkname);
- loc_wipe (&stub->args.symlink.loc);
- }
- break;
-
+ STUB_UNWIND(stub, symlink, stub->args_cbk.inode,
+ &stub->args_cbk.stat, &stub->args_cbk.preparent,
+ &stub->args_cbk.postparent, stub->args_cbk.xdata);
+ break;
case GF_FOP_RENAME:
- {
- loc_wipe (&stub->args.rename.old);
- loc_wipe (&stub->args.rename.new);
- }
- break;
-
+ STUB_UNWIND(stub, rename, &stub->args_cbk.stat,
+ &stub->args_cbk.preparent, &stub->args_cbk.postparent,
+ &stub->args_cbk.preparent2, &stub->args_cbk.postparent2,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_LINK:
- {
- loc_wipe (&stub->args.link.oldloc);
- loc_wipe (&stub->args.link.newloc);
- }
- break;
-
+ STUB_UNWIND(stub, link, stub->args_cbk.inode, &stub->args_cbk.stat,
+ &stub->args_cbk.preparent, &stub->args_cbk.postparent,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_TRUNCATE:
- {
- loc_wipe (&stub->args.truncate.loc);
- break;
- }
-
+ STUB_UNWIND(stub, truncate, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
case GF_FOP_READ:
- {
- if (stub->args.readv.fd)
- fd_unref (stub->args.readv.fd);
- break;
- }
-
+ STUB_UNWIND(stub, readv, stub->args_cbk.vector,
+ stub->args_cbk.count, &stub->args_cbk.stat,
+ stub->args_cbk.iobref, stub->args_cbk.xdata);
+ break;
case GF_FOP_WRITE:
- {
- struct iobref *iobref = stub->args.writev.iobref;
- if (stub->args.writev.fd)
- fd_unref (stub->args.writev.fd);
- GF_FREE (stub->args.writev.vector);
- if (iobref)
- iobref_unref (iobref);
- break;
- }
-
+ STUB_UNWIND(stub, writev, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
case GF_FOP_STATFS:
- {
- loc_wipe (&stub->args.statfs.loc);
- break;
- }
+ STUB_UNWIND(stub, statfs, &stub->args_cbk.statvfs,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_FLUSH:
- {
- if (stub->args.flush.fd)
- fd_unref (stub->args.flush.fd);
- break;
- }
-
+ STUB_UNWIND(stub, flush, stub->args_cbk.xdata);
+ break;
case GF_FOP_FSYNC:
- {
- if (stub->args.fsync.fd)
- fd_unref (stub->args.fsync.fd);
- break;
- }
-
+ STUB_UNWIND(stub, fsync, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
case GF_FOP_SETXATTR:
- {
- loc_wipe (&stub->args.setxattr.loc);
- if (stub->args.setxattr.dict)
- dict_unref (stub->args.setxattr.dict);
- break;
- }
-
+ STUB_UNWIND(stub, setxattr, stub->args_cbk.xdata);
+ break;
case GF_FOP_GETXATTR:
- {
- if (stub->args.getxattr.name)
- GF_FREE ((char *)stub->args.getxattr.name);
- loc_wipe (&stub->args.getxattr.loc);
- break;
- }
-
+ STUB_UNWIND(stub, getxattr, stub->args_cbk.xattr,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_FSETXATTR:
- {
- fd_unref (stub->args.fsetxattr.fd);
- if (stub->args.fsetxattr.dict)
- dict_unref (stub->args.fsetxattr.dict);
- break;
- }
-
+ STUB_UNWIND(stub, fsetxattr, stub->args_cbk.xdata);
+ break;
case GF_FOP_FGETXATTR:
- {
- if (stub->args.fgetxattr.name)
- GF_FREE ((char *)stub->args.fgetxattr.name);
- fd_unref (stub->args.fgetxattr.fd);
- break;
- }
-
+ STUB_UNWIND(stub, fgetxattr, stub->args_cbk.xattr,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_REMOVEXATTR:
- {
- loc_wipe (&stub->args.removexattr.loc);
- GF_FREE ((char *)stub->args.removexattr.name);
- break;
- }
-
+ STUB_UNWIND(stub, removexattr, stub->args_cbk.xdata);
+ break;
case GF_FOP_FREMOVEXATTR:
- {
- fd_unref (stub->args.fremovexattr.fd);
- GF_FREE ((char *)stub->args.fremovexattr.name);
- break;
- }
-
+ STUB_UNWIND(stub, fremovexattr, stub->args_cbk.xdata);
+ break;
case GF_FOP_OPENDIR:
- {
- loc_wipe (&stub->args.opendir.loc);
- if (stub->args.opendir.fd)
- fd_unref (stub->args.opendir.fd);
- break;
- }
-
+ STUB_UNWIND(stub, opendir, stub->args_cbk.fd, stub->args_cbk.xdata);
+ break;
case GF_FOP_FSYNCDIR:
- {
- if (stub->args.fsyncdir.fd)
- fd_unref (stub->args.fsyncdir.fd);
- break;
- }
-
+ STUB_UNWIND(stub, fsyncdir, stub->args_cbk.xdata);
+ break;
case GF_FOP_ACCESS:
- {
- loc_wipe (&stub->args.access.loc);
- break;
- }
-
+ STUB_UNWIND(stub, access, stub->args_cbk.xdata);
+ break;
case GF_FOP_FTRUNCATE:
- {
- if (stub->args.ftruncate.fd)
- fd_unref (stub->args.ftruncate.fd);
- break;
- }
-
+ STUB_UNWIND(stub, ftruncate, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
case GF_FOP_FSTAT:
- {
- if (stub->args.fstat.fd)
- fd_unref (stub->args.fstat.fd);
- break;
- }
-
+ STUB_UNWIND(stub, fstat, &stub->args_cbk.stat,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_LK:
- {
- if (stub->args.lk.fd)
- fd_unref (stub->args.lk.fd);
- break;
- }
-
+ STUB_UNWIND(stub, lk, &stub->args_cbk.lock, stub->args_cbk.xdata);
+ break;
case GF_FOP_INODELK:
- {
- if (stub->args.inodelk.volume)
- GF_FREE ((char *)stub->args.inodelk.volume);
-
- loc_wipe (&stub->args.inodelk.loc);
- break;
- }
+ STUB_UNWIND(stub, inodelk, stub->args_cbk.xdata);
+ break;
case GF_FOP_FINODELK:
- {
- if (stub->args.finodelk.volume)
- GF_FREE ((char *)stub->args.finodelk.volume);
-
- if (stub->args.finodelk.fd)
- fd_unref (stub->args.finodelk.fd);
- break;
- }
+ STUB_UNWIND(stub, finodelk, stub->args_cbk.xdata);
+ break;
case GF_FOP_ENTRYLK:
- {
- if (stub->args.entrylk.volume)
- GF_FREE ((char *)stub->args.entrylk.volume);
-
- if (stub->args.entrylk.name)
- GF_FREE ((char *)stub->args.entrylk.name);
- loc_wipe (&stub->args.entrylk.loc);
- break;
- }
+ STUB_UNWIND(stub, entrylk, stub->args_cbk.xdata);
+ break;
case GF_FOP_FENTRYLK:
- {
- if (stub->args.fentrylk.volume)
- GF_FREE ((char *)stub->args.fentrylk.volume);
-
- if (stub->args.fentrylk.name)
- GF_FREE ((char *)stub->args.fentrylk.name);
-
- if (stub->args.fentrylk.fd)
- fd_unref (stub->args.fentrylk.fd);
- break;
- }
-
+ STUB_UNWIND(stub, fentrylk, stub->args_cbk.xdata);
+ break;
case GF_FOP_LOOKUP:
- {
- loc_wipe (&stub->args.lookup.loc);
- break;
- }
-
+ STUB_UNWIND(stub, lookup, stub->args_cbk.inode,
+ &stub->args_cbk.stat, stub->args_cbk.xdata,
+ &stub->args_cbk.postparent);
+ break;
case GF_FOP_RCHECKSUM:
- {
- if (stub->args.rchecksum.fd)
- fd_unref (stub->args.rchecksum.fd);
- break;
- }
-
+ STUB_UNWIND(stub, rchecksum, stub->args_cbk.weak_checksum,
+ stub->args_cbk.strong_checksum, stub->args_cbk.xdata);
+ break;
case GF_FOP_READDIR:
- {
- if (stub->args.readdir.fd)
- fd_unref (stub->args.readdir.fd);
- break;
- }
-
+ STUB_UNWIND(stub, readdir, &stub->args_cbk.entries,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_READDIRP:
- {
- if (stub->args.readdirp.fd)
- fd_unref (stub->args.readdirp.fd);
-
- break;
- }
-
+ STUB_UNWIND(stub, readdir, &stub->args_cbk.entries,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_XATTROP:
- {
- loc_wipe (&stub->args.xattrop.loc);
- dict_unref (stub->args.xattrop.xattr);
- break;
- }
+ STUB_UNWIND(stub, xattrop, stub->args_cbk.xattr,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_FXATTROP:
- {
- if (stub->args.fxattrop.fd)
- fd_unref (stub->args.fxattrop.fd);
- dict_unref (stub->args.fxattrop.xattr);
- break;
- }
+ STUB_UNWIND(stub, fxattrop, stub->args_cbk.xattr,
+ stub->args_cbk.xdata);
+ break;
case GF_FOP_SETATTR:
- {
- loc_wipe (&stub->args.setattr.loc);
- break;
- }
+ STUB_UNWIND(stub, setattr, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
case GF_FOP_FSETATTR:
- {
- if (stub->args.fsetattr.fd)
- fd_unref (stub->args.fsetattr.fd);
- break;
- }
+ STUB_UNWIND(stub, fsetattr, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_FALLOCATE:
+ STUB_UNWIND(stub, fallocate, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_DISCARD:
+ STUB_UNWIND(stub, discard, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_ZEROFILL:
+ STUB_UNWIND(stub, zerofill, &stub->args_cbk.prestat,
+ &stub->args_cbk.poststat, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_IPC:
+ STUB_UNWIND(stub, ipc, stub->args_cbk.xdata);
+ break;
+ case GF_FOP_SEEK:
+ STUB_UNWIND(stub, seek, stub->args_cbk.offset,
+ stub->args_cbk.xdata);
+ break;
+ case GF_FOP_LEASE:
+ STUB_UNWIND(stub, lease, &stub->args_cbk.lease,
+ stub->args_cbk.xdata);
+ break;
+
+ case GF_FOP_GETACTIVELK:
+ STUB_UNWIND(stub, getactivelk, &stub->args_cbk.locklist,
+ stub->args_cbk.xdata);
+ break;
+
+ case GF_FOP_SETACTIVELK:
+ STUB_UNWIND(stub, setactivelk, stub->args_cbk.xdata);
+ break;
+
+ case GF_FOP_PUT:
+ STUB_UNWIND(stub, put, stub->args_cbk.inode, &stub->args_cbk.stat,
+ &stub->args_cbk.preparent, &stub->args_cbk.postparent,
+ stub->args_cbk.xdata);
+ break;
+
+ case GF_FOP_COPY_FILE_RANGE:
+ STUB_UNWIND(stub, copy_file_range, &stub->args_cbk.stat,
+ &stub->args_cbk.prestat, &stub->args_cbk.poststat,
+ stub->args_cbk.xdata);
+ break;
+
default:
- {
- gf_log_callingfn ("call-stub", GF_LOG_ERROR,
- "Invalid value of FOP (%d)",
- stub->fop);
- break;
- }
- }
+ gf_msg_callingfn("call-stub", GF_LOG_ERROR, EINVAL,
+ LG_MSG_INVALID_ENTRY,
+ "Invalid value of FOP"
+ " (%d)",
+ stub->fop);
+ break;
+ }
+out:
+ return;
}
-
static void
-call_stub_destroy_unwind (call_stub_t *stub)
+call_stub_wipe_args(call_stub_t *stub)
{
- if (stub->xdata)
- dict_unref (stub->xdata);
-
- switch (stub->fop) {
- case GF_FOP_OPEN:
- {
- if (stub->args.open_cbk.fd)
- fd_unref (stub->args.open_cbk.fd);
- }
- break;
-
- case GF_FOP_CREATE:
- {
- if (stub->args.create_cbk.fd)
- fd_unref (stub->args.create_cbk.fd);
-
- if (stub->args.create_cbk.inode)
- inode_unref (stub->args.create_cbk.inode);
- }
- break;
-
- case GF_FOP_STAT:
- break;
-
- case GF_FOP_READLINK:
- {
- if (stub->args.readlink_cbk.buf)
- GF_FREE ((char *)stub->args.readlink_cbk.buf);
- }
- break;
-
- case GF_FOP_MKNOD:
- {
- if (stub->args.mknod_cbk.inode)
- inode_unref (stub->args.mknod_cbk.inode);
- }
- break;
-
- case GF_FOP_MKDIR:
- {
- if (stub->args.mkdir_cbk.inode)
- inode_unref (stub->args.mkdir_cbk.inode);
- }
- break;
-
- case GF_FOP_UNLINK:
- break;
-
- case GF_FOP_RMDIR:
- break;
-
- case GF_FOP_SYMLINK:
- {
- if (stub->args.symlink_cbk.inode)
- inode_unref (stub->args.symlink_cbk.inode);
- }
- break;
-
- case GF_FOP_RENAME:
- break;
-
- case GF_FOP_LINK:
- {
- if (stub->args.link_cbk.inode)
- inode_unref (stub->args.link_cbk.inode);
- }
- break;
-
- case GF_FOP_TRUNCATE:
- break;
-
- case GF_FOP_READ:
- {
- if (stub->args.readv_cbk.op_ret >= 0) {
- struct iobref *iobref = stub->args.readv_cbk.iobref;
- GF_FREE (stub->args.readv_cbk.vector);
-
- if (iobref) {
- iobref_unref (iobref);
- }
- }
- }
- break;
-
- case GF_FOP_WRITE:
- break;
-
- case GF_FOP_STATFS:
- break;
-
- case GF_FOP_FLUSH:
- break;
-
- case GF_FOP_FSYNC:
- break;
-
- case GF_FOP_SETXATTR:
- break;
-
- case GF_FOP_GETXATTR:
- {
- if (stub->args.getxattr_cbk.dict)
- dict_unref (stub->args.getxattr_cbk.dict);
- }
- break;
-
- case GF_FOP_FSETXATTR:
- break;
-
- case GF_FOP_FGETXATTR:
- {
- if (stub->args.fgetxattr_cbk.dict)
- dict_unref (stub->args.fgetxattr_cbk.dict);
- }
- break;
-
- case GF_FOP_REMOVEXATTR:
- break;
- case GF_FOP_FREMOVEXATTR:
- break;
-
- case GF_FOP_OPENDIR:
- {
- if (stub->args.opendir_cbk.fd)
- fd_unref (stub->args.opendir_cbk.fd);
- }
- break;
-
- case GF_FOP_FSYNCDIR:
- break;
-
- case GF_FOP_ACCESS:
- break;
-
- case GF_FOP_FTRUNCATE:
- break;
+ args_wipe(&stub->args);
+}
- case GF_FOP_FSTAT:
- break;
+static void
+call_stub_wipe_args_cbk(call_stub_t *stub)
+{
+ args_cbk_wipe(&stub->args_cbk);
+}
- case GF_FOP_LK:
- break;
+void
+call_stub_destroy(call_stub_t *stub)
+{
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_INODELK:
- break;
+ if (stub->wind)
+ call_stub_wipe_args(stub);
+ else
+ call_stub_wipe_args_cbk(stub);
- case GF_FOP_FINODELK:
- break;
+ stub->stub_mem_pool = NULL;
- case GF_FOP_ENTRYLK:
- break;
+ mem_put(stub);
+out:
+ return;
+}
- case GF_FOP_FENTRYLK:
- break;
+void
+call_resume(call_stub_t *stub)
+{
+ xlator_t *old_THIS = NULL;
- case GF_FOP_LOOKUP:
- {
- if (stub->args.lookup_cbk.inode)
- inode_unref (stub->args.lookup_cbk.inode);
- }
- break;
+ errno = EINVAL;
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- case GF_FOP_RCHECKSUM:
- {
- if (stub->args.rchecksum_cbk.op_ret >= 0) {
- GF_FREE (stub->args.rchecksum_cbk.strong_checksum);
- }
- }
- break;
+ list_del_init(&stub->list);
- case GF_FOP_READDIR:
- {
- if (stub->args.readdir_cbk.op_ret > 0) {
- gf_dirent_free (&stub->args.readdir_cbk.entries);
- }
- }
- break;
+ old_THIS = THIS;
+ THIS = stub->frame->this;
+ {
+ if (stub->wind)
+ call_resume_wind(stub);
+ else
+ call_resume_unwind(stub);
+ }
+ THIS = old_THIS;
- case GF_FOP_READDIRP:
- {
- if (stub->args.readdirp_cbk.op_ret > 0) {
- gf_dirent_free (&stub->args.readdirp_cbk.entries);
- }
- }
- break;
+ call_stub_destroy(stub);
+out:
+ return;
+}
- case GF_FOP_XATTROP:
- {
- if (stub->args.xattrop_cbk.xattr)
- dict_unref (stub->args.xattrop_cbk.xattr);
- }
- break;
+void
+call_unwind_error(call_stub_t *stub, int op_ret, int op_errno)
+{
+ xlator_t *old_THIS = NULL;
- case GF_FOP_FXATTROP:
- {
- if (stub->args.fxattrop_cbk.xattr)
- dict_unref (stub->args.fxattrop_cbk.xattr);
- }
- break;
+ list_del_init(&stub->list);
- case GF_FOP_SETATTR:
- {
- break;
- }
+ old_THIS = THIS;
+ THIS = stub->frame->this;
+ {
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ call_resume_unwind(stub);
+ }
+ THIS = old_THIS;
- case GF_FOP_FSETATTR:
- {
- break;
- }
+ call_stub_destroy(stub);
- default:
- {
- gf_log_callingfn ("call-stub", GF_LOG_ERROR,
- "Invalid value of FOP (%d)",
- stub->fop);
- break;
- }
- }
+ return;
}
-
void
-call_stub_destroy (call_stub_t *stub)
+call_unwind_error_keep_stub(call_stub_t *stub, int op_ret, int op_errno)
{
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ xlator_t *old_THIS = NULL;
- if (stub->wind) {
- call_stub_destroy_wind (stub);
- } else {
- call_stub_destroy_unwind (stub);
- }
+ list_del_init(&stub->list);
- stub->stub_mem_pool = NULL;
- mem_put (stub);
-out:
- return;
+ old_THIS = THIS;
+ THIS = stub->frame->this;
+ {
+ stub->args_cbk.op_ret = op_ret;
+ stub->args_cbk.op_errno = op_errno;
+ call_resume_unwind(stub);
+ }
+
+ THIS = old_THIS;
+
+ return;
}
void
-call_resume (call_stub_t *stub)
+call_resume_keep_stub(call_stub_t *stub)
{
- xlator_t *old_THIS = NULL;
+ xlator_t *old_THIS = NULL;
+
+ errno = EINVAL;
+ GF_VALIDATE_OR_GOTO("call-stub", stub, out);
- errno = EINVAL;
- GF_VALIDATE_OR_GOTO ("call-stub", stub, out);
+ list_del_init(&stub->list);
- list_del_init (&stub->list);
+ old_THIS = THIS;
+ THIS = stub->frame->this;
+ {
+ if (stub->wind)
+ call_resume_wind(stub);
+ else
+ call_resume_unwind(stub);
+ }
- old_THIS = THIS;
- THIS = stub->frame->this;
- {
- if (stub->wind)
- call_resume_wind (stub);
- else
- call_resume_unwind (stub);
- }
- THIS = old_THIS;
+ THIS = old_THIS;
- call_stub_destroy (stub);
out:
- return;
+ return;
}
diff --git a/libglusterfs/src/call-stub.h b/libglusterfs/src/call-stub.h
deleted file mode 100644
index 633fc4cbbe4..00000000000
--- a/libglusterfs/src/call-stub.h
+++ /dev/null
@@ -1,1134 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _CALL_STUB_H_
-#define _CALL_STUB_H_
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "stack.h"
-#include "list.h"
-
-typedef struct {
- struct list_head list;
- char wind;
- call_frame_t *frame;
- glusterfs_fop_t fop;
- struct mem_pool *stub_mem_pool; /* pointer to stub mempool in ctx_t */
- dict_t *xdata; /* common accross all the fops */
-
- union {
- /* lookup */
- struct {
- fop_lookup_t fn;
- loc_t loc;
- } lookup;
- struct {
- fop_lookup_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct iatt buf;
- struct iatt postparent;
- } lookup_cbk;
-
- /* stat */
- struct {
- fop_stat_t fn;
- loc_t loc;
- } stat;
- struct {
- fop_stat_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt buf;
- } stat_cbk;
-
- /* fstat */
- struct {
- fop_fstat_t fn;
- fd_t *fd;
- } fstat;
- struct {
- fop_fstat_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt buf;
- } fstat_cbk;
-
- /* truncate */
- struct {
- fop_truncate_t fn;
- loc_t loc;
- off_t off;
- } truncate;
- struct {
- fop_truncate_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt prebuf;
- struct iatt postbuf;
- } truncate_cbk;
-
- /* ftruncate */
- struct {
- fop_ftruncate_t fn;
- fd_t *fd;
- off_t off;
- } ftruncate;
- struct {
- fop_ftruncate_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt prebuf;
- struct iatt postbuf;
- } ftruncate_cbk;
-
- /* access */
- struct {
- fop_access_t fn;
- loc_t loc;
- int32_t mask;
- } access;
- struct {
- fop_access_cbk_t fn;
- int32_t op_ret, op_errno;
- } access_cbk;
-
- /* readlink */
- struct {
- fop_readlink_t fn;
- loc_t loc;
- size_t size;
- } readlink;
- struct {
- fop_readlink_cbk_t fn;
- int32_t op_ret, op_errno;
- const char *buf;
- struct iatt sbuf;
- } readlink_cbk;
-
- /* mknod */
- struct {
- fop_mknod_t fn;
- loc_t loc;
- mode_t mode;
- dev_t rdev;
- mode_t umask;
- } mknod;
- struct {
- fop_mknod_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- } mknod_cbk;
-
- /* mkdir */
- struct {
- fop_mkdir_t fn;
- loc_t loc;
- mode_t mode;
- mode_t umask;
- } mkdir;
- struct {
- fop_mkdir_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- } mkdir_cbk;
-
- /* unlink */
- struct {
- fop_unlink_t fn;
- loc_t loc;
- int xflag;
- } unlink;
- struct {
- fop_unlink_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt preparent;
- struct iatt postparent;
- } unlink_cbk;
-
- /* rmdir */
- struct {
- fop_rmdir_t fn;
- loc_t loc;
- int flags;
- } rmdir;
- struct {
- fop_rmdir_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt preparent;
- struct iatt postparent;
- } rmdir_cbk;
-
- /* symlink */
- struct {
- fop_symlink_t fn;
- const char *linkname;
- loc_t loc;
- mode_t umask;
- } symlink;
- struct {
- fop_symlink_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- } symlink_cbk;
-
- /* rename */
- struct {
- fop_rename_t fn;
- loc_t old;
- loc_t new;
- } rename;
- struct {
- fop_rename_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt buf;
- struct iatt preoldparent;
- struct iatt postoldparent;
- struct iatt prenewparent;
- struct iatt postnewparent;
- } rename_cbk;
-
- /* link */
- struct {
- fop_link_t fn;
- loc_t oldloc;
- loc_t newloc;
- } link;
- struct {
- fop_link_cbk_t fn;
- int32_t op_ret, op_errno;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- } link_cbk;
-
- /* create */
- struct {
- fop_create_t fn;
- loc_t loc;
- int32_t flags;
- mode_t mode;
- fd_t *fd;
- mode_t umask;
- } create;
- struct {
- fop_create_cbk_t fn;
- int32_t op_ret, op_errno;
- fd_t *fd;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- } create_cbk;
-
- /* open */
- struct {
- fop_open_t fn;
- loc_t loc;
- int32_t flags;
- fd_t *fd;
- } open;
- struct {
- fop_open_cbk_t fn;
- int32_t op_ret, op_errno;
- fd_t *fd;
- } open_cbk;
-
- /* readv */
- struct {
- fop_readv_t fn;
- fd_t *fd;
- size_t size;
- off_t off;
- uint32_t flags;
- } readv;
- struct {
- fop_readv_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- struct iovec *vector;
- int32_t count;
- struct iatt stbuf;
- struct iobref *iobref;
- } readv_cbk;
-
- /* writev */
- struct {
- fop_writev_t fn;
- fd_t *fd;
- struct iovec *vector;
- int32_t count;
- off_t off;
- uint32_t flags;
- struct iobref *iobref;
- } writev;
- struct {
- fop_writev_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt prebuf;
- struct iatt postbuf;
- } writev_cbk;
-
- /* flush */
- struct {
- fop_flush_t fn;
- fd_t *fd;
- } flush;
- struct {
- fop_flush_cbk_t fn;
- int32_t op_ret, op_errno;
- } flush_cbk;
-
- /* fsync */
- struct {
- fop_fsync_t fn;
- fd_t *fd;
- int32_t datasync;
- } fsync;
- struct {
- fop_fsync_cbk_t fn;
- int32_t op_ret, op_errno;
- struct iatt prebuf;
- struct iatt postbuf;
- } fsync_cbk;
-
- /* opendir */
- struct {
- fop_opendir_t fn;
- loc_t loc;
- fd_t *fd;
- } opendir;
- struct {
- fop_opendir_cbk_t fn;
- int32_t op_ret, op_errno;
- fd_t *fd;
- } opendir_cbk;
-
-
- /* fsyncdir */
- struct {
- fop_fsyncdir_t fn;
- fd_t *fd;
- int32_t datasync;
- } fsyncdir;
- struct {
- fop_fsyncdir_cbk_t fn;
- int32_t op_ret, op_errno;
- } fsyncdir_cbk;
-
- /* statfs */
- struct {
- fop_statfs_t fn;
- loc_t loc;
- } statfs;
- struct {
- fop_statfs_cbk_t fn;
- int32_t op_ret, op_errno;
- struct statvfs buf;
- } statfs_cbk;
-
- /* setxattr */
- struct {
- fop_setxattr_t fn;
- loc_t loc;
- dict_t *dict;
- int32_t flags;
- } setxattr;
- struct {
- fop_setxattr_cbk_t fn;
- int32_t op_ret, op_errno;
- } setxattr_cbk;
-
- /* getxattr */
- struct {
- fop_getxattr_t fn;
- loc_t loc;
- const char *name;
- } getxattr;
- struct {
- fop_getxattr_cbk_t fn;
- int32_t op_ret, op_errno;
- dict_t *dict;
- } getxattr_cbk;
-
- /* fsetxattr */
- struct {
- fop_fsetxattr_t fn;
- fd_t *fd;
- dict_t *dict;
- int32_t flags;
- } fsetxattr;
- struct {
- fop_fsetxattr_cbk_t fn;
- int32_t op_ret, op_errno;
- } fsetxattr_cbk;
-
- /* fgetxattr */
- struct {
- fop_fgetxattr_t fn;
- fd_t *fd;
- const char *name;
- } fgetxattr;
- struct {
- fop_fgetxattr_cbk_t fn;
- int32_t op_ret, op_errno;
- dict_t *dict;
- } fgetxattr_cbk;
-
- /* removexattr */
- struct {
- fop_removexattr_t fn;
- loc_t loc;
- const char *name;
- } removexattr;
- struct {
- fop_removexattr_cbk_t fn;
- int32_t op_ret, op_errno;
- } removexattr_cbk;
-
-
- /* fremovexattr */
- struct {
- fop_fremovexattr_t fn;
- fd_t *fd;
- const char *name;
- } fremovexattr;
- struct {
- fop_fremovexattr_cbk_t fn;
- int32_t op_ret, op_errno;
- } fremovexattr_cbk;
-
- /* lk */
- struct {
- fop_lk_t fn;
- fd_t *fd;
- int32_t cmd;
- struct gf_flock lock;
- } lk;
- struct {
- fop_lk_cbk_t fn;
- int32_t op_ret, op_errno;
- struct gf_flock lock;
- } lk_cbk;
-
- /* inodelk */
- struct {
- fop_inodelk_t fn;
- const char *volume;
- loc_t loc;
- int32_t cmd;
- struct gf_flock lock;
- } inodelk;
-
- struct {
- fop_inodelk_cbk_t fn;
- int32_t op_ret, op_errno;
- } inodelk_cbk;
-
- /* finodelk */
- struct {
- fop_finodelk_t fn;
- const char *volume;
- fd_t *fd;
- int32_t cmd;
- struct gf_flock lock;
- } finodelk;
-
- struct {
- fop_finodelk_cbk_t fn;
- int32_t op_ret, op_errno;
- } finodelk_cbk;
-
- /* entrylk */
- struct {
- fop_entrylk_t fn;
- loc_t loc;
- const char *volume;
- const char *name;
- entrylk_cmd cmd;
- entrylk_type type;
- } entrylk;
-
- struct {
- fop_entrylk_cbk_t fn;
- int32_t op_ret, op_errno;
- } entrylk_cbk;
-
- /* fentrylk */
- struct {
- fop_fentrylk_t fn;
- fd_t *fd;
- const char *volume;
- const char *name;
- entrylk_cmd cmd;
- entrylk_type type;
- } fentrylk;
-
- struct {
- fop_fentrylk_cbk_t fn;
- int32_t op_ret, op_errno;
- } fentrylk_cbk;
-
- /* readdir */
- struct {
- fop_readdir_t fn;
- fd_t *fd;
- size_t size;
- off_t off;
- } readdir;
- struct {
- fop_readdir_cbk_t fn;
- int32_t op_ret, op_errno;
- gf_dirent_t entries;
- } readdir_cbk;
-
- /* readdirp */
- struct {
- fop_readdirp_t fn;
- fd_t *fd;
- size_t size;
- off_t off;
- } readdirp;
- struct {
- fop_readdirp_cbk_t fn;
- int32_t op_ret, op_errno;
- gf_dirent_t entries;
- } readdirp_cbk;
-
- /* rchecksum */
- struct {
- fop_rchecksum_t fn;
- fd_t *fd;
- off_t offset;
- int32_t len;
- } rchecksum;
- struct {
- fop_rchecksum_cbk_t fn;
- int32_t op_ret, op_errno;
- uint32_t weak_checksum;
- uint8_t *strong_checksum;
- } rchecksum_cbk;
-
- /* xattrop */
- struct {
- fop_xattrop_t fn;
- loc_t loc;
- gf_xattrop_flags_t optype;
- dict_t *xattr;
- } xattrop;
- struct {
- fop_xattrop_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- dict_t *xattr;
- } xattrop_cbk;
-
- /* fxattrop */
- struct {
- fop_fxattrop_t fn;
- fd_t *fd;
- gf_xattrop_flags_t optype;
- dict_t *xattr;
- } fxattrop;
- struct {
- fop_fxattrop_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- dict_t *xattr;
- } fxattrop_cbk;
-
- /* setattr */
- struct {
- fop_setattr_t fn;
- loc_t loc;
- struct iatt stbuf;
- int32_t valid;
- } setattr;
- struct {
- fop_setattr_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- struct iatt statpre;
- struct iatt statpost;
- } setattr_cbk;
-
- /* fsetattr */
- struct {
- fop_fsetattr_t fn;
- fd_t *fd;
- struct iatt stbuf;
- int32_t valid;
- } fsetattr;
- struct {
- fop_fsetattr_cbk_t fn;
- int32_t op_ret;
- int32_t op_errno;
- struct iatt statpre;
- struct iatt statpost;
- } fsetattr_cbk;
-
- } args;
-} call_stub_t;
-
-call_stub_t *
-fop_lookup_stub (call_frame_t *frame,
- fop_lookup_t fn,
- loc_t *loc,
- dict_t *xdata);
-
-call_stub_t *
-fop_lookup_cbk_stub (call_frame_t *frame,
- fop_lookup_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *xdata,
- struct iatt *postparent);
-call_stub_t *
-fop_stat_stub (call_frame_t *frame,
- fop_stat_t fn,
- loc_t *loc, dict_t *xdata);
-call_stub_t *
-fop_stat_cbk_stub (call_frame_t *frame,
- fop_stat_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf, dict_t *xdata);
-call_stub_t *
-fop_fstat_stub (call_frame_t *frame,
- fop_fstat_t fn,
- fd_t *fd, dict_t *xdata);
-call_stub_t *
-fop_fstat_cbk_stub (call_frame_t *frame,
- fop_fstat_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf, dict_t *xdata);
-
-call_stub_t *
-fop_truncate_stub (call_frame_t *frame,
- fop_truncate_t fn,
- loc_t *loc,
- off_t off, dict_t *xdata);
-
-call_stub_t *
-fop_truncate_cbk_stub (call_frame_t *frame,
- fop_truncate_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-call_stub_t *
-fop_ftruncate_stub (call_frame_t *frame,
- fop_ftruncate_t fn,
- fd_t *fd,
- off_t off, dict_t *xdata);
-
-call_stub_t *
-fop_ftruncate_cbk_stub (call_frame_t *frame,
- fop_ftruncate_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-call_stub_t *
-fop_access_stub (call_frame_t *frame,
- fop_access_t fn,
- loc_t *loc,
- int32_t mask, dict_t *xdata);
-
-call_stub_t *
-fop_access_cbk_stub (call_frame_t *frame,
- fop_access_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_readlink_stub (call_frame_t *frame,
- fop_readlink_t fn,
- loc_t *loc,
- size_t size, dict_t *xdata);
-
-call_stub_t *
-fop_readlink_cbk_stub (call_frame_t *frame,
- fop_readlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *buf, dict_t *xdata);
-
-call_stub_t *
-fop_mknod_stub (call_frame_t *frame, fop_mknod_t fn, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata);
-
-call_stub_t *
-fop_mknod_cbk_stub (call_frame_t *frame,
- fop_mknod_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-call_stub_t *
-fop_mkdir_stub (call_frame_t *frame, fop_mkdir_t fn, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata);
-
-call_stub_t *
-fop_mkdir_cbk_stub (call_frame_t *frame,
- fop_mkdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-call_stub_t *
-fop_unlink_stub (call_frame_t *frame, fop_unlink_t fn,
- loc_t *loc, int xflag, dict_t *xdata);
-
-call_stub_t *
-fop_unlink_cbk_stub (call_frame_t *frame,
- fop_unlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-call_stub_t *
-fop_rmdir_stub (call_frame_t *frame, fop_rmdir_t fn,
- loc_t *loc, int flags, dict_t *xdata);
-
-call_stub_t *
-fop_rmdir_cbk_stub (call_frame_t *frame,
- fop_rmdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-call_stub_t *
-fop_symlink_stub (call_frame_t *frame, fop_symlink_t fn,
- const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata);
-
-call_stub_t *
-fop_symlink_cbk_stub (call_frame_t *frame,
- fop_symlink_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-call_stub_t *
-fop_rename_stub (call_frame_t *frame,
- fop_rename_t fn,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-call_stub_t *
-fop_rename_cbk_stub (call_frame_t *frame,
- fop_rename_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent, dict_t *xdata);
-
-call_stub_t *
-fop_link_stub (call_frame_t *frame,
- fop_link_t fn,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-call_stub_t *
-fop_link_cbk_stub (call_frame_t *frame,
- fop_link_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-call_stub_t *
-fop_create_stub (call_frame_t *frame, fop_create_t fn,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata);
-
-call_stub_t *
-fop_create_cbk_stub (call_frame_t *frame,
- fop_create_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-call_stub_t *
-fop_open_stub (call_frame_t *frame,
- fop_open_t fn,
- loc_t *loc,
- int32_t flags,
- fd_t *fd,
- dict_t *xdata);
-
-call_stub_t *
-fop_open_cbk_stub (call_frame_t *frame,
- fop_open_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd, dict_t *xdata);
-
-call_stub_t *
-fop_readv_stub (call_frame_t *frame,
- fop_readv_t fn,
- fd_t *fd,
- size_t size,
- off_t off, uint32_t flags, dict_t *xdata);
-
-call_stub_t *
-fop_readv_cbk_stub (call_frame_t *frame,
- fop_readv_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref, dict_t *xdata);
-
-call_stub_t *
-fop_writev_stub (call_frame_t *frame,
- fop_writev_t fn,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off, uint32_t flags,
- struct iobref *iobref, dict_t *xdata);
-
-call_stub_t *
-fop_writev_cbk_stub (call_frame_t *frame,
- fop_writev_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-call_stub_t *
-fop_flush_stub (call_frame_t *frame,
- fop_flush_t fn,
- fd_t *fd, dict_t *xdata);
-
-call_stub_t *
-fop_flush_cbk_stub (call_frame_t *frame,
- fop_flush_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_fsync_stub (call_frame_t *frame,
- fop_fsync_t fn,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-call_stub_t *
-fop_fsync_cbk_stub (call_frame_t *frame,
- fop_fsync_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-call_stub_t *
-fop_opendir_stub (call_frame_t *frame,
- fop_opendir_t fn,
- loc_t *loc, fd_t *fd, dict_t *xdata);
-
-call_stub_t *
-fop_opendir_cbk_stub (call_frame_t *frame,
- fop_opendir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd, dict_t *xdata);
-
-call_stub_t *
-fop_fsyncdir_stub (call_frame_t *frame,
- fop_fsyncdir_t fn,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-call_stub_t *
-fop_fsyncdir_cbk_stub (call_frame_t *frame,
- fop_fsyncdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_statfs_stub (call_frame_t *frame,
- fop_statfs_t fn,
- loc_t *loc, dict_t *xdata);
-
-call_stub_t *
-fop_statfs_cbk_stub (call_frame_t *frame,
- fop_statfs_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf, dict_t *xdata);
-
-call_stub_t *
-fop_setxattr_stub (call_frame_t *frame,
- fop_setxattr_t fn,
- loc_t *loc,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-call_stub_t *
-fop_setxattr_cbk_stub (call_frame_t *frame,
- fop_setxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_getxattr_stub (call_frame_t *frame,
- fop_getxattr_t fn,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-call_stub_t *
-fop_getxattr_cbk_stub (call_frame_t *frame,
- fop_getxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *value, dict_t *xdata);
-
-call_stub_t *
-fop_fsetxattr_stub (call_frame_t *frame,
- fop_fsetxattr_t fn,
- fd_t *fd,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-call_stub_t *
-fop_fsetxattr_cbk_stub (call_frame_t *frame,
- fop_fsetxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_fgetxattr_stub (call_frame_t *frame,
- fop_fgetxattr_t fn,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-call_stub_t *
-fop_fgetxattr_cbk_stub (call_frame_t *frame,
- fop_fgetxattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *value, dict_t *xdata);
-
-call_stub_t *
-fop_removexattr_stub (call_frame_t *frame,
- fop_removexattr_t fn,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-call_stub_t *
-fop_removexattr_cbk_stub (call_frame_t *frame,
- fop_removexattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-
-call_stub_t *
-fop_fremovexattr_stub (call_frame_t *frame,
- fop_fremovexattr_t fn,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-call_stub_t *
-fop_fremovexattr_cbk_stub (call_frame_t *frame,
- fop_fremovexattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_lk_stub (call_frame_t *frame,
- fop_lk_t fn,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *lock, dict_t *xdata);
-
-call_stub_t *
-fop_lk_cbk_stub (call_frame_t *frame,
- fop_lk_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *lock, dict_t *xdata);
-
-call_stub_t *
-fop_inodelk_stub (call_frame_t *frame, fop_inodelk_t fn,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata);
-
-call_stub_t *
-fop_finodelk_stub (call_frame_t *frame, fop_finodelk_t fn,
- const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata);
-
-call_stub_t *
-fop_entrylk_stub (call_frame_t *frame, fop_entrylk_t fn,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-call_stub_t *
-fop_fentrylk_stub (call_frame_t *frame, fop_fentrylk_t fn,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-call_stub_t *
-fop_inodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_finodelk_cbk_stub (call_frame_t *frame, fop_inodelk_cbk_t fn,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_entrylk_cbk_stub (call_frame_t *frame, fop_entrylk_cbk_t fn,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_fentrylk_cbk_stub (call_frame_t *frame, fop_entrylk_cbk_t fn,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_readdir_stub (call_frame_t *frame,
- fop_readdir_t fn,
- fd_t *fd,
- size_t size,
- off_t off, dict_t *xdata);
-
-call_stub_t *
-fop_readdirp_stub (call_frame_t *frame,
- fop_readdirp_t fn,
- fd_t *fd,
- size_t size,
- off_t off,
- dict_t *xdata);
-
-call_stub_t *
-fop_readdirp_cbk_stub (call_frame_t *frame,
- fop_readdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata);
-
-call_stub_t *
-fop_readdir_cbk_stub (call_frame_t *frame,
- fop_readdir_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata);
-
-call_stub_t *
-fop_rchecksum_stub (call_frame_t *frame,
- fop_rchecksum_t fn,
- fd_t *fd, off_t offset,
- int32_t len, dict_t *xdata);
-
-call_stub_t *
-fop_rchecksum_cbk_stub (call_frame_t *frame,
- fop_rchecksum_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- uint32_t weak_checksum,
- uint8_t *strong_checksum, dict_t *xdata);
-
-call_stub_t *
-fop_xattrop_stub (call_frame_t *frame,
- fop_xattrop_t fn,
- loc_t *loc,
- gf_xattrop_flags_t optype,
- dict_t *xattr, dict_t *xdata);
-
-call_stub_t *
-fop_xattrop_stub_cbk_stub (call_frame_t *frame,
- fop_xattrop_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_fxattrop_stub (call_frame_t *frame,
- fop_fxattrop_t fn,
- fd_t *fd,
- gf_xattrop_flags_t optype,
- dict_t *xattr, dict_t *xdata);
-
-call_stub_t *
-fop_fxattrop_stub_cbk_stub (call_frame_t *frame,
- fop_xattrop_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-call_stub_t *
-fop_setattr_stub (call_frame_t *frame,
- fop_setattr_t fn,
- loc_t *loc,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata);
-
-call_stub_t *
-fop_setattr_cbk_stub (call_frame_t *frame,
- fop_setattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata);
-
-call_stub_t *
-fop_fsetattr_stub (call_frame_t *frame,
- fop_fsetattr_t fn,
- fd_t *fd,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata);
-
-call_stub_t *
-fop_fsetattr_cbk_stub (call_frame_t *frame,
- fop_setattr_cbk_t fn,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata);
-
-void call_resume (call_stub_t *stub);
-void call_stub_destroy (call_stub_t *stub);
-#endif
diff --git a/libglusterfs/src/changelog.h b/libglusterfs/src/changelog.h
new file mode 100644
index 00000000000..a09d9f25287
--- /dev/null
+++ b/libglusterfs/src/changelog.h
@@ -0,0 +1,115 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_CHANGELOG_H
+#define _GF_CHANGELOG_H
+
+struct gf_brick_spec;
+
+/**
+ * Max bit shiter for event selection
+ */
+#define CHANGELOG_EV_SELECTION_RANGE 5
+
+#define CHANGELOG_OP_TYPE_JOURNAL (1 << 0)
+#define CHANGELOG_OP_TYPE_OPEN (1 << 1)
+#define CHANGELOG_OP_TYPE_CREATE (1 << 2)
+#define CHANGELOG_OP_TYPE_RELEASE (1 << 3)
+#define CHANGELOG_OP_TYPE_BR_RELEASE \
+ (1 << 4) /* logical release (last close()), \
+ sent by bitrot stub */
+#define CHANGELOG_OP_TYPE_MAX (1 << CHANGELOG_EV_SELECTION_RANGE)
+
+struct ev_open {
+ unsigned char gfid[16];
+ int32_t flags;
+};
+
+struct ev_creat {
+ unsigned char gfid[16];
+ int32_t flags;
+};
+
+struct ev_release {
+ unsigned char gfid[16];
+};
+
+struct ev_release_br {
+ unsigned long version;
+ unsigned char gfid[16];
+ int32_t sign_info;
+};
+
+struct ev_changelog {
+ char path[PATH_MAX];
+};
+
+typedef struct changelog_event {
+ unsigned int ev_type;
+
+ union {
+ struct ev_open open;
+ struct ev_creat create;
+ struct ev_release release;
+ struct ev_changelog journal;
+ struct ev_release_br releasebr;
+ } u;
+} changelog_event_t;
+
+#define CHANGELOG_EV_SIZE (sizeof(changelog_event_t))
+
+/**
+ * event callback, connected & disconnection defs
+ */
+typedef void(CALLBACK)(void *, char *, void *, changelog_event_t *);
+typedef void *(INIT)(void *, struct gf_brick_spec *);
+typedef void(FINI)(void *, char *, void *);
+typedef void(CONNECT)(void *, char *, void *);
+typedef void(DISCONNECT)(void *, char *, void *);
+
+struct gf_brick_spec {
+ char *brick_path;
+ unsigned int filter;
+
+ INIT *init;
+ FINI *fini;
+ CALLBACK *callback;
+ CONNECT *connected;
+ DISCONNECT *disconnected;
+
+ void *ptr;
+};
+
+/* API set */
+
+int
+gf_changelog_register(char *brick_path, char *scratch_dir, char *log_file,
+ int log_levl, int max_reconnects);
+ssize_t
+gf_changelog_scan();
+
+int
+gf_changelog_start_fresh();
+
+ssize_t
+gf_changelog_next_change(char *bufptr, size_t maxlen);
+
+int
+gf_changelog_done(char *file);
+
+/* newer flexible API */
+int
+gf_changelog_init(void *xl);
+
+int
+gf_changelog_register_generic(struct gf_brick_spec *bricks, int count,
+ int ordered, char *logfile, int lvl, void *xl);
+
+#endif
diff --git a/libglusterfs/src/checksum.c b/libglusterfs/src/checksum.c
index e14a3044c20..acdaed04ae2 100644
--- a/libglusterfs/src/checksum.c
+++ b/libglusterfs/src/checksum.c
@@ -9,57 +9,36 @@
*/
#include <openssl/md5.h>
+#include <openssl/sha.h>
+#include <zlib.h>
#include <stdint.h>
-
-#include "glusterfs.h"
+#include <string.h>
/*
- * The "weak" checksum required for the rsync algorithm,
- * adapted from the rsync source code. The following comment
- * appears there:
- *
- * "a simple 32 bit checksum that can be upadted from either end
- * (inspired by Mark Adler's Adler-32 checksum)"
+ * The "weak" checksum required for the rsync algorithm.
*
* Note: these functions are only called to compute checksums on
* pathnames; they don't need to handle arbitrarily long strings of
* data. Thus int32_t and uint32_t are sufficient
*/
-
uint32_t
-gf_rsync_weak_checksum (unsigned char *buf, size_t len)
+gf_rsync_weak_checksum(unsigned char *buf, size_t len)
{
- int32_t i = 0;
- uint32_t s1, s2;
-
- uint32_t csum;
-
- s1 = s2 = 0;
- if (len >= 4) {
- for (; i < (len-4); i+=4) {
- s2 += 4*(s1 + buf[i]) + 3*buf[i+1] + 2*buf[i+2] + buf[i+3];
- s1 += buf[i+0] + buf[i+1] + buf[i+2] + buf[i+3];
- }
- }
-
- for (; i < len; i++) {
- s1 += buf[i];
- s2 += s1;
- }
-
- csum = (s1 & 0xffff) + (s2 << 16);
-
- return csum;
+ return adler32(0, buf, len);
}
-
/*
- * The "strong" checksum required for the rsync algorithm,
- * adapted from the rsync source code.
+ * The "strong" checksum required for the rsync algorithm.
*/
+void
+gf_rsync_strong_checksum(unsigned char *data, size_t len,
+ unsigned char *sha256_md)
+{
+ SHA256((const unsigned char *)data, len, sha256_md);
+}
void
-gf_rsync_strong_checksum (unsigned char *data, size_t len, unsigned char *md5)
+gf_rsync_md5_checksum(unsigned char *data, size_t len, unsigned char *md5)
{
- MD5(data, len, md5);
+ MD5(data, len, md5);
}
diff --git a/libglusterfs/src/circ-buff.c b/libglusterfs/src/circ-buff.c
index 6c7907a095c..913115c7be1 100644
--- a/libglusterfs/src/circ-buff.c
+++ b/libglusterfs/src/circ-buff.c
@@ -8,163 +8,186 @@
cases as published by the Free Software Foundation.
*/
-#include "circ-buff.h"
+#include "glusterfs/circ-buff.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+void
+cb_destroy_data(circular_buffer_t *cb, void (*destroy_buffer_data)(void *data))
+{
+ if (destroy_buffer_data)
+ destroy_buffer_data(cb->data);
+ GF_FREE(cb->data);
+ return;
+}
/* hold lock while calling this function */
int
-__cb_add_entry_buffer (buffer_t *buffer, void *item)
+__cb_add_entry_buffer(buffer_t *buffer, void *item)
{
- circular_buffer_t *ptr = NULL;
- int ret = -1;
- //DO we really need the assert here?
- GF_ASSERT (buffer->used_len <= buffer->size_buffer);
-
- if (buffer->use_once == _gf_true &&
- buffer->used_len == buffer->size_buffer) {
- gf_log ("", GF_LOG_WARNING, "buffer %p is use once buffer",
- buffer);
- return -1;
- } else {
- if (buffer->used_len == buffer->size_buffer) {
- if (buffer->cb[buffer->w_index]) {
- ptr = buffer->cb[buffer->w_index];
- if (ptr->data) {
- GF_FREE (ptr->data);
- ptr->data = NULL;
- GF_FREE (ptr);
- }
- buffer->cb[buffer->w_index] = NULL;
- ptr = NULL;
- }
+ circular_buffer_t *ptr = NULL;
+ int ret = -1;
+ // DO we really need the assert here?
+ GF_ASSERT(buffer->used_len <= buffer->size_buffer);
+
+ if (buffer->use_once == _gf_true &&
+ buffer->used_len == buffer->size_buffer) {
+ gf_msg("circ-buff", GF_LOG_WARNING, 0, LG_MSG_BUFFER_ERROR,
+ "buffer %p is use once buffer", buffer);
+ return -1;
+ } else {
+ if (buffer->used_len == buffer->size_buffer) {
+ if (buffer->cb[buffer->w_index]) {
+ ptr = buffer->cb[buffer->w_index];
+ if (ptr->data) {
+ cb_destroy_data(ptr, buffer->destroy_buffer_data);
+ ptr->data = NULL;
+ GF_FREE(ptr);
}
-
- buffer->cb[buffer->w_index] =
- GF_CALLOC (1, sizeof (circular_buffer_t),
- gf_common_mt_circular_buffer_t);
- if (!buffer->cb[buffer->w_index])
- return -1;
-
- buffer->cb[buffer->w_index]->data = item;
- ret = gettimeofday (&buffer->cb[buffer->w_index]->tv, NULL);
- if (ret == -1)
- gf_log_callingfn ("", GF_LOG_WARNING, "getting time of"
- "the day failed");
- buffer->w_index++;
- buffer->w_index %= buffer->size_buffer - 1;
- //used_buffer size cannot be greater than the total buffer size
-
- if (buffer->used_len < buffer->size_buffer)
- buffer->used_len++;
- return buffer->w_index;
+ buffer->cb[buffer->w_index] = NULL;
+ ptr = NULL;
+ }
}
+
+ buffer->cb[buffer->w_index] = GF_CALLOC(1, sizeof(circular_buffer_t),
+ gf_common_mt_circular_buffer_t);
+ if (!buffer->cb[buffer->w_index])
+ return -1;
+
+ buffer->cb[buffer->w_index]->data = item;
+ ret = gettimeofday(&buffer->cb[buffer->w_index]->tv, NULL);
+ if (ret == -1)
+ gf_msg_callingfn("circ-buff", GF_LOG_WARNING, 0,
+ LG_MSG_GETTIMEOFDAY_FAILED,
+ "getting time of the day failed");
+ buffer->w_index++;
+ buffer->w_index %= buffer->size_buffer;
+ // used_buffer size cannot be greater than the total buffer size
+
+ if (buffer->used_len < buffer->size_buffer)
+ buffer->used_len++;
+ return buffer->w_index;
+ }
}
int
-cb_add_entry_buffer (buffer_t *buffer, void *item)
+cb_add_entry_buffer(buffer_t *buffer, void *item)
{
- int write_index = -1;
+ int write_index = -1;
- pthread_mutex_lock (&buffer->lock);
- {
- write_index = __cb_add_entry_buffer (buffer, item);
- }
- pthread_mutex_unlock (&buffer->lock);
+ pthread_mutex_lock(&buffer->lock);
+ {
+ write_index = __cb_add_entry_buffer(buffer, item);
+ }
+ pthread_mutex_unlock(&buffer->lock);
- return write_index;
+ return write_index;
}
void
-cb_buffer_show (buffer_t *buffer)
+cb_buffer_show(buffer_t *buffer)
{
- pthread_mutex_lock (&buffer->lock);
- {
- gf_log ("", GF_LOG_DEBUG, "w_index: %d, size: %"GF_PRI_SIZET
- " used_buffer: %d", buffer->w_index,
- buffer->size_buffer,
- buffer->used_len);
- }
- pthread_mutex_unlock (&buffer->lock);
+ pthread_mutex_lock(&buffer->lock);
+ {
+ gf_msg_debug("circ-buff", 0,
+ "w_index: %d, size: %" GF_PRI_SIZET " used_buffer: %d",
+ buffer->w_index, buffer->size_buffer, buffer->used_len);
+ }
+ pthread_mutex_unlock(&buffer->lock);
}
void
-cb_buffer_dump (buffer_t *buffer, void *data,
- int (fn) (circular_buffer_t *buffer, void *data))
+cb_buffer_dump(buffer_t *buffer, void *data,
+ int(fn)(circular_buffer_t *buffer, void *data))
{
- int i = 0;
- circular_buffer_t *entry = NULL;
- int entries = 0;
-
- pthread_mutex_lock (&buffer->lock);
- {
- if (buffer->use_once == _gf_false) {
- for (i = (buffer->w_index - 1) ; entries <
- buffer->used_len ; entries++) {
- entry = buffer->cb[i];
- if (entry)
- fn (entry, data);
- if (0 == i)
- i = buffer->used_len - 1;
- else
- i = (i - 1) % (buffer->used_len - 1);
- }
- } else {
- for (i = 0; i < buffer->used_len ; i++) {
- entry = buffer->cb[i];
- fn (entry, data);
- }
- }
+ int index = 0;
+ circular_buffer_t *entry = NULL;
+ int entries = 0;
+ int ul = 0;
+ int w_ind = 0;
+ int size_buff = 0;
+ int i = 0;
+
+ ul = buffer->used_len;
+ w_ind = buffer->w_index;
+ size_buff = buffer->size_buffer;
+
+ pthread_mutex_lock(&buffer->lock);
+ {
+ if (buffer->use_once == _gf_false) {
+ index = (size_buff + (w_ind - ul)) % size_buff;
+ for (entries = 0; entries < buffer->used_len; entries++) {
+ entry = buffer->cb[index];
+ if (entry)
+ fn(entry, data);
+ else
+ gf_msg_callingfn("circ-buff", GF_LOG_WARNING, 0,
+ LG_MSG_NULL_PTR,
+ "Null entry in "
+ "circular buffer at "
+ "index %d.",
+ index);
+
+ index++;
+ index %= buffer->size_buffer;
+ }
+ } else {
+ for (i = 0; i < buffer->used_len; i++) {
+ entry = buffer->cb[i];
+ fn(entry, data);
+ }
}
- pthread_mutex_unlock (&buffer->lock);
+ }
+ pthread_mutex_unlock(&buffer->lock);
}
buffer_t *
-cb_buffer_new (size_t buffer_size, gf_boolean_t use_once)
+cb_buffer_new(size_t buffer_size, gf_boolean_t use_once,
+ void (*destroy_buffer_data)(void *data))
{
- buffer_t *buffer = NULL;
-
- buffer = GF_CALLOC (1, sizeof (*buffer), gf_common_mt_buffer_t);
- if (!buffer) {
- gf_log ("", GF_LOG_ERROR, "could not allocate the "
- "buffer");
- goto out;
- }
-
- buffer->cb = GF_CALLOC (buffer_size,
- sizeof (circular_buffer_t *),
- gf_common_mt_circular_buffer_t);
- if (!buffer->cb) {
- gf_log ("", GF_LOG_ERROR, "could not allocate the "
- "memory for the circular buffer");
- GF_FREE (buffer);
- buffer = NULL;
- goto out;
- }
-
- buffer->w_index = 0;
- buffer->size_buffer = buffer_size;
- buffer->use_once = use_once;
- buffer->used_len = 0;
- pthread_mutex_init (&buffer->lock, NULL);
+ buffer_t *buffer = NULL;
+
+ buffer = GF_CALLOC(1, sizeof(*buffer), gf_common_mt_buffer_t);
+ if (!buffer) {
+ goto out;
+ }
+
+ buffer->cb = GF_CALLOC(buffer_size, sizeof(circular_buffer_t *),
+ gf_common_mt_circular_buffer_t);
+ if (!buffer->cb) {
+ GF_FREE(buffer);
+ buffer = NULL;
+ goto out;
+ }
+
+ buffer->w_index = 0;
+ buffer->size_buffer = buffer_size;
+ buffer->use_once = use_once;
+ buffer->used_len = 0;
+ buffer->destroy_buffer_data = destroy_buffer_data;
+ pthread_mutex_init(&buffer->lock, NULL);
out:
- return buffer;
+ return buffer;
}
void
-cb_buffer_destroy (buffer_t *buffer)
+cb_buffer_destroy(buffer_t *buffer)
{
- int i = 0;
-
- if (buffer) {
- if (buffer->cb) {
- for (i = 0; i < buffer->used_len ; i++) {
- if (buffer->cb[i])
- GF_FREE (buffer->cb[i]);
- }
- GF_FREE (buffer->cb);
+ int i = 0;
+ circular_buffer_t *ptr = NULL;
+ if (buffer) {
+ if (buffer->cb) {
+ for (i = 0; i < buffer->used_len; i++) {
+ ptr = buffer->cb[i];
+ if (ptr->data) {
+ cb_destroy_data(ptr, buffer->destroy_buffer_data);
+ ptr->data = NULL;
+ GF_FREE(ptr);
}
- pthread_mutex_destroy (&buffer->lock);
- GF_FREE (buffer);
+ }
+ GF_FREE(buffer->cb);
}
+ pthread_mutex_destroy(&buffer->lock);
+ GF_FREE(buffer);
+ }
}
-
diff --git a/libglusterfs/src/circ-buff.h b/libglusterfs/src/circ-buff.h
deleted file mode 100644
index 5b5acc387bf..00000000000
--- a/libglusterfs/src/circ-buff.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _CB_H
-#define _CB_H
-
-#include "common-utils.h"
-#include "logging.h"
-#include "mem-types.h"
-
-#define BUFFER_SIZE 10
-#define TOTAL_SIZE BUFFER_SIZE + 1
-
-
-struct _circular_buffer {
- struct timeval tv;
- void *data;
-};
-
-typedef struct _circular_buffer circular_buffer_t;
-
-struct _buffer {
- unsigned int w_index;
- size_t size_buffer;
- gf_boolean_t use_once;
- /* This variable is assigned the proper value at the time of initing */
- /* the buffer. It indicates, whether the buffer should be used once */
- /* it becomes full. */
-
- int used_len;
- /* indicates the amount of circular buffer used. */
-
- circular_buffer_t **cb;
-
- pthread_mutex_t lock;
-};
-
-typedef struct _buffer buffer_t;
-
-int
-cb_add_entry_buffer (buffer_t *buffer, void *item);
-
-void
-cb_buffer_show (buffer_t *buffer);
-
-buffer_t *
-cb_buffer_new (size_t buffer_size,gf_boolean_t use_buffer_once);
-
-void
-cb_buffer_destroy (buffer_t *buffer);
-
-void
-cb_buffer_dump (buffer_t *buffer, void *data,
- int (fn) (circular_buffer_t *buffer, void *data));
-
-#endif /* _CB_H */
diff --git a/libglusterfs/src/client_t.c b/libglusterfs/src/client_t.c
new file mode 100644
index 00000000000..9d377c3c2e1
--- /dev/null
+++ b/libglusterfs/src/client_t.c
@@ -0,0 +1,825 @@
+/*
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/dict.h"
+#include "glusterfs/statedump.h"
+#include "glusterfs/client_t.h"
+#include "glusterfs/list.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+static int
+gf_client_chain_client_entries(cliententry_t *entries, uint32_t startidx,
+ uint32_t endcount)
+{
+ uint32_t i = 0;
+
+ if (!entries) {
+ gf_msg_callingfn("client_t", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!entries");
+ return -1;
+ }
+
+ /* Chain only till the second to last entry because we want to
+ * ensure that the last entry has GF_CLIENTTABLE_END.
+ */
+ for (i = startidx; i < (endcount - 1); i++)
+ entries[i].next_free = i + 1;
+
+ /* i has already been incremented up to the last entry. */
+ entries[i].next_free = GF_CLIENTTABLE_END;
+
+ return 0;
+}
+
+static int
+gf_client_clienttable_expand(clienttable_t *clienttable, uint32_t nr)
+{
+ cliententry_t *oldclients = NULL;
+ uint32_t oldmax_clients = -1;
+ int ret = -1;
+
+ if (clienttable == NULL || nr <= clienttable->max_clients) {
+ gf_msg_callingfn("client_t", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ ret = EINVAL;
+ goto out;
+ }
+
+ oldclients = clienttable->cliententries;
+ oldmax_clients = clienttable->max_clients;
+
+ clienttable->cliententries = GF_CALLOC(nr, sizeof(cliententry_t),
+ gf_common_mt_cliententry_t);
+ if (!clienttable->cliententries) {
+ clienttable->cliententries = oldclients;
+ ret = 0;
+ goto out;
+ }
+ clienttable->max_clients = nr;
+
+ if (oldclients) {
+ uint32_t cpy = oldmax_clients * sizeof(cliententry_t);
+ memcpy(clienttable->cliententries, oldclients, cpy);
+ }
+
+ gf_client_chain_client_entries(clienttable->cliententries, oldmax_clients,
+ clienttable->max_clients);
+
+ /* Now that expansion is done, we must update the client list
+ * head pointer so that the client allocation functions can continue
+ * using the expanded table.
+ */
+ clienttable->first_free = oldmax_clients;
+ GF_FREE(oldclients);
+ ret = 0;
+out:
+ return ret;
+}
+
+clienttable_t *
+gf_clienttable_alloc(void)
+{
+ clienttable_t *clienttable = NULL;
+ int result = 0;
+
+ clienttable = GF_CALLOC(1, sizeof(clienttable_t),
+ gf_common_mt_clienttable_t);
+ if (!clienttable)
+ return NULL;
+
+ LOCK_INIT(&clienttable->lock);
+
+ result = gf_client_clienttable_expand(clienttable,
+ GF_CLIENTTABLE_INITIAL_SIZE);
+ if (result != 0) {
+ gf_msg("client_t", GF_LOG_ERROR, 0, LG_MSG_EXPAND_CLIENT_TABLE_FAILED,
+ "gf_client_clienttable_expand failed");
+ GF_FREE(clienttable);
+ return NULL;
+ }
+
+ return clienttable;
+}
+
+/*
+ * Increments ref.bind if the client is already present or creates a new
+ * client with ref.bind = 1,ref.count = 1 it signifies that
+ * as long as ref.bind is > 0 client should be alive.
+ */
+client_t *
+gf_client_get(xlator_t *this, client_auth_data_t *cred, char *client_uid,
+ char *subdir_mount)
+{
+ client_t *client = NULL;
+ cliententry_t *cliententry = NULL;
+ clienttable_t *clienttable = NULL;
+ unsigned int i = 0;
+
+ if (this == NULL || client_uid == NULL) {
+ gf_msg_callingfn("client_t", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ errno = EINVAL;
+ return NULL;
+ }
+
+ clienttable = this->ctx->clienttable;
+
+ LOCK(&clienttable->lock);
+ {
+ for (; i < clienttable->max_clients; i++) {
+ client = clienttable->cliententries[i].client;
+ if (client == NULL)
+ continue;
+ /*
+ * look for matching client_uid, _and_
+ * if auth was used, matching auth flavour and data
+ */
+ if (strcmp(client_uid, client->client_uid) == 0 &&
+ (cred->flavour && (cred->flavour == client->auth.flavour &&
+ (size_t)cred->datalen == client->auth.len &&
+ memcmp(cred->authdata, client->auth.data,
+ client->auth.len) == 0))) {
+ GF_ATOMIC_INC(client->bind);
+ goto unlock;
+ }
+ }
+
+ client = GF_CALLOC(1, sizeof(client_t), gf_common_mt_client_t);
+ if (client == NULL) {
+ errno = ENOMEM;
+ goto unlock;
+ }
+
+ client->this = this;
+ if (subdir_mount != NULL)
+ client->subdir_mount = gf_strdup(subdir_mount);
+
+ LOCK_INIT(&client->scratch_ctx.lock);
+
+ client->client_uid = gf_strdup(client_uid);
+ if (client->client_uid == NULL) {
+ GF_FREE(client);
+ client = NULL;
+ errno = ENOMEM;
+ goto unlock;
+ }
+ client->scratch_ctx.count = GF_CLIENTCTX_INITIAL_SIZE;
+ client->scratch_ctx.ctx = GF_CALLOC(GF_CLIENTCTX_INITIAL_SIZE,
+ sizeof(struct client_ctx),
+ gf_common_mt_client_ctx);
+ if (client->scratch_ctx.ctx == NULL) {
+ GF_FREE(client->client_uid);
+ GF_FREE(client);
+ client = NULL;
+ errno = ENOMEM;
+ goto unlock;
+ }
+
+ GF_ATOMIC_INIT(client->bind, 1);
+ GF_ATOMIC_INIT(client->count, 1);
+ GF_ATOMIC_INIT(client->fd_cnt, 0);
+
+ client->auth.flavour = cred->flavour;
+ if (cred->flavour) {
+ client->auth.data = GF_MALLOC(cred->datalen, gf_common_mt_client_t);
+ if (client->auth.data == NULL) {
+ GF_FREE(client->scratch_ctx.ctx);
+ GF_FREE(client->client_uid);
+ GF_FREE(client);
+ client = NULL;
+ errno = ENOMEM;
+ goto unlock;
+ }
+ memcpy(client->auth.data, cred->authdata, cred->datalen);
+ client->auth.len = cred->datalen;
+ }
+
+ client->tbl_index = clienttable->first_free;
+ cliententry = &clienttable->cliententries[clienttable->first_free];
+ if (cliententry->next_free == GF_CLIENTTABLE_END) {
+ int result = gf_client_clienttable_expand(
+ clienttable,
+ clienttable->max_clients + GF_CLIENTTABLE_INITIAL_SIZE);
+ if (result != 0) {
+ GF_FREE(client->scratch_ctx.ctx);
+ GF_FREE(client->client_uid);
+ GF_FREE(client);
+ client = NULL;
+ errno = result;
+ goto unlock;
+ }
+ cliententry = &clienttable->cliententries[client->tbl_index];
+ cliententry->next_free = clienttable->first_free;
+ }
+ cliententry->client = client;
+ clienttable->first_free = cliententry->next_free;
+ cliententry->next_free = GF_CLIENTENTRY_ALLOCATED;
+ }
+unlock:
+ UNLOCK(&clienttable->lock);
+
+ if (client)
+ gf_msg_callingfn("client_t", GF_LOG_DEBUG, 0, LG_MSG_BIND_REF,
+ "%s: bind_ref: %" GF_PRI_ATOMIC
+ ", ref: "
+ "%" GF_PRI_ATOMIC,
+ client->client_uid, GF_ATOMIC_GET(client->bind),
+ GF_ATOMIC_GET(client->count));
+ return client;
+}
+
+void
+gf_client_put(client_t *client, gf_boolean_t *detached)
+{
+ gf_boolean_t unref = _gf_false;
+ int bind_ref;
+
+ if (client == NULL)
+ goto out;
+
+ if (detached)
+ *detached = _gf_false;
+
+ bind_ref = GF_ATOMIC_DEC(client->bind);
+ if (bind_ref == 0)
+ unref = _gf_true;
+
+ gf_msg_callingfn("client_t", GF_LOG_DEBUG, 0, LG_MSG_BIND_REF,
+ "%s: "
+ "bind_ref: %" GF_PRI_ATOMIC ", ref: %" GF_PRI_ATOMIC
+ ", "
+ "unref: %d",
+ client->client_uid, GF_ATOMIC_GET(client->bind),
+ GF_ATOMIC_GET(client->count), unref);
+ if (unref) {
+ if (detached)
+ *detached = _gf_true;
+ gf_client_unref(client);
+ }
+
+out:
+ return;
+}
+
+client_t *
+gf_client_ref(client_t *client)
+{
+ if (!client) {
+ gf_msg_callingfn("client_t", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "null client");
+ return NULL;
+ }
+
+ GF_ATOMIC_INC(client->count);
+ gf_msg_callingfn("client_t", GF_LOG_DEBUG, 0, LG_MSG_REF_COUNT,
+ "%s: "
+ "ref-count %" GF_PRI_ATOMIC,
+ client->client_uid, GF_ATOMIC_GET(client->count));
+ return client;
+}
+
+static void
+gf_client_destroy_recursive(xlator_t *xl, client_t *client)
+{
+ xlator_list_t *trav;
+
+ if (!xl->call_cleanup && xl->cbks->client_destroy) {
+ xl->cbks->client_destroy(xl, client);
+ }
+
+ for (trav = xl->children; trav; trav = trav->next) {
+ gf_client_destroy_recursive(trav->xlator, client);
+ }
+}
+
+static void
+client_destroy(client_t *client)
+{
+ clienttable_t *clienttable = NULL;
+ glusterfs_graph_t *gtrav = NULL;
+
+ if (client == NULL) {
+ gf_msg_callingfn("xlator", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ goto out;
+ }
+
+ clienttable = client->this->ctx->clienttable;
+
+ LOCK(&clienttable->lock);
+ {
+ clienttable->cliententries[client->tbl_index].client = NULL;
+ clienttable->cliententries[client->tbl_index]
+ .next_free = clienttable->first_free;
+ clienttable->first_free = client->tbl_index;
+ }
+ UNLOCK(&clienttable->lock);
+
+ list_for_each_entry(gtrav, &client->this->ctx->graphs, list)
+ {
+ gf_client_destroy_recursive(gtrav->top, client);
+ }
+
+ if (client->subdir_inode)
+ inode_unref(client->subdir_inode);
+
+ LOCK_DESTROY(&client->scratch_ctx.lock);
+
+ GF_FREE(client->auth.data);
+ GF_FREE(client->auth.username);
+ GF_FREE(client->auth.passwd);
+ GF_FREE(client->scratch_ctx.ctx);
+ GF_FREE(client->client_uid);
+ GF_FREE(client->subdir_mount);
+ GF_FREE(client->client_name);
+ GF_FREE(client);
+out:
+ return;
+}
+
+static int
+gf_client_disconnect_recursive(xlator_t *xl, client_t *client)
+{
+ int ret = 0;
+ xlator_list_t *trav;
+
+ if (!xl->call_cleanup && xl->cbks->client_disconnect) {
+ ret = xl->cbks->client_disconnect(xl, client);
+ }
+
+ for (trav = xl->children; trav; trav = trav->next) {
+ ret |= gf_client_disconnect_recursive(trav->xlator, client);
+ }
+
+ return ret;
+}
+
+int
+gf_client_disconnect(client_t *client)
+{
+ int ret = 0;
+ glusterfs_graph_t *gtrav = NULL;
+
+ list_for_each_entry(gtrav, &client->this->ctx->graphs, list)
+ {
+ ret |= gf_client_disconnect_recursive(gtrav->top, client);
+ }
+
+ return ret;
+}
+
+void
+gf_client_unref(client_t *client)
+{
+ uint64_t refcount;
+
+ if (!client) {
+ gf_msg_callingfn("client_t", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "client is NULL");
+ return;
+ }
+
+ refcount = GF_ATOMIC_DEC(client->count);
+ gf_msg_callingfn("client_t", GF_LOG_DEBUG, 0, LG_MSG_REF_COUNT,
+ "%s: "
+ "ref-count %" GF_PRI_ATOMIC,
+ client->client_uid, refcount);
+ if (refcount == 0) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, LG_MSG_DISCONNECT_CLIENT,
+ "Shutting down connection %s", client->client_uid);
+ client_destroy(client);
+ }
+}
+
+static int
+__client_ctx_get_int(client_t *client, void *key, void **value)
+{
+ int index = 0;
+ int ret = 0;
+
+ for (index = 0; index < client->scratch_ctx.count; index++) {
+ if (client->scratch_ctx.ctx[index].ctx_key == key)
+ break;
+ }
+
+ if (index == client->scratch_ctx.count) {
+ ret = -1;
+ goto out;
+ }
+
+ if (value)
+ *value = client->scratch_ctx.ctx[index].ctx_value;
+
+out:
+ return ret;
+}
+
+static int
+__client_ctx_set_int(client_t *client, void *key, void *value)
+{
+ int index = 0;
+ int ret = 0;
+ int set_idx = -1;
+
+ for (index = 0; index < client->scratch_ctx.count; index++) {
+ if (!client->scratch_ctx.ctx[index].ctx_key) {
+ if (set_idx == -1)
+ set_idx = index;
+ /* don't break, to check if key already exists
+ further on */
+ }
+ if (client->scratch_ctx.ctx[index].ctx_key == key) {
+ set_idx = index;
+ break;
+ }
+ }
+
+ if (set_idx == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ client->scratch_ctx.ctx[set_idx].ctx_key = key;
+ client->scratch_ctx.ctx[set_idx].ctx_value = value;
+
+out:
+ return ret;
+}
+
+/*will return success with old value if exist*/
+void *
+client_ctx_set(client_t *client, void *key, void *value)
+{
+ int ret = 0;
+ void *ret_value = NULL;
+
+ if (!client || !key || !value)
+ return NULL;
+
+ LOCK(&client->scratch_ctx.lock);
+ {
+ ret = __client_ctx_get_int(client, key, &ret_value);
+ if (!ret && ret_value) {
+ UNLOCK(&client->scratch_ctx.lock);
+ return ret_value;
+ }
+
+ ret = __client_ctx_set_int(client, key, value);
+ }
+ UNLOCK(&client->scratch_ctx.lock);
+
+ if (ret)
+ return NULL;
+ return value;
+}
+
+int
+client_ctx_get(client_t *client, void *key, void **value)
+{
+ int ret = 0;
+
+ if (!client || !key)
+ return -1;
+
+ LOCK(&client->scratch_ctx.lock);
+ {
+ ret = __client_ctx_get_int(client, key, value);
+ }
+ UNLOCK(&client->scratch_ctx.lock);
+
+ return ret;
+}
+
+static int
+__client_ctx_del_int(client_t *client, void *key, void **value)
+{
+ int index = 0;
+ int ret = 0;
+
+ for (index = 0; index < client->scratch_ctx.count; index++) {
+ if (client->scratch_ctx.ctx[index].ctx_key == key)
+ break;
+ }
+
+ if (index == client->scratch_ctx.count) {
+ ret = -1;
+ goto out;
+ }
+
+ if (value)
+ *value = client->scratch_ctx.ctx[index].ctx_value;
+
+ client->scratch_ctx.ctx[index].ctx_key = 0;
+ client->scratch_ctx.ctx[index].ctx_value = 0;
+
+out:
+ return ret;
+}
+
+int
+client_ctx_del(client_t *client, void *key, void **value)
+{
+ int ret = 0;
+
+ if (!client || !key)
+ return -1;
+
+ LOCK(&client->scratch_ctx.lock);
+ {
+ ret = __client_ctx_del_int(client, key, value);
+ }
+ UNLOCK(&client->scratch_ctx.lock);
+
+ return ret;
+}
+
+void
+client_ctx_dump(client_t *client, char *prefix)
+{
+#if 0 /* TBD, FIXME */
+ struct client_ctx *client_ctx = NULL;
+ xlator_t *xl = NULL;
+ int i = 0;
+
+ if ((client == NULL) || (client->ctx == NULL)) {
+ goto out;
+ }
+
+ LOCK (&client->ctx_lock);
+ if (client->ctx != NULL) {
+ client_ctx = GF_CALLOC (client->inode->table->xl->graph->ctx_count,
+ sizeof (*client_ctx),
+ gf_common_mt_client_ctx);
+ if (client_ctx == NULL) {
+ goto unlock;
+ }
+
+ for (i = 0; i < client->inode->table->xl->graph->ctx_count; i++) {
+ client_ctx[i] = client->ctx[i];
+ }
+ }
+unlock:
+ UNLOCK (&client->ctx_lock);
+
+ if (client_ctx == NULL) {
+ goto out;
+ }
+
+ for (i = 0; i < client->inode->table->xl->graph->ctx_count; i++) {
+ if (client_ctx[i].xl_key) {
+ xl = (xlator_t *)(long)client_ctx[i].xl_key;
+ if (xl->dumpops && xl->dumpops->clientctx)
+ xl->dumpops->clientctx (xl, client);
+ }
+ }
+out:
+ GF_FREE (client_ctx);
+#endif
+}
+
+/*
+ * the following functions are here to preserve legacy behavior of the
+ * protocol/server xlator dump, but perhaps they should just be folded
+ * into the client dump instead?
+ */
+int
+gf_client_dump_fdtables_to_dict(xlator_t *this, dict_t *dict)
+{
+ clienttable_t *clienttable = NULL;
+ int count = 0;
+ int ret = -1;
+#ifdef NOTYET
+ client_t *client = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+#endif
+
+ GF_VALIDATE_OR_GOTO(THIS->name, this, out);
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+ clienttable = this->ctx->clienttable;
+
+ if (!clienttable)
+ return -1;
+
+#ifdef NOTYET
+ ret = TRY_LOCK(&clienttable->lock);
+ {
+ if (ret) {
+ gf_msg("client_t", GF_LOG_WARNING, 0, LG_MSG_LOCK_FAILED,
+ "Unable to acquire lock");
+ return -1;
+ }
+ for (; count < clienttable->max_clients; count++) {
+ if (GF_CLIENTENTRY_ALLOCATED !=
+ clienttable->cliententries[count].next_free)
+ continue;
+ client = clienttable->cliententries[count].client;
+ if (client->bound_xl &&
+ !strcmp(client->bound_xl->name, this->name)) {
+ snprintf(key, sizeof(key), "conn%d", count++);
+ fdtable_dump_to_dict(client->server_ctx.fdtable, key, dict);
+ }
+ }
+ }
+ UNLOCK(&clienttable->lock);
+#endif
+
+ ret = dict_set_int32(dict, "conncount", count);
+out:
+ return ret;
+}
+
+int
+gf_client_dump_fdtables(xlator_t *this)
+{
+ client_t *client = NULL;
+ clienttable_t *clienttable = NULL;
+ int count = 1;
+ int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO(THIS->name, this, out);
+
+ clienttable = this->ctx->clienttable;
+
+ if (!clienttable)
+ return -1;
+
+ ret = TRY_LOCK(&clienttable->lock);
+ {
+ if (ret) {
+ gf_msg("client_t", GF_LOG_WARNING, 0, LG_MSG_LOCK_FAILED,
+ "Unable to acquire lock");
+ return -1;
+ }
+
+ for (; count < clienttable->max_clients; count++) {
+ if (GF_CLIENTENTRY_ALLOCATED !=
+ clienttable->cliententries[count].next_free)
+ continue;
+ client = clienttable->cliententries[count].client;
+ if (client->client_uid) {
+ gf_proc_dump_build_key(key, "conn", "%d.id", count);
+ gf_proc_dump_write(key, "%s", client->client_uid);
+ }
+
+ if (client->subdir_mount) {
+ gf_proc_dump_build_key(key, "conn", "%d.subdir", count);
+ gf_proc_dump_write(key, "%s", client->subdir_mount);
+ }
+ gf_proc_dump_build_key(key, "conn", "%d.ref", count);
+ gf_proc_dump_write(key, "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(client->count));
+ if (client->bound_xl) {
+ gf_proc_dump_build_key(key, "conn", "%d.bound_xl", count);
+ gf_proc_dump_write(key, "%s", client->bound_xl->name);
+ }
+
+#ifdef NOTYET
+ gf_proc_dump_build_key(key, "conn", "%d.id", count);
+ fdtable_dump(client->server_ctx.fdtable, key);
+#endif
+ }
+ }
+
+ UNLOCK(&clienttable->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+gf_client_dump_inodes_to_dict(xlator_t *this, dict_t *dict)
+{
+ client_t *client = NULL;
+ clienttable_t *clienttable = NULL;
+ xlator_t *prev_bound_xl = NULL;
+ char key[32] = {
+ 0,
+ };
+ int count = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, this, out);
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+ clienttable = this->ctx->clienttable;
+
+ if (!clienttable)
+ return -1;
+
+ ret = LOCK(&clienttable->lock);
+ {
+ if (ret) {
+ gf_msg("client_t", GF_LOG_WARNING, 0, LG_MSG_LOCK_FAILED,
+ "Unable to acquire lock");
+ return -1;
+ }
+ for (; count < clienttable->max_clients; count++) {
+ if (GF_CLIENTENTRY_ALLOCATED !=
+ clienttable->cliententries[count].next_free)
+ continue;
+ client = clienttable->cliententries[count].client;
+ if (!strcmp(client->bound_xl->name, this->name)) {
+ if (client->bound_xl && client->bound_xl->itable) {
+ /* Presently every brick contains only
+ * one bound_xl for all connections.
+ * This will lead to duplicating of
+ * the inode lists, if listing is
+ * done for every connection. This
+ * simple check prevents duplication
+ * in the present case. If need arises
+ * the check can be improved.
+ */
+ if (client->bound_xl == prev_bound_xl)
+ continue;
+ prev_bound_xl = client->bound_xl;
+
+ snprintf(key, sizeof(key), "conn%d", count);
+ inode_table_dump_to_dict(client->bound_xl->itable, key,
+ dict);
+ }
+ }
+ }
+ }
+ UNLOCK(&clienttable->lock);
+
+ ret = dict_set_int32(dict, "conncount", count);
+
+out:
+ if (prev_bound_xl)
+ prev_bound_xl = NULL;
+ return ret;
+}
+
+int
+gf_client_dump_inodes(xlator_t *this)
+{
+ client_t *client = NULL;
+ clienttable_t *clienttable = NULL;
+ xlator_t *prev_bound_xl = NULL;
+ int count = 0;
+ int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO(THIS->name, this, out);
+
+ clienttable = this->ctx->clienttable;
+
+ if (!clienttable)
+ goto out;
+
+ ret = TRY_LOCK(&clienttable->lock);
+ {
+ if (ret) {
+ gf_msg("client_t", GF_LOG_WARNING, 0, LG_MSG_LOCK_FAILED,
+ "Unable to acquire lock");
+ goto out;
+ }
+
+ for (; count < clienttable->max_clients; count++) {
+ if (GF_CLIENTENTRY_ALLOCATED !=
+ clienttable->cliententries[count].next_free)
+ continue;
+ client = clienttable->cliententries[count].client;
+ if (client->bound_xl && client->bound_xl->itable) {
+ /* Presently every brick contains only
+ * one bound_xl for all connections.
+ * This will lead to duplicating of
+ * the inode lists, if listing is
+ * done for every connection. This
+ * simple check prevents duplication
+ * in the present case. If need arises
+ * the check can be improved.
+ */
+ if (client->bound_xl == prev_bound_xl)
+ continue;
+ prev_bound_xl = client->bound_xl;
+
+ gf_proc_dump_build_key(key, "conn", "%d.bound_xl.%s", count,
+ client->bound_xl->name);
+ inode_table_dump(client->bound_xl->itable, key);
+ }
+ }
+ }
+ UNLOCK(&clienttable->lock);
+
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/libglusterfs/src/cluster-syncop.c b/libglusterfs/src/cluster-syncop.c
new file mode 100644
index 00000000000..6ee89ddfdcf
--- /dev/null
+++ b/libglusterfs/src/cluster-syncop.c
@@ -0,0 +1,1261 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* Perform fop on all subvolumes represented by list[] array and wait
+ for all callbacks to return */
+
+/* NOTE: Cluster-syncop, like syncop blocks the executing thread until the
+ * responses are gathered if it is not executed as part of synctask. So it
+ * shouldn't be invoked in epoll worker thread */
+#include "glusterfs/cluster-syncop.h"
+#include "glusterfs/defaults.h"
+
+#define FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fop, \
+ args...) \
+ do { \
+ int __i = 0; \
+ int __count = 0; \
+ cluster_local_t __local = { \
+ 0, \
+ }; \
+ void *__old_local = frame->local; \
+ \
+ __local.replies = replies; \
+ memset(output, 0, numsubvols); \
+ cluster_replies_wipe(replies, numsubvols); \
+ for (__i = 0; __i < numsubvols; __i++) \
+ INIT_LIST_HEAD(&replies[__i].entries.list); \
+ if (syncbarrier_init(&__local.barrier)) \
+ break; \
+ frame->local = &__local; \
+ for (__i = 0; __i < numsubvols; __i++) { \
+ if (on[__i]) { \
+ __count++; \
+ } \
+ } \
+ __local.barrier.waitfor = __count; \
+ for (__i = 0; __i < numsubvols; __i++) { \
+ if (!on[__i]) \
+ continue; \
+ STACK_WIND_COOKIE(frame, cluster_##fop##_cbk, (void *)(long)__i, \
+ subvols[__i], subvols[__i]->fops->fop, args); \
+ } \
+ syncbarrier_wait(&__local.barrier, __count); \
+ syncbarrier_destroy(&__local.barrier); \
+ frame->local = __old_local; \
+ STACK_RESET(frame->root); \
+ } while (0)
+
+#define FOP_SEQ(subvols, on, numsubvols, replies, output, frame, fop, args...) \
+ do { \
+ int __i = 0; \
+ \
+ cluster_local_t __local = { \
+ 0, \
+ }; \
+ void *__old_local = frame->local; \
+ __local.replies = replies; \
+ memset(output, 0, numsubvols); \
+ cluster_replies_wipe(replies, numsubvols); \
+ for (__i = 0; __i < numsubvols; __i++) \
+ INIT_LIST_HEAD(&replies[__i].entries.list); \
+ if (syncbarrier_init(&__local.barrier)) \
+ break; \
+ frame->local = &__local; \
+ for (__i = 0; __i < numsubvols; __i++) { \
+ if (!on[__i]) \
+ continue; \
+ STACK_WIND_COOKIE(frame, cluster_##fop##_cbk, (void *)(long)__i, \
+ subvols[__i], subvols[__i]->fops->fop, args); \
+ syncbarrier_wait(&__local.barrier, 1); \
+ } \
+ syncbarrier_destroy(&__local.barrier); \
+ frame->local = __old_local; \
+ STACK_RESET(frame->root); \
+ } while (0)
+
+#define FOP_CBK(fop, frame, cookie, args...) \
+ do { \
+ cluster_local_t *__local = frame->local; \
+ int __i = (long)cookie; \
+ args_##fop##_cbk_store(&__local->replies[__i], args); \
+ __local->replies[__i].valid = 1; \
+ syncbarrier_wake(&__local->barrier); \
+ } while (0)
+
+int32_t
+cluster_fop_success_fill(default_args_cbk_t *replies, int numsubvols,
+ unsigned char *success)
+{
+ int i = 0;
+ int count = 0;
+
+ for (i = 0; i < numsubvols; i++) {
+ if (replies[i].valid && replies[i].op_ret >= 0) {
+ success[i] = 1;
+ count++;
+ } else {
+ success[i] = 0;
+ }
+ }
+
+ return count;
+}
+
+void
+cluster_replies_wipe(default_args_cbk_t *replies, int numsubvols)
+{
+ int i = 0;
+
+ if (!replies)
+ return;
+
+ for (i = 0; i < numsubvols; i++)
+ args_cbk_wipe(&replies[i]);
+ memset(replies, 0, numsubvols * sizeof(*replies));
+}
+
+int32_t
+cluster_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ FOP_CBK(lookup, frame, cookie, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
+}
+
+int32_t
+cluster_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ FOP_CBK(stat, frame, cookie, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+cluster_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ FOP_CBK(truncate, frame, cookie, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+cluster_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ FOP_CBK(ftruncate, frame, cookie, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+cluster_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(access, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
+{
+ FOP_CBK(readlink, frame, cookie, op_ret, op_errno, path, buf, xdata);
+ return 0;
+}
+
+int32_t
+cluster_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ FOP_CBK(mknod, frame, cookie, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+cluster_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ FOP_CBK(mkdir, frame, cookie, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+cluster_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ FOP_CBK(unlink, frame, cookie, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+int32_t
+cluster_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ FOP_CBK(rmdir, frame, cookie, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+int32_t
+cluster_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ FOP_CBK(symlink, frame, cookie, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+cluster_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ FOP_CBK(rename, frame, cookie, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
+ return 0;
+}
+
+int32_t
+cluster_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ FOP_CBK(link, frame, cookie, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+cluster_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ FOP_CBK(create, frame, cookie, op_ret, op_errno, fd, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+cluster_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ FOP_CBK(open, frame, cookie, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int32_t
+cluster_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ FOP_CBK(readv, frame, cookie, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
+ return 0;
+}
+
+int32_t
+cluster_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ FOP_CBK(writev, frame, cookie, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+cluster_put_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ FOP_CBK(put, frame, cookie, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+cluster_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(flush, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ FOP_CBK(fsync, frame, cookie, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ FOP_CBK(fstat, frame, cookie, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+cluster_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ FOP_CBK(opendir, frame, cookie, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(fsyncdir, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{
+ FOP_CBK(statfs, frame, cookie, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+cluster_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(setxattr, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(fsetxattr, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ FOP_CBK(fgetxattr, frame, cookie, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int32_t
+cluster_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ FOP_CBK(getxattr, frame, cookie, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int32_t
+cluster_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ FOP_CBK(xattrop, frame, cookie, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ FOP_CBK(fxattrop, frame, cookie, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int32_t
+cluster_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(removexattr, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(fremovexattr, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ FOP_CBK(lk, frame, cookie, op_ret, op_errno, lock, xdata);
+ return 0;
+}
+
+int32_t
+cluster_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(inodelk, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(finodelk, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(entrylk, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(fentrylk, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_rchecksum_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata)
+{
+ FOP_CBK(rchecksum, frame, cookie, op_ret, op_errno, weak_checksum,
+ strong_checksum, xdata);
+ return 0;
+}
+
+int32_t
+cluster_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ FOP_CBK(readdir, frame, cookie, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+int32_t
+cluster_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ FOP_CBK(readdirp, frame, cookie, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+int32_t
+cluster_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ FOP_CBK(setattr, frame, cookie, op_ret, op_errno, statpre, statpost, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ FOP_CBK(fsetattr, frame, cookie, op_ret, op_errno, statpre, statpost,
+ xdata);
+ return 0;
+}
+
+int32_t
+cluster_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ FOP_CBK(fallocate, frame, cookie, op_ret, op_errno, pre, post, xdata);
+ return 0;
+}
+
+int32_t
+cluster_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ FOP_CBK(discard, frame, cookie, op_ret, op_errno, pre, post, xdata);
+ return 0;
+}
+
+int32_t
+cluster_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ FOP_CBK(zerofill, frame, cookie, op_ret, op_errno, pre, post, xdata);
+ return 0;
+}
+
+int32_t
+cluster_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ FOP_CBK(ipc, frame, cookie, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cluster_fgetxattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fgetxattr, fd,
+ name, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_fsetxattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fsetxattr, fd,
+ dict, flags, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_setxattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, setxattr, loc,
+ dict, flags, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_statfs(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, statfs, loc,
+ xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_fsyncdir(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fsyncdir, fd,
+ flags, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_opendir(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, opendir, loc,
+ fd, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_fstat(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fstat, fd,
+ xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_fsync(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fsync, fd,
+ flags, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_flush(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, flush, fd,
+ xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_writev(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, writev, fd,
+ vector, count, off, flags, iobref, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_put(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, uint32_t flags, struct iovec *vector, int32_t count,
+ off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, put, loc, mode,
+ umask, flags, vector, count, offset, iobref, xattr, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_readv(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, readv, fd, size,
+ offset, flags, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_open(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, open, loc,
+ flags, fd, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_create(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, create, loc,
+ flags, mode, umask, fd, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_link(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, link, oldloc,
+ newloc, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_rename(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, rename, oldloc,
+ newloc, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int
+cluster_symlink(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, symlink,
+ linkpath, loc, umask, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_rmdir(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, rmdir, loc,
+ flags, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_unlink(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, unlink, loc,
+ xflag, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int
+cluster_mkdir(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, mkdir, loc,
+ mode, umask, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int
+cluster_mknod(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, mknod, loc,
+ mode, rdev, umask, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_readlink(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, readlink, loc,
+ size, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_access(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, access, loc,
+ mask, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_ftruncate(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, ftruncate, fd,
+ offset, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_getxattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, getxattr, loc,
+ name, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_xattrop(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, xattrop, loc,
+ flags, dict, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_fxattrop(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fxattrop, fd,
+ flags, dict, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_removexattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, removexattr,
+ loc, name, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_fremovexattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fremovexattr,
+ fd, name, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_lk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, lk, fd, cmd,
+ lock, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_rchecksum(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, rchecksum, fd,
+ offset, len, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_readdir(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, readdir, fd,
+ size, off, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_readdirp(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, readdirp, fd,
+ size, off, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_setattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, setattr, loc,
+ stbuf, valid, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_truncate(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, truncate, loc,
+ offset, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_stat(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, stat, loc,
+ xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_lookup(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, lookup, loc,
+ xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_fsetattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fsetattr, fd,
+ stbuf, valid, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_fallocate(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, fallocate, fd,
+ keep_size, offset, len, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_discard(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, discard, fd,
+ offset, len, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_zerofill(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, zerofill, fd,
+ offset, len, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int32_t
+cluster_ipc(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
+{
+ FOP_ONLIST(subvols, on, numsubvols, replies, output, frame, ipc, op, xdata);
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int
+cluster_uninodelk(xlator_t **subvols, unsigned char *locked_on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, char *dom,
+ inode_t *inode, off_t off, size_t size)
+{
+ loc_t loc = {
+ 0,
+ };
+ struct gf_flock flock = {
+ 0,
+ };
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ flock.l_type = F_UNLCK;
+ flock.l_start = off;
+ flock.l_len = size;
+
+ FOP_ONLIST(subvols, locked_on, numsubvols, replies, output, frame, inodelk,
+ dom, &loc, F_SETLK, &flock, NULL);
+
+ loc_wipe(&loc);
+
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int
+cluster_tryinodelk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *locked_on,
+ call_frame_t *frame, xlator_t *this, char *dom,
+ inode_t *inode, off_t off, size_t size)
+{
+ struct gf_flock flock = {
+ 0,
+ };
+ loc_t loc = {0};
+
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ FOP_ONLIST(subvols, on, numsubvols, replies, locked_on, frame, inodelk, dom,
+ &loc, F_SETLK, &flock, NULL);
+
+ loc_wipe(&loc);
+ return cluster_fop_success_fill(replies, numsubvols, locked_on);
+}
+
+int
+cluster_inodelk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *locked_on,
+ call_frame_t *frame, xlator_t *this, char *dom, inode_t *inode,
+ off_t off, size_t size)
+{
+ struct gf_flock flock = {
+ 0,
+ };
+ int i = 0;
+ loc_t loc = {0};
+ unsigned char *output = NULL;
+
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
+
+ output = alloca(numsubvols);
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ FOP_ONLIST(subvols, on, numsubvols, replies, locked_on, frame, inodelk, dom,
+ &loc, F_SETLK, &flock, NULL);
+
+ for (i = 0; i < numsubvols; i++) {
+ if (replies[i].op_ret == -1 && replies[i].op_errno == EAGAIN) {
+ cluster_fop_success_fill(replies, numsubvols, locked_on);
+ cluster_uninodelk(subvols, locked_on, numsubvols, replies, output,
+ frame, this, dom, inode, off, size);
+
+ FOP_SEQ(subvols, on, numsubvols, replies, locked_on, frame, inodelk,
+ dom, &loc, F_SETLKW, &flock, NULL);
+ break;
+ }
+ }
+
+ loc_wipe(&loc);
+ return cluster_fop_success_fill(replies, numsubvols, locked_on);
+}
+
+int
+cluster_unentrylk(xlator_t **subvols, unsigned char *locked_on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, char *dom,
+ inode_t *inode, const char *name)
+{
+ loc_t loc = {
+ 0,
+ };
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ FOP_ONLIST(subvols, locked_on, numsubvols, replies, output, frame, entrylk,
+ dom, &loc, name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
+
+ loc_wipe(&loc);
+
+ return cluster_fop_success_fill(replies, numsubvols, output);
+}
+
+int
+cluster_tryentrylk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *locked_on,
+ call_frame_t *frame, xlator_t *this, char *dom,
+ inode_t *inode, const char *name)
+{
+ loc_t loc = {0};
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ FOP_ONLIST(subvols, on, numsubvols, replies, locked_on, frame, entrylk, dom,
+ &loc, name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
+
+ loc_wipe(&loc);
+ return cluster_fop_success_fill(replies, numsubvols, locked_on);
+}
+
+int
+cluster_entrylk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *locked_on,
+ call_frame_t *frame, xlator_t *this, char *dom, inode_t *inode,
+ const char *name)
+{
+ int i = 0;
+ loc_t loc = {0};
+ unsigned char *output = NULL;
+
+ output = alloca(numsubvols);
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ FOP_ONLIST(subvols, on, numsubvols, replies, locked_on, frame, entrylk, dom,
+ &loc, name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
+
+ for (i = 0; i < numsubvols; i++) {
+ if (replies[i].op_ret == -1 && replies[i].op_errno == EAGAIN) {
+ cluster_fop_success_fill(replies, numsubvols, locked_on);
+ cluster_unentrylk(subvols, locked_on, numsubvols, replies, output,
+ frame, this, dom, inode, name);
+ FOP_SEQ(subvols, on, numsubvols, replies, locked_on, frame, entrylk,
+ dom, &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ break;
+ }
+ }
+
+ loc_wipe(&loc);
+ return cluster_fop_success_fill(replies, numsubvols, locked_on);
+}
+
+int
+cluster_tiebreaker_inodelk(xlator_t **subvols, unsigned char *on,
+ int numsubvols, default_args_cbk_t *replies,
+ unsigned char *locked_on, call_frame_t *frame,
+ xlator_t *this, char *dom, inode_t *inode, off_t off,
+ size_t size)
+{
+ struct gf_flock flock = {
+ 0,
+ };
+ int i = 0;
+ int num_success = 0;
+ loc_t loc = {0};
+ unsigned char *output = NULL;
+
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
+
+ output = alloca(numsubvols);
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ FOP_ONLIST(subvols, on, numsubvols, replies, locked_on, frame, inodelk, dom,
+ &loc, F_SETLK, &flock, NULL);
+
+ for (i = 0; i < numsubvols; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ num_success++;
+ continue;
+ }
+
+ /* TODO: If earlier subvols fail with an error other
+ * than EAGAIN, we could still have 2 clients competing
+ * for the lock*/
+ if (replies[i].op_ret == -1 && replies[i].op_errno == EAGAIN) {
+ cluster_fop_success_fill(replies, numsubvols, locked_on);
+ cluster_uninodelk(subvols, locked_on, numsubvols, replies, output,
+ frame, this, dom, inode, off, size);
+
+ if (num_success) {
+ FOP_SEQ(subvols, on, numsubvols, replies, locked_on, frame,
+ inodelk, dom, &loc, F_SETLKW, &flock, NULL);
+ } else {
+ loc_wipe(&loc);
+ memset(locked_on, 0, numsubvols);
+ return 0;
+ }
+ break;
+ }
+ }
+
+ loc_wipe(&loc);
+ return cluster_fop_success_fill(replies, numsubvols, locked_on);
+}
+
+int
+cluster_tiebreaker_entrylk(xlator_t **subvols, unsigned char *on,
+ int numsubvols, default_args_cbk_t *replies,
+ unsigned char *locked_on, call_frame_t *frame,
+ xlator_t *this, char *dom, inode_t *inode,
+ const char *name)
+{
+ int i = 0;
+ loc_t loc = {0};
+ unsigned char *output = NULL;
+ int num_success = 0;
+
+ output = alloca(numsubvols);
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ FOP_ONLIST(subvols, on, numsubvols, replies, locked_on, frame, entrylk, dom,
+ &loc, name, ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
+
+ for (i = 0; i < numsubvols; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ num_success++;
+ continue;
+ }
+ if (replies[i].op_ret == -1 && replies[i].op_errno == EAGAIN) {
+ cluster_fop_success_fill(replies, numsubvols, locked_on);
+ cluster_unentrylk(subvols, locked_on, numsubvols, replies, output,
+ frame, this, dom, inode, name);
+ if (num_success) {
+ FOP_SEQ(subvols, on, numsubvols, replies, locked_on, frame,
+ entrylk, dom, &loc, name, ENTRYLK_LOCK, ENTRYLK_WRLCK,
+ NULL);
+ } else {
+ loc_wipe(&loc);
+ memset(locked_on, 0, numsubvols);
+ return 0;
+ }
+ break;
+ }
+ }
+
+ loc_wipe(&loc);
+ return cluster_fop_success_fill(replies, numsubvols, locked_on);
+}
diff --git a/libglusterfs/src/common-utils.c b/libglusterfs/src/common-utils.c
index 82a493669ac..682cbf28055 100644
--- a/libglusterfs/src/common-utils.c
+++ b/libglusterfs/src/common-utils.c
@@ -8,13 +8,10 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#ifdef HAVE_BACKTRACE
#include <execinfo.h>
+#else
+#include "execinfo_compat.h"
#endif
#include <stdio.h>
@@ -27,1921 +24,3016 @@
#include <time.h>
#include <locale.h>
#include <sys/socket.h>
-#include <sys/wait.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <signal.h>
-#include <stdlib.h>
+#include <assert.h>
+#include <libgen.h> /* for dirname() */
+#include <grp.h>
-#if defined GF_BSD_HOST_OS || defined GF_DARWIN_HOST_OS
+#if defined(GF_BSD_HOST_OS) || defined(GF_DARWIN_HOST_OS)
#include <sys/sysctl.h>
#endif
+#ifndef GF_LINUX_HOST_OS
+#include <sys/resource.h>
+#endif
+#ifdef HAVE_SYNCFS_SYS
+#include <sys/syscall.h>
+#endif
-#include "logging.h"
-#include "common-utils.h"
-#include "revision.h"
-#include "glusterfs.h"
-#include "stack.h"
-#include "globals.h"
-#include "lkowner.h"
+#include "glusterfs/compat-errno.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/revision.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/stack.h"
+#include "glusterfs/lkowner.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/globals.h"
+#define XXH_INLINE_ALL
+#include "xxhash.h"
+#include <ifaddrs.h>
+#include "glusterfs/libglusterfs-messages.h"
+#include "glusterfs/glusterfs-acl.h"
+#ifdef __FreeBSD__
+#include <pthread_np.h>
+#undef BIT_SET
+#endif
#ifndef AI_ADDRCONFIG
#define AI_ADDRCONFIG 0
#endif /* AI_ADDRCONFIG */
+char *vol_type_str[] = {
+ "Distribute",
+ "Stripe [NOT SUPPORTED from v6.0]",
+ "Replicate",
+ "Striped-Replicate [NOT SUPPORTED from v6.0]",
+ "Disperse",
+ "Tier [NOT SUPPORTED from v6.0]",
+ "Distributed-Stripe [NOT SUPPORTED from v6.0]",
+ "Distributed-Replicate",
+ "Distributed-Striped-Replicate [NOT SUPPORTED from v6.0]",
+ "Distributed-Disperse",
+};
+
typedef int32_t (*rw_op_t)(int32_t fd, char *buf, int32_t size);
typedef int32_t (*rwv_op_t)(int32_t fd, const struct iovec *buf, int32_t size);
-struct dnscache6 {
- struct addrinfo *first;
- struct addrinfo *next;
-};
+char *xattrs_to_heal[] = {"user.",
+ POSIX_ACL_ACCESS_XATTR,
+ POSIX_ACL_DEFAULT_XATTR,
+ QUOTA_LIMIT_KEY,
+ QUOTA_LIMIT_OBJECTS_KEY,
+ GF_SELINUX_XATTR_KEY,
+ GF_XATTR_MDATA_KEY,
+ NULL};
+void
+gf_xxh64_wrapper(const unsigned char *data, size_t const len,
+ unsigned long long const seed, char *xxh64)
+{
+ unsigned short i = 0;
+ const unsigned short lim = GF_XXH64_DIGEST_LENGTH * 2 + 1;
+ XXH64_hash_t hash = 0;
+ XXH64_canonical_t c_hash = {
+ {
+ 0,
+ },
+ };
+ const uint8_t *p = (const uint8_t *)&c_hash;
+
+ hash = XXH64(data, len, seed);
+ XXH64_canonicalFromHash(&c_hash, hash);
+
+ for (i = 0; i < GF_XXH64_DIGEST_LENGTH; i++)
+ snprintf(xxh64 + i * 2, lim - i * 2, "%02x", p[i]);
+}
+
+/**
+ * This function takes following arguments
+ * @this: xlator
+ * @gfid: The gfid which has to be filled
+ * @hash: the 8 byte hash which has to be filled inside the gfid
+ * @index: the array element of the uuid_t structure (which is
+ * a array of unsigned char) from where the 8 bytes of
+ * the hash has to be filled. Since uuid_t contains 16
+ * char elements in the array, each byte of the hash has
+ * to be filled in one array element.
+ *
+ * This function is called twice for 2 hashes (of 8 byte each) to
+ * be filled in the gfid.
+ *
+ * The for loop in this function actually is doing these 2 things
+ * for each hash
+ *
+ * 1) One of the hashes
+ * tmp[0] = (hash_2 >> 56) & 0xff;
+ * tmp[1] = (hash_2 >> 48) & 0xff;
+ * tmp[2] = (hash_2 >> 40) & 0xff;
+ * tmp[3] = (hash_2 >> 32) & 0xff;
+ * tmp[4] = (hash_2 >> 24) & 0xff;
+ * tmp[5] = (hash_2 >> 16) & 0xff;
+ * tmp[6] = (hash_2 >> 8) & 0xff;
+ * tmp[7] = (hash_2) & 0xff;
+ *
+ * 2) The other hash:
+ * tmp[8] = (hash_1 >> 56) & 0xff;
+ * tmp[9] = (hash_1 >> 48) & 0xff;
+ * tmp[10] = (hash_1 >> 40) & 0xff;
+ * tmp[11] = (hash_1 >> 32) & 0xff;
+ * tmp[12] = (hash_1 >> 24) & 0xff;
+ * tmp[13] = (hash_1 >> 16) & 0xff;
+ * tmp[14] = (hash_1 >> 8) & 0xff;
+ * tmp[15] = (hash_1) & 0xff;
+ **/
+static int
+gf_gfid_from_xxh64(xlator_t *this, uuid_t gfid, XXH64_hash_t hash,
+ unsigned short index)
+{
+ int ret = -1;
+ int i = -1;
+
+ if ((index != 0) && (index != 8)) {
+ gf_msg_callingfn("gfid-from-xxh64", GF_LOG_WARNING, 0,
+ LG_MSG_INDEX_NOT_FOUND,
+ "index can only be either 0 or 8, as this"
+ "function's purpose is to encode a 8 byte "
+ "hash inside the gfid (index: %d)",
+ index);
+ goto out;
+ }
+
+ for (i = 0; i < sizeof(hash); i++) {
+ /*
+ * As of now the below statement is equivalent of this.
+ * gfid[index+i] = (hash >> (64 - (8 * (i+1)))) & 0xff;
+ */
+ gfid[index + i] = (hash >> ((sizeof(hash) * 8) - (8 * (i + 1)))) &
+ (0xff);
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/**
+ * This function does the same thing as gf_xxh64_wrapper. But gf_xxh64_wrapper
+ * does not return anything and in this xlator there is a need for both the
+ * actual hash and the canonicalized form of the hash.
+ *
+ * To summarize:
+ * - XXH64_hash_t is needed as return because, those bytes which contain the
+ * hash can be used for different purposes as needed. One example is
+ * to have those bytes copied into the uuid_t structure to be used as gfid
+ * - xxh64 string is needed because, it can be used as the key for generating
+ * the next hash (and any other purpose which might require canonical form
+ * of the hash).
+ **/
+XXH64_hash_t
+gf_xxh64_hash_wrapper(const unsigned char *data, size_t const len,
+ unsigned long long const seed, char *xxh64)
+{
+ unsigned short i = 0;
+ const unsigned short lim = GF_XXH64_DIGEST_LENGTH * 2 + 1;
+ XXH64_hash_t hash = 0;
+ XXH64_canonical_t c_hash = {
+ {
+ 0,
+ },
+ };
+ const uint8_t *p = (const uint8_t *)&c_hash;
+
+ hash = XXH64(data, len, seed);
+ XXH64_canonicalFromHash(&c_hash, hash);
+
+ for (i = 0; i < GF_XXH64_DIGEST_LENGTH; i++)
+ snprintf(xxh64 + i * 2, lim - i * 2, "%02x", p[i]);
+
+ return hash;
+}
+
+/**
+ * This is the algorithm followed for generating new gfid
+ * 1) generate xxh64 hash using snapname and original gfid of the object
+ * 2) Using the canonicalized form of above hash as the key, generate
+ * another hash
+ * 3) Combine both of the 8 byte hashes to generate a 16 byte uuid_t type
+ * 4) Use the above uuid as the gfid
+ *
+ * Each byte of the hash is stored separately in different elements of the
+ * character array represented by uuid_t
+ * Ex: tmp[0] = (hash_2 >> 56) & 0xFF
+ * This saves the most significant byte of hash_2 in tmp[0]
+ * tmp[1] = (hash_2 >> 48) & 0xFF
+ * This saves next most significant byte of hash_2 in tmp[1]
+ * .
+ * .
+ * So on.
+ * tmp[0] - tmp[7] holds the contents of hash_2
+ * tmp[8] - tmp[15] hold the conents of hash_1
+ *
+ * The hash generated (i.e. of type XXH64_hash_t) is 8 bytes long. And for
+ * gfid 16 byte uuid is needed. Hecne the 2 hashes are combined to form
+ * one 16 byte entity.
+ **/
+int
+gf_gfid_generate_from_xxh64(uuid_t gfid, char *key)
+{
+ char xxh64_1[GF_XXH64_DIGEST_LENGTH * 2 + 1] = {
+ 0,
+ };
+ char xxh64_2[GF_XXH64_DIGEST_LENGTH * 2 + 1] = {
+ 0,
+ };
+ XXH64_hash_t hash_1 = 0;
+ XXH64_hash_t hash_2 = 0;
+ int ret = -1;
+ xlator_t *this = THIS;
+
+ hash_1 = gf_xxh64_hash_wrapper((unsigned char *)key, strlen(key),
+ GF_XXHSUM64_DEFAULT_SEED, xxh64_1);
+
+ hash_2 = gf_xxh64_hash_wrapper((unsigned char *)xxh64_1, strlen(xxh64_1),
+ GF_XXHSUM64_DEFAULT_SEED, xxh64_2);
+
+ /* hash_2 is saved in 1st 8 elements of uuid_t char array */
+ if (gf_gfid_from_xxh64(this, gfid, hash_2, 0)) {
+ gf_msg_callingfn(this->name, GF_LOG_WARNING, 0,
+ LG_MSG_XXH64_TO_GFID_FAILED,
+ "failed to encode the hash %llx into the 1st"
+ "half of gfid",
+ hash_2);
+ goto out;
+ }
+
+ /* hash_1 is saved in the remaining 8 elements of uuid_t */
+ if (gf_gfid_from_xxh64(this, gfid, hash_1, 8)) {
+ gf_msg_callingfn(this->name, GF_LOG_WARNING, 0,
+ LG_MSG_XXH64_TO_GFID_FAILED,
+ "failed to encode the hash %llx into the 2nd"
+ "half of gfid",
+ hash_1);
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0,
+ "gfid generated is %s (hash1: %llx) "
+ "hash2: %llx, xxh64_1: %s xxh64_2: %s",
+ uuid_utoa(gfid), hash_1, hash_2, xxh64_1, xxh64_2);
+
+ ret = 0;
+
+out:
+ return ret;
+}
/* works similar to mkdir(1) -p.
*/
int
-mkdir_p (char *path, mode_t mode, gf_boolean_t allow_symlinks)
-{
- int i = 0;
- int ret = -1;
- char dir[PATH_MAX] = {0,};
- struct stat stbuf = {0,};
-
- strcpy (dir, path);
- i = (dir[0] == '/')? 1: 0;
- do {
- if (path[i] != '/' && path[i] != '\0')
- continue;
-
- dir[i] = '\0';
- ret = mkdir (dir, mode);
- if (ret && errno != EEXIST) {
- gf_log ("", GF_LOG_ERROR, "Failed due to reason %s",
- strerror (errno));
- goto out;
- }
-
- if (ret && errno == EEXIST && !allow_symlinks) {
- ret = lstat (dir, &stbuf);
- if (ret)
- goto out;
-
- if (S_ISLNK (stbuf.st_mode)) {
- ret = -1;
- gf_log ("", GF_LOG_ERROR, "%s is a symlink",
- dir);
- goto out;
- }
- }
- dir[i] = '/';
-
- } while (path[i++] != '\0');
-
- ret = stat (dir, &stbuf);
- if (ret || !S_ISDIR (stbuf.st_mode)) {
+mkdir_p(char *path, mode_t mode, gf_boolean_t allow_symlinks)
+{
+ int i = 0;
+ int ret = -1;
+ char dir[PATH_MAX] = {
+ 0,
+ };
+ struct stat stbuf = {
+ 0,
+ };
+
+ const int path_len = min(strlen(path), PATH_MAX - 1);
+
+ snprintf(dir, path_len + 1, "%s", path);
+
+ i = (dir[0] == '/') ? 1 : 0;
+ do {
+ if (path[i] != '/' && path[i] != '\0')
+ continue;
+
+ dir[i] = '\0';
+ ret = sys_mkdir(dir, mode);
+ if (ret && errno != EEXIST) {
+ gf_smsg("", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED, NULL);
+ goto out;
+ }
+
+ if (ret && errno == EEXIST && !allow_symlinks) {
+ ret = sys_lstat(dir, &stbuf);
+ if (ret)
+ goto out;
+
+ if (S_ISLNK(stbuf.st_mode)) {
ret = -1;
- gf_log ("", GF_LOG_ERROR, "Failed to create directory, "
- "possibly some of the components were not directories");
+ gf_smsg("", GF_LOG_ERROR, 0, LG_MSG_DIR_IS_SYMLINK, "dir=%s",
+ dir, NULL);
goto out;
+ }
}
+ dir[i] = '/';
+
+ } while (path[i++] != '\0');
+
+ ret = sys_stat(dir, &stbuf);
+ if (ret || !S_ISDIR(stbuf.st_mode)) {
+ if (ret == 0)
+ errno = 0;
+ ret = -1;
+ gf_smsg("", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED,
+ "possibly some of the components"
+ " were not directories",
+ NULL);
+ goto out;
+ }
+
+ ret = 0;
+out:
+
+ return ret;
+}
+
+int
+gf_lstat_dir(const char *path, struct stat *stbuf_in)
+{
+ int ret = -1;
+ struct stat stbuf = {
+ 0,
+ };
+
+ if (path == NULL) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ ret = sys_lstat(path, &stbuf);
+ if (ret)
+ goto out;
+
+ if (!S_ISDIR(stbuf.st_mode)) {
+ errno = ENOTDIR;
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
- ret = 0;
out:
+ if (!ret && stbuf_in)
+ *stbuf_in = stbuf;
- return ret;
+ return ret;
}
int
-log_base2 (unsigned long x)
+log_base2(unsigned long x)
{
- int val = 0;
+ int val = 0;
- while (x > 1) {
- x /= 2;
- val++;
- }
+ while (x > 1) {
+ x /= 2;
+ val++;
+ }
- return val;
+ return val;
}
+/**
+ * gf_rev_dns_lookup -- Perform a reverse DNS lookup on the IP address.
+ *
+ * @ip: The IP address to perform a reverse lookup on
+ *
+ * @return: success: Allocated string containing the hostname
+ * failure: NULL
+ */
+char *
+gf_rev_dns_lookup(const char *ip)
+{
+ char *fqdn = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("resolver", ip, out);
+
+ /* Get the FQDN */
+ ret = gf_get_hostname_from_ip((char *)ip, &fqdn);
+ if (ret != 0) {
+ gf_smsg("resolver", GF_LOG_INFO, errno, LG_MSG_RESOLVE_HOSTNAME_FAILED,
+ "hostname=%s", ip, NULL);
+ }
+out:
+ return fqdn;
+}
+
+/**
+ * gf_resolve_path_parent -- Given a path, returns an allocated string
+ * containing the parent's path.
+ * @path: Path to parse
+ * @return: The parent path if found, NULL otherwise
+ */
+char *
+gf_resolve_path_parent(const char *path)
+{
+ char *parent = NULL;
+ char *tmp = NULL;
+ char *pathc = NULL;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, path, out);
+
+ if (0 == strlen(path)) {
+ gf_msg_callingfn(THIS->name, GF_LOG_DEBUG, 0, LG_MSG_INVALID_STRING,
+ "invalid string for 'path'");
+ goto out;
+ }
+
+ /* dup the parameter, we don't want to modify it */
+ pathc = strdupa(path);
+ if (!pathc) {
+ goto out;
+ }
+
+ /* Get the parent directory */
+ tmp = dirname(pathc);
+ if (strcmp(tmp, "/") == 0)
+ goto out;
+
+ parent = gf_strdup(tmp);
+out:
+ return parent;
+}
int32_t
-gf_resolve_ip6 (const char *hostname,
- uint16_t port,
- int family,
- void **dnscache,
- struct addrinfo **addr_info)
-{
- int32_t ret = 0;
- struct addrinfo hints;
- struct dnscache6 *cache = NULL;
- char service[NI_MAXSERV], host[NI_MAXHOST];
-
- if (!hostname) {
- gf_log_callingfn ("resolver", GF_LOG_WARNING, "hostname is NULL");
- return -1;
+gf_resolve_ip6(const char *hostname, uint16_t port, int family, void **dnscache,
+ struct addrinfo **addr_info)
+{
+ int32_t ret = 0;
+ struct addrinfo hints;
+ struct dnscache6 *cache = NULL;
+ char service[NI_MAXSERV], host[NI_MAXHOST];
+
+ if (!hostname) {
+ gf_msg_callingfn("resolver", GF_LOG_WARNING, 0, LG_MSG_HOSTNAME_NULL,
+ "hostname is NULL");
+ return -1;
+ }
+
+ if (!*dnscache) {
+ *dnscache = GF_CALLOC(1, sizeof(struct dnscache6),
+ gf_common_mt_dnscache6);
+ if (!*dnscache)
+ return -1;
+ }
+
+ cache = *dnscache;
+ if (cache->first && !cache->next) {
+ freeaddrinfo(cache->first);
+ cache->first = cache->next = NULL;
+ gf_msg_trace("resolver", 0, "flushing DNS cache");
+ }
+
+ if (!cache->first) {
+ char *port_str = NULL;
+ gf_msg_trace("resolver", 0,
+ "DNS cache not present, freshly "
+ "probing hostname: %s",
+ hostname);
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = family;
+ hints.ai_socktype = SOCK_STREAM;
+
+ ret = gf_asprintf(&port_str, "%d", port);
+ if (-1 == ret) {
+ return -1;
}
-
- if (!*dnscache) {
- *dnscache = GF_CALLOC (1, sizeof (struct dnscache6),
- gf_common_mt_dnscache6);
- if (!*dnscache)
- return -1;
+ if ((ret = getaddrinfo(hostname, port_str, &hints, &cache->first)) !=
+ 0) {
+ gf_smsg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETADDRINFO_FAILED,
+ "family=%d", family, "ret=%s", gai_strerror(ret), NULL);
+
+ GF_FREE(*dnscache);
+ *dnscache = NULL;
+ GF_FREE(port_str);
+ return -1;
}
-
- cache = *dnscache;
- if (cache->first && !cache->next) {
- freeaddrinfo(cache->first);
- cache->first = cache->next = NULL;
- gf_log ("resolver", GF_LOG_TRACE,
- "flushing DNS cache");
+ GF_FREE(port_str);
+
+ cache->next = cache->first;
+ }
+
+ if (cache->next) {
+ ret = getnameinfo((struct sockaddr *)cache->next->ai_addr,
+ cache->next->ai_addrlen, host, sizeof(host), service,
+ sizeof(service), NI_NUMERICHOST);
+ if (ret != 0) {
+ gf_smsg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED,
+ "ret=%s", gai_strerror(ret), NULL);
+ goto err;
}
- if (!cache->first) {
- char *port_str = NULL;
- gf_log ("resolver", GF_LOG_TRACE,
- "DNS cache not present, freshly probing hostname: %s",
- hostname);
-
- memset(&hints, 0, sizeof(hints));
- hints.ai_family = family;
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_flags = AI_ADDRCONFIG;
-
- ret = gf_asprintf (&port_str, "%d", port);
- if (-1 == ret) {
- gf_log ("resolver", GF_LOG_ERROR, "asprintf failed");
- return -1;
- }
- if ((ret = getaddrinfo(hostname, port_str, &hints, &cache->first)) != 0) {
- gf_log ("resolver", GF_LOG_ERROR,
- "getaddrinfo failed (%s)", gai_strerror (ret));
-
- GF_FREE (*dnscache);
- *dnscache = NULL;
- GF_FREE (port_str);
- return -1;
- }
- GF_FREE (port_str);
-
- cache->next = cache->first;
+ gf_msg_debug("resolver", 0,
+ "returning ip-%s (port-%s) for "
+ "hostname: %s and port: %d",
+ host, service, hostname, port);
+
+ *addr_info = cache->next;
+ }
+
+ if (cache->next)
+ cache->next = cache->next->ai_next;
+ if (cache->next) {
+ ret = getnameinfo((struct sockaddr *)cache->next->ai_addr,
+ cache->next->ai_addrlen, host, sizeof(host), service,
+ sizeof(service), NI_NUMERICHOST);
+ if (ret != 0) {
+ gf_smsg("resolver", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED,
+ "ret=%s", gai_strerror(ret), NULL);
+ goto err;
}
- if (cache->next) {
- ret = getnameinfo((struct sockaddr *)cache->next->ai_addr,
- cache->next->ai_addrlen,
- host, sizeof (host),
- service, sizeof (service),
- NI_NUMERICHOST);
- if (ret != 0) {
- gf_log ("resolver", GF_LOG_ERROR,
- "getnameinfo failed (%s)", gai_strerror (ret));
- goto err;
- }
-
- gf_log ("resolver", GF_LOG_DEBUG,
- "returning ip-%s (port-%s) for hostname: %s and port: %d",
- host, service, hostname, port);
-
- *addr_info = cache->next;
+ gf_msg_debug("resolver", 0,
+ "next DNS query will return: "
+ "ip-%s port-%s",
+ host, service);
+ }
+
+ return 0;
+
+err:
+ freeaddrinfo(cache->first);
+ cache->first = cache->next = NULL;
+ GF_FREE(cache);
+ *dnscache = NULL;
+ return -1;
+}
+
+/**
+ * gf_dnscache_init -- Initializes a dnscache struct and sets the ttl
+ * to the specified value in the parameter.
+ *
+ * @ttl: the TTL in seconds
+ * @return: SUCCESS: Pointer to an allocated dnscache struct
+ * FAILURE: NULL
+ */
+struct dnscache *
+gf_dnscache_init(time_t ttl)
+{
+ struct dnscache *cache = GF_MALLOC(sizeof(*cache), gf_common_mt_dnscache);
+ if (!cache)
+ return NULL;
+
+ cache->cache_dict = dict_new();
+ if (!cache->cache_dict) {
+ GF_FREE(cache);
+ cache = NULL;
+ } else {
+ cache->ttl = ttl;
+ }
+
+ return cache;
+}
+
+/**
+ * gf_dnscache_deinit -- cleanup resources used by struct dnscache
+ */
+void
+gf_dnscache_deinit(struct dnscache *cache)
+{
+ if (!cache) {
+ gf_msg_plain(GF_LOG_WARNING, "dnscache is NULL");
+ return;
+ }
+ dict_unref(cache->cache_dict);
+ GF_FREE(cache);
+}
+
+/**
+ * gf_dnscache_entry_init -- Initialize a dnscache entry
+ *
+ * @return: SUCCESS: Pointer to an allocated dnscache entry struct
+ * FAILURE: NULL
+ */
+struct dnscache_entry *
+gf_dnscache_entry_init()
+{
+ struct dnscache_entry *entry = GF_CALLOC(1, sizeof(*entry),
+ gf_common_mt_dnscache_entry);
+ return entry;
+}
+
+/**
+ * gf_dnscache_entry_deinit -- Free memory used by a dnscache entry
+ *
+ * @entry: Pointer to deallocate
+ */
+void
+gf_dnscache_entry_deinit(struct dnscache_entry *entry)
+{
+ GF_FREE(entry->ip);
+ GF_FREE(entry->fqdn);
+ GF_FREE(entry);
+}
+
+/**
+ * gf_rev_dns_lookup -- Perform a reverse DNS lookup on the IP address.
+ *
+ * @ip: The IP address to perform a reverse lookup on
+ *
+ * @return: success: Allocated string containing the hostname
+ * failure: NULL
+ */
+char *
+gf_rev_dns_lookup_cached(const char *ip, struct dnscache *dnscache)
+{
+ char *fqdn = NULL;
+ int ret = 0;
+ dict_t *cache = NULL;
+ data_t *entrydata = NULL;
+ struct dnscache_entry *dnsentry = NULL;
+ gf_boolean_t from_cache = _gf_false;
+
+ if (!dnscache)
+ goto out;
+
+ cache = dnscache->cache_dict;
+
+ /* Quick cache lookup to see if we already hold it */
+ entrydata = dict_get(cache, (char *)ip);
+ if (entrydata) {
+ dnsentry = (struct dnscache_entry *)entrydata->data;
+ /* First check the TTL & timestamp */
+ if (gf_time() - dnsentry->timestamp > dnscache->ttl) {
+ gf_dnscache_entry_deinit(dnsentry);
+ entrydata->data = NULL; /* Mark this as 'null' so
+ * dict_del () doesn't try free
+ * this after we've already
+ * freed it.
+ */
+
+ dict_del(cache, (char *)ip); /* Remove this entry */
+ } else {
+ /* Cache entry is valid, get the FQDN and return */
+ fqdn = dnsentry->fqdn;
+ from_cache = _gf_true; /* Mark this as from cache */
+ goto out;
}
+ }
- if (cache->next)
- cache->next = cache->next->ai_next;
- if (cache->next) {
- ret = getnameinfo((struct sockaddr *)cache->next->ai_addr,
- cache->next->ai_addrlen,
- host, sizeof (host),
- service, sizeof (service),
- NI_NUMERICHOST);
- if (ret != 0) {
- gf_log ("resolver", GF_LOG_ERROR,
- "getnameinfo failed (%s)", gai_strerror (ret));
- goto err;
- }
-
- gf_log ("resolver", GF_LOG_DEBUG,
- "next DNS query will return: ip-%s port-%s", host, service);
+ /* Get the FQDN */
+ ret = gf_get_hostname_from_ip((char *)ip, &fqdn);
+ if (ret != 0)
+ goto out;
+
+ if (!fqdn) {
+ gf_log_callingfn("resolver", GF_LOG_CRITICAL,
+ "Allocation failed for the host address");
+ goto out;
+ }
+
+ from_cache = _gf_false;
+out:
+ /* Insert into the cache */
+ if (fqdn && !from_cache && ip) {
+ struct dnscache_entry *entry = gf_dnscache_entry_init();
+
+ if (entry) {
+ entry->fqdn = fqdn;
+ entry->ip = gf_strdup(ip);
+ entry->timestamp = gf_time();
+ entrydata = bin_to_data(entry, sizeof(*entry));
+ dict_set(cache, (char *)ip, entrydata);
}
+ }
+ return fqdn;
+}
- return 0;
+struct xldump {
+ int lineno;
+};
-err:
- freeaddrinfo (cache->first);
- cache->first = cache->next = NULL;
- GF_FREE (cache);
- *dnscache = NULL;
- return -1;
+/* to catch any format discrepencies that may arise in code */
+static int
+nprintf(struct xldump *dump, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+static int
+nprintf(struct xldump *dump, const char *fmt, ...)
+{
+ va_list ap;
+ char *msg = NULL;
+ char header[32];
+ int ret = 0;
+
+ ret = snprintf(header, 32, "%3d:", ++dump->lineno);
+ if (ret < 0)
+ goto out;
+
+ va_start(ap, fmt);
+ ret = vasprintf(&msg, fmt, ap);
+ va_end(ap);
+ if (-1 == ret)
+ goto out;
+
+ /* NOTE: No ret value from gf_msg_plain, so unable to compute printed
+ * characters. The return value from nprintf is not used, so for now
+ * living with it */
+ gf_msg_plain(GF_LOG_WARNING, "%s %s", header, msg);
+
+out:
+ FREE(msg);
+ return 0;
+}
+
+static int
+xldump_options(dict_t *this, char *key, data_t *value, void *d)
+{
+ nprintf(d, " option %s %s", key, value->data);
+ return 0;
+}
+
+static void
+xldump_subvolumes(xlator_t *this, void *d)
+{
+ xlator_list_t *subv = NULL;
+ int len = 0;
+ char *subvstr = NULL;
+
+ if (!this->children)
+ return;
+
+ for (subv = this->children; subv; subv = subv->next)
+ len += (strlen(subv->xlator->name) + 1);
+
+ subvstr = GF_MALLOC(len, gf_common_mt_strdup);
+
+ len = 0;
+ for (subv = this->children; subv; subv = subv->next)
+ len += sprintf(subvstr + len, "%s%s", subv->xlator->name,
+ subv->next ? " " : "");
+
+ nprintf(d, " subvolumes %s", subvstr);
+
+ GF_FREE(subvstr);
}
+static void
+xldump(xlator_t *each, void *d)
+{
+ nprintf(d, "volume %s", each->name);
+ nprintf(d, " type %s", each->type);
+ dict_foreach(each->options, xldump_options, d);
+
+ xldump_subvolumes(each, d);
+
+ nprintf(d, "end-volume");
+ nprintf(d, " ");
+}
void
-gf_log_volume_file (FILE *specfp)
+gf_log_dump_graph(FILE *specfp, glusterfs_graph_t *graph)
{
- extern FILE *gf_log_logfile;
- int lcount = 0;
- char data[GF_UNIT_KB];
+ struct xldump xld = {
+ 0,
+ };
- fseek (specfp, 0L, SEEK_SET);
+ gf_msg_plain(GF_LOG_WARNING, "Final graph:");
+ gf_msg_plain(GF_LOG_WARNING,
+ "+---------------------------------------"
+ "---------------------------------------+");
+
+ xlator_foreach_depth_first(graph->top, xldump, &xld);
- fprintf (gf_log_logfile, "Given volfile:\n");
- fprintf (gf_log_logfile,
+ gf_msg_plain(GF_LOG_WARNING,
"+---------------------------------------"
- "---------------------------------------+\n");
- while (fgets (data, GF_UNIT_KB, specfp) != NULL){
- lcount++;
- fprintf (gf_log_logfile, "%3d: %s", lcount, data);
- }
- fprintf (gf_log_logfile,
- "\n+---------------------------------------"
- "---------------------------------------+\n");
- fflush (gf_log_logfile);
- fseek (specfp, 0L, SEEK_SET);
+ "---------------------------------------+");
}
static void
-gf_dump_config_flags (int fd)
+gf_dump_config_flags()
{
- int ret = 0;
-
- ret = write (fd, "configuration details:\n", 23);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "configuration details:");
/* have argp */
#ifdef HAVE_ARGP
- ret = write (fd, "argp 1\n", 7);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "argp 1");
#endif
/* ifdef if found backtrace */
#ifdef HAVE_BACKTRACE
- ret = write (fd, "backtrace 1\n", 12);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "backtrace 1");
#endif
/* Berkeley-DB version has cursor->get() */
#ifdef HAVE_BDB_CURSOR_GET
- ret = write (fd, "bdb->cursor->get 1\n", 19);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "bdb->cursor->get 1");
#endif
/* Define to 1 if you have the <db.h> header file. */
#ifdef HAVE_DB_H
- ret = write (fd, "db.h 1\n", 7);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "db.h 1");
#endif
/* Define to 1 if you have the <dlfcn.h> header file. */
#ifdef HAVE_DLFCN_H
- ret = write (fd, "dlfcn 1\n", 8);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "dlfcn 1");
#endif
/* define if fdatasync exists */
#ifdef HAVE_FDATASYNC
- ret = write (fd, "fdatasync 1\n", 12);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "fdatasync 1");
#endif
/* Define to 1 if you have the `pthread' library (-lpthread). */
#ifdef HAVE_LIBPTHREAD
- ret = write (fd, "libpthread 1\n", 13);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "libpthread 1");
#endif
/* define if llistxattr exists */
#ifdef HAVE_LLISTXATTR
- ret = write (fd, "llistxattr 1\n", 13);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "llistxattr 1");
#endif
/* define if found setfsuid setfsgid */
#ifdef HAVE_SET_FSID
- ret = write (fd, "setfsid 1\n", 10);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "setfsid 1");
#endif
/* define if found spinlock */
#ifdef HAVE_SPINLOCK
- ret = write (fd, "spinlock 1\n", 11);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "spinlock 1");
#endif
/* Define to 1 if you have the <sys/epoll.h> header file. */
#ifdef HAVE_SYS_EPOLL_H
- ret = write (fd, "epoll.h 1\n", 10);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "epoll.h 1");
#endif
/* Define to 1 if you have the <sys/extattr.h> header file. */
#ifdef HAVE_SYS_EXTATTR_H
- ret = write (fd, "extattr.h 1\n", 12);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "extattr.h 1");
#endif
/* Define to 1 if you have the <sys/xattr.h> header file. */
#ifdef HAVE_SYS_XATTR_H
- ret = write (fd, "xattr.h 1\n", 10);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "xattr.h 1");
#endif
/* define if found st_atim.tv_nsec */
#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
- ret = write (fd, "st_atim.tv_nsec 1\n", 18);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "st_atim.tv_nsec 1");
#endif
/* define if found st_atimespec.tv_nsec */
#ifdef HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC
- ret = write (fd, "st_atimespec.tv_nsec 1\n",23);
- if (ret == -1)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, "st_atimespec.tv_nsec 1");
#endif
/* Define to the full name and version of this package. */
#ifdef PACKAGE_STRING
- {
- char msg[128];
- sprintf (msg, "package-string: %s\n", PACKAGE_STRING);
- ret = write (fd, msg, strlen (msg));
- if (ret == -1)
- goto out;
+ {
+ char *msg = NULL;
+ int ret = -1;
+
+ ret = gf_asprintf(&msg, "package-string: %s", PACKAGE_STRING);
+ if (ret >= 0) {
+ gf_msg_plain_nomem(GF_LOG_ALERT, msg);
+ GF_FREE(msg);
}
+ }
#endif
-out:
- return;
+ return;
}
-/* Obtain a backtrace and print it to stdout. */
-/* TODO: It looks like backtrace_symbols allocates memory,
- it may be problem because mostly memory allocation/free causes 'sigsegv' */
+/* Obtain a backtrace and print it to the log */
void
-gf_print_trace (int32_t signum)
+gf_print_trace(int32_t signum, glusterfs_ctx_t *ctx)
{
- extern FILE *gf_log_logfile;
- char msg[1024] = {0,};
- char timestr[64] = {0,};
- int ret = 0;
- int fd = 0;
-
- fd = fileno (gf_log_logfile);
-
- /* Pending frames, (if any), list them in order */
- ret = write (fd, "pending frames:\n", 16);
- if (ret < 0)
- goto out;
-
+ char msg[1024] = {
+ 0,
+ };
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ call_stack_t *stack = NULL;
+
+ /* Now every gf_log call will just write to a buffer and when the
+ * buffer becomes full, its written to the log-file. Suppose the process
+ * crashes and prints the backtrace in the log-file, then the previous
+ * log information will still be in the buffer itself. So flush the
+ * contents of the buffer to the log file before printing the backtrace
+ * which helps in debugging.
+ */
+ gf_log_flush();
+
+ gf_log_disable_suppression_before_exit(ctx);
+
+ /* Pending frames, (if any), list them in order */
+ gf_msg_plain_nomem(GF_LOG_ALERT, "pending frames:");
+ {
+ /* FIXME: traversing stacks outside pool->lock */
+ list_for_each_entry(stack, &ctx->pool->all_frames, all_frames)
{
- glusterfs_ctx_t *ctx = glusterfs_ctx_get ();
- struct list_head *trav = ((call_pool_t *)ctx->pool)->all_frames.next;
- while (trav != (&((call_pool_t *)ctx->pool)->all_frames)) {
- call_frame_t *tmp = (call_frame_t *)(&((call_stack_t *)trav)->frames);
- if (tmp->root->type == GF_OP_TYPE_FOP)
- sprintf (msg,"frame : type(%d) op(%s)\n",
- tmp->root->type,
- gf_fop_list[tmp->root->op]);
- if (tmp->root->type == GF_OP_TYPE_MGMT)
- sprintf (msg,"frame : type(%d) op(%s)\n",
- tmp->root->type,
- gf_mgmt_list[tmp->root->op]);
-
- ret = write (fd, msg, strlen (msg));
- if (ret < 0)
- goto out;
-
- trav = trav->next;
- }
- ret = write (fd, "\n", 1);
- if (ret < 0)
- goto out;
- }
+ if (stack->type == GF_OP_TYPE_FOP)
+ sprintf(msg, "frame : type(%d) op(%s)", stack->type,
+ gf_fop_list[stack->op]);
+ else
+ sprintf(msg, "frame : type(%d) op(%d)", stack->type, stack->op);
- sprintf (msg, "patchset: %s\n", GLUSTERFS_REPOSITORY_REVISION);
- ret = write (fd, msg, strlen (msg));
- if (ret < 0)
- goto out;
-
- sprintf (msg, "signal received: %d\n", signum);
- ret = write (fd, msg, strlen (msg));
- if (ret < 0)
- goto out;
-
- {
- /* Dump the timestamp of the crash too, so the previous logs
- can be related */
- gf_time_fmt (timestr, sizeof timestr, time (NULL), gf_timefmt_FT);
- ret = write (fd, "time of crash: ", 15);
- if (ret < 0)
- goto out;
- ret = write (fd, timestr, strlen (timestr));
- if (ret < 0)
- goto out;
- }
-
- gf_dump_config_flags (fd);
-#if HAVE_BACKTRACE
- /* Print 'backtrace' */
- {
- void *array[200];
- size_t size;
-
- size = backtrace (array, 200);
- backtrace_symbols_fd (&array[1], size-1, fd);
- sprintf (msg, "---------\n");
- ret = write (fd, msg, strlen (msg));
- if (ret < 0)
- goto out;
+ gf_msg_plain_nomem(GF_LOG_ALERT, msg);
}
-#endif /* HAVE_BACKTRACE */
-
-out:
- /* Send a signal to terminate the process */
- signal (signum, SIG_DFL);
- raise (signum);
+ }
+
+ sprintf(msg, "patchset: %s", GLUSTERFS_REPOSITORY_REVISION);
+ gf_msg_plain_nomem(GF_LOG_ALERT, msg);
+
+ sprintf(msg, "signal received: %d", signum);
+ gf_msg_plain_nomem(GF_LOG_ALERT, msg);
+ {
+ /* Dump the timestamp of the crash too, so the previous logs
+ can be related */
+ gf_time_fmt(timestr, sizeof timestr, gf_time(), gf_timefmt_FT);
+ gf_msg_plain_nomem(GF_LOG_ALERT, "time of crash: ");
+ gf_msg_plain_nomem(GF_LOG_ALERT, timestr);
+ }
+
+ gf_dump_config_flags();
+ gf_msg_backtrace_nomem(GF_LOG_ALERT, 200);
+ sprintf(msg, "---------");
+ gf_msg_plain_nomem(GF_LOG_ALERT, msg);
+
+ /* Send a signal to terminate the process */
+ signal(signum, SIG_DFL);
+ raise(signum);
}
void
-trap (void)
+trap(void)
{
-
}
char *
-gf_trim (char *string)
+gf_trim(char *string)
{
- register char *s, *t;
-
- if (string == NULL) {
- return NULL;
- }
+ register char *s, *t;
- for (s = string; isspace (*s); s++)
- ;
-
- if (*s == 0)
- return s;
+ if (string == NULL) {
+ return NULL;
+ }
- t = s + strlen (s) - 1;
- while (t > s && isspace (*t))
- t--;
- *++t = '\0';
+ for (s = string; isspace(*s); s++)
+ ;
+ if (*s == 0)
return s;
-}
-
-int
-gf_strsplit (const char *str, const char *delim,
- char ***tokens, int *token_count)
-{
- char *_running = NULL;
- char *running = NULL;
- char *token = NULL;
- char **token_list = NULL;
- int count = 0;
- int i = 0;
- int j = 0;
- if (str == NULL || delim == NULL || tokens == NULL || token_count == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- return -1;
- }
-
- _running = gf_strdup (str);
- if (_running == NULL)
- return -1;
-
- running = _running;
-
- while ((token = strsep (&running, delim)) != NULL) {
- if (token[0] != '\0')
- count++;
- }
- GF_FREE (_running);
-
- _running = gf_strdup (str);
- if (_running == NULL)
- return -1;
-
- running = _running;
-
- if ((token_list = GF_CALLOC (count, sizeof (char *),
- gf_common_mt_char)) == NULL) {
- GF_FREE (_running);
- return -1;
- }
+ t = s + strlen(s) - 1;
+ while (t > s && isspace(*t))
+ t--;
+ *++t = '\0';
- while ((token = strsep (&running, delim)) != NULL) {
- if (token[0] == '\0')
- continue;
-
- token_list[i] = gf_strdup (token);
- if (token_list[i] == NULL)
- goto free_exit;
- i++;
- }
-
- GF_FREE (_running);
-
- *tokens = token_list;
- *token_count = count;
- return 0;
-
-free_exit:
- GF_FREE (_running);
- for (j = 0; j < i; j++)
- GF_FREE (token_list[j]);
-
- GF_FREE (token_list);
- return -1;
+ return s;
}
int
-gf_strstr (const char *str, const char *delim, const char *match)
+gf_strstr(const char *str, const char *delim, const char *match)
{
- char *tmp = NULL;
- char *save_ptr = NULL;
- char *tmp_str = NULL;
+ char *tmp = NULL;
+ char *save_ptr = NULL;
+ char *tmp_str = NULL;
- int ret = 0;
+ int ret = 0;
- tmp_str = strdup (str);
+ tmp_str = strdup(str);
- if (str == NULL || delim == NULL || match == NULL || tmp_str == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- ret = -1;
- goto out;
- }
+ if (str == NULL || delim == NULL || match == NULL || tmp_str == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ ret = -1;
+ goto out;
+ }
+ tmp = strtok_r(tmp_str, delim, &save_ptr);
- tmp = strtok_r (tmp_str, delim, &save_ptr);
+ while (tmp) {
+ ret = strcmp(tmp, match);
- while (tmp) {
- ret = strcmp (tmp, match);
+ if (ret == 0)
+ break;
- if (ret == 0)
- break;
-
- tmp = strtok_r (NULL, delim, &save_ptr);
- }
+ tmp = strtok_r(NULL, delim, &save_ptr);
+ }
out:
- if (tmp_str)
- free (tmp_str);
-
- return ret;
+ free(tmp_str);
+ return ret;
}
int
-gf_volume_name_validate (const char *volume_name)
+gf_volume_name_validate(const char *volume_name)
{
- const char *vname = NULL;
+ const char *vname = NULL;
- if (volume_name == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- return -1;
- }
+ if (volume_name == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ return -1;
+ }
- if (!isalpha (volume_name[0]))
- return 1;
+ if (!isalpha(volume_name[0]))
+ return 1;
- for (vname = &volume_name[1]; *vname != '\0'; vname++) {
- if (!(isalnum (*vname) || *vname == '_'))
- return 1;
- }
+ for (vname = &volume_name[1]; *vname != '\0'; vname++) {
+ if (!(isalnum(*vname) || *vname == '_'))
+ return 1;
+ }
- return 0;
+ return 0;
}
-
int
-gf_string2time (const char *str, uint32_t *n)
+gf_string2time(const char *str, uint32_t *n)
{
- unsigned long value = 0;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
-
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
-
- for (s = str; *s != '\0'; s++) {
- if (isspace (*s))
- continue;
- if (*s == '-')
- return -1;
- break;
- }
-
- old_errno = errno;
- errno = 0;
- value = strtol (str, &tail, 0);
-
- if (errno == ERANGE || errno == EINVAL)
- return -1;
-
- if (errno == 0)
- errno = old_errno;
+ unsigned long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace(*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtol(str, &tail, 0);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
- if (!((tail[0] == '\0') ||
- ((tail[0] == 's') && (tail[1] == '\0')) ||
- ((tail[0] == 's') && (tail[1] == 'e') &&
- (tail[2] == 'c') && (tail[3] == '\0'))))
- return -1;
+ if (errno == 0)
+ errno = old_errno;
+
+ if (((tail[0] == '\0') || ((tail[0] == 's') && (tail[1] == '\0')) ||
+ ((tail[0] == 's') && (tail[1] == 'e') && (tail[2] == 'c') &&
+ (tail[3] == '\0'))))
+ goto out;
+
+ else if (((tail[0] == 'm') && (tail[1] == '\0')) ||
+ ((tail[0] == 'm') && (tail[1] == 'i') && (tail[2] == 'n') &&
+ (tail[3] == '\0'))) {
+ value = value * GF_MINUTE_IN_SECONDS;
+ goto out;
+ }
+
+ else if (((tail[0] == 'h') && (tail[1] == '\0')) ||
+ ((tail[0] == 'h') && (tail[1] == 'r') && (tail[2] == '\0'))) {
+ value = value * GF_HOUR_IN_SECONDS;
+ goto out;
+ }
+
+ else if (((tail[0] == 'd') && (tail[1] == '\0')) ||
+ ((tail[0] == 'd') && (tail[1] == 'a') && (tail[2] == 'y') &&
+ (tail[3] == 's') && (tail[4] == '\0'))) {
+ value = value * GF_DAY_IN_SECONDS;
+ goto out;
+ }
+
+ else if (((tail[0] == 'w') && (tail[1] == '\0')) ||
+ ((tail[0] == 'w') && (tail[1] == 'k') && (tail[2] == '\0'))) {
+ value = value * GF_WEEK_IN_SECONDS;
+ goto out;
+ } else {
+ return -1;
+ }
- *n = value;
+out:
+ *n = value;
- return 0;
+ return 0;
}
-
int
-gf_string2percent (const char *str, uint32_t *n)
+gf_string2percent(const char *str, double *n)
{
- unsigned long value = 0;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
-
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
-
- for (s = str; *s != '\0'; s++) {
- if (isspace (*s))
- continue;
- if (*s == '-')
- return -1;
- break;
- }
-
- old_errno = errno;
- errno = 0;
- value = strtol (str, &tail, 0);
-
- if (errno == ERANGE || errno == EINVAL)
- return -1;
+ double value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace(*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtod(str, &tail);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
- if (errno == 0)
- errno = old_errno;
+ if (errno == 0)
+ errno = old_errno;
- if (!((tail[0] == '\0') ||
- ((tail[0] == '%') && (tail[1] == '\0'))))
- return -1;
+ if (!((tail[0] == '\0') || ((tail[0] == '%') && (tail[1] == '\0'))))
+ return -1;
- *n = value;
+ *n = value;
- return 0;
+ return 0;
}
-
static int
-_gf_string2long (const char *str, long *n, int base)
+_gf_string2long(const char *str, long *n, int base)
{
- long value = 0;
- char *tail = NULL;
- int old_errno = 0;
-
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
+ long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
- old_errno = errno;
- errno = 0;
- value = strtol (str, &tail, base);
+ old_errno = errno;
+ errno = 0;
+ value = strtol(str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
- if (errno == ERANGE || errno == EINVAL)
- return -1;
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
- if (errno == 0)
- errno = old_errno;
+ if (errno == 0)
+ errno = old_errno;
- if (tail[0] != '\0')
- return -1;
+ if (tail[0] != '\0')
+ return -1;
- *n = value;
+ *n = value;
- return 0;
+ return 0;
}
static int
-_gf_string2ulong (const char *str, unsigned long *n, int base)
+_gf_string2ulong(const char *str, unsigned long *n, int base)
{
- unsigned long value = 0;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
-
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
-
- for (s = str; *s != '\0'; s++) {
- if (isspace (*s))
- continue;
- if (*s == '-')
- return -1;
- break;
- }
-
- old_errno = errno;
- errno = 0;
- value = strtoul (str, &tail, base);
-
- if (errno == ERANGE || errno == EINVAL)
- return -1;
+ unsigned long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace(*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtoul(str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
- if (errno == 0)
- errno = old_errno;
+ if (errno == 0)
+ errno = old_errno;
- if (tail[0] != '\0')
- return -1;
+ if (tail[0] != '\0')
+ return -1;
- *n = value;
+ *n = value;
- return 0;
+ return 0;
}
static int
-_gf_string2uint (const char *str, unsigned int *n, int base)
+_gf_string2uint(const char *str, unsigned int *n, int base)
{
- unsigned long value = 0;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
-
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
-
- for (s = str; *s != '\0'; s++) {
- if (isspace (*s))
- continue;
- if (*s == '-')
- return -1;
- break;
- }
-
- old_errno = errno;
- errno = 0;
- value = strtoul (str, &tail, base);
-
- if (errno == ERANGE || errno == EINVAL)
- return -1;
+ unsigned long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace(*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtoul(str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
- if (errno == 0)
- errno = old_errno;
+ if (errno == 0)
+ errno = old_errno;
- if (tail[0] != '\0')
- return -1;
+ if (tail[0] != '\0')
+ return -1;
- *n = (unsigned int)value;
+ *n = (unsigned int)value;
- return 0;
+ return 0;
}
static int
-_gf_string2double (const char *str, double *n)
+_gf_string2double(const char *str, double *n)
{
- double value = 0.0;
- char *tail = NULL;
- int old_errno = 0;
-
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
+ double value = 0.0;
+ char *tail = NULL;
+ int old_errno = 0;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
- old_errno = errno;
- errno = 0;
- value = strtod (str, &tail);
+ old_errno = errno;
+ errno = 0;
+ value = strtod(str, &tail);
+ if (str == tail)
+ errno = EINVAL;
- if (errno == ERANGE || errno == EINVAL)
- return -1;
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
- if (errno == 0)
- errno = old_errno;
+ if (errno == 0)
+ errno = old_errno;
- if (tail[0] != '\0')
- return -1;
+ if (tail[0] != '\0')
+ return -1;
- *n = value;
+ *n = value;
- return 0;
+ return 0;
}
static int
-_gf_string2longlong (const char *str, long long *n, int base)
+_gf_string2longlong(const char *str, long long *n, int base)
{
- long long value = 0;
- char *tail = NULL;
- int old_errno = 0;
+ long long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
+ old_errno = errno;
+ errno = 0;
+ value = strtoll(str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
- old_errno = errno;
- errno = 0;
- value = strtoll (str, &tail, base);
-
- if (errno == ERANGE || errno == EINVAL)
- return -1;
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
- if (errno == 0)
- errno = old_errno;
+ if (errno == 0)
+ errno = old_errno;
- if (tail[0] != '\0')
- return -1;
+ if (tail[0] != '\0')
+ return -1;
- *n = value;
+ *n = value;
- return 0;
+ return 0;
}
static int
-_gf_string2ulonglong (const char *str, unsigned long long *n, int base)
+_gf_string2ulonglong(const char *str, unsigned long long *n, int base)
{
- unsigned long long value = 0;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
-
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
-
- for (s = str; *s != '\0'; s++) {
- if (isspace (*s))
- continue;
- if (*s == '-')
- return -1;
- break;
- }
-
- old_errno = errno;
- errno = 0;
- value = strtoull (str, &tail, base);
-
- if (errno == ERANGE || errno == EINVAL)
- return -1;
+ unsigned long long value = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace(*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtoull(str, &tail, base);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
- if (errno == 0)
- errno = old_errno;
+ if (errno == 0)
+ errno = old_errno;
- if (tail[0] != '\0')
- return -1;
+ if (tail[0] != '\0')
+ return -1;
- *n = value;
+ *n = value;
- return 0;
+ return 0;
}
int
-gf_string2long (const char *str, long *n)
+gf_string2long(const char *str, long *n)
{
- return _gf_string2long (str, n, 0);
+ return _gf_string2long(str, n, 0);
}
int
-gf_string2ulong (const char *str, unsigned long *n)
+gf_string2ulong(const char *str, unsigned long *n)
{
- return _gf_string2ulong (str, n, 0);
+ return _gf_string2ulong(str, n, 0);
}
int
-gf_string2int (const char *str, int *n)
+gf_string2int(const char *str, int *n)
{
- long l = 0;
- int ret = 0;
+ long l = 0;
+ int ret = 0;
- ret = _gf_string2long (str, &l, 0);
+ ret = _gf_string2long(str, &l, 0);
- *n = l;
- return ret;
+ *n = l;
+ return ret;
}
int
-gf_string2uint (const char *str, unsigned int *n)
+gf_string2uint(const char *str, unsigned int *n)
{
- return _gf_string2uint (str, n, 0);
+ return _gf_string2uint(str, n, 0);
}
int
-gf_string2double (const char *str, double *n)
+gf_string2double(const char *str, double *n)
{
- return _gf_string2double (str, n);
+ return _gf_string2double(str, n);
}
int
-gf_string2longlong (const char *str, long long *n)
+gf_string2longlong(const char *str, long long *n)
{
- return _gf_string2longlong (str, n, 0);
+ return _gf_string2longlong(str, n, 0);
}
int
-gf_string2ulonglong (const char *str, unsigned long long *n)
+gf_string2ulonglong(const char *str, unsigned long long *n)
{
- return _gf_string2ulonglong (str, n, 0);
+ return _gf_string2ulonglong(str, n, 0);
}
int
-gf_string2int8 (const char *str, int8_t *n)
+gf_string2int8(const char *str, int8_t *n)
{
- long l = 0L;
- int rv = 0;
+ long l = 0L;
+ int rv = 0;
- rv = _gf_string2long (str, &l, 0);
- if (rv != 0)
- return rv;
+ rv = _gf_string2long(str, &l, 0);
+ if (rv != 0)
+ return rv;
- if (l >= INT8_MIN && l <= INT8_MAX) {
- *n = (int8_t) l;
- return 0;
- }
+ if ((l >= INT8_MIN) && (l <= INT8_MAX)) {
+ *n = (int8_t)l;
+ return 0;
+ }
- errno = ERANGE;
- return -1;
+ errno = ERANGE;
+ return -1;
}
int
-gf_string2int16 (const char *str, int16_t *n)
+gf_string2int16(const char *str, int16_t *n)
{
- long l = 0L;
- int rv = 0;
+ long l = 0L;
+ int rv = 0;
- rv = _gf_string2long (str, &l, 0);
- if (rv != 0)
- return rv;
+ rv = _gf_string2long(str, &l, 0);
+ if (rv != 0)
+ return rv;
- if (l >= INT16_MIN && l <= INT16_MAX) {
- *n = (int16_t) l;
- return 0;
- }
+ if ((l >= INT16_MIN) && (l <= INT16_MAX)) {
+ *n = (int16_t)l;
+ return 0;
+ }
- errno = ERANGE;
- return -1;
+ errno = ERANGE;
+ return -1;
}
int
-gf_string2int32 (const char *str, int32_t *n)
+gf_string2int32(const char *str, int32_t *n)
{
- long l = 0L;
- int rv = 0;
+ long l = 0L;
+ int rv = 0;
- rv = _gf_string2long (str, &l, 0);
- if (rv != 0)
- return rv;
+ rv = _gf_string2long(str, &l, 0);
+ if (rv != 0)
+ return rv;
- if (l >= INT32_MIN && l <= INT32_MAX) {
- *n = (int32_t) l;
- return 0;
- }
+ if ((l >= INT32_MIN) && (l <= INT32_MAX)) {
+ *n = (int32_t)l;
+ return 0;
+ }
- errno = ERANGE;
- return -1;
+ errno = ERANGE;
+ return -1;
}
int
-gf_string2int64 (const char *str, int64_t *n)
+gf_string2int64(const char *str, int64_t *n)
{
- long long l = 0LL;
- int rv = 0;
+ long long l = 0LL;
+ int rv = 0;
- rv = _gf_string2longlong (str, &l, 0);
- if (rv != 0)
- return rv;
+ rv = _gf_string2longlong(str, &l, 0);
+ if (rv != 0)
+ return rv;
- if (l >= INT64_MIN && l <= INT64_MAX) {
- *n = (int64_t) l;
- return 0;
- }
-
- errno = ERANGE;
- return -1;
+ *n = (int64_t)l;
+ return 0;
}
int
-gf_string2uint8 (const char *str, uint8_t *n)
+gf_string2uint8(const char *str, uint8_t *n)
{
- unsigned long l = 0L;
- int rv = 0;
+ unsigned long l = 0L;
+ int rv = 0;
- rv = _gf_string2ulong (str, &l, 0);
- if (rv != 0)
- return rv;
+ rv = _gf_string2ulong(str, &l, 0);
+ if (rv != 0)
+ return rv;
- if (l >= 0 && l <= UINT8_MAX) {
- *n = (uint8_t) l;
- return 0;
- }
+ if (l <= UINT8_MAX) {
+ *n = (uint8_t)l;
+ return 0;
+ }
- errno = ERANGE;
- return -1;
+ errno = ERANGE;
+ return -1;
}
int
-gf_string2uint16 (const char *str, uint16_t *n)
+gf_string2uint16(const char *str, uint16_t *n)
{
- unsigned long l = 0L;
- int rv = 0;
+ unsigned long l = 0L;
+ int rv = 0;
- rv = _gf_string2ulong (str, &l, 0);
- if (rv != 0)
- return rv;
+ rv = _gf_string2ulong(str, &l, 0);
+ if (rv != 0)
+ return rv;
- if (l >= 0 && l <= UINT16_MAX) {
- *n = (uint16_t) l;
- return 0;
- }
+ if (l <= UINT16_MAX) {
+ *n = (uint16_t)l;
+ return 0;
+ }
- errno = ERANGE;
- return -1;
+ errno = ERANGE;
+ return -1;
}
int
-gf_string2uint32 (const char *str, uint32_t *n)
+gf_string2uint32(const char *str, uint32_t *n)
{
- unsigned long l = 0L;
- int rv = 0;
+ unsigned long l = 0L;
+ int rv = 0;
- rv = _gf_string2ulong (str, &l, 0);
- if (rv != 0)
- return rv;
+ rv = _gf_string2ulong(str, &l, 0);
+ if (rv != 0)
+ return rv;
- if (l >= 0 && l <= UINT32_MAX) {
- *n = (uint32_t) l;
- return 0;
- }
+ if (l <= UINT32_MAX) {
+ *n = (uint32_t)l;
+ return 0;
+ }
- errno = ERANGE;
- return -1;
+ errno = ERANGE;
+ return -1;
}
int
-gf_string2uint64 (const char *str, uint64_t *n)
+gf_string2uint64(const char *str, uint64_t *n)
{
- unsigned long long l = 0ULL;
- int rv = 0;
+ unsigned long long l = 0ULL;
+ int rv = 0;
- rv = _gf_string2ulonglong (str, &l, 0);
- if (rv != 0)
- return rv;
+ rv = _gf_string2ulonglong(str, &l, 0);
+ if (rv != 0)
+ return rv;
- if (l >= 0 && l <= UINT64_MAX) {
- *n = (uint64_t) l;
- return 0;
- }
+ if (l <= UINT64_MAX) {
+ *n = (uint64_t)l;
+ return 0;
+ }
- errno = ERANGE;
- return -1;
+ errno = ERANGE;
+ return -1;
}
int
-gf_string2ulong_base10 (const char *str, unsigned long *n)
+gf_string2ulong_base10(const char *str, unsigned long *n)
{
- return _gf_string2ulong (str, n, 10);
+ return _gf_string2ulong(str, n, 10);
}
int
-gf_string2uint_base10 (const char *str, unsigned int *n)
+gf_string2uint_base10(const char *str, unsigned int *n)
{
- return _gf_string2uint (str, n, 10);
+ return _gf_string2uint(str, n, 10);
}
int
-gf_string2uint8_base10 (const char *str, uint8_t *n)
+gf_string2uint8_base10(const char *str, uint8_t *n)
{
- unsigned long l = 0L;
- int rv = 0;
+ unsigned long l = 0L;
+ int rv = 0;
- rv = _gf_string2ulong (str, &l, 10);
- if (rv != 0)
- return rv;
+ rv = _gf_string2ulong(str, &l, 10);
+ if (rv != 0)
+ return rv;
- if (l >= 0 && l <= UINT8_MAX) {
- *n = (uint8_t) l;
- return 0;
- }
+ if (l <= UINT8_MAX) {
+ *n = (uint8_t)l;
+ return 0;
+ }
- errno = ERANGE;
- return -1;
+ errno = ERANGE;
+ return -1;
}
int
-gf_string2uint16_base10 (const char *str, uint16_t *n)
+gf_string2uint16_base10(const char *str, uint16_t *n)
{
- unsigned long l = 0L;
- int rv = 0;
+ unsigned long l = 0L;
+ int rv = 0;
- rv = _gf_string2ulong (str, &l, 10);
- if (rv != 0)
- return rv;
+ rv = _gf_string2ulong(str, &l, 10);
+ if (rv != 0)
+ return rv;
- if (l >= 0 && l <= UINT16_MAX) {
- *n = (uint16_t) l;
- return 0;
- }
+ if (l <= UINT16_MAX) {
+ *n = (uint16_t)l;
+ return 0;
+ }
- errno = ERANGE;
- return -1;
+ errno = ERANGE;
+ return -1;
}
int
-gf_string2uint32_base10 (const char *str, uint32_t *n)
+gf_string2uint32_base10(const char *str, uint32_t *n)
{
- unsigned long l = 0L;
- int rv = 0;
+ unsigned long l = 0L;
+ int rv = 0;
- rv = _gf_string2ulong (str, &l, 10);
- if (rv != 0)
- return rv;
+ rv = _gf_string2ulong(str, &l, 10);
+ if (rv != 0)
+ return rv;
- if (l >= 0 && l <= UINT32_MAX) {
- *n = (uint32_t) l;
- return 0;
- }
+ if (l <= UINT32_MAX) {
+ *n = (uint32_t)l;
+ return 0;
+ }
- errno = ERANGE;
- return -1;
+ errno = ERANGE;
+ return -1;
}
int
-gf_string2uint64_base10 (const char *str, uint64_t *n)
+gf_string2uint64_base10(const char *str, uint64_t *n)
{
- unsigned long long l = 0ULL;
- int rv = 0;
+ unsigned long long l = 0ULL;
+ int rv = 0;
- rv = _gf_string2ulonglong (str, &l, 10);
- if (rv != 0)
- return rv;
+ rv = _gf_string2ulonglong(str, &l, 10);
+ if (rv != 0)
+ return rv;
- if (l >= 0 && l <= UINT64_MAX) {
- *n = (uint64_t) l;
- return 0;
- }
+ if (l <= UINT64_MAX) {
+ *n = (uint64_t)l;
+ return 0;
+ }
- errno = ERANGE;
- return -1;
+ errno = ERANGE;
+ return -1;
}
char *
-gf_uint64_2human_readable (uint64_t n)
-{
- int ret = 0;
- char *str = NULL;
-
- if (n >= GF_UNIT_PB) {
- ret = gf_asprintf (&str, "%.1lfPB", ((double) n)/GF_UNIT_PB);
- if (ret < 0)
- goto err;
- } else if (n >= GF_UNIT_TB) {
- ret = gf_asprintf (&str, "%.1lfTB", ((double) n)/GF_UNIT_TB);
- if (ret < 0)
- goto err;
- } else if (n >= GF_UNIT_GB) {
- ret = gf_asprintf (&str, "%.1lfGB", ((double) n)/GF_UNIT_GB);
- if (ret < 0)
- goto err;
- } else if (n >= GF_UNIT_MB) {
- ret = gf_asprintf (&str, "%.1lfMB", ((double) n)/GF_UNIT_MB);
- if (ret < 0)
- goto err;
- } else if (n >= GF_UNIT_KB) {
- ret = gf_asprintf (&str, "%.1lfKB", ((double) n)/GF_UNIT_KB);
- if (ret < 0)
- goto err;
- } else {
- ret = gf_asprintf (&str, "%luBytes", n);
- if (ret < 0)
- goto err;
- }
- return str;
+gf_uint64_2human_readable(uint64_t n)
+{
+ int ret = 0;
+ char *str = NULL;
+
+ if (n >= GF_UNIT_PB) {
+ ret = gf_asprintf(&str, "%.1lfPB", ((double)n) / GF_UNIT_PB);
+ if (ret < 0)
+ goto err;
+ } else if (n >= GF_UNIT_TB) {
+ ret = gf_asprintf(&str, "%.1lfTB", ((double)n) / GF_UNIT_TB);
+ if (ret < 0)
+ goto err;
+ } else if (n >= GF_UNIT_GB) {
+ ret = gf_asprintf(&str, "%.1lfGB", ((double)n) / GF_UNIT_GB);
+ if (ret < 0)
+ goto err;
+ } else if (n >= GF_UNIT_MB) {
+ ret = gf_asprintf(&str, "%.1lfMB", ((double)n) / GF_UNIT_MB);
+ if (ret < 0)
+ goto err;
+ } else if (n >= GF_UNIT_KB) {
+ ret = gf_asprintf(&str, "%.1lfKB", ((double)n) / GF_UNIT_KB);
+ if (ret < 0)
+ goto err;
+ } else {
+ ret = gf_asprintf(&str, "%" PRIu64 "Bytes", n);
+ if (ret < 0)
+ goto err;
+ }
+ return str;
err:
- return NULL;
+ return NULL;
}
int
-gf_string2bytesize (const char *str, uint64_t *n)
+gf_string2bytesize_range(const char *str, uint64_t *n, uint64_t umax)
{
- uint64_t value = 0ULL;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
+ double value = 0.0;
+ int64_t int_value = 0;
+ uint64_t unit = 0;
+ int64_t max = 0;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+ gf_boolean_t fraction = _gf_false;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- errno = EINVAL;
- return -1;
- }
+ max = umax & 0x7fffffffffffffffLL;
- for (s = str; *s != '\0'; s++) {
- if (isspace (*s))
- continue;
- if (*s == '-')
- return -1;
- break;
- }
+ for (s = str; *s != '\0'; s++) {
+ if (isspace(*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
- old_errno = errno;
- errno = 0;
- value = strtoull (str, &tail, 10);
+ if (strrchr(str, '.'))
+ fraction = _gf_true;
- if (errno == ERANGE || errno == EINVAL)
- return -1;
+ old_errno = errno;
+ errno = 0;
+ if (fraction)
+ value = strtod(str, &tail);
+ else
+ int_value = strtoll(str, &tail, 10);
- if (errno == 0)
- errno = old_errno;
+ if (str == tail)
+ errno = EINVAL;
- if (tail[0] != '\0')
- {
- if (strcasecmp (tail, GF_UNIT_KB_STRING) == 0)
- value *= GF_UNIT_KB;
- else if (strcasecmp (tail, GF_UNIT_MB_STRING) == 0)
- value *= GF_UNIT_MB;
- else if (strcasecmp (tail, GF_UNIT_GB_STRING) == 0)
- value *= GF_UNIT_GB;
- else if (strcasecmp (tail, GF_UNIT_TB_STRING) == 0)
- value *= GF_UNIT_TB;
- else if (strcasecmp (tail, GF_UNIT_PB_STRING) == 0)
- value *= GF_UNIT_PB;
- else
- return -1;
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
+
+ if (errno == 0)
+ errno = old_errno;
+
+ if (tail[0] != '\0') {
+ if (strcasecmp(tail, GF_UNIT_KB_STRING) == 0)
+ unit = GF_UNIT_KB;
+ else if (strcasecmp(tail, GF_UNIT_MB_STRING) == 0)
+ unit = GF_UNIT_MB;
+ else if (strcasecmp(tail, GF_UNIT_GB_STRING) == 0)
+ unit = GF_UNIT_GB;
+ else if (strcasecmp(tail, GF_UNIT_TB_STRING) == 0)
+ unit = GF_UNIT_TB;
+ else if (strcasecmp(tail, GF_UNIT_PB_STRING) == 0)
+ unit = GF_UNIT_PB;
+ else if (strcasecmp(tail, GF_UNIT_B_STRING) != 0)
+ return -1;
+
+ if (unit > 0) {
+ if (fraction)
+ value *= unit;
+ else
+ int_value *= unit;
}
+ }
- *n = value;
+ if (fraction) {
+ if ((max - value) < 0) {
+ errno = ERANGE;
+ return -1;
+ }
+ *n = (uint64_t)value;
+ } else {
+ if ((max - int_value) < 0) {
+ errno = ERANGE;
+ return -1;
+ }
+ *n = int_value;
+ }
- return 0;
+ return 0;
}
int
-gf_string2percent_or_bytesize (const char *str,
- uint64_t *n,
- gf_boolean_t *is_percent)
+gf_string2bytesize_uint64(const char *str, uint64_t *n)
{
- uint64_t value = 0ULL;
- char *tail = NULL;
- int old_errno = 0;
- const char *s = NULL;
+ return gf_string2bytesize_range(str, n, UINT64_MAX);
+}
- if (str == NULL || n == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING,
- "argument invalid");
- errno = EINVAL;
- return -1;
- }
+int
+gf_string2bytesize_int64(const char *str, int64_t *n)
+{
+ uint64_t u64 = 0;
+ int ret = 0;
- for (s = str; *s != '\0'; s++) {
- if (isspace (*s))
- continue;
- if (*s == '-')
- return -1;
- break;
- }
+ ret = gf_string2bytesize_range(str, &u64, INT64_MAX);
+ *n = (int64_t)u64;
+ return ret;
+}
- old_errno = errno;
- errno = 0;
- value = strtoull (str, &tail, 10);
-
- if (errno == ERANGE || errno == EINVAL)
- return -1;
-
- if (errno == 0)
- errno = old_errno;
-
- if (tail[0] != '\0') {
- if (strcasecmp (tail, GF_UNIT_KB_STRING) == 0)
- value *= GF_UNIT_KB;
- else if (strcasecmp (tail, GF_UNIT_MB_STRING) == 0)
- value *= GF_UNIT_MB;
- else if (strcasecmp (tail, GF_UNIT_GB_STRING) == 0)
- value *= GF_UNIT_GB;
- else if (strcasecmp (tail, GF_UNIT_TB_STRING) == 0)
- value *= GF_UNIT_TB;
- else if (strcasecmp (tail, GF_UNIT_PB_STRING) == 0)
- value *= GF_UNIT_PB;
- else if (strcasecmp (tail, GF_UNIT_PERCENT_STRING) == 0)
- *is_percent = _gf_true;
- else
- return -1;
- }
+int
+gf_string2percent_or_bytesize(const char *str, double *n,
+ gf_boolean_t *is_percent)
+{
+ double value = 0ULL;
+ char *tail = NULL;
+ int old_errno = 0;
+ const char *s = NULL;
+
+ if (str == NULL || n == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ errno = EINVAL;
+ return -1;
+ }
+
+ for (s = str; *s != '\0'; s++) {
+ if (isspace(*s))
+ continue;
+ if (*s == '-')
+ return -1;
+ break;
+ }
+
+ old_errno = errno;
+ errno = 0;
+ value = strtod(str, &tail);
+ if (str == tail)
+ errno = EINVAL;
+
+ if (errno == ERANGE || errno == EINVAL)
+ return -1;
- *n = value;
+ if (errno == 0)
+ errno = old_errno;
+
+ /*Maximum accepted value for 64 bit OS will be (2^14 -1)PB*/
+ if (tail[0] != '\0') {
+ if (strcasecmp(tail, GF_UNIT_KB_STRING) == 0)
+ value *= GF_UNIT_KB;
+ else if (strcasecmp(tail, GF_UNIT_MB_STRING) == 0)
+ value *= GF_UNIT_MB;
+ else if (strcasecmp(tail, GF_UNIT_GB_STRING) == 0)
+ value *= GF_UNIT_GB;
+ else if (strcasecmp(tail, GF_UNIT_TB_STRING) == 0)
+ value *= GF_UNIT_TB;
+ else if (strcasecmp(tail, GF_UNIT_PB_STRING) == 0)
+ value *= GF_UNIT_PB;
+ else if (strcasecmp(tail, GF_UNIT_PERCENT_STRING) == 0)
+ *is_percent = _gf_true;
+ else
+ return -1;
+ }
- return 0;
+ /* Error out if we cannot store the value in uint64 */
+ if ((UINT64_MAX - value) < 0) {
+ errno = ERANGE;
+ return -1;
+ }
+
+ *n = value;
+
+ return 0;
}
int64_t
-gf_str_to_long_long (const char *number)
+gf_str_to_long_long(const char *number)
{
- int64_t unit = 1;
- int64_t ret = 0;
- char *endptr = NULL ;
- if (!number)
- return 0;
+ int64_t unit = 1;
+ int64_t ret = 0;
+ char *endptr = NULL;
+ if (!number)
+ return 0;
+
+ ret = strtoll(number, &endptr, 0);
- ret = strtoll (number, &endptr, 0);
-
- if (endptr) {
- switch (*endptr) {
- case 'G':
- case 'g':
- if ((* (endptr + 1) == 'B') ||(* (endptr + 1) == 'b'))
- unit = 1024 * 1024 * 1024;
- break;
- case 'M':
- case 'm':
- if ((* (endptr + 1) == 'B') ||(* (endptr + 1) == 'b'))
- unit = 1024 * 1024;
- break;
- case 'K':
- case 'k':
- if ((* (endptr + 1) == 'B') ||(* (endptr + 1) == 'b'))
- unit = 1024;
- break;
- case '%':
- unit = 1;
- break;
- default:
- unit = 1;
- break;
- }
+ if (endptr) {
+ switch (*endptr) {
+ case 'G':
+ case 'g':
+ if ((*(endptr + 1) == 'B') || (*(endptr + 1) == 'b'))
+ unit = 1024 * 1024 * 1024;
+ break;
+ case 'M':
+ case 'm':
+ if ((*(endptr + 1) == 'B') || (*(endptr + 1) == 'b'))
+ unit = 1024 * 1024;
+ break;
+ case 'K':
+ case 'k':
+ if ((*(endptr + 1) == 'B') || (*(endptr + 1) == 'b'))
+ unit = 1024;
+ break;
+ case '%':
+ unit = 1;
+ break;
+ default:
+ unit = 1;
+ break;
}
- return ret * unit;
+ }
+ return ret * unit;
}
int
-gf_string2boolean (const char *str, gf_boolean_t *b)
+gf_string2boolean(const char *str, gf_boolean_t *b)
{
- if (str == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- return -1;
- }
+ if (str == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ return -1;
+ }
+
+ if ((strcasecmp(str, "1") == 0) || (strcasecmp(str, "on") == 0) ||
+ (strcasecmp(str, "yes") == 0) || (strcasecmp(str, "true") == 0) ||
+ (strcasecmp(str, "enable") == 0)) {
+ *b = _gf_true;
+ return 0;
+ }
+
+ if ((strcasecmp(str, "0") == 0) || (strcasecmp(str, "off") == 0) ||
+ (strcasecmp(str, "no") == 0) || (strcasecmp(str, "false") == 0) ||
+ (strcasecmp(str, "disable") == 0)) {
+ *b = _gf_false;
+ return 0;
+ }
+
+ return -1;
+}
+
+int
+gf_strn2boolean(const char *str, const int len, gf_boolean_t *b)
+{
+ if (str == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ return -1;
+ }
- if ((strcasecmp (str, "1") == 0) ||
- (strcasecmp (str, "on") == 0) ||
- (strcasecmp (str, "yes") == 0) ||
- (strcasecmp (str, "true") == 0) ||
- (strcasecmp (str, "enable") == 0)) {
+ switch (len) {
+ case 1:
+ if (strcasecmp(str, "1") == 0) {
*b = _gf_true;
return 0;
- }
-
- if ((strcasecmp (str, "0") == 0) ||
- (strcasecmp (str, "off") == 0) ||
- (strcasecmp (str, "no") == 0) ||
- (strcasecmp (str, "false") == 0) ||
- (strcasecmp (str, "disable") == 0)) {
+ } else if (strcasecmp(str, "0") == 0) {
*b = _gf_false;
return 0;
- }
-
- return -1;
+ }
+ break;
+ case 2:
+ if (strcasecmp(str, "on") == 0) {
+ *b = _gf_true;
+ return 0;
+ } else if (strcasecmp(str, "no") == 0) {
+ *b = _gf_false;
+ return 0;
+ }
+ break;
+ case 3:
+ if (strcasecmp(str, "yes") == 0) {
+ *b = _gf_true;
+ return 0;
+ } else if (strcasecmp(str, "off") == 0) {
+ *b = _gf_false;
+ return 0;
+ }
+ break;
+ case 4:
+ if (strcasecmp(str, "true") == 0) {
+ *b = _gf_true;
+ return 0;
+ }
+ break;
+ case 5:
+ if (strcasecmp(str, "false") == 0) {
+ *b = _gf_false;
+ return 0;
+ }
+ break;
+ case 6:
+ if (strcasecmp(str, "enable") == 0) {
+ *b = _gf_true;
+ return 0;
+ }
+ break;
+ case 7:
+ if (strcasecmp(str, "disable") == 0) {
+ *b = _gf_false;
+ return 0;
+ }
+ break;
+ default:
+ return -1;
+ break;
+ }
+ return -1;
}
-
int
-gf_lockfd (int fd)
+gf_lockfd(int fd)
{
- struct gf_flock fl;
+ struct gf_flock fl;
- fl.l_type = F_WRLCK;
- fl.l_whence = SEEK_SET;
- fl.l_start = 0;
- fl.l_len = 0;
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 0;
- return fcntl (fd, F_SETLK, &fl);
+ return fcntl(fd, F_SETLK, &fl);
}
-
int
-gf_unlockfd (int fd)
+gf_unlockfd(int fd)
{
- struct gf_flock fl;
+ struct gf_flock fl;
- fl.l_type = F_UNLCK;
- fl.l_whence = SEEK_SET;
- fl.l_start = 0;
- fl.l_len = 0;
+ fl.l_type = F_UNLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 0;
+ fl.l_len = 0;
- return fcntl (fd, F_SETLK, &fl);
+ return fcntl(fd, F_SETLK, &fl);
}
static void
-compute_checksum (char *buf, size_t size, uint32_t *checksum)
+compute_checksum(char *buf, const ssize_t size, uint32_t *checksum)
{
- int ret = -1;
- char *checksum_buf = NULL;
-
- checksum_buf = (char *)(checksum);
-
- if (!(*checksum)) {
- checksum_buf [0] = 0xba;
- checksum_buf [1] = 0xbe;
- checksum_buf [2] = 0xb0;
- checksum_buf [3] = 0x0b;
- }
-
- for (ret = 0; ret < (size - 4); ret += 4) {
- checksum_buf[0] ^= (buf[ret]);
- checksum_buf[1] ^= (buf[ret + 1] << 1) ;
- checksum_buf[2] ^= (buf[ret + 2] << 2);
- checksum_buf[3] ^= (buf[ret + 3] << 3);
- }
-
- for (ret = 0; ret <= (size % 4); ret++) {
- checksum_buf[ret] ^= (buf[(size - 4) + ret] << ret);
- }
-
- return;
+ int ret = -1;
+ char *checksum_buf = NULL;
+
+ checksum_buf = (char *)(checksum);
+
+ if (!(*checksum)) {
+ checksum_buf[0] = 0xba;
+ checksum_buf[1] = 0xbe;
+ checksum_buf[2] = 0xb0;
+ checksum_buf[3] = 0x0b;
+ }
+
+ for (ret = 0; ret < (size - 4); ret += 4) {
+ checksum_buf[0] ^= (buf[ret]);
+ checksum_buf[1] ^= (buf[ret + 1] << 1);
+ checksum_buf[2] ^= (buf[ret + 2] << 2);
+ checksum_buf[3] ^= (buf[ret + 3] << 3);
+ }
+
+ for (ret = 0; ret <= (size % 4); ret++) {
+ checksum_buf[ret] ^= (buf[(size - 4) + ret] << ret);
+ }
+
+ return;
}
#define GF_CHECKSUM_BUF_SIZE 1024
int
-get_checksum_for_file (int fd, uint32_t *checksum)
+get_checksum_for_file(int fd, uint32_t *checksum, int op_version)
{
- int ret = -1;
- char buf[GF_CHECKSUM_BUF_SIZE] = {0,};
-
- /* goto first place */
- lseek (fd, 0L, SEEK_SET);
- do {
- ret = read (fd, &buf, GF_CHECKSUM_BUF_SIZE);
- if (ret > 0)
- compute_checksum (buf, GF_CHECKSUM_BUF_SIZE,
- checksum);
- } while (ret > 0);
+ int ret = -1;
+ char buf[GF_CHECKSUM_BUF_SIZE] = {
+ 0,
+ };
+
+ /* goto first place */
+ sys_lseek(fd, 0L, SEEK_SET);
+ do {
+ ret = sys_read(fd, &buf, GF_CHECKSUM_BUF_SIZE);
+ if (ret > 0) {
+ if (op_version < GD_OP_VERSION_5_4)
+ compute_checksum(buf, GF_CHECKSUM_BUF_SIZE, checksum);
+ else
+ compute_checksum(buf, ret, checksum);
+ }
+ } while (ret > 0);
- /* set it back */
- lseek (fd, 0L, SEEK_SET);
+ /* set it back */
+ sys_lseek(fd, 0L, SEEK_SET);
- return ret;
+ return ret;
}
-
int
-get_checksum_for_path (char *path, uint32_t *checksum)
+get_checksum_for_path(char *path, uint32_t *checksum, int op_version)
{
- int ret = -1;
- int fd = -1;
+ int ret = -1;
+ int fd = -1;
- GF_ASSERT (path);
- GF_ASSERT (checksum);
+ GF_ASSERT(path);
+ GF_ASSERT(checksum);
- fd = open (path, O_RDWR);
+ fd = open(path, O_RDWR);
- if (fd == -1) {
- gf_log (THIS->name, GF_LOG_ERROR, "Unable to open %s, errno: %d",
- path, errno);
- goto out;
- }
+ if (fd == -1) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_PATH_OPEN_FAILED,
+ "path=%s", path, NULL);
+ goto out;
+ }
- ret = get_checksum_for_file (fd, checksum);
+ ret = get_checksum_for_file(fd, checksum, op_version);
out:
- if (fd != -1)
- close (fd);
+ if (fd != -1)
+ sys_close(fd);
- return ret;
+ return ret;
}
-char *
-strtail (char *str, const char *pattern)
+/**
+ * get_file_mtime -- Given a path, get the mtime for the file
+ *
+ * @path: The filepath to check the mtime on
+ * @stamp: The parameter to set after we get the mtime
+ *
+ * @returns: success: 0
+ * errors : Errors returned by the stat () call
+ */
+int
+get_file_mtime(const char *path, time_t *stamp)
{
- int i = 0;
+ struct stat f_stat = {0};
+ int ret = -EINVAL;
- for (i = 0; str[i] == pattern[i] && str[i]; i++);
+ GF_VALIDATE_OR_GOTO(THIS->name, path, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, stamp, out);
- if (pattern[i] == '\0')
- return str + i;
+ ret = sys_stat(path, &f_stat);
+ if (ret < 0) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_FILE_STAT_FAILED,
+ "path=%s", path, NULL);
+ goto out;
+ }
- return NULL;
+ /* Set the mtime */
+ *stamp = f_stat.st_mtime;
+out:
+ return ret;
}
-void
-skipwhite (char **s)
+/**
+ * gf_is_ip_in_net -- Checks if an IP Address is in a network.
+ * A network should be specified by something like
+ * '10.5.153.0/24' (in CIDR notation).
+ *
+ * @result : Sets to true if the IP is in the network
+ * @ip_str : The IP to check
+ * @network: The network to check the IP against.
+ *
+ * @return: success: _gf_true
+ * failure: -EINVAL for bad args, retval of inet_pton otherwise
+ */
+gf_boolean_t
+gf_is_ip_in_net(const char *network, const char *ip_str)
{
- while (isspace (**s))
- (*s)++;
+ unsigned long ip_buf = 0;
+ unsigned long net_ip_buf = 0;
+ unsigned long subnet_mask = 0;
+ int ret = -EINVAL;
+ char *slash = NULL;
+ char *net_ip = NULL;
+ char *subnet = NULL;
+ char *net_str = NULL;
+ int family = AF_INET;
+ gf_boolean_t result = _gf_false;
+
+ GF_ASSERT(network);
+ GF_ASSERT(ip_str);
+
+ if (strchr(network, ':'))
+ family = AF_INET6;
+ else if (strchr(network, '.'))
+ family = AF_INET;
+ else {
+ goto out;
+ }
+
+ net_str = strdupa(network);
+ slash = strchr(net_str, '/');
+ if (!slash)
+ goto out;
+ *slash = '\0';
+
+ subnet = slash + 1;
+ net_ip = net_str;
+
+ /* Convert IP address to a long */
+ ret = inet_pton(family, ip_str, &ip_buf);
+ if (ret < 0)
+ gf_smsg("common-utils", GF_LOG_ERROR, errno, LG_MSG_INET_PTON_FAILED,
+ NULL);
+
+ /* Convert network IP address to a long */
+ ret = inet_pton(family, net_ip, &net_ip_buf);
+ if (ret < 0) {
+ gf_smsg("common-utils", GF_LOG_ERROR, errno, LG_MSG_INET_PTON_FAILED,
+ NULL);
+ goto out;
+ }
+
+ /* Converts /x into a mask */
+ subnet_mask = (1 << atoi(subnet)) - 1;
+
+ result = ((ip_buf & subnet_mask) == (net_ip_buf & subnet_mask));
+out:
+ return result;
}
char *
-nwstrtail (char *str, char *pattern)
+strtail(char *str, const char *pattern)
{
- for (;;) {
- skipwhite (&str);
- skipwhite (&pattern);
+ int i = 0;
- if (*str != *pattern || !*str)
- break;
+ for (i = 0; str[i] == pattern[i] && str[i]; i++)
+ ;
- str++;
- pattern++;
- }
+ if (pattern[i] == '\0')
+ return str + i;
- return *pattern ? NULL : str;
+ return NULL;
}
void
-skipword (char **s)
+skipwhite(char **s)
{
- if (!*s)
- return;
+ while (isspace(**s))
+ (*s)++;
+}
- skipwhite (s);
+void
+gf_strTrim(char **s)
+{
+ char *end = NULL;
+
+ end = *s + strlen(*s) - 1;
+ while (end > *s && isspace((unsigned char)*end))
+ end--;
+
+ *(end + 1) = '\0';
- while (!isspace(**s))
- (*s)++;
+ while (isspace(**s))
+ (*s)++;
+
+ return;
}
char *
-get_nth_word (const char *str, int n)
+nwstrtail(char *str, char *pattern)
{
- char buf[4096] = {0};
- char *start = NULL;
- char *word = NULL;
- int i = 0;
- int word_len = 0;
- const char *end = NULL;
+ for (;;) {
+ skipwhite(&str);
+ skipwhite(&pattern);
- if (!str)
- goto out;
+ if (*str != *pattern || !*str)
+ break;
- snprintf (buf, sizeof (buf), "%s", str);
- start = buf;
+ str++;
+ pattern++;
+ }
- for (i = 0; i < n-1; i++)
- skipword (&start);
-
- skipwhite (&start);
- end = strpbrk ((const char *)start, " \t\n\0");
+ return *pattern ? NULL : str;
+}
- if (!end)
- goto out;
+/**
+ * token_iter_init -- initialize tokenization
+ *
+ * @str: string to be tokenized
+ * @sep: token separator character
+ * @tit: pointer to iteration state
+ *
+ * @return: token string
+ *
+ * The returned token string and tit are
+ * not to be used directly, but through
+ * next_token().
+ */
+char *
+token_iter_init(char *str, char sep, token_iter_t *tit)
+{
+ tit->end = str + strlen(str);
+ tit->sep = sep;
- word_len = abs (end - start);
+ return str;
+}
- word = GF_CALLOC (1, word_len + 1, gf_common_mt_strdup);
- if (!word)
- goto out;
+/**
+ * next_token -- fetch next token in tokenization
+ * inited by token_iter_init().
+ *
+ * @tokenp: pointer to token
+ * @tit: pointer to iteration state
+ *
+ * @return: true if iteration ends, else false
+ *
+ * The token pointed by @tokenp can be used
+ * after a call to next_token(). When next_token()
+ * returns true the iteration is to be stopped
+ * and the string with which the tokenization
+ * was inited (see token_iter_init() is restored,
+ * apart from dropped tokens (see drop_token()).
+ */
+gf_boolean_t
+next_token(char **tokenp, token_iter_t *tit)
+{
+ char *cursor = NULL;
+ gf_boolean_t is_last = _gf_false;
+
+ for (cursor = *tokenp; *cursor; cursor++)
+ ;
+ if (cursor < tit->end) {
+ /*
+ * We detect that in between current token and end a zero
+ * marker has already been inserted. This means that the
+ * token has already been returned. We restore the
+ * separator and move ahead.
+ */
+ *cursor = tit->sep;
+ *tokenp = cursor + 1;
+ }
+
+ for (cursor = *tokenp; *cursor && *cursor != tit->sep; cursor++)
+ ;
+ /* If the cursor ended up on a zero byte, then it's the last token. */
+ is_last = !*cursor;
+ /* Zero-terminate the token. */
+ *cursor = 0;
+
+ return is_last;
+}
- strncpy (word, start, word_len);
- *(word + word_len) = '\0';
- out:
- return word;
+/*
+ * drop_token -- drop a token during iterated calls of next_token().
+ *
+ * Sample program that uses these functions to tokenize
+ * a comma-separated first argument while dropping the
+ * rest of the arguments if they occur as token:
+ *
+ * #include <stdio.h>
+ * #include <stdlib.h>
+ * #include <string.h>
+ * #include "glusterfs/common-utils.h"
+ *
+ * int
+ * main (int argc, char **argv)
+ * {
+ * char *buf;
+ * char *token;
+ * token_iter_t tit;
+ * int i;
+ * gf_boolean_t iter_end;
+ *
+ * if (argc <= 1)
+ * abort();
+ *
+ * buf = strdup (argv[1]);
+ * if (!buf)
+ * abort();
+ *
+ * for (token = token_iter_init (buf, ',', &tit) ;;) {
+ * iter_end = next_token (&token, &tit);
+ * printf("found token: '%s'\n", token);
+ * for (i = 2; i < argc; i++) {
+ * if (strcmp (argv[i], token) == 0) {
+ * printf ("%s\n", "dropping token!");
+ * drop_token (token, &tit);
+ * break;
+ * }
+ * }
+ * if (iter_end)
+ * break;
+ * }
+ *
+ * printf ("finally: '%s'\n", buf);
+ *
+ * return 0;
+ * }
+ */
+void
+drop_token(char *token, token_iter_t *tit)
+{
+ char *cursor = NULL;
+
+ for (cursor = token; *cursor; cursor++)
+ ;
+ if (cursor < tit->end) {
+ /*
+ * We detect a zero inserted by next_token().
+ * Step the cursor and copy what comes after
+ * to token.
+ */
+ for (cursor++; cursor < tit->end; *token++ = *cursor++)
+ ;
+ }
+
+ /*
+ * Zero out the remainder of the buffer.
+ * It would be enough to insert just a single zero,
+ * but we continue 'till the end to have cleaner
+ * memory content.
+ */
+ for (cursor = token; cursor < tit->end; *cursor++ = 0)
+ ;
+
+ /* Adjust the end to point to the new terminating zero. */
+ tit->end = token;
}
-/* RFC 1123 & 952 */
-/* Syntax formed combining RFC 1123 & 952 *
- <hname> ::= <first-name>*["."<gen-name>] *
- <first-name> ::= <let-or-digit> <[*[<let-or-digit-or-hyphen>]<let-or-digit>]
- <gen-name> ::= <let>[*[<let-or-digit-or-hyphen>]<let-or-digit>] */
+/* Syntax formed according to RFC 1912 (RFC 1123 & 952 are more restrictive) *
+ <hname> ::= <gen-name>*["."<gen-name>] *
+ <gen-name> ::= <let-or-digit> <[*[<let-or-digit-or-hyphen>]<let-or-digit>] */
char
-valid_host_name (char *address, int length)
+valid_host_name(char *address, int length)
{
- int i = 0;
- int str_len = 0;
- char ret = 1;
- char *dup_addr = NULL;
- char *temp_str = NULL;
- char *save_ptr = NULL;
+ int i = 0;
+ int str_len = 0;
+ char ret = 1;
+ char *dup_addr = NULL;
+ char *temp_str = NULL;
+ char *save_ptr = NULL;
+
+ if ((length > _POSIX_HOST_NAME_MAX) || (length < 1)) {
+ ret = 0;
+ goto out;
+ }
- if ((length > _POSIX_HOST_NAME_MAX) || (length < 1)) {
- ret = 0;
- goto out;
- }
+ dup_addr = gf_strdup(address);
+ if (!dup_addr) {
+ ret = 0;
+ goto out;
+ }
- dup_addr = gf_strdup (address);
- if (!dup_addr) {
- ret = 0;
- goto out;
- }
- temp_str = strtok_r (dup_addr,".", &save_ptr);
+ if (!isalnum(dup_addr[length - 1]) && (dup_addr[length - 1] != '*')) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Check for consecutive dots, which is invalid in a hostname and is
+ * ignored by strtok()
+ */
+ if (strstr(dup_addr, "..")) {
+ ret = 0;
+ goto out;
+ }
- /* first-name */
- if (!temp_str ||
- !isalnum(temp_str[0]) ||
- !isalnum (temp_str[strlen(temp_str)-1])) {
+ /* gen-name */
+ temp_str = strtok_r(dup_addr, ".", &save_ptr);
+ do {
+ str_len = strlen(temp_str);
+
+ if (!isalnum(temp_str[0]) || !isalnum(temp_str[str_len - 1])) {
+ ret = 0;
+ goto out;
+ }
+ for (i = 1; i < str_len; i++) {
+ if (!isalnum(temp_str[i]) && (temp_str[i] != '-')) {
ret = 0;
goto out;
+ }
}
- for (i = 1; i < (strlen (temp_str) - 1); i++) {
- if (!isalnum (temp_str[i]) && (temp_str[i] != '-')) {
- ret = 0;
- goto out;
- }
- }
-
- /* gen-name */
- while ((temp_str = strtok_r (NULL, ".", &save_ptr))) {
- str_len = strlen (temp_str);
-
- if (!isalpha (temp_str[0]) ||
- !isalnum (temp_str[str_len-1])) {
- ret = 0;
- goto out;
- }
- for (i = 1; i < str_len; i++) {
- if (!isalnum (temp_str[i]) && (temp_str[i] != '-')) {
- ret = 0;
- goto out;
- }
- }
- }
+ } while ((temp_str = strtok_r(NULL, ".", &save_ptr)));
out:
- if (dup_addr)
- GF_FREE (dup_addr);
- return ret;
+ GF_FREE(dup_addr);
+ return ret;
}
/* Matches all ipv4 address, if wildcard_acc is true '*' wildcard pattern for*
- subnets is considerd as valid strings as well */
+ subnets is considered as valid strings as well */
char
-valid_ipv4_address (char *address, int length, gf_boolean_t wildcard_acc)
+valid_ipv4_address(char *address, int length, gf_boolean_t wildcard_acc)
{
- int octets = 0;
- int value = 0;
- char *tmp = NULL, *ptr = NULL, *prev = NULL, *endptr = NULL;
- char ret = 1;
- int is_wildcard = 0;
+ int octets = 0;
+ int value = 0;
+ char *tmp = NULL, *ptr = NULL, *prev = NULL, *endptr = NULL;
+ char ret = 1;
+ int is_wildcard = 0;
+
+ tmp = gf_strdup(address);
+
+ /*
+ * To prevent cases where last character is '.' and which have
+ * consecutive dots like ".." as strtok ignore consecutive
+ * delimiters.
+ */
+ if (length <= 0 || (strstr(address, "..")) ||
+ (!isdigit(tmp[length - 1]) && (tmp[length - 1] != '*'))) {
+ ret = 0;
+ goto out;
+ }
- tmp = gf_strdup (address);
+ prev = strtok_r(tmp, ".", &ptr);
- /* To prevent cases where last character is '.' */
- if (!isdigit (tmp[length - 1]) && (tmp[length - 1] != '*')) {
+ while (prev != NULL) {
+ octets++;
+ if (wildcard_acc && !strcmp(prev, "*")) {
+ is_wildcard = 1;
+ } else {
+ value = strtol(prev, &endptr, 10);
+ if ((value > 255) || (value < 0) ||
+ (endptr != NULL && *endptr != '\0')) {
ret = 0;
goto out;
+ }
}
+ prev = strtok_r(NULL, ".", &ptr);
+ }
- prev = tmp;
- prev = strtok_r (tmp, ".", &ptr);
-
- while (prev != NULL) {
- octets++;
- if (wildcard_acc && !strcmp (prev, "*")) {
- is_wildcard = 1;
- } else {
- value = strtol (prev, &endptr, 10);
- if ((value > 255) || (value < 0) ||
- (endptr != NULL && *endptr != '\0')) {
- ret = 0;
- goto out;
- }
- }
- prev = strtok_r (NULL, ".", &ptr);
- }
-
- if ((octets > 4) || (octets < 4 && !is_wildcard)) {
- ret = 0;
- }
+ if ((octets > 4) || (octets < 4 && !is_wildcard)) {
+ ret = 0;
+ }
out:
- GF_FREE (tmp);
- return ret;
+ GF_FREE(tmp);
+ return ret;
}
char
-valid_ipv6_address (char *address, int length, gf_boolean_t wildcard_acc)
+valid_cidr_address(char *cidr_address, gf_boolean_t wildcard_acc)
{
- int hex_numbers = 0;
- int value = 0;
- int i = 0;
- char *tmp = NULL, *ptr = NULL, *prev = NULL, *endptr = NULL;
- char ret = 1;
- int is_wildcard = 0;
- int is_compressed = 0;
+ unsigned int net_mask = 0, len = 0;
+ char *temp = NULL, *cidr_str = NULL, ret = 1;
+
+ cidr_str = strdupa(cidr_address);
+ temp = strstr(cidr_str, "/");
+ if (temp == NULL)
+ return 0; /* Since Invalid cidr ip address we return 0 */
+
+ *temp = '\0';
+ temp++;
+ net_mask = (unsigned int)atoi(temp);
+
+ if (net_mask > 32 || net_mask < 1)
+ return 0; /* Since Invalid cidr ip address we return 0*/
- tmp = gf_strdup (address);
+ len = strlen(cidr_str);
- /* Check for compressed form */
- if (tmp[length - 1] == ':') {
+ ret = valid_ipv4_address(cidr_str, len, wildcard_acc);
+
+ return ret;
+}
+
+/**
+ * valid_ipv4_subnetwork() takes the pattern and checks if it contains
+ * a valid ipv4 subnetwork pattern i.e. xx.xx.xx.xx/n. IPv4 address
+ * part (xx.xx.xx.xx) and mask bits length part (n). The mask bits length
+ * must be in 0-32 range (ipv4 addr is 32 bit). The pattern must be
+ * in this format.
+ *
+ * Returns _gf_true if both IP addr and mask bits len are valid
+ * _gf_false otherwise.
+ */
+gf_boolean_t
+valid_ipv4_subnetwork(const char *address)
+{
+ char *slash = NULL;
+ char *paddr = NULL;
+ char *endptr = NULL;
+ long prefixlen = -1;
+ gf_boolean_t retv = _gf_true;
+
+ if (address == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ return _gf_false;
+ }
+
+ paddr = gf_strdup(address);
+ if (paddr == NULL) /* ENOMEM */
+ return _gf_false;
+
+ /*
+ * INVALID: If '/' is not present OR
+ * Nothing specified after '/'
+ */
+ slash = strchr(paddr, '/');
+ if ((slash == NULL) || (slash[1] == '\0')) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
+ LG_MSG_INVALID_IPV4_FORMAT,
+ "Invalid IPv4 "
+ "subnetwork format");
+ retv = _gf_false;
+ goto out;
+ }
+
+ *slash = '\0';
+ retv = valid_ipv4_address(paddr, strlen(paddr), _gf_false);
+ if (retv == _gf_false) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
+ LG_MSG_INVALID_IPV4_FORMAT,
+ "Invalid IPv4 subnetwork address");
+ goto out;
+ }
+ /*
+ * Reset errno before checking it
+ */
+ errno = 0;
+ prefixlen = strtol(slash + 1, &endptr, 10);
+ if ((errno != 0) || (*endptr != '\0') || (prefixlen < 0) ||
+ (prefixlen > IPv4_ADDR_SIZE)) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
+ LG_MSG_INVALID_IPV4_FORMAT,
+ "Invalid IPv4 subnetwork mask");
+ retv = _gf_false;
+ goto out;
+ }
+
+ retv = _gf_true;
+out:
+ GF_FREE(paddr);
+ return retv;
+}
+
+char
+valid_ipv6_address(char *address, int length, gf_boolean_t wildcard_acc)
+{
+ int hex_numbers = 0;
+ int value = 0;
+ int i = 0;
+ char *tmp = NULL, *ptr = NULL, *prev = NULL, *endptr = NULL;
+ char ret = 1;
+ int is_wildcard = 0;
+ int is_compressed = 0;
+
+ tmp = gf_strdup(address);
+
+ /* Check for '%' for link local addresses */
+ endptr = strchr(tmp, '%');
+ if (endptr) {
+ *endptr = '\0';
+ length = strlen(tmp);
+ endptr = NULL;
+ }
+
+ /* Check for compressed form */
+ if (length <= 0 || tmp[length - 1] == ':') {
+ ret = 0;
+ goto out;
+ }
+ for (i = 0; i < (length - 1); i++) {
+ if (tmp[i] == ':' && tmp[i + 1] == ':') {
+ if (is_compressed == 0)
+ is_compressed = 1;
+ else {
ret = 0;
goto out;
+ }
}
- for (i = 0; i < (length - 1) ; i++) {
- if (tmp[i] == ':' && tmp[i + 1] == ':') {
- if (is_compressed == 0)
- is_compressed = 1;
- else {
- ret = 0;
- goto out;
- }
- }
- }
+ }
- prev = strtok_r (tmp, ":", &ptr);
-
- while (prev != NULL) {
- hex_numbers++;
- if (wildcard_acc && !strcmp (prev, "*")) {
- is_wildcard = 1;
- } else {
- value = strtol (prev, &endptr, 16);
- if ((value > 0xffff) || (value < 0)
- || (endptr != NULL && *endptr != '\0')) {
- ret = 0;
- goto out;
- }
- }
- prev = strtok_r (NULL, ":", &ptr);
- }
+ prev = strtok_r(tmp, ":", &ptr);
- if ((hex_numbers > 8) || (hex_numbers < 8 && !is_wildcard
- && !is_compressed)) {
+ while (prev != NULL) {
+ hex_numbers++;
+ if (wildcard_acc && !strcmp(prev, "*")) {
+ is_wildcard = 1;
+ } else {
+ value = strtol(prev, &endptr, 16);
+ if ((value > 0xffff) || (value < 0) ||
+ (endptr != NULL && *endptr != '\0')) {
ret = 0;
+ goto out;
+ }
}
+ prev = strtok_r(NULL, ":", &ptr);
+ }
+
+ if ((hex_numbers > 8) ||
+ (hex_numbers < 8 && !is_wildcard && !is_compressed)) {
+ ret = 0;
+ }
out:
- GF_FREE (tmp);
- return ret;
+ GF_FREE(tmp);
+ return ret;
}
char
-valid_internet_address (char *address, gf_boolean_t wildcard_acc)
+valid_internet_address(char *address, gf_boolean_t wildcard_acc,
+ gf_boolean_t cidr)
{
- char ret = 0;
- int length = 0;
+ char ret = 0;
+ int length = 0;
- if (address == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "argument invalid");
- goto out;
- }
+ if (address == NULL) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ goto out;
+ }
- length = strlen (address);
- if (length == 0)
- goto out;
+ length = strlen(address);
+ if (length == 0)
+ goto out;
- if (valid_ipv4_address (address, length, wildcard_acc)
- || valid_ipv6_address (address, length, wildcard_acc)
- || valid_host_name (address, length))
- ret = 1;
+ if (cidr && valid_cidr_address(address, wildcard_acc)) {
+ ret = 1;
+ }
+
+ if (valid_ipv4_address(address, length, wildcard_acc) ||
+ valid_ipv6_address(address, length, wildcard_acc) ||
+ valid_host_name(address, length))
+ ret = 1;
out:
- return ret;
+ return ret;
+}
+
+/**
+ * valid_mount_auth_address - Validate the rpc-auth.addr.allow/reject pattern
+ *
+ * @param address - Pattern to be validated
+ *
+ * @return _gf_true if "address" is "*" (anonymous) 'OR'
+ * if "address" is valid FQDN or valid IPv4/6 address 'OR'
+ * if "address" contains wildcard chars e.g. "'*' or '?' or
+ * '['" if "address" is valid ipv4 subnet pattern (xx.xx.xx.xx/n) _gf_false
+ * otherwise
+ *
+ *
+ * NB: If the user/admin set for wildcard pattern, then it does not have
+ * to be validated. Make it similar to the way exportfs (kNFS) works.
+ */
+gf_boolean_t
+valid_mount_auth_address(char *address)
+{
+ int length = 0;
+ char *cp = NULL;
+
+ /* 1. Check for "NULL and empty string */
+ if ((address == NULL) || (address[0] == '\0')) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "argument invalid");
+ return _gf_false;
+ }
+
+ /* 2. Check for Anonymous */
+ if (strcmp(address, "*") == 0)
+ return _gf_true;
+
+ for (cp = address; *cp; cp++) {
+ /* 3. Check for wildcard pattern */
+ if (*cp == '*' || *cp == '?' || *cp == '[') {
+ return _gf_true;
+ }
+
+ /*
+ * 4. check for IPv4 subnetwork i.e. xx.xx.xx.xx/n
+ * TODO: check for IPv6 subnetwork
+ * NB: Wildcard must not be mixed with subnetwork.
+ */
+ if (*cp == '/') {
+ return valid_ipv4_subnetwork(address);
+ }
+ }
+
+ /* 5. Check for v4/v6 IP addr and FQDN/hostname */
+ length = strlen(address);
+ if ((valid_ipv4_address(address, length, _gf_false)) ||
+ (valid_ipv6_address(address, length, _gf_false)) ||
+ (valid_host_name(address, length))) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+/**
+ * gf_sock_union_equal_addr - check if two given gf_sock_unions have same addr
+ *
+ * @param a - first sock union
+ * @param b - second sock union
+ * @return _gf_true if a and b have same ipv{4,6} addr, _gf_false otherwise
+ */
+gf_boolean_t
+gf_sock_union_equal_addr(union gf_sock_union *a, union gf_sock_union *b)
+{
+ if (!a || !b) {
+ gf_smsg("common-utils", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
+ "gf_sock_union_equal_addr", NULL);
+ return _gf_false;
+ }
+
+ if (a->storage.ss_family != b->storage.ss_family)
+ return _gf_false;
+
+ switch (a->storage.ss_family) {
+ case AF_INET:
+ if (a->sin.sin_addr.s_addr == b->sin.sin_addr.s_addr)
+ return _gf_true;
+ else
+ return _gf_false;
+
+ case AF_INET6:
+ if (memcmp((void *)(&a->sin6.sin6_addr),
+ (void *)(&b->sin6.sin6_addr), sizeof(a->sin6.sin6_addr)))
+ return _gf_false;
+ else
+ return _gf_true;
+
+ default:
+ gf_msg_debug("common-utils", 0,
+ "Unsupported/invalid address "
+ "family");
+ break;
+ }
+
+ return _gf_false;
+}
+
+/*
+ * Check if both have same network address.
+ * Extract the network address from the sockaddr(s) addr by applying the
+ * network mask. If they match, return boolean _gf_true, _gf_false otherwise.
+ *
+ * (x == y) <=> (x ^ y == 0)
+ * (x & y) ^ (x & z) <=> x & (y ^ z)
+ *
+ * ((ip1 & mask) == (ip2 & mask)) <=> ((mask & (ip1 ^ ip2)) == 0)
+ */
+gf_boolean_t
+mask_match(const uint32_t a, const uint32_t b, const uint32_t m)
+{
+ return (((a ^ b) & m) == 0);
}
/*Thread safe conversion function*/
char *
-uuid_utoa (uuid_t uuid)
+uuid_utoa(uuid_t uuid)
{
- char *uuid_buffer = glusterfs_uuid_buf_get();
- uuid_unparse (uuid, uuid_buffer);
- return uuid_buffer;
+ char *uuid_buffer = glusterfs_uuid_buf_get();
+ gf_uuid_unparse(uuid, uuid_buffer);
+ return uuid_buffer;
}
/*Re-entrant conversion function*/
char *
-uuid_utoa_r (uuid_t uuid, char *dst)
+uuid_utoa_r(uuid_t uuid, char *dst)
{
- if(!dst)
- return NULL;
- uuid_unparse (uuid, dst);
- return dst;
+ if (!dst)
+ return NULL;
+ gf_uuid_unparse(uuid, dst);
+ return dst;
}
/*Thread safe conversion function*/
char *
-lkowner_utoa (gf_lkowner_t *lkowner)
+lkowner_utoa(gf_lkowner_t *lkowner)
{
- char *lkowner_buffer = glusterfs_lkowner_buf_get();
- lkowner_unparse (lkowner, lkowner_buffer, GF_LKOWNER_BUF_SIZE);
- return lkowner_buffer;
+ char *lkowner_buffer = glusterfs_lkowner_buf_get();
+ lkowner_unparse(lkowner, lkowner_buffer, GF_LKOWNER_BUF_SIZE);
+ return lkowner_buffer;
}
/*Re-entrant conversion function*/
char *
-lkowner_utoa_r (gf_lkowner_t *lkowner, char *dst, int len)
+lkowner_utoa_r(gf_lkowner_t *lkowner, char *dst, int len)
{
- if(!dst)
- return NULL;
- lkowner_unparse (lkowner, dst, len);
- return dst;
+ if (!dst)
+ return NULL;
+ lkowner_unparse(lkowner, dst, len);
+ return dst;
}
-void* gf_array_elem (void *a, int index, size_t elem_size)
+gf_boolean_t
+is_valid_lease_id(const char *lease_id)
{
- uint8_t* ptr = a;
- return (void*)(ptr + index * elem_size);
+ int i = 0;
+ gf_boolean_t valid = _gf_false;
+
+ for (i = 0; i < LEASE_ID_SIZE; i++) {
+ if (lease_id[i] != 0) {
+ valid = _gf_true;
+ goto out;
+ }
+ }
+out:
+ return valid;
}
-void
-gf_elem_swap (void *x, void *y, size_t l) {
- uint8_t *a = x, *b = y, c;
- while(l--) {
- c = *a;
- *a++ = *b;
- *b++ = c;
+/* Lease_id can be a either in printable or non printable binary
+ * format. This function can be used to print any lease_id.
+ *
+ * This function returns a pointer to a buf, containing the ascii
+ * representation of the value in lease_id, in the following format:
+ * 4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum
+ *
+ * Eg: If lease_id = "lid1-clnt1" the printable string would be:
+ * 6c69-6431-2d63-6c6e-7431-0000-0000-0000
+ *
+ * Note: The pointer returned should not be stored for further use, as any
+ * subsequent call to this function will override the same buffer.
+ */
+char *
+leaseid_utoa(const char *lease_id)
+{
+ char *buf = NULL;
+ int i = 0;
+ int j = 0;
+
+ buf = glusterfs_leaseid_buf_get();
+ if (!buf)
+ goto out;
+
+ for (i = 0; i < LEASE_ID_SIZE; i++) {
+ if (i && !(i % 2)) {
+ buf[j] = '-';
+ j++;
}
+ sprintf(&buf[j], "%02hhx", lease_id[i]);
+ j += 2;
+ if (j == GF_LEASE_ID_BUF_SIZE)
+ break;
+ }
+ buf[GF_LEASE_ID_BUF_SIZE - 1] = '\0';
+out:
+ return buf;
+}
+
+char *
+gf_leaseid_get()
+{
+ return glusterfs_leaseid_buf_get();
+}
+
+char *
+gf_existing_leaseid()
+{
+ return glusterfs_leaseid_exist();
+}
+
+void *
+gf_array_elem(void *a, int index, size_t elem_size)
+{
+ uint8_t *ptr = a;
+ return (void *)(ptr + index * elem_size);
}
void
-gf_array_insertionsort (void *A, int l, int r, size_t elem_size,
- gf_cmp cmp)
-{
- int i = l;
- int N = r+1;
- void *Temp = NULL;
- int j = 0;
-
- for(i = l; i < N; i++) {
- Temp = gf_array_elem (A, i, elem_size);
- j = i - 1;
- while((cmp (Temp, gf_array_elem (A, j, elem_size))
- < 0) && j>=0) {
- gf_elem_swap (Temp, gf_array_elem (A, j, elem_size),
- elem_size);
- Temp = gf_array_elem (A, j, elem_size);
- j = j-1;
- }
+gf_elem_swap(void *x, void *y, size_t l)
+{
+ uint8_t *a = x, *b = y, c;
+ while (l--) {
+ c = *a;
+ *a++ = *b;
+ *b++ = c;
+ }
+}
+
+void
+gf_array_insertionsort(void *A, int l, int r, size_t elem_size, gf_cmp cmp)
+{
+ int i = l;
+ int N = r + 1;
+ void *Temp = NULL;
+ int j = 0;
+
+ for (i = l; i < N; i++) {
+ Temp = gf_array_elem(A, i, elem_size);
+ j = i - 1;
+ while (j >= 0 && (cmp(Temp, gf_array_elem(A, j, elem_size)) < 0)) {
+ gf_elem_swap(Temp, gf_array_elem(A, j, elem_size), elem_size);
+ Temp = gf_array_elem(A, j, elem_size);
+ j = j - 1;
}
+ }
}
int
-gf_is_str_int (const char *value)
+gf_is_str_int(const char *value)
{
- int flag = 0;
- char *str = NULL;
- char *fptr = NULL;
+ int flag = 0;
+ char *str = NULL;
+ char *fptr = NULL;
- GF_VALIDATE_OR_GOTO (THIS->name, value, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, value, out);
- str = gf_strdup (value);
- if (!str)
- goto out;
+ str = gf_strdup(value);
+ if (!str)
+ goto out;
- fptr = str;
+ fptr = str;
- while (*str) {
- if (!isdigit(*str)) {
- flag = 1;
- goto out;
- }
- str++;
+ while (*str) {
+ if (!isdigit(*str)) {
+ flag = 1;
+ goto out;
}
+ str++;
+ }
out:
- if (fptr)
- GF_FREE (fptr);
+ GF_FREE(fptr);
- return flag;
+ return flag;
}
/*
* rounds up nr to power of two. If nr is already a power of two, just returns
* nr
*/
-inline int32_t
-gf_roundup_power_of_two (uint32_t nr)
+int32_t
+gf_roundup_power_of_two(int32_t nr)
{
- uint32_t result = 1;
+ int32_t result = 1;
- if (nr < 0) {
- gf_log ("common-utils", GF_LOG_WARNING,
- "negative number passed");
- result = -1;
- goto out;
- }
+ if (nr < 0) {
+ gf_smsg("common-utils", GF_LOG_WARNING, 0, LG_MSG_NEGATIVE_NUM_PASSED,
+ NULL);
+ result = -1;
+ goto out;
+ }
- while (result < nr)
- result *= 2;
+ while (result < nr)
+ result *= 2;
out:
- return result;
+ return result;
}
/*
@@ -1949,207 +3041,2425 @@ out:
* power of two is returned.
*/
-inline int32_t
-gf_roundup_next_power_of_two (uint32_t nr)
+int32_t
+gf_roundup_next_power_of_two(int32_t nr)
{
- int32_t result = 1;
+ int32_t result = 1;
- if (nr < 0) {
- gf_log ("common-utils", GF_LOG_WARNING,
- "negative number passed");
- result = -1;
- goto out;
- }
+ if (nr < 0) {
+ gf_smsg("common-utils", GF_LOG_WARNING, 0, LG_MSG_NEGATIVE_NUM_PASSED,
+ NULL);
+ result = -1;
+ goto out;
+ }
- while (result <= nr)
- result *= 2;
+ while (result <= nr)
+ result *= 2;
out:
- return result;
+ return result;
}
int
-validate_brick_name (char *brick)
+validate_brick_name(char *brick)
{
- char *delimiter = NULL;
- int ret = 0;
- delimiter = strrchr (brick, ':');
- if (!delimiter || delimiter == brick
- || *(delimiter+1) != '/')
- ret = -1;
+ char *delimiter = NULL;
+ int ret = 0;
+ delimiter = strrchr(brick, ':');
+ if (!delimiter || delimiter == brick || *(delimiter + 1) != '/')
+ ret = -1;
- return ret;
+ return ret;
}
char *
-get_host_name (char *word, char **host)
+get_host_name(char *word, char **host)
{
- char *delimiter = NULL;
- delimiter = strrchr (word, ':');
- if (delimiter)
- *delimiter = '\0';
- else
- return NULL;
- *host = word;
- return *host;
+ char *delimiter = NULL;
+ delimiter = strrchr(word, ':');
+ if (delimiter)
+ *delimiter = '\0';
+ else
+ return NULL;
+ *host = word;
+ return *host;
}
-
char *
-get_path_name (char *word, char **path)
+get_path_name(char *word, char **path)
{
- char *delimiter = NULL;
- delimiter = strchr (word, '/');
- if (!delimiter)
- return NULL;
- *path = delimiter;
- return *path;
+ char *delimiter = NULL;
+ delimiter = strchr(word, '/');
+ if (!delimiter)
+ return NULL;
+ *path = delimiter;
+ return *path;
}
void
-gf_path_strip_trailing_slashes (char *path)
+gf_path_strip_trailing_slashes(char *path)
{
- int i = 0;
- int len = 0;
+ int i = 0;
+ int len = 0;
- if (!path)
- return;
+ if (!path)
+ return;
- len = strlen (path);
- for (i = len - 1; i > 0; i--) {
- if (path[i] != '/')
- break;
- }
+ len = strlen(path);
+ for (i = len - 1; i > 0; i--) {
+ if (path[i] != '/')
+ break;
+ }
- if (i < (len -1))
- path [i+1] = '\0';
+ if (i < (len - 1))
+ path[i + 1] = '\0';
- return;
+ return;
}
uint64_t
-get_mem_size ()
+get_mem_size()
{
- uint64_t memsize = -1;
+ uint64_t memsize = -1;
#if defined GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS
- uint64_t page_size = 0;
- uint64_t num_pages = 0;
+ uint64_t page_size = 0;
+ uint64_t num_pages = 0;
- page_size = sysconf (_SC_PAGESIZE);
- num_pages = sysconf (_SC_PHYS_PAGES);
+ page_size = sysconf(_SC_PAGESIZE);
+ num_pages = sysconf(_SC_PHYS_PAGES);
- memsize = page_size * num_pages;
+ memsize = page_size * num_pages;
#endif
-#if defined GF_BSD_HOST_OS || defined GF_DARWIN_HOST_OS
+#if defined GF_DARWIN_HOST_OS || defined __FreeBSD__
- size_t len = sizeof(memsize);
- int name [] = { CTL_HW, HW_PHYSMEM };
+ size_t len = sizeof(memsize);
+ int name[] = {CTL_HW, HW_PHYSMEM};
- sysctl (name, 2, &memsize, &len, NULL, 0);
+ sysctl(name, 2, &memsize, &len, NULL, 0);
#endif
- return memsize;
+
+#if defined __NetBSD__
+
+ size_t len = sizeof(memsize);
+ int name64[] = {CTL_HW, HW_PHYSMEM64};
+
+ sysctl(name64, 2, &memsize, &len, NULL, 0);
+ if (memsize == -1)
+ sysctl(name64, 2, &memsize, &len, NULL, 0);
+#endif
+ return memsize;
}
/* Strips all whitespace characters in a string and returns length of new string
* on success
*/
int
-gf_strip_whitespace (char *str, int len)
+gf_strip_whitespace(char *str, int len)
{
- int i = 0;
- int new_len = 0;
- char *new_str = NULL;
+ int i = 0;
+ int new_len = 0;
+ char *new_str = NULL;
- GF_ASSERT (str);
+ GF_ASSERT(str);
- new_str = GF_CALLOC (1, len + 1, gf_common_mt_char);
- if (new_str == NULL)
- return -1;
+ new_str = GF_MALLOC(len + 1, gf_common_mt_char);
+ if (new_str == NULL)
+ return -1;
- for (i = 0; i < len; i++) {
- if (!isspace (str[i]))
- new_str[new_len++] = str[i];
- }
- new_str[new_len] = '\0';
+ for (i = 0; i < len; i++) {
+ if (!isspace(str[i]))
+ new_str[new_len++] = str[i];
+ }
+ new_str[new_len] = '\0';
- if (new_len != len) {
- memset (str, 0, len);
- strncpy (str, new_str, new_len);
- }
+ if (new_len != len) {
+ snprintf(str, new_len + 1, "%s", new_str);
+ }
- GF_FREE (new_str);
- return new_len;
+ GF_FREE(new_str);
+ return new_len;
}
-/* If the path exists use realpath(3) to handle extra slashes and to resolve
- * symlinks else strip the extra slashes in the path and return */
-
int
-gf_canonicalize_path (char *path)
+gf_canonicalize_path(char *path)
{
- int ret = -1;
- int path_len = 0;
- int dir_path_len = 0;
- char *tmppath = NULL;
- char *dir = NULL;
- char *tmpstr = NULL;
+ int ret = -1;
+ int path_len = 0;
+ int dir_path_len = 0;
+ char *tmppath = NULL;
+ char *dir = NULL;
+ char *tmpstr = NULL;
- if (!path || *path != '/')
- goto out;
+ if (!path || *path != '/')
+ goto out;
- tmppath = gf_strdup (path);
- if (!tmppath)
- goto out;
+ if (!strcmp(path, "/"))
+ return 0;
- /* Strip the extra slashes and return */
- bzero (path, strlen(path));
- path[0] = '/';
- dir = strtok_r(tmppath, "/", &tmpstr);
-
- while (dir) {
- dir_path_len = strlen(dir);
- strncpy ((path + path_len + 1), dir, dir_path_len);
- path_len += dir_path_len + 1;
- dir = strtok_r (NULL, "/", &tmpstr);
- if (dir)
- strncpy ((path + path_len), "/", 1);
+ tmppath = gf_strdup(path);
+ if (!tmppath)
+ goto out;
+
+ /* Strip the extra slashes and return */
+ bzero(path, strlen(path));
+ path[0] = '/';
+ dir = strtok_r(tmppath, "/", &tmpstr);
+
+ while (dir) {
+ dir_path_len = strlen(dir);
+ memcpy((path + path_len + 1), dir, dir_path_len);
+ path_len += dir_path_len + 1;
+ dir = strtok_r(NULL, "/", &tmpstr);
+ if (dir) {
+ path[path_len] = '/';
}
- path[path_len] = '\0';
- ret = 0;
+ }
+ path[path_len] = '\0';
+ ret = 0;
- out:
- if (ret)
- gf_log ("common-utils", GF_LOG_ERROR,
- "Path manipulation failed");
+out:
+ if (ret)
+ gf_smsg("common-utils", GF_LOG_ERROR, 0, LG_MSG_PATH_ERROR, NULL);
- if (tmppath)
- GF_FREE(tmppath);
+ GF_FREE(tmppath);
- return ret;
+ return ret;
}
static const char *__gf_timefmts[] = {
- "%F %T",
- "%Y/%m/%d-%T",
- "%b %d %T",
- "%F %H%M%S"
+ "%F %T", "%Y/%m/%d-%T", "%b %d %T", "%F %H%M%S", "%Y-%m-%d-%T", "%s",
};
static const char *__gf_zerotimes[] = {
- "0000-00-00 00:00:00",
- "0000/00/00-00:00:00",
- "xxx 00 00:00:00",
- "0000-00-00 000000"
+ "0000-00-00 00:00:00", "0000/00/00-00:00:00", "xxx 00 00:00:00",
+ "0000-00-00 000000", "0000-00-00-00:00:00", "0",
};
void
-_gf_timestuff (gf_timefmts *fmt, const char ***fmts, const char ***zeros)
+_gf_timestuff(const char ***fmts, const char ***zeros)
+{
+ *fmts = __gf_timefmts;
+ *zeros = __gf_zerotimes;
+}
+
+char *
+generate_glusterfs_ctx_id(void)
+{
+ uuid_t ctxid;
+ char *tmp = NULL;
+
+ gf_uuid_generate(ctxid);
+ tmp = uuid_utoa(ctxid);
+
+ return gf_strdup(tmp);
+}
+
+char *
+gf_get_reserved_ports()
+{
+ char *ports_info = NULL;
+#if defined GF_LINUX_HOST_OS
+ int proc_fd = -1;
+ char *proc_file = "/proc/sys/net/ipv4/ip_local_reserved_ports";
+ char buffer[4096] = {
+ 0,
+ };
+ int32_t ret = -1;
+
+ proc_fd = open(proc_file, O_RDONLY);
+ if (proc_fd == -1) {
+ /* What should be done in this case? error out from here
+ * and thus stop the glusterfs process from starting or
+ * continue with older method of using any of the available
+ * port? For now 2nd option is considered.
+ */
+ gf_smsg("glusterfs", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ " /proc/sys/net/ipv4/ip_local_reserved_ports", NULL);
+ goto out;
+ }
+
+ ret = sys_read(proc_fd, buffer, sizeof(buffer) - 1);
+ if (ret < 0) {
+ gf_smsg("glusterfs", GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "file=%s", proc_file, NULL);
+ goto out;
+ }
+
+ buffer[ret] = '\0';
+ ports_info = gf_strdup(buffer);
+
+out:
+ if (proc_fd != -1)
+ sys_close(proc_fd);
+#endif /* GF_LINUX_HOST_OS */
+ return ports_info;
+}
+
+int
+gf_process_reserved_ports(unsigned char *ports, uint32_t ceiling)
+{
+ int ret = -1;
+
+ memset(ports, 0, GF_PORT_ARRAY_SIZE);
+
+#if defined GF_LINUX_HOST_OS
+ char *ports_info = NULL;
+ char *tmp = NULL;
+ char *blocked_port = NULL;
+
+ ports_info = gf_get_reserved_ports();
+ if (!ports_info) {
+ gf_smsg("glusterfs", GF_LOG_WARNING, 0, LG_MSG_RESERVED_PORTS_ERROR,
+ NULL);
+ goto out;
+ }
+
+ blocked_port = strtok_r(ports_info, ",\n", &tmp);
+
+ while (blocked_port) {
+ gf_ports_reserved(blocked_port, ports, ceiling);
+ blocked_port = strtok_r(NULL, ",\n", &tmp);
+ }
+
+ ret = 0;
+
+out:
+ GF_FREE(ports_info);
+
+#else /* FIXME: Non Linux Host */
+ ret = 0;
+#endif /* GF_LINUX_HOST_OS */
+
+ return ret;
+}
+
+gf_boolean_t
+gf_ports_reserved(char *blocked_port, unsigned char *ports, uint32_t ceiling)
+{
+ gf_boolean_t result = _gf_false;
+ char *range_port = NULL;
+ int32_t tmp_port1 = -1;
+ int32_t tmp_port2 = -1;
+
+ if (strstr(blocked_port, "-") == NULL) {
+ /* get rid of the new line character*/
+ if (blocked_port[strlen(blocked_port) - 1] == '\n')
+ blocked_port[strlen(blocked_port) - 1] = '\0';
+ if (gf_string2int32(blocked_port, &tmp_port1) == 0) {
+ if (tmp_port1 > GF_PORT_MAX || tmp_port1 < 0) {
+ gf_smsg("glusterfs-socket", GF_LOG_WARNING, 0,
+ LG_MSG_INVALID_PORT, "port=%d", tmp_port1, NULL);
+ result = _gf_true;
+ goto out;
+ } else {
+ gf_msg_debug("glusterfs", 0,
+ "blocking port "
+ "%d",
+ tmp_port1);
+ BIT_SET(ports, tmp_port1);
+ }
+ } else {
+ gf_smsg("glusterfs-socket", GF_LOG_WARNING, 0, LG_MSG_INVALID_PORT,
+ "port=%s", blocked_port, NULL);
+ result = _gf_true;
+ goto out;
+ }
+ } else {
+ range_port = strtok(blocked_port, "-");
+ if (!range_port) {
+ result = _gf_true;
+ goto out;
+ }
+ if (gf_string2int32(range_port, &tmp_port1) == 0) {
+ if (tmp_port1 > ceiling)
+ tmp_port1 = ceiling;
+ if (tmp_port1 < 0)
+ tmp_port1 = 0;
+ }
+ range_port = strtok(NULL, "-");
+ if (!range_port) {
+ result = _gf_true;
+ goto out;
+ }
+ /* get rid of the new line character*/
+ if (range_port[strlen(range_port) - 1] == '\n')
+ range_port[strlen(range_port) - 1] = '\0';
+ if (gf_string2int32(range_port, &tmp_port2) == 0) {
+ if (tmp_port2 > ceiling)
+ tmp_port2 = ceiling;
+ if (tmp_port2 < 0)
+ tmp_port2 = 0;
+ }
+ gf_msg_debug("glusterfs", 0, "lower: %d, higher: %d", tmp_port1,
+ tmp_port2);
+ for (; tmp_port1 <= tmp_port2; tmp_port1++)
+ BIT_SET(ports, tmp_port1);
+ }
+
+out:
+ return result;
+}
+
+/* Takes in client ip{v4,v6} and returns associated hostname, if any
+ * Also, allocates memory for the hostname.
+ * Returns: 0 for success, -1 for failure
+ */
+int
+gf_get_hostname_from_ip(char *client_ip, char **hostname)
+{
+ int ret = -1;
+ struct sockaddr *client_sockaddr = NULL;
+ struct sockaddr_in client_sock_in = {0};
+ struct sockaddr_in6 client_sock_in6 = {0};
+ char client_hostname[NI_MAXHOST] = {0};
+ char *client_ip_copy = NULL;
+ char *tmp = NULL;
+ char *ip = NULL;
+ size_t addr_sz = 0;
+
+ /* if ipv4, reverse lookup the hostname to
+ * allow FQDN based rpc authentication
+ */
+ if (!valid_ipv6_address(client_ip, strlen(client_ip), 0) &&
+ !valid_ipv4_address(client_ip, strlen(client_ip), 0)) {
+ /* most times, we get a.b.c.d:port form, so check that */
+ client_ip_copy = gf_strdup(client_ip);
+ if (!client_ip_copy)
+ goto out;
+
+ ip = strtok_r(client_ip_copy, ":", &tmp);
+ } else {
+ ip = client_ip;
+ }
+
+ if (valid_ipv4_address(ip, strlen(ip), 0) == _gf_true) {
+ client_sockaddr = (struct sockaddr *)&client_sock_in;
+ addr_sz = sizeof(client_sock_in);
+ client_sock_in.sin_family = AF_INET;
+ ret = inet_pton(AF_INET, ip, (void *)&client_sock_in.sin_addr.s_addr);
+
+ } else if (valid_ipv6_address(ip, strlen(ip), 0) == _gf_true) {
+ client_sockaddr = (struct sockaddr *)&client_sock_in6;
+ addr_sz = sizeof(client_sock_in6);
+
+ client_sock_in6.sin6_family = AF_INET6;
+ ret = inet_pton(AF_INET6, ip, (void *)&client_sock_in6.sin6_addr);
+ } else {
+ goto out;
+ }
+
+ if (ret != 1) {
+ ret = -1;
+ goto out;
+ }
+
+ /* You cannot just use sizeof (*client_sockaddr), as per the man page
+ * the (getnameinfo) size must be the size of the underlying sockaddr
+ * struct e.g. sockaddr_in6 or sockaddr_in. Failure to do so will
+ * break IPv6 hostname resolution (IPv4 will work only because
+ * the sockaddr_in struct happens to be of the correct size).
+ */
+ ret = getnameinfo(client_sockaddr, addr_sz, client_hostname,
+ sizeof(client_hostname), NULL, 0, 0);
+ if (ret) {
+ gf_smsg("common-utils", GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED,
+ "ip=%s", client_ip, "ret=%s", gai_strerror(ret), NULL);
+ ret = -1;
+ goto out;
+ }
+
+ *hostname = gf_strdup((char *)client_hostname);
+out:
+ if (client_ip_copy)
+ GF_FREE(client_ip_copy);
+
+ return ret;
+}
+
+gf_boolean_t
+gf_interface_search(char *ip)
+{
+ int32_t ret = -1;
+ gf_boolean_t found = _gf_false;
+ struct ifaddrs *ifaddr, *ifa;
+ int family;
+ char host[NI_MAXHOST];
+ xlator_t *this = NULL;
+ char *pct = NULL;
+
+ this = THIS;
+
+ ret = getifaddrs(&ifaddr);
+
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETIFADDRS_FAILED, "ret=%s",
+ gai_strerror(ret), NULL);
+ goto out;
+ }
+
+ for (ifa = ifaddr; ifa != NULL; ifa = ifa->ifa_next) {
+ if (!ifa->ifa_addr) {
+ /*
+ * This seemingly happens if an interface hasn't
+ * been bound to a particular protocol (seen with
+ * TUN devices).
+ */
+ continue;
+ }
+ family = ifa->ifa_addr->sa_family;
+
+ if (family != AF_INET && family != AF_INET6)
+ continue;
+
+ ret = getnameinfo(ifa->ifa_addr,
+ (family == AF_INET) ? sizeof(struct sockaddr_in)
+ : sizeof(struct sockaddr_in6),
+ host, NI_MAXHOST, NULL, 0, NI_NUMERICHOST);
+
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETNAMEINFO_FAILED,
+ "ret=%s", gai_strerror(ret), NULL);
+ goto out;
+ }
+
+ /*
+ * Sometimes the address comes back as addr%eth0 or
+ * similar. Since % is an invalid character, we can
+ * strip it out with confidence that doing so won't
+ * harm anything.
+ */
+ pct = index(host, '%');
+ if (pct) {
+ *pct = '\0';
+ }
+
+ if (strncmp(ip, host, NI_MAXHOST) == 0) {
+ gf_msg_debug(this->name, 0,
+ "%s is local address at "
+ "interface %s",
+ ip, ifa->ifa_name);
+ found = _gf_true;
+ goto out;
+ }
+ }
+out:
+ if (ifaddr)
+ freeifaddrs(ifaddr);
+ return found;
+}
+
+char *
+get_ip_from_addrinfo(struct addrinfo *addr, char **ip)
+{
+ char buf[64];
+ void *in_addr = NULL;
+ struct sockaddr_in *s4 = NULL;
+ struct sockaddr_in6 *s6 = NULL;
+
+ switch (addr->ai_family) {
+ case AF_INET:
+ s4 = (struct sockaddr_in *)addr->ai_addr;
+ in_addr = &s4->sin_addr;
+ break;
+
+ case AF_INET6:
+ s6 = (struct sockaddr_in6 *)addr->ai_addr;
+ in_addr = &s6->sin6_addr;
+ break;
+
+ default:
+ gf_smsg("glusterd", GF_LOG_ERROR, 0, LG_MSG_INVALID_FAMILY, NULL);
+ return NULL;
+ }
+
+ if (!inet_ntop(addr->ai_family, in_addr, buf, sizeof(buf))) {
+ gf_smsg("glusterd", GF_LOG_ERROR, 0, LG_MSG_CONVERSION_FAILED, NULL);
+ return NULL;
+ }
+
+ *ip = gf_strdup(buf);
+ return *ip;
+}
+
+gf_boolean_t
+gf_is_loopback_localhost(const struct sockaddr *sa, char *hostname)
+{
+ GF_ASSERT(sa);
+
+ gf_boolean_t is_local = _gf_false;
+ const struct in_addr *addr4 = NULL;
+ const struct in6_addr *addr6 = NULL;
+ uint8_t *ap = NULL;
+ struct in6_addr loopbackaddr6 = IN6ADDR_LOOPBACK_INIT;
+
+ switch (sa->sa_family) {
+ case AF_INET:
+ addr4 = &(((struct sockaddr_in *)sa)->sin_addr);
+ ap = (uint8_t *)&addr4->s_addr;
+ if (ap[0] == 127)
+ is_local = _gf_true;
+ break;
+
+ case AF_INET6:
+ addr6 = &(((struct sockaddr_in6 *)sa)->sin6_addr);
+ if (memcmp(addr6, &loopbackaddr6, sizeof(loopbackaddr6)) == 0)
+ is_local = _gf_true;
+ break;
+
+ default:
+ if (hostname)
+ gf_smsg("glusterd", GF_LOG_ERROR, 0, LG_MSG_INVALID_FAMILY,
+ "family=%d", sa->sa_family, "hostname=%s", hostname,
+ NULL);
+ break;
+ }
+
+ return is_local;
+}
+
+gf_boolean_t
+gf_is_local_addr(char *hostname)
+{
+ int32_t ret = -1;
+ struct addrinfo *result = NULL;
+ struct addrinfo *res = NULL;
+ gf_boolean_t found = _gf_false;
+ char *ip = NULL;
+ xlator_t *this = NULL;
+ struct addrinfo hints;
+
+ this = THIS;
+
+ memset(&hints, 0, sizeof(hints));
+ /*
+ * Removing AI_ADDRCONFIG from default_hints
+ * for being able to use link local ipv6 addresses
+ */
+ hints.ai_family = AF_UNSPEC;
+
+ ret = getaddrinfo(hostname, NULL, &hints, &result);
+
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, LG_MSG_GETADDRINFO_FAILED,
+ "ret=%s", gai_strerror(ret), NULL);
+ goto out;
+ }
+
+ for (res = result; res != NULL; res = res->ai_next) {
+ get_ip_from_addrinfo(res, &ip);
+ gf_msg_debug(this->name, 0, "%s ", ip);
+
+ if (ip) {
+ found = (gf_is_loopback_localhost(res->ai_addr, hostname) ||
+ gf_interface_search(ip));
+ }
+ if (found) {
+ GF_FREE(ip);
+ goto out;
+ }
+ GF_FREE(ip);
+ /* the above free will not set ip to NULL, and hence, there is
+ double free possible as the loop continues. set ip to NULL. */
+ ip = NULL;
+ }
+
+out:
+ if (result)
+ freeaddrinfo(result);
+
+ if (!found)
+ gf_msg_debug(this->name, 0, "%s is not local", hostname);
+
+ return found;
+}
+
+gf_boolean_t
+gf_is_same_address(char *name1, char *name2)
+{
+ struct addrinfo *addr1 = NULL;
+ struct addrinfo *addr2 = NULL;
+ struct addrinfo *p = NULL;
+ struct addrinfo *q = NULL;
+ gf_boolean_t ret = _gf_false;
+ int gai_err = 0;
+ struct addrinfo hints;
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_UNSPEC;
+
+ gai_err = getaddrinfo(name1, NULL, &hints, &addr1);
+ if (gai_err != 0) {
+ gf_smsg(name1, GF_LOG_WARNING, 0, LG_MSG_GETADDRINFO_FAILED, "error=%s",
+ gai_strerror(gai_err), NULL);
+ goto out;
+ }
+
+ gai_err = getaddrinfo(name2, NULL, &hints, &addr2);
+ if (gai_err != 0) {
+ gf_smsg(name2, GF_LOG_WARNING, 0, LG_MSG_GETADDRINFO_FAILED, "error=%s",
+ gai_strerror(gai_err), NULL);
+ goto out;
+ }
+
+ for (p = addr1; p; p = p->ai_next) {
+ for (q = addr2; q; q = q->ai_next) {
+ if (p->ai_addrlen != q->ai_addrlen) {
+ continue;
+ }
+ if (memcmp(p->ai_addr, q->ai_addr, p->ai_addrlen)) {
+ continue;
+ }
+ ret = _gf_true;
+ goto out;
+ }
+ }
+
+out:
+ if (addr1) {
+ freeaddrinfo(addr1);
+ }
+ if (addr2) {
+ freeaddrinfo(addr2);
+ }
+ return ret;
+}
+
+/*
+ * Processes list of volfile servers.
+ * Format: <host1>:<port1> <host2>:<port2>...
+ */
+int
+gf_process_getspec_servers_list(cmd_args_t *cmd_args, const char *servers_list)
+{
+ char *tmp = NULL;
+ char *address = NULL;
+ char *host = NULL;
+ char *last_colon = NULL;
+ char *save_ptr = NULL;
+ int port = 0;
+ int ret = -1;
+
+ tmp = gf_strdup(servers_list);
+ if (!tmp) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ address = strtok_r(tmp, " ", &save_ptr);
+ if (!address) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ while (1) {
+ last_colon = strrchr(address, ':');
+ if (!last_colon) {
+ errno = EINVAL;
+ ret = -1;
+ break;
+ }
+ *last_colon = '\0';
+ host = address;
+ port = atoi(last_colon + 1);
+ if (port <= 0) {
+ errno = EINVAL;
+ ret = -1;
+ break;
+ }
+ ret = gf_set_volfile_server_common(cmd_args, host,
+ GF_DEFAULT_VOLFILE_TRANSPORT, port);
+ if (ret && errno != EEXIST) {
+ break;
+ }
+ address = strtok_r(NULL, " ", &save_ptr);
+ if (!address) {
+ errno = 0;
+ ret = 0;
+ break;
+ }
+ }
+
+out:
+ if (tmp) {
+ GF_FREE(tmp);
+ }
+
+ return ret;
+}
+
+int
+gf_set_volfile_server_common(cmd_args_t *cmd_args, const char *host,
+ const char *transport, int port)
+{
+ server_cmdline_t *server = NULL;
+ server_cmdline_t *tmp = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, cmd_args, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, host, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, transport, out);
+
+ server = GF_CALLOC(1, sizeof(server_cmdline_t),
+ gf_common_mt_server_cmdline_t);
+ if (!server) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&server->list);
+
+ server->volfile_server = gf_strdup(host);
+ if (!server->volfile_server) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ server->transport = gf_strdup(transport);
+ if (!server->transport) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ server->port = port;
+
+ if (!cmd_args->volfile_server) {
+ cmd_args->volfile_server = server->volfile_server;
+ cmd_args->volfile_server_transport = server->transport;
+ cmd_args->volfile_server_port = server->port;
+ cmd_args->curr_server = server;
+ }
+
+ list_for_each_entry(tmp, &cmd_args->volfile_servers, list)
+ {
+ if ((!strcmp(tmp->volfile_server, server->volfile_server) &&
+ !strcmp(tmp->transport, server->transport) &&
+ (tmp->port == server->port))) {
+ /* Duplicate option given, log and ignore */
+ gf_smsg("gluster", GF_LOG_INFO, EEXIST, LG_MSG_DUPLICATE_ENTRY,
+ NULL);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ list_add_tail(&server->list, &cmd_args->volfile_servers);
+
+ ret = 0;
+out:
+ if (-1 == ret) {
+ if (server) {
+ GF_FREE(server->volfile_server);
+ GF_FREE(server->transport);
+ GF_FREE(server);
+ }
+ }
+
+ return ret;
+}
+
+/* Sets log file path from user provided arguments */
+int
+gf_set_log_file_path(cmd_args_t *cmd_args, glusterfs_ctx_t *ctx)
+{
+ int i = 0;
+ int j = 0;
+ int ret = 0;
+ int tmp_len = 0;
+ char tmp_str[1024] = {
+ 0,
+ };
+
+ if (!cmd_args)
+ goto done;
+
+ if (cmd_args->mount_point) {
+ j = 0;
+ i = 0;
+ if (cmd_args->mount_point[0] == '/')
+ i = 1;
+ for (; i < strlen(cmd_args->mount_point); i++, j++) {
+ tmp_str[j] = cmd_args->mount_point[i];
+ if (cmd_args->mount_point[i] == '/')
+ tmp_str[j] = '-';
+ }
+
+ ret = gf_asprintf(&cmd_args->log_file,
+ DEFAULT_LOG_FILE_DIRECTORY "/%s.log", tmp_str);
+ if (ret > 0)
+ ret = 0;
+ goto done;
+ }
+
+ if (ctx && GF_GLUSTERD_PROCESS == ctx->process_mode) {
+ ret = gf_asprintf(&cmd_args->log_file,
+ DEFAULT_LOG_FILE_DIRECTORY "/%s.log", GLUSTERD_NAME);
+ if (ret > 0)
+ ret = 0;
+
+ goto done;
+ }
+
+ if (cmd_args->volfile) {
+ j = 0;
+ i = 0;
+ if (cmd_args->volfile[0] == '/')
+ i = 1;
+ for (; i < strlen(cmd_args->volfile); i++, j++) {
+ tmp_str[j] = cmd_args->volfile[i];
+ if (cmd_args->volfile[i] == '/')
+ tmp_str[j] = '-';
+ }
+ ret = gf_asprintf(&cmd_args->log_file,
+ DEFAULT_LOG_FILE_DIRECTORY "/%s.log", tmp_str);
+ if (ret > 0)
+ ret = 0;
+ goto done;
+ }
+
+ if (cmd_args->volfile_server) {
+ if (strncmp(cmd_args->volfile_server_transport, "unix", 4) == 0) {
+ if (cmd_args->volfile_server[0] == '/')
+ i = 1;
+ tmp_len = strlen(cmd_args->volfile_server);
+ for (j = 0; i < tmp_len; i++, j++) {
+ tmp_str[j] = cmd_args->volfile_server[i];
+ if (cmd_args->volfile_server[i] == '/')
+ tmp_str[j] = '-';
+ }
+ ret = gf_asprintf(&cmd_args->log_file, "%s/%s-%s-%d.log",
+ DEFAULT_LOG_FILE_DIRECTORY, tmp_str,
+ cmd_args->volfile_id, getpid());
+ } else {
+ ret = gf_asprintf(&cmd_args->log_file, "%s/%s-%s-%d.log",
+ DEFAULT_LOG_FILE_DIRECTORY,
+ cmd_args->volfile_server, cmd_args->volfile_id,
+ getpid());
+ }
+ if (ret > 0)
+ ret = 0;
+ }
+done:
+ return ret;
+}
+
+int
+gf_set_log_ident(cmd_args_t *cmd_args)
+{
+ int ret = 0;
+ char *ptr = NULL;
+
+ if (cmd_args->log_file == NULL) {
+ /* no ident source */
+ return 0;
+ }
+
+ /* TODO: Some idents would look like, etc-glusterfs-glusterd.vol, which
+ * seems ugly and can be bettered? */
+ /* just get the filename as the ident */
+ if (NULL != (ptr = strrchr(cmd_args->log_file, '/'))) {
+ ret = gf_asprintf(&cmd_args->log_ident, "%s", ptr + 1);
+ } else {
+ ret = gf_asprintf(&cmd_args->log_ident, "%s", cmd_args->log_file);
+ }
+
+ if (ret > 0)
+ ret = 0;
+ else
+ return ret;
+
+ /* remove .log suffix */
+ if (NULL != (ptr = strrchr(cmd_args->log_ident, '.'))) {
+ if (strcmp(ptr, ".log") == 0) {
+ ptr[0] = '\0';
+ }
+ }
+
+ return ret;
+}
+
+int
+gf_thread_cleanup_xint(pthread_t thread)
+{
+ int ret = 0;
+ void *res = NULL;
+
+ ret = pthread_cancel(thread);
+ if (ret != 0)
+ goto error_return;
+
+ ret = pthread_join(thread, &res);
+ if (ret != 0)
+ goto error_return;
+
+ if (res != PTHREAD_CANCELED)
+ goto error_return;
+
+ ret = 0;
+
+error_return:
+ return ret;
+}
+
+void
+gf_thread_set_vname(pthread_t thread, const char *name, va_list args)
{
- *fmt = gf_timefmt_last;
- *fmts = __gf_timefmts;
- *zeros = __gf_zerotimes;
+ char thread_name[GF_THREAD_NAME_LIMIT];
+ int ret;
+
+ /* Initialize the thread name with the prefix (not NULL terminated). */
+ memcpy(thread_name, GF_THREAD_NAME_PREFIX,
+ sizeof(GF_THREAD_NAME_PREFIX) - 1);
+
+ ret = vsnprintf(thread_name + sizeof(GF_THREAD_NAME_PREFIX) - 1,
+ sizeof(thread_name) - sizeof(GF_THREAD_NAME_PREFIX) + 1,
+ name, args);
+ if (ret < 0) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PTHREAD_NAMING_FAILED,
+ "name=%s", name, NULL);
+ return;
+ }
+
+ if (ret >= sizeof(thread_name)) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_THREAD_NAME_TOO_LONG,
+ "name=%s", thread_name, NULL);
+ }
+
+#ifdef GF_LINUX_HOST_OS
+ ret = pthread_setname_np(thread, thread_name);
+#elif defined(__NetBSD__)
+ ret = pthread_setname_np(thread, thread_name, NULL);
+#elif defined(__FreeBSD__)
+ pthread_set_name_np(thread, thread_name);
+ ret = 0;
+#else
+ ret = ENOSYS;
+#endif
+ if (ret != 0) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, ret, LG_MSG_SET_THREAD_FAILED,
+ "name=%s", thread_name, NULL);
+ }
}
+void
+gf_thread_set_name(pthread_t thread, const char *name, ...)
+{
+ va_list args;
+
+ va_start(args, name);
+ gf_thread_set_vname(thread, name, args);
+ va_end(args);
+}
+
+int
+gf_thread_vcreate(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg, const char *name,
+ va_list args)
+{
+ sigset_t set, old;
+ int ret;
+
+ sigemptyset(&old);
+ sigfillset(&set);
+ sigdelset(&set, SIGSEGV);
+ sigdelset(&set, SIGBUS);
+ sigdelset(&set, SIGILL);
+ sigdelset(&set, SIGSYS);
+ sigdelset(&set, SIGFPE);
+ sigdelset(&set, SIGABRT);
+
+ pthread_sigmask(SIG_BLOCK, &set, &old);
+
+ ret = pthread_create(thread, attr, start_routine, arg);
+ if (ret != 0) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ret, LG_MSG_THREAD_CREATE_FAILED,
+ NULL);
+ ret = -1;
+ } else if (name != NULL) {
+ gf_thread_set_vname(*thread, name, args);
+ }
+
+ pthread_sigmask(SIG_SETMASK, &old, NULL);
+
+ return ret;
+}
+
+int
+gf_thread_create(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg, const char *name,
+ ...)
+{
+ va_list args;
+ int ret;
+
+ va_start(args, name);
+ ret = gf_thread_vcreate(thread, attr, start_routine, arg, name, args);
+ va_end(args);
+
+ return ret;
+}
+
+int
+gf_thread_create_detached(pthread_t *thread, void *(*start_routine)(void *),
+ void *arg, const char *name, ...)
+{
+ pthread_attr_t attr;
+ va_list args;
+ int ret = -1;
+
+ ret = pthread_attr_init(&attr);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, ret, LG_MSG_PTHREAD_ATTR_INIT_FAILED,
+ NULL);
+ return -1;
+ }
+
+ pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+
+ va_start(args, name);
+ ret = gf_thread_vcreate(thread, &attr, start_routine, arg, name, args);
+ va_end(args);
+
+ pthread_attr_destroy(&attr);
+
+ return ret;
+}
+
+int
+gf_skip_header_section(int fd, int header_len)
+{
+ int ret = -1;
+
+ ret = sys_lseek(fd, header_len, SEEK_SET);
+ if (ret == (off_t)-1) {
+ gf_smsg("", GF_LOG_ERROR, 0, LG_MSG_SKIP_HEADER_FAILED, NULL);
+ } else {
+ ret = 0;
+ }
+
+ return ret;
+}
+
+/* Below function is use to check at runtime if pid is running */
+
+gf_boolean_t
+gf_is_pid_running(int pid)
+{
+#ifdef __FreeBSD__
+ int ret = -1;
+
+ ret = sys_kill(pid, 0);
+ if (ret < 0) {
+ return _gf_false;
+ }
+#else
+ char fname[32] = {
+ 0,
+ };
+ int fd = -1;
+
+ snprintf(fname, sizeof(fname), "/proc/%d/cmdline", pid);
+
+ fd = sys_open(fname, O_RDONLY, 0);
+ if (fd < 0) {
+ return _gf_false;
+ }
+
+ sys_close(fd);
+#endif
+ return _gf_true;
+}
+
+gf_boolean_t
+gf_is_service_running(char *pidfile, int *pid)
+{
+ FILE *file = NULL;
+ gf_boolean_t running = _gf_false;
+ int ret = 0;
+ int fno = 0;
+
+ file = fopen(pidfile, "r+");
+ if (!file) {
+ goto out;
+ }
+
+ fno = fileno(file);
+ ret = lockf(fno, F_TEST, 0);
+ if (ret == -1) {
+ running = _gf_true;
+ }
+
+ ret = fscanf(file, "%d", pid);
+ if (ret <= 0) {
+ gf_smsg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED, "pidfile=%s",
+ pidfile, NULL);
+ *pid = -1;
+ running = _gf_false;
+ goto out;
+ }
+
+ running = gf_is_pid_running(*pid);
+out:
+ if (file)
+ fclose(file);
+ return running;
+}
+
+/* Check if the pid is > 0 */
+gf_boolean_t
+gf_valid_pid(const char *pid, int length)
+{
+ gf_boolean_t ret = _gf_true;
+ pid_t value = 0;
+ char *end_ptr = NULL;
+
+ if (length <= 0) {
+ ret = _gf_false;
+ goto out;
+ }
+
+ value = strtol(pid, &end_ptr, 10);
+ if (value <= 0) {
+ ret = _gf_false;
+ }
+out:
+ return ret;
+}
+
+static int
+dht_is_linkfile_key(dict_t *this, char *key, data_t *value, void *data)
+{
+ gf_boolean_t *linkfile_key_found = NULL;
+
+ if (!data)
+ goto out;
+
+ linkfile_key_found = data;
+
+ *linkfile_key_found = _gf_true;
+out:
+ return 0;
+}
+
+gf_boolean_t
+dht_is_linkfile(struct iatt *buf, dict_t *dict)
+{
+ gf_boolean_t linkfile_key_found = _gf_false;
+
+ if (!IS_DHT_LINKFILE_MODE(buf))
+ return _gf_false;
+
+ dict_foreach_fnmatch(dict, "*." DHT_LINKFILE_STR, dht_is_linkfile_key,
+ &linkfile_key_found);
+
+ return linkfile_key_found;
+}
+
+int
+gf_check_log_format(const char *value)
+{
+ int log_format = -1;
+
+ if (!strcasecmp(value, GF_LOG_FORMAT_NO_MSG_ID))
+ log_format = gf_logformat_traditional;
+ else if (!strcasecmp(value, GF_LOG_FORMAT_WITH_MSG_ID))
+ log_format = gf_logformat_withmsgid;
+
+ if (log_format == -1)
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_LOG,
+ "possible_values=" GF_LOG_FORMAT_NO_MSG_ID
+ "|" GF_LOG_FORMAT_WITH_MSG_ID,
+ NULL);
+
+ return log_format;
+}
+
+int
+gf_check_logger(const char *value)
+{
+ int logger = -1;
+
+ if (!strcasecmp(value, GF_LOGGER_GLUSTER_LOG))
+ logger = gf_logger_glusterlog;
+ else if (!strcasecmp(value, GF_LOGGER_SYSLOG))
+ logger = gf_logger_syslog;
+
+ if (logger == -1)
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_LOG,
+ "possible_values=" GF_LOGGER_GLUSTER_LOG "|" GF_LOGGER_SYSLOG,
+ NULL);
+
+ return logger;
+}
+
+/* gf_compare_sockaddr compares the given addresses @addr1 and @addr2 for
+ * equality, ie. if they both refer to the same address.
+ *
+ * This was inspired by sock_addr_cmp_addr() from
+ * https://www.opensource.apple.com/source/postfix/postfix-197/postfix/src/util/sock_addr.c
+ */
+gf_boolean_t
+gf_compare_sockaddr(const struct sockaddr *addr1, const struct sockaddr *addr2)
+{
+ GF_ASSERT(addr1 != NULL);
+ GF_ASSERT(addr2 != NULL);
+
+ /* Obviously, the addresses don't match if their families are different
+ */
+ if (addr1->sa_family != addr2->sa_family)
+ return _gf_false;
+
+ if (AF_INET == addr1->sa_family) {
+ if (((struct sockaddr_in *)addr1)->sin_addr.s_addr ==
+ ((struct sockaddr_in *)addr2)->sin_addr.s_addr)
+ return _gf_true;
+
+ } else if (AF_INET6 == addr1->sa_family) {
+ if (memcmp((char *)&((struct sockaddr_in6 *)addr1)->sin6_addr,
+ (char *)&((struct sockaddr_in6 *)addr2)->sin6_addr,
+ sizeof(struct in6_addr)) == 0)
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+/*
+ * gf_set_timestamp:
+ * It sets the mtime and atime of 'dest' file as of 'src'.
+ */
+
+int
+gf_set_timestamp(const char *src, const char *dest)
+{
+ struct stat sb = {
+ 0,
+ };
+#if defined(HAVE_UTIMENSAT)
+ struct timespec new_time[2] = {{
+ 0,
+ },
+ {
+ 0,
+ }};
+#else
+ struct timeval new_time[2] = {{
+ 0,
+ },
+ {
+ 0,
+ }};
+#endif
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(src);
+ GF_ASSERT(dest);
+
+ ret = sys_stat(src, &sb);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, LG_MSG_FILE_STAT_FAILED,
+ "stat=%s", src, NULL);
+ goto out;
+ }
+ /* The granularity is nano seconds if `utimensat()` is available,
+ * and micro seconds otherwise.
+ */
+#if defined(HAVE_UTIMENSAT)
+ new_time[0].tv_sec = sb.st_atime;
+ new_time[0].tv_nsec = ST_ATIM_NSEC(&sb);
+
+ new_time[1].tv_sec = sb.st_mtime;
+ new_time[1].tv_nsec = ST_MTIM_NSEC(&sb);
+
+ /* dirfd = 0 is ignored because `dest` is an absolute path. */
+ ret = sys_utimensat(AT_FDCWD, dest, new_time, AT_SYMLINK_NOFOLLOW);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, LG_MSG_UTIMENSAT_FAILED,
+ "dest=%s", dest, NULL);
+ }
+#else
+ new_time[0].tv_sec = sb.st_atime;
+ new_time[0].tv_usec = ST_ATIM_NSEC(&sb) / 1000;
+
+ new_time[1].tv_sec = sb.st_mtime;
+ new_time[1].tv_usec = ST_MTIM_NSEC(&sb) / 1000;
+
+ ret = sys_utimes(dest, new_time);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, LG_MSG_UTIMES_FAILED,
+ "dest=%s", dest, NULL);
+ }
+#endif
+out:
+ return ret;
+}
+
+static void
+gf_backtrace_end(char *buf, size_t frames)
+{
+ size_t pos = 0;
+
+ if (!buf)
+ return;
+
+ pos = strlen(buf);
+
+ frames = min(frames, GF_BACKTRACE_LEN - pos - 1);
+
+ if (0 == frames)
+ return;
+
+ memset(buf + pos, ')', frames);
+ buf[pos + frames] = '\0';
+}
+
+/*Returns bytes written*/
+static int
+gf_backtrace_append(char *buf, size_t pos, char *framestr)
+{
+ if (pos >= GF_BACKTRACE_LEN)
+ return -1;
+ return snprintf(buf + pos, GF_BACKTRACE_LEN - pos, "(--> %s ", framestr);
+}
+
+static int
+gf_backtrace_fillframes(char *buf)
+{
+ void *array[GF_BACKTRACE_FRAME_COUNT];
+ size_t frames = 0;
+ FILE *fp = NULL;
+ char callingfn[GF_BACKTRACE_FRAME_COUNT - 2][1024] = {
+ {0},
+ };
+ int ret = -1;
+ int fd = -1;
+ size_t idx = 0;
+ size_t pos = 0;
+ size_t inc = 0;
+ char tmpl[] = "/tmp/glfs-bt-XXXXXX";
+
+ frames = backtrace(array, GF_BACKTRACE_FRAME_COUNT);
+ if (!frames)
+ return -1;
+
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+ fd = mkstemp(tmpl);
+ if (fd == -1)
+ return -1;
+
+ /* Calling unlink so that when the file is closed or program
+ * terminates the temporary file is deleted.
+ */
+ ret = sys_unlink(tmpl);
+ if (ret < 0) {
+ gf_smsg(THIS->name, GF_LOG_INFO, 0, LG_MSG_FILE_DELETE_FAILED,
+ "temporary_file=%s", tmpl, NULL);
+ }
+
+ /*The most recent two frames are the calling function and
+ * gf_backtrace_save, which we can infer.*/
+
+ backtrace_symbols_fd(&array[2], frames - 2, fd);
+
+ fp = fdopen(fd, "r");
+ if (!fp) {
+ sys_close(fd);
+ goto out;
+ }
+
+ ret = fseek(fp, 0L, SEEK_SET);
+ if (ret)
+ goto out;
+
+ pos = 0;
+ for (idx = 0; idx < frames - 2; idx++) {
+ ret = fscanf(fp, "%1023s", callingfn[idx]);
+ if (ret == EOF)
+ break;
+ inc = gf_backtrace_append(buf, pos, callingfn[idx]);
+ if (inc == -1)
+ break;
+ pos += inc;
+ }
+ gf_backtrace_end(buf, idx);
+
+out:
+ if (fp)
+ fclose(fp);
+
+ return (idx > 0) ? 0 : -1;
+}
+
+/* Optionally takes @buf to save backtrace. If @buf is NULL, uses the
+ * pre-allocated ctx->btbuf to avoid allocating memory while printing
+ * backtrace.
+ * TODO: This API doesn't provide flexibility in terms of no. of frames
+ * of the backtrace is being saved in the buffer. Deferring fixing it
+ * when there is a real-use for that.*/
+
+char *
+gf_backtrace_save(char *buf)
+{
+ char *bt = NULL;
+
+ if (!buf) {
+ bt = THIS->ctx->btbuf;
+ GF_ASSERT(bt);
+
+ } else {
+ bt = buf;
+ }
+
+ if ((0 == gf_backtrace_fillframes(bt)))
+ return bt;
+
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_BACKTRACE_SAVE_FAILED, NULL);
+ return NULL;
+}
+
+gf_loglevel_t
+fop_log_level(glusterfs_fop_t fop, int op_errno)
+{
+ /* if gfid doesn't exist ESTALE comes */
+ if (op_errno == ENOENT || op_errno == ESTALE)
+ return GF_LOG_DEBUG;
+
+ if ((fop == GF_FOP_ENTRYLK) || (fop == GF_FOP_FENTRYLK) ||
+ (fop == GF_FOP_FINODELK) || (fop == GF_FOP_INODELK) ||
+ (fop == GF_FOP_LK)) {
+ /*
+ * if non-blocking lock fails EAGAIN comes
+ * if locks xlator is not loaded ENOSYS comes
+ */
+ if (op_errno == EAGAIN || op_errno == ENOSYS)
+ return GF_LOG_DEBUG;
+ }
+
+ if ((fop == GF_FOP_GETXATTR) || (fop == GF_FOP_FGETXATTR)) {
+ if (op_errno == ENOTSUP || op_errno == ENODATA)
+ return GF_LOG_DEBUG;
+ }
+
+ if ((fop == GF_FOP_SETXATTR) || (fop == GF_FOP_FSETXATTR) ||
+ (fop == GF_FOP_REMOVEXATTR) || (fop == GF_FOP_FREMOVEXATTR)) {
+ if (op_errno == ENOTSUP)
+ return GF_LOG_DEBUG;
+ }
+
+ if (fop == GF_FOP_MKNOD || fop == GF_FOP_MKDIR)
+ if (op_errno == EEXIST)
+ return GF_LOG_DEBUG;
+
+ if (fop == GF_FOP_SEEK) {
+#ifdef HAVE_SEEK_HOLE
+ if (op_errno == ENXIO) {
+ return GF_LOG_DEBUG;
+ }
+#else
+ return GF_LOG_DEBUG;
+#endif
+ }
+
+ return GF_LOG_ERROR;
+}
+
+/* This function will build absolute path of file/directory from the
+ * current location and relative path given from the current location
+ * For example consider our current path is /a/b/c/ and relative path
+ * from current location is ./../x/y/z .After parsing through this
+ * function the absolute path becomes /a/b/x/y/z/.
+ *
+ * The function gives a pointer to absolute path if it is successful
+ * and also returns zero.
+ * Otherwise function gives NULL pointer with returning an err value.
+ *
+ * So the user need to free memory allocated for path.
+ *
+ */
+
+int32_t
+gf_build_absolute_path(char *current_path, char *relative_path, char **path)
+{
+ char *absolute_path = NULL;
+ char *token = NULL;
+ char *component = NULL;
+ char *saveptr = NULL;
+ char *end = NULL;
+ int ret = 0;
+ size_t relativepath_len = 0;
+ size_t currentpath_len = 0;
+ size_t max_absolutepath_len = 0;
+
+ GF_ASSERT(current_path);
+ GF_ASSERT(relative_path);
+ GF_ASSERT(path);
+
+ if (!path || !current_path || !relative_path) {
+ ret = -EFAULT;
+ goto err;
+ }
+ /* Check for current and relative path
+ * current path should be absolute one and start from '/'
+ * relative path should not start from '/'
+ */
+ currentpath_len = strlen(current_path);
+ if (current_path[0] != '/' || (currentpath_len > PATH_MAX)) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_WRONG_VALUE,
+ "current-path=%s", current_path, NULL);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ relativepath_len = strlen(relative_path);
+ if (relative_path[0] == '/' || (relativepath_len > PATH_MAX)) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_WRONG_VALUE,
+ "relative-path=%s", relative_path, NULL);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ /* It is maximum possible value for absolute path */
+ max_absolutepath_len = currentpath_len + relativepath_len + 2;
+
+ absolute_path = GF_CALLOC(1, max_absolutepath_len, gf_common_mt_char);
+ if (!absolute_path) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ absolute_path[0] = '\0';
+
+ /* If current path is root i.e contains only "/", we do not
+ * need to copy it
+ */
+ if (strcmp(current_path, "/") != 0) {
+ strcpy(absolute_path, current_path);
+
+ /* We trim '/' at the end for easier string manipulation */
+ gf_path_strip_trailing_slashes(absolute_path);
+ }
+
+ /* Used to spilt relative path based on '/' */
+ component = gf_strdup(relative_path);
+ if (!component) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ /* In the relative path, we want to consider ".." and "."
+ * if token is ".." , we just need to reduce one level hierarchy
+ * if token is "." , we just ignore it
+ * if token is NULL , end of relative path
+ * if absolute path becomes '\0' and still "..", then it is a bad
+ * relative path, it points to out of boundary area and stop
+ * building the absolute path
+ * All other cases we just concatenate token to the absolute path
+ */
+ for (token = strtok_r(component, "/", &saveptr),
+ end = strchr(absolute_path, '\0');
+ token; token = strtok_r(NULL, "/", &saveptr)) {
+ if (strcmp(token, ".") == 0)
+ continue;
+
+ else if (strcmp(token, "..") == 0) {
+ if (absolute_path[0] == '\0') {
+ ret = -EACCES;
+ goto err;
+ }
+
+ end = strrchr(absolute_path, '/');
+ *end = '\0';
+ } else {
+ ret = snprintf(end, max_absolutepath_len - strlen(absolute_path),
+ "/%s", token);
+ end = strchr(absolute_path, '\0');
+ }
+ }
+
+ if (strlen(absolute_path) > PATH_MAX) {
+ ret = -EINVAL;
+ goto err;
+ }
+ *path = gf_strdup(absolute_path);
+
+err:
+ if (component)
+ GF_FREE(component);
+ if (absolute_path)
+ GF_FREE(absolute_path);
+ return ret;
+}
+
+/* This is an utility function which will recursively delete
+ * a folder and its contents.
+ *
+ * @param delete_path folder to be deleted.
+ *
+ * @return 0 on success and -1 on failure.
+ */
+int
+recursive_rmdir(const char *delete_path)
+{
+ int ret = -1;
+ char path[PATH_MAX] = {
+ 0,
+ };
+ struct stat st = {
+ 0,
+ };
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_VALIDATE_OR_GOTO(this->name, delete_path, out);
+
+ dir = sys_opendir(delete_path);
+ if (!dir) {
+ gf_msg_debug(this->name, 0,
+ "Failed to open directory %s. "
+ "Reason : %s",
+ delete_path, strerror(errno));
+ ret = 0;
+ goto out;
+ }
+
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
+ snprintf(path, PATH_MAX, "%s/%s", delete_path, entry->d_name);
+ ret = sys_lstat(path, &st);
+ if (ret == -1) {
+ gf_msg_debug(this->name, 0,
+ "Failed to stat entry %s :"
+ " %s",
+ path, strerror(errno));
+ (void)sys_closedir(dir);
+ goto out;
+ }
+
+ if (S_ISDIR(st.st_mode))
+ ret = recursive_rmdir(path);
+ else
+ ret = sys_unlink(path);
+
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ " Failed to remove %s. "
+ "Reason : %s",
+ path, strerror(errno));
+ }
+
+ gf_msg_debug(this->name, 0, "%s %s",
+ ret ? "Failed to remove" : "Removed", entry->d_name);
+ }
+
+ ret = sys_closedir(dir);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Failed to close dir %s. Reason :"
+ " %s",
+ delete_path, strerror(errno));
+ }
+
+ ret = sys_rmdir(delete_path);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to rmdir: %s,err: %s", delete_path,
+ strerror(errno));
+ }
+
+out:
+ return ret;
+}
+/*
+ * Input: Array of strings 'array' terminating in NULL
+ * string 'elem' to be searched in the array
+ *
+ * Output: Index of the element in the array if found, '-1' otherwise
+ */
+int
+gf_get_index_by_elem(char **array, char *elem)
+{
+ int i = 0;
+
+ for (i = 0; array[i]; i++) {
+ if (strcmp(elem, array[i]) == 0)
+ return i;
+ }
+
+ return -1;
+}
+
+static int
+get_pathinfo_host(char *pathinfo, char *hostname, size_t size)
+{
+ char *start = NULL;
+ char *end = NULL;
+ int ret = -1;
+ int i = 0;
+
+ if (!pathinfo)
+ goto out;
+
+ start = strchr(pathinfo, ':');
+ if (!start)
+ goto out;
+
+ end = strrchr(pathinfo, ':');
+ if (start == end)
+ goto out;
+
+ memset(hostname, 0, size);
+ i = 0;
+ while (++start != end)
+ hostname[i++] = *start;
+ ret = 0;
+out:
+ return ret;
+}
+
+/*Note: 'pathinfo' should be gathered only from one brick*/
+int
+glusterfs_is_local_pathinfo(char *pathinfo, gf_boolean_t *is_local)
+{
+ int ret = 0;
+ char pathinfohost[1024] = {0};
+ char localhost[1024] = {0};
+
+ *is_local = _gf_false;
+ ret = get_pathinfo_host(pathinfo, pathinfohost, sizeof(pathinfohost));
+ if (ret)
+ goto out;
+
+ ret = gethostname(localhost, sizeof(localhost));
+ if (ret)
+ goto out;
+
+ if (!strcmp(localhost, pathinfohost))
+ *is_local = _gf_true;
+out:
+ return ret;
+}
+
+ssize_t
+gf_nread(int fd, void *buf, size_t count)
+{
+ ssize_t ret = 0;
+ ssize_t read_bytes = 0;
+
+ for (read_bytes = 0; read_bytes < count; read_bytes += ret) {
+ ret = sys_read(fd, buf + read_bytes, count - read_bytes);
+ if (ret == 0) {
+ break;
+ } else if (ret < 0) {
+ if (errno == EINTR)
+ ret = 0;
+ else
+ goto out;
+ }
+ }
+
+ ret = read_bytes;
+out:
+ return ret;
+}
+
+ssize_t
+gf_nwrite(int fd, const void *buf, size_t count)
+{
+ ssize_t ret = 0;
+ ssize_t written = 0;
+
+ for (written = 0; written != count; written += ret) {
+ ret = sys_write(fd, buf + written, count - written);
+ if (ret < 0) {
+ if (errno == EINTR)
+ ret = 0;
+ else
+ goto out;
+ }
+ }
+
+ ret = written;
+out:
+ return ret;
+}
+
+void
+gf_free_mig_locks(lock_migration_info_t *locks)
+{
+ lock_migration_info_t *current = NULL;
+ lock_migration_info_t *temp = NULL;
+
+ if (!locks)
+ return;
+
+ if (list_empty(&locks->list))
+ return;
+
+ list_for_each_entry_safe(current, temp, &locks->list, list)
+ {
+ list_del_init(&current->list);
+ GF_FREE(current->client_uid);
+ GF_FREE(current);
+ }
+}
+
+void
+_mask_cancellation(void)
+{
+ (void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
+}
+
+void
+_unmask_cancellation(void)
+{
+ (void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+}
+
+/* This is a wrapper function to add a pointer to a list,
+ * which doesn't contain list member
+ */
+struct list_node *
+_list_node_add(void *ptr, struct list_head *list,
+ int (*compare)(struct list_head *, struct list_head *))
+{
+ struct list_node *node = NULL;
+
+ if (ptr == NULL || list == NULL)
+ goto out;
+
+ node = GF_CALLOC(1, sizeof(struct list_node), gf_common_list_node);
+
+ if (node == NULL)
+ goto out;
+
+ node->ptr = ptr;
+ if (compare)
+ list_add_order(&node->list, list, compare);
+ else
+ list_add_tail(&node->list, list);
+out:
+ return node;
+}
+
+struct list_node *
+list_node_add(void *ptr, struct list_head *list)
+{
+ return _list_node_add(ptr, list, NULL);
+}
+
+struct list_node *
+list_node_add_order(void *ptr, struct list_head *list,
+ int (*compare)(struct list_head *, struct list_head *))
+{
+ return _list_node_add(ptr, list, compare);
+}
+
+void
+list_node_del(struct list_node *node)
+{
+ if (node == NULL)
+ return;
+
+ list_del_init(&node->list);
+ GF_FREE(node);
+}
+
+const char *
+fop_enum_to_pri_string(glusterfs_fop_t fop)
+{
+ switch (fop) {
+ case GF_FOP_OPEN:
+ case GF_FOP_STAT:
+ case GF_FOP_FSTAT:
+ case GF_FOP_LOOKUP:
+ case GF_FOP_ACCESS:
+ case GF_FOP_READLINK:
+ case GF_FOP_OPENDIR:
+ case GF_FOP_STATFS:
+ case GF_FOP_READDIR:
+ case GF_FOP_READDIRP:
+ case GF_FOP_GETACTIVELK:
+ case GF_FOP_SETACTIVELK:
+ case GF_FOP_ICREATE:
+ case GF_FOP_NAMELINK:
+ return "HIGH";
+
+ case GF_FOP_CREATE:
+ case GF_FOP_FLUSH:
+ case GF_FOP_LK:
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ case GF_FOP_UNLINK:
+ case GF_FOP_SETATTR:
+ case GF_FOP_FSETATTR:
+ case GF_FOP_MKNOD:
+ case GF_FOP_MKDIR:
+ case GF_FOP_RMDIR:
+ case GF_FOP_SYMLINK:
+ case GF_FOP_RENAME:
+ case GF_FOP_LINK:
+ case GF_FOP_SETXATTR:
+ case GF_FOP_GETXATTR:
+ case GF_FOP_FGETXATTR:
+ case GF_FOP_FSETXATTR:
+ case GF_FOP_REMOVEXATTR:
+ case GF_FOP_FREMOVEXATTR:
+ case GF_FOP_IPC:
+ case GF_FOP_LEASE:
+ return "NORMAL";
+
+ case GF_FOP_READ:
+ case GF_FOP_WRITE:
+ case GF_FOP_FSYNC:
+ case GF_FOP_TRUNCATE:
+ case GF_FOP_FTRUNCATE:
+ case GF_FOP_FSYNCDIR:
+ case GF_FOP_XATTROP:
+ case GF_FOP_FXATTROP:
+ case GF_FOP_RCHECKSUM:
+ case GF_FOP_ZEROFILL:
+ case GF_FOP_FALLOCATE:
+ case GF_FOP_SEEK:
+ return "LOW";
+
+ case GF_FOP_NULL:
+ case GF_FOP_FORGET:
+ case GF_FOP_RELEASE:
+ case GF_FOP_RELEASEDIR:
+ case GF_FOP_GETSPEC:
+ case GF_FOP_MAXVALUE:
+ case GF_FOP_DISCARD:
+ return "LEAST";
+ default:
+ return "UNKNOWN";
+ }
+}
+
+const char *
+gf_inode_type_to_str(ia_type_t type)
+{
+ static const char *const str_ia_type[] = {
+ "UNKNOWN", "REGULAR FILE", "DIRECTORY", "LINK",
+ "BLOCK DEVICE", "CHARACTER DEVICE", "PIPE", "SOCKET"};
+ return str_ia_type[type];
+}
+
+gf_boolean_t
+gf_is_zero_filled_stat(struct iatt *buf)
+{
+ if (!buf)
+ return 1;
+
+ /* Do not use st_dev because it is transformed to store the xlator id
+ * in place of the device number. Do not use st_ino because by this time
+ * we've already mapped the root ino to 1 so it is not guaranteed to be
+ * 0.
+ */
+ if ((buf->ia_nlink == 0) && (buf->ia_ctime == 0))
+ return 1;
+
+ return 0;
+}
+
+void
+gf_zero_fill_stat(struct iatt *buf)
+{
+ buf->ia_nlink = 0;
+ buf->ia_ctime = 0;
+}
+
+gf_boolean_t
+gf_is_valid_xattr_namespace(char *key)
+{
+ static char *xattr_namespaces[] = {"trusted.", "system.", "user.",
+ "security.", NULL};
+ int i = 0;
+
+ for (i = 0; xattr_namespaces[i]; i++) {
+ if (strncmp(key, xattr_namespaces[i], strlen(xattr_namespaces[i])) == 0)
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+ino_t
+gfid_to_ino(uuid_t gfid)
+{
+ ino_t ino = 0;
+ int32_t i;
+
+ for (i = 8; i < 16; i++) {
+ ino <<= 8;
+ ino += (uint8_t)gfid[i];
+ }
+
+ return ino;
+}
+
+int
+gf_bits_count(uint64_t n)
+{
+ int val = 0;
+#if defined(__GNUC__) || defined(__clang__)
+ val = __builtin_popcountll(n);
+#else
+ n -= (n >> 1) & 0x5555555555555555ULL;
+ n = ((n >> 2) & 0x3333333333333333ULL) + (n & 0x3333333333333333ULL);
+ n = (n + (n >> 4)) & 0x0F0F0F0F0F0F0F0FULL;
+ n += n >> 8;
+ n += n >> 16;
+ n += n >> 32;
+ val = n & 0xFF;
+#endif
+ return val;
+}
+
+int
+gf_bits_index(uint64_t n)
+{
+#if defined(__GNUC__) || defined(__clang__)
+ return __builtin_ffsll(n) - 1;
+#else
+ return ffsll(n) - 1;
+#endif
+}
+
+const char *
+gf_fop_string(glusterfs_fop_t fop)
+{
+ if ((fop > GF_FOP_NULL) && (fop < GF_FOP_MAXVALUE))
+ return gf_fop_list[fop];
+ return "INVALID";
+}
+
+int
+gf_fop_int(char *fop)
+{
+ int i = 0;
+
+ for (i = GF_FOP_NULL + 1; i < GF_FOP_MAXVALUE; i++) {
+ if (strcasecmp(fop, gf_fop_list[i]) == 0)
+ return i;
+ }
+ return -1;
+}
+
+int
+close_fds_except(int *fdv, size_t count)
+{
+ int i = 0;
+ size_t j = 0;
+ gf_boolean_t should_close = _gf_true;
+#ifdef GF_LINUX_HOST_OS
+ DIR *d = NULL;
+ struct dirent *de = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char *e = NULL;
+
+ d = sys_opendir("/proc/self/fd");
+ if (!d)
+ return -1;
+
+ for (;;) {
+ should_close = _gf_true;
+
+ errno = 0;
+ de = sys_readdir(d, scratch);
+ if (!de || errno != 0)
+ break;
+ i = strtoul(de->d_name, &e, 10);
+ if (*e != '\0' || i == dirfd(d))
+ continue;
+
+ for (j = 0; j < count; j++) {
+ if (i == fdv[j]) {
+ should_close = _gf_false;
+ break;
+ }
+ }
+ if (should_close)
+ sys_close(i);
+ }
+ sys_closedir(d);
+#else /* !GF_LINUX_HOST_OS */
+ struct rlimit rl;
+ int ret = -1;
+
+ ret = getrlimit(RLIMIT_NOFILE, &rl);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < rl.rlim_cur; i++) {
+ should_close = _gf_true;
+ for (j = 0; j < count; j++) {
+ if (i == fdv[j]) {
+ should_close = _gf_false;
+ break;
+ }
+ }
+ if (should_close)
+ sys_close(i);
+ }
+#endif /* !GF_LINUX_HOST_OS */
+ return 0;
+}
+
+/**
+ * gf_getgrouplist - get list of groups to which a user belongs
+ *
+ * A convenience wrapper for getgrouplist(3).
+ *
+ * @param user - same as in getgrouplist(3)
+ * @param group - same as in getgrouplist(3)
+ * @param groups - pointer to a gid_t pointer
+ *
+ * gf_getgrouplist allocates a gid_t buffer which is big enough to
+ * hold the list of auxiliary group ids for user, up to the GF_MAX_AUX_GROUPS
+ * threshold. Upon successful invocation groups will be pointed to that buffer.
+ *
+ * @return success: the number of auxiliary group ids retrieved
+ * failure: -1
+ */
+int
+gf_getgrouplist(const char *user, gid_t group, gid_t **groups)
+{
+ int ret = -1;
+ int ngroups = SMALL_GROUP_COUNT;
+
+ *groups = GF_CALLOC(sizeof(gid_t), ngroups, gf_common_mt_groups_t);
+ if (!*groups)
+ return -1;
+
+ /*
+ * We are running getgrouplist() in a loop until we succeed (or hit
+ * certain exit conditions, see the comments below). This is because
+ * the indicated number of auxiliary groups that we obtain in case of
+ * the failure of the first invocation is not guaranteed to keep its
+ * validity upon the next invocation with a gid buffer of that size.
+ */
+ for (;;) {
+ int ngroups_old = ngroups;
+ ret = getgrouplist(user, group, *groups, &ngroups);
+ if (ret != -1)
+ break;
+
+ if (ngroups >= GF_MAX_AUX_GROUPS) {
+ /*
+ * This should not happen as GF_MAX_AUX_GROUPS is set
+ * to the max value of number of supported auxiliary
+ * groups across all platforms supported by GlusterFS.
+ * However, if it still happened some way, we wouldn't
+ * care about the incompleteness of the result, we'd
+ * just go on with what we got.
+ */
+ return GF_MAX_AUX_GROUPS;
+ } else if (ngroups <= ngroups_old) {
+ /*
+ * There is an edge case that getgrouplist() fails but
+ * ngroups remains the same. This is actually not
+ * specified in getgrouplist(3), but implementations
+ * can do this upon internal failure[1]. To avoid
+ * falling into an infinite loop when this happens, we
+ * break the loop if the getgrouplist call failed
+ * without an increase in the indicated group number.
+ *
+ * [1]
+ * https://sourceware.org/git/?p=glibc.git;a=blob;f=grp/initgroups.c;hb=refs/heads/release/2.25/master#l168
+ */
+ GF_FREE(*groups);
+ return -1;
+ }
+
+ *groups = GF_REALLOC(*groups, ngroups * sizeof(gid_t));
+ if (!*groups)
+ return -1;
+ }
+ return ret;
+}
+
+int
+glusterfs_compute_sha256(const unsigned char *content, size_t size,
+ char *sha256_hash)
+{
+ SHA256_CTX sha256;
+
+ SHA256_Init(&sha256);
+ SHA256_Update(&sha256, (const unsigned char *)(content), size);
+ SHA256_Final((unsigned char *)sha256_hash, &sha256);
+
+ return 0;
+}
+
+/* * Safe wrapper function for strncpy.
+ * This wrapper makes sure that when there is no null byte among the first n in
+ * source srting for strncpy function call, the string placed in dest will be
+ * null-terminated.
+ */
+
+char *
+gf_strncpy(char *dest, const char *src, const size_t dest_size)
+{
+ strncpy(dest, src, dest_size - 1);
+ dest[dest_size - 1] = '\0';
+ return dest;
+}
+
+int
+gf_replace_old_iatt_in_dict(dict_t *xdata)
+{
+ int ret;
+ struct old_iatt *o_iatt; /* old iatt structure */
+ struct iatt *c_iatt; /* current iatt */
+
+ if (!xdata) {
+ return 0;
+ }
+
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&c_iatt);
+ if (ret < 0) {
+ return 0;
+ }
+
+ o_iatt = GF_CALLOC(1, sizeof(struct old_iatt), gf_common_mt_char);
+ if (!o_iatt) {
+ return -1;
+ }
+
+ oldiatt_from_iatt(o_iatt, c_iatt);
+
+ ret = dict_set_bin(xdata, DHT_IATT_IN_XDATA_KEY, o_iatt,
+ sizeof(struct old_iatt));
+ if (ret) {
+ GF_FREE(o_iatt);
+ }
+
+ return ret;
+}
+
+int
+gf_replace_new_iatt_in_dict(dict_t *xdata)
+{
+ int ret;
+ struct old_iatt *o_iatt; /* old iatt structure */
+ struct iatt *c_iatt; /* new iatt */
+
+ if (!xdata) {
+ return 0;
+ }
+
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&o_iatt);
+ if (ret < 0) {
+ return 0;
+ }
+
+ c_iatt = GF_CALLOC(1, sizeof(struct iatt), gf_common_mt_char);
+ if (!c_iatt) {
+ return -1;
+ }
+
+ iatt_from_oldiatt(c_iatt, o_iatt);
+
+ ret = dict_set_bin(xdata, DHT_IATT_IN_XDATA_KEY, c_iatt,
+ sizeof(struct iatt));
+ if (ret) {
+ GF_FREE(c_iatt);
+ }
+
+ return ret;
+}
+
+xlator_cmdline_option_t *
+find_xlator_option_in_cmd_args_t(const char *option_name, cmd_args_t *args)
+{
+ xlator_cmdline_option_t *pos = NULL;
+ xlator_cmdline_option_t *tmp = NULL;
+
+ list_for_each_entry_safe(pos, tmp, &args->xlator_options, cmd_args)
+ {
+ if (strcmp(pos->key, option_name) == 0)
+ return pos;
+ }
+ return NULL;
+}
+
+int
+gf_d_type_from_ia_type(ia_type_t type)
+{
+ switch (type) {
+ case IA_IFDIR:
+ return DT_DIR;
+ case IA_IFCHR:
+ return DT_CHR;
+ case IA_IFBLK:
+ return DT_BLK;
+ case IA_IFIFO:
+ return DT_FIFO;
+ case IA_IFLNK:
+ return DT_LNK;
+ case IA_IFREG:
+ return DT_REG;
+ case IA_IFSOCK:
+ return DT_SOCK;
+ default:
+ return DT_UNKNOWN;
+ }
+}
+
+int
+gf_nanosleep(uint64_t nsec)
+{
+ struct timespec req;
+ struct timespec rem;
+ int ret = -1;
+
+ req.tv_sec = nsec / GF_SEC_IN_NS;
+ req.tv_nsec = nsec % GF_SEC_IN_NS;
+
+ do {
+ ret = nanosleep(&req, &rem);
+ req = rem;
+ } while (ret == -1 && errno == EINTR);
+
+ return ret;
+}
+
+int
+gf_syncfs(int fd)
+{
+ int ret = 0;
+#if defined(HAVE_SYNCFS)
+ /* Linux with glibc recent enough. */
+ ret = syncfs(fd);
+#elif defined(HAVE_SYNCFS_SYS)
+ /* Linux with no library function. */
+ ret = syscall(SYS_syncfs, fd);
+#else
+ /* Fallback to generic UNIX stuff. */
+ sync();
+#endif
+ return ret;
+}
+
+char **
+get_xattrs_to_heal()
+{
+ return xattrs_to_heal;
+}
diff --git a/libglusterfs/src/common-utils.h b/libglusterfs/src/common-utils.h
deleted file mode 100644
index 86e4e53cc96..00000000000
--- a/libglusterfs/src/common-utils.h
+++ /dev/null
@@ -1,518 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _COMMON_UTILS_H
-#define _COMMON_UTILS_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdint.h>
-#include <sys/uio.h>
-#include <netdb.h>
-#include <stdlib.h>
-#include <string.h>
-#include <assert.h>
-#include <pthread.h>
-#ifndef GF_BSD_HOST_OS
-#include <alloca.h>
-#endif
-
-void trap (void);
-
-#define GF_UNIVERSAL_ANSWER 42 /* :O */
-
-/* To solve type punned error */
-#define VOID(ptr) ((void **) ((void *) ptr))
-
-#include "logging.h"
-#include "glusterfs.h"
-#include "locking.h"
-#include "mem-pool.h"
-#include "uuid.h"
-
-
-
-#define min(a,b) ((a)<(b)?(a):(b))
-#define max(a,b) ((a)>(b)?(a):(b))
-#define roof(a,b) ((((a)+(b)-1)/((b)?(b):1))*(b))
-#define floor(a,b) (((a)/((b)?(b):1))*(b))
-
-
-#define GF_UNIT_KB 1024ULL
-#define GF_UNIT_MB 1048576ULL
-#define GF_UNIT_GB 1073741824ULL
-#define GF_UNIT_TB 1099511627776ULL
-#define GF_UNIT_PB 1125899906842624ULL
-
-#define GF_UNIT_KB_STRING "KB"
-#define GF_UNIT_MB_STRING "MB"
-#define GF_UNIT_GB_STRING "GB"
-#define GF_UNIT_TB_STRING "TB"
-#define GF_UNIT_PB_STRING "PB"
-
-#define GF_UNIT_PERCENT_STRING "%"
-
-#define GEOREP "geo-replication"
-#define GHADOOP "glusterfs-hadoop"
-
-#define WIPE(statp) do { typeof(*statp) z = {0,}; if (statp) *statp = z; } while (0)
-
-#define IS_EXT_FS(fs_name) \
- (!strcmp (fs_name, "ext2") || \
- !strcmp (fs_name, "ext3") || \
- !strcmp (fs_name, "ext4"))
-
-/* Defining this here as it is needed by glusterd for setting
- * nfs port in volume status.
- */
-#define GF_NFS3_PORT 38467
-
-enum _gf_boolean
-{
- _gf_false = 0,
- _gf_true = 1
-};
-
-/*
- * we could have initialized these as +ve values and treated
- * them as negative while comparing etc.. (which would have
- * saved us with the pain of assigning values), but since we
- * only have a couple of clients that use this feature, it's
- * okay.
- */
-enum _gf_client_pid
-{
- GF_CLIENT_PID_MAX = 0,
- GF_CLIENT_PID_GSYNCD = -1,
- GF_CLIENT_PID_HADOOP = -2,
- GF_CLIENT_PID_DEFRAG = -3,
-};
-
-typedef enum _gf_boolean gf_boolean_t;
-typedef enum _gf_client_pid gf_client_pid_t;
-typedef int (*gf_cmp) (void *, void *);
-
-void gf_global_variable_init(void);
-
-in_addr_t gf_resolve_ip (const char *hostname, void **dnscache);
-
-void gf_log_volume_file (FILE *specfp);
-void gf_print_trace (int32_t signal);
-
-extern char *gf_fop_list[GF_FOP_MAXVALUE];
-extern char *gf_mgmt_list[GF_MGMT_MAXVALUE];
-
-#define VECTORSIZE(count) (count * (sizeof (struct iovec)))
-
-#define STRLEN_0(str) (strlen(str) + 1)
-#define VALIDATE_OR_GOTO(arg,label) do { \
- if (!arg) { \
- errno = EINVAL; \
- gf_log_callingfn ((this ? (this->name) : \
- "(Govinda! Govinda!)"), \
- GF_LOG_WARNING, \
- "invalid argument: " #arg); \
- goto label; \
- } \
- } while (0)
-
-#define GF_VALIDATE_OR_GOTO(name,arg,label) do { \
- if (!arg) { \
- errno = EINVAL; \
- gf_log_callingfn (name, GF_LOG_ERROR, \
- "invalid argument: " #arg); \
- goto label; \
- } \
- } while (0)
-
-#define GF_VALIDATE_OR_GOTO_WITH_ERROR(name, arg, label, errno, error) do { \
- if (!arg) { \
- errno = error; \
- gf_log_callingfn (name, GF_LOG_ERROR, \
- "invalid argument: " #arg); \
- goto label; \
- } \
- }while (0)
-
-#define GF_ASSERT_AND_GOTO_WITH_ERROR(name, arg, label, errno, error) do { \
- if (!arg) { \
- GF_ASSERT (0); \
- errno = error; \
- goto label; \
- } \
- }while (0)
-
-#define GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO(name,arg,label) \
- do { \
- GF_VALIDATE_OR_GOTO (name, arg, label); \
- if ((arg[0]) != '/') { \
- errno = EINVAL; \
- gf_log_callingfn (name, GF_LOG_ERROR, \
- "invalid argument: " #arg); \
- goto label; \
- } \
- } while (0)
-
-#define GF_REMOVE_SLASH_FROM_PATH(path, string) \
- do { \
- int i = 0; \
- for (i = 1; i < strlen (path); i++) { \
- string[i-1] = path[i]; \
- if (string[i-1] == '/') \
- string[i-1] = '-'; \
- } \
- } while (0)
-
-
-#define GF_IF_INTERNAL_XATTR_GOTO(pattern, dict, trav, op_errno, label) \
- do { \
- if (!dict) { \
- gf_log (this->name, GF_LOG_ERROR, \
- "setxattr dict is null"); \
- goto label; \
- } \
- trav = dict->members_list; \
- while (trav) { \
- if (!fnmatch (pattern, trav->key, 0)) { \
- op_errno = EPERM; \
- gf_log (this->name, GF_LOG_ERROR, \
- "attempt to set internal" \
- " xattr: %s: %s", trav->key, \
- strerror (op_errno)); \
- goto label; \
- } \
- trav = trav->next; \
- } \
- } while (0)
-
-#define GF_IF_NATIVE_XATTR_GOTO(pattern, key, op_errno, label) \
- do { \
- if (!key) { \
- gf_log (this->name, GF_LOG_ERROR, \
- "no key for removexattr"); \
- goto label; \
- } \
- if (!fnmatch (pattern, key, 0)) { \
- op_errno = EPERM; \
- gf_log (this->name, GF_LOG_ERROR, \
- "attempt to remove internal " \
- "xattr: %s: %s", key, \
- strerror (op_errno)); \
- goto label; \
- } \
- } while (0)
-
-
-#define GF_FILE_CONTENT_REQUESTED(_xattr_req,_content_limit) \
- (dict_get_uint64 (_xattr_req, "glusterfs.content", _content_limit) == 0)
-
-#ifdef DEBUG
-#define GF_ASSERT(x) assert (x);
-#else
-#define GF_ASSERT(x) \
- do { \
- if (!(x)) { \
- gf_log_callingfn ("", GF_LOG_ERROR, \
- "Assertion failed: " #x); \
- } \
- } while (0)
-#endif
-
-#define GF_UUID_ASSERT(u) \
- if (uuid_is_null (u))\
- GF_ASSERT (!"uuid null");
-
-union gf_sock_union {
- struct sockaddr_storage storage;
- struct sockaddr_in6 sin6;
- struct sockaddr_in sin;
- struct sockaddr sa;
-};
-
-#define GF_HIDDEN_PATH ".glusterfs"
-
-static inline void
-iov_free (struct iovec *vector, int count)
-{
- int i;
-
- for (i = 0; i < count; i++)
- FREE (vector[i].iov_base);
-
- GF_FREE (vector);
-}
-
-
-static inline int
-iov_length (const struct iovec *vector, int count)
-{
- int i = 0;
- size_t size = 0;
-
- for (i = 0; i < count; i++)
- size += vector[i].iov_len;
-
- return size;
-}
-
-
-static inline struct iovec *
-iov_dup (struct iovec *vector, int count)
-{
- int bytecount = 0;
- int i;
- struct iovec *newvec = NULL;
-
- bytecount = (count * sizeof (struct iovec));
- newvec = GF_MALLOC (bytecount, gf_common_mt_iovec);
- if (!newvec)
- return NULL;
-
- for (i = 0; i < count; i++) {
- newvec[i].iov_len = vector[i].iov_len;
- newvec[i].iov_base = vector[i].iov_base;
- }
-
- return newvec;
-}
-
-
-static inline int
-iov_subset (struct iovec *orig, int orig_count,
- off_t src_offset, off_t dst_offset,
- struct iovec *new)
-{
- int new_count = 0;
- int i;
- off_t offset = 0;
- size_t start_offset = 0;
- size_t end_offset = 0;
-
-
- for (i = 0; i < orig_count; i++) {
- if ((offset + orig[i].iov_len < src_offset)
- || (offset > dst_offset)) {
- goto not_subset;
- }
-
- if (!new) {
- goto count_only;
- }
-
- start_offset = 0;
- end_offset = orig[i].iov_len;
-
- if (src_offset >= offset) {
- start_offset = (src_offset - offset);
- }
-
- if (dst_offset <= (offset + orig[i].iov_len)) {
- end_offset = (dst_offset - offset);
- }
-
- new[new_count].iov_base = orig[i].iov_base + start_offset;
- new[new_count].iov_len = end_offset - start_offset;
-
- count_only:
- new_count++;
-
- not_subset:
- offset += orig[i].iov_len;
- }
-
- return new_count;
-}
-
-
-static inline void
-iov_unload (char *buf, const struct iovec *vector, int count)
-{
- int i;
- int copied = 0;
-
- for (i = 0; i < count; i++) {
- memcpy (buf + copied, vector[i].iov_base, vector[i].iov_len);
- copied += vector[i].iov_len;
- }
-}
-
-
-static inline int
-mem_0filled (const char *buf, size_t size)
-{
- int i = 0;
- int ret = 0;
-
- for (i = 0; i < size; i++) {
- ret = buf[i];
- if (ret)
- break;
- }
-
- return ret;
-}
-
-
-static inline int
-iov_0filled (struct iovec *vector, int count)
-{
- int i = 0;
- int ret = 0;
-
- for (i = 0; i < count; i++) {
- ret = mem_0filled (vector[i].iov_base, vector[i].iov_len);
- if (ret)
- break;
- }
-
- return ret;
-}
-
-
-static inline void *
-memdup (const void *ptr, size_t size)
-{
- void *newptr = NULL;
-
- newptr = GF_MALLOC (size, gf_common_mt_memdup);
- if (!newptr)
- return NULL;
-
- memcpy (newptr, ptr, size);
- return newptr;
-}
-
-typedef enum {
- gf_timefmt_default = 0,
- gf_timefmt_FT = 0, /* YYYY-MM-DD hh:mm:ss */
- gf_timefmt_Ymd_T, /* YYYY/MM-DD-hh:mm:ss */
- gf_timefmt_bdT, /* ddd DD hh:mm:ss */
- gf_timefmt_F_HMS, /* YYYY-MM-DD hhmmss */
- gf_timefmt_last
-} gf_timefmts;
-
-static inline void
-gf_time_fmt (char *dst, size_t sz_dst, time_t utime, unsigned int fmt)
-{
- extern void _gf_timestuff (gf_timefmts *, const char ***, const char ***);
- static gf_timefmts timefmt_last = (gf_timefmts) -1;
- static const char **fmts;
- static const char **zeros;
- struct tm tm;
-
- if (timefmt_last == -1)
- _gf_timestuff (&timefmt_last, &fmts, &zeros);
- if (timefmt_last < fmt) fmt = gf_timefmt_default;
- if (gmtime_r (&utime, &tm) != NULL) {
- strftime (dst, sz_dst, fmts[fmt], &tm);
- } else {
- strncpy (dst, zeros[fmt], sz_dst);
- }
-}
-
-int
-mkdir_p (char *path, mode_t mode, gf_boolean_t allow_symlinks);
-/*
- * rounds up nr to power of two. If nr is already a power of two, just returns
- * nr
- */
-
-int32_t gf_roundup_power_of_two (uint32_t nr);
-
-/*
- * rounds up nr to next power of two. If nr is already a power of two, next
- * power of two is returned.
- */
-
-int32_t gf_roundup_next_power_of_two (uint32_t nr);
-
-char *gf_trim (char *string);
-int gf_strsplit (const char *str, const char *delim,
- char ***tokens, int *token_count);
-int gf_volume_name_validate (const char *volume_name);
-
-int gf_string2long (const char *str, long *n);
-int gf_string2ulong (const char *str, unsigned long *n);
-int gf_string2int (const char *str, int *n);
-int gf_string2uint (const char *str, unsigned int *n);
-int gf_string2double (const char *str, double *n);
-int gf_string2longlong (const char *str, long long *n);
-int gf_string2ulonglong (const char *str, unsigned long long *n);
-
-int gf_string2int8 (const char *str, int8_t *n);
-int gf_string2int16 (const char *str, int16_t *n);
-int gf_string2int32 (const char *str, int32_t *n);
-int gf_string2int64 (const char *str, int64_t *n);
-int gf_string2uint8 (const char *str, uint8_t *n);
-int gf_string2uint16 (const char *str, uint16_t *n);
-int gf_string2uint32 (const char *str, uint32_t *n);
-int gf_string2uint64 (const char *str, uint64_t *n);
-
-int gf_strstr (const char *str, const char *delim, const char *match);
-
-int gf_string2ulong_base10 (const char *str, unsigned long *n);
-int gf_string2uint_base10 (const char *str, unsigned int *n);
-int gf_string2uint8_base10 (const char *str, uint8_t *n);
-int gf_string2uint16_base10 (const char *str, uint16_t *n);
-int gf_string2uint32_base10 (const char *str, uint32_t *n);
-int gf_string2uint64_base10 (const char *str, uint64_t *n);
-
-int gf_string2bytesize (const char *str, uint64_t *n);
-int gf_string2percent_or_bytesize (const char *str, uint64_t *n,
- gf_boolean_t *is_percent);
-
-int gf_string2boolean (const char *str, gf_boolean_t *b);
-int gf_string2percent (const char *str, uint32_t *n);
-int gf_string2time (const char *str, uint32_t *n);
-
-int gf_lockfd (int fd);
-int gf_unlockfd (int fd);
-
-int get_checksum_for_file (int fd, uint32_t *checksum);
-int log_base2 (unsigned long x);
-
-int get_checksum_for_path (char *path, uint32_t *checksum);
-
-char *strtail (char *str, const char *pattern);
-void skipwhite (char **s);
-char *nwstrtail (char *str, char *pattern);
-void skip_word (char **str);
-/* returns a new string with nth word of given string. n>=1 */
-char *get_nth_word (const char *str, int n);
-
-char valid_host_name (char *address, int length);
-char valid_ipv4_address (char *address, int length, gf_boolean_t wildcard_acc);
-char valid_ipv6_address (char *address, int length, gf_boolean_t wildcard_acc);
-char valid_internet_address (char *address, gf_boolean_t wildcard_acc);
-char valid_ipv4_wildcard_check (char *address);
-char valid_ipv6_wildcard_check (char *address);
-char valid_wildcard_internet_address (char *address);
-
-char *uuid_utoa (uuid_t uuid);
-char *uuid_utoa_r (uuid_t uuid, char *dst);
-char *lkowner_utoa (gf_lkowner_t *lkowner);
-char *lkowner_utoa_r (gf_lkowner_t *lkowner, char *dst, int len);
-
-void gf_array_insertionsort (void *a, int l, int r, size_t elem_size,
- gf_cmp cmp);
-int gf_is_str_int (const char *value);
-
-char *gf_uint64_2human_readable (uint64_t);
-int validate_brick_name (char *brick);
-char *get_host_name (char *word, char **host);
-char *get_path_name (char *word, char **path);
-void gf_path_strip_trailing_slashes (char *path);
-uint64_t get_mem_size ();
-int gf_strip_whitespace (char *str, int len);
-int gf_canonicalize_path (char *path);
-#endif /* _COMMON_UTILS_H */
diff --git a/libglusterfs/src/compat-errno.c b/libglusterfs/src/compat-errno.c
index fd5cc49cefa..df57e243239 100644
--- a/libglusterfs/src/compat-errno.c
+++ b/libglusterfs/src/compat-errno.c
@@ -8,15 +8,9 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <stdint.h>
-#include "compat-errno.h"
-
+#include "glusterfs/compat-errno.h"
static int32_t gf_error_to_errno_array[1024];
static int32_t gf_errno_to_error_array[1024];
@@ -25,906 +19,937 @@ static int32_t gf_compat_errno_init_done;
#ifdef GF_SOLARIS_HOST_OS
static void
-init_compat_errno_arrays ()
+init_compat_errno_arrays()
{
-/* ENOMSG 35 / * No message of desired type */
- gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
- gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
-
-/* EIDRM 36 / * Identifier removed */
- gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
- gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
-
-/* ECHRNG 37 / * Channel number out of range */
- gf_error_to_errno_array[GF_ERROR_CODE_CHRNG] = ECHRNG;
- gf_errno_to_error_array[ECHRNG] = GF_ERROR_CODE_CHRNG;
-
-/* EL2NSYNC 38 / * Level 2 not synchronized */
- gf_error_to_errno_array[GF_ERROR_CODE_L2NSYNC] = EL2NSYNC;
- gf_errno_to_error_array[EL2NSYNC] = GF_ERROR_CODE_L2NSYNC;
-
-/* EL3HLT 39 / * Level 3 halted */
- gf_error_to_errno_array[GF_ERROR_CODE_L3HLT] = EL3HLT;
- gf_errno_to_error_array[EL3HLT] = GF_ERROR_CODE_L3HLT;
-
-/* EL3RST 40 / * Level 3 reset */
- gf_error_to_errno_array[GF_ERROR_CODE_L3RST] = EL3RST;
- gf_errno_to_error_array[EL3RST] = GF_ERROR_CODE_L3RST;
-
-/* ELNRNG 41 / * Link number out of range */
- gf_error_to_errno_array[GF_ERROR_CODE_LNRNG] = ELNRNG;
- gf_errno_to_error_array[ELNRNG] = GF_ERROR_CODE_LNRNG;
-
-/* EUNATCH 42 / * Protocol driver not attached */
- gf_error_to_errno_array[GF_ERROR_CODE_UNATCH] = EUNATCH;
- gf_errno_to_error_array[EUNATCH] = GF_ERROR_CODE_UNATCH;
-
-/* ENOCSI 43 / * No CSI structure available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOCSI] = ENOCSI;
- gf_errno_to_error_array[ENOCSI] = GF_ERROR_CODE_NOCSI;
-
-/* EL2HLT 44 / * Level 2 halted */
- gf_error_to_errno_array[GF_ERROR_CODE_L2HLT] = EL2HLT;
- gf_errno_to_error_array[EL2HLT] = GF_ERROR_CODE_L2HLT;
-
-/* EDEADLK 45 / * Deadlock condition. */
- gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
- gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
-
-/* ENOLCK 46 / * No record locks available. */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
- gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
-
-/* ECANCELED 47 / * Operation canceled */
- gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
- gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
-
-/* ENOTSUP 48 / * Operation not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTSUPP] = ENOTSUP;
- gf_errno_to_error_array[ENOTSUP] = GF_ERROR_CODE_NOTSUPP;
-
-/* Filesystem Quotas */
-/* EDQUOT 49 / * Disc quota exceeded */
- gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
- gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
-
-/* Convergent Error Returns */
-/* EBADE 50 / * invalid exchange */
- gf_error_to_errno_array[GF_ERROR_CODE_BADE] = EBADE;
- gf_errno_to_error_array[EBADE] = GF_ERROR_CODE_BADE;
-/* EBADR 51 / * invalid request descriptor */
- gf_error_to_errno_array[GF_ERROR_CODE_BADR] = EBADR;
- gf_errno_to_error_array[EBADR] = GF_ERROR_CODE_BADR;
-/* EXFULL 52 / * exchange full */
- gf_error_to_errno_array[GF_ERROR_CODE_XFULL] = EXFULL;
- gf_errno_to_error_array[EXFULL] = GF_ERROR_CODE_XFULL;
-/* ENOANO 53 / * no anode */
- gf_error_to_errno_array[GF_ERROR_CODE_NOANO] = ENOANO;
- gf_errno_to_error_array[ENOANO] = GF_ERROR_CODE_NOANO;
-/* EBADRQC 54 / * invalid request code */
- gf_error_to_errno_array[GF_ERROR_CODE_BADRQC] = EBADRQC;
- gf_errno_to_error_array[EBADRQC] = GF_ERROR_CODE_BADRQC;
-/* EBADSLT 55 / * invalid slot */
- gf_error_to_errno_array[GF_ERROR_CODE_BADSLT] = EBADSLT;
- gf_errno_to_error_array[EBADSLT] = GF_ERROR_CODE_BADSLT;
-/* EDEADLOCK 56 / * file locking deadlock error */
-/* This is same as EDEADLK on linux */
- gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLOCK;
- gf_errno_to_error_array[EDEADLOCK] = GF_ERROR_CODE_DEADLK;
-
-/* EBFONT 57 / * bad font file fmt */
- gf_error_to_errno_array[GF_ERROR_CODE_BFONT] = EBFONT;
- gf_errno_to_error_array[EBFONT] = GF_ERROR_CODE_BFONT;
-
-/* Interprocess Robust Locks */
-/* EOWNERDEAD 58 / * process died with the lock */
- gf_error_to_errno_array[GF_ERROR_CODE_OWNERDEAD] = EOWNERDEAD;
- gf_errno_to_error_array[EOWNERDEAD] = GF_ERROR_CODE_OWNERDEAD;
-/* ENOTRECOVERABLE 59 / * lock is not recoverable */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTRECOVERABLE] = ENOTRECOVERABLE;
- gf_errno_to_error_array[ENOTRECOVERABLE] = GF_ERROR_CODE_NOTRECOVERABLE;
-
-/* stream problems */
-/* ENOSTR 60 / * Device not a stream */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSTR] = ENOSTR;
- gf_errno_to_error_array[ENOSTR] = GF_ERROR_CODE_NOSTR;
-/* ENODATA 61 / * no data (for no delay io) */
- gf_error_to_errno_array[GF_ERROR_CODE_NODATA] = ENODATA;
- gf_errno_to_error_array[ENODATA] = GF_ERROR_CODE_NODATA;
-/* ETIME 62 / * timer expired */
- gf_error_to_errno_array[GF_ERROR_CODE_TIME] = ETIME;
- gf_errno_to_error_array[ETIME] = GF_ERROR_CODE_TIME;
-/* ENOSR 63 / * out of streams resources */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSR] = ENOSR;
- gf_errno_to_error_array[ENOSR] = GF_ERROR_CODE_NOSR;
-
-/* ENONET 64 / * Machine is not on the network */
- gf_error_to_errno_array[GF_ERROR_CODE_NONET] = ENONET;
- gf_errno_to_error_array[ENONET] = GF_ERROR_CODE_NONET;
-/* ENOPKG 65 / * Package not installed */
- gf_error_to_errno_array[GF_ERROR_CODE_NOPKG] = ENOPKG;
- gf_errno_to_error_array[ENOPKG] = GF_ERROR_CODE_NOPKG;
-/* EREMOTE 66 / * The object is remote */
- gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
- gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
-/* ENOLINK 67 / * the link has been severed */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
- gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
-/* EADV 68 / * advertise error */
- gf_error_to_errno_array[GF_ERROR_CODE_ADV] = EADV;
- gf_errno_to_error_array[EADV] = GF_ERROR_CODE_ADV;
-/* ESRMNT 69 / * srmount error */
- gf_error_to_errno_array[GF_ERROR_CODE_SRMNT] = ESRMNT;
- gf_errno_to_error_array[ESRMNT] = GF_ERROR_CODE_SRMNT;
-
-/* ECOMM 70 / * Communication error on send */
- gf_error_to_errno_array[GF_ERROR_CODE_COMM] = ECOMM;
- gf_errno_to_error_array[ECOMM] = GF_ERROR_CODE_COMM;
-/* EPROTO 71 / * Protocol error */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
- gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
-
-/* Interprocess Robust Locks */
-/* ELOCKUNMAPPED 72 / * locked lock was unmapped */
- gf_error_to_errno_array[GF_ERROR_CODE_LOCKUNMAPPED] = ELOCKUNMAPPED;
- gf_errno_to_error_array[ELOCKUNMAPPED] = GF_ERROR_CODE_LOCKUNMAPPED;
-
-/* ENOTACTIVE 73 / * Facility is not active */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTACTIVE] = ENOTACTIVE;
- gf_errno_to_error_array[ENOTACTIVE] = GF_ERROR_CODE_NOTACTIVE;
-/* EMULTIHOP 74 / * multihop attempted */
- gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
- gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
-/* EBADMSG 77 / * trying to read unreadable message */
- gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
- gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
-/* ENAMETOOLONG 78 / * path name is too long */
- gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
- gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
-/* EOVERFLOW 79 / * value too large to be stored in data type */
- gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
- gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
-/* ENOTUNIQ 80 / * given log. name not unique */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTUNIQ] = ENOTUNIQ;
- gf_errno_to_error_array[ENOTUNIQ] = GF_ERROR_CODE_NOTUNIQ;
-/* EBADFD 81 / * f.d. invalid for this operation */
- gf_error_to_errno_array[GF_ERROR_CODE_BADFD] = EBADFD;
- gf_errno_to_error_array[EBADFD] = GF_ERROR_CODE_BADFD;
-/* EREMCHG 82 / * Remote address changed */
- gf_error_to_errno_array[GF_ERROR_CODE_REMCHG] = EREMCHG;
- gf_errno_to_error_array[EREMCHG] = GF_ERROR_CODE_REMCHG;
-
-/* shared library problems */
-/* ELIBACC 83 / * Can't access a needed shared lib. */
- gf_error_to_errno_array[GF_ERROR_CODE_LIBACC] = ELIBACC;
- gf_errno_to_error_array[ELIBACC] = GF_ERROR_CODE_LIBACC;
-/* ELIBBAD 84 / * Accessing a corrupted shared lib. */
- gf_error_to_errno_array[GF_ERROR_CODE_LIBBAD] = ELIBBAD;
- gf_errno_to_error_array[ELIBBAD] = GF_ERROR_CODE_LIBBAD;
-/* ELIBSCN 85 / * .lib section in a.out corrupted. */
- gf_error_to_errno_array[GF_ERROR_CODE_LIBSCN] = ELIBSCN;
- gf_errno_to_error_array[ELIBSCN] = GF_ERROR_CODE_LIBSCN;
-/* ELIBMAX 86 / * Attempting to link in too many libs. */
- gf_error_to_errno_array[GF_ERROR_CODE_LIBMAX] = ELIBMAX;
- gf_errno_to_error_array[ELIBMAX] = GF_ERROR_CODE_LIBMAX;
-/* ELIBEXEC 87 / * Attempting to exec a shared library. */
- gf_error_to_errno_array[GF_ERROR_CODE_LIBEXEC] = ELIBEXEC;
- gf_errno_to_error_array[ELIBEXEC] = GF_ERROR_CODE_LIBEXEC;
-/* EILSEQ 88 / * Illegal byte sequence. */
- gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
- gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
-/* ENOSYS 89 / * Unsupported file system operation */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
- gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
-/* ELOOP 90 / * Symbolic link loop */
- gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
- gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
-/* ERESTART 91 / * Restartable system call */
- gf_error_to_errno_array[GF_ERROR_CODE_RESTART] = ERESTART;
- gf_errno_to_error_array[ERESTART] = GF_ERROR_CODE_RESTART;
-/* ESTRPIPE 92 / * if pipe/FIFO, don't sleep in stream head */
- gf_error_to_errno_array[GF_ERROR_CODE_STRPIPE] = ESTRPIPE;
- gf_errno_to_error_array[ESTRPIPE] = GF_ERROR_CODE_STRPIPE;
-/* ENOTEMPTY 93 / * directory not empty */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
- gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
-/* EUSERS 94 / * Too many users (for UFS) */
- gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
- gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
-
-/* BSD Networking Software */
- /* argument errors */
-/* ENOTSOCK 95 / * Socket operation on non-socket */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
- gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
-/* EDESTADDRREQ 96 / * Destination address required */
- gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
- gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
-/* EMSGSIZE 97 / * Message too long */
- gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
- gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
-/* EPROTOTYPE 98 / * Protocol wrong type for socket */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
- gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
-/* ENOPROTOOPT 99 / * Protocol not available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
- gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
-/* EPROTONOSUPPORT 120 / * Protocol not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
- gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
-/* ESOCKTNOSUPPORT 121 / * Socket type not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
- gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
-
-/* EOPNOTSUPP 122 / * Operation not supported on socket */
- gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
- gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
-/* EPFNOSUPPORT 123 / * Protocol family not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
- gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
-/* EAFNOSUPPORT 124 / * Address family not supported by */
- /* protocol family */
- gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
- gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
-/* EADDRINUSE 125 / * Address already in use */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
- gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
-/* EADDRNOTAVAIL 126 / * Can't assign requested address */
- /* operational errors */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
- gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
-/* ENETDOWN 127 / * Network is down */
- gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
- gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
-/* ENETUNREACH 128 / * Network is unreachable */
- gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
- gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
-/* ENETRESET 129 / * Network dropped connection because */
- /* of reset */
- gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
- gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
-/* ECONNABORTED 130 / * Software caused connection abort */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
- gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
-/* ECONNRESET 131 / * Connection reset by peer */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
- gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
-/* ENOBUFS 132 / * No buffer space available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
- gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
-/* EISCONN 133 / * Socket is already connected */
- gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
- gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
-/* ENOTCONN 134 / * Socket is not connected */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
- gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
-/* XENIX has 135 - 142 */
-/* ESHUTDOWN 143 / * Can't send after socket shutdown */
- gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
- gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
-/* ETOOMANYREFS 144 / * Too many references: can't splice */
- gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
- gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
-/* ETIMEDOUT 145 / * Connection timed out */
- gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
- gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
-
-/* ECONNREFUSED 146 / * Connection refused */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
- gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
-/* EHOSTDOWN 147 / * Host is down */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
- gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
-/* EHOSTUNREACH 148 / * No route to host */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
- gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
-/* EALREADY 149 / * operation already in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
- gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
-/* EINPROGRESS 150 / * operation now in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
- gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
-
-/* SUN Network File System */
-/* ESTALE 151 / * Stale NFS file handle */
- gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
- gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
-
- return ;
+ /* ENOMSG 35 / * No message of desired type */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
+ gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
+
+ /* EIDRM 36 / * Identifier removed */
+ gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
+ gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
+
+ /* ECHRNG 37 / * Channel number out of range */
+ gf_error_to_errno_array[GF_ERROR_CODE_CHRNG] = ECHRNG;
+ gf_errno_to_error_array[ECHRNG] = GF_ERROR_CODE_CHRNG;
+
+ /* EL2NSYNC 38 / * Level 2 not synchronized */
+ gf_error_to_errno_array[GF_ERROR_CODE_L2NSYNC] = EL2NSYNC;
+ gf_errno_to_error_array[EL2NSYNC] = GF_ERROR_CODE_L2NSYNC;
+
+ /* EL3HLT 39 / * Level 3 halted */
+ gf_error_to_errno_array[GF_ERROR_CODE_L3HLT] = EL3HLT;
+ gf_errno_to_error_array[EL3HLT] = GF_ERROR_CODE_L3HLT;
+
+ /* EL3RST 40 / * Level 3 reset */
+ gf_error_to_errno_array[GF_ERROR_CODE_L3RST] = EL3RST;
+ gf_errno_to_error_array[EL3RST] = GF_ERROR_CODE_L3RST;
+
+ /* ELNRNG 41 / * Link number out of range */
+ gf_error_to_errno_array[GF_ERROR_CODE_LNRNG] = ELNRNG;
+ gf_errno_to_error_array[ELNRNG] = GF_ERROR_CODE_LNRNG;
+
+ /* EUNATCH 42 / * Protocol driver not attached */
+ gf_error_to_errno_array[GF_ERROR_CODE_UNATCH] = EUNATCH;
+ gf_errno_to_error_array[EUNATCH] = GF_ERROR_CODE_UNATCH;
+
+ /* ENOCSI 43 / * No CSI structure available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOCSI] = ENOCSI;
+ gf_errno_to_error_array[ENOCSI] = GF_ERROR_CODE_NOCSI;
+
+ /* EL2HLT 44 / * Level 2 halted */
+ gf_error_to_errno_array[GF_ERROR_CODE_L2HLT] = EL2HLT;
+ gf_errno_to_error_array[EL2HLT] = GF_ERROR_CODE_L2HLT;
+
+ /* EDEADLK 45 / * Deadlock condition. */
+ gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
+ gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
+
+ /* ENOLCK 46 / * No record locks available. */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
+ gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
+
+ /* ECANCELED 47 / * Operation canceled */
+ gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
+ gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
+
+ /* ENOTSUP 48 / * Operation not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTSUPP] = ENOTSUP;
+ gf_errno_to_error_array[ENOTSUP] = GF_ERROR_CODE_NOTSUPP;
+
+ /* Filesystem Quotas */
+ /* EDQUOT 49 / * Disc quota exceeded */
+ gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
+ gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
+
+ /* Convergent Error Returns */
+ /* EBADE 50 / * invalid exchange */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADE] = EBADE;
+ gf_errno_to_error_array[EBADE] = GF_ERROR_CODE_BADE;
+ /* EBADR 51 / * invalid request descriptor */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADR] = EBADR;
+ gf_errno_to_error_array[EBADR] = GF_ERROR_CODE_BADR;
+ /* EXFULL 52 / * exchange full */
+ gf_error_to_errno_array[GF_ERROR_CODE_XFULL] = EXFULL;
+ gf_errno_to_error_array[EXFULL] = GF_ERROR_CODE_XFULL;
+ /* ENOANO 53 / * no anode */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOANO] = ENOANO;
+ gf_errno_to_error_array[ENOANO] = GF_ERROR_CODE_NOANO;
+ /* EBADRQC 54 / * invalid request code */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADRQC] = EBADRQC;
+ gf_errno_to_error_array[EBADRQC] = GF_ERROR_CODE_BADRQC;
+ /* EBADSLT 55 / * invalid slot */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADSLT] = EBADSLT;
+ gf_errno_to_error_array[EBADSLT] = GF_ERROR_CODE_BADSLT;
+ /* EDEADLOCK 56 / * file locking deadlock error */
+ /* This is same as EDEADLK on linux */
+ gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLOCK;
+ gf_errno_to_error_array[EDEADLOCK] = GF_ERROR_CODE_DEADLK;
+
+ /* EBFONT 57 / * bad font file fmt */
+ gf_error_to_errno_array[GF_ERROR_CODE_BFONT] = EBFONT;
+ gf_errno_to_error_array[EBFONT] = GF_ERROR_CODE_BFONT;
+
+ /* Interprocess Robust Locks */
+ /* EOWNERDEAD 58 / * process died with the lock */
+ gf_error_to_errno_array[GF_ERROR_CODE_OWNERDEAD] = EOWNERDEAD;
+ gf_errno_to_error_array[EOWNERDEAD] = GF_ERROR_CODE_OWNERDEAD;
+ /* ENOTRECOVERABLE 59 / * lock is not recoverable */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTRECOVERABLE] = ENOTRECOVERABLE;
+ gf_errno_to_error_array[ENOTRECOVERABLE] = GF_ERROR_CODE_NOTRECOVERABLE;
+
+ /* stream problems */
+ /* ENOSTR 60 / * Device not a stream */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSTR] = ENOSTR;
+ gf_errno_to_error_array[ENOSTR] = GF_ERROR_CODE_NOSTR;
+ /* ENODATA 61 / * no data (for no delay io) */
+ gf_error_to_errno_array[GF_ERROR_CODE_NODATA] = ENODATA;
+ gf_errno_to_error_array[ENODATA] = GF_ERROR_CODE_NODATA;
+ /* ETIME 62 / * timer expired */
+ gf_error_to_errno_array[GF_ERROR_CODE_TIME] = ETIME;
+ gf_errno_to_error_array[ETIME] = GF_ERROR_CODE_TIME;
+ /* ENOSR 63 / * out of streams resources */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSR] = ENOSR;
+ gf_errno_to_error_array[ENOSR] = GF_ERROR_CODE_NOSR;
+
+ /* ENONET 64 / * Machine is not on the network */
+ gf_error_to_errno_array[GF_ERROR_CODE_NONET] = ENONET;
+ gf_errno_to_error_array[ENONET] = GF_ERROR_CODE_NONET;
+ /* ENOPKG 65 / * Package not installed */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOPKG] = ENOPKG;
+ gf_errno_to_error_array[ENOPKG] = GF_ERROR_CODE_NOPKG;
+ /* EREMOTE 66 / * The object is remote */
+ gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
+ gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
+ /* ENOLINK 67 / * the link has been severed */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
+ gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
+ /* EADV 68 / * advertise error */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADV] = EADV;
+ gf_errno_to_error_array[EADV] = GF_ERROR_CODE_ADV;
+ /* ESRMNT 69 / * srmount error */
+ gf_error_to_errno_array[GF_ERROR_CODE_SRMNT] = ESRMNT;
+ gf_errno_to_error_array[ESRMNT] = GF_ERROR_CODE_SRMNT;
+
+ /* ECOMM 70 / * Communication error on send */
+ gf_error_to_errno_array[GF_ERROR_CODE_COMM] = ECOMM;
+ gf_errno_to_error_array[ECOMM] = GF_ERROR_CODE_COMM;
+ /* EPROTO 71 / * Protocol error */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
+ gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
+
+ /* Interprocess Robust Locks */
+ /* ELOCKUNMAPPED 72 / * locked lock was unmapped */
+ gf_error_to_errno_array[GF_ERROR_CODE_LOCKUNMAPPED] = ELOCKUNMAPPED;
+ gf_errno_to_error_array[ELOCKUNMAPPED] = GF_ERROR_CODE_LOCKUNMAPPED;
+
+ /* ENOTACTIVE 73 / * Facility is not active */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTACTIVE] = ENOTACTIVE;
+ gf_errno_to_error_array[ENOTACTIVE] = GF_ERROR_CODE_NOTACTIVE;
+ /* EMULTIHOP 74 / * multihop attempted */
+ gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
+ gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
+ /* EBADMSG 77 / * trying to read unreadable message */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
+ gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
+ /* ENAMETOOLONG 78 / * path name is too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
+ gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
+ /* EOVERFLOW 79 / * value too large to be stored in data type */
+ gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
+ gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
+ /* ENOTUNIQ 80 / * given log. name not unique */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTUNIQ] = ENOTUNIQ;
+ gf_errno_to_error_array[ENOTUNIQ] = GF_ERROR_CODE_NOTUNIQ;
+ /* EBADFD 81 / * f.d. invalid for this operation */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADFD] = EBADFD;
+ gf_errno_to_error_array[EBADFD] = GF_ERROR_CODE_BADFD;
+ /* EREMCHG 82 / * Remote address changed */
+ gf_error_to_errno_array[GF_ERROR_CODE_REMCHG] = EREMCHG;
+ gf_errno_to_error_array[EREMCHG] = GF_ERROR_CODE_REMCHG;
+
+ /* shared library problems */
+ /* ELIBACC 83 / * Can't access a needed shared lib. */
+ gf_error_to_errno_array[GF_ERROR_CODE_LIBACC] = ELIBACC;
+ gf_errno_to_error_array[ELIBACC] = GF_ERROR_CODE_LIBACC;
+ /* ELIBBAD 84 / * Accessing a corrupted shared lib. */
+ gf_error_to_errno_array[GF_ERROR_CODE_LIBBAD] = ELIBBAD;
+ gf_errno_to_error_array[ELIBBAD] = GF_ERROR_CODE_LIBBAD;
+ /* ELIBSCN 85 / * .lib section in a.out corrupted. */
+ gf_error_to_errno_array[GF_ERROR_CODE_LIBSCN] = ELIBSCN;
+ gf_errno_to_error_array[ELIBSCN] = GF_ERROR_CODE_LIBSCN;
+ /* ELIBMAX 86 / * Attempting to link in too many libs. */
+ gf_error_to_errno_array[GF_ERROR_CODE_LIBMAX] = ELIBMAX;
+ gf_errno_to_error_array[ELIBMAX] = GF_ERROR_CODE_LIBMAX;
+ /* ELIBEXEC 87 / * Attempting to exec a shared library. */
+ gf_error_to_errno_array[GF_ERROR_CODE_LIBEXEC] = ELIBEXEC;
+ gf_errno_to_error_array[ELIBEXEC] = GF_ERROR_CODE_LIBEXEC;
+ /* EILSEQ 88 / * Illegal byte sequence. */
+ gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
+ gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
+ /* ENOSYS 89 / * Unsupported file system operation */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
+ gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
+ /* ELOOP 90 / * Symbolic link loop */
+ gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
+ gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
+ /* ERESTART 91 / * Restartable system call */
+ gf_error_to_errno_array[GF_ERROR_CODE_RESTART] = ERESTART;
+ gf_errno_to_error_array[ERESTART] = GF_ERROR_CODE_RESTART;
+ /* ESTRPIPE 92 / * if pipe/FIFO, don't sleep in stream head */
+ gf_error_to_errno_array[GF_ERROR_CODE_STRPIPE] = ESTRPIPE;
+ gf_errno_to_error_array[ESTRPIPE] = GF_ERROR_CODE_STRPIPE;
+ /* ENOTEMPTY 93 / * directory not empty */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
+ gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
+ /* EUSERS 94 / * Too many users (for UFS) */
+ gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
+ gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
+
+ /* BSD Networking Software */
+ /* argument errors */
+ /* ENOTSOCK 95 / * Socket operation on non-socket */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
+ gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
+ /* EDESTADDRREQ 96 / * Destination address required */
+ gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
+ gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
+ /* EMSGSIZE 97 / * Message too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
+ gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
+ /* EPROTOTYPE 98 / * Protocol wrong type for socket */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
+ gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
+ /* ENOPROTOOPT 99 / * Protocol not available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
+ gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
+ /* EPROTONOSUPPORT 120 / * Protocol not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
+ gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
+ /* ESOCKTNOSUPPORT 121 / * Socket type not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
+ gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
+
+ /* EOPNOTSUPP 122 / * Operation not supported on socket */
+ gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
+ gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
+ /* EPFNOSUPPORT 123 / * Protocol family not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
+ gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
+ /* EAFNOSUPPORT 124 / * Address family not supported by */
+ /* protocol family */
+ gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
+ gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
+ /* EADDRINUSE 125 / * Address already in use */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
+ gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
+ /* EADDRNOTAVAIL 126 / * Can't assign requested address */
+ /* operational errors */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
+ gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
+ /* ENETDOWN 127 / * Network is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
+ gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
+ /* ENETUNREACH 128 / * Network is unreachable */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
+ gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
+ /* ENETRESET 129 / * Network dropped connection because */
+ /* of reset */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
+ gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
+ /* ECONNABORTED 130 / * Software caused connection abort */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
+ gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
+ /* ECONNRESET 131 / * Connection reset by peer */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
+ gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
+ /* ENOBUFS 132 / * No buffer space available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
+ gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
+ /* EISCONN 133 / * Socket is already connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
+ gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
+ /* ENOTCONN 134 / * Socket is not connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
+ gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
+ /* XENIX has 135 - 142 */
+ /* ESHUTDOWN 143 / * Can't send after socket shutdown */
+ gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
+ gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
+ /* ETOOMANYREFS 144 / * Too many references: can't splice */
+ gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
+ gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
+ /* ETIMEDOUT 145 / * Connection timed out */
+ gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
+ gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
+
+ /* ECONNREFUSED 146 / * Connection refused */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
+ gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
+ /* EHOSTDOWN 147 / * Host is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
+ gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
+ /* EHOSTUNREACH 148 / * No route to host */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
+ gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
+ /* EALREADY 149 / * operation already in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
+ gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
+ /* EINPROGRESS 150 / * operation now in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
+ gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
+
+ /* SUN Network File System */
+ /* ESTALE 151 / * Stale NFS file handle */
+ gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
+ gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
+
+ return;
}
#endif /* GF_SOLARIS_HOST_OS */
#ifdef GF_DARWIN_HOST_OS
static void
-init_compat_errno_arrays ()
+init_compat_errno_arrays()
{
- /* EDEADLK 11 / * Resource deadlock would occur */
- gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
- gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
-
- /* EAGAIN 35 / * Try Again */
- gf_error_to_errno_array[GF_ERROR_CODE_AGAIN] = EAGAIN;
- gf_errno_to_error_array[EAGAIN] = GF_ERROR_CODE_AGAIN;
-
- /* EINPROGRESS 36 / * Operation now in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
- gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
-
- /* EALREADY 37 / * Operation already in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
- gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
-
- /* ENOTSOCK 38 / * Socket operation on non-socket */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
- gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
-
- /* EDESTADDRREQ 39 / * Destination address required */
- gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
- gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
-
- /* EMSGSIZE 40 / * Message too long */
- gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
- gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
-
- /* EPROTOTYPE 41 / * Protocol wrong type for socket */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
- gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
-
- /* ENOPROTOOPT 42 / * Protocol not available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
- gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
-
- /* EPROTONOSUPPORT 43 / * Protocol not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
- gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
-
- /* ESOCKTNOSUPPORT 44 / * Socket type not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
- gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
-
- /* EOPNOTSUPP 45 / * Operation not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
- gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
-
- /* EPFNOSUPPORT 46 / * Protocol family not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
- gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
-
- /* EAFNOSUPPORT 47 / * Address family not supported by protocol family */
- gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
- gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
-
- /* EADDRINUSE 48 / * Address already in use */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
- gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
-
- /* EADDRNOTAVAIL 49 / * Can't assign requested address */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
- gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
-
- /* ENETDOWN 50 / * Network is down */
- gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
- gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
-
- /* ENETUNREACH 51 / * Network is unreachable */
- gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
- gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
-
- /* ENETRESET 52 / * Network dropped connection on reset */
- gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
- gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
-
- /* ECONNABORTED 53 / * Software caused connection abort */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
- gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
-
- /* ECONNRESET 54 / * Connection reset by peer */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
- gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
-
- /* ENOBUFS 55 / * No buffer space available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
- gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
-
- /* EISCONN 56 / * Socket is already connected */
- gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
- gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
-
- /* ENOTCONN 57 / * Socket is not connected */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
- gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
-
- /* ESHUTDOWN 58 / * Can't send after socket shutdown */
- gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
- gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
-
- /* ETOOMANYREFS 59 / * Too many references: can't splice */
- gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
- gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
-
- /* ETIMEDOUT 60 / * Operation timed out */
- gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
- gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
-
- /* ECONNREFUSED 61 / * Connection refused */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
- gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
-
- /* ELOOP 62 / * Too many levels of symbolic links */
- gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
- gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
-
- /* ENAMETOOLONG 63 / * File name too long */
- gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
- gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
-
- /* EHOSTDOWN 64 / * Host is down */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
- gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
-
- /* EHOSTUNREACH 65 / * No route to host */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
- gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
-
- /* ENOTEMPTY 66 / * Directory not empty */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
- gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
-
- /* EPROCLIM 67 / * Too many processes */
- gf_error_to_errno_array[GF_ERROR_CODE_PROCLIM] = EPROCLIM;
- gf_errno_to_error_array[EPROCLIM] = GF_ERROR_CODE_PROCLIM;
-
- /* EUSERS 68 / * Too many users */
- gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
- gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
-
- /* EDQUOT 69 / * Disc quota exceeded */
- gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
- gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
-
- /* ESTALE 70 / * Stale NFS file handle */
- gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
- gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
-
- /* EREMOTE 71 / * Too many levels of remote in path */
- gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
- gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
-
- /* EBADRPC 72 / * RPC struct is bad */
- gf_error_to_errno_array[GF_ERROR_CODE_BADRPC] = EBADRPC;
- gf_errno_to_error_array[EBADRPC] = GF_ERROR_CODE_BADRPC;
-
- /* ERPCMISMATCH 73 / * RPC version wrong */
- gf_error_to_errno_array[GF_ERROR_CODE_RPCMISMATCH] = ERPCMISMATCH;
- gf_errno_to_error_array[ERPCMISMATCH] = GF_ERROR_CODE_RPCMISMATCH;
-
- /* EPROGUNAVAIL 74 / * RPC prog. not avail */
- gf_error_to_errno_array[GF_ERROR_CODE_PROGUNAVAIL] = EPROGUNAVAIL;
- gf_errno_to_error_array[EPROGUNAVAIL] = GF_ERROR_CODE_PROGUNAVAIL;
-
- /* EPROGMISMATCH 75 / * Program version wrong */
- gf_error_to_errno_array[GF_ERROR_CODE_PROGMISMATCH] = EPROGMISMATCH;
- gf_errno_to_error_array[EPROGMISMATCH] = GF_ERROR_CODE_PROGMISMATCH;
-
- /* EPROCUNAVAIL 76 / * Bad procedure for program */
- gf_error_to_errno_array[GF_ERROR_CODE_PROCUNAVAIL] = EPROCUNAVAIL;
- gf_errno_to_error_array[EPROCUNAVAIL] = GF_ERROR_CODE_PROCUNAVAIL;
-
- /* ENOLCK 77 / * No locks available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
- gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
-
- /* ENOSYS 78 / * Function not implemented */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
- gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
-
- /* EFTYPE 79 / * Inappropriate file type or format */
- gf_error_to_errno_array[GF_ERROR_CODE_FTYPE] = EFTYPE;
- gf_errno_to_error_array[EFTYPE] = GF_ERROR_CODE_FTYPE;
-
- /* EAUTH 80 / * Authentication error */
- gf_error_to_errno_array[GF_ERROR_CODE_AUTH] = EAUTH;
- gf_errno_to_error_array[EAUTH] = GF_ERROR_CODE_AUTH;
-
- /* ENEEDAUTH 81 / * Need authenticator */
- gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
- gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
-/* Intelligent device errors */
-/* EPWROFF 82 / * Device power is off */
- gf_error_to_errno_array[GF_ERROR_CODE_PWROFF] = EPWROFF;
- gf_errno_to_error_array[EPWROFF] = GF_ERROR_CODE_PWROFF;
-/* EDEVERR 83 / * Device error, e.g. paper out */
- gf_error_to_errno_array[GF_ERROR_CODE_DEVERR] = EDEVERR;
- gf_errno_to_error_array[EDEVERR] = GF_ERROR_CODE_DEVERR;
-
- /* EOVERFLOW 84 / * Value too large to be stored in data type */
- gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
- gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
-
-/* Program loading errors */
-/* EBADEXEC 85 / * Bad executable */
- gf_error_to_errno_array[GF_ERROR_CODE_BADEXEC] = EBADEXEC;
- gf_errno_to_error_array[EBADEXEC] = GF_ERROR_CODE_BADEXEC;
-
-/* EBADARCH 86 / * Bad CPU type in executable */
- gf_error_to_errno_array[GF_ERROR_CODE_BADARCH] = EBADARCH;
- gf_errno_to_error_array[EBADARCH] = GF_ERROR_CODE_BADARCH;
-
-/* ESHLIBVERS 87 / * Shared library version mismatch */
- gf_error_to_errno_array[GF_ERROR_CODE_SHLIBVERS] = ESHLIBVERS;
- gf_errno_to_error_array[ESHLIBVERS] = GF_ERROR_CODE_SHLIBVERS;
-
-/* EBADMACHO 88 / * Malformed Macho file */
- gf_error_to_errno_array[GF_ERROR_CODE_BADMACHO] = EBADMACHO;
- gf_errno_to_error_array[EBADMACHO] = GF_ERROR_CODE_BADMACHO;
+ /* EDEADLK 11 / * Resource deadlock would occur */
+ gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
+ gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
+
+ /* EAGAIN 35 / * Try Again */
+ gf_error_to_errno_array[GF_ERROR_CODE_AGAIN] = EAGAIN;
+ gf_errno_to_error_array[EAGAIN] = GF_ERROR_CODE_AGAIN;
+
+ /* EINPROGRESS 36 / * Operation now in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
+ gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
+
+ /* EALREADY 37 / * Operation already in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
+ gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
+
+ /* ENOTSOCK 38 / * Socket operation on non-socket
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
+ gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
+
+ /* EDESTADDRREQ 39 / * Destination address required */
+ gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
+ gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
+
+ /* EMSGSIZE 40 / * Message too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
+ gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
+
+ /* EPROTOTYPE 41 / * Protocol wrong type for socket
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
+ gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
+
+ /* ENOPROTOOPT 42 / * Protocol not available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
+ gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
+
+ /* EPROTONOSUPPORT 43 / * Protocol not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
+ gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
+
+ /* ESOCKTNOSUPPORT 44 / * Socket type not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
+ gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
+
+ /* EOPNOTSUPP 45 / * Operation not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
+ gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
+
+ /* EPFNOSUPPORT 46 / * Protocol family not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
+ gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
+
+ /* EAFNOSUPPORT 47 / * Address family not supported by
+ * protocol family */
+ gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
+ gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
+
+ /* EADDRINUSE 48 / * Address already in use */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
+ gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
+
+ /* EADDRNOTAVAIL 49 / * Can't assign requested address
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
+ gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
+
+ /* ENETDOWN 50 / * Network is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
+ gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
+
+ /* ENETUNREACH 51 / * Network is unreachable */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
+ gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
+
+ /* ENETRESET 52 / * Network dropped connection on
+ * reset */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
+ gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
+
+ /* ECONNABORTED 53 / * Software caused connection abort
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
+ gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
+
+ /* ECONNRESET 54 / * Connection reset by peer */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
+ gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
+
+ /* ENOBUFS 55 / * No buffer space available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
+ gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
+
+ /* EISCONN 56 / * Socket is already connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
+ gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
+
+ /* ENOTCONN 57 / * Socket is not connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
+ gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
+
+ /* ESHUTDOWN 58 / * Can't send after socket shutdown
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
+ gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
+
+ /* ETOOMANYREFS 59 / * Too many references: can't
+ * splice */
+ gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
+ gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
+
+ /* ETIMEDOUT 60 / * Operation timed out */
+ gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
+ gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
+
+ /* ECONNREFUSED 61 / * Connection refused */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
+ gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
+
+ /* ELOOP 62 / * Too many levels of symbolic
+ * links */
+ gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
+ gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
+
+ /* ENAMETOOLONG 63 / * File name too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
+ gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
+
+ /* EHOSTDOWN 64 / * Host is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
+ gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
+
+ /* EHOSTUNREACH 65 / * No route to host */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
+ gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
+
+ /* ENOTEMPTY 66 / * Directory not empty */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
+ gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
+
+ /* EPROCLIM 67 / * Too many processes */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROCLIM] = EPROCLIM;
+ gf_errno_to_error_array[EPROCLIM] = GF_ERROR_CODE_PROCLIM;
+
+ /* EUSERS 68 / * Too many users */
+ gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
+ gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
+
+ /* EDQUOT 69 / * Disc quota exceeded */
+ gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
+ gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
+
+ /* ESTALE 70 / * Stale NFS file handle */
+ gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
+ gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
+
+ /* EREMOTE 71 / * Too many levels of remote in
+ * path */
+ gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
+ gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
+
+ /* EBADRPC 72 / * RPC struct is bad */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADRPC] = EBADRPC;
+ gf_errno_to_error_array[EBADRPC] = GF_ERROR_CODE_BADRPC;
+
+ /* ERPCMISMATCH 73 / * RPC version wrong */
+ gf_error_to_errno_array[GF_ERROR_CODE_RPCMISMATCH] = ERPCMISMATCH;
+ gf_errno_to_error_array[ERPCMISMATCH] = GF_ERROR_CODE_RPCMISMATCH;
+
+ /* EPROGUNAVAIL 74 / * RPC prog. not avail */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROGUNAVAIL] = EPROGUNAVAIL;
+ gf_errno_to_error_array[EPROGUNAVAIL] = GF_ERROR_CODE_PROGUNAVAIL;
+
+ /* EPROGMISMATCH 75 / * Program version wrong */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROGMISMATCH] = EPROGMISMATCH;
+ gf_errno_to_error_array[EPROGMISMATCH] = GF_ERROR_CODE_PROGMISMATCH;
+
+ /* EPROCUNAVAIL 76 / * Bad procedure for program */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROCUNAVAIL] = EPROCUNAVAIL;
+ gf_errno_to_error_array[EPROCUNAVAIL] = GF_ERROR_CODE_PROCUNAVAIL;
+
+ /* ENOLCK 77 / * No locks available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
+ gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
+
+ /* ENOSYS 78 / * Function not implemented */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
+ gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
+
+ /* EFTYPE 79 / * Inappropriate file type or
+ * format */
+ gf_error_to_errno_array[GF_ERROR_CODE_FTYPE] = EFTYPE;
+ gf_errno_to_error_array[EFTYPE] = GF_ERROR_CODE_FTYPE;
+
+ /* EAUTH 80 / * Authentication error */
+ gf_error_to_errno_array[GF_ERROR_CODE_AUTH] = EAUTH;
+ gf_errno_to_error_array[EAUTH] = GF_ERROR_CODE_AUTH;
+
+ /* ENEEDAUTH 81 / * Need authenticator */
+ gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
+ gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
+ /* Intelligent device errors */
+ /* EPWROFF 82 / * Device power is off */
+ gf_error_to_errno_array[GF_ERROR_CODE_PWROFF] = EPWROFF;
+ gf_errno_to_error_array[EPWROFF] = GF_ERROR_CODE_PWROFF;
+ /* EDEVERR 83 / * Device error, e.g. paper out */
+ gf_error_to_errno_array[GF_ERROR_CODE_DEVERR] = EDEVERR;
+ gf_errno_to_error_array[EDEVERR] = GF_ERROR_CODE_DEVERR;
+
+ /* EOVERFLOW 84 / * Value too large to be stored in
+ * data type */
+ gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
+ gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
+
+ /* Program loading errors */
+ /* EBADEXEC 85 / * Bad executable */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADEXEC] = EBADEXEC;
+ gf_errno_to_error_array[EBADEXEC] = GF_ERROR_CODE_BADEXEC;
+
+ /* EBADARCH 86 / * Bad CPU type in executable */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADARCH] = EBADARCH;
+ gf_errno_to_error_array[EBADARCH] = GF_ERROR_CODE_BADARCH;
+
+ /* ESHLIBVERS 87 / * Shared library version mismatch */
+ gf_error_to_errno_array[GF_ERROR_CODE_SHLIBVERS] = ESHLIBVERS;
+ gf_errno_to_error_array[ESHLIBVERS] = GF_ERROR_CODE_SHLIBVERS;
+
+ /* EBADMACHO 88 / * Malformed Macho file */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADMACHO] = EBADMACHO;
+ gf_errno_to_error_array[EBADMACHO] = GF_ERROR_CODE_BADMACHO;
#ifdef EDOOFUS
- /* EDOOFUS 88 / * Programming error */
- gf_error_to_errno_array[GF_ERROR_CODE_DOOFUS] = EDOOFUS;
- gf_errno_to_error_array[EDOOFUS] = GF_ERROR_CODE_DOOFUS;
+ /* EDOOFUS 88 / * Programming error */
+ gf_error_to_errno_array[GF_ERROR_CODE_DOOFUS] = EDOOFUS;
+ gf_errno_to_error_array[EDOOFUS] = GF_ERROR_CODE_DOOFUS;
#endif
- /* ECANCELED 89 / * Operation canceled */
- gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
- gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
-
- /* EIDRM 90 / * Identifier removed */
- gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
- gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
- /* ENOMSG 91 / * No message of desired type */
- gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
- gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
-
- /* EILSEQ 92 / * Illegal byte sequence */
- gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
- gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
-
- /* ENOATTR 93 / * Attribute not found */
- gf_error_to_errno_array[GF_ERROR_CODE_NOATTR] = ENOATTR;
- gf_errno_to_error_array[ENOATTR] = GF_ERROR_CODE_NOATTR;
-
- /* EBADMSG 94 / * Bad message */
- gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
- gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
-
- /* EMULTIHOP 95 / * Reserved */
- gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
- gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
-
- /* ENODATA 96 / * No message available on STREAM */
- gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
- gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
-
- /* ENOLINK 97 / * Reserved */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
- gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
-
- /* ENOSR 98 / * No STREAM resources */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSR] = ENOSR;
- gf_errno_to_error_array[ENOSR] = GF_ERROR_CODE_NOSR;
-
- /* ENOSTR 99 / * Not a STREAM */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSTR] = ENOSTR;
- gf_errno_to_error_array[ENOSTR] = GF_ERROR_CODE_NOSTR;
-
-/* EPROTO 100 / * Protocol error */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
- gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
-/* ETIME 101 / * STREAM ioctl timeout */
- gf_error_to_errno_array[GF_ERROR_CODE_TIME] = ETIME;
- gf_errno_to_error_array[ETIME] = GF_ERROR_CODE_TIME;
-
-/* This value is only discrete when compiling __DARWIN_UNIX03, or KERNEL */
-/* EOPNOTSUPP 102 / * Operation not supported on socket */
- gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
- gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
-
-/* ENOPOLICY 103 / * No such policy registered */
- gf_error_to_errno_array[GF_ERROR_CODE_NOPOLICY] = ENOPOLICY;
- gf_errno_to_error_array[ENOPOLICY] = GF_ERROR_CODE_NOPOLICY;
-
- return ;
+ /* ECANCELED 89 / * Operation canceled */
+ gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
+ gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
+
+ /* EIDRM 90 / * Identifier removed */
+ gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
+ gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
+ /* ENOMSG 91 / * No message of desired type */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
+ gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
+
+ /* EILSEQ 92 / * Illegal byte sequence */
+ gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
+ gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
+
+ /* ENOATTR 93 / * Attribute not found */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOATTR] = ENOATTR;
+ gf_errno_to_error_array[ENOATTR] = GF_ERROR_CODE_NOATTR;
+
+ /* EBADMSG 94 / * Bad message */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
+ gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
+
+ /* EMULTIHOP 95 / * Reserved */
+ gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
+ gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
+
+ /* ENODATA 96 / * No message available on STREAM
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
+ gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
+
+ /* ENOLINK 97 / * Reserved */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
+ gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
+
+ /* ENOSR 98 / * No STREAM resources */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSR] = ENOSR;
+ gf_errno_to_error_array[ENOSR] = GF_ERROR_CODE_NOSR;
+
+ /* ENOSTR 99 / * Not a STREAM */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSTR] = ENOSTR;
+ gf_errno_to_error_array[ENOSTR] = GF_ERROR_CODE_NOSTR;
+
+ /* EPROTO 100 / * Protocol error */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
+ gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
+ /* ETIME 101 / * STREAM ioctl timeout */
+ gf_error_to_errno_array[GF_ERROR_CODE_TIME] = ETIME;
+ gf_errno_to_error_array[ETIME] = GF_ERROR_CODE_TIME;
+
+ /* This value is only discrete when compiling __DARWIN_UNIX03, or KERNEL */
+ /* EOPNOTSUPP 102 / * Operation not supported on
+ * socket */
+ gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
+ gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
+
+ /* ENOPOLICY 103 / * No such policy registered */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOPOLICY] = ENOPOLICY;
+ gf_errno_to_error_array[ENOPOLICY] = GF_ERROR_CODE_NOPOLICY;
+
+ return;
}
#endif /* GF_DARWIN_HOST_OS */
#ifdef GF_BSD_HOST_OS
static void
-init_compat_errno_arrays ()
+init_compat_errno_arrays()
{
- /* Quite a bit of things changed in FreeBSD - current */
-
- /* EAGAIN 35 / * Try Again */
- gf_error_to_errno_array[GF_ERROR_CODE_AGAIN] = EAGAIN;
- gf_errno_to_error_array[EAGAIN] = GF_ERROR_CODE_AGAIN;
-
- /* EDEADLK 11 / * Resource deadlock would occur */
- gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
- gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
-
- /* EINPROGRESS 36 / * Operation now in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
- gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
-
- /* EALREADY 37 / * Operation already in progress */
- gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
- gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
-
- /* ENOTSOCK 38 / * Socket operation on non-socket */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
- gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
-
- /* EDESTADDRREQ 39 / * Destination address required */
- gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
- gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
-
- /* EMSGSIZE 40 / * Message too long */
- gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
- gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
-
- /* EPROTOTYPE 41 / * Protocol wrong type for socket */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
- gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
-
- /* ENOPROTOOPT 42 / * Protocol not available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
- gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
-
- /* EPROTONOSUPPORT 43 / * Protocol not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
- gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
-
- /* ESOCKTNOSUPPORT 44 / * Socket type not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
- gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
-
- /* EOPNOTSUPP 45 / * Operation not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
- gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
-
- /* EPFNOSUPPORT 46 / * Protocol family not supported */
- gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
- gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
+ /* Quite a bit of things changed in FreeBSD - current */
+
+ /* EAGAIN 35 / * Try Again */
+ gf_error_to_errno_array[GF_ERROR_CODE_AGAIN] = EAGAIN;
+ gf_errno_to_error_array[EAGAIN] = GF_ERROR_CODE_AGAIN;
+
+ /* EDEADLK 11 / * Resource deadlock would occur */
+ gf_error_to_errno_array[GF_ERROR_CODE_DEADLK] = EDEADLK;
+ gf_errno_to_error_array[EDEADLK] = GF_ERROR_CODE_DEADLK;
+
+ /* EINPROGRESS 36 / * Operation now in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_INPROGRESS] = EINPROGRESS;
+ gf_errno_to_error_array[EINPROGRESS] = GF_ERROR_CODE_INPROGRESS;
+
+ /* EALREADY 37 / * Operation already in progress */
+ gf_error_to_errno_array[GF_ERROR_CODE_ALREADY] = EALREADY;
+ gf_errno_to_error_array[EALREADY] = GF_ERROR_CODE_ALREADY;
+
+ /* ENOTSOCK 38 / * Socket operation on non-socket
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTSOCK] = ENOTSOCK;
+ gf_errno_to_error_array[ENOTSOCK] = GF_ERROR_CODE_NOTSOCK;
+
+ /* EDESTADDRREQ 39 / * Destination address required */
+ gf_error_to_errno_array[GF_ERROR_CODE_DESTADDRREQ] = EDESTADDRREQ;
+ gf_errno_to_error_array[EDESTADDRREQ] = GF_ERROR_CODE_DESTADDRREQ;
+
+ /* EMSGSIZE 40 / * Message too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_MSGSIZE] = EMSGSIZE;
+ gf_errno_to_error_array[EMSGSIZE] = GF_ERROR_CODE_MSGSIZE;
+
+ /* EPROTOTYPE 41 / * Protocol wrong type for socket
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTOTYPE] = EPROTOTYPE;
+ gf_errno_to_error_array[EPROTOTYPE] = GF_ERROR_CODE_PROTOTYPE;
+
+ /* ENOPROTOOPT 42 / * Protocol not available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOPROTOOPT] = ENOPROTOOPT;
+ gf_errno_to_error_array[ENOPROTOOPT] = GF_ERROR_CODE_NOPROTOOPT;
+
+ /* EPROTONOSUPPORT 43 / * Protocol not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTONOSUPPORT] = EPROTONOSUPPORT;
+ gf_errno_to_error_array[EPROTONOSUPPORT] = GF_ERROR_CODE_PROTONOSUPPORT;
+
+ /* ESOCKTNOSUPPORT 44 / * Socket type not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_SOCKTNOSUPPORT] = ESOCKTNOSUPPORT;
+ gf_errno_to_error_array[ESOCKTNOSUPPORT] = GF_ERROR_CODE_SOCKTNOSUPPORT;
+
+ /* EOPNOTSUPP 45 / * Operation not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_OPNOTSUPP] = EOPNOTSUPP;
+ gf_errno_to_error_array[EOPNOTSUPP] = GF_ERROR_CODE_OPNOTSUPP;
+
+ /* EPFNOSUPPORT 46 / * Protocol family not supported */
+ gf_error_to_errno_array[GF_ERROR_CODE_PFNOSUPPORT] = EPFNOSUPPORT;
+ gf_errno_to_error_array[EPFNOSUPPORT] = GF_ERROR_CODE_PFNOSUPPORT;
+
+ /* EAFNOSUPPORT 47 / * Address family not supported by
+ * protocol family */
+ gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
+ gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
+
+ /* EADDRINUSE 48 / * Address already in use */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
+ gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
+
+ /* EADDRNOTAVAIL 49 / * Can't assign requested address
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
+ gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
+
+ /* ENETDOWN 50 / * Network is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
+ gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
+
+ /* ENETUNREACH 51 / * Network is unreachable */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
+ gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
+
+ /* ENETRESET 52 / * Network dropped connection on
+ * reset */
+ gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
+ gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
+
+ /* ECONNABORTED 53 / * Software caused connection abort
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
+ gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
+
+ /* ECONNRESET 54 / * Connection reset by peer */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
+ gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
+
+ /* ENOBUFS 55 / * No buffer space available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
+ gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
+
+ /* EISCONN 56 / * Socket is already connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
+ gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
+
+ /* ENOTCONN 57 / * Socket is not connected */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
+ gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
+
+ /* ESHUTDOWN 58 / * Can't send after socket shutdown
+ */
+ gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
+ gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
+
+ /* ETOOMANYREFS 59 / * Too many references: can't
+ * splice */
+ gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
+ gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
+
+ /* ETIMEDOUT 60 / * Operation timed out */
+ gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
+ gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
+
+ /* ECONNREFUSED 61 / * Connection refused */
+ gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
+ gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
+
+ /* ELOOP 62 / * Too many levels of symbolic
+ * links */
+ gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
+ gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
+
+ /* ENAMETOOLONG 63 / * File name too long */
+ gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
+ gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
+
+ /* EHOSTDOWN 64 / * Host is down */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
+ gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
+
+ /* EHOSTUNREACH 65 / * No route to host */
+ gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
+ gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
+
+ /* ENOTEMPTY 66 / * Directory not empty */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
+ gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
+
+ /* EPROCLIM 67 / * Too many processes */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROCLIM] = EPROCLIM;
+ gf_errno_to_error_array[EPROCLIM] = GF_ERROR_CODE_PROCLIM;
+
+ /* EUSERS 68 / * Too many users */
+ gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
+ gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
+
+ /* EDQUOT 69 / * Disc quota exceeded */
+ gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
+ gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
+
+ /* ESTALE 70 / * Stale NFS file handle */
+ gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
+ gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
+
+ /* EREMOTE 71 / * Too many levels of remote in
+ * path */
+ gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
+ gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
+
+ /* EBADRPC 72 / * RPC struct is bad */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADRPC] = EBADRPC;
+ gf_errno_to_error_array[EBADRPC] = GF_ERROR_CODE_BADRPC;
+
+ /* ERPCMISMATCH 73 / * RPC version wrong */
+ gf_error_to_errno_array[GF_ERROR_CODE_RPCMISMATCH] = ERPCMISMATCH;
+ gf_errno_to_error_array[ERPCMISMATCH] = GF_ERROR_CODE_RPCMISMATCH;
+
+ /* EPROGUNAVAIL 74 / * RPC prog. not avail */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROGUNAVAIL] = EPROGUNAVAIL;
+ gf_errno_to_error_array[EPROGUNAVAIL] = GF_ERROR_CODE_PROGUNAVAIL;
+
+ /* EPROGMISMATCH 75 / * Program version wrong */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROGMISMATCH] = EPROGMISMATCH;
+ gf_errno_to_error_array[EPROGMISMATCH] = GF_ERROR_CODE_PROGMISMATCH;
+
+ /* EPROCUNAVAIL 76 / * Bad procedure for program */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROCUNAVAIL] = EPROCUNAVAIL;
+ gf_errno_to_error_array[EPROCUNAVAIL] = GF_ERROR_CODE_PROCUNAVAIL;
+
+ /* ENOLCK 77 / * No locks available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
+ gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
- /* EAFNOSUPPORT 47 / * Address family not supported by protocol family */
- gf_error_to_errno_array[GF_ERROR_CODE_AFNOSUPPORT] = EAFNOSUPPORT;
- gf_errno_to_error_array[EAFNOSUPPORT] = GF_ERROR_CODE_AFNOSUPPORT;
+ /* ENOSYS 78 / * Function not implemented */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
+ gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
- /* EADDRINUSE 48 / * Address already in use */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRINUSE] = EADDRINUSE;
- gf_errno_to_error_array[EADDRINUSE] = GF_ERROR_CODE_ADDRINUSE;
+ /* EFTYPE 79 / * Inappropriate file type or
+ * format */
+ gf_error_to_errno_array[GF_ERROR_CODE_FTYPE] = EFTYPE;
+ gf_errno_to_error_array[EFTYPE] = GF_ERROR_CODE_FTYPE;
+
+ /* EAUTH 80 / * Authentication error */
+ gf_error_to_errno_array[GF_ERROR_CODE_AUTH] = EAUTH;
+ gf_errno_to_error_array[EAUTH] = GF_ERROR_CODE_AUTH;
+
+ /* ENEEDAUTH 81 / * Need authenticator */
+ gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
+ gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
+
+ /* EIDRM 82 / * Identifier removed */
+ gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
+ gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
- /* EADDRNOTAVAIL 49 / * Can't assign requested address */
- gf_error_to_errno_array[GF_ERROR_CODE_ADDRNOTAVAIL] = EADDRNOTAVAIL;
- gf_errno_to_error_array[EADDRNOTAVAIL] = GF_ERROR_CODE_ADDRNOTAVAIL;
+ /* ENOMSG 83 / * No message of desired type */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
+ gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
- /* ENETDOWN 50 / * Network is down */
- gf_error_to_errno_array[GF_ERROR_CODE_NETDOWN] = ENETDOWN;
- gf_errno_to_error_array[ENETDOWN] = GF_ERROR_CODE_NETDOWN;
+ /* EOVERFLOW 84 / * Value too large to be stored in
+ * data type */
+ gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
+ gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
- /* ENETUNREACH 51 / * Network is unreachable */
- gf_error_to_errno_array[GF_ERROR_CODE_NETUNREACH] = ENETUNREACH;
- gf_errno_to_error_array[ENETUNREACH] = GF_ERROR_CODE_NETUNREACH;
+ /* ECANCELED 85 / * Operation canceled */
+ gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
+ gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
- /* ENETRESET 52 / * Network dropped connection on reset */
- gf_error_to_errno_array[GF_ERROR_CODE_NETRESET] = ENETRESET;
- gf_errno_to_error_array[ENETRESET] = GF_ERROR_CODE_NETRESET;
+ /* EILSEQ 86 / * Illegal byte sequence */
+ gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
+ gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
- /* ECONNABORTED 53 / * Software caused connection abort */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNABORTED] = ECONNABORTED;
- gf_errno_to_error_array[ECONNABORTED] = GF_ERROR_CODE_CONNABORTED;
-
- /* ECONNRESET 54 / * Connection reset by peer */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNRESET] = ECONNRESET;
- gf_errno_to_error_array[ECONNRESET] = GF_ERROR_CODE_CONNRESET;
-
- /* ENOBUFS 55 / * No buffer space available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOBUFS] = ENOBUFS;
- gf_errno_to_error_array[ENOBUFS] = GF_ERROR_CODE_NOBUFS;
-
- /* EISCONN 56 / * Socket is already connected */
- gf_error_to_errno_array[GF_ERROR_CODE_ISCONN] = EISCONN;
- gf_errno_to_error_array[EISCONN] = GF_ERROR_CODE_ISCONN;
-
- /* ENOTCONN 57 / * Socket is not connected */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTCONN] = ENOTCONN;
- gf_errno_to_error_array[ENOTCONN] = GF_ERROR_CODE_NOTCONN;
-
- /* ESHUTDOWN 58 / * Can't send after socket shutdown */
- gf_error_to_errno_array[GF_ERROR_CODE_SHUTDOWN] = ESHUTDOWN;
- gf_errno_to_error_array[ESHUTDOWN] = GF_ERROR_CODE_SHUTDOWN;
-
- /* ETOOMANYREFS 59 / * Too many references: can't splice */
- gf_error_to_errno_array[GF_ERROR_CODE_TOOMANYREFS] = ETOOMANYREFS;
- gf_errno_to_error_array[ETOOMANYREFS] = GF_ERROR_CODE_TOOMANYREFS;
-
- /* ETIMEDOUT 60 / * Operation timed out */
- gf_error_to_errno_array[GF_ERROR_CODE_TIMEDOUT] = ETIMEDOUT;
- gf_errno_to_error_array[ETIMEDOUT] = GF_ERROR_CODE_TIMEDOUT;
-
- /* ECONNREFUSED 61 / * Connection refused */
- gf_error_to_errno_array[GF_ERROR_CODE_CONNREFUSED] = ECONNREFUSED;
- gf_errno_to_error_array[ECONNREFUSED] = GF_ERROR_CODE_CONNREFUSED;
-
- /* ELOOP 62 / * Too many levels of symbolic links */
- gf_error_to_errno_array[GF_ERROR_CODE_LOOP] = ELOOP;
- gf_errno_to_error_array[ELOOP] = GF_ERROR_CODE_LOOP;
-
- /* ENAMETOOLONG 63 / * File name too long */
- gf_error_to_errno_array[GF_ERROR_CODE_NAMETOOLONG] = ENAMETOOLONG;
- gf_errno_to_error_array[ENAMETOOLONG] = GF_ERROR_CODE_NAMETOOLONG;
-
- /* EHOSTDOWN 64 / * Host is down */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTDOWN] = EHOSTDOWN;
- gf_errno_to_error_array[EHOSTDOWN] = GF_ERROR_CODE_HOSTDOWN;
-
- /* EHOSTUNREACH 65 / * No route to host */
- gf_error_to_errno_array[GF_ERROR_CODE_HOSTUNREACH] = EHOSTUNREACH;
- gf_errno_to_error_array[EHOSTUNREACH] = GF_ERROR_CODE_HOSTUNREACH;
-
- /* ENOTEMPTY 66 / * Directory not empty */
- gf_error_to_errno_array[GF_ERROR_CODE_NOTEMPTY] = ENOTEMPTY;
- gf_errno_to_error_array[ENOTEMPTY] = GF_ERROR_CODE_NOTEMPTY;
-
- /* EPROCLIM 67 / * Too many processes */
- gf_error_to_errno_array[GF_ERROR_CODE_PROCLIM] = EPROCLIM;
- gf_errno_to_error_array[EPROCLIM] = GF_ERROR_CODE_PROCLIM;
-
- /* EUSERS 68 / * Too many users */
- gf_error_to_errno_array[GF_ERROR_CODE_USERS] = EUSERS;
- gf_errno_to_error_array[EUSERS] = GF_ERROR_CODE_USERS;
-
- /* EDQUOT 69 / * Disc quota exceeded */
- gf_error_to_errno_array[GF_ERROR_CODE_DQUOT] = EDQUOT;
- gf_errno_to_error_array[EDQUOT] = GF_ERROR_CODE_DQUOT;
-
- /* ESTALE 70 / * Stale NFS file handle */
- gf_error_to_errno_array[GF_ERROR_CODE_STALE] = ESTALE;
- gf_errno_to_error_array[ESTALE] = GF_ERROR_CODE_STALE;
-
- /* EREMOTE 71 / * Too many levels of remote in path */
- gf_error_to_errno_array[GF_ERROR_CODE_REMOTE] = EREMOTE;
- gf_errno_to_error_array[EREMOTE] = GF_ERROR_CODE_REMOTE;
-
- /* EBADRPC 72 / * RPC struct is bad */
- gf_error_to_errno_array[GF_ERROR_CODE_BADRPC] = EBADRPC;
- gf_errno_to_error_array[EBADRPC] = GF_ERROR_CODE_BADRPC;
-
- /* ERPCMISMATCH 73 / * RPC version wrong */
- gf_error_to_errno_array[GF_ERROR_CODE_RPCMISMATCH] = ERPCMISMATCH;
- gf_errno_to_error_array[ERPCMISMATCH] = GF_ERROR_CODE_RPCMISMATCH;
-
- /* EPROGUNAVAIL 74 / * RPC prog. not avail */
- gf_error_to_errno_array[GF_ERROR_CODE_PROGUNAVAIL] = EPROGUNAVAIL;
- gf_errno_to_error_array[EPROGUNAVAIL] = GF_ERROR_CODE_PROGUNAVAIL;
-
- /* EPROGMISMATCH 75 / * Program version wrong */
- gf_error_to_errno_array[GF_ERROR_CODE_PROGMISMATCH] = EPROGMISMATCH;
- gf_errno_to_error_array[EPROGMISMATCH] = GF_ERROR_CODE_PROGMISMATCH;
-
- /* EPROCUNAVAIL 76 / * Bad procedure for program */
- gf_error_to_errno_array[GF_ERROR_CODE_PROCUNAVAIL] = EPROCUNAVAIL;
- gf_errno_to_error_array[EPROCUNAVAIL] = GF_ERROR_CODE_PROCUNAVAIL;
-
- /* ENOLCK 77 / * No locks available */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLCK] = ENOLCK;
- gf_errno_to_error_array[ENOLCK] = GF_ERROR_CODE_NOLCK;
-
- /* ENOSYS 78 / * Function not implemented */
- gf_error_to_errno_array[GF_ERROR_CODE_NOSYS] = ENOSYS;
- gf_errno_to_error_array[ENOSYS] = GF_ERROR_CODE_NOSYS;
-
- /* EFTYPE 79 / * Inappropriate file type or format */
- gf_error_to_errno_array[GF_ERROR_CODE_FTYPE] = EFTYPE;
- gf_errno_to_error_array[EFTYPE] = GF_ERROR_CODE_FTYPE;
-
- /* EAUTH 80 / * Authentication error */
- gf_error_to_errno_array[GF_ERROR_CODE_AUTH] = EAUTH;
- gf_errno_to_error_array[EAUTH] = GF_ERROR_CODE_AUTH;
-
- /* ENEEDAUTH 81 / * Need authenticator */
- gf_error_to_errno_array[GF_ERROR_CODE_NEEDAUTH] = ENEEDAUTH;
- gf_errno_to_error_array[ENEEDAUTH] = GF_ERROR_CODE_NEEDAUTH;
-
- /* EIDRM 82 / * Identifier removed */
- gf_error_to_errno_array[GF_ERROR_CODE_IDRM] = EIDRM;
- gf_errno_to_error_array[EIDRM] = GF_ERROR_CODE_IDRM;
-
- /* ENOMSG 83 / * No message of desired type */
- gf_error_to_errno_array[GF_ERROR_CODE_NOMSG] = ENOMSG;
- gf_errno_to_error_array[ENOMSG] = GF_ERROR_CODE_NOMSG;
-
- /* EOVERFLOW 84 / * Value too large to be stored in data type */
- gf_error_to_errno_array[GF_ERROR_CODE_OVERFLOW] = EOVERFLOW;
- gf_errno_to_error_array[EOVERFLOW] = GF_ERROR_CODE_OVERFLOW;
-
- /* ECANCELED 85 / * Operation canceled */
- gf_error_to_errno_array[GF_ERROR_CODE_CANCELED] = ECANCELED;
- gf_errno_to_error_array[ECANCELED] = GF_ERROR_CODE_CANCELED;
-
- /* EILSEQ 86 / * Illegal byte sequence */
- gf_error_to_errno_array[GF_ERROR_CODE_ILSEQ] = EILSEQ;
- gf_errno_to_error_array[EILSEQ] = GF_ERROR_CODE_ILSEQ;
-
- /* ENOATTR 87 / * Attribute not found */
- gf_error_to_errno_array[GF_ERROR_CODE_NOATTR] = ENOATTR;
- gf_errno_to_error_array[ENOATTR] = GF_ERROR_CODE_NOATTR;
+ /* ENOATTR 87 / * Attribute not found */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOATTR] = ENOATTR;
+ gf_errno_to_error_array[ENOATTR] = GF_ERROR_CODE_NOATTR;
#ifdef EDOOFUS
- /* EDOOFUS 88 / * Programming error */
- gf_error_to_errno_array[GF_ERROR_CODE_DOOFUS] = EDOOFUS;
- gf_errno_to_error_array[EDOOFUS] = GF_ERROR_CODE_DOOFUS;
+ /* EDOOFUS 88 / * Programming error */
+ gf_error_to_errno_array[GF_ERROR_CODE_DOOFUS] = EDOOFUS;
+ gf_errno_to_error_array[EDOOFUS] = GF_ERROR_CODE_DOOFUS;
#endif
- /* EBADMSG 89 / * Bad message */
- gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
- gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
+ /* EBADMSG 89 / * Bad message */
+ gf_error_to_errno_array[GF_ERROR_CODE_BADMSG] = EBADMSG;
+ gf_errno_to_error_array[EBADMSG] = GF_ERROR_CODE_BADMSG;
- /* EMULTIHOP 90 / * Multihop attempted */
- gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
- gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
+#ifdef __NetBSD__
+ /* ENODATA 89 / * No message available */
+ gf_error_to_errno_array[GF_ERROR_CODE_NODATA] = ENODATA;
+ gf_errno_to_error_array[ENODATA] = GF_ERROR_CODE_NODATA;
+#endif
- /* ENOLINK 91 / * Link has been severed */
- gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
- gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
+ /* EMULTIHOP 90 / * Multihop attempted */
+ gf_error_to_errno_array[GF_ERROR_CODE_MULTIHOP] = EMULTIHOP;
+ gf_errno_to_error_array[EMULTIHOP] = GF_ERROR_CODE_MULTIHOP;
- /* EPROTO 92 / * Protocol error */
- gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
- gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
+ /* ENOLINK 91 / * Link has been severed */
+ gf_error_to_errno_array[GF_ERROR_CODE_NOLINK] = ENOLINK;
+ gf_errno_to_error_array[ENOLINK] = GF_ERROR_CODE_NOLINK;
+ /* EPROTO 92 / * Protocol error */
+ gf_error_to_errno_array[GF_ERROR_CODE_PROTO] = EPROTO;
+ gf_errno_to_error_array[EPROTO] = GF_ERROR_CODE_PROTO;
- return ;
+ return;
}
#endif /* GF_BSD_HOST_OS */
#ifdef GF_LINUX_HOST_OS
static void
-init_compat_errno_arrays ()
+init_compat_errno_arrays()
{
- /* Things are fine. Everything should work seemlessly on GNU/Linux machines */
- return ;
+ /* Things are fine. Everything should work seemlessly on GNU/Linux machines
+ */
+ return;
}
#endif /* GF_LINUX_HOST_OS */
-
static void
-init_errno_arrays ()
+init_errno_arrays()
{
- int i;
- for (i=0; i < GF_ERROR_CODE_UNKNOWN; i++) {
- gf_errno_to_error_array[i] = i;
- gf_error_to_errno_array[i] = i;
- }
- /* Now change the order if it needs to be. */
- init_compat_errno_arrays();
-
- return;
+ int i;
+ for (i = 0; i < GF_ERROR_CODE_UNKNOWN; i++) {
+ gf_errno_to_error_array[i] = i;
+ gf_error_to_errno_array[i] = i;
+ }
+ /* Now change the order if it needs to be. */
+ init_compat_errno_arrays();
+
+ return;
}
int32_t
-gf_errno_to_error (int32_t op_errno)
+gf_errno_to_error(int32_t op_errno)
{
- if (!gf_compat_errno_init_done) {
- init_errno_arrays ();
- gf_compat_errno_init_done = 1;
- }
+ if (!gf_compat_errno_init_done) {
+ init_errno_arrays();
+ gf_compat_errno_init_done = 1;
+ }
- if ((op_errno > GF_ERROR_CODE_SUCCESS) && (op_errno < GF_ERROR_CODE_UNKNOWN))
- return gf_errno_to_error_array[op_errno];
+ if ((op_errno > GF_ERROR_CODE_SUCCESS) &&
+ (op_errno < GF_ERROR_CODE_UNKNOWN))
+ return gf_errno_to_error_array[op_errno];
- return op_errno;
+ return op_errno;
}
-
int32_t
-gf_error_to_errno (int32_t error)
+gf_error_to_errno(int32_t error)
{
- if (!gf_compat_errno_init_done) {
- init_errno_arrays ();
- gf_compat_errno_init_done = 1;
- }
+ if (!gf_compat_errno_init_done) {
+ init_errno_arrays();
+ gf_compat_errno_init_done = 1;
+ }
- if ((error > GF_ERROR_CODE_SUCCESS) && (error < GF_ERROR_CODE_UNKNOWN))
- return gf_error_to_errno_array[error];
+ if ((error > GF_ERROR_CODE_SUCCESS) && (error < GF_ERROR_CODE_UNKNOWN))
+ return gf_error_to_errno_array[error];
- return error;
+ return error;
}
diff --git a/libglusterfs/src/compat-errno.h b/libglusterfs/src/compat-errno.h
deleted file mode 100644
index 65e52081dd3..00000000000
--- a/libglusterfs/src/compat-errno.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __COMPAT_ERRNO_H__
-#define __COMPAT_ERRNO_H__
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <errno.h>
-
-#define GF_ERROR_CODE_SUCCESS 0
-#define GF_ERROR_CODE_UNKNOWN 1024
-#define GF_ERRNO_UNKNOWN 1024
-
-#define GF_ERROR_CODE_PERM 1 /* Operation not permitted */
-#define GF_ERROR_CODE_NOENT 2 /* No such file or directory */
-#define GF_ERROR_CODE_SRCH 3 /* No such process */
-#define GF_ERROR_CODE_INTR 4 /* Interrupted system call */
-#define GF_ERROR_CODE_IO 5 /* I/O error */
-#define GF_ERROR_CODE_NXIO 6 /* No such device or address */
-#define GF_ERROR_CODE_2BIG 7 /* Argument list too long */
-#define GF_ERROR_CODE_NOEXEC 8 /* Exec format error */
-#define GF_ERROR_CODE_BADF 9 /* Bad file number */
-#define GF_ERROR_CODE_CHILD 10 /* No child processes */
-#define GF_ERROR_CODE_AGAIN 11 /* Try again */
-#define GF_ERROR_CODE_NOMEM 12 /* Out of memory */
-#define GF_ERROR_CODE_ACCES 13 /* Permission denied */
-#define GF_ERROR_CODE_FAULT 14 /* Bad address */
-#define GF_ERROR_CODE_NOTBLK 15 /* Block device required */
-#define GF_ERROR_CODE_BUSY 16 /* Device or resource busy */
-#define GF_ERROR_CODE_EXIST 17 /* File exists */
-#define GF_ERROR_CODE_XDEV 18 /* Cross-device link */
-#define GF_ERROR_CODE_NODEV 19 /* No such device */
-#define GF_ERROR_CODE_NOTDIR 20 /* Not a directory */
-#define GF_ERROR_CODE_ISDIR 21 /* Is a directory */
-#define GF_ERROR_CODE_INVAL 22 /* Invalid argument */
-#define GF_ERROR_CODE_NFILE 23 /* File table overflow */
-#define GF_ERROR_CODE_MFILE 24 /* Too many open files */
-#define GF_ERROR_CODE_NOTTY 25 /* Not a typewriter */
-#define GF_ERROR_CODE_TXTBSY 26 /* Text file busy */
-#define GF_ERROR_CODE_FBIG 27 /* File too large */
-#define GF_ERROR_CODE_NOSPC 28 /* No space left on device */
-#define GF_ERROR_CODE_SPIPE 29 /* Illegal seek */
-#define GF_ERROR_CODE_ROFS 30 /* Read-only file system */
-#define GF_ERROR_CODE_MLINK 31 /* Too many links */
-#define GF_ERROR_CODE_PIPE 32 /* Broken pipe */
-#define GF_ERROR_CODE_DOM 33 /* Math argument out of domain of func */
-#define GF_ERROR_CODE_RANGE 34 /* Math result not representable */
-#define GF_ERROR_CODE_DEADLK 35 /* Resource deadlock would occur */
-#define GF_ERROR_CODE_NAMETOOLONG 36 /* File name too long */
-#define GF_ERROR_CODE_NOLCK 37 /* No record locks available */
-#define GF_ERROR_CODE_NOSYS 38 /* Function not implemented */
-#define GF_ERROR_CODE_NOTEMPTY 39 /* Directory not empty */
-#define GF_ERROR_CODE_LOOP 40 /* Too many symbolic links encountered */
-
-#define GF_ERROR_CODE_NOMSG 42 /* No message of desired type */
-#define GF_ERROR_CODE_IDRM 43 /* Identifier removed */
-#define GF_ERROR_CODE_CHRNG 44 /* Channel number out of range */
-#define GF_ERROR_CODE_L2NSYNC 45 /* Level 2 not synchronized */
-#define GF_ERROR_CODE_L3HLT 46 /* Level 3 halted */
-#define GF_ERROR_CODE_L3RST 47 /* Level 3 reset */
-#define GF_ERROR_CODE_LNRNG 48 /* Link number out of range */
-#define GF_ERROR_CODE_UNATCH 49 /* Protocol driver not attached */
-#define GF_ERROR_CODE_NOCSI 50 /* No CSI structure available */
-#define GF_ERROR_CODE_L2HLT 51 /* Level 2 halted */
-#define GF_ERROR_CODE_BADE 52 /* Invalid exchange */
-#define GF_ERROR_CODE_BADR 53 /* Invalid request descriptor */
-#define GF_ERROR_CODE_XFULL 54 /* Exchange full */
-#define GF_ERROR_CODE_NOANO 55 /* No anode */
-#define GF_ERROR_CODE_BADRQC 56 /* Invalid request code */
-#define GF_ERROR_CODE_BADSLT 57 /* Invalid slot */
-#define GF_ERROR_CODE_BFONT 59 /* Bad font file format */
-#define GF_ERROR_CODE_NOSTR 60 /* Device not a stream */
-#define GF_ERROR_CODE_NODATA 61 /* No data available */
-#define GF_ERROR_CODE_TIME 62 /* Timer expired */
-#define GF_ERROR_CODE_NOSR 63 /* Out of streams resources */
-#define GF_ERROR_CODE_NONET 64 /* Machine is not on the network */
-#define GF_ERROR_CODE_NOPKG 65 /* Package not installed */
-#define GF_ERROR_CODE_REMOTE 66 /* Object is remote */
-#define GF_ERROR_CODE_NOLINK 67 /* Link has been severed */
-#define GF_ERROR_CODE_ADV 68 /* Advertise error */
-#define GF_ERROR_CODE_SRMNT 69 /* Srmount error */
-#define GF_ERROR_CODE_COMM 70 /* Communication error on send */
-#define GF_ERROR_CODE_PROTO 71 /* Protocol error */
-#define GF_ERROR_CODE_MULTIHOP 72 /* Multihop attempted */
-#define GF_ERROR_CODE_DOTDOT 73 /* RFS specific error */
-#define GF_ERROR_CODE_BADMSG 74 /* Not a data message */
-#define GF_ERROR_CODE_OVERFLOW 75 /* Value too large for defined data type */
-#define GF_ERROR_CODE_NOTUNIQ 76 /* Name not unique on network */
-#define GF_ERROR_CODE_BADFD 77 /* File descriptor in bad state */
-#define GF_ERROR_CODE_REMCHG 78 /* Remote address changed */
-#define GF_ERROR_CODE_LIBACC 79 /* Can not access a needed shared library */
-#define GF_ERROR_CODE_LIBBAD 80 /* Accessing a corrupted shared library */
-#define GF_ERROR_CODE_LIBSCN 81 /* .lib section in a.out corrupted */
-#define GF_ERROR_CODE_LIBMAX 82 /* Attempting to link in too many shared libraries */
-#define GF_ERROR_CODE_LIBEXEC 83 /* Cannot exec a shared library directly */
-#define GF_ERROR_CODE_ILSEQ 84 /* Illegal byte sequence */
-#define GF_ERROR_CODE_RESTART 85 /* Interrupted system call should be restarted */
-#define GF_ERROR_CODE_STRPIPE 86 /* Streams pipe error */
-#define GF_ERROR_CODE_USERS 87 /* Too many users */
-#define GF_ERROR_CODE_NOTSOCK 88 /* Socket operation on non-socket */
-#define GF_ERROR_CODE_DESTADDRREQ 89 /* Destination address required */
-#define GF_ERROR_CODE_MSGSIZE 90 /* Message too long */
-#define GF_ERROR_CODE_PROTOTYPE 91 /* Protocol wrong type for socket */
-#define GF_ERROR_CODE_NOPROTOOPT 92 /* Protocol not available */
-#define GF_ERROR_CODE_PROTONOSUPPORT 93 /* Protocol not supported */
-#define GF_ERROR_CODE_SOCKTNOSUPPORT 94 /* Socket type not supported */
-#define GF_ERROR_CODE_OPNOTSUPP 95 /* Operation not supported on transport endpoint */
-#define GF_ERROR_CODE_PFNOSUPPORT 96 /* Protocol family not supported */
-#define GF_ERROR_CODE_AFNOSUPPORT 97 /* Address family not supported by protocol */
-#define GF_ERROR_CODE_ADDRINUSE 98 /* Address already in use */
-#define GF_ERROR_CODE_ADDRNOTAVAIL 99 /* Cannot assign requested address */
-#define GF_ERROR_CODE_NETDOWN 100 /* Network is down */
-#define GF_ERROR_CODE_NETUNREACH 101 /* Network is unreachable */
-#define GF_ERROR_CODE_NETRESET 102 /* Network dropped connection because of reset */
-#define GF_ERROR_CODE_CONNABORTED 103 /* Software caused connection abort */
-#define GF_ERROR_CODE_CONNRESET 104 /* Connection reset by peer */
-#define GF_ERROR_CODE_NOBUFS 105 /* No buffer space available */
-#define GF_ERROR_CODE_ISCONN 106 /* Transport endpoint is already connected */
-#define GF_ERROR_CODE_NOTCONN 107 /* Transport endpoint is not connected */
-#define GF_ERROR_CODE_SHUTDOWN 108 /* Cannot send after transport endpoint shutdown */
-#define GF_ERROR_CODE_TOOMANYREFS 109 /* Too many references: cannot splice */
-#define GF_ERROR_CODE_TIMEDOUT 110 /* Connection timed out */
-#define GF_ERROR_CODE_CONNREFUSED 111 /* Connection refused */
-#define GF_ERROR_CODE_HOSTDOWN 112 /* Host is down */
-#define GF_ERROR_CODE_HOSTUNREACH 113 /* No route to host */
-#define GF_ERROR_CODE_ALREADY 114 /* Operation already in progress */
-#define GF_ERROR_CODE_INPROGRESS 115 /* Operation now in progress */
-#define GF_ERROR_CODE_ALREADY 114 /* Operation already in progress */
-#define GF_ERROR_CODE_INPROGRESS 115 /* Operation now in progress */
-#define GF_ERROR_CODE_STALE 116 /* Stale NFS file handle */
-#define GF_ERROR_CODE_UCLEAN 117 /* Structure needs cleaning */
-#define GF_ERROR_CODE_NOTNAM 118 /* Not a XENIX named type file */
-#define GF_ERROR_CODE_NAVAIL 119 /* No XENIX semaphores available */
-#define GF_ERROR_CODE_ISNAM 120 /* Is a named type file */
-#define GF_ERROR_CODE_REMOTEIO 121 /* Remote I/O error */
-#define GF_ERROR_CODE_DQUOT 122 /* Quota exceeded */
-#define GF_ERROR_CODE_NOMEDIUM 123 /* No medium found */
-#define GF_ERROR_CODE_MEDIUMTYPE 124 /* Wrong medium type */
-#define GF_ERROR_CODE_CANCELED 125 /* Operation Canceled */
-#define GF_ERROR_CODE_NOKEY 126 /* Required key not available */
-#define GF_ERROR_CODE_KEYEXPIRED 127 /* Key has expired */
-#define GF_ERROR_CODE_KEYREVOKED 128 /* Key has been revoked */
-#define GF_ERROR_CODE_KEYREJECTED 129 /* Key was rejected by service */
-
-/* for robust mutexes */
-#define GF_ERROR_CODE_OWNERDEAD 130 /* Owner died */
-#define GF_ERROR_CODE_NOTRECOVERABLE 131 /* State not recoverable */
-
-
-
-/* Should never be seen by user programs */
-#define GF_ERROR_CODE_RESTARTSYS 512
-#define GF_ERROR_CODE_RESTARTNOINTR 513
-#define GF_ERROR_CODE_RESTARTNOHAND 514 /* restart if no handler.. */
-#define GF_ERROR_CODE_NOIOCTLCMD 515 /* No ioctl command */
-#define GF_ERROR_CODE_RESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */
-
-/* Defined for the NFSv3 protocol */
-#define GF_ERROR_CODE_BADHANDLE 521 /* Illegal NFS file handle */
-#define GF_ERROR_CODE_NOTSYNC 522 /* Update synchronization mismatch */
-#define GF_ERROR_CODE_BADCOOKIE 523 /* Cookie is stale */
-#define GF_ERROR_CODE_NOTSUPP 524 /* Operation is not supported */
-#define GF_ERROR_CODE_TOOSMALL 525 /* Buffer or request is too small */
-#define GF_ERROR_CODE_SERVERFAULT 526 /* An untranslatable error occurred */
-#define GF_ERROR_CODE_BADTYPE 527 /* Type not supported by server */
-#define GF_ERROR_CODE_JUKEBOX 528 /* Request initiated, but will not complete before timeout */
-#define GF_ERROR_CODE_IOCBQUEUED 529 /* iocb queued, will get completion event */
-#define GF_ERROR_CODE_IOCBRETRY 530 /* iocb queued, will trigger a retry */
-
-/* Darwin OS X */
-#define GF_ERROR_CODE_NOPOLICY 701
-#define GF_ERROR_CODE_BADMACHO 702
-#define GF_ERROR_CODE_PWROFF 703
-#define GF_ERROR_CODE_DEVERR 704
-#define GF_ERROR_CODE_BADARCH 705
-#define GF_ERROR_CODE_BADEXEC 706
-#define GF_ERROR_CODE_SHLIBVERS 707
-
-
-
-/* Solaris */
-/* ENOTACTIVE 73 / * Facility is not active */
-#define GF_ERROR_CODE_NOTACTIVE 801
-/* ELOCKUNMAPPED 72 / * locked lock was unmapped */
-#define GF_ERROR_CODE_LOCKUNMAPPED 802
-
-/* BSD system */
-#define GF_ERROR_CODE_PROCLIM 901 /* Too many processes */
-#define GF_ERROR_CODE_BADRPC 902 /* RPC struct is bad */
-#define GF_ERROR_CODE_RPCMISMATCH 903 /* RPC version wrong */
-#define GF_ERROR_CODE_PROGUNAVAIL 904 /* RPC prog. not avail */
-#define GF_ERROR_CODE_PROGMISMATCH 905 /* Program version wrong */
-#define GF_ERROR_CODE_PROCUNAVAIL 905 /* Bad procedure for program */
-#define GF_ERROR_CODE_FTYPE 906 /* Inappropriate file type or format */
-#define GF_ERROR_CODE_AUTH 907 /* Authentication error */
-#define GF_ERROR_CODE_NEEDAUTH 908 /* Need authenticator */
-#define GF_ERROR_CODE_DOOFUS 909 /* Programming error */
-
-#define GF_ERROR_CODE_NOATTR GF_ERROR_CODE_NODATA /* Attribute not found */
-
-/* Either one of enodata or enoattr will be there in system */
-#ifndef ENOATTR
-#define ENOATTR ENODATA
-#endif /* ENOATTR */
-
-#ifndef ENODATA
-#define ENODATA ENOATTR
-#endif /* ENODATA */
-
-#ifndef EBADFD
-#define EBADFD EBADRPC
-#endif /* EBADFD */
-
-/* These functions are defined for all the OS flags, but content will
- * be different for each OS flag.
- */
-int32_t gf_errno_to_error (int32_t op_errno);
-int32_t gf_error_to_errno (int32_t error);
-
-#endif /* __COMPAT_ERRNO_H__ */
diff --git a/libglusterfs/src/compat.c b/libglusterfs/src/compat.c
index 99a0041a5cf..8a05a30a8fe 100644
--- a/libglusterfs/src/compat.c
+++ b/libglusterfs/src/compat.c
@@ -8,552 +8,611 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <string.h>
#include <stdlib.h>
+#include <unistd.h>
#include <stdarg.h>
#include <getopt.h>
#include <sys/types.h>
#include <dirent.h>
-#ifdef GF_SOLARIS_HOST_OS
-#include "logging.h"
-#endif /* GF_SOLARIS_HOST_OS */
-
-#include "compat.h"
-#include "common-utils.h"
-#include "iatt.h"
-#include "inode.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/compat.h"
+#include "glusterfs/iatt.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/run.h"
+#include "glusterfs/libglusterfs-messages.h"
#ifdef GF_SOLARIS_HOST_OS
int
-solaris_fsetxattr(int fd, const char* key, const char *value, size_t size,
+solaris_fsetxattr(int fd, const char *key, const char *value, size_t size,
int flags)
{
- int attrfd = -1;
- int ret = 0;
-
- attrfd = openat (fd, key, flags|O_CREAT|O_WRONLY|O_XATTR, 0777);
- if (attrfd >= 0) {
- ftruncate (attrfd, 0);
- ret = write (attrfd, value, size);
- close (attrfd);
- } else {
- if (errno != ENOENT)
- gf_log ("libglusterfs", GF_LOG_ERROR,
- "Couldn't set extended attribute for %d (%d)",
- fd, errno);
- return -1;
- }
-
- return 0;
+ int attrfd = -1;
+ int ret = 0;
+
+ attrfd = openat(fd, key, flags | O_CREAT | O_WRONLY | O_XATTR, 0777);
+ if (attrfd >= 0) {
+ ftruncate(attrfd, 0);
+ ret = write(attrfd, value, size);
+ close(attrfd);
+ } else {
+ if (errno != ENOENT)
+ gf_msg("libglusterfs", GF_LOG_ERROR, errno,
+ LG_MSG_SET_ATTRIBUTE_FAILED,
+ "Couldn't set "
+ "extended attribute for %d",
+ fd);
+ return -1;
+ }
+
+ return 0;
}
-
int
-solaris_fgetxattr(int fd, const char* key, char *value, size_t size)
+solaris_fgetxattr(int fd, const char *key, char *value, size_t size)
{
- int attrfd = -1;
- int ret = 0;
-
- attrfd = openat (fd, key, O_RDONLY|O_XATTR);
- if (attrfd >= 0) {
- if (size == 0) {
- struct stat buf;
- fstat (attrfd, &buf);
- ret = buf.st_size;
- } else {
- ret = read (attrfd, value, size);
- }
- close (attrfd);
+ int attrfd = -1;
+ int ret = 0;
+
+ attrfd = openat(fd, key, O_RDONLY | O_XATTR);
+ if (attrfd >= 0) {
+ if (size == 0) {
+ struct stat buf;
+ fstat(attrfd, &buf);
+ ret = buf.st_size;
} else {
- if (errno != ENOENT)
- gf_log ("libglusterfs", GF_LOG_INFO,
- "Couldn't read extended attribute for the file %d (%d)",
- fd, errno);
- if (errno == ENOENT)
- errno = ENODATA;
- return -1;
+ ret = read(attrfd, value, size);
}
-
- return ret;
+ close(attrfd);
+ } else {
+ if (errno != ENOENT)
+ gf_msg("libglusterfs", GF_LOG_INFO, errno,
+ LG_MSG_READ_ATTRIBUTE_FAILED,
+ "Couldn't read "
+ "extended attribute for the file %d",
+ fd);
+ if (errno == ENOENT)
+ errno = ENODATA;
+ return -1;
+ }
+
+ return ret;
}
/* Solaris does not support xattr for symlinks and dev files. Since gfid and
other trusted attributes are stored as xattrs, we need to provide support for
- them. A mapped regular file is stored in the /.glusterfs_xattr_inode of the export dir.
- All xattr ops related to the special files are redirected to this map file.
+ them. A mapped regular file is stored in the /.glusterfs_xattr_inode of the
+ export dir. All xattr ops related to the special files are redirected to this
+ map file.
*/
int
-make_export_path (const char *real_path, char **path)
+make_export_path(const char *real_path, char **path)
{
- int ret = -1;
- char *tmp = NULL;
- char *export_path = NULL;
- char *dup = NULL;
- char *ptr = NULL;
- char *freeptr = NULL;
- uuid_t gfid = {0, };
-
- export_path = GF_CALLOC (1, sizeof (char) * PATH_MAX, 0);
- if (!export_path)
- goto out;
+ int ret = -1;
+ char *tmp = NULL;
+ char *export_path = NULL;
+ char *dup = NULL;
+ char *ptr = NULL;
+ char *freeptr = NULL;
+ uuid_t gfid = {
+ 0,
+ };
+
+ export_path = GF_CALLOC(1, sizeof(char) * PATH_MAX, 0);
+ if (!export_path)
+ goto out;
- dup = gf_strdup (real_path);
- if (!dup)
- goto out;
+ dup = gf_strdup(real_path);
+ if (!dup)
+ goto out;
- freeptr = dup;
- ret = solaris_getxattr ("/", GFID_XATTR_KEY, gfid, 16);
- /* Return value of getxattr */
+ freeptr = dup;
+ ret = solaris_getxattr("/", GFID_XATTR_KEY, gfid, 16);
+ /* Return value of getxattr */
+ if (ret == 16) {
+ if (__is_root_gfid(gfid)) {
+ strcat(export_path, "/");
+ ret = 0;
+ goto done;
+ }
+ }
+
+ do {
+ ptr = strtok_r(dup, "/", &tmp);
+ if (!ptr)
+ break;
+ strcat(export_path, dup);
+ ret = solaris_getxattr(export_path, GFID_XATTR_KEY, gfid, 16);
if (ret == 16) {
- if (__is_root_gfid (gfid)){
- strcat (export_path, "/");
- ret = 0;
- goto done;
- }
+ if (__is_root_gfid(gfid)) {
+ ret = 0;
+ goto done;
+ }
}
+ strcat(export_path, "/");
+ dup = tmp;
+ } while (ptr);
- do {
- ptr = strtok_r (dup, "/", &tmp);
- if (!ptr)
- break;
- strcat (export_path, dup);
- ret = solaris_getxattr (export_path, GFID_XATTR_KEY, gfid, 16);
- if (ret == 16) {
- if (__is_root_gfid (gfid)) {
- ret = 0;
- goto done;
- }
- }
- strcat (export_path, "/");
- dup = tmp;
- } while (ptr);
-
- goto out;
+ goto out;
done:
- if (!ret) {
- *path = export_path;
- }
+ if (!ret) {
+ *path = export_path;
+ }
out:
- if (freeptr)
- GF_FREE (freeptr);
- if (ret && export_path)
- GF_FREE (export_path);
+ GF_FREE(freeptr);
+ if (ret && export_path)
+ GF_FREE(export_path);
- return ret;
+ return ret;
}
int
-solaris_xattr_resolve_path (const char *real_path, char **path)
+solaris_xattr_resolve_path(const char *real_path, char **path)
{
- int ret = -1;
- char *export_path = NULL;
- char xattr_path[PATH_MAX] = {0, };
- struct stat lstatbuf = {0, };
- struct iatt stbuf = {0, };
- struct stat statbuf = {0, };
-
- ret = lstat (real_path, &lstatbuf);
- if (ret != 0 )
- return ret;
- iatt_from_stat (&stbuf, &lstatbuf);
- if (IA_ISREG(stbuf.ia_type) || IA_ISDIR(stbuf.ia_type))
- return -1;
-
- ret = make_export_path (real_path, &export_path);
- if (!ret && export_path) {
- strcat (export_path, "/"GF_SOLARIS_XATTR_DIR);
- if (lstat (export_path, &statbuf)) {
- ret = mkdir (export_path, 0777);
- if (ret && (errno != EEXIST)) {
- gf_log (THIS->name, GF_LOG_DEBUG, "mkdir failed,"
- " errno: %d", errno);
- goto out;
- }
- }
+ int ret = -1;
+ char *export_path = NULL;
+ char xattr_path[PATH_MAX] = {
+ 0,
+ };
+ struct stat lstatbuf = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+ struct stat statbuf = {
+ 0,
+ };
+
+ ret = lstat(real_path, &lstatbuf);
+ if (ret != 0)
+ return ret;
+ iatt_from_stat(&stbuf, &lstatbuf);
+ if (IA_ISREG(stbuf.ia_type) || IA_ISDIR(stbuf.ia_type))
+ return -1;
+
+ ret = make_export_path(real_path, &export_path);
+ if (!ret && export_path) {
+ strcat(export_path, "/" GF_SOLARIS_XATTR_DIR);
+ if (lstat(export_path, &statbuf)) {
+ ret = mkdir(export_path, 0755);
+ if (ret && (errno != EEXIST)) {
+ gf_msg_debug(THIS->name, 0,
+ "mkdir failed,"
+ " errno: %d",
+ errno);
+ goto out;
+ }
+ }
- snprintf(xattr_path, PATH_MAX, "%s%s%lu", export_path,
- "/", stbuf.ia_ino);
+ snprintf(xattr_path, PATH_MAX, "%s%s%lu", export_path, "/",
+ stbuf.ia_ino);
- ret = lstat (xattr_path, &statbuf);
+ ret = lstat(xattr_path, &statbuf);
- if (ret) {
- ret = mknod (xattr_path, S_IFREG|O_WRONLY, 0);
- if (ret && (errno != EEXIST)) {
- gf_log (THIS->name, GF_LOG_WARNING,"Failed to create "
- "mapped file %s, error %d", xattr_path,
- errno);
- goto out;
- }
- }
- *path = gf_strdup (xattr_path);
+ if (ret) {
+ ret = mknod(xattr_path, S_IFREG | O_WRONLY, 0);
+ if (ret && (errno != EEXIST)) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to "
+ "create mapped file %s",
+ xattr_path);
+ goto out;
+ }
}
+ *path = gf_strdup(xattr_path);
+ }
out:
- if (export_path)
- GF_FREE (export_path);
- if (*path)
- return 0;
- else
- return -1;
+ GF_FREE(export_path);
+ if (*path)
+ return 0;
+ else
+ return -1;
}
int
-solaris_setxattr(const char *path, const char* key, const char *value,
+solaris_setxattr(const char *path, const char *key, const char *value,
size_t size, int flags)
{
- int attrfd = -1;
- int ret = 0;
- char *mapped_path = NULL;
-
- ret = solaris_xattr_resolve_path (path, &mapped_path);
- if (!ret) {
- attrfd = attropen (mapped_path, key, flags|O_CREAT|O_WRONLY,
- 0777);
- } else {
- attrfd = attropen (path, key, flags|O_CREAT|O_WRONLY, 0777);
- }
- if (attrfd >= 0) {
- ftruncate (attrfd, 0);
- ret = write (attrfd, value, size);
- close (attrfd);
- ret = 0;
- } else {
- if (errno != ENOENT)
- gf_log ("libglusterfs", GF_LOG_ERROR,
- "Couldn't set extended attribute for %s (%d)",
- path, errno);
- ret = -1;
- }
- if (mapped_path)
- GF_FREE (mapped_path);
- return ret;
+ int attrfd = -1;
+ int ret = 0;
+ char *mapped_path = NULL;
+
+ ret = solaris_xattr_resolve_path(path, &mapped_path);
+ if (!ret) {
+ attrfd = attropen(mapped_path, key, flags | O_CREAT | O_WRONLY, 0777);
+ } else {
+ attrfd = attropen(path, key, flags | O_CREAT | O_WRONLY, 0777);
+ }
+ if (attrfd >= 0) {
+ ftruncate(attrfd, 0);
+ ret = write(attrfd, value, size);
+ close(attrfd);
+ ret = 0;
+ } else {
+ if (errno != ENOENT)
+ gf_msg("libglusterfs", GF_LOG_ERROR, errno,
+ LG_MSG_SET_ATTRIBUTE_FAILED,
+ "Couldn't set "
+ "extended attribute for %s",
+ path);
+ ret = -1;
+ }
+ GF_FREE(mapped_path);
+ return ret;
}
-
int
solaris_listxattr(const char *path, char *list, size_t size)
{
- int attrdirfd = -1;
- ssize_t len = 0;
- DIR *dirptr = NULL;
- struct dirent *dent = NULL;
- int newfd = -1;
- char *mapped_path = NULL;
- int ret = -1;
-
- ret = solaris_xattr_resolve_path (path, &mapped_path);
- if (!ret) {
- attrdirfd = attropen (mapped_path, ".", O_RDONLY, 0);
- } else {
- attrdirfd = attropen (path, ".", O_RDONLY, 0);
- }
- if (attrdirfd >= 0) {
- newfd = dup(attrdirfd);
- dirptr = fdopendir(newfd);
- if (dirptr) {
- while ((dent = readdir(dirptr))) {
- size_t listlen = strlen(dent->d_name);
- if (!strcmp(dent->d_name, ".") ||
- !strcmp(dent->d_name, "..")) {
- /* we don't want "." and ".." here */
- continue;
- }
- if (size == 0) {
- /* return the current size of the list
- of extended attribute names*/
- len += listlen + 1;
- } else {
- /* check size and copy entry + null
- into list. */
- if ((len + listlen + 1) > size) {
- errno = ERANGE;
- len = -1;
- break;
- } else {
- strncpy(list + len, dent->d_name, listlen);
- len += listlen;
- list[len] = '\0';
- ++len;
- }
- }
- }
-
- if (closedir(dirptr) == -1) {
- close (attrdirfd);
- len = -1;
- goto out;
- }
+ int attrdirfd = -1;
+ ssize_t len = 0;
+ DIR *dirptr = NULL;
+ struct dirent *dent = NULL;
+ int newfd = -1;
+ char *mapped_path = NULL;
+ int ret = -1;
+
+ ret = solaris_xattr_resolve_path(path, &mapped_path);
+ if (!ret) {
+ attrdirfd = attropen(mapped_path, ".", O_RDONLY, 0);
+ } else {
+ attrdirfd = attropen(path, ".", O_RDONLY, 0);
+ }
+ if (attrdirfd >= 0) {
+ newfd = dup(attrdirfd);
+ dirptr = fdopendir(newfd);
+ if (dirptr) {
+ while ((dent = readdir(dirptr))) {
+ size_t listlen = strlen(dent->d_name);
+ if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) {
+ /* we don't want "." and ".." here */
+ continue;
+ }
+ if (size == 0) {
+ /* return the current size of the list
+ of extended attribute names*/
+ len += listlen + 1;
} else {
- close (attrdirfd);
+ /* check size and copy entry + null
+ into list. */
+ if ((len + listlen + 1) > size) {
+ errno = ERANGE;
len = -1;
- goto out;
+ break;
+ } else {
+ strncpy(list + len, dent->d_name, listlen);
+ len += listlen;
+ list[len] = '\0';
+ ++len;
+ }
}
- close (attrdirfd);
+ }
+
+ if (closedir(dirptr) == -1) {
+ close(attrdirfd);
+ len = -1;
+ goto out;
+ }
+ } else {
+ close(attrdirfd);
+ len = -1;
+ goto out;
}
+ close(attrdirfd);
+ }
out:
- if (mapped_path)
- GF_FREE (mapped_path);
- return len;
+ GF_FREE(mapped_path);
+ return len;
}
-
int
solaris_flistxattr(int fd, char *list, size_t size)
{
- int attrdirfd = -1;
- ssize_t len = 0;
- DIR *dirptr = NULL;
- struct dirent *dent = NULL;
- int newfd = -1;
-
- attrdirfd = openat (fd, ".", O_RDONLY, 0);
- if (attrdirfd >= 0) {
- newfd = dup(attrdirfd);
- dirptr = fdopendir(newfd);
- if (dirptr) {
- while ((dent = readdir(dirptr))) {
- size_t listlen = strlen(dent->d_name);
- if (!strcmp(dent->d_name, ".") ||
- !strcmp(dent->d_name, "..")) {
- /* we don't want "." and ".." here */
- continue;
- }
- if (size == 0) {
- /* return the current size of the list
- of extended attribute names*/
- len += listlen + 1;
- } else {
- /* check size and copy entry + null
- into list. */
- if ((len + listlen + 1) > size) {
- errno = ERANGE;
- len = -1;
- break;
- } else {
- strncpy(list + len, dent->d_name, listlen);
- len += listlen;
- list[len] = '\0';
- ++len;
- }
- }
- }
-
- if (closedir(dirptr) == -1) {
- close (attrdirfd);
- return -1;
- }
+ int attrdirfd = -1;
+ ssize_t len = 0;
+ DIR *dirptr = NULL;
+ struct dirent *dent = NULL;
+ int newfd = -1;
+
+ attrdirfd = openat(fd, ".", O_RDONLY, 0);
+ if (attrdirfd >= 0) {
+ newfd = dup(attrdirfd);
+ dirptr = fdopendir(newfd);
+ if (dirptr) {
+ while ((dent = readdir(dirptr))) {
+ size_t listlen = strlen(dent->d_name);
+ if (!strcmp(dent->d_name, ".") || !strcmp(dent->d_name, "..")) {
+ /* we don't want "." and ".." here */
+ continue;
+ }
+ if (size == 0) {
+ /* return the current size of the list
+ of extended attribute names*/
+ len += listlen + 1;
} else {
- close (attrdirfd);
- return -1;
+ /* check size and copy entry + null
+ into list. */
+ if ((len + listlen + 1) > size) {
+ errno = ERANGE;
+ len = -1;
+ break;
+ } else {
+ strncpy(list + len, dent->d_name, listlen);
+ len += listlen;
+ list[len] = '\0';
+ ++len;
+ }
}
- close (attrdirfd);
+ }
+
+ if (closedir(dirptr) == -1) {
+ close(attrdirfd);
+ return -1;
+ }
+ } else {
+ close(attrdirfd);
+ return -1;
}
- return len;
+ close(attrdirfd);
+ }
+ return len;
}
-
int
-solaris_removexattr(const char *path, const char* key)
+solaris_removexattr(const char *path, const char *key)
{
- int ret = -1;
- int attrfd = -1;
- char *mapped_path = NULL;
-
- ret = solaris_xattr_resolve_path (path, &mapped_path);
- if (!ret) {
- attrfd = attropen (mapped_path, ".", O_RDONLY, 0);
- } else {
- attrfd = attropen (path, ".", O_RDONLY, 0);
- }
- if (attrfd >= 0) {
- ret = unlinkat (attrfd, key, 0);
- close (attrfd);
- } else {
- if (errno == ENOENT)
- errno = ENODATA;
- ret = -1;
- }
-
- if (mapped_path)
- GF_FREE (mapped_path);
-
- return ret;
+ int ret = -1;
+ int attrfd = -1;
+ char *mapped_path = NULL;
+
+ ret = solaris_xattr_resolve_path(path, &mapped_path);
+ if (!ret) {
+ attrfd = attropen(mapped_path, ".", O_RDONLY, 0);
+ } else {
+ attrfd = attropen(path, ".", O_RDONLY, 0);
+ }
+ if (attrfd >= 0) {
+ ret = unlinkat(attrfd, key, 0);
+ close(attrfd);
+ } else {
+ if (errno == ENOENT)
+ errno = ENODATA;
+ ret = -1;
+ }
+
+ GF_FREE(mapped_path);
+
+ return ret;
}
int
-solaris_getxattr(const char *path,
- const char* key,
- char *value,
- size_t size)
+solaris_getxattr(const char *path, const char *key, char *value, size_t size)
{
- int attrfd = -1;
- int ret = 0;
- char *mapped_path = NULL;
-
- ret = solaris_xattr_resolve_path (path, &mapped_path);
- if (!ret) {
- attrfd = attropen (mapped_path, key, O_RDONLY, 0);
+ int attrfd = -1;
+ int ret = 0;
+ char *mapped_path = NULL;
+
+ ret = solaris_xattr_resolve_path(path, &mapped_path);
+ if (!ret) {
+ attrfd = attropen(mapped_path, key, O_RDONLY, 0);
+ } else {
+ attrfd = attropen(path, key, O_RDONLY, 0);
+ }
+
+ if (attrfd >= 0) {
+ if (size == 0) {
+ struct stat buf;
+ fstat(attrfd, &buf);
+ ret = buf.st_size;
} else {
- attrfd = attropen (path, key, O_RDONLY, 0);
+ ret = read(attrfd, value, size);
}
-
- if (attrfd >= 0) {
- if (size == 0) {
- struct stat buf;
- fstat (attrfd, &buf);
- ret = buf.st_size;
- } else {
- ret = read (attrfd, value, size);
- }
- close (attrfd);
- } else {
- if (errno != ENOENT)
- gf_log ("libglusterfs", GF_LOG_INFO,
- "Couldn't read extended attribute for the file %s (%s)",
- path, strerror (errno));
- if (errno == ENOENT)
- errno = ENODATA;
- ret = -1;
- }
- if (mapped_path)
- GF_FREE (mapped_path);
- return ret;
+ close(attrfd);
+ } else {
+ if (errno != ENOENT)
+ gf_msg("libglusterfs", GF_LOG_INFO, errno,
+ LG_MSG_READ_ATTRIBUTE_FAILED,
+ "Couldn't read "
+ "extended attribute for the file %s",
+ path);
+ if (errno == ENOENT)
+ errno = ENODATA;
+ ret = -1;
+ }
+ GF_FREE(mapped_path);
+ return ret;
}
-
-char* strsep(char** str, const char* delims)
+char *
+strsep(char **str, const char *delims)
{
- char* token;
-
- if (*str==NULL) {
- /* No more tokens */
- return NULL;
+ char *token;
+
+ if (*str == NULL) {
+ /* No more tokens */
+ return NULL;
+ }
+
+ token = *str;
+ while (**str != '\0') {
+ if (strchr(delims, **str) != NULL) {
+ **str = '\0';
+ (*str)++;
+ return token;
}
-
- token=*str;
- while (**str!='\0') {
- if (strchr(delims,**str)!=NULL) {
- **str='\0';
- (*str)++;
- return token;
- }
- (*str)++;
- }
- /* There is no other token */
- *str=NULL;
- return token;
+ (*str)++;
+ }
+ /* There is no other token */
+ *str = NULL;
+ return token;
}
/* Code comes from libiberty */
int
-vasprintf (char **result, const char *format, va_list args)
+vasprintf(char **result, const char *format, va_list args)
{
- return gf_vasprintf(result, format, args);
+ return gf_vasprintf(result, format, args);
}
int
-asprintf (char **buf, const char *fmt, ...)
+asprintf(char **buf, const char *fmt, ...)
{
- int status;
- va_list ap;
+ int status;
+ va_list ap;
- va_start (ap, fmt);
- status = vasprintf (buf, fmt, ap);
- va_end (ap);
- return status;
+ va_start(ap, fmt);
+ status = vasprintf(buf, fmt, ap);
+ va_end(ap);
+ return status;
}
-int solaris_unlink (const char *path)
+int
+solaris_unlink(const char *path)
{
- char *mapped_path = NULL;
- struct stat stbuf = {0, };
- int ret = -1;
-
- ret = solaris_xattr_resolve_path (path, &mapped_path);
-
-
- if (!ret && mapped_path) {
- if (lstat(path, &stbuf)) {
- gf_log (THIS->name, GF_LOG_WARNING, "Stat failed on mapped"
- " file %s with error %d", mapped_path, errno);
- goto out;
- }
- if (stbuf.st_nlink == 1) {
- if(remove (mapped_path))
- gf_log (THIS->name, GF_LOG_WARNING, "Failed to remove mapped "
- "file %s. Errno %d", mapped_path, errno);
- }
-
+ char *mapped_path = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ int ret = -1;
+
+ ret = solaris_xattr_resolve_path(path, &mapped_path);
+
+ if (!ret && mapped_path) {
+ if (lstat(path, &stbuf)) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "Stat failed on "
+ "mapped file %s",
+ mapped_path);
+ goto out;
}
+ if (stbuf.st_nlink == 1) {
+ if (remove(mapped_path))
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to "
+ "remove mapped file %s",
+ mapped_path);
+ }
+ }
out:
- if (mapped_path)
- GF_FREE (mapped_path);
+ GF_FREE(mapped_path);
- return unlink (path);
+ return unlink(path);
}
int
-solaris_rename (const char *old_path, const char *new_path)
+solaris_rename(const char *old_path, const char *new_path)
{
- char *mapped_path = NULL;
- int ret = -1;
-
- ret = solaris_xattr_resolve_path (new_path, &mapped_path);
+ char *mapped_path = NULL;
+ int ret = -1;
+ ret = solaris_xattr_resolve_path(new_path, &mapped_path);
- if (!ret && mapped_path) {
- if (!remove (mapped_path))
- gf_log (THIS->name, GF_LOG_WARNING, "Failed to remove mapped "
- "file %s. Errno %d", mapped_path, errno);
- GF_FREE (mapped_path);
- }
-
- return rename(old_path, new_path);
+ if (!ret && mapped_path) {
+ if (!remove(mapped_path))
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to remove "
+ "mapped file %s.",
+ mapped_path);
+ GF_FREE(mapped_path);
+ }
+ return rename(old_path, new_path);
}
char *
-mkdtemp (char *tempstring)
+mkdtemp(char *tempstring)
{
- char *new_string = NULL;
- int ret = 0;
+ char *new_string = NULL;
+ int ret = 0;
- new_string = mkstemp (tempstring);
- if (!new_string)
- goto out;
+ new_string = mkstemp(tempstring);
+ if (!new_string)
+ goto out;
- ret = mkdir (new_string, 0700);
- if (ret < 0)
- new_string = NULL;
+ ret = mkdir(new_string, 0700);
+ if (ret < 0)
+ new_string = NULL;
out:
- return new_string;
+ return new_string;
}
#endif /* GF_SOLARIS_HOST_OS */
+#ifdef GF_BSD_HOST_OS
+void
+gf_extattr_list_reshape(char *bsd_list, ssize_t size)
+{
+ /*
+ * the format of bsd_list is
+ * <attr_len>attr<attr_len>attr...
+ * we try to reformat it as Linux's
+ * attr<\0>attr<\0>...
+ * */
+ if (NULL == bsd_list || size <= 0)
+ return;
+
+ size_t i = 0, j;
+
+ while (i < size) {
+ size_t attr_len = bsd_list[i];
+
+ for (j = i; j < i + attr_len; ++j)
+ bsd_list[j] = bsd_list[j + 1];
+ bsd_list[j] = '\0';
+
+ i += attr_len + 1;
+ gf_msg_debug("syscall", 0, "syscall debug: %lu", attr_len);
+ }
+}
+#endif /* GF_BSD_HOST_OS */
+
#ifndef HAVE_STRNLEN
size_t
strnlen(const char *string, size_t maxlen)
{
- int len = 0;
- while ((len < maxlen) && string[len])
- len++;
- return len;
+ int len = 0;
+ while ((len < maxlen) && string[len])
+ len++;
+ return len;
}
#endif /* STRNLEN */
-#ifdef THREAD_UNSAFE_BASENAME
-#include "../../contrib/libgen/basename_r.c"
+int
+gf_umount_lazy(char *xlname, char *path, int rmdir_flag)
+{
+ int ret = -1;
+ runner_t runner = {
+ 0,
+ };
+
+ runinit(&runner);
+#ifdef GF_LINUX_HOST_OS
+ runner_add_args(&runner, _PATH_UMOUNT, "-l", path, NULL);
+#else
+ if (rmdir_flag)
+ runner_add_args(&runner, SBIN_DIR "/umountd", "-r", path, NULL);
+ else
+ runner_add_args(&runner, SBIN_DIR "/umountd", path, NULL);
#endif
-#ifdef THREAD_UNSAFE_DIRNAME
-#include "../../contrib/libgen/dirname_r.c"
+ ret = runner_run(&runner);
+ if (ret) {
+ gf_msg(xlname, GF_LOG_ERROR, errno, LG_MSG_UNMOUNT_FAILED,
+ "Lazy unmount of %s", path);
+ }
+
+#ifdef GF_LINUX_HOST_OS
+ if (!ret && rmdir_flag) {
+ ret = sys_rmdir(path);
+ if (ret)
+ gf_msg(xlname, GF_LOG_WARNING, errno, LG_MSG_DIR_OP_FAILED,
+ "rmdir %s", path);
+ }
#endif
+
+ return ret;
+}
diff --git a/libglusterfs/src/compat.h b/libglusterfs/src/compat.h
deleted file mode 100644
index a6eec2dedca..00000000000
--- a/libglusterfs/src/compat.h
+++ /dev/null
@@ -1,393 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __COMPAT_H__
-#define __COMPAT_H__
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdint.h>
-#include "dict.h"
-
-#ifndef LLONG_MAX
-#define LLONG_MAX __LONG_LONG_MAX__ /* compat with old gcc */
-#endif /* LLONG_MAX */
-
-
-#ifdef GF_LINUX_HOST_OS
-
-#define UNIX_PATH_MAX 108
-
-#include <sys/un.h>
-#include <linux/limits.h>
-#include <sys/xattr.h>
-#include <endian.h>
-
-
-#ifndef HAVE_LLISTXATTR
-
-/* This part is valid only incase of old glibc which doesn't support
- * 'llistxattr()' system calls.
- */
-
-#define lremovexattr(path,key) removexattr(path,key)
-#define llistxattr(path,key,size) listxattr(path,key,size)
-#define lgetxattr(path, key, value, size) getxattr(path,key,value,size)
-#define lsetxattr(path,key,value,size,flags) setxattr(path,key,value,size,flags)
-
-#endif /* HAVE_LLISTXATTR */
-#endif /* GF_LINUX_HOST_OS */
-
-#ifdef GF_BSD_HOST_OS
-/* In case of FreeBSD and NetBSD */
-
-#define UNIX_PATH_MAX 104
-#include <sys/types.h>
-
-#include <sys/un.h>
-#include <sys/endian.h>
-#include <sys/extattr.h>
-#ifdef HAVE_SYS_XATTR_H
-#include <sys/xattr.h>
-#endif /* HAVE_SYS_XATTR_H */
-#include <limits.h>
-
-#include <libgen.h>
-
-#ifndef XATTR_CREATE
-enum {
- ATTR_CREATE = 1,
-#define XATTR_CREATE ATTR_CREATE
- ATTR_REPLACE = 2
-#define XATTR_REPLACE ATTR_REPLACE
-};
-#endif /* XATTR_CREATE */
-
-
-#ifndef sighandler_t
-#define sighandler_t sig_t
-#endif
-
-#ifndef ino64_t
-#define ino64_t ino_t
-#endif
-
-#ifndef EUCLEAN
-#define EUCLEAN 0
-#endif
-
-#include <netinet/in.h>
-#ifndef s6_addr16
-#define s6_addr16 __u6_addr.__u6_addr16
-#endif
-#ifndef s6_addr32
-#define s6_addr32 __u6_addr.__u6_addr32
-#endif
-
-/* Posix dictates NAME_MAX to be used */
-# ifndef NAME_MAX
-# ifdef MAXNAMLEN
-# define NAME_MAX MAXNAMLEN
-# else
-# define NAME_MAX 255
-# endif
-# endif
-
-#define F_GETLK64 F_GETLK
-#define F_SETLK64 F_SETLK
-#define F_SETLKW64 F_SETLKW
-
-#endif /* GF_BSD_HOST_OS */
-
-#ifdef GF_DARWIN_HOST_OS
-
-#define UNIX_PATH_MAX 104
-#include <sys/types.h>
-
-#include <sys/un.h>
-#include <machine/endian.h>
-#include <sys/xattr.h>
-#include <limits.h>
-
-#include <libgen.h>
-
-
-#if __DARWIN_64_BIT_INO_T == 0
-# error '64 bit ino_t is must for GlusterFS to work, Compile with "CFLAGS=-D__DARWIN_64_BIT_INO_T"'
-#endif /* __DARWIN_64_BIT_INO_T */
-
-
-#if __DARWIN_64_BIT_INO_T == 0
-# error '64 bit ino_t is must for GlusterFS to work, Compile with "CFLAGS=-D__DARWIN_64_BIT_INO_T"'
-#endif /* __DARWIN_64_BIT_INO_T */
-
-#ifndef sighandler_t
-#define sighandler_t sig_t
-#endif
-
-#ifndef EUCLEAN
-#define EUCLEAN 0
-#endif
-
-#include <netinet/in.h>
-#ifndef s6_addr16
-#define s6_addr16 __u6_addr.__u6_addr16
-#endif
-#ifndef s6_addr32
-#define s6_addr32 __u6_addr.__u6_addr32
-#endif
-
-/* Posix dictates NAME_MAX to be used */
-# ifndef NAME_MAX
-# ifdef MAXNAMLEN
-# define NAME_MAX MAXNAMLEN
-# else
-# define NAME_MAX 255
-# endif
-# endif
-
-#define F_GETLK64 F_GETLK
-#define F_SETLK64 F_SETLK
-#define F_SETLKW64 F_SETLKW
-
-#ifndef FTW_CONTINUE
- #define FTW_CONTINUE 0
-#endif
-
-int32_t gf_darwin_compat_listxattr (int len, dict_t *dict, int size);
-int32_t gf_darwin_compat_getxattr (const char *key, dict_t *dict);
-int32_t gf_darwin_compat_setxattr (dict_t *dict);
-
-#endif /* GF_DARWIN_HOST_OS */
-
-#ifdef GF_SOLARIS_HOST_OS
-
-#define UNIX_PATH_MAX 108
-#define EUCLEAN 117
-
-#include <sys/un.h>
-#include <limits.h>
-#include <sys/stat.h>
-#include <unistd.h>
-#include <sys/fcntl.h>
-#include <libgen.h>
-#include <sys/mkdev.h>
-
-#ifndef lchmod
-#define lchmod chmod
-#endif
-
-#define lgetxattr(path, key, value, size) solaris_getxattr(path,key,value,size)
-enum {
- ATTR_CREATE = 1,
-#define XATTR_CREATE ATTR_CREATE
- ATTR_REPLACE = 2
-#define XATTR_REPLACE ATTR_REPLACE
-};
-
-/* This patch is not present in Solaris 10 and before */
-#ifndef dirfd
-#define dirfd(dirp) ((dirp)->dd_fd)
-#endif
-
-/* Posix dictates NAME_MAX to be used */
-# ifndef NAME_MAX
-# ifdef MAXNAMLEN
-# define NAME_MAX MAXNAMLEN
-# else
-# define NAME_MAX 255
-# endif
-# endif
-
-#include <netinet/in.h>
-#ifndef s6_addr16
-#define S6_ADDR16(x) ((uint16_t*) ((char*)&(x).s6_addr))
-#endif
-#ifndef s6_addr32
-#define s6_addr32 _S6_un._S6_u32
-#endif
-
-#define lutimes(filename,times) utimes(filename,times)
-
-#ifndef SEEK_SET
-#define SEEK_SET 0
-#endif
-
-enum {
- DT_UNKNOWN = 0,
-# define DT_UNKNOWN DT_UNKNOWN
- DT_FIFO = 1,
-# define DT_FIFO DT_FIFO
- DT_CHR = 2,
-# define DT_CHR DT_CHR
- DT_DIR = 4,
-# define DT_DIR DT_DIR
- DT_BLK = 6,
-# define DT_BLK DT_BLK
- DT_REG = 8,
-# define DT_REG DT_REG
- DT_LNK = 10,
-# define DT_LNK DT_LNK
- DT_SOCK = 12,
-# define DT_SOCK DT_SOCK
- DT_WHT = 14
-# define DT_WHT DT_WHT
-};
-
-#ifndef _PATH_MOUNTED
- #define _PATH_MOUNTED "/etc/mtab"
-#endif
-
-#ifndef O_ASYNC
- #ifdef FASYNC
- #define O_ASYNC FASYNC
- #else
- #define O_ASYNC 0
- #endif
-#endif
-
-#ifndef FTW_CONTINUE
- #define FTW_CONTINUE 0
-#endif
-
-int asprintf(char **string_ptr, const char *format, ...);
-
-int vasprintf (char **result, const char *format, va_list args);
-char* strsep(char** str, const char* delims);
-int solaris_listxattr(const char *path, char *list, size_t size);
-int solaris_removexattr(const char *path, const char* key);
-int solaris_getxattr(const char *path, const char* key,
- char *value, size_t size);
-int solaris_setxattr(const char *path, const char* key, const char *value,
- size_t size, int flags);
-int solaris_fgetxattr(int fd, const char* key,
- char *value, size_t size);
-int solaris_fsetxattr(int fd, const char* key, const char *value,
- size_t size, int flags);
-int solaris_flistxattr(int fd, char *list, size_t size);
-
-int solaris_rename (const char *oldpath, const char *newpath);
-
-int solaris_unlink (const char *pathname);
-
-char *mkdtemp (char *temp);
-
-#define GF_SOLARIS_XATTR_DIR ".glusterfs_xattr_inode"
-
-int solaris_xattr_resolve_path (const char *real_path, char **path);
-
-#endif /* GF_SOLARIS_HOST_OS */
-
-#ifndef HAVE_ARGP
-#include "argp.h"
-#else
-#include <argp.h>
-#endif /* HAVE_ARGP */
-
-#ifndef HAVE_STRNLEN
-size_t strnlen(const char *string, size_t maxlen);
-#endif /* STRNLEN */
-
-#ifndef strdupa
-#define strdupa(s) \
- (__extension__ \
- ({ \
- __const char *__old = (s); \
- size_t __len = strlen (__old) + 1; \
- char *__new = (char *) __builtin_alloca (__len); \
- (char *) memcpy (__new, __old, __len); \
- }))
-#endif
-
-#define GF_DIR_ALIGN(x) (((x) + sizeof (uint64_t) - 1) & ~(sizeof (uint64_t) - 1))
-
-#include <sys/types.h>
-#include <dirent.h>
-
-static inline int32_t
-dirent_size (struct dirent *entry)
-{
- int32_t size = -1;
-
-#ifdef GF_BSD_HOST_OS
- size = GF_DIR_ALIGN (24 /* FIX MEEEE!!! */ + entry->d_namlen);
-#endif
-#ifdef GF_DARWIN_HOST_OS
- size = GF_DIR_ALIGN (24 /* FIX MEEEE!!! */ + entry->d_namlen);
-#endif
-#ifdef GF_LINUX_HOST_OS
- size = GF_DIR_ALIGN (24 /* FIX MEEEE!!! */ + entry->d_reclen);
-#endif
-#ifdef GF_SOLARIS_HOST_OS
- size = GF_DIR_ALIGN (24 /* FIX MEEEE!!! */ + entry->d_reclen);
-#endif
- return size;
-}
-
-#ifdef THREAD_UNSAFE_BASENAME
-char *basename_r(const char *);
-#define basename(path) basename_r(path)
-#endif /* THREAD_UNSAFE_BASENAME */
-
-#ifdef THREAD_UNSAFE_DIRNAME
-char *dirname_r(const char *path);
-#define dirname(path) dirname_r(path)
-#endif /* THREAD_UNSAFE_DIRNAME */
-
-#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
-/* Linux, Solaris, Cygwin */
-#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atim.tv_nsec)
-#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctim.tv_nsec)
-#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtim.tv_nsec)
-#define ST_ATIM_NSEC_SET(stbuf, val) ((stbuf)->st_atim.tv_nsec = (val))
-#define ST_MTIM_NSEC_SET(stbuf, val) ((stbuf)->st_mtim.tv_nsec = (val))
-#define ST_CTIM_NSEC_SET(stbuf, val) ((stbuf)->st_ctim.tv_nsec = (val))
-#elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
-/* FreeBSD, NetBSD */
-#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atimespec.tv_nsec)
-#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctimespec.tv_nsec)
-#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtimespec.tv_nsec)
-#define ST_ATIM_NSEC_SET(stbuf, val) ((stbuf)->st_atimespec.tv_nsec = (val))
-#define ST_MTIM_NSEC_SET(stbuf, val) ((stbuf)->st_mtimespec.tv_nsec = (val))
-#define ST_CTIM_NSEC_SET(stbuf, val) ((stbuf)->st_ctimespec.tv_nsec = (val))
-#else
-#define ST_ATIM_NSEC(stbuf) (0)
-#define ST_CTIM_NSEC(stbuf) (0)
-#define ST_MTIM_NSEC(stbuf) (0)
-#define ST_ATIM_NSEC_SET(stbuf, val) do { } while (0);
-#define ST_MTIM_NSEC_SET(stbuf, val) do { } while (0);
-#define ST_CTIM_NSEC_SET(stbuf, val) do { } while (0);
-#endif
-
-#ifndef IXDR_GET_LONG
-#define IXDR_GET_LONG(buf) ((long)IXDR_GET_U_INT32(buf))
-#endif
-
-#ifndef IXDR_PUT_LONG
-#define IXDR_PUT_LONG(buf, v) ((long)IXDR_PUT_INT32(buf, (long)(v)))
-#endif
-
-#ifndef IXDR_GET_U_LONG
-#define IXDR_GET_U_LONG(buf) ((u_long)IXDR_GET_LONG(buf))
-#endif
-
-#ifndef IXDR_PUT_U_LONG
-#define IXDR_PUT_U_LONG(buf, v) IXDR_PUT_LONG(buf, (long)(v))
-#endif
-
-#if defined(__GNUC__) && !defined(RELAX_POISONING)
-/* Use run API, see run.h */
-#pragma GCC poison system popen
-#endif
-
-#endif /* __COMPAT_H__ */
diff --git a/libglusterfs/src/ctx.c b/libglusterfs/src/ctx.c
new file mode 100644
index 00000000000..3d890b04ec9
--- /dev/null
+++ b/libglusterfs/src/ctx.c
@@ -0,0 +1,97 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <pthread.h>
+
+#include "glusterfs/globals.h"
+#include "glusterfs/glusterfs.h"
+#include "timer-wheel.h"
+
+glusterfs_ctx_t *
+glusterfs_ctx_new()
+{
+ glusterfs_ctx_t *ctx = NULL;
+
+ /* no GF_CALLOC here, gf_acct_mem_set_enable is not
+ yet decided at this point */
+ ctx = CALLOC(1, sizeof(*ctx));
+ if (!ctx) {
+ goto out;
+ }
+
+ ctx->mem_acct_enable = gf_global_mem_acct_enable_get();
+
+ INIT_LIST_HEAD(&ctx->graphs);
+ INIT_LIST_HEAD(&ctx->mempool_list);
+ INIT_LIST_HEAD(&ctx->volfile_list);
+
+ ctx->daemon_pipe[0] = -1;
+ ctx->daemon_pipe[1] = -1;
+
+ ctx->log.loglevel = DEFAULT_LOG_LEVEL;
+
+#if defined(RUN_WITH_MEMCHECK)
+ ctx->cmd_args.vgtool = _gf_memcheck;
+#elif defined(RUN_WITH_DRD)
+ ctx->cmd_args.vgtool = _gf_drd;
+#else
+ ctx->cmd_args.vgtool = _gf_none;
+#endif
+
+ /* lock is never destroyed! */
+ if (LOCK_INIT(&ctx->lock)) {
+ free(ctx);
+ ctx = NULL;
+ goto out;
+ }
+
+ GF_ATOMIC_INIT(ctx->stats.max_dict_pairs, 0);
+ GF_ATOMIC_INIT(ctx->stats.total_pairs_used, 0);
+ GF_ATOMIC_INIT(ctx->stats.total_dicts_used, 0);
+out:
+ return ctx;
+}
+
+static void
+glusterfs_ctx_tw_destroy(struct gf_ctx_tw *ctx_tw)
+{
+ if (ctx_tw->timer_wheel)
+ gf_tw_cleanup_timers(ctx_tw->timer_wheel);
+
+ GF_FREE(ctx_tw);
+}
+
+struct tvec_base *
+glusterfs_ctx_tw_get(glusterfs_ctx_t *ctx)
+{
+ struct gf_ctx_tw *ctx_tw = NULL;
+
+ LOCK(&ctx->lock);
+ {
+ if (ctx->tw) {
+ ctx_tw = GF_REF_GET(ctx->tw);
+ } else {
+ ctx_tw = GF_CALLOC(1, sizeof(struct gf_ctx_tw),
+ gf_common_mt_tw_ctx);
+ ctx_tw->timer_wheel = gf_tw_init_timers();
+ GF_REF_INIT(ctx_tw, glusterfs_ctx_tw_destroy);
+ ctx->tw = ctx_tw;
+ }
+ }
+ UNLOCK(&ctx->lock);
+
+ return ctx_tw->timer_wheel;
+}
+
+void
+glusterfs_ctx_tw_put(glusterfs_ctx_t *ctx)
+{
+ GF_REF_PUT(ctx->tw);
+}
diff --git a/libglusterfs/src/daemon.c b/libglusterfs/src/daemon.c
index 348e3ad4083..0a3e5438325 100644
--- a/libglusterfs/src/daemon.c
+++ b/libglusterfs/src/daemon.c
@@ -8,59 +8,58 @@
cases as published by the Free Software Foundation.
*/
-#include <fcntl.h>
#include <unistd.h>
#include <stdio.h>
-#include "daemon.h"
+#include "glusterfs/daemon.h"
int
-os_daemon_return (int nochdir, int noclose)
+os_daemon_return(int nochdir, int noclose)
{
- pid_t pid = -1;
- int ret = -1;
- FILE *ptr = NULL;
+ pid_t pid = -1;
+ int ret = -1;
+ FILE *ptr = NULL;
- ret = fork();
- if (ret)
- return ret;
+ ret = fork();
+ if (ret)
+ return ret;
- pid = setsid();
+ pid = setsid();
- if (pid == -1) {
- ret = -1;
- goto out;
- }
+ if (pid == -1) {
+ ret = -1;
+ goto out;
+ }
- if (!nochdir)
- ret = chdir("/");
+ if (!nochdir)
+ ret = chdir("/");
- if (!noclose) {
- ptr = freopen (DEVNULLPATH, "r", stdin);
- if (!ptr)
- goto out;
+ if (!noclose) {
+ ptr = freopen(DEVNULLPATH, "r", stdin);
+ if (!ptr)
+ goto out;
- ptr = freopen (DEVNULLPATH, "w", stdout);
- if (!ptr)
- goto out;
+ ptr = freopen(DEVNULLPATH, "w", stdout);
+ if (!ptr)
+ goto out;
- ptr = freopen (DEVNULLPATH, "w", stderr);
- if (!ptr)
- goto out;
- }
+ ptr = freopen(DEVNULLPATH, "w", stderr);
+ if (!ptr)
+ goto out;
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-os_daemon (int nochdir, int noclose)
+os_daemon(int nochdir, int noclose)
{
- int ret = -1;
+ int ret = -1;
- ret = os_daemon_return (nochdir, noclose);
- if (ret <= 0)
- return ret;
+ ret = os_daemon_return(nochdir, noclose);
+ if (ret <= 0)
+ return ret;
- _exit (0);
+ _exit(0);
}
diff --git a/libglusterfs/src/default-args.c b/libglusterfs/src/default-args.c
new file mode 100644
index 00000000000..a0ba1cfb299
--- /dev/null
+++ b/libglusterfs/src/default-args.c
@@ -0,0 +1,1651 @@
+/*
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs/defaults.h"
+
+int
+args_lookup_store(default_args_t *args, loc_t *loc, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_lookup_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (inode)
+ args->inode = inode_ref(inode);
+ if (buf)
+ args->stat = *buf;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_stat_store(default_args_t *args, loc_t *loc, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_stat_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret == 0)
+ args->stat = *buf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fstat_store(default_args_t *args, fd_t *fd, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fstat_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (buf)
+ args->stat = *buf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_truncate_store(default_args_t *args, loc_t *loc, off_t off, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->offset = off;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_truncate_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (prebuf)
+ args->prestat = *prebuf;
+ if (postbuf)
+ args->poststat = *postbuf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_ftruncate_store(default_args_t *args, fd_t *fd, off_t off, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+
+ args->offset = off;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_ftruncate_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (prebuf)
+ args->prestat = *prebuf;
+ if (postbuf)
+ args->poststat = *postbuf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_access_store(default_args_t *args, loc_t *loc, int32_t mask, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->mask = mask;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_access_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_readlink_store(default_args_t *args, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->size = size;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_readlink_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, const char *path, struct iatt *stbuf,
+ dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (path)
+ args->buf = gf_strdup(path);
+ if (stbuf)
+ args->stat = *stbuf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_mknod_store(default_args_t *args, loc_t *loc, mode_t mode, dev_t rdev,
+ mode_t umask, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->mode = mode;
+ args->rdev = rdev;
+ args->umask = umask;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_mknod_cbk_store(default_args_cbk_t *args, int op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (inode)
+ args->inode = inode_ref(inode);
+ if (buf)
+ args->stat = *buf;
+ if (preparent)
+ args->preparent = *preparent;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_mkdir_store(default_args_t *args, loc_t *loc, mode_t mode, mode_t umask,
+ dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->mode = mode;
+ args->umask = umask;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_mkdir_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (inode)
+ args->inode = inode_ref(inode);
+ if (buf)
+ args->stat = *buf;
+ if (preparent)
+ args->preparent = *preparent;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_unlink_store(default_args_t *args, loc_t *loc, int xflag, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->xflag = xflag;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_unlink_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (preparent)
+ args->preparent = *preparent;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_rmdir_store(default_args_t *args, loc_t *loc, int flags, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->flags = flags;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_rmdir_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (preparent)
+ args->preparent = *preparent;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_symlink_store(default_args_t *args, const char *linkname, loc_t *loc,
+ mode_t umask, dict_t *xdata)
+{
+ args->linkname = gf_strdup(linkname);
+ args->umask = umask;
+ loc_copy(&args->loc, loc);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_symlink_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (inode)
+ args->inode = inode_ref(inode);
+ if (buf)
+ args->stat = *buf;
+ if (preparent)
+ args->preparent = *preparent;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_rename_store(default_args_t *args, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ loc_copy(&args->loc, oldloc);
+ loc_copy(&args->loc2, newloc);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_rename_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (buf)
+ args->stat = *buf;
+ if (preoldparent)
+ args->preparent = *preoldparent;
+ if (postoldparent)
+ args->postparent = *postoldparent;
+ if (prenewparent)
+ args->preparent2 = *prenewparent;
+ if (postnewparent)
+ args->postparent2 = *postnewparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_link_store(default_args_t *args, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ loc_copy(&args->loc, oldloc);
+ loc_copy(&args->loc2, newloc);
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_link_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (inode)
+ args->inode = inode_ref(inode);
+ if (buf)
+ args->stat = *buf;
+ if (preparent)
+ args->preparent = *preparent;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_create_store(default_args_t *args, loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->flags = flags;
+ args->mode = mode;
+ args->umask = umask;
+ if (fd)
+ args->fd = fd_ref(fd);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_create_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (fd)
+ args->fd = fd_ref(fd);
+ if (inode)
+ args->inode = inode_ref(inode);
+ if (buf)
+ args->stat = *buf;
+ if (preparent)
+ args->preparent = *preparent;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_open_store(default_args_t *args, loc_t *loc, int32_t flags, fd_t *fd,
+ dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->flags = flags;
+ if (fd)
+ args->fd = fd_ref(fd);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_open_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ fd_t *fd, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (fd)
+ args->fd = fd_ref(fd);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_readv_store(default_args_t *args, fd_t *fd, size_t size, off_t off,
+ uint32_t flags, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+ args->size = size;
+ args->offset = off;
+ args->flags = flags;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_readv_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret >= 0) {
+ args->vector = iov_dup(vector, count);
+ args->count = count;
+ args->stat = *stbuf;
+ args->iobref = iobref_ref(iobref);
+ }
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_writev_store(default_args_t *args, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+ args->vector = iov_dup(vector, count);
+ args->count = count;
+ args->offset = off;
+ args->flags = flags;
+ args->iobref = iobref_ref(iobref);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_writev_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret >= 0)
+ args->poststat = *postbuf;
+ if (prebuf)
+ args->prestat = *prebuf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_put_store(default_args_t *args, loc_t *loc, mode_t mode, mode_t umask,
+ uint32_t flags, struct iovec *vector, int32_t count, off_t off,
+ struct iobref *iobref, dict_t *xattr, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->mode = mode;
+ args->umask = umask;
+ args->flags = flags;
+ args->vector = iov_dup(vector, count);
+ args->count = count;
+ args->offset = off;
+ args->iobref = iobref_ref(iobref);
+ if (xattr)
+ args->xattr = dict_ref(xattr);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_put_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret >= 0)
+ args->stat = *buf;
+ if (inode)
+ args->inode = inode_ref(inode);
+ if (preparent)
+ args->preparent = *preparent;
+ if (postparent)
+ args->postparent = *postparent;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+int
+args_flush_store(default_args_t *args, fd_t *fd, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_flush_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fsync_store(default_args_t *args, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+ args->datasync = datasync;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_fsync_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (prebuf)
+ args->prestat = *prebuf;
+ if (postbuf)
+ args->poststat = *postbuf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_opendir_store(default_args_t *args, loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ if (fd)
+ args->fd = fd_ref(fd);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_opendir_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (fd)
+ args->fd = fd_ref(fd);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fsyncdir_store(default_args_t *args, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+ args->datasync = datasync;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+int
+args_fsyncdir_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_statfs_store(default_args_t *args, loc_t *loc, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_statfs_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct statvfs *buf, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret == 0)
+ args->statvfs = *buf;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_setxattr_store(default_args_t *args, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ /* TODO */
+ if (dict)
+ args->xattr = dict_ref(dict);
+ args->flags = flags;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_setxattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_getxattr_store(default_args_t *args, loc_t *loc, const char *name,
+ dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+
+ if (name)
+ args->name = gf_strdup(name);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_getxattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (dict)
+ args->xattr = dict_ref(dict);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fsetxattr_store(default_args_t *args, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ args->fd = fd_ref(fd);
+
+ if (dict)
+ args->xattr = dict_ref(dict);
+ args->flags = flags;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_fsetxattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fgetxattr_store(default_args_t *args, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ args->fd = fd_ref(fd);
+
+ if (name)
+ args->name = gf_strdup(name);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_fgetxattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (dict)
+ args->xattr = dict_ref(dict);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_removexattr_store(default_args_t *args, loc_t *loc, const char *name,
+ dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->name = gf_strdup(name);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_removexattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fremovexattr_store(default_args_t *args, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ args->fd = fd_ref(fd);
+ args->name = gf_strdup(name);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_fremovexattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_lk_store(default_args_t *args, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+ args->cmd = cmd;
+ args->lock = *lock;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_lk_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct gf_flock *lock, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret == 0)
+ args->lock = *lock;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_inodelk_store(default_args_t *args, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ if (volume)
+ args->volume = gf_strdup(volume);
+
+ loc_copy(&args->loc, loc);
+ args->cmd = cmd;
+ args->lock = *lock;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_inodelk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_finodelk_store(default_args_t *args, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+
+ if (volume)
+ args->volume = gf_strdup(volume);
+
+ args->cmd = cmd;
+ args->lock = *lock;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_finodelk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_entrylk_store(default_args_t *args, const char *volume, loc_t *loc,
+ const char *name, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ if (volume)
+ args->volume = gf_strdup(volume);
+
+ loc_copy(&args->loc, loc);
+
+ args->entrylkcmd = cmd;
+ args->entrylktype = type;
+
+ if (name)
+ args->name = gf_strdup(name);
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_entrylk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fentrylk_store(default_args_t *args, const char *volume, fd_t *fd,
+ const char *name, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ if (volume)
+ args->volume = gf_strdup(volume);
+
+ if (fd)
+ args->fd = fd_ref(fd);
+ args->entrylkcmd = cmd;
+ args->entrylktype = type;
+ if (name)
+ args->name = gf_strdup(name);
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_fentrylk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_readdirp_store(default_args_t *args, fd_t *fd, size_t size, off_t off,
+ dict_t *xdata)
+{
+ args->fd = fd_ref(fd);
+ args->size = size;
+ args->offset = off;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_readdirp_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ gf_dirent_t *stub_entry = NULL, *entry = NULL;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret > 0) {
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ stub_entry = gf_dirent_for_name(entry->d_name);
+ if (!stub_entry)
+ goto out;
+ stub_entry->d_off = entry->d_off;
+ stub_entry->d_ino = entry->d_ino;
+ stub_entry->d_stat = entry->d_stat;
+ stub_entry->d_type = entry->d_type;
+ if (entry->inode)
+ stub_entry->inode = inode_ref(entry->inode);
+ if (entry->dict)
+ stub_entry->dict = dict_ref(entry->dict);
+ list_add_tail(&stub_entry->list, &args->entries.list);
+ }
+ }
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+out:
+ return 0;
+}
+
+int
+args_readdir_store(default_args_t *args, fd_t *fd, size_t size, off_t off,
+ dict_t *xdata)
+{
+ args->fd = fd_ref(fd);
+ args->size = size;
+ args->offset = off;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_readdir_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries, dict_t *xdata)
+{
+ gf_dirent_t *stub_entry = NULL, *entry = NULL;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret > 0) {
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ stub_entry = gf_dirent_for_name(entry->d_name);
+ if (!stub_entry)
+ goto out;
+ stub_entry->d_off = entry->d_off;
+ stub_entry->d_ino = entry->d_ino;
+ stub_entry->d_type = entry->d_type;
+ list_add_tail(&stub_entry->list, &args->entries.list);
+ }
+ }
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+out:
+ return 0;
+}
+
+int
+args_rchecksum_store(default_args_t *args, fd_t *fd, off_t offset, int32_t len,
+ dict_t *xdata)
+{
+ args->fd = fd_ref(fd);
+ args->offset = offset;
+ args->size = len;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_rchecksum_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret >= 0) {
+ args->weak_checksum = weak_checksum;
+ args->strong_checksum = gf_memdup(strong_checksum,
+ SHA256_DIGEST_LENGTH);
+ }
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_xattrop_store(default_args_t *args, loc_t *loc, gf_xattrop_flags_t optype,
+ dict_t *xattr, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+
+ args->optype = optype;
+ args->xattr = dict_ref(xattr);
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_xattrop_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xattr)
+ args->xattr = dict_ref(xattr);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fxattrop_store(default_args_t *args, fd_t *fd, gf_xattrop_flags_t optype,
+ dict_t *xattr, dict_t *xdata)
+{
+ args->fd = fd_ref(fd);
+
+ args->optype = optype;
+ args->xattr = dict_ref(xattr);
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_fxattrop_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xattr)
+ args->xattr = dict_ref(xattr);
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_setattr_store(default_args_t *args, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+
+ if (stbuf)
+ args->stat = *stbuf;
+
+ args->valid = valid;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_setattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (statpre)
+ args->prestat = *statpre;
+ if (statpost)
+ args->poststat = *statpost;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fsetattr_store(default_args_t *args, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+
+ if (stbuf)
+ args->stat = *stbuf;
+
+ args->valid = valid;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+int
+args_fsetattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (statpre)
+ args->prestat = *statpre;
+ if (statpost)
+ args->poststat = *statpost;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_fallocate_store(default_args_t *args, fd_t *fd, int32_t mode, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+
+ args->flags = mode;
+ args->offset = offset;
+ args->size = len;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_fallocate_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (statpre)
+ args->prestat = *statpre;
+ if (statpost)
+ args->poststat = *statpost;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_discard_store(default_args_t *args, fd_t *fd, off_t offset, size_t len,
+ dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+
+ args->offset = offset;
+ args->size = len;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_discard_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (statpre)
+ args->prestat = *statpre;
+ if (statpost)
+ args->poststat = *statpost;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_zerofill_store(default_args_t *args, fd_t *fd, off_t offset, off_t len,
+ dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+
+ args->offset = offset;
+ args->size = len;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_zerofill_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (statpre)
+ args->prestat = *statpre;
+ if (statpost)
+ args->poststat = *statpost;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_ipc_store(default_args_t *args, int32_t op, dict_t *xdata)
+{
+ args->cmd = op;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_ipc_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_seek_store(default_args_t *args, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ if (fd)
+ args->fd = fd_ref(fd);
+
+ args->offset = offset;
+ args->what = what;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_seek_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ off_t offset, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ args->offset = offset;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_getactivelk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, lock_migration_info_t *locklist,
+ dict_t *xdata)
+{
+ lock_migration_info_t *stub_entry = NULL, *entry = NULL;
+ int ret = 0;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ /*op_ret needs to carry the number of locks present in the list*/
+ if (op_ret > 0) {
+ list_for_each_entry(entry, &locklist->list, list)
+ {
+ stub_entry = GF_CALLOC(1, sizeof(*stub_entry), gf_common_mt_char);
+ if (!stub_entry) {
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&stub_entry->list);
+ stub_entry->flock = entry->flock;
+
+ stub_entry->lk_flags = entry->lk_flags;
+
+ stub_entry->client_uid = gf_strdup(entry->client_uid);
+ if (!stub_entry->client_uid) {
+ GF_FREE(stub_entry);
+ ret = -1;
+ goto out;
+ }
+
+ list_add_tail(&stub_entry->list, &args->locklist.list);
+ }
+ }
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+out:
+ return ret;
+}
+
+int
+args_setactivelk_store(default_args_t *args, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata)
+{
+ lock_migration_info_t *stub_entry = NULL, *entry = NULL;
+ int ret = 0;
+
+ list_for_each_entry(entry, &locklist->list, list)
+ {
+ stub_entry = GF_CALLOC(1, sizeof(*stub_entry), gf_common_mt_lock_mig);
+ if (!stub_entry) {
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&stub_entry->list);
+ stub_entry->flock = entry->flock;
+
+ stub_entry->lk_flags = entry->lk_flags;
+
+ stub_entry->client_uid = gf_strdup(entry->client_uid);
+ if (!stub_entry->client_uid) {
+ GF_FREE(stub_entry);
+ ret = -1;
+ goto out;
+ }
+
+ list_add_tail(&stub_entry->list, &args->locklist.list);
+ }
+
+ loc_copy(&args->loc, loc);
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+out:
+ return ret;
+}
+
+void
+args_lease_store(default_args_t *args, loc_t *loc, struct gf_lease *lease,
+ dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->lease = *lease;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return;
+}
+
+void
+args_lease_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct gf_lease *lease, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret == 0)
+ args->lease = *lease;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+}
+
+int
+args_icreate_store(default_args_t *args, loc_t *loc, mode_t mode, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+ args->mode = mode;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_namelink_store(default_args_t *args, loc_t *loc, dict_t *xdata)
+{
+ loc_copy(&args->loc, loc);
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ return 0;
+}
+
+int
+args_copy_file_range_store(default_args_t *args, fd_t *fd_in, off64_t off_in,
+ fd_t *fd_out, off64_t off_out, size_t len,
+ uint32_t flags, dict_t *xdata)
+{
+ if (fd_in)
+ args->fd = fd_ref(fd_in);
+ if (fd_out)
+ args->fd_dst = fd_ref(fd_out);
+ args->size = len;
+ args->off_in = off_in;
+ args->off_out = off_out;
+ args->flags = flags;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+int
+args_copy_file_range_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata)
+{
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (op_ret >= 0) {
+ if (postbuf_dst)
+ args->poststat = *postbuf_dst;
+ if (prebuf_dst)
+ args->prestat = *prebuf_dst;
+ if (stbuf)
+ args->stat = *stbuf;
+ }
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ return 0;
+}
+
+void
+args_cbk_wipe(default_args_cbk_t *args_cbk)
+{
+ if (!args_cbk)
+ return;
+ if (args_cbk->inode)
+ inode_unref(args_cbk->inode);
+
+ GF_FREE((char *)args_cbk->buf);
+
+ GF_FREE(args_cbk->vector);
+
+ if (args_cbk->iobref)
+ iobref_unref(args_cbk->iobref);
+
+ if (args_cbk->fd)
+ fd_unref(args_cbk->fd);
+
+ if (args_cbk->xattr)
+ dict_unref(args_cbk->xattr);
+
+ GF_FREE(args_cbk->strong_checksum);
+
+ if (args_cbk->xdata)
+ dict_unref(args_cbk->xdata);
+
+ if (!list_empty(&args_cbk->entries.list))
+ gf_dirent_free(&args_cbk->entries);
+}
+
+void
+args_wipe(default_args_t *args)
+{
+ if (!args)
+ return;
+
+ loc_wipe(&args->loc);
+
+ loc_wipe(&args->loc2);
+
+ if (args->fd)
+ fd_unref(args->fd);
+
+ GF_FREE((char *)args->linkname);
+
+ GF_FREE(args->vector);
+
+ if (args->iobref)
+ iobref_unref(args->iobref);
+
+ if (args->xattr)
+ dict_unref(args->xattr);
+
+ if (args->xdata)
+ dict_unref(args->xdata);
+
+ GF_FREE((char *)args->name);
+
+ GF_FREE((char *)args->volume);
+}
+
+void
+args_cbk_init(default_args_cbk_t *args_cbk)
+{
+ INIT_LIST_HEAD(&args_cbk->entries);
+}
diff --git a/libglusterfs/src/defaults-tmpl.c b/libglusterfs/src/defaults-tmpl.c
new file mode 100644
index 00000000000..3cf707f42aa
--- /dev/null
+++ b/libglusterfs/src/defaults-tmpl.c
@@ -0,0 +1,247 @@
+/*
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* libglusterfs/src/defaults.c:
+ This file contains functions, which are used to fill the 'fops', 'cbk'
+ structures in the xlator structures, if they are not written. Here, all the
+ function calls are plainly forwarded to the first child of the xlator, and
+ all the *_cbk function does plain STACK_UNWIND of the frame, and returns.
+
+ This function also implements *_resume () functions, which does same
+ operation as a fop().
+
+ All the functions are plain enough to understand.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "glusterfs/xlator.h"
+#include "glusterfs/defaults.h"
+
+#pragma generate
+
+struct xlator_fops _default_fops = {
+ .create = default_create,
+ .open = default_open,
+ .stat = default_stat,
+ .readlink = default_readlink,
+ .mknod = default_mknod,
+ .mkdir = default_mkdir,
+ .unlink = default_unlink,
+ .rmdir = default_rmdir,
+ .symlink = default_symlink,
+ .rename = default_rename,
+ .link = default_link,
+ .truncate = default_truncate,
+ .readv = default_readv,
+ .writev = default_writev,
+ .statfs = default_statfs,
+ .flush = default_flush,
+ .fsync = default_fsync,
+ .setxattr = default_setxattr,
+ .getxattr = default_getxattr,
+ .fsetxattr = default_fsetxattr,
+ .fgetxattr = default_fgetxattr,
+ .removexattr = default_removexattr,
+ .fremovexattr = default_fremovexattr,
+ .opendir = default_opendir,
+ .readdir = default_readdir,
+ .readdirp = default_readdirp,
+ .fsyncdir = default_fsyncdir,
+ .access = default_access,
+ .ftruncate = default_ftruncate,
+ .fstat = default_fstat,
+ .lk = default_lk,
+ .inodelk = default_inodelk,
+ .finodelk = default_finodelk,
+ .entrylk = default_entrylk,
+ .fentrylk = default_fentrylk,
+ .lookup = default_lookup,
+ .rchecksum = default_rchecksum,
+ .xattrop = default_xattrop,
+ .fxattrop = default_fxattrop,
+ .setattr = default_setattr,
+ .fsetattr = default_fsetattr,
+ .fallocate = default_fallocate,
+ .discard = default_discard,
+ .zerofill = default_zerofill,
+ .ipc = default_ipc,
+ .seek = default_seek,
+
+ .getspec = default_getspec,
+ .getactivelk = default_getactivelk,
+ .setactivelk = default_setactivelk,
+ .put = default_put,
+ .icreate = default_icreate,
+ .namelink = default_namelink,
+ .copy_file_range = default_copy_file_range,
+};
+struct xlator_fops *default_fops = &_default_fops;
+
+/*
+ * Remaining functions don't follow the fop calling conventions, so they're
+ * not generated.
+ */
+
+int32_t
+default_forget(xlator_t *this, inode_t *inode)
+{
+ gf_log_callingfn(this->name, GF_LOG_DEBUG,
+ "xlator does not "
+ "implement forget_cbk");
+ return 0;
+}
+
+int32_t
+default_releasedir(xlator_t *this, fd_t *fd)
+{
+ gf_log_callingfn(this->name, GF_LOG_DEBUG,
+ "xlator does not "
+ "implement releasedir_cbk");
+ return 0;
+}
+
+int32_t
+default_release(xlator_t *this, fd_t *fd)
+{
+ gf_log_callingfn(this->name, GF_LOG_DEBUG,
+ "xlator does not "
+ "implement release_cbk");
+ return 0;
+}
+
+/* notify */
+int
+default_notify(xlator_t *this, int32_t event, void *data, ...)
+{
+ GF_UNUSED int ret = 0;
+ xlator_t *victim = data;
+
+ glusterfs_graph_t *graph = NULL;
+
+ GF_VALIDATE_OR_GOTO("notify", this, out);
+ graph = this->graph;
+ GF_VALIDATE_OR_GOTO(this->name, graph, out);
+
+ switch (event) {
+ case GF_EVENT_PARENT_UP:
+ case GF_EVENT_PARENT_DOWN: {
+ xlator_list_t *list = this->children;
+
+ while (list) {
+ if (victim && victim->cleanup_starting)
+ xlator_notify(list->xlator, event, victim);
+ else
+ xlator_notify(list->xlator, event, this);
+ list = list->next;
+ }
+ } break;
+ case GF_EVENT_CHILD_CONNECTING:
+ case GF_EVENT_CHILD_DOWN:
+ case GF_EVENT_CHILD_UP:
+ case GF_EVENT_AUTH_FAILED: {
+ xlator_list_t *parent = this->parents;
+
+ /*
+ * Handle case of CHILD_* & AUTH_FAILED event specially, send
+ * it to fuse.
+ */
+ if (!parent && this->ctx && this->ctx->master) {
+ xlator_notify(this->ctx->master, event, this->graph, NULL);
+ }
+
+ while (parent) {
+ if (parent->xlator->init_succeeded)
+ xlator_notify(parent->xlator, event, this, NULL);
+ parent = parent->next;
+ }
+
+ if (event == GF_EVENT_CHILD_DOWN &&
+ !(this->ctx && this->ctx->master) && (graph->top == this)) {
+ /* Make sure this is not a daemon with master xlator */
+ pthread_mutex_lock(&graph->mutex);
+ {
+ if (graph->parent_down ==
+ graph_total_client_xlator(graph)) {
+ graph->used = 0;
+ pthread_cond_broadcast(&graph->child_down_cond);
+ }
+ }
+ pthread_mutex_unlock(&graph->mutex);
+ }
+ } break;
+ case GF_EVENT_UPCALL: {
+ xlator_list_t *parent = this->parents;
+
+ if (!parent && this->ctx && this->ctx->master)
+ xlator_notify(this->ctx->master, event, data, NULL);
+
+ while (parent) {
+ if (parent->xlator->init_succeeded)
+ xlator_notify(parent->xlator, event, data, NULL);
+ parent = parent->next;
+ }
+ } break;
+ case GF_EVENT_CHILD_PING: {
+ xlator_list_t *parent = this->parents;
+
+ while (parent) {
+ if (parent->xlator->init_succeeded)
+ XLATOR_NOTIFY(ret, parent->xlator, event, this, data);
+ parent = parent->next;
+ }
+ } break;
+ case GF_EVENT_CLEANUP: {
+ xlator_list_t *list = this->children;
+
+ while (list) {
+ xlator_notify(list->xlator, event, this);
+ list = list->next;
+ }
+ } break;
+
+ default: {
+ xlator_list_t *parent = this->parents;
+
+ while (parent) {
+ if (parent->xlator->init_succeeded)
+ xlator_notify(parent->xlator, event, this, NULL);
+ parent = parent->next;
+ }
+ }
+ /*
+ * Apparently our picky-about-everything else coding standard allows
+ * adjacent same-indendation-level close braces. Clearly it has
+ * nothing to do with readability.
+ */
+ }
+out:
+ return 0;
+}
+
+int32_t
+default_mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ ret = xlator_mem_acct_init(this, gf_common_mt_end);
+
+ return ret;
+}
+
+void
+default_fini(xlator_t *this)
+{
+ if (this && this->private)
+ GF_FREE(this->private);
+}
diff --git a/libglusterfs/src/defaults.c b/libglusterfs/src/defaults.c
deleted file mode 100644
index bf4d019349d..00000000000
--- a/libglusterfs/src/defaults.c
+++ /dev/null
@@ -1,1354 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/* libglusterfs/src/defaults.c:
- This file contains functions, which are used to fill the 'fops', 'cbk'
- structures in the xlator structures, if they are not written. Here, all the
- function calls are plainly forwared to the first child of the xlator, and
- all the *_cbk function does plain STACK_UNWIND of the frame, and returns.
-
- This function also implements *_resume () functions, which does same
- operation as a fop().
-
- All the functions are plain enough to understand.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-
-/* _CBK function section */
-
-int32_t
-default_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata, struct iatt *postparent)
-{
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
- xdata, postparent);
- return 0;
-}
-
-int32_t
-default_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
-
-
-int32_t
-default_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
- postbuf, xdata);
- return 0;
-}
-
-int32_t
-default_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
- postbuf, xdata);
- return 0;
-}
-
-int32_t
-default_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-default_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *buf, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, buf,
- xdata);
- return 0;
-}
-
-
-int32_t
-default_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode,
- buf, preparent, postparent, xdata);
- return 0;
-}
-
-int32_t
-default_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode,
- buf, preparent, postparent, xdata);
- return 0;
-}
-
-int32_t
-default_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
- postparent, xdata);
- return 0;
-}
-
-int32_t
-default_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
- postparent, xdata);
- return 0;
-}
-
-
-int32_t
-default_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
-}
-
-
-int32_t
-default_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent, xdata);
- return 0;
-}
-
-
-int32_t
-default_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
-}
-
-
-int32_t
-default_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata);
- return 0;
-}
-
-int32_t
-default_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
- return 0;
-}
-
-int32_t
-default_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref, xdata);
- return 0;
-}
-
-
-int32_t
-default_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
- return 0;
-}
-
-
-int32_t
-default_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-
-
-int32_t
-default_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
- return 0;
-}
-
-int32_t
-default_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
-
-int32_t
-default_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
- return 0;
-}
-
-int32_t
-default_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-default_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
-
-
-int32_t
-default_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-
-int32_t
-default_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-
-
-int32_t
-default_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
-
-
-int32_t
-default_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
-
-int32_t
-default_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
-
-int32_t
-default_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
-
-
-int32_t
-default_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-
-int32_t
-default_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-default_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
- return 0;
-}
-
-int32_t
-default_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-
-int32_t
-default_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-default_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-default_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-
-int32_t
-default_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
- uint8_t *strong_checksum,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, weak_checksum,
- strong_checksum, xdata);
- return 0;
-}
-
-
-int32_t
-default_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
- return 0;
-}
-
-
-int32_t
-default_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
- return 0;
-}
-
-int32_t
-default_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre,
- statpost, xdata);
- return 0;
-}
-
-int32_t
-default_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, statpre,
- statpost, xdata);
- return 0;
-}
-
-int32_t
-default_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, char *spec_data)
-{
- STACK_UNWIND_STRICT (getspec, frame, op_ret, op_errno, spec_data);
- return 0;
-}
-
-/* RESUME */
-
-int32_t
-default_fgetxattr_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
- return 0;
-}
-
-int32_t
-default_fsetxattr_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
- return 0;
-}
-
-int32_t
-default_setxattr_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
- return 0;
-}
-
-int32_t
-default_statfs_resume (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- STACK_WIND (frame, default_statfs_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs, loc, xdata);
- return 0;
-}
-
-int32_t
-default_fsyncdir_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_fsyncdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata);
- return 0;
-}
-
-int32_t
-default_opendir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_opendir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
- return 0;
-}
-
-int32_t
-default_fstat_resume (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
- return 0;
-}
-
-int32_t
-default_fsync_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
- return 0;
-}
-
-int32_t
-default_flush_resume (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd, xdata);
- return 0;
-}
-
-int32_t
-default_writev_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t off,
- uint32_t flags, struct iobref *iobref, dict_t *xdata)
-{
- STACK_WIND (frame, default_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, off,
- flags, iobref, xdata);
- return 0;
-}
-
-int32_t
-default_readv_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset, uint32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
- return 0;
-}
-
-
-int32_t
-default_open_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
- return 0;
-}
-
-int32_t
-default_create_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
- fd, xdata);
- return 0;
-}
-
-int32_t
-default_link_resume (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
-{
- STACK_WIND (frame, default_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
- return 0;
-}
-
-int32_t
-default_rename_resume (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
-{
- STACK_WIND (frame, default_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
- return 0;
-}
-
-
-int
-default_symlink_resume (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, mode_t umask,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_symlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
- xdata);
- return 0;
-}
-
-int32_t
-default_rmdir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
- return 0;
-}
-
-int32_t
-default_unlink_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int xflag, dict_t *xdata)
-{
- STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
- return 0;
-}
-
-int
-default_mkdir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, mode_t umask, dict_t *xdata)
-{
- STACK_WIND (frame, default_mkdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
- return 0;
-}
-
-
-int
-default_mknod_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
-{
- STACK_WIND (frame, default_mknod_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
- xdata);
- return 0;
-}
-
-int32_t
-default_readlink_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- size_t size, dict_t *xdata)
-{
- STACK_WIND (frame, default_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
- return 0;
-}
-
-
-int32_t
-default_access_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t mask, dict_t *xdata)
-{
- STACK_WIND (frame, default_access_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access, loc, mask, xdata);
- return 0;
-}
-
-int32_t
-default_ftruncate_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset, dict_t *xdata)
-{
- STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
- return 0;
-}
-
-int32_t
-default_getxattr_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
- return 0;
-}
-
-
-int32_t
-default_xattrop_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
-{
- STACK_WIND (frame, default_xattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata);
- return 0;
-}
-
-int32_t
-default_fxattrop_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
-{
- STACK_WIND (frame, default_fxattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata);
- return 0;
-}
-
-int32_t
-default_removexattr_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- STACK_WIND (frame, default_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
- return 0;
-}
-
-int32_t
-default_fremovexattr_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- STACK_WIND (frame, default_fremovexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
- return 0;
-}
-
-int32_t
-default_lk_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t cmd, struct gf_flock *lock, dict_t *xdata)
-{
- STACK_WIND (frame, default_lk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata);
- return 0;
-}
-
-
-int32_t
-default_inodelk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *lock,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_inodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- volume, loc, cmd, lock, xdata);
- return 0;
-}
-
-int32_t
-default_finodelk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *lock,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk,
- volume, fd, cmd, lock, xdata);
- return 0;
-}
-
-int32_t
-default_entrylk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_entrylk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->entrylk,
- volume, loc, basename, cmd, type, xdata);
- return 0;
-}
-
-int32_t
-default_fentrylk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_fentrylk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fentrylk,
- volume, fd, basename, cmd, type, xdata);
- return 0;
-}
-
-int32_t
-default_rchecksum_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset, int32_t len,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_rchecksum_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rchecksum, fd, offset, len, xdata);
- return 0;
-}
-
-
-int32_t
-default_readdir_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_readdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata);
- return 0;
-}
-
-
-int32_t
-default_readdirp_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off, dict_t *xdata)
-{
- STACK_WIND (frame, default_readdirp_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
- return 0;
-}
-
-int32_t
-default_setattr_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
- return 0;
-}
-
-int32_t
-default_truncate_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
- return 0;
-}
-
-int32_t
-default_stat_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
- return 0;
-}
-
-int32_t
-default_lookup_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
- return 0;
-}
-
-int32_t
-default_fsetattr_resume (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, xdata);
- return 0;
-}
-
-/* FOPS */
-
-int32_t
-default_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
- return 0;
-}
-
-int32_t
-default_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
- return 0;
-}
-
-int32_t
-default_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
- return 0;
-}
-
-int32_t
-default_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- STACK_WIND (frame, default_statfs_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs, loc, xdata);
- return 0;
-}
-
-int32_t
-default_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_fsyncdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata);
- return 0;
-}
-
-int32_t
-default_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_opendir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
- return 0;
-}
-
-int32_t
-default_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_fstat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
- return 0;
-}
-
-int32_t
-default_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_fsync_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
- return 0;
-}
-
-int32_t
-default_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_flush_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd, xdata);
- return 0;
-}
-
-int32_t
-default_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t off, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
-{
- STACK_WIND (frame, default_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, off,
- flags, iobref, xdata);
- return 0;
-}
-
-int32_t
-default_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame, default_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
- return 0;
-}
-
-
-int32_t
-default_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
- return 0;
-}
-
-int32_t
-default_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame, default_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
- fd, xdata);
- return 0;
-}
-
-int32_t
-default_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
- return 0;
-}
-
-int32_t
-default_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
-{
- STACK_WIND (frame, default_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
- return 0;
-}
-
-
-int
-default_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, mode_t umask, dict_t *xdata)
-{
- STACK_WIND (frame, default_symlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
- xdata);
- return 0;
-}
-
-int32_t
-default_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
- return 0;
-}
-
-int32_t
-default_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
- return 0;
-}
-
-int
-default_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
-{
- STACK_WIND (frame, default_mkdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
- return 0;
-}
-
-
-int
-default_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata)
-{
- STACK_WIND (frame, default_mknod_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
- xdata);
- return 0;
-}
-
-int32_t
-default_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, dict_t *xdata)
-{
- STACK_WIND (frame, default_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
- return 0;
-}
-
-
-int32_t
-default_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, dict_t *xdata)
-{
- STACK_WIND (frame, default_access_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access, loc, mask, xdata);
- return 0;
-}
-
-int32_t
-default_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
-{
- STACK_WIND (frame, default_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
- return 0;
-}
-
-int32_t
-default_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
- return 0;
-}
-
-
-int32_t
-default_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
-{
- STACK_WIND (frame, default_xattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata);
- return 0;
-}
-
-int32_t
-default_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
-{
- STACK_WIND (frame, default_fxattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata);
- return 0;
-}
-
-int32_t
-default_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- STACK_WIND (frame, default_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
- return 0;
-}
-
-int32_t
-default_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- STACK_WIND (frame, default_fremovexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
- return 0;
-}
-
-int32_t
-default_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t cmd, struct gf_flock *lock, dict_t *xdata)
-{
- STACK_WIND (frame, default_lk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata);
- return 0;
-}
-
-
-int32_t
-default_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *lock,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_inodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- volume, loc, cmd, lock, xdata);
- return 0;
-}
-
-int32_t
-default_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_finodelk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk,
- volume, fd, cmd, lock, xdata);
- return 0;
-}
-
-int32_t
-default_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_entrylk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->entrylk,
- volume, loc, basename, cmd, type, xdata);
- return 0;
-}
-
-int32_t
-default_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_fentrylk_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fentrylk,
- volume, fd, basename, cmd, type, xdata);
- return 0;
-}
-
-int32_t
-default_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- int32_t len,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_rchecksum_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rchecksum, fd, offset, len, xdata);
- return 0;
-}
-
-
-int32_t
-default_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_readdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata);
- return 0;
-}
-
-
-int32_t
-default_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off, dict_t *xdata)
-{
- STACK_WIND (frame, default_readdirp_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
- return 0;
-}
-
-int32_t
-default_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
- return 0;
-}
-
-int32_t
-default_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
- return 0;
-}
-
-int32_t
-default_stat (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
- return 0;
-}
-
-int32_t
-default_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xdata);
- return 0;
-}
-
-int32_t
-default_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid,
- dict_t *xdata)
-{
- STACK_WIND (frame, default_fsetattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, xdata);
- return 0;
-}
-
-
-int32_t
-default_forget (xlator_t *this, inode_t *inode)
-{
- gf_log_callingfn (this->name, GF_LOG_WARNING, "xlator does not "
- "implement forget_cbk");
- return 0;
-}
-
-
-int32_t
-default_releasedir (xlator_t *this, fd_t *fd)
-{
- gf_log_callingfn (this->name, GF_LOG_WARNING, "xlator does not "
- "implement releasedir_cbk");
- return 0;
-}
-
-int32_t
-default_release (xlator_t *this, fd_t *fd)
-{
- gf_log_callingfn (this->name, GF_LOG_WARNING, "xlator does not "
- "implement release_cbk");
- return 0;
-}
-
-/* End of FOP/_CBK/_RESUME section */
-
-
-/* Management operations */
-
-int32_t
-default_getspec (call_frame_t *frame, xlator_t *this, const char *key,
- int32_t flags)
-{
- STACK_WIND (frame, default_getspec_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getspec, key, flags);
- return 0;
-}
-
-/* notify */
-int
-default_notify (xlator_t *this, int32_t event, void *data, ...)
-{
- switch (event)
- {
- case GF_EVENT_PARENT_UP:
- case GF_EVENT_PARENT_DOWN:
- {
- xlator_list_t *list = this->children;
-
- while (list) {
- xlator_notify (list->xlator, event, this);
- list = list->next;
- }
- }
- break;
- case GF_EVENT_CHILD_CONNECTING:
- case GF_EVENT_CHILD_MODIFIED:
- case GF_EVENT_CHILD_DOWN:
- case GF_EVENT_CHILD_UP:
- case GF_EVENT_AUTH_FAILED:
- {
- xlator_list_t *parent = this->parents;
- /* Handle case of CHILD_* & AUTH_FAILED event specially, send it to fuse */
- if (!parent && this->ctx && this->ctx->master)
- xlator_notify (this->ctx->master, event, this->graph, NULL);
-
- while (parent) {
- if (parent->xlator->init_succeeded)
- xlator_notify (parent->xlator, event,
- this, NULL);
- parent = parent->next;
- }
- }
- break;
- default:
- {
- xlator_list_t *parent = this->parents;
- while (parent) {
- if (parent->xlator->init_succeeded)
- xlator_notify (parent->xlator, event,
- this, NULL);
- parent = parent->next;
- }
- }
- }
-
- return 0;
-}
-
-int32_t
-default_mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- ret = xlator_mem_acct_init (this, gf_common_mt_end);
-
- return ret;
-}
diff --git a/libglusterfs/src/defaults.h b/libglusterfs/src/defaults.h
deleted file mode 100644
index 8a9de7899d3..00000000000
--- a/libglusterfs/src/defaults.h
+++ /dev/null
@@ -1,673 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/* libglusterfs/src/defaults.h:
- This file contains definition of default fops and mops functions.
-*/
-
-#ifndef _DEFAULTS_H
-#define _DEFAULTS_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-
-int32_t default_notify (xlator_t *this,
- int32_t event,
- void *data,
- ...);
-
-int32_t default_forget (xlator_t *this, inode_t *inode);
-
-int32_t default_release (xlator_t *this, fd_t *fd);
-
-int32_t default_releasedir (xlator_t *this, fd_t *fd);
-
-
-/* Management Operations */
-
-int32_t default_getspec (call_frame_t *frame,
- xlator_t *this,
- const char *key,
- int32_t flag);
-
-int32_t default_rchecksum (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, off_t offset,
- int32_t len, dict_t *xdata);
-
-/* FileSystem operations */
-int32_t default_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xdata);
-
-int32_t default_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-int32_t default_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-int32_t default_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset, dict_t *xdata);
-
-int32_t default_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset, dict_t *xdata);
-
-int32_t default_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask, dict_t *xdata);
-
-int32_t default_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size, dict_t *xdata);
-
-int32_t default_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
-
-int32_t default_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
-
-int32_t default_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata);
-
-int32_t default_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata);
-
-int32_t default_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, mode_t umask,
- dict_t *xdata);
-
-int32_t default_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-int32_t default_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-int32_t default_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata);
-
-int32_t default_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd,
- dict_t *xdata);
-
-int32_t default_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- uint32_t flags, dict_t *xdata);
-
-int32_t default_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset,
- uint32_t flags,
- struct iobref *iobref, dict_t *xdata);
-
-int32_t default_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-int32_t default_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-int32_t default_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd, dict_t *xdata);
-
-int32_t default_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-int32_t default_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-int32_t default_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-int32_t default_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-int32_t default_fsetxattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-int32_t default_fgetxattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-int32_t default_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-int32_t default_fremovexattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-int32_t default_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t default_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t default_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t default_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-int32_t default_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-int32_t default_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size, off_t off, dict_t *xdata);
-
-int32_t default_readdirp (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size, off_t off, dict_t *xdata);
-
-int32_t default_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata);
-
-int32_t default_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata);
-
-int32_t default_setattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata);
-
-int32_t default_fsetattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata);
-
-/* Resume */
-int32_t default_getspec_resume (call_frame_t *frame,
- xlator_t *this,
- const char *key,
- int32_t flag);
-
-int32_t default_rchecksum_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, off_t offset,
- int32_t len, dict_t *xdata);
-
-/* FileSystem operations */
-int32_t default_lookup_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xdata);
-
-int32_t default_stat_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-int32_t default_fstat_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-int32_t default_truncate_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset, dict_t *xdata);
-
-int32_t default_ftruncate_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset, dict_t *xdata);
-
-int32_t default_access_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask, dict_t *xdata);
-
-int32_t default_readlink_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size, dict_t *xdata);
-
-int32_t default_mknod_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t rdev, mode_t umask,
- dict_t *xdata);
-
-int32_t default_mkdir_resume (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, mode_t umask, dict_t *xdata);
-
-int32_t default_unlink_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata);
-
-int32_t default_rmdir_resume (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata);
-
-int32_t default_symlink_resume (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, mode_t umask,
- dict_t *xdata);
-
-int32_t default_rename_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-int32_t default_link_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-int32_t default_create_resume (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata);
-
-int32_t default_open_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd, dict_t *xdata);
-
-int32_t default_readv_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset, uint32_t flags, dict_t *xdata);
-
-int32_t default_writev_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset, uint32_t flags,
- struct iobref *iobref, dict_t *xdata);
-
-int32_t default_flush_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-int32_t default_fsync_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-int32_t default_opendir_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd, dict_t *xdata);
-
-int32_t default_fsyncdir_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-int32_t default_statfs_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-int32_t default_setxattr_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-int32_t default_getxattr_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-int32_t default_fsetxattr_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-int32_t default_fgetxattr_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-int32_t default_removexattr_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-int32_t default_fremovexattr_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-int32_t default_lk_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t default_inodelk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t default_finodelk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t default_entrylk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-int32_t default_fentrylk_resume (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-int32_t default_readdir_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size, off_t off, dict_t *xdata);
-
-int32_t default_readdirp_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size, off_t off, dict_t *xdata);
-
-int32_t default_xattrop_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata);
-
-int32_t default_fxattrop_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata);
-int32_t default_rchecksum_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, off_t offset,
- int32_t len, dict_t *xdata);
-
-int32_t default_setattr_resume (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata);
-
-int32_t default_fsetattr_resume (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata);
-
-/* _cbk */
-
-int32_t
-default_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata, struct iatt *postparent);
-
-int32_t
-default_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata);
-
-
-int32_t
-default_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-int32_t
-default_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-int32_t
-default_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-int32_t
-default_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *buf, dict_t *xdata);
-
-
-int32_t
-default_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-int32_t
-default_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-int32_t
-default_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-int32_t
-default_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-
-int32_t
-default_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-
-int32_t
-default_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata);
-
-
-int32_t
-default_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-
-int32_t
-default_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-int32_t
-default_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata);
-
-int32_t
-default_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata);
-
-
-int32_t
-default_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-
-int32_t
-default_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-
-
-int32_t
-default_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-int32_t
-default_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata);
-
-int32_t
-default_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata);
-
-int32_t
-default_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-int32_t
-default_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata);
-
-
-int32_t
-default_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-
-int32_t
-default_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-
-
-int32_t
-default_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
-
-
-int32_t
-default_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
-
-int32_t
-default_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
-
-int32_t
-default_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata);
-
-
-int32_t
-default_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-int32_t
-default_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-int32_t
-default_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata);
-
-int32_t
-default_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-
-int32_t
-default_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-int32_t
-default_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-int32_t
-default_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata);
-
-
-int32_t
-default_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
- uint8_t *strong_checksum, dict_t *xdata);
-
-
-int32_t
-default_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata);
-
-
-int32_t
-default_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata);
-
-int32_t
-default_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata);
-
-int32_t
-default_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata);
-
-int32_t
-default_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, char *spec_data);
-
-int32_t
-default_mem_acct_init (xlator_t *this);
-
-#endif /* _DEFAULTS_H */
diff --git a/libglusterfs/src/dict.c b/libglusterfs/src/dict.c
index 5ae61fd3d3b..1d9be9217a6 100644
--- a/libglusterfs/src/dict.c
+++ b/libglusterfs/src/dict.c
@@ -14,1138 +14,1474 @@
#include <stdio.h>
#include <inttypes.h>
#include <limits.h>
+#include <fnmatch.h>
+
+#include "glusterfs/dict.h"
+#define XXH_INLINE_ALL
+#include "xxhash.h"
+#include "glusterfs/compat.h"
+#include "glusterfs/compat-errno.h"
+#include "glusterfs/byte-order.h"
+#include "glusterfs/statedump.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+struct dict_cmp {
+ dict_t *dict;
+ gf_boolean_t (*value_ignore)(char *k);
+};
+
+#define VALIDATE_DATA_AND_LOG(data, type, key, ret_val) \
+ do { \
+ if (!data || !data->data) { \
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG, \
+ "data is NULL"); \
+ return ret_val; \
+ } \
+ /* Not of the asked type, or old version */ \
+ if ((data->data_type != type) && \
+ (data->data_type != GF_DATA_TYPE_STR_OLD)) { \
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG, \
+ "key %s, %s type asked, has %s type", key, \
+ data_type_name[type], \
+ data_type_name[data->data_type]); \
+ } \
+ } while (0)
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "common-utils.h"
-#include "dict.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "globals.h"
-
-data_t *
-get_new_data ()
+static data_t *
+get_new_data()
{
- data_t *data = NULL;
+ data_t *data = mem_get(THIS->ctx->dict_data_pool);
- data = mem_get0 (THIS->ctx->dict_data_pool);
- if (!data) {
- return NULL;
- }
+ if (!data)
+ return NULL;
+
+ GF_ATOMIC_INIT(data->refcount, 0);
+ data->is_static = _gf_false;
- LOCK_INIT (&data->lock);
- return data;
+ return data;
}
-dict_t *
-get_new_dict_full (int size_hint)
+static dict_t *
+get_new_dict_full(int size_hint)
{
- dict_t *dict = mem_get0 (THIS->ctx->dict_pool);
-
- if (!dict) {
- return NULL;
- }
-
- dict->hash_size = size_hint;
- if (size_hint == 1) {
- /*
- * This is the only case we ever see currently. If we ever
- * need to support resizing the hash table, the resize function
- * will have to take into account the possibility that
- * "members" is not separately allocated (i.e. don't just call
- * realloc() blindly.
- */
- dict->members = &dict->members_internal;
- }
- else {
- /*
- * We actually need to allocate space for size_hint *pointers*
- * but we actually allocate space for one *structure*. Since
- * a data_pair_t consists of five pointers, we're wasting four
- * pointers' worth for N=1, and will overrun what we allocated
- * for N>5. If anybody ever starts using size_hint, we'll need
- * to fix this.
- */
- GF_ASSERT (size_hint <=
- (sizeof(data_pair_t) / sizeof(data_pair_t *)));
- dict->members = mem_get0 (THIS->ctx->dict_pair_pool);
- if (!dict->members) {
- mem_put (dict);
- return NULL;
- }
- }
-
- LOCK_INIT (&dict->lock);
+ dict_t *dict = mem_get0(THIS->ctx->dict_pool);
- return dict;
+ if (!dict) {
+ return NULL;
+ }
+
+ dict->hash_size = size_hint;
+ if (size_hint == 1) {
+ /*
+ * This is the only case we ever see currently. If we ever
+ * need to support resizing the hash table, the resize function
+ * will have to take into account the possibility that
+ * "members" is not separately allocated (i.e. don't just call
+ * realloc() blindly.
+ */
+ dict->members = &dict->members_internal;
+ } else {
+ /*
+ * We actually need to allocate space for size_hint *pointers*
+ * but we actually allocate space for one *structure*. Since
+ * a data_pair_t consists of five pointers, we're wasting four
+ * pointers' worth for N=1, and will overrun what we allocated
+ * for N>5. If anybody ever starts using size_hint, we'll need
+ * to fix this.
+ */
+ GF_ASSERT(size_hint <= (sizeof(data_pair_t) / sizeof(data_pair_t *)));
+ dict->members = mem_get0(THIS->ctx->dict_pair_pool);
+ if (!dict->members) {
+ mem_put(dict);
+ return NULL;
+ }
+ }
+
+ dict->free_pair.key = NULL;
+ dict->totkvlen = 0;
+ LOCK_INIT(&dict->lock);
+
+ return dict;
}
dict_t *
-get_new_dict (void)
+dict_new(void)
{
- return get_new_dict_full (1);
+ dict_t *dict = get_new_dict_full(1);
+
+ if (dict)
+ dict_ref(dict);
+
+ return dict;
}
-dict_t *
-dict_new (void)
+int32_t
+is_data_equal(data_t *one, data_t *two)
{
- dict_t *dict = NULL;
+ struct iatt *iatt1, *iatt2;
+ struct mdata_iatt *mdata_iatt1, *mdata_iatt2;
- dict = get_new_dict_full(1);
+ if (!one || !two || !one->data || !two->data) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "input arguments are provided "
+ "with value data_t as NULL");
+ return -1;
+ }
- if (dict)
- dict_ref (dict);
+ if (one == two)
+ return 1;
- return dict;
-}
+ if (one->data == two->data)
+ return 1;
+ if (one->data_type != two->data_type) {
+ return 0;
+ }
-int32_t
-is_data_equal (data_t *one,
- data_t *two)
-{
- if (!one || !two || !one->data || !two->data) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "input arguments are provided "
- "with value data_t as NULL");
- return -1;
- }
+ if (one->data_type == GF_DATA_TYPE_IATT) {
+ if ((one->len < sizeof(struct iatt)) ||
+ (two->len < sizeof(struct iatt))) {
+ return 0;
+ }
- if (one == two)
- return 1;
+ iatt1 = (struct iatt *)one->data;
+ iatt2 = (struct iatt *)two->data;
- if (one->len != two->len)
+ /* Two iatt structs are considered equal if main fields are
+ * equal, even if times differ.
+ * TODO: maybe when ctime if fully operational we could
+ * enforce time matching. */
+ if (iatt1->ia_ino != iatt2->ia_ino) {
+ return 0;
+ }
+ if (iatt1->ia_type != iatt2->ia_type) {
+ return 0;
+ }
+ if ((iatt1->ia_type == IA_IFBLK) || (iatt1->ia_type == IA_IFCHR)) {
+ if (iatt1->ia_rdev != iatt2->ia_rdev) {
return 0;
+ }
+ }
+ if (gf_uuid_compare(iatt1->ia_gfid, iatt2->ia_gfid) != 0) {
+ return 0;
+ }
- if (one->data == two->data)
- return 1;
+ /* TODO: ia_uid, ia_gid, ia_prot and ia_size can be changed
+ * with some commands. Here we don't have enough
+ * information to decide if they should match or not. */
+ /*
+ if ((iatt1->ia_uid != iatt2->ia_uid) ||
+ (iatt1->ia_gid != iatt2->ia_gid) ||
+ (st_mode_from_ia(iatt1->ia_prot, iatt1->ia_type) !=
+ st_mode_from_ia(iatt2->ia_prot,
+ iatt2->ia_type))) { return 0;
+ }
+ if (iatt1->ia_type == IA_IFREG) {
+ if (iatt1->ia_size != iatt2->ia_size) {
+ return 0;
+ }
+ }
+ */
+ return 1;
+ }
+ if (one->data_type == GF_DATA_TYPE_MDATA) {
+ if ((one->len < sizeof(struct mdata_iatt)) ||
+ (two->len < sizeof(struct mdata_iatt))) {
+ return 0;
+ }
+ mdata_iatt1 = (struct mdata_iatt *)one->data;
+ mdata_iatt2 = (struct mdata_iatt *)two->data;
+
+ if (mdata_iatt1->ia_atime != mdata_iatt2->ia_atime ||
+ mdata_iatt1->ia_mtime != mdata_iatt2->ia_mtime ||
+ mdata_iatt1->ia_ctime != mdata_iatt2->ia_ctime ||
+ mdata_iatt1->ia_atime_nsec != mdata_iatt2->ia_atime_nsec ||
+ mdata_iatt1->ia_mtime_nsec != mdata_iatt2->ia_mtime_nsec ||
+ mdata_iatt1->ia_ctime_nsec != mdata_iatt2->ia_ctime_nsec) {
+ return 0;
+ }
+ return 1;
+ }
+
+ if (one->len != two->len)
+ return 0;
- if (memcmp (one->data, two->data, one->len) == 0)
- return 1;
+ if (memcmp(one->data, two->data, one->len) == 0)
+ return 1;
- return 0;
+ return 0;
+}
+
+static int
+key_value_cmp(dict_t *one, char *key1, data_t *value1, void *data)
+{
+ struct dict_cmp *cmp = data;
+ dict_t *two = cmp->dict;
+ data_t *value2 = dict_get(two, key1);
+
+ if (value2) {
+ if (cmp->value_ignore && cmp->value_ignore(key1))
+ return 0;
+
+ if (is_data_equal(value1, value2) == 1)
+ return 0;
+ }
+
+ if (value2 == NULL) {
+ gf_msg_debug(THIS->name, 0, "'%s' found only on one dict", key1);
+ } else {
+ gf_msg_debug(THIS->name, 0,
+ "'%s' is different in two dicts "
+ "(%u, %u)",
+ key1, value1->len, value2->len);
+ }
+
+ return -1;
+}
+
+/* If both dicts are NULL then equal. If one of the dicts is NULL but the
+ * other has only ignorable keys then also they are equal. If both dicts are
+ * non-null then check if for each non-ignorable key, values are same or
+ * not. value_ignore function is used to skip comparing values for the keys
+ * which must be present in both the dictionaries but the value could be
+ * different.
+ */
+gf_boolean_t
+are_dicts_equal(dict_t *one, dict_t *two,
+ gf_boolean_t (*match)(dict_t *d, char *k, data_t *v,
+ void *data),
+ gf_boolean_t (*value_ignore)(char *k))
+{
+ int num_matches1 = 0;
+ int num_matches2 = 0;
+ struct dict_cmp cmp = {0};
+
+ if (one == two)
+ return _gf_true;
+
+ if (!match)
+ match = dict_match_everything;
+
+ if ((one == NULL) || (two == NULL)) {
+ num_matches1 = dict_foreach_match(one ? one : two, match, NULL,
+ dict_null_foreach_fn, NULL);
+ goto done;
+ }
+
+ cmp.dict = two;
+ cmp.value_ignore = value_ignore;
+ num_matches1 = dict_foreach_match(one, match, NULL, key_value_cmp, &cmp);
+
+ if (num_matches1 == -1)
+ return _gf_false;
+
+ if ((num_matches1 == one->count) && (one->count == two->count))
+ return _gf_true;
+
+ num_matches2 = dict_foreach_match(two, match, NULL, dict_null_foreach_fn,
+ NULL);
+done:
+ /* If the number of matches is same in 'two' then for all the
+ * valid-keys that exist in 'one' the value matched and no extra valid
+ * keys exist in 'two' alone. Otherwise there exists at least one extra
+ * valid-key in 'two' which doesn't exist in 'one' */
+ if (num_matches1 == num_matches2)
+ return _gf_true;
+ return _gf_false;
}
void
-data_destroy (data_t *data)
+data_destroy(data_t *data)
{
- if (data) {
- LOCK_DESTROY (&data->lock);
+ if (data) {
+ if (!data->is_static)
+ GF_FREE(data->data);
- if (!data->is_static) {
- if (data->data) {
- if (data->is_stdalloc)
- free (data->data);
- else
- GF_FREE (data->data);
- }
- }
-
- data->len = 0xbabababa;
- if (!data->is_const)
- mem_put (data);
- }
+ data->len = 0xbabababa;
+ mem_put(data);
+ }
}
data_t *
-data_copy (data_t *old)
+data_copy(data_t *old)
{
- if (!old) {
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "old is NULL");
- return NULL;
- }
+ if (!old) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, 0, LG_MSG_NULL_PTR,
+ "old is NULL");
+ return NULL;
+ }
- data_t *newdata = mem_get0 (THIS->ctx->dict_data_pool);
- if (!newdata) {
- return NULL;
- }
+ data_t *newdata = mem_get0(THIS->ctx->dict_data_pool);
+ if (!newdata) {
+ return NULL;
+ }
- if (old) {
- newdata->len = old->len;
- if (old->data) {
- newdata->data = memdup (old->data, old->len);
- if (!newdata->data)
- goto err_out;
- }
- }
+ newdata->len = old->len;
+ if (old->data) {
+ newdata->data = gf_memdup(old->data, old->len);
+ if (!newdata->data)
+ goto err_out;
+ }
+ newdata->data_type = old->data_type;
- LOCK_INIT (&newdata->lock);
- return newdata;
+ return newdata;
err_out:
+ mem_put(newdata);
- if (newdata->data)
- FREE (newdata->data);
- mem_put (newdata);
-
- return NULL;
+ return NULL;
}
+/* Always need to be called under lock
+ * Always this and key variables are not null -
+ * checked by callers.
+ */
static data_pair_t *
-_dict_lookup (dict_t *this, char *key)
+dict_lookup_common(const dict_t *this, const char *key, const uint32_t hash)
{
- if (!this || !key) {
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "!this || !key (%s)", key);
- return NULL;
- }
+ int hashval = 0;
+ data_pair_t *pair;
- int hashval = SuperFastHash (key, strlen (key)) % this->hash_size;
- data_pair_t *pair;
+ /* If the divisor is 1, the modulo is always 0,
+ * in such case avoid hash calculation.
+ */
+ if (this->hash_size != 1)
+ hashval = hash % this->hash_size;
- for (pair = this->members[hashval]; pair != NULL; pair = pair->hash_next) {
- if (pair->key && !strcmp (pair->key, key))
- return pair;
- }
+ for (pair = this->members[hashval]; pair != NULL; pair = pair->hash_next) {
+ if (pair->key && (hash == pair->key_hash) && !strcmp(pair->key, key))
+ return pair;
+ }
- return NULL;
+ return NULL;
}
int32_t
-dict_lookup (dict_t *this, char *key, data_pair_t **data)
+dict_lookup(dict_t *this, char *key, data_t **data)
{
- if (!this || !key || !data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "!this || !key || !data");
- return -1;
- }
+ if (!this || !key || !data) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!this || !key || "
+ "!data");
+ return -1;
+ }
- LOCK (&this->lock);
- {
- *data = _dict_lookup (this, key);
- }
- UNLOCK (&this->lock);
- if (*data)
- return 0;
- else
- return -1;
+ data_pair_t *tmp = NULL;
-}
+ uint32_t hash = (uint32_t)XXH64(key, strlen(key), 0);
-static int32_t
-_dict_set (dict_t *this,
- char *key,
- data_t *value)
-{
- int hashval;
- data_pair_t *pair;
- char key_free = 0;
- int tmp = 0;
- int ret = 0;
-
- if (!key) {
- ret = gf_asprintf (&key, "ref:%p", value);
- if (-1 == ret) {
- gf_log ("dict", GF_LOG_WARNING, "asprintf failed %s", key);
- return -1;
- }
- key_free = 1;
- }
+ LOCK(&this->lock);
+ {
+ tmp = dict_lookup_common(this, key, hash);
+ }
+ UNLOCK(&this->lock);
- tmp = SuperFastHash (key, strlen (key));
- hashval = (tmp % this->hash_size);
- pair = _dict_lookup (this, key);
-
- if (pair) {
- data_t *unref_data = pair->value;
- pair->value = data_ref (value);
- data_unref (unref_data);
- if (key_free)
- GF_FREE (key);
- /* Indicates duplicate key */
- return 0;
- }
- if (this->free_pair_in_use) {
- pair = mem_get0 (THIS->ctx->dict_pair_pool);
- if (!pair) {
- if (key_free)
- GF_FREE (key);
- return -1;
- }
- }
- else {
- pair = &this->free_pair;
- this->free_pair_in_use = _gf_true;
- }
-
- if (key_free) {
- /* It's ours. Use it. */
- pair->key = key;
- key_free = 0;
- }
- else {
- pair->key = (char *) GF_CALLOC (1, strlen (key) + 1,
- gf_common_mt_char);
- if (!pair->key) {
- if (pair == &this->free_pair) {
- this->free_pair_in_use = _gf_false;
- }
- else {
- mem_put (pair);
- }
- return -1;
- }
- strcpy (pair->key, key);
- }
- pair->value = data_ref (value);
+ if (!tmp)
+ return -1;
- pair->hash_next = this->members[hashval];
- this->members[hashval] = pair;
+ *data = tmp->value;
+ return 0;
+}
- pair->next = this->members_list;
- pair->prev = NULL;
- if (this->members_list)
- this->members_list->prev = pair;
- this->members_list = pair;
- this->count++;
+static int32_t
+dict_set_lk(dict_t *this, char *key, const int key_len, data_t *value,
+ const uint32_t hash, gf_boolean_t replace)
+{
+ int hashval = 0;
+ data_pair_t *pair;
+ int key_free = 0;
+ uint32_t key_hash;
+ int keylen;
+
+ if (!key) {
+ keylen = gf_asprintf(&key, "ref:%p", value);
+ if (-1 == keylen) {
+ return -1;
+ }
+ key_free = 1;
+ key_hash = (uint32_t)XXH64(key, keylen, 0);
+ } else {
+ keylen = key_len;
+ key_hash = hash;
+ }
+
+ /* Search for a existing key if 'replace' is asked for */
+ if (replace) {
+ pair = dict_lookup_common(this, key, key_hash);
+ if (pair) {
+ data_t *unref_data = pair->value;
+ pair->value = data_ref(value);
+ this->totkvlen += (value->len - unref_data->len);
+ data_unref(unref_data);
+ if (key_free)
+ GF_FREE(key);
+ /* Indicates duplicate key */
+ return 0;
+ }
+ }
+
+ if (this->free_pair.key) { /* the free_pair is used */
+ pair = mem_get(THIS->ctx->dict_pair_pool);
+ if (!pair) {
+ if (key_free)
+ GF_FREE(key);
+ return -1;
+ }
+ } else { /* assign the pair to the free pair */
+ pair = &this->free_pair;
+ }
+
+ if (key_free) {
+ /* It's ours. Use it. */
+ pair->key = key;
+ key_free = 0;
+ } else {
+ pair->key = (char *)GF_MALLOC(keylen + 1, gf_common_mt_char);
+ if (!pair->key) {
+ if (pair != &this->free_pair) {
+ mem_put(pair);
+ }
+ return -1;
+ }
+ strcpy(pair->key, key);
+ }
+ pair->key_hash = key_hash;
+ pair->value = data_ref(value);
+ this->totkvlen += (keylen + 1 + value->len);
+
+ /* If the divisor is 1, the modulo is always 0,
+ * in such case avoid hash calculation.
+ */
+ if (this->hash_size != 1) {
+ hashval = (key_hash % this->hash_size);
+ }
+ pair->hash_next = this->members[hashval];
+ this->members[hashval] = pair;
+
+ pair->next = this->members_list;
+ pair->prev = NULL;
+ if (this->members_list)
+ this->members_list->prev = pair;
+ this->members_list = pair;
+ this->count++;
+
+ if (key_free)
+ GF_FREE(key);
+
+ if (this->max_count < this->count)
+ this->max_count = this->count;
+ return 0;
+}
- if (key_free)
- GF_FREE (key);
- return 0;
+int32_t
+dict_set(dict_t *this, char *key, data_t *value)
+{
+ if (key)
+ return dict_setn(this, key, strlen(key), value);
+ else
+ return dict_setn(this, NULL, 0, value);
}
int32_t
-dict_set (dict_t *this,
- char *key,
- data_t *value)
+dict_setn(dict_t *this, char *key, const int keylen, data_t *value)
{
- int32_t ret;
+ int32_t ret;
+ uint32_t key_hash = 0;
- if (!this || !value) {
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "!this || !value for key=%s", key);
- return -1;
- }
+ if (!this || !value) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!this || !value for "
+ "key=%s",
+ key);
+ return -1;
+ }
- LOCK (&this->lock);
+ if (key) {
+ key_hash = (uint32_t)XXH64(key, keylen, 0);
+ }
- ret = _dict_set (this, key, value);
+ LOCK(&this->lock);
- UNLOCK (&this->lock);
+ ret = dict_set_lk(this, key, keylen, value, key_hash, 1);
- return ret;
+ UNLOCK(&this->lock);
+
+ return ret;
}
+int32_t
+dict_add(dict_t *this, char *key, data_t *value)
+{
+ if (key)
+ return dict_addn(this, key, strlen(key), value);
+ else
+ return dict_addn(this, NULL, 0, value);
+}
-data_t *
-dict_get (dict_t *this, char *key)
+int32_t
+dict_addn(dict_t *this, char *key, const int keylen, data_t *value)
{
- data_pair_t *pair;
+ int32_t ret;
+ uint32_t key_hash = 0;
- if (!this || !key) {
- gf_log_callingfn ("dict", GF_LOG_INFO,
- "!this || key=%s", (key) ? key : "()");
- return NULL;
- }
+ if (!this || !value) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!this || !value for key=%s", key);
+ return -1;
+ }
+
+ if (key) {
+ key_hash = (uint32_t)XXH64(key, keylen, 0);
+ }
- LOCK (&this->lock);
+ LOCK(&this->lock);
- pair = _dict_lookup (this, key);
+ ret = dict_set_lk(this, key, keylen, value, key_hash, 0);
- UNLOCK (&this->lock);
+ UNLOCK(&this->lock);
- if (pair)
- return pair->value;
+ return ret;
+}
+data_t *
+dict_get(dict_t *this, char *key)
+{
+ if (!this || !key) {
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG,
+ "!this || key=%s", (key) ? key : "()");
return NULL;
+ }
+
+ return dict_getn(this, key, strlen(key));
}
-void
-dict_del (dict_t *this, char *key)
+data_t *
+dict_getn(dict_t *this, char *key, const int keylen)
{
- if (!this || !key) {
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "!this || key=%s", key);
- return;
- }
+ data_pair_t *pair;
+ uint32_t hash;
- LOCK (&this->lock);
+ if (!this || !key) {
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG,
+ "!this || key=%s", (key) ? key : "()");
+ return NULL;
+ }
- int hashval = SuperFastHash (key, strlen (key)) % this->hash_size;
- data_pair_t *pair = this->members[hashval];
- data_pair_t *prev = NULL;
+ hash = (uint32_t)XXH64(key, keylen, 0);
- while (pair) {
- if (strcmp (pair->key, key) == 0) {
- if (prev)
- prev->hash_next = pair->hash_next;
- else
- this->members[hashval] = pair->hash_next;
+ LOCK(&this->lock);
+ {
+ pair = dict_lookup_common(this, key, hash);
+ }
+ UNLOCK(&this->lock);
- data_unref (pair->value);
+ if (pair)
+ return pair->value;
- if (pair->prev)
- pair->prev->next = pair->next;
- else
- this->members_list = pair->next;
+ return NULL;
+}
- if (pair->next)
- pair->next->prev = pair->prev;
+int
+dict_key_count(dict_t *this)
+{
+ int ret = -1;
- GF_FREE (pair->key);
- if (pair == &this->free_pair) {
- this->free_pair_in_use = _gf_false;
- }
- else {
- mem_put (pair);
- }
- this->count--;
- break;
- }
+ if (!this) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict passed is NULL");
+ return ret;
+ }
- prev = pair;
- pair = pair->hash_next;
- }
+ LOCK(&this->lock);
+ {
+ ret = this->count;
+ }
+ UNLOCK(&this->lock);
- UNLOCK (&this->lock);
+ return ret;
+}
+void
+dict_del(dict_t *this, char *key)
+{
+ if (!this || !key) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!this || key=%s", key);
return;
+ }
+
+ return dict_deln(this, key, strlen(key));
}
void
-dict_destroy (dict_t *this)
+dict_deln(dict_t *this, char *key, const int keylen)
{
- if (!this) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
- return;
- }
+ int hashval = 0;
+ uint32_t hash;
- data_pair_t *pair = this->members_list;
- data_pair_t *prev = this->members_list;
+ if (!this || !key) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!this || key=%s", key);
+ return;
+ }
- LOCK_DESTROY (&this->lock);
+ hash = (uint32_t)XXH64(key, keylen, 0);
- while (prev) {
- pair = pair->next;
- data_unref (prev->value);
- GF_FREE (prev->key);
- if (prev != &this->free_pair) {
- mem_put (prev);
- }
- prev = pair;
- }
+ LOCK(&this->lock);
+
+ /* If the divisor is 1, the modulo is always 0,
+ * in such case avoid hash calculation.
+ */
+ if (this->hash_size != 1)
+ hashval = hash % this->hash_size;
+
+ data_pair_t *pair = this->members[hashval];
+ data_pair_t *prev = NULL;
+
+ while (pair) {
+ if ((hash == pair->key_hash) && strcmp(pair->key, key) == 0) {
+ if (prev)
+ prev->hash_next = pair->hash_next;
+ else
+ this->members[hashval] = pair->hash_next;
- if (this->members != &this->members_internal) {
- mem_put (this->members);
+ this->totkvlen -= pair->value->len;
+ data_unref(pair->value);
+
+ if (pair->prev)
+ pair->prev->next = pair->next;
+ else
+ this->members_list = pair->next;
+
+ if (pair->next)
+ pair->next->prev = pair->prev;
+
+ this->totkvlen -= (strlen(pair->key) + 1);
+ GF_FREE(pair->key);
+ if (pair == &this->free_pair) {
+ this->free_pair.key = NULL;
+ } else {
+ mem_put(pair);
+ }
+ this->count--;
+ break;
}
- if (this->extra_free)
- GF_FREE (this->extra_free);
- if (this->extra_stdfree)
- free (this->extra_stdfree);
+ prev = pair;
+ pair = pair->hash_next;
+ }
- if (!this->is_static)
- mem_put (this);
+ UNLOCK(&this->lock);
- return;
+ return;
}
void
-dict_unref (dict_t *this)
+dict_destroy(dict_t *this)
{
- int32_t ref;
+ if (!this) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is NULL");
+ return;
+ }
- if (!this) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
- return;
- }
+ data_pair_t *pair = this->members_list;
+ data_pair_t *prev = this->members_list;
+ glusterfs_ctx_t *ctx = NULL;
+ uint64_t current_max = 0;
+ uint32_t total_pairs = 0;
- LOCK (&this->lock);
+ LOCK_DESTROY(&this->lock);
- this->refcount--;
- ref = this->refcount;
+ while (prev) {
+ pair = pair->next;
+ data_unref(prev->value);
+ GF_FREE(prev->key);
+ if (prev != &this->free_pair) {
+ mem_put(prev);
+ } else {
+ this->free_pair.key = NULL;
+ }
+ total_pairs++;
+ prev = pair;
+ }
- UNLOCK (&this->lock);
+ this->totkvlen = 0;
+ if (this->members != &this->members_internal) {
+ mem_put(this->members);
+ }
- if (!ref)
- dict_destroy (this);
-}
+ free(this->extra_stdfree);
-dict_t *
-dict_ref (dict_t *this)
-{
- if (!this) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
- return NULL;
- }
+ /* update 'ctx->stats.dict.details' using max_count */
+ ctx = THIS->ctx;
- LOCK (&this->lock);
+ /* NOTE: below logic is not totaly race proof */
+ /* thread0 and thread1 gets current_max as 10 */
+ /* thread0 has 'this->max_count as 11 */
+ /* thread1 has 'this->max_count as 20 */
+ /* thread1 goes ahead and sets the max_dict_pairs to 20 */
+ /* thread0 then goes and sets it to 11 */
+ /* As it is for information purpose only, no functionality will be
+ broken by this, but a point to consider about ATOMIC macros. */
+ current_max = GF_ATOMIC_GET(ctx->stats.max_dict_pairs);
+ if (current_max < this->max_count)
+ GF_ATOMIC_INIT(ctx->stats.max_dict_pairs, this->max_count);
- this->refcount++;
+ GF_ATOMIC_ADD(ctx->stats.total_pairs_used, total_pairs);
+ GF_ATOMIC_INC(ctx->stats.total_dicts_used);
- UNLOCK (&this->lock);
+ mem_put(this);
- return this;
+ return;
}
void
-data_unref (data_t *this)
+dict_unref(dict_t *this)
{
- int32_t ref;
+ uint64_t ref = 0;
- if (!this) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
- return;
- }
+ if (!this) {
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is NULL");
+ return;
+ }
- LOCK (&this->lock);
+ ref = GF_ATOMIC_DEC(this->refcount);
- this->refcount--;
- ref = this->refcount;
+ if (!ref)
+ dict_destroy(this);
+}
- UNLOCK (&this->lock);
+dict_t *
+dict_ref(dict_t *this)
+{
+ if (!this) {
+ gf_msg_callingfn("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is NULL");
+ return NULL;
+ }
- if (!ref)
- data_destroy (this);
+ GF_ATOMIC_INC(this->refcount);
+ return this;
}
-data_t *
-data_ref (data_t *this)
+void
+data_unref(data_t *this)
{
- if (!this) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
- return NULL;
- }
-
- LOCK (&this->lock);
+ uint64_t ref;
- this->refcount++;
+ if (!this) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "data is NULL");
+ return;
+ }
- UNLOCK (&this->lock);
+ ref = GF_ATOMIC_DEC(this->refcount);
- return this;
+ if (!ref)
+ data_destroy(this);
}
data_t *
-int_to_data (int64_t value)
+data_ref(data_t *this)
{
- int ret = 0;
- data_t *data = get_new_data ();
-
- if (!data) {
- return NULL;
- }
+ if (!this) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "data is NULL");
+ return NULL;
+ }
- ret = gf_asprintf (&data->data, "%"PRId64, value);
- if (-1 == ret) {
- gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
- return NULL;
- }
- data->len = strlen (data->data) + 1;
+ GF_ATOMIC_INC(this->refcount);
- return data;
+ return this;
}
data_t *
-data_from_int64 (int64_t value)
+int_to_data(int64_t value)
{
- int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data) {
- return NULL;
- }
- ret = gf_asprintf (&data->data, "%"PRId64, value);
- if (-1 == ret) {
- gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
- return NULL;
- }
- data->len = strlen (data->data) + 1;
+ if (!data) {
+ return NULL;
+ }
+
+ data->len = gf_asprintf(&data->data, "%" PRId64, value);
+ if (-1 == data->len) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_INT;
- return data;
+ return data;
}
data_t *
-data_from_int32 (int32_t value)
+data_from_int64(int64_t value)
{
- int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data) {
- return NULL;
- }
- ret = gf_asprintf (&data->data, "%"PRId32, value);
- if (-1 == ret) {
- gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
- return NULL;
- }
-
- data->len = strlen (data->data) + 1;
+ if (!data) {
+ return NULL;
+ }
+ data->len = gf_asprintf(&data->data, "%" PRId64, value);
+ if (-1 == data->len) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_INT;
- return data;
+ return data;
}
data_t *
-data_from_int16 (int16_t value)
+data_from_int32(int32_t value)
{
- int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data) {
- return NULL;
- }
- ret = gf_asprintf (&data->data, "%"PRId16, value);
- if (-1 == ret) {
- gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
- return NULL;
- }
+ if (!data) {
+ return NULL;
+ }
+ data->len = gf_asprintf(&data->data, "%" PRId32, value);
+ if (-1 == data->len) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
- data->len = strlen (data->data) + 1;
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_INT;
- return data;
+ return data;
}
data_t *
-data_from_int8 (int8_t value)
+data_from_int16(int16_t value)
{
- int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data) {
- return NULL;
- }
- ret = gf_asprintf (&data->data, "%d", value);
- if (-1 == ret) {
- gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
- return NULL;
- }
+ if (!data) {
+ return NULL;
+ }
+ data->len = gf_asprintf(&data->data, "%" PRId16, value);
+ if (-1 == data->len) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
- data->len = strlen (data->data) + 1;
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_INT;
- return data;
+ return data;
}
data_t *
-data_from_uint64 (uint64_t value)
+data_from_int8(int8_t value)
{
- int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data) {
- return NULL;
- }
- ret = gf_asprintf (&data->data, "%"PRIu64, value);
- if (-1 == ret) {
- gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
- return NULL;
- }
+ if (!data) {
+ return NULL;
+ }
+ data->len = gf_asprintf(&data->data, "%d", value);
+ if (-1 == data->len) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
- data->len = strlen (data->data) + 1;
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_INT;
- return data;
+ return data;
}
-static data_t *
-data_from_double (double value)
+data_t *
+data_from_uint64(uint64_t value)
{
- data_t *data = NULL;
- int ret = 0;
+ data_t *data = get_new_data();
- data = get_new_data ();
-
- if (!data) {
- return NULL;
- }
+ if (!data) {
+ return NULL;
+ }
+ data->len = gf_asprintf(&data->data, "%" PRIu64, value);
+ if (-1 == data->len) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
- ret = gf_asprintf (&data->data, "%f", value);
- if (ret == -1) {
- return NULL;
- }
- data->len = strlen (data->data) + 1;
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_UINT;
- return data;
+ return data;
}
-
data_t *
-data_from_uint32 (uint32_t value)
+data_from_double(double value)
{
- int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data) {
- return NULL;
- }
- ret = gf_asprintf (&data->data, "%"PRIu32, value);
- if (-1 == ret) {
- gf_log ("dict", GF_LOG_DEBUG, "asprintf failed");
- return NULL;
- }
+ if (!data) {
+ return NULL;
+ }
- data->len = strlen (data->data) + 1;
+ data->len = gf_asprintf(&data->data, "%f", value);
+ if (data->len == -1) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_DOUBLE;
- return data;
+ return data;
}
-
data_t *
-data_from_uint16 (uint16_t value)
+data_from_uint32(uint32_t value)
{
- int ret = 0;
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data) {
- return NULL;
- }
- ret = gf_asprintf (&data->data, "%"PRIu16, value);
- if (-1 == ret) {
- return NULL;
- }
+ if (!data) {
+ return NULL;
+ }
+ data->len = gf_asprintf(&data->data, "%" PRIu32, value);
+ if (-1 == data->len) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
- data->len = strlen (data->data) + 1;
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_UINT;
- return data;
+ return data;
}
-
data_t *
-data_from_ptr (void *value)
+data_from_uint16(uint16_t value)
{
- if (!value) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "value is NULL");
- return NULL;
- }
+ data_t *data = get_new_data();
- data_t *data = get_new_data ();
+ if (!data) {
+ return NULL;
+ }
+ data->len = gf_asprintf(&data->data, "%" PRIu16, value);
+ if (-1 == data->len) {
+ gf_msg_debug("dict", 0, "asprintf failed");
+ data_destroy(data);
+ return NULL;
+ }
- if (!data) {
- return NULL;
- }
+ data->len++; /* account for terminating NULL */
+ data->data_type = GF_DATA_TYPE_UINT;
- data->data = value;
- return data;
+ return data;
}
-data_t *
-data_from_static_ptr (void *value)
+static data_t *
+data_from_ptr_common(void *value, gf_boolean_t is_static)
{
-/*
- this is valid to set 0 as value..
-
- if (!value) {
- gf_log ("dict", GF_LOG_CRITICAL,
- "@value=%p", value);
- return NULL;
- }
-*/
- data_t *data = get_new_data ();
+ /* it is valid to set 0/NULL as a value, no need to check *value */
- if (!data) {
- return NULL;
- }
+ data_t *data = get_new_data();
+ if (!data) {
+ return NULL;
+ }
- data->is_static = 1;
- data->data = value;
+ data->data = value;
+ data->len = 0;
+ data->is_static = is_static;
- return data;
+ data->data_type = GF_DATA_TYPE_PTR;
+ return data;
}
data_t *
-str_to_data (char *value)
+str_to_data(char *value)
{
- if (!value) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "value is NULL");
- return NULL;
- }
- data_t *data = get_new_data ();
-
- if (!data) {
- return NULL;
- }
- data->len = strlen (value) + 1;
-
- data->data = value;
- data->is_static = 1;
+ if (!value) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "value is NULL");
+ return NULL;
+ }
- return data;
+ return strn_to_data(value, strlen(value));
}
data_t *
-data_from_dynstr (char *value)
+strn_to_data(char *value, const int vallen)
{
- if (!value) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "value is NULL");
- return NULL;
- }
+ if (!value) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "value is NULL");
+ return NULL;
+ }
+ data_t *data = get_new_data();
- data_t *data = get_new_data ();
+ if (!data) {
+ return NULL;
+ }
+ data->len = vallen + 1;
+ data->data_type = GF_DATA_TYPE_STR;
- data->len = strlen (value) + 1;
- data->data = value;
+ data->data = value;
+ data->is_static = _gf_true;
- return data;
+ return data;
}
-data_t *
-data_from_dynmstr (char *value)
+static data_t *
+data_from_dynstr(char *value)
{
- if (!value) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "value is NULL");
- return NULL;
- }
+ if (!value) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "value is NULL");
+ return NULL;
+ }
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- data->len = strlen (value) + 1;
- data->data = value;
- data->is_stdalloc = 1;
+ if (!data)
+ return NULL;
+ data->len = strlen(value) + 1;
+ data->data = value;
+ data->data_type = GF_DATA_TYPE_STR;
- return data;
+ return data;
}
data_t *
-data_from_dynptr (void *value, int32_t len)
+data_from_dynptr(void *value, int32_t len)
{
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data)
- return NULL;
+ if (!data)
+ return NULL;
- data->len = len;
- data->data = value;
+ data->len = len;
+ data->data = value;
+ data->data_type = GF_DATA_TYPE_PTR;
- return data;
+ return data;
}
data_t *
-bin_to_data (void *value, int32_t len)
+bin_to_data(void *value, int32_t len)
{
- if (!value) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "value is NULL");
- return NULL;
- }
+ if (!value) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "value is NULL");
+ return NULL;
+ }
- data_t *data = get_new_data ();
+ data_t *data = get_new_data();
- if (!data)
- return NULL;
+ if (!data)
+ return NULL;
- data->is_static = 1;
- data->len = len;
- data->data = value;
+ data->is_static = _gf_true;
+ data->len = len;
+ data->data = value;
- return data;
+ return data;
}
-int64_t
-data_to_int64 (data_t *data)
-{
- if (!data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
- return -1;
- }
-
- char *str = alloca (data->len + 1);
- if (!str)
- return -1;
+static char *data_type_name[GF_DATA_TYPE_MAX] = {
+ [GF_DATA_TYPE_UNKNOWN] = "unknown",
+ [GF_DATA_TYPE_STR_OLD] = "string-old-version",
+ [GF_DATA_TYPE_INT] = "integer",
+ [GF_DATA_TYPE_UINT] = "unsigned integer",
+ [GF_DATA_TYPE_DOUBLE] = "float",
+ [GF_DATA_TYPE_STR] = "string",
+ [GF_DATA_TYPE_PTR] = "pointer",
+ [GF_DATA_TYPE_GFUUID] = "gf-uuid",
+ [GF_DATA_TYPE_IATT] = "iatt",
+ [GF_DATA_TYPE_MDATA] = "mdata",
+};
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
- return (int64_t) strtoull (str, NULL, 0);
-}
+int64_t
+data_to_int64(data_t *data)
+{
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1);
+
+ char *endptr = NULL;
+ int64_t value = 0;
+
+ errno = 0;
+ value = strtoll(data->data, &endptr, 0);
+
+ if (endptr && *endptr != '\0')
+ /* Unrecognized characters at the end of string. */
+ errno = EINVAL;
+ if (errno) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, errno,
+ LG_MSG_DATA_CONVERSION_ERROR,
+ "Error in data conversion: '%s' can't "
+ "be represented as int64_t",
+ data->data);
+ return -1;
+ }
+ return value;
+}
+
+/* Like above but implies signed range check. */
+
+#define DATA_TO_RANGED_SIGNED(endptr, value, data, type, min, max) \
+ do { \
+ errno = 0; \
+ value = strtoll(data->data, &endptr, 0); \
+ if (endptr && *endptr != '\0') \
+ errno = EINVAL; \
+ if (errno || value > max || value < min) { \
+ gf_msg_callingfn("dict", GF_LOG_WARNING, errno, \
+ LG_MSG_DATA_CONVERSION_ERROR, \
+ "Error in data conversion: '%s' can't " \
+ "be represented as " #type, \
+ data->data); \
+ return -1; \
+ } \
+ return (type)value; \
+ } while (0)
int32_t
-data_to_int32 (data_t *data)
+data_to_int32(data_t *data)
{
- if (!data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
- return -1;
- }
-
- char *str = alloca (data->len + 1);
- if (!str)
- return -1;
+ char *endptr = NULL;
+ int64_t value = 0;
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- return strtoul (str, NULL, 0);
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1);
+ DATA_TO_RANGED_SIGNED(endptr, value, data, int32_t, INT_MIN, INT_MAX);
}
int16_t
-data_to_int16 (data_t *data)
+data_to_int16(data_t *data)
{
- int16_t value = 0;
-
- if (!data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
- return -1;
- }
-
- char *str = alloca (data->len + 1);
- if (!str)
- return -1;
-
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- errno = 0;
- value = strtol (str, NULL, 0);
-
- if ((SHRT_MAX > value) || (SHRT_MIN < value)) {
- errno = ERANGE;
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "Error in data conversion: "
- "detected overflow");
- return -1;
- }
+ char *endptr = NULL;
+ int64_t value = 0;
- return (int16_t)value;
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1);
+ DATA_TO_RANGED_SIGNED(endptr, value, data, int16_t, SHRT_MIN, SHRT_MAX);
}
-
int8_t
-data_to_int8 (data_t *data)
+data_to_int8(data_t *data)
{
- int32_t value = 0;
-
- if (!data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
- return -1;
- }
-
- char *str = alloca (data->len + 1);
- if (!str)
- return -1;
-
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- errno = 0;
- value = strtol (str, NULL, 0);
+ char *endptr = NULL;
+ int64_t value = 0;
- if ((SCHAR_MAX > value) || (SCHAR_MIN < value)) {
- errno = ERANGE;
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "Error in data conversion: "
- "detected overflow");
- return -1;
- }
-
- return (int8_t)value;
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, "null", -1);
+ DATA_TO_RANGED_SIGNED(endptr, value, data, int8_t, CHAR_MIN, CHAR_MAX);
}
-
uint64_t
-data_to_uint64 (data_t *data)
-{
- if (!data)
- return -1;
- char *str = alloca (data->len + 1);
- if (!str)
- return -1;
-
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- return strtoll (str, NULL, 0);
-}
+data_to_uint64(data_t *data)
+{
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1);
+
+ char *endptr = NULL;
+ uint64_t value = 0;
+
+ errno = 0;
+ value = strtoull(data->data, &endptr, 0);
+
+ if (endptr && *endptr != '\0')
+ errno = EINVAL;
+ if (errno) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, errno,
+ LG_MSG_DATA_CONVERSION_ERROR,
+ "Error in data conversion: '%s' can't "
+ "be represented as uint64_t",
+ data->data);
+ return -1;
+ }
+ return value;
+}
+
+/* Like above but implies unsigned range check. */
+
+#define DATA_TO_RANGED_UNSIGNED(endptr, value, data, type, max) \
+ do { \
+ errno = 0; \
+ value = strtoull(data->data, &endptr, 0); \
+ if (endptr && *endptr != '\0') \
+ errno = EINVAL; \
+ if (errno || value > max) { \
+ gf_msg_callingfn("dict", GF_LOG_WARNING, errno, \
+ LG_MSG_DATA_CONVERSION_ERROR, \
+ "Error in data conversion: '%s' can't " \
+ "be represented as " #type, \
+ data->data); \
+ return -1; \
+ } \
+ return (type)value; \
+ } while (0)
uint32_t
-data_to_uint32 (data_t *data)
+data_to_uint32(data_t *data)
{
- if (!data)
- return -1;
+ char *endptr = NULL;
+ uint64_t value = 0;
- char *str = alloca (data->len + 1);
- if (!str)
- return -1;
-
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- return strtol (str, NULL, 0);
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1);
+ DATA_TO_RANGED_UNSIGNED(endptr, value, data, uint32_t, UINT_MAX);
}
uint16_t
-data_to_uint16 (data_t *data)
+data_to_uint16(data_t *data)
{
- uint16_t value = 0;
-
- if (!data)
- return -1;
+ char *endptr = NULL;
+ uint64_t value = 0;
- char *str = alloca (data->len + 1);
- if (!str)
- return -1;
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1);
+ DATA_TO_RANGED_UNSIGNED(endptr, value, data, uint16_t, USHRT_MAX);
+}
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+uint8_t
+data_to_uint8(data_t *data)
+{
+ char *endptr = NULL;
+ uint64_t value = 0;
- errno = 0;
- value = strtol (str, NULL, 0);
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, "null", -1);
+ DATA_TO_RANGED_UNSIGNED(endptr, value, data, uint8_t, UCHAR_MAX);
+}
- if ((USHRT_MAX - value) < 0) {
- errno = ERANGE;
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "Error in data conversion: "
- "overflow detected");
- return -1;
- }
+char *
+data_to_str(data_t *data)
+{
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_STR, "null", NULL);
+ return data->data;
+}
- return (uint16_t)value;
+void *
+data_to_ptr(data_t *data)
+{
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_PTR, "null", NULL);
+ return data->data;
}
-uint8_t
-data_to_uint8 (data_t *data)
+void *
+data_to_bin(data_t *data)
{
- uint32_t value = 0;
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_PTR, "null", NULL);
+ return data->data;
+}
- if (!data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
- return -1;
- }
+struct iatt *
+data_to_iatt(data_t *data, char *key)
+{
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_IATT, key, NULL);
- char *str = alloca (data->len + 1);
- if (!str)
- return -1;
+ /* We only check for smaller size. If it's bigger we simply ignore
+ * the extra data. This way it's easy to do changes in the future that
+ * pass more data but are backward compatible (if the initial contents
+ * of the struct are maintained, of course). */
+ if (data->len < sizeof(struct iatt)) {
+ gf_smsg("glusterfs", GF_LOG_ERROR, ENOBUFS, LG_MSG_UNDERSIZED_BUF,
+ "key=%s", key, NULL);
+ return NULL;
+ }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ return (struct iatt *)data->data;
+}
- errno = 0;
- value = strtol (str, NULL, 0);
+int
+dict_null_foreach_fn(dict_t *d, char *k, data_t *v, void *tmp)
+{
+ return 0;
+}
- if ((UCHAR_MAX - value) < 0) {
- errno = ERANGE;
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "data conversion overflow detected (%s)",
- strerror(errno));
- return -1;
- }
+int
+dict_remove_foreach_fn(dict_t *d, char *k, data_t *v, void *_tmp)
+{
+ if (!d || !k) {
+ gf_smsg("glusterfs", GF_LOG_WARNING, EINVAL, LG_MSG_KEY_OR_VALUE_NULL,
+ "d=%s", d ? "key" : "dictionary", NULL);
+ return -1;
+ }
- return (uint8_t) value;
+ dict_del(d, k);
+ return 0;
}
-char *
-data_to_str (data_t *data)
+gf_boolean_t
+dict_match_everything(dict_t *d, char *k, data_t *v, void *data)
{
- if (!data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
- return NULL;
- }
- return data->data;
+ return _gf_true;
}
-void *
-data_to_ptr (data_t *data)
+int
+dict_foreach(dict_t *dict,
+ int (*fn)(dict_t *this, char *key, data_t *value, void *data),
+ void *data)
{
- if (!data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
- return NULL;
- }
- return data->data;
+ int ret = dict_foreach_match(dict, dict_match_everything, NULL, fn, data);
+
+ if (ret > 0)
+ ret = 0;
+
+ return ret;
}
-void *
-data_to_bin (data_t *data)
+/* return values:
+ -1 = failure,
+ 0 = no matches found,
+ +n = n number of matches
+*/
+int
+dict_foreach_match(dict_t *dict,
+ gf_boolean_t (*match)(dict_t *this, char *key, data_t *value,
+ void *mdata),
+ void *match_data,
+ int (*action)(dict_t *this, char *key, data_t *value,
+ void *adata),
+ void *action_data)
+{
+ if (!dict || !match || !action) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict|match|action is "
+ "NULL");
+ return -1;
+ }
+
+ int ret = -1;
+ int count = 0;
+ data_pair_t *pairs = dict->members_list;
+ data_pair_t *next = NULL;
+
+ while (pairs) {
+ next = pairs->next;
+ if (match(dict, pairs->key, pairs->value, match_data)) {
+ ret = action(dict, pairs->key, pairs->value, action_data);
+ if (ret < 0)
+ return ret;
+ count++;
+ }
+ pairs = next;
+ }
+
+ return count;
+}
+
+static gf_boolean_t
+dict_fnmatch(dict_t *d, char *k, data_t *val, void *match_data)
+{
+ return (fnmatch(match_data, k, 0) == 0);
+}
+/* return values:
+ -1 = failure,
+ 0 = no matches found,
+ +n = n number of matches
+*/
+int
+dict_foreach_fnmatch(dict_t *dict, char *pattern,
+ int (*fn)(dict_t *this, char *key, data_t *value,
+ void *data),
+ void *data)
{
- if (!data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "data is NULL");
- return NULL;
- }
- return data->data;
+ return dict_foreach_match(dict, dict_fnmatch, pattern, fn, data);
}
-void
-dict_foreach (dict_t *dict,
- void (*fn)(dict_t *this,
- char *key,
- data_t *value,
- void *data),
- void *data)
-{
- if (!dict) {
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "dict is NULL");
- return;
- }
+/**
+ * dict_keys_join - pack the keys of the dictionary in a buffer.
+ *
+ * @value : buffer in which the keys will be packed (can be NULL)
+ * @size : size of the buffer which is sent (can be 0, in which case buffer
+ * is not packed but only length is returned)
+ * @dict : dictionary of which all the keys will be packed
+ * @filter_fn : keys matched in filter_fn() is counted.
+ *
+ * @return : @length of string after joining keys.
+ *
+ */
- data_pair_t *pairs = dict->members_list;
- data_pair_t *next = NULL;
+int
+dict_keys_join(void *value, int size, dict_t *dict, int (*filter_fn)(char *k))
+{
+ int len = 0;
+ data_pair_t *pairs = dict->members_list;
+ data_pair_t *next = NULL;
+
+ while (pairs) {
+ next = pairs->next;
- while (pairs) {
- next = pairs->next;
- fn (dict, pairs->key, pairs->value, data);
- pairs = next;
+ if (filter_fn && filter_fn(pairs->key)) {
+ pairs = next;
+ continue;
}
-}
+ if (value && (size > len))
+ strncpy(value + len, pairs->key, size - len);
-static void
-_copy (dict_t *unused,
- char *key,
- data_t *value,
- void *newdict)
-{
- dict_set ((dict_t *)newdict, key, (value));
+ len += (strlen(pairs->key) + 1);
+
+ pairs = next;
+ }
+
+ return len;
}
-static void
-_remove (dict_t *dict,
- char *key,
- data_t *value,
- void *unused)
+static int
+dict_copy_one(dict_t *unused, char *key, data_t *value, void *newdict)
{
- dict_del ((dict_t *)dict, key);
+ return dict_set((dict_t *)newdict, key, (value));
}
-
dict_t *
-dict_copy (dict_t *dict,
- dict_t *new)
+dict_copy(dict_t *dict, dict_t *new)
{
- if (!dict) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
- return NULL;
- }
+ if (!dict) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is NULL");
+ return NULL;
+ }
- if (!new)
- new = get_new_dict_full (dict->hash_size);
+ if (!new)
+ new = get_new_dict_full(dict->hash_size);
- dict_foreach (dict, _copy, new);
+ dict_foreach(dict, dict_copy_one, new);
- return new;
+ return new;
}
int
-dict_reset (dict_t *dict)
-{
- int32_t ret = -1;
- if (!dict) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is NULL");
- goto out;
- }
- dict_foreach (dict, _remove, NULL);
- ret = 0;
+dict_reset(dict_t *dict)
+{
+ int32_t ret = -1;
+ if (!dict) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is NULL");
+ goto out;
+ }
+ dict_foreach(dict, dict_remove_foreach_fn, NULL);
+ ret = 0;
out:
- return ret;
+ return ret;
}
dict_t *
-dict_copy_with_ref (dict_t *dict,
- dict_t *new)
+dict_copy_with_ref(dict_t *dict, dict_t *new)
{
- dict_t *local_new = NULL;
+ dict_t *local_new = NULL;
- GF_VALIDATE_OR_GOTO("dict", dict, fail);
+ GF_VALIDATE_OR_GOTO("dict", dict, fail);
- if (new == NULL) {
- local_new = dict_new ();
- GF_VALIDATE_OR_GOTO("dict", local_new, fail);
- new = local_new;
- }
+ if (new == NULL) {
+ local_new = dict_new();
+ GF_VALIDATE_OR_GOTO("dict", local_new, fail);
+ new = local_new;
+ }
- dict_foreach (dict, _copy, new);
+ dict_foreach(dict, dict_copy_one, new);
fail:
- return new;
+ return new;
}
/*
@@ -1159,900 +1495,1292 @@ fail:
* -val error, val = errno
*/
-
static int
-dict_get_with_ref (dict_t *this, char *key, data_t **data)
+dict_get_with_refn(dict_t *this, char *key, const int keylen, data_t **data)
{
- data_pair_t * pair = NULL;
- int ret = -ENOENT;
+ data_pair_t *pair = NULL;
+ int ret = -ENOENT;
+ uint32_t hash;
- if (!this || !key || !data) {
- gf_log_callingfn ("dict", GF_LOG_WARNING,
- "dict OR key (%s) is NULL", key);
- ret = -EINVAL;
- goto err;
- }
+ hash = (uint32_t)XXH64(key, keylen, 0);
- LOCK (&this->lock);
- {
- pair = _dict_lookup (this, key);
- }
- UNLOCK (&this->lock);
+ LOCK(&this->lock);
+ {
+ pair = dict_lookup_common(this, key, hash);
if (pair) {
- ret = 0;
- *data = data_ref (pair->value);
+ ret = 0;
+ *data = data_ref(pair->value);
}
+ }
+ UNLOCK(&this->lock);
-err:
- return ret;
+ return ret;
+}
+
+int
+dict_get_with_ref(dict_t *this, char *key, data_t **data)
+{
+ if (!this || !key || !data) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict OR key (%s) is NULL", key);
+ return -EINVAL;
+ }
+
+ return dict_get_with_refn(this, key, strlen(key), data);
}
static int
-_data_to_ptr (data_t *data, void **val)
+data_to_ptr_common(data_t *data, void **val)
{
- int ret = 0;
+ int ret = 0;
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- *val = data->data;
+ *val = data->data;
err:
- return ret;
+ return ret;
}
-
static int
-_data_to_int8 (data_t *data, int8_t *val)
+data_to_int8_ptr(data_t *data, int8_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
-
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- errno = 0;
- *val = strtol (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtol(data->data, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
-_data_to_int16 (data_t *data, int16_t *val)
+data_to_int16_ptr(data_t *data, int16_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- errno = 0;
- *val = strtol (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtol(data->data, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
-_data_to_int32 (data_t *data, int32_t *val)
+data_to_int32_ptr(data_t *data, int32_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
-
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- errno = 0;
- *val = strtol (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtol(data->data, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
-_data_to_int64 (data_t *data, int64_t *val)
+data_to_int64_ptr(data_t *data, int64_t *val)
{
- int ret = 0;
- char * str = NULL;
-
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ int ret = 0;
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- errno = 0;
- *val = strtoll (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtoll(data->data, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
-_data_to_uint16 (data_t *data, uint16_t *val)
+data_to_uint16_ptr(data_t *data, uint16_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- errno = 0;
- *val = strtoul (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtoul(data->data, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
-_data_to_uint32 (data_t *data, uint32_t *val)
+data_to_uint32_ptr(data_t *data, uint32_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- errno = 0;
- *val = strtoul (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtoul(data->data, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
-_data_to_uint64 (data_t *data, uint64_t *val)
+data_to_uint64_ptr(data_t *data, uint64_t *val)
{
- int ret = 0;
- char * str = NULL;
+ int ret = 0;
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
-
- errno = 0;
- *val = strtoull (str, NULL, 0);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtoull(data->data, NULL, 0);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
static int
-_data_to_double (data_t *data, double *val)
+data_to_double_ptr(data_t *data, double *val)
{
- int ret = 0;
- char * str = NULL;
-
- if (!data || !val) {
- ret = -EINVAL;
- goto err;
- }
+ int ret = 0;
- str = alloca (data->len + 1);
- if (!str) {
- ret = -ENOMEM;
- goto err;
- }
- memcpy (str, data->data, data->len);
- str[data->len] = '\0';
+ if (!data || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
- errno = 0;
- *val = strtod (str, NULL);
- if (errno != 0)
- ret = -errno;
+ errno = 0;
+ *val = strtod(data->data, NULL);
+ if (errno != 0)
+ ret = -errno;
err:
- return ret;
+ return ret;
}
int
-dict_get_int8 (dict_t *this, char *key, int8_t *val)
+dict_get_int8(dict_t *this, char *key, int8_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, key, -EINVAL);
- ret = _data_to_int8 (data, val);
+ ret = data_to_int8_ptr(data, val);
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
-
int
-dict_set_int8 (dict_t *this, char *key, int8_t val)
+dict_set_int8(dict_t *this, char *key, int8_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- data = data_from_int8 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = data_from_int8(val);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
int
-dict_get_int16 (dict_t *this, char *key, int16_t *val)
+dict_get_int16(dict_t *this, char *key, int16_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
- ret = _data_to_int16 (data, val);
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, key, -EINVAL);
+
+ ret = data_to_int16_ptr(data, val);
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
+int
+dict_set_int16(dict_t *this, char *key, int16_t val)
+{
+ data_t *data = NULL;
+ int ret = 0;
+
+ data = data_from_int16(val);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
+
+err:
+ return ret;
+}
int
-dict_set_int16 (dict_t *this, char *key, int16_t val)
+dict_get_int32n(dict_t *this, char *key, const int keylen, int32_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- data = data_from_int16 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!this || !key || !val) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = dict_get_with_refn(this, key, keylen, &data);
+ if (ret != 0) {
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, key, -EINVAL);
+
+ ret = data_to_int32_ptr(data, val);
err:
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
int
-dict_get_int32 (dict_t *this, char *key, int32_t *val)
+dict_get_int32(dict_t *this, char *key, int32_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, key, -EINVAL);
- ret = _data_to_int32 (data, val);
+ ret = data_to_int32_ptr(data, val);
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
+int
+dict_set_int32n(dict_t *this, char *key, const int keylen, int32_t val)
+{
+ data_t *data = NULL;
+ int ret = 0;
+
+ data = data_from_int32(val);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = dict_setn(this, key, keylen, data);
+ if (ret < 0)
+ data_destroy(data);
+
+err:
+ return ret;
+}
int
-dict_set_int32 (dict_t *this, char *key, int32_t val)
+dict_set_int32(dict_t *this, char *key, int32_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = data_from_int32(val);
+ int ret = 0;
- data = data_from_int32 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
int
-dict_get_int64 (dict_t *this, char *key, int64_t *val)
+dict_get_int64(dict_t *this, char *key, int64_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, key, -EINVAL);
- ret = _data_to_int64 (data, val);
+ ret = data_to_int64_ptr(data, val);
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
-
int
-dict_set_int64 (dict_t *this, char *key, int64_t val)
+dict_set_int64(dict_t *this, char *key, int64_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = data_from_int64(val);
+ int ret = 0;
- data = data_from_int64 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
int
-dict_get_uint16 (dict_t *this, char *key, uint16_t *val)
+dict_get_uint16(dict_t *this, char *key, uint16_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, key, -EINVAL);
- ret = _data_to_uint16 (data, val);
+ ret = data_to_uint16_ptr(data, val);
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
-
int
-dict_set_uint16 (dict_t *this, char *key, uint16_t val)
+dict_set_uint16(dict_t *this, char *key, uint16_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = data_from_uint16(val);
+ int ret = 0;
- data = data_from_uint16 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
int
-dict_get_uint32 (dict_t *this, char *key, uint32_t *val)
+dict_get_uint32(dict_t *this, char *key, uint32_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
- ret = _data_to_uint32 (data, val);
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, key, -EINVAL);
+
+ ret = data_to_uint32_ptr(data, val);
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
-
-
int
-dict_set_uint32 (dict_t *this, char *key, uint32_t val)
+dict_set_uint32(dict_t *this, char *key, uint32_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = data_from_uint32(val);
+ int ret = 0;
- data = data_from_uint32 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
int
-dict_get_uint64 (dict_t *this, char *key, uint64_t *val)
+dict_get_uint64(dict_t *this, char *key, uint64_t *val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- if (!this || !key || !val) {
- ret = -EINVAL;
- goto err;
- }
+ if (!val) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
- ret = _data_to_uint64 (data, val);
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_UINT, key, -EINVAL);
+
+ ret = data_to_uint64_ptr(data, val);
err:
- if (data)
- data_unref (data);
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
-
int
-dict_set_uint64 (dict_t *this, char *key, uint64_t val)
+dict_set_uint64(dict_t *this, char *key, uint64_t val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = data_from_uint64(val);
+ int ret = 0;
- data = data_from_uint64 (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
+/*
+ * dict_check_flag can be used to check a one bit flag in an array of flags
+ * The flag argument indicates the bit position (within the array of bits).
+ * Currently limited to max of 256 flags for a key.
+ * return value,
+ * 1 : flag is set
+ * 0 : flag is not set
+ * <0: Error
+ */
int
-dict_get_double (dict_t *this, char *key, double *val)
+dict_check_flag(dict_t *this, char *key, int flag)
{
- data_t *data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = -ENOENT;
- if (!this || !key || !val) {
- ret = -EINVAL;
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret < 0) {
+ return ret;
+ }
+
+ if (BIT_VALUE((unsigned char *)(data->data), flag))
+ ret = 1;
+ else
+ ret = 0;
+
+ data_unref(data);
+ return ret;
+}
+
+/*
+ * _dict_modify_flag can be used to set/clear a bit flag in an array of flags
+ * flag: indicates the bit position. limited to max of DICT_MAX_FLAGS.
+ * op: Indicates operation DICT_FLAG_SET / DICT_FLAG_CLEAR
+ */
+static int
+_dict_modify_flag(dict_t *this, char *key, int flag, int op)
+{
+ data_t *data = NULL;
+ int ret = 0;
+ data_pair_t *pair = NULL;
+ char *ptr = NULL;
+ int hashval = 0;
+ uint32_t hash;
+
+ if (!this || !key) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict OR key (%s) is NULL", key);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ /*
+ * Using a size of 32 bytes to support max of 256
+ * flags in a single key. This should be suffcient.
+ */
+ GF_ASSERT(flag >= 0 && flag < DICT_MAX_FLAGS);
+
+ hash = (uint32_t)XXH64(key, strlen(key), 0);
+ LOCK(&this->lock);
+ {
+ pair = dict_lookup_common(this, key, hash);
+
+ if (pair) {
+ data = pair->value;
+ if (op == DICT_FLAG_SET)
+ BIT_SET((unsigned char *)(data->data), flag);
+ else
+ BIT_CLEAR((unsigned char *)(data->data), flag);
+ } else {
+ ptr = GF_CALLOC(1, DICT_MAX_FLAGS / 8, gf_common_mt_char);
+ if (!ptr) {
+ gf_smsg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
+ "flag bit array", NULL);
+ ret = -ENOMEM;
goto err;
- }
+ }
+
+ data = data_from_dynptr(ptr, DICT_MAX_FLAGS / 8);
+
+ if (!data) {
+ gf_smsg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY, "data",
+ NULL);
+ GF_FREE(ptr);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ if (op == DICT_FLAG_SET)
+ BIT_SET((unsigned char *)(data->data), flag);
+ else
+ BIT_CLEAR((unsigned char *)(data->data), flag);
+
+ if (this->free_pair.key) { /* the free pair is in use */
+ pair = mem_get0(THIS->ctx->dict_pair_pool);
+ if (!pair) {
+ gf_smsg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
+ "dict pair", NULL);
+ ret = -ENOMEM;
+ goto err;
+ }
+ } else { /* use the free pair */
+ pair = &this->free_pair;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
+ pair->key = (char *)GF_MALLOC(strlen(key) + 1, gf_common_mt_char);
+ if (!pair->key) {
+ gf_smsg("dict", GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
+ "dict pair", NULL);
+ ret = -ENOMEM;
goto err;
+ }
+ strcpy(pair->key, key);
+ pair->key_hash = hash;
+ pair->value = data_ref(data);
+ this->totkvlen += (strlen(key) + 1 + data->len);
+ hashval = hash % this->hash_size;
+ pair->hash_next = this->members[hashval];
+ this->members[hashval] = pair;
+
+ pair->next = this->members_list;
+ pair->prev = NULL;
+ if (this->members_list)
+ this->members_list->prev = pair;
+ this->members_list = pair;
+ this->count++;
+
+ if (this->max_count < this->count)
+ this->max_count = this->count;
}
+ }
- ret = _data_to_double (data, val);
+ UNLOCK(&this->lock);
+ return 0;
err:
- if (data)
- data_unref (data);
- return ret;
+ if (key && this)
+ UNLOCK(&this->lock);
+
+ if (pair) {
+ if (pair->key) {
+ GF_FREE(pair->key);
+ pair->key = NULL;
+ }
+ if (pair != &this->free_pair) {
+ mem_put(pair);
+ }
+ }
+
+ if (data)
+ data_destroy(data);
+
+ gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_DICT_SET_FAILED, "key=%s", key,
+ NULL);
+
+ return ret;
}
+/*
+ * Todo:
+ * Add below primitives as needed:
+ * dict_check_flags(this, key, flag...): variadic function to check
+ * multiple flags at a time.
+ * dict_set_flags(this, key, flag...): set multiple flags
+ * dict_clear_flags(this, key, flag...): reset multiple flags
+ */
+
int
-dict_set_double (dict_t *this, char *key, double val)
+dict_set_flag(dict_t *this, char *key, int flag)
{
- data_t * data = NULL;
- int ret = 0;
+ return _dict_modify_flag(this, key, flag, DICT_FLAG_SET);
+}
- data = data_from_double (val);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+int
+dict_clear_flag(dict_t *this, char *key, int flag)
+{
+ return _dict_modify_flag(this, key, flag, DICT_FLAG_CLEAR);
+}
+
+int
+dict_get_double(dict_t *this, char *key, double *val)
+{
+ data_t *data = NULL;
+ int ret = 0;
+
+ if (!val) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_DOUBLE, key, -EINVAL);
- ret = dict_set (this, key, data);
+ ret = data_to_double_ptr(data, val);
err:
- return ret;
+ if (data)
+ data_unref(data);
+ return ret;
}
int
-dict_set_static_ptr (dict_t *this, char *key, void *ptr)
+dict_set_double(dict_t *this, char *key, double val)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = data_from_double(val);
+ int ret = 0;
- data = data_from_static_ptr (ptr);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
int
-dict_set_dynptr (dict_t *this, char *key, void *ptr, size_t len)
+dict_set_static_ptr(dict_t *this, char *key, void *ptr)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = data_from_ptr_common(ptr, _gf_true);
+ int ret = 0;
- data = data_from_dynptr (ptr, len);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
int
-dict_get_ptr (dict_t *this, char *key, void **ptr)
+dict_set_dynptr(dict_t *this, char *key, void *ptr, size_t len)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = data_from_dynptr(ptr, len);
+ int ret = 0;
- if (!this || !key || !ptr) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
- ret = _data_to_ptr (data, ptr);
- if (ret != 0) {
- goto err;
- }
+err:
+ return ret;
+}
+
+int
+dict_get_ptr(dict_t *this, char *key, void **ptr)
+{
+ data_t *data = NULL;
+ int ret = 0;
+
+ if (!ptr) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_PTR, key, -EINVAL);
+
+ ret = data_to_ptr_common(data, ptr);
+ if (ret != 0) {
+ goto err;
+ }
err:
- if (data)
- data_unref (data);
+ if (data)
+ data_unref(data);
- return ret;
+ return ret;
}
int
-dict_get_ptr_and_len (dict_t *this, char *key, void **ptr, int *len)
+dict_get_ptr_and_len(dict_t *this, char *key, void **ptr, int *len)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- if (!this || !key || !ptr) {
- ret = -EINVAL;
- goto err;
- }
+ if (!ptr) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret != 0) {
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret != 0) {
+ goto err;
+ }
- *len = data->len;
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_PTR, key, -EINVAL);
- ret = _data_to_ptr (data, ptr);
- if (ret != 0) {
- goto err;
- }
+ *len = data->len;
+
+ ret = data_to_ptr_common(data, ptr);
+ if (ret != 0) {
+ goto err;
+ }
err:
- if (data)
- data_unref (data);
+ if (data)
+ data_unref(data);
- return ret;
+ return ret;
}
+/* Get string - with known key length */
int
-dict_set_ptr (dict_t *this, char *key, void *ptr)
+dict_get_strn(dict_t *this, char *key, const int keylen, char **str)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = -EINVAL;
- data = data_from_ptr (ptr);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!this || !key || !str) {
+ goto err;
+ }
+ ret = dict_get_with_refn(this, key, keylen, &data);
+ if (ret < 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_STR, key, -EINVAL);
- ret = dict_set (this, key, data);
+ *str = data->data;
err:
- return ret;
-}
+ if (data)
+ data_unref(data);
+ return ret;
+}
int
-dict_get_str (dict_t *this, char *key, char **str)
+dict_get_str(dict_t *this, char *key, char **str)
{
- data_t * data = NULL;
- int ret = -EINVAL;
+ data_t *data = NULL;
+ int ret = -EINVAL;
- if (!this || !key || !str) {
- goto err;
- }
+ if (!str) {
+ goto err;
+ }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret < 0) {
+ goto err;
+ }
- ret = dict_get_with_ref (this, key, &data);
- if (ret < 0) {
- goto err;
- }
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_STR, key, -EINVAL);
- if (!data || !data->data) {
- goto err;
- }
- *str = data->data;
+ *str = data->data;
err:
- if (data)
- data_unref (data);
+ if (data)
+ data_unref(data);
- return ret;
+ return ret;
}
int
-dict_set_str (dict_t *this, char *key, char *str)
+dict_set_str(dict_t *this, char *key, char *str)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = str_to_data(str);
+ int ret = 0;
- data = str_to_data (str);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
+/* Set string - with known key length */
int
-dict_set_dynstr (dict_t *this, char *key, char *str)
+dict_set_strn(dict_t *this, char *key, const int keylen, char *str)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = 0;
- data = data_from_dynstr (str);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ data = str_to_data(str);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_setn(this, key, keylen, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
-/*
- for malloced strings we should do a free instead of GF_FREE
-*/
+/* Set string - with known key length and known value length */
int
-dict_set_dynmstr (dict_t *this, char *key, char *str)
+dict_set_nstrn(dict_t *this, char *key, const int keylen, char *str,
+ const int vallen)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = strn_to_data(str, vallen);
+ int ret = 0;
- data = data_from_dynmstr (str);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- ret = dict_set (this, key, data);
+ ret = dict_setn(this, key, keylen, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- return ret;
+ return ret;
}
+int
+dict_set_dynstr_with_alloc(dict_t *this, char *key, const char *str)
+{
+ char *alloc_str = gf_strdup(str);
+ int ret = -1;
+
+ if (!alloc_str)
+ return ret;
+
+ ret = dict_set_dynstr(this, key, alloc_str);
+ if (ret == -EINVAL)
+ GF_FREE(alloc_str);
+
+ return ret;
+}
int
-dict_get_bin (dict_t *this, char *key, void **bin)
+dict_set_dynstr(dict_t *this, char *key, char *str)
{
- data_t * data = NULL;
- int ret = -EINVAL;
+ const int keylen = strlen(key);
+ return dict_set_dynstrn(this, key, keylen, str);
+}
- if (!this || !key || !bin) {
- goto err;
- }
+int
+dict_set_dynstrn(dict_t *this, char *key, const int keylen, char *str)
+{
+ data_t *data = data_from_dynstr(str);
+ int ret = 0;
- ret = dict_get_with_ref (this, key, &data);
- if (ret < 0) {
- goto err;
- }
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
- if (!data || !data->data) {
- goto err;
- }
- *bin = data->data;
+ ret = dict_setn(this, key, keylen, data);
+ if (ret < 0)
+ data_destroy(data);
err:
- if (data)
- data_unref (data);
+ return ret;
+}
- return ret;
+/* This function is called only by the volgen for now.
+ Check how else you can handle it */
+int
+dict_set_option(dict_t *this, char *key, char *str)
+{
+ data_t *data = data_from_dynstr(str);
+ int ret = 0;
+
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ data->data_type = GF_DATA_TYPE_STR_OLD;
+ ret = dict_set(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
+err:
+ return ret;
}
+int
+dict_add_dynstr_with_alloc(dict_t *this, char *key, char *str)
+{
+ data_t *data = NULL;
+ int ret = 0;
+ char *alloc_str = gf_strdup(str);
+
+ if (!alloc_str)
+ goto out;
+
+ data = data_from_dynstr(alloc_str);
+ if (!data) {
+ GF_FREE(alloc_str);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = dict_add(this, key, data);
+ if (ret < 0)
+ data_destroy(data);
+
+out:
+ return ret;
+}
int
-dict_set_bin (dict_t *this, char *key, void *ptr, size_t size)
+dict_get_bin(dict_t *this, char *key, void **bin)
{
- data_t * data = NULL;
- int ret = 0;
+ data_t *data = NULL;
+ int ret = -EINVAL;
- if (!ptr || (size < 0)) {
- ret = -EINVAL;
- goto err;
- }
+ if (!bin) {
+ goto err;
+ }
- data = bin_to_data (ptr, size);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret < 0) {
+ goto err;
+ }
- data->data = ptr;
- data->len = size;
- data->is_static = 0;
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_PTR, key, ret);
- ret = dict_set (this, key, data);
+ *bin = data->data;
err:
- return ret;
+ if (data)
+ data_unref(data);
+
+ return ret;
}
+/********************************************************************
+ *
+ * dict_set_bin_common:
+ * This is the common function to set key and its value in
+ * dictionary. Flag(is_static) should be set appropriately based
+ * on the type of memory type used for value(*ptr). If flag is set
+ * to false value(*ptr) will be freed using GF_FREE() on destroy.
+ *
+ *******************************************************************/
+static int
+dict_set_bin_common(dict_t *this, char *key, void *ptr, size_t size,
+ gf_boolean_t is_static, gf_dict_data_type_t type)
+{
+ data_t *data = NULL;
+ int ret = 0;
+
+ if (!ptr || (size > DICT_KEY_VALUE_MAX_SIZE)) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ data = bin_to_data(ptr, size);
+ if (!data) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ data->is_static = is_static;
+ data->data_type = type;
+ ret = dict_set(this, key, data);
+ if (ret < 0) {
+ /* don't free data->data, let callers handle it */
+ data->data = NULL;
+ data_destroy(data);
+ }
+
+err:
+ return ret;
+}
+
+/********************************************************************
+ *
+ * dict_set_bin:
+ * Set key and its value in the dictionary. This function should
+ * be called if the value is stored in dynamic memory.
+ *
+ *******************************************************************/
int
-dict_set_static_bin (dict_t *this, char *key, void *ptr, size_t size)
+dict_set_bin(dict_t *this, char *key, void *ptr, size_t size)
{
- data_t * data = NULL;
- int ret = 0;
+ return dict_set_bin_common(this, key, ptr, size, _gf_false,
+ GF_DATA_TYPE_PTR);
+}
- if (!ptr || (size < 0)) {
- ret = -EINVAL;
- goto err;
- }
+/********************************************************************
+ *
+ * dict_set_static_bin:
+ * Set key and its value in the dictionary. This function should
+ * be called if the value is stored in static memory.
+ *
+ *******************************************************************/
+int
+dict_set_static_bin(dict_t *this, char *key, void *ptr, size_t size)
+{
+ return dict_set_bin_common(this, key, ptr, size, _gf_true,
+ GF_DATA_TYPE_PTR);
+}
- data = bin_to_data (ptr, size);
- if (!data) {
- ret = -EINVAL;
- goto err;
- }
+/* */
+int
+dict_set_gfuuid(dict_t *this, char *key, uuid_t gfid, bool is_static)
+{
+ return dict_set_bin_common(this, key, gfid, sizeof(uuid_t), is_static,
+ GF_DATA_TYPE_GFUUID);
+}
- data->data = ptr;
- data->len = size;
- data->is_static = 1;
+int
+dict_get_gfuuid(dict_t *this, char *key, uuid_t *gfid)
+{
+ data_t *data = NULL;
+ int ret = -EINVAL;
+
+ if (!gfid) {
+ goto err;
+ }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret < 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_GFUUID, key, -EINVAL);
- ret = dict_set (this, key, data);
+ memcpy(*gfid, data->data, min(data->len, sizeof(uuid_t)));
err:
- return ret;
+ if (data)
+ data_unref(data);
+
+ return ret;
}
+int
+dict_set_mdata(dict_t *this, char *key, struct mdata_iatt *mdata,
+ bool is_static)
+{
+ return dict_set_bin_common(this, key, mdata, sizeof(struct mdata_iatt),
+ is_static, GF_DATA_TYPE_MDATA);
+}
+
+int
+dict_get_mdata(dict_t *this, char *key, struct mdata_iatt *mdata)
+{
+ data_t *data = NULL;
+ int ret = -EINVAL;
+
+ if (!mdata) {
+ goto err;
+ }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret < 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_MDATA, key, -EINVAL);
+ if (data->len < sizeof(struct mdata_iatt)) {
+ gf_smsg("glusterfs", GF_LOG_ERROR, ENOBUFS, LG_MSG_UNDERSIZED_BUF,
+ "key=%s", key, NULL);
+ ret = -ENOBUFS;
+ goto err;
+ }
+
+ memcpy(mdata, data->data, min(data->len, sizeof(struct mdata_iatt)));
+
+err:
+ if (data)
+ data_unref(data);
+
+ return ret;
+}
+
+int
+dict_set_iatt(dict_t *this, char *key, struct iatt *iatt, bool is_static)
+{
+ return dict_set_bin_common(this, key, iatt, sizeof(struct iatt), is_static,
+ GF_DATA_TYPE_IATT);
+}
+
+int
+dict_get_iatt(dict_t *this, char *key, struct iatt *iatt)
+{
+ data_t *data = NULL;
+ int ret = -EINVAL;
+
+ if (!iatt) {
+ goto err;
+ }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret < 0) {
+ goto err;
+ }
+
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_IATT, key, -EINVAL);
+
+ memcpy(iatt, data->data, min(data->len, sizeof(struct iatt)));
+
+err:
+ if (data)
+ data_unref(data);
+
+ return ret;
+}
/**
* dict_get_str_boolean - get a boolean value based on string representation.
@@ -2078,41 +2806,77 @@ err:
*/
int
-dict_get_str_boolean (dict_t *this, char *key, int default_val)
+dict_get_str_boolean(dict_t *this, char *key, int default_val)
{
- data_t *data = NULL;
- gf_boolean_t boo = _gf_false;
- int ret = 0;
+ data_t *data = NULL;
+ gf_boolean_t boo = _gf_false;
+ int ret = 0;
- ret = dict_get_with_ref (this, key, &data);
- if (ret < 0) {
- if (ret == -ENOENT)
- ret = default_val;
- else
- ret = -1;
- goto err;
- }
+ ret = dict_get_with_ref(this, key, &data);
+ if (ret < 0) {
+ if (ret == -ENOENT)
+ ret = default_val;
+ else
+ ret = -1;
+ goto err;
+ }
- GF_ASSERT (data);
+ VALIDATE_DATA_AND_LOG(data, GF_DATA_TYPE_INT, key, -EINVAL);
- if (!data->data) {
- ret = -1;
- goto err;
- }
-
- ret = gf_string2boolean (data->data, &boo);
- if (ret == -1)
- goto err;
+ ret = gf_strn2boolean(data->data, data->len - 1, &boo);
+ if (ret == -1)
+ goto err;
- ret = boo;
+ ret = boo;
err:
- if (data)
- data_unref (data);
+ if (data)
+ data_unref(data);
- return ret;
+ return ret;
}
+int
+dict_rename_key(dict_t *this, char *key, char *replace_key)
+{
+ data_pair_t *pair = NULL;
+ int ret = -EINVAL;
+ uint32_t hash;
+ uint32_t replacekey_hash;
+ int replacekey_len;
+
+ /* replacing a key by itself is a NO-OP */
+ if (strcmp(key, replace_key) == 0)
+ return 0;
+
+ if (!this) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is NULL");
+ return ret;
+ }
+
+ hash = (uint32_t)XXH64(key, strlen(key), 0);
+ replacekey_len = strlen(replace_key);
+ replacekey_hash = (uint32_t)XXH64(replace_key, replacekey_len, 0);
+
+ LOCK(&this->lock);
+ {
+ /* no need to data_ref(pair->value), dict_set_lk() does it */
+ pair = dict_lookup_common(this, key, hash);
+ if (!pair)
+ ret = -ENODATA;
+ else
+ ret = dict_set_lk(this, replace_key, replacekey_len, pair->value,
+ replacekey_hash, 1);
+ }
+ UNLOCK(&this->lock);
+
+ if (!ret)
+ /* only delete the key on success */
+ dict_del(this, key);
+
+ return ret;
+}
/**
* Serialization format:
@@ -2122,13 +2886,9 @@ err:
* 4 4 4 <key len> <value len>
*/
-#define DICT_HDR_LEN 4
-#define DICT_DATA_HDR_KEY_LEN 4
-#define DICT_DATA_HDR_VAL_LEN 4
-
/**
- * _dict_serialized_length - return the length of serialized dict. This
- * procedure has to be called with this->lock held.
+ * dict_serialized_length_lk - return the length of serialized dict. This
+ * procedure has to be called with this->lock held.
*
* @this : dict to be serialized
* @return: success: len
@@ -2136,155 +2896,103 @@ err:
*/
int
-_dict_serialized_length (dict_t *this)
+dict_serialized_length_lk(dict_t *this)
{
- int ret = -EINVAL;
- int count = 0;
- int len = 0;
- data_pair_t * pair = NULL;
-
- len = DICT_HDR_LEN;
- count = this->count;
-
- if (count < 0) {
- gf_log ("dict", GF_LOG_ERROR, "count (%d) < 0!", count);
- goto out;
- }
-
- pair = this->members_list;
-
- while (count) {
- if (!pair) {
- gf_log ("dict", GF_LOG_ERROR,
- "less than count data pairs found!");
- goto out;
- }
-
- len += DICT_DATA_HDR_KEY_LEN + DICT_DATA_HDR_VAL_LEN;
-
- if (!pair->key) {
- gf_log ("dict", GF_LOG_ERROR, "pair->key is null!");
- goto out;
- }
+ int ret = -EINVAL;
+ int count = this->count;
+ const int keyhdrlen = DICT_DATA_HDR_KEY_LEN + DICT_DATA_HDR_VAL_LEN;
- len += strlen (pair->key) + 1 /* for '\0' */;
+ if (count < 0) {
+ gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_COUNT_LESS_THAN_ZERO,
+ "count=%d", count, NULL);
+ goto out;
+ }
- if (!pair->value) {
- gf_log ("dict", GF_LOG_ERROR,
- "pair->value is null!");
- goto out;
- }
-
- if (pair->value->len < 0) {
- gf_log ("dict", GF_LOG_ERROR,
- "value->len (%d) < 0",
- pair->value->len);
- goto out;
- }
-
- len += pair->value->len;
-
- pair = pair->next;
- count--;
- }
-
- ret = len;
+ ret = DICT_HDR_LEN + this->totkvlen + (count * keyhdrlen);
out:
- return ret;
+ return ret;
}
/**
- * _dict_serialize - serialize a dictionary into a buffer. This procedure has
- * to be called with this->lock held.
+ * dict_serialize_lk - serialize a dictionary into a buffer. This procedure has
+ * to be called with this->lock held.
*
* @this: dict to serialize
* @buf: buffer to serialize into. This must be
- * atleast dict_serialized_length (this) large
+ * at least dict_serialized_length (this) large
*
* @return: success: 0
* failure: -errno
*/
-int
-_dict_serialize (dict_t *this, char *buf)
+static int
+dict_serialize_lk(dict_t *this, char *buf)
{
- int ret = -1;
- data_pair_t * pair = NULL;
- int32_t count = 0;
- int32_t keylen = 0;
- int32_t vallen = 0;
- int32_t netword = 0;
+ int ret = -1;
+ data_pair_t *pair = this->members_list;
+ int32_t count = this->count;
+ int32_t keylen = 0;
+ int32_t netword = 0;
+ if (!buf) {
+ gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, NULL);
+ goto out;
+ }
- if (!buf) {
- gf_log ("dict", GF_LOG_ERROR,
- "buf is null!");
- goto out;
- }
+ if (count < 0) {
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_COUNT_LESS_THAN_ZERO,
+ "count=%d", count, NULL);
+ goto out;
+ }
+ netword = hton32(count);
+ memcpy(buf, &netword, sizeof(netword));
+ buf += DICT_HDR_LEN;
- count = this->count;
- if (count < 0) {
- gf_log ("dict", GF_LOG_ERROR, "count (%d) < 0!", count);
- goto out;
+ while (count) {
+ if (!pair) {
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_PAIRS_LESS_THAN_COUNT,
+ NULL);
+ goto out;
}
- netword = hton32 (count);
- memcpy (buf, &netword, sizeof(netword));
- buf += DICT_HDR_LEN;
- pair = this->members_list;
-
- while (count) {
- if (!pair) {
- gf_log ("dict", GF_LOG_ERROR,
- "less than count data pairs found!");
- goto out;
- }
-
- if (!pair->key) {
- gf_log ("dict", GF_LOG_ERROR,
- "pair->key is null!");
- goto out;
- }
-
- keylen = strlen (pair->key);
- netword = hton32 (keylen);
- memcpy (buf, &netword, sizeof(netword));
- buf += DICT_DATA_HDR_KEY_LEN;
+ if (!pair->key) {
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR, NULL);
+ goto out;
+ }
- if (!pair->value) {
- gf_log ("dict", GF_LOG_ERROR,
- "pair->value is null!");
- goto out;
- }
+ keylen = strlen(pair->key);
+ netword = hton32(keylen);
+ memcpy(buf, &netword, sizeof(netword));
+ buf += DICT_DATA_HDR_KEY_LEN;
- vallen = pair->value->len;
- netword = hton32 (vallen);
- memcpy (buf, &netword, sizeof(netword));
- buf += DICT_DATA_HDR_VAL_LEN;
+ if (!pair->value) {
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_PTR, NULL);
+ goto out;
+ }
- memcpy (buf, pair->key, keylen);
- buf += keylen;
- *buf++ = '\0';
+ netword = hton32(pair->value->len);
+ memcpy(buf, &netword, sizeof(netword));
+ buf += DICT_DATA_HDR_VAL_LEN;
- if (!pair->value->data) {
- gf_log ("dict", GF_LOG_ERROR,
- "pair->value->data is null!");
- goto out;
- }
- memcpy (buf, pair->value->data, vallen);
- buf += vallen;
+ memcpy(buf, pair->key, keylen);
+ buf += keylen;
+ *buf++ = '\0';
- pair = pair->next;
- count--;
+ if (pair->value->data) {
+ memcpy(buf, pair->value->data, pair->value->len);
+ buf += pair->value->len;
}
- ret = 0;
+ pair = pair->next;
+ count--;
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
/**
* dict_serialized_length - return the length of serialized dict
*
@@ -2294,23 +3002,24 @@ out:
*/
int
-dict_serialized_length (dict_t *this)
+dict_serialized_length(dict_t *this)
{
- int ret = -EINVAL;
+ int ret = -EINVAL;
- if (!this) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is null!");
- goto out;
- }
+ if (!this) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is null!");
+ goto out;
+ }
- LOCK (&this->lock);
- {
- ret = _dict_serialized_length (this);
- }
- UNLOCK (&this->lock);
+ LOCK(&this->lock);
+ {
+ ret = dict_serialized_length_lk(this);
+ }
+ UNLOCK(&this->lock);
out:
- return ret;
+ return ret;
}
/**
@@ -2318,32 +3027,32 @@ out:
*
* @this: dict to serialize
* @buf: buffer to serialize into. This must be
- * atleast dict_serialized_length (this) large
+ * at least dict_serialized_length (this) large
*
* @return: success: 0
* failure: -errno
*/
int
-dict_serialize (dict_t *this, char *buf)
+dict_serialize(dict_t *this, char *buf)
{
- int ret = -1;
+ int ret = -1;
- if (!this || !buf) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is null!");
- goto out;
- }
+ if (!this || !buf) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is null!");
+ goto out;
+ }
- LOCK (&this->lock);
- {
- ret = _dict_serialize (this, buf);
- }
- UNLOCK (&this->lock);
+ LOCK(&this->lock);
+ {
+ ret = dict_serialize_lk(this, buf);
+ }
+ UNLOCK(&this->lock);
out:
- return ret;
+ return ret;
}
-
/**
* dict_unserialize - unserialize a buffer into a dict
*
@@ -2356,124 +3065,137 @@ out:
*/
int32_t
-dict_unserialize (char *orig_buf, int32_t size, dict_t **fill)
-{
- char *buf = NULL;
- int ret = -1;
- int32_t count = 0;
- int i = 0;
-
- data_t * value = NULL;
- char * key = NULL;
- int32_t keylen = 0;
- int32_t vallen = 0;
- int32_t hostord = 0;
-
- buf = orig_buf;
-
- if (!buf) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "buf is null!");
- goto out;
- }
+dict_unserialize(char *orig_buf, int32_t size, dict_t **fill)
+{
+ char *buf = orig_buf;
+ int ret = -1;
+ int32_t count = 0;
+ int i = 0;
+
+ data_t *value = NULL;
+ char *key = NULL;
+ int32_t keylen = 0;
+ int32_t vallen = 0;
+ int32_t hostord = 0;
+
+ if (!buf) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "buf is null!");
+ goto out;
+ }
+
+ if (size == 0) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "size is 0!");
+ goto out;
+ }
+
+ if (!fill) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "fill is null!");
+ goto out;
+ }
+
+ if (!*fill) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "*fill is null!");
+ goto out;
+ }
+
+ if ((buf + DICT_HDR_LEN) > (orig_buf + size)) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
+ "undersized buffer "
+ "passed. available (%lu) < required (%lu)",
+ (long)(orig_buf + size), (long)(buf + DICT_HDR_LEN));
+ goto out;
+ }
+
+ memcpy(&hostord, buf, sizeof(hostord));
+ count = ntoh32(hostord);
+ buf += DICT_HDR_LEN;
+
+ if (count < 0) {
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_COUNT_LESS_THAN_ZERO,
+ "count=%d", count, NULL);
+ goto out;
+ }
+
+ /* count will be set by the dict_set's below */
+ (*fill)->count = 0;
+
+ for (i = 0; i < count; i++) {
+ if ((buf + DICT_DATA_HDR_KEY_LEN) > (orig_buf + size)) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
+ "undersized "
+ "buffer passed. available (%lu) < "
+ "required (%lu)",
+ (long)(orig_buf + size),
+ (long)(buf + DICT_DATA_HDR_KEY_LEN));
+ goto out;
+ }
+ memcpy(&hostord, buf, sizeof(hostord));
+ keylen = ntoh32(hostord);
+ buf += DICT_DATA_HDR_KEY_LEN;
+
+ if ((buf + DICT_DATA_HDR_VAL_LEN) > (orig_buf + size)) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
+ "undersized "
+ "buffer passed. available (%lu) < "
+ "required (%lu)",
+ (long)(orig_buf + size),
+ (long)(buf + DICT_DATA_HDR_VAL_LEN));
+ goto out;
+ }
+ memcpy(&hostord, buf, sizeof(hostord));
+ vallen = ntoh32(hostord);
+ buf += DICT_DATA_HDR_VAL_LEN;
+
+ if ((keylen < 0) || (vallen < 0)) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
+ "undersized length passed "
+ "key:%d val:%d",
+ keylen, vallen);
+ goto out;
+ }
+ if ((buf + keylen) > (orig_buf + size)) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
+ "undersized buffer passed. "
+ "available (%lu) < required (%lu)",
+ (long)(orig_buf + size), (long)(buf + keylen));
+ goto out;
+ }
+ key = buf;
+ buf += keylen + 1; /* for '\0' */
+
+ if ((buf + vallen) > (orig_buf + size)) {
+ gf_msg_callingfn("dict", GF_LOG_ERROR, 0, LG_MSG_UNDERSIZED_BUF,
+ "undersized buffer passed. "
+ "available (%lu) < required (%lu)",
+ (long)(orig_buf + size), (long)(buf + vallen));
+ goto out;
+ }
+ value = get_new_data();
- if (size == 0) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "size is 0!");
- goto out;
- }
-
- if (!fill) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "fill is null!");
- goto out;
- }
-
- if (!*fill) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "*fill is null!");
- goto out;
- }
-
- if ((buf + DICT_HDR_LEN) > (orig_buf + size)) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "undersized buffer passed. "
- "available (%lu) < required (%lu)",
- (long)(orig_buf + size),
- (long)(buf + DICT_HDR_LEN));
- goto out;
+ if (!value) {
+ ret = -1;
+ goto out;
}
+ value->len = vallen;
+ value->data = gf_memdup(buf, vallen);
+ value->data_type = GF_DATA_TYPE_STR_OLD;
+ value->is_static = _gf_false;
+ buf += vallen;
- memcpy (&hostord, buf, sizeof(hostord));
- count = ntoh32 (hostord);
- buf += DICT_HDR_LEN;
+ ret = dict_addn(*fill, key, keylen, value);
+ if (ret < 0)
+ goto out;
+ }
- if (count < 0) {
- gf_log ("dict", GF_LOG_ERROR,
- "count (%d) <= 0", count);
- goto out;
- }
-
- /* count will be set by the dict_set's below */
- (*fill)->count = 0;
-
- for (i = 0; i < count; i++) {
- if ((buf + DICT_DATA_HDR_KEY_LEN) > (orig_buf + size)) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "undersized buffer passed. "
- "available (%lu) < required (%lu)",
- (long)(orig_buf + size),
- (long)(buf + DICT_DATA_HDR_KEY_LEN));
- goto out;
- }
- memcpy (&hostord, buf, sizeof(hostord));
- keylen = ntoh32 (hostord);
- buf += DICT_DATA_HDR_KEY_LEN;
-
- if ((buf + DICT_DATA_HDR_VAL_LEN) > (orig_buf + size)) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "undersized buffer passed. "
- "available (%lu) < required (%lu)",
- (long)(orig_buf + size),
- (long)(buf + DICT_DATA_HDR_VAL_LEN));
- goto out;
- }
- memcpy (&hostord, buf, sizeof(hostord));
- vallen = ntoh32 (hostord);
- buf += DICT_DATA_HDR_VAL_LEN;
-
- if ((buf + keylen) > (orig_buf + size)) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "undersized buffer passed. "
- "available (%lu) < required (%lu)",
- (long)(orig_buf + size),
- (long)(buf + keylen));
- goto out;
- }
- key = buf;
- buf += keylen + 1; /* for '\0' */
-
- if ((buf + vallen) > (orig_buf + size)) {
- gf_log_callingfn ("dict", GF_LOG_ERROR,
- "undersized buffer passed. "
- "available (%lu) < required (%lu)",
- (long)(orig_buf + size),
- (long)(buf + vallen));
- }
- value = get_new_data ();
- value->len = vallen;
- value->data = memdup (buf, vallen);
- value->is_static = 0;
- buf += vallen;
-
- dict_set (*fill, key, value);
- }
-
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
/**
* dict_allocate_and_serialize - serialize a dictionary into an allocated buffer
*
@@ -2486,50 +3208,49 @@ out:
*/
int32_t
-dict_allocate_and_serialize (dict_t *this, char **buf, size_t *length)
+dict_allocate_and_serialize(dict_t *this, char **buf, u_int *length)
{
- int ret = -EINVAL;
- ssize_t len = 0;
+ int ret = -EINVAL;
+ ssize_t len = 0;
- if (!this || !buf) {
- gf_log_callingfn ("dict", GF_LOG_DEBUG,
- "dict OR buf is NULL");
- goto out;
- }
+ if (!this || !buf) {
+ gf_msg_debug("dict", 0, "dict OR buf is NULL");
+ goto out;
+ }
- LOCK (&this->lock);
- {
- len = _dict_serialized_length (this);
- if (len < 0) {
- ret = len;
- goto unlock;
- }
+ LOCK(&this->lock);
+ {
+ len = dict_serialized_length_lk(this);
+ if (len < 0) {
+ ret = len;
+ goto unlock;
+ }
- *buf = GF_CALLOC (1, len, gf_common_mt_char);
- if (*buf == NULL) {
- ret = -ENOMEM;
- goto unlock;
- }
+ *buf = GF_MALLOC(len, gf_common_mt_char);
+ if (*buf == NULL) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
- ret = _dict_serialize (this, *buf);
- if (ret < 0) {
- GF_FREE (*buf);
- *buf = NULL;
- goto unlock;
- }
+ ret = dict_serialize_lk(this, *buf);
+ if (ret < 0) {
+ GF_FREE(*buf);
+ *buf = NULL;
+ goto unlock;
+ }
- if (length != NULL) {
- *length = len;
- }
+ if (length != NULL) {
+ *length = len;
}
+ }
unlock:
- UNLOCK (&this->lock);
+ UNLOCK(&this->lock);
out:
- return ret;
+ return ret;
}
/**
- * _dict_serialize_value_with_delim: serialize the values in the dictionary
+ * dict_serialize_value_with_delim_lk: serialize the values in the dictionary
* into a buffer separated by delimiter (except the last)
*
* @this : dictionary to serialize
@@ -2538,125 +3259,231 @@ out:
* @delimiter : the delimiter to separate the values
*
* @return : 0 -> success
- * : -errno -> faliure
+ * : -errno -> failure
*/
int
-_dict_serialize_value_with_delim (dict_t *this, char *buf, int32_t *serz_len,
- char delimiter)
-{
- int ret = -1;
- int32_t count = 0;
- int32_t vallen = 0;
- int32_t total_len = 0;
- data_pair_t *pair = NULL;
-
- if (!buf) {
- gf_log ("dict", GF_LOG_ERROR, "buf is null");
- goto out;
- }
+dict_serialize_value_with_delim_lk(dict_t *this, char *buf, int32_t *serz_len,
+ char delimiter)
+{
+ int ret = -1;
+ int32_t count = this->count;
+ int32_t vallen = 0;
+ int32_t total_len = 0;
+ data_pair_t *pair = this->members_list;
- count = this->count;
- if (count < 0) {
- gf_log ("dict", GF_LOG_ERROR, "count (%d) < 0", count);
- goto out;
- }
+ if (!buf) {
+ gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, NULL);
+ goto out;
+ }
- pair = this->members_list;
+ if (count < 0) {
+ gf_smsg("dict", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, "count=%d",
+ count, NULL);
+ goto out;
+ }
- while (count) {
- if (!pair) {
- gf_log ("dict", GF_LOG_ERROR,
- "less than count data pairs found");
- goto out;
- }
+ while (count) {
+ if (!pair) {
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_PAIRS_LESS_THAN_COUNT,
+ NULL);
+ goto out;
+ }
- if (!pair->key || !pair->value) {
- gf_log ("dict", GF_LOG_ERROR,
- "key or value is null");
- goto out;
- }
+ if (!pair->key || !pair->value) {
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_KEY_OR_VALUE_NULL, NULL);
+ goto out;
+ }
- if (!pair->value->data) {
- gf_log ("dict", GF_LOG_ERROR,
- "null value found in dict");
- goto out;
- }
+ if (!pair->value->data) {
+ gf_smsg("dict", GF_LOG_ERROR, 0, LG_MSG_NULL_VALUE_IN_DICT, NULL);
+ goto out;
+ }
- vallen = pair->value->len - 1; // length includes \0
- memcpy (buf, pair->value->data, vallen);
- buf += vallen;
- *buf++ = delimiter;
+ vallen = pair->value->len - 1; // length includes \0
+ memcpy(buf, pair->value->data, vallen);
+ buf += vallen;
+ *buf++ = delimiter;
- total_len += (vallen + 1);
+ total_len += (vallen + 1);
- pair = pair->next;
- count--;
- }
+ pair = pair->next;
+ count--;
+ }
- *--buf = '\0'; // remove the last delimiter
- total_len--; // adjust the length
- ret = 0;
+ *--buf = '\0'; // remove the last delimiter
+ total_len--; // adjust the length
+ ret = 0;
- if (serz_len)
- *serz_len = total_len;
+ if (serz_len)
+ *serz_len = total_len;
- out:
- return ret;
+out:
+ return ret;
+}
+
+int
+dict_serialize_value_with_delim(dict_t *this, char *buf, int32_t *serz_len,
+ char delimiter)
+{
+ int ret = -1;
+
+ if (!this || !buf) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is null!");
+ goto out;
+ }
+
+ LOCK(&this->lock);
+ {
+ ret = dict_serialize_value_with_delim_lk(this, buf, serz_len,
+ delimiter);
+ }
+ UNLOCK(&this->lock);
+out:
+ return ret;
}
int
-dict_serialize_value_with_delim (dict_t *this, char *buf, int32_t *serz_len,
- char delimiter)
+dict_dump_to_str(dict_t *dict, char *dump, int dumpsize, char *format)
{
- int ret = -1;
+ int ret = 0;
+ int dumplen = 0;
+ data_pair_t *trav = NULL;
- if (!this || !buf) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict is null!");
- goto out;
- }
+ if (!dict)
+ return 0;
- LOCK (&this->lock);
- {
- ret = _dict_serialize_value_with_delim (this, buf, serz_len, delimiter);
- }
- UNLOCK (&this->lock);
+ for (trav = dict->members_list; trav; trav = trav->next) {
+ ret = snprintf(&dump[dumplen], dumpsize - dumplen, format, trav->key,
+ trav->value->data);
+ if ((ret == -1) || !ret)
+ return ret;
+
+ dumplen += ret;
+ }
+ return 0;
+}
+
+void
+dict_dump_to_log(dict_t *dict)
+{
+ int ret = -1;
+ char *dump = NULL;
+ const int dump_size = 64 * 1024;
+ char *format = "(%s:%s)";
+
+ if (!dict) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is NULL");
+ goto out;
+ }
+
+ dump = GF_MALLOC(dump_size, gf_common_mt_char);
+ if (!dump) {
+ gf_msg_callingfn("dict", GF_LOG_WARNING, ENOMEM, LG_MSG_NO_MEMORY,
+ "dump buffer is NULL");
+ goto out;
+ }
+
+ ret = dict_dump_to_str(dict, dump, dump_size, format);
+ if (ret) {
+ gf_smsg("dict", GF_LOG_WARNING, 0, LG_MSG_FAILED_TO_LOG_DICT, NULL);
+ goto out;
+ }
+ gf_smsg("dict", GF_LOG_INFO, 0, LG_MSG_DICT_ERROR, "dict=%p", dict,
+ "dump=%s", dump, NULL);
out:
- return ret;
+ GF_FREE(dump);
+
+ return;
}
void
-dict_dump (dict_t *this)
+dict_dump_to_statedump(dict_t *dict, char *dict_name, char *domain)
+{
+ int ret = -1;
+ char *dump = NULL;
+ const int dump_size = 64 * 1024;
+ char key[4096] = {
+ 0,
+ };
+ char *format = "\n\t%s:%s";
+
+ if (!dict) {
+ gf_msg_callingfn(domain, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "dict is NULL");
+ goto out;
+ }
+
+ dump = GF_MALLOC(dump_size, gf_common_mt_char);
+ if (!dump) {
+ gf_msg_callingfn(domain, GF_LOG_WARNING, ENOMEM, LG_MSG_NO_MEMORY,
+ "dump buffer is NULL");
+ goto out;
+ }
+
+ ret = dict_dump_to_str(dict, dump, dump_size, format);
+ if (ret) {
+ gf_smsg(domain, GF_LOG_WARNING, 0, LG_MSG_FAILED_TO_LOG_DICT, "name=%s",
+ dict_name, NULL);
+ goto out;
+ }
+ gf_proc_dump_build_key(key, domain, "%s", dict_name);
+ gf_proc_dump_write(key, "%s", dump);
+
+out:
+ GF_FREE(dump);
+
+ return;
+}
+
+dict_t *
+dict_for_key_value(const char *name, const char *value, size_t size,
+ gf_boolean_t is_static)
{
- int ret = 0;
- int dumplen = 0;
- data_pair_t *trav = NULL;
- char dump[64*1024]; /* This is debug only, hence
- performance should not matter */
+ dict_t *xattr = dict_new();
+ int ret = 0;
- if (!this) {
- gf_log_callingfn ("dict", GF_LOG_WARNING, "dict NULL");
- goto out;
- }
+ if (!xattr)
+ return NULL;
- dump[0] = '\0'; /* the array is not initialized to '\0' */
+ if (is_static)
+ ret = dict_set_static_bin(xattr, (char *)name, (void *)value, size);
+ else
+ ret = dict_set_bin(xattr, (char *)name, (void *)value, size);
- /* There is a possibility of issues if data is binary, ignore it
- for now as debugging is more important */
- for (trav = this->members_list; trav; trav = trav->next) {
- ret = snprintf (&dump[dumplen], ((64*1024) - dumplen - 1),
- "(%s:%s)", trav->key, trav->value->data);
- if ((ret == -1) || !ret)
- break;
+ if (ret) {
+ dict_destroy(xattr);
+ xattr = NULL;
+ }
- dumplen += ret;
- /* snprintf doesn't append a trailing '\0', add it here */
- dump[dumplen] = '\0';
- }
+ return xattr;
+}
+
+/*
+ * "strings" should be NULL terminated strings array.
+ */
+int
+dict_has_key_from_array(dict_t *dict, char **strings, gf_boolean_t *result)
+{
+ int i = 0;
+ uint32_t hash = 0;
- if (dumplen)
- gf_log_callingfn ("dict", GF_LOG_INFO,
- "dict=%p (%s)", this, dump);
+ if (!dict || !strings || !result)
+ return -EINVAL;
-out:
- return;
+ LOCK(&dict->lock);
+ {
+ for (i = 0; strings[i]; i++) {
+ hash = (uint32_t)XXH64(strings[i], strlen(strings[i]), 0);
+ if (dict_lookup_common(dict, strings[i], hash)) {
+ *result = _gf_true;
+ goto unlock;
+ }
+ }
+ *result = _gf_false;
+ }
+unlock:
+ UNLOCK(&dict->lock);
+ return 0;
}
diff --git a/libglusterfs/src/dict.h b/libglusterfs/src/dict.h
deleted file mode 100644
index 2a875039661..00000000000
--- a/libglusterfs/src/dict.h
+++ /dev/null
@@ -1,231 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _DICT_H
-#define _DICT_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <inttypes.h>
-#include <sys/uio.h>
-#include <pthread.h>
-
-#include "common-utils.h"
-
-typedef struct _data data_t;
-typedef struct _dict dict_t;
-typedef struct _data_pair data_pair_t;
-
-
-#define GF_PROTOCOL_DICT_SERIALIZE(this,from_dict,to,len,ope,labl) do { \
- int ret = 0; \
- size_t dictlen = 0; \
- \
- if (!from_dict) \
- break; \
- \
- ret = dict_allocate_and_serialize (from_dict, to, \
- &dictlen); \
- if (ret < 0) { \
- gf_log (this->name, GF_LOG_WARNING, \
- "failed to get serialized dict (%s)", \
- (#from_dict)); \
- ope = EINVAL; \
- goto labl; \
- } \
- len = dictlen; \
- } while (0)
-
-
-#define GF_PROTOCOL_DICT_UNSERIALIZE(xl,to,buff,len,ret,ope,labl) do { \
- char *buf = NULL; \
- if (!len) \
- break; \
- to = dict_new(); \
- GF_VALIDATE_OR_GOTO (xl->name, to, labl); \
- \
- buf = memdup (buff, len); \
- GF_VALIDATE_OR_GOTO (xl->name, buf, labl); \
- \
- ret = dict_unserialize (buf, len, &to); \
- if (ret < 0) { \
- gf_log (xl->name, GF_LOG_WARNING, \
- "failed to unserialize dictionary (%s)", \
- (#to)); \
- \
- ope = EINVAL; \
- GF_FREE (buf); \
- goto labl; \
- } \
- \
- to->extra_free = buf; \
- } while (0)
-
-struct _data {
- unsigned char is_static:1;
- unsigned char is_const:1;
- unsigned char is_stdalloc:1;
- int32_t len;
- char *data;
- int32_t refcount;
- gf_lock_t lock;
-};
-
-struct _data_pair {
- struct _data_pair *hash_next;
- struct _data_pair *prev;
- struct _data_pair *next;
- data_t *value;
- char *key;
-};
-
-struct _dict {
- unsigned char is_static:1;
- int32_t hash_size;
- int32_t count;
- int32_t refcount;
- data_pair_t **members;
- data_pair_t *members_list;
- char *extra_free;
- char *extra_stdfree;
- gf_lock_t lock;
- data_pair_t *members_internal;
- data_pair_t free_pair;
- gf_boolean_t free_pair_in_use;
-};
-
-
-int32_t is_data_equal (data_t *one, data_t *two);
-void data_destroy (data_t *data);
-
-int32_t dict_set (dict_t *this, char *key, data_t *value);
-data_t *dict_get (dict_t *this, char *key);
-void dict_del (dict_t *this, char *key);
-int dict_reset (dict_t *dict);
-
-int32_t dict_serialized_length (dict_t *dict);
-int32_t dict_serialize (dict_t *dict, char *buf);
-int32_t dict_unserialize (char *buf, int32_t size, dict_t **fill);
-
-int32_t dict_allocate_and_serialize (dict_t *this, char **buf, size_t *length);
-
-void dict_destroy (dict_t *dict);
-void dict_unref (dict_t *dict);
-dict_t *dict_ref (dict_t *dict);
-data_t *data_ref (data_t *data);
-void data_unref (data_t *data);
-
-int32_t dict_lookup (dict_t *this, char *key, data_pair_t **data);
-/*
- TODO: provide converts for differnt byte sizes, signedness, and void *
- */
-data_t *int_to_data (int64_t value);
-data_t *str_to_data (char *value);
-data_t *data_from_dynstr (char *value);
-data_t *data_from_dynptr (void *value, int32_t len);
-data_t *bin_to_data (void *value, int32_t len);
-data_t *static_str_to_data (char *value);
-data_t *static_bin_to_data (void *value);
-
-int64_t data_to_int64 (data_t *data);
-int32_t data_to_int32 (data_t *data);
-int16_t data_to_int16 (data_t *data);
-int8_t data_to_int8 (data_t *data);
-
-uint64_t data_to_uint64 (data_t *data);
-uint32_t data_to_uint32 (data_t *data);
-uint16_t data_to_uint16 (data_t *data);
-uint8_t data_to_uint8 (data_t *data);
-
-data_t *data_from_ptr (void *value);
-data_t *data_from_static_ptr (void *value);
-
-data_t *data_from_int64 (int64_t value);
-data_t *data_from_int32 (int32_t value);
-data_t *data_from_int16 (int16_t value);
-data_t *data_from_int8 (int8_t value);
-
-data_t *data_from_uint64 (uint64_t value);
-data_t *data_from_uint32 (uint32_t value);
-data_t *data_from_uint16 (uint16_t value);
-
-char *data_to_str (data_t *data);
-void *data_to_bin (data_t *data);
-void *data_to_ptr (data_t *data);
-
-data_t *get_new_data ();
-data_t * data_copy (data_t *old);
-dict_t *get_new_dict_full (int size_hint);
-dict_t *get_new_dict ();
-
-void dict_foreach (dict_t *this,
- void (*fn)(dict_t *this,
- char *key,
- data_t *value,
- void *data),
- void *data);
-
-dict_t *dict_copy (dict_t *this, dict_t *new);
-
-/* CLEANED UP FUNCTIONS DECLARATIONS */
-GF_MUST_CHECK dict_t *dict_new (void);
-dict_t *dict_copy_with_ref (dict_t *this, dict_t *new);
-
-GF_MUST_CHECK int dict_reset (dict_t *dict);
-
-GF_MUST_CHECK int dict_get_int8 (dict_t *this, char *key, int8_t *val);
-GF_MUST_CHECK int dict_set_int8 (dict_t *this, char *key, int8_t val);
-
-GF_MUST_CHECK int dict_get_int16 (dict_t *this, char *key, int16_t *val);
-GF_MUST_CHECK int dict_set_int16 (dict_t *this, char *key, int16_t val);
-
-GF_MUST_CHECK int dict_get_int32 (dict_t *this, char *key, int32_t *val);
-GF_MUST_CHECK int dict_set_int32 (dict_t *this, char *key, int32_t val);
-
-GF_MUST_CHECK int dict_get_int64 (dict_t *this, char *key, int64_t *val);
-GF_MUST_CHECK int dict_set_int64 (dict_t *this, char *key, int64_t val);
-
-GF_MUST_CHECK int dict_get_uint16 (dict_t *this, char *key, uint16_t *val);
-GF_MUST_CHECK int dict_set_uint16 (dict_t *this, char *key, uint16_t val);
-
-GF_MUST_CHECK int dict_get_uint32 (dict_t *this, char *key, uint32_t *val);
-GF_MUST_CHECK int dict_set_uint32 (dict_t *this, char *key, uint32_t val);
-
-GF_MUST_CHECK int dict_get_uint64 (dict_t *this, char *key, uint64_t *val);
-GF_MUST_CHECK int dict_set_uint64 (dict_t *this, char *key, uint64_t val);
-
-GF_MUST_CHECK int dict_get_double (dict_t *this, char *key, double *val);
-GF_MUST_CHECK int dict_set_double (dict_t *this, char *key, double val);
-
-GF_MUST_CHECK int dict_set_static_ptr (dict_t *this, char *key, void *ptr);
-GF_MUST_CHECK int dict_get_ptr (dict_t *this, char *key, void **ptr);
-GF_MUST_CHECK int dict_get_ptr_and_len (dict_t *this, char *key, void **ptr, int *len);
-GF_MUST_CHECK int dict_set_ptr (dict_t *this, char *key, void *ptr);
-GF_MUST_CHECK int dict_set_dynptr (dict_t *this, char *key, void *ptr, size_t size);
-
-GF_MUST_CHECK int dict_get_bin (dict_t *this, char *key, void **ptr);
-GF_MUST_CHECK int dict_set_bin (dict_t *this, char *key, void *ptr, size_t size);
-GF_MUST_CHECK int dict_set_static_bin (dict_t *this, char *key, void *ptr, size_t size);
-
-GF_MUST_CHECK int dict_set_str (dict_t *this, char *key, char *str);
-GF_MUST_CHECK int dict_set_dynmstr (dict_t *this, char *key, char *str);
-GF_MUST_CHECK int dict_set_dynstr (dict_t *this, char *key, char *str);
-GF_MUST_CHECK int dict_get_str (dict_t *this, char *key, char **str);
-
-GF_MUST_CHECK int dict_get_str_boolean (dict_t *this, char *key, int default_val);
-GF_MUST_CHECK int dict_serialize_value_with_delim (dict_t *this, char *buf, int32_t *serz_len,
- char delimiter);
-
-void dict_dump (dict_t *dict);
-
-#endif
diff --git a/libglusterfs/src/event-epoll.c b/libglusterfs/src/event-epoll.c
new file mode 100644
index 00000000000..fb4fb845b40
--- /dev/null
+++ b/libglusterfs/src/event-epoll.c
@@ -0,0 +1,1032 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <pthread.h>
+#include <stdlib.h>
+#include <errno.h>
+
+#include "glusterfs/gf-event.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+#ifdef HAVE_SYS_EPOLL_H
+#include <sys/epoll.h>
+
+struct event_slot_epoll {
+ int fd;
+ int events;
+ int gen;
+ int idx;
+ gf_atomic_t ref;
+ int do_close;
+ int in_handler;
+ int handled_error;
+ void *data;
+ event_handler_t handler;
+ gf_lock_t lock;
+ struct list_head poller_death;
+};
+
+struct event_thread_data {
+ struct event_pool *event_pool;
+ int event_index;
+};
+
+static struct event_slot_epoll *
+__event_newtable(struct event_pool *event_pool, int table_idx)
+{
+ struct event_slot_epoll *table = NULL;
+ int i = -1;
+
+ table = GF_CALLOC(sizeof(*table), EVENT_EPOLL_SLOTS, gf_common_mt_ereg);
+ if (!table)
+ return NULL;
+
+ for (i = 0; i < EVENT_EPOLL_SLOTS; i++) {
+ table[i].fd = -1;
+ LOCK_INIT(&table[i].lock);
+ INIT_LIST_HEAD(&table[i].poller_death);
+ }
+
+ event_pool->ereg[table_idx] = table;
+ event_pool->slots_used[table_idx] = 0;
+
+ return table;
+}
+
+static int
+event_slot_ref(struct event_slot_epoll *slot)
+{
+ if (!slot)
+ return -1;
+
+ return GF_ATOMIC_INC(slot->ref);
+}
+
+static int
+__event_slot_alloc(struct event_pool *event_pool, int fd,
+ char notify_poller_death, struct event_slot_epoll **slot)
+{
+ int i = 0;
+ int j = 0;
+ int table_idx = -1;
+ int gen = -1;
+ struct event_slot_epoll *table = NULL;
+
+retry:
+
+ while (i < EVENT_EPOLL_TABLES) {
+ switch (event_pool->slots_used[i]) {
+ case EVENT_EPOLL_SLOTS:
+ break;
+ case 0:
+ if (!event_pool->ereg[i]) {
+ table = __event_newtable(event_pool, i);
+ if (!table)
+ return -1;
+ } else {
+ table = event_pool->ereg[i];
+ }
+ break;
+ default:
+ table = event_pool->ereg[i];
+ break;
+ }
+
+ if (table)
+ /* break out of the loop */
+ break;
+ i++;
+ }
+
+ if (!table)
+ return -1;
+
+ table_idx = i;
+
+ for (j = 0; j < EVENT_EPOLL_SLOTS; j++) {
+ if (table[j].fd == -1) {
+ /* wipe everything except bump the generation */
+ gen = table[j].gen;
+ memset(&table[j], 0, sizeof(table[j]));
+ table[j].gen = gen + 1;
+
+ LOCK_INIT(&table[j].lock);
+ INIT_LIST_HEAD(&table[j].poller_death);
+
+ table[j].fd = fd;
+ if (notify_poller_death) {
+ table[j].idx = table_idx * EVENT_EPOLL_SLOTS + j;
+ list_add_tail(&table[j].poller_death,
+ &event_pool->poller_death);
+ }
+
+ event_pool->slots_used[table_idx]++;
+
+ break;
+ }
+ }
+
+ if (j == EVENT_EPOLL_SLOTS) {
+ table = NULL;
+ i++;
+ goto retry;
+ } else {
+ (*slot) = &table[j];
+ event_slot_ref(*slot);
+ return table_idx * EVENT_EPOLL_SLOTS + j;
+ }
+}
+
+static int
+event_slot_alloc(struct event_pool *event_pool, int fd,
+ char notify_poller_death, struct event_slot_epoll **slot)
+{
+ int idx = -1;
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ idx = __event_slot_alloc(event_pool, fd, notify_poller_death, slot);
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ return idx;
+}
+
+static void
+__event_slot_dealloc(struct event_pool *event_pool, int idx)
+{
+ int table_idx = 0;
+ int offset = 0;
+ struct event_slot_epoll *table = NULL;
+ struct event_slot_epoll *slot = NULL;
+ int fd = -1;
+
+ table_idx = idx / EVENT_EPOLL_SLOTS;
+ offset = idx % EVENT_EPOLL_SLOTS;
+
+ table = event_pool->ereg[table_idx];
+ if (!table)
+ return;
+
+ slot = &table[offset];
+ slot->gen++;
+
+ fd = slot->fd;
+ slot->fd = -1;
+ slot->handled_error = 0;
+ slot->in_handler = 0;
+ list_del_init(&slot->poller_death);
+ if (fd != -1)
+ event_pool->slots_used[table_idx]--;
+
+ return;
+}
+
+static void
+event_slot_dealloc(struct event_pool *event_pool, int idx)
+{
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ __event_slot_dealloc(event_pool, idx);
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ return;
+}
+
+static struct event_slot_epoll *
+event_slot_get(struct event_pool *event_pool, int idx)
+{
+ struct event_slot_epoll *slot = NULL;
+ struct event_slot_epoll *table = NULL;
+ int table_idx = 0;
+ int offset = 0;
+
+ table_idx = idx / EVENT_EPOLL_SLOTS;
+ offset = idx % EVENT_EPOLL_SLOTS;
+
+ table = event_pool->ereg[table_idx];
+ if (!table)
+ return NULL;
+
+ slot = &table[offset];
+
+ event_slot_ref(slot);
+ return slot;
+}
+
+static void
+__event_slot_unref(struct event_pool *event_pool, struct event_slot_epoll *slot,
+ int idx)
+{
+ int ref = -1;
+ int fd = -1;
+ int do_close = 0;
+
+ ref = GF_ATOMIC_DEC(slot->ref);
+ if (ref)
+ /* slot still alive */
+ goto done;
+
+ LOCK(&slot->lock);
+ {
+ fd = slot->fd;
+ do_close = slot->do_close;
+ slot->do_close = 0;
+ }
+ UNLOCK(&slot->lock);
+
+ __event_slot_dealloc(event_pool, idx);
+
+ if (do_close)
+ sys_close(fd);
+done:
+ return;
+}
+
+static void
+event_slot_unref(struct event_pool *event_pool, struct event_slot_epoll *slot,
+ int idx)
+{
+ int ref = -1;
+ int fd = -1;
+ int do_close = 0;
+
+ ref = GF_ATOMIC_DEC(slot->ref);
+ if (ref)
+ /* slot still alive */
+ goto done;
+
+ LOCK(&slot->lock);
+ {
+ fd = slot->fd;
+ do_close = slot->do_close;
+ slot->do_close = 0;
+ }
+ UNLOCK(&slot->lock);
+
+ event_slot_dealloc(event_pool, idx);
+
+ if (do_close)
+ sys_close(fd);
+done:
+ return;
+}
+
+static struct event_pool *
+event_pool_new_epoll(int count, int eventthreadcount)
+{
+ struct event_pool *event_pool = NULL;
+ int epfd = -1;
+
+ event_pool = GF_CALLOC(1, sizeof(*event_pool), gf_common_mt_event_pool);
+
+ if (!event_pool)
+ goto out;
+
+ epfd = epoll_create(count);
+
+ if (epfd == -1) {
+ gf_smsg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_CREATE_FAILED,
+ NULL);
+ GF_FREE(event_pool->reg);
+ GF_FREE(event_pool);
+ event_pool = NULL;
+ goto out;
+ }
+
+ event_pool->fd = epfd;
+
+ event_pool->count = count;
+ INIT_LIST_HEAD(&event_pool->poller_death);
+ event_pool->eventthreadcount = eventthreadcount;
+ event_pool->auto_thread_count = 0;
+
+ pthread_mutex_init(&event_pool->mutex, NULL);
+
+out:
+ return event_pool;
+}
+
+static void
+__slot_update_events(struct event_slot_epoll *slot, int poll_in, int poll_out)
+{
+ switch (poll_in) {
+ case 1:
+ slot->events |= EPOLLIN;
+ break;
+ case 0:
+ slot->events &= ~EPOLLIN;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_IN,
+ "value=%d", poll_in, NULL);
+ break;
+ }
+
+ switch (poll_out) {
+ case 1:
+ slot->events |= EPOLLOUT;
+ break;
+ case 0:
+ slot->events &= ~EPOLLOUT;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_OUT,
+ "value=%d", poll_out, NULL);
+ break;
+ }
+}
+
+int
+event_register_epoll(struct event_pool *event_pool, int fd,
+ event_handler_t handler, void *data, int poll_in,
+ int poll_out, char notify_poller_death)
+{
+ int idx = -1;
+ int ret = -1;
+ int destroy = 0;
+ struct epoll_event epoll_event = {
+ 0,
+ };
+ struct event_data *ev_data = (void *)&epoll_event.data;
+ struct event_slot_epoll *slot = NULL;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ /* TODO: Even with the below check, there is a possibility of race,
+ * What if the destroy mode is set after the check is done.
+ * Not sure of the best way to prevent this race, ref counting
+ * is one possibility.
+ * There is no harm in registering and unregistering the fd
+ * even after destroy mode is set, just that such fds will remain
+ * open until unregister is called, also the events on that fd will be
+ * notified, until one of the poller thread is alive.
+ */
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ destroy = event_pool->destroy;
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ if (destroy == 1)
+ goto out;
+
+ idx = event_slot_alloc(event_pool, fd, notify_poller_death, &slot);
+ if (idx == -1) {
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "fd=%d", fd,
+ NULL);
+ return -1;
+ }
+
+ assert(slot->fd == fd);
+
+ LOCK(&slot->lock);
+ {
+ /* make epoll 'singleshot', which
+ means we need to re-add the fd with
+ epoll_ctl(EPOLL_CTL_MOD) after delivery of every
+ single event. This assures us that while a poller
+ thread has picked up and is processing an event,
+ another poller will not try to pick this at the same
+ time as well.
+ */
+
+ slot->events = EPOLLPRI | EPOLLHUP | EPOLLERR | EPOLLONESHOT;
+ slot->handler = handler;
+ slot->data = data;
+
+ __slot_update_events(slot, poll_in, poll_out);
+
+ epoll_event.events = slot->events;
+ ev_data->idx = idx;
+ ev_data->gen = slot->gen;
+
+ ret = epoll_ctl(event_pool->fd, EPOLL_CTL_ADD, fd, &epoll_event);
+ /* check ret after UNLOCK() to avoid deadlock in
+ event_slot_unref()
+ */
+ }
+ UNLOCK(&slot->lock);
+
+ if (ret == -1) {
+ gf_smsg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_ADD_FAILED,
+ "fd=%d", fd, "epoll_fd=%d", event_pool->fd, NULL);
+ event_slot_unref(event_pool, slot, idx);
+ idx = -1;
+ }
+
+ /* keep slot->ref (do not event_slot_unref) if successful */
+out:
+ return idx;
+}
+
+static int
+event_unregister_epoll_common(struct event_pool *event_pool, int fd, int idx,
+ int do_close)
+{
+ int ret = -1;
+ struct event_slot_epoll *slot = NULL;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ /* During shutdown, it may happen that a socket registration with
+ * the event sub-system may fail and an rpc_transport_unref() may
+ * be called for such an unregistered socket with idx == -1. This
+ * may cause the following assert(slot->fd == fd) to fail.
+ */
+ if (idx < 0)
+ goto out;
+
+ slot = event_slot_get(event_pool, idx);
+ if (!slot) {
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "fd=%d", fd,
+ "idx=%d", idx, NULL);
+ return -1;
+ }
+
+ assert(slot->fd == fd);
+
+ LOCK(&slot->lock);
+ {
+ ret = epoll_ctl(event_pool->fd, EPOLL_CTL_DEL, fd, NULL);
+
+ if (ret == -1) {
+ gf_smsg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_DEL_FAILED,
+ "fd=%d", fd, "epoll_fd=%d", event_pool->fd, NULL);
+ goto unlock;
+ }
+
+ slot->do_close = do_close;
+ slot->gen++; /* detect unregister in dispatch_handler() */
+ }
+unlock:
+ UNLOCK(&slot->lock);
+
+ event_slot_unref(event_pool, slot, idx); /* one for event_register() */
+ event_slot_unref(event_pool, slot, idx); /* one for event_slot_get() */
+out:
+ return ret;
+}
+
+static int
+event_unregister_epoll(struct event_pool *event_pool, int fd, int idx_hint)
+{
+ int ret = -1;
+
+ ret = event_unregister_epoll_common(event_pool, fd, idx_hint, 0);
+
+ return ret;
+}
+
+static int
+event_unregister_close_epoll(struct event_pool *event_pool, int fd,
+ int idx_hint)
+{
+ int ret = -1;
+
+ ret = event_unregister_epoll_common(event_pool, fd, idx_hint, 1);
+
+ return ret;
+}
+
+static int
+event_select_on_epoll(struct event_pool *event_pool, int fd, int idx,
+ int poll_in, int poll_out)
+{
+ int ret = -1;
+ struct event_slot_epoll *slot = NULL;
+ struct epoll_event epoll_event = {
+ 0,
+ };
+ struct event_data *ev_data = (void *)&epoll_event.data;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ slot = event_slot_get(event_pool, idx);
+ if (!slot) {
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "fd=%d", fd,
+ "idx=%d", idx, NULL);
+ return -1;
+ }
+
+ assert(slot->fd == fd);
+
+ LOCK(&slot->lock);
+ {
+ __slot_update_events(slot, poll_in, poll_out);
+
+ epoll_event.events = slot->events;
+ ev_data->idx = idx;
+ ev_data->gen = slot->gen;
+
+ if (slot->in_handler)
+ /*
+ * in_handler indicates at least one thread
+ * executing event_dispatch_epoll_handler()
+ * which will perform epoll_ctl(EPOLL_CTL_MOD)
+ * anyways (because of EPOLLET)
+ *
+ * This not only saves a system call, but also
+ * avoids possibility of another epoll thread
+ * picking up the next event while the ongoing
+ * handler is still in progress (and resulting
+ * in unnecessary contention on rpc_transport_t->mutex).
+ */
+ goto unlock;
+
+ ret = epoll_ctl(event_pool->fd, EPOLL_CTL_MOD, fd, &epoll_event);
+ if (ret == -1) {
+ gf_smsg("epoll", GF_LOG_ERROR, errno, LG_MSG_EPOLL_FD_MODIFY_FAILED,
+ "fd=%d", fd, "events=%d", epoll_event.events, NULL);
+ }
+ }
+unlock:
+ UNLOCK(&slot->lock);
+
+ event_slot_unref(event_pool, slot, idx);
+
+out:
+ return idx;
+}
+
+static int
+event_dispatch_epoll_handler(struct event_pool *event_pool,
+ struct epoll_event *event)
+{
+ struct event_data *ev_data = NULL;
+ struct event_slot_epoll *slot = NULL;
+ event_handler_t handler = NULL;
+ void *data = NULL;
+ int idx = -1;
+ int gen = -1;
+ int ret = -1;
+ int fd = -1;
+ gf_boolean_t handled_error_previously = _gf_false;
+
+ ev_data = (void *)&event->data;
+ handler = NULL;
+ data = NULL;
+
+ idx = ev_data->idx;
+ gen = ev_data->gen;
+
+ slot = event_slot_get(event_pool, idx);
+ if (!slot) {
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "idx=%d", idx,
+ NULL);
+ return -1;
+ }
+
+ LOCK(&slot->lock);
+ {
+ fd = slot->fd;
+ if (fd == -1) {
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_STALE_FD_FOUND, "idx=%d",
+ idx, "gen=%d", gen, "events=%d", event->events,
+ "slot->gen=%d", slot->gen, NULL);
+ /* fd got unregistered in another thread */
+ goto pre_unlock;
+ }
+
+ if (gen != slot->gen) {
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_GENERATION_MISMATCH,
+ "idx=%d", idx, "gen=%d", gen, "slot->gen=%d", slot->gen,
+ "slot->fd=%d", slot->fd, NULL);
+ /* slot was re-used and therefore is another fd! */
+ goto pre_unlock;
+ }
+
+ handler = slot->handler;
+ data = slot->data;
+
+ if (slot->in_handler > 0) {
+ /* Another handler is inprogress, skip this one. */
+ handler = NULL;
+ goto pre_unlock;
+ }
+
+ if (slot->handled_error) {
+ handled_error_previously = _gf_true;
+ } else {
+ slot->handled_error = (event->events & (EPOLLERR | EPOLLHUP));
+ slot->in_handler++;
+ }
+ }
+pre_unlock:
+ UNLOCK(&slot->lock);
+
+ ret = 0;
+
+ if (!handler)
+ goto out;
+
+ if (!handled_error_previously) {
+ handler(fd, idx, gen, data, (event->events & (EPOLLIN | EPOLLPRI)),
+ (event->events & (EPOLLOUT)),
+ (event->events & (EPOLLERR | EPOLLHUP)), 0);
+ }
+out:
+ event_slot_unref(event_pool, slot, idx);
+
+ return ret;
+}
+
+static void *
+event_dispatch_epoll_worker(void *data)
+{
+ struct epoll_event event;
+ int ret = -1;
+ struct event_thread_data *ev_data = data;
+ struct event_pool *event_pool;
+ int myindex = -1;
+ int timetodie = 0, gen = 0;
+ struct list_head poller_death_notify;
+ struct event_slot_epoll *slot = NULL, *tmp = NULL;
+
+ GF_VALIDATE_OR_GOTO("event", ev_data, out);
+
+ event_pool = ev_data->event_pool;
+ myindex = ev_data->event_index;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ gf_smsg("epoll", GF_LOG_INFO, 0, LG_MSG_STARTED_EPOLL_THREAD, "index=%d",
+ myindex - 1, NULL);
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ event_pool->activethreadcount++;
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ for (;;) {
+ if (event_pool->eventthreadcount < myindex) {
+ /* ...time to die, thread count was decreased below
+ * this threads index */
+ /* Start with extra safety at this point, reducing
+ * lock conention in normal case when threads are not
+ * reconfigured always */
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ if (event_pool->eventthreadcount < myindex) {
+ while (event_pool->poller_death_sliced) {
+ pthread_cond_wait(&event_pool->cond,
+ &event_pool->mutex);
+ }
+
+ INIT_LIST_HEAD(&poller_death_notify);
+ /* if found true in critical section,
+ * die */
+ event_pool->pollers[myindex - 1] = 0;
+ event_pool->activethreadcount--;
+ timetodie = 1;
+ gen = ++event_pool->poller_gen;
+ list_for_each_entry(slot, &event_pool->poller_death,
+ poller_death)
+ {
+ event_slot_ref(slot);
+ }
+
+ list_splice_init(&event_pool->poller_death,
+ &poller_death_notify);
+ event_pool->poller_death_sliced = 1;
+ pthread_cond_broadcast(&event_pool->cond);
+ }
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+ if (timetodie) {
+ list_for_each_entry(slot, &poller_death_notify, poller_death)
+ {
+ slot->handler(slot->fd, 0, gen, slot->data, 0, 0, 0, 1);
+ }
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ list_for_each_entry_safe(slot, tmp, &poller_death_notify,
+ poller_death)
+ {
+ __event_slot_unref(event_pool, slot, slot->idx);
+ }
+
+ list_splice(&poller_death_notify,
+ &event_pool->poller_death);
+ event_pool->poller_death_sliced = 0;
+ pthread_cond_broadcast(&event_pool->cond);
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ gf_smsg("epoll", GF_LOG_INFO, 0, LG_MSG_EXITED_EPOLL_THREAD,
+ "index=%d", myindex, NULL);
+
+ goto out;
+ }
+ }
+
+ ret = epoll_wait(event_pool->fd, &event, 1, -1);
+
+ if (ret == 0)
+ /* timeout */
+ continue;
+
+ if (ret == -1 && errno == EINTR)
+ /* sys call */
+ continue;
+
+ ret = event_dispatch_epoll_handler(event_pool, &event);
+ if (ret) {
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_DISPATCH_HANDLER_FAILED,
+ NULL);
+ }
+ }
+out:
+ if (ev_data)
+ GF_FREE(ev_data);
+ return NULL;
+}
+
+/* Attempts to start the # of configured pollers, ensuring at least the first
+ * is started in a joinable state */
+static int
+event_dispatch_epoll(struct event_pool *event_pool)
+{
+ int i = 0;
+ pthread_t t_id;
+ int pollercount = 0;
+ int ret = -1;
+ struct event_thread_data *ev_data = NULL;
+
+ /* Start the configured number of pollers */
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ pollercount = event_pool->eventthreadcount;
+
+ /* Set to MAX if greater */
+ if (pollercount > EVENT_MAX_THREADS)
+ pollercount = EVENT_MAX_THREADS;
+
+ /* Default pollers to 1 in case this is incorrectly set */
+ if (pollercount <= 0)
+ pollercount = 1;
+
+ event_pool->activethreadcount++;
+
+ for (i = 0; i < pollercount; i++) {
+ ev_data = GF_CALLOC(1, sizeof(*ev_data), gf_common_mt_event_pool);
+ if (!ev_data) {
+ if (i == 0) {
+ /* Need to succeed creating 0'th
+ * thread, to joinable and wait */
+ break;
+ } else {
+ /* Inability to create other threads
+ * are a lesser evil, and ignored */
+ continue;
+ }
+ }
+
+ ev_data->event_pool = event_pool;
+ ev_data->event_index = i + 1;
+
+ ret = gf_thread_create(&t_id, NULL, event_dispatch_epoll_worker,
+ ev_data, "epoll%03hx", i & 0x3ff);
+ if (!ret) {
+ event_pool->pollers[i] = t_id;
+
+ /* mark all threads other than one in index 0
+ * as detachable. Errors can be ignored, they
+ * spend their time as zombies if not detched
+ * and the thread counts are decreased */
+ if (i != 0)
+ pthread_detach(event_pool->pollers[i]);
+ } else {
+ gf_smsg("epoll", GF_LOG_WARNING, 0,
+ LG_MSG_START_EPOLL_THREAD_FAILED, "index=%d", i, NULL);
+ if (i == 0) {
+ GF_FREE(ev_data);
+ break;
+ } else {
+ GF_FREE(ev_data);
+ continue;
+ }
+ }
+ }
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ /* Just wait for the first thread, that is created in a joinable state
+ * and will never die, ensuring this function never returns */
+ if (event_pool->pollers[0] != 0)
+ pthread_join(event_pool->pollers[0], NULL);
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ event_pool->activethreadcount--;
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ return ret;
+}
+
+/**
+ * @param event_pool event_pool on which fds of interest are registered for
+ * events.
+ *
+ * @return 1 if at least one epoll worker thread is spawned, 0 otherwise
+ *
+ * NB This function SHOULD be called under event_pool->mutex.
+ */
+
+static int
+event_pool_dispatched_unlocked(struct event_pool *event_pool)
+{
+ return (event_pool->pollers[0] != 0);
+}
+
+int
+event_reconfigure_threads_epoll(struct event_pool *event_pool, int value)
+{
+ int i;
+ int ret = 0;
+ pthread_t t_id;
+ int oldthreadcount;
+ struct event_thread_data *ev_data = NULL;
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ /* Reconfigure to 0 threads is allowed only in destroy mode */
+ if (event_pool->destroy == 1) {
+ value = 0;
+ } else {
+ /* Set to MAX if greater */
+ if (value > EVENT_MAX_THREADS)
+ value = EVENT_MAX_THREADS;
+
+ /* Default pollers to 1 in case this is set incorrectly */
+ if (value <= 0)
+ value = 1;
+ }
+
+ oldthreadcount = event_pool->eventthreadcount;
+
+ /* Start 'worker' threads as necessary only if event_dispatch()
+ * was called before. If event_dispatch() was not called, there
+ * will be no epoll 'worker' threads running yet. */
+
+ if (event_pool_dispatched_unlocked(event_pool) &&
+ (oldthreadcount < value)) {
+ /* create more poll threads */
+ for (i = oldthreadcount; i < value; i++) {
+ /* Start a thread if the index at this location
+ * is a 0, so that the older thread is confirmed
+ * as dead */
+ if (event_pool->pollers[i] == 0) {
+ ev_data = GF_CALLOC(1, sizeof(*ev_data),
+ gf_common_mt_event_pool);
+ if (!ev_data) {
+ continue;
+ }
+
+ ev_data->event_pool = event_pool;
+ ev_data->event_index = i + 1;
+
+ ret = gf_thread_create(&t_id, NULL,
+ event_dispatch_epoll_worker, ev_data,
+ "epoll%03hx", i & 0x3ff);
+ if (ret) {
+ gf_smsg("epoll", GF_LOG_WARNING, 0,
+ LG_MSG_START_EPOLL_THREAD_FAILED, "index=%d", i,
+ NULL);
+ GF_FREE(ev_data);
+ } else {
+ pthread_detach(t_id);
+ event_pool->pollers[i] = t_id;
+ }
+ }
+ }
+ }
+
+ /* if value decreases, threads will terminate, themselves */
+ event_pool->eventthreadcount = value;
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ return 0;
+}
+
+/* This function is the destructor for the event_pool data structure
+ * Should be called only after poller_threads_destroy() is called,
+ * else will lead to crashes.
+ */
+static int
+event_pool_destroy_epoll(struct event_pool *event_pool)
+{
+ int ret = 0, i = 0, j = 0;
+ struct event_slot_epoll *table = NULL;
+
+ ret = sys_close(event_pool->fd);
+
+ for (i = 0; i < EVENT_EPOLL_TABLES; i++) {
+ if (event_pool->ereg[i]) {
+ table = event_pool->ereg[i];
+ event_pool->ereg[i] = NULL;
+ for (j = 0; j < EVENT_EPOLL_SLOTS; j++) {
+ LOCK_DESTROY(&table[j].lock);
+ }
+ GF_FREE(table);
+ }
+ }
+
+ pthread_mutex_destroy(&event_pool->mutex);
+ pthread_cond_destroy(&event_pool->cond);
+
+ GF_FREE(event_pool->evcache);
+ GF_FREE(event_pool->reg);
+ GF_FREE(event_pool);
+
+ return ret;
+}
+
+static int
+event_handled_epoll(struct event_pool *event_pool, int fd, int idx, int gen)
+{
+ struct event_slot_epoll *slot = NULL;
+ struct epoll_event epoll_event = {
+ 0,
+ };
+ struct event_data *ev_data = (void *)&epoll_event.data;
+ int ret = 0;
+
+ slot = event_slot_get(event_pool, idx);
+ if (!slot) {
+ gf_smsg("epoll", GF_LOG_ERROR, 0, LG_MSG_SLOT_NOT_FOUND, "fd=%d", fd,
+ "idx=%d", idx, NULL);
+ return -1;
+ }
+
+ assert(slot->fd == fd);
+
+ LOCK(&slot->lock);
+ {
+ slot->in_handler--;
+
+ if (gen != slot->gen) {
+ /* event_unregister() happened while we were
+ in handler()
+ */
+ gf_msg_debug("epoll", 0,
+ "generation bumped on idx=%d"
+ " from gen=%d to slot->gen=%d, fd=%d, "
+ "slot->fd=%d",
+ idx, gen, slot->gen, fd, slot->fd);
+ goto unlock;
+ }
+
+ /* This call also picks up the changes made by another
+ thread calling event_select_on_epoll() while this
+ thread was busy in handler()
+ */
+ if (slot->in_handler == 0) {
+ epoll_event.events = slot->events;
+ ev_data->idx = idx;
+ ev_data->gen = gen;
+
+ ret = epoll_ctl(event_pool->fd, EPOLL_CTL_MOD, fd, &epoll_event);
+ }
+ }
+unlock:
+ UNLOCK(&slot->lock);
+
+ event_slot_unref(event_pool, slot, idx);
+
+ return ret;
+}
+
+struct event_ops event_ops_epoll = {
+ .new = event_pool_new_epoll,
+ .event_register = event_register_epoll,
+ .event_select_on = event_select_on_epoll,
+ .event_unregister = event_unregister_epoll,
+ .event_unregister_close = event_unregister_close_epoll,
+ .event_dispatch = event_dispatch_epoll,
+ .event_reconfigure_threads = event_reconfigure_threads_epoll,
+ .event_pool_destroy = event_pool_destroy_epoll,
+ .event_handled = event_handled_epoll,
+};
+
+#endif
diff --git a/libglusterfs/src/event-history.c b/libglusterfs/src/event-history.c
index fe511caeb1e..379fed866be 100644
--- a/libglusterfs/src/event-history.c
+++ b/libglusterfs/src/event-history.c
@@ -8,74 +8,75 @@
cases as published by the Free Software Foundation.
*/
-#include "event-history.h"
+#include "glusterfs/event-history.h"
+#include "glusterfs/libglusterfs-messages.h"
eh_t *
-eh_new (size_t buffer_size, gf_boolean_t use_buffer_once)
+eh_new(size_t buffer_size, gf_boolean_t use_buffer_once,
+ void (*destroy_buffer_data)(void *data))
{
- eh_t *history = NULL;
- buffer_t *buffer = NULL;
+ eh_t *history = NULL;
+ buffer_t *buffer = NULL;
- history = GF_CALLOC (1, sizeof (eh_t), gf_common_mt_eh_t);
- if (!history) {
- gf_log ("", GF_LOG_ERROR, "allocating history failed.");
- goto out;
- }
+ history = GF_CALLOC(1, sizeof(eh_t), gf_common_mt_eh_t);
+ if (!history) {
+ goto out;
+ }
- buffer = cb_buffer_new (buffer_size, use_buffer_once);
- if (!buffer) {
- gf_log ("", GF_LOG_ERROR, "allocating circular buffer failed");
- GF_FREE (history);
- history = NULL;
- }
+ buffer = cb_buffer_new(buffer_size, use_buffer_once, destroy_buffer_data);
+ if (!buffer) {
+ GF_FREE(history);
+ history = NULL;
+ goto out;
+ }
- history->buffer = buffer;
+ history->buffer = buffer;
- pthread_mutex_init (&history->lock, NULL);
+ pthread_mutex_init(&history->lock, NULL);
out:
- return history;
+ return history;
}
void
-eh_dump (eh_t *history, void *data,
- int (dump_fn) (circular_buffer_t *buffer, void *data))
+eh_dump(eh_t *history, void *data,
+ int(dump_fn)(circular_buffer_t *buffer, void *data))
{
- if (!history) {
- gf_log ("", GF_LOG_DEBUG, "history is NULL");
- goto out;
- }
+ if (!history) {
+ gf_msg_debug("event-history", 0, "history is NULL");
+ goto out;
+ }
- cb_buffer_dump (history->buffer, data, dump_fn);
+ cb_buffer_dump(history->buffer, data, dump_fn);
out:
- return;
+ return;
}
int
-eh_save_history (eh_t *history, void *data)
+eh_save_history(eh_t *history, void *data)
{
- int ret = -1;
+ int ret = -1;
- ret = cb_add_entry_buffer (history->buffer, data);
+ ret = cb_add_entry_buffer(history->buffer, data);
- return ret;
+ return ret;
}
int
-eh_destroy (eh_t *history)
+eh_destroy(eh_t *history)
{
- if (!history) {
- gf_log ("", GF_LOG_INFO, "history for the xlator is "
- "NULL");
- return -1;
- }
+ if (!history) {
+ gf_msg("event-history", GF_LOG_INFO, 0, LG_MSG_INVALID_ARG,
+ "history for the xlator is NULL");
+ return -1;
+ }
- cb_buffer_destroy (history->buffer);
- history->buffer = NULL;
+ cb_buffer_destroy(history->buffer);
+ history->buffer = NULL;
- pthread_mutex_destroy (&history->lock);
+ pthread_mutex_destroy(&history->lock);
- GF_FREE (history);
+ GF_FREE(history);
- return 0;
+ return 0;
}
diff --git a/libglusterfs/src/event-history.h b/libglusterfs/src/event-history.h
deleted file mode 100644
index b1750bbae96..00000000000
--- a/libglusterfs/src/event-history.h
+++ /dev/null
@@ -1,43 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _EH_H
-#define _EH_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "mem-types.h"
-#include "circ-buff.h"
-
-struct event_hist
-{
- buffer_t *buffer;
- pthread_mutex_t lock;
-};
-
-typedef struct event_hist eh_t;
-
-void
-eh_dump (eh_t *event , void *data,
- int (fn) (circular_buffer_t *buffer, void *data));
-
-eh_t *
-eh_new (size_t buffer_size, gf_boolean_t use_buffer_once);
-
-int
-eh_save_history (eh_t *history, void *string);
-
-int
-eh_destroy (eh_t *history);
-
-#endif /* _EH_H */
diff --git a/libglusterfs/src/event-poll.c b/libglusterfs/src/event-poll.c
new file mode 100644
index 00000000000..2cba963f096
--- /dev/null
+++ b/libglusterfs/src/event-poll.c
@@ -0,0 +1,513 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <sys/poll.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+
+#include "glusterfs/logging.h"
+#include "glusterfs/gf-event.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+struct event_slot_poll {
+ int fd;
+ int events;
+ void *data;
+ event_handler_t handler;
+};
+
+static int
+event_register_poll(struct event_pool *event_pool, int fd,
+ event_handler_t handler, void *data, int poll_in,
+ int poll_out, char notify_poller_death);
+
+static void
+__flush_fd(int fd, int idx, int gen, void *data, int poll_in, int poll_out,
+ int poll_err, char event_thread_died)
+{
+ char buf[64];
+ int ret = -1;
+
+ if (!poll_in)
+ return;
+
+ do {
+ ret = sys_read(fd, buf, 64);
+ if (ret == -1 && errno != EAGAIN) {
+ gf_smsg("poll", GF_LOG_ERROR, errno, LG_MSG_READ_FILE_FAILED,
+ "fd=%d", fd, NULL);
+ }
+ } while (ret == 64);
+
+ return;
+}
+
+static int
+__event_getindex(struct event_pool *event_pool, int fd, int idx)
+{
+ int ret = -1;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ /* lookup in used space based on index provided */
+ if (idx > -1 && idx < event_pool->used) {
+ if (event_pool->reg[idx].fd == fd) {
+ ret = idx;
+ goto out;
+ }
+ }
+
+ /* search in used space, if lookup fails */
+ for (i = 0; i < event_pool->used; i++) {
+ if (event_pool->reg[i].fd == fd) {
+ ret = i;
+ break;
+ }
+ }
+
+out:
+ return ret;
+}
+
+static struct event_pool *
+event_pool_new_poll(int count, int eventthreadcount)
+{
+ struct event_pool *event_pool = NULL;
+ int ret = -1;
+
+ event_pool = GF_CALLOC(1, sizeof(*event_pool), gf_common_mt_event_pool);
+
+ if (!event_pool)
+ return NULL;
+
+ event_pool->count = count;
+ event_pool->reg = GF_CALLOC(event_pool->count, sizeof(*event_pool->reg),
+ gf_common_mt_reg);
+
+ if (!event_pool->reg) {
+ GF_FREE(event_pool);
+ return NULL;
+ }
+
+ pthread_mutex_init(&event_pool->mutex, NULL);
+
+ ret = pipe(event_pool->breaker);
+
+ if (ret == -1) {
+ gf_smsg("poll", GF_LOG_ERROR, errno, LG_MSG_PIPE_CREATE_FAILED, NULL);
+ GF_FREE(event_pool->reg);
+ GF_FREE(event_pool);
+ return NULL;
+ }
+
+ ret = fcntl(event_pool->breaker[0], F_SETFL, O_NONBLOCK);
+ if (ret == -1) {
+ gf_smsg("poll", GF_LOG_ERROR, errno, LG_MSG_SET_PIPE_FAILED, NULL);
+ sys_close(event_pool->breaker[0]);
+ sys_close(event_pool->breaker[1]);
+ event_pool->breaker[0] = event_pool->breaker[1] = -1;
+
+ GF_FREE(event_pool->reg);
+ GF_FREE(event_pool);
+ return NULL;
+ }
+
+ ret = fcntl(event_pool->breaker[1], F_SETFL, O_NONBLOCK);
+ if (ret == -1) {
+ gf_smsg("poll", GF_LOG_ERROR, errno, LG_MSG_SET_PIPE_FAILED, NULL);
+
+ sys_close(event_pool->breaker[0]);
+ sys_close(event_pool->breaker[1]);
+ event_pool->breaker[0] = event_pool->breaker[1] = -1;
+
+ GF_FREE(event_pool->reg);
+ GF_FREE(event_pool);
+ return NULL;
+ }
+
+ ret = event_register_poll(event_pool, event_pool->breaker[0], __flush_fd,
+ NULL, 1, 0, 0);
+ if (ret == -1) {
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_REGISTER_PIPE_FAILED, NULL);
+ sys_close(event_pool->breaker[0]);
+ sys_close(event_pool->breaker[1]);
+ event_pool->breaker[0] = event_pool->breaker[1] = -1;
+
+ GF_FREE(event_pool->reg);
+ GF_FREE(event_pool);
+ return NULL;
+ }
+
+ if (eventthreadcount > 1) {
+ gf_smsg("poll", GF_LOG_INFO, 0, LG_MSG_POLL_IGNORE_MULTIPLE_THREADS,
+ "count=%d", eventthreadcount, NULL);
+ }
+
+ /* although, eventhreadcount for poll implementation is always
+ * going to be 1, eventthreadcount needs to be set to 1 so that
+ * rpcsvc_request_handler() thread scaling works flawlessly in
+ * both epoll and poll models
+ */
+ event_pool->eventthreadcount = 1;
+
+ return event_pool;
+}
+
+static int
+event_register_poll(struct event_pool *event_pool, int fd,
+ event_handler_t handler, void *data, int poll_in,
+ int poll_out, char notify_poller_death)
+{
+ int idx = -1;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ if (event_pool->count == event_pool->used) {
+ event_pool->count += 256;
+
+ event_pool->reg = GF_REALLOC(
+ event_pool->reg, event_pool->count * sizeof(*event_pool->reg));
+ if (!event_pool->reg)
+ goto unlock;
+ }
+
+ idx = event_pool->used++;
+
+ event_pool->reg[idx].fd = fd;
+ event_pool->reg[idx].events = POLLPRI;
+ event_pool->reg[idx].handler = handler;
+ event_pool->reg[idx].data = data;
+
+ switch (poll_in) {
+ case 1:
+ event_pool->reg[idx].events |= POLLIN;
+ break;
+ case 0:
+ event_pool->reg[idx].events &= ~POLLIN;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_IN,
+ "value=%d", poll_in, NULL);
+ break;
+ }
+
+ switch (poll_out) {
+ case 1:
+ event_pool->reg[idx].events |= POLLOUT;
+ break;
+ case 0:
+ event_pool->reg[idx].events &= ~POLLOUT;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INVALID_POLL_OUT,
+ "value=%d", poll_out, NULL);
+ break;
+ }
+
+ event_pool->changed = 1;
+ }
+unlock:
+ pthread_mutex_unlock(&event_pool->mutex);
+
+out:
+ return idx;
+}
+
+static int
+event_unregister_poll(struct event_pool *event_pool, int fd, int idx_hint)
+{
+ int idx = -1;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ idx = __event_getindex(event_pool, fd, idx_hint);
+
+ if (idx == -1) {
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INDEX_NOT_FOUND, "fd=%d",
+ fd, "idx_hint=%d", idx_hint, NULL);
+ errno = ENOENT;
+ goto unlock;
+ }
+
+ event_pool->reg[idx] = event_pool->reg[--event_pool->used];
+ event_pool->changed = 1;
+ }
+unlock:
+ pthread_mutex_unlock(&event_pool->mutex);
+
+out:
+ return idx;
+}
+
+static int
+event_unregister_close_poll(struct event_pool *event_pool, int fd, int idx_hint)
+{
+ int ret = -1;
+
+ ret = event_unregister_poll(event_pool, fd, idx_hint);
+
+ sys_close(fd);
+
+ return ret;
+}
+
+static int
+event_select_on_poll(struct event_pool *event_pool, int fd, int idx_hint,
+ int poll_in, int poll_out)
+{
+ int idx = -1;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ idx = __event_getindex(event_pool, fd, idx_hint);
+
+ if (idx == -1) {
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INDEX_NOT_FOUND, "fd=%d",
+ fd, "idx_hint=%d", idx_hint, NULL);
+ errno = ENOENT;
+ goto unlock;
+ }
+
+ switch (poll_in) {
+ case 1:
+ event_pool->reg[idx].events |= POLLIN;
+ break;
+ case 0:
+ event_pool->reg[idx].events &= ~POLLIN;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ /* TODO: log error */
+ break;
+ }
+
+ switch (poll_out) {
+ case 1:
+ event_pool->reg[idx].events |= POLLOUT;
+ break;
+ case 0:
+ event_pool->reg[idx].events &= ~POLLOUT;
+ break;
+ case -1:
+ /* do nothing */
+ break;
+ default:
+ /* TODO: log error */
+ break;
+ }
+
+ if (poll_in + poll_out > -2)
+ event_pool->changed = 1;
+ }
+unlock:
+ pthread_mutex_unlock(&event_pool->mutex);
+
+out:
+ return idx;
+}
+
+static int
+event_dispatch_poll_handler(struct event_pool *event_pool, struct pollfd *ufds,
+ int i)
+{
+ event_handler_t handler = NULL;
+ void *data = NULL;
+ int idx = -1;
+ int ret = 0;
+
+ handler = NULL;
+ data = NULL;
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ idx = __event_getindex(event_pool, ufds[i].fd, i);
+
+ if (idx == -1) {
+ gf_smsg("poll", GF_LOG_ERROR, 0, LG_MSG_INDEX_NOT_FOUND, "fd=%d",
+ ufds[i].fd, "idx_hint=%d", i, NULL);
+ goto unlock;
+ }
+
+ handler = event_pool->reg[idx].handler;
+ data = event_pool->reg[idx].data;
+ }
+unlock:
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ if (handler)
+ handler(ufds[i].fd, idx, 0, data,
+ (ufds[i].revents & (POLLIN | POLLPRI)),
+ (ufds[i].revents & (POLLOUT)),
+ (ufds[i].revents & (POLLERR | POLLHUP | POLLNVAL)), 0);
+
+ return ret;
+}
+
+static int
+event_dispatch_poll_resize(struct event_pool *event_pool, struct pollfd *ufds,
+ int size)
+{
+ int i = 0;
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ if (event_pool->changed == 0) {
+ goto unlock;
+ }
+
+ if (event_pool->used > event_pool->evcache_size) {
+ GF_FREE(event_pool->evcache);
+
+ event_pool->evcache = ufds = NULL;
+
+ event_pool->evcache_size = event_pool->used;
+
+ ufds = GF_CALLOC(sizeof(struct pollfd), event_pool->evcache_size,
+ gf_common_mt_pollfd);
+ if (!ufds)
+ goto unlock;
+ event_pool->evcache = ufds;
+ }
+
+ if (ufds == NULL) {
+ goto unlock;
+ }
+
+ for (i = 0; i < event_pool->used; i++) {
+ ufds[i].fd = event_pool->reg[i].fd;
+ ufds[i].events = event_pool->reg[i].events;
+ ufds[i].revents = 0;
+ }
+
+ size = i;
+ }
+unlock:
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ return size;
+}
+
+static int
+event_dispatch_poll(struct event_pool *event_pool)
+{
+ struct pollfd *ufds = NULL;
+ int size = 0;
+ int i = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ event_pool->activethreadcount = 1;
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ while (1) {
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ if (event_pool->destroy == 1) {
+ event_pool->activethreadcount = 0;
+ pthread_cond_broadcast(&event_pool->cond);
+ pthread_mutex_unlock(&event_pool->mutex);
+ return 0;
+ }
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ size = event_dispatch_poll_resize(event_pool, ufds, size);
+ ufds = event_pool->evcache;
+
+ ret = poll(ufds, size, 1);
+
+ if (ret == 0)
+ /* timeout */
+ continue;
+
+ if (ret == -1 && errno == EINTR)
+ /* sys call */
+ continue;
+
+ for (i = 0; i < size; i++) {
+ if (!ufds[i].revents)
+ continue;
+
+ event_dispatch_poll_handler(event_pool, ufds, i);
+ }
+ }
+
+out:
+ return -1;
+}
+
+int
+event_reconfigure_threads_poll(struct event_pool *event_pool, int value)
+{
+ /* No-op for poll */
+
+ return 0;
+}
+
+/* This function is the destructor for the event_pool data structure
+ * Should be called only after poller_threads_destroy() is called,
+ * else will lead to crashes.
+ */
+static int
+event_pool_destroy_poll(struct event_pool *event_pool)
+{
+ int ret = 0;
+
+ ret = sys_close(event_pool->breaker[0]);
+ if (ret)
+ return ret;
+
+ ret = sys_close(event_pool->breaker[1]);
+ if (ret)
+ return ret;
+
+ event_pool->breaker[0] = event_pool->breaker[1] = -1;
+
+ GF_FREE(event_pool->reg);
+ GF_FREE(event_pool);
+
+ return ret;
+}
+
+struct event_ops event_ops_poll = {
+ .new = event_pool_new_poll,
+ .event_register = event_register_poll,
+ .event_select_on = event_select_on_poll,
+ .event_unregister = event_unregister_poll,
+ .event_unregister_close = event_unregister_close_poll,
+ .event_dispatch = event_dispatch_poll,
+ .event_reconfigure_threads = event_reconfigure_threads_poll,
+ .event_pool_destroy = event_pool_destroy_poll};
diff --git a/libglusterfs/src/event.c b/libglusterfs/src/event.c
index 8f172fb24b8..402c253ca25 100644
--- a/libglusterfs/src/event.c
+++ b/libglusterfs/src/event.c
@@ -16,936 +16,290 @@
#include <errno.h>
#include <string.h>
-#include "logging.h"
-#include "event.h"
-#include "mem-pool.h"
-#include "common-utils.h"
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-static int
-event_register_poll (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out);
-
-
-static int
-__flush_fd (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- char buf[64];
- int ret = -1;
-
- if (!poll_in)
- return ret;
-
- do {
- ret = read (fd, buf, 64);
- if (ret == -1 && errno != EAGAIN) {
- gf_log ("poll", GF_LOG_ERROR,
- "read on %d returned error (%s)",
- fd, strerror (errno));
- }
- } while (ret == 64);
-
- return ret;
-}
-
-
-static int
-__event_getindex (struct event_pool *event_pool, int fd, int idx)
-{
- int ret = -1;
- int i = 0;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+#include "glusterfs/gf-event.h"
+#include "glusterfs/timespec.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
+#include "glusterfs/syscall.h"
- if (idx > -1 && idx < event_pool->used) {
- if (event_pool->reg[idx].fd == fd)
- ret = idx;
- }
-
- for (i=0; ret == -1 && i<event_pool->used; i++) {
- if (event_pool->reg[i].fd == fd) {
- ret = i;
- break;
- }
- }
-
-out:
- return ret;
-}
-
-
-static struct event_pool *
-event_pool_new_poll (int count)
+struct event_pool *
+gf_event_pool_new(int count, int eventthreadcount)
{
- struct event_pool *event_pool = NULL;
- int ret = -1;
-
- event_pool = GF_CALLOC (1, sizeof (*event_pool),
- gf_common_mt_event_pool);
+ struct event_pool *event_pool = NULL;
+ extern struct event_ops event_ops_poll;
- if (!event_pool)
- return NULL;
-
- event_pool->count = count;
- event_pool->reg = GF_CALLOC (event_pool->count,
- sizeof (*event_pool->reg),
- gf_common_mt_reg);
-
- if (!event_pool->reg) {
- GF_FREE (event_pool);
- return NULL;
- }
-
- pthread_mutex_init (&event_pool->mutex, NULL);
-
- ret = pipe (event_pool->breaker);
-
- if (ret == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "pipe creation failed (%s)", strerror (errno));
- GF_FREE (event_pool->reg);
- GF_FREE (event_pool);
- return NULL;
- }
-
- ret = fcntl (event_pool->breaker[0], F_SETFL, O_NONBLOCK);
- if (ret == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "could not set pipe to non blocking mode (%s)",
- strerror (errno));
- close (event_pool->breaker[0]);
- close (event_pool->breaker[1]);
- event_pool->breaker[0] = event_pool->breaker[1] = -1;
-
- GF_FREE (event_pool->reg);
- GF_FREE (event_pool);
- return NULL;
- }
+#ifdef HAVE_SYS_EPOLL_H
+ extern struct event_ops event_ops_epoll;
- ret = fcntl (event_pool->breaker[1], F_SETFL, O_NONBLOCK);
- if (ret == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "could not set pipe to non blocking mode (%s)",
- strerror (errno));
+ event_pool = event_ops_epoll.new(count, eventthreadcount);
- close (event_pool->breaker[0]);
- close (event_pool->breaker[1]);
- event_pool->breaker[0] = event_pool->breaker[1] = -1;
+ if (event_pool) {
+ event_pool->ops = &event_ops_epoll;
+ } else {
+ gf_msg("event", GF_LOG_WARNING, 0, LG_MSG_FALLBACK_TO_POLL,
+ "falling back to poll based event handling");
+ }
+#endif
- GF_FREE (event_pool->reg);
- GF_FREE (event_pool);
- return NULL;
- }
+ if (!event_pool) {
+ event_pool = event_ops_poll.new(count, eventthreadcount);
- ret = event_register_poll (event_pool, event_pool->breaker[0],
- __flush_fd, NULL, 1, 0);
- if (ret == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "could not register pipe fd with poll event loop");
- close (event_pool->breaker[0]);
- close (event_pool->breaker[1]);
- event_pool->breaker[0] = event_pool->breaker[1] = -1;
-
- GF_FREE (event_pool->reg);
- GF_FREE (event_pool);
- return NULL;
- }
+ if (event_pool)
+ event_pool->ops = &event_ops_poll;
+ }
- return event_pool;
+ return event_pool;
}
-
-static int
-event_register_poll (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out)
+int
+gf_event_register(struct event_pool *event_pool, int fd,
+ event_handler_t handler, void *data, int poll_in,
+ int poll_out, char notify_poller_death)
{
- int idx = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- if (event_pool->count == event_pool->used)
- {
- event_pool->count += 256;
-
- event_pool->reg = GF_REALLOC (event_pool->reg,
- event_pool->count *
- sizeof (*event_pool->reg));
- if (!event_pool->reg)
- goto unlock;
- }
-
- idx = event_pool->used++;
-
- event_pool->reg[idx].fd = fd;
- event_pool->reg[idx].events = POLLPRI;
- event_pool->reg[idx].handler = handler;
- event_pool->reg[idx].data = data;
-
- switch (poll_in) {
- case 1:
- event_pool->reg[idx].events |= POLLIN;
- break;
- case 0:
- event_pool->reg[idx].events &= ~POLLIN;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("poll", GF_LOG_ERROR,
- "invalid poll_in value %d", poll_in);
- break;
- }
-
- switch (poll_out) {
- case 1:
- event_pool->reg[idx].events |= POLLOUT;
- break;
- case 0:
- event_pool->reg[idx].events &= ~POLLOUT;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("poll", GF_LOG_ERROR,
- "invalid poll_out value %d", poll_out);
- break;
- }
-
- event_pool->changed = 1;
+ int ret = -1;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+ ret = event_pool->ops->event_register(
+ event_pool, fd, handler, data, poll_in, poll_out, notify_poller_death);
out:
- return idx;
+ return ret;
}
-
-static int
-event_unregister_poll (struct event_pool *event_pool, int fd, int idx_hint)
+int
+gf_event_unregister(struct event_pool *event_pool, int fd, int idx)
{
- int idx = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+ int ret = -1;
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, fd, idx_hint);
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
- if (idx == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- fd, idx_hint);
- errno = ENOENT;
- goto unlock;
- }
-
- event_pool->reg[idx] = event_pool->reg[--event_pool->used];
- event_pool->changed = 1;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
-out:
- return idx;
-}
-
-
-static int
-event_select_on_poll (struct event_pool *event_pool, int fd, int idx_hint,
- int poll_in, int poll_out)
-{
- int idx = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, fd, idx_hint);
-
- if (idx == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- fd, idx_hint);
- errno = ENOENT;
- goto unlock;
- }
-
- switch (poll_in) {
- case 1:
- event_pool->reg[idx].events |= POLLIN;
- break;
- case 0:
- event_pool->reg[idx].events &= ~POLLIN;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- /* TODO: log error */
- break;
- }
-
- switch (poll_out) {
- case 1:
- event_pool->reg[idx].events |= POLLOUT;
- break;
- case 0:
- event_pool->reg[idx].events &= ~POLLOUT;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- /* TODO: log error */
- break;
- }
-
- if (poll_in + poll_out > -2)
- event_pool->changed = 1;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
+ ret = event_pool->ops->event_unregister(event_pool, fd, idx);
out:
- return idx;
-}
-
-
-static int
-event_dispatch_poll_handler (struct event_pool *event_pool,
- struct pollfd *ufds, int i)
-{
- event_handler_t handler = NULL;
- void *data = NULL;
- int idx = -1;
- int ret = 0;
-
- handler = NULL;
- data = NULL;
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, ufds[i].fd, i);
-
- if (idx == -1) {
- gf_log ("poll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- ufds[i].fd, i);
- goto unlock;
- }
-
- handler = event_pool->reg[idx].handler;
- data = event_pool->reg[idx].data;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
- if (handler)
- ret = handler (ufds[i].fd, idx, data,
- (ufds[i].revents & (POLLIN|POLLPRI)),
- (ufds[i].revents & (POLLOUT)),
- (ufds[i].revents & (POLLERR|POLLHUP|POLLNVAL)));
-
- return ret;
+ return ret;
}
-
-static int
-event_dispatch_poll_resize (struct event_pool *event_pool,
- struct pollfd *ufds, int size)
-{
- int i = 0;
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- if (event_pool->changed == 0) {
- goto unlock;
- }
-
- if (event_pool->used > event_pool->evcache_size) {
- if (event_pool->evcache)
- GF_FREE (event_pool->evcache);
-
- event_pool->evcache = ufds = NULL;
-
- event_pool->evcache_size = event_pool->used;
-
- ufds = GF_CALLOC (sizeof (struct pollfd),
- event_pool->evcache_size,
- gf_common_mt_pollfd);
- if (!ufds)
- goto unlock;
- event_pool->evcache = ufds;
- }
-
- for (i = 0; i < event_pool->used; i++) {
- ufds[i].fd = event_pool->reg[i].fd;
- ufds[i].events = event_pool->reg[i].events;
- ufds[i].revents = 0;
- }
-
- size = i;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
- return size;
-}
-
-
-static int
-event_dispatch_poll (struct event_pool *event_pool)
+int
+gf_event_unregister_close(struct event_pool *event_pool, int fd, int idx)
{
- struct pollfd *ufds = NULL;
- int size = 0;
- int i = 0;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- while (1) {
- size = event_dispatch_poll_resize (event_pool, ufds, size);
- ufds = event_pool->evcache;
-
- ret = poll (ufds, size, 1);
-
- if (ret == 0)
- /* timeout */
- continue;
+ int ret = -1;
- if (ret == -1 && errno == EINTR)
- /* sys call */
- continue;
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
- for (i = 0; i < size; i++) {
- if (!ufds[i].revents)
- continue;
-
- event_dispatch_poll_handler (event_pool, ufds, i);
- }
- }
+ ret = event_pool->ops->event_unregister_close(event_pool, fd, idx);
out:
- return -1;
+ return ret;
}
-
-static struct event_ops event_ops_poll = {
- .new = event_pool_new_poll,
- .event_register = event_register_poll,
- .event_select_on = event_select_on_poll,
- .event_unregister = event_unregister_poll,
- .event_dispatch = event_dispatch_poll
-};
-
-
-
-#ifdef HAVE_SYS_EPOLL_H
-#include <sys/epoll.h>
-
-
-static struct event_pool *
-event_pool_new_epoll (int count)
+int
+gf_event_select_on(struct event_pool *event_pool, int fd, int idx_hint,
+ int poll_in, int poll_out)
{
- struct event_pool *event_pool = NULL;
- int epfd = -1;
-
- event_pool = GF_CALLOC (1, sizeof (*event_pool),
- gf_common_mt_event_pool);
-
- if (!event_pool)
- goto out;
-
- event_pool->count = count;
- event_pool->reg = GF_CALLOC (event_pool->count,
- sizeof (*event_pool->reg),
- gf_common_mt_reg);
-
- if (!event_pool->reg) {
- GF_FREE (event_pool);
- event_pool = NULL;
- goto out;
- }
-
- epfd = epoll_create (count);
-
- if (epfd == -1) {
- gf_log ("epoll", GF_LOG_ERROR, "epoll fd creation failed (%s)",
- strerror (errno));
- GF_FREE (event_pool->reg);
- GF_FREE (event_pool);
- event_pool = NULL;
- goto out;
- }
-
- event_pool->fd = epfd;
+ int ret = -1;
- event_pool->count = count;
-
- pthread_mutex_init (&event_pool->mutex, NULL);
- pthread_cond_init (&event_pool->cond, NULL);
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+ ret = event_pool->ops->event_select_on(event_pool, fd, idx_hint, poll_in,
+ poll_out);
out:
- return event_pool;
+ return ret;
}
-
int
-event_register_epoll (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out)
+gf_event_dispatch(struct event_pool *event_pool)
{
- int idx = -1;
- int ret = -1;
- struct epoll_event epoll_event = {0, };
- struct event_data *ev_data = (void *)&epoll_event.data;
-
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- if (event_pool->count == event_pool->used) {
- event_pool->count *= 2;
-
- event_pool->reg = GF_REALLOC (event_pool->reg,
- event_pool->count *
- sizeof (*event_pool->reg));
-
- if (!event_pool->reg) {
- gf_log ("epoll", GF_LOG_ERROR,
- "event registry re-allocation failed");
- goto unlock;
- }
- }
-
- idx = event_pool->used;
- event_pool->used++;
-
- event_pool->reg[idx].fd = fd;
- event_pool->reg[idx].events = EPOLLPRI;
- event_pool->reg[idx].handler = handler;
- event_pool->reg[idx].data = data;
-
- switch (poll_in) {
- case 1:
- event_pool->reg[idx].events |= EPOLLIN;
- break;
- case 0:
- event_pool->reg[idx].events &= ~EPOLLIN;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("epoll", GF_LOG_ERROR,
- "invalid poll_in value %d", poll_in);
- break;
- }
-
- switch (poll_out) {
- case 1:
- event_pool->reg[idx].events |= EPOLLOUT;
- break;
- case 0:
- event_pool->reg[idx].events &= ~EPOLLOUT;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("epoll", GF_LOG_ERROR,
- "invalid poll_out value %d", poll_out);
- break;
- }
-
- event_pool->changed = 1;
-
- epoll_event.events = event_pool->reg[idx].events;
- ev_data->fd = fd;
- ev_data->idx = idx;
-
- ret = epoll_ctl (event_pool->fd, EPOLL_CTL_ADD, fd,
- &epoll_event);
-
- if (ret == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "failed to add fd(=%d) to epoll fd(=%d) (%s)",
- fd, event_pool->fd, strerror (errno));
- goto unlock;
- }
-
- pthread_cond_broadcast (&event_pool->cond);
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
+ int ret = -1;
-out:
- return ret;
-}
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
-
-static int
-event_unregister_epoll (struct event_pool *event_pool, int fd, int idx_hint)
-{
- int idx = -1;
- int ret = -1;
-
- struct epoll_event epoll_event = {0, };
- struct event_data *ev_data = (void *)&epoll_event.data;
- int lastidx = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, fd, idx_hint);
-
- if (idx == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- fd, idx_hint);
- errno = ENOENT;
- goto unlock;
- }
-
- ret = epoll_ctl (event_pool->fd, EPOLL_CTL_DEL, fd, NULL);
-
- /* if ret is -1, this array member should never be accessed */
- /* if it is 0, the array member might be used by idx_cache
- * in which case the member should not be accessed till
- * it is reallocated
- */
-
- event_pool->reg[idx].fd = -1;
-
- if (ret == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "fail to del fd(=%d) from epoll fd(=%d) (%s)",
- fd, event_pool->fd, strerror (errno));
- goto unlock;
- }
-
- lastidx = event_pool->used - 1;
- if (lastidx == idx) {
- event_pool->used--;
- goto unlock;
- }
-
- epoll_event.events = event_pool->reg[lastidx].events;
- ev_data->fd = event_pool->reg[lastidx].fd;
- ev_data->idx = idx;
-
- ret = epoll_ctl (event_pool->fd, EPOLL_CTL_MOD, ev_data->fd,
- &epoll_event);
- if (ret == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "fail to modify fd(=%d) index %d to %d (%s)",
- ev_data->fd, event_pool->used, idx,
- strerror (errno));
- goto unlock;
- }
-
- /* just replace the unregistered idx by last one */
- event_pool->reg[idx] = event_pool->reg[lastidx];
- event_pool->used--;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
-out:
- return ret;
-}
-
-
-static int
-event_select_on_epoll (struct event_pool *event_pool, int fd, int idx_hint,
- int poll_in, int poll_out)
-{
- int idx = -1;
- int ret = -1;
-
- struct epoll_event epoll_event = {0, };
- struct event_data *ev_data = (void *)&epoll_event.data;
-
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, fd, idx_hint);
-
- if (idx == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "index not found for fd=%d (idx_hint=%d)",
- fd, idx_hint);
- errno = ENOENT;
- goto unlock;
- }
-
- switch (poll_in) {
- case 1:
- event_pool->reg[idx].events |= EPOLLIN;
- break;
- case 0:
- event_pool->reg[idx].events &= ~EPOLLIN;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("epoll", GF_LOG_ERROR,
- "invalid poll_in value %d", poll_in);
- break;
- }
-
- switch (poll_out) {
- case 1:
- event_pool->reg[idx].events |= EPOLLOUT;
- break;
- case 0:
- event_pool->reg[idx].events &= ~EPOLLOUT;
- break;
- case -1:
- /* do nothing */
- break;
- default:
- gf_log ("epoll", GF_LOG_ERROR,
- "invalid poll_out value %d", poll_out);
- break;
- }
-
- epoll_event.events = event_pool->reg[idx].events;
- ev_data->fd = fd;
- ev_data->idx = idx;
-
- ret = epoll_ctl (event_pool->fd, EPOLL_CTL_MOD, fd,
- &epoll_event);
- if (ret == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "failed to modify fd(=%d) events to %d",
- fd, epoll_event.events);
- }
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
+ ret = event_pool->ops->event_dispatch(event_pool);
+ if (ret)
+ goto out;
out:
- return ret;
+ return ret;
}
-
-static int
-event_dispatch_epoll_handler (struct event_pool *event_pool,
- struct epoll_event *events, int i)
-{
- struct event_data *event_data = NULL;
- event_handler_t handler = NULL;
- void *data = NULL;
- int idx = -1;
- int ret = -1;
-
-
- event_data = (void *)&events[i].data;
- handler = NULL;
- data = NULL;
-
- pthread_mutex_lock (&event_pool->mutex);
- {
- idx = __event_getindex (event_pool, event_data->fd,
- event_data->idx);
-
- if (idx == -1) {
- gf_log ("epoll", GF_LOG_ERROR,
- "index not found for fd(=%d) (idx_hint=%d)",
- event_data->fd, event_data->idx);
- goto unlock;
- }
-
- handler = event_pool->reg[idx].handler;
- data = event_pool->reg[idx].data;
- }
-unlock:
- pthread_mutex_unlock (&event_pool->mutex);
-
- if (handler)
- ret = handler (event_data->fd, event_data->idx, data,
- (events[i].events & (EPOLLIN|EPOLLPRI)),
- (events[i].events & (EPOLLOUT)),
- (events[i].events & (EPOLLERR|EPOLLHUP)));
- return ret;
-}
-
-
-static int
-event_dispatch_epoll (struct event_pool *event_pool)
+int
+gf_event_reconfigure_threads(struct event_pool *event_pool, int value)
{
- struct epoll_event *events = NULL;
- int size = 0;
- int i = 0;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
-
- while (1) {
- pthread_mutex_lock (&event_pool->mutex);
- {
- while (event_pool->used == 0)
- pthread_cond_wait (&event_pool->cond,
- &event_pool->mutex);
+ int ret = -1;
- if (event_pool->used > event_pool->evcache_size) {
- if (event_pool->evcache)
- GF_FREE (event_pool->evcache);
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
- event_pool->evcache = events = NULL;
-
- event_pool->evcache_size =
- event_pool->used + 256;
-
- events = GF_CALLOC (event_pool->evcache_size,
- sizeof (struct epoll_event),
- gf_common_mt_epoll_event);
- if (!events)
- break;
-
- event_pool->evcache = events;
- }
- }
- pthread_mutex_unlock (&event_pool->mutex);
-
- ret = epoll_wait (event_pool->fd, event_pool->evcache,
- event_pool->evcache_size, -1);
-
- if (ret == 0)
- /* timeout */
- continue;
-
- if (ret == -1 && errno == EINTR)
- /* sys call */
- continue;
-
- size = ret;
-
- for (i = 0; i < size; i++) {
- if (!events || !events[i].events)
- continue;
-
- ret = event_dispatch_epoll_handler (event_pool,
- events, i);
- }
- }
+ /* call event refresh function */
+ ret = event_pool->ops->event_reconfigure_threads(event_pool, value);
out:
- return ret;
+ return ret;
}
-
-static struct event_ops event_ops_epoll = {
- .new = event_pool_new_epoll,
- .event_register = event_register_epoll,
- .event_select_on = event_select_on_epoll,
- .event_unregister = event_unregister_epoll,
- .event_dispatch = event_dispatch_epoll
-};
-
-#endif
-
-
-struct event_pool *
-event_pool_new (int count)
+int
+gf_event_pool_destroy(struct event_pool *event_pool)
{
- struct event_pool *event_pool = NULL;
-
-#ifdef HAVE_SYS_EPOLL_H
- event_pool = event_ops_epoll.new (count);
+ int ret = -1;
+ int destroy = 0, activethreadcount = 0;
- if (event_pool) {
- event_pool->ops = &event_ops_epoll;
- } else {
- gf_log ("event", GF_LOG_WARNING,
- "falling back to poll based event handling");
- }
-#endif
-
- if (!event_pool) {
- event_pool = event_ops_poll.new (count);
-
- if (event_pool)
- event_pool->ops = &event_ops_poll;
- }
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
- return event_pool;
-}
-
-
-int
-event_register (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out)
-{
- int ret = -1;
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ destroy = event_pool->destroy;
+ activethreadcount = event_pool->activethreadcount;
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+ if (!destroy || (activethreadcount > 0)) {
+ goto out;
+ }
- ret = event_pool->ops->event_register (event_pool, fd, handler, data,
- poll_in, poll_out);
+ ret = event_pool->ops->event_pool_destroy(event_pool);
out:
- return ret;
+ return ret;
}
-
-int
-event_unregister (struct event_pool *event_pool, int fd, int idx)
+void
+poller_destroy_handler(int fd, int idx, int gen, void *data, int poll_out,
+ int poll_in, int poll_err, char event_thread_exit)
{
- int ret = -1;
+ struct event_destroy_data *destroy = NULL;
+ int readfd = -1;
+ char buf = '\0';
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+ destroy = data;
+ readfd = destroy->readfd;
+ if (readfd < 0) {
+ goto out;
+ }
- ret = event_pool->ops->event_unregister (event_pool, fd, idx);
+ while (sys_read(readfd, &buf, 1) > 0) {
+ }
out:
- return ret;
-}
+ gf_event_handled(destroy->pool, fd, idx, gen);
+ return;
+}
+/* This function destroys all the poller threads.
+ * Note: to be called before gf_event_pool_destroy is called.
+ * The order in which cleaning is performed:
+ * - Register a pipe fd(this is for waking threads in poll()/epoll_wait())
+ * - Set the destroy mode, which this no new event registration will succeed
+ * - Reconfigure the thread count to 0(this will succeed only in destroy mode)
+ * - Wake up all the threads in poll() or epoll_wait(), so that they can
+ * destroy themselves.
+ * - Wait for the thread to join(which will happen only after all the other
+ * threads are destroyed)
+ */
int
-event_select_on (struct event_pool *event_pool, int fd, int idx_hint,
- int poll_in, int poll_out)
+gf_event_dispatch_destroy(struct event_pool *event_pool)
{
- int ret = -1;
+ int ret = -1, threadcount = 0;
+ int fd[2] = {-1};
+ int idx = -1;
+ int flags = 0;
+ struct timespec sleep_till = {
+ 0,
+ };
+ struct event_destroy_data data = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("event", event_pool, out);
+
+ ret = pipe(fd);
+ if (ret < 0)
+ goto out;
+
+ /* Make the read end of the pipe nonblocking */
+ flags = fcntl(fd[0], F_GETFL);
+ flags |= O_NONBLOCK;
+ ret = fcntl(fd[0], F_SETFL, flags);
+ if (ret < 0)
+ goto out;
+
+ /* Make the write end of the pipe nonblocking */
+ flags = fcntl(fd[1], F_GETFL);
+ flags |= O_NONBLOCK;
+ ret = fcntl(fd[1], F_SETFL, flags);
+ if (ret < 0)
+ goto out;
+
+ data.pool = event_pool;
+ data.readfd = fd[1];
+
+ /* From the main thread register an event on the pipe fd[0],
+ */
+ idx = gf_event_register(event_pool, fd[0], poller_destroy_handler, &data, 1,
+ 0, 0);
+ if (idx < 0)
+ goto out;
+
+ /* Enter the destroy mode first, set this before reconfiguring to 0
+ * threads, to prevent further reconfigure to thread count > 0.
+ */
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ threadcount = event_pool->eventthreadcount;
+ event_pool->destroy = 1;
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
+
+ ret = gf_event_reconfigure_threads(event_pool, 0);
+ if (ret < 0)
+ goto out;
+
+ /* Write something onto the write end of the pipe(fd[1]) so that
+ * poll wakes up and calls the handler, poller_destroy_handler()
+ */
+ pthread_mutex_lock(&event_pool->mutex);
+ {
+ /* Write to pipe(fd[1]) and then wait for 1 second or until
+ * a poller thread that is dying, broadcasts. Make sure we
+ * do not loop forever by limiting to 10 retries
+ */
+ int retry = 0;
+
+ while (event_pool->activethreadcount > 0 &&
+ (retry++ < (threadcount + 10))) {
+ if (sys_write(fd[1], "dummy", 6) == -1) {
+ break;
+ }
+ timespec_now_realtime(&sleep_till);
+ sleep_till.tv_sec += 1;
+ ret = pthread_cond_timedwait(&event_pool->cond, &event_pool->mutex,
+ &sleep_till);
+ if (ret) {
+ gf_msg_debug("event", 0,
+ "thread cond-timedwait failed "
+ "active-thread-count: %d, "
+ "retry: %d",
+ event_pool->activethreadcount, retry);
+ }
+ }
+ }
+ pthread_mutex_unlock(&event_pool->mutex);
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+ ret = gf_event_unregister(event_pool, fd[0], idx);
- ret = event_pool->ops->event_select_on (event_pool, fd, idx_hint,
- poll_in, poll_out);
out:
- return ret;
-}
+ if (fd[0] != -1)
+ sys_close(fd[0]);
+ if (fd[1] != -1)
+ sys_close(fd[1]);
+ return ret;
+}
int
-event_dispatch (struct event_pool *event_pool)
+gf_event_handled(struct event_pool *event_pool, int fd, int idx, int gen)
{
- int ret = -1;
+ int ret = 0;
- GF_VALIDATE_OR_GOTO ("event", event_pool, out);
+ if (event_pool->ops->event_handled)
+ ret = event_pool->ops->event_handled(event_pool, fd, idx, gen);
- ret = event_pool->ops->event_dispatch (event_pool);
-
-out:
- return ret;
+ return ret;
}
diff --git a/libglusterfs/src/event.h b/libglusterfs/src/event.h
deleted file mode 100644
index f2f8029aeaa..00000000000
--- a/libglusterfs/src/event.h
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _EVENT_H_
-#define _EVENT_H_
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <pthread.h>
-
-struct event_pool;
-struct event_ops;
-struct event_data {
- int fd;
- int idx;
-} __attribute__ ((__packed__, __may_alias__));
-
-
-typedef int (*event_handler_t) (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err);
-
-struct event_pool {
- struct event_ops *ops;
-
- int fd;
- int breaker[2];
-
- int count;
- struct {
- int fd;
- int events;
- void *data;
- event_handler_t handler;
- } *reg;
-
- int used;
- int idx_cache;
- int changed;
-
- pthread_mutex_t mutex;
- pthread_cond_t cond;
-
- void *evcache;
- int evcache_size;
-};
-
-struct event_ops {
- struct event_pool * (*new) (int count);
-
- int (*event_register) (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out);
-
- int (*event_select_on) (struct event_pool *event_pool, int fd, int idx,
- int poll_in, int poll_out);
-
- int (*event_unregister) (struct event_pool *event_pool, int fd, int idx);
-
- int (*event_dispatch) (struct event_pool *event_pool);
-};
-
-struct event_pool * event_pool_new (int count);
-int event_select_on (struct event_pool *event_pool, int fd, int idx,
- int poll_in, int poll_out);
-int event_register (struct event_pool *event_pool, int fd,
- event_handler_t handler,
- void *data, int poll_in, int poll_out);
-int event_unregister (struct event_pool *event_pool, int fd, int idx);
-int event_dispatch (struct event_pool *event_pool);
-
-#endif /* _EVENT_H_ */
diff --git a/libglusterfs/src/events.c b/libglusterfs/src/events.c
new file mode 100644
index 00000000000..33157549897
--- /dev/null
+++ b/libglusterfs/src/events.c
@@ -0,0 +1,136 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <time.h>
+#include <stdarg.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <netdb.h>
+
+#include "glusterfs/syscall.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/globals.h"
+#include "glusterfs/events.h"
+
+#define EVENT_HOST "127.0.0.1"
+#define EVENT_PORT 24009
+
+int
+_gf_event(eventtypes_t event, const char *fmt, ...)
+{
+ int ret = 0;
+ int sock = -1;
+ char *eventstr = NULL;
+ va_list arguments;
+ char *msg = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ char *host = NULL;
+ struct addrinfo hints;
+ struct addrinfo *result = NULL;
+ struct addrinfo *iter_result_ptr = NULL;
+ xlator_t *this = THIS;
+ char *volfile_server_transport = NULL;
+
+ /* Global context */
+ ctx = this->ctx;
+
+ if (event < 0 || event >= EVENT_LAST) {
+ ret = EVENT_ERROR_INVALID_INPUTS;
+ goto out;
+ }
+
+ if (ctx) {
+ volfile_server_transport = ctx->cmd_args.volfile_server_transport;
+ }
+ if (!volfile_server_transport) {
+ volfile_server_transport = "tcp";
+ }
+
+ /* host = NULL returns localhost */
+ if (ctx && ctx->cmd_args.volfile_server &&
+ (strcmp(volfile_server_transport, "unix"))) {
+ /* If it is client code then volfile_server is set
+ use that information to push the events. */
+ host = ctx->cmd_args.volfile_server;
+ }
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_UNSPEC;
+ hints.ai_socktype = SOCK_DGRAM;
+ hints.ai_flags = AI_ADDRCONFIG;
+
+ if ((getaddrinfo(host, TOSTRING(EVENT_PORT), &hints, &result)) != 0) {
+ ret = EVENT_ERROR_RESOLVE;
+ goto out;
+ }
+
+ // iterate over the result and break when socket creation is success.
+ for (iter_result_ptr = result; iter_result_ptr != NULL;
+ iter_result_ptr = iter_result_ptr->ai_next) {
+ sock = socket(iter_result_ptr->ai_family, iter_result_ptr->ai_socktype,
+ iter_result_ptr->ai_protocol);
+ if (sock != -1) {
+ break;
+ }
+ }
+ /*
+ * If none of the addrinfo structures lead to a successful socket
+ * creation, socket creation has failed.
+ */
+ if (sock < 0) {
+ ret = EVENT_ERROR_SOCKET;
+ goto out;
+ }
+
+ va_start(arguments, fmt);
+ ret = gf_vasprintf(&msg, fmt, arguments);
+ va_end(arguments);
+
+ if (ret < 0) {
+ ret = EVENT_ERROR_INVALID_INPUTS;
+ goto out;
+ }
+
+ ret = gf_asprintf(&eventstr, "%u %d %s", (unsigned)gf_time(), event, msg);
+ GF_FREE(msg);
+ if (ret <= 0) {
+ ret = EVENT_ERROR_MSG_FORMAT;
+ goto out;
+ }
+
+ /* Send Message */
+ if (sendto(sock, eventstr, strlen(eventstr), 0, result->ai_addr,
+ result->ai_addrlen) <= 0) {
+ ret = EVENT_ERROR_SEND;
+ goto out;
+ }
+
+ ret = EVENT_SEND_OK;
+
+out:
+ if (sock >= 0) {
+ sys_close(sock);
+ }
+
+ /* Allocated by gf_asprintf */
+ if (eventstr)
+ GF_FREE(eventstr);
+
+ if (result)
+ freeaddrinfo(result);
+
+ return ret;
+}
diff --git a/libglusterfs/src/fd-lk.c b/libglusterfs/src/fd-lk.c
index 0c439fe6e93..c2d34f81c9c 100644
--- a/libglusterfs/src/fd-lk.c
+++ b/libglusterfs/src/fd-lk.c
@@ -8,463 +8,426 @@
cases as published by the Free Software Foundation.
*/
-#include "fd-lk.h"
-#include "common-utils.h"
-
+#include "glusterfs/fd-lk.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
int32_t
-_fd_lk_delete_lock (fd_lk_ctx_node_t *lock)
+_fd_lk_delete_lock(fd_lk_ctx_node_t *lock)
{
- int32_t ret = -1;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("fd-lk", lock, out);
+ GF_VALIDATE_OR_GOTO("fd-lk", lock, out);
- list_del_init (&lock->next);
+ list_del_init(&lock->next);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int32_t
-_fd_lk_destroy_lock (fd_lk_ctx_node_t *lock)
+_fd_lk_destroy_lock(fd_lk_ctx_node_t *lock)
{
- int32_t ret = -1;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("fd-lk", lock, out);
+ GF_VALIDATE_OR_GOTO("fd-lk", lock, out);
- GF_FREE (lock);
+ GF_FREE(lock);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-_fd_lk_destroy_lock_list (fd_lk_ctx_t *lk_ctx)
+_fd_lk_destroy_lock_list(fd_lk_ctx_t *lk_ctx)
{
- int ret = -1;
- fd_lk_ctx_node_t *lk = NULL;
- fd_lk_ctx_node_t *tmp = NULL;
-
- GF_VALIDATE_OR_GOTO ("fd-lk", lk_ctx, out);
-
- list_for_each_entry_safe (lk, tmp, &lk_ctx->lk_list, next) {
- _fd_lk_delete_lock (lk);
- _fd_lk_destroy_lock (lk);
- }
- ret = 0;
+ int ret = -1;
+ fd_lk_ctx_node_t *lk = NULL;
+ fd_lk_ctx_node_t *tmp = NULL;
+
+ GF_VALIDATE_OR_GOTO("fd-lk", lk_ctx, out);
+
+ list_for_each_entry_safe(lk, tmp, &lk_ctx->lk_list, next)
+ {
+ _fd_lk_delete_lock(lk);
+ _fd_lk_destroy_lock(lk);
+ }
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-fd_lk_ctx_unref (fd_lk_ctx_t *lk_ctx)
+fd_lk_ctx_unref(fd_lk_ctx_t *lk_ctx)
{
- int ref = -1;
+ int ref = -1;
- GF_VALIDATE_OR_GOTO ("fd-lk", lk_ctx, err);
+ GF_VALIDATE_OR_GOTO("fd-lk", lk_ctx, err);
- LOCK (&lk_ctx->lock);
- {
- ref = --lk_ctx->ref;
- if (ref < 0)
- GF_ASSERT (!ref);
- if (ref == 0)
- _fd_lk_destroy_lock_list (lk_ctx);
- }
- UNLOCK (&lk_ctx->lock);
+ ref = GF_ATOMIC_DEC(lk_ctx->ref);
+ if (ref < 0)
+ GF_ASSERT(!ref);
+ if (ref == 0)
+ _fd_lk_destroy_lock_list(lk_ctx);
- if (ref == 0) {
- LOCK_DESTROY (&lk_ctx->lock);
- GF_FREE (lk_ctx);
- }
+ if (ref == 0) {
+ LOCK_DESTROY(&lk_ctx->lock);
+ GF_FREE(lk_ctx);
+ }
- return 0;
+ return 0;
err:
- return -1;
-}
-
-fd_lk_ctx_t *
-_fd_lk_ctx_ref (fd_lk_ctx_t *lk_ctx)
-{
- if (!lk_ctx) {
- gf_log_callingfn ("fd", GF_LOG_WARNING,
- "invalid argument");
- return NULL;
- }
-
- ++lk_ctx->ref;
-
- return lk_ctx;
-}
-
-fd_lk_ctx_t *
-fd_lk_ctx_ref (fd_lk_ctx_t *lk_ctx)
-{
- fd_lk_ctx_t *new_lk_ctx = NULL;
-
- if (!lk_ctx) {
- gf_log_callingfn ("fd", GF_LOG_WARNING,
- "invalid argument");
- return NULL;
- }
-
- LOCK (&lk_ctx->lock);
- {
- new_lk_ctx = _fd_lk_ctx_ref (lk_ctx);
- }
- UNLOCK (&lk_ctx->lock);
-
- return new_lk_ctx;
+ return -1;
}
fd_lk_ctx_t *
-fd_lk_ctx_try_ref (fd_lk_ctx_t *lk_ctx)
+fd_lk_ctx_ref(fd_lk_ctx_t *lk_ctx)
{
- int ret = -1;
- fd_lk_ctx_t *new_lk_ctx = NULL;
-
- if (!lk_ctx) {
- goto out;
- }
+ if (!lk_ctx) {
+ gf_msg_callingfn("fd-lk", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return NULL;
+ }
- ret = TRY_LOCK (&lk_ctx->lock);
- if (ret)
- goto out;
+ GF_ATOMIC_INC(lk_ctx->ref);
- new_lk_ctx = _fd_lk_ctx_ref (lk_ctx);
- UNLOCK (&lk_ctx->lock);
-
-out:
- return new_lk_ctx;
+ return lk_ctx;
}
fd_lk_ctx_t *
-fd_lk_ctx_create ()
+fd_lk_ctx_create()
{
- fd_lk_ctx_t *fd_lk_ctx = NULL;
+ fd_lk_ctx_t *fd_lk_ctx = NULL;
- fd_lk_ctx = GF_CALLOC (1, sizeof (fd_lk_ctx_t),
- gf_common_mt_fd_lk_ctx_t);
- if (!fd_lk_ctx)
- goto out;
+ fd_lk_ctx = GF_CALLOC(1, sizeof(fd_lk_ctx_t), gf_common_mt_fd_lk_ctx_t);
+ if (!fd_lk_ctx)
+ goto out;
- INIT_LIST_HEAD (&fd_lk_ctx->lk_list);
+ INIT_LIST_HEAD(&fd_lk_ctx->lk_list);
- LOCK_INIT (&fd_lk_ctx->lock);
+ LOCK_INIT(&fd_lk_ctx->lock);
- fd_lk_ctx = fd_lk_ctx_ref (fd_lk_ctx);
+ fd_lk_ctx = fd_lk_ctx_ref(fd_lk_ctx);
out:
- return fd_lk_ctx;
+ return fd_lk_ctx;
}
int
-_fd_lk_insert_lock (fd_lk_ctx_t *lk_ctx,
- fd_lk_ctx_node_t *lock)
+_fd_lk_insert_lock(fd_lk_ctx_t *lk_ctx, fd_lk_ctx_node_t *lock)
{
- list_add_tail (&lock->next, &lk_ctx->lk_list);
- return 0;
+ list_add_tail(&lock->next, &lk_ctx->lk_list);
+ return 0;
}
static off_t
-_fd_lk_get_lock_len (off_t start, off_t end)
+_fd_lk_get_lock_len(off_t start, off_t end)
{
- if (end == LLONG_MAX)
- return 0;
- else
- return (end - start + 1);
+ if (end == LLONG_MAX)
+ return 0;
+ else
+ return (end - start + 1);
}
fd_lk_ctx_node_t *
-fd_lk_ctx_node_new (int32_t cmd, struct gf_flock *flock)
+fd_lk_ctx_node_new(int32_t cmd, struct gf_flock *flock)
{
- fd_lk_ctx_node_t *new_lock = NULL;
+ fd_lk_ctx_node_t *new_lock = NULL;
- /* TODO: get from mem-pool */
- new_lock = GF_CALLOC (1, sizeof (fd_lk_ctx_node_t),
- gf_common_mt_fd_lk_ctx_node_t);
- if (!new_lock)
- goto out;
+ /* TODO: get from mem-pool */
+ new_lock = GF_CALLOC(1, sizeof(fd_lk_ctx_node_t),
+ gf_common_mt_fd_lk_ctx_node_t);
+ if (!new_lock)
+ goto out;
- new_lock->cmd = cmd;
+ new_lock->cmd = cmd;
- if (flock) {
- new_lock->fl_type = flock->l_type;
- new_lock->fl_start = flock->l_start;
+ if (flock) {
+ new_lock->fl_type = flock->l_type;
+ new_lock->fl_start = flock->l_start;
- if (flock->l_len == 0)
- new_lock->fl_end = LLONG_MAX;
- else
- new_lock->fl_end = flock->l_start + flock->l_len - 1;
+ if (flock->l_len == 0)
+ new_lock->fl_end = LLONG_MAX;
+ else
+ new_lock->fl_end = flock->l_start + flock->l_len - 1;
- memcpy (&new_lock->user_flock, flock,
- sizeof (struct gf_flock));
- }
+ memcpy(&new_lock->user_flock, flock, sizeof(struct gf_flock));
+ }
- INIT_LIST_HEAD (&new_lock->next);
+ INIT_LIST_HEAD(&new_lock->next);
out:
- return new_lock;
+ return new_lock;
}
int32_t
-_fd_lk_delete_unlck_locks (fd_lk_ctx_t *lk_ctx)
+_fd_lk_delete_unlck_locks(fd_lk_ctx_t *lk_ctx)
{
- int32_t ret = -1;
- fd_lk_ctx_node_t *tmp = NULL;
- fd_lk_ctx_node_t *lk = NULL;
+ int32_t ret = -1;
+ fd_lk_ctx_node_t *tmp = NULL;
+ fd_lk_ctx_node_t *lk = NULL;
- GF_VALIDATE_OR_GOTO ("fd-lk", lk_ctx, out);
+ GF_VALIDATE_OR_GOTO("fd-lk", lk_ctx, out);
- list_for_each_entry_safe (lk, tmp, &lk_ctx->lk_list, next) {
- if (lk->fl_type == F_UNLCK) {
- _fd_lk_delete_lock (lk);
- _fd_lk_destroy_lock (lk);
- }
+ list_for_each_entry_safe(lk, tmp, &lk_ctx->lk_list, next)
+ {
+ if (lk->fl_type == F_UNLCK) {
+ _fd_lk_delete_lock(lk);
+ _fd_lk_destroy_lock(lk);
}
+ }
out:
- return ret;
+ return ret;
}
int
-fd_lk_overlap (fd_lk_ctx_node_t *l1,
- fd_lk_ctx_node_t *l2)
+fd_lk_overlap(fd_lk_ctx_node_t *l1, fd_lk_ctx_node_t *l2)
{
- if (l1->fl_end >= l2->fl_start &&
- l2->fl_end >= l1->fl_start)
- return 1;
+ if (l1->fl_end >= l2->fl_start && l2->fl_end >= l1->fl_start)
+ return 1;
- return 0;
+ return 0;
}
fd_lk_ctx_node_t *
-_fd_lk_add_locks (fd_lk_ctx_node_t *l1,
- fd_lk_ctx_node_t *l2)
+_fd_lk_add_locks(fd_lk_ctx_node_t *l1, fd_lk_ctx_node_t *l2)
{
- fd_lk_ctx_node_t *sum = NULL;
+ fd_lk_ctx_node_t *sum = NULL;
- sum = fd_lk_ctx_node_new (0, NULL);
- if (!sum)
- goto out;
+ sum = fd_lk_ctx_node_new(0, NULL);
+ if (!sum)
+ goto out;
- sum->fl_start = min (l1->fl_start, l2->fl_start);
- sum->fl_end = max (l1->fl_end, l2->fl_end);
+ sum->fl_start = min(l1->fl_start, l2->fl_start);
+ sum->fl_end = max(l1->fl_end, l2->fl_end);
- sum->user_flock.l_start = sum->fl_start;
- sum->user_flock.l_len = _fd_lk_get_lock_len (sum->fl_start,
- sum->fl_end);
+ sum->user_flock.l_start = sum->fl_start;
+ sum->user_flock.l_len = _fd_lk_get_lock_len(sum->fl_start, sum->fl_end);
out:
- return sum;
+ return sum;
}
/* Subtract two locks */
struct _values {
- fd_lk_ctx_node_t *locks[3];
+ fd_lk_ctx_node_t *locks[3];
};
int32_t
-_fd_lk_sub_locks (struct _values *v,
- fd_lk_ctx_node_t *big,
- fd_lk_ctx_node_t *small)
+_fd_lk_sub_locks(struct _values *v, fd_lk_ctx_node_t *big,
+ fd_lk_ctx_node_t *small)
{
- int32_t ret = -1;
-
- if ((big->fl_start == small->fl_start) &&
- (big->fl_end == small->fl_end)) {
- /* both edges coincide with big */
- v->locks[0] = fd_lk_ctx_node_new (small->cmd, NULL);
- if (!v->locks[0])
- goto out;
-
- memcpy (v->locks[0], big, sizeof (fd_lk_ctx_node_t));
-
- v->locks[0]->fl_type = small->fl_type;
- v->locks[0]->user_flock.l_type = small->fl_type;
- } else if ((small->fl_start > big->fl_start) &&
- (small->fl_end < big->fl_end)) {
- /* small lock is completely inside big lock,
- break it down into 3 different locks. */
- v->locks[0] = fd_lk_ctx_node_new (big->cmd, NULL);
- if (!v->locks[0])
- goto out;
-
- v->locks[1] = fd_lk_ctx_node_new (small->cmd, NULL);
- if (!v->locks[1])
- goto out;
-
- v->locks[2] = fd_lk_ctx_node_new (big->cmd, NULL);
- if (!v->locks[2])
- goto out;
-
- memcpy (v->locks[0], big, sizeof (fd_lk_ctx_node_t));
- v->locks[0]->fl_end = small->fl_start - 1;
- v->locks[0]->user_flock.l_len =
- _fd_lk_get_lock_len (v->locks[0]->fl_start,
- v->locks[0]->fl_end);
-
- memcpy (v->locks[1], small, sizeof (fd_lk_ctx_node_t));
-
- memcpy (v->locks[2], big, sizeof (fd_lk_ctx_node_t));
- v->locks[2]->fl_start = small->fl_end + 1;
- v->locks[2]->user_flock.l_len =
- _fd_lk_get_lock_len (v->locks[2]->fl_start,
- v->locks[2]->fl_end);
- } else if (small->fl_start == big->fl_start) {
- /* One of the ends co-incide, break the
- locks into two seperate parts */
- v->locks[0] = fd_lk_ctx_node_new (small->cmd, NULL);
- if (!v->locks[0])
- goto out;
-
- v->locks[1] = fd_lk_ctx_node_new (big->cmd, NULL);
- if (!v->locks[1])
- goto out;
-
- memcpy (v->locks[0], small, sizeof (fd_lk_ctx_node_t));
-
- memcpy (v->locks[1], big, sizeof (fd_lk_ctx_node_t));
- v->locks[1]->fl_start = small->fl_end + 1;
- v->locks[1]->user_flock.l_start = small->fl_end + 1;
- } else if (small->fl_end == big->fl_end) {
- /* One of the ends co-incide, break the
- locks into two seperate parts */
- v->locks[0] = fd_lk_ctx_node_new (small->cmd, NULL);
- if (!v->locks[0])
- goto out;
-
- v->locks[1] = fd_lk_ctx_node_new (big->cmd, NULL);
- if (!v->locks[1])
- goto out;
-
- memcpy (v->locks[0], big, sizeof (fd_lk_ctx_node_t));
- v->locks[0]->fl_end = small->fl_start - 1;
- v->locks[0]->user_flock.l_len =
- _fd_lk_get_lock_len (v->locks[0]->fl_start,
- v->locks[0]->fl_end);
-
- memcpy (v->locks[1], small, sizeof (fd_lk_ctx_node_t));
- } else {
- /* We should never come to this case */
- GF_ASSERT (!"Invalid case");
- }
- ret = 0;
+ int32_t ret = -1;
+
+ if ((big->fl_start == small->fl_start) && (big->fl_end == small->fl_end)) {
+ /* both edges coincide with big */
+ v->locks[0] = fd_lk_ctx_node_new(small->cmd, NULL);
+ if (!v->locks[0])
+ goto out;
+
+ memcpy(v->locks[0], big, sizeof(fd_lk_ctx_node_t));
+
+ v->locks[0]->fl_type = small->fl_type;
+ v->locks[0]->user_flock.l_type = small->fl_type;
+ } else if ((small->fl_start > big->fl_start) &&
+ (small->fl_end < big->fl_end)) {
+ /* small lock is completely inside big lock,
+ break it down into 3 different locks. */
+ v->locks[0] = fd_lk_ctx_node_new(big->cmd, NULL);
+ if (!v->locks[0])
+ goto out;
+
+ v->locks[1] = fd_lk_ctx_node_new(small->cmd, NULL);
+ if (!v->locks[1])
+ goto out;
+
+ v->locks[2] = fd_lk_ctx_node_new(big->cmd, NULL);
+ if (!v->locks[2])
+ goto out;
+
+ memcpy(v->locks[0], big, sizeof(fd_lk_ctx_node_t));
+ v->locks[0]->fl_end = small->fl_start - 1;
+ v->locks[0]->user_flock.l_len = _fd_lk_get_lock_len(
+ v->locks[0]->fl_start, v->locks[0]->fl_end);
+
+ memcpy(v->locks[1], small, sizeof(fd_lk_ctx_node_t));
+
+ memcpy(v->locks[2], big, sizeof(fd_lk_ctx_node_t));
+ v->locks[2]->fl_start = small->fl_end + 1;
+ v->locks[2]->user_flock.l_len = _fd_lk_get_lock_len(
+ v->locks[2]->fl_start, v->locks[2]->fl_end);
+ } else if (small->fl_start == big->fl_start) {
+ /* One of the ends co-incide, break the
+ locks into two separate parts */
+ v->locks[0] = fd_lk_ctx_node_new(small->cmd, NULL);
+ if (!v->locks[0])
+ goto out;
+
+ v->locks[1] = fd_lk_ctx_node_new(big->cmd, NULL);
+ if (!v->locks[1])
+ goto out;
+
+ memcpy(v->locks[0], small, sizeof(fd_lk_ctx_node_t));
+
+ memcpy(v->locks[1], big, sizeof(fd_lk_ctx_node_t));
+ v->locks[1]->fl_start = small->fl_end + 1;
+ v->locks[1]->user_flock.l_start = small->fl_end + 1;
+ } else if (small->fl_end == big->fl_end) {
+ /* One of the ends co-incide, break the
+ locks into two separate parts */
+ v->locks[0] = fd_lk_ctx_node_new(small->cmd, NULL);
+ if (!v->locks[0])
+ goto out;
+
+ v->locks[1] = fd_lk_ctx_node_new(big->cmd, NULL);
+ if (!v->locks[1])
+ goto out;
+
+ memcpy(v->locks[0], big, sizeof(fd_lk_ctx_node_t));
+ v->locks[0]->fl_end = small->fl_start - 1;
+ v->locks[0]->user_flock.l_len = _fd_lk_get_lock_len(
+ v->locks[0]->fl_start, v->locks[0]->fl_end);
+
+ memcpy(v->locks[1], small, sizeof(fd_lk_ctx_node_t));
+ } else {
+ /* We should never come to this case */
+ GF_ASSERT(!"Invalid case");
+ }
+ ret = 0;
out:
- return ret;
+ return ret;
}
static void
-_fd_lk_insert_and_merge (fd_lk_ctx_t *lk_ctx,
- fd_lk_ctx_node_t *lock)
+_fd_lk_insert_and_merge(fd_lk_ctx_t *lk_ctx, fd_lk_ctx_node_t *lock)
{
- int32_t ret = -1;
- int32_t i = 0;
- fd_lk_ctx_node_t *entry = NULL;
- fd_lk_ctx_node_t *t = NULL;
- fd_lk_ctx_node_t *sum = NULL;
- struct _values v = {.locks = {0, 0, 0 }};
-
- list_for_each_entry_safe (entry, t, &lk_ctx->lk_list, next) {
- if (!fd_lk_overlap (entry, lock))
- continue;
-
- if (entry->fl_type == lock->fl_type) {
- sum = _fd_lk_add_locks (entry, lock);
- if (!sum)
- return;
- sum->fl_type = entry->fl_type;
- sum->user_flock.l_type = entry->fl_type;
- _fd_lk_delete_lock (entry);
- _fd_lk_destroy_lock (entry);
- _fd_lk_destroy_lock (lock);
- _fd_lk_insert_and_merge (lk_ctx, sum);
- return;
- } else {
- sum = _fd_lk_add_locks (entry, lock);
- sum->fl_type = entry->fl_type;
- sum->user_flock.l_type = entry->fl_type;
- ret = _fd_lk_sub_locks (&v, sum, lock);
- if (ret)
- return;
- _fd_lk_delete_lock (entry);
- _fd_lk_destroy_lock (entry);
-
- _fd_lk_delete_lock (lock);
- _fd_lk_destroy_lock (lock);
-
- for (i = 0; i < 3; i++) {
- if (!v.locks[i])
- continue;
-
- INIT_LIST_HEAD (&v.locks[i]->next);
- _fd_lk_insert_and_merge (lk_ctx, v.locks[i]);
- }
- _fd_lk_delete_unlck_locks (lk_ctx);
- return;
- }
- }
-
- /* no conflicts, so just insert */
- if (lock->fl_type != F_UNLCK) {
- _fd_lk_insert_lock (lk_ctx, lock);
+ int32_t ret = -1;
+ int32_t i = 0;
+ fd_lk_ctx_node_t *entry = NULL;
+ fd_lk_ctx_node_t *t = NULL;
+ fd_lk_ctx_node_t *sum = NULL;
+ struct _values v = {.locks = {0, 0, 0}};
+
+ list_for_each_entry_safe(entry, t, &lk_ctx->lk_list, next)
+ {
+ if (!fd_lk_overlap(entry, lock))
+ continue;
+
+ if (entry->fl_type == lock->fl_type) {
+ sum = _fd_lk_add_locks(entry, lock);
+ if (!sum)
+ return;
+ sum->fl_type = entry->fl_type;
+ sum->user_flock.l_type = entry->fl_type;
+ _fd_lk_delete_lock(entry);
+ _fd_lk_destroy_lock(entry);
+ _fd_lk_destroy_lock(lock);
+ _fd_lk_insert_and_merge(lk_ctx, sum);
+ return;
} else {
- _fd_lk_destroy_lock_list (lk_ctx);
+ sum = _fd_lk_add_locks(entry, lock);
+ sum->fl_type = lock->fl_type;
+ sum->user_flock.l_type = lock->fl_type;
+ ret = _fd_lk_sub_locks(&v, sum, lock);
+ if (ret)
+ return;
+ _fd_lk_delete_lock(entry);
+ _fd_lk_destroy_lock(entry);
+
+ _fd_lk_delete_lock(lock);
+ _fd_lk_destroy_lock(lock);
+
+ _fd_lk_destroy_lock(sum);
+
+ for (i = 0; i < 3; i++) {
+ if (!v.locks[i])
+ continue;
+
+ INIT_LIST_HEAD(&v.locks[i]->next);
+ _fd_lk_insert_and_merge(lk_ctx, v.locks[i]);
+ }
+ _fd_lk_delete_unlck_locks(lk_ctx);
+ return;
}
+ }
+
+ /* no conflicts, so just insert */
+ if (lock->fl_type != F_UNLCK) {
+ _fd_lk_insert_lock(lk_ctx, lock);
+ } else {
+ _fd_lk_destroy_lock(lock);
+ }
}
static void
-print_lock_list (fd_lk_ctx_t *lk_ctx)
+print_lock_list(fd_lk_ctx_t *lk_ctx)
{
- fd_lk_ctx_node_t *lk = NULL;
-
- gf_log ("fd-lk", GF_LOG_DEBUG, "lock list:");
-
- list_for_each_entry (lk, &lk_ctx->lk_list, next)
- gf_log ("fd-lk", GF_LOG_DEBUG, "owner = %s, "
- "cmd = %s fl_type = %s, fs_start = %"PRId64", "
- "fs_end = %"PRId64", user_flock: l_type = %s, "
- "l_start = %"PRId64", l_len = %"PRId64", ",
- lkowner_utoa (&lk->user_flock.l_owner),
- get_lk_cmd (lk->cmd), get_lk_type (lk->fl_type),
- lk->fl_start, lk->fl_end,
- get_lk_type (lk->user_flock.l_type),
- lk->user_flock.l_start,
- lk->user_flock.l_len);
+ fd_lk_ctx_node_t *lk = NULL;
+
+ gf_msg_debug("fd-lk", 0, "lock list:");
+
+ list_for_each_entry(lk, &lk_ctx->lk_list, next)
+ gf_msg_debug("fd-lk", 0,
+ "owner = %s, cmd = %s fl_type = %s,"
+ " fs_start = %" PRId64 ", fs_end = %" PRId64
+ ", "
+ "user_flock: l_type = %s, l_start = %" PRId64
+ ", "
+ "l_len = %" PRId64 ", ",
+ lkowner_utoa(&lk->user_flock.l_owner), get_lk_cmd(lk->cmd),
+ get_lk_type(lk->fl_type), lk->fl_start, lk->fl_end,
+ get_lk_type(lk->user_flock.l_type), lk->user_flock.l_start,
+ lk->user_flock.l_len);
}
int
-fd_lk_insert_and_merge (fd_t *fd, int32_t cmd,
- struct gf_flock *flock)
+fd_lk_insert_and_merge(fd_t *fd, int32_t cmd, struct gf_flock *flock)
{
- int32_t ret = -1;
- fd_lk_ctx_t *lk_ctx = NULL;
- fd_lk_ctx_node_t *lk = NULL;
-
- GF_VALIDATE_OR_GOTO ("fd-lk", fd, out);
- GF_VALIDATE_OR_GOTO ("fd-lk", flock, out);
-
- lk_ctx = fd_lk_ctx_ref (fd->lk_ctx);
- lk = fd_lk_ctx_node_new (cmd, flock);
-
- gf_log ("fd-lk", GF_LOG_DEBUG,
- "new lock requrest: owner = %s, fl_type = %s, "
- "fs_start = %"PRId64", fs_end = %"PRId64", "
- "user_flock: l_type = %s, l_start = %"PRId64", "
- "l_len = %"PRId64, lkowner_utoa (&flock->l_owner),
- get_lk_type (lk->fl_type), lk->fl_start,
- lk->fl_end, get_lk_type (lk->user_flock.l_type),
- lk->user_flock.l_start,
- lk->user_flock.l_len);
-
- LOCK (&lk_ctx->lock);
- {
- _fd_lk_insert_and_merge (lk_ctx, lk);
- print_lock_list (lk_ctx);
- }
- UNLOCK (&lk_ctx->lock);
+ int32_t ret = -1;
+ fd_lk_ctx_t *lk_ctx = NULL;
+ fd_lk_ctx_node_t *lk = NULL;
+
+ GF_VALIDATE_OR_GOTO("fd-lk", fd, out);
+ GF_VALIDATE_OR_GOTO("fd-lk", flock, out);
+
+ lk_ctx = fd_lk_ctx_ref(fd->lk_ctx);
+ lk = fd_lk_ctx_node_new(cmd, flock);
+
+ gf_msg_debug("fd-lk", 0,
+ "new lock request: owner = %s, fl_type = %s"
+ ", fs_start = %" PRId64 ", fs_end = %" PRId64
+ ", user_flock:"
+ " l_type = %s, l_start = %" PRId64 ", l_len = %" PRId64,
+ lkowner_utoa(&flock->l_owner), get_lk_type(lk->fl_type),
+ lk->fl_start, lk->fl_end, get_lk_type(lk->user_flock.l_type),
+ lk->user_flock.l_start, lk->user_flock.l_len);
+
+ LOCK(&lk_ctx->lock);
+ {
+ _fd_lk_insert_and_merge(lk_ctx, lk);
+ print_lock_list(lk_ctx);
+ }
+ UNLOCK(&lk_ctx->lock);
+
+ fd_lk_ctx_unref(lk_ctx);
+
+ ret = 0;
+out:
+ return ret;
+}
- fd_lk_ctx_unref (lk_ctx);
+gf_boolean_t
+fd_lk_ctx_empty(fd_lk_ctx_t *lk_ctx)
+{
+ gf_boolean_t verdict = _gf_true;
- ret = 0;
-out:
- return ret;
+ if (!lk_ctx)
+ return _gf_true;
+
+ LOCK(&lk_ctx->lock);
+ {
+ verdict = list_empty(&lk_ctx->lk_list);
+ }
+ UNLOCK(&lk_ctx->lock);
+
+ return verdict;
}
diff --git a/libglusterfs/src/fd-lk.h b/libglusterfs/src/fd-lk.h
deleted file mode 100644
index bdea8c2a007..00000000000
--- a/libglusterfs/src/fd-lk.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _FD_LK_H
-#define _FD_LK_H
-
-#include "fd.h"
-#include "locking.h"
-#include "list.h"
-#include "logging.h"
-#include "mem-pool.h"
-#include "mem-types.h"
-#include "glusterfs.h"
-
-#define get_lk_type(type) \
- type == F_UNLCK ? "F_UNLCK" : (type == F_RDLCK ? "F_RDLCK" : "F_WRLCK")
-
-#define get_lk_cmd(cmd) \
- cmd == F_SETLKW ? "F_SETLKW" : (cmd == F_SETLK ? "F_SETLK" : "F_GETLK")
-
-struct _fd;
-
-struct fd_lk_ctx {
- struct list_head lk_list;
- int ref;
- gf_lock_t lock;
-};
-typedef struct fd_lk_ctx fd_lk_ctx_t;
-
-struct fd_lk_ctx_node {
- int32_t cmd;
- struct gf_flock user_flock;
- off_t fl_start;
- off_t fl_end;
- short fl_type;
- struct list_head next;
-};
-typedef struct fd_lk_ctx_node fd_lk_ctx_node_t;
-
-fd_lk_ctx_t *
-_fd_lk_ctx_ref (fd_lk_ctx_t *lk_ctx);
-
-fd_lk_ctx_t *
-fd_lk_ctx_ref (fd_lk_ctx_t *lk_ctx);
-
-fd_lk_ctx_t *
-fd_lk_ctx_try_ref (fd_lk_ctx_t *lk_ctx);
-
-fd_lk_ctx_t *
-fd_lk_ctx_create ();
-
-int
-fd_lk_insert_and_merge (struct _fd *lk_ctx, int32_t cmd,
- struct gf_flock *flock);
-
-int
-fd_lk_ctx_unref (fd_lk_ctx_t *lk_ctx);
-
-#endif /* _FD_LK_H */
diff --git a/libglusterfs/src/fd.c b/libglusterfs/src/fd.c
index 3a7a592786c..62606e91164 100644
--- a/libglusterfs/src/fd.c
+++ b/libglusterfs/src/fd.c
@@ -8,1150 +8,1196 @@
cases as published by the Free Software Foundation.
*/
-#include "fd.h"
-#include "glusterfs.h"
-#include "inode.h"
-#include "dict.h"
-#include "statedump.h"
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
+#include "glusterfs/fd.h"
+#include <errno.h> // for EINVAL, errno, ENOMEM
+#include <inttypes.h> // for PRIu64
+#include <stdint.h> // for UINT32_MAX
+#include <string.h> // for NULL, memcpy, memset, size_t
+#include "glusterfs/statedump.h"
static int
-gf_fd_fdtable_expand (fdtable_t *fdtable, uint32_t nr);
-
+gf_fd_fdtable_expand(fdtable_t *fdtable, uint32_t nr);
fd_t *
-__fd_ref (fd_t *fd);
+__fd_ref(fd_t *fd);
static int
-gf_fd_chain_fd_entries (fdentry_t *entries, uint32_t startidx,
- uint32_t endcount)
+gf_fd_chain_fd_entries(fdentry_t *entries, uint32_t startidx, uint32_t endcount)
{
- uint32_t i = 0;
+ uint32_t i = 0;
- if (!entries) {
- gf_log_callingfn ("fd", GF_LOG_WARNING, "!entries");
- return -1;
- }
+ if (!entries) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!entries");
+ return -1;
+ }
- /* Chain only till the second to last entry because we want to
- * ensure that the last entry has GF_FDTABLE_END.
- */
- for (i = startidx; i < (endcount - 1); i++)
- entries[i].next_free = i + 1;
+ /* Chain only till the second to last entry because we want to
+ * ensure that the last entry has GF_FDTABLE_END.
+ */
+ for (i = startidx; i < (endcount - 1); i++)
+ entries[i].next_free = i + 1;
- /* i has already been incremented upto the last entry. */
- entries[i].next_free = GF_FDTABLE_END;
+ /* i has already been incremented up to the last entry. */
+ entries[i].next_free = GF_FDTABLE_END;
- return 0;
+ return 0;
}
-
static int
-gf_fd_fdtable_expand (fdtable_t *fdtable, uint32_t nr)
+gf_fd_fdtable_expand(fdtable_t *fdtable, uint32_t nr)
{
- fdentry_t *oldfds = NULL;
- uint32_t oldmax_fds = -1;
- int ret = -1;
-
- if (fdtable == NULL || nr < 0) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
- ret = EINVAL;
- goto out;
- }
-
- nr /= (1024 / sizeof (fdentry_t));
- nr = gf_roundup_next_power_of_two (nr + 1);
- nr *= (1024 / sizeof (fdentry_t));
-
- oldfds = fdtable->fdentries;
- oldmax_fds = fdtable->max_fds;
-
- fdtable->fdentries = GF_CALLOC (nr, sizeof (fdentry_t),
- gf_common_mt_fdentry_t);
- if (!fdtable->fdentries) {
- ret = ENOMEM;
- goto out;
- }
- fdtable->max_fds = nr;
-
- if (oldfds) {
- uint32_t cpy = oldmax_fds * sizeof (fdentry_t);
- memcpy (fdtable->fdentries, oldfds, cpy);
- }
-
- gf_fd_chain_fd_entries (fdtable->fdentries, oldmax_fds,
- fdtable->max_fds);
-
- /* Now that expansion is done, we must update the fd list
- * head pointer so that the fd allocation functions can continue
- * using the expanded table.
- */
- fdtable->first_free = oldmax_fds;
- GF_FREE (oldfds);
- ret = 0;
+ fdentry_t *oldfds = NULL;
+ uint32_t oldmax_fds = -1;
+ int ret = -1;
+
+ if (fdtable == NULL || nr > UINT32_MAX) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ ret = EINVAL;
+ goto out;
+ }
+
+ nr /= (1024 / sizeof(fdentry_t));
+ nr = gf_roundup_next_power_of_two(nr + 1);
+ nr *= (1024 / sizeof(fdentry_t));
+
+ oldfds = fdtable->fdentries;
+ oldmax_fds = fdtable->max_fds;
+
+ fdtable->fdentries = GF_CALLOC(nr, sizeof(fdentry_t),
+ gf_common_mt_fdentry_t);
+ if (!fdtable->fdentries) {
+ ret = ENOMEM;
+ goto out;
+ }
+ fdtable->max_fds = nr;
+
+ if (oldfds) {
+ uint32_t cpy = oldmax_fds * sizeof(fdentry_t);
+ memcpy(fdtable->fdentries, oldfds, cpy);
+ }
+
+ gf_fd_chain_fd_entries(fdtable->fdentries, oldmax_fds, fdtable->max_fds);
+
+ /* Now that expansion is done, we must update the fd list
+ * head pointer so that the fd allocation functions can continue
+ * using the expanded table.
+ */
+ fdtable->first_free = oldmax_fds;
+ GF_FREE(oldfds);
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
fdtable_t *
-gf_fd_fdtable_alloc (void)
+gf_fd_fdtable_alloc(void)
{
- fdtable_t *fdtable = NULL;
+ fdtable_t *fdtable = NULL;
- fdtable = GF_CALLOC (1, sizeof (*fdtable), gf_common_mt_fdtable_t);
- if (!fdtable)
- return NULL;
+ fdtable = GF_CALLOC(1, sizeof(*fdtable), gf_common_mt_fdtable_t);
+ if (!fdtable)
+ return NULL;
- pthread_mutex_init (&fdtable->lock, NULL);
+ pthread_rwlock_init(&fdtable->lock, NULL);
- pthread_mutex_lock (&fdtable->lock);
- {
- gf_fd_fdtable_expand (fdtable, 0);
- }
- pthread_mutex_unlock (&fdtable->lock);
+ pthread_rwlock_wrlock(&fdtable->lock);
+ {
+ gf_fd_fdtable_expand(fdtable, 0);
+ }
+ pthread_rwlock_unlock(&fdtable->lock);
- return fdtable;
+ return fdtable;
}
-
-fdentry_t *
-__gf_fd_fdtable_get_all_fds (fdtable_t *fdtable, uint32_t *count)
+static fdentry_t *
+__gf_fd_fdtable_get_all_fds(fdtable_t *fdtable, uint32_t *count)
{
- fdentry_t *fdentries = NULL;
+ fdentry_t *fdentries = NULL;
- if (count == NULL) {
- gf_log_callingfn ("fd", GF_LOG_WARNING, "!count");
- goto out;
- }
+ if (count == NULL) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!count");
+ goto out;
+ }
- fdentries = fdtable->fdentries;
- fdtable->fdentries = GF_CALLOC (fdtable->max_fds, sizeof (fdentry_t),
- gf_common_mt_fdentry_t);
- gf_fd_chain_fd_entries (fdtable->fdentries, 0, fdtable->max_fds);
- *count = fdtable->max_fds;
+ fdentries = fdtable->fdentries;
+ fdtable->fdentries = GF_CALLOC(fdtable->max_fds, sizeof(fdentry_t),
+ gf_common_mt_fdentry_t);
+ gf_fd_chain_fd_entries(fdtable->fdentries, 0, fdtable->max_fds);
+ *count = fdtable->max_fds;
out:
- return fdentries;
+ return fdentries;
}
-
fdentry_t *
-gf_fd_fdtable_get_all_fds (fdtable_t *fdtable, uint32_t *count)
+gf_fd_fdtable_get_all_fds(fdtable_t *fdtable, uint32_t *count)
{
- fdentry_t *entries = NULL;
+ fdentry_t *entries = NULL;
- if (fdtable) {
- pthread_mutex_lock (&fdtable->lock);
- {
- entries = __gf_fd_fdtable_get_all_fds (fdtable, count);
- }
- pthread_mutex_unlock (&fdtable->lock);
+ if (fdtable) {
+ pthread_rwlock_wrlock(&fdtable->lock);
+ {
+ entries = __gf_fd_fdtable_get_all_fds(fdtable, count);
}
+ pthread_rwlock_unlock(&fdtable->lock);
+ }
- return entries;
+ return entries;
}
-
-fdentry_t *
-__gf_fd_fdtable_copy_all_fds (fdtable_t *fdtable, uint32_t *count)
+static fdentry_t *
+__gf_fd_fdtable_copy_all_fds(fdtable_t *fdtable, uint32_t *count)
{
- fdentry_t *fdentries = NULL;
- int i = 0;
+ fdentry_t *fdentries = NULL;
+ int i = 0;
- if (count == NULL) {
- gf_log_callingfn ("fd", GF_LOG_WARNING, "!count");
- goto out;
- }
+ if (count == NULL) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!count");
+ goto out;
+ }
- fdentries = GF_CALLOC (fdtable->max_fds, sizeof (fdentry_t),
- gf_common_mt_fdentry_t);
- if (fdentries == NULL) {
- goto out;
- }
+ fdentries = GF_CALLOC(fdtable->max_fds, sizeof(fdentry_t),
+ gf_common_mt_fdentry_t);
+ if (fdentries == NULL) {
+ goto out;
+ }
- *count = fdtable->max_fds;
+ *count = fdtable->max_fds;
- for (i = 0; i < fdtable->max_fds; i++) {
- if (fdtable->fdentries[i].fd != NULL) {
- fdentries[i].fd = fd_ref (fdtable->fdentries[i].fd);
- }
+ for (i = 0; i < fdtable->max_fds; i++) {
+ if (fdtable->fdentries[i].fd != NULL) {
+ fdentries[i].fd = fd_ref(fdtable->fdentries[i].fd);
}
+ }
out:
- return fdentries;
+ return fdentries;
}
-
fdentry_t *
-gf_fd_fdtable_copy_all_fds (fdtable_t *fdtable, uint32_t *count)
+gf_fd_fdtable_copy_all_fds(fdtable_t *fdtable, uint32_t *count)
{
- fdentry_t *entries = NULL;
+ fdentry_t *entries = NULL;
- if (fdtable) {
- pthread_mutex_lock (&fdtable->lock);
- {
- entries = __gf_fd_fdtable_copy_all_fds (fdtable, count);
- }
- pthread_mutex_unlock (&fdtable->lock);
+ if (fdtable) {
+ pthread_rwlock_rdlock(&fdtable->lock);
+ {
+ entries = __gf_fd_fdtable_copy_all_fds(fdtable, count);
}
+ pthread_rwlock_unlock(&fdtable->lock);
+ }
- return entries;
+ return entries;
}
-
void
-gf_fd_fdtable_destroy (fdtable_t *fdtable)
+gf_fd_fdtable_destroy(fdtable_t *fdtable)
{
- struct list_head list = {0, };
- fd_t *fd = NULL;
- fdentry_t *fdentries = NULL;
- uint32_t fd_count = 0;
- int32_t i = 0;
-
- INIT_LIST_HEAD (&list);
-
- if (!fdtable) {
- gf_log_callingfn ("fd", GF_LOG_WARNING, "!fdtable");
- return;
- }
-
- pthread_mutex_lock (&fdtable->lock);
- {
- fdentries = __gf_fd_fdtable_get_all_fds (fdtable, &fd_count);
- GF_FREE (fdtable->fdentries);
- }
- pthread_mutex_unlock (&fdtable->lock);
-
- if (fdentries != NULL) {
- for (i = 0; i < fd_count; i++) {
- fd = fdentries[i].fd;
- if (fd != NULL) {
- fd_unref (fd);
- }
- }
-
- GF_FREE (fdentries);
- pthread_mutex_destroy (&fdtable->lock);
- GF_FREE (fdtable);
- }
+ struct list_head list = {
+ 0,
+ };
+ fd_t *fd = NULL;
+ fdentry_t *fdentries = NULL;
+ uint32_t fd_count = 0;
+ int32_t i = 0;
+
+ INIT_LIST_HEAD(&list);
+
+ if (!fdtable) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!fdtable");
+ return;
+ }
+
+ pthread_rwlock_wrlock(&fdtable->lock);
+ {
+ fdentries = __gf_fd_fdtable_get_all_fds(fdtable, &fd_count);
+ GF_FREE(fdtable->fdentries);
+ }
+ pthread_rwlock_unlock(&fdtable->lock);
+
+ if (fdentries != NULL) {
+ for (i = 0; i < fd_count; i++) {
+ fd = fdentries[i].fd;
+ if (fd != NULL) {
+ fd_unref(fd);
+ }
+ }
+
+ GF_FREE(fdentries);
+ pthread_rwlock_destroy(&fdtable->lock);
+ GF_FREE(fdtable);
+ }
}
-
int
-gf_fd_unused_get (fdtable_t *fdtable, fd_t *fdptr)
+gf_fd_unused_get(fdtable_t *fdtable, fd_t *fdptr)
{
- int32_t fd = -1;
- fdentry_t *fde = NULL;
- int error;
- int alloc_attempts = 0;
-
- if (fdtable == NULL || fdptr == NULL) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
- return EINVAL;
- }
-
- pthread_mutex_lock (&fdtable->lock);
- {
- fd_alloc_try_again:
- if (fdtable->first_free != GF_FDTABLE_END) {
- fde = &fdtable->fdentries[fdtable->first_free];
- fd = fdtable->first_free;
- fdtable->first_free = fde->next_free;
- fde->next_free = GF_FDENTRY_ALLOCATED;
- fde->fd = fdptr;
- } else {
- /* If this is true, there is something
- * seriously wrong with our data structures.
- */
- if (alloc_attempts >= 2) {
- gf_log ("fd", GF_LOG_ERROR,
- "multiple attempts to expand fd table"
- " have failed.");
- goto out;
- }
- error = gf_fd_fdtable_expand (fdtable,
- fdtable->max_fds + 1);
- if (error) {
- gf_log ("fd", GF_LOG_ERROR,
- "Cannot expand fdtable: %s",
- strerror (error));
- goto out;
- }
- ++alloc_attempts;
- /* At this point, the table stands expanded
- * with the first_free referring to the first
- * free entry in the new set of fdentries that
- * have just been allocated. That means, the
- * above logic should just work.
- */
- goto fd_alloc_try_again;
- }
- }
+ int32_t fd = -1;
+ fdentry_t *fde = NULL;
+ int error;
+ int alloc_attempts = 0;
+
+ if (fdtable == NULL || fdptr == NULL) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return EINVAL;
+ }
+
+ pthread_rwlock_wrlock(&fdtable->lock);
+ {
+ fd_alloc_try_again:
+ if (fdtable->first_free != GF_FDTABLE_END) {
+ fde = &fdtable->fdentries[fdtable->first_free];
+ fd = fdtable->first_free;
+ fdtable->first_free = fde->next_free;
+ fde->next_free = GF_FDENTRY_ALLOCATED;
+ fde->fd = fdptr;
+ } else {
+ /* If this is true, there is something
+ * seriously wrong with our data structures.
+ */
+ if (alloc_attempts >= 2) {
+ gf_msg("fd", GF_LOG_ERROR, 0, LG_MSG_EXPAND_FD_TABLE_FAILED,
+ "multiple attempts to expand fd table"
+ " have failed.");
+ goto out;
+ }
+ error = gf_fd_fdtable_expand(fdtable, fdtable->max_fds + 1);
+ if (error) {
+ gf_msg("fd", GF_LOG_ERROR, error, LG_MSG_EXPAND_FD_TABLE_FAILED,
+ "Cannot expand fdtable");
+ goto out;
+ }
+ ++alloc_attempts;
+ /* At this point, the table stands expanded
+ * with the first_free referring to the first
+ * free entry in the new set of fdentries that
+ * have just been allocated. That means, the
+ * above logic should just work.
+ */
+ goto fd_alloc_try_again;
+ }
+ }
out:
- pthread_mutex_unlock (&fdtable->lock);
+ pthread_rwlock_unlock(&fdtable->lock);
- return fd;
+ return fd;
}
-
-inline void
-gf_fd_put (fdtable_t *fdtable, int32_t fd)
+void
+gf_fd_put(fdtable_t *fdtable, int32_t fd)
{
- fd_t *fdptr = NULL;
- fdentry_t *fde = NULL;
-
- if (fd == -2)
- /* anonymous fd */
- return;
+ fd_t *fdptr = NULL;
+ fdentry_t *fde = NULL;
- if (fdtable == NULL || fd < 0) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
- return;
- }
+ if (fd == GF_ANON_FD_NO)
+ return;
- if (!(fd < fdtable->max_fds)) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
- return;
- }
+ if (fdtable == NULL || fd < 0) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return;
+ }
- pthread_mutex_lock (&fdtable->lock);
- {
- fde = &fdtable->fdentries[fd];
- /* If the entry is not allocated, put operation must return
- * without doing anything.
- * This has the potential of masking out any bugs in a user of
- * fd that ends up calling gf_fd_put twice for the same fd or
- * for an unallocated fd, but it is a price we have to pay for
- * ensuring sanity of our fd-table.
- */
- if (fde->next_free != GF_FDENTRY_ALLOCATED)
- goto unlock_out;
- fdptr = fde->fd;
- fde->fd = NULL;
- fde->next_free = fdtable->first_free;
- fdtable->first_free = fd;
- }
+ if (!(fd < fdtable->max_fds)) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return;
+ }
+
+ pthread_rwlock_wrlock(&fdtable->lock);
+ {
+ fde = &fdtable->fdentries[fd];
+ /* If the entry is not allocated, put operation must return
+ * without doing anything.
+ * This has the potential of masking out any bugs in a user of
+ * fd that ends up calling gf_fd_put twice for the same fd or
+ * for an unallocated fd, but it is a price we have to pay for
+ * ensuring sanity of our fd-table.
+ */
+ if (fde->next_free != GF_FDENTRY_ALLOCATED)
+ goto unlock_out;
+ fdptr = fde->fd;
+ fde->fd = NULL;
+ fde->next_free = fdtable->first_free;
+ fdtable->first_free = fd;
+ }
unlock_out:
- pthread_mutex_unlock (&fdtable->lock);
+ pthread_rwlock_unlock(&fdtable->lock);
- if (fdptr) {
- fd_unref (fdptr);
- }
+ if (fdptr) {
+ fd_unref(fdptr);
+ }
}
-
-inline void
-gf_fdptr_put (fdtable_t *fdtable, fd_t *fd)
+void
+gf_fdptr_put(fdtable_t *fdtable, fd_t *fd)
{
- fdentry_t *fde = NULL;
- int32_t i = 0;
+ fdentry_t *fde = NULL;
+ int32_t i = 0;
- if ((fdtable == NULL) || (fd == NULL)) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
- return;
- }
-
- pthread_mutex_lock (&fdtable->lock);
- {
- for (i = 0; i < fdtable->max_fds; i++) {
- if (fdtable->fdentries[i].fd == fd) {
- fde = &fdtable->fdentries[i];
- break;
- }
- }
+ if ((fdtable == NULL) || (fd == NULL)) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return;
+ }
- if (fde == NULL) {
- gf_log_callingfn ("fd", GF_LOG_WARNING,
- "fd (%p) is not present in fdtable", fd);
- goto unlock_out;
- }
+ pthread_rwlock_wrlock(&fdtable->lock);
+ {
+ for (i = 0; i < fdtable->max_fds; i++) {
+ if (fdtable->fdentries[i].fd == fd) {
+ fde = &fdtable->fdentries[i];
+ break;
+ }
+ }
- /* If the entry is not allocated, put operation must return
- * without doing anything.
- * This has the potential of masking out any bugs in a user of
- * fd that ends up calling gf_fd_put twice for the same fd or
- * for an unallocated fd, but it is a price we have to pay for
- * ensuring sanity of our fd-table.
- */
- if (fde->next_free != GF_FDENTRY_ALLOCATED)
- goto unlock_out;
- fde->fd = NULL;
- fde->next_free = fdtable->first_free;
- fdtable->first_free = i;
+ if (fde == NULL) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, 0,
+ LG_MSG_FD_NOT_FOUND_IN_FDTABLE,
+ "fd (%p) is not present in fdtable", fd);
+ goto unlock_out;
}
+
+ /* If the entry is not allocated, put operation must return
+ * without doing anything.
+ * This has the potential of masking out any bugs in a user of
+ * fd that ends up calling gf_fd_put twice for the same fd or
+ * for an unallocated fd, but it is a price we have to pay for
+ * ensuring sanity of our fd-table.
+ */
+ if (fde->next_free != GF_FDENTRY_ALLOCATED)
+ goto unlock_out;
+ fde->fd = NULL;
+ fde->next_free = fdtable->first_free;
+ fdtable->first_free = i;
+ }
unlock_out:
- pthread_mutex_unlock (&fdtable->lock);
+ pthread_rwlock_unlock(&fdtable->lock);
- if ((fd != NULL) && (fde != NULL)) {
- fd_unref (fd);
- }
+ if ((fd != NULL) && (fde != NULL)) {
+ fd_unref(fd);
+ }
}
-
fd_t *
-gf_fd_fdptr_get (fdtable_t *fdtable, int64_t fd)
+gf_fd_fdptr_get(fdtable_t *fdtable, int64_t fd)
{
- fd_t *fdptr = NULL;
+ fd_t *fdptr = NULL;
- if (fdtable == NULL || fd < 0) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
- errno = EINVAL;
- return NULL;
- }
+ if (fdtable == NULL || fd < 0) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ errno = EINVAL;
+ return NULL;
+ }
- if (!(fd < fdtable->max_fds)) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
- errno = EINVAL;
- return NULL;
- }
+ if (!(fd < fdtable->max_fds)) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ errno = EINVAL;
+ return NULL;
+ }
- pthread_mutex_lock (&fdtable->lock);
- {
- fdptr = fdtable->fdentries[fd].fd;
- if (fdptr) {
- fd_ref (fdptr);
- }
+ pthread_rwlock_rdlock(&fdtable->lock);
+ {
+ fdptr = fdtable->fdentries[fd].fd;
+ if (fdptr) {
+ fd_ref(fdptr);
}
- pthread_mutex_unlock (&fdtable->lock);
+ }
+ pthread_rwlock_unlock(&fdtable->lock);
- return fdptr;
+ return fdptr;
}
-
fd_t *
-__fd_ref (fd_t *fd)
+__fd_ref(fd_t *fd)
{
- ++fd->refcount;
+ GF_ATOMIC_INC(fd->refcount);
- return fd;
+ return fd;
}
-
fd_t *
-fd_ref (fd_t *fd)
+fd_ref(fd_t *fd)
{
- fd_t *refed_fd = NULL;
-
- if (!fd) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "null fd");
- return NULL;
- }
+ if (!fd) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "null fd");
+ return NULL;
+ }
- LOCK (&fd->inode->lock);
- refed_fd = __fd_ref (fd);
- UNLOCK (&fd->inode->lock);
+ GF_ATOMIC_INC(fd->refcount);
- return refed_fd;
+ return fd;
}
-
-fd_t *
-__fd_unref (fd_t *fd)
+static void
+fd_destroy(fd_t *fd, gf_boolean_t bound)
{
- GF_ASSERT (fd->refcount);
-
- --fd->refcount;
-
- if (fd->refcount == 0) {
- list_del_init (&fd->inode_list);
- }
-
- return fd;
+ xlator_t *xl = NULL;
+ int i = 0;
+ xlator_t *old_THIS = NULL;
+
+ if (fd == NULL) {
+ gf_msg_callingfn("xlator", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ goto out;
+ }
+
+ if (fd->inode == NULL) {
+ gf_msg_callingfn("xlator", GF_LOG_ERROR, 0, LG_MSG_FD_INODE_NULL,
+ "fd->inode is NULL");
+ goto out;
+ }
+ if (!fd->_ctx)
+ goto out;
+
+ if (IA_ISDIR(fd->inode->ia_type)) {
+ for (i = 0; i < fd->xl_count; i++) {
+ if (fd->_ctx[i].key) {
+ xl = fd->_ctx[i].xl_key;
+ old_THIS = THIS;
+ THIS = xl;
+ if (!xl->call_cleanup && xl->cbks->releasedir)
+ xl->cbks->releasedir(xl, fd);
+ THIS = old_THIS;
+ }
+ }
+ } else {
+ for (i = 0; i < fd->xl_count; i++) {
+ if (fd->_ctx[i].key) {
+ xl = fd->_ctx[i].xl_key;
+ old_THIS = THIS;
+ THIS = xl;
+ if (!xl->call_cleanup && xl->cbks->release)
+ xl->cbks->release(xl, fd);
+ THIS = old_THIS;
+ }
+ }
+ }
+
+ LOCK_DESTROY(&fd->lock);
+
+ GF_FREE(fd->_ctx);
+ if (bound) {
+ /*Decrease the count only after close happens on file*/
+ LOCK(&fd->inode->lock);
+ {
+ fd->inode->fd_count--;
+ }
+ UNLOCK(&fd->inode->lock);
+ }
+ inode_unref(fd->inode);
+ fd->inode = NULL;
+ fd_lk_ctx_unref(fd->lk_ctx);
+ mem_put(fd);
+out:
+ return;
}
-
-static void
-fd_destroy (fd_t *fd)
+void
+fd_close(fd_t *fd)
{
- xlator_t *xl = NULL;
- int i = 0;
- xlator_t *old_THIS = NULL;
+ xlator_t *xl, *old_THIS;
- if (fd == NULL){
- gf_log_callingfn ("xlator", GF_LOG_ERROR, "invalid argument");
- goto out;
- }
+ old_THIS = THIS;
- if (fd->inode == NULL){
- gf_log_callingfn ("xlator", GF_LOG_ERROR, "fd->inode is NULL");
- goto out;
- }
- if (!fd->_ctx)
- goto out;
+ for (xl = fd->inode->table->xl->graph->first; xl != NULL; xl = xl->next) {
+ if (!xl->call_cleanup) {
+ THIS = xl;
- if (IA_ISDIR (fd->inode->ia_type)) {
- for (i = 0; i < fd->xl_count; i++) {
- if (fd->_ctx[i].key) {
- xl = fd->_ctx[i].xl_key;
- old_THIS = THIS;
- THIS = xl;
- if (xl->cbks->releasedir)
- xl->cbks->releasedir (xl, fd);
- THIS = old_THIS;
- }
+ if (IA_ISDIR(fd->inode->ia_type)) {
+ if (xl->cbks->fdclosedir != NULL) {
+ xl->cbks->fdclosedir(xl, fd);
}
- } else {
- for (i = 0; i < fd->xl_count; i++) {
- if (fd->_ctx[i].key) {
- xl = fd->_ctx[i].xl_key;
- old_THIS = THIS;
- THIS = xl;
- if (xl->cbks->release)
- xl->cbks->release (xl, fd);
- THIS = old_THIS;
- }
+ } else {
+ if (xl->cbks->fdclose != NULL) {
+ xl->cbks->fdclose(xl, fd);
}
+ }
}
+ }
- LOCK_DESTROY (&fd->lock);
-
- GF_FREE (fd->_ctx);
- inode_unref (fd->inode);
- fd->inode = (inode_t *)0xaaaaaaaa;
- fd_lk_ctx_unref (fd->lk_ctx);
- mem_put (fd);
-out:
- return;
+ THIS = old_THIS;
}
-
void
-fd_unref (fd_t *fd)
+fd_unref(fd_t *fd)
{
- int32_t refcount = 0;
-
- if (!fd) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "fd is NULL");
- return;
- }
+ int32_t refcount = 0;
+ gf_boolean_t bound = _gf_false;
- LOCK (&fd->inode->lock);
- {
- __fd_unref (fd);
- refcount = fd->refcount;
- }
- UNLOCK (&fd->inode->lock);
+ if (!fd) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "fd is NULL");
+ return;
+ }
+ LOCK(&fd->inode->lock);
+ {
+ refcount = GF_ATOMIC_DEC(fd->refcount);
if (refcount == 0) {
- fd_destroy (fd);
+ if (!list_empty(&fd->inode_list)) {
+ list_del_init(&fd->inode_list);
+ fd->inode->active_fd_count--;
+ bound = _gf_true;
+ }
}
+ }
+ UNLOCK(&fd->inode->lock);
- return ;
-}
+ if (refcount == 0) {
+ fd_destroy(fd, bound);
+ }
+ return;
+}
-fd_t *
-__fd_bind (fd_t *fd)
+static fd_t *
+__fd_bind(fd_t *fd)
{
- list_del_init (&fd->inode_list);
- list_add (&fd->inode_list, &fd->inode->fd_list);
+ list_del_init(&fd->inode_list);
+ list_add(&fd->inode_list, &fd->inode->fd_list);
+ fd->inode->fd_count++;
+ fd->inode->active_fd_count++;
- return fd;
+ return fd;
}
-
fd_t *
-fd_bind (fd_t *fd)
+fd_bind(fd_t *fd)
{
- if (!fd || !fd->inode) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "!fd || !fd->inode");
- return NULL;
- }
+ if (!fd || !fd->inode) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "!fd || !fd->inode");
+ return NULL;
+ }
- LOCK (&fd->inode->lock);
- {
- fd = __fd_bind (fd);
- }
- UNLOCK (&fd->inode->lock);
+ LOCK(&fd->inode->lock);
+ {
+ fd = __fd_bind(fd);
+ }
+ UNLOCK(&fd->inode->lock);
- return fd;
+ return fd;
}
-
static fd_t *
-__fd_create (inode_t *inode, uint64_t pid)
+fd_allocate(inode_t *inode, uint64_t pid)
{
- fd_t *fd = NULL;
+ fd_t *fd;
- if (inode == NULL) {
- gf_log_callingfn ("fd", GF_LOG_ERROR, "invalid argument");
- return NULL;
- }
-
- fd = mem_get0 (inode->table->fd_mem_pool);
- if (!fd)
- goto out;
-
- fd->xl_count = inode->table->xl->graph->xl_count + 1;
-
- fd->_ctx = GF_CALLOC (1, (sizeof (struct _fd_ctx) * fd->xl_count),
- gf_common_mt_fd_ctx);
- if (!fd->_ctx)
- goto free_fd;
-
- fd->lk_ctx = fd_lk_ctx_create ();
- if (!fd->lk_ctx)
- goto free_fd_ctx;
+ if (inode == NULL) {
+ gf_msg_callingfn("fd", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return NULL;
+ }
- fd->inode = inode_ref (inode);
+ fd = mem_get0(inode->table->fd_mem_pool);
+ if (fd == NULL) {
+ return NULL;
+ }
+
+ fd->xl_count = inode->table->xl->graph->xl_count + 1;
+
+ fd->_ctx = GF_CALLOC(1, (sizeof(struct _fd_ctx) * fd->xl_count),
+ gf_common_mt_fd_ctx);
+ if (fd->_ctx == NULL) {
+ goto failed;
+ }
+
+ fd->lk_ctx = fd_lk_ctx_create();
+ if (fd->lk_ctx != NULL) {
+ /* We need to take a reference from the inode, but we cannot do it
+ * here because this function can be called with the inode lock taken
+ * and inode_ref() takes the inode's table lock. This is the reverse
+ * of the logical lock acquisition order and can cause a deadlock. So
+ * we simply assign the inode here and we delefate the inode reference
+ * responsibility to the caller (when this function succeeds and the
+ * inode lock is released). This is safe because the caller must hold
+ * a reference of the inode to use it, so it's guaranteed that the
+ * number of references won't reach 0 before the caller finishes.
+ *
+ * TODO: minimize use of locks in favor of atomic operations to avoid
+ * these dependencies. */
+ fd->inode = inode;
fd->pid = pid;
- INIT_LIST_HEAD (&fd->inode_list);
-
- LOCK_INIT (&fd->lock);
-out:
+ INIT_LIST_HEAD(&fd->inode_list);
+ LOCK_INIT(&fd->lock);
+ GF_ATOMIC_INIT(fd->refcount, 1);
return fd;
+ }
-free_fd_ctx:
- GF_FREE (fd->_ctx);
-free_fd:
- mem_put (fd);
+ GF_FREE(fd->_ctx);
- return NULL;
-}
+failed:
+ mem_put(fd);
+ return NULL;
+}
fd_t *
-fd_create (inode_t *inode, pid_t pid)
+fd_create_uint64(inode_t *inode, uint64_t pid)
{
- fd_t *fd = NULL;
+ fd_t *fd;
- fd = __fd_create (inode, (uint64_t)pid);
- if (!fd)
- goto out;
+ fd = fd_allocate(inode, pid);
+ if (fd != NULL) {
+ /* fd_allocate() doesn't get a reference from the inode. We need to
+ * take it here in case of success. */
+ inode_ref(inode);
+ }
- fd = fd_ref (fd);
-
-out:
- return fd;
+ return fd;
}
fd_t *
-fd_create_uint64 (inode_t *inode, uint64_t pid)
+fd_create(inode_t *inode, pid_t pid)
{
- fd_t *fd = NULL;
-
- fd = __fd_create (inode, pid);
- if (!fd)
- goto out;
-
- fd = fd_ref (fd);
-
-out:
- return fd;
+ return fd_create_uint64(inode, (uint64_t)pid);
}
-
static fd_t *
-__fd_lookup (inode_t *inode, uint64_t pid)
+__fd_lookup(inode_t *inode, uint64_t pid)
{
- fd_t *iter_fd = NULL;
- fd_t *fd = NULL;
+ fd_t *iter_fd = NULL;
+ fd_t *fd = NULL;
- if (list_empty (&inode->fd_list))
- return NULL;
+ if (list_empty(&inode->fd_list))
+ return NULL;
+ list_for_each_entry(iter_fd, &inode->fd_list, inode_list)
+ {
+ if (iter_fd->anonymous)
+ /* If someone was interested in getting an
+ anonymous fd (or was OK getting an anonymous fd),
+ they can as well call fd_anonymous() directly */
+ continue;
- list_for_each_entry (iter_fd, &inode->fd_list, inode_list) {
- if (!pid || iter_fd->pid == pid) {
- fd = __fd_ref (iter_fd);
- break;
- }
+ if (!pid || iter_fd->pid == pid) {
+ fd = __fd_ref(iter_fd);
+ break;
}
+ }
- return fd;
+ return fd;
}
-
fd_t *
-fd_lookup (inode_t *inode, pid_t pid)
+fd_lookup(inode_t *inode, pid_t pid)
{
- fd_t *fd = NULL;
+ fd_t *fd = NULL;
- if (!inode) {
- gf_log_callingfn ("fd", GF_LOG_WARNING, "!inode");
- return NULL;
- }
+ if (!inode) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!inode");
+ return NULL;
+ }
- LOCK (&inode->lock);
- {
- fd = __fd_lookup (inode, (uint64_t)pid);
- }
- UNLOCK (&inode->lock);
+ LOCK(&inode->lock);
+ {
+ fd = __fd_lookup(inode, (uint64_t)pid);
+ }
+ UNLOCK(&inode->lock);
- return fd;
+ return fd;
}
fd_t *
-fd_lookup_uint64 (inode_t *inode, uint64_t pid)
+fd_lookup_uint64(inode_t *inode, uint64_t pid)
{
- fd_t *fd = NULL;
+ fd_t *fd = NULL;
- if (!inode) {
- gf_log_callingfn ("fd", GF_LOG_WARNING, "!inode");
- return NULL;
- }
+ if (!inode) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!inode");
+ return NULL;
+ }
- LOCK (&inode->lock);
- {
- fd = __fd_lookup (inode, pid);
- }
- UNLOCK (&inode->lock);
+ LOCK(&inode->lock);
+ {
+ fd = __fd_lookup(inode, pid);
+ }
+ UNLOCK(&inode->lock);
- return fd;
+ return fd;
}
+static fd_t *
+__fd_lookup_anonymous(inode_t *inode, int32_t flags)
+{
+ fd_t *iter_fd = NULL;
+ fd_t *fd = NULL;
+
+ if (list_empty(&inode->fd_list))
+ return NULL;
+
+ list_for_each_entry(iter_fd, &inode->fd_list, inode_list)
+ {
+ if ((iter_fd->anonymous) && (flags == iter_fd->flags)) {
+ fd = __fd_ref(iter_fd);
+ break;
+ }
+ }
+
+ return fd;
+}
fd_t *
-__fd_anonymous (inode_t *inode)
+fd_anonymous_with_flags(inode_t *inode, int32_t flags)
{
- fd_t *fd = NULL;
+ fd_t *fd = NULL;
+ bool ref = false;
- fd = __fd_lookup (inode, (uint64_t)-1);
+ LOCK(&inode->lock);
- /* if (fd); then we already have increased the refcount in
- __fd_lookup(), so no need of one more fd_ref().
- if (!fd); then both create and bind wont bump up the ref
- count, so we have to call fd_ref() after bind. */
- if (!fd) {
- fd = __fd_create (inode, (uint64_t)-1);
+ fd = __fd_lookup_anonymous(inode, flags);
- if (!fd)
- return NULL;
+ /* if (fd); then we already have increased the refcount in
+ __fd_lookup_anonymous(), so no need of one more fd_ref().
+ if (!fd); then both create and bind won't bump up the ref
+ count, so we have to call fd_ref() after bind. */
+ if (fd == NULL) {
+ fd = fd_allocate(inode, 0);
+ if (fd != NULL) {
+ fd->anonymous = _gf_true;
+ fd->flags = GF_ANON_FD_FLAGS | (flags & O_DIRECT);
- __fd_bind (fd);
+ __fd_bind(fd);
- __fd_ref (fd);
+ ref = true;
}
+ }
- return fd;
-}
+ UNLOCK(&inode->lock);
+ if (ref) {
+ /* fd_allocate() doesn't get a reference from the inode. We need to
+ * take it here in case of success. */
+ inode_ref(inode);
+ }
+
+ return fd;
+}
fd_t *
-fd_anonymous (inode_t *inode)
+fd_anonymous(inode_t *inode)
{
- fd_t *fd = NULL;
+ return fd_anonymous_with_flags(inode, 0);
+}
- LOCK (&inode->lock);
- {
- fd = __fd_anonymous (inode);
- }
- UNLOCK (&inode->lock);
+fd_t *
+fd_lookup_anonymous(inode_t *inode, int32_t flags)
+{
+ fd_t *fd = NULL;
- return fd;
+ if (!inode) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "!inode");
+ return NULL;
+ }
+
+ LOCK(&inode->lock);
+ {
+ fd = __fd_lookup_anonymous(inode, flags);
+ }
+ UNLOCK(&inode->lock);
+ return fd;
}
-
gf_boolean_t
-fd_is_anonymous (fd_t *fd)
+fd_is_anonymous(fd_t *fd)
{
- return (fd && fd->pid == -1);
+ return (fd && fd->anonymous);
}
-
uint8_t
-fd_list_empty (inode_t *inode)
+fd_list_empty(inode_t *inode)
{
- uint8_t empty = 0;
+ uint8_t empty = 0;
- LOCK (&inode->lock);
- {
- empty = list_empty (&inode->fd_list);
- }
- UNLOCK (&inode->lock);
+ LOCK(&inode->lock);
+ {
+ empty = list_empty(&inode->fd_list);
+ }
+ UNLOCK(&inode->lock);
- return empty;
+ return empty;
}
-
int
-__fd_ctx_set (fd_t *fd, xlator_t *xlator, uint64_t value)
+__fd_ctx_set(fd_t *fd, xlator_t *xlator, uint64_t value)
{
- int index = 0, new_xl_count = 0;
- int ret = 0;
- int set_idx = -1;
- void *begin = NULL;
- size_t diff = 0;
- struct _fd_ctx *tmp = NULL;
-
- if (!fd || !xlator)
- return -1;
-
- for (index = 0; index < fd->xl_count; index++) {
- if (!fd->_ctx[index].key) {
- if (set_idx == -1)
- set_idx = index;
- /* dont break, to check if key already exists
- further on */
- }
- if (fd->_ctx[index].xl_key == xlator) {
- set_idx = index;
- break;
- }
+ int index = 0, new_xl_count = 0;
+ int ret = 0;
+ int set_idx = -1;
+ void *begin = NULL;
+ size_t diff = 0;
+ struct _fd_ctx *tmp = NULL;
+
+ if (!fd || !xlator)
+ return -1;
+
+ for (index = 0; index < fd->xl_count; index++) {
+ if (!fd->_ctx[index].key) {
+ if (set_idx == -1)
+ set_idx = index;
+ /* don't break, to check if key already exists
+ further on */
}
+ if (fd->_ctx[index].xl_key == xlator) {
+ set_idx = index;
+ break;
+ }
+ }
- if (set_idx == -1) {
- set_idx = fd->xl_count;
-
- new_xl_count = fd->xl_count + xlator->graph->xl_count;
-
- tmp = GF_REALLOC (fd->_ctx,
- (sizeof (struct _fd_ctx)
- * new_xl_count));
- if (tmp == NULL) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING,
- "realloc of fd->_ctx for fd "
- "(ptr: %p) failed, cannot set the key"
- , fd);
- ret = -1;
- goto out;
- }
+ if (set_idx == -1) {
+ set_idx = fd->xl_count;
- fd->_ctx = tmp;
+ new_xl_count = fd->xl_count + xlator->graph->xl_count;
- begin = fd->_ctx;
- begin += (fd->xl_count * sizeof (struct _fd_ctx));
+ tmp = GF_REALLOC(fd->_ctx, (sizeof(struct _fd_ctx) * new_xl_count));
+ if (tmp == NULL) {
+ ret = -1;
+ goto out;
+ }
- diff = (new_xl_count - fd->xl_count )
- * sizeof (struct _fd_ctx);
+ fd->_ctx = tmp;
- memset (begin, 0, diff);
+ begin = fd->_ctx;
+ begin += (fd->xl_count * sizeof(struct _fd_ctx));
- fd->xl_count = new_xl_count;
- }
+ diff = (new_xl_count - fd->xl_count) * sizeof(struct _fd_ctx);
+
+ memset(begin, 0, diff);
+
+ fd->xl_count = new_xl_count;
+ }
- fd->_ctx[set_idx].xl_key = xlator;
- fd->_ctx[set_idx].value1 = value;
+ fd->_ctx[set_idx].xl_key = xlator;
+ fd->_ctx[set_idx].value1 = value;
out:
- return ret;
+ return ret;
}
-
int
-fd_ctx_set (fd_t *fd, xlator_t *xlator, uint64_t value)
+fd_ctx_set(fd_t *fd, xlator_t *xlator, uint64_t value)
{
- int ret = 0;
+ int ret = 0;
- if (!fd || !xlator) {
- gf_log_callingfn ("", GF_LOG_WARNING, "%p %p", fd, xlator);
- return -1;
- }
+ if (!fd || !xlator) {
+ gf_msg_callingfn("fd", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "%p %p", fd, xlator);
+ return -1;
+ }
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_set (fd, xlator, value);
- }
- UNLOCK (&fd->lock);
+ LOCK(&fd->lock);
+ {
+ ret = __fd_ctx_set(fd, xlator, value);
+ }
+ UNLOCK(&fd->lock);
- return ret;
+ return ret;
}
-
int
-__fd_ctx_get (fd_t *fd, xlator_t *xlator, uint64_t *value)
+__fd_ctx_get(fd_t *fd, xlator_t *xlator, uint64_t *value)
{
- int index = 0;
- int ret = 0;
+ int index = 0;
+ int ret = 0;
- if (!fd || !xlator)
- return -1;
+ if (!fd || !xlator)
+ return -1;
- for (index = 0; index < fd->xl_count; index++) {
- if (fd->_ctx[index].xl_key == xlator)
- break;
- }
+ for (index = 0; index < fd->xl_count; index++) {
+ if (fd->_ctx[index].xl_key == xlator)
+ break;
+ }
- if (index == fd->xl_count) {
- ret = -1;
- goto out;
- }
+ if (index == fd->xl_count) {
+ ret = -1;
+ goto out;
+ }
- if (value)
- *value = fd->_ctx[index].value1;
+ if (value)
+ *value = fd->_ctx[index].value1;
out:
- return ret;
+ return ret;
}
-
int
-fd_ctx_get (fd_t *fd, xlator_t *xlator, uint64_t *value)
+fd_ctx_get(fd_t *fd, xlator_t *xlator, uint64_t *value)
{
- int ret = 0;
+ int ret = 0;
- if (!fd || !xlator)
- return -1;
+ if (!fd || !xlator)
+ return -1;
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_get (fd, xlator, value);
- }
- UNLOCK (&fd->lock);
+ LOCK(&fd->lock);
+ {
+ ret = __fd_ctx_get(fd, xlator, value);
+ }
+ UNLOCK(&fd->lock);
- return ret;
+ return ret;
}
-
int
-__fd_ctx_del (fd_t *fd, xlator_t *xlator, uint64_t *value)
+__fd_ctx_del(fd_t *fd, xlator_t *xlator, uint64_t *value)
{
- int index = 0;
- int ret = 0;
+ int index = 0;
+ int ret = 0;
- if (!fd || !xlator)
- return -1;
+ if (!fd || !xlator)
+ return -1;
- for (index = 0; index < fd->xl_count; index++) {
- if (fd->_ctx[index].xl_key == xlator)
- break;
- }
+ for (index = 0; index < fd->xl_count; index++) {
+ if (fd->_ctx[index].xl_key == xlator)
+ break;
+ }
- if (index == fd->xl_count) {
- ret = -1;
- goto out;
- }
+ if (index == fd->xl_count) {
+ ret = -1;
+ goto out;
+ }
- if (value)
- *value = fd->_ctx[index].value1;
+ if (value)
+ *value = fd->_ctx[index].value1;
- fd->_ctx[index].key = 0;
- fd->_ctx[index].value1 = 0;
+ fd->_ctx[index].key = 0;
+ fd->_ctx[index].value1 = 0;
out:
- return ret;
+ return ret;
}
-
int
-fd_ctx_del (fd_t *fd, xlator_t *xlator, uint64_t *value)
+fd_ctx_del(fd_t *fd, xlator_t *xlator, uint64_t *value)
{
- int ret = 0;
+ int ret = 0;
- if (!fd || !xlator)
- return -1;
+ if (!fd || !xlator)
+ return -1;
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_del (fd, xlator, value);
- }
- UNLOCK (&fd->lock);
+ LOCK(&fd->lock);
+ {
+ ret = __fd_ctx_del(fd, xlator, value);
+ }
+ UNLOCK(&fd->lock);
- return ret;
+ return ret;
}
-
void
-fd_dump (fd_t *fd, char *prefix)
+fd_dump(fd_t *fd, char *prefix)
{
- char key[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
- if (!fd)
- return;
+ if (!fd)
+ return;
- memset(key, 0, sizeof(key));
- gf_proc_dump_write("pid", "%llu", fd->pid);
- gf_proc_dump_write("refcount", "%d", fd->refcount);
- gf_proc_dump_write("flags", "%d", fd->flags);
-}
+ gf_proc_dump_write("pid", "%" PRIu64, fd->pid);
+ gf_proc_dump_write("refcount", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(fd->refcount));
+ gf_proc_dump_write("flags", "%d", fd->flags);
+ if (fd->inode) {
+ gf_proc_dump_build_key(key, "inode", NULL);
+ gf_proc_dump_add_section("%s", key);
+ inode_dump(fd->inode, key);
+ }
+}
void
-fdentry_dump (fdentry_t *fdentry, char *prefix)
+fdentry_dump(fdentry_t *fdentry, char *prefix)
{
- if (!fdentry)
- return;
+ if (!fdentry)
+ return;
- if (GF_FDENTRY_ALLOCATED != fdentry->next_free)
- return;
+ if (GF_FDENTRY_ALLOCATED != fdentry->next_free)
+ return;
- if (fdentry->fd)
- fd_dump(fdentry->fd, prefix);
+ if (fdentry->fd)
+ fd_dump(fdentry->fd, prefix);
}
-
void
-fdtable_dump (fdtable_t *fdtable, char *prefix)
+fdtable_dump(fdtable_t *fdtable, char *prefix)
{
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
- int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+ int ret = -1;
- if (!fdtable)
- return;
+ if (!fdtable)
+ return;
- ret = pthread_mutex_trylock (&fdtable->lock);
+ ret = pthread_rwlock_tryrdlock(&fdtable->lock);
+ if (ret)
+ goto out;
- if (ret) {
- gf_log ("fd", GF_LOG_WARNING, "Unable to acquire lock");
- return;
- }
+ gf_proc_dump_build_key(key, prefix, "refcount");
+ gf_proc_dump_write(key, "%d", fdtable->refcount);
+ gf_proc_dump_build_key(key, prefix, "maxfds");
+ gf_proc_dump_write(key, "%d", fdtable->max_fds);
+ gf_proc_dump_build_key(key, prefix, "first_free");
+ gf_proc_dump_write(key, "%d", fdtable->first_free);
- memset(key, 0, sizeof(key));
- gf_proc_dump_build_key(key, prefix, "refcount");
- gf_proc_dump_write(key, "%d", fdtable->refcount);
- gf_proc_dump_build_key(key, prefix, "maxfds");
- gf_proc_dump_write(key, "%d", fdtable->max_fds);
- gf_proc_dump_build_key(key, prefix, "first_free");
- gf_proc_dump_write(key, "%d", fdtable->first_free);
-
- for ( i = 0 ; i < fdtable->max_fds; i++) {
- if (GF_FDENTRY_ALLOCATED ==
- fdtable->fdentries[i].next_free) {
- gf_proc_dump_build_key(key, prefix, "fdentry[%d]", i);
- gf_proc_dump_add_section(key);
- fdentry_dump(&fdtable->fdentries[i], key);
- }
+ for (i = 0; i < fdtable->max_fds; i++) {
+ if (GF_FDENTRY_ALLOCATED == fdtable->fdentries[i].next_free) {
+ gf_proc_dump_build_key(key, prefix, "fdentry[%d]", i);
+ gf_proc_dump_add_section("%s", key);
+ fdentry_dump(&fdtable->fdentries[i], key);
}
+ }
- pthread_mutex_unlock(&fdtable->lock);
-}
+ pthread_rwlock_unlock(&fdtable->lock);
+out:
+ if (ret != 0)
+ gf_proc_dump_write("Unable to dump the fdtable",
+ "(Lock acquistion failed) %p", fdtable);
+ return;
+}
void
-fd_ctx_dump (fd_t *fd, char *prefix)
+fd_ctx_dump(fd_t *fd, char *prefix)
{
- struct _fd_ctx *fd_ctx = NULL;
- xlator_t *xl = NULL;
- int i = 0;
-
-
- if ((fd == NULL) || (fd->_ctx == NULL)) {
- goto out;
- }
-
- LOCK (&fd->lock);
- {
- if (fd->_ctx != NULL) {
- fd_ctx = GF_CALLOC (fd->xl_count, sizeof (*fd_ctx),
- gf_common_mt_fd_ctx);
- if (fd_ctx == NULL) {
- goto unlock;
- }
-
- for (i = 0; i < fd->xl_count; i++) {
- fd_ctx[i] = fd->_ctx[i];
- }
- }
- }
+ struct _fd_ctx *fd_ctx = NULL;
+ xlator_t *xl = NULL;
+ int i = 0;
+
+ if ((fd == NULL) || (fd->_ctx == NULL)) {
+ goto out;
+ }
+
+ LOCK(&fd->lock);
+ {
+ if (fd->_ctx != NULL) {
+ fd_ctx = GF_CALLOC(fd->xl_count, sizeof(*fd_ctx),
+ gf_common_mt_fd_ctx);
+ if (fd_ctx == NULL) {
+ goto unlock;
+ }
+
+ for (i = 0; i < fd->xl_count; i++) {
+ fd_ctx[i] = fd->_ctx[i];
+ }
+ }
+ }
unlock:
- UNLOCK (&fd->lock);
+ UNLOCK(&fd->lock);
- if (fd_ctx == NULL) {
- goto out;
- }
+ if (fd_ctx == NULL) {
+ goto out;
+ }
- for (i = 0; i < fd->xl_count; i++) {
- if (fd_ctx[i].xl_key) {
- xl = (xlator_t *)(long)fd_ctx[i].xl_key;
- if (xl->dumpops && xl->dumpops->fdctx)
- xl->dumpops->fdctx (xl, fd);
- }
+ for (i = 0; i < fd->xl_count; i++) {
+ if (fd_ctx[i].xl_key) {
+ xl = (xlator_t *)(long)fd_ctx[i].xl_key;
+ if (xl->dumpops && xl->dumpops->fdctx)
+ xl->dumpops->fdctx(xl, fd);
}
+ }
out:
- if (fd_ctx != NULL) {
- GF_FREE (fd_ctx);
- }
+ GF_FREE(fd_ctx);
- return;
+ return;
}
void
-fdentry_dump_to_dict (fdentry_t *fdentry, char *prefix, dict_t *dict,
- int *openfds)
+fdentry_dump_to_dict(fdentry_t *fdentry, char *prefix, dict_t *dict,
+ int *openfds)
{
- char key[GF_DUMP_MAX_BUF_LEN] = {0,};
- int ret = -1;
-
- if (!fdentry)
- return;
- if (!dict)
- return;
-
- if (GF_FDENTRY_ALLOCATED != fdentry->next_free)
- return;
-
- if (fdentry->fd) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pid", prefix);
- ret = dict_set_int32 (dict, key, fdentry->fd->pid);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.refcount", prefix);
- ret = dict_set_int32 (dict, key, fdentry->fd->refcount);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.flags", prefix);
- ret = dict_set_int32 (dict, key, fdentry->fd->flags);
-
- (*openfds)++;
- }
- return;
-}
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ int ret = -1;
-void
-fdtable_dump_to_dict (fdtable_t *fdtable, char *prefix, dict_t *dict)
-{
- char key[GF_DUMP_MAX_BUF_LEN] = {0,};
- int i = 0;
- int openfds = 0;
- int ret = -1;
+ if (!fdentry)
+ return;
+ if (!dict)
+ return;
- if (!fdtable)
- return;
- if (!dict)
- return;
+ if (GF_FDENTRY_ALLOCATED != fdentry->next_free)
+ return;
- ret = pthread_mutex_trylock (&fdtable->lock);
+ if (fdentry->fd) {
+ snprintf(key, sizeof(key), "%s.pid", prefix);
+ ret = dict_set_uint64(dict, key, fdentry->fd->pid);
if (ret)
- goto out;
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdtable.refcount", prefix);
- ret = dict_set_int32 (dict, key, fdtable->refcount);
+ snprintf(key, sizeof(key), "%s.refcount", prefix);
+ ret = dict_set_int32(dict, key, GF_ATOMIC_GET(fdentry->fd->refcount));
if (ret)
- goto out;
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdtable.maxfds", prefix);
- ret = dict_set_uint32 (dict, key, fdtable->max_fds);
+ snprintf(key, sizeof(key), "%s.flags", prefix);
+ ret = dict_set_int32(dict, key, fdentry->fd->flags);
if (ret)
- goto out;
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdtable.firstfree", prefix);
- ret = dict_set_int32 (dict, key, fdtable->first_free);
- if (ret)
- goto out;
+ (*openfds)++;
+ }
+ return;
+}
- for (i = 0; i < fdtable->max_fds; i++) {
- if (GF_FDENTRY_ALLOCATED ==
- fdtable->fdentries[i].next_free) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdtable.fdentry%d",
- prefix, i);
- fdentry_dump_to_dict (&fdtable->fdentries[i], key,
- dict, &openfds);
- }
+void
+fdtable_dump_to_dict(fdtable_t *fdtable, char *prefix, dict_t *dict)
+{
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ int i = 0;
+ int openfds = 0;
+ int ret = -1;
+
+ if (!fdtable)
+ return;
+ if (!dict)
+ return;
+
+ ret = pthread_rwlock_tryrdlock(&fdtable->lock);
+ if (ret)
+ return;
+
+ snprintf(key, sizeof(key), "%s.fdtable.refcount", prefix);
+ ret = dict_set_int32(dict, key, fdtable->refcount);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.fdtable.maxfds", prefix);
+ ret = dict_set_uint32(dict, key, fdtable->max_fds);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.fdtable.firstfree", prefix);
+ ret = dict_set_int32(dict, key, fdtable->first_free);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < fdtable->max_fds; i++) {
+ if (GF_FDENTRY_ALLOCATED == fdtable->fdentries[i].next_free) {
+ snprintf(key, sizeof(key), "%s.fdtable.fdentry%d", prefix, i);
+ fdentry_dump_to_dict(&fdtable->fdentries[i], key, dict, &openfds);
}
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.fdtable.openfds", prefix);
- ret = dict_set_int32 (dict, key, openfds);
+ snprintf(key, sizeof(key), "%s.fdtable.openfds", prefix);
+ ret = dict_set_int32(dict, key, openfds);
+ if (ret)
+ goto out;
out:
- pthread_mutex_unlock (&fdtable->lock);
- return;
+ pthread_rwlock_unlock(&fdtable->lock);
+ return;
}
diff --git a/libglusterfs/src/fd.h b/libglusterfs/src/fd.h
deleted file mode 100644
index 42df22b95e7..00000000000
--- a/libglusterfs/src/fd.h
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _FD_H
-#define _FD_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "list.h"
-#include <sys/types.h>
-#include <unistd.h>
-#include "glusterfs.h"
-#include "locking.h"
-#include "fd-lk.h"
-
-struct _inode;
-struct _dict;
-struct fd_lk_ctx;
-
-struct _fd_ctx {
- union {
- uint64_t key;
- void *xl_key;
- };
- union {
- uint64_t value1;
- void *ptr1;
- };
-};
-
-/* If this structure changes, please have mercy on the booster maintainer
- * and update the fd_t struct in booster/src/booster-fd.h.
- * See the comment there to know why.
- */
-struct _fd {
- uint64_t pid;
- int32_t flags;
- int32_t refcount;
- struct list_head inode_list;
- struct _inode *inode;
- gf_lock_t lock; /* used ONLY for manipulating
- 'struct _fd_ctx' array (_ctx).*/
- struct _fd_ctx *_ctx;
- int xl_count; /* Number of xl referred in this fd */
- struct fd_lk_ctx *lk_ctx;
-};
-typedef struct _fd fd_t;
-
-
-struct fd_table_entry {
- fd_t *fd;
- int next_free;
-};
-typedef struct fd_table_entry fdentry_t;
-
-
-struct _fdtable {
- int refcount;
- uint32_t max_fds;
- pthread_mutex_t lock;
- fdentry_t *fdentries;
- int first_free;
-};
-typedef struct _fdtable fdtable_t;
-
-
-/* Signifies no more entries in the fd table. */
-#define GF_FDTABLE_END -1
-
-/* This is used to invalidated
- * the next_free value in an fdentry that has been allocated
- */
-#define GF_FDENTRY_ALLOCATED -2
-
-#include "logging.h"
-#include "xlator.h"
-
-
-inline void
-gf_fd_put (fdtable_t *fdtable, int32_t fd);
-
-
-fd_t *
-gf_fd_fdptr_get (fdtable_t *fdtable, int64_t fd);
-
-
-fdtable_t *
-gf_fd_fdtable_alloc (void);
-
-
-int
-gf_fd_unused_get (fdtable_t *fdtable, fd_t *fdptr);
-
-
-fdentry_t *
-gf_fd_fdtable_get_all_fds (fdtable_t *fdtable, uint32_t *count);
-
-
-void
-gf_fd_fdtable_destroy (fdtable_t *fdtable);
-
-
-fd_t *
-__fd_ref (fd_t *fd);
-
-
-fd_t *
-fd_ref (fd_t *fd);
-
-
-fd_t *
-__fd_unref (fd_t *fd);
-
-
-void
-fd_unref (fd_t *fd);
-
-
-fd_t *
-fd_create (struct _inode *inode, pid_t pid);
-
-fd_t *
-fd_create_uint64 (struct _inode *inode, uint64_t pid);
-
-fd_t *
-fd_lookup (struct _inode *inode, pid_t pid);
-
-fd_t *
-fd_lookup_uint64 (struct _inode *inode, uint64_t pid);
-
-fd_t *
-fd_anonymous (inode_t *inode);
-
-
-gf_boolean_t
-fd_is_anonymous (fd_t *fd);
-
-
-uint8_t
-fd_list_empty (struct _inode *inode);
-
-
-fd_t *
-fd_bind (fd_t *fd);
-
-fd_t *
-__fd_bind (fd_t *fd);
-
-int
-fd_ctx_set (fd_t *fd, xlator_t *xlator, uint64_t value);
-
-
-int
-fd_ctx_get (fd_t *fd, xlator_t *xlator, uint64_t *value);
-
-
-int
-fd_ctx_del (fd_t *fd, xlator_t *xlator, uint64_t *value);
-
-
-int
-__fd_ctx_set (fd_t *fd, xlator_t *xlator, uint64_t value);
-
-
-int
-__fd_ctx_get (fd_t *fd, xlator_t *xlator, uint64_t *value);
-
-
-int
-__fd_ctx_del (fd_t *fd, xlator_t *xlator, uint64_t *value);
-
-fd_t *
-__fd_ref (fd_t *fd);
-
-void
-fd_ctx_dump (fd_t *fd, char *prefix);
-
-fdentry_t *
-gf_fd_fdtable_copy_all_fds (fdtable_t *fdtable, uint32_t *count);
-
-fdentry_t *
-__gf_fd_fdtable_copy_all_fds (fdtable_t *fdtable, uint32_t *count);
-
-void
-gf_fdptr_put (fdtable_t *fdtable, fd_t *fd);
-
-#endif /* _FD_H */
diff --git a/libglusterfs/src/gen-defaults.py b/libglusterfs/src/gen-defaults.py
new file mode 100755
index 00000000000..e31d3a9fe8a
--- /dev/null
+++ b/libglusterfs/src/gen-defaults.py
@@ -0,0 +1,81 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+import sys
+from generator import ops, fop_subs, cbk_subs, generate
+
+FAILURE_CBK_TEMPLATE = """
+int32_t
+default_@NAME@_failure_cbk (call_frame_t *frame, int32_t op_errno)
+{
+ STACK_UNWIND_STRICT (@NAME@, frame, -1, op_errno, @ERROR_ARGS@);
+ return 0;
+}
+"""
+
+CBK_RESUME_TEMPLATE = """
+int32_t
+default_@NAME@_cbk_resume (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, @LONG_ARGS@)
+{
+ STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+CBK_TEMPLATE = """
+int32_t
+default_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, @LONG_ARGS@)
+{
+ STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+RESUME_TEMPLATE = """
+int32_t
+default_@NAME@_resume (call_frame_t *frame, xlator_t *this, @LONG_ARGS@)
+{
+ STACK_WIND (frame, default_@NAME@_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FOP_TEMPLATE = """
+int32_t
+default_@NAME@ (
+ call_frame_t *frame,
+ xlator_t *this,
+ @LONG_ARGS@)
+{
+ STACK_WIND_TAIL (frame,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+def gen_defaults ():
+ for name in list(ops.keys()):
+ print(generate(FAILURE_CBK_TEMPLATE, name, cbk_subs))
+ for name in list(ops.keys()):
+ print(generate(CBK_RESUME_TEMPLATE, name, cbk_subs))
+ for name in list(ops.keys()):
+ print(generate(CBK_TEMPLATE, name, cbk_subs))
+ for name in list(ops.keys()):
+ print(generate(RESUME_TEMPLATE, name, fop_subs))
+ for name in list(ops.keys()):
+ print(generate(FOP_TEMPLATE, name, fop_subs))
+
+for l in open(sys.argv[1], 'r').readlines():
+ if l.find('#pragma generate') != -1:
+ print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
+ gen_defaults()
+ print("/* END GENERATED CODE */")
+ else:
+ print(l[:-1])
diff --git a/libglusterfs/src/generator.py b/libglusterfs/src/generator.py
new file mode 100755
index 00000000000..5b7aa4764a0
--- /dev/null
+++ b/libglusterfs/src/generator.py
@@ -0,0 +1,777 @@
+#!/usr/bin/python3
+
+import string
+
+# ops format: 'fop-arg' name type stub-field [nosync]
+# 'cbk-arg' name type
+# 'extra' name type arg-str
+# 'journal' fop-type
+# 'link' inode iatt
+#
+# 'role' indicates the significance of this line to the code generator (sort of
+# our own type).
+#
+# For fop-arg, we first need to know the name and the type of the arg so that
+# we can generate SHORT_ARGS (for function calls) and LONG_ARGS (for
+# declarations). For code that uses stubs, we also need to know the name of
+# the stub field, which might be different than the argument itself. Lastly,
+# for code that uses syncops, we need to know whether whoever wrote the syncop
+# for this fop "forgot" to include this argument. (Editorial: this kind of
+# creeping inconsistency is why we should have used code generation for stubs
+# and syncops as well as defaults all along.) To address this need, we use the
+# optional 'nosync' field for arguments (e.g. mkdir.umask) that we should skip
+# in generated syncop code.
+#
+# 'cbk-arg' is like fop-arg but simpler and used for generating callbacks
+# instead of fop functions.
+#
+# 'extra' is also like fop-arg, but it's another hack for syncops. This time
+# the problem is that some of what would normally be *callback* arguments are
+# instead created in the caller and passed to the syncop. We handle that by
+# adding an entry at the appropriate place in the fop-arg list, with the name
+# and type to generate a declaration and an argument string to generate the
+# actual syncop call.
+#
+# The mere presence of a 'journal' item is sufficient for most of the journal
+# code to recognize that it should do something. However, reconciliation also
+# needs to decide how reconciliation builds the arguments it needs to call down
+# to the syncop layer, based on what's in the journal. To do that, we divide
+# ops into three types and store those types in the ops table. In general,
+# these three types work as follows.
+#
+# For an fd-op, the GFID in the journal is used (in loc.gfid) field to
+# look up an inode, then an anonymous fd is found/created for that inode.
+#
+# For an inode-op, the GFID in the journal is used the same way, but no fd
+# is needed.
+#
+# For an entry-op, the *parent* GFID and name from the journal are used to
+# look up an inode (via loc.pargfid and par.name respectively).
+#
+# The only places this seems to fall down is for link and create. In link,
+# which is generally an entry-op, the source is looked up as though it's an
+# inode-op. In create, we have an fd argument but it's really a return
+# argument so we get a fresh inode instead of looking one up. Those two cases
+# need to be handled as special cases in the reconciliation code.
+#
+# 'link' is (hopefully) the last of the journal/syncop hacks. Much like
+# 'extra', some values that are returned as callback arguments in the normal
+# case are handled differently for syncops. For syncops that create objects
+# (e.g. mkdir) we need to link those objects into our inode table. The 'inode'
+# and 'iatt' fields here give us the information we need to construct the
+# proper inode_link call(s).
+
+ops = {}
+xlator_cbks = {}
+xlator_dumpops = {}
+
+ops['fgetxattr'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'name', 'const char *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'dict', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['fsetxattr'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'dict', 'dict_t *', 'xattr'),
+ ('fop-arg', 'flags', 'int32_t', 'flags'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['setxattr'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'dict', 'dict_t *', 'xattr'),
+ ('fop-arg', 'flags', 'int32_t', 'flags'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'inode-op'),
+)
+
+ops['statfs'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'buf', 'struct statvfs *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['fsyncdir'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'flags', 'int32_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['opendir'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'fd', 'fd_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['fstat'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['fsync'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'flags', 'int32_t'),
+ ('extra', 'preop', 'struct iatt', '&preop'),
+ ('extra', 'postop', 'struct iatt', '&postop'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'prebuf', 'struct iatt *'),
+ ('cbk-arg', 'postbuf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['flush'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['writev'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'vector', 'struct iovec *', 'vector'),
+ ('fop-arg', 'count', 'int32_t'),
+ ('fop-arg', 'off', 'off_t', 'offset'),
+ ('fop-arg', 'flags', 'uint32_t', 'flags'),
+ ('fop-arg', 'iobref', 'struct iobref *'),
+ ('extra', 'preop', 'struct iatt', '&preop'),
+ ('extra', 'postop', 'struct iatt', '&postop'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'prebuf', 'struct iatt *'),
+ ('cbk-arg', 'postbuf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['readv'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'size', 'size_t'),
+ ('fop-arg', 'offset', 'off_t'),
+ ('fop-arg', 'flags', 'uint32_t'),
+ ('extra', 'iatt', 'struct iatt', '&iatt'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'vector', 'struct iovec *'),
+ ('cbk-arg', 'count', 'int32_t'),
+ ('cbk-arg', 'stbuf', 'struct iatt *'),
+ ('cbk-arg', 'iobref', 'struct iobref *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['open'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'flags', 'int32_t'),
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'fd', 'fd_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['create'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'flags', 'int32_t', 'flags'),
+ ('fop-arg', 'mode', 'mode_t', 'mode'),
+ ('fop-arg', 'umask', 'mode_t', 'umask', 'nosync'),
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('extra', 'iatt', 'struct iatt', '&iatt'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'fd', 'fd_t *'),
+ ('cbk-arg', 'inode', 'inode_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'preparent', 'struct iatt *'),
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+ ('link', 'loc.inode', '&iatt'),
+)
+
+ops['link'] = (
+ ('fop-arg', 'oldloc', 'loc_t *', 'loc'),
+ ('fop-arg', 'newloc', 'loc_t *', 'loc2'),
+ ('extra', 'iatt', 'struct iatt', '&iatt'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'inode', 'inode_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'preparent', 'struct iatt *'),
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+)
+
+ops['rename'] = (
+ ('fop-arg', 'oldloc', 'loc_t *', 'loc'),
+ ('fop-arg', 'newloc', 'loc_t *', 'loc2'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'preoldparent', 'struct iatt *'),
+ ('cbk-arg', 'postoldparent', 'struct iatt *'),
+ ('cbk-arg', 'prenewparent', 'struct iatt *'),
+ ('cbk-arg', 'postnewparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+)
+
+ops['symlink'] = (
+ ('fop-arg', 'linkpath', 'const char *', 'linkname'),
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'umask', 'mode_t', 'mode', 'nosync'),
+ ('extra', 'iatt', 'struct iatt', '&iatt'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'inode', 'inode_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'preparent', 'struct iatt *'),
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+)
+
+ops['rmdir'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'flags', 'int32_t', 'flags'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'preparent', 'struct iatt *'),
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+)
+
+ops['unlink'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'flags', 'int32_t', 'flags', 'nosync'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'preparent', 'struct iatt *'),
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+)
+
+ops['mkdir'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'mode', 'mode_t', 'mode'),
+ ('fop-arg', 'umask', 'mode_t', 'umask', 'nosync'),
+ ('extra', 'iatt', 'struct iatt', '&iatt'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'inode', 'inode_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'preparent', 'struct iatt *'),
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+ ('link', 'loc.inode', '&iatt'),
+)
+
+ops['mknod'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'mode', 'mode_t', 'mode'),
+ ('fop-arg', 'rdev', 'dev_t', 'rdev'),
+ ('fop-arg', 'umask', 'mode_t', 'umask', 'nosync'),
+ ('extra', 'iatt', 'struct iatt', '&iatt'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'inode', 'inode_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'preparent', 'struct iatt *'),
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'entry-op'),
+)
+
+ops['readlink'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'size', 'size_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'path', 'const char *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['access'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'mask', 'int32_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['ftruncate'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'offset', 'off_t', 'offset'),
+ ('extra', 'preop', 'struct iatt', '&preop'),
+ ('extra', 'postop', 'struct iatt', '&postop'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'prebuf', 'struct iatt *'),
+ ('cbk-arg', 'postbuf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['getxattr'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'name', 'const char *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'dict', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['xattrop'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'flags', 'gf_xattrop_flags_t', 'optype'),
+ ('fop-arg', 'dict', 'dict_t *', 'xattr'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'dict', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'inode-op'),
+)
+
+ops['fxattrop'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'flags', 'gf_xattrop_flags_t', 'optype'),
+ ('fop-arg', 'dict', 'dict_t *', 'xattr'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'dict', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['removexattr'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'name', 'const char *', 'name'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'inode-op'),
+)
+
+ops['fremovexattr'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'name', 'const char *', 'name'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['lk'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'cmd', 'int32_t'),
+ ('fop-arg', 'lock', 'struct gf_flock *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'lock', 'struct gf_flock *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['inodelk'] = (
+ ('fop-arg', 'volume', 'const char *'),
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'cmd', 'int32_t'),
+ ('fop-arg', 'lock', 'struct gf_flock *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['finodelk'] = (
+ ('fop-arg', 'volume', 'const char *'),
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'cmd', 'int32_t'),
+ ('fop-arg', 'lock', 'struct gf_flock *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['entrylk'] = (
+ ('fop-arg', 'volume', 'const char *'),
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'basename', 'const char *'),
+ ('fop-arg', 'cmd', 'entrylk_cmd'),
+ ('fop-arg', 'type', 'entrylk_type'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['fentrylk'] = (
+ ('fop-arg', 'volume', 'const char *'),
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'basename', 'const char *'),
+ ('fop-arg', 'cmd', 'entrylk_cmd'),
+ ('fop-arg', 'type', 'entrylk_type'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['rchecksum'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'offset', 'off_t'),
+ ('fop-arg', 'len', 'int32_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'weak_cksum', 'uint32_t'),
+ ('cbk-arg', 'strong_cksum', 'uint8_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['readdir'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'size', 'size_t'),
+ ('fop-arg', 'off', 'off_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'entries', 'gf_dirent_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['readdirp'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'size', 'size_t'),
+ ('fop-arg', 'off', 'off_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'entries', 'gf_dirent_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['setattr'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'stbuf', 'struct iatt *', 'stat'),
+ ('fop-arg', 'valid', 'int32_t', 'valid'),
+ ('extra', 'preop', 'struct iatt', '&preop'),
+ ('extra', 'postop', 'struct iatt', '&postop'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'statpre', 'struct iatt *'),
+ ('cbk-arg', 'statpost', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'inode-op'),
+)
+
+ops['truncate'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'offset', 'off_t', 'offset'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'prebuf', 'struct iatt *'),
+ ('cbk-arg', 'postbuf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'inode-op'),
+)
+
+ops['stat'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['lookup'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'inode', 'inode_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ # We could add xdata everywhere automatically if somebody hadn't put
+ # something after it here.
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+)
+
+ops['fsetattr'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'stbuf', 'struct iatt *', 'stat'),
+ ('fop-arg', 'valid', 'int32_t', 'valid'),
+ ('extra', 'preop', 'struct iatt', '&preop'),
+ ('extra', 'postop', 'struct iatt', '&postop'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'statpre', 'struct iatt *'),
+ ('cbk-arg', 'statpost', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['fallocate'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'keep_size', 'int32_t', 'mode'),
+ ('fop-arg', 'offset', 'off_t', 'offset'),
+ ('fop-arg', 'len', 'size_t', 'size'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'pre', 'struct iatt *'),
+ ('cbk-arg', 'post', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['discard'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'offset', 'off_t', 'offset'),
+ ('fop-arg', 'len', 'size_t', 'size'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'pre', 'struct iatt *'),
+ ('cbk-arg', 'post', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['zerofill'] = (
+ ('fop-arg', 'fd', 'fd_t *', 'fd'),
+ ('fop-arg', 'offset', 'off_t', 'offset'),
+ # As e.g. fallocate/discard (above) "len" should really be a size_t.
+ ('fop-arg', 'len', 'off_t', 'size'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'pre', 'struct iatt *'),
+ ('cbk-arg', 'post', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['ipc'] = (
+ ('fop-arg', 'op', 'int32_t'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+ ('journal', 'fd-op'),
+)
+
+ops['seek'] = (
+ ('fop-arg', 'fd', 'fd_t *'),
+ ('fop-arg', 'offset', 'off_t'),
+ ('fop-arg', 'what', 'gf_seek_what_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'offset', 'off_t'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['getspec'] = (
+ ('fop-arg', 'key', 'const char *'),
+ ('fop-arg', 'flags', 'int32_t'),
+ ('cbk-arg', 'spec_data', 'char *'),
+)
+
+ops['lease'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'lease', 'struct gf_lease *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'lease', 'struct gf_lease *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['getactivelk'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'locklist', 'lock_migration_info_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['setactivelk'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'locklist', 'lock_migration_info_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['put'] = (
+ ('fop-arg', 'loc', 'loc_t *', 'loc'),
+ ('fop-arg', 'mode', 'mode_t', 'mode'),
+ ('fop-arg', 'umask', 'mode_t', 'umask'),
+ ('fop-arg', 'flags', 'uint32_t', 'flags'),
+ ('fop-arg', 'vector', 'struct iovec *', 'vector'),
+ ('fop-arg', 'count', 'int32_t'),
+ ('fop-arg', 'off', 'off_t', 'offset'),
+ ('fop-arg', 'iobref', 'struct iobref *'),
+ ('fop-arg', 'dict', 'dict_t *', 'xattr'),
+ ('fop-arg', 'xdata', 'dict_t *', 'xdata'),
+ ('cbk-arg', 'inode', 'inode_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'preparent', 'struct iatt *'),
+ ('cbk-arg', 'postparent', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['icreate'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'mode', 'mode_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'inode', 'inode_t *'),
+ ('cbk-arg', 'buf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['namelink'] = (
+ ('fop-arg', 'loc', 'loc_t *'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'prebuf', 'struct iatt *'),
+ ('cbk-arg', 'postbuf', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+
+ops['copy_file_range'] = (
+ ('fop-arg', 'fd_in', 'fd_t *'),
+ ('fop-arg', 'off_in', 'off64_t '),
+ ('fop-arg', 'fd_out', 'fd_t *'),
+ ('fop-arg', 'off_out', 'off64_t '),
+ ('fop-arg', 'len', 'size_t'),
+ ('fop-arg', 'flags', 'uint32_t'),
+ ('fop-arg', 'xdata', 'dict_t *'),
+ ('cbk-arg', 'stbuf', 'struct iatt *'),
+ ('cbk-arg', 'prebuf_dst', 'struct iatt *'),
+ ('cbk-arg', 'postbuf_dst', 'struct iatt *'),
+ ('cbk-arg', 'xdata', 'dict_t *'),
+)
+#####################################################################
+xlator_cbks['forget'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'inode', 'inode_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_cbks['release'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'fd', 'fd_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_cbks['releasedir'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'fd', 'fd_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_cbks['invalidate'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'inode', 'inode_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_cbks['client_destroy'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'client', 'client_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_cbks['client_disconnect'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'client', 'client_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_cbks['ictxmerge'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'fd', 'fd_t *'),
+ ('fn-arg', 'inode', 'inode_t *'),
+ ('fn-arg', 'linked_inode', 'inode_t *'),
+ ('ret-val', 'void', ''),
+)
+
+#####################################################################
+xlator_dumpops['priv'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['inode'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['fd'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['inodectx'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'ino', 'inode_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['fdctx'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'fd', 'fd_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['priv_to_dict'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'dict', 'dict_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['inode_to_dict'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'dict', 'dict_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['fd_to_dict'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'dict', 'dict_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['inodectx_to_dict'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'ino', 'inode_t *'),
+ ('fn-arg', 'dict', 'dict_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['fdctx_to_dict'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('fn-arg', 'fd', 'fd_t *'),
+ ('fn-arg', 'dict', 'dict_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+xlator_dumpops['history'] = (
+ ('fn-arg', 'this', 'xlator_t *'),
+ ('ret-val', 'int32_t', '0'),
+)
+
+def get_error_arg (type_str):
+ if type_str.find(" *") != -1:
+ return "NULL"
+ return "-1"
+
+def get_subs (names, types, cbktypes=None):
+ sdict = {}
+ sdict["@SHORT_ARGS@"] = ', '.join(names)
+ # Convert two separate tuples to one of (name, type) sub-tuples.
+ as_tuples = list(zip(types, names))
+ # Convert each sub-tuple into a "type name" string.
+ as_strings = [' '.join(item) for item in as_tuples]
+ # Join all of those into one big string.
+ sdict["@LONG_ARGS@"] = ',\n\t'.join(as_strings)
+ # So much more readable than string.join(map(string.join,zip(...))))
+ sdict["@ERROR_ARGS@"] = ', '.join(list(map(get_error_arg, types)))
+ if cbktypes is not None:
+ sdict["@CBK_ERROR_ARGS@"] = ', '.join(list(map(get_error_arg, cbktypes)))
+ return sdict
+
+def generate (tmpl, name, subs):
+ text = tmpl.replace("@NAME@", name)
+ if name == "writev":
+ # More spurious inconsistency.
+ text = text.replace("@UPNAME@", "WRITE")
+ elif name == "readv":
+ text = text.replace("@UPNAME@", "READ")
+ else:
+ text = text.replace("@UPNAME@", name.upper())
+ for old, new in subs[name].items():
+ text = text.replace(old, new)
+ # TBD: reindent/reformat the result for maximum readability.
+ return text
+
+fop_subs = {}
+cbk_subs = {}
+
+for name, args in ops.items():
+
+ # Create the necessary substitution strings for fops.
+ arg_names = [ a[1] for a in args if a[0] == 'fop-arg']
+ arg_types = [ a[2] for a in args if a[0] == 'fop-arg']
+ cbk_types = [ a[2] for a in args if a[0] == 'cbk-arg']
+ fop_subs[name] = get_subs(arg_names, arg_types, cbk_types)
+
+ # Same thing for callbacks.
+ arg_names = [ a[1] for a in args if a[0] == 'cbk-arg']
+ arg_types = [ a[2] for a in args if a[0] == 'cbk-arg']
+ cbk_subs[name] = get_subs(arg_names, arg_types)
+
+ # Callers can add other subs to these tables, or even create their
+ # own tables, using these same techniques, and then pass the result
+ # to generate() which would Do The Right Thing with them.
diff --git a/libglusterfs/src/gf-dirent.c b/libglusterfs/src/gf-dirent.c
index bb028c9671d..a809efc97ef 100644
--- a/libglusterfs/src/gf-dirent.c
+++ b/libglusterfs/src/gf-dirent.c
@@ -8,63 +8,232 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
#include <stdio.h>
#include <string.h>
#include <stdint.h>
-#include "compat.h"
-#include "xlator.h"
+#include "glusterfs/compat.h"
+#include "glusterfs/syncop.h"
+
+#define ONE 1ULL
+#define PRESENT_D_OFF_BITS 63
+#define BACKEND_D_OFF_BITS 63
+#define TOP_BIT (ONE << (PRESENT_D_OFF_BITS - 1))
+#define MASK (~0ULL)
+#define SHIFT_BITS (max(0, (BACKEND_D_OFF_BITS - PRESENT_D_OFF_BITS + 1)))
+#define PRESENT_MASK (MASK >> (64 - PRESENT_D_OFF_BITS))
+
+static uint64_t
+bits_for(uint64_t num)
+{
+ uint64_t bits = 0, ctrl = 1;
+
+ while (ctrl < num) {
+ ctrl *= 2;
+ bits++;
+ }
+
+ return bits;
+}
+
+int
+gf_deitransform(xlator_t *this, uint64_t offset)
+{
+ int cnt = 0;
+ int max = 0;
+ int max_bits = 0;
+ uint64_t off_mask = 0;
+ uint64_t host_mask = 0;
+
+ max = glusterfs_get_leaf_count(this->graph);
+
+ if (max == 1) {
+ cnt = 0;
+ goto out;
+ }
+
+ if (offset & TOP_BIT) {
+ /* HUGE d_off */
+ max_bits = bits_for(max);
+ off_mask = (MASK << max_bits);
+ host_mask = ~(off_mask);
+
+ cnt = offset & host_mask;
+ } else {
+ /* small d_off */
+ cnt = offset % max;
+ }
+out:
+ return cnt;
+}
+
+uint64_t
+gf_dirent_orig_offset(xlator_t *this, uint64_t offset)
+{
+ int max = 0;
+ int max_bits = 0;
+ uint64_t off_mask = 0;
+ uint64_t orig_offset;
+
+ max = glusterfs_get_leaf_count(this->graph);
+
+ if (max == 1) {
+ orig_offset = offset;
+ goto out;
+ }
+
+ if (offset & TOP_BIT) {
+ /* HUGE d_off */
+ max_bits = bits_for(max);
+ off_mask = (MASK << max_bits);
+ orig_offset = ((offset & ~TOP_BIT) & off_mask) << SHIFT_BITS;
+ } else {
+ /* small d_off */
+ orig_offset = offset / max;
+ }
+out:
+ return orig_offset;
+}
+
+int
+gf_itransform(xlator_t *this, uint64_t x, uint64_t *y_p, int client_id)
+{
+ int max = 0;
+ uint64_t y = 0;
+ uint64_t hi_mask = 0;
+ uint64_t off_mask = 0;
+ int max_bits = 0;
+
+ if (x == ((uint64_t)-1)) {
+ y = (uint64_t)-1;
+ goto out;
+ }
+
+ if (!x) {
+ y = 0;
+ goto out;
+ }
+
+ max = glusterfs_get_leaf_count(this->graph);
+
+ if (max == 1) {
+ y = x;
+ goto out;
+ }
+
+ max_bits = bits_for(max);
+
+ hi_mask = ~(PRESENT_MASK >> (max_bits + 1));
+
+ if (x & hi_mask) {
+ /* HUGE d_off */
+ off_mask = MASK << max_bits;
+ y = TOP_BIT | ((x >> SHIFT_BITS) & off_mask) | client_id;
+ } else {
+ /* small d_off */
+ y = ((x * max) + client_id);
+ }
+
+out:
+ if (y_p)
+ *y_p = y;
+
+ return 0;
+}
gf_dirent_t *
-gf_dirent_for_name (const char *name)
+gf_dirent_for_name(const char *name)
{
- gf_dirent_t *gf_dirent = NULL;
+ gf_dirent_t *gf_dirent = NULL;
- /* TODO: use mem-pool */
- gf_dirent = GF_CALLOC (gf_dirent_size (name), 1,
- gf_common_mt_gf_dirent_t);
- if (!gf_dirent)
- return NULL;
+ /* TODO: use mem-pool */
+ gf_dirent = GF_CALLOC(gf_dirent_size(name), 1, gf_common_mt_gf_dirent_t);
+ if (!gf_dirent)
+ return NULL;
- INIT_LIST_HEAD (&gf_dirent->list);
- strcpy (gf_dirent->d_name, name);
+ INIT_LIST_HEAD(&gf_dirent->list);
+ strcpy(gf_dirent->d_name, name);
- gf_dirent->d_off = 0;
- gf_dirent->d_ino = -1;
- gf_dirent->d_type = 0;
- gf_dirent->d_len = strlen (name);
+ gf_dirent->d_off = 0;
+ gf_dirent->d_ino = -1;
+ gf_dirent->d_type = 0;
+ gf_dirent->d_len = strlen(name);
- return gf_dirent;
+ return gf_dirent;
}
+void
+gf_dirent_entry_free(gf_dirent_t *entry)
+{
+ if (!entry)
+ return;
+
+ if (entry->dict)
+ dict_unref(entry->dict);
+ if (entry->inode)
+ inode_unref(entry->inode);
+
+ list_del_init(&entry->list);
+ GF_FREE(entry);
+}
void
-gf_dirent_free (gf_dirent_t *entries)
+gf_dirent_free(gf_dirent_t *entries)
{
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+
+ if (!entries)
+ return;
+
+ if (list_empty(&entries->list))
+ return;
+
+ list_for_each_entry_safe(entry, tmp, &entries->list, list)
+ {
+ gf_dirent_entry_free(entry);
+ }
+}
+
+gf_dirent_t *
+entry_copy(gf_dirent_t *source)
+{
+ gf_dirent_t *sink = NULL;
+
+ sink = gf_dirent_for_name(source->d_name);
+ if (!sink)
+ return NULL;
- if (!entries)
- return;
+ sink->d_off = source->d_off;
+ sink->d_ino = source->d_ino;
+ sink->d_type = source->d_type;
+ sink->d_stat = source->d_stat;
+ sink->d_len = source->d_len;
- if (list_empty (&entries->list))
- return;
+ if (source->inode)
+ sink->inode = inode_ref(source->inode);
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if (entry->dict)
- dict_unref (entry->dict);
- if (entry->inode)
- inode_unref (entry->inode);
+ if (source->dict)
+ sink->dict = dict_ref(source->dict);
+ return sink;
+}
+
+void
+gf_link_inode_from_dirent(xlator_t *this, inode_t *parent, gf_dirent_t *entry)
+{
+ inode_t *link_inode = NULL;
+ inode_t *tmp = NULL;
- list_del (&entry->list);
- GF_FREE (entry);
- }
+ if (!entry->inode)
+ return;
+ link_inode = inode_link(entry->inode, parent, entry->d_name,
+ &entry->d_stat);
+ if (!link_inode)
+ return;
+
+ inode_lookup(link_inode);
+ tmp = entry->inode;
+ entry->inode = link_inode;
+ inode_unref(tmp);
}
/* TODO: Currently, with this function, we will be breaking the
@@ -73,20 +242,60 @@ gf_dirent_free (gf_dirent_t *entries)
Need more thoughts before finalizing this function
*/
int
-gf_link_inodes_from_dirent (xlator_t *this, inode_t *parent,
- gf_dirent_t *entries)
+gf_link_inodes_from_dirent(xlator_t *this, inode_t *parent,
+ gf_dirent_t *entries)
+{
+ gf_dirent_t *entry = NULL;
+
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ gf_link_inode_from_dirent(this, parent, entry);
+ }
+
+ return 0;
+}
+
+int
+gf_fill_iatt_for_dirent(gf_dirent_t *entry, inode_t *parent, xlator_t *subvol)
{
- gf_dirent_t *entry = NULL;
- inode_t *link_inode = NULL;
-
- list_for_each_entry (entry, &entries->list, list) {
- if (entry->inode) {
- link_inode = inode_link (entry->inode, parent,
- entry->d_name, &entry->d_stat);
- inode_lookup (link_inode);
- inode_unref (link_inode);
- }
- }
-
- return 0;
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ char *path = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+
+ loc.inode = inode_grep(parent->table, parent, entry->d_name);
+ if (!loc.inode) {
+ loc.inode = inode_new(parent->table);
+ gf_uuid_copy(loc.inode->gfid, entry->d_stat.ia_gfid);
+ }
+
+ gf_uuid_copy(loc.pargfid, parent->gfid);
+ loc.name = entry->d_name;
+ loc.parent = inode_ref(parent);
+ ret = inode_path(loc.parent, entry->d_name, &path);
+ loc.path = path;
+ if (ret < 0)
+ goto out;
+
+ ret = syncop_lookup(subvol, &loc, &iatt, NULL, NULL, NULL);
+ if (ret)
+ goto out;
+
+ entry->d_stat = iatt;
+ entry->inode = inode_ref(loc.inode);
+ /* We don't need to link inode here, because as part of readdirp_cbk
+ * we will link all dirents.
+ *
+ * Since we did a proper lookup, we don't need to set need_lookup
+ * flag.
+ */
+
+ ret = 0;
+out:
+ loc_wipe(&loc);
+ return ret;
}
diff --git a/libglusterfs/src/gf-dirent.h b/libglusterfs/src/gf-dirent.h
deleted file mode 100644
index 36a5a629cf2..00000000000
--- a/libglusterfs/src/gf-dirent.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-
-#ifndef _GF_DIRENT_H
-#define _GF_DIRENT_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "iatt.h"
-#include "inode.h"
-
-#define gf_dirent_size(name) (sizeof (gf_dirent_t) + strlen (name) + 1)
-
-struct _dir_entry_t {
- struct _dir_entry_t *next;
- char *name;
- char *link;
- struct iatt buf;
-};
-
-
-struct _gf_dirent_t {
- union {
- struct list_head list;
- struct {
- struct _gf_dirent_t *next;
- struct _gf_dirent_t *prev;
- };
- };
- uint64_t d_ino;
- uint64_t d_off;
- uint32_t d_len;
- uint32_t d_type;
- struct iatt d_stat;
- dict_t *dict;
- inode_t *inode;
- char d_name[0];
-};
-
-
-gf_dirent_t *gf_dirent_for_name (const char *name);
-void gf_dirent_free (gf_dirent_t *entries);
-int gf_link_inodes_from_dirent (xlator_t *this, inode_t *parent,
- gf_dirent_t *entries);
-
-#endif /* _GF_DIRENT_H */
diff --git a/libglusterfs/src/gidcache.c b/libglusterfs/src/gidcache.c
new file mode 100644
index 00000000000..64a93802f76
--- /dev/null
+++ b/libglusterfs/src/gidcache.c
@@ -0,0 +1,211 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/gidcache.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/common-utils.h"
+
+/*
+ * We treat this as a very simple set-associative LRU cache, with entries aged
+ * out after a configurable interval. Hardly rocket science, but lots of
+ * details to worry about.
+ */
+#define BUCKET_START(p, n) ((p) + ((n)*AUX_GID_CACHE_ASSOC))
+
+/*
+ * Initialize the cache.
+ */
+int
+gid_cache_init(gid_cache_t *cache, uint32_t timeout)
+{
+ if (!cache)
+ return -1;
+
+ LOCK_INIT(&cache->gc_lock);
+ cache->gc_max_age = timeout;
+ cache->gc_nbuckets = AUX_GID_CACHE_BUCKETS;
+ memset(cache->gc_cache, 0, sizeof(gid_list_t) * AUX_GID_CACHE_SIZE);
+
+ return 0;
+}
+
+/*
+ * Reconfigure the cache timeout.
+ */
+int
+gid_cache_reconf(gid_cache_t *cache, uint32_t timeout)
+{
+ if (!cache)
+ return -1;
+
+ LOCK(&cache->gc_lock);
+ cache->gc_max_age = timeout;
+ UNLOCK(&cache->gc_lock);
+
+ return 0;
+}
+
+/*
+ * Look up an ID in the cache. If found, return the actual cache entry to avoid
+ * an additional allocation and memory copy. The caller should copy the data and
+ * release (unlock) the cache as soon as possible.
+ */
+const gid_list_t *
+gid_cache_lookup(gid_cache_t *cache, uint64_t id, uint64_t uid, uint64_t gid)
+{
+ int bucket;
+ int i;
+ time_t now;
+ const gid_list_t *agl;
+
+ now = gf_time();
+ LOCK(&cache->gc_lock);
+ bucket = id % cache->gc_nbuckets;
+ agl = BUCKET_START(cache->gc_cache, bucket);
+ for (i = 0; i < AUX_GID_CACHE_ASSOC; i++, agl++) {
+ if (!agl->gl_list)
+ continue;
+ if (agl->gl_id != id)
+ continue;
+
+ /*
+ @uid and @gid reflect the latest UID/GID of the
+ process performing the syscall (taken from frame->root).
+
+ If the UID and GID has changed for the PID since the
+ time we cached it, we should treat the cache as having
+ stale values and query them freshly.
+ */
+ if (agl->gl_uid != uid || agl->gl_gid != gid)
+ break;
+
+ /*
+ * We don't put new entries in the cache when expiration=0, but
+ * there might be entries still in there if expiration was
+ * changed very recently. Writing the check this way ensures
+ * that they're not used.
+ */
+ if (now < agl->gl_deadline) {
+ return agl;
+ }
+
+ /*
+ * We're not going to find any more UID matches, and reaping
+ * is handled further down to maintain LRU order.
+ */
+ break;
+ }
+ UNLOCK(&cache->gc_lock);
+ return NULL;
+}
+
+/*
+ * Release an entry found via lookup.
+ */
+void
+gid_cache_release(gid_cache_t *cache, const gid_list_t *agl)
+{
+ UNLOCK(&cache->gc_lock);
+}
+
+/*
+ * Add a new list entry to the cache. If an entry for this ID already exists,
+ * update it.
+ */
+int
+gid_cache_add(gid_cache_t *cache, gid_list_t *gl)
+{
+ gid_list_t *agl;
+ int bucket;
+ int i;
+ time_t now;
+
+ if (!gl || !gl->gl_list)
+ return -1;
+
+ if (!cache->gc_max_age)
+ return 0;
+
+ now = gf_time();
+ LOCK(&cache->gc_lock);
+
+ /*
+ * Scan for the first free entry or one that matches this id. The id
+ * check is added to address a bug where the cache might contain an
+ * expired entry for this id. Since lookup occurs in LRU order and
+ * does not reclaim entries, it will always return failure on discovery
+ * of an expired entry. This leads to duplicate entries being added,
+ * which still do not satisfy lookups until the expired entry (and
+ * everything before it) is reclaimed.
+ *
+ * We address this through reuse of an entry already allocated to this
+ * id, whether expired or not, since we have obviously already received
+ * more recent data. The entry is repopulated with the new data and a new
+ * deadline and is pushed forward to reside as the last populated entry in
+ * the bucket.
+ */
+ bucket = gl->gl_id % cache->gc_nbuckets;
+ agl = BUCKET_START(cache->gc_cache, bucket);
+ for (i = 0; i < AUX_GID_CACHE_ASSOC; ++i, ++agl) {
+ if (agl->gl_id == gl->gl_id)
+ break;
+ if (!agl->gl_list)
+ break;
+ }
+
+ /*
+ * The way we allocate free entries naturally places the newest
+ * ones at the highest indices, so evicting the lowest makes
+ * sense, but that also means we can't just replace it with the
+ * one that caused the eviction. That would cause us to thrash
+ * the first entry while others remain idle. Therefore, we
+ * need to slide the other entries down and add the new one at
+ * the end just as if the *last* slot had been free.
+ *
+ * Deadline expiration is also handled here, since the oldest
+ * expired entry will be in the first position. This does mean
+ * the bucket can stay full of expired entries if we're idle
+ * but, if the small amount of extra memory or scan time before
+ * we decide to evict someone ever become issues, we could
+ * easily add a reaper thread.
+ */
+
+ if (i >= AUX_GID_CACHE_ASSOC) {
+ /* cache full, evict the first (LRU) entry */
+ i = 0;
+ agl = BUCKET_START(cache->gc_cache, bucket);
+ GF_FREE(agl->gl_list);
+ } else if (agl->gl_list) {
+ /* evict the old entry we plan to reuse */
+ GF_FREE(agl->gl_list);
+ }
+
+ /*
+ * If we have evicted an entry, slide the subsequent populated entries
+ * back and populate the last entry.
+ */
+ for (; i < AUX_GID_CACHE_ASSOC - 1; i++) {
+ if (!agl[1].gl_list)
+ break;
+ agl[0] = agl[1];
+ agl++;
+ }
+
+ agl->gl_id = gl->gl_id;
+ agl->gl_uid = gl->gl_uid;
+ agl->gl_gid = gl->gl_gid;
+ agl->gl_count = gl->gl_count;
+ agl->gl_list = gl->gl_list;
+ agl->gl_deadline = now + cache->gc_max_age;
+
+ UNLOCK(&cache->gc_lock);
+
+ return 1;
+}
diff --git a/libglusterfs/src/globals.c b/libglusterfs/src/globals.c
index 11f62a5501f..ae06f8be386 100644
--- a/libglusterfs/src/globals.c
+++ b/libglusterfs/src/globals.c
@@ -8,392 +8,359 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif /* !_CONFIG_H */
-
#include <pthread.h>
-#include "glusterfs.h"
-#include "globals.h"
-#include "xlator.h"
-#include "mem-pool.h"
+#include "glusterfs/syncop.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+const char *gf_fop_list[GF_FOP_MAXVALUE] = {
+ [GF_FOP_NULL] = "NULL",
+ [GF_FOP_STAT] = "STAT",
+ [GF_FOP_READLINK] = "READLINK",
+ [GF_FOP_MKNOD] = "MKNOD",
+ [GF_FOP_MKDIR] = "MKDIR",
+ [GF_FOP_UNLINK] = "UNLINK",
+ [GF_FOP_RMDIR] = "RMDIR",
+ [GF_FOP_SYMLINK] = "SYMLINK",
+ [GF_FOP_RENAME] = "RENAME",
+ [GF_FOP_LINK] = "LINK",
+ [GF_FOP_TRUNCATE] = "TRUNCATE",
+ [GF_FOP_OPEN] = "OPEN",
+ [GF_FOP_READ] = "READ",
+ [GF_FOP_WRITE] = "WRITE",
+ [GF_FOP_STATFS] = "STATFS",
+ [GF_FOP_FLUSH] = "FLUSH",
+ [GF_FOP_FSYNC] = "FSYNC",
+ [GF_FOP_SETXATTR] = "SETXATTR",
+ [GF_FOP_GETXATTR] = "GETXATTR",
+ [GF_FOP_REMOVEXATTR] = "REMOVEXATTR",
+ [GF_FOP_OPENDIR] = "OPENDIR",
+ [GF_FOP_FSYNCDIR] = "FSYNCDIR",
+ [GF_FOP_ACCESS] = "ACCESS",
+ [GF_FOP_CREATE] = "CREATE",
+ [GF_FOP_FTRUNCATE] = "FTRUNCATE",
+ [GF_FOP_FSTAT] = "FSTAT",
+ [GF_FOP_LK] = "LK",
+ [GF_FOP_LOOKUP] = "LOOKUP",
+ [GF_FOP_READDIR] = "READDIR",
+ [GF_FOP_INODELK] = "INODELK",
+ [GF_FOP_FINODELK] = "FINODELK",
+ [GF_FOP_ENTRYLK] = "ENTRYLK",
+ [GF_FOP_FENTRYLK] = "FENTRYLK",
+ [GF_FOP_XATTROP] = "XATTROP",
+ [GF_FOP_FXATTROP] = "FXATTROP",
+ [GF_FOP_FSETXATTR] = "FSETXATTR",
+ [GF_FOP_FGETXATTR] = "FGETXATTR",
+ [GF_FOP_RCHECKSUM] = "RCHECKSUM",
+ [GF_FOP_SETATTR] = "SETATTR",
+ [GF_FOP_FSETATTR] = "FSETATTR",
+ [GF_FOP_READDIRP] = "READDIRP",
+ [GF_FOP_GETSPEC] = "GETSPEC",
+ [GF_FOP_FORGET] = "FORGET",
+ [GF_FOP_RELEASE] = "RELEASE",
+ [GF_FOP_RELEASEDIR] = "RELEASEDIR",
+ [GF_FOP_FREMOVEXATTR] = "FREMOVEXATTR",
+ [GF_FOP_FALLOCATE] = "FALLOCATE",
+ [GF_FOP_DISCARD] = "DISCARD",
+ [GF_FOP_ZEROFILL] = "ZEROFILL",
+ [GF_FOP_IPC] = "IPC",
+ [GF_FOP_SEEK] = "SEEK",
+ [GF_FOP_LEASE] = "LEASE",
+ [GF_FOP_COMPOUND] = "COMPOUND",
+ [GF_FOP_GETACTIVELK] = "GETACTIVELK",
+ [GF_FOP_SETACTIVELK] = "SETACTIVELK",
+ [GF_FOP_PUT] = "PUT",
+ [GF_FOP_ICREATE] = "ICREATE",
+ [GF_FOP_NAMELINK] = "NAMELINK",
+ [GF_FOP_COPY_FILE_RANGE] = "COPY_FILE_RANGE",
+};
+const char *gf_upcall_list[GF_UPCALL_FLAGS_MAXVALUE] = {
+ [GF_UPCALL_NULL] = "NULL",
+ [GF_UPCALL] = "UPCALL",
+ [GF_UPCALL_CI_STAT] = "CI_IATT",
+ [GF_UPCALL_CI_XATTR] = "CI_XATTR",
+ [GF_UPCALL_CI_RENAME] = "CI_RENAME",
+ [GF_UPCALL_CI_NLINK] = "CI_UNLINK",
+ [GF_UPCALL_CI_FORGET] = "CI_FORGET",
+ [GF_UPCALL_LEASE_RECALL] = "LEASE_RECALL",
+};
+
+/* THIS */
-/* gf_*_list[] */
+/* This global ctx is a bad hack to prevent some of the libgfapi crashes.
+ * This should be removed once the patch on resource pool is accepted
+ */
+glusterfs_ctx_t *global_ctx = NULL;
+pthread_mutex_t global_ctx_mutex = PTHREAD_MUTEX_INITIALIZER;
+xlator_t global_xlator;
+static int gf_global_mem_acct_enable = 1;
+static pthread_once_t globals_inited = PTHREAD_ONCE_INIT;
-char *gf_fop_list[GF_FOP_MAXVALUE];
-char *gf_mgmt_list[GF_MGMT_MAXVALUE];
+static pthread_key_t free_key;
+static __thread xlator_t *thread_xlator = NULL;
+static __thread void *thread_synctask = NULL;
+static __thread void *thread_leaseid = NULL;
+static __thread struct syncopctx thread_syncopctx = {};
+static __thread char thread_uuid_buf[GF_UUID_BUF_SIZE] = {};
+static __thread char thread_lkowner_buf[GF_LKOWNER_BUF_SIZE] = {};
+static __thread char thread_leaseid_buf[GF_LEASE_ID_BUF_SIZE] = {};
-void
-gf_op_list_init()
+int
+gf_global_mem_acct_enable_get(void)
{
- gf_fop_list[GF_FOP_NULL] = "NULL";
- gf_fop_list[GF_FOP_STAT] = "STAT";
- gf_fop_list[GF_FOP_READLINK] = "READLINK";
- gf_fop_list[GF_FOP_MKNOD] = "MKNOD";
- gf_fop_list[GF_FOP_MKDIR] = "MKDIR";
- gf_fop_list[GF_FOP_UNLINK] = "UNLINK";
- gf_fop_list[GF_FOP_RMDIR] = "RMDIR";
- gf_fop_list[GF_FOP_SYMLINK] = "SYMLINK";
- gf_fop_list[GF_FOP_RENAME] = "RENAME";
- gf_fop_list[GF_FOP_LINK] = "LINK";
- gf_fop_list[GF_FOP_TRUNCATE] = "TRUNCATE";
- gf_fop_list[GF_FOP_OPEN] = "OPEN";
- gf_fop_list[GF_FOP_READ] = "READ";
- gf_fop_list[GF_FOP_WRITE] = "WRITE";
- gf_fop_list[GF_FOP_STATFS] = "STATFS";
- gf_fop_list[GF_FOP_FLUSH] = "FLUSH";
- gf_fop_list[GF_FOP_FSYNC] = "FSYNC";
- gf_fop_list[GF_FOP_SETXATTR] = "SETXATTR";
- gf_fop_list[GF_FOP_GETXATTR] = "GETXATTR";
- gf_fop_list[GF_FOP_REMOVEXATTR] = "REMOVEXATTR";
- gf_fop_list[GF_FOP_OPENDIR] = "OPENDIR";
- gf_fop_list[GF_FOP_FSYNCDIR] = "FSYNCDIR";
- gf_fop_list[GF_FOP_ACCESS] = "ACCESS";
- gf_fop_list[GF_FOP_CREATE] = "CREATE";
- gf_fop_list[GF_FOP_FTRUNCATE] = "FTRUNCATE";
- gf_fop_list[GF_FOP_FSTAT] = "FSTAT";
- gf_fop_list[GF_FOP_LK] = "LK";
- gf_fop_list[GF_FOP_LOOKUP] = "LOOKUP";
- gf_fop_list[GF_FOP_READDIR] = "READDIR";
- gf_fop_list[GF_FOP_INODELK] = "INODELK";
- gf_fop_list[GF_FOP_FINODELK] = "FINODELK";
- gf_fop_list[GF_FOP_ENTRYLK] = "ENTRYLK";
- gf_fop_list[GF_FOP_FENTRYLK] = "FENTRYLK";
- gf_fop_list[GF_FOP_XATTROP] = "XATTROP";
- gf_fop_list[GF_FOP_FXATTROP] = "FXATTROP";
- gf_fop_list[GF_FOP_FSETXATTR] = "FSETXATTR";
- gf_fop_list[GF_FOP_FGETXATTR] = "FGETXATTR";
- gf_fop_list[GF_FOP_RCHECKSUM] = "RCHECKSUM";
- gf_fop_list[GF_FOP_SETATTR] = "SETATTR";
- gf_fop_list[GF_FOP_FSETATTR] = "FSETATTR";
- gf_fop_list[GF_FOP_READDIRP] = "READDIRP";
- gf_fop_list[GF_FOP_GETSPEC] = "GETSPEC";
- gf_fop_list[GF_FOP_FORGET] = "FORGET";
- gf_fop_list[GF_FOP_RELEASE] = "RELEASE";
- gf_fop_list[GF_FOP_RELEASEDIR] = "RELEASEDIR";
-
- gf_fop_list[GF_MGMT_NULL] = "NULL";
- return;
+ return gf_global_mem_acct_enable;
}
+int
+gf_global_mem_acct_enable_set(int val)
+{
+ gf_global_mem_acct_enable = val;
+ return 0;
+}
-/* CTX */
-static glusterfs_ctx_t *glusterfs_ctx;
+static struct xlator_cbks global_cbks = {
+ .forget = NULL,
+ .release = NULL,
+ .releasedir = NULL,
+ .invalidate = NULL,
+ .client_destroy = NULL,
+ .client_disconnect = NULL,
+ .ictxmerge = NULL,
+ .ictxsize = NULL,
+ .fdctxsize = NULL,
+};
+/* This is required to get through the check in graph.c */
+static struct xlator_fops global_fops = {};
-int
-glusterfs_ctx_init ()
+static int
+global_xl_reconfigure(xlator_t *this, dict_t *options)
{
- int ret = 0;
+ int ret = -1;
+ gf_boolean_t bool_opt = _gf_false;
- if (glusterfs_ctx) {
- gf_log_callingfn ("", GF_LOG_WARNING, "init called again");
- goto out;
- }
+ /* This is not added in volume dump, hence adding the options in log
+ would be helpful for debugging later */
+ dict_dump_to_log(options);
- glusterfs_ctx = CALLOC (1, sizeof (*glusterfs_ctx));
- if (!glusterfs_ctx) {
- ret = -1;
- goto out;
- }
+ GF_OPTION_RECONF("measure-latency", bool_opt, options, bool, out);
+ this->ctx->measure_latency = bool_opt;
- INIT_LIST_HEAD (&glusterfs_ctx->graphs);
- INIT_LIST_HEAD (&glusterfs_ctx->mempool_list);
- ret = pthread_mutex_init (&glusterfs_ctx->lock, NULL);
+ GF_OPTION_RECONF("metrics-dump-path", this->ctx->config.metrics_dumppath,
+ options, str, out);
+ /* TODO: add more things here */
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
-glusterfs_ctx_t *
-glusterfs_ctx_get ()
+static int
+global_xl_init(xlator_t *this)
{
- return glusterfs_ctx;
-
-}
+ int ret = -1;
+ gf_boolean_t bool_opt = false;
+ GF_OPTION_INIT("measure-latency", bool_opt, bool, out);
+ this->ctx->measure_latency = bool_opt;
-/* THIS */
+ GF_OPTION_INIT("metrics-dump-path", this->ctx->config.metrics_dumppath, str,
+ out);
-xlator_t global_xlator;
-static pthread_key_t this_xlator_key;
+ ret = 0;
-void
-glusterfs_this_destroy (void *ptr)
-{
- if (ptr)
- FREE (ptr);
+out:
+ return ret;
}
-
-int
-glusterfs_this_init ()
+static void
+global_xl_fini(xlator_t *this)
{
- int ret = 0;
-
- ret = pthread_key_create (&this_xlator_key, glusterfs_this_destroy);
- if (ret != 0) {
- gf_log ("", GF_LOG_WARNING, "failed to create the pthread key");
- return ret;
- }
+ return;
+}
- global_xlator.name = "glusterfs";
- global_xlator.type = "global";
- global_xlator.ctx = glusterfs_ctx;
+struct volume_options global_xl_options[] = {
+ {.key = {"measure-latency"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"global", "context"},
+ .description = "Use this option to toggle measuring latency"},
+ {.key = {"metrics-dump-path"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "{{gluster_workdir}}/metrics",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"global", "context"},
+ .description = "Use this option to set the metrics dump path"},
+
+ {
+ .key = {NULL},
+ },
+};
- INIT_LIST_HEAD (&global_xlator.volume_options);
+static volume_opt_list_t global_xl_opt_list;
- return ret;
+void
+glusterfs_this_init()
+{
+ global_xlator.name = "glusterfs";
+ global_xlator.type = GF_GLOBAL_XLATOR_NAME;
+ global_xlator.cbks = &global_cbks;
+ global_xlator.fops = &global_fops;
+ global_xlator.reconfigure = global_xl_reconfigure;
+ global_xlator.init = global_xl_init;
+ global_xlator.fini = global_xl_fini;
+
+ INIT_LIST_HEAD(&global_xlator.volume_options);
+ INIT_LIST_HEAD(&global_xl_opt_list.list);
+ global_xl_opt_list.given_opt = global_xl_options;
+
+ list_add_tail(&global_xl_opt_list.list, &global_xlator.volume_options);
}
-
xlator_t **
-__glusterfs_this_location ()
+__glusterfs_this_location()
{
- xlator_t **this_location = NULL;
- int ret = 0;
-
- this_location = pthread_getspecific (this_xlator_key);
+ xlator_t **this_location;
- if (!this_location) {
- this_location = CALLOC (1, sizeof (*this_location));
- if (!this_location)
- goto out;
+ this_location = &thread_xlator;
+ if (*this_location == NULL) {
+ thread_xlator = &global_xlator;
+ }
- ret = pthread_setspecific (this_xlator_key, this_location);
- if (ret != 0) {
- gf_log ("", GF_LOG_WARNING, "pthread setspecific failed");
-
- FREE (this_location);
- this_location = NULL;
- goto out;
- }
- }
-out:
- if (this_location) {
- if (!*this_location)
- *this_location = &global_xlator;
- }
- return this_location;
+ return this_location;
}
-
xlator_t *
-glusterfs_this_get ()
+glusterfs_this_get()
{
- xlator_t **this_location = NULL;
-
- this_location = __glusterfs_this_location ();
- if (!this_location)
- return &global_xlator;
-
- return *this_location;
+ return *__glusterfs_this_location();
}
-
-int
-glusterfs_this_set (xlator_t *this)
+void
+glusterfs_this_set(xlator_t *this)
{
- xlator_t **this_location = NULL;
-
- this_location = __glusterfs_this_location ();
- if (!this_location)
- return -ENOMEM;
-
- *this_location = this;
-
- return 0;
+ thread_xlator = this;
}
-/* SYNCTASK */
-
-static pthread_key_t synctask_key;
+/* SYNCOPCTX */
-
-int
-synctask_init ()
+void *
+syncopctx_getctx()
{
- int ret = 0;
-
- ret = pthread_key_create (&synctask_key, NULL);
-
- return ret;
+ return &thread_syncopctx;
}
+/* SYNCTASK */
void *
-synctask_get ()
+synctask_get()
{
- void *synctask = NULL;
-
- synctask = pthread_getspecific (synctask_key);
-
- return synctask;
+ return thread_synctask;
}
-
-int
-synctask_set (void *synctask)
+void
+synctask_set(void *synctask)
{
- int ret = 0;
+ thread_synctask = synctask;
+}
- pthread_setspecific (synctask_key, synctask);
+// UUID_BUFFER
- return ret;
+char *
+glusterfs_uuid_buf_get()
+{
+ return thread_uuid_buf;
}
-//UUID_BUFFER
+/* LKOWNER_BUFFER */
-static pthread_key_t uuid_buf_key;
-static char global_uuid_buf[GF_UUID_BUF_SIZE];
-void
-glusterfs_uuid_buf_destroy (void *ptr)
+char *
+glusterfs_lkowner_buf_get()
{
- if (ptr)
- FREE (ptr);
+ return thread_lkowner_buf;
}
-int
-glusterfs_uuid_buf_init ()
+/* Leaseid buffer */
+
+char *
+glusterfs_leaseid_buf_get()
{
- int ret = 0;
+ char *buf = NULL;
- ret = pthread_key_create (&uuid_buf_key,
- glusterfs_uuid_buf_destroy);
- return ret;
+ buf = thread_leaseid;
+ if (buf == NULL) {
+ buf = thread_leaseid_buf;
+ thread_leaseid = buf;
+ }
+
+ return buf;
}
char *
-glusterfs_uuid_buf_get ()
+glusterfs_leaseid_exist()
{
- char *buf;
- int ret = 0;
-
- buf = pthread_getspecific (uuid_buf_key);
- if(!buf) {
- buf = MALLOC (GF_UUID_BUF_SIZE);
- ret = pthread_setspecific (uuid_buf_key, (void *) buf);
- if(ret)
- buf = global_uuid_buf;
- }
- return buf;
+ return thread_leaseid;
}
-/* LKOWNER_BUFFER */
+static void
+glusterfs_cleanup(void *ptr)
+{
+ if (thread_syncopctx.groups != NULL) {
+ GF_FREE(thread_syncopctx.groups);
+ }
+
+ mem_pool_thread_destructor(NULL);
+}
-static pthread_key_t lkowner_buf_key;
-static char global_lkowner_buf[GF_LKOWNER_BUF_SIZE];
void
-glusterfs_lkowner_buf_destroy (void *ptr)
+gf_thread_needs_cleanup(void)
{
- if (ptr)
- FREE (ptr);
+ /* The value stored in free_key TLS is not really used for anything, but
+ * pthread implementation doesn't call the TLS destruction function unless
+ * it's != NULL. This function must be called whenever something is
+ * allocated for this thread so that glusterfs_cleanup() will be called
+ * and resources can be released. */
+ (void)pthread_setspecific(free_key, (void *)1);
}
-int
-glusterfs_lkowner_buf_init ()
+static void
+gf_globals_init_once()
{
- int ret = 0;
+ int ret = 0;
- ret = pthread_key_create (&lkowner_buf_key,
- glusterfs_lkowner_buf_destroy);
- return ret;
-}
+ glusterfs_this_init();
-char *
-glusterfs_lkowner_buf_get ()
-{
- char *buf;
- int ret = 0;
-
- buf = pthread_getspecific (lkowner_buf_key);
- if(!buf) {
- buf = MALLOC (GF_LKOWNER_BUF_SIZE);
- ret = pthread_setspecific (lkowner_buf_key, (void *) buf);
- if(ret)
- buf = global_lkowner_buf;
- }
- return buf;
+ /* This is needed only to cleanup the potential allocation of
+ * thread_syncopctx.groups. */
+ ret = pthread_key_create(&free_key, glusterfs_cleanup);
+ if (ret != 0) {
+ gf_msg("", GF_LOG_ERROR, ret, LG_MSG_PTHREAD_KEY_CREATE_FAILED,
+ "failed to create the pthread key");
+
+ gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_GLOBAL_INIT_FAILED,
+ "Exiting as global initialization failed");
+
+ exit(ret);
+ }
}
int
-glusterfs_globals_init ()
+glusterfs_globals_init(glusterfs_ctx_t *ctx)
{
- int ret = 0;
-
- gf_op_list_init ();
-
- gf_log_globals_init ();
-
- ret = glusterfs_ctx_init ();
- if (ret) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs context init failed");
- goto out;
- }
-
- ret = glusterfs_this_init ();
- if (ret) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs-translator init failed");
- goto out;
- }
-
- ret = glusterfs_uuid_buf_init ();
- if(ret) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs uuid buffer init failed");
- goto out;
- }
-
- ret = glusterfs_lkowner_buf_init ();
- if(ret) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs lkowner buffer init failed");
- goto out;
- }
-
- gf_mem_acct_enable_set ();
-
- ret = synctask_init ();
- if (ret) {
- gf_log ("", GF_LOG_CRITICAL,
- "ERROR: glusterfs synctask init failed");
- goto out;
- }
-out:
- return ret;
-}
+ int ret = 0;
+ gf_log_globals_init(ctx, GF_LOG_INFO);
-char eventstring[GF_EVENT_MAXVAL+1][64] = {
- "Invalid event",
- "Parent Up",
- "Poll In",
- "Poll Out",
- "Poll Err",
- "Child Up",
- "Child Down",
- "Child Connecting",
- "Child Modified",
- "Transport Cleanup",
- "Transport Connected",
- "Volfile Modified",
- "New Volfile",
- "Translator Info",
- "Xlator Op",
- "Authentication Failed",
- "Invalid event",
-};
+ ret = pthread_once(&globals_inited, gf_globals_init_once);
-/* Copy the string ptr contents if needed for yourself */
-char *
-glusterfs_strevent (glusterfs_event_t ev)
-{
- return eventstring[ev];
+ if (ret)
+ gf_msg("", GF_LOG_CRITICAL, ret, LG_MSG_PTHREAD_FAILED,
+ "pthread_once failed");
+
+ return ret;
}
diff --git a/libglusterfs/src/globals.h b/libglusterfs/src/globals.h
deleted file mode 100644
index e797db184bd..00000000000
--- a/libglusterfs/src/globals.h
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _GLOBALS_H
-#define _GLOBALS_H
-
-#define GF_DEFAULT_BASE_PORT 24007
-
-#include "glusterfs.h"
-
-/* CTX */
-#define CTX (glusterfs_ctx_get())
-
-glusterfs_ctx_t *glusterfs_ctx_get ();
-
-#include "xlator.h"
-
-/* THIS */
-#define THIS (*__glusterfs_this_location())
-
-#define GF_UUID_BUF_SIZE 50
-
-xlator_t **__glusterfs_this_location ();
-xlator_t *glusterfs_this_get ();
-int glusterfs_this_set (xlator_t *);
-
-/* task */
-void *synctask_get ();
-int synctask_set (void *);
-
-/* uuid_buf */
-char *glusterfs_uuid_buf_get();
-char *glusterfs_lkowner_buf_get();
-
-/* init */
-int glusterfs_globals_init (void);
-
-#endif /* !_GLOBALS_H */
diff --git a/libglusterfs/src/glusterfs.h b/libglusterfs/src/glusterfs.h
deleted file mode 100644
index 5acefe5613b..00000000000
--- a/libglusterfs/src/glusterfs.h
+++ /dev/null
@@ -1,429 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _GLUSTERFS_H
-#define _GLUSTERFS_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <netinet/in.h>
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <sys/statvfs.h>
-#include <netdb.h>
-#include <errno.h>
-#include <dirent.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <arpa/inet.h>
-#include <sys/poll.h>
-#include <pthread.h>
-
-#include "list.h"
-#include "logging.h"
-
-#define GF_YES 1
-#define GF_NO 0
-
-#ifndef O_LARGEFILE
-/* savannah bug #20053, patch for compiling on darwin */
-#define O_LARGEFILE 0
-#endif
-
-#ifndef O_DIRECT
-/* savannah bug #20050, #20052 */
-#define O_DIRECT 0 /* From asm/fcntl.h */
-#endif
-
-#ifndef O_DIRECTORY
-/* FreeBSD does not need O_DIRECTORY */
-#define O_DIRECTORY 0
-#endif
-
-#ifndef EBADFD
-/* Mac OS X does not have EBADFD */
-#define EBADFD EBADF
-#endif
-
-#ifndef FNM_EXTMATCH
-#define FNM_EXTMATCH 0
-#endif
-
-#define ZR_MOUNTPOINT_OPT "mountpoint"
-#define ZR_ATTR_TIMEOUT_OPT "attribute-timeout"
-#define ZR_ENTRY_TIMEOUT_OPT "entry-timeout"
-#define ZR_DIRECT_IO_OPT "direct-io-mode"
-#define ZR_STRICT_VOLFILE_CHECK "strict-volfile-check"
-#define ZR_DUMP_FUSE "dump-fuse"
-
-#define GF_XATTR_CLRLK_CMD "glusterfs.clrlk"
-#define GF_XATTR_PATHINFO_KEY "trusted.glusterfs.pathinfo"
-#define GF_XATTR_NODE_UUID_KEY "trusted.glusterfs.node-uuid"
-#define GF_XATTR_VOL_ID_KEY "trusted.glusterfs.volume-id"
-
-#define XATTR_IS_PATHINFO(x) (strncmp (x, GF_XATTR_PATHINFO_KEY, \
- strlen (GF_XATTR_PATHINFO_KEY)) == 0)
-#define XATTR_IS_NODE_UUID(x) (strncmp (x, GF_XATTR_NODE_UUID_KEY, \
- strlen (GF_XATTR_NODE_UUID_KEY)) == 0)
-
-#define GF_XATTR_LINKINFO_KEY "trusted.distribute.linkinfo"
-#define GFID_XATTR_KEY "trusted.gfid"
-
-#define GLUSTERFS_INTERNAL_FOP_KEY "glusterfs-internal-fop"
-
-#define ZR_FILE_CONTENT_STR "glusterfs.file."
-#define ZR_FILE_CONTENT_STRLEN 15
-
-#define GLUSTERFS_OPEN_FD_COUNT "glusterfs.open-fd-count"
-#define GLUSTERFS_INODELK_COUNT "glusterfs.inodelk-count"
-#define GLUSTERFS_ENTRYLK_COUNT "glusterfs.entrylk-count"
-#define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count"
-#define GLUSTERFS_PARENT_ENTRYLK "glusterfs.parent-entrylk"
-#define QUOTA_SIZE_KEY "trusted.glusterfs.quota.size"
-#define GFID_TO_PATH_KEY "glusterfs.gfid2path"
-
-/* Index xlator related */
-#define GF_XATTROP_INDEX_GFID "glusterfs.xattrop_index_gfid"
-
-#define GF_GFIDLESS_LOOKUP "gfidless-lookup"
-/* replace-brick and pump related internal xattrs */
-#define RB_PUMP_CMD_START "glusterfs.pump.start"
-#define RB_PUMP_CMD_PAUSE "glusterfs.pump.pause"
-#define RB_PUMP_CMD_COMMIT "glusterfs.pump.commit"
-#define RB_PUMP_CMD_ABORT "glusterfs.pump.abort"
-#define RB_PUMP_CMD_STATUS "glusterfs.pump.status"
-
-#define POSIX_ACL_DEFAULT_XATTR "system.posix_acl_default"
-#define POSIX_ACL_ACCESS_XATTR "system.posix_acl_access"
-
-#define GLUSTERFS_RDMA_INLINE_THRESHOLD (2048)
-#define GLUSTERFS_RDMA_MAX_HEADER_SIZE (228) /* (sizeof (rdma_header_t) \
- + RDMA_MAX_SEGMENTS \
- * sizeof (rdma_read_chunk_t))
- */
-
-#define GLUSTERFS_RPC_REPLY_SIZE 24
-
-#define ZR_FILE_CONTENT_REQUEST(key) (!strncmp(key, ZR_FILE_CONTENT_STR, \
- ZR_FILE_CONTENT_STRLEN))
-
-/* GlusterFS's maximum supported Auxilary GIDs */
-/* TODO: Keeping it to 200, so that we can fit in 2KB buffer for auth data
- * in RPC server code, if there is ever need for having more aux-gids, then
- * we have to add aux-gid in payload of actors */
-#define GF_MAX_AUX_GROUPS 200
-
-/* NOTE: add members ONLY at the end (just before _MAXVALUE) */
-typedef enum {
- GF_FOP_NULL = 0,
- GF_FOP_STAT,
- GF_FOP_READLINK,
- GF_FOP_MKNOD,
- GF_FOP_MKDIR,
- GF_FOP_UNLINK,
- GF_FOP_RMDIR,
- GF_FOP_SYMLINK,
- GF_FOP_RENAME,
- GF_FOP_LINK,
- GF_FOP_TRUNCATE,
- GF_FOP_OPEN,
- GF_FOP_READ,
- GF_FOP_WRITE,
- GF_FOP_STATFS,
- GF_FOP_FLUSH,
- GF_FOP_FSYNC, /* 15 */
- GF_FOP_SETXATTR,
- GF_FOP_GETXATTR,
- GF_FOP_REMOVEXATTR,
- GF_FOP_OPENDIR,
- GF_FOP_FSYNCDIR,
- GF_FOP_ACCESS,
- GF_FOP_CREATE,
- GF_FOP_FTRUNCATE,
- GF_FOP_FSTAT, /* 25 */
- GF_FOP_LK,
- GF_FOP_LOOKUP,
- GF_FOP_READDIR,
- GF_FOP_INODELK,
- GF_FOP_FINODELK,
- GF_FOP_ENTRYLK,
- GF_FOP_FENTRYLK,
- GF_FOP_XATTROP,
- GF_FOP_FXATTROP,
- GF_FOP_FGETXATTR,
- GF_FOP_FSETXATTR,
- GF_FOP_RCHECKSUM,
- GF_FOP_SETATTR,
- GF_FOP_FSETATTR,
- GF_FOP_READDIRP,
- GF_FOP_FORGET,
- GF_FOP_RELEASE,
- GF_FOP_RELEASEDIR,
- GF_FOP_GETSPEC,
- GF_FOP_FREMOVEXATTR,
- GF_FOP_MAXVALUE,
-} glusterfs_fop_t;
-
-
-typedef enum {
- GF_MGMT_NULL = 0,
- GF_MGMT_MAXVALUE,
-} glusterfs_mgmt_t;
-
-typedef enum {
- GF_OP_TYPE_NULL = 0,
- GF_OP_TYPE_FOP,
- GF_OP_TYPE_MGMT,
- GF_OP_TYPE_MAX,
-} gf_op_type_t;
-
-/* NOTE: all the miscellaneous flags used by GlusterFS should be listed here */
-typedef enum {
- GF_LK_GETLK = 0,
- GF_LK_SETLK,
- GF_LK_SETLKW,
- GF_LK_RESLK_LCK,
- GF_LK_RESLK_LCKW,
- GF_LK_RESLK_UNLCK,
- GF_LK_GETLK_FD,
-} glusterfs_lk_cmds_t;
-
-
-typedef enum {
- GF_LK_F_RDLCK = 0,
- GF_LK_F_WRLCK,
- GF_LK_F_UNLCK,
- GF_LK_EOL,
-} glusterfs_lk_types_t;
-
-typedef enum {
- F_RESLK_LCK = 200,
- F_RESLK_LCKW,
- F_RESLK_UNLCK,
- F_GETLK_FD,
-} glusterfs_lk_recovery_cmds_t;
-
-typedef enum {
- GF_LOCK_POSIX,
- GF_LOCK_INTERNAL
-} gf_lk_domain_t;
-
-
-typedef enum {
- ENTRYLK_LOCK,
- ENTRYLK_UNLOCK,
- ENTRYLK_LOCK_NB
-} entrylk_cmd;
-
-
-typedef enum {
- ENTRYLK_RDLCK,
- ENTRYLK_WRLCK
-} entrylk_type;
-
-
-typedef enum {
- GF_XATTROP_ADD_ARRAY,
- GF_XATTROP_ADD_ARRAY64
-} gf_xattrop_flags_t;
-
-
-#define GF_SET_IF_NOT_PRESENT 0x1 /* default behaviour */
-#define GF_SET_OVERWRITE 0x2 /* Overwrite with the buf given */
-#define GF_SET_DIR_ONLY 0x4
-#define GF_SET_EPOCH_TIME 0x8 /* used by afr dir lookup selfheal */
-
-/* key value which quick read uses to get small files in lookup cbk */
-#define GF_CONTENT_KEY "glusterfs.content"
-
-struct _xlator_cmdline_option {
- struct list_head cmd_args;
- char *volume;
- char *key;
- char *value;
-};
-typedef struct _xlator_cmdline_option xlator_cmdline_option_t;
-
-
-#define GF_OPTION_ENABLE _gf_true
-#define GF_OPTION_DISABLE _gf_false
-#define GF_OPTION_DEFERRED 2
-
-struct _cmd_args {
- /* basic options */
- char *volfile_server;
- char *volfile;
- char *log_server;
- gf_loglevel_t log_level;
- char *log_file;
- int32_t max_connect_attempts;
- /* advanced options */
- uint32_t volfile_server_port;
- char *volfile_server_transport;
- uint32_t log_server_port;
- char *pid_file;
- char *sock_file;
- int no_daemon_mode;
- char *run_id;
- int debug_mode;
- int read_only;
- int acl;
- int selinux;
- int worm;
- int mac_compat;
- struct list_head xlator_options; /* list of xlator_option_t */
-
- /* fuse options */
- int fuse_direct_io_mode;
- int volfile_check;
- double fuse_entry_timeout;
- double fuse_attribute_timeout;
- char *volume_name;
- int fuse_nodev;
- int fuse_nosuid;
- char *dump_fuse;
- pid_t client_pid;
- int client_pid_set;
- unsigned uid_map_root;
-
-
- /* key args */
- char *mount_point;
- char *volfile_id;
-
- /* required for portmap */
- int brick_port;
- char *brick_name;
- int brick_port2;
-};
-typedef struct _cmd_args cmd_args_t;
-
-
-struct _glusterfs_graph {
- struct list_head list;
- char graph_uuid[128];
- struct timeval dob;
- void *first;
- void *top; /* selected by -n */
- int xl_count;
- int id; /* Used in logging */
- int used; /* Should be set when fuse gets
- first CHILD_UP */
- uint32_t volfile_checksum;
-};
-typedef struct _glusterfs_graph glusterfs_graph_t;
-
-
-struct _glusterfs_ctx {
- cmd_args_t cmd_args;
- char *process_uuid;
- FILE *pidfp;
- char fin;
- void *timer;
- void *ib;
- void *pool;
- void *event_pool;
- void *iobuf_pool;
- pthread_mutex_t lock;
- size_t page_size;
- struct list_head graphs; /* double linked list of graphs - one per volfile parse */
- glusterfs_graph_t *active; /* the latest graph in use */
- void *master; /* fuse, or libglusterfsclient (however, not protocol/server) */
- void *mgmt; /* xlator implementing MOPs for centralized logging, volfile server */
- void *listener; /* listener of the commands from glusterd */
- unsigned char measure_latency; /* toggle switch for latency measurement */
- pthread_t sigwaiter;
- struct mem_pool *stub_mem_pool;
- unsigned char cleanup_started;
- int graph_id; /* Incremented per graph, value should
- indicate how many times the graph has
- got changed */
- pid_t mnt_pid; /* pid of the mount agent */
- int process_mode; /*mode in which process is runninng*/
- struct syncenv *env; /* The env pointer to the synctasks */
-
- struct list_head mempool_list; /* used to keep a global list of
- mempools, used to log details of
- mempool in statedump */
- char *statedump_path;
-
- struct mem_pool *dict_pool;
- struct mem_pool *dict_pair_pool;
- struct mem_pool *dict_data_pool;
-
- int mem_accounting; /* if value is other than 0, it
- will be set */
-};
-typedef struct _glusterfs_ctx glusterfs_ctx_t;
-
-
-/* If you edit this structure then, make a corresponding change in
- * globals.c in the eventstring.
- */
-typedef enum {
- GF_EVENT_PARENT_UP = 1,
- GF_EVENT_POLLIN,
- GF_EVENT_POLLOUT,
- GF_EVENT_POLLERR,
- GF_EVENT_CHILD_UP,
- GF_EVENT_CHILD_DOWN,
- GF_EVENT_CHILD_CONNECTING,
- GF_EVENT_CHILD_MODIFIED,
- GF_EVENT_TRANSPORT_CLEANUP,
- GF_EVENT_TRANSPORT_CONNECTED,
- GF_EVENT_VOLFILE_MODIFIED,
- GF_EVENT_GRAPH_NEW,
- GF_EVENT_TRANSLATOR_INFO,
- GF_EVENT_TRANSLATOR_OP,
- GF_EVENT_AUTH_FAILED,
- GF_EVENT_VOLUME_DEFRAG,
- GF_EVENT_PARENT_DOWN,
- GF_EVENT_MAXVAL,
-} glusterfs_event_t;
-
-/* gf_lkowner_t is defined in lkowner.h */
-#include "lkowner.h"
-
-struct gf_flock {
- short l_type;
- short l_whence;
- off_t l_start;
- off_t l_len;
- pid_t l_pid;
- gf_lkowner_t l_owner;
-};
-
-extern char *glusterfs_strevent (glusterfs_event_t ev);
-
-#define GF_MUST_CHECK __attribute__((warn_unused_result))
-/*
- * Some macros (e.g. ALLOC_OR_GOTO) set variables in function scope, but the
- * calling function might not only declare the variable to keep the macro happy
- * and not use it otherwise. In such cases, the following can be used to
- * suppress the "set but not used" warning that would otherwise occur.
- */
-#define GF_UNUSED __attribute__((unused))
-
-int glusterfs_graph_prepare (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx);
-int glusterfs_graph_destroy (glusterfs_graph_t *graph);
-int glusterfs_graph_activate (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx);
-glusterfs_graph_t *glusterfs_graph_construct (FILE *fp);
-glusterfs_graph_t *glusterfs_graph_new ();
-int glusterfs_graph_reconfigure (glusterfs_graph_t *oldgraph,
- glusterfs_graph_t *newgraph);
-
-#endif /* _GLUSTERFS_H */
diff --git a/libglusterfs/src/glusterfs/async.h b/libglusterfs/src/glusterfs/async.h
new file mode 100644
index 00000000000..d1d70ae0bc7
--- /dev/null
+++ b/libglusterfs/src/glusterfs/async.h
@@ -0,0 +1,209 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc <https://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __GLUSTERFS_ASYNC_H__
+#define __GLUSTERFS_ASYNC_H__
+
+#define _LGPL_SOURCE
+
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
+
+#ifdef URCU_OLD
+
+/* TODO: Fix the include paths. Since this is a .h included from many places
+ * it makes no sense to append a '-I$(CONTRIBDIR)/userspace-rcu/' to each
+ * Makefile.am. I've also seen some problems with CI builders (they
+ * failed to find the include files, but the same source on another setup
+ * is working fine). */
+#include "wfcqueue.h"
+#include "wfstack.h"
+
+#else /* !URCU_OLD */
+
+#include <urcu/wfcqueue.h>
+#include <urcu/wfstack.h>
+
+#endif /* URCU_OLD */
+
+#include "glusterfs/xlator.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/list.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+/* This is the name prefix that all worker threads will have. A number will
+ * be added to differentiate them. */
+#define GF_ASYNC_THREAD_NAME "tpw"
+
+/* This value determines the maximum number of threads that are allowed. */
+#define GF_ASYNC_MAX_THREADS 128
+
+/* This value determines how many additional threads will be started but will
+ * remain inactive until they are explicitly activated by the leader. This is
+ * useful to react faster to bursts of load, but at the same time we minimize
+ * contention if they are not really needed to handle current load.
+ *
+ * TODO: Instead of a fixed number, it would probably be better to use a
+ * prcentage of the available cores. */
+#define GF_ASYNC_SPARE_THREADS 2
+
+/* This value determines the signal used to wake the leader when new work has
+ * been added to the queue. To do so we reuse SIGALRM, since the most logical
+ * candidates (SIGUSR1/SIGUSR2) are already used. This signal must not be used
+ * by anything else in the process. */
+#define GF_ASYNC_SIGQUEUE SIGALRM
+
+/* This value determines the signal that will be used to transfer leader role
+ * to other workers. */
+#define GF_ASYNC_SIGCTRL SIGVTALRM
+
+#define gf_async_warning(_err, _msg, _args...) \
+ gf_msg("async", GF_LOG_WARNING, -(_err), LG_MSG_ASYNC_WARNING, _msg, \
+ ##_args)
+
+#define gf_async_error(_err, _msg, _args...) \
+ gf_msg("async", GF_LOG_ERROR, -(_err), LG_MSG_ASYNC_FAILURE, _msg, ##_args)
+
+#define gf_async_fatal(_err, _msg, _args...) \
+ do { \
+ GF_ABORT("Critical error in async module. Unable to continue. (" _msg \
+ "). Error %d.", \
+ ##_args, -(_err)); \
+ } while (0)
+
+struct _gf_async;
+typedef struct _gf_async gf_async_t;
+
+struct _gf_async_worker;
+typedef struct _gf_async_worker gf_async_worker_t;
+
+struct _gf_async_queue;
+typedef struct _gf_async_queue gf_async_queue_t;
+
+struct _gf_async_control;
+typedef struct _gf_async_control gf_async_control_t;
+
+typedef void (*gf_async_callback_f)(xlator_t *xl, gf_async_t *async);
+
+struct _gf_async {
+ /* TODO: remove dependency on xl/THIS. */
+ xlator_t *xl;
+ gf_async_callback_f cbk;
+ struct cds_wfcq_node queue;
+};
+
+struct _gf_async_worker {
+ /* Used to send asynchronous jobs related to the worker. */
+ gf_async_t async;
+
+ /* Member of the available workers stack. */
+ struct cds_wfs_node stack;
+
+ /* Thread object of the current worker. */
+ pthread_t thread;
+
+ /* Unique identifier of this worker. */
+ int32_t id;
+
+ /* Indicates if this worker is enabled. */
+ bool running;
+};
+
+struct _gf_async_queue {
+ /* Structures needed to manage a wait-free queue. For better performance
+ * they are placed in two different cache lines, as recommended by URCU
+ * documentation, even though in our case some threads will be producers
+ * and consumers at the same time. */
+ struct cds_wfcq_head head __attribute__((aligned(64)));
+ struct cds_wfcq_tail tail __attribute__((aligned(64)));
+};
+
+#define GF_ASYNC_COUNTS(_run, _stop) (((uint32_t)(_run) << 16) + (_stop))
+#define GF_ASYNC_COUNT_RUNNING(_count) ((_count) >> 16)
+#define GF_ASYNC_COUNT_STOPPING(_count) ((_count)&65535)
+
+struct _gf_async_control {
+ gf_async_queue_t queue;
+
+ /* Stack of unused workers. */
+ struct __cds_wfs_stack available;
+
+ /* Array of preallocated worker structures. */
+ gf_async_worker_t *table;
+
+ /* Used to synchronize main thread with workers on termination. */
+ pthread_barrier_t sync;
+
+ /* The id of the last thread that will be used for synchronization. */
+ pthread_t sync_thread;
+
+ /* Signal mask to wait for control signals from leader. */
+ sigset_t sigmask_ctrl;
+
+ /* Signal mask to wait for queued items. */
+ sigset_t sigmask_queue;
+
+ /* Saved signal handlers. */
+ struct sigaction handler_ctrl;
+ struct sigaction handler_queue;
+
+ /* PID of the current process. */
+ pid_t pid;
+
+ /* Maximum number of allowed threads. */
+ uint32_t max_threads;
+
+ /* Current number of running and stopping workers. This value is split
+ * into 2 16-bits fields to track both counters atomically at the same
+ * time. */
+ uint32_t counts;
+
+ /* It's used to control whether the asynchronous infrastructure is used
+ * or not. */
+ bool enabled;
+};
+
+extern gf_async_control_t gf_async_ctrl;
+
+int32_t
+gf_async_init(glusterfs_ctx_t *ctx);
+
+void
+gf_async_fini(void);
+
+void
+gf_async_adjust_threads(int32_t threads);
+
+static inline void
+gf_async(gf_async_t *async, xlator_t *xl, gf_async_callback_f cbk)
+{
+ if (!gf_async_ctrl.enabled) {
+ cbk(xl, async);
+ return;
+ }
+
+ async->xl = xl;
+ async->cbk = cbk;
+ cds_wfcq_node_init(&async->queue);
+ if (caa_unlikely(!cds_wfcq_enqueue(&gf_async_ctrl.queue.head,
+ &gf_async_ctrl.queue.tail,
+ &async->queue))) {
+ /* The queue was empty, so the leader could be sleeping. We need to
+ * wake it so that the new item can be processed. If the queue was not
+ * empty, we don't need to do anything special since the leader will
+ * take care of it. */
+ if (caa_unlikely(kill(gf_async_ctrl.pid, GF_ASYNC_SIGQUEUE) < 0)) {
+ gf_async_fatal(errno, "Unable to wake leader worker.");
+ };
+ }
+}
+
+#endif /* !__GLUSTERFS_ASYNC_H__ */
diff --git a/libglusterfs/src/glusterfs/atomic.h b/libglusterfs/src/glusterfs/atomic.h
new file mode 100644
index 00000000000..ced81748218
--- /dev/null
+++ b/libglusterfs/src/glusterfs/atomic.h
@@ -0,0 +1,459 @@
+/*
+ Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _ATOMIC_H
+#define _ATOMIC_H
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+#include "glusterfs/locking.h"
+
+/* Macros used to join two arguments and generate a new macro name. */
+#define GF_ATOMIC_MACRO_1(_macro) _macro
+#define GF_ATOMIC_MACRO(_base, _name) GF_ATOMIC_MACRO_1(_base##_name)
+
+/* There's a problem on 32-bit architectures when we try to use atomic
+ * builtins with 64-bit types. Only way to solve the problem is to use
+ * a mutex to protect the access to the atomic, but we don't want to
+ * use mutexes for other smaller types that could work with the atomic
+ * builtins.
+ *
+ * So on each atomic type we add a field for the mutex if atomic operation
+ * is not supported and a dummy zero size field if it's supported. This way
+ * we can have different atomic types, some with a mutex and some without.
+ *
+ * To define these types, we use two macros:
+ *
+ * GF_ATOMIC_MUTEX_FIELD_0 = char lk[0]
+ * GF_ATOMIC_MUTEX_FILED_1 = gf_lock_t lk
+ *
+ * Both macros define the 'lk' field that will be used in the atomic
+ * structure. One when the atomic is supported by the architecture and
+ * another when not. We need to define the field even if it won't be
+ * used. Otherwise the compiler will return an error.
+ *
+ * Now we need to take the mutex or not depending on the existence of
+ * the mutex field in the structure. To do so we check the size of the
+ * structure, and if it's bigger than uint64_t (all structures with a
+ * mutex will be bigger), we use the mutex-based version. Otherwise we
+ * use the atomic builtin. This check is easily optimized out by the
+ * compiler, leaving a clean and efficient compiled code. */
+
+#define GF_ATOMIC_MUTEX_FIELD_0 char lk[0]
+#define GF_ATOMIC_MUTEX_FIELD_1 gf_lock_t lk
+
+/* We'll use SIZEOF_LONG to determine the architecture. 32-bit machines
+ * will have 4 here, while 64-bit machines will have 8. If additional
+ * needs or restrictions appear on other platforms, these tests can be
+ * extended to handle them. */
+
+/* GF_ATOMIC_SIZE_X macros map each type size to one of the
+ * GF_ATOMIC_MUTEX_FIELD_X macros, depending on detected conditions. */
+
+#if defined(HAVE_ATOMIC_BUILTINS) || defined(HAVE_SYNC_BUILTINS)
+
+#define GF_ATOMIC_SIZE_1 GF_ATOMIC_MUTEX_FIELD_0
+#define GF_ATOMIC_SIZE_2 GF_ATOMIC_MUTEX_FIELD_0
+#define GF_ATOMIC_SIZE_4 GF_ATOMIC_MUTEX_FIELD_0
+
+#if SIZEOF_LONG >= 8
+#define GF_ATOMIC_SIZE_8 GF_ATOMIC_MUTEX_FIELD_0
+#endif
+
+#endif /* HAVE_(ATOMIC|SYNC)_BUILTINS */
+
+/* Any GF_ATOMIC_SIZE_X macro not yet defined will use the mutex version */
+#ifndef GF_ATOMIC_SIZE_1
+#define GF_ATOMIC_SIZE_1 GF_ATOMIC_MUTEX_FIELD_1
+#endif
+
+#ifndef GF_ATOMIC_SIZE_2
+#define GF_ATOMIC_SIZE_2 GF_ATOMIC_MUTEX_FIELD_1
+#endif
+
+#ifndef GF_ATOMIC_SIZE_4
+#define GF_ATOMIC_SIZE_4 GF_ATOMIC_MUTEX_FIELD_1
+#endif
+
+#ifndef GF_ATOMIC_SIZE_8
+#define GF_ATOMIC_SIZE_8 GF_ATOMIC_MUTEX_FIELD_1
+#endif
+
+/* This macro is used to define all atomic types supported. First field
+ * represents the size of the type in bytes, and the second one the name. */
+#define GF_ATOMIC_TYPE(_size, _name) \
+ typedef struct _gf_atomic_##_name##_t { \
+ GF_ATOMIC_MACRO(GF_ATOMIC_SIZE_, _size); \
+ _name##_t value; \
+ } gf_atomic_##_name##_t
+
+/* The atomic types we support */
+GF_ATOMIC_TYPE(1, int8); /* gf_atomic_int8_t */
+GF_ATOMIC_TYPE(2, int16); /* gf_atomic_int16_t */
+GF_ATOMIC_TYPE(4, int32); /* gf_atomic_int32_t */
+GF_ATOMIC_TYPE(8, int64); /* gf_atomic_int64_t */
+GF_ATOMIC_TYPE(SIZEOF_LONG, intptr); /* gf_atomic_intptr_t */
+GF_ATOMIC_TYPE(1, uint8); /* gf_atomic_uint8_t */
+GF_ATOMIC_TYPE(2, uint16); /* gf_atomic_uint16_t */
+GF_ATOMIC_TYPE(4, uint32); /* gf_atomic_uint32_t */
+GF_ATOMIC_TYPE(8, uint64); /* gf_atomic_uint64_t */
+GF_ATOMIC_TYPE(SIZEOF_LONG, uintptr); /* gf_atomic_uintptr_t */
+
+/* Define the default atomic type as int64_t */
+#define gf_atomic_t gf_atomic_int64_t
+
+/* This macro will choose between the mutex based version and the atomic
+ * builtin version depending on the size of the atomic structure. */
+#define GF_ATOMIC_CHOOSE(_atomic, _op, _args...) \
+ ((sizeof(_atomic) > sizeof(uint64_t)) \
+ ? ({ \
+ GF_ATOMIC_MACRO(GF_ATOMIC_LOCK_, _op) \
+ (_atomic, ##_args); \
+ }) \
+ : ({ \
+ GF_ATOMIC_MACRO(GF_ATOMIC_BASE_, _op) \
+ (_atomic, ##_args); \
+ }))
+
+/* Macros to implement the mutex-based atomics. */
+#define GF_ATOMIC_OP_PREPARE(_atomic, _name) \
+ typeof(_atomic) *__atomic = &(_atomic); \
+ gf_lock_t *__lock = (gf_lock_t *)&__atomic->lk; \
+ LOCK(__lock); \
+ typeof(__atomic->value) _name = __atomic->value
+
+#define GF_ATOMIC_OP_STORE(_value) (__atomic->value = (_value))
+
+#define GF_ATOMIC_OP_RETURN(_value) \
+ ({ \
+ UNLOCK(__lock); \
+ _value; \
+ })
+
+#define GF_ATOMIC_LOCK_INIT(_atomic, _value) \
+ do { \
+ typeof(_atomic) *__atomic = &(_atomic); \
+ LOCK_INIT((gf_lock_t *)&__atomic->lk); \
+ __atomic->value = (_value); \
+ } while (0)
+
+#define GF_ATOMIC_LOCK_GET(_atomic) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_ADD(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value += (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_SUB(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value -= (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_AND(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value &= (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_OR(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value |= (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_XOR(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value ^= (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_NAND(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value = ~(__value & (_value))); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_FETCH_ADD(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value + (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_FETCH_SUB(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value - (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_FETCH_AND(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value &(_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_FETCH_OR(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value | (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_FETCH_XOR(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(__value ^ (_value)); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_FETCH_NAND(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(~(__value & (_value))); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_SWAP(_atomic, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ GF_ATOMIC_OP_STORE(_value); \
+ GF_ATOMIC_OP_RETURN(__value); \
+ })
+
+#define GF_ATOMIC_LOCK_CMP_SWAP(_atomic, _expected, _value) \
+ ({ \
+ GF_ATOMIC_OP_PREPARE(_atomic, __value); \
+ bool __ret = (__value == (_expected)); \
+ if (__ret) { \
+ GF_ATOMIC_OP_STORE(_value); \
+ } \
+ GF_ATOMIC_OP_RETURN(__ret); \
+ })
+
+#if defined(HAVE_ATOMIC_BUILTINS)
+
+/* If compiler supports __atomic builtins, we use them. */
+
+#define GF_ATOMIC_BASE_INIT(_atomic, _value) \
+ __atomic_store_n(&(_atomic).value, (_value), __ATOMIC_RELEASE)
+
+#define GF_ATOMIC_BASE_GET(_atomic) \
+ __atomic_load_n(&(_atomic).value, __ATOMIC_ACQUIRE)
+
+#define GF_ATOMIC_BASE_ADD(_atomic, _value) \
+ __atomic_add_fetch(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_SUB(_atomic, _value) \
+ __atomic_sub_fetch(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_AND(_atomic, _value) \
+ __atomic_and_fetch(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_OR(_atomic, _value) \
+ __atomic_or_fetch(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_XOR(_atomic, _value) \
+ __atomic_xor_fetch(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_NAND(_atomic, _value) \
+ __atomic_nand_fetch(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_FETCH_ADD(_atomic, _value) \
+ __atomic_fetch_add(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_FETCH_SUB(_atomic, _value) \
+ __atomic_fetch_sub(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_FETCH_AND(_atomic, _value) \
+ __atomic_fetch_and(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_FETCH_OR(_atomic, _value) \
+ __atomic_fetch_or(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_FETCH_XOR(_atomic, _value) \
+ __atomic_fetch_xor(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_FETCH_NAND(_atomic, _value) \
+ __atomic_fetch_nand(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_SWAP(_atomic, _value) \
+ __atomic_exchange_n(&(_atomic).value, (_value), __ATOMIC_ACQ_REL)
+
+#define GF_ATOMIC_BASE_CMP_SWAP(_atomic, _expected, _value) \
+ ({ \
+ typeof((_atomic).value) __expected = (_expected); \
+ __atomic_compare_exchange_n(&(_atomic).value, &__expected, (_value), \
+ 0, __ATOMIC_ACQ_REL, __ATOMIC_ACQUIRE); \
+ })
+
+#elif defined(HAVE_SYNC_BUILTINS)
+
+/* If compiler doesn't support __atomic builtins but supports __sync builtins,
+ * we use them. */
+
+#define GF_ATOMIC_BASE_INIT(_atomic, _value) \
+ do { \
+ (_atomic).value = (_value); \
+ __sync_synchronize(); \
+ } while (0)
+
+#define GF_ATOMIC_BASE_ADD(_atomic, _value) \
+ __sync_add_and_fetch(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_SUB(_atomic, _value) \
+ __sync_sub_and_fetch(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_AND(_atomic, _value) \
+ __sync_and_and_fetch(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_OR(_atomic, _value) \
+ __sync_or_and_fetch(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_XOR(_atomic, _value) \
+ __sync_xor_and_fetch(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_NAND(_atomic, _value) \
+ __sync_nand_and_fetch(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_FETCH_ADD(_atomic, _value) \
+ __sync_fetch_and_add(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_FETCH_SUB(_atomic, _value) \
+ __sync_fetch_and_sub(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_FETCH_AND(_atomic, _value) \
+ __sync_fetch_and_and(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_FETCH_OR(_atomic, _value) \
+ __sync_fetch_and_or(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_FETCH_XOR(_atomic, _value) \
+ __sync_fetch_and_xor(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_FETCH_NAND(_atomic, _value) \
+ __sync_fetch_and_nand(&(_atomic).value, (_value))
+
+#define GF_ATOMIC_BASE_SWAP(_atomic, _value) \
+ ({ \
+ __sync_synchronize(); \
+ __sync_lock_test_and_set(&(_atomic).value, (_value)); \
+ })
+
+#define GF_ATOMIC_BASE_CMP_SWAP(_atomic, _expected, _value) \
+ __sync_bool_compare_and_swap(&(_atomic).value, (_expected), (_value))
+
+#define GF_ATOMIC_BASE_GET(_atomic) GF_ATOMIC_BASE_ADD(_atomic, 0)
+
+#else /* !HAVE_ATOMIC_BUILTINS && !HAVE_SYNC_BUILTINS */
+
+/* The compiler doesn't support any atomic builtin. We fallback to the
+ * mutex-based implementation. */
+
+#define GF_ATOMIC_BASE_INIT(_atomic, _value) \
+ GF_ATOMIC_LOCK_INIT(_atomic, _value)
+
+#define GF_ATOMIC_BASE_GET(_atomic) GF_ATOMIC_LOCK_GET(_atomic)
+
+#define GF_ATOMIC_BASE_ADD(_atomic, _value) GF_ATOMIC_LOCK_ADD(_atomic, _value)
+
+#define GF_ATOMIC_BASE_SUB(_atomic, _value) GF_ATOMIC_LOCK_SUB(_atomic, _value)
+
+#define GF_ATOMIC_BASE_AND(_atomic, _value) GF_ATOMIC_LOCK_AND(_atomic, _value)
+
+#define GF_ATOMIC_BASE_OR(_atomic, _value) GF_ATOMIC_LOCK_OR(_atomic, _value)
+
+#define GF_ATOMIC_BASE_XOR(_atomic, _value) GF_ATOMIC_LOCK_XOR(_atomic, _value)
+
+#define GF_ATOMIC_BASE_NAND(_atomic, _value) \
+ GF_ATOMIC_LOCK_NAND(_atomic, _value)
+
+#define GF_ATOMIC_BASE_FETCH_ADD(_atomic, _value) \
+ GF_ATOMIC_LOCK_FETCH_ADD(_atomic, _value)
+
+#define GF_ATOMIC_BASE_FETCH_SUB(_atomic, _value) \
+ GF_ATOMIC_LOCK_FETCH_SUB(_atomic, _value)
+
+#define GF_ATOMIC_BASE_FETCH_AND(_atomic, _value) \
+ GF_ATOMIC_LOCK_FETCH_AND(_atomic, _value)
+
+#define GF_ATOMIC_BASE_FETCH_OR(_atomic, _value) \
+ GF_ATOMIC_LOCK_FETCH_OR(_atomic, _value)
+
+#define GF_ATOMIC_BASE_FETCH_XOR(_atomic, _value) \
+ GF_ATOMIC_LOCK_FETCH_XOR(_atomic, _value)
+
+#define GF_ATOMIC_BASE_FETCH_NAND(_atomic, _value) \
+ GF_ATOMIC_LOCK_FETCH_NAND(_atomic, _value)
+
+#define GF_ATOMIC_BASE_SWAP(_atomic, _value) \
+ GF_ATOMIC_LOCK_SWAP(_atomic, _value)
+
+#define GF_ATOMIC_BASE_CMP_SWAP(_atomic, _expected, _value) \
+ GF_ATOMIC_LOCK_CMP_SWAP(_atomic, _expected, _value)
+
+#endif /* HAVE_(ATOMIC|SYNC)_BUILTINS */
+
+/* Here we declare the real atomic macros available to the user. */
+
+/* All macros have a 'gf_atomic_xxx' as 1st argument */
+
+#define GF_ATOMIC_INIT(_atomic, _value) GF_ATOMIC_CHOOSE(_atomic, INIT, _value)
+#define GF_ATOMIC_GET(_atomic) GF_ATOMIC_CHOOSE(_atomic, GET)
+#define GF_ATOMIC_ADD(_atomic, _value) GF_ATOMIC_CHOOSE(_atomic, ADD, _value)
+#define GF_ATOMIC_SUB(_atomic, _value) GF_ATOMIC_CHOOSE(_atomic, SUB, _value)
+#define GF_ATOMIC_AND(_atomic, _value) GF_ATOMIC_CHOOSE(_atomic, AND, _value)
+#define GF_ATOMIC_OR(_atomic, _value) GF_ATOMIC_CHOOSE(_atomic, OR, _value)
+#define GF_ATOMIC_XOR(_atomic, _value) GF_ATOMIC_CHOOSE(_atomic, XOR, _value)
+#define GF_ATOMIC_NAND(_atomic, _value) GF_ATOMIC_CHOOSE(_atomic, NAND, _value)
+
+#define GF_ATOMIC_FETCH_ADD(_atomic, _value) \
+ GF_ATOMIC_CHOOSE(_atomic, FETCH_ADD, _value)
+
+#define GF_ATOMIC_FETCH_SUB(_atomic, _value) \
+ GF_ATOMIC_CHOOSE(_atomic, FETCH_SUB, _value)
+
+#define GF_ATOMIC_FETCH_AND(_atomic, _value) \
+ GF_ATOMIC_CHOOSE(_atomic, FETCH_AND, _value)
+
+#define GF_ATOMIC_FETCH_OR(_atomic, _value) \
+ GF_ATOMIC_CHOOSE(_atomic, FETCH_OR, _value)
+
+#define GF_ATOMIC_FETCH_XOR(_atomic, _value) \
+ GF_ATOMIC_CHOOSE(_atomic, FETCH_XOR, _value)
+
+#define GF_ATOMIC_FETCH_NAND(_atomic, _value) \
+ GF_ATOMIC_CHOOSE(_atomic, FETCH_NAND, _value)
+
+#define GF_ATOMIC_SWAP(_atomic, _value) GF_ATOMIC_CHOOSE(_atomic, SWAP, _value)
+
+#define GF_ATOMIC_CMP_SWAP(_atomic, _expected, _value) \
+ GF_ATOMIC_CHOOSE(_atomic, CMP_SWAP, _expected, _value)
+
+#define GF_ATOMIC_INC(_atomic) GF_ATOMIC_ADD(_atomic, 1)
+#define GF_ATOMIC_DEC(_atomic) GF_ATOMIC_SUB(_atomic, 1)
+#define GF_ATOMIC_FETCH_INC(_atomic) GF_ATOMIC_FETCH_ADD(_atomic, 1)
+#define GF_ATOMIC_FETCH_DEC(_atomic) GF_ATOMIC_FETCH_SUB(_atomic, 1)
+
+#endif /* _ATOMIC_H */
diff --git a/libglusterfs/src/glusterfs/byte-order.h b/libglusterfs/src/glusterfs/byte-order.h
new file mode 100644
index 00000000000..fd8cef9e58d
--- /dev/null
+++ b/libglusterfs/src/glusterfs/byte-order.h
@@ -0,0 +1,279 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _BYTE_ORDER_H
+#define _BYTE_ORDER_H
+
+#include <inttypes.h>
+
+#define LS1 0x00ffU
+#define MS1 0xff00U
+#define LS2 0x0000ffffU
+#define MS2 0xffff0000U
+#define LS4 0x00000000ffffffffULL
+#define MS4 0xffffffff00000000ULL
+
+static uint16_t (*hton16)(uint16_t);
+static uint32_t (*hton32)(uint32_t);
+static uint64_t (*hton64)(uint64_t);
+
+#define ntoh16 hton16
+#define ntoh32 hton32
+#define ntoh64 hton64
+
+static uint16_t (*htole16)(uint16_t);
+static uint32_t (*htole32)(uint32_t);
+static uint64_t (*htole64)(uint64_t);
+
+#define letoh16 htole16
+#define letoh32 htole32
+#define letoh64 htole64
+
+static uint16_t (*htobe16)(uint16_t);
+static uint32_t (*htobe32)(uint32_t);
+static uint64_t (*htobe64)(uint64_t);
+
+#define betoh16 htobe16
+#define betoh32 htobe32
+#define betoh64 htobe64
+
+#define do_swap2(x) (((x & LS1) << 8) | (((x & MS1) >> 8)))
+#define do_swap4(x) ((do_swap2(x & LS2) << 16) | (do_swap2((x & MS2) >> 16)))
+#define do_swap8(x) ((do_swap4(x & LS4) << 32) | (do_swap4((x & MS4) >> 32)))
+
+static inline uint16_t
+__swap16(uint16_t x)
+{
+ return do_swap2(x);
+}
+
+static inline uint32_t
+__swap32(uint32_t x)
+{
+ return do_swap4(x);
+}
+
+static inline uint64_t
+__swap64(uint64_t x)
+{
+ return do_swap8(x);
+}
+
+static inline uint16_t
+__noswap16(uint16_t x)
+{
+ return x;
+}
+
+static inline uint32_t
+__noswap32(uint32_t x)
+{
+ return x;
+}
+
+static inline uint64_t
+__noswap64(uint64_t x)
+{
+ return x;
+}
+
+static inline uint16_t
+__byte_order_n16(uint16_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ hton16 = __swap16;
+ hton32 = __swap32;
+ hton64 = __swap64;
+ } else {
+ /* cpu is be */
+ hton16 = __noswap16;
+ hton32 = __noswap32;
+ hton64 = __noswap64;
+ }
+
+ return hton16(i);
+}
+
+static inline uint32_t
+__byte_order_n32(uint32_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ hton16 = __swap16;
+ hton32 = __swap32;
+ hton64 = __swap64;
+ } else {
+ /* cpu is be */
+ hton16 = __noswap16;
+ hton32 = __noswap32;
+ hton64 = __noswap64;
+ }
+
+ return hton32(i);
+}
+
+static inline uint64_t
+__byte_order_n64(uint64_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ hton16 = __swap16;
+ hton32 = __swap32;
+ hton64 = __swap64;
+ } else {
+ /* cpu is be */
+ hton16 = __noswap16;
+ hton32 = __noswap32;
+ hton64 = __noswap64;
+ }
+
+ return hton64(i);
+}
+
+static uint16_t (*hton16)(uint16_t) = __byte_order_n16;
+static uint32_t (*hton32)(uint32_t) = __byte_order_n32;
+static uint64_t (*hton64)(uint64_t) = __byte_order_n64;
+
+static inline uint16_t
+__byte_order_le16(uint16_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htole16 = __noswap16;
+ htole32 = __noswap32;
+ htole64 = __noswap64;
+ } else {
+ /* cpu is be */
+ htole16 = __swap16;
+ htole32 = __swap32;
+ htole64 = __swap64;
+ }
+
+ return htole16(i);
+}
+
+static inline uint32_t
+__byte_order_le32(uint32_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htole16 = __noswap16;
+ htole32 = __noswap32;
+ htole64 = __noswap64;
+ } else {
+ /* cpu is be */
+ htole16 = __swap16;
+ htole32 = __swap32;
+ htole64 = __swap64;
+ }
+
+ return htole32(i);
+}
+
+static inline uint64_t
+__byte_order_le64(uint64_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htole16 = __noswap16;
+ htole32 = __noswap32;
+ htole64 = __noswap64;
+ } else {
+ /* cpu is be */
+ htole16 = __swap16;
+ htole32 = __swap32;
+ htole64 = __swap64;
+ }
+
+ return htole64(i);
+}
+
+static uint16_t (*htole16)(uint16_t) = __byte_order_le16;
+static uint32_t (*htole32)(uint32_t) = __byte_order_le32;
+static uint64_t (*htole64)(uint64_t) = __byte_order_le64;
+
+static inline uint16_t
+__byte_order_be16(uint16_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htobe16 = __swap16;
+ htobe32 = __swap32;
+ htobe64 = __swap64;
+ } else {
+ /* cpu is be */
+ htobe16 = __noswap16;
+ htobe32 = __noswap32;
+ htobe64 = __noswap64;
+ }
+
+ return htobe16(i);
+}
+
+static inline uint32_t
+__byte_order_be32(uint32_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htobe16 = __swap16;
+ htobe32 = __swap32;
+ htobe64 = __swap64;
+ } else {
+ /* cpu is be */
+ htobe16 = __noswap16;
+ htobe32 = __noswap32;
+ htobe64 = __noswap64;
+ }
+
+ return htobe32(i);
+}
+
+static inline uint64_t
+__byte_order_be64(uint64_t i)
+{
+ uint32_t num = 1;
+
+ if (((char *)(&num))[0] == 1) {
+ /* cpu is le */
+ htobe16 = __swap16;
+ htobe32 = __swap32;
+ htobe64 = __swap64;
+ } else {
+ /* cpu is be */
+ htobe16 = __noswap16;
+ htobe32 = __noswap32;
+ htobe64 = __noswap64;
+ }
+
+ return htobe64(i);
+}
+
+static uint16_t (*htobe16)(uint16_t) = __byte_order_be16;
+static uint32_t (*htobe32)(uint32_t) = __byte_order_be32;
+static uint64_t (*htobe64)(uint64_t) = __byte_order_be64;
+
+#endif /* _BYTE_ORDER_H */
diff --git a/libglusterfs/src/glusterfs/call-stub.h b/libglusterfs/src/glusterfs/call-stub.h
new file mode 100644
index 00000000000..8237ea459bf
--- /dev/null
+++ b/libglusterfs/src/glusterfs/call-stub.h
@@ -0,0 +1,622 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CALL_STUB_H_
+#define _CALL_STUB_H_
+
+#include "glusterfs/xlator.h"
+#include "glusterfs/defaults.h"
+#include "glusterfs/default-args.h"
+#include "glusterfs/stack.h"
+#include "glusterfs/list.h"
+
+typedef struct _call_stub {
+ struct list_head list;
+ call_frame_t *frame;
+ struct mem_pool *stub_mem_pool; /* pointer to stub mempool in ctx_t */
+ uint32_t jnl_meta_len;
+ uint32_t jnl_data_len;
+ void (*serialize)(struct _call_stub *, char *, char *);
+ union {
+ fop_lookup_t lookup;
+ fop_stat_t stat;
+ fop_fstat_t fstat;
+ fop_truncate_t truncate;
+ fop_ftruncate_t ftruncate;
+ fop_access_t access;
+ fop_readlink_t readlink;
+ fop_mknod_t mknod;
+ fop_mkdir_t mkdir;
+ fop_unlink_t unlink;
+ fop_rmdir_t rmdir;
+ fop_symlink_t symlink;
+ fop_rename_t rename;
+ fop_link_t link;
+ fop_create_t create;
+ fop_open_t open;
+ fop_readv_t readv;
+ fop_writev_t writev;
+ fop_flush_t flush;
+ fop_fsync_t fsync;
+ fop_opendir_t opendir;
+ fop_fsyncdir_t fsyncdir;
+ fop_statfs_t statfs;
+ fop_setxattr_t setxattr;
+ fop_getxattr_t getxattr;
+ fop_fgetxattr_t fgetxattr;
+ fop_fsetxattr_t fsetxattr;
+ fop_removexattr_t removexattr;
+ fop_fremovexattr_t fremovexattr;
+ fop_lk_t lk;
+ fop_inodelk_t inodelk;
+ fop_finodelk_t finodelk;
+ fop_entrylk_t entrylk;
+ fop_fentrylk_t fentrylk;
+ fop_readdir_t readdir;
+ fop_readdirp_t readdirp;
+ fop_rchecksum_t rchecksum;
+ fop_xattrop_t xattrop;
+ fop_fxattrop_t fxattrop;
+ fop_setattr_t setattr;
+ fop_fsetattr_t fsetattr;
+ fop_fallocate_t fallocate;
+ fop_discard_t discard;
+ fop_zerofill_t zerofill;
+ fop_ipc_t ipc;
+ fop_seek_t seek;
+ fop_lease_t lease;
+ fop_getactivelk_t getactivelk;
+ fop_setactivelk_t setactivelk;
+ fop_put_t put;
+ fop_icreate_t icreate;
+ fop_namelink_t namelink;
+ fop_copy_file_range_t copy_file_range;
+ } fn;
+
+ union {
+ fop_lookup_cbk_t lookup;
+ fop_stat_cbk_t stat;
+ fop_fstat_cbk_t fstat;
+ fop_truncate_cbk_t truncate;
+ fop_ftruncate_cbk_t ftruncate;
+ fop_access_cbk_t access;
+ fop_readlink_cbk_t readlink;
+ fop_mknod_cbk_t mknod;
+ fop_mkdir_cbk_t mkdir;
+ fop_unlink_cbk_t unlink;
+ fop_rmdir_cbk_t rmdir;
+ fop_symlink_cbk_t symlink;
+ fop_rename_cbk_t rename;
+ fop_link_cbk_t link;
+ fop_create_cbk_t create;
+ fop_open_cbk_t open;
+ fop_readv_cbk_t readv;
+ fop_writev_cbk_t writev;
+ fop_flush_cbk_t flush;
+ fop_fsync_cbk_t fsync;
+ fop_opendir_cbk_t opendir;
+ fop_fsyncdir_cbk_t fsyncdir;
+ fop_statfs_cbk_t statfs;
+ fop_setxattr_cbk_t setxattr;
+ fop_getxattr_cbk_t getxattr;
+ fop_fgetxattr_cbk_t fgetxattr;
+ fop_fsetxattr_cbk_t fsetxattr;
+ fop_removexattr_cbk_t removexattr;
+ fop_fremovexattr_cbk_t fremovexattr;
+ fop_lk_cbk_t lk;
+ fop_inodelk_cbk_t inodelk;
+ fop_finodelk_cbk_t finodelk;
+ fop_entrylk_cbk_t entrylk;
+ fop_fentrylk_cbk_t fentrylk;
+ fop_readdir_cbk_t readdir;
+ fop_readdirp_cbk_t readdirp;
+ fop_rchecksum_cbk_t rchecksum;
+ fop_xattrop_cbk_t xattrop;
+ fop_fxattrop_cbk_t fxattrop;
+ fop_setattr_cbk_t setattr;
+ fop_fsetattr_cbk_t fsetattr;
+ fop_fallocate_cbk_t fallocate;
+ fop_discard_cbk_t discard;
+ fop_zerofill_cbk_t zerofill;
+ fop_ipc_cbk_t ipc;
+ fop_seek_cbk_t seek;
+ fop_lease_cbk_t lease;
+ fop_getactivelk_cbk_t getactivelk;
+ fop_setactivelk_cbk_t setactivelk;
+ fop_put_cbk_t put;
+ fop_icreate_cbk_t icreate;
+ fop_namelink_cbk_t namelink;
+ fop_copy_file_range_cbk_t copy_file_range;
+ } fn_cbk;
+ glusterfs_fop_t fop;
+ gf_boolean_t poison;
+ char wind;
+ default_args_t args;
+ default_args_cbk_t args_cbk;
+} call_stub_t;
+
+call_stub_t *
+fop_lookup_stub(call_frame_t *frame, fop_lookup_t fn, loc_t *loc,
+ dict_t *xdata);
+
+call_stub_t *
+fop_lookup_cbk_stub(call_frame_t *frame, fop_lookup_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent);
+call_stub_t *
+fop_stat_stub(call_frame_t *frame, fop_stat_t fn, loc_t *loc, dict_t *xdata);
+call_stub_t *
+fop_stat_cbk_stub(call_frame_t *frame, fop_stat_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata);
+call_stub_t *
+fop_fstat_stub(call_frame_t *frame, fop_fstat_t fn, fd_t *fd, dict_t *xdata);
+call_stub_t *
+fop_fstat_cbk_stub(call_frame_t *frame, fop_fstat_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata);
+
+call_stub_t *
+fop_truncate_stub(call_frame_t *frame, fop_truncate_t fn, loc_t *loc, off_t off,
+ dict_t *xdata);
+
+call_stub_t *
+fop_truncate_cbk_stub(call_frame_t *frame, fop_truncate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+call_stub_t *
+fop_ftruncate_stub(call_frame_t *frame, fop_ftruncate_t fn, fd_t *fd, off_t off,
+ dict_t *xdata);
+
+call_stub_t *
+fop_ftruncate_cbk_stub(call_frame_t *frame, fop_ftruncate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+call_stub_t *
+fop_access_stub(call_frame_t *frame, fop_access_t fn, loc_t *loc, int32_t mask,
+ dict_t *xdata);
+
+call_stub_t *
+fop_access_cbk_stub(call_frame_t *frame, fop_access_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_readlink_stub(call_frame_t *frame, fop_readlink_t fn, loc_t *loc,
+ size_t size, dict_t *xdata);
+
+call_stub_t *
+fop_readlink_cbk_stub(call_frame_t *frame, fop_readlink_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata);
+
+call_stub_t *
+fop_mknod_stub(call_frame_t *frame, fop_mknod_t fn, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata);
+
+call_stub_t *
+fop_mknod_cbk_stub(call_frame_t *frame, fop_mknod_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+call_stub_t *
+fop_mkdir_stub(call_frame_t *frame, fop_mkdir_t fn, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
+
+call_stub_t *
+fop_mkdir_cbk_stub(call_frame_t *frame, fop_mkdir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+call_stub_t *
+fop_unlink_stub(call_frame_t *frame, fop_unlink_t fn, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+call_stub_t *
+fop_unlink_cbk_stub(call_frame_t *frame, fop_unlink_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+call_stub_t *
+fop_rmdir_stub(call_frame_t *frame, fop_rmdir_t fn, loc_t *loc, int flags,
+ dict_t *xdata);
+
+call_stub_t *
+fop_rmdir_cbk_stub(call_frame_t *frame, fop_rmdir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+call_stub_t *
+fop_symlink_stub(call_frame_t *frame, fop_symlink_t fn, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata);
+
+call_stub_t *
+fop_symlink_cbk_stub(call_frame_t *frame, fop_symlink_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+call_stub_t *
+fop_rename_stub(call_frame_t *frame, fop_rename_t fn, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+call_stub_t *
+fop_rename_cbk_stub(call_frame_t *frame, fop_rename_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata);
+
+call_stub_t *
+fop_link_stub(call_frame_t *frame, fop_link_t fn, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+call_stub_t *
+fop_link_cbk_stub(call_frame_t *frame, fop_link_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+call_stub_t *
+fop_create_stub(call_frame_t *frame, fop_create_t fn, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+call_stub_t *
+fop_create_cbk_stub(call_frame_t *frame, fop_create_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+call_stub_t *
+fop_open_stub(call_frame_t *frame, fop_open_t fn, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+call_stub_t *
+fop_open_cbk_stub(call_frame_t *frame, fop_open_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata);
+
+call_stub_t *
+fop_readv_stub(call_frame_t *frame, fop_readv_t fn, fd_t *fd, size_t size,
+ off_t off, uint32_t flags, dict_t *xdata);
+
+call_stub_t *
+fop_readv_cbk_stub(call_frame_t *frame, fop_readv_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata);
+
+call_stub_t *
+fop_writev_stub(call_frame_t *frame, fop_writev_t fn, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
+
+call_stub_t *
+fop_writev_cbk_stub(call_frame_t *frame, fop_writev_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+call_stub_t *
+fop_flush_stub(call_frame_t *frame, fop_flush_t fn, fd_t *fd, dict_t *xdata);
+
+call_stub_t *
+fop_flush_cbk_stub(call_frame_t *frame, fop_flush_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_fsync_stub(call_frame_t *frame, fop_fsync_t fn, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
+
+call_stub_t *
+fop_fsync_cbk_stub(call_frame_t *frame, fop_fsync_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+call_stub_t *
+fop_opendir_stub(call_frame_t *frame, fop_opendir_t fn, loc_t *loc, fd_t *fd,
+ dict_t *xdata);
+
+call_stub_t *
+fop_opendir_cbk_stub(call_frame_t *frame, fop_opendir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata);
+
+call_stub_t *
+fop_fsyncdir_stub(call_frame_t *frame, fop_fsyncdir_t fn, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+call_stub_t *
+fop_fsyncdir_cbk_stub(call_frame_t *frame, fop_fsyncdir_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_statfs_stub(call_frame_t *frame, fop_statfs_t fn, loc_t *loc,
+ dict_t *xdata);
+
+call_stub_t *
+fop_statfs_cbk_stub(call_frame_t *frame, fop_statfs_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct statvfs *buf, dict_t *xdata);
+
+call_stub_t *
+fop_setxattr_stub(call_frame_t *frame, fop_setxattr_t fn, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata);
+
+call_stub_t *
+fop_setxattr_cbk_stub(call_frame_t *frame, fop_setxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_getxattr_stub(call_frame_t *frame, fop_getxattr_t fn, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+call_stub_t *
+fop_getxattr_cbk_stub(call_frame_t *frame, fop_getxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *value,
+ dict_t *xdata);
+
+call_stub_t *
+fop_fsetxattr_stub(call_frame_t *frame, fop_fsetxattr_t fn, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata);
+
+call_stub_t *
+fop_fsetxattr_cbk_stub(call_frame_t *frame, fop_fsetxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_fgetxattr_stub(call_frame_t *frame, fop_fgetxattr_t fn, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+call_stub_t *
+fop_fgetxattr_cbk_stub(call_frame_t *frame, fop_fgetxattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *value,
+ dict_t *xdata);
+
+call_stub_t *
+fop_removexattr_stub(call_frame_t *frame, fop_removexattr_t fn, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+call_stub_t *
+fop_removexattr_cbk_stub(call_frame_t *frame, fop_removexattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_fremovexattr_stub(call_frame_t *frame, fop_fremovexattr_t fn, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+call_stub_t *
+fop_fremovexattr_cbk_stub(call_frame_t *frame, fop_fremovexattr_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_lk_stub(call_frame_t *frame, fop_lk_t fn, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata);
+
+call_stub_t *
+fop_lk_cbk_stub(call_frame_t *frame, fop_lk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata);
+
+call_stub_t *
+fop_inodelk_stub(call_frame_t *frame, fop_inodelk_t fn, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+call_stub_t *
+fop_finodelk_stub(call_frame_t *frame, fop_finodelk_t fn, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+call_stub_t *
+fop_entrylk_stub(call_frame_t *frame, fop_entrylk_t fn, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+call_stub_t *
+fop_fentrylk_stub(call_frame_t *frame, fop_fentrylk_t fn, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+call_stub_t *
+fop_inodelk_cbk_stub(call_frame_t *frame, fop_inodelk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_finodelk_cbk_stub(call_frame_t *frame, fop_inodelk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_entrylk_cbk_stub(call_frame_t *frame, fop_entrylk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_fentrylk_cbk_stub(call_frame_t *frame, fop_entrylk_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_readdir_stub(call_frame_t *frame, fop_readdir_t fn, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+call_stub_t *
+fop_readdirp_stub(call_frame_t *frame, fop_readdirp_t fn, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+call_stub_t *
+fop_readdirp_cbk_stub(call_frame_t *frame, fop_readdir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries, dict_t *xdata);
+
+call_stub_t *
+fop_readdir_cbk_stub(call_frame_t *frame, fop_readdir_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries, dict_t *xdata);
+
+call_stub_t *
+fop_rchecksum_stub(call_frame_t *frame, fop_rchecksum_t fn, fd_t *fd,
+ off_t offset, int32_t len, dict_t *xdata);
+
+call_stub_t *
+fop_rchecksum_cbk_stub(call_frame_t *frame, fop_rchecksum_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata);
+
+call_stub_t *
+fop_xattrop_stub(call_frame_t *frame, fop_xattrop_t fn, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+call_stub_t *
+fop_xattrop_stub_cbk_stub(call_frame_t *frame, fop_xattrop_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_fxattrop_stub(call_frame_t *frame, fop_fxattrop_t fn, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+call_stub_t *
+fop_fxattrop_stub_cbk_stub(call_frame_t *frame, fop_xattrop_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_setattr_stub(call_frame_t *frame, fop_setattr_t fn, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+call_stub_t *
+fop_setattr_cbk_stub(call_frame_t *frame, fop_setattr_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+call_stub_t *
+fop_fsetattr_stub(call_frame_t *frame, fop_fsetattr_t fn, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+call_stub_t *
+fop_fsetattr_cbk_stub(call_frame_t *frame, fop_setattr_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+call_stub_t *
+fop_fallocate_stub(call_frame_t *frame, fop_fallocate_t fn, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata);
+
+call_stub_t *
+fop_fallocate_cbk_stub(call_frame_t *frame, fop_fallocate_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+call_stub_t *
+fop_discard_stub(call_frame_t *frame, fop_discard_t fn, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+
+call_stub_t *
+fop_discard_cbk_stub(call_frame_t *frame, fop_discard_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+call_stub_t *
+fop_zerofill_stub(call_frame_t *frame, fop_zerofill_t fn, fd_t *fd,
+ off_t offset, off_t len, dict_t *xdata);
+
+call_stub_t *
+fop_zerofill_cbk_stub(call_frame_t *frame, fop_zerofill_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+call_stub_t *
+fop_ipc_stub(call_frame_t *frame, fop_ipc_t fn, int32_t op, dict_t *xdata);
+
+call_stub_t *
+fop_ipc_cbk_stub(call_frame_t *frame, fop_ipc_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_seek_stub(call_frame_t *frame, fop_seek_t fn, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata);
+
+call_stub_t *
+fop_seek_cbk_stub(call_frame_t *frame, fop_seek_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, off_t offset, dict_t *xdata);
+
+call_stub_t *
+fop_lease_stub(call_frame_t *frame, fop_lease_t fn, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata);
+
+call_stub_t *
+fop_lease_cbk_stub(call_frame_t *frame, fop_lease_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, struct gf_lease *lease, dict_t *xdata);
+
+call_stub_t *
+fop_getactivelk_stub(call_frame_t *frame, fop_getactivelk_t fn, loc_t *loc,
+ dict_t *xdata);
+
+call_stub_t *
+fop_getactivelk_cbk_stub(call_frame_t *frame, fop_getactivelk_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ lock_migration_info_t *lmi, dict_t *xdata);
+
+call_stub_t *
+fop_setactivelk_stub(call_frame_t *frame, fop_setactivelk_t fn, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata);
+
+call_stub_t *
+fop_setactivelk_cbk_stub(call_frame_t *frame, fop_setactivelk_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+call_stub_t *
+fop_put_stub(call_frame_t *frame, fop_put_t fn, loc_t *loc, mode_t mode,
+ mode_t umask, uint32_t flags, struct iovec *vector, int32_t count,
+ off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata);
+
+call_stub_t *
+fop_put_cbk_stub(call_frame_t *frame, fop_put_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+call_stub_t *
+fop_icreate_stub(call_frame_t *frame, fop_icreate_t fn, loc_t *loc, mode_t mode,
+ dict_t *xdata);
+
+call_stub_t *
+fop_namelink_stub(call_frame_t *frame, fop_namelink_t fn, loc_t *loc,
+ dict_t *xdata);
+
+call_stub_t *
+fop_icreate_cbk_stub(call_frame_t *frame, fop_icreate_cbk_t fn, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata);
+
+call_stub_t *
+fop_namelink_cbk_stub(call_frame_t *frame, fop_namelink_cbk_t fn,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+call_stub_t *
+fop_copy_file_range_stub(call_frame_t *frame, fop_copy_file_range_t fn,
+ fd_t *fd_in, off64_t off_in, fd_t *fd_out,
+ off64_t off_out, size_t len, uint32_t flags,
+ dict_t *xdata);
+
+call_stub_t *
+fop_copy_file_range_cbk_stub(call_frame_t *frame, fop_copy_file_range_cbk_t fn,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
+void
+call_resume(call_stub_t *stub);
+void
+call_resume_keep_stub(call_stub_t *stub);
+void
+call_stub_destroy(call_stub_t *stub);
+void
+call_unwind_error(call_stub_t *stub, int op_ret, int op_errno);
+void
+call_unwind_error_keep_stub(call_stub_t *stub, int op_ret, int op_errno);
+
+/*
+ * Sometimes we might want to call just this, perhaps repeatedly, without
+ * having (or being able) to destroy and recreate it.
+ */
+void
+call_resume_wind(call_stub_t *stub);
+
+#endif
diff --git a/libglusterfs/src/checksum.h b/libglusterfs/src/glusterfs/checksum.h
index bf7eeede8fc..019bb14df71 100644
--- a/libglusterfs/src/checksum.h
+++ b/libglusterfs/src/glusterfs/checksum.h
@@ -12,9 +12,11 @@
#define __CHECKSUM_H__
uint32_t
-gf_rsync_weak_checksum (unsigned char *buf, size_t len);
+gf_rsync_weak_checksum(unsigned char *buf, size_t len);
void
-gf_rsync_strong_checksum (unsigned char *buf, size_t len, unsigned char *sum);
+gf_rsync_strong_checksum(unsigned char *buf, size_t len, unsigned char *sum);
+void
+gf_rsync_md5_checksum(unsigned char *data, size_t len, unsigned char *md5);
#endif /* __CHECKSUM_H__ */
diff --git a/libglusterfs/src/glusterfs/circ-buff.h b/libglusterfs/src/glusterfs/circ-buff.h
new file mode 100644
index 00000000000..822345b641b
--- /dev/null
+++ b/libglusterfs/src/glusterfs/circ-buff.h
@@ -0,0 +1,61 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CB_H
+#define _CB_H
+
+#include "glusterfs/common-utils.h"
+
+#define BUFFER_SIZE 10
+#define TOTAL_SIZE BUFFER_SIZE + 1
+
+struct _circular_buffer {
+ struct timeval tv;
+ void *data;
+};
+
+typedef struct _circular_buffer circular_buffer_t;
+
+struct _buffer {
+ unsigned int w_index;
+ size_t size_buffer;
+ gf_boolean_t use_once;
+ /* This variable is assigned the proper value at the time of initing */
+ /* the buffer. It indicates, whether the buffer should be used once */
+ /* it becomes full. */
+
+ int used_len;
+ /* indicates the amount of circular buffer used. */
+
+ circular_buffer_t **cb;
+ void (*destroy_buffer_data)(void *data);
+ pthread_mutex_t lock;
+};
+
+typedef struct _buffer buffer_t;
+
+int
+cb_add_entry_buffer(buffer_t *buffer, void *item);
+
+void
+cb_buffer_show(buffer_t *buffer);
+
+buffer_t *
+cb_buffer_new(size_t buffer_size, gf_boolean_t use_buffer_once,
+ void (*destroy_data)(void *data));
+
+void
+cb_buffer_destroy(buffer_t *buffer);
+
+void
+cb_buffer_dump(buffer_t *buffer, void *data,
+ int(fn)(circular_buffer_t *buffer, void *data));
+
+#endif /* _CB_H */
diff --git a/libglusterfs/src/glusterfs/client_t.h b/libglusterfs/src/glusterfs/client_t.h
new file mode 100644
index 00000000000..a2c508e1d5c
--- /dev/null
+++ b/libglusterfs/src/glusterfs/client_t.h
@@ -0,0 +1,147 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CLIENT_T_H
+#define _CLIENT_T_H
+
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/locking.h" /* for gf_lock_t, not included by glusterfs.h */
+#include "glusterfs/atomic.h" /* for gf_atomic_t */
+
+/* auth_data structure is required by RPC layer. But as it is also used in
+ * client_t structure validation, comparision, it is critical that it is defined
+ * in the larger scope of libglusterfs, instead of libgfrpc. With this change,
+ * even RPC will use this structure */
+#define GF_CLIENTT_AUTH_BYTES 400
+typedef struct client_auth_data {
+ int flavour;
+ int datalen;
+ char authdata[GF_CLIENTT_AUTH_BYTES];
+} client_auth_data_t;
+
+struct client_ctx {
+ void *ctx_key;
+ void *ctx_value;
+};
+
+typedef struct _client {
+ struct {
+ /* e.g. protocol/server stashes its ctx here */
+ gf_lock_t lock;
+ unsigned short count;
+ struct client_ctx *ctx;
+ } scratch_ctx;
+ gf_atomic_t bind;
+ gf_atomic_t count;
+ xlator_t *bound_xl;
+ xlator_t *this;
+ int tbl_index;
+ char *client_uid;
+ char *client_name;
+ struct {
+ int flavour;
+ size_t len;
+ char *data;
+ char *username;
+ char *passwd;
+ } auth;
+
+ /* subdir_mount */
+ char *subdir_mount;
+ inode_t *subdir_inode;
+ uuid_t subdir_gfid;
+ int32_t opversion;
+ /* Variable to save fd_count for detach brick */
+ gf_atomic_t fd_cnt;
+} client_t;
+
+#define GF_CLIENTCTX_INITIAL_SIZE 8
+
+struct client_table_entry {
+ client_t *client;
+ int next_free;
+};
+typedef struct client_table_entry cliententry_t;
+
+struct clienttable {
+ unsigned int max_clients;
+ gf_lock_t lock;
+ cliententry_t *cliententries;
+ int first_free;
+ client_t *local;
+};
+typedef struct clienttable clienttable_t;
+
+#define GF_CLIENTTABLE_INITIAL_SIZE 128
+
+/* Signifies no more entries in the client table. */
+#define GF_CLIENTTABLE_END -1
+
+/* This is used to invalidate
+ * the next_free value in an cliententry that has been allocated
+ */
+#define GF_CLIENTENTRY_ALLOCATED -2
+
+void
+gf_client_put(client_t *client, gf_boolean_t *detached);
+
+clienttable_t *
+gf_clienttable_alloc(void);
+
+client_t *
+gf_client_ref(client_t *client);
+
+void
+gf_client_unref(client_t *client);
+
+int
+gf_client_dump_fdtable_to_dict(xlator_t *this, dict_t *dict);
+
+int
+gf_client_dump_fdtable(xlator_t *this);
+
+int
+gf_client_dump_inodes_to_dict(xlator_t *this, dict_t *dict);
+
+int
+gf_client_dump_inodes(xlator_t *this);
+
+void *
+client_ctx_set(client_t *client, void *key, void *value);
+
+int
+client_ctx_get(client_t *client, void *key, void **value);
+
+int
+client_ctx_del(client_t *client, void *key, void **value);
+
+void
+client_ctx_dump(client_t *client, char *prefix);
+
+int
+gf_client_dump_fdtables_to_dict(xlator_t *this, dict_t *dict);
+
+int
+gf_client_dump_fdtables(xlator_t *this);
+
+int
+gf_client_dump_inodes_to_dict(xlator_t *this, dict_t *dict);
+
+int
+gf_client_dump_inodes(xlator_t *this);
+
+int
+gf_client_disconnect(client_t *client);
+
+client_t *
+gf_client_get(xlator_t *this, client_auth_data_t *cred, char *client_uid,
+ char *subdir_mount);
+
+#endif /* _CLIENT_T_H */
diff --git a/libglusterfs/src/glusterfs/cluster-syncop.h b/libglusterfs/src/glusterfs/cluster-syncop.h
new file mode 100644
index 00000000000..d0ad5ed548c
--- /dev/null
+++ b/libglusterfs/src/glusterfs/cluster-syncop.h
@@ -0,0 +1,227 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CLUSTER_SYNCOP_H
+#define _CLUSTER_SYNCOP_H
+
+#include <sys/time.h>
+#include <pthread.h>
+#include <ucontext.h>
+
+#include "glusterfs/defaults.h"
+#include "glusterfs/default-args.h"
+#include "glusterfs/syncop.h"
+
+/*********************************************************************
+ *
+ * PARALLEL_FOP_ONLIST:
+ * Performs file operations in parallel on bricks.
+ * This macro expects a helper function(func) to implement the
+ * functionality.
+ *
+ ********************************************************************/
+#define PARALLEL_FOP_ONLIST(subvols, on, numsubvols, replies, frame, func, \
+ args...) \
+ do { \
+ int __i = 0; \
+ int __count = 0; \
+ cluster_local_t __local = { \
+ 0, \
+ }; \
+ void *__old_local = frame->local; \
+ \
+ __local.replies = replies; \
+ cluster_replies_wipe(replies, numsubvols); \
+ for (__i = 0; __i < numsubvols; __i++) \
+ INIT_LIST_HEAD(&replies[__i].entries.list); \
+ if (syncbarrier_init(&__local.barrier)) \
+ break; \
+ frame->local = &__local; \
+ for (__i = 0; __i < numsubvols; __i++) { \
+ if (on[__i]) { \
+ __count++; \
+ } \
+ } \
+ __local.barrier.waitfor = __count; \
+ for (__i = 0; __i < numsubvols; __i++) { \
+ if (on[__i]) { \
+ func(frame, subvols[__i], __i, ##args); \
+ } \
+ } \
+ syncbarrier_wait(&__local.barrier, __count); \
+ syncbarrier_destroy(&__local.barrier); \
+ frame->local = __old_local; \
+ STACK_RESET(frame->root); \
+ } while (0)
+
+typedef struct cluster_local_ {
+ default_args_cbk_t *replies;
+ syncbarrier_t barrier;
+} cluster_local_t;
+
+int32_t
+cluster_lookup(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+int32_t
+cluster_setattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+int32_t
+cluster_getxattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
+int32_t
+cluster_setxattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int
+cluster_inodelk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *locked_on,
+ call_frame_t *frame, xlator_t *this, char *dom, inode_t *inode,
+ off_t off, size_t size);
+
+int
+cluster_uninodelk(xlator_t **subvols, unsigned char *locked_on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, char *dom,
+ inode_t *inode, off_t off, size_t size);
+
+int
+cluster_entrylk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *locked_on,
+ call_frame_t *frame, xlator_t *this, char *dom, inode_t *inode,
+ const char *name);
+
+int32_t
+cluster_rmdir(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata);
+
+int32_t
+cluster_unlink(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int
+cluster_mkdir(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
+
+int32_t
+cluster_readlink(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata);
+
+int
+cluster_symlink(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata);
+
+int32_t
+cluster_link(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int
+cluster_mknod(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata);
+
+int
+cluster_unentrylk(xlator_t **subvols, unsigned char *locked_on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, char *dom,
+ inode_t *inode, const char *name);
+
+int
+cluster_tryentrylk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *locked_on,
+ call_frame_t *frame, xlator_t *this, char *dom,
+ inode_t *inode, const char *name);
+
+int32_t
+cluster_fxattrop(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+cluster_xattrop(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+cluster_fstat(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
+
+int32_t
+cluster_ftruncate(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata);
+
+int32_t
+cluster_open(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+int
+cluster_tryinodelk(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *locked_on,
+ call_frame_t *frame, xlator_t *this, char *dom,
+ inode_t *inode, off_t off, size_t size);
+
+int32_t
+cluster_fsetattr(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+int32_t
+cluster_put(xlator_t **subvols, unsigned char *on, int numsubvols,
+ default_args_cbk_t *replies, unsigned char *output,
+ call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, uint32_t flags, struct iovec *vector, int32_t count,
+ off_t offset, struct iobref *iobref, dict_t *xattr, dict_t *xdata);
+
+void
+cluster_replies_wipe(default_args_cbk_t *replies, int num_subvols);
+
+int32_t
+cluster_fop_success_fill(default_args_cbk_t *replies, int numsubvols,
+ unsigned char *success);
+
+int32_t
+cluster_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int
+cluster_tiebreaker_inodelk(xlator_t **subvols, unsigned char *on,
+ int numsubvols, default_args_cbk_t *replies,
+ unsigned char *locked_on, call_frame_t *frame,
+ xlator_t *this, char *dom, inode_t *inode, off_t off,
+ size_t size);
+#endif /* !_CLUSTER_SYNCOP_H */
diff --git a/libglusterfs/src/glusterfs/common-utils.h b/libglusterfs/src/glusterfs/common-utils.h
new file mode 100644
index 00000000000..f297fdab5c9
--- /dev/null
+++ b/libglusterfs/src/glusterfs/common-utils.h
@@ -0,0 +1,1256 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _COMMON_UTILS_H
+#define _COMMON_UTILS_H
+
+#include <stdint.h>
+#include <sys/uio.h>
+#include <netdb.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <pthread.h>
+#include <unistd.h>
+#include <openssl/md5.h>
+#ifndef GF_BSD_HOST_OS
+#include <alloca.h>
+#endif
+#include <limits.h>
+#include <fnmatch.h>
+#include <uuid/uuid.h>
+
+/* FreeBSD, etc. */
+#ifndef __BITS_PER_LONG
+#define __BITS_PER_LONG (CHAR_BIT * (sizeof(long)))
+#endif
+
+#ifndef ffsll
+#define ffsll(x) __builtin_ffsll(x)
+#endif
+
+void
+trap(void);
+
+#define GF_UNIVERSAL_ANSWER 42 /* :O */
+
+/* To solve type punned error */
+#define VOID(ptr) ((void **)((void *)ptr))
+
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/compat-uuid.h"
+#include "glusterfs/iatt.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+#define STRINGIFY(val) #val
+#define TOSTRING(val) STRINGIFY(val)
+
+#define alloca0(size) \
+ ({ \
+ void *__ptr; \
+ __ptr = alloca(size); \
+ memset(__ptr, 0, size); \
+ __ptr; \
+ })
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#define gf_roof(a, b) ((((a) + (b)-1) / ((b != 0) ? (b) : 1)) * (b))
+#define gf_floor(a, b) (((a) / ((b != 0) ? (b) : 1)) * (b))
+
+#define IPv4_ADDR_SIZE 32
+
+#define GF_UNIT_KB 1024ULL
+#define GF_UNIT_MB 1048576ULL
+#define GF_UNIT_GB 1073741824ULL
+#define GF_UNIT_TB 1099511627776ULL
+#define GF_UNIT_PB 1125899906842624ULL
+
+#define GF_UNIT_B_STRING "B"
+#define GF_UNIT_KB_STRING "KB"
+#define GF_UNIT_MB_STRING "MB"
+#define GF_UNIT_GB_STRING "GB"
+#define GF_UNIT_TB_STRING "TB"
+#define GF_UNIT_PB_STRING "PB"
+
+#define GF_UNIT_PERCENT_STRING "%"
+
+#define GEOREP "geo-replication"
+#define GLUSTERD_NAME "glusterd"
+
+#define GF_SELINUX_XATTR_KEY "security.selinux"
+
+#define WIPE(statp) \
+ do { \
+ typeof(*statp) z = { \
+ 0, \
+ }; \
+ if (statp) \
+ *statp = z; \
+ } while (0)
+
+#define IS_EXT_FS(fs_name) \
+ (!strcmp(fs_name, "ext2") || !strcmp(fs_name, "ext3") || \
+ !strcmp(fs_name, "ext4"))
+
+/* process mode definitions */
+#define GF_SERVER_PROCESS 0
+#define GF_CLIENT_PROCESS 1
+#define GF_GLUSTERD_PROCESS 2
+
+/* Defining this here as it is needed by glusterd for setting
+ * nfs port in volume status.
+ */
+#define GF_NFS3_PORT 2049
+
+#define GF_CLIENT_PORT_CEILING 1024
+#define GF_IANA_PRIV_PORTS_START 49152 /* RFC 6335 */
+#define GF_CLNT_INSECURE_PORT_CEILING (GF_IANA_PRIV_PORTS_START - 1)
+#define GF_PORT_MAX 65535
+#define GF_PORT_ARRAY_SIZE ((GF_PORT_MAX + 7) / 8)
+#define GF_LOCK_TIMER 180
+#define GF_MINUTE_IN_SECONDS 60
+#define GF_HOUR_IN_SECONDS (60 * 60)
+#define GF_DAY_IN_SECONDS (24 * 60 * 60)
+#define GF_WEEK_IN_SECONDS (7 * 24 * 60 * 60)
+#define GF_SEC_IN_NS 1000000000
+#define GF_MS_IN_NS 1000000
+#define GF_US_IN_NS 1000
+
+/* Default timeout for both barrier and changelog translator */
+#define BARRIER_TIMEOUT "120"
+
+/* Default value of signing waiting time to sign a file for bitrot */
+#define SIGNING_TIMEOUT "120"
+#define BR_WORKERS "4"
+
+/* xxhash */
+#define GF_XXH64_DIGEST_LENGTH 8
+#define GF_XXHSUM64_DEFAULT_SEED 0
+
+/* Shard */
+#define GF_XATTR_SHARD_FILE_SIZE "trusted.glusterfs.shard.file-size"
+#define SHARD_ROOT_GFID "be318638-e8a0-4c6d-977d-7a937aa84806"
+#define DOT_SHARD_REMOVE_ME_GFID "77dd5a45-dbf5-4592-b31b-b440382302e9"
+
+/* Lease: buffer length for stringified lease id
+ * Format: 4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum-4hexnum
+ * Eg:6c69-6431-2d63-6c6e-7431-0000-0000-0000
+ */
+#define GF_LEASE_ID_BUF_SIZE ((LEASE_ID_SIZE * 2) + (LEASE_ID_SIZE / 2))
+
+#define GF_PERCENTAGE(val, total) (((val)*100) / (total))
+
+/* pthread related */
+/* as per the man page, thread-name should be at max 16 bytes */
+/* with prefix of 'glfs_' (5), we are left with 11 more bytes */
+#define GF_THREAD_NAME_LIMIT 16
+#define GF_THREAD_NAME_PREFIX "glfs_"
+
+/* Advisory buffer size for formatted timestamps (see gf_time_fmt) */
+#define GF_TIMESTR_SIZE 256
+
+/*
+ * we could have initialized these as +ve values and treated
+ * them as negative while comparing etc.. (which would have
+ * saved us with the pain of assigning values), but since we
+ * only have a few clients that use this feature, it's okay.
+ */
+enum _gf_special_pid {
+ GF_CLIENT_PID_MAX = 0,
+ GF_CLIENT_PID_GSYNCD = -1,
+ GF_CLIENT_PID_HADOOP = -2,
+ GF_CLIENT_PID_DEFRAG = -3,
+ GF_CLIENT_PID_NO_ROOT_SQUASH = -4,
+ GF_CLIENT_PID_QUOTA_MOUNT = -5,
+ GF_CLIENT_PID_SELF_HEALD = -6,
+ GF_CLIENT_PID_GLFS_HEAL = -7,
+ GF_CLIENT_PID_BITD = -8,
+ GF_CLIENT_PID_SCRUB = -9,
+ GF_CLIENT_PID_TIER_DEFRAG = -10,
+ GF_SERVER_PID_TRASH = -11,
+ GF_CLIENT_PID_ADD_REPLICA_MOUNT = -12,
+ GF_CLIENT_PID_SET_UTIME = -13,
+};
+
+enum _gf_xlator_ipc_targets {
+ GF_IPC_TARGET_CHANGELOG = 0,
+ GF_IPC_TARGET_CTR = 1,
+ GF_IPC_TARGET_UPCALL = 2
+};
+
+typedef enum _gf_special_pid gf_special_pid_t;
+typedef enum _gf_xlator_ipc_targets _gf_xlator_ipc_targets_t;
+
+/* Array to hold custom xattr keys */
+extern char *xattrs_to_heal[];
+
+char **
+get_xattrs_to_heal();
+
+/* The DHT file rename operation is not a straightforward rename.
+ * It involves creating linkto and linkfiles, and can unlink or rename the
+ * source file depending on the hashed and cached subvols for the source
+ * and target files. this makes it difficult for geo-rep to figure out that
+ * a rename operation has taken place.
+ *
+ * We now send a special key and the values of the source and target pargfids
+ * and basenames to indicate to changelog that the operation in question
+ * should be treated as a rename. We are explicitly filling and sending this
+ * as a binary value in the dictionary as the unlink op will not have the
+ * source file information. The lengths of the src and target basenames
+ * are used to calculate where to start reading the names in the structure.
+ * XFS allows a max of 255 chars for filenames but other file systems might
+ * not have such restrictions
+ */
+typedef struct dht_changelog_rename_info {
+ uuid_t old_pargfid;
+ uuid_t new_pargfid;
+ int32_t oldname_len;
+ int32_t newname_len;
+ char buffer[1];
+} dht_changelog_rename_info_t;
+
+typedef int (*gf_cmp)(void *, void *);
+
+struct _dict;
+
+struct dnscache {
+ struct _dict *cache_dict;
+ time_t ttl;
+};
+
+struct dnscache_entry {
+ char *ip;
+ char *fqdn;
+ time_t timestamp;
+};
+
+struct dnscache6 {
+ struct addrinfo *first;
+ struct addrinfo *next;
+};
+
+struct list_node {
+ void *ptr;
+ struct list_head list;
+};
+
+extern char *vol_type_str[];
+
+struct list_node *
+list_node_add(void *ptr, struct list_head *list);
+struct list_node *
+list_node_add_order(void *ptr, struct list_head *list,
+ int (*compare)(struct list_head *, struct list_head *));
+void
+list_node_del(struct list_node *node);
+
+struct dnscache *
+gf_dnscache_init(time_t ttl);
+void
+gf_dnscache_deinit(struct dnscache *cache);
+struct dnscache_entry *
+gf_dnscache_entry_init(void);
+void
+gf_dnscache_entry_deinit(struct dnscache_entry *entry);
+char *
+gf_rev_dns_lookup_cached(const char *ip, struct dnscache *dnscache);
+
+char *
+gf_resolve_path_parent(const char *path);
+
+void
+gf_global_variable_init(void);
+
+int32_t
+gf_resolve_ip6(const char *hostname, uint16_t port, int family, void **dnscache,
+ struct addrinfo **addr_info);
+
+void
+gf_log_dump_graph(FILE *specfp, glusterfs_graph_t *graph);
+void
+gf_print_trace(int32_t signal, glusterfs_ctx_t *ctx);
+int
+gf_set_log_file_path(cmd_args_t *cmd_args, glusterfs_ctx_t *ctx);
+int
+gf_set_log_ident(cmd_args_t *cmd_args);
+
+int
+gf_process_getspec_servers_list(cmd_args_t *cmd_args, const char *servers_list);
+int
+gf_set_volfile_server_common(cmd_args_t *cmd_args, const char *host,
+ const char *transport, int port);
+
+static inline void
+BIT_SET(unsigned char *array, unsigned int index)
+{
+ unsigned int offset = index / 8;
+ unsigned int shift = index % 8;
+
+ array[offset] |= (1 << shift);
+}
+
+static inline void
+BIT_CLEAR(unsigned char *array, unsigned int index)
+{
+ unsigned int offset = index / 8;
+ unsigned int shift = index % 8;
+
+ array[offset] &= ~(1 << shift);
+}
+
+static inline unsigned int
+BIT_VALUE(unsigned char *array, unsigned int index)
+{
+ unsigned int offset = index / 8;
+ unsigned int shift = index % 8;
+
+ return (array[offset] >> shift) & 0x1;
+}
+
+#define VECTORSIZE(count) (count * (sizeof(struct iovec)))
+
+#define STRLEN_0(str) (strlen(str) + 1)
+
+#define VALIDATE_OR_GOTO(arg, label) \
+ do { \
+ if (!arg) { \
+ errno = EINVAL; \
+ gf_msg_callingfn((this ? (this->name) : "(Govinda! Govinda!)"), \
+ GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG, \
+ "invalid argument: " #arg); \
+ goto label; \
+ } \
+ } while (0)
+
+#define GF_VALIDATE_OR_GOTO(name, arg, label) \
+ do { \
+ if (!arg) { \
+ errno = EINVAL; \
+ gf_msg_callingfn(name, GF_LOG_ERROR, errno, LG_MSG_INVALID_ARG, \
+ "invalid argument: " #arg); \
+ goto label; \
+ } \
+ } while (0)
+
+#define GF_VALIDATE_OR_GOTO_WITH_ERROR(name, arg, label, errno, error) \
+ do { \
+ if (!arg) { \
+ errno = error; \
+ gf_msg_callingfn(name, GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, \
+ "invalid argument: " #arg); \
+ goto label; \
+ } \
+ } while (0)
+
+#define GF_CHECK_ALLOC(arg, retval, label) \
+ do { \
+ if (!(arg)) { \
+ retval = -ENOMEM; \
+ goto label; \
+ } \
+ } while (0)
+
+#define GF_CHECK_ALLOC_AND_LOG(name, item, retval, msg, errlabel) \
+ do { \
+ if (!(item)) { \
+ (retval) = -ENOMEM; \
+ gf_msg(name, GF_LOG_CRITICAL, ENOMEM, LG_MSG_NO_MEMORY, (msg)); \
+ goto errlabel; \
+ } \
+ } while (0)
+
+#define GF_ASSERT_AND_GOTO_WITH_ERROR(name, arg, label, errno, error) \
+ do { \
+ if (!arg) { \
+ GF_ASSERT(0); \
+ errno = error; \
+ goto label; \
+ } \
+ } while (0)
+
+#define GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO(name, arg, label) \
+ do { \
+ GF_VALIDATE_OR_GOTO(name, arg, label); \
+ if ((arg[0]) != '/') { \
+ errno = EINVAL; \
+ gf_msg_callingfn(name, GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG, \
+ "invalid argument: " #arg); \
+ goto label; \
+ } \
+ } while (0)
+
+#define GF_REMOVE_SLASH_FROM_PATH(path, string) \
+ do { \
+ int i = 0; \
+ for (i = 1; i < strlen(path); i++) { \
+ string[i - 1] = path[i]; \
+ if (string[i - 1] == '/') \
+ string[i - 1] = '-'; \
+ } \
+ } while (0)
+
+#define GF_REMOVE_INTERNAL_XATTR(pattern, dict) \
+ do { \
+ if (!dict) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_DICT_NULL, \
+ "dict is null"); \
+ break; \
+ } \
+ dict_foreach_fnmatch(dict, pattern, dict_remove_foreach_fn, NULL); \
+ } while (0)
+
+#define GF_IF_INTERNAL_XATTR_GOTO(pattern, dict, op_errno, label) \
+ do { \
+ if (!dict) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_DICT_NULL, \
+ "setxattr dict is null"); \
+ goto label; \
+ } \
+ if (dict_foreach_fnmatch(dict, pattern, dict_null_foreach_fn, NULL) > \
+ 0) { \
+ op_errno = EPERM; \
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, LG_MSG_NO_PERM, \
+ "attempt to set internal" \
+ " xattr: %s", \
+ pattern); \
+ goto label; \
+ } \
+ } while (0)
+
+#define GF_IF_NATIVE_XATTR_GOTO(pattern, key, op_errno, label) \
+ do { \
+ if (!key) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_NO_KEY, \
+ "no key for removexattr"); \
+ goto label; \
+ } \
+ if (!fnmatch(pattern, key, 0)) { \
+ op_errno = EPERM; \
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, LG_MSG_NO_PERM, \
+ "attempt to remove internal " \
+ "xattr: %s", \
+ key); \
+ goto label; \
+ } \
+ } while (0)
+
+#ifdef DEBUG
+#define GF_ASSERT(x) assert(x);
+#else
+#define GF_ASSERT(x) \
+ do { \
+ if (!(x)) { \
+ gf_msg_callingfn("", GF_LOG_ERROR, 0, LG_MSG_ASSERTION_FAILED, \
+ "Assertion failed: " #x); \
+ } \
+ } while (0)
+#endif
+
+/* Compile-time assert, borrowed from Linux kernel. */
+#ifdef HAVE_STATIC_ASSERT
+#define GF_STATIC_ASSERT(expr, ...) \
+ __gf_static_assert(expr, ##__VA_ARGS__, #expr)
+#define __gf_static_assert(expr, msg, ...) _Static_assert(expr, msg)
+#else
+#define GF_STATIC_ASSERT(expr, ...)
+#endif
+
+#define GF_ABORT(msg...) \
+ do { \
+ gf_msg_callingfn("", GF_LOG_CRITICAL, 0, LG_MSG_ASSERTION_FAILED, \
+ "Assertion failed: " msg); \
+ abort(); \
+ } while (0)
+
+#define GF_UUID_ASSERT(u) \
+ if (gf_uuid_is_null(u)) \
+ GF_ASSERT(!"uuid null");
+
+#define GF_IGNORE_IF_GSYNCD_SAFE_ERROR(frame, op_errno) \
+ (((frame->root->pid == GF_CLIENT_PID_GSYNCD) && \
+ (op_errno == EEXIST || op_errno == ENOENT)) \
+ ? 0 \
+ : 1)
+
+union gf_sock_union {
+ struct sockaddr_storage storage;
+ struct sockaddr_in6 sin6;
+ struct sockaddr_in sin;
+ struct sockaddr sa;
+};
+
+#define GF_HIDDEN_PATH ".glusterfs"
+#define GF_UNLINK_PATH GF_HIDDEN_PATH "/unlink"
+#define GF_LANDFILL_PATH GF_HIDDEN_PATH "/landfill"
+
+#define IOV_MIN(n) min(IOV_MAX, n)
+
+static inline gf_boolean_t
+gf_irrelevant_entry(struct dirent *entry)
+{
+ GF_ASSERT(entry);
+
+ return (!strcmp(entry->d_name, ".") ||
+ !fnmatch("*.tmp", entry->d_name, 0) ||
+ !strcmp(entry->d_name, ".."));
+}
+
+static inline void
+iov_free(struct iovec *vector, int count)
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ FREE(vector[i].iov_base);
+
+ GF_FREE(vector);
+}
+
+static inline int
+iov_length(const struct iovec *vector, int count)
+{
+ int i = 0;
+ size_t size = 0;
+
+ for (i = 0; i < count; i++)
+ size += vector[i].iov_len;
+
+ return size;
+}
+
+static inline struct iovec *
+iov_dup(const struct iovec *vector, int count)
+{
+ int bytecount = 0;
+ struct iovec *newvec = NULL;
+
+ bytecount = (count * sizeof(struct iovec));
+ newvec = GF_MALLOC(bytecount, gf_common_mt_iovec);
+ if (newvec != NULL) {
+ memcpy(newvec, vector, bytecount);
+ }
+
+ return newvec;
+}
+
+typedef struct _iov_iter {
+ const struct iovec *iovec;
+ void *ptr;
+ uint32_t len;
+ uint32_t count;
+} iov_iter_t;
+
+static inline bool
+iov_iter_init(iov_iter_t *iter, const struct iovec *iovec, uint32_t count,
+ uint32_t offset)
+{
+ uint32_t len;
+
+ while (count > 0) {
+ count--;
+ len = iovec->iov_len;
+ if (offset < len) {
+ iter->ptr = iovec->iov_base + offset;
+ iter->len = len - offset;
+ iter->iovec = iovec + 1;
+ iter->count = count;
+
+ return true;
+ }
+ offset -= len;
+ }
+
+ memset(iter, 0, sizeof(*iter));
+
+ return false;
+}
+
+static inline bool
+iov_iter_end(iov_iter_t *iter)
+{
+ return iter->count == 0;
+}
+
+static inline bool
+iov_iter_next(iov_iter_t *iter, uint32_t size)
+{
+ GF_ASSERT(size <= iter->len);
+
+ if (iter->len > size) {
+ iter->len -= size;
+ iter->ptr += size;
+
+ return true;
+ }
+ if (iter->count > 0) {
+ iter->count--;
+ iter->ptr = iter->iovec->iov_base;
+ iter->len = iter->iovec->iov_len;
+ iter->iovec++;
+
+ return true;
+ }
+
+ memset(iter, 0, sizeof(*iter));
+
+ return false;
+}
+
+static inline uint32_t
+iov_iter_copy(iov_iter_t *dst, iov_iter_t *src, uint32_t size)
+{
+ uint32_t len;
+
+ len = src->len;
+ if (len > dst->len) {
+ len = dst->len;
+ }
+ if (len > size) {
+ len = size;
+ }
+ memcpy(dst->ptr, src->ptr, len);
+
+ return len;
+}
+
+static inline uint32_t
+iov_iter_to_iovec(iov_iter_t *iter, struct iovec *iovec, int32_t idx,
+ uint32_t size)
+{
+ uint32_t len;
+
+ len = iter->len;
+ if (len > size) {
+ len = size;
+ }
+ iovec[idx].iov_base = iter->ptr;
+ iovec[idx].iov_len = len;
+
+ return len;
+}
+
+static inline int
+iov_subset(struct iovec *src, int src_count, uint32_t start, uint32_t size,
+ struct iovec **dst, int32_t dst_count)
+{
+ struct iovec iovec[src_count];
+ iov_iter_t iter;
+ uint32_t len;
+ int32_t idx;
+
+ if ((size == 0) || !iov_iter_init(&iter, src, src_count, start)) {
+ return 0;
+ }
+
+ idx = 0;
+ do {
+ len = iov_iter_to_iovec(&iter, iovec, idx, size);
+ idx++;
+ size -= len;
+ } while ((size > 0) && iov_iter_next(&iter, len));
+
+ if (*dst == NULL) {
+ *dst = iov_dup(iovec, idx);
+ if (*dst == NULL) {
+ return -1;
+ }
+ } else if (idx > dst_count) {
+ return -1;
+ } else {
+ memcpy(*dst, iovec, idx * sizeof(struct iovec));
+ }
+
+ return idx;
+}
+
+static inline int
+iov_skip(struct iovec *iovec, uint32_t count, uint32_t size)
+{
+ uint32_t len, idx;
+
+ idx = 0;
+ while ((size > 0) && (idx < count)) {
+ len = iovec[idx].iov_len;
+ if (len > size) {
+ iovec[idx].iov_len -= size;
+ iovec[idx].iov_base += size;
+ break;
+ }
+ idx++;
+ size -= len;
+ }
+
+ if (idx > 0) {
+ memmove(iovec, iovec + idx, (count - idx) * sizeof(struct iovec));
+ }
+
+ return count - idx;
+}
+
+static inline size_t
+iov_range_copy(const struct iovec *dst, uint32_t dst_count, uint32_t dst_offset,
+ const struct iovec *src, uint32_t src_count, uint32_t src_offset,
+ uint32_t size)
+{
+ iov_iter_t src_iter, dst_iter;
+ uint32_t len, total;
+
+ if ((size == 0) || !iov_iter_init(&src_iter, src, src_count, src_offset) ||
+ !iov_iter_init(&dst_iter, dst, dst_count, dst_offset)) {
+ return 0;
+ }
+
+ total = 0;
+ do {
+ len = iov_iter_copy(&dst_iter, &src_iter, size);
+ total += len;
+ size -= len;
+ } while ((size > 0) && iov_iter_next(&src_iter, len) &&
+ iov_iter_next(&dst_iter, len));
+
+ return total;
+}
+
+static inline void
+iov_unload(char *buf, const struct iovec *vector, int count)
+{
+ int i;
+ int copied = 0;
+
+ for (i = 0; i < count; i++) {
+ memcpy(buf + copied, vector[i].iov_base, vector[i].iov_len);
+ copied += vector[i].iov_len;
+ }
+}
+
+static inline size_t
+iov_load(const struct iovec *vector, int count, char *buf, int size)
+{
+ size_t left = size;
+ size_t cp = 0;
+ int ret = 0;
+ int i = 0;
+
+ while (left && i < count) {
+ cp = min(vector[i].iov_len, left);
+ if (vector[i].iov_base != buf + (size - left))
+ memcpy(vector[i].iov_base, buf + (size - left), cp);
+ ret += cp;
+ left -= cp;
+ if (left)
+ i++;
+ }
+
+ return ret;
+}
+
+static inline size_t
+iov_copy(const struct iovec *dst, int dcnt, const struct iovec *src, int scnt)
+{
+ return iov_range_copy(dst, dcnt, 0, src, scnt, 0, UINT32_MAX);
+}
+
+/* based on the amusing discussion @ https://rusty.ozlabs.org/?p=560 */
+static bool
+memeqzero(const void *data, size_t length)
+{
+ const unsigned char *p = data;
+ size_t len;
+
+ /* Check first 16 bytes manually */
+ for (len = 0; len < 16; len++) {
+ if (!length)
+ return true;
+ if (*p)
+ return false;
+ p++;
+ length--;
+ }
+
+ /* Now we know that's zero, memcmp with self. */
+ return memcmp(data, p, length) == 0;
+}
+
+static inline int
+mem_0filled(const char *buf, size_t size)
+{
+ return !memeqzero(buf, size);
+}
+
+static inline int
+iov_0filled(const struct iovec *vector, int count)
+{
+ int i = 0;
+ int ret = 0;
+
+ for (i = 0; i < count; i++) {
+ ret = mem_0filled(vector[i].iov_base, vector[i].iov_len);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+typedef enum {
+ gf_timefmt_default = 0,
+ gf_timefmt_FT = 0, /* YYYY-MM-DD hh:mm:ss */
+ gf_timefmt_Ymd_T, /* YYYY/MM-DD-hh:mm:ss */
+ gf_timefmt_bdT, /* MMM DD hh:mm:ss */
+ gf_timefmt_F_HMS, /* YYYY-MM-DD hhmmss */
+ gf_timefmt_dirent,
+ gf_timefmt_s,
+ gf_timefmt_last
+} gf_timefmts;
+
+static inline char *
+gf_time_fmt_tv(char *dst, size_t sz_dst, struct timeval *tv, unsigned int fmt)
+{
+ extern void _gf_timestuff(const char ***, const char ***);
+ static gf_timefmts timefmt_last = (gf_timefmts)-1;
+ static const char **fmts;
+ static const char **zeros;
+ struct tm tm, *res;
+ int localtime = 0;
+ int len = 0;
+ int pos = 0;
+
+ if (timefmt_last == ((gf_timefmts)-1)) {
+ _gf_timestuff(&fmts, &zeros);
+ timefmt_last = gf_timefmt_last;
+ }
+ if (timefmt_last <= fmt) {
+ fmt = gf_timefmt_default;
+ }
+ localtime = gf_log_get_localtime();
+ res = localtime ? localtime_r(&tv->tv_sec, &tm)
+ : gmtime_r(&tv->tv_sec, &tm);
+ if (tv->tv_sec && (res != NULL)) {
+ len = strftime(dst, sz_dst, fmts[fmt], &tm);
+ if (len == 0)
+ return dst;
+ pos += len;
+ if (tv->tv_usec >= 0) {
+ len = snprintf(dst + pos, sz_dst - pos, ".%" GF_PRI_SUSECONDS,
+ tv->tv_usec);
+ if (len >= sz_dst - pos)
+ return dst;
+ pos += len;
+ }
+ strftime(dst + pos, sz_dst - pos, " %z", &tm);
+ } else {
+ strncpy(dst, "N/A", sz_dst);
+ }
+ return dst;
+}
+
+static inline char *
+gf_time_fmt(char *dst, size_t sz_dst, time_t utime, unsigned int fmt)
+{
+ struct timeval tv = {utime, -1};
+
+ return gf_time_fmt_tv(dst, sz_dst, &tv, fmt);
+}
+
+/* This function helps us use gfid (unique identity) to generate inode's unique
+ * number in glusterfs.
+ */
+ino_t
+gfid_to_ino(uuid_t gfid);
+
+int
+mkdir_p(char *path, mode_t mode, gf_boolean_t allow_symlinks);
+/*
+ * rounds up nr to power of two. If nr is already a power of two, just returns
+ * nr
+ */
+
+int
+gf_lstat_dir(const char *path, struct stat *stbuf_in);
+
+int32_t
+gf_roundup_power_of_two(int32_t nr);
+
+/*
+ * rounds up nr to next power of two. If nr is already a power of two, next
+ * power of two is returned.
+ */
+
+int32_t
+gf_roundup_next_power_of_two(int32_t nr);
+
+char *
+gf_trim(char *string);
+int
+gf_volume_name_validate(const char *volume_name);
+
+int
+gf_string2long(const char *str, long *n);
+int
+gf_string2ulong(const char *str, unsigned long *n);
+int
+gf_string2int(const char *str, int *n);
+int
+gf_string2uint(const char *str, unsigned int *n);
+int
+gf_string2double(const char *str, double *n);
+int
+gf_string2longlong(const char *str, long long *n);
+int
+gf_string2ulonglong(const char *str, unsigned long long *n);
+
+int
+gf_string2int8(const char *str, int8_t *n);
+int
+gf_string2int16(const char *str, int16_t *n);
+int
+gf_string2int32(const char *str, int32_t *n);
+int
+gf_string2int64(const char *str, int64_t *n);
+int
+gf_string2uint8(const char *str, uint8_t *n);
+int
+gf_string2uint16(const char *str, uint16_t *n);
+int
+gf_string2uint32(const char *str, uint32_t *n);
+int
+gf_string2uint64(const char *str, uint64_t *n);
+
+int
+gf_strstr(const char *str, const char *delim, const char *match);
+
+int
+gf_string2ulong_base10(const char *str, unsigned long *n);
+int
+gf_string2uint_base10(const char *str, unsigned int *n);
+int
+gf_string2uint8_base10(const char *str, uint8_t *n);
+int
+gf_string2uint16_base10(const char *str, uint16_t *n);
+int
+gf_string2uint32_base10(const char *str, uint32_t *n);
+int
+gf_string2uint64_base10(const char *str, uint64_t *n);
+int
+gf_string2bytesize_uint64(const char *str, uint64_t *n);
+int
+gf_string2bytesize_int64(const char *str, int64_t *n);
+int
+gf_string2percent_or_bytesize(const char *str, double *n,
+ gf_boolean_t *is_percent);
+
+int
+gf_string2boolean(const char *str, gf_boolean_t *b);
+int
+gf_strn2boolean(const char *str, const int len, gf_boolean_t *b);
+int
+gf_string2percent(const char *str, double *n);
+int
+gf_string2time(const char *str, uint32_t *n);
+
+int
+gf_lockfd(int fd);
+int
+gf_unlockfd(int fd);
+
+int
+get_checksum_for_file(int fd, uint32_t *checksum, int op_version);
+int
+log_base2(unsigned long x);
+
+int
+get_checksum_for_path(char *path, uint32_t *checksum, int op_version);
+int
+get_file_mtime(const char *path, time_t *stamp);
+char *
+gf_resolve_path_parent(const char *path);
+
+char *
+strtail(char *str, const char *pattern);
+void
+skipwhite(char **s);
+char *
+nwstrtail(char *str, char *pattern);
+/* returns a new string with nth word of given string. n>=1 */
+
+typedef struct token_iter {
+ char *end;
+ char sep;
+} token_iter_t;
+char *
+token_iter_init(char *str, char sep, token_iter_t *tit);
+gf_boolean_t
+next_token(char **tokenp, token_iter_t *tit);
+void
+drop_token(char *token, token_iter_t *tit);
+
+gf_boolean_t
+mask_match(const uint32_t a, const uint32_t b, const uint32_t m);
+gf_boolean_t
+gf_is_ip_in_net(const char *network, const char *ip_str);
+char
+valid_host_name(char *address, int length);
+char
+valid_ipv4_address(char *address, int length, gf_boolean_t wildcard_acc);
+char
+valid_ipv6_address(char *address, int length, gf_boolean_t wildcard_acc);
+char
+valid_internet_address(char *address, gf_boolean_t wildcard_acc,
+ gf_boolean_t cidr);
+gf_boolean_t
+valid_mount_auth_address(char *address);
+gf_boolean_t
+valid_ipv4_subnetwork(const char *address);
+gf_boolean_t
+gf_sock_union_equal_addr(union gf_sock_union *a, union gf_sock_union *b);
+char *
+gf_rev_dns_lookup(const char *ip);
+
+char *
+uuid_utoa(uuid_t uuid);
+char *
+uuid_utoa_r(uuid_t uuid, char *dst);
+char *
+lkowner_utoa(gf_lkowner_t *lkowner);
+char *
+lkowner_utoa_r(gf_lkowner_t *lkowner, char *dst, int len);
+char *
+leaseid_utoa(const char *lease_id);
+gf_boolean_t
+is_valid_lease_id(const char *lease_id);
+char *
+gf_leaseid_get(void);
+char *
+gf_existing_leaseid(void);
+
+void
+gf_array_insertionsort(void *a, int l, int r, size_t elem_size, gf_cmp cmp);
+int
+gf_is_str_int(const char *value);
+
+char *gf_uint64_2human_readable(uint64_t);
+int
+validate_brick_name(char *brick);
+char *
+get_host_name(char *word, char **host);
+char *
+get_path_name(char *word, char **path);
+void
+gf_path_strip_trailing_slashes(char *path);
+uint64_t
+get_mem_size(void);
+int
+gf_strip_whitespace(char *str, int len);
+int
+gf_canonicalize_path(char *path);
+char *
+generate_glusterfs_ctx_id(void);
+char *
+gf_get_reserved_ports(void);
+int
+gf_process_reserved_ports(unsigned char *ports, uint32_t ceiling);
+gf_boolean_t
+gf_ports_reserved(char *blocked_port, unsigned char *ports, uint32_t ceiling);
+int
+gf_get_hostname_from_ip(char *client_ip, char **hostname);
+gf_boolean_t
+gf_is_local_addr(char *hostname);
+gf_boolean_t
+gf_is_same_address(char *host1, char *host2);
+void
+gf_xxh64_wrapper(const unsigned char *data, size_t const len,
+ unsigned long long const seed, char *xxh64);
+int
+gf_gfid_generate_from_xxh64(uuid_t gfid, char *key);
+
+int
+gf_set_timestamp(const char *src, const char *dest);
+
+int
+gf_thread_create(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg, const char *name,
+ ...) __attribute__((__format__(__printf__, 5, 6)));
+
+int
+gf_thread_vcreate(pthread_t *thread, const pthread_attr_t *attr,
+ void *(*start_routine)(void *), void *arg, const char *name,
+ va_list args);
+int
+gf_thread_create_detached(pthread_t *thread, void *(*start_routine)(void *),
+ void *arg, const char *name, ...)
+ __attribute__((__format__(__printf__, 4, 5)));
+
+void
+gf_thread_set_name(pthread_t thread, const char *name, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+
+void
+gf_thread_set_vname(pthread_t thread, const char *name, va_list args);
+gf_boolean_t
+gf_is_pid_running(int pid);
+gf_boolean_t
+gf_is_service_running(char *pidfile, int *pid);
+gf_boolean_t
+gf_valid_pid(const char *pid, int length);
+int
+gf_skip_header_section(int fd, int header_len);
+
+struct iatt;
+struct _dict;
+
+gf_boolean_t
+dht_is_linkfile(struct iatt *buf, struct _dict *dict);
+
+int
+gf_check_log_format(const char *value);
+
+int
+gf_check_logger(const char *value);
+
+gf_boolean_t
+gf_compare_sockaddr(const struct sockaddr *addr1, const struct sockaddr *addr2);
+
+char *
+gf_backtrace_save(char *buf);
+
+void
+gf_backtrace_done(char *buf);
+
+gf_loglevel_t
+fop_log_level(glusterfs_fop_t fop, int op_errno);
+
+int32_t
+gf_build_absolute_path(char *current_path, char *relative_path, char **path);
+
+int
+recursive_rmdir(const char *delete_path);
+
+int
+gf_get_index_by_elem(char **array, char *elem);
+
+int
+glusterfs_is_local_pathinfo(char *pathinfo, gf_boolean_t *local);
+
+int
+gf_thread_cleanup_xint(pthread_t thread);
+
+ssize_t
+gf_nread(int fd, void *buf, size_t count);
+
+ssize_t
+gf_nwrite(int fd, const void *buf, size_t count);
+
+void
+_mask_cancellation(void);
+void
+_unmask_cancellation(void);
+
+gf_boolean_t
+gf_is_zero_filled_stat(struct iatt *buf);
+
+void
+gf_zero_fill_stat(struct iatt *buf);
+
+gf_boolean_t
+gf_is_valid_xattr_namespace(char *k);
+
+const char *
+gf_inode_type_to_str(ia_type_t type);
+
+int32_t
+gf_bits_count(uint64_t n);
+
+int32_t
+gf_bits_index(uint64_t n);
+
+const char *
+gf_fop_string(glusterfs_fop_t fop);
+
+int
+gf_fop_int(char *fop);
+
+char *
+get_ip_from_addrinfo(struct addrinfo *addr, char **ip);
+
+int
+close_fds_except(int *fdv, size_t count);
+
+int
+gf_getgrouplist(const char *user, gid_t group, gid_t **groups);
+
+int
+glusterfs_compute_sha256(const unsigned char *content, size_t size,
+ char *sha256_hash);
+
+char *
+gf_strncpy(char *dest, const char *src, const size_t dest_size);
+
+void
+gf_strTrim(char **s);
+
+int
+gf_replace_old_iatt_in_dict(struct _dict *);
+
+int
+gf_replace_new_iatt_in_dict(struct _dict *);
+
+xlator_cmdline_option_t *
+find_xlator_option_in_cmd_args_t(const char *option_name, cmd_args_t *args);
+
+int
+gf_d_type_from_ia_type(ia_type_t type);
+
+int
+gf_syncfs(int fd);
+
+int
+gf_nanosleep(uint64_t nsec);
+
+static inline time_t
+gf_time(void)
+{
+ return time(NULL);
+}
+
+/* Return delta value in microseconds. */
+
+static inline double
+gf_tvdiff(struct timeval *start, struct timeval *end)
+{
+ struct timeval t;
+
+ if (start->tv_usec > end->tv_usec)
+ t.tv_sec = end->tv_sec - 1, t.tv_usec = end->tv_usec + 1000000;
+ else
+ t.tv_sec = end->tv_sec, t.tv_usec = end->tv_usec;
+
+ return (double)(t.tv_sec - start->tv_sec) * 1e6 +
+ (double)(t.tv_usec - start->tv_usec);
+}
+
+/* Return delta value in nanoseconds. */
+
+static inline double
+gf_tsdiff(struct timespec *start, struct timespec *end)
+{
+ struct timespec t;
+
+ if (start->tv_nsec > end->tv_nsec)
+ t.tv_sec = end->tv_sec - 1, t.tv_nsec = end->tv_nsec + 1000000000;
+ else
+ t.tv_sec = end->tv_sec, t.tv_nsec = end->tv_nsec;
+
+ return (double)(t.tv_sec - start->tv_sec) * 1e9 +
+ (double)(t.tv_nsec - start->tv_nsec);
+}
+
+#endif /* _COMMON_UTILS_H */
diff --git a/libglusterfs/src/glusterfs/compat-errno.h b/libglusterfs/src/glusterfs/compat-errno.h
new file mode 100644
index 00000000000..c4ab09ab0d5
--- /dev/null
+++ b/libglusterfs/src/glusterfs/compat-errno.h
@@ -0,0 +1,238 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __COMPAT_ERRNO_H__
+#define __COMPAT_ERRNO_H__
+
+#include <errno.h>
+
+#define GF_ERROR_CODE_SUCCESS 0
+#define GF_ERROR_CODE_UNKNOWN 1024
+#define GF_ERRNO_UNKNOWN 1024
+
+#define GF_ERROR_CODE_PERM 1 /* Operation not permitted */
+#define GF_ERROR_CODE_NOENT 2 /* No such file or directory */
+#define GF_ERROR_CODE_SRCH 3 /* No such process */
+#define GF_ERROR_CODE_INTR 4 /* Interrupted system call */
+#define GF_ERROR_CODE_IO 5 /* I/O error */
+#define GF_ERROR_CODE_NXIO 6 /* No such device or address */
+#define GF_ERROR_CODE_2BIG 7 /* Argument list too long */
+#define GF_ERROR_CODE_NOEXEC 8 /* Exec format error */
+#define GF_ERROR_CODE_BADF 9 /* Bad file number */
+#define GF_ERROR_CODE_CHILD 10 /* No child processes */
+#define GF_ERROR_CODE_AGAIN 11 /* Try again */
+#define GF_ERROR_CODE_NOMEM 12 /* Out of memory */
+#define GF_ERROR_CODE_ACCES 13 /* Permission denied */
+#define GF_ERROR_CODE_FAULT 14 /* Bad address */
+#define GF_ERROR_CODE_NOTBLK 15 /* Block device required */
+#define GF_ERROR_CODE_BUSY 16 /* Device or resource busy */
+#define GF_ERROR_CODE_EXIST 17 /* File exists */
+#define GF_ERROR_CODE_XDEV 18 /* Cross-device link */
+#define GF_ERROR_CODE_NODEV 19 /* No such device */
+#define GF_ERROR_CODE_NOTDIR 20 /* Not a directory */
+#define GF_ERROR_CODE_ISDIR 21 /* Is a directory */
+#define GF_ERROR_CODE_INVAL 22 /* Invalid argument */
+#define GF_ERROR_CODE_NFILE 23 /* File table overflow */
+#define GF_ERROR_CODE_MFILE 24 /* Too many open files */
+#define GF_ERROR_CODE_NOTTY 25 /* Not a typewriter */
+#define GF_ERROR_CODE_TXTBSY 26 /* Text file busy */
+#define GF_ERROR_CODE_FBIG 27 /* File too large */
+#define GF_ERROR_CODE_NOSPC 28 /* No space left on device */
+#define GF_ERROR_CODE_SPIPE 29 /* Illegal seek */
+#define GF_ERROR_CODE_ROFS 30 /* Read-only file system */
+#define GF_ERROR_CODE_MLINK 31 /* Too many links */
+#define GF_ERROR_CODE_PIPE 32 /* Broken pipe */
+#define GF_ERROR_CODE_DOM 33 /* Math argument out of domain of func */
+#define GF_ERROR_CODE_RANGE 34 /* Math result not representable */
+#define GF_ERROR_CODE_DEADLK 35 /* Resource deadlock would occur */
+#define GF_ERROR_CODE_NAMETOOLONG 36 /* File name too long */
+#define GF_ERROR_CODE_NOLCK 37 /* No record locks available */
+#define GF_ERROR_CODE_NOSYS 38 /* Function not implemented */
+#define GF_ERROR_CODE_NOTEMPTY 39 /* Directory not empty */
+#define GF_ERROR_CODE_LOOP 40 /* Too many symbolic links encountered */
+
+#define GF_ERROR_CODE_NOMSG 42 /* No message of desired type */
+#define GF_ERROR_CODE_IDRM 43 /* Identifier removed */
+#define GF_ERROR_CODE_CHRNG 44 /* Channel number out of range */
+#define GF_ERROR_CODE_L2NSYNC 45 /* Level 2 not synchronized */
+#define GF_ERROR_CODE_L3HLT 46 /* Level 3 halted */
+#define GF_ERROR_CODE_L3RST 47 /* Level 3 reset */
+#define GF_ERROR_CODE_LNRNG 48 /* Link number out of range */
+#define GF_ERROR_CODE_UNATCH 49 /* Protocol driver not attached */
+#define GF_ERROR_CODE_NOCSI 50 /* No CSI structure available */
+#define GF_ERROR_CODE_L2HLT 51 /* Level 2 halted */
+#define GF_ERROR_CODE_BADE 52 /* Invalid exchange */
+#define GF_ERROR_CODE_BADR 53 /* Invalid request descriptor */
+#define GF_ERROR_CODE_XFULL 54 /* Exchange full */
+#define GF_ERROR_CODE_NOANO 55 /* No anode */
+#define GF_ERROR_CODE_BADRQC 56 /* Invalid request code */
+#define GF_ERROR_CODE_BADSLT 57 /* Invalid slot */
+#define GF_ERROR_CODE_BFONT 59 /* Bad font file format */
+#define GF_ERROR_CODE_NOSTR 60 /* Device not a stream */
+#define GF_ERROR_CODE_NODATA 61 /* No data available */
+#define GF_ERROR_CODE_TIME 62 /* Timer expired */
+#define GF_ERROR_CODE_NOSR 63 /* Out of streams resources */
+#define GF_ERROR_CODE_NONET 64 /* Machine is not on the network */
+#define GF_ERROR_CODE_NOPKG 65 /* Package not installed */
+#define GF_ERROR_CODE_REMOTE 66 /* Object is remote */
+#define GF_ERROR_CODE_NOLINK 67 /* Link has been severed */
+#define GF_ERROR_CODE_ADV 68 /* Advertise error */
+#define GF_ERROR_CODE_SRMNT 69 /* Srmount error */
+#define GF_ERROR_CODE_COMM 70 /* Communication error on send */
+#define GF_ERROR_CODE_PROTO 71 /* Protocol error */
+#define GF_ERROR_CODE_MULTIHOP 72 /* Multihop attempted */
+#define GF_ERROR_CODE_DOTDOT 73 /* RFS specific error */
+#define GF_ERROR_CODE_BADMSG 74 /* Not a data message */
+#define GF_ERROR_CODE_OVERFLOW 75 /* Value too large for defined data type */
+#define GF_ERROR_CODE_NOTUNIQ 76 /* Name not unique on network */
+#define GF_ERROR_CODE_BADFD 77 /* File descriptor in bad state */
+#define GF_ERROR_CODE_REMCHG 78 /* Remote address changed */
+#define GF_ERROR_CODE_LIBACC 79 /* Can not access a needed shared library */
+#define GF_ERROR_CODE_LIBBAD 80 /* Accessing a corrupted shared library */
+#define GF_ERROR_CODE_LIBSCN 81 /* .lib section in a.out corrupted */
+#define GF_ERROR_CODE_LIBMAX \
+ 82 /* Attempting to link in too many shared libraries */
+#define GF_ERROR_CODE_LIBEXEC 83 /* Cannot exec a shared library directly */
+#define GF_ERROR_CODE_ILSEQ 84 /* Illegal byte sequence */
+#define GF_ERROR_CODE_RESTART \
+ 85 /* Interrupted system call should be restarted */
+#define GF_ERROR_CODE_STRPIPE 86 /* Streams pipe error */
+#define GF_ERROR_CODE_USERS 87 /* Too many users */
+#define GF_ERROR_CODE_NOTSOCK 88 /* Socket operation on non-socket */
+#define GF_ERROR_CODE_DESTADDRREQ 89 /* Destination address required */
+#define GF_ERROR_CODE_MSGSIZE 90 /* Message too long */
+#define GF_ERROR_CODE_PROTOTYPE 91 /* Protocol wrong type for socket */
+#define GF_ERROR_CODE_NOPROTOOPT 92 /* Protocol not available */
+#define GF_ERROR_CODE_PROTONOSUPPORT 93 /* Protocol not supported */
+#define GF_ERROR_CODE_SOCKTNOSUPPORT 94 /* Socket type not supported */
+#define GF_ERROR_CODE_OPNOTSUPP \
+ 95 /* Operation not supported on transport endpoint */
+#define GF_ERROR_CODE_PFNOSUPPORT 96 /* Protocol family not supported */
+#define GF_ERROR_CODE_AFNOSUPPORT \
+ 97 /* Address family not supported by protocol */
+#define GF_ERROR_CODE_ADDRINUSE 98 /* Address already in use */
+#define GF_ERROR_CODE_ADDRNOTAVAIL 99 /* Cannot assign requested address */
+#define GF_ERROR_CODE_NETDOWN 100 /* Network is down */
+#define GF_ERROR_CODE_NETUNREACH 101 /* Network is unreachable */
+#define GF_ERROR_CODE_NETRESET \
+ 102 /* Network dropped connection because of reset */
+#define GF_ERROR_CODE_CONNABORTED 103 /* Software caused connection abort */
+#define GF_ERROR_CODE_CONNRESET 104 /* Connection reset by peer */
+#define GF_ERROR_CODE_NOBUFS 105 /* No buffer space available */
+#define GF_ERROR_CODE_ISCONN 106 /* Transport endpoint is already connected */
+#define GF_ERROR_CODE_NOTCONN 107 /* Transport endpoint is not connected */
+#define GF_ERROR_CODE_SHUTDOWN \
+ 108 /* Cannot send after transport endpoint shutdown */
+#define GF_ERROR_CODE_TOOMANYREFS 109 /* Too many references: cannot splice */
+#define GF_ERROR_CODE_TIMEDOUT 110 /* Connection timed out */
+#define GF_ERROR_CODE_CONNREFUSED 111 /* Connection refused */
+#define GF_ERROR_CODE_HOSTDOWN 112 /* Host is down */
+#define GF_ERROR_CODE_HOSTUNREACH 113 /* No route to host */
+#define GF_ERROR_CODE_ALREADY 114 /* Operation already in progress */
+#define GF_ERROR_CODE_INPROGRESS 115 /* Operation now in progress */
+#define GF_ERROR_CODE_ALREADY 114 /* Operation already in progress */
+#define GF_ERROR_CODE_INPROGRESS 115 /* Operation now in progress */
+#define GF_ERROR_CODE_STALE 116 /* Stale NFS file handle */
+#define GF_ERROR_CODE_UCLEAN 117 /* Structure needs cleaning */
+#define GF_ERROR_CODE_NOTNAM 118 /* Not a XENIX named type file */
+#define GF_ERROR_CODE_NAVAIL 119 /* No XENIX semaphores available */
+#define GF_ERROR_CODE_ISNAM 120 /* Is a named type file */
+#define GF_ERROR_CODE_REMOTEIO 121 /* Remote I/O error */
+#define GF_ERROR_CODE_DQUOT 122 /* Quota exceeded */
+#define GF_ERROR_CODE_NOMEDIUM 123 /* No medium found */
+#define GF_ERROR_CODE_MEDIUMTYPE 124 /* Wrong medium type */
+#define GF_ERROR_CODE_CANCELED 125 /* Operation Canceled */
+#define GF_ERROR_CODE_NOKEY 126 /* Required key not available */
+#define GF_ERROR_CODE_KEYEXPIRED 127 /* Key has expired */
+#define GF_ERROR_CODE_KEYREVOKED 128 /* Key has been revoked */
+#define GF_ERROR_CODE_KEYREJECTED 129 /* Key was rejected by service */
+
+/* for robust mutexes */
+#define GF_ERROR_CODE_OWNERDEAD 130 /* Owner died */
+#define GF_ERROR_CODE_NOTRECOVERABLE 131 /* State not recoverable */
+
+/* Should never be seen by user programs */
+#define GF_ERROR_CODE_RESTARTSYS 512
+#define GF_ERROR_CODE_RESTARTNOINTR 513
+#define GF_ERROR_CODE_RESTARTNOHAND 514 /* restart if no handler.. */
+#define GF_ERROR_CODE_NOIOCTLCMD 515 /* No ioctl command */
+#define GF_ERROR_CODE_RESTART_RESTARTBLOCK \
+ 516 /* restart by calling sys_restart_syscall */
+
+/* Defined for the NFSv3 protocol */
+#define GF_ERROR_CODE_BADHANDLE 521 /* Illegal NFS file handle */
+#define GF_ERROR_CODE_NOTSYNC 522 /* Update synchronization mismatch */
+#define GF_ERROR_CODE_BADCOOKIE 523 /* Cookie is stale */
+#define GF_ERROR_CODE_NOTSUPP 524 /* Operation is not supported */
+#define GF_ERROR_CODE_TOOSMALL 525 /* Buffer or request is too small */
+#define GF_ERROR_CODE_SERVERFAULT 526 /* An untranslatable error occurred */
+#define GF_ERROR_CODE_BADTYPE 527 /* Type not supported by server */
+#define GF_ERROR_CODE_JUKEBOX \
+ 528 /* Request initiated, but will not complete before timeout */
+#define GF_ERROR_CODE_IOCBQUEUED \
+ 529 /* iocb queued, will get completion event */
+#define GF_ERROR_CODE_IOCBRETRY 530 /* iocb queued, will trigger a retry */
+
+/* Darwin OS X */
+#define GF_ERROR_CODE_NOPOLICY 701
+#define GF_ERROR_CODE_BADMACHO 702
+#define GF_ERROR_CODE_PWROFF 703
+#define GF_ERROR_CODE_DEVERR 704
+#define GF_ERROR_CODE_BADARCH 705
+#define GF_ERROR_CODE_BADEXEC 706
+#define GF_ERROR_CODE_SHLIBVERS 707
+
+/* Solaris */
+/* ENOTACTIVE 73 / * Facility is not active */
+#define GF_ERROR_CODE_NOTACTIVE 801
+/* ELOCKUNMAPPED 72 / * locked lock was unmapped */
+#define GF_ERROR_CODE_LOCKUNMAPPED 802
+
+/* BSD system */
+#define GF_ERROR_CODE_PROCLIM 901 /* Too many processes */
+#define GF_ERROR_CODE_BADRPC 902 /* RPC struct is bad */
+#define GF_ERROR_CODE_RPCMISMATCH 903 /* RPC version wrong */
+#define GF_ERROR_CODE_PROGUNAVAIL 904 /* RPC prog. not avail */
+#define GF_ERROR_CODE_PROGMISMATCH 905 /* Program version wrong */
+#define GF_ERROR_CODE_PROCUNAVAIL 905 /* Bad procedure for program */
+#define GF_ERROR_CODE_FTYPE 906 /* Inappropriate file type or format */
+#define GF_ERROR_CODE_AUTH 907 /* Authentication error */
+#define GF_ERROR_CODE_NEEDAUTH 908 /* Need authenticator */
+#define GF_ERROR_CODE_DOOFUS 909 /* Programming error */
+
+#define GF_ERROR_CODE_NOATTR GF_ERROR_CODE_NODATA /* Attribute not found */
+
+/* Either one of enodata or enoattr will be there in system */
+#ifndef ENOATTR
+#define ENOATTR ENODATA
+#endif /* ENOATTR */
+
+#ifndef ENODATA
+#define ENODATA ENOATTR
+#endif /* ENODATA */
+
+#ifndef EBADFD
+#define EBADFD EBADRPC
+#endif /* EBADFD */
+
+#if !defined(ENODATA)
+/* This happens on FreeBSD. Value borrowed from Linux. */
+#define ENODATA 61
+#endif
+
+/* These functions are defined for all the OS flags, but content will
+ * be different for each OS flag.
+ */
+int32_t
+gf_errno_to_error(int32_t op_errno);
+int32_t
+gf_error_to_errno(int32_t error);
+
+#endif /* __COMPAT_ERRNO_H__ */
diff --git a/libglusterfs/src/glusterfs/compat-uuid.h b/libglusterfs/src/glusterfs/compat-uuid.h
new file mode 100644
index 00000000000..6e7fdefbfab
--- /dev/null
+++ b/libglusterfs/src/glusterfs/compat-uuid.h
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef _GF_UUID_H
+#define _GF_UUID_H
+
+#include <uuid/uuid.h>
+
+static inline void
+gf_uuid_clear(uuid_t uuid)
+{
+ uuid_clear(uuid);
+}
+
+static inline int
+gf_uuid_compare(uuid_t u1, uuid_t u2)
+{
+ return uuid_compare(u1, u2);
+}
+
+static inline void
+gf_uuid_copy(uuid_t dst, const uuid_t src)
+{
+ uuid_copy(dst, src);
+}
+
+static inline void
+gf_uuid_generate(uuid_t uuid)
+{
+ uuid_generate(uuid);
+}
+
+static inline int
+gf_uuid_is_null(uuid_t uuid)
+{
+ return uuid_is_null(uuid);
+}
+
+static inline int
+gf_uuid_parse(const char *in, uuid_t uuid)
+{
+ return uuid_parse(in, uuid);
+}
+
+static inline void
+gf_uuid_unparse(const uuid_t uuid, char *out)
+{
+ uuid_unparse(uuid, out);
+}
+
+/* TODO: add more uuid APIs, use constructs like this:
+#if defined(__NetBSD__) * NetBSD libc *
+
+#include <string.h>
+
+static inline void
+gf_uuid_clear (uuid_t uuid)
+{
+ memset (uuid, 0, sizeof (uuid_t));
+}
+#endif
+*/
+
+#endif /* _GF_UUID_H */
diff --git a/libglusterfs/src/glusterfs/compat.h b/libglusterfs/src/glusterfs/compat.h
new file mode 100644
index 00000000000..bf00d903152
--- /dev/null
+++ b/libglusterfs/src/glusterfs/compat.h
@@ -0,0 +1,544 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __COMPAT_H__
+#define __COMPAT_H__
+
+#include <stdint.h>
+
+#ifndef LLONG_MAX
+#define LLONG_MAX __LONG_LONG_MAX__ /* compat with old gcc */
+#endif /* LLONG_MAX */
+
+#ifdef GF_LINUX_HOST_OS
+
+#define UNIX_PATH_MAX 108
+
+#include <sys/un.h>
+#include <linux/limits.h>
+#include <sys/xattr.h>
+#include <linux/xattr.h>
+#include <endian.h>
+#ifdef HAVE_LINUX_FALLOC_H
+#include <linux/falloc.h>
+#endif
+
+#ifdef HAVE_ENDIAN_H
+#include <endian.h>
+#endif
+
+#ifndef _PATH_UMOUNT
+#define _PATH_UMOUNT "/bin/umount"
+#endif
+#define GF_XATTR_NAME_MAX XATTR_NAME_MAX
+#endif /* GF_LINUX_HOST_OS */
+
+/*
+ * Define the fallocate flags in case we do not have the header. This also
+ * accounts for older systems that do not define FALLOC_FL_PUNCH_HOLE.
+ */
+
+#ifndef FALLOC_FL_KEEP_SIZE
+#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
+#endif
+#ifndef FALLOC_FL_PUNCH_HOLE
+#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
+#endif
+#ifndef FALLOC_FL_ZERO_RANGE
+#define FALLOC_FL_ZERO_RANGE 0x10 /* zeroes out range */
+#endif
+#ifndef FALLOC_FL_COLLAPSE_RANGE
+#define FALLOC_FL_COLLAPSE_RANGE 0x08 /* reduces the size */
+#endif
+#ifndef FALLOC_FL_INSERT_RANGE
+#define FALLOC_FL_INSERT_RANGE 0x20 /* expands the size */
+#endif
+
+#ifndef HAVE_LLISTXATTR
+
+/* This part is valid only in case of old glibc which doesn't support
+ * 'llistxattr()' system calls.
+ */
+
+#define lremovexattr(path, key) removexattr(path, key)
+#define llistxattr(path, key, size) listxattr(path, key, size)
+#define lgetxattr(path, key, value, size) getxattr(path, key, value, size)
+#define lsetxattr(path, key, value, size, flags) \
+ setxattr(path, key, value, size, flags)
+
+#endif /* HAVE_LLISTXATTR */
+
+#ifdef GF_DARWIN_HOST_OS
+#include <machine/endian.h>
+#include <libkern/OSByteOrder.h>
+#include <sys/xattr.h>
+
+#define htobe16(x) OSSwapHostToBigInt16(x)
+#define htole16(x) OSSwapHostToLittleInt16(x)
+#define be16toh(x) OSSwapBigToHostInt16(x)
+#define le16toh(x) OSSwapLittleToHostInt16(x)
+
+#define htobe32(x) OSSwapHostToBigInt32(x)
+#define htole32(x) OSSwapHostToLittleInt32(x)
+#define be32toh(x) OSSwapBigToHostInt32(x)
+#define le32toh(x) OSSwapLittleToHostInt32(x)
+
+#define htobe64(x) OSSwapHostToBigInt64(x)
+#define htole64(x) OSSwapHostToLittleInt64(x)
+#define be64toh(x) OSSwapBigToHostInt64(x)
+#define le64toh(x) OSSwapLittleToHostInt64(x)
+
+#endif
+
+#ifdef GF_BSD_HOST_OS
+/* In case of FreeBSD and NetBSD */
+
+#define UNIX_PATH_MAX 104
+#include <sys/types.h>
+
+#include <sys/un.h>
+#include <sys/endian.h>
+#include <sys/extattr.h>
+#ifdef HAVE_SYS_XATTR_H
+#include <sys/xattr.h>
+#endif /* HAVE_SYS_XATTR_H */
+#include <limits.h>
+
+#include <libgen.h>
+/*
+ * This is where things like off64_t are defined.
+ * So include it before declaring _OFF64_T_DECLARED.
+ * If the freebsd version has support for off64_t
+ * including stdio.h should be sufficient.
+ */
+#include <stdio.h>
+
+#ifndef _OFF64_T_DECLARED
+/*
+ * Including <stdio.h> (done above) should actually define
+ * _OFF64_T_DECLARED with off64_t data type being available
+ * for consumption. But, off64_t data type is not recognizable
+ * for FreeBSD versions less than 11. Hence, int64_t is typedefed
+ * to off64_t.
+ */
+#define _OFF64_T_DECLARED
+typedef int64_t off64_t;
+#endif /* _OFF64_T_DECLARED */
+
+#ifndef XATTR_CREATE
+enum {
+ ATTR_CREATE = 1,
+#define XATTR_CREATE ATTR_CREATE
+ ATTR_REPLACE = 2
+#define XATTR_REPLACE ATTR_REPLACE
+};
+#endif /* XATTR_CREATE */
+
+#ifndef sighandler_t
+#define sighandler_t sig_t
+#endif
+
+#ifdef __FreeBSD__
+#undef ino_t
+#define ino_t uint64_t
+#include <sys/types.h>
+#include <sys/extattr.h>
+/* Using NAME_MAX since EXTATTR_MAXNAMELEN is inside a preprocessor conditional
+ * for the kernel
+ */
+#define GF_XATTR_NAME_MAX NAME_MAX
+#endif /* __FreeBSD__ */
+
+#ifdef __NetBSD__
+#define GF_XATTR_NAME_MAX XATTR_NAME_MAX
+#endif
+
+#ifndef ino64_t
+#define ino64_t ino_t
+#endif
+
+#ifndef EUCLEAN
+#define EUCLEAN 0
+#endif
+
+#include <netinet/in.h>
+#ifndef s6_addr16
+#define s6_addr16 __u6_addr.__u6_addr16
+#endif
+#ifndef s6_addr32
+#define s6_addr32 __u6_addr.__u6_addr32
+#endif
+
+#ifndef LOGIN_NAME_MAX
+#define LOGIN_NAME_MAX 256
+#endif
+
+/* Posix dictates NAME_MAX to be used */
+#ifndef NAME_MAX
+#ifdef MAXNAMLEN
+#define NAME_MAX MAXNAMLEN
+#else
+#define NAME_MAX 255
+#endif
+#endif
+
+#define F_GETLK64 F_GETLK
+#define F_SETLK64 F_SETLK
+#define F_SETLKW64 F_SETLKW
+#define FALLOC_FL_KEEP_SIZE 0x01 /* default is extend size */
+#define FALLOC_FL_PUNCH_HOLE 0x02 /* de-allocates range */
+#define FALLOC_FL_ZERO_RANGE 0x10 /* zeroes out range */
+#define FALLOC_FL_INSERT_RANGE 0x20 /* Expands the size */
+#define FALLOC_FL_COLLAPSE_RANGE 0x08 /* Reduces the size */
+
+#ifndef _PATH_UMOUNT
+#define _PATH_UMOUNT "/sbin/umount"
+#endif
+
+void
+gf_extattr_list_reshape(char *list, ssize_t size);
+
+#endif /* GF_BSD_HOST_OS */
+
+#ifdef GF_DARWIN_HOST_OS
+#include <machine/endian.h>
+#include <libkern/OSByteOrder.h>
+
+#define htobe16(x) OSSwapHostToBigInt16(x)
+#define htole16(x) OSSwapHostToLittleInt16(x)
+#define be16toh(x) OSSwapBigToHostInt16(x)
+#define le16toh(x) OSSwapLittleToHostInt16(x)
+
+#define htobe32(x) OSSwapHostToBigInt32(x)
+#define htole32(x) OSSwapHostToLittleInt32(x)
+#define be32toh(x) OSSwapBigToHostInt32(x)
+#define le32toh(x) OSSwapLittleToHostInt32(x)
+
+#define htobe64(x) OSSwapHostToBigInt64(x)
+#define htole64(x) OSSwapHostToLittleInt64(x)
+#define be64toh(x) OSSwapBigToHostInt64(x)
+#define le64toh(x) OSSwapLittleToHostInt64(x)
+
+#define UNIX_PATH_MAX 104
+/* OSX Yosemite now has this defined */
+#ifndef AT_SYMLINK_NOFOLLOW
+#define AT_SYMLINK_NOFOLLOW 0x100
+#endif
+#include <sys/types.h>
+
+#include <sys/un.h>
+#include <sys/xattr.h>
+#include <limits.h>
+
+#include <libgen.h>
+
+#if __DARWIN_64_BIT_INO_T == 0
+#error '64 bit ino_t is must for GlusterFS to work, Compile with "CFLAGS=-D__DARWIN_64_BIT_INO_T"'
+#endif /* __DARWIN_64_BIT_INO_T */
+
+#if __DARWIN_64_BIT_INO_T == 0
+#error '64 bit ino_t is must for GlusterFS to work, Compile with "CFLAGS=-D__DARWIN_64_BIT_INO_T"'
+#endif /* __DARWIN_64_BIT_INO_T */
+
+#ifndef sighandler_t
+#define sighandler_t sig_t
+#endif
+
+#ifndef EUCLEAN
+#define EUCLEAN 0
+#endif
+
+#include <netinet/in.h>
+#ifndef s6_addr16
+#define s6_addr16 __u6_addr.__u6_addr16
+#endif
+#ifndef s6_addr32
+#define s6_addr32 __u6_addr.__u6_addr32
+#endif
+
+/* Posix dictates NAME_MAX to be used */
+#ifndef NAME_MAX
+#ifdef MAXNAMLEN
+#define NAME_MAX MAXNAMLEN
+#else
+#define NAME_MAX 255
+#endif
+#endif
+
+#define F_GETLK64 F_GETLK
+#define F_SETLK64 F_SETLK
+#define F_SETLKW64 F_SETLKW
+
+#ifndef FTW_CONTINUE
+#define FTW_CONTINUE 0
+#endif
+
+#ifndef _PATH_UMOUNT
+#define _PATH_UMOUNT "/sbin/umount"
+#endif
+#endif /* GF_DARWIN_HOST_OS */
+
+#ifdef GF_SOLARIS_HOST_OS
+
+#define UNIX_PATH_MAX 108
+#define EUCLEAN 117
+
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <sys/fcntl.h>
+#include <libgen.h>
+#include <sys/mkdev.h>
+
+#ifndef lchmod
+#define lchmod chmod
+#endif
+
+#define lgetxattr(path, key, value, size) \
+ solaris_getxattr(path, key, value, size)
+enum {
+ ATTR_CREATE = 1,
+#define XATTR_CREATE ATTR_CREATE
+ ATTR_REPLACE = 2
+#define XATTR_REPLACE ATTR_REPLACE
+};
+
+/* This patch is not present in Solaris 10 and before */
+#ifndef dirfd
+#define dirfd(dirp) ((dirp)->dd_fd)
+#endif
+
+/* Posix dictates NAME_MAX to be used */
+#ifndef NAME_MAX
+#ifdef MAXNAMLEN
+#define NAME_MAX MAXNAMLEN
+#else
+#define NAME_MAX 255
+#endif
+#endif
+
+#include <netinet/in.h>
+#ifndef s6_addr16
+#define S6_ADDR16(x) ((uint16_t *)((char *)&(x).s6_addr))
+#endif
+#ifndef s6_addr32
+#define s6_addr32 _S6_un._S6_u32
+#endif
+
+#define lutimes(filename, times) utimes(filename, times)
+
+#ifndef SEEK_SET
+#define SEEK_SET 0
+#endif
+
+enum {
+ DT_UNKNOWN = 0,
+#define DT_UNKNOWN DT_UNKNOWN
+ DT_FIFO = 1,
+#define DT_FIFO DT_FIFO
+ DT_CHR = 2,
+#define DT_CHR DT_CHR
+ DT_DIR = 4,
+#define DT_DIR DT_DIR
+ DT_BLK = 6,
+#define DT_BLK DT_BLK
+ DT_REG = 8,
+#define DT_REG DT_REG
+ DT_LNK = 10,
+#define DT_LNK DT_LNK
+ DT_SOCK = 12,
+#define DT_SOCK DT_SOCK
+ DT_WHT = 14
+#define DT_WHT DT_WHT
+};
+
+#ifndef _PATH_MOUNTED
+#define _PATH_MOUNTED "/etc/mtab"
+#endif
+#ifndef _PATH_UMOUNT
+#define _PATH_UMOUNT "/sbin/umount"
+#endif
+
+#ifndef O_ASYNC
+#ifdef FASYNC
+#define O_ASYNC FASYNC
+#else
+#define O_ASYNC 0
+#endif
+#endif
+
+#ifndef FTW_CONTINUE
+#define FTW_CONTINUE 0
+#endif
+
+int
+asprintf(char **string_ptr, const char *format, ...);
+
+int
+vasprintf(char **result, const char *format, va_list args);
+char *
+strsep(char **str, const char *delims);
+int
+solaris_listxattr(const char *path, char *list, size_t size);
+int
+solaris_removexattr(const char *path, const char *key);
+int
+solaris_getxattr(const char *path, const char *key, char *value, size_t size);
+int
+solaris_setxattr(const char *path, const char *key, const char *value,
+ size_t size, int flags);
+int
+solaris_fgetxattr(int fd, const char *key, char *value, size_t size);
+int
+solaris_fsetxattr(int fd, const char *key, const char *value, size_t size,
+ int flags);
+int
+solaris_flistxattr(int fd, char *list, size_t size);
+
+int
+solaris_rename(const char *oldpath, const char *newpath);
+
+int
+solaris_unlink(const char *pathname);
+
+char *
+mkdtemp(char *temp);
+
+#define GF_SOLARIS_XATTR_DIR ".glusterfs_xattr_inode"
+
+int
+solaris_xattr_resolve_path(const char *real_path, char **path);
+
+#endif /* GF_SOLARIS_HOST_OS */
+
+#ifndef HAVE_ARGP
+#include "argp.h"
+#else
+#include <argp.h>
+#endif /* HAVE_ARGP */
+
+#ifndef HAVE_STRNLEN
+size_t
+strnlen(const char *string, size_t maxlen);
+#endif /* STRNLEN */
+
+#ifndef strdupa
+#define strdupa(s) \
+ (__extension__({ \
+ __const char *__old = (s); \
+ size_t __len = strlen(__old) + 1; \
+ char *__new = (char *)__builtin_alloca(__len); \
+ (char *)memcpy(__new, __old, __len); \
+ }))
+#endif
+
+#define GF_DIR_ALIGN(x) (((x) + sizeof(uint64_t) - 1) & ~(sizeof(uint64_t) - 1))
+
+#include <sys/types.h>
+#include <dirent.h>
+
+static inline int32_t
+dirent_size(struct dirent *entry)
+{
+ int32_t size = -1;
+
+#ifdef GF_BSD_HOST_OS
+ size = GF_DIR_ALIGN(24 /* FIX MEEEE!!! */ + entry->d_namlen);
+#endif
+#ifdef GF_DARWIN_HOST_OS
+ size = GF_DIR_ALIGN(24 /* FIX MEEEE!!! */ + entry->d_namlen);
+#endif
+#ifdef GF_LINUX_HOST_OS
+ size = GF_DIR_ALIGN(24 /* FIX MEEEE!!! */ + entry->d_reclen);
+#endif
+#ifdef GF_SOLARIS_HOST_OS
+ size = GF_DIR_ALIGN(24 /* FIX MEEEE!!! */ + entry->d_reclen);
+#endif
+ return size;
+}
+
+#ifdef THREAD_UNSAFE_BASENAME
+char *
+basename_r(const char *);
+#define basename(path) basename_r(path)
+#endif /* THREAD_UNSAFE_BASENAME */
+
+#ifdef THREAD_UNSAFE_DIRNAME
+char *
+dirname_r(char *path);
+#define dirname(path) dirname_r(path)
+#endif /* THREAD_UNSAFE_DIRNAME */
+
+#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
+/* Linux, Solaris, Cygwin */
+#define ST_ATIM_SEC(stbuf) ((stbuf)->st_atim.tv_sec)
+#define ST_CTIM_SEC(stbuf) ((stbuf)->st_ctim.tv_sec)
+#define ST_MTIM_SEC(stbuf) ((stbuf)->st_mtim.tv_sec)
+#define ST_ATIM_SEC_SET(stbuf, val) ((stbuf)->st_atim.tv_sec = (val))
+#define ST_MTIM_SEC_SET(stbuf, val) ((stbuf)->st_mtim.tv_sec = (val))
+#define ST_CTIM_SEC_SET(stbuf, val) ((stbuf)->st_ctim.tv_sec = (val))
+#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atim.tv_nsec)
+#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctim.tv_nsec)
+#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtim.tv_nsec)
+#define ST_ATIM_NSEC_SET(stbuf, val) ((stbuf)->st_atim.tv_nsec = (val))
+#define ST_MTIM_NSEC_SET(stbuf, val) ((stbuf)->st_mtim.tv_nsec = (val))
+#define ST_CTIM_NSEC_SET(stbuf, val) ((stbuf)->st_ctim.tv_nsec = (val))
+#elif defined(HAVE_STRUCT_STAT_ST_ATIMESPEC_TV_NSEC)
+/* FreeBSD, NetBSD */
+#define ST_ATIM_SEC(stbuf) ((stbuf)->st_atimespec.tv_sec)
+#define ST_CTIM_SEC(stbuf) ((stbuf)->st_ctimespec.tv_sec)
+#define ST_MTIM_SEC(stbuf) ((stbuf)->st_mtimespec.tv_sec)
+#define ST_ATIM_SEC_SET(stbuf, val) ((stbuf)->st_atimespec.tv_sec = (val))
+#define ST_MTIM_SEC_SET(stbuf, val) ((stbuf)->st_mtimespec.tv_sec = (val))
+#define ST_CTIM_SEC_SET(stbuf, val) ((stbuf)->st_ctimespec.tv_sec = (val))
+#define ST_ATIM_NSEC(stbuf) ((stbuf)->st_atimespec.tv_nsec)
+#define ST_CTIM_NSEC(stbuf) ((stbuf)->st_ctimespec.tv_nsec)
+#define ST_MTIM_NSEC(stbuf) ((stbuf)->st_mtimespec.tv_nsec)
+#define ST_ATIM_NSEC_SET(stbuf, val) ((stbuf)->st_atimespec.tv_nsec = (val))
+#define ST_MTIM_NSEC_SET(stbuf, val) ((stbuf)->st_mtimespec.tv_nsec = (val))
+#define ST_CTIM_NSEC_SET(stbuf, val) ((stbuf)->st_ctimespec.tv_nsec = (val))
+#else
+#define ST_ATIM_NSEC(stbuf) (0)
+#define ST_CTIM_NSEC(stbuf) (0)
+#define ST_MTIM_NSEC(stbuf) (0)
+#define ST_ATIM_NSEC_SET(stbuf, val) \
+ do { \
+ } while (0);
+#define ST_MTIM_NSEC_SET(stbuf, val) \
+ do { \
+ } while (0);
+#define ST_CTIM_NSEC_SET(stbuf, val) \
+ do { \
+ } while (0);
+#endif
+
+#ifdef GF_BSD_HOST_OS
+#define CLOCK_REALTIME_COARSE CLOCK_REALTIME
+#endif
+
+#if defined(__GNUC__) && !defined(RELAX_POISONING)
+/* Use run API, see run.h */
+#include <stdlib.h> /* system(), mkostemp() */
+#include <stdio.h> /* popen() */
+#ifdef GF_LINUX_HOST_OS
+#include <sys/sysmacros.h>
+#endif
+#pragma GCC poison system mkostemp popen
+#endif
+
+int
+gf_umount_lazy(char *xlname, char *path, int rmdir);
+
+#ifndef GF_XATTR_NAME_MAX
+#error 'Please define GF_XATTR_NAME_MAX for your OS distribution.'
+#endif
+
+#endif /* __COMPAT_H__ */
diff --git a/libglusterfs/src/daemon.h b/libglusterfs/src/glusterfs/daemon.h
index 80836a326d5..48850800b5e 100644
--- a/libglusterfs/src/daemon.h
+++ b/libglusterfs/src/glusterfs/daemon.h
@@ -11,13 +11,10 @@
#ifndef _DAEMON_H
#define _DAEMON_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#define DEVNULLPATH "/dev/null"
-int os_daemon_return(int nochdir, int noclose);
-int os_daemon(int nochdir, int noclose);
+int
+os_daemon_return(int nochdir, int noclose);
+int
+os_daemon(int nochdir, int noclose);
#endif /*_DAEMON_H */
diff --git a/libglusterfs/src/glusterfs/default-args.h b/libglusterfs/src/glusterfs/default-args.h
new file mode 100644
index 00000000000..ca7526fcab6
--- /dev/null
+++ b/libglusterfs/src/glusterfs/default-args.h
@@ -0,0 +1,455 @@
+/*
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* libglusterfs/src/defaults.h:
+ This file contains definition of default fops and mops functions.
+*/
+
+#ifndef _DEFAULT_ARGS_H
+#define _DEFAULT_ARGS_H
+
+#include "glusterfs/xlator.h"
+
+int
+args_lookup_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ dict_t *xdata, struct iatt *postparent);
+
+int
+args_stat_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata);
+
+int
+args_fstat_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iatt *buf, dict_t *xdata);
+
+int
+args_truncate_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int
+args_ftruncate_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int
+args_access_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_readlink_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, const char *path, struct iatt *stbuf,
+ dict_t *xdata);
+
+int
+args_mknod_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+args_mkdir_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+args_unlink_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+args_rmdir_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+int
+args_symlink_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+int
+args_rename_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata);
+
+int
+args_link_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+args_create_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+args_open_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ fd_t *fd, dict_t *xdata);
+
+int
+args_readv_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iovec *vector, int32_t count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata);
+
+int
+args_writev_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int
+args_put_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+args_flush_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata);
+
+int
+args_fsync_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata);
+
+int
+args_opendir_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata);
+
+int
+args_fsyncdir_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_statfs_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct statvfs *buf, dict_t *xdata);
+
+int
+args_setxattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_getxattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *dict, dict_t *xdata);
+
+int
+args_fsetxattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_fgetxattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *dict, dict_t *xdata);
+
+int
+args_removexattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_fremovexattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_lk_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct gf_flock *lock, dict_t *xdata);
+
+int
+args_inodelk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_finodelk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_entrylk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_fentrylk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int
+args_readdirp_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries, dict_t *xdata);
+
+int
+args_readdir_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries, dict_t *xdata);
+
+int
+args_rchecksum_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata);
+
+int
+args_xattrop_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata);
+
+int
+args_fxattrop_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata);
+
+int
+args_setattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+int
+args_fsetattr_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+int
+args_fallocate_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+int
+args_discard_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+int
+args_zerofill_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+int
+args_ipc_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata);
+
+int
+args_seek_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ off_t offset, dict_t *xdata);
+
+void
+args_lease_cbk_store(default_args_cbk_t *args, int32_t op_ret, int32_t op_errno,
+ struct gf_lease *lease, dict_t *xdata);
+
+int
+args_copy_file_range_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
+void
+args_cbk_wipe(default_args_cbk_t *args_cbk);
+
+void
+args_wipe(default_args_t *args);
+
+int
+args_lookup_store(default_args_t *args, loc_t *loc, dict_t *xdata);
+
+int
+args_stat_store(default_args_t *args, loc_t *loc, dict_t *xdata);
+
+int
+args_fstat_store(default_args_t *args, fd_t *fd, dict_t *xdata);
+
+int
+args_truncate_store(default_args_t *args, loc_t *loc, off_t off, dict_t *xdata);
+int
+args_ftruncate_store(default_args_t *args, fd_t *fd, off_t off, dict_t *xdata);
+
+int
+args_access_store(default_args_t *args, loc_t *loc, int32_t mask,
+ dict_t *xdata);
+
+int
+args_readlink_store(default_args_t *args, loc_t *loc, size_t size,
+ dict_t *xdata);
+
+int
+args_mknod_store(default_args_t *args, loc_t *loc, mode_t mode, dev_t rdev,
+ mode_t umask, dict_t *xdata);
+
+int
+args_mkdir_store(default_args_t *args, loc_t *loc, mode_t mode, mode_t umask,
+ dict_t *xdata);
+
+int
+args_unlink_store(default_args_t *args, loc_t *loc, int xflag, dict_t *xdata);
+
+int
+args_rmdir_store(default_args_t *args, loc_t *loc, int flags, dict_t *xdata);
+
+int
+args_symlink_store(default_args_t *args, const char *linkname, loc_t *loc,
+ mode_t umask, dict_t *xdata);
+
+int
+args_rename_store(default_args_t *args, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int
+args_link_store(default_args_t *args, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int
+args_create_store(default_args_t *args, loc_t *loc, int32_t flags, mode_t mode,
+ mode_t umask, fd_t *fd, dict_t *xdata);
+
+int
+args_open_store(default_args_t *args, loc_t *loc, int32_t flags, fd_t *fd,
+ dict_t *xdata);
+
+int
+args_readv_store(default_args_t *args, fd_t *fd, size_t size, off_t off,
+ uint32_t flags, dict_t *xdata);
+
+int
+args_writev_store(default_args_t *args, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
+
+int
+args_put_store(default_args_t *args, loc_t *loc, mode_t mode, mode_t umask,
+ uint32_t flags, struct iovec *vector, int32_t count, off_t off,
+ struct iobref *iobref, dict_t *xattr, dict_t *xdata);
+
+int
+args_flush_store(default_args_t *args, fd_t *fd, dict_t *xdata);
+
+int
+args_fsync_store(default_args_t *args, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
+
+int
+args_opendir_store(default_args_t *args, loc_t *loc, fd_t *fd, dict_t *xdata);
+
+int
+args_fsyncdir_store(default_args_t *args, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
+
+int
+args_statfs_store(default_args_t *args, loc_t *loc, dict_t *xdata);
+
+int
+args_setxattr_store(default_args_t *args, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int
+args_getxattr_store(default_args_t *args, loc_t *loc, const char *name,
+ dict_t *xdata);
+
+int
+args_fsetxattr_store(default_args_t *args, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int
+args_fgetxattr_store(default_args_t *args, fd_t *fd, const char *name,
+ dict_t *xdata);
+
+int
+args_removexattr_store(default_args_t *args, loc_t *loc, const char *name,
+ dict_t *xdata);
+
+int
+args_fremovexattr_store(default_args_t *args, fd_t *fd, const char *name,
+ dict_t *xdata);
+
+int
+args_lk_store(default_args_t *args, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata);
+
+int
+args_inodelk_store(default_args_t *args, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+int
+args_finodelk_store(default_args_t *args, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+int
+args_entrylk_store(default_args_t *args, const char *volume, loc_t *loc,
+ const char *name, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata);
+
+int
+args_fentrylk_store(default_args_t *args, const char *volume, fd_t *fd,
+ const char *name, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata);
+int
+args_readdirp_store(default_args_t *args, fd_t *fd, size_t size, off_t off,
+ dict_t *xdata);
+
+int
+args_readdir_store(default_args_t *args, fd_t *fd, size_t size, off_t off,
+ dict_t *xdata);
+
+int
+args_rchecksum_store(default_args_t *args, fd_t *fd, off_t offset, int32_t len,
+ dict_t *xdata);
+
+int
+args_xattrop_store(default_args_t *args, loc_t *loc, gf_xattrop_flags_t optype,
+ dict_t *xattr, dict_t *xdata);
+
+int
+args_fxattrop_store(default_args_t *args, fd_t *fd, gf_xattrop_flags_t optype,
+ dict_t *xattr, dict_t *xdata);
+
+int
+args_setattr_store(default_args_t *args, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
+
+int
+args_fsetattr_store(default_args_t *args, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
+
+int
+args_fallocate_store(default_args_t *args, fd_t *fd, int32_t mode, off_t offset,
+ size_t len, dict_t *xdata);
+
+int
+args_discard_store(default_args_t *args, fd_t *fd, off_t offset, size_t len,
+ dict_t *xdata);
+
+int
+args_zerofill_store(default_args_t *args, fd_t *fd, off_t offset, off_t len,
+ dict_t *xdata);
+
+int
+args_ipc_store(default_args_t *args, int32_t op, dict_t *xdata);
+
+int
+args_seek_store(default_args_t *args, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata);
+
+void
+args_lease_store(default_args_t *args, loc_t *loc, struct gf_lease *lease,
+ dict_t *xdata);
+
+int
+args_getactivelk_cbk_store(default_args_cbk_t *args, int32_t op_ret,
+ int32_t op_errno, lock_migration_info_t *locklist,
+ dict_t *xdata);
+
+int
+args_setactivelk_store(default_args_t *args, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata);
+
+int
+args_icreate_store(default_args_t *args, loc_t *loc, mode_t mode,
+ dict_t *xdata);
+
+int
+args_namelink_store(default_args_t *args, loc_t *loc, dict_t *xdata);
+
+int
+args_copy_file_range_store(default_args_t *args, fd_t *fd_in, off64_t off_in,
+ fd_t *fd_out, off_t off64_out, size_t len,
+ uint32_t flags, dict_t *xdata);
+
+void
+args_cbk_init(default_args_cbk_t *args_cbk);
+#endif /* _DEFAULT_ARGS_H */
diff --git a/libglusterfs/src/glusterfs/defaults.h b/libglusterfs/src/glusterfs/defaults.h
new file mode 100644
index 00000000000..5a818eeb91a
--- /dev/null
+++ b/libglusterfs/src/glusterfs/defaults.h
@@ -0,0 +1,1275 @@
+/*
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* libglusterfs/src/defaults.h:
+ This file contains definition of default fops and mops functions.
+*/
+
+#ifndef _DEFAULTS_H
+#define _DEFAULTS_H
+
+#include "glusterfs/xlator.h"
+
+typedef struct {
+ int op_ret;
+ int op_errno;
+ inode_t *inode;
+ struct iatt stat;
+ struct iatt prestat;
+ struct iatt poststat;
+ struct iatt preparent; /* @preoldparent in rename_cbk */
+ struct iatt postparent; /* @postoldparent in rename_cbk */
+ struct iatt preparent2; /* @prenewparent in rename_cbk */
+ struct iatt postparent2; /* @postnewparent in rename_cbk */
+ const char *buf;
+ struct iovec *vector;
+ int count;
+ struct iobref *iobref;
+ fd_t *fd;
+ struct statvfs statvfs;
+ dict_t *xattr;
+ struct gf_flock lock;
+ uint32_t weak_checksum;
+ uint8_t *strong_checksum;
+ dict_t *xdata;
+ gf_dirent_t entries;
+ off_t offset; /* seek hole/data */
+ int valid; /* If the response is valid or not. For call-stub it is
+ always valid irrespective of this */
+ struct gf_lease lease;
+ lock_migration_info_t locklist;
+} default_args_cbk_t;
+
+typedef struct {
+ loc_t loc; /* @old in rename(), link() */
+ loc_t loc2; /* @new in rename(), link() */
+ fd_t *fd; /* for all the fd based ops */
+ fd_t *fd_dst; /* Only for copy_file_range destination */
+ off_t offset;
+ /*
+ * According to the man page of copy_file_range,
+ * the offsets for source and destination file
+ * are of type loff_t. But the type loff_t is
+ * linux specific and is actual a typedef of
+ * off64_t.
+ */
+ off64_t off_in; /* For copy_file_range source fd */
+ off64_t off_out; /* For copy_file_range destination fd only */
+ int mask;
+ size_t size;
+ mode_t mode;
+ dev_t rdev;
+ mode_t umask;
+ int xflag;
+ int flags;
+ const char *linkname;
+ struct iovec *vector;
+ int count;
+ struct iobref *iobref;
+ int datasync;
+ dict_t *xattr;
+ const char *name;
+ int cmd;
+ struct gf_flock lock;
+ const char *volume;
+ entrylk_cmd entrylkcmd;
+ entrylk_type entrylktype;
+ gf_xattrop_flags_t optype;
+ int valid;
+ struct iatt stat;
+ gf_seek_what_t what;
+ dict_t *xdata;
+ struct gf_lease lease;
+ lock_migration_info_t locklist;
+} default_args_t;
+
+typedef struct {
+ int fop_enum;
+ unsigned int fop_length;
+ int *enum_list;
+ default_args_t *req_list;
+ dict_t *xdata;
+} compound_args_t;
+
+typedef struct {
+ int fop_enum;
+ unsigned int fop_length;
+ int *enum_list;
+ default_args_cbk_t *rsp_list;
+ dict_t *xdata;
+} compound_args_cbk_t;
+
+int32_t
+default_notify(xlator_t *this, int32_t event, void *data, ...);
+
+int32_t
+default_forget(xlator_t *this, inode_t *inode);
+
+int32_t
+default_release(xlator_t *this, fd_t *fd);
+
+int32_t
+default_releasedir(xlator_t *this, fd_t *fd);
+
+extern struct xlator_fops *default_fops;
+
+/* Management Operations */
+
+int32_t
+default_getspec(call_frame_t *frame, xlator_t *this, const char *key,
+ int32_t flag);
+
+int32_t
+default_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata);
+
+/* FileSystem operations */
+int32_t
+default_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int32_t
+default_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int32_t
+default_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
+
+int32_t
+default_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata);
+
+int32_t
+default_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata);
+
+int32_t
+default_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata);
+
+int32_t
+default_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata);
+
+int32_t
+default_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata);
+
+int32_t
+default_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
+
+int32_t
+default_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int32_t
+default_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int32_t
+default_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata);
+
+int32_t
+default_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+int32_t
+default_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int32_t
+default_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+int32_t
+default_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+int32_t
+default_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int32_t
+default_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
+
+int32_t
+default_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
+
+int32_t
+default_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
+
+int32_t
+default_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata);
+
+int32_t
+default_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+int32_t
+default_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int32_t
+default_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t
+default_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int32_t
+default_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t
+default_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t
+default_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int32_t
+default_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t
+default_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+default_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+default_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+default_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+int32_t
+default_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+int32_t
+default_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+int32_t
+default_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+int32_t
+default_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+default_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+default_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+int32_t
+default_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+int32_t
+default_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, off_t offset, size_t len, dict_t *xdata);
+
+int32_t
+default_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+
+int32_t
+default_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata);
+
+int32_t
+default_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata);
+
+int32_t
+default_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata);
+
+int32_t
+default_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata);
+
+int32_t
+default_getactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+int32_t
+default_setactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata);
+
+int32_t
+default_put(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, uint32_t flags, struct iovec *vector, int32_t count,
+ off_t off, struct iobref *iobref, dict_t *xattr, dict_t *xdata);
+
+int32_t
+default_icreate(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dict_t *xdata);
+
+int32_t
+default_namelink(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+int32_t
+default_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+ off64_t off_in, fd_t *fd_out, off64_t off_out,
+ size_t len, uint32_t flags, dict_t *xdata);
+
+/* Resume */
+int32_t
+default_getspec_resume(call_frame_t *frame, xlator_t *this, const char *key,
+ int32_t flag);
+
+int32_t
+default_rchecksum_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, int32_t len, dict_t *xdata);
+
+/* FileSystem operations */
+int32_t
+default_lookup_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+int32_t
+default_stat_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+int32_t
+default_fstat_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *xdata);
+
+int32_t
+default_truncate_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata);
+
+int32_t
+default_ftruncate_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata);
+
+int32_t
+default_access_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t mask, dict_t *xdata);
+
+int32_t
+default_readlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ size_t size, dict_t *xdata);
+
+int32_t
+default_mknod_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
+
+int32_t
+default_mkdir_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata);
+
+int32_t
+default_unlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflag, dict_t *xdata);
+
+int32_t
+default_rmdir_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int32_t
+default_symlink_resume(call_frame_t *frame, xlator_t *this,
+ const char *linkpath, loc_t *loc, mode_t umask,
+ dict_t *xdata);
+
+int32_t
+default_rename_resume(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+int32_t
+default_link_resume(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+int32_t
+default_create_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata);
+
+int32_t
+default_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata);
+
+int32_t
+default_readv_resume(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int32_t
+default_writev_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata);
+
+int32_t
+default_flush_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *xdata);
+
+int32_t
+default_fsync_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+int32_t
+default_opendir_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, dict_t *xdata);
+
+int32_t
+default_fsyncdir_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+int32_t
+default_statfs_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+int32_t
+default_setxattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata);
+
+int32_t
+default_getxattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int32_t
+default_fsetxattr_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata);
+
+int32_t
+default_fgetxattr_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t
+default_removexattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+int32_t
+default_fremovexattr_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t
+default_lk_resume(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+default_inodelk_resume(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata);
+
+int32_t
+default_finodelk_resume(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock,
+ dict_t *xdata);
+
+int32_t
+default_entrylk_resume(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+int32_t
+default_fentrylk_resume(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+int32_t
+default_readdir_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *xdata);
+
+int32_t
+default_readdirp_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *xdata);
+
+int32_t
+default_xattrop_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+default_fxattrop_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+int32_t
+default_rchecksum_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, int32_t len, dict_t *xdata);
+
+int32_t
+default_setattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+int32_t
+default_fsetattr_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+int32_t
+default_fallocate_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, off_t offset, size_t len,
+ dict_t *xdata);
+
+int32_t
+default_discard_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+
+int32_t
+default_zerofill_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, off_t len, dict_t *xdata);
+
+int32_t
+default_ipc_resume(call_frame_t *frame, xlator_t *this, int32_t op,
+ dict_t *xdata);
+
+int32_t
+default_seek_resume(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata);
+
+int32_t
+default_lease_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata);
+
+int32_t
+default_getactivelk_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+int32_t
+default_setactivelk_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata);
+
+int32_t
+default_put_resume(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, uint32_t flags, struct iovec *vector,
+ int32_t count, off_t off, struct iobref *iobref,
+ dict_t *xattr, dict_t *xdata);
+
+int32_t
+default_copy_file_range_resume(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+ off_t off64_in, fd_t *fd_out, off64_t off_out,
+ size_t len, uint32_t flags, dict_t *xdata);
+
+/* _cbk_resume */
+
+int32_t
+default_lookup_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent);
+
+int32_t
+default_stat_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata);
+
+int32_t
+default_truncate_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int32_t
+default_ftruncate_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int32_t
+default_access_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_readlink_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata);
+
+int32_t
+default_mknod_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_mkdir_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_unlink_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+int32_t
+default_rmdir_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+int32_t
+default_symlink_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_rename_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata);
+
+int32_t
+default_link_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_create_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata);
+
+int32_t
+default_open_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata);
+
+int32_t
+default_readv_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata);
+
+int32_t
+default_writev_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int32_t
+default_flush_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fsync_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int32_t
+default_fstat_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata);
+
+int32_t
+default_opendir_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata);
+
+int32_t
+default_fsyncdir_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_statfs_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata);
+
+int32_t
+default_setxattr_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fsetxattr_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fgetxattr_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+default_getxattr_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+default_xattrop_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+default_fxattrop_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+default_removexattr_cbk_resume(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata);
+
+int32_t
+default_fremovexattr_cbk_resume(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_lk_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata);
+
+int32_t
+default_inodelk_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_finodelk_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_entrylk_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fentrylk_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_rchecksum_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ uint32_t weak_checksum, uint8_t *strong_checksum,
+ dict_t *xdata);
+
+int32_t
+default_readdir_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata);
+
+int32_t
+default_readdirp_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata);
+
+int32_t
+default_setattr_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata);
+
+int32_t
+default_fsetattr_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata);
+
+int32_t
+default_fallocate_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
+int32_t
+default_discard_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
+int32_t
+default_zerofill_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+int32_t
+default_ipc_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_seek_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, off_t offset,
+ dict_t *xdata);
+
+int32_t
+default_getspec_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, char *spec_data);
+
+int32_t
+default_lease_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct gf_lease *lease, dict_t *xdata);
+
+int32_t
+default_getactivelk_cbk_resume(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ lock_migration_info_t *locklist, dict_t *xdata);
+
+int32_t
+default_setactivelk_cbk_resume(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata);
+
+int32_t
+default_put_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_icreate_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dict_t *xdata);
+
+int32_t
+default_namelink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+int32_t
+default_copy_file_range_cbk_resume(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
+/* _CBK */
+int32_t
+default_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent);
+
+int32_t
+default_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata);
+
+int32_t
+default_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int32_t
+default_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int32_t
+default_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata);
+
+int32_t
+default_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata);
+
+int32_t
+default_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata);
+
+int32_t
+default_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata);
+
+int32_t
+default_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int32_t
+default_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int32_t
+default_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata);
+
+int32_t
+default_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata);
+
+int32_t
+default_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata);
+
+int32_t
+default_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+default_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+default_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+default_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+default_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata);
+
+int32_t
+default_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_rchecksum_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata);
+
+int32_t
+default_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata);
+
+int32_t
+default_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata);
+
+int32_t
+default_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+int32_t
+default_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata);
+
+int32_t
+default_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
+int32_t
+default_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
+int32_t
+default_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
+int32_t
+default_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, off_t offset, dict_t *xdata);
+
+int32_t
+default_getspec_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, char *spec_data);
+
+int32_t
+default_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_lease *lease,
+ dict_t *xdata);
+
+int32_t
+default_getactivelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ lock_migration_info_t *locklist, dict_t *xdata);
+
+int32_t
+default_setactivelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int32_t
+default_put_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int32_t
+default_icreate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata);
+
+int32_t
+default_namelink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int32_t
+default_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
+int32_t
+default_lookup_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_stat_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_truncate_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_ftruncate_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_access_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_readlink_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_mknod_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_mkdir_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_unlink_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_rmdir_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_symlink_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_rename_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_link_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_create_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_open_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_readv_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_writev_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_flush_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fsync_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fstat_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_opendir_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fsyncdir_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_statfs_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_setxattr_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fsetxattr_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fgetxattr_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_getxattr_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_xattrop_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fxattrop_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_removexattr_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fremovexattr_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_lk_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_inodelk_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_finodelk_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_entrylk_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fentrylk_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_rchecksum_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_readdir_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_readdirp_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_setattr_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fsetattr_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_fallocate_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_discard_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_zerofill_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_getspec_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_ipc_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_seek_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_lease_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_getactivelk_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_setactivelk_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_put_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_icreate_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_namelink_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_copy_file_range_failure_cbk(call_frame_t *frame, int32_t op_errno);
+
+int32_t
+default_mem_acct_init(xlator_t *this);
+
+void
+default_fini(xlator_t *this);
+
+#endif /* _DEFAULTS_H */
diff --git a/libglusterfs/src/glusterfs/dict.h b/libglusterfs/src/glusterfs/dict.h
new file mode 100644
index 00000000000..d0467c6dfb6
--- /dev/null
+++ b/libglusterfs/src/glusterfs/dict.h
@@ -0,0 +1,420 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _DICT_H
+#define _DICT_H
+
+#include <inttypes.h>
+#include <sys/uio.h>
+#include <pthread.h>
+
+#include "glusterfs/common-utils.h"
+
+typedef struct _data data_t;
+typedef struct _dict dict_t;
+typedef struct _data_pair data_pair_t;
+
+#define dict_set_sizen(this, key, value) dict_setn(this, key, SLEN(key), value)
+
+#define dict_add_sizen(this, key, value) dict_addn(this, key, SLEN(key), value)
+
+#define dict_get_sizen(this, key) dict_getn(this, key, SLEN(key))
+
+#define dict_del_sizen(this, key) dict_deln(this, key, SLEN(key))
+
+#define dict_set_str_sizen(this, key, str) \
+ dict_set_strn(this, key, SLEN(key), str)
+
+#define dict_set_sizen_str_sizen(this, key, str) \
+ dict_set_nstrn(this, key, SLEN(key), str, SLEN(str))
+
+#define dict_set_dynstr_sizen(this, key, str) \
+ dict_set_dynstrn(this, key, SLEN(key), str)
+
+#define dict_get_str_sizen(this, key, str) \
+ dict_get_strn(this, key, SLEN(key), str)
+
+#define dict_get_int32_sizen(this, key, val) \
+ dict_get_int32n(this, key, SLEN(key), val)
+
+#define dict_set_int32_sizen(this, key, val) \
+ dict_set_int32n(this, key, SLEN(key), val)
+
+#define GF_PROTOCOL_DICT_SERIALIZE(this, from_dict, to, len, ope, labl) \
+ do { \
+ int _ret = 0; \
+ \
+ if (!from_dict) \
+ break; \
+ \
+ _ret = dict_allocate_and_serialize(from_dict, to, &len); \
+ if (_ret < 0) { \
+ gf_msg(this->name, GF_LOG_WARNING, 0, LG_MSG_DICT_SERIAL_FAILED, \
+ "failed to get serialized dict (%s)", (#from_dict)); \
+ ope = EINVAL; \
+ goto labl; \
+ } \
+ } while (0)
+
+#define GF_PROTOCOL_DICT_UNSERIALIZE(xl, to, buff, len, ret, ope, labl) \
+ do { \
+ if (!len) \
+ break; \
+ to = dict_new(); \
+ GF_VALIDATE_OR_GOTO(xl->name, to, labl); \
+ \
+ ret = dict_unserialize(buff, len, &to); \
+ if (ret < 0) { \
+ gf_msg(xl->name, GF_LOG_WARNING, 0, LG_MSG_DICT_UNSERIAL_FAILED, \
+ "failed to unserialize dictionary (%s)", (#to)); \
+ \
+ ope = EINVAL; \
+ goto labl; \
+ } \
+ \
+ } while (0)
+
+#define dict_foreach_inline(d, c) for (c = d->members_list; c; c = c->next)
+
+#define DICT_KEY_VALUE_MAX_SIZE 1048576
+#define DICT_MAX_FLAGS 256
+#define DICT_FLAG_SET 1
+#define DICT_FLAG_CLEAR 0
+#define DICT_HDR_LEN 4
+#define DICT_DATA_HDR_KEY_LEN 4
+#define DICT_DATA_HDR_VAL_LEN 4
+
+struct _data {
+ char *data;
+ gf_atomic_t refcount;
+ gf_dict_data_type_t data_type;
+ uint32_t len;
+ gf_boolean_t is_static;
+};
+
+struct _data_pair {
+ struct _data_pair *hash_next;
+ struct _data_pair *prev;
+ struct _data_pair *next;
+ data_t *value;
+ char *key;
+ uint32_t key_hash;
+};
+
+struct _dict {
+ uint64_t max_count;
+ int32_t hash_size;
+ int32_t count;
+ gf_atomic_t refcount;
+ data_pair_t **members;
+ data_pair_t *members_list;
+ char *extra_stdfree;
+ gf_lock_t lock;
+ data_pair_t *members_internal;
+ data_pair_t free_pair;
+ /* Variable to store total keylen + value->len */
+ uint32_t totkvlen;
+};
+
+typedef gf_boolean_t (*dict_match_t)(dict_t *d, char *k, data_t *v, void *data);
+
+int32_t
+is_data_equal(data_t *one, data_t *two);
+void
+data_destroy(data_t *data);
+
+/* function to set a key/value pair (overwrite existing if matches the key */
+int32_t
+dict_set(dict_t *this, char *key, data_t *value);
+int32_t
+dict_setn(dict_t *this, char *key, const int keylen, data_t *value);
+
+/* function to set a new key/value pair (without checking for duplicate) */
+int32_t
+dict_add(dict_t *this, char *key, data_t *value);
+int32_t
+dict_addn(dict_t *this, char *key, const int keylen, data_t *value);
+int
+dict_get_with_ref(dict_t *this, char *key, data_t **data);
+data_t *
+dict_get(dict_t *this, char *key);
+data_t *
+dict_getn(dict_t *this, char *key, const int keylen);
+void
+dict_del(dict_t *this, char *key);
+void
+dict_deln(dict_t *this, char *key, const int keylen);
+int
+dict_reset(dict_t *dict);
+
+int
+dict_key_count(dict_t *this);
+
+int32_t
+dict_serialized_length(dict_t *dict);
+int32_t
+dict_serialize(dict_t *dict, char *buf);
+int32_t
+dict_unserialize(char *buf, int32_t size, dict_t **fill);
+
+int32_t
+dict_allocate_and_serialize(dict_t *this, char **buf, u_int *length);
+
+void
+dict_unref(dict_t *dict);
+dict_t *
+dict_ref(dict_t *dict);
+data_t *
+data_ref(data_t *data);
+void
+data_unref(data_t *data);
+
+int32_t
+dict_lookup(dict_t *this, char *key, data_t **data);
+/*
+ TODO: provide converts for different byte sizes, signedness, and void *
+ */
+data_t *
+int_to_data(int64_t value);
+data_t *
+str_to_data(char *value);
+data_t *
+strn_to_data(char *value, const int vallen);
+data_t *
+data_from_dynptr(void *value, int32_t len);
+data_t *
+bin_to_data(void *value, int32_t len);
+data_t *
+static_str_to_data(char *value);
+data_t *
+static_bin_to_data(void *value);
+
+int64_t
+data_to_int64(data_t *data);
+int32_t
+data_to_int32(data_t *data);
+int16_t
+data_to_int16(data_t *data);
+int8_t
+data_to_int8(data_t *data);
+
+uint64_t
+data_to_uint64(data_t *data);
+uint32_t
+data_to_uint32(data_t *data);
+uint16_t
+data_to_uint16(data_t *data);
+uint8_t
+data_to_uint8(data_t *data);
+
+data_t *
+data_from_int64(int64_t value);
+data_t *
+data_from_int32(int32_t value);
+data_t *
+data_from_int16(int16_t value);
+data_t *
+data_from_int8(int8_t value);
+
+data_t *
+data_from_uint64(uint64_t value);
+data_t *
+data_from_uint32(uint32_t value);
+data_t *
+data_from_uint16(uint16_t value);
+
+char *
+data_to_str(data_t *data);
+void *
+data_to_bin(data_t *data);
+void *
+data_to_ptr(data_t *data);
+data_t *
+data_copy(data_t *old);
+struct iatt *
+data_to_iatt(data_t *data, char *key);
+
+int
+dict_foreach(dict_t *this,
+ int (*fn)(dict_t *this, char *key, data_t *value, void *data),
+ void *data);
+
+int
+dict_foreach_fnmatch(dict_t *dict, char *pattern,
+ int (*fn)(dict_t *this, char *key, data_t *value,
+ void *data),
+ void *data);
+
+int
+dict_foreach_match(dict_t *dict,
+ gf_boolean_t (*match)(dict_t *this, char *key, data_t *value,
+ void *mdata),
+ void *match_data,
+ int (*action)(dict_t *this, char *key, data_t *value,
+ void *adata),
+ void *action_data);
+
+int
+dict_null_foreach_fn(dict_t *d, char *k, data_t *v, void *tmp);
+int
+dict_remove_foreach_fn(dict_t *d, char *k, data_t *v, void *tmp);
+dict_t *
+dict_copy(dict_t *this, dict_t *new);
+int
+dict_keys_join(void *value, int size, dict_t *dict,
+ int (*filter_fn)(char *key));
+
+/* CLEANED UP FUNCTIONS DECLARATIONS */
+GF_MUST_CHECK dict_t *
+dict_new(void);
+dict_t *
+dict_copy_with_ref(dict_t *this, dict_t *new);
+
+GF_MUST_CHECK int
+dict_reset(dict_t *dict);
+
+GF_MUST_CHECK int
+dict_get_int8(dict_t *this, char *key, int8_t *val);
+GF_MUST_CHECK int
+dict_set_int8(dict_t *this, char *key, int8_t val);
+
+GF_MUST_CHECK int
+dict_get_int16(dict_t *this, char *key, int16_t *val);
+GF_MUST_CHECK int
+dict_set_int16(dict_t *this, char *key, int16_t val);
+
+GF_MUST_CHECK int
+dict_get_int32(dict_t *this, char *key, int32_t *val);
+GF_MUST_CHECK int
+dict_get_int32n(dict_t *this, char *key, const int keylen, int32_t *val);
+GF_MUST_CHECK int
+dict_set_int32(dict_t *this, char *key, int32_t val);
+GF_MUST_CHECK int
+dict_set_int32n(dict_t *this, char *key, const int keylen, int32_t val);
+
+GF_MUST_CHECK int
+dict_get_int64(dict_t *this, char *key, int64_t *val);
+GF_MUST_CHECK int
+dict_set_int64(dict_t *this, char *key, int64_t val);
+
+GF_MUST_CHECK int
+dict_get_uint16(dict_t *this, char *key, uint16_t *val);
+GF_MUST_CHECK int
+dict_set_uint16(dict_t *this, char *key, uint16_t val);
+
+GF_MUST_CHECK int
+dict_get_uint32(dict_t *this, char *key, uint32_t *val);
+GF_MUST_CHECK int
+dict_set_uint32(dict_t *this, char *key, uint32_t val);
+
+GF_MUST_CHECK int
+dict_get_uint64(dict_t *this, char *key, uint64_t *val);
+GF_MUST_CHECK int
+dict_set_uint64(dict_t *this, char *key, uint64_t val);
+
+GF_MUST_CHECK int
+dict_check_flag(dict_t *this, char *key, int flag);
+GF_MUST_CHECK int
+dict_set_flag(dict_t *this, char *key, int flag);
+GF_MUST_CHECK int
+dict_clear_flag(dict_t *this, char *key, int flag);
+
+GF_MUST_CHECK int
+dict_get_double(dict_t *this, char *key, double *val);
+GF_MUST_CHECK int
+dict_set_double(dict_t *this, char *key, double val);
+
+GF_MUST_CHECK int
+dict_set_static_ptr(dict_t *this, char *key, void *ptr);
+GF_MUST_CHECK int
+dict_get_ptr(dict_t *this, char *key, void **ptr);
+GF_MUST_CHECK int
+dict_get_ptr_and_len(dict_t *this, char *key, void **ptr, int *len);
+GF_MUST_CHECK int
+dict_set_dynptr(dict_t *this, char *key, void *ptr, size_t size);
+
+GF_MUST_CHECK int
+dict_get_bin(dict_t *this, char *key, void **ptr);
+GF_MUST_CHECK int
+dict_set_bin(dict_t *this, char *key, void *ptr, size_t size);
+GF_MUST_CHECK int
+dict_set_static_bin(dict_t *this, char *key, void *ptr, size_t size);
+
+GF_MUST_CHECK int
+dict_set_option(dict_t *this, char *key, char *str);
+GF_MUST_CHECK int
+dict_set_str(dict_t *this, char *key, char *str);
+GF_MUST_CHECK int
+dict_set_strn(dict_t *this, char *key, const int keylen, char *str);
+GF_MUST_CHECK int
+dict_set_nstrn(dict_t *this, char *key, const int keylen, char *str,
+ const int vallen);
+GF_MUST_CHECK int
+dict_set_dynstr(dict_t *this, char *key, char *str);
+GF_MUST_CHECK int
+dict_set_dynstrn(dict_t *this, char *key, const int keylen, char *str);
+GF_MUST_CHECK int
+dict_set_dynstr_with_alloc(dict_t *this, char *key, const char *str);
+GF_MUST_CHECK int
+dict_add_dynstr_with_alloc(dict_t *this, char *key, char *str);
+GF_MUST_CHECK int
+dict_get_str(dict_t *this, char *key, char **str);
+GF_MUST_CHECK int
+dict_get_strn(dict_t *this, char *key, const int keylen, char **str);
+
+GF_MUST_CHECK int
+dict_get_str_boolean(dict_t *this, char *key, int default_val);
+GF_MUST_CHECK int
+dict_rename_key(dict_t *this, char *key, char *replace_key);
+GF_MUST_CHECK int
+dict_serialize_value_with_delim(dict_t *this, char *buf, int32_t *serz_len,
+ char delimiter);
+
+GF_MUST_CHECK int
+dict_set_gfuuid(dict_t *this, char *key, uuid_t uuid, bool is_static);
+GF_MUST_CHECK int
+dict_get_gfuuid(dict_t *this, char *key, uuid_t *uuid);
+
+GF_MUST_CHECK int
+dict_set_iatt(dict_t *this, char *key, struct iatt *iatt, bool is_static);
+GF_MUST_CHECK int
+dict_get_iatt(dict_t *this, char *key, struct iatt *iatt);
+GF_MUST_CHECK int
+dict_set_mdata(dict_t *this, char *key, struct mdata_iatt *mdata,
+ bool is_static);
+GF_MUST_CHECK int
+dict_get_mdata(dict_t *this, char *key, struct mdata_iatt *mdata);
+
+void
+dict_dump_to_statedump(dict_t *dict, char *dict_name, char *domain);
+
+void
+dict_dump_to_log(dict_t *dict);
+
+int
+dict_dump_to_str(dict_t *dict, char *dump, int dumpsize, char *format);
+gf_boolean_t
+dict_match_everything(dict_t *d, char *k, data_t *v, void *data);
+
+dict_t *
+dict_for_key_value(const char *name, const char *value, size_t size,
+ gf_boolean_t is_static);
+
+gf_boolean_t
+are_dicts_equal(dict_t *one, dict_t *two,
+ gf_boolean_t (*match)(dict_t *d, char *k, data_t *v,
+ void *data),
+ gf_boolean_t (*value_ignore)(char *k));
+int
+dict_has_key_from_array(dict_t *dict, char **strings, gf_boolean_t *result);
+
+int
+dict_serialized_length_lk(dict_t *this);
+#endif
diff --git a/libglusterfs/src/glusterfs/event-history.h b/libglusterfs/src/glusterfs/event-history.h
new file mode 100644
index 00000000000..f0e0422418e
--- /dev/null
+++ b/libglusterfs/src/glusterfs/event-history.h
@@ -0,0 +1,40 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _EH_H
+#define _EH_H
+
+#include <pthread.h> // for pthread_mutex_t
+#include <stddef.h> // for size_t
+#include "glusterfs/circ-buff.h" // for buffer_t, circular_buffer_t
+#include "glusterfs/glusterfs.h" // for gf_boolean_t
+
+struct event_hist {
+ buffer_t *buffer;
+ pthread_mutex_t lock;
+};
+
+typedef struct event_hist eh_t;
+
+void
+eh_dump(eh_t *event, void *data,
+ int(fn)(circular_buffer_t *buffer, void *data));
+
+eh_t *
+eh_new(size_t buffer_size, gf_boolean_t use_buffer_once,
+ void (*destroy_data)(void *data));
+
+int
+eh_save_history(eh_t *history, void *string);
+
+int
+eh_destroy(eh_t *history);
+
+#endif /* _EH_H */
diff --git a/libglusterfs/src/glusterfs/events.h b/libglusterfs/src/glusterfs/events.h
new file mode 100644
index 00000000000..74c5326427b
--- /dev/null
+++ b/libglusterfs/src/glusterfs/events.h
@@ -0,0 +1,34 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EVENTS_H__
+#define __EVENTS_H__
+
+#include "eventtypes.h"
+
+#ifdef USE_EVENTS
+int
+_gf_event(eventtypes_t event, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+#else
+__attribute__((__format__(__printf__, 2, 3))) static inline int
+_gf_event(eventtypes_t event, const char *fmt, ...)
+{
+ return 0;
+}
+#endif /* USE_EVENTS */
+
+#define gf_event(event, fmt...) \
+ do { \
+ FMT_WARN(fmt); \
+ _gf_event(event, ##fmt); \
+ } while (0)
+
+#endif /* __EVENTS_H__ */
diff --git a/libglusterfs/src/glusterfs/fd-lk.h b/libglusterfs/src/glusterfs/fd-lk.h
new file mode 100644
index 00000000000..76cc680306a
--- /dev/null
+++ b/libglusterfs/src/glusterfs/fd-lk.h
@@ -0,0 +1,59 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _FD_LK_H
+#define _FD_LK_H
+
+#include "glusterfs/fd.h"
+#include "glusterfs/locking.h"
+#include "glusterfs/list.h"
+#include "glusterfs/glusterfs.h"
+
+#define get_lk_type(type) \
+ type == F_UNLCK ? "F_UNLCK" : (type == F_RDLCK ? "F_RDLCK" : "F_WRLCK")
+
+#define get_lk_cmd(cmd) \
+ cmd == F_SETLKW ? "F_SETLKW" : (cmd == F_SETLK ? "F_SETLK" : "F_GETLK")
+
+struct _fd;
+
+struct fd_lk_ctx {
+ struct list_head lk_list;
+ gf_atomic_t ref;
+ gf_lock_t lock;
+};
+typedef struct fd_lk_ctx fd_lk_ctx_t;
+
+struct fd_lk_ctx_node {
+ int32_t cmd;
+ struct gf_flock user_flock;
+ off_t fl_start;
+ off_t fl_end;
+ short fl_type;
+ struct list_head next;
+};
+typedef struct fd_lk_ctx_node fd_lk_ctx_node_t;
+
+fd_lk_ctx_t *
+fd_lk_ctx_ref(fd_lk_ctx_t *lk_ctx);
+
+fd_lk_ctx_t *
+fd_lk_ctx_create(void);
+
+int
+fd_lk_insert_and_merge(struct _fd *lk_ctx, int32_t cmd, struct gf_flock *flock);
+
+int
+fd_lk_ctx_unref(fd_lk_ctx_t *lk_ctx);
+
+gf_boolean_t
+fd_lk_ctx_empty(fd_lk_ctx_t *lk_ctx);
+
+#endif /* _FD_LK_H */
diff --git a/libglusterfs/src/glusterfs/fd.h b/libglusterfs/src/glusterfs/fd.h
new file mode 100644
index 00000000000..3ffaaa60504
--- /dev/null
+++ b/libglusterfs/src/glusterfs/fd.h
@@ -0,0 +1,169 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _FD_H
+#define _FD_H
+
+#include "glusterfs/list.h"
+#include <sys/types.h>
+#include <unistd.h>
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/locking.h"
+#include "glusterfs/fd-lk.h"
+
+#define GF_ANON_FD_NO -2
+#define GF_ANON_FD_FLAGS (O_RDWR | O_LARGEFILE)
+
+struct _inode;
+struct _dict;
+struct fd_lk_ctx;
+
+struct _fd_ctx {
+ union {
+ uint64_t key;
+ void *xl_key;
+ };
+ union {
+ uint64_t value1;
+ void *ptr1;
+ };
+};
+
+struct _fd {
+ uint64_t pid;
+ int32_t flags;
+ gf_atomic_t refcount;
+ struct list_head inode_list;
+ struct _inode *inode;
+ gf_lock_t lock; /* used ONLY for manipulating
+ 'struct _fd_ctx' array (_ctx).*/
+ struct _fd_ctx *_ctx;
+ int xl_count; /* Number of xl referred in this fd */
+ struct fd_lk_ctx *lk_ctx;
+ gf_boolean_t anonymous; /* fd which does not have counterpart open
+ fd on backend (server for client, posix
+ for server). */
+};
+typedef struct _fd fd_t;
+
+struct fd_table_entry {
+ fd_t *fd;
+ int next_free;
+};
+typedef struct fd_table_entry fdentry_t;
+
+struct _fdtable {
+ int refcount;
+ uint32_t max_fds;
+ pthread_rwlock_t lock;
+ fdentry_t *fdentries;
+ int first_free;
+};
+typedef struct _fdtable fdtable_t;
+
+/* Signifies no more entries in the fd table. */
+#define GF_FDTABLE_END -1
+
+/* This is used to invalidated
+ * the next_free value in an fdentry that has been allocated
+ */
+#define GF_FDENTRY_ALLOCATED -2
+
+#include "glusterfs/logging.h"
+#include "glusterfs/xlator.h"
+
+void
+gf_fd_put(fdtable_t *fdtable, int32_t fd);
+
+fd_t *
+gf_fd_fdptr_get(fdtable_t *fdtable, int64_t fd);
+
+fdtable_t *
+gf_fd_fdtable_alloc(void);
+
+int
+gf_fd_unused_get(fdtable_t *fdtable, fd_t *fdptr);
+
+fdentry_t *
+gf_fd_fdtable_get_all_fds(fdtable_t *fdtable, uint32_t *count);
+
+void
+gf_fd_fdtable_destroy(fdtable_t *fdtable);
+
+fd_t *
+__fd_ref(fd_t *fd);
+
+fd_t *
+fd_ref(fd_t *fd);
+
+void
+fd_unref(fd_t *fd);
+
+void
+fd_close(fd_t *fd);
+
+fd_t *
+fd_create(struct _inode *inode, pid_t pid);
+
+fd_t *
+fd_create_uint64(struct _inode *inode, uint64_t pid);
+
+fd_t *
+fd_lookup(struct _inode *inode, pid_t pid);
+
+fd_t *
+fd_lookup_uint64(struct _inode *inode, uint64_t pid);
+
+fd_t *
+fd_lookup_anonymous(inode_t *inode, int32_t flags);
+
+fd_t *
+fd_anonymous(inode_t *inode);
+
+fd_t *
+fd_anonymous_with_flags(inode_t *inode, int32_t flags);
+
+gf_boolean_t
+fd_is_anonymous(fd_t *fd);
+
+uint8_t
+fd_list_empty(struct _inode *inode);
+
+fd_t *
+fd_bind(fd_t *fd);
+
+int
+fd_ctx_set(fd_t *fd, xlator_t *xlator, uint64_t value);
+
+int
+fd_ctx_get(fd_t *fd, xlator_t *xlator, uint64_t *value);
+
+int
+fd_ctx_del(fd_t *fd, xlator_t *xlator, uint64_t *value);
+
+int
+__fd_ctx_del(fd_t *fd, xlator_t *xlator, uint64_t *value);
+
+int
+__fd_ctx_set(fd_t *fd, xlator_t *xlator, uint64_t value);
+
+int
+__fd_ctx_get(fd_t *fd, xlator_t *xlator, uint64_t *value);
+
+void
+fd_ctx_dump(fd_t *fd, char *prefix);
+
+fdentry_t *
+gf_fd_fdtable_copy_all_fds(fdtable_t *fdtable, uint32_t *count);
+
+void
+gf_fdptr_put(fdtable_t *fdtable, fd_t *fd);
+
+#endif /* _FD_H */
diff --git a/libglusterfs/src/glusterfs/gf-dirent.h b/libglusterfs/src/glusterfs/gf-dirent.h
new file mode 100644
index 00000000000..e358da30f58
--- /dev/null
+++ b/libglusterfs/src/glusterfs/gf-dirent.h
@@ -0,0 +1,71 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_DIRENT_H
+#define _GF_DIRENT_H
+
+#include "glusterfs/iatt.h"
+#include "glusterfs/inode.h"
+
+#define gf_dirent_size(name) (sizeof(gf_dirent_t) + strlen(name) + 1)
+
+int
+gf_deitransform(xlator_t *this, uint64_t y);
+
+int
+gf_itransform(xlator_t *this, uint64_t x, uint64_t *y_p, int client_id);
+
+uint64_t
+gf_dirent_orig_offset(xlator_t *this, uint64_t offset);
+
+struct _dir_entry {
+ struct _dir_entry *next;
+ char *name;
+ char *link;
+ struct iatt buf;
+};
+
+struct _gf_dirent {
+ union {
+ struct list_head list;
+ struct {
+ struct _gf_dirent *next;
+ struct _gf_dirent *prev;
+ };
+ };
+ uint64_t d_ino;
+ uint64_t d_off;
+ uint32_t d_len;
+ uint32_t d_type;
+ struct iatt d_stat;
+ dict_t *dict;
+ inode_t *inode;
+ char d_name[];
+};
+
+#define DT_ISDIR(mode) (mode == DT_DIR)
+
+gf_dirent_t *
+gf_dirent_for_name(const char *name);
+gf_dirent_t *
+entry_copy(gf_dirent_t *source);
+void
+gf_dirent_entry_free(gf_dirent_t *entry);
+void
+gf_dirent_free(gf_dirent_t *entries);
+int
+gf_link_inodes_from_dirent(xlator_t *this, inode_t *parent,
+ gf_dirent_t *entries);
+int
+gf_fill_iatt_for_dirent(gf_dirent_t *entry, inode_t *parent, xlator_t *subvol);
+
+void
+gf_link_inode_from_dirent(xlator_t *this, inode_t *parent, gf_dirent_t *entry);
+#endif /* _GF_DIRENT_H */
diff --git a/libglusterfs/src/glusterfs/gf-event.h b/libglusterfs/src/glusterfs/gf-event.h
new file mode 100644
index 00000000000..40f8fbdf10a
--- /dev/null
+++ b/libglusterfs/src/glusterfs/gf-event.h
@@ -0,0 +1,140 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_EVENT_H_
+#define _GF_EVENT_H_
+
+#include <pthread.h>
+#include "common-utils.h"
+#include "list.h"
+
+struct event_pool;
+struct event_ops;
+struct event_slot_poll;
+struct event_slot_epoll;
+struct event_data {
+ int idx;
+ int gen;
+} __attribute__((__packed__, __may_alias__));
+
+typedef void (*event_handler_t)(int fd, int idx, int gen, void *data,
+ int poll_in, int poll_out, int poll_err,
+ char event_thread_exit);
+
+#define EVENT_EPOLL_TABLES 1024
+#define EVENT_EPOLL_SLOTS 1024
+#define EVENT_MAX_THREADS 1024
+
+/* See rpcsvc.h to check why. */
+GF_STATIC_ASSERT(EVENT_MAX_THREADS % __BITS_PER_LONG == 0);
+
+struct event_pool {
+ struct event_ops *ops;
+
+ int fd;
+ int breaker[2];
+
+ int count;
+ struct event_slot_poll *reg;
+ struct event_slot_epoll *ereg[EVENT_EPOLL_TABLES];
+ int slots_used[EVENT_EPOLL_TABLES];
+
+ struct list_head poller_death;
+ int poller_death_sliced; /* track whether the list of fds interested
+ * poller_death is sliced. If yes, new thread death
+ * notification has to wait till the list is added
+ * back
+ */
+ int poller_gen;
+ int used;
+ int changed;
+
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ void *evcache;
+ int evcache_size;
+
+ /* NOTE: Currently used only when event processing is done using
+ * epoll. */
+ int eventthreadcount; /* number of event threads to execute. */
+ pthread_t pollers[EVENT_MAX_THREADS]; /* poller thread_id store, and live
+ status */
+ int destroy;
+ int activethreadcount;
+
+ /*
+ * Number of threads created by auto-scaling, *in addition to* the
+ * configured number of threads. This is only applicable on the
+ * server, where we try to keep the number of threads around the number
+ * of bricks. In that case, the configured number is just "extra"
+ * threads to handle requests in excess of one per brick (including
+ * requests on the GlusterD connection). For clients or GlusterD, this
+ * number will always be zero, so the "extra" is all we have.
+ *
+ * TBD: consider auto-scaling for clients as well
+ */
+ int auto_thread_count;
+};
+
+struct event_destroy_data {
+ int readfd;
+ struct event_pool *pool;
+};
+
+struct event_ops {
+ struct event_pool *(*new)(int count, int eventthreadcount);
+
+ int (*event_register)(struct event_pool *event_pool, int fd,
+ event_handler_t handler, void *data, int poll_in,
+ int poll_out, char notify_poller_death);
+
+ int (*event_select_on)(struct event_pool *event_pool, int fd, int idx,
+ int poll_in, int poll_out);
+
+ int (*event_unregister)(struct event_pool *event_pool, int fd, int idx);
+
+ int (*event_unregister_close)(struct event_pool *event_pool, int fd,
+ int idx);
+
+ int (*event_dispatch)(struct event_pool *event_pool);
+
+ int (*event_reconfigure_threads)(struct event_pool *event_pool,
+ int newcount);
+ int (*event_pool_destroy)(struct event_pool *event_pool);
+ int (*event_handled)(struct event_pool *event_pool, int fd, int idx,
+ int gen);
+};
+
+struct event_pool *
+gf_event_pool_new(int count, int eventthreadcount);
+int
+gf_event_select_on(struct event_pool *event_pool, int fd, int idx, int poll_in,
+ int poll_out);
+int
+gf_event_register(struct event_pool *event_pool, int fd,
+ event_handler_t handler, void *data, int poll_in,
+ int poll_out, char notify_poller_death);
+int
+gf_event_unregister(struct event_pool *event_pool, int fd, int idx);
+int
+gf_event_unregister_close(struct event_pool *event_pool, int fd, int idx);
+int
+gf_event_dispatch(struct event_pool *event_pool);
+int
+gf_event_reconfigure_threads(struct event_pool *event_pool, int value);
+int
+gf_event_pool_destroy(struct event_pool *event_pool);
+int
+gf_event_dispatch_destroy(struct event_pool *event_pool);
+int
+gf_event_handled(struct event_pool *event_pool, int fd, int idx, int gen);
+
+#endif /* _GF_EVENT_H_ */
diff --git a/libglusterfs/src/glusterfs/gidcache.h b/libglusterfs/src/glusterfs/gidcache.h
new file mode 100644
index 00000000000..ddaabd765b5
--- /dev/null
+++ b/libglusterfs/src/glusterfs/gidcache.h
@@ -0,0 +1,60 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __GIDCACHE_H__
+#define __GIDCACHE_H__
+
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/locking.h"
+
+/*
+ * TBD: make the cache size tunable
+ *
+ * The current size represents a pretty trivial amount of memory, and should
+ * provide good hit rates even for quite busy systems. If we ever want to
+ * support really large cache sizes, we'll need to do dynamic allocation
+ * instead of just defining an array within a private structure. It doesn't make
+ * a whole lot of sense to change the associativity, because it won't improve
+ * hit rates all that much and will increase the maintenance cost as we have
+ * to scan more entries with every lookup/update.
+ */
+
+#define AUX_GID_CACHE_ASSOC 4
+#define AUX_GID_CACHE_BUCKETS 256
+#define AUX_GID_CACHE_SIZE (AUX_GID_CACHE_ASSOC * AUX_GID_CACHE_BUCKETS)
+
+typedef struct {
+ uint64_t gl_id;
+ uint64_t gl_uid;
+ uint64_t gl_gid;
+ int gl_count;
+ gid_t *gl_list;
+ time_t gl_deadline;
+} gid_list_t;
+
+typedef struct {
+ gf_lock_t gc_lock;
+ uint32_t gc_max_age;
+ unsigned int gc_nbuckets;
+ gid_list_t gc_cache[AUX_GID_CACHE_SIZE];
+} gid_cache_t;
+
+int
+gid_cache_init(gid_cache_t *, uint32_t);
+int
+gid_cache_reconf(gid_cache_t *, uint32_t);
+const gid_list_t *
+gid_cache_lookup(gid_cache_t *, uint64_t, uint64_t, uint64_t);
+void
+gid_cache_release(gid_cache_t *, const gid_list_t *);
+int
+gid_cache_add(gid_cache_t *, gid_list_t *);
+
+#endif /* __GIDCACHE_H__ */
diff --git a/libglusterfs/src/glusterfs/glfs-message-id.h b/libglusterfs/src/glusterfs/glfs-message-id.h
new file mode 100644
index 00000000000..a1a16ca1efb
--- /dev/null
+++ b/libglusterfs/src/glusterfs/glfs-message-id.h
@@ -0,0 +1,102 @@
+/*
+ Copyright (c) 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLFS_MESSAGE_ID_H_
+#define _GLFS_MESSAGE_ID_H_
+
+/* Base of all message IDs, all message IDs would be
+ * greater than this */
+#define GLFS_MSGID_BASE 100000
+
+/* Segment size of allocated range. Any component needing more than this
+ * segment size should take multiple segments (at times non contiguous,
+ * if extensions are being made post the next segment already allocated) */
+#define GLFS_MSGID_SEGMENT 1000
+
+/* Macro to define a range of messages for a component. The first argument is
+ * the name of the component. The second argument is the number of segments
+ * to allocate. The defined values will be GLFS_MSGID_COMP_<name> and
+ * GLFS_MSGID_COMP_<name>_END. */
+#define GLFS_MSGID_COMP(_name, _blocks) \
+ GLFS_MSGID_COMP_##_name, \
+ GLFS_MSGID_COMP_##_name##_END = (GLFS_MSGID_COMP_##_name + \
+ (GLFS_MSGID_SEGMENT * (_blocks)) - 1)
+
+#define GLFS_MSGID(_name, _msgs...) \
+ enum _msgid_table_##_name \
+ { \
+ GLFS_##_name##_COMP_BASE = GLFS_MSGID_COMP_##_name, ##_msgs, \
+ GLGS_##_name##_COMP_END \
+ }
+
+/* Per module message segments allocated */
+/* NOTE: For any new module add to the end the modules */
+enum _msgid_comp {
+ GLFS_MSGID_RESERVED = GLFS_MSGID_BASE - 1,
+
+ GLFS_MSGID_COMP(GLUSTERFSD, 1),
+ GLFS_MSGID_COMP(LIBGLUSTERFS, 1),
+ GLFS_MSGID_COMP(RPC_LIB, 1),
+ GLFS_MSGID_COMP(RPC_TRANS_RDMA, 1),
+ GLFS_MSGID_COMP(API, 1),
+ GLFS_MSGID_COMP(CLI, 1),
+ /* glusterd has a lot of messages, taking 2 segments for the same */
+ GLFS_MSGID_COMP(GLUSTERD, 2),
+ GLFS_MSGID_COMP(AFR, 1),
+ GLFS_MSGID_COMP(DHT, 1),
+ /* there is no component called 'common', however reserving this segment
+ * for common actions/errors like dict_{get/set}, memory accounting*/
+ GLFS_MSGID_COMP(COMMON, 1),
+ GLFS_MSGID_COMP(UPCALL, 1),
+ GLFS_MSGID_COMP(NFS, 1),
+ GLFS_MSGID_COMP(POSIX, 1),
+ GLFS_MSGID_COMP(PC, 1),
+ GLFS_MSGID_COMP(PS, 1),
+ GLFS_MSGID_COMP(BITROT_STUB, 1),
+ GLFS_MSGID_COMP(CHANGELOG, 1),
+ GLFS_MSGID_COMP(BITROT_BITD, 1),
+ GLFS_MSGID_COMP(RPC_TRANS_SOCKET, 1),
+ GLFS_MSGID_COMP(QUOTA, 1),
+ GLFS_MSGID_COMP(CTR, 1),
+ GLFS_MSGID_COMP(EC, 1),
+ GLFS_MSGID_COMP(IO_CACHE, 1),
+ GLFS_MSGID_COMP(IO_THREADS, 1),
+ GLFS_MSGID_COMP(MD_CACHE, 1),
+ GLFS_MSGID_COMP(OPEN_BEHIND, 1),
+ GLFS_MSGID_COMP(QUICK_READ, 1),
+ GLFS_MSGID_COMP(READ_AHEAD, 1),
+ GLFS_MSGID_COMP(READDIR_AHEAD, 1),
+ GLFS_MSGID_COMP(SYMLINK_CACHE, 1),
+ GLFS_MSGID_COMP(WRITE_BEHIND, 1),
+ GLFS_MSGID_COMP(CHANGELOG_LIB, 1),
+ GLFS_MSGID_COMP(SHARD, 1),
+ GLFS_MSGID_COMP(JBR, 1),
+ GLFS_MSGID_COMP(PL, 1),
+ GLFS_MSGID_COMP(DC, 1),
+ GLFS_MSGID_COMP(LEASES, 1),
+ GLFS_MSGID_COMP(INDEX, 1),
+ GLFS_MSGID_COMP(POSIX_ACL, 1),
+ GLFS_MSGID_COMP(NLC, 1),
+ GLFS_MSGID_COMP(SL, 1),
+ GLFS_MSGID_COMP(HAM, 1),
+ GLFS_MSGID_COMP(SDFS, 1),
+ GLFS_MSGID_COMP(QUIESCE, 1),
+ GLFS_MSGID_COMP(TA, 1),
+ GLFS_MSGID_COMP(SNAPVIEW_CLIENT, 1),
+ GLFS_MSGID_COMP(TEMPLATE, 1),
+ GLFS_MSGID_COMP(UTIME, 1),
+ GLFS_MSGID_COMP(SNAPVIEW_SERVER, 1),
+ GLFS_MSGID_COMP(CVLT, 1),
+ /* --- new segments for messages goes above this line --- */
+
+ GLFS_MSGID_END
+};
+
+#endif /* !_GLFS_MESSAGE_ID_H_ */
diff --git a/libglusterfs/src/glusterfs/globals.h b/libglusterfs/src/glusterfs/globals.h
new file mode 100644
index 00000000000..b22eaae6c2f
--- /dev/null
+++ b/libglusterfs/src/glusterfs/globals.h
@@ -0,0 +1,188 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLOBALS_H
+#define _GLOBALS_H
+
+#define GF_DEFAULT_BASE_PORT 24007
+#define GF_DEFAULT_VOLFILE_TRANSPORT "tcp"
+
+#define GF_GLOBAL_XLATOR_NAME "global"
+#define GD_OP_VERSION_KEY "operating-version"
+#define GD_MIN_OP_VERSION_KEY "minimum-operating-version"
+#define GD_MAX_OP_VERSION_KEY "maximum-operating-version"
+
+#define GF_PROTECT_FROM_EXTERNAL_WRITES "trusted.glusterfs.protect.writes"
+#define GF_AVOID_OVERWRITE "glusterfs.avoid.overwrite"
+#define GF_CLEAN_WRITE_PROTECTION "glusterfs.clean.writexattr"
+
+/* Gluster versions - OP-VERSION mapping
+ *
+ * 3.3.x - 1
+ * 3.4.x - 2
+ * 3.5.0 - 3
+ * 3.5.1 - 30501
+ * 3.6.0 - 30600
+ * 3.7.0 - 30700
+ * 3.7.1 - 30701
+ * 3.7.2 - 30702
+ *
+ * Starting with Gluster v3.6, the op-version will be multi-digit integer values
+ * based on the Glusterfs version, instead of a simply incrementing integer
+ * value. The op-version for a given X.Y.Z release will be an integer XYZ, with
+ * Y and Z 2 digit always 2 digits wide and padded with 0 when needed. This
+ * should allow for some gaps between two Y releases for backports of features
+ * in Z releases.
+ */
+#define GD_OP_VERSION_MIN \
+ 1 /* MIN is the fresh start op-version, mostly \
+ should not change */
+#define GD_OP_VERSION_MAX \
+ GD_OP_VERSION_9_0 /* MAX VERSION is the maximum \
+ count in VME table, should \
+ keep changing with \
+ introduction of newer \
+ versions */
+
+#define GD_OP_VERSION_3_6_0 30600 /* Op-Version for GlusterFS 3.6.0 */
+
+#define GD_OP_VERSION_3_7_0 30700 /* Op-version for GlusterFS 3.7.0 */
+
+#define GD_OP_VERSION_3_7_1 30701 /* Op-version for GlusterFS 3.7.1 */
+
+#define GD_OP_VERSION_3_7_2 30702 /* Op-version for GlusterFS 3.7.2 */
+
+#define GD_OP_VERSION_3_7_3 30703 /* Op-version for GlusterFS 3.7.3 */
+
+#define GD_OP_VERSION_3_7_4 30704 /* Op-version for GlusterFS 3.7.4 */
+
+#define GD_OP_VERSION_3_7_5 30705 /* Op-version for GlusterFS 3.7.5 */
+
+#define GD_OP_VERSION_3_7_6 30706 /* Op-version for GlusterFS 3.7.6 */
+
+#define GD_OP_VERSION_3_7_7 30707 /* Op-version for GlusterFS 3.7.7 */
+
+#define GD_OP_VERSION_3_7_10 30710 /* Op-version for GlusterFS 3.7.10 */
+
+#define GD_OP_VERSION_3_7_12 30712 /* Op-version for GlusterFS 3.7.12 */
+
+#define GD_OP_VERSION_3_8_0 30800 /* Op-version for GlusterFS 3.8.0 */
+
+#define GD_OP_VERSION_3_8_3 30803 /* Op-version for GlusterFS 3.8.3 */
+
+#define GD_OP_VERSION_3_8_4 30804 /* Op-version for GlusterFS 3.8.4 */
+
+#define GD_OP_VERSION_3_9_0 30900 /* Op-version for GlusterFS 3.9.0 */
+
+#define GD_OP_VERSION_3_9_1 30901 /* Op-version for GlusterFS 3.9.1 */
+
+#define GD_OP_VERSION_3_10_0 31000 /* Op-version for GlusterFS 3.10.0 */
+
+#define GD_OP_VERSION_3_10_1 31001 /* Op-version for GlusterFS 3.10.1 */
+
+#define GD_OP_VERSION_3_10_2 31002 /* Op-version for GlusterFS 3.10.2 */
+
+#define GD_OP_VERSION_3_11_0 31100 /* Op-version for GlusterFS 3.11.0 */
+
+#define GD_OP_VERSION_3_11_1 31101 /* Op-version for GlusterFS 3.11.1 */
+
+#define GD_OP_VERSION_3_12_0 31200 /* Op-version for GlusterFS 3.12.0 */
+
+#define GD_OP_VERSION_3_12_2 31202 /* Op-version for GlusterFS 3.12.2 */
+
+#define GD_OP_VERSION_3_12_3 31203 /* Op-version for GlusterFS 3.12.3 */
+
+#define GD_OP_VERSION_3_13_0 31300 /* Op-version for GlusterFS 3.13.0 */
+
+#define GD_OP_VERSION_3_13_1 31301 /* Op-version for GlusterFS 3.13.1 */
+
+#define GD_OP_VERSION_3_13_2 31302 /* Op-version for GlusterFS 3.13.2 */
+
+#define GD_OP_VERSION_4_0_0 40000 /* Op-version for GlusterFS 4.0.0 */
+
+#define GD_OP_VERSION_4_1_0 40100 /* Op-version for GlusterFS 4.1.0 */
+
+#define GD_OP_VERSION_5_0 50000 /* Op-version for GlusterFS 5.0 */
+
+#define GD_OP_VERSION_5_4 50400 /* Op-version for GlusterFS 5.4 */
+
+#define GD_OP_VERSION_6_0 60000 /* Op-version for GlusterFS 6.0 */
+
+#define GD_OP_VERSION_7_0 70000 /* Op-version for GlusterFS 7.0 */
+#define GD_OP_VERSION_7_1 70100 /* Op-version for GlusterFS 7.1 */
+#define GD_OP_VERSION_7_2 70200 /* Op-version for GlusterFS 7.2 */
+#define GD_OP_VERSION_7_3 70300 /* Op-version for GlusterFS 7.3 */
+
+#define GD_OP_VERSION_8_0 80000 /* Op-version for GlusterFS 8.0 */
+
+#define GD_OP_VERSION_9_0 90000 /* Op-version for GlusterFS 9.0 */
+
+#define GD_OP_VER_PERSISTENT_AFR_XATTRS GD_OP_VERSION_3_6_0
+
+#include "glusterfs/xlator.h"
+#include "glusterfs/options.h"
+
+/* THIS */
+#define THIS (*__glusterfs_this_location())
+#define DECLARE_OLD_THIS xlator_t *old_THIS = THIS
+
+xlator_t **
+__glusterfs_this_location(void);
+xlator_t *
+glusterfs_this_get(void);
+void
+glusterfs_this_set(xlator_t *);
+
+extern xlator_t global_xlator;
+extern struct volume_options global_xl_options[];
+
+/* syncopctx */
+void *
+syncopctx_getctx(void);
+
+/* task */
+void *
+synctask_get(void);
+void
+synctask_set(void *);
+
+/* uuid_buf */
+char *
+glusterfs_uuid_buf_get(void);
+/* lkowner_buf */
+char *
+glusterfs_lkowner_buf_get(void);
+/* leaseid buf */
+char *
+glusterfs_leaseid_buf_get(void);
+char *
+glusterfs_leaseid_exist(void);
+
+/* init */
+int
+glusterfs_globals_init(glusterfs_ctx_t *ctx);
+
+void
+gf_thread_needs_cleanup(void);
+
+struct tvec_base *
+glusterfs_ctx_tw_get(glusterfs_ctx_t *ctx);
+void
+glusterfs_ctx_tw_put(glusterfs_ctx_t *ctx);
+
+extern const char *gf_fop_list[];
+extern const char *gf_upcall_list[];
+
+/* mem acct enable/disable */
+int
+gf_global_mem_acct_enable_get(void);
+int
+gf_global_mem_acct_enable_set(int val);
+#endif /* !_GLOBALS_H */
diff --git a/libglusterfs/src/glusterfs/glusterfs-acl.h b/libglusterfs/src/glusterfs/glusterfs-acl.h
new file mode 100644
index 00000000000..987bf5fab0b
--- /dev/null
+++ b/libglusterfs/src/glusterfs/glusterfs-acl.h
@@ -0,0 +1,162 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERFS_ACL_H
+#define _GLUSTERFS_ACL_H
+
+/* WARNING: Much if this code is restricted to Linux usage.
+ *
+ * It would be much cleaner to replace the code with something that is based on
+ * libacl (or its libc implementation on *BSD).
+ *
+ * Initial work for replacing this Linux specific implementation has been
+ * started as part of the "Improve POSIX ACLs" feature. Functionality for this
+ * feature has been added to the end of this file.
+ */
+
+#include <stdint.h>
+#include <sys/types.h> /* For uid_t */
+
+#include "glusterfs/locking.h" /* For gf_lock_t in struct posix_acl_conf */
+
+#define ACL_PROGRAM 100227
+#define ACLV3_VERSION 3
+
+#define POSIX_ACL_MINIMAL_ACE_COUNT 3
+
+#define POSIX_ACL_READ (0x04)
+#define POSIX_ACL_WRITE (0x02)
+#define POSIX_ACL_EXECUTE (0x01)
+
+#define POSIX_ACL_UNDEFINED_TAG (0x00)
+#define POSIX_ACL_USER_OBJ (0x01)
+#define POSIX_ACL_USER (0x02)
+#define POSIX_ACL_GROUP_OBJ (0x04)
+#define POSIX_ACL_GROUP (0x08)
+#define POSIX_ACL_MASK (0x10)
+#define POSIX_ACL_OTHER (0x20)
+
+#define POSIX_ACL_UNDEFINED_ID (-1)
+
+#define POSIX_ACL_XATTR_VERSION (0x02)
+
+#define POSIX_ACL_ACCESS_XATTR "system.posix_acl_access"
+#define POSIX_ACL_DEFAULT_XATTR "system.posix_acl_default"
+
+struct posix_acl_xattr_entry {
+ uint16_t tag;
+ uint16_t perm;
+ uint32_t id;
+};
+
+struct posix_acl_xattr_header {
+ uint32_t version;
+ struct posix_acl_xattr_entry entries[];
+};
+
+typedef struct posix_acl_xattr_entry posix_acl_xattr_entry;
+typedef struct posix_acl_xattr_header posix_acl_xattr_header;
+
+static inline size_t
+posix_acl_xattr_size(unsigned int count)
+{
+ return (sizeof(posix_acl_xattr_header) +
+ (count * sizeof(posix_acl_xattr_entry)));
+}
+
+static inline ssize_t
+posix_acl_xattr_count(size_t size)
+{
+ if (size < sizeof(posix_acl_xattr_header))
+ return (-1);
+ size -= sizeof(posix_acl_xattr_header);
+ if (size % sizeof(posix_acl_xattr_entry))
+ return (-1);
+ return (size / sizeof(posix_acl_xattr_entry));
+}
+
+struct posix_ace {
+ uint16_t tag;
+ uint16_t perm;
+ uint32_t id;
+};
+
+struct posix_acl {
+ int refcnt;
+ int count;
+ struct posix_ace entries[];
+};
+
+struct posix_acl_ctx {
+ uid_t uid;
+ gid_t gid;
+ mode_t perm;
+ glusterfs_fop_t fop;
+ struct posix_acl *acl_access;
+ struct posix_acl *acl_default;
+};
+
+struct posix_acl_conf {
+ gf_lock_t acl_lock;
+ uid_t super_uid;
+ struct posix_acl *minimal_acl;
+};
+
+/* Above this comment, the legacy POSIX ACL support is kept until it is not
+ * used anymore. Below you will find the more portable version to support POSIX
+ * ACls based on the implementation of libacl (see sys/acl.h). */
+
+/* virtual xattrs passed over RPC, not stored on disk */
+#define GF_POSIX_ACL_ACCESS "glusterfs.posix.acl"
+#define GF_POSIX_ACL_DEFAULT "glusterfs.posix.default_acl"
+#define GF_POSIX_ACL_REQUEST(key) \
+ (!strncmp(key, GF_POSIX_ACL_ACCESS, SLEN(GF_POSIX_ACL_ACCESS)) || \
+ !strncmp(key, GF_POSIX_ACL_DEFAULT, SLEN(GF_POSIX_ACL_DEFAULT)))
+
+#ifdef HAVE_SYS_ACL_H /* only NetBSD does not support POSIX ACLs */
+
+#include <sys/acl.h>
+
+static inline const char *
+gf_posix_acl_get_key(const acl_type_t type)
+{
+ char *acl_key = NULL;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ acl_key = GF_POSIX_ACL_ACCESS;
+ break;
+ case ACL_TYPE_DEFAULT:
+ acl_key = GF_POSIX_ACL_DEFAULT;
+ break;
+ default:
+ errno = EINVAL;
+ }
+
+ return acl_key;
+}
+
+static inline acl_type_t
+gf_posix_acl_get_type(const char *key)
+{
+ acl_type_t type = 0;
+
+ if (!strncmp(key, GF_POSIX_ACL_ACCESS, SLEN(GF_POSIX_ACL_ACCESS)))
+ type = ACL_TYPE_ACCESS;
+ else if (!strncmp(key, GF_POSIX_ACL_DEFAULT, SLEN(GF_POSIX_ACL_DEFAULT)))
+ type = ACL_TYPE_DEFAULT;
+ else
+ errno = EINVAL;
+
+ return type;
+}
+
+#endif /* HAVE_SYS_ACL_H */
+#endif /* _GLUSTERFS_ACL_H */
diff --git a/libglusterfs/src/glusterfs/glusterfs-fops.h b/libglusterfs/src/glusterfs/glusterfs-fops.h
new file mode 100644
index 00000000000..030b2701608
--- /dev/null
+++ b/libglusterfs/src/glusterfs/glusterfs-fops.h
@@ -0,0 +1,241 @@
+/*
+ Copyright (c) 2008-2019 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERFS_FOPS_H_
+#define _GLUSTERFS_FOPS_H_
+
+#include <glusterfs/compat.h>
+
+enum glusterfs_fop_t {
+ GF_FOP_NULL = 0,
+ GF_FOP_STAT = 0 + 1,
+ GF_FOP_READLINK = 0 + 2,
+ GF_FOP_MKNOD = 0 + 3,
+ GF_FOP_MKDIR = 0 + 4,
+ GF_FOP_UNLINK = 0 + 5,
+ GF_FOP_RMDIR = 0 + 6,
+ GF_FOP_SYMLINK = 0 + 7,
+ GF_FOP_RENAME = 0 + 8,
+ GF_FOP_LINK = 0 + 9,
+ GF_FOP_TRUNCATE = 0 + 10,
+ GF_FOP_OPEN = 0 + 11,
+ GF_FOP_READ = 0 + 12,
+ GF_FOP_WRITE = 0 + 13,
+ GF_FOP_STATFS = 0 + 14,
+ GF_FOP_FLUSH = 0 + 15,
+ GF_FOP_FSYNC = 0 + 16,
+ GF_FOP_SETXATTR = 0 + 17,
+ GF_FOP_GETXATTR = 0 + 18,
+ GF_FOP_REMOVEXATTR = 0 + 19,
+ GF_FOP_OPENDIR = 0 + 20,
+ GF_FOP_FSYNCDIR = 0 + 21,
+ GF_FOP_ACCESS = 0 + 22,
+ GF_FOP_CREATE = 0 + 23,
+ GF_FOP_FTRUNCATE = 0 + 24,
+ GF_FOP_FSTAT = 0 + 25,
+ GF_FOP_LK = 0 + 26,
+ GF_FOP_LOOKUP = 0 + 27,
+ GF_FOP_READDIR = 0 + 28,
+ GF_FOP_INODELK = 0 + 29,
+ GF_FOP_FINODELK = 0 + 30,
+ GF_FOP_ENTRYLK = 0 + 31,
+ GF_FOP_FENTRYLK = 0 + 32,
+ GF_FOP_XATTROP = 0 + 33,
+ GF_FOP_FXATTROP = 0 + 34,
+ GF_FOP_FGETXATTR = 0 + 35,
+ GF_FOP_FSETXATTR = 0 + 36,
+ GF_FOP_RCHECKSUM = 0 + 37,
+ GF_FOP_SETATTR = 0 + 38,
+ GF_FOP_FSETATTR = 0 + 39,
+ GF_FOP_READDIRP = 0 + 40,
+ GF_FOP_FORGET = 0 + 41,
+ GF_FOP_RELEASE = 0 + 42,
+ GF_FOP_RELEASEDIR = 0 + 43,
+ GF_FOP_GETSPEC = 0 + 44,
+ GF_FOP_FREMOVEXATTR = 0 + 45,
+ GF_FOP_FALLOCATE = 0 + 46,
+ GF_FOP_DISCARD = 0 + 47,
+ GF_FOP_ZEROFILL = 0 + 48,
+ GF_FOP_IPC = 0 + 49,
+ GF_FOP_SEEK = 0 + 50,
+ GF_FOP_LEASE = 0 + 51,
+ GF_FOP_COMPOUND = 0 + 52,
+ GF_FOP_GETACTIVELK = 0 + 53,
+ GF_FOP_SETACTIVELK = 0 + 54,
+ GF_FOP_PUT = 0 + 55,
+ GF_FOP_ICREATE = 0 + 56,
+ GF_FOP_NAMELINK = 0 + 57,
+ GF_FOP_COPY_FILE_RANGE = 0 + 58,
+ GF_FOP_MAXVALUE = 0 + 59,
+};
+typedef enum glusterfs_fop_t glusterfs_fop_t;
+
+enum glusterfs_event_t {
+ GF_EVENT_PARENT_UP = 1,
+ GF_EVENT_POLLIN = 1 + 1,
+ GF_EVENT_POLLOUT = 1 + 2,
+ GF_EVENT_POLLERR = 1 + 3,
+ GF_EVENT_CHILD_UP = 1 + 4,
+ GF_EVENT_CHILD_DOWN = 1 + 5,
+ GF_EVENT_CHILD_CONNECTING = 1 + 6,
+ GF_EVENT_CLEANUP = 9,
+ GF_EVENT_TRANSPORT_CONNECTED = 9 + 1,
+ GF_EVENT_VOLFILE_MODIFIED = 9 + 2,
+ GF_EVENT_GRAPH_NEW = 9 + 3,
+ GF_EVENT_TRANSLATOR_INFO = 9 + 4,
+ GF_EVENT_TRANSLATOR_OP = 9 + 5,
+ GF_EVENT_AUTH_FAILED = 9 + 6,
+ GF_EVENT_VOLUME_DEFRAG = 9 + 7,
+ GF_EVENT_PARENT_DOWN = 9 + 8,
+ GF_EVENT_VOLUME_BARRIER_OP = 9 + 9,
+ GF_EVENT_UPCALL = 9 + 10,
+ GF_EVENT_SCRUB_STATUS = 9 + 11,
+ GF_EVENT_SOME_DESCENDENT_DOWN = 9 + 12,
+ GF_EVENT_SCRUB_ONDEMAND = 9 + 13,
+ GF_EVENT_SOME_DESCENDENT_UP = 9 + 14,
+ GF_EVENT_CHILD_PING = 9 + 15,
+ GF_EVENT_MAXVAL = 9 + 16,
+};
+typedef enum glusterfs_event_t glusterfs_event_t;
+
+enum gf_op_type_t {
+ GF_OP_TYPE_NULL = 0,
+ GF_OP_TYPE_FOP = 0 + 1,
+ GF_OP_TYPE_MGMT = 0 + 2,
+ GF_OP_TYPE_MAX = 0 + 3,
+};
+typedef enum gf_op_type_t gf_op_type_t;
+
+enum glusterfs_lk_cmds_t {
+ GF_LK_GETLK = 0,
+ GF_LK_SETLK = 0 + 1,
+ GF_LK_SETLKW = 0 + 2,
+ GF_LK_RESLK_LCK = 0 + 3,
+ GF_LK_RESLK_LCKW = 0 + 4,
+ GF_LK_RESLK_UNLCK = 0 + 5,
+ GF_LK_GETLK_FD = 0 + 6,
+};
+typedef enum glusterfs_lk_cmds_t glusterfs_lk_cmds_t;
+
+enum glusterfs_lk_types_t {
+ GF_LK_F_RDLCK = 0,
+ GF_LK_F_WRLCK = 0 + 1,
+ GF_LK_F_UNLCK = 0 + 2,
+ GF_LK_EOL = 0 + 3,
+};
+typedef enum glusterfs_lk_types_t glusterfs_lk_types_t;
+
+enum gf_lease_types_t {
+ NONE = 0,
+ GF_RD_LEASE = 1,
+ GF_RW_LEASE = 2,
+ GF_LEASE_MAX_TYPE = 2 + 1,
+};
+typedef enum gf_lease_types_t gf_lease_types_t;
+
+enum gf_lease_cmds_t {
+ GF_GET_LEASE = 1,
+ GF_SET_LEASE = 2,
+ GF_UNLK_LEASE = 3,
+};
+typedef enum gf_lease_cmds_t gf_lease_cmds_t;
+
+#define LEASE_ID_SIZE 16 /* 128bits */
+
+struct gf_lease {
+ gf_lease_cmds_t cmd;
+ gf_lease_types_t lease_type;
+ char lease_id[LEASE_ID_SIZE];
+ u_int lease_flags;
+};
+typedef struct gf_lease gf_lease;
+
+enum glusterfs_lk_recovery_cmds_t {
+ F_RESLK_LCK = 200,
+ F_RESLK_LCKW = 200 + 1,
+ F_RESLK_UNLCK = 200 + 2,
+ F_GETLK_FD = 200 + 3,
+};
+typedef enum glusterfs_lk_recovery_cmds_t glusterfs_lk_recovery_cmds_t;
+
+enum gf_lk_domain_t {
+ GF_LOCK_POSIX = 0,
+ GF_LOCK_INTERNAL = 1,
+};
+typedef enum gf_lk_domain_t gf_lk_domain_t;
+
+enum entrylk_cmd {
+ ENTRYLK_LOCK = 0,
+ ENTRYLK_UNLOCK = 1,
+ ENTRYLK_LOCK_NB = 2,
+};
+typedef enum entrylk_cmd entrylk_cmd;
+
+enum entrylk_type {
+ ENTRYLK_RDLCK = 0,
+ ENTRYLK_WRLCK = 1,
+};
+typedef enum entrylk_type entrylk_type;
+#define GF_MAX_LOCK_OWNER_LEN 1024 /* 1kB as per NLM */
+#define GF_LKOWNER_BUF_SIZE \
+ ((GF_MAX_LOCK_OWNER_LEN * 2) + (GF_MAX_LOCK_OWNER_LEN / 8))
+
+struct gf_lkowner_t {
+ int len;
+ char data[GF_MAX_LOCK_OWNER_LEN];
+};
+typedef struct gf_lkowner_t gf_lkowner_t;
+
+enum gf_xattrop_flags_t {
+ GF_XATTROP_ADD_ARRAY = 0,
+ GF_XATTROP_ADD_ARRAY64 = 1,
+ GF_XATTROP_OR_ARRAY = 2,
+ GF_XATTROP_AND_ARRAY = 3,
+ GF_XATTROP_GET_AND_SET = 4,
+ GF_XATTROP_ADD_ARRAY_WITH_DEFAULT = 5,
+ GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT = 6,
+};
+typedef enum gf_xattrop_flags_t gf_xattrop_flags_t;
+
+enum gf_seek_what_t {
+ GF_SEEK_DATA = 0,
+ GF_SEEK_HOLE = 1,
+};
+typedef enum gf_seek_what_t gf_seek_what_t;
+
+enum gf_upcall_flags_t {
+ GF_UPCALL_NULL = 0,
+ GF_UPCALL = 1,
+ GF_UPCALL_CI_STAT = 2,
+ GF_UPCALL_CI_XATTR = 3,
+ GF_UPCALL_CI_RENAME = 4,
+ GF_UPCALL_CI_NLINK = 5,
+ GF_UPCALL_CI_FORGET = 6,
+ GF_UPCALL_LEASE_RECALL = 7,
+ GF_UPCALL_FLAGS_MAXVALUE = 8,
+};
+typedef enum gf_upcall_flags_t gf_upcall_flags_t;
+
+enum gf_dict_data_type_t {
+ GF_DATA_TYPE_UNKNOWN = 0,
+ GF_DATA_TYPE_STR_OLD = 1,
+ GF_DATA_TYPE_INT = 2,
+ GF_DATA_TYPE_UINT = 3,
+ GF_DATA_TYPE_DOUBLE = 4,
+ GF_DATA_TYPE_STR = 5,
+ GF_DATA_TYPE_PTR = 6,
+ GF_DATA_TYPE_GFUUID = 7,
+ GF_DATA_TYPE_IATT = 8,
+ GF_DATA_TYPE_MDATA = 9,
+ GF_DATA_TYPE_MAX = 10,
+};
+typedef enum gf_dict_data_type_t gf_dict_data_type_t;
+
+#endif /* !_GLUSTERFS_FOPS_H */
diff --git a/libglusterfs/src/glusterfs/glusterfs.h b/libglusterfs/src/glusterfs/glusterfs.h
new file mode 100644
index 00000000000..e6425618b7f
--- /dev/null
+++ b/libglusterfs/src/glusterfs/glusterfs.h
@@ -0,0 +1,838 @@
+/*
+ Copyright (c) 2008-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERFS_H
+#define _GLUSTERFS_H
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <netdb.h>
+#include <errno.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <arpa/inet.h>
+#include <sys/poll.h>
+#include <pthread.h>
+#include <limits.h> /* For PATH_MAX */
+#include <openssl/sha.h>
+
+#include "glusterfs/glusterfs-fops.h"
+#include "glusterfs/list.h"
+#include "glusterfs/locking.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/lkowner.h"
+#include "glusterfs/compat-uuid.h"
+#include "glusterfs/refcount.h"
+#include "glusterfs/atomic.h"
+
+#define GF_YES 1
+#define GF_NO 0
+
+#define IS_ERROR(ret) ((ret) < 0)
+#define IS_SUCCESS(ret) ((ret) >= 0)
+
+#ifndef O_LARGEFILE
+/* savannah bug #20053, patch for compiling on darwin */
+#define O_LARGEFILE 0100000 /* from bits/fcntl.h */
+#endif
+
+#ifndef O_FMODE_EXEC
+/* redhat bug 843080, added from linux/fs.h */
+#define O_FMODE_EXEC 040 // 0x20
+#endif
+
+#ifndef O_DIRECT
+/* savannah bug #20050, #20052 */
+#define O_DIRECT 0 /* From asm/fcntl.h */
+#endif
+
+#ifndef O_DIRECTORY
+/* FreeBSD does not need O_DIRECTORY */
+#define O_DIRECTORY 0
+#endif
+
+#ifndef EBADFD
+/* Mac OS X does not have EBADFD */
+#define EBADFD EBADF
+#endif
+
+#ifndef FNM_EXTMATCH
+#define FNM_EXTMATCH 0
+#endif
+
+/*gets max-offset on all architectures correctly*/
+#define GF_OFF_MAX ((1ULL << (sizeof(off_t) * 8 - 1)) - 1ULL)
+
+#define GLUSTERD_MAX_SNAP_NAME 255
+#define GLUSTERFS_SOCKET_LISTEN_BACKLOG 1024
+#define GLUSTERD_BRICK_SERVERS "cluster.brick-vol-servers"
+#define SLEN(str) (sizeof(str) - 1)
+
+#define ZR_MOUNTPOINT_OPT "mountpoint"
+#define ZR_ATTR_TIMEOUT_OPT "attribute-timeout"
+#define ZR_ENTRY_TIMEOUT_OPT "entry-timeout"
+#define ZR_NEGATIVE_TIMEOUT_OPT "negative-timeout"
+#define ZR_DIRECT_IO_OPT "direct-io-mode"
+#define ZR_STRICT_VOLFILE_CHECK "strict-volfile-check"
+#define ZR_DUMP_FUSE "dump-fuse"
+#define ZR_FUSE_MOUNTOPTS "fuse-mountopts"
+#define IO_THREADS_QUEUE_SIZE_KEY "io-thread-queue-size"
+
+#define GF_XATTR_CLRLK_CMD "glusterfs.clrlk"
+#define GF_XATTR_PATHINFO_KEY "trusted.glusterfs.pathinfo"
+#define GF_XATTR_NODE_UUID_KEY "trusted.glusterfs.node-uuid"
+#define GF_XATTR_LIST_NODE_UUIDS_KEY "trusted.glusterfs.list-node-uuids"
+#define GF_REBAL_FIND_LOCAL_SUBVOL "glusterfs.find-local-subvol"
+#define GF_REBAL_OLD_FIND_LOCAL_SUBVOL "glusterfs.old-find-local-subvol"
+#define GF_XATTR_VOL_ID_KEY "trusted.glusterfs.volume-id"
+#define GF_XATTR_LOCKINFO_KEY "trusted.glusterfs.lockinfo"
+#define GF_META_LOCK_KEY "glusterfs.lock-migration-meta-lock"
+#define GF_META_UNLOCK_KEY "glusterfs.lock-migration-meta-unlock"
+#define GF_XATTR_GET_REAL_FILENAME_KEY "glusterfs.get_real_filename:"
+#define GF_XATTR_USER_PATHINFO_KEY "glusterfs.pathinfo"
+#define GF_INTERNAL_IGNORE_DEEM_STATFS "ignore-deem-statfs"
+#define GF_XATTR_IOSTATS_DUMP_KEY "trusted.io-stats-dump"
+
+#define GF_READDIR_SKIP_DIRS "readdir-filter-directories"
+#define GF_MDC_LOADED_KEY_NAMES "glusterfs.mdc.loaded.key.names"
+
+#define BD_XATTR_KEY "user.glusterfs"
+#define GF_PREOP_PARENT_KEY "glusterfs.preop.parent.key"
+#define GF_PREOP_CHECK_FAILED "glusterfs.preop.check.failed"
+
+#define XATTR_IS_PATHINFO(x) \
+ ((strncmp(x, GF_XATTR_PATHINFO_KEY, strlen(x)) == 0) || \
+ (strncmp(x, GF_XATTR_USER_PATHINFO_KEY, strlen(x)) == 0))
+#define XATTR_IS_NODE_UUID(x) \
+ (strncmp(x, GF_XATTR_NODE_UUID_KEY, SLEN(GF_XATTR_NODE_UUID_KEY)) == 0)
+#define XATTR_IS_NODE_UUID_LIST(x) \
+ (strncmp(x, GF_XATTR_LIST_NODE_UUIDS_KEY, \
+ SLEN(GF_XATTR_LIST_NODE_UUIDS_KEY)) == 0)
+#define XATTR_IS_LOCKINFO(x) \
+ (strncmp(x, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) == 0)
+
+#define XATTR_IS_BD(x) (strncmp(x, BD_XATTR_KEY, SLEN(BD_XATTR_KEY)) == 0)
+
+#define GF_XATTR_LINKINFO_KEY "trusted.distribute.linkinfo"
+#define GFID_XATTR_KEY "trusted.gfid"
+#define PGFID_XATTR_KEY_PREFIX "trusted.pgfid."
+#define GFID2PATH_VIRT_XATTR_KEY "glusterfs.gfidtopath"
+#define GFID2PATH_XATTR_KEY_PREFIX "trusted.gfid2path."
+#define GFID2PATH_XATTR_KEY_PREFIX_LENGTH 18
+#define VIRTUAL_GFID_XATTR_KEY_STR "glusterfs.gfid.string"
+#define VIRTUAL_GFID_XATTR_KEY "glusterfs.gfid"
+#define GF_XATTR_MDATA_KEY "trusted.glusterfs.mdata"
+#define UUID_CANONICAL_FORM_LEN 36
+
+#define GET_ANCESTRY_PATH_KEY "glusterfs.ancestry.path"
+#define GET_ANCESTRY_DENTRY_KEY "glusterfs.ancestry.dentry"
+
+#define BITROT_DEFAULT_CURRENT_VERSION (unsigned long)1
+#define BITROT_DEFAULT_SIGNING_VERSION (unsigned long)0
+
+/* on-disk object signature keys */
+#define BITROT_OBJECT_BAD_KEY "trusted.bit-rot.bad-file"
+#define BITROT_CURRENT_VERSION_KEY "trusted.bit-rot.version"
+#define BITROT_SIGNING_VERSION_KEY "trusted.bit-rot.signature"
+
+/* globally usable bad file marker */
+#define GLUSTERFS_BAD_INODE "glusterfs.bad-inode"
+
+/* on-disk size of signing xattr (not the signature itself) */
+#define BITROT_SIGNING_XATTR_SIZE_KEY "trusted.glusterfs.bit-rot.size"
+
+/* GET/SET object signature */
+#define GLUSTERFS_GET_OBJECT_SIGNATURE "trusted.glusterfs.get-signature"
+#define GLUSTERFS_SET_OBJECT_SIGNATURE "trusted.glusterfs.set-signature"
+
+/* operation needs to be durable on-disk */
+#define GLUSTERFS_DURABLE_OP "trusted.glusterfs.durable-op"
+
+/* key for version exchange b/w bitrot stub and changelog */
+#define GLUSTERFS_VERSION_XCHG_KEY "glusterfs.version.xchg"
+
+#define GLUSTERFS_INTERNAL_FOP_KEY "glusterfs-internal-fop"
+
+#define GF_ENFORCE_MANDATORY_LOCK "trusted.glusterfs.enforce-mandatory-lock"
+
+/* GlusterFS Internal FOP Indicator flags
+ * (To pass information on the context in which a paritcular
+ * fop is performed between translators)
+ * The presence of a particular flag must be treated as an
+ * indicator of the context, however the flag is added only in
+ * a scenario where there is a need for such context across translators.
+ * So it cannot be an absolute information on context.
+ */
+#define GF_INTERNAL_CTX_KEY "glusterfs.internal-ctx"
+
+/*
+ * Always append entries to end of the enum, do not delete entries.
+ * Currently dict_set_flag allows to set up to 256 flag, if the enum
+ * needs to grow beyond this dict_set_flag has to be changed accordingly
+ */
+enum gf_internal_fop_indicator {
+ GF_DHT_HEAL_DIR /* Index 0 in bit array*/
+};
+
+/* Todo:
+ * Add GF_FOP_LINK_FILE 0x2ULL
+ * address GLUSTERFS_MARKER_DONT_ACCOUNT_KEY and
+ * GLUSTERFS_INTERNAL_FOP_KEY with this flag
+ */
+
+#define DHT_CHANGELOG_RENAME_OP_KEY "changelog.rename-op"
+
+#define GLUSTERFS_WRITE_IS_APPEND "glusterfs.write-is-append"
+#define GLUSTERFS_WRITE_UPDATE_ATOMIC "glusterfs.write-update-atomic"
+#define GLUSTERFS_OPEN_FD_COUNT "glusterfs.open-fd-count"
+#define GLUSTERFS_ACTIVE_FD_COUNT "glusterfs.open-active-fd-count"
+#define GLUSTERFS_INODELK_COUNT "glusterfs.inodelk-count"
+#define GLUSTERFS_ENTRYLK_COUNT "glusterfs.entrylk-count"
+#define GLUSTERFS_POSIXLK_COUNT "glusterfs.posixlk-count"
+#define GLUSTERFS_PARENT_ENTRYLK "glusterfs.parent-entrylk"
+#define GLUSTERFS_INODELK_DOM_COUNT "glusterfs.inodelk-dom-count"
+#define GLUSTERFS_INODELK_DOM_PREFIX "glusterfs.inodelk-dom-prefix"
+#define GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS "glusterfs.multi-dom-lk-cnt-req"
+#define GFID_TO_PATH_KEY "glusterfs.gfid2path"
+#define GF_XATTR_STIME_PATTERN "trusted.glusterfs.*.stime"
+#define GF_XATTR_XTIME_PATTERN "trusted.glusterfs.*.xtime"
+#define GF_XATTR_TRIGGER_SYNC "glusterfs.geo-rep.trigger-sync"
+
+/* quota xattrs */
+#define QUOTA_SIZE_KEY "trusted.glusterfs.quota.size"
+#define QUOTA_LIMIT_KEY "trusted.glusterfs.quota.limit-set"
+#define QUOTA_LIMIT_OBJECTS_KEY "trusted.glusterfs.quota.limit-objects"
+#define VIRTUAL_QUOTA_XATTR_CLEANUP_KEY "glusterfs.quota-xattr-cleanup"
+#define QUOTA_READ_ONLY_KEY "trusted.glusterfs.quota.read-only"
+
+/* ctime related */
+#define CTIME_MDATA_XDATA_KEY "set-ctime-mdata"
+
+/* afr related */
+#define AFR_XATTR_PREFIX "trusted.afr"
+
+/* Index xlator related */
+#define GF_XATTROP_INDEX_GFID "glusterfs.xattrop_index_gfid"
+#define GF_XATTROP_ENTRY_CHANGES_GFID "glusterfs.xattrop_entry_changes_gfid"
+#define GF_XATTROP_INDEX_COUNT "glusterfs.xattrop_index_count"
+#define GF_XATTROP_DIRTY_GFID "glusterfs.xattrop_dirty_gfid"
+#define GF_XATTROP_DIRTY_COUNT "glusterfs.xattrop_dirty_count"
+#define GF_XATTROP_ENTRY_IN_KEY "glusterfs.xattrop-entry-create"
+#define GF_XATTROP_ENTRY_OUT_KEY "glusterfs.xattrop-entry-delete"
+#define GF_INDEX_IA_TYPE_GET_REQ "glusterfs.index-ia-type-get-req"
+#define GF_INDEX_IA_TYPE_GET_RSP "glusterfs.index-ia-type-get-rsp"
+
+#define GF_HEAL_INFO "glusterfs.heal-info"
+#define GF_AFR_HEAL_SBRAIN "glusterfs.heal-sbrain"
+#define GF_AFR_SBRAIN_STATUS "replica.split-brain-status"
+#define GF_AFR_SBRAIN_CHOICE "replica.split-brain-choice"
+#define GF_AFR_SPB_CHOICE_TIMEOUT "replica.split-brain-choice-timeout"
+#define GF_AFR_SBRAIN_RESOLVE "replica.split-brain-heal-finalize"
+#define GF_AFR_ADD_BRICK "trusted.add-brick"
+#define GF_AFR_REPLACE_BRICK "trusted.replace-brick"
+#define GF_AFR_DIRTY "trusted.afr.dirty"
+#define GF_XATTROP_ENTRY_OUT "glusterfs.xattrop-entry-delete"
+#define GF_XATTROP_PURGE_INDEX "glusterfs.xattrop-purge-index"
+
+#define GF_GFIDLESS_LOOKUP "gfidless-lookup"
+/* replace-brick and pump related internal xattrs */
+#define RB_PUMP_CMD_START "glusterfs.pump.start"
+#define RB_PUMP_CMD_PAUSE "glusterfs.pump.pause"
+#define RB_PUMP_CMD_COMMIT "glusterfs.pump.commit"
+#define RB_PUMP_CMD_ABORT "glusterfs.pump.abort"
+#define RB_PUMP_CMD_STATUS "glusterfs.pump.status"
+
+#define GLUSTERFS_MARKER_DONT_ACCOUNT_KEY "glusters.marker.dont-account"
+#define GLUSTERFS_RDMA_INLINE_THRESHOLD (2048)
+#define GLUSTERFS_RDMA_MAX_HEADER_SIZE \
+ (228) /* (sizeof (rdma_header_t) \
+ + RDMA_MAX_SEGMENTS \
+ * sizeof (rdma_read_chunk_t)) \
+ */
+
+#define GLUSTERFS_RPC_REPLY_SIZE 24
+
+#define STARTING_EVENT_THREADS 2
+
+#define DEFAULT_VAR_RUN_DIRECTORY DATADIR "/run/gluster"
+#define DEFAULT_GLUSTERFSD_MISC_DIRETORY DATADIR "/lib/misc/glusterfsd"
+#ifdef GF_LINUX_HOST_OS
+#define GLUSTERD_DEFAULT_WORKDIR DATADIR "/lib/glusterd"
+#else
+#define GLUSTERD_DEFAULT_WORKDIR DATADIR "/db/glusterd"
+#endif
+#define GF_REPLICATE_TRASH_DIR ".landfill"
+
+/* GlusterFS's maximum supported Auxiliary GIDs */
+#define GF_MAX_AUX_GROUPS 65535
+
+#define GF_UUID_BUF_SIZE 37 /* UUID_CANONICAL_FORM_LEN + NULL */
+#define GF_UUID_BNAME_BUF_SIZE (320) /* (64 + 256) */
+
+#define GF_REBALANCE_TID_KEY "rebalance-id"
+#define GF_REMOVE_BRICK_TID_KEY "remove-brick-id"
+#define GF_TIER_TID_KEY "tier-id"
+#define GF_TIER_ENABLED "tier-enabled"
+
+#define UUID_CANONICAL_FORM_LEN 36
+
+/* Adding this here instead of any glusterd*.h files as it is also required by
+ * cli
+ */
+#define DEFAULT_GLUSTERD_SOCKFILE DATADIR "/run/glusterd.socket"
+
+/* features/marker-quota also needs to have knowledge of link-files so as to
+ * exclude them from accounting.
+ */
+#define DHT_LINKFILE_MODE (S_ISVTX)
+
+#define IS_DHT_LINKFILE_MODE(iabuf) \
+ ((st_mode_from_ia((iabuf)->ia_prot, (iabuf)->ia_type) & ~S_IFMT) == \
+ DHT_LINKFILE_MODE)
+#define DHT_LINKFILE_STR "linkto"
+#define DHT_COMMITHASH_STR "commithash"
+
+#define DHT_SKIP_NON_LINKTO_UNLINK "unlink-only-if-dht-linkto-file"
+#define TIER_SKIP_NON_LINKTO_UNLINK "unlink-only-if-tier-linkto-file"
+#define DHT_SKIP_OPEN_FD_UNLINK "dont-unlink-for-open-fd"
+#define DHT_IATT_IN_XDATA_KEY "dht-get-iatt-in-xattr"
+#define DHT_MODE_IN_XDATA_KEY "dht-get-mode-in-xattr"
+#define GET_LINK_COUNT "get-link-count"
+#define GF_GET_SIZE "get-size"
+#define GF_PRESTAT "virt-gf-prestat"
+#define GF_POSTSTAT "virt-gf-poststat"
+
+/*CTR and Marker requires inode dentry link count from posix*/
+#define GF_RESPONSE_LINK_COUNT_XDATA "gf_response_link_count"
+#define GF_REQUEST_LINK_COUNT_XDATA "gf_request_link_count"
+
+#define GF_GET_FILE_BLOCK_COUNT "gf_get_file_block_count"
+
+#define CTR_ATTACH_TIER_LOOKUP "ctr_attach_tier_lookup"
+
+#define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
+#define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect"
+
+#define GF_LOG_LRU_BUFSIZE_DEFAULT 5
+#define GF_LOG_LRU_BUFSIZE_MIN 0
+#define GF_LOG_LRU_BUFSIZE_MAX 20
+#define GF_LOG_LRU_BUFSIZE_MIN_STR "0"
+#define GF_LOG_LRU_BUFSIZE_MAX_STR "20"
+
+#define GF_LOG_FLUSH_TIMEOUT_DEFAULT 120
+#define GF_LOG_FLUSH_TIMEOUT_MIN 30
+#define GF_LOG_FLUSH_TIMEOUT_MAX 300
+#define GF_LOG_FLUSH_TIMEOUT_MIN_STR "30"
+#define GF_LOG_FLUSH_TIMEOUT_MAX_STR "300"
+#define GF_LOG_LOCALTIME_DEFAULT 0
+
+#define GF_NETWORK_TIMEOUT 42
+
+#define GF_BACKTRACE_LEN 4096
+#define GF_BACKTRACE_FRAME_COUNT 7
+
+#define GF_LK_ADVISORY 0 /* maps to GLFS_LK_ADVISORY from libgfapi*/
+#define GF_LK_MANDATORY 1 /* maps to GLFS_LK_MANDATORY from libgfapi*/
+#define GF_LOCK_MODE "glusterfs.lk.lkmode"
+
+#define GF_CHECK_XATTR_KEY_AND_GOTO(key, cmpkey, errval, lbl) \
+ do { \
+ if (key && strcmp(key, cmpkey) == 0) { \
+ errval = -EINVAL; \
+ goto lbl; \
+ } \
+ } while (0)
+
+#define GF_CS_OBJECT_SIZE "trusted.glusterfs.cs.object_size"
+#define GF_CS_BLOCK_SIZE "trusted.glusterfs.cs.block_size"
+#define GF_CS_NUM_BLOCKS "trusted.glusterfs.cs.num_blocks"
+
+#define GF_CS_XATTR_ARCHIVE_UUID "trusted.cloudsync.uuid"
+
+#define GF_CS_OBJECT_UPLOAD_COMPLETE "trusted.glusterfs.csou.complete"
+#define GF_CS_OBJECT_REMOTE "trusted.glusterfs.cs.remote"
+#define GF_CS_OBJECT_DOWNLOADING "trusted.glusterfs.cs.downloading"
+#define GF_CS_OBJECT_DOWNLOADED "trusted.glusterfs.cs.downloaded"
+#define GF_CS_OBJECT_STATUS "trusted.glusterfs.cs.status"
+#define GF_CS_OBJECT_REPAIR "trusted.glusterfs.cs.repair"
+
+#define gf_boolean_t bool
+#define _gf_false false
+#define _gf_true true
+
+typedef enum {
+ GF_CS_LOCAL = 1,
+ GF_CS_REMOTE = 2,
+ GF_CS_REPAIR = 4,
+ GF_CS_DOWNLOADING = 8,
+ GF_CS_ERROR = 16,
+} gf_cs_obj_state;
+
+typedef enum {
+ GF_FOP_PRI_UNSPEC = -1, /* Priority not specified */
+ GF_FOP_PRI_HI = 0, /* low latency */
+ GF_FOP_PRI_NORMAL, /* normal */
+ GF_FOP_PRI_LO, /* bulk */
+ GF_FOP_PRI_LEAST, /* least */
+ GF_FOP_PRI_MAX, /* Highest */
+} gf_fop_pri_t;
+
+typedef enum {
+ /* The 'component' (xlator / option) is not yet setting the flag */
+ GF_UNCLASSIFIED = 0,
+ /* The 'component' is experimental, should not be recommened
+ in production mode */
+ GF_EXPERIMENTAL,
+ /* The 'component' is tech preview, ie, it is 'mostly' working as
+ expected, but can have some of the corner cases, which is not
+ handled. */
+ GF_TECH_PREVIEW,
+ /* The 'component' is good to run. Has good enough test and
+ documentation coverage. */
+ GF_MAINTAINED,
+ /* The component is:
+ - no more a focus
+ - no more solving a valid use case
+ - no more maintained, no volunteers to maintain
+ - there is 'maintained' or 'tech-preview' feature,
+ which does the same thing, better.
+ */
+ GF_DEPRECATED,
+ /* The 'component' is no more 'built'. */
+ GF_OBSOLETE,
+ /* The 'component' exist for Documentation purposes.
+ No real usecase */
+ GF_DOCUMENT_PURPOSE,
+} gf_category_t;
+
+static const char *const FOP_PRI_STRINGS[] = {"HIGH", "NORMAL", "LOW", "LEAST"};
+
+static inline const char *
+fop_pri_to_string(gf_fop_pri_t pri)
+{
+ if (IS_ERROR(pri))
+ return "UNSPEC";
+
+ if (pri >= GF_FOP_PRI_MAX)
+ return "INVALID";
+
+ return FOP_PRI_STRINGS[pri];
+}
+
+const char *
+fop_enum_to_pri_string(glusterfs_fop_t fop);
+
+#define GF_SET_IF_NOT_PRESENT 0x1 /* default behaviour */
+#define GF_SET_OVERWRITE 0x2 /* Overwrite with the buf given */
+#define GF_SET_DIR_ONLY 0x4
+#define GF_SET_EPOCH_TIME 0x8 /* used by afr dir lookup selfheal */
+#define GF_AUXILLARY_PARGFID 0xd /* RIO dummy parent gfid */
+
+/* key value which quick read uses to get small files in lookup cbk */
+#define GF_CONTENT_KEY "glusterfs.content"
+
+struct _xlator_cmdline_option {
+ struct list_head cmd_args;
+ char *volume;
+ char *key;
+ char *value;
+};
+typedef struct _xlator_cmdline_option xlator_cmdline_option_t;
+
+struct _server_cmdline {
+ struct list_head list;
+ char *volfile_server;
+ char *transport;
+ int port;
+};
+typedef struct _server_cmdline server_cmdline_t;
+
+#define GF_OPTION_ENABLE _gf_true
+#define GF_OPTION_DISABLE _gf_false
+#define GF_OPTION_DEFERRED 2
+
+typedef enum { _gf_none, _gf_memcheck, _gf_drd } gf_valgrind_tool;
+
+struct _cmd_args {
+ /* basic options */
+ char *volfile_server;
+ server_cmdline_t *curr_server;
+ /* List of backup volfile servers, including original */
+ struct list_head volfile_servers;
+ char *volfile;
+ char *log_server;
+ gf_loglevel_t log_level;
+ char *log_file;
+ char *log_ident;
+ gf_log_logger_t logger;
+ gf_log_format_t log_format;
+ uint32_t log_buf_size;
+ uint32_t log_flush_timeout;
+ int32_t max_connect_attempts;
+ char *print_exports;
+ char *print_netgroups;
+ int print_xlatordir;
+ int print_statedumpdir;
+ int print_logdir;
+ int print_libexecdir;
+ /* advanced options */
+ uint32_t volfile_server_port;
+ char *volfile_server_transport;
+ uint32_t log_server_port;
+ char *pid_file;
+ char *sock_file;
+ int no_daemon_mode;
+ char *run_id;
+ int debug_mode;
+ int read_only;
+ int acl;
+ int selinux;
+ int capability;
+ int enable_ino32;
+ int worm;
+ int mac_compat;
+ int fopen_keep_cache;
+ int gid_timeout;
+ char gid_timeout_set;
+ int aux_gfid_mount;
+
+ /* need a process wide timer-wheel? */
+ int global_timer_wheel;
+
+ /* list of xlator_option_t */
+ struct list_head xlator_options;
+
+ /* fuse options */
+ int fuse_direct_io_mode;
+ char *use_readdirp;
+ int no_root_squash;
+ int volfile_check;
+ double fuse_entry_timeout;
+ double fuse_negative_timeout;
+ double fuse_attribute_timeout;
+ char *volume_name;
+ int fuse_nodev;
+ int fuse_nosuid;
+ char *dump_fuse;
+ pid_t client_pid;
+ int client_pid_set;
+ unsigned uid_map_root;
+ int32_t lru_limit;
+ int32_t invalidate_limit;
+ int background_qlen;
+ int congestion_threshold;
+ char *fuse_mountopts;
+ int mem_acct;
+ int resolve_gids;
+
+ /* key args */
+ char *mount_point;
+ char *volfile_id;
+
+ /* required for portmap */
+ int brick_port;
+ char *brick_name;
+ int brick_port2;
+
+ /* Should management connections use SSL? */
+ int secure_mgmt;
+
+ /* Linux-only OOM killer adjustment */
+#ifdef GF_LINUX_HOST_OS
+ char *oom_score_adj;
+#endif
+
+ /* Run this process with valgrind? Might want to prevent calling
+ * functions that prevent valgrind from working correctly, like
+ * dlclose(). */
+ gf_valgrind_tool vgtool;
+
+ int localtime_logging;
+
+ /* For the subdir mount */
+ char *subdir_mount;
+
+ char *process_name;
+ char *event_history;
+ int thin_client;
+ uint32_t reader_thread_count;
+
+ /* FUSE writeback cache support */
+ int kernel_writeback_cache;
+ uint32_t attr_times_granularity;
+
+ int fuse_flush_handle_interrupt;
+ int fuse_auto_inval;
+
+ bool global_threading;
+ bool brick_mux;
+
+ uint32_t fuse_dev_eperm_ratelimit_ns;
+};
+typedef struct _cmd_args cmd_args_t;
+
+struct _glusterfs_graph {
+ struct list_head list;
+ struct timeval dob;
+ void *first;
+ void *top; /* selected by -n */
+ int xl_count;
+ int id; /* Used in logging */
+ int used; /* Should be set when fuse gets
+ first CHILD_UP */
+ uint32_t volfile_checksum;
+ uint32_t leaf_count;
+ void *last_xl; /* Stores the last xl of the graph, as of now only populated
+ in client multiplexed code path */
+ pthread_mutex_t mutex;
+ pthread_cond_t child_down_cond; /* for broadcasting CHILD_DOWN */
+ int parent_down;
+ char graph_uuid[128];
+ char volume_id[GF_UUID_BUF_SIZE];
+};
+typedef struct _glusterfs_graph glusterfs_graph_t;
+
+typedef int32_t (*glusterfsd_mgmt_event_notify_fn_t)(int32_t event, void *data,
+ ...);
+
+typedef enum {
+ MGMT_SSL_NEVER = 0,
+ MGMT_SSL_COPY_IO,
+ MGMT_SSL_ALWAYS
+} mgmt_ssl_t;
+
+struct tvec_base;
+
+/* reference counting for the global (per ctx) timer-wheel */
+struct gf_ctx_tw {
+ GF_REF_DECL;
+ struct tvec_base *timer_wheel; /* global timer-wheel instance */
+};
+
+struct _glusterfs_ctx {
+ cmd_args_t cmd_args;
+ char *process_uuid;
+ FILE *pidfp;
+ char fin;
+ void *timer;
+ void *ib;
+ struct call_pool *pool;
+ void *event_pool;
+ void *iobuf_pool;
+ void *logbuf_pool;
+ gf_lock_t lock;
+ size_t page_size;
+
+ /* one per volfile parse */
+ struct list_head graphs;
+
+ /* the latest graph in use */
+ glusterfs_graph_t *active;
+
+ /* fuse or nfs (but not protocol/server) */
+ void *master;
+
+ /* xlator implementing MOPs for centralized logging, volfile server */
+ void *mgmt;
+
+ /* listener of the commands from glusterd */
+ void *listener;
+
+ /* toggle switch for latency measurement */
+ unsigned char measure_latency;
+ pthread_t sigwaiter;
+ char *cmdlinestr;
+ struct mem_pool *stub_mem_pool;
+ unsigned char cleanup_started;
+ int graph_id; /* Incremented per graph, value should
+ indicate how many times the graph has
+ got changed */
+ pid_t mnt_pid; /* pid of the mount agent */
+ int process_mode; /*mode in which process is runninng*/
+ struct syncenv *env; /* The env pointer to the synctasks */
+
+ struct list_head mempool_list; /* used to keep a global list of
+ mempools, used to log details of
+ mempool in statedump */
+ char *statedump_path;
+
+ struct mem_pool *dict_pool;
+ struct mem_pool *dict_pair_pool;
+ struct mem_pool *dict_data_pool;
+
+ glusterfsd_mgmt_event_notify_fn_t notify; /* Used for xlators to make
+ call to fsd-mgmt */
+ gf_log_handle_t log; /* all logging related variables */
+
+ int mem_acct_enable;
+
+ int daemon_pipe[2];
+
+ struct clienttable *clienttable;
+
+ /*
+ * Should management connections use SSL? This is the only place we
+ * can put it where both daemon-startup and socket code will see it.
+ *
+ * Why is it an int? Because we're included before common-utils.h,
+ * which defines gf_boolean_t (what we really want). It doesn't make
+ * any sense, but it's not worth turning the codebase upside-down to
+ * fix it. Thus, an int.
+ */
+ int secure_mgmt;
+
+ /* The option is use to set cert_depth while management connection
+ use SSL
+ */
+ int ssl_cert_depth;
+
+ /*
+ * Should *our* server/inbound connections use SSL? This is only true
+ * if we're glusterd and secure_mgmt is set, or if we're glusterfsd
+ * and SSL is set on the I/O path. It should never be set e.g. for
+ * NFS.
+ */
+ mgmt_ssl_t secure_srvr;
+ /* Buffer to 'save' backtrace even under OOM-kill like situations*/
+ char btbuf[GF_BACKTRACE_LEN];
+
+ pthread_mutex_t notify_lock;
+ pthread_mutex_t cleanup_lock;
+ pthread_cond_t notify_cond;
+ int notifying;
+
+ struct gf_ctx_tw *tw; /* refcounted timer_wheel */
+
+ gf_lock_t volfile_lock;
+
+ /* configuration related elements, which gets changed
+ from global xlator */
+ struct {
+ char *metrics_dumppath;
+ } config;
+
+ struct {
+ gf_atomic_t max_dict_pairs;
+ gf_atomic_t total_pairs_used;
+ gf_atomic_t total_dicts_used;
+ } stats;
+
+ struct list_head volfile_list;
+ /* Add members to manage janitor threads for cleanup fd */
+ struct list_head janitor_fds;
+ pthread_cond_t fd_cond;
+ pthread_mutex_t fd_lock;
+ pthread_t janitor;
+ /* The variable is use to save total posix xlator count */
+ uint32_t pxl_count;
+
+ char volume_id[GF_UUID_BUF_SIZE]; /* Used only in protocol/client */
+};
+typedef struct _glusterfs_ctx glusterfs_ctx_t;
+
+typedef struct {
+ char volfile_checksum[SHA256_DIGEST_LENGTH];
+ char vol_id[NAME_MAX + 1];
+ struct list_head volfile_list;
+ glusterfs_graph_t *graph;
+ FILE *pidfp;
+} gf_volfile_t;
+
+glusterfs_ctx_t *
+glusterfs_ctx_new(void);
+
+struct gf_flock {
+ short l_type;
+ short l_whence;
+ off_t l_start;
+ off_t l_len;
+ pid_t l_pid;
+ gf_lkowner_t l_owner;
+};
+
+typedef struct lock_migration_info {
+ struct list_head list;
+ struct gf_flock flock;
+ char *client_uid;
+ uint32_t lk_flags;
+} lock_migration_info_t;
+
+#define GF_MUST_CHECK __attribute__((warn_unused_result))
+/*
+ * Some macros (e.g. ALLOC_OR_GOTO) set variables in function scope, but the
+ * calling function might not only declare the variable to keep the macro happy
+ * and not use it otherwise. In such cases, the following can be used to
+ * suppress the "set but not used" warning that would otherwise occur.
+ */
+#define GF_UNUSED __attribute__((unused))
+
+/*
+ * If present, this has the following effects:
+ *
+ * glusterd enables privileged commands over TCP
+ *
+ * all code enables SSL for outbound connections to management port
+ *
+ * glusterd enables SSL for inbound connections
+ *
+ * Servers and clients enable/disable SSL among themselves by other means.
+ * Making secure management connections conditional on a file is a bit of a
+ * hack, but we don't have any other place for such global settings across
+ * all of the affected components. Making it a compile-time option would
+ * reduce functionality, both for users and for testing (which can now be
+ * done using secure connections for all tests without change elsewhere).
+ *
+ */
+#define SECURE_ACCESS_FILE GLUSTERD_DEFAULT_WORKDIR "/secure-access"
+
+int
+glusterfs_graph_prepare(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx,
+ char *volume_name);
+int
+glusterfs_graph_destroy_residual(glusterfs_graph_t *graph);
+int
+glusterfs_graph_deactivate(glusterfs_graph_t *graph);
+int
+glusterfs_graph_destroy(glusterfs_graph_t *graph);
+int
+glusterfs_get_leaf_count(glusterfs_graph_t *graph);
+int
+glusterfs_graph_activate(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx);
+glusterfs_graph_t *
+glusterfs_graph_construct(FILE *fp);
+int
+glusterfs_graph_init(glusterfs_graph_t *graph);
+glusterfs_graph_t *
+glusterfs_graph_new(void);
+int
+glusterfs_graph_reconfigure(glusterfs_graph_t *oldgraph,
+ glusterfs_graph_t *newgraph);
+int
+glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path,
+ glusterfs_graph_t **newgraph);
+int
+glusterfs_graph_parent_up(glusterfs_graph_t *graph);
+
+void
+gf_free_mig_locks(lock_migration_info_t *locks);
+
+int
+glusterfs_read_secure_access_file(void);
+int
+glusterfs_graph_fini(glusterfs_graph_t *graph);
+#endif /* _GLUSTERFS_H */
diff --git a/libglusterfs/src/graph-utils.h b/libglusterfs/src/glusterfs/graph-utils.h
index 207664fdb1f..247f1a55d5a 100644
--- a/libglusterfs/src/graph-utils.h
+++ b/libglusterfs/src/glusterfs/graph-utils.h
@@ -11,10 +11,10 @@
#ifndef _GRAPH_H_
#define _GRAPH_H_
-int glusterfs_graph_print_file (FILE *file, glusterfs_graph_t *graph);
-
-char *glusterfs_graph_print_buf (glusterfs_graph_t *graph);
-
-int glusterfs_xlator_link (xlator_t *pxl, xlator_t *cxl);
-void glusterfs_graph_set_first (glusterfs_graph_t *graph, xlator_t *xl);
+int
+glusterfs_graph_print_file(FILE *file, glusterfs_graph_t *graph);
+int
+glusterfs_xlator_link(xlator_t *pxl, xlator_t *cxl);
+void
+glusterfs_graph_set_first(glusterfs_graph_t *graph, xlator_t *xl);
#endif
diff --git a/libglusterfs/src/hashfn.h b/libglusterfs/src/glusterfs/hashfn.h
index 06ae37e796b..6e92e706d8c 100644
--- a/libglusterfs/src/hashfn.h
+++ b/libglusterfs/src/glusterfs/hashfn.h
@@ -11,17 +11,13 @@
#ifndef __HASHFN_H__
#define __HASHFN_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <sys/types.h>
#include <stdint.h>
-uint32_t SuperFastHash (const char * data, int32_t len);
+uint32_t
+SuperFastHash(const char *data, int32_t len);
-uint32_t gf_dm_hashfn (const char *msg, int len);
+uint32_t
+gf_dm_hashfn(const char *msg, int len);
-uint32_t ReallySimpleHash (char *path, int len);
#endif /* __HASHFN_H__ */
diff --git a/libglusterfs/src/glusterfs/iatt.h b/libglusterfs/src/glusterfs/iatt.h
new file mode 100644
index 00000000000..f03d68b02f0
--- /dev/null
+++ b/libglusterfs/src/glusterfs/iatt.h
@@ -0,0 +1,489 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _IATT_H
+#define _IATT_H
+
+#if defined(GF_LINUX_HOST_OS)
+#include <sys/sysmacros.h> /* for makedev(3), major(3), minor(3) */
+#endif
+#include <sys/types.h>
+#include <sys/stat.h> /* for iatt <--> stat conversions */
+#include <unistd.h>
+
+#include "glusterfs/compat.h"
+#include "glusterfs/compat-uuid.h"
+
+typedef enum {
+ IA_INVAL = 0,
+ IA_IFREG,
+ IA_IFDIR,
+ IA_IFLNK,
+ IA_IFBLK,
+ IA_IFCHR,
+ IA_IFIFO,
+ IA_IFSOCK
+} ia_type_t;
+
+typedef struct {
+ uint8_t suid : 1;
+ uint8_t sgid : 1;
+ uint8_t sticky : 1;
+ struct {
+ uint8_t read : 1;
+ uint8_t write : 1;
+ uint8_t exec : 1;
+ } owner, group, other;
+} ia_prot_t;
+
+struct iatt {
+ uint64_t ia_flags;
+ uint64_t ia_ino; /* inode number */
+ uint64_t ia_dev; /* backing device ID */
+ uint64_t ia_rdev; /* device ID (if special file) */
+ uint64_t ia_size; /* file size in bytes */
+ uint32_t ia_nlink; /* Link count */
+ uint32_t ia_uid; /* user ID of owner */
+ uint32_t ia_gid; /* group ID of owner */
+ uint32_t ia_blksize; /* blocksize for filesystem I/O */
+ uint64_t ia_blocks; /* number of 512B blocks allocated */
+ int64_t ia_atime; /* last access time */
+ int64_t ia_mtime; /* last modification time */
+ int64_t ia_ctime; /* last status change time */
+ int64_t ia_btime; /* creation time. Fill using statx */
+ uint32_t ia_atime_nsec;
+ uint32_t ia_mtime_nsec;
+ uint32_t ia_ctime_nsec;
+ uint32_t ia_btime_nsec;
+ uint64_t ia_attributes; /* chattr related:compressed, immutable,
+ * append only, encrypted etc.*/
+ uint64_t ia_attributes_mask; /* Mask for the attributes */
+
+ uuid_t ia_gfid;
+ ia_type_t ia_type; /* type of file */
+ ia_prot_t ia_prot; /* protection */
+};
+
+struct old_iatt {
+ uint64_t ia_ino; /* inode number */
+ uuid_t ia_gfid;
+ uint64_t ia_dev; /* backing device ID */
+ ia_type_t ia_type; /* type of file */
+ ia_prot_t ia_prot; /* protection */
+ uint32_t ia_nlink; /* Link count */
+ uint32_t ia_uid; /* user ID of owner */
+ uint32_t ia_gid; /* group ID of owner */
+ uint64_t ia_rdev; /* device ID (if special file) */
+ uint64_t ia_size; /* file size in bytes */
+ uint32_t ia_blksize; /* blocksize for filesystem I/O */
+ uint64_t ia_blocks; /* number of 512B blocks allocated */
+ uint32_t ia_atime; /* last access time */
+ uint32_t ia_atime_nsec;
+ uint32_t ia_mtime; /* last modification time */
+ uint32_t ia_mtime_nsec;
+ uint32_t ia_ctime; /* last status change time */
+ uint32_t ia_ctime_nsec;
+};
+
+struct mdata_iatt {
+ int64_t ia_atime; /* last access time */
+ int64_t ia_mtime; /* last modification time */
+ int64_t ia_ctime; /* last status change time */
+ uint32_t ia_atime_nsec;
+ uint32_t ia_mtime_nsec;
+ uint32_t ia_ctime_nsec;
+};
+
+/* 64-bit mask for valid members in struct iatt. */
+#define IATT_TYPE 0x0000000000000001U
+#define IATT_MODE 0x0000000000000002U
+#define IATT_NLINK 0x0000000000000004U
+#define IATT_UID 0x0000000000000008U
+#define IATT_GID 0x0000000000000010U
+#define IATT_ATIME 0x0000000000000020U
+#define IATT_MTIME 0x0000000000000040U
+#define IATT_CTIME 0x0000000000000080U
+#define IATT_INO 0x0000000000000100U
+#define IATT_SIZE 0x0000000000000200U
+#define IATT_BLOCKS 0x0000000000000400U
+#define IATT_BTIME 0x0000000000000800U
+#define IATT_GFID 0x0000000000001000U
+
+/* Macros for checking validity of struct iatt members.*/
+#define IATT_TYPE_VALID(iaflags) (iaflags & IATT_TYPE)
+#define IATT_MODE_VALID(iaflags) (iaflags & IATT_MODE)
+#define IATT_NLINK_VALID(iaflags) (iaflags & IATT_NLINK)
+#define IATT_UID_VALID(iaflags) (iaflags & IATT_UID)
+#define IATT_GID_VALID(iaflags) (iaflags & IATT_GID)
+#define IATT_ATIME_VALID(iaflags) (iaflags & IATT_ATIME)
+#define IATT_MTIME_VALID(iaflags) (iaflags & IATT_MTIME)
+#define IATT_CTIME_VALID(iaflags) (iaflags & IATT_CTIME)
+#define IATT_INO_VALID(iaflags) (iaflags & IATT_INO)
+#define IATT_SIZE_VALID(iaflags) (iaflags & IATT_SIZE)
+#define IATT_BLOCKS_VALID(iaflags) (iaflags & IATT_BLOCKS)
+#define IATT_BTIME_VALID(iaflags) (iaflags & IATT_BTIME)
+#define IATT_GFID_VALID(iaflags) (iaflags & IATT_GFID)
+
+#define IA_ISREG(t) (t == IA_IFREG)
+#define IA_ISDIR(t) (t == IA_IFDIR)
+#define IA_ISLNK(t) (t == IA_IFLNK)
+#define IA_ISBLK(t) (t == IA_IFBLK)
+#define IA_ISCHR(t) (t == IA_IFCHR)
+#define IA_ISFIFO(t) (t == IA_IFIFO)
+#define IA_ISSOCK(t) (t == IA_IFSOCK)
+#define IA_ISINVAL(t) (t == IA_INVAL)
+
+#define IA_PROT_RUSR(prot) ((prot).owner.read == 1)
+#define IA_PROT_WUSR(prot) ((prot).owner.write == 1)
+#define IA_PROT_XUSR(prot) ((prot).owner.exec == 1)
+
+#define IA_PROT_RGRP(prot) ((prot).group.read == 1)
+#define IA_PROT_WGRP(prot) ((prot).group.write == 1)
+#define IA_PROT_XGRP(prot) ((prot).group.exec == 1)
+
+#define IA_PROT_ROTH(prot) ((prot).other.read == 1)
+#define IA_PROT_WOTH(prot) ((prot).other.write == 1)
+#define IA_PROT_XOTH(prot) ((prot).other.exec == 1)
+
+#define IA_PROT_SUID(prot) ((prot).suid == 1)
+#define IA_PROT_SGID(prot) ((prot).sgid == 1)
+#define IA_PROT_STCKY(prot) ((prot).sticky == 1)
+
+#define IA_FILE_OR_DIR(t) (IA_ISREG(t) || IA_ISDIR(t))
+
+static inline uint32_t
+ia_major(uint64_t ia_dev)
+{
+ return (uint32_t)(ia_dev >> 32);
+}
+
+static inline uint32_t
+ia_minor(uint64_t ia_dev)
+{
+ return (uint32_t)(ia_dev & 0xffffffff);
+}
+
+static inline uint64_t
+ia_makedev(uint32_t ia_maj, uint32_t ia_min)
+{
+ return ((((uint64_t)ia_maj) << 32) | ia_min);
+}
+
+static inline ia_prot_t
+ia_prot_from_st_mode(mode_t mode)
+{
+ ia_prot_t ia_prot = {
+ 0,
+ };
+
+ if (mode & S_ISUID)
+ ia_prot.suid = 1;
+ if (mode & S_ISGID)
+ ia_prot.sgid = 1;
+ if (mode & S_ISVTX)
+ ia_prot.sticky = 1;
+
+ if (mode & S_IRUSR)
+ ia_prot.owner.read = 1;
+ if (mode & S_IWUSR)
+ ia_prot.owner.write = 1;
+ if (mode & S_IXUSR)
+ ia_prot.owner.exec = 1;
+
+ if (mode & S_IRGRP)
+ ia_prot.group.read = 1;
+ if (mode & S_IWGRP)
+ ia_prot.group.write = 1;
+ if (mode & S_IXGRP)
+ ia_prot.group.exec = 1;
+
+ if (mode & S_IROTH)
+ ia_prot.other.read = 1;
+ if (mode & S_IWOTH)
+ ia_prot.other.write = 1;
+ if (mode & S_IXOTH)
+ ia_prot.other.exec = 1;
+
+ return ia_prot;
+}
+
+static inline ia_type_t
+ia_type_from_st_mode(mode_t mode)
+{
+ ia_type_t type = IA_INVAL;
+
+ if (S_ISREG(mode))
+ type = IA_IFREG;
+ if (S_ISDIR(mode))
+ type = IA_IFDIR;
+ if (S_ISLNK(mode))
+ type = IA_IFLNK;
+ if (S_ISBLK(mode))
+ type = IA_IFBLK;
+ if (S_ISCHR(mode))
+ type = IA_IFCHR;
+ if (S_ISFIFO(mode))
+ type = IA_IFIFO;
+ if (S_ISSOCK(mode))
+ type = IA_IFSOCK;
+
+ return type;
+}
+
+static inline uint32_t
+st_mode_prot_from_ia(ia_prot_t prot)
+{
+ uint32_t prot_bit = 0;
+
+ if (prot.suid)
+ prot_bit |= S_ISUID;
+ if (prot.sgid)
+ prot_bit |= S_ISGID;
+ if (prot.sticky)
+ prot_bit |= S_ISVTX;
+
+ if (prot.owner.read)
+ prot_bit |= S_IRUSR;
+ if (prot.owner.write)
+ prot_bit |= S_IWUSR;
+ if (prot.owner.exec)
+ prot_bit |= S_IXUSR;
+
+ if (prot.group.read)
+ prot_bit |= S_IRGRP;
+ if (prot.group.write)
+ prot_bit |= S_IWGRP;
+ if (prot.group.exec)
+ prot_bit |= S_IXGRP;
+
+ if (prot.other.read)
+ prot_bit |= S_IROTH;
+ if (prot.other.write)
+ prot_bit |= S_IWOTH;
+ if (prot.other.exec)
+ prot_bit |= S_IXOTH;
+
+ return prot_bit;
+}
+
+static inline uint32_t
+st_mode_type_from_ia(ia_type_t type)
+{
+ uint32_t type_bit = 0;
+
+ switch (type) {
+ case IA_IFREG:
+ type_bit = S_IFREG;
+ break;
+ case IA_IFDIR:
+ type_bit = S_IFDIR;
+ break;
+ case IA_IFLNK:
+ type_bit = S_IFLNK;
+ break;
+ case IA_IFBLK:
+ type_bit = S_IFBLK;
+ break;
+ case IA_IFCHR:
+ type_bit = S_IFCHR;
+ break;
+ case IA_IFIFO:
+ type_bit = S_IFIFO;
+ break;
+ case IA_IFSOCK:
+ type_bit = S_IFSOCK;
+ break;
+ case IA_INVAL:
+ break;
+ }
+
+ return type_bit;
+}
+
+static inline mode_t
+st_mode_from_ia(ia_prot_t prot, ia_type_t type)
+{
+ mode_t st_mode = 0;
+ uint32_t type_bit = 0;
+ uint32_t prot_bit = 0;
+
+ type_bit = st_mode_type_from_ia(type);
+ prot_bit = st_mode_prot_from_ia(prot);
+
+ st_mode = (type_bit | prot_bit);
+
+ return st_mode;
+}
+
+static inline void
+iatt_to_mdata(struct mdata_iatt *mdata, struct iatt *iatt)
+{
+ mdata->ia_atime = iatt->ia_atime;
+ mdata->ia_atime_nsec = iatt->ia_atime_nsec;
+ mdata->ia_mtime = iatt->ia_mtime;
+ mdata->ia_mtime_nsec = iatt->ia_mtime_nsec;
+ mdata->ia_ctime = iatt->ia_ctime;
+ mdata->ia_ctime_nsec = iatt->ia_ctime_nsec;
+}
+
+static inline int
+iatt_from_stat(struct iatt *iatt, struct stat *stat)
+{
+ iatt->ia_dev = stat->st_dev;
+ iatt->ia_ino = stat->st_ino;
+
+ iatt->ia_type = ia_type_from_st_mode(stat->st_mode);
+ iatt->ia_prot = ia_prot_from_st_mode(stat->st_mode);
+
+ iatt->ia_nlink = stat->st_nlink;
+ iatt->ia_uid = stat->st_uid;
+ iatt->ia_gid = stat->st_gid;
+
+ iatt->ia_rdev = ia_makedev(major(stat->st_rdev), minor(stat->st_rdev));
+
+ iatt->ia_size = stat->st_size;
+ iatt->ia_blksize = stat->st_blksize;
+ iatt->ia_blocks = stat->st_blocks;
+
+ /* There is a possibility that the backend FS (like XFS) can
+ allocate blocks beyond EOF for better performance reasons, which
+ results in 'st_blocks' with higher values than what is consumed by
+ the file descriptor. This would break few logic inside GlusterFS,
+ like quota behavior etc, thus we need the exact number of blocks
+ which are consumed by the file to the higher layers inside GlusterFS.
+ Currently, this logic won't work for sparse files (ie, file with
+ holes)
+ */
+ {
+ uint64_t maxblocks;
+
+ maxblocks = (iatt->ia_size + 511) / 512;
+
+ if (iatt->ia_blocks > maxblocks)
+ iatt->ia_blocks = maxblocks;
+ }
+
+ iatt->ia_atime = stat->st_atime;
+ iatt->ia_atime_nsec = ST_ATIM_NSEC(stat);
+
+ iatt->ia_mtime = stat->st_mtime;
+ iatt->ia_mtime_nsec = ST_MTIM_NSEC(stat);
+
+ iatt->ia_ctime = stat->st_ctime;
+ iatt->ia_ctime_nsec = ST_CTIM_NSEC(stat);
+
+ /* Setting IATT_INO in ia_flags is done in posix_fill_ino_from_gfid. */
+ iatt->ia_flags = iatt->ia_flags | IATT_TYPE | IATT_MODE | IATT_NLINK |
+ IATT_UID | IATT_GID | IATT_SIZE | IATT_BLOCKS |
+ IATT_ATIME | IATT_MTIME | IATT_CTIME;
+
+ return 0;
+}
+
+static inline int
+iatt_to_stat(struct iatt *iatt, struct stat *stat)
+{
+ stat->st_dev = iatt->ia_dev;
+ stat->st_ino = iatt->ia_ino;
+
+ stat->st_mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
+
+ stat->st_nlink = iatt->ia_nlink;
+ stat->st_uid = iatt->ia_uid;
+ stat->st_gid = iatt->ia_gid;
+
+ stat->st_rdev = makedev(ia_major(iatt->ia_rdev), ia_minor(iatt->ia_rdev));
+
+ stat->st_size = iatt->ia_size;
+ stat->st_blksize = iatt->ia_blksize;
+ stat->st_blocks = iatt->ia_blocks;
+
+ stat->st_atime = iatt->ia_atime;
+ ST_ATIM_NSEC_SET(stat, iatt->ia_atime_nsec);
+
+ stat->st_mtime = iatt->ia_mtime;
+ ST_MTIM_NSEC_SET(stat, iatt->ia_mtime_nsec);
+
+ stat->st_ctime = iatt->ia_ctime;
+ ST_CTIM_NSEC_SET(stat, iatt->ia_ctime_nsec);
+
+ return 0;
+}
+
+static inline void
+oldiatt_from_iatt(struct old_iatt *o_iatt, struct iatt *c_iatt)
+{
+ o_iatt->ia_dev = c_iatt->ia_dev;
+ o_iatt->ia_ino = c_iatt->ia_ino;
+ o_iatt->ia_type = c_iatt->ia_type;
+ o_iatt->ia_prot = c_iatt->ia_prot;
+ o_iatt->ia_nlink = c_iatt->ia_nlink;
+ o_iatt->ia_uid = c_iatt->ia_uid;
+ o_iatt->ia_gid = c_iatt->ia_gid;
+ o_iatt->ia_rdev = c_iatt->ia_rdev;
+ o_iatt->ia_size = c_iatt->ia_size;
+ o_iatt->ia_blksize = c_iatt->ia_blksize;
+ o_iatt->ia_blocks = c_iatt->ia_blocks;
+ o_iatt->ia_atime = c_iatt->ia_atime;
+ o_iatt->ia_atime_nsec = c_iatt->ia_atime_nsec;
+ o_iatt->ia_mtime = c_iatt->ia_mtime;
+ o_iatt->ia_mtime_nsec = c_iatt->ia_mtime_nsec;
+ o_iatt->ia_ctime = c_iatt->ia_ctime;
+ o_iatt->ia_ctime_nsec = c_iatt->ia_ctime_nsec;
+
+ gf_uuid_copy(o_iatt->ia_gfid, c_iatt->ia_gfid);
+
+ return;
+}
+
+static inline void
+iatt_from_oldiatt(struct iatt *c_iatt, struct old_iatt *o_iatt)
+{
+ c_iatt->ia_dev = o_iatt->ia_dev;
+ c_iatt->ia_ino = o_iatt->ia_ino;
+ c_iatt->ia_type = o_iatt->ia_type;
+ c_iatt->ia_prot = o_iatt->ia_prot;
+ c_iatt->ia_nlink = o_iatt->ia_nlink;
+ c_iatt->ia_uid = o_iatt->ia_uid;
+ c_iatt->ia_gid = o_iatt->ia_gid;
+ c_iatt->ia_rdev = o_iatt->ia_rdev;
+ c_iatt->ia_size = o_iatt->ia_size;
+ c_iatt->ia_blksize = o_iatt->ia_blksize;
+ c_iatt->ia_blocks = o_iatt->ia_blocks;
+ c_iatt->ia_atime = o_iatt->ia_atime;
+ c_iatt->ia_atime_nsec = o_iatt->ia_atime_nsec;
+ c_iatt->ia_mtime = o_iatt->ia_mtime;
+ c_iatt->ia_mtime_nsec = o_iatt->ia_mtime_nsec;
+ c_iatt->ia_ctime = o_iatt->ia_ctime;
+ c_iatt->ia_ctime_nsec = o_iatt->ia_ctime_nsec;
+
+ gf_uuid_copy(c_iatt->ia_gfid, o_iatt->ia_gfid);
+
+ c_iatt->ia_attributes = 0;
+
+ c_iatt->ia_flags = IATT_TYPE | IATT_MODE | IATT_NLINK | IATT_INO |
+ IATT_UID | IATT_GID | IATT_SIZE | IATT_BLOCKS |
+ IATT_ATIME | IATT_MTIME | IATT_CTIME | IATT_GFID;
+
+ return;
+}
+
+static inline int
+is_same_mode(ia_prot_t prot1, ia_prot_t prot2)
+{
+ int ret = 0;
+
+ if (st_mode_prot_from_ia(prot1) != st_mode_prot_from_ia(prot2))
+ ret = -1;
+
+ return ret;
+}
+
+#endif /* _IATT_H */
diff --git a/libglusterfs/src/glusterfs/inode.h b/libglusterfs/src/glusterfs/inode.h
new file mode 100644
index 00000000000..4b28da510c7
--- /dev/null
+++ b/libglusterfs/src/glusterfs/inode.h
@@ -0,0 +1,306 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _INODE_H
+#define _INODE_H
+
+#include <stdint.h>
+#include <sys/types.h>
+
+#define LOOKUP_NEEDED 1
+#define LOOKUP_NOT_NEEDED 2
+
+#define DEFAULT_INODE_MEMPOOL_ENTRIES 32 * 1024
+#define INODE_PATH_FMT "<gfid:%s>"
+struct _inode_table;
+typedef struct _inode_table inode_table_t;
+
+struct _inode;
+typedef struct _inode inode_t;
+
+struct _dentry;
+typedef struct _dentry dentry_t;
+
+#include "glusterfs/list.h"
+#include "glusterfs/iatt.h"
+#include "glusterfs/compat-uuid.h"
+#include "glusterfs/fd.h"
+
+struct _inode_table {
+ pthread_mutex_t lock;
+ size_t hashsize; /* bucket size of inode hash and dentry hash */
+ char *name; /* name of the inode table, just for gf_log() */
+ inode_t *root; /* root directory inode, with number 1 */
+ xlator_t *xl; /* xlator to be called to do purge */
+ uint32_t lru_limit; /* maximum LRU cache size */
+ struct list_head *inode_hash; /* buckets for inode hash table */
+ struct list_head *name_hash; /* buckets for dentry hash table */
+ struct list_head active; /* list of inodes currently active (in an fop) */
+ uint32_t active_size; /* count of inodes in active list */
+ struct list_head lru; /* list of inodes recently used.
+ lru.next most recent */
+ uint32_t lru_size; /* count of inodes in lru list */
+ struct list_head purge; /* list of inodes to be purged soon */
+ uint32_t purge_size; /* count of inodes in purge list */
+
+ struct mem_pool *inode_pool; /* memory pool for inodes */
+ struct mem_pool *dentry_pool; /* memory pool for dentrys */
+ struct mem_pool *fd_mem_pool; /* memory pool for fd_t */
+ int ctxcount; /* number of slots in inode->ctx */
+
+ /* This is required for 'invalidation' when 'nlookup' would be used,
+ specially in case of fuse-bridge */
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *);
+ xlator_t *invalidator_xl;
+ struct list_head invalidate; /* inodes which are in invalidation queue */
+ uint32_t invalidate_size; /* count of inodes in invalidation list */
+
+ /* flag to indicate whether the cleanup of the inode
+ table started or not */
+ gf_boolean_t cleanup_started;
+};
+
+struct _dentry {
+ struct list_head inode_list; /* list of dentries of inode */
+ struct list_head hash; /* hash table pointers */
+ inode_t *inode; /* inode of this directory entry */
+ char *name; /* name of the directory entry */
+ inode_t *parent; /* directory of the entry */
+};
+
+struct _inode_ctx {
+ union {
+ uint64_t key;
+ xlator_t *xl_key;
+ };
+ /* if value1 is 0, then field is not set.. */
+ union {
+ uint64_t value1;
+ void *ptr1;
+ };
+ /* if value2 is 0, then field is not set.. */
+ union {
+ uint64_t value2;
+ void *ptr2;
+ };
+ int ref; /* This is for debugging inode ref leaks,
+ basically helps in identifying the xlator
+ causing th ref leak, it is printed in
+ statedump */
+};
+
+struct _inode {
+ inode_table_t *table; /* the table this inode belongs to */
+ uuid_t gfid;
+ gf_lock_t lock;
+ gf_atomic_t nlookup;
+ uint32_t fd_count; /* Open fd count */
+ uint32_t active_fd_count; /* Active open fd count */
+ uint32_t ref; /* reference count on this inode */
+ ia_type_t ia_type; /* what kind of file */
+ struct list_head fd_list; /* list of open files on this inode */
+ struct list_head dentry_list; /* list of directory entries for this inode */
+ struct list_head hash; /* hash table pointers */
+ struct list_head list; /* active/lru/purge */
+
+ struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
+ bool in_invalidate_list; /* Set if inode is in table invalidate list */
+ bool invalidate_sent; /* Set it if invalidator_fn is called for inode */
+};
+
+#define UUID0_STR "00000000-0000-0000-0000-000000000000"
+#define GFID_STR_PFX "<gfid:" UUID0_STR ">"
+#define GFID_STR_PFX_LEN (sizeof(GFID_STR_PFX) - 1)
+
+inode_table_t *
+inode_table_new(uint32_t lru_limit, xlator_t *xl);
+
+inode_table_t *
+inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *),
+ xlator_t *invalidator_xl);
+
+void
+inode_table_destroy_all(glusterfs_ctx_t *ctx);
+
+void
+inode_table_destroy(inode_table_t *inode_table);
+
+inode_t *
+inode_new(inode_table_t *table);
+
+inode_t *
+inode_link(inode_t *inode, inode_t *parent, const char *name,
+ struct iatt *stbuf);
+
+void
+inode_unlink(inode_t *inode, inode_t *parent, const char *name);
+
+inode_t *
+inode_parent(inode_t *inode, uuid_t pargfid, const char *name);
+
+inode_t *
+inode_ref(inode_t *inode);
+
+inode_t *
+inode_unref(inode_t *inode);
+
+int
+inode_lookup(inode_t *inode);
+
+int
+inode_forget(inode_t *inode, uint64_t nlookup);
+int
+inode_forget_with_unref(inode_t *inode, uint64_t nlookup);
+
+int
+inode_ref_reduce_by_n(inode_t *inode, uint64_t nref);
+
+int
+inode_invalidate(inode_t *inode);
+
+int
+inode_rename(inode_table_t *table, inode_t *olddir, const char *oldname,
+ inode_t *newdir, const char *newname, inode_t *inode,
+ struct iatt *stbuf);
+
+inode_t *
+inode_grep(inode_table_t *table, inode_t *parent, const char *name);
+
+int
+inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name,
+ uuid_t gfid, ia_type_t *type);
+
+inode_t *
+inode_find(inode_table_t *table, uuid_t gfid);
+
+int
+inode_path(inode_t *inode, const char *name, char **bufp);
+
+int
+__inode_path(inode_t *inode, const char *name, char **bufp);
+
+inode_t *
+inode_from_path(inode_table_t *table, const char *path);
+
+inode_t *
+inode_resolve(inode_table_t *table, char *path);
+
+/* deal with inode ctx's both values */
+
+int
+inode_ctx_set2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
+int
+__inode_ctx_set2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
+
+int
+inode_ctx_get2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
+int
+__inode_ctx_get2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
+
+int
+inode_ctx_del2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
+
+int
+inode_ctx_reset2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2);
+
+/* deal with inode ctx's 1st value */
+
+int
+inode_ctx_set0(inode_t *inode, xlator_t *xlator, uint64_t *value1);
+
+int
+__inode_ctx_set0(inode_t *inode, xlator_t *xlator, uint64_t *value1);
+
+int
+inode_ctx_get0(inode_t *inode, xlator_t *xlator, uint64_t *value1);
+int
+__inode_ctx_get0(inode_t *inode, xlator_t *xlator, uint64_t *value1);
+
+int
+inode_ctx_reset0(inode_t *inode, xlator_t *xlator, uint64_t *value1);
+
+/* deal with inode ctx's 2st value */
+
+int
+inode_ctx_set1(inode_t *inode, xlator_t *xlator, uint64_t *value2);
+
+int
+__inode_ctx_set1(inode_t *inode, xlator_t *xlator, uint64_t *value2);
+
+int
+inode_ctx_get1(inode_t *inode, xlator_t *xlator, uint64_t *value2);
+int
+__inode_ctx_get1(inode_t *inode, xlator_t *xlator, uint64_t *value2);
+
+int
+inode_ctx_reset1(inode_t *inode, xlator_t *xlator, uint64_t *value2);
+
+static inline int
+__inode_ctx_put(inode_t *inode, xlator_t *this, uint64_t v)
+{
+ return __inode_ctx_set0(inode, this, &v);
+}
+
+static inline int
+inode_ctx_put(inode_t *inode, xlator_t *this, uint64_t v)
+{
+ return inode_ctx_set0(inode, this, &v);
+}
+
+#define __inode_ctx_set(i, x, v_p) __inode_ctx_set0(i, x, v_p)
+
+#define inode_ctx_set(i, x, v_p) inode_ctx_set0(i, x, v_p)
+
+#define inode_ctx_reset(i, x, v) inode_ctx_reset0(i, x, v)
+
+#define __inode_ctx_get(i, x, v) __inode_ctx_get0(i, x, v)
+
+#define inode_ctx_get(i, x, v) inode_ctx_get0(i, x, v)
+
+#define inode_ctx_del(i, x, v) inode_ctx_del2(i, x, v, 0)
+#define inode_ctx_del1(i, x, v) inode_ctx_del2(i, x, 0, v)
+
+gf_boolean_t
+__is_root_gfid(uuid_t gfid);
+
+void
+__inode_table_set_lru_limit(inode_table_t *table, uint32_t lru_limit);
+
+void
+inode_table_set_lru_limit(inode_table_t *table, uint32_t lru_limit);
+
+void
+inode_ctx_merge(fd_t *fd, inode_t *inode, inode_t *linked_inode);
+
+int
+inode_is_linked(inode_t *inode);
+
+void
+inode_set_need_lookup(inode_t *inode, xlator_t *this);
+
+gf_boolean_t
+inode_needs_lookup(inode_t *inode, xlator_t *this);
+
+int
+inode_has_dentry(inode_t *inode);
+
+size_t
+inode_ctx_size(inode_t *inode);
+
+void
+inode_find_directory_name(inode_t *inode, const char **name);
+#endif /* _INODE_H */
diff --git a/libglusterfs/src/glusterfs/iobuf.h b/libglusterfs/src/glusterfs/iobuf.h
new file mode 100644
index 00000000000..4bd443efd5e
--- /dev/null
+++ b/libglusterfs/src/glusterfs/iobuf.h
@@ -0,0 +1,194 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _IOBUF_H_
+#define _IOBUF_H_
+
+#include <stddef.h> // for size_t
+#include <sys/mman.h>
+#include "glusterfs/atomic.h" // for gf_atomic_t
+#include <sys/uio.h> // for struct iovec
+#include "glusterfs/locking.h" // for gf_lock_t
+#include "glusterfs/list.h"
+
+#define GF_VARIABLE_IOBUF_COUNT 32
+
+#define GF_RDMA_DEVICE_COUNT 8
+
+/* Lets try to define the new anonymous mapping
+ * flag, in case the system is still using the
+ * now deprecated MAP_ANON flag.
+ *
+ * Also, this should ideally be in a centralized/common
+ * header which can be used by other source files also.
+ */
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+#define GF_ALIGN_BUF(ptr, bound) \
+ ((void *)((unsigned long)(ptr + bound - 1) & (unsigned long)(~(bound - 1))))
+
+#define GF_IOBUF_ALIGN_SIZE 512
+
+/* one allocatable unit for the consumers of the IOBUF API */
+/* each unit hosts @page_size bytes of memory */
+struct iobuf;
+
+/* one region of memory mapped from the operating system */
+/* each region MMAPs @arena_size bytes of memory */
+/* each arena hosts @arena_size / @page_size IOBUFs */
+struct iobuf_arena;
+
+/* expandable and contractable pool of memory, internally broken into arenas */
+struct iobuf_pool;
+
+struct iobuf_init_config {
+ size_t pagesize;
+ int32_t num_pages;
+};
+
+struct iobuf {
+ union {
+ struct list_head list;
+ struct {
+ struct iobuf *next;
+ struct iobuf *prev;
+ };
+ };
+ struct iobuf_arena *iobuf_arena;
+
+ gf_lock_t lock; /* for ->ptr and ->ref */
+ gf_atomic_t ref; /* 0 == passive, >0 == active */
+
+ void *ptr; /* usable memory region by the consumer */
+
+ void *free_ptr; /* in case of stdalloc, this is the
+ one to be freed */
+};
+
+struct iobuf_arena {
+ union {
+ struct list_head list;
+ struct {
+ struct iobuf_arena *next;
+ struct iobuf_arena *prev;
+ };
+ };
+
+ struct list_head all_list;
+ size_t page_size; /* size of all iobufs in this arena */
+ size_t arena_size;
+ /* this is equal to rounded_size * num_iobufs.
+ (rounded_size comes with gf_iobuf_get_pagesize().) */
+ size_t page_count;
+
+ struct iobuf_pool *iobuf_pool;
+
+ void *mem_base;
+ struct iobuf *iobufs; /* allocated iobufs list */
+
+ struct iobuf active; /* head node iobuf
+ (unused by itself) */
+ struct iobuf passive; /* head node iobuf
+ (unused by itself) */
+ uint64_t alloc_cnt; /* total allocs in this pool */
+ int active_cnt;
+ int passive_cnt;
+ int max_active; /* max active buffers at a given time */
+};
+
+struct iobuf_pool {
+ pthread_mutex_t mutex;
+ size_t arena_size; /* size of memory region in
+ arena */
+ size_t default_page_size; /* default size of iobuf */
+
+ struct list_head all_arenas;
+ struct list_head arenas[GF_VARIABLE_IOBUF_COUNT];
+ /* array of arenas. Each element of the array is a list of arenas
+ holding iobufs of particular page_size */
+
+ struct list_head filled[GF_VARIABLE_IOBUF_COUNT];
+ /* array of arenas without free iobufs */
+
+ struct list_head purge[GF_VARIABLE_IOBUF_COUNT];
+ /* array of of arenas which can be purged */
+
+ uint64_t request_misses; /* mostly the requests for higher
+ value of iobufs */
+ int arena_cnt;
+ int rdma_device_count;
+ struct list_head *mr_list[GF_RDMA_DEVICE_COUNT];
+ void *device[GF_RDMA_DEVICE_COUNT];
+ int (*rdma_registration)(void **, void *);
+ int (*rdma_deregistration)(struct list_head **, struct iobuf_arena *);
+};
+
+struct iobuf_pool *
+iobuf_pool_new(void);
+void
+iobuf_pool_destroy(struct iobuf_pool *iobuf_pool);
+struct iobuf *
+iobuf_get(struct iobuf_pool *iobuf_pool);
+void
+iobuf_unref(struct iobuf *iobuf);
+struct iobuf *
+iobuf_ref(struct iobuf *iobuf);
+void
+iobuf_pool_destroy(struct iobuf_pool *iobuf_pool);
+void
+iobuf_to_iovec(struct iobuf *iob, struct iovec *iov);
+
+#define iobuf_ptr(iob) ((iob)->ptr)
+#define iobpool_default_pagesize(iobpool) ((iobpool)->default_page_size)
+#define iobuf_pagesize(iob) (iob->iobuf_arena->page_size)
+
+struct iobref {
+ gf_lock_t lock;
+ gf_atomic_t ref;
+ struct iobuf **iobrefs;
+ int allocated;
+ int used;
+};
+
+struct iobref *
+iobref_new(void);
+struct iobref *
+iobref_ref(struct iobref *iobref);
+void
+iobref_unref(struct iobref *iobref);
+int
+iobref_add(struct iobref *iobref, struct iobuf *iobuf);
+int
+iobref_merge(struct iobref *to, struct iobref *from);
+void
+iobref_clear(struct iobref *iobref);
+
+size_t
+iobuf_size(struct iobuf *iobuf);
+size_t
+iobref_size(struct iobref *iobref);
+void
+iobuf_stats_dump(struct iobuf_pool *iobuf_pool);
+
+struct iobuf *
+iobuf_get2(struct iobuf_pool *iobuf_pool, size_t page_size);
+
+struct iobuf *
+iobuf_get_page_aligned(struct iobuf_pool *iobuf_pool, size_t page_size,
+ size_t align_size);
+
+int
+iobuf_copy(struct iobuf_pool *iobuf_pool, const struct iovec *iovec_src,
+ int iovcnt, struct iobref **iobref, struct iobuf **iobuf,
+ struct iovec *iov_dst);
+
+#endif /* !_IOBUF_H_ */
diff --git a/libglusterfs/src/glusterfs/latency.h b/libglusterfs/src/glusterfs/latency.h
new file mode 100644
index 00000000000..4d601bbcbd6
--- /dev/null
+++ b/libglusterfs/src/glusterfs/latency.h
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __LATENCY_H__
+#define __LATENCY_H__
+
+#include <inttypes.h>
+#include <time.h>
+
+typedef struct _gf_latency {
+ uint64_t min; /* min time for the call (nanoseconds) */
+ uint64_t max; /* max time for the call (nanoseconds) */
+ uint64_t total; /* total time (nanoseconds) */
+ uint64_t count;
+} gf_latency_t;
+
+gf_latency_t *
+gf_latency_new(size_t n);
+
+void
+gf_latency_reset(gf_latency_t *lat);
+
+void
+gf_latency_update(gf_latency_t *lat, struct timespec *begin,
+ struct timespec *end);
+#endif /* __LATENCY_H__ */
diff --git a/libglusterfs/src/glusterfs/libglusterfs-messages.h b/libglusterfs/src/glusterfs/libglusterfs-messages.h
new file mode 100644
index 00000000000..cb31dd7614b
--- /dev/null
+++ b/libglusterfs/src/glusterfs/libglusterfs-messages.h
@@ -0,0 +1,245 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _LG_MESSAGES_H_
+#define _LG_MESSAGES_H_
+
+#include "glusterfs/glfs-message-id.h"
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(
+ LIBGLUSTERFS, LG_MSG_ASPRINTF_FAILED, LG_MSG_INVALID_ENTRY,
+ LG_MSG_COUNT_LESS_THAN_ZERO, LG_MSG_COUNT_LESS_THAN_DATA_PAIRS,
+ LG_MSG_VALUE_LENGTH_LESS_THAN_ZERO, LG_MSG_PAIRS_LESS_THAN_COUNT,
+ LG_MSG_KEY_OR_VALUE_NULL, LG_MSG_FAILED_TO_LOG_DICT,
+ LG_MSG_NULL_VALUE_IN_DICT, LG_MSG_DIR_OP_FAILED,
+ LG_MSG_STORE_HANDLE_CREATE_FAILED, LG_MSG_FILE_OP_FAILED,
+ LG_MSG_FILE_STAT_FAILED, LG_MSG_LOCK_FAILED, LG_MSG_UNLOCK_FAILED,
+ LG_MSG_DICT_SERIAL_FAILED, LG_MSG_DICT_UNSERIAL_FAILED, LG_MSG_NO_MEMORY,
+ LG_MSG_VOLUME_ERROR, LG_MSG_SUB_VOLUME_ERROR, LG_MSG_SYNTAX_ERROR,
+ LG_MSG_BACKTICK_PARSE_FAILED, LG_MSG_BUFFER_ERROR, LG_MSG_STRDUP_ERROR,
+ LG_MSG_HASH_FUNC_ERROR, LG_MSG_GET_BUCKET_FAILED, LG_MSG_INSERT_FAILED,
+ LG_MSG_OUT_OF_RANGE, LG_MSG_VALIDATE_RETURNS, LG_MSG_VALIDATE_REC_FAILED,
+ LG_MSG_RB_TABLE_CREATE_FAILED, LG_MSG_PATH_NOT_FOUND,
+ LG_MSG_EXPAND_FD_TABLE_FAILED, LG_MSG_MAPPING_FAILED,
+ LG_MSG_INIT_IOBUF_FAILED, LG_MSG_PAGE_SIZE_EXCEEDED, LG_MSG_ARENA_NOT_FOUND,
+ LG_MSG_IOBUF_NOT_FOUND, LG_MSG_POOL_NOT_FOUND, LG_MSG_SET_ATTRIBUTE_FAILED,
+ LG_MSG_READ_ATTRIBUTE_FAILED, LG_MSG_UNMOUNT_FAILED,
+ LG_MSG_LATENCY_MEASUREMENT_STATE, LG_MSG_NO_PERM, LG_MSG_NO_KEY,
+ LG_MSG_DICT_NULL, LG_MSG_INIT_TIMER_FAILED, LG_MSG_FD_ANONYMOUS_FAILED,
+ LG_MSG_FD_CREATE_FAILED, LG_MSG_BUFFER_FULL, LG_MSG_FWRITE_FAILED,
+ LG_MSG_PRINT_FAILED, LG_MSG_MEM_POOL_DESTROY,
+ LG_MSG_EXPAND_CLIENT_TABLE_FAILED, LG_MSG_DISCONNECT_CLIENT,
+ LG_MSG_PIPE_CREATE_FAILED, LG_MSG_SET_PIPE_FAILED,
+ LG_MSG_REGISTER_PIPE_FAILED, LG_MSG_POLL_IGNORE_MULTIPLE_THREADS,
+ LG_MSG_INDEX_NOT_FOUND, LG_MSG_EPOLL_FD_CREATE_FAILED,
+ LG_MSG_SLOT_NOT_FOUND, LG_MSG_STALE_FD_FOUND, LG_MSG_GENERATION_MISMATCH,
+ LG_MSG_PTHREAD_KEY_CREATE_FAILED, LG_MSG_TRANSLATOR_INIT_FAILED,
+ LG_MSG_UUID_BUF_INIT_FAILED, LG_MSG_LKOWNER_BUF_INIT_FAILED,
+ LG_MSG_SYNCTASK_INIT_FAILED, LG_MSG_SYNCOPCTX_INIT_FAILED,
+ LG_MSG_GLOBAL_INIT_FAILED, LG_MSG_PTHREAD_FAILED, LG_MSG_DIR_IS_SYMLINK,
+ LG_MSG_RESOLVE_HOSTNAME_FAILED, LG_MSG_GETADDRINFO_FAILED,
+ LG_MSG_GETNAMEINFO_FAILED, LG_MSG_PATH_ERROR, LG_MSG_INET_PTON_FAILED,
+ LG_MSG_NEGATIVE_NUM_PASSED, LG_MSG_GETHOSTNAME_FAILED,
+ LG_MSG_RESERVED_PORTS_ERROR, LG_MSG_INVALID_PORT, LG_MSG_INVALID_FAMILY,
+ LG_MSG_CONVERSION_FAILED, LG_MSG_SKIP_HEADER_FAILED, LG_MSG_INVALID_LOG,
+ LG_MSG_UTIMES_FAILED, LG_MSG_BACKTRACE_SAVE_FAILED, LG_MSG_INIT_FAILED,
+ LG_MSG_VALIDATION_FAILED, LG_MSG_GRAPH_ERROR, LG_MSG_UNKNOWN_OPTIONS_FAILED,
+ LG_MSG_CTX_NULL, LG_MSG_TMPFILE_CREATE_FAILED, LG_MSG_DLOPEN_FAILED,
+ LG_MSG_LOAD_FAILED, LG_MSG_DLSYM_ERROR, LG_MSG_TREE_NOT_FOUND,
+ LG_MSG_PER_DENTRY, LG_MSG_DENTRY, LG_MSG_GETIFADDRS_FAILED,
+ LG_MSG_REGEX_OP_FAILED, LG_MSG_FRAME_ERROR, LG_MSG_SET_PARAM_FAILED,
+ LG_MSG_GET_PARAM_FAILED, LG_MSG_PREPARE_FAILED, LG_MSG_EXEC_FAILED,
+ LG_MSG_BINDING_FAILED, LG_MSG_DELETE_FAILED, LG_MSG_GET_ID_FAILED,
+ LG_MSG_CREATE_FAILED, LG_MSG_PARSE_FAILED, LG_MSG_GETCONTEXT_FAILED,
+ LG_MSG_UPDATE_FAILED, LG_MSG_QUERY_CALL_BACK_FAILED,
+ LG_MSG_GET_RECORD_FAILED, LG_MSG_DB_ERROR, LG_MSG_CONNECTION_ERROR,
+ LG_MSG_NOT_MULTITHREAD_MODE, LG_MSG_SKIP_PATH, LG_MSG_INVALID_FOP,
+ LG_MSG_QUERY_FAILED, LG_MSG_CLEAR_COUNTER_FAILED, LG_MSG_LOCK_LIST_FAILED,
+ LG_MSG_UNLOCK_LIST_FAILED, LG_MSG_ADD_TO_LIST_FAILED, LG_MSG_INIT_DB_FAILED,
+ LG_MSG_DELETE_FROM_LIST_FAILED, LG_MSG_CLOSE_CONNECTION_FAILED,
+ LG_MSG_INSERT_OR_UPDATE_FAILED, LG_MSG_FIND_OP_FAILED,
+ LG_MSG_CONNECTION_INIT_FAILED, LG_MSG_COMPLETED_TASK, LG_MSG_WAKE_UP_ZOMBIE,
+ LG_MSG_REWAITING_TASK, LG_MSG_SLEEP_ZOMBIE, LG_MSG_SWAPCONTEXT_FAILED,
+ LG_MSG_UNSUPPORTED_PLUGIN, LG_MSG_INVALID_DB_TYPE, LG_MSG_UNDERSIZED_BUF,
+ LG_MSG_DATA_CONVERSION_ERROR, LG_MSG_DICT_ERROR, LG_MSG_IOBUFS_NOT_FOUND,
+ LG_MSG_ENTRIES_NULL, LG_MSG_FD_NOT_FOUND_IN_FDTABLE,
+ LG_MSG_REALLOC_FOR_FD_PTR_FAILED, LG_MSG_DICT_SET_FAILED, LG_MSG_NULL_PTR,
+ LG_MSG_RBTHASH_INIT_BUCKET_FAILED, LG_MSG_ASSERTION_FAILED,
+ LG_MSG_HOSTNAME_NULL, LG_MSG_INVALID_IPV4_FORMAT,
+ LG_MSG_CTX_CLEANUP_STARTED, LG_MSG_TIMER_REGISTER_ERROR,
+ LG_MSG_PTR_HEADER_CORRUPTED, LG_MSG_INVALID_UPLINK, LG_MSG_CLIENT_NULL,
+ LG_MSG_XLATOR_DOES_NOT_IMPLEMENT, LG_MSG_DENTRY_NOT_FOUND,
+ LG_MSG_INODE_NOT_FOUND, LG_MSG_INODE_TABLE_NOT_FOUND,
+ LG_MSG_DENTRY_CREATE_FAILED, LG_MSG_INODE_CONTEXT_FREED,
+ LG_MSG_UNKNOWN_LOCK_TYPE, LG_MSG_UNLOCK_BEFORE_LOCK,
+ LG_MSG_LOCK_OWNER_ERROR, LG_MSG_MEMPOOL_PTR_NULL,
+ LG_MSG_QUOTA_XATTRS_MISSING, LG_MSG_INVALID_STRING, LG_MSG_BIND_REF,
+ LG_MSG_REF_COUNT, LG_MSG_INVALID_ARG, LG_MSG_VOL_OPTION_ADD,
+ LG_MSG_XLATOR_OPTION_INVALID, LG_MSG_GETTIMEOFDAY_FAILED,
+ LG_MSG_GRAPH_INIT_FAILED, LG_MSG_EVENT_NOTIFY_FAILED,
+ LG_MSG_ACTIVE_GRAPH_NULL, LG_MSG_VOLFILE_PARSE_ERROR, LG_MSG_FD_INODE_NULL,
+ LG_MSG_INVALID_VOLFILE_ENTRY, LG_MSG_PER_DENTRY_FAILED,
+ LG_MSG_PARENT_DENTRY_NOT_FOUND, LG_MSG_DENTRY_CYCLIC_LOOP,
+ LG_MSG_INVALID_POLL_IN, LG_MSG_INVALID_POLL_OUT, LG_MSG_EPOLL_FD_ADD_FAILED,
+ LG_MSG_EPOLL_FD_DEL_FAILED, LG_MSG_EPOLL_FD_MODIFY_FAILED,
+ LG_MSG_STARTED_EPOLL_THREAD, LG_MSG_EXITED_EPOLL_THREAD,
+ LG_MSG_START_EPOLL_THREAD_FAILED, LG_MSG_FALLBACK_TO_POLL,
+ LG_MSG_QUOTA_CONF_ERROR, LG_MSG_RBTHASH_GET_ENTRY_FAILED,
+ LG_MSG_RBTHASH_GET_BUCKET_FAILED, LG_MSG_RBTHASH_INSERT_FAILED,
+ LG_MSG_RBTHASH_INIT_ENTRY_FAILED, LG_MSG_TMPFILE_DELETE_FAILED,
+ LG_MSG_MEMPOOL_INVALID_FREE, LG_MSG_LOCK_FAILURE, LG_MSG_SET_LOG_LEVEL,
+ LG_MSG_VERIFY_PLATFORM, LG_MSG_RUNNER_LOG, LG_MSG_LEASEID_BUF_INIT_FAILED,
+ LG_MSG_PTHREAD_ATTR_INIT_FAILED, LG_MSG_INVALID_INODE_LIST,
+ LG_MSG_COMPACT_FAILED, LG_MSG_COMPACT_STATUS, LG_MSG_UTIMENSAT_FAILED,
+ LG_MSG_PTHREAD_NAMING_FAILED, LG_MSG_SYSCALL_RETURNS_WRONG,
+ LG_MSG_XXH64_TO_GFID_FAILED, LG_MSG_ASYNC_WARNING, LG_MSG_ASYNC_FAILURE,
+ LG_MSG_GRAPH_CLEANUP_FAILED, LG_MSG_GRAPH_SETUP_FAILED,
+ LG_MSG_GRAPH_DETACH_STARTED, LG_MSG_GRAPH_ATTACH_FAILED,
+ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, LG_MSG_DUPLICATE_ENTRY,
+ LG_MSG_THREAD_NAME_TOO_LONG, LG_MSG_SET_THREAD_FAILED,
+ LG_MSG_THREAD_CREATE_FAILED, LG_MSG_FILE_DELETE_FAILED, LG_MSG_WRONG_VALUE,
+ LG_MSG_PATH_OPEN_FAILED, LG_MSG_DISPATCH_HANDLER_FAILED,
+ LG_MSG_READ_FILE_FAILED, LG_MSG_ENTRIES_NOT_PROVIDED,
+ LG_MSG_ENTRIES_PROVIDED, LG_MSG_UNKNOWN_OPTION_TYPE,
+ LG_MSG_OPTION_DEPRECATED, LG_MSG_INVALID_INIT, LG_MSG_OBJECT_NULL,
+ LG_MSG_GRAPH_NOT_SET, LG_MSG_FILENAME_NOT_SPECIFIED, LG_MSG_STRUCT_MISS,
+ LG_MSG_METHOD_MISS, LG_MSG_INPUT_DATA_NULL, LG_MSG_OPEN_LOGFILE_FAILED);
+
+#define LG_MSG_EPOLL_FD_CREATE_FAILED_STR "epoll fd creation failed"
+#define LG_MSG_INVALID_POLL_IN_STR "invalid poll_in value"
+#define LG_MSG_INVALID_POLL_OUT_STR "invalid poll_out value"
+#define LG_MSG_SLOT_NOT_FOUND_STR "could not find slot"
+#define LG_MSG_EPOLL_FD_ADD_FAILED_STR "failed to add fd to epoll"
+#define LG_MSG_EPOLL_FD_DEL_FAILED_STR "fail to delete fd to epoll"
+#define LG_MSG_EPOLL_FD_MODIFY_FAILED_STR "failed to modify fd events"
+#define LG_MSG_STALE_FD_FOUND_STR "stale fd found"
+#define LG_MSG_GENERATION_MISMATCH_STR "generation mismatch"
+#define LG_MSG_STARTED_EPOLL_THREAD_STR "Started thread with index"
+#define LG_MSG_EXITED_EPOLL_THREAD_STR "Exited thread"
+#define LG_MSG_DISPATCH_HANDLER_FAILED_STR "Failed to dispatch handler"
+#define LG_MSG_START_EPOLL_THREAD_FAILED_STR "Failed to start thread"
+#define LG_MSG_PIPE_CREATE_FAILED_STR "pipe creation failed"
+#define LG_MSG_SET_PIPE_FAILED_STR "could not set pipe to non blocking mode"
+#define LG_MSG_REGISTER_PIPE_FAILED_STR \
+ "could not register pipe fd with poll event loop"
+#define LG_MSG_POLL_IGNORE_MULTIPLE_THREADS_STR \
+ "Currently poll does not use multiple event processing threads, count " \
+ "ignored"
+#define LG_MSG_INDEX_NOT_FOUND_STR "index not found"
+#define LG_MSG_READ_FILE_FAILED_STR "read on file returned error"
+#define LG_MSG_RB_TABLE_CREATE_FAILED_STR "Failed to create rb table bucket"
+#define LG_MSG_HASH_FUNC_ERROR_STR "Hash function not given"
+#define LG_MSG_ENTRIES_NOT_PROVIDED_STR \
+ "Both mem-pool and expected entries not provided"
+#define LG_MSG_ENTRIES_PROVIDED_STR \
+ "Both mem-pool and expected entries are provided"
+#define LG_MSG_RBTHASH_INIT_BUCKET_FAILED_STR "failed to init buckets"
+#define LG_MSG_RBTHASH_GET_ENTRY_FAILED_STR "Failed to get entry from mem-pool"
+#define LG_MSG_RBTHASH_GET_BUCKET_FAILED_STR "Failed to get bucket"
+#define LG_MSG_RBTHASH_INSERT_FAILED_STR "Failed to insert entry"
+#define LG_MSG_RBTHASH_INIT_ENTRY_FAILED_STR "Failed to init entry"
+#define LG_MSG_FILE_STAT_FAILED_STR "failed to stat"
+#define LG_MSG_INET_PTON_FAILED_STR "inet_pton() failed"
+#define LG_MSG_INVALID_ENTRY_STR "Invalid arguments"
+#define LG_MSG_NEGATIVE_NUM_PASSED_STR "negative number passed"
+#define LG_MSG_PATH_ERROR_STR "Path manipulation failed"
+#define LG_MSG_FILE_OP_FAILED_STR "could not open/read file, getting ports info"
+#define LG_MSG_RESERVED_PORTS_ERROR_STR \
+ "Not able to get reserved ports, hence there is a possibility that " \
+ "glusterfs may consume reserved port"
+#define LG_MSG_INVALID_PORT_STR "invalid port"
+#define LG_MSG_GETNAMEINFO_FAILED_STR "Could not lookup hostname"
+#define LG_MSG_GETIFADDRS_FAILED_STR "getifaddrs() failed"
+#define LG_MSG_INVALID_FAMILY_STR "Invalid family"
+#define LG_MSG_CONVERSION_FAILED_STR "String conversion failed"
+#define LG_MSG_GETADDRINFO_FAILED_STR "error in getaddrinfo"
+#define LG_MSG_DUPLICATE_ENTRY_STR "duplicate entry for volfile-server"
+#define LG_MSG_PTHREAD_NAMING_FAILED_STR "Failed to compose thread name"
+#define LG_MSG_THREAD_NAME_TOO_LONG_STR \
+ "Thread name is too long. It has been truncated"
+#define LG_MSG_SET_THREAD_FAILED_STR "Could not set thread name"
+#define LG_MSG_THREAD_CREATE_FAILED_STR "Thread creation failed"
+#define LG_MSG_PTHREAD_ATTR_INIT_FAILED_STR \
+ "Thread attribute initialization failed"
+#define LG_MSG_SKIP_HEADER_FAILED_STR "Failed to skip header section"
+#define LG_MSG_INVALID_LOG_STR "Invalid log-format"
+#define LG_MSG_UTIMENSAT_FAILED_STR "utimenstat failed"
+#define LG_MSG_UTIMES_FAILED_STR "utimes failed"
+#define LG_MSG_FILE_DELETE_FAILED_STR "Unable to delete file"
+#define LG_MSG_BACKTRACE_SAVE_FAILED_STR "Failed to save the backtrace"
+#define LG_MSG_WRONG_VALUE_STR "wrong value"
+#define LG_MSG_DIR_OP_FAILED_STR "Failed to create directory"
+#define LG_MSG_DIR_IS_SYMLINK_STR "dir is symlink"
+#define LG_MSG_RESOLVE_HOSTNAME_FAILED_STR "couldnot resolve hostname"
+#define LG_MSG_PATH_OPEN_FAILED_STR "Unable to open path"
+#define LG_MSG_NO_MEMORY_STR "Error allocating memory"
+#define LG_MSG_EVENT_NOTIFY_FAILED_STR "notification failed"
+#define LG_MSG_PER_DENTRY_FAILED_STR "per dentry fn returned"
+#define LG_MSG_PARENT_DENTRY_NOT_FOUND_STR "parent not found"
+#define LG_MSG_DENTRY_CYCLIC_LOOP_STR \
+ "detected cyclic loop formation during inode linkage"
+#define LG_MSG_CTX_NULL_STR "_ctx not found"
+#define LG_MSG_DENTRY_NOT_FOUND_STR "dentry not found"
+#define LG_MSG_OUT_OF_RANGE_STR "out of range"
+#define LG_MSG_UNKNOWN_OPTION_TYPE_STR "unknown option type"
+#define LG_MSG_VALIDATE_RETURNS_STR "validate of returned"
+#define LG_MSG_OPTION_DEPRECATED_STR \
+ "option is deprecated, continuing with correction"
+#define LG_MSG_VALIDATE_REC_FAILED_STR "validate_rec failed"
+#define LG_MSG_MAPPING_FAILED_STR "mapping failed"
+#define LG_MSG_INIT_IOBUF_FAILED_STR "init failed"
+#define LG_MSG_ARENA_NOT_FOUND_STR "arena not found"
+#define LG_MSG_PAGE_SIZE_EXCEEDED_STR \
+ "page_size of iobufs in arena being added is greater than max available"
+#define LG_MSG_POOL_NOT_FOUND_STR "pool not found"
+#define LG_MSG_IOBUF_NOT_FOUND_STR "iobuf not found"
+#define LG_MSG_DLOPEN_FAILED_STR "DL open failed"
+#define LG_MSG_DLSYM_ERROR_STR "dlsym missing"
+#define LG_MSG_LOAD_FAILED_STR "Failed to load xlator options table"
+#define LG_MSG_INPUT_DATA_NULL_STR \
+ "input data is null. cannot update the lru limit of the inode table. " \
+ "continuing with older value."
+#define LG_MSG_INIT_FAILED_STR "No init() found"
+#define LG_MSG_VOLUME_ERROR_STR \
+ "Initialization of volume failed. review your volfile again."
+#define LG_MSG_TREE_NOT_FOUND_STR "Translator tree not found"
+#define LG_MSG_SET_LOG_LEVEL_STR "setting log level"
+#define LG_MSG_INVALID_INIT_STR \
+ "Invalid log-level. possible values are DEBUG|WARNING|ERROR|NONE|TRACE"
+#define LG_MSG_OBJECT_NULL_STR "object is null, returning false."
+#define LG_MSG_GRAPH_NOT_SET_STR "Graph is not set for xlator"
+#define LG_MSG_OPEN_LOGFILE_FAILED_STR "failed to open logfile"
+#define LG_MSG_STRDUP_ERROR_STR "failed to create metrics dir"
+#define LG_MSG_FILENAME_NOT_SPECIFIED_STR "no filename specified"
+#define LG_MSG_UNDERSIZED_BUF_STR "data value is smaller than expected"
+#define LG_MSG_DICT_SET_FAILED_STR "unable to set dict"
+#define LG_MSG_COUNT_LESS_THAN_ZERO_STR "count < 0!"
+#define LG_MSG_PAIRS_LESS_THAN_COUNT_STR "less than count data pairs found"
+#define LG_MSG_NULL_PTR_STR "pair->key is null!"
+#define LG_MSG_VALUE_LENGTH_LESS_THAN_ZERO_STR "value->len < 0"
+#define LG_MSG_INVALID_ARG_STR "buf is null"
+#define LG_MSG_KEY_OR_VALUE_NULL_STR "key or value is null"
+#define LG_MSG_NULL_VALUE_IN_DICT_STR "null value found in dict"
+#define LG_MSG_FAILED_TO_LOG_DICT_STR "Failed to log dictionary"
+#define LG_MSG_DICT_ERROR_STR "dict error"
+#define LG_MSG_STRUCT_MISS_STR "struct missing"
+#define LG_MSG_METHOD_MISS_STR "method missing(init)"
+
+#endif /* !_LG_MESSAGES_H_ */
diff --git a/libglusterfs/src/glusterfs/list.h b/libglusterfs/src/glusterfs/list.h
new file mode 100644
index 00000000000..221a710ca30
--- /dev/null
+++ b/libglusterfs/src/glusterfs/list.h
@@ -0,0 +1,273 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _LLIST_H
+#define _LLIST_H
+
+struct list_head {
+ struct list_head *next;
+ struct list_head *prev;
+};
+
+#define INIT_LIST_HEAD(head) \
+ do { \
+ (head)->next = (head)->prev = head; \
+ } while (0)
+
+static inline void
+list_add(struct list_head *new, struct list_head *head)
+{
+ new->prev = head;
+ new->next = head->next;
+
+ new->prev->next = new;
+ new->next->prev = new;
+}
+
+static inline void
+list_add_tail(struct list_head *new, struct list_head *head)
+{
+ new->next = head;
+ new->prev = head->prev;
+
+ new->prev->next = new;
+ new->next->prev = new;
+}
+
+/* This function will insert the element to the list in a order.
+ Order will be based on the compare function provided as a input.
+ If element to be inserted in ascending order compare should return:
+ 0: if both the arguments are equal
+ >0: if first argument is greater than second argument
+ <0: if first argument is less than second argument */
+static inline void
+list_add_order(struct list_head *new, struct list_head *head,
+ int (*compare)(struct list_head *, struct list_head *))
+{
+ struct list_head *pos = head->prev;
+
+ while (pos != head) {
+ if (compare(new, pos) >= 0)
+ break;
+
+ /* Iterate the list in the reverse order. This will have
+ better efficiency if the elements are inserted in the
+ ascending order */
+ pos = pos->prev;
+ }
+
+ list_add(new, pos);
+}
+
+static inline void
+list_del(struct list_head *old)
+{
+ old->prev->next = old->next;
+ old->next->prev = old->prev;
+
+ old->next = (void *)0xbabebabe;
+ old->prev = (void *)0xcafecafe;
+}
+
+static inline void
+list_del_init(struct list_head *old)
+{
+ old->prev->next = old->next;
+ old->next->prev = old->prev;
+
+ old->next = old;
+ old->prev = old;
+}
+
+static inline void
+list_move(struct list_head *list, struct list_head *head)
+{
+ list_del(list);
+ list_add(list, head);
+}
+
+static inline void
+list_move_tail(struct list_head *list, struct list_head *head)
+{
+ list_del(list);
+ list_add_tail(list, head);
+}
+
+static inline int
+list_empty(struct list_head *head)
+{
+ return (head->next == head);
+}
+
+static inline void
+__list_splice(struct list_head *list, struct list_head *head)
+{
+ (list->prev)->next = (head->next);
+ (head->next)->prev = (list->prev);
+
+ (head)->next = (list->next);
+ (list->next)->prev = (head);
+}
+
+static inline void
+list_splice(struct list_head *list, struct list_head *head)
+{
+ if (list_empty(list))
+ return;
+
+ __list_splice(list, head);
+}
+
+/* Splice moves @list to the head of the list at @head. */
+static inline void
+list_splice_init(struct list_head *list, struct list_head *head)
+{
+ if (list_empty(list))
+ return;
+
+ __list_splice(list, head);
+ INIT_LIST_HEAD(list);
+}
+
+static inline void
+__list_append(struct list_head *list, struct list_head *head)
+{
+ (head->prev)->next = (list->next);
+ (list->next)->prev = (head->prev);
+ (head->prev) = (list->prev);
+ (list->prev)->next = head;
+}
+
+static inline void
+list_append(struct list_head *list, struct list_head *head)
+{
+ if (list_empty(list))
+ return;
+
+ __list_append(list, head);
+}
+
+/* Append moves @list to the end of @head */
+static inline void
+list_append_init(struct list_head *list, struct list_head *head)
+{
+ if (list_empty(list))
+ return;
+
+ __list_append(list, head);
+ INIT_LIST_HEAD(list);
+}
+
+static inline int
+list_is_last(struct list_head *list, struct list_head *head)
+{
+ return (list->next == head);
+}
+
+static inline int
+list_is_singular(struct list_head *head)
+{
+ return !list_empty(head) && (head->next == head->prev);
+}
+
+/**
+ * list_replace - replace old entry by new one
+ * @old : the element to be replaced
+ * @new : the new element to insert
+ *
+ * If @old was empty, it will be overwritten.
+ */
+static inline void
+list_replace(struct list_head *old, struct list_head *new)
+{
+ new->next = old->next;
+ new->next->prev = new;
+ new->prev = old->prev;
+ new->prev->next = new;
+}
+
+static inline void
+list_replace_init(struct list_head *old, struct list_head *new)
+{
+ list_replace(old, new);
+ INIT_LIST_HEAD(old);
+}
+
+/**
+ * list_rotate_left - rotate the list to the left
+ * @head: the head of the list
+ */
+static inline void
+list_rotate_left(struct list_head *head)
+{
+ struct list_head *first;
+
+ if (!list_empty(head)) {
+ first = head->next;
+ list_move_tail(first, head);
+ }
+}
+
+#define list_entry(ptr, type, member) \
+ ((type *)((char *)(ptr) - (unsigned long)(&((type *)0)->member)))
+
+#define list_first_entry(ptr, type, member) \
+ list_entry((ptr)->next, type, member)
+
+#define list_last_entry(ptr, type, member) list_entry((ptr)->prev, type, member)
+
+#define list_next_entry(pos, member) \
+ list_entry((pos)->member.next, typeof(*(pos)), member)
+
+#define list_prev_entry(pos, member) \
+ list_entry((pos)->member.prev, typeof(*(pos)), member)
+
+#define list_for_each(pos, head) \
+ for (pos = (head)->next; pos != (head); pos = pos->next)
+
+#define list_for_each_entry(pos, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.next, typeof(*pos), member))
+
+#define list_for_each_entry_safe(pos, n, head, member) \
+ for (pos = list_entry((head)->next, typeof(*pos), member), \
+ n = list_entry(pos->member.next, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.next, typeof(*n), member))
+
+#define list_for_each_entry_reverse(pos, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = list_entry(pos->member.prev, typeof(*pos), member))
+
+#define list_for_each_entry_safe_reverse(pos, n, head, member) \
+ for (pos = list_entry((head)->prev, typeof(*pos), member), \
+ n = list_entry(pos->member.prev, typeof(*pos), member); \
+ &pos->member != (head); \
+ pos = n, n = list_entry(n->member.prev, typeof(*n), member))
+
+/*
+ * This list implementation has some advantages, but one disadvantage: you
+ * can't use NULL to check whether you're at the head or tail. Thus, the
+ * address of the head has to be an argument for these macros.
+ */
+
+#define list_next(ptr, head, type, member) \
+ (((ptr)->member.next == head) \
+ ? NULL \
+ : list_entry((ptr)->member.next, type, member))
+
+#define list_prev(ptr, head, type, member) \
+ (((ptr)->member.prev == head) \
+ ? NULL \
+ : list_entry((ptr)->member.prev, type, member))
+
+#endif /* _LLIST_H */
diff --git a/libglusterfs/src/glusterfs/lkowner.h b/libglusterfs/src/glusterfs/lkowner.h
new file mode 100644
index 00000000000..692de34bc7a
--- /dev/null
+++ b/libglusterfs/src/glusterfs/lkowner.h
@@ -0,0 +1,93 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _LK_OWNER_H
+#define _LK_OWNER_H
+
+#include "glusterfs/glusterfs-fops.h"
+
+/* LKOWNER to string functions */
+static inline void
+lkowner_unparse(gf_lkowner_t *lkowner, char *buf, int buf_len)
+{
+ int i = 0;
+ int j = 0;
+
+ for (i = 0; i < lkowner->len; i++) {
+ if (i && !(i % 8)) {
+ buf[j] = '-';
+ j++;
+ }
+ sprintf(&buf[j], "%02hhx", lkowner->data[i]);
+ j += 2;
+ if (j == buf_len)
+ break;
+ }
+ if (j < buf_len)
+ buf[j] = '\0';
+}
+
+static inline void
+set_lk_owner_from_ptr(gf_lkowner_t *lkowner, void *data)
+{
+ int i = 0;
+ int j = 0;
+
+ lkowner->len = sizeof(unsigned long);
+ for (i = 0, j = 0; i < lkowner->len; i++, j += 8) {
+ lkowner->data[i] = (char)((((unsigned long)data) >> j) & 0xff);
+ }
+}
+
+static inline void
+set_lk_owner_from_uint64(gf_lkowner_t *lkowner, uint64_t data)
+{
+ int i = 0;
+ int j = 0;
+
+ lkowner->len = 8;
+ for (i = 0, j = 0; i < lkowner->len; i++, j += 8) {
+ lkowner->data[i] = (char)((data >> j) & 0xff);
+ }
+}
+
+/* Return true if the locks have the same owner */
+static inline int
+is_same_lkowner(gf_lkowner_t *l1, gf_lkowner_t *l2)
+{
+ return ((l1->len == l2->len) && !memcmp(l1->data, l2->data, l1->len));
+}
+
+static inline int
+is_lk_owner_null(gf_lkowner_t *lkowner)
+{
+ int is_null = 1;
+ int i = 0;
+
+ if (lkowner == NULL || lkowner->len == 0)
+ goto out;
+
+ for (i = 0; i < lkowner->len; i++) {
+ if (lkowner->data[i] != 0) {
+ is_null = 0;
+ break;
+ }
+ }
+out:
+ return is_null;
+}
+
+static inline void
+lk_owner_copy(gf_lkowner_t *dst, gf_lkowner_t *src)
+{
+ dst->len = src->len;
+ memcpy(dst->data, src->data, src->len);
+}
+#endif /* _LK_OWNER_H */
diff --git a/libglusterfs/src/glusterfs/locking.h b/libglusterfs/src/glusterfs/locking.h
new file mode 100644
index 00000000000..43cc87735d1
--- /dev/null
+++ b/libglusterfs/src/glusterfs/locking.h
@@ -0,0 +1,84 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _LOCKING_H
+#define _LOCKING_H
+
+#include <pthread.h>
+
+#if defined(GF_DARWIN_HOST_OS)
+#include <libkern/OSAtomic.h>
+#define pthread_spinlock_t OSSpinLock
+#define pthread_spin_lock(l) OSSpinLockLock(l)
+#define pthread_spin_unlock(l) OSSpinLockUnlock(l)
+#define pthread_spin_destroy(l) 0
+#define pthread_spin_init(l, v) (*l = v)
+#endif
+
+#if defined(HAVE_SPINLOCK)
+
+typedef union {
+ pthread_spinlock_t spinlock;
+ pthread_mutex_t mutex;
+} gf_lock_t;
+
+#if !defined(LOCKING_IMPL)
+extern int use_spinlocks;
+
+/*
+ * Using a dispatch table would be unpleasant because we're dealing with two
+ * different types. If the dispatch contains direct pointers to pthread_xx
+ * or mutex_xxx then we have to hope that every possible union alternative
+ * starts at the same address as the union itself. I'm old enough to remember
+ * compilers where this was not the case (for alignment reasons) so I'm a bit
+ * paranoid about that. Also, I don't like casting arguments through "void *"
+ * which we'd also have to do to avoid type errors. The other alternative would
+ * be to define actual functions which pick out the right union member, and put
+ * those in the dispatch tables. Now we have a pointer dereference through the
+ * dispatch table plus a function call, which is likely to be worse than the
+ * branching here from the ?: construct. If it were a clear win it might be
+ * worth the extra complexity, but for now this way seems preferable.
+ */
+
+#define LOCK_INIT(x) \
+ (use_spinlocks ? pthread_spin_init(&((x)->spinlock), 0) \
+ : pthread_mutex_init(&((x)->mutex), 0))
+
+#define LOCK(x) \
+ (use_spinlocks ? pthread_spin_lock(&((x)->spinlock)) \
+ : pthread_mutex_lock(&((x)->mutex)))
+
+#define TRY_LOCK(x) \
+ (use_spinlocks ? pthread_spin_trylock(&((x)->spinlock)) \
+ : pthread_mutex_trylock(&((x)->mutex)))
+
+#define UNLOCK(x) \
+ (use_spinlocks ? pthread_spin_unlock(&((x)->spinlock)) \
+ : pthread_mutex_unlock(&((x)->mutex)))
+
+#define LOCK_DESTROY(x) \
+ (use_spinlocks ? pthread_spin_destroy(&((x)->spinlock)) \
+ : pthread_mutex_destroy(&((x)->mutex)))
+
+#endif
+
+#else
+
+typedef pthread_mutex_t gf_lock_t;
+
+#define LOCK_INIT(x) pthread_mutex_init(x, 0)
+#define LOCK(x) pthread_mutex_lock(x)
+#define TRY_LOCK(x) pthread_mutex_trylock(x)
+#define UNLOCK(x) pthread_mutex_unlock(x)
+#define LOCK_DESTROY(x) pthread_mutex_destroy(x)
+
+#endif /* HAVE_SPINLOCK */
+
+#endif /* _LOCKING_H */
diff --git a/libglusterfs/src/glusterfs/logging.h b/libglusterfs/src/glusterfs/logging.h
new file mode 100644
index 00000000000..b3a6ac191f0
--- /dev/null
+++ b/libglusterfs/src/glusterfs/logging.h
@@ -0,0 +1,383 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __LOGGING_H__
+#define __LOGGING_H__
+
+#include <sys/time.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <pthread.h>
+#include "glusterfs/list.h"
+
+#ifdef GF_DARWIN_HOST_OS
+#define GF_PRI_FSBLK "u"
+#define GF_PRI_DEV PRId32
+#define GF_PRI_INODE PRIu64
+#define GF_PRI_NLINK PRIu32
+#define GF_PRI_SECOND "ld"
+#define GF_PRI_SUSECONDS "06d"
+#define GF_PRI_SNSECONDS "09ld"
+#define GF_PRI_USEC "d"
+#else
+#define GF_PRI_FSBLK PRIu64
+#define GF_PRI_DEV PRIu64
+#define GF_PRI_INODE PRIu64
+#define GF_PRI_NLINK PRIu32
+#define GF_PRI_SECOND "lu"
+#define GF_PRI_SUSECONDS "06ld"
+#define GF_PRI_SNSECONDS "09ld"
+#define GF_PRI_USEC "ld"
+#endif
+#define GF_PRI_BLKSIZE PRId32
+#define GF_PRI_SIZET "zu"
+#define GF_PRI_ATOMIC PRIu64
+
+#ifdef GF_DARWIN_HOST_OS
+#define GF_PRI_TIME "ld"
+#else
+#define GF_PRI_TIME PRIu64
+#endif
+
+#if 0
+/* Syslog definitions :-) */
+#define LOG_EMERG 0 /* system is unusable */
+#define LOG_ALERT 1 /* action must be taken immediately */
+#define LOG_CRIT 2 /* critical conditions */
+#define LOG_ERR 3 /* error conditions */
+#define LOG_WARNING 4 /* warning conditions */
+#define LOG_NOTICE 5 /* normal but significant condition */
+#define LOG_INFO 6 /* informational */
+#define LOG_DEBUG 7 /* debug-level messages */
+#endif
+
+#define GF_LOG_FORMAT_NO_MSG_ID "no-msg-id"
+#define GF_LOG_FORMAT_WITH_MSG_ID "with-msg-id"
+
+#define GF_LOGGER_GLUSTER_LOG "gluster-log"
+#define GF_LOGGER_SYSLOG "syslog"
+
+typedef enum {
+ GF_LOG_NONE,
+ GF_LOG_EMERG,
+ GF_LOG_ALERT,
+ GF_LOG_CRITICAL, /* fatal errors */
+ GF_LOG_ERROR, /* major failures (not necessarily fatal) */
+ GF_LOG_WARNING, /* info about normal operation */
+ GF_LOG_NOTICE,
+ GF_LOG_INFO, /* Normal information */
+ GF_LOG_DEBUG, /* internal errors */
+ GF_LOG_TRACE, /* full trace of operation */
+} gf_loglevel_t;
+
+/* format for the logs */
+typedef enum {
+ gf_logformat_traditional = 0, /* Format as in gluster 3.5 */
+ gf_logformat_withmsgid, /* Format enhanced with MsgID, ident, errstr */
+ gf_logformat_cee /* log enhanced format in cee */
+} gf_log_format_t;
+
+/* log infrastructure to log to */
+typedef enum {
+ gf_logger_glusterlog = 0, /* locations and files as in gluster 3.5 */
+ gf_logger_syslog /* log to (r)syslog, based on (r)syslog conf */
+ /* NOTE: In the future journald, lumberjack, next new thing here */
+} gf_log_logger_t;
+
+#define DEFAULT_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
+#define DEFAULT_QUOTA_CRAWL_LOG_DIRECTORY DATADIR "/log/glusterfs/quota_crawl"
+#define DEFAULT_LOG_LEVEL GF_LOG_INFO
+
+typedef struct gf_log_handle_ {
+ pthread_mutex_t logfile_mutex;
+ gf_loglevel_t loglevel;
+ gf_loglevel_t sys_log_level;
+ int gf_log_syslog;
+ char *filename;
+ FILE *logfile;
+ FILE *gf_log_logfile;
+ char *cmd_log_filename;
+ FILE *cmdlogfile;
+ gf_log_logger_t logger;
+ gf_log_format_t logformat;
+ char *ident;
+ int log_control_file_found;
+ struct list_head lru_queue;
+ pthread_mutex_t log_buf_lock;
+ struct _gf_timer *log_flush_timer;
+ int localtime;
+ uint32_t lru_size;
+ uint32_t lru_cur_size;
+ uint32_t timeout;
+ uint8_t logrotate;
+ uint8_t cmd_history_logrotate;
+} gf_log_handle_t;
+
+typedef struct log_buf_ {
+ char *msg;
+ uint64_t msg_id;
+ int errnum;
+ struct timeval oldest;
+ struct timeval latest;
+ char *domain;
+ char *file;
+ char *function;
+ int32_t line;
+ gf_loglevel_t level;
+ int refcount;
+ int graph_id;
+ struct list_head msg_list;
+} log_buf_t;
+
+void
+gf_log_globals_init(void *ctx, gf_loglevel_t level);
+int
+gf_log_init(void *data, const char *filename, const char *ident);
+
+void
+gf_log_logrotate(int signum);
+
+void
+gf_log_cleanup(void);
+
+/* Internal interfaces to log messages with message IDs */
+int
+_gf_msg(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, int errnum, int trace,
+ uint64_t msgid, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 9, 10)));
+
+void
+_gf_msg_backtrace_nomem(gf_loglevel_t level, int stacksize);
+
+int
+_gf_msg_plain(gf_loglevel_t level, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+
+int
+_gf_msg_plain_nomem(gf_loglevel_t level, const char *msg);
+
+int
+_gf_msg_vplain(gf_loglevel_t level, const char *fmt, va_list ap);
+
+int
+_gf_msg_nomem(const char *domain, const char *file, const char *function,
+ int line, gf_loglevel_t level, size_t size);
+
+int
+_gf_log(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 6, 7)));
+
+int
+_gf_log_callingfn(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 6, 7)));
+
+int
+_gf_log_eh(const char *function, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+
+/* treat GF_LOG_TRACE and GF_LOG_NONE as LOG_DEBUG and
+ * other level as is */
+#define SET_LOG_PRIO(level, priority) \
+ do { \
+ if (GF_LOG_TRACE == (level) || GF_LOG_NONE == (level)) { \
+ priority = LOG_DEBUG; \
+ } else { \
+ priority = (level)-1; \
+ } \
+ } while (0)
+
+/* extract just the file name from the path */
+#define GET_FILE_NAME_TO_LOG(file, basename) \
+ do { \
+ basename = strrchr((file), '/'); \
+ if (basename) \
+ basename++; \
+ else \
+ basename = (file); \
+ } while (0)
+
+#define PRINT_SIZE_CHECK(ret, label, strsize) \
+ do { \
+ if (ret < 0) \
+ goto label; \
+ if ((strsize - ret) > 0) { \
+ strsize -= ret; \
+ } else { \
+ ret = 0; \
+ goto label; \
+ } \
+ } while (0)
+
+#define FMT_WARN(fmt...) \
+ do { \
+ if (0) \
+ printf(fmt); \
+ } while (0)
+
+/* Interface to log messages with message IDs */
+#define gf_msg(dom, level, errnum, msgid, fmt...) \
+ do { \
+ _gf_msg(dom, __FILE__, __FUNCTION__, __LINE__, level, errnum, 0, \
+ msgid, ##fmt); \
+ } while (0)
+
+/* no frills, no thrills, just a vanilla message, used to print the graph */
+#define gf_msg_plain(level, fmt...) \
+ do { \
+ _gf_msg_plain(level, ##fmt); \
+ } while (0)
+
+#define gf_msg_plain_nomem(level, msg) \
+ do { \
+ _gf_msg_plain_nomem(level, msg); \
+ } while (0)
+
+#define gf_msg_vplain(level, fmt, va) \
+ do { \
+ _gf_msg_vplain(level, fmt, va); \
+ } while (0)
+
+#define gf_msg_backtrace_nomem(level, stacksize) \
+ do { \
+ _gf_msg_backtrace_nomem(level, stacksize); \
+ } while (0)
+
+#define gf_msg_callingfn(dom, level, errnum, msgid, fmt...) \
+ do { \
+ _gf_msg(dom, __FILE__, __FUNCTION__, __LINE__, level, errnum, 1, \
+ msgid, ##fmt); \
+ } while (0)
+
+/* No malloc or calloc should be called in this function */
+#define gf_msg_nomem(dom, level, size) \
+ do { \
+ _gf_msg_nomem(dom, __FILE__, __FUNCTION__, __LINE__, level, size); \
+ } while (0)
+
+/* Debug or trace messages do not need message IDs as these are more developer
+ * related. Hence, the following abstractions are provided for the same */
+#define gf_msg_debug(dom, errnum, fmt...) \
+ do { \
+ _gf_msg(dom, __FILE__, __FUNCTION__, __LINE__, GF_LOG_DEBUG, errnum, \
+ 0, 0, ##fmt); \
+ } while (0)
+
+#define gf_msg_trace(dom, errnum, fmt...) \
+ do { \
+ _gf_msg(dom, __FILE__, __FUNCTION__, __LINE__, GF_LOG_TRACE, errnum, \
+ 0, 0, ##fmt); \
+ } while (0)
+
+#define gf_log(dom, level, fmt...) \
+ do { \
+ FMT_WARN(fmt); \
+ _gf_log(dom, __FILE__, __FUNCTION__, __LINE__, level, ##fmt); \
+ } while (0)
+
+#define gf_log_eh(fmt...) \
+ do { \
+ FMT_WARN(fmt); \
+ _gf_log_eh(__FUNCTION__, ##fmt); \
+ } while (0)
+
+#define gf_log_callingfn(dom, level, fmt...) \
+ do { \
+ FMT_WARN(fmt); \
+ _gf_log_callingfn(dom, __FILE__, __FUNCTION__, __LINE__, level, \
+ ##fmt); \
+ } while (0)
+
+/* Log once in GF_UNIVERSAL_ANSWER times */
+#define GF_LOG_OCCASIONALLY(var, args...) \
+ if (var++ == 0 || !((var - 1) % GF_UNIVERSAL_ANSWER)) { \
+ gf_log(args); \
+ }
+
+struct _glusterfs_ctx;
+
+void
+gf_log_disable_syslog(void);
+void
+gf_log_enable_syslog(void);
+gf_loglevel_t
+gf_log_get_loglevel(void);
+void
+gf_log_set_loglevel(struct _glusterfs_ctx *ctx, gf_loglevel_t level);
+int
+gf_log_get_localtime(void);
+void
+gf_log_set_localtime(int);
+void
+gf_log_flush(void);
+gf_loglevel_t
+gf_log_get_xl_loglevel(void *xl);
+void
+gf_log_set_xl_loglevel(void *xl, gf_loglevel_t level);
+
+int
+gf_cmd_log(const char *domain, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+
+int
+gf_cmd_log_init(const char *filename);
+
+void
+set_sys_log_level(gf_loglevel_t level);
+
+int
+gf_log_fini(void *data);
+
+void
+gf_log_set_logger(gf_log_logger_t logger);
+
+void
+gf_log_set_logformat(gf_log_format_t format);
+
+void
+gf_log_set_log_buf_size(uint32_t buf_size);
+
+void
+gf_log_set_log_flush_timeout(uint32_t timeout);
+
+void
+gf_log_flush_msgs(struct _glusterfs_ctx *ctx);
+
+int
+gf_log_inject_timer_event(struct _glusterfs_ctx *ctx);
+
+void
+gf_log_disable_suppression_before_exit(struct _glusterfs_ctx *ctx);
+
+#define GF_DEBUG(xl, format, args...) \
+ gf_log((xl)->name, GF_LOG_DEBUG, format, ##args)
+#define GF_INFO(xl, format, args...) \
+ gf_log((xl)->name, GF_LOG_INFO, format, ##args)
+#define GF_WARNING(xl, format, args...) \
+ gf_log((xl)->name, GF_LOG_WARNING, format, ##args)
+#define GF_ERROR(xl, format, args...) \
+ gf_log((xl)->name, GF_LOG_ERROR, format, ##args)
+
+int
+_gf_smsg(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, int errnum, int trace,
+ uint64_t msgid, const char *event, ...);
+
+/* Interface to log messages with message IDs */
+#define gf_smsg(dom, level, errnum, msgid, event...) \
+ do { \
+ _gf_smsg(dom, __FILE__, __FUNCTION__, __LINE__, level, errnum, 0, \
+ msgid, msgid##_STR, ##event); \
+ } while (0)
+
+#endif /* __LOGGING_H__ */
diff --git a/libglusterfs/src/glusterfs/lvm-defaults.h b/libglusterfs/src/glusterfs/lvm-defaults.h
new file mode 100644
index 00000000000..32feebf3f6e
--- /dev/null
+++ b/libglusterfs/src/glusterfs/lvm-defaults.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _LVM_DEFAULTS_H
+#define _LVM_DEFAULTS_H
+
+#define LVM_RESIZE "/sbin/lvresize"
+#define LVM_CREATE "/sbin/lvcreate"
+#define LVM_CONVERT "/sbin/lvconvert"
+#define LVM_REMOVE "/sbin/lvremove"
+#define LVS "/sbin/lvs"
+
+#endif /* _LVM_DEFAULTS_H */
diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h
new file mode 100644
index 00000000000..e5b3276d047
--- /dev/null
+++ b/libglusterfs/src/glusterfs/mem-pool.h
@@ -0,0 +1,336 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _MEM_POOL_H_
+#define _MEM_POOL_H_
+
+#include "glusterfs/list.h"
+#include "glusterfs/atomic.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/mem-types.h"
+#include "glusterfs/glusterfs.h" /* for glusterfs_ctx_t */
+#include <stdlib.h>
+#include <inttypes.h>
+#include <string.h>
+#include <stdarg.h>
+
+/*
+ * Need this for unit tests since inline functions
+ * access memory allocation and need to use the
+ * unit test versions
+ */
+#ifdef UNIT_TESTING
+#include <stddef.h>
+#include <setjmp.h>
+#include <cmocka.h>
+#endif
+
+#define GF_MEM_TRAILER_SIZE 8
+#define GF_MEM_HEADER_MAGIC 0xCAFEBABE
+#define GF_MEM_TRAILER_MAGIC 0xBAADF00D
+#define GF_MEM_INVALID_MAGIC 0xDEADC0DE
+
+#define POOL_SMALLEST 7 /* i.e. 128 */
+#define POOL_LARGEST 20 /* i.e. 1048576 */
+#define NPOOLS (POOL_LARGEST - POOL_SMALLEST + 1)
+
+struct mem_acct_rec {
+ const char *typestr;
+ uint64_t size;
+ uint64_t max_size;
+ uint64_t total_allocs;
+ uint32_t num_allocs;
+ uint32_t max_num_allocs;
+ gf_lock_t lock;
+#ifdef DEBUG
+ struct list_head obj_list;
+#endif
+};
+
+struct mem_acct {
+ uint32_t num_types;
+ gf_atomic_t refcnt;
+ struct mem_acct_rec rec[0];
+};
+
+struct mem_header {
+ uint32_t type;
+ size_t size;
+ struct mem_acct *mem_acct;
+ uint32_t magic;
+#ifdef DEBUG
+ struct list_head acct_list;
+#endif
+ int padding[8];
+};
+
+#define GF_MEM_HEADER_SIZE (sizeof(struct mem_header))
+
+#ifdef DEBUG
+struct mem_invalid {
+ uint32_t magic;
+ void *mem_acct;
+ uint32_t type;
+ size_t size;
+ void *baseaddr;
+};
+#endif
+
+void *
+__gf_calloc(size_t cnt, size_t size, uint32_t type, const char *typestr);
+
+void *
+__gf_malloc(size_t size, uint32_t type, const char *typestr);
+
+void *
+__gf_realloc(void *ptr, size_t size);
+
+int
+gf_vasprintf(char **string_ptr, const char *format, va_list arg);
+
+int
+gf_asprintf(char **string_ptr, const char *format, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+
+void
+__gf_free(void *ptr);
+
+static inline void *
+__gf_default_malloc(size_t size)
+{
+ void *ptr = NULL;
+
+ ptr = malloc(size);
+ if (!ptr)
+ gf_msg_nomem("", GF_LOG_ALERT, size);
+
+ return ptr;
+}
+
+static inline void *
+__gf_default_calloc(int cnt, size_t size)
+{
+ void *ptr = NULL;
+
+ ptr = calloc(cnt, size);
+ if (!ptr)
+ gf_msg_nomem("", GF_LOG_ALERT, (cnt * size));
+
+ return ptr;
+}
+
+static inline void *
+__gf_default_realloc(void *oldptr, size_t size)
+{
+ void *ptr = NULL;
+
+ ptr = realloc(oldptr, size);
+ if (!ptr)
+ gf_msg_nomem("", GF_LOG_ALERT, size);
+
+ return ptr;
+}
+
+#define MALLOC(size) __gf_default_malloc(size)
+#define CALLOC(cnt, size) __gf_default_calloc(cnt, size)
+#define REALLOC(ptr, size) __gf_default_realloc(ptr, size)
+
+#define FREE(ptr) \
+ do { \
+ if (ptr != NULL) { \
+ free((void *)ptr); \
+ ptr = (void *)0xeeeeeeee; \
+ } \
+ } while (0)
+
+#define GF_CALLOC(nmemb, size, type) __gf_calloc(nmemb, size, type, #type)
+
+#define GF_MALLOC(size, type) __gf_malloc(size, type, #type)
+
+#define GF_REALLOC(ptr, size) __gf_realloc(ptr, size)
+
+#define GF_FREE(free_ptr) __gf_free(free_ptr)
+
+static inline char *
+gf_strndup(const char *src, size_t len)
+{
+ char *dup_str = NULL;
+
+ if (!src) {
+ goto out;
+ }
+
+ dup_str = GF_MALLOC(len + 1, gf_common_mt_strdup);
+ if (!dup_str) {
+ goto out;
+ }
+
+ memcpy(dup_str, src, len);
+ dup_str[len] = '\0';
+out:
+ return dup_str;
+}
+
+static inline char *
+gf_strdup(const char *src)
+{
+ if (!src)
+ return NULL;
+
+ return gf_strndup(src, strlen(src));
+}
+
+static inline void *
+gf_memdup(const void *src, size_t size)
+{
+ void *dup_mem = NULL;
+
+ dup_mem = GF_MALLOC(size, gf_common_mt_memdup);
+ if (!dup_mem)
+ goto out;
+
+ memcpy(dup_mem, src, size);
+
+out:
+ return dup_mem;
+}
+
+#ifdef GF_DISABLE_MEMPOOL
+
+/* No-op memory pool enough to fit current API without massive redesign. */
+
+struct mem_pool {
+ unsigned long sizeof_type;
+};
+
+#define mem_pools_init() \
+ do { \
+ } while (0)
+#define mem_pools_fini() \
+ do { \
+ } while (0)
+#define mem_pool_thread_destructor(pool_list) (void)pool_list
+
+#else /* !GF_DISABLE_MEMPOOL */
+
+/* kind of 'header' for the actual mem_pool_shared structure, this might make
+ * it possible to dump some more details in a statedump */
+struct mem_pool {
+ /* object size, without pooled_obj_hdr_t */
+ unsigned long sizeof_type;
+ unsigned long count; /* requested pool size (unused) */
+ char *name;
+ char *xl_name;
+ gf_atomic_t active; /* current allocations */
+#ifdef DEBUG
+ gf_atomic_t hit; /* number of allocations served from pt_pool */
+ gf_atomic_t miss; /* number of std allocs due to miss */
+#endif
+ struct list_head owner; /* glusterfs_ctx_t->mempool_list */
+ glusterfs_ctx_t *ctx; /* take ctx->lock when updating owner */
+
+ struct mem_pool_shared *pool; /* the initial pool that was returned */
+};
+
+typedef struct pooled_obj_hdr {
+ unsigned long magic;
+ struct pooled_obj_hdr *next;
+ struct per_thread_pool_list *pool_list;
+ unsigned int power_of_two;
+
+ /* track the pool that was used to request this object */
+ struct mem_pool *pool;
+} pooled_obj_hdr_t;
+
+/* Each memory block inside a pool has a fixed size that is a power of two.
+ * However each object will have a header that will reduce the available
+ * space. */
+#define AVAILABLE_SIZE(p2) ((1UL << (p2)) - sizeof(pooled_obj_hdr_t))
+
+typedef struct per_thread_pool {
+ /* the pool that was used to request this allocation */
+ struct mem_pool_shared *parent;
+ /* Everything else is protected by our own lock. */
+ pooled_obj_hdr_t *hot_list;
+ pooled_obj_hdr_t *cold_list;
+} per_thread_pool_t;
+
+typedef struct per_thread_pool_list {
+ /* thr_list is used to place the TLS pool_list into the active global list
+ * (pool_threads) or the inactive global list (pool_free_threads). It's
+ * protected by the global pool_lock. */
+ struct list_head thr_list;
+
+ /* This lock is used to update poison and the hot/cold lists of members
+ * of 'pools' array. */
+ pthread_spinlock_t lock;
+
+ /* This field is used to mark a pool_list as not being owned by any thread.
+ * This means that the sweeper thread won't be cleaning objects stored in
+ * its pools. mem_put() uses it to decide if the object being released is
+ * placed into its original pool_list or directly destroyed. */
+ bool poison;
+
+ /*
+ * There's really more than one pool, but the actual number is hidden
+ * in the implementation code so we just make it a single-element array
+ * here.
+ */
+ per_thread_pool_t pools[1];
+} per_thread_pool_list_t;
+
+/* actual pool structure, shared between different mem_pools */
+struct mem_pool_shared {
+ unsigned int power_of_two;
+ /*
+ * Updates to these are *not* protected by a global lock, so races
+ * could occur and the numbers might be slightly off. Don't expect
+ * them to line up exactly. It's the general trends that matter, and
+ * it's not worth the locked-bus-cycle overhead to make these precise.
+ */
+ gf_atomic_t allocs_hot;
+ gf_atomic_t allocs_cold;
+ gf_atomic_t allocs_stdc;
+ gf_atomic_t frees_to_list;
+};
+
+void
+mem_pools_init(void); /* start the pool_sweeper thread */
+void
+mem_pools_fini(void); /* cleanup memory pools */
+void
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list);
+
+#endif /* GF_DISABLE_MEMPOOL */
+
+struct mem_pool *
+mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
+ unsigned long count, char *name);
+
+#define mem_pool_new(type, count) \
+ mem_pool_new_fn(THIS->ctx, sizeof(type), count, #type)
+
+#define mem_pool_new_ctx(ctx, type, count) \
+ mem_pool_new_fn(ctx, sizeof(type), count, #type)
+
+void
+mem_put(void *ptr);
+void *
+mem_get(struct mem_pool *pool);
+void *
+mem_get0(struct mem_pool *pool);
+
+void
+mem_pool_destroy(struct mem_pool *pool);
+
+void
+gf_mem_acct_enable_set(void *ctx);
+
+#endif /* _MEM_POOL_H */
diff --git a/libglusterfs/src/glusterfs/mem-types.h b/libglusterfs/src/glusterfs/mem-types.h
new file mode 100644
index 00000000000..d45d5b68c91
--- /dev/null
+++ b/libglusterfs/src/glusterfs/mem-types.h
@@ -0,0 +1,139 @@
+/*
+ Copyright (c) 2008-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __MEM_TYPES_H__
+#define __MEM_TYPES_H__
+
+enum gf_common_mem_types_ {
+ gf_common_mt_dnscache6, /* used only in one location */
+ gf_common_mt_event_pool,
+ gf_common_mt_reg,
+ gf_common_mt_pollfd, /* used only in one location */
+ gf_common_mt_fdentry_t, /* used only in one location */
+ gf_common_mt_fdtable_t, /* used only in one location */
+ gf_common_mt_fd_ctx, /* used only in one location */
+ gf_common_mt_gf_dirent_t,
+ gf_common_mt_inode_t, /* used only in one location */
+ gf_common_mt_inode_ctx, /* used only in one location */
+ gf_common_mt_list_head,
+ gf_common_mt_inode_table_t, /* used only in one location */
+ gf_common_mt_xlator_t,
+ gf_common_mt_xlator_list_t, /* used only in one location */
+ gf_common_mt_volume_opt_list_t,
+ gf_common_mt_gf_timer_t, /* used only in one location */
+ gf_common_mt_gf_timer_registry_t, /* used only in one location */
+ gf_common_mt_auth_handle_t, /* used only in one location */
+ gf_common_mt_iobuf, /* used only in one location */
+ gf_common_mt_iobuf_arena, /* used only in one location */
+ gf_common_mt_iobref, /* used only in one location */
+ gf_common_mt_iobuf_pool, /* used only in one location */
+ gf_common_mt_iovec,
+ gf_common_mt_memdup, /* used only in one location */
+ gf_common_mt_asprintf, /* used only in one location */
+ gf_common_mt_strdup,
+ gf_common_mt_socket_private_t, /* used only in one location */
+ gf_common_mt_ioq, /* used only in one location */
+ gf_common_mt_char,
+ gf_common_mt_rbthash_table_t, /* used only in one location */
+ gf_common_mt_rbthash_bucket, /* used only in one location */
+ gf_common_mt_mem_pool, /* used only in one location */
+ gf_common_mt_rpcsvc_auth_list, /* used only in one location */
+ gf_common_mt_rpcsvc_t, /* used only in one location */
+ gf_common_mt_rpcsvc_program_t, /* used only in one location */
+ gf_common_mt_rpcsvc_listener_t, /* used only in one location */
+ gf_common_mt_rpcsvc_wrapper_t, /* used only in one location */
+ gf_common_mt_rpcclnt_t, /* used only in one location */
+ gf_common_mt_rpcclnt_savedframe_t, /* used only in one location */
+ gf_common_mt_rpc_trans_t,
+ gf_common_mt_rpc_trans_pollin_t, /* used only in one location */
+ gf_common_mt_rpc_trans_reqinfo_t, /* used only in one location */
+ gf_common_mt_glusterfs_graph_t,
+ gf_common_mt_rdma_private_t, /* used only in one location */
+ gf_common_mt_rpc_transport_t, /* used only in one location */
+ gf_common_mt_rdma_post_t, /* used only in one location */
+ gf_common_mt_qpent, /* used only in one location */
+ gf_common_mt_rdma_device_t, /* used only in one location */
+ gf_common_mt_rdma_arena_mr, /* used only in one location */
+ gf_common_mt_sge, /* used only in one location */
+ gf_common_mt_rpcclnt_cb_program_t, /* used only in one location */
+ gf_common_mt_libxl_marker_local, /* used only in one location */
+ gf_common_mt_graph_buf, /* used only in one location */
+ gf_common_mt_trie_trie, /* used only in one location */
+ gf_common_mt_trie_data, /* used only in one location */
+ gf_common_mt_trie_node, /* used only in one location */
+ gf_common_mt_trie_buf, /* used only in one location */
+ gf_common_mt_run_argv, /* used only in one location */
+ gf_common_mt_run_logbuf, /* used only in one location */
+ gf_common_mt_fd_lk_ctx_t, /* used only in one location */
+ gf_common_mt_fd_lk_ctx_node_t, /* used only in one location */
+ gf_common_mt_buffer_t, /* used only in one location */
+ gf_common_mt_circular_buffer_t, /* used only in one location */
+ gf_common_mt_eh_t,
+ gf_common_mt_store_handle_t, /* used only in one location */
+ gf_common_mt_store_iter_t, /* used only in one location */
+ gf_common_mt_drc_client_t, /* used only in one location */
+ gf_common_mt_drc_globals_t, /* used only in one location */
+ gf_common_mt_groups_t,
+ gf_common_mt_cliententry_t, /* used only in one location */
+ gf_common_mt_clienttable_t, /* used only in one location */
+ gf_common_mt_client_t, /* used only in one location */
+ gf_common_mt_client_ctx, /* used only in one location */
+ gf_common_mt_auxgids, /* used only in one location */
+ gf_common_mt_syncopctx, /* used only in one location */
+ gf_common_mt_iobrefs, /* used only in one location */
+ gf_common_mt_gsync_status_t,
+ gf_common_mt_uuid_t,
+ gf_common_mt_mgmt_v3_lock_obj_t, /* used only in one location */
+ gf_common_mt_txn_opinfo_obj_t, /* used only in one location */
+ gf_common_mt_strfd_t, /* used only in one location */
+ gf_common_mt_strfd_data_t, /* used only in one location */
+ gf_common_mt_regex_t, /* used only in one location */
+ gf_common_mt_ereg, /* used only in one location */
+ gf_common_mt_wr, /* used only in one location */
+ gf_common_mt_dnscache, /* used only in one location */
+ gf_common_mt_dnscache_entry, /* used only in one location */
+ gf_common_mt_parser_t, /* used only in one location */
+ gf_common_quota_meta_t,
+ gf_common_mt_rbuf_t, /* used only in one location */
+ gf_common_mt_rlist_t, /* used only in one location */
+ gf_common_mt_rvec_t, /* used only in one location */
+ /* glusterd can load the nfs-xlator dynamically and needs these two */
+ gf_common_mt_nfs_netgroups, /* used only in one location */
+ gf_common_mt_nfs_exports, /* used only in one location */
+ gf_common_mt_gf_brick_spec_t, /* used only in one location */
+ gf_common_mt_int,
+ gf_common_mt_pointer,
+ gf_common_mt_synctask, /* used only in one location */
+ gf_common_mt_syncstack, /* used only in one location */
+ gf_common_mt_syncenv, /* used only in one location */
+ gf_common_mt_scan_data, /* used only in one location */
+ gf_common_list_node,
+ gf_mt_default_args_t, /* used only in one location */
+ gf_mt_default_args_cbk_t, /* used only in one location */
+ /*used for compound fops*/
+ gf_mt_compound_req_t, /* used only in one location */
+ gf_mt_compound_rsp_t, /* used only in one location */
+ gf_common_mt_tw_ctx, /* used only in one location */
+ gf_common_mt_tw_timer_list,
+ /*lock migration*/
+ gf_common_mt_lock_mig,
+ /* throttle */
+ gf_common_mt_tbf_t, /* used only in one location */
+ gf_common_mt_tbf_bucket_t, /* used only in one location */
+ gf_common_mt_tbf_throttle_t, /* used only in one location */
+ gf_common_mt_pthread_t, /* used only in one location */
+ gf_common_ping_local_t, /* used only in one location */
+ gf_common_volfile_t,
+ gf_common_mt_mgmt_v3_lock_timer_t, /* used only in one location */
+ gf_common_mt_server_cmdline_t, /* used only in one location */
+ gf_common_mt_latency_t,
+ gf_common_mt_end
+};
+#endif
diff --git a/libglusterfs/src/glusterfs/monitoring.h b/libglusterfs/src/glusterfs/monitoring.h
new file mode 100644
index 00000000000..09d9f54e734
--- /dev/null
+++ b/libglusterfs/src/glusterfs/monitoring.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __MONITORING_H__
+#define __MONITORING_H__
+
+#include "glusterfs/glusterfs.h"
+
+#define GLUSTER_METRICS_DIR "/var/run/gluster/metrics"
+
+char *
+gf_monitor_metrics(glusterfs_ctx_t *ctx);
+
+#endif /* __MONITORING_H__ */
diff --git a/libglusterfs/src/glusterfs/options.h b/libglusterfs/src/glusterfs/options.h
new file mode 100644
index 00000000000..747b13ba375
--- /dev/null
+++ b/libglusterfs/src/glusterfs/options.h
@@ -0,0 +1,327 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _OPTIONS_H
+#define _OPTIONS_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <inttypes.h>
+
+#include "glusterfs/xlator.h"
+#include "glusterfs/libglusterfs-messages.h"
+/* Add possible new type of option you may need */
+typedef enum {
+ GF_OPTION_TYPE_ANY = 0,
+ GF_OPTION_TYPE_STR,
+ GF_OPTION_TYPE_INT,
+ GF_OPTION_TYPE_SIZET,
+ GF_OPTION_TYPE_PERCENT,
+ GF_OPTION_TYPE_PERCENT_OR_SIZET,
+ GF_OPTION_TYPE_BOOL,
+ GF_OPTION_TYPE_XLATOR,
+ GF_OPTION_TYPE_PATH,
+ GF_OPTION_TYPE_TIME,
+ GF_OPTION_TYPE_DOUBLE,
+ GF_OPTION_TYPE_INTERNET_ADDRESS,
+ GF_OPTION_TYPE_INTERNET_ADDRESS_LIST,
+ GF_OPTION_TYPE_PRIORITY_LIST,
+ GF_OPTION_TYPE_SIZE_LIST,
+ GF_OPTION_TYPE_CLIENT_AUTH_ADDR,
+ GF_OPTION_TYPE_MAX,
+} volume_option_type_t;
+
+typedef enum {
+ GF_OPT_VALIDATE_BOTH = 0,
+ GF_OPT_VALIDATE_MIN,
+ GF_OPT_VALIDATE_MAX,
+} opt_validate_type_t;
+
+typedef enum {
+ OPT_FLAG_NONE = 0,
+ OPT_FLAG_SETTABLE = 1 << 0, /* can be set using volume set */
+ OPT_FLAG_CLIENT_OPT = 1 << 1, /* affects clients */
+ OPT_FLAG_GLOBAL = 1
+ << 2, /* affects all instances of the particular xlator */
+ OPT_FLAG_FORCE = 1 << 3, /* needs force to be reset */
+ OPT_FLAG_NEVER_RESET = 1 << 4, /* which should not be reset */
+ OPT_FLAG_DOC = 1 << 5, /* can be shown in volume set help */
+} opt_flags_t;
+
+typedef enum {
+ OPT_STATUS_ADVANCED = 0,
+ OPT_STATUS_BASIC = 1,
+ OPT_STATUS_EXPERIMENTAL = 2,
+ OPT_STATUS_DEPRECATED = 3,
+} opt_level_t;
+
+#define ZR_VOLUME_MAX_NUM_KEY 4
+#define ZR_OPTION_MAX_ARRAY_SIZE 64
+/* The maximum number of releases that an option could be backported to
+ * based on the release schedule as in August 2017 (3), plus one more
+ * Refer comment on volume_options.op_version for more information.
+ */
+#define GF_MAX_RELEASES 4
+
+/* Custom validation functoins for options
+ * TODO: Need to check what sorts of validation is being done, and decide if
+ * passing the volinfo is actually required. If it is, then we should possibly
+ * try a solution in GD2 for this.
+ */
+/* typedef int (*option_validation_fn) (glusterd_volinfo_t *volinfo, dict_t
+ *dict, char *key, char *value, char **op_errstr);
+ */
+
+/* Each translator should define this structure */
+/* XXX: This structure is in use by GD2, and SHOULD NOT be modified.
+ * If there is a need to add new members, add them to the end of the structure.
+ * If the struct must be modified, GD2 MUST be updated as well
+ */
+typedef struct volume_options {
+ char *key[ZR_VOLUME_MAX_NUM_KEY];
+ /* different key, same meaning */
+ volume_option_type_t type;
+ double min; /* 0 means no range */
+ double max; /* 0 means no range */
+ char *value[ZR_OPTION_MAX_ARRAY_SIZE];
+ /* If specified, will check for one of
+ the value from this array */
+ char *default_value;
+ char *description; /* about the key */
+ /* Required for int options where only the min value
+ * is given and is 0. This will cause validation not to
+ * happen
+ */
+ opt_validate_type_t validate;
+
+ /* The op-version at which this option was introduced.
+ * This is an array to support options that get backported to supported
+ * releases.
+ * Normally, an option introduced for a major release just has a single
+ * entry in the array, with op-version of the major release
+ * For an option that is backported, the op-versions of the all the
+ * releases it was ported to should be added, starting from the newest,
+ * to the oldest.
+ */
+ uint32_t op_version[GF_MAX_RELEASES];
+ /* The op-version at which this option was deprecated.
+ * Follows the same rules as above.
+ */
+ uint32_t deprecated[GF_MAX_RELEASES];
+ /* Additional flags for an option
+ * Check the OPT_FLAG_* enums for available flags
+ */
+ uint32_t flags;
+ /* Tags applicable to this option, which can be used to group similar
+ * options
+ */
+ char *tags[ZR_OPTION_MAX_ARRAY_SIZE];
+ /* A custom validation function if required
+ * TODO: See todo above for option_validation_fn
+ */
+ /* option_validation_fn validate_fn; */
+ /* This is actual key that should be set in the options dict. Can
+ * contain varstrings
+ */
+ char *setkey;
+
+ /* A 'level' is about the technical depth / understanding one
+ needs to handle the option. 'category' is based on
+ quality (ie, tests, people behind it, documentation available) */
+
+ /* The level at which the option is classified */
+ opt_level_t level;
+
+ /* Flag to understand how this option is categorized */
+ gf_category_t category;
+} volume_option_t;
+
+typedef struct vol_opt_list {
+ struct list_head list;
+ volume_option_t *given_opt;
+} volume_opt_list_t;
+
+int
+xlator_tree_reconfigure(xlator_t *old_xl, xlator_t *new_xl);
+int
+xlator_validate_rec(xlator_t *xlator, char **op_errstr);
+int
+graph_reconf_validateopt(glusterfs_graph_t *graph, char **op_errstr);
+int
+xlator_option_info_list(volume_opt_list_t *list, char *key, char **def_val,
+ char **descr);
+/*
+int validate_xlator_volume_options (xlator_t *xl, dict_t *options,
+ volume_option_t *opt, char **op_errstr);
+*/
+int
+xlator_options_validate_list(xlator_t *xl, dict_t *options,
+ volume_opt_list_t *list, char **op_errstr);
+int
+xlator_option_validate(xlator_t *xl, char *key, char *value,
+ volume_option_t *opt, char **op_errstr);
+int
+xlator_options_validate(xlator_t *xl, dict_t *options, char **errstr);
+
+int
+xlator_option_validate_addr_list(xlator_t *xl, const char *key,
+ const char *value, volume_option_t *opt,
+ char **op_errstr);
+
+volume_option_t *
+xlator_volume_option_get(xlator_t *xl, const char *key);
+
+volume_option_t *
+xlator_volume_option_get_list(volume_opt_list_t *vol_list, const char *key);
+
+#define DECLARE_INIT_OPT(type_t, type) \
+ int xlator_option_init_##type(xlator_t *this, dict_t *options, char *key, \
+ type_t *val_p);
+
+DECLARE_INIT_OPT(char *, str);
+DECLARE_INIT_OPT(uint64_t, uint64);
+DECLARE_INIT_OPT(int64_t, int64);
+DECLARE_INIT_OPT(uint32_t, uint32);
+DECLARE_INIT_OPT(int32_t, int32);
+DECLARE_INIT_OPT(uint64_t, size);
+DECLARE_INIT_OPT(uint64_t, size_uint64);
+DECLARE_INIT_OPT(double, percent);
+DECLARE_INIT_OPT(double, percent_or_size);
+DECLARE_INIT_OPT(gf_boolean_t, bool);
+DECLARE_INIT_OPT(xlator_t *, xlator);
+DECLARE_INIT_OPT(char *, path);
+DECLARE_INIT_OPT(double, double);
+DECLARE_INIT_OPT(uint32_t, time);
+
+#define DEFINE_INIT_OPT(type_t, type, conv) \
+ int xlator_option_init_##type(xlator_t *this, dict_t *options, char *key, \
+ type_t *val_p) \
+ { \
+ int ret = 0; \
+ volume_option_t *opt = NULL; \
+ char *def_value = NULL; \
+ char *set_value = NULL; \
+ char *value = NULL; \
+ xlator_t *old_THIS = NULL; \
+ \
+ opt = xlator_volume_option_get(this, key); \
+ if (!opt) { \
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ENTRY, \
+ "unknown option: %s", key); \
+ ret = -1; \
+ return ret; \
+ } \
+ def_value = opt->default_value; \
+ ret = dict_get_str(options, key, &set_value); \
+ \
+ if (def_value) \
+ value = def_value; \
+ if (set_value) \
+ value = set_value; \
+ if (!value) { \
+ gf_msg_trace(this->name, 0, "option %s not set", key); \
+ *val_p = (type_t)0; \
+ return 0; \
+ } \
+ if (value == def_value) { \
+ gf_msg_trace(this->name, 0, \
+ "option %s using default" \
+ " value %s", \
+ key, value); \
+ } else { \
+ gf_msg_debug(this->name, 0, \
+ "option %s using set" \
+ " value %s", \
+ key, value); \
+ } \
+ old_THIS = THIS; \
+ THIS = this; \
+ ret = conv(value, val_p); \
+ THIS = old_THIS; \
+ if (ret) { \
+ gf_msg(this->name, GF_LOG_INFO, 0, LG_MSG_CONVERSION_FAILED, \
+ "option %s conversion failed value %s", key, value); \
+ return ret; \
+ } \
+ ret = xlator_option_validate(this, key, value, opt, NULL); \
+ return ret; \
+ }
+
+#define GF_OPTION_INIT(key, val, type, err_label) \
+ do { \
+ int val_ret = 0; \
+ val_ret = xlator_option_init_##type(THIS, THIS->options, key, &(val)); \
+ if (val_ret) \
+ goto err_label; \
+ } while (0)
+
+#define DECLARE_RECONF_OPT(type_t, type) \
+ int xlator_option_reconf_##type(xlator_t *this, dict_t *options, \
+ char *key, int keylen, type_t *val_p);
+
+DECLARE_RECONF_OPT(char *, str);
+DECLARE_RECONF_OPT(uint64_t, uint64);
+DECLARE_RECONF_OPT(int64_t, int64);
+DECLARE_RECONF_OPT(uint32_t, uint32);
+DECLARE_RECONF_OPT(int32_t, int32);
+DECLARE_RECONF_OPT(uint64_t, size);
+DECLARE_RECONF_OPT(uint64_t, size_uint64);
+DECLARE_RECONF_OPT(double, percent);
+DECLARE_RECONF_OPT(double, percent_or_size);
+DECLARE_RECONF_OPT(gf_boolean_t, bool);
+DECLARE_RECONF_OPT(xlator_t *, xlator);
+DECLARE_RECONF_OPT(char *, path);
+DECLARE_RECONF_OPT(double, double);
+DECLARE_RECONF_OPT(uint32_t, time);
+
+#define DEFINE_RECONF_OPT(type_t, type, conv) \
+ int xlator_option_reconf_##type(xlator_t *this, dict_t *options, \
+ char *key, int keylen, type_t *val_p) \
+ { \
+ int ret = 0; \
+ char *value = NULL; \
+ xlator_t *old_THIS = NULL; \
+ \
+ volume_option_t *opt = xlator_volume_option_get(this, key); \
+ if (!opt) { \
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ENTRY, \
+ "unknown option: %s", key); \
+ return -1; \
+ } \
+ ret = dict_get_strn(options, key, keylen, &value); \
+ if (ret == 0 && value) { \
+ gf_msg(this->name, GF_LOG_INFO, 0, 0, \
+ "option %s using set value %s", key, value); \
+ } else if (opt->default_value) { \
+ value = opt->default_value; \
+ gf_msg_trace(this->name, 0, "option %s using default value %s", \
+ key, value); \
+ } else { \
+ gf_msg_trace(this->name, 0, "option %s not set", key); \
+ *val_p = (type_t)0; \
+ return 0; \
+ } \
+ \
+ old_THIS = THIS; \
+ THIS = this; \
+ ret = conv(value, val_p); \
+ THIS = old_THIS; \
+ if (ret) \
+ return ret; \
+ return xlator_option_validate(this, key, value, opt, NULL); \
+ }
+
+#define GF_OPTION_RECONF(key, val, opt, type, err_label) \
+ do { \
+ if (xlator_option_reconf_##type(THIS, opt, key, SLEN(key), &(val))) \
+ goto err_label; \
+ } while (0)
+
+#endif /* !_OPTIONS_H */
diff --git a/libglusterfs/src/glusterfs/parse-utils.h b/libglusterfs/src/glusterfs/parse-utils.h
new file mode 100644
index 00000000000..8653b9dd180
--- /dev/null
+++ b/libglusterfs/src/glusterfs/parse-utils.h
@@ -0,0 +1,50 @@
+/*
+ Copyright 2014-present Facebook. All Rights Reserved
+
+ This file is part of GlusterFS.
+
+ Author :
+ Shreyas Siravara <shreyas.siravara@gmail.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _PARSE_UTILS_H
+#define _PARSE_UTILS_H
+
+#include <regex.h>
+
+#define GF_PARSE "parse-utils"
+
+struct parser {
+ regex_t preg; /* Compiled regex */
+ regmatch_t pmatch[1]; /* The match */
+ char *complete_str; /* The string we are parsing */
+ char *regex; /* Regex used to parse the string */
+ char *_rstr; /* Temp string to hold offsets */
+};
+
+/* Initializes some of the parsers variables */
+struct parser *
+parser_init(const char *regex);
+
+/* Sets the string to parse */
+int
+parser_set_string(struct parser *parser, const char *complete_str);
+
+/* Frees memory used by the string after all matches are found */
+int
+parser_unset_string(struct parser *parser);
+
+/* Free memory used by the parser */
+void
+parser_deinit(struct parser *ptr);
+
+/* Get the next matching string */
+char *
+parser_get_next_match(struct parser *parser);
+
+#endif /* _PARSE_UTILS_H */
diff --git a/libglusterfs/src/glusterfs/quota-common-utils.h b/libglusterfs/src/glusterfs/quota-common-utils.h
new file mode 100644
index 00000000000..0096e340756
--- /dev/null
+++ b/libglusterfs/src/glusterfs/quota-common-utils.h
@@ -0,0 +1,68 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _QUOTA_COMMON_UTILS_H
+#define _QUOTA_COMMON_UTILS_H
+
+#include "glusterfs/iatt.h"
+
+#define GF_QUOTA_CONF_VERSION 1.2
+#define QUOTA_CONF_HEADER "GlusterFS Quota conf | version: v1.2\n"
+#define QUOTA_CONF_HEADER_1_1 "GlusterFS Quota conf | version: v1.1\n"
+
+typedef enum {
+ GF_QUOTA_CONF_TYPE_USAGE = 1,
+ GF_QUOTA_CONF_TYPE_OBJECTS
+} gf_quota_conf_type_t;
+
+struct _quota_limits {
+ int64_t hl;
+ int64_t sl;
+} __attribute__((__packed__));
+typedef struct _quota_limits quota_limits_t;
+
+struct _quota_meta {
+ int64_t size;
+ int64_t file_count;
+ int64_t dir_count;
+} __attribute__((__packed__));
+typedef struct _quota_meta quota_meta_t;
+
+gf_boolean_t
+quota_meta_is_null(const quota_meta_t *meta);
+
+int32_t
+quota_data_to_meta(data_t *data, quota_meta_t *meta);
+
+int32_t
+quota_dict_get_inode_meta(dict_t *dict, char *key, const int keylen,
+ quota_meta_t *meta);
+
+int32_t
+quota_dict_get_meta(dict_t *dict, char *key, const int keylen,
+ quota_meta_t *meta);
+
+int32_t
+quota_dict_set_meta(dict_t *dict, char *key, const quota_meta_t *meta,
+ ia_type_t ia_type);
+
+int32_t
+quota_conf_read_header(int fd, char *buf);
+
+int32_t
+quota_conf_read_version(int fd, float *version);
+
+int32_t
+quota_conf_read_gfid(int fd, void *buf, char *type, float version);
+
+int32_t
+quota_conf_skip_header(int fd);
+
+#endif /* _QUOTA_COMMON_UTILS_H */
diff --git a/libglusterfs/src/glusterfs/rbthash.h b/libglusterfs/src/glusterfs/rbthash.h
new file mode 100644
index 00000000000..4c731de69c2
--- /dev/null
+++ b/libglusterfs/src/glusterfs/rbthash.h
@@ -0,0 +1,75 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __RBTHASH_TABLE_H_
+#define __RBTHASH_TABLE_H_
+
+#include <stdint.h> // for uint32_t
+#include "glusterfs/glusterfs.h" // for gf_boolean_t, glusterfs_ctx_t
+#include "glusterfs/list.h" // for list_head
+#include "glusterfs/locking.h" // for gf_lock_t
+struct mem_pool;
+
+#define GF_RBTHASH_MEMPOOL 16384 // 1048576
+#define GF_RBTHASH "rbthash"
+
+struct rbthash_bucket {
+ struct rb_table *bucket;
+ gf_lock_t bucketlock;
+};
+
+typedef struct rbthash_entry {
+ void *data;
+ void *key;
+ int keylen;
+ uint32_t keyhash;
+ struct list_head list;
+} rbthash_entry_t;
+
+typedef uint32_t (*rbt_hasher_t)(void *data, int len);
+typedef void (*rbt_data_destroyer_t)(void *data);
+typedef void (*rbt_traverse_t)(void *data, void *mydata);
+
+typedef struct rbthash_table {
+ int size;
+ int numbuckets;
+ struct mem_pool *entrypool;
+ gf_lock_t tablelock;
+ struct rbthash_bucket *buckets;
+ rbt_hasher_t hashfunc;
+ rbt_data_destroyer_t dfunc;
+ gf_boolean_t pool_alloced;
+ struct list_head list;
+} rbthash_table_t;
+
+extern rbthash_table_t *
+rbthash_table_init(glusterfs_ctx_t *ctx, int buckets, rbt_hasher_t hfunc,
+ rbt_data_destroyer_t dfunc, unsigned long expected_entries,
+ struct mem_pool *entrypool);
+
+extern int
+rbthash_insert(rbthash_table_t *tbl, void *data, void *key, int keylen);
+
+extern void *
+rbthash_get(rbthash_table_t *tbl, void *key, int keylen);
+
+extern void *
+rbthash_remove(rbthash_table_t *tbl, void *key, int keylen);
+
+extern void *
+rbthash_replace(rbthash_table_t *tbl, void *key, int keylen, void *newdata);
+
+extern void
+rbthash_table_destroy(rbthash_table_t *tbl);
+
+extern void
+rbthash_table_traverse(rbthash_table_t *tbl, rbt_traverse_t traverse,
+ void *mydata);
+#endif
diff --git a/libglusterfs/src/glusterfs/refcount.h b/libglusterfs/src/glusterfs/refcount.h
new file mode 100644
index 00000000000..cf922dabb05
--- /dev/null
+++ b/libglusterfs/src/glusterfs/refcount.h
@@ -0,0 +1,101 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _REFCOUNT_H
+#define _REFCOUNT_H
+
+/* check for compiler support for __sync_*_and_fetch()
+ *
+ * a more comprehensive feature test is shown at
+ * http://lists.iptel.org/pipermail/semsdev/2010-October/005075.html
+ * this is sufficient for RHEL5 i386 builds
+ */
+#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)) && \
+ !defined(__i386__)
+#undef REFCOUNT_NEEDS_LOCK
+#else
+#define REFCOUNT_NEEDS_LOCK
+#include "glusterfs/locking.h"
+#endif /* compiler support for __sync_*_and_fetch() */
+
+typedef void (*gf_ref_release_t)(void *data);
+
+struct _gf_ref {
+#ifdef REFCOUNT_NEEDS_LOCK
+ gf_lock_t lk; /* lock for atomically adjust cnt */
+#endif
+ unsigned int cnt; /* number of users, free on 0 */
+
+ gf_ref_release_t release; /* cleanup when cnt == 0 */
+ void *data; /* parameter passed to release() */
+};
+typedef struct _gf_ref gf_ref_t;
+
+/* _gf_ref_get -- increase the refcount
+ *
+ * @return: greater then 0 when a reference was taken, 0 when not
+ */
+void *
+_gf_ref_get(gf_ref_t *ref);
+
+/* _gf_ref_put -- decrease the refcount
+ *
+ * @return: greater then 0 when there are still references, 0 when cleanup
+ * should be done, gf_ref_release_t is called on cleanup
+ */
+unsigned int
+_gf_ref_put(gf_ref_t *ref);
+
+/* _gf_ref_init -- initialize an embedded refcount object
+ *
+ * @release: function to call when the refcount == 0
+ * @data: parameter to be passed to @release
+ */
+void
+_gf_ref_init(gf_ref_t *ref, gf_ref_release_t release, void *data);
+
+/*
+ * Strong suggestion to use the simplified GF_REF_* API.
+ */
+
+/* GF_REF_DECL -- declaration to put inside your structure
+ *
+ * Example:
+ * struct my_struct {
+ * GF_REF_DECL;
+ *
+ * ... // additional members
+ * };
+ */
+#define GF_REF_DECL gf_ref_t _ref
+
+/* GF_REF_INIT -- initialize a GF_REF_DECL structure
+ *
+ * @p: allocated structure with GF_REF_DECL
+ * @d: destructor to call once refcounting reaches 0
+ *
+ * Sets the refcount to 1.
+ */
+#define GF_REF_INIT(p, d) _gf_ref_init(&(p)->_ref, (gf_ref_release_t)d, p)
+
+/* GF_REF_GET -- increase the refcount of a GF_REF_DECL structure
+ *
+ * @return: greater then 0 when a reference was taken, 0 when not
+ */
+#define GF_REF_GET(p) _gf_ref_get(&(p)->_ref)
+
+/* GF_REF_PUT -- decrease the refcount of a GF_REF_DECL structure
+ *
+ * @return: greater then 0 when there are still references, 0 when cleanup
+ * should be done, gf_ref_release_t is called on cleanup
+ */
+#define GF_REF_PUT(p) _gf_ref_put(&(p)->_ref)
+
+#endif /* _REFCOUNT_H */
diff --git a/libglusterfs/src/glusterfs/revision.h b/libglusterfs/src/glusterfs/revision.h
new file mode 100644
index 00000000000..3c404d30e78
--- /dev/null
+++ b/libglusterfs/src/glusterfs/revision.h
@@ -0,0 +1 @@
+#define GLUSTERFS_REPOSITORY_REVISION "git://git.gluster.org/glusterfs.git"
diff --git a/libglusterfs/src/glusterfs/rot-buffs.h b/libglusterfs/src/glusterfs/rot-buffs.h
new file mode 100644
index 00000000000..9dc227d58b8
--- /dev/null
+++ b/libglusterfs/src/glusterfs/rot-buffs.h
@@ -0,0 +1,125 @@
+/*
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __ROT_BUFFS_H
+#define __ROT_BUFFS_H
+
+#include "glusterfs/list.h"
+#include "glusterfs/locking.h"
+#include "glusterfs/common-utils.h"
+
+typedef struct rbuf_iovec {
+ struct iovec iov;
+
+ struct list_head list;
+} rbuf_iovec_t;
+
+#define RBUF_IOVEC_SIZE (sizeof(rbuf_iovec_t))
+
+typedef struct rbuf_list {
+ gf_lock_t c_lock;
+
+ pthread_mutex_t b_lock; /* protects this structure */
+ pthread_cond_t b_cond; /* signal for writer completion */
+
+ gf_boolean_t awaiting;
+
+ unsigned long long pending; /* pending writers */
+ unsigned long long completed; /* completed writers */
+
+ rbuf_iovec_t *rvec; /* currently used IO vector */
+
+ struct list_head veclist; /* list of attached rbuf_iov */
+
+ unsigned long long used; /* consumable entries
+ attached in ->veclist */
+ unsigned long long total; /* total entries in ->veclist (used
+ during deallocation) */
+
+ unsigned long seq[2]; /* if interested, this whould store
+ the start sequence number and the
+ range */
+
+ struct list_head list; /* attachment to rbuf_t */
+} rbuf_list_t;
+
+struct rlist_iter {
+ struct list_head veclist;
+
+ unsigned long long iter;
+};
+
+#define RLIST_ENTRY_COUNT(rlist) rlist->used
+
+#define rlist_iter_init(riter, rlist) \
+ do { \
+ (riter)->iter = rlist->used; \
+ (riter)->veclist = rlist->veclist; \
+ } while (0)
+
+#define rvec_for_each_entry(pos, riter) \
+ for (pos = list_entry((riter)->veclist.next, typeof(*pos), list); \
+ (riter)->iter > 0; \
+ pos = list_entry(pos->list.next, typeof(*pos), list), \
+ --((riter)->iter))
+
+/**
+ * Sequence number assignment routine is called during buffer
+ * switch under rbuff ->lock.
+ */
+typedef void(sequence_fn)(rbuf_list_t *, void *);
+
+#define RLIST_STORE_SEQ(rlist, start, range) \
+ do { \
+ rlist->seq[0] = start; \
+ rlist->seq[1] = range; \
+ } while (0)
+
+#define RLIST_GET_SEQ(rlist, start, range) \
+ do { \
+ start = rlist->seq[0]; \
+ range = rlist->seq[1]; \
+ } while (0)
+
+typedef struct rbuf {
+ gf_lock_t lock; /* protects "current" rlist */
+
+ rbuf_list_t *current; /* cached pointer to first free rlist */
+
+ struct list_head freelist;
+} rbuf_t;
+
+typedef enum {
+ RBUF_CONSUMABLE = 1,
+ RBUF_BUSY,
+ RBUF_EMPTY,
+ RBUF_WOULD_STARVE,
+} rlist_retval_t;
+
+/* Initialization/Destruction */
+rbuf_t *
+rbuf_init(int);
+void
+rbuf_dtor(rbuf_t *);
+
+/* Producer API */
+char *
+rbuf_reserve_write_area(rbuf_t *, size_t, void **);
+int
+rbuf_write_complete(void *);
+
+/* Consumer API */
+int
+rbuf_get_buffer(rbuf_t *, void **, sequence_fn *, void *);
+int
+rbuf_wait_for_completion(rbuf_t *, void *, void (*)(rbuf_list_t *, void *),
+ void *);
+
+#endif
diff --git a/libglusterfs/src/run.h b/libglusterfs/src/glusterfs/run.h
index 508c59c1375..76af95fd27f 100644
--- a/libglusterfs/src/run.h
+++ b/libglusterfs/src/glusterfs/run.h
@@ -14,12 +14,12 @@
#define RUN_PIPE -1
struct runner {
- char **argv;
- unsigned argvlen;
- int runerr;
- pid_t chpid;
- int chfd[3];
- FILE *chio[3];
+ char **argv;
+ unsigned argvlen;
+ int runerr;
+ pid_t chpid;
+ int chfd[3];
+ FILE *chio[3];
};
typedef struct runner runner_t;
@@ -29,7 +29,8 @@ typedef struct runner runner_t;
*
* @param runner pointer to runner_t instance
*/
-void runinit (runner_t *runner);
+void
+runinit(runner_t *runner);
/**
* get FILE pointer to which child's stdio is redirected.
@@ -40,7 +41,8 @@ void runinit (runner_t *runner);
*
* @see runner_redir()
*/
-FILE *runner_chio (runner_t *runner, int fd);
+FILE *
+runner_chio(runner_t *runner, int fd);
/**
* add an argument.
@@ -52,7 +54,8 @@ FILE *runner_chio (runner_t *runner, int fd);
* @param runner pointer to runner_t instance
* @param arg command line argument
*/
-void runner_add_arg (runner_t *runner, const char *arg);
+void
+runner_add_arg(runner_t *runner, const char *arg);
/**
* add a sequence of arguments.
@@ -66,7 +69,8 @@ void runner_add_arg (runner_t *runner, const char *arg);
*
* @see runner_add_arg()
*/
-void runner_add_args (runner_t *runner, ...);
+void
+runner_add_args(runner_t *runner, ...);
/**
* add an argument with printf style formatting.
@@ -76,8 +80,9 @@ void runner_add_args (runner_t *runner, ...);
* @param runner pointer to runner_t instance
* @param format printf style format specifier
*/
-void runner_argprintf (runner_t *runner, const char *format, ...);
-
+void
+runner_argprintf(runner_t *runner, const char *format, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
/**
* log a message about the command to be run.
*
@@ -89,8 +94,9 @@ void runner_argprintf (runner_t *runner, const char *format, ...);
*
* @see gf_log()
*/
-void runner_log (runner_t *runner, const char *dom, gf_loglevel_t lvl,
- const char *msg);
+void
+runner_log(runner_t *runner, const char *dom, gf_loglevel_t lvl,
+ const char *msg);
/**
* set up redirection for child.
@@ -111,19 +117,20 @@ void runner_log (runner_t *runner, const char *dom, gf_loglevel_t lvl,
* @see runner_start(), dup(2), runner_chio(), runner_start()
*/
void
-runner_redir (runner_t *runner, int fd, int tgt_fd);
+runner_redir(runner_t *runner, int fd, int tgt_fd);
/**
* spawn child with accumulated arg list.
*
* @param runner pointer to runner_t instance
*
- * @return 0 on succesful spawn
+ * @return 0 on successful spawn
* -1 on failure (either due to earlier errors or execve(2) failing)
*
* @see runner_cout()
*/
-int runner_start (runner_t *runner);
+int
+runner_start(runner_t *runner);
/**
* complete operation and free resources.
@@ -140,7 +147,8 @@ int runner_start (runner_t *runner);
*
* @see waitpid(2)
*/
-int runner_end (runner_t *runner);
+int
+runner_end(runner_t *runner);
/**
* variant of runner_end() which does not free internal data
@@ -148,7 +156,8 @@ int runner_end (runner_t *runner);
*
* @see runner_end()
*/
-int runner_end_reuse (runner_t *runner);
+int
+runner_end_reuse(runner_t *runner);
/**
* spawn and child, take it to completion and free resources.
@@ -163,7 +172,15 @@ int runner_end_reuse (runner_t *runner);
*
* @see runner_start(), runner_end()
*/
-int runner_run (runner_t *runner);
+int
+runner_run(runner_t *runner);
+
+/**
+ * variant for runner_run() which does not wait for acknowledgement
+ * from child, and always assumes it succeeds.
+ */
+int
+runner_run_nowait(runner_t *runner);
/**
* variant of runner_run() which does not free internal data
@@ -171,7 +188,8 @@ int runner_run (runner_t *runner);
*
* @see runner_run()
*/
-int runner_run_reuse (runner_t *runner);
+int
+runner_run_reuse(runner_t *runner);
/**
* run a command with args.
@@ -183,6 +201,7 @@ int runner_run_reuse (runner_t *runner);
* @return 0 on success
* -1 on failure
*/
-int runcmd (const char *arg, ...);
+int
+runcmd(const char *arg, ...);
#endif
diff --git a/libglusterfs/src/glusterfs/stack.h b/libglusterfs/src/glusterfs/stack.h
new file mode 100644
index 00000000000..536a330d38b
--- /dev/null
+++ b/libglusterfs/src/glusterfs/stack.h
@@ -0,0 +1,555 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/*
+ This file defines MACROS and static inlines used to emulate a function
+ call over asynchronous communication with remote server
+*/
+
+#ifndef _STACK_H
+#define _STACK_H
+
+struct _call_stack;
+typedef struct _call_stack call_stack_t;
+struct _call_frame;
+typedef struct _call_frame call_frame_t;
+struct call_pool;
+typedef struct call_pool call_pool_t;
+
+#include <sys/time.h>
+
+#include "glusterfs/xlator.h"
+#include "glusterfs/dict.h"
+#include "glusterfs/list.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/lkowner.h"
+#include "glusterfs/client_t.h"
+#include "glusterfs/libglusterfs-messages.h"
+#include "glusterfs/timespec.h"
+
+#define NFS_PID 1
+#define LOW_PRIO_PROC_PID -1
+
+#define STACK_ERR_XL_NAME(stack) (stack->err_xl ? stack->err_xl->name : "-")
+#define STACK_CLIENT_NAME(stack) \
+ (stack->client ? stack->client->client_uid : "-")
+
+typedef int32_t (*ret_fn_t)(call_frame_t *frame, call_frame_t *prev_frame,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ ...);
+
+void
+gf_frame_latency_update(call_frame_t *frame);
+
+struct call_pool {
+ union {
+ struct list_head all_frames;
+ struct {
+ call_stack_t *next_call;
+ call_stack_t *prev_call;
+ } all_stacks;
+ };
+ int64_t cnt;
+ gf_atomic_t total_count;
+ gf_lock_t lock;
+ struct mem_pool *frame_mem_pool;
+ struct mem_pool *stack_mem_pool;
+};
+
+struct _call_frame {
+ call_stack_t *root; /* stack root */
+ call_frame_t *parent; /* previous BP */
+ struct list_head frames;
+ void *local; /* local variables */
+ xlator_t *this; /* implicit object */
+ ret_fn_t ret; /* op_return address */
+ int32_t ref_count;
+ gf_lock_t lock;
+ void *cookie; /* unique cookie */
+ gf_boolean_t complete;
+
+ glusterfs_fop_t op;
+ struct timespec begin; /* when this frame was created */
+ struct timespec end; /* when this frame completed */
+ const char *wind_from;
+ const char *wind_to;
+ const char *unwind_from;
+ const char *unwind_to;
+};
+
+struct _ns_info {
+ uint32_t hash; /* Hash of the namespace from SuperFastHash */
+ gf_boolean_t found; /* Set to true if we found a namespace */
+};
+
+typedef struct _ns_info ns_info_t;
+
+#define SMALL_GROUP_COUNT 128
+
+struct _call_stack {
+ union {
+ struct list_head all_frames;
+ struct {
+ call_stack_t *next_call;
+ call_stack_t *prev_call;
+ };
+ };
+ call_pool_t *pool;
+ gf_lock_t stack_lock;
+ client_t *client;
+ uint64_t unique;
+ void *state; /* pointer to request state */
+ uid_t uid;
+ gid_t gid;
+ pid_t pid;
+ char identifier[UNIX_PATH_MAX];
+ uint16_t ngrps;
+ uint32_t groups_small[SMALL_GROUP_COUNT];
+ uint32_t *groups_large;
+ uint32_t *groups;
+ gf_lkowner_t lk_owner;
+ glusterfs_ctx_t *ctx;
+
+ struct list_head myframes; /* List of call_frame_t that go
+ to make the call stack */
+
+ int32_t op;
+ int8_t type;
+ struct timespec tv;
+ xlator_t *err_xl;
+ int32_t error;
+
+ uint32_t flags; /* use it wisely, think of it as a mechanism to
+ send information over the wire too */
+ struct timespec ctime; /* timestamp, most probably set at
+ creation of stack. */
+
+ ns_info_t ns_info;
+};
+
+/* call_stack flags field users */
+#define MDATA_CTIME (1 << 0)
+#define MDATA_MTIME (1 << 1)
+#define MDATA_ATIME (1 << 2)
+#define MDATA_PAR_CTIME (1 << 3)
+#define MDATA_PAR_MTIME (1 << 4)
+#define MDATA_PAR_ATIME (1 << 5)
+
+#define frame_set_uid_gid(frm, u, g) \
+ do { \
+ if (frm) { \
+ (frm)->root->uid = u; \
+ (frm)->root->gid = g; \
+ (frm)->root->ngrps = 0; \
+ } \
+ } while (0);
+
+struct xlator_fops;
+
+static inline void
+FRAME_DESTROY(call_frame_t *frame)
+{
+ void *local = NULL;
+
+ if (frame->root->ctx->measure_latency)
+ gf_frame_latency_update(frame);
+
+ list_del_init(&frame->frames);
+ if (frame->local) {
+ local = frame->local;
+ frame->local = NULL;
+ }
+
+ LOCK_DESTROY(&frame->lock);
+ mem_put(frame);
+
+ if (local)
+ mem_put(local);
+}
+
+static inline void
+STACK_DESTROY(call_stack_t *stack)
+{
+ call_frame_t *frame = NULL;
+ call_frame_t *tmp = NULL;
+
+ LOCK(&stack->pool->lock);
+ {
+ list_del_init(&stack->all_frames);
+ stack->pool->cnt--;
+ }
+ UNLOCK(&stack->pool->lock);
+
+ LOCK_DESTROY(&stack->stack_lock);
+
+ list_for_each_entry_safe(frame, tmp, &stack->myframes, frames)
+ {
+ FRAME_DESTROY(frame);
+ }
+
+ GF_FREE(stack->groups_large);
+
+ mem_put(stack);
+}
+
+static inline void
+STACK_RESET(call_stack_t *stack)
+{
+ call_frame_t *frame = NULL;
+ call_frame_t *tmp = NULL;
+ call_frame_t *last = NULL;
+ struct list_head toreset = {0};
+
+ INIT_LIST_HEAD(&toreset);
+
+ /* We acquire call_pool->lock only to remove the frames from this stack
+ * to preserve atomicity. This synchronizes across concurrent requests
+ * like statedump, STACK_DESTROY etc. */
+
+ LOCK(&stack->pool->lock);
+ {
+ last = list_last_entry(&stack->myframes, call_frame_t, frames);
+ list_del_init(&last->frames);
+ list_splice_init(&stack->myframes, &toreset);
+ list_add(&last->frames, &stack->myframes);
+ }
+ UNLOCK(&stack->pool->lock);
+
+ list_for_each_entry_safe(frame, tmp, &toreset, frames)
+ {
+ FRAME_DESTROY(frame);
+ }
+}
+
+#define FRAME_SU_DO(frm, local_type) \
+ do { \
+ local_type *__local = (frm)->local; \
+ __local->uid = frm->root->uid; \
+ __local->gid = frm->root->gid; \
+ __local->pid = frm->root->pid; \
+ frm->root->uid = 0; \
+ frm->root->gid = 0; \
+ frm->root->pid = GF_CLIENT_PID_NO_ROOT_SQUASH; \
+ } while (0);
+
+#define FRAME_SU_UNDO(frm, local_type) \
+ do { \
+ local_type *__local = (frm)->local; \
+ frm->root->uid = __local->uid; \
+ frm->root->gid = __local->gid; \
+ frm->root->pid = __local->pid; \
+ } while (0);
+
+/* NOTE: make sure to keep this as an macro, mainly because, we need 'fn'
+ field here to be the proper fn ptr, so its address is valid entry in
+ 'xlator_fops' struct.
+ To understand this, check the `xlator.h:struct xlator_fops`, and then
+ see a STACK_WIND call, which generally calls `subvol->fops->fop`, so
+ the address offset should give the index */
+
+/* +1 is required as 0 means NULL fop, and we don't have a variable for it */
+#define get_fop_index_from_fn(xl, fn) \
+ (1 + (((long)&(fn) - (long)&((xl)->fops->stat)) / sizeof(void *)))
+
+/* NOTE: the above reason holds good here too. But notice that we are getting
+ the base address of the 'stat' fop, which is the first entry in the fop
+ structure. All we need to do is move as much as 'idx' fields, and get the
+ actual pointer from that field. */
+
+static inline void *
+get_the_pt_fop(void *base_fop, int fop_idx)
+{
+ void *target_addr = (base_fop + ((fop_idx - 1) * sizeof(void *)));
+ /* all below type casting is for not getting warning. */
+ return (void *)*(unsigned long *)target_addr;
+}
+
+/* make a call without switching frames */
+#define STACK_WIND_TAIL(frame, obj, fn, params...) \
+ do { \
+ xlator_t *old_THIS = NULL; \
+ xlator_t *next_xl = obj; \
+ typeof(fn) next_xl_fn = fn; \
+ int opn = get_fop_index_from_fn((next_xl), (fn)); \
+ \
+ frame->this = next_xl; \
+ frame->wind_to = #fn; \
+ old_THIS = THIS; \
+ THIS = next_xl; \
+ gf_msg_trace("stack-trace", 0, \
+ "stack-address: %p, " \
+ "winding from %s to %s", \
+ frame->root, old_THIS->name, THIS->name); \
+ /* Need to capture counts at leaf node */ \
+ if (!next_xl->pass_through && !next_xl->children) { \
+ GF_ATOMIC_INC(next_xl->stats.total.metrics[opn].fop); \
+ GF_ATOMIC_INC(next_xl->stats.interval.metrics[opn].fop); \
+ GF_ATOMIC_INC(next_xl->stats.total.count); \
+ GF_ATOMIC_INC(next_xl->stats.interval.count); \
+ } \
+ \
+ if (next_xl->pass_through) { \
+ next_xl_fn = get_the_pt_fop(&next_xl->pass_through_fops->stat, \
+ opn); \
+ } \
+ next_xl_fn(frame, next_xl, params); \
+ THIS = old_THIS; \
+ } while (0)
+
+/* make a call */
+#define STACK_WIND(frame, rfn, obj, fn, params...) \
+ STACK_WIND_COMMON(frame, rfn, 0, NULL, obj, fn, params)
+
+/* make a call with a cookie */
+#define STACK_WIND_COOKIE(frame, rfn, cky, obj, fn, params...) \
+ STACK_WIND_COMMON(frame, rfn, 1, cky, obj, fn, params)
+
+/* Cookie passed as the argument can be NULL (ptr) or 0 (int). Hence we
+ have to have a mechanism to separate out the two STACK_WIND formats.
+ Needed a common macro, as other than for cookie, all the other code
+ is common across.
+ */
+#define STACK_WIND_COMMON(frame, rfn, has_cookie, cky, obj, fn, params...) \
+ do { \
+ call_frame_t *_new = NULL; \
+ xlator_t *old_THIS = NULL; \
+ typeof(fn) next_xl_fn = fn; \
+ \
+ _new = mem_get0(frame->root->pool->frame_mem_pool); \
+ if (!_new) { \
+ break; \
+ } \
+ typeof(fn##_cbk) tmp_cbk = rfn; \
+ _new->root = frame->root; \
+ _new->this = obj; \
+ _new->ret = (ret_fn_t)tmp_cbk; \
+ _new->parent = frame; \
+ /* (void *) is required for avoiding gcc warning */ \
+ _new->cookie = ((has_cookie == 1) ? (void *)(cky) : (void *)_new); \
+ _new->wind_from = __FUNCTION__; \
+ _new->wind_to = #fn; \
+ _new->unwind_to = #rfn; \
+ LOCK_INIT(&_new->lock); \
+ LOCK(&frame->root->stack_lock); \
+ { \
+ list_add(&_new->frames, &frame->root->myframes); \
+ frame->ref_count++; \
+ } \
+ UNLOCK(&frame->root->stack_lock); \
+ fn##_cbk = rfn; \
+ old_THIS = THIS; \
+ THIS = obj; \
+ gf_msg_trace("stack-trace", 0, \
+ "stack-address: %p, " \
+ "winding from %s to %s", \
+ frame->root, old_THIS->name, THIS->name); \
+ if (obj->ctx->measure_latency) \
+ timespec_now(&_new->begin); \
+ _new->op = get_fop_index_from_fn((_new->this), (fn)); \
+ if (!obj->pass_through) { \
+ GF_ATOMIC_INC(obj->stats.total.metrics[_new->op].fop); \
+ GF_ATOMIC_INC(obj->stats.interval.metrics[_new->op].fop); \
+ GF_ATOMIC_INC(obj->stats.total.count); \
+ GF_ATOMIC_INC(obj->stats.interval.count); \
+ } else { \
+ /* we want to get to the actual fop to call */ \
+ next_xl_fn = get_the_pt_fop(&obj->pass_through_fops->stat, \
+ _new->op); \
+ } \
+ next_xl_fn(_new, obj, params); \
+ THIS = old_THIS; \
+ } while (0)
+
+#define STACK_UNWIND STACK_UNWIND_STRICT
+
+/* return from function in type-safe way */
+#define STACK_UNWIND_STRICT(fop, frame, op_ret, op_errno, params...) \
+ do { \
+ fop_##fop##_cbk_t fn = NULL; \
+ call_frame_t *_parent = NULL; \
+ xlator_t *old_THIS = NULL; \
+ \
+ if (!frame) { \
+ gf_msg("stack", GF_LOG_CRITICAL, 0, LG_MSG_FRAME_ERROR, "!frame"); \
+ break; \
+ } \
+ if ((op_ret) < 0) { \
+ gf_msg_debug("stack-trace", op_errno, \
+ "stack-address: %p, " \
+ "%s returned %d error: %s", \
+ frame->root, THIS->name, (int32_t)(op_ret), \
+ strerror(op_errno)); \
+ } else { \
+ gf_msg_trace("stack-trace", 0, \
+ "stack-address: %p, " \
+ "%s returned %d", \
+ frame->root, THIS->name, (int32_t)(op_ret)); \
+ } \
+ fn = (fop_##fop##_cbk_t)frame->ret; \
+ _parent = frame->parent; \
+ LOCK(&frame->root->stack_lock); \
+ { \
+ _parent->ref_count--; \
+ if ((op_ret) < 0 && (op_errno) != frame->root->error) { \
+ frame->root->err_xl = frame->this; \
+ frame->root->error = (op_errno); \
+ } else if ((op_ret) == 0) { \
+ frame->root->err_xl = NULL; \
+ frame->root->error = 0; \
+ } \
+ } \
+ UNLOCK(&frame->root->stack_lock); \
+ old_THIS = THIS; \
+ THIS = _parent->this; \
+ frame->complete = _gf_true; \
+ frame->unwind_from = __FUNCTION__; \
+ if (frame->this->ctx->measure_latency) { \
+ timespec_now(&frame->end); \
+ /* required for top most xlator */ \
+ if (_parent->ret == NULL) \
+ timespec_now(&_parent->end); \
+ } \
+ if (op_ret < 0) { \
+ GF_ATOMIC_INC(THIS->stats.total.metrics[frame->op].cbk); \
+ GF_ATOMIC_INC(THIS->stats.interval.metrics[frame->op].cbk); \
+ } \
+ fn(_parent, frame->cookie, _parent->this, op_ret, op_errno, params); \
+ THIS = old_THIS; \
+ } while (0)
+
+static inline int
+call_stack_alloc_groups(call_stack_t *stack, int ngrps)
+{
+ if (ngrps <= SMALL_GROUP_COUNT) {
+ stack->groups = stack->groups_small;
+ } else {
+ GF_FREE(stack->groups_large);
+ stack->groups_large = GF_CALLOC(ngrps, sizeof(gid_t),
+ gf_common_mt_groups_t);
+ if (!stack->groups_large)
+ return -1;
+ stack->groups = stack->groups_large;
+ }
+
+ stack->ngrps = ngrps;
+
+ return 0;
+}
+
+static inline int
+call_stack_groups_capacity(call_stack_t *stack)
+{
+ return max(stack->ngrps, SMALL_GROUP_COUNT);
+}
+
+static inline int
+call_frames_count(call_stack_t *call_stack)
+{
+ call_frame_t *pos;
+ int32_t count = 0;
+
+ if (!call_stack)
+ return count;
+
+ list_for_each_entry(pos, &call_stack->myframes, frames) count++;
+
+ return count;
+}
+
+static inline call_frame_t *
+copy_frame(call_frame_t *frame)
+{
+ call_stack_t *newstack = NULL;
+ call_stack_t *oldstack = NULL;
+ call_frame_t *newframe = NULL;
+
+ if (!frame) {
+ return NULL;
+ }
+
+ newstack = mem_get0(frame->root->pool->stack_mem_pool);
+ if (newstack == NULL) {
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&newstack->myframes);
+
+ newframe = mem_get0(frame->root->pool->frame_mem_pool);
+ if (!newframe) {
+ mem_put(newstack);
+ return NULL;
+ }
+
+ newframe->this = frame->this;
+ newframe->root = newstack;
+ INIT_LIST_HEAD(&newframe->frames);
+ list_add(&newframe->frames, &newstack->myframes);
+
+ oldstack = frame->root;
+
+ newstack->uid = oldstack->uid;
+ newstack->gid = oldstack->gid;
+ newstack->pid = oldstack->pid;
+ newstack->op = oldstack->op;
+ newstack->type = oldstack->type;
+ newstack->ctime = oldstack->ctime;
+ newstack->flags = oldstack->flags;
+ if (call_stack_alloc_groups(newstack, oldstack->ngrps) != 0) {
+ mem_put(newstack);
+ return NULL;
+ }
+ if (!oldstack->groups) {
+ gf_msg_debug("stack", EINVAL, "groups is null (ngrps: %d)",
+ oldstack->ngrps);
+ /* Considering 'groups' is NULL, set ngrps to 0 */
+ oldstack->ngrps = 0;
+
+ if (oldstack->groups_large)
+ oldstack->groups = oldstack->groups_large;
+ else
+ oldstack->groups = oldstack->groups_small;
+ }
+ newstack->ngrps = oldstack->ngrps;
+ memcpy(newstack->groups, oldstack->groups, sizeof(gid_t) * oldstack->ngrps);
+ newstack->unique = oldstack->unique;
+ newstack->pool = oldstack->pool;
+ newstack->lk_owner = oldstack->lk_owner;
+ newstack->ctx = oldstack->ctx;
+
+ if (newstack->ctx->measure_latency) {
+ timespec_now(&newstack->tv);
+ memcpy(&newframe->begin, &newstack->tv, sizeof(newstack->tv));
+ }
+
+ LOCK_INIT(&newframe->lock);
+ LOCK_INIT(&newstack->stack_lock);
+
+ LOCK(&oldstack->pool->lock);
+ {
+ list_add(&newstack->all_frames, &oldstack->all_frames);
+ newstack->pool->cnt++;
+ }
+ UNLOCK(&oldstack->pool->lock);
+ GF_ATOMIC_INC(newstack->pool->total_count);
+
+ return newframe;
+}
+
+void
+call_stack_set_groups(call_stack_t *stack, int ngrps, gid_t **groupbuf_p);
+void
+gf_proc_dump_pending_frames(call_pool_t *call_pool);
+void
+gf_proc_dump_pending_frames_to_dict(call_pool_t *call_pool, dict_t *dict);
+call_frame_t *
+create_frame(xlator_t *xl, call_pool_t *pool);
+gf_boolean_t
+__is_fuse_call(call_frame_t *frame);
+#endif /* _STACK_H */
diff --git a/libglusterfs/src/glusterfs/statedump.h b/libglusterfs/src/glusterfs/statedump.h
new file mode 100644
index 00000000000..ce082706bdf
--- /dev/null
+++ b/libglusterfs/src/glusterfs/statedump.h
@@ -0,0 +1,132 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef STATEDUMP_H
+#define STATEDUMP_H
+
+#include <stdarg.h>
+#include "glusterfs/inode.h"
+#include "glusterfs/strfd.h"
+
+#define GF_DUMP_MAX_BUF_LEN 4096
+
+typedef struct gf_dump_xl_options_ {
+ gf_boolean_t dump_priv;
+ gf_boolean_t dump_inode;
+ gf_boolean_t dump_fd;
+ gf_boolean_t dump_inodectx;
+ gf_boolean_t dump_fdctx;
+ gf_boolean_t dump_history;
+} gf_dump_xl_options_t;
+
+typedef struct gf_dump_options_ {
+ gf_boolean_t dump_mem;
+ gf_boolean_t dump_iobuf;
+ gf_boolean_t dump_callpool;
+ gf_dump_xl_options_t xl_options; // options for all xlators
+ char *dump_path;
+} gf_dump_options_t;
+
+extern gf_dump_options_t dump_options;
+
+__attribute__((__format__(__printf__, 3, 4))) static inline void
+_gf_proc_dump_build_key(char *key, const char *prefix, const char *fmt, ...)
+{
+ va_list ap;
+ int32_t len;
+
+ len = snprintf(key, GF_DUMP_MAX_BUF_LEN, "%s.", prefix);
+ if (len >= 0) {
+ va_start(ap, fmt);
+ len = vsnprintf(key + len, GF_DUMP_MAX_BUF_LEN - len, fmt, ap);
+ va_end(ap);
+ }
+ if (len < 0) {
+ *key = 0;
+ }
+}
+
+#define gf_proc_dump_build_key(key, key_prefix, fmt...) \
+ { \
+ _gf_proc_dump_build_key(key, key_prefix, ##fmt); \
+ }
+
+#define GF_PROC_DUMP_SET_OPTION(opt, val) opt = val
+
+#define GF_CHECK_DUMP_OPTION_ENABLED(option_dump, var, label) \
+ do { \
+ if (option_dump == _gf_true) { \
+ var = _gf_false; \
+ goto label; \
+ } \
+ } while (0);
+
+void
+gf_proc_dump_init();
+
+void
+gf_proc_dump_fini(void);
+
+void
+gf_proc_dump_cleanup(void);
+
+void
+gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx);
+
+int
+gf_proc_dump_add_section(char *key, ...)
+ __attribute__((__format__(__printf__, 1, 2)));
+
+int
+gf_proc_dump_write(char *key, char *value, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+
+void
+inode_table_dump(inode_table_t *itable, char *prefix);
+
+void
+inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict);
+
+void
+fdtable_dump(fdtable_t *fdtable, char *prefix);
+
+void
+fdtable_dump_to_dict(fdtable_t *fdtable, char *prefix, dict_t *dict);
+
+void
+inode_dump(inode_t *inode, char *prefix);
+
+void
+gf_proc_dump_mem_info_to_dict(dict_t *dict);
+
+void
+gf_proc_dump_mempool_info_to_dict(glusterfs_ctx_t *ctx, dict_t *dict);
+
+void
+glusterd_init(int sig);
+
+void
+gf_proc_dump_xlator_private(xlator_t *this, strfd_t *strfd);
+
+void
+gf_proc_dump_mallinfo(strfd_t *strfd);
+
+void
+gf_proc_dump_xlator_history(xlator_t *this, strfd_t *strfd);
+
+void
+gf_proc_dump_xlator_meminfo(xlator_t *this, strfd_t *strfd);
+
+void
+gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd);
+
+void
+gf_latency_statedump_and_reset(char *key, gf_latency_t *lat);
+#endif /* STATEDUMP_H */
diff --git a/libglusterfs/src/glusterfs/store.h b/libglusterfs/src/glusterfs/store.h
new file mode 100644
index 00000000000..a1f70c7b840
--- /dev/null
+++ b/libglusterfs/src/glusterfs/store.h
@@ -0,0 +1,112 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_STORE_H_
+#define _GLUSTERD_STORE_H_
+
+#include "glusterfs/compat.h"
+#include "glusterfs/glusterfs.h"
+
+struct gf_store_handle_ {
+ char *path;
+ int fd;
+ int tmp_fd;
+ FILE *read;
+ int locked; /* state of lockf() */
+};
+
+typedef struct gf_store_handle_ gf_store_handle_t;
+
+struct gf_store_iter_ {
+ FILE *file;
+ char filepath[PATH_MAX];
+};
+
+typedef struct gf_store_iter_ gf_store_iter_t;
+
+typedef enum {
+ GD_STORE_SUCCESS,
+ GD_STORE_KEY_NULL,
+ GD_STORE_VALUE_NULL,
+ GD_STORE_KEY_VALUE_NULL,
+ GD_STORE_EOF,
+ GD_STORE_ENOMEM,
+ GD_STORE_STAT_FAILED
+} gf_store_op_errno_t;
+
+int32_t
+gf_store_mkdir(char *path);
+
+int32_t
+gf_store_handle_create_on_absence(gf_store_handle_t **shandle, char *path);
+
+int32_t
+gf_store_mkstemp(gf_store_handle_t *shandle);
+
+int
+gf_store_sync_direntry(char *path);
+
+int32_t
+gf_store_rename_tmppath(gf_store_handle_t *shandle);
+
+int32_t
+gf_store_unlink_tmppath(gf_store_handle_t *shandle);
+
+int
+gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
+ gf_store_op_errno_t *store_errno);
+
+int32_t
+gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value);
+
+int32_t
+gf_store_save_value(int fd, char *key, char *value);
+
+int32_t
+gf_store_save_items(int fd, char *items);
+
+int32_t
+gf_store_handle_new(const char *path, gf_store_handle_t **handle);
+
+int
+gf_store_handle_retrieve(char *path, gf_store_handle_t **handle);
+
+int32_t
+gf_store_handle_destroy(gf_store_handle_t *handle);
+
+int32_t
+gf_store_iter_new(gf_store_handle_t *shandle, gf_store_iter_t **iter);
+
+int32_t
+gf_store_validate_key_value(char *storepath, char *key, char *val,
+ gf_store_op_errno_t *op_errno);
+
+int32_t
+gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value,
+ gf_store_op_errno_t *op_errno);
+
+int32_t
+gf_store_iter_get_matching(gf_store_iter_t *iter, char *key, char **value);
+
+int32_t
+gf_store_iter_destroy(gf_store_iter_t **iter);
+
+char *
+gf_store_strerror(gf_store_op_errno_t op_errno);
+
+int
+gf_store_lock(gf_store_handle_t *sh);
+
+void
+gf_store_unlock(gf_store_handle_t *sh);
+
+int
+gf_store_locked_local(gf_store_handle_t *sh);
+
+#endif
diff --git a/libglusterfs/src/glusterfs/strfd.h b/libglusterfs/src/glusterfs/strfd.h
new file mode 100644
index 00000000000..861cd02e005
--- /dev/null
+++ b/libglusterfs/src/glusterfs/strfd.h
@@ -0,0 +1,34 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _STRFD_H
+#define _STRFD_H
+
+typedef struct {
+ void *data;
+ size_t alloc_size;
+ size_t size;
+ off_t pos;
+} strfd_t;
+
+strfd_t *
+strfd_open();
+
+int
+strprintf(strfd_t *strfd, const char *fmt, ...)
+ __attribute__((__format__(__printf__, 2, 3)));
+
+int
+strvprintf(strfd_t *strfd, const char *fmt, va_list ap);
+
+int
+strfd_close(strfd_t *strfd);
+
+#endif
diff --git a/libglusterfs/src/glusterfs/syncop-utils.h b/libglusterfs/src/glusterfs/syncop-utils.h
new file mode 100644
index 00000000000..1f3ee403edc
--- /dev/null
+++ b/libglusterfs/src/glusterfs/syncop-utils.h
@@ -0,0 +1,54 @@
+/*
+ Copyright (c) 2015, Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _SYNCOP_UTILS_H
+#define _SYNCOP_UTILS_H
+
+typedef int (*syncop_dir_scan_fn_t)(xlator_t *subvol, gf_dirent_t *entry,
+ loc_t *parent, void *data);
+int
+syncop_ftw(xlator_t *subvol, loc_t *loc, int pid, void *data,
+ int (*fn)(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data));
+
+int
+syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
+ void *data, syncop_dir_scan_fn_t fn, dict_t *xdata,
+ uint32_t max_jobs, uint32_t max_qlen);
+
+int
+syncop_dir_scan(xlator_t *subvol, loc_t *loc, int pid, void *data,
+ int (*fn)(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data));
+
+int
+syncop_dirfd(xlator_t *subvol, loc_t *loc, fd_t **fd, int pid);
+
+int
+syncop_is_subvol_local(xlator_t *this, loc_t *loc, gf_boolean_t *is_local);
+
+int
+syncop_gfid_to_path(inode_table_t *itable, xlator_t *subvol, uuid_t gfid,
+ char **path_p);
+
+int
+syncop_ftw_throttle(xlator_t *subvol, loc_t *loc, int pid, void *data,
+ int (*fn)(xlator_t *subvol, gf_dirent_t *entry,
+ loc_t *parent, void *data),
+ int count, int sleep_time);
+int
+syncop_inode_find(xlator_t *this, xlator_t *subvol, uuid_t gfid,
+ inode_t **inode, dict_t *xdata, dict_t **rsp_dict);
+
+int
+syncop_gfid_to_path_hard(inode_table_t *itable, xlator_t *subvol, uuid_t gfid,
+ inode_t *inode, char **path_p,
+ gf_boolean_t hard_resolve);
+#endif /* _SYNCOP_H */
diff --git a/libglusterfs/src/glusterfs/syncop.h b/libglusterfs/src/glusterfs/syncop.h
new file mode 100644
index 00000000000..4e9241a32fc
--- /dev/null
+++ b/libglusterfs/src/glusterfs/syncop.h
@@ -0,0 +1,718 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _SYNCOP_H
+#define _SYNCOP_H
+
+#include <sys/time.h>
+#include <pthread.h>
+#include <ucontext.h>
+#include "glusterfs/dict.h" // for dict_t
+#include "glusterfs/stack.h" // for call_frame_t, STACK_DESTROY, STACK_...
+#include "glusterfs/timer.h"
+
+#define SYNCENV_PROC_MAX 16
+#define SYNCENV_PROC_MIN 2
+#define SYNCPROC_IDLE_TIME 600
+
+/*
+ * Flags for syncopctx valid elements
+ */
+#define SYNCOPCTX_UID 0x00000001
+#define SYNCOPCTX_GID 0x00000002
+#define SYNCOPCTX_GROUPS 0x00000004
+#define SYNCOPCTX_PID 0x00000008
+#define SYNCOPCTX_LKOWNER 0x00000010
+
+#ifdef HAVE_TSAN_API
+/* Currently hardcoded within thread context maintained by the sanitizer. */
+#define TSAN_THREAD_NAMELEN 64
+#endif
+
+struct synctask;
+struct syncproc;
+struct syncenv;
+struct synccond;
+
+typedef int (*synctask_cbk_t)(int ret, call_frame_t *frame, void *opaque);
+
+typedef int (*synctask_fn_t)(void *opaque);
+
+typedef enum {
+ SYNCTASK_INIT = 0,
+ SYNCTASK_RUN,
+ SYNCTASK_SUSPEND,
+ SYNCTASK_WAIT,
+ SYNCTASK_DONE,
+ SYNCTASK_ZOMBIE,
+} synctask_state_t;
+
+/* for one sequential execution of @syncfn */
+struct synctask {
+ struct list_head all_tasks;
+ struct syncenv *env;
+ xlator_t *xl;
+ call_frame_t *frame;
+ call_frame_t *opframe;
+ synctask_cbk_t synccbk;
+ synctask_fn_t syncfn;
+ struct timespec *delta;
+ gf_timer_t *timer;
+ struct synccond *synccond;
+ void *opaque;
+ void *stack;
+ synctask_state_t state;
+ int woken;
+ int slept;
+ int ret;
+
+ uid_t uid;
+ gid_t gid;
+
+#ifdef HAVE_TSAN_API
+ struct {
+ void *fiber;
+ char name[TSAN_THREAD_NAMELEN];
+ } tsan;
+#endif
+
+ ucontext_t ctx;
+ struct syncproc *proc;
+
+ pthread_mutex_t mutex; /* for synchronous spawning of synctask */
+ pthread_cond_t cond;
+ int done;
+
+ struct list_head waitq; /* can wait only "once" at a time */
+};
+
+struct syncproc {
+ pthread_t processor;
+
+#ifdef HAVE_TSAN_API
+ struct {
+ void *fiber;
+ char name[TSAN_THREAD_NAMELEN];
+ } tsan;
+#endif
+
+ ucontext_t sched;
+ struct syncenv *env;
+ struct synctask *current;
+};
+
+/* hosts the scheduler thread and framework for executing synctasks */
+struct syncenv {
+ struct syncproc proc[SYNCENV_PROC_MAX];
+
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ struct list_head runq;
+ struct list_head waitq;
+
+ int procs;
+ int procs_idle;
+
+ int runcount;
+
+ int procmin;
+ int procmax;
+
+ size_t stacksize;
+
+ int destroy; /* FLAG to mark syncenv is in destroy mode
+ so that no more synctasks are accepted*/
+};
+
+typedef enum { LOCK_NULL = 0, LOCK_TASK, LOCK_THREAD } lock_type_t;
+
+typedef enum {
+ SYNC_LOCK_DEFAULT = 0,
+ SYNC_LOCK_RECURSIVE, /*it allows recursive locking*/
+} lock_attr_t;
+
+struct synclock {
+ pthread_mutex_t guard; /* guard the remaining members, pair @cond */
+ pthread_cond_t cond; /* waiting non-synctasks */
+ struct list_head waitq; /* waiting synctasks */
+ volatile int lock; /* true(non zero) or false(zero), lock status */
+ lock_attr_t attr;
+ struct synctask *owner; /* NULL if current owner is not a synctask */
+ pthread_t owner_tid;
+ lock_type_t type;
+};
+typedef struct synclock synclock_t;
+
+struct synccond {
+ pthread_mutex_t pmutex;
+ pthread_cond_t pcond;
+ struct list_head waitq;
+};
+typedef struct synccond synccond_t;
+
+struct syncbarrier {
+ gf_boolean_t initialized; /*Set on successful initialization*/
+ pthread_mutex_t guard; /* guard the remaining members, pair @cond */
+ pthread_cond_t cond; /* waiting non-synctasks */
+ struct list_head waitq; /* waiting synctasks */
+ int count; /* count the number of wakes */
+ int waitfor; /* no. of wakes until which task can be in
+ waitq before being woken up. */
+};
+typedef struct syncbarrier syncbarrier_t;
+
+struct syncargs {
+ int op_ret;
+ int op_errno;
+
+ /*
+ * The below 3 iatt structures are used in the fops
+ * whose callbacks get struct iatt as one of the
+ * a return arguments. Currently, the maximum number
+ * of iatt structures returned is 3 for some fops
+ * such as mknod, copy_file_range, mkdir etc. So
+ * all the following 3 iatt structures would be used
+ * for those fops.
+ */
+ struct iatt iatt1;
+ struct iatt iatt2;
+ struct iatt iatt3;
+ dict_t *xattr;
+ struct statvfs statvfs_buf;
+ struct iovec *vector;
+ int count;
+ struct iobref *iobref;
+ char *buffer;
+ dict_t *xdata;
+ struct gf_flock flock;
+ struct gf_lease lease;
+ dict_t *dict_out;
+
+ /* some more _cbk needs */
+ uuid_t uuid;
+ char *errstr;
+ dict_t *dict;
+ pthread_mutex_t lock_dict;
+
+ syncbarrier_t barrier;
+
+ /* do not touch */
+ struct synctask *task;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ int done;
+
+ gf_dirent_t entries;
+ off_t offset;
+
+ lock_migration_info_t locklist;
+};
+
+struct syncopctx {
+ unsigned int valid; /* valid flags for elements that are set */
+ uid_t uid;
+ gid_t gid;
+ int grpsize;
+ int ngrps;
+ gid_t *groups;
+ pid_t pid;
+ gf_lkowner_t lk_owner;
+};
+
+#define __yawn(args) \
+ do { \
+ args->task = synctask_get(); \
+ if (args->task) \
+ break; \
+ pthread_mutex_init(&args->mutex, NULL); \
+ pthread_cond_init(&args->cond, NULL); \
+ args->done = 0; \
+ } while (0)
+
+#define __wake(args) \
+ do { \
+ if (args->task) { \
+ synctask_wake(args->task); \
+ } else { \
+ pthread_mutex_lock(&args->mutex); \
+ { \
+ args->done = 1; \
+ pthread_cond_signal(&args->cond); \
+ } \
+ pthread_mutex_unlock(&args->mutex); \
+ } \
+ } while (0)
+
+#define __yield(args) \
+ do { \
+ if (args->task) { \
+ synctask_yield(args->task, NULL); \
+ } else { \
+ pthread_mutex_lock(&args->mutex); \
+ { \
+ while (!args->done) \
+ pthread_cond_wait(&args->cond, &args->mutex); \
+ } \
+ pthread_mutex_unlock(&args->mutex); \
+ pthread_mutex_destroy(&args->mutex); \
+ pthread_cond_destroy(&args->cond); \
+ } \
+ } while (0)
+
+#define SYNCOP(subvol, stb, cbk, fn_op, params...) \
+ do { \
+ struct synctask *task = NULL; \
+ call_frame_t *frame = NULL; \
+ \
+ task = synctask_get(); \
+ stb->task = task; \
+ if (task) \
+ frame = copy_frame(task->opframe); \
+ else \
+ frame = syncop_create_frame(THIS); \
+ \
+ if (!frame) { \
+ stb->op_ret = -1; \
+ stb->op_errno = errno; \
+ break; \
+ } \
+ \
+ if (task) { \
+ frame->root->uid = task->uid; \
+ frame->root->gid = task->gid; \
+ } \
+ \
+ __yawn(stb); \
+ \
+ frame->op = get_fop_index_from_fn(subvol, fn_op); \
+ STACK_WIND_COOKIE(frame, cbk, (void *)stb, subvol, fn_op, params); \
+ \
+ __yield(stb); \
+ STACK_DESTROY(frame->root); \
+ } while (0)
+
+/*
+ * syncop_xxx() calls are executed in two ways, one is inside a synctask where
+ * the executing function will do 'swapcontext' and the other is without
+ * synctask where the executing thread is made to wait using pthread_cond_wait.
+ * Executing thread may change when syncop_xxx() is executed inside a synctask.
+ * This leads to errno_location change i.e. errno may give errno of
+ * non-executing thread. So errno is not touched inside a synctask execution.
+ * All gfapi calls are executed using the second way of executing syncop_xxx()
+ * where the executing thread waits using pthread_cond_wait so it is ok to set
+ * errno in these cases. The following macro makes syncop_xxx() behave just
+ * like a system call, where -1 is returned and errno is set when a failure
+ * occurs.
+ */
+#define DECODE_SYNCOP_ERR(ret) \
+ do { \
+ if (ret < 0) { \
+ errno = -ret; \
+ ret = -1; \
+ } else { \
+ errno = 0; \
+ } \
+ } while (0)
+
+#define SYNCENV_DEFAULT_STACKSIZE (2 * 1024 * 1024)
+
+struct syncenv *
+syncenv_new(size_t stacksize, int procmin, int procmax);
+void
+syncenv_destroy(struct syncenv *);
+void
+syncenv_scale(struct syncenv *env);
+
+int
+synctask_new1(struct syncenv *, size_t stacksize, synctask_fn_t, synctask_cbk_t,
+ call_frame_t *frame, void *);
+int
+synctask_new(struct syncenv *, synctask_fn_t, synctask_cbk_t,
+ call_frame_t *frame, void *);
+struct synctask *
+synctask_create(struct syncenv *, size_t stacksize, synctask_fn_t,
+ synctask_cbk_t, call_frame_t *, void *);
+int
+synctask_join(struct synctask *task);
+void
+synctask_wake(struct synctask *task);
+void
+synctask_yield(struct synctask *task, struct timespec *delta);
+void
+synctask_sleep(int32_t secs);
+void
+synctask_waitfor(struct synctask *task, int count);
+
+#define synctask_barrier_init(args) syncbarrier_init(&args->barrier)
+#define synctask_barrier_wait(args, n) syncbarrier_wait(&args->barrier, n)
+#define synctask_barrier_wake(args) syncbarrier_wake(&args->barrier)
+
+int
+synctask_setid(struct synctask *task, uid_t uid, gid_t gid);
+#define SYNCTASK_SETID(uid, gid) synctask_setid(synctask_get(), uid, gid);
+
+int
+syncopctx_setfsuid(void *uid);
+int
+syncopctx_setfsgid(void *gid);
+int
+syncopctx_setfsgroups(int count, const void *groups);
+int
+syncopctx_setfspid(void *pid);
+int
+syncopctx_setfslkowner(gf_lkowner_t *lk_owner);
+
+static inline call_frame_t *
+syncop_create_frame(xlator_t *this)
+{
+ call_frame_t *frame = NULL;
+ int ngrps = -1;
+ struct syncopctx *opctx = NULL;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ return NULL;
+
+ frame->root->type = GF_OP_TYPE_FOP;
+ opctx = syncopctx_getctx();
+
+ if (opctx && (opctx->valid & SYNCOPCTX_PID))
+ frame->root->pid = opctx->pid;
+ else
+ frame->root->pid = getpid();
+
+ if (opctx && (opctx->valid & SYNCOPCTX_UID))
+ frame->root->uid = opctx->uid;
+ else
+ frame->root->uid = geteuid();
+
+ if (opctx && (opctx->valid & SYNCOPCTX_GID))
+ frame->root->gid = opctx->gid;
+ else
+ frame->root->gid = getegid();
+
+ if (opctx && (opctx->valid & SYNCOPCTX_GROUPS)) {
+ ngrps = opctx->ngrps;
+
+ if (ngrps != 0 && opctx->groups != NULL) {
+ if (call_stack_alloc_groups(frame->root, ngrps) != 0) {
+ STACK_DESTROY(frame->root);
+ return NULL;
+ }
+
+ memcpy(frame->root->groups, opctx->groups, (sizeof(gid_t) * ngrps));
+ }
+ } else {
+ ngrps = getgroups(0, 0);
+ if (ngrps < 0) {
+ STACK_DESTROY(frame->root);
+ return NULL;
+ }
+
+ if (call_stack_alloc_groups(frame->root, ngrps) != 0) {
+ STACK_DESTROY(frame->root);
+ return NULL;
+ }
+
+ if (getgroups(ngrps, frame->root->groups) < 0) {
+ STACK_DESTROY(frame->root);
+ return NULL;
+ }
+ }
+
+ if (opctx && (opctx->valid & SYNCOPCTX_LKOWNER))
+ frame->root->lk_owner = opctx->lk_owner;
+
+ return frame;
+}
+
+int
+synclock_init(synclock_t *lock, lock_attr_t attr);
+int
+synclock_destroy(synclock_t *lock);
+int
+synclock_lock(synclock_t *lock);
+int
+synclock_trylock(synclock_t *lock);
+int
+synclock_unlock(synclock_t *lock);
+
+int32_t
+synccond_init(synccond_t *cond);
+
+void
+synccond_destroy(synccond_t *cond);
+
+int
+synccond_wait(synccond_t *cond, synclock_t *lock);
+
+int
+synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta);
+
+void
+synccond_signal(synccond_t *cond);
+
+void
+synccond_broadcast(synccond_t *cond);
+
+int
+syncbarrier_init(syncbarrier_t *barrier);
+int
+syncbarrier_wait(syncbarrier_t *barrier, int waitfor);
+int
+syncbarrier_wake(syncbarrier_t *barrier);
+int
+syncbarrier_destroy(syncbarrier_t *barrier);
+
+int
+syncop_lookup(xlator_t *subvol, loc_t *loc,
+ /* out */
+ struct iatt *iatt, struct iatt *parent,
+ /* xdata */
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_readdirp(xlator_t *subvol, fd_t *fd, size_t size, off_t off,
+ /* out */
+ gf_dirent_t *entries, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_readdir(xlator_t *subvol, fd_t *fd, size_t size, off_t off,
+ gf_dirent_t *entries, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_opendir(xlator_t *subvol, loc_t *loc, fd_t *fd, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_setattr(xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid,
+ /* out */
+ struct iatt *preop, struct iatt *postop, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_fsetattr(xlator_t *subvol, fd_t *fd, struct iatt *iatt, int valid,
+ /* out */
+ struct iatt *preop, struct iatt *postop, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_statfs(xlator_t *subvol, loc_t *loc,
+ /* out */
+ struct statvfs *buf, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_setxattr(xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_fsetxattr(xlator_t *subvol, fd_t *fd, dict_t *dict, int32_t flags,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_listxattr(xlator_t *subvol, loc_t *loc, dict_t **dict, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_getxattr(xlator_t *xl, loc_t *loc, dict_t **dict, const char *key,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_fgetxattr(xlator_t *xl, fd_t *fd, dict_t **dict, const char *key,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_removexattr(xlator_t *subvol, loc_t *loc, const char *name,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_fremovexattr(xlator_t *subvol, fd_t *fd, const char *name,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_create(xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode,
+ fd_t *fd, struct iatt *iatt, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_open(xlator_t *subvol, loc_t *loc, int32_t flags, fd_t *fd,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_close(fd_t *fd);
+
+int
+syncop_write(xlator_t *subvol, fd_t *fd, const char *buf, int size,
+ off_t offset, struct iobref *iobref, uint32_t flags,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_writev(xlator_t *subvol, fd_t *fd, const struct iovec *vector,
+ int32_t count, off_t offset, struct iobref *iobref,
+ uint32_t flags, struct iatt *preiatt, struct iatt *postiatt,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_readv(xlator_t *subvol, fd_t *fd, size_t size, off_t off, uint32_t flags,
+ /* out */
+ struct iovec **vector, int *count, struct iobref **iobref,
+ struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_ftruncate(xlator_t *subvol, fd_t *fd, off_t offset, struct iatt *preiatt,
+ struct iatt *postiatt, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_truncate(xlator_t *subvol, loc_t *loc, off_t offset, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_unlink(xlator_t *subvol, loc_t *loc, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_rmdir(xlator_t *subvol, loc_t *loc, int flags, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_fsync(xlator_t *subvol, fd_t *fd, int dataonly, struct iatt *preiatt,
+ struct iatt *postiatt, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_flush(xlator_t *subvol, fd_t *fd, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_fstat(xlator_t *subvol, fd_t *fd, struct iatt *stbuf, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_stat(xlator_t *subvol, loc_t *loc, struct iatt *stbuf, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_symlink(xlator_t *subvol, loc_t *loc, const char *newpath,
+ struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_readlink(xlator_t *subvol, loc_t *loc, char **buffer, size_t size,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_mknod(xlator_t *subvol, loc_t *loc, mode_t mode, dev_t rdev,
+ struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_mkdir(xlator_t *subvol, loc_t *loc, mode_t mode, struct iatt *iatt,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_link(xlator_t *subvol, loc_t *oldloc, loc_t *newloc, struct iatt *iatt,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_fsyncdir(xlator_t *subvol, fd_t *fd, int datasync, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_access(xlator_t *subvol, loc_t *loc, int32_t mask, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_fallocate(xlator_t *subvol, fd_t *fd, int32_t keep_size, off_t offset,
+ size_t len, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_discard(xlator_t *subvol, fd_t *fd, off_t offset, size_t len,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_zerofill(xlator_t *subvol, fd_t *fd, off_t offset, off_t len,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_rename(xlator_t *subvol, loc_t *oldloc, loc_t *newloc, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_lk(xlator_t *subvol, fd_t *fd, int cmd, struct gf_flock *flock,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_inodelk(xlator_t *subvol, const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_lease(xlator_t *subvol, loc_t *loc, struct gf_lease *lease,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_ipc(xlator_t *subvol, int op, dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_xattrop(xlator_t *subvol, loc_t *loc, gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata_in, dict_t **dict_out,
+ dict_t **xdata_out);
+
+int
+syncop_fxattrop(xlator_t *subvol, fd_t *fd, gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata_in, dict_t **dict_out,
+ dict_t **xdata_out);
+
+int
+syncop_seek(xlator_t *subvol, fd_t *fd, off_t offset, gf_seek_what_t what,
+ dict_t *xdata_in, off_t *off);
+
+int
+syncop_getactivelk(xlator_t *subvol, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_setactivelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int
+syncop_setactivelk(xlator_t *subvol, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata_in,
+ dict_t **xdata_out);
+
+int
+syncop_put(xlator_t *subvol, loc_t *loc, mode_t mode, mode_t umask,
+ uint32_t flags, struct iovec *vector, int32_t count, off_t offset,
+ struct iobref *iobref, dict_t *xattr, struct iatt *iatt,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_setactivelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int
+syncop_icreate(xlator_t *subvol, loc_t *loc, mode_t mode, dict_t *xdata_out);
+
+int
+syncop_entrylk(xlator_t *subvol, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_copy_file_range(xlator_t *subvol, fd_t *fd_in, off64_t off_in,
+ fd_t *fd_out, off64_t off_out, size_t len,
+ uint32_t flags, struct iatt *stbuf,
+ struct iatt *preiatt_dst, struct iatt *postiatt_dst,
+ dict_t *xdata_in, dict_t **xdata_out);
+
+int
+syncop_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst, struct iatt *postbuf_dst,
+ dict_t *xdata);
+
+#endif /* _SYNCOP_H */
diff --git a/libglusterfs/src/glusterfs/syscall.h b/libglusterfs/src/glusterfs/syscall.h
new file mode 100644
index 00000000000..b6d3ab4f2ad
--- /dev/null
+++ b/libglusterfs/src/glusterfs/syscall.h
@@ -0,0 +1,278 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __SYSCALL_H__
+#define __SYSCALL_H__
+
+#include <dirent.h>
+#include <sys/uio.h>
+#include <sys/statvfs.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <stdio.h>
+
+/* GF follows the Linux XATTR definition, which differs in Darwin. */
+#define GF_XATTR_CREATE 0x1 /* set value, fail if attr already exists */
+#define GF_XATTR_REPLACE 0x2 /* set value, fail if attr does not exist */
+
+/* Linux kernel version 2.6.x don't have these defined
+ define if not defined */
+
+#ifndef XATTR_SECURITY_PREFIX
+#define XATTR_SECURITY_PREFIX "security."
+#define XATTR_SECURITY_PREFIX_LEN (sizeof(XATTR_SECURITY_PREFIX) - 1)
+#endif
+
+#ifndef XATTR_SYSTEM_PREFIX
+#define XATTR_SYSTEM_PREFIX "system."
+#define XATTR_SYSTEM_PREFIX_LEN (sizeof(XATTR_SYSTEM_PREFIX) - 1)
+#endif
+
+#ifndef XATTR_TRUSTED_PREFIX
+#define XATTR_TRUSTED_PREFIX "trusted."
+#define XATTR_TRUSTED_PREFIX_LEN (sizeof(XATTR_TRUSTED_PREFIX) - 1)
+#endif
+
+#ifndef XATTR_USER_PREFIX
+#define XATTR_USER_PREFIX "user."
+#define XATTR_USER_PREFIX_LEN (sizeof(XATTR_USER_PREFIX) - 1)
+#endif
+
+#if defined(GF_DARWIN_HOST_OS)
+#include <sys/xattr.h>
+#define XATTR_DARWIN_NOSECURITY XATTR_NOSECURITY
+#define XATTR_DARWIN_NODEFAULT XATTR_NODEFAULT
+#define XATTR_DARWIN_SHOWCOMPRESSION XATTR_SHOWCOMPRESSION
+#endif
+
+int
+sys_lstat(const char *path, struct stat *buf);
+
+int
+sys_stat(const char *path, struct stat *buf);
+
+int
+sys_fstat(int fd, struct stat *buf);
+
+int
+sys_fstatat(int dirfd, const char *pathname, struct stat *buf, int flags);
+
+int
+sys_open(const char *pathname, int flags, int mode);
+
+int
+sys_openat(int dirfd, const char *pathname, int flags, int mode);
+
+DIR *
+sys_opendir(const char *name);
+
+struct dirent *
+sys_readdir(DIR *dir, struct dirent *de);
+
+ssize_t
+sys_readlink(const char *path, char *buf, size_t bufsiz);
+
+int
+sys_closedir(DIR *dir);
+
+int
+sys_mknod(const char *pathname, mode_t mode, dev_t dev);
+
+int
+sys_mkdir(const char *pathname, mode_t mode);
+
+int
+sys_mkdirat(int dirfd, const char *pathname, mode_t mode);
+
+int
+sys_unlink(const char *pathname);
+
+int
+sys_unlinkat(int dfd, const char *pathname);
+
+int
+sys_rmdir(const char *pathname);
+
+int
+sys_symlink(const char *oldpath, const char *newpath);
+
+int
+sys_symlinkat(const char *oldpath, int dirfd, const char *newpath);
+
+int
+sys_rename(const char *oldpath, const char *newpath);
+
+int
+sys_link(const char *oldpath, const char *newpath);
+
+int
+sys_linkat(int oldfd, const char *oldpath, int newfd, const char *newpath);
+
+int
+sys_chmod(const char *path, mode_t mode);
+
+int
+sys_fchmod(int fd, mode_t mode);
+
+int
+sys_chown(const char *path, uid_t owner, gid_t group);
+
+int
+sys_fchown(int fd, uid_t owner, gid_t group);
+
+int
+sys_lchown(const char *path, uid_t owner, gid_t group);
+
+int
+sys_truncate(const char *path, off_t length);
+
+int
+sys_ftruncate(int fd, off_t length);
+
+int
+sys_utimes(const char *filename, const struct timeval times[2]);
+
+#if defined(HAVE_UTIMENSAT)
+int
+sys_utimensat(int dirfd, const char *filename, const struct timespec times[2],
+ int flags);
+#endif
+
+int
+sys_futimes(int fd, const struct timeval times[2]);
+
+int
+sys_creat(const char *pathname, mode_t mode);
+
+ssize_t
+sys_readv(int fd, const struct iovec *iov, int iovcnt);
+
+ssize_t
+sys_writev(int fd, const struct iovec *iov, int iovcnt);
+
+ssize_t
+sys_read(int fd, void *buf, size_t count);
+
+ssize_t
+sys_write(int fd, const void *buf, size_t count);
+
+off_t
+sys_lseek(int fd, off_t offset, int whence);
+
+int
+sys_statvfs(const char *path, struct statvfs *buf);
+
+int
+sys_fstatvfs(int fd, struct statvfs *buf);
+
+int
+sys_close(int fd);
+
+int
+sys_fsync(int fd);
+
+int
+sys_fdatasync(int fd);
+
+void
+gf_add_prefix(const char *ns, const char *key, char **newkey);
+
+void
+gf_remove_prefix(const char *ns, const char *key, char **newkey);
+
+int
+sys_lsetxattr(const char *path, const char *name, const void *value,
+ size_t size, int flags);
+
+ssize_t
+sys_llistxattr(const char *path, char *list, size_t size);
+
+ssize_t
+sys_lgetxattr(const char *path, const char *name, void *value, size_t size);
+
+ssize_t
+sys_fgetxattr(int filedes, const char *name, void *value, size_t size);
+
+int
+sys_fsetxattr(int filedes, const char *name, const void *value, size_t size,
+ int flags);
+
+ssize_t
+sys_flistxattr(int filedes, char *list, size_t size);
+
+int
+sys_lremovexattr(const char *path, const char *name);
+
+int
+sys_fremovexattr(int filedes, const char *name);
+
+int
+sys_access(const char *pathname, int mode);
+
+int
+sys_fallocate(int fd, int mode, off_t offset, off_t len);
+
+ssize_t
+sys_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset);
+
+ssize_t
+sys_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset);
+
+ssize_t
+sys_pread(int fd, void *buf, size_t count, off_t offset);
+
+ssize_t
+sys_pwrite(int fd, const void *buf, size_t count, off_t offset);
+
+int
+sys_socket(int domain, int type, int protocol);
+
+int
+sys_accept(int sock, struct sockaddr *sockaddr, socklen_t *socklen, int flags);
+
+#ifdef GF_BSD_HOST_OS
+#ifndef _OFF64_T_DECLARED
+/*
+ * Including <stdio.h> (done above) should actually define
+ * _OFF64_T_DECLARED with off64_t data type being available
+ * for consumption. But, off64_t data type is not recognizable
+ * for FreeBSD versions less than 11. Hence, int64_t is typedefed
+ * to off64_t.
+ */
+#define _OFF64_T_DECLARED
+typedef int64_t off64_t;
+#endif /* _OFF64_T_DECLARED */
+#endif /* GF_BSD_HOST_OS */
+
+/*
+ * According to the man page of copy_file_range, both off_in and off_out are
+ * pointers to the data type loff_t (i.e. loff_t *). But, freebsd does not
+ * have (and recognize) loff_t. Since loff_t is 64 bits, use off64_t
+ * instead. Since it's a pointer type it should be okay. It just needs
+ * to be a pointer-to-64-bit pointer for both 32- and 64-bit platforms.
+ * off64_t is recognized by freebsd.
+ * TODO: In future, when freebsd can recognize loff_t, probably revisit this
+ * and change the off_in and off_out to (loff_t *).
+ */
+ssize_t
+sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out,
+ size_t len, unsigned int flags);
+
+int
+sys_kill(pid_t pid, int sig);
+
+#ifdef __FreeBSD__
+int
+sys_sysctl(const int *name, u_int namelen, void *oldp, size_t *oldlenp,
+ const void *newp, size_t newlen);
+#endif
+
+#endif /* __SYSCALL_H__ */
diff --git a/libglusterfs/src/glusterfs/template-component-messages.h b/libglusterfs/src/glusterfs/template-component-messages.h
new file mode 100644
index 00000000000..aa7ad3d1baa
--- /dev/null
+++ b/libglusterfs/src/glusterfs/template-component-messages.h
@@ -0,0 +1,28 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _component_MESSAGES_H_
+#define _component_MESSAGES_H_
+
+#include "glusterfs/glfs-message-id.h"
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(component, message id, message id, ...);
+
+#endif /* !_component_MESSAGES_H_ */
diff --git a/libglusterfs/src/glusterfs/throttle-tbf.h b/libglusterfs/src/glusterfs/throttle-tbf.h
new file mode 100644
index 00000000000..cccb13c83d9
--- /dev/null
+++ b/libglusterfs/src/glusterfs/throttle-tbf.h
@@ -0,0 +1,74 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/list.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/locking.h"
+
+#ifndef THROTTLE_TBF_H__
+#define THROTTLE_TBF_H__
+
+typedef enum tbf_ops {
+ TBF_OP_MIN = -1,
+ TBF_OP_HASH = 0, /* checksum calculation */
+ TBF_OP_READ = 1, /* inode read(s) */
+ TBF_OP_READDIR = 2, /* dentry read(s) */
+ TBF_OP_MAX = 3,
+} tbf_ops_t;
+
+/**
+ * Operation rate specification
+ */
+typedef struct tbf_opspec {
+ tbf_ops_t op;
+
+ unsigned long rate;
+
+ unsigned long maxlimit;
+
+ unsigned long token_gen_interval; /* Token generation interval in usec */
+} tbf_opspec_t;
+
+/**
+ * Token bucket for each operation type
+ */
+typedef struct tbf_bucket {
+ gf_lock_t lock;
+
+ pthread_t tokener; /* token generator thread */
+
+ unsigned long tokenrate; /* token generation rate */
+
+ unsigned long tokens; /* number of current tokens */
+
+ unsigned long maxtokens; /* maximum token in the bucket */
+
+ struct list_head queued; /* list of non-conformant requests */
+
+ unsigned long token_gen_interval; /* Token generation interval in usec */
+} tbf_bucket_t;
+
+typedef struct tbf {
+ tbf_bucket_t **bucket;
+} tbf_t;
+
+tbf_t *
+tbf_init(tbf_opspec_t *, unsigned int);
+
+int
+tbf_mod(tbf_t *, tbf_opspec_t *);
+
+void
+tbf_throttle(tbf_t *, tbf_ops_t, unsigned long);
+
+#define TBF_THROTTLE_BEGIN(tbf, op, tokens) (tbf_throttle(tbf, op, tokens))
+#define TBF_THROTTLE_END(tbf, op, tokens)
+
+#endif /** THROTTLE_TBF_H__ */
diff --git a/libglusterfs/src/glusterfs/timer.h b/libglusterfs/src/glusterfs/timer.h
new file mode 100644
index 00000000000..ae5b2edf451
--- /dev/null
+++ b/libglusterfs/src/glusterfs/timer.h
@@ -0,0 +1,56 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _TIMER_H
+#define _TIMER_H
+
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/xlator.h"
+#include <sys/time.h>
+#include <pthread.h>
+
+typedef void (*gf_timer_cbk_t)(void *);
+
+struct _gf_timer {
+ union {
+ struct list_head list;
+ struct {
+ struct _gf_timer *next;
+ struct _gf_timer *prev;
+ };
+ };
+ struct timespec at;
+ gf_timer_cbk_t callbk;
+ void *data;
+ xlator_t *xl;
+ gf_boolean_t fired;
+};
+
+struct _gf_timer_registry {
+ struct list_head active;
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+ pthread_t th;
+ char fin;
+};
+
+typedef struct _gf_timer gf_timer_t;
+typedef struct _gf_timer_registry gf_timer_registry_t;
+
+gf_timer_t *
+gf_timer_call_after(glusterfs_ctx_t *ctx, struct timespec delta,
+ gf_timer_cbk_t cbk, void *data);
+
+int32_t
+gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event);
+
+void
+gf_timer_registry_destroy(glusterfs_ctx_t *ctx);
+#endif /* _TIMER_H */
diff --git a/libglusterfs/src/glusterfs/timespec.h b/libglusterfs/src/glusterfs/timespec.h
new file mode 100644
index 00000000000..bb9ab446a5f
--- /dev/null
+++ b/libglusterfs/src/glusterfs/timespec.h
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __INCLUDE_TIMESPEC_H__
+#define __INCLUDE_TIMESPEC_H__
+
+#include <stdint.h>
+#include <sys/time.h>
+
+#define TS(ts) ((ts.tv_sec * 1000000000LL) + ts.tv_nsec)
+#define NANO (+1.0E-9)
+#define GIGA UINT64_C(1000000000)
+
+void
+timespec_now(struct timespec *ts);
+void
+timespec_now_realtime(struct timespec *ts);
+void
+timespec_adjust_delta(struct timespec *ts, struct timespec delta);
+void
+timespec_sub(const struct timespec *begin, const struct timespec *end,
+ struct timespec *res);
+int
+timespec_cmp(const struct timespec *lhs_ts, const struct timespec *rhs_ts);
+
+#endif /* __INCLUDE_TIMESPEC_H__ */
diff --git a/libglusterfs/src/glusterfs/trie.h b/libglusterfs/src/glusterfs/trie.h
new file mode 100644
index 00000000000..6d2d8015964
--- /dev/null
+++ b/libglusterfs/src/glusterfs/trie.h
@@ -0,0 +1,52 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _TRIE_H_
+#define _TRIE_H_
+
+struct trienode;
+typedef struct trienode trienode_t;
+
+struct trie;
+typedef struct trie trie_t;
+
+struct trienodevec {
+ trienode_t **nodes;
+ unsigned cnt;
+};
+
+trie_t *
+trie_new();
+
+int
+trie_add(trie_t *trie, const char *word);
+
+void
+trie_destroy(trie_t *trie);
+
+void
+trie_destroy_bynode(trienode_t *node);
+
+int
+trie_measure(trie_t *trie, const char *word, trienode_t **nodes, int nodecnt);
+
+int
+trie_measure_vec(trie_t *trie, const char *word, struct trienodevec *nodevec);
+
+void
+trie_reset_search(trie_t *trie);
+
+int
+trienode_get_dist(trienode_t *node);
+
+int
+trienode_get_word(trienode_t *node, char **buf);
+
+#endif
diff --git a/libglusterfs/src/glusterfs/upcall-utils.h b/libglusterfs/src/glusterfs/upcall-utils.h
new file mode 100644
index 00000000000..0de8428c5fc
--- /dev/null
+++ b/libglusterfs/src/glusterfs/upcall-utils.h
@@ -0,0 +1,110 @@
+/*
+ Copyright (c) 2015, Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _UPCALL_UTILS_H
+#define _UPCALL_UTILS_H
+
+#include "glusterfs/iatt.h"
+#include "glusterfs/compat-uuid.h"
+#include "glusterfs/compat.h"
+
+/* Flags sent for cache_invalidation */
+#define UP_NLINK 0x00000001 /* update nlink */
+#define UP_MODE 0x00000002 /* update mode and ctime */
+#define UP_OWN 0x00000004 /* update mode,uid,gid and ctime */
+#define UP_SIZE 0x00000008 /* update fsize */
+#define UP_TIMES 0x00000010 /* update all times */
+#define UP_ATIME 0x00000020 /* update atime only */
+#define UP_PERM \
+ 0x00000040 /* update fields needed for permission \
+ checking */
+#define UP_RENAME \
+ 0x00000080 /* this is a rename op - delete the cache \
+ entry */
+#define UP_FORGET \
+ 0x00000100 /* inode_forget on server side - \
+ invalidate the cache entry */
+#define UP_PARENT_TIMES 0x00000200 /* update parent dir times */
+
+#define UP_XATTR 0x00000400 /* update the xattrs and ctime */
+#define UP_XATTR_RM 0x00000800 /* Remove the xattrs and update ctime */
+
+#define UP_EXPLICIT_LOOKUP 0x00001000 /* Request an explicit lookup */
+
+#define UP_INVAL_ATTR 0x00002000 /* Request to invalidate iatt and xatt */
+
+/* for fops - open, read, lk, */
+#define UP_UPDATE_CLIENT (UP_ATIME)
+
+/* for fop - write, truncate */
+#define UP_WRITE_FLAGS (UP_SIZE | UP_TIMES)
+
+/* for fop - setattr */
+#define UP_ATTR_FLAGS (UP_SIZE | UP_TIMES | UP_OWN | UP_MODE | UP_PERM)
+/* for fop - rename */
+#define UP_RENAME_FLAGS (UP_RENAME)
+
+/* to invalidate parent directory entries for fops -rename, unlink, rmdir,
+ * mkdir, create */
+#define UP_PARENT_DENTRY_FLAGS (UP_PARENT_TIMES)
+
+/* for fop - unlink, link, rmdir, mkdir */
+#define UP_NLINK_FLAGS (UP_NLINK | UP_TIMES)
+
+#define IATT_UPDATE_FLAGS \
+ (UP_NLINK | UP_MODE | UP_OWN | UP_SIZE | UP_TIMES | UP_ATIME | UP_PERM)
+
+typedef enum {
+ GF_UPCALL_EVENT_NULL,
+ GF_UPCALL_CACHE_INVALIDATION,
+ GF_UPCALL_RECALL_LEASE,
+ GF_UPCALL_INODELK_CONTENTION,
+ GF_UPCALL_ENTRYLK_CONTENTION,
+} gf_upcall_event_t;
+
+struct gf_upcall {
+ char *client_uid;
+ uuid_t gfid;
+ uint32_t event_type;
+ void *data;
+};
+
+struct gf_upcall_cache_invalidation {
+ uint32_t flags;
+ uint32_t expire_time_attr;
+ struct iatt stat;
+ struct iatt p_stat; /* parent dir stat */
+ struct iatt oldp_stat; /* oldparent dir stat */
+ dict_t *dict; /* For xattrs */
+};
+
+struct gf_upcall_recall_lease {
+ uint32_t lease_type; /* Lease type to which client can downgrade to*/
+ uuid_t tid; /* transaction id of the fop that caused
+ the recall */
+ dict_t *dict;
+};
+
+struct gf_upcall_inodelk_contention {
+ struct gf_flock flock;
+ pid_t pid;
+ const char *domain;
+ dict_t *xdata;
+};
+
+struct gf_upcall_entrylk_contention {
+ uint32_t type;
+ pid_t pid;
+ const char *name;
+ const char *domain;
+ dict_t *xdata;
+};
+
+#endif /* _UPCALL_UTILS_H */
diff --git a/libglusterfs/src/glusterfs/xlator.h b/libglusterfs/src/glusterfs/xlator.h
new file mode 100644
index 00000000000..4fd3abdaeff
--- /dev/null
+++ b/libglusterfs/src/glusterfs/xlator.h
@@ -0,0 +1,1106 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _XLATOR_H
+#define _XLATOR_H
+
+#include <stdint.h> // for int32_t
+#include <sys/types.h> // for off_t, mode_t, off64_t, dev_t
+#include "glusterfs/glusterfs-fops.h" // for GF_FOP_MAXVALUE, entrylk_cmd
+#include "glusterfs/atomic.h" // for gf_atomic_t
+#include "glusterfs/glusterfs.h" // for gf_boolean_t, glusterfs_ctx_t
+#include "glusterfs/compat-uuid.h" // for uuid_t
+#include "glusterfs/compat.h"
+#include "glusterfs/event-history.h"
+#include "glusterfs/dict.h"
+#include "glusterfs/latency.h"
+
+#define FIRST_CHILD(xl) (xl->children->xlator)
+#define SECOND_CHILD(xl) (xl->children->next->xlator)
+
+#define GF_SET_ATTR_MODE 0x1
+#define GF_SET_ATTR_UID 0x2
+#define GF_SET_ATTR_GID 0x4
+#define GF_SET_ATTR_SIZE 0x8
+#define GF_SET_ATTR_ATIME 0x10
+#define GF_SET_ATTR_MTIME 0x20
+#define GF_SET_ATTR_CTIME 0x40
+#define GF_ATTR_ATIME_NOW 0x80
+#define GF_ATTR_MTIME_NOW 0x100
+
+#define gf_attr_mode_set(mode) ((mode)&GF_SET_ATTR_MODE)
+#define gf_attr_uid_set(mode) ((mode)&GF_SET_ATTR_UID)
+#define gf_attr_gid_set(mode) ((mode)&GF_SET_ATTR_GID)
+#define gf_attr_size_set(mode) ((mode)&GF_SET_ATTR_SIZE)
+#define gf_attr_atime_set(mode) ((mode)&GF_SET_ATTR_ATIME)
+#define gf_attr_mtime_set(mode) ((mode)&GF_SET_ATTR_MTIME)
+
+struct _xlator;
+typedef struct _xlator xlator_t;
+struct _dir_entry;
+typedef struct _dir_entry dir_entry_t;
+struct _gf_dirent;
+typedef struct _gf_dirent gf_dirent_t;
+struct _loc;
+typedef struct _loc loc_t;
+
+typedef int32_t (*event_notify_fn_t)(xlator_t *this, int32_t event, void *data,
+ ...);
+
+#include "glusterfs/list.h"
+#include "glusterfs/gf-dirent.h"
+#include "glusterfs/stack.h"
+#include "glusterfs/iobuf.h"
+#include "glusterfs/globals.h"
+#include "glusterfs/iatt.h"
+#include "glusterfs/options.h"
+#include "glusterfs/client_t.h"
+
+struct _loc {
+ const char *path;
+ const char *name;
+ inode_t *inode;
+ inode_t *parent;
+ /* Currently all location based operations are through 'gfid' of inode.
+ * But the 'inode->gfid' only gets set in higher most layer (as in,
+ * 'fuse', 'protocol/server', or 'nfs/server'). So if translators want
+ * to send fops on a inode before the 'inode->gfid' is set, they have to
+ * make use of below 'gfid' fields
+ */
+ uuid_t gfid;
+ uuid_t pargfid;
+};
+
+typedef int32_t (*fop_getspec_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, char *spec_data);
+
+typedef int32_t (*fop_rchecksum_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata);
+
+typedef int32_t (*fop_getspec_t)(call_frame_t *frame, xlator_t *this,
+ const char *key, int32_t flag);
+
+typedef int32_t (*fop_rchecksum_t)(call_frame_t *frame, xlator_t *this,
+ fd_t *fd, off_t offset, int32_t len,
+ dict_t *xdata);
+
+typedef int32_t (*fop_lookup_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent);
+
+typedef int32_t (*fop_stat_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fstat_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ dict_t *xdata);
+
+typedef int32_t (*fop_truncate_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+typedef int32_t (*fop_ftruncate_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+typedef int32_t (*fop_access_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_readlink_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata);
+
+typedef int32_t (*fop_mknod_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+typedef int32_t (*fop_mkdir_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+typedef int32_t (*fop_unlink_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+typedef int32_t (*fop_rmdir_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+typedef int32_t (*fop_symlink_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+typedef int32_t (*fop_rename_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent,
+ struct iatt *postoldparent,
+ struct iatt *prenewparent,
+ struct iatt *postnewparent, dict_t *xdata);
+
+typedef int32_t (*fop_link_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+typedef int32_t (*fop_create_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+typedef int32_t (*fop_open_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata);
+
+typedef int32_t (*fop_readv_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata);
+
+typedef int32_t (*fop_writev_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+typedef int32_t (*fop_flush_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_fsync_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+typedef int32_t (*fop_opendir_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata);
+
+typedef int32_t (*fop_fsyncdir_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_statfs_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata);
+
+typedef int32_t (*fop_setxattr_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_getxattr_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fsetxattr_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_fgetxattr_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+typedef int32_t (*fop_removexattr_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_fremovexattr_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_lk_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *flock,
+ dict_t *xdata);
+
+typedef int32_t (*fop_inodelk_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_finodelk_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_entrylk_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_fentrylk_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_readdir_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata);
+
+typedef int32_t (*fop_readdirp_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata);
+
+typedef int32_t (*fop_xattrop_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fxattrop_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr,
+ dict_t *xdata);
+
+typedef int32_t (*fop_setattr_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata);
+
+typedef int32_t (*fop_fsetattr_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata);
+
+typedef int32_t (*fop_fallocate_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf,
+ dict_t *xdata);
+
+typedef int32_t (*fop_discard_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata);
+
+typedef int32_t (*fop_zerofill_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno,
+ struct iatt *preop_stbuf,
+ struct iatt *postop_stbuf, dict_t *xdata);
+
+typedef int32_t (*fop_ipc_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_seek_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, off_t offset,
+ dict_t *xdata);
+
+typedef int32_t (*fop_lease_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_lease *lease,
+ dict_t *xdata);
+typedef int32_t (*fop_compound_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, void *data,
+ dict_t *xdata);
+
+typedef int32_t (*fop_getactivelk_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno,
+ lock_migration_info_t *locklist,
+ dict_t *xdata);
+
+typedef int32_t (*fop_setactivelk_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+typedef int32_t (*fop_put_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+typedef int32_t (*fop_icreate_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata);
+
+typedef int32_t (*fop_namelink_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+typedef int32_t (*fop_copy_file_range_cbk_t)(
+ call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata);
+
+typedef int32_t (*fop_lookup_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+typedef int32_t (*fop_stat_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fstat_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *xdata);
+
+typedef int32_t (*fop_truncate_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, off_t offset, dict_t *xdata);
+
+typedef int32_t (*fop_ftruncate_t)(call_frame_t *frame, xlator_t *this,
+ fd_t *fd, off_t offset, dict_t *xdata);
+
+typedef int32_t (*fop_access_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t mask, dict_t *xdata);
+
+typedef int32_t (*fop_readlink_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, size_t size, dict_t *xdata);
+
+typedef int32_t (*fop_mknod_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask,
+ dict_t *xdata);
+
+typedef int32_t (*fop_mkdir_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata);
+
+typedef int32_t (*fop_unlink_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflags, dict_t *xdata);
+
+typedef int32_t (*fop_rmdir_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflags, dict_t *xdata);
+
+typedef int32_t (*fop_symlink_t)(call_frame_t *frame, xlator_t *this,
+ const char *linkname, loc_t *loc, mode_t umask,
+ dict_t *xdata);
+
+typedef int32_t (*fop_rename_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
+
+typedef int32_t (*fop_link_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *oldloc, loc_t *newloc, dict_t *xdata);
+
+typedef int32_t (*fop_create_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask,
+ fd_t *fd, dict_t *xdata);
+
+/* Tell subsequent writes on the fd_t to fsync after every writev fop without
+ * requiring a fsync fop.
+ */
+#define GF_OPEN_FSYNC 0x01
+
+/* Tell write-behind to disable writing behind despite O_SYNC not being set.
+ */
+#define GF_OPEN_NOWB 0x02
+
+typedef int32_t (*fop_open_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata);
+
+typedef int32_t (*fop_readv_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags,
+ dict_t *xdata);
+
+typedef int32_t (*fop_writev_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count,
+ off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
+
+typedef int32_t (*fop_flush_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fsync_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+typedef int32_t (*fop_opendir_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, fd_t *fd, dict_t *xdata);
+
+typedef int32_t (*fop_fsyncdir_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+typedef int32_t (*fop_statfs_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+typedef int32_t (*fop_setxattr_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *dict, int32_t flags,
+ dict_t *xdata);
+
+typedef int32_t (*fop_getxattr_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name, dict_t *xdata);
+
+typedef int32_t (*fop_fsetxattr_t)(call_frame_t *frame, xlator_t *this,
+ fd_t *fd, dict_t *dict, int32_t flags,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fgetxattr_t)(call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name, dict_t *xdata);
+
+typedef int32_t (*fop_removexattr_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, const char *name,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fremovexattr_t)(call_frame_t *frame, xlator_t *this,
+ fd_t *fd, const char *name,
+ dict_t *xdata);
+
+typedef int32_t (*fop_lk_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata);
+
+typedef int32_t (*fop_inodelk_t)(call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+typedef int32_t (*fop_finodelk_t)(call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+typedef int32_t (*fop_entrylk_t)(call_frame_t *frame, xlator_t *this,
+ const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+typedef int32_t (*fop_fentrylk_t)(call_frame_t *frame, xlator_t *this,
+ const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+typedef int32_t (*fop_readdir_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata);
+
+typedef int32_t (*fop_readdirp_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata);
+
+typedef int32_t (*fop_xattrop_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, gf_xattrop_flags_t optype,
+ dict_t *xattr, dict_t *xdata);
+
+typedef int32_t (*fop_fxattrop_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr,
+ dict_t *xdata);
+
+typedef int32_t (*fop_setattr_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, struct iatt *stbuf, int32_t valid,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fsetattr_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid,
+ dict_t *xdata);
+
+typedef int32_t (*fop_fallocate_t)(call_frame_t *frame, xlator_t *this,
+ fd_t *fd, int32_t keep_size, off_t offset,
+ size_t len, dict_t *xdata);
+
+typedef int32_t (*fop_discard_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+
+typedef int32_t (*fop_zerofill_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, off_t len, dict_t *xdata);
+
+typedef int32_t (*fop_ipc_t)(call_frame_t *frame, xlator_t *this, int32_t op,
+ dict_t *xdata);
+
+typedef int32_t (*fop_seek_t)(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, gf_seek_what_t what, dict_t *xdata);
+
+typedef int32_t (*fop_lease_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata);
+
+typedef int32_t (*fop_compound_t)(call_frame_t *frame, xlator_t *this,
+ void *args, dict_t *xdata);
+
+typedef int32_t (*fop_getactivelk_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xdata);
+
+typedef int32_t (*fop_setactivelk_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc,
+ lock_migration_info_t *locklist,
+ dict_t *xdata);
+
+typedef int32_t (*fop_put_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, mode_t umask, uint32_t flags,
+ struct iovec *vector, int32_t count, off_t offset,
+ struct iobref *iobref, dict_t *xattr,
+ dict_t *xdata);
+
+typedef int32_t (*fop_icreate_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, dict_t *xdata);
+
+typedef int32_t (*fop_namelink_t)(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, dict_t *xdata);
+typedef int32_t (*fop_copy_file_range_t)(call_frame_t *frame, xlator_t *this,
+ fd_t *fd_in, off64_t off_in,
+ fd_t *fd_out, off64_t off_out,
+ size_t len, uint32_t flags,
+ dict_t *xdata);
+
+/* WARNING: make sure the list is in order with FOP definition in
+ `rpc/xdr/src/glusterfs-fops.x`.
+ If it is not in order, mainly the metrics related feature would be broken */
+struct xlator_fops {
+ fop_stat_t stat;
+ fop_readlink_t readlink;
+ fop_mknod_t mknod;
+ fop_mkdir_t mkdir;
+ fop_unlink_t unlink;
+ fop_rmdir_t rmdir;
+ fop_symlink_t symlink;
+ fop_rename_t rename;
+ fop_link_t link;
+ fop_truncate_t truncate;
+ fop_open_t open;
+ fop_readv_t readv;
+ fop_writev_t writev;
+ fop_statfs_t statfs;
+ fop_flush_t flush;
+ fop_fsync_t fsync;
+ fop_setxattr_t setxattr;
+ fop_getxattr_t getxattr;
+ fop_removexattr_t removexattr;
+ fop_opendir_t opendir;
+ fop_fsyncdir_t fsyncdir;
+ fop_access_t access;
+ fop_create_t create;
+ fop_ftruncate_t ftruncate;
+ fop_fstat_t fstat;
+ fop_lk_t lk;
+ fop_lookup_t lookup;
+ fop_readdir_t readdir;
+ fop_inodelk_t inodelk;
+ fop_finodelk_t finodelk;
+ fop_entrylk_t entrylk;
+ fop_fentrylk_t fentrylk;
+ fop_xattrop_t xattrop;
+ fop_fxattrop_t fxattrop;
+ fop_fgetxattr_t fgetxattr;
+ fop_fsetxattr_t fsetxattr;
+ fop_rchecksum_t rchecksum;
+ fop_setattr_t setattr;
+ fop_fsetattr_t fsetattr;
+ fop_readdirp_t readdirp;
+
+ /* These 3 are required to keep the index same as GF_FOP_##FOP */
+ void *forget_placeholder;
+ void *release_placeholder;
+ void *releasedir_placeholder;
+
+ fop_getspec_t getspec;
+ fop_fremovexattr_t fremovexattr;
+ fop_fallocate_t fallocate;
+ fop_discard_t discard;
+ fop_zerofill_t zerofill;
+ fop_ipc_t ipc;
+ fop_seek_t seek;
+ fop_lease_t lease;
+ fop_compound_t compound;
+ fop_getactivelk_t getactivelk;
+ fop_setactivelk_t setactivelk;
+ fop_put_t put;
+ fop_icreate_t icreate;
+ fop_namelink_t namelink;
+ fop_copy_file_range_t copy_file_range;
+
+ /* these entries are used for a typechecking hack in STACK_WIND _only_ */
+ /* make sure to add _cbk variables only after defining regular fops as
+ its relative position is used to get the index */
+ fop_stat_cbk_t stat_cbk;
+ fop_readlink_cbk_t readlink_cbk;
+ fop_mknod_cbk_t mknod_cbk;
+ fop_mkdir_cbk_t mkdir_cbk;
+ fop_unlink_cbk_t unlink_cbk;
+ fop_rmdir_cbk_t rmdir_cbk;
+ fop_symlink_cbk_t symlink_cbk;
+ fop_rename_cbk_t rename_cbk;
+ fop_link_cbk_t link_cbk;
+ fop_truncate_cbk_t truncate_cbk;
+ fop_open_cbk_t open_cbk;
+ fop_readv_cbk_t readv_cbk;
+ fop_writev_cbk_t writev_cbk;
+ fop_statfs_cbk_t statfs_cbk;
+ fop_flush_cbk_t flush_cbk;
+ fop_fsync_cbk_t fsync_cbk;
+ fop_setxattr_cbk_t setxattr_cbk;
+ fop_getxattr_cbk_t getxattr_cbk;
+ fop_removexattr_cbk_t removexattr_cbk;
+ fop_opendir_cbk_t opendir_cbk;
+ fop_fsyncdir_cbk_t fsyncdir_cbk;
+ fop_access_cbk_t access_cbk;
+ fop_create_cbk_t create_cbk;
+ fop_ftruncate_cbk_t ftruncate_cbk;
+ fop_fstat_cbk_t fstat_cbk;
+ fop_lk_cbk_t lk_cbk;
+ fop_lookup_cbk_t lookup_cbk;
+ fop_readdir_cbk_t readdir_cbk;
+ fop_inodelk_cbk_t inodelk_cbk;
+ fop_finodelk_cbk_t finodelk_cbk;
+ fop_entrylk_cbk_t entrylk_cbk;
+ fop_fentrylk_cbk_t fentrylk_cbk;
+ fop_xattrop_cbk_t xattrop_cbk;
+ fop_fxattrop_cbk_t fxattrop_cbk;
+ fop_fgetxattr_cbk_t fgetxattr_cbk;
+ fop_fsetxattr_cbk_t fsetxattr_cbk;
+ fop_rchecksum_cbk_t rchecksum_cbk;
+ fop_setattr_cbk_t setattr_cbk;
+ fop_fsetattr_cbk_t fsetattr_cbk;
+ fop_readdirp_cbk_t readdirp_cbk;
+
+ /* These 3 are required to keep the index same as GF_FOP_##FOP */
+ void *forget_placeholder_cbk;
+ void *release_placeholder_cbk;
+ void *releasedir_placeholder_cbk;
+
+ fop_getspec_cbk_t getspec_cbk;
+ fop_fremovexattr_cbk_t fremovexattr_cbk;
+ fop_fallocate_cbk_t fallocate_cbk;
+ fop_discard_cbk_t discard_cbk;
+ fop_zerofill_cbk_t zerofill_cbk;
+ fop_ipc_cbk_t ipc_cbk;
+ fop_seek_cbk_t seek_cbk;
+ fop_lease_cbk_t lease_cbk;
+ fop_compound_cbk_t compound_cbk;
+ fop_getactivelk_cbk_t getactivelk_cbk;
+ fop_setactivelk_cbk_t setactivelk_cbk;
+ fop_put_cbk_t put_cbk;
+ fop_icreate_cbk_t icreate_cbk;
+ fop_namelink_cbk_t namelink_cbk;
+ fop_copy_file_range_cbk_t copy_file_range_cbk;
+};
+
+typedef int32_t (*cbk_forget_t)(xlator_t *this, inode_t *inode);
+
+typedef int32_t (*cbk_release_t)(xlator_t *this, fd_t *fd);
+
+typedef int32_t (*cbk_invalidate_t)(xlator_t *this, inode_t *inode);
+
+typedef int32_t (*cbk_client_t)(xlator_t *this, client_t *client);
+
+typedef void (*cbk_ictxmerge_t)(xlator_t *this, fd_t *fd, inode_t *inode,
+ inode_t *linked_inode);
+
+typedef size_t (*cbk_inodectx_size_t)(xlator_t *this, inode_t *inode);
+
+typedef size_t (*cbk_fdctx_size_t)(xlator_t *this, fd_t *fd);
+
+typedef void (*cbk_fdclose_t)(xlator_t *this, fd_t *fd);
+
+struct xlator_cbks {
+ cbk_forget_t forget;
+ cbk_release_t release;
+ cbk_release_t releasedir;
+ cbk_invalidate_t invalidate;
+ cbk_client_t client_destroy;
+ cbk_client_t client_disconnect;
+ cbk_ictxmerge_t ictxmerge;
+ cbk_inodectx_size_t ictxsize;
+ cbk_fdctx_size_t fdctxsize;
+ cbk_fdclose_t fdclose;
+ cbk_fdclose_t fdclosedir;
+};
+
+typedef int32_t (*dumpop_priv_t)(xlator_t *this);
+
+typedef int32_t (*dumpop_inode_t)(xlator_t *this);
+
+typedef int32_t (*dumpop_fd_t)(xlator_t *this);
+
+typedef int32_t (*dumpop_inodectx_t)(xlator_t *this, inode_t *ino);
+
+typedef int32_t (*dumpop_fdctx_t)(xlator_t *this, fd_t *fd);
+
+typedef int32_t (*dumpop_priv_to_dict_t)(xlator_t *this, dict_t *dict,
+ char *brickname);
+
+typedef int32_t (*dumpop_inode_to_dict_t)(xlator_t *this, dict_t *dict);
+
+typedef int32_t (*dumpop_fd_to_dict_t)(xlator_t *this, dict_t *dict);
+
+typedef int32_t (*dumpop_inodectx_to_dict_t)(xlator_t *this, inode_t *ino,
+ dict_t *dict);
+
+typedef int32_t (*dumpop_fdctx_to_dict_t)(xlator_t *this, fd_t *fd,
+ dict_t *dict);
+
+typedef int32_t (*dumpop_eh_t)(xlator_t *this);
+
+struct xlator_dumpops {
+ dumpop_priv_t priv;
+ dumpop_inode_t inode;
+ dumpop_fd_t fd;
+ dumpop_inodectx_t inodectx;
+ dumpop_fdctx_t fdctx;
+ dumpop_priv_to_dict_t priv_to_dict;
+ dumpop_inode_to_dict_t inode_to_dict;
+ dumpop_fd_to_dict_t fd_to_dict;
+ dumpop_inodectx_to_dict_t inodectx_to_dict;
+ dumpop_fdctx_to_dict_t fdctx_to_dict;
+ dumpop_eh_t history;
+};
+
+typedef struct xlator_list {
+ xlator_t *xlator;
+ struct xlator_list *next;
+} xlator_list_t;
+
+typedef struct fop_metrics {
+ gf_atomic_t fop;
+ gf_atomic_t cbk; /* only updaed when there is failure */
+} fop_metrics_t;
+
+struct _xlator {
+ /* Built during parsing */
+ char *name;
+ char *type;
+ char *instance_name; /* Used for multi NFSd */
+ xlator_t *next;
+ xlator_t *prev;
+ xlator_list_t *parents;
+ xlator_list_t *children;
+ dict_t *options;
+
+ /* Set after doing dlopen() */
+ void *dlhandle;
+ struct xlator_fops *fops;
+ struct xlator_cbks *cbks;
+ struct xlator_dumpops *dumpops;
+ struct list_head volume_options; /* list of volume_option_t */
+
+ void (*fini)(xlator_t *this);
+ int32_t (*init)(xlator_t *this);
+ int32_t (*reconfigure)(xlator_t *this, dict_t *options);
+ int32_t (*mem_acct_init)(xlator_t *this);
+ int32_t (*dump_metrics)(xlator_t *this, int fd);
+
+ event_notify_fn_t notify;
+
+ gf_loglevel_t loglevel; /* Log level for translator */
+
+ struct {
+ struct {
+ /* for latency measurement */
+ fop_metrics_t metrics[GF_FOP_MAXVALUE];
+
+ gf_atomic_t count;
+ } total;
+
+ struct {
+ /* for latency measurement */
+ gf_latency_t latencies[GF_FOP_MAXVALUE];
+ /* for latency measurement */
+ fop_metrics_t metrics[GF_FOP_MAXVALUE];
+
+ gf_atomic_t count;
+ } interval;
+ } stats;
+
+ /* Misc */
+ eh_t *history; /* event history context */
+ glusterfs_ctx_t *ctx;
+ glusterfs_graph_t *graph; /* not set for fuse */
+ inode_table_t *itable;
+ char init_succeeded;
+ void *private;
+ struct mem_acct *mem_acct;
+ uint64_t winds;
+ char switched;
+
+ /* for the memory pool of 'frame->local' */
+ struct mem_pool *local_pool;
+ gf_boolean_t is_autoloaded;
+
+ /* Saved volfile ID (used for multiplexing) */
+ char *volfile_id;
+
+ /* Its used as an index to inode_ctx*/
+ uint32_t xl_id;
+
+ /* op_version: initialized in xlator code itself */
+ uint32_t op_version[GF_MAX_RELEASES];
+
+ /* flags: initialized in xlator code itself */
+ uint32_t flags;
+
+ /* id: unique, initialized in xlator code itself */
+ uint32_t id;
+
+ /* identifier: a full string which can unique identify the xlator */
+ char *identifier;
+
+ /* Is this pass_through? */
+ gf_boolean_t pass_through;
+ struct xlator_fops *pass_through_fops;
+
+ /* cleanup flag to avoid races during xlator cleanup */
+ uint32_t cleanup_starting;
+
+ /* flag to avoid recall of xlator_mem_cleanup for xame xlator */
+ uint32_t call_cleanup;
+
+ /* Flag to understand how this xlator is categorized */
+ gf_category_t category;
+
+ /* Variable to save xprt associated for detach brick */
+ gf_atomic_t xprtrefcnt;
+
+ /* Flag to notify got CHILD_DOWN event for detach brick */
+ uint32_t notify_down;
+
+ /* Flag to avoid throw duplicate PARENT_DOWN event */
+ uint32_t parent_down;
+};
+
+/* This would be the only structure which needs to be exported by
+ the translators. For the backward compatibility, in 4.x series
+ even the old exported fields will be supported */
+/* XXX: This struct is in use by GD2, and hence SHOULD NOT be modified.
+ * If the struct must be modified, see instructions at the comment with
+ * GD2MARKER below.
+ */
+typedef struct {
+ /* op_version: will be used by volume generation logic to figure
+ out whether to insert it in graph or no, based on cluster's
+ operating version.
+ default value: 0, which means good to insert always */
+ uint32_t op_version[GF_MAX_RELEASES];
+
+ /* flags: will be used by volume generation logic to optimize the
+ placements etc.
+ default value: 0, which means don't treat it specially */
+ uint32_t flags;
+
+ /* xlator_id: unique per xlator. make sure to have no collission
+ in this ID */
+ uint32_t xlator_id;
+
+ /* identifier: a string constant */
+ char *identifier;
+
+ /* struct options: if the translator takes any 'options' from the
+ volume file, then that should be defined here. optional. */
+ volume_option_t *options;
+
+ /* Flag to understand how this xlator is categorized */
+ gf_category_t category;
+
+ /* XXX: GD2MARKER
+ * If a new member that needs to be visible to GD2 is introduced,
+ * add it above this comment.
+ * Any other new members need to be added below this comment, or at the
+ * end of the struct
+ */
+
+ /* init(): mandatory method, will be called during the
+ graph initialization */
+ int32_t (*init)(xlator_t *this);
+
+ /* fini(): optional method, will be initialized to default
+ method which would just free the 'xlator->private' variable.
+ This method is called when the graph is no more in use, and
+ is being destroyed. Also when SIGTERM is received */
+ void (*fini)(xlator_t *this);
+
+ /* reconfigure(): optional method, will be initialized to default
+ method in case not provided by xlator. This method is called
+ when there are only option changes in xlator, and no graph change.
+ eg., a 'gluster volume set' command */
+ int32_t (*reconfigure)(xlator_t *this, dict_t *options);
+
+ /* mem_acct_init(): used for memory accounting inside of the xlator.
+ optional. called during translator initialization */
+ int32_t (*mem_acct_init)(xlator_t *this);
+
+ /* dump_metrics(): used for providing internal metrics. optional */
+ int32_t (*dump_metrics)(xlator_t *this, int fd);
+
+ /* notify(): used for handling the notification of events from either
+ the parent or child in the graph. optional. */
+ event_notify_fn_t notify;
+
+ /* struct fops: mandatory. provides all the filesystem operations
+ methods of the xlator */
+ struct xlator_fops *fops;
+ /* struct cbks: optional. provides methods to handle
+ inode forgets, and fd releases */
+ struct xlator_cbks *cbks;
+
+ /* dumpops: a structure again, with methods to dump the details.
+ optional. */
+ struct xlator_dumpops *dumpops;
+
+ /* struct pass_through_fops: optional. provides all the filesystem
+ operations which should be used if the xlator is marked as pass_through
+ */
+ /* by default, the default_fops would be used */
+ struct xlator_fops *pass_through_fops;
+} xlator_api_t;
+
+#define xlator_has_parent(xl) (xl->parents != NULL)
+
+#define XLATOR_NOTIFY(ret, _xl, params...) \
+ do { \
+ xlator_t *_old_THIS = NULL; \
+ \
+ _old_THIS = THIS; \
+ THIS = _xl; \
+ \
+ ret = _xl->notify(_xl, params); \
+ \
+ THIS = _old_THIS; \
+ } while (0);
+
+int32_t
+xlator_set_type_virtual(xlator_t *xl, const char *type);
+
+int32_t
+xlator_set_type(xlator_t *xl, const char *type);
+
+int32_t
+xlator_dynload(xlator_t *xl);
+
+xlator_t *
+file_to_xlator_tree(glusterfs_ctx_t *ctx, FILE *fp);
+
+int
+xlator_notify(xlator_t *this, int32_t event, void *data, ...);
+int
+xlator_init(xlator_t *this);
+int
+xlator_destroy(xlator_t *xl);
+
+int32_t
+xlator_tree_init(xlator_t *xl);
+int32_t
+xlator_tree_free_members(xlator_t *xl);
+int32_t
+xlator_tree_free_memacct(xlator_t *xl);
+
+void
+xlator_tree_fini(xlator_t *xl);
+
+void
+xlator_foreach(xlator_t *this, void (*fn)(xlator_t *each, void *data),
+ void *data);
+
+void
+xlator_foreach_depth_first(xlator_t *this,
+ void (*fn)(xlator_t *each, void *data), void *data);
+
+xlator_t *
+xlator_search_by_name(xlator_t *any, const char *name);
+xlator_t *
+get_xlator_by_name(xlator_t *this, char *target);
+xlator_t *
+get_xlator_by_type(xlator_t *this, char *target);
+
+void
+xlator_set_inode_lru_limit(xlator_t *this, void *data);
+
+void
+inode_destroy_notify(inode_t *inode, const char *xlname);
+
+int
+loc_copy(loc_t *dst, loc_t *src);
+int
+loc_copy_overload_parent(loc_t *dst, loc_t *src, inode_t *parent);
+#define loc_dup(src, dst) loc_copy(dst, src)
+void
+loc_wipe(loc_t *loc);
+int
+loc_path(loc_t *loc, const char *bname);
+void
+loc_gfid(loc_t *loc, uuid_t gfid);
+void
+loc_pargfid(loc_t *loc, uuid_t pargfid);
+char *
+loc_gfid_utoa(loc_t *loc);
+gf_boolean_t
+loc_is_root(loc_t *loc);
+int32_t
+loc_build_child(loc_t *child, loc_t *parent, char *name);
+gf_boolean_t
+loc_is_nameless(loc_t *loc);
+int
+xlator_mem_acct_init(xlator_t *xl, int num_types);
+void
+xlator_mem_acct_unref(struct mem_acct *mem_acct);
+int
+is_gf_log_command(xlator_t *trans, const char *name, char *value, size_t size);
+int
+glusterd_check_log_level(const char *value);
+int
+xlator_volopt_dynload(char *xlator_type, void **dl_handle,
+ volume_opt_list_t *vol_opt_handle);
+enum gf_hdsk_event_notify_op {
+ GF_EN_DEFRAG_STATUS,
+ GF_EN_MAX,
+};
+gf_boolean_t
+is_graph_topology_equal(glusterfs_graph_t *graph1, glusterfs_graph_t *graph2);
+int
+glusterfs_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx);
+
+int
+gf_volfile_reconfigure(int oldvollen, FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ const char *oldvolfile);
+
+int
+loc_touchup(loc_t *loc, const char *name);
+
+int
+glusterfs_leaf_position(xlator_t *tgt);
+
+int
+glusterfs_reachable_leaves(xlator_t *base, dict_t *leaves);
+
+int
+xlator_subvolume_count(xlator_t *this);
+
+void
+xlator_init_lock(void);
+void
+xlator_init_unlock(void);
+int
+copy_opts_to_child(xlator_t *src, xlator_t *dst, char *glob);
+
+int
+glusterfs_delete_volfile_checksum(glusterfs_ctx_t *ctx, const char *volfile_id);
+int
+xlator_memrec_free(xlator_t *xl);
+
+void
+xlator_mem_cleanup(xlator_t *this);
+
+void
+handle_default_options(xlator_t *xl, dict_t *options);
+
+void
+gluster_graph_take_reference(xlator_t *tree);
+
+gf_boolean_t
+mgmt_is_multiplexed_daemon(char *name);
+
+gf_boolean_t
+xlator_is_cleanup_starting(xlator_t *this);
+int
+graph_total_client_xlator(glusterfs_graph_t *graph);
+#endif /* _XLATOR_H */
diff --git a/libglusterfs/src/graph-print.c b/libglusterfs/src/graph-print.c
index 66712986496..595d74330a1 100644
--- a/libglusterfs/src/graph-print.c
+++ b/libglusterfs/src/graph-print.c
@@ -8,181 +8,128 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <sys/uio.h>
-#include "common-utils.h"
-#include "xlator.h"
-#include "graph-utils.h"
-
-
+#include "glusterfs/common-utils.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/graph-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
struct gf_printer {
- ssize_t (*write) (struct gf_printer *gp, char *buf, size_t len);
- void *priv;
+ ssize_t (*write)(struct gf_printer *gp, char *buf, size_t len);
+ void *priv;
+ int len;
};
static ssize_t
-gp_write_file (struct gf_printer *gp, char *buf, size_t len)
+gp_write_file(struct gf_printer *gp, char *buf, size_t len)
{
- FILE *f = gp->priv;
+ FILE *f = gp->priv;
- if (fwrite (buf, len, 1, f) != 1) {
- gf_log ("graph-print", GF_LOG_ERROR, "fwrite failed (%s)",
- strerror (errno));
+ if (fwrite(buf, len, 1, f) != 1) {
+ gf_msg("graph-print", GF_LOG_ERROR, errno, LG_MSG_FWRITE_FAILED,
+ "fwrite failed");
- return -1;
- }
+ return -1;
+ }
- return len;
+ return len;
}
-static ssize_t
-gp_write_buf (struct gf_printer *gp, char *buf, size_t len)
+static int
+gpprintf(struct gf_printer *gp, const char *format, ...)
{
- struct iovec *iov = gp->priv;
+ va_list arg;
+ char *str = NULL;
+ int ret = 0;
- if (iov->iov_len < len) {
- gf_log ("graph-print", GF_LOG_ERROR, "buffer full");
+ va_start(arg, format);
+ ret = gf_vasprintf(&str, format, arg);
+ va_end(arg);
- return -1;
- }
+ if (ret < 0)
+ return ret;
- memcpy (iov->iov_base, buf, len);
- iov->iov_base += len;
- iov->iov_len -= len;
+ ret = gp->write(gp, str, ret);
- return len;
+ GF_FREE(str);
+
+ return ret;
}
+#define GPPRINTF(gp, fmt, ...) \
+ do { \
+ ret = gpprintf(gp, fmt, ##__VA_ARGS__); \
+ if (ret == -1) \
+ goto out; \
+ else \
+ gp->len += ret; \
+ } while (0)
+
static int
-gpprintf (struct gf_printer *gp, const char *format, ...)
+_print_volume_options(dict_t *d, char *k, data_t *v, void *tmp)
{
- va_list arg;
- char *str = NULL;
- int ret = 0;
+ struct gf_printer *gp = tmp;
+ int ret = 0;
+ GPPRINTF(gp, " option %s %s\n", k, v->data);
+ return 0;
+out:
+ /* means, it is a failure */
+ return -1;
+}
- va_start (arg, format);
- ret = gf_vasprintf (&str, format, arg);
- va_end (arg);
+static int
+glusterfs_graph_print(struct gf_printer *gp, glusterfs_graph_t *graph)
+{
+ xlator_t *trav = NULL;
+ xlator_list_t *xch = NULL;
+ int ret = 0;
+ ssize_t len = 0;
- if (ret < 0)
- return ret;
+ if (!graph->first)
+ return 0;
- ret = gp->write (gp, str, ret);
+ for (trav = graph->first; trav->next; trav = trav->next)
+ ;
+ for (; trav; trav = trav->prev) {
+ GPPRINTF(gp, "volume %s\n type %s\n", trav->name, trav->type);
- GF_FREE (str);
+ ret = dict_foreach(trav->options, _print_volume_options, gp);
+ if (ret)
+ goto out;
- return ret;
-}
+ if (trav->children) {
+ GPPRINTF(gp, " subvolumes");
-static int
-glusterfs_graph_print (struct gf_printer *gp, glusterfs_graph_t *graph)
-{
-#define GPPRINTF(gp, fmt, ...) do { \
- ret = gpprintf (gp, fmt, ## __VA_ARGS__); \
- if (ret == -1) \
- goto out; \
- else \
- len += ret; \
- } while (0)
-
- xlator_t *trav = NULL;
- data_pair_t *pair = NULL;
- xlator_list_t *xch = NULL;
- int ret = 0;
- ssize_t len = 0;
-
- if (!graph->first)
- return 0;
-
- for (trav = graph->first; trav->next; trav = trav->next);
- for (; trav; trav = trav->prev) {
- GPPRINTF (gp, "volume %s\n type %s\n", trav->name,
- trav->type);
-
- for (pair = trav->options->members_list; pair && pair->next;
- pair = pair->next);
- for (; pair; pair = pair->prev)
- GPPRINTF (gp, " option %s %s\n", pair->key,
- pair->value->data);
-
- if (trav->children) {
- GPPRINTF (gp, " subvolumes");
-
- for (xch = trav->children; xch; xch = xch->next)
- GPPRINTF (gp, " %s", xch->xlator->name);
-
- GPPRINTF (gp, "\n");
- }
-
- GPPRINTF (gp, "end-volume\n");
- if (trav != graph->first)
- GPPRINTF (gp, "\n");
+ for (xch = trav->children; xch; xch = xch->next)
+ GPPRINTF(gp, " %s", xch->xlator->name);
+
+ GPPRINTF(gp, "\n");
}
+ GPPRINTF(gp, "end-volume\n");
+ if (trav != graph->first)
+ GPPRINTF(gp, "\n");
+ }
+
out:
- if (ret == -1) {
- gf_log ("graph-print", GF_LOG_ERROR, "printing failed");
+ len = gp->len;
+ if (ret == -1) {
+ gf_msg("graph-print", GF_LOG_ERROR, 0, LG_MSG_PRINT_FAILED,
+ "printing failed");
- return -1;
- }
+ return -1;
+ }
- return len;
+ return len;
#undef GPPRINTF
}
int
-glusterfs_graph_print_file (FILE *file, glusterfs_graph_t *graph)
+glusterfs_graph_print_file(FILE *file, glusterfs_graph_t *graph)
{
- struct gf_printer gp = { .write = gp_write_file,
- .priv = file
- };
-
- return glusterfs_graph_print (&gp, graph);
-}
-
-char *
-glusterfs_graph_print_buf (glusterfs_graph_t *graph)
-{
- FILE *f = NULL;
- struct iovec iov = {0,};
- int len = 0;
- char *buf = NULL;
- struct gf_printer gp = { .write = gp_write_buf,
- .priv = &iov
- };
-
- f = fopen ("/dev/null", "a");
- if (!f) {
- gf_log ("graph-print", GF_LOG_ERROR,
- "cannot open /dev/null (%s)", strerror (errno));
-
- return NULL;
- }
- len = glusterfs_graph_print_file (f, graph);
- fclose (f);
- if (len == -1)
- return NULL;
-
- buf = GF_CALLOC (1, len + 1, gf_common_mt_graph_buf);
- if (!buf) {
- return NULL;
- }
- iov.iov_base = buf;
- iov.iov_len = len;
-
- len = glusterfs_graph_print (&gp, graph);
- if (len == -1) {
- GF_FREE (buf);
-
- return NULL;
- }
+ struct gf_printer gp = {.write = gp_write_file, .priv = file};
- return buf;
+ return glusterfs_graph_print(&gp, graph);
}
diff --git a/libglusterfs/src/graph.c b/libglusterfs/src/graph.c
index a42ae7cd7fa..13f298eb3bd 100644
--- a/libglusterfs/src/graph.c
+++ b/libglusterfs/src/graph.c
@@ -8,19 +8,32 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include <dlfcn.h>
-#include <netdb.h>
-#include <fnmatch.h>
-#include "defaults.h"
-
-
-
+#include <stdint.h> // for uint32_t
+#include <sys/time.h> // for timeval
+#include <errno.h> // for EIO, errno, EINVAL, ENOMEM
+#include <fnmatch.h> // for fnmatch, FNM_NOESCAPE
+#include <openssl/sha.h> // for SHA256_DIGEST_LENGTH
+#include <regex.h> // for regmatch_t, regcomp
+#include <stdio.h> // for fclose, fopen, snprintf
+#include <stdlib.h> // for NULL, atoi, mkstemp
+#include <string.h> // for strcmp, strerror, memcpy
+#include <strings.h> // for rindex
+#include <sys/stat.h> // for stat
+#include <sys/time.h> // for gettimeofday
+#include <unistd.h> // for gethostname, getpid
+#include "glusterfs/common-utils.h" // for gf_strncpy, gf_time_fmt
+#include "glusterfs/defaults.h"
+#include "glusterfs/dict.h" // for dict_foreach, dict_set_...
+#include "glusterfs/globals.h" // for xlator_t, xlator_list_t
+#include "glusterfs/glusterfs.h" // for glusterfs_graph_t, glus...
+#include "glusterfs/glusterfs-fops.h" // for GF_EVENT_GRAPH_NEW, GF_...
+#include "glusterfs/libglusterfs-messages.h" // for LG_MSG_GRAPH_ERROR, LG_...
+#include "glusterfs/list.h" // for list_add, list_del_init
+#include "glusterfs/logging.h" // for gf_msg, GF_LOG_ERROR
+#include "glusterfs/mem-pool.h" // for GF_FREE, gf_strdup, GF_...
+#include "glusterfs/mem-types.h" // for gf_common_mt_xlator_list_t
+#include "glusterfs/options.h" // for xlator_tree_reconfigure
+#include "glusterfs/syscall.h" // for sys_close, sys_stat
#if 0
static void
@@ -28,7 +41,7 @@ _gf_dump_details (int argc, char **argv)
{
extern FILE *gf_log_logfile;
int i = 0;
- char timestr[64];
+ char timestr[GF_TIMESTR_SIZE];
time_t utime = 0;
pid_t mypid = 0;
struct utsname uname_buf = {{0, }, };
@@ -71,461 +84,1797 @@ _gf_dump_details (int argc, char **argv)
}
#endif
+int
+glusterfs_read_secure_access_file(void)
+{
+ FILE *fp = NULL;
+ char line[100] = {
+ 0,
+ };
+ int cert_depth = 1; /* Default SSL CERT DEPTH */
+ regex_t regcmpl;
+ char *key = {"^option transport.socket.ssl-cert-depth"};
+ char keyval[50] = {
+ 0,
+ };
+ int start = 0, end = 0, copy_len = 0;
+ regmatch_t result[1] = {{0}};
+
+ fp = fopen(SECURE_ACCESS_FILE, "r");
+ if (!fp)
+ goto out;
+
+ /* Check if any line matches with key */
+ while (fgets(line, sizeof(line), fp) != NULL) {
+ if (regcomp(&regcmpl, key, REG_EXTENDED)) {
+ goto out;
+ }
+ if (!regexec(&regcmpl, line, 1, result, 0)) {
+ start = result[0].rm_so;
+ end = result[0].rm_eo;
+ copy_len = end - start;
+ gf_strncpy(keyval, line + copy_len, sizeof(keyval));
+ if (keyval[0]) {
+ cert_depth = atoi(keyval);
+ if (cert_depth == 0)
+ cert_depth = 1; /* Default SSL CERT DEPTH */
+ break;
+ }
+ }
+ regfree(&regcmpl);
+ }
+out:
+ if (fp)
+ fclose(fp);
+ return cert_depth;
+}
-int
-glusterfs_xlator_link (xlator_t *pxl, xlator_t *cxl)
+xlator_t *
+glusterfs_get_last_xlator(glusterfs_graph_t *graph)
{
- xlator_list_t *xlchild = NULL;
- xlator_list_t *xlparent = NULL;
- xlator_list_t **tmp = NULL;
+ xlator_t *trav = graph->first;
+ if (!trav)
+ return NULL;
- xlparent = (void *) GF_CALLOC (1, sizeof (*xlparent),
- gf_common_mt_xlator_list_t);
- if (!xlparent)
- return -1;
+ while (trav->next)
+ trav = trav->next;
- xlchild = (void *) GF_CALLOC (1, sizeof (*xlchild),
- gf_common_mt_xlator_list_t);
- if (!xlchild) {
- GF_FREE (xlparent);
+ return trav;
+}
- return -1;
+xlator_t *
+glusterfs_mux_xlator_unlink(xlator_t *pxl, xlator_t *cxl)
+{
+ xlator_list_t *unlink = NULL;
+ xlator_list_t *prev = NULL;
+ xlator_list_t **tmp = NULL;
+ xlator_t *next_child = NULL;
+ xlator_t *xl = NULL;
+
+ for (tmp = &pxl->children; *tmp; tmp = &(*tmp)->next) {
+ if ((*tmp)->xlator == cxl) {
+ unlink = *tmp;
+ *tmp = (*tmp)->next;
+ if (*tmp)
+ next_child = (*tmp)->xlator;
+ break;
}
+ prev = *tmp;
+ }
- xlparent->xlator = pxl;
- for (tmp = &cxl->parents; *tmp; tmp = &(*tmp)->next);
- *tmp = xlparent;
+ if (!prev)
+ xl = pxl;
+ else if (prev->xlator)
+ xl = prev->xlator->graph->last_xl;
- xlchild->xlator = cxl;
- for (tmp = &pxl->children; *tmp; tmp = &(*tmp)->next);
- *tmp = xlchild;
+ if (xl)
+ xl->next = next_child;
+ if (next_child)
+ next_child->prev = xl;
- return 0;
+ GF_FREE(unlink);
+ return next_child;
}
+int
+glusterfs_xlator_link(xlator_t *pxl, xlator_t *cxl)
+{
+ xlator_list_t *xlchild = NULL;
+ xlator_list_t *xlparent = NULL;
+ xlator_list_t **tmp = NULL;
+
+ xlparent = (void *)GF_CALLOC(1, sizeof(*xlparent),
+ gf_common_mt_xlator_list_t);
+ if (!xlparent)
+ return -1;
+
+ xlchild = (void *)GF_CALLOC(1, sizeof(*xlchild),
+ gf_common_mt_xlator_list_t);
+ if (!xlchild) {
+ GF_FREE(xlparent);
+
+ return -1;
+ }
+
+ xlparent->xlator = pxl;
+ for (tmp = &cxl->parents; *tmp; tmp = &(*tmp)->next)
+ ;
+ *tmp = xlparent;
+
+ xlchild->xlator = cxl;
+ for (tmp = &pxl->children; *tmp; tmp = &(*tmp)->next)
+ ;
+ *tmp = xlchild;
+
+ return 0;
+}
void
-glusterfs_graph_set_first (glusterfs_graph_t *graph, xlator_t *xl)
+glusterfs_graph_set_first(glusterfs_graph_t *graph, xlator_t *xl)
{
- xl->next = graph->first;
- if (graph->first)
- ((xlator_t *)graph->first)->prev = xl;
- graph->first = xl;
+ xl->next = graph->first;
+ if (graph->first)
+ ((xlator_t *)graph->first)->prev = xl;
+ graph->first = xl;
- graph->xl_count++;
+ graph->xl_count++;
+ xl->xl_id = graph->xl_count;
}
-
int
-glusterfs_graph_insert (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx,
- const char *type, const char *name)
+glusterfs_graph_insert(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx,
+ const char *type, const char *name,
+ gf_boolean_t autoload)
{
- xlator_t *ixl = NULL;
+ xlator_t *ixl = NULL;
- if (!ctx->master) {
- gf_log ("glusterfs", GF_LOG_ERROR,
- "volume \"%s\" can be added from command line only "
- "on client side", type);
+ if (!ctx->master) {
+ gf_msg("glusterfs", GF_LOG_ERROR, 0, LG_MSG_VOLUME_ERROR,
+ "volume \"%s\" can be added from command line only "
+ "on client side",
+ type);
- return -1;
- }
+ return -1;
+ }
- ixl = GF_CALLOC (1, sizeof (*ixl), gf_common_mt_xlator_t);
- if (!ixl)
- return -1;
-
- ixl->ctx = ctx;
- ixl->graph = graph;
- ixl->options = get_new_dict ();
- if (!ixl->options)
- goto err;
-
- ixl->name = gf_strdup (name);
- if (!ixl->name)
- goto err;
-
- if (xlator_set_type (ixl, type) == -1) {
- gf_log ("glusterfs", GF_LOG_ERROR,
- "%s (%s) initialization failed",
- name, type);
- return -1;
- }
+ ixl = GF_CALLOC(1, sizeof(*ixl), gf_common_mt_xlator_t);
+ if (!ixl)
+ return -1;
- if (glusterfs_xlator_link (ixl, graph->top) == -1)
- goto err;
- glusterfs_graph_set_first (graph, ixl);
- graph->top = ixl;
+ ixl->ctx = ctx;
+ ixl->graph = graph;
+ ixl->options = dict_new();
+ if (!ixl->options)
+ goto err;
- return 0;
-err:
- xlator_destroy (ixl);
+ ixl->name = gf_strdup(name);
+ if (!ixl->name)
+ goto err;
+
+ ixl->is_autoloaded = autoload;
+
+ if (xlator_set_type(ixl, type) == -1) {
+ gf_msg("glusterfs", GF_LOG_ERROR, 0, LG_MSG_INIT_FAILED,
+ "%s (%s) initialization failed", name, type);
return -1;
+ }
+
+ if (glusterfs_xlator_link(ixl, graph->top) == -1)
+ goto err;
+ glusterfs_graph_set_first(graph, ixl);
+ graph->top = ixl;
+
+ return 0;
+err:
+ xlator_destroy(ixl);
+ return -1;
}
int
-glusterfs_graph_acl (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+glusterfs_graph_acl(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
{
- int ret = 0;
- cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
- cmd_args = &ctx->cmd_args;
+ cmd_args = &ctx->cmd_args;
- if (!cmd_args->acl)
- return 0;
+ if (!cmd_args->acl)
+ return 0;
- ret = glusterfs_graph_insert (graph, ctx, "system/posix-acl",
- "posix-acl-autoload");
- return ret;
+ ret = glusterfs_graph_insert(graph, ctx, "system/posix-acl",
+ "posix-acl-autoload", 1);
+ return ret;
}
int
-glusterfs_graph_worm (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+glusterfs_graph_worm(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
{
- int ret = 0;
- cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
- cmd_args = &ctx->cmd_args;
+ cmd_args = &ctx->cmd_args;
- if (!cmd_args->worm)
- return 0;
+ if (!cmd_args->worm)
+ return 0;
- ret = glusterfs_graph_insert (graph, ctx, "features/worm",
- "worm-autoload");
- return ret;
+ ret = glusterfs_graph_insert(graph, ctx, "features/worm", "worm-autoload",
+ 1);
+ return ret;
}
int
-glusterfs_graph_mac_compat (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+glusterfs_graph_meta(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
{
- int ret = 0;
- cmd_args_t *cmd_args = NULL;
+ int ret = 0;
- cmd_args = &ctx->cmd_args;
+ if (!ctx->master)
+ return 0;
- if (cmd_args->mac_compat == GF_OPTION_DISABLE)
- return 0;
+ ret = glusterfs_graph_insert(graph, ctx, "meta", "meta-autoload", 1);
+ return ret;
+}
- ret = glusterfs_graph_insert (graph, ctx, "features/mac-compat",
- "mac-compat-autoload");
+int
+glusterfs_graph_mac_compat(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+{
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
- return ret;
+ cmd_args = &ctx->cmd_args;
+
+ if (cmd_args->mac_compat == GF_OPTION_DISABLE)
+ return 0;
+
+ ret = glusterfs_graph_insert(graph, ctx, "features/mac-compat",
+ "mac-compat-autoload", 1);
+
+ return ret;
}
+int
+glusterfs_graph_gfid_access(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+{
+ int ret = 0;
+ cmd_args_t *cmd_args = NULL;
+
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args->aux_gfid_mount)
+ return 0;
+
+ ret = glusterfs_graph_insert(graph, ctx, "features/gfid-access",
+ "gfid-access-autoload", 1);
+ return ret;
+}
static void
-gf_add_cmdline_options (glusterfs_graph_t *graph, cmd_args_t *cmd_args)
-{
- int ret = 0;
- xlator_t *trav = NULL;
- xlator_cmdline_option_t *cmd_option = NULL;
-
- trav = graph->first;
-
- while (trav) {
- list_for_each_entry (cmd_option,
- &cmd_args->xlator_options, cmd_args) {
- if (!fnmatch (cmd_option->volume,
- trav->name, FNM_NOESCAPE)) {
- ret = dict_set_str (trav->options,
- cmd_option->key,
- cmd_option->value);
- if (ret == 0) {
- gf_log (trav->name, GF_LOG_INFO,
- "adding option '%s' for "
- "volume '%s' with value '%s'",
- cmd_option->key, trav->name,
- cmd_option->value);
- } else {
- gf_log (trav->name, GF_LOG_WARNING,
- "adding option '%s' for "
- "volume '%s' failed: %s",
- cmd_option->key, trav->name,
- strerror (-ret));
- }
- }
+gf_add_cmdline_options(glusterfs_graph_t *graph, cmd_args_t *cmd_args)
+{
+ int ret = 0;
+ xlator_t *trav = NULL;
+ xlator_cmdline_option_t *cmd_option = NULL;
+
+ trav = graph->first;
+
+ while (trav) {
+ list_for_each_entry(cmd_option, &cmd_args->xlator_options, cmd_args)
+ {
+ if (!fnmatch(cmd_option->volume, trav->name, FNM_NOESCAPE)) {
+ ret = dict_set_str(trav->options, cmd_option->key,
+ cmd_option->value);
+ if (ret == 0) {
+ gf_msg(trav->name, GF_LOG_TRACE, 0, LG_MSG_VOL_OPTION_ADD,
+ "adding option '%s' for "
+ "volume '%s' with value '%s'",
+ cmd_option->key, trav->name, cmd_option->value);
+ } else {
+ gf_msg(trav->name, GF_LOG_WARNING, -ret,
+ LG_MSG_VOL_OPTION_ADD,
+ "adding option '%s' for "
+ "volume '%s' failed",
+ cmd_option->key, trav->name);
}
- trav = trav->next;
+ }
}
+ trav = trav->next;
+ }
}
-
int
-glusterfs_graph_validate_options (glusterfs_graph_t *graph)
+glusterfs_graph_validate_options(glusterfs_graph_t *graph)
{
- xlator_t *trav = NULL;
- int ret = -1;
- char *errstr = NULL;
+ xlator_t *trav = NULL;
+ int ret = -1;
+ char *errstr = NULL;
- trav = graph->first;
+ trav = graph->first;
- while (trav) {
- if (list_empty (&trav->volume_options))
- continue;
+ while (trav) {
+ if (list_empty(&trav->volume_options)) {
+ trav = trav->next;
+ continue;
+ }
- ret = xlator_options_validate (trav, trav->options, &errstr);
- if (ret) {
- gf_log (trav->name, GF_LOG_ERROR,
- "validation failed: %s", errstr);
- return ret;
- }
- trav = trav->next;
+ ret = xlator_options_validate(trav, trav->options, &errstr);
+ if (ret) {
+ gf_msg(trav->name, GF_LOG_ERROR, 0, LG_MSG_VALIDATION_FAILED,
+ "validation failed: "
+ "%s",
+ errstr);
+ return ret;
}
+ trav = trav->next;
+ }
- return 0;
+ return 0;
}
-
int
-glusterfs_graph_init (glusterfs_graph_t *graph)
+glusterfs_graph_init(glusterfs_graph_t *graph)
{
- xlator_t *trav = NULL;
- int ret = -1;
+ xlator_t *trav = NULL;
+ int ret = -1;
- trav = graph->first;
+ trav = graph->first;
- while (trav) {
- ret = xlator_init (trav);
- if (ret) {
- gf_log (trav->name, GF_LOG_ERROR,
- "initializing translator failed");
- return ret;
- }
- trav = trav->next;
+ while (trav) {
+ ret = xlator_init(trav);
+ if (ret) {
+ gf_msg(trav->name, GF_LOG_ERROR, 0, LG_MSG_TRANSLATOR_INIT_FAILED,
+ "initializing translator failed");
+ return ret;
}
+ trav = trav->next;
+ }
- return 0;
+ return 0;
}
+int
+glusterfs_graph_deactivate(glusterfs_graph_t *graph)
+{
+ xlator_t *top = NULL;
+
+ if (graph == NULL)
+ goto out;
-static void
-_log_if_unknown_option (dict_t *dict, char *key, data_t *value, void *data)
+ top = graph->top;
+ xlator_tree_fini(top);
+out:
+ return 0;
+}
+
+static int
+_log_if_unknown_option(dict_t *dict, char *key, data_t *value, void *data)
{
- volume_option_t *found = NULL;
- xlator_t *xl = NULL;
+ volume_option_t *found = NULL;
+ xlator_t *xl = NULL;
- xl = data;
+ xl = data;
- found = xlator_volume_option_get (xl, key);
+ found = xlator_volume_option_get(xl, key);
- if (!found) {
- gf_log (xl->name, GF_LOG_WARNING,
- "option '%s' is not recognized", key);
- }
+ if (!found) {
+ gf_msg(xl->name, GF_LOG_DEBUG, 0, LG_MSG_XLATOR_OPTION_INVALID,
+ "option '%s' is not recognized", key);
+ }
- return;
+ return 0;
}
-
static void
-_xlator_check_unknown_options (xlator_t *xl, void *data)
+_xlator_check_unknown_options(xlator_t *xl, void *data)
{
- dict_foreach (xl->options, _log_if_unknown_option, xl);
+ dict_foreach(xl->options, _log_if_unknown_option, xl);
}
-
-int
-glusterfs_graph_unknown_options (glusterfs_graph_t *graph)
+static int
+glusterfs_graph_unknown_options(glusterfs_graph_t *graph)
{
- xlator_foreach (graph->first, _xlator_check_unknown_options, NULL);
- return 0;
+ xlator_foreach(graph->first, _xlator_check_unknown_options, NULL);
+ return 0;
}
+static void
+fill_uuid(char *uuid, int size, struct timeval tv)
+{
+ char hostname[50] = {
+ 0,
+ };
+ char now_str[GF_TIMESTR_SIZE];
-void
-fill_uuid (char *uuid, int size)
+ if (gethostname(hostname, sizeof(hostname) - 1) != 0) {
+ gf_msg("graph", GF_LOG_ERROR, errno, LG_MSG_GETHOSTNAME_FAILED,
+ "gethostname failed");
+ hostname[sizeof(hostname) - 1] = '\0';
+ }
+
+ gf_time_fmt_tv(now_str, sizeof now_str, &tv, gf_timefmt_dirent);
+ snprintf(uuid, size, "%s-%d-%s", hostname, getpid(), now_str);
+
+ return;
+}
+
+static int
+glusterfs_graph_settop(glusterfs_graph_t *graph, char *volume_name,
+ gf_boolean_t exact_match)
{
- char hostname[256] = {0,};
- struct timeval tv = {0,};
- char now_str[64];
+ int ret = -1;
+ xlator_t *trav = NULL;
- if (gettimeofday (&tv, NULL) == -1) {
- gf_log ("graph", GF_LOG_ERROR,
- "gettimeofday: failed %s",
- strerror (errno));
+ if (!volume_name || !exact_match) {
+ graph->top = graph->first;
+ ret = 0;
+ } else {
+ for (trav = graph->first; trav; trav = trav->next) {
+ if (strcmp(trav->name, volume_name) == 0) {
+ graph->top = trav;
+ ret = 0;
+ break;
+ }
}
+ }
+
+ return ret;
+}
+
+int
+glusterfs_graph_parent_up(glusterfs_graph_t *graph)
+{
+ xlator_t *trav = NULL;
+ int ret = -1;
+
+ trav = graph->first;
- if (gethostname (hostname, 256) == -1) {
- gf_log ("graph", GF_LOG_ERROR,
- "gethostname: failed %s",
- strerror (errno));
+ while (trav) {
+ if (!xlator_has_parent(trav)) {
+ ret = xlator_notify(trav, GF_EVENT_PARENT_UP, trav);
}
- gf_time_fmt (now_str, sizeof now_str, tv.tv_sec, gf_timefmt_Ymd_T);
- snprintf (uuid, size, "%s-%d-%s:%"GF_PRI_SUSECONDS,
- hostname, getpid(), now_str, tv.tv_usec);
+ if (ret)
+ break;
- return;
-}
+ trav = trav->next;
+ }
+ return ret;
+}
int
-glusterfs_graph_settop (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+glusterfs_graph_prepare(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx,
+ char *volume_name)
{
- const char *volume_name = NULL;
- xlator_t *trav = NULL;
+ xlator_t *trav = NULL;
+ int ret = 0;
+
+ /* XXX: CHECKSUM */
+
+ /* XXX: attach to -n volname */
+ /* A '/' in the volume name suggests brick multiplexing is used, find
+ * the top of the (sub)graph. The volname MUST match the subvol in this
+ * case. In other cases (like for gfapi) the default top for the
+ * (sub)graph is ok. */
+ if (!volume_name) {
+ /* GlusterD does not pass a volume_name */
+ ret = glusterfs_graph_settop(graph, volume_name, _gf_false);
+ } else if (strncmp(volume_name, "/snaps/", 7) == 0) {
+ /* snap shots have their top xlator named like "/snaps/..." */
+ ret = glusterfs_graph_settop(graph, volume_name, _gf_false);
+ } else if (volume_name[0] == '/') {
+ /* brick multiplexing passes the brick path */
+ ret = glusterfs_graph_settop(graph, volume_name, _gf_true);
+ } else {
+ ret = glusterfs_graph_settop(graph, volume_name, _gf_false);
+ }
+
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_ERROR,
+ "glusterfs graph settop failed");
+ errno = EINVAL;
+ return -1;
+ }
- volume_name = ctx->cmd_args.volume_name;
+ /* XXX: WORM VOLUME */
+ ret = glusterfs_graph_worm(graph, ctx);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_GRAPH_ERROR,
+ "glusterfs graph worm failed");
+ return -1;
+ }
+ ret = glusterfs_graph_acl(graph, ctx);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_GRAPH_ERROR,
+ "glusterfs graph ACL failed");
+ return -1;
+ }
- if (!volume_name) {
- graph->top = graph->first;
- return 0;
- }
+ /* XXX: MAC COMPAT */
+ ret = glusterfs_graph_mac_compat(graph, ctx);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_GRAPH_ERROR,
+ "glusterfs graph mac compat failed");
+ return -1;
+ }
- for (trav = graph->first; trav; trav = trav->next) {
- if (strcmp (trav->name, volume_name) == 0) {
- graph->top = trav;
- return 0;
- }
- }
+ /* XXX: gfid-access */
+ ret = glusterfs_graph_gfid_access(graph, ctx);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_GRAPH_ERROR,
+ "glusterfs graph 'gfid-access' failed");
+ return -1;
+ }
+ /* XXX: topmost xlator */
+ ret = glusterfs_graph_meta(graph, ctx);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_GRAPH_ERROR,
+ "glusterfs graph meta failed");
return -1;
+ }
+
+ /* XXX: this->ctx setting */
+ for (trav = graph->first; trav; trav = trav->next) {
+ trav->ctx = ctx;
+ }
+
+ /* XXX: DOB setting */
+ gettimeofday(&graph->dob, NULL);
+
+ fill_uuid(graph->graph_uuid, sizeof(graph->graph_uuid), graph->dob);
+
+ graph->id = ctx->graph_id++;
+
+ /* XXX: --xlator-option additions */
+ gf_add_cmdline_options(graph, &ctx->cmd_args);
+
+ return 0;
+}
+
+static xlator_t *
+glusterfs_root(glusterfs_graph_t *graph)
+{
+ return graph->first;
+}
+
+static int
+glusterfs_is_leaf(xlator_t *xl)
+{
+ int ret = 0;
+
+ if (!xl->children)
+ ret = 1;
+
+ return ret;
+}
+
+static uint32_t
+glusterfs_count_leaves(xlator_t *xl)
+{
+ int n = 0;
+ xlator_list_t *list = NULL;
+
+ if (glusterfs_is_leaf(xl))
+ n = 1;
+ else
+ for (list = xl->children; list; list = list->next)
+ n += glusterfs_count_leaves(list->xlator);
+
+ return n;
}
+int
+glusterfs_get_leaf_count(glusterfs_graph_t *graph)
+{
+ return graph->leaf_count;
+}
+
+static int
+_glusterfs_leaf_position(xlator_t *tgt, int *id, xlator_t *xl)
+{
+ xlator_list_t *list = NULL;
+ int found = 0;
+
+ if (xl == tgt)
+ found = 1;
+ else if (glusterfs_is_leaf(xl))
+ *id += 1;
+ else
+ for (list = xl->children; !found && list; list = list->next)
+ found = _glusterfs_leaf_position(tgt, id, list->xlator);
+
+ return found;
+}
int
-glusterfs_graph_parent_up (glusterfs_graph_t *graph)
+glusterfs_leaf_position(xlator_t *tgt)
{
- xlator_t *trav = NULL;
- int ret = -1;
+ xlator_t *root = NULL;
+ int pos = 0;
- trav = graph->first;
+ root = glusterfs_root(tgt->graph);
- while (trav) {
- if (!xlator_has_parent (trav)) {
- ret = xlator_notify (trav, GF_EVENT_PARENT_UP, trav);
- }
+ if (!_glusterfs_leaf_position(tgt, &pos, root))
+ pos = -1;
+
+ return pos;
+}
+
+static int
+_glusterfs_reachable_leaves(xlator_t *base, xlator_t *xl, dict_t *leaves)
+{
+ xlator_list_t *list = NULL;
+ int err = 1;
+ int pos = 0;
+ char *strpos = NULL;
+
+ if (glusterfs_is_leaf(xl)) {
+ pos = glusterfs_leaf_position(xl);
+ if (pos < 0)
+ goto out;
- if (ret)
- break;
+ err = gf_asprintf(&strpos, "%d", pos);
- trav = trav->next;
+ if (err >= 0) {
+ err = dict_set_static_ptr(leaves, strpos, base);
+ GF_FREE(strpos);
}
+ } else {
+ for (err = 0, list = xl->children; !err && list; list = list->next)
+ err = _glusterfs_reachable_leaves(base, list->xlator, leaves);
+ }
- return ret;
+out:
+ return err;
}
+/*
+ * This function determines which leaves are children (or grandchildren)
+ * of the given base. The base may have multiple sub volumes. Each sub
+ * volumes in turn may have sub volumes.. until the leaves are reached.
+ * Each leaf is numbered 1,2,3,...etc.
+ *
+ * The base translator calls this function to see which of *its* subvolumes
+ * it would forward an FOP to, to *get to* a particular leaf.
+ * That information is built into the "leaves" dictionary.
+ * key:destination leaf# -> value:base subvolume xlator.
+ */
int
-glusterfs_graph_prepare (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+glusterfs_reachable_leaves(xlator_t *base, dict_t *leaves)
{
- xlator_t *trav = NULL;
- int ret = 0;
+ xlator_list_t *list = NULL;
+ int err = 0;
- /* XXX: CHECKSUM */
+ for (list = base->children; !err && list; list = list->next)
+ err = _glusterfs_reachable_leaves(list->xlator, list->xlator, leaves);
- /* XXX: attach to -n volname */
- ret = glusterfs_graph_settop (graph, ctx);
- if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "glusterfs graph settop failed");
- return -1;
- }
+ return err;
+}
- /* XXX: WORM VOLUME */
- ret = glusterfs_graph_worm (graph, ctx);
- if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "glusterfs graph worm failed");
- return -1;
- }
- ret = glusterfs_graph_acl (graph, ctx);
+int
+glusterfs_graph_activate(glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+{
+ int ret = 0;
+ xlator_t *root = NULL;
+
+ root = glusterfs_root(graph);
+
+ graph->leaf_count = glusterfs_count_leaves(root);
+
+ /* XXX: all xlator options validation */
+ ret = glusterfs_graph_validate_options(graph);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_VALIDATION_FAILED,
+ "validate options failed");
+ return ret;
+ }
+
+ /* XXX: perform init () */
+ ret = glusterfs_graph_init(graph);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_GRAPH_INIT_FAILED,
+ "init failed");
+ return ret;
+ }
+
+ ret = glusterfs_graph_unknown_options(graph);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_UNKNOWN_OPTIONS_FAILED,
+ "unknown options "
+ "failed");
+ return ret;
+ }
+
+ /* XXX: log full graph (_gf_dump_details) */
+
+ list_add(&graph->list, &ctx->graphs);
+ ctx->active = graph;
+
+ /* XXX: attach to master and set active pointer */
+ if (ctx->master) {
+ ret = xlator_notify(ctx->master, GF_EVENT_GRAPH_NEW, graph);
if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "glusterfs graph ACL failed");
- return -1;
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_EVENT_NOTIFY_FAILED,
+ "graph new notification failed");
+ return ret;
}
+ ((xlator_t *)ctx->master)->next = graph->top;
+ }
+
+ /* XXX: perform parent up */
+ ret = glusterfs_graph_parent_up(graph);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_EVENT_NOTIFY_FAILED,
+ "parent up notification failed");
+ return ret;
+ }
+
+ return 0;
+}
+
+int
+xlator_equal_rec(xlator_t *xl1, xlator_t *xl2)
+{
+ xlator_list_t *trav1 = NULL;
+ xlator_list_t *trav2 = NULL;
+ int ret = 0;
+
+ if (xl1 == NULL || xl2 == NULL) {
+ gf_msg_debug("xlator", 0, "invalid argument");
+ return -1;
+ }
+
+ trav1 = xl1->children;
+ trav2 = xl2->children;
- /* XXX: MAC COMPAT */
- ret = glusterfs_graph_mac_compat (graph, ctx);
+ while (trav1 && trav2) {
+ ret = xlator_equal_rec(trav1->xlator, trav2->xlator);
if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "glusterfs graph mac compat failed");
- return -1;
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "xlators children "
+ "not equal");
+ goto out;
}
- /* XXX: this->ctx setting */
- for (trav = graph->first; trav; trav = trav->next) {
- trav->ctx = ctx;
- }
+ trav1 = trav1->next;
+ trav2 = trav2->next;
+ }
+
+ if (trav1 || trav2) {
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(xl1->name, xl2->name)) {
+ ret = -1;
+ goto out;
+ }
+
+ /* type could have changed even if xlator names match,
+ e.g cluster/distribute and cluster/nufa share the same
+ xlator name
+ */
+ if (strcmp(xl1->type, xl2->type)) {
+ ret = -1;
+ goto out;
+ }
+out:
+ return ret;
+}
- /* XXX: DOB setting */
- gettimeofday (&graph->dob, NULL);
+gf_boolean_t
+is_graph_topology_equal(glusterfs_graph_t *graph1, glusterfs_graph_t *graph2)
+{
+ xlator_t *trav1 = NULL;
+ xlator_t *trav2 = NULL;
+ gf_boolean_t ret = _gf_true;
+ xlator_list_t *ltrav;
+
+ trav1 = graph1->first;
+ trav2 = graph2->first;
+
+ if (strcmp(trav2->type, "protocol/server") == 0) {
+ trav2 = trav2->children->xlator;
+ for (ltrav = trav1->children; ltrav; ltrav = ltrav->next) {
+ trav1 = ltrav->xlator;
+ if (!trav1->cleanup_starting && !strcmp(trav1->name, trav2->name)) {
+ break;
+ }
+ }
+ if (!ltrav) {
+ return _gf_false;
+ }
+ }
- fill_uuid (graph->graph_uuid, 128);
+ ret = xlator_equal_rec(trav1, trav2);
- graph->id = ctx->graph_id++;
+ if (ret) {
+ gf_msg_debug("glusterfsd-mgmt", 0, "graphs are not equal");
+ ret = _gf_false;
+ goto out;
+ }
- /* XXX: --xlator-option additions */
- gf_add_cmdline_options (graph, &ctx->cmd_args);
+ ret = _gf_true;
+ gf_msg_debug("glusterfsd-mgmt", 0, "graphs are equal");
+out:
+ return ret;
+}
- return 0;
+/* Function has 3types of return value 0, -ve , 1
+ * return 0 =======> reconfiguration of options has succeeded
+ * return 1 =======> the graph has to be reconstructed and all the
+ * xlators should be inited return -1(or -ve) =======> Some Internal Error
+ * occurred during the operation
+ */
+int
+glusterfs_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx)
+{
+ glusterfs_graph_t *oldvolfile_graph = NULL;
+ glusterfs_graph_t *newvolfile_graph = NULL;
+
+ int ret = -1;
+
+ if (!ctx) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL,
+ "ctx is NULL");
+ goto out;
+ }
+
+ oldvolfile_graph = ctx->active;
+ if (!oldvolfile_graph) {
+ ret = 1;
+ goto out;
+ }
+
+ newvolfile_graph = glusterfs_graph_construct(newvolfile_fp);
+
+ if (!newvolfile_graph) {
+ goto out;
+ }
+
+ glusterfs_graph_prepare(newvolfile_graph, ctx, ctx->cmd_args.volume_name);
+
+ if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) {
+ ret = 1;
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Graph topology not "
+ "equal(should call INIT)");
+ goto out;
+ }
+
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Only options have changed in the"
+ " new graph");
+
+ ret = glusterfs_graph_reconfigure(oldvolfile_graph, newvolfile_graph);
+ if (ret) {
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Could not reconfigure "
+ "new options in old graph");
+ goto out;
+ }
+
+ ret = 0;
+out:
+
+ if (newvolfile_graph)
+ glusterfs_graph_destroy(newvolfile_graph);
+
+ return ret;
}
+/* This function need to remove. This added to support gfapi volfile
+ * reconfigure.
+ */
int
-glusterfs_graph_activate (glusterfs_graph_t *graph, glusterfs_ctx_t *ctx)
+gf_volfile_reconfigure(int oldvollen, FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ const char *oldvolfile)
{
- int ret = 0;
+ glusterfs_graph_t *oldvolfile_graph = NULL;
+ glusterfs_graph_t *newvolfile_graph = NULL;
+ FILE *oldvolfile_fp = NULL;
+ /*Since the function mkstemp() replaces XXXXXX,
+ * assigning it to a variable
+ */
+ char temp_file[] = "/tmp/temp_vol_file_XXXXXX";
+ gf_boolean_t active_graph_found = _gf_true;
+
+ int ret = -1;
+ int u_ret = -1;
+ int file_desc = -1;
+
+ if (!oldvollen) {
+ ret = 1; // Has to call INIT for the whole graph
+ goto out;
+ }
+
+ if (!ctx) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL,
+ "ctx is NULL");
+ goto out;
+ }
+
+ oldvolfile_graph = ctx->active;
+ if (!oldvolfile_graph) {
+ active_graph_found = _gf_false;
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_ACTIVE_GRAPH_NULL,
+ "glusterfs_ctx->active is NULL");
+
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+ file_desc = mkstemp(temp_file);
+ if (file_desc < 0) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, errno,
+ LG_MSG_TMPFILE_CREATE_FAILED,
+ "Unable to "
+ "create temporary volfile");
+ goto out;
+ }
- /* XXX: all xlator options validation */
- ret = glusterfs_graph_validate_options (graph);
- if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "validate options failed");
- return ret;
+ /*Calling unlink so that when the file is closed or program
+ *terminates the tempfile is deleted.
+ */
+ u_ret = sys_unlink(temp_file);
+
+ if (u_ret < 0) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, errno,
+ LG_MSG_TMPFILE_DELETE_FAILED,
+ "Temporary file"
+ " delete failed.");
+ sys_close(file_desc);
+ goto out;
}
- /* XXX: perform init () */
- ret = glusterfs_graph_init (graph);
- if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "init failed");
- return ret;
+ oldvolfile_fp = fdopen(file_desc, "w+b");
+ if (!oldvolfile_fp)
+ goto out;
+
+ fwrite(oldvolfile, oldvollen, 1, oldvolfile_fp);
+ fflush(oldvolfile_fp);
+ if (ferror(oldvolfile_fp)) {
+ goto out;
}
- ret = glusterfs_graph_unknown_options (graph);
- if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "unknown options failed");
- return ret;
+ oldvolfile_graph = glusterfs_graph_construct(oldvolfile_fp);
+ if (!oldvolfile_graph)
+ goto out;
+ }
+
+ newvolfile_graph = glusterfs_graph_construct(newvolfile_fp);
+ if (!newvolfile_graph) {
+ goto out;
+ }
+
+ glusterfs_graph_prepare(newvolfile_graph, ctx, ctx->cmd_args.volume_name);
+
+ if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) {
+ ret = 1;
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Graph topology not "
+ "equal(should call INIT)");
+ goto out;
+ }
+
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Only options have changed in the"
+ " new graph");
+
+ /* */
+ ret = glusterfs_graph_reconfigure(oldvolfile_graph, newvolfile_graph);
+ if (ret) {
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Could not reconfigure "
+ "new options in old graph");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (oldvolfile_fp)
+ fclose(oldvolfile_fp);
+
+ /* Do not simply destroy the old graph here. If the oldgraph
+ is constructed here in this function itself instead of getting
+ it from ctx->active (which happens only of ctx->active is NULL),
+ then destroy the old graph. If some i/o is still happening in
+ the old graph and the old graph is obtained from ctx->active,
+ then destroying the graph will cause problems.
+ */
+ if (!active_graph_found && oldvolfile_graph)
+ glusterfs_graph_destroy(oldvolfile_graph);
+ if (newvolfile_graph)
+ glusterfs_graph_destroy(newvolfile_graph);
+
+ return ret;
+}
+
+int
+glusterfs_graph_reconfigure(glusterfs_graph_t *oldgraph,
+ glusterfs_graph_t *newgraph)
+{
+ xlator_t *old_xl = NULL;
+ xlator_t *new_xl = NULL;
+ xlator_list_t *trav;
+
+ GF_ASSERT(oldgraph);
+ GF_ASSERT(newgraph);
+
+ old_xl = oldgraph->first;
+ while (old_xl->is_autoloaded) {
+ old_xl = old_xl->children->xlator;
+ }
+
+ new_xl = newgraph->first;
+ while (new_xl->is_autoloaded) {
+ new_xl = new_xl->children->xlator;
+ }
+
+ if (strcmp(old_xl->type, "protocol/server") != 0) {
+ return xlator_tree_reconfigure(old_xl, new_xl);
+ }
+
+ /* Some options still need to be handled by the server translator. */
+ if (old_xl->reconfigure) {
+ old_xl->reconfigure(old_xl, new_xl->options);
+ }
+
+ (void)copy_opts_to_child(new_xl, FIRST_CHILD(new_xl), "*auth*");
+ new_xl = FIRST_CHILD(new_xl);
+
+ for (trav = old_xl->children; trav; trav = trav->next) {
+ if (!trav->xlator->cleanup_starting &&
+ !strcmp(trav->xlator->name, new_xl->name)) {
+ return xlator_tree_reconfigure(trav->xlator, new_xl);
}
+ }
- /* XXX: log full graph (_gf_dump_details) */
+ return -1;
+}
- list_add (&graph->list, &ctx->graphs);
- ctx->active = graph;
+int
+glusterfs_graph_destroy_residual(glusterfs_graph_t *graph)
+{
+ int ret = -1;
- /* XXX: attach to master and set active pointer */
- if (ctx->master)
- ret = xlator_notify (ctx->master, GF_EVENT_GRAPH_NEW, graph);
- if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "graph new notification failed");
- return ret;
+ if (graph == NULL)
+ return ret;
+
+ ret = xlator_tree_free_memacct(graph->first);
+
+ list_del_init(&graph->list);
+ pthread_mutex_destroy(&graph->mutex);
+ pthread_cond_destroy(&graph->child_down_cond);
+ GF_FREE(graph);
+
+ return ret;
+}
+
+/* This function destroys all the xlator members except for the
+ * xlator strcuture and its mem accounting field.
+ *
+ * If otherwise, it would destroy the master xlator object as well
+ * its mem accounting, which would mean after calling glusterfs_graph_destroy()
+ * there cannot be any reference to GF_FREE() from the master xlator, this is
+ * not possible because of the following dependencies:
+ * - glusterfs_ctx_t will have mem pools allocated by the master xlators
+ * - xlator objects will have references to those mem pools(g: dict)
+ *
+ * Ordering the freeing in any of the order will also not solve the dependency:
+ * - Freeing xlator objects(including memory accounting) before mem pools
+ * destruction will mean not use GF_FREE while destroying mem pools.
+ * - Freeing mem pools and then destroying xlator objects would lead to crashes
+ * when xlator tries to unref dict or other mem pool objects.
+ *
+ * Hence the way chosen out of this interdependency is to split xlator object
+ * free into two stages:
+ * - Free all the xlator members excpet for its mem accounting structure
+ * - Free all the mem accouting structures of xlator along with the xlator
+ * object itself.
+ */
+int
+glusterfs_graph_destroy(glusterfs_graph_t *graph)
+{
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("graph", graph, out);
+
+ ret = xlator_tree_free_members(graph->first);
+
+ ret = glusterfs_graph_destroy_residual(graph);
+out:
+ return ret;
+}
+
+int
+glusterfs_graph_fini(glusterfs_graph_t *graph)
+{
+ xlator_t *trav = NULL;
+
+ trav = graph->first;
+
+ while (trav) {
+ if (trav->init_succeeded) {
+ trav->cleanup_starting = 1;
+ trav->fini(trav);
+ if (trav->local_pool) {
+ mem_pool_destroy(trav->local_pool);
+ trav->local_pool = NULL;
+ }
+ if (trav->itable) {
+ inode_table_destroy(trav->itable);
+ trav->itable = NULL;
+ }
+ trav->init_succeeded = 0;
}
+ trav = trav->next;
+ }
- /* XXX: perform parent up */
- ret = glusterfs_graph_parent_up (graph);
- if (ret) {
- gf_log ("graph", GF_LOG_ERROR, "parent up notification failed");
- return ret;
+ return 0;
+}
+
+int
+glusterfs_graph_attach(glusterfs_graph_t *orig_graph, char *path,
+ glusterfs_graph_t **newgraph)
+{
+ xlator_t *this = THIS;
+ FILE *fp;
+ glusterfs_graph_t *graph;
+ xlator_t *xl;
+ char *volfile_id = NULL;
+ char *volfile_content = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ size_t file_len = -1;
+ gf_volfile_t *volfile_obj = NULL;
+ int ret = -1;
+ char sha256_hash[SHA256_DIGEST_LENGTH] = {
+ 0,
+ };
+
+ if (!orig_graph) {
+ return -EINVAL;
+ }
+
+ ret = sys_stat(path, &stbuf);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_ERROR, "Unable to stat %s (%s)", path,
+ strerror(errno));
+ return -EINVAL;
+ }
+
+ file_len = stbuf.st_size;
+ volfile_content = GF_MALLOC(file_len + 1, gf_common_mt_char);
+ if (!volfile_content)
+ return -ENOMEM;
+
+ fp = fopen(path, "r");
+ if (!fp) {
+ gf_log(THIS->name, GF_LOG_WARNING, "oops, %s disappeared on us", path);
+ GF_FREE(volfile_content);
+ return -EIO;
+ }
+
+ ret = fread(volfile_content, sizeof(char), file_len, fp);
+ if (ret == file_len) {
+ glusterfs_compute_sha256((const unsigned char *)volfile_content,
+ file_len, sha256_hash);
+ } else {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "read failed on path %s. File size=%" GF_PRI_SIZET
+ "read size=%d",
+ path, file_len, ret);
+ GF_FREE(volfile_content);
+ fclose(fp);
+ return -EIO;
+ }
+
+ GF_FREE(volfile_content);
+
+ graph = glusterfs_graph_construct(fp);
+ fclose(fp);
+ if (!graph) {
+ gf_log(this->name, GF_LOG_WARNING, "could not create graph from %s",
+ path);
+ return -EIO;
+ }
+
+ /*
+ * If there's a server translator on top, we want whatever's below
+ * that.
+ */
+ xl = graph->first;
+ if (strcmp(xl->type, "protocol/server") == 0) {
+ (void)copy_opts_to_child(xl, FIRST_CHILD(xl), "*auth*");
+ xl = FIRST_CHILD(xl);
+ }
+ graph->first = xl;
+ *newgraph = graph;
+
+ volfile_id = strstr(path, "/snaps/");
+ if (!volfile_id) {
+ volfile_id = rindex(path, '/');
+ if (volfile_id) {
+ ++volfile_id;
}
+ }
+ if (volfile_id) {
+ xl->volfile_id = gf_strdup(volfile_id);
+ /* There's a stray ".vol" at the end. */
+ xl->volfile_id[strlen(xl->volfile_id) - 4] = '\0';
+ }
+
+ /* TODO memory leaks everywhere need to free graph in case of error */
+ if (glusterfs_graph_prepare(graph, this->ctx, xl->name)) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "failed to prepare graph for xlator %s", xl->name);
+ return -EIO;
+ } else if (glusterfs_graph_init(graph)) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "failed to initialize graph for xlator %s", xl->name);
+ return -EIO;
+ } else if (glusterfs_xlator_link(orig_graph->top, graph->top)) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "failed to link the graphs for xlator %s ", xl->name);
+ return -EIO;
+ }
+
+ if (!volfile_obj) {
+ volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t);
+ if (!volfile_obj) {
+ return -EIO;
+ }
+ }
+
+ INIT_LIST_HEAD(&volfile_obj->volfile_list);
+ snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s",
+ xl->volfile_id);
+ memcpy(volfile_obj->volfile_checksum, sha256_hash,
+ sizeof(volfile_obj->volfile_checksum));
+ list_add(&volfile_obj->volfile_list, &this->ctx->volfile_list);
+ return 0;
+}
+int
+glusterfs_muxsvc_cleanup_parent(glusterfs_ctx_t *ctx,
+ glusterfs_graph_t *parent_graph)
+{
+ if (parent_graph) {
+ if (parent_graph->first) {
+ xlator_destroy(parent_graph->first);
+ }
+ ctx->active = NULL;
+ GF_FREE(parent_graph);
+ parent_graph = NULL;
+ }
+ return 0;
+}
+
+void *
+glusterfs_graph_cleanup(void *arg)
+{
+ glusterfs_graph_t *graph = NULL;
+ glusterfs_ctx_t *ctx = THIS->ctx;
+ int ret = -1;
+ graph = arg;
+
+ if (!graph)
+ return NULL;
+
+ /* To destroy the graph, fitst sent a GF_EVENT_PARENT_DOWN
+ * Then wait for GF_EVENT_CHILD_DOWN to get on the top
+ * xl. Once we have GF_EVENT_CHILD_DOWN event, then proceed
+ * to fini.
+ *
+ * During fini call, this will take a last unref on rpc and
+ * rpc_transport_object.
+ */
+ if (graph->first)
+ default_notify(graph->first, GF_EVENT_PARENT_DOWN, graph->first);
+
+ ret = pthread_mutex_lock(&graph->mutex);
+ if (ret != 0) {
+ gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED,
+ "Failed to acquire a lock");
+ goto out;
+ }
+ /* check and wait for CHILD_DOWN for top xlator*/
+ while (graph->used) {
+ ret = pthread_cond_wait(&graph->child_down_cond, &graph->mutex);
+ if (ret != 0)
+ gf_msg("glusterfs", GF_LOG_INFO, 0, LG_MSG_GRAPH_CLEANUP_FAILED,
+ "cond wait failed ");
+ }
+
+ ret = pthread_mutex_unlock(&graph->mutex);
+ if (ret != 0) {
+ gf_msg("glusterfs", GF_LOG_ERROR, EAGAIN, LG_MSG_GRAPH_CLEANUP_FAILED,
+ "Failed to release a lock");
+ }
+
+ /* Though we got a child down on top xlator, we have to wait until
+ * all the notifier to exit. Because there should not be any threads
+ * that access xl variables.
+ */
+ pthread_mutex_lock(&ctx->notify_lock);
+ {
+ while (ctx->notifying)
+ pthread_cond_wait(&ctx->notify_cond, &ctx->notify_lock);
+ }
+ pthread_mutex_unlock(&ctx->notify_lock);
+
+ pthread_mutex_lock(&ctx->cleanup_lock);
+ {
+ glusterfs_graph_fini(graph);
+ glusterfs_graph_destroy(graph);
+ }
+ pthread_mutex_unlock(&ctx->cleanup_lock);
+out:
+ return NULL;
+}
+
+glusterfs_graph_t *
+glusterfs_muxsvc_setup_parent_graph(glusterfs_ctx_t *ctx, char *name,
+ char *type)
+{
+ glusterfs_graph_t *parent_graph = NULL;
+ xlator_t *ixl = NULL;
+ int ret = -1;
+ parent_graph = GF_CALLOC(1, sizeof(*parent_graph),
+ gf_common_mt_glusterfs_graph_t);
+ if (!parent_graph)
+ goto out;
+
+ INIT_LIST_HEAD(&parent_graph->list);
+
+ ctx->active = parent_graph;
+ ixl = GF_CALLOC(1, sizeof(*ixl), gf_common_mt_xlator_t);
+ if (!ixl)
+ goto out;
+
+ ixl->ctx = ctx;
+ ixl->graph = parent_graph;
+ ixl->options = dict_new();
+ if (!ixl->options)
+ goto out;
+
+ ixl->name = gf_strdup(name);
+ if (!ixl->name)
+ goto out;
+
+ ixl->is_autoloaded = 1;
+
+ if (xlator_set_type(ixl, type) == -1) {
+ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED,
+ "%s (%s) set type failed", name, type);
+ goto out;
+ }
+
+ glusterfs_graph_set_first(parent_graph, ixl);
+ parent_graph->top = ixl;
+ ixl = NULL;
+
+ gettimeofday(&parent_graph->dob, NULL);
+ fill_uuid(parent_graph->graph_uuid, 128, parent_graph->dob);
+ parent_graph->id = ctx->graph_id++;
+ ret = 0;
+out:
+ if (ixl)
+ xlator_destroy(ixl);
+
+ if (ret) {
+ glusterfs_muxsvc_cleanup_parent(ctx, parent_graph);
+ parent_graph = NULL;
+ }
+ return parent_graph;
+}
+
+int
+glusterfs_svc_mux_pidfile_cleanup(gf_volfile_t *volfile_obj)
+{
+ if (!volfile_obj || !volfile_obj->pidfp)
return 0;
+
+ gf_msg_trace("glusterfsd", 0, "pidfile %s cleanup", volfile_obj->vol_id);
+
+ lockf(fileno(volfile_obj->pidfp), F_ULOCK, 0);
+ fclose(volfile_obj->pidfp);
+ volfile_obj->pidfp = NULL;
+
+ return 0;
}
int
-glusterfs_graph_reconfigure (glusterfs_graph_t *oldgraph,
- glusterfs_graph_t *newgraph)
+glusterfs_process_svc_detach(glusterfs_ctx_t *ctx, gf_volfile_t *volfile_obj)
{
- xlator_t *old_xl = NULL;
- xlator_t *new_xl = NULL;
+ xlator_t *last_xl = NULL;
+ glusterfs_graph_t *graph = NULL;
+ glusterfs_graph_t *parent_graph = NULL;
+ pthread_t clean_graph = {
+ 0,
+ };
+ int ret = -1;
+ xlator_t *xl = NULL;
+
+ if (!ctx || !ctx->active || !volfile_obj)
+ goto out;
+
+ pthread_mutex_lock(&ctx->cleanup_lock);
+ {
+ parent_graph = ctx->active;
+ graph = volfile_obj->graph;
+ if (!graph)
+ goto unlock;
+ if (graph->first)
+ xl = graph->first;
+
+ last_xl = graph->last_xl;
+ if (last_xl)
+ last_xl->next = NULL;
+ if (!xl || xl->cleanup_starting)
+ goto unlock;
+
+ xl->cleanup_starting = 1;
+ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_DETACH_STARTED,
+ "detaching child %s", volfile_obj->vol_id);
+
+ list_del_init(&volfile_obj->volfile_list);
+ glusterfs_mux_xlator_unlink(parent_graph->top, xl);
+ glusterfs_svc_mux_pidfile_cleanup(volfile_obj);
+ parent_graph->last_xl = glusterfs_get_last_xlator(parent_graph);
+ parent_graph->xl_count -= graph->xl_count;
+ parent_graph->leaf_count -= graph->leaf_count;
+ parent_graph->id++;
+ ret = 0;
+ }
+unlock:
+ pthread_mutex_unlock(&ctx->cleanup_lock);
+out:
+ if (!ret) {
+ list_del_init(&volfile_obj->volfile_list);
+ if (graph) {
+ ret = gf_thread_create_detached(
+ &clean_graph, glusterfs_graph_cleanup, graph, "graph_clean");
+ if (ret) {
+ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL,
+ LG_MSG_GRAPH_CLEANUP_FAILED,
+ "%s failed to create clean "
+ "up thread",
+ volfile_obj->vol_id);
+ ret = 0;
+ }
+ }
+ GF_FREE(volfile_obj);
+ }
+ return ret;
+}
- GF_ASSERT (oldgraph);
- GF_ASSERT (newgraph);
+int
+glusterfs_svc_mux_pidfile_setup(gf_volfile_t *volfile_obj, const char *pid_file)
+{
+ int ret = -1;
+ FILE *pidfp = NULL;
+
+ if (!pid_file || !volfile_obj)
+ goto out;
+
+ if (volfile_obj->pidfp) {
+ ret = 0;
+ goto out;
+ }
+ pidfp = fopen(pid_file, "a+");
+ if (!pidfp) {
+ goto out;
+ }
+ volfile_obj->pidfp = pidfp;
+
+ ret = lockf(fileno(pidfp), F_TLOCK, 0);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+out:
+ return ret;
+}
- old_xl = oldgraph->first;
- new_xl = newgraph->first;
+int
+glusterfs_svc_mux_pidfile_update(gf_volfile_t *volfile_obj,
+ const char *pid_file, pid_t pid)
+{
+ int ret = 0;
+ FILE *pidfp = NULL;
+ int old_pid;
+
+ if (!volfile_obj->pidfp) {
+ ret = glusterfs_svc_mux_pidfile_setup(volfile_obj, pid_file);
+ if (ret == -1)
+ goto out;
+ }
+ pidfp = volfile_obj->pidfp;
+ ret = fscanf(pidfp, "%d", &old_pid);
+ if (ret <= 0) {
+ goto update;
+ }
+ if (old_pid == pid) {
+ ret = 0;
+ goto out;
+ } else {
+ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED,
+ "Old pid=%d found in pidfile %s. Cleaning the old pid and "
+ "Updating new pid=%d",
+ old_pid, pid_file, pid);
+ }
+update:
+ ret = sys_ftruncate(fileno(pidfp), 0);
+ if (ret) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, errno,
+ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED,
+ "pidfile %s truncation failed", pid_file);
+ goto out;
+ }
+
+ ret = fprintf(pidfp, "%d\n", pid);
+ if (ret <= 0) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, errno,
+ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, "pidfile %s write failed",
+ pid_file);
+ goto out;
+ }
+
+ ret = fflush(pidfp);
+ if (ret) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, errno,
+ LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED, "pidfile %s write failed",
+ pid_file);
+ goto out;
+ }
+out:
+ return ret;
+}
- return xlator_tree_reconfigure (old_xl, new_xl);
+int
+glusterfs_update_mux_pid(dict_t *dict, gf_volfile_t *volfile_obj)
+{
+ char *file = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("graph", dict, out);
+ GF_VALIDATE_OR_GOTO("graph", volfile_obj, out);
+
+ ret = dict_get_str(dict, "pidfile", &file);
+ if (ret < 0) {
+ gf_msg("mgmt", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED,
+ "Failed to get pidfile from dict for volfile_id=%s",
+ volfile_obj->vol_id);
+ }
+
+ ret = glusterfs_svc_mux_pidfile_update(volfile_obj, file, getpid());
+ if (ret < 0) {
+ ret = -1;
+ gf_msg("mgmt", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_SETUP_FAILED,
+ "Failed to update "
+ "the pidfile for volfile_id=%s",
+ volfile_obj->vol_id);
+
+ goto out;
+ }
+
+ if (ret == 1)
+ gf_msg("mgmt", GF_LOG_INFO, 0, LG_MSG_GRAPH_ATTACH_PID_FILE_UPDATED,
+ "PID %d updated in pidfile=%s", getpid(), file);
+ ret = 0;
+out:
+ return ret;
+}
+int
+glusterfs_process_svc_attach_volfp(glusterfs_ctx_t *ctx, FILE *fp,
+ char *volfile_id, char *checksum,
+ dict_t *dict)
+{
+ glusterfs_graph_t *graph = NULL;
+ glusterfs_graph_t *parent_graph = NULL;
+ glusterfs_graph_t *clean_graph = NULL;
+ int ret = -1;
+ xlator_t *xl = NULL;
+ xlator_t *last_xl = NULL;
+ gf_volfile_t *volfile_obj = NULL;
+ pthread_t thread_id = {
+ 0,
+ };
+
+ if (!ctx)
+ goto out;
+ parent_graph = ctx->active;
+ graph = glusterfs_graph_construct(fp);
+ if (!graph) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
+ "failed to construct the graph");
+ goto out;
+ }
+ graph->parent_down = 0;
+ graph->last_xl = glusterfs_get_last_xlator(graph);
+
+ for (xl = graph->first; xl; xl = xl->next) {
+ if (strcmp(xl->type, "mount/fuse") == 0) {
+ gf_msg("glusterfsd", GF_LOG_ERROR, EINVAL,
+ LG_MSG_GRAPH_ATTACH_FAILED,
+ "fuse xlator cannot be specified in volume file");
+ goto out;
+ }
+ }
+
+ graph->leaf_count = glusterfs_count_leaves(glusterfs_root(graph));
+ xl = graph->first;
+ /* TODO memory leaks everywhere need to free graph in case of error */
+ if (glusterfs_graph_prepare(graph, ctx, xl->name)) {
+ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
+ "failed to prepare graph for xlator %s", xl->name);
+ ret = -1;
+ goto out;
+ } else if (glusterfs_graph_init(graph)) {
+ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
+ "failed to initialize graph for xlator %s", xl->name);
+ ret = -1;
+ goto out;
+ } else if (glusterfs_graph_parent_up(graph)) {
+ gf_msg("glusterfsd", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_ATTACH_FAILED,
+ "failed to link the graphs for xlator %s ", xl->name);
+ ret = -1;
+ goto out;
+ }
+
+ if (!parent_graph) {
+ parent_graph = glusterfs_muxsvc_setup_parent_graph(ctx, "glustershd",
+ "debug/io-stats");
+ if (!parent_graph)
+ goto out;
+ ((xlator_t *)parent_graph->top)->next = xl;
+ clean_graph = parent_graph;
+ } else {
+ last_xl = parent_graph->last_xl;
+ if (last_xl)
+ last_xl->next = xl;
+ xl->prev = last_xl;
+ }
+ parent_graph->last_xl = graph->last_xl;
+
+ ret = glusterfs_xlator_link(parent_graph->top, xl);
+ if (ret) {
+ gf_msg("graph", GF_LOG_ERROR, 0, LG_MSG_EVENT_NOTIFY_FAILED,
+ "parent up notification failed");
+ goto out;
+ }
+ parent_graph->xl_count += graph->xl_count;
+ parent_graph->leaf_count += graph->leaf_count;
+ parent_graph->id++;
+
+ volfile_obj = GF_CALLOC(1, sizeof(gf_volfile_t), gf_common_volfile_t);
+ if (!volfile_obj) {
+ ret = -1;
+ goto out;
+ }
+ volfile_obj->pidfp = NULL;
+ snprintf(volfile_obj->vol_id, sizeof(volfile_obj->vol_id), "%s",
+ volfile_id);
+
+ if (strcmp(ctx->cmd_args.process_name, "glustershd") == 0) {
+ ret = glusterfs_update_mux_pid(dict, volfile_obj);
+ if (ret == -1) {
+ GF_FREE(volfile_obj);
+ goto out;
+ }
+ }
+
+ graph->used = 1;
+ parent_graph->id++;
+ list_add(&graph->list, &ctx->graphs);
+ INIT_LIST_HEAD(&volfile_obj->volfile_list);
+ volfile_obj->graph = graph;
+ memcpy(volfile_obj->volfile_checksum, checksum,
+ sizeof(volfile_obj->volfile_checksum));
+ list_add_tail(&volfile_obj->volfile_list, &ctx->volfile_list);
+ gf_log_dump_graph(fp, graph);
+ graph = NULL;
+
+ ret = 0;
+out:
+ if (ret) {
+ if (graph) {
+ gluster_graph_take_reference(graph->first);
+ ret = gf_thread_create_detached(&thread_id, glusterfs_graph_cleanup,
+ graph, "graph_clean");
+ if (ret) {
+ gf_msg("glusterfs", GF_LOG_ERROR, EINVAL,
+ LG_MSG_GRAPH_CLEANUP_FAILED,
+ "%s failed to create clean "
+ "up thread",
+ volfile_id);
+ ret = 0;
+ }
+ }
+ if (clean_graph)
+ glusterfs_muxsvc_cleanup_parent(ctx, clean_graph);
+ }
+ return ret;
}
int
-glusterfs_graph_destroy (glusterfs_graph_t *graph)
+glusterfs_mux_volfile_reconfigure(FILE *newvolfile_fp, glusterfs_ctx_t *ctx,
+ gf_volfile_t *volfile_obj, char *checksum,
+ dict_t *dict)
{
- return 0;
+ glusterfs_graph_t *oldvolfile_graph = NULL;
+ glusterfs_graph_t *newvolfile_graph = NULL;
+ char vol_id[NAME_MAX + 1];
+
+ int ret = -1;
+
+ if (!ctx) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL,
+ "ctx is NULL");
+ goto out;
+ }
+
+ /* Change the message id */
+ if (!volfile_obj) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, 0, LG_MSG_CTX_NULL,
+ "failed to get volfile object");
+ goto out;
+ }
+
+ oldvolfile_graph = volfile_obj->graph;
+ if (!oldvolfile_graph) {
+ goto out;
+ }
+
+ newvolfile_graph = glusterfs_graph_construct(newvolfile_fp);
+
+ if (!newvolfile_graph) {
+ goto out;
+ }
+ newvolfile_graph->last_xl = glusterfs_get_last_xlator(newvolfile_graph);
+
+ glusterfs_graph_prepare(newvolfile_graph, ctx, newvolfile_graph->first);
+
+ if (!is_graph_topology_equal(oldvolfile_graph, newvolfile_graph)) {
+ ret = snprintf(vol_id, sizeof(vol_id), "%s", volfile_obj->vol_id);
+ if (ret < 0)
+ goto out;
+ ret = glusterfs_process_svc_detach(ctx, volfile_obj);
+ if (ret) {
+ gf_msg("glusterfsd-mgmt", GF_LOG_ERROR, EINVAL,
+ LG_MSG_GRAPH_CLEANUP_FAILED,
+ "Could not detach "
+ "old graph. Aborting the reconfiguration operation");
+ goto out;
+ }
+ volfile_obj = NULL;
+ ret = glusterfs_process_svc_attach_volfp(ctx, newvolfile_fp, vol_id,
+ checksum, dict);
+ goto out;
+ }
+
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Only options have changed in the"
+ " new graph");
+
+ ret = glusterfs_graph_reconfigure(oldvolfile_graph, newvolfile_graph);
+ if (ret) {
+ gf_msg_debug("glusterfsd-mgmt", 0,
+ "Could not reconfigure "
+ "new options in old graph");
+ goto out;
+ }
+ memcpy(volfile_obj->volfile_checksum, checksum,
+ sizeof(volfile_obj->volfile_checksum));
+
+ ret = 0;
+out:
+
+ if (newvolfile_graph)
+ glusterfs_graph_destroy(newvolfile_graph);
+
+ return ret;
}
diff --git a/libglusterfs/src/graph.l b/libglusterfs/src/graph.l
index 04fff2582fb..b9d4b2b6828 100644
--- a/libglusterfs/src/graph.l
+++ b/libglusterfs/src/graph.l
@@ -1,60 +1,40 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
%x STRING
%option yylineno
%option noinput
%{
#define YYSTYPE char *
-#include "xlator.h"
+#include "glusterfs/xlator.h"
#include "y.tab.h"
#include <string.h>
-#define START_STRSIZE 32
static char *text;
-static int text_asize;
static int text_size;
void append_string(const char *str, int size)
{
- int new_size = text_size + size + 1;
- if (new_size > text_asize) {
- new_size += START_STRSIZE - 1;
- new_size &= -START_STRSIZE;
- if (!text) {
- text = GF_CALLOC (1, new_size,
- gf_common_mt_char);
- } else {
- text = GF_REALLOC (text, new_size);
- }
- if (!text) {
- gf_log ("parser", GF_LOG_ERROR,
- "out of memory");
- return;
- }
- text_asize = new_size;
- }
- memcpy(text + text_size, str, size);
- text_size += size;
- text[text_size] = 0;
+ int new_size = text_size + size + 1;
+ if (!text) {
+ text = GF_CALLOC (1, new_size, gf_common_mt_char);
+ } else {
+ text = GF_REALLOC (text, new_size);
+ }
+ if (!text) {
+ return;
+ }
+ memcpy(text + text_size, str, size);
+ text_size += size;
+ text[text_size] = 0;
}
%}
@@ -77,13 +57,15 @@ TYPE [t][y][p][e]
\\. { append_string (yytext + 1, yyleng - 1); }
\" {
if (0) {
- yyunput (0, NULL);
+ yyunput (0, NULL);
}
BEGIN (INITIAL);
- yylval = text;
+ graphyylval = text;
+ text = NULL;
+ text_size = 0;
return STRING_TOK;
- }
+ }
}
-[^ \t\r\n\"\\]+ { yylval = gf_strdup (yytext) ; return ID; }
+[^ \t\r\n\"\\]+ { graphyylval = gf_strdup (yytext) ; return ID; }
[ \t\r\n]+ ;
%%
diff --git a/libglusterfs/src/graph.y b/libglusterfs/src/graph.y
index 16ee2d43a80..e63febdc08b 100644
--- a/libglusterfs/src/graph.y
+++ b/libglusterfs/src/graph.y
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
%token VOLUME_BEGIN VOLUME_END OPTION NEWLINE SUBVOLUME ID WHITESPACE COMMENT TYPE STRING_TOK
%{
@@ -28,12 +18,15 @@
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
+#include <pthread.h>
#define RELAX_POISONING
-#include "xlator.h"
-#include "graph-utils.h"
-#include "logging.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/graph-utils.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/libglusterfs-messages.h"
static int new_volume (char *name);
static int volume_type (char *type);
@@ -47,8 +40,8 @@ static void option_error (void);
#define YYSTYPE char *
#define GF_CMD_BUFFER_LEN (8 * GF_UNIT_KB)
-int yyerror (const char *);
-int yylex ();
+int graphyyerror (const char *);
+int graphyylex ();
%}
@@ -88,11 +81,11 @@ glusterfs_graph_t *construct;
static void
type_error (void)
{
- extern int yylineno;
+ extern int graphyylineno;
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_VOLFILE_PARSE_ERROR,
"Volume %s, before line %d: Please specify volume type",
- curr->name, yylineno);
+ curr->name, graphyylineno);
return;
}
@@ -100,11 +93,11 @@ type_error (void)
static void
sub_error (void)
{
- extern int yylineno;
+ extern int graphyylineno;
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_VOLFILE_PARSE_ERROR,
"Volume %s, before line %d: Please specify subvolumes",
- curr->name, yylineno);
+ curr->name, graphyylineno);
return;
}
@@ -112,12 +105,12 @@ sub_error (void)
static void
option_error (void)
{
- extern int yylineno;
+ extern int graphyylineno;
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_VOLFILE_PARSE_ERROR,
"Volume %s, before line %d: Please specify "
"option <key> <value>",
- curr->name, yylineno);
+ curr->name, graphyylineno);
return;
}
@@ -125,21 +118,20 @@ option_error (void)
static int
new_volume (char *name)
{
- extern int yylineno;
+ extern int graphyylineno;
xlator_t *trav = NULL;
int ret = 0;
if (!name) {
- gf_log ("parser", GF_LOG_DEBUG,
- "Invalid argument name: '%s'", name);
+ gf_msg_debug ("parser", 0,"Invalid argument name");
ret = -1;
goto out;
}
if (curr) {
- gf_log ("parser", GF_LOG_ERROR,
- "new volume (%s) defintion in line %d unexpected",
- name, yylineno);
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
+ "new volume (%s) definition in line %d unexpected",
+ name, graphyylineno);
ret = -1;
goto out;
}
@@ -148,7 +140,6 @@ new_volume (char *name)
gf_common_mt_xlator_t);
if (!curr) {
- gf_log ("parser", GF_LOG_ERROR, "Out of memory");
ret = -1;
goto out;
}
@@ -157,9 +148,9 @@ new_volume (char *name)
while (trav) {
if (!strcmp (name, trav->name)) {
- gf_log ("parser", GF_LOG_ERROR,
- "Line %d: volume '%s' defined again",
- yylineno, name);
+ gf_msg ("parser", GF_LOG_ERROR, 0,
+ LG_MSG_VOLFILE_PARSE_ERROR, "Line %d: volume "
+ "'%s' defined again", graphyylineno, name);
ret = -1;
goto out;
}
@@ -173,7 +164,8 @@ new_volume (char *name)
goto out;
}
- curr->options = get_new_dict ();
+ INIT_LIST_HEAD(&curr->volume_options);
+ curr->options = dict_new ();
if (!curr->options) {
GF_FREE (curr->name);
@@ -191,8 +183,9 @@ new_volume (char *name)
construct->first = curr;
construct->xl_count++;
+ curr->xl_id = construct->xl_count;
- gf_log ("parser", GF_LOG_TRACE, "New node for '%s'", name);
+ gf_msg_trace ("parser", 0, "New node for '%s'", name);
out:
GF_FREE (name);
@@ -204,26 +197,26 @@ out:
static int
volume_type (char *type)
{
- extern int yylineno;
+ extern int graphyylineno;
int32_t ret = 0;
if (!type) {
- gf_log ("parser", GF_LOG_DEBUG, "Invalid argument type");
+ gf_msg_debug ("parser", 0, "Invalid argument type");
ret = -1;
goto out;
}
ret = xlator_set_type (curr, type);
if (ret) {
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
"Volume '%s', line %d: type '%s' is not valid or "
"not found on this machine",
- curr->name, yylineno, type);
+ curr->name, graphyylineno, type);
ret = -1;
goto out;
}
- gf_log ("parser", GF_LOG_TRACE, "Type:%s:%s", curr->name, type);
+ gf_msg_trace ("parser", 0, "Type:%s:%s", curr->name, type);
out:
GF_FREE (type);
@@ -235,30 +228,30 @@ out:
static int
volume_option (char *key, char *value)
{
- extern int yylineno;
+ extern int graphyylineno;
int ret = 0;
char *set_value = NULL;
if (!key || !value){
- gf_log ("parser", GF_LOG_ERROR, "Invalid argument");
+ gf_msg ("parser", GF_LOG_ERROR, 0,
+ LG_MSG_INVALID_VOLFILE_ENTRY, "Invalid argument");
ret = -1;
goto out;
}
set_value = gf_strdup (value);
- ret = dict_set_dynstr (curr->options, key, set_value);
+ ret = dict_set_option (curr->options, key, set_value);
if (ret == 1) {
- gf_log ("parser", GF_LOG_ERROR,
- "Volume '%s', line %d: duplicate entry "
- "('option %s') present",
- curr->name, yylineno, key);
+ gf_msg ("parser", GF_LOG_ERROR, 0,
+ LG_MSG_INVALID_VOLFILE_ENTRY, "Volume '%s', line %d: "
+ "duplicate entry ('option %s') present",
+ curr->name, graphyylineno, key);
ret = -1;
goto out;
}
- gf_log ("parser", GF_LOG_TRACE, "Option:%s:%s:%s",
- curr->name, key, value);
+ gf_msg_trace ("parser", 0, "Option:%s:%s:%s", curr->name, key, value);
out:
GF_FREE (key);
@@ -271,12 +264,13 @@ out:
static int
volume_sub (char *sub)
{
- extern int yylineno;
+ extern int graphyylineno;
xlator_t *trav = NULL;
int ret = 0;
if (!sub) {
- gf_log ("parser", GF_LOG_ERROR, "Invalid subvolumes argument");
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
+ "Invalid subvolumes argument");
ret = -1;
goto out;
}
@@ -290,30 +284,28 @@ volume_sub (char *sub)
}
if (!trav) {
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_SUB_VOLUME_ERROR,
"Volume '%s', line %d: subvolume '%s' is not defined "
- "prior to usage",
- curr->name, yylineno, sub);
+ "prior to usage",curr->name, graphyylineno, sub);
ret = -1;
goto out;
}
if (trav == curr) {
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
"Volume '%s', line %d: has '%s' itself as subvolume",
- curr->name, yylineno, sub);
+ curr->name, graphyylineno, sub);
ret = -1;
goto out;
}
ret = glusterfs_xlator_link (curr, trav);
if (ret) {
- gf_log ("parser", GF_LOG_ERROR, "Out of memory");
ret = -1;
goto out;
}
- gf_log ("parser", GF_LOG_TRACE, "child:%s->%s", curr->name, sub);
+ gf_msg_trace ("parser", 0, "child:%s->%s", curr->name, sub);
out:
GF_FREE (sub);
@@ -326,11 +318,11 @@ static int
volume_end (void)
{
if (!curr->fops) {
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_VOLUME_ERROR,
"\"type\" not specified for volume %s", curr->name);
return -1;
}
- gf_log ("parser", GF_LOG_TRACE, "end:%s", curr->name);
+ gf_msg_trace ("parser", 0, "end:%s", curr->name);
curr = NULL;
return 0;
@@ -338,55 +330,52 @@ volume_end (void)
int
-yywrap ()
+graphyywrap ()
{
return 1;
}
int
-yyerror (const char *str)
+graphyyerror (const char *str)
{
- extern char *yytext;
- extern int yylineno;
-
- if (curr && curr->name && yytext) {
- if (!strcmp (yytext, "volume")) {
- gf_log ("parser", GF_LOG_ERROR,
- "'end-volume' not defined for volume '%s'",
- curr->name);
- } else if (!strcmp (yytext, "type")) {
- gf_log ("parser", GF_LOG_ERROR,
+ extern char *graphyytext;
+ extern int graphyylineno;
+
+ if (curr && curr->name && graphyytext) {
+ if (!strcmp (graphyytext, "volume")) {
+ gf_msg ("parser", GF_LOG_ERROR, 0,
+ LG_MSG_VOLUME_ERROR, "'end-volume' not"
+ " defined for volume '%s'", curr->name);
+ } else if (!strcmp (graphyytext, "type")) {
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_VOLUME_ERROR,
"line %d: duplicate 'type' defined for "
- "volume '%s'",
- yylineno, curr->name);
- } else if (!strcmp (yytext, "subvolumes")) {
- gf_log ("parser", GF_LOG_ERROR,
- "line %d: duplicate 'subvolumes' defined for "
- "volume '%s'",
- yylineno, curr->name);
+ "volume '%s'", graphyylineno, curr->name);
+ } else if (!strcmp (graphyytext, "subvolumes")) {
+ gf_msg ("parser", GF_LOG_ERROR, 0,
+ LG_MSG_SUB_VOLUME_ERROR, "line %d: duplicate "
+ "'subvolumes' defined for volume '%s'",
+ graphyylineno, curr->name);
} else if (curr) {
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_SYNTAX_ERROR,
"syntax error: line %d (volume '%s'): \"%s\""
"\nallowed tokens are 'volume', 'type', "
"'subvolumes', 'option', 'end-volume'()",
- yylineno, curr->name,
- yytext);
+ graphyylineno, curr->name, graphyytext);
} else {
- gf_log ("parser", GF_LOG_ERROR,
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_SYNTAX_ERROR,
"syntax error: line %d (just after volume "
"'%s'): \"%s\"\n(%s)",
- yylineno, curr->name,
- yytext,
+ graphyylineno, curr->name, graphyytext,
"allowed tokens are 'volume', 'type', "
"'subvolumes', 'option', 'end-volume'");
}
} else {
- gf_log ("parser", GF_LOG_ERROR,
- "syntax error in line %d: \"%s\" \n"
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_SYNTAX_ERROR,
+ "syntax error in line %d: \"%s\"\n"
"(allowed tokens are 'volume', 'type', "
"'subvolumes', 'option', 'end-volume')\n",
- yylineno, yytext);
+ graphyylineno, graphyytext);
}
return -1;
@@ -404,7 +393,8 @@ execute_cmd (char *cmd, char **result, size_t size)
fpp = popen (cmd, "r");
if (!fpp) {
- gf_log ("parser", GF_LOG_ERROR, "%s: failed to popen", cmd);
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_FILE_OP_FAILED,
+ "%s: failed to popen", cmd);
return -1;
}
@@ -454,7 +444,6 @@ preprocess (FILE *srcfp, FILE *dstfp)
cmd = GF_CALLOC (cmd_buf_size, 1,
gf_common_mt_char);
if (cmd == NULL) {
- gf_log ("parser", GF_LOG_ERROR, "Out of memory");
return -1;
}
@@ -462,7 +451,6 @@ preprocess (FILE *srcfp, FILE *dstfp)
gf_common_mt_char);
if (result == NULL) {
GF_FREE (cmd);
- gf_log ("parser", GF_LOG_ERROR, "Out of memory");
return -1;
}
@@ -491,6 +479,7 @@ preprocess (FILE *srcfp, FILE *dstfp)
cmd_buf_size *= 2;
cmd = GF_REALLOC (cmd, cmd_buf_size);
if (cmd == NULL) {
+ GF_FREE (result);
return -1;
}
@@ -523,15 +512,16 @@ preprocess (FILE *srcfp, FILE *dstfp)
}
if (in_backtick) {
- gf_log ("parser", GF_LOG_ERROR,
- "Unterminated backtick in volume specfication file at line (%d), column (%d).",
- line, column);
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_VOLUME_ERROR,
+ "Unterminated backtick in volume specification file at "
+ "line (%d), column (%d).", line, column);
ret = -1;
}
out:
fseek (srcfp, 0L, SEEK_SET);
fseek (dstfp, 0L, SEEK_SET);
+
GF_FREE (cmd);
GF_FREE (result);
@@ -539,7 +529,7 @@ out:
}
-extern FILE *yyin;
+extern FILE *graphyyin;
glusterfs_graph_t *
glusterfs_graph_new ()
@@ -553,6 +543,9 @@ glusterfs_graph_new ()
INIT_LIST_HEAD (&graph->list);
+ pthread_mutex_init(&graph->mutex, NULL);
+ pthread_cond_init(&graph->child_down_cond, NULL);
+
gettimeofday (&graph->dob, NULL);
return graph;
@@ -563,52 +556,66 @@ glusterfs_graph_t *
glusterfs_graph_construct (FILE *fp)
{
int ret = 0;
+ int tmp_fd = -1;
glusterfs_graph_t *graph = NULL;
- FILE *tmp_file = NULL;
+ FILE *tmp_file = NULL;
+ char template[] = "/tmp/tmp.XXXXXX";
+ static pthread_mutex_t graph_mutex = PTHREAD_MUTEX_INITIALIZER;
graph = glusterfs_graph_new ();
if (!graph)
- return NULL;
+ goto err;
- tmp_file = tmpfile ();
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+ tmp_fd = mkstemp (template);
+ if (-1 == tmp_fd)
+ goto err;
- if (tmp_file == NULL) {
- gf_log ("parser", GF_LOG_ERROR,
- "cannot create temporary file");
+ ret = sys_unlink (template);
+ if (ret < 0) {
+ gf_msg ("parser", GF_LOG_WARNING, 0, LG_MSG_FILE_OP_FAILED,
+ "Unable to delete file: %s", template);
+ }
- glusterfs_graph_destroy (graph);
- return NULL;
- }
+ tmp_file = fdopen (tmp_fd, "w+b");
+ if (!tmp_file)
+ goto err;
- ret = preprocess (fp, tmp_file);
- if (ret < 0) {
- gf_log ("parser", GF_LOG_ERROR,
- "parsing of backticks failed");
+ ret = preprocess (fp, tmp_file);
+ if (ret < 0) {
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_BACKTICK_PARSE_FAILED,
+ "parsing of backticks failed");
+ goto err;
+ }
- glusterfs_graph_destroy (graph);
- fclose (tmp_file);
- return NULL;
+ pthread_mutex_lock (&graph_mutex);
+ {
+ graphyyin = tmp_file;
+ construct = graph;
+ ret = yyparse ();
+ construct = NULL;
}
-
- yyin = tmp_file;
-
- construct = graph;
-
- ret = yyparse ();
-
- construct = NULL;
-
- fclose (tmp_file);
+ pthread_mutex_unlock (&graph_mutex);
if (ret == 1) {
- gf_log ("parser", GF_LOG_DEBUG,
- "parsing of volfile failed, please review it "
- "once more");
-
- glusterfs_graph_destroy (graph);
- return NULL;
+ gf_msg_debug ("parser", 0, "parsing of volfile failed, please "
+ "review it once more");
+ goto err;
}
+ fclose (tmp_file);
return graph;
+err:
+ if (tmp_file) {
+ fclose (tmp_file);
+ } else {
+ gf_msg ("parser", GF_LOG_ERROR, 0, LG_MSG_FILE_OP_FAILED,
+ "cannot create temporary file");
+ if (-1 != tmp_fd)
+ sys_close (tmp_fd);
+ }
+
+ glusterfs_graph_destroy (graph);
+ return NULL;
}
diff --git a/libglusterfs/src/hashfn.c b/libglusterfs/src/hashfn.c
index f79165b221e..d2237e99f83 100644
--- a/libglusterfs/src/hashfn.c
+++ b/libglusterfs/src/hashfn.c
@@ -11,29 +11,11 @@
#include <stdint.h>
#include <stdlib.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "hashfn.h"
-
-#define get16bits(d) (*((const uint16_t *) (d)))
+#define get16bits(d) (*((const uint16_t *)(d)))
#define DM_DELTA 0x9E3779B9
-#define DM_FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */
-#define DM_PARTROUNDS 6 /* 6 gets complete mixing */
-
-
-uint32_t
-ReallySimpleHash (char *path, int len)
-{
- uint32_t hash = 0;
- for (;len > 0; len--)
- hash ^= (char)path[len];
-
- return hash;
-}
+#define DM_FULLROUNDS 10 /* 32 is overkill, 16 is strong crypto */
+#define DM_PARTROUNDS 6 /* 6 gets complete mixing */
/*
This is apparently the "fastest hash function for strings".
@@ -42,146 +24,145 @@ ReallySimpleHash (char *path, int len)
/* In any case make sure, you return 1 */
-uint32_t SuperFastHash (const char * data, int32_t len) {
- uint32_t hash = len, tmp;
- int32_t rem;
-
- if (len <= 1 || data == NULL) return 1;
-
- rem = len & 3;
- len >>= 2;
-
- /* Main loop */
- for (;len > 0; len--) {
- hash += get16bits (data);
- tmp = (get16bits (data+2) << 11) ^ hash;
- hash = (hash << 16) ^ tmp;
- data += 2*sizeof (uint16_t);
- hash += hash >> 11;
- }
-
- /* Handle end cases */
- switch (rem) {
- case 3: hash += get16bits (data);
- hash ^= hash << 16;
- hash ^= data[sizeof (uint16_t)] << 18;
- hash += hash >> 11;
- break;
- case 2: hash += get16bits (data);
- hash ^= hash << 11;
- hash += hash >> 17;
- break;
- case 1: hash += *data;
- hash ^= hash << 10;
- hash += hash >> 1;
- }
-
- /* Force "avalanching" of final 127 bits */
- hash ^= hash << 3;
- hash += hash >> 5;
- hash ^= hash << 4;
- hash += hash >> 17;
- hash ^= hash << 25;
- hash += hash >> 6;
-
- return hash;
+uint32_t
+SuperFastHash(const char *data, int32_t len)
+{
+ uint32_t hash = len, tmp;
+ int32_t rem;
+
+ if (len <= 1 || data == NULL)
+ return 1;
+
+ rem = len & 3;
+ len >>= 2;
+
+ /* Main loop */
+ for (; len > 0; len--) {
+ hash += get16bits(data);
+ tmp = (get16bits(data + 2) << 11) ^ hash;
+ hash = (hash << 16) ^ tmp;
+ data += 2 * sizeof(uint16_t);
+ hash += hash >> 11;
+ }
+
+ /* Handle end cases */
+ switch (rem) {
+ case 3:
+ hash += get16bits(data);
+ hash ^= hash << 16;
+ hash ^= data[sizeof(uint16_t)] << 18;
+ hash += hash >> 11;
+ break;
+ case 2:
+ hash += get16bits(data);
+ hash ^= hash << 11;
+ hash += hash >> 17;
+ break;
+ case 1:
+ hash += *data;
+ hash ^= hash << 10;
+ hash += hash >> 1;
+ }
+
+ /* Force "avalanching" of final 127 bits */
+ hash ^= hash << 3;
+ hash += hash >> 5;
+ hash ^= hash << 4;
+ hash += hash >> 17;
+ hash ^= hash << 25;
+ hash += hash >> 6;
+
+ return hash;
}
-
/* Davies-Meyer hashing function implementation
*/
static int
-dm_round (int rounds, uint32_t *array, uint32_t *h0, uint32_t *h1)
+dm_round(int rounds, uint32_t *array, uint32_t *h0, uint32_t *h1)
{
- uint32_t sum = 0;
- int n = 0;
- uint32_t b0 = 0;
- uint32_t b1 = 0;
-
- b0 = *h0;
- b1 = *h1;
-
- n = rounds;
-
- do {
- sum += DM_DELTA;
- b0 += ((b1 << 4) + array[0])
- ^ (b1 + sum)
- ^ ((b1 >> 5) + array[1]);
- b1 += ((b0 << 4) + array[2])
- ^ (b0 + sum)
- ^ ((b0 >> 5) + array[3]);
- } while (--n);
-
- *h0 += b0;
- *h1 += b1;
-
- return 0;
-}
+ uint32_t sum = 0;
+ int n = 0;
+ uint32_t b0 = 0;
+ uint32_t b1 = 0;
+
+ b0 = *h0;
+ b1 = *h1;
+ n = rounds;
+
+ do {
+ sum += DM_DELTA;
+ b0 += ((b1 << 4) + array[0]) ^ (b1 + sum) ^ ((b1 >> 5) + array[1]);
+ b1 += ((b0 << 4) + array[2]) ^ (b0 + sum) ^ ((b0 >> 5) + array[3]);
+ } while (--n);
+
+ *h0 += b0;
+ *h1 += b1;
+
+ return 0;
+}
uint32_t
-__pad (int len)
+__pad(int len)
{
- uint32_t pad = 0;
+ uint32_t pad = 0;
- pad = (uint32_t) len | ((uint32_t) len << 8);
- pad |= pad << 16;
+ pad = (uint32_t)len | ((uint32_t)len << 8);
+ pad |= pad << 16;
- return pad;
+ return pad;
}
uint32_t
-gf_dm_hashfn (const char *msg, int len)
+gf_dm_hashfn(const char *msg, int len)
{
- uint32_t h0 = 0x9464a485;
- uint32_t h1 = 0x542e1a94;
- uint32_t array[4];
- uint32_t pad = 0;
- int i = 0;
- int j = 0;
- int full_quads = 0;
- int full_words = 0;
- int full_bytes = 0;
- uint32_t *intmsg = NULL;
- int word = 0;
-
-
- intmsg = (uint32_t *) msg;
- pad = __pad (len);
-
- full_bytes = len;
- full_words = len / 4;
- full_quads = len / 16;
-
- for (i = 0; i < full_quads; i++) {
- for (j = 0; j < 4; j++) {
- word = *intmsg;
- array[j] = word;
- intmsg++;
- full_words--;
- full_bytes -= 4;
- }
- dm_round (DM_PARTROUNDS, &array[0], &h0, &h1);
- }
-
+ uint32_t h0 = 0x9464a485;
+ uint32_t h1 = 0x542e1a94;
+ uint32_t array[4];
+ uint32_t pad = 0;
+ int i = 0;
+ int j = 0;
+ int full_quads = 0;
+ int full_words = 0;
+ int full_bytes = 0;
+ uint32_t *intmsg = NULL;
+ int word = 0;
+
+ intmsg = (uint32_t *)msg;
+ pad = __pad(len);
+
+ full_bytes = len;
+ full_words = len / 4;
+ full_quads = len / 16;
+
+ for (i = 0; i < full_quads; i++) {
for (j = 0; j < 4; j++) {
- if (full_words) {
- word = *intmsg;
- array[j] = word;
- intmsg++;
- full_words--;
- full_bytes -= 4;
- } else {
- array[j] = pad;
- while (full_bytes) {
- array[j] <<= 8;
- array[j] |= msg[len - full_bytes];
- full_bytes--;
- }
- }
+ word = *intmsg;
+ array[j] = word;
+ intmsg++;
+ full_words--;
+ full_bytes -= 4;
+ }
+ dm_round(DM_PARTROUNDS, &array[0], &h0, &h1);
+ }
+
+ for (j = 0; j < 4; j++) {
+ if (full_words) {
+ word = *intmsg;
+ array[j] = word;
+ intmsg++;
+ full_words--;
+ full_bytes -= 4;
+ } else {
+ array[j] = pad;
+ while (full_bytes) {
+ array[j] <<= 8;
+ array[j] |= msg[len - full_bytes];
+ full_bytes--;
+ }
}
- dm_round (DM_FULLROUNDS, &array[0], &h0, &h1);
+ }
+ dm_round(DM_FULLROUNDS, &array[0], &h0, &h1);
- return h0 ^ h1;
+ return h0 ^ h1;
}
diff --git a/libglusterfs/src/iatt.h b/libglusterfs/src/iatt.h
deleted file mode 100644
index eb916ad5edc..00000000000
--- a/libglusterfs/src/iatt.h
+++ /dev/null
@@ -1,313 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-
-#ifndef _IATT_H
-#define _IATT_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h> /* for iatt <--> stat conversions */
-#include <unistd.h>
-
-#include "compat.h"
-#include "uuid.h"
-
-typedef enum {
- IA_INVAL = 0,
- IA_IFREG,
- IA_IFDIR,
- IA_IFLNK,
- IA_IFBLK,
- IA_IFCHR,
- IA_IFIFO,
- IA_IFSOCK
-} ia_type_t;
-
-
-typedef struct {
- uint8_t suid:1;
- uint8_t sgid:1;
- uint8_t sticky:1;
- struct {
- uint8_t read:1;
- uint8_t write:1;
- uint8_t exec:1;
- } owner, group, other;
-} ia_prot_t;
-
-
-struct iatt {
- uint64_t ia_ino; /* inode number */
- uuid_t ia_gfid;
- uint64_t ia_dev; /* backing device ID */
- ia_type_t ia_type; /* type of file */
- ia_prot_t ia_prot; /* protection */
- uint32_t ia_nlink; /* Link count */
- uint32_t ia_uid; /* user ID of owner */
- uint32_t ia_gid; /* group ID of owner */
- uint64_t ia_rdev; /* device ID (if special file) */
- uint64_t ia_size; /* file size in bytes */
- uint32_t ia_blksize; /* blocksize for filesystem I/O */
- uint64_t ia_blocks; /* number of 512B blocks allocated */
- uint32_t ia_atime; /* last access time */
- uint32_t ia_atime_nsec;
- uint32_t ia_mtime; /* last modification time */
- uint32_t ia_mtime_nsec;
- uint32_t ia_ctime; /* last status change time */
- uint32_t ia_ctime_nsec;
-};
-
-
-#define IA_ISREG(t) (t == IA_IFREG)
-#define IA_ISDIR(t) (t == IA_IFDIR)
-#define IA_ISLNK(t) (t == IA_IFLNK)
-#define IA_ISBLK(t) (t == IA_IFBLK)
-#define IA_ISCHR(t) (t == IA_IFCHR)
-#define IA_ISFIFO(t) (t == IA_IFIFO)
-#define IA_ISSOCK(t) (t == IA_IFSOCK)
-
-#define IA_PROT_RUSR(prot) ((prot).owner.read == 1)
-#define IA_PROT_WUSR(prot) ((prot).owner.write == 1)
-#define IA_PROT_XUSR(prot) ((prot).owner.exec == 1)
-
-#define IA_PROT_RGRP(prot) ((prot).group.read == 1)
-#define IA_PROT_WGRP(prot) ((prot).group.write == 1)
-#define IA_PROT_XGRP(prot) ((prot).group.exec == 1)
-
-#define IA_PROT_ROTH(prot) ((prot).other.read == 1)
-#define IA_PROT_WOTH(prot) ((prot).other.write == 1)
-#define IA_PROT_XOTH(prot) ((prot).other.exec == 1)
-
-#define IA_PROT_SUID(prot) ((prot).suid == 1)
-#define IA_PROT_SGID(prot) ((prot).sgid == 1)
-#define IA_PROT_STCKY(prot) ((prot).sticky == 1)
-
-#define IA_FILE_OR_DIR(t) (IA_ISREG(t) || IA_ISDIR(t))
-
-static inline uint32_t
-ia_major (uint64_t ia_dev)
-{
- return (uint32_t) (ia_dev >> 32);
-}
-
-
-static inline uint32_t
-ia_minor (uint64_t ia_dev)
-{
- return (uint32_t) (ia_dev & 0xffffffff);
-}
-
-
-static inline uint64_t
-ia_makedev (uint32_t ia_maj, uint32_t ia_min)
-{
- return ((((uint64_t) ia_maj) << 32) | ia_min);
-}
-
-
-static inline ia_prot_t
-ia_prot_from_st_mode (mode_t mode)
-{
- ia_prot_t ia_prot = {0, };
-
- if (mode & S_ISUID)
- ia_prot.suid = 1;
- if (mode & S_ISGID)
- ia_prot.sgid = 1;
- if (mode & S_ISVTX)
- ia_prot.sticky = 1;
-
- if (mode & S_IRUSR)
- ia_prot.owner.read = 1;
- if (mode & S_IWUSR)
- ia_prot.owner.write = 1;
- if (mode & S_IXUSR)
- ia_prot.owner.exec = 1;
-
- if (mode & S_IRGRP)
- ia_prot.group.read = 1;
- if (mode & S_IWGRP)
- ia_prot.group.write = 1;
- if (mode & S_IXGRP)
- ia_prot.group.exec = 1;
-
- if (mode & S_IROTH)
- ia_prot.other.read = 1;
- if (mode & S_IWOTH)
- ia_prot.other.write = 1;
- if (mode & S_IXOTH)
- ia_prot.other.exec = 1;
-
- return ia_prot;
-}
-
-
-static inline ia_type_t
-ia_type_from_st_mode (mode_t mode)
-{
- ia_type_t type = IA_INVAL;
-
- if (S_ISREG (mode))
- type = IA_IFREG;
- if (S_ISDIR (mode))
- type = IA_IFDIR;
- if (S_ISLNK (mode))
- type = IA_IFLNK;
- if (S_ISBLK (mode))
- type = IA_IFBLK;
- if (S_ISCHR (mode))
- type = IA_IFCHR;
- if (S_ISFIFO (mode))
- type = IA_IFIFO;
- if (S_ISSOCK (mode))
- type = IA_IFSOCK;
-
- return type;
-}
-
-
-static inline mode_t
-st_mode_from_ia (ia_prot_t prot, ia_type_t type)
-{
- mode_t st_mode = 0;
- uint32_t type_bit = 0;
- uint32_t prot_bit = 0;
-
- switch (type) {
- case IA_IFREG:
- type_bit = S_IFREG;
- break;
- case IA_IFDIR:
- type_bit = S_IFDIR;
- break;
- case IA_IFLNK:
- type_bit = S_IFLNK;
- break;
- case IA_IFBLK:
- type_bit = S_IFBLK;
- break;
- case IA_IFCHR:
- type_bit = S_IFCHR;
- break;
- case IA_IFIFO:
- type_bit = S_IFIFO;
- break;
- case IA_IFSOCK:
- type_bit = S_IFSOCK;
- break;
- case IA_INVAL:
- break;
- }
-
- if (prot.suid)
- prot_bit |= S_ISUID;
- if (prot.sgid)
- prot_bit |= S_ISGID;
- if (prot.sticky)
- prot_bit |= S_ISVTX;
-
- if (prot.owner.read)
- prot_bit |= S_IRUSR;
- if (prot.owner.write)
- prot_bit |= S_IWUSR;
- if (prot.owner.exec)
- prot_bit |= S_IXUSR;
-
- if (prot.group.read)
- prot_bit |= S_IRGRP;
- if (prot.group.write)
- prot_bit |= S_IWGRP;
- if (prot.group.exec)
- prot_bit |= S_IXGRP;
-
- if (prot.other.read)
- prot_bit |= S_IROTH;
- if (prot.other.write)
- prot_bit |= S_IWOTH;
- if (prot.other.exec)
- prot_bit |= S_IXOTH;
-
- st_mode = (type_bit | prot_bit);
-
- return st_mode;
-}
-
-
-static inline int
-iatt_from_stat (struct iatt *iatt, struct stat *stat)
-{
- iatt->ia_dev = stat->st_dev;
- iatt->ia_ino = stat->st_ino;
-
- iatt->ia_type = ia_type_from_st_mode (stat->st_mode);
- iatt->ia_prot = ia_prot_from_st_mode (stat->st_mode);
-
- iatt->ia_nlink = stat->st_nlink;
- iatt->ia_uid = stat->st_uid;
- iatt->ia_gid = stat->st_gid;
-
- iatt->ia_rdev = ia_makedev (major (stat->st_rdev),
- minor (stat->st_rdev));
-
- iatt->ia_size = stat->st_size;
- iatt->ia_blksize = stat->st_blksize;
- iatt->ia_blocks = stat->st_blocks;
-
- iatt->ia_atime = stat->st_atime;
- iatt->ia_atime_nsec = ST_ATIM_NSEC (stat);
-
- iatt->ia_mtime = stat->st_mtime;
- iatt->ia_mtime_nsec = ST_MTIM_NSEC (stat);
-
- iatt->ia_ctime = stat->st_ctime;
- iatt->ia_ctime_nsec = ST_CTIM_NSEC (stat);
-
- return 0;
-}
-
-
-static inline int
-iatt_to_stat (struct iatt *iatt, struct stat *stat)
-{
- stat->st_dev = iatt->ia_dev;
- stat->st_ino = iatt->ia_ino;
-
- stat->st_mode = st_mode_from_ia (iatt->ia_prot, iatt->ia_type);
-
- stat->st_nlink = iatt->ia_nlink;
- stat->st_uid = iatt->ia_uid;
- stat->st_gid = iatt->ia_gid;
-
- stat->st_rdev = makedev (ia_major (iatt->ia_rdev),
- ia_minor (iatt->ia_rdev));
-
- stat->st_size = iatt->ia_size;
- stat->st_blksize = iatt->ia_blksize;
- stat->st_blocks = iatt->ia_blocks;
-
- stat->st_atime = iatt->ia_atime;
- ST_ATIM_NSEC_SET (stat, iatt->ia_atime_nsec);
-
- stat->st_mtime = iatt->ia_mtime;
- ST_MTIM_NSEC_SET (stat, iatt->ia_mtime_nsec);
-
- stat->st_ctime = iatt->ia_ctime;
- ST_CTIM_NSEC_SET (stat, iatt->ia_ctime_nsec);
-
- return 0;
-}
-
-
-#endif /* _IATT_H */
diff --git a/libglusterfs/src/inode.c b/libglusterfs/src/inode.c
index e32eddb5d1c..dbadf77442d 100644
--- a/libglusterfs/src/inode.c
+++ b/libglusterfs/src/inode.c
@@ -8,1754 +8,2670 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "inode.h"
-#include "fd.h"
-#include "common-utils.h"
-#include "statedump.h"
+#include "glusterfs/inode.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/statedump.h"
#include <pthread.h>
#include <sys/types.h>
#include <stdint.h>
-#include "list.h"
-#include <time.h>
+#include "glusterfs/list.h"
#include <assert.h>
+#include "glusterfs/libglusterfs-messages.h"
/* TODO:
move latest accessed dentry to list_head of inode
*/
-#define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type) \
- { \
- int i = 1; \
- inode_t *inode = NULL; \
- list_for_each_entry (inode, head, list) { \
- gf_proc_dump_build_key(key_buf, key_prefix, \
- "%s.%d",list_type, i++); \
- gf_proc_dump_add_section(key_buf); \
- inode_dump(inode, key); \
- } \
- }
+// clang-format off
+/*
+
+Details as per Xavi:
+
+ I think we should have 3 lists: active, lru and invalidate.
+
+We'll need 3 things: refs, nlookups and invalidate_sent flag. Any change of
+refs, invalidate_sent flag and moving from one list to another must be done
+atomically.
+
+With this information, these are the states that cause a transition:
+
+ refs nlookups inv_sent op
+ 1 0 0 unref -> refs = 0, active--->destroy
+ 1 1 0 unref -> refs = 0, active--->lru
+ 1 1 0 forget -> nlookups = 0, active--->active
+ *0 1 0 forget -> nlookups = 0, lru--->destroy
+ *0 1 1 forget -> nlookups = 0, invalidate--->destroy
+ 0 1 0 ref -> refs = 1, lru--->active
+ 0 1 1 ref -> refs = 1, inv_sent = 0, invalidate--->active
+ 0 1 0 overflow -> refs = 1, inv_sent = 1, lru--->invalidate
+ 1 1 1 unref -> refs = 0, invalidate--->invalidate
+ 1 1 1 forget -> nlookups = 0, inv_sent = 0, invalidate--->active
+
+(*) technically these combinations cannot happen because a forget sent by the
+kernel first calls ref() and then unref(). However it's equivalent.
+
+overflow means that lru list has grown beyond the limit and the inode needs to
+be invalidated. All other combinations do not cause a change in state or are not
+possible.
+
+Based on this, the code could be similar to this:
+
+ ref(inode, inv)
+ {
+ if (refs == 0) {
+ if (inv_sent) {
+ invalidate_count--;
+ inv_sent = 0;
+ } else {
+ lru_count--;
+ }
+ if (inv) {
+ inv_sent = 1;
+ invalidate_count++;
+ list_move(inode, invalidate);
+ } else {
+ active_count++;
+ list_move(inode, active);
+ }
+ }
+ refs++;
+ }
+
+ unref(inode, clear)
+ {
+ if (clear && inv_sent) {
+ // there is a case of fuse itself sending forget, without
+ // invalidate, after entry delete, like unlink(), rmdir().
+ inv_sent = 0;
+ invalidate_count--;
+ active_count++;
+ list_move(inode, active);
+ }
+ refs--;
+ if ((refs == 0) && !inv_sent) {
+ active_count--;
+ if (nlookups == 0) {
+ destroy(inode);
+ } else {
+ lru_count++;
+ list_move(inode, lru);
+ }
+ }
+ }
+
+ forget(inode)
+ {
+ ref(inode, false);
+ nlookups--;
+ unref(inode, true);
+ }
+
+ overflow(inode)
+ {
+ ref(inode, true);
+ invalidator(inode);
+ unref(inode, false);
+ }
+
+*/
+// clang-format on
+
+#define INODE_DUMP_LIST(head, key_buf, key_prefix, list_type) \
+ { \
+ int i = 1; \
+ inode_t *inode = NULL; \
+ list_for_each_entry(inode, head, list) \
+ { \
+ gf_proc_dump_build_key(key_buf, key_prefix, "%s.%d", list_type, \
+ i++); \
+ gf_proc_dump_add_section("%s", key_buf); \
+ inode_dump(inode, key); \
+ } \
+ }
static inode_t *
-__inode_unref (inode_t *inode);
+__inode_unref(inode_t *inode, bool clear);
static int
-inode_table_prune (inode_table_t *table);
+inode_table_prune(inode_table_t *table);
void
-fd_dump (struct list_head *head, char *prefix);
+fd_dump(struct list_head *head, char *prefix);
static int
-hash_dentry (inode_t *parent, const char *name, int mod)
+hash_dentry(inode_t *parent, const char *name, int mod)
{
- int hash = 0;
- int ret = 0;
+ int hash = 0;
+ int ret = 0;
- hash = *name;
- if (hash) {
- for (name += 1; *name != '\0'; name++) {
- hash = (hash << 5) - hash + *name;
- }
+ hash = *name;
+ if (hash) {
+ for (name += 1; *name != '\0'; name++) {
+ hash = (hash << 5) - hash + *name;
}
- ret = (hash + (unsigned long)parent) % mod;
+ }
+ ret = (hash + (unsigned long)parent) % mod;
- return ret;
+ return ret;
}
-
static int
-hash_gfid (uuid_t uuid, int mod)
+hash_gfid(uuid_t uuid, int mod)
{
- int ret = 0;
-
- ret = uuid[15] + (uuid[14] << 8);
-
- return ret;
+ return ((uuid[15] + (uuid[14] << 8)) % mod);
}
-
static void
-__dentry_hash (dentry_t *dentry)
+__dentry_hash(dentry_t *dentry, const int hash)
{
- inode_table_t *table = NULL;
- int hash = 0;
+ inode_table_t *table = NULL;
- if (!dentry) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "dentry not found");
- return;
- }
-
- table = dentry->inode->table;
- hash = hash_dentry (dentry->parent, dentry->name,
- table->hashsize);
+ table = dentry->inode->table;
- list_del_init (&dentry->hash);
- list_add (&dentry->hash, &table->name_hash[hash]);
+ list_del_init(&dentry->hash);
+ list_add(&dentry->hash, &table->name_hash[hash]);
}
-
static int
-__is_dentry_hashed (dentry_t *dentry)
+__is_dentry_hashed(dentry_t *dentry)
{
- if (!dentry) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "dentry not found");
- return 0;
- }
-
- return !list_empty (&dentry->hash);
+ return !list_empty(&dentry->hash);
}
-
static void
-__dentry_unhash (dentry_t *dentry)
+__dentry_unhash(dentry_t *dentry)
{
- if (!dentry) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "dentry not found");
- return;
- }
-
- list_del_init (&dentry->hash);
+ list_del_init(&dentry->hash);
}
-
static void
-__dentry_unset (dentry_t *dentry)
+dentry_destroy(dentry_t *dentry)
{
- if (!dentry) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "dentry not found");
- return;
- }
-
- __dentry_unhash (dentry);
+ if (!dentry)
+ return;
- list_del_init (&dentry->inode_list);
+ GF_FREE(dentry->name);
+ dentry->name = NULL;
+ mem_put(dentry);
- if (dentry->name)
- GF_FREE (dentry->name);
+ return;
+}
- if (dentry->parent) {
- __inode_unref (dentry->parent);
- dentry->parent = NULL;
- }
+static dentry_t *
+__dentry_unset(dentry_t *dentry)
+{
+ if (!dentry)
+ return NULL;
- mem_put (dentry);
-}
+ __dentry_unhash(dentry);
+ list_del_init(&dentry->inode_list);
-static int
-__foreach_ancestor_dentry (dentry_t *dentry,
- int (per_dentry_fn) (dentry_t *dentry,
- void *data),
- void *data)
-{
- inode_t *parent = NULL;
- dentry_t *each = NULL;
- int ret = 0;
-
- if (!dentry) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "dentry not found");
- return 0;
- }
+ if (dentry->parent) {
+ __inode_unref(dentry->parent, false);
+ dentry->parent = NULL;
+ }
- ret = per_dentry_fn (dentry, data);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "per dentry fn returned %d", ret);
- goto out;
- }
+ return dentry;
+}
- parent = dentry->parent;
- if (!parent) {
- gf_log (THIS->name, GF_LOG_WARNING, "parent not found");
- goto out;
- }
+static int
+__foreach_ancestor_dentry(dentry_t *dentry,
+ int(per_dentry_fn)(dentry_t *dentry, void *data),
+ void *data)
+{
+ inode_t *parent = NULL;
+ dentry_t *each = NULL;
+ int ret = 0;
- list_for_each_entry (each, &parent->dentry_list, inode_list) {
- ret = __foreach_ancestor_dentry (each, per_dentry_fn, data);
- if (ret)
- goto out;
- }
+ if (!dentry) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND,
+ "dentry not found");
+ return 0;
+ }
+
+ ret = per_dentry_fn(dentry, data);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PER_DENTRY_FAILED,
+ "ret=%d", ret, NULL);
+ goto out;
+ }
+
+ parent = dentry->parent;
+ if (!parent) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_PARENT_DENTRY_NOT_FOUND,
+ NULL);
+ goto out;
+ }
+
+ list_for_each_entry(each, &parent->dentry_list, inode_list)
+ {
+ ret = __foreach_ancestor_dentry(each, per_dentry_fn, data);
+ if (ret)
+ goto out;
+ }
out:
- return ret;
+ return ret;
}
-
static int
-__check_cycle (dentry_t *a_dentry, void *data)
+__check_cycle(dentry_t *a_dentry, void *data)
{
- inode_t *link_inode = NULL;
+ inode_t *link_inode = NULL;
- link_inode = data;
+ link_inode = data;
- if (a_dentry->parent == link_inode)
- return 1;
+ if (a_dentry->parent == link_inode)
+ return 1;
- return 0;
+ return 0;
}
-
static int
-__is_dentry_cyclic (dentry_t *dentry)
+__is_dentry_cyclic(dentry_t *dentry)
{
- int ret = 0;
- inode_t *inode = NULL;
- char *name = "<nul>";
-
- ret = __foreach_ancestor_dentry (dentry, __check_cycle,
- dentry->inode);
- if (ret) {
- inode = dentry->inode;
+ int ret = 0;
- if (dentry->name)
- name = dentry->name;
+ ret = __foreach_ancestor_dentry(dentry, __check_cycle, dentry->inode);
+ if (ret) {
+ gf_smsg(dentry->inode->table->name, GF_LOG_CRITICAL, 0,
+ LG_MSG_DENTRY_CYCLIC_LOOP, "gfid=%s name=-%s",
+ uuid_utoa(dentry->inode->gfid), dentry->name, NULL);
+ }
- gf_log (dentry->inode->table->name, GF_LOG_CRITICAL,
- "detected cyclic loop formation during inode linkage."
- " inode (%s) linking under itself as %s",
- uuid_utoa (inode->gfid), name);
- }
-
- return ret;
+ return ret;
}
-
static void
-__inode_unhash (inode_t *inode)
+__inode_unhash(inode_t *inode)
{
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return;
- }
-
- list_del_init (&inode->hash);
+ list_del_init(&inode->hash);
}
-
static int
-__is_inode_hashed (inode_t *inode)
+__is_inode_hashed(inode_t *inode)
{
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return 0;
- }
-
- return !list_empty (&inode->hash);
+ return !list_empty(&inode->hash);
}
-
static void
-__inode_hash (inode_t *inode)
+__inode_hash(inode_t *inode, const int hash)
{
- inode_table_t *table = NULL;
- int hash = 0;
-
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return;
- }
+ inode_table_t *table = inode->table;
- table = inode->table;
- hash = hash_gfid (inode->gfid, 65536);
-
- list_del_init (&inode->hash);
- list_add (&inode->hash, &table->inode_hash[hash]);
+ list_del_init(&inode->hash);
+ list_add(&inode->hash, &table->inode_hash[hash]);
}
-
static dentry_t *
-__dentry_search_for_inode (inode_t *inode, uuid_t pargfid, const char *name)
+__dentry_search_for_inode(inode_t *inode, uuid_t pargfid, const char *name)
{
- dentry_t *dentry = NULL;
- dentry_t *tmp = NULL;
+ dentry_t *dentry = NULL;
+ dentry_t *tmp = NULL;
- if (!inode || !name) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode || name not found");
- return NULL;
- }
+ /* earlier, just the ino was sent, which could have been 0, now
+ we deal with gfid, and if sent gfid is null or 0, no need to
+ continue with the check */
+ if (!pargfid || gf_uuid_is_null(pargfid))
+ return NULL;
- /* earlier, just the ino was sent, which could have been 0, now
- we deal with gfid, and if sent gfid is null or 0, no need to
- continue with the check */
- if (!pargfid || uuid_is_null (pargfid))
- return NULL;
-
- list_for_each_entry (tmp, &inode->dentry_list, inode_list) {
- if ((uuid_compare (tmp->parent->gfid, pargfid) == 0) &&
- !strcmp (tmp->name, name)) {
- dentry = tmp;
- break;
- }
+ list_for_each_entry(tmp, &inode->dentry_list, inode_list)
+ {
+ if ((gf_uuid_compare(tmp->parent->gfid, pargfid) == 0) &&
+ !strcmp(tmp->name, name)) {
+ dentry = tmp;
+ break;
}
+ }
- return dentry;
+ return dentry;
}
-
static void
-__inode_destroy (inode_t *inode)
+__inode_ctx_free(inode_t *inode)
{
- int index = 0;
- xlator_t *xl = NULL;
- xlator_t *old_THIS = NULL;
-
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return;
- }
-
- if (!inode->_ctx) {
- gf_log (THIS->name, GF_LOG_WARNING, "_ctx not found");
- goto noctx;
- }
+ int index = 0;
+ xlator_t *xl = NULL;
+ xlator_t *old_THIS = NULL;
+
+ if (!inode->_ctx) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_CTX_NULL, NULL);
+ goto noctx;
+ }
+
+ for (index = 0; index < inode->table->ctxcount; index++) {
+ if (inode->_ctx[index].value1 || inode->_ctx[index].value2) {
+ xl = (xlator_t *)(long)inode->_ctx[index].xl_key;
+ if (xl && !xl->call_cleanup && xl->cbks->forget) {
+ old_THIS = THIS;
+ THIS = xl;
+ xl->cbks->forget(xl, inode);
+ THIS = old_THIS;
+ }
+ }
+ }
+
+ GF_FREE(inode->_ctx);
+ inode->_ctx = NULL;
- for (index = 0; index < inode->table->xl->graph->xl_count; index++) {
- if (inode->_ctx[index].xl_key) {
- xl = (xlator_t *)(long)inode->_ctx[index].xl_key;
- old_THIS = THIS;
- THIS = xl;
- if (xl->cbks->forget)
- xl->cbks->forget (xl, inode);
- THIS = old_THIS;
- }
- }
-
- GF_FREE (inode->_ctx);
noctx:
- LOCK_DESTROY (&inode->lock);
- // memset (inode, 0xb, sizeof (*inode));
- mem_put (inode);
+ return;
}
-
static void
-__inode_activate (inode_t *inode)
+__inode_destroy(inode_t *inode)
{
- if (!inode)
- return;
+ __inode_ctx_free(inode);
- list_move (&inode->list, &inode->table->active);
- inode->table->active_size++;
+ LOCK_DESTROY(&inode->lock);
+ // memset (inode, 0xb, sizeof (*inode));
+ mem_put(inode);
}
-
-static void
-__inode_passivate (inode_t *inode)
+void
+inode_ctx_merge(fd_t *fd, inode_t *inode, inode_t *linked_inode)
{
- dentry_t *dentry = NULL;
- dentry_t *t = NULL;
+ int index = 0;
+ xlator_t *xl = NULL;
+ xlator_t *old_THIS = NULL;
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return;
- }
+ if (!fd || !inode || !linked_inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid inode");
+ return;
+ }
- list_move_tail (&inode->list, &inode->table->lru);
- inode->table->lru_size++;
+ if (!inode->_ctx || !linked_inode->_ctx) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid inode context");
+ return;
+ }
+
+ for (; index < inode->table->ctxcount; index++) {
+ if (inode->_ctx[index].xl_key) {
+ xl = (xlator_t *)(long)inode->_ctx[index].xl_key;
- list_for_each_entry_safe (dentry, t, &inode->dentry_list, inode_list) {
- if (!__is_dentry_hashed (dentry))
- __dentry_unset (dentry);
+ old_THIS = THIS;
+ THIS = xl;
+ if (xl->cbks->ictxmerge)
+ xl->cbks->ictxmerge(xl, fd, inode, linked_inode);
+ THIS = old_THIS;
}
+ }
}
+static void
+__inode_activate(inode_t *inode)
+{
+ list_move(&inode->list, &inode->table->active);
+ inode->table->active_size++;
+}
static void
-__inode_retire (inode_t *inode)
+__inode_passivate(inode_t *inode)
{
- dentry_t *dentry = NULL;
- dentry_t *t = NULL;
+ dentry_t *dentry = NULL;
+ dentry_t *t = NULL;
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return;
- }
+ list_move_tail(&inode->list, &inode->table->lru);
+ inode->table->lru_size++;
- list_move_tail (&inode->list, &inode->table->purge);
- inode->table->purge_size++;
+ list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list)
+ {
+ if (!__is_dentry_hashed(dentry))
+ dentry_destroy(__dentry_unset(dentry));
+ }
+}
- __inode_unhash (inode);
+static void
+__inode_retire(inode_t *inode)
+{
+ dentry_t *dentry = NULL;
+ dentry_t *t = NULL;
- list_for_each_entry_safe (dentry, t, &inode->dentry_list, inode_list) {
- __dentry_unset (dentry);
- }
+ list_move_tail(&inode->list, &inode->table->purge);
+ inode->table->purge_size++;
+
+ __inode_unhash(inode);
+
+ list_for_each_entry_safe(dentry, t, &inode->dentry_list, inode_list)
+ {
+ dentry_destroy(__dentry_unset(dentry));
+ }
}
+static int
+__inode_get_xl_index(inode_t *inode, xlator_t *xlator)
+{
+ int set_idx = -1;
+
+ if ((inode->_ctx[xlator->xl_id].xl_key != NULL) &&
+ (inode->_ctx[xlator->xl_id].xl_key != xlator))
+ goto out;
+
+ set_idx = xlator->xl_id;
+ inode->_ctx[set_idx].xl_key = xlator;
+
+out:
+ return set_idx;
+}
static inode_t *
-__inode_unref (inode_t *inode)
+__inode_unref(inode_t *inode, bool clear)
{
- if (!inode)
- return NULL;
+ int index = 0;
+ xlator_t *this = NULL;
+ uint64_t nlookup = 0;
+
+ /*
+ * Root inode should always be in active list of inode table. So unrefs
+ * on root inode are no-ops.
+ */
+ if (__is_root_gfid(inode->gfid))
+ return inode;
+
+ /*
+ * No need to acquire inode table's lock
+ * as __inode_unref is called after acquiding
+ * the inode table's lock.
+ */
+ if (inode->table->cleanup_started && !inode->ref)
+ /*
+ * There is a good chance that, the inode
+ * on which unref came has already been
+ * zero refed and added to the purge list.
+ * This can happen when inode table is
+ * being destroyed (glfs_fini is something
+ * which destroys the inode table).
+ *
+ * Consider a directory 'a' which has a file
+ * 'b'. Now as part of inode table destruction
+ * zero refing of inodes does not happen from
+ * leaf to the root. It happens in the order
+ * inodes are present in the list. So, in this
+ * example, the dentry of 'b' would have its
+ * parent set to the inode of 'a'. So if
+ * 'a' gets zero refed first (as part of
+ * inode table cleanup) and then 'b' has to
+ * zero refed, then dentry_unset is called on
+ * the dentry of 'b' and it further goes on to
+ * call inode_unref on b's parent which is 'a'.
+ * In this situation, GF_ASSERT would be called
+ * below as the refcount of 'a' has been already set
+ * to zero.
+ *
+ * So return the inode if the inode table cleanup
+ * has already started and inode refcount is 0.
+ */
+ return inode;
- if (__is_root_gfid(inode->gfid))
- return inode;
+ this = THIS;
- GF_ASSERT (inode->ref);
+ if (clear && inode->in_invalidate_list) {
+ inode->in_invalidate_list = false;
+ inode->table->invalidate_size--;
+ __inode_activate(inode);
+ }
+ GF_ASSERT(inode->ref);
- --inode->ref;
+ --inode->ref;
- if (!inode->ref) {
- inode->table->active_size--;
+ index = __inode_get_xl_index(inode, this);
+ if (index >= 0) {
+ inode->_ctx[index].xl_key = this;
+ inode->_ctx[index].ref--;
+ }
- if (inode->nlookup)
- __inode_passivate (inode);
- else
- __inode_retire (inode);
- }
+ if (!inode->ref && !inode->in_invalidate_list) {
+ inode->table->active_size--;
- return inode;
-}
+ nlookup = GF_ATOMIC_GET(inode->nlookup);
+ if (nlookup)
+ __inode_passivate(inode);
+ else
+ __inode_retire(inode);
+ }
+ return inode;
+}
static inode_t *
-__inode_ref (inode_t *inode)
+__inode_ref(inode_t *inode, bool is_invalidate)
{
- if (!inode)
- return NULL;
+ int index = 0;
+ xlator_t *this = NULL;
+
+ if (!inode)
+ return NULL;
+
+ this = THIS;
+
+ /*
+ * Root inode should always be in active list of inode table. So unrefs
+ * on root inode are no-ops. If we do not allow unrefs but allow refs,
+ * it leads to refcount overflows and deleting and adding the inode
+ * to active-list, which is ugly. active_size (check __inode_activate)
+ * in inode table increases which is wrong. So just keep the ref
+ * count as 1 always
+ */
+ if (__is_root_gfid(inode->gfid) && inode->ref)
+ return inode;
- if (!inode->ref) {
- inode->table->lru_size--;
- __inode_activate (inode);
+ if (!inode->ref) {
+ if (inode->in_invalidate_list) {
+ inode->in_invalidate_list = false;
+ inode->table->invalidate_size--;
+ } else {
+ inode->table->lru_size--;
}
- inode->ref++;
+ if (is_invalidate) {
+ inode->in_invalidate_list = true;
+ inode->table->invalidate_size++;
+ list_move_tail(&inode->list, &inode->table->invalidate);
+ } else {
+ __inode_activate(inode);
+ }
+ }
- return inode;
-}
+ inode->ref++;
+
+ index = __inode_get_xl_index(inode, this);
+ if (index >= 0) {
+ inode->_ctx[index].xl_key = this;
+ inode->_ctx[index].ref++;
+ }
+ return inode;
+}
inode_t *
-inode_unref (inode_t *inode)
+inode_unref(inode_t *inode)
{
- inode_table_t *table = NULL;
+ inode_table_t *table = NULL;
- if (!inode)
- return NULL;
+ if (!inode)
+ return NULL;
- table = inode->table;
+ table = inode->table;
- pthread_mutex_lock (&table->lock);
- {
- inode = __inode_unref (inode);
- }
- pthread_mutex_unlock (&table->lock);
+ pthread_mutex_lock(&table->lock);
+ {
+ inode = __inode_unref(inode, false);
+ }
+ pthread_mutex_unlock(&table->lock);
- inode_table_prune (table);
+ inode_table_prune(table);
- return inode;
+ return inode;
}
-
inode_t *
-inode_ref (inode_t *inode)
+inode_ref(inode_t *inode)
{
- inode_table_t *table = NULL;
+ inode_table_t *table = NULL;
- if (!inode)
- return NULL;
+ if (!inode)
+ return NULL;
- table = inode->table;
+ table = inode->table;
- pthread_mutex_lock (&table->lock);
- {
- inode = __inode_ref (inode);
- }
- pthread_mutex_unlock (&table->lock);
+ pthread_mutex_lock(&table->lock);
+ {
+ inode = __inode_ref(inode, false);
+ }
+ pthread_mutex_unlock(&table->lock);
- return inode;
+ return inode;
}
-
static dentry_t *
-__dentry_create (inode_t *inode, inode_t *parent, const char *name)
+dentry_create(inode_t *inode, inode_t *parent, const char *name)
{
- dentry_t *newd = NULL;
+ dentry_t *newd = NULL;
- if (!inode || !parent || !name) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING,
- "inode || parent || name not found");
- return NULL;
- }
+ newd = mem_get0(parent->table->dentry_pool);
+ if (newd == NULL) {
+ goto out;
+ }
- newd = mem_get0 (parent->table->dentry_pool);
- if (newd == NULL) {
- goto out;
- }
+ INIT_LIST_HEAD(&newd->inode_list);
+ INIT_LIST_HEAD(&newd->hash);
- INIT_LIST_HEAD (&newd->inode_list);
- INIT_LIST_HEAD (&newd->hash);
-
- newd->name = gf_strdup (name);
- if (newd->name == NULL) {
- mem_put (newd);
- newd = NULL;
- goto out;
- }
-
- if (parent)
- newd->parent = __inode_ref (parent);
+ newd->name = gf_strdup(name);
+ if (newd->name == NULL) {
+ mem_put(newd);
+ newd = NULL;
+ goto out;
+ }
- list_add (&newd->inode_list, &inode->dentry_list);
- newd->inode = inode;
+ newd->inode = inode;
out:
- return newd;
+ return newd;
}
-
static inode_t *
-__inode_create (inode_table_t *table)
+inode_create(inode_table_t *table)
{
- inode_t *newi = NULL;
-
- if (!table) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "table not found");
- return NULL;
- }
+ inode_t *newi = NULL;
- newi = mem_get0 (table->inode_pool);
- if (!newi) {
- goto out;
- }
-
- newi->table = table;
+ newi = mem_get0(table->inode_pool);
+ if (!newi) {
+ goto out;
+ }
- LOCK_INIT (&newi->lock);
+ newi->table = table;
- INIT_LIST_HEAD (&newi->fd_list);
- INIT_LIST_HEAD (&newi->list);
- INIT_LIST_HEAD (&newi->hash);
- INIT_LIST_HEAD (&newi->dentry_list);
+ LOCK_INIT(&newi->lock);
- newi->_ctx = GF_CALLOC (1, (sizeof (struct _inode_ctx) *
- table->xl->graph->xl_count),
- gf_common_mt_inode_ctx);
-
- if (newi->_ctx == NULL) {
- LOCK_DESTROY (&newi->lock);
- mem_put (newi);
- newi = NULL;
- goto out;
- }
+ INIT_LIST_HEAD(&newi->fd_list);
+ INIT_LIST_HEAD(&newi->list);
+ INIT_LIST_HEAD(&newi->hash);
+ INIT_LIST_HEAD(&newi->dentry_list);
- list_add (&newi->list, &table->lru);
- table->lru_size++;
+ newi->_ctx = GF_CALLOC(1, (sizeof(struct _inode_ctx) * table->ctxcount),
+ gf_common_mt_inode_ctx);
+ if (newi->_ctx == NULL) {
+ LOCK_DESTROY(&newi->lock);
+ mem_put(newi);
+ newi = NULL;
+ goto out;
+ }
out:
-
- return newi;
+ return newi;
}
-
inode_t *
-inode_new (inode_table_t *table)
+inode_new(inode_table_t *table)
{
- inode_t *inode = NULL;
-
- if (!table) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return NULL;
- }
-
- pthread_mutex_lock (&table->lock);
+ inode_t *inode = NULL;
+
+ if (!table) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
+ LG_MSG_INODE_TABLE_NOT_FOUND,
+ "inode not "
+ "found");
+ return NULL;
+ }
+
+ inode = inode_create(table);
+ if (inode) {
+ pthread_mutex_lock(&table->lock);
{
- inode = __inode_create (table);
- if (inode != NULL) {
- __inode_ref (inode);
- }
+ list_add(&inode->list, &table->lru);
+ table->lru_size++;
+ __inode_ref(inode, false);
}
- pthread_mutex_unlock (&table->lock);
+ pthread_mutex_unlock(&table->lock);
+ }
- return inode;
+ return inode;
}
-
+/* Reduce the ref count by value 'nref'
+ * Args:
+ * inode - address of the inode to operate on
+ * nref - number to subtracted from inode->ref
+ * if nref is 0, then the ref count is overwritten 0
+ *
+ * This function may cause the purging of the inode,
+ * hence to be used only in destructor functions and not otherwise.
+ */
static inode_t *
-__inode_lookup (inode_t *inode)
+__inode_ref_reduce_by_n(inode_t *inode, uint64_t nref)
{
- if (!inode)
- return NULL;
+ uint64_t nlookup = 0;
- inode->nlookup++;
+ GF_ASSERT(inode->ref >= nref);
- return inode;
-}
+ inode->ref -= nref;
+
+ if (!nref)
+ inode->ref = 0;
+
+ if (!inode->ref) {
+ inode->table->active_size--;
+ nlookup = GF_ATOMIC_GET(inode->nlookup);
+ if (nlookup)
+ __inode_passivate(inode);
+ else
+ __inode_retire(inode);
+ }
+
+ return inode;
+}
static inode_t *
-__inode_forget (inode_t *inode, uint64_t nlookup)
+inode_forget_atomic(inode_t *inode, uint64_t nlookup)
{
- if (!inode)
- return NULL;
-
- GF_ASSERT (inode->nlookup >= nlookup);
+ uint64_t inode_lookup = 0;
- inode->nlookup -= nlookup;
+ if (!inode)
+ return NULL;
- if (!nlookup)
- inode->nlookup = 0;
+ if (nlookup == 0) {
+ GF_ATOMIC_INIT(inode->nlookup, 0);
+ } else {
+ inode_lookup = GF_ATOMIC_FETCH_SUB(inode->nlookup, nlookup);
+ GF_ASSERT(inode_lookup >= nlookup);
+ }
- return inode;
+ return inode;
}
-
dentry_t *
-__dentry_grep (inode_table_t *table, inode_t *parent, const char *name)
+__dentry_grep(inode_table_t *table, inode_t *parent, const char *name,
+ const int hash)
{
- int hash = 0;
- dentry_t *dentry = NULL;
- dentry_t *tmp = NULL;
-
- if (!table || !name || !parent)
- return NULL;
-
- hash = hash_dentry (parent, name, table->hashsize);
+ dentry_t *dentry = NULL;
+ dentry_t *tmp = NULL;
- list_for_each_entry (tmp, &table->name_hash[hash], hash) {
- if (tmp->parent == parent && !strcmp (tmp->name, name)) {
- dentry = tmp;
- break;
- }
+ list_for_each_entry(tmp, &table->name_hash[hash], hash)
+ {
+ if (tmp->parent == parent && !strcmp(tmp->name, name)) {
+ dentry = tmp;
+ break;
}
+ }
- return dentry;
+ return dentry;
}
-
inode_t *
-inode_grep (inode_table_t *table, inode_t *parent, const char *name)
+inode_grep(inode_table_t *table, inode_t *parent, const char *name)
{
- inode_t *inode = NULL;
- dentry_t *dentry = NULL;
-
- if (!table || !parent || !name) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING,
- "table || parent || name not found");
- return NULL;
- }
-
- pthread_mutex_lock (&table->lock);
- {
- dentry = __dentry_grep (table, parent, name);
-
- if (dentry)
- inode = dentry->inode;
-
- if (inode)
- __inode_ref (inode);
- }
- pthread_mutex_unlock (&table->lock);
-
- return inode;
+ inode_t *inode = NULL;
+ dentry_t *dentry = NULL;
+
+ if (!table || !parent || !name) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "table || parent || name"
+ " not found");
+ return NULL;
+ }
+
+ int hash = hash_dentry(parent, name, table->hashsize);
+
+ pthread_mutex_lock(&table->lock);
+ {
+ dentry = __dentry_grep(table, parent, name, hash);
+ if (dentry) {
+ inode = dentry->inode;
+ if (inode)
+ __inode_ref(inode, false);
+ }
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ return inode;
}
-
inode_t *
-inode_resolve (inode_table_t *table, char *path)
+inode_resolve(inode_table_t *table, char *path)
{
- char *tmp = NULL, *bname = NULL, *str = NULL, *saveptr = NULL;
- inode_t *inode = NULL, *parent = NULL;
+ char *tmp = NULL, *bname = NULL, *str = NULL, *saveptr = NULL;
+ inode_t *inode = NULL, *parent = NULL;
- if ((path == NULL) || (table == NULL)) {
- goto out;
- }
+ if ((path == NULL) || (table == NULL)) {
+ goto out;
+ }
- parent = inode_ref (table->root);
- str = tmp = gf_strdup (path);
+ parent = inode_ref(table->root);
+ str = tmp = gf_strdup(path);
+ if (str == NULL) {
+ goto out;
+ }
- while (1) {
- bname = strtok_r (str, "/", &saveptr);
- if (bname == NULL) {
- break;
- }
-
- if (inode != NULL) {
- inode_unref (inode);
- }
+ while (1) {
+ bname = strtok_r(str, "/", &saveptr);
+ if (bname == NULL) {
+ break;
+ }
- inode = inode_grep (table, parent, bname);
- if (inode == NULL) {
- break;
- }
+ if (inode != NULL) {
+ inode_unref(inode);
+ }
- if (parent != NULL) {
- inode_unref (parent);
- }
+ inode = inode_grep(table, parent, bname);
+ if (inode == NULL) {
+ break;
+ }
- parent = inode_ref (inode);
- str = NULL;
+ if (parent != NULL) {
+ inode_unref(parent);
}
- inode_unref (parent);
- GF_FREE (tmp);
+ parent = inode_ref(inode);
+ str = NULL;
+ }
+
+ inode_unref(parent);
+ GF_FREE(tmp);
out:
- return inode;
+ return inode;
}
-
int
-inode_grep_for_gfid (inode_table_t *table, inode_t *parent, const char *name,
- uuid_t gfid, ia_type_t *type)
+inode_grep_for_gfid(inode_table_t *table, inode_t *parent, const char *name,
+ uuid_t gfid, ia_type_t *type)
{
- inode_t *inode = NULL;
- dentry_t *dentry = NULL;
- int ret = -1;
-
- if (!table || !parent || !name) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING,
- "table || parent || name not found");
- return ret;
- }
-
- pthread_mutex_lock (&table->lock);
- {
- dentry = __dentry_grep (table, parent, name);
+ inode_t *inode = NULL;
+ dentry_t *dentry = NULL;
+ int ret = -1;
+
+ if (!table || !parent || !name) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "table || parent || name"
+ " not found");
+ return ret;
+ }
- if (dentry)
- inode = dentry->inode;
+ int hash = hash_dentry(parent, name, table->hashsize);
- if (inode) {
- uuid_copy (gfid, inode->gfid);
- *type = inode->ia_type;
- ret = 0;
- }
+ pthread_mutex_lock(&table->lock);
+ {
+ dentry = __dentry_grep(table, parent, name, hash);
+ if (dentry) {
+ inode = dentry->inode;
+ if (inode) {
+ gf_uuid_copy(gfid, inode->gfid);
+ *type = inode->ia_type;
+ ret = 0;
+ }
}
- pthread_mutex_unlock (&table->lock);
+ }
+ pthread_mutex_unlock(&table->lock);
- return ret;
+ return ret;
}
-
/* return 1 if gfid is of root, 0 if not */
gf_boolean_t
-__is_root_gfid (uuid_t gfid)
+__is_root_gfid(uuid_t gfid)
{
- uuid_t root;
+ static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
- memset (root, 0, 16);
- root[15] = 1;
+ if (gf_uuid_compare(gfid, root) == 0)
+ return _gf_true;
- if (uuid_compare (gfid, root) == 0)
- return _gf_true;
-
- return _gf_false;
+ return _gf_false;
}
-
inode_t *
-__inode_find (inode_table_t *table, uuid_t gfid)
+__inode_find(inode_table_t *table, uuid_t gfid, const int hash)
{
- inode_t *inode = NULL;
- inode_t *tmp = NULL;
- int hash = 0;
+ inode_t *inode = NULL;
+ inode_t *tmp = NULL;
- if (!table) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "table not found");
- goto out;
- }
-
- if (__is_root_gfid (gfid))
- return table->root;
+ if (__is_root_gfid(gfid))
+ return table->root;
- hash = hash_gfid (gfid, 65536);
-
- list_for_each_entry (tmp, &table->inode_hash[hash], hash) {
- if (uuid_compare (tmp->gfid, gfid) == 0) {
- inode = tmp;
- break;
- }
+ list_for_each_entry(tmp, &table->inode_hash[hash], hash)
+ {
+ if (gf_uuid_compare(tmp->gfid, gfid) == 0) {
+ inode = tmp;
+ break;
}
+ }
-out:
- return inode;
+ return inode;
}
-
inode_t *
-inode_find (inode_table_t *table, uuid_t gfid)
+inode_find(inode_table_t *table, uuid_t gfid)
{
- inode_t *inode = NULL;
-
- if (!table) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "table not found");
- return NULL;
- }
-
- pthread_mutex_lock (&table->lock);
- {
- inode = __inode_find (table, gfid);
- if (inode)
- __inode_ref (inode);
- }
- pthread_mutex_unlock (&table->lock);
-
- return inode;
+ inode_t *inode = NULL;
+
+ if (!table) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
+ LG_MSG_INODE_TABLE_NOT_FOUND,
+ "table not "
+ "found");
+ return NULL;
+ }
+
+ int hash = hash_gfid(gfid, 65536);
+
+ pthread_mutex_lock(&table->lock);
+ {
+ inode = __inode_find(table, gfid, hash);
+ if (inode)
+ __inode_ref(inode, false);
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ return inode;
}
-
static inode_t *
-__inode_link (inode_t *inode, inode_t *parent, const char *name,
- struct iatt *iatt)
+__inode_link(inode_t *inode, inode_t *parent, const char *name,
+ struct iatt *iatt, const int dhash)
{
- dentry_t *dentry = NULL;
- dentry_t *old_dentry = NULL;
- inode_t *old_inode = NULL;
- inode_table_t *table = NULL;
- inode_t *link_inode = NULL;
+ dentry_t *dentry = NULL;
+ dentry_t *old_dentry = NULL;
+ inode_t *old_inode = NULL;
+ inode_table_t *table = NULL;
+ inode_t *link_inode = NULL;
+ char link_uuid_str[64] = {0}, parent_uuid_str[64] = {0};
- if (!inode)
- return NULL;
+ table = inode->table;
- table = inode->table;
- if (!table)
- return NULL;
+ if (parent) {
+ /* We should prevent inode linking between different
+ inode tables. This can cause errors which is very
+ hard to catch/debug. */
+ if (inode->table != parent->table) {
+ errno = EINVAL;
+ GF_ASSERT(!"link attempted b/w inodes of diff table");
+ }
- if (parent) {
- /* We should prevent inode linking between different
- inode tables. This can cause errors which is very
- hard to catch/debug. */
- if (inode->table != parent->table) {
- GF_ASSERT (!"link attempted b/w inodes of diff table");
- }
+ if (parent->ia_type != IA_IFDIR) {
+ errno = EINVAL;
+ GF_ASSERT(!"link attempted on non-directory parent");
+ return NULL;
}
- link_inode = inode;
+ if (!name || strlen(name) == 0) {
+ errno = EINVAL;
+ GF_ASSERT (!"link attempted with no basename on "
+ "parent");
+ return NULL;
+ }
+ }
- if (!__is_inode_hashed (inode)) {
- if (!iatt)
- return NULL;
+ link_inode = inode;
- if (uuid_is_null (iatt->ia_gfid))
- return NULL;
+ if (!__is_inode_hashed(inode)) {
+ if (!iatt) {
+ errno = EINVAL;
+ return NULL;
+ }
- uuid_copy (inode->gfid, iatt->ia_gfid);
- inode->ia_type = iatt->ia_type;
+ if (gf_uuid_is_null(iatt->ia_gfid)) {
+ errno = EINVAL;
+ return NULL;
+ }
- old_inode = __inode_find (table, inode->gfid);
+ int ihash = hash_gfid(iatt->ia_gfid, 65536);
- if (old_inode) {
- link_inode = old_inode;
- } else {
- __inode_hash (inode);
- }
- }
+ old_inode = __inode_find(table, iatt->ia_gfid, ihash);
- if (name) {
- if (!strcmp(name, ".") || !strcmp(name, ".."))
- return link_inode;
- }
+ if (old_inode) {
+ link_inode = old_inode;
+ } else {
+ gf_uuid_copy(inode->gfid, iatt->ia_gfid);
+ inode->ia_type = iatt->ia_type;
+ __inode_hash(inode, ihash);
+ }
+ } else {
+ /* @old_inode serves another important purpose - it indicates
+ to the code further below whether a dentry cycle check is
+ required or not (a new inode linkage can never result in
+ creation of a loop.)
+
+ if the given @inode is already hashed, it actually means
+ it is an "old" inode and deserves to undergo the cyclic
+ check.
+ */
+ old_inode = inode;
+ }
+
+ if (name && (!strcmp(name, ".") || !strcmp(name, ".."))) {
+ return link_inode;
+ }
+
+ /* use only link_inode beyond this point */
+ if (parent) {
+ old_dentry = __dentry_grep(table, parent, name, dhash);
+
+ if (!old_dentry || old_dentry->inode != link_inode) {
+ dentry = dentry_create(link_inode, parent, name);
+ if (!dentry) {
+ gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0,
+ LG_MSG_DENTRY_CREATE_FAILED,
+ "dentry create failed on "
+ "inode %s with parent %s",
+ uuid_utoa_r(link_inode->gfid, link_uuid_str),
+ uuid_utoa_r(parent->gfid, parent_uuid_str));
+ errno = ENOMEM;
+ return NULL;
+ }
- /* use only link_inode beyond this point */
- if (parent) {
- old_dentry = __dentry_grep (table, parent, name);
-
- if (!old_dentry || old_dentry->inode != link_inode) {
- dentry = __dentry_create (link_inode, parent, name);
- if (!dentry) {
- gf_log_callingfn (THIS->name, GF_LOG_ERROR,
- "dentry create failed on "
- "inode %s with parent %s",
- uuid_utoa (link_inode->gfid),
- uuid_utoa (parent->gfid));
- return NULL;
- }
- if (old_inode && __is_dentry_cyclic (dentry)) {
- __dentry_unset (dentry);
- return NULL;
- }
- __dentry_hash (dentry);
-
- if (old_dentry)
- __dentry_unset (old_dentry);
- }
+ /* dentry linking needs to happen inside lock */
+ dentry->parent = __inode_ref(parent, false);
+ list_add(&dentry->inode_list, &link_inode->dentry_list);
+
+ if (old_inode && __is_dentry_cyclic(dentry)) {
+ errno = ELOOP;
+ dentry_destroy(__dentry_unset(dentry));
+ return NULL;
+ }
+ __dentry_hash(dentry, dhash);
+
+ if (old_dentry)
+ dentry_destroy(__dentry_unset(old_dentry));
}
+ }
- return link_inode;
+ return link_inode;
}
-
inode_t *
-inode_link (inode_t *inode, inode_t *parent, const char *name,
- struct iatt *iatt)
+inode_link(inode_t *inode, inode_t *parent, const char *name, struct iatt *iatt)
{
- inode_table_t *table = NULL;
- inode_t *linked_inode = NULL;
+ int hash = 0;
+ inode_table_t *table = NULL;
+ inode_t *linked_inode = NULL;
+
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return NULL;
+ }
+
+ table = inode->table;
+
+ if (parent && name) {
+ hash = hash_dentry(parent, name, table->hashsize);
+ }
+
+ if (name && strchr(name, '/')) {
+ GF_ASSERT(!"inode link attempted with '/' in name");
+ return NULL;
+ }
+
+ pthread_mutex_lock(&table->lock);
+ {
+ linked_inode = __inode_link(inode, parent, name, iatt, hash);
+ if (linked_inode)
+ __inode_ref(linked_inode, false);
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ inode_table_prune(table);
+
+ return linked_inode;
+}
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return NULL;
- }
+int
+inode_lookup(inode_t *inode)
+{
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return -1;
+ }
- table = inode->table;
+ GF_ATOMIC_INC(inode->nlookup);
- pthread_mutex_lock (&table->lock);
- {
- linked_inode = __inode_link (inode, parent, name, iatt);
+ return 0;
+}
- if (linked_inode)
- __inode_ref (linked_inode);
- }
- pthread_mutex_unlock (&table->lock);
+int
+inode_ref_reduce_by_n(inode_t *inode, uint64_t nref)
+{
+ inode_table_t *table = NULL;
- inode_table_prune (table);
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return -1;
+ }
- return linked_inode;
-}
+ table = inode->table;
+ pthread_mutex_lock(&table->lock);
+ {
+ __inode_ref_reduce_by_n(inode, nref);
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ inode_table_prune(table);
+
+ return 0;
+}
int
-inode_lookup (inode_t *inode)
+inode_forget(inode_t *inode, uint64_t nlookup)
{
- inode_table_t *table = NULL;
+ inode_table_t *table = NULL;
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return -1;
- }
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return -1;
+ }
- table = inode->table;
+ table = inode->table;
- pthread_mutex_lock (&table->lock);
- {
- __inode_lookup (inode);
- }
- pthread_mutex_unlock (&table->lock);
+ inode_forget_atomic(inode, nlookup);
- return 0;
-}
+ inode_table_prune(table);
+ return 0;
+}
int
-inode_forget (inode_t *inode, uint64_t nlookup)
+inode_forget_with_unref(inode_t *inode, uint64_t nlookup)
{
- inode_table_t *table = NULL;
+ inode_table_t *table = NULL;
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return -1;
- }
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return -1;
+ }
- table = inode->table;
+ table = inode->table;
- pthread_mutex_lock (&table->lock);
- {
- __inode_forget (inode, nlookup);
- }
- pthread_mutex_unlock (&table->lock);
+ pthread_mutex_lock(&table->lock);
+ {
+ inode_forget_atomic(inode, nlookup);
+ __inode_unref(inode, true);
+ }
+ pthread_mutex_unlock(&table->lock);
- inode_table_prune (table);
+ inode_table_prune(table);
- return 0;
+ return 0;
}
-
-static void
-__inode_unlink (inode_t *inode, inode_t *parent, const char *name)
+/*
+ * Invalidate an inode. This is invoked when a translator decides that an
+ * inode's cache is no longer valid. Any translator interested in taking action
+ * in this situation can define the invalidate callback.
+ */
+int
+inode_invalidate(inode_t *inode)
{
- dentry_t *dentry = NULL;
+ int ret = 0;
+ xlator_t *xl = NULL;
+ xlator_t *old_THIS = NULL;
+
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return -1;
+ }
+
+ /*
+ * The master xlator is not in the graph but it can define an invalidate
+ * handler.
+ */
+ xl = inode->table->xl->ctx->master;
+ if (xl && xl->cbks->invalidate) {
+ old_THIS = THIS;
+ THIS = xl;
+ ret = xl->cbks->invalidate(xl, inode);
+ THIS = old_THIS;
+ if (ret)
+ return ret;
+ }
- if (!inode || !parent || !name)
- return;
+ xl = inode->table->xl->graph->first;
+ while (xl) {
+ old_THIS = THIS;
+ THIS = xl;
+ if (xl->cbks->invalidate)
+ ret = xl->cbks->invalidate(xl, inode);
+ THIS = old_THIS;
- dentry = __dentry_search_for_inode (inode, parent->gfid, name);
+ if (ret)
+ break;
- /* dentry NULL for corrupted backend */
- if (dentry)
- __dentry_unset (dentry);
+ xl = xl->next;
+ }
+
+ return ret;
}
+static dentry_t *
+__inode_unlink(inode_t *inode, inode_t *parent, const char *name)
+{
+ dentry_t *dentry = NULL;
+ char pgfid[64] = {0};
+ char gfid[64] = {0};
+
+ dentry = __dentry_search_for_inode(inode, parent->gfid, name);
+
+ /* dentry NULL for corrupted backend */
+ if (dentry) {
+ dentry = __dentry_unset(dentry);
+ } else {
+ gf_smsg("inode", GF_LOG_WARNING, 0, LG_MSG_DENTRY_NOT_FOUND,
+ "parent-gfid=%s name=%s gfid%s",
+ uuid_utoa_r(parent->gfid, pgfid), name,
+ uuid_utoa_r(inode->gfid, gfid), NULL);
+ }
+
+ return dentry;
+}
void
-inode_unlink (inode_t *inode, inode_t *parent, const char *name)
+inode_unlink(inode_t *inode, inode_t *parent, const char *name)
{
- inode_table_t *table = NULL;
+ inode_table_t *table;
+ dentry_t *dentry;
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return;
- }
+ if (!inode || !parent || !name)
+ return;
- table = inode->table;
+ table = inode->table;
- pthread_mutex_lock (&table->lock);
- {
- __inode_unlink (inode, parent, name);
- }
- pthread_mutex_unlock (&table->lock);
+ pthread_mutex_lock(&table->lock);
+ {
+ dentry = __inode_unlink(inode, parent, name);
+ }
+ pthread_mutex_unlock(&table->lock);
- inode_table_prune (table);
-}
+ dentry_destroy(dentry);
+ inode_table_prune(table);
+}
int
-inode_rename (inode_table_t *table, inode_t *srcdir, const char *srcname,
- inode_t *dstdir, const char *dstname, inode_t *inode,
- struct iatt *iatt)
+inode_rename(inode_table_t *table, inode_t *srcdir, const char *srcname,
+ inode_t *dstdir, const char *dstname, inode_t *inode,
+ struct iatt *iatt)
{
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return -1;
- }
+ int hash = 0;
+ dentry_t *dentry = NULL;
- table = inode->table;
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return -1;
+ }
- pthread_mutex_lock (&table->lock);
- {
- __inode_link (inode, dstdir, dstname, iatt);
- __inode_unlink (inode, srcdir, srcname);
- }
- pthread_mutex_unlock (&table->lock);
+ table = inode->table;
- inode_table_prune (table);
+ if (dstname && strchr(dstname, '/')) {
+ GF_ASSERT(!"inode link attempted with '/' in name");
+ return -1;
+ }
- return 0;
-}
+ if (dstdir && dstname) {
+ hash = hash_dentry(dstdir, dstname, table->hashsize);
+ }
+
+ pthread_mutex_lock(&table->lock);
+ {
+ __inode_link(inode, dstdir, dstname, iatt, hash);
+ /* pick the old dentry */
+ dentry = __inode_unlink(inode, srcdir, srcname);
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ /* free the old dentry */
+ dentry_destroy(dentry);
+ inode_table_prune(table);
+
+ return 0;
+}
static dentry_t *
-__dentry_search_arbit (inode_t *inode)
+__dentry_search_arbit(inode_t *inode)
{
- dentry_t *dentry = NULL;
- dentry_t *trav = NULL;
+ dentry_t *dentry = NULL;
+ dentry_t *trav = NULL;
- if (!inode)
- return NULL;
+ if (!inode)
+ return NULL;
- list_for_each_entry (trav, &inode->dentry_list, inode_list) {
- if (__is_dentry_hashed (trav)) {
- dentry = trav;
- break;
- }
+ list_for_each_entry(trav, &inode->dentry_list, inode_list)
+ {
+ if (__is_dentry_hashed(trav)) {
+ dentry = trav;
+ break;
}
+ }
- if (!dentry) {
- list_for_each_entry (trav, &inode->dentry_list, inode_list) {
- dentry = trav;
- break;
- }
+ if (!dentry) {
+ list_for_each_entry(trav, &inode->dentry_list, inode_list)
+ {
+ dentry = trav;
+ break;
}
+ }
- return dentry;
+ return dentry;
}
-
inode_t *
-inode_parent (inode_t *inode, uuid_t pargfid, const char *name)
+inode_parent(inode_t *inode, uuid_t pargfid, const char *name)
{
- inode_t *parent = NULL;
- inode_table_t *table = NULL;
- dentry_t *dentry = NULL;
-
- if (!inode) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "inode not found");
- return NULL;
+ inode_t *parent = NULL;
+ inode_table_t *table = NULL;
+ dentry_t *dentry = NULL;
+
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return NULL;
+ }
+
+ table = inode->table;
+
+ pthread_mutex_lock(&table->lock);
+ {
+ if (pargfid && !gf_uuid_is_null(pargfid) && name) {
+ dentry = __dentry_search_for_inode(inode, pargfid, name);
+ } else {
+ dentry = __dentry_search_arbit(inode);
}
- table = inode->table;
-
- pthread_mutex_lock (&table->lock);
- {
- if (pargfid && !uuid_is_null (pargfid) && name) {
- dentry = __dentry_search_for_inode (inode, pargfid, name);
- } else {
- dentry = __dentry_search_arbit (inode);
- }
-
- if (dentry)
- parent = dentry->parent;
+ if (dentry)
+ parent = dentry->parent;
- if (parent)
- __inode_ref (parent);
- }
- pthread_mutex_unlock (&table->lock);
+ if (parent)
+ __inode_ref(parent, false);
+ }
+ pthread_mutex_unlock(&table->lock);
- return parent;
+ return parent;
}
+static int
+__inode_has_dentry(inode_t *inode)
+{
+ return !list_empty(&inode->dentry_list);
+}
int
-__inode_path (inode_t *inode, const char *name, char **bufp)
-{
- inode_table_t *table = NULL;
- inode_t *itrav = NULL;
- dentry_t *trav = NULL;
- size_t i = 0, size = 0;
- int64_t ret = 0;
- int len = 0;
- char *buf = NULL;
-
- if (!inode || uuid_is_null (inode->gfid)) {
- GF_ASSERT (0);
- gf_log_callingfn (THIS->name, GF_LOG_WARNING, "invalid inode");
- return -1;
- }
+inode_has_dentry(inode_t *inode)
+{
+ int dentry_present = 0;
- table = inode->table;
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return 0;
+ }
- itrav = inode;
- for (trav = __dentry_search_arbit (itrav); trav;
- trav = __dentry_search_arbit (itrav)) {
- itrav = trav->parent;
- i ++; /* "/" */
- i += strlen (trav->name);
- if (i > PATH_MAX) {
- gf_log (table->name, GF_LOG_CRITICAL,
- "possible infinite loop detected, "
- "forcing break. name=(%s)", name);
- ret = -ENOENT;
- goto out;
- }
- }
+ LOCK(&inode->lock);
+ {
+ dentry_present = __inode_has_dentry(inode);
+ }
+ UNLOCK(&inode->lock);
- if (!__is_root_gfid (itrav->gfid)) {
- /* "<gfid:00000000-0000-0000-0000-000000000000>"/path */
- i += GFID_STR_PFX_LEN;
- }
+ return dentry_present;
+}
+
+int
+__inode_path(inode_t *inode, const char *name, char **bufp)
+{
+ inode_table_t *table = NULL;
+ inode_t *itrav = NULL;
+ dentry_t *trav = NULL;
+ size_t i = 0, size = 0;
+ int64_t ret = 0;
+ int len = 0;
+ char *buf = NULL;
+
+ if (!inode || gf_uuid_is_null(inode->gfid)) {
+ GF_ASSERT(0);
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid inode");
+ return -EINVAL;
+ }
+
+ table = inode->table;
+
+ itrav = inode;
+ for (trav = __dentry_search_arbit(itrav); trav;
+ trav = __dentry_search_arbit(itrav)) {
+ itrav = trav->parent;
+ i++; /* "/" */
+ i += strlen(trav->name);
+ if (i > PATH_MAX) {
+ gf_smsg(table->name, GF_LOG_CRITICAL, 0, LG_MSG_DENTRY_CYCLIC_LOOP,
+ "name=%s", name, NULL);
+ ret = -ENOENT;
+ goto out;
+ }
+ }
+
+ if (!__is_root_gfid(itrav->gfid)) {
+ /* "<gfid:00000000-0000-0000-0000-000000000000>"/path */
+ i += GFID_STR_PFX_LEN;
+ }
+
+ if (name) {
+ i++;
+ i += strlen(name);
+ }
+
+ ret = i;
+ size = i + 1;
+ buf = GF_CALLOC(size, sizeof(char), gf_common_mt_char);
+ if (buf) {
+ buf[size - 1] = 0;
if (name) {
- i++;
- i += strlen (name);
+ len = strlen(name);
+ memcpy(buf + (i - len), name, len);
+ buf[i - len - 1] = '/';
+ i -= (len + 1);
}
- ret = i;
- size = i + 1;
- buf = GF_CALLOC (size, sizeof (char), gf_common_mt_char);
- if (buf) {
-
- buf[size - 1] = 0;
-
- if (name) {
- len = strlen (name);
- strncpy (buf + (i - len), name, len);
- buf[i-len-1] = '/';
- i -= (len + 1);
- }
-
- itrav = inode;
- for (trav = __dentry_search_arbit (itrav); trav;
- trav = __dentry_search_arbit (itrav)) {
- itrav = trav->parent;
- len = strlen (trav->name);
- strncpy (buf + (i - len), trav->name, len);
- buf[i-len-1] = '/';
- i -= (len + 1);
- }
-
- if (!__is_root_gfid (itrav->gfid)) {
- snprintf (&buf[i-GFID_STR_PFX_LEN], GFID_STR_PFX_LEN,
- INODE_PATH_FMT, uuid_utoa (itrav->gfid));
- buf[i-1] = '>';
- }
+ itrav = inode;
+ for (trav = __dentry_search_arbit(itrav); trav;
+ trav = __dentry_search_arbit(itrav)) {
+ itrav = trav->parent;
+ len = strlen(trav->name);
+ memcpy(buf + (i - len), trav->name, len);
+ buf[i - len - 1] = '/';
+ i -= (len + 1);
+ }
- *bufp = buf;
- } else {
- ret = -ENOMEM;
+ if (!__is_root_gfid(itrav->gfid)) {
+ snprintf(&buf[i - GFID_STR_PFX_LEN], GFID_STR_PFX_LEN,
+ INODE_PATH_FMT, uuid_utoa(itrav->gfid));
+ buf[i - 1] = '>';
}
+ *bufp = buf;
+ } else {
+ ret = -ENOMEM;
+ }
+
out:
- if (__is_root_gfid (inode->gfid) && !name) {
- ret = 1;
- if (buf) {
- GF_FREE (buf);
- }
- buf = GF_CALLOC (ret + 1, sizeof (char), gf_common_mt_char);
- if (buf) {
- strcpy (buf, "/");
- *bufp = buf;
- } else {
- ret = -ENOMEM;
- }
+ if (__is_root_gfid(inode->gfid) && !name) {
+ ret = 1;
+ GF_FREE(buf);
+ buf = GF_CALLOC(ret + 1, sizeof(char), gf_common_mt_char);
+ if (buf) {
+ strcpy(buf, "/");
+ *bufp = buf;
+ } else {
+ ret = -ENOMEM;
}
+ }
- if (ret < 0)
- *bufp = NULL;
- return ret;
+ if (ret < 0)
+ *bufp = NULL;
+ return ret;
}
-
int
-inode_path (inode_t *inode, const char *name, char **bufp)
+inode_path(inode_t *inode, const char *name, char **bufp)
{
- inode_table_t *table = NULL;
- int ret = -1;
+ inode_table_t *table = NULL;
+ int ret = -1;
- if (!inode)
- return -1;
+ if (!inode)
+ return -EINVAL;
- table = inode->table;
+ table = inode->table;
- pthread_mutex_lock (&table->lock);
- {
- ret = __inode_path (inode, name, bufp);
- }
- pthread_mutex_unlock (&table->lock);
+ pthread_mutex_lock(&table->lock);
+ {
+ ret = __inode_path(inode, name, bufp);
+ }
+ pthread_mutex_unlock(&table->lock);
- return ret;
+ return ret;
}
-
-static int
-inode_table_prune (inode_table_t *table)
+void
+__inode_table_set_lru_limit(inode_table_t *table, uint32_t lru_limit)
{
- int ret = 0;
- struct list_head purge = {0, };
- inode_t *del = NULL;
- inode_t *tmp = NULL;
- inode_t *entry = NULL;
-
- if (!table)
- return -1;
-
- INIT_LIST_HEAD (&purge);
+ table->lru_limit = lru_limit;
+ return;
+}
- pthread_mutex_lock (&table->lock);
- {
- while (table->lru_limit
- && table->lru_size > (table->lru_limit)) {
+void
+inode_table_set_lru_limit(inode_table_t *table, uint32_t lru_limit)
+{
+ pthread_mutex_lock(&table->lock);
+ {
+ __inode_table_set_lru_limit(table, lru_limit);
+ }
+ pthread_mutex_unlock(&table->lock);
- entry = list_entry (table->lru.next, inode_t, list);
+ inode_table_prune(table);
- table->lru_size--;
- __inode_retire (entry);
+ return;
+}
- ret++;
+static int
+inode_table_prune(inode_table_t *table)
+{
+ int ret = 0;
+ int ret1 = 0;
+ struct list_head purge = {
+ 0,
+ };
+ inode_t *del = NULL;
+ inode_t *tmp = NULL;
+ inode_t *entry = NULL;
+ uint64_t nlookup = 0;
+ int64_t lru_size = 0;
+
+ if (!table)
+ return -1;
+
+ INIT_LIST_HEAD(&purge);
+
+ pthread_mutex_lock(&table->lock);
+ {
+ if (!table->lru_limit)
+ goto purge_list;
+
+ lru_size = table->lru_size;
+ while (lru_size > (table->lru_limit)) {
+ if (list_empty(&table->lru)) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
+ LG_MSG_INVALID_INODE_LIST,
+ "Empty inode lru list found"
+ " but with (%d) lru_size",
+ table->lru_size);
+ break;
+ }
+
+ lru_size--;
+ entry = list_entry(table->lru.next, inode_t, list);
+ /* The logic of invalidation is required only if invalidator_fn
+ is present */
+ if (table->invalidator_fn) {
+ /* check for valid inode with 'nlookup' */
+ nlookup = GF_ATOMIC_GET(entry->nlookup);
+ if (nlookup) {
+ if (entry->invalidate_sent) {
+ list_move_tail(&entry->list, &table->lru);
+ continue;
+ }
+ __inode_ref(entry, true);
+ tmp = entry;
+ break;
}
-
- list_splice_init (&table->purge, &purge);
- table->purge_size = 0;
- }
- pthread_mutex_unlock (&table->lock);
-
+ }
+
+ table->lru_size--;
+ __inode_retire(entry);
+ ret++;
+ }
+
+ purge_list:
+ list_splice_init(&table->purge, &purge);
+ table->purge_size = 0;
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ /* Pick 1 inode for invalidation */
+ if (tmp) {
+ xlator_t *old_THIS = THIS;
+ THIS = table->invalidator_xl;
+ ret1 = table->invalidator_fn(table->invalidator_xl, tmp);
+ THIS = old_THIS;
+ pthread_mutex_lock(&table->lock);
{
- list_for_each_entry_safe (del, tmp, &purge, list) {
- list_del_init (&del->list);
- __inode_forget (del, 0);
- __inode_destroy (del);
- }
- }
-
- return ret;
+ if (!ret1) {
+ tmp->invalidate_sent = true;
+ __inode_unref(tmp, false);
+ } else {
+ /* Move this back to the lru list*/
+ __inode_unref(tmp, true);
+ }
+ }
+ pthread_mutex_unlock(&table->lock);
+ }
+
+ /* Just so that if purge list is handled too, then clear it off */
+ list_for_each_entry_safe(del, tmp, &purge, list)
+ {
+ list_del_init(&del->list);
+ inode_forget_atomic(del, 0);
+ __inode_destroy(del);
+ }
+
+ return ret;
}
-
static void
-__inode_table_init_root (inode_table_t *table)
+__inode_table_init_root(inode_table_t *table)
{
- inode_t *root = NULL;
- struct iatt iatt = {0, };
+ inode_t *root = NULL;
+ struct iatt iatt = {
+ 0,
+ };
- if (!table)
- return;
+ if (!table)
+ return;
- root = __inode_create (table);
+ root = inode_create(table);
- iatt.ia_gfid[15] = 1;
- iatt.ia_ino = 1;
- iatt.ia_type = IA_IFDIR;
+ list_add(&root->list, &table->lru);
+ table->lru_size++;
- table->root = root;
- __inode_link (root, NULL, NULL, &iatt);
-}
+ iatt.ia_gfid[15] = 1;
+ iatt.ia_ino = 1;
+ iatt.ia_type = IA_IFDIR;
+ __inode_link(root, NULL, NULL, &iatt, 0);
+ table->root = root;
+}
inode_table_t *
-inode_table_new (size_t lru_limit, xlator_t *xl)
+inode_table_with_invalidator(uint32_t lru_limit, xlator_t *xl,
+ int32_t (*invalidator_fn)(xlator_t *, inode_t *),
+ xlator_t *invalidator_xl)
{
- inode_table_t *new = NULL;
- int ret = -1;
- int i = 0;
+ inode_table_t *new = NULL;
+ uint32_t mem_pool_size = lru_limit;
+ int ret = -1;
+ int i = 0;
- new = (void *)GF_CALLOC(1, sizeof (*new), gf_common_mt_inode_table_t);
- if (!new)
- return NULL;
+ new = (void *)GF_CALLOC(1, sizeof(*new), gf_common_mt_inode_table_t);
+ if (!new)
+ return NULL;
- new->xl = xl;
+ new->xl = xl;
+ new->ctxcount = xl->graph->xl_count + 1;
- new->lru_limit = lru_limit;
+ new->lru_limit = lru_limit;
+ new->invalidator_fn = invalidator_fn;
+ new->invalidator_xl = invalidator_xl;
- new->hashsize = 14057; /* TODO: Random Number?? */
+ new->hashsize = 14057; /* TODO: Random Number?? */
- /* In case FUSE is initing the inode table. */
- if (lru_limit == 0)
- lru_limit = DEFAULT_INODE_MEMPOOL_ENTRIES;
+ /* In case FUSE is initing the inode table. */
+ if (!mem_pool_size || (mem_pool_size > DEFAULT_INODE_MEMPOOL_ENTRIES))
+ mem_pool_size = DEFAULT_INODE_MEMPOOL_ENTRIES;
- new->inode_pool = mem_pool_new (inode_t, lru_limit);
+ new->inode_pool = mem_pool_new(inode_t, mem_pool_size);
+ if (!new->inode_pool)
+ goto out;
- if (!new->inode_pool)
- goto out;
+ new->dentry_pool = mem_pool_new(dentry_t, mem_pool_size);
+ if (!new->dentry_pool)
+ goto out;
- new->dentry_pool = mem_pool_new (dentry_t, lru_limit);
+ new->inode_hash = (void *)GF_CALLOC(65536, sizeof(struct list_head),
+ gf_common_mt_list_head);
+ if (!new->inode_hash)
+ goto out;
- if (!new->dentry_pool)
- goto out;
+ new->name_hash = (void *)GF_CALLOC(new->hashsize, sizeof(struct list_head),
+ gf_common_mt_list_head);
+ if (!new->name_hash)
+ goto out;
- new->inode_hash = (void *)GF_CALLOC (65536,
- sizeof (struct list_head),
- gf_common_mt_list_head);
- if (!new->inode_hash)
- goto out;
+ /* if number of fd open in one process is more than this,
+ we may hit perf issues */
+ new->fd_mem_pool = mem_pool_new(fd_t, 1024);
- new->name_hash = (void *)GF_CALLOC (new->hashsize,
- sizeof (struct list_head),
- gf_common_mt_list_head);
- if (!new->name_hash)
- goto out;
+ if (!new->fd_mem_pool)
+ goto out;
- /* if number of fd open in one process is more than this,
- we may hit perf issues */
- new->fd_mem_pool = mem_pool_new (fd_t, 1024);
+ for (i = 0; i < 65536; i++) {
+ INIT_LIST_HEAD(&new->inode_hash[i]);
+ }
- if (!new->fd_mem_pool)
- goto out;
+ for (i = 0; i < new->hashsize; i++) {
+ INIT_LIST_HEAD(&new->name_hash[i]);
+ }
- for (i = 0; i < 65536; i++) {
- INIT_LIST_HEAD (&new->inode_hash[i]);
- }
+ INIT_LIST_HEAD(&new->active);
+ INIT_LIST_HEAD(&new->lru);
+ INIT_LIST_HEAD(&new->purge);
+ INIT_LIST_HEAD(&new->invalidate);
+ ret = gf_asprintf(&new->name, "%s/inode", xl->name);
+ if (-1 == ret) {
+ /* TODO: This should be ok to continue, check with avati */
+ ;
+ }
- for (i = 0; i < new->hashsize; i++) {
- INIT_LIST_HEAD (&new->name_hash[i]);
- }
+ new->cleanup_started = _gf_false;
- INIT_LIST_HEAD (&new->active);
- INIT_LIST_HEAD (&new->lru);
- INIT_LIST_HEAD (&new->purge);
+ __inode_table_init_root(new);
- ret = gf_asprintf (&new->name, "%s/inode", xl->name);
- if (-1 == ret) {
- /* TODO: This should be ok to continue, check with avati */
- ;
- }
+ pthread_mutex_init(&new->lock, NULL);
- __inode_table_init_root (new);
+ ret = 0;
+out:
+ if (ret) {
+ if (new) {
+ GF_FREE(new->inode_hash);
+ GF_FREE(new->name_hash);
+ if (new->dentry_pool)
+ mem_pool_destroy(new->dentry_pool);
+ if (new->inode_pool)
+ mem_pool_destroy(new->inode_pool);
+ GF_FREE(new);
+ new = NULL;
+ }
+ }
+
+ return new;
+}
- pthread_mutex_init (&new->lock, NULL);
+inode_table_t *
+inode_table_new(uint32_t lru_limit, xlator_t *xl)
+{
+ /* Only fuse for now requires the inode table with invalidator */
+ return inode_table_with_invalidator(lru_limit, xl, NULL, NULL);
+}
- ret = 0;
-out:
- if (ret) {
- if (new) {
- if (new->inode_hash)
- GF_FREE (new->inode_hash);
- if (new->name_hash)
- GF_FREE (new->name_hash);
- if (new->dentry_pool)
- mem_pool_destroy (new->dentry_pool);
- if (new->inode_pool)
- mem_pool_destroy (new->inode_pool);
- GF_FREE (new);
- new = NULL;
- }
+int
+inode_table_ctx_free(inode_table_t *table)
+{
+ int ret = 0;
+ inode_t *del = NULL;
+ inode_t *tmp = NULL;
+ int purge_count = 0;
+ int lru_count = 0;
+ int active_count = 0;
+ xlator_t *this = NULL;
+ int itable_size = 0;
+
+ if (!table)
+ return -1;
+
+ this = THIS;
+
+ pthread_mutex_lock(&table->lock);
+ {
+ list_for_each_entry_safe(del, tmp, &table->purge, list)
+ {
+ if (del->_ctx) {
+ __inode_ctx_free(del);
+ purge_count++;
+ }
}
- return new;
+ list_for_each_entry_safe(del, tmp, &table->lru, list)
+ {
+ if (del->_ctx) {
+ __inode_ctx_free(del);
+ lru_count++;
+ }
+ }
+
+ /* should the contexts of active inodes be freed?
+ * Since before this function being called fds would have
+ * been migrated and would have held the ref on the new
+ * inode from the new inode table, the older inode would not
+ * be used.
+ */
+ list_for_each_entry_safe(del, tmp, &table->active, list)
+ {
+ if (del->_ctx) {
+ __inode_ctx_free(del);
+ active_count++;
+ }
+ }
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ ret = purge_count + lru_count + active_count;
+ itable_size = table->active_size + table->lru_size + table->purge_size;
+ gf_msg_callingfn(this->name, GF_LOG_INFO, 0, LG_MSG_INODE_CONTEXT_FREED,
+ "total %d (itable size: "
+ "%d) inode contexts have been freed (active: %d, ("
+ "active size: %d), lru: %d, (lru size: %d), purge: "
+ "%d, (purge size: %d))",
+ ret, itable_size, active_count, table->active_size,
+ lru_count, table->lru_size, purge_count,
+ table->purge_size);
+ return ret;
}
+void
+inode_table_destroy_all(glusterfs_ctx_t *ctx)
+{
+ glusterfs_graph_t *trav_graph = NULL, *tmp = NULL;
+ xlator_t *tree = NULL;
+ inode_table_t *inode_table = NULL;
+
+ if (ctx == NULL)
+ goto out;
+
+ /* TODO: Traverse ctx->graphs with in ctx->lock and also the other
+ * graph additions and traversals in ctx->lock.
+ */
+ list_for_each_entry_safe(trav_graph, tmp, &ctx->graphs, list)
+ {
+ tree = trav_graph->first;
+ inode_table = tree->itable;
+ tree->itable = NULL;
+ if (inode_table)
+ inode_table_destroy(inode_table);
+ }
+out:
+ return;
+}
-inode_t *
-inode_from_path (inode_table_t *itable, const char *path)
+void
+inode_table_destroy(inode_table_t *inode_table)
{
- inode_t *inode = NULL;
- inode_t *parent = NULL;
- inode_t *root = NULL;
- inode_t *curr = NULL;
- char *pathname = NULL;
- char *component = NULL, *next_component = NULL;
- char *strtokptr = NULL;
+ inode_t *trav = NULL;
- if (!itable || !path)
- return NULL;
+ if (inode_table == NULL)
+ return;
- /* top-down approach */
- pathname = gf_strdup (path);
- if (pathname == NULL) {
- goto out;
- }
+ /* Ideally at this point in time, there should be no inodes with
+ * refs remaining. But there are quite a few chances where the inodes
+ * leak. So we can take three approaches for cleaning up the inode table:
+ * 1. Assume there are no leaks and then send a forget on all the inodes
+ * in lru list.(If no leaks there should be no inodes in active list)
+ * 2. Knowing there could be leaks and not freeing those inodes will
+ * also not free its inode context and this could leak a lot of
+ * memory, force free the inodes by changing the ref to 0.
+ * The problem with this is that any reference to inode after this
+ * calling this function will lead to a crash.
+ * 3. Knowing there could be leakes, just free the inode contexts of
+ * all the inodes. and let the inodes be alive. This way the major
+ * memory consumed by the inode contexts are freed, but there can
+ * be errors when any inode contexts are accessed after destroying
+ * this table.
+ *
+ * Not sure which is the approach to be taken, going by approach 2.
+ */
+
+ /* Approach 3:
+ * ret = inode_table_ctx_free (inode_table);
+ */
+ pthread_mutex_lock(&inode_table->lock);
+ {
+ inode_table->cleanup_started = _gf_true;
+ /* Process lru list first as we need to unset their dentry
+ * entries (the ones which may not be unset during
+ * '__inode_passivate' as they were hashed) which in turn
+ * shall unref their parent
+ *
+ * These parent inodes when unref'ed may well again fall
+ * into lru list and if we are at the end of traversing
+ * the list, we may miss to delete/retire that entry. Hence
+ * traverse the lru list till it gets empty.
+ */
+ while (!list_empty(&inode_table->lru)) {
+ trav = list_first_entry(&inode_table->lru, inode_t, list);
+ inode_forget_atomic(trav, 0);
+ __inode_retire(trav);
+ inode_table->lru_size--;
+ }
+
+ /* Same logic for invalidate list */
+ while (!list_empty(&inode_table->invalidate)) {
+ trav = list_first_entry(&inode_table->invalidate, inode_t, list);
+ inode_forget_atomic(trav, 0);
+ __inode_retire(trav);
+ inode_table->invalidate_size--;
+ }
+
+ while (!list_empty(&inode_table->active)) {
+ trav = list_first_entry(&inode_table->active, inode_t, list);
+ /* forget and unref the inode to retire and add it to
+ * purge list. By this time there should not be any
+ * inodes present in the active list except for root
+ * inode. Its a ref_leak otherwise. */
+ if (trav && (trav != inode_table->root))
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0,
+ LG_MSG_REF_COUNT,
+ "Active inode(%p) with refcount"
+ "(%d) found during cleanup",
+ trav, trav->ref);
+ inode_forget_atomic(trav, 0);
+ __inode_ref_reduce_by_n(trav, 0);
+ }
+ }
+ pthread_mutex_unlock(&inode_table->lock);
+
+ inode_table_prune(inode_table);
+
+ GF_FREE(inode_table->inode_hash);
+ GF_FREE(inode_table->name_hash);
+ if (inode_table->dentry_pool)
+ mem_pool_destroy(inode_table->dentry_pool);
+ if (inode_table->inode_pool)
+ mem_pool_destroy(inode_table->inode_pool);
+ if (inode_table->fd_mem_pool)
+ mem_pool_destroy(inode_table->fd_mem_pool);
+
+ pthread_mutex_destroy(&inode_table->lock);
+
+ GF_FREE(inode_table->name);
+ GF_FREE(inode_table);
+
+ return;
+}
- root = itable->root;
- parent = inode_ref (root);
- component = strtok_r (pathname, "/", &strtokptr);
+inode_t *
+inode_from_path(inode_table_t *itable, const char *path)
+{
+ inode_t *inode = NULL;
+ inode_t *parent = NULL;
+ inode_t *root = NULL;
+ inode_t *curr = NULL;
+ char *pathname = NULL;
+ char *component = NULL, *next_component = NULL;
+ char *strtokptr = NULL;
- if (component == NULL)
- /* root inode */
- inode = inode_ref (parent);
+ if (!itable || !path)
+ return NULL;
- while (component) {
- curr = inode_grep (itable, parent, component);
+ /* top-down approach */
+ pathname = gf_strdup(path);
+ if (pathname == NULL) {
+ goto out;
+ }
- if (curr == NULL) {
- strtok_r (NULL, "/", &strtokptr);
- break;
- }
+ root = itable->root;
+ parent = inode_ref(root);
+ component = strtok_r(pathname, "/", &strtokptr);
- next_component = strtok_r (NULL, "/", &strtokptr);
+ if (component == NULL)
+ /* root inode */
+ inode = inode_ref(parent);
- if (next_component) {
- inode_unref (parent);
- parent = curr;
- curr = NULL;
- } else {
- inode = curr;
- }
+ while (component) {
+ curr = inode_grep(itable, parent, component);
- component = next_component;
+ if (curr == NULL) {
+ strtok_r(NULL, "/", &strtokptr);
+ break;
}
- if (parent)
- inode_unref (parent);
+ next_component = strtok_r(NULL, "/", &strtokptr);
+
+ if (next_component) {
+ inode_unref(parent);
+ parent = curr;
+ curr = NULL;
+ } else {
+ inode = curr;
+ }
+
+ component = next_component;
+ }
+
+ if (parent)
+ inode_unref(parent);
- if (pathname)
- GF_FREE (pathname);
+ GF_FREE(pathname);
out:
- return inode;
+ return inode;
}
+void
+inode_set_need_lookup(inode_t *inode, xlator_t *this)
+{
+ uint64_t need_lookup = LOOKUP_NEEDED;
-int
-__inode_ctx_set2 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p,
- uint64_t *value2_p)
-{
- int ret = 0;
- int index = 0;
- int set_idx = -1;
-
- if (!inode || !xlator)
- return -1;
-
- for (index = 0; index < xlator->graph->xl_count; index++) {
- if (!inode->_ctx[index].xl_key) {
- if (set_idx == -1)
- set_idx = index;
- /* dont break, to check if key already exists
- further on */
- }
- if (inode->_ctx[index].xl_key == xlator) {
- set_idx = index;
- break;
- }
- }
+ if (!inode || !this)
+ return;
- if (set_idx == -1) {
- ret = -1;
- goto out;;
- }
+ inode_ctx_set(inode, this, &need_lookup);
- inode->_ctx[set_idx].xl_key = xlator;
- if (value1_p)
- inode->_ctx[set_idx].value1 = *value1_p;
- if (value2_p)
- inode->_ctx[set_idx].value2 = *value2_p;
-out:
+ return;
+}
+
+/* Function behaviour:
+ * Function return true if inode_ctx is not present,
+ * or value stored in inode_ctx is LOOKUP_NEEDED.
+ * If inode_ctx value is LOOKUP_NOT_NEEDED, which means
+ * inode_ctx is present for xlator this, but no lookup
+ * needed.
+ */
+gf_boolean_t
+inode_needs_lookup(inode_t *inode, xlator_t *this)
+{
+ uint64_t need_lookup = 0;
+ gf_boolean_t ret = _gf_false;
+ int op_ret = -1;
+
+ if (!inode || !this)
return ret;
+
+ op_ret = inode_ctx_get(inode, this, &need_lookup);
+ if (op_ret == -1) {
+ ret = _gf_true;
+ } else if (need_lookup == LOOKUP_NEEDED) {
+ ret = _gf_true;
+ need_lookup = LOOKUP_NOT_NEEDED;
+ inode_ctx_set(inode, this, &need_lookup);
+ }
+
+ return ret;
+}
+
+int
+__inode_ctx_set2(inode_t *inode, xlator_t *xlator, uint64_t *value1_p,
+ uint64_t *value2_p)
+{
+ int ret = 0;
+ int set_idx = -1;
+
+ if (!inode || !xlator || !inode->_ctx)
+ return -1;
+
+ set_idx = __inode_get_xl_index(inode, xlator);
+ if (set_idx == -1) {
+ ret = -1;
+ goto out;
+ ;
+ }
+
+ inode->_ctx[set_idx].xl_key = xlator;
+ if (value1_p)
+ inode->_ctx[set_idx].value1 = *value1_p;
+ if (value2_p)
+ inode->_ctx[set_idx].value2 = *value2_p;
+out:
+ return ret;
+}
+
+int
+__inode_ctx_set0(inode_t *inode, xlator_t *xlator, uint64_t *value1_p)
+{
+ return __inode_ctx_set2(inode, xlator, value1_p, NULL);
}
+int
+__inode_ctx_set1(inode_t *inode, xlator_t *xlator, uint64_t *value2_p)
+{
+ return __inode_ctx_set2(inode, xlator, NULL, value2_p);
+}
int
-inode_ctx_set2 (inode_t *inode, xlator_t *xlator, uint64_t *value1_p,
- uint64_t *value2_p)
+inode_ctx_set2(inode_t *inode, xlator_t *xlator, uint64_t *value1_p,
+ uint64_t *value2_p)
{
- int ret = 0;
+ int ret = 0;
- if (!inode || !xlator)
- return -1;
+ if (!inode || !xlator)
+ return -1;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_set2 (inode, xlator, value1_p, value2_p);
- }
- UNLOCK (&inode->lock);
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_set2(inode, xlator, value1_p, value2_p);
+ }
+ UNLOCK(&inode->lock);
- return ret;
+ return ret;
}
+int
+inode_ctx_set1(inode_t *inode, xlator_t *xlator, uint64_t *value2_p)
+{
+ int ret = 0;
+
+ if (!inode || !xlator)
+ return -1;
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_set1(inode, xlator, value2_p);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
int
-__inode_ctx_get2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
- uint64_t *value2)
+inode_ctx_set0(inode_t *inode, xlator_t *xlator, uint64_t *value1_p)
{
- int index = 0;
- int ret = 0;
+ int ret = 0;
- if (!inode || !xlator)
- return -1;
+ if (!inode || !xlator)
+ return -1;
- for (index = 0; index < xlator->graph->xl_count; index++) {
- if (inode->_ctx[index].xl_key == xlator)
- break;
- }
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_set0(inode, xlator, value1_p);
+ }
+ UNLOCK(&inode->lock);
- if (index == xlator->graph->xl_count) {
- ret = -1;
- goto out;
- }
+ return ret;
+}
- if (value1)
- *value1 = inode->_ctx[index].value1;
+int
+__inode_ctx_get2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2)
+{
+ int index = 0;
+ int ret = -1;
+
+ if (!inode || !xlator || !inode->_ctx)
+ goto out;
- if (value2)
- *value2 = inode->_ctx[index].value2;
+ index = xlator->xl_id;
+ if (inode->_ctx[index].xl_key != xlator)
+ goto out;
+ if (inode->_ctx[index].value1) {
+ if (value1) {
+ *value1 = inode->_ctx[index].value1;
+ ret = 0;
+ }
+ }
+ if (inode->_ctx[index].value2) {
+ if (value2) {
+ *value2 = inode->_ctx[index].value2;
+ ret = 0;
+ }
+ }
out:
- return ret;
+ return ret;
}
+int
+__inode_ctx_get0(inode_t *inode, xlator_t *xlator, uint64_t *value1)
+{
+ uint64_t tmp_value = 0;
+ int ret = 0;
+
+ ret = __inode_ctx_get2(inode, xlator, &tmp_value, NULL);
+ if (!ret && value1)
+ *value1 = tmp_value;
+
+ return ret;
+}
int
-inode_ctx_get2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
- uint64_t *value2)
+__inode_ctx_get1(inode_t *inode, xlator_t *xlator, uint64_t *value2)
{
- int ret = 0;
+ uint64_t tmp_value = 0;
+ int ret = 0;
- if (!inode || !xlator)
- return -1;
+ ret = __inode_ctx_get2(inode, xlator, NULL, &tmp_value);
+ if (!ret && value2)
+ *value2 = tmp_value;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get2 (inode, xlator, value1, value2);
- }
- UNLOCK (&inode->lock);
+ return ret;
+}
- return ret;
+int
+inode_ctx_get2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2)
+{
+ int ret = 0;
+
+ if (!inode || !xlator)
+ return -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get2(inode, xlator, value1, value2);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
}
+int
+inode_ctx_get1(inode_t *inode, xlator_t *xlator, uint64_t *value2)
+{
+ int ret = 0;
+
+ if (!inode || !xlator)
+ return -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get1(inode, xlator, value2);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
int
-inode_ctx_del2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
- uint64_t *value2)
+inode_ctx_get0(inode_t *inode, xlator_t *xlator, uint64_t *value1)
{
- int index = 0;
- int ret = 0;
+ int ret = 0;
- if (!inode || !xlator)
- return -1;
+ if (!inode || !xlator)
+ return -1;
- LOCK (&inode->lock);
- {
- for (index = 0; index < xlator->graph->xl_count; index++) {
- if (inode->_ctx[index].xl_key == xlator)
- break;
- }
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get0(inode, xlator, value1);
+ }
+ UNLOCK(&inode->lock);
- if (index == xlator->graph->xl_count) {
- ret = -1;
- goto unlock;
- }
+ return ret;
+}
- if (value1)
- *value1 = inode->_ctx[index].value1;
- if (value2)
- *value2 = inode->_ctx[index].value2;
+int
+inode_ctx_del2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2)
+{
+ int index = 0;
+ int ret = 0;
- inode->_ctx[index].key = 0;
- inode->_ctx[index].value1 = 0;
- inode->_ctx[index].value2 = 0;
+ if (!inode || !xlator)
+ return -1;
+
+ LOCK(&inode->lock);
+ {
+ if (!inode->_ctx)
+ goto unlock;
+
+ index = xlator->xl_id;
+ if (inode->_ctx[index].xl_key != xlator) {
+ ret = -1;
+ goto unlock;
}
+
+ if (inode->_ctx[index].value1 && value1)
+ *value1 = inode->_ctx[index].value1;
+ if (inode->_ctx[index].value2 && value2)
+ *value2 = inode->_ctx[index].value2;
+
+ inode->_ctx[index].key = 0;
+ inode->_ctx[index].xl_key = NULL;
+ inode->_ctx[index].value1 = 0;
+ inode->_ctx[index].value2 = 0;
+ }
unlock:
- UNLOCK (&inode->lock);
+ UNLOCK(&inode->lock);
- return ret;
+ return ret;
}
-
-void
-inode_dump (inode_t *inode, char *prefix)
+/* function behavior:
+ - if value1 is set, value1 in ctx is reset to 0 with current value passed
+ back in value1 address.
+ - if value2 is set, value2 in ctx is reset to 0 with current value passed
+ back in value2 address.
+ - if both are set, both fields are reset.
+*/
+static int
+__inode_ctx_reset2(inode_t *inode, xlator_t *xlator, uint64_t *value1,
+ uint64_t *value2)
{
- int ret = -1;
- xlator_t *xl = NULL;
- int i = 0;
- fd_t *fd = NULL;
- struct _inode_ctx *inode_ctx = NULL;
- struct list_head fd_list;
+ int index = 0;
+ int ret = 0;
- if (!inode)
- return;
+ if (!inode || !xlator)
+ return -1;
- INIT_LIST_HEAD (&fd_list);
+ LOCK(&inode->lock);
+ {
+ index = xlator->xl_id;
+ if (inode->_ctx[index].xl_key != xlator) {
+ ret = -1;
+ goto unlock;
+ }
- ret = TRY_LOCK(&inode->lock);
- if (ret != 0) {
- return;
+ if (inode->_ctx[index].value1 && value1) {
+ *value1 = inode->_ctx[index].value1;
+ inode->_ctx[index].value1 = 0;
+ }
+ if (inode->_ctx[index].value2 && value2) {
+ *value2 = inode->_ctx[index].value2;
+ inode->_ctx[index].value2 = 0;
}
+ }
+unlock:
+ UNLOCK(&inode->lock);
- {
- gf_proc_dump_write("gfid", "%s", uuid_utoa (inode->gfid));
- gf_proc_dump_write("nlookup", "%ld", inode->nlookup);
- gf_proc_dump_write("ref", "%u", inode->ref);
- gf_proc_dump_write("ia_type", "%d", inode->ia_type);
- if (inode->_ctx) {
- inode_ctx = GF_CALLOC (inode->table->xl->graph->xl_count,
- sizeof (*inode_ctx),
- gf_common_mt_inode_ctx);
- if (inode_ctx == NULL) {
- goto unlock;
- }
-
- for (i = 0; i < inode->table->xl->graph->xl_count; i++) {
- inode_ctx[i] = inode->_ctx[i];
- }
- }
+ return ret;
+}
- if (dump_options.xl_options.dump_fdctx != _gf_true)
- goto unlock;
+int
+inode_ctx_reset2(inode_t *inode, xlator_t *xlator, uint64_t *value1_p,
+ uint64_t *value2_p)
+{
+ uint64_t tmp_value1 = 0;
+ uint64_t tmp_value2 = 0;
+ int ret = 0;
+ ret = __inode_ctx_reset2(inode, xlator, &tmp_value1, &tmp_value2);
+ if (!ret) {
+ if (value1_p)
+ *value1_p = tmp_value1;
+ if (value2_p)
+ *value2_p = tmp_value2;
+ }
+ return ret;
+}
- list_for_each_entry (fd, &inode->fd_list, inode_list) {
- fd_ctx_dump (fd, prefix);
- }
- }
-unlock:
- UNLOCK(&inode->lock);
-
- if (inode_ctx && (dump_options.xl_options.dump_inodectx == _gf_true)) {
- for (i = 0; i < inode->table->xl->graph->xl_count; i++) {
- if (inode_ctx[i].xl_key) {
- xl = (xlator_t *)(long)inode_ctx[i].xl_key;
- if (xl->dumpops && xl->dumpops->inodectx)
- xl->dumpops->inodectx (xl, inode);
- }
- }
- }
+int
+inode_ctx_reset1(inode_t *inode, xlator_t *xlator, uint64_t *value2_p)
+{
+ uint64_t tmp_value2 = 0;
+ int ret = 0;
- if (inode_ctx != NULL) {
- GF_FREE (inode_ctx);
- }
+ ret = __inode_ctx_reset2(inode, xlator, NULL, &tmp_value2);
- return;
+ if (!ret && value2_p)
+ *value2_p = tmp_value2;
+
+ return ret;
}
+int
+inode_ctx_reset0(inode_t *inode, xlator_t *xlator, uint64_t *value1_p)
+{
+ uint64_t tmp_value1 = 0;
+ int ret = 0;
-void
-inode_table_dump (inode_table_t *itable, char *prefix)
+ ret = __inode_ctx_reset2(inode, xlator, &tmp_value1, NULL);
+
+ if (!ret && value1_p)
+ *value1_p = tmp_value1;
+
+ return ret;
+}
+
+int
+inode_is_linked(inode_t *inode)
{
+ int ret = 0;
+ inode_table_t *table = NULL;
- char key[GF_DUMP_MAX_BUF_LEN];
- int ret = 0;
+ if (!inode) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INODE_NOT_FOUND,
+ "inode not found");
+ return 0;
+ }
- if (!itable)
- return;
+ table = inode->table;
- memset(key, 0, sizeof(key));
- ret = pthread_mutex_trylock(&itable->lock);
+ pthread_mutex_lock(&table->lock);
+ {
+ ret = __is_inode_hashed(inode);
+ }
+ pthread_mutex_unlock(&table->lock);
- if (ret != 0) {
- return;
+ return ret;
+}
+
+void
+inode_dump(inode_t *inode, char *prefix)
+{
+ int ret = -1;
+ xlator_t *xl = NULL;
+ int i = 0;
+ fd_t *fd = NULL;
+ struct _inode_ctx *inode_ctx = NULL;
+ struct list_head fd_list;
+ int ref = 0;
+ char key[GF_DUMP_MAX_BUF_LEN];
+ uint64_t nlookup = 0;
+
+ if (!inode)
+ return;
+
+ INIT_LIST_HEAD(&fd_list);
+
+ ret = TRY_LOCK(&inode->lock);
+ if (ret != 0) {
+ return;
+ }
+
+ {
+ nlookup = GF_ATOMIC_GET(inode->nlookup);
+ gf_proc_dump_write("gfid", "%s", uuid_utoa(inode->gfid));
+ gf_proc_dump_write("nlookup", "%" PRIu64, nlookup);
+ gf_proc_dump_write("fd-count", "%u", inode->fd_count);
+ gf_proc_dump_write("active-fd-count", "%u", inode->active_fd_count);
+ gf_proc_dump_write("ref", "%u", inode->ref);
+ gf_proc_dump_write("invalidate-sent", "%d", inode->invalidate_sent);
+ gf_proc_dump_write("ia_type", "%d", inode->ia_type);
+ if (inode->_ctx) {
+ inode_ctx = GF_CALLOC(inode->table->ctxcount, sizeof(*inode_ctx),
+ gf_common_mt_inode_ctx);
+ if (inode_ctx == NULL) {
+ goto unlock;
+ }
+
+ for (i = 0; i < inode->table->ctxcount; i++) {
+ inode_ctx[i] = inode->_ctx[i];
+ xl = inode_ctx[i].xl_key;
+ ref = inode_ctx[i].ref;
+ if (ref != 0 && xl) {
+ gf_proc_dump_build_key(key, "ref_by_xl:", "%s", xl->name);
+ gf_proc_dump_write(key, "%d", ref);
+ }
+ }
}
- gf_proc_dump_build_key(key, prefix, "hashsize");
- gf_proc_dump_write(key, "%d", itable->hashsize);
- gf_proc_dump_build_key(key, prefix, "name");
- gf_proc_dump_write(key, "%s", itable->name);
+ if (dump_options.xl_options.dump_fdctx != _gf_true)
+ goto unlock;
- gf_proc_dump_build_key(key, prefix, "lru_limit");
- gf_proc_dump_write(key, "%d", itable->lru_limit);
- gf_proc_dump_build_key(key, prefix, "active_size");
- gf_proc_dump_write(key, "%d", itable->active_size);
- gf_proc_dump_build_key(key, prefix, "lru_size");
- gf_proc_dump_write(key, "%d", itable->lru_size);
- gf_proc_dump_build_key(key, prefix, "purge_size");
- gf_proc_dump_write(key, "%d", itable->purge_size);
+ list_for_each_entry(fd, &inode->fd_list, inode_list)
+ {
+ fd_ctx_dump(fd, prefix);
+ }
+ }
+unlock:
+ UNLOCK(&inode->lock);
+
+ if (inode_ctx && (dump_options.xl_options.dump_inodectx == _gf_true)) {
+ for (i = 0; i < inode->table->ctxcount; i++) {
+ if (inode_ctx[i].xl_key) {
+ xl = (xlator_t *)(long)inode_ctx[i].xl_key;
+ if (xl->dumpops && xl->dumpops->inodectx)
+ xl->dumpops->inodectx(xl, inode);
+ }
+ }
+ }
- INODE_DUMP_LIST(&itable->active, key, prefix, "active");
- INODE_DUMP_LIST(&itable->lru, key, prefix, "lru");
- INODE_DUMP_LIST(&itable->purge, key, prefix, "purge");
+ GF_FREE(inode_ctx);
- pthread_mutex_unlock(&itable->lock);
+ return;
}
void
-inode_dump_to_dict (inode_t *inode, char *prefix, dict_t *dict)
+inode_table_dump(inode_table_t *itable, char *prefix)
{
- int ret = -1;
- char key[GF_DUMP_MAX_BUF_LEN] = {0,};
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int ret = 0;
- ret = TRY_LOCK (&inode->lock);
- if (ret)
- return;
+ if (!itable)
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.gfid", prefix);
- ret = dict_set_dynstr (dict, key, gf_strdup (uuid_utoa (inode->gfid)));
- if (ret)
- goto out;
+ ret = pthread_mutex_trylock(&itable->lock);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.nlookup", prefix);
- ret = dict_set_uint64 (dict, key, inode->nlookup);
- if (ret)
- goto out;
+ if (ret != 0) {
+ return;
+ }
+
+ gf_proc_dump_build_key(key, prefix, "hashsize");
+ gf_proc_dump_write(key, "%" GF_PRI_SIZET, itable->hashsize);
+ gf_proc_dump_build_key(key, prefix, "name");
+ gf_proc_dump_write(key, "%s", itable->name);
+
+ gf_proc_dump_build_key(key, prefix, "lru_limit");
+ gf_proc_dump_write(key, "%d", itable->lru_limit);
+ gf_proc_dump_build_key(key, prefix, "active_size");
+ gf_proc_dump_write(key, "%d", itable->active_size);
+ gf_proc_dump_build_key(key, prefix, "lru_size");
+ gf_proc_dump_write(key, "%d", itable->lru_size);
+ gf_proc_dump_build_key(key, prefix, "purge_size");
+ gf_proc_dump_write(key, "%d", itable->purge_size);
+ gf_proc_dump_build_key(key, prefix, "invalidate_size");
+ gf_proc_dump_write(key, "%d", itable->invalidate_size);
+
+ INODE_DUMP_LIST(&itable->active, key, prefix, "active");
+ INODE_DUMP_LIST(&itable->lru, key, prefix, "lru");
+ INODE_DUMP_LIST(&itable->purge, key, prefix, "purge");
+ INODE_DUMP_LIST(&itable->invalidate, key, prefix, "invalidate");
+
+ pthread_mutex_unlock(&itable->lock);
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.ref", prefix);
- ret = dict_set_uint32 (dict, key, inode->ref);
- if (ret)
- goto out;
+void
+inode_dump_to_dict(inode_t *inode, char *prefix, dict_t *dict)
+{
+ int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ uint64_t nlookup = 0;
+
+ ret = TRY_LOCK(&inode->lock);
+ if (ret)
+ return;
+
+ snprintf(key, sizeof(key), "%s.gfid", prefix);
+ ret = dict_set_dynstr(dict, key, gf_strdup(uuid_utoa(inode->gfid)));
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.nlookup", prefix);
+ nlookup = GF_ATOMIC_GET(inode->nlookup);
+ ret = dict_set_uint64(dict, key, nlookup);
+ if (ret)
+ goto out;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.ia_type", prefix);
- ret = dict_set_int32 (dict, key, inode->ia_type);
+ snprintf(key, sizeof(key), "%s.ref", prefix);
+ ret = dict_set_uint32(dict, key, inode->ref);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.ia_type", prefix);
+ ret = dict_set_int32(dict, key, inode->ia_type);
+ if (ret)
+ goto out;
out:
- UNLOCK (&inode->lock);
- return;
+ UNLOCK(&inode->lock);
+ return;
}
void
-inode_table_dump_to_dict (inode_table_t *itable, char *prefix, dict_t *dict)
+inode_table_dump_to_dict(inode_table_t *itable, char *prefix, dict_t *dict)
{
- char key[GF_DUMP_MAX_BUF_LEN] = {0,};
- int ret = 0;
- inode_t *inode = NULL;
- int count = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ int ret = 0;
+#ifdef DEBUG
+ inode_t *inode = NULL;
+ int count = 0;
+#endif
+ ret = pthread_mutex_trylock(&itable->lock);
+ if (ret)
+ return;
- ret = pthread_mutex_trylock (&itable->lock);
- if (ret)
- return;
+ snprintf(key, sizeof(key), "%s.itable.lru_limit", prefix);
+ ret = dict_set_uint32(dict, key, itable->lru_limit);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.itable.active_size", prefix);
+ ret = dict_set_uint32(dict, key, itable->active_size);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.itable.lru_size", prefix);
+ ret = dict_set_uint32(dict, key, itable->lru_size);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "%s.itable.purge_size", prefix);
+ ret = dict_set_uint32(dict, key, itable->purge_size);
+ if (ret)
+ goto out;
+
+#ifdef DEBUG
+ /* Dumping inode details in dictionary and sending it to CLI is not
+ required as when a developer (or support team) asks for this command
+ output, they just want to get top level detail of inode table.
+ If one wants to debug, let them take statedump and debug, this
+ wouldn't be available in CLI during production setup.
+ */
+ list_for_each_entry(inode, &itable->active, list)
+ {
+ snprintf(key, sizeof(key), "%s.itable.active%d", prefix, count++);
+ inode_dump_to_dict(inode, key, dict);
+ }
+ count = 0;
+
+ list_for_each_entry(inode, &itable->lru, list)
+ {
+ snprintf(key, sizeof(key), "%s.itable.lru%d", prefix, count++);
+ inode_dump_to_dict(inode, key, dict);
+ }
+ count = 0;
+
+ list_for_each_entry(inode, &itable->purge, list)
+ {
+ snprintf(key, sizeof(key), "%s.itable.purge%d", prefix, count++);
+ inode_dump_to_dict(inode, key, dict);
+ }
+#endif
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.itable.active_size", prefix);
- ret = dict_set_uint32 (dict, key, itable->active_size);
- if (ret)
- goto out;
+out:
+ pthread_mutex_unlock(&itable->lock);
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.itable.lru_size", prefix);
- ret = dict_set_uint32 (dict, key, itable->lru_size);
- if (ret)
- goto out;
+ return;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.itable.purge_size", prefix);
- ret = dict_set_uint32 (dict, key, itable->purge_size);
- if (ret)
- goto out;
+size_t
+inode_ctx_size(inode_t *inode)
+{
+ int i = 0;
+ size_t size = 0;
+ xlator_t *xl = NULL, *old_THIS = NULL;
- list_for_each_entry (inode, &itable->active, list) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.itable.active%d", prefix,
- count++);
- inode_dump_to_dict (inode, key, dict);
- }
- count = 0;
+ if (!inode)
+ goto out;
- list_for_each_entry (inode, &itable->lru, list) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.itable.lru%d", prefix,
- count++);
- inode_dump_to_dict (inode, key, dict);
- }
- count = 0;
+ LOCK(&inode->lock);
+ {
+ for (i = 0; i < inode->table->ctxcount; i++) {
+ if (!inode->_ctx[i].xl_key)
+ continue;
+
+ xl = (xlator_t *)(long)inode->_ctx[i].xl_key;
+ old_THIS = THIS;
+ THIS = xl;
+
+ /* If inode ref is taken when THIS is global xlator,
+ * the ctx xl_key is set, but the value is NULL.
+ * For global xlator the cbks can be NULL, hence check
+ * for the same */
+ if (!xl->cbks) {
+ THIS = old_THIS;
+ continue;
+ }
+
+ if (xl->cbks->ictxsize)
+ size += xl->cbks->ictxsize(xl, inode);
- list_for_each_entry (inode, &itable->purge, list) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.itable.purge%d", prefix,
- count++);
- inode_dump_to_dict (inode, key, dict);
+ THIS = old_THIS;
}
+ }
+ UNLOCK(&inode->lock);
out:
- pthread_mutex_unlock (&itable->lock);
+ return size;
+}
+
+/* *
+ * This function finds name of the inode, if it has dentry. The dentry will be
+ * created only if inode_link happens with valid parent and name. And this
+ * function is only applicable for directories because multiple dentries are
+ * not possible(no hardlinks)
+ * */
+void
+inode_find_directory_name(inode_t *inode, const char **name)
+{
+ dentry_t *dentry = NULL;
+ GF_VALIDATE_OR_GOTO("inode", inode, out);
+ GF_VALIDATE_OR_GOTO("inode", name, out);
+
+ if (!IA_ISDIR(inode->ia_type))
return;
+
+ pthread_mutex_lock(&inode->table->lock);
+ {
+ dentry = __dentry_search_arbit(inode);
+ if (dentry) {
+ *name = dentry->name;
+ }
+ }
+ pthread_mutex_unlock(&inode->table->lock);
+out:
+ return;
}
diff --git a/libglusterfs/src/inode.h b/libglusterfs/src/inode.h
deleted file mode 100644
index 41003df71ca..00000000000
--- a/libglusterfs/src/inode.h
+++ /dev/null
@@ -1,202 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _INODE_H
-#define _INODE_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdint.h>
-#include <sys/types.h>
-
-#define DEFAULT_INODE_MEMPOOL_ENTRIES 32 * 1024
-#define INODE_PATH_FMT "<gfid:%s>"
-struct _inode_table;
-typedef struct _inode_table inode_table_t;
-
-struct _inode;
-typedef struct _inode inode_t;
-
-struct _dentry;
-typedef struct _dentry dentry_t;
-
-#include "list.h"
-#include "xlator.h"
-#include "iatt.h"
-#include "uuid.h"
-
-
-struct _inode_table {
- pthread_mutex_t lock;
- size_t hashsize; /* bucket size of inode hash and dentry hash */
- char *name; /* name of the inode table, just for gf_log() */
- inode_t *root; /* root directory inode, with number 1 */
- xlator_t *xl; /* xlator to be called to do purge */
- uint32_t lru_limit; /* maximum LRU cache size */
- struct list_head *inode_hash; /* buckets for inode hash table */
- struct list_head *name_hash; /* buckets for dentry hash table */
- struct list_head active; /* list of inodes currently active (in an fop) */
- uint32_t active_size; /* count of inodes in active list */
- struct list_head lru; /* list of inodes recently used.
- lru.next most recent */
- uint32_t lru_size; /* count of inodes in lru list */
- struct list_head purge; /* list of inodes to be purged soon */
- uint32_t purge_size; /* count of inodes in purge list */
-
- struct mem_pool *inode_pool; /* memory pool for inodes */
- struct mem_pool *dentry_pool; /* memory pool for dentrys */
- struct mem_pool *fd_mem_pool; /* memory pool for fd_t */
-};
-
-
-struct _dentry {
- struct list_head inode_list; /* list of dentries of inode */
- struct list_head hash; /* hash table pointers */
- inode_t *inode; /* inode of this directory entry */
- char *name; /* name of the directory entry */
- inode_t *parent; /* directory of the entry */
-};
-
-struct _inode_ctx {
- union {
- uint64_t key;
- xlator_t *xl_key;
- };
- union {
- uint64_t value1;
- void *ptr1;
- };
- union {
- uint64_t value2;
- void *ptr2;
- };
-};
-
-struct _inode {
- inode_table_t *table; /* the table this inode belongs to */
- uuid_t gfid;
- gf_lock_t lock;
- uint64_t nlookup;
- uint32_t ref; /* reference count on this inode */
- ia_type_t ia_type; /* what kind of file */
- struct list_head fd_list; /* list of open files on this inode */
- struct list_head dentry_list; /* list of directory entries for this inode */
- struct list_head hash; /* hash table pointers */
- struct list_head list; /* active/lru/purge */
-
- struct _inode_ctx *_ctx; /* replacement for dict_t *(inode->ctx) */
-};
-
-
-#define UUID0_STR "00000000-0000-0000-0000-000000000000"
-#define GFID_STR_PFX "<gfid:" UUID0_STR ">"
-#define GFID_STR_PFX_LEN (sizeof (GFID_STR_PFX) - 1)
-
-inode_table_t *
-inode_table_new (size_t lru_limit, xlator_t *xl);
-
-inode_t *
-inode_new (inode_table_t *table);
-
-inode_t *
-inode_link (inode_t *inode, inode_t *parent,
- const char *name, struct iatt *stbuf);
-
-void
-inode_unlink (inode_t *inode, inode_t *parent, const char *name);
-
-inode_t *
-inode_parent (inode_t *inode, uuid_t pargfid, const char *name);
-
-inode_t *
-inode_ref (inode_t *inode);
-
-inode_t *
-inode_unref (inode_t *inode);
-
-int
-inode_lookup (inode_t *inode);
-
-int
-inode_forget (inode_t *inode, uint64_t nlookup);
-
-int
-inode_rename (inode_table_t *table, inode_t *olddir, const char *oldname,
- inode_t *newdir, const char *newname,
- inode_t *inode, struct iatt *stbuf);
-
-inode_t *
-inode_grep (inode_table_t *table, inode_t *parent, const char *name);
-
-int
-inode_grep_for_gfid (inode_table_t *table, inode_t *parent, const char *name,
- uuid_t gfid, ia_type_t *type);
-
-inode_t *
-inode_find (inode_table_t *table, uuid_t gfid);
-
-int
-inode_path (inode_t *inode, const char *name, char **bufp);
-
-int
-__inode_path (inode_t *inode, const char *name, char **bufp);
-
-inode_t *
-inode_from_path (inode_table_t *table, const char *path);
-
-int
-inode_ctx_set2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
- uint64_t *value2);
-int
-__inode_ctx_set2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
- uint64_t *value2);
-
-int
-inode_ctx_get2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
- uint64_t *value2);
-int
-__inode_ctx_get2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
- uint64_t *value2);
-
-int
-inode_ctx_del2 (inode_t *inode, xlator_t *xlator, uint64_t *value1,
- uint64_t *value2);
-
-inode_t *
-inode_resolve (inode_table_t *table, char *path);
-
-#define __inode_ctx_set(i,x,v_p) __inode_ctx_set2(i,x,v_p,0)
-#define inode_ctx_set(i,x,v_p) inode_ctx_set2(i,x,v_p,0)
-
-static inline int
-__inode_ctx_put(inode_t *inode, xlator_t *this, uint64_t v)
-{
- return __inode_ctx_set2 (inode, this, &v, 0);
-}
-
-static inline int
-inode_ctx_put(inode_t *inode, xlator_t *this, uint64_t v)
-{
- return inode_ctx_set2(inode, this, &v, 0);
-}
-
-#define __inode_ctx_get(i,x,v) __inode_ctx_get2(i,x,v,0)
-#define inode_ctx_get(i,x,v) inode_ctx_get2(i,x,v,0)
-
-#define inode_ctx_del(i,x,v) inode_ctx_del2(i,x,v,0)
-
-
-gf_boolean_t
-__is_root_gfid (uuid_t gfid);
-
-#endif /* _INODE_H */
diff --git a/libglusterfs/src/iobuf.c b/libglusterfs/src/iobuf.c
index f68c6c74879..4e7d2958764 100644
--- a/libglusterfs/src/iobuf.c
+++ b/libglusterfs/src/iobuf.c
@@ -8,1084 +8,1139 @@
cases as published by the Free Software Foundation.
*/
-
-#include "iobuf.h"
-#include "statedump.h"
+#include "glusterfs/iobuf.h"
+#include "glusterfs/statedump.h"
#include <stdio.h>
-
+#include "glusterfs/libglusterfs-messages.h"
/*
TODO: implement destroy margins and prefetching of arenas
*/
-#define IOBUF_ARENA_MAX_INDEX (sizeof (gf_iobuf_init_config) / \
- (sizeof (struct iobuf_init_config)))
+#define IOBUF_ARENA_MAX_INDEX \
+ (sizeof(gf_iobuf_init_config) / (sizeof(struct iobuf_init_config)))
/* Make sure this array is sorted based on pagesize */
-struct iobuf_init_config gf_iobuf_init_config[] = {
- /* { pagesize, num_pages }, */
- {128, 1024},
- {512, 512},
- {2 * 1024, 512},
- {8 * 1024, 128},
- {32 * 1024, 64},
- {128 * 1024, 32},
- {256 * 1024, 8},
- {1 * 1024 * 1024, 2},
+static const struct iobuf_init_config gf_iobuf_init_config[] = {
+ /* { pagesize, num_pages }, */
+ {128, 1024}, {512, 512}, {2 * 1024, 512}, {8 * 1024, 128},
+ {32 * 1024, 64}, {128 * 1024, 32}, {256 * 1024, 8}, {1 * 1024 * 1024, 2},
};
-int
-gf_iobuf_get_arena_index (size_t page_size)
+static int
+gf_iobuf_get_arena_index(const size_t page_size)
{
- int i = -1;
-
- for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
- if (page_size <= gf_iobuf_init_config[i].pagesize)
- break;
- }
+ int i;
- if (i >= IOBUF_ARENA_MAX_INDEX)
- i = -1;
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ if (page_size <= gf_iobuf_init_config[i].pagesize)
+ return i;
+ }
- return i;
+ return -1;
}
-size_t
-gf_iobuf_get_pagesize (size_t page_size)
+static size_t
+gf_iobuf_get_pagesize(const size_t page_size, int *index)
{
- int i = 0;
- size_t size = 0;
-
- for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
- size = gf_iobuf_init_config[i].pagesize;
- if (page_size <= size)
- break;
+ int i;
+ size_t size = 0;
+
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ size = gf_iobuf_init_config[i].pagesize;
+ if (page_size <= size) {
+ if (index != NULL)
+ *index = i;
+ return size;
}
+ }
- if (i >= IOBUF_ARENA_MAX_INDEX)
- size = -1;
-
- return size;
+ return -1;
}
-void
-__iobuf_arena_init_iobufs (struct iobuf_arena *iobuf_arena)
+static void
+__iobuf_arena_init_iobufs(struct iobuf_arena *iobuf_arena)
{
- int iobuf_cnt = 0;
- struct iobuf *iobuf = NULL;
- int offset = 0;
- int i = 0;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
-
- iobuf_cnt = iobuf_arena->page_count;
-
- iobuf_arena->iobufs = GF_CALLOC (sizeof (*iobuf), iobuf_cnt,
- gf_common_mt_iobuf);
- if (!iobuf_arena->iobufs)
- return;
+ const int iobuf_cnt = iobuf_arena->page_count;
+ struct iobuf *iobuf = NULL;
+ int offset = 0;
+ int i = 0;
+
+ iobuf_arena->iobufs = GF_CALLOC(sizeof(*iobuf), iobuf_cnt,
+ gf_common_mt_iobuf);
+ if (!iobuf_arena->iobufs)
+ return;
- iobuf = iobuf_arena->iobufs;
- for (i = 0; i < iobuf_cnt; i++) {
- INIT_LIST_HEAD (&iobuf->list);
- LOCK_INIT (&iobuf->lock);
+ iobuf = iobuf_arena->iobufs;
+ for (i = 0; i < iobuf_cnt; i++) {
+ INIT_LIST_HEAD(&iobuf->list);
+ LOCK_INIT(&iobuf->lock);
- iobuf->iobuf_arena = iobuf_arena;
+ iobuf->iobuf_arena = iobuf_arena;
- iobuf->ptr = iobuf_arena->mem_base + offset;
+ iobuf->ptr = iobuf_arena->mem_base + offset;
- list_add (&iobuf->list, &iobuf_arena->passive.list);
- iobuf_arena->passive_cnt++;
+ list_add(&iobuf->list, &iobuf_arena->passive.list);
+ iobuf_arena->passive_cnt++;
- offset += iobuf_arena->page_size;
- iobuf++;
- }
+ offset += iobuf_arena->page_size;
+ iobuf++;
+ }
-out:
- return;
+ return;
}
-
-void
-__iobuf_arena_destroy_iobufs (struct iobuf_arena *iobuf_arena)
+static void
+__iobuf_arena_destroy_iobufs(struct iobuf_arena *iobuf_arena)
{
- int iobuf_cnt = 0;
- struct iobuf *iobuf = NULL;
- int i = 0;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
+ int iobuf_cnt = 0;
+ struct iobuf *iobuf = NULL;
+ int i = 0;
- iobuf_cnt = iobuf_arena->page_count;
-
- if (!iobuf_arena->iobufs) {
- gf_log_callingfn (THIS->name, GF_LOG_ERROR, "iobufs not found");
- return;
- }
+ if (!iobuf_arena->iobufs) {
+ gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, LG_MSG_IOBUFS_NOT_FOUND,
+ "iobufs not found");
+ return;
+ }
- iobuf = iobuf_arena->iobufs;
- for (i = 0; i < iobuf_cnt; i++) {
- GF_ASSERT (iobuf->ref == 0);
+ iobuf_cnt = iobuf_arena->page_count;
+ iobuf = iobuf_arena->iobufs;
+ for (i = 0; i < iobuf_cnt; i++) {
+ GF_ASSERT(GF_ATOMIC_GET(iobuf->ref) == 0);
- list_del_init (&iobuf->list);
- iobuf++;
- }
+ LOCK_DESTROY(&iobuf->lock);
+ list_del_init(&iobuf->list);
+ iobuf++;
+ }
- GF_FREE (iobuf_arena->iobufs);
+ GF_FREE(iobuf_arena->iobufs);
-out:
- return;
+ return;
}
-
-void
-__iobuf_arena_destroy (struct iobuf_arena *iobuf_arena)
+static void
+__iobuf_arena_destroy(struct iobuf_pool *iobuf_pool,
+ struct iobuf_arena *iobuf_arena)
{
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out);
- __iobuf_arena_destroy_iobufs (iobuf_arena);
+ if (iobuf_pool->rdma_deregistration)
+ iobuf_pool->rdma_deregistration(iobuf_pool->mr_list, iobuf_arena);
- if (iobuf_arena->mem_base
- && iobuf_arena->mem_base != MAP_FAILED)
- munmap (iobuf_arena->mem_base, iobuf_arena->arena_size);
+ __iobuf_arena_destroy_iobufs(iobuf_arena);
- GF_FREE (iobuf_arena);
+ if (iobuf_arena->mem_base && iobuf_arena->mem_base != MAP_FAILED)
+ munmap(iobuf_arena->mem_base, iobuf_arena->arena_size);
+
+ GF_FREE(iobuf_arena);
out:
- return;
+ return;
}
-
-struct iobuf_arena *
-__iobuf_arena_alloc (struct iobuf_pool *iobuf_pool, size_t page_size,
- int32_t num_iobufs)
+static struct iobuf_arena *
+__iobuf_arena_alloc(struct iobuf_pool *iobuf_pool, size_t page_size,
+ int32_t num_iobufs)
{
- struct iobuf_arena *iobuf_arena = NULL;
- size_t rounded_size = 0;
+ struct iobuf_arena *iobuf_arena = NULL;
+ size_t rounded_size = 0;
+ int index = 0; /* unused */
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
- iobuf_arena = GF_CALLOC (sizeof (*iobuf_arena), 1,
- gf_common_mt_iobuf_arena);
- if (!iobuf_arena)
- goto err;
+ iobuf_arena = GF_CALLOC(sizeof(*iobuf_arena), 1, gf_common_mt_iobuf_arena);
+ if (!iobuf_arena)
+ goto err;
- INIT_LIST_HEAD (&iobuf_arena->list);
- INIT_LIST_HEAD (&iobuf_arena->active.list);
- INIT_LIST_HEAD (&iobuf_arena->passive.list);
- iobuf_arena->iobuf_pool = iobuf_pool;
+ INIT_LIST_HEAD(&iobuf_arena->list);
+ INIT_LIST_HEAD(&iobuf_arena->all_list);
+ INIT_LIST_HEAD(&iobuf_arena->active.list);
+ INIT_LIST_HEAD(&iobuf_arena->passive.list);
+ iobuf_arena->iobuf_pool = iobuf_pool;
- rounded_size = gf_iobuf_get_pagesize (page_size);
+ rounded_size = gf_iobuf_get_pagesize(page_size, &index);
- iobuf_arena->page_size = rounded_size;
- iobuf_arena->page_count = num_iobufs;
+ iobuf_arena->page_size = rounded_size;
+ iobuf_arena->page_count = num_iobufs;
- iobuf_arena->arena_size = rounded_size * num_iobufs;
+ iobuf_arena->arena_size = rounded_size * num_iobufs;
- iobuf_arena->mem_base = mmap (NULL, iobuf_arena->arena_size,
- PROT_READ|PROT_WRITE,
- MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
- if (iobuf_arena->mem_base == MAP_FAILED) {
- gf_log (THIS->name, GF_LOG_WARNING, "maping failed");
- goto err;
- }
+ iobuf_arena->mem_base = mmap(NULL, iobuf_arena->arena_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (iobuf_arena->mem_base == MAP_FAILED) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_MAPPING_FAILED, NULL);
+ goto err;
+ }
- __iobuf_arena_init_iobufs (iobuf_arena);
- if (!iobuf_arena->iobufs) {
- gf_log (THIS->name, GF_LOG_ERROR, "init failed");
- goto err;
- }
+ if (iobuf_pool->rdma_registration) {
+ iobuf_pool->rdma_registration(iobuf_pool->device, iobuf_arena);
+ }
- iobuf_pool->arena_cnt++;
+ list_add_tail(&iobuf_arena->all_list, &iobuf_pool->all_arenas);
- return iobuf_arena;
+ __iobuf_arena_init_iobufs(iobuf_arena);
+ if (!iobuf_arena->iobufs) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INIT_IOBUF_FAILED, NULL);
+ goto err;
+ }
-err:
- __iobuf_arena_destroy (iobuf_arena);
-
-out:
- return NULL;
-}
+ iobuf_pool->arena_cnt++;
+ return iobuf_arena;
-struct iobuf_arena *
-__iobuf_arena_unprune (struct iobuf_pool *iobuf_pool, size_t page_size)
-{
- struct iobuf_arena *iobuf_arena = NULL;
- struct iobuf_arena *tmp = NULL;
- int index = 0;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
-
- index = gf_iobuf_get_arena_index (page_size);
- if (index == -1) {
- gf_log ("iobuf", GF_LOG_ERROR, "page_size (%zu) of "
- "iobufs in arena being added is greater than max "
- "available", page_size);
- return NULL;
- }
+err:
+ __iobuf_arena_destroy(iobuf_pool, iobuf_arena);
- list_for_each_entry (tmp, &iobuf_pool->purge[index], list) {
- list_del_init (&tmp->list);
- iobuf_arena = tmp;
- break;
- }
out:
- return iobuf_arena;
+ return NULL;
}
-
-struct iobuf_arena *
-__iobuf_pool_add_arena (struct iobuf_pool *iobuf_pool, size_t page_size,
- int32_t num_pages)
+static struct iobuf_arena *
+__iobuf_arena_unprune(struct iobuf_pool *iobuf_pool, const size_t page_size,
+ const int index)
{
- struct iobuf_arena *iobuf_arena = NULL;
- int index = 0;
-
- index = gf_iobuf_get_arena_index (page_size);
- if (index == -1) {
- gf_log ("iobuf", GF_LOG_ERROR, "page_size (%zu) of "
- "iobufs in arena being added is greater than max "
- "available", page_size);
- return NULL;
- }
-
- iobuf_arena = __iobuf_arena_unprune (iobuf_pool, page_size);
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *tmp = NULL;
- if (!iobuf_arena)
- iobuf_arena = __iobuf_arena_alloc (iobuf_pool, page_size,
- num_pages);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
- if (!iobuf_arena) {
- gf_log (THIS->name, GF_LOG_WARNING, "arena not found");
- return NULL;
- }
-
- list_add_tail (&iobuf_arena->list, &iobuf_pool->arenas[index]);
-
- return iobuf_arena;
+ list_for_each_entry(tmp, &iobuf_pool->purge[index], list)
+ {
+ list_del_init(&tmp->list);
+ iobuf_arena = tmp;
+ break;
+ }
+out:
+ return iobuf_arena;
}
-
-struct iobuf_arena *
-iobuf_pool_add_arena (struct iobuf_pool *iobuf_pool, size_t page_size,
- int32_t num_pages)
+static struct iobuf_arena *
+__iobuf_pool_add_arena(struct iobuf_pool *iobuf_pool, const size_t page_size,
+ const int32_t num_pages, const int index)
{
- struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+ iobuf_arena = __iobuf_arena_unprune(iobuf_pool, page_size, index);
- pthread_mutex_lock (&iobuf_pool->mutex);
- {
- iobuf_arena = __iobuf_pool_add_arena (iobuf_pool, page_size,
- num_pages);
+ if (!iobuf_arena) {
+ iobuf_arena = __iobuf_arena_alloc(iobuf_pool, page_size, num_pages);
+ if (!iobuf_arena) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND,
+ NULL);
+ return NULL;
}
- pthread_mutex_unlock (&iobuf_pool->mutex);
+ }
+ list_add(&iobuf_arena->list, &iobuf_pool->arenas[index]);
-out:
- return iobuf_arena;
+ return iobuf_arena;
}
-
+/* This function destroys all the iobufs and the iobuf_pool */
void
-iobuf_pool_destroy (struct iobuf_pool *iobuf_pool)
+iobuf_pool_destroy(struct iobuf_pool *iobuf_pool)
{
- struct iobuf_arena *iobuf_arena = NULL;
- struct iobuf_arena *tmp = NULL;
- int i = 0;
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *tmp = NULL;
+ int i = 0;
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
+ pthread_mutex_lock(&iobuf_pool->mutex);
+ {
for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
- list_for_each_entry_safe (iobuf_arena, tmp,
- &iobuf_pool->arenas[i], list) {
- list_del_init (&iobuf_arena->list);
- iobuf_pool->arena_cnt--;
- __iobuf_arena_destroy (iobuf_arena);
- }
-
+ list_for_each_entry_safe(iobuf_arena, tmp, &iobuf_pool->arenas[i],
+ list)
+ {
+ list_del_init(&iobuf_arena->list);
+ iobuf_pool->arena_cnt--;
+
+ __iobuf_arena_destroy(iobuf_pool, iobuf_arena);
+ }
+ list_for_each_entry_safe(iobuf_arena, tmp, &iobuf_pool->purge[i],
+ list)
+ {
+ list_del_init(&iobuf_arena->list);
+ iobuf_pool->arena_cnt--;
+ __iobuf_arena_destroy(iobuf_pool, iobuf_arena);
+ }
+ /* If there are no iobuf leaks, there should be no
+ * arenas in the filled list. If at all there are any
+ * arenas in the filled list, the below function will
+ * assert.
+ */
+ list_for_each_entry_safe(iobuf_arena, tmp, &iobuf_pool->filled[i],
+ list)
+ {
+ list_del_init(&iobuf_arena->list);
+ iobuf_pool->arena_cnt--;
+ __iobuf_arena_destroy(iobuf_pool, iobuf_arena);
+ }
+ /* If there are no iobuf leaks, there shoould be
+ * no standard allocated arenas, iobuf_put will free
+ * such arenas.
+ * TODO: Free the stdalloc arenas forcefully if present?
+ */
}
+ }
+ pthread_mutex_unlock(&iobuf_pool->mutex);
+
+ pthread_mutex_destroy(&iobuf_pool->mutex);
+
+ GF_FREE(iobuf_pool);
out:
- return;
+ return;
}
static void
-iobuf_create_stdalloc_arena (struct iobuf_pool *iobuf_pool)
+iobuf_create_stdalloc_arena(struct iobuf_pool *iobuf_pool)
{
- struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
- /* No locking required here as its called only once during init */
- iobuf_arena = GF_CALLOC (sizeof (*iobuf_arena), 1,
- gf_common_mt_iobuf_arena);
- if (!iobuf_arena)
- goto err;
+ /* No locking required here as its called only once during init */
+ iobuf_arena = GF_CALLOC(sizeof(*iobuf_arena), 1, gf_common_mt_iobuf_arena);
+ if (!iobuf_arena)
+ goto err;
- INIT_LIST_HEAD (&iobuf_arena->list);
- INIT_LIST_HEAD (&iobuf_arena->active.list);
- INIT_LIST_HEAD (&iobuf_arena->passive.list);
+ INIT_LIST_HEAD(&iobuf_arena->list);
+ INIT_LIST_HEAD(&iobuf_arena->active.list);
+ INIT_LIST_HEAD(&iobuf_arena->passive.list);
- iobuf_arena->iobuf_pool = iobuf_pool;
+ iobuf_arena->iobuf_pool = iobuf_pool;
- iobuf_arena->page_size = 0x7fffffff;
+ iobuf_arena->page_size = 0x7fffffff;
- list_add_tail (&iobuf_arena->list,
- &iobuf_pool->arenas[IOBUF_ARENA_MAX_INDEX]);
+ list_add_tail(&iobuf_arena->list,
+ &iobuf_pool->arenas[IOBUF_ARENA_MAX_INDEX]);
err:
- return;
+ return;
}
struct iobuf_pool *
-iobuf_pool_new (void)
+iobuf_pool_new(void)
{
- struct iobuf_pool *iobuf_pool = NULL;
- int i = 0;
- size_t page_size = 0;
- size_t arena_size = 0;
- int32_t num_pages = 0;
-
- iobuf_pool = GF_CALLOC (sizeof (*iobuf_pool), 1,
- gf_common_mt_iobuf_pool);
- if (!iobuf_pool)
- goto out;
-
- pthread_mutex_init (&iobuf_pool->mutex, NULL);
- for (i = 0; i <= IOBUF_ARENA_MAX_INDEX; i++) {
- INIT_LIST_HEAD (&iobuf_pool->arenas[i]);
- INIT_LIST_HEAD (&iobuf_pool->filled[i]);
- INIT_LIST_HEAD (&iobuf_pool->purge[i]);
- }
-
- iobuf_pool->default_page_size = 128 * GF_UNIT_KB;
-
- arena_size = 0;
- for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
- page_size = gf_iobuf_init_config[i].pagesize;
- num_pages = gf_iobuf_init_config[i].num_pages;
-
- iobuf_pool_add_arena (iobuf_pool, page_size, num_pages);
-
- arena_size += page_size * num_pages;
- }
-
- /* Need an arena to handle all the bigger iobuf requests */
- iobuf_create_stdalloc_arena (iobuf_pool);
-
- iobuf_pool->arena_size = arena_size;
+ struct iobuf_pool *iobuf_pool = NULL;
+ int i = 0;
+ size_t page_size = 0;
+ size_t arena_size = 0;
+ int32_t num_pages = 0;
+
+ iobuf_pool = GF_CALLOC(sizeof(*iobuf_pool), 1, gf_common_mt_iobuf_pool);
+ if (!iobuf_pool)
+ goto out;
+ INIT_LIST_HEAD(&iobuf_pool->all_arenas);
+ pthread_mutex_init(&iobuf_pool->mutex, NULL);
+ for (i = 0; i <= IOBUF_ARENA_MAX_INDEX; i++) {
+ INIT_LIST_HEAD(&iobuf_pool->arenas[i]);
+ INIT_LIST_HEAD(&iobuf_pool->filled[i]);
+ INIT_LIST_HEAD(&iobuf_pool->purge[i]);
+ }
+
+ iobuf_pool->default_page_size = 128 * GF_UNIT_KB;
+
+ iobuf_pool->rdma_registration = NULL;
+ iobuf_pool->rdma_deregistration = NULL;
+
+ for (i = 0; i < GF_RDMA_DEVICE_COUNT; i++) {
+ iobuf_pool->device[i] = NULL;
+ iobuf_pool->mr_list[i] = NULL;
+ }
+
+ /* No locking required here
+ * as no one else can use this pool yet
+ */
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ page_size = gf_iobuf_init_config[i].pagesize;
+ num_pages = gf_iobuf_init_config[i].num_pages;
+
+ if (__iobuf_pool_add_arena(iobuf_pool, page_size, num_pages, i) != NULL)
+ arena_size += page_size * num_pages;
+ }
+
+ /* Need an arena to handle all the bigger iobuf requests */
+ iobuf_create_stdalloc_arena(iobuf_pool);
+
+ iobuf_pool->arena_size = arena_size;
out:
- return iobuf_pool;
+ return iobuf_pool;
}
-
-void
-__iobuf_arena_prune (struct iobuf_pool *iobuf_pool,
- struct iobuf_arena *iobuf_arena, int index)
+static void
+__iobuf_arena_prune(struct iobuf_pool *iobuf_pool,
+ struct iobuf_arena *iobuf_arena, const int index)
{
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
-
- /* code flow comes here only if the arena is in purge list and we can
- * free the arena only if we have atleast one arena in 'arenas' list
- * (ie, at least few iobufs free in arena), that way, there won't
- * be spurious mmap/unmap of buffers
- */
- if (list_empty (&iobuf_pool->arenas[index]))
- goto out;
+ /* code flow comes here only if the arena is in purge list and we can
+ * free the arena only if we have at least one arena in 'arenas' list
+ * (ie, at least few iobufs free in arena), that way, there won't
+ * be spurious mmap/unmap of buffers
+ */
+ if (list_empty(&iobuf_pool->arenas[index]))
+ goto out;
- /* All cases matched, destroy */
- list_del_init (&iobuf_arena->list);
- iobuf_pool->arena_cnt--;
+ /* All cases matched, destroy */
+ list_del_init(&iobuf_arena->list);
+ list_del_init(&iobuf_arena->all_list);
+ iobuf_pool->arena_cnt--;
- __iobuf_arena_destroy (iobuf_arena);
+ __iobuf_arena_destroy(iobuf_pool, iobuf_arena);
out:
- return;
+ return;
}
-
void
-iobuf_pool_prune (struct iobuf_pool *iobuf_pool)
+iobuf_pool_prune(struct iobuf_pool *iobuf_pool)
{
- struct iobuf_arena *iobuf_arena = NULL;
- struct iobuf_arena *tmp = NULL;
- int i = 0;
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *tmp = NULL;
+ int i = 0;
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
- pthread_mutex_lock (&iobuf_pool->mutex);
- {
- for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
- if (list_empty (&iobuf_pool->arenas[i])) {
- continue;
- }
-
- list_for_each_entry_safe (iobuf_arena, tmp,
- &iobuf_pool->purge[i], list) {
- __iobuf_arena_prune (iobuf_pool, iobuf_arena, i);
- }
- }
+ pthread_mutex_lock(&iobuf_pool->mutex);
+ {
+ for (i = 0; i < IOBUF_ARENA_MAX_INDEX; i++) {
+ if (list_empty(&iobuf_pool->arenas[i])) {
+ continue;
+ }
+
+ list_for_each_entry_safe(iobuf_arena, tmp, &iobuf_pool->purge[i],
+ list)
+ {
+ __iobuf_arena_prune(iobuf_pool, iobuf_arena, i);
+ }
}
- pthread_mutex_unlock (&iobuf_pool->mutex);
+ }
+ pthread_mutex_unlock(&iobuf_pool->mutex);
out:
- return;
+ return;
}
-
-struct iobuf_arena *
-__iobuf_select_arena (struct iobuf_pool *iobuf_pool, size_t page_size)
+/* Always called under the iobuf_pool mutex lock */
+static struct iobuf_arena *
+__iobuf_select_arena(struct iobuf_pool *iobuf_pool, const size_t page_size,
+ const int index)
{
- struct iobuf_arena *iobuf_arena = NULL;
- struct iobuf_arena *trav = NULL;
- int index = 0;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
-
- index = gf_iobuf_get_arena_index (page_size);
- if (index == -1) {
- gf_log ("iobuf", GF_LOG_ERROR, "page_size (%zu) of "
- "iobufs in arena being added is greater than max "
- "available", page_size);
- return NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *trav = NULL;
+
+ /* look for unused iobuf from the head-most arena */
+ list_for_each_entry(trav, &iobuf_pool->arenas[index], list)
+ {
+ if (trav->passive_cnt) {
+ iobuf_arena = trav;
+ break;
}
+ }
- /* look for unused iobuf from the head-most arena */
- list_for_each_entry (trav, &iobuf_pool->arenas[index], list) {
- if (trav->passive_cnt) {
- iobuf_arena = trav;
- break;
- }
- }
+ if (!iobuf_arena) {
+ /* all arenas were full, find the right count to add */
+ iobuf_arena = __iobuf_pool_add_arena(
+ iobuf_pool, page_size, gf_iobuf_init_config[index].num_pages,
+ index);
+ }
- if (!iobuf_arena) {
- /* all arenas were full, find the right count to add */
- iobuf_arena = __iobuf_pool_add_arena (iobuf_pool, page_size,
- gf_iobuf_init_config[index].num_pages);
- }
-
-out:
- return iobuf_arena;
+ return iobuf_arena;
}
-
-struct iobuf *
-__iobuf_ref (struct iobuf *iobuf)
+/* Always called under the iobuf_pool mutex lock */
+static struct iobuf *
+__iobuf_get(struct iobuf_pool *iobuf_pool, const size_t page_size,
+ const int index)
{
- iobuf->ref++;
-
- return iobuf;
-}
-
+ struct iobuf *iobuf = NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
-struct iobuf *
-__iobuf_unref (struct iobuf *iobuf)
-{
- iobuf->ref--;
-
- return iobuf;
-}
-
-struct iobuf *
-__iobuf_get (struct iobuf_arena *iobuf_arena, size_t page_size)
-{
- struct iobuf *iobuf = NULL;
- struct iobuf_pool *iobuf_pool = NULL;
- int index = 0;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
-
- iobuf_pool = iobuf_arena->iobuf_pool;
-
- list_for_each_entry (iobuf, &iobuf_arena->passive.list, list)
- break;
+ /* most eligible arena for picking an iobuf */
+ iobuf_arena = __iobuf_select_arena(iobuf_pool, page_size, index);
+ if (!iobuf_arena)
+ return NULL;
- list_del (&iobuf->list);
- iobuf_arena->passive_cnt--;
+ list_for_each_entry(iobuf, &iobuf_arena->passive.list, list) break;
- list_add (&iobuf->list, &iobuf_arena->active.list);
- iobuf_arena->active_cnt++;
+ list_del(&iobuf->list);
+ iobuf_arena->passive_cnt--;
- /* no resetting requied for this element */
- iobuf_arena->alloc_cnt++;
+ list_add(&iobuf->list, &iobuf_arena->active.list);
+ iobuf_arena->active_cnt++;
- if (iobuf_arena->max_active < iobuf_arena->active_cnt)
- iobuf_arena->max_active = iobuf_arena->active_cnt;
+ /* no resetting requied for this element */
+ iobuf_arena->alloc_cnt++;
- if (iobuf_arena->passive_cnt == 0) {
- index = gf_iobuf_get_arena_index (page_size);
- if (index == -1) {
- gf_log ("iobuf", GF_LOG_ERROR, "page_size (%zu) of "
- "iobufs in arena being added is greater "
- "than max available", page_size);
- goto out;
- }
+ if (iobuf_arena->max_active < iobuf_arena->active_cnt)
+ iobuf_arena->max_active = iobuf_arena->active_cnt;
- list_del (&iobuf_arena->list);
- list_add (&iobuf_arena->list, &iobuf_pool->filled[index]);
- }
+ if (iobuf_arena->passive_cnt == 0) {
+ list_del(&iobuf_arena->list);
+ list_add(&iobuf_arena->list, &iobuf_pool->filled[index]);
+ }
-out:
- return iobuf;
+ return iobuf;
}
-struct iobuf *
-iobuf_get_from_stdalloc (struct iobuf_pool *iobuf_pool, size_t page_size)
+static struct iobuf *
+iobuf_get_from_stdalloc(struct iobuf_pool *iobuf_pool, const size_t page_size)
{
- struct iobuf *iobuf = NULL;
- struct iobuf_arena *iobuf_arena = NULL;
- struct iobuf_arena *trav = NULL;
- int ret = -1;
-
- /* The first arena in the 'MAX-INDEX' will always be used for misc */
- list_for_each_entry (trav, &iobuf_pool->arenas[IOBUF_ARENA_MAX_INDEX],
- list) {
- iobuf_arena = trav;
- break;
- }
-
- iobuf = GF_CALLOC (1, sizeof (*iobuf), gf_common_mt_iobuf);
- if (!iobuf)
- goto out;
-
- /* 4096 is the alignment */
- iobuf->free_ptr = GF_CALLOC (1, ((page_size + GF_IOBUF_ALIGN_SIZE) - 1),
- gf_common_mt_char);
- if (!iobuf->free_ptr)
- goto out;
-
- iobuf->ptr = GF_ALIGN_BUF (iobuf->free_ptr, GF_IOBUF_ALIGN_SIZE);
- iobuf->iobuf_arena = iobuf_arena;
- LOCK_INIT (&iobuf->lock);
-
- /* Hold a ref because you are allocating and using it */
- iobuf->ref = 1;
-
- ret = 0;
+ struct iobuf *iobuf = NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_arena *trav = NULL;
+ int ret = -1;
+
+ /* The first arena in the 'MAX-INDEX' will always be used for misc */
+ list_for_each_entry(trav, &iobuf_pool->arenas[IOBUF_ARENA_MAX_INDEX], list)
+ {
+ iobuf_arena = trav;
+ break;
+ }
+
+ iobuf = GF_CALLOC(1, sizeof(*iobuf), gf_common_mt_iobuf);
+ if (!iobuf)
+ goto out;
+
+ /* 4096 is the alignment */
+ iobuf->free_ptr = GF_CALLOC(1, ((page_size + GF_IOBUF_ALIGN_SIZE) - 1),
+ gf_common_mt_char);
+ if (!iobuf->free_ptr)
+ goto out;
+
+ iobuf->ptr = GF_ALIGN_BUF(iobuf->free_ptr, GF_IOBUF_ALIGN_SIZE);
+ iobuf->iobuf_arena = iobuf_arena;
+ LOCK_INIT(&iobuf->lock);
+
+ /* Hold a ref because you are allocating and using it */
+ GF_ATOMIC_INIT(iobuf->ref, 1);
+
+ ret = 0;
out:
- if (ret && iobuf) {
- if (iobuf->free_ptr)
- GF_FREE (iobuf->free_ptr);
- GF_FREE (iobuf);
- iobuf = NULL;
- }
+ if (ret && iobuf) {
+ GF_FREE(iobuf->free_ptr);
+ GF_FREE(iobuf);
+ iobuf = NULL;
+ }
- return iobuf;
+ return iobuf;
}
-
struct iobuf *
-iobuf_get2 (struct iobuf_pool *iobuf_pool, size_t page_size)
+iobuf_get2(struct iobuf_pool *iobuf_pool, size_t page_size)
{
- struct iobuf *iobuf = NULL;
- struct iobuf_arena *iobuf_arena = NULL;
- size_t rounded_size = 0;
-
- if (page_size == 0) {
- page_size = iobuf_pool->default_page_size;
- }
-
- rounded_size = gf_iobuf_get_pagesize (page_size);
- if (rounded_size == -1) {
- /* make sure to provide the requested buffer with standard
- memory allocations */
- iobuf = iobuf_get_from_stdalloc (iobuf_pool, page_size);
-
- gf_log ("iobuf", GF_LOG_DEBUG, "request for iobuf of size %zu "
- "is serviced using standard calloc() (%p) as it "
- "exceeds the maximum available buffer size",
- page_size, iobuf);
-
- iobuf_pool->request_misses++;
- return iobuf;
+ struct iobuf *iobuf = NULL;
+ size_t rounded_size = 0;
+ int index = 0;
+
+ if (page_size == 0) {
+ page_size = iobuf_pool->default_page_size;
+ }
+
+ rounded_size = gf_iobuf_get_pagesize(page_size, &index);
+ if (rounded_size == -1) {
+ /* make sure to provide the requested buffer with standard
+ memory allocations */
+ iobuf = iobuf_get_from_stdalloc(iobuf_pool, page_size);
+
+ gf_msg_debug("iobuf", 0,
+ "request for iobuf of size %zu "
+ "is serviced using standard calloc() (%p) as it "
+ "exceeds the maximum available buffer size",
+ page_size, iobuf);
+
+ iobuf_pool->request_misses++;
+ return iobuf;
+ } else if (index == -1) {
+ gf_smsg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED,
+ "page_size=%zu", page_size, NULL);
+ return NULL;
+ }
+
+ pthread_mutex_lock(&iobuf_pool->mutex);
+ {
+ iobuf = __iobuf_get(iobuf_pool, rounded_size, index);
+ if (!iobuf) {
+ pthread_mutex_unlock(&iobuf_pool->mutex);
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_IOBUF_NOT_FOUND,
+ NULL);
+ goto post_unlock;
}
- pthread_mutex_lock (&iobuf_pool->mutex);
- {
- /* most eligible arena for picking an iobuf */
- iobuf_arena = __iobuf_select_arena (iobuf_pool, rounded_size);
- if (!iobuf_arena)
- goto unlock;
-
- iobuf = __iobuf_get (iobuf_arena, rounded_size);
- if (!iobuf)
- goto unlock;
-
- __iobuf_ref (iobuf);
- }
-unlock:
- pthread_mutex_unlock (&iobuf_pool->mutex);
-
- return iobuf;
+ iobuf_ref(iobuf);
+ }
+ pthread_mutex_unlock(&iobuf_pool->mutex);
+post_unlock:
+ return iobuf;
}
struct iobuf *
-iobuf_get (struct iobuf_pool *iobuf_pool)
+iobuf_get_page_aligned(struct iobuf_pool *iobuf_pool, size_t page_size,
+ size_t align_size)
{
- struct iobuf *iobuf = NULL;
- struct iobuf_arena *iobuf_arena = NULL;
+ size_t req_size = 0;
+ struct iobuf *iobuf = NULL;
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+ req_size = page_size;
- pthread_mutex_lock (&iobuf_pool->mutex);
- {
- /* most eligible arena for picking an iobuf */
- iobuf_arena = __iobuf_select_arena (iobuf_pool,
- iobuf_pool->default_page_size);
- if (!iobuf_arena) {
- gf_log (THIS->name, GF_LOG_WARNING, "arena not found");
- goto unlock;
- }
-
- iobuf = __iobuf_get (iobuf_arena,
- iobuf_pool->default_page_size);
- if (!iobuf) {
- gf_log (THIS->name, GF_LOG_WARNING, "iobuf not found");
- goto unlock;
- }
-
- __iobuf_ref (iobuf);
- }
-unlock:
- pthread_mutex_unlock (&iobuf_pool->mutex);
+ if (req_size == 0) {
+ req_size = iobuf_pool->default_page_size;
+ }
-out:
- return iobuf;
+ iobuf = iobuf_get2(iobuf_pool, req_size + align_size);
+ if (!iobuf)
+ return NULL;
+ /* If std allocation was used, then free_ptr will be non-NULL. In this
+ * case, we do not want to modify the original free_ptr.
+ * On the other hand, if the buf was gotten through the available
+ * arenas, then we use iobuf->free_ptr to store the original
+ * pointer to the offset into the mmap'd block of memory and in turn
+ * reuse iobuf->ptr to hold the page-aligned address. And finally, in
+ * iobuf_put(), we copy iobuf->free_ptr into iobuf->ptr - back to where
+ * it was originally when __iobuf_get() returned this iobuf.
+ */
+ if (!iobuf->free_ptr)
+ iobuf->free_ptr = iobuf->ptr;
+ iobuf->ptr = GF_ALIGN_BUF(iobuf->ptr, align_size);
+
+ return iobuf;
}
-void
-__iobuf_put (struct iobuf *iobuf, struct iobuf_arena *iobuf_arena)
+struct iobuf *
+iobuf_get(struct iobuf_pool *iobuf_pool)
{
- struct iobuf_pool *iobuf_pool = NULL;
- int index = 0;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
+ struct iobuf *iobuf = NULL;
+ int index = 0;
- iobuf_pool = iobuf_arena->iobuf_pool;
-
- index = gf_iobuf_get_arena_index (iobuf_arena->page_size);
- if (index == -1) {
- gf_log ("iobuf", GF_LOG_DEBUG, "freeing the iobuf (%p) "
- "allocated with standard calloc()", iobuf);
-
- /* free up properly without bothering about lists and all */
- LOCK_DESTROY (&iobuf->lock);
- GF_FREE (iobuf->free_ptr);
- GF_FREE (iobuf);
- return;
- }
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
- if (iobuf_arena->passive_cnt == 0) {
- list_del (&iobuf_arena->list);
- list_add_tail (&iobuf_arena->list, &iobuf_pool->arenas[index]);
+ index = gf_iobuf_get_arena_index(iobuf_pool->default_page_size);
+ if (index == -1) {
+ gf_smsg("iobuf", GF_LOG_ERROR, 0, LG_MSG_PAGE_SIZE_EXCEEDED,
+ "page_size=%zu", iobuf_pool->default_page_size, NULL);
+ return NULL;
+ }
+
+ pthread_mutex_lock(&iobuf_pool->mutex);
+ {
+ iobuf = __iobuf_get(iobuf_pool, iobuf_pool->default_page_size, index);
+ if (!iobuf) {
+ pthread_mutex_unlock(&iobuf_pool->mutex);
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_IOBUF_NOT_FOUND,
+ NULL);
+ goto out;
}
- list_del_init (&iobuf->list);
- iobuf_arena->active_cnt--;
-
- list_add (&iobuf->list, &iobuf_arena->passive.list);
- iobuf_arena->passive_cnt++;
+ iobuf_ref(iobuf);
+ }
+ pthread_mutex_unlock(&iobuf_pool->mutex);
- if (iobuf_arena->active_cnt == 0) {
- list_del (&iobuf_arena->list);
- list_add_tail (&iobuf_arena->list, &iobuf_pool->purge[index]);
- __iobuf_arena_prune (iobuf_pool, iobuf_arena, index);
- }
out:
- return;
+ return iobuf;
}
+static void
+__iobuf_put(struct iobuf *iobuf, struct iobuf_arena *iobuf_arena)
+{
+ struct iobuf_pool *iobuf_pool = NULL;
+ int index = 0;
+
+ iobuf_pool = iobuf_arena->iobuf_pool;
+
+ index = gf_iobuf_get_arena_index(iobuf_arena->page_size);
+ if (index == -1) {
+ gf_msg_debug("iobuf", 0,
+ "freeing the iobuf (%p) "
+ "allocated with standard calloc()",
+ iobuf);
+
+ /* free up properly without bothering about lists and all */
+ LOCK_DESTROY(&iobuf->lock);
+ GF_FREE(iobuf->free_ptr);
+ GF_FREE(iobuf);
+ return;
+ }
+
+ if (iobuf_arena->passive_cnt == 0) {
+ list_del(&iobuf_arena->list);
+ list_add_tail(&iobuf_arena->list, &iobuf_pool->arenas[index]);
+ }
+
+ list_del_init(&iobuf->list);
+ iobuf_arena->active_cnt--;
+
+ if (iobuf->free_ptr) {
+ iobuf->ptr = iobuf->free_ptr;
+ iobuf->free_ptr = NULL;
+ }
+
+ list_add(&iobuf->list, &iobuf_arena->passive.list);
+ iobuf_arena->passive_cnt++;
+
+ if (iobuf_arena->active_cnt == 0) {
+ list_del(&iobuf_arena->list);
+ list_add_tail(&iobuf_arena->list, &iobuf_pool->purge[index]);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
+ __iobuf_arena_prune(iobuf_pool, iobuf_arena, index);
+ }
+out:
+ return;
+}
void
-iobuf_put (struct iobuf *iobuf)
+iobuf_put(struct iobuf *iobuf)
{
- struct iobuf_arena *iobuf_arena = NULL;
- struct iobuf_pool *iobuf_pool = NULL;
+ struct iobuf_arena *iobuf_arena = NULL;
+ struct iobuf_pool *iobuf_pool = NULL;
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
- iobuf_arena = iobuf->iobuf_arena;
- if (!iobuf_arena) {
- gf_log (THIS->name, GF_LOG_WARNING, "arena not found");
- return;
- }
+ iobuf_arena = iobuf->iobuf_arena;
+ if (!iobuf_arena) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND, NULL);
+ return;
+ }
- iobuf_pool = iobuf_arena->iobuf_pool;
- if (!iobuf_pool) {
- gf_log (THIS->name, GF_LOG_WARNING, "iobuf pool not found");
- return;
- }
+ iobuf_pool = iobuf_arena->iobuf_pool;
+ if (!iobuf_pool) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_POOL_NOT_FOUND, "iobuf",
+ NULL);
+ return;
+ }
- pthread_mutex_lock (&iobuf_pool->mutex);
- {
- __iobuf_put (iobuf, iobuf_arena);
- }
- pthread_mutex_unlock (&iobuf_pool->mutex);
+ pthread_mutex_lock(&iobuf_pool->mutex);
+ {
+ __iobuf_put(iobuf, iobuf_arena);
+ }
+ pthread_mutex_unlock(&iobuf_pool->mutex);
out:
- return;
+ return;
}
-
void
-iobuf_unref (struct iobuf *iobuf)
+iobuf_unref(struct iobuf *iobuf)
{
- int ref = 0;
+ int ref = 0;
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
- LOCK (&iobuf->lock);
- {
- __iobuf_unref (iobuf);
- ref = iobuf->ref;
- }
- UNLOCK (&iobuf->lock);
+ ref = GF_ATOMIC_DEC(iobuf->ref);
- if (!ref)
- iobuf_put (iobuf);
+ if (!ref)
+ iobuf_put(iobuf);
out:
- return;
+ return;
}
-
struct iobuf *
-iobuf_ref (struct iobuf *iobuf)
+iobuf_ref(struct iobuf *iobuf)
{
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
-
- LOCK (&iobuf->lock);
- {
- __iobuf_ref (iobuf);
- }
- UNLOCK (&iobuf->lock);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
+ GF_ATOMIC_INC(iobuf->ref);
out:
- return iobuf;
+ return iobuf;
}
-
struct iobref *
-iobref_new ()
+iobref_new()
{
- struct iobref *iobref = NULL;
+ struct iobref *iobref = NULL;
+
+ iobref = GF_MALLOC(sizeof(*iobref), gf_common_mt_iobref);
+ if (!iobref)
+ return NULL;
- iobref = GF_CALLOC (sizeof (*iobref), 1,
- gf_common_mt_iobref);
- if (!iobref)
- return NULL;
+ iobref->iobrefs = GF_CALLOC(sizeof(*iobref->iobrefs), 16,
+ gf_common_mt_iobrefs);
+ if (!iobref->iobrefs) {
+ GF_FREE(iobref);
+ return NULL;
+ }
- LOCK_INIT (&iobref->lock);
+ iobref->allocated = 16;
+ iobref->used = 0;
- iobref->ref++;
+ LOCK_INIT(&iobref->lock);
- return iobref;
+ GF_ATOMIC_INIT(iobref->ref, 1);
+ return iobref;
}
-
struct iobref *
-iobref_ref (struct iobref *iobref)
+iobref_ref(struct iobref *iobref)
{
- GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
-
- LOCK (&iobref->lock);
- {
- iobref->ref++;
- }
- UNLOCK (&iobref->lock);
+ GF_VALIDATE_OR_GOTO("iobuf", iobref, out);
+ GF_ATOMIC_INC(iobref->ref);
out:
- return iobref;
+ return iobref;
}
-
void
-iobref_destroy (struct iobref *iobref)
+iobref_destroy(struct iobref *iobref)
{
- int i = 0;
- struct iobuf *iobuf = NULL;
+ int i = 0;
+ struct iobuf *iobuf = NULL;
- GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobref, out);
- for (i = 0; i < GF_IOBREF_IOBUF_COUNT; i++) {
- iobuf = iobref->iobrefs[i];
+ for (i = 0; i < iobref->allocated; i++) {
+ iobuf = iobref->iobrefs[i];
- iobref->iobrefs[i] = NULL;
- if (iobuf)
- iobuf_unref (iobuf);
- }
+ iobref->iobrefs[i] = NULL;
+ if (iobuf)
+ iobuf_unref(iobuf);
+ }
- GF_FREE (iobref);
+ GF_FREE(iobref->iobrefs);
+ GF_FREE(iobref);
out:
- return;
+ return;
}
+void
+iobref_unref(struct iobref *iobref)
+{
+ int ref = 0;
+
+ GF_VALIDATE_OR_GOTO("iobuf", iobref, out);
+ ref = GF_ATOMIC_DEC(iobref->ref);
+
+ if (!ref)
+ iobref_destroy(iobref);
+
+out:
+ return;
+}
void
-iobref_unref (struct iobref *iobref)
+iobref_clear(struct iobref *iobref)
{
- int ref = 0;
+ int i = 0;
- GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobref, out);
- LOCK (&iobref->lock);
- {
- ref = (--iobref->ref);
+ for (; i < iobref->allocated; i++) {
+ if (iobref->iobrefs[i] != NULL) {
+ iobuf_unref(iobref->iobrefs[i]);
+ } else {
+ /** iobuf's are attached serially */
+ break;
}
- UNLOCK (&iobref->lock);
+ }
- if (!ref)
- iobref_destroy (iobref);
+ iobref_unref(iobref);
out:
- return;
+ return;
}
+static void
+__iobref_grow(struct iobref *iobref)
+{
+ void *newptr = NULL;
+ int i = 0;
+
+ newptr = GF_REALLOC(iobref->iobrefs,
+ iobref->allocated * 2 * (sizeof(*iobref->iobrefs)));
+ if (newptr) {
+ iobref->iobrefs = newptr;
+ iobref->allocated *= 2;
+
+ for (i = iobref->used; i < iobref->allocated; i++)
+ iobref->iobrefs[i] = NULL;
+ }
+}
int
-__iobref_add (struct iobref *iobref, struct iobuf *iobuf)
+__iobref_add(struct iobref *iobref, struct iobuf *iobuf)
{
- int i = 0;
- int ret = -ENOMEM;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
-
- for (i = 0; i < GF_IOBREF_IOBUF_COUNT; i++) {
- if (iobref->iobrefs[i] == NULL) {
- iobref->iobrefs[i] = iobuf_ref (iobuf);
- ret = 0;
- break;
- }
+ int i = 0;
+ int ret = -ENOMEM;
+
+ GF_VALIDATE_OR_GOTO("iobuf", iobref, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
+
+ if (iobref->used == iobref->allocated) {
+ __iobref_grow(iobref);
+
+ if (iobref->used == iobref->allocated) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ for (i = 0; i < iobref->allocated; i++) {
+ if (iobref->iobrefs[i] == NULL) {
+ iobref->iobrefs[i] = iobuf_ref(iobuf);
+ iobref->used++;
+ ret = 0;
+ break;
}
+ }
out:
- return ret;
+ return ret;
}
-
int
-iobref_add (struct iobref *iobref, struct iobuf *iobuf)
+iobref_add(struct iobref *iobref, struct iobuf *iobuf)
{
- int ret = -EINVAL;
+ int ret = -EINVAL;
- GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobref, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
- LOCK (&iobref->lock);
- {
- ret = __iobref_add (iobref, iobuf);
- }
- UNLOCK (&iobref->lock);
+ LOCK(&iobref->lock);
+ {
+ ret = __iobref_add(iobref, iobuf);
+ }
+ UNLOCK(&iobref->lock);
out:
- return ret;
+ return ret;
}
-
int
-iobref_merge (struct iobref *to, struct iobref *from)
+iobref_merge(struct iobref *to, struct iobref *from)
{
- int i = 0;
- int ret = -1;
- struct iobuf *iobuf = NULL;
+ int i = 0;
+ int ret = 0;
+ struct iobuf *iobuf = NULL;
- GF_VALIDATE_OR_GOTO ("iobuf", to, out);
- GF_VALIDATE_OR_GOTO ("iobuf", from, out);
+ GF_VALIDATE_OR_GOTO("iobuf", to, out);
+ GF_VALIDATE_OR_GOTO("iobuf", from, out);
- LOCK (&from->lock);
- {
- for (i = 0; i < GF_IOBREF_IOBUF_COUNT; i++) {
- iobuf = from->iobrefs[i];
+ LOCK(&from->lock);
+ {
+ for (i = 0; i < from->allocated; i++) {
+ iobuf = from->iobrefs[i];
- if (!iobuf)
- break;
+ if (!iobuf)
+ break;
- ret = iobref_add (to, iobuf);
+ ret = iobref_add(to, iobuf);
- if (ret < 0)
- break;
- }
+ if (ret < 0)
+ break;
}
- UNLOCK (&from->lock);
+ }
+ UNLOCK(&from->lock);
out:
- return ret;
+ return ret;
}
-
size_t
-iobuf_size (struct iobuf *iobuf)
+iobuf_size(struct iobuf *iobuf)
{
- size_t size = 0;
+ size_t size = 0;
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
- if (!iobuf->iobuf_arena) {
- gf_log (THIS->name, GF_LOG_WARNING, "arena not found");
- goto out;
- }
+ if (!iobuf->iobuf_arena) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_ARENA_NOT_FOUND, NULL);
+ goto out;
+ }
- if (!iobuf->iobuf_arena->iobuf_pool) {
- gf_log (THIS->name, GF_LOG_WARNING, "pool not found");
- goto out;
- }
+ if (!iobuf->iobuf_arena->iobuf_pool) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_POOL_NOT_FOUND, NULL);
+ goto out;
+ }
- size = iobuf->iobuf_arena->page_size;
+ size = iobuf->iobuf_arena->page_size;
out:
- return size;
+ return size;
}
-
size_t
-iobref_size (struct iobref *iobref)
+iobref_size(struct iobref *iobref)
{
- size_t size = 0;
- int i = 0;
+ size_t size = 0;
+ int i = 0;
- GF_VALIDATE_OR_GOTO ("iobuf", iobref, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iobref, out);
- LOCK (&iobref->lock);
- {
- for (i = 0; i < GF_IOBREF_IOBUF_COUNT; i++) {
- if (iobref->iobrefs[i])
- size += iobuf_size (iobref->iobrefs[i]);
- }
+ LOCK(&iobref->lock);
+ {
+ for (i = 0; i < iobref->allocated; i++) {
+ if (iobref->iobrefs[i])
+ size += iobuf_size(iobref->iobrefs[i]);
}
- UNLOCK (&iobref->lock);
+ }
+ UNLOCK(&iobref->lock);
out:
- return size;
+ return size;
}
void
-iobuf_info_dump (struct iobuf *iobuf, const char *key_prefix)
+iobuf_info_dump(struct iobuf *iobuf, const char *key_prefix)
{
- char key[GF_DUMP_MAX_BUF_LEN];
- struct iobuf my_iobuf;
- int ret = 0;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf, out);
+ char key[GF_DUMP_MAX_BUF_LEN];
+ struct iobuf my_iobuf;
+ int ret = 0;
- memset(&my_iobuf, 0, sizeof(my_iobuf));
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf, out);
- ret = TRY_LOCK(&iobuf->lock);
- if (ret) {
- return;
- }
- memcpy(&my_iobuf, iobuf, sizeof(my_iobuf));
- UNLOCK(&iobuf->lock);
+ ret = TRY_LOCK(&iobuf->lock);
+ if (ret) {
+ return;
+ }
+ memcpy(&my_iobuf, iobuf, sizeof(my_iobuf));
+ UNLOCK(&iobuf->lock);
- gf_proc_dump_build_key(key, key_prefix,"ref");
- gf_proc_dump_write(key, "%d", my_iobuf.ref);
- gf_proc_dump_build_key(key, key_prefix,"ptr");
- gf_proc_dump_write(key, "%p", my_iobuf.ptr);
+ gf_proc_dump_build_key(key, key_prefix, "ref");
+ gf_proc_dump_write(key, "%" GF_PRI_ATOMIC, GF_ATOMIC_GET(my_iobuf.ref));
+ gf_proc_dump_build_key(key, key_prefix, "ptr");
+ gf_proc_dump_write(key, "%p", my_iobuf.ptr);
out:
- return;
+ return;
}
void
-iobuf_arena_info_dump (struct iobuf_arena *iobuf_arena, const char *key_prefix)
+iobuf_arena_info_dump(struct iobuf_arena *iobuf_arena, const char *key_prefix)
{
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 1;
- struct iobuf *trav;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_arena, out);
-
- gf_proc_dump_build_key(key, key_prefix,"mem_base");
- gf_proc_dump_write(key, "%p", iobuf_arena->mem_base);
- gf_proc_dump_build_key(key, key_prefix, "active_cnt");
- gf_proc_dump_write(key, "%d", iobuf_arena->active_cnt);
- gf_proc_dump_build_key(key, key_prefix, "passive_cnt");
- gf_proc_dump_write(key, "%d", iobuf_arena->passive_cnt);
- gf_proc_dump_build_key(key, key_prefix, "alloc_cnt");
- gf_proc_dump_write(key, "%"PRIu64, iobuf_arena->alloc_cnt);
- gf_proc_dump_build_key(key, key_prefix, "max_active");
- gf_proc_dump_write(key, "%"PRIu64, iobuf_arena->max_active);
- gf_proc_dump_build_key(key, key_prefix, "page_size");
- gf_proc_dump_write(key, "%"PRIu64, iobuf_arena->page_size);
- list_for_each_entry (trav, &iobuf_arena->active.list, list) {
- gf_proc_dump_build_key(key, key_prefix,"active_iobuf.%d", i++);
- gf_proc_dump_add_section(key);
- iobuf_info_dump(trav, key);
- }
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 1;
+ struct iobuf *trav;
+
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_arena, out);
+
+ gf_proc_dump_build_key(key, key_prefix, "mem_base");
+ gf_proc_dump_write(key, "%p", iobuf_arena->mem_base);
+ gf_proc_dump_build_key(key, key_prefix, "active_cnt");
+ gf_proc_dump_write(key, "%d", iobuf_arena->active_cnt);
+ gf_proc_dump_build_key(key, key_prefix, "passive_cnt");
+ gf_proc_dump_write(key, "%d", iobuf_arena->passive_cnt);
+ gf_proc_dump_build_key(key, key_prefix, "alloc_cnt");
+ gf_proc_dump_write(key, "%" PRIu64, iobuf_arena->alloc_cnt);
+ gf_proc_dump_build_key(key, key_prefix, "max_active");
+ gf_proc_dump_write(key, "%d", iobuf_arena->max_active);
+ gf_proc_dump_build_key(key, key_prefix, "page_size");
+ gf_proc_dump_write(key, "%" GF_PRI_SIZET, iobuf_arena->page_size);
+ list_for_each_entry(trav, &iobuf_arena->active.list, list)
+ {
+ gf_proc_dump_build_key(key, key_prefix, "active_iobuf.%d", i++);
+ gf_proc_dump_add_section("%s", key);
+ iobuf_info_dump(trav, key);
+ }
out:
- return;
+ return;
}
void
-iobuf_stats_dump (struct iobuf_pool *iobuf_pool)
+iobuf_stats_dump(struct iobuf_pool *iobuf_pool)
{
- char msg[1024];
- struct iobuf_arena *trav = NULL;
- int i = 1;
- int j = 0;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("iobuf", iobuf_pool, out);
+ char msg[1024];
+ struct iobuf_arena *trav = NULL;
+ int i = 1;
+ int j = 0;
+ int ret = -1;
- memset(msg, 0, sizeof(msg));
+ GF_VALIDATE_OR_GOTO("iobuf", iobuf_pool, out);
- ret = pthread_mutex_trylock(&iobuf_pool->mutex);
+ ret = pthread_mutex_trylock(&iobuf_pool->mutex);
- if (ret) {
- return;
+ if (ret) {
+ return;
+ }
+ gf_proc_dump_add_section("iobuf.global");
+ gf_proc_dump_write("iobuf_pool", "%p", iobuf_pool);
+ gf_proc_dump_write("iobuf_pool.default_page_size", "%" GF_PRI_SIZET,
+ iobuf_pool->default_page_size);
+ gf_proc_dump_write("iobuf_pool.arena_size", "%" GF_PRI_SIZET,
+ iobuf_pool->arena_size);
+ gf_proc_dump_write("iobuf_pool.arena_cnt", "%d", iobuf_pool->arena_cnt);
+ gf_proc_dump_write("iobuf_pool.request_misses", "%" PRId64,
+ iobuf_pool->request_misses);
+
+ for (j = 0; j < IOBUF_ARENA_MAX_INDEX; j++) {
+ list_for_each_entry(trav, &iobuf_pool->arenas[j], list)
+ {
+ snprintf(msg, sizeof(msg), "arena.%d", i);
+ gf_proc_dump_add_section("%s", msg);
+ iobuf_arena_info_dump(trav, msg);
+ i++;
}
- gf_proc_dump_add_section("iobuf.global");
- gf_proc_dump_write("iobuf_pool","%p", iobuf_pool);
- gf_proc_dump_write("iobuf_pool.default_page_size", "%d",
- iobuf_pool->default_page_size);
- gf_proc_dump_write("iobuf_pool.arena_size", "%d",
- iobuf_pool->arena_size);
- gf_proc_dump_write("iobuf_pool.arena_cnt", "%d",
- iobuf_pool->arena_cnt);
- gf_proc_dump_write("iobuf_pool.request_misses", "%"PRId64,
- iobuf_pool->request_misses);
-
- for (j = 0; j < IOBUF_ARENA_MAX_INDEX; j++) {
- list_for_each_entry (trav, &iobuf_pool->arenas[j], list) {
- snprintf(msg, sizeof(msg),
- "arena.%d", i);
- gf_proc_dump_add_section(msg);
- iobuf_arena_info_dump(trav,msg);
- i++;
- }
- list_for_each_entry (trav, &iobuf_pool->purge[j], list) {
- snprintf(msg, sizeof(msg),
- "purge.%d", i);
- gf_proc_dump_add_section(msg);
- iobuf_arena_info_dump(trav,msg);
- i++;
- }
- list_for_each_entry (trav, &iobuf_pool->filled[j], list) {
- snprintf(msg, sizeof(msg),
- "filled.%d", i);
- gf_proc_dump_add_section(msg);
- iobuf_arena_info_dump(trav,msg);
- i++;
- }
-
+ list_for_each_entry(trav, &iobuf_pool->purge[j], list)
+ {
+ snprintf(msg, sizeof(msg), "purge.%d", i);
+ gf_proc_dump_add_section("%s", msg);
+ iobuf_arena_info_dump(trav, msg);
+ i++;
}
+ list_for_each_entry(trav, &iobuf_pool->filled[j], list)
+ {
+ snprintf(msg, sizeof(msg), "filled.%d", i);
+ gf_proc_dump_add_section("%s", msg);
+ iobuf_arena_info_dump(trav, msg);
+ i++;
+ }
+ }
- pthread_mutex_unlock(&iobuf_pool->mutex);
+ pthread_mutex_unlock(&iobuf_pool->mutex);
out:
- return;
+ return;
}
-
void
iobuf_to_iovec(struct iobuf *iob, struct iovec *iov)
{
- GF_VALIDATE_OR_GOTO ("iobuf", iob, out);
- GF_VALIDATE_OR_GOTO ("iobuf", iov, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iob, out);
+ GF_VALIDATE_OR_GOTO("iobuf", iov, out);
- iov->iov_base = iobuf_ptr (iob);
- iov->iov_len = iobuf_pagesize (iob);
+ iov->iov_base = iobuf_ptr(iob);
+ iov->iov_len = iobuf_pagesize(iob);
out:
- return;
+ return;
+}
+
+int
+iobuf_copy(struct iobuf_pool *iobuf_pool, const struct iovec *iovec_src,
+ int iovcnt, struct iobref **iobref, struct iobuf **iobuf,
+ struct iovec *iov_dst)
+{
+ size_t size = -1;
+ int ret = 0;
+
+ size = iov_length(iovec_src, iovcnt);
+
+ *iobuf = iobuf_get2(iobuf_pool, size);
+ if (!(*iobuf)) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ *iobref = iobref_new();
+ if (!(*iobref)) {
+ iobuf_unref(*iobuf);
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = iobref_add(*iobref, *iobuf);
+ if (ret) {
+ iobuf_unref(*iobuf);
+ iobref_unref(*iobref);
+ errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ iov_unload(iobuf_ptr(*iobuf), iovec_src, iovcnt);
+
+ iov_dst->iov_base = iobuf_ptr(*iobuf);
+ iov_dst->iov_len = size;
+
+out:
+ return ret;
}
diff --git a/libglusterfs/src/iobuf.h b/libglusterfs/src/iobuf.h
deleted file mode 100644
index b9c2a380711..00000000000
--- a/libglusterfs/src/iobuf.h
+++ /dev/null
@@ -1,161 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _IOBUF_H_
-#define _IOBUF_H_
-
-#include "list.h"
-#include "common-utils.h"
-#include <pthread.h>
-#include <sys/mman.h>
-#include <sys/uio.h>
-
-#define GF_VARIABLE_IOBUF_COUNT 32
-#define GF_IOBREF_IOBUF_COUNT 16
-
-/* Lets try to define the new anonymous mapping
- * flag, in case the system is still using the
- * now deprecated MAP_ANON flag.
- *
- * Also, this should ideally be in a centralized/common
- * header which can be used by other source files also.
- */
-#ifndef MAP_ANONYMOUS
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-
-#define GF_ALIGN_BUF(ptr,bound) ((void *)((unsigned long)(ptr + bound - 1) & \
- (unsigned long)(~(bound - 1))))
-
-#define GF_IOBUF_ALIGN_SIZE 512
-
-/* one allocatable unit for the consumers of the IOBUF API */
-/* each unit hosts @page_size bytes of memory */
-struct iobuf;
-
-/* one region of memory MMAPed from the operating system */
-/* each region MMAPs @arena_size bytes of memory */
-/* each arena hosts @arena_size / @page_size IOBUFs */
-struct iobuf_arena;
-
-/* expandable and contractable pool of memory, internally broken into arenas */
-struct iobuf_pool;
-
-struct iobuf_init_config {
- size_t pagesize;
- int32_t num_pages;
-};
-
-struct iobuf {
- union {
- struct list_head list;
- struct {
- struct iobuf *next;
- struct iobuf *prev;
- };
- };
- struct iobuf_arena *iobuf_arena;
-
- gf_lock_t lock; /* for ->ptr and ->ref */
- int ref; /* 0 == passive, >0 == active */
-
- void *ptr; /* usable memory region by the consumer */
-
- void *free_ptr; /* in case of stdalloc, this is the
- one to be freed */
-};
-
-
-struct iobuf_arena {
- union {
- struct list_head list;
- struct {
- struct iobuf_arena *next;
- struct iobuf_arena *prev;
- };
- };
-
- size_t page_size; /* size of all iobufs in this arena */
- size_t arena_size; /* this is equal to
- (iobuf_pool->arena_size / page_size)
- * page_size */
- size_t page_count;
-
- struct iobuf_pool *iobuf_pool;
-
- void *mem_base;
- struct iobuf *iobufs; /* allocated iobufs list */
-
- int active_cnt;
- struct iobuf active; /* head node iobuf
- (unused by itself) */
- int passive_cnt;
- struct iobuf passive; /* head node iobuf
- (unused by itself) */
- uint64_t alloc_cnt; /* total allocs in this pool */
- int max_active; /* max active buffers at a given time */
-};
-
-
-struct iobuf_pool {
- pthread_mutex_t mutex;
- size_t arena_size; /* size of memory region in
- arena */
- size_t default_page_size; /* default size of iobuf */
-
- int arena_cnt;
- struct list_head arenas[GF_VARIABLE_IOBUF_COUNT];
- /* array of arenas. Each element of the array is a list of arenas
- holding iobufs of particular page_size */
-
- struct list_head filled[GF_VARIABLE_IOBUF_COUNT];
- /* array of arenas without free iobufs */
-
- struct list_head purge[GF_VARIABLE_IOBUF_COUNT];
- /* array of of arenas which can be purged */
-
- uint64_t request_misses; /* mostly the requests for higher
- value of iobufs */
-};
-
-
-struct iobuf_pool *iobuf_pool_new (void);
-void iobuf_pool_destroy (struct iobuf_pool *iobuf_pool);
-struct iobuf *iobuf_get (struct iobuf_pool *iobuf_pool);
-void iobuf_unref (struct iobuf *iobuf);
-struct iobuf *iobuf_ref (struct iobuf *iobuf);
-void iobuf_pool_destroy (struct iobuf_pool *iobuf_pool);
-void iobuf_to_iovec(struct iobuf *iob, struct iovec *iov);
-
-#define iobuf_ptr(iob) ((iob)->ptr)
-#define iobpool_default_pagesize(iobpool) ((iobpool)->default_page_size)
-#define iobuf_pagesize(iob) (iob->iobuf_arena->page_size)
-
-
-struct iobref {
- gf_lock_t lock;
- int ref;
- struct iobuf *iobrefs[GF_IOBREF_IOBUF_COUNT];
-};
-
-struct iobref *iobref_new ();
-struct iobref *iobref_ref (struct iobref *iobref);
-void iobref_unref (struct iobref *iobref);
-int iobref_add (struct iobref *iobref, struct iobuf *iobuf);
-int iobref_merge (struct iobref *to, struct iobref *from);
-
-
-size_t iobuf_size (struct iobuf *iobuf);
-size_t iobref_size (struct iobref *iobref);
-void iobuf_stats_dump (struct iobuf_pool *iobuf_pool);
-
-struct iobuf *
-iobuf_get2 (struct iobuf_pool *iobuf_pool, size_t page_size);
-#endif /* !_IOBUF_H_ */
diff --git a/libglusterfs/src/latency.c b/libglusterfs/src/latency.c
index 58e8b915852..ce4b0e8255d 100644
--- a/libglusterfs/src/latency.c
+++ b/libglusterfs/src/latency.c
@@ -8,167 +8,77 @@
cases as published by the Free Software Foundation.
*/
-
/*
* This file contains functions to support dumping of
* latencies of FOPs broken down by subvolumes.
*/
-#include "glusterfs.h"
-#include "stack.h"
-#include "xlator.h"
-#include "common-utils.h"
-#include "statedump.h"
-
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/statedump.h"
-void
-gf_set_fop_from_fn_pointer (call_frame_t *frame, struct xlator_fops *fops, void *fn)
+gf_latency_t *
+gf_latency_new(size_t n)
{
- glusterfs_fop_t fop = -1;
+ int i = 0;
+ gf_latency_t *lat = NULL;
- if (fops->stat == fn)
- fop = GF_FOP_STAT;
- else if (fops->readlink == fn)
- fop = GF_FOP_READLINK;
- else if (fops->mknod == fn)
- fop = GF_FOP_MKNOD;
- else if (fops->mkdir == fn)
- fop = GF_FOP_MKDIR;
- else if (fops->unlink == fn)
- fop = GF_FOP_UNLINK;
- else if (fops->rmdir == fn)
- fop = GF_FOP_RMDIR;
- else if (fops->symlink == fn)
- fop = GF_FOP_SYMLINK;
- else if (fops->rename == fn)
- fop = GF_FOP_RENAME;
- else if (fops->link == fn)
- fop = GF_FOP_LINK;
- else if (fops->truncate == fn)
- fop = GF_FOP_TRUNCATE;
- else if (fops->open == fn)
- fop = GF_FOP_OPEN;
- else if (fops->readv == fn)
- fop = GF_FOP_READ;
- else if (fops->writev == fn)
- fop = GF_FOP_WRITE;
- else if (fops->statfs == fn)
- fop = GF_FOP_STATFS;
- else if (fops->flush == fn)
- fop = GF_FOP_FLUSH;
- else if (fops->fsync == fn)
- fop = GF_FOP_FSYNC;
- else if (fops->setxattr == fn)
- fop = GF_FOP_SETXATTR;
- else if (fops->getxattr == fn)
- fop = GF_FOP_GETXATTR;
- else if (fops->removexattr == fn)
- fop = GF_FOP_REMOVEXATTR;
- else if (fops->opendir == fn)
- fop = GF_FOP_OPENDIR;
- else if (fops->fsyncdir == fn)
- fop = GF_FOP_FSYNCDIR;
- else if (fops->access == fn)
- fop = GF_FOP_ACCESS;
- else if (fops->create == fn)
- fop = GF_FOP_CREATE;
- else if (fops->ftruncate == fn)
- fop = GF_FOP_FTRUNCATE;
- else if (fops->fstat == fn)
- fop = GF_FOP_FSTAT;
- else if (fops->lk == fn)
- fop = GF_FOP_LK;
- else if (fops->lookup == fn)
- fop = GF_FOP_LOOKUP;
- else if (fops->readdir == fn)
- fop = GF_FOP_READDIR;
- else if (fops->inodelk == fn)
- fop = GF_FOP_INODELK;
- else if (fops->finodelk == fn)
- fop = GF_FOP_FINODELK;
- else if (fops->entrylk == fn)
- fop = GF_FOP_ENTRYLK;
- else if (fops->fentrylk == fn)
- fop = GF_FOP_FENTRYLK;
- else if (fops->xattrop == fn)
- fop = GF_FOP_XATTROP;
- else if (fops->fxattrop == fn)
- fop = GF_FOP_FXATTROP;
- else if (fops->fgetxattr == fn)
- fop = GF_FOP_FGETXATTR;
- else if (fops->fsetxattr == fn)
- fop = GF_FOP_FSETXATTR;
- else if (fops->rchecksum == fn)
- fop = GF_FOP_RCHECKSUM;
- else if (fops->setattr == fn)
- fop = GF_FOP_SETATTR;
- else if (fops->fsetattr == fn)
- fop = GF_FOP_FSETATTR;
- else if (fops->readdirp == fn)
- fop = GF_FOP_READDIRP;
- else if (fops->getspec == fn)
- fop = GF_FOP_GETSPEC;
- else
- fop = -1;
+ lat = GF_MALLOC(n * sizeof(*lat), gf_common_mt_latency_t);
+ if (!lat)
+ return NULL;
- frame->op = fop;
+ for (i = 0; i < n; i++) {
+ gf_latency_reset(lat + i);
+ }
+ return lat;
}
-
void
-gf_update_latency (call_frame_t *frame)
+gf_latency_update(gf_latency_t *lat, struct timespec *begin,
+ struct timespec *end)
{
- double elapsed;
- struct timeval *begin, *end;
-
- fop_latency_t *lat;
+ if (!(begin->tv_sec && end->tv_sec)) {
+ /*Measure latency might have been enabled/disabled during the op*/
+ return;
+ }
- begin = &frame->begin;
- end = &frame->end;
+ double elapsed = gf_tsdiff(begin, end);
- elapsed = (end->tv_sec - begin->tv_sec) * 1e6
- + (end->tv_usec - begin->tv_usec);
+ if (lat->max < elapsed)
+ lat->max = elapsed;
- lat = &frame->this->latencies[frame->op];
+ if (lat->min > elapsed)
+ lat->min = elapsed;
- lat->total += elapsed;
- lat->count++;
- lat->mean = lat->mean + (elapsed - lat->mean) / lat->count;
+ lat->total += elapsed;
+ lat->count++;
}
-
void
-gf_proc_dump_latency_info (xlator_t *xl)
+gf_latency_reset(gf_latency_t *lat)
{
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i;
-
- snprintf (key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name);
- gf_proc_dump_add_section (key_prefix);
-
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- gf_proc_dump_build_key (key, key_prefix, gf_fop_list[i]);
-
- gf_proc_dump_write (key, "%.03f,%"PRId64",%.03f",
- xl->latencies[i].mean,
- xl->latencies[i].count,
- xl->latencies[i].total);
- }
+ if (!lat)
+ return;
+ memset(lat, 0, sizeof(*lat));
+ lat->min = ULLONG_MAX;
+ /* make sure 'min' is set to high value, so it would be
+ properly set later */
}
-
void
-gf_latency_toggle (int signum)
+gf_frame_latency_update(call_frame_t *frame)
{
- glusterfs_ctx_t *ctx = NULL;
-
- ctx = glusterfs_ctx_get ();
-
- if (ctx) {
- ctx->measure_latency = !ctx->measure_latency;
- gf_log ("[core]", GF_LOG_INFO,
- "Latency measurement turned %s",
- ctx->measure_latency ? "on" : "off");
- }
+ gf_latency_t *lat;
+ /* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros
+ set it right anyways for those frames */
+ if (!frame->op)
+ frame->op = frame->root->op;
+
+ if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) {
+ gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d",
+ frame->op);
+ return;
+ }
+
+ lat = &frame->this->stats.interval.latencies[frame->op];
+ gf_latency_update(lat, &frame->begin, &frame->end);
}
diff --git a/libglusterfs/src/latency.h b/libglusterfs/src/latency.h
deleted file mode 100644
index 16c5994b008..00000000000
--- a/libglusterfs/src/latency.h
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __LATENCY_H__
-#define __LATENCY_H__
-
-
-typedef struct fop_latency {
- uint64_t min; /* min time for the call (microseconds) */
- uint64_t max; /* max time for the call (microseconds) */
- double total; /* total time (microseconds) */
- double std; /* standard deviation */
- double mean; /* mean (microseconds) */
- uint64_t count;
-} fop_latency_t;
-
-void
-gf_latency_toggle (int signum);
-
-#endif /* __LATENCY_H__ */
diff --git a/libglusterfs/src/libglusterfs.sym b/libglusterfs/src/libglusterfs.sym
new file mode 100644
index 00000000000..5f18cd56cbe
--- /dev/null
+++ b/libglusterfs/src/libglusterfs.sym
@@ -0,0 +1,1193 @@
+are_dicts_equal
+args_access_cbk_store
+args_access_store
+args_create_cbk_store
+args_create_store
+args_discard_cbk_store
+args_discard_store
+args_entrylk_cbk_store
+args_entrylk_store
+args_fallocate_cbk_store
+args_fallocate_store
+args_fentrylk_cbk_store
+args_fentrylk_store
+args_fgetxattr_cbk_store
+args_fgetxattr_store
+args_finodelk_cbk_store
+args_finodelk_store
+args_flush_cbk_store
+args_flush_store
+args_fremovexattr_cbk_store
+args_fremovexattr_store
+args_fsetattr_cbk_store
+args_fsetattr_store
+args_fsetxattr_cbk_store
+args_fsetxattr_store
+args_fstat_cbk_store
+args_fstat_store
+args_fsync_cbk_store
+args_fsyncdir_cbk_store
+args_fsyncdir_store
+args_fsync_store
+args_ftruncate_cbk_store
+args_ftruncate_store
+args_fxattrop_cbk_store
+args_fxattrop_store
+args_getxattr_cbk_store
+args_getxattr_store
+args_inodelk_cbk_store
+args_inodelk_store
+args_ipc_cbk_store
+args_lease_cbk_store
+args_lease_store
+args_link_cbk_store
+args_link_store
+args_lk_cbk_store
+args_lk_store
+args_lookup_cbk_store
+args_lookup_store
+args_mkdir_cbk_store
+args_mkdir_store
+args_mknod_cbk_store
+args_mknod_store
+args_open_cbk_store
+args_opendir_cbk_store
+args_opendir_store
+args_open_store
+args_rchecksum_cbk_store
+args_rchecksum_store
+args_readdir_cbk_store
+args_readdirp_cbk_store
+args_readdirp_store
+args_readdir_store
+args_readlink_cbk_store
+args_readlink_store
+args_readv_cbk_store
+args_readv_store
+args_removexattr_cbk_store
+args_removexattr_store
+args_rename_cbk_store
+args_rename_store
+args_rmdir_cbk_store
+args_rmdir_store
+args_seek_cbk_store
+args_seek_store
+args_setattr_cbk_store
+args_setattr_store
+args_setxattr_cbk_store
+args_setxattr_store
+args_stat_cbk_store
+args_statfs_cbk_store
+args_statfs_store
+args_stat_store
+args_symlink_cbk_store
+args_symlink_store
+args_truncate_cbk_store
+args_truncate_store
+args_unlink_cbk_store
+args_unlink_store
+args_writev_cbk_store
+args_writev_store
+args_xattrop_cbk_store
+args_xattrop_store
+args_zerofill_cbk_store
+args_zerofill_store
+args_copy_file_range_cbk_store
+args_copy_file_range_store
+bin_to_data
+call_resume
+call_resume_keep_stub
+call_resume_wind
+call_stack_set_groups
+call_stub_destroy
+call_unwind_error
+call_unwind_error_keep_stub
+client_ctx_del
+client_ctx_get
+client_ctx_set
+client_dump
+close_fds_except
+cluster_fop_success_fill
+cluster_fstat
+cluster_ftruncate
+cluster_fxattrop
+cluster_getxattr
+cluster_inodelk
+cluster_link
+cluster_lookup
+cluster_mkdir
+cluster_mknod
+cluster_open
+cluster_readlink
+cluster_replies_wipe
+cluster_rmdir
+cluster_setattr
+cluster_setxattr
+cluster_symlink
+cluster_tiebreaker_inodelk
+cluster_uninodelk
+cluster_unlink
+cluster_xattrop
+cluster_xattrop_cbk
+copy_opts_to_child
+create_frame
+data_copy
+data_destroy
+data_from_dynptr
+data_from_uint64
+data_ref
+data_to_bin
+data_to_int32
+data_to_int64
+data_to_ptr
+data_to_str
+data_to_uint16
+data_to_uint32
+data_to_uint64
+data_to_uint8
+data_to_iatt
+data_unref
+default_access
+default_access_cbk
+default_access_failure_cbk
+default_access_resume
+default_create
+default_create_cbk
+default_create_failure_cbk
+default_create_resume
+default_discard
+default_discard_cbk
+default_discard_failure_cbk
+default_discard_resume
+default_entrylk
+default_entrylk_cbk
+default_entrylk_failure_cbk
+default_entrylk_resume
+default_fallocate
+default_fallocate_cbk
+default_fallocate_failure_cbk
+default_fallocate_resume
+default_fentrylk
+default_fentrylk_cbk
+default_fentrylk_failure_cbk
+default_fentrylk_resume
+default_fgetxattr
+default_fgetxattr_cbk
+default_fgetxattr_failure_cbk
+default_fgetxattr_resume
+default_finodelk
+default_finodelk_cbk
+default_finodelk_failure_cbk
+default_finodelk_resume
+default_flush
+default_flush_cbk
+default_flush_failure_cbk
+default_flush_resume
+default_forget
+default_fremovexattr
+default_fremovexattr_cbk
+default_fremovexattr_failure_cbk
+default_fremovexattr_resume
+default_fsetattr
+default_fsetattr_cbk
+default_fsetattr_failure_cbk
+default_fsetattr_resume
+default_fsetxattr
+default_fsetxattr_cbk
+default_fsetxattr_failure_cbk
+default_fsetxattr_resume
+default_fstat
+default_fstat_cbk
+default_fstat_failure_cbk
+default_fstat_resume
+default_fsync
+default_fsync_cbk
+default_fsyncdir
+default_fsyncdir_cbk
+default_fsyncdir_failure_cbk
+default_fsyncdir_resume
+default_fsync_failure_cbk
+default_fsync_resume
+default_ftruncate
+default_ftruncate_cbk
+default_ftruncate_failure_cbk
+default_ftruncate_resume
+default_fxattrop
+default_fxattrop_cbk
+default_fxattrop_failure_cbk
+default_fxattrop_resume
+default_getactivelk
+default_getactivelk_failure_cbk
+default_getactivelk_resume
+default_getspec
+default_getspec_cbk
+default_getxattr
+default_getxattr_cbk
+default_getxattr_failure_cbk
+default_getxattr_resume
+default_inodelk
+default_inodelk_cbk
+default_inodelk_failure_cbk
+default_inodelk_resume
+default_ipc
+default_ipc_cbk
+default_lease
+default_lease_failure_cbk
+default_lease_resume
+default_link
+default_link_cbk
+default_link_failure_cbk
+default_link_resume
+default_lk
+default_lk_cbk
+default_lk_failure_cbk
+default_lk_resume
+default_lookup
+default_lookup_cbk
+default_lookup_failure_cbk
+default_lookup_resume
+default_mem_acct_init
+default_mkdir
+default_mkdir_cbk
+default_mkdir_failure_cbk
+default_mkdir_resume
+default_mknod
+default_mknod_cbk
+default_mknod_failure_cbk
+default_mknod_resume
+default_notify
+default_open
+default_open_cbk
+default_opendir
+default_opendir_cbk
+default_opendir_failure_cbk
+default_opendir_resume
+default_open_failure_cbk
+default_open_resume
+default_rchecksum
+default_rchecksum_cbk
+default_rchecksum_failure_cbk
+default_rchecksum_resume
+default_readdir
+default_readdir_cbk
+default_readdir_failure_cbk
+default_readdirp
+default_readdirp_cbk
+default_readdirp_failure_cbk
+default_readdirp_resume
+default_readdir_resume
+default_readlink
+default_readlink_cbk
+default_readlink_failure_cbk
+default_readlink_resume
+default_readv
+default_readv_cbk
+default_readv_failure_cbk
+default_readv_resume
+default_release
+default_releasedir
+default_removexattr
+default_removexattr_cbk
+default_removexattr_failure_cbk
+default_removexattr_resume
+default_rename
+default_rename_cbk
+default_rename_failure_cbk
+default_rename_resume
+default_rmdir
+default_rmdir_cbk
+default_rmdir_failure_cbk
+default_rmdir_resume
+default_seek
+default_seek_cbk
+default_seek_failure_cbk
+default_seek_resume
+default_setactivelk
+default_setactivelk_failure_cbk
+default_setactivelk_resume
+default_setattr
+default_setattr_cbk
+default_setattr_failure_cbk
+default_setattr_resume
+default_setxattr
+default_setxattr_cbk
+default_setxattr_failure_cbk
+default_setxattr_resume
+default_stat
+default_stat_cbk
+default_stat_failure_cbk
+default_statfs
+default_statfs_cbk
+default_statfs_failure_cbk
+default_statfs_resume
+default_stat_resume
+default_symlink
+default_symlink_cbk
+default_symlink_failure_cbk
+default_symlink_resume
+default_truncate
+default_truncate_cbk
+default_truncate_failure_cbk
+default_truncate_resume
+default_unlink
+default_unlink_cbk
+default_unlink_failure_cbk
+default_unlink_resume
+default_writev
+default_writev_cbk
+default_writev_failure_cbk
+default_writev_resume
+default_xattrop
+default_xattrop_cbk
+default_xattrop_failure_cbk
+default_xattrop_resume
+default_zerofill
+default_zerofill_cbk
+default_zerofill_failure_cbk
+default_zerofill_resume
+default_put
+default_put_cbk
+default_put_failure_cbk
+default_put_resume
+default_copy_file_range
+default_copy_file_range_cbk
+default_copy_file_range_failure_cbk
+default_copy_file_range_resume
+dht_is_linkfile
+dict_add
+dict_addn
+dict_add_dynstr_with_alloc
+dict_allocate_and_serialize
+dict_copy
+dict_copy_with_ref
+dict_del
+dict_deln
+dict_dump_to_statedump
+dict_dump_to_str
+dict_dump_to_log
+dict_foreach
+dict_foreach_fnmatch
+dict_foreach_match
+dict_for_key_value
+dict_get
+dict_getn
+dict_get_bin
+dict_get_double
+dict_get_gfuuid
+dict_get_iatt
+dict_get_mdata
+dict_get_int16
+dict_get_int32
+dict_get_int32n
+dict_get_int64
+dict_get_int8
+dict_get_ptr
+dict_get_ptr_and_len
+dict_get_str
+dict_get_strn
+dict_get_str_boolean
+dict_get_uint32
+dict_get_uint64
+dict_get_with_ref
+dict_has_key_from_array
+dict_key_count
+dict_keys_join
+dict_lookup
+dict_new
+dict_null_foreach_fn
+dict_ref
+dict_remove_foreach_fn
+dict_rename_key
+dict_reset
+dict_serialize
+dict_serialized_length
+dict_serialized_length_lk
+dict_serialize_value_with_delim
+dict_set
+dict_setn
+dict_set_bin
+dict_set_double
+dict_set_dynptr
+dict_set_dynstr
+dict_set_dynstrn
+dict_set_dynstr_with_alloc
+dict_set_gfuuid
+dict_set_iatt
+dict_set_mdata
+dict_set_int16
+dict_set_int32
+dict_set_int32n
+dict_set_int64
+dict_set_int8
+dict_set_static_bin
+dict_set_static_ptr
+dict_set_str
+dict_set_strn
+dict_setn_nstrn
+dict_set_nstrn
+dict_set_uint32
+dict_set_uint64
+dict_set_flag
+dict_clear_flag
+dict_check_flag
+dict_unref
+dict_unserialize
+drop_token
+eh_destroy
+eh_dump
+eh_new
+eh_save_history
+entry_copy
+gf_event_dispatch
+gf_event_dispatch_destroy
+gf_event_handled
+gf_event_pool_destroy
+gf_event_pool_new
+gf_event_reconfigure_threads
+gf_event_register
+gf_event_select_on
+gf_event_unregister
+gf_event_unregister_close
+fd_anonymous
+fd_anonymous_with_flags
+fd_bind
+fd_close
+fd_create
+fd_create_uint64
+__fd_ctx_del
+fd_ctx_del
+fd_ctx_dump
+__fd_ctx_get
+fd_ctx_get
+__fd_ctx_set
+fd_ctx_set
+fd_is_anonymous
+fd_list_empty
+fd_lk_ctx_empty
+fd_lk_ctx_ref
+fd_lk_ctx_unref
+fd_lk_insert_and_merge
+fd_lookup
+fd_lookup_anonymous
+fd_lookup_uint64
+__fd_ref
+fd_ref
+fd_unref
+_fini
+fop_access_stub
+fop_create_stub
+fop_copy_file_range_stub
+fop_copy_file_range_cbk_stub
+fop_discard_stub
+fop_entrylk_stub
+fop_enum_to_pri_string
+fop_fallocate_stub
+fop_fentrylk_stub
+fop_fgetxattr_stub
+fop_finodelk_stub
+fop_flush_stub
+fop_fremovexattr_cbk_stub
+fop_fremovexattr_stub
+fop_fsetattr_stub
+fop_fsetxattr_cbk_stub
+fop_fsetxattr_stub
+fop_fstat_stub
+fop_fsync_cbk_stub
+fop_fsyncdir_stub
+fop_fsync_stub
+fop_ftruncate_cbk_stub
+fop_ftruncate_stub
+fop_fxattrop_stub
+fop_getactivelk_stub
+fop_getxattr_stub
+fop_icreate_stub
+fop_inodelk_stub
+fop_ipc_stub
+fop_lease_stub
+fop_link_stub
+fop_lk_stub
+fop_log_level
+fop_lookup_cbk_stub
+fop_lookup_stub
+fop_mkdir_stub
+fop_mknod_stub
+fop_namelink_stub
+fop_opendir_stub
+fop_open_stub
+fop_put_stub
+fop_rchecksum_stub
+fop_readdirp_stub
+fop_readdir_stub
+fop_readlink_stub
+fop_readv_stub
+fop_removexattr_cbk_stub
+fop_removexattr_stub
+fop_rename_cbk_stub
+fop_rename_stub
+fop_rmdir_cbk_stub
+fop_rmdir_stub
+fop_seek_stub
+fop_setactivelk_stub
+fop_setattr_stub
+fop_setxattr_stub
+fop_statfs_stub
+fop_stat_stub
+fop_symlink_stub
+fop_truncate_cbk_stub
+fop_truncate_stub
+fop_unlink_cbk_stub
+fop_unlink_stub
+fop_writev_cbk_stub
+fop_writev_stub
+fop_xattrop_stub
+fop_zerofill_stub
+generate_glusterfs_ctx_id
+get_checksum_for_file
+get_checksum_for_path
+get_file_mtime
+get_host_name
+get_mem_size
+get_path_name
+get_xlator_by_name
+get_xlator_by_type
+gf_array_insertionsort
+gf_asprintf
+gf_async
+gf_async_adjust_threads
+gf_async_ctrl
+gf_async_init
+gf_async_fini
+gf_backtrace_save
+gf_bits_count
+gf_bits_index
+gf_build_absolute_path
+__gf_calloc
+gf_canonicalize_path
+gf_check_log_format
+gf_check_logger
+gf_client_disconnect
+gf_client_dump_fdtables
+gf_client_dump_fdtables_to_dict
+gf_client_dump_inodes
+gf_client_dump_inodes_to_dict
+gf_client_get
+gf_client_put
+gf_client_ref
+gf_clienttable_alloc
+gf_client_unref
+gf_cmd_log
+gf_cmd_log_init
+gf_compare_sockaddr
+gf_deitransform
+gf_dirent_entry_free
+gf_dirent_for_name
+gf_dirent_free
+gf_dirent_orig_offset
+gf_dm_hashfn
+gf_dnscache_init
+gf_dnscache_deinit
+gf_errno_to_error
+gf_error_to_errno
+_gf_event
+gf_fd_fdptr_get
+gf_fd_fdtable_alloc
+gf_fd_fdtable_copy_all_fds
+gf_fd_fdtable_destroy
+gf_fd_fdtable_get_all_fds
+gf_fdptr_put
+gf_fd_put
+gf_fd_unused_get
+gf_fill_iatt_for_dirent
+gf_fop_int
+gf_fop_string
+__gf_free
+gf_free_mig_locks
+gf_getgrouplist
+gf_get_hostname_from_ip
+gf_get_index_by_elem
+gf_global_mem_acct_enable_set
+gfid_to_ino
+gf_inode_type_to_str
+gf_is_ip_in_net
+gf_is_local_addr
+gf_is_same_address
+gf_is_service_running
+gf_is_str_int
+gf_is_valid_xattr_namespace
+gf_is_zero_filled_stat
+gf_itransform
+gf_link_inodes_from_dirent
+_gf_log
+_gf_log_callingfn
+gf_log_disable_suppression_before_exit
+gf_log_dump_graph
+_gf_log_eh
+gf_log_fini
+gf_log_get_localtime
+gf_log_get_loglevel
+gf_log_globals_init
+gf_log_init
+gf_log_inject_timer_event
+gf_log_logrotate
+gf_log_set_localtime
+gf_log_set_log_buf_size
+gf_log_set_log_flush_timeout
+gf_log_set_logformat
+gf_log_set_logger
+gf_log_set_loglevel
+gf_lstat_dir
+__gf_malloc
+gf_mem_acct_enable_set
+gf_monitor_metrics
+_gf_msg
+_gf_msg_nomem
+gf_nwrite
+gf_path_strip_trailing_slashes
+gf_print_trace
+gf_proc_dump_add_section
+gf_proc_dump_info
+gf_proc_dump_init
+gf_proc_dump_mallinfo
+gf_proc_dump_mem_info
+gf_proc_dump_mem_info_to_dict
+gf_proc_dump_mempool_info
+gf_proc_dump_mempool_info_to_dict
+gf_proc_dump_pending_frames
+gf_proc_dump_pending_frames_to_dict
+gf_proc_dump_write
+gf_proc_dump_xlator_history
+gf_proc_dump_xlator_meminfo
+gf_proc_dump_xlator_private
+gf_proc_dump_xlator_profile
+gf_process_getspec_servers_list
+gf_process_reserved_ports
+__gf_realloc
+_gf_ref_get
+_gf_ref_init
+_gf_ref_put
+gf_resolve_ip6
+gf_resolve_path_parent
+gf_rev_dns_lookup
+gf_rev_dns_lookup_cached
+gf_rsync_strong_checksum
+gf_rsync_md5_checksum
+gf_rsync_weak_checksum
+gf_set_log_file_path
+gf_set_log_ident
+gf_set_timestamp
+gf_set_volfile_server_common
+_gf_smsg
+gf_sock_union_equal_addr
+gf_store_handle_create_on_absence
+gf_store_handle_destroy
+gf_store_handle_new
+gf_store_handle_retrieve
+gf_store_iter_destroy
+gf_store_iter_get_matching
+gf_store_iter_get_next
+gf_store_iter_new
+gf_store_lock
+gf_store_locked_local
+gf_store_mkdir
+gf_store_mkstemp
+gf_store_read_and_tokenize
+gf_store_rename_tmppath
+gf_store_retrieve_value
+gf_store_save_value
+gf_store_save_items
+gf_store_unlink_tmppath
+gf_store_unlock
+gf_string2boolean
+gf_string2bytesize_int64
+gf_string2bytesize_uint64
+gf_string2double
+gf_string2int
+gf_string2int32
+gf_string2percent
+gf_string2time
+gf_string2uint
+gf_string2uint32
+gf_string2uint64
+gf_string2uint_base10
+gf_strip_whitespace
+gf_strncpy
+gf_strTrim
+gf_strstr
+gf_thread_cleanup_xint
+gf_thread_create
+gf_thread_vcreate
+gf_thread_create_detached
+gf_thread_set_name
+gf_thread_set_vname
+gf_timer_call_after
+gf_timer_call_cancel
+gf_timer_registry_destroy
+_gf_timestuff
+gf_trim
+gf_tw_add_timer
+gf_tw_del_timer
+gf_tw_mod_timer
+gf_tw_mod_timer_pending
+gf_uint64_2human_readable
+gf_umount_lazy
+gf_update_latency
+gf_uuid_clear
+gf_uuid_compare
+gf_uuid_copy
+gf_uuid_is_null
+gf_uuid_generate
+gf_uuid_parse
+gf_uuid_unparse
+gf_valid_pid
+gf_vasprintf
+gf_volfile_reconfigure
+gf_xxh64_wrapper
+gf_zero_fill_stat
+gid_cache_add
+gid_cache_init
+gid_cache_lookup
+gid_cache_reconf
+gid_cache_release
+glusterd_check_log_level
+glusterfs_compute_sha256
+glusterfs_ctx_new
+glusterfs_ctx_tw_get
+glusterfs_ctx_tw_put
+glusterfs_delete_volfile_checksum
+glusterfs_globals_init
+glusterfs_graph_activate
+glusterfs_graph_attach
+glusterfs_graph_construct
+glusterfs_graph_deactivate
+glusterfs_graph_destroy
+glusterfs_graph_destroy_residual
+glusterfs_graph_prepare
+glusterfs_read_secure_access_file
+glusterfs_graph_print_file
+glusterfs_graph_set_first
+glusterfs_is_local_pathinfo
+glusterfs_leaf_position
+glusterfs_reachable_leaves
+__glusterfs_this_location
+glusterfs_this_set
+glusterfs_volfile_reconfigure
+glusterfs_xlator_link
+graph_reconf_validateopt
+_init
+inode_ctx_del2
+__inode_ctx_get0
+inode_ctx_get0
+__inode_ctx_get1
+inode_ctx_get1
+__inode_ctx_get2
+inode_ctx_get2
+inode_ctx_merge
+inode_ctx_reset0
+inode_ctx_reset1
+inode_ctx_reset2
+__inode_ctx_set0
+inode_ctx_set0
+__inode_ctx_set1
+inode_ctx_set1
+__inode_ctx_set2
+inode_ctx_set2
+inode_ctx_size
+inode_dump
+inode_dump_to_dict
+__inode_find
+inode_find
+inode_find_directory_name
+inode_forget
+inode_forget_with_unref
+inode_from_path
+inode_grep
+inode_grep_for_gfid
+inode_has_dentry
+inode_invalidate
+inode_is_linked
+inode_link
+inode_lookup
+inode_needs_lookup
+inode_new
+inode_parent
+__inode_path
+inode_path
+inode_ref
+inode_ref_reduce_by_n
+inode_rename
+inode_resolve
+inode_set_need_lookup
+inode_table_ctx_free
+inode_table_destroy
+inode_table_destroy_all
+inode_table_dump
+inode_table_dump_to_dict
+inode_table_new
+inode_table_with_invalidator
+__inode_table_set_lru_limit
+inode_table_set_lru_limit
+inode_unlink
+inode_unref
+int_to_data
+iobref_add
+iobref_clear
+iobref_merge
+iobref_new
+iobref_ref
+iobref_size
+iobref_unref
+iobuf_get
+iobuf_get2
+iobuf_get_page_aligned
+iobuf_pool_destroy
+iobuf_pool_new
+iobuf_size
+iobuf_to_iovec
+iobuf_unref
+iobuf_copy
+is_data_equal
+__is_fuse_call
+is_gf_log_command
+is_graph_topology_equal
+__is_root_gfid
+is_valid_lease_id
+leaseid_utoa
+gf_existing_leaseid
+gf_leaseid_get
+list_node_add
+list_node_add_order
+list_node_del
+lkowner_utoa
+loc_copy
+loc_copy_overload_parent
+loc_gfid
+loc_gfid_utoa
+loc_is_nameless
+loc_is_root
+loc_pargfid
+loc_path
+loc_touchup
+loc_wipe
+log_base2
+_mask_cancellation
+mask_match
+mem_get
+mem_get0
+mem_pool_destroy
+mem_pool_new_fn
+mem_pools_fini
+mem_pools_init
+mem_put
+mkdir_p
+next_token
+nwstrtail
+os_daemon
+os_daemon_return
+parser_deinit
+parser_get_next_match
+parser_init
+parser_set_string
+parser_unset_string
+quota_conf_read_gfid
+quota_conf_read_version
+quota_conf_skip_header
+quota_data_to_meta
+quota_dict_get_inode_meta
+quota_dict_get_meta
+quota_dict_set_meta
+quota_meta_is_null
+rb_create
+rb_delete
+rb_destroy
+rb_find
+rb_probe
+rbthash_get
+rbthash_insert
+rbthash_remove
+rbthash_table_destroy
+rbthash_table_init
+rbuf_dtor
+rbuf_get_buffer
+rbuf_init
+rbuf_reserve_write_area
+rbuf_wait_for_completion
+rbuf_write_complete
+recursive_rmdir
+runcmd
+runinit
+runner
+runner_add_arg
+runner_add_args
+runner_argprintf
+runner_chio
+runner_end
+runner_log
+runner_redir
+runner_run
+runner_run_nowait
+runner_run_reuse
+runner_start
+set_sys_log_level
+skipwhite
+strfd_close
+strfd_open
+strprintf
+strtail
+str_to_data
+SuperFastHash
+syncbarrier_destroy
+syncbarrier_init
+syncbarrier_wait
+syncbarrier_wake
+synccond_init
+synccond_destroy
+synccond_wait
+synccond_timedwait
+synccond_signal
+synccond_broadcast
+syncenv_destroy
+syncenv_new
+synclock_destroy
+synclock_init
+synclock_lock
+synclock_trylock
+synclock_unlock
+syncop_access
+syncop_close
+syncop_create
+syncop_copy_file_range
+syncopctx_getctx
+syncopctx_setfsgid
+syncopctx_setfsgroups
+syncopctx_setfslkowner
+syncopctx_setfspid
+syncopctx_setfsuid
+syncop_dirfd
+syncop_dir_scan
+syncop_discard
+syncop_fallocate
+syncop_flush
+syncop_fgetxattr
+syncop_fremovexattr
+syncop_fsetattr
+syncop_fsetxattr
+syncop_fstat
+syncop_fsync
+syncop_fsyncdir
+syncop_ftruncate
+syncop_ftw
+syncop_ftw_throttle
+syncop_fxattrop
+syncop_getactivelk
+syncop_getxattr
+syncop_gfid_to_path
+syncop_gfid_to_path_hard
+syncop_inode_find
+syncop_inodelk
+syncop_entrylk
+syncop_ipc
+syncop_is_subvol_local
+syncop_link
+syncop_listxattr
+syncop_lk
+syncop_lookup
+syncop_mkdir
+syncop_mknod
+syncop_mt_dir_scan
+syncop_open
+syncop_opendir
+syncop_readdir
+syncop_readdirp
+syncop_readlink
+syncop_readv
+syncop_removexattr
+syncop_rename
+syncop_rmdir
+syncop_seek
+syncop_setactivelk
+syncop_setattr
+syncop_setxattr
+syncop_stat
+syncop_statfs
+syncop_symlink
+syncop_truncate
+syncop_unlink
+syncop_write
+syncop_writev
+syncop_xattrop
+syncop_zerofill
+syncop_lease
+synctask_get
+synctask_new
+synctask_new1
+synctask_set
+synctask_setid
+synctask_sleep
+synctask_wake
+synctask_yield
+sys_access
+sys_chmod
+sys_chown
+sys_close
+sys_closedir
+sys_copy_file_range
+sys_creat
+sys_fallocate
+sys_fchmod
+sys_fchown
+sys_fdatasync
+sys_fgetxattr
+sys_flistxattr
+sys_fremovexattr
+sys_fsetxattr
+sys_fstat
+sys_fstatat
+sys_fsync
+sys_ftruncate
+sys_futimes
+sys_lchown
+sys_lgetxattr
+sys_link
+sys_linkat
+sys_llistxattr
+sys_lremovexattr
+sys_lseek
+sys_lsetxattr
+sys_lstat
+sys_mkdir
+sys_mkdirat
+sys_mknod
+sys_open
+sys_openat
+sys_opendir
+sys_pread
+sys_pwrite
+sys_pwritev
+sys_read
+sys_readdir
+sys_readlink
+sys_readv
+sys_rename
+sys_rmdir
+sys_stat
+sys_statvfs
+sys_symlink
+sys_symlinkat
+sys_truncate
+sys_unlink
+sys_unlinkat
+sys_utimensat
+sys_write
+sys_writev
+sys_socket
+sys_accept
+sys_kill
+sys_sysctl
+tbf_init
+tbf_throttle
+timespec_now
+timespec_now_realtime
+timespec_sub
+timespec_adjust_delta
+timespec_cmp
+token_iter_init
+trap
+trie_add
+trie_destroy
+trie_measure
+trie_measure_vec
+trie_new
+trienode_get_word
+_unmask_cancellation
+uuid_utoa
+uuid_utoa_r
+validate_brick_name
+valid_host_name
+valid_ipv4_address
+valid_internet_address
+xlator_destroy
+xlator_foreach
+xlator_foreach_depth_first
+xlator_init
+xlator_mem_acct_init
+xlator_mem_acct_unref
+xlator_notify
+xlator_option_info_list
+xlator_option_init_bool
+xlator_option_init_double
+xlator_option_init_int32
+xlator_option_init_path
+xlator_option_init_percent
+xlator_option_init_percent_or_size
+xlator_option_init_size
+xlator_option_init_size_uint64
+xlator_option_init_size_int64
+xlator_option_init_str
+xlator_option_init_time
+xlator_option_init_uint32
+xlator_option_init_uint64
+xlator_option_init_int64
+xlator_option_init_xlator
+xlator_option_reconf_bool
+xlator_option_reconf_int32
+xlator_option_reconf_path
+xlator_option_reconf_percent
+xlator_option_reconf_percent_or_size
+xlator_option_reconf_size
+xlator_option_reconf_size_uint64
+xlator_option_reconf_size_int64
+xlator_option_reconf_str
+xlator_option_reconf_time
+xlator_option_reconf_uint32
+xlator_option_reconf_uint64
+xlator_option_reconf_int64
+xlator_option_reconf_xlator
+xlator_options_validate
+xlator_options_validate_list
+xlator_option_validate
+xlator_option_validate_addr_list
+xlator_search_by_name
+xlator_set_inode_lru_limit
+xlator_set_type
+xlator_set_type_virtual
+xlator_subvolume_count
+xlator_tree_free_members
+xlator_volopt_dynload
+xlator_volume_option_get
+xlator_volume_option_get_list
+xlator_memrec_free
+xlator_mem_cleanup
+gluster_graph_take_reference
+default_fops
+gf_fop_list
+gf_upcall_list
+vol_type_str
+global_ctx
+global_ctx_mutex
+global_xlator
+use_spinlocks
+dump_options
+glusterfs_leaseid_buf_get
+glusterfs_leaseid_exist
+gf_replace_old_iatt_in_dict
+gf_replace_new_iatt_in_dict
+gf_changelog_init
+gf_changelog_register_generic
+gf_gfid_generate_from_xxh64
+find_xlator_option_in_cmd_args_t
+gf_d_type_from_ia_type
+glusterfs_graph_fini
+glusterfs_process_svc_attach_volfp
+glusterfs_mux_volfile_reconfigure
+glusterfs_process_svc_detach
+mgmt_is_multiplexed_daemon
+xlator_is_cleanup_starting
+gf_nanosleep
+gf_syncfs
+graph_total_client_xlator
+get_xattrs_to_heal
+gf_latency_statedump_and_reset
+gf_latency_new
+gf_latency_reset
+gf_latency_update
+gf_frame_latency_update
diff --git a/libglusterfs/src/list.h b/libglusterfs/src/list.h
deleted file mode 100644
index 35fccdf25a5..00000000000
--- a/libglusterfs/src/list.h
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _LLIST_H
-#define _LLIST_H
-
-
-struct list_head {
- struct list_head *next;
- struct list_head *prev;
-};
-
-
-#define INIT_LIST_HEAD(head) do { \
- (head)->next = (head)->prev = head; \
- } while (0)
-
-
-static inline void
-list_add (struct list_head *new, struct list_head *head)
-{
- new->prev = head;
- new->next = head->next;
-
- new->prev->next = new;
- new->next->prev = new;
-}
-
-
-static inline void
-list_add_tail (struct list_head *new, struct list_head *head)
-{
- new->next = head;
- new->prev = head->prev;
-
- new->prev->next = new;
- new->next->prev = new;
-}
-
-
-static inline void
-list_del (struct list_head *old)
-{
- old->prev->next = old->next;
- old->next->prev = old->prev;
-
- old->next = (void *)0xbabebabe;
- old->prev = (void *)0xcafecafe;
-}
-
-
-static inline void
-list_del_init (struct list_head *old)
-{
- old->prev->next = old->next;
- old->next->prev = old->prev;
-
- old->next = old;
- old->prev = old;
-}
-
-
-static inline void
-list_move (struct list_head *list, struct list_head *head)
-{
- list_del (list);
- list_add (list, head);
-}
-
-
-static inline void
-list_move_tail (struct list_head *list, struct list_head *head)
-{
- list_del (list);
- list_add_tail (list, head);
-}
-
-
-static inline int
-list_empty (struct list_head *head)
-{
- return (head->next == head);
-}
-
-
-static inline void
-__list_splice (struct list_head *list, struct list_head *head)
-{
- (list->prev)->next = (head->next);
- (head->next)->prev = (list->prev);
-
- (head)->next = (list->next);
- (list->next)->prev = (head);
-}
-
-
-static inline void
-list_splice (struct list_head *list, struct list_head *head)
-{
- if (list_empty (list))
- return;
-
- __list_splice (list, head);
-}
-
-
-/* Splice moves @list to the head of the list at @head. */
-static inline void
-list_splice_init (struct list_head *list, struct list_head *head)
-{
- if (list_empty (list))
- return;
-
- __list_splice (list, head);
- INIT_LIST_HEAD (list);
-}
-
-
-static inline void
-__list_append (struct list_head *list, struct list_head *head)
-{
- (head->prev)->next = (list->next);
- (list->next)->prev = (head->prev);
- (head->prev) = (list->prev);
- (list->prev)->next = head;
-}
-
-
-static inline void
-list_append (struct list_head *list, struct list_head *head)
-{
- if (list_empty (list))
- return;
-
- __list_append (list, head);
-}
-
-
-/* Append moves @list to the end of @head */
-static inline void
-list_append_init (struct list_head *list, struct list_head *head)
-{
- if (list_empty (list))
- return;
-
- __list_append (list, head);
- INIT_LIST_HEAD (list);
-}
-
-
-#define list_entry(ptr, type, member) \
- ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-
-
-#define list_for_each(pos, head) \
- for (pos = (head)->next; pos != (head); pos = pos->next)
-
-
-#define list_for_each_entry(pos, head, member) \
- for (pos = list_entry((head)->next, typeof(*pos), member); \
- &pos->member != (head); \
- pos = list_entry(pos->member.next, typeof(*pos), member))
-
-
-#define list_for_each_entry_safe(pos, n, head, member) \
- for (pos = list_entry((head)->next, typeof(*pos), member), \
- n = list_entry(pos->member.next, typeof(*pos), member); \
- &pos->member != (head); \
- pos = n, n = list_entry(n->member.next, typeof(*n), member))
-
-#endif /* _LLIST_H */
diff --git a/libglusterfs/src/lkowner.h b/libglusterfs/src/lkowner.h
deleted file mode 100644
index 969d13e5044..00000000000
--- a/libglusterfs/src/lkowner.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _LK_OWNER_H
-#define _LK_OWNER_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#define GF_MAX_LOCK_OWNER_LEN 1024 /* 1kB as per NLM */
-
-/* 16strings-16strings-... */
-#define GF_LKOWNER_BUF_SIZE ((GF_MAX_LOCK_OWNER_LEN * 2) + \
- (GF_MAX_LOCK_OWNER_LEN / 8))
-
-typedef struct gf_lkowner_ {
- int len;
- char data[GF_MAX_LOCK_OWNER_LEN];
-} gf_lkowner_t;
-
-
-/* LKOWNER to string functions */
-static inline void
-lkowner_unparse (gf_lkowner_t *lkowner, char *buf, int buf_len)
-{
- int i = 0;
- int j = 0;
-
- for (i = 0; i < lkowner->len; i++) {
- if (i && !(i % 8)) {
- buf[j] = '-';
- j++;
- }
- sprintf (&buf[j], "%02hhx", lkowner->data[i]);
- j += 2;
- if (j == buf_len)
- break;
- }
- if (j < buf_len)
- buf[j] = '\0';
-}
-
-static inline void
-set_lk_owner_from_ptr (gf_lkowner_t *lkowner, void *data)
-{
- int i = 0;
- int j = 0;
-
- lkowner->len = sizeof (unsigned long);
- for (i = 0, j = 0; i < lkowner->len; i++, j += 8) {
- lkowner->data[i] = (char)((((unsigned long)data) >> j) & 0xff);
- }
-}
-
-static inline void
-set_lk_owner_from_uint64 (gf_lkowner_t *lkowner, uint64_t data)
-{
- int i = 0;
- int j = 0;
-
- lkowner->len = 8;
- for (i = 0, j = 0; i < lkowner->len; i++, j += 8) {
- lkowner->data[i] = (char)((data >> j) & 0xff);
- }
-}
-
-/* Return true if the locks have the same owner */
-static inline int
-is_same_lkowner (gf_lkowner_t *l1, gf_lkowner_t *l2)
-{
- return ((l1->len == l2->len) && !memcmp(l1->data, l2->data, l1->len));
-}
-
-#endif /* _LK_OWNER_H */
diff --git a/libglusterfs/src/locking.c b/libglusterfs/src/locking.c
new file mode 100644
index 00000000000..7577054e33a
--- /dev/null
+++ b/libglusterfs/src/locking.c
@@ -0,0 +1,27 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#if defined(HAVE_SPINLOCK)
+/* None of this matters otherwise. */
+
+#include <pthread.h>
+#include <unistd.h>
+
+#define LOCKING_IMPL
+#include "glusterfs/locking.h"
+
+int use_spinlocks = 0;
+
+static void __attribute__((constructor)) gf_lock_setup(void)
+{
+ // use_spinlocks = (sysconf(_SC_NPROCESSORS_ONLN) > 1);
+}
+
+#endif
diff --git a/libglusterfs/src/locking.h b/libglusterfs/src/locking.h
deleted file mode 100644
index 79c6992af0b..00000000000
--- a/libglusterfs/src/locking.h
+++ /dev/null
@@ -1,40 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _LOCKING_H
-#define _LOCKING_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <pthread.h>
-
-#if HAVE_SPINLOCK
-#define LOCK_INIT(x) pthread_spin_init (x, 0)
-#define LOCK(x) pthread_spin_lock (x)
-#define TRY_LOCK(x) pthread_spin_trylock (x)
-#define UNLOCK(x) pthread_spin_unlock (x)
-#define LOCK_DESTROY(x) pthread_spin_destroy (x)
-
-typedef pthread_spinlock_t gf_lock_t;
-#else
-#define LOCK_INIT(x) pthread_mutex_init (x, 0)
-#define LOCK(x) pthread_mutex_lock (x)
-#define TRY_LOCK(x) pthread_mutex_trylock (x)
-#define UNLOCK(x) pthread_mutex_unlock (x)
-#define LOCK_DESTROY(x) pthread_mutex_destroy (x)
-
-typedef pthread_mutex_t gf_lock_t;
-#endif /* HAVE_SPINLOCK */
-
-
-#endif /* _LOCKING_H */
diff --git a/libglusterfs/src/logging.c b/libglusterfs/src/logging.c
index 6071269e01c..a930d3e3b63 100644
--- a/libglusterfs/src/logging.c
+++ b/libglusterfs/src/logging.c
@@ -8,11 +8,6 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
@@ -21,696 +16,2431 @@
#include <locale.h>
#include <string.h>
#include <stdlib.h>
-
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-
-#ifdef GF_LINUX_HOST_OS
#include <syslog.h>
-#endif
+#include <sys/resource.h>
#ifdef HAVE_BACKTRACE
#include <execinfo.h>
+#else
+#include "execinfo_compat.h"
#endif
+#include <sys/stat.h>
-static pthread_mutex_t logfile_mutex;
-static char *filename = NULL;
-static uint8_t logrotate = 0;
-static FILE *logfile = NULL;
-static gf_loglevel_t loglevel = GF_LOG_INFO;
-static int gf_log_syslog = 1;
-static gf_loglevel_t sys_log_level = GF_LOG_CRITICAL;
+#include "glusterfs/syscall.h"
-char gf_log_xl_log_set;
-gf_loglevel_t gf_log_loglevel = GF_LOG_INFO; /* extern'd */
-FILE *gf_log_logfile;
+#define GF_JSON_MSG_LENGTH 8192
+#define GF_SYSLOG_CEE_FORMAT \
+ "@cee: {\"msg\": \"%s\", \"gf_code\": \"%u\", \"gf_message\": \"%s\"}"
+#define GF_LOG_CONTROL_FILE "/etc/glusterfs/logger.conf"
+#define GF_LOG_BACKTRACE_DEPTH 5
+#define GF_LOG_BACKTRACE_SIZE 4096
+#define GF_MAX_SLOG_PAIR_COUNT 100
-static char *cmd_log_filename = NULL;
-static FILE *cmdlogfile = NULL;
+#include "glusterfs/logging.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/timer.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+/* Do not replace gf_log in TEST_LOG with gf_msg, as there is a slight chance
+ * that it could lead to an infinite recursion.*/
+#define TEST_LOG(__msg, __args...) \
+ gf_log("logging-infra", GF_LOG_DEBUG, __msg, ##__args);
+
+static void
+gf_log_flush_timeout_cbk(void *data);
+
+int
+gf_log_inject_timer_event(glusterfs_ctx_t *ctx);
+
+static void
+gf_log_flush_extra_msgs(glusterfs_ctx_t *ctx, uint32_t new);
+
+static int
+log_buf_init(log_buf_t *buf, const char *domain, const char *file,
+ const char *function, int32_t line, gf_loglevel_t level,
+ int errnum, uint64_t msgid, char **appmsgstr, int graph_id);
+static void
+gf_log_rotate(glusterfs_ctx_t *ctx);
+
+static char gf_level_strings[] = {
+ ' ', /* NONE */
+ 'M', /* EMERGENCY */
+ 'A', /* ALERT */
+ 'C', /* CRITICAL */
+ 'E', /* ERROR */
+ 'W', /* WARNING */
+ 'N', /* NOTICE */
+ 'I', /* INFO */
+ 'D', /* DEBUG */
+ 'T', /* TRACE */
+};
void
-gf_log_logrotate (int signum)
+gf_log_logrotate(int signum)
{
- logrotate = 1;
+ if (THIS->ctx) {
+ THIS->ctx->log.logrotate = 1;
+ THIS->ctx->log.cmd_history_logrotate = 1;
+ }
}
void
-gf_log_enable_syslog (void)
+gf_log_enable_syslog(void)
{
- gf_log_syslog = 1;
+ if (THIS->ctx)
+ THIS->ctx->log.gf_log_syslog = 1;
}
void
-gf_log_disable_syslog (void)
+gf_log_disable_syslog(void)
{
- gf_log_syslog = 0;
+ if (THIS->ctx)
+ THIS->ctx->log.gf_log_syslog = 0;
}
gf_loglevel_t
-gf_log_get_loglevel (void)
+gf_log_get_loglevel(void)
{
- return loglevel;
+ if (THIS->ctx)
+ return THIS->ctx->log.loglevel;
+ else
+ /* return global defaults (see gf_log_globals_init) */
+ return GF_LOG_INFO;
}
void
-gf_log_set_loglevel (gf_loglevel_t level)
+gf_log_set_loglevel(glusterfs_ctx_t *ctx, gf_loglevel_t level)
{
- gf_log_loglevel = loglevel = level;
+ if (ctx)
+ ctx->log.loglevel = level;
}
+int
+gf_log_get_localtime(void)
+{
+ if (THIS->ctx)
+ return THIS->ctx->log.localtime;
+ else
+ /* return global defaults (see gf_log_globals_init) */
+ return 0;
+}
-gf_loglevel_t
-gf_log_get_xl_loglevel (void *this)
+void
+gf_log_set_localtime(int on_off)
{
- xlator_t *xl = this;
- if (!xl)
- return 0;
- return xl->loglevel;
+ if (THIS->ctx)
+ THIS->ctx->log.localtime = on_off;
}
void
-gf_log_set_xl_loglevel (void *this, gf_loglevel_t level)
+gf_log_flush(void)
{
- xlator_t *xl = this;
- if (!xl)
- return;
- gf_log_xl_log_set = 1;
- xl->loglevel = level;
+ xlator_t *this = THIS;
+ glusterfs_ctx_t *ctx = this->ctx;
+
+ if (ctx && ctx->log.logger == gf_logger_glusterlog) {
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ fflush(ctx->log.gf_log_logfile);
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+ }
+
+ return;
}
void
-gf_log_fini (void)
+gf_log_set_xl_loglevel(void *this, gf_loglevel_t level)
{
- pthread_mutex_destroy (&logfile_mutex);
+ xlator_t *xl = this;
+ if (!xl)
+ return;
+ xl->loglevel = level;
}
+/* TODO: The following get/set functions are yet not invoked from anywhere
+ * in the code. The _intention_ is to pass CLI arguments to various daemons
+ * that are started, which would then invoke these set APIs as required.
+ *
+ * glusterd would read the defaults from its .vol file configuration shipped
+ * as a part of the packages distributed.
+ *
+ * For any gluster* daemon that is started the shipped configuration becomes the
+ * default, if a volume has to change its logging format or logger, then a
+ * gluster CLI is invoked to set this property for the volume in question.
+ *
+ * The property is maintained by glusterd, and passed to the daemon as a CLI
+ * option, IOW persistence of the option is maintained by glusterd persistent
+ * storage (i.e .vol file) only
+ *
+ * care needs to be taken to configure and start daemons based on the versions
+ * that supports these features */
void
-gf_log_globals_init (void)
+gf_log_set_logformat(gf_log_format_t format)
{
- pthread_mutex_init (&logfile_mutex, NULL);
+ if (THIS->ctx)
+ THIS->ctx->log.logformat = format;
+}
-#ifdef GF_LINUX_HOST_OS
- /* For the 'syslog' output. one can grep 'GlusterFS' in syslog
- for serious logs */
- openlog ("GlusterFS", LOG_PID, LOG_DAEMON);
-#endif
+void
+gf_log_set_logger(gf_log_logger_t logger)
+{
+ if (THIS->ctx)
+ THIS->ctx->log.logger = logger;
}
-int
-gf_log_init (const char *file)
+gf_loglevel_t
+gf_log_get_xl_loglevel(void *this)
{
- int fd = -1;
+ xlator_t *xl = this;
+ if (!xl)
+ return 0;
+ return xl->loglevel;
+}
- if (!file){
- fprintf (stderr, "ERROR: no filename specified\n");
- return -1;
- }
+void
+gf_log_set_log_buf_size(uint32_t buf_size)
+{
+ uint32_t old = 0;
+ glusterfs_ctx_t *ctx = THIS->ctx;
+
+ pthread_mutex_lock(&ctx->log.log_buf_lock);
+ {
+ old = ctx->log.lru_size;
+ ctx->log.lru_size = buf_size;
+ }
+ pthread_mutex_unlock(&ctx->log.log_buf_lock);
+
+ /* If the old size is less than/equal to the new size, then do nothing.
+ *
+ * But if the new size is less than the old size, then
+ * a. If the cur size of the buf is less than or equal the new size,
+ * then do nothing.
+ * b. But if the current size of the buf is greater than the new size,
+ * then flush the least recently used (cur size - new_size) msgs
+ * to disk.
+ */
+ if (buf_size < old)
+ gf_log_flush_extra_msgs(ctx, buf_size);
+}
- if (strcmp (file, "-") == 0) {
- gf_log_logfile = stderr;
+void
+gf_log_set_log_flush_timeout(uint32_t timeout)
+{
+ THIS->ctx->log.timeout = timeout;
+}
- return 0;
- }
+/* If log_buf_init() fails (indicated by a return value of -1),
+ * call log_buf_destroy() to clean up memory allocated in heap and to return
+ * the log_buf_t object back to its memory pool.
+ */
+static int
+log_buf_init(log_buf_t *buf, const char *domain, const char *file,
+ const char *function, int32_t line, gf_loglevel_t level,
+ int errnum, uint64_t msgid, char **appmsgstr, int graph_id)
+{
+ int ret = -1;
- filename = gf_strdup (file);
- if (!filename) {
- fprintf (stderr, "ERROR: updating log-filename failed: %s\n",
- strerror (errno));
- return -1;
+ if (!buf || !domain || !file || !function || !appmsgstr || !*appmsgstr)
+ goto out;
+
+ buf->msg = gf_strdup(*appmsgstr);
+ if (!buf->msg)
+ goto out;
+
+ buf->msg_id = msgid;
+ buf->errnum = errnum;
+ buf->domain = gf_strdup(domain);
+ if (!buf->domain)
+ goto out;
+
+ buf->file = gf_strdup(file);
+ if (!buf->file)
+ goto out;
+
+ buf->function = gf_strdup(function);
+ if (!buf->function)
+ goto out;
+
+ buf->line = line;
+ buf->level = level;
+ buf->refcount = 0;
+ buf->graph_id = graph_id;
+ INIT_LIST_HEAD(&buf->msg_list);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+log_buf_destroy(log_buf_t *buf)
+{
+ if (!buf)
+ return -1;
+
+ GF_FREE(buf->msg);
+ GF_FREE(buf->domain);
+ GF_FREE(buf->file);
+ GF_FREE(buf->function);
+
+ mem_put(buf);
+ return 0;
+}
+
+static void
+gf_log_rotate(glusterfs_ctx_t *ctx)
+{
+ int fd = -1;
+ FILE *new_logfile = NULL;
+ FILE *old_logfile = NULL;
+
+ /* not involving locks on initial check to speed it up */
+ if (ctx->log.logrotate) {
+ /* let only one winner through on races */
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+
+ if (!ctx->log.logrotate) {
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+ return;
+ } else {
+ ctx->log.logrotate = 0;
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
}
- fd = open (file, O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
+ fd = sys_open(ctx->log.filename, O_CREAT | O_WRONLY | O_APPEND,
+ S_IRUSR | S_IWUSR);
if (fd < 0) {
- fprintf (stderr, "ERROR: failed to create logfile \"%s\" (%s)\n",
- file, strerror (errno));
- return -1;
+ gf_smsg("logrotate", GF_LOG_ERROR, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, NULL);
+ return;
}
- close (fd);
- logfile = fopen (file, "a");
- if (!logfile){
- fprintf (stderr, "ERROR: failed to open logfile \"%s\" (%s)\n",
- file, strerror (errno));
- return -1;
+ new_logfile = fdopen(fd, "a");
+ if (!new_logfile) {
+ gf_smsg("logrotate", GF_LOG_CRITICAL, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, "filename=%s",
+ ctx->log.filename, NULL);
+ sys_close(fd);
+ return;
}
- gf_log_logfile = logfile;
-
- return 0;
-}
-
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile)
+ old_logfile = ctx->log.logfile;
+ ctx->log.gf_log_logfile = ctx->log.logfile = new_logfile;
+ }
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
-struct _msg_queue {
- struct list_head msgs;
-};
+ if (old_logfile != NULL)
+ fclose(old_logfile);
+ }
-struct _log_msg {
- const char *msg;
- struct list_head queue;
-};
+ return;
+}
+void
+gf_log_globals_fini(void)
+{
+ /* TODO: Nobody is invoking the fini, but cleanup needs to happen here,
+ * needs cleanup for, log.ident, log.filename, closelog, log file close
+ * rotate state, possibly under a lock */
+ pthread_mutex_destroy(&THIS->ctx->log.logfile_mutex);
+ pthread_mutex_destroy(&THIS->ctx->log.log_buf_lock);
+}
void
-gf_log_lock (void)
+gf_log_disable_suppression_before_exit(glusterfs_ctx_t *ctx)
{
- pthread_mutex_lock (&logfile_mutex);
+ /*
+ * First set log buf size to 0. This would ensure two things:
+ * i. that all outstanding log messages are flushed to disk, and
+ * ii. all subsequent calls to gf_msg will result in the logs getting
+ * directly flushed to disk without being buffered.
+ *
+ * Then, cancel the current log timer event.
+ */
+
+ gf_log_set_log_buf_size(0);
+ pthread_mutex_lock(&ctx->log.log_buf_lock);
+ {
+ if (ctx->log.log_flush_timer) {
+ gf_timer_call_cancel(ctx, ctx->log.log_flush_timer);
+ ctx->log.log_flush_timer = NULL;
+ }
+ }
+ pthread_mutex_unlock(&ctx->log.log_buf_lock);
}
+/** gf_log_fini - function to perform the cleanup of the log information
+ * @data - glusterfs context
+ * @return: success: 0
+ * failure: -1
+ */
+int
+gf_log_fini(void *data)
+{
+ glusterfs_ctx_t *ctx = data;
+ int ret = 0;
+ FILE *old_logfile = NULL;
+
+ if (ctx == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_log_disable_suppression_before_exit(ctx);
+
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ old_logfile = ctx->log.logfile;
+
+ /* Logfile needs to be set to NULL, so that any
+ call to gf_log after calling gf_log_fini, will
+ log the message to stderr.
+ */
+ ctx->log.loglevel = GF_LOG_NONE;
+ ctx->log.logfile = NULL;
+ }
+ }
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
-void
-gf_log_unlock (void)
+ if (old_logfile && (fclose(old_logfile) != 0))
+ ret = -1;
+
+ GF_FREE(ctx->log.ident);
+ GF_FREE(ctx->log.filename);
+
+out:
+ return ret;
+}
+
+/**
+ * gf_openlog -function to open syslog specific to gluster based on
+ * existence of file /etc/glusterfs/logger.conf
+ * @ident: optional identification string similar to openlog()
+ * @option: optional value to option to openlog(). Passing -1 uses
+ * 'LOG_PID | LOG_NDELAY' as default
+ * @facility: optional facility code similar to openlog(). Passing -1
+ * uses LOG_DAEMON as default
+ *
+ * @return: void
+ */
+static void
+gf_openlog(const char *ident, int option, int facility)
{
- pthread_mutex_unlock (&logfile_mutex);
+ int _option = option;
+ int _facility = facility;
+
+ if (-1 == _option) {
+ _option = LOG_PID | LOG_NDELAY;
+ }
+ if (-1 == _facility) {
+ _facility = LOG_LOCAL1;
+ }
+
+ /* TODO: Should check for errors here and return appropriately */
+ setlocale(LC_ALL, "");
+ setlocale(LC_NUMERIC, "C"); /* C-locale for strtod, ... */
+ /* close the previous syslog if open as we are changing settings */
+ closelog();
+ openlog(ident, _option, _facility);
}
+/**
+ * _json_escape -function to convert string to json encoded string
+ * @str: input string
+ * @buf: buffer to store encoded string
+ * @len: length of @buf
+ *
+ * @return: success: last unprocessed character position by pointer in @str
+ * failure: NULL
+ *
+ * Internal function. Heavily inspired by _ul_str_escape() function in
+ * libumberlog
+ *
+ * Sample output:
+ * [1] str = "devel error"
+ * buf = "devel error"
+ * [2] str = "devel error"
+ * buf = "devel\terror"
+ * [3] str = "I/O error on "/tmp/foo" file"
+ * buf = "I/O error on \"/tmp/foo\" file"
+ * [4] str = "I/O erroron /tmp/bar file"
+ * buf = "I/O error\u001bon /tmp/bar file"
+ *
+ */
+static char *
+_json_escape(const char *str, char *buf, size_t len)
+{
+ static const unsigned char json_exceptions[UCHAR_MAX + 1] = {
+ [0x01] = 1, [0x02] = 1, [0x03] = 1, [0x04] = 1, [0x05] = 1, [0x06] = 1,
+ [0x07] = 1, [0x08] = 1, [0x09] = 1, [0x0a] = 1, [0x0b] = 1, [0x0c] = 1,
+ [0x0d] = 1, [0x0e] = 1, [0x0f] = 1, [0x10] = 1, [0x11] = 1, [0x12] = 1,
+ [0x13] = 1, [0x14] = 1, [0x15] = 1, [0x16] = 1, [0x17] = 1, [0x18] = 1,
+ [0x19] = 1, [0x1a] = 1, [0x1b] = 1, [0x1c] = 1, [0x1d] = 1, [0x1e] = 1,
+ [0x1f] = 1, ['\\'] = 1, ['"'] = 1};
+ static const char json_hex_chars[16] = "0123456789abcdef";
+ unsigned char *p = NULL;
+ size_t pos = 0;
+
+ if (!str || !buf || len <= 0) {
+ return NULL;
+ }
+
+ for (p = (unsigned char *)str; *p && (pos + 1) < len; p++) {
+ if (json_exceptions[*p] == 0) {
+ buf[pos++] = *p;
+ continue;
+ }
-void
-gf_log_cleanup (void)
+ if ((pos + 2) >= len) {
+ break;
+ }
+
+ switch (*p) {
+ case '\b':
+ buf[pos++] = '\\';
+ buf[pos++] = 'b';
+ break;
+ case '\n':
+ buf[pos++] = '\\';
+ buf[pos++] = 'n';
+ break;
+ case '\r':
+ buf[pos++] = '\\';
+ buf[pos++] = 'r';
+ break;
+ case '\t':
+ buf[pos++] = '\\';
+ buf[pos++] = 't';
+ break;
+ case '\\':
+ buf[pos++] = '\\';
+ buf[pos++] = '\\';
+ break;
+ case '"':
+ buf[pos++] = '\\';
+ buf[pos++] = '"';
+ break;
+ default:
+ if ((pos + 6) >= len) {
+ buf[pos] = '\0';
+ return (char *)p;
+ }
+ buf[pos++] = '\\';
+ buf[pos++] = 'u';
+ buf[pos++] = '0';
+ buf[pos++] = '0';
+ buf[pos++] = json_hex_chars[(*p) >> 4];
+ buf[pos++] = json_hex_chars[(*p) & 0xf];
+ break;
+ }
+ }
+
+ buf[pos] = '\0';
+ return (char *)p;
+}
+
+/**
+ * gf_syslog -function to submit message to syslog specific to gluster
+ * @facility_priority: facility_priority of syslog()
+ * @format: optional format string to syslog()
+ *
+ * @return: void
+ */
+static void
+gf_syslog(int facility_priority, char *format, ...)
{
- pthread_mutex_destroy (&logfile_mutex);
+ char *msg = NULL;
+ char json_msg[GF_JSON_MSG_LENGTH];
+ GF_UNUSED char *p = NULL;
+ va_list ap;
+
+ GF_ASSERT(format);
+
+ va_start(ap, format);
+ if (vasprintf(&msg, format, ap) != -1) {
+ p = _json_escape(msg, json_msg, GF_JSON_MSG_LENGTH);
+ syslog(facility_priority, "%s", msg);
+ free(msg);
+ } else
+ syslog(GF_LOG_CRITICAL, "vasprintf() failed, out of memory?");
+ va_end(ap);
}
void
-set_sys_log_level (gf_loglevel_t level)
+gf_log_globals_init(void *data, gf_loglevel_t level)
{
- sys_log_level = level;
+ glusterfs_ctx_t *ctx = data;
+
+ pthread_mutex_init(&ctx->log.logfile_mutex, NULL);
+
+ ctx->log.loglevel = level;
+ ctx->log.gf_log_syslog = 1;
+ ctx->log.sys_log_level = GF_LOG_CRITICAL;
+ ctx->log.logger = gf_logger_glusterlog;
+ ctx->log.logformat = gf_logformat_withmsgid;
+ ctx->log.lru_size = GF_LOG_LRU_BUFSIZE_DEFAULT;
+ ctx->log.timeout = GF_LOG_FLUSH_TIMEOUT_DEFAULT;
+ ctx->log.localtime = GF_LOG_LOCALTIME_DEFAULT;
+
+ pthread_mutex_init(&ctx->log.log_buf_lock, NULL);
+
+ INIT_LIST_HEAD(&ctx->log.lru_queue);
+
+#ifdef GF_LINUX_HOST_OS
+ /* For the 'syslog' output. one can grep 'GlusterFS' in syslog
+ for serious logs */
+ openlog("GlusterFS", LOG_PID, LOG_DAEMON);
+#endif
}
int
-_gf_log_nomem (const char *domain, const char *file,
- const char *function, int line, gf_loglevel_t level,
- size_t size)
-{
- const char *basename = NULL;
- xlator_t *this = NULL;
- struct timeval tv = {0,};
- int ret = 0;
- char msg[8092] = {0,};
- char timestr[256] = {0,};
- char callstr[4096] = {0,};
-
- this = THIS;
-
- if (gf_log_xl_log_set) {
- if (this->loglevel && (level > this->loglevel))
- goto out;
- else if (level > gf_log_loglevel)
- goto out;
- }
-
- static char *level_strings[] = {"", /* NONE */
- "M", /* EMERGENCY */
- "A", /* ALERT */
- "C", /* CRITICAL */
- "E", /* ERROR */
- "W", /* WARNING */
- "N", /* NOTICE */
- "I", /* INFO */
- "D", /* DEBUG */
- "T", /* TRACE */
- ""};
-
- if (!domain || !file || !function) {
- fprintf (stderr,
- "logging: %s:%s():%d: invalid argument\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- return -1;
+gf_log_init(void *data, const char *file, const char *ident)
+{
+ glusterfs_ctx_t *ctx = data;
+ int fd = -1;
+ struct stat buf;
+
+ if (ctx == NULL) {
+ fprintf(stderr, "ERROR: ctx is NULL\n");
+ return -1;
+ }
+ if (ident) {
+ GF_FREE(ctx->log.ident);
+ ctx->log.ident = gf_strdup(ident);
+ }
+
+ /* we keep the files and the syslog open, so that on logger change, we
+ * are ready to log anywhere, that the new value specifies */
+ if (ctx->log.ident) {
+ gf_openlog(ctx->log.ident, -1, LOG_DAEMON);
+ } else {
+ gf_openlog(NULL, -1, LOG_DAEMON);
+ }
+ /* TODO: make FACILITY configurable than LOG_DAEMON */
+ if (sys_stat(GF_LOG_CONTROL_FILE, &buf) == 0) {
+ /* use syslog logging */
+ ctx->log.log_control_file_found = 1;
+ } else {
+ /* use old style logging */
+ ctx->log.log_control_file_found = 0;
+ }
+
+ if (!file) {
+ fprintf(stderr, "ERROR: no filename specified\n");
+ return -1;
+ }
+
+ /* free the (possible) previous filename */
+ GF_FREE(ctx->log.filename);
+ ctx->log.filename = NULL;
+
+ /* close and reopen logfile for log rotate */
+ if (ctx->log.logfile) {
+ fclose(ctx->log.logfile);
+ ctx->log.logfile = NULL;
+ ctx->log.gf_log_logfile = NULL;
+ }
+
+ if (strcmp(file, "-") == 0) {
+ int dupfd = -1;
+
+ ctx->log.filename = gf_strdup("/dev/stderr");
+ if (!ctx->log.filename) {
+ fprintf(stderr, "ERROR: strdup failed\n");
+ return -1;
}
-#if HAVE_BACKTRACE
- /* Print 'calling function' */
- do {
- void *array[5];
- char **callingfn = NULL;
- size_t bt_size = 0;
-
- bt_size = backtrace (array, 5);
- if (bt_size)
- callingfn = backtrace_symbols (&array[2], bt_size-2);
- if (!callingfn)
- break;
-
- if (bt_size == 5)
- snprintf (callstr, 4096, "(-->%s (-->%s (-->%s)))",
- callingfn[2], callingfn[1], callingfn[0]);
- if (bt_size == 4)
- snprintf (callstr, 4096, "(-->%s (-->%s))",
- callingfn[1], callingfn[0]);
- if (bt_size == 3)
- snprintf (callstr, 4096, "(-->%s)", callingfn[0]);
-
- free (callingfn);
- } while (0);
-#endif /* HAVE_BACKTRACE */
-
- ret = gettimeofday (&tv, NULL);
- if (-1 == ret)
- goto out;
- gf_time_fmt (timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
- snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, tv.tv_usec);
-
- basename = strrchr (file, '/');
- if (basename)
- basename++;
- else
- basename = file;
-
- ret = sprintf (msg, "[%s] %s [%s:%d:%s] %s %s: no memory "
- "available for size (%"GF_PRI_SIZET")",
- timestr, level_strings[level],
- basename, line, function, callstr,
- domain, size);
- if (-1 == ret) {
- goto out;
+ dupfd = dup(fileno(stderr));
+ if (dupfd == -1) {
+ fprintf(stderr, "ERROR: could not dup %d (%s)\n", fileno(stderr),
+ strerror(errno));
+ return -1;
}
- pthread_mutex_lock (&logfile_mutex);
- {
- if (logfile) {
- fprintf (logfile, "%s\n", msg);
- fflush (logfile);
- } else {
- fprintf (stderr, "%s\n", msg);
- }
+ ctx->log.logfile = fdopen(dupfd, "a");
+ if (!ctx->log.logfile) {
+ fprintf(stderr, "ERROR: could not fdopen on %d (%s)\n", dupfd,
+ strerror(errno));
+ sys_close(dupfd);
+ return -1;
+ }
+ } else {
+ /* Also create parent dir */
+ char *logdir = gf_strdup(file);
+ if (!logdir) {
+ return -1;
+ }
+ char *tmp_index = rindex(logdir, '/');
+ if (tmp_index) {
+ tmp_index[0] = '\0';
+ }
+ if (mkdir_p(logdir, 0755, true)) {
+ /* EEXIST is handled in mkdir_p() itself */
+ gf_smsg("logging", GF_LOG_ERROR, 0, LG_MSG_STRDUP_ERROR,
+ "logdir=%s", logdir, "errno=%s", strerror(errno), NULL);
+ GF_FREE(logdir);
+ return -1;
+ }
+ /* no need of this variable */
+ GF_FREE(logdir);
+
+ ctx->log.filename = gf_strdup(file);
+ if (!ctx->log.filename) {
+ fprintf(stderr,
+ "ERROR: updating log-filename failed: "
+ "%s\n",
+ strerror(errno));
+ return -1;
+ }
-#ifdef GF_LINUX_HOST_OS
- /* We want only serious log in 'syslog', not our debug
- and trace logs */
- if (gf_log_syslog && level && (level <= sys_log_level))
- syslog ((level-1), "%s\n", msg);
-#endif
+ fd = sys_open(file, O_CREAT | O_WRONLY | O_APPEND, S_IRUSR | S_IWUSR);
+ if (fd < 0) {
+ fprintf(stderr,
+ "ERROR: failed to create logfile"
+ " \"%s\" (%s)\n",
+ file, strerror(errno));
+ return -1;
}
- pthread_mutex_unlock (&logfile_mutex);
-out:
- return ret;
- }
+ ctx->log.logfile = fdopen(fd, "a");
+ if (!ctx->log.logfile) {
+ fprintf(stderr,
+ "ERROR: failed to open logfile \"%s\" "
+ "(%s)\n",
+ file, strerror(errno));
+ sys_close(fd);
+ return -1;
+ }
+ }
+
+ ctx->log.gf_log_logfile = ctx->log.logfile;
+
+ return 0;
+}
+
+void
+set_sys_log_level(gf_loglevel_t level)
+{
+ if (THIS->ctx)
+ THIS->ctx->log.sys_log_level = level;
+}
+
+/* Check if we should be logging
+ * Return value: _gf_false : Print the log
+ * _gf_true : Do not Print the log
+ */
+static gf_boolean_t
+skip_logging(xlator_t *this, gf_loglevel_t level)
+{
+ gf_loglevel_t existing_level = this->loglevel ? this->loglevel
+ : this->ctx->log.loglevel;
+ if (level > existing_level) {
+ return _gf_true;
+ }
+
+ if (level == GF_LOG_NONE) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
int
-_gf_log_callingfn (const char *domain, const char *file, const char *function,
- int line, gf_loglevel_t level, const char *fmt, ...)
-{
- const char *basename = NULL;
- xlator_t *this = NULL;
- char *str1 = NULL;
- char *str2 = NULL;
- char *msg = NULL;
- char timestr[256] = {0,};
- char callstr[4096] = {0,};
- struct timeval tv = {0,};
- size_t len = 0;
- int ret = 0;
- va_list ap;
-
- this = THIS;
-
- if (gf_log_xl_log_set) {
- if (this->loglevel && (level > this->loglevel))
- goto out;
- else if (level > gf_log_loglevel)
- goto out;
- }
-
- static char *level_strings[] = {"", /* NONE */
- "M", /* EMERGENCY */
- "A", /* ALERT */
- "C", /* CRITICAL */
- "E", /* ERROR */
- "W", /* WARNING */
- "N", /* NOTICE */
- "I", /* INFO */
- "D", /* DEBUG */
- "T", /* TRACE */
- ""};
-
- if (!domain || !file || !function || !fmt) {
- fprintf (stderr,
- "logging: %s:%s():%d: invalid argument\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- return -1;
+_gf_log_callingfn(const char *domain, const char *file, const char *function,
+ int line, gf_loglevel_t level, const char *fmt, ...)
+{
+ const char *basename = NULL;
+ xlator_t *this = THIS;
+ char *logline = NULL;
+ char *msg = NULL;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char *callstr = NULL;
+ struct timeval tv = {
+ 0,
+ };
+ int ret = 0;
+ va_list ap;
+ glusterfs_ctx_t *ctx = this->ctx;
+
+ if (!ctx)
+ goto out;
+
+ if (skip_logging(this, level))
+ goto out;
+
+ if (!domain || !file || !function || !fmt) {
+ fprintf(stderr, "logging: %s:%s():%d: invalid argument\n", __FILE__,
+ __PRETTY_FUNCTION__, __LINE__);
+ return -1;
+ }
+
+ basename = strrchr(file, '/');
+ if (basename)
+ basename++;
+ else
+ basename = file;
+
+ /*Saving the backtrace to pre-allocated ctx->btbuf
+ * to avoid allocating memory from the heap*/
+ callstr = gf_backtrace_save(NULL);
+
+ va_start(ap, fmt);
+ ret = vasprintf(&msg, fmt, ap);
+ va_end(ap);
+ if (-1 == ret) {
+ goto out;
+ }
+
+ if (ctx->log.log_control_file_found) {
+ int priority;
+ /* treat GF_LOG_TRACE and GF_LOG_NONE as LOG_DEBUG and
+ other level as is */
+ if (GF_LOG_TRACE == level || GF_LOG_NONE == level) {
+ priority = LOG_DEBUG;
+ } else {
+ priority = level - 1;
}
-#if HAVE_BACKTRACE
- /* Print 'calling function' */
- do {
- void *array[5];
- char **callingfn = NULL;
- size_t size = 0;
-
- size = backtrace (array, 5);
- if (size)
- callingfn = backtrace_symbols (&array[2], size-2);
- if (!callingfn)
- break;
-
- if (size == 5)
- snprintf (callstr, 4096, "(-->%s (-->%s (-->%s)))",
- callingfn[2], callingfn[1], callingfn[0]);
- if (size == 4)
- snprintf (callstr, 4096, "(-->%s (-->%s))",
- callingfn[1], callingfn[0]);
- if (size == 3)
- snprintf (callstr, 4096, "(-->%s)", callingfn[0]);
-
- free (callingfn);
- } while (0);
-#endif /* HAVE_BACKTRACE */
-
- ret = gettimeofday (&tv, NULL);
- if (-1 == ret)
- goto out;
- va_start (ap, fmt);
- gf_time_fmt (timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
- snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, tv.tv_usec);
-
- basename = strrchr (file, '/');
- if (basename)
- basename++;
- else
- basename = file;
-
- ret = gf_asprintf (&str1, "[%s] %s [%s:%d:%s] %s %d-%s: ",
- timestr, level_strings[level],
- basename, line, function, callstr,
- ((this->graph) ? this->graph->id:0), domain);
- if (-1 == ret) {
- goto out;
+ gf_syslog(priority, "[%s:%d:%s] %s %d-%s: %s", basename, line, function,
+ callstr, ((this->graph) ? this->graph->id : 0), domain, msg);
+
+ goto out;
+ }
+
+ ret = gettimeofday(&tv, NULL);
+ if (-1 == ret)
+ goto out;
+
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
+
+ ret = gf_asprintf(&logline, "[%s] %c [%s:%d:%s] %s %d-%s: %s\n", timestr,
+ gf_level_strings[level], basename, line, function,
+ callstr, ((this->graph) ? this->graph->id : 0), domain,
+ msg);
+ if (-1 == ret) {
+ goto out;
+ }
+
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ fputs(logline, ctx->log.logfile);
+ fflush(ctx->log.logfile);
+ } else if (ctx->log.loglevel >= level) {
+ fputs(logline, stderr);
+ fflush(stderr);
}
- ret = vasprintf (&str2, fmt, ap);
- if (-1 == ret) {
- goto out;
- }
+#ifdef GF_LINUX_HOST_OS
+ /* We want only serious log in 'syslog', not our debug
+ and trace logs */
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level))
+ syslog((level - 1), "%s", logline);
+#endif
+ }
- va_end (ap);
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
- len = strlen (str1);
- msg = GF_MALLOC (len + strlen (str2) + 1, gf_common_mt_char);
+out:
- strcpy (msg, str1);
- strcpy (msg + len, str2);
+ GF_FREE(logline);
- pthread_mutex_lock (&logfile_mutex);
- {
- if (logfile) {
- fprintf (logfile, "%s\n", msg);
- fflush (logfile);
+ FREE(msg);
+
+ return ret;
+}
+
+static int
+_gf_msg_plain_internal(gf_loglevel_t level, const char *msg)
+{
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int priority;
+
+ this = THIS;
+ ctx = this->ctx;
+
+ /* log to the configured logging service */
+ switch (ctx->log.logger) {
+ case gf_logger_syslog:
+ if (ctx->log.log_control_file_found && ctx->log.gf_log_syslog) {
+ SET_LOG_PRIO(level, priority);
+
+ syslog(priority, "%s", msg);
+ break;
+ }
+ /* NOTE: If syslog control file is absent, which is another
+ * way to control logging to syslog, then we will fall through
+ * to the gluster log. The ideal way to do things would be to
+ * not have the extra control file check */
+ case gf_logger_glusterlog:
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ fprintf(ctx->log.logfile, "%s\n", msg);
+ fflush(ctx->log.logfile);
} else {
- fprintf (stderr, "%s\n", msg);
+ fprintf(stderr, "%s\n", msg);
+ fflush(stderr);
}
#ifdef GF_LINUX_HOST_OS
- /* We want only serious log in 'syslog', not our debug
- and trace logs */
- if (gf_log_syslog && level && (level <= sys_log_level))
- syslog ((level-1), "%s\n", msg);
+ /* We want only serious logs in 'syslog', not our debug
+ * and trace logs */
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level))
+ syslog((level - 1), "%s\n", msg);
#endif
- }
+ }
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+
+ break;
+ }
+
+ return 0;
+}
+
+int
+_gf_msg_plain(gf_loglevel_t level, const char *fmt, ...)
+{
+ xlator_t *this = NULL;
+ int ret = 0;
+ va_list ap;
+ char *msg = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ this = THIS;
+ ctx = this->ctx;
+
+ if (!ctx)
+ goto out;
+
+ if (skip_logging(this, level))
+ goto out;
+
+ va_start(ap, fmt);
+ ret = vasprintf(&msg, fmt, ap);
+ va_end(ap);
+ if (-1 == ret) {
+ goto out;
+ }
- pthread_mutex_unlock (&logfile_mutex);
+ ret = _gf_msg_plain_internal(level, msg);
+
+ FREE(msg);
out:
- if (msg)
- GF_FREE (msg);
+ return ret;
+}
+
+int
+_gf_msg_vplain(gf_loglevel_t level, const char *fmt, va_list ap)
+{
+ xlator_t *this = NULL;
+ int ret = 0;
+ char *msg = NULL;
+ glusterfs_ctx_t *ctx = NULL;
- if (str1)
- GF_FREE (str1);
+ this = THIS;
+ ctx = this->ctx;
- if (str2)
- FREE (str2);
+ if (!ctx)
+ goto out;
- return ret;
+ if (skip_logging(this, level))
+ goto out;
+
+ ret = vasprintf(&msg, fmt, ap);
+ if (-1 == ret) {
+ goto out;
+ }
+
+ ret = _gf_msg_plain_internal(level, msg);
+
+ FREE(msg);
+out:
+ return ret;
}
int
-_gf_log (const char *domain, const char *file, const char *function, int line,
- gf_loglevel_t level, const char *fmt, ...)
-{
- const char *basename = NULL;
- FILE *new_logfile = NULL;
- va_list ap;
- char timestr[256] = {0,};
- struct timeval tv = {0,};
- char *str1 = NULL;
- char *str2 = NULL;
- char *msg = NULL;
- size_t len = 0;
- int ret = 0;
- int fd = -1;
- xlator_t *this = NULL;
-
- this = THIS;
-
- if (gf_log_xl_log_set) {
- if (this->loglevel && (level > this->loglevel))
- goto out;
- else if (level > gf_log_loglevel)
- goto out;
- }
-
- static char *level_strings[] = {"", /* NONE */
- "M", /* EMERGENCY */
- "A", /* ALERT */
- "C", /* CRITICAL */
- "E", /* ERROR */
- "W", /* WARNING */
- "N", /* NOTICE */
- "I", /* INFO */
- "D", /* DEBUG */
- "T", /* TRACE */
- ""};
-
- if (!domain || !file || !function || !fmt) {
- fprintf (stderr,
- "logging: %s:%s():%d: invalid argument\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- return -1;
+_gf_msg_plain_nomem(gf_loglevel_t level, const char *msg)
+{
+ xlator_t *this = NULL;
+ int ret = 0;
+ glusterfs_ctx_t *ctx = NULL;
+
+ this = THIS;
+ ctx = this->ctx;
+
+ if (!ctx)
+ goto out;
+
+ if (skip_logging(this, level))
+ goto out;
+
+ ret = _gf_msg_plain_internal(level, msg);
+
+out:
+ return ret;
+}
+
+void
+_gf_msg_backtrace_nomem(gf_loglevel_t level, int stacksize)
+{
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ void *array[200];
+ size_t bt_size = 0;
+ int fd = -1;
+
+ this = THIS;
+ ctx = this->ctx;
+
+ if (!ctx)
+ goto out;
+
+ /* syslog does not have fd support, hence no no-mem variant */
+ if (ctx->log.logger != gf_logger_glusterlog)
+ goto out;
+
+ if (skip_logging(this, level))
+ goto out;
+
+ bt_size = backtrace(array, ((stacksize <= 200) ? stacksize : 200));
+ if (!bt_size)
+ goto out;
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ fd = ctx->log.logfile ? fileno(ctx->log.logfile) : fileno(stderr);
+ if (fd != -1) {
+ /* print to the file fd, to prevent any
+ allocations from backtrace_symbols
+ */
+ backtrace_symbols_fd(&array[0], bt_size, fd);
}
+ }
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+out:
+ return;
+}
- if (logrotate) {
- logrotate = 0;
+int
+_gf_msg_backtrace(int stacksize, char *callstr, size_t strsize)
+{
+ int ret = -1;
+ int i = 0;
+ int size = 0;
+ int savstrsize = strsize;
+ void *array[200];
+ char **callingfn = NULL;
+
+ /* We chop off last 2 anyway, so if request is less than tolerance
+ * nothing to do */
+ if (stacksize < 3)
+ goto out;
+
+ size = backtrace(array, ((stacksize <= 200) ? stacksize : 200));
+ if ((size - 3) < 0)
+ goto out;
+ if (size)
+ callingfn = backtrace_symbols(&array[2], size - 2);
+ if (!callingfn)
+ goto out;
+
+ ret = snprintf(callstr, strsize, "(");
+ PRINT_SIZE_CHECK(ret, out, strsize);
+
+ for ((i = size - 3); i >= 0; i--) {
+ ret = snprintf(callstr + savstrsize - strsize, strsize, "-->%s ",
+ callingfn[i]);
+ PRINT_SIZE_CHECK(ret, out, strsize);
+ }
+
+ ret = snprintf(callstr + savstrsize - strsize, strsize, ")");
+ PRINT_SIZE_CHECK(ret, out, strsize);
+out:
+ FREE(callingfn);
+ return ret;
+}
- fd = open (filename, O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
- if (fd < 0) {
- gf_log ("logrotate", GF_LOG_ERROR,
- "%s", strerror (errno));
- return -1;
- }
- close (fd);
-
- new_logfile = fopen (filename, "a");
- if (!new_logfile) {
- gf_log ("logrotate", GF_LOG_CRITICAL,
- "failed to open logfile %s (%s)",
- filename, strerror (errno));
- goto log;
+int
+_gf_msg_nomem(const char *domain, const char *file, const char *function,
+ int line, gf_loglevel_t level, size_t size)
+{
+ const char *basename = NULL;
+ xlator_t *this = NULL;
+ struct timeval tv = {
+ 0,
+ };
+ int ret = 0;
+ int fd = -1;
+ char msg[2048] = {
+ 0,
+ };
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ glusterfs_ctx_t *ctx = NULL;
+ int wlen = 0;
+ int priority;
+ struct rusage r_usage;
+
+ this = THIS;
+ ctx = this->ctx;
+
+ if (!ctx)
+ goto out;
+
+ if (skip_logging(this, level))
+ goto out;
+
+ if (!domain || !file || !function) {
+ fprintf(stderr, "logging: %s:%s():%d: invalid argument\n", __FILE__,
+ __PRETTY_FUNCTION__, __LINE__);
+ return -1;
+ }
+
+ GET_FILE_NAME_TO_LOG(file, basename);
+
+ ret = gettimeofday(&tv, NULL);
+ if (-1 == ret)
+ goto out;
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
+
+ /* TODO: Currently we print in the enhanced format, with a message ID
+ * of 0. Need to enhance this to support format as configured */
+ wlen = snprintf(
+ msg, sizeof msg,
+ "[%s] %c [MSGID: %" PRIu64
+ "]"
+ " [%s:%d:%s] %s: no memory "
+ "available for size (%" GF_PRI_SIZET
+ ") current memory usage in kilobytes %ld"
+ " [call stack follows]\n",
+ timestr, gf_level_strings[level], (uint64_t)0, basename, line, function,
+ domain, size,
+ (!getrusage(RUSAGE_SELF, &r_usage) ? r_usage.ru_maxrss : 0));
+ if (-1 == wlen) {
+ ret = -1;
+ goto out;
+ }
+
+ /* log to the configured logging service */
+ switch (ctx->log.logger) {
+ case gf_logger_syslog:
+ if (ctx->log.log_control_file_found && ctx->log.gf_log_syslog) {
+ SET_LOG_PRIO(level, priority);
+
+ /* if syslog allocates, then this may fail, but we
+ * cannot do much about it at the moment */
+ /* There is no fd for syslog, hence no stack printed */
+ syslog(priority, "%s", msg);
+ break;
+ }
+ /* NOTE: If syslog control file is absent, which is another
+ * way to control logging to syslog, then we will fall through
+ * to the gluster log. The ideal way to do things would be to
+ * not have the extra control file check */
+ case gf_logger_glusterlog:
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ fd = ctx->log.logfile ? fileno(ctx->log.logfile)
+ : fileno(stderr);
+ if (fd == -1) {
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+ goto out;
}
- pthread_mutex_lock (&logfile_mutex);
- {
- if (logfile)
- fclose (logfile);
-
- gf_log_logfile = logfile = new_logfile;
+ /* write directly to the fd to prevent out of order
+ * message and stack */
+ ret = sys_write(fd, msg, wlen);
+ if (ret == -1) {
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+ goto out;
}
- pthread_mutex_unlock (&logfile_mutex);
+#ifdef GF_LINUX_HOST_OS
+ /* We want only serious log in 'syslog', not our debug
+ * and trace logs */
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level))
+ syslog((level - 1), "%s\n", msg);
+#endif
+ }
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
- }
+ _gf_msg_backtrace_nomem(level, GF_LOG_BACKTRACE_DEPTH);
-log:
- ret = gettimeofday (&tv, NULL);
- if (-1 == ret)
- goto out;
- va_start (ap, fmt);
- gf_time_fmt (timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
- snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, tv.tv_usec);
+ break;
+ }
- basename = strrchr (file, '/');
- if (basename)
- basename++;
- else
- basename = file;
+out:
+ return ret;
+}
+
+static int
+gf_log_syslog(glusterfs_ctx_t *ctx, const char *domain, const char *file,
+ const char *function, int32_t line, gf_loglevel_t level,
+ int errnum, uint64_t msgid, char **appmsgstr, char *callstr,
+ int graph_id, gf_log_format_t fmt)
+{
+ int priority;
+
+ SET_LOG_PRIO(level, priority);
+
+ /* log with appropriate format */
+ switch (fmt) {
+ case gf_logformat_traditional:
+ if (!callstr) {
+ if (errnum)
+ syslog(priority, "[%s:%d:%s] %d-%s: %s [%s]", file, line,
+ function, graph_id, domain, *appmsgstr,
+ strerror(errnum));
+ else
+ syslog(priority, "[%s:%d:%s] %d-%s: %s", file, line,
+ function, graph_id, domain, *appmsgstr);
+ } else {
+ if (errnum)
+ syslog(priority,
+ "[%s:%d:%s] %s %d-%s:"
+ " %s [%s]",
+ file, line, function, callstr, graph_id, domain,
+ *appmsgstr, strerror(errnum));
+ else
+ syslog(priority, "[%s:%d:%s] %s %d-%s: %s", file, line,
+ function, callstr, graph_id, domain, *appmsgstr);
+ }
+ break;
+ case gf_logformat_withmsgid:
+ if (!callstr) {
+ if (errnum)
+ syslog(priority,
+ "[MSGID: %" PRIu64
+ "]"
+ " [%s:%d:%s] %d-%s: %s [%s]",
+ msgid, file, line, function, graph_id, domain,
+ *appmsgstr, strerror(errnum));
+ else
+ syslog(priority,
+ "[MSGID: %" PRIu64
+ "]"
+ " [%s:%d:%s] %d-%s: %s",
+ msgid, file, line, function, graph_id, domain,
+ *appmsgstr);
+ } else {
+ if (errnum)
+ syslog(priority,
+ "[MSGID: %" PRIu64
+ "]"
+ " [%s:%d:%s] %s %d-%s: %s [%s]",
+ msgid, file, line, function, callstr, graph_id,
+ domain, *appmsgstr, strerror(errnum));
+ else
+ syslog(priority,
+ "[MSGID: %" PRIu64
+ "]"
+ " [%s:%d:%s] %s %d-%s: %s",
+ msgid, file, line, function, callstr, graph_id,
+ domain, *appmsgstr);
+ }
+ break;
+ case gf_logformat_cee:
+ /* TODO: Enhance CEE with additional parameters */
+ gf_syslog(priority, "[%s:%d:%s] %d-%s: %s", file, line, function,
+ graph_id, domain, *appmsgstr);
+ break;
+
+ default:
+ /* NOTE: should not get here without logging */
+ break;
+ }
+
+ /* TODO: There can be no errors from gf_syslog? */
+ return 0;
+}
- ret = gf_asprintf (&str1, "[%s] %s [%s:%d:%s] %d-%s: ",
- timestr, level_strings[level],
- basename, line, function,
- ((this->graph)?this->graph->id:0), domain);
- if (-1 == ret) {
- goto err;
+static int
+gf_log_glusterlog(glusterfs_ctx_t *ctx, const char *domain, const char *file,
+ const char *function, int32_t line, gf_loglevel_t level,
+ int errnum, uint64_t msgid, char **appmsgstr, char *callstr,
+ struct timeval tv, int graph_id, gf_log_format_t fmt)
+{
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char *header = NULL;
+ char *footer = NULL;
+ int ret = 0;
+
+ /* rotate if required */
+ gf_log_rotate(ctx);
+
+ /* format the time stamp */
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
+
+ /* generate footer */
+ if (errnum) {
+ ret = gf_asprintf(&footer, " [%s]\n", strerror(errnum));
+ } else {
+ ret = gf_asprintf(&footer, " \n");
+ }
+ if (-1 == ret) {
+ goto err;
+ }
+
+ /* generate message, inc. the header */
+ if (fmt == gf_logformat_traditional) {
+ if (!callstr) {
+ ret = gf_asprintf(&header,
+ "[%s] %c [%s:%d:%s]"
+ " %d-%s: %s",
+ timestr, gf_level_strings[level], file, line,
+ function, graph_id, domain, *appmsgstr);
+ } else {
+ ret = gf_asprintf(&header,
+ "[%s] %c [%s:%d:%s] %s"
+ " %d-%s: %s",
+ timestr, gf_level_strings[level], file, line,
+ function, callstr, graph_id, domain, *appmsgstr);
+ }
+ } else { /* gf_logformat_withmsgid */
+ /* CEE log format unsupported in logger_glusterlog, so just
+ * print enhanced log format */
+ if (!callstr) {
+ ret = gf_asprintf(&header,
+ "[%s] %c [MSGID: %" PRIu64
+ "]"
+ " [%s:%d:%s] %d-%s: %s",
+ timestr, gf_level_strings[level], msgid, file,
+ line, function, graph_id, domain, *appmsgstr);
+ } else {
+ ret = gf_asprintf(&header,
+ "[%s] %c [MSGID: %" PRIu64
+ "]"
+ " [%s:%d:%s] %s %d-%s: %s",
+ timestr, gf_level_strings[level], msgid, file,
+ line, function, callstr, graph_id, domain,
+ *appmsgstr);
+ }
+ }
+ if (-1 == ret) {
+ goto err;
+ }
+
+ /* send the full message to log */
+
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ fprintf(ctx->log.logfile, "%s%s", header, footer);
+ fflush(ctx->log.logfile);
+ } else if (ctx->log.loglevel >= level) {
+ fprintf(stderr, "%s%s", header, footer);
+ fflush(stderr);
}
- ret = vasprintf (&str2, fmt, ap);
- if (-1 == ret) {
- goto err;
+#ifdef GF_LINUX_HOST_OS
+ /* We want only serious logs in 'syslog', not our debug
+ * and trace logs */
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level)) {
+ syslog((level - 1), "%s%s", header, footer);
}
+#endif
+ }
- va_end (ap);
+ /* TODO: Plugin in memory log buffer retention here. For logs not
+ * flushed during cores, it would be useful to retain some of the last
+ * few messages in memory */
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+ ret = 0;
- len = strlen (str1);
- msg = GF_MALLOC (len + strlen (str2) + 1, gf_common_mt_char);
+err:
+ GF_FREE(header);
+ GF_FREE(footer);
- strcpy (msg, str1);
- strcpy (msg + len, str2);
+ return ret;
+}
- pthread_mutex_lock (&logfile_mutex);
- {
+static int
+gf_syslog_log_repetitions(const char *domain, const char *file,
+ const char *function, int32_t line,
+ gf_loglevel_t level, int errnum, uint64_t msgid,
+ char **appmsgstr, char *callstr, int refcount,
+ struct timeval oldest, struct timeval latest,
+ int graph_id)
+{
+ int priority;
+ char timestr_latest[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char timestr_oldest[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+
+ SET_LOG_PRIO(level, priority);
+
+ gf_time_fmt_tv(timestr_latest, sizeof timestr_latest, &latest,
+ gf_timefmt_FT);
+ gf_time_fmt_tv(timestr_oldest, sizeof timestr_oldest, &oldest,
+ gf_timefmt_FT);
+
+ if (errnum) {
+ syslog(priority,
+ "The message \"[MSGID: %" PRIu64
+ "] [%s:%d:%s] "
+ "%d-%s: %s [%s] \" repeated %d times between %s"
+ " and %s",
+ msgid, file, line, function, graph_id, domain, *appmsgstr,
+ strerror(errnum), refcount, timestr_oldest, timestr_latest);
+ } else {
+ syslog(priority,
+ "The message \"[MSGID: %" PRIu64
+ "] [%s:%d:%s] "
+ "%d-%s: %s \" repeated %d times between %s"
+ " and %s",
+ msgid, file, line, function, graph_id, domain, *appmsgstr,
+ refcount, timestr_oldest, timestr_latest);
+ }
+ return 0;
+}
- if (logfile) {
- fprintf (logfile, "%s\n", msg);
- fflush (logfile);
- } else {
- fprintf (stderr, "%s\n", msg);
- }
+static int
+gf_glusterlog_log_repetitions(glusterfs_ctx_t *ctx, const char *domain,
+ const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, int errnum,
+ uint64_t msgid, char **appmsgstr, char *callstr,
+ int refcount, struct timeval oldest,
+ struct timeval latest, int graph_id)
+{
+ int ret = 0;
+ char timestr_latest[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char timestr_oldest[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char errstr[256] = {
+ 0,
+ };
+ char *header = NULL;
+ char *footer = NULL;
+
+ if (!ctx)
+ goto err;
+
+ gf_log_rotate(ctx);
+
+ ret = gf_asprintf(&header,
+ "The message \"%c [MSGID: %" PRIu64
+ "]"
+ " [%s:%d:%s] %d-%s: %s",
+ gf_level_strings[level], msgid, file, line, function,
+ graph_id, domain, *appmsgstr);
+ if (-1 == ret) {
+ goto err;
+ }
+
+ gf_time_fmt_tv(timestr_latest, sizeof timestr_latest, &latest,
+ gf_timefmt_FT);
+
+ gf_time_fmt_tv(timestr_oldest, sizeof timestr_oldest, &oldest,
+ gf_timefmt_FT);
+
+ if (errnum)
+ snprintf(errstr, sizeof(errstr) - 1, " [%s]", strerror(errnum));
+
+ ret = gf_asprintf(&footer, "%s\" repeated %d times between [%s] and [%s]",
+ errstr, refcount, timestr_oldest, timestr_latest);
+ if (-1 == ret) {
+ ret = -1;
+ goto err;
+ }
+
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ fprintf(ctx->log.logfile, "%s%s\n", header, footer);
+ fflush(ctx->log.logfile);
+ } else if (ctx->log.loglevel >= level) {
+ fprintf(stderr, "%s%s\n", header, footer);
+ fflush(stderr);
+ }
#ifdef GF_LINUX_HOST_OS
- /* We want only serious log in 'syslog', not our debug
- and trace logs */
- if (gf_log_syslog && level && (level <= sys_log_level))
- syslog ((level-1), "%s\n", msg);
+ /* We want only serious logs in 'syslog', not our debug
+ * and trace logs */
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level))
+ syslog((level - 1), "%s%s\n", header, footer);
#endif
- }
+ }
- pthread_mutex_unlock (&logfile_mutex);
+ /* TODO: Plugin in memory log buffer retention here. For logs not
+ * flushed during cores, it would be useful to retain some of the last
+ * few messages in memory */
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+ ret = 0;
err:
- if (msg) {
- GF_FREE (msg);
+ GF_FREE(header);
+ GF_FREE(footer);
+
+ return ret;
+}
+
+static int
+gf_log_print_with_repetitions(glusterfs_ctx_t *ctx, const char *domain,
+ const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, int errnum,
+ uint64_t msgid, char **appmsgstr, char *callstr,
+ int refcount, struct timeval oldest,
+ struct timeval latest, int graph_id)
+{
+ int ret = -1;
+ gf_log_logger_t logger = ctx->log.logger;
+
+ switch (logger) {
+ case gf_logger_syslog:
+ if (ctx->log.log_control_file_found && ctx->log.gf_log_syslog) {
+ ret = gf_syslog_log_repetitions(
+ domain, file, function, line, level, errnum, msgid,
+ appmsgstr, callstr, refcount, oldest, latest, graph_id);
+ break;
+ }
+ /* NOTE: If syslog control file is absent, which is another
+ * way to control logging to syslog, then we will fall through
+ * to the gluster log. The ideal way to do things would be to
+ * not have the extra control file check */
+
+ case gf_logger_glusterlog:
+ ret = gf_glusterlog_log_repetitions(
+ ctx, domain, file, function, line, level, errnum, msgid,
+ appmsgstr, callstr, refcount, oldest, latest, graph_id);
+ break;
+ }
+
+ return ret;
+}
+
+static int
+gf_log_print_plain_fmt(glusterfs_ctx_t *ctx, const char *domain,
+ const char *file, const char *function, int32_t line,
+ gf_loglevel_t level, int errnum, uint64_t msgid,
+ char **appmsgstr, char *callstr, struct timeval tv,
+ int graph_id, gf_log_format_t fmt)
+{
+ int ret = -1;
+ gf_log_logger_t logger = 0;
+
+ logger = ctx->log.logger;
+
+ /* log to the configured logging service */
+ switch (logger) {
+ case gf_logger_syslog:
+ if (ctx->log.log_control_file_found && ctx->log.gf_log_syslog) {
+ ret = gf_log_syslog(ctx, domain, file, function, line, level,
+ errnum, msgid, appmsgstr, callstr, graph_id,
+ fmt);
+ break;
+ }
+ /* NOTE: If syslog control file is absent, which is another
+ * way to control logging to syslog, then we will fall through
+ * to the gluster log. The ideal way to do things would be to
+ * not have the extra control file check */
+ case gf_logger_glusterlog:
+ ret = gf_log_glusterlog(ctx, domain, file, function, line, level,
+ errnum, msgid, appmsgstr, callstr, tv,
+ graph_id, fmt);
+ break;
+ }
+
+ return ret;
+}
+
+void
+gf_log_flush_message(log_buf_t *buf, glusterfs_ctx_t *ctx)
+{
+ if (buf->refcount == 1) {
+ (void)gf_log_print_plain_fmt(ctx, buf->domain, buf->file, buf->function,
+ buf->line, buf->level, buf->errnum,
+ buf->msg_id, &buf->msg, NULL, buf->latest,
+ buf->graph_id, gf_logformat_withmsgid);
+ }
+
+ if (buf->refcount > 1) {
+ gf_log_print_with_repetitions(
+ ctx, buf->domain, buf->file, buf->function, buf->line, buf->level,
+ buf->errnum, buf->msg_id, &buf->msg, NULL, buf->refcount,
+ buf->oldest, buf->latest, buf->graph_id);
+ }
+ return;
+}
+
+static void
+gf_log_flush_list(struct list_head *copy, glusterfs_ctx_t *ctx)
+{
+ log_buf_t *iter = NULL;
+ log_buf_t *tmp = NULL;
+
+ list_for_each_entry_safe(iter, tmp, copy, msg_list)
+ {
+ gf_log_flush_message(iter, ctx);
+ list_del_init(&iter->msg_list);
+ log_buf_destroy(iter);
+ }
+}
+
+void
+gf_log_flush_msgs(glusterfs_ctx_t *ctx)
+{
+ struct list_head copy;
+
+ INIT_LIST_HEAD(&copy);
+
+ pthread_mutex_lock(&ctx->log.log_buf_lock);
+ {
+ list_splice_init(&ctx->log.lru_queue, &copy);
+ ctx->log.lru_cur_size = 0;
+ }
+ pthread_mutex_unlock(&ctx->log.log_buf_lock);
+
+ gf_log_flush_list(&copy, ctx);
+
+ return;
+}
+
+static void
+gf_log_flush_extra_msgs(glusterfs_ctx_t *ctx, uint32_t new)
+{
+ int count = 0;
+ int i = 0;
+ log_buf_t *iter = NULL;
+ log_buf_t *tmp = NULL;
+ struct list_head copy;
+
+ INIT_LIST_HEAD(&copy);
+
+ /* If the number of outstanding log messages does not cause list
+ * overflow even after reducing the size of the list, then do nothing.
+ * Otherwise (that is if there are more items in the list than there
+ * need to be after reducing its size), move the least recently used
+ * 'diff' elements to be flushed into a separate list...
+ */
+
+ pthread_mutex_lock(&ctx->log.log_buf_lock);
+ {
+ if (ctx->log.lru_cur_size <= new)
+ goto unlock;
+ count = ctx->log.lru_cur_size - new;
+ list_for_each_entry_safe(iter, tmp, &ctx->log.lru_queue, msg_list)
+ {
+ if (i == count)
+ break;
+
+ list_del_init(&iter->msg_list);
+ list_add_tail(&iter->msg_list, &copy);
+ i++;
}
+ ctx->log.lru_cur_size = ctx->log.lru_cur_size - count;
+ }
+ // ... quickly unlock ...
+unlock:
+ pthread_mutex_unlock(&ctx->log.log_buf_lock);
+ if (list_empty(&copy))
+ return;
+
+ TEST_LOG(
+ "Log buffer size reduced. About to flush %d extra log "
+ "messages",
+ count);
+ // ... and then flush them outside the lock.
+ gf_log_flush_list(&copy, ctx);
+ TEST_LOG("Just flushed %d extra log messages", count);
+
+ return;
+}
+
+static int
+__gf_log_inject_timer_event(glusterfs_ctx_t *ctx)
+{
+ int ret = -1;
+ struct timespec timeout = {
+ 0,
+ };
+
+ if (!ctx)
+ goto out;
+
+ if (ctx->log.log_flush_timer) {
+ gf_timer_call_cancel(ctx, ctx->log.log_flush_timer);
+ ctx->log.log_flush_timer = NULL;
+ }
+
+ timeout.tv_sec = ctx->log.timeout;
+ timeout.tv_nsec = 0;
- if (str1)
- GF_FREE (str1);
+ TEST_LOG("Starting timer now. Timeout = %u, current buf size = %d",
+ ctx->log.timeout, ctx->log.lru_size);
+ ctx->log.log_flush_timer = gf_timer_call_after(
+ ctx, timeout, gf_log_flush_timeout_cbk, (void *)ctx);
+ if (!ctx->log.log_flush_timer)
+ goto out;
- if (str2)
- FREE (str2);
+ ret = 0;
out:
- return (0);
+ return ret;
}
int
-gf_log_eh (void *data)
+gf_log_inject_timer_event(glusterfs_ctx_t *ctx)
{
- int ret = -1;
+ int ret = -1;
- ret = eh_save_history (THIS->history, data);
+ if (!ctx)
+ return -1;
- return ret;
+ pthread_mutex_lock(&ctx->log.log_buf_lock);
+ {
+ ret = __gf_log_inject_timer_event(ctx);
+ }
+ pthread_mutex_unlock(&ctx->log.log_buf_lock);
+
+ return ret;
}
-int
-gf_cmd_log_init (const char *filename)
+void
+gf_log_flush_timeout_cbk(void *data)
{
- int fd = -1;
- xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
- this = THIS;
+ ctx = (glusterfs_ctx_t *)data;
- if (!filename){
- gf_log (this->name, GF_LOG_CRITICAL, "gf_cmd_log_init: no "
- "filename specified\n");
- return -1;
+ TEST_LOG(
+ "Log timer timed out. About to flush outstanding messages if "
+ "present");
+ gf_log_flush_msgs(ctx);
+
+ (void)gf_log_inject_timer_event(ctx);
+
+ return;
+}
+
+static int
+_gf_msg_internal(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, int errnum, uint64_t msgid,
+ char **appmsgstr, char *callstr, int graph_id)
+{
+ int ret = -1;
+ uint32_t size = 0;
+ const char *basename = NULL;
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ log_buf_t *iter = NULL;
+ log_buf_t *buf_tmp = NULL;
+ log_buf_t *buf_new = NULL;
+ log_buf_t *first = NULL;
+ struct timeval tv = {
+ 0,
+ };
+ gf_boolean_t found = _gf_false;
+ gf_boolean_t flush_lru = _gf_false;
+ gf_boolean_t flush_logged_msg = _gf_false;
+
+ this = THIS;
+ ctx = this->ctx;
+
+ if (!ctx)
+ goto out;
+
+ GET_FILE_NAME_TO_LOG(file, basename);
+
+ ret = gettimeofday(&tv, NULL);
+ if (ret)
+ goto out;
+
+ /* If this function is called via _gf_msg_callingfn () (indicated by a
+ * non-NULL callstr), or if the logformat is traditional, flush the
+ * message directly to disk.
+ */
+
+ if ((callstr) || (ctx->log.logformat == gf_logformat_traditional)) {
+ ret = gf_log_print_plain_fmt(ctx, domain, basename, function, line,
+ level, errnum, msgid, appmsgstr, callstr,
+ tv, graph_id, gf_logformat_traditional);
+ goto out;
+ }
+
+ pthread_mutex_lock(&ctx->log.log_buf_lock);
+ {
+ /* Check if the msg being logged is already part of the list */
+ list_for_each_entry_safe_reverse(iter, buf_tmp, &ctx->log.lru_queue,
+ msg_list)
+ {
+ if (first == NULL)
+ // Remember the first (lru) element in first ptr
+ first = iter;
+
+ /* Try to fail the search early on by doing the less
+ * expensive integer comparisons and continue to string
+ * parameter comparisons only after all int parameters
+ * are found to be matching.
+ */
+ if (line != iter->line)
+ continue;
+
+ if (errnum != iter->errnum)
+ continue;
+
+ if (msgid != iter->msg_id)
+ continue;
+
+ if (level != iter->level)
+ continue;
+
+ if (graph_id != iter->graph_id)
+ continue;
+
+ if (strcmp(domain, iter->domain))
+ continue;
+
+ if (strcmp(basename, iter->file))
+ continue;
+
+ if (strcmp(function, iter->function))
+ continue;
+
+ if (strcmp(*appmsgstr, iter->msg))
+ continue;
+
+ // Ah! Found a match!
+ list_move_tail(&iter->msg_list, &ctx->log.lru_queue);
+ iter->refcount++;
+ found = _gf_true;
+ // Update the 'latest' timestamp.
+ memcpy((void *)&(iter->latest), (void *)&tv,
+ sizeof(struct timeval));
+ break;
+ }
+ if (found) {
+ ret = 0;
+ goto unlock;
+ }
+ // else ...
+
+ size = ctx->log.lru_size;
+ /* If the upper limit on the log buf size is 0, flush the msg to
+ * disk directly after unlock. There's no need to buffer the
+ * msg here.
+ */
+ if (size == 0) {
+ flush_logged_msg = _gf_true;
+ goto unlock;
+ } else if (((ctx->log.lru_cur_size + 1) > size) && (first)) {
+ /* If the list is full, flush the lru msg to disk and also
+ * release it after unlock, and ...
+ * */
+ if (first->refcount >= 1)
+ TEST_LOG(
+ "Buffer overflow of a buffer whose size limit "
+ "is %d. About to flush least recently used log"
+ " message to disk",
+ size);
+ list_del_init(&first->msg_list);
+ ctx->log.lru_cur_size--;
+ flush_lru = _gf_true;
+ }
+ /* create a new list element, initialise and enqueue it.
+ * Additionally, this being the first occurrence of the msg,
+ * log it directly to disk after unlock. */
+ buf_new = mem_get0(THIS->ctx->logbuf_pool);
+ if (!buf_new) {
+ ret = -1;
+ goto unlock;
+ }
+ ret = log_buf_init(buf_new, domain, basename, function, line, level,
+ errnum, msgid, appmsgstr, graph_id);
+ if (ret) {
+ log_buf_destroy(buf_new);
+ goto unlock;
}
- cmd_log_filename = gf_strdup (filename);
- if (!cmd_log_filename) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "gf_cmd_log_init: strdup error\n");
+ memcpy((void *)&(buf_new->latest), (void *)&tv, sizeof(struct timeval));
+ memcpy((void *)&(buf_new->oldest), (void *)&tv, sizeof(struct timeval));
+
+ list_add_tail(&buf_new->msg_list, &ctx->log.lru_queue);
+ ctx->log.lru_cur_size++;
+ flush_logged_msg = _gf_true;
+ ret = 0;
+ }
+unlock:
+ pthread_mutex_unlock(&ctx->log.log_buf_lock);
+
+ /* Value of @ret is a don't-care below since irrespective of success or
+ * failure post setting of @flush_lru, @first must be flushed and freed.
+ */
+ if (flush_lru) {
+ gf_log_flush_message(first, ctx);
+ log_buf_destroy(first);
+ }
+ /* Similarly, irrespective of whether all operations since setting of
+ * @flush_logged_msg were successful or not, flush the message being
+ * logged to disk in the plain format.
+ */
+ if (flush_logged_msg) {
+ ret = gf_log_print_plain_fmt(ctx, domain, basename, function, line,
+ level, errnum, msgid, appmsgstr, callstr,
+ tv, graph_id, gf_logformat_withmsgid);
+ }
+
+out:
+ return ret;
+}
+
+int
+_gf_msg(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, int errnum, int trace,
+ uint64_t msgid, const char *fmt, ...)
+{
+ int ret = 0;
+ char *msgstr = NULL;
+ va_list ap;
+ xlator_t *this = THIS;
+ glusterfs_ctx_t *ctx = NULL;
+ char *callstr = NULL;
+ int log_inited = 0;
+
+ if (this == NULL)
+ return -1;
+
+ ctx = this->ctx;
+ if (ctx == NULL) {
+ /* messages before context initialization are ignored */
+ return -1;
+ }
+
+ /* check if we should be logging */
+ if (skip_logging(this, level))
+ goto out;
+
+ /* in args check */
+ if (!domain || !file || !function || !fmt) {
+ fprintf(stderr, "logging: %s:%s():%d: invalid argument\n", __FILE__,
+ __PRETTY_FUNCTION__, __LINE__);
+ return -1;
+ }
+
+ /* form the message */
+ va_start(ap, fmt);
+ ret = vasprintf(&msgstr, fmt, ap);
+ va_end(ap);
+
+ /* log */
+ if (ret != -1) {
+ if (trace) {
+ callstr = GF_MALLOC(GF_LOG_BACKTRACE_SIZE, gf_common_mt_char);
+ if (callstr == NULL)
return -1;
+
+ ret = _gf_msg_backtrace(GF_LOG_BACKTRACE_DEPTH, callstr,
+ GF_LOG_BACKTRACE_SIZE);
+ if (ret < 0) {
+ GF_FREE(callstr);
+ callstr = NULL;
+ }
+ }
+
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ log_inited = 1;
+ }
+ }
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+
+ if (!log_inited && ctx->log.gf_log_syslog) {
+ ret = gf_log_syslog(
+ ctx, domain, file, function, line, level, errnum, msgid,
+ &msgstr, (callstr ? callstr : NULL),
+ (this->graph) ? this->graph->id : 0, gf_logformat_traditional);
+ } else {
+ ret = _gf_msg_internal(domain, file, function, line, level, errnum,
+ msgid, &msgstr, (callstr ? callstr : NULL),
+ (this->graph) ? this->graph->id : 0);
}
- /* close and reopen cmdlogfile for log rotate*/
- if (cmdlogfile) {
- fclose (cmdlogfile);
- cmdlogfile = NULL;
+ } else {
+ /* man (3) vasprintf states on error strp contents
+ * are undefined, be safe */
+ msgstr = NULL;
+ }
+ if (callstr)
+ GF_FREE(callstr);
+ FREE(msgstr);
+
+out:
+ return ret;
+}
+
+/* TODO: Deprecate (delete) _gf_log, _gf_log_callingfn,
+ * once messages are changed to use _gf_msgXXX APIs for logging */
+int
+_gf_log(const char *domain, const char *file, const char *function, int line,
+ gf_loglevel_t level, const char *fmt, ...)
+{
+ const char *basename = NULL;
+ FILE *new_logfile = NULL;
+ va_list ap;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ struct timeval tv = {
+ 0,
+ };
+ char *logline = NULL;
+ char *msg = NULL;
+ int ret = 0;
+ int fd = -1;
+ xlator_t *this = THIS;
+ glusterfs_ctx_t *ctx = this->ctx;
+
+ if (!ctx)
+ goto out;
+
+ if (skip_logging(this, level))
+ goto out;
+
+ if (!domain || !file || !function || !fmt) {
+ fprintf(stderr, "logging: %s:%s():%d: invalid argument\n", __FILE__,
+ __PRETTY_FUNCTION__, __LINE__);
+ return -1;
+ }
+
+ basename = strrchr(file, '/');
+ if (basename)
+ basename++;
+ else
+ basename = file;
+
+ va_start(ap, fmt);
+ ret = vasprintf(&msg, fmt, ap);
+ va_end(ap);
+ if (-1 == ret) {
+ goto err;
+ }
+
+ if (ctx->log.log_control_file_found) {
+ int priority;
+ /* treat GF_LOG_TRACE and GF_LOG_NONE as LOG_DEBUG and
+ other level as is */
+ if (GF_LOG_TRACE == level || GF_LOG_NONE == level) {
+ priority = LOG_DEBUG;
+ } else {
+ priority = level - 1;
}
- fd = open (cmd_log_filename, O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
+ gf_syslog(priority, "[%s:%d:%s] %d-%s: %s", basename, line, function,
+ ((this->graph) ? this->graph->id : 0), domain, msg);
+ goto err;
+ }
+
+ if (ctx->log.logrotate) {
+ ctx->log.logrotate = 0;
+
+ fd = sys_open(ctx->log.filename, O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
if (fd < 0) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "%s", strerror (errno));
- return -1;
+ gf_smsg("logrotate", GF_LOG_ERROR, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, NULL);
+ return -1;
+ }
+ sys_close(fd);
+
+ new_logfile = fopen(ctx->log.filename, "a");
+ if (!new_logfile) {
+ gf_smsg("logrotate", GF_LOG_CRITICAL, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, "filename=%s",
+ ctx->log.filename, NULL);
+ goto log;
}
- close (fd);
- cmdlogfile = fopen (cmd_log_filename, "a");
- if (!cmdlogfile){
- gf_log (this->name, GF_LOG_CRITICAL,
- "gf_cmd_log_init: failed to open logfile \"%s\" "
- "(%s)\n", cmd_log_filename, strerror (errno));
- return -1;
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile)
+ fclose(ctx->log.logfile);
+
+ ctx->log.gf_log_logfile = ctx->log.logfile = new_logfile;
}
- return 0;
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+ }
+
+log:
+ ret = gettimeofday(&tv, NULL);
+ if (-1 == ret)
+ goto out;
+
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
+
+ ret = gf_asprintf(&logline, "[%s] %c [%s:%d:%s] %d-%s: %s\n", timestr,
+ gf_level_strings[level], basename, line, function,
+ ((this->graph) ? this->graph->id : 0), domain, msg);
+ if (-1 == ret) {
+ goto err;
+ }
+
+ pthread_mutex_lock(&ctx->log.logfile_mutex);
+ {
+ if (ctx->log.logfile) {
+ fputs(logline, ctx->log.logfile);
+ fflush(ctx->log.logfile);
+ } else if (ctx->log.loglevel >= level) {
+ fputs(logline, stderr);
+ fflush(stderr);
+ }
+
+#ifdef GF_LINUX_HOST_OS
+ /* We want only serious log in 'syslog', not our debug
+ and trace logs */
+ if (ctx->log.gf_log_syslog && level &&
+ (level <= ctx->log.sys_log_level))
+ syslog((level - 1), "%s", logline);
+#endif
+ }
+
+ pthread_mutex_unlock(&ctx->log.logfile_mutex);
+
+err:
+ GF_FREE(logline);
+
+ FREE(msg);
+
+out:
+ return (0);
}
int
-gf_cmd_log (const char *domain, const char *fmt, ...)
-{
- va_list ap;
- char timestr[64];
- struct timeval tv = {0,};
- char *str1 = NULL;
- char *str2 = NULL;
- char *msg = NULL;
- size_t len = 0;
- int ret = 0;
-
- if (!cmdlogfile)
- return -1;
+_gf_log_eh(const char *function, const char *fmt, ...)
+{
+ int ret = -1;
+ va_list ap;
+ char *logline = NULL;
+ char *msg = NULL;
+ xlator_t *this = NULL;
+ this = THIS;
- if (!domain || !fmt) {
- gf_log ("glusterd", GF_LOG_TRACE,
- "logging: invalid argument\n");
- return -1;
+ va_start(ap, fmt);
+ ret = vasprintf(&msg, fmt, ap);
+ va_end(ap);
+ if (-1 == ret) {
+ goto out;
+ }
+
+ ret = gf_asprintf(&logline, "[%d] %s: %s",
+ ((this->graph) ? this->graph->id : 0), function, msg);
+ if (-1 == ret) {
+ goto out;
+ }
+
+ ret = eh_save_history(this->history, logline);
+
+out:
+ GF_FREE(logline);
+
+ FREE(msg);
+
+ return ret;
+}
+
+int
+gf_cmd_log_init(const char *filename)
+{
+ int fd = -1;
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ this = THIS;
+ ctx = this->ctx;
+
+ if (!ctx)
+ return -1;
+
+ if (!filename) {
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0, LG_MSG_FILENAME_NOT_SPECIFIED,
+ "gf_cmd_log_init", NULL);
+ return -1;
+ }
+
+ ctx->log.cmd_log_filename = gf_strdup(filename);
+ if (!ctx->log.cmd_log_filename) {
+ return -1;
+ }
+ /* close and reopen cmdlogfile for log rotate*/
+ if (ctx->log.cmdlogfile) {
+ fclose(ctx->log.cmdlogfile);
+ ctx->log.cmdlogfile = NULL;
+ }
+
+ fd = sys_open(ctx->log.cmd_log_filename, O_CREAT | O_WRONLY | O_APPEND,
+ S_IRUSR | S_IWUSR);
+ if (fd < 0) {
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno, LG_MSG_OPEN_LOGFILE_FAILED,
+ "cmd_log_file", NULL);
+ return -1;
+ }
+
+ ctx->log.cmdlogfile = fdopen(fd, "a");
+ if (!ctx->log.cmdlogfile) {
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno, LG_MSG_OPEN_LOGFILE_FAILED,
+ "gf_cmd_log_init: %s", ctx->log.cmd_log_filename, NULL);
+ sys_close(fd);
+ return -1;
+ }
+ return 0;
+}
+
+int
+gf_cmd_log(const char *domain, const char *fmt, ...)
+{
+ va_list ap;
+ char timestr[GF_TIMESTR_SIZE];
+ struct timeval tv = {
+ 0,
+ };
+ char *logline = NULL;
+ char *msg = NULL;
+ int ret = 0;
+ int fd = -1;
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = THIS->ctx;
+
+ if (!ctx)
+ return -1;
+
+ if (!ctx->log.cmdlogfile)
+ return -1;
+
+ if (!domain || !fmt) {
+ gf_msg_trace("glusterd", 0, "logging: invalid argument\n");
+ return -1;
+ }
+
+ ret = gettimeofday(&tv, NULL);
+ if (ret == -1)
+ goto out;
+ va_start(ap, fmt);
+ ret = vasprintf(&msg, fmt, ap);
+ va_end(ap);
+ if (ret == -1) {
+ goto out;
+ }
+
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
+
+ ret = gf_asprintf(&logline, "[%s] %s : %s\n", timestr, domain, msg);
+ if (ret == -1) {
+ goto out;
+ }
+
+ /* close and reopen cmdlogfile fd for in case of log rotate*/
+ if (ctx->log.cmd_history_logrotate) {
+ ctx->log.cmd_history_logrotate = 0;
+
+ if (ctx->log.cmdlogfile) {
+ fclose(ctx->log.cmdlogfile);
+ ctx->log.cmdlogfile = NULL;
}
- ret = gettimeofday (&tv, NULL);
- if (ret == -1)
+ fd = sys_open(ctx->log.cmd_log_filename, O_CREAT | O_WRONLY | O_APPEND,
+ S_IRUSR | S_IWUSR);
+ if (fd < 0) {
+ gf_smsg(THIS->name, GF_LOG_CRITICAL, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, "name=%s",
+ ctx->log.cmd_log_filename, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ ctx->log.cmdlogfile = fdopen(fd, "a");
+ if (!ctx->log.cmdlogfile) {
+ gf_smsg(THIS->name, GF_LOG_CRITICAL, errno,
+ LG_MSG_OPEN_LOGFILE_FAILED, "name=%s",
+ ctx->log.cmd_log_filename, NULL);
+ ret = -1;
+ sys_close(fd);
+ goto out;
+ }
+ }
+
+ fputs(logline, ctx->log.cmdlogfile);
+ fflush(ctx->log.cmdlogfile);
+
+out:
+ GF_FREE(logline);
+
+ FREE(msg);
+
+ return ret;
+}
+
+static int
+_do_slog_format(int errnum, const char *event, va_list inp, char **msg)
+{
+ va_list valist_tmp;
+ int i = 0;
+ int j = 0;
+ int k = 0;
+ int ret = 0;
+ char *fmt = NULL;
+ char *buffer = NULL;
+ int num_format_chars = 0;
+ char format_char = '%';
+ char *tmp1 = NULL;
+ char *tmp2 = NULL;
+ char temp_sep[3] = "";
+
+ tmp2 = gf_strdup("");
+ if (!tmp2) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Hardcoded value for max key value pairs, exits early */
+ /* from loop if found NULL */
+ for (i = 0; i < GF_MAX_SLOG_PAIR_COUNT; i++) {
+ fmt = va_arg(inp, char *);
+ if (fmt == NULL) {
+ break;
+ }
+
+ /* Get number of times % is used in input for formatting, */
+ /* this count will be used to skip those many args from the */
+ /* main list and will be used to format inner format */
+ num_format_chars = 0;
+ for (k = 0; fmt[k] != '\0'; k++) {
+ /* If %% is used then that is escaped */
+ if (fmt[k] == format_char && fmt[k + 1] == format_char) {
+ k++;
+ } else if (fmt[k] == format_char) {
+ num_format_chars++;
+ }
+ }
+
+ tmp1 = gf_strdup(tmp2);
+ if (!tmp1) {
+ ret = -1;
+ goto out;
+ }
+
+ GF_FREE(tmp2);
+ tmp2 = NULL;
+
+ if (num_format_chars > 0) {
+ /* Make separate valist and format the string */
+ va_copy(valist_tmp, inp);
+ ret = gf_vasprintf(&buffer, fmt, valist_tmp);
+ if (ret < 0) {
+ va_end(valist_tmp);
goto out;
- va_start (ap, fmt);
- gf_time_fmt (timestr, sizeof timestr, tv.tv_sec, gf_timefmt_FT);
- snprintf (timestr + strlen (timestr), 256 - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, tv.tv_usec);
-
- ret = gf_asprintf (&str1, "[%s] %s : ",
- timestr, domain);
- if (ret == -1) {
+ }
+ va_end(valist_tmp);
+
+ for (j = 0; j < num_format_chars; j++) {
+ /* Skip the va_arg value since these values
+ are already used for internal formatting */
+ (void)va_arg(inp, void *);
+ }
+
+ ret = gf_asprintf(&tmp2, "%s%s{%s}", tmp1, temp_sep, buffer);
+ if (ret < 0)
goto out;
- }
- ret = vasprintf (&str2, fmt, ap);
- if (ret == -1) {
+ GF_FREE(buffer);
+ buffer = NULL;
+ } else {
+ ret = gf_asprintf(&tmp2, "%s%s{%s}", tmp1, temp_sep, fmt);
+ if (ret < 0)
goto out;
}
- va_end (ap);
+ /* Set seperator for next iteration */
+ temp_sep[0] = ',';
+ temp_sep[1] = ' ';
+ temp_sep[2] = 0;
+
+ GF_FREE(tmp1);
+ tmp1 = NULL;
+ }
+
+ tmp1 = gf_strdup(tmp2);
+ if (!tmp1) {
+ ret = -1;
+ goto out;
+ }
+ GF_FREE(tmp2);
+ tmp2 = NULL;
+
+ if (errnum) {
+ ret = gf_asprintf(&tmp2, "%s [%s%s{errno=%d}, {error=%s}]", event, tmp1,
+ temp_sep, errnum, strerror(errnum));
+ } else {
+ ret = gf_asprintf(&tmp2, "%s [%s]", event, tmp1);
+ }
+
+ if (ret == -1)
+ goto out;
+
+ *msg = gf_strdup(tmp2);
+ if (!*msg)
+ ret = -1;
- len = strlen (str1);
- msg = GF_MALLOC (len + strlen (str2) + 1, gf_common_mt_char);
+out:
+ if (buffer)
+ GF_FREE(buffer);
- strcpy (msg, str1);
- strcpy (msg + len, str2);
+ if (tmp1)
+ GF_FREE(tmp1);
- fprintf (cmdlogfile, "%s\n", msg);
- fflush (cmdlogfile);
+ if (tmp2)
+ GF_FREE(tmp2);
-out:
- if (msg) {
- GF_FREE (msg);
- }
+ return ret;
+}
+
+int
+_gf_smsg(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, int errnum, int trace,
+ uint64_t msgid, const char *event, ...)
+{
+ va_list valist;
+ char *msg = NULL;
+ int ret = 0;
+ xlator_t *this = THIS;
- if (str1)
- GF_FREE (str1);
+ if (skip_logging(this, level))
+ return ret;
- if (str2)
- FREE (str2);
+ va_start(valist, event);
+ ret = _do_slog_format(errnum, event, valist, &msg);
+ if (ret == -1)
+ goto out;
- return (0);
+ /* Pass errnum as zero since it is already formated as required */
+ ret = _gf_msg(domain, file, function, line, level, 0, trace, msgid, "%s",
+ msg);
+
+out:
+ va_end(valist);
+ if (msg)
+ GF_FREE(msg);
+ return ret;
}
diff --git a/libglusterfs/src/logging.h b/libglusterfs/src/logging.h
deleted file mode 100644
index bbf0d9a3891..00000000000
--- a/libglusterfs/src/logging.h
+++ /dev/null
@@ -1,148 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __LOGGING_H__
-#define __LOGGING_H__
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdint.h>
-#include <stdio.h>
-#include <stdarg.h>
-
-#ifdef GF_DARWIN_HOST_OS
-#define GF_PRI_FSBLK "u"
-#define GF_PRI_DEV PRId32
-#define GF_PRI_NLINK PRIu16
-#define GF_PRI_SUSECONDS "06d"
-#else
-#define GF_PRI_FSBLK PRIu64
-#define GF_PRI_DEV PRIu64
-#define GF_PRI_NLINK PRIu32
-#define GF_PRI_SUSECONDS "06ld"
-#endif
-#define GF_PRI_BLKSIZE PRId32
-#define GF_PRI_SIZET "zu"
-
-#if 0
-/* Syslog definitions :-) */
-#define LOG_EMERG 0 /* system is unusable */
-#define LOG_ALERT 1 /* action must be taken immediately */
-#define LOG_CRIT 2 /* critical conditions */
-#define LOG_ERR 3 /* error conditions */
-#define LOG_WARNING 4 /* warning conditions */
-#define LOG_NOTICE 5 /* normal but significant condition */
-#define LOG_INFO 6 /* informational */
-#define LOG_DEBUG 7 /* debug-level messages */
-#endif
-
-typedef enum {
- GF_LOG_NONE,
- GF_LOG_EMERG,
- GF_LOG_ALERT,
- GF_LOG_CRITICAL, /* fatal errors */
- GF_LOG_ERROR, /* major failures (not necessarily fatal) */
- GF_LOG_WARNING, /* info about normal operation */
- GF_LOG_NOTICE,
- GF_LOG_INFO, /* Normal information */
- GF_LOG_DEBUG, /* internal errors */
- GF_LOG_TRACE, /* full trace of operation */
-} gf_loglevel_t;
-
-extern gf_loglevel_t gf_log_loglevel;
-extern char gf_log_xl_log_set;
-
-#define FMT_WARN(fmt...) do { if (0) printf (fmt); } while (0)
-
-#define gf_log(dom, levl, fmt...) do { \
- FMT_WARN (fmt); \
- \
- if ((levl > gf_log_loglevel) && !gf_log_xl_log_set) \
- break; \
- _gf_log (dom, __FILE__, __FUNCTION__, __LINE__, \
- levl, ##fmt); \
- } while (0)
-
-#define gf_log_callingfn(dom, levl, fmt...) do { \
- FMT_WARN (fmt); \
- \
- if ((levl > gf_log_loglevel) && !gf_log_xl_log_set) \
- break; \
- _gf_log_callingfn (dom, __FILE__, __FUNCTION__, __LINE__, \
- levl, ##fmt); \
- } while (0)
-
-
-/* No malloc or calloc should be called in this function */
-#define gf_log_nomem(dom, levl, size) do { \
- if ((levl > gf_log_loglevel) && !gf_log_xl_log_set) \
- break; \
- _gf_log_nomem (dom, __FILE__, __FUNCTION__, __LINE__, \
- levl, size); \
- } while (0)
-
-
-/* Log once in GF_UNIVERSAL_ANSWER times */
-#define GF_LOG_OCCASIONALLY(var, args...) if (!(var++%GF_UNIVERSAL_ANSWER)) { \
- gf_log (args); \
- }
-
-
-void gf_log_logrotate (int signum);
-int
-gf_log_eh (void *data);
-void gf_log_globals_init (void);
-int gf_log_init (const char *filename);
-void gf_log_cleanup (void);
-
-int _gf_log (const char *domain, const char *file,
- const char *function, int32_t line, gf_loglevel_t level,
- const char *fmt, ...)
- __attribute__ ((__format__ (__printf__, 6, 7)));
-int _gf_log_callingfn (const char *domain, const char *file,
- const char *function, int32_t line, gf_loglevel_t level,
- const char *fmt, ...)
- __attribute__ ((__format__ (__printf__, 6, 7)));
-
-int _gf_log_nomem (const char *domain, const char *file,
- const char *function, int line, gf_loglevel_t level,
- size_t size);
-
-int gf_log_from_client (const char *msg, char *identifier);
-
-void gf_log_lock (void);
-void gf_log_unlock (void);
-
-void gf_log_disable_syslog (void);
-void gf_log_enable_syslog (void);
-gf_loglevel_t gf_log_get_loglevel (void);
-void gf_log_set_loglevel (gf_loglevel_t level);
-gf_loglevel_t gf_log_get_xl_loglevel (void *xl);
-void gf_log_set_xl_loglevel (void *xl, gf_loglevel_t level);
-
-#define GF_DEBUG(xl, format, args...) \
- gf_log ((xl)->name, GF_LOG_DEBUG, format, ##args)
-#define GF_INFO(xl, format, args...) \
- gf_log ((xl)->name, GF_LOG_INFO, format, ##args)
-#define GF_WARNING(xl, format, args...) \
- gf_log ((xl)->name, GF_LOG_WARNING, format, ##args)
-#define GF_ERROR(xl, format, args...) \
- gf_log ((xl)->name, GF_LOG_ERROR, format, ##args)
-
-int gf_cmd_log (const char *domain, const char *fmt, ...)
- __attribute__ ((__format__ (__printf__, 2, 3)));
-
-int gf_cmd_log_init (const char *filename);
-
-void set_sys_log_level (gf_loglevel_t level);
-#endif /* __LOGGING_H__ */
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
index 3e8100c6438..2d5a12b0a00 100644
--- a/libglusterfs/src/mem-pool.c
+++ b/libglusterfs/src/mem-pool.c
@@ -8,579 +8,925 @@
cases as published by the Free Software Foundation.
*/
-#include "mem-pool.h"
-#include "logging.h"
-#include "xlator.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/common-utils.h" // for GF_ASSERT, gf_thread_cr...
+#include "glusterfs/globals.h" // for xlator_t, THIS
#include <stdlib.h>
#include <stdarg.h>
-#define GF_MEM_POOL_LIST_BOUNDARY (sizeof(struct list_head))
-#define GF_MEM_POOL_PTR (sizeof(struct mem_pool*))
-#define GF_MEM_POOL_PAD_BOUNDARY (GF_MEM_POOL_LIST_BOUNDARY + GF_MEM_POOL_PTR + sizeof(int))
-#define mem_pool_chunkhead2ptr(head) ((head) + GF_MEM_POOL_PAD_BOUNDARY)
-#define mem_pool_ptr2chunkhead(ptr) ((ptr) - GF_MEM_POOL_PAD_BOUNDARY)
-#define is_mem_chunk_in_use(ptr) (*ptr == 1)
-#define mem_pool_from_ptr(ptr) ((ptr) + GF_MEM_POOL_LIST_BOUNDARY)
+#include "unittest/unittest.h"
+#include "glusterfs/libglusterfs-messages.h"
-#define GF_MEM_HEADER_SIZE (4 + sizeof (size_t) + sizeof (xlator_t *) + 4 + 8)
-#define GF_MEM_TRAILER_SIZE 8
+void
+gf_mem_acct_enable_set(void *data)
+{
+ glusterfs_ctx_t *ctx = NULL;
-#define GF_MEM_HEADER_MAGIC 0xCAFEBABE
-#define GF_MEM_TRAILER_MAGIC 0xBAADF00D
+ REQUIRE(data != NULL);
-#define GLUSTERFS_ENV_MEM_ACCT_STR "GLUSTERFS_DISABLE_MEM_ACCT"
+ ctx = data;
-static int gf_mem_acct_enable = 0;
+ GF_ASSERT(ctx != NULL);
-int
-gf_mem_acct_is_enabled ()
-{
- return gf_mem_acct_enable;
+ ctx->mem_acct_enable = 1;
+
+ ENSURE(1 == ctx->mem_acct_enable);
+
+ return;
}
-void
-gf_mem_acct_enable_set ()
+static void *
+gf_mem_header_prepare(struct mem_header *header, size_t size)
{
-#ifdef DEBUG
- gf_mem_acct_enable = 1;
- return;
-#endif
- glusterfs_ctx_t *ctx = NULL;
- char *opt = NULL;
- long val = -1;
-
- gf_mem_acct_enable = 0;
+ void *ptr;
- ctx = glusterfs_ctx_get ();
+ header->size = size;
- if (ctx->mem_accounting) {
- gf_mem_acct_enable = 1;
- return;
- }
+ ptr = header + 1;
- opt = getenv (GLUSTERFS_ENV_MEM_ACCT_STR);
- if (opt) {
- val = strtol (opt, NULL, 0);
- if (val)
- gf_mem_acct_enable = 1;
- }
+ /* data follows in this gap of 'size' bytes */
+ *(uint32_t *)(ptr + size) = GF_MEM_TRAILER_MAGIC;
- return;
+ return ptr;
}
-void
-gf_mem_set_acct_info (xlator_t *xl, char **alloc_ptr,
- size_t size, uint32_t type)
+static void *
+gf_mem_set_acct_info(struct mem_acct *mem_acct, struct mem_header *header,
+ size_t size, uint32_t type, const char *typestr)
{
+ struct mem_acct_rec *rec = NULL;
+ bool new_ref = false;
- char *ptr = NULL;
+ if (mem_acct != NULL) {
+ GF_ASSERT(type <= mem_acct->num_types);
- if (!alloc_ptr)
- return;
-
- ptr = (char *) (*alloc_ptr);
+ rec = &mem_acct->rec[type];
+ LOCK(&rec->lock);
+ {
+ if (!rec->typestr) {
+ rec->typestr = typestr;
+ }
+ rec->size += size;
+ new_ref = (rec->num_allocs == 0);
+ rec->num_allocs++;
+ rec->total_allocs++;
+ rec->max_size = max(rec->max_size, rec->size);
+ rec->max_num_allocs = max(rec->max_num_allocs, rec->num_allocs);
- if (!xl) {
- GF_ASSERT (0);
+#ifdef DEBUG
+ list_add(&header->acct_list, &rec->obj_list);
+#endif
}
+ UNLOCK(&rec->lock);
- if (!(xl->mem_acct.rec)) {
- GF_ASSERT (0);
+ /* We only take a reference for each memory type used, not for each
+ * allocation. This minimizes the use of atomic operations. */
+ if (new_ref) {
+ GF_ATOMIC_INC(mem_acct->refcnt);
}
+ }
- if (type > xl->mem_acct.num_types) {
- GF_ASSERT (0);
- }
+ header->type = type;
+ header->mem_acct = mem_acct;
+ header->magic = GF_MEM_HEADER_MAGIC;
+
+ return gf_mem_header_prepare(header, size);
+}
- LOCK(&xl->mem_acct.rec[type].lock);
+static void *
+gf_mem_update_acct_info(struct mem_acct *mem_acct, struct mem_header *header,
+ size_t size)
+{
+ struct mem_acct_rec *rec = NULL;
+
+ if (mem_acct != NULL) {
+ rec = &mem_acct->rec[header->type];
+ LOCK(&rec->lock);
{
- xl->mem_acct.rec[type].size += size;
- xl->mem_acct.rec[type].num_allocs++;
- xl->mem_acct.rec[type].total_allocs++;
- xl->mem_acct.rec[type].max_size =
- max (xl->mem_acct.rec[type].max_size,
- xl->mem_acct.rec[type].size);
- xl->mem_acct.rec[type].max_num_allocs =
- max (xl->mem_acct.rec[type].max_num_allocs,
- xl->mem_acct.rec[type].num_allocs);
+ rec->size += size - header->size;
+ rec->total_allocs++;
+ rec->max_size = max(rec->max_size, rec->size);
+
+#ifdef DEBUG
+ /* The old 'header' already was present in 'obj_list', but
+ * realloc() could have changed its address. We need to remove
+ * the old item from the list and add the new one. This can be
+ * done this way because list_move() doesn't use the pointers
+ * to the old location (which are not valid anymore) already
+ * present in the list, it simply overwrites them. */
+ list_move(&header->acct_list, &rec->obj_list);
+#endif
}
- UNLOCK(&xl->mem_acct.rec[type].lock);
-
- *(uint32_t *)(ptr) = type;
- ptr = ptr + 4;
- memcpy (ptr, &size, sizeof(size_t));
- ptr += sizeof (size_t);
- memcpy (ptr, &xl, sizeof(xlator_t *));
- ptr += sizeof (xlator_t *);
- *(uint32_t *)(ptr) = GF_MEM_HEADER_MAGIC;
- ptr = ptr + 4;
- ptr = ptr + 8; //padding
- *(uint32_t *) (ptr + size) = GF_MEM_TRAILER_MAGIC;
-
- *alloc_ptr = (void *)ptr;
- return;
+ UNLOCK(&rec->lock);
+ }
+
+ return gf_mem_header_prepare(header, size);
}
+static bool
+gf_mem_acct_enabled(void)
+{
+ xlator_t *x = THIS;
+ /* Low-level __gf_xxx() may be called
+ before ctx is initialized. */
+ return x->ctx && x->ctx->mem_acct_enable;
+}
void *
-__gf_calloc (size_t nmemb, size_t size, uint32_t type)
+__gf_calloc(size_t nmemb, size_t size, uint32_t type, const char *typestr)
{
- size_t tot_size = 0;
- size_t req_size = 0;
- char *ptr = NULL;
- xlator_t *xl = NULL;
+ size_t tot_size = 0;
+ size_t req_size = 0;
+ void *ptr = NULL;
+ xlator_t *xl = NULL;
- if (!gf_mem_acct_enable)
- return CALLOC (nmemb, size);
+ if (!gf_mem_acct_enabled())
+ return CALLOC(nmemb, size);
- xl = THIS;
+ xl = THIS;
- req_size = nmemb * size;
- tot_size = req_size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
+ req_size = nmemb * size;
+ tot_size = req_size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
- ptr = calloc (1, tot_size);
+ ptr = calloc(1, tot_size);
- if (!ptr) {
- gf_log_nomem ("", GF_LOG_ALERT, tot_size);
- return NULL;
- }
- gf_mem_set_acct_info (xl, &ptr, req_size, type);
+ if (!ptr) {
+ gf_msg_nomem("", GF_LOG_ALERT, tot_size);
+ return NULL;
+ }
- return (void *)ptr;
+ return gf_mem_set_acct_info(xl->mem_acct, ptr, req_size, type, typestr);
}
void *
-__gf_malloc (size_t size, uint32_t type)
+__gf_malloc(size_t size, uint32_t type, const char *typestr)
{
- size_t tot_size = 0;
- char *ptr = NULL;
- xlator_t *xl = NULL;
+ size_t tot_size = 0;
+ void *ptr = NULL;
+ xlator_t *xl = NULL;
- if (!gf_mem_acct_enable)
- return MALLOC (size);
+ if (!gf_mem_acct_enabled())
+ return MALLOC(size);
- xl = THIS;
+ xl = THIS;
- tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
+ tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
- ptr = malloc (tot_size);
- if (!ptr) {
- gf_log_nomem ("", GF_LOG_ALERT, tot_size);
- return NULL;
- }
- gf_mem_set_acct_info (xl, &ptr, size, type);
+ ptr = malloc(tot_size);
+ if (!ptr) {
+ gf_msg_nomem("", GF_LOG_ALERT, tot_size);
+ return NULL;
+ }
- return (void *)ptr;
+ return gf_mem_set_acct_info(xl->mem_acct, ptr, size, type, typestr);
}
void *
-__gf_realloc (void *ptr, size_t size)
+__gf_realloc(void *ptr, size_t size)
{
- size_t tot_size = 0;
- char *orig_ptr = NULL;
- xlator_t *xl = NULL;
- uint32_t type = 0;
+ size_t tot_size = 0;
+ struct mem_header *header = NULL;
+
+ if (!gf_mem_acct_enabled())
+ return REALLOC(ptr, size);
- if (!gf_mem_acct_enable)
- return REALLOC (ptr, size);
+ REQUIRE(NULL != ptr);
- tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
+ header = (struct mem_header *)(ptr - GF_MEM_HEADER_SIZE);
+ GF_ASSERT(header->magic == GF_MEM_HEADER_MAGIC);
- orig_ptr = (char *)ptr - 8 - 4;
+ tot_size = size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE;
+ header = realloc(header, tot_size);
+ if (!header) {
+ gf_msg_nomem("", GF_LOG_ALERT, tot_size);
+ return NULL;
+ }
- GF_ASSERT (*(uint32_t *)orig_ptr == GF_MEM_HEADER_MAGIC);
+ return gf_mem_update_acct_info(header->mem_acct, header, size);
+}
- orig_ptr = orig_ptr - sizeof(xlator_t *);
- xl = *((xlator_t **)orig_ptr);
+int
+gf_vasprintf(char **string_ptr, const char *format, va_list arg)
+{
+ va_list arg_save;
+ char *str = NULL;
+ int size = 0;
+ int rv = 0;
+
+ if (!string_ptr || !format)
+ return -1;
+
+ va_copy(arg_save, arg);
+
+ size = vsnprintf(NULL, 0, format, arg);
+ size++;
+ str = GF_MALLOC(size, gf_common_mt_asprintf);
+ if (str == NULL) {
+ /* log is done in GF_MALLOC itself */
+ va_end(arg_save);
+ return -1;
+ }
+ rv = vsnprintf(str, size, format, arg_save);
+
+ *string_ptr = str;
+ va_end(arg_save);
+ return (rv);
+}
- orig_ptr = (char *)ptr - GF_MEM_HEADER_SIZE;
- type = *(uint32_t *)orig_ptr;
+int
+gf_asprintf(char **string_ptr, const char *format, ...)
+{
+ va_list arg;
+ int rv = 0;
- ptr = realloc (orig_ptr, tot_size);
- if (!ptr) {
- gf_log_nomem ("", GF_LOG_ALERT, tot_size);
- return NULL;
- }
+ va_start(arg, format);
+ rv = gf_vasprintf(string_ptr, format, arg);
+ va_end(arg);
- gf_mem_set_acct_info (xl, (char **)&ptr, size, type);
+ return rv;
+}
- return (void *)ptr;
+#ifdef DEBUG
+void
+__gf_mem_invalidate(void *ptr)
+{
+ struct mem_header *header = ptr;
+ void *end = NULL;
+
+ struct mem_invalid inval = {
+ .magic = GF_MEM_INVALID_MAGIC,
+ .mem_acct = header->mem_acct,
+ .type = header->type,
+ .size = header->size,
+ .baseaddr = ptr + GF_MEM_HEADER_SIZE,
+ };
+
+ /* calculate the last byte of the allocated area */
+ end = ptr + GF_MEM_HEADER_SIZE + inval.size + GF_MEM_TRAILER_SIZE;
+
+ /* overwrite the old mem_header */
+ memcpy(ptr, &inval, sizeof(inval));
+ ptr += sizeof(inval);
+
+ /* zero out remaining (old) mem_header bytes) */
+ memset(ptr, 0x00, sizeof(*header) - sizeof(inval));
+ ptr += sizeof(*header) - sizeof(inval);
+
+ /* zero out the first byte of data */
+ *(uint32_t *)(ptr) = 0x00;
+ ptr += 1;
+
+ /* repeated writes of invalid structurein data area */
+ while ((ptr + (sizeof(inval))) < (end - 1)) {
+ memcpy(ptr, &inval, sizeof(inval));
+ ptr += sizeof(inval);
+ }
+
+ /* fill out remaining data area with 0xff */
+ memset(ptr, 0xff, end - ptr);
+}
+#endif /* DEBUG */
+
+/* Coverity taint NOTE: pointers passed to free, would operate on
+pointer-GF_MEM_HEADER_SIZE content and if the pointer was used for any IO
+related purpose, the pointer stands tainted, and hence coverity would consider
+access to the said region as tainted. The following directive to coverity hence
+sanitizes the pointer, thus removing any taint to the same within this function.
+If the pointer is accessed outside the scope of this function without any
+checks on content read from an IO operation, taints will still be reported, and
+needs appropriate addressing. */
+
+/* coverity[ +tainted_data_sanitize : arg-0 ] */
+static void
+gf_free_sanitize(void *s)
+{
}
-int
-gf_vasprintf (char **string_ptr, const char *format, va_list arg)
+void
+__gf_free(void *free_ptr)
{
- va_list arg_save;
- char *str = NULL;
- int size = 0;
- int rv = 0;
-
- if (!string_ptr || !format)
- return -1;
-
- va_copy (arg_save, arg);
-
- size = vsnprintf (NULL, 0, format, arg);
- size++;
- str = GF_MALLOC (size, gf_common_mt_asprintf);
- if (str == NULL) {
- /* log is done in GF_MALLOC itself */
- return -1;
+ void *ptr = NULL;
+ struct mem_acct *mem_acct;
+ struct mem_header *header = NULL;
+ bool last_ref = false;
+
+ if (!gf_mem_acct_enabled()) {
+ FREE(free_ptr);
+ return;
+ }
+
+ if (!free_ptr)
+ return;
+
+ gf_free_sanitize(free_ptr);
+ ptr = free_ptr - GF_MEM_HEADER_SIZE;
+ header = (struct mem_header *)ptr;
+
+ // Possible corruption, assert here
+ GF_ASSERT(GF_MEM_HEADER_MAGIC == header->magic);
+
+ mem_acct = header->mem_acct;
+ if (!mem_acct) {
+ goto free;
+ }
+
+ // This points to a memory overrun
+ GF_ASSERT(GF_MEM_TRAILER_MAGIC ==
+ *(uint32_t *)((char *)free_ptr + header->size));
+
+ LOCK(&mem_acct->rec[header->type].lock);
+ {
+ mem_acct->rec[header->type].size -= header->size;
+ mem_acct->rec[header->type].num_allocs--;
+ /* If all the instances are freed up then ensure typestr is set
+ * to NULL */
+ if (!mem_acct->rec[header->type].num_allocs) {
+ last_ref = true;
+ mem_acct->rec[header->type].typestr = NULL;
}
- rv = vsnprintf (str, size, format, arg_save);
+#ifdef DEBUG
+ list_del(&header->acct_list);
+#endif
+ }
+ UNLOCK(&mem_acct->rec[header->type].lock);
- *string_ptr = str;
- return (rv);
+ if (last_ref) {
+ xlator_mem_acct_unref(mem_acct);
+ }
+
+free:
+#ifdef DEBUG
+ __gf_mem_invalidate(ptr);
+#endif
+
+ FREE(ptr);
}
-int
-gf_asprintf (char **string_ptr, const char *format, ...)
+#if defined(GF_DISABLE_MEMPOOL)
+
+struct mem_pool *
+mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
+ unsigned long count, char *name)
{
- va_list arg;
- int rv = 0;
+ struct mem_pool *new;
- va_start (arg, format);
- rv = gf_vasprintf (string_ptr, format, arg);
- va_end (arg);
+ new = GF_MALLOC(sizeof(struct mem_pool), gf_common_mt_mem_pool);
+ if (!new)
+ return NULL;
- return rv;
+ new->sizeof_type = sizeof_type;
+ return new;
}
void
-__gf_free (void *free_ptr)
+mem_pool_destroy(struct mem_pool *pool)
{
- size_t req_size = 0;
- char *ptr = NULL;
- uint32_t type = 0;
- xlator_t *xl = NULL;
-
- if (!gf_mem_acct_enable) {
- FREE (free_ptr);
- return;
- }
+ GF_FREE(pool);
+}
- if (!free_ptr)
- return;
+#else /* !GF_DISABLE_MEMPOOL */
- ptr = (char *)free_ptr - 8 - 4;
+static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
+static struct list_head pool_threads;
+static pthread_mutex_t pool_free_lock = PTHREAD_MUTEX_INITIALIZER;
+static struct list_head pool_free_threads;
+static struct mem_pool_shared pools[NPOOLS];
+static size_t pool_list_size;
- if (GF_MEM_HEADER_MAGIC != *(uint32_t *)ptr) {
- //Possible corruption, assert here
- GF_ASSERT (0);
- }
+static __thread per_thread_pool_list_t *thread_pool_list = NULL;
- *(uint32_t *)ptr = 0;
+#define N_COLD_LISTS 1024
+#define POOL_SWEEP_SECS 30
- ptr = ptr - sizeof(xlator_t *);
- memcpy (&xl, ptr, sizeof(xlator_t *));
+typedef struct {
+ pooled_obj_hdr_t *cold_lists[N_COLD_LISTS];
+ unsigned int n_cold_lists;
+} sweep_state_t;
- if (!xl) {
- //gf_free expects xl to be available
- GF_ASSERT (0);
- }
+enum init_state {
+ GF_MEMPOOL_INIT_NONE = 0,
+ GF_MEMPOOL_INIT_EARLY,
+ GF_MEMPOOL_INIT_LATE,
+ GF_MEMPOOL_INIT_DESTROY
+};
- if (!xl->mem_acct.rec) {
- ptr = (char *)free_ptr - GF_MEM_HEADER_SIZE;
- goto free;
+static enum init_state init_done = GF_MEMPOOL_INIT_NONE;
+static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
+static unsigned int init_count = 0;
+static pthread_t sweeper_tid;
+
+static bool
+collect_garbage(sweep_state_t *state, per_thread_pool_list_t *pool_list)
+{
+ unsigned int i;
+ per_thread_pool_t *pt_pool;
+
+ (void)pthread_spin_lock(&pool_list->lock);
+
+ for (i = 0; i < NPOOLS; ++i) {
+ pt_pool = &pool_list->pools[i];
+ if (pt_pool->cold_list) {
+ if (state->n_cold_lists >= N_COLD_LISTS) {
+ (void)pthread_spin_unlock(&pool_list->lock);
+ return true;
+ }
+ state->cold_lists[state->n_cold_lists++] = pt_pool->cold_list;
}
+ pt_pool->cold_list = pt_pool->hot_list;
+ pt_pool->hot_list = NULL;
+ }
+ (void)pthread_spin_unlock(&pool_list->lock);
- ptr = ptr - sizeof(size_t);
- memcpy (&req_size, ptr, sizeof (size_t));
- ptr = ptr - 4;
- type = *(uint32_t *)ptr;
+ return false;
+}
- if (GF_MEM_TRAILER_MAGIC != *(uint32_t *)
- ((char *)free_ptr + req_size)) {
- // This points to a memory overrun
- GF_ASSERT (0);
- }
- *(uint32_t *) ((char *)free_ptr + req_size) = 0;
+static void
+free_obj_list(pooled_obj_hdr_t *victim)
+{
+ pooled_obj_hdr_t *next;
- LOCK (&xl->mem_acct.rec[type].lock);
+ while (victim) {
+ next = victim->next;
+ free(victim);
+ victim = next;
+ }
+}
+
+static void *
+pool_sweeper(void *arg)
+{
+ sweep_state_t state;
+ per_thread_pool_list_t *pool_list;
+ uint32_t i;
+ bool pending;
+
+ /*
+ * This is all a bit inelegant, but the point is to avoid doing
+ * expensive things (like freeing thousands of objects) while holding a
+ * global lock. Thus, we split each iteration into two passes, with
+ * only the first and fastest holding the lock.
+ */
+
+ pending = true;
+
+ for (;;) {
+ /* If we know there's pending work to do (or it's the first run), we
+ * do collect garbage more often. */
+ sleep(pending ? POOL_SWEEP_SECS / 5 : POOL_SWEEP_SECS);
+
+ (void)pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
+ state.n_cold_lists = 0;
+ pending = false;
+
+ /* First pass: collect stuff that needs our attention. */
+ (void)pthread_mutex_lock(&pool_lock);
+ list_for_each_entry(pool_list, &pool_threads, thr_list)
{
- xl->mem_acct.rec[type].size -= req_size;
- xl->mem_acct.rec[type].num_allocs--;
+ if (collect_garbage(&state, pool_list)) {
+ pending = true;
+ }
}
- UNLOCK (&xl->mem_acct.rec[type].lock);
-free:
- FREE (ptr);
-}
+ (void)pthread_mutex_unlock(&pool_lock);
+ /* Second pass: free cold objects from live pools. */
+ for (i = 0; i < state.n_cold_lists; ++i) {
+ free_obj_list(state.cold_lists[i]);
+ }
+ (void)pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
+ }
+ return NULL;
+}
-struct mem_pool *
-mem_pool_new_fn (unsigned long sizeof_type,
- unsigned long count, char *name)
+void
+mem_pool_thread_destructor(per_thread_pool_list_t *pool_list)
{
- struct mem_pool *mem_pool = NULL;
- unsigned long padded_sizeof_type = 0;
- void *pool = NULL;
- int i = 0;
- int ret = 0;
- struct list_head *list = NULL;
- glusterfs_ctx_t *ctx = NULL;
-
- if (!sizeof_type || !count) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return NULL;
+ per_thread_pool_t *pt_pool;
+ uint32_t i;
+
+ if (pool_list == NULL) {
+ pool_list = thread_pool_list;
+ }
+
+ /* The current thread is terminating. None of the allocated objects will
+ * be used again. We can directly destroy them here instead of delaying
+ * it until the next sweeper loop. */
+ if (pool_list != NULL) {
+ /* Remove pool_list from the global list to avoid that sweeper
+ * could touch it. */
+ pthread_mutex_lock(&pool_lock);
+ list_del(&pool_list->thr_list);
+ pthread_mutex_unlock(&pool_lock);
+
+ /* We need to protect hot/cold changes from potential mem_put() calls
+ * that reference this pool_list. Once poison is set to true, we are
+ * sure that no one else will touch hot/cold lists. The only possible
+ * race is when at the same moment a mem_put() is adding a new item
+ * to the hot list. We protect from that by taking pool_list->lock.
+ * After that we don't need the lock to destroy the hot/cold lists. */
+ pthread_spin_lock(&pool_list->lock);
+ pool_list->poison = true;
+ pthread_spin_unlock(&pool_list->lock);
+
+ for (i = 0; i < NPOOLS; i++) {
+ pt_pool = &pool_list->pools[i];
+
+ free_obj_list(pt_pool->hot_list);
+ pt_pool->hot_list = NULL;
+
+ free_obj_list(pt_pool->cold_list);
+ pt_pool->cold_list = NULL;
}
- padded_sizeof_type = sizeof_type + GF_MEM_POOL_PAD_BOUNDARY;
- mem_pool = GF_CALLOC (sizeof (*mem_pool), 1, gf_common_mt_mem_pool);
- if (!mem_pool)
- return NULL;
+ pthread_mutex_lock(&pool_free_lock);
+ list_add(&pool_list->thr_list, &pool_free_threads);
+ pthread_mutex_unlock(&pool_free_lock);
- ret = gf_asprintf (&mem_pool->name, "%s:%s", THIS->name, name);
- if (ret < 0)
- return NULL;
+ thread_pool_list = NULL;
+ }
+}
- if (!mem_pool->name) {
- GF_FREE (mem_pool);
- return NULL;
- }
+static __attribute__((constructor)) void
+mem_pools_preinit(void)
+{
+ unsigned int i;
- LOCK_INIT (&mem_pool->lock);
- INIT_LIST_HEAD (&mem_pool->list);
- INIT_LIST_HEAD (&mem_pool->global_list);
+ INIT_LIST_HEAD(&pool_threads);
+ INIT_LIST_HEAD(&pool_free_threads);
- mem_pool->padded_sizeof_type = padded_sizeof_type;
- mem_pool->cold_count = count;
- mem_pool->real_sizeof_type = sizeof_type;
+ for (i = 0; i < NPOOLS; ++i) {
+ pools[i].power_of_two = POOL_SMALLEST + i;
- pool = GF_CALLOC (count, padded_sizeof_type, gf_common_mt_long);
- if (!pool) {
- GF_FREE (mem_pool->name);
- GF_FREE (mem_pool);
- return NULL;
- }
+ GF_ATOMIC_INIT(pools[i].allocs_hot, 0);
+ GF_ATOMIC_INIT(pools[i].allocs_cold, 0);
+ GF_ATOMIC_INIT(pools[i].allocs_stdc, 0);
+ GF_ATOMIC_INIT(pools[i].frees_to_list, 0);
+ }
- for (i = 0; i < count; i++) {
- list = pool + (i * (padded_sizeof_type));
- INIT_LIST_HEAD (list);
- list_add_tail (list, &mem_pool->list);
- }
+ pool_list_size = sizeof(per_thread_pool_list_t) +
+ sizeof(per_thread_pool_t) * (NPOOLS - 1);
- mem_pool->pool = pool;
- mem_pool->pool_end = pool + (count * (padded_sizeof_type));
+ init_done = GF_MEMPOOL_INIT_EARLY;
+}
- /* add this pool to the global list */
- ctx = glusterfs_ctx_get ();
- if (!ctx)
- goto out;
+static __attribute__((destructor)) void
+mem_pools_postfini(void)
+{
+ /* TODO: This function should destroy all per thread memory pools that
+ * are still alive, but this is not possible right now because glibc
+ * starts calling destructors as soon as exit() is called, and
+ * gluster doesn't ensure that all threads have been stopped before
+ * calling exit(). Existing threads would crash when they try to use
+ * memory or they terminate if we destroy things here.
+ *
+ * When we propertly terminate all threads, we can add the needed
+ * code here. Till then we need to leave the memory allocated. Most
+ * probably this function will be executed on process termination,
+ * so the memory will be released anyway by the system. */
+}
- list_add (&mem_pool->global_list, &ctx->mempool_list);
+/* Call mem_pools_init() once threading has been configured completely. This
+ * prevent the pool_sweeper thread from getting killed once the main() thread
+ * exits during deamonizing. */
+void
+mem_pools_init(void)
+{
+ pthread_mutex_lock(&init_mutex);
+ if ((init_count++) == 0) {
+ (void)gf_thread_create(&sweeper_tid, NULL, pool_sweeper, NULL,
+ "memsweep");
+
+ init_done = GF_MEMPOOL_INIT_LATE;
+ }
+ pthread_mutex_unlock(&init_mutex);
+}
-out:
- return mem_pool;
+void
+mem_pools_fini(void)
+{
+ pthread_mutex_lock(&init_mutex);
+ switch (init_count) {
+ case 0:
+ /*
+ * If init_count is already zero (as e.g. if somebody called this
+ * before mem_pools_init) then the sweeper was probably never even
+ * started so we don't need to stop it. Even if there's some crazy
+ * circumstance where there is a sweeper but init_count is still
+ * zero, that just means we'll leave it running. Not perfect, but
+ * far better than any known alternative.
+ */
+ break;
+ case 1: {
+ /* if mem_pools_init() was not called, sweeper_tid will be invalid
+ * and the functions will error out. That is not critical. In all
+ * other cases, the sweeper_tid will be valid and the thread gets
+ * stopped. */
+ (void)pthread_cancel(sweeper_tid);
+ (void)pthread_join(sweeper_tid, NULL);
+
+ /* There could be threads still running in some cases, so we can't
+ * destroy pool_lists in use. We can also not destroy unused
+ * pool_lists because some allocated objects may still be pointing
+ * to them. */
+ mem_pool_thread_destructor(NULL);
+
+ init_done = GF_MEMPOOL_INIT_DESTROY;
+ /* Fall through. */
+ }
+ default:
+ --init_count;
+ }
+ pthread_mutex_unlock(&init_mutex);
}
-void*
-mem_get0 (struct mem_pool *mem_pool)
+void
+mem_pool_destroy(struct mem_pool *pool)
{
- void *ptr = NULL;
+ if (!pool)
+ return;
- if (!mem_pool) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return NULL;
- }
+ /* remove this pool from the owner (glusterfs_ctx_t) */
+ LOCK(&pool->ctx->lock);
+ {
+ list_del(&pool->owner);
+ }
+ UNLOCK(&pool->ctx->lock);
+
+ /* free this pool, but keep the mem_pool_shared */
+ GF_FREE(pool);
+
+ /*
+ * Pools are now permanent, so the mem_pool->pool is kept around. All
+ * of the objects *in* the pool will eventually be freed via the
+ * pool-sweeper thread, and this way we don't have to add a lot of
+ * reference-counting complexity.
+ */
+}
- ptr = mem_get(mem_pool);
+struct mem_pool *
+mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
+ unsigned long count, char *name)
+{
+ unsigned long extra_size, size;
+ unsigned int power;
+ struct mem_pool *new = NULL;
+ struct mem_pool_shared *pool = NULL;
+
+ if (!sizeof_type) {
+ gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return NULL;
+ }
+
+ /* This is the overhead we'll have because of memory accounting for each
+ * memory block. */
+ extra_size = sizeof(pooled_obj_hdr_t);
+
+ /* We need to compute the total space needed to hold the data type and
+ * the header. Given that the smallest block size we have in the pools
+ * is 2^POOL_SMALLEST, we need to take the MAX(size, 2^POOL_SMALLEST).
+ * However, since this value is only needed to compute its rounded
+ * logarithm in base 2, and this only depends on the highest bit set,
+ * we can simply do a bitwise or with the minimum size. We need to
+ * subtract 1 for correct handling of sizes that are exactly a power
+ * of 2. */
+ size = (sizeof_type + extra_size - 1UL) | ((1UL << POOL_SMALLEST) - 1UL);
+
+ /* We compute the logarithm in base 2 rounded up of the resulting size.
+ * This value will identify which pool we need to use from the pools of
+ * powers of 2. This is equivalent to finding the position of the highest
+ * bit set. */
+ power = sizeof(size) * 8 - __builtin_clzl(size);
+ if (power > POOL_LARGEST) {
+ gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return NULL;
+ }
+ pool = &pools[power - POOL_SMALLEST];
+
+ new = GF_MALLOC(sizeof(struct mem_pool), gf_common_mt_mem_pool);
+ if (!new)
+ return NULL;
+
+ new->ctx = ctx;
+ new->sizeof_type = sizeof_type;
+ new->count = count;
+ new->name = name;
+ new->xl_name = THIS->name;
+ new->pool = pool;
+ GF_ATOMIC_INIT(new->active, 0);
+#ifdef DEBUG
+ GF_ATOMIC_INIT(new->hit, 0);
+ GF_ATOMIC_INIT(new->miss, 0);
+#endif
+ INIT_LIST_HEAD(&new->owner);
- if (ptr)
- memset(ptr, 0, mem_pool->real_sizeof_type);
+ LOCK(&ctx->lock);
+ {
+ list_add(&new->owner, &ctx->mempool_list);
+ }
+ UNLOCK(&ctx->lock);
- return ptr;
+ return new;
}
-void *
-mem_get (struct mem_pool *mem_pool)
+per_thread_pool_list_t *
+mem_get_pool_list(void)
{
- struct list_head *list = NULL;
- void *ptr = NULL;
- int *in_use = NULL;
- struct mem_pool **pool_ptr = NULL;
-
- if (!mem_pool) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return NULL;
+ per_thread_pool_list_t *pool_list;
+ unsigned int i;
+
+ pool_list = thread_pool_list;
+ if (pool_list) {
+ return pool_list;
+ }
+
+ (void)pthread_mutex_lock(&pool_free_lock);
+ if (!list_empty(&pool_free_threads)) {
+ pool_list = list_entry(pool_free_threads.next, per_thread_pool_list_t,
+ thr_list);
+ list_del(&pool_list->thr_list);
+ }
+ (void)pthread_mutex_unlock(&pool_free_lock);
+
+ if (!pool_list) {
+ pool_list = MALLOC(pool_list_size);
+ if (!pool_list) {
+ return NULL;
}
- LOCK (&mem_pool->lock);
- {
- mem_pool->alloc_count++;
- if (mem_pool->cold_count) {
- list = mem_pool->list.next;
- list_del (list);
-
- mem_pool->hot_count++;
- mem_pool->cold_count--;
-
- if (mem_pool->max_alloc < mem_pool->hot_count)
- mem_pool->max_alloc = mem_pool->hot_count;
-
- ptr = list;
- in_use = (ptr + GF_MEM_POOL_LIST_BOUNDARY +
- GF_MEM_POOL_PTR);
- *in_use = 1;
-
- goto fwd_addr_out;
- }
-
- /* This is a problem area. If we've run out of
- * chunks in our slab above, we need to allocate
- * enough memory to service this request.
- * The problem is, these individual chunks will fail
- * the first address range check in __is_member. Now, since
- * we're not allocating a full second slab, we wont have
- * enough info perform the range check in __is_member.
- *
- * I am working around this by performing a regular allocation
- * , just the way the caller would've done when not using the
- * mem-pool. That also means, we're not padding the size with
- * the list_head structure because, this will not be added to
- * the list of chunks that belong to the mem-pool allocated
- * initially.
- *
- * This is the best we can do without adding functionality for
- * managing multiple slabs. That does not interest us at present
- * because it is too much work knowing that a better slab
- * allocator is coming RSN.
- */
- mem_pool->pool_misses++;
- mem_pool->curr_stdalloc++;
- if (mem_pool->max_stdalloc < mem_pool->curr_stdalloc)
- mem_pool->max_stdalloc = mem_pool->curr_stdalloc;
- ptr = GF_CALLOC (1, mem_pool->padded_sizeof_type,
- gf_common_mt_mem_pool);
- gf_log_callingfn ("mem-pool", GF_LOG_DEBUG, "Mem pool is full. "
- "Callocing mem");
-
- /* Memory coming from the heap need not be transformed from a
- * chunkhead to a usable pointer since it is not coming from
- * the pool.
- */
+ INIT_LIST_HEAD(&pool_list->thr_list);
+ (void)pthread_spin_init(&pool_list->lock, PTHREAD_PROCESS_PRIVATE);
+ for (i = 0; i < NPOOLS; ++i) {
+ pool_list->pools[i].parent = &pools[i];
+ pool_list->pools[i].hot_list = NULL;
+ pool_list->pools[i].cold_list = NULL;
}
-fwd_addr_out:
- pool_ptr = mem_pool_from_ptr (ptr);
- *pool_ptr = (struct mem_pool *)mem_pool;
- ptr = mem_pool_chunkhead2ptr (ptr);
- UNLOCK (&mem_pool->lock);
+ }
- return ptr;
-}
+ /* There's no need to take pool_list->lock, because this is already an
+ * atomic operation and we don't need to synchronize it with any change
+ * in hot/cold lists. */
+ pool_list->poison = false;
+ (void)pthread_mutex_lock(&pool_lock);
+ list_add(&pool_list->thr_list, &pool_threads);
+ (void)pthread_mutex_unlock(&pool_lock);
-static int
-__is_member (struct mem_pool *pool, void *ptr)
-{
- if (!pool || !ptr) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return -1;
- }
+ thread_pool_list = pool_list;
- if (ptr < pool->pool || ptr >= pool->pool_end)
- return 0;
+ /* Ensure that all memory objects associated to the new pool_list are
+ * destroyed when the thread terminates. */
+ gf_thread_needs_cleanup();
- if ((mem_pool_ptr2chunkhead (ptr) - pool->pool)
- % pool->padded_sizeof_type)
- return -1;
-
- return 1;
+ return pool_list;
}
-
-void
-mem_put (void *ptr)
+static pooled_obj_hdr_t *
+mem_get_from_pool(struct mem_pool *mem_pool)
{
- struct list_head *list = NULL;
- int *in_use = NULL;
- void *head = NULL;
- struct mem_pool **tmp = NULL;
- struct mem_pool *pool = NULL;
-
- if (!ptr) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR, "invalid argument");
- return;
- }
+ per_thread_pool_list_t *pool_list;
+ per_thread_pool_t *pt_pool;
+ pooled_obj_hdr_t *retval;
+#ifdef DEBUG
+ gf_boolean_t hit = _gf_true;
+#endif
- list = head = mem_pool_ptr2chunkhead (ptr);
- tmp = mem_pool_from_ptr (head);
- if (!tmp) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR,
- "ptr header is corrupted");
- return;
+ pool_list = mem_get_pool_list();
+ if (!pool_list || pool_list->poison) {
+ return NULL;
+ }
+
+ pt_pool = &pool_list->pools[mem_pool->pool->power_of_two - POOL_SMALLEST];
+
+ (void)pthread_spin_lock(&pool_list->lock);
+
+ retval = pt_pool->hot_list;
+ if (retval) {
+ pt_pool->hot_list = retval->next;
+ (void)pthread_spin_unlock(&pool_list->lock);
+ GF_ATOMIC_INC(pt_pool->parent->allocs_hot);
+ } else {
+ retval = pt_pool->cold_list;
+ if (retval) {
+ pt_pool->cold_list = retval->next;
+ (void)pthread_spin_unlock(&pool_list->lock);
+ GF_ATOMIC_INC(pt_pool->parent->allocs_cold);
+ } else {
+ (void)pthread_spin_unlock(&pool_list->lock);
+ GF_ATOMIC_INC(pt_pool->parent->allocs_stdc);
+ retval = malloc(1 << pt_pool->parent->power_of_two);
+#ifdef DEBUG
+ hit = _gf_false;
+#endif
}
+ }
- pool = *tmp;
- if (!pool) {
- gf_log_callingfn ("mem-pool", GF_LOG_ERROR,
- "mem-pool ptr is NULL");
- return;
- }
- LOCK (&pool->lock);
- {
+ if (retval != NULL) {
+ retval->pool = mem_pool;
+ retval->power_of_two = mem_pool->pool->power_of_two;
+#ifdef DEBUG
+ if (hit == _gf_true)
+ GF_ATOMIC_INC(mem_pool->hit);
+ else
+ GF_ATOMIC_INC(mem_pool->miss);
+#endif
+ retval->magic = GF_MEM_HEADER_MAGIC;
+ retval->pool_list = pool_list;
+ }
- switch (__is_member (pool, ptr))
- {
- case 1:
- in_use = (head + GF_MEM_POOL_LIST_BOUNDARY +
- GF_MEM_POOL_PTR);
- if (!is_mem_chunk_in_use(in_use)) {
- gf_log_callingfn ("mem-pool", GF_LOG_CRITICAL,
- "mem_put called on freed ptr %p of mem "
- "pool %p", ptr, pool);
- break;
- }
- pool->hot_count--;
- pool->cold_count++;
- *in_use = 0;
- list_add (list, &pool->list);
- break;
- case -1:
- /* For some reason, the address given is within
- * the address range of the mem-pool but does not align
- * with the expected start of a chunk that includes
- * the list headers also. Sounds like a problem in
- * layers of clouds up above us. ;)
- */
- abort ();
- break;
- case 0:
- /* The address is outside the range of the mem-pool. We
- * assume here that this address was allocated at a
- * point when the mem-pool was out of chunks in mem_get
- * or the programmer has made a mistake by calling the
- * wrong de-allocation interface. We do
- * not have enough info to distinguish between the two
- * situations.
- */
- pool->curr_stdalloc--;
- GF_FREE (list);
- break;
- default:
- /* log error */
- break;
- }
- }
- UNLOCK (&pool->lock);
+ return retval;
}
-void
-mem_pool_destroy (struct mem_pool *pool)
+#endif /* GF_DISABLE_MEMPOOL */
+
+void *
+mem_get0(struct mem_pool *mem_pool)
{
- if (!pool)
- return;
+ void *ptr = mem_get(mem_pool);
+ if (ptr) {
+#if defined(GF_DISABLE_MEMPOOL)
+ memset(ptr, 0, mem_pool->sizeof_type);
+#else
+ memset(ptr, 0, AVAILABLE_SIZE(mem_pool->pool->power_of_two));
+#endif
+ }
+
+ return ptr;
+}
- gf_log (THIS->name, GF_LOG_INFO, "size=%lu max=%d total=%"PRIu64,
- pool->padded_sizeof_type, pool->max_alloc, pool->alloc_count);
+void *
+mem_get(struct mem_pool *mem_pool)
+{
+ if (!mem_pool) {
+ gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return NULL;
+ }
+
+#if defined(GF_DISABLE_MEMPOOL)
+ return GF_MALLOC(mem_pool->sizeof_type, gf_common_mt_mem_pool);
+#else
+ pooled_obj_hdr_t *retval = mem_get_from_pool(mem_pool);
+ if (!retval) {
+ return NULL;
+ }
+
+ GF_ATOMIC_INC(mem_pool->active);
+
+ return retval + 1;
+#endif /* GF_DISABLE_MEMPOOL */
+}
- list_del (&pool->global_list);
+void
+mem_put(void *ptr)
+{
+#if defined(GF_DISABLE_MEMPOOL)
+ GF_FREE(ptr);
+#else
+ pooled_obj_hdr_t *hdr;
+ per_thread_pool_list_t *pool_list;
+ per_thread_pool_t *pt_pool;
+
+ if (!ptr) {
+ gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return;
+ }
- LOCK_DESTROY (&pool->lock);
- GF_FREE (pool->name);
- GF_FREE (pool->pool);
- GF_FREE (pool);
+ hdr = ((pooled_obj_hdr_t *)ptr) - 1;
+ if (hdr->magic != GF_MEM_HEADER_MAGIC) {
+ /* Not one of ours; don't touch it. */
+ return;
+ }
+ if (!hdr->pool_list) {
+ gf_msg_callingfn("mem-pool", GF_LOG_CRITICAL, EINVAL,
+ LG_MSG_INVALID_ARG,
+ "invalid argument hdr->pool_list NULL");
return;
+ }
+
+ pool_list = hdr->pool_list;
+ pt_pool = &pool_list->pools[hdr->power_of_two - POOL_SMALLEST];
+
+ if (hdr->pool)
+ GF_ATOMIC_DEC(hdr->pool->active);
+
+ hdr->magic = GF_MEM_INVALID_MAGIC;
+
+ (void)pthread_spin_lock(&pool_list->lock);
+ if (!pool_list->poison) {
+ hdr->next = pt_pool->hot_list;
+ pt_pool->hot_list = hdr;
+ (void)pthread_spin_unlock(&pool_list->lock);
+ GF_ATOMIC_INC(pt_pool->parent->frees_to_list);
+ } else {
+ /* If the owner thread of this element has terminated, we simply
+ * release its memory. */
+ (void)pthread_spin_unlock(&pool_list->lock);
+ free(hdr);
+ }
+#endif /* GF_DISABLE_MEMPOOL */
}
diff --git a/libglusterfs/src/mem-pool.h b/libglusterfs/src/mem-pool.h
deleted file mode 100644
index b3a25b25e7e..00000000000
--- a/libglusterfs/src/mem-pool.h
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _MEM_POOL_H_
-#define _MEM_POOL_H_
-
-#include "list.h"
-#include "locking.h"
-#include "logging.h"
-#include "mem-types.h"
-#include <stdlib.h>
-#include <inttypes.h>
-#include <string.h>
-#include <stdarg.h>
-
-
-struct mem_acct {
- uint32_t num_types;
- struct mem_acct_rec *rec;
-};
-
-struct mem_acct_rec {
- size_t size;
- size_t max_size;
- uint32_t num_allocs;
- uint32_t total_allocs;
- uint32_t max_num_allocs;
- gf_lock_t lock;
-};
-
-
-void *
-__gf_calloc (size_t cnt, size_t size, uint32_t type);
-
-void *
-__gf_malloc (size_t size, uint32_t type);
-
-void *
-__gf_realloc (void *ptr, size_t size);
-
-int
-gf_vasprintf (char **string_ptr, const char *format, va_list arg);
-
-int
-gf_asprintf (char **string_ptr, const char *format, ...);
-
-void
-__gf_free (void *ptr);
-
-
-static inline
-void* __gf_default_malloc (size_t size)
-{
- void *ptr = NULL;
-
- ptr = malloc (size);
- if (!ptr)
- gf_log_nomem ("", GF_LOG_ALERT, size);
-
- return ptr;
-}
-
-static inline
-void* __gf_default_calloc (int cnt, size_t size)
-{
- void *ptr = NULL;
-
- ptr = calloc (cnt, size);
- if (!ptr)
- gf_log_nomem ("", GF_LOG_ALERT, (cnt * size));
-
- return ptr;
-}
-
-static inline
-void* __gf_default_realloc (void *oldptr, size_t size)
-{
- void *ptr = NULL;
-
- ptr = realloc (oldptr, size);
- if (!ptr)
- gf_log_nomem ("", GF_LOG_ALERT, size);
-
- return ptr;
-}
-
-#define MALLOC(size) __gf_default_malloc(size)
-#define CALLOC(cnt,size) __gf_default_calloc(cnt,size)
-#define REALLOC(ptr,size) __gf_default_realloc(ptr,size)
-
-#define FREE(ptr) \
- if (ptr != NULL) { \
- free ((void *)ptr); \
- ptr = (void *)0xeeeeeeee; \
- }
-
-#define GF_CALLOC(nmemb, size, type) __gf_calloc (nmemb, size, type)
-
-#define GF_MALLOC(size, type) __gf_malloc (size, type)
-
-#define GF_REALLOC(ptr, size) __gf_realloc (ptr, size)
-
-#define GF_FREE(free_ptr) __gf_free (free_ptr)
-
-static inline
-char * gf_strdup (const char *src)
-{
-
- char *dup_str = NULL;
- size_t len = 0;
-
- len = strlen (src) + 1;
-
- dup_str = GF_CALLOC(1, len, gf_common_mt_strdup);
-
- if (!dup_str)
- return NULL;
-
- memcpy (dup_str, src, len);
-
- return dup_str;
-}
-
-struct mem_pool {
- struct list_head list;
- int hot_count;
- int cold_count;
- gf_lock_t lock;
- unsigned long padded_sizeof_type;
- void *pool;
- void *pool_end;
- int real_sizeof_type;
- uint64_t alloc_count;
- uint64_t pool_misses;
- int max_alloc;
- int curr_stdalloc;
- int max_stdalloc;
- char *name;
- struct list_head global_list;
-};
-
-struct mem_pool *
-mem_pool_new_fn (unsigned long sizeof_type, unsigned long count, char *name);
-
-#define mem_pool_new(type,count) mem_pool_new_fn (sizeof(type), count, #type)
-
-void mem_put (void *ptr);
-void *mem_get (struct mem_pool *pool);
-void *mem_get0 (struct mem_pool *pool);
-
-void mem_pool_destroy (struct mem_pool *pool);
-
-int gf_mem_acct_is_enabled ();
-void gf_mem_acct_enable_set ();
-
-#endif /* _MEM_POOL_H */
diff --git a/libglusterfs/src/mem-types.h b/libglusterfs/src/mem-types.h
deleted file mode 100644
index 12379bf3181..00000000000
--- a/libglusterfs/src/mem-types.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __MEM_TYPES_H__
-#define __MEM_TYPES_H__
-
-
-enum gf_common_mem_types_ {
- gf_common_mt_call_stub_t = 0,
- gf_common_mt_dnscache6 = 1,
- gf_common_mt_data_pair_t = 2,
- gf_common_mt_data_t = 3,
- gf_common_mt_dict_t = 4,
- gf_common_mt_event_pool = 5,
- gf_common_mt_reg = 6,
- gf_common_mt_pollfd = 7,
- gf_common_mt_epoll_event = 8,
- gf_common_mt_fdentry_t = 9,
- gf_common_mt_fdtable_t = 10,
- gf_common_mt_fd_t = 11,
- gf_common_mt_fd_ctx = 12,
- gf_common_mt_gf_dirent_t = 13,
- gf_common_mt_glusterfs_ctx_t = 14,
- gf_common_mt_dentry_t = 15,
- gf_common_mt_inode_t = 16,
- gf_common_mt_inode_ctx = 17,
- gf_common_mt_list_head = 18,
- gf_common_mt_inode_table_t = 19,
- gf_common_mt_xlator_t = 20,
- gf_common_mt_xlator_list_t = 21,
- gf_common_mt_log_msg = 22,
- gf_common_mt_client_log = 23,
- gf_common_mt_volume_opt_list_t = 24,
- gf_common_mt_gf_hdr_common_t = 25,
- gf_common_mt_call_frame_t = 26,
- gf_common_mt_call_stack_t = 27,
- gf_common_mt_gf_timer_t = 28,
- gf_common_mt_gf_timer_registry_t = 29,
- gf_common_mt_transport = 30,
- gf_common_mt_transport_msg = 31,
- gf_common_mt_auth_handle_t = 32,
- gf_common_mt_iobuf = 33,
- gf_common_mt_iobuf_arena = 34,
- gf_common_mt_iobref = 35,
- gf_common_mt_iobuf_pool = 36,
- gf_common_mt_iovec = 37,
- gf_common_mt_memdup = 38,
- gf_common_mt_asprintf = 39,
- gf_common_mt_strdup = 40,
- gf_common_mt_socket_private_t = 41,
- gf_common_mt_ioq = 42,
- gf_common_mt_transport_t = 43,
- gf_common_mt_socket_local_t = 44,
- gf_common_mt_char = 45,
- gf_common_mt_rbthash_table_t = 46,
- gf_common_mt_rbthash_bucket = 47,
- gf_common_mt_mem_pool = 48,
- gf_common_mt_long = 49,
- gf_common_mt_rpcsvc_auth_list = 50,
- gf_common_mt_rpcsvc_t = 51,
- gf_common_mt_rpcsvc_conn_t = 52,
- gf_common_mt_rpcsvc_program_t = 53,
- gf_common_mt_rpcsvc_listener_t = 54,
- gf_common_mt_rpcsvc_wrapper_t = 55,
- gf_common_mt_rpcsvc_stage_t = 56,
- gf_common_mt_rpcclnt_t = 57,
- gf_common_mt_rpcclnt_savedframe_t = 58,
- gf_common_mt_rpc_trans_t = 59,
- gf_common_mt_rpc_trans_pollin_t = 60,
- gf_common_mt_rpc_trans_handover_t = 61,
- gf_common_mt_rpc_trans_reqinfo_t = 62,
- gf_common_mt_rpc_trans_rsp_t = 63,
- gf_common_mt_glusterfs_graph_t = 64,
- gf_common_mt_rdma_private_t = 65,
- gf_common_mt_rdma_ioq_t = 66,
- gf_common_mt_rpc_transport_t = 67,
- gf_common_mt_rdma_local_t = 68,
- gf_common_mt_rdma_post_t = 69,
- gf_common_mt_qpent = 70,
- gf_common_mt_rdma_device_t = 71,
- gf_common_mt_rdma_context_t = 72,
- gf_common_mt_sge = 73,
- gf_common_mt_rpcclnt_cb_program_t = 74,
- gf_common_mt_libxl_marker_local = 75,
- gf_common_mt_graph_buf = 76,
- gf_common_mt_trie_trie = 77,
- gf_common_mt_trie_data = 78,
- gf_common_mt_trie_node = 79,
- gf_common_mt_trie_buf = 80,
- gf_common_mt_trie_end = 81,
- gf_common_mt_run_argv = 82,
- gf_common_mt_run_logbuf = 83,
- gf_common_mt_fd_lk_ctx_t = 84,
- gf_common_mt_fd_lk_ctx_node_t = 85,
- gf_common_mt_buffer_t = 86,
- gf_common_mt_circular_buffer_t = 87,
- gf_common_mt_eh_t = 88,
- gf_common_mt_end = 89
-};
-#endif
diff --git a/libglusterfs/src/monitoring.c b/libglusterfs/src/monitoring.c
new file mode 100644
index 00000000000..fbb68dc8622
--- /dev/null
+++ b/libglusterfs/src/monitoring.c
@@ -0,0 +1,282 @@
+/*
+ Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/monitoring.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/syscall.h"
+
+#include <stdlib.h>
+
+static void
+dump_mem_acct_details(xlator_t *xl, int fd)
+{
+ struct mem_acct_rec *mem_rec;
+ int i = 0;
+
+ if (!xl || !xl->mem_acct || (xl->ctx->active != xl->graph))
+ return;
+
+ dprintf(fd, "# %s.%s.total.num_types %d\n", xl->type, xl->name,
+ xl->mem_acct->num_types);
+
+ dprintf(fd,
+ "# type, in-use-size, in-use-units, max-size, "
+ "max-units, total-allocs\n");
+
+ for (i = 0; i < xl->mem_acct->num_types; i++) {
+ mem_rec = &xl->mem_acct->rec[i];
+ if (mem_rec->num_allocs == 0)
+ continue;
+ dprintf(fd, "# %s, %" PRIu64 ", %u, %" PRIu64 ", %u, %" PRIu64 "\n",
+ mem_rec->typestr, mem_rec->size, mem_rec->num_allocs,
+ mem_rec->max_size, mem_rec->max_num_allocs,
+ mem_rec->total_allocs);
+ }
+}
+
+static void
+dump_global_memory_accounting(int fd)
+{
+#if MEMORY_ACCOUNTING_STATS
+ int i = 0;
+ uint64_t count = 0;
+
+ uint64_t tcalloc = GF_ATOMIC_GET(gf_memory_stat_counts.total_calloc);
+ uint64_t tmalloc = GF_ATOMIC_GET(gf_memory_stat_counts.total_malloc);
+ uint64_t tfree = GF_ATOMIC_GET(gf_memory_stat_counts.total_free);
+
+ dprintf(fd, "memory.total.calloc %lu\n", tcalloc);
+ dprintf(fd, "memory.total.malloc %lu\n", tmalloc);
+ dprintf(fd, "memory.total.realloc %lu\n",
+ GF_ATOMIC_GET(gf_memory_stat_counts.total_realloc));
+ dprintf(fd, "memory.total.free %lu\n", tfree);
+ dprintf(fd, "memory.total.in-use %lu\n", ((tcalloc + tmalloc) - tfree));
+
+ for (i = 0; i < GF_BLK_MAX_VALUE; i++) {
+ count = GF_ATOMIC_GET(gf_memory_stat_counts.blk_size[i]);
+ dprintf(fd, "memory.total.blk_size.%s %lu\n",
+ gf_mem_stats_blk[i].blk_size_str, count);
+ }
+
+ dprintf(fd, "#----\n");
+#endif
+
+ /* This is not a metric to be watched in admin guide,
+ but keeping it here till we resolve all leak-issues
+ would be great */
+}
+
+static void
+dump_latency_and_count(xlator_t *xl, int fd)
+{
+ int32_t index = 0;
+ uint64_t fop;
+ uint64_t cbk;
+ uint64_t count;
+
+ if (xl->winds) {
+ dprintf(fd, "%s.total.pending-winds.count %" PRIu64 "\n", xl->name,
+ xl->winds);
+ }
+
+ /* Need 'fuse' data, and don't need all the old graph info */
+ if ((xl != xl->ctx->master) && (xl->ctx->active != xl->graph))
+ return;
+
+ count = GF_ATOMIC_GET(xl->stats.total.count);
+ dprintf(fd, "%s.total.fop-count %" PRIu64 "\n", xl->name, count);
+
+ count = GF_ATOMIC_GET(xl->stats.interval.count);
+ dprintf(fd, "%s.interval.fop-count %" PRIu64 "\n", xl->name, count);
+ GF_ATOMIC_INIT(xl->stats.interval.count, 0);
+
+ for (index = 0; index < GF_FOP_MAXVALUE; index++) {
+ fop = GF_ATOMIC_GET(xl->stats.total.metrics[index].fop);
+ if (fop) {
+ dprintf(fd, "%s.total.%s.count %" PRIu64 "\n", xl->name,
+ gf_fop_list[index], fop);
+ }
+ fop = GF_ATOMIC_GET(xl->stats.interval.metrics[index].fop);
+ if (fop) {
+ dprintf(fd, "%s.interval.%s.count %" PRIu64 "\n", xl->name,
+ gf_fop_list[index], fop);
+ }
+ cbk = GF_ATOMIC_GET(xl->stats.interval.metrics[index].cbk);
+ if (cbk) {
+ dprintf(fd, "%s.interval.%s.fail_count %" PRIu64 "\n", xl->name,
+ gf_fop_list[index], cbk);
+ }
+ if (xl->stats.interval.latencies[index].count != 0) {
+ dprintf(fd, "%s.interval.%s.latency %lf\n", xl->name,
+ gf_fop_list[index],
+ (((double)xl->stats.interval.latencies[index].total) /
+ xl->stats.interval.latencies[index].count));
+ dprintf(fd, "%s.interval.%s.max %" PRIu64 "\n", xl->name,
+ gf_fop_list[index],
+ xl->stats.interval.latencies[index].max);
+ dprintf(fd, "%s.interval.%s.min %" PRIu64 "\n", xl->name,
+ gf_fop_list[index],
+ xl->stats.interval.latencies[index].min);
+ }
+ GF_ATOMIC_INIT(xl->stats.interval.metrics[index].cbk, 0);
+ GF_ATOMIC_INIT(xl->stats.interval.metrics[index].fop, 0);
+ }
+ memset(xl->stats.interval.latencies, 0,
+ sizeof(xl->stats.interval.latencies));
+}
+
+static inline void
+dump_call_stack_details(glusterfs_ctx_t *ctx, int fd)
+{
+ dprintf(fd, "total.stack.count %" PRIu64 "\n",
+ GF_ATOMIC_GET(ctx->pool->total_count));
+ dprintf(fd, "total.stack.in-flight %" PRIu64 "\n", ctx->pool->cnt);
+}
+
+static inline void
+dump_dict_details(glusterfs_ctx_t *ctx, int fd)
+{
+ uint64_t total_dicts = 0;
+ uint64_t total_pairs = 0;
+
+ total_dicts = GF_ATOMIC_GET(ctx->stats.total_dicts_used);
+ total_pairs = GF_ATOMIC_GET(ctx->stats.total_pairs_used);
+
+ dprintf(fd, "total.dict.max-pairs-per %" PRIu64 "\n",
+ GF_ATOMIC_GET(ctx->stats.max_dict_pairs));
+ dprintf(fd, "total.dict.pairs-used %" PRIu64 "\n", total_pairs);
+ dprintf(fd, "total.dict.used %" PRIu64 "\n", total_dicts);
+ dprintf(fd, "total.dict.average-pairs %" PRIu64 "\n",
+ (total_pairs / total_dicts));
+}
+
+static void
+dump_inode_stats(glusterfs_ctx_t *ctx, int fd)
+{
+}
+
+static void
+dump_global_metrics(glusterfs_ctx_t *ctx, int fd)
+{
+ struct timeval tv;
+ time_t nowtime;
+ struct tm *nowtm;
+ char tmbuf[64] = {
+ 0,
+ };
+
+ gettimeofday(&tv, NULL);
+ nowtime = tv.tv_sec;
+ nowtm = localtime(&nowtime);
+ strftime(tmbuf, sizeof tmbuf, "%Y-%m-%d %H:%M:%S", nowtm);
+
+ /* Let every file have information on which process dumped info */
+ dprintf(fd, "## %s\n", ctx->cmdlinestr);
+ dprintf(fd, "### %s\n", tmbuf);
+ dprintf(fd, "### BrickName: %s\n", ctx->cmd_args.brick_name);
+ dprintf(fd, "### MountName: %s\n", ctx->cmd_args.mount_point);
+ dprintf(fd, "### VolumeName: %s\n", ctx->cmd_args.volume_name);
+
+ /* Dump memory accounting */
+ dump_global_memory_accounting(fd);
+ dprintf(fd, "# -----\n");
+
+ dump_call_stack_details(ctx, fd);
+ dump_dict_details(ctx, fd);
+ dprintf(fd, "# -----\n");
+
+ dump_inode_stats(ctx, fd);
+ dprintf(fd, "# -----\n");
+}
+
+static void
+dump_xl_metrics(glusterfs_ctx_t *ctx, int fd)
+{
+ xlator_t *xl;
+
+ xl = ctx->active->top;
+
+ while (xl) {
+ dump_latency_and_count(xl, fd);
+ dump_mem_acct_details(xl, fd);
+ if (xl->dump_metrics)
+ xl->dump_metrics(xl, fd);
+ xl = xl->next;
+ }
+
+ if (ctx->master) {
+ xl = ctx->master;
+
+ dump_latency_and_count(xl, fd);
+ dump_mem_acct_details(xl, fd);
+ if (xl->dump_metrics)
+ xl->dump_metrics(xl, fd);
+ }
+
+ return;
+}
+
+char *
+gf_monitor_metrics(glusterfs_ctx_t *ctx)
+{
+ int ret = -1;
+ int fd = 0;
+ char *filepath = NULL, *dumppath = NULL;
+
+ gf_msg_trace("monitoring", 0, "received monitoring request (sig:USR2)");
+
+ dumppath = ctx->config.metrics_dumppath;
+ if (dumppath == NULL) {
+ dumppath = GLUSTER_METRICS_DIR;
+ }
+ ret = mkdir_p(dumppath, 0755, true);
+ if (ret) {
+ /* EEXIST is handled in mkdir_p() itself */
+ gf_msg("monitoring", GF_LOG_ERROR, 0, LG_MSG_STRDUP_ERROR,
+ "failed to create metrics dir %s (%s)", dumppath,
+ strerror(errno));
+ return NULL;
+ }
+
+ ret = gf_asprintf(&filepath, "%s/gmetrics.XXXXXX", dumppath);
+ if (ret < 0) {
+ return NULL;
+ }
+
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+ fd = mkstemp(filepath);
+ if (fd < 0) {
+ gf_msg("monitoring", GF_LOG_ERROR, 0, LG_MSG_STRDUP_ERROR,
+ "failed to open tmp file %s (%s)", filepath, strerror(errno));
+ GF_FREE(filepath);
+ return NULL;
+ }
+
+ dump_global_metrics(ctx, fd);
+
+ dump_xl_metrics(ctx, fd);
+
+ /* This below line is used just to capture any errors with dprintf() */
+ ret = dprintf(fd, "\n# End of metrics\n");
+ if (ret < 0) {
+ gf_msg("monitoring", GF_LOG_WARNING, 0, LG_MSG_STRDUP_ERROR,
+ "dprintf() failed: %s", strerror(errno));
+ }
+
+ ret = sys_fsync(fd);
+ if (ret < 0) {
+ gf_msg("monitoring", GF_LOG_WARNING, 0, LG_MSG_STRDUP_ERROR,
+ "fsync() failed: %s", strerror(errno));
+ }
+ sys_close(fd);
+
+ /* Figure this out, not happy with returning this string */
+ return filepath;
+}
diff --git a/libglusterfs/src/options.c b/libglusterfs/src/options.c
index 76e581fddb7..f6b5aa0ea23 100644
--- a/libglusterfs/src/options.c
+++ b/libglusterfs/src/options.c
@@ -8,1064 +8,1242 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <fnmatch.h>
-#include "xlator.h"
-#include "defaults.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/defaults.h"
+#include "glusterfs/libglusterfs-messages.h"
#define GF_OPTION_LIST_EMPTY(_opt) (_opt->value[0] == NULL)
+static int
+xlator_option_validate_path(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ int ret = -1;
+ char errstr[256];
+
+ if (strstr(value, "../")) {
+ snprintf(errstr, 256, "invalid path given '%s'", value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ /* Make sure the given path is valid */
+ if (value[0] != '/') {
+ snprintf(errstr, 256,
+ "option %s %s: '%s' is not an "
+ "absolute path name",
+ key, value, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
+}
static int
-xlator_option_validate_path (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_int(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
{
- int ret = -1;
- char errstr[256];
-
- if (strstr (value, "../")) {
- snprintf (errstr, 256,
- "invalid path given '%s'",
- value);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
+ long long inputll = 0;
+ unsigned long long uinputll = 0;
+ int ret = -1;
+ char errstr[256];
+
+ /* Check the range */
+ if (gf_string2longlong(value, &inputll) != 0) {
+ snprintf(errstr, 256, "invalid number format \"%s\" in option \"%s\"",
+ value, key);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ /* Handle '-0' */
+ if ((inputll == 0) && (gf_string2ulonglong(value, &uinputll) != 0)) {
+ snprintf(errstr, 256, "invalid number format \"%s\" in option \"%s\"",
+ value, key);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ if ((opt->min == 0) && (opt->max == 0) &&
+ (opt->validate == GF_OPT_VALIDATE_BOTH)) {
+ gf_msg_trace(xl->name, 0,
+ "no range check required for "
+ "'option %s %s'",
+ key, value);
+ ret = 0;
+ goto out;
+ }
+
+ if (opt->validate == GF_OPT_VALIDATE_MIN) {
+ if (inputll < opt->min) {
+ snprintf(errstr, 256,
+ "'%lld' in 'option %s %s' is smaller than "
+ "minimum value '%.0f'",
+ inputll, key, value, opt->min);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
}
-
- /* Make sure the given path is valid */
- if (value[0] != '/') {
- snprintf (errstr, 256,
- "option %s %s: '%s' is not an "
- "absolute path name",
- key, value, value);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
+ } else if (opt->validate == GF_OPT_VALIDATE_MAX) {
+ if (inputll > opt->max) {
+ snprintf(errstr, 256,
+ "'%lld' in 'option %s %s' is greater than "
+ "maximum value '%.0f'",
+ inputll, key, value, opt->max);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
}
-
- ret = 0;
+ } else if ((inputll < opt->min) || (inputll > opt->max)) {
+ snprintf(errstr, 256,
+ "'%lld' in 'option %s %s' is out of range "
+ "[%.0f - %.0f]",
+ inputll, key, value, opt->min, opt->max);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ ret = 0;
out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
}
-
static int
-xlator_option_validate_int (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_sizet(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
{
- long long inputll = 0;
- int ret = -1;
- char errstr[256];
+ uint64_t size = 0;
+ int ret = 0;
+ char errstr[256];
+
+ /* Check the range */
+ if (gf_string2bytesize_uint64(value, &size) != 0) {
+ snprintf(errstr, 256, "invalid number format \"%s\" in option \"%s\"",
+ value, key);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ if ((opt->min == 0) && (opt->max == 0)) {
+ gf_msg_trace(xl->name, 0,
+ "no range check required for "
+ "'option %s %s'",
+ key, value);
+ goto out;
+ }
+
+ if ((size < opt->min) || (size > opt->max)) {
+ snprintf(errstr, 256,
+ "'%" PRIu64
+ "' in 'option %s %s' is out of range [%.0f - %.0f]",
+ size, key, value, opt->min, opt->max);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
+ ret = -1;
+ }
- /* Check the range */
- if (gf_string2longlong (value, &inputll) != 0) {
- snprintf (errstr, 256,
- "invalid number format \"%s\" in option \"%s\"",
- value, key);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
+out:
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
+}
- if ((opt->min == 0) && (opt->max == 0) &&
- (opt->validate == GF_OPT_VALIDATE_BOTH)) {
- gf_log (xl->name, GF_LOG_TRACE,
- "no range check required for 'option %s %s'",
- key, value);
- ret = 0;
- goto out;
- }
+static int
+xlator_option_validate_bool(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ int ret = -1;
+ char errstr[256];
+ gf_boolean_t is_valid;
+
+ /* Check if the value is one of
+ '0|1|on|off|no|yes|true|false|enable|disable' */
+
+ if (gf_string2boolean(value, &is_valid) != 0) {
+ snprintf(errstr, 256, "option %s %s: '%s' is not a valid boolean value",
+ key, value, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
+}
- if ((opt->validate == GF_OPT_VALIDATE_MIN)) {
- if (inputll < opt->min) {
- snprintf (errstr, 256,
- "'%lld' in 'option %s %s' is smaller than "
- "minimum value '%"PRId64"'", inputll, key,
- value, opt->min);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
- } else if ((opt->validate == GF_OPT_VALIDATE_MAX)) {
- if ((inputll > opt->max)) {
- snprintf (errstr, 256,
- "'%lld' in 'option %s %s' is greater than "
- "maximum value '%"PRId64"'", inputll, key,
- value, opt->max);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
- } else if ((inputll < opt->min) || (inputll > opt->max)) {
- snprintf (errstr, 256,
- "'%lld' in 'option %s %s' is out of range "
- "[%"PRId64" - %"PRId64"]",
- inputll, key, value, opt->min, opt->max);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
+static int
+xlator_option_validate_xlator(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ int ret = -1;
+ char errstr[256];
+ xlator_t *xlopt = NULL;
+
+ /* Check if the value is one of the xlators */
+ xlopt = xl;
+ while (xlopt->prev)
+ xlopt = xlopt->prev;
+
+ while (xlopt) {
+ if (strcmp(value, xlopt->name) == 0) {
+ ret = 0;
+ break;
}
-
- ret = 0;
+ xlopt = xlopt->next;
+ }
+
+ if (!xlopt) {
+ snprintf(errstr, 256, "option %s %s: '%s' is not a valid volume name",
+ key, value, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ ret = 0;
out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
}
+static void
+set_error_str(char *errstr, size_t len, volume_option_t *opt, const char *key,
+ const char *value)
+{
+ int i = 0;
+ int ret = 0;
+
+ ret = snprintf(errstr, len,
+ "option %s %s: '%s' is not valid "
+ "(possible options are ",
+ key, value, value);
+
+ for (i = 0; (i < ZR_OPTION_MAX_ARRAY_SIZE) && opt->value[i];) {
+ ret += snprintf(errstr + ret, len - ret, "%s", opt->value[i]);
+ if (((++i) < ZR_OPTION_MAX_ARRAY_SIZE) && (opt->value[i]))
+ ret += snprintf(errstr + ret, len - ret, ", ");
+ else
+ ret += snprintf(errstr + ret, len - ret, ".)");
+ }
+ return;
+}
static int
-xlator_option_validate_sizet (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+is_all_whitespaces(const char *value)
{
- uint64_t size = 0;
- int ret = 0;
- char errstr[256];
-
- /* Check the range */
- if (gf_string2bytesize (value, &size) != 0) {
- snprintf (errstr, 256,
- "invalid number format \"%s\" in option \"%s\"",
- value, key);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- ret = -1;
- goto out;
- }
+ int i = 0;
- if ((opt->min == 0) && (opt->max == 0)) {
- gf_log (xl->name, GF_LOG_TRACE,
- "no range check required for 'option %s %s'",
- key, value);
- goto out;
- }
+ if (value == NULL)
+ return -1;
- if ((size < opt->min) || (size > opt->max)) {
- if ((strncmp (key, "cache-size", 10) == 0) &&
- (size > opt->max)) {
- snprintf (errstr, 256, "Cache size %"PRId64" is out of "
- "range [%"PRId64" - %"PRId64"]",
- size, opt->min, opt->max);
- gf_log (xl->name, GF_LOG_WARNING, "%s", errstr);
- } else {
- snprintf (errstr, 256,
- "'%"PRId64"' in 'option %s %s' "
- "is out of range [%"PRId64" - %"PRId64"]",
- size, key, value, opt->min, opt->max);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- ret = -1;
- }
- }
+ for (i = 0; value[i] != '\0'; i++) {
+ if (value[i] == ' ')
+ continue;
+ else
+ return 0;
+ }
-out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ return 1;
}
-
static int
-xlator_option_validate_bool (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_str(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
{
- int ret = -1;
- char errstr[256];
- gf_boolean_t bool;
+ int ret = -1;
+ int i = 0;
+ /* Check if the '*str' is valid */
+ if (GF_OPTION_LIST_EMPTY(opt)) {
+ ret = 0;
+ goto out;
+ }
- /* Check if the value is one of
- '0|1|on|off|no|yes|true|false|enable|disable' */
+ if (is_all_whitespaces(value) == 1)
+ goto out;
- if (gf_string2boolean (value, &bool) != 0) {
- snprintf (errstr, 256,
- "option %s %s: '%s' is not a valid boolean value",
- key, value, value);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
+ for (i = 0; (i < ZR_OPTION_MAX_ARRAY_SIZE) && opt->value[i]; i++) {
+#ifdef GF_DARWIN_HOST_OS
+ if (fnmatch(opt->value[i], value, 0) == 0) {
+ ret = 0;
+ break;
+ }
+#else
+ if (fnmatch(opt->value[i], value, FNM_EXTMATCH) == 0) {
+ ret = 0;
+ break;
}
+#endif
+ }
+
+ if ((i == ZR_OPTION_MAX_ARRAY_SIZE) || (!opt->value[i]))
+ goto out;
+ /* enter here only if
+ * 1. reached end of opt->value array and haven't
+ * validated input
+ * OR
+ * 2. valid input list is less than
+ * ZR_OPTION_MAX_ARRAY_SIZE and input has not
+ * matched all possible input values.
+ */
+
+ ret = 0;
- ret = 0;
out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ if (ret) {
+ char errstr[4096];
+ set_error_str(errstr, sizeof(errstr), opt, key, value);
+
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ if (op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ }
+ return ret;
}
-
static int
-xlator_option_validate_xlator (xlator_t *xl, const char *key, const char *value,
+xlator_option_validate_percent(xlator_t *xl, const char *key, const char *value,
volume_option_t *opt, char **op_errstr)
{
- int ret = -1;
- char errstr[256];
- xlator_t *xlopt = NULL;
-
-
- /* Check if the value is one of the xlators */
- xlopt = xl;
- while (xlopt->prev)
- xlopt = xlopt->prev;
-
- while (xlopt) {
- if (strcmp (value, xlopt->name) == 0) {
- ret = 0;
- break;
- }
- xlopt = xlopt->next;
- }
-
- if (!xlopt) {
- snprintf (errstr, 256,
- "option %s %s: '%s' is not a valid volume name",
- key, value, value);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
-
- ret = 0;
+ double percent = 0;
+ int ret = -1;
+ char errstr[256];
+
+ /* Check if the value is valid percentage */
+ if (gf_string2percent(value, &percent) != 0) {
+ snprintf(errstr, 256, "invalid percent format \"%s\" in \"option %s\"",
+ value, key);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ if ((percent < 0.0) || (percent > 100.0)) {
+ snprintf(errstr, 256,
+ "'%lf' in 'option %s %s' is out of range [0 - 100]", percent,
+ key, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ ret = 0;
out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
}
-
static int
-xlator_option_validate_str (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_fractional_value(const char *value)
{
- int ret = -1;
- int i = 0;
- char errstr[256];
- char given_array[4096] = {0,};
-
- /* Check if the '*str' is valid */
- if (GF_OPTION_LIST_EMPTY(opt)) {
- ret = 0;
- goto out;
- }
+ const char *s = NULL;
+ int ret = 0;
- for (i = 0; (i < ZR_OPTION_MAX_ARRAY_SIZE) && opt->value[i]; i++) {
- #ifdef GF_DARWIN_HOST_OS
- if (fnmatch (opt->value[i], value, 0) == 0) {
- ret = 0;
- break;
- }
- #else
- if (fnmatch (opt->value[i], value, FNM_EXTMATCH) == 0) {
- ret = 0;
- break;
- }
- #endif
- }
-
- if (((i < ZR_OPTION_MAX_ARRAY_SIZE) && (!opt->value[i])) ||
- (i == ZR_OPTION_MAX_ARRAY_SIZE)) {
- /* enter here only if
- * 1. reached end of opt->value array and haven't
- * validated input
- * OR
- * 2. valid input list is less than
- * ZR_OPTION_MAX_ARRAY_SIZE and input has not
- * matched all possible input values.
- */
-
- for (i = 0; (i < ZR_OPTION_MAX_ARRAY_SIZE) && opt->value[i];) {
- strcat (given_array, opt->value[i]);
- if (((++i) < ZR_OPTION_MAX_ARRAY_SIZE) &&
- (opt->value[i]))
- strcat (given_array, ", ");
- else
- strcat (given_array, ".");
- }
- snprintf (errstr, 256,
- "option %s %s: '%s' is not valid "
- "(possible options are %s)",
- key, value, value, given_array);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
+ s = strchr(value, '.');
+ if (s) {
+ for (s = s + 1; *s != '\0'; s++) {
+ if (*s != '0') {
+ return -1;
+ }
}
+ }
- ret = 0;
-out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ return ret;
}
-
static int
-xlator_option_validate_percent (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_percent_or_sizet(xlator_t *xl, const char *key,
+ const char *value, volume_option_t *opt,
+ char **op_errstr)
{
- int ret = -1;
- char errstr[256];
- uint32_t percent = 0;
-
-
- /* Check if the value is valid percentage */
- if (gf_string2percent (value, &percent) != 0) {
- snprintf (errstr, 256,
- "invalid percent format \"%s\" in \"option %s\"",
- value, key);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ int ret = -1;
+ char errstr[256];
+ double size = 0;
+ gf_boolean_t is_percent = _gf_false;
+
+ if (gf_string2percent_or_bytesize(value, &size, &is_percent) == 0) {
+ if (is_percent) {
+ if ((size < 0.0) || (size > 100.0)) {
+ snprintf(errstr, sizeof(errstr),
+ "'%lf' in 'option %s %s' is out"
+ " of range [0 - 100]",
+ size, key, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE,
+ "error=%s", errstr, NULL);
goto out;
+ }
+ ret = 0;
+ goto out;
}
- if ((percent < 0) || (percent > 100)) {
- snprintf (errstr, 256,
- "'%d' in 'option %s %s' is out of range [0 - 100]",
- percent, key, value);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
+ /*Input value of size(in byte) should not be fractional*/
+ ret = xlator_option_validate_fractional_value(value);
+ if (ret) {
+ snprintf(errstr, sizeof(errstr),
+ "'%lf' in 'option %s"
+ " %s' should not be fractional value. Use "
+ "valid unsigned integer value.",
+ size, key, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
}
+ /* Check the range */
+ if ((opt->min == 0) && (opt->max == 0)) {
+ gf_msg_trace(xl->name, 0,
+ "no range check required "
+ "for 'option %s %s'",
+ key, value);
+ ret = 0;
+ goto out;
+ }
+ if ((size < opt->min) || (size > opt->max)) {
+ snprintf(errstr, 256,
+ "'%lf' in 'option %s %s'"
+ " is out of range [%.0f - %.0f]",
+ size, key, value, opt->min, opt->max);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
ret = 0;
-out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
-}
+ goto out;
+ }
-static int
-xlator_option_validate_percent_or_sizet (xlator_t *xl, const char *key,
- const char *value,
- volume_option_t *opt, char **op_errstr)
-{
- int ret = -1;
- char errstr[256];
- uint64_t size = 0;
- gf_boolean_t is_percent = _gf_false;
-
- if (gf_string2percent_or_bytesize (value, &size, &is_percent) == 0) {
- if (is_percent) {
- ret = 0;
- goto out;
- }
- /* Check the range */
- if ((opt->min == 0) && (opt->max == 0)) {
- gf_log (xl->name, GF_LOG_TRACE,
- "no range check required for "
- "'option %s %s'",
- key, value);
- ret = 0;
- goto out;
- }
- if ((size < opt->min) || (size > opt->max)) {
- snprintf (errstr, 256,
- "'%"PRId64"' in 'option %s %s'"
- " is out of range [%"PRId64" -"
- " %"PRId64"]",
- size, key, value, opt->min, opt->max);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
- ret = 0;
- goto out;
- }
-
- /* If control reaches here, invalid argument */
-
- snprintf (errstr, 256,
- "invalid number format \"%s\" in \"option %s\"",
- value, key);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
+ /* If control reaches here, invalid argument */
+ snprintf(errstr, 256, "invalid number format \"%s\" in \"option %s\"",
+ value, key);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s", errstr,
+ NULL);
out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
}
-
static int
-xlator_option_validate_time (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_time(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
{
- int ret = -1;
- char errstr[256];
- uint32_t input_time = 0;
-
- /* Check if the value is valid time */
- if (gf_string2time (value, &input_time) != 0) {
- snprintf (errstr, 256,
- "invalid time format \"%s\" in "
- "\"option %s\"",
- value, key);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
-
- if ((opt->min == 0) && (opt->max == 0)) {
- gf_log (xl->name, GF_LOG_TRACE,
- "no range check required for "
- "'option %s %s'",
- key, value);
- ret = 0;
- goto out;
- }
-
- if ((input_time < opt->min) || (input_time > opt->max)) {
- snprintf (errstr, 256,
- "'%"PRIu32"' in 'option %s %s' is "
- "out of range [%"PRId64" - %"PRId64"]",
- input_time, key, value,
- opt->min, opt->max);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
-
+ int ret = -1;
+ char errstr[256];
+ uint32_t input_time = 0;
+
+ /* Check if the value is valid time */
+ if (gf_string2time(value, &input_time) != 0) {
+ snprintf(errstr, 256,
+ "invalid time format \"%s\" in "
+ "\"option %s\"",
+ value, key);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ if ((opt->min == 0) && (opt->max == 0)) {
+ gf_msg_trace(xl->name, 0,
+ "no range check required for "
+ "'option %s %s'",
+ key, value);
ret = 0;
+ goto out;
+ }
+
+ if ((input_time < opt->min) || (input_time > opt->max)) {
+ snprintf(errstr, 256,
+ "'%" PRIu32
+ "' in 'option %s %s' is "
+ "out of range [%.0f - %.0f]",
+ input_time, key, value, opt->min, opt->max);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ ret = 0;
out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
}
-
static int
-xlator_option_validate_double (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_double(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
{
- int ret = -1;
- char errstr[256];
- double val = 0.0;
-
- /* Check if the value is valid double */
- if (gf_string2double (value, &val) != 0) {
- snprintf (errstr, 256,
- "invalid double \"%s\" in \"option %s\"",
- value, key);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
- }
-
- if (val < 0.0) {
- snprintf (errstr, 256,
- "invalid double \"%s\" in \"option %s\"",
- value, key);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
+ double input = 0.0;
+ int ret = -1;
+ char errstr[256];
+
+ /* Check the range */
+ if (gf_string2double(value, &input) != 0) {
+ snprintf(errstr, 256, "invalid number format \"%s\" in option \"%s\"",
+ value, key);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ if ((opt->min == 0) && (opt->max == 0) &&
+ (opt->validate == GF_OPT_VALIDATE_BOTH)) {
+ gf_msg_trace(xl->name, 0,
+ "no range check required for "
+ "'option %s %s'",
+ key, value);
+ ret = 0;
+ goto out;
+ }
+
+ if (opt->validate == GF_OPT_VALIDATE_MIN) {
+ if (input < opt->min) {
+ snprintf(errstr, 256,
+ "'%f' in 'option %s %s' is smaller than "
+ "minimum value '%f'",
+ input, key, value, opt->min);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
}
-
- if ((opt->min == 0) && (opt->max == 0)) {
- gf_log (xl->name, GF_LOG_TRACE,
- "no range check required for 'option %s %s'",
- key, value);
- ret = 0;
- goto out;
+ } else if (opt->validate == GF_OPT_VALIDATE_MAX) {
+ if (input > opt->max) {
+ snprintf(errstr, 256,
+ "'%f' in 'option %s %s' is greater than "
+ "maximum value '%f'",
+ input, key, value, opt->max);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ goto out;
}
-
- ret = 0;
+ } else if ((input < opt->min) || (input > opt->max)) {
+ snprintf(errstr, 256,
+ "'%f' in 'option %s %s' is out of range "
+ "[%f - %f]",
+ input, key, value, opt->min, opt->max);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_OUT_OF_RANGE, "error=%s",
+ errstr, NULL);
+ goto out;
+ }
+
+ ret = 0;
out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup (errstr);
- return ret;
+ if (ret && op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ return ret;
}
-
static int
-xlator_option_validate_addr (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_addr(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
{
- int ret = -1;
- char errstr[256];
-
- if (!valid_internet_address ((char *)value, _gf_false)) {
- snprintf (errstr, 256,
- "option %s %s: '%s' is not a valid internet-address,"
- " it does not conform to standards.",
- key, value, value);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- if (op_errstr)
- *op_errstr = gf_strdup (errstr);
- }
+ int ret = -1;
+ char errstr[256];
- ret = 0;
+ if (!valid_internet_address((char *)value, _gf_false, _gf_false)) {
+ snprintf(errstr, 256, "option %s %s: Can not parse %s address", key,
+ value, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ if (op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ }
- return ret;
+ ret = 0;
+
+ return ret;
}
-static int
-xlator_option_validate_addr_list (xlator_t *xl, const char *key,
- const char *value, volume_option_t *opt,
- char **op_errstr)
+int
+xlator_option_validate_addr_list(xlator_t *xl, const char *key,
+ const char *value, volume_option_t *opt,
+ char **op_errstr)
{
- int ret = -1;
- char *dup_val = NULL;
- char *addr_tok = NULL;
- char *save_ptr = NULL;
- char errstr[256];
-
- dup_val = gf_strdup (value);
- if (!dup_val) {
- ret = -1;
- snprintf (errstr, 256, "internal error, out of memory.");
+ int ret = -1;
+ char *dup_val = NULL;
+ char *addr_tok = NULL;
+ char *save_ptr = NULL;
+ char *entry = NULL;
+ char *entry_ptr = NULL;
+ char *dir_and_addr = NULL;
+ char *addr_ptr = NULL;
+ char *addr_list = NULL;
+ char *addr = NULL;
+ char *dir = NULL;
+
+ dup_val = gf_strdup(value);
+ if (!dup_val)
+ goto out;
+
+ if (dup_val[0] != '/' && !strchr(dup_val, '(')) {
+ /* Possible old format, handle it for back-ward compatibility */
+ addr_tok = strtok_r(dup_val, ",", &save_ptr);
+ while (addr_tok) {
+ if (!valid_internet_address(addr_tok, _gf_true, _gf_true))
goto out;
- }
- addr_tok = strtok_r (dup_val, ",", &save_ptr);
- while (addr_tok) {
- if (!valid_internet_address (addr_tok, _gf_true)) {
- snprintf (errstr, 256,
- "option %s %s: '%s' is not a valid "
- "internet-address-list",
- key, value, value);
- gf_log (xl->name, GF_LOG_ERROR, "%s", errstr);
- ret = -1;
- goto out;
- }
- addr_tok = strtok_r (NULL, ",", &save_ptr);
+ addr_tok = strtok_r(NULL, ",", &save_ptr);
}
ret = 0;
- out:
- if (op_errstr && ret)
- *op_errstr = gf_strdup (errstr);
- if (dup_val)
- GF_FREE (dup_val);
+ goto out;
+ }
+
+ /* Lets handle the value with new format */
+ entry = strtok_r(dup_val, ",", &entry_ptr);
+ while (entry) {
+ dir_and_addr = gf_strdup(entry);
+ if (!dir_and_addr)
+ goto out;
+
+ dir = strtok_r(dir_and_addr, "(", &addr_ptr);
+ if (dir[0] != '/') {
+ /* Valid format should be starting from '/' */
+ goto out;
+ }
+ /* dir = strtok_r (NULL, " =", &addr_tmp); */
+ addr = strtok_r(NULL, ")", &addr_ptr);
+ if (!addr)
+ goto out;
+
+ addr_list = gf_strdup(addr);
+ if (!addr_list)
+ goto out;
+
+ /* This format be separated by '|' */
+ addr_tok = strtok_r(addr_list, "|", &save_ptr);
+ if (addr_tok == NULL)
+ goto out;
+ while (addr_tok) {
+ if (!valid_internet_address(addr_tok, _gf_true, _gf_true))
+ goto out;
- return ret;
+ addr_tok = strtok_r(NULL, "|", &save_ptr);
+ }
+ entry = strtok_r(NULL, ",", &entry_ptr);
+ GF_FREE(dir_and_addr);
+ GF_FREE(addr_list);
+ addr_list = NULL;
+ dir_and_addr = NULL;
+ }
+
+ ret = 0;
+
+out:
+ if (ret) {
+ char errstr[4096];
+ snprintf(errstr, sizeof(errstr),
+ "option %s %s: '%s' is not "
+ "a valid internet-address-list",
+ key, value, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ if (op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ }
+ GF_FREE(dup_val);
+ GF_FREE(dir_and_addr);
+ GF_FREE(addr_list);
+ return ret;
+}
+
+static int
+xlator_option_validate_mntauth(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
+{
+ int ret = -1;
+ char *dup_val = NULL;
+ char *addr_tok = NULL;
+ char *save_ptr = NULL;
+
+ dup_val = gf_strdup(value);
+ if (!dup_val)
+ goto out;
+
+ addr_tok = strtok_r(dup_val, ",", &save_ptr);
+ if (addr_tok == NULL)
+ goto out;
+ while (addr_tok) {
+ if (!valid_mount_auth_address(addr_tok))
+ goto out;
+
+ addr_tok = strtok_r(NULL, ",", &save_ptr);
+ }
+ ret = 0;
+
+out:
+ if (ret) {
+ char errstr[4096];
+ snprintf(errstr, sizeof(errstr),
+ "option %s %s: '%s' is not "
+ "a valid mount-auth-address",
+ key, value, value);
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY, "error=%s",
+ errstr, NULL);
+ if (op_errstr)
+ *op_errstr = gf_strdup(errstr);
+ }
+ GF_FREE(dup_val);
+
+ return ret;
}
/*XXX: the rules to validate are as per block-size required for stripe xlator */
static int
-gf_validate_size (const char *sizestr, volume_option_t *opt)
+gf_validate_size(const char *sizestr, volume_option_t *opt)
{
- uint64_t value = 0;
- int ret = 0;
+ uint64_t value = 0;
+ int ret = 0;
- GF_ASSERT (opt);
+ GF_ASSERT(opt);
- if (gf_string2bytesize (sizestr, &value) != 0 ||
- value < opt->min ||
- value % 512) {
- ret = -1;
- goto out;
- }
+ if (gf_string2bytesize_uint64(sizestr, &value) != 0 || value < opt->min ||
+ value % 512) {
+ ret = -1;
+ goto out;
+ }
- out:
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+out:
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ return ret;
}
static int
-gf_validate_number (const char *numstr, volume_option_t *opt)
+gf_validate_number(const char *numstr, volume_option_t *opt)
{
- int32_t value;
- return gf_string2int32 (numstr, &value);
+ int32_t value;
+ return gf_string2int32(numstr, &value);
}
/* Parses the string to be of the form <key1>:<value1>,<key2>:<value2>... *
* takes two optional validaters key_validator and value_validator */
static int
-validate_list_elements (const char *string, volume_option_t *opt,
- int (key_validator)( const char *),
- int (value_validator)( const char *, volume_option_t *))
+validate_list_elements(const char *string, volume_option_t *opt,
+ int(key_validator)(const char *),
+ int(value_validator)(const char *, volume_option_t *))
{
+ char *dup_string = NULL;
+ char *str_sav = NULL;
+ char *substr_sav = NULL;
+ char *str_ptr = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ int ret = 0;
+
+ GF_ASSERT(string);
+
+ dup_string = gf_strdup(string);
+ if (NULL == dup_string)
+ goto out;
+
+ str_ptr = strtok_r(dup_string, ",", &str_sav);
+ if (str_ptr == NULL) {
+ ret = -1;
+ goto out;
+ }
+ while (str_ptr) {
+ key = strtok_r(str_ptr, ":", &substr_sav);
+ if (!key || (key_validator && key_validator(key))) {
+ ret = -1;
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_ENTRY,
+ "list=%s", string, "key=%s", key ? key : "", NULL);
+ goto out;
+ }
- char *dup_string = NULL;
- char *str_sav = NULL;
- char *substr_sav = NULL;
- char *str_ptr = NULL;
- char *key = NULL;
- char *value = NULL;
- int ret = 0;
-
- GF_ASSERT (string);
+ value = strtok_r(NULL, ":", &substr_sav);
+ if (!value || (value_validator && value_validator(value, opt))) {
+ ret = -1;
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0, LG_MSG_INVALID_ENTRY,
+ "list=%s", string, "value=%s", key, NULL);
+ goto out;
+ }
- dup_string = gf_strdup (string);
- if (NULL == dup_string)
- goto out;
+ str_ptr = strtok_r(NULL, ",", &str_sav);
+ substr_sav = NULL;
+ }
- str_ptr = strtok_r (dup_string, ",", &str_sav);
- while (str_ptr) {
-
- key = strtok_r (str_ptr, ":", &substr_sav);
- if (!key ||
- (key_validator && key_validator(key))) {
- ret = -1;
- gf_log (THIS->name, GF_LOG_WARNING,
- "invalid list '%s', key '%s' not valid.",
- string, key);
- goto out;
- }
-
- value = strtok_r (NULL, ":", &substr_sav);
- if (!value ||
- (value_validator && value_validator(value, opt))) {
- ret = -1;
- gf_log (THIS->name, GF_LOG_WARNING,
- "invalid list '%s', value '%s' not valid.",
- string, key);
- goto out;
- }
-
- str_ptr = strtok_r (NULL, ",", &str_sav);
- substr_sav = NULL;
- }
- out:
- if (dup_string)
- GF_FREE (dup_string);
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+out:
+ GF_FREE(dup_string);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ return ret;
}
static int
-xlator_option_validate_priority_list (xlator_t *xl, const char *key,
- const char *value, volume_option_t *opt,
- char **op_errstr)
+xlator_option_validate_priority_list(xlator_t *xl, const char *key,
+ const char *value, volume_option_t *opt,
+ char **op_errstr)
{
- int ret =0;
- char errstr[1024] = {0, };
-
- GF_ASSERT (value);
-
- ret = validate_list_elements (value, opt, NULL, &gf_validate_number);
- if (ret) {
- snprintf (errstr, 1024,
- "option %s %s: '%s' is not a valid "
- "priority-list", key, value, value);
- *op_errstr = gf_strdup (errstr);
- }
-
- return ret;
+ int ret = 0;
+ char errstr[1024] = {
+ 0,
+ };
+
+ GF_ASSERT(value);
+
+ ret = validate_list_elements(value, opt, NULL, &gf_validate_number);
+ if (ret) {
+ snprintf(errstr, 1024,
+ "option %s %s: '%s' is not a valid "
+ "priority-list",
+ key, value, value);
+ *op_errstr = gf_strdup(errstr);
+ }
+
+ return ret;
}
static int
-xlator_option_validate_size_list (xlator_t *xl, const char *key,
- const char *value, volume_option_t *opt,
- char **op_errstr)
+xlator_option_validate_size_list(xlator_t *xl, const char *key,
+ const char *value, volume_option_t *opt,
+ char **op_errstr)
{
-
- int ret = 0;
- char errstr[1024] = {0, };
-
- GF_ASSERT (value);
-
- ret = gf_validate_size (value, opt);
- if (ret)
- ret = validate_list_elements (value, opt, NULL, &gf_validate_size);
-
- if (ret) {
- snprintf (errstr, 1024,
- "option %s %s: '%s' is not a valid "
- "size-list", key, value, value);
- *op_errstr = gf_strdup (errstr);
- }
-
- return ret;
-
+ int ret = 0;
+ char errstr[1024] = {
+ 0,
+ };
+
+ GF_ASSERT(value);
+
+ ret = gf_validate_size(value, opt);
+ if (ret)
+ ret = validate_list_elements(value, opt, NULL, &gf_validate_size);
+
+ if (ret) {
+ snprintf(errstr, 1024,
+ "option %s %s: '%s' is not a valid "
+ "size-list",
+ key, value, value);
+ *op_errstr = gf_strdup(errstr);
+ }
+
+ return ret;
}
static int
-xlator_option_validate_any (xlator_t *xl, const char *key, const char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate_any(xlator_t *xl, const char *key, const char *value,
+ volume_option_t *opt, char **op_errstr)
{
- return 0;
+ return 0;
}
-typedef int (xlator_option_validator_t) (xlator_t *xl, const char *key,
- const char *value,
- volume_option_t *opt, char **operrstr);
+typedef int(xlator_option_validator_t)(xlator_t *xl, const char *key,
+ const char *value, volume_option_t *opt,
+ char **operrstr);
int
-xlator_option_validate (xlator_t *xl, char *key, char *value,
- volume_option_t *opt, char **op_errstr)
+xlator_option_validate(xlator_t *xl, char *key, char *value,
+ volume_option_t *opt, char **op_errstr)
{
- int ret = -1;
- xlator_option_validator_t *validate;
- xlator_option_validator_t *validators[] = {
- [GF_OPTION_TYPE_PATH] = xlator_option_validate_path,
- [GF_OPTION_TYPE_INT] = xlator_option_validate_int,
- [GF_OPTION_TYPE_SIZET] = xlator_option_validate_sizet,
- [GF_OPTION_TYPE_BOOL] = xlator_option_validate_bool,
- [GF_OPTION_TYPE_XLATOR] = xlator_option_validate_xlator,
- [GF_OPTION_TYPE_STR] = xlator_option_validate_str,
- [GF_OPTION_TYPE_PERCENT] = xlator_option_validate_percent,
- [GF_OPTION_TYPE_PERCENT_OR_SIZET] =
- xlator_option_validate_percent_or_sizet,
- [GF_OPTION_TYPE_TIME] = xlator_option_validate_time,
- [GF_OPTION_TYPE_DOUBLE] = xlator_option_validate_double,
- [GF_OPTION_TYPE_INTERNET_ADDRESS] = xlator_option_validate_addr,
- [GF_OPTION_TYPE_INTERNET_ADDRESS_LIST] =
- xlator_option_validate_addr_list,
- [GF_OPTION_TYPE_PRIORITY_LIST] =
- xlator_option_validate_priority_list,
- [GF_OPTION_TYPE_SIZE_LIST] = xlator_option_validate_size_list,
- [GF_OPTION_TYPE_ANY] = xlator_option_validate_any,
- [GF_OPTION_TYPE_MAX] = NULL,
- };
-
- if (opt->type < 0 || opt->type >= GF_OPTION_TYPE_MAX) {
- gf_log (xl->name, GF_LOG_ERROR,
- "unknown option type '%d'", opt->type);
- goto out;
- }
-
- validate = validators[opt->type];
-
- ret = validate (xl, key, value, opt, op_errstr);
+ int ret = -1;
+ xlator_option_validator_t *validate;
+ xlator_option_validator_t *validators[] = {
+ [GF_OPTION_TYPE_PATH] = xlator_option_validate_path,
+ [GF_OPTION_TYPE_INT] = xlator_option_validate_int,
+ [GF_OPTION_TYPE_SIZET] = xlator_option_validate_sizet,
+ [GF_OPTION_TYPE_BOOL] = xlator_option_validate_bool,
+ [GF_OPTION_TYPE_XLATOR] = xlator_option_validate_xlator,
+ [GF_OPTION_TYPE_STR] = xlator_option_validate_str,
+ [GF_OPTION_TYPE_PERCENT] = xlator_option_validate_percent,
+ [GF_OPTION_TYPE_PERCENT_OR_SIZET] =
+ xlator_option_validate_percent_or_sizet,
+ [GF_OPTION_TYPE_TIME] = xlator_option_validate_time,
+ [GF_OPTION_TYPE_DOUBLE] = xlator_option_validate_double,
+ [GF_OPTION_TYPE_INTERNET_ADDRESS] = xlator_option_validate_addr,
+ [GF_OPTION_TYPE_INTERNET_ADDRESS_LIST] =
+ xlator_option_validate_addr_list,
+ [GF_OPTION_TYPE_PRIORITY_LIST] = xlator_option_validate_priority_list,
+ [GF_OPTION_TYPE_SIZE_LIST] = xlator_option_validate_size_list,
+ [GF_OPTION_TYPE_ANY] = xlator_option_validate_any,
+ [GF_OPTION_TYPE_CLIENT_AUTH_ADDR] = xlator_option_validate_mntauth,
+ [GF_OPTION_TYPE_MAX] = NULL,
+ };
+
+ if (opt->type > GF_OPTION_TYPE_MAX) {
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_UNKNOWN_OPTION_TYPE,
+ "type=%d", opt->type, NULL);
+ goto out;
+ }
+
+ validate = validators[opt->type];
+
+ ret = validate(xl, key, value, opt, op_errstr);
out:
- return ret;
+ return ret;
}
-
-static volume_option_t *
-xlator_volume_option_get_list (volume_opt_list_t *vol_list, const char *key)
+volume_option_t *
+xlator_volume_option_get_list(volume_opt_list_t *vol_list, const char *key)
{
- volume_option_t *opt = NULL;
- volume_opt_list_t *opt_list = NULL;
- volume_option_t *found = NULL;
- int index = 0;
- int i = 0;
- char *cmp_key = NULL;
-
- if (!vol_list->given_opt) {
- opt_list = list_entry (vol_list->list.next, volume_opt_list_t,
- list);
- opt = opt_list->given_opt;
- } else
- opt = vol_list->given_opt;
-
- for (index = 0; opt[index].key[0]; index++) {
- for (i = 0; i < ZR_VOLUME_MAX_NUM_KEY; i++) {
- cmp_key = opt[index].key[i];
- if (!cmp_key)
- break;
- if (fnmatch (cmp_key, key, FNM_NOESCAPE) == 0) {
- found = &opt[index];
- goto out;
- }
- }
+ volume_option_t *opt = NULL;
+ volume_opt_list_t *opt_list = NULL;
+ int index = 0;
+ int i = 0;
+ char *cmp_key = NULL;
+
+ if (!vol_list->given_opt) {
+ opt_list = list_entry(vol_list->list.next, volume_opt_list_t, list);
+ opt = opt_list->given_opt;
+ } else
+ opt = vol_list->given_opt;
+
+ for (index = 0; opt[index].key[0]; index++) {
+ for (i = 0; i < ZR_VOLUME_MAX_NUM_KEY; i++) {
+ cmp_key = opt[index].key[i];
+ if (!cmp_key)
+ break;
+ if (fnmatch(cmp_key, key, FNM_NOESCAPE) == 0) {
+ return &opt[index];
+ }
}
-out:
- return found;
-}
+ }
+ return NULL;
+}
volume_option_t *
-xlator_volume_option_get (xlator_t *xl, const char *key)
+xlator_volume_option_get(xlator_t *xl, const char *key)
{
- volume_opt_list_t *vol_list = NULL;
- volume_option_t *found = NULL;
+ volume_opt_list_t *vol_list = NULL;
+ volume_option_t *found = NULL;
- list_for_each_entry (vol_list, &xl->volume_options, list) {
- found = xlator_volume_option_get_list (vol_list, key);
- if (found)
- break;
- }
+ list_for_each_entry(vol_list, &xl->volume_options, list)
+ {
+ found = xlator_volume_option_get_list(vol_list, key);
+ if (found)
+ break;
+ }
- return found;
+ return found;
}
-
-static void
-xl_opt_validate (dict_t *dict, char *key, data_t *value, void *data)
+static int
+xl_opt_validate(dict_t *dict, char *key, data_t *value, void *data)
{
- xlator_t *xl = NULL;
- volume_opt_list_t *vol_opt = NULL;
- volume_option_t *opt = NULL;
- int ret = 0;
- char *errstr = NULL;
-
- struct {
- xlator_t *this;
- volume_opt_list_t *vol_opt;
- char *errstr;
- } *stub;
-
- stub = data;
- xl = stub->this;
- vol_opt = stub->vol_opt;
-
- opt = xlator_volume_option_get_list (vol_opt, key);
- if (!opt)
- return;
-
- ret = xlator_option_validate (xl, key, value->data, opt, &errstr);
- if (ret)
- gf_log (xl->name, GF_LOG_WARNING, "validate of %s returned %d",
- key, ret);
-
- if (errstr)
- /* possible small leak of previously set stub->errstr */
- stub->errstr = errstr;
-
- if (fnmatch (opt->key[0], key, FNM_NOESCAPE) != 0) {
- gf_log (xl->name, GF_LOG_WARNING, "option '%s' is deprecated, "
- "preferred is '%s', continuing with correction",
- key, opt->key[0]);
- dict_set (dict, opt->key[0], value);
- dict_del (dict, key);
- }
- return;
-}
+ xlator_t *xl = NULL;
+ volume_opt_list_t *vol_opt = NULL;
+ volume_option_t *opt = NULL;
+ int ret = 0;
+ char *errstr = NULL;
+
+ struct {
+ xlator_t *this;
+ volume_opt_list_t *vol_opt;
+ char *errstr;
+ } * stub;
+
+ stub = data;
+ xl = stub->this;
+ vol_opt = stub->vol_opt;
+
+ opt = xlator_volume_option_get_list(vol_opt, key);
+ if (!opt)
+ return 0;
+ ret = xlator_option_validate(xl, key, value->data, opt, &errstr);
+ if (ret)
+ gf_smsg(xl->name, GF_LOG_WARNING, 0, LG_MSG_VALIDATE_RETURNS, "key=%s",
+ key, "ret=%d", ret, NULL);
+
+ if (errstr)
+ /* possible small leak of previously set stub->errstr */
+ stub->errstr = errstr;
+
+ if (fnmatch(opt->key[0], key, FNM_NOESCAPE) != 0) {
+ gf_smsg(xl->name, GF_LOG_DEBUG, 0, LG_MSG_OPTION_DEPRECATED, "key=%s",
+ key, "preferred=%s", opt->key[0], NULL);
+ dict_set(dict, opt->key[0], value);
+ dict_del(dict, key);
+ }
+ return 0;
+}
int
-xlator_options_validate_list (xlator_t *xl, dict_t *options,
- volume_opt_list_t *vol_opt, char **op_errstr)
+xlator_options_validate_list(xlator_t *xl, dict_t *options,
+ volume_opt_list_t *vol_opt, char **op_errstr)
{
- int ret = 0;
- struct {
- xlator_t *this;
- volume_opt_list_t *vol_opt;
- char *errstr;
- } stub;
-
- stub.this = xl;
- stub.vol_opt = vol_opt;
- stub.errstr = NULL;
-
- dict_foreach (options, xl_opt_validate, &stub);
- if (stub.errstr) {
- ret = -1;
- if (op_errstr)
- *op_errstr = stub.errstr;
- }
-
- return ret;
+ int ret = 0;
+ struct {
+ xlator_t *this;
+ volume_opt_list_t *vol_opt;
+ char *errstr;
+ } stub;
+
+ stub.this = xl;
+ stub.vol_opt = vol_opt;
+ stub.errstr = NULL;
+
+ dict_foreach(options, xl_opt_validate, &stub);
+ if (stub.errstr) {
+ ret = -1;
+ if (op_errstr)
+ *op_errstr = stub.errstr;
+ }
+
+ return ret;
}
-
int
-xlator_options_validate (xlator_t *xl, dict_t *options, char **op_errstr)
+xlator_options_validate(xlator_t *xl, dict_t *options, char **op_errstr)
{
- int ret = 0;
- volume_opt_list_t *vol_opt = NULL;
-
-
- if (!xl) {
- gf_log (THIS->name, GF_LOG_DEBUG, "'this' not a valid ptr");
- ret = -1;
- goto out;
- }
-
- if (list_empty (&xl->volume_options))
- goto out;
-
- list_for_each_entry (vol_opt, &xl->volume_options, list) {
- ret = xlator_options_validate_list (xl, options, vol_opt,
- op_errstr);
- }
+ int ret = 0;
+ volume_opt_list_t *vol_opt = NULL;
+
+ if (!xl) {
+ gf_msg_debug(THIS->name, 0, "'this' not a valid ptr");
+ ret = -1;
+ goto out;
+ }
+
+ if (list_empty(&xl->volume_options))
+ goto out;
+
+ list_for_each_entry(vol_opt, &xl->volume_options, list)
+ {
+ ret = xlator_options_validate_list(xl, options, vol_opt, op_errstr);
+ }
out:
- return ret;
+ return ret;
}
-
int
-xlator_validate_rec (xlator_t *xlator, char **op_errstr)
+xlator_validate_rec(xlator_t *xlator, char **op_errstr)
{
- int ret = -1;
- xlator_list_t *trav = NULL;
- xlator_t *old_THIS = NULL;
+ int ret = -1;
+ xlator_list_t *trav = NULL;
+ xlator_t *old_THIS = NULL;
- GF_VALIDATE_OR_GOTO ("xlator", xlator, out);
+ GF_VALIDATE_OR_GOTO("xlator", xlator, out);
- trav = xlator->children;
+ trav = xlator->children;
- while (trav) {
- if (xlator_validate_rec (trav->xlator, op_errstr)) {
- gf_log ("xlator", GF_LOG_WARNING, "validate_rec failed");
- goto out;
- }
-
- trav = trav->next;
+ while (trav) {
+ if (xlator_validate_rec(trav->xlator, op_errstr)) {
+ gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_VALIDATE_REC_FAILED,
+ NULL);
+ goto out;
}
- if (xlator_dynload (xlator))
- gf_log (xlator->name, GF_LOG_DEBUG, "Did not load the symbols");
+ trav = trav->next;
+ }
- old_THIS = THIS;
- THIS = xlator;
+ if (xlator_dynload(xlator))
+ gf_msg_debug(xlator->name, 0, "Did not load the symbols");
- /* Need this here, as this graph has not yet called init() */
- if (!xlator->mem_acct.num_types) {
- if (!xlator->mem_acct_init)
- xlator->mem_acct_init = default_mem_acct_init;
- xlator->mem_acct_init (xlator);
- }
+ old_THIS = THIS;
+ THIS = xlator;
- ret = xlator_options_validate (xlator, xlator->options, op_errstr);
- THIS = old_THIS;
+ /* Need this here, as this graph has not yet called init() */
+ if (!xlator->mem_acct) {
+ if (!xlator->mem_acct_init)
+ xlator->mem_acct_init = default_mem_acct_init;
+ xlator->mem_acct_init(xlator);
+ }
- if (ret) {
- gf_log (xlator->name, GF_LOG_INFO, "%s", *op_errstr);
- goto out;
- }
+ ret = xlator_options_validate(xlator, xlator->options, op_errstr);
+ THIS = old_THIS;
- gf_log (xlator->name, GF_LOG_DEBUG, "Validated options");
+ if (ret) {
+ gf_smsg(xlator->name, GF_LOG_INFO, 0, LG_MSG_INVALID_ENTRY, "%s",
+ *op_errstr, NULL);
+ goto out;
+ }
- ret = 0;
+ gf_msg_debug(xlator->name, 0, "Validated options");
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-graph_reconf_validateopt (glusterfs_graph_t *graph, char **op_errstr)
+graph_reconf_validateopt(glusterfs_graph_t *graph, char **op_errstr)
{
- xlator_t *xlator = NULL;
- int ret = -1;
+ xlator_t *xlator = NULL;
+ int ret = -1;
- GF_ASSERT (graph);
+ GF_ASSERT(graph);
- xlator = graph->first;
+ xlator = graph->first;
- ret = xlator_validate_rec (xlator, op_errstr);
+ ret = xlator_validate_rec(xlator, op_errstr);
- return ret;
+ return ret;
}
-
static int
-xlator_reconfigure_rec (xlator_t *old_xl, xlator_t *new_xl)
+xlator_reconfigure_rec(xlator_t *old_xl, xlator_t *new_xl)
{
- xlator_list_t *trav1 = NULL;
- xlator_list_t *trav2 = NULL;
- int32_t ret = -1;
- xlator_t *old_THIS = NULL;
+ xlator_list_t *trav1 = NULL;
+ xlator_list_t *trav2 = NULL;
+ int32_t ret = -1;
+ xlator_t *old_THIS = NULL;
- GF_VALIDATE_OR_GOTO ("xlator", old_xl, out);
- GF_VALIDATE_OR_GOTO ("xlator", new_xl, out);
+ GF_VALIDATE_OR_GOTO("xlator", old_xl, out);
+ GF_VALIDATE_OR_GOTO("xlator", new_xl, out);
- trav1 = old_xl->children;
- trav2 = new_xl->children;
+ trav1 = old_xl->children;
+ trav2 = new_xl->children;
- while (trav1 && trav2) {
- ret = xlator_reconfigure_rec (trav1->xlator, trav2->xlator);
- if (ret)
- goto out;
+ while (trav1 && trav2) {
+ ret = xlator_reconfigure_rec(trav1->xlator, trav2->xlator);
+ if (ret)
+ goto out;
- gf_log (trav1->xlator->name, GF_LOG_DEBUG, "reconfigured");
+ gf_msg_debug(trav1->xlator->name, 0, "reconfigured");
- trav1 = trav1->next;
- trav2 = trav2->next;
- }
+ trav1 = trav1->next;
+ trav2 = trav2->next;
+ }
- if (old_xl->reconfigure) {
- old_THIS = THIS;
- THIS = old_xl;
+ if (old_xl->reconfigure) {
+ old_THIS = THIS;
+ THIS = old_xl;
- ret = old_xl->reconfigure (old_xl, new_xl->options);
+ xlator_init_lock();
+ handle_default_options(old_xl, new_xl->options);
+ ret = old_xl->reconfigure(old_xl, new_xl->options);
+ xlator_init_unlock();
- THIS = old_THIS;
+ THIS = old_THIS;
- if (ret)
- goto out;
- } else {
- gf_log (old_xl->name, GF_LOG_DEBUG, "No reconfigure() found");
- }
+ if (ret)
+ goto out;
+ } else {
+ gf_msg_debug(old_xl->name, 0, "No reconfigure() found");
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-xlator_tree_reconfigure (xlator_t *old_xl, xlator_t *new_xl)
+xlator_tree_reconfigure(xlator_t *old_xl, xlator_t *new_xl)
{
- xlator_t *new_top = NULL;
- xlator_t *old_top = NULL;
+ xlator_t *new_top = NULL;
+ xlator_t *old_top = NULL;
- GF_ASSERT (old_xl);
- GF_ASSERT (new_xl);
+ GF_ASSERT(old_xl);
+ GF_ASSERT(new_xl);
- old_top = old_xl;
- new_top = new_xl;
+ old_top = old_xl;
+ new_top = new_xl;
- return xlator_reconfigure_rec (old_top, new_top);
+ return xlator_reconfigure_rec(old_top, new_top);
}
-
int
-xlator_option_info_list (volume_opt_list_t *list, char *key,
- char **def_val, char **descr)
+xlator_option_info_list(volume_opt_list_t *list, char *key, char **def_val,
+ char **descr)
{
- int ret = -1;
- volume_option_t *opt = NULL;
+ int ret = -1;
+ volume_option_t *opt = NULL;
+ opt = xlator_volume_option_get_list(list, key);
+ if (!opt)
+ goto out;
- opt = xlator_volume_option_get_list (list, key);
- if (!opt)
- goto out;
-
- if (def_val)
- *def_val = opt->default_value;
- if (descr)
- *descr = opt->description;
+ if (def_val)
+ *def_val = opt->default_value;
+ if (descr)
+ *descr = opt->description;
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
static int
-pass (char *in, char **out)
+pass(char *in, char **out)
{
- *out = in;
- return 0;
+ *out = in;
+ return 0;
}
-
static int
-xl_by_name (char *in, xlator_t **out)
+xl_by_name(char *in, xlator_t **out)
{
- xlator_t *xl = NULL;
+ xlator_t *xl = NULL;
- xl = xlator_search_by_name (THIS, in);
+ xl = xlator_search_by_name(THIS, in);
- if (!xl)
- return -1;
- *out = xl;
- return 0;
+ if (!xl)
+ return -1;
+ *out = xl;
+ return 0;
}
-
static int
-pc_or_size (char *in, uint64_t *out)
+pc_or_size(char *in, double *out)
{
- uint32_t pc = 0;
- int ret = 0;
-
- if (gf_string2percent (in, &pc) == 0) {
- if (pc > 100) {
- ret = gf_string2bytesize (in, out);
- } else {
- *out = pc;
- }
+ double pc = 0;
+ int ret = 0;
+ uint64_t size = 0;
+
+ if (gf_string2percent(in, &pc) == 0) {
+ if (pc > 100.0) {
+ ret = gf_string2bytesize_uint64(in, &size);
+ if (!ret)
+ *out = size;
} else {
- ret = gf_string2bytesize (in, out);
+ *out = pc;
}
- return ret;
+ } else {
+ ret = gf_string2bytesize_uint64(in, &size);
+ if (!ret)
+ *out = size;
+ }
+ return ret;
}
-
DEFINE_INIT_OPT(char *, str, pass);
DEFINE_INIT_OPT(uint64_t, uint64, gf_string2uint64);
DEFINE_INIT_OPT(int64_t, int64, gf_string2int64);
DEFINE_INIT_OPT(uint32_t, uint32, gf_string2uint32);
DEFINE_INIT_OPT(int32_t, int32, gf_string2int32);
-DEFINE_INIT_OPT(uint64_t, size, gf_string2bytesize);
-DEFINE_INIT_OPT(uint32_t, percent, gf_string2percent);
-DEFINE_INIT_OPT(uint64_t, percent_or_size, pc_or_size);
+DEFINE_INIT_OPT(uint64_t, size, gf_string2bytesize_uint64);
+DEFINE_INIT_OPT(uint64_t, size_uint64, gf_string2bytesize_uint64);
+DEFINE_INIT_OPT(double, percent, gf_string2percent);
+DEFINE_INIT_OPT(double, percent_or_size, pc_or_size);
DEFINE_INIT_OPT(gf_boolean_t, bool, gf_string2boolean);
DEFINE_INIT_OPT(xlator_t *, xlator, xl_by_name);
DEFINE_INIT_OPT(char *, path, pass);
-
-
+DEFINE_INIT_OPT(double, double, gf_string2double);
+DEFINE_INIT_OPT(uint32_t, time, gf_string2time);
DEFINE_RECONF_OPT(char *, str, pass);
DEFINE_RECONF_OPT(uint64_t, uint64, gf_string2uint64);
DEFINE_RECONF_OPT(int64_t, int64, gf_string2int64);
DEFINE_RECONF_OPT(uint32_t, uint32, gf_string2uint32);
DEFINE_RECONF_OPT(int32_t, int32, gf_string2int32);
-DEFINE_RECONF_OPT(uint64_t, size, gf_string2bytesize);
-DEFINE_RECONF_OPT(uint32_t, percent, gf_string2percent);
-DEFINE_RECONF_OPT(uint64_t, percent_or_size, pc_or_size);
+DEFINE_RECONF_OPT(uint64_t, size, gf_string2bytesize_uint64);
+DEFINE_RECONF_OPT(uint64_t, size_uint64, gf_string2bytesize_uint64);
+DEFINE_RECONF_OPT(double, percent, gf_string2percent);
+DEFINE_RECONF_OPT(double, percent_or_size, pc_or_size);
DEFINE_RECONF_OPT(gf_boolean_t, bool, gf_string2boolean);
DEFINE_RECONF_OPT(xlator_t *, xlator, xl_by_name);
DEFINE_RECONF_OPT(char *, path, pass);
+DEFINE_RECONF_OPT(double, double, gf_string2double);
+DEFINE_RECONF_OPT(uint32_t, time, gf_string2time);
diff --git a/libglusterfs/src/options.h b/libglusterfs/src/options.h
deleted file mode 100644
index 1775ad9ea71..00000000000
--- a/libglusterfs/src/options.h
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _OPTIONS_H
-#define _OPTIONS_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdio.h>
-#include <stdint.h>
-#include <inttypes.h>
-
-#include "xlator.h"
-/* Add possible new type of option you may need */
-typedef enum {
- GF_OPTION_TYPE_ANY = 0,
- GF_OPTION_TYPE_STR,
- GF_OPTION_TYPE_INT,
- GF_OPTION_TYPE_SIZET,
- GF_OPTION_TYPE_PERCENT,
- GF_OPTION_TYPE_PERCENT_OR_SIZET,
- GF_OPTION_TYPE_BOOL,
- GF_OPTION_TYPE_XLATOR,
- GF_OPTION_TYPE_PATH,
- GF_OPTION_TYPE_TIME,
- GF_OPTION_TYPE_DOUBLE,
- GF_OPTION_TYPE_INTERNET_ADDRESS,
- GF_OPTION_TYPE_INTERNET_ADDRESS_LIST,
- GF_OPTION_TYPE_PRIORITY_LIST,
- GF_OPTION_TYPE_SIZE_LIST,
- GF_OPTION_TYPE_MAX,
-} volume_option_type_t;
-
-typedef enum {
- GF_OPT_VALIDATE_BOTH = 0,
- GF_OPT_VALIDATE_MIN,
- GF_OPT_VALIDATE_MAX,
-} opt_validate_type_t;
-
-#define ZR_VOLUME_MAX_NUM_KEY 4
-#define ZR_OPTION_MAX_ARRAY_SIZE 64
-
-/* Each translator should define this structure */
-typedef struct volume_options {
- char *key[ZR_VOLUME_MAX_NUM_KEY];
- /* different key, same meaning */
- volume_option_type_t type;
- int64_t min; /* 0 means no range */
- int64_t max; /* 0 means no range */
- char *value[ZR_OPTION_MAX_ARRAY_SIZE];
- /* If specified, will check for one of
- the value from this array */
- char *default_value;
- char *description; /* about the key */
- /* Required for int options where only the min value
- * is given and is 0. This will cause validation not to
- * happen
- */
- opt_validate_type_t validate;
-} volume_option_t;
-
-
-typedef struct vol_opt_list {
- struct list_head list;
- volume_option_t *given_opt;
-} volume_opt_list_t;
-
-
-int xlator_tree_reconfigure (xlator_t *old_xl, xlator_t *new_xl);
-int xlator_validate_rec (xlator_t *xlator, char **op_errstr);
-int graph_reconf_validateopt (glusterfs_graph_t *graph, char **op_errstr);
-int xlator_option_info_list (volume_opt_list_t *list, char *key,
- char **def_val, char **descr);
-/*
-int validate_xlator_volume_options (xlator_t *xl, dict_t *options,
- volume_option_t *opt, char **op_errstr);
-*/
-int xlator_options_validate_list (xlator_t *xl, dict_t *options,
- volume_opt_list_t *list, char **op_errstr);
-int xlator_option_validate (xlator_t *xl, char *key, char *value,
- volume_option_t *opt, char **op_errstr);
-int xlator_options_validate (xlator_t *xl, dict_t *options, char **errstr);
-volume_option_t *
-xlator_volume_option_get (xlator_t *xl, const char *key);
-
-
-#define DECLARE_INIT_OPT(type_t, type) \
-int \
-xlator_option_init_##type (xlator_t *this, dict_t *options, char *key, \
- type_t *val_p);
-
-DECLARE_INIT_OPT(char *, str);
-DECLARE_INIT_OPT(uint64_t, uint64);
-DECLARE_INIT_OPT(int64_t, int64);
-DECLARE_INIT_OPT(uint32_t, uint32);
-DECLARE_INIT_OPT(int32_t, int32);
-DECLARE_INIT_OPT(uint64_t, size);
-DECLARE_INIT_OPT(uint32_t, percent);
-DECLARE_INIT_OPT(uint64_t, percent_or_size);
-DECLARE_INIT_OPT(gf_boolean_t, bool);
-DECLARE_INIT_OPT(xlator_t *, xlator);
-DECLARE_INIT_OPT(char *, path);
-
-
-#define DEFINE_INIT_OPT(type_t, type, conv) \
-int \
-xlator_option_init_##type (xlator_t *this, dict_t *options, char *key, \
- type_t *val_p) \
-{ \
- int ret = 0; \
- volume_option_t *opt = NULL; \
- char *def_value = NULL; \
- char *set_value = NULL; \
- char *value = NULL; \
- xlator_t *old_THIS = NULL; \
- \
- opt = xlator_volume_option_get (this, key); \
- if (!opt) { \
- gf_log (this->name, GF_LOG_WARNING, \
- "unknown option: %s", key); \
- ret = -1; \
- return ret; \
- } \
- def_value = opt->default_value; \
- ret = dict_get_str (options, key, &set_value); \
- \
- if (def_value) \
- value = def_value; \
- if (set_value) \
- value = set_value; \
- if (!value) { \
- gf_log (this->name, GF_LOG_TRACE, "option %s not set", \
- key); \
- *val_p = (type_t)0; \
- return 0; \
- } \
- if (value == def_value) { \
- gf_log (this->name, GF_LOG_TRACE, \
- "option %s using default value %s", \
- key, value); \
- } else { \
- gf_log (this->name, GF_LOG_DEBUG, \
- "option %s using set value %s", \
- key, value); \
- } \
- old_THIS = THIS; \
- THIS = this; \
- ret = conv (value, val_p); \
- THIS = old_THIS; \
- if (ret) \
- return ret; \
- ret = xlator_option_validate (this, key, value, opt, NULL); \
- return ret; \
-}
-
-#define GF_OPTION_INIT(key, val, type, err_label) do { \
- int val_ret = 0; \
- val_ret = xlator_option_init_##type (THIS, THIS->options, \
- key, &(val)); \
- if (val_ret) \
- goto err_label; \
- } while (0)
-
-
-
-#define DECLARE_RECONF_OPT(type_t, type) \
-int \
-xlator_option_reconf_##type (xlator_t *this, dict_t *options, char *key,\
- type_t *val_p);
-
-DECLARE_RECONF_OPT(char *, str);
-DECLARE_RECONF_OPT(uint64_t, uint64);
-DECLARE_RECONF_OPT(int64_t, int64);
-DECLARE_RECONF_OPT(uint32_t, uint32);
-DECLARE_RECONF_OPT(int32_t, int32);
-DECLARE_RECONF_OPT(uint64_t, size);
-DECLARE_RECONF_OPT(uint32_t, percent);
-DECLARE_RECONF_OPT(uint64_t, percent_or_size);
-DECLARE_RECONF_OPT(gf_boolean_t, bool);
-DECLARE_RECONF_OPT(xlator_t *, xlator);
-DECLARE_RECONF_OPT(char *, path);
-
-
-#define DEFINE_RECONF_OPT(type_t, type, conv) \
-int \
-xlator_option_reconf_##type (xlator_t *this, dict_t *options, char *key, \
- type_t *val_p) \
-{ \
- int ret = 0; \
- volume_option_t *opt = NULL; \
- char *def_value = NULL; \
- char *set_value = NULL; \
- char *value = NULL; \
- xlator_t *old_THIS = NULL; \
- \
- opt = xlator_volume_option_get (this, key); \
- if (!opt) { \
- gf_log (this->name, GF_LOG_WARNING, \
- "unknown option: %s", key); \
- ret = -1; \
- return ret; \
- } \
- def_value = opt->default_value; \
- ret = dict_get_str (options, key, &set_value); \
- \
- if (def_value) \
- value = def_value; \
- if (set_value) \
- value = set_value; \
- if (!value) { \
- gf_log (this->name, GF_LOG_TRACE, "option %s not set", \
- key); \
- *val_p = (type_t)0; \
- return 0; \
- } \
- if (value == def_value) { \
- gf_log (this->name, GF_LOG_TRACE, \
- "option %s using default value %s", \
- key, value); \
- } else { \
- gf_log (this->name, GF_LOG_DEBUG, \
- "option %s using set value %s", \
- key, value); \
- } \
- old_THIS = THIS; \
- THIS = this; \
- ret = conv (value, val_p); \
- THIS = old_THIS; \
- if (ret) \
- return ret; \
- ret = xlator_option_validate (this, key, value, opt, NULL); \
- return ret; \
-}
-
-#define GF_OPTION_RECONF(key, val, opt, type, err_label) do { \
- int val_ret = 0; \
- val_ret = xlator_option_reconf_##type (THIS, opt, key, \
- &(val)); \
- if (val_ret) \
- goto err_label; \
- } while (0)
-
-
-#endif /* !_OPTIONS_H */
diff --git a/libglusterfs/src/parse-utils.c b/libglusterfs/src/parse-utils.c
new file mode 100644
index 00000000000..4531d5f0170
--- /dev/null
+++ b/libglusterfs/src/parse-utils.c
@@ -0,0 +1,174 @@
+/*
+ Copyright 2014-present Facebook. All Rights Reserved
+
+ This file is part of GlusterFS.
+
+ Author :
+ Shreyas Siravara <shreyas.siravara@gmail.com>
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <regex.h>
+#include <stdio.h>
+
+#include "glusterfs/parse-utils.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+/**
+ * parser_init: Initialize a parser with the a string to parse and
+ * the regex we want to use to parse it.
+ *
+ * @complete_str: the string to parse
+ * @regex : the regex to use
+ *
+ * Notes : It is up to the caller to call the parser_deinit () function
+ * to free the allocated parser.
+ *
+ * @return : success: parser ptr (on successful compilation and allocation)
+ * : failure: NULL (on failure to compile regex or allocate memory)
+ */
+struct parser *
+parser_init(const char *regex)
+{
+ int rc = 0;
+ struct parser *parser = NULL;
+
+ parser = GF_MALLOC(sizeof(*parser), gf_common_mt_parser_t);
+ if (!parser)
+ goto out;
+
+ parser->regex = gf_strdup(regex);
+ if (!parser->regex) {
+ GF_FREE(parser);
+ parser = NULL;
+ goto out;
+ }
+
+ rc = regcomp(&parser->preg, parser->regex, REG_EXTENDED);
+ if (rc != 0) {
+ gf_msg(GF_PARSE, GF_LOG_INFO, 0, LG_MSG_REGEX_OP_FAILED,
+ "Failed to compile regex pattern.");
+ parser_deinit(parser);
+ parser = NULL;
+ goto out;
+ }
+ parser->complete_str = NULL;
+out:
+ return parser;
+}
+
+/**
+ * parser_set_string -- Set the string in the parser that we want to parse.
+ * Subsequent calls to get_next_match () will use this
+ * string along with the regex that the parser was
+ * initialized with.
+ *
+ * @parser : The parser to use
+ * @complete_str: The string to set in the parser (what we are going parse)
+ *
+ * @return: success: 0
+ * failure: -EINVAL for NULL args, -ENOMEM for allocation errors
+ */
+int
+parser_set_string(struct parser *parser, const char *complete_str)
+{
+ int ret = -EINVAL;
+
+ GF_VALIDATE_OR_GOTO(GF_PARSE, parser, out);
+ GF_VALIDATE_OR_GOTO(GF_PARSE, complete_str, out);
+
+ parser->complete_str = gf_strdup(complete_str);
+ GF_CHECK_ALLOC_AND_LOG(GF_PARSE, parser, ret, "Failed to duplicate string!",
+ out);
+
+ /* Point the temp internal string to what we just dup'ed */
+ parser->_rstr = (char *)parser->complete_str;
+ ret = 0;
+out:
+ return ret;
+}
+
+/**
+ * parser_unset_string -- Free the string that was set to be parsed.
+ * This function needs to be called after
+ * parser_set_string and parser_get_next_match
+ * in order to free memory used by the string.
+ *
+ * @parser : The parser to free memory in
+ * @return : success: 0
+ * : failure: -EINVAL on NULL args
+ */
+int
+parser_unset_string(struct parser *parser)
+{
+ int ret = -EINVAL;
+
+ GF_VALIDATE_OR_GOTO(GF_PARSE, parser, out);
+
+ GF_FREE(parser->complete_str);
+ parser->complete_str = NULL; /* Avoid double frees in parser_deinit */
+ ret = 0;
+out:
+ return ret;
+}
+
+/**
+ * parser_deinit: Free the parser and all the memory allocated by it
+ *
+ * @parser : Parser to free
+ *
+ * @return : nothing
+ */
+void
+parser_deinit(struct parser *ptr)
+{
+ if (!ptr)
+ return;
+
+ regfree(&ptr->preg);
+ GF_FREE(ptr->complete_str);
+ GF_FREE(ptr->regex);
+ GF_FREE(ptr);
+}
+
+/**
+ * parser_get_match: Given the parser that is configured with a compiled regex,
+ * return the next match in the string.
+ *
+ * @parser : Parser to use
+ *
+ * @return : success: Pointer to matched character
+ * : failure: NULL
+ */
+char *
+parser_get_next_match(struct parser *parser)
+{
+ int rc = -EINVAL;
+ size_t copy_len = 0;
+ char *match = NULL;
+
+ GF_VALIDATE_OR_GOTO(GF_PARSE, parser, out);
+
+ rc = regexec(&parser->preg, parser->_rstr, 1, parser->pmatch, 0);
+ if (rc != 0) {
+ gf_msg_debug(GF_PARSE, 0, "Could not match %s with regex %s",
+ parser->_rstr, parser->regex);
+ goto out;
+ }
+
+ copy_len = parser->pmatch[0].rm_eo - parser->pmatch[0].rm_so;
+
+ match = gf_strndup(parser->_rstr + parser->pmatch[0].rm_so, copy_len);
+ GF_CHECK_ALLOC_AND_LOG(GF_PARSE, match, rc, "Duplicating match failed!",
+ out);
+
+ parser->_rstr = &parser->_rstr[parser->pmatch[0].rm_eo];
+out:
+ return match;
+}
diff --git a/libglusterfs/src/quota-common-utils.c b/libglusterfs/src/quota-common-utils.c
new file mode 100644
index 00000000000..804e2f0ad4b
--- /dev/null
+++ b/libglusterfs/src/quota-common-utils.c
@@ -0,0 +1,241 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/dict.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/byte-order.h"
+#include "glusterfs/quota-common-utils.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+gf_boolean_t
+quota_meta_is_null(const quota_meta_t *meta)
+{
+ if (meta->size == 0 && meta->file_count == 0 && meta->dir_count == 0)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+int32_t
+quota_data_to_meta(data_t *data, quota_meta_t *meta)
+{
+ int32_t ret = -1;
+ quota_meta_t *value = NULL;
+ int64_t *size = NULL;
+
+ if (!data || !meta)
+ goto out;
+
+ if (data->len > sizeof(int64_t)) {
+ value = (quota_meta_t *)data->data;
+ meta->size = ntoh64(value->size);
+ meta->file_count = ntoh64(value->file_count);
+ if (data->len > (sizeof(int64_t)) * 2)
+ meta->dir_count = ntoh64(value->dir_count);
+ else
+ meta->dir_count = 0;
+ } else {
+ size = (int64_t *)data->data;
+ meta->size = ntoh64(*size);
+ meta->file_count = 0;
+ meta->dir_count = 0;
+ /* This can happen during software upgrade.
+ * Older version of glusterfs will not have inode count.
+ * Return failure, this will be healed as part of lookup
+ */
+ gf_msg_callingfn("quota", GF_LOG_DEBUG, 0, LG_MSG_QUOTA_XATTRS_MISSING,
+ "Object quota "
+ "xattrs missing: len = %d",
+ data->len);
+ ret = -2;
+ goto out;
+ }
+
+ ret = 0;
+out:
+
+ return ret;
+}
+
+int32_t
+quota_dict_get_inode_meta(dict_t *dict, char *key, const int keylen,
+ quota_meta_t *meta)
+{
+ int32_t ret = -1;
+ data_t *data = NULL;
+
+ if (!dict || !key || !meta)
+ goto out;
+
+ data = dict_getn(dict, key, keylen);
+ if (!data || !data->data)
+ goto out;
+
+ ret = quota_data_to_meta(data, meta);
+
+out:
+
+ return ret;
+}
+
+int32_t
+quota_dict_get_meta(dict_t *dict, char *key, const int keylen,
+ quota_meta_t *meta)
+{
+ int32_t ret = -1;
+
+ ret = quota_dict_get_inode_meta(dict, key, keylen, meta);
+ if (ret == -2)
+ ret = 0;
+
+ return ret;
+}
+
+int32_t
+quota_dict_set_meta(dict_t *dict, char *key, const quota_meta_t *meta,
+ ia_type_t ia_type)
+{
+ int32_t ret = -ENOMEM;
+ quota_meta_t *value = NULL;
+
+ value = GF_MALLOC(sizeof(quota_meta_t), gf_common_quota_meta_t);
+ if (value == NULL) {
+ goto out;
+ }
+
+ value->size = hton64(meta->size);
+ value->file_count = hton64(meta->file_count);
+ value->dir_count = hton64(meta->dir_count);
+
+ if (ia_type == IA_IFDIR) {
+ ret = dict_set_bin(dict, key, value, sizeof(*value));
+ } else {
+ /* For a file we don't need to store dir_count in the
+ * quota size xattr, so we set the len of the data in the dict
+ * as 128bits, so when the posix xattrop reads the dict, it only
+ * performs operations on size and file_count
+ */
+ ret = dict_set_bin(dict, key, value, sizeof(*value) - sizeof(int64_t));
+ }
+
+ if (ret < 0) {
+ gf_msg_callingfn("quota", GF_LOG_ERROR, 0, LG_MSG_DICT_SET_FAILED,
+ "dict set failed");
+ GF_FREE(value);
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+quota_conf_read_header(int fd, char *buf)
+{
+ int ret = 0;
+ const int header_len = SLEN(QUOTA_CONF_HEADER);
+
+ ret = gf_nread(fd, buf, header_len);
+ if (ret <= 0) {
+ goto out;
+ } else if (ret > 0 && ret != header_len) {
+ ret = -1;
+ goto out;
+ }
+
+ buf[header_len - 1] = 0;
+
+out:
+ if (ret < 0)
+ gf_msg_callingfn("quota", GF_LOG_ERROR, 0, LG_MSG_QUOTA_CONF_ERROR,
+ "failed to read "
+ "header from a quota conf");
+
+ return ret;
+}
+
+int32_t
+quota_conf_read_version(int fd, float *version)
+{
+ int ret = 0;
+ char buf[PATH_MAX] = "";
+ char *tail = NULL;
+ float value = 0.0f;
+
+ ret = quota_conf_read_header(fd, buf);
+ if (ret == 0) {
+ /* quota.conf is empty */
+ value = GF_QUOTA_CONF_VERSION;
+ goto out;
+ } else if (ret < 0) {
+ goto out;
+ }
+
+ value = strtof((buf + strlen(buf) - 3), &tail);
+ if (tail[0] != '\0') {
+ ret = -1;
+ gf_msg_callingfn("quota", GF_LOG_ERROR, 0, LG_MSG_QUOTA_CONF_ERROR,
+ "invalid quota conf"
+ " version");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (ret >= 0)
+ *version = value;
+ else
+ gf_msg_callingfn("quota", GF_LOG_ERROR, 0, LG_MSG_QUOTA_CONF_ERROR,
+ "failed to "
+ "read version from a quota conf header");
+
+ return ret;
+}
+
+int32_t
+quota_conf_read_gfid(int fd, void *buf, char *type, float version)
+{
+ int ret = 0;
+
+ ret = gf_nread(fd, buf, 16);
+ if (ret <= 0)
+ goto out;
+
+ if (ret != 16) {
+ ret = -1;
+ goto out;
+ }
+
+ if (version >= 1.2f) {
+ ret = gf_nread(fd, type, 1);
+ if (ret != 1) {
+ ret = -1;
+ goto out;
+ }
+ ret = 17;
+ } else {
+ *type = GF_QUOTA_CONF_TYPE_USAGE;
+ }
+
+out:
+ if (ret < 0)
+ gf_msg_callingfn("quota", GF_LOG_ERROR, 0, LG_MSG_QUOTA_CONF_ERROR,
+ "failed to "
+ "read gfid from a quota conf");
+
+ return ret;
+}
+
+int32_t
+quota_conf_skip_header(int fd)
+{
+ return gf_skip_header_section(fd, strlen(QUOTA_CONF_HEADER));
+}
diff --git a/libglusterfs/src/rbthash.c b/libglusterfs/src/rbthash.c
index 4f04fed9311..c90b5a21f44 100644
--- a/libglusterfs/src/rbthash.c
+++ b/libglusterfs/src/rbthash.c
@@ -8,68 +8,66 @@
cases as published by the Free Software Foundation.
*/
-
-#include "rbthash.h"
+#include "glusterfs/rbthash.h"
#include "rb.h"
-#include "locking.h"
-#include "mem-pool.h"
-#include "logging.h"
+#include "glusterfs/locking.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/libglusterfs-messages.h"
#include <pthread.h>
#include <string.h>
-
int
-rbthash_comparator (void *entry1, void *entry2, void *param)
+rbthash_comparator(void *entry1, void *entry2, void *param)
{
- int ret = 0;
- rbthash_entry_t *e1 = NULL;
- rbthash_entry_t *e2 = NULL;
+ int ret = 0;
+ rbthash_entry_t *e1 = NULL;
+ rbthash_entry_t *e2 = NULL;
- if ((!entry1) || (!entry2) || (!param))
- return -1;
+ if ((!entry1) || (!entry2) || (!param))
+ return -1;
- e1 = (rbthash_entry_t *)entry1;
- e2 = (rbthash_entry_t *)entry2;
+ e1 = (rbthash_entry_t *)entry1;
+ e2 = (rbthash_entry_t *)entry2;
- if (e1->keylen != e2->keylen) {
- if (e1->keylen < e2->keylen)
- ret = -1;
- else if (e1->keylen > e2->keylen)
- ret = 1;
- } else
- ret = memcmp (e1->key, e2->key, e1->keylen);
+ if (e1->keylen != e2->keylen) {
+ if (e1->keylen < e2->keylen)
+ ret = -1;
+ else if (e1->keylen > e2->keylen)
+ ret = 1;
+ } else
+ ret = memcmp(e1->key, e2->key, e1->keylen);
- return ret;
+ return ret;
}
-
int
-__rbthash_init_buckets (rbthash_table_t *tbl, int buckets)
+__rbthash_init_buckets(rbthash_table_t *tbl, int buckets)
{
- int i = 0;
- int ret = -1;
-
- if (!tbl)
- return -1;
-
- for (; i < buckets; i++) {
- LOCK_INIT (&tbl->buckets[i].bucketlock);
- tbl->buckets[i].bucket = rb_create ((rb_comparison_func *)rbthash_comparator, tbl, NULL);
- if (!tbl->buckets[i].bucket) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to create rb"
- " table bucket");
- ret = -1;
- goto err;
- }
+ int i = 0;
+ int ret = -1;
+
+ if (!tbl)
+ return -1;
+
+ for (; i < buckets; i++) {
+ LOCK_INIT(&tbl->buckets[i].bucketlock);
+ tbl->buckets[i].bucket = rb_create(
+ (rb_comparison_func *)rbthash_comparator, tbl, NULL);
+ if (!tbl->buckets[i].bucket) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RB_TABLE_CREATE_FAILED,
+ NULL);
+ ret = -1;
+ goto err;
}
+ }
- ret = 0;
+ ret = 0;
err:
- return ret;
+ return ret;
}
-
/*
* rbthash_table_init - Initialize a RBT based hash table
* @buckets - Number of buckets in the hash table
@@ -81,379 +79,376 @@ err:
*/
rbthash_table_t *
-rbthash_table_init (int buckets, rbt_hasher_t hfunc,
- rbt_data_destroyer_t dfunc,
- unsigned long expected_entries,
- struct mem_pool *entrypool)
+rbthash_table_init(glusterfs_ctx_t *ctx, int buckets, rbt_hasher_t hfunc,
+ rbt_data_destroyer_t dfunc, unsigned long expected_entries,
+ struct mem_pool *entrypool)
{
- rbthash_table_t *newtab = NULL;
- int ret = -1;
-
- if (!hfunc) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Hash function not given");
- return NULL;
+ rbthash_table_t *newtab = NULL;
+ int ret = -1;
+
+ if (!hfunc) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_HASH_FUNC_ERROR, NULL);
+ return NULL;
+ }
+
+ if (!entrypool && !expected_entries) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_ENTRIES_NOT_PROVIDED, NULL);
+ return NULL;
+ }
+
+ if (entrypool && expected_entries) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_ENTRIES_PROVIDED, NULL);
+ return NULL;
+ }
+
+ newtab = GF_CALLOC(1, sizeof(*newtab), gf_common_mt_rbthash_table_t);
+ if (!newtab)
+ return NULL;
+
+ newtab->buckets = GF_CALLOC(buckets, sizeof(struct rbthash_bucket),
+ gf_common_mt_rbthash_bucket);
+ if (!newtab->buckets) {
+ goto free_newtab;
+ }
+
+ if (expected_entries) {
+ newtab->entrypool = mem_pool_new_ctx(ctx, rbthash_entry_t,
+ expected_entries);
+ if (!newtab->entrypool) {
+ goto free_buckets;
}
-
- if (!entrypool && !expected_entries) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR,
- "Both mem-pool and expected entries not provided");
- return NULL;
- }
-
- if (entrypool && expected_entries) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR,
- "Both mem-pool and expected entries are provided");
- return NULL;
- }
-
-
- newtab = GF_CALLOC (1, sizeof (*newtab),
- gf_common_mt_rbthash_table_t);
- if (!newtab)
- return NULL;
-
- newtab->buckets = GF_CALLOC (buckets, sizeof (struct rbthash_bucket),
- gf_common_mt_rbthash_bucket);
- if (!newtab->buckets) {
- goto free_newtab;
- }
-
- if (expected_entries) {
- newtab->entrypool =
- mem_pool_new (rbthash_entry_t, expected_entries);
- if (!newtab->entrypool) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR,
- "Failed to allocate mem-pool");
- goto free_buckets;
- }
- newtab->pool_alloced = _gf_true;
- } else {
- newtab->entrypool = entrypool;
- }
-
- LOCK_INIT (&newtab->tablelock);
- INIT_LIST_HEAD (&newtab->list);
- newtab->numbuckets = buckets;
- ret = __rbthash_init_buckets (newtab, buckets);
-
- if (ret == -1) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to init buckets");
- if (newtab->pool_alloced)
- mem_pool_destroy (newtab->entrypool);
- } else {
- gf_log (GF_RBTHASH, GF_LOG_TRACE, "Inited hash table: buckets:"
- " %d", buckets);
- }
-
- newtab->hashfunc = hfunc;
- newtab->dfunc = dfunc;
+ newtab->pool_alloced = _gf_true;
+ } else {
+ newtab->entrypool = entrypool;
+ }
+
+ LOCK_INIT(&newtab->tablelock);
+ INIT_LIST_HEAD(&newtab->list);
+ newtab->numbuckets = buckets;
+ ret = __rbthash_init_buckets(newtab, buckets);
+
+ if (ret == -1) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INIT_BUCKET_FAILED,
+ NULL);
+ if (newtab->pool_alloced)
+ mem_pool_destroy(newtab->entrypool);
+ } else {
+ gf_msg_trace(GF_RBTHASH, 0,
+ "Inited hash table: buckets:"
+ " %d",
+ buckets);
+ }
+
+ newtab->hashfunc = hfunc;
+ newtab->dfunc = dfunc;
free_buckets:
- if (ret == -1)
- GF_FREE (newtab->buckets);
+ if (ret == -1)
+ GF_FREE(newtab->buckets);
free_newtab:
- if (ret == -1) {
- GF_FREE (newtab);
- newtab = NULL;
- }
+ if (ret == -1) {
+ GF_FREE(newtab);
+ newtab = NULL;
+ }
- return newtab;
+ return newtab;
}
rbthash_entry_t *
-rbthash_init_entry (rbthash_table_t *tbl, void *data, void *key, int keylen)
+rbthash_init_entry(rbthash_table_t *tbl, void *data, void *key, int keylen)
{
- int ret = -1;
- rbthash_entry_t *entry = NULL;
-
- if ((!tbl) || (!data) || (!key))
- return NULL;
-
- entry = mem_get (tbl->entrypool);
- if (!entry) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to get entry from"
- " mem-pool");
- goto ret;
- }
-
- entry->data = data;
- entry->key = GF_CALLOC (keylen, sizeof (char), gf_common_mt_char);
- if (!entry->key) {
- goto free_entry;
- }
-
- INIT_LIST_HEAD (&entry->list);
- memcpy (entry->key, key, keylen);
- entry->keylen = keylen;
- entry->keyhash = tbl->hashfunc (entry->key, entry->keylen);
- gf_log (GF_RBTHASH, GF_LOG_TRACE, "HASH: %u", entry->keyhash);
-
- ret = 0;
+ int ret = -1;
+ rbthash_entry_t *entry = NULL;
+
+ if ((!tbl) || (!data) || (!key))
+ return NULL;
+
+ entry = mem_get(tbl->entrypool);
+ if (!entry) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_ENTRY_FAILED,
+ NULL);
+ goto ret;
+ }
+
+ entry->data = data;
+ entry->key = GF_MALLOC(keylen, gf_common_mt_char);
+ if (!entry->key) {
+ goto free_entry;
+ }
+
+ INIT_LIST_HEAD(&entry->list);
+ memcpy(entry->key, key, keylen);
+ entry->keylen = keylen;
+ entry->keyhash = tbl->hashfunc(entry->key, entry->keylen);
+ gf_msg_trace(GF_RBTHASH, 0, "HASH: %u", entry->keyhash);
+
+ ret = 0;
free_entry:
- if (ret == -1) {
- mem_put (entry);
- entry = NULL;
- }
+ if (ret == -1) {
+ mem_put(entry);
+ entry = NULL;
+ }
ret:
- return entry;
+ return entry;
}
-
void
-rbthash_deinit_entry (rbthash_table_t *tbl, rbthash_entry_t *entry)
+rbthash_deinit_entry(rbthash_table_t *tbl, rbthash_entry_t *entry)
{
+ if (!entry)
+ return;
- if (!entry)
- return;
-
- if (entry->key)
- GF_FREE (entry->key);
-
- if (tbl) {
- if ((entry->data) && (tbl->dfunc))
- tbl->dfunc (entry->data);
+ GF_FREE(entry->key);
- LOCK (&tbl->tablelock);
- {
- list_del_init (&entry->list);
- }
- UNLOCK (&tbl->tablelock);
+ if (tbl) {
+ if ((entry->data) && (tbl->dfunc))
+ tbl->dfunc(entry->data);
- mem_put (entry);
+ LOCK(&tbl->tablelock);
+ {
+ list_del_init(&entry->list);
}
+ UNLOCK(&tbl->tablelock);
- return;
-}
+ mem_put(entry);
+ }
+ return;
+}
-inline struct rbthash_bucket *
-rbthash_entry_bucket (rbthash_table_t *tbl, rbthash_entry_t * entry)
+static struct rbthash_bucket *
+rbthash_entry_bucket(rbthash_table_t *tbl, rbthash_entry_t *entry)
{
- int nbucket = 0;
+ int nbucket = 0;
- nbucket = (entry->keyhash % tbl->numbuckets);
- gf_log (GF_RBTHASH, GF_LOG_TRACE, "BUCKET: %d", nbucket);
- return &tbl->buckets[nbucket];
+ nbucket = (entry->keyhash % tbl->numbuckets);
+ gf_msg_trace(GF_RBTHASH, 0, "BUCKET: %d", nbucket);
+ return &tbl->buckets[nbucket];
}
-
int
-rbthash_insert_entry (rbthash_table_t *tbl, rbthash_entry_t *entry)
+rbthash_insert_entry(rbthash_table_t *tbl, rbthash_entry_t *entry)
{
- struct rbthash_bucket *bucket = NULL;
- int ret = -1;
-
- if ((!tbl) || (!entry))
- return -1;
-
- bucket = rbthash_entry_bucket (tbl, entry);
- if (!bucket) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to get bucket");
- goto err;
+ struct rbthash_bucket *bucket = NULL;
+ int ret = -1;
+
+ if ((!tbl) || (!entry))
+ return -1;
+
+ bucket = rbthash_entry_bucket(tbl, entry);
+ if (!bucket) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_BUCKET_FAILED,
+ NULL);
+ goto err;
+ }
+
+ ret = 0;
+ LOCK(&bucket->bucketlock);
+ {
+ if (!rb_probe(bucket->bucket, (void *)entry)) {
+ UNLOCK(&bucket->bucketlock);
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INSERT_FAILED,
+ NULL);
+ ret = -1;
+ goto err;
}
-
- ret = 0;
- LOCK (&bucket->bucketlock);
- {
- if (!rb_probe (bucket->bucket, (void *)entry)) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to insert"
- " entry");
- ret = -1;
- }
- }
- UNLOCK (&bucket->bucketlock);
+ }
+ UNLOCK(&bucket->bucketlock);
err:
- return ret;
+ return ret;
}
-
int
-rbthash_insert (rbthash_table_t *tbl, void *data, void *key, int keylen)
+rbthash_insert(rbthash_table_t *tbl, void *data, void *key, int keylen)
{
- rbthash_entry_t *entry = NULL;
- int ret = -1;
-
- if ((!tbl) || (!data) || (!key))
- return -1;
-
- entry = rbthash_init_entry (tbl, data, key, keylen);
- if (!entry) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to init entry");
- goto err;
- }
-
- ret = rbthash_insert_entry (tbl, entry);
-
- if (ret == -1) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to insert entry");
- rbthash_deinit_entry (tbl, entry);
- }
-
- LOCK (&tbl->tablelock);
- {
- list_add_tail (&entry->list, &tbl->list);
- }
- UNLOCK (&tbl->tablelock);
+ rbthash_entry_t *entry = NULL;
+ int ret = -1;
+
+ if ((!tbl) || (!data) || (!key))
+ return -1;
+
+ entry = rbthash_init_entry(tbl, data, key, keylen);
+ if (!entry) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INIT_ENTRY_FAILED,
+ NULL);
+ goto err;
+ }
+
+ ret = rbthash_insert_entry(tbl, entry);
+
+ if (ret == -1) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_INSERT_FAILED,
+ NULL);
+ rbthash_deinit_entry(tbl, entry);
+ goto err;
+ }
+
+ LOCK(&tbl->tablelock);
+ {
+ list_add_tail(&entry->list, &tbl->list);
+ }
+ UNLOCK(&tbl->tablelock);
err:
- return ret;
+ return ret;
}
-inline struct rbthash_bucket *
-rbthash_key_bucket (rbthash_table_t *tbl, void *key, int keylen)
+static struct rbthash_bucket *
+rbthash_key_bucket(rbthash_table_t *tbl, void *key, int keylen)
{
- uint32_t keyhash = 0;
- int nbucket = 0;
+ uint32_t keyhash = 0;
+ int nbucket = 0;
- if ((!tbl) || (!key))
- return NULL;
+ if ((!tbl) || (!key))
+ return NULL;
- keyhash = tbl->hashfunc (key, keylen);
- gf_log (GF_RBTHASH, GF_LOG_TRACE, "HASH: %u", keyhash);
- nbucket = (keyhash % tbl->numbuckets);
- gf_log (GF_RBTHASH, GF_LOG_TRACE, "BUCKET: %u", nbucket);
+ keyhash = tbl->hashfunc(key, keylen);
+ gf_msg_trace(GF_RBTHASH, 0, "HASH: %u", keyhash);
+ nbucket = (keyhash % tbl->numbuckets);
+ gf_msg_trace(GF_RBTHASH, 0, "BUCKET: %u", nbucket);
- return &tbl->buckets[nbucket];
+ return &tbl->buckets[nbucket];
}
-
void *
-rbthash_get (rbthash_table_t *tbl, void *key, int keylen)
+rbthash_get(rbthash_table_t *tbl, void *key, int keylen)
{
- struct rbthash_bucket *bucket = NULL;
- rbthash_entry_t *entry = NULL;
- rbthash_entry_t searchentry = {0, };
-
- if ((!tbl) || (!key))
- return NULL;
-
- bucket = rbthash_key_bucket (tbl, key, keylen);
- if (!bucket) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to get bucket");
- return NULL;
- }
-
- searchentry.key = key;
- searchentry.keylen = keylen;
- LOCK (&bucket->bucketlock);
- {
- entry = rb_find (bucket->bucket, &searchentry);
- }
- UNLOCK (&bucket->bucketlock);
-
- if (!entry)
- return NULL;
-
- return entry->data;
+ struct rbthash_bucket *bucket = NULL;
+ rbthash_entry_t *entry = NULL;
+ rbthash_entry_t searchentry = {
+ 0,
+ };
+
+ if ((!tbl) || (!key))
+ return NULL;
+
+ bucket = rbthash_key_bucket(tbl, key, keylen);
+ if (!bucket) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_BUCKET_FAILED,
+ NULL);
+ return NULL;
+ }
+
+ searchentry.key = key;
+ searchentry.keylen = keylen;
+ LOCK(&bucket->bucketlock);
+ {
+ entry = rb_find(bucket->bucket, &searchentry);
+ }
+ UNLOCK(&bucket->bucketlock);
+
+ if (!entry)
+ return NULL;
+
+ return entry->data;
}
-
void *
-rbthash_remove (rbthash_table_t *tbl, void *key, int keylen)
+rbthash_remove(rbthash_table_t *tbl, void *key, int keylen)
{
- struct rbthash_bucket *bucket = NULL;
- rbthash_entry_t *entry = NULL;
- rbthash_entry_t searchentry = {0, };
- void *dataref = NULL;
-
- if ((!tbl) || (!key))
- return NULL;
-
- bucket = rbthash_key_bucket (tbl, key, keylen);
- if (!bucket) {
- gf_log (GF_RBTHASH, GF_LOG_ERROR, "Failed to get bucket");
- return NULL;
- }
-
- searchentry.key = key;
- searchentry.keylen = keylen;
-
- LOCK (&bucket->bucketlock);
- {
- entry = rb_delete (bucket->bucket, &searchentry);
- }
- UNLOCK (&bucket->bucketlock);
-
- if (!entry)
- return NULL;
-
- GF_FREE (entry->key);
- dataref = entry->data;
-
- LOCK (&tbl->tablelock);
- {
- list_del_init (&entry->list);
- }
- UNLOCK (&tbl->tablelock);
-
- mem_put (entry);
-
- return dataref;
+ struct rbthash_bucket *bucket = NULL;
+ rbthash_entry_t *entry = NULL;
+ rbthash_entry_t searchentry = {
+ 0,
+ };
+ void *dataref = NULL;
+
+ if ((!tbl) || (!key))
+ return NULL;
+
+ bucket = rbthash_key_bucket(tbl, key, keylen);
+ if (!bucket) {
+ gf_smsg(GF_RBTHASH, GF_LOG_ERROR, 0, LG_MSG_RBTHASH_GET_BUCKET_FAILED,
+ NULL);
+ return NULL;
+ }
+
+ searchentry.key = key;
+ searchentry.keylen = keylen;
+
+ LOCK(&bucket->bucketlock);
+ {
+ entry = rb_delete(bucket->bucket, &searchentry);
+ }
+ UNLOCK(&bucket->bucketlock);
+
+ if (!entry)
+ return NULL;
+
+ GF_FREE(entry->key);
+ dataref = entry->data;
+
+ LOCK(&tbl->tablelock);
+ {
+ list_del_init(&entry->list);
+ }
+ UNLOCK(&tbl->tablelock);
+
+ mem_put(entry);
+
+ return dataref;
}
-
void
-rbthash_entry_deiniter (void *entry, void *rbparam)
+rbthash_entry_deiniter(void *entry, void *rbparam)
{
- if (!entry)
- return;
+ if (!entry)
+ return;
- rbthash_deinit_entry (rbparam, entry);
+ rbthash_deinit_entry(rbparam, entry);
}
-
void
-rbthash_table_destroy_buckets (rbthash_table_t *tbl)
+rbthash_table_destroy_buckets(rbthash_table_t *tbl)
{
- int x = 0;
- if (!tbl)
- return;
+ int x = 0;
+ if (!tbl)
+ return;
- for (;x < tbl->numbuckets; x++) {
- LOCK_DESTROY (&tbl->buckets[x].bucketlock);
- rb_destroy (tbl->buckets[x].bucket, rbthash_entry_deiniter);
- }
+ for (; x < tbl->numbuckets; x++) {
+ LOCK_DESTROY(&tbl->buckets[x].bucketlock);
+ rb_destroy(tbl->buckets[x].bucket, rbthash_entry_deiniter);
+ }
- return;
+ return;
}
-
void
-rbthash_table_destroy (rbthash_table_t *tbl)
+rbthash_table_destroy(rbthash_table_t *tbl)
{
- if (!tbl)
- return;
+ if (!tbl)
+ return;
- rbthash_table_destroy_buckets (tbl);
- if (tbl->pool_alloced)
- mem_pool_destroy (tbl->entrypool);
+ rbthash_table_destroy_buckets(tbl);
+ if (tbl->pool_alloced)
+ mem_pool_destroy(tbl->entrypool);
- GF_FREE (tbl->buckets);
- GF_FREE (tbl);
+ GF_FREE(tbl->buckets);
+ GF_FREE(tbl);
}
-
void
-rbthash_table_traverse (rbthash_table_t *tbl, rbt_traverse_t traverse,
- void *mydata)
+rbthash_table_traverse(rbthash_table_t *tbl, rbt_traverse_t traverse,
+ void *mydata)
{
- rbthash_entry_t *entry = NULL;
+ rbthash_entry_t *entry = NULL;
- if ((tbl == NULL) || (traverse == NULL)) {
- goto out;
- }
+ if ((tbl == NULL) || (traverse == NULL)) {
+ goto out;
+ }
- LOCK (&tbl->tablelock);
+ LOCK(&tbl->tablelock);
+ {
+ list_for_each_entry(entry, &tbl->list, list)
{
- list_for_each_entry (entry, &tbl->list, list) {
- traverse (entry->data, mydata);
- }
+ traverse(entry->data, mydata);
}
- UNLOCK (&tbl->tablelock);
+ }
+ UNLOCK(&tbl->tablelock);
out:
- return;
+ return;
}
diff --git a/libglusterfs/src/rbthash.h b/libglusterfs/src/rbthash.h
deleted file mode 100644
index b093ce9982d..00000000000
--- a/libglusterfs/src/rbthash.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __RBTHASH_TABLE_H_
-#define __RBTHASH_TABLE_H_
-#include "rb.h"
-#include "locking.h"
-#include "mem-pool.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
-
-#include <pthread.h>
-
-#define GF_RBTHASH_MEMPOOL 16384 //1048576
-#define GF_RBTHASH "rbthash"
-
-struct rbthash_bucket {
- struct rb_table *bucket;
- gf_lock_t bucketlock;
-};
-
-typedef struct rbthash_entry {
- void *data;
- void *key;
- int keylen;
- uint32_t keyhash;
- struct list_head list;
-} rbthash_entry_t;
-
-typedef uint32_t (*rbt_hasher_t) (void *data, int len);
-typedef void (*rbt_data_destroyer_t) (void *data);
-typedef void (*rbt_traverse_t) (void *data, void *mydata);
-
-typedef struct rbthash_table {
- int size;
- int numbuckets;
- struct mem_pool *entrypool;
- gf_lock_t tablelock;
- struct rbthash_bucket *buckets;
- rbt_hasher_t hashfunc;
- rbt_data_destroyer_t dfunc;
- gf_boolean_t pool_alloced;
- struct list_head list;
-} rbthash_table_t;
-
-extern rbthash_table_t *
-rbthash_table_init (int buckets, rbt_hasher_t hfunc,
- rbt_data_destroyer_t dfunc, unsigned long expected_entries,
- struct mem_pool *entrypool);
-
-extern int
-rbthash_insert (rbthash_table_t *tbl, void *data, void *key, int keylen);
-
-extern void *
-rbthash_get (rbthash_table_t *tbl, void *key, int keylen);
-
-extern void *
-rbthash_remove (rbthash_table_t *tbl, void *key, int keylen);
-
-extern void *
-rbthash_replace (rbthash_table_t *tbl, void *key, int keylen, void *newdata);
-
-extern void
-rbthash_table_destroy (rbthash_table_t *tbl);
-
-extern void
-rbthash_table_traverse (rbthash_table_t *tbl, rbt_traverse_t traverse,
- void *mydata);
-#endif
diff --git a/libglusterfs/src/refcount.c b/libglusterfs/src/refcount.c
new file mode 100644
index 00000000000..d5a5a82fa0f
--- /dev/null
+++ b/libglusterfs/src/refcount.c
@@ -0,0 +1,108 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/common-utils.h"
+#include "glusterfs/refcount.h"
+
+#ifndef REFCOUNT_NEEDS_LOCK
+
+void *
+_gf_ref_get(gf_ref_t *ref)
+{
+ unsigned int cnt = __sync_fetch_and_add(&ref->cnt, 1);
+
+ /* if cnt == 0, we're in a fatal position, the object will be free'd
+ *
+ * There is a race when two threads do a _gf_ref_get(). Only one of
+ * them may get a 0 returned. That is acceptable, because one
+ * _gf_ref_get() returning 0 should be handled as a fatal problem and
+ * when correct usage/locking is used, it should never happen.
+ */
+ GF_ASSERT(cnt != 0);
+
+ return cnt ? ref->data : NULL;
+}
+
+unsigned int
+_gf_ref_put(gf_ref_t *ref)
+{
+ unsigned int cnt = __sync_fetch_and_sub(&ref->cnt, 1);
+
+ /* if cnt == 1, the last user just did a _gf_ref_put()
+ *
+ * When cnt == 0, one _gf_ref_put() was done too much and there has
+ * been a thread using the refcounted structure when it was not
+ * supposed to.
+ */
+ GF_ASSERT(cnt != 0);
+
+ if (cnt == 1 && ref->release)
+ ref->release(ref->data);
+
+ return (cnt != 1);
+}
+
+#else
+
+void *
+_gf_ref_get(gf_ref_t *ref)
+{
+ unsigned int cnt = 0;
+
+ LOCK(&ref->lk);
+ {
+ /* never can be 0, should have been free'd */
+ if (ref->cnt > 0)
+ cnt = ++ref->cnt;
+ else
+ GF_ASSERT(ref->cnt > 0);
+ }
+ UNLOCK(&ref->lk);
+
+ return cnt ? ref->data : NULL;
+}
+
+unsigned int
+_gf_ref_put(gf_ref_t *ref)
+{
+ unsigned int cnt = 0;
+ int release = 0;
+
+ LOCK(&ref->lk);
+ {
+ if (ref->cnt != 0) {
+ cnt = --ref->cnt;
+ /* call release() only when cnt == 0 */
+ release = (cnt == 0);
+ } else
+ GF_ASSERT(ref->cnt != 0);
+ }
+ UNLOCK(&ref->lk);
+
+ if (release && ref->release)
+ ref->release(ref->data);
+
+ return !release;
+}
+
+#endif /* REFCOUNT_NEEDS_LOCK */
+
+void
+_gf_ref_init(gf_ref_t *ref, gf_ref_release_t release, void *data)
+{
+ GF_ASSERT(ref);
+
+#ifdef REFCOUNT_NEEDS_LOCK
+ LOCK_INIT(&ref->lk);
+#endif
+ ref->cnt = 1;
+ ref->release = release;
+ ref->data = data;
+}
diff --git a/libglusterfs/src/revision.h b/libglusterfs/src/revision.h
deleted file mode 100644
index c1ea4a9e9fe..00000000000
--- a/libglusterfs/src/revision.h
+++ /dev/null
@@ -1 +0,0 @@
-#define GLUSTERFS_REPOSITORY_REVISION "git://git.gluster.com/glusterfs.git"
diff --git a/libglusterfs/src/rot-buffs.c b/libglusterfs/src/rot-buffs.c
new file mode 100644
index 00000000000..260bf16ecea
--- /dev/null
+++ b/libglusterfs/src/rot-buffs.c
@@ -0,0 +1,490 @@
+/*
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <math.h>
+
+#include "glusterfs/mem-types.h"
+#include "glusterfs/mem-pool.h"
+
+#include "glusterfs/rot-buffs.h"
+
+/**
+ * Producer-Consumer based on top of rotational buffers.
+ *
+ * This favours writers (producer) and keeps the critical section
+ * light weight. Buffer switch happens when a consumer wants to
+ * consume data. This is the slow path and waits for pending
+ * writes to finish.
+ *
+ * TODO: do away with opaques (use arrays with indexing).
+ */
+
+#define ROT_BUFF_DEFAULT_COUNT 2
+#define ROT_BUFF_ALLOC_SIZE (1 * 1024 * 1024) /* 1MB per iovec */
+
+#define RLIST_IOV_MELDED_ALLOC_SIZE (RBUF_IOVEC_SIZE + ROT_BUFF_ALLOC_SIZE)
+
+/**
+ * iovec list is not shrunk (deallocated) if usage/total count
+ * falls in this range. this is the fast path and should satisfy
+ * most of the workloads. for the rest shrinking iovec list is
+ * generous.
+ */
+#define RVEC_LOW_WATERMARK_COUNT 1
+#define RVEC_HIGH_WATERMARK_COUNT (1 << 4)
+
+static inline rbuf_list_t *
+rbuf_current_buffer(rbuf_t *rbuf)
+{
+ return rbuf->current;
+}
+
+static void
+rlist_mark_waiting(rbuf_list_t *rlist)
+{
+ LOCK(&rlist->c_lock);
+ {
+ rlist->awaiting = _gf_true;
+ }
+ UNLOCK(&rlist->c_lock);
+}
+
+static int
+__rlist_has_waiter(rbuf_list_t *rlist)
+{
+ return (rlist->awaiting == _gf_true);
+}
+
+static void *
+rbuf_alloc_rvec()
+{
+ return GF_CALLOC(1, RLIST_IOV_MELDED_ALLOC_SIZE, gf_common_mt_rvec_t);
+}
+
+static void
+rlist_reset_vector_usage(rbuf_list_t *rlist)
+{
+ rlist->used = 1;
+}
+
+static void
+rlist_increment_vector_usage(rbuf_list_t *rlist)
+{
+ rlist->used++;
+}
+
+static void
+rlist_increment_total_usage(rbuf_list_t *rlist)
+{
+ rlist->total++;
+}
+
+static int
+rvec_in_watermark_range(rbuf_list_t *rlist)
+{
+ return ((rlist->total >= RVEC_LOW_WATERMARK_COUNT) &&
+ (rlist->total <= RVEC_HIGH_WATERMARK_COUNT));
+}
+
+static void
+rbuf_reset_rvec(rbuf_iovec_t *rvec)
+{
+ GF_VALIDATE_OR_GOTO("libglusterfs", rvec, err);
+ /* iov_base is _never_ modified */
+ rvec->iov.iov_len = 0;
+err:
+ return;
+}
+
+/* TODO: alloc multiple rbuf_iovec_t */
+static int
+rlist_add_new_vec(rbuf_list_t *rlist)
+{
+ rbuf_iovec_t *rvec = NULL;
+
+ rvec = (rbuf_iovec_t *)rbuf_alloc_rvec();
+ if (!rvec)
+ return -1;
+ INIT_LIST_HEAD(&rvec->list);
+ rvec->iov.iov_base = ((char *)rvec) + RBUF_IOVEC_SIZE;
+ rvec->iov.iov_len = 0;
+
+ list_add_tail(&rvec->list, &rlist->veclist);
+
+ rlist->rvec = rvec; /* cache the latest */
+
+ rlist_increment_vector_usage(rlist);
+ rlist_increment_total_usage(rlist);
+
+ return 0;
+}
+
+static void
+rlist_free_rvec(rbuf_iovec_t *rvec)
+{
+ if (!rvec)
+ return;
+ list_del(&rvec->list);
+ GF_FREE(rvec);
+}
+
+static void
+rlist_purge_all_rvec(rbuf_list_t *rlist)
+{
+ rbuf_iovec_t *rvec = NULL;
+
+ if (!rlist)
+ return;
+ while (!list_empty(&rlist->veclist)) {
+ rvec = list_first_entry(&rlist->veclist, rbuf_iovec_t, list);
+ rlist_free_rvec(rvec);
+ }
+}
+
+static void
+rlist_shrink_rvec(rbuf_list_t *rlist, unsigned long long shrink)
+{
+ rbuf_iovec_t *rvec = NULL;
+
+ while (!list_empty(&rlist->veclist) && (shrink-- > 0)) {
+ rvec = list_first_entry(&rlist->veclist, rbuf_iovec_t, list);
+ rlist_free_rvec(rvec);
+ }
+}
+
+static void
+rbuf_purge_rlist(rbuf_t *rbuf)
+{
+ rbuf_list_t *rlist = NULL;
+
+ while (!list_empty(&rbuf->freelist)) {
+ rlist = list_first_entry(&rbuf->freelist, rbuf_list_t, list);
+ list_del(&rlist->list);
+
+ rlist_purge_all_rvec(rlist);
+
+ LOCK_DESTROY(&rlist->c_lock);
+
+ (void)pthread_mutex_destroy(&rlist->b_lock);
+ (void)pthread_cond_destroy(&rlist->b_cond);
+
+ GF_FREE(rlist);
+ }
+}
+
+rbuf_t *
+rbuf_init(int bufcount)
+{
+ int j = 0;
+ int ret = 0;
+ rbuf_t *rbuf = NULL;
+ rbuf_list_t *rlist = NULL;
+
+ if (bufcount <= 0)
+ bufcount = ROT_BUFF_DEFAULT_COUNT;
+
+ rbuf = GF_CALLOC(1, sizeof(rbuf_t), gf_common_mt_rbuf_t);
+ if (!rbuf)
+ goto error_return;
+
+ LOCK_INIT(&rbuf->lock);
+ INIT_LIST_HEAD(&rbuf->freelist);
+
+ /* it could have been one big calloc() but this is just once.. */
+ for (j = 0; j < bufcount; j++) {
+ rlist = GF_CALLOC(1, sizeof(rbuf_list_t), gf_common_mt_rlist_t);
+ if (!rlist) {
+ ret = -1;
+ break;
+ }
+
+ INIT_LIST_HEAD(&rlist->list);
+ INIT_LIST_HEAD(&rlist->veclist);
+
+ rlist->pending = rlist->completed = 0;
+
+ ret = rlist_add_new_vec(rlist);
+ if (ret)
+ break;
+
+ LOCK_INIT(&rlist->c_lock);
+
+ rlist->awaiting = _gf_false;
+ ret = pthread_mutex_init(&rlist->b_lock, 0);
+ if (ret != 0) {
+ GF_FREE(rlist);
+ break;
+ }
+
+ ret = pthread_cond_init(&rlist->b_cond, 0);
+ if (ret != 0) {
+ GF_FREE(rlist);
+ break;
+ }
+
+ list_add_tail(&rlist->list, &rbuf->freelist);
+ }
+
+ if (ret != 0)
+ goto dealloc_rlist;
+
+ /* cache currently used buffer: first in the list */
+ rbuf->current = list_first_entry(&rbuf->freelist, rbuf_list_t, list);
+ return rbuf;
+
+dealloc_rlist:
+ rbuf_purge_rlist(rbuf);
+ LOCK_DESTROY(&rbuf->lock);
+ GF_FREE(rbuf);
+error_return:
+ return NULL;
+}
+
+void
+rbuf_dtor(rbuf_t *rbuf)
+{
+ if (!rbuf)
+ return;
+ rbuf->current = NULL;
+ rbuf_purge_rlist(rbuf);
+ LOCK_DESTROY(&rbuf->lock);
+
+ GF_FREE(rbuf);
+}
+
+static char *
+rbuf_adjust_write_area(struct iovec *iov, size_t bytes)
+{
+ char *wbuf = NULL;
+
+ wbuf = iov->iov_base + iov->iov_len;
+ iov->iov_len += bytes;
+ return wbuf;
+}
+
+static char *
+rbuf_alloc_write_area(rbuf_list_t *rlist, size_t bytes)
+{
+ int ret = 0;
+ struct iovec *iov = NULL;
+
+ /* check for available space in _current_ IO buffer */
+ iov = &rlist->rvec->iov;
+ if (iov->iov_len + bytes <= ROT_BUFF_ALLOC_SIZE)
+ return rbuf_adjust_write_area(iov, bytes); /* fast path */
+
+ /* not enough bytes, try next available buffers */
+ if (list_is_last(&rlist->rvec->list, &rlist->veclist)) {
+ /* OH! consumed all vector buffers */
+ GF_ASSERT(rlist->used == rlist->total);
+ ret = rlist_add_new_vec(rlist);
+ if (ret)
+ goto error_return;
+ } else {
+ /* not the end, have available rbuf_iovec's */
+ rlist->rvec = list_next_entry(rlist->rvec, list);
+ rlist->used++;
+ rbuf_reset_rvec(rlist->rvec);
+ }
+
+ iov = &rlist->rvec->iov;
+ return rbuf_adjust_write_area(iov, bytes);
+
+error_return:
+ return NULL;
+}
+
+char *
+rbuf_reserve_write_area(rbuf_t *rbuf, size_t bytes, void **opaque)
+{
+ char *wbuf = NULL;
+ rbuf_list_t *rlist = NULL;
+
+ if (!rbuf || (bytes <= 0) || (bytes > ROT_BUFF_ALLOC_SIZE) || !opaque)
+ return NULL;
+
+ LOCK(&rbuf->lock);
+ {
+ rlist = rbuf_current_buffer(rbuf);
+ wbuf = rbuf_alloc_write_area(rlist, bytes);
+ if (!wbuf)
+ goto unblock;
+ rlist->pending++;
+ }
+unblock:
+ UNLOCK(&rbuf->lock);
+
+ if (wbuf)
+ *opaque = rlist;
+ return wbuf;
+}
+
+static void
+rbuf_notify_waiter(rbuf_list_t *rlist)
+{
+ pthread_mutex_lock(&rlist->b_lock);
+ {
+ pthread_cond_signal(&rlist->b_cond);
+ }
+ pthread_mutex_unlock(&rlist->b_lock);
+}
+
+int
+rbuf_write_complete(void *opaque)
+{
+ rbuf_list_t *rlist = NULL;
+ gf_boolean_t notify = _gf_false;
+
+ if (!opaque)
+ return -1;
+
+ rlist = opaque;
+
+ LOCK(&rlist->c_lock);
+ {
+ rlist->completed++;
+ /**
+ * it's safe to test ->pending without rbuf->lock *only* if
+ * there's a waiter as there can be no new incoming writes.
+ */
+ if (__rlist_has_waiter(rlist) && (rlist->completed == rlist->pending))
+ notify = _gf_true;
+ }
+ UNLOCK(&rlist->c_lock);
+
+ if (notify)
+ rbuf_notify_waiter(rlist);
+
+ return 0;
+}
+
+int
+rbuf_get_buffer(rbuf_t *rbuf, void **opaque, sequence_fn *seqfn, void *mydata)
+{
+ int retval = RBUF_CONSUMABLE;
+ rbuf_list_t *rlist = NULL;
+
+ if (!rbuf || !opaque)
+ return -1;
+
+ LOCK(&rbuf->lock);
+ {
+ rlist = rbuf_current_buffer(rbuf);
+ if (!rlist->pending) {
+ retval = RBUF_EMPTY;
+ goto unblock;
+ }
+
+ if (list_is_singular(&rbuf->freelist)) {
+ /**
+ * removal would lead to writer starvation, disallow
+ * switching.
+ */
+ retval = RBUF_WOULD_STARVE;
+ goto unblock;
+ }
+
+ list_del_init(&rlist->list);
+ if (seqfn)
+ seqfn(rlist, mydata);
+ rbuf->current = list_first_entry(&rbuf->freelist, rbuf_list_t, list);
+ }
+unblock:
+ UNLOCK(&rbuf->lock);
+
+ if (retval == RBUF_CONSUMABLE)
+ *opaque = rlist; /* caller _owns_ the buffer */
+
+ return retval;
+}
+
+/**
+ * Wait for completion of pending writers and invoke dispatcher
+ * routine (for buffer consumption).
+ */
+
+static void
+__rbuf_wait_for_writers(rbuf_list_t *rlist)
+{
+ while (rlist->completed != rlist->pending)
+ pthread_cond_wait(&rlist->b_cond, &rlist->b_lock);
+}
+
+#ifndef M_E
+#define M_E 2.7
+#endif
+
+static void
+rlist_shrink_vector(rbuf_list_t *rlist)
+{
+ unsigned long long shrink = 0;
+
+ /**
+ * fast path: don't bother to deallocate if vectors are hardly
+ * used.
+ */
+ if (rvec_in_watermark_range(rlist))
+ return;
+
+ /**
+ * Calculate the shrink count based on total allocated vectors.
+ * Note that the calculation sticks to rlist->total irrespective
+ * of the actual usage count (rlist->used). Later, ->used could
+ * be used to apply slack to the calculation based on how much
+ * it lags from ->total. For now, let's stick to slow decay.
+ */
+ shrink = rlist->total - (rlist->total * pow(M_E, -0.2));
+
+ rlist_shrink_rvec(rlist, shrink);
+ rlist->total -= shrink;
+}
+
+int
+rbuf_wait_for_completion(rbuf_t *rbuf, void *opaque,
+ void (*fn)(rbuf_list_t *, void *), void *arg)
+{
+ rbuf_list_t *rlist = NULL;
+
+ if (!rbuf || !opaque)
+ return -1;
+
+ rlist = opaque;
+
+ pthread_mutex_lock(&rlist->b_lock);
+ {
+ rlist_mark_waiting(rlist);
+ __rbuf_wait_for_writers(rlist);
+ }
+ pthread_mutex_unlock(&rlist->b_lock);
+
+ /**
+ * from here on, no need of locking until the rlist is put
+ * back into rotation.
+ */
+
+ fn(rlist, arg); /* invoke dispatcher */
+
+ rlist->awaiting = _gf_false;
+ rlist->pending = rlist->completed = 0;
+
+ rlist_shrink_vector(rlist);
+ rlist_reset_vector_usage(rlist);
+
+ rlist->rvec = list_first_entry(&rlist->veclist, rbuf_iovec_t, list);
+ rbuf_reset_rvec(rlist->rvec);
+
+ LOCK(&rbuf->lock);
+ {
+ list_add_tail(&rlist->list, &rbuf->freelist);
+ }
+ UNLOCK(&rbuf->lock);
+
+ return 0;
+}
diff --git a/libglusterfs/src/run.c b/libglusterfs/src/run.c
index 50c1e30bfc6..58f95a7e610 100644
--- a/libglusterfs/src/run.c
+++ b/libglusterfs/src/run.c
@@ -23,9 +23,31 @@
#include <assert.h>
#include <signal.h>
#include <sys/wait.h>
-#include <sys/resource.h>
+#include "glusterfs/syscall.h"
-#ifdef RUN_STANDALONE
+/*
+ * Following defines are available for helping development:
+ * RUN_STANDALONE and RUN_DO_DEMO.
+ *
+ * Compiling a standalone object file with no dependencies
+ * on glusterfs:
+ * $ cc -DRUN_STANDALONE -c run.c
+ *
+ * Compiling a demo program that exercises bits of run.c
+ * functionality (linking to glusterfs):
+ * $ cc -DRUN_DO_DEMO -orun run.c `pkg-config --libs --cflags glusterfs-api`
+ *
+ * Compiling a demo program that exercises bits of run.c
+ * functionality (with no dependence on glusterfs):
+ *
+ * $ cc -DRUN_DO_DEMO -DRUN_STANDALONE -orun run.c
+ */
+#if defined(RUN_STANDALONE) || defined(RUN_DO_DEMO)
+int
+close_fds_except(int *fdv, size_t count);
+#define sys_read(f, b, c) read(f, b, c)
+#define sys_write(f, b, c) write(f, b, c)
+#define sys_close(f) close(f)
#define GF_CALLOC(n, s, t) calloc(n, s)
#define GF_ASSERT(cond) assert(cond)
#define GF_REALLOC(p, s) realloc(p, s)
@@ -33,462 +55,516 @@
#define gf_strdup(s) strdup(s)
#define gf_vasprintf(p, f, va) vasprintf(p, f, va)
#define gf_loglevel_t int
-#define gf_log(dom, levl, fmt, args...) printf("LOG: " fmt "\n", ##args)
+#define gf_msg_callingfn(dom, level, errnum, msgid, fmt, args...) \
+ printf("LOG: " fmt "\n", ##args)
#define LOG_DEBUG 0
+#ifdef RUN_STANDALONE
+#include <stdbool.h>
+#include <sys/resource.h>
+int
+close_fds_except(int *fdv, size_t count)
+{
+ int i = 0;
+ size_t j = 0;
+ bool should_close = true;
+ struct rlimit rl;
+ int ret = -1;
+
+ ret = getrlimit(RLIMIT_NOFILE, &rl);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < rl.rlim_cur; i++) {
+ should_close = true;
+ for (j = 0; j < count; j++) {
+ if (i == fdv[j]) {
+ should_close = false;
+ break;
+ }
+ }
+ if (should_close)
+ sys_close(i);
+ }
+ return 0;
+}
+#endif
#ifdef __linux__
#define GF_LINUX_HOST_OS
#endif
-#else /* ! RUN_STANDALONE */
-#include "glusterfs.h"
-#include "common-utils.h"
+#else /* ! RUN_STANDALONE || RUN_DO_DEMO */
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
#endif
-#include "run.h"
-
+#include "glusterfs/run.h"
void
-runinit (runner_t *runner)
+runinit(runner_t *runner)
{
- int i = 0;
-
- runner->argvlen = 64;
- runner->argv = GF_CALLOC (runner->argvlen,
- sizeof (*runner->argv),
- gf_common_mt_run_argv);
- runner->runerr = runner->argv ? 0 : errno;
- runner->chpid = -1;
- for (i = 0; i < 3; i++) {
- runner->chfd[i] = -1;
- runner->chio[i] = NULL;
- }
+ int i = 0;
+
+ runner->argvlen = 64;
+ runner->argv = GF_CALLOC(runner->argvlen, sizeof(*runner->argv),
+ gf_common_mt_run_argv);
+ runner->runerr = runner->argv ? 0 : errno;
+ runner->chpid = -1;
+ for (i = 0; i < 3; i++) {
+ runner->chfd[i] = -1;
+ runner->chio[i] = NULL;
+ }
}
FILE *
-runner_chio (runner_t *runner, int fd)
+runner_chio(runner_t *runner, int fd)
{
- GF_ASSERT (fd > 0 && fd < 3);
+ GF_ASSERT(fd > 0 && fd < 3);
- if ((fd > 0) && (fd < 3))
- return runner->chio[fd];
+ if ((fd > 0) && (fd < 3))
+ return runner->chio[fd];
- return NULL;
+ return NULL;
}
static void
-runner_insert_arg (runner_t *runner, char *arg)
+runner_insert_arg(runner_t *runner, char *arg)
{
- int i = 0;
+ int i = 0;
- GF_ASSERT (arg);
+ GF_ASSERT(arg);
- if (runner->runerr)
- return;
+ if (runner->runerr || !runner->argv)
+ return;
- for (i = 0; i < runner->argvlen; i++) {
- if (runner->argv[i] == NULL)
- break;
- }
- GF_ASSERT (i < runner->argvlen);
-
- if (i == runner->argvlen - 1) {
- runner->argv = GF_REALLOC (runner->argv,
- runner->argvlen * 2 * sizeof (*runner->argv));
- if (!runner->argv) {
- runner->runerr = errno;
- return;
- }
- memset (/* "+" is aware of the type of its left side,
- * no need to multiply with type-size */
- runner->argv + runner->argvlen,
- 0, runner->argvlen * sizeof (*runner->argv));
- runner->argvlen *= 2;
- }
+ for (i = 0; i < runner->argvlen; i++) {
+ if (runner->argv[i] == NULL)
+ break;
+ }
+ GF_ASSERT(i < runner->argvlen);
- runner->argv[i] = arg;
+ if (i == runner->argvlen - 1) {
+ runner->argv = GF_REALLOC(runner->argv,
+ runner->argvlen * 2 * sizeof(*runner->argv));
+ if (!runner->argv) {
+ runner->runerr = errno;
+ return;
+ }
+ memset(/* "+" is aware of the type of its left side,
+ * no need to multiply with type-size */
+ runner->argv + runner->argvlen, 0,
+ runner->argvlen * sizeof(*runner->argv));
+ runner->argvlen *= 2;
+ }
+
+ runner->argv[i] = arg;
}
void
-runner_add_arg (runner_t *runner, const char *arg)
+runner_add_arg(runner_t *runner, const char *arg)
{
- arg = gf_strdup (arg);
- if (!arg) {
- runner->runerr = errno;
- return;
- }
+ arg = gf_strdup(arg);
+ if (!arg) {
+ runner->runerr = errno;
+ return;
+ }
- runner_insert_arg (runner, (char *)arg);
+ runner_insert_arg(runner, (char *)arg);
}
static void
-runner_va_add_args (runner_t *runner, va_list argp)
+runner_va_add_args(runner_t *runner, va_list argp)
{
- const char *arg;
+ const char *arg;
- while ((arg = va_arg (argp, const char *)))
- runner_add_arg (runner, arg);
+ while ((arg = va_arg(argp, const char *)))
+ runner_add_arg(runner, arg);
}
void
-runner_add_args (runner_t *runner, ...)
+runner_add_args(runner_t *runner, ...)
{
- va_list argp;
+ va_list argp;
- va_start (argp, runner);
- runner_va_add_args (runner, argp);
- va_end (argp);
+ va_start(argp, runner);
+ runner_va_add_args(runner, argp);
+ va_end(argp);
}
void
-runner_argprintf (runner_t *runner, const char *format, ...)
+runner_argprintf(runner_t *runner, const char *format, ...)
{
- va_list argva;
- char *arg = NULL;
- int ret = 0;
+ va_list argva;
+ char *arg = NULL;
+ int ret = 0;
- va_start (argva, format);
- ret = gf_vasprintf (&arg, format, argva);
- va_end (argva);
+ va_start(argva, format);
+ ret = gf_vasprintf(&arg, format, argva);
+ va_end(argva);
- if (ret < 0) {
- runner->runerr = errno;
- return;
- }
+ if (ret < 0) {
+ runner->runerr = errno;
+ return;
+ }
- runner_insert_arg (runner, arg);
+ runner_insert_arg(runner, arg);
}
void
-runner_log (runner_t *runner, const char *dom, gf_loglevel_t lvl,
- const char *msg)
+runner_log(runner_t *runner, const char *dom, gf_loglevel_t lvl,
+ const char *msg)
{
- char *buf = NULL;
- size_t len = 0;
- int i = 0;
-
- if (runner->runerr)
- return;
-
- for (i = 0;; i++) {
- if (runner->argv[i] == NULL)
- break;
- len += (strlen (runner->argv[i]) + 1);
- }
-
- buf = GF_CALLOC (1, len + 1, gf_common_mt_run_logbuf);
- if (!buf) {
- runner->runerr = errno;
- return;
- }
- for (i = 0;; i++) {
- if (runner->argv[i] == NULL)
- break;
- strcat (buf, runner->argv[i]);
- strcat (buf, " ");
- }
- if (len > 0)
- buf[len - 1] = '\0';
-
- gf_log (dom, lvl, "%s: %s", msg, buf);
-
- GF_FREE (buf);
+ char *buf = NULL;
+ size_t len = 0;
+ int i = 0;
+
+ if (runner->runerr)
+ return;
+
+ for (i = 0;; i++) {
+ if (runner->argv[i] == NULL)
+ break;
+ len += (strlen(runner->argv[i]) + 1);
+ }
+
+ buf = GF_CALLOC(1, len + 1, gf_common_mt_run_logbuf);
+ if (!buf) {
+ runner->runerr = errno;
+ return;
+ }
+ for (i = 0;; i++) {
+ if (runner->argv[i] == NULL)
+ break;
+ strcat(buf, runner->argv[i]);
+ strcat(buf, " ");
+ }
+ if (len > 0)
+ buf[len - 1] = '\0';
+
+ gf_msg_callingfn(dom, lvl, 0, LG_MSG_RUNNER_LOG, "%s: %s", msg, buf);
+
+ GF_FREE(buf);
}
void
-runner_redir (runner_t *runner, int fd, int tgt_fd)
+runner_redir(runner_t *runner, int fd, int tgt_fd)
{
- GF_ASSERT (fd > 0 && fd < 3);
+ GF_ASSERT(fd > 0 && fd < 3);
- if ((fd > 0) && (fd < 3))
- runner->chfd[fd] = (tgt_fd >= 0) ? tgt_fd : -2;
+ if ((fd > 0) && (fd < 3))
+ runner->chfd[fd] = (tgt_fd >= 0) ? tgt_fd : -2;
}
int
-runner_start (runner_t *runner)
+runner_start(runner_t *runner)
{
- int pi[3][2] = {{-1, -1}, {-1, -1}, {-1, -1}};
- int xpi[2];
- int ret = 0;
- int errno_priv = 0;
- int i = 0;
- sigset_t set;
-
- if (runner->runerr) {
- errno = runner->runerr;
- return -1;
- }
-
- GF_ASSERT (runner->argv[0]);
-
- /* set up a channel to child to communicate back
- * possible execve(2) failures
- */
- ret = pipe(xpi);
- if (ret != -1)
- ret = fcntl (xpi[1], F_SETFD, FD_CLOEXEC);
-
- for (i = 0; i < 3; i++) {
- if (runner->chfd[i] != -2)
- continue;
- ret = pipe (pi[i]);
- if (ret != -1) {
- runner->chio[i] = fdopen (pi[i][i ? 0 : 1], i ? "r" : "w");
- if (!runner->chio[i])
- ret = -1;
- }
+ int pi[3][2] = {{-1, -1}, {-1, -1}, {-1, -1}};
+ int xpi[2];
+ int ret = 0;
+ int errno_priv = 0;
+ int i = 0;
+ sigset_t set;
+
+ if (runner->runerr || !runner->argv) {
+ errno = (runner->runerr) ? runner->runerr : EINVAL;
+ return -1;
+ }
+
+ GF_ASSERT(runner->argv[0]);
+
+ /* set up a channel to child to communicate back
+ * possible execve(2) failures
+ */
+ ret = pipe(xpi);
+ if (ret != -1)
+ ret = fcntl(xpi[1], F_SETFD, FD_CLOEXEC);
+
+ for (i = 0; i < 3; i++) {
+ if (runner->chfd[i] != -2)
+ continue;
+ ret = pipe(pi[i]);
+ if (ret != -1) {
+ runner->chio[i] = fdopen(pi[i][i ? 0 : 1], i ? "r" : "w");
+ if (!runner->chio[i])
+ ret = -1;
}
+ }
- if (ret != -1)
- runner->chpid = fork ();
- switch (runner->chpid) {
+ if (ret != -1)
+ runner->chpid = fork();
+ switch (runner->chpid) {
case -1:
- errno_priv = errno;
- close (xpi[0]);
- close (xpi[1]);
- for (i = 0; i < 3; i++) {
- close (pi[i][0]);
- close (pi[i][1]);
- }
- errno = errno_priv;
- return -1;
+ errno_priv = errno;
+ sys_close(xpi[0]);
+ sys_close(xpi[1]);
+ for (i = 0; i < 3; i++) {
+ sys_close(pi[i][0]);
+ sys_close(pi[i][1]);
+ }
+ errno = errno_priv;
+ return -1;
case 0:
- for (i = 0; i < 3; i++)
- close (pi[i][i ? 0 : 1]);
- close (xpi[0]);
- ret = 0;
-
- for (i = 0; i < 3; i++) {
- if (ret == -1)
- break;
- switch (runner->chfd[i]) {
- case -1:
- /* no redir */
- break;
- case -2:
- /* redir to pipe */
- ret = dup2 (pi[i][i ? 1 : 0], i);
- break;
- default:
- /* redir to file */
- ret = dup2 (runner->chfd[i], i);
- }
- }
-
- if (ret != -1 ) {
-#ifdef GF_LINUX_HOST_OS
- DIR *d = NULL;
- struct dirent *de = NULL;
- char *e = NULL;
-
- d = opendir ("/proc/self/fd");
- if (d) {
- while ((de = readdir (d))) {
- i = strtoul (de->d_name, &e, 10);
- if (*e == '\0' && i > 2 &&
- i != dirfd (d) && i != xpi[1])
- close (i);
- }
- closedir (d);
- } else
- ret = -1;
-#else
- struct rlimit rl;
- ret = getrlimit (RLIMIT_NOFILE, &rl);
- GF_ASSERT (ret == 0);
-
- for (i = 3; i < rl.rlim_cur; i++) {
- if (i != xpi[1])
- close (i);
- }
-#endif
- }
-
- if (ret != -1) {
- /* save child from inheriting our singal handling */
- sigemptyset (&set);
- sigprocmask (SIG_SETMASK, &set, NULL);
-
- execvp (runner->argv[0], runner->argv);
- }
- ret = write (xpi[1], &errno, sizeof (errno));
- _exit (1);
- }
-
- errno_priv = errno;
- for (i = 0; i < 3; i++)
- close (pi[i][i ? 1 : 0]);
- close (xpi[1]);
- if (ret == -1) {
- for (i = 0; i < 3; i++) {
- if (runner->chio[i]) {
- fclose (runner->chio[i]);
- runner->chio[i] = NULL;
- }
+ for (i = 0; i < 3; i++)
+ sys_close(pi[i][i ? 0 : 1]);
+ sys_close(xpi[0]);
+ ret = 0;
+
+ for (i = 0; i < 3; i++) {
+ if (ret == -1)
+ break;
+ switch (runner->chfd[i]) {
+ case -1:
+ /* no redir */
+ break;
+ case -2:
+ /* redir to pipe */
+ ret = dup2(pi[i][i ? 1 : 0], i);
+ break;
+ default:
+ /* redir to file */
+ ret = dup2(runner->chfd[i], i);
}
- } else {
- ret = read (xpi[0], (char *)&errno_priv, sizeof (errno_priv));
- close (xpi[0]);
- if (ret <= 0)
- return 0;
- GF_ASSERT (ret == sizeof (errno_priv));
+ }
+
+ if (ret != -1) {
+ int fdv[4] = {0, 1, 2, xpi[1]};
+
+ ret = close_fds_except(fdv, sizeof(fdv) / sizeof(*fdv));
+ }
+
+ if (ret != -1) {
+ /* save child from inheriting our signal handling */
+ sigemptyset(&set);
+ sigprocmask(SIG_SETMASK, &set, NULL);
+
+ execvp(runner->argv[0], runner->argv);
+ }
+ ret = sys_write(xpi[1], &errno, sizeof(errno));
+ _exit(1);
+ }
+
+ errno_priv = errno;
+ for (i = 0; i < 3; i++)
+ sys_close(pi[i][i ? 1 : 0]);
+ sys_close(xpi[1]);
+ if (ret == -1) {
+ for (i = 0; i < 3; i++) {
+ if (runner->chio[i]) {
+ fclose(runner->chio[i]);
+ runner->chio[i] = NULL;
+ }
}
- errno = errno_priv;
- return -1;
+ } else {
+ ret = sys_read(xpi[0], (char *)&errno_priv, sizeof(errno_priv));
+ sys_close(xpi[0]);
+ if (ret <= 0)
+ return 0;
+ GF_ASSERT(ret == sizeof(errno_priv));
+ }
+ errno = errno_priv;
+ return -1;
}
int
-runner_end_reuse (runner_t *runner)
+runner_end_reuse(runner_t *runner)
{
- int i = 0;
- int ret = -1;
- int chstat = 0;
-
- if (runner->chpid > 0) {
- if (waitpid (runner->chpid, &chstat, 0) == runner->chpid)
- ret = chstat;
+ int i = 0;
+ int ret = 1;
+ int chstat = 0;
+
+ if (runner->chpid > 0) {
+ if (waitpid(runner->chpid, &chstat, 0) == runner->chpid) {
+ if (WIFEXITED(chstat)) {
+ ret = WEXITSTATUS(chstat);
+ } else {
+ ret = chstat;
+ }
}
+ }
- for (i = 0; i < 3; i++) {
- if (runner->chio[i]) {
- fclose (runner->chio[i]);
- runner->chio[i] = NULL;
- }
+ for (i = 0; i < 3; i++) {
+ if (runner->chio[i]) {
+ fclose(runner->chio[i]);
+ runner->chio[i] = NULL;
}
+ }
- return ret;
+ return -ret;
}
int
-runner_end (runner_t *runner)
+runner_end(runner_t *runner)
{
- int i = 0;
- int ret = -1;
- char **p = NULL;
+ int i = 0;
+ int ret = -1;
+ char **p = NULL;
- ret = runner_end_reuse (runner);
+ ret = runner_end_reuse(runner);
- if (runner->argv) {
- for (p = runner->argv; *p; p++)
- GF_FREE (*p);
- GF_FREE (runner->argv);
- }
- for (i = 0; i < 3; i++)
- close (runner->chfd[i]);
+ if (runner->argv) {
+ for (p = runner->argv; *p; p++)
+ GF_FREE(*p);
+ GF_FREE(runner->argv);
+ }
+ for (i = 0; i < 3; i++)
+ sys_close(runner->chfd[i]);
- return ret;
+ return ret;
}
static int
-runner_run_generic (runner_t *runner, int (*rfin)(runner_t *runner))
+runner_run_generic(runner_t *runner, int (*rfin)(runner_t *runner))
{
- int ret = 0;
+ int ret = 0;
- ret = runner_start (runner);
+ ret = runner_start(runner);
+ if (ret)
+ goto out;
+ ret = rfin(runner);
- return -(rfin (runner) || ret);
+out:
+ return ret;
}
int
-runner_run (runner_t *runner)
+runner_run(runner_t *runner)
{
- return runner_run_generic (runner, runner_end);
+ return runner_run_generic(runner, runner_end);
}
int
-runner_run_reuse (runner_t *runner)
+runner_run_nowait(runner_t *runner)
{
- return runner_run_generic (runner, runner_end_reuse);
+ int pid;
+
+ pid = fork();
+
+ if (!pid) {
+ setsid();
+ _exit(runner_start(runner));
+ }
+
+ if (pid > 0)
+ runner->chpid = pid;
+ return runner_end(runner);
+}
+
+int
+runner_run_reuse(runner_t *runner)
+{
+ return runner_run_generic(runner, runner_end_reuse);
}
int
-runcmd (const char *arg, ...)
+runcmd(const char *arg, ...)
{
- runner_t runner;
- va_list argp;
+ runner_t runner;
+ va_list argp;
- runinit (&runner);
- /* ISO C requires a named argument before '...' */
- runner_add_arg (&runner, arg);
+ runinit(&runner);
+ /* ISO C requires a named argument before '...' */
+ runner_add_arg(&runner, arg);
- va_start (argp, arg);
- runner_va_add_args (&runner, argp);
- va_end (argp);
+ va_start(argp, arg);
+ runner_va_add_args(&runner, argp);
+ va_end(argp);
- return runner_run (&runner);
+ return runner_run(&runner);
}
-#ifdef RUN_DO_TESTS
+#ifdef RUN_DO_DEMO
static void
-TBANNER (const char *txt)
+TBANNER(const char *txt)
{
- printf("######\n### testing %s\n", txt);
+ printf("######\n### demoing %s\n", txt);
}
int
-main (int argc, char **argv)
+main(int argc, char **argv)
{
- runner_t runner;
- char buf[80];
- char *wdbuf;;
- int ret;
- int fd;
- long pathmax = pathconf ("/", _PC_PATH_MAX);
- struct timeval tv = {0,};
- struct timeval *tvp = NULL;
-
- wdbuf = malloc (pathmax);
- assert (wdbuf);
- getcwd (wdbuf, pathmax);
-
- TBANNER ("basic functionality");
- runcmd ("echo", "a", "b", NULL);
-
- TBANNER ("argv extension");
- runcmd ("echo", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
- "11", "12", "13", "14", "15", "16", "17", "18", "19", "20",
- "21", "22", "23", "24", "25", "26", "27", "28", "29", "30",
- "31", "32", "33", "34", "35", "36", "37", "38", "39", "40",
- "41", "42", "43", "44", "45", "46", "47", "48", "49", "50",
- "51", "52", "53", "54", "55", "56", "57", "58", "59", "60",
- "61", "62", "63", "64", "65", "66", "67", "68", "69", "70",
- "71", "72", "73", "74", "75", "76", "77", "78", "79", "80",
- "81", "82", "83", "84", "85", "86", "87", "88", "89", "90",
- "91", "92", "93", "94", "95", "96", "97", "98", "99", "100", NULL);
-
- TBANNER ("add_args, argprintf, log, and popen-style functionality");
- runinit (&runner);
- runner_add_args (&runner, "echo", "pid:", NULL);
- runner_argprintf (&runner, "%d\n", getpid());
- runner_add_arg (&runner, "wd:");
- runner_add_arg (&runner, wdbuf);
- runner_redir (&runner, 1, RUN_PIPE);
- runner_start (&runner);
- runner_log (&runner, "(x)", LOG_DEBUG, "starting program");
- while (fgets (buf, sizeof(buf), runner_chio (&runner, 1)))
- printf ("got: %s", buf);
- runner_end (&runner);
-
- TBANNER ("execve error reporting");
- ret = runcmd ("bafflavvitty", NULL);
- printf ("%d %d [%s]\n", ret, errno, strerror (errno));
-
- TBANNER ("output redirection");
- fd = open ("/tmp/foof", O_WRONLY|O_CREAT|O_TRUNC, 0600);
- assert (fd != -1);
- runinit (&runner);
- runner_add_args (&runner, "echo", "foo", NULL);
- runner_redir (&runner, 1, fd);
- ret = runner_run (&runner);
- printf ("%d", ret);
- if (ret != 0)
- printf (" %d [%s]", errno, strerror (errno));
- putchar ('\n');
-
- if (argc > 1) {
- tv.tv_sec = strtoul (argv[1], NULL, 10);
- if (tv.tv_sec > 0)
- tvp = &tv;
- select (0, 0, 0, 0, tvp);
- }
-
- return 0;
+ runner_t runner;
+ char buf[80];
+ char *wdbuf;
+ ;
+ int ret;
+ int fd;
+ long pathmax = pathconf("/", _PC_PATH_MAX);
+ struct timeval tv = {
+ 0,
+ };
+ struct timeval *tvp = NULL;
+ char *tfile;
+
+ wdbuf = malloc(pathmax);
+ assert(wdbuf);
+ getcwd(wdbuf, pathmax);
+
+ TBANNER("basic functionality: running \"echo a b\"");
+ runcmd("echo", "a", "b", NULL);
+
+ TBANNER("argv extension: running \"echo 1 2 ... 100\"");
+ runcmd("echo", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11",
+ "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22",
+ "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33",
+ "34", "35", "36", "37", "38", "39", "40", "41", "42", "43", "44",
+ "45", "46", "47", "48", "49", "50", "51", "52", "53", "54", "55",
+ "56", "57", "58", "59", "60", "61", "62", "63", "64", "65", "66",
+ "67", "68", "69", "70", "71", "72", "73", "74", "75", "76", "77",
+ "78", "79", "80", "81", "82", "83", "84", "85", "86", "87", "88",
+ "89", "90", "91", "92", "93", "94", "95", "96", "97", "98", "99",
+ "100", NULL);
+
+ TBANNER(
+ "add_args, argprintf, log, and popen-style functionality:\n"
+ " running a multiline echo command, emit a log about it,\n"
+ " redirect it to a pipe, read output lines\n"
+ " and print them prefixed with \"got: \"");
+ runinit(&runner);
+ runner_add_args(&runner, "echo", "pid:", NULL);
+ runner_argprintf(&runner, "%d\n", getpid());
+ runner_add_arg(&runner, "wd:");
+ runner_add_arg(&runner, wdbuf);
+ runner_redir(&runner, 1, RUN_PIPE);
+ runner_start(&runner);
+ runner_log(&runner, "(x)", LOG_DEBUG, "starting program");
+ while (fgets(buf, sizeof(buf), runner_chio(&runner, 1)))
+ printf("got: %s", buf);
+ runner_end(&runner);
+
+ TBANNER("execve error reporting: running a non-existent command");
+ ret = runcmd("bafflavvitty", NULL);
+ printf("%d %d [%s]\n", ret, errno, strerror(errno));
+
+ TBANNER(
+ "output redirection: running \"echo foo\" redirected "
+ "to a temp file");
+ tfile = strdup("/tmp/foofXXXXXX");
+ assert(tfile);
+ fd = mkstemp(tfile);
+ assert(fd != -1);
+ printf("redirecting to %s\n", tfile);
+ runinit(&runner);
+ runner_add_args(&runner, "echo", "foo", NULL);
+ runner_redir(&runner, 1, fd);
+ ret = runner_run(&runner);
+ printf("runner_run returned: %d", ret);
+ if (ret != 0)
+ printf(", with errno %d [%s]", errno, strerror(errno));
+ putchar('\n');
+
+ /* sleep for seconds given as argument (0 means forever)
+ * to allow investigation of post-execution state to
+ * cbeck for resource leaks (eg. zombies).
+ */
+ if (argc > 1) {
+ tv.tv_sec = strtoul(argv[1], NULL, 10);
+ printf("### %s", "sleeping for");
+ if (tv.tv_sec > 0) {
+ printf(" %d seconds\n", tv.tv_sec);
+ tvp = &tv;
+ } else
+ printf("%s\n", "ever");
+ select(0, 0, 0, 0, tvp);
+ }
+
+ return 0;
}
#endif
diff --git a/libglusterfs/src/scheduler.c b/libglusterfs/src/scheduler.c
deleted file mode 100644
index 9817f3e26b6..00000000000
--- a/libglusterfs/src/scheduler.c
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <dlfcn.h>
-#include <netdb.h>
-#include "xlator.h"
-#include "scheduler.h"
-#include "list.h"
-
-struct sched_ops *
-get_scheduler (xlator_t *xl, const char *name)
-{
- struct sched_ops *tmp_sched = NULL;
- volume_opt_list_t *vol_opt = NULL;
- char *sched_file = NULL;
- void *handle = NULL;
- int ret = 0;
-
- if (name == NULL) {
- gf_log ("scheduler", GF_LOG_ERROR,
- "'name' not specified, EINVAL");
- return NULL;
- }
-
- ret = gf_asprintf (&sched_file, "%s/%s.so", SCHEDULERDIR, name);
- if (-1 == ret) {
- gf_log ("scheduler", GF_LOG_ERROR, "asprintf failed");
- return NULL;
- }
-
- gf_log ("scheduler", GF_LOG_DEBUG,
- "attempt to load file %s.so", name);
-
- handle = dlopen (sched_file, RTLD_LAZY);
- if (!handle) {
- gf_log ("scheduler", GF_LOG_ERROR,
- "dlopen(%s): %s", sched_file, dlerror ());
- GF_FREE(sched_file);
- return NULL;
- }
-
- tmp_sched = dlsym (handle, "sched");
- if (!tmp_sched) {
- gf_log ("scheduler", GF_LOG_ERROR,
- "dlsym(sched) on %s", dlerror ());
- GF_FREE(sched_file);
- return NULL;
- }
-
- vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
- gf_common_mt_volume_opt_list_t);
- vol_opt->given_opt = dlsym (handle, "options");
- if (vol_opt->given_opt == NULL) {
- gf_log ("scheduler", GF_LOG_DEBUG,
- "volume option validation not specified");
- } else {
- list_add_tail (&vol_opt->list, &xl->volume_options);
- if (validate_xlator_volume_options (xl, vol_opt->given_opt)
- == -1) {
- gf_log ("scheduler", GF_LOG_ERROR,
- "volume option validation failed");
- GF_FREE(sched_file);
- return NULL;
- }
- }
- GF_FREE(sched_file);
- GF_FREE (vol_opt);
-
- return tmp_sched;
-}
diff --git a/libglusterfs/src/scheduler.h b/libglusterfs/src/scheduler.h
deleted file mode 100644
index 2f1e12205df..00000000000
--- a/libglusterfs/src/scheduler.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _SCHEDULER_H
-#define _SCHEDULER_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-
-struct sched_ops {
- int32_t (*init) (xlator_t *this);
- void (*fini) (xlator_t *this);
- void (*update) (xlator_t *this);
- xlator_t *(*schedule) (xlator_t *this, const void *path);
- void (*notify) (xlator_t *xl, int32_t event, void *data);
- int32_t (*mem_acct_init) (xlator_t *this);
-};
-
-extern struct sched_ops *get_scheduler (xlator_t *xl, const char *name);
-
-#endif /* _SCHEDULER_H */
diff --git a/libglusterfs/src/stack.c b/libglusterfs/src/stack.c
index f922be83c7f..1531f0da43f 100644
--- a/libglusterfs/src/stack.c
+++ b/libglusterfs/src/stack.c
@@ -8,351 +8,445 @@
cases as published by the Free Software Foundation.
*/
-#include "statedump.h"
-#include "stack.h"
+#include "glusterfs/statedump.h"
+#include "glusterfs/stack.h"
+#include "glusterfs/libglusterfs-messages.h"
-static inline
-int call_frames_count (call_frame_t *call_frame)
+call_frame_t *
+create_frame(xlator_t *xl, call_pool_t *pool)
{
- call_frame_t *pos;
- int32_t count = 0;
-
- if (!call_frame)
- return count;
-
- for (pos = call_frame; pos != NULL; pos = pos->next)
- count++;
+ call_stack_t *stack = NULL;
+ call_frame_t *frame = NULL;
+ static uint64_t unique = 0;
+
+ if (!xl || !pool) {
+ return NULL;
+ }
+
+ stack = mem_get0(pool->stack_mem_pool);
+ if (!stack)
+ return NULL;
+
+ INIT_LIST_HEAD(&stack->myframes);
+
+ frame = mem_get0(pool->frame_mem_pool);
+ if (!frame) {
+ mem_put(stack);
+ return NULL;
+ }
+
+ frame->root = stack;
+ frame->this = xl;
+ LOCK_INIT(&frame->lock);
+ INIT_LIST_HEAD(&frame->frames);
+ list_add(&frame->frames, &stack->myframes);
+
+ stack->pool = pool;
+ stack->ctx = xl->ctx;
+
+ if (frame->root->ctx->measure_latency) {
+ timespec_now(&stack->tv);
+ memcpy(&frame->begin, &stack->tv, sizeof(stack->tv));
+ }
+
+ LOCK(&pool->lock);
+ {
+ list_add(&stack->all_frames, &pool->all_frames);
+ pool->cnt++;
+ stack->unique = unique++;
+ }
+ UNLOCK(&pool->lock);
+ GF_ATOMIC_INC(pool->total_count);
+
+ LOCK_INIT(&stack->stack_lock);
+
+ return frame;
+}
- return count;
+void
+call_stack_set_groups(call_stack_t *stack, int ngrps, gid_t **groupbuf_p)
+{
+ /* We take the ownership of the passed group buffer. */
+
+ if (ngrps <= SMALL_GROUP_COUNT) {
+ memcpy(stack->groups_small, *groupbuf_p, sizeof(gid_t) * ngrps);
+ stack->groups = stack->groups_small;
+ GF_FREE(*groupbuf_p);
+ } else {
+ stack->groups_large = *groupbuf_p;
+ stack->groups = stack->groups_large;
+ }
+
+ stack->ngrps = ngrps;
+ /* Set a canary. */
+ *groupbuf_p = (void *)0xdeadf00d;
}
void
-gf_proc_dump_call_frame (call_frame_t *call_frame, const char *key_buf,...)
+gf_proc_dump_call_frame(call_frame_t *call_frame, const char *key_buf, ...)
{
+ char prefix[GF_DUMP_MAX_BUF_LEN];
+ va_list ap;
+ call_frame_t my_frame = {
+ 0,
+ };
+
+ int ret = -1;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ int len;
+
+ if (!call_frame)
+ return;
- char prefix[GF_DUMP_MAX_BUF_LEN];
- va_list ap;
- call_frame_t my_frame;
- int ret = -1;
+ GF_ASSERT(key_buf);
- if (!call_frame)
- return;
+ va_start(ap, key_buf);
+ vsnprintf(prefix, GF_DUMP_MAX_BUF_LEN, key_buf, ap);
+ va_end(ap);
- GF_ASSERT (key_buf);
+ ret = TRY_LOCK(&call_frame->lock);
+ if (ret)
+ goto out;
- memset(prefix, 0, sizeof(prefix));
- memset(&my_frame, 0, sizeof(my_frame));
- va_start(ap, key_buf);
- vsnprintf(prefix, GF_DUMP_MAX_BUF_LEN, key_buf, ap);
- va_end(ap);
+ memcpy(&my_frame, call_frame, sizeof(my_frame));
+ UNLOCK(&call_frame->lock);
- ret = TRY_LOCK(&call_frame->lock);
- if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to dump call frame"
- " errno: %s", strerror (errno));
- return;
- }
+ if (my_frame.root->ctx->measure_latency) {
+ gf_time_fmt(timestr, sizeof(timestr), my_frame.begin.tv_sec,
+ gf_timefmt_FT);
+ len = strlen(timestr);
+ snprintf(timestr + len, sizeof(timestr) - len, ".%" GF_PRI_SNSECONDS,
+ my_frame.begin.tv_nsec);
+ gf_proc_dump_write("frame-creation-time", "%s", timestr);
+ gf_proc_dump_write(
+ "timings", "%ld.%" GF_PRI_SNSECONDS " -> %ld.%" GF_PRI_SNSECONDS,
+ my_frame.begin.tv_sec, my_frame.begin.tv_nsec, my_frame.end.tv_sec,
+ my_frame.end.tv_nsec);
+ }
- memcpy(&my_frame, call_frame, sizeof(my_frame));
- UNLOCK(&call_frame->lock);
+ gf_proc_dump_write("frame", "%p", call_frame);
+ gf_proc_dump_write("ref_count", "%d", my_frame.ref_count);
+ gf_proc_dump_write("translator", "%s", my_frame.this->name);
+ gf_proc_dump_write("complete", "%d", my_frame.complete);
- gf_proc_dump_write("ref_count", "%d", my_frame.ref_count);
- gf_proc_dump_write("translator", "%s", my_frame.this->name);
- gf_proc_dump_write("complete", "%d", my_frame.complete);
- if (my_frame.parent)
- gf_proc_dump_write("parent", "%s", my_frame.parent->this->name);
+ if (my_frame.parent)
+ gf_proc_dump_write("parent", "%s", my_frame.parent->this->name);
- if (my_frame.wind_from)
- gf_proc_dump_write("wind_from", "%s", my_frame.wind_from);
+ if (my_frame.wind_from)
+ gf_proc_dump_write("wind_from", "%s", my_frame.wind_from);
- if (my_frame.wind_to)
- gf_proc_dump_write("wind_to", "%s", my_frame.wind_to);
+ if (my_frame.wind_to)
+ gf_proc_dump_write("wind_to", "%s", my_frame.wind_to);
- if (my_frame.unwind_from)
- gf_proc_dump_write("unwind_from", "%s", my_frame.unwind_from);
+ if (my_frame.unwind_from)
+ gf_proc_dump_write("unwind_from", "%s", my_frame.unwind_from);
- if (my_frame.unwind_to)
- gf_proc_dump_write("unwind_to", "%s", my_frame.unwind_to);
-}
+ if (my_frame.unwind_to)
+ gf_proc_dump_write("unwind_to", "%s", my_frame.unwind_to);
+ ret = 0;
+out:
+ if (ret) {
+ gf_proc_dump_write("Unable to dump the frame information",
+ "(Lock acquisition failed)");
+ return;
+ }
+}
void
-gf_proc_dump_call_stack (call_stack_t *call_stack, const char *key_buf,...)
+gf_proc_dump_call_stack(call_stack_t *call_stack, const char *key_buf, ...)
{
- char prefix[GF_DUMP_MAX_BUF_LEN];
- va_list ap;
- call_frame_t *trav;
- int32_t cnt, i;
-
- if (!call_stack)
- return;
-
- GF_ASSERT (key_buf);
-
- cnt = call_frames_count(&call_stack->frames);
-
- memset(prefix, 0, sizeof(prefix));
- va_start(ap, key_buf);
- vsnprintf(prefix, GF_DUMP_MAX_BUF_LEN, key_buf, ap);
- va_end(ap);
-
- gf_proc_dump_write("uid", "%d", call_stack->uid);
- gf_proc_dump_write("gid", "%d", call_stack->gid);
- gf_proc_dump_write("pid", "%d", call_stack->pid);
- gf_proc_dump_write("unique", "%Ld", call_stack->unique);
- gf_proc_dump_write("lk-owner", "%s", lkowner_utoa (&call_stack->lk_owner));
-
- if (call_stack->type == GF_OP_TYPE_FOP)
- gf_proc_dump_write("op", "%s", gf_fop_list[call_stack->op]);
- else if (call_stack->type == GF_OP_TYPE_MGMT)
- gf_proc_dump_write("op", "%s", gf_mgmt_list[call_stack->op]);
-
- gf_proc_dump_write("type", "%d", call_stack->type);
- gf_proc_dump_write("cnt", "%d", cnt);
-
- trav = &call_stack->frames;
-
- for (i = 1; i <= cnt; i++) {
- if (trav) {
- gf_proc_dump_add_section("%s.frame.%d", prefix, i);
- gf_proc_dump_call_frame(trav, "%s.frame.%d", prefix, i);
- trav = trav->next;
- }
- }
+ char prefix[GF_DUMP_MAX_BUF_LEN];
+ va_list ap;
+ call_frame_t *trav;
+ int32_t i = 1, cnt = 0;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ int len;
+
+ if (!call_stack)
+ return;
+
+ GF_ASSERT(key_buf);
+
+ va_start(ap, key_buf);
+ vsnprintf(prefix, GF_DUMP_MAX_BUF_LEN, key_buf, ap);
+ va_end(ap);
+
+ cnt = call_frames_count(call_stack);
+ gf_time_fmt(timestr, sizeof(timestr), call_stack->tv.tv_sec, gf_timefmt_FT);
+ len = strlen(timestr);
+ snprintf(timestr + len, sizeof(timestr) - len, ".%" GF_PRI_SNSECONDS,
+ call_stack->tv.tv_nsec);
+ gf_proc_dump_write("callstack-creation-time", "%s", timestr);
+
+ gf_proc_dump_write("stack", "%p", call_stack);
+ gf_proc_dump_write("uid", "%d", call_stack->uid);
+ gf_proc_dump_write("gid", "%d", call_stack->gid);
+ gf_proc_dump_write("pid", "%d", call_stack->pid);
+ gf_proc_dump_write("unique", "%" PRIu64, call_stack->unique);
+ gf_proc_dump_write("lk-owner", "%s", lkowner_utoa(&call_stack->lk_owner));
+ gf_proc_dump_write("ctime", "%" GF_PRI_SECOND ".%" GF_PRI_SNSECONDS,
+ call_stack->tv.tv_sec, call_stack->tv.tv_nsec);
+
+ if (call_stack->type == GF_OP_TYPE_FOP)
+ gf_proc_dump_write("op", "%s", (char *)gf_fop_list[call_stack->op]);
+ else
+ gf_proc_dump_write("op", "stack");
+
+ gf_proc_dump_write("type", "%d", call_stack->type);
+ gf_proc_dump_write("cnt", "%d", cnt);
+
+ list_for_each_entry(trav, &call_stack->myframes, frames)
+ {
+ gf_proc_dump_add_section("%s.frame.%d", prefix, i);
+ gf_proc_dump_call_frame(trav, "%s.frame.%d", prefix, i);
+ i++;
+ }
}
void
-gf_proc_dump_pending_frames (call_pool_t *call_pool)
+gf_proc_dump_pending_frames(call_pool_t *call_pool)
{
+ call_stack_t *trav = NULL;
+ int i = 1;
+ int ret = -1;
+ gf_boolean_t section_added = _gf_false;
- call_stack_t *trav = NULL;
- int i = 1;
- int ret = -1;
-
- if (!call_pool)
- return;
-
- ret = TRY_LOCK (&(call_pool->lock));
- if (ret) {
- gf_log("", GF_LOG_WARNING, "Unable to dump call pool"
- " errno: %d", errno);
- return;
- }
+ if (!call_pool)
+ return;
+ ret = TRY_LOCK(&(call_pool->lock));
+ if (ret)
+ goto out;
- gf_proc_dump_add_section("global.callpool");
- gf_proc_dump_write("callpool_address","%p", call_pool);
- gf_proc_dump_write("callpool.cnt","%d", call_pool->cnt);
+ gf_proc_dump_add_section("global.callpool");
+ section_added = _gf_true;
+ gf_proc_dump_write("callpool_address", "%p", call_pool);
+ gf_proc_dump_write("callpool.cnt", "%" PRId64, call_pool->cnt);
+ list_for_each_entry(trav, &call_pool->all_frames, all_frames)
+ {
+ gf_proc_dump_add_section("global.callpool.stack.%d", i);
+ gf_proc_dump_call_stack(trav, "global.callpool.stack.%d", i);
+ i++;
+ }
+ UNLOCK(&(call_pool->lock));
- list_for_each_entry (trav, &call_pool->all_frames, all_frames) {
- gf_proc_dump_add_section("global.callpool.stack.%d",i);
- gf_proc_dump_call_stack(trav, "global.callpool.stack.%d", i);
- i++;
- }
- UNLOCK (&(call_pool->lock));
+ ret = 0;
+out:
+ if (ret) {
+ if (_gf_false == section_added)
+ gf_proc_dump_add_section("global.callpool");
+ gf_proc_dump_write("Unable to dump the callpool",
+ "(Lock acquisition failed) %p", call_pool);
+ }
+ return;
}
void
-gf_proc_dump_call_frame_to_dict (call_frame_t *call_frame,
- char *prefix, dict_t *dict)
+gf_proc_dump_call_frame_to_dict(call_frame_t *call_frame, char *prefix,
+ dict_t *dict)
{
- int ret = -1;
- char key[GF_DUMP_MAX_BUF_LEN] = {0,};
- call_frame_t tmp_frame = {0,};
+ int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ char msg[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ call_frame_t tmp_frame = {
+ 0,
+ };
+
+ if (!call_frame || !dict)
+ return;
- if (!call_frame || !dict)
- return;
+ ret = TRY_LOCK(&call_frame->lock);
+ if (ret)
+ return;
+ memcpy(&tmp_frame, call_frame, sizeof(tmp_frame));
+ UNLOCK(&call_frame->lock);
+
+ snprintf(key, sizeof(key), "%s.refcount", prefix);
+ ret = dict_set_int32(dict, key, tmp_frame.ref_count);
+ if (ret)
+ return;
- ret = TRY_LOCK (&call_frame->lock);
+ snprintf(key, sizeof(key), "%s.translator", prefix);
+ ret = dict_set_dynstr(dict, key, gf_strdup(tmp_frame.this->name));
+ if (ret)
+ return;
+
+ snprintf(key, sizeof(key), "%s.complete", prefix);
+ ret = dict_set_int32(dict, key, tmp_frame.complete);
+ if (ret)
+ return;
+
+ if (tmp_frame.root->ctx->measure_latency) {
+ snprintf(key, sizeof(key), "%s.timings", prefix);
+ snprintf(msg, sizeof(msg),
+ "%ld.%" GF_PRI_SNSECONDS " -> %ld.%" GF_PRI_SNSECONDS,
+ tmp_frame.begin.tv_sec, tmp_frame.begin.tv_nsec,
+ tmp_frame.end.tv_sec, tmp_frame.end.tv_nsec);
+ ret = dict_set_str(dict, key, msg);
if (ret)
- return;
- memcpy (&tmp_frame, call_frame, sizeof (tmp_frame));
- UNLOCK (&call_frame->lock);
+ return;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.refcount", prefix);
- ret = dict_set_int32 (dict, key, tmp_frame.ref_count);
+ if (tmp_frame.parent) {
+ snprintf(key, sizeof(key), "%s.parent", prefix);
+ ret = dict_set_dynstr(dict, key,
+ gf_strdup(tmp_frame.parent->this->name));
if (ret)
- return;
+ return;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.translator", prefix);
- ret = dict_set_dynstr (dict, key, gf_strdup (tmp_frame.this->name));
+ if (tmp_frame.wind_from) {
+ snprintf(key, sizeof(key), "%s.windfrom", prefix);
+ ret = dict_set_dynstr(dict, key, gf_strdup(tmp_frame.wind_from));
if (ret)
- return;
+ return;
+ }
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.complete", prefix);
- ret = dict_set_int32 (dict, key, tmp_frame.complete);
+ if (tmp_frame.wind_to) {
+ snprintf(key, sizeof(key), "%s.windto", prefix);
+ ret = dict_set_dynstr(dict, key, gf_strdup(tmp_frame.wind_to));
if (ret)
- return;
-
- if (tmp_frame.parent) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.parent", prefix);
- ret = dict_set_dynstr (dict, key,
- gf_strdup (tmp_frame.parent->this->name));
- if (ret)
- return;
- }
-
- if (tmp_frame.wind_from) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.windfrom", prefix);
- ret = dict_set_dynstr (dict, key,
- gf_strdup (tmp_frame.wind_from));
- if (ret)
- return;
- }
-
- if (tmp_frame.wind_to) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.windto", prefix);
- ret = dict_set_dynstr (dict, key,
- gf_strdup (tmp_frame.wind_to));
- if (ret)
- return;
- }
-
- if (tmp_frame.unwind_from) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unwindfrom", prefix);
- ret = dict_set_dynstr (dict, key,
- gf_strdup (tmp_frame.unwind_from));
- if (ret)
- return;
- }
-
- if (tmp_frame.unwind_to) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unwind_to", prefix);
- ret = dict_set_dynstr (dict, key,
- gf_strdup (tmp_frame.unwind_to));
- }
+ return;
+ }
- return;
+ if (tmp_frame.unwind_from) {
+ snprintf(key, sizeof(key), "%s.unwindfrom", prefix);
+ ret = dict_set_dynstr(dict, key, gf_strdup(tmp_frame.unwind_from));
+ if (ret)
+ return;
+ }
+
+ if (tmp_frame.unwind_to) {
+ snprintf(key, sizeof(key), "%s.unwind_to", prefix);
+ ret = dict_set_dynstr(dict, key, gf_strdup(tmp_frame.unwind_to));
+ if (ret)
+ return;
+ }
+
+ return;
}
void
-gf_proc_dump_call_stack_to_dict (call_stack_t *call_stack,
- char *prefix, dict_t *dict)
+gf_proc_dump_call_stack_to_dict(call_stack_t *call_stack, char *prefix,
+ dict_t *dict)
{
- int ret = -1;
- char key[GF_DUMP_MAX_BUF_LEN] = {0,};
- call_frame_t *trav = NULL;
- int count = 0;
- int i = 0;
-
- if (!call_stack || !dict)
- return;
+ int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ call_frame_t *trav = NULL;
+ int i = 0;
+ int count = 0;
+
+ if (!call_stack || !dict)
+ return;
- count = call_frames_count (&call_stack->frames);
+ count = call_frames_count(call_stack);
+ snprintf(key, sizeof(key), "%s.uid", prefix);
+ ret = dict_set_int32(dict, key, call_stack->uid);
+ if (ret)
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.uid", prefix);
- ret = dict_set_int32 (dict, key, call_stack->uid);
- if (ret)
- return;
+ snprintf(key, sizeof(key), "%s.gid", prefix);
+ ret = dict_set_int32(dict, key, call_stack->gid);
+ if (ret)
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.gid", prefix);
- ret = dict_set_int32 (dict, key, call_stack->gid);
- if (ret)
- return;
+ snprintf(key, sizeof(key), "%s.pid", prefix);
+ ret = dict_set_int32(dict, key, call_stack->pid);
+ if (ret)
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.pid", prefix);
- ret = dict_set_int32 (dict, key, call_stack->pid);
- if (ret)
- return;
+ snprintf(key, sizeof(key), "%s.unique", prefix);
+ ret = dict_set_uint64(dict, key, call_stack->unique);
+ if (ret)
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.unique", prefix);
- ret = dict_set_uint64 (dict, key, call_stack->unique);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.op", prefix);
- if (call_stack->type == GF_OP_TYPE_FOP)
- ret = dict_set_str (dict, key,
- gf_fop_list[call_stack->op]);
- else if (call_stack->type == GF_OP_TYPE_MGMT)
- ret = dict_set_str (dict, key,
- gf_mgmt_list[call_stack->op]);
- if (ret)
- return;
+ snprintf(key, sizeof(key), "%s.op", prefix);
+ if (call_stack->type == GF_OP_TYPE_FOP)
+ ret = dict_set_str(dict, key, (char *)gf_fop_list[call_stack->op]);
+ else
+ ret = dict_set_str(dict, key, "other");
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.type", prefix);
- ret = dict_set_int32 (dict, key, call_stack->type);
- if (ret)
- return;
+ if (ret)
+ return;
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.count", prefix);
- ret = dict_set_int32 (dict, key, count);
- if (ret)
- return;
-
- trav = &call_stack->frames;
- for (i = 0; i < count; i++) {
- if (trav) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%s.frame%d",
- prefix, i);
- gf_proc_dump_call_frame_to_dict (trav, key, dict);
- trav = trav->next;
- }
- }
+ snprintf(key, sizeof(key), "%s.type", prefix);
+ ret = dict_set_int32(dict, key, call_stack->type);
+ if (ret)
+ return;
+ snprintf(key, sizeof(key), "%s.count", prefix);
+ ret = dict_set_int32(dict, key, count);
+ if (ret)
return;
+
+ list_for_each_entry(trav, &call_stack->myframes, frames)
+ {
+ snprintf(key, sizeof(key), "%s.frame%d", prefix, i);
+ gf_proc_dump_call_frame_to_dict(trav, key, dict);
+ i++;
+ }
+
+ return;
}
void
-gf_proc_dump_pending_frames_to_dict (call_pool_t *call_pool, dict_t *dict)
+gf_proc_dump_pending_frames_to_dict(call_pool_t *call_pool, dict_t *dict)
{
- int ret = -1;
- call_stack_t *trav = NULL;
- char key[GF_DUMP_MAX_BUF_LEN] = {0,};
- int i = 0;
-
- if (!call_pool || !dict)
- return;
-
- ret = TRY_LOCK (&call_pool->lock);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "Unable to dump call pool"
- " to dict. errno: %d", errno);
- return;
- }
-
- ret = dict_set_int32 (dict, "callpool.count", call_pool->cnt);
- if (ret)
- goto out;
+ int ret = -1;
+ call_stack_t *trav = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ int i = 0;
+
+ if (!call_pool || !dict)
+ return;
+
+ ret = TRY_LOCK(&call_pool->lock);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_LOCK_FAILURE,
+ "Unable to dump call "
+ "pool to dict.");
+ return;
+ }
+
+ ret = dict_set_int32(dict, "callpool.count", call_pool->cnt);
+ if (ret)
+ goto out;
- list_for_each_entry (trav, &call_pool->all_frames, all_frames) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "callpool.stack%d", i);
- gf_proc_dump_call_stack_to_dict (trav, key, dict);
- i++;
- }
+ list_for_each_entry(trav, &call_pool->all_frames, all_frames)
+ {
+ snprintf(key, sizeof(key), "callpool.stack%d", i);
+ gf_proc_dump_call_stack_to_dict(trav, key, dict);
+ i++;
+ }
out:
- UNLOCK (&call_pool->lock);
+ UNLOCK(&call_pool->lock);
- return;
+ return;
}
gf_boolean_t
-__is_fuse_call (call_frame_t *frame)
+__is_fuse_call(call_frame_t *frame)
{
- gf_boolean_t is_fuse_call = _gf_false;
- GF_ASSERT (frame);
- GF_ASSERT (frame->root);
+ gf_boolean_t is_fuse_call = _gf_false;
+ GF_ASSERT(frame);
+ GF_ASSERT(frame->root);
- if (NFS_PID != frame->root->pid)
- is_fuse_call = _gf_true;
- return is_fuse_call;
+ if (NFS_PID != frame->root->pid)
+ is_fuse_call = _gf_true;
+ return is_fuse_call;
}
diff --git a/libglusterfs/src/stack.h b/libglusterfs/src/stack.h
deleted file mode 100644
index 63307192ae5..00000000000
--- a/libglusterfs/src/stack.h
+++ /dev/null
@@ -1,429 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/*
- This file defines MACROS and static inlines used to emulate a function
- call over asynchronous communication with remote server
-*/
-
-#ifndef _STACK_H
-#define _STACK_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-struct _call_stack_t;
-typedef struct _call_stack_t call_stack_t;
-struct _call_frame_t;
-typedef struct _call_frame_t call_frame_t;
-struct _call_pool_t;
-typedef struct _call_pool_t call_pool_t;
-
-#include <sys/time.h>
-
-#include "xlator.h"
-#include "dict.h"
-#include "list.h"
-#include "common-utils.h"
-#include "globals.h"
-#include "lkowner.h"
-
-#define NFS_PID 1
-#define LOW_PRIO_PROC_PID -1
-typedef int32_t (*ret_fn_t) (call_frame_t *frame,
- call_frame_t *prev_frame,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- ...);
-
-struct _call_pool_t {
- union {
- struct list_head all_frames;
- struct {
- call_stack_t *next_call;
- call_stack_t *prev_call;
- } all_stacks;
- };
- int64_t cnt;
- gf_lock_t lock;
- struct mem_pool *frame_mem_pool;
- struct mem_pool *stack_mem_pool;
-};
-
-struct _call_frame_t {
- call_stack_t *root; /* stack root */
- call_frame_t *parent; /* previous BP */
- call_frame_t *next;
- call_frame_t *prev; /* maintenance list */
- void *local; /* local variables */
- xlator_t *this; /* implicit object */
- ret_fn_t ret; /* op_return address */
- int32_t ref_count;
- gf_lock_t lock;
- void *cookie; /* unique cookie */
- gf_boolean_t complete;
-
- glusterfs_fop_t op;
- struct timeval begin; /* when this frame was created */
- struct timeval end; /* when this frame completed */
- const char *wind_from;
- const char *wind_to;
- const char *unwind_from;
- const char *unwind_to;
-};
-
-struct _call_stack_t {
- union {
- struct list_head all_frames;
- struct {
- call_stack_t *next_call;
- call_stack_t *prev_call;
- };
- };
- call_pool_t *pool;
- gf_lock_t stack_lock;
- void *trans;
- uint64_t unique;
- void *state; /* pointer to request state */
- uid_t uid;
- gid_t gid;
- pid_t pid;
- uint16_t ngrps;
- uint32_t groups[GF_MAX_AUX_GROUPS];
- gf_lkowner_t lk_owner;
-
- call_frame_t frames;
-
- int32_t op;
- int8_t type;
-};
-
-
-#define frame_set_uid_gid(frm, u, g) \
- do { \
- if (frm) { \
- (frm)->root->uid = u; \
- (frm)->root->gid = g; \
- (frm)->root->ngrps = 0; \
- } \
- } while (0); \
-
-
-struct xlator_fops;
-
-void
-gf_set_fop_from_fn_pointer (call_frame_t *frame, struct xlator_fops *fops,
- void *fn);
-
-void
-gf_update_latency (call_frame_t *frame);
-
-static inline void
-FRAME_DESTROY (call_frame_t *frame)
-{
- void *local = NULL;
- if (frame->next)
- frame->next->prev = frame->prev;
- if (frame->prev)
- frame->prev->next = frame->next;
- if (frame->local) {
- local = frame->local;
- frame->local = NULL;
-
- }
-
- LOCK_DESTROY (&frame->lock);
- mem_put (frame);
-
- if (local)
- mem_put (local);
-}
-
-
-static inline void
-STACK_DESTROY (call_stack_t *stack)
-{
- void *local = NULL;
-
- LOCK (&stack->pool->lock);
- {
- list_del_init (&stack->all_frames);
- stack->pool->cnt--;
- }
- UNLOCK (&stack->pool->lock);
-
- if (stack->frames.local) {
- local = stack->frames.local;
- stack->frames.local = NULL;
- }
-
- LOCK_DESTROY (&stack->frames.lock);
- LOCK_DESTROY (&stack->stack_lock);
-
- while (stack->frames.next) {
- FRAME_DESTROY (stack->frames.next);
- }
- mem_put (stack);
-
- if (local)
- mem_put (local);
-}
-
-static inline void
-STACK_RESET (call_stack_t *stack)
-{
- void *local = NULL;
-
- if (stack->frames.local) {
- local = stack->frames.local;
- stack->frames.local = NULL;
- }
-
- while (stack->frames.next) {
- FRAME_DESTROY (stack->frames.next);
- }
-
- if (local)
- mem_put (local);
-}
-
-#define cbk(x) cbk_##x
-
-#define FRAME_SU_DO(frm, local_type) \
- do { \
- local_type *__local = (frm)->local; \
- __local->uid = frm->root->uid; \
- __local->gid = frm->root->gid; \
- frm->root->uid = 0; \
- frm->root->gid = 0; \
- } while (0); \
-
-#define FRAME_SU_UNDO(frm, local_type) \
- do { \
- local_type *__local = (frm)->local; \
- frm->root->uid = __local->uid; \
- frm->root->gid = __local->gid; \
- } while (0); \
-
-
-/* make a call */
-#define STACK_WIND(frame, rfn, obj, fn, params ...) \
- do { \
- call_frame_t *_new = NULL; \
- xlator_t *old_THIS = NULL; \
- \
- _new = mem_get0 (frame->root->pool->frame_mem_pool); \
- if (!_new) { \
- gf_log ("stack", GF_LOG_ERROR, "alloc failed"); \
- break; \
- } \
- typeof(fn##_cbk) tmp_cbk = rfn; \
- _new->root = frame->root; \
- _new->this = obj; \
- _new->ret = (ret_fn_t) tmp_cbk; \
- _new->parent = frame; \
- _new->cookie = _new; \
- _new->wind_from = __FUNCTION__; \
- _new->wind_to = #fn; \
- _new->unwind_to = #rfn; \
- LOCK_INIT (&_new->lock); \
- LOCK(&frame->root->stack_lock); \
- { \
- _new->next = frame->root->frames.next; \
- _new->prev = &frame->root->frames; \
- if (frame->root->frames.next) \
- frame->root->frames.next->prev = _new; \
- frame->root->frames.next = _new; \
- frame->ref_count++; \
- } \
- UNLOCK(&frame->root->stack_lock); \
- old_THIS = THIS; \
- THIS = obj; \
- fn (_new, obj, params); \
- THIS = old_THIS; \
- } while (0)
-
-
-/* make a call with a cookie */
-#define STACK_WIND_COOKIE(frame, rfn, cky, obj, fn, params ...) \
- do { \
- call_frame_t *_new = NULL; \
- xlator_t *old_THIS = NULL; \
- \
- _new = mem_get0 (frame->root->pool->frame_mem_pool); \
- if (!_new) { \
- gf_log ("stack", GF_LOG_ERROR, "alloc failed"); \
- break; \
- } \
- typeof(fn##_cbk) tmp_cbk = rfn; \
- _new->root = frame->root; \
- _new->this = obj; \
- _new->ret = (ret_fn_t) tmp_cbk; \
- _new->parent = frame; \
- _new->cookie = cky; \
- _new->wind_from = __FUNCTION__; \
- _new->wind_to = #fn; \
- _new->unwind_to = #rfn; \
- LOCK_INIT (&_new->lock); \
- LOCK(&frame->root->stack_lock); \
- { \
- frame->ref_count++; \
- _new->next = frame->root->frames.next; \
- _new->prev = &frame->root->frames; \
- if (frame->root->frames.next) \
- frame->root->frames.next->prev = _new; \
- frame->root->frames.next = _new; \
- } \
- UNLOCK(&frame->root->stack_lock); \
- fn##_cbk = rfn; \
- old_THIS = THIS; \
- THIS = obj; \
- fn (_new, obj, params); \
- THIS = old_THIS; \
- } while (0)
-
-
-/* return from function */
-#define STACK_UNWIND(frame, params ...) \
- do { \
- ret_fn_t fn = NULL; \
- call_frame_t *_parent = NULL; \
- xlator_t *old_THIS = NULL; \
- if (!frame) { \
- gf_log ("stack", GF_LOG_CRITICAL, "!frame"); \
- break; \
- } \
- fn = frame->ret; \
- _parent = frame->parent; \
- LOCK(&frame->root->stack_lock); \
- { \
- _parent->ref_count--; \
- } \
- UNLOCK(&frame->root->stack_lock); \
- old_THIS = THIS; \
- THIS = _parent->this; \
- frame->complete = _gf_true; \
- frame->unwind_from = __FUNCTION__; \
- fn (_parent, frame->cookie, _parent->this, params); \
- THIS = old_THIS; \
- } while (0)
-
-
-/* return from function in type-safe way */
-#define STACK_UNWIND_STRICT(op, frame, params ...) \
- do { \
- fop_##op##_cbk_t fn = NULL; \
- call_frame_t *_parent = NULL; \
- xlator_t *old_THIS = NULL; \
- \
- if (!frame) { \
- gf_log ("stack", GF_LOG_CRITICAL, "!frame"); \
- break; \
- } \
- fn = (fop_##op##_cbk_t )frame->ret; \
- _parent = frame->parent; \
- LOCK(&frame->root->stack_lock); \
- { \
- _parent->ref_count--; \
- } \
- UNLOCK(&frame->root->stack_lock); \
- old_THIS = THIS; \
- THIS = _parent->this; \
- frame->complete = _gf_true; \
- frame->unwind_from = __FUNCTION__; \
- fn (_parent, frame->cookie, _parent->this, params); \
- THIS = old_THIS; \
- } while (0)
-
-
-static inline call_frame_t *
-copy_frame (call_frame_t *frame)
-{
- call_stack_t *newstack = NULL;
- call_stack_t *oldstack = NULL;
-
- if (!frame) {
- return NULL;
- }
-
- newstack = mem_get0 (frame->root->pool->stack_mem_pool);
- if (newstack == NULL) {
- return NULL;
- }
-
- oldstack = frame->root;
-
- newstack->uid = oldstack->uid;
- newstack->gid = oldstack->gid;
- newstack->pid = oldstack->pid;
- newstack->ngrps = oldstack->ngrps;
- newstack->op = oldstack->op;
- newstack->type = oldstack->type;
- memcpy (newstack->groups, oldstack->groups,
- sizeof (gid_t) * GF_MAX_AUX_GROUPS);
- newstack->unique = oldstack->unique;
-
- newstack->frames.this = frame->this;
- newstack->frames.root = newstack;
- newstack->pool = oldstack->pool;
- newstack->lk_owner = oldstack->lk_owner;
-
- LOCK_INIT (&newstack->frames.lock);
- LOCK_INIT (&newstack->stack_lock);
-
- LOCK (&oldstack->pool->lock);
- {
- list_add (&newstack->all_frames, &oldstack->all_frames);
- newstack->pool->cnt++;
- }
- UNLOCK (&oldstack->pool->lock);
-
- return &newstack->frames;
-}
-
-
-static inline call_frame_t *
-create_frame (xlator_t *xl, call_pool_t *pool)
-{
- call_stack_t *stack = NULL;
-
- if (!xl || !pool) {
- return NULL;
- }
-
- stack = mem_get0 (pool->stack_mem_pool);
- if (!stack)
- return NULL;
-
- stack->pool = pool;
- stack->frames.root = stack;
- stack->frames.this = xl;
-
- LOCK (&pool->lock);
- {
- list_add (&stack->all_frames, &pool->all_frames);
- pool->cnt++;
- }
- UNLOCK (&pool->lock);
-
- LOCK_INIT (&stack->frames.lock);
- LOCK_INIT (&stack->stack_lock);
-
- return &stack->frames;
-}
-
-void gf_proc_dump_pending_frames(call_pool_t *call_pool);
-void gf_proc_dump_pending_frames_to_dict (call_pool_t *call_pool,
- dict_t *dict);
-gf_boolean_t __is_fuse_call (call_frame_t *frame);
-#endif /* _STACK_H */
diff --git a/libglusterfs/src/statedump.c b/libglusterfs/src/statedump.c
index 6941468747e..65f0eb5c7f3 100644
--- a/libglusterfs/src/statedump.c
+++ b/libglusterfs/src/statedump.c
@@ -9,12 +9,11 @@
*/
#include <stdarg.h>
-#include "glusterfs.h"
-#include "logging.h"
-#include "iobuf.h"
-#include "statedump.h"
-#include "stack.h"
-#include "common-utils.h"
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/statedump.h"
+#include "glusterfs/stack.h"
+#include "glusterfs/syscall.h"
#ifdef HAVE_MALLOC_H
#include <malloc.h>
@@ -24,682 +23,1031 @@
'deadlock' with statedump. This is because statedump happens
inside a signal handler and cannot afford to block on a lock.*/
#ifdef gf_log
-# undef gf_log
+#undef gf_log
#endif
-#define GF_PROC_DUMP_IS_OPTION_ENABLED(opt) \
- (dump_options.dump_##opt == _gf_true)
+#define GF_PROC_DUMP_IS_OPTION_ENABLED(opt) \
+ (dump_options.dump_##opt == _gf_true)
-#define GF_PROC_DUMP_IS_XL_OPTION_ENABLED(opt) \
- (dump_options.xl_options.dump_##opt == _gf_true)
+#define GF_PROC_DUMP_IS_XL_OPTION_ENABLED(opt) \
+ (dump_options.xl_options.dump_##opt == _gf_true)
extern xlator_t global_xlator;
-static pthread_mutex_t gf_proc_dump_mutex;
+static pthread_mutex_t gf_proc_dump_mutex;
static int gf_dump_fd = -1;
gf_dump_options_t dump_options;
+static strfd_t *gf_dump_strfd = NULL;
static void
-gf_proc_dump_lock (void)
+gf_proc_dump_lock(void)
{
- pthread_mutex_lock (&gf_proc_dump_mutex);
+ pthread_mutex_lock(&gf_proc_dump_mutex);
}
-
static void
-gf_proc_dump_unlock (void)
+gf_proc_dump_unlock(void)
{
- pthread_mutex_unlock (&gf_proc_dump_mutex);
+ pthread_mutex_unlock(&gf_proc_dump_mutex);
}
-
static int
-gf_proc_dump_open (char *dump_dir, char *brickname)
+gf_proc_dump_open(char *tmpname)
{
- char path[PATH_MAX] = {0,};
- int dump_fd = -1;
+ int dump_fd = -1;
- snprintf (path, sizeof (path), "%s/%s.%d.dump", (dump_dir ?
- dump_dir : "/tmp"), brickname, getpid());
+ mode_t mask = umask(S_IRWXG | S_IRWXO);
+ dump_fd = mkstemp(tmpname);
+ umask(mask);
+ if (dump_fd < 0)
+ return -1;
- dump_fd = open (path, O_CREAT|O_RDWR|O_TRUNC|O_APPEND, 0600);
- if (dump_fd < 0)
- return -1;
+ gf_dump_fd = dump_fd;
+ return 0;
+}
- gf_dump_fd = dump_fd;
- return 0;
+static void
+gf_proc_dump_close(void)
+{
+ sys_close(gf_dump_fd);
+ gf_dump_fd = -1;
}
+static int
+gf_proc_dump_set_path(char *dump_options_file)
+{
+ int ret = -1;
+ FILE *fp = NULL;
+ char buf[256];
+ char *key = NULL, *value = NULL;
+ char *saveptr = NULL;
+
+ fp = fopen(dump_options_file, "r");
+ if (!fp)
+ goto out;
+
+ ret = fscanf(fp, "%255s", buf);
+
+ while (ret != EOF) {
+ key = strtok_r(buf, "=", &saveptr);
+ if (!key) {
+ ret = fscanf(fp, "%255s", buf);
+ continue;
+ }
-static void
-gf_proc_dump_close (void)
+ value = strtok_r(NULL, "=", &saveptr);
+
+ if (!value) {
+ ret = fscanf(fp, "%255s", buf);
+ continue;
+ }
+ if (!strcmp(key, "path")) {
+ dump_options.dump_path = gf_strdup(value);
+ break;
+ }
+ }
+
+out:
+ if (fp)
+ fclose(fp);
+ return ret;
+}
+
+static int
+gf_proc_dump_add_section_fd(char *key, va_list ap)
{
- close (gf_dump_fd);
- gf_dump_fd = -1;
+ char buf[GF_DUMP_MAX_BUF_LEN];
+ int len;
+
+ GF_ASSERT(key);
+
+ len = snprintf(buf, GF_DUMP_MAX_BUF_LEN, "\n[");
+ len += vsnprintf(buf + len, GF_DUMP_MAX_BUF_LEN - len, key, ap);
+ len += snprintf(buf + len, GF_DUMP_MAX_BUF_LEN - len, "]\n");
+ return sys_write(gf_dump_fd, buf, len);
}
+static int
+gf_proc_dump_add_section_strfd(char *key, va_list ap)
+{
+ int ret = 0;
+
+ ret += strprintf(gf_dump_strfd, "[");
+ ret += strvprintf(gf_dump_strfd, key, ap);
+ ret += strprintf(gf_dump_strfd, "]\n");
+
+ return ret;
+}
int
-gf_proc_dump_add_section (char *key, ...)
+gf_proc_dump_add_section(char *key, ...)
{
+ va_list ap;
+ int ret = 0;
- char buf[GF_DUMP_MAX_BUF_LEN];
- va_list ap;
+ va_start(ap, key);
+ if (gf_dump_strfd)
+ ret = gf_proc_dump_add_section_strfd(key, ap);
+ else
+ ret = gf_proc_dump_add_section_fd(key, ap);
+ va_end(ap);
- GF_ASSERT(key);
+ return ret;
+}
- memset (buf, 0, sizeof(buf));
- snprintf (buf, GF_DUMP_MAX_BUF_LEN, "\n[");
- va_start (ap, key);
- vsnprintf (buf + strlen(buf),
- GF_DUMP_MAX_BUF_LEN - strlen (buf), key, ap);
- va_end (ap);
- snprintf (buf + strlen(buf),
- GF_DUMP_MAX_BUF_LEN - strlen (buf), "]\n");
- return write (gf_dump_fd, buf, strlen (buf));
+static int
+gf_proc_dump_write_fd(char *key, char *value, va_list ap)
+{
+ char buf[GF_DUMP_MAX_BUF_LEN];
+ int len = 0;
+
+ GF_ASSERT(key);
+
+ len = snprintf(buf, GF_DUMP_MAX_BUF_LEN, "%s=", key);
+ len += vsnprintf(buf + len, GF_DUMP_MAX_BUF_LEN - len, value, ap);
+
+ len += snprintf(buf + len, GF_DUMP_MAX_BUF_LEN - len, "\n");
+ return sys_write(gf_dump_fd, buf, len);
}
+static int
+gf_proc_dump_write_strfd(char *key, char *value, va_list ap)
+{
+ int ret = 0;
+
+ ret += strprintf(gf_dump_strfd, "%s = ", key);
+ ret += strvprintf(gf_dump_strfd, value, ap);
+ ret += strprintf(gf_dump_strfd, "\n");
+
+ return ret;
+}
int
-gf_proc_dump_write (char *key, char *value,...)
+gf_proc_dump_write(char *key, char *value, ...)
{
+ int ret = 0;
+ va_list ap;
- char buf[GF_DUMP_MAX_BUF_LEN];
- int offset = 0;
- va_list ap;
+ va_start(ap, value);
+ if (gf_dump_strfd)
+ ret = gf_proc_dump_write_strfd(key, value, ap);
+ else
+ ret = gf_proc_dump_write_fd(key, value, ap);
+ va_end(ap);
- GF_ASSERT (key);
+ return ret;
+}
- offset = strlen (key);
+void
+gf_latency_statedump_and_reset(char *key, gf_latency_t *lat)
+{
+ /* Doesn't make sense to continue if there are no fops
+ came in the given interval */
+ if (!lat || !lat->count)
+ return;
+ gf_proc_dump_write(key,
+ "AVG:%lf CNT:%" PRIu64 " TOTAL:%" PRIu64 " MIN:%" PRIu64
+ " MAX:%" PRIu64,
+ (((double)lat->total) / lat->count), lat->count,
+ lat->total, lat->min, lat->max);
+ gf_latency_reset(lat);
+}
+
+void
+gf_proc_dump_xl_latency_info(xlator_t *xl)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i;
- memset (buf, 0, GF_DUMP_MAX_BUF_LEN);
- snprintf (buf, GF_DUMP_MAX_BUF_LEN, "%s", key);
- snprintf (buf + offset, GF_DUMP_MAX_BUF_LEN - offset, "=");
- offset += 1;
- va_start (ap, value);
- vsnprintf (buf + offset, GF_DUMP_MAX_BUF_LEN - offset, value, ap);
- va_end (ap);
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name);
+ gf_proc_dump_add_section("%s", key_prefix);
- offset = strlen (buf);
- snprintf (buf + offset, GF_DUMP_MAX_BUF_LEN - offset, "\n");
- return write (gf_dump_fd, buf, strlen (buf));
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]);
+
+ gf_latency_t *lat = &xl->stats.interval.latencies[i];
+
+ gf_latency_statedump_and_reset(key, lat);
+ }
}
static void
-gf_proc_dump_xlator_mem_info (xlator_t *xl)
-{
- int i = 0;
- struct mem_acct rec = {0,};
-
- if (!xl)
- return;
-
- if (!xl->mem_acct.rec)
- return;
-
- gf_proc_dump_add_section ("%s.%s - Memory usage", xl->type, xl->name);
- gf_proc_dump_write ("num_types", "%d", xl->mem_acct.num_types);
-
- for (i = 0; i < xl->mem_acct.num_types; i++) {
- if (!(memcmp (&xl->mem_acct.rec[i], &rec,
- sizeof (struct mem_acct))))
- continue;
-
- gf_proc_dump_add_section ("%s.%s - usage-type %d memusage",
- xl->type, xl->name, i);
- gf_proc_dump_write ("size", "%u", xl->mem_acct.rec[i].size);
- gf_proc_dump_write ("num_allocs", "%u",
- xl->mem_acct.rec[i].num_allocs);
- gf_proc_dump_write ("max_size", "%u",
- xl->mem_acct.rec[i].max_size);
- gf_proc_dump_write ("max_num_allocs", "%u",
- xl->mem_acct.rec[i].max_num_allocs);
- gf_proc_dump_write ("total_allocs", "%u",
- xl->mem_acct.rec[i].total_allocs);
- }
+gf_proc_dump_xlator_mem_info(xlator_t *xl)
+{
+ int i = 0;
+
+ if (!xl)
+ return;
+ if (!xl->mem_acct)
return;
+
+ gf_proc_dump_add_section("%s.%s - Memory usage", xl->type, xl->name);
+ gf_proc_dump_write("num_types", "%d", xl->mem_acct->num_types);
+
+ for (i = 0; i < xl->mem_acct->num_types; i++) {
+ if (xl->mem_acct->rec[i].num_allocs == 0)
+ continue;
+
+ gf_proc_dump_add_section("%s.%s - usage-type %s memusage", xl->type,
+ xl->name, xl->mem_acct->rec[i].typestr);
+ gf_proc_dump_write("size", "%" PRIu64, xl->mem_acct->rec[i].size);
+ gf_proc_dump_write("num_allocs", "%u", xl->mem_acct->rec[i].num_allocs);
+ gf_proc_dump_write("max_size", "%" PRIu64,
+ xl->mem_acct->rec[i].max_size);
+ gf_proc_dump_write("max_num_allocs", "%u",
+ xl->mem_acct->rec[i].max_num_allocs);
+ gf_proc_dump_write("total_allocs", "%" PRIu64,
+ xl->mem_acct->rec[i].total_allocs);
+ }
+
+ return;
}
static void
-gf_proc_dump_xlator_mem_info_only_in_use (xlator_t *xl)
+gf_proc_dump_xlator_mem_info_only_in_use(xlator_t *xl)
{
- int i = 0;
+ int i = 0;
- if (!xl)
- return;
+ if (!xl)
+ return;
- if (!xl->mem_acct.rec)
- return;
+ if (!xl->mem_acct)
+ return;
- gf_proc_dump_add_section ("%s.%s - Memory usage", xl->type, xl->name);
- gf_proc_dump_write ("num_types", "%d", xl->mem_acct.num_types);
+ gf_proc_dump_add_section("%s.%s - Memory usage", xl->type, xl->name);
+ gf_proc_dump_write("num_types", "%d", xl->mem_acct->num_types);
- for (i = 0; i < xl->mem_acct.num_types; i++) {
- if (!xl->mem_acct.rec[i].size)
- continue;
+ for (i = 0; i < xl->mem_acct->num_types; i++) {
+ if (!xl->mem_acct->rec[i].size)
+ continue;
- gf_proc_dump_add_section ("%s.%s - usage-type %d", xl->type,
- xl->name,i);
+ gf_proc_dump_add_section("%s.%s - usage-type %d", xl->type, xl->name,
+ i);
- gf_proc_dump_write ("size", "%u",
- xl->mem_acct.rec[i].size);
- gf_proc_dump_write ("max_size", "%u",
- xl->mem_acct.rec[i].max_size);
- gf_proc_dump_write ("num_allocs", "%u",
- xl->mem_acct.rec[i].num_allocs);
- gf_proc_dump_write ("max_num_allocs", "%u",
- xl->mem_acct.rec[i].max_num_allocs);
- gf_proc_dump_write ("total_allocs", "%u",
- xl->mem_acct.rec[i].total_allocs);
- }
+ gf_proc_dump_write("size", "%" PRIu64, xl->mem_acct->rec[i].size);
+ gf_proc_dump_write("max_size", "%" PRIu64,
+ xl->mem_acct->rec[i].max_size);
+ gf_proc_dump_write("num_allocs", "%u", xl->mem_acct->rec[i].num_allocs);
+ gf_proc_dump_write("max_num_allocs", "%u",
+ xl->mem_acct->rec[i].max_num_allocs);
+ gf_proc_dump_write("total_allocs", "%" PRIu64,
+ xl->mem_acct->rec[i].total_allocs);
+ }
- return;
+ return;
}
-
-
/* Currently this dumps only mallinfo. More can be built on here */
void
-gf_proc_dump_mem_info ()
-{
-#ifdef HAVE_MALLOC_STATS
- struct mallinfo info;
-
- memset (&info, 0, sizeof (struct mallinfo));
- info = mallinfo ();
-
- gf_proc_dump_add_section ("mallinfo");
- gf_proc_dump_write ("mallinfo_arena", "%d", info.arena);
- gf_proc_dump_write ("mallinfo_ordblks", "%d", info.ordblks);
- gf_proc_dump_write ("mallinfo_smblks", "%d", info.smblks);
- gf_proc_dump_write ("mallinfo_hblks", "%d", info.hblks);
- gf_proc_dump_write ("mallinfo_hblkhd", "%d", info.hblkhd);
- gf_proc_dump_write ("mallinfo_usmblks", "%d", info.usmblks);
- gf_proc_dump_write ("mallinfo_fsmblks", "%d", info.fsmblks);
- gf_proc_dump_write ("mallinfo_uordblks", "%d", info.uordblks);
- gf_proc_dump_write ("mallinfo_fordblks", "%d", info.fordblks);
- gf_proc_dump_write ("mallinfo_keepcost", "%d", info.keepcost);
+gf_proc_dump_mem_info()
+{
+#ifdef HAVE_MALLINFO
+ struct mallinfo info;
+
+ memset(&info, 0, sizeof(struct mallinfo));
+ info = mallinfo();
+
+ gf_proc_dump_add_section("mallinfo");
+ gf_proc_dump_write("mallinfo_arena", "%d", info.arena);
+ gf_proc_dump_write("mallinfo_ordblks", "%d", info.ordblks);
+ gf_proc_dump_write("mallinfo_smblks", "%d", info.smblks);
+ gf_proc_dump_write("mallinfo_hblks", "%d", info.hblks);
+ gf_proc_dump_write("mallinfo_hblkhd", "%d", info.hblkhd);
+ gf_proc_dump_write("mallinfo_usmblks", "%d", info.usmblks);
+ gf_proc_dump_write("mallinfo_fsmblks", "%d", info.fsmblks);
+ gf_proc_dump_write("mallinfo_uordblks", "%d", info.uordblks);
+ gf_proc_dump_write("mallinfo_fordblks", "%d", info.fordblks);
+ gf_proc_dump_write("mallinfo_keepcost", "%d", info.keepcost);
#endif
- gf_proc_dump_xlator_mem_info(&global_xlator);
-
+ gf_proc_dump_xlator_mem_info(&global_xlator);
}
void
-gf_proc_dump_mem_info_to_dict (dict_t *dict)
+gf_proc_dump_mem_info_to_dict(dict_t *dict)
{
- if (!dict)
- return;
-#ifdef HAVE_MALLOC_STATS
- struct mallinfo info;
- int ret = -1;
+ if (!dict)
+ return;
+#ifdef HAVE_MALLINFO
+ struct mallinfo info;
+ int ret = -1;
- memset (&info, 0, sizeof(struct mallinfo));
- info = mallinfo ();
+ memset(&info, 0, sizeof(struct mallinfo));
+ info = mallinfo();
- ret = dict_set_int32 (dict, "mallinfo.arena", info.arena);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.arena", info.arena);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.ordblks", info.ordblks);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.ordblks", info.ordblks);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.smblks", info.smblks);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.smblks", info.smblks);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.hblks", info.hblks);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.hblks", info.hblks);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.hblkhd", info.hblkhd);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.hblkhd", info.hblkhd);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.usmblks", info.usmblks);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.usmblks", info.usmblks);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.fsmblks", info.fsmblks);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.fsmblks", info.fsmblks);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.uordblks", info.uordblks);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.uordblks", info.uordblks);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.fordblks", info.fordblks);
- if (ret)
- return;
+ ret = dict_set_int32(dict, "mallinfo.fordblks", info.fordblks);
+ if (ret)
+ return;
- ret = dict_set_int32 (dict, "mallinfo.keepcost", info.keepcost);
- if (ret)
- return;
-#endif
+ ret = dict_set_int32(dict, "mallinfo.keepcost", info.keepcost);
+ if (ret)
return;
+#endif
+ return;
}
void
-gf_proc_dump_mempool_info (glusterfs_ctx_t *ctx)
+gf_proc_dump_mempool_info(glusterfs_ctx_t *ctx)
{
- struct mem_pool *pool = NULL;
+#ifdef GF_DISABLE_MEMPOOL
+ gf_proc_dump_write("built with --disable-mempool", " so no memory pools");
+#else
+ struct mem_pool *pool = NULL;
+
+ gf_proc_dump_add_section("mempool");
+
+ LOCK(&ctx->lock);
+ {
+ list_for_each_entry(pool, &ctx->mempool_list, owner)
+ {
+ int64_t active = GF_ATOMIC_GET(pool->active);
+
+ gf_proc_dump_write("-----", "-----");
+ gf_proc_dump_write("pool-name", "%s", pool->name);
+ gf_proc_dump_write("xlator-name", "%s", pool->xl_name);
+ gf_proc_dump_write("active-count", "%" GF_PRI_ATOMIC, active);
+ gf_proc_dump_write("sizeof-type", "%lu", pool->sizeof_type);
+ gf_proc_dump_write("padded-sizeof", "%d",
+ 1 << pool->pool->power_of_two);
+ gf_proc_dump_write("size", "%" PRId64,
+ (1 << pool->pool->power_of_two) * active);
+ gf_proc_dump_write("shared-pool", "%p", pool->pool);
+ }
+ }
+ UNLOCK(&ctx->lock);
+#endif /* GF_DISABLE_MEMPOOL */
+}
- gf_proc_dump_add_section ("mempool");
+void
+gf_proc_dump_mempool_info_to_dict(glusterfs_ctx_t *ctx, dict_t *dict)
+{
+#ifndef GF_DISABLE_MEMPOOL
+ struct mem_pool *pool = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ int count = 0;
+ int ret = -1;
+
+ if (!ctx || !dict)
+ return;
- list_for_each_entry (pool, &ctx->mempool_list, global_list) {
- gf_proc_dump_write ("-----", "-----");
- gf_proc_dump_write ("pool-name", "%s", pool->name);
- gf_proc_dump_write ("hot-count", "%d", pool->hot_count);
- gf_proc_dump_write ("cold-count", "%d", pool->cold_count);
- gf_proc_dump_write ("padded_sizeof", "%lu",
- pool->padded_sizeof_type);
- gf_proc_dump_write ("alloc-count", "%"PRIu64, pool->alloc_count);
- gf_proc_dump_write ("max-alloc", "%d", pool->max_alloc);
+ LOCK(&ctx->lock);
+ {
+ list_for_each_entry(pool, &ctx->mempool_list, owner)
+ {
+ int64_t active = GF_ATOMIC_GET(pool->active);
- gf_proc_dump_write ("pool-misses", "%"PRIu64, pool->pool_misses);
- gf_proc_dump_write ("max-stdalloc", "%d", pool->max_stdalloc);
+ snprintf(key, sizeof(key), "pool%d.name", count);
+ ret = dict_set_str(dict, key, pool->name);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.active-count", count);
+ ret = dict_set_uint64(dict, key, active);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.sizeof-type", count);
+ ret = dict_set_uint64(dict, key, pool->sizeof_type);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.padded-sizeof", count);
+ ret = dict_set_uint64(dict, key, 1 << pool->pool->power_of_two);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.size", count);
+ ret = dict_set_uint64(dict, key,
+ (1 << pool->pool->power_of_two) * active);
+ if (ret)
+ goto out;
+
+ snprintf(key, sizeof(key), "pool%d.shared-pool", count);
+ ret = dict_set_static_ptr(dict, key, pool->pool);
+ if (ret)
+ goto out;
}
+ }
+out:
+ UNLOCK(&ctx->lock);
+#endif /* !GF_DISABLE_MEMPOOL */
}
void
-gf_proc_dump_mempool_info_to_dict (glusterfs_ctx_t *ctx, dict_t *dict)
-{
- struct mem_pool *pool = NULL;
- char key[GF_DUMP_MAX_BUF_LEN] = {0,};
- int count = 0;
- int ret = -1;
-
- if (!ctx || !dict)
- return;
-
- list_for_each_entry (pool, &ctx->mempool_list, global_list) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "pool%d.name", count);
- ret = dict_set_str (dict, key, pool->name);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "pool%d.hotcount", count);
- ret = dict_set_int32 (dict, key, pool->hot_count);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "pool%d.coldcount", count);
- ret = dict_set_int32 (dict, key, pool->cold_count);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "pool%d.paddedsizeof", count);
- ret = dict_set_uint64 (dict, key, pool->padded_sizeof_type);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "pool%d.alloccount", count);
- ret = dict_set_uint64 (dict, key, pool->alloc_count);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "pool%d.max_alloc", count);
- ret = dict_set_int32 (dict, key, pool->max_alloc);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "pool%d.max-stdalloc", count);
- ret = dict_set_int32 (dict, key, pool->max_stdalloc);
- if (ret)
- return;
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "pool%d.pool-misses", count);
- ret = dict_set_uint64 (dict, key, pool->pool_misses);
- if (ret)
- return;
- count++;
- }
- ret = dict_set_int32 (dict, "mempool-count", count);
+gf_proc_dump_latency_info(xlator_t *xl);
- return;
+void
+gf_proc_dump_dict_info(glusterfs_ctx_t *ctx)
+{
+ int64_t total_dicts = 0;
+ int64_t total_pairs = 0;
+
+ total_dicts = GF_ATOMIC_GET(ctx->stats.total_dicts_used);
+ total_pairs = GF_ATOMIC_GET(ctx->stats.total_pairs_used);
+
+ gf_proc_dump_write("max-pairs-per-dict", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ctx->stats.max_dict_pairs));
+ gf_proc_dump_write("total-pairs-used", "%" PRId64, total_pairs);
+ gf_proc_dump_write("total-dicts-used", "%" PRId64, total_dicts);
+ gf_proc_dump_write("average-pairs-per-dict", "%" PRId64,
+ (total_pairs / total_dicts));
}
-void gf_proc_dump_latency_info (xlator_t *xl);
-
-void
-gf_proc_dump_xlator_info (xlator_t *top)
+static void
+gf_proc_dump_single_xlator_info(xlator_t *trav)
{
- xlator_t *trav = NULL;
- glusterfs_ctx_t *ctx = NULL;
- char itable_key[1024] = {0,};
+ glusterfs_ctx_t *ctx = trav->ctx;
+ char itable_key[1024] = {
+ 0,
+ };
- if (!top)
- return;
+ if (trav->cleanup_starting)
+ return;
- ctx = glusterfs_ctx_get ();
+ if (ctx->measure_latency)
+ gf_proc_dump_xl_latency_info(trav);
- trav = top;
- while (trav) {
+ gf_proc_dump_xlator_mem_info(trav);
- if (ctx->measure_latency)
- gf_proc_dump_latency_info (trav);
+ if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED(inode) && (trav->itable)) {
+ snprintf(itable_key, sizeof(itable_key), "%d.%s.itable", ctx->graph_id,
+ trav->name);
+ }
+
+ if (!trav->dumpops) {
+ return;
+ }
- gf_proc_dump_xlator_mem_info(trav);
+ if (trav->dumpops->priv && GF_PROC_DUMP_IS_XL_OPTION_ENABLED(priv))
+ trav->dumpops->priv(trav);
- if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED (inode) &&
- (trav->itable)) {
- snprintf (itable_key, 1024, "%d.%s.itable",
- ctx->graph_id, trav->name);
+ if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED(inode) && (trav->dumpops->inode))
+ trav->dumpops->inode(trav);
+ if (trav->dumpops->fd && GF_PROC_DUMP_IS_XL_OPTION_ENABLED(fd))
+ trav->dumpops->fd(trav);
- inode_table_dump (trav->itable, itable_key);
- }
+ if (trav->dumpops->history && GF_PROC_DUMP_IS_XL_OPTION_ENABLED(history))
+ trav->dumpops->history(trav);
+}
- if (!trav->dumpops) {
- trav = trav->next;
- continue;
- }
+static void
+gf_proc_dump_per_xlator_info(xlator_t *top)
+{
+ xlator_t *trav = top;
- if (trav->dumpops->priv &&
- GF_PROC_DUMP_IS_XL_OPTION_ENABLED (priv))
- trav->dumpops->priv (trav);
+ while (trav && !trav->cleanup_starting) {
+ gf_proc_dump_single_xlator_info(trav);
+ trav = trav->next;
+ }
+}
- if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED (inode) &&
- (trav->dumpops->inode))
- trav->dumpops->inode (trav);
+void
+gf_proc_dump_xlator_info(xlator_t *top, gf_boolean_t brick_mux)
+{
+ xlator_t *trav = NULL;
+ xlator_list_t **trav_p = NULL;
- if (trav->dumpops->fd &&
- GF_PROC_DUMP_IS_XL_OPTION_ENABLED (fd))
- trav->dumpops->fd (trav);
+ if (!top)
+ return;
- if (trav->dumpops->history &&
- GF_PROC_DUMP_IS_XL_OPTION_ENABLED (history))
- trav->dumpops->history (trav);
+ trav = top;
+ gf_proc_dump_per_xlator_info(trav);
- trav = trav->next;
+ if (brick_mux) {
+ trav_p = &top->children;
+ while (*trav_p) {
+ trav = (*trav_p)->xlator;
+ gf_proc_dump_per_xlator_info(trav);
+ trav_p = &(*trav_p)->next;
}
+ }
- return;
+ return;
}
static void
-gf_proc_dump_oldgraph_xlator_info (xlator_t *top)
+gf_proc_dump_oldgraph_xlator_info(xlator_t *top)
{
- xlator_t *trav = NULL;
- glusterfs_ctx_t *ctx = NULL;
- char itable_key[1024] = {0,};
-
- if (!top)
- return;
+ xlator_t *trav = NULL;
- ctx = glusterfs_ctx_get ();
-
- trav = top;
- while (trav) {
- gf_proc_dump_xlator_mem_info_only_in_use (trav);
+ if (!top)
+ return;
- if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED (inode) &&
- (trav->itable)) {
- snprintf (itable_key, 1024, "%d.%s.itable",
- ctx->graph_id, trav->name);
+ trav = top;
+ while (trav) {
+ gf_proc_dump_xlator_mem_info_only_in_use(trav);
- inode_table_dump (trav->itable, itable_key);
- }
+ if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED(inode) && (trav->itable)) {
+ /*TODO: dump inode table info if necessary by
+ printing the graph id (taken by glusterfs_cbtx_t)
+ in the key
+ */
+ }
- if (!trav->dumpops) {
- trav = trav->next;
- continue;
- }
+ if (!trav->dumpops) {
+ trav = trav->next;
+ continue;
+ }
- if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED (inode) &&
- (trav->dumpops->inode))
- trav->dumpops->inode (trav);
+ if (GF_PROC_DUMP_IS_XL_OPTION_ENABLED(inode) && (trav->dumpops->inode))
+ trav->dumpops->inode(trav);
- if (trav->dumpops->fd &&
- GF_PROC_DUMP_IS_XL_OPTION_ENABLED (fd))
- trav->dumpops->fd (trav);
+ if (trav->dumpops->fd && GF_PROC_DUMP_IS_XL_OPTION_ENABLED(fd))
+ trav->dumpops->fd(trav);
- trav = trav->next;
- }
+ trav = trav->next;
+ }
- return;
+ return;
}
static int
-gf_proc_dump_enable_all_options ()
+gf_proc_dump_enable_all_options()
{
+ GF_PROC_DUMP_SET_OPTION(dump_options.dump_mem, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.dump_iobuf, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.dump_callpool, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_priv, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_inode, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_fd, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_inodectx, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_fdctx, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_history, _gf_true);
+
+ return 0;
+}
- GF_PROC_DUMP_SET_OPTION (dump_options.dump_mem, _gf_true);
- GF_PROC_DUMP_SET_OPTION (dump_options.dump_iobuf, _gf_true);
- GF_PROC_DUMP_SET_OPTION (dump_options.dump_callpool, _gf_true);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_priv, _gf_true);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_inode, _gf_true);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_fd, _gf_true);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_inodectx,
- _gf_true);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_fdctx, _gf_true);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_history,
- _gf_true);
+gf_boolean_t
+is_gf_proc_dump_all_disabled()
+{
+ gf_boolean_t all_disabled = _gf_true;
+
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.dump_mem, all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.dump_iobuf, all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.dump_callpool, all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.xl_options.dump_priv,
+ all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.xl_options.dump_inode,
+ all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.xl_options.dump_fd, all_disabled,
+ out);
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.xl_options.dump_inodectx,
+ all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.xl_options.dump_fdctx,
+ all_disabled, out);
+ GF_CHECK_DUMP_OPTION_ENABLED(dump_options.xl_options.dump_history,
+ all_disabled, out);
- return 0;
+out:
+ return all_disabled;
}
+/* These options are dumped by default if glusterdump.options
+ file exists and it is emtpty
+*/
static int
-gf_proc_dump_disable_all_options ()
-{
-
- GF_PROC_DUMP_SET_OPTION (dump_options.dump_mem, _gf_false);
- GF_PROC_DUMP_SET_OPTION (dump_options.dump_iobuf, _gf_false);
- GF_PROC_DUMP_SET_OPTION (dump_options.dump_callpool, _gf_false);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_priv, _gf_false);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_inode,
- _gf_false);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_fd, _gf_false);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_inodectx,
- _gf_false);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_fdctx, _gf_false);
- GF_PROC_DUMP_SET_OPTION (dump_options.xl_options.dump_history,
- _gf_false);
- return 0;
+gf_proc_dump_enable_default_options()
+{
+ GF_PROC_DUMP_SET_OPTION(dump_options.dump_mem, _gf_true);
+ GF_PROC_DUMP_SET_OPTION(dump_options.dump_callpool, _gf_true);
+
+ return 0;
}
static int
-gf_proc_dump_parse_set_option (char *key, char *value)
-{
- gf_boolean_t *opt_key = NULL;
- gf_boolean_t opt_value = _gf_false;
- char buf[GF_DUMP_MAX_BUF_LEN];
- int ret = -1;
-
- if (!strcasecmp (key, "all")) {
- (void)gf_proc_dump_enable_all_options ();
- return 0;
- } else if (!strcasecmp (key, "mem")) {
- opt_key = &dump_options.dump_mem;
- } else if (!strcasecmp (key, "iobuf")) {
- opt_key = &dump_options.dump_iobuf;
- } else if (!strcasecmp (key, "callpool")) {
- opt_key = &dump_options.dump_callpool;
- } else if (!strcasecmp (key, "priv")) {
- opt_key = &dump_options.xl_options.dump_priv;
- } else if (!strcasecmp (key, "fd")) {
- opt_key = &dump_options.xl_options.dump_fd;
- } else if (!strcasecmp (key, "inode")) {
- opt_key = &dump_options.xl_options.dump_inode;
- } else if (!strcasecmp (key, "inodectx")) {
- opt_key = &dump_options.xl_options.dump_inodectx;
- } else if (!strcasecmp (key, "fdctx")) {
- opt_key = &dump_options.xl_options.dump_fdctx;
- } else if (!strcasecmp (key, "history")) {
- opt_key = &dump_options.xl_options.dump_history;
- }
-
- if (!opt_key) {
- //None of dump options match the key, return back
- snprintf (buf, sizeof (buf), "[Warning]:None of the options "
- "matched key : %s\n", key);
- ret = write (gf_dump_fd, buf, strlen (buf));
-
- if (ret >= 0)
- ret = -1;
- goto out;
+gf_proc_dump_disable_all_options()
+{
+ GF_PROC_DUMP_SET_OPTION(dump_options.dump_mem, _gf_false);
+ GF_PROC_DUMP_SET_OPTION(dump_options.dump_iobuf, _gf_false);
+ GF_PROC_DUMP_SET_OPTION(dump_options.dump_callpool, _gf_false);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_priv, _gf_false);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_inode, _gf_false);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_fd, _gf_false);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_inodectx, _gf_false);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_fdctx, _gf_false);
+ GF_PROC_DUMP_SET_OPTION(dump_options.xl_options.dump_history, _gf_false);
+ return 0;
+}
+static int
+gf_proc_dump_parse_set_option(char *key, char *value)
+{
+ gf_boolean_t *opt_key = NULL;
+ gf_boolean_t opt_value = _gf_false;
+ char buf[GF_DUMP_MAX_BUF_LEN];
+ int ret = -1;
+ int len;
+
+ if (!strcasecmp(key, "all")) {
+ (void)gf_proc_dump_enable_all_options();
+ return 0;
+ } else if (!strcasecmp(key, "mem")) {
+ opt_key = &dump_options.dump_mem;
+ } else if (!strcasecmp(key, "iobuf")) {
+ opt_key = &dump_options.dump_iobuf;
+ } else if (!strcasecmp(key, "callpool")) {
+ opt_key = &dump_options.dump_callpool;
+ } else if (!strcasecmp(key, "priv")) {
+ opt_key = &dump_options.xl_options.dump_priv;
+ } else if (!strcasecmp(key, "fd")) {
+ opt_key = &dump_options.xl_options.dump_fd;
+ } else if (!strcasecmp(key, "inode")) {
+ opt_key = &dump_options.xl_options.dump_inode;
+ } else if (!strcasecmp(key, "inodectx")) {
+ opt_key = &dump_options.xl_options.dump_inodectx;
+ } else if (!strcasecmp(key, "fdctx")) {
+ opt_key = &dump_options.xl_options.dump_fdctx;
+ } else if (!strcasecmp(key, "history")) {
+ opt_key = &dump_options.xl_options.dump_history;
+ }
+
+ if (!opt_key) {
+ // None of dump options match the key, return back
+ len = snprintf(buf, sizeof(buf),
+ "[Warning]:None of the options "
+ "matched key : %s\n",
+ key);
+ if (len < 0)
+ ret = -1;
+ else {
+ ret = sys_write(gf_dump_fd, buf, len);
+ if (ret >= 0)
+ ret = -1;
}
+ goto out;
+ }
- opt_value = (strncasecmp (value, "yes", 3) ?
- _gf_false: _gf_true);
+ opt_value = (strncasecmp(value, "yes", 3) ? _gf_false : _gf_true);
- GF_PROC_DUMP_SET_OPTION (*opt_key, opt_value);
+ GF_PROC_DUMP_SET_OPTION(*opt_key, opt_value);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
static int
-gf_proc_dump_options_init ()
+gf_proc_dump_options_init()
{
- int ret = -1;
- FILE *fp = NULL;
- char buf[256];
- char dumpbuf[GF_DUMP_MAX_BUF_LEN];
- char *key = NULL, *value = NULL;
- char *saveptr = NULL;
- char dump_option_file[PATH_MAX];
-
- snprintf (dump_option_file, sizeof (dump_option_file),
- "/tmp/glusterdump.%d.options", getpid ());
-
- fp = fopen (dump_option_file, "r");
+ int ret = -1;
+ FILE *fp = NULL;
+ char buf[256];
+ char *key = NULL, *value = NULL;
+ char *saveptr = NULL;
+ char dump_option_file[PATH_MAX];
+
+ /* glusterd will create a file glusterdump.<pid>.options and
+ sets the statedump options for the process and the file is removed
+ after the statedump is taken. Direct issue of SIGUSR1 does not have
+ mechanism for considering the statedump options. So to have a way
+ of configuring the statedump of all the glusterfs processes through
+ both cli command and SIGUSR1, glusterdump.options file is searched
+ and the options mentioned in it are given the higher priority.
+ */
+ snprintf(dump_option_file, sizeof(dump_option_file),
+ DEFAULT_VAR_RUN_DIRECTORY "/glusterdump.options");
+ fp = fopen(dump_option_file, "r");
+ if (!fp) {
+ snprintf(dump_option_file, sizeof(dump_option_file),
+ DEFAULT_VAR_RUN_DIRECTORY "/glusterdump.%d.options", getpid());
+
+ fp = fopen(dump_option_file, "r");
if (!fp) {
- //ENOENT, return success
- (void) gf_proc_dump_enable_all_options ();
- return 0;
+ // ENOENT, return success
+ (void)gf_proc_dump_enable_all_options();
+ return 0;
}
+ }
- (void) gf_proc_dump_disable_all_options ();
-
- ret = fscanf (fp, "%s", buf);
+ (void)gf_proc_dump_disable_all_options();
- while (ret != EOF) {
+ // swallow the errors if setting statedump file path is failed.
+ (void)gf_proc_dump_set_path(dump_option_file);
- key = strtok_r (buf, "=", &saveptr);
- if (!key) {
- ret = fscanf (fp, "%s", buf);
- continue;
- }
+ ret = fscanf(fp, "%255s", buf);
- value = strtok_r (NULL, "=", &saveptr);
+ while (ret != EOF) {
+ key = strtok_r(buf, "=", &saveptr);
+ if (!key) {
+ ret = fscanf(fp, "%255s", buf);
+ continue;
+ }
- if (!value) {
- ret = fscanf (fp, "%s", buf);
- continue;
- }
+ value = strtok_r(NULL, "=", &saveptr);
- snprintf (dumpbuf, sizeof (dumpbuf), "[Debug]:key=%s, value=%s\n",key,value);
- ret = write (gf_dump_fd, dumpbuf, strlen (dumpbuf));
+ if (!value) {
+ ret = fscanf(fp, "%255s", buf);
+ continue;
+ }
- gf_proc_dump_parse_set_option (key, value);
+ gf_proc_dump_parse_set_option(key, value);
+ }
- }
+ if (is_gf_proc_dump_all_disabled())
+ (void)gf_proc_dump_enable_default_options();
- if (fp)
- fclose (fp);
+ if (fp)
+ fclose(fp);
- return 0;
+ return 0;
}
void
-gf_proc_dump_info (int signum)
+gf_proc_dump_info(int signum, glusterfs_ctx_t *ctx)
{
- int i = 0;
- int ret = -1;
- glusterfs_ctx_t *ctx = NULL;
- glusterfs_graph_t *trav = NULL;
- char brick_name[PATH_MAX] = {0,};
+ int i = 0;
+ int ret = -1;
+ glusterfs_graph_t *trav = NULL;
+ char brick_name[PATH_MAX] = {
+ 0,
+ };
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char sign_string[512] = {
+ 0,
+ };
+ char tmp_dump_name[PATH_MAX] = {
+ 0,
+ };
+ char path[PATH_MAX] = {
+ 0,
+ };
+ struct timeval tv = {
+ 0,
+ };
+ gf_boolean_t is_brick_mux = _gf_false;
+ xlator_t *top = NULL;
+ xlator_list_t **trav_p = NULL;
+ int brick_count = 0;
+ int len = 0;
+
+ gf_msg_trace("dump", 0, "received statedump request (sig:USR1)");
+
+ if (!ctx)
+ goto out;
+
+ /*
+ * Multiplexed daemons can change the active graph when attach/detach
+ * is called. So this has to be protected with the cleanup lock.
+ */
+ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name))
+ pthread_mutex_lock(&ctx->cleanup_lock);
+ gf_proc_dump_lock();
+
+ if (!mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name) &&
+ (ctx && ctx->active)) {
+ top = ctx->active->first;
+ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ brick_count++;
+ }
- gf_proc_dump_lock ();
+ if (brick_count > 1)
+ is_brick_mux = _gf_true;
+ }
+
+ if (ctx->cmd_args.brick_name) {
+ GF_REMOVE_SLASH_FROM_PATH(ctx->cmd_args.brick_name, brick_name);
+ } else
+ snprintf(brick_name, sizeof(brick_name), "glusterdump");
+
+ ret = gf_proc_dump_options_init();
+ if (ret < 0)
+ goto out;
+
+ ret = snprintf(
+ path, sizeof(path), "%s/%s.%d.dump.%" PRIu64,
+ ((dump_options.dump_path != NULL)
+ ? dump_options.dump_path
+ : ((ctx->statedump_path != NULL) ? ctx->statedump_path
+ : DEFAULT_VAR_RUN_DIRECTORY)),
+ brick_name, getpid(), (uint64_t)gf_time());
+ if ((ret < 0) || (ret >= sizeof(path))) {
+ goto out;
+ }
+
+ snprintf(
+ tmp_dump_name, PATH_MAX, "%s/dumpXXXXXX",
+ ((dump_options.dump_path != NULL)
+ ? dump_options.dump_path
+ : ((ctx->statedump_path != NULL) ? ctx->statedump_path
+ : DEFAULT_VAR_RUN_DIRECTORY)));
+
+ ret = gf_proc_dump_open(tmp_dump_name);
+ if (ret < 0)
+ goto out;
+
+ // continue even though gettimeofday() has failed
+ ret = gettimeofday(&tv, NULL);
+ if (0 == ret) {
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
+ }
+
+ len = snprintf(sign_string, sizeof(sign_string), "DUMP-START-TIME: %s\n",
+ timestr);
+
+ // swallow the errors of write for start and end marker
+ (void)sys_write(gf_dump_fd, sign_string, len);
+
+ memset(timestr, 0, sizeof(timestr));
+
+ if (GF_PROC_DUMP_IS_OPTION_ENABLED(mem)) {
+ gf_proc_dump_mem_info();
+ gf_proc_dump_mempool_info(ctx);
+ }
+
+ if (GF_PROC_DUMP_IS_OPTION_ENABLED(iobuf))
+ iobuf_stats_dump(ctx->iobuf_pool);
+ if (GF_PROC_DUMP_IS_OPTION_ENABLED(callpool))
+ gf_proc_dump_pending_frames(ctx->pool);
+
+ /* dictionary stats */
+ gf_proc_dump_add_section("dict");
+ gf_proc_dump_dict_info(ctx);
+
+ if (ctx->master) {
+ gf_proc_dump_add_section("fuse");
+ gf_proc_dump_single_xlator_info(ctx->master);
+ }
+
+ if (ctx->active) {
+ gf_proc_dump_add_section("active graph - %d", ctx->graph_id);
+ gf_proc_dump_xlator_info(ctx->active->top, is_brick_mux);
+ }
+
+ i = 0;
+ list_for_each_entry(trav, &ctx->graphs, list)
+ {
+ if (trav == ctx->active)
+ continue;
+
+ gf_proc_dump_add_section("oldgraph[%d]", i);
+
+ gf_proc_dump_oldgraph_xlator_info(trav->top);
+ i++;
+ }
+
+ ret = gettimeofday(&tv, NULL);
+ if (0 == ret) {
+ gf_time_fmt_tv(timestr, sizeof timestr, &tv, gf_timefmt_FT);
+ }
+
+ len = snprintf(sign_string, sizeof(sign_string), "\nDUMP-END-TIME: %s",
+ timestr);
+ (void)sys_write(gf_dump_fd, sign_string, len);
+
+ if (gf_dump_fd != -1)
+ gf_proc_dump_close();
+ sys_rename(tmp_dump_name, path);
+out:
+ GF_FREE(dump_options.dump_path);
+ dump_options.dump_path = NULL;
+ if (ctx) {
+ gf_proc_dump_unlock();
+ if (mgmt_is_multiplexed_daemon(ctx->cmd_args.process_name))
+ pthread_mutex_unlock(&ctx->cleanup_lock);
+ }
+
+ return;
+}
- ctx = glusterfs_ctx_get ();
- if (!ctx)
- goto out;
+void
+gf_proc_dump_fini(void)
+{
+ pthread_mutex_destroy(&gf_proc_dump_mutex);
+}
- if (ctx->cmd_args.brick_name) {
- GF_REMOVE_SLASH_FROM_PATH (ctx->cmd_args.brick_name, brick_name);
- } else
- strncpy (brick_name, "glusterdump", sizeof (brick_name));
+void
+gf_proc_dump_init()
+{
+ pthread_mutex_init(&gf_proc_dump_mutex, NULL);
- ret = gf_proc_dump_open (ctx->statedump_path, brick_name);
- if (ret < 0)
- goto out;
+ return;
+}
- ret = gf_proc_dump_options_init ();
- if (ret < 0)
- goto out;
+void
+gf_proc_dump_cleanup(void)
+{
+ pthread_mutex_destroy(&gf_proc_dump_mutex);
+}
- if (GF_PROC_DUMP_IS_OPTION_ENABLED (mem)) {
- gf_proc_dump_mem_info ();
- gf_proc_dump_mempool_info (ctx);
- }
+void
+gf_proc_dump_xlator_private(xlator_t *this, strfd_t *strfd)
+{
+ gf_proc_dump_lock();
+ {
+ gf_dump_strfd = strfd;
- if (GF_PROC_DUMP_IS_OPTION_ENABLED (iobuf))
- iobuf_stats_dump (ctx->iobuf_pool);
- if (GF_PROC_DUMP_IS_OPTION_ENABLED (callpool))
- gf_proc_dump_pending_frames (ctx->pool);
+ if (this->dumpops && this->dumpops->priv)
+ this->dumpops->priv(this);
- if (ctx->master) {
- gf_proc_dump_add_section ("fuse");
- gf_proc_dump_xlator_info (ctx->master);
- }
+ gf_dump_strfd = NULL;
+ }
+ gf_proc_dump_unlock();
+}
- if (ctx->active) {
- gf_proc_dump_add_section ("active graph - %d", ctx->graph_id);
- gf_proc_dump_xlator_info (ctx->active->top);
- }
+void
+gf_proc_dump_mallinfo(strfd_t *strfd)
+{
+ gf_proc_dump_lock();
+ {
+ gf_dump_strfd = strfd;
- i = 0;
- list_for_each_entry (trav, &ctx->graphs, list) {
- if (trav == ctx->active)
- continue;
+ gf_proc_dump_mem_info();
- gf_proc_dump_add_section ("oldgraph[%d]", i);
+ gf_dump_strfd = NULL;
+ }
+ gf_proc_dump_unlock();
+}
- gf_proc_dump_oldgraph_xlator_info (trav->top);
- i++;
- }
+void
+gf_proc_dump_xlator_history(xlator_t *this, strfd_t *strfd)
+{
+ gf_proc_dump_lock();
+ {
+ gf_dump_strfd = strfd;
-out:
- if (gf_dump_fd != -1)
- gf_proc_dump_close ();
- gf_proc_dump_unlock ();
+ if (this->dumpops && this->dumpops->history)
+ this->dumpops->history(this);
- return;
+ gf_dump_strfd = NULL;
+ }
+ gf_proc_dump_unlock();
}
-
void
-gf_proc_dump_fini (void)
+gf_proc_dump_xlator_itable(xlator_t *this, strfd_t *strfd)
{
- pthread_mutex_destroy (&gf_proc_dump_mutex);
-}
+ gf_proc_dump_lock();
+ {
+ gf_dump_strfd = strfd;
+ gf_dump_strfd = NULL;
+ }
+ gf_proc_dump_unlock();
+}
void
-gf_proc_dump_init ()
+gf_proc_dump_xlator_meminfo(xlator_t *this, strfd_t *strfd)
{
- pthread_mutex_init (&gf_proc_dump_mutex, NULL);
+ gf_proc_dump_lock();
+ {
+ gf_dump_strfd = strfd;
- return;
-}
+ gf_proc_dump_xlator_mem_info(this);
+ gf_dump_strfd = NULL;
+ }
+ gf_proc_dump_unlock();
+}
void
-gf_proc_dump_cleanup (void)
+gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd)
{
- pthread_mutex_destroy (&gf_proc_dump_mutex);
+ gf_proc_dump_lock();
+ {
+ gf_dump_strfd = strfd;
+
+ gf_proc_dump_xl_latency_info(this);
+
+ gf_dump_strfd = NULL;
+ }
+ gf_proc_dump_unlock();
}
diff --git a/libglusterfs/src/statedump.h b/libglusterfs/src/statedump.h
deleted file mode 100644
index dc56bda0cbb..00000000000
--- a/libglusterfs/src/statedump.h
+++ /dev/null
@@ -1,85 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-
-#ifndef STATEDUMP_H
-#define STATEDUMP_H
-
-#include <stdarg.h>
-#include "inode.h"
-
-#define GF_DUMP_MAX_BUF_LEN 4096
-
-typedef struct gf_dump_xl_options_ {
- gf_boolean_t dump_priv;
- gf_boolean_t dump_inode;
- gf_boolean_t dump_fd;
- gf_boolean_t dump_inodectx;
- gf_boolean_t dump_fdctx;
- gf_boolean_t dump_history;
-} gf_dump_xl_options_t;
-
-typedef struct gf_dump_options_ {
- gf_boolean_t dump_mem;
- gf_boolean_t dump_iobuf;
- gf_boolean_t dump_callpool;
- gf_dump_xl_options_t xl_options; //options for all xlators
-} gf_dump_options_t;
-
-extern gf_dump_options_t dump_options;
-
-static inline
-void _gf_proc_dump_build_key (char *key, const char *prefix, char *fmt,...)
-{
- char buf[GF_DUMP_MAX_BUF_LEN];
- va_list ap;
-
- memset(buf, 0, sizeof(buf));
- va_start(ap, fmt);
- vsnprintf(buf, GF_DUMP_MAX_BUF_LEN, fmt, ap);
- va_end(ap);
- snprintf(key, GF_DUMP_MAX_BUF_LEN, "%s.%s", prefix, buf);
-}
-
-#define gf_proc_dump_build_key(key, key_prefix, fmt...) \
- { \
- _gf_proc_dump_build_key(key, key_prefix, ##fmt); \
- }
-
-#define GF_PROC_DUMP_SET_OPTION(opt,val) opt = val
-
-void gf_proc_dump_init();
-
-void gf_proc_dump_fini(void);
-
-void gf_proc_dump_cleanup(void);
-
-void gf_proc_dump_info(int signum);
-
-int gf_proc_dump_add_section(char *key,...);
-
-int gf_proc_dump_write(char *key, char *value,...);
-
-void inode_table_dump(inode_table_t *itable, char *prefix);
-
-void inode_table_dump_to_dict (inode_table_t *itable, char *prefix, dict_t *dict);
-
-void fdtable_dump(fdtable_t *fdtable, char *prefix);
-
-void fdtable_dump_to_dict (fdtable_t *fdtable, char *prefix, dict_t *dict);
-
-void inode_dump(inode_t *inode, char *prefix);
-
-void gf_proc_dump_mem_info_to_dict (dict_t *dict);
-
-void gf_proc_dump_mempool_info_to_dict (glusterfs_ctx_t *ctx, dict_t *dict);
-
-void glusterd_init (int sig);
-#endif /* STATEDUMP_H */
diff --git a/libglusterfs/src/store.c b/libglusterfs/src/store.c
new file mode 100644
index 00000000000..5c316b9291a
--- /dev/null
+++ b/libglusterfs/src/store.c
@@ -0,0 +1,744 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <inttypes.h>
+#include <libgen.h>
+
+#include "glusterfs/glusterfs.h"
+#include "glusterfs/store.h"
+#include "glusterfs/xlator.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+int32_t
+gf_store_mkdir(char *path)
+{
+ int32_t ret = -1;
+
+ ret = mkdir_p(path, 0755, _gf_true);
+
+ if ((-1 == ret) && (EEXIST != errno)) {
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED,
+ "mkdir()"
+ " failed on path %s.",
+ path);
+ } else {
+ ret = 0;
+ }
+
+ return ret;
+}
+
+int32_t
+gf_store_handle_create_on_absence(gf_store_handle_t **shandle, char *path)
+{
+ GF_ASSERT(shandle);
+ int32_t ret = 0;
+
+ if (*shandle == NULL) {
+ ret = gf_store_handle_new(path, shandle);
+
+ if (ret) {
+ gf_msg("", GF_LOG_ERROR, 0, LG_MSG_STORE_HANDLE_CREATE_FAILED,
+ "Unable to"
+ " create store handle for path: %s",
+ path);
+ }
+ }
+ return ret;
+}
+
+int32_t
+gf_store_mkstemp(gf_store_handle_t *shandle)
+{
+ char tmppath[PATH_MAX] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("store", shandle, out);
+ GF_VALIDATE_OR_GOTO("store", shandle->path, out);
+
+ snprintf(tmppath, sizeof(tmppath), "%s.tmp", shandle->path);
+ shandle->tmp_fd = open(tmppath, O_RDWR | O_CREAT | O_TRUNC, 0600);
+ if (shandle->tmp_fd < 0) {
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to open %s.", tmppath);
+ }
+out:
+ return shandle->tmp_fd;
+}
+
+int
+gf_store_sync_direntry(char *path)
+{
+ int ret = -1;
+ int dirfd = -1;
+ char *dir = NULL;
+ char *pdir = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ dir = gf_strdup(path);
+ if (!dir)
+ goto out;
+
+ pdir = dirname(dir);
+ dirfd = open(pdir, O_RDONLY);
+ if (dirfd == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED,
+ "Failed to open directory %s.", pdir);
+ goto out;
+ }
+
+ ret = sys_fsync(dirfd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED,
+ "Failed to fsync %s.", pdir);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (dirfd >= 0) {
+ ret = sys_close(dirfd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED,
+ "Failed to close %s", pdir);
+ }
+ }
+
+ if (dir)
+ GF_FREE(dir);
+
+ return ret;
+}
+
+int32_t
+gf_store_rename_tmppath(gf_store_handle_t *shandle)
+{
+ int32_t ret = -1;
+ char tmppath[PATH_MAX] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("store", shandle, out);
+ GF_VALIDATE_OR_GOTO("store", shandle->path, out);
+
+ ret = sys_fsync(shandle->tmp_fd);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to fsync %s", shandle->path);
+ goto out;
+ }
+ snprintf(tmppath, sizeof(tmppath), "%s.tmp", shandle->path);
+ ret = sys_rename(tmppath, shandle->path);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to rename %s to %s", tmppath, shandle->path);
+ goto out;
+ }
+
+ ret = gf_store_sync_direntry(tmppath);
+out:
+ if (shandle && shandle->tmp_fd >= 0) {
+ sys_close(shandle->tmp_fd);
+ shandle->tmp_fd = -1;
+ }
+ return ret;
+}
+
+int32_t
+gf_store_unlink_tmppath(gf_store_handle_t *shandle)
+{
+ int32_t ret = -1;
+ char tmppath[PATH_MAX] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("store", shandle, out);
+ GF_VALIDATE_OR_GOTO("store", shandle->path, out);
+
+ snprintf(tmppath, sizeof(tmppath), "%s.tmp", shandle->path);
+ ret = sys_unlink(tmppath);
+ if (ret && (errno != ENOENT)) {
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to mv %s to %s", tmppath, shandle->path);
+ } else {
+ ret = 0;
+ }
+out:
+ if (shandle && shandle->tmp_fd >= 0) {
+ sys_close(shandle->tmp_fd);
+ shandle->tmp_fd = -1;
+ }
+ return ret;
+}
+
+int
+gf_store_read_and_tokenize(FILE *file, char **iter_key, char **iter_val,
+ gf_store_op_errno_t *store_errno)
+{
+ int32_t ret = -1;
+ char *savetok = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char *temp = NULL;
+ size_t str_len = 0;
+ char str[8192];
+
+ GF_ASSERT(file);
+ GF_ASSERT(iter_key);
+ GF_ASSERT(iter_val);
+ GF_ASSERT(store_errno);
+
+retry:
+ temp = fgets(str, 8192, file);
+ if (temp == NULL || feof(file)) {
+ ret = -1;
+ *store_errno = GD_STORE_EOF;
+ goto out;
+ }
+
+ if (strcmp(str, "\n") == 0)
+ goto retry;
+
+ str_len = strlen(str);
+ str[str_len - 1] = '\0';
+ /* Truncate the "\n", as fgets stores "\n" in str */
+
+ key = strtok_r(str, "=", &savetok);
+ if (!key) {
+ ret = -1;
+ *store_errno = GD_STORE_KEY_NULL;
+ goto out;
+ }
+
+ value = strtok_r(NULL, "", &savetok);
+ if (!value) {
+ ret = -1;
+ *store_errno = GD_STORE_VALUE_NULL;
+ goto out;
+ }
+
+ *iter_key = key;
+ *iter_val = value;
+ *store_errno = GD_STORE_SUCCESS;
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+gf_store_retrieve_value(gf_store_handle_t *handle, char *key, char **value)
+{
+ int32_t ret = -1;
+ char *iter_key = NULL;
+ char *iter_val = NULL;
+ gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
+
+ GF_ASSERT(handle);
+
+ if (handle->locked == F_ULOCK)
+ /* no locking is used handle->fd gets closed() after usage */
+ handle->fd = open(handle->path, O_RDWR);
+ else
+ /* handle->fd is valid already, kept open for lockf() */
+ sys_lseek(handle->fd, 0, SEEK_SET);
+
+ if (handle->fd == -1) {
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Unable to open file %s", handle->path);
+ goto out;
+ }
+ if (!handle->read) {
+ int duped_fd = dup(handle->fd);
+
+ if (duped_fd >= 0)
+ handle->read = fdopen(duped_fd, "r");
+ if (!handle->read) {
+ if (duped_fd != -1)
+ sys_close(duped_fd);
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Unable to open file %s", handle->path);
+ goto out;
+ }
+ } else {
+ fseek(handle->read, 0, SEEK_SET);
+ }
+ do {
+ ret = gf_store_read_and_tokenize(handle->read, &iter_key, &iter_val,
+ &store_errno);
+ if (ret < 0) {
+ gf_msg_trace("", 0,
+ "error while reading key '%s': "
+ "%s",
+ key, gf_store_strerror(store_errno));
+ goto out;
+ }
+
+ gf_msg_trace("", 0, "key %s read", iter_key);
+
+ if (!strcmp(key, iter_key)) {
+ gf_msg_debug("", 0, "key %s found", key);
+ ret = 0;
+ if (iter_val)
+ *value = gf_strdup(iter_val);
+ goto out;
+ }
+ } while (1);
+out:
+ if (handle->read) {
+ fclose(handle->read);
+ handle->read = NULL;
+ }
+
+ if (handle->fd > 0 && handle->locked == F_ULOCK) {
+ /* only invalidate handle->fd if not locked */
+ sys_close(handle->fd);
+ }
+
+ return ret;
+}
+
+int32_t
+gf_store_save_value(int fd, char *key, char *value)
+{
+ int32_t ret = -1;
+ int dup_fd = -1;
+ FILE *fp = NULL;
+
+ GF_ASSERT(fd > 0);
+ GF_ASSERT(key);
+ GF_ASSERT(value);
+
+ dup_fd = dup(fd);
+ if (dup_fd == -1)
+ goto out;
+
+ fp = fdopen(dup_fd, "a+");
+ if (fp == NULL) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "fdopen failed.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = fprintf(fp, "%s=%s\n", key, value);
+ if (ret < 0) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "Unable to store key: %s, value: %s.", key, value);
+ ret = -1;
+ goto out;
+ }
+
+ ret = fflush(fp);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "fflush failed.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (fp)
+ fclose(fp);
+
+ gf_msg_debug(THIS->name, 0, "returning: %d", ret);
+ return ret;
+}
+
+int32_t
+gf_store_save_items(int fd, char *items)
+{
+ int32_t ret = -1;
+ int dup_fd = -1;
+ FILE *fp = NULL;
+
+ GF_ASSERT(fd > 0);
+ GF_ASSERT(items);
+
+ dup_fd = dup(fd);
+ if (dup_fd == -1)
+ goto out;
+
+ fp = fdopen(dup_fd, "a+");
+ if (fp == NULL) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "fdopen failed.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = fputs(items, fp);
+ if (ret < 0) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "Unable to store items: %s", items);
+ ret = -1;
+ goto out;
+ }
+
+ ret = fflush(fp);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, LG_MSG_FILE_OP_FAILED,
+ "fflush failed.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (fp)
+ fclose(fp);
+
+ gf_msg_debug(THIS->name, 0, "returning: %d", ret);
+ return ret;
+}
+
+int32_t
+gf_store_handle_new(const char *path, gf_store_handle_t **handle)
+{
+ int32_t ret = -1;
+ gf_store_handle_t *shandle = NULL;
+ int fd = -1;
+ char *spath = NULL;
+
+ shandle = GF_CALLOC(1, sizeof(*shandle), gf_common_mt_store_handle_t);
+ if (!shandle)
+ goto out;
+
+ spath = gf_strdup(path);
+ if (!spath)
+ goto out;
+
+ fd = open(path, O_RDWR | O_CREAT | O_APPEND, 0600);
+ if (fd < 0) {
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to open file: %s.", path);
+ goto out;
+ }
+
+ ret = gf_store_sync_direntry(spath);
+ if (ret)
+ goto out;
+
+ shandle->path = spath;
+ shandle->locked = F_ULOCK;
+ *handle = shandle;
+ shandle->tmp_fd = -1;
+
+ ret = 0;
+out:
+ if (fd >= 0)
+ sys_close(fd);
+
+ if (ret) {
+ GF_FREE(spath);
+ GF_FREE(shandle);
+ }
+
+ gf_msg_debug("", 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+gf_store_handle_retrieve(char *path, gf_store_handle_t **handle)
+{
+ int32_t ret = -1;
+ struct stat statbuf = {0};
+
+ ret = sys_stat(path, &statbuf);
+ if (ret) {
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_PATH_NOT_FOUND,
+ "Path "
+ "corresponding to %s.",
+ path);
+ goto out;
+ }
+ ret = gf_store_handle_new(path, handle);
+out:
+ gf_msg_debug("", 0, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+gf_store_handle_destroy(gf_store_handle_t *handle)
+{
+ int32_t ret = -1;
+
+ if (!handle) {
+ ret = 0;
+ goto out;
+ }
+
+ GF_FREE(handle->path);
+
+ GF_FREE(handle);
+
+ ret = 0;
+
+out:
+ gf_msg_debug("", 0, "Returning %d", ret);
+
+ return ret;
+}
+
+int32_t
+gf_store_iter_new(gf_store_handle_t *shandle, gf_store_iter_t **iter)
+{
+ int32_t ret = -1;
+ FILE *fp = NULL;
+ gf_store_iter_t *tmp_iter = NULL;
+
+ GF_ASSERT(shandle);
+ GF_ASSERT(iter);
+
+ fp = fopen(shandle->path, "r");
+ if (!fp) {
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Unable to open file %s", shandle->path);
+ goto out;
+ }
+
+ tmp_iter = GF_CALLOC(1, sizeof(*tmp_iter), gf_common_mt_store_iter_t);
+ if (!tmp_iter)
+ goto out;
+
+ if (snprintf(tmp_iter->filepath, sizeof(tmp_iter->filepath), "%s",
+ shandle->path) >= sizeof(tmp_iter->filepath))
+ goto out;
+
+ tmp_iter->file = fp;
+
+ *iter = tmp_iter;
+ tmp_iter = NULL;
+ ret = 0;
+
+out:
+ if (ret && fp)
+ fclose(fp);
+
+ GF_FREE(tmp_iter);
+
+ gf_msg_debug("", 0, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+gf_store_validate_key_value(char *storepath, char *key, char *val,
+ gf_store_op_errno_t *op_errno)
+{
+ int ret = 0;
+
+ GF_ASSERT(op_errno);
+ GF_ASSERT(storepath);
+
+ if ((key == NULL) && (val == NULL)) {
+ ret = -1;
+ gf_msg("", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
+ "Glusterd "
+ "store may be corrupted, Invalid key and value (null)"
+ " in %s",
+ storepath);
+ *op_errno = GD_STORE_KEY_VALUE_NULL;
+ } else if (key == NULL) {
+ ret = -1;
+ gf_msg("", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
+ "Glusterd "
+ "store may be corrupted, Invalid key (null) in %s",
+ storepath);
+ *op_errno = GD_STORE_KEY_NULL;
+ } else if (val == NULL) {
+ ret = -1;
+ gf_msg("", GF_LOG_ERROR, 0, LG_MSG_INVALID_ENTRY,
+ "Glusterd "
+ "store may be corrupted, Invalid value (null) for key"
+ " %s in %s",
+ key, storepath);
+ *op_errno = GD_STORE_VALUE_NULL;
+ } else {
+ ret = 0;
+ *op_errno = GD_STORE_SUCCESS;
+ }
+
+ return ret;
+}
+
+int32_t
+gf_store_iter_get_next(gf_store_iter_t *iter, char **key, char **value,
+ gf_store_op_errno_t *op_errno)
+{
+ int32_t ret = -1;
+ char *iter_key = NULL;
+ char *iter_val = NULL;
+ gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
+
+ GF_ASSERT(iter);
+ GF_ASSERT(key);
+ GF_ASSERT(value);
+
+ ret = gf_store_read_and_tokenize(iter->file, &iter_key, &iter_val,
+ &store_errno);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = gf_store_validate_key_value(iter->filepath, iter_key, iter_val,
+ &store_errno);
+ if (ret)
+ goto out;
+
+ *key = gf_strdup(iter_key);
+ if (!*key) {
+ ret = -1;
+ store_errno = GD_STORE_ENOMEM;
+ goto out;
+ }
+ *value = gf_strdup(iter_val);
+ if (!*value) {
+ ret = -1;
+ store_errno = GD_STORE_ENOMEM;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ if (ret) {
+ GF_FREE(*key);
+ GF_FREE(*value);
+ *key = NULL;
+ *value = NULL;
+ }
+ if (op_errno)
+ *op_errno = store_errno;
+
+ gf_msg_debug("", 0, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+gf_store_iter_get_matching(gf_store_iter_t *iter, char *key, char **value)
+{
+ int32_t ret = -1;
+ char *tmp_key = NULL;
+ char *tmp_value = NULL;
+
+ ret = gf_store_iter_get_next(iter, &tmp_key, &tmp_value, NULL);
+ while (!ret) {
+ if (!strncmp(key, tmp_key, strlen(key))) {
+ *value = tmp_value;
+ GF_FREE(tmp_key);
+ goto out;
+ }
+ GF_FREE(tmp_key);
+ tmp_key = NULL;
+ GF_FREE(tmp_value);
+ tmp_value = NULL;
+ ret = gf_store_iter_get_next(iter, &tmp_key, &tmp_value, NULL);
+ }
+out:
+ return ret;
+}
+
+int32_t
+gf_store_iter_destroy(gf_store_iter_t **iter)
+{
+ int32_t ret = -1;
+
+ if (!(*iter))
+ return 0;
+
+ /* gf_store_iter_new will not return a valid iter object with iter->file
+ * being NULL*/
+ ret = fclose((*iter)->file);
+ if (ret)
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Unable"
+ " to close file: %s, ret: %d",
+ (*iter)->filepath, ret);
+
+ GF_FREE(*iter);
+ *iter = NULL;
+
+ return ret;
+}
+
+char *
+gf_store_strerror(gf_store_op_errno_t op_errno)
+{
+ switch (op_errno) {
+ case GD_STORE_SUCCESS:
+ return "Success";
+ case GD_STORE_KEY_NULL:
+ return "Invalid Key";
+ case GD_STORE_VALUE_NULL:
+ return "Invalid Value";
+ case GD_STORE_KEY_VALUE_NULL:
+ return "Invalid Key and Value";
+ case GD_STORE_EOF:
+ return "No data";
+ case GD_STORE_ENOMEM:
+ return "No memory";
+ default:
+ return "Invalid errno";
+ }
+}
+
+int
+gf_store_lock(gf_store_handle_t *sh)
+{
+ int ret;
+
+ GF_ASSERT(sh);
+ GF_ASSERT(sh->path);
+ GF_ASSERT(sh->locked == F_ULOCK);
+
+ sh->fd = open(sh->path, O_RDWR);
+ if (sh->fd == -1) {
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_FILE_OP_FAILED,
+ "Failed to open '%s'", sh->path);
+ return -1;
+ }
+
+ ret = lockf(sh->fd, F_LOCK, 0);
+ if (ret)
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_LOCK_FAILED,
+ "Failed to gain lock on '%s'", sh->path);
+ else
+ /* sh->locked is protected by the lockf(sh->fd) above */
+ sh->locked = F_LOCK;
+
+ return ret;
+}
+
+void
+gf_store_unlock(gf_store_handle_t *sh)
+{
+ GF_ASSERT(sh);
+ GF_ASSERT(sh->locked == F_LOCK);
+
+ sh->locked = F_ULOCK;
+
+ /* does not matter if this fails, locks are released on close anyway */
+ if (lockf(sh->fd, F_ULOCK, 0) == -1)
+ gf_msg("", GF_LOG_ERROR, errno, LG_MSG_UNLOCK_FAILED,
+ "Failed to release lock on '%s'", sh->path);
+
+ sys_close(sh->fd);
+}
+
+int
+gf_store_locked_local(gf_store_handle_t *sh)
+{
+ GF_ASSERT(sh);
+
+ return (sh->locked == F_LOCK);
+}
diff --git a/libglusterfs/src/strfd.c b/libglusterfs/src/strfd.c
new file mode 100644
index 00000000000..8a2580edc85
--- /dev/null
+++ b/libglusterfs/src/strfd.c
@@ -0,0 +1,93 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdarg.h>
+
+#include "glusterfs/mem-types.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/strfd.h"
+#include "glusterfs/common-utils.h"
+
+strfd_t *
+strfd_open()
+{
+ strfd_t *strfd = NULL;
+
+ strfd = GF_CALLOC(1, sizeof(*strfd), gf_common_mt_strfd_t);
+
+ return strfd;
+}
+
+int
+strvprintf(strfd_t *strfd, const char *fmt, va_list ap)
+{
+ char *str = NULL;
+ int size = 0;
+
+ size = vasprintf(&str, fmt, ap);
+
+ if (size < 0)
+ return size;
+
+ if (!strfd->alloc_size) {
+ strfd->data = GF_CALLOC(max(size + 1, 4096), 1,
+ gf_common_mt_strfd_data_t);
+ if (!strfd->data) {
+ free(str); /* NOT GF_FREE */
+ return -1;
+ }
+ strfd->alloc_size = max(size + 1, 4096);
+ }
+
+ if (strfd->alloc_size <= (strfd->size + size)) {
+ char *tmp_ptr = NULL;
+ int new_size = max(
+ (strfd->alloc_size * 2),
+ gf_roundup_next_power_of_two(strfd->size + size + 1));
+ tmp_ptr = GF_REALLOC(strfd->data, new_size);
+ if (!tmp_ptr) {
+ free(str); /* NOT GF_FREE */
+ return -1;
+ }
+ strfd->alloc_size = new_size;
+ strfd->data = tmp_ptr;
+ }
+
+ /* Copy the trailing '\0', but do not account for it in ->size.
+ This allows safe use of strfd->data as a string. */
+ memcpy(strfd->data + strfd->size, str, size + 1);
+ strfd->size += size;
+
+ free(str); /* NOT GF_FREE */
+
+ return size;
+}
+
+int
+strprintf(strfd_t *strfd, const char *fmt, ...)
+{
+ int ret = 0;
+ va_list ap;
+
+ va_start(ap, fmt);
+ ret = strvprintf(strfd, fmt, ap);
+ va_end(ap);
+
+ return ret;
+}
+
+int
+strfd_close(strfd_t *strfd)
+{
+ GF_FREE(strfd->data);
+ GF_FREE(strfd);
+
+ return 0;
+}
diff --git a/libglusterfs/src/syncop-utils.c b/libglusterfs/src/syncop-utils.c
new file mode 100644
index 00000000000..d9f1723856d
--- /dev/null
+++ b/libglusterfs/src/syncop-utils.c
@@ -0,0 +1,669 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/syncop.h"
+#include "glusterfs/syncop-utils.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+struct syncop_dir_scan_data {
+ xlator_t *subvol;
+ loc_t *parent;
+ void *data;
+ gf_dirent_t *q;
+ gf_dirent_t *entry;
+ pthread_cond_t *cond;
+ pthread_mutex_t *mut;
+ syncop_dir_scan_fn_t fn;
+ uint32_t *jobs_running;
+ uint32_t *qlen;
+ int32_t *retval;
+};
+
+int
+syncop_dirfd(xlator_t *subvol, loc_t *loc, fd_t **fd, int pid)
+{
+ int ret = 0;
+ fd_t *dirfd = NULL;
+
+ if (!fd)
+ return -EINVAL;
+
+ dirfd = fd_create(loc->inode, pid);
+ if (!dirfd) {
+ gf_msg(subvol->name, GF_LOG_ERROR, errno, LG_MSG_FD_CREATE_FAILED,
+ "fd_create of %s", uuid_utoa(loc->gfid));
+ ret = -errno;
+ goto out;
+ }
+
+ ret = syncop_opendir(subvol, loc, dirfd, NULL, NULL);
+ if (ret) {
+ /*
+ * On Linux, if the brick was not updated, opendir will
+ * fail. We therefore use backward compatible code
+ * that violate the standards by reusing offsets
+ * in seekdir() from different DIR *, but it works on Linux.
+ *
+ * On other systems it never worked, hence we do not need
+ * to provide backward-compatibility.
+ */
+#ifdef GF_LINUX_HOST_OS
+ fd_unref(dirfd);
+ dirfd = fd_anonymous(loc->inode);
+ if (!dirfd) {
+ gf_msg(subvol->name, GF_LOG_ERROR, errno,
+ LG_MSG_FD_ANONYMOUS_FAILED,
+ "fd_anonymous of "
+ "%s",
+ uuid_utoa(loc->gfid));
+ ret = -errno;
+ goto out;
+ }
+ ret = 0;
+#else /* GF_LINUX_HOST_OS */
+ fd_unref(dirfd);
+ gf_msg(subvol->name, GF_LOG_ERROR, errno, LG_MSG_DIR_OP_FAILED,
+ "opendir of %s", uuid_utoa(loc->gfid));
+ goto out;
+#endif /* GF_LINUX_HOST_OS */
+ } else {
+ fd_bind(dirfd);
+ }
+out:
+ if (ret == 0)
+ *fd = dirfd;
+ return ret;
+}
+
+int
+syncop_ftw(xlator_t *subvol, loc_t *loc, int pid, void *data,
+ int (*fn)(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data))
+{
+ loc_t child_loc = {
+ 0,
+ };
+ fd_t *fd = NULL;
+ uint64_t offset = 0;
+ gf_dirent_t *entry = NULL;
+ int ret = 0;
+ gf_dirent_t entries;
+
+ ret = syncop_dirfd(subvol, loc, &fd, pid);
+ if (ret)
+ goto out;
+
+ INIT_LIST_HEAD(&entries.list);
+
+ while ((ret = syncop_readdirp(subvol, fd, 131072, offset, &entries, NULL,
+ NULL))) {
+ if (ret < 0)
+ break;
+
+ if (ret > 0) {
+ /* If the entries are only '.', and '..' then ret
+ * value will be non-zero. so set it to zero here. */
+ ret = 0;
+ }
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ offset = entry->d_off;
+
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ continue;
+
+ gf_link_inode_from_dirent(NULL, fd->inode, entry);
+
+ ret = fn(subvol, entry, loc, data);
+ if (ret)
+ break;
+
+ if (entry->d_stat.ia_type == IA_IFDIR) {
+ child_loc.inode = inode_ref(entry->inode);
+ gf_uuid_copy(child_loc.gfid, entry->inode->gfid);
+ ret = syncop_ftw(subvol, &child_loc, pid, data, fn);
+ loc_wipe(&child_loc);
+ if (ret)
+ break;
+ }
+ }
+
+ gf_dirent_free(&entries);
+ if (ret)
+ break;
+ }
+
+out:
+ if (fd)
+ fd_unref(fd);
+ return ret;
+}
+
+/**
+ * Syncop_ftw_throttle can be used in a configurable way to control
+ * the speed at which crawling is done. It takes 2 more arguments
+ * compared to syncop_ftw.
+ * After @count entries are finished in a directory (to be
+ * precise, @count files) sleep for @sleep_time seconds.
+ * If either @count or @sleep_time is <=0, then it behaves similar to
+ * syncop_ftw.
+ */
+int
+syncop_ftw_throttle(xlator_t *subvol, loc_t *loc, int pid, void *data,
+ int (*fn)(xlator_t *subvol, gf_dirent_t *entry,
+ loc_t *parent, void *data),
+ int count, int sleep_time)
+{
+ loc_t child_loc = {
+ 0,
+ };
+ fd_t *fd = NULL;
+ uint64_t offset = 0;
+ gf_dirent_t *entry = NULL;
+ int ret = 0;
+ gf_dirent_t entries;
+ int tmp = 0;
+
+ if (sleep_time <= 0) {
+ ret = syncop_ftw(subvol, loc, pid, data, fn);
+ goto out;
+ }
+
+ ret = syncop_dirfd(subvol, loc, &fd, pid);
+ if (ret)
+ goto out;
+
+ INIT_LIST_HEAD(&entries.list);
+
+ while ((ret = syncop_readdirp(subvol, fd, 131072, offset, &entries, NULL,
+ NULL))) {
+ if (ret < 0)
+ break;
+
+ if (ret > 0) {
+ /* If the entries are only '.', and '..' then ret
+ * value will be non-zero. so set it to zero here. */
+ ret = 0;
+ }
+
+ tmp = 0;
+
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ offset = entry->d_off;
+
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ continue;
+
+ if (++tmp >= count) {
+ tmp = 0;
+ sleep(sleep_time);
+ }
+
+ gf_link_inode_from_dirent(NULL, fd->inode, entry);
+
+ ret = fn(subvol, entry, loc, data);
+ if (ret)
+ continue;
+
+ if (entry->d_stat.ia_type == IA_IFDIR) {
+ child_loc.inode = inode_ref(entry->inode);
+ gf_uuid_copy(child_loc.gfid, entry->inode->gfid);
+ ret = syncop_ftw_throttle(subvol, &child_loc, pid, data, fn,
+ count, sleep_time);
+ loc_wipe(&child_loc);
+ if (ret)
+ continue;
+ }
+ }
+
+ gf_dirent_free(&entries);
+ if (ret)
+ break;
+ }
+
+out:
+ if (fd)
+ fd_unref(fd);
+ return ret;
+}
+
+static void
+_scan_data_destroy(struct syncop_dir_scan_data *data)
+{
+ GF_FREE(data);
+}
+
+static int
+_dir_scan_job_fn_cbk(int ret, call_frame_t *frame, void *opaque)
+{
+ struct syncop_dir_scan_data *scan_data = opaque;
+
+ _scan_data_destroy(scan_data);
+ return 0;
+}
+
+static int
+_dir_scan_job_fn(void *data)
+{
+ struct syncop_dir_scan_data *scan_data = data;
+ gf_dirent_t *entry = NULL;
+ int ret = 0;
+
+ entry = scan_data->entry;
+ scan_data->entry = NULL;
+ do {
+ ret = scan_data->fn(scan_data->subvol, entry, scan_data->parent,
+ scan_data->data);
+ gf_dirent_entry_free(entry);
+ entry = NULL;
+ pthread_mutex_lock(scan_data->mut);
+ {
+ if (ret)
+ *scan_data->retval |= ret;
+ if (list_empty(&scan_data->q->list)) {
+ (*scan_data->jobs_running)--;
+ pthread_cond_broadcast(scan_data->cond);
+ } else {
+ entry = list_first_entry(&scan_data->q->list,
+ typeof(*scan_data->q), list);
+ list_del_init(&entry->list);
+ (*scan_data->qlen)--;
+ }
+ }
+ pthread_mutex_unlock(scan_data->mut);
+ } while (entry);
+
+ return ret;
+}
+
+static int
+_run_dir_scan_task(call_frame_t *frame, xlator_t *subvol, loc_t *parent,
+ gf_dirent_t *q, gf_dirent_t *entry, int *retval,
+ pthread_mutex_t *mut, pthread_cond_t *cond,
+ uint32_t *jobs_running, uint32_t *qlen,
+ syncop_dir_scan_fn_t fn, void *data)
+{
+ int ret = 0;
+ struct syncop_dir_scan_data *scan_data = NULL;
+
+ scan_data = GF_CALLOC(1, sizeof(struct syncop_dir_scan_data),
+ gf_common_mt_scan_data);
+ if (!scan_data) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ scan_data->subvol = subvol;
+ scan_data->parent = parent;
+ scan_data->data = data;
+ scan_data->mut = mut;
+ scan_data->cond = cond;
+ scan_data->fn = fn;
+ scan_data->jobs_running = jobs_running;
+ scan_data->entry = entry;
+ scan_data->q = q;
+ scan_data->qlen = qlen;
+ scan_data->retval = retval;
+
+ ret = synctask_new(subvol->ctx->env, _dir_scan_job_fn, _dir_scan_job_fn_cbk,
+ frame, scan_data);
+out:
+ if (ret < 0) {
+ gf_dirent_entry_free(entry);
+ _scan_data_destroy(scan_data);
+ pthread_mutex_lock(mut);
+ {
+ *jobs_running = *jobs_running - 1;
+ }
+ pthread_mutex_unlock(mut);
+ /*No need to cond-broadcast*/
+ }
+ return ret;
+}
+
+int
+syncop_mt_dir_scan(call_frame_t *frame, xlator_t *subvol, loc_t *loc, int pid,
+ void *data, syncop_dir_scan_fn_t fn, dict_t *xdata,
+ uint32_t max_jobs, uint32_t max_qlen)
+{
+ fd_t *fd = NULL;
+ uint64_t offset = 0;
+ gf_dirent_t *last = NULL;
+ int ret = 0;
+ int retval = 0;
+ gf_dirent_t q;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ uint32_t jobs_running = 0;
+ uint32_t qlen = 0;
+ pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
+ pthread_mutex_t mut = PTHREAD_MUTEX_INITIALIZER;
+ gf_dirent_t entries;
+ xlator_t *this = NULL;
+
+ if (frame) {
+ this = frame->this;
+ } else {
+ this = THIS;
+ }
+
+ /*For this functionality to be implemented in general, we need
+ * synccond_t infra which doesn't block the executing thread. Until then
+ * return failures inside synctask if they use this.*/
+ if (synctask_get())
+ return -ENOTSUP;
+
+ if (max_jobs == 0)
+ return -EINVAL;
+
+ /*Code becomes simpler this way. cond_wait just on qlength.
+ * Little bit of cheating*/
+ if (max_qlen == 0)
+ max_qlen = 1;
+
+ ret = syncop_dirfd(subvol, loc, &fd, pid);
+ if (ret)
+ goto out;
+
+ INIT_LIST_HEAD(&entries.list);
+ INIT_LIST_HEAD(&q.list);
+
+ while ((ret = syncop_readdir(subvol, fd, 131072, offset, &entries, xdata,
+ NULL))) {
+ if (ret < 0)
+ break;
+
+ if (ret > 0) {
+ /* If the entries are only '.', and '..' then ret
+ * value will be non-zero. so set it to zero here. */
+ ret = 0;
+ }
+
+ last = list_last_entry(&entries.list, typeof(*last), list);
+ offset = last->d_off;
+
+ list_for_each_entry_safe(entry, tmp, &entries.list, list)
+ {
+ if (this && this->cleanup_starting)
+ goto out;
+
+ list_del_init(&entry->list);
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, "..")) {
+ gf_dirent_entry_free(entry);
+ continue;
+ }
+
+ if (entry->d_stat.ia_type == IA_IFDIR) {
+ ret = fn(subvol, entry, loc, data);
+ gf_dirent_entry_free(entry);
+ if (ret)
+ goto out;
+ continue;
+ }
+
+ if (retval) /*Any jobs failed?*/
+ goto out;
+
+ pthread_mutex_lock(&mut);
+ {
+ while (qlen == max_qlen)
+ pthread_cond_wait(&cond, &mut);
+ if (max_jobs == jobs_running) {
+ list_add_tail(&entry->list, &q.list);
+ qlen++;
+ entry = NULL;
+ } else {
+ jobs_running++;
+ }
+ }
+ pthread_mutex_unlock(&mut);
+
+ if (!entry)
+ continue;
+
+ ret = _run_dir_scan_task(frame, subvol, loc, &q, entry, &retval,
+ &mut, &cond, &jobs_running, &qlen, fn,
+ data);
+ if (ret)
+ goto out;
+ }
+ }
+
+out:
+ if (fd)
+ fd_unref(fd);
+
+ pthread_mutex_lock(&mut);
+ {
+ while (jobs_running)
+ pthread_cond_wait(&cond, &mut);
+ }
+ pthread_mutex_unlock(&mut);
+
+ gf_dirent_free(&q);
+ gf_dirent_free(&entries);
+
+ return ret | retval;
+}
+
+int
+syncop_dir_scan(xlator_t *subvol, loc_t *loc, int pid, void *data,
+ int (*fn)(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data))
+{
+ fd_t *fd = NULL;
+ uint64_t offset = 0;
+ gf_dirent_t *entry = NULL;
+ int ret = 0;
+ gf_dirent_t entries;
+
+ ret = syncop_dirfd(subvol, loc, &fd, pid);
+ if (ret)
+ goto out;
+
+ INIT_LIST_HEAD(&entries.list);
+
+ while ((ret = syncop_readdir(subvol, fd, 131072, offset, &entries, NULL,
+ NULL))) {
+ if (ret < 0)
+ break;
+
+ if (ret > 0) {
+ /* If the entries are only '.', and '..' then ret
+ * value will be non-zero. so set it to zero here. */
+ ret = 0;
+ }
+
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ offset = entry->d_off;
+
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ continue;
+
+ ret = fn(subvol, entry, loc, data);
+ if (ret)
+ break;
+ }
+ gf_dirent_free(&entries);
+ if (ret)
+ break;
+ }
+
+out:
+ if (fd)
+ fd_unref(fd);
+ return ret;
+}
+
+int
+syncop_is_subvol_local(xlator_t *this, loc_t *loc, gf_boolean_t *is_local)
+{
+ char *pathinfo = NULL;
+ dict_t *xattr = NULL;
+ int ret = 0;
+
+ if (!this || !this->type || !is_local)
+ return -EINVAL;
+
+ if (strcmp(this->type, "protocol/client") != 0)
+ return -EINVAL;
+
+ *is_local = _gf_false;
+
+ ret = syncop_getxattr(this, loc, &xattr, GF_XATTR_PATHINFO_KEY, NULL, NULL);
+ if (ret < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ if (!xattr) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = dict_get_str(xattr, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterfs_is_local_pathinfo(pathinfo, is_local);
+
+ gf_msg_debug(this->name, 0, "subvol %s is %slocal", this->name,
+ *is_local ? "" : "not ");
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+
+ return ret;
+}
+
+/**
+ * For hard resove, it it telling posix to make use of the
+ * gfid2path extended attribute stored on disk. Otherwise
+ * posix xlator (with GFID_TO_PATH_KEY as the key) will just
+ * do a in memory inode_path to get the path. Depending upon
+ * the consumer of this function, they can choose how they want
+ * to proceed. If doing a xattr operation sounds costly, then
+ * use GFID_TO_PATH_KEY as the key for getxattr.
+ **/
+
+int
+syncop_gfid_to_path_hard(inode_table_t *itable, xlator_t *subvol, uuid_t gfid,
+ inode_t *inode, char **path_p,
+ gf_boolean_t hard_resolve)
+{
+ int ret = 0;
+ char *path = NULL;
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+
+ gf_uuid_copy(loc.gfid, gfid);
+
+ if (!inode)
+ loc.inode = inode_new(itable);
+ else
+ loc.inode = inode_ref(inode);
+
+ if (!hard_resolve)
+ ret = syncop_getxattr(subvol, &loc, &xattr, GFID_TO_PATH_KEY, NULL,
+ NULL);
+ else
+ ret = syncop_getxattr(subvol, &loc, &xattr, GFID2PATH_VIRT_XATTR_KEY,
+ NULL, NULL);
+
+ if (ret < 0)
+ goto out;
+
+ /*
+ * posix will do dict_set_dynstr for GFID_TO_PATH_KEY i.e.
+ * for in memory search for the path. And for on disk xattr
+ * fetching of the path for the key GFID2PATH_VIRT_XATTR_KEY
+ * it uses dict_set_dynptr. So, for GFID2PATH_VIRT_XATTR_KEY
+ * use dict_get_ptr to avoid dict complaining about type
+ * mismatch (i.e. str vs ptr)
+ */
+ if (!hard_resolve)
+ ret = dict_get_str(xattr, GFID_TO_PATH_KEY, &path);
+ else
+ ret = dict_get_ptr(xattr, GFID2PATH_VIRT_XATTR_KEY, (void **)&path);
+
+ if (ret || !path) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (path_p) {
+ *path_p = gf_strdup(path);
+ if (!*path_p) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+ loc_wipe(&loc);
+
+ return ret;
+}
+
+int
+syncop_gfid_to_path(inode_table_t *itable, xlator_t *subvol, uuid_t gfid,
+ char **path_p)
+{
+ return syncop_gfid_to_path_hard(itable, subvol, gfid, NULL, path_p,
+ _gf_false);
+}
+
+int
+syncop_inode_find(xlator_t *this, xlator_t *subvol, uuid_t gfid,
+ inode_t **inode, dict_t *xdata, dict_t **rsp_dict)
+{
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ *inode = NULL;
+
+ *inode = inode_find(this->itable, gfid);
+ if (*inode)
+ goto out;
+
+ loc.inode = inode_new(this->itable);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ gf_uuid_copy(loc.gfid, gfid);
+
+ ret = syncop_lookup(subvol, &loc, &iatt, NULL, xdata, rsp_dict);
+ if (ret < 0)
+ goto out;
+
+ *inode = inode_link(loc.inode, NULL, NULL, &iatt);
+ if (!*inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+out:
+ loc_wipe(&loc);
+ return ret;
+}
diff --git a/libglusterfs/src/syncop.c b/libglusterfs/src/syncop.c
index c84832dfbcf..df20cec559f 100644
--- a/libglusterfs/src/syncop.c
+++ b/libglusterfs/src/syncop.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2008-2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,1369 +8,3565 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
+#include "glusterfs/syncop.h"
+#include "glusterfs/libglusterfs-messages.h"
+
+#ifdef HAVE_TSAN_API
+#include <sanitizer/tsan_interface.h>
#endif
-#include "syncop.h"
+int
+syncopctx_setfsuid(void *uid)
+{
+ struct syncopctx *opctx = NULL;
+ int ret = 0;
+
+ /* In args check */
+ if (!uid) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ opctx = syncopctx_getctx();
+
+ opctx->uid = *(uid_t *)uid;
+ opctx->valid |= SYNCOPCTX_UID;
+
+out:
+ return ret;
+}
+
+int
+syncopctx_setfsgid(void *gid)
+{
+ struct syncopctx *opctx = NULL;
+ int ret = 0;
+
+ /* In args check */
+ if (!gid) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ opctx = syncopctx_getctx();
+
+ opctx->gid = *(gid_t *)gid;
+ opctx->valid |= SYNCOPCTX_GID;
+
+out:
+ return ret;
+}
+
+int
+syncopctx_setfsgroups(int count, const void *groups)
+{
+ struct syncopctx *opctx = NULL;
+ gid_t *tmpgroups = NULL;
+ int ret = 0;
+
+ /* In args check */
+ if (count != 0 && !groups) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ opctx = syncopctx_getctx();
+
+ /* resize internal groups as required */
+ if (count && opctx->grpsize < count) {
+ if (opctx->groups) {
+ /* Group list will be updated later, so no need to keep current
+ * data and waste time copying it. It's better to free the current
+ * allocation and then allocate a fresh new memory block. */
+ GF_FREE(opctx->groups);
+ opctx->groups = NULL;
+ opctx->grpsize = 0;
+ }
+ tmpgroups = GF_MALLOC(count * sizeof(gid_t), gf_common_mt_syncopctx);
+ if (tmpgroups == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ opctx->groups = tmpgroups;
+ opctx->grpsize = count;
+ }
+
+ /* copy out the groups passed */
+ if (count)
+ memcpy(opctx->groups, groups, (sizeof(gid_t) * count));
+
+ /* set/reset the ngrps, this is where reset of groups is handled */
+ opctx->ngrps = count;
+
+ if ((opctx->valid & SYNCOPCTX_GROUPS) == 0) {
+ /* This is the first time we are storing groups into the TLS structure
+ * so we mark the current thread so that it will be properly cleaned
+ * up when the thread terminates. */
+ gf_thread_needs_cleanup();
+ }
+ opctx->valid |= SYNCOPCTX_GROUPS;
+
+out:
+ return ret;
+}
+
+int
+syncopctx_setfspid(void *pid)
+{
+ struct syncopctx *opctx = NULL;
+ int ret = 0;
+
+ /* In args check */
+ if (!pid) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ opctx = syncopctx_getctx();
+
+ opctx->pid = *(pid_t *)pid;
+ opctx->valid |= SYNCOPCTX_PID;
+
+out:
+ return ret;
+}
+
+int
+syncopctx_setfslkowner(gf_lkowner_t *lk_owner)
+{
+ struct syncopctx *opctx = NULL;
+ int ret = 0;
+
+ /* In args check */
+ if (!lk_owner) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ opctx = syncopctx_getctx();
+
+ opctx->lk_owner = *lk_owner;
+ opctx->valid |= SYNCOPCTX_LKOWNER;
+
+out:
+ return ret;
+}
+
+void *
+syncenv_processor(void *thdata);
static void
-__run (struct synctask *task)
+__run(struct synctask *task)
{
- struct syncenv *env = NULL;
+ struct syncenv *env = NULL;
+ int32_t total, ret, i;
- env = task->env;
+ env = task->env;
- list_del_init (&task->all_tasks);
- switch (task->state) {
- case SYNCTASK_INIT:
+ list_del_init(&task->all_tasks);
+ switch (task->state) {
+ case SYNCTASK_INIT:
case SYNCTASK_SUSPEND:
- break;
- case SYNCTASK_RUN:
- gf_log (task->xl->name, GF_LOG_WARNING,
- "re-running already running task");
- env->runcount--;
- break;
- case SYNCTASK_WAIT:
- env->waitcount--;
- break;
- case SYNCTASK_DONE:
- gf_log (task->xl->name, GF_LOG_WARNING,
- "running completed task");
- break;
- }
-
- list_add_tail (&task->all_tasks, &env->runq);
- env->runcount++;
- task->state = SYNCTASK_RUN;
+ break;
+ case SYNCTASK_RUN:
+ gf_msg_debug(task->xl->name, 0,
+ "re-running already running"
+ " task");
+ env->runcount--;
+ break;
+ case SYNCTASK_WAIT:
+ break;
+ case SYNCTASK_DONE:
+ gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK,
+ "running completed task");
+ return;
+ case SYNCTASK_ZOMBIE:
+ gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_WAKE_UP_ZOMBIE,
+ "attempted to wake up "
+ "zombie!!");
+ return;
+ }
+
+ list_add_tail(&task->all_tasks, &env->runq);
+ task->state = SYNCTASK_RUN;
+
+ env->runcount++;
+
+ total = env->procs + env->runcount - env->procs_idle;
+ if (total > env->procmax) {
+ total = env->procmax;
+ }
+ if (total > env->procs) {
+ for (i = 0; i < env->procmax; i++) {
+ if (env->proc[i].env == NULL) {
+ env->proc[i].env = env;
+ ret = gf_thread_create(&env->proc[i].processor, NULL,
+ syncenv_processor, &env->proc[i],
+ "sproc%d", i);
+ if ((ret < 0) || (++env->procs >= total)) {
+ break;
+ }
+ }
+ }
+ }
}
-
static void
-__wait (struct synctask *task)
+__wait(struct synctask *task)
{
- struct syncenv *env = NULL;
+ struct syncenv *env = NULL;
- env = task->env;
+ env = task->env;
- list_del_init (&task->all_tasks);
- switch (task->state) {
- case SYNCTASK_INIT:
+ list_del_init(&task->all_tasks);
+ switch (task->state) {
+ case SYNCTASK_INIT:
case SYNCTASK_SUSPEND:
- break;
- case SYNCTASK_RUN:
- env->runcount--;
- break;
- case SYNCTASK_WAIT:
- gf_log (task->xl->name, GF_LOG_WARNING,
- "re-waiting already waiting task");
- env->waitcount--;
- break;
- case SYNCTASK_DONE:
- gf_log (task->xl->name, GF_LOG_WARNING,
- "running completed task");
- break;
- }
-
- list_add_tail (&task->all_tasks, &env->waitq);
- env->waitcount++;
- task->state = SYNCTASK_WAIT;
+ break;
+ case SYNCTASK_RUN:
+ env->runcount--;
+ break;
+ case SYNCTASK_WAIT:
+ gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_REWAITING_TASK,
+ "re-waiting already waiting "
+ "task");
+ break;
+ case SYNCTASK_DONE:
+ gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_COMPLETED_TASK,
+ "running completed task");
+ return;
+ case SYNCTASK_ZOMBIE:
+ gf_msg(task->xl->name, GF_LOG_WARNING, 0, LG_MSG_SLEEP_ZOMBIE,
+ "attempted to sleep a zombie!!");
+ return;
+ }
+
+ list_add_tail(&task->all_tasks, &env->waitq);
+ task->state = SYNCTASK_WAIT;
}
+void
+synctask_yield(struct synctask *task, struct timespec *delta)
+{
+ xlator_t *oldTHIS = THIS;
+
+#if defined(__NetBSD__) && defined(_UC_TLSBASE)
+ /* Preserve pthread private pointer through swapcontex() */
+ task->proc->sched.uc_flags &= ~_UC_TLSBASE;
+#endif
+
+ task->delta = delta;
+
+ if (task->state != SYNCTASK_DONE) {
+ task->state = SYNCTASK_SUSPEND;
+ }
+
+#ifdef HAVE_TSAN_API
+ __tsan_switch_to_fiber(task->proc->tsan.fiber, 0);
+#endif
+
+ if (swapcontext(&task->ctx, &task->proc->sched) < 0) {
+ gf_msg("syncop", GF_LOG_ERROR, errno, LG_MSG_SWAPCONTEXT_FAILED,
+ "swapcontext failed");
+ }
+
+ THIS = oldTHIS;
+}
void
-synctask_yield (struct synctask *task)
+synctask_sleep(int32_t secs)
{
- if (swapcontext (&task->ctx, &task->proc->sched) < 0) {
- gf_log ("syncop", GF_LOG_ERROR,
- "swapcontext failed (%s)", strerror (errno));
- }
+ struct timespec delta;
+ struct synctask *task;
+
+ task = synctask_get();
+
+ if (task == NULL) {
+ sleep(secs);
+ } else {
+ delta.tv_sec = secs;
+ delta.tv_nsec = 0;
+
+ synctask_yield(task, &delta);
+ }
}
+static void
+__synctask_wake(struct synctask *task)
+{
+ task->woken = 1;
+
+ if (task->slept)
+ __run(task);
+
+ pthread_cond_broadcast(&task->env->cond);
+}
void
-synctask_wake (struct synctask *task)
+synctask_wake(struct synctask *task)
{
- struct syncenv *env = NULL;
+ struct syncenv *env = NULL;
- env = task->env;
+ env = task->env;
- pthread_mutex_lock (&env->mutex);
- {
- task->woken = 1;
+ pthread_mutex_lock(&env->mutex);
+ {
+ if (task->timer != NULL) {
+ if (gf_timer_call_cancel(task->xl->ctx, task->timer) != 0) {
+ goto unlock;
+ }
- if (task->slept)
- __run (task);
+ task->timer = NULL;
+ task->synccond = NULL;
}
- pthread_mutex_unlock (&env->mutex);
- pthread_cond_broadcast (&env->cond);
+ __synctask_wake(task);
+ }
+unlock:
+ pthread_mutex_unlock(&env->mutex);
}
void
-synctask_wrap (struct synctask *old_task)
+synctask_wrap(void)
{
- struct synctask *task = NULL;
+ struct synctask *task = NULL;
- /* Do not trust the pointer received. It may be
- wrong and can lead to crashes. */
+ /* Do not trust the pointer received. It may be
+ wrong and can lead to crashes. */
- task = synctask_get ();
- task->ret = task->syncfn (task->opaque);
- if (task->synccbk)
- task->synccbk (task->ret, task->frame, task->opaque);
+ task = synctask_get();
+ task->ret = task->syncfn(task->opaque);
+ if (task->synccbk)
+ task->synccbk(task->ret, task->frame, task->opaque);
- task->state = SYNCTASK_DONE;
+ task->state = SYNCTASK_DONE;
- synctask_yield (task);
+ synctask_yield(task, NULL);
}
-
void
-synctask_destroy (struct synctask *task)
+synctask_destroy(struct synctask *task)
{
- if (!task)
- return;
+ if (!task)
+ return;
- if (task->stack)
- FREE (task->stack);
+ GF_FREE(task->stack);
- if (task->opframe)
- STACK_DESTROY (task->opframe->root);
+ if (task->opframe)
+ STACK_DESTROY(task->opframe->root);
- pthread_mutex_destroy (&task->mutex);
+ if (task->synccbk == NULL) {
+ pthread_mutex_destroy(&task->mutex);
+ pthread_cond_destroy(&task->cond);
+ }
- pthread_cond_destroy (&task->cond);
+#ifdef HAVE_TSAN_API
+ __tsan_destroy_fiber(task->tsan.fiber);
+#endif
- FREE (task);
+ GF_FREE(task);
}
-
void
-synctask_done (struct synctask *task)
+synctask_done(struct synctask *task)
{
- if (task->synccbk) {
- synctask_destroy (task);
- return;
- }
+ if (task->synccbk) {
+ synctask_destroy(task);
+ return;
+ }
- pthread_mutex_lock (&task->mutex);
- {
- task->done = 1;
- pthread_cond_broadcast (&task->cond);
- }
- pthread_mutex_unlock (&task->mutex);
+ pthread_mutex_lock(&task->mutex);
+ {
+ task->state = SYNCTASK_ZOMBIE;
+ task->done = 1;
+ pthread_cond_broadcast(&task->cond);
+ }
+ pthread_mutex_unlock(&task->mutex);
}
-
int
-synctask_new (struct syncenv *env, synctask_fn_t fn, synctask_cbk_t cbk,
- call_frame_t *frame, void *opaque)
+synctask_setid(struct synctask *task, uid_t uid, gid_t gid)
{
- struct synctask *newtask = NULL;
- xlator_t *this = THIS;
- int ret = 0;
+ if (!task)
+ return -1;
- VALIDATE_OR_GOTO (env, err);
- VALIDATE_OR_GOTO (fn, err);
+ if (uid != -1)
+ task->uid = uid;
- newtask = CALLOC (1, sizeof (*newtask));
- if (!newtask)
- return -ENOMEM;
+ if (gid != -1)
+ task->gid = gid;
- newtask->frame = frame;
- if (!frame) {
- newtask->opframe = create_frame (this, this->ctx->pool);
- } else {
- newtask->opframe = copy_frame (frame);
- }
- if (!newtask->opframe)
- goto err;
- newtask->env = env;
- newtask->xl = this;
- newtask->syncfn = fn;
- newtask->synccbk = cbk;
- newtask->opaque = opaque;
-
- INIT_LIST_HEAD (&newtask->all_tasks);
-
- if (getcontext (&newtask->ctx) < 0) {
- gf_log ("syncop", GF_LOG_ERROR,
- "getcontext failed (%s)",
- strerror (errno));
- goto err;
- }
+ return 0;
+}
- newtask->stack = CALLOC (1, env->stacksize);
- if (!newtask->stack) {
- gf_log ("syncop", GF_LOG_ERROR,
- "out of memory for stack");
- goto err;
- }
+struct synctask *
+synctask_create(struct syncenv *env, size_t stacksize, synctask_fn_t fn,
+ synctask_cbk_t cbk, call_frame_t *frame, void *opaque)
+{
+ struct synctask *newtask = NULL;
+ xlator_t *this = THIS;
+ int destroymode = 0;
+
+ VALIDATE_OR_GOTO(env, err);
+ VALIDATE_OR_GOTO(fn, err);
+
+ /* Check if the syncenv is in destroymode i.e. destroy is SET.
+ * If YES, then don't allow any new synctasks on it. Return NULL.
+ */
+ pthread_mutex_lock(&env->mutex);
+ {
+ destroymode = env->destroy;
+ }
+ pthread_mutex_unlock(&env->mutex);
+
+ /* syncenv is in DESTROY mode, return from here */
+ if (destroymode)
+ return NULL;
- newtask->ctx.uc_stack.ss_sp = newtask->stack;
+ newtask = GF_CALLOC(1, sizeof(*newtask), gf_common_mt_synctask);
+ if (!newtask)
+ return NULL;
+
+ newtask->frame = frame;
+ if (!frame) {
+ newtask->opframe = create_frame(this, this->ctx->pool);
+ if (!newtask->opframe)
+ goto err;
+ set_lk_owner_from_ptr(&newtask->opframe->root->lk_owner,
+ newtask->opframe->root);
+ } else {
+ newtask->opframe = copy_frame(frame);
+ }
+ if (!newtask->opframe)
+ goto err;
+ newtask->env = env;
+ newtask->xl = this;
+ newtask->syncfn = fn;
+ newtask->synccbk = cbk;
+ newtask->opaque = opaque;
+
+ /* default to the uid/gid of the passed frame */
+ newtask->uid = newtask->opframe->root->uid;
+ newtask->gid = newtask->opframe->root->gid;
+
+ INIT_LIST_HEAD(&newtask->all_tasks);
+ INIT_LIST_HEAD(&newtask->waitq);
+
+ if (getcontext(&newtask->ctx) < 0) {
+ gf_msg("syncop", GF_LOG_ERROR, errno, LG_MSG_GETCONTEXT_FAILED,
+ "getcontext failed");
+ goto err;
+ }
+
+ if (stacksize <= 0) {
+ newtask->stack = GF_CALLOC(1, env->stacksize, gf_common_mt_syncstack);
newtask->ctx.uc_stack.ss_size = env->stacksize;
+ } else {
+ newtask->stack = GF_CALLOC(1, stacksize, gf_common_mt_syncstack);
+ newtask->ctx.uc_stack.ss_size = stacksize;
+ }
- makecontext (&newtask->ctx, (void *) synctask_wrap, 2, newtask);
+ if (!newtask->stack) {
+ goto err;
+ }
- newtask->state = SYNCTASK_INIT;
+ newtask->ctx.uc_stack.ss_sp = newtask->stack;
- newtask->slept = 1;
+ makecontext(&newtask->ctx, (void (*)(void))synctask_wrap, 0);
- if (!cbk) {
- pthread_mutex_init (&newtask->mutex, NULL);
- pthread_cond_init (&newtask->cond, NULL);
- newtask->done = 0;
- }
+#ifdef HAVE_TSAN_API
+ newtask->tsan.fiber = __tsan_create_fiber(0);
+ snprintf(newtask->tsan.name, TSAN_THREAD_NAMELEN, "<synctask of %s>",
+ this->name);
+ __tsan_set_fiber_name(newtask->tsan.fiber, newtask->tsan.name);
+#endif
- synctask_wake (newtask);
+ newtask->state = SYNCTASK_INIT;
- if (!cbk) {
- pthread_mutex_lock (&newtask->mutex);
- {
- while (!newtask->done) {
- pthread_cond_wait (&newtask->cond, &newtask->mutex);
- }
- }
- pthread_mutex_unlock (&newtask->mutex);
+ newtask->slept = 1;
- ret = newtask->ret;
+ if (!cbk) {
+ pthread_mutex_init(&newtask->mutex, NULL);
+ pthread_cond_init(&newtask->cond, NULL);
+ newtask->done = 0;
+ }
- synctask_destroy (newtask);
- }
+ synctask_wake(newtask);
- return ret;
+ return newtask;
err:
- if (newtask) {
- if (newtask->stack)
- FREE (newtask->stack);
- if (newtask->opframe)
- STACK_DESTROY (newtask->opframe->root);
- FREE (newtask);
- }
- return -1;
+ if (newtask) {
+ GF_FREE(newtask->stack);
+ if (newtask->opframe)
+ STACK_DESTROY(newtask->opframe->root);
+ GF_FREE(newtask);
+ }
+
+ return NULL;
}
+int
+synctask_join(struct synctask *task)
+{
+ int ret = 0;
-struct synctask *
-syncenv_task (struct syncproc *proc)
+ pthread_mutex_lock(&task->mutex);
+ {
+ while (!task->done)
+ pthread_cond_wait(&task->cond, &task->mutex);
+ }
+ pthread_mutex_unlock(&task->mutex);
+
+ ret = task->ret;
+
+ synctask_destroy(task);
+
+ return ret;
+}
+
+int
+synctask_new1(struct syncenv *env, size_t stacksize, synctask_fn_t fn,
+ synctask_cbk_t cbk, call_frame_t *frame, void *opaque)
{
- struct syncenv *env = NULL;
- struct synctask *task = NULL;
+ struct synctask *newtask = NULL;
+ int ret = 0;
- env = proc->env;
+ newtask = synctask_create(env, stacksize, fn, cbk, frame, opaque);
+ if (!newtask)
+ return -1;
- pthread_mutex_lock (&env->mutex);
- {
- while (list_empty (&env->runq))
- pthread_cond_wait (&env->cond, &env->mutex);
+ if (!cbk)
+ ret = synctask_join(newtask);
- task = list_entry (env->runq.next, struct synctask, all_tasks);
+ return ret;
+}
- list_del_init (&task->all_tasks);
- env->runcount--;
+int
+synctask_new(struct syncenv *env, synctask_fn_t fn, synctask_cbk_t cbk,
+ call_frame_t *frame, void *opaque)
+{
+ return synctask_new1(env, 0, fn, cbk, frame, opaque);
+}
- task->proc = proc;
+struct synctask *
+syncenv_task(struct syncproc *proc)
+{
+ struct syncenv *env = NULL;
+ struct synctask *task = NULL;
+ struct timespec sleep_till = {
+ 0,
+ };
+ int ret = 0;
+
+ env = proc->env;
+
+ pthread_mutex_lock(&env->mutex);
+ {
+ while (list_empty(&env->runq)) {
+ /* If either of the conditions are met then exit
+ * the current thread:
+ * 1. syncenv has to scale down(procs > procmin)
+ * 2. syncenv is in destroy mode and no tasks in
+ * either waitq or runq.
+ *
+ * At any point in time, a task can be either in runq,
+ * or in executing state or in the waitq. Once the
+ * destroy mode is set, no new synctask creates will
+ * be allowed, but whatever in waitq or runq should be
+ * allowed to finish before exiting any of the syncenv
+ * processor threads.
+ */
+ if (((ret == ETIMEDOUT) && (env->procs > env->procmin)) ||
+ (env->destroy && list_empty(&env->waitq))) {
+ task = NULL;
+ env->procs--;
+ memset(proc, 0, sizeof(*proc));
+ pthread_cond_broadcast(&env->cond);
+ goto unlock;
+ }
+
+ env->procs_idle++;
+
+ sleep_till.tv_sec = gf_time() + SYNCPROC_IDLE_TIME;
+ ret = pthread_cond_timedwait(&env->cond, &env->mutex, &sleep_till);
+
+ env->procs_idle--;
}
- pthread_mutex_unlock (&env->mutex);
- return task;
+ task = list_entry(env->runq.next, struct synctask, all_tasks);
+
+ list_del_init(&task->all_tasks);
+ env->runcount--;
+
+ task->woken = 0;
+ task->slept = 0;
+
+ task->proc = proc;
+ }
+unlock:
+ pthread_mutex_unlock(&env->mutex);
+
+ return task;
}
+static void
+synctask_timer(void *data)
+{
+ struct synctask *task = data;
+ struct synccond *cond;
+
+ cond = task->synccond;
+ if (cond != NULL) {
+ pthread_mutex_lock(&cond->pmutex);
+
+ list_del_init(&task->waitq);
+ task->synccond = NULL;
+
+ pthread_mutex_unlock(&cond->pmutex);
+
+ task->ret = -ETIMEDOUT;
+ }
+
+ pthread_mutex_lock(&task->env->mutex);
+
+ gf_timer_call_cancel(task->xl->ctx, task->timer);
+ task->timer = NULL;
+
+ __synctask_wake(task);
+
+ pthread_mutex_unlock(&task->env->mutex);
+}
void
-synctask_switchto (struct synctask *task)
+synctask_switchto(struct synctask *task)
{
- struct syncenv *env = NULL;
+ struct syncenv *env = NULL;
- env = task->env;
+ env = task->env;
- synctask_set (task);
- THIS = task->xl;
+ synctask_set(task);
+ THIS = task->xl;
- task->woken = 0;
- task->slept = 0;
+#if defined(__NetBSD__) && defined(_UC_TLSBASE)
+ /* Preserve pthread private pointer through swapcontex() */
+ task->ctx.uc_flags &= ~_UC_TLSBASE;
+#endif
- if (swapcontext (&task->proc->sched, &task->ctx) < 0) {
- gf_log ("syncop", GF_LOG_ERROR,
- "swapcontext failed (%s)", strerror (errno));
- }
+#ifdef HAVE_TSAN_API
+ __tsan_switch_to_fiber(task->tsan.fiber, 0);
+#endif
- if (task->state == SYNCTASK_DONE) {
- synctask_done (task);
- return;
- }
+ if (swapcontext(&task->proc->sched, &task->ctx) < 0) {
+ gf_msg("syncop", GF_LOG_ERROR, errno, LG_MSG_SWAPCONTEXT_FAILED,
+ "swapcontext failed");
+ }
- pthread_mutex_lock (&env->mutex);
- {
- if (task->woken) {
- __run (task);
- } else {
- task->slept = 1;
- __wait (task);
- }
+ if (task->state == SYNCTASK_DONE) {
+ synctask_done(task);
+ return;
+ }
+
+ pthread_mutex_lock(&env->mutex);
+ {
+ if (task->woken) {
+ __run(task);
+ } else {
+ task->slept = 1;
+ __wait(task);
+
+ if (task->delta != NULL) {
+ task->timer = gf_timer_call_after(task->xl->ctx, *task->delta,
+ synctask_timer, task);
+ }
}
- pthread_mutex_unlock (&env->mutex);
+
+ task->delta = NULL;
+ }
+ pthread_mutex_unlock(&env->mutex);
}
void *
-syncenv_processor (void *thdata)
+syncenv_processor(void *thdata)
{
- struct syncenv *env = NULL;
- struct syncproc *proc = NULL;
- struct synctask *task = NULL;
+ struct syncproc *proc = NULL;
+ struct synctask *task = NULL;
- proc = thdata;
- env = proc->env;
+ proc = thdata;
- for (;;) {
- task = syncenv_task (proc);
+#ifdef HAVE_TSAN_API
+ proc->tsan.fiber = __tsan_create_fiber(0);
+ snprintf(proc->tsan.name, TSAN_THREAD_NAMELEN, "<sched of syncenv@%p>",
+ proc);
+ __tsan_set_fiber_name(proc->tsan.fiber, proc->tsan.name);
+#endif
- synctask_switchto (task);
+ while ((task = syncenv_task(proc)) != NULL) {
+ synctask_switchto(task);
+ }
- syncenv_scale (env);
+#ifdef HAVE_TSAN_API
+ __tsan_destroy_fiber(proc->tsan.fiber);
+#endif
+
+ return NULL;
+}
+
+/* The syncenv threads are cleaned up in this routine.
+ */
+void
+syncenv_destroy(struct syncenv *env)
+{
+ if (env == NULL)
+ return;
+
+ /* SET the 'destroy' in syncenv structure to prohibit any
+ * further synctask(s) on this syncenv which is in destroy mode.
+ *
+ * If syncenv threads are in pthread cond wait with no tasks in
+ * their run or wait queue, then the threads are woken up by
+ * broadcasting the cond variable and if destroy field is set,
+ * the infinite loop in syncenv_processor is broken and the
+ * threads return.
+ *
+ * If syncenv threads have tasks in runq or waitq, the tasks are
+ * completed and only then the thread returns.
+ */
+ pthread_mutex_lock(&env->mutex);
+ {
+ env->destroy = 1;
+ /* This broadcast will wake threads in pthread_cond_wait
+ * in syncenv_task
+ */
+ pthread_cond_broadcast(&env->cond);
+
+ /* when the syncenv_task() thread is exiting, it broadcasts to
+ * wake the below wait.
+ */
+ while (env->procs != 0) {
+ pthread_cond_wait(&env->cond, &env->mutex);
}
+ }
+ pthread_mutex_unlock(&env->mutex);
+ pthread_mutex_destroy(&env->mutex);
+ pthread_cond_destroy(&env->cond);
+
+ GF_FREE(env);
+
+ return;
+}
+
+struct syncenv *
+syncenv_new(size_t stacksize, int procmin, int procmax)
+{
+ struct syncenv *newenv = NULL;
+ int ret = 0;
+ int i = 0;
+
+ if (!procmin || procmin < 0)
+ procmin = SYNCENV_PROC_MIN;
+ if (!procmax || procmax > SYNCENV_PROC_MAX)
+ procmax = SYNCENV_PROC_MAX;
+
+ if (procmin > procmax)
+ return NULL;
+
+ newenv = GF_CALLOC(1, sizeof(*newenv), gf_common_mt_syncenv);
+
+ if (!newenv)
return NULL;
+
+ pthread_mutex_init(&newenv->mutex, NULL);
+ pthread_cond_init(&newenv->cond, NULL);
+
+ INIT_LIST_HEAD(&newenv->runq);
+ INIT_LIST_HEAD(&newenv->waitq);
+
+ newenv->stacksize = SYNCENV_DEFAULT_STACKSIZE;
+ if (stacksize)
+ newenv->stacksize = stacksize;
+ newenv->procmin = procmin;
+ newenv->procmax = procmax;
+ newenv->procs_idle = 0;
+
+ for (i = 0; i < newenv->procmin; i++) {
+ newenv->proc[i].env = newenv;
+ ret = gf_thread_create(&newenv->proc[i].processor, NULL,
+ syncenv_processor, &newenv->proc[i], "sproc%d",
+ i);
+ if (ret)
+ break;
+ newenv->procs++;
+ }
+
+ if (ret != 0) {
+ syncenv_destroy(newenv);
+ newenv = NULL;
+ }
+
+ return newenv;
}
+int
+synclock_init(synclock_t *lock, lock_attr_t attr)
+{
+ if (!lock)
+ return -1;
-void
-syncenv_scale (struct syncenv *env)
-{
- int thmax = 0;
- int i = 0;
- int ret = 0;
-
- pthread_mutex_lock (&env->mutex);
- {
- if (env->procs > env->runcount)
- goto unlock;
-
- thmax = min (env->runcount, SYNCENV_PROC_MAX);
- for (i = env->procs; i < thmax; i++) {
- env->proc[i].env = env;
- ret = pthread_create (&env->proc[i].processor, NULL,
- syncenv_processor, &env->proc[i]);
- if (ret)
- break;
- env->procs++;
- }
- }
+ pthread_cond_init(&lock->cond, 0);
+ lock->type = LOCK_NULL;
+ lock->owner = NULL;
+ lock->owner_tid = 0;
+ lock->lock = 0;
+ lock->attr = attr;
+ INIT_LIST_HEAD(&lock->waitq);
+
+ return pthread_mutex_init(&lock->guard, 0);
+}
+
+int
+synclock_destroy(synclock_t *lock)
+{
+ if (!lock)
+ return -1;
+
+ pthread_cond_destroy(&lock->cond);
+ return pthread_mutex_destroy(&lock->guard);
+}
+
+static int
+__synclock_lock(struct synclock *lock)
+{
+ struct synctask *task = NULL;
+
+ if (!lock)
+ return -1;
+
+ task = synctask_get();
+
+ if (lock->lock && (lock->attr == SYNC_LOCK_RECURSIVE)) {
+ /*Recursive lock (if same owner requested for lock again then
+ *increment lock count and return success).
+ *Note:same number of unlocks required.
+ */
+ switch (lock->type) {
+ case LOCK_TASK:
+ if (task == lock->owner) {
+ lock->lock++;
+ gf_msg_trace("", 0,
+ "Recursive lock called by"
+ " sync task.owner= %p,lock=%d",
+ lock->owner, lock->lock);
+ return 0;
+ }
+ break;
+ case LOCK_THREAD:
+ if (pthread_equal(pthread_self(), lock->owner_tid)) {
+ lock->lock++;
+ gf_msg_trace("", 0,
+ "Recursive lock called by"
+ " thread ,owner=%u lock=%d",
+ (unsigned int)lock->owner_tid, lock->lock);
+ return 0;
+ }
+ break;
+ default:
+ gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_UNKNOWN_LOCK_TYPE,
+ "unknown lock type");
+ break;
+ }
+ }
+
+ while (lock->lock) {
+ if (task) {
+ /* called within a synctask */
+ task->woken = 0;
+ list_add_tail(&task->waitq, &lock->waitq);
+ pthread_mutex_unlock(&lock->guard);
+ synctask_yield(task, NULL);
+ /* task is removed from waitq in unlock,
+ * under lock->guard.*/
+ pthread_mutex_lock(&lock->guard);
+ } else {
+ /* called by a non-synctask */
+ pthread_cond_wait(&lock->cond, &lock->guard);
+ }
+ }
+
+ if (task) {
+ lock->type = LOCK_TASK;
+ lock->owner = task; /* for synctask*/
+
+ } else {
+ lock->type = LOCK_THREAD;
+ lock->owner_tid = pthread_self(); /* for non-synctask */
+ }
+ lock->lock = 1;
+
+ return 0;
+}
+
+int
+synclock_lock(synclock_t *lock)
+{
+ int ret = 0;
+
+ pthread_mutex_lock(&lock->guard);
+ {
+ ret = __synclock_lock(lock);
+ }
+ pthread_mutex_unlock(&lock->guard);
+
+ return ret;
+}
+
+int
+synclock_trylock(synclock_t *lock)
+{
+ int ret = 0;
+
+ errno = 0;
+
+ pthread_mutex_lock(&lock->guard);
+ {
+ if (lock->lock) {
+ errno = EBUSY;
+ ret = -1;
+ goto unlock;
+ }
+
+ ret = __synclock_lock(lock);
+ }
unlock:
- pthread_mutex_unlock (&env->mutex);
+ pthread_mutex_unlock(&lock->guard);
+
+ return ret;
}
+static int
+__synclock_unlock(synclock_t *lock)
+{
+ struct synctask *task = NULL;
+ struct synctask *curr = NULL;
+
+ if (!lock)
+ return -1;
-void
-syncenv_destroy (struct syncenv *env)
+ if (lock->lock == 0) {
+ gf_msg("", GF_LOG_CRITICAL, 0, LG_MSG_UNLOCK_BEFORE_LOCK,
+ "Unlock called before lock ");
+ return -1;
+ }
+ curr = synctask_get();
+ /*unlock should be called by lock owner
+ *i.e this will not allow the lock in nonsync task and unlock
+ * in sync task and vice-versa
+ */
+ switch (lock->type) {
+ case LOCK_TASK:
+ if (curr == lock->owner) {
+ lock->lock--;
+ gf_msg_trace("", 0,
+ "Unlock success %p, remaining"
+ " locks=%d",
+ lock->owner, lock->lock);
+ } else {
+ gf_msg("", GF_LOG_WARNING, 0, LG_MSG_LOCK_OWNER_ERROR,
+ "Unlock called by %p, but lock held by %p", curr,
+ lock->owner);
+ }
+
+ break;
+ case LOCK_THREAD:
+ if (pthread_equal(pthread_self(), lock->owner_tid)) {
+ lock->lock--;
+ gf_msg_trace("", 0,
+ "Unlock success %u, remaining "
+ "locks=%d",
+ (unsigned int)lock->owner_tid, lock->lock);
+ } else {
+ gf_msg("", GF_LOG_WARNING, 0, LG_MSG_LOCK_OWNER_ERROR,
+ "Unlock called by %u, but lock held by %u",
+ (unsigned int)pthread_self(),
+ (unsigned int)lock->owner_tid);
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ if (lock->lock > 0) {
+ return 0;
+ }
+ lock->type = LOCK_NULL;
+ lock->owner = NULL;
+ lock->owner_tid = 0;
+ lock->lock = 0;
+ /* There could be both synctasks and non synctasks
+ waiting (or none, or either). As a mid-approach
+ between maintaining too many waiting counters
+ at one extreme and a thundering herd on unlock
+ at the other, call a cond_signal (which wakes
+ one waiter) and first synctask waiter. So at
+ most we have two threads waking up to grab the
+ just released lock.
+ */
+ pthread_cond_signal(&lock->cond);
+ if (!list_empty(&lock->waitq)) {
+ task = list_entry(lock->waitq.next, struct synctask, waitq);
+ list_del_init(&task->waitq);
+ synctask_wake(task);
+ }
+
+ return 0;
+}
+
+int
+synclock_unlock(synclock_t *lock)
{
+ int ret = 0;
+
+ pthread_mutex_lock(&lock->guard);
+ {
+ ret = __synclock_unlock(lock);
+ }
+ pthread_mutex_unlock(&lock->guard);
+ return ret;
}
+/* Condition variables */
-struct syncenv *
-syncenv_new (size_t stacksize)
+int32_t
+synccond_init(synccond_t *cond)
{
- struct syncenv *newenv = NULL;
- int ret = 0;
- int i = 0;
+ int32_t ret;
- newenv = CALLOC (1, sizeof (*newenv));
+ INIT_LIST_HEAD(&cond->waitq);
- if (!newenv)
- return NULL;
+ ret = pthread_mutex_init(&cond->pmutex, NULL);
+ if (ret != 0) {
+ return -ret;
+ }
- pthread_mutex_init (&newenv->mutex, NULL);
- pthread_cond_init (&newenv->cond, NULL);
+ ret = pthread_cond_init(&cond->pcond, NULL);
+ if (ret != 0) {
+ pthread_mutex_destroy(&cond->pmutex);
+ }
- INIT_LIST_HEAD (&newenv->runq);
- INIT_LIST_HEAD (&newenv->waitq);
+ return -ret;
+}
- newenv->stacksize = SYNCENV_DEFAULT_STACKSIZE;
- if (stacksize)
- newenv->stacksize = stacksize;
+void
+synccond_destroy(synccond_t *cond)
+{
+ pthread_cond_destroy(&cond->pcond);
+ pthread_mutex_destroy(&cond->pmutex);
+}
+
+int
+synccond_timedwait(synccond_t *cond, synclock_t *lock, struct timespec *delta)
+{
+ struct timespec now;
+ struct synctask *task = NULL;
+ int ret;
- for (i = 0; i < SYNCENV_PROC_MIN; i++) {
- newenv->proc[i].env = newenv;
- ret = pthread_create (&newenv->proc[i].processor, NULL,
- syncenv_processor, &newenv->proc[i]);
- if (ret)
- break;
- newenv->procs++;
+ task = synctask_get();
+
+ if (task == NULL) {
+ if (delta != NULL) {
+ timespec_now_realtime(&now);
+ timespec_adjust_delta(&now, *delta);
+ }
+
+ pthread_mutex_lock(&cond->pmutex);
+
+ if (delta == NULL) {
+ ret = -pthread_cond_wait(&cond->pcond, &cond->pmutex);
+ } else {
+ ret = -pthread_cond_timedwait(&cond->pcond, &cond->pmutex, &now);
}
+ } else {
+ pthread_mutex_lock(&cond->pmutex);
+
+ list_add_tail(&task->waitq, &cond->waitq);
+ task->synccond = cond;
- if (ret != 0)
- syncenv_destroy (newenv);
+ ret = synclock_unlock(lock);
+ if (ret == 0) {
+ pthread_mutex_unlock(&cond->pmutex);
- return newenv;
+ synctask_yield(task, delta);
+
+ ret = synclock_lock(lock);
+ if (ret == 0) {
+ ret = task->ret;
+ }
+ task->ret = 0;
+
+ return ret;
+ }
+
+ list_del_init(&task->waitq);
+ }
+
+ pthread_mutex_unlock(&cond->pmutex);
+
+ return ret;
}
+int
+synccond_wait(synccond_t *cond, synclock_t *lock)
+{
+ return synccond_timedwait(cond, lock, NULL);
+}
-/* FOPS */
+void
+synccond_signal(synccond_t *cond)
+{
+ struct synctask *task;
+
+ pthread_mutex_lock(&cond->pmutex);
+
+ if (!list_empty(&cond->waitq)) {
+ task = list_first_entry(&cond->waitq, struct synctask, waitq);
+ list_del_init(&task->waitq);
+
+ pthread_mutex_unlock(&cond->pmutex);
+
+ synctask_wake(task);
+ } else {
+ pthread_cond_signal(&cond->pcond);
+
+ pthread_mutex_unlock(&cond->pmutex);
+ }
+}
+
+void
+synccond_broadcast(synccond_t *cond)
+{
+ struct list_head list;
+ struct synctask *task;
+
+ INIT_LIST_HEAD(&list);
+
+ pthread_mutex_lock(&cond->pmutex);
+ list_splice_init(&cond->waitq, &list);
+ pthread_cond_broadcast(&cond->pcond);
+
+ pthread_mutex_unlock(&cond->pmutex);
+
+ while (!list_empty(&list)) {
+ task = list_first_entry(&list, struct synctask, waitq);
+ list_del_init(&task->waitq);
+
+ synctask_wake(task);
+ }
+}
+
+/* Barriers */
int
-syncop_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *iatt, dict_t *xdata, struct iatt *parent)
+syncbarrier_init(struct syncbarrier *barrier)
{
- struct syncargs *args = NULL;
+ int ret = 0;
+ if (!barrier) {
+ errno = EINVAL;
+ return -1;
+ }
- args = cookie;
+ ret = pthread_cond_init(&barrier->cond, 0);
+ if (ret) {
+ errno = ret;
+ return -1;
+ }
+ barrier->count = 0;
+ barrier->waitfor = 0;
+ INIT_LIST_HEAD(&barrier->waitq);
+
+ ret = pthread_mutex_init(&barrier->guard, 0);
+ if (ret) {
+ (void)pthread_cond_destroy(&barrier->cond);
+ errno = ret;
+ return -1;
+ }
+ barrier->initialized = _gf_true;
+ return 0;
+}
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+int
+syncbarrier_destroy(struct syncbarrier *barrier)
+{
+ int ret = 0;
+ int ret1 = 0;
+ if (!barrier) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ if (barrier->initialized) {
+ ret = pthread_cond_destroy(&barrier->cond);
+ ret1 = pthread_mutex_destroy(&barrier->guard);
+ barrier->initialized = _gf_false;
+ }
+ if (ret || ret1) {
+ errno = ret ? ret : ret1;
+ return -1;
+ }
+ return 0;
+}
+
+static int
+__syncbarrier_wait(struct syncbarrier *barrier, int waitfor)
+{
+ struct synctask *task = NULL;
+
+ if (!barrier) {
+ errno = EINVAL;
+ return -1;
+ }
- if (op_ret == 0) {
- args->iatt1 = *iatt;
- args->iatt2 = *parent;
- if (xdata)
- args->xdata = dict_ref (xdata);
+ task = synctask_get();
+
+ while (barrier->count < waitfor) {
+ if (task) {
+ /* called within a synctask */
+ list_add_tail(&task->waitq, &barrier->waitq);
+ pthread_mutex_unlock(&barrier->guard);
+ synctask_yield(task, NULL);
+ pthread_mutex_lock(&barrier->guard);
+ } else {
+ /* called by a non-synctask */
+ pthread_cond_wait(&barrier->cond, &barrier->guard);
}
+ }
+
+ barrier->count = 0;
- __wake (args);
+ return 0;
+}
+int
+syncbarrier_wait(struct syncbarrier *barrier, int waitfor)
+{
+ int ret = 0;
+
+ pthread_mutex_lock(&barrier->guard);
+ {
+ ret = __syncbarrier_wait(barrier, waitfor);
+ }
+ pthread_mutex_unlock(&barrier->guard);
+
+ return ret;
+}
+
+static int
+__syncbarrier_wake(struct syncbarrier *barrier)
+{
+ struct synctask *task = NULL;
+
+ if (!barrier) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ barrier->count++;
+ if (barrier->waitfor && (barrier->count < barrier->waitfor))
return 0;
+
+ pthread_cond_signal(&barrier->cond);
+ if (!list_empty(&barrier->waitq)) {
+ task = list_entry(barrier->waitq.next, struct synctask, waitq);
+ list_del_init(&task->waitq);
+ synctask_wake(task);
+ }
+ barrier->waitfor = 0;
+
+ return 0;
}
+int
+syncbarrier_wake(struct syncbarrier *barrier)
+{
+ int ret = 0;
+
+ pthread_mutex_lock(&barrier->guard);
+ {
+ ret = __syncbarrier_wake(barrier);
+ }
+ pthread_mutex_unlock(&barrier->guard);
+
+ return ret;
+}
+
+/* FOPS */
int
-syncop_lookup (xlator_t *subvol, loc_t *loc, dict_t *xdata_req,
- struct iatt *iatt, dict_t **xdata_rsp, struct iatt *parent)
+syncop_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *iatt,
+ dict_t *xdata, struct iatt *parent)
{
- struct syncargs args = {0, };
+ struct syncargs *args = NULL;
- SYNCOP (subvol, (&args), syncop_lookup_cbk, subvol->fops->lookup,
- loc, xdata_req);
+ args = cookie;
- if (iatt)
- *iatt = args.iatt1;
- if (parent)
- *parent = args.iatt2;
- if (xdata_rsp)
- *xdata_rsp = args.xdata;
- else if (args.xdata)
- dict_unref (args.xdata);
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- errno = args.op_errno;
- return args.op_ret;
+ if (op_ret == 0) {
+ args->iatt1 = *iatt;
+ args->iatt2 = *parent;
+ }
+
+ __wake(args);
+
+ return 0;
}
-static gf_dirent_t *
-entry_copy (gf_dirent_t *source)
+int
+syncop_lookup(xlator_t *subvol, loc_t *loc, struct iatt *iatt,
+ struct iatt *parent, dict_t *xdata_in, dict_t **xdata_out)
{
- gf_dirent_t *sink = NULL;
+ struct syncargs args = {
+ 0,
+ };
- sink = gf_dirent_for_name (source->d_name);
+ SYNCOP(subvol, (&args), syncop_lookup_cbk, subvol->fops->lookup, loc,
+ xdata_in);
- sink->d_off = source->d_off;
- sink->d_ino = source->d_ino;
- sink->d_type = source->d_type;
- sink->d_stat = source->d_stat;
+ if (iatt)
+ *iatt = args.iatt1;
+ if (parent)
+ *parent = args.iatt2;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- return sink;
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int32_t
-syncop_readdirp_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
-{
- struct syncargs *args = NULL;
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
-
- int count = 0;
-
- args = cookie;
-
- INIT_LIST_HEAD (&args->entries.list);
-
- args->op_ret = op_ret;
- args->op_errno = op_errno;
-
- if (op_ret >= 0) {
- list_for_each_entry (entry, &entries->list, list) {
- tmp = entry_copy (entry);
- gf_log (this->name, GF_LOG_TRACE,
- "adding entry=%s, count=%d",
- tmp->d_name, count);
- list_add_tail (&tmp->list, &(args->entries.list));
- count++;
- }
- }
+syncop_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
- __wake (args);
+ int count = 0;
- return 0;
+ args = cookie;
+
+ INIT_LIST_HEAD(&args->entries.list);
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ tmp = entry_copy(entry);
+ if (!tmp) {
+ args->op_ret = -1;
+ args->op_errno = ENOMEM;
+ gf_dirent_free(&(args->entries));
+ break;
+ }
+ gf_msg_trace(this->name, 0,
+ "adding entry=%s, "
+ "count=%d",
+ tmp->d_name, count);
+ list_add_tail(&tmp->list, &(args->entries.list));
+ count++;
+ }
+ }
+
+ __wake(args);
+ return 0;
}
int
-syncop_readdirp (xlator_t *subvol,
- fd_t *fd,
- size_t size,
- off_t off,
- dict_t *dict,
- gf_dirent_t *entries)
+syncop_readdirp(xlator_t *subvol, fd_t *fd, size_t size, off_t off,
+ gf_dirent_t *entries, dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_readdirp_cbk, subvol->fops->readdirp,
- fd, size, off, dict);
+ SYNCOP(subvol, (&args), syncop_readdirp_cbk, subvol->fops->readdirp, fd,
+ size, off, xdata_in);
- if (entries)
- list_splice_init (&args.entries.list, &entries->list);
- /* TODO: need to free all the 'args.entries' in 'else' case */
+ if (entries)
+ list_splice_init(&args.entries.list, &entries->list);
+ else
+ gf_dirent_free(&args.entries);
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int32_t
-syncop_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
-{
- struct syncargs *args = NULL;
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
-
- int count = 0;
-
- args = cookie;
-
- INIT_LIST_HEAD (&args->entries.list);
-
- args->op_ret = op_ret;
- args->op_errno = op_errno;
-
- if (op_ret >= 0) {
- list_for_each_entry (entry, &entries->list, list) {
- tmp = entry_copy (entry);
- gf_log (this->name, GF_LOG_TRACE,
- "adding entry=%s, count=%d",
- tmp->d_name, count);
- list_add_tail (&tmp->list, &(args->entries.list));
- count++;
- }
- }
+syncop_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
- __wake (args);
+ int count = 0;
- return 0;
+ args = cookie;
+
+ INIT_LIST_HEAD(&args->entries.list);
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ tmp = entry_copy(entry);
+ if (!tmp) {
+ args->op_ret = -1;
+ args->op_errno = ENOMEM;
+ gf_dirent_free(&(args->entries));
+ break;
+ }
+ gf_msg_trace(this->name, 0,
+ "adding "
+ "entry=%s, count=%d",
+ tmp->d_name, count);
+ list_add_tail(&tmp->list, &(args->entries.list));
+ count++;
+ }
+ }
+ __wake(args);
+
+ return 0;
}
int
-syncop_readdir (xlator_t *subvol,
- fd_t *fd,
- size_t size,
- off_t off,
- gf_dirent_t *entries)
+syncop_readdir(xlator_t *subvol, fd_t *fd, size_t size, off_t off,
+ gf_dirent_t *entries, dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_readdir_cbk, subvol->fops->readdir,
- fd, size, off, NULL);
+ SYNCOP(subvol, (&args), syncop_readdir_cbk, subvol->fops->readdir, fd, size,
+ off, xdata_in);
- if (entries)
- list_splice_init (&args.entries.list, &entries->list);
- /* TODO: need to free all the 'args.entries' in 'else' case */
+ if (entries)
+ list_splice_init(&args.entries.list, &entries->list);
+ else
+ gf_dirent_free(&args.entries);
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int32_t
-syncop_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd, dict_t *xdata)
+syncop_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ __wake(args);
- return 0;
+ return 0;
}
int
-syncop_opendir (xlator_t *subvol,
- loc_t *loc,
- fd_t *fd)
+syncop_opendir(xlator_t *subvol, loc_t *loc, fd_t *fd, dict_t *xdata_in,
+ dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_opendir_cbk, subvol->fops->opendir,
- loc, fd, NULL);
+ SYNCOP(subvol, (&args), syncop_opendir_cbk, subvol->fops->opendir, loc, fd,
+ xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+syncop_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_fsyncdir(xlator_t *subvol, fd_t *fd, int datasync, dict_t *xdata_in,
+ dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_fsyncdir_cbk, subvol->fops->fsyncdir, fd,
+ datasync, xdata_in);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_removexattr(xlator_t *subvol, loc_t *loc, const char *name,
+ dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_removexattr_cbk, subvol->fops->removexattr,
+ loc, name, xdata_in);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ __wake(args);
- return 0;
+ return 0;
}
int
-syncop_removexattr (xlator_t *subvol, loc_t *loc, const char *name)
+syncop_fremovexattr(xlator_t *subvol, fd_t *fd, const char *name,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_removexattr_cbk, subvol->fops->removexattr,
- loc, name, NULL);
+ SYNCOP(subvol, (&args), syncop_fremovexattr_cbk, subvol->fops->fremovexattr,
+ fd, name, xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+syncop_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ __wake(args);
- return 0;
+ return 0;
}
int
-syncop_fremovexattr (xlator_t *subvol, fd_t *fd, const char *name)
+syncop_setxattr(xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_fremovexattr_cbk,
- subvol->fops->fremovexattr, fd, name, NULL);
+ SYNCOP(subvol, (&args), syncop_setxattr_cbk, subvol->fops->setxattr, loc,
+ dict, flags, xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+syncop_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ __wake(args);
- return 0;
+ return 0;
}
+int
+syncop_fsetxattr(xlator_t *subvol, fd_t *fd, dict_t *dict, int32_t flags,
+ dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_fsetxattr_cbk, subvol->fops->fsetxattr, fd,
+ dict, flags, xdata_in);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
int
-syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags)
+syncop_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
{
- struct syncargs args = {0, };
+ struct syncargs *args = NULL;
- SYNCOP (subvol, (&args), syncop_setxattr_cbk, subvol->fops->setxattr,
- loc, dict, flags, NULL);
+ args = cookie;
- errno = args.op_errno;
- return args.op_ret;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (op_ret >= 0)
+ args->xattr = dict_ref(dict);
+
+ __wake(args);
+
+ return 0;
}
int
-syncop_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+syncop_listxattr(xlator_t *subvol, loc_t *loc, dict_t **dict, dict_t *xdata_in,
+ dict_t **xdata_out)
{
- struct syncargs *args = NULL;
+ struct syncargs args = {
+ 0,
+ };
- args = cookie;
+ SYNCOP(subvol, (&args), syncop_getxattr_cbk, subvol->fops->getxattr, loc,
+ NULL, xdata_in);
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ if (dict)
+ *dict = args.xattr;
+ else if (args.xattr)
+ dict_unref(args.xattr);
- __wake (args);
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- return 0;
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
+int
+syncop_getxattr(xlator_t *subvol, loc_t *loc, dict_t **dict, const char *key,
+ dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_getxattr_cbk, subvol->fops->getxattr, loc,
+ key, xdata_in);
+
+ if (dict)
+ *dict = args.xattr;
+ else if (args.xattr)
+ dict_unref(args.xattr);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
int
-syncop_fsetxattr (xlator_t *subvol, fd_t *fd, dict_t *dict, int32_t flags)
+syncop_fgetxattr(xlator_t *subvol, fd_t *fd, dict_t **dict, const char *key,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_fsetxattr_cbk, subvol->fops->fsetxattr,
- fd, dict, flags, NULL);
+ SYNCOP(subvol, (&args), syncop_getxattr_cbk, subvol->fops->fgetxattr, fd,
+ key, xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (dict)
+ *dict = args.xattr;
+ else if (args.xattr)
+ dict_unref(args.xattr);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+syncop_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
- if (op_ret >= 0)
- args->xattr = dict_ref (dict);
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ if (op_ret == 0) {
+ args->statvfs_buf = *buf;
+ }
- return 0;
+ __wake(args);
+
+ return 0;
}
int
-syncop_listxattr (xlator_t *subvol, loc_t *loc, dict_t **dict)
+syncop_statfs(xlator_t *subvol, loc_t *loc, struct statvfs *buf,
+ dict_t *xdata_in, dict_t **xdata_out)
+
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->getxattr,
- loc, NULL, NULL);
+ SYNCOP(subvol, (&args), syncop_statfs_cbk, subvol->fops->statfs, loc,
+ xdata_in);
- if (dict)
- *dict = args.xattr;
- else if (args.xattr)
- dict_unref (args.xattr);
+ if (buf)
+ *buf = args.statvfs_buf;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- errno = args.op_errno;
- return args.op_ret;
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_getxattr (xlator_t *subvol, loc_t *loc, dict_t **dict, const char *key)
+syncop_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preop,
+ struct iatt *postop, dict_t *xdata)
{
- struct syncargs args = {0, };
+ struct syncargs *args = NULL;
- SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->getxattr,
- loc, key, NULL);
+ args = cookie;
- if (dict)
- *dict = args.xattr;
- else if (args.xattr)
- dict_unref (args.xattr);
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- errno = args.op_errno;
- return args.op_ret;
+ if (op_ret == 0) {
+ args->iatt1 = *preop;
+ args->iatt2 = *postop;
+ }
+
+ __wake(args);
+
+ return 0;
}
int
-syncop_fgetxattr (xlator_t *subvol, fd_t *fd, dict_t **dict, const char *key)
+syncop_setattr(xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata_in,
+ dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_getxattr_cbk, subvol->fops->fgetxattr,
- fd, key, NULL);
+ SYNCOP(subvol, (&args), syncop_setattr_cbk, subvol->fops->setattr, loc,
+ iatt, valid, xdata_in);
- if (dict)
- *dict = args.xattr;
- else if (args.xattr)
- dict_unref (args.xattr);
+ if (preop)
+ *preop = args.iatt1;
+ if (postop)
+ *postop = args.iatt2;
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct statvfs *buf, dict_t *xdata)
+syncop_fsetattr(xlator_t *subvol, fd_t *fd, struct iatt *iatt, int valid,
+ struct iatt *preop, struct iatt *postop, dict_t *xdata_in,
+ dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_setattr_cbk, subvol->fops->fsetattr, fd,
+ iatt, valid, xdata_in);
+ if (preop)
+ *preop = args.iatt1;
+ if (postop)
+ *postop = args.iatt2;
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
+
+int32_t
+syncop_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- if (op_ret == 0) {
- args->statvfs_buf = *buf;
- }
+ __wake(args);
- __wake (args);
+ return 0;
+}
- return 0;
+int
+syncop_open(xlator_t *subvol, loc_t *loc, int32_t flags, fd_t *fd,
+ dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_open_cbk, subvol->fops->open, loc, flags, fd,
+ xdata_in);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
+
+int32_t
+syncop_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ INIT_LIST_HEAD(&args->entries.list);
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (args->op_ret >= 0) {
+ if (iobref)
+ args->iobref = iobref_ref(iobref);
+ args->vector = iov_dup(vector, count);
+ args->count = count;
+ args->iatt1 = *stbuf;
+ }
+
+ __wake(args);
+
+ return 0;
}
+int
+syncop_readv(xlator_t *subvol, fd_t *fd, size_t size, off_t off, uint32_t flags,
+ struct iovec **vector, int *count, struct iobref **iobref,
+ struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_readv_cbk, subvol->fops->readv, fd, size,
+ off, flags, xdata_in);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (iatt)
+ *iatt = args.iatt1;
+
+ if (args.op_ret < 0)
+ goto out;
+
+ if (vector)
+ *vector = args.vector;
+ else
+ GF_FREE(args.vector);
+
+ if (count)
+ *count = args.count;
+
+ /* Do we need a 'ref' here? */
+ if (iobref)
+ *iobref = args.iobref;
+ else if (args.iobref)
+ iobref_unref(args.iobref);
+
+out:
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
int
-syncop_statfs (xlator_t *subvol, loc_t *loc, struct statvfs *buf)
+syncop_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ args->iatt1 = *prebuf;
+ args->iatt2 = *postbuf;
+ }
+
+ __wake(args);
+ return 0;
+}
+
+int
+syncop_writev(xlator_t *subvol, fd_t *fd, const struct iovec *vector,
+ int32_t count, off_t offset, struct iobref *iobref,
+ uint32_t flags, struct iatt *preiatt, struct iatt *postiatt,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_statfs_cbk, subvol->fops->statfs,
- loc, NULL);
+ SYNCOP(subvol, (&args), syncop_writev_cbk, subvol->fops->writev, fd,
+ (struct iovec *)vector, count, offset, flags, iobref, xdata_in);
- if (buf)
- *buf = args.statvfs_buf;
+ if (preiatt)
+ *preiatt = args.iatt1;
+ if (postiatt)
+ *postiatt = args.iatt2;
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
+syncop_write(xlator_t *subvol, fd_t *fd, const char *buf, int size,
+ off_t offset, struct iobref *iobref, uint32_t flags,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs *args = NULL;
+ struct syncargs args = {
+ 0,
+ };
+ struct iovec vec = {
+ 0,
+ };
- args = cookie;
+ vec.iov_len = size;
+ vec.iov_base = (void *)buf;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ SYNCOP(subvol, (&args), syncop_writev_cbk, subvol->fops->writev, fd, &vec,
+ 1, offset, flags, iobref, xdata_in);
- if (op_ret == 0) {
- args->iatt1 = *preop;
- args->iatt2 = *postop;
- }
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- __wake (args);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
- return 0;
+int
+syncop_close(fd_t *fd)
+{
+ if (fd)
+ fd_unref(fd);
+ return 0;
}
+int32_t
+syncop_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (buf)
+ args->iatt1 = *buf;
+
+ __wake(args);
+
+ return 0;
+}
int
-syncop_setattr (xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid,
- struct iatt *preop, struct iatt *postop)
+syncop_create(xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode,
+ fd_t *fd, struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_setattr_cbk, subvol->fops->setattr,
- loc, iatt, valid, NULL);
+ SYNCOP(subvol, (&args), syncop_create_cbk, subvol->fops->create, loc, flags,
+ mode, 0, fd, xdata_in);
- if (preop)
- *preop = args.iatt1;
- if (postop)
- *postop = args.iatt2;
+ if (iatt)
+ *iatt = args.iatt1;
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
+int32_t
+syncop_put_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (buf)
+ args->iatt1 = *buf;
+
+ __wake(args);
+
+ return 0;
+}
int
-syncop_fsetattr (xlator_t *subvol, fd_t *fd, struct iatt *iatt, int valid,
- struct iatt *preop, struct iatt *postop)
+syncop_put(xlator_t *subvol, loc_t *loc, mode_t mode, mode_t umask,
+ uint32_t flags, struct iovec *vector, int32_t count, off_t offset,
+ struct iobref *iobref, dict_t *xattr, struct iatt *iatt,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_setattr_cbk, subvol->fops->fsetattr,
- fd, iatt, valid, NULL);
+ SYNCOP(subvol, (&args), syncop_put_cbk, subvol->fops->put, loc, mode, umask,
+ flags, (struct iovec *)vector, count, offset, iobref, xattr,
+ xdata_in);
- if (preop)
- *preop = args.iatt1;
- if (postop)
- *postop = args.iatt2;
+ if (iatt)
+ *iatt = args.iatt1;
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
+int
+syncop_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
-int32_t
-syncop_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_unlink(xlator_t *subvol, loc_t *loc, dict_t *xdata_in,
+ dict_t **xdata_out)
{
- struct syncargs *args = NULL;
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_unlink_cbk, subvol->fops->unlink, loc, 0,
+ xdata_in);
- args = cookie;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
- if (op_ret != -1)
- fd_ref (fd);
+int
+syncop_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
- __wake (args);
+ args = cookie;
- return 0;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
}
int
-syncop_open (xlator_t *subvol, loc_t *loc, int32_t flags, fd_t *fd)
+syncop_rmdir(xlator_t *subvol, loc_t *loc, int flags, dict_t *xdata_in,
+ dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_open_cbk, subvol->fops->open,
- loc, flags, fd, NULL);
+ SYNCOP(subvol, (&args), syncop_rmdir_cbk, subvol->fops->rmdir, loc, flags,
+ xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
+int
+syncop_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
-int32_t
-syncop_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (buf)
+ args->iatt1 = *buf;
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_link(xlator_t *subvol, loc_t *oldloc, loc_t *newloc, struct iatt *iatt,
+ dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_link_cbk, subvol->fops->link, oldloc, newloc,
+ xdata_in);
+
+ if (iatt)
+ *iatt = args.iatt1;
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+
+ return args.op_ret;
+}
+
+int
+syncop_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- INIT_LIST_HEAD (&args->entries.list);
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ __wake(args);
- if (args->op_ret >= 0) {
- if (iobref)
- args->iobref = iobref_ref (iobref);
- args->vector = iov_dup (vector, count);
- args->count = count;
- }
+ return 0;
+}
- __wake (args);
+int
+syncop_rename(xlator_t *subvol, loc_t *oldloc, loc_t *newloc, dict_t *xdata_in,
+ dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
- return 0;
+ SYNCOP(subvol, (&args), syncop_rename_cbk, subvol->fops->rename, oldloc,
+ newloc, xdata_in);
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+
+ return args.op_ret;
}
int
-syncop_readv (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
- uint32_t flags, struct iovec **vector, int *count,
- struct iobref **iobref)
+syncop_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- struct syncargs args = {0, };
+ struct syncargs *args = NULL;
- SYNCOP (subvol, (&args), syncop_readv_cbk, subvol->fops->readv,
- fd, size, off, flags, NULL);
+ args = cookie;
- if (vector)
- *vector = args.vector;
- else if (args.vector)
- GF_FREE (args.vector);
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- if (count)
- *count = args.count;
+ if (op_ret >= 0) {
+ args->iatt1 = *prebuf;
+ args->iatt2 = *postbuf;
+ }
- /* Do we need a 'ref' here? */
- if (iobref)
- *iobref = args.iobref;
- else if (args.iobref)
- iobref_unref (args.iobref);
+ __wake(args);
- errno = args.op_errno;
- return args.op_ret;
+ return 0;
+}
+
+int
+syncop_ftruncate(xlator_t *subvol, fd_t *fd, off_t offset, struct iatt *preiatt,
+ struct iatt *postiatt, dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_ftruncate_cbk, subvol->fops->ftruncate, fd,
+ offset, xdata_in);
+ if (preiatt)
+ *preiatt = args.iatt1;
+ if (postiatt)
+ *postiatt = args.iatt2;
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+syncop_truncate(xlator_t *subvol, loc_t *loc, off_t offset, dict_t *xdata_in,
+ dict_t **xdata_out)
{
- struct syncargs *args = NULL;
+ struct syncargs args = {
+ 0,
+ };
- args = cookie;
+ SYNCOP(subvol, (&args), syncop_ftruncate_cbk, subvol->fops->truncate, loc,
+ offset, xdata_in);
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- __wake (args);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
- return 0;
+int
+syncop_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ args->iatt1 = *prebuf;
+ args->iatt2 = *postbuf;
+ }
+
+ __wake(args);
+
+ return 0;
}
int
-syncop_writev (xlator_t *subvol, fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset, struct iobref *iobref,
- uint32_t flags)
+syncop_fsync(xlator_t *subvol, fd_t *fd, int dataonly, struct iatt *preiatt,
+ struct iatt *postiatt, dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_writev_cbk, subvol->fops->writev,
- fd, vector, count, offset, flags, iobref, NULL);
+ SYNCOP(subvol, (&args), syncop_fsync_cbk, subvol->fops->fsync, fd, dataonly,
+ xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (preiatt)
+ *preiatt = args.iatt1;
+ if (postiatt)
+ *postiatt = args.iatt2;
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
-int syncop_write (xlator_t *subvol, fd_t *fd, const char *buf, int size,
- off_t offset, struct iobref *iobref, uint32_t flags)
+int
+syncop_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- struct syncargs args = {0,};
- struct iovec vec = {0,};
+ struct syncargs *args = NULL;
- vec.iov_len = size;
- vec.iov_base = (void *)buf;
+ args = cookie;
- SYNCOP (subvol, (&args), syncop_writev_cbk, subvol->fops->writev,
- fd, &vec, 1, offset, flags, iobref, NULL);
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- errno = args.op_errno;
- return args.op_ret;
+ __wake(args);
+
+ return 0;
}
+int
+syncop_flush(xlator_t *subvol, fd_t *fd, dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {0};
+
+ SYNCOP(subvol, (&args), syncop_flush_cbk, subvol->fops->flush, fd,
+ xdata_in);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
int
-syncop_close (fd_t *fd)
+syncop_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ dict_t *xdata)
{
- if (fd)
- fd_unref (fd);
- return 0;
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (op_ret == 0)
+ args->iatt1 = *stbuf;
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_fstat(xlator_t *subvol, fd_t *fd, struct iatt *stbuf, dict_t *xdata_in,
+ dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_fstat_cbk, subvol->fops->fstat, fd,
+ xdata_in);
+
+ if (stbuf)
+ *stbuf = args.iatt1;
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_stat(xlator_t *subvol, loc_t *loc, struct iatt *stbuf, dict_t *xdata_in,
+ dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_fstat_cbk, subvol->fops->stat, loc,
+ xdata_in);
+
+ if (stbuf)
+ *stbuf = args.iatt1;
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int32_t
-syncop_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+syncop_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- if (op_ret != -1)
- fd_ref (fd);
+ if (buf)
+ args->iatt1 = *buf;
- __wake (args);
+ __wake(args);
- return 0;
+ return 0;
}
int
-syncop_create (xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *xdata)
+syncop_symlink(xlator_t *subvol, loc_t *loc, const char *newpath,
+ struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_create_cbk, subvol->fops->create,
- loc, flags, mode, 0, fd, xdata);
+ SYNCOP(subvol, (&args), syncop_symlink_cbk, subvol->fops->symlink, newpath,
+ loc, 0, xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (iatt)
+ *iatt = args.iatt1;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+syncop_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, const char *path,
+ struct iatt *stbuf, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ if ((op_ret != -1) && path)
+ args->buffer = gf_strdup(path);
- return 0;
+ __wake(args);
+
+ return 0;
}
int
-syncop_unlink (xlator_t *subvol, loc_t *loc)
+syncop_readlink(xlator_t *subvol, loc_t *loc, char **buffer, size_t size,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_unlink_cbk, subvol->fops->unlink, loc,
- 0, NULL);
+ SYNCOP(subvol, (&args), syncop_readlink_cbk, subvol->fops->readlink, loc,
+ size, xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (buffer)
+ *buffer = args.buffer;
+ else
+ GF_FREE(args.buffer);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
+syncop_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ if (buf)
+ args->iatt1 = *buf;
- return 0;
+ __wake(args);
+
+ return 0;
}
+int
+syncop_mknod(xlator_t *subvol, loc_t *loc, mode_t mode, dev_t rdev,
+ struct iatt *iatt, dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_mknod_cbk, subvol->fops->mknod, loc, mode,
+ rdev, 0, xdata_in);
+
+ if (iatt)
+ *iatt = args.iatt1;
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
int
-syncop_link (xlator_t *subvol, loc_t *oldloc, loc_t *newloc)
+syncop_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- struct syncargs args = {0, };
+ struct syncargs *args = NULL;
- SYNCOP (subvol, (&args), syncop_link_cbk, subvol->fops->link,
- oldloc, newloc, NULL);
+ args = cookie;
- errno = args.op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- return args.op_ret;
+ if (buf)
+ args->iatt1 = *buf;
+
+ __wake(args);
+
+ return 0;
}
int
-syncop_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+syncop_mkdir(xlator_t *subvol, loc_t *loc, mode_t mode, struct iatt *iatt,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs *args = NULL;
+ struct syncargs args = {
+ 0,
+ };
- args = cookie;
+ SYNCOP(subvol, (&args), syncop_mkdir_cbk, subvol->fops->mkdir, loc, mode, 0,
+ xdata_in);
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ if (iatt)
+ *iatt = args.iatt1;
- __wake (args);
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- return 0;
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_ftruncate (xlator_t *subvol, fd_t *fd, off_t offset)
+syncop_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- struct syncargs args = {0, };
+ struct syncargs *args = NULL;
- SYNCOP (subvol, (&args), syncop_ftruncate_cbk, subvol->fops->ftruncate,
- fd, offset, NULL);
+ args = cookie;
- errno = args.op_errno;
- return args.op_ret;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
}
+/* posix_acl xlator will respond in different ways for access calls from
+ fuse and access calls from nfs. For fuse, checking op_ret is sufficient
+ to check whether the access call is successful or not. But for nfs the
+ mode of the access that is permitted is put into op_errno before unwind.
+ With syncop, the caller of syncop_access will not be able to get the
+ mode of the access despite call being successul (since syncop_access
+ returns only the op_ret collected in args).
+ Now, if access call is failed, then args.op_ret is returned to recognise
+ the failure. But if op_ret is zero, then the mode of access which is
+ set in args.op_errno is returned. Thus the caller of syncop_access
+ has to check whether the return value is less than zero or not. If the
+ return value it got is less than zero, then access call is failed.
+ If it is not, then the access call is successful and the value the caller
+ got is the mode of the access.
+*/
int
-syncop_truncate (xlator_t *subvol, loc_t *loc, off_t offset)
+syncop_access(xlator_t *subvol, loc_t *loc, int32_t mask, dict_t *xdata_in,
+ dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_ftruncate_cbk, subvol->fops->truncate,
- loc, offset, NULL);
+ SYNCOP(subvol, (&args), syncop_access_cbk, subvol->fops->access, loc, mask,
+ xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_errno;
}
int
-syncop_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+syncop_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ __wake(args);
- return 0;
+ return 0;
+}
+
+int
+syncop_fallocate(xlator_t *subvol, fd_t *fd, int32_t keep_size, off_t offset,
+ size_t len, dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_fallocate_cbk, subvol->fops->fallocate, fd,
+ keep_size, offset, len, xdata_in);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_fsync (xlator_t *subvol, fd_t *fd)
+syncop_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- struct syncargs args = {0, };
+ struct syncargs *args = NULL;
- SYNCOP (subvol, (&args), syncop_fsync_cbk, subvol->fops->fsync,
- fd, 0, NULL);
+ args = cookie;
- errno = args.op_errno;
- return args.op_ret;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ __wake(args);
+
+ return 0;
}
int
-syncop_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf, dict_t *xdata)
+syncop_discard(xlator_t *subvol, fd_t *fd, off_t offset, size_t len,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs *args = NULL;
+ struct syncargs args = {
+ 0,
+ };
- args = cookie;
+ SYNCOP(subvol, (&args), syncop_discard_cbk, subvol->fops->discard, fd,
+ offset, len, xdata_in);
- args->op_ret = op_ret;
- args->op_errno = op_errno;
- if (op_ret == 0)
- args->iatt1 = *stbuf;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- __wake (args);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
- return 0;
+int
+syncop_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
}
int
-syncop_fstat (xlator_t *subvol, fd_t *fd, struct iatt *stbuf)
+syncop_zerofill(xlator_t *subvol, fd_t *fd, off_t offset, off_t len,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_fstat_cbk, subvol->fops->fstat,
- fd, NULL);
+ SYNCOP(subvol, (&args), syncop_zerofill_cbk, subvol->fops->zerofill, fd,
+ offset, len, xdata_in);
- if (stbuf)
- *stbuf = args.iatt1;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- errno = args.op_errno;
- return args.op_ret;
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_ipc(xlator_t *subvol, int32_t op, dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_ipc_cbk, subvol->fops->ipc, op, xdata_in);
+
+ if (args.xdata) {
+ if (xdata_out) {
+ /*
+ * We're passing this reference to the caller, along
+ * with the pointer itself. That means they're
+ * responsible for calling dict_unref at some point.
+ */
+ *xdata_out = args.xdata;
+ } else {
+ dict_unref(args.xdata);
+ }
+ }
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int
-syncop_stat (xlator_t *subvol, loc_t *loc, struct iatt *stbuf)
+syncop_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, off_t offset, dict_t *xdata)
{
- struct syncargs args = {0, };
+ struct syncargs *args = NULL;
+
+ args = cookie;
- SYNCOP (subvol, (&args), syncop_fstat_cbk, subvol->fops->stat,
- loc, NULL);
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ args->offset = offset;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- if (stbuf)
- *stbuf = args.iatt1;
+ __wake(args);
- errno = args.op_errno;
+ return 0;
+}
+
+int
+syncop_seek(xlator_t *subvol, fd_t *fd, off_t offset, gf_seek_what_t what,
+ dict_t *xdata_in, off_t *off)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_seek_cbk, subvol->fops->seek, fd, offset,
+ what, xdata_in);
+
+ if (args.op_ret < 0) {
+ return -args.op_errno;
+ } else {
+ if (off)
+ *off = args.offset;
return args.op_ret;
+ }
+}
+
+int
+syncop_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct gf_lease *lease, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ if (lease)
+ args->lease = *lease;
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_lease(xlator_t *subvol, loc_t *loc, struct gf_lease *lease,
+ dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_lease_cbk, subvol->fops->lease, loc, lease,
+ xdata_in);
+
+ *lease = args.lease;
+
+ if (args.xdata) {
+ if (xdata_out) {
+ /*
+ * We're passing this reference to the caller, along
+ * with the pointer itself. That means they're
+ * responsible for calling dict_unref at some point.
+ */
+ *xdata_out = args.xdata;
+ } else {
+ dict_unref(args.xdata);
+ }
+ }
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct gf_flock *flock, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (flock)
+ args->flock = *flock;
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_lk(xlator_t *subvol, fd_t *fd, int cmd, struct gf_flock *flock,
+ dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_lk_cbk, subvol->fops->lk, fd, cmd, flock,
+ xdata_in);
+
+ *flock = args.flock;
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+ if (args.op_ret < 0)
+ return -args.op_errno;
+ return args.op_ret;
}
int32_t
-syncop_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+syncop_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- struct syncargs *args = NULL;
+ struct syncargs *args = NULL;
- args = cookie;
+ args = cookie;
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
- __wake (args);
+ __wake(args);
- return 0;
+ return 0;
}
int
-syncop_symlink (xlator_t *subvol, loc_t *loc, char *newpath, dict_t *dict)
+syncop_inodelk(xlator_t *subvol, const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_symlink_cbk, subvol->fops->symlink,
- newpath, loc, 0, dict);
+ SYNCOP(subvol, (&args), syncop_inodelk_cbk, subvol->fops->inodelk, volume,
+ loc, cmd, lock, xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+
+ return args.op_ret;
+}
+
+int32_t
+syncop_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ __wake(args);
+ return 0;
}
int
-syncop_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, const char *path,
- struct iatt *stbuf, dict_t *xdata)
+syncop_entrylk(xlator_t *subvol, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata_in, dict_t **xdata_out)
{
- struct syncargs *args = NULL;
+ struct syncargs args = {
+ 0,
+ };
- args = cookie;
+ SYNCOP(subvol, (&args), syncop_entrylk_cbk, subvol->fops->entrylk, volume,
+ loc, basename, cmd, type, xdata_in);
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- if ((op_ret != -1) && path)
- args->buffer = gf_strdup (path);
+ if (args.op_ret < 0)
+ return -args.op_errno;
- __wake (args);
+ return args.op_ret;
+}
- return 0;
+int32_t
+syncop_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+ if (dict)
+ args->dict_out = dict_ref(dict);
+
+ __wake(args);
+
+ return 0;
}
int
-syncop_readlink (xlator_t *subvol, loc_t *loc, char **buffer, size_t size)
+syncop_xattrop(xlator_t *subvol, loc_t *loc, gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata_in, dict_t **dict_out,
+ dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_readlink_cbk, subvol->fops->readlink,
- loc, size, NULL);
+ SYNCOP(subvol, (&args), syncop_xattrop_cbk, subvol->fops->xattrop, loc,
+ flags, dict, xdata_in);
- if (buffer)
- *buffer = args.buffer;
- else if (args.buffer)
- GF_FREE (args.buffer);
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- errno = args.op_errno;
- return args.op_ret;
+ if (dict_out)
+ *dict_out = args.dict_out;
+ else if (args.dict_out)
+ dict_unref(args.dict_out);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+
+ return args.op_ret;
}
int
-syncop_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+syncop_fxattrop(xlator_t *subvol, fd_t *fd, gf_xattrop_flags_t flags,
+ dict_t *dict, dict_t *xdata_in, dict_t **dict_out,
+ dict_t **xdata_out)
{
- struct syncargs *args = NULL;
+ struct syncargs args = {
+ 0,
+ };
- args = cookie;
+ SYNCOP(subvol, (&args), syncop_xattrop_cbk, subvol->fops->fxattrop, fd,
+ flags, dict, xdata_in);
- args->op_ret = op_ret;
- args->op_errno = op_errno;
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
- __wake (args);
+ if (dict_out)
+ *dict_out = args.dict_out;
+ else if (args.dict_out)
+ dict_unref(args.dict_out);
- return 0;
+ if (args.op_ret < 0)
+ return -args.op_errno;
+
+ return args.op_ret;
+}
+
+int32_t
+syncop_getactivelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ lock_migration_info_t *locklist, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+ lock_migration_info_t *tmp = NULL;
+ lock_migration_info_t *entry = NULL;
+
+ args = cookie;
+
+ INIT_LIST_HEAD(&args->locklist.list);
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (op_ret > 0) {
+ list_for_each_entry(tmp, &locklist->list, list)
+ {
+ entry = GF_CALLOC(1, sizeof(lock_migration_info_t),
+ gf_common_mt_char);
+
+ if (!entry) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, 0,
+ "lock mem allocation failed");
+ gf_free_mig_locks(&args->locklist);
+
+ break;
+ }
+
+ INIT_LIST_HEAD(&entry->list);
+
+ entry->flock = tmp->flock;
+
+ entry->lk_flags = tmp->lk_flags;
+
+ entry->client_uid = gf_strdup(tmp->client_uid);
+
+ list_add_tail(&entry->list, &args->locklist.list);
+ }
+ }
+
+ __wake(args);
+
+ return 0;
}
int
-syncop_mknod (xlator_t *subvol, loc_t *loc, mode_t mode, dev_t rdev,
- dict_t *dict)
+syncop_getactivelk(xlator_t *subvol, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata_in,
+ dict_t **xdata_out)
{
- struct syncargs args = {0, };
+ struct syncargs args = {
+ 0,
+ };
- SYNCOP (subvol, (&args), syncop_mknod_cbk, subvol->fops->mknod,
- loc, mode, rdev, 0, dict);
+ SYNCOP(subvol, (&args), syncop_getactivelk_cbk, subvol->fops->getactivelk,
+ loc, xdata_in);
- errno = args.op_errno;
- return args.op_ret;
+ if (locklist)
+ list_splice_init(&args.locklist.list, &locklist->list);
+ else
+ gf_free_mig_locks(&args.locklist);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+
+ return args.op_ret;
+}
+
+int
+syncop_setactivelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_setactivelk(xlator_t *subvol, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata_in,
+ dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_setactivelk_cbk, subvol->fops->setactivelk,
+ loc, locklist, xdata_in);
+
+ if (xdata_out)
+ *xdata_out = args.xdata;
+ else if (args.xdata)
+ dict_unref(args.xdata);
+
+ if (args.op_ret < 0)
+ return -args.op_errno;
+
+ return args.op_ret;
+}
+
+int
+syncop_icreate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (buf)
+ args->iatt1 = *buf;
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_namelink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ __wake(args);
+
+ return 0;
+}
+
+int
+syncop_copy_file_range(xlator_t *subvol, fd_t *fd_in, off64_t off_in,
+ fd_t *fd_out, off64_t off_out, size_t len,
+ uint32_t flags, struct iatt *stbuf,
+ struct iatt *preiatt_dst, struct iatt *postiatt_dst,
+ dict_t *xdata_in, dict_t **xdata_out)
+{
+ struct syncargs args = {
+ 0,
+ };
+
+ SYNCOP(subvol, (&args), syncop_copy_file_range_cbk,
+ subvol->fops->copy_file_range, fd_in, off_in, fd_out, off_out, len,
+ flags, xdata_in);
+
+ if (stbuf) {
+ *stbuf = args.iatt1;
+ }
+ if (preiatt_dst) {
+ *preiatt_dst = args.iatt2;
+ }
+ if (postiatt_dst) {
+ *postiatt_dst = args.iatt3;
+ }
+
+ if (xdata_out) {
+ *xdata_out = args.xdata;
+ } else if (args.xdata) {
+ dict_unref(args.xdata);
+ }
+
+ errno = args.op_errno;
+ return args.op_ret;
+}
+
+int
+syncop_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *stbuf,
+ struct iatt *prebuf_dst, struct iatt *postbuf_dst,
+ dict_t *xdata)
+{
+ struct syncargs *args = NULL;
+
+ args = cookie;
+
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+ if (xdata)
+ args->xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ args->iatt1 = *stbuf;
+ args->iatt2 = *prebuf_dst;
+ args->iatt3 = *postbuf_dst;
+ }
+
+ __wake(args);
+ return 0;
}
diff --git a/libglusterfs/src/syncop.h b/libglusterfs/src/syncop.h
deleted file mode 100644
index 726e8c49a0d..00000000000
--- a/libglusterfs/src/syncop.h
+++ /dev/null
@@ -1,205 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _SYNCOP_H
-#define _SYNCOP_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include <sys/time.h>
-#include <pthread.h>
-#include <ucontext.h>
-
-#define SYNCENV_PROC_MAX 16
-#define SYNCENV_PROC_MIN 2
-
-struct synctask;
-struct syncproc;
-struct syncenv;
-
-
-typedef int (*synctask_cbk_t) (int ret, call_frame_t *frame, void *opaque);
-
-typedef int (*synctask_fn_t) (void *opaque);
-
-
-typedef enum {
- SYNCTASK_INIT = 0,
- SYNCTASK_RUN,
- SYNCTASK_SUSPEND,
- SYNCTASK_WAIT,
- SYNCTASK_DONE,
-} synctask_state_t;
-
-/* for one sequential execution of @syncfn */
-struct synctask {
- struct list_head all_tasks;
- struct syncenv *env;
- xlator_t *xl;
- call_frame_t *frame;
- call_frame_t *opframe;
- synctask_cbk_t synccbk;
- synctask_fn_t syncfn;
- synctask_state_t state;
- void *opaque;
- void *stack;
- int woken;
- int slept;
- int ret;
-
- ucontext_t ctx;
- struct syncproc *proc;
-
- pthread_mutex_t mutex; /* for synchronous spawning of synctask */
- pthread_cond_t cond;
- int done;
-};
-
-
-struct syncproc {
- pthread_t processor;
- ucontext_t sched;
- struct syncenv *env;
- struct synctask *current;
-};
-
-/* hosts the scheduler thread and framework for executing synctasks */
-struct syncenv {
- struct syncproc proc[SYNCENV_PROC_MAX];
- int procs;
-
- struct list_head runq;
- int runcount;
- struct list_head waitq;
- int waitcount;
-
- pthread_mutex_t mutex;
- pthread_cond_t cond;
-
- size_t stacksize;
-};
-
-
-struct syncargs {
- int op_ret;
- int op_errno;
- struct iatt iatt1;
- struct iatt iatt2;
- dict_t *xattr;
- gf_dirent_t entries;
- struct statvfs statvfs_buf;
- struct iovec *vector;
- int count;
- struct iobref *iobref;
- char *buffer;
- dict_t *xdata;
-
- /* some more _cbk needs */
- uuid_t uuid;
- char *errstr;
- dict_t *dict;
-
- /* do not touch */
- struct synctask *task;
-};
-
-#define __wake(args) synctask_wake(args->task)
-
-
-#define SYNCOP(subvol, stb, cbk, op, params ...) do { \
- struct synctask *task = NULL; \
- \
- task = synctask_get (); \
- stb->task = task; \
- \
- STACK_WIND_COOKIE (task->opframe, cbk, (void *)stb, \
- subvol, op, params); \
- task->state = SYNCTASK_SUSPEND; \
- synctask_yield (stb->task); \
- STACK_RESET (task->opframe->root); \
- } while (0)
-
-
-#define SYNCENV_DEFAULT_STACKSIZE (2 * 1024 * 1024)
-
-struct syncenv * syncenv_new ();
-void syncenv_destroy (struct syncenv *);
-void syncenv_scale (struct syncenv *env);
-
-int synctask_new (struct syncenv *, synctask_fn_t, synctask_cbk_t, call_frame_t* frame, void *);
-void synctask_wake (struct synctask *task);
-void synctask_yield (struct synctask *task);
-
-int syncop_lookup (xlator_t *subvol, loc_t *loc, dict_t *xattr_req,
- /* out */
- struct iatt *iatt, dict_t **xattr_rsp, struct iatt *parent);
-
-int syncop_readdirp (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
- dict_t *dict,
- /* out */
- gf_dirent_t *entries);
-
-int syncop_readdir (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
- gf_dirent_t *entries);
-
-int syncop_opendir (xlator_t *subvol, loc_t *loc, fd_t *fd);
-
-int syncop_setattr (xlator_t *subvol, loc_t *loc, struct iatt *iatt, int valid,
- /* out */
- struct iatt *preop, struct iatt *postop);
-
-int syncop_fsetattr (xlator_t *subvol, fd_t *fd, struct iatt *iatt, int valid,
- /* out */
- struct iatt *preop, struct iatt *postop);
-
-int syncop_statfs (xlator_t *subvol, loc_t *loc, struct statvfs *buf);
-
-int syncop_setxattr (xlator_t *subvol, loc_t *loc, dict_t *dict, int32_t flags);
-int syncop_fsetxattr (xlator_t *subvol, fd_t *fd, dict_t *dict, int32_t flags);
-int syncop_listxattr (xlator_t *subvol, loc_t *loc, dict_t **dict);
-int syncop_getxattr (xlator_t *xl, loc_t *loc, dict_t **dict, const char *key);
-int syncop_fgetxattr (xlator_t *xl, fd_t *fd, dict_t **dict, const char *key);
-int syncop_removexattr (xlator_t *subvol, loc_t *loc, const char *name);
-int syncop_fremovexattr (xlator_t *subvol, fd_t *fd, const char *name);
-
-int syncop_create (xlator_t *subvol, loc_t *loc, int32_t flags, mode_t mode,
- fd_t *fd, dict_t *dict);
-int syncop_open (xlator_t *subvol, loc_t *loc, int32_t flags, fd_t *fd);
-int syncop_close (fd_t *fd);
-
-int syncop_write (xlator_t *subvol, fd_t *fd, const char *buf, int size,
- off_t offset, struct iobref *iobref, uint32_t flags);
-int syncop_writev (xlator_t *subvol, fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset, struct iobref *iobref,
- uint32_t flags);
-int syncop_readv (xlator_t *subvol, fd_t *fd, size_t size, off_t off,
- uint32_t flags,
- /* out */
- struct iovec **vector, int *count, struct iobref **iobref);
-
-int syncop_ftruncate (xlator_t *subvol, fd_t *fd, off_t offset);
-int syncop_truncate (xlator_t *subvol, loc_t *loc, off_t offset);
-
-int syncop_unlink (xlator_t *subvol, loc_t *loc);
-
-int syncop_fsync (xlator_t *subvol, fd_t *fd);
-int syncop_fstat (xlator_t *subvol, fd_t *fd, struct iatt *stbuf);
-int syncop_stat (xlator_t *subvol, loc_t *loc, struct iatt *stbuf);
-
-int syncop_symlink (xlator_t *subvol, loc_t *loc, char *newpath, dict_t *dict);
-int syncop_readlink (xlator_t *subvol, loc_t *loc, char **buffer, size_t size);
-int syncop_mknod (xlator_t *subvol, loc_t *loc, mode_t mode, dev_t rdev,
- dict_t *dict);
-int syncop_link (xlator_t *subvol, loc_t *oldloc, loc_t *newloc);
-#endif /* _SYNCOP_H */
diff --git a/libglusterfs/src/syscall.c b/libglusterfs/src/syscall.c
index bb834dbfd9f..04400f98b6c 100644
--- a/libglusterfs/src/syscall.c
+++ b/libglusterfs/src/syscall.c
@@ -8,453 +8,869 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "compat.h"
-#include "syscall.h"
+#include "glusterfs/compat.h"
+#include "glusterfs/syscall.h"
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/libglusterfs-messages.h"
+#ifdef __FreeBSD__
+#include <sys/sysctl.h>
+#include <signal.h>
+#endif
#include <sys/types.h>
#include <utime.h>
#include <sys/time.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdarg.h>
+#ifdef HAVE_COPY_FILE_RANGE_SYS
+#include <sys/syscall.h>
+#endif
+
+#define FS_ERROR_LOG(result) \
+ do { \
+ gf_msg_callingfn("FS", GF_LOG_CRITICAL, EIO, \
+ LG_MSG_SYSCALL_RETURNS_WRONG, \
+ "returned %zd for the syscall", (ssize_t)result); \
+ } while (0)
+
+/*
+ * Input to these macros is generally a function call, so capture the result
+ * i.e. (_ret) in another variable and use that instead of using _ret again
+ */
+#define FS_RET_CHECK(_ret, err) \
+ ({ \
+ typeof(_ret) _result = (_ret); \
+ if (_result < -1) { \
+ FS_ERROR_LOG(_result); \
+ _result = -1; \
+ err = EIO; \
+ } \
+ _result; \
+ })
+
+#define FS_RET_CHECK0(_ret, err) \
+ ({ \
+ typeof(_ret) _result0 = (_ret); \
+ if (_result0 < -1 || _result0 > 0) { \
+ FS_ERROR_LOG(_result0); \
+ _result0 = -1; \
+ err = EIO; \
+ } \
+ _result0; \
+ })
+
+#define FS_RET_CHECK_ERRNO(_ret, err) \
+ ({ \
+ typeof(_ret) _result1 = (_ret); \
+ if (_result1 < 0) { \
+ FS_ERROR_LOG(_result1); \
+ _result1 = -1; \
+ err = EIO; \
+ } else if (_result1 > 0) { \
+ err = _result1; \
+ _result1 = -1; \
+ } \
+ _result1; \
+ })
int
-sys_lstat (const char *path, struct stat *buf)
+sys_lstat(const char *path, struct stat *buf)
{
- return lstat (path, buf);
+ return FS_RET_CHECK0(lstat(path, buf), errno);
}
+int
+sys_stat(const char *path, struct stat *buf)
+{
+ return FS_RET_CHECK0(stat(path, buf), errno);
+}
int
-sys_stat (const char *path, struct stat *buf)
+sys_fstat(int fd, struct stat *buf)
{
- return stat (path, buf);
+ return FS_RET_CHECK0(fstat(fd, buf), errno);
}
+int
+sys_fstatat(int dirfd, const char *pathname, struct stat *buf, int flags)
+{
+#ifdef GF_DARWIN_HOST_OS
+ if (fchdir(dirfd) < 0)
+ return -1;
+ if (flags & AT_SYMLINK_NOFOLLOW)
+ return FS_RET_CHECK0(lstat(pathname, buf), errno);
+ else
+ return FS_RET_CHECK0(stat(pathname, buf), errno);
+#else
+ return FS_RET_CHECK0(fstatat(dirfd, pathname, buf, flags), errno);
+#endif
+}
int
-sys_fstat (int fd, struct stat *buf)
+sys_openat(int dirfd, const char *pathname, int flags, int mode)
{
- return fstat (fd, buf);
+ int fd;
+
+#ifdef GF_DARWIN_HOST_OS
+ if (fchdir(dirfd) < 0)
+ return -1;
+ fd = open(pathname, flags, mode);
+ /* TODO: Shouldn't we restore the old current directory */
+#else /* GF_DARWIN_HOST_OS */
+ fd = openat(dirfd, pathname, flags, mode);
+#ifdef __FreeBSD__
+ /* On FreeBSD S_ISVTX flag is ignored for an open() with O_CREAT set.
+ * We need to force the flag using fchmod(). */
+ if ((fd >= 0) && ((flags & O_CREAT) != 0) && ((mode & S_ISVTX) != 0)) {
+ sys_fchmod(fd, mode);
+ /* TODO: It's unlikely that fchmod could fail here. However,
+ if it fails we cannot always restore the old state
+ (if the file existed, we cannot recover it). We would
+ need many more system calls to correctly handle all
+ possible cases and it doesn't worth it. For now we
+ simply ignore the error. */
+ }
+#endif /* __FreeBSD__ */
+#endif /* !GF_DARWIN_HOST_OS */
+
+ return FS_RET_CHECK(fd, errno);
}
+int
+sys_open(const char *pathname, int flags, int mode)
+{
+ return FS_RET_CHECK(sys_openat(AT_FDCWD, pathname, flags, mode), errno);
+}
DIR *
-sys_opendir (const char *name)
+sys_opendir(const char *name)
{
- return opendir (name);
+ return opendir(name);
}
-
-struct dirent *
-sys_readdir (DIR *dir)
+int
+sys_mkdirat(int dirfd, const char *pathname, mode_t mode)
{
- return readdir (dir);
+#ifdef GF_DARWIN_HOST_OS
+ if (fchdir(dirfd) < 0)
+ return -1;
+ return FS_RET_CHECK0(mkdir(pathname, mode), errno);
+#else
+ return FS_RET_CHECK0(mkdirat(dirfd, pathname, mode), errno);
+#endif
}
+struct dirent *
+sys_readdir(DIR *dir, struct dirent *de)
+{
+#if !defined(__GLIBC__)
+ /*
+ * World+Dog says glibc's readdir(3) is MT-SAFE as long as
+ * two threads are not accessing the same DIR; there's a
+ * potential buffer overflow in glibc's readdir_r(3); and
+ * glibc's readdir_r(3) is deprecated after version 2.22
+ * with presumed eventual removal.
+ * Given all that, World+Dog says everyone should just use
+ * readdir(3). But it's unknown, unclear whether the same
+ * is also true for *BSD, MacOS, and, etc.
+ */
+ struct dirent *entry = NULL;
+
+ (void)readdir_r(dir, de, &entry);
+ return entry;
+#else
+ return readdir(dir);
+#endif
+}
ssize_t
-sys_readlink (const char *path, char *buf, size_t bufsiz)
+sys_readlink(const char *path, char *buf, size_t bufsiz)
{
- return readlink (path, buf, bufsiz);
+ return FS_RET_CHECK(readlink(path, buf, bufsiz), errno);
}
-
int
-sys_closedir (DIR *dir)
+sys_closedir(DIR *dir)
{
- return closedir (dir);
+ return FS_RET_CHECK0(closedir(dir), errno);
}
-
int
-sys_mknod (const char *pathname, mode_t mode, dev_t dev)
+sys_mknod(const char *pathname, mode_t mode, dev_t dev)
{
- return mknod (pathname, mode, dev);
+ return FS_RET_CHECK0(mknod(pathname, mode, dev), errno);
}
-
int
-sys_mkdir (const char *pathname, mode_t mode)
+sys_mkdir(const char *pathname, mode_t mode)
{
- return mkdir (pathname, mode);
+ return FS_RET_CHECK0(mkdir(pathname, mode), errno);
}
-
int
-sys_unlink (const char *pathname)
+sys_unlink(const char *pathname)
{
#ifdef GF_SOLARIS_HOST_OS
- return solaris_unlink (pathname);
+ return FS_RET_CHECK0(solaris_unlink(pathname), errno);
#endif
- return unlink (pathname);
+ return FS_RET_CHECK0(unlink(pathname), errno);
}
-
int
-sys_rmdir (const char *pathname)
+sys_unlinkat(int dfd, const char *pathname)
{
- return rmdir (pathname);
+#ifdef GF_SOLARIS_HOST_OS
+ return FS_RET_CHECK0(solaris_unlinkat(dfd, pathname, 0), errno);
+#endif
+ return FS_RET_CHECK0(unlinkat(dfd, pathname, 0), errno);
}
+int
+sys_rmdir(const char *pathname)
+{
+ return FS_RET_CHECK0(rmdir(pathname), errno);
+}
int
-sys_symlink (const char *oldpath, const char *newpath)
+sys_symlink(const char *oldpath, const char *newpath)
{
- return symlink (oldpath, newpath);
+ return FS_RET_CHECK0(symlink(oldpath, newpath), errno);
}
+int
+sys_symlinkat(const char *oldpath, int dirfd, const char *newpath)
+{
+ return FS_RET_CHECK0(symlinkat(oldpath, dirfd, newpath), errno);
+}
int
-sys_rename (const char *oldpath, const char *newpath)
+sys_rename(const char *oldpath, const char *newpath)
{
#ifdef GF_SOLARIS_HOST_OS
- return solaris_rename (oldpath, newpath);
+ return FS_RET_CHECK0(solaris_rename(oldpath, newpath), errno);
#endif
- return rename (oldpath, newpath);
+ return FS_RET_CHECK0(rename(oldpath, newpath), errno);
}
+int
+sys_link(const char *oldpath, const char *newpath)
+{
+#ifdef HAVE_LINKAT
+ /*
+ * On most systems (Linux being the notable exception), link(2)
+ * first resolves symlinks. If the target is a directory or
+ * is nonexistent, it will fail. linkat(2) operates on the
+ * symlink instead of its target when the AT_SYMLINK_FOLLOW
+ * flag is not supplied.
+ */
+ return FS_RET_CHECK0(linkat(AT_FDCWD, oldpath, AT_FDCWD, newpath, 0),
+ errno);
+#else
+ return FS_RET_CHECK0(link(oldpath, newpath), errno);
+#endif
+}
int
-sys_link (const char *oldpath, const char *newpath)
+sys_linkat(int oldfd, const char *oldpath, int newfd, const char *newpath)
{
- return link (oldpath, newpath);
+ return FS_RET_CHECK0(linkat(oldfd, oldpath, newfd, newpath, 0), errno);
}
-
int
-sys_chmod (const char *path, mode_t mode)
+sys_chmod(const char *path, mode_t mode)
{
- return chmod (path, mode);
+ return FS_RET_CHECK0(chmod(path, mode), errno);
}
-
int
-sys_fchmod (int fd, mode_t mode)
+sys_fchmod(int fd, mode_t mode)
{
- return fchmod (fd, mode);
+ return FS_RET_CHECK0(fchmod(fd, mode), errno);
}
-
int
-sys_chown (const char *path, uid_t owner, gid_t group)
+sys_chown(const char *path, uid_t owner, gid_t group)
{
- return chown (path, owner, group);
+ return FS_RET_CHECK0(chown(path, owner, group), errno);
}
-
int
-sys_fchown (int fd, uid_t owner, gid_t group)
+sys_fchown(int fd, uid_t owner, gid_t group)
{
- return fchown (fd, owner, group);
+ return FS_RET_CHECK0(fchown(fd, owner, group), errno);
}
-
int
-sys_lchown (const char *path, uid_t owner, gid_t group)
+sys_lchown(const char *path, uid_t owner, gid_t group)
{
- return lchown (path, owner, group);
+ return FS_RET_CHECK0(lchown(path, owner, group), errno);
}
-
int
-sys_truncate (const char *path, off_t length)
+sys_truncate(const char *path, off_t length)
{
- return truncate (path, length);
+ return FS_RET_CHECK0(truncate(path, length), errno);
}
-
int
-sys_ftruncate (int fd, off_t length)
+sys_ftruncate(int fd, off_t length)
{
- return ftruncate (fd, length);
+ return FS_RET_CHECK0(ftruncate(fd, length), errno);
}
-
int
-sys_utimes (const char *filename, const struct timeval times[2])
+sys_utimes(const char *filename, const struct timeval times[2])
{
- return utimes (filename, times);
+ return FS_RET_CHECK0(utimes(filename, times), errno);
}
+#if defined(HAVE_UTIMENSAT)
+int
+sys_utimensat(int dirfd, const char *filename, const struct timespec times[2],
+ int flags)
+{
+ return FS_RET_CHECK0(utimensat(dirfd, filename, times, flags), errno);
+}
+#endif
int
-sys_creat (const char *pathname, mode_t mode)
+sys_futimes(int fd, const struct timeval times[2])
{
- return creat (pathname, mode);
+ return futimes(fd, times);
}
+int
+sys_creat(const char *pathname, mode_t mode)
+{
+ return FS_RET_CHECK(sys_open(pathname, O_CREAT | O_TRUNC | O_WRONLY, mode),
+ errno);
+}
ssize_t
-sys_readv (int fd, const struct iovec *iov, int iovcnt)
+sys_readv(int fd, const struct iovec *iov, int iovcnt)
{
- return readv (fd, iov, iovcnt);
+ return FS_RET_CHECK(readv(fd, iov, iovcnt), errno);
}
+ssize_t
+sys_writev(int fd, const struct iovec *iov, int iovcnt)
+{
+ return FS_RET_CHECK(writev(fd, iov, iovcnt), errno);
+}
ssize_t
-sys_writev (int fd, const struct iovec *iov, int iovcnt)
+sys_read(int fd, void *buf, size_t count)
{
- return writev (fd, iov, iovcnt);
+ return FS_RET_CHECK(read(fd, buf, count), errno);
}
+ssize_t
+sys_write(int fd, const void *buf, size_t count)
+{
+ return FS_RET_CHECK(write(fd, buf, count), errno);
+}
ssize_t
-sys_read (int fd, void *buf, size_t count)
+sys_preadv(int fd, const struct iovec *iov, int iovcnt, off_t offset)
{
- return read (fd, buf, count);
+ return FS_RET_CHECK(preadv(fd, iov, iovcnt, offset), errno);
}
+ssize_t
+sys_pwritev(int fd, const struct iovec *iov, int iovcnt, off_t offset)
+{
+ return FS_RET_CHECK(pwritev(fd, iov, iovcnt, offset), errno);
+}
ssize_t
-sys_write (int fd, const void *buf, size_t count)
+sys_pread(int fd, void *buf, size_t count, off_t offset)
{
- return write (fd, buf, count);
+ return FS_RET_CHECK(pread(fd, buf, count, offset), errno);
}
+ssize_t
+sys_pwrite(int fd, const void *buf, size_t count, off_t offset)
+{
+ return FS_RET_CHECK(pwrite(fd, buf, count, offset), errno);
+}
off_t
-sys_lseek (int fd, off_t offset, int whence)
+sys_lseek(int fd, off_t offset, int whence)
{
- return lseek (fd, offset, whence);
+ return FS_RET_CHECK(lseek(fd, offset, whence), errno);
}
-
int
-sys_statvfs (const char *path, struct statvfs *buf)
+sys_statvfs(const char *path, struct statvfs *buf)
{
- return statvfs (path, buf);
-}
+ int ret;
+
+ ret = statvfs(path, buf);
+#ifdef __FreeBSD__
+ /* FreeBSD doesn't return the expected value in buf->f_bsize. It
+ * contains the optimal I/O size instead of the file system block
+ * size. Gluster expects that this field contains the block size.
+ */
+ if (ret == 0) {
+ buf->f_bsize = buf->f_frsize;
+ }
+#endif /* __FreeBSD__ */
+ return FS_RET_CHECK0(ret, errno);
+}
int
-sys_close (int fd)
+sys_fstatvfs(int fd, struct statvfs *buf)
{
- int ret = -1;
+ int ret;
- if (fd >= 0)
- ret = close (fd);
+ ret = fstatvfs(fd, buf);
+#ifdef __FreeBSD__
+ /* FreeBSD doesn't return the expected value in buf->f_bsize. It
+ * contains the optimal I/O size instead of the file system block
+ * size. Gluster expects this field to contain the block size.
+ */
+ if (ret == 0) {
+ buf->f_bsize = buf->f_frsize;
+ }
+#endif /* __FreeBSD__ */
- return ret;
+ return FS_RET_CHECK0(ret, errno);
}
-
int
-sys_fsync (int fd)
+sys_close(int fd)
{
- return fsync (fd);
+ int ret = -1;
+
+ if (fd >= 0)
+ ret = close(fd);
+
+ return FS_RET_CHECK0(ret, errno);
}
+int
+sys_fsync(int fd)
+{
+ return FS_RET_CHECK0(fsync(fd), errno);
+}
int
-sys_fdatasync (int fd)
+sys_fdatasync(int fd)
{
-#ifdef HAVE_FDATASYNC
- return fdatasync (fd);
+#ifdef GF_DARWIN_HOST_OS
+ return FS_RET_CHECK0(fcntl(fd, F_FULLFSYNC), errno);
+#elif __FreeBSD__
+ return FS_RET_CHECK0(fsync(fd), errno);
#else
- return 0;
+ return FS_RET_CHECK0(fdatasync(fd), errno);
#endif
}
+void
+gf_add_prefix(const char *ns, const char *key, char **newkey)
+{
+ /* if we don't have any namespace, append USER NS */
+ if (strncmp(key, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
+ strncmp(key, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) &&
+ strncmp(key, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
+ strncmp(key, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN)) {
+ int ns_length = strlen(ns);
+ *newkey = GF_MALLOC(ns_length + strlen(key) + 10, gf_common_mt_char);
+ if (!*newkey)
+ return;
+ strcpy(*newkey, ns);
+ strcat(*newkey, key);
+ } else {
+ *newkey = gf_strdup(key);
+ }
+}
-int
-sys_lsetxattr (const char *path, const char *name, const void *value,
- size_t size, int flags)
+void
+gf_remove_prefix(const char *ns, const char *key, char **newkey)
{
+ int ns_length = strlen(ns);
+ if (strncmp(key, ns, ns_length) == 0) {
+ *newkey = GF_MALLOC(-ns_length + strlen(key) + 10, gf_common_mt_char);
+ if (!*newkey)
+ return;
+ strcpy(*newkey, key + ns_length);
+ } else {
+ *newkey = gf_strdup(key);
+ }
+}
+int
+sys_lsetxattr(const char *path, const char *name, const void *value,
+ size_t size, int flags)
+{
#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
- return lsetxattr (path, name, value, size, flags);
+ return FS_RET_CHECK0(lsetxattr(path, name, value, size, flags), errno);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_set_link (path, EXTATTR_NAMESPACE_USER,
- name, value, size);
+ return FS_RET_CHECK(
+ extattr_set_link(path, EXTATTR_NAMESPACE_USER, name, value, size),
+ errno);
#endif
#ifdef GF_SOLARIS_HOST_OS
- return solaris_setxattr (path, name, value, size, flags);
+ return FS_RET_CHECK0(solaris_setxattr(path, name, value, size, flags),
+ errno);
#endif
#ifdef GF_DARWIN_HOST_OS
- return setxattr (path, name, value, size, 0,
- flags|XATTR_NOFOLLOW);
+ /* OS X clients will carry other flags, which will be used on a
+ OS X host, but masked out on others. GF assume NOFOLLOW on Linux,
+ enforcing */
+ return FS_RET_CHECK0(setxattr(path, name, value, size, 0,
+ (flags & ~XATTR_NOSECURITY) | XATTR_NOFOLLOW),
+ errno);
#endif
-
}
-
ssize_t
-sys_llistxattr (const char *path, char *list, size_t size)
+sys_llistxattr(const char *path, char *list, size_t size)
{
-
#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
- return llistxattr (path, list, size);
+ return FS_RET_CHECK(llistxattr(path, list, size), errno);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_list_link (path, EXTATTR_NAMESPACE_USER, list, size);
+ ssize_t ret = FS_RET_CHECK(
+ extattr_list_link(path, EXTATTR_NAMESPACE_USER, list, size), errno);
+ gf_extattr_list_reshape(list, ret);
+ return ret;
#endif
#ifdef GF_SOLARIS_HOST_OS
- return solaris_listxattr (path, list, size);
+ return FS_RET_CHECK(solaris_listxattr(path, list, size), errno);
#endif
#ifdef GF_DARWIN_HOST_OS
- return listxattr (path, list, size, XATTR_NOFOLLOW);
+ return FS_RET_CHECK(listxattr(path, list, size, XATTR_NOFOLLOW), errno);
#endif
-
}
-
ssize_t
-sys_lgetxattr (const char *path, const char *name, void *value, size_t size)
+sys_lgetxattr(const char *path, const char *name, void *value, size_t size)
{
-
#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
- return lgetxattr (path, name, value, size);
+ return FS_RET_CHECK(lgetxattr(path, name, value, size), errno);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_get_link (path, EXTATTR_NAMESPACE_USER, name, value,
- size);
+ return FS_RET_CHECK(
+ extattr_get_link(path, EXTATTR_NAMESPACE_USER, name, value, size),
+ errno);
#endif
#ifdef GF_SOLARIS_HOST_OS
- return solaris_getxattr (path, name, value, size);
+ return FS_RET_CHECK(solaris_getxattr(path, name, value, size), errno);
#endif
#ifdef GF_DARWIN_HOST_OS
- return getxattr (path, name, value, size, 0, XATTR_NOFOLLOW);
+ return FS_RET_CHECK(getxattr(path, name, value, size, 0, XATTR_NOFOLLOW),
+ errno);
#endif
-
}
-
ssize_t
-sys_fgetxattr (int filedes, const char *name, void *value, size_t size)
+sys_fgetxattr(int filedes, const char *name, void *value, size_t size)
{
-
#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
- return fgetxattr (filedes, name, value, size);
+ return FS_RET_CHECK(fgetxattr(filedes, name, value, size), errno);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_get_fd (filedes, EXTATTR_NAMESPACE_USER, name,
- value, size);
+ return FS_RET_CHECK(
+ extattr_get_fd(filedes, EXTATTR_NAMESPACE_USER, name, value, size),
+ errno);
#endif
#ifdef GF_SOLARIS_HOST_OS
- return solaris_fgetxattr (filedes, name, value, size);
+ return FS_RET_CHECK(solaris_fgetxattr(filedes, name, value, size), errno);
#endif
#ifdef GF_DARWIN_HOST_OS
- return fgetxattr (filedes, name, value, size, 0, 0);
+ return FS_RET_CHECK(fgetxattr(filedes, name, value, size, 0, 0), errno);
#endif
-
}
-
int
-sys_fremovexattr (int filedes, const char *name)
+sys_fremovexattr(int filedes, const char *name)
{
-
#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
- return fremovexattr (filedes, name);
+ return FS_RET_CHECK0(fremovexattr(filedes, name), errno);
#endif
- errno = ENOSYS;
- return -1;
-#if 0 /* TODO: to port to other OSes, fill in each of below */
#ifdef GF_BSD_HOST_OS
- return extattr_remove_fd (filedes, EXTATTR_NAMESPACE_USER, name);
+ return FS_RET_CHECK0(
+ extattr_delete_fd(filedes, EXTATTR_NAMESPACE_USER, name), errno);
#endif
#ifdef GF_SOLARIS_HOST_OS
- return solaris_fremovexattr (filedes, name);
+ return FS_RET_CHECK0(solaris_fremovexattr(filedes, name), errno);
#endif
#ifdef GF_DARWIN_HOST_OS
- return fremovexattr (filedes, name, 0);
-#endif
+ return FS_RET_CHECK0(fremovexattr(filedes, name, 0), errno);
#endif
}
-
int
-sys_fsetxattr (int filedes, const char *name, const void *value,
- size_t size, int flags)
+sys_fsetxattr(int filedes, const char *name, const void *value, size_t size,
+ int flags)
{
-
#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
- return fsetxattr (filedes, name, value, size, flags);
+ return FS_RET_CHECK0(fsetxattr(filedes, name, value, size, flags), errno);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_set_fd (filedes, EXTATTR_NAMESPACE_USER, name,
- value, size);
+ return FS_RET_CHECK(
+ extattr_set_fd(filedes, EXTATTR_NAMESPACE_USER, name, value, size),
+ errno);
#endif
#ifdef GF_SOLARIS_HOST_OS
- return solaris_fsetxattr (filedes, name, value, size, flags);
+ return FS_RET_CHECK0(solaris_fsetxattr(filedes, name, value, size, flags),
+ errno);
#endif
#ifdef GF_DARWIN_HOST_OS
- return fsetxattr (filedes, name, value, size, 0, flags);
+ return FS_RET_CHECK0(
+ fsetxattr(filedes, name, value, size, 0, flags & ~XATTR_NOSECURITY),
+ errno);
#endif
-
}
-
ssize_t
-sys_flistxattr (int filedes, char *list, size_t size)
+sys_flistxattr(int filedes, char *list, size_t size)
{
-
#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
- return flistxattr (filedes, list, size);
+ return FS_RET_CHECK(flistxattr(filedes, list, size), errno);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_list_fd (filedes, EXTATTR_NAMESPACE_USER, list, size);
+ ssize_t ret = FS_RET_CHECK(
+ extattr_list_fd(filedes, EXTATTR_NAMESPACE_USER, list, size), errno);
+ gf_extattr_list_reshape(list, ret);
+ return ret;
#endif
#ifdef GF_SOLARIS_HOST_OS
- return solaris_flistxattr (filedes, list, size);
+ return FS_RET_CHECK(solaris_flistxattr(filedes, list, size), errno);
#endif
#ifdef GF_DARWIN_HOST_OS
- return flistxattr (filedes, list, size, XATTR_NOFOLLOW);
+ return FS_RET_CHECK(flistxattr(filedes, list, size, XATTR_NOFOLLOW), errno);
#endif
-
}
-
int
-sys_lremovexattr (const char *path, const char *name)
+sys_lremovexattr(const char *path, const char *name)
{
-
#if defined(GF_LINUX_HOST_OS) || defined(__NetBSD__)
- return lremovexattr (path, name);
+ return FS_RET_CHECK0(lremovexattr(path, name), errno);
#endif
#ifdef GF_BSD_HOST_OS
- return extattr_delete_link (path, EXTATTR_NAMESPACE_USER, name);
+ return FS_RET_CHECK0(
+ extattr_delete_link(path, EXTATTR_NAMESPACE_USER, name), errno);
#endif
#ifdef GF_SOLARIS_HOST_OS
- return solaris_removexattr (path, name);
+ return FS_RET_CHECK0(solaris_removexattr(path, name), errno);
#endif
#ifdef GF_DARWIN_HOST_OS
- return removexattr (path, name, XATTR_NOFOLLOW);
+ return FS_RET_CHECK0(removexattr(path, name, XATTR_NOFOLLOW), errno);
+#endif
+}
+
+int
+sys_access(const char *pathname, int mode)
+{
+ return FS_RET_CHECK0(access(pathname, mode), errno);
+}
+
+int
+sys_fallocate(int fd, int mode, off_t offset, off_t len)
+{
+#ifdef HAVE_FALLOCATE
+ return FS_RET_CHECK0(fallocate(fd, mode, offset, len), errno);
+#endif
+
+#ifdef HAVE_POSIX_FALLOCATE
+ if (mode) {
+ /* keep size not supported */
+ errno = EOPNOTSUPP;
+ return -1;
+ }
+
+ return FS_RET_CHECK_ERRNO(posix_fallocate(fd, offset, len), errno);
+#endif
+
+#if defined(F_ALLOCATECONTIG) && defined(GF_DARWIN_HOST_OS)
+ /* C conversion from C++ implementation for OSX by Mozilla Foundation */
+ if (mode) {
+ /* keep size not supported */
+ errno = EOPNOTSUPP;
+ return -1;
+ }
+ /*
+ * The F_PREALLOCATE command operates on the following structure:
+ *
+ * typedef struct fstore {
+ * u_int32_t fst_flags; // IN: flags word
+ * int fst_posmode; // IN: indicates offset field
+ * off_t fst_offset; // IN: start of the region
+ * off_t fst_length; // IN: size of the region
+ * off_t fst_bytesalloc; // OUT: number of bytes allocated
+ * } fstore_t;
+ *
+ * The flags (fst_flags) for the F_PREALLOCATE command are as follows:
+ * F_ALLOCATECONTIG Allocate contiguous space.
+ * F_ALLOCATEALL Allocate all requested space or no space at all.
+ *
+ * The position modes (fst_posmode) for the F_PREALLOCATE command
+ * indicate how to use the offset field. The modes are as follows:
+ * F_PEOFPOSMODE Allocate from the physical end of file.
+ * F_VOLPOSMODE Allocate from the volume offset.
+ *
+ */
+
+ int ret;
+ fstore_t store = {F_ALLOCATECONTIG, F_PEOFPOSMODE, offset, len, 0};
+ ret = fcntl(fd, F_PREALLOCATE, &store);
+ if (ret == -1) {
+ store.fst_flags = F_ALLOCATEALL;
+ ret = fcntl(fd, F_PREALLOCATE, &store);
+ }
+ if (ret == -1)
+ return ret;
+ return FS_RET_CHECK0(ftruncate(fd, offset + len), errno);
+#endif
+ errno = ENOSYS;
+ return -1;
+}
+
+int
+sys_socket(int domain, int type, int protocol)
+{
+#ifdef SOCK_CLOEXEC
+ return socket(domain, type | SOCK_CLOEXEC, protocol);
+#else
+ int fd = -1;
+
+ fd = socket(domain, type, protocol);
+ if (fd >= 0)
+ fcntl(fd, F_SETFD, FD_CLOEXEC);
+ return fd;
+#endif
+}
+
+#if (defined(HAVE_ACCEPT4) || defined(HAVE_PACCEPT))
+static inline int
+prep_accept_flags(int flags)
+{
+ if (flags & O_NONBLOCK) {
+ flags &= ~O_NONBLOCK;
+ flags |= SOCK_NONBLOCK;
+ }
+
+ flags |= SOCK_CLOEXEC;
+
+ return flags;
+}
#endif
+int
+sys_accept(int sock, struct sockaddr *sockaddr, socklen_t *socklen, int flags)
+{
+ int newsock = -1;
+
+#ifdef HAVE_ACCEPT4
+
+ flags = prep_accept_flags(flags);
+ newsock = accept4(sock, sockaddr, socklen, flags);
+
+#elif HAVE_PACCEPT
+ flags = prep_accept_flags(flags);
+ newsock = paccept(sock, sockaddr, socklen, NULL, flags);
+
+#else
+ int op_errno = 0;
+ int curflag = 0;
+ int ret = 0;
+
+ newsock = accept(sock, sockaddr, socklen);
+ if (newsock != -1) {
+ curflag = fcntl(newsock, F_GETFL);
+ if (fcntl(newsock, F_SETFL, curflag | flags) == -1) {
+ op_errno = errno;
+ goto err;
+ }
+
+ curflag = fcntl(newsock, F_GETFD);
+ if (fcntl(newsock, F_SETFD, curflag | FD_CLOEXEC) == -1) {
+ op_errno = errno;
+ goto err;
+ }
+ }
+
+err:
+ if (op_errno) {
+ close(newsock);
+ errno = op_errno;
+ return -1;
+ }
+
+#endif
+ return newsock;
+}
+
+ssize_t
+sys_copy_file_range(int fd_in, off64_t *off_in, int fd_out, off64_t *off_out,
+ size_t len, unsigned int flags)
+{
+ /*
+ * TODO: Add check for other platofrms like freebsd etc if this syscall is
+ * not generic.
+ * This is what the function does.
+ * 1) Check whether copy_file_range API is present. If so call it.
+ * 2) If copy_file_range API is not present, then check whether
+ * the system call is there. If so, then use syscall to invoke
+ * SYS_copy_file_range system call.
+ * 3) If neither of the above is present, then return ENOSYS.
+ */
+#ifdef HAVE_COPY_FILE_RANGE
+ return FS_RET_CHECK(
+ copy_file_range(fd_in, off_in, fd_out, off_out, len, flags), errno);
+#else
+#ifdef HAVE_COPY_FILE_RANGE_SYS
+ return syscall(SYS_copy_file_range, fd_in, off_in, fd_out, off_out, len,
+ flags);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif /* HAVE_COPY_FILE_RANGE_SYS */
+#endif /* HAVE_COPY_FILE_RANGE */
}
+#ifdef __FreeBSD__
+int
+sys_kill(pid_t pid, int sig)
+{
+ return FS_RET_CHECK0(kill(pid, sig), errno);
+}
int
-sys_access (const char *pathname, int mode)
+sys_sysctl(const int *name, u_int namelen, void *oldp, size_t *oldlenp,
+ const void *newp, size_t newlen)
{
- return access (pathname, mode);
+ return FS_RET_CHECK0(sysctl(name, namelen, oldp, oldlenp, newp, newlen),
+ errno);
}
+#endif
diff --git a/libglusterfs/src/syscall.h b/libglusterfs/src/syscall.h
deleted file mode 100644
index d5c6ce5f67b..00000000000
--- a/libglusterfs/src/syscall.h
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __SYSCALL_H__
-#define __SYSCALL_H__
-
-int
-sys_lstat (const char *path, struct stat *buf);
-
-int
-sys_stat (const char *path, struct stat *buf);
-
-int
-sys_fstat (int fd, struct stat *buf);
-
-DIR *
-sys_opendir (const char *name);
-
-struct dirent *
-sys_readdir (DIR *dir);
-
-ssize_t
-sys_readlink (const char *path, char *buf, size_t bufsiz);
-
-int
-sys_closedir (DIR *dir);
-
-int
-sys_mknod (const char *pathname, mode_t mode, dev_t dev);
-
-int
-sys_mkdir (const char *pathname, mode_t mode);
-
-int
-sys_unlink (const char *pathname);
-
-int
-sys_rmdir (const char *pathname);
-
-int
-sys_symlink (const char *oldpath, const char *newpath);
-
-int
-sys_rename (const char *oldpath, const char *newpath);
-
-int
-sys_link (const char *oldpath, const char *newpath);
-
-int
-sys_chmod (const char *path, mode_t mode);
-
-int
-sys_fchmod (int fd, mode_t mode);
-
-int
-sys_chown (const char *path, uid_t owner, gid_t group);
-
-int
-sys_fchown (int fd, uid_t owner, gid_t group);
-
-int
-sys_lchown (const char *path, uid_t owner, gid_t group);
-
-int
-sys_truncate (const char *path, off_t length);
-
-int
-sys_ftruncate (int fd, off_t length);
-
-int
-sys_utimes (const char *filename, const struct timeval times[2]);
-
-int
-sys_creat (const char *pathname, mode_t mode);
-
-ssize_t
-sys_readv (int fd, const struct iovec *iov, int iovcnt);
-
-ssize_t
-sys_writev (int fd, const struct iovec *iov, int iovcnt);
-
-ssize_t
-sys_read (int fd, void *buf, size_t count);
-
-ssize_t
-sys_write (int fd, const void *buf, size_t count);
-
-off_t
-sys_lseek (int fd, off_t offset, int whence);
-
-int
-sys_statvfs (const char *path, struct statvfs *buf);
-
-int
-sys_close (int fd);
-
-int
-sys_fsync (int fd);
-
-int
-sys_fdatasync (int fd);
-
-int
-sys_lsetxattr (const char *path, const char *name, const void *value,
- size_t size, int flags);
-
-ssize_t
-sys_llistxattr (const char *path, char *list, size_t size);
-
-ssize_t
-sys_lgetxattr (const char *path, const char *name, void *value, size_t size);
-
-ssize_t
-sys_fgetxattr (int filedes, const char *name, void *value, size_t size);
-
-int
-sys_fsetxattr (int filedes, const char *name, const void *value,
- size_t size, int flags);
-
-ssize_t
-sys_flistxattr (int filedes, char *list, size_t size);
-
-int
-sys_lremovexattr (const char *path, const char *name);
-
-int
-sys_fremovexattr (int filedes, const char *name);
-
-int
-sys_access (const char *pathname, int mode);
-
-int
-sys_ftruncate (int fd, off_t length);
-
-#endif /* __SYSCALL_H__ */
diff --git a/libglusterfs/src/throttle-tbf.c b/libglusterfs/src/throttle-tbf.c
new file mode 100644
index 00000000000..e11ca4f9d35
--- /dev/null
+++ b/libglusterfs/src/throttle-tbf.c
@@ -0,0 +1,290 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ *
+ * Basic token bucket implementation for rate limiting. As of now interfaces
+ * to throttle disk read request, directory entry scan and hash calculation
+ * are available. To throttle a particular request (operation), the call needs
+ * to be wrapped in-between throttling APIs, for e.g.
+ *
+ * TBF_THROTTLE_BEGIN (...); <-- induces "delays" if required
+ * {
+ * call (...);
+ * }
+ * TBF_THROTTLE_END (...); <-- not used atm, maybe needed later
+ *
+ */
+
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/throttle-tbf.h"
+
+typedef struct tbf_throttle {
+ char done;
+
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ unsigned long tokens;
+
+ struct list_head list;
+} tbf_throttle_t;
+
+static tbf_throttle_t *
+tbf_init_throttle(unsigned long tokens_required)
+{
+ tbf_throttle_t *throttle = NULL;
+
+ throttle = GF_CALLOC(1, sizeof(*throttle), gf_common_mt_tbf_throttle_t);
+ if (!throttle)
+ return NULL;
+
+ throttle->done = 0;
+ throttle->tokens = tokens_required;
+ INIT_LIST_HEAD(&throttle->list);
+
+ (void)pthread_mutex_init(&throttle->mutex, NULL);
+ (void)pthread_cond_init(&throttle->cond, NULL);
+
+ return throttle;
+}
+
+void
+_tbf_dispatch_queued(tbf_bucket_t *bucket)
+{
+ gf_boolean_t xcont = _gf_false;
+ tbf_throttle_t *tmp = NULL;
+ tbf_throttle_t *throttle = NULL;
+
+ list_for_each_entry_safe(throttle, tmp, &bucket->queued, list)
+ {
+ pthread_mutex_lock(&throttle->mutex);
+ {
+ if (bucket->tokens < throttle->tokens) {
+ xcont = _gf_true;
+ goto unblock;
+ }
+
+ /* this request can now be serviced */
+ throttle->done = 1;
+ list_del_init(&throttle->list);
+
+ bucket->tokens -= throttle->tokens;
+ pthread_cond_signal(&throttle->cond);
+ }
+ unblock:
+ pthread_mutex_unlock(&throttle->mutex);
+ if (xcont)
+ break;
+ }
+}
+
+void *
+tbf_tokengenerator(void *arg)
+{
+ unsigned long tokenrate = 0;
+ unsigned long maxtokens = 0;
+ unsigned long token_gen_interval = 0;
+ tbf_bucket_t *bucket = arg;
+
+ tokenrate = bucket->tokenrate;
+ maxtokens = bucket->maxtokens;
+ token_gen_interval = bucket->token_gen_interval;
+
+ while (1) {
+ gf_nanosleep(token_gen_interval * GF_US_IN_NS);
+
+ LOCK(&bucket->lock);
+ {
+ bucket->tokens += tokenrate;
+ if (bucket->tokens > maxtokens)
+ bucket->tokens = maxtokens;
+
+ if (!list_empty(&bucket->queued))
+ _tbf_dispatch_queued(bucket);
+ }
+ UNLOCK(&bucket->lock);
+ }
+
+ return NULL;
+}
+
+/**
+ * There is lazy synchronization between this routine (when invoked
+ * under tbf_mod() context) and tbf_throttle(). *bucket is
+ * updated _after_ all the required variables are initialized.
+ */
+static int32_t
+tbf_init_bucket(tbf_t *tbf, tbf_opspec_t *spec)
+{
+ int ret = 0;
+ tbf_bucket_t *curr = NULL;
+ tbf_bucket_t **bucket = NULL;
+
+ GF_ASSERT(spec->op >= TBF_OP_MIN);
+ GF_ASSERT(spec->op <= TBF_OP_MAX);
+
+ /* no rate? no throttling. */
+ if (!spec->rate)
+ return 0;
+
+ bucket = tbf->bucket + spec->op;
+
+ curr = GF_CALLOC(1, sizeof(*curr), gf_common_mt_tbf_bucket_t);
+ if (!curr)
+ goto error_return;
+
+ LOCK_INIT(&curr->lock);
+ INIT_LIST_HEAD(&curr->queued);
+
+ curr->tokens = 0;
+ curr->tokenrate = spec->rate;
+ curr->maxtokens = spec->maxlimit;
+ curr->token_gen_interval = spec->token_gen_interval;
+
+ ret = gf_thread_create(&curr->tokener, NULL, tbf_tokengenerator, curr,
+ "tbfclock");
+ if (ret != 0)
+ goto freemem;
+
+ *bucket = curr;
+ return 0;
+
+freemem:
+ LOCK_DESTROY(&curr->lock);
+ GF_FREE(curr);
+error_return:
+ return -1;
+}
+
+#define TBF_ALLOC_SIZE (sizeof(tbf_t) + (TBF_OP_MAX * sizeof(tbf_bucket_t)))
+
+tbf_t *
+tbf_init(tbf_opspec_t *tbfspec, unsigned int count)
+{
+ int32_t i = 0;
+ int32_t ret = 0;
+ tbf_t *tbf = NULL;
+ tbf_opspec_t *opspec = NULL;
+
+ tbf = GF_CALLOC(1, TBF_ALLOC_SIZE, gf_common_mt_tbf_t);
+ if (!tbf)
+ goto error_return;
+
+ tbf->bucket = (tbf_bucket_t **)((char *)tbf + sizeof(*tbf));
+ for (i = 0; i < TBF_OP_MAX; i++) {
+ *(tbf->bucket + i) = NULL;
+ }
+
+ for (i = 0; i < count; i++) {
+ opspec = tbfspec + i;
+
+ ret = tbf_init_bucket(tbf, opspec);
+ if (ret)
+ break;
+ }
+
+ if (ret)
+ goto error_return;
+
+ return tbf;
+
+error_return:
+ return NULL;
+}
+
+static void
+tbf_mod_bucket(tbf_bucket_t *bucket, tbf_opspec_t *spec)
+{
+ LOCK(&bucket->lock);
+ {
+ bucket->tokens = 0;
+ bucket->tokenrate = spec->rate;
+ bucket->maxtokens = spec->maxlimit;
+ }
+ UNLOCK(&bucket->lock);
+
+ /* next token tick would unqueue pending operations */
+}
+
+int
+tbf_mod(tbf_t *tbf, tbf_opspec_t *tbfspec)
+{
+ int ret = 0;
+ tbf_bucket_t *bucket = NULL;
+ tbf_ops_t op = TBF_OP_MIN;
+
+ if (!tbf || !tbfspec)
+ return -1;
+
+ op = tbfspec->op;
+
+ GF_ASSERT(op >= TBF_OP_MIN);
+ GF_ASSERT(op <= TBF_OP_MAX);
+
+ bucket = *(tbf->bucket + op);
+ if (bucket) {
+ tbf_mod_bucket(bucket, tbfspec);
+ } else {
+ ret = tbf_init_bucket(tbf, tbfspec);
+ }
+
+ return ret;
+}
+
+void
+tbf_throttle(tbf_t *tbf, tbf_ops_t op, unsigned long tokens_requested)
+{
+ char waitq = 0;
+ tbf_bucket_t *bucket = NULL;
+ tbf_throttle_t *throttle = NULL;
+
+ GF_ASSERT(op >= TBF_OP_MIN);
+ GF_ASSERT(op <= TBF_OP_MAX);
+
+ bucket = *(tbf->bucket + op);
+ if (!bucket)
+ return;
+
+ LOCK(&bucket->lock);
+ {
+ /**
+ * if there are enough tokens in the bucket there is no need
+ * to throttle the request: therefore, consume the required
+ * number of tokens and continue.
+ */
+ if (tokens_requested <= bucket->tokens) {
+ bucket->tokens -= tokens_requested;
+ } else {
+ throttle = tbf_init_throttle(tokens_requested);
+ if (!throttle) /* let it slip through for now.. */
+ goto unblock;
+
+ waitq = 1;
+ pthread_mutex_lock(&throttle->mutex);
+ list_add_tail(&throttle->list, &bucket->queued);
+ }
+ }
+unblock:
+ UNLOCK(&bucket->lock);
+
+ if (waitq) {
+ while (!throttle->done) {
+ pthread_cond_wait(&throttle->cond, &throttle->mutex);
+ }
+
+ pthread_mutex_unlock(&throttle->mutex);
+
+ pthread_mutex_destroy(&throttle->mutex);
+ pthread_cond_destroy(&throttle->cond);
+
+ GF_FREE(throttle);
+ }
+}
diff --git a/libglusterfs/src/timer.c b/libglusterfs/src/timer.c
index ae40142ad51..66c861b04cd 100644
--- a/libglusterfs/src/timer.c
+++ b/libglusterfs/src/timer.c
@@ -8,213 +8,249 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include "glusterfs/timer.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/globals.h"
+#include "glusterfs/timespec.h"
+#include "glusterfs/libglusterfs-messages.h"
-#include "timer.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "globals.h"
-
-#define TS(tv) ((((unsigned long long) tv.tv_sec) * 1000000) + (tv.tv_usec))
+/* fwd decl */
+static gf_timer_registry_t *
+gf_timer_registry_init(glusterfs_ctx_t *);
gf_timer_t *
-gf_timer_call_after (glusterfs_ctx_t *ctx,
- struct timeval delta,
- gf_timer_cbk_t callbk,
- void *data)
+gf_timer_call_after(glusterfs_ctx_t *ctx, struct timespec delta,
+ gf_timer_cbk_t callbk, void *data)
{
- gf_timer_registry_t *reg = NULL;
- gf_timer_t *event = NULL;
- gf_timer_t *trav = NULL;
- unsigned long long at = 0L;
-
- if (ctx == NULL)
- {
- gf_log_callingfn ("timer", GF_LOG_ERROR, "invalid argument");
- return NULL;
- }
+ gf_timer_registry_t *reg = NULL;
+ gf_timer_t *event = NULL;
+ gf_timer_t *trav = NULL;
+ uint64_t at = 0;
+
+ if ((ctx == NULL) || (ctx->cleanup_started)) {
+ gf_msg_callingfn("timer", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "Either ctx is NULL or"
+ " ctx cleanup started");
+ return NULL;
+ }
- reg = gf_timer_registry_init (ctx);
+ reg = gf_timer_registry_init(ctx);
- if (!reg) {
- gf_log_callingfn ("timer", GF_LOG_ERROR, "!reg");
- return NULL;
- }
+ if (!reg) {
+ gf_msg_callingfn("timer", GF_LOG_ERROR, 0, LG_MSG_TIMER_REGISTER_ERROR,
+ "!reg");
+ return NULL;
+ }
- event = GF_CALLOC (1, sizeof (*event), gf_common_mt_gf_timer_t);
- if (!event) {
- return NULL;
- }
- gettimeofday (&event->at, NULL);
- event->at.tv_usec = ((event->at.tv_usec + delta.tv_usec) % 1000000);
- event->at.tv_sec += ((event->at.tv_usec + delta.tv_usec) / 1000000);
- event->at.tv_sec += delta.tv_sec;
- at = TS (event->at);
- event->callbk = callbk;
- event->data = data;
- event->xl = THIS;
- pthread_mutex_lock (&reg->lock);
+ event = GF_CALLOC(1, sizeof(*event), gf_common_mt_gf_timer_t);
+ if (!event) {
+ return NULL;
+ }
+ timespec_now(&event->at);
+ timespec_adjust_delta(&event->at, delta);
+ at = TS(event->at);
+ event->callbk = callbk;
+ event->data = data;
+ event->xl = THIS;
+ pthread_mutex_lock(&reg->lock);
+ {
+ list_for_each_entry_reverse(trav, &reg->active, list)
{
- trav = reg->active.prev;
- while (trav != &reg->active) {
- if (TS (trav->at) < at)
- break;
- trav = trav->prev;
- }
- event->prev = trav;
- event->next = event->prev->next;
- event->prev->next = event;
- event->next->prev = event;
+ if (TS(trav->at) < at)
+ break;
+ }
+ list_add(&event->list, &trav->list);
+ if (&trav->list == &reg->active) {
+ pthread_cond_signal(&reg->cond);
}
- pthread_mutex_unlock (&reg->lock);
- return event;
+ }
+ pthread_mutex_unlock(&reg->lock);
+ return event;
}
int32_t
-gf_timer_call_stale (gf_timer_registry_t *reg,
- gf_timer_t *event)
+gf_timer_call_cancel(glusterfs_ctx_t *ctx, gf_timer_t *event)
{
- if (reg == NULL || event == NULL)
- {
- gf_log_callingfn ("timer", GF_LOG_ERROR, "invalid argument");
- return 0;
- }
-
- event->next->prev = event->prev;
- event->prev->next = event->next;
- event->next = &reg->stale;
- event->prev = event->next->prev;
- event->next->prev = event;
- event->prev->next = event;
-
+ gf_timer_registry_t *reg = NULL;
+ gf_boolean_t fired = _gf_false;
+
+ if (ctx == NULL || event == NULL) {
+ gf_msg_callingfn("timer", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return -1;
+ }
+
+ if (ctx->cleanup_started) {
+ gf_msg_callingfn("timer", GF_LOG_INFO, 0, LG_MSG_CTX_CLEANUP_STARTED,
+ "ctx cleanup started");
+ return -1;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ reg = ctx->timer;
+ }
+ UNLOCK(&ctx->lock);
+
+ if (!reg) {
+ /* This can happen when cleanup may have just started and
+ * gf_timer_registry_destroy() sets ctx->timer to NULL.
+ * gf_timer_proc() takes care of cleaning up the events.
+ */
+ return -1;
+ }
+
+ pthread_mutex_lock(&reg->lock);
+ {
+ fired = event->fired;
+ if (fired)
+ goto unlock;
+ list_del(&event->list);
+ }
+unlock:
+ pthread_mutex_unlock(&reg->lock);
+
+ if (!fired) {
+ GF_FREE(event);
return 0;
+ }
+ return -1;
}
-int32_t
-gf_timer_call_cancel (glusterfs_ctx_t *ctx,
- gf_timer_t *event)
+static void *
+gf_timer_proc(void *data)
{
- gf_timer_registry_t *reg = NULL;
+ gf_timer_registry_t *reg = data;
+ gf_timer_t *event = NULL;
+ gf_timer_t *tmp = NULL;
+ xlator_t *old_THIS = NULL;
+
+ pthread_mutex_lock(&reg->lock);
+
+ while (!reg->fin) {
+ if (list_empty(&reg->active)) {
+ pthread_cond_wait(&reg->cond, &reg->lock);
+ } else {
+ struct timespec now;
+
+ timespec_now(&now);
+ event = list_first_entry(&reg->active, gf_timer_t, list);
+ if (TS(now) < TS(event->at)) {
+ now = event->at;
+ pthread_cond_timedwait(&reg->cond, &reg->lock, &now);
+ } else {
+ event->fired = _gf_true;
+ list_del_init(&event->list);
+
+ pthread_mutex_unlock(&reg->lock);
+
+ old_THIS = NULL;
+ if (event->xl) {
+ old_THIS = THIS;
+ THIS = event->xl;
+ }
+ event->callbk(event->data);
+ GF_FREE(event);
+ if (old_THIS) {
+ THIS = old_THIS;
+ }
- if (ctx == NULL || event == NULL)
- {
- gf_log_callingfn ("timer", GF_LOG_ERROR, "invalid argument");
- return 0;
+ pthread_mutex_lock(&reg->lock);
+ }
}
+ }
+
+ /* Do not call gf_timer_call_cancel(),
+ * it will lead to deadlock
+ */
+ list_for_each_entry_safe(event, tmp, &reg->active, list)
+ {
+ list_del(&event->list);
+ /* TODO Possible resource leak
+ * Before freeing the event, we need to call the respective
+ * event functions and free any resources.
+ * For example, In case of rpc_clnt_reconnect, we need to
+ * unref rpc object which was taken when added to timer
+ * wheel.
+ */
+ GF_FREE(event);
+ }
+
+ pthread_mutex_unlock(&reg->lock);
+
+ return NULL;
+}
- reg = gf_timer_registry_init (ctx);
- if (!reg) {
- gf_log ("timer", GF_LOG_ERROR, "!reg");
- GF_FREE (event);
- return 0;
+static gf_timer_registry_t *
+gf_timer_registry_init(glusterfs_ctx_t *ctx)
+{
+ gf_timer_registry_t *reg = NULL;
+ int ret = -1;
+ pthread_condattr_t attr;
+
+ LOCK(&ctx->lock);
+ {
+ reg = ctx->timer;
+ if (reg) {
+ UNLOCK(&ctx->lock);
+ goto out;
}
-
- pthread_mutex_lock (&reg->lock);
- {
- event->next->prev = event->prev;
- event->prev->next = event->next;
+ reg = GF_CALLOC(1, sizeof(*reg), gf_common_mt_gf_timer_registry_t);
+ if (!reg) {
+ UNLOCK(&ctx->lock);
+ goto out;
}
- pthread_mutex_unlock (&reg->lock);
+ ctx->timer = reg;
+ pthread_mutex_init(&reg->lock, NULL);
+ pthread_condattr_init(&attr);
+ pthread_condattr_setclock(&attr, CLOCK_MONOTONIC);
+ pthread_cond_init(&reg->cond, &attr);
+ INIT_LIST_HEAD(&reg->active);
+ }
+ UNLOCK(&ctx->lock);
+ ret = gf_thread_create(&reg->th, NULL, gf_timer_proc, reg, "timer");
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, ret, LG_MSG_PTHREAD_FAILED,
+ "Thread creation failed");
+ }
- GF_FREE (event);
- return 0;
+out:
+ return reg;
}
-void *
-gf_timer_proc (void *ctx)
+void
+gf_timer_registry_destroy(glusterfs_ctx_t *ctx)
{
- gf_timer_registry_t *reg = NULL;
- const struct timespec sleepts = {.tv_sec = 1, .tv_nsec = 0, };
+ pthread_t thr_id;
+ gf_timer_registry_t *reg = NULL;
- if (ctx == NULL)
- {
- gf_log_callingfn ("timer", GF_LOG_ERROR, "invalid argument");
- return NULL;
- }
+ if (ctx == NULL)
+ return;
- reg = gf_timer_registry_init (ctx);
- if (!reg) {
- gf_log ("timer", GF_LOG_ERROR, "!reg");
- return NULL;
- }
-
- while (!reg->fin) {
- unsigned long long now;
- struct timeval now_tv;
- gf_timer_t *event = NULL;
-
- gettimeofday (&now_tv, NULL);
- now = TS (now_tv);
- while (1) {
- unsigned long long at;
- char need_cbk = 0;
-
- pthread_mutex_lock (&reg->lock);
- {
- event = reg->active.next;
- at = TS (event->at);
- if (event != &reg->active && now >= at) {
- need_cbk = 1;
- gf_timer_call_stale (reg, event);
- }
- }
- pthread_mutex_unlock (&reg->lock);
- if (event->xl)
- THIS = event->xl;
- if (need_cbk)
- event->callbk (event->data);
-
- else
- break;
- }
- nanosleep (&sleepts, NULL);
- }
+ LOCK(&ctx->lock);
+ {
+ reg = ctx->timer;
+ ctx->timer = NULL;
+ }
+ UNLOCK(&ctx->lock);
- pthread_mutex_lock (&reg->lock);
- {
- while (reg->active.next != &reg->active) {
- gf_timer_call_cancel (ctx, reg->active.next);
- }
+ if (!reg)
+ return;
- while (reg->stale.next != &reg->stale) {
- gf_timer_call_cancel (ctx, reg->stale.next);
- }
- }
- pthread_mutex_unlock (&reg->lock);
- pthread_mutex_destroy (&reg->lock);
- GF_FREE (((glusterfs_ctx_t *)ctx)->timer);
+ thr_id = reg->th;
- return NULL;
-}
+ pthread_mutex_lock(&reg->lock);
-gf_timer_registry_t *
-gf_timer_registry_init (glusterfs_ctx_t *ctx)
-{
- if (ctx == NULL) {
- gf_log_callingfn ("timer", GF_LOG_ERROR, "invalid argument");
- return NULL;
- }
+ reg->fin = 1;
+ pthread_cond_signal(&reg->cond);
- if (!ctx->timer) {
- gf_timer_registry_t *reg = NULL;
+ pthread_mutex_unlock(&reg->lock);
- reg = GF_CALLOC (1, sizeof (*reg),
- gf_common_mt_gf_timer_registry_t);
- if (!reg)
- goto out;
+ pthread_join(thr_id, NULL);
- pthread_mutex_init (&reg->lock, NULL);
- reg->active.next = &reg->active;
- reg->active.prev = &reg->active;
- reg->stale.next = &reg->stale;
- reg->stale.prev = &reg->stale;
+ pthread_cond_destroy(&reg->cond);
+ pthread_mutex_destroy(&reg->lock);
- ctx->timer = reg;
- pthread_create (&reg->th, NULL, gf_timer_proc, ctx);
- }
-out:
- return ctx->timer;
+ GF_FREE(reg);
}
diff --git a/libglusterfs/src/timer.h b/libglusterfs/src/timer.h
deleted file mode 100644
index 2954f6aff5d..00000000000
--- a/libglusterfs/src/timer.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _TIMER_H
-#define _TIMER_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include <sys/time.h>
-#include <pthread.h>
-
-typedef void (*gf_timer_cbk_t) (void *);
-
-struct _gf_timer {
- struct _gf_timer *next, *prev;
- struct timeval at;
- gf_timer_cbk_t callbk;
- void *data;
- xlator_t *xl;
-};
-
-struct _gf_timer_registry {
- pthread_t th;
- char fin;
- struct _gf_timer stale;
- struct _gf_timer active;
- pthread_mutex_t lock;
-};
-
-typedef struct _gf_timer gf_timer_t;
-typedef struct _gf_timer_registry gf_timer_registry_t;
-
-gf_timer_t *
-gf_timer_call_after (glusterfs_ctx_t *ctx,
- struct timeval delta,
- gf_timer_cbk_t cbk,
- void *data);
-
-int32_t
-gf_timer_call_cancel (glusterfs_ctx_t *ctx,
- gf_timer_t *event);
-
-void *
-gf_timer_proc (void *data);
-
-gf_timer_registry_t *
-gf_timer_registry_init (glusterfs_ctx_t *ctx);
-
-#endif /* _TIMER_H */
diff --git a/libglusterfs/src/timespec.c b/libglusterfs/src/timespec.c
new file mode 100644
index 00000000000..96cef5c6f07
--- /dev/null
+++ b/libglusterfs/src/timespec.c
@@ -0,0 +1,129 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <time.h>
+#include <sys/time.h>
+#include <string.h>
+
+#if defined GF_DARWIN_HOST_OS
+#include <mach/mach_time.h>
+static mach_timebase_info_data_t gf_timebase;
+#endif
+
+#include "glusterfs/timespec.h"
+#include "glusterfs/libglusterfs-messages.h"
+#include "glusterfs/common-utils.h"
+
+void
+timespec_now(struct timespec *ts)
+{
+#if defined GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS || \
+ defined GF_BSD_HOST_OS
+ if (0 == clock_gettime(CLOCK_MONOTONIC, ts)) {
+ /* All good */
+ return;
+ }
+
+ /* Fall back, but there is hope in gettimeofday() syscall */
+ struct timeval tv;
+ if (0 == gettimeofday(&tv, NULL)) {
+ /* Again, all good */
+ TIMEVAL_TO_TIMESPEC(&tv, ts);
+ return;
+ }
+
+ /* If control hits here, there is surely a problem,
+ mainly because, as per man page too, these syscalls
+ shouldn't fail. Best way is to ABORT, because it is
+ not right */
+ GF_ABORT("gettimeofday() failed!!");
+
+#elif defined GF_DARWIN_HOST_OS
+ uint64_t time = mach_absolute_time();
+ static double scaling = 0.0;
+
+ if (mach_timebase_info(&gf_timebase) != KERN_SUCCESS) {
+ gf_timebase.numer = 1;
+ gf_timebase.denom = 1;
+ }
+ if (gf_timebase.denom == 0) {
+ gf_timebase.numer = 1;
+ gf_timebase.denom = 1;
+ }
+
+ scaling = (double)gf_timebase.numer / (double)gf_timebase.denom;
+ time *= scaling;
+
+ ts->tv_sec = (time * NANO);
+ ts->tv_nsec = (time - (ts->tv_sec * GIGA));
+
+#endif /* Platform verification */
+}
+
+void
+timespec_now_realtime(struct timespec *ts)
+{
+#if defined GF_LINUX_HOST_OS || defined GF_SOLARIS_HOST_OS || \
+ defined GF_BSD_HOST_OS
+ if (0 == clock_gettime(CLOCK_REALTIME, ts)) {
+ return;
+ }
+#endif
+
+ /* Fall back to gettimeofday()*/
+ struct timeval tv = {
+ 0,
+ };
+ if (0 == gettimeofday(&tv, NULL)) {
+ TIMEVAL_TO_TIMESPEC(&tv, ts);
+ return;
+ }
+
+ return;
+}
+
+void
+timespec_adjust_delta(struct timespec *ts, struct timespec delta)
+{
+ ts->tv_nsec = ((ts->tv_nsec + delta.tv_nsec) % 1000000000);
+ ts->tv_sec += ((ts->tv_nsec + delta.tv_nsec) / 1000000000);
+ ts->tv_sec += delta.tv_sec;
+}
+
+void
+timespec_sub(const struct timespec *begin, const struct timespec *end,
+ struct timespec *res)
+{
+ if (end->tv_nsec < begin->tv_nsec) {
+ res->tv_sec = end->tv_sec - begin->tv_sec - 1;
+ res->tv_nsec = end->tv_nsec + 1000000000 - begin->tv_nsec;
+ } else {
+ res->tv_sec = end->tv_sec - begin->tv_sec;
+ res->tv_nsec = end->tv_nsec - begin->tv_nsec;
+ }
+}
+
+int
+timespec_cmp(const struct timespec *lhs_ts, const struct timespec *rhs_ts)
+{
+ if (lhs_ts->tv_sec < rhs_ts->tv_sec) {
+ return -1;
+ } else if (lhs_ts->tv_sec > rhs_ts->tv_sec) {
+ return 1;
+ } else if (lhs_ts->tv_nsec < rhs_ts->tv_nsec) {
+ return -1;
+ } else if (lhs_ts->tv_nsec > rhs_ts->tv_nsec) {
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/libglusterfs/src/trie.c b/libglusterfs/src/trie.c
index b7c59784218..809550b864c 100644
--- a/libglusterfs/src/trie.c
+++ b/libglusterfs/src/trie.c
@@ -10,380 +10,357 @@
#include <stdio.h>
#include <string.h>
-#include <stdlib.h>
-#include <ctype.h>
-#include "common-utils.h"
-#include "trie.h"
+#include "glusterfs/common-utils.h"
+#include "glusterfs/trie.h"
#define DISTANCE_EDIT 1
-#define DISTANCE_INS 1
-#define DISTANCE_DEL 1
-
+#define DISTANCE_INS 1
+#define DISTANCE_DEL 1
struct trienode {
- char id;
- char eow;
- int depth;
- void *data;
- struct trie *trie;
- struct trienode *parent;
- struct trienode *subnodes[255];
+ char id;
+ char eow;
+ int depth;
+ void *data;
+ struct trie *trie;
+ struct trienode *parent;
+ struct trienode *subnodes[255];
};
struct trie {
- struct trienode root;
- int nodecnt;
- size_t len;
+ struct trienode root;
+ int nodecnt;
+ size_t len;
};
-
trie_t *
-trie_new ()
+trie_new()
{
- trie_t *trie = NULL;
+ trie_t *trie = NULL;
- trie = GF_CALLOC (1, sizeof (*trie), gf_common_mt_trie_trie);
- if (!trie)
- return NULL;
+ trie = GF_CALLOC(1, sizeof(*trie), gf_common_mt_trie_trie);
+ if (!trie)
+ return NULL;
- trie->root.trie = trie;
+ trie->root.trie = trie;
- return trie;
+ return trie;
}
-
static trienode_t *
-trie_subnode (trienode_t *node, int id)
+trie_subnode(trienode_t *node, int id)
{
- trienode_t *subnode = NULL;
-
- subnode = node->subnodes[id];
- if (!subnode) {
- subnode = GF_CALLOC (1, sizeof (*subnode),
- gf_common_mt_trie_node);
- if (!subnode)
- return NULL;
-
- subnode->id = id;
- subnode->depth = node->depth + 1;
- node->subnodes[id] = subnode;
- subnode->parent = node;
- subnode->trie = node->trie;
- node->trie->nodecnt++;
- }
-
- return subnode;
+ trienode_t *subnode = NULL;
+
+ subnode = node->subnodes[id];
+ if (!subnode) {
+ subnode = GF_CALLOC(1, sizeof(*subnode), gf_common_mt_trie_node);
+ if (!subnode)
+ return NULL;
+
+ subnode->id = id;
+ subnode->depth = node->depth + 1;
+ node->subnodes[id] = subnode;
+ subnode->parent = node;
+ subnode->trie = node->trie;
+ node->trie->nodecnt++;
+ }
+
+ return subnode;
}
-
int
-trie_add (trie_t *trie, const char *dword)
+trie_add(trie_t *trie, const char *dword)
{
- trienode_t *node = NULL;
- int i = 0;
- char id = 0;
- trienode_t *subnode = NULL;
+ trienode_t *node = NULL;
+ int i = 0;
+ char id = 0;
+ trienode_t *subnode = NULL;
- node = &trie->root;
+ node = &trie->root;
- for (i = 0; i < strlen (dword); i++) {
- id = dword[i];
+ for (i = 0; i < strlen(dword); i++) {
+ id = dword[i];
- subnode = trie_subnode (node, id);
- if (!subnode)
- return -1;
- node = subnode;
- }
+ subnode = trie_subnode(node, id);
+ if (!subnode)
+ return -1;
+ node = subnode;
+ }
- node->eow = 1;
+ node->eow = 1;
- return 0;
+ return 0;
}
static void
-trienode_free (trienode_t *node)
+trienode_free(trienode_t *node)
{
- trienode_t *trav = NULL;
- int i = 0;
+ trienode_t *trav = NULL;
+ int i = 0;
- for (i = 0; i < 255; i++) {
- trav = node->subnodes[i];
+ for (i = 0; i < 255; i++) {
+ trav = node->subnodes[i];
- if (trav)
- trienode_free (trav);
- }
+ if (trav)
+ trienode_free(trav);
+ }
- if (node->data)
- GF_FREE (node->data);
- GF_FREE (node);
+ GF_FREE(node->data);
+ GF_FREE(node);
}
-
void
-trie_destroy (trie_t *trie)
+trie_destroy(trie_t *trie)
{
- trienode_free ((trienode_t *)trie);
+ trienode_free((trienode_t *)trie);
}
-
void
-trie_destroy_bynode (trienode_t *node)
+trie_destroy_bynode(trienode_t *node)
{
- trie_destroy (node->trie);
+ trie_destroy(node->trie);
}
-
static int
-trienode_walk (trienode_t *node, int (*fn)(trienode_t *node, void *data),
- void *data, int eowonly)
+trienode_walk(trienode_t *node, int (*fn)(trienode_t *node, void *data),
+ void *data, int eowonly)
{
- trienode_t *trav = NULL;
- int i = 0;
- int cret = 0;
- int ret = 0;
-
- if (!eowonly || node->eow)
- ret = fn (node, data);
-
- if (ret)
- goto out;
-
- for (i = 0; i < 255; i++) {
- trav = node->subnodes[i];
- if (!trav)
- continue;
-
- cret = trienode_walk (trav, fn, data, eowonly);
- if (cret < 0) {
- ret = cret;
- goto out;
- }
- ret += cret;
+ trienode_t *trav = NULL;
+ int i = 0;
+ int cret = 0;
+ int ret = 0;
+
+ if (!eowonly || node->eow)
+ ret = fn(node, data);
+
+ if (ret)
+ goto out;
+
+ for (i = 0; i < 255; i++) {
+ trav = node->subnodes[i];
+ if (!trav)
+ continue;
+
+ cret = trienode_walk(trav, fn, data, eowonly);
+ if (cret < 0) {
+ ret = cret;
+ goto out;
}
+ ret += cret;
+ }
out:
- return ret;
+ return ret;
}
-
static int
-trie_walk (trie_t *trie, int (*fn)(trienode_t *node, void *data),
- void *data, int eowonly)
+trie_walk(trie_t *trie, int (*fn)(trienode_t *node, void *data), void *data,
+ int eowonly)
{
- return trienode_walk (&trie->root, fn, data, eowonly);
+ return trienode_walk(&trie->root, fn, data, eowonly);
}
-
static void
-print_node (trienode_t *node, char **buf)
+print_node(trienode_t *node, char **buf)
{
- if (!node->parent)
- return;
+ if (!node->parent)
+ return;
- if (node->parent) {
- print_node (node->parent, buf);
- *(*buf)++ = node->id;
- }
+ if (node->parent) {
+ print_node(node->parent, buf);
+ *(*buf)++ = node->id;
+ }
}
-
int
-trienode_get_word (trienode_t *node, char **bufp)
+trienode_get_word(trienode_t *node, char **bufp)
{
- char *buf = NULL;
+ char *buf = NULL;
- buf = GF_CALLOC (1, node->depth + 1, gf_common_mt_trie_buf);
- if (!buf)
- return -1;
- *bufp = buf;
+ buf = GF_CALLOC(1, node->depth + 1, gf_common_mt_trie_buf);
+ if (!buf)
+ return -1;
+ *bufp = buf;
- print_node (node, &buf);
+ print_node(node, &buf);
- return 0;
+ return 0;
}
-
static int
-calc_dist (trienode_t *node, void *data)
+calc_dist(trienode_t *node, void *data)
{
- const char *word = NULL;
- int i = 0;
- int *row = NULL;
- int *uprow = NULL;
- int distu = 0;
- int distl = 0;
- int distul = 0;
-
- word = data;
-
- node->data = GF_CALLOC (node->trie->len, sizeof (int),
- gf_common_mt_trie_data);
- if (!node->data)
- return -1;
- row = node->data;
-
- if (!node->parent) {
- for (i = 0; i < node->trie->len; i++)
- row[i] = i+1;
-
- return 0;
- }
+ const char *word = NULL;
+ int i = 0;
+ int *row = NULL;
+ int *uprow = NULL;
+ int distu = 0;
+ int distl = 0;
+ int distul = 0;
+
+ word = data;
+
+ node->data = GF_CALLOC(node->trie->len, sizeof(int),
+ gf_common_mt_trie_data);
+ if (!node->data)
+ return -1;
+ row = node->data;
+
+ if (!node->parent) {
+ for (i = 0; i < node->trie->len; i++)
+ row[i] = i + 1;
- uprow = node->parent->data;
+ return 0;
+ }
- distu = node->depth; /* up node */
- distul = node->parent->depth; /* up-left node */
+ uprow = node->parent->data;
- for (i = 0; i < node->trie->len; i++) {
- distl = uprow[i]; /* left node */
+ distu = node->depth; /* up node */
+ distul = node->parent->depth; /* up-left node */
- if (word[i] == node->id)
- row[i] = distul;
- else
- row[i] = min ((distul + DISTANCE_EDIT),
- min ((distu + DISTANCE_DEL),
- (distl + DISTANCE_INS)));
+ for (i = 0; i < node->trie->len; i++) {
+ distl = uprow[i]; /* left node */
- distu = row[i];
- distul = distl;
- }
+ if (word[i] == node->id)
+ row[i] = distul;
+ else
+ row[i] = min((distul + DISTANCE_EDIT),
+ min((distu + DISTANCE_DEL), (distl + DISTANCE_INS)));
- return 0;
-}
+ distu = row[i];
+ distul = distl;
+ }
+ return 0;
+}
int
-trienode_get_dist (trienode_t *node)
+trienode_get_dist(trienode_t *node)
{
- int *row = NULL;
+ int *row = NULL;
- row = node->data;
+ row = node->data;
- return row[node->trie->len - 1];
+ return row[node->trie->len - 1];
}
-
struct trienodevec_w {
- struct trienodevec *vec;
- const char *word;
+ struct trienodevec *vec;
+ const char *word;
};
-
static void
-trienodevec_clear (struct trienodevec *nodevec)
+trienodevec_clear(struct trienodevec *nodevec)
{
- memset(nodevec->nodes, 0, sizeof (*nodevec->nodes) * nodevec->cnt);
+ memset(nodevec->nodes, 0, sizeof(*nodevec->nodes) * nodevec->cnt);
}
-
static int
-collect_closest (trienode_t *node, void *data)
+collect_closest(trienode_t *node, void *data)
{
- struct trienodevec_w *nodevec_w = NULL;
- struct trienodevec *nodevec = NULL;
- int dist = 0;
- int i = 0;
-
- nodevec_w = data;
- nodevec = nodevec_w->vec;
-
- if (calc_dist (node, (void *)nodevec_w->word))
- return -1;
-
- if (!node->eow || !nodevec->cnt)
- return 0;
-
- dist = trienode_get_dist (node);
-
- /*
- * I thought that when descending further after some dictionary word dw,
- * if we see that child's distance is bigger than it was for dw, then we
- * can prune this branch, as it can contain only worse nodes.
- *
- * This conjecture fails, see eg:
- *
- * d("AB", "B") = 1;
- * d("AB", "BA") = 2;
- * d("AB", "BAB") = 1;
- *
- * -- if both "B" and "BAB" are in dict., then pruning at "BA" * would
- * miss "BAB".
- *
- * (example courtesy of Richard Bann <richardbann at gmail.com>)
-
- if (node->parent->eow && dist > trienode_get_dist (node->parent))
- return 1;
-
- */
-
- if (nodevec->nodes[0] &&
- dist < trienode_get_dist (nodevec->nodes[0])) {
- /* improving over the findings so far */
- trienodevec_clear (nodevec);
- nodevec->nodes[0] = node;
- } else if (!nodevec->nodes[0] ||
- dist == trienode_get_dist (nodevec->nodes[0])) {
- /* as good as the best so far, add if there is free space */
- for (i = 0; i < nodevec->cnt; i++) {
- if (!nodevec->nodes[i]) {
- nodevec->nodes[i] = node;
- break;
- }
- }
- }
+ struct trienodevec_w *nodevec_w = NULL;
+ struct trienodevec *nodevec = NULL;
+ int dist = 0;
+ int i = 0;
+
+ nodevec_w = data;
+ nodevec = nodevec_w->vec;
+ if (calc_dist(node, (void *)nodevec_w->word))
+ return -1;
+
+ if (!node->eow || !nodevec->cnt)
return 0;
-}
+ dist = trienode_get_dist(node);
+
+ /*
+ * I thought that when descending further after some dictionary word dw,
+ * if we see that child's distance is bigger than it was for dw, then we
+ * can prune this branch, as it can contain only worse nodes.
+ *
+ * This conjecture fails, see eg:
+ *
+ * d("AB", "B") = 1;
+ * d("AB", "BA") = 2;
+ * d("AB", "BAB") = 1;
+ *
+ * -- if both "B" and "BAB" are in dict., then pruning at "BA" * would
+ * miss "BAB".
+ *
+ * (example courtesy of Richard Bann <richardbann at gmail.com>)
+
+ if (node->parent->eow && dist > trienode_get_dist (node->parent))
+ return 1;
+
+ */
+
+ if (nodevec->nodes[0] && dist < trienode_get_dist(nodevec->nodes[0])) {
+ /* improving over the findings so far */
+ trienodevec_clear(nodevec);
+ nodevec->nodes[0] = node;
+ } else if (!nodevec->nodes[0] ||
+ dist == trienode_get_dist(nodevec->nodes[0])) {
+ /* as good as the best so far, add if there is free space */
+ for (i = 0; i < nodevec->cnt; i++) {
+ if (!nodevec->nodes[i]) {
+ nodevec->nodes[i] = node;
+ break;
+ }
+ }
+ }
+
+ return 0;
+}
int
-trie_measure (trie_t *trie, const char *word, trienode_t **nodes,
- int nodecnt)
+trie_measure(trie_t *trie, const char *word, trienode_t **nodes, int nodecnt)
{
- struct trienodevec nodevec = {0,};
+ struct trienodevec nodevec = {
+ 0,
+ };
- nodevec.nodes = nodes;
- nodevec.cnt = nodecnt;
+ nodevec.nodes = nodes;
+ nodevec.cnt = nodecnt;
- return trie_measure_vec (trie, word, &nodevec);
+ return trie_measure_vec(trie, word, &nodevec);
}
-
int
-trie_measure_vec (trie_t *trie, const char *word, struct trienodevec *nodevec)
+trie_measure_vec(trie_t *trie, const char *word, struct trienodevec *nodevec)
{
- struct trienodevec_w nodevec_w = {0,};
- int ret = 0;
+ struct trienodevec_w nodevec_w = {
+ 0,
+ };
+ int ret = 0;
- trie->len = strlen (word);
+ trie->len = strlen(word);
- trienodevec_clear (nodevec);
- nodevec_w.vec = nodevec;
- nodevec_w.word = word;
+ trienodevec_clear(nodevec);
+ nodevec_w.vec = nodevec;
+ nodevec_w.word = word;
- ret = trie_walk (trie, collect_closest, &nodevec_w, 0);
- if (ret > 0)
- ret = 0;
+ ret = trie_walk(trie, collect_closest, &nodevec_w, 0);
+ if (ret > 0)
+ ret = 0;
- return ret;
+ return ret;
}
-
static int
-trienode_reset (trienode_t *node, void *data)
+trienode_reset(trienode_t *node, void *data)
{
- if (node->data)
- GF_FREE (node->data);
+ GF_FREE(node->data);
- return 0;
+ return 0;
}
-
void
-trie_reset_search (trie_t *trie)
+trie_reset_search(trie_t *trie)
{
- trie->len = 0;
+ trie->len = 0;
- trie_walk (trie, trienode_reset, NULL, 0);
+ trie_walk(trie, trienode_reset, NULL, 0);
}
diff --git a/libglusterfs/src/trie.h b/libglusterfs/src/trie.h
deleted file mode 100644
index 0356e66210e..00000000000
--- a/libglusterfs/src/trie.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _TRIE_H_
-#define _TRIE_H_
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-struct trienode;
-typedef struct trienode trienode_t;
-
-struct trie;
-typedef struct trie trie_t;
-
-struct trienodevec {
- trienode_t **nodes;
- unsigned cnt;
-};
-
-
-trie_t *trie_new ();
-
-int trie_add (trie_t *trie, const char *word);
-
-void trie_destroy (trie_t *trie);
-
-void trie_destroy_bynode (trienode_t *node);
-
-int trie_measure (trie_t *trie, const char *word, trienode_t **nodes,
- int nodecnt);
-
-int trie_measure_vec (trie_t *trie, const char *word,
- struct trienodevec *nodevec);
-
-void trie_reset_search (trie_t *trie);
-
-int trienode_get_dist (trienode_t *node);
-
-int trienode_get_word (trienode_t *node, char **buf);
-
-#endif
diff --git a/libglusterfs/src/unittest/global_mock.c b/libglusterfs/src/unittest/global_mock.c
new file mode 100644
index 00000000000..2fcf96dbad8
--- /dev/null
+++ b/libglusterfs/src/unittest/global_mock.c
@@ -0,0 +1,25 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/logging.h"
+#include "glusterfs/xlator.h"
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <inttypes.h>
+
+#include <cmocka.h>
+
+xlator_t **
+__glusterfs_this_location()
+{
+ return ((xlator_t **)(uintptr_t)mock());
+}
diff --git a/libglusterfs/src/unittest/log_mock.c b/libglusterfs/src/unittest/log_mock.c
new file mode 100644
index 00000000000..60f6530726b
--- /dev/null
+++ b/libglusterfs/src/unittest/log_mock.c
@@ -0,0 +1,52 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/logging.h"
+#include "glusterfs/xlator.h"
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <inttypes.h>
+
+#include <cmocka.h>
+
+int
+_gf_log(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, const char *fmt, ...)
+{
+ return 0;
+}
+
+int
+_gf_log_callingfn(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, const char *fmt, ...)
+{
+ return 0;
+}
+
+int
+_gf_log_nomem(const char *domain, const char *file, const char *function,
+ int line, gf_loglevel_t level, size_t size)
+{
+ return 0;
+}
+
+int
+_gf_msg_nomem(const char *domain, const char *file, const char *function,
+ int line, gf_loglevel_t level, size_t size)
+{
+ return 0;
+}
+
+void
+gf_log_globals_init(void *data, gf_loglevel_t level)
+{
+}
diff --git a/libglusterfs/src/unittest/mem_pool_unittest.c b/libglusterfs/src/unittest/mem_pool_unittest.c
new file mode 100644
index 00000000000..9ca324329ba
--- /dev/null
+++ b/libglusterfs/src/unittest/mem_pool_unittest.c
@@ -0,0 +1,483 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterfs/mem-pool.h"
+#include "glusterfs/logging.h"
+#include "glusterfs/xlator.h"
+
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <inttypes.h>
+#include <string.h>
+#include <cmocka_pbc.h>
+#include <cmocka.h>
+
+#ifndef assert_ptr_equal
+#define assert_ptr_equal(a, b) \
+ _assert_int_equal(cast_ptr_to_largest_integral_type(a), \
+ cast_ptr_to_largest_integral_type(b), __FILE__, \
+ __LINE__)
+#endif
+
+/*
+ * memory header for gf_mem_set_acct_info
+ */
+typedef struct __attribute__((packed)) {
+ uint32_t type;
+ size_t size;
+ xlator_t *xl;
+ uint32_t header_magic;
+ uint8_t pad[8];
+} mem_header_t;
+
+/*
+ * Prototypes to private functions
+ */
+int
+gf_mem_set_acct_info(xlator_t *xl, char **alloc_ptr, size_t size, uint32_t type,
+ const char *typestr);
+
+/*
+ * Helper functions
+ */
+static xlator_t *
+helper_xlator_init(uint32_t num_types)
+{
+ xlator_t *xl;
+ int i, ret;
+
+ REQUIRE(num_types > 0);
+
+ xl = test_calloc(1, sizeof(xlator_t));
+ assert_non_null(xl);
+ xl->mem_acct->num_types = num_types;
+ xl->mem_acct = test_calloc(sizeof(struct mem_acct) +
+ sizeof(struct mem_acct_rec) * num_types);
+ assert_non_null(xl->mem_acct);
+
+ xl->ctx = test_calloc(1, sizeof(glusterfs_ctx_t));
+ assert_non_null(xl->ctx);
+
+ for (i = 0; i < num_types; i++) {
+ ret = LOCK_INIT(&(xl->mem_acct->rec[i].lock));
+ assert_int_equal(ret, 0);
+ }
+
+ ENSURE(num_types == xl->mem_acct->num_types);
+ ENSURE(NULL != xl);
+
+ return xl;
+}
+
+static int
+helper_xlator_destroy(xlator_t *xl)
+{
+ int i, ret;
+
+ for (i = 0; i < xl->mem_acct->num_types; i++) {
+ ret = LOCK_DESTROY(&(xl->mem_acct->rec[i].lock));
+ assert_int_equal(ret, 0);
+ }
+
+ free(xl->mem_acct->rec);
+ free(xl->ctx);
+ free(xl);
+ return 0;
+}
+
+static void
+helper_check_memory_headers(char *mem, xlator_t *xl, size_t size, uint32_t type)
+{
+ mem_header_t *p;
+
+ p = (mem_header_t *)mem, assert_int_equal(p->type, type);
+ assert_int_equal(p->size, size);
+ assert_true(p->xl == xl);
+ assert_int_equal(p->header_magic, GF_MEM_HEADER_MAGIC);
+ assert_true(*(uint32_t *)(mem + sizeof(mem_header_t) + size) ==
+ GF_MEM_TRAILER_MAGIC);
+}
+
+/*
+ * Tests
+ */
+static void
+test_gf_mem_acct_enable_set(void **state)
+{
+ (void)state;
+ glusterfs_ctx_t test_ctx;
+
+ expect_assert_failure(gf_mem_acct_enable_set(NULL));
+
+ memset(&test_ctx, 0, sizeof(test_ctx));
+ assert_true(NULL == test_ctx.process_uuid);
+ gf_mem_acct_enable_set((void *)&test_ctx);
+ assert_true(1 == test_ctx.mem_acct_enable);
+ assert_true(NULL == test_ctx.process_uuid);
+}
+
+static void
+test_gf_mem_set_acct_info_asserts(void **state)
+{
+ xlator_t *xl;
+ xlator_t xltest;
+ char *alloc_ptr;
+ size_t size;
+ uint32_t type;
+
+ memset(&xltest, 0, sizeof(xlator_t));
+ xl = (xlator_t *)0xBADD;
+ alloc_ptr = (char *)0xBADD;
+ size = 8196;
+ type = 0;
+
+ // Check xl is NULL
+ expect_assert_failure(
+ gf_mem_set_acct_info(NULL, &alloc_ptr, size, type, ""));
+ // Check xl->mem_acct = NULL
+ expect_assert_failure(
+ gf_mem_set_acct_info(&xltest, &alloc_ptr, 0, type, ""));
+ // Check type <= xl->mem_acct->num_types
+ type = 100;
+ expect_assert_failure(
+ gf_mem_set_acct_info(&xltest, &alloc_ptr, 0, type, ""));
+ // Check alloc is NULL
+ assert_int_equal(-1, gf_mem_set_acct_info(&xltest, NULL, size, type, ""));
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+
+ // Test number of types
+ type = 100;
+ assert_true(NULL != xl->mem_acct);
+ assert_true(type > xl->mem_acct->num_types);
+ expect_assert_failure(gf_mem_set_acct_info(xl, &alloc_ptr, size, type, ""));
+
+ helper_xlator_destroy(xl);
+}
+
+static void
+test_gf_mem_set_acct_info_memory(void **state)
+{
+ xlator_t *xl;
+ char *alloc_ptr;
+ char *temp_ptr;
+ size_t size;
+ uint32_t type;
+ const char *typestr = "TEST";
+
+ size = 8196;
+ type = 9;
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+ assert_null(xl->mem_acct->rec[type].typestr);
+
+ // Test allocation
+ temp_ptr = test_calloc(1, size + GF_MEM_HEADER_SIZE + GF_MEM_TRAILER_SIZE);
+ assert_non_null(temp_ptr);
+ alloc_ptr = temp_ptr;
+ gf_mem_set_acct_info(xl, &alloc_ptr, size, type, typestr);
+
+ // Check values
+ assert_ptr_equal(typestr, xl->mem_acct->rec[type].typestr);
+ assert_int_equal(xl->mem_acct->rec[type].size, size);
+ assert_int_equal(xl->mem_acct->rec[type].num_allocs, 1);
+ assert_int_equal(xl->mem_acct->rec[type].total_allocs, 1);
+ assert_int_equal(xl->mem_acct->rec[type].max_size, size);
+ assert_int_equal(xl->mem_acct->rec[type].max_num_allocs, 1);
+
+ // Check memory
+ helper_check_memory_headers(temp_ptr, xl, size, type);
+
+ // Check that alloc_ptr has been moved correctly
+ // by gf_mem_set_acct_info
+ {
+ mem_header_t *p;
+
+ p = (mem_header_t *)temp_ptr;
+ p++;
+ p->type = 1234;
+ assert_int_equal(*(uint32_t *)alloc_ptr, p->type);
+ }
+
+ free(temp_ptr);
+ helper_xlator_destroy(xl);
+}
+
+static void
+test_gf_calloc_default_calloc(void **state)
+{
+ xlator_t *xl;
+ void *mem;
+ size_t size;
+ uint32_t type;
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+ assert_int_equal(xl->ctx->mem_acct_enable, 0);
+ will_return(__glusterfs_this_location, &xl);
+
+ // Call __gf_calloc
+ size = 1024;
+ type = 3;
+ mem = __gf_calloc(1, size, type, "3");
+ assert_non_null(mem);
+ memset(mem, 0x5A, size);
+
+ // Check xl did not change
+ assert_int_equal(xl->mem_acct->rec[type].size, 0);
+ assert_int_equal(xl->mem_acct->rec[type].num_allocs, 0);
+ assert_int_equal(xl->mem_acct->rec[type].total_allocs, 0);
+ assert_int_equal(xl->mem_acct->rec[type].max_size, 0);
+ assert_int_equal(xl->mem_acct->rec[type].max_num_allocs, 0);
+
+ free(mem);
+ helper_xlator_destroy(xl);
+}
+
+static void
+test_gf_calloc_mem_acct_enabled(void **state)
+{
+ xlator_t *xl;
+ void *mem;
+ size_t size;
+ uint32_t type;
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+ assert_int_equal(xl->ctx->mem_acct_enable, 0);
+ xl->ctx->mem_acct_enable = 1;
+
+ // For line mem-pool.c:115 and mem-pool:118
+ will_return_always(__glusterfs_this_location, &xl);
+
+ // Call __gf_calloc
+ size = 1024;
+ type = 3;
+ mem = __gf_calloc(1, size, type, "3");
+ assert_non_null(mem);
+ memset(mem, 0x5A, size);
+
+ // Check xl values
+ assert_int_equal(xl->mem_acct->rec[type].size, size);
+ assert_int_equal(xl->mem_acct->rec[type].num_allocs, 1);
+ assert_int_equal(xl->mem_acct->rec[type].total_allocs, 1);
+ assert_int_equal(xl->mem_acct->rec[type].max_size, size);
+ assert_int_equal(xl->mem_acct->rec[type].max_num_allocs, 1);
+
+ // Check memory
+ helper_check_memory_headers(mem - sizeof(mem_header_t), xl, size, type);
+ free(mem - sizeof(mem_header_t));
+ helper_xlator_destroy(xl);
+}
+
+static void
+test_gf_malloc_default_malloc(void **state)
+{
+ xlator_t *xl;
+ void *mem;
+ size_t size;
+ uint32_t type;
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+ assert_int_equal(xl->ctx->mem_acct_enable, 0);
+ will_return(__glusterfs_this_location, &xl);
+
+ // Call __gf_malloc
+ size = 1024;
+ type = 3;
+ mem = __gf_malloc(size, type, "3");
+ assert_non_null(mem);
+ memset(mem, 0x5A, size);
+
+ // Check xl did not change
+ assert_int_equal(xl->mem_acct->rec[type].size, 0);
+ assert_int_equal(xl->mem_acct->rec[type].num_allocs, 0);
+ assert_int_equal(xl->mem_acct->rec[type].total_allocs, 0);
+ assert_int_equal(xl->mem_acct->rec[type].max_size, 0);
+ assert_int_equal(xl->mem_acct->rec[type].max_num_allocs, 0);
+
+ free(mem);
+ helper_xlator_destroy(xl);
+}
+
+static void
+test_gf_malloc_mem_acct_enabled(void **state)
+{
+ xlator_t *xl;
+ void *mem;
+ size_t size;
+ uint32_t type;
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+ assert_int_equal(xl->ctx->mem_acct_enable, 0);
+ xl->ctx->mem_acct_enable = 1;
+
+ // For line mem-pool.c:115 and mem-pool:118
+ will_return_always(__glusterfs_this_location, &xl);
+
+ // Call __gf_malloc
+ size = 1024;
+ type = 3;
+ mem = __gf_malloc(size, type, "3");
+ assert_non_null(mem);
+ memset(mem, 0x5A, size);
+
+ // Check xl values
+ assert_int_equal(xl->mem_acct->rec[type].size, size);
+ assert_int_equal(xl->mem_acct->rec[type].num_allocs, 1);
+ assert_int_equal(xl->mem_acct->rec[type].total_allocs, 1);
+ assert_int_equal(xl->mem_acct->rec[type].max_size, size);
+ assert_int_equal(xl->mem_acct->rec[type].max_num_allocs, 1);
+
+ // Check memory
+ helper_check_memory_headers(mem - sizeof(mem_header_t), xl, size, type);
+ free(mem - sizeof(mem_header_t));
+ helper_xlator_destroy(xl);
+}
+
+static void
+test_gf_realloc_default_realloc(void **state)
+{
+ xlator_t *xl;
+ void *mem;
+ size_t size;
+ uint32_t type;
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+ assert_int_equal(xl->ctx->mem_acct_enable, 0);
+ will_return_always(__glusterfs_this_location, &xl);
+
+ // Call __gf_malloc then realloc
+ size = 10;
+ type = 3;
+ mem = __gf_malloc(size, type, "3");
+ assert_non_null(mem);
+ memset(mem, 0xA5, size);
+
+ size = 1024;
+ mem = __gf_realloc(mem, size);
+ assert_non_null(mem);
+ memset(mem, 0x5A, size);
+
+ // Check xl did not change
+ assert_int_equal(xl->mem_acct->rec[type].size, 0);
+ assert_int_equal(xl->mem_acct->rec[type].num_allocs, 0);
+ assert_int_equal(xl->mem_acct->rec[type].total_allocs, 0);
+ assert_int_equal(xl->mem_acct->rec[type].max_size, 0);
+ assert_int_equal(xl->mem_acct->rec[type].max_num_allocs, 0);
+
+ free(mem);
+ helper_xlator_destroy(xl);
+}
+
+static void
+test_gf_realloc_mem_acct_enabled(void **state)
+{
+ xlator_t *xl;
+ void *mem;
+ size_t size;
+ uint32_t type;
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+ assert_int_equal(xl->ctx->mem_acct_enable, 0);
+ xl->ctx->mem_acct_enable = 1;
+
+ // For line mem-pool.c:115 and mem-pool:118
+ will_return_always(__glusterfs_this_location, &xl);
+
+ // Call __gf_malloc then realloc
+ size = 1024;
+ type = 3;
+ mem = __gf_malloc(size, type, "3");
+ assert_non_null(mem);
+ memset(mem, 0xA5, size);
+
+ size = 2048;
+ mem = __gf_realloc(mem, size);
+ assert_non_null(mem);
+ memset(mem, 0x5A, size);
+
+ // Check xl values
+ //
+ // :TODO: This is really weird. I would have expected
+ // xl to only have a size equal to that of the realloc
+ // not to the realloc + the malloc.
+ // Is this a bug?
+ //
+ assert_int_equal(xl->mem_acct->rec[type].size, size + 1024);
+ assert_int_equal(xl->mem_acct->rec[type].num_allocs, 2);
+ assert_int_equal(xl->mem_acct->rec[type].total_allocs, 2);
+ assert_int_equal(xl->mem_acct->rec[type].max_size, size + 1024);
+ assert_int_equal(xl->mem_acct->rec[type].max_num_allocs, 2);
+
+ // Check memory
+ helper_check_memory_headers(mem - sizeof(mem_header_t), xl, size, type);
+ free(mem - sizeof(mem_header_t));
+ helper_xlator_destroy(xl);
+}
+
+static void
+test_gf_realloc_ptr(void **state)
+{
+ xlator_t *xl;
+ void *mem;
+ size_t size;
+
+ // Initialize xl
+ xl = helper_xlator_init(10);
+ assert_int_equal(xl->ctx->mem_acct_enable, 0);
+
+ // For line mem-pool.c:115 and mem-pool:118
+ will_return_always(__glusterfs_this_location, &xl);
+
+ // Tests according to the manpage for realloc
+
+ // Like a malloc
+ size = 1024;
+ mem = __gf_realloc(NULL, size);
+ assert_non_null(mem);
+ memset(mem, 0xA5, size);
+
+ // Like a free
+ mem = __gf_realloc(mem, 0);
+ assert_null(mem);
+
+ // Now enable xl context
+ xl->ctx->mem_acct_enable = 1;
+ expect_assert_failure(__gf_realloc(NULL, size));
+
+ helper_xlator_destroy(xl);
+}
+
+int
+main(void)
+{
+ const struct CMUnitTest libglusterfs_mem_pool_tests[] = {
+ cmocka_unit_test(test_gf_mem_acct_enable_set),
+ cmocka_unit_test(test_gf_mem_set_acct_info_asserts),
+ cmocka_unit_test(test_gf_mem_set_acct_info_memory),
+ cmocka_unit_test(test_gf_calloc_default_calloc),
+ cmocka_unit_test(test_gf_calloc_mem_acct_enabled),
+ cmocka_unit_test(test_gf_malloc_default_malloc),
+ cmocka_unit_test(test_gf_malloc_mem_acct_enabled),
+ cmocka_unit_test(test_gf_realloc_default_realloc),
+ cmocka_unit_test(test_gf_realloc_mem_acct_enabled),
+ cmocka_unit_test(test_gf_realloc_ptr),
+ };
+
+ return cmocka_run_group_tests(libglusterfs_mem_pool_tests, NULL, NULL);
+}
diff --git a/libglusterfs/src/unittest/unittest.h b/libglusterfs/src/unittest/unittest.h
new file mode 100644
index 00000000000..58b3e28bb6e
--- /dev/null
+++ b/libglusterfs/src/unittest/unittest.h
@@ -0,0 +1,47 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_UNITTEST_H_
+#define _GF_UNITTEST_H_
+
+#ifdef UNIT_TESTING
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <cmocka_pbc.h>
+#include <cmocka.h>
+
+extern void
+mock_assert(const int result, const char *const expression,
+ const char *const file, const int line);
+
+// Change GF_CALLOC and GF_FREE to use
+// cmocka memory allocation versions
+#ifdef UNIT_TESTING
+#undef GF_CALLOC
+#define GF_CALLOC(n, s, t) test_calloc(n, s)
+#undef GF_FREE
+#define GF_FREE test_free
+
+/* Catch intended assert()'s while unit-testing */
+extern void
+mock_assert(const int result, const char *const expression,
+ const char *const file, const int line);
+
+#undef assert
+#define assert(expression) \
+ mock_assert((int)(expression), #expression, __FILE__, __LINE__);
+#endif
+#else
+#define REQUIRE(p) /**/
+#define ENSURE(p) /**/
+#endif
+
+#endif /* _GF_UNITTEST */
diff --git a/libglusterfs/src/xlator.c b/libglusterfs/src/xlator.c
index 7c081fa5568..9a2582d45d5 100644
--- a/libglusterfs/src/xlator.c
+++ b/libglusterfs/src/xlator.c
@@ -8,772 +8,1586 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
+#include "glusterfs/xlator.h"
#include <dlfcn.h>
#include <netdb.h>
#include <fnmatch.h>
-#include "defaults.h"
+#include "glusterfs/defaults.h"
+#include "glusterfs/libglusterfs-messages.h"
-#define SET_DEFAULT_FOP(fn) do { \
- if (!xl->fops->fn) \
- xl->fops->fn = default_##fn; \
- } while (0)
+#define SET_DEFAULT_FOP(fn) \
+ do { \
+ if (!xl->fops->fn) \
+ xl->fops->fn = default_##fn; \
+ if (!xl->pass_through_fops->fn) \
+ xl->pass_through_fops->fn = default_##fn; \
+ } while (0)
-#define SET_DEFAULT_CBK(fn) do { \
- if (!xl->cbks->fn) \
- xl->cbks->fn = default_##fn; \
- } while (0)
+#define SET_DEFAULT_CBK(fn) \
+ do { \
+ if (!xl->cbks->fn) \
+ xl->cbks->fn = default_##fn; \
+ } while (0)
+pthread_mutex_t xlator_init_mutex = PTHREAD_MUTEX_INITIALIZER;
-static void
-fill_defaults (xlator_t *xl)
+void
+xlator_init_lock(void)
{
- if (xl == NULL) {
- gf_log_callingfn ("xlator", GF_LOG_WARNING, "invalid argument");
- return;
- }
+ (void)pthread_mutex_lock(&xlator_init_mutex);
+}
- SET_DEFAULT_FOP (create);
- SET_DEFAULT_FOP (open);
- SET_DEFAULT_FOP (stat);
- SET_DEFAULT_FOP (readlink);
- SET_DEFAULT_FOP (mknod);
- SET_DEFAULT_FOP (mkdir);
- SET_DEFAULT_FOP (unlink);
- SET_DEFAULT_FOP (rmdir);
- SET_DEFAULT_FOP (symlink);
- SET_DEFAULT_FOP (rename);
- SET_DEFAULT_FOP (link);
- SET_DEFAULT_FOP (truncate);
- SET_DEFAULT_FOP (readv);
- SET_DEFAULT_FOP (writev);
- SET_DEFAULT_FOP (statfs);
- SET_DEFAULT_FOP (flush);
- SET_DEFAULT_FOP (fsync);
- SET_DEFAULT_FOP (setxattr);
- SET_DEFAULT_FOP (getxattr);
- SET_DEFAULT_FOP (fsetxattr);
- SET_DEFAULT_FOP (fgetxattr);
- SET_DEFAULT_FOP (removexattr);
- SET_DEFAULT_FOP (fremovexattr);
- SET_DEFAULT_FOP (opendir);
- SET_DEFAULT_FOP (readdir);
- SET_DEFAULT_FOP (readdirp);
- SET_DEFAULT_FOP (fsyncdir);
- SET_DEFAULT_FOP (access);
- SET_DEFAULT_FOP (ftruncate);
- SET_DEFAULT_FOP (fstat);
- SET_DEFAULT_FOP (lk);
- SET_DEFAULT_FOP (inodelk);
- SET_DEFAULT_FOP (finodelk);
- SET_DEFAULT_FOP (entrylk);
- SET_DEFAULT_FOP (fentrylk);
- SET_DEFAULT_FOP (lookup);
- SET_DEFAULT_FOP (rchecksum);
- SET_DEFAULT_FOP (xattrop);
- SET_DEFAULT_FOP (fxattrop);
- SET_DEFAULT_FOP (setattr);
- SET_DEFAULT_FOP (fsetattr);
-
- SET_DEFAULT_FOP (getspec);
-
- SET_DEFAULT_CBK (release);
- SET_DEFAULT_CBK (releasedir);
- SET_DEFAULT_CBK (forget);
-
- if (!xl->notify)
- xl->notify = default_notify;
-
- if (!xl->mem_acct_init)
- xl->mem_acct_init = default_mem_acct_init;
+void
+xlator_init_unlock(void)
+{
+ (void)pthread_mutex_unlock(&xlator_init_mutex);
+}
- return;
+static struct xlator_cbks default_cbks = {};
+struct volume_options default_options[] = {
+ {
+ .key = {"log-level"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"generic"},
+ .value = {"DEBUG", "WARNING", "ERROR", "INFO", "CRITICAL", "NONE",
+ "TRACE"},
+ .description = "Option to set log-level of given translator",
+ },
+ {
+ .key = {NULL},
+ },
+};
+
+/* Handle the common options in each translator */
+void
+handle_default_options(xlator_t *xl, dict_t *options)
+{
+ int ret;
+ char *value;
+
+ /* log-level */
+ ret = dict_get_str(options, "log-level", &value);
+ if (!ret) {
+ int log_level = glusterd_check_log_level(value);
+ if (log_level != -1) {
+ xl->loglevel = log_level;
+ }
+ }
}
+static void
+fill_defaults(xlator_t *xl)
+{
+ if (xl == NULL) {
+ gf_msg_callingfn("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_INVALID_ARG,
+ "invalid argument");
+ return;
+ }
+
+ if (!xl->pass_through_fops)
+ xl->pass_through_fops = default_fops;
+
+ SET_DEFAULT_FOP(create);
+ SET_DEFAULT_FOP(open);
+ SET_DEFAULT_FOP(stat);
+ SET_DEFAULT_FOP(readlink);
+ SET_DEFAULT_FOP(mknod);
+ SET_DEFAULT_FOP(mkdir);
+ SET_DEFAULT_FOP(unlink);
+ SET_DEFAULT_FOP(rmdir);
+ SET_DEFAULT_FOP(symlink);
+ SET_DEFAULT_FOP(rename);
+ SET_DEFAULT_FOP(link);
+ SET_DEFAULT_FOP(truncate);
+ SET_DEFAULT_FOP(readv);
+ SET_DEFAULT_FOP(writev);
+ SET_DEFAULT_FOP(statfs);
+ SET_DEFAULT_FOP(flush);
+ SET_DEFAULT_FOP(fsync);
+ SET_DEFAULT_FOP(setxattr);
+ SET_DEFAULT_FOP(getxattr);
+ SET_DEFAULT_FOP(fsetxattr);
+ SET_DEFAULT_FOP(fgetxattr);
+ SET_DEFAULT_FOP(removexattr);
+ SET_DEFAULT_FOP(fremovexattr);
+ SET_DEFAULT_FOP(opendir);
+ SET_DEFAULT_FOP(readdir);
+ SET_DEFAULT_FOP(readdirp);
+ SET_DEFAULT_FOP(fsyncdir);
+ SET_DEFAULT_FOP(access);
+ SET_DEFAULT_FOP(ftruncate);
+ SET_DEFAULT_FOP(fstat);
+ SET_DEFAULT_FOP(lk);
+ SET_DEFAULT_FOP(inodelk);
+ SET_DEFAULT_FOP(finodelk);
+ SET_DEFAULT_FOP(entrylk);
+ SET_DEFAULT_FOP(fentrylk);
+ SET_DEFAULT_FOP(lookup);
+ SET_DEFAULT_FOP(rchecksum);
+ SET_DEFAULT_FOP(xattrop);
+ SET_DEFAULT_FOP(fxattrop);
+ SET_DEFAULT_FOP(setattr);
+ SET_DEFAULT_FOP(fsetattr);
+ SET_DEFAULT_FOP(fallocate);
+ SET_DEFAULT_FOP(discard);
+ SET_DEFAULT_FOP(zerofill);
+ SET_DEFAULT_FOP(ipc);
+ SET_DEFAULT_FOP(seek);
+ SET_DEFAULT_FOP(lease);
+ SET_DEFAULT_FOP(getactivelk);
+ SET_DEFAULT_FOP(setactivelk);
+ SET_DEFAULT_FOP(put);
+
+ SET_DEFAULT_FOP(getspec);
+ SET_DEFAULT_FOP(icreate);
+ SET_DEFAULT_FOP(namelink);
+ SET_DEFAULT_FOP(copy_file_range);
+
+ if (!xl->cbks)
+ xl->cbks = &default_cbks;
+
+ SET_DEFAULT_CBK(release);
+ SET_DEFAULT_CBK(releasedir);
+ SET_DEFAULT_CBK(forget);
+
+ if (!xl->fini)
+ xl->fini = default_fini;
+
+ if (!xl->notify)
+ xl->notify = default_notify;
+
+ if (!xl->mem_acct_init)
+ xl->mem_acct_init = default_mem_acct_init;
+
+ return;
+}
int
-xlator_set_type_virtual (xlator_t *xl, const char *type)
+xlator_set_type_virtual(xlator_t *xl, const char *type)
{
- GF_VALIDATE_OR_GOTO ("xlator", xl, out);
- GF_VALIDATE_OR_GOTO ("xlator", type, out);
+ GF_VALIDATE_OR_GOTO("xlator", xl, out);
+ GF_VALIDATE_OR_GOTO("xlator", type, out);
- xl->type = gf_strdup (type);
+ xl->type = gf_strdup(type);
- if (xl->type)
- return 0;
+ if (xl->type)
+ return 0;
out:
- return -1;
+ return -1;
}
-
int
-xlator_volopt_dynload (char *xlator_type, void **dl_handle,
- volume_opt_list_t *opt_list)
+xlator_volopt_dynload(char *xlator_type, void **dl_handle,
+ volume_opt_list_t *opt_list)
{
- int ret = -1;
- char *name = NULL;
- void *handle = NULL;
- volume_opt_list_t *vol_opt = NULL;
-
- GF_VALIDATE_OR_GOTO ("xlator", xlator_type, out);
-
- GF_ASSERT (dl_handle);
-
- if (*dl_handle)
- if (dlclose (*dl_handle))
- gf_log ("xlator", GF_LOG_WARNING, "Unable to close "
- "previously opened handle( may be stale)."
- "Ignoring the invalid handle");
-
- ret = gf_asprintf (&name, "%s/%s.so", XLATORDIR, xlator_type);
- if (-1 == ret) {
- gf_log ("xlator", GF_LOG_ERROR, "asprintf failed");
- goto out;
+ int ret = -1;
+ int flag = 0;
+ char *name = NULL;
+ void *handle = NULL;
+ xlator_api_t *xlapi = NULL;
+ volume_option_t *opt = NULL;
+
+ GF_VALIDATE_OR_GOTO("xlator", xlator_type, out);
+
+ /* socket.so doesn't fall under the default xlator directory, hence we
+ * need this check */
+ if (!strstr(xlator_type, "rpc-transport"))
+ ret = gf_asprintf(&name, "%s/%s.so", XLATORDIR, xlator_type);
+ else {
+ flag = 1;
+ ret = gf_asprintf(&name, "%s/%s.so", XLATORPARENTDIR, xlator_type);
+ }
+ if (-1 == ret) {
+ goto out;
+ }
+
+ ret = -1;
+
+ gf_msg_trace("xlator", 0, "attempt to load file %s", name);
+
+ handle = dlopen(name, RTLD_NOW);
+ if (!handle) {
+ gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLOPEN_FAILED, "error=%s",
+ dlerror(), NULL);
+ goto out;
+ }
+
+ if (flag == 0) {
+ /* check new struct first, and then check this */
+ xlapi = dlsym(handle, "xlator_api");
+ if (!xlapi) {
+ gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_DLSYM_ERROR, "error=%s",
+ dlerror(), NULL);
+ goto out;
}
- ret = -1;
+ opt_list->given_opt = xlapi->options;
+ if (!opt_list->given_opt) {
+ gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_LOAD_FAILED, NULL);
+ goto out;
+ }
+ } else {
+ opt = dlsym(handle, "options");
+ if (!opt) {
+ gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_DLSYM_ERROR, "error=%s",
+ dlerror(), NULL);
+ goto out;
+ }
- gf_log ("xlator", GF_LOG_TRACE, "attempt to load file %s", name);
+ opt_list->given_opt = opt;
+ }
- handle = dlopen (name, RTLD_NOW|RTLD_GLOBAL);
- if (!handle) {
- gf_log ("xlator", GF_LOG_WARNING, "%s", dlerror ());
- goto out;
- }
- *dl_handle = handle;
+ *dl_handle = handle;
+ handle = NULL;
+ ret = 0;
+out:
+ GF_FREE(name);
+ if (handle)
+ dlclose(handle);
- vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
- gf_common_mt_volume_opt_list_t);
+ gf_msg_debug("xlator", 0, "Returning %d", ret);
+ return ret;
+}
+static int
+xlator_dynload_apis(xlator_t *xl)
+{
+ int ret = -1;
+ void *handle = NULL;
+ volume_opt_list_t *vol_opt = NULL;
+ xlator_api_t *xlapi = NULL;
+ int i = 0;
+
+ handle = xl->dlhandle;
+
+ xlapi = dlsym(handle, "xlator_api");
+ if (!xlapi) {
+ gf_smsg("xlator", GF_LOG_ERROR, 0, LG_MSG_DLSYM_ERROR, "dlsym=%s",
+ dlerror(), NULL);
+ ret = -1;
+ goto out;
+ }
+
+ xl->fops = xlapi->fops;
+ if (!xl->fops) {
+ gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_STRUCT_MISS, "name=%s",
+ xl->name, NULL);
+ goto out;
+ }
+
+ xl->cbks = xlapi->cbks;
+ if (!xl->cbks) {
+ gf_msg_trace("xlator", 0, "%s: struct missing (cbks)", xl->name);
+ }
+
+ xl->init = xlapi->init;
+ if (!xl->init) {
+ gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_METHOD_MISS, "name=%s",
+ xl->name, NULL);
+ goto out;
+ }
+
+ xl->fini = xlapi->fini;
+ if (!xl->fini) {
+ gf_msg_trace("xlator", 0, "%s: method missing (fini)", xl->name);
+ }
+
+ xl->reconfigure = xlapi->reconfigure;
+ if (!xl->reconfigure) {
+ gf_msg_trace("xlator", 0, "%s: method missing (reconfigure)", xl->name);
+ }
+ xl->notify = xlapi->notify;
+ if (!xl->notify) {
+ gf_msg_trace("xlator", 0, "%s: method missing (notify)", xl->name);
+ }
+ xl->dumpops = xlapi->dumpops;
+ if (!xl->dumpops) {
+ gf_msg_trace("xlator", 0, "%s: method missing (dumpops)", xl->name);
+ }
+ xl->mem_acct_init = xlapi->mem_acct_init;
+ if (!xl->mem_acct_init) {
+ gf_msg_trace("xlator", 0, "%s: method missing (mem_acct_init)",
+ xl->name);
+ }
+
+ xl->dump_metrics = xlapi->dump_metrics;
+ if (!xl->dump_metrics) {
+ gf_msg_trace("xlator", 0, "%s: method missing (dump_metrics)",
+ xl->name);
+ }
+
+ xl->pass_through_fops = xlapi->pass_through_fops;
+ if (!xl->pass_through_fops) {
+ gf_msg_trace("xlator", 0,
+ "%s: method missing (pass_through_fops), "
+ "falling back to default",
+ xl->name);
+ }
+
+ vol_opt = GF_CALLOC(1, sizeof(volume_opt_list_t),
+ gf_common_mt_volume_opt_list_t);
+ if (!vol_opt) {
+ goto out;
+ }
+ INIT_LIST_HEAD(&vol_opt->list);
+
+ vol_opt->given_opt = default_options;
+ list_add_tail(&vol_opt->list, &xl->volume_options);
+
+ if (xlapi->options) {
+ vol_opt = GF_CALLOC(1, sizeof(volume_opt_list_t),
+ gf_common_mt_volume_opt_list_t);
if (!vol_opt) {
- goto out;
+ goto out;
}
+ INIT_LIST_HEAD(&vol_opt->list);
- if (!(vol_opt->given_opt = dlsym (handle, "options"))) {
- dlerror ();
- gf_log ("xlator", GF_LOG_DEBUG,
- "Strict option validation not enforced -- neglecting");
- }
- opt_list->given_opt = vol_opt->given_opt;
+ vol_opt->given_opt = xlapi->options;
+ list_add_tail(&vol_opt->list, &xl->volume_options);
+ }
- INIT_LIST_HEAD (&vol_opt->list);
- list_add_tail (&vol_opt->list, &opt_list->list);
+ xl->id = xlapi->xlator_id;
+ xl->flags = xlapi->flags;
+ xl->identifier = xlapi->identifier;
+ xl->category = xlapi->category;
- ret = 0;
- out:
- if (name)
- GF_FREE (name);
+ memcpy(xl->op_version, xlapi->op_version,
+ sizeof(uint32_t) * GF_MAX_RELEASES);
- gf_log ("xlator", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ gf_latency_reset(&xl->stats.interval.latencies[i]);
+ }
+ ret = 0;
+out:
+ return ret;
}
-
int
-xlator_dynload (xlator_t *xl)
+xlator_dynload(xlator_t *xl)
{
- int ret = -1;
- char *name = NULL;
- void *handle = NULL;
- volume_opt_list_t *vol_opt = NULL;
-
+ int ret = -1;
+ char *name = NULL;
+ void *handle = NULL;
- GF_VALIDATE_OR_GOTO ("xlator", xl, out);
+ GF_VALIDATE_OR_GOTO("xlator", xl, out);
- INIT_LIST_HEAD (&xl->volume_options);
+ INIT_LIST_HEAD(&xl->volume_options);
- ret = gf_asprintf (&name, "%s/%s.so", XLATORDIR, xl->type);
- if (-1 == ret) {
- gf_log ("xlator", GF_LOG_ERROR, "asprintf failed");
- goto out;
- }
-
- ret = -1;
+ ret = gf_asprintf(&name, "%s/%s.so", XLATORDIR, xl->type);
+ if (-1 == ret) {
+ goto out;
+ }
- gf_log ("xlator", GF_LOG_TRACE, "attempt to load file %s", name);
+ ret = -1;
- handle = dlopen (name, RTLD_NOW|RTLD_GLOBAL);
- if (!handle) {
- gf_log ("xlator", GF_LOG_WARNING, "%s", dlerror ());
- goto out;
- }
- xl->dlhandle = handle;
+ gf_msg_trace("xlator", 0, "attempt to load file %s", name);
- if (!(xl->fops = dlsym (handle, "fops"))) {
- gf_log ("xlator", GF_LOG_WARNING, "dlsym(fops) on %s",
- dlerror ());
- goto out;
- }
+ handle = dlopen(name, RTLD_NOW);
+ if (!handle) {
+ gf_smsg("xlator", GF_LOG_WARNING, 0, LG_MSG_DLOPEN_FAILED, "error=%s",
+ dlerror(), NULL);
+ goto out;
+ }
+ xl->dlhandle = handle;
- if (!(xl->cbks = dlsym (handle, "cbks"))) {
- gf_log ("xlator", GF_LOG_WARNING, "dlsym(cbks) on %s",
- dlerror ());
- goto out;
- }
+ ret = xlator_dynload_apis(xl);
+ if (-1 == ret)
+ goto out;
- if (!(xl->init = dlsym (handle, "init"))) {
- gf_log ("xlator", GF_LOG_WARNING, "dlsym(init) on %s",
- dlerror ());
- goto out;
- }
+ fill_defaults(xl);
- if (!(xl->fini = dlsym (handle, "fini"))) {
- gf_log ("xlator", GF_LOG_WARNING, "dlsym(fini) on %s",
- dlerror ());
- goto out;
- }
+ ret = 0;
- if (!(xl->notify = dlsym (handle, "notify"))) {
- gf_log ("xlator", GF_LOG_TRACE,
- "dlsym(notify) on %s -- neglecting", dlerror ());
- }
-
- if (!(xl->dumpops = dlsym (handle, "dumpops"))) {
- gf_log ("xlator", GF_LOG_TRACE,
- "dlsym(dumpops) on %s -- neglecting", dlerror ());
- }
+out:
+ GF_FREE(name);
+ return ret;
+}
- if (!(xl->mem_acct_init = dlsym (handle, "mem_acct_init"))) {
- gf_log (xl->name, GF_LOG_TRACE,
- "dlsym(mem_acct_init) on %s -- neglecting",
- dlerror ());
- }
+int
+xlator_set_type(xlator_t *xl, const char *type)
+{
+ int ret = 0;
- if (!(xl->reconfigure = dlsym (handle, "reconfigure"))) {
- gf_log ("xlator", GF_LOG_TRACE,
- "dlsym(reconfigure) on %s -- neglecting",
- dlerror());
- }
+ /* Handle 'global' translator differently */
+ if (!strncmp(GF_GLOBAL_XLATOR_NAME, type, SLEN(GF_GLOBAL_XLATOR_NAME))) {
+ volume_opt_list_t *vol_opt = NULL;
- vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
- gf_common_mt_volume_opt_list_t);
+ /* set the required values from Global xlator */
+ xl->type = gf_strdup(GF_GLOBAL_XLATOR_NAME);
+ xl->cbks = global_xlator.cbks;
+ xl->fops = global_xlator.fops;
+ xl->init = global_xlator.init;
+ xl->fini = global_xlator.fini;
+ xl->reconfigure = global_xlator.reconfigure;
+ vol_opt = GF_CALLOC(1, sizeof(volume_opt_list_t),
+ gf_common_mt_volume_opt_list_t);
if (!vol_opt) {
- goto out;
+ ret = -1;
+ goto out;
}
- if (!(vol_opt->given_opt = dlsym (handle, "options"))) {
- dlerror ();
- gf_log (xl->name, GF_LOG_TRACE,
- "Strict option validation not enforced -- neglecting");
- }
- list_add_tail (&vol_opt->list, &xl->volume_options);
+ vol_opt->given_opt = global_xl_options;
- fill_defaults (xl);
+ INIT_LIST_HEAD(&xl->volume_options);
+ INIT_LIST_HEAD(&vol_opt->list);
+ list_add_tail(&vol_opt->list, &xl->volume_options);
+ fill_defaults(xl);
ret = 0;
+ goto out;
+ }
+ ret = xlator_set_type_virtual(xl, type);
+ if (!ret)
+ ret = xlator_dynload(xl);
out:
- if (name)
- GF_FREE (name);
- return ret;
+ return ret;
}
-
-int
-xlator_set_type (xlator_t *xl, const char *type)
+void
+xlator_set_inode_lru_limit(xlator_t *this, void *data)
{
- int ret = 0;
+ int inode_lru_limit = 0;
- ret = xlator_set_type_virtual (xl, type);
- if (!ret)
- ret = xlator_dynload (xl);
+ if (this->itable) {
+ if (!data) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, LG_MSG_INPUT_DATA_NULL,
+ NULL);
+ goto out;
+ }
+ inode_lru_limit = *(int *)data;
+ inode_table_set_lru_limit(this->itable, inode_lru_limit);
+ }
- return ret;
+out:
+ return;
}
-
void
-xlator_foreach (xlator_t *this,
- void (*fn)(xlator_t *each,
- void *data),
- void *data)
+xlator_foreach(xlator_t *this, void (*fn)(xlator_t *each, void *data),
+ void *data)
{
- xlator_t *first = NULL;
- xlator_t *old_THIS = NULL;
+ xlator_t *first = NULL;
+ xlator_t *old_THIS = NULL;
- GF_VALIDATE_OR_GOTO ("xlator", this, out);
- GF_VALIDATE_OR_GOTO ("xlator", fn, out);
+ GF_VALIDATE_OR_GOTO("xlator", this, out);
+ GF_VALIDATE_OR_GOTO("xlator", fn, out);
- first = this;
+ first = this;
- while (first->prev)
- first = first->prev;
+ while (first->prev)
+ first = first->prev;
- while (first) {
- old_THIS = THIS;
- THIS = first;
+ while (first) {
+ old_THIS = THIS;
+ THIS = first;
- fn (first, data);
+ fn(first, data);
- THIS = old_THIS;
- first = first->next;
- }
+ THIS = old_THIS;
+ first = first->next;
+ }
out:
- return;
+ return;
}
+void
+xlator_foreach_depth_first(xlator_t *this,
+ void (*fn)(xlator_t *each, void *data), void *data)
+{
+ xlator_list_t *subv = NULL;
+
+ subv = this->children;
+
+ while (subv) {
+ xlator_foreach_depth_first(subv->xlator, fn, data);
+ subv = subv->next;
+ }
+
+ fn(this, data);
+}
xlator_t *
-xlator_search_by_name (xlator_t *any, const char *name)
+xlator_search_by_name(xlator_t *any, const char *name)
{
- xlator_t *search = NULL;
+ xlator_t *search = NULL;
- GF_VALIDATE_OR_GOTO ("xlator", any, out);
- GF_VALIDATE_OR_GOTO ("xlator", name, out);
+ GF_VALIDATE_OR_GOTO("xlator", any, out);
+ GF_VALIDATE_OR_GOTO("xlator", name, out);
- search = any;
+ search = any;
- while (search->prev)
- search = search->prev;
+ while (search->prev)
+ search = search->prev;
- while (search) {
- if (!strcmp (search->name, name))
- break;
- search = search->next;
- }
+ while (search) {
+ if (!strcmp(search->name, name))
+ break;
+ search = search->next;
+ }
out:
- return search;
+ return search;
}
+/*
+ * With brick multiplexing, we sort of have multiple graphs, so
+ * xlator_search_by_name might not find what we want. Also, the translator
+ * we're looking for might not be a direct child if something else was put in
+ * between (as already happened with decompounder before that was fixed) and
+ * it's hard to debug why our translator wasn't found. Using a recursive tree
+ * search instead of a linear search works around both problems.
+ */
+static xlator_t *
+get_xlator_by_name_or_type(xlator_t *this, char *target, int is_name)
+{
+ xlator_list_t *trav;
+ xlator_t *child_xl;
+ char *value;
+
+ for (trav = this->children; trav; trav = trav->next) {
+ value = is_name ? trav->xlator->name : trav->xlator->type;
+ if (!strcmp(value, target) && !trav->xlator->cleanup_starting) {
+ return trav->xlator;
+ }
+ child_xl = get_xlator_by_name_or_type(trav->xlator, target, is_name);
+ if (child_xl) {
+ /*
+ * If the xlator we're looking for is somewhere down
+ * the stack, get_xlator_by_name expects to get a
+ * pointer to the top of its subtree (child of "this")
+ * while get_xlator_by_type expects a pointer to what
+ * we actually found. Handle both cases here.
+ *
+ * TBD: rename the functions and fix callers to better
+ * reflect the difference in semantics.
+ */
+ return is_name ? trav->xlator : child_xl;
+ }
+ }
+
+ return NULL;
+}
+
+xlator_t *
+get_xlator_by_name(xlator_t *this, char *target)
+{
+ return get_xlator_by_name_or_type(this, target, 1);
+}
+
+xlator_t *
+get_xlator_by_type(xlator_t *this, char *target)
+{
+ return get_xlator_by_name_or_type(this, target, 0);
+}
static int
__xlator_init(xlator_t *xl)
{
- xlator_t *old_THIS = NULL;
- int ret = 0;
+ xlator_t *old_THIS = NULL;
+ int ret = 0;
+ int fop_idx = 0;
- old_THIS = THIS;
- THIS = xl;
+ old_THIS = THIS;
+ THIS = xl;
- ret = xl->init (xl);
+ /* initialize the metrics related locks */
+ for (fop_idx = 0; fop_idx < GF_FOP_MAXVALUE; fop_idx++) {
+ GF_ATOMIC_INIT(xl->stats.total.metrics[fop_idx].fop, 0);
+ GF_ATOMIC_INIT(xl->stats.total.metrics[fop_idx].cbk, 0);
- THIS = old_THIS;
+ GF_ATOMIC_INIT(xl->stats.interval.metrics[fop_idx].fop, 0);
+ GF_ATOMIC_INIT(xl->stats.interval.metrics[fop_idx].cbk, 0);
+ }
+ GF_ATOMIC_INIT(xl->stats.total.count, 0);
+ GF_ATOMIC_INIT(xl->stats.interval.count, 0);
- return ret;
-}
+ xlator_init_lock();
+ handle_default_options(xl, xl->options);
+ ret = xl->init(xl);
+ xlator_init_unlock();
+ THIS = old_THIS;
+
+ return ret;
+}
int
-xlator_init (xlator_t *xl)
+xlator_init(xlator_t *xl)
{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO ("xlator", xl, out);
+ int32_t ret = -1;
- if (xl->mem_acct_init)
- xl->mem_acct_init (xl);
+ GF_VALIDATE_OR_GOTO("xlator", xl, out);
- if (!xl->init) {
- gf_log (xl->name, GF_LOG_WARNING, "No init() found");
- goto out;
- }
+ if (xl->mem_acct_init)
+ xl->mem_acct_init(xl);
- ret = __xlator_init (xl);
+ xl->instance_name = NULL;
+ GF_ATOMIC_INIT(xl->xprtrefcnt, 0);
+ if (!xl->init) {
+ gf_smsg(xl->name, GF_LOG_WARNING, 0, LG_MSG_INIT_FAILED, NULL);
+ goto out;
+ }
- if (ret) {
- gf_log (xl->name, GF_LOG_ERROR,
- "Initialization of volume '%s' failed,"
- " review your volfile again",
- xl->name);
- goto out;
- }
+ ret = __xlator_init(xl);
- xl->init_succeeded = 1;
+ if (ret) {
+ gf_smsg(xl->name, GF_LOG_ERROR, 0, LG_MSG_VOLUME_ERROR, "name=%s",
+ xl->name, NULL);
+ goto out;
+ }
- ret = 0;
+ xl->init_succeeded = 1;
+ /*xl->cleanup_starting = 0;
+ xl->call_cleanup = 0;
+ */
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
static void
-xlator_fini_rec (xlator_t *xl)
+xlator_fini_rec(xlator_t *xl)
{
- xlator_list_t *trav = NULL;
- xlator_t *old_THIS = NULL;
+ xlator_list_t *trav = NULL;
+ xlator_t *old_THIS = NULL;
- GF_VALIDATE_OR_GOTO ("xlator", xl, out);
+ GF_VALIDATE_OR_GOTO("xlator", xl, out);
- trav = xl->children;
+ trav = xl->children;
- while (trav) {
- if (!trav->xlator->init_succeeded) {
- break;
- }
-
- xlator_fini_rec (trav->xlator);
- gf_log (trav->xlator->name, GF_LOG_DEBUG, "fini done");
- trav = trav->next;
+ while (trav) {
+ if (!trav->xlator->init_succeeded) {
+ break;
}
- if (xl->init_succeeded) {
- if (xl->fini) {
- old_THIS = THIS;
- THIS = xl;
-
- xl->fini (xl);
-
- if (xl->local_pool)
- mem_pool_destroy (xl->local_pool);
-
- THIS = old_THIS;
- } else {
- gf_log (xl->name, GF_LOG_DEBUG, "No fini() found");
- }
- xl->init_succeeded = 0;
+ xlator_fini_rec(trav->xlator);
+ gf_msg_debug(trav->xlator->name, 0, "fini done");
+ trav = trav->next;
+ }
+
+ xl->cleanup_starting = 1;
+ if (xl->init_succeeded) {
+ if (xl->fini) {
+ old_THIS = THIS;
+ THIS = xl;
+
+ xl->fini(xl);
+
+ if (xl->local_pool) {
+ mem_pool_destroy(xl->local_pool);
+ xl->local_pool = NULL;
+ }
+ if (xl->itable) {
+ inode_table_destroy(xl->itable);
+ xl->itable = NULL;
+ }
+
+ THIS = old_THIS;
+ } else {
+ gf_msg_debug(xl->name, 0, "No fini() found");
}
+ xl->init_succeeded = 0;
+ }
out:
- return;
+ return;
}
-
int
-xlator_notify (xlator_t *xl, int event, void *data, ...)
+xlator_notify(xlator_t *xl, int event, void *data, ...)
{
- xlator_t *old_THIS = NULL;
- int ret = 0;
+ xlator_t *old_THIS = NULL;
+ int ret = 0;
- old_THIS = THIS;
- THIS = xl;
+ old_THIS = THIS;
+ THIS = xl;
- ret = xl->notify (xl, event, data);
+ ret = xl->notify(xl, event, data);
- THIS = old_THIS;
+ THIS = old_THIS;
- return ret;
+ return ret;
}
-
int
-xlator_mem_acct_init (xlator_t *xl, int num_types)
+xlator_mem_acct_init(xlator_t *xl, int num_types)
{
- int i = 0;
- int ret = 0;
+ int i = 0;
+ int ret = 0;
+
+ if (!xl)
+ return -1;
- if (!gf_mem_acct_is_enabled())
- return 0;
+ if (!xl->ctx)
+ return -1;
- if (!xl)
- return -1;
+ if (!xl->ctx->mem_acct_enable)
+ return 0;
- xl->mem_acct.num_types = num_types;
+ xl->mem_acct = MALLOC(sizeof(struct mem_acct) +
+ sizeof(struct mem_acct_rec) * num_types);
- xl->mem_acct.rec = CALLOC(num_types, sizeof(struct mem_acct_rec));
+ if (!xl->mem_acct) {
+ return -1;
+ }
- if (!xl->mem_acct.rec) {
- return -1;
- }
+ xl->mem_acct->num_types = num_types;
+ GF_ATOMIC_INIT(xl->mem_acct->refcnt, 1);
- for (i = 0; i < num_types; i++) {
- ret = LOCK_INIT(&(xl->mem_acct.rec[i].lock));
- if (ret) {
- fprintf(stderr, "Unable to lock..errno : %d",errno);
- }
+ for (i = 0; i < num_types; i++) {
+ memset(&xl->mem_acct->rec[i], 0, sizeof(struct mem_acct_rec));
+ ret = LOCK_INIT(&(xl->mem_acct->rec[i].lock));
+ if (ret) {
+ fprintf(stderr, "Unable to lock..errno : %d", errno);
}
+#ifdef DEBUG
+ INIT_LIST_HEAD(&(xl->mem_acct->rec[i].obj_list));
+#endif
+ }
- return 0;
+ return 0;
}
+void
+xlator_mem_acct_unref(struct mem_acct *mem_acct)
+{
+ uint32_t i;
+
+ if (GF_ATOMIC_DEC(mem_acct->refcnt) == 0) {
+ for (i = 0; i < mem_acct->num_types; i++) {
+ LOCK_DESTROY(&(mem_acct->rec[i].lock));
+ }
+ FREE(mem_acct);
+ }
+}
void
-xlator_tree_fini (xlator_t *xl)
+xlator_tree_fini(xlator_t *xl)
{
- xlator_t *top = NULL;
+ xlator_t *top = NULL;
- GF_VALIDATE_OR_GOTO ("xlator", xl, out);
+ GF_VALIDATE_OR_GOTO("xlator", xl, out);
- top = xl;
- xlator_fini_rec (top);
+ top = xl;
+ xlator_fini_rec(top);
out:
- return;
+ return;
}
+int
+xlator_list_destroy(xlator_list_t *list)
+{
+ xlator_list_t *next = NULL;
+
+ while (list) {
+ next = list->next;
+ GF_FREE(list);
+ list = next;
+ }
+
+ return 0;
+}
int
-xlator_tree_free (xlator_t *tree)
+xlator_memrec_free(xlator_t *xl)
{
- xlator_t *trav = tree;
- xlator_t *prev = tree;
+ struct mem_acct *mem_acct = NULL;
- if (!tree) {
- gf_log ("parser", GF_LOG_ERROR, "Translator tree not found");
- return -1;
- }
+ if (!xl) {
+ return 0;
+ }
+ mem_acct = xl->mem_acct;
- while (prev) {
- trav = prev->next;
- dict_destroy (prev->options);
- GF_FREE (prev->name);
- GF_FREE (prev->type);
- GF_FREE (prev);
- prev = trav;
- }
+ if (mem_acct) {
+ xlator_mem_acct_unref(mem_acct);
+ xl->mem_acct = NULL;
+ }
+
+ return 0;
+}
+static int
+xlator_members_free(xlator_t *xl)
+{
+ volume_opt_list_t *vol_opt = NULL;
+ volume_opt_list_t *tmp = NULL;
+
+ if (!xl)
return 0;
+
+ GF_FREE(xl->name);
+ GF_FREE(xl->type);
+ if (!(xl->ctx && xl->ctx->cmd_args.vgtool != _gf_none) && xl->dlhandle)
+ dlclose(xl->dlhandle);
+ if (xl->options)
+ dict_unref(xl->options);
+
+ xlator_list_destroy(xl->children);
+
+ xlator_list_destroy(xl->parents);
+
+ list_for_each_entry_safe(vol_opt, tmp, &xl->volume_options, list)
+ {
+ list_del_init(&vol_opt->list);
+ GF_FREE(vol_opt);
+ }
+
+ return 0;
}
+/* This function destroys all the xlator members except for the
+ * xlator strcuture and its mem accounting field.
+ *
+ * If otherwise, it would destroy the master xlator object as well
+ * its mem accounting, which would mean after calling glusterfs_graph_destroy()
+ * there cannot be any reference to GF_FREE() from the master xlator, this is
+ * not possible because of the following dependencies:
+ * - glusterfs_ctx_t will have mem pools allocated by the master xlators
+ * - xlator objects will have references to those mem pools(g: dict)
+ *
+ * Ordering the freeing in any of the order will also not solve the dependency:
+ * - Freeing xlator objects(including memory accounting) before mem pools
+ * destruction will mean not use GF_FREE while destroying mem pools.
+ * - Freeing mem pools and then destroying xlator objects would lead to crashes
+ * when xlator tries to unref dict or other mem pool objects.
+ *
+ * Hence the way chosen out of this interdependency is to split xlator object
+ * free into two stages:
+ * - Free all the xlator members excpet for its mem accounting structure
+ * - Free all the mem accouting structures of xlator along with the xlator
+ * object itself.
+ *
+ * This two stages of destruction, is mainly required for glfs_fini().
+ */
+
+int
+xlator_tree_free_members(xlator_t *tree)
+{
+ xlator_t *trav = tree;
+ xlator_t *prev = tree;
+
+ if (!tree) {
+ gf_smsg("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND, NULL);
+ return -1;
+ }
+
+ while (prev) {
+ trav = prev->next;
+ xlator_members_free(prev);
+ prev = trav;
+ }
+
+ return 0;
+}
+
+int
+xlator_tree_free_memacct(xlator_t *tree)
+{
+ xlator_t *trav = tree;
+ xlator_t *prev = tree;
+
+ if (!tree) {
+ gf_smsg("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND, NULL);
+ return -1;
+ }
+
+ while (prev) {
+ trav = prev->next;
+ xlator_memrec_free(prev);
+ GF_FREE(prev);
+ prev = trav;
+ }
+
+ return 0;
+}
+
+static int
+xlator_mem_free(xlator_t *xl)
+{
+ volume_opt_list_t *vol_opt = NULL;
+ volume_opt_list_t *tmp = NULL;
+
+ if (!xl)
+ return 0;
+
+ if (xl->options) {
+ dict_unref(xl->options);
+ xl->options = NULL;
+ }
+
+ list_for_each_entry_safe(vol_opt, tmp, &xl->volume_options, list)
+ {
+ list_del_init(&vol_opt->list);
+ GF_FREE(vol_opt);
+ }
+
+ xlator_memrec_free(xl);
+
+ return 0;
+}
+
+static void
+xlator_call_fini(xlator_t *this)
+{
+ if (!this || this->call_cleanup)
+ return;
+ this->cleanup_starting = 1;
+ this->call_cleanup = 1;
+ xlator_call_fini(this->next);
+ this->fini(this);
+}
void
-loc_wipe (loc_t *loc)
+xlator_mem_cleanup(xlator_t *this)
{
- if (loc->inode) {
- inode_unref (loc->inode);
- loc->inode = NULL;
- }
- if (loc->path) {
- GF_FREE ((char *)loc->path);
- loc->path = NULL;
+ xlator_list_t *list = this->children;
+ xlator_t *trav = list->xlator;
+ inode_table_t *inode_table = NULL;
+ xlator_t *prev = trav;
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_list_t **trav_p = NULL;
+ xlator_t *top = NULL;
+ xlator_t *victim = NULL;
+ glusterfs_graph_t *graph = NULL;
+ gf_boolean_t graph_cleanup = _gf_false;
+
+ if (this->call_cleanup || !this->ctx)
+ return;
+
+ this->call_cleanup = 1;
+ ctx = this->ctx;
+
+ inode_table = this->itable;
+ if (inode_table) {
+ inode_table_destroy(inode_table);
+ this->itable = NULL;
+ }
+
+ xlator_call_fini(trav);
+
+ while (prev) {
+ trav = prev->next;
+ xlator_mem_free(prev);
+ prev = trav;
+ }
+
+ if (this->fini) {
+ this->fini(this);
+ }
+
+ xlator_mem_free(this);
+
+ if (ctx->active) {
+ top = ctx->active->first;
+ LOCK(&ctx->volfile_lock);
+ for (trav_p = &top->children; *trav_p; trav_p = &(*trav_p)->next) {
+ victim = (*trav_p)->xlator;
+ if (victim->call_cleanup && !strcmp(victim->name, this->name)) {
+ graph_cleanup = _gf_true;
+ (*trav_p) = (*trav_p)->next;
+ break;
+ }
}
+ UNLOCK(&ctx->volfile_lock);
+ }
- if (loc->parent) {
- inode_unref (loc->parent);
- loc->parent = NULL;
+ if (graph_cleanup) {
+ prev = this;
+ graph = ctx->active;
+ pthread_mutex_lock(&graph->mutex);
+ while (prev) {
+ trav = prev->next;
+ GF_FREE(prev);
+ prev = trav;
}
+ pthread_mutex_unlock(&graph->mutex);
+ }
+}
- memset (loc, 0, sizeof (*loc));
+void
+loc_wipe(loc_t *loc)
+{
+ if (loc->inode) {
+ inode_unref(loc->inode);
+ loc->inode = NULL;
+ }
+ if (loc->path) {
+ GF_FREE((char *)loc->path);
+ loc->path = NULL;
+ }
+
+ if (loc->parent) {
+ inode_unref(loc->parent);
+ loc->parent = NULL;
+ }
+
+ memset(loc, 0, sizeof(*loc));
}
int
-loc_path (loc_t *loc, const char *bname)
+loc_path(loc_t *loc, const char *bname)
{
- int ret = 0;
+ int ret = 0;
- if (loc->path)
- goto out;
+ if (loc->path)
+ goto out;
- ret = -1;
+ ret = -1;
- if (bname && !strlen (bname))
- bname = NULL;
+ if (bname && !strlen(bname))
+ bname = NULL;
- if (!bname)
- goto inode_path;
+ if (!bname)
+ goto inode_path;
- if (loc->parent && !uuid_is_null (loc->parent->gfid)) {
- ret = inode_path (loc->parent, bname, (char**)&loc->path);
- } else if (!uuid_is_null (loc->pargfid)) {
- ret = gf_asprintf ((char**)&loc->path, INODE_PATH_FMT"/%s",
- uuid_utoa (loc->pargfid), bname);
- }
+ if (loc->parent && !gf_uuid_is_null(loc->parent->gfid)) {
+ ret = inode_path(loc->parent, bname, (char **)&loc->path);
+ } else if (!gf_uuid_is_null(loc->pargfid)) {
+ ret = gf_asprintf((char **)&loc->path, INODE_PATH_FMT "/%s",
+ uuid_utoa(loc->pargfid), bname);
+ }
- if (loc->path)
- goto out;
+ if (loc->path)
+ goto out;
inode_path:
- if (loc->inode && !uuid_is_null (loc->inode->gfid)) {
- ret = inode_path (loc->inode, NULL, (char **)&loc->path);
- } else if (!uuid_is_null (loc->gfid)) {
- ret = gf_asprintf ((char**)&loc->path, INODE_PATH_FMT,
- uuid_utoa (loc->gfid));
- }
+ if (loc->inode && !gf_uuid_is_null(loc->inode->gfid)) {
+ ret = inode_path(loc->inode, NULL, (char **)&loc->path);
+ } else if (!gf_uuid_is_null(loc->gfid)) {
+ ret = gf_asprintf((char **)&loc->path, INODE_PATH_FMT,
+ uuid_utoa(loc->gfid));
+ }
+out:
+ return ret;
+}
+
+void
+loc_gfid(loc_t *loc, uuid_t gfid)
+{
+ if (!gfid)
+ goto out;
+ gf_uuid_clear(gfid);
+
+ if (!loc)
+ goto out;
+ else if (!gf_uuid_is_null(loc->gfid))
+ gf_uuid_copy(gfid, loc->gfid);
+ else if (loc->inode && (!gf_uuid_is_null(loc->inode->gfid)))
+ gf_uuid_copy(gfid, loc->inode->gfid);
+out:
+ return;
+}
+
+void
+loc_pargfid(loc_t *loc, uuid_t gfid)
+{
+ if (!gfid)
+ goto out;
+ gf_uuid_clear(gfid);
+
+ if (!loc)
+ goto out;
+ else if (!gf_uuid_is_null(loc->pargfid))
+ gf_uuid_copy(gfid, loc->pargfid);
+ else if (loc->parent && (!gf_uuid_is_null(loc->parent->gfid)))
+ gf_uuid_copy(gfid, loc->parent->gfid);
out:
- return ret;
+ return;
+}
+
+char *
+loc_gfid_utoa(loc_t *loc)
+{
+ uuid_t gfid = {
+ 0,
+ };
+ loc_gfid(loc, gfid);
+ return uuid_utoa(gfid);
}
int
-loc_copy (loc_t *dst, loc_t *src)
+loc_touchup(loc_t *loc, const char *name)
{
- int ret = -1;
+ char *path = NULL;
+ int ret = 0;
+
+ if (loc->path)
+ goto out;
+
+ if (loc->parent && name && strlen(name)) {
+ ret = inode_path(loc->parent, name, &path);
+ if (path) /*Guaranteed to have trailing '/' */
+ loc->name = strrchr(path, '/') + 1;
+
+ if (gf_uuid_is_null(loc->pargfid))
+ gf_uuid_copy(loc->pargfid, loc->parent->gfid);
+ } else if (loc->inode) {
+ ret = inode_path(loc->inode, 0, &path);
+ if (gf_uuid_is_null(loc->gfid))
+ gf_uuid_copy(loc->gfid, loc->inode->gfid);
+ }
+
+ if (ret < 0 || !path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ loc->path = path;
+ ret = 0;
+out:
+ return ret;
+}
- GF_VALIDATE_OR_GOTO ("xlator", dst, err);
- GF_VALIDATE_OR_GOTO ("xlator", src, err);
+int
+loc_copy_overload_parent(loc_t *dst, loc_t *src, inode_t *parent)
+{
+ int ret = -1;
- uuid_copy (dst->gfid, src->gfid);
- uuid_copy (dst->pargfid, src->pargfid);
- uuid_copy (dst->gfid, src->gfid);
+ GF_VALIDATE_OR_GOTO("xlator", dst, err);
+ GF_VALIDATE_OR_GOTO("xlator", src, err);
+ GF_VALIDATE_OR_GOTO("xlator", parent, err);
- if (src->inode)
- dst->inode = inode_ref (src->inode);
+ gf_uuid_copy(dst->gfid, src->gfid);
+ gf_uuid_copy(dst->pargfid, parent->gfid);
- if (src->parent)
- dst->parent = inode_ref (src->parent);
+ if (src->inode)
+ dst->inode = inode_ref(src->inode);
- if (src->path) {
- dst->path = gf_strdup (src->path);
+ if (parent)
+ dst->parent = inode_ref(parent);
- if (!dst->path)
- goto out;
+ if (src->path) {
+ dst->path = gf_strdup(src->path);
- if (src->name)
- dst->name = strrchr (dst->path, '/');
- if (dst->name)
- dst->name++;
- }
+ if (!dst->path)
+ goto out;
- ret = 0;
+ if (src->name)
+ dst->name = strrchr(dst->path, '/');
+ if (dst->name)
+ dst->name++;
+ } else if (src->name) {
+ dst->name = src->name;
+ }
+
+ ret = 0;
out:
- if (ret == -1)
- loc_wipe (dst);
+ if (ret == -1)
+ loc_wipe(dst);
err:
- return ret;
+ return ret;
}
-
int
-xlator_list_destroy (xlator_list_t *list)
+loc_copy(loc_t *dst, loc_t *src)
{
- xlator_list_t *next = NULL;
+ int ret = -1;
- while (list) {
- next = list->next;
- GF_FREE (list);
- list = next;
- }
+ GF_VALIDATE_OR_GOTO("xlator", dst, err);
+ GF_VALIDATE_OR_GOTO("xlator", src, err);
- return 0;
+ if (!gf_uuid_is_null(src->gfid))
+ gf_uuid_copy(dst->gfid, src->gfid);
+ else if (src->inode && !gf_uuid_is_null(src->inode->gfid))
+ gf_uuid_copy(dst->gfid, src->inode->gfid);
+
+ gf_uuid_copy(dst->pargfid, src->pargfid);
+
+ if (src->inode)
+ dst->inode = inode_ref(src->inode);
+
+ if (src->parent)
+ dst->parent = inode_ref(src->parent);
+
+ if (src->path) {
+ dst->path = gf_strdup(src->path);
+
+ if (!dst->path)
+ goto out;
+
+ if (src->name)
+ dst->name = strrchr(dst->path, '/');
+ if (dst->name)
+ dst->name++;
+ } else if (src->name) {
+ dst->name = src->name;
+ }
+
+ ret = 0;
+out:
+ if (ret == -1)
+ loc_wipe(dst);
+
+err:
+ return ret;
}
+gf_boolean_t
+loc_is_root(loc_t *loc)
+{
+ if (loc && __is_root_gfid(loc->gfid)) {
+ return _gf_true;
+ } else if (loc && loc->inode && __is_root_gfid(loc->inode->gfid)) {
+ return _gf_true;
+ }
-int
-xlator_destroy (xlator_t *xl)
+ return _gf_false;
+}
+
+int32_t
+loc_build_child(loc_t *child, loc_t *parent, char *name)
{
- volume_opt_list_t *vol_opt = NULL;
- volume_opt_list_t *tmp = NULL;
+ int32_t ret = -1;
- if (!xl)
- return 0;
+ GF_VALIDATE_OR_GOTO("xlator", child, out);
+ GF_VALIDATE_OR_GOTO("xlator", parent, out);
+ GF_VALIDATE_OR_GOTO("xlator", name, out);
- if (xl->name)
- GF_FREE (xl->name);
- if (xl->type)
- GF_FREE (xl->type);
- if (xl->dlhandle)
- dlclose (xl->dlhandle);
- if (xl->options)
- dict_destroy (xl->options);
+ loc_gfid(parent, child->pargfid);
- xlator_list_destroy (xl->children);
+ if (strcmp(parent->path, "/") == 0)
+ ret = gf_asprintf((char **)&child->path, "/%s", name);
+ else
+ ret = gf_asprintf((char **)&child->path, "%s/%s", parent->path, name);
- xlator_list_destroy (xl->parents);
+ if (ret < 0 || !child->path) {
+ ret = -1;
+ goto out;
+ }
- list_for_each_entry_safe (vol_opt, tmp, &xl->volume_options, list) {
- list_del_init (&vol_opt->list);
- GF_FREE (vol_opt);
- }
+ child->name = strrchr(child->path, '/') + 1;
- GF_FREE (xl);
+ child->parent = inode_ref(parent->inode);
+ child->inode = inode_new(parent->inode->table);
- return 0;
+ if (!child->inode) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if ((ret < 0) && child)
+ loc_wipe(child);
+
+ return ret;
}
+gf_boolean_t
+loc_is_nameless(loc_t *loc)
+{
+ gf_boolean_t ret = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("xlator", loc, out);
+
+ if ((!loc->parent && gf_uuid_is_null(loc->pargfid)) || !loc->name)
+ ret = _gf_true;
+out:
+ return ret;
+}
int
-is_gf_log_command (xlator_t *this, const char *name, char *value)
-{
- xlator_t *trav = NULL;
- char key[1024] = {0,};
- int ret = -1;
- int log_level = -1;
- gf_boolean_t syslog_flag = 0;
- glusterfs_ctx_t *ctx = NULL;
-
- if (!strcmp ("trusted.glusterfs.syslog", name)) {
- ret = gf_string2boolean (value, &syslog_flag);
- if (ret) {
- ret = EOPNOTSUPP;
- goto out;
- }
- if (syslog_flag)
- gf_log_enable_syslog ();
- else
- gf_log_disable_syslog ();
-
- goto out;
- }
+xlator_destroy(xlator_t *xl)
+{
+ if (!xl)
+ return 0;
- if (fnmatch ("trusted.glusterfs*set-log-level", name, FNM_NOESCAPE))
- goto out;
+ xlator_members_free(xl);
+ xlator_memrec_free(xl);
+ GF_FREE(xl);
- log_level = glusterd_check_log_level (value);
- if (log_level == -1) {
- ret = EOPNOTSUPP;
- goto out;
- }
+ return 0;
+}
- /* Some crude way to change the log-level of process */
- if (!strcmp (name, "trusted.glusterfs.set-log-level")) {
- /* */
- gf_log ("glusterfs", gf_log_get_loglevel(),
- "setting log level to %d (old-value=%d)",
- log_level, gf_log_get_loglevel());
- gf_log_set_loglevel (log_level);
- ret = 0;
- goto out;
- }
+static int32_t
+gf_bin_to_string(char *dst, size_t size, void *src, size_t len)
+{
+ if (len >= size) {
+ return EINVAL;
+ }
- if (!strcmp (name, "trusted.glusterfs.fuse.set-log-level")) {
- /* */
- gf_log (this->name, gf_log_get_xl_loglevel (this),
- "setting log level to %d (old-value=%d)",
- log_level, gf_log_get_xl_loglevel (this));
- gf_log_set_xl_loglevel (this, log_level);
- ret = 0;
- goto out;
- }
+ memcpy(dst, src, len);
+ dst[len] = 0;
- ctx = glusterfs_ctx_get();
- if (!ctx)
- goto out;
- if (!ctx->active)
- goto out;
- trav = ctx->active->top;
-
- while (trav) {
- snprintf (key, 1024, "trusted.glusterfs.%s.set-log-level",
- trav->name);
- if (fnmatch (name, key, FNM_NOESCAPE) == 0) {
- gf_log (trav->name, gf_log_get_xl_loglevel (trav),
- "setting log level to %d (old-value=%d)",
- log_level, gf_log_get_xl_loglevel (trav));
- gf_log_set_xl_loglevel (trav, log_level);
- ret = 0;
- }
- trav = trav->next;
+ return 0;
+}
+
+int
+is_gf_log_command(xlator_t *this, const char *name, char *value, size_t size)
+{
+ xlator_t *trav = NULL;
+ char key[1024] = {
+ 0,
+ };
+ int ret = -1;
+ int log_level = -1;
+ gf_boolean_t syslog_flag = 0;
+ glusterfs_ctx_t *ctx = NULL;
+
+ if (!strcmp("trusted.glusterfs.syslog", name)) {
+ ret = gf_bin_to_string(key, sizeof(key), value, size);
+ if (ret != 0) {
+ goto out;
+ }
+ ret = gf_string2boolean(key, &syslog_flag);
+ if (ret) {
+ ret = EOPNOTSUPP;
+ goto out;
+ }
+ if (syslog_flag)
+ gf_log_enable_syslog();
+ else
+ gf_log_disable_syslog();
+
+ goto out;
+ }
+
+ if (fnmatch("trusted.glusterfs*set-log-level", name, FNM_NOESCAPE))
+ goto out;
+
+ ret = gf_bin_to_string(key, sizeof(key), value, size);
+ if (ret != 0) {
+ goto out;
+ }
+
+ log_level = glusterd_check_log_level(key);
+ if (log_level == -1) {
+ ret = EOPNOTSUPP;
+ goto out;
+ }
+
+ /* Some crude way to change the log-level of process */
+ if (!strcmp(name, "trusted.glusterfs.set-log-level")) {
+ gf_smsg("glusterfs", gf_log_get_loglevel(), 0, LG_MSG_SET_LOG_LEVEL,
+ "new-value=%d", log_level, "old-value=%d",
+ gf_log_get_loglevel(), NULL);
+ gf_log_set_loglevel(this->ctx, log_level);
+ ret = 0;
+ goto out;
+ }
+
+ if (!strcmp(name, "trusted.glusterfs.fuse.set-log-level")) {
+ /* */
+ gf_smsg(this->name, gf_log_get_xl_loglevel(this), 0,
+ LG_MSG_SET_LOG_LEVEL, "new-value=%d", log_level, "old-value=%d",
+ gf_log_get_xl_loglevel(this), NULL);
+ gf_log_set_xl_loglevel(this, log_level);
+ ret = 0;
+ goto out;
+ }
+
+ ctx = this->ctx;
+ if (!ctx)
+ goto out;
+ if (!ctx->active)
+ goto out;
+ trav = ctx->active->top;
+
+ while (trav) {
+ snprintf(key, 1024, "trusted.glusterfs.%s.set-log-level", trav->name);
+ if (fnmatch(name, key, FNM_NOESCAPE) == 0) {
+ gf_smsg(trav->name, gf_log_get_xl_loglevel(trav), 0,
+ LG_MSG_SET_LOG_LEVEL, "new-value%d", log_level,
+ "old-value=%d", gf_log_get_xl_loglevel(trav), NULL);
+ gf_log_set_xl_loglevel(trav, log_level);
+ ret = 0;
}
+ trav = trav->next;
+ }
out:
- return ret;
+ return ret;
}
+int
+glusterd_check_log_level(const char *value)
+{
+ int log_level = -1;
+
+ if (!strcasecmp(value, "CRITICAL")) {
+ log_level = GF_LOG_CRITICAL;
+ } else if (!strcasecmp(value, "ERROR")) {
+ log_level = GF_LOG_ERROR;
+ } else if (!strcasecmp(value, "WARNING")) {
+ log_level = GF_LOG_WARNING;
+ } else if (!strcasecmp(value, "INFO")) {
+ log_level = GF_LOG_INFO;
+ } else if (!strcasecmp(value, "DEBUG")) {
+ log_level = GF_LOG_DEBUG;
+ } else if (!strcasecmp(value, "TRACE")) {
+ log_level = GF_LOG_TRACE;
+ } else if (!strcasecmp(value, "NONE")) {
+ log_level = GF_LOG_NONE;
+ }
+
+ if (log_level == -1)
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0, LG_MSG_INVALID_INIT, NULL);
+
+ return log_level;
+}
int
-glusterd_check_log_level (const char *value)
-{
- int log_level = -1;
-
- if (!strcasecmp (value, "CRITICAL")) {
- log_level = GF_LOG_CRITICAL;
- } else if (!strcasecmp (value, "ERROR")) {
- log_level = GF_LOG_ERROR;
- } else if (!strcasecmp (value, "WARNING")) {
- log_level = GF_LOG_WARNING;
- } else if (!strcasecmp (value, "INFO")) {
- log_level = GF_LOG_INFO;
- } else if (!strcasecmp (value, "DEBUG")) {
- log_level = GF_LOG_DEBUG;
- } else if (!strcasecmp (value, "TRACE")) {
- log_level = GF_LOG_TRACE;
- } else if (!strcasecmp (value, "NONE")) {
- log_level = GF_LOG_NONE;
+xlator_subvolume_count(xlator_t *this)
+{
+ int i = 0;
+ xlator_list_t *list = NULL;
+
+ for (list = this->children; list; list = list->next)
+ i++;
+ return i;
+}
+
+static int
+_copy_opt_to_child(dict_t *options, char *key, data_t *value, void *data)
+{
+ xlator_t *child = data;
+
+ gf_log(__func__, GF_LOG_DEBUG, "copying %s to child %s", key, child->name);
+ dict_set(child->options, key, value);
+
+ return 0;
+}
+
+int
+copy_opts_to_child(xlator_t *src, xlator_t *dst, char *glob)
+{
+ return dict_foreach_fnmatch(src->options, glob, _copy_opt_to_child, dst);
+}
+
+int
+glusterfs_delete_volfile_checksum(glusterfs_ctx_t *ctx, const char *volfile_id)
+{
+ gf_volfile_t *volfile_tmp = NULL;
+ gf_volfile_t *volfile_obj = NULL;
+
+ list_for_each_entry(volfile_tmp, &ctx->volfile_list, volfile_list)
+ {
+ if (!strcmp(volfile_id, volfile_tmp->vol_id)) {
+ list_del_init(&volfile_tmp->volfile_list);
+ volfile_obj = volfile_tmp;
+ break;
}
+ }
+
+ if (volfile_obj) {
+ GF_FREE(volfile_obj);
+ } else {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "failed to get volfile "
+ "checksum for volfile id %s.",
+ volfile_id);
+ }
+
+ return 0;
+}
+
+/*
+ The function is required to take dict ref for every xlator at graph.
+ At the time of compare graph topology create a graph and populate
+ key values in the dictionary, after finished graph comparison we do destroy
+ the new graph.At the time of construct graph we don't take any reference
+ so to avoid dict leak at the of destroying graph due to ref counter underflow
+ we need to call dict_ref here.
+
+*/
+
+void
+gluster_graph_take_reference(xlator_t *tree)
+{
+ xlator_t *trav = tree;
+ xlator_t *prev = tree;
+
+ if (!tree) {
+ gf_smsg("parser", GF_LOG_ERROR, 0, LG_MSG_TREE_NOT_FOUND, NULL);
+ return;
+ }
+
+ while (prev) {
+ trav = prev->next;
+ if (prev->options)
+ dict_ref(prev->options);
+ prev = trav;
+ }
+ return;
+}
- if (log_level == -1)
- gf_log (THIS->name, GF_LOG_ERROR, "Invalid log-level. possible values "
- "are DEBUG|WARNING|ERROR|CRITICAL|NONE|TRACE");
+gf_boolean_t
+mgmt_is_multiplexed_daemon(char *name)
+{
+ const char *mux_daemons[] = {"glustershd", NULL};
+ int i;
+
+ if (!name)
+ return _gf_false;
+
+ for (i = 0; mux_daemons[i]; i++) {
+ if (!strcmp(name, mux_daemons[i]))
+ return _gf_true;
+ }
+ return _gf_false;
+}
- return log_level;
+gf_boolean_t
+xlator_is_cleanup_starting(xlator_t *this)
+{
+ gf_boolean_t cleanup = _gf_false;
+ glusterfs_graph_t *graph = NULL;
+ xlator_t *xl = NULL;
+
+ if (!this) {
+ gf_smsg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_OBJECT_NULL, "xlator",
+ NULL);
+ goto out;
+ }
+
+ graph = this->graph;
+ if (!graph) {
+ gf_smsg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_GRAPH_NOT_SET,
+ "name=%s", this->name, NULL);
+ goto out;
+ }
+
+ xl = graph->first;
+ if (xl && xl->cleanup_starting)
+ cleanup = _gf_true;
+out:
+ return cleanup;
}
+int
+graph_total_client_xlator(glusterfs_graph_t *graph)
+{
+ xlator_t *xl = NULL;
+ int count = 0;
+
+ if (!graph) {
+ gf_smsg("xlator", GF_LOG_WARNING, EINVAL, LG_MSG_OBJECT_NULL, "graph",
+ NULL);
+ goto out;
+ }
+
+ xl = graph->first;
+ if (!strcmp(xl->type, "protocol/server")) {
+ gf_msg_debug(xl->name, 0, "Return because it is a server graph");
+ return 0;
+ }
+
+ while (xl) {
+ if (strcmp(xl->type, "protocol/client") == 0) {
+ count++;
+ }
+ xl = xl->next;
+ }
+out:
+ return count;
+}
diff --git a/libglusterfs/src/xlator.h b/libglusterfs/src/xlator.h
deleted file mode 100644
index b7b59fac956..00000000000
--- a/libglusterfs/src/xlator.h
+++ /dev/null
@@ -1,877 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _XLATOR_H
-#define _XLATOR_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdio.h>
-#include <stdint.h>
-#include <inttypes.h>
-
-#include "event-history.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "dict.h"
-#include "compat.h"
-#include "list.h"
-#include "latency.h"
-
-#define FIRST_CHILD(xl) (xl->children->xlator)
-
-#define GF_SET_ATTR_MODE 0x1
-#define GF_SET_ATTR_UID 0x2
-#define GF_SET_ATTR_GID 0x4
-#define GF_SET_ATTR_SIZE 0x8
-#define GF_SET_ATTR_ATIME 0x10
-#define GF_SET_ATTR_MTIME 0x20
-
-#define gf_attr_mode_set(mode) ((mode) & GF_SET_ATTR_MODE)
-#define gf_attr_uid_set(mode) ((mode) & GF_SET_ATTR_UID)
-#define gf_attr_gid_set(mode) ((mode) & GF_SET_ATTR_GID)
-#define gf_attr_size_set(mode) ((mode) & GF_SET_ATTR_SIZE)
-#define gf_attr_atime_set(mode) ((mode) & GF_SET_ATTR_ATIME)
-#define gf_attr_mtime_set(mode) ((mode) & GF_SET_ATTR_MTIME)
-
-struct _xlator;
-typedef struct _xlator xlator_t;
-struct _dir_entry_t;
-typedef struct _dir_entry_t dir_entry_t;
-struct _gf_dirent_t;
-typedef struct _gf_dirent_t gf_dirent_t;
-struct _loc;
-typedef struct _loc loc_t;
-
-
-typedef int32_t (*event_notify_fn_t) (xlator_t *this, int32_t event, void *data,
- ...);
-
-#include "list.h"
-#include "gf-dirent.h"
-#include "stack.h"
-#include "iobuf.h"
-#include "inode.h"
-#include "fd.h"
-#include "globals.h"
-#include "iatt.h"
-#include "options.h"
-
-
-struct _loc {
- const char *path;
- const char *name;
- inode_t *inode;
- inode_t *parent;
- /* Currently all location based operations are through 'gfid' of inode.
- * But the 'inode->gfid' only gets set in higher most layer (as in,
- * 'fuse', 'protocol/server', or 'nfs/server'). So if translators want
- * to send fops on a inode before the 'inode->gfid' is set, they have to
- * make use of below 'gfid' fields
- */
- uuid_t gfid;
- uuid_t pargfid;
-};
-
-
-typedef int32_t (*fop_getspec_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- char *spec_data);
-
-typedef int32_t (*fop_rchecksum_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint32_t weak_checksum,
- uint8_t *strong_checksum,
- dict_t *xdata);
-
-
-typedef int32_t (*fop_getspec_t) (call_frame_t *frame,
- xlator_t *this,
- const char *key,
- int32_t flag);
-
-typedef int32_t (*fop_rchecksum_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, off_t offset,
- int32_t len, dict_t *xdata);
-
-
-typedef int32_t (*fop_lookup_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *xdata,
- struct iatt *postparent);
-
-typedef int32_t (*fop_stat_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf, dict_t *xdata);
-
-typedef int32_t (*fop_fstat_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf, dict_t *xdata);
-
-typedef int32_t (*fop_truncate_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-typedef int32_t (*fop_ftruncate_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-typedef int32_t (*fop_access_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_readlink_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *buf, dict_t *xdata);
-
-typedef int32_t (*fop_mknod_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-typedef int32_t (*fop_mkdir_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-typedef int32_t (*fop_unlink_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-typedef int32_t (*fop_rmdir_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-typedef int32_t (*fop_symlink_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-typedef int32_t (*fop_rename_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent, dict_t *xdata);
-
-typedef int32_t (*fop_link_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-typedef int32_t (*fop_create_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
-
-typedef int32_t (*fop_open_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd, dict_t *xdata);
-
-typedef int32_t (*fop_readv_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref, dict_t *xdata);
-
-typedef int32_t (*fop_writev_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-typedef int32_t (*fop_flush_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_fsync_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata);
-
-typedef int32_t (*fop_opendir_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd, dict_t *xdata);
-
-typedef int32_t (*fop_fsyncdir_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_statfs_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf, dict_t *xdata);
-
-typedef int32_t (*fop_setxattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_getxattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict, dict_t *xdata);
-
-typedef int32_t (*fop_fsetxattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_fgetxattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict, dict_t *xdata);
-
-typedef int32_t (*fop_removexattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_fremovexattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_lk_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *flock, dict_t *xdata);
-
-typedef int32_t (*fop_inodelk_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_finodelk_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_entrylk_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_fentrylk_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata);
-
-typedef int32_t (*fop_readdir_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata);
-
-typedef int32_t (*fop_readdirp_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata);
-
-typedef int32_t (*fop_xattrop_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *xattr, dict_t *xdata);
-
-typedef int32_t (*fop_fxattrop_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *xattr, dict_t *xdata);
-
-
-typedef int32_t (*fop_setattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preop_stbuf,
- struct iatt *postop_stbuf, dict_t *xdata);
-
-typedef int32_t (*fop_fsetattr_cbk_t) (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preop_stbuf,
- struct iatt *postop_stbuf, dict_t *xdata);
-
-typedef int32_t (*fop_lookup_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xdata);
-
-typedef int32_t (*fop_stat_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-typedef int32_t (*fop_fstat_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-typedef int32_t (*fop_truncate_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset, dict_t *xdata);
-
-typedef int32_t (*fop_ftruncate_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset, dict_t *xdata);
-
-typedef int32_t (*fop_access_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask, dict_t *xdata);
-
-typedef int32_t (*fop_readlink_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size, dict_t *xdata);
-
-typedef int32_t (*fop_mknod_t) (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev,
- mode_t umask, dict_t *xdata);
-
-typedef int32_t (*fop_mkdir_t) (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, mode_t umask, dict_t *xdata);
-
-typedef int32_t (*fop_unlink_t) (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int xflags, dict_t *xdata);
-
-typedef int32_t (*fop_rmdir_t) (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int xflags, dict_t *xdata);
-
-typedef int32_t (*fop_symlink_t) (call_frame_t *frame, xlator_t *this,
- const char *linkname, loc_t *loc,
- mode_t umask, dict_t *xdata);
-
-typedef int32_t (*fop_rename_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-typedef int32_t (*fop_link_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-typedef int32_t (*fop_create_t) (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata);
-
-/* Tell subsequent writes on the fd_t to fsync after every writev fop without
- * requiring a fsync fop.
- */
-#define GF_OPEN_FSYNC 0x01
-
-/* Tell write-behind to disable writing behind despite O_SYNC not being set.
- */
-#define GF_OPEN_NOWB 0x02
-
-typedef int32_t (*fop_open_t) (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata);
-
-typedef int32_t (*fop_readv_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- uint32_t flags, dict_t *xdata);
-
-typedef int32_t (*fop_writev_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset,
- uint32_t flags,
- struct iobref *iobref, dict_t *xdata);
-
-typedef int32_t (*fop_flush_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-typedef int32_t (*fop_fsync_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-typedef int32_t (*fop_opendir_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- fd_t *fd, dict_t *xdata);
-
-typedef int32_t (*fop_fsyncdir_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-typedef int32_t (*fop_statfs_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-typedef int32_t (*fop_setxattr_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-typedef int32_t (*fop_getxattr_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-typedef int32_t (*fop_fsetxattr_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-typedef int32_t (*fop_fgetxattr_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-typedef int32_t (*fop_removexattr_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-typedef int32_t (*fop_fremovexattr_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-typedef int32_t (*fop_lk_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-typedef int32_t (*fop_inodelk_t) (call_frame_t *frame,
- xlator_t *this,
- const char *volume,
- loc_t *loc,
- int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-typedef int32_t (*fop_finodelk_t) (call_frame_t *frame,
- xlator_t *this,
- const char *volume,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-typedef int32_t (*fop_entrylk_t) (call_frame_t *frame,
- xlator_t *this,
- const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd,
- entrylk_type type, dict_t *xdata);
-
-typedef int32_t (*fop_fentrylk_t) (call_frame_t *frame,
- xlator_t *this,
- const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd,
- entrylk_type type, dict_t *xdata);
-
-typedef int32_t (*fop_readdir_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset, dict_t *xdata);
-
-typedef int32_t (*fop_readdirp_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- dict_t *xdata);
-
-typedef int32_t (*fop_xattrop_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t optype,
- dict_t *xattr, dict_t *xdata);
-
-typedef int32_t (*fop_fxattrop_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t optype,
- dict_t *xattr, dict_t *xdata);
-
-typedef int32_t (*fop_setattr_t) (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata);
-
-typedef int32_t (*fop_fsetattr_t) (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata);
-
-
-struct xlator_fops {
- fop_lookup_t lookup;
- fop_stat_t stat;
- fop_fstat_t fstat;
- fop_truncate_t truncate;
- fop_ftruncate_t ftruncate;
- fop_access_t access;
- fop_readlink_t readlink;
- fop_mknod_t mknod;
- fop_mkdir_t mkdir;
- fop_unlink_t unlink;
- fop_rmdir_t rmdir;
- fop_symlink_t symlink;
- fop_rename_t rename;
- fop_link_t link;
- fop_create_t create;
- fop_open_t open;
- fop_readv_t readv;
- fop_writev_t writev;
- fop_flush_t flush;
- fop_fsync_t fsync;
- fop_opendir_t opendir;
- fop_readdir_t readdir;
- fop_readdirp_t readdirp;
- fop_fsyncdir_t fsyncdir;
- fop_statfs_t statfs;
- fop_setxattr_t setxattr;
- fop_getxattr_t getxattr;
- fop_fsetxattr_t fsetxattr;
- fop_fgetxattr_t fgetxattr;
- fop_removexattr_t removexattr;
- fop_fremovexattr_t fremovexattr;
- fop_lk_t lk;
- fop_inodelk_t inodelk;
- fop_finodelk_t finodelk;
- fop_entrylk_t entrylk;
- fop_fentrylk_t fentrylk;
- fop_rchecksum_t rchecksum;
- fop_xattrop_t xattrop;
- fop_fxattrop_t fxattrop;
- fop_setattr_t setattr;
- fop_fsetattr_t fsetattr;
- fop_getspec_t getspec;
-
- /* these entries are used for a typechecking hack in STACK_WIND _only_ */
- fop_lookup_cbk_t lookup_cbk;
- fop_stat_cbk_t stat_cbk;
- fop_fstat_cbk_t fstat_cbk;
- fop_truncate_cbk_t truncate_cbk;
- fop_ftruncate_cbk_t ftruncate_cbk;
- fop_access_cbk_t access_cbk;
- fop_readlink_cbk_t readlink_cbk;
- fop_mknod_cbk_t mknod_cbk;
- fop_mkdir_cbk_t mkdir_cbk;
- fop_unlink_cbk_t unlink_cbk;
- fop_rmdir_cbk_t rmdir_cbk;
- fop_symlink_cbk_t symlink_cbk;
- fop_rename_cbk_t rename_cbk;
- fop_link_cbk_t link_cbk;
- fop_create_cbk_t create_cbk;
- fop_open_cbk_t open_cbk;
- fop_readv_cbk_t readv_cbk;
- fop_writev_cbk_t writev_cbk;
- fop_flush_cbk_t flush_cbk;
- fop_fsync_cbk_t fsync_cbk;
- fop_opendir_cbk_t opendir_cbk;
- fop_readdir_cbk_t readdir_cbk;
- fop_readdirp_cbk_t readdirp_cbk;
- fop_fsyncdir_cbk_t fsyncdir_cbk;
- fop_statfs_cbk_t statfs_cbk;
- fop_setxattr_cbk_t setxattr_cbk;
- fop_getxattr_cbk_t getxattr_cbk;
- fop_fsetxattr_cbk_t fsetxattr_cbk;
- fop_fgetxattr_cbk_t fgetxattr_cbk;
- fop_removexattr_cbk_t removexattr_cbk;
- fop_fremovexattr_cbk_t fremovexattr_cbk;
- fop_lk_cbk_t lk_cbk;
- fop_inodelk_cbk_t inodelk_cbk;
- fop_finodelk_cbk_t finodelk_cbk;
- fop_entrylk_cbk_t entrylk_cbk;
- fop_fentrylk_cbk_t fentrylk_cbk;
- fop_rchecksum_cbk_t rchecksum_cbk;
- fop_xattrop_cbk_t xattrop_cbk;
- fop_fxattrop_cbk_t fxattrop_cbk;
- fop_setattr_cbk_t setattr_cbk;
- fop_fsetattr_cbk_t fsetattr_cbk;
- fop_getspec_cbk_t getspec_cbk;
-};
-
-typedef int32_t (*cbk_forget_t) (xlator_t *this,
- inode_t *inode);
-
-typedef int32_t (*cbk_release_t) (xlator_t *this,
- fd_t *fd);
-
-struct xlator_cbks {
- cbk_forget_t forget;
- cbk_release_t release;
- cbk_release_t releasedir;
-};
-
-typedef int32_t (*dumpop_priv_t) (xlator_t *this);
-
-typedef int32_t (*dumpop_inode_t) (xlator_t *this);
-
-typedef int32_t (*dumpop_fd_t) (xlator_t *this);
-
-typedef int32_t (*dumpop_inodectx_t) (xlator_t *this, inode_t *ino);
-
-typedef int32_t (*dumpop_fdctx_t) (xlator_t *this, fd_t *fd);
-
-typedef int32_t (*dumpop_priv_to_dict_t) (xlator_t *this, dict_t *dict);
-
-typedef int32_t (*dumpop_inode_to_dict_t) (xlator_t *this, dict_t *dict);
-
-typedef int32_t (*dumpop_fd_to_dict_t) (xlator_t *this, dict_t *dict);
-
-typedef int32_t (*dumpop_inodectx_to_dict_t) (xlator_t *this, inode_t *ino,
- dict_t *dict);
-
-typedef int32_t (*dumpop_fdctx_to_dict_t) (xlator_t *this, fd_t *fd,
- dict_t *dict);
-
-typedef int32_t (*dumpop_eh_t) (xlator_t *this);
-
-struct xlator_dumpops {
- dumpop_priv_t priv;
- dumpop_inode_t inode;
- dumpop_fd_t fd;
- dumpop_inodectx_t inodectx;
- dumpop_fdctx_t fdctx;
- dumpop_priv_to_dict_t priv_to_dict;
- dumpop_inode_to_dict_t inode_to_dict;
- dumpop_fd_to_dict_t fd_to_dict;
- dumpop_inodectx_to_dict_t inodectx_to_dict;
- dumpop_fdctx_to_dict_t fdctx_to_dict;
- dumpop_eh_t history;
-};
-
-typedef struct xlator_list {
- xlator_t *xlator;
- struct xlator_list *next;
-} xlator_list_t;
-
-
-struct _xlator {
- /* Built during parsing */
- char *name;
- char *type;
- xlator_t *next;
- xlator_t *prev;
- xlator_list_t *parents;
- xlator_list_t *children;
- dict_t *options;
-
- /* Set after doing dlopen() */
- void *dlhandle;
- struct xlator_fops *fops;
- struct xlator_cbks *cbks;
- struct xlator_dumpops *dumpops;
- struct list_head volume_options; /* list of volume_option_t */
-
- void (*fini) (xlator_t *this);
- int32_t (*init) (xlator_t *this);
- int32_t (*reconfigure) (xlator_t *this, dict_t *options);
- int32_t (*mem_acct_init) (xlator_t *this);
- event_notify_fn_t notify;
-
- gf_loglevel_t loglevel; /* Log level for translator */
-
- /* for latency measurement */
- fop_latency_t latencies[GF_FOP_MAXVALUE];
-
- /* Misc */
- eh_t *history; /* event history context */
- glusterfs_ctx_t *ctx;
- glusterfs_graph_t *graph; /* not set for fuse */
- inode_table_t *itable;
- char init_succeeded;
- void *private;
- struct mem_acct mem_acct;
- uint64_t winds;
- char switched;
-
- /* for the memory pool of 'frame->local' */
- struct mem_pool *local_pool;
-};
-
-#define xlator_has_parent(xl) (xl->parents != NULL)
-
-#define XLATOR_NOTIFY(_xl, params ...) \
- do { \
- xlator_t *_old_THIS = NULL; \
- \
- _old_THIS = THIS; \
- THIS = _xl; \
- \
- ret = _xl->notify (_xl, params);\
- \
- THIS = _old_THIS; \
- } while (0);
-
-int32_t xlator_set_type_virtual (xlator_t *xl, const char *type);
-
-int32_t xlator_set_type (xlator_t *xl, const char *type);
-
-int32_t xlator_dynload (xlator_t *xl);
-
-xlator_t *file_to_xlator_tree (glusterfs_ctx_t *ctx,
- FILE *fp);
-
-int xlator_notify (xlator_t *this, int32_t event, void *data, ...);
-int xlator_init (xlator_t *this);
-int xlator_destroy (xlator_t *xl);
-
-int32_t xlator_tree_init (xlator_t *xl);
-int32_t xlator_tree_free (xlator_t *xl);
-
-void xlator_tree_fini (xlator_t *xl);
-
-void xlator_foreach (xlator_t *this,
- void (*fn) (xlator_t *each,
- void *data),
- void *data);
-
-xlator_t *xlator_search_by_name (xlator_t *any, const char *name);
-
-void inode_destroy_notify (inode_t *inode, const char *xlname);
-
-int loc_copy (loc_t *dst, loc_t *src);
-#define loc_dup(src, dst) loc_copy(dst, src)
-void loc_wipe (loc_t *loc);
-int loc_path (loc_t *loc, const char *bname);
-int xlator_mem_acct_init (xlator_t *xl, int num_types);
-int is_gf_log_command (xlator_t *trans, const char *name, char *value);
-int glusterd_check_log_level (const char *value);
-int xlator_volopt_dynload (char *xlator_type, void **dl_handle,
- volume_opt_list_t *vol_opt_handle);
-int32_t glusterfs_rebalance_event_notify (dict_t *dict);
-#endif /* _XLATOR_H */
diff --git a/libglusterfsclient/src/Makefile.am b/libglusterfsclient/src/Makefile.am
deleted file mode 100644
index 32811c0d5bf..00000000000
--- a/libglusterfsclient/src/Makefile.am
+++ /dev/null
@@ -1,16 +0,0 @@
-lib_LTLIBRARIES = libglusterfsclient.la
-noinst_HEADERS = libglusterfsclient-internals.h
-libglusterfsclient_HEADERS = libglusterfsclient.h
-libglusterfsclientdir = $(includedir)
-
-libglusterfsclient_la_SOURCES = libglusterfsclient.c libglusterfsclient-dentry.c
-libglusterfsclient_la_CFLAGS = -fPIC -Wall
-libglusterfsclient_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-libglusterfsclient_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D$(GF_HOST_OS) -D__USE_FILE_OFFSET64 -D_GNU_SOURCE -I$(top_srcdir)/libglusterfs/src -DDATADIR=\"$(localstatedir)\" -DCONFDIR=\"$(sysconfdir)/glusterfs\" $(GF_CFLAGS)
-libglusterfsclient_la_LDFLAGS = -shared -nostartfiles
-
-CLEANFILES =
-
-$(top_builddir)/libglusterfs/src/libglusterfs.la:
- $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
-
diff --git a/libglusterfsclient/src/libglusterfsclient-dentry.c b/libglusterfsclient/src/libglusterfsclient-dentry.c
deleted file mode 100644
index 3fa5f6e1e74..00000000000
--- a/libglusterfsclient/src/libglusterfsclient-dentry.c
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include "libglusterfsclient.h"
-#include "libglusterfsclient-internals.h"
-#include <libgen.h>
-
-#define LIBGLUSTERFS_CLIENT_DENTRY_LOC_PREPARE(_new_loc, _loc, _parent, \
- _resolved) do { \
- size_t pathlen = 0; \
- size_t resolvedlen = 0; \
- char *path = NULL; \
- int pad = 0; \
- pathlen = strlen (_loc->path) + 1; \
- path = CALLOC (1, pathlen); \
- _new_loc.parent = _parent; \
- resolvedlen = strlen (_resolved); \
- strncpy (path, _resolved, resolvedlen); \
- if (resolvedlen == 1) /* only root resolved */ \
- pad = 0; \
- else { \
- pad = 1; \
- path[resolvedlen] = '/'; \
- } \
- strcpy_till (path + resolvedlen + pad, \
- loc->path + resolvedlen + pad, '/'); \
- _new_loc.path = path; \
- _new_loc.name = strrchr (path, '/'); \
- if (_new_loc.name) \
- _new_loc.name++; \
- }while (0);
-
-
-/* strcpy_till - copy @dname to @dest, until 'delim' is encountered in @dest
- * @dest - destination string
- * @dname - source string
- * @delim - delimiter character
- *
- * return - NULL is returned if '0' is encountered in @dname, otherwise returns
- * a pointer to remaining string begining in @dest.
- */
-static char *
-strcpy_till (char *dest, const char *dname, char delim)
-{
- char *src = NULL;
- int idx = 0;
- char *ret = NULL;
-
- src = (char *)dname;
- while (src[idx] && (src[idx] != delim)) {
- dest[idx] = src[idx];
- idx++;
- }
-
- dest[idx] = 0;
-
- if (src[idx] == 0)
- ret = NULL;
- else
- ret = &(src[idx]);
-
- return ret;
-}
-
-/* __libgf_client_path_to_parenti - derive parent inode for @path. if immediate
- * parent is not available in the dentry cache, return nearest
- * available parent inode and set @reslv to the path of
- * the returned directory.
- *
- * @itable - inode table
- * @path - path whose parent has to be looked up.
- * @reslv - if immediate parent is not available, reslv will be set to path of the
- * resolved parent.
- *
- * return - should never return NULL. should at least return '/' inode.
- */
-static inode_t *
-__libgf_client_path_to_parenti (libglusterfs_client_ctx_t *ctx,
- inode_table_t *itable, const char *path,
- char **reslv)
-{
- char *resolved_till = NULL;
- char *strtokptr = NULL;
- char *component = NULL;
- char *next_component = NULL;
- char *pathdup = NULL;
- inode_t *curr = NULL;
- inode_t *parent = NULL;
- size_t pathlen = 0;
- loc_t rootloc = {0, };
- int ret = -1;
-
- pathlen = STRLEN_0 (path);
- resolved_till = CALLOC (1, pathlen);
-
- GF_VALIDATE_OR_GOTO("libglusterfsclient-dentry", resolved_till, out);
- pathdup = strdup (path);
- GF_VALIDATE_OR_GOTO("libglusterfsclient-dentry", pathdup, out);
-
- parent = inode_ref (itable->root);
- /* If the root inode's is outdated, send a revalidate on it.
- * A revalidate on root inode also reduces the window in which an
- * op will fail over distribute because the layout of the root
- * directory did not get constructed when we sent the lookup on
- * root in glusterfs_init. That can happen when not all children of a
- * distribute volume were up at the time of glusterfs_init.
- */
- if (!libgf_is_iattr_cache_valid (ctx, parent, NULL,
- LIBGF_VALIDATE_LOOKUP)) {
- libgf_client_loc_fill (&rootloc, ctx, 1, 0, "/");
- ret = libgf_client_lookup (ctx, &rootloc, NULL, NULL, NULL);
- if (ret == -1) {
- gf_log ("libglusterfsclient-dentry", GF_LOG_ERROR,
- "Root inode revalidation failed");
- inode_unref (parent);
- parent = NULL;
- goto out;
- }
- libgf_client_loc_wipe (&rootloc);
- }
-
- curr = NULL;
-
- component = strtok_r (pathdup, "/", &strtokptr);
-
- while (component) {
- curr = inode_search (itable, parent->ino, component);
- if (!curr) {
- break;
- }
- if (!libgf_is_iattr_cache_valid (ctx, curr, NULL,
- LIBGF_VALIDATE_LOOKUP))
- break;
-
- /* It is OK to append the component even if it is the
- last component in the path, because, if 'next_component'
- returns NULL, @parent will remain the same and
- @resolved_till will not be sent back
- */
- strcat (resolved_till, "/");
- strcat (resolved_till, component);
-
- next_component = strtok_r (NULL, "/", &strtokptr);
-
- if (next_component) {
- inode_unref (parent);
- parent = curr;
- curr = NULL;
- } else {
- /* will break */
- inode_unref (curr);
- }
-
- component = next_component;
- }
-
- if (resolved_till[0] == '\0') {
- strcat (resolved_till, "/");
- }
-
- free (pathdup);
-
- if (reslv) {
- *reslv = resolved_till;
- } else {
- FREE (resolved_till);
- }
-
-out:
- return parent;
-}
-
-static inline void
-libgf_client_update_resolved (const char *path, char *resolved)
-{
- int32_t pathlen = 0;
- char *tmp = NULL, *dest = NULL, *dname = NULL;
- char append_slash = 0;
-
- pathlen = strlen (resolved);
- tmp = (char *)(resolved + pathlen);
- if (*((char *) (resolved + pathlen - 1)) != '/') {
- tmp[0] = '/';
- append_slash = 1;
- }
-
- if (append_slash) {
- dest = tmp + 1;
- } else {
- dest = tmp;
- }
-
- if (*((char *) path + pathlen) == '/') {
- dname = (char *) path + pathlen + 1;
- } else {
- dname = (char *) path + pathlen;
- }
-
- strcpy_till (dest, dname, '/');
-}
-
-/* __do_path_resolve - resolve @loc->path into @loc->inode and @loc->parent. also
- * update the dentry cache
- *
- * @loc - loc to resolve.
- * @ctx - libglusterfsclient context
- * @lookup_basename - flag whether to lookup basename(loc->path)
- *
- * return - 0 on success
- * -1 on failure
- *
- */
-static int32_t
-__do_path_resolve (loc_t *loc, libglusterfs_client_ctx_t *ctx,
- char lookup_basename)
-{
- int32_t op_ret = -1;
- char *resolved = NULL;
- inode_t *parent = NULL, *inode = NULL;
- dentry_t *dentry = NULL;
- loc_t new_loc = {0, };
- char *pathname = NULL, *directory = NULL;
- char *file = NULL;
-
- parent = loc->parent;
- if (parent) {
- inode_ref (parent);
- gf_log ("libglusterfsclient-dentry", GF_LOG_DEBUG,
- "loc->parent(%"PRId64") already present. sending "
- "lookup for %"PRId64"/%s", parent->ino, parent->ino,
- loc->path);
- resolved = strdup (loc->path);
- resolved = dirname (resolved);
- } else {
- parent = __libgf_client_path_to_parenti (ctx, ctx->itable,
- loc->path, &resolved);
- }
-
- if (parent == NULL) {
- /* fire in the bush.. run! run!! run!!! */
- gf_log ("libglusterfsclient-dentry",
- GF_LOG_CRITICAL,
- "failed to get parent inode number");
- op_ret = -1;
- goto out;
- }
-
- gf_log ("libglusterfsclient-dentry",
- GF_LOG_DEBUG,
- "resolved path(%s) till %"PRId64"(%s). "
- "sending lookup for remaining path",
- loc->path, parent->ino, resolved);
-
- pathname = strdup (loc->path);
- directory = dirname (pathname);
- pathname = NULL;
-
- while (strcmp (resolved, directory) != 0)
- {
- dentry = NULL;
-
- LIBGLUSTERFS_CLIENT_DENTRY_LOC_PREPARE (new_loc, loc, parent,
- resolved);
-
- if (pathname) {
- free (pathname);
- pathname = NULL;
- }
-
- pathname = strdup (new_loc.path);
- file = basename (pathname);
-
- new_loc.inode = inode_search (ctx->itable, parent->ino, file);
- if (new_loc.inode) {
- if (libgf_is_iattr_cache_valid (ctx, new_loc.inode,
- NULL,
- LIBGF_VALIDATE_LOOKUP))
- dentry = dentry_search_for_inode (new_loc.inode,
- parent->ino,
- file);
- }
-
- if (dentry == NULL) {
- op_ret = libgf_client_lookup (ctx, &new_loc, NULL, NULL,
- 0);
- if (op_ret == -1) {
- inode_ref (new_loc.parent);
- libgf_client_loc_wipe (&new_loc);
- goto out;
- }
- }
-
- parent = inode_ref (new_loc.inode);
- libgf_client_loc_wipe (&new_loc);
-
- libgf_client_update_resolved (loc->path, resolved);
- }
-
- if (pathname) {
- free (pathname);
- pathname = NULL;
- }
-
- if (lookup_basename) {
- pathname = strdup (loc->path);
- file = basename (pathname);
-
- inode = inode_search (ctx->itable, parent->ino, file);
- if (!inode) {
- libgf_client_loc_fill (&new_loc, ctx, 0, parent->ino,
- file);
-
- op_ret = libgf_client_lookup (ctx, &new_loc, NULL, NULL,
- 0);
- if (op_ret == -1) {
- libgf_client_loc_wipe (&new_loc);
- goto out;
- }
-
- inode = inode_ref (new_loc.inode);
- libgf_client_loc_wipe (&new_loc);
- }
- }
-
- op_ret = 0;
-out:
- loc->inode = inode;
- loc->parent = parent;
-
- FREE (resolved);
- if (pathname) {
- FREE (pathname);
- }
-
- if (directory) {
- FREE (directory);
- }
-
- return op_ret;
-}
-
-
-/* resolves loc->path to loc->parent and loc->inode */
-int32_t
-libgf_client_path_lookup (loc_t *loc,
- libglusterfs_client_ctx_t *ctx,
- char lookup_basename)
-{
- char *pathname = NULL;
- char *directory = NULL;
- inode_t *inode = NULL;
- inode_t *parent = NULL;
- int32_t op_ret = 0;
-
- pathname = strdup (loc->path);
- directory = dirname (pathname);
- parent = inode_from_path (ctx->itable, directory);
-
- if (parent != NULL) {
- loc->parent = parent;
-
- if (!lookup_basename) {
- gf_log ("libglusterfsclient",
- GF_LOG_DEBUG,
- "resolved dirname(%s) to %"PRId64,
- loc->path, parent->ino);
- goto out;
- } else {
- inode = inode_from_path (ctx->itable, loc->path);
- if (inode != NULL) {
- gf_log ("libglusterfsclient",
- GF_LOG_DEBUG,
- "resolved path(%s) to %"PRId64"/%"PRId64,
- loc->path, parent->ino, inode->ino);
- loc->inode = inode;
- goto out;
- }
- }
- }
-
- if (parent) {
- inode_unref (parent);
- } else if (inode) {
- inode_unref (inode);
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "undesired behaviour. inode(%"PRId64") for %s "
- "exists without parent (%s)",
- inode->ino, loc->path, directory);
- }
- op_ret = __do_path_resolve (loc, ctx, lookup_basename);
-out:
- if (pathname)
- free (pathname);
-
- return op_ret;
-}
diff --git a/libglusterfsclient/src/libglusterfsclient-internals.h b/libglusterfsclient/src/libglusterfsclient-internals.h
deleted file mode 100755
index 7b62ce8efbf..00000000000
--- a/libglusterfsclient/src/libglusterfsclient-internals.h
+++ /dev/null
@@ -1,289 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __LIBGLUSTERFSCLIENT_INTERNALS_H
-#define __LIBGLUSTERFSCLIENT_INTERNALS_H
-
-#include <glusterfs.h>
-#include <logging.h>
-#include <inode.h>
-#include <pthread.h>
-#include <stack.h>
-#include <list.h>
-#include <signal.h>
-#include <call-stub.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-#include <fd.h>
-#include <dirent.h>
-
-#define LIBGF_IOBUF_SIZE (128 *GF_UNIT_KB)
-typedef void (*sighandler_t) (int);
-typedef struct list_head list_head_t;
-
-typedef struct libglusterfs_client_ctx {
- glusterfs_ctx_t gf_ctx;
- inode_table_t *itable;
- pthread_t reply_thread;
- call_pool_t pool;
- uint32_t counter;
- time_t lookup_timeout;
- time_t stat_timeout;
- /* We generate a fake fsid for the subvolume being
- * accessed through this context.
- */
- dev_t fake_fsid;
- pid_t pid;
-}libglusterfs_client_ctx_t;
-
-typedef struct signal_handler {
- int signo;
- sighandler_t handler;
- list_head_t next;
-}libgf_client_signal_handler_t ;
-
-typedef struct {
- pthread_mutex_t lock;
- pthread_cond_t reply_cond;
- call_stub_t *reply_stub;
- char complete;
- union {
- struct {
- char is_revalidate;
- loc_t *loc;
- int32_t size;
- } lookup;
- }fop;
- fd_t *fd; /* Needed here because we need a ref to the dir
- fd in the libgf_client_readdir_cbk in order
- to process the dirents received, without
- having them added to the reply stub.
- Also used in updating iattr cache. See
- readv_cbk for eg.
- */
-}libgf_client_local_t;
-
-typedef struct {
- pthread_cond_t init_con_established;
- pthread_mutex_t lock;
- char complete;
-}libglusterfs_client_private_t;
-
-typedef struct {
- pthread_mutex_t lock;
- uint32_t previous_lookup_time;
- uint32_t previous_stat_time;
- struct iatt stbuf;
-} libglusterfs_client_inode_ctx_t;
-
-/* Our dirent cache is very simplistic when it comes to directory
- * reading workloads. It assumes that all directory traversal operations happen
- * sequentially and that readdir callers dont go jumping around the directory
- * using seekdir, rewinddir. Thats why you'll notice that seekdir, rewinddir
- * API in libglusterfsclient only set the offset. The consequence is that when
- * libgf_dcache_readdir finds that the offset presented to it, is not
- * the same as the offset of the previous dirent returned by dcache (..stored
- * in struct direntcache->prev_off..), it realises that a non-sequential
- * directory read is in progress and returns 0 to signify that the cache is
- * not valid.
- * This could be made a bit more intelligent by using a data structure like
- * a hash-table or a balanced binary tree that allows us to search for the
- * existence of particular offsets in the cache without performing a list or
- * array traversal.
- * Dont use a simple binary search tree because
- * there is no guarantee that offsets in a sequential reading of the directory
- * will be just random integers. If for some reason they are sequential, a BST
- * will end up becoming a list.
- */
-struct direntcache {
- gf_dirent_t entries; /* Head of list of cached dirents. */
- gf_dirent_t *next; /* Pointer to the next entry that
- * should be sent by readdir */
- uint64_t prev_off; /* Offset where the next read will
- * happen.
- */
-};
-
-typedef struct {
- pthread_mutex_t lock;
- off_t offset;
- libglusterfs_client_ctx_t *ctx;
- /* `man readdir` says readdir is non-re-entrant
- * only if two readdirs are racing on the same
- * handle.
- */
- struct dirent dirp;
- struct direntcache *dcache;
- char vpath[PATH_MAX];
-} libglusterfs_client_fd_ctx_t;
-
-typedef struct libglusterfs_client_async_local {
- void *cbk_data;
- union {
- struct {
- fd_t *fd;
- glusterfs_readv_cbk_t cbk;
- char update_offset;
- }readv_cbk;
-
- struct {
- fd_t *fd;
- glusterfs_write_cbk_t cbk;
- }write_cbk;
-
- struct {
- fd_t *fd;
- }close_cbk;
-
- struct {
- void *buf;
- size_t size;
- loc_t *loc;
- char is_revalidate;
- glusterfs_get_cbk_t cbk;
- }lookup_cbk;
- }fop;
-}libglusterfs_client_async_local_t;
-
-#define LIBGF_STACK_WIND_AND_WAIT(frame, rfn, obj, fn, params ...) \
- do { \
- STACK_WIND (frame, rfn, obj, fn, params); \
- pthread_mutex_lock (&local->lock); \
- { \
- while (!local->complete) { \
- pthread_cond_wait (&local->reply_cond, \
- &local->lock); \
- } \
- } \
- pthread_mutex_unlock (&local->lock); \
- } while (0)
-
-
-#define LIBGF_CLIENT_SIGNAL(signal_handler_list, signo, handler) \
- do { \
- libgf_client_signal_handler_t *libgf_handler = CALLOC (1, \
- sizeof (*libgf_handler)); \
- ERR_ABORT (libgf_handler); \
- libgf_handler->signo = signo; \
- libgf_handler->handler = signal (signo, handler); \
- list_add (&libgf_handler->next, signal_handler_list); \
- } while (0)
-
-#define LIBGF_INSTALL_SIGNAL_HANDLERS(signal_handlers) \
- do { \
- INIT_LIST_HEAD (&signal_handlers); \
- /* Handle SIGABORT and SIGSEGV */ \
- LIBGF_CLIENT_SIGNAL (&signal_handlers, SIGSEGV, gf_print_trace); \
- LIBGF_CLIENT_SIGNAL (&signal_handlers, SIGABRT, gf_print_trace); \
- LIBGF_CLIENT_SIGNAL (&signal_handlers, SIGHUP, gf_log_logrotate); \
- /* LIBGF_CLIENT_SIGNAL (SIGTERM, glusterfs_cleanup_and_exit); */ \
- } while (0)
-
-#define LIBGF_RESTORE_SIGNAL_HANDLERS(local) \
- do { \
- libgf_client_signal_handler_t *ptr = NULL, *tmp = NULL; \
- list_for_each_entry_safe (ptr, tmp, &local->signal_handlers,\
- next) { \
- signal (ptr->signo, ptr->handler); \
- FREE (ptr); \
- } \
- } while (0)
-
-#define LIBGF_CLIENT_FOP_ASYNC(ctx, local, ret_fn, op, args ...) \
- do { \
- call_frame_t *frame = get_call_frame_for_req (ctx, 1); \
- xlator_t *xl = frame->this->children ? \
- frame->this->children->xlator : NULL; \
- frame->root->state = ctx; \
- frame->local = local; \
- STACK_WIND (frame, ret_fn, xl, xl->fops->op, args); \
- } while (0)
-
-#define LIBGF_CLIENT_FOP(ctx, stub, op, local, args ...) \
- do { \
- call_frame_t *frame = get_call_frame_for_req (ctx, 1); \
- xlator_t *xl = frame->this->children ? \
- frame->this->children->xlator : NULL; \
- if (!local) { \
- local = CALLOC (1, sizeof (*local)); \
- } \
- ERR_ABORT (local); \
- frame->local = local; \
- frame->root->state = ctx; \
- pthread_cond_init (&local->reply_cond, NULL); \
- pthread_mutex_init (&local->lock, NULL); \
- LIBGF_STACK_WIND_AND_WAIT (frame, libgf_client_##op##_cbk, xl, \
- xl->fops->op, args); \
- stub = local->reply_stub; \
- FREE (frame->local); \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- } while (0)
-
-#define LIBGF_REPLY_NOTIFY(local) \
- do { \
- pthread_mutex_lock (&local->lock); \
- { \
- local->complete = 1; \
- pthread_cond_broadcast (&local->reply_cond); \
- } \
- pthread_mutex_unlock (&local->lock); \
- } while (0)
-
-
-void
-libgf_client_loc_wipe (loc_t *loc);
-
-int32_t
-libgf_client_loc_fill (loc_t *loc,
- libglusterfs_client_ctx_t *ctx,
- ino_t ino,
- ino_t par,
- const char *name);
-
-int32_t
-libgf_client_path_lookup (loc_t *loc,
- libglusterfs_client_ctx_t *ctx,
- char lookup_basename);
-
-int32_t
-libgf_client_lookup (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- struct iatt *stbuf,
- dict_t **dict,
- dict_t *xattr_req);
-
-/* We're not expecting more than 10-15
- * VMPs per process so a list is acceptable.
- */
-struct vmp_entry {
- struct list_head list;
- char * vmp;
- int vmplen;
- glusterfs_handle_t handle;
-};
-
-#define LIBGF_UPDATE_LOOKUP 0x1
-#define LIBGF_UPDATE_STAT 0x2
-#define LIBGF_UPDATE_ALL (LIBGF_UPDATE_LOOKUP | LIBGF_UPDATE_STAT)
-
-#define LIBGF_VALIDATE_LOOKUP 0x1
-#define LIBGF_VALIDATE_STAT 0x2
-
-#define LIBGF_INVALIDATE_LOOKUP 0x1
-#define LIBGF_INVALIDATE_STAT 0x2
-int
-libgf_is_iattr_cache_valid (libglusterfs_client_ctx_t *ctx, inode_t *inode,
- struct iatt *sbuf, int flags);
-
-int
-libgf_update_iattr_cache (inode_t *inode, int flags, struct iatt *buf);
-
-#endif
diff --git a/libglusterfsclient/src/libglusterfsclient.c b/libglusterfsclient/src/libglusterfsclient.c
deleted file mode 100755
index 39b5bea5068..00000000000
--- a/libglusterfsclient/src/libglusterfsclient.c
+++ /dev/null
@@ -1,8158 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _GNU_SOURCE
-#define _GNU_SOURCE
-#endif
-
-#include <stdio.h>
-#include <errno.h>
-#include <libgen.h>
-#include <stddef.h>
-
-#include <sys/time.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#ifdef GF_SOLARIS_HOST_OS
-#include <sys/statfs.h>
-#endif
-#include <unistd.h>
-#include <xlator.h>
-#include <timer.h>
-#include "defaults.h"
-#include <time.h>
-#include <poll.h>
-#include "transport.h"
-#include "event.h"
-#include "libglusterfsclient.h"
-#include "libglusterfsclient-internals.h"
-#include "compat.h"
-#include "compat-errno.h"
-#ifndef GF_SOLARIS_HOST_OS
-#include <sys/vfs.h>
-#endif
-#include <utime.h>
-#include <sys/param.h>
-#include <list.h>
-#include <stdarg.h>
-#include <sys/statvfs.h>
-#include "hashfn.h"
-#include <sys/select.h>
-
-#define LIBGF_XL_NAME "libglusterfsclient"
-#define LIBGLUSTERFS_INODE_TABLE_LRU_LIMIT 1000 //14057
-#define LIBGF_SENDFILE_BLOCK_SIZE 4096
-#define LIBGF_READDIR_BLOCK 4096
-#define libgf_path_absolute(path) ((path)[0] == '/')
-
-static inline xlator_t *
-libglusterfs_graph (xlator_t *graph);
-int32_t libgf_client_readlink (libglusterfs_client_ctx_t *ctx, loc_t *loc,
- char *buf, size_t bufsize);
-
-int
-libgf_realpath_loc_fill (libglusterfs_client_ctx_t *ctx, char *link,
- loc_t *targetloc);
-static int first_init = 1;
-
-/* The global list of virtual mount points */
-struct {
- struct list_head list;
- int entries;
-}vmplist;
-
-
-/* Protects the VMP list above. */
-pthread_mutex_t vmplock = PTHREAD_MUTEX_INITIALIZER;
-
-/* Ensures only one thread is ever calling glusterfs_mount.
- * Since that function internally calls routines which
- * use the yacc parser code using global vars, this process
- * needs to be syncronised.
- */
-pthread_mutex_t mountlock = PTHREAD_MUTEX_INITIALIZER;
-
-static char cwd[PATH_MAX];
-static char cwd_inited = 0;
-static pthread_mutex_t cwdlock = PTHREAD_MUTEX_INITIALIZER;
-
-char *
-libgf_vmp_virtual_path (struct vmp_entry *entry, const char *path, char *vpath)
-{
- char *tmp = NULL;
-
- tmp = ((char *)(path + (entry->vmplen-1)));
- if (strlen (tmp) > 0) {
- if (tmp[0] != '/') {
- vpath[0] = '/';
- vpath[1] = '\0';
- strcat (&vpath[1], tmp);
- } else
- strcpy (vpath, tmp);
- } else {
- vpath[0] = '/';
- vpath[1] = '\0';
- }
-
- return vpath;
-}
-
-char *
-zr_build_process_uuid ()
-{
- char tmp_str[1024] = {0,};
- char hostname[256] = {0,};
- struct timeval tv = {0,};
- char now_str[32];
-
- if (-1 == gettimeofday(&tv, NULL)) {
- gf_log ("", GF_LOG_ERROR,
- "gettimeofday: failed %s",
- strerror (errno));
- }
-
- if (-1 == gethostname (hostname, 256)) {
- gf_log ("", GF_LOG_ERROR,
- "gethostname: failed %s",
- strerror (errno));
- }
-
- gf_time_fmt (now_str, sizeof now_str, tv.tv_sec, gf_timefmt_Ymd_T);
- snprintf (tmp_str, sizeof tmp_str, "%s-%d-%s:%ld",
- hostname, getpid(), now_str, tv.tv_usec);
-
- return strdup (tmp_str);
-}
-
-
-int32_t
-libgf_client_forget (xlator_t *this,
- inode_t *inode)
-{
- uint64_t ptr = 0;
- libglusterfs_client_inode_ctx_t *ctx = NULL;
-
- inode_ctx_del (inode, this, &ptr);
- ctx = (libglusterfs_client_inode_ctx_t *)(long) ptr;
-
- FREE (ctx);
-
- return 0;
-}
-
-xlator_t *
-libgf_inode_to_xlator (inode_t *inode)
-{
- if (!inode)
- return NULL;
-
- if (!inode->table)
- return NULL;
-
- if (!inode->table->xl)
- return NULL;
-
- if (!inode->table->xl->ctx)
- return NULL;
-
- return inode->table->xl->ctx->top;
-}
-
-libglusterfs_client_fd_ctx_t *
-libgf_get_fd_ctx (fd_t *fd)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_fd_ctx_t *ctx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- if (fd_ctx_get (fd, libgf_inode_to_xlator (fd->inode), &ctxaddr) == -1)
- goto out;
-
- ctx = (libglusterfs_client_fd_ctx_t *)(long)ctxaddr;
-
-out:
- return ctx;
-}
-
-libglusterfs_client_fd_ctx_t *
-libgf_alloc_fd_ctx (libglusterfs_client_ctx_t *ctx, fd_t *fd, char *vpath)
-{
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- uint64_t ctxaddr = 0;
-
- fdctx = CALLOC (1, sizeof (*fdctx));
- if (fdctx == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,
- "memory allocation failure");
- fdctx = NULL;
- goto out;
- }
-
- pthread_mutex_init (&fdctx->lock, NULL);
- fdctx->ctx = ctx;
- ctxaddr = (uint64_t) (long)fdctx;
-
- if (fd->inode) {
- if (IA_ISDIR (fd->inode->ia_type)) {
- fdctx->dcache = CALLOC (1, sizeof (struct direntcache));
- if (fdctx->dcache)
- INIT_LIST_HEAD (&fdctx->dcache->entries.list);
- /* If the calloc fails, we can still continue
- * working as the dcache is not required for correct
- * operation.
- */
- }
- }
-
- if (vpath != NULL) {
- strcpy (fdctx->vpath, vpath);
- if (vpath[strlen(vpath) - 1] != '/') {
- strcat (fdctx->vpath, "/");
- }
- }
-
- fd_ctx_set (fd, libgf_inode_to_xlator (fd->inode), ctxaddr);
-out:
- return fdctx;
-}
-
-libglusterfs_client_fd_ctx_t *
-libgf_del_fd_ctx (fd_t *fd)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_fd_ctx_t *ctx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- if (fd_ctx_del (fd, libgf_inode_to_xlator (fd->inode) , &ctxaddr) == -1)
- goto out;
-
- ctx = (libglusterfs_client_fd_ctx_t *)(long)ctxaddr;
-
-out:
- return ctx;
-}
-
-void
-libgf_dcache_invalidate (fd_t *fd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (!fd)
- return;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- return;
- }
-
- if (!fd_ctx->dcache) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No dcache present");
- return;
- }
-
- if (!list_empty (&fd_ctx->dcache->entries.list))
- gf_dirent_free (&fd_ctx->dcache->entries);
-
- INIT_LIST_HEAD (&fd_ctx->dcache->entries.list);
-
- fd_ctx->dcache->next = NULL;
- fd_ctx->dcache->prev_off = 0;
-
- return;
-}
-
-/* The first entry in the entries is always a placeholder
- * or the list head. The real entries begin from entries->next.
- */
-int
-libgf_dcache_update (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- gf_dirent_t *entries)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int op_ret = -1;
-
- if ((!ctx) || (!fd) || (!entries)) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- /* dcache is not enabled. */
- if (!fd_ctx->dcache) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No dcache present");
- op_ret = 0;
- goto out;
- }
-
- /* If we're updating, we must begin with invalidating any previous
- * entries.
- */
- libgf_dcache_invalidate (fd);
-
- fd_ctx->dcache->next = entries->next;
- /* We still need to store a pointer to the head
- * so we start free'ing from the head when invalidation
- * is required.
- *
- * Need to delink the entries from the list
- * given to us by an underlying translators. Most translators will
- * free this list after this call so we must preserve the dirents in
- * order to cache them.
- */
- list_splice_init (&entries->list, &fd_ctx->dcache->entries.list);
- op_ret = 0;
-out:
- return op_ret;
-}
-
-int
-libgf_dcache_readdir (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- struct dirent *dirp, off_t *offset)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int cachevalid = 0;
-
- if ((!ctx) || (!fd) || (!dirp) || (!offset))
- return 0;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- if (!fd_ctx->dcache) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No dcache present");
- goto out;
- }
-
- /* We've either run out of entries in the cache
- * or the cache is empty.
- */
- if (!fd_ctx->dcache->next) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "No entries present");
- goto out;
- }
-
- /* The dirent list is created as a circular linked list
- * so this check is needed to ensure, we dont start
- * reading old entries again.
- * If we're reached this situation, the cache is exhausted
- * and we'll need to pre-fetch more entries to continue serving.
- */
- if (fd_ctx->dcache->next == &fd_ctx->dcache->entries) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Entries exhausted");
- goto out;
- }
-
- /* During sequential reading we generally expect that the offset
- * requested is the same as the offset we served in the previous call
- * to readdir. But, seekdir, rewinddir and libgf_dcache_invalidate
- * require special handling because seekdir/rewinddir change the offset
- * in the fd_ctx and libgf_dcache_invalidate changes the prev_off.
- */
- if (*offset != fd_ctx->dcache->prev_off) {
- /* For all cases of the if branch above, we know that the
- * cache is now invalid except for the case below. It handles
- * the case where the two offset values above are different
- * but different because the previous readdir block was
- * exhausted, resulting in a prev_off being set to 0 in
- * libgf_dcache_invalidate, while the requested offset is non
- * zero because that is what we returned for the last dirent
- * of the previous readdir block.
- */
- if ((*offset != 0) && (fd_ctx->dcache->prev_off == 0)) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Entries"
- " exhausted");
- cachevalid = 1;
- } else
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Dcache"
- " invalidated previously");
- } else
- cachevalid = 1;
-
- if (!cachevalid)
- goto out;
-
- dirp->d_ino = fd_ctx->dcache->next->d_ino;
- strncpy (dirp->d_name, fd_ctx->dcache->next->d_name,
- fd_ctx->dcache->next->d_len);
-
- *offset = fd_ctx->dcache->next->d_off;
- dirp->d_off = *offset;
- fd_ctx->dcache->prev_off = fd_ctx->dcache->next->d_off;
- fd_ctx->dcache->next = fd_ctx->dcache->next->next;
-
-out:
- return cachevalid;
-}
-
-
-int32_t
-libgf_client_release (xlator_t *this,
- fd_t *fd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_get_fd_ctx (fd);
- if (IA_ISDIR (fd->inode->ia_type)) {
- libgf_dcache_invalidate (fd);
- FREE (fd_ctx->dcache);
- }
-
- libgf_del_fd_ctx (fd);
- if (fd_ctx != NULL) {
- pthread_mutex_destroy (&fd_ctx->lock);
- FREE (fd_ctx);
- }
-
- return 0;
-}
-
-libglusterfs_client_inode_ctx_t *
-libgf_get_inode_ctx (inode_t *inode)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_inode_ctx_t *ictx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, inode, out);
- if (inode_ctx_get (inode, libgf_inode_to_xlator (inode), &ctxaddr) < 0)
- goto out;
-
- ictx = (libglusterfs_client_inode_ctx_t *)(long)ctxaddr;
-
-out:
- return ictx;
-}
-
-libglusterfs_client_inode_ctx_t *
-libgf_del_inode_ctx (inode_t *inode)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_inode_ctx_t *ictx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, inode, out);
- if (inode_ctx_del (inode, libgf_inode_to_xlator (inode), &ctxaddr) < 0)
- goto out;
-
- ictx = (libglusterfs_client_inode_ctx_t *)(long)ctxaddr;
-
-out:
- return ictx;
-}
-
-libglusterfs_client_inode_ctx_t *
-libgf_alloc_inode_ctx (libglusterfs_client_ctx_t *ctx, inode_t *inode)
-{
- uint64_t ctxaddr = 0;
- libglusterfs_client_inode_ctx_t *ictx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, inode, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- ictx = CALLOC (1, sizeof (*ictx));
- if (ictx == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,
- "memory allocation failure");
- goto out;
- }
-
- pthread_mutex_init (&ictx->lock, NULL);
- ctxaddr = (uint64_t) (long)ictx;
- if (inode_ctx_put (inode, libgf_inode_to_xlator (inode), ctxaddr) < 0){
- FREE (ictx);
- ictx = NULL;
- }
-
-out:
- return ictx;
-}
-
-int
-libgf_transform_iattr (libglusterfs_client_ctx_t *libctx, inode_t *inode,
- struct iatt *buf)
-{
-
- if ((!libctx) || (!buf) || (!inode))
- return -1;
-
- buf->ia_dev = libctx->fake_fsid;
- /* If the inode is root, the inode number must be 1 not the
- * ino received from the file system.
- */
- if ((inode->ino == 1) && (buf))
- buf->ia_ino = 1;
-
- return 0;
-}
-
-int
-libgf_update_iattr_cache (inode_t *inode, int flags, struct iatt *buf)
-{
- libglusterfs_client_inode_ctx_t *inode_ctx = NULL;
- time_t current = 0;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, inode, out);
-
- inode_ctx = libgf_get_inode_ctx (inode);
- if (!inode_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No inode context"
- " present");
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- pthread_mutex_lock (&inode_ctx->lock);
- {
- /* Take a timestamp only after we've acquired the
- * lock.
- */
- current = time (NULL);
- if (flags & LIBGF_UPDATE_LOOKUP) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Updating lookup");
- inode_ctx->previous_lookup_time = current;
- }
-
- if (flags & LIBGF_UPDATE_STAT) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Updating stat");
-
- /* Update the cached stat struct only if a new
- * stat buf is given.
- */
- if (buf != NULL) {
- inode_ctx->previous_stat_time = current;
- memcpy (&inode_ctx->stbuf, buf,
- sizeof (inode_ctx->stbuf));
- }
- }
- }
- pthread_mutex_unlock (&inode_ctx->lock);
- op_ret = 0;
-
-out:
- return op_ret;
-}
-
-
-int
-libgf_invalidate_iattr_cache (inode_t *inode, int flags)
-{
- libglusterfs_client_inode_ctx_t *ictx = NULL;
-
- if (!inode)
- return -1;
-
- ictx = libgf_get_inode_ctx (inode);
- if (!ictx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No inode context"
- " present");
- return -1;
- }
-
- pthread_mutex_lock (&ictx->lock);
- {
- if (flags & LIBGF_INVALIDATE_LOOKUP) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Invalidating"
- " lookup");
- ictx->previous_lookup_time = 0;
- }
-
- if (flags & LIBGF_INVALIDATE_STAT) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Invalidating"
- " stat");
- ictx->previous_stat_time = 0;
- }
-
- }
- pthread_mutex_unlock (&ictx->lock);
-
- return 0;
-}
-
-
-int
-libgf_is_iattr_cache_valid (libglusterfs_client_ctx_t *ctx, inode_t *inode,
- struct iatt *sbuf, int flags)
-{
- time_t current = 0;
- time_t prev = 0;
- libglusterfs_client_inode_ctx_t *inode_ctx = NULL;
- int cache_valid = 0;
- time_t timeout = 0;
-
- if (inode == NULL)
- return 0;
-
- inode_ctx = libgf_get_inode_ctx (inode);
- if (!inode_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No inode context"
- " present\n");
- return 0;
- }
-
- pthread_mutex_lock (&inode_ctx->lock);
- {
- current = time (NULL);
- if (flags & LIBGF_VALIDATE_LOOKUP) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Checking lookup");
- prev = inode_ctx->previous_lookup_time;
- timeout = ctx->lookup_timeout;
- } else {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Checking stat");
- prev = inode_ctx->previous_stat_time;
- timeout = ctx->stat_timeout;
- }
-
- /* Even if the timeout is set to -1 to cache
- * infinitely, fops like write must invalidate the
- * stat cache because writev_cbk cannot update
- * the cache using the stat returned to it. This is
- * because write-behind can return a stat bufs filled
- * with zeroes.
- */
- if (prev == 0) {
- cache_valid = 0;
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Cache Invalid");
- goto iattr_unlock_out;
- }
-
- /* Cache infinitely */
- if (timeout == (time_t)-1) {
- cache_valid = 1;
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Caching On and "
- "valid");
- goto iattr_unlock_out;
- }
-
- /* Disable caching completely */
- if (timeout == 0) {
- cache_valid = 0;
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Cache disabled");
- goto iattr_unlock_out;
- }
-
- if ((prev > 0) && (timeout >= (current - prev))) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Cache valid");
- cache_valid = 1;
- }
-
- if (flags & LIBGF_VALIDATE_LOOKUP)
- goto iattr_unlock_out;
-
- if ((cache_valid) && (sbuf))
- *sbuf = inode_ctx->stbuf;
- }
-iattr_unlock_out:
- pthread_mutex_unlock (&inode_ctx->lock);
-
- return cache_valid;
-}
-
-int32_t
-libgf_client_releasedir (xlator_t *this,
- fd_t *fd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_get_fd_ctx (fd);
- if (IA_ISDIR (fd->inode->ia_type)) {
- libgf_dcache_invalidate (fd);
- FREE (fd_ctx->dcache);
- }
-
- libgf_del_fd_ctx (fd);
- if (fd_ctx != NULL) {
- pthread_mutex_destroy (&fd_ctx->lock);
- FREE (fd_ctx);
- }
-
- return 0;
-}
-
-void *poll_proc (void *ptr)
-{
- glusterfs_ctx_t *ctx = ptr;
-
- event_dispatch (ctx->event_pool);
-
- return NULL;
-}
-
-
-int32_t
-xlator_graph_init (xlator_t *xl)
-{
- xlator_t *trav = xl;
- int32_t ret = -1;
-
- while (trav->prev)
- trav = trav->prev;
-
- while (trav) {
- if (!trav->ready) {
- ret = xlator_tree_init (trav);
- if (ret < 0)
- break;
- }
- trav = trav->next;
- }
-
- return ret;
-}
-
-
-void
-xlator_graph_fini (xlator_t *xl)
-{
- xlator_t *trav = xl;
- while (trav->prev)
- trav = trav->prev;
-
- while (trav) {
- if (!trav->init_succeeded) {
- break;
- }
-
- xlator_tree_fini (trav);
- trav = trav->next;
- }
-}
-
-/* Returns a pointer to the @n'th char matching
- * @c in string @str, starting the search from right or
- * end-of-string, rather than starting from left, as rindex
- * function does.
- */
-char *
-libgf_rrindex (char *str, int c, int n)
-{
- int len = 0;
- int occurrence = 0;
-
- if (str == NULL)
- return NULL;
-
- len = strlen (str);
- /* Point to last character of string. */
- str += (len - 1);
- while (len > 0) {
- if ((int)*str == c) {
- ++occurrence;
- if (occurrence == n)
- break;
- }
- --len;
- --str;
- }
-
- return str;
-}
-
-char *
-libgf_trim_to_prev_dir (char * path)
-{
- char *idx = NULL;
- int len = 0;
-
- if (!path)
- return NULL;
-
- /* Check if we're already at root, if yes
- * then there is no prev dir.
- */
- len = strlen (path);
- if (len == 1)
- return path;
-
- if (path[len - 1] == '/') {
- path[len - 1] = '\0';
- }
-
- idx = libgf_rrindex (path, '/', 1);
- /* Move to the char after the / */
- ++idx;
- *idx = '\0';
-
- return path;
-}
-
-
-char *
-libgf_prepend_cwd (const char *userpath, char *abspath, int size)
-{
- if ((!userpath) || (!abspath))
- return NULL;
-
- if (!getcwd (abspath, size))
- return NULL;
-
- strcat (abspath, "/");
- strcat (abspath, userpath);
-
- return abspath;
-}
-
-
-/* Performs a lightweight path resolution that only
- * looks for . and .. and replaces those with the
- * proper names.
- *
- * FIXME: This is a stop-gap measure till we have full
- * fledge path resolution going in here.
- * Function returns path strdup'ed so remember to FREE the
- * string as required.
- */
-char *
-libgf_resolve_path_light (char *path)
-{
- char *respath = NULL;
- char *saveptr = NULL;
- char *tok = NULL;
- int len = 0;
- int addslash = 0;
- char mypath[PATH_MAX];
-
- if (!path)
- goto out;
-
- memset (mypath, 0, PATH_MAX);
-
- if (!libgf_path_absolute (path))
- libgf_prepend_cwd (path, mypath, PATH_MAX);
- else
- strcpy (mypath, path);
-
- len = strlen (mypath);
- if (len == 0) {
- goto out;
- }
-
- respath = calloc (PATH_MAX, sizeof (char));
- if (respath == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,"Memory allocation failed");
- goto out;
- }
-
- /* The path only contains a / or a //, so simply add a /
- * and return.
- * This needs special handling because the loop below does
- * not allow us to do so through strtok.
- */
- if (((mypath[0] == '/') && (len == 1))
- || (strcmp (mypath, "//") == 0)) {
- strcat (respath, "/");
- goto out;
- }
-
- tok = strtok_r (mypath, "/", &saveptr);
- addslash = 0;
- strcat (respath, "/");
- while (tok) {
- if (addslash) {
- if ((strcmp (tok, ".") != 0)
- && (strcmp (tok, "..") != 0)) {
- strcat (respath, "/");
- }
- }
-
- if ((strcmp (tok, ".") != 0) && (strcmp (tok, "..") != 0)) {
- strcat (respath, tok);
- addslash = 1;
- } else if ((strcmp (tok, "..") == 0)) {
- libgf_trim_to_prev_dir (respath);
- addslash = 0;
- }
-
- tok = strtok_r (NULL, "/", &saveptr);
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Path: %s, Resolved Path: %s",
- path, respath);
-out:
- return respath;
-}
-
-void
-libgf_client_loc_wipe (loc_t *loc)
-{
- if (loc->path) {
- FREE (loc->path);
- }
-
- if (loc->parent) {
- inode_unref (loc->parent);
- loc->parent = NULL;
- }
-
- if (loc->inode) {
- inode_unref (loc->inode);
- loc->inode = NULL;
- }
-
- loc->path = loc->name = NULL;
- loc->ino = 0;
-}
-
-
-int32_t
-libgf_client_loc_fill (loc_t *loc,
- libglusterfs_client_ctx_t *ctx,
- ino_t ino,
- ino_t par,
- const char *name)
-{
- inode_t *inode = NULL, *parent = NULL;
- int32_t ret = -1;
- char *path = NULL;
-
- /* resistance against multiple invocation of loc_fill not to get
- reference leaks via inode_search() */
-
- inode = loc->inode;
-
- if (!inode) {
- if (ino)
- inode = inode_search (ctx->itable, ino, NULL);
-
- if (inode)
- goto inode_found;
-
- if (par && name)
- inode = inode_search (ctx->itable, par, name);
- }
-
-inode_found:
- if (inode) {
- loc->ino = inode->ino;
- loc->inode = inode;
- }
-
- parent = loc->parent;
- if (!parent) {
- if (inode)
- parent = inode_parent (inode, par, name);
- else
- parent = inode_search (ctx->itable, par, NULL);
- loc->parent = parent;
- }
-
- if (!loc->path) {
- if (name && parent) {
- ret = inode_path (parent, name, &path);
- if (ret <= 0) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "inode_path failed for %"PRId64"/%s",
- parent->ino, name);
- goto fail;
- } else {
- loc->path = path;
- }
- } else if (inode) {
- ret = inode_path (inode, NULL, &path);
- if (ret <= 0) {
- gf_log ("glusterfs-fuse", GF_LOG_ERROR,
- "inode_path failed for %"PRId64,
- inode->ino);
- goto fail;
- } else {
- loc->path = path;
- }
- }
- }
-
- if (loc->path) {
- loc->name = strrchr (loc->path, '/');
- if (loc->name)
- loc->name++;
- else loc->name = "";
- }
-
- if ((ino != 1) &&
- (parent == NULL)) {
- gf_log ("fuse-bridge", GF_LOG_ERROR,
- "failed to search parent for %"PRId64"/%s (%"PRId64")",
- (ino_t)par, name, (ino_t)ino);
- ret = -1;
- goto fail;
- }
- ret = 0;
-fail:
- return ret;
-}
-
-
-static call_frame_t *
-get_call_frame_for_req (libglusterfs_client_ctx_t *ctx, char d)
-{
- call_pool_t *pool = ctx->gf_ctx.pool;
- xlator_t *this = ctx->gf_ctx.graph;
- call_frame_t *frame = NULL;
-
-
- frame = create_frame (this, pool);
-
- frame->root->uid = geteuid ();
- frame->root->gid = getegid ();
- frame->root->pid = ctx->pid;
- frame->root->unique = ctx->counter++;
-
- return frame;
-}
-
-void
-libgf_client_fini (xlator_t *this)
-{
- FREE (this->private);
- return;
-}
-
-
-int32_t
-libgf_client_notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
-{
- libglusterfs_client_private_t *priv = this->private;
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- pthread_mutex_lock (&priv->lock);
- {
- priv->complete = 1;
- pthread_cond_broadcast (&priv->init_con_established);
- }
- pthread_mutex_unlock (&priv->lock);
- break;
-
- default:
- default_notify (this, event, data);
- }
-
- return 0;
-}
-
-int32_t
-libgf_client_init (xlator_t *this)
-{
- return 0;
-}
-
-glusterfs_handle_t
-glusterfs_init (glusterfs_init_params_t *init_ctx, uint32_t fakefsid)
-{
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_private_t *priv = NULL;
- FILE *specfp = NULL;
- xlator_t *graph = NULL, *trav = NULL;
- call_pool_t *pool = NULL;
- int32_t ret = 0;
- struct rlimit lim;
- uint32_t xl_count = 0;
- loc_t new_loc = {0, };
- struct timeval tv = {0, };
- uint32_t len = 0;
- char buf[PATH_MAX];
-
- if (!init_ctx || (!init_ctx->specfile && !init_ctx->specfp)) {
- errno = EINVAL;
- return NULL;
- }
-
- ctx = CALLOC (1, sizeof (*ctx));
- if (!ctx) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: out of memory\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
-
- errno = ENOMEM;
- return NULL;
- }
-
- ctx->lookup_timeout = init_ctx->lookup_timeout;
- ctx->stat_timeout = init_ctx->stat_timeout;
- ctx->fake_fsid = fakefsid;
- ctx->pid = getpid ();
- pthread_mutex_init (&ctx->gf_ctx.lock, NULL);
-
- pool = ctx->gf_ctx.pool = CALLOC (1, sizeof (call_pool_t));
- if (!pool) {
- errno = ENOMEM;
- FREE (ctx);
- return NULL;
- }
-
- LOCK_INIT (&pool->lock);
- INIT_LIST_HEAD (&pool->all_frames);
-
- /* FIXME: why is count hardcoded to 16384 */
- ctx->gf_ctx.event_pool = event_pool_new (16384);
- ctx->gf_ctx.page_size = LIBGF_IOBUF_SIZE;
- ctx->gf_ctx.iobuf_pool = iobuf_pool_new (8 * 1048576,
- ctx->gf_ctx.page_size);
-
- lim.rlim_cur = RLIM_INFINITY;
- lim.rlim_max = RLIM_INFINITY;
- setrlimit (RLIMIT_CORE, &lim);
- setrlimit (RLIMIT_NOFILE, &lim);
-
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_WARNING;
-
- if (init_ctx->logfile)
- ctx->gf_ctx.cmd_args.log_file = strdup (init_ctx->logfile);
- else
- ctx->gf_ctx.cmd_args.log_file = strdup ("/dev/stderr");
-
- if (init_ctx->loglevel) {
- if (!strncasecmp (init_ctx->loglevel, "DEBUG",
- strlen ("DEBUG"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_DEBUG;
- } else if (!strncasecmp (init_ctx->loglevel, "WARNING",
- strlen ("WARNING"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_WARNING;
- } else if (!strncasecmp (init_ctx->loglevel, "CRITICAL",
- strlen ("CRITICAL"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_CRITICAL;
- } else if (!strncasecmp (init_ctx->loglevel, "NONE",
- strlen ("NONE"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_NONE;
- } else if (!strncasecmp (init_ctx->loglevel, "ERROR",
- strlen ("ERROR"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_ERROR;
- } else if (!strncasecmp (init_ctx->loglevel, "TRACE",
- strlen ("TRACE"))) {
- ctx->gf_ctx.cmd_args.log_level = GF_LOG_TRACE;
- } else {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: Unrecognized log-level \"%s\", possible values are \"DEBUG|WARNING|[ERROR]|CRITICAL|NONE|TRACE\"\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- init_ctx->loglevel);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- errno = EINVAL;
- return NULL;
- }
- }
-
- if (first_init)
- {
- memset (buf, 0, PATH_MAX);
-
- if (getcwd (buf, PATH_MAX) == NULL) {
- fprintf (stderr, "libglusterfsclient: cannot get "
- "current working directory (%s)",
- strerror (errno));
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- len = strlen (buf);
- if ((buf[len - 1] != '/')) {
- if ((len + 2) > PATH_MAX) {
- errno = ENAMETOOLONG;
- fprintf (stderr, "libglusterfsclient: cannot"
- "get current working directory (%s)",
- strerror (errno));
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- strcat (buf, "/");
- }
-
- pthread_mutex_lock (&cwdlock);
- {
- strcpy (cwd, buf);
- cwd_inited = 1;
- }
- pthread_mutex_unlock (&cwdlock);
-
- ret = gf_log_init (ctx->gf_ctx.cmd_args.log_file);
- if (ret == -1) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: failed to open logfile \"%s\"\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- gf_log_set_loglevel (ctx->gf_ctx.cmd_args.log_level);
- }
-
- if (init_ctx->specfp) {
- specfp = init_ctx->specfp;
- if (fseek (specfp, 0L, SEEK_SET)) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: fseek on volume file stream failed (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
- } else if (init_ctx->specfile) {
- specfp = fopen (init_ctx->specfile, "r");
- ctx->gf_ctx.cmd_args.volume_file = strdup (init_ctx->specfile);
- }
-
- if (!specfp) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: could not open volfile: %s\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- if (init_ctx->volume_name) {
- ctx->gf_ctx.cmd_args.volume_name = strdup (init_ctx->volume_name);
- }
-
- graph = file_to_xlator_tree (&ctx->gf_ctx, specfp);
- if (!graph) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: cannot create configuration graph (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
-
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
-
- if (init_ctx->volume_name) {
- trav = graph;
- while (trav) {
- if (strcmp (trav->name, init_ctx->volume_name) == 0) {
- graph = trav;
- break;
- }
- trav = trav->next;
- }
- }
-
- ctx->gf_ctx.graph = libglusterfs_graph (graph);
- if (!ctx->gf_ctx.graph) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: graph creation failed (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
-
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- FREE (ctx);
- return NULL;
- }
- graph = ctx->gf_ctx.graph;
- ctx->gf_ctx.top = graph;
-
- trav = graph;
- while (trav) {
- xl_count++; /* Getting this value right is very important */
- trav = trav->next;
- }
-
- ctx->gf_ctx.xl_count = xl_count + 1;
-
- priv = CALLOC (1, sizeof (*priv));
- if (!priv) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: cannot allocate memory (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
-
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
-
- return NULL;
- }
-
- pthread_cond_init (&priv->init_con_established, NULL);
- pthread_mutex_init (&priv->lock, NULL);
-
- graph->private = priv;
- ctx->itable = inode_table_new (LIBGLUSTERFS_INODE_TABLE_LRU_LIMIT,
- graph);
- if (!ctx->itable) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: cannot create inode table\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
-
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- xlator_tree_free (graph);
- /* TODO: destroy graph */
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
-
- return NULL;
- }
-
- set_global_ctx_ptr (&ctx->gf_ctx);
- ctx->gf_ctx.process_uuid = zr_build_process_uuid ();
-
- if (xlator_graph_init (graph) == -1) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: graph initialization failed\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- /* TODO: destroy graph */
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
- return NULL;
- }
-
- /* Send notify to all translator saying things are ready */
- graph->notify (graph, GF_EVENT_PARENT_UP, graph);
-
- if (gf_timer_registry_init (&ctx->gf_ctx) == NULL) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: timer init failed (%s)\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__,
- strerror (errno));
-
- xlator_graph_fini (graph);
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
-
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- /* TODO: destroy graph */
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
- return NULL;
- }
-
- if ((ret = pthread_create (&ctx->reply_thread, NULL, poll_proc,
- (void *)&ctx->gf_ctx))) {
- fprintf (stderr,
- "libglusterfsclient: %s:%s():%d: reply thread creation failed\n",
- __FILE__, __PRETTY_FUNCTION__, __LINE__);
- xlator_graph_fini (graph);
- xlator_tree_free (graph);
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
-
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- /* TODO: destroy graph */
- /* inode_table_destroy (ctx->itable); */
- FREE (ctx);
- return NULL;
- }
-
- pthread_mutex_lock (&priv->lock);
- {
- while (!priv->complete) {
- pthread_cond_wait (&priv->init_con_established,
- &priv->lock);
- }
- }
- pthread_mutex_unlock (&priv->lock);
-
- /*
- * wait for some time to allow initialization of all children of
- * distribute before sending lookup on '/'
- */
-
- tv.tv_sec = 0;
- tv.tv_usec = (100 * 1000);
- select (0, NULL, NULL, NULL, &tv);
-
- /* workaround for xlators like dht which require lookup to be sent
- * on / */
- libgf_client_loc_fill (&new_loc, ctx, 1, 0, "/");
- ret = libgf_client_lookup (ctx, &new_loc, NULL, NULL, NULL);
- if (ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR, "lookup of /"
- " failed");
- return NULL;
- }
- libgf_client_loc_wipe (&new_loc);
-
- first_init = 0;
-
- return ctx;
-}
-
-struct vmp_entry *
-libgf_init_vmpentry (char *vmp, glusterfs_handle_t *vmphandle)
-{
- struct vmp_entry *entry = NULL;
- int vmplen = 0;
- int appendslash = 0;
- int ret = -1;
-
- entry = CALLOC (1, sizeof (struct vmp_entry));
- if (!entry) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,"Memory allocation failed");
- return NULL;
- }
-
- vmplen = strlen (vmp);
- if (vmp[vmplen - 1] != '/') {
- vmplen++;
- appendslash = 1;
- }
-
- entry->vmp = CALLOC (vmplen + 1, sizeof (char));
- if (!entry->vmp) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Memory allocation "
- "failed");
- goto free_entry;
- }
-
- strcpy (entry->vmp, vmp);
- if (appendslash) {
- entry->vmp[vmplen-1] = '/';
- entry->vmp[vmplen] = '\0';
- }
-
- entry->vmplen = vmplen;
- entry->handle = vmphandle;
- INIT_LIST_HEAD (&entry->list);
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "New VMP entry: %s", vmp);
-
- ret = 0;
-
-free_entry:
- if (ret == -1) {
- if (entry->vmp)
- FREE (entry->vmp);
- if (entry)
- FREE (entry);
- entry = NULL;
- }
- return entry;
-}
-
-void
-libgf_free_vmp_entry (struct vmp_entry *entry)
-{
- FREE (entry->vmp);
- FREE (entry);
-}
-
-int
-libgf_count_path_components (char *path)
-{
- int compos = 0;
- char *pathdup = NULL;
- int len = 0;
-
- if (!path)
- return -1;
-
- pathdup = strdup (path);
- if (!pathdup)
- return -1;
-
- len = strlen (pathdup);
- if (pathdup[len - 1] == '/')
- pathdup[len - 1] = '\0';
-
- path = pathdup;
- while ((path = strchr (path, '/'))) {
- compos++;
- ++path;
- }
-
- free (pathdup);
- return compos;
-}
-
-/* Returns the number of components that match between
- * the VMP and the path. Assumes string1 is vmp entry.
- * Assumes both are absolute paths.
- */
-int
-libgf_strmatchcount (char *string1, char *string2)
-{
- int matchcount = 0;
- char *s1dup = NULL, *s2dup = NULL;
- char *tok1 = NULL, *saveptr1 = NULL;
- char *tok2 = NULL, *saveptr2 = NULL;
-
- if ((!string1) || (!string2))
- return 0;
-
- s1dup = strdup (string1);
- if (!s1dup)
- return 0;
-
- s2dup = strdup (string2);
- if (!s2dup)
- goto free_s1;
-
- string1 = s1dup;
- string2 = s2dup;
-
- tok1 = strtok_r(string1, "/", &saveptr1);
- tok2 = strtok_r (string2, "/", &saveptr2);
- while (tok1) {
- if (!tok2)
- break;
-
- if (strcmp (tok1, tok2) != 0)
- break;
-
- matchcount++;
- tok1 = strtok_r(NULL, "/", &saveptr1);
- tok2 = strtok_r (NULL, "/", &saveptr2);
- }
-
- free (s2dup);
-free_s1:
- free (s1dup);
- return matchcount;
-}
-
-int
-libgf_vmp_entry_match (struct vmp_entry *entry, char *path)
-{
- return libgf_strmatchcount (entry->vmp, path);
-}
-
-#define LIBGF_VMP_EXACT 1
-#define LIBGF_VMP_LONGESTPREFIX 0
-
-
-/* copies vmp from the vmp-entry having glusterfs handle @handle, into @vmp */
-char *
-libgf_vmp_search_vmp (glusterfs_handle_t handle, char *vmp, size_t vmp_size)
-{
- char *res = NULL;
- struct vmp_entry *entry = NULL;
-
- if (handle == NULL) {
- goto out;
- }
-
- pthread_mutex_lock (&vmplock);
- {
- if (vmplist.entries == 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Virtual Mount Point "
- "list is empty.");
- goto unlock;
- }
-
- list_for_each_entry(entry, &vmplist.list, list) {
- if (entry->handle == handle) {
- if ((vmp_size) < (strlen (entry->vmp) + 1)) {
- errno = ENAMETOOLONG;
- goto unlock;
- }
-
- strcpy (vmp, entry->vmp);
- res = vmp;
- break;
- }
- }
- }
-unlock:
- pthread_mutex_unlock (&vmplock);
-
-out:
- return res;
-}
-
-
-struct vmp_entry *
-_libgf_vmp_search_entry (char *path, int searchtype)
-{
- struct vmp_entry *entry = NULL;
- int matchcount = 0;
- struct vmp_entry *maxentry = NULL;
- int maxcount = 0;
- int vmpcompcount = 0;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Search: path %s, type: %s",
- path, (searchtype == LIBGF_VMP_EXACT)?"Exact":"LongestPrefix");
- if (vmplist.entries == 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Virtual Mount Point "
- "list is empty.");
- goto out;
- }
-
- list_for_each_entry(entry, &vmplist.list, list) {
- vmpcompcount = libgf_count_path_components (entry->vmp);
- matchcount = libgf_vmp_entry_match (entry, path);
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Candidate VMP: %s,"
- " Matchcount: %d", entry->vmp, matchcount);
- if ((matchcount > maxcount) && (matchcount == vmpcompcount)) {
- maxcount = matchcount;
- maxentry = entry;
- }
- }
-
- /* To ensure that the longest prefix matched entry is also an exact
- * match, this is used to check whether duplicate entries are present
- * in the vmplist.
- */
- vmpcompcount = 0;
- if ((searchtype == LIBGF_VMP_EXACT) && (maxentry)) {
- vmpcompcount = libgf_count_path_components (maxentry->vmp);
- matchcount = libgf_count_path_components (path);
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Exact Check: VMP: %s,"
- " CompCount: %d, Path: %s, CompCount: %d",
- maxentry->vmp, vmpcompcount, path, matchcount);
- if (vmpcompcount != matchcount) {
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "No Match");
- maxentry = NULL;
- } else
- gf_log (LIBGF_XL_NAME, GF_LOG_TRACE, "Matches!");
- }
-
-out:
- return maxentry;
-}
-
-/* Used to search for a exactly matching VMP entry.
- */
-struct vmp_entry *
-libgf_vmp_search_exact_entry (char *path)
-{
- struct vmp_entry *entry = NULL;
-
- if (!path)
- goto out;
-
- pthread_mutex_lock (&vmplock);
- {
- entry = _libgf_vmp_search_entry (path, LIBGF_VMP_EXACT);
- }
- pthread_mutex_unlock (&vmplock);
-
-out:
- if (entry)
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Entry found: path :%s"
- " vmp: %s", path, entry->vmp);
- else
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Entry not found: path"
- ": %s", path);
-
- return entry;
-}
-
-
-/* Used to search for a longest prefix matching VMP entry.
- */
-struct vmp_entry *
-libgf_vmp_search_entry (char *path)
-{
- struct vmp_entry *entry = NULL;
-
- if (!path)
- goto out;
-
- pthread_mutex_lock (&vmplock);
- {
- entry = _libgf_vmp_search_entry (path, LIBGF_VMP_LONGESTPREFIX);
- }
- pthread_mutex_unlock (&vmplock);
-
-out:
- if (entry)
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Entry found: path :%s"
- " vmp: %s", path, entry->vmp);
- else
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "VMP Entry not found: path"
- ": %s", path);
-
- return entry;
-}
-
-int
-libgf_vmp_map_ghandle (char *vmp, glusterfs_handle_t *vmphandle)
-{
- int ret = -1;
- struct vmp_entry *vmpentry = NULL;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "New Entry: %s", vmp);
- vmpentry = libgf_init_vmpentry (vmp, vmphandle);
- if (!vmpentry) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Failed to create VMP"
- " entry");
- goto out;
- }
-
- pthread_mutex_lock (&vmplock);
- {
- if (vmplist.entries == 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Empty list");
- INIT_LIST_HEAD (&vmplist.list);
- }
-
- list_add_tail (&vmpentry->list, &vmplist.list);
- ++vmplist.entries;
- }
- pthread_mutex_unlock (&vmplock);
- ret = 0;
-
-out:
- return ret;
-}
-
-/* Path must be validated already. */
-glusterfs_handle_t
-libgf_vmp_get_ghandle (char * path)
-{
- struct vmp_entry *entry = NULL;
-
- entry = libgf_vmp_search_entry (path);
-
- if (entry == NULL)
- return NULL;
-
- return entry->handle;
-}
-
-
-/* Returns the handle for the path given in @path,
- * @path can be a relative path. The point is, here we
- * perform any path resolution that is needed and then
- * search for the corresponding vmp handle.
- * @vpath is a result-value argument in that the virtual
- * path inside the handle is copied into it.
- */
-glusterfs_handle_t
-libgf_resolved_path_handle (const char *path, char *vpath)
-{
- char *respath = NULL;
- struct vmp_entry *entry = NULL;
- glusterfs_handle_t handle = NULL;
- char *tmp = NULL;
-
- if ((!path) || (!vpath))
- return NULL;
-
- /* We only want compaction before VMP entry search because the
- * VMP cannot be search unless we have an absolute path.
- * For absolute paths, we search for VMP first, then perform the
- * path compaction on the given virtual path.
- */
- if (!libgf_path_absolute (path)) {
- respath = libgf_resolve_path_light ((char *)path);
- if (respath == NULL)
- return NULL;
- }
-
- /* This condition is needed because in case of absolute paths, the path
- * would already include the VMP and we want to ensure that any path
- * compaction that happens does not exclude the VMP. In the absence of
- * this condition an absolute path might get compacted to "/", i.e.
- * exclude the VMP, and the search will fail.
- *
- * For relative paths, respath will aleady include a potential VMP
- * as a consequence of us prepending the CWD in resolve_light above.
- */
- if (libgf_path_absolute (path)) {
- entry = libgf_vmp_search_entry ((char *)path);
- if (!entry)
- goto free_respath;
- tmp = libgf_vmp_virtual_path (entry, path, vpath);
- if (!tmp)
- goto free_respath;
-
- respath = libgf_resolve_path_light (vpath);
- strcpy (vpath, respath);
- } else {
- entry = libgf_vmp_search_entry (respath);
- if (!entry)
- goto free_respath;
- tmp = libgf_vmp_virtual_path (entry, respath, vpath);
- if (!tmp)
- goto free_respath;
- }
-
- handle = entry->handle;
-free_respath:
- if (respath)
- free (respath); /* Alloced in libgf_resolve_path_light */
-
- return handle;
-}
-
-
-int
-glusterfs_mount (char *vmp, glusterfs_init_params_t *ipars)
-{
- glusterfs_handle_t vmphandle = NULL;
- int ret = -1;
- char *vmp_resolved = NULL;
- struct vmp_entry *vmp_entry = NULL;
- uint32_t vmphash = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, vmp, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ipars, out);
-
- vmp_resolved = libgf_resolve_path_light (vmp);
- if (!vmp_resolved) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- vmphash = (dev_t)ReallySimpleHash (vmp, strlen (vmp));
- pthread_mutex_lock (&mountlock);
- {
- vmp_entry = libgf_vmp_search_exact_entry (vmp);
- if (vmp_entry) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Entry exists");
- ret = 0;
- goto unlock;
- }
-
- vmphandle = glusterfs_init (ipars, vmphash);
- if (!vmphandle) {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "GlusterFS context"
- " init failed");
- goto unlock;
- }
-
- ret = libgf_vmp_map_ghandle (vmp_resolved, vmphandle);
- if (ret == -1) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Failed to map new"
- " handle: %s", vmp);
- glusterfs_fini (vmphandle);
- }
- }
-unlock:
- pthread_mutex_unlock (&mountlock);
-
-out:
- if (vmp_resolved)
- FREE (vmp_resolved);
-
- return ret;
-}
-
-inline int
-_libgf_umount (char *vmp)
-{
- struct vmp_entry *entry= NULL;
- int ret = -1;
-
- entry = _libgf_vmp_search_entry (vmp, LIBGF_VMP_EXACT);
- if (entry == NULL) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path (%s) not mounted", vmp);
- goto out;
- }
-
- if (entry->handle == NULL) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path (%s) has no corresponding glusterfs handle",
- vmp);
- goto out;
- }
-
-/* ret = glusterfs_fini (entry->handle); */
- list_del_init (&entry->list);
- libgf_free_vmp_entry (entry);
-
- vmplist.entries--;
-
-out:
- return ret;
-}
-
-inline int
-libgf_umount (char *vmp)
-{
- int ret = -1;
-
- pthread_mutex_lock (&vmplock);
- {
- ret = _libgf_umount (vmp);
- }
- pthread_mutex_unlock (&vmplock);
-
- return ret;
-}
-
-int
-glusterfs_umount (char *vmp)
-{
- int ret = -1;
- char *vmp_resolved = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, vmp, out);
-
- vmp_resolved = libgf_resolve_path_light (vmp);
- if (!vmp_resolved) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- ret = libgf_umount (vmp_resolved);
-
-out:
- if (vmp_resolved)
- FREE (vmp_resolved);
-
- return ret;
-}
-
-int
-glusterfs_umount_all (void)
-{
- struct vmp_entry *entry = NULL, *tmp = NULL;
-
- pthread_mutex_lock (&vmplock);
- {
- if (vmplist.entries > 0) {
- list_for_each_entry_safe (entry, tmp, &vmplist.list,
- list) {
- /* even if there are errors, continue with other
- mounts
- */
- _libgf_umount (entry->vmp);
- }
- }
- }
- pthread_mutex_unlock (&vmplock);
-
- return 0;
-}
-
-void
-glusterfs_reset (void)
-{
- INIT_LIST_HEAD (&vmplist.list);
- vmplist.entries = 0;
-
- memset (&vmplock, 0, sizeof (vmplock));
- pthread_mutex_init (&vmplock, NULL);
-
- first_init = 1;
-}
-
-void
-glusterfs_log_lock (void)
-{
- gf_log_lock ();
-}
-
-
-void glusterfs_log_unlock (void)
-{
- gf_log_unlock ();
-}
-
-
-void
-libgf_wait_for_frames_unwind (libglusterfs_client_ctx_t *ctx)
-{
- call_pool_t *pool = NULL;
- int canreturn = 0;
-
- if (!ctx)
- return;
-
- pool = (call_pool_t *)ctx->gf_ctx.pool;
- while (1) {
- LOCK (&pool->lock);
- {
- if (pool->cnt == 0) {
- canreturn = 1;
- goto unlock_out;
- }
- }
-unlock_out:
- UNLOCK (&pool->lock);
-
- if (canreturn)
- break;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Waiting for call frames");
- sleep (1);
- }
-
- return;
-}
-
-
-int
-glusterfs_fini (glusterfs_handle_t handle)
-{
- libglusterfs_client_ctx_t *ctx = handle;
-
- libgf_wait_for_frames_unwind (ctx);
-
- FREE (ctx->gf_ctx.cmd_args.log_file);
- FREE (ctx->gf_ctx.cmd_args.volume_file);
- FREE (ctx->gf_ctx.cmd_args.volume_name);
- FREE (ctx->gf_ctx.pool);
- FREE (ctx->gf_ctx.event_pool);
- mem_pool_destroy (ctx->itable->inode_pool);
- mem_pool_destroy (ctx->itable->dentry_pool);
- mem_pool_destroy (ctx->itable->fd_mem_pool);
- /* iobuf_pool_destroy (ctx->gf_ctx.iobuf_pool); */
- ((gf_timer_registry_t *)ctx->gf_ctx.timer)->fin = 1;
-
- xlator_graph_fini (ctx->gf_ctx.graph);
- xlator_tree_free (ctx->gf_ctx.graph);
- ctx->gf_ctx.graph = NULL;
- pthread_cancel (ctx->reply_thread);
-
- FREE (ctx);
-
- return 0;
-}
-
-
-int32_t
-libgf_client_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
- libglusterfs_client_ctx_t *ctx = frame->root->state;
- dict_t *xattr_req = NULL;
-
- if (op_ret == 0) {
- inode_t *parent = NULL;
-
- if (local->fop.lookup.loc->ino == 1) {
- buf->ia_ino = 1;
- }
-
- parent = local->fop.lookup.loc->parent;
- if (inode->ino != 1) {
- inode = inode_link (inode, parent,
- local->fop.lookup.loc->name, buf);
- }
-
- libgf_transform_iattr (ctx, inode, buf);
- inode_lookup (inode);
- } else {
- if ((local->fop.lookup.is_revalidate == 0)
- && (op_errno == ENOENT)) {
- gf_log ("libglusterfsclient", GF_LOG_DEBUG,
- "%"PRId64": (op_num=%d) %s => -1 (%s)",
- frame->root->unique, frame->root->op,
- local->fop.lookup.loc->path,
- strerror (op_errno));
- } else {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "%"PRId64": (op_num=%d) %s => -1 (%s)",
- frame->root->unique, frame->root->op,
- local->fop.lookup.loc->path,
- strerror (op_errno));
- }
-
- if (local->fop.lookup.is_revalidate == 1) {
- int32_t ret = 0;
- inode_unref (local->fop.lookup.loc->inode);
- local->fop.lookup.loc->inode = inode_new (ctx->itable);
- local->fop.lookup.is_revalidate = 2;
-
- if (local->fop.lookup.size > 0) {
- xattr_req = dict_new ();
- ret = dict_set (xattr_req, "glusterfs.content",
- data_from_uint64 (local->fop.lookup.size));
- if (ret == -1) {
- op_ret = -1;
- /* TODO: set proper error code */
- op_errno = errno;
- inode = NULL;
- buf = NULL;
- dict = NULL;
- dict_unref (xattr_req);
- goto out;
- }
- }
-
- STACK_WIND (frame, libgf_client_lookup_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup,
- local->fop.lookup.loc, xattr_req);
-
- if (xattr_req) {
- dict_unref (xattr_req);
- xattr_req = NULL;
- }
-
- return 0;
- }
- }
-
-out:
- local->reply_stub = fop_lookup_cbk_stub (frame, NULL, op_ret, op_errno,
- inode, buf, dict, postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_lookup (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- struct iatt *stbuf,
- dict_t **dict,
- dict_t *xattr_req)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret;
- libgf_client_local_t *local = NULL;
- inode_t *inode = NULL;
-
- local = CALLOC (1, sizeof (*local));
- if (!local) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Memory allocation"
- " failed");
- errno = ENOMEM;
- return -1;
- }
-
- if (loc->inode) {
- local->fop.lookup.is_revalidate = 1;
- loc->ino = loc->inode->ino;
- }
- else
- loc->inode = inode_new (ctx->itable);
-
- local->fop.lookup.loc = loc;
-
- LIBGF_CLIENT_FOP(ctx, stub, lookup, local, loc, xattr_req);
-
- op_ret = stub->args.lookup_cbk.op_ret;
- errno = stub->args.lookup_cbk.op_errno;
-
- if (op_ret == -1)
- goto out;
-
- inode = stub->args.lookup_cbk.inode;
- if (!(libgf_get_inode_ctx (inode)))
- libgf_alloc_inode_ctx (ctx, inode);
- libgf_transform_iattr (ctx, inode, &stub->args.lookup_cbk.buf);
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_ALL,
- &stub->args.lookup_cbk.buf);
- if (stbuf)
- *stbuf = stub->args.lookup_cbk.buf;
-
- if (dict)
- *dict = dict_ref (stub->args.lookup_cbk.dict);
-
- if (inode != loc->inode) {
- inode_unref (loc->inode);
- loc->inode = inode_ref (inode);
- }
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_get (glusterfs_handle_t handle, const char *path, void *buf,
- size_t size, struct stat *stbuf)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- dict_t *dict = NULL;
- dict_t *xattr_req = NULL;
- char *name = NULL, *pathname = NULL;
- struct iatt iatt = {0,};
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, size %lu", path,
- (long unsigned)size);
- if (size < 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid size");
- errno = EINVAL;
- goto out;
- }
-
- if (size == 0) {
- op_ret = 0;
- goto out;
- }
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret < 0) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- if (size) {
- xattr_req = dict_new ();
- op_ret = dict_set (xattr_req, "glusterfs.content",
- data_from_uint64 (size));
- if (op_ret < 0) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "setting requested content size dictionary failed");
- goto out;
- }
- }
-
- op_ret = libgf_client_lookup (ctx, &loc, &iatt, &dict, xattr_req);
- iatt_to_stat (&iatt, stbuf);
- if (!op_ret && stbuf && (iatt.ia_size <= size) && dict && buf) {
- data_t *mem_data = NULL;
- void *mem = NULL;
-
- mem_data = dict_get (dict, "glusterfs.content");
- if (mem_data) {
- mem = data_to_ptr (mem_data);
- }
-
- if (mem != NULL) {
- memcpy (buf, mem, iatt.ia_size);
- }
- }
-
-out:
- if (xattr_req) {
- dict_unref (xattr_req);
- }
-
- if (dict) {
- dict_unref (dict);
- }
-
- if (pathname) {
- FREE (pathname);
- }
- libgf_client_loc_wipe (&loc);
-
- return op_ret;
-}
-
-int
-glusterfs_get (const char *path, void *buf, size_t size, struct stat *stbuf)
-{
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
- char vpath[PATH_MAX];
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, stbuf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, size %lu", path,
- (long unsigned)size);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_get (h, vpath, buf, size, stbuf);
-
-out:
- return op_ret;
-}
-
-int
-libgf_client_lookup_async_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *stbuf,
- dict_t *dict,
- struct iatt *postparent)
-{
- libglusterfs_client_async_local_t *local = frame->local;
- glusterfs_get_cbk_t lookup_cbk = local->fop.lookup_cbk.cbk;
- libglusterfs_client_ctx_t *ctx = frame->root->state;
- glusterfs_iobuf_t *iobuf = NULL;
- dict_t *xattr_req = NULL;
- inode_t *parent = NULL;
- struct stat stat = {0,};
-
- if (op_ret == 0) {
- parent = local->fop.lookup_cbk.loc->parent;
- inode_link (inode, parent, local->fop.lookup_cbk.loc->name,
- stbuf);
- libgf_transform_iattr (ctx, inode, stbuf);
- if (!(libgf_get_inode_ctx (inode)))
- libgf_alloc_inode_ctx (ctx, inode);
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_ALL, stbuf);
- inode_lookup (inode);
- } else {
- if ((local->fop.lookup_cbk.is_revalidate == 0)
- && (op_errno == ENOENT)) {
- gf_log ("libglusterfsclient", GF_LOG_DEBUG,
- "%"PRId64": (op_num=%d) %s => -1 (%s)",
- frame->root->unique, frame->root->op,
- local->fop.lookup_cbk.loc->path,
- strerror (op_errno));
- } else {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "%"PRId64": (op_num=%d) %s => -1 (%s)",
- frame->root->unique, frame->root->op,
- local->fop.lookup_cbk.loc->path,
- strerror (op_errno));
- }
-
- if (local->fop.lookup_cbk.is_revalidate == 1) {
- int32_t ret = 0;
- inode_unref (local->fop.lookup_cbk.loc->inode);
- local->fop.lookup_cbk.loc->inode = inode_new (ctx->itable);
- local->fop.lookup_cbk.is_revalidate = 2;
-
- if (local->fop.lookup_cbk.size > 0) {
- xattr_req = dict_new ();
- ret = dict_set (xattr_req, "glusterfs.content",
- data_from_uint64 (local->fop.lookup_cbk.size));
- if (ret == -1) {
- op_ret = -1;
- /* TODO: set proper error code */
- op_errno = errno;
- inode = NULL;
- stbuf = NULL;
- dict = NULL;
- dict_unref (xattr_req);
- goto out;
- }
- }
-
-
- STACK_WIND (frame, libgf_client_lookup_async_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->lookup,
- local->fop.lookup_cbk.loc, xattr_req);
-
- if (xattr_req) {
- dict_unref (xattr_req);
- xattr_req = NULL;
- }
-
- return 0;
- }
- }
-
-out:
- if (!op_ret && local->fop.lookup_cbk.size && dict) {
- data_t *mem_data = NULL;
- void *mem = NULL;
- struct iovec *vector = NULL;
-
- mem_data = dict_get (dict, "glusterfs.content");
- if (mem_data) {
- mem = data_to_ptr (mem_data);
- }
-
- if (mem && stbuf->ia_size <= local->fop.lookup_cbk.size) {
- iobuf = CALLOC (1, sizeof (*iobuf));
- ERR_ABORT (iobuf);
-
- vector = CALLOC (1, sizeof (*vector));
- ERR_ABORT (vector);
- vector->iov_base = mem;
- vector->iov_len = stbuf->ia_size;
-
- iobuf->vector = vector;
- iobuf->count = 1;
- iobuf->dictref = dict_ref (dict);
- }
- }
-
- iatt_to_stat (stbuf, &stat);
- lookup_cbk (op_ret, op_errno, iobuf, &stat, local->cbk_data);
-
- libgf_client_loc_wipe (local->fop.lookup_cbk.loc);
- free (local->fop.lookup_cbk.loc);
-
- free (local);
- frame->local = NULL;
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-/* TODO: implement async dentry lookup */
-
-int
-glusterfs_get_async (glusterfs_handle_t handle,
- const char *path,
- size_t size,
- glusterfs_get_cbk_t cbk,
- void *cbk_data)
-{
- loc_t *loc = NULL;
- libglusterfs_client_ctx_t *ctx = handle;
- libglusterfs_client_async_local_t *local = NULL;
- int32_t op_ret = 0;
- dict_t *xattr_req = NULL;
- char *name = NULL, *pathname = NULL;
-
- if (!ctx || !path || path[0] != '/') {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- if (size < 0) {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- if (size == 0) {
- op_ret = 0;
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- local->fop.lookup_cbk.is_revalidate = 1;
-
- loc = CALLOC (1, sizeof (*loc));
- loc->path = strdup (path);
- op_ret = libgf_client_path_lookup (loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "path lookup failed for (%s)", path);
- goto out;
- }
-
- pathname = strdup (path);
- name = basename (pathname);
- op_ret = libgf_client_loc_fill (loc, ctx, 0, loc->parent->ino, name);
- if (op_ret < 0) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- if (!loc->inode) {
- loc->inode = inode_new (ctx->itable);
- local->fop.lookup_cbk.is_revalidate = 0;
- }
-
- local->fop.lookup_cbk.cbk = cbk;
- local->fop.lookup_cbk.size = size;
- local->fop.lookup_cbk.loc = loc;
- local->cbk_data = cbk_data;
-
- if (size > 0) {
- xattr_req = dict_new ();
- op_ret = dict_set (xattr_req, "glusterfs.content",
- data_from_uint64 (size));
- if (op_ret < 0) {
- dict_unref (xattr_req);
- xattr_req = NULL;
- goto out;
- }
- }
-
- LIBGF_CLIENT_FOP_ASYNC (ctx,
- local,
- libgf_client_lookup_async_cbk,
- lookup,
- loc,
- xattr_req);
- if (xattr_req) {
- dict_unref (xattr_req);
- xattr_req = NULL;
- }
-
-out:
- if (pathname) {
- FREE (pathname);
- }
-
- return op_ret;
-}
-
-int32_t
-libgf_client_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
-
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_getxattr_cbk_stub (frame, NULL, op_ret,
- op_errno, dict);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-size_t
-libgf_client_getxattr (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- const char *name,
- void *value,
- size_t size)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, getxattr, local, loc, name);
-
- op_ret = stub->args.getxattr_cbk.op_ret;
- errno = stub->args.getxattr_cbk.op_errno;
-
- if (op_ret >= 0) {
- /*
- gf_log ("LIBGF_CLIENT", GF_LOG_DEBUG,
- "%"PRId64": %s => %d", frame->root->unique,
- state->fuse_loc.loc.path, op_ret);
- */
-
- data_t *value_data = dict_get (stub->args.getxattr_cbk.dict,
- (char *)name);
-
- if (value_data) {
- int32_t copy_len = 0;
-
- /* Don't return the value for '\0' */
- op_ret = value_data->len;
- if ((size > 0) && (value != NULL)) {
- copy_len = size < value_data->len ?
- size : value_data->len;
- memcpy (value, value_data->data, copy_len);
- op_ret = copy_len;
- }
- } else {
- errno = ENODATA;
- op_ret = -1;
- }
- }
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-#define LIBGF_DO_GETXATTR 1
-#define LIBGF_DO_LGETXATTR 2
-
-ssize_t
-__glusterfs_glh_getxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size,
- int whichop)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *file = NULL;
- char *pathres = NULL, *tmp = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, name %s, size %lu,"
- " op %d", path, name, (long unsigned)size, whichop);
- if (name[0] == '\0') {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument: Name"
- " not NULL terminated");
- goto out;
- }
-
- if (size < 0) {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument: size is"
- " less than zero");
- goto out;
- }
-
- pathres = strdup (path);
- if (!pathres) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- loc.path = strdup (pathres);
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- tmp = strdup (pathres);
- file = basename (tmp);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, file);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- if (whichop == LIBGF_DO_LGETXATTR)
- goto do_getx;
-
- if (!IA_ISLNK (loc.inode->ia_type))
- goto do_getx;
-
- libgf_client_loc_wipe (&loc);
- op_ret = libgf_realpath_loc_fill (ctx, (char *)pathres, &loc);
- if (op_ret == -1) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "realpath failed");
- goto out;
- }
-
-do_getx:
- op_ret = libgf_client_getxattr (ctx, &loc, name, value, size);
-
-out:
- if (tmp) {
- FREE (tmp);
- }
-
- if (pathres)
- FREE (pathres);
-
- libgf_client_loc_wipe (&loc);
-
- return op_ret;
-}
-
-ssize_t
-glusterfs_glh_getxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size)
-{
- return __glusterfs_glh_getxattr (handle, path, name, value, size,
- LIBGF_DO_GETXATTR);
-}
-
-ssize_t
-glusterfs_glh_lgetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size)
-{
- return __glusterfs_glh_getxattr (handle, path, name, value, size,
- LIBGF_DO_LGETXATTR);
-}
-
-ssize_t
-glusterfs_getxattr (const char *path, const char *name, void *value,
- size_t size)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
-
- if ((size > 0) && (value == NULL)) {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument value");
- goto out;
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, name %s, size %lu",
- path, name, (long unsigned)size);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = __glusterfs_glh_getxattr (h, vpath, name, value, size,
- LIBGF_DO_GETXATTR);
-
-out:
- return op_ret;
-}
-
-ssize_t
-glusterfs_lgetxattr (const char *path, const char *name, void *value,
- size_t size)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
-
- if ((size > 0) && (value == NULL)) {
- errno = EINVAL;
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument value");
- goto out;
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, name %s, size %lu",
- path, name, (long unsigned)size);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = __glusterfs_glh_getxattr (h, vpath, name, value, size,
- LIBGF_DO_LGETXATTR);
-
-out:
- return op_ret;
-}
-
-static int32_t
-libgf_client_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_open_cbk_stub (frame, NULL, op_ret, op_errno,
- fd);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-int
-libgf_client_open (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- fd_t *fd,
- int flags)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, open, local, loc, flags, fd, 0);
-
- op_ret = stub->args.open_cbk.op_ret;
- errno = stub->args.open_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "open: path %s, status: %d, errno"
- " %d", loc->path, op_ret, errno);
- if (op_ret != -1)
- fd_bind (fd);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-static int32_t
-libgf_client_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_create_cbk_stub (frame, NULL, op_ret, op_errno,
- fd, inode, buf, preparent,
- postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_creat (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- fd_t *fd,
- int flags,
- mode_t mode)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
- inode_t *libgf_inode = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, create, local, loc, flags, mode, fd);
-
- op_ret = stub->args.create_cbk.op_ret;
- errno = stub->args.create_cbk.op_errno;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Create: path %s, status: %d,"
- " errno: %d", loc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- libgf_inode = stub->args.create_cbk.inode;
- inode_link (libgf_inode, loc->parent, loc->name,
- &stub->args.create_cbk.buf);
- libgf_transform_iattr (ctx, libgf_inode, &stub->args.create_cbk.buf);
-
- inode_lookup (libgf_inode);
-
- libgf_alloc_inode_ctx (ctx, libgf_inode);
- libgf_update_iattr_cache (libgf_inode, LIBGF_UPDATE_ALL,
- &stub->args.create_cbk.buf);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int32_t
-libgf_client_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_opendir_cbk_stub (frame, NULL, op_ret, op_errno,
- fd);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_opendir (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- fd_t *fd)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = -1;
- libgf_client_local_t *local = NULL;
-
- if (((fd->flags & O_ACCMODE) == O_WRONLY)
- || ((fd->flags & O_ACCMODE) == O_RDWR)) {
- errno = EISDIR;
- goto out;
- }
- LIBGF_CLIENT_FOP (ctx, stub, opendir, local, loc, fd);
-
- op_ret = stub->args.opendir_cbk.op_ret;
- errno = stub->args.opendir_cbk.op_errno;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "opendir: path %s, status %d,"
- " errno %d", loc->path, op_ret, errno);
- if (op_ret != -1)
- fd_bind (fd);
-
- call_stub_destroy (stub);
-out:
- return op_ret;
-}
-
-glusterfs_file_t
-glusterfs_glh_open (glusterfs_handle_t handle, const char *path, int flags,...)
-{
- loc_t loc = {0, };
- long op_ret = -1;
- fd_t *fd = NULL;
- int32_t ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL, *pathname = NULL;
- libglusterfs_client_inode_ctx_t *inode_ctx = NULL;
- mode_t mode = 0;
- va_list ap;
- char *pathres = NULL;
- char *vpath = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- pathres = strdup (path);
- if (!pathres) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- loc.path = strdup (pathres);
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
-
- if ((op_ret == -1) && ((flags & O_CREAT) != O_CREAT)) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- if (!op_ret && ((flags & O_CREAT) == O_CREAT)
- && ((flags & O_EXCL) == O_EXCL)) {
- errno = EEXIST;
- op_ret = -1;
- goto out;
- }
-
- if (op_ret == 0) {
- flags &= ~O_CREAT;
- }
-
- if ((op_ret == -1) && ((flags & O_CREAT) == O_CREAT)) {
- libgf_client_loc_wipe (&loc);
- loc.path = strdup (pathres);
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for parent while trying to"
- " create (%s)", pathres);
- goto out;
- }
-
- loc.inode = inode_new (ctx->itable);
- }
-
- pathname = strdup (pathres);
- name = basename (pathname);
-
- ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- fd = fd_create (loc.inode, ctx->pid);
- fd->flags = flags;
-
- if (((flags & O_CREAT) == O_CREAT)) {
- /* If we have the st_mode for the basename, check if
- * it is a directory here itself, rather than sending
- * a network message through libgf_client_creat, and
- * then receiving a EISDIR.
- */
- if (IA_ISDIR (loc.inode->ia_type)) {
- errno = EISDIR;
- op_ret = -1;
- goto op_over;
- }
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
- op_ret = libgf_client_creat (ctx, &loc, fd, flags, mode);
- } else {
- if (IA_ISDIR (loc.inode->ia_type))
- op_ret = libgf_client_opendir (ctx, &loc, fd);
- else
- op_ret = libgf_client_open (ctx, &loc, fd, flags);
- }
-
-op_over:
- if (op_ret == -1) {
- fd_unref (fd);
- fd = NULL;
- goto out;
- }
-
- vpath = NULL;
- if (IA_ISDIR (loc.inode->ia_type)) {
- vpath = (char *)path;
- }
-
- if (!libgf_get_fd_ctx (fd)) {
- if (!libgf_alloc_fd_ctx (ctx, fd, vpath)) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Failed to"
- " allocate fd context");
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
- }
-
- if ((flags & O_TRUNC) && (((flags & O_ACCMODE) == O_RDWR)
- || ((flags & O_ACCMODE) == O_WRONLY))) {
- inode_ctx = libgf_get_inode_ctx (fd->inode);
- if (IA_ISREG (inode_ctx->stbuf.ia_type)) {
- inode_ctx->stbuf.ia_size = 0;
- inode_ctx->stbuf.ia_blocks = 0;
- }
- }
-
-out:
- libgf_client_loc_wipe (&loc);
-
- if (pathname) {
- FREE (pathname);
- }
-
- if (pathres)
- FREE (pathres);
-
- return fd;
-}
-
-glusterfs_file_t
-glusterfs_open (const char *path, int flags, ...)
-{
- va_list ap;
- glusterfs_file_t fh = NULL;
- glusterfs_handle_t h = NULL;
- mode_t mode = 0;
- char vpath[PATH_MAX];
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- if (flags & O_CREAT) {
- va_start (ap, flags);
- mode = va_arg (ap, mode_t);
- va_end (ap);
- fh = glusterfs_glh_open (h, vpath, flags, mode);
- } else
- fh = glusterfs_glh_open (h, vpath, flags);
-out:
- return fh;
-}
-
-glusterfs_file_t
-glusterfs_glh_creat (glusterfs_handle_t handle, const char *path, mode_t mode)
-{
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- return glusterfs_glh_open (handle, path,
- (O_CREAT | O_WRONLY | O_TRUNC), mode);
-}
-
-glusterfs_file_t
-glusterfs_creat (const char *path, mode_t mode)
-{
- glusterfs_file_t fh = NULL;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- fh = glusterfs_glh_creat (h, vpath, mode);
-
-out:
- return fh;
-}
-
-int32_t
-libgf_client_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_flush_cbk_stub (frame, NULL, op_ret, op_errno);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-int
-libgf_client_flush (libglusterfs_client_ctx_t *ctx, fd_t *fd)
-{
- call_stub_t *stub;
- int32_t op_ret;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, flush, local, fd);
-
- op_ret = stub->args.flush_cbk.op_ret;
- errno = stub->args.flush_cbk.op_errno;
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_close (glusterfs_file_t fd)
-{
- int32_t op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
- ctx = fd_ctx->ctx;
-
- op_ret = libgf_client_flush (ctx, (fd_t *)fd);
-
- fd_unref ((fd_t *)fd);
-
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_setxattr_cbk_stub (frame, NULL, op_ret,
- op_errno);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_setxattr (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- const char *name,
- const void *value,
- size_t size,
- int flags)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- dict_t *dict;
- libgf_client_local_t *local = NULL;
-
- dict = get_new_dict ();
-
- dict_set (dict, (char *)name,
- bin_to_data ((void *)value, size));
- dict_ref (dict);
-
-
- LIBGF_CLIENT_FOP (ctx, stub, setxattr, local, loc, dict, flags);
-
- op_ret = stub->args.setxattr_cbk.op_ret;
- errno = stub->args.setxattr_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "path %s, name %s, status %d,"
- "errno %d", loc->path, name, op_ret, errno);
- dict_unref (dict);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-#define LIBGF_DO_SETXATTR 1
-#define LIBGF_DO_LSETXATTR 2
-
-int
-__glusterfs_glh_setxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value,
- size_t size, int flags, int whichop)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *tmppath = NULL;
- loc_t *realloc = NULL;
- char *pathres = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "path %s, name %s, op %d", path
- ,name, whichop);
- if (size <= 0) {
- errno = EINVAL;
- goto out;
- }
-
- pathres = strdup (path);
- if (!pathres) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- loc.path = strdup (pathres);
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", pathres);
- goto out;
- }
-
- tmppath = strdup (pathres);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (tmppath));
- FREE (tmppath);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- realloc = &loc;
- if (whichop == LIBGF_DO_LSETXATTR)
- goto do_setx;
-
- if (!IA_ISLNK (loc.inode->ia_type))
- goto do_setx;
-
- libgf_client_loc_wipe (&loc);
- realloc = &loc;
- libgf_realpath_loc_fill (ctx, (char *)pathres, realloc);
-
-do_setx:
- if (!op_ret)
- op_ret = libgf_client_setxattr (ctx, realloc, name, value,
- size, flags);
-
-out:
- if (pathres)
- FREE (pathres);
-
- libgf_client_loc_wipe (realloc);
- return op_ret;
-}
-
-int
-glusterfs_glh_setxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value, size_t size,
- int flags)
-{
- return __glusterfs_glh_setxattr (handle, path, name, value, size, flags
- , LIBGF_DO_SETXATTR);
-}
-
-int
-glusterfs_glh_lsetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value, size_t size,
- int flags)
-{
- return __glusterfs_glh_setxattr (handle, path, name, value, size, flags
- , LIBGF_DO_LSETXATTR);
-}
-
-int
-glusterfs_setxattr (const char *path, const char *name, const void *value,
- size_t size, int flags)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, value, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "path %s, name %s", path, name);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = __glusterfs_glh_setxattr (h, vpath, name, value, size, flags,
- LIBGF_DO_SETXATTR);
-
-out:
- return op_ret;
-}
-
-int
-glusterfs_lsetxattr (const char *path, const char *name, const void *value,
- size_t size, int flags)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, name, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, value, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "path %s, name %s", path, name);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = __glusterfs_glh_setxattr (h, vpath, name, value, size, flags,
- LIBGF_DO_LSETXATTR);
-
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_fsetxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fsetxattr_cbk_stub (frame, NULL, op_ret,
- op_errno);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_fsetxattr (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- const char *name,
- const void *value,
- size_t size,
- int flags)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- dict_t *dict;
- libgf_client_local_t *local = NULL;
-
- dict = get_new_dict ();
-
- dict_set (dict, (char *)name,
- bin_to_data ((void *)value, size));
- dict_ref (dict);
-
- LIBGF_CLIENT_FOP (ctx, stub, fsetxattr, local, fd, dict, flags);
-
- op_ret = stub->args.fsetxattr_cbk.op_ret;
- errno = stub->args.fsetxattr_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "name %s, status %d, errno %d",
- name, op_ret, errno);
- dict_unref (dict);
- call_stub_destroy (stub);
-
- return op_ret;
-}
-
-int
-glusterfs_fsetxattr (glusterfs_file_t fd,
- const char *name,
- const void *value,
- size_t size,
- int flags)
-{
- int32_t op_ret = 0;
- fd_t *__fd = fd;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- libglusterfs_client_ctx_t *ctx = NULL;
-
- if (!fd) {
- errno = EINVAL;
- op_ret = -1;
- gf_log("libglusterfsclient",
- GF_LOG_ERROR,
- "invalid fd");
- goto out;
- }
-
- if (size <= 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Invalid argument: size is"
- " less than or equal to zero");
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
- op_ret = libgf_client_fsetxattr (ctx, __fd, name, value, size,
- flags);
-
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_fgetxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
-
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fgetxattr_cbk_stub (frame, NULL, op_ret,
- op_errno, dict);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-size_t
-libgf_client_fgetxattr (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- const char *name,
- void *value,
- size_t size)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, fgetxattr, local, fd, name);
-
- op_ret = stub->args.fgetxattr_cbk.op_ret;
- errno = stub->args.fgetxattr_cbk.op_errno;
-
- if (op_ret >= 0) {
- /*
- gf_log ("LIBGF_CLIENT", GF_LOG_DEBUG,
- "%"PRId64": %s => %d", frame->root->unique,
- state->fuse_loc.loc.path, op_ret);
- */
-
- data_t *value_data = dict_get (stub->args.fgetxattr_cbk.dict,
- (char *)name);
-
- if (value_data) {
- int32_t copy_len = 0;
-
- /* Don't return the value for '\0' */
- op_ret = value_data->len;
- copy_len = size < value_data->len ?
- size : value_data->len;
- memcpy (value, value_data->data, copy_len);
- } else {
- errno = ENODATA;
- op_ret = -1;
- }
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "name %s, status %d, errno %d",
- name, op_ret, errno);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-ssize_t
-glusterfs_fgetxattr (glusterfs_file_t fd,
- const char *name,
- void *value,
- size_t size)
-{
- int32_t op_ret = 0;
- libglusterfs_client_ctx_t *ctx;
- fd_t *__fd = (fd_t *)fd;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "name %s", name);
- if (size < 0) {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- if (size == 0)
- goto out;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
- op_ret = libgf_client_fgetxattr (ctx, __fd, name, value, size);
-out:
- return op_ret;
-}
-
-ssize_t
-glusterfs_listxattr (glusterfs_handle_t handle,
- const char *path,
- char *list,
- size_t size)
-{
- return ENOSYS;
-}
-
-ssize_t
-glusterfs_llistxattr (glusterfs_handle_t handle,
- const char *path,
- char *list,
- size_t size)
-{
- return ENOSYS;
-}
-
-ssize_t
-glusterfs_flistxattr (glusterfs_file_t fd,
- char *list,
- size_t size)
-{
- return ENOSYS;
-}
-
-int
-glusterfs_removexattr (glusterfs_handle_t handle,
- const char *path,
- const char *name)
-{
- return ENOSYS;
-}
-
-int
-glusterfs_lremovexattr (glusterfs_handle_t handle,
- const char *path,
- const char *name)
-{
- return ENOSYS;
-}
-
-int
-glusterfs_fremovexattr (glusterfs_file_t fd,
- const char *name)
-{
- return ENOSYS;
-}
-
-int32_t
-libgf_client_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_readv_cbk_stub (frame, NULL, op_ret, op_errno,
- vector, count, stbuf, iobref);
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_iobuf_read (libglusterfs_client_ctx_t *ctx, fd_t *fd, void *buf,
- size_t size, off_t offset)
-{
- call_stub_t *stub = NULL;
- struct iovec *vector = NULL;
- int32_t op_ret = -1;
- int count = 0;
- libgf_client_local_t *local = NULL;
- struct iatt *stbuf = NULL;
-
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fd = fd;
- LIBGF_CLIENT_FOP (ctx, stub, readv, local, fd, size, offset);
-
- op_ret = stub->args.readv_cbk.op_ret;
- errno = stub->args.readv_cbk.op_errno;
- count = stub->args.readv_cbk.count;
- vector = stub->args.readv_cbk.vector;
- if (op_ret > 0) {
- int i = 0;
- op_ret = 0;
- while (size && (i < count)) {
- int len = (size < vector[i].iov_len) ?
- size : vector[i].iov_len;
- memcpy (buf, vector[i++].iov_base, len);
- buf += len;
- size -= len;
- op_ret += len;
- }
- stbuf = &stub->args.readv_cbk.stbuf;
- libgf_transform_iattr (ctx, fd->inode, stbuf);
- libgf_invalidate_iattr_cache (fd->inode, LIBGF_INVALIDATE_STAT);
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "size %lu, offset %"PRIu64,
- (long unsigned)size, offset);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-libgf_client_read (libglusterfs_client_ctx_t *ctx, fd_t *fd, void *buf,
- size_t size, off_t offset)
-{
- int32_t op_ret = -1;
- int32_t ret = 0;
- size_t tmp = 0;
-
- while (size != 0) {
- tmp = ((size > LIBGF_IOBUF_SIZE) ? LIBGF_IOBUF_SIZE :
- size);
- op_ret = libgf_client_iobuf_read (ctx, fd, buf, tmp, offset);
- if (op_ret < 0) {
- ret = op_ret;
- break;
- }
-
- ret += op_ret;
-
- if (op_ret < tmp)
- break;
-
- size -= op_ret;
- offset += op_ret;
- buf = (char *)buf + op_ret;
- }
-
- return ret;
-}
-
-ssize_t
-glusterfs_read (glusterfs_file_t fd, void *buf, size_t nbytes)
-{
- int32_t op_ret = -1;
- off_t offset = 0;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (nbytes < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (nbytes == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (fd == 0) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- op_ret = libgf_client_read (ctx, (fd_t *)fd, buf, nbytes, offset);
-
- if (op_ret > 0) {
- offset += op_ret;
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-
-ssize_t
-libgf_client_iobuf_readv (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- const struct iovec *dst_vector, int count,
- size_t size, off_t offset, int *idx,
- off_t *vec_offset)
-{
- call_stub_t *stub = NULL;
- struct iovec *src_vector = NULL;
- int32_t op_ret = -1;
- libgf_client_local_t *local = NULL;
- int src = 0, dst = 0;
- int src_count = 0, dst_count = 0;
- int len = 0, src_len = 0, dst_len = 0;
- off_t src_offset = 0, dst_offset = 0;
- struct iatt *stbuf = NULL;
-
- dst = *idx;
- dst_offset = *vec_offset;
-
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fd = fd;
- LIBGF_CLIENT_FOP (ctx, stub, readv, local, fd, size, offset);
-
- op_ret = stub->args.readv_cbk.op_ret;
- errno = stub->args.readv_cbk.op_errno;
- src_count = stub->args.readv_cbk.count;
- src_vector = stub->args.readv_cbk.vector;
- if (op_ret > 0) {
- while ((size != 0) && (dst < dst_count) && (src < src_count)) {
- src_len = src_vector[src].iov_len - src_offset;
- dst_len = dst_vector[dst].iov_len - dst_offset;
-
- len = (src_len < dst_len) ? src_len : dst_len;
- if (len > size) {
- len = size;
- }
-
- memcpy (dst_vector[dst].iov_base + dst_offset,
- src_vector[src].iov_base + src_offset, len);
-
- size -= len;
- src_offset += len;
- dst_offset += len;
-
- if (src_offset == src_vector[src].iov_len) {
- src_offset = 0;
- src++;
- }
-
- if (dst_offset == dst_vector[dst].iov_len) {
- dst_offset = 0;
- dst++;
- }
- }
-
- stbuf = &stub->args.readv_cbk.stbuf;
- libgf_transform_iattr (ctx, fd->inode, stbuf);
- libgf_invalidate_iattr_cache (fd->inode, LIBGF_UPDATE_STAT);
- }
-
- *idx = dst;
- *vec_offset = dst_offset;
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-ssize_t
-libgf_client_readv (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- const struct iovec *dst_vector, int dst_count, off_t offset)
-{
- int32_t op_ret = -1;
- size_t size = 0, tmp = 0, ret = 0;
- int i = 0;
- int dst_idx = 0;
- off_t dst_offset = 0;
-
- for (i = 0; i < dst_count; i++)
- {
- size += dst_vector[i].iov_len;
- }
-
- while (size != 0) {
- tmp = ((size > LIBGF_IOBUF_SIZE) ? LIBGF_IOBUF_SIZE : size);
- op_ret = libgf_client_iobuf_readv (ctx, fd, dst_vector,
- dst_count, tmp, offset,
- &dst_idx, &dst_offset);
- if (op_ret <= 0) {
- break;
- }
-
- offset += op_ret;
- size -= op_ret;
- ret += op_ret;
- }
-
- return ret;
-}
-
-
-ssize_t
-glusterfs_readv (glusterfs_file_t fd, const struct iovec *vec, int count)
-{
- int32_t op_ret = -1;
- off_t offset = 0;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (count < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- op_ret = libgf_client_readv (ctx, (fd_t *)fd, vec, count, offset);
-
- if (op_ret > 0) {
- offset += op_ret;
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-
-ssize_t
-glusterfs_pread (glusterfs_file_t fd,
- void *buf,
- size_t count,
- off_t offset)
-{
- int32_t op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (count < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- op_ret = libgf_client_read (ctx, (fd_t *)fd, buf, count, offset);
-
-out:
- return op_ret;
-}
-
-
-int
-libgf_client_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_writev_cbk_stub (frame, NULL, op_ret, op_errno,
- prebuf, postbuf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-int
-libgf_client_iobuf_write (libglusterfs_client_ctx_t *ctx, fd_t *fd, char *addr,
- size_t size, off_t offset)
-{
- struct iobref *ioref = NULL;
- struct iobuf *iob = NULL;
- int op_ret = -1;
- struct iovec iov = {0, };
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, addr, out);
-
- ioref = iobref_new ();
- if (!ioref) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Out of memory");
- goto out;
- }
-
- iob = iobuf_get (ctx->gf_ctx.iobuf_pool);
- if (!iob) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Out of memory");
- goto out;
- }
-
- memcpy (iob->ptr, addr, size);
- iobref_add (ioref, iob);
-
- iov.iov_base = iob->ptr;
- iov.iov_len = size;
-
- LIBGF_CLIENT_FOP (ctx, stub, writev, local, fd, &iov,
- 1, offset, ioref);
-
- op_ret = stub->args.writev_cbk.op_ret;
- errno = stub->args.writev_cbk.op_errno;
-
- /* We need to invalidate because it is possible that write-behind
- * is a translator below us and returns a stat filled with zeroes.
- */
- libgf_invalidate_iattr_cache (fd->inode, LIBGF_INVALIDATE_STAT);
-
-out:
- if (iob) {
- iobuf_unref (iob);
- }
-
- if (ioref) {
- iobref_unref (ioref);
- }
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-libgf_client_writev (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- struct iovec *vector,
- int count,
- off_t offset)
-{
- int op_ret = 0;
- int written = 0;
- int writesize = 0;
- int size = 0;
- char *base = NULL;
- int i = 0;
-
- for (i = 0; i < count; i++) {
- size = vector[i].iov_len;
- base = vector[i].iov_base;
-
- while (size > 0) {
- writesize = (size > LIBGF_IOBUF_SIZE) ?
- LIBGF_IOBUF_SIZE : size;
-
- written = libgf_client_iobuf_write (ctx, fd, base,
- writesize, offset);
-
- if (written == -1)
- goto out;
-
- op_ret += written;
- base += written;
- size -= written;
- offset += written;
- }
- }
-
-out:
- return op_ret;
-}
-
-
-ssize_t
-glusterfs_write (glusterfs_file_t fd,
- const void *buf,
- size_t n)
-{
- int32_t op_ret = -1;
- off_t offset = 0;
- struct iovec vector;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (n < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (n == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- vector.iov_base = (void *)buf;
- vector.iov_len = n;
-
- op_ret = libgf_client_writev (ctx,
- (fd_t *)fd,
- &vector,
- 1,
- offset);
-
- if (op_ret >= 0) {
- offset += op_ret;
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-ssize_t
-glusterfs_writev (glusterfs_file_t fd,
- const struct iovec *vector,
- int count)
-{
- int32_t op_ret = -1;
- off_t offset = 0;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (count < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-
- op_ret = libgf_client_writev (ctx,
- (fd_t *)fd,
- (struct iovec *)vector,
- count,
- offset);
-
- if (op_ret >= 0) {
- offset += op_ret;
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-
-ssize_t
-glusterfs_pwrite (glusterfs_file_t fd,
- const void *buf,
- size_t count,
- off_t offset)
-{
- int32_t op_ret = -1;
- struct iovec vector;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- if (count < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (count == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (!fd) {
- errno = EINVAL;
- goto out;
- }
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- vector.iov_base = (void *)buf;
- vector.iov_len = count;
-
- op_ret = libgf_client_writev (ctx,
- (fd_t *)fd,
- &vector,
- 1,
- offset);
-
-out:
- return op_ret;
-}
-
-
-int32_t
-libgf_client_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries)
-{
- libgf_client_local_t *local = frame->local;
-
- /* Note, we dont let entries reach the stub because there it gets copied
- * while we can simply delink the entries here and link them into our
- * dcache, thereby avoiding the need to perform more allocations and
- * copies.
- */
- local->reply_stub = fop_readdirp_cbk_stub (frame, NULL, op_ret,
- op_errno, NULL);
- if (op_ret > 0)
- libgf_dcache_update (frame->root->state, local->fd, entries);
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_readdir (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- struct dirent *dirp, off_t *offset)
-{
- call_stub_t *stub = NULL;
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
-
- if (libgf_dcache_readdir (ctx, fd, dirp, offset))
- return 1;
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fd = fd;
- LIBGF_CLIENT_FOP (ctx, stub, readdirp, local, fd,
- LIBGF_READDIR_BLOCK, *offset);
-
- errno = stub->args.readdir_cbk.op_errno;
-
- op_ret = libgf_dcache_readdir (ctx, fd, dirp, offset);
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_readdir_r (glusterfs_dir_t dirfd, struct dirent *entry,
- struct dirent **result)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- off_t offset = 0;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- struct dirent *dirp = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, entry, out);
-
- fd_ctx = libgf_get_fd_ctx (dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "fd context not present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- dirp = &fd_ctx->dirp;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "offset %"PRIu64, offset);
- memset (dirp, 0, sizeof (struct dirent));
- op_ret = libgf_client_readdir (ctx, (fd_t *)dirfd, dirp,
- &offset);
- if (op_ret <= 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "readdir failed:"
- " %s", strerror (errno));
- if (result && (op_ret == 0)) {
- *result = NULL;
- } else if (op_ret < 0){
- op_ret = errno;
- }
- goto unlock;
- }
-
- fd_ctx->offset = offset;
-
- if (result) {
- *result = memcpy (entry, dirp, sizeof (*entry));
- } else {
- memcpy (entry, dirp, sizeof (*entry));
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "new offset %"PRIu64", "
- " entry %s", offset, entry->d_name);
- op_ret = 0;
- }
-unlock:
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return op_ret;
-}
-
-
-void *
-glusterfs_readdir (glusterfs_dir_t dirfd)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- off_t offset = 0;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- struct dirent *dirp = NULL;
-
- fd_ctx = libgf_get_fd_ctx (dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "fd context not present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- dirp = &fd_ctx->dirp;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "offset %"PRIu64, offset);
- memset (dirp, 0, sizeof (struct dirent));
- op_ret = libgf_client_readdir (ctx, (fd_t *)dirfd, dirp, &offset);
-
- if (op_ret <= 0) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "readdir failed: %s",
- strerror (errno));
- dirp = NULL;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "new offset %"PRIu64", entry %s",
- offset, dirp->d_name);
-out:
- return dirp;
-}
-
-
-int
-glusterfs_getdents (glusterfs_file_t fd, struct dirent *dirp,
- unsigned int count)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = NULL;
- off_t offset = 0;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- ctx = fd_ctx->ctx;
- offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- op_ret = libgf_client_readdir (ctx, (fd_t *)fd, dirp, &offset);
-
- if (op_ret > 0) {
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
-out:
- return op_ret;
-}
-
-
-static int32_t
-libglusterfs_readv_async_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref)
-{
- glusterfs_iobuf_t *buf;
- libglusterfs_client_async_local_t *local = frame->local;
- fd_t *__fd = local->fop.readv_cbk.fd;
- glusterfs_readv_cbk_t readv_cbk = local->fop.readv_cbk.cbk;
-
- buf = CALLOC (1, sizeof (*buf));
- ERR_ABORT (buf);
-
- if (vector) {
- buf->vector = iov_dup (vector, count);
- }
-
- buf->count = count;
-
- if (iobref) {
- buf->iobref = iobref_ref (iobref);
- }
-
- if (op_ret > 0) {
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_get_fd_ctx (__fd);
-
- /* update offset only if we have used offset stored in fd_ctx */
- if (local->fop.readv_cbk.update_offset) {
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset += op_ret;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
- }
-
- readv_cbk (op_ret, op_errno, buf, local->cbk_data);
-
- FREE (local);
- frame->local = NULL;
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-void
-glusterfs_free (glusterfs_iobuf_t *buf)
-{
- //iov_free (buf->vector, buf->count);
- FREE (buf->vector);
- if (buf->iobref)
- iobref_unref ((struct iobref *) buf->iobref);
- if (buf->dictref)
- dict_unref ((dict_t *) buf->dictref);
- FREE (buf);
-}
-
-int
-glusterfs_read_async (glusterfs_file_t fd,
- size_t nbytes,
- off_t offset,
- glusterfs_readv_cbk_t readv_cbk,
- void *cbk_data)
-{
- libglusterfs_client_ctx_t *ctx;
- fd_t *__fd = (fd_t *)fd;
- libglusterfs_client_async_local_t *local = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int32_t op_ret = 0;
-
- if (nbytes < 0) {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- if (nbytes == 0) {
- op_ret = 0;
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fop.readv_cbk.fd = __fd;
- local->fop.readv_cbk.cbk = readv_cbk;
- local->cbk_data = cbk_data;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- if (offset < 0) {
- pthread_mutex_lock (&fd_ctx->lock);
- {
- offset = fd_ctx->offset;
- local->fop.readv_cbk.update_offset = 1;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
- LIBGF_CLIENT_FOP_ASYNC (ctx,
- local,
- libglusterfs_readv_async_cbk,
- readv,
- __fd,
- nbytes,
- offset);
-
-out:
- return op_ret;
-}
-
-static int32_t
-libglusterfs_writev_async_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- libglusterfs_client_async_local_t *local = frame->local;
- fd_t *fd = NULL;
- glusterfs_write_cbk_t write_cbk;
-
- write_cbk = local->fop.write_cbk.cbk;
- fd = local->fop.write_cbk.fd;
-
- if (op_ret > 0) {
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- fd_ctx = libgf_get_fd_ctx (fd);
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset += op_ret;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
- write_cbk (op_ret, op_errno, local->cbk_data);
-
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-int32_t
-glusterfs_write_async (glusterfs_file_t fd,
- const void *buf,
- size_t nbytes,
- off_t offset,
- glusterfs_write_cbk_t write_cbk,
- void *cbk_data)
-{
- fd_t *__fd = (fd_t *)fd;
- struct iovec vector;
- off_t __offset = offset;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_async_local_t *local = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int32_t op_ret = 0;
- struct iobref *iobref = NULL;
-
- if (nbytes == 0) {
- op_ret = 0;
- goto out;
- }
-
- if (nbytes < 0) {
- op_ret = -1;
- errno = EINVAL;
- goto out;
- }
-
- local = CALLOC (1, sizeof (*local));
- ERR_ABORT (local);
- local->fop.write_cbk.fd = __fd;
- local->fop.write_cbk.cbk = write_cbk;
- local->cbk_data = cbk_data;
-
- vector.iov_base = (void *)buf;
- vector.iov_len = nbytes;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- if (offset < 0) {
- pthread_mutex_lock (&fd_ctx->lock);
- {
- __offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
- }
-
- iobref = iobref_new ();
- LIBGF_CLIENT_FOP_ASYNC (ctx,
- local,
- libglusterfs_writev_async_cbk,
- writev,
- __fd,
- &vector,
- 1,
- __offset,
- iobref);
- iobref_unref (iobref);
-
-out:
- return op_ret;
-}
-
-off_t
-glusterfs_lseek (glusterfs_file_t fd, off_t offset, int whence)
-{
- off_t __offset = 0;
- int32_t op_ret = -1;
- fd_t *__fd = (fd_t *)fd;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- libglusterfs_client_ctx_t *ctx = NULL;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- __offset = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- switch (whence)
- {
- case SEEK_SET:
- __offset = offset;
- break;
-
- case SEEK_CUR:
- pthread_mutex_lock (&fd_ctx->lock);
- {
- __offset = fd_ctx->offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- __offset += offset;
- break;
-
- case SEEK_END:
- {
- char cache_valid = 0;
- off_t end = 0;
- loc_t loc = {0, };
- struct iatt stbuf = {0, };
-
- cache_valid = libgf_is_iattr_cache_valid (ctx, __fd->inode,
- &stbuf,
- LIBGF_VALIDATE_STAT);
- if (cache_valid) {
- end = stbuf.ia_size;
- } else {
- op_ret = libgf_client_loc_fill (&loc, ctx,
- __fd->inode->ino, 0,
- NULL);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- libgf_client_loc_wipe (&loc);
- __offset = -1;
- goto out;
- }
-
- op_ret = libgf_client_lookup (ctx, &loc, &stbuf, NULL,
- NULL);
- if (op_ret < 0) {
- __offset = -1;
- libgf_client_loc_wipe (&loc);
- goto out;
- }
-
- end = stbuf.ia_size;
- }
-
- __offset = end + offset;
- libgf_client_loc_wipe (&loc);
- }
- break;
-
- default:
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "invalid value for whence");
- __offset = -1;
- errno = EINVAL;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = __offset;
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return __offset;
-}
-
-
-int32_t
-libgf_client_stat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_stat_cbk_stub (frame,
- NULL,
- op_ret,
- op_errno,
- buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_stat (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- struct iatt *stbuf)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
- struct iatt cachedbuf = {0, };
-
- if (libgf_is_iattr_cache_valid (ctx, loc->inode, &cachedbuf,
- LIBGF_VALIDATE_STAT)) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Cache will be used");
- if (stbuf)
- memcpy (stbuf, &cachedbuf, sizeof (struct stat));
- goto out;
- }
-
- LIBGF_CLIENT_FOP (ctx, stub, stat, local, loc);
-
- op_ret = stub->args.stat_cbk.op_ret;
- errno = stub->args.stat_cbk.op_errno;
- libgf_transform_iattr (ctx, loc->inode, &stub->args.stat_cbk.buf);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
-
- if (op_ret == 0) {
- if (stbuf)
- *stbuf = stub->args.stat_cbk.buf;
-
- libgf_update_iattr_cache (loc->inode, LIBGF_UPDATE_STAT,
- &stub->args.stat_cbk.buf);
- }
-
- call_stub_destroy (stub);
-
-out:
- return op_ret;
-}
-
-int
-libgf_realpath_loc_fill (libglusterfs_client_ctx_t *ctx, char *link,
- loc_t *targetloc)
-{
- int op_ret = -1;
- char *target = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, link, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, targetloc, out);
-
- targetloc->path = glusterfs_glh_realpath (ctx, link, NULL);
-
- if (targetloc->path == NULL)
- goto out;
-
- op_ret = libgf_client_path_lookup (targetloc, ctx, 1);
- if (op_ret == -1)
- goto out;
-
- target = strdup (targetloc->path);
- op_ret = libgf_client_loc_fill (targetloc, ctx, 0,
- targetloc->parent->ino,
- basename (target));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
-out:
- if (target)
- FREE (target);
-
- return op_ret;
-}
-
-#define LIBGF_DO_LSTAT 0x01
-#define LIBGF_DO_STAT 0x02
-
-int
-__glusterfs_stat (glusterfs_handle_t handle, const char *path,
- struct stat *buf, int whichstat)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL, *pathname = NULL;
- loc_t targetloc = {0, };
- loc_t *real_loc = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, op: %d", path,
- whichstat);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
- real_loc = &loc;
- /* The stat fop in glusterfs calls lstat. So we have to
- * provide the POSIX compatible stat fop. To do so, we need to ensure
- * that if the @path is a symlink, we must perform a stat on the
- * target of that symlink than the symlink itself(..because if
- * do a stat on the symlink, we're actually doing what lstat
- * should do. See posix_stat
- */
- if (whichstat & LIBGF_DO_LSTAT)
- goto lstat_fop;
-
- if (!IA_ISLNK (loc.inode->ia_type))
- goto lstat_fop;
-
- op_ret = libgf_realpath_loc_fill (ctx, (char *)loc.path, &targetloc);
- if (op_ret == -1)
- goto out;
- real_loc = &targetloc;
-
-lstat_fop:
-
- if (!op_ret) {
- struct iatt iatt;
- op_ret = libgf_client_stat (ctx, real_loc, &iatt);
- iatt_to_stat (&iatt, buf);
- }
-
-out:
- if (pathname) {
- FREE (pathname);
- }
-
- libgf_client_loc_wipe (&loc);
- libgf_client_loc_wipe (&targetloc);
-
- return op_ret;
-}
-
-int
-glusterfs_glh_stat (glusterfs_handle_t handle, const char *path,
- struct stat *buf)
-{
- return __glusterfs_stat (handle, path, buf, LIBGF_DO_STAT);
-}
-
-int
-glusterfs_stat (const char *path, struct stat *buf)
-{
- glusterfs_handle_t h = NULL;
- int op_ret = -1;
- char vpath[PATH_MAX];
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_stat (h, vpath, buf);
-
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_lstat (glusterfs_handle_t handle, const char *path, struct stat *buf)
-{
- return __glusterfs_stat (handle, path, buf, LIBGF_DO_LSTAT);
-}
-
-int
-glusterfs_lstat (const char *path, struct stat *buf)
-{
- glusterfs_handle_t h = NULL;
- int op_ret = -1;
- char vpath[PATH_MAX];
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_lstat (h, vpath, buf);
-out:
- return op_ret;
-}
-
-static int32_t
-libgf_client_fstat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fstat_cbk_stub (frame,
- NULL,
- op_ret,
- op_errno,
- buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-
-}
-
-int32_t
-libgf_client_fstat (libglusterfs_client_ctx_t *ctx,
- fd_t *fd,
- struct stat *buf)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
- struct iatt cachedbuf = {0, };
-
- if (libgf_is_iattr_cache_valid (ctx, fd->inode, &cachedbuf,
- LIBGF_VALIDATE_STAT)) {
- if (buf)
- memcpy (buf, &cachedbuf, sizeof (struct stat));
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Cache will be used");
- goto out;
- }
-
- LIBGF_CLIENT_FOP (ctx, stub, fstat, local, fd);
-
- op_ret = stub->args.fstat_cbk.op_ret;
- errno = stub->args.fstat_cbk.op_errno;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "status %d, errno %d", op_ret,
- errno);
-
- if (op_ret == 0) {
- libgf_transform_iattr (ctx, fd->inode,
- &stub->args.fstat_cbk.buf);
- if (buf)
- iatt_to_stat (&stub->args.fstat_cbk.buf, buf);
- libgf_update_iattr_cache (fd->inode, LIBGF_UPDATE_STAT,
- &stub->args.fstat_cbk.buf);
- }
- call_stub_destroy (stub);
-
-out:
- return op_ret;
-}
-
-int32_t
-glusterfs_fstat (glusterfs_file_t fd, struct stat *buf)
-{
- libglusterfs_client_ctx_t *ctx;
- fd_t *__fd = (fd_t *)fd;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- int32_t op_ret = -1;
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- op_ret = libgf_client_fstat (ctx, __fd, buf);
-
-out:
- return op_ret;
-}
-
-
-static int32_t
-libgf_client_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_mkdir_cbk_stub (frame, NULL, op_ret, op_errno,
- inode, buf, preparent,
- postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-static int32_t
-libgf_client_mkdir (libglusterfs_client_ctx_t *ctx,
- loc_t *loc,
- mode_t mode)
-{
- int32_t op_ret = -1;
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- inode_t *libgf_inode = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, mkdir, local, loc, mode);
- op_ret = stub->args.mkdir_cbk.op_ret;
- errno = stub->args.mkdir_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- libgf_inode = stub->args.mkdir_cbk.inode;
- inode_link (libgf_inode, loc->parent, loc->name,
- &stub->args.mkdir_cbk.buf);
- libgf_transform_iattr (ctx, libgf_inode, &stub->args.mkdir_cbk.buf);
-
- inode_lookup (libgf_inode);
-
- libgf_alloc_inode_ctx (ctx, libgf_inode);
- libgf_update_iattr_cache (libgf_inode, LIBGF_UPDATE_ALL,
- &stub->args.mkdir_cbk.buf);
-
-out:
- call_stub_destroy (stub);
-
- return op_ret;
-}
-
-
-int32_t
-glusterfs_glh_mkdir (glusterfs_handle_t handle, const char *path, mode_t mode)
-{
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *pathname = NULL, *name = NULL;
- int32_t op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == 0) {
- op_ret = -1;
- errno = EEXIST;
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- loc.inode = inode_new (ctx->itable);
- op_ret = libgf_client_mkdir (ctx, &loc, mode);
- if (op_ret == -1) {
- goto out;
- }
-
-out:
- libgf_client_loc_wipe (&loc);
- if (pathname) {
- free (pathname);
- pathname = NULL;
- }
-
- return op_ret;
-}
-
-int32_t
-glusterfs_mkdir (const char *path, mode_t mode)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_mkdir (h, vpath, mode);
-out:
- return op_ret;
-}
-
-static int32_t
-libgf_client_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,struct iatt *preparent,
- struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_rmdir_cbk_stub (frame, NULL, op_ret, op_errno,
- preparent, postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-static int32_t
-libgf_client_rmdir (libglusterfs_client_ctx_t *ctx, loc_t *loc)
-{
- int32_t op_ret = -1;
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, rmdir, local, loc);
-
- op_ret = stub->args.rmdir_cbk.op_ret;
- errno = stub->args.rmdir_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- if (stub->args.rmdir_cbk.op_ret != 0)
- goto out;
-
- inode_unlink (loc->inode, loc->parent, loc->name);
-
-out:
- call_stub_destroy (stub);
-
- return op_ret;
-}
-
-int32_t
-glusterfs_glh_rmdir (glusterfs_handle_t handle, const char *path)
-{
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *pathname = NULL, *name = NULL;
- int32_t op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_rmdir (ctx, &loc);
- if (op_ret == -1) {
- goto out;
- }
-
-out:
- libgf_client_loc_wipe (&loc);
-
- if (pathname) {
- free (pathname);
- pathname = NULL;
- }
-
- return op_ret;
-}
-
-int32_t
-glusterfs_rmdir (const char *path)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_rmdir (h, vpath);
-out:
- return op_ret;
-}
-
-int
-libgf_client_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_setattr_cbk_stub (frame, NULL,
- op_ret, op_errno,
- preop, postop);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_setattr (libglusterfs_client_ctx_t *ctx, loc_t * loc,
- struct iatt *stbuf, int32_t valid)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, setattr, local, loc,
- stbuf, valid);
-
- op_ret = stub->args.setattr_cbk.op_ret;
- errno = stub->args.setattr_cbk.op_errno;
-
- if (op_ret == -1)
- goto out;
-
- libgf_transform_iattr (ctx, loc->inode,
- &stub->args.setattr_cbk.statpost);
- libgf_update_iattr_cache (loc->inode, LIBGF_UPDATE_STAT,
- &stub->args.setattr_cbk.statpost);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_glh_chmod (glusterfs_handle_t handle, const char *path, mode_t mode)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *name = NULL;
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- stbuf.ia_prot = ia_prot_from_st_mode (mode);
- valid |= GF_SET_ATTR_MODE;
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1)
- goto out;
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_setattr (ctx, &loc, &stbuf, valid);
-
-out:
- if (name)
- FREE (name);
-
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_chmod (const char *path, mode_t mode)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_chmod (h, vpath, mode);
-out:
- return op_ret;
-}
-
-
-#define LIBGF_DO_CHOWN 1
-#define LIBGF_DO_LCHOWN 2
-
-int
-__glusterfs_chown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group, int whichop)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *name = NULL;
- loc_t *oploc = NULL;
- loc_t targetloc = {0, };
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, op %d", path, whichop);
- stbuf.ia_uid = owner;
- stbuf.ia_gid = group;
- valid |= (GF_SET_ATTR_UID | GF_SET_ATTR_GID);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1)
- goto out;
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename ((char *)name));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
- oploc = &loc;
- if (whichop == LIBGF_DO_LCHOWN)
- goto do_lchown;
-
- if (!IA_ISLNK (loc.inode->ia_type))
- goto do_lchown;
-
- op_ret = libgf_realpath_loc_fill (ctx, (char *)loc.path, &targetloc);
- if (op_ret == -1)
- goto out;
-
- oploc = &targetloc;
-do_lchown:
- op_ret = libgf_client_setattr (ctx, oploc, &stbuf, valid);
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- libgf_client_loc_wipe (&targetloc);
- return op_ret;
-}
-
-int
-glusterfs_glh_chown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group)
-{
- return __glusterfs_chown (handle, path, owner, group, LIBGF_DO_CHOWN);
-}
-
-int
-glusterfs_chown (const char *path, uid_t owner, gid_t group)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_chown (h, vpath, owner, group);
-
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_lchown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group)
-{
- return __glusterfs_chown (handle, path, owner, group, LIBGF_DO_LCHOWN);
-}
-
-int
-glusterfs_lchown (const char *path, uid_t owner, gid_t group)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_lchown (h, vpath, owner, group);
-out:
- return op_ret;
-}
-
-glusterfs_dir_t
-glusterfs_glh_opendir (glusterfs_handle_t handle, const char *path)
-{
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- fd_t *dirfd = NULL;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
-
- if (op_ret == -1)
- goto out;
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
- if (!IA_ISDIR (loc.inode->ia_type) && !IA_ISLNK (loc.inode->ia_type)) {
- errno = ENOTDIR;
- op_ret = -1;
- goto out;
- }
-
- dirfd = fd_create (loc.inode, ctx->pid);
- op_ret = libgf_client_opendir (ctx, &loc, dirfd);
-
- if (op_ret == -1) {
- fd_unref (dirfd);
- dirfd = NULL;
- goto out;
- }
-
- if (!libgf_get_fd_ctx (dirfd)) {
- if (!(libgf_alloc_fd_ctx (ctx, dirfd, (char *)path))) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Context "
- "allocation failed");
- op_ret = -1;
- errno = EINVAL;
- goto out;
- }
- }
-
-out:
- if (name)
- FREE (name);
-
- if (op_ret == -1) {
- fd_unref (dirfd);
- dirfd = NULL;
- }
-
- libgf_client_loc_wipe (&loc);
- return dirfd;
-}
-
-glusterfs_dir_t
-glusterfs_opendir (const char *path)
-{
- char vpath[PATH_MAX];
- glusterfs_dir_t dir = NULL;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- dir = glusterfs_glh_opendir (h, vpath);
-out:
- return dir;
-}
-
-int
-glusterfs_closedir (glusterfs_dir_t dirfd)
-{
- int op_ret = -1;
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, dirfd, out);
- fdctx = libgf_get_fd_ctx (dirfd);
-
- if (fdctx == NULL) {
- errno = EBADF;
- op_ret = -1;
- goto out;
- }
-
- op_ret = libgf_client_flush (fdctx->ctx, (fd_t *)dirfd);
- fd_unref ((fd_t *)dirfd);
-
-out:
- return op_ret;
-}
-
-
-int
-libgf_client_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fsetattr_cbk_stub (frame, NULL,
- op_ret, op_errno,
- preop, postop);
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-
-int
-libgf_client_fsetattr (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, fsetattr, local, fd, stbuf, valid);
-
- op_ret = stub->args.fsetattr_cbk.op_ret;
- errno = stub->args.fsetattr_cbk.op_errno;
-
- if (op_ret == -1)
- goto out;
-
- libgf_transform_iattr (ctx, fd->inode,
- &stub->args.fsetattr_cbk.statpost);
- libgf_update_iattr_cache (fd->inode, LIBGF_UPDATE_STAT,
- &stub->args.fsetattr_cbk.statpost);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_fchmod (glusterfs_file_t fd, mode_t mode)
-{
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- int op_ret = -1;
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
- fdctx = libgf_get_fd_ctx (fd);
-
- if (!fdctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- stbuf.ia_prot = ia_prot_from_st_mode (mode);
- valid |= GF_SET_ATTR_MODE;
-
- op_ret = libgf_client_fsetattr (fdctx->ctx, fd, &stbuf, valid);
-out:
- return op_ret;
-}
-
-
-int
-glusterfs_fchown (glusterfs_file_t fd, uid_t uid, gid_t gid)
-{
- int op_ret = -1;
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- fdctx = libgf_get_fd_ctx (fd);
- if (!fd) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
- stbuf.ia_uid = uid;
- stbuf.ia_gid = gid;
-
- valid |= (GF_SET_ATTR_UID | GF_SET_ATTR_GID);
-
- op_ret = libgf_client_fsetattr (fdctx->ctx, fd, &stbuf, valid);
-
-out:
- return op_ret;
-}
-
-int
-libgf_client_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *xlator,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_fsync_cbk_stub (frame, NULL, op_ret, op_errno,
- prebuf, postbuf);
-
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-int
-libgf_client_fsync (libglusterfs_client_ctx_t *ctx, fd_t *fd)
-{
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
- int op_ret = -1;
-
- LIBGF_CLIENT_FOP (ctx, stub, fsync, local, fd, 0);
-
- op_ret = stub->args.fsync_cbk.op_ret;
- errno = stub->args.fsync_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "status %d, errno %d", op_ret,
- errno);
- call_stub_destroy (stub);
-
- return op_ret;
-}
-
-int
-glusterfs_fsync (glusterfs_file_t *fd)
-{
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- fdctx = libgf_get_fd_ctx ((fd_t *)fd);
- if (!fdctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- op_ret = libgf_client_fsync (fdctx->ctx, (fd_t *)fd);
-
-out:
- return op_ret;
-}
-
-int
-libgf_client_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *xlator
- ,int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_ftruncate_cbk_stub (frame, NULL, op_ret,
- op_errno, prebuf, postbuf);
-
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-int
-libgf_client_ftruncate (libglusterfs_client_ctx_t *ctx, fd_t *fd,
- off_t length)
-{
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
- int op_ret = -1;
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
-
- if (!(((fd->flags & O_ACCMODE) == O_RDWR)
- || ((fd->flags & O_ACCMODE) == O_WRONLY))) {
- errno = EBADF;
- goto out;
- }
-
- LIBGF_CLIENT_FOP (ctx, stub, ftruncate, local, fd, length);
-
- op_ret = stub->args.ftruncate_cbk.op_ret;
- errno = stub->args.ftruncate_cbk.op_errno;
-
- if (op_ret == -1)
- goto out;
-
- libgf_transform_iattr (ctx, fd->inode,
- &stub->args.ftruncate_cbk.postbuf);
- libgf_update_iattr_cache (fd->inode, LIBGF_UPDATE_STAT,
- &stub->args.ftruncate_cbk.postbuf);
-
- fdctx = libgf_get_fd_ctx (fd);
- if (!fd) {
- errno = EINVAL;
- op_ret = -1;
- goto out;
- }
-
- pthread_mutex_lock (&fdctx->lock);
- {
- fdctx->offset = stub->args.ftruncate_cbk.postbuf.ia_size;
- }
- pthread_mutex_unlock (&fdctx->lock);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_ftruncate (glusterfs_file_t fd, off_t length)
-{
- libglusterfs_client_fd_ctx_t *fdctx = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- fdctx = libgf_get_fd_ctx (fd);
- if (!fdctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- op_ret = libgf_client_ftruncate (fdctx->ctx, fd, length);
-
-out:
- return op_ret;
-}
-
-int
-libgf_client_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_link_cbk_stub (frame, NULL, op_ret, op_errno,
- inode, buf, preparent,
- postparent);
-
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-int
-libgf_client_link (libglusterfs_client_ctx_t *ctx, loc_t *old, loc_t *new)
-{
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- int op_ret = -1;
- inode_t *inode = NULL;
- struct iatt *sbuf = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, link, local, old, new);
-
- op_ret = stub->args.link_cbk.op_ret;
- errno = stub->args.link_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s, status %d,"
- " errno %d", old->path, new->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- inode = stub->args.link_cbk.inode;
- sbuf = &stub->args.link_cbk.buf;
- inode_link (inode, new->parent, basename ((char *)new->path), sbuf);
- libgf_transform_iattr (ctx, inode, sbuf);
- inode_lookup (inode);
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_STAT, sbuf);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_link (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath)
-{
- libglusterfs_client_ctx_t *ctx = handle;
- int op_ret = -1;
- loc_t old = {0,};
- loc_t new = {0,};
- char *oldname = NULL;
- char *newname = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s", oldpath,
- newpath);
-
- old.path = strdup (oldpath);
- if (!old.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&old, ctx, 1);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- oldname = strdup (old.path);
- op_ret = libgf_client_loc_fill (&old, ctx, 0, old.parent->ino,
- basename (oldname));
- if (op_ret == -1) {
- errno = EINVAL;
- goto out;
- }
-
- if (IA_ISDIR (old.inode->ia_type)) {
- errno = EPERM;
- op_ret = -1;
- goto out;
- }
-
- new.path = strdup (newpath);
- if (!new.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&new, ctx, 1);
- if (op_ret == 0) {
- errno = EEXIST;
- op_ret = -1;
- goto out;
- }
-
- newname = strdup (new.path);
- new.inode = inode_ref (old.inode);
- libgf_client_loc_fill (&new, ctx, 0, new.parent->ino,
- basename (newname));
- op_ret = libgf_client_link (ctx, &old, &new);
-
-out:
- if (oldname)
- FREE (oldname);
- if (newname)
- FREE (newname);
- libgf_client_loc_wipe (&old);
- libgf_client_loc_wipe (&new);
-
- return op_ret;
-}
-
-int
-glusterfs_link (const char *oldpath, const char *newpath)
-{
- int op_ret = -1;
- char oldvpath[PATH_MAX];
- char newvpath[PATH_MAX];
- glusterfs_handle_t oldh = NULL;
- glusterfs_handle_t newh = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s", oldpath,
- newpath);
-
- oldh = libgf_resolved_path_handle (oldpath, oldvpath);
- if (!oldh) {
- errno = ENODEV;
- goto out;
- }
-
- newh = libgf_resolved_path_handle (newpath, newvpath);
- if (!newh) {
- errno = ENODEV;
- goto out;
- }
-
- /* Cannot hard link across glusterfs mounts. */
- if (newh != oldh) {
- errno = EXDEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_link (newh, oldvpath, newvpath);
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_statfs_cbk_stub (frame, NULL, op_ret, op_errno,
- buf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_statvfs (libglusterfs_client_ctx_t *ctx, loc_t *loc,
- struct statvfs *buf)
-{
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- int32_t op_ret = -1;
-
- /* statfs fop receives struct statvfs as an argument */
-
- /* libgf_client_statfs_cbk will be the callback, not
- libgf_client_statvfs_cbk. see definition of LIBGF_CLIENT_FOP
- */
- LIBGF_CLIENT_FOP (ctx, stub, statfs, local, loc);
-
- op_ret = stub->args.statfs_cbk.op_ret;
- errno = stub->args.statfs_cbk.op_errno;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- if (buf)
- memcpy (buf, &stub->args.statfs_cbk.buf, sizeof (*buf));
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_statfs (glusterfs_handle_t handle, const char *path,
- struct statfs *buf)
-{
- struct statvfs stvfs = {0, };
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- "returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_statvfs (ctx, &loc, &stvfs);
- if (op_ret == 0) {
-#ifdef GF_SOLARIS_HOST_OS
- buf->f_fstyp = 0;
- buf->f_bsize = stvfs.f_bsize;
- buf->f_blocks = stvfs.f_blocks;
- buf->f_bfree = stvfs.f_bfree;
- buf->f_files = stvfs.f_bavail;
- buf->f_ffree = stvfs.f_ffree;
-#else
- buf->f_type = 0;
- buf->f_bsize = stvfs.f_bsize;
- buf->f_blocks = stvfs.f_blocks;
- buf->f_bfree = stvfs.f_bfree;
- buf->f_bavail = stvfs.f_bavail;
- buf->f_files = stvfs.f_bavail;
- buf->f_ffree = stvfs.f_ffree;
- /* FIXME: buf->f_fsid has either "val" or "__val" as member
- based on conditional macro expansion. see definition of
- fsid_t - Raghu
- It seems have different structure member names on
- different archs, so I am stepping down to doing a struct
- to struct copy. :Shehjar
- */
- memcpy (&buf->f_fsid, &stvfs.f_fsid, sizeof (stvfs.f_fsid));
- buf->f_namelen = stvfs.f_namemax;
-#endif
- }
-
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_statfs (const char *path, struct statfs *buf)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_statfs (h, vpath, buf);
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_statvfs (glusterfs_handle_t handle, const char *path,
- struct statvfs *buf)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning"
- " EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_statvfs (ctx, &loc, buf);
- if (op_ret != -1)
- /* Should've been a call to libgf_transform_iattr but
- * that only handles struct stat
- */
- buf->f_fsid = (unsigned long)ctx->fake_fsid;
-
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_statvfs (const char *path, struct statvfs *buf)
-{
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
- int op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_statvfs (h, vpath, buf);
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_rename_cbk_stub (frame, NULL, op_ret, op_errno,
- buf, preoldparent,
- postoldparent, prenewparent,
- postnewparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_rename (libglusterfs_client_ctx_t *ctx, loc_t *oldloc,
- loc_t *newloc)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, rename, local, oldloc, newloc);
-
- op_ret = stub->args.rename_cbk.op_ret;
- errno = stub->args.rename_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s, status %d, errno"
- " %d", oldloc->path, newloc->path, op_ret, errno);
- if (op_ret == -1)
- goto out;
-
- if (!libgf_get_inode_ctx (newloc->inode))
- libgf_alloc_inode_ctx (ctx, newloc->inode);
-
- libgf_transform_iattr (ctx, newloc->inode, &stub->args.rename_cbk.buf);
- libgf_update_iattr_cache (newloc->inode, LIBGF_UPDATE_STAT,
- &stub->args.rename_cbk.buf);
-
- inode_unlink (oldloc->inode, oldloc->parent, oldloc->name);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_rename (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath)
-{
- int32_t op_ret = -1;
- loc_t oldloc = {0, }, newloc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *newname = NULL;
- char *oldname = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "old %s, new %s", oldpath,
- newpath);
-
- oldloc.path = strdup (oldpath);
- if (!oldloc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&oldloc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", oldloc.path);
- goto out;
- }
-
- newloc.path = strdup (newpath);
- if (!newloc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&newloc, ctx, 1);
-
- oldname = strdup (oldloc.path);
- op_ret = libgf_client_loc_fill (&oldloc, ctx, 0, oldloc.parent->ino,
- basename (oldname));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1,"
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- newname = strdup (newloc.path);
- op_ret = libgf_client_loc_fill (&newloc, ctx, 0, newloc.parent->ino,
- basename (newname));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1,"
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
- op_ret = libgf_client_rename (ctx, &oldloc, &newloc);
-
-out:
- if (oldname)
- FREE (oldname);
- if (newname)
- FREE (newname);
- libgf_client_loc_wipe (&newloc);
- libgf_client_loc_wipe (&oldloc);
-
- return op_ret;
-}
-
-
-int
-glusterfs_rename (const char *oldpath, const char *newpath)
-{
- int op_ret = -1;
- char oldvpath[PATH_MAX];
- char newvpath[PATH_MAX];
- glusterfs_handle_t oldh = NULL;
- glusterfs_handle_t newh = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Old %s, new %s", oldpath,
- newpath);
-
- oldh = libgf_resolved_path_handle (oldpath, oldvpath);
- if (!oldh) {
- errno = ENODEV;
- goto out;
- }
-
- newh = libgf_resolved_path_handle (newpath, newvpath);
- if (!newh) {
- errno = ENODEV;
- goto out;
- }
-
- if (oldh != newh) {
- errno = EXDEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_rename (oldh, oldvpath, newvpath);
-out:
- return op_ret;
-}
-
-
-int
-glusterfs_glh_utimes (glusterfs_handle_t handle, const char *path,
- const struct timeval times[2])
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- stbuf.ia_atime = times[0].tv_sec;
- stbuf.ia_atime_nsec = times[0].tv_usec * 1000;
- stbuf.ia_mtime = times[1].tv_sec;
- stbuf.ia_mtime_nsec = times[1].tv_usec * 1000;
-
- valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1"
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_setattr (ctx, &loc, &stbuf, valid);
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_utimes (const char *path, const struct timeval times[2])
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_utimes (h, vpath, times);
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_utime (glusterfs_handle_t handle, const char *path,
- const struct utimbuf *buf)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- if (buf) {
- stbuf.ia_atime = buf->actime;
- stbuf.ia_atime_nsec = 0;
-
- stbuf.ia_mtime = buf->modtime;
- stbuf.ia_mtime_nsec = 0;
- }
-
- valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1,"
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_setattr (ctx, &loc, &stbuf, valid);
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_utime (const char *path, const struct utimbuf *buf)
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_utime (h, vpath, buf);
-out:
- return op_ret;
-}
-
-static int32_t
-libgf_client_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_mknod_cbk_stub (frame, NULL, op_ret, op_errno,
- inode, buf, preparent,
- postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-static int32_t
-libgf_client_mknod (libglusterfs_client_ctx_t *ctx, loc_t *loc, mode_t mode,
- dev_t rdev)
-{
- int32_t op_ret = -1;
- call_stub_t *stub = NULL;
- libgf_client_local_t *local = NULL;
- inode_t *inode = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, mknod, local, loc, mode, rdev);
-
- op_ret = stub->args.mknod_cbk.op_ret;
- errno = stub->args.mknod_cbk.op_errno;
- if (op_ret == -1)
- goto out;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
- inode = stub->args.mknod_cbk.inode;
- inode_link (inode, loc->parent, loc->name, &stub->args.mknod_cbk.buf);
- libgf_transform_iattr (ctx, inode, &stub->args.mknod_cbk.buf);
- inode_lookup (inode);
-
- if (!libgf_alloc_inode_ctx (ctx, inode))
- libgf_alloc_inode_ctx (ctx, inode);
-
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_STAT,
- &stub->args.mknod_cbk.buf);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_mknod(glusterfs_handle_t handle, const char *path, mode_t mode,
- dev_t dev)
-{
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *name = NULL;
- int32_t op_ret = -1;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == 0) {
- op_ret = -1;
- errno = EEXIST;
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- loc.inode = inode_new (ctx->itable);
- op_ret = libgf_client_mknod (ctx, &loc, mode, dev);
-
-out:
- libgf_client_loc_wipe (&loc);
- if (name)
- FREE (name);
-
- return op_ret;
-}
-
-int
-glusterfs_mknod(const char *pathname, mode_t mode, dev_t dev)
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, pathname, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", pathname);
-
- h = libgf_resolved_path_handle (pathname, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_mkfifo (glusterfs_handle_t handle, const char *path, mode_t mode)
-{
-
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t loc = {0, };
- char *name = NULL;
- int32_t op_ret = -1;
- dev_t dev = 0;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- loc.path = libgf_resolve_path_light ((char *)path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Failed to resolve name");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == 0) {
- op_ret = -1;
- errno = EEXIST;
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 0);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- "returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- loc.inode = inode_new (ctx->itable);
- op_ret = libgf_client_mknod (ctx, &loc, mode | S_IFIFO, dev);
-
-out:
- libgf_client_loc_wipe (&loc);
- if (name)
- free (name);
-
- return op_ret;
-}
-
-int
-glusterfs_mkfifo (const char *path, mode_t mode)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_mkfifo (h, vpath, mode);
-out:
- return op_ret;
-}
-
-int32_t
-libgf_client_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_unlink_cbk_stub (frame, NULL, op_ret, op_errno,
- preparent, postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int
-libgf_client_unlink (libglusterfs_client_ctx_t *ctx, loc_t *loc)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, unlink, local, loc);
-
- op_ret = stub->args.unlink_cbk.op_ret;
- errno = stub->args.unlink_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", loc->path);
- if (op_ret == -1)
- goto out;
-
- inode_unlink (loc->inode, loc->parent, loc->name);
-
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_unlink (glusterfs_handle_t handle, const char *path)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- " returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_unlink (ctx, &loc);
-
-out:
- if (name)
- FREE (name);
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-int
-glusterfs_unlink (const char *path)
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_unlink (h, vpath);
-
-out:
- return op_ret;
-}
-
-static int32_t
-libgf_client_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_symlink_cbk_stub (frame, NULL, op_ret,
- op_errno, inode, buf,
- preparent, postparent);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_symlink (libglusterfs_client_ctx_t *ctx, const char *linkpath,
- loc_t *loc)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
- inode_t *inode = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, symlink, local, linkpath, loc);
-
- op_ret = stub->args.symlink_cbk.op_ret;
- errno = stub->args.symlink_cbk.op_errno;
- if (op_ret == -1)
- goto out;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "target: %s, link: %s, status %d"
- " errno %d", linkpath, loc->path, op_ret, errno);
- inode = stub->args.symlink_cbk.inode;
- inode_link (inode, loc->parent, loc->name,
- &stub->args.symlink_cbk.buf);
- libgf_transform_iattr (ctx, inode, &stub->args.symlink_cbk.buf);
- inode_lookup (inode);
- if (!libgf_get_inode_ctx (inode))
- libgf_alloc_inode_ctx (ctx, inode);
-
- libgf_update_iattr_cache (inode, LIBGF_UPDATE_STAT,
- &stub->args.symlink_cbk.buf);
-out:
- call_stub_destroy (stub);
- return op_ret;
-}
-
-int
-glusterfs_glh_symlink (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath)
-{
- int32_t op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- loc_t oldloc = {0, };
- loc_t newloc = {0, };
- char *oldname = NULL;
- char *newname = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "target: %s, link: %s", oldpath,
- newpath);
- /* Old path does not need to be interpreted or looked up */
- oldloc.path = strdup (oldpath);
-
- newloc.path = strdup (newpath);
- if (!newloc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&newloc, ctx, 1);
- if (op_ret == 0) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "new path (%s) already exists, "
- " returning EEXIST", newloc.path);
- op_ret = -1;
- errno = EEXIST;
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&newloc, ctx, 0);
- if (op_ret == -1) {
- errno = ENOENT;
- goto out;
- }
-
- newloc.inode = inode_new (ctx->itable);
- newname = strdup (newloc.path);
- op_ret = libgf_client_loc_fill (&newloc, ctx, 0, newloc.parent->ino,
- basename (newname));
-
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- "returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_symlink (ctx, oldpath, &newloc);
-
-out:
- if (newname)
- FREE (newname);
-
- if (oldname)
- FREE (oldname);
- libgf_client_loc_wipe (&oldloc);
- libgf_client_loc_wipe (&newloc);
- return op_ret;
-}
-
-int
-glusterfs_symlink (const char *oldpath, const char *newpath)
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, oldpath, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, newpath, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "target: %s, link: %s", oldpath,
- newpath);
-
- h = libgf_resolved_path_handle (newpath, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_symlink (h, oldpath, vpath);
-out:
- return op_ret;
-}
-
-
-int32_t
-libgf_client_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *path, struct iatt *sbuf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_readlink_cbk_stub (frame, NULL, op_ret,
- op_errno, path, sbuf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_readlink (libglusterfs_client_ctx_t *ctx, loc_t *loc, char *buf,
- size_t bufsize)
-{
- int op_ret = -1;
- libgf_client_local_t *local = NULL;
- call_stub_t *stub = NULL;
- size_t cpy_size = 0;
-
- LIBGF_CLIENT_FOP (ctx, stub, readlink, local, loc, bufsize);
-
- op_ret = stub->args.readlink_cbk.op_ret;
- errno = stub->args.readlink_cbk.op_errno;
-
- if (op_ret != -1) {
- cpy_size = ((op_ret <= bufsize) ? op_ret : bufsize);
- memcpy (buf, stub->args.readlink_cbk.buf, cpy_size);
- op_ret = cpy_size;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "link: %s, target: %s,"
- " status %d, errno %d", loc->path, buf, op_ret, errno);
- } else
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "link: %s, status %d, "
- "errno %d", loc->path, op_ret, errno);
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-ssize_t
-glusterfs_glh_readlink (glusterfs_handle_t handle, const char *path, char *buf,
- size_t bufsize)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
- if (bufsize < 0) {
- errno = EINVAL;
- goto out;
- }
-
- if (bufsize == 0) {
- op_ret = 0;
- goto out;
- }
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, "
- "returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_readlink (ctx, &loc, buf, bufsize);
-
-out:
- if (name)
- FREE (name);
-
- libgf_client_loc_wipe (&loc);
- return op_ret;
-}
-
-ssize_t
-glusterfs_readlink (const char *path, char *buf, size_t bufsize)
-{
- char vpath[PATH_MAX];
- int op_ret = -1;
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, buf, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_readlink (h, vpath, buf, bufsize);
-out:
- return op_ret;
-}
-
-char *
-glusterfs_glh_realpath (glusterfs_handle_t handle, const char *path,
- char *resolved_path)
-{
- char *buf = NULL;
- char *rpath = NULL;
- char *start = NULL, *end = NULL;
- char *dest = NULL;
- libglusterfs_client_ctx_t *ctx = handle;
- long int path_max = 0;
- char *ptr = NULL;
- struct stat stbuf = {0, };
- long int new_size = 0;
- char *new_rpath = NULL;
- int dest_offset = 0;
- char *rpath_limit = 0;
- int ret = 0, num_links = 0;
- char *vpath = NULL, *tmppath = NULL;
- char absolute_path[PATH_MAX];
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
-#ifdef PATH_MAX
- path_max = PATH_MAX;
-#else
- path_max = pathconf (path, _PC_PATH_MAX);
- if (path_max <= 0) {
- path_max = 1024;
- }
-#endif
-
- if (resolved_path == NULL) {
- rpath = CALLOC (1, path_max);
- if (rpath == NULL) {
- errno = ENOMEM;
- goto out;
- }
- } else {
- rpath = resolved_path;
- }
-
- rpath_limit = rpath + path_max;
-
- if (path[0] == '/') {
- rpath[0] = '/';
- dest = rpath + 1;
- } else {
- /*
- FIXME: can $CWD be a valid path on glusterfs server? hence is
- it better to handle this case or just return EINVAL for
- relative paths?
- */
- ptr = getcwd (rpath, path_max);
- if (ptr == NULL) {
- goto err;
- }
- dest = rpath + strlen (rpath);
- }
-
- for (start = end = (char *)path; *end; start = end) {
- if (dest[-1] != '/') {
- *dest++ = '/';
- }
-
- while (*start == '/') {
- start++;
- }
-
- for (end = start; *end && *end != '/'; end++);
-
- if ((end - start) == 0) {
- break;
- }
-
- if ((end - start == 1) && (start[0] == '.')) {
- /* do nothing */
- } else if (((end - start) == 2) && (start[0] == '.')
- && (start[1] == '.')) {
- if (dest > rpath + 1) {
- while (--dest[-1] != '/');
- }
- } else {
- if ((dest + (end - start + 1)) >= rpath_limit) {
- if (resolved_path == NULL) {
- errno = ENAMETOOLONG;
- if (dest > rpath + 1)
- dest--;
- *dest = '\0';
- goto err;
- }
-
- dest_offset = dest - rpath;
- new_size = rpath_limit - rpath;
- if ((end - start + 1) > path_max) {
- new_size = (end - start + 1);
- } else {
- new_size = path_max;
- }
-
- new_rpath = realloc (rpath, new_size);
- if (new_rpath == NULL) {
- goto err;
- }
-
-
- dest = new_rpath + dest_offset;
- rpath = new_rpath;
- rpath_limit = rpath + new_size;
- }
-
- memcpy (dest, start, end - start);
- dest += end - start;
- *dest = '\0';
-
- ret = glusterfs_glh_lstat (handle, rpath, &stbuf);
- if (ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "glusterfs_glh_stat returned -1 for"
- " path (%s):(%s)", rpath,
- strerror (errno));
- goto err;
- }
-
- if (S_ISLNK (stbuf.st_mode)) {
- buf = calloc (1, path_max);
- if (buf == NULL) {
- errno = ENOMEM;
- goto err;
- }
-
- if (++num_links > MAXSYMLINKS)
- {
- errno = ELOOP;
- FREE (buf);
- goto err;
- }
-
- ret = glusterfs_glh_readlink (handle, rpath,
- buf,
- path_max - 1);
- if (ret < 0) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "glusterfs_readlink returned %d"
- " for path (%s):(%s)",
- ret, rpath, strerror (errno));
- FREE (buf);
- goto err;
- }
- buf[ret] = '\0';
-
- if (buf[0] != '/') {
- tmppath = strdup (rpath);
- tmppath = dirname (tmppath);
- sprintf (absolute_path, "%s/%s",
- tmppath, buf);
- FREE (buf);
- buf = libgf_resolve_path_light ((char *)absolute_path);
- FREE (tmppath);
- }
-
- rpath = glusterfs_glh_realpath (handle, buf,
- rpath);
- FREE (buf);
- if (rpath == NULL) {
- goto out;
- }
- dest = rpath + strlen (rpath);
-
- } else if (!S_ISDIR (stbuf.st_mode) && *end != '\0') {
- errno = ENOTDIR;
- goto err;
- }
- }
- }
- if (dest > rpath + 1 && dest[-1] == '/')
- --dest;
- *dest = '\0';
-
-out:
- if (vpath)
- FREE (vpath);
- return rpath;
-
-err:
- if (vpath)
- FREE (vpath);
- if (resolved_path == NULL) {
- FREE (rpath);
- }
-
- return NULL;
-}
-
-char *
-glusterfs_realpath (const char *path, char *resolved_path)
-{
- char *res = NULL;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
- char *realp = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- realp = CALLOC (PATH_MAX, sizeof (char));
- if (!realp)
- goto out;
-
- libgf_vmp_search_vmp (h, realp, PATH_MAX);
- res = glusterfs_glh_realpath (h, vpath, resolved_path);
- if (!res)
- goto out;
-
- /* This copy is needed to ensure that when we return the real resolved
- * path, we return a path that accounts for the app's view of the
- * path, i.e. it starts with the VMP, in case this is an absolute path.
- */
- if (libgf_path_absolute (path)) {
- strcat (realp, resolved_path);
- strcpy (resolved_path, realp);
- }
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, resolved %s", path,
- resolved_path);
-out:
- if (realp)
- FREE (realp);
-
- return res;
-}
-
-int
-glusterfs_glh_remove (glusterfs_handle_t handle, const char *path)
-{
- loc_t loc = {0, };
- int op_ret = -1;
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, handle, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", path);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1)
- goto out;
-
- name = strdup (loc.path);
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino,
- basename (name));
- if (op_ret == -1)
- goto out;
-
- if (IA_ISDIR (loc.inode->ia_type))
- op_ret = libgf_client_rmdir (ctx, &loc);
- else
- op_ret = libgf_client_unlink (ctx, &loc);
-
-out:
- if (name)
- FREE (name);
- return op_ret;
-
-}
-
-int
-glusterfs_remove(const char *pathname)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, pathname, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s", pathname);
-
- h = libgf_resolved_path_handle (pathname, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_remove (h, vpath);
-out:
- return op_ret;
-}
-
-void
-glusterfs_rewinddir (glusterfs_dir_t dirfd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = libgf_get_fd_ctx ((fd_t *)dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = 0;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Offset: %"PRIu64,
- fd_ctx->offset);
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return;
-}
-
-void
-glusterfs_seekdir (glusterfs_dir_t dirfd, off_t offset)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- fd_ctx = libgf_get_fd_ctx ((fd_t *)dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- fd_ctx->offset = offset;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Offset: %"PRIu64,
- fd_ctx->offset);
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return;
-}
-
-off_t
-glusterfs_telldir (glusterfs_dir_t dirfd)
-{
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- off_t off = -1;
-
- fd_ctx = libgf_get_fd_ctx ((fd_t *)dirfd);
- if (!fd_ctx) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "No fd context present");
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- off = fd_ctx->offset;
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "Offset: %"PRIu64,
- fd_ctx->offset);
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
-out:
- return off;
-}
-
-struct libgf_client_sendfile_data {
- int reads_sent;
- int reads_completed;
- int out_fd;
- int32_t op_ret;
- int32_t op_errno;
- pthread_mutex_t lock;
- pthread_cond_t cond;
-};
-
-int
-libgf_client_sendfile_read_cbk (int op_ret, int op_errno,
- glusterfs_iobuf_t *buf, void *cbk_data)
-{
- struct libgf_client_sendfile_data *sendfile_data = cbk_data;
- int bytes = 0;
-
- if (op_ret > 0) {
- bytes = writev (sendfile_data->out_fd, buf->vector, buf->count);
- if (bytes != op_ret) {
- op_ret = -1;
- op_errno = errno;
- }
-
- glusterfs_free (buf);
- }
-
- pthread_mutex_lock (&sendfile_data->lock);
- {
- if (sendfile_data->op_ret != -1) {
- if (op_ret == -1) {
- sendfile_data->op_ret = -1;
- sendfile_data->op_errno = op_errno;
- } else {
- sendfile_data->op_ret += op_ret;
- }
- }
-
- sendfile_data->reads_completed++;
-
- if (sendfile_data->reads_completed
- == sendfile_data->reads_sent) {
- pthread_cond_broadcast (&sendfile_data->cond);
- }
- }
- pthread_mutex_unlock (&sendfile_data->lock);
-
- return 0;
-}
-
-
-ssize_t
-glusterfs_sendfile (int out_fd, glusterfs_file_t in_fd, off_t *offset,
- size_t count)
-{
- ssize_t ret = -1;
- struct libgf_client_sendfile_data cbk_data = {0, };
- off_t off = -1;
- size_t size = 0;
- int flags = 0;
- int non_block = 0;
-
-
- pthread_mutex_init (&cbk_data.lock, NULL);
- pthread_cond_init (&cbk_data.cond, NULL);
- cbk_data.out_fd = out_fd;
-
- if (offset) {
- off = *offset;
- }
-
- flags = fcntl (out_fd, F_GETFL);
-
- if (flags != -1) {
- non_block = flags & O_NONBLOCK;
-
- if (non_block) {
- ret = fcntl (out_fd, F_SETFL, flags & ~O_NONBLOCK);
- }
- }
-
- while (count != 0) {
- /*
- * FIXME: what's the optimal size for reads and writes?
- */
- size = (count > LIBGF_SENDFILE_BLOCK_SIZE) ?
- LIBGF_SENDFILE_BLOCK_SIZE : count;
-
- /*
- * we don't wait for reply to previous read, we just send all
- * reads in a single go.
- */
- ret = glusterfs_read_async (in_fd, size, off,
- libgf_client_sendfile_read_cbk,
- &cbk_data);
- if (ret == -1) {
- break;
- }
-
- pthread_mutex_lock (&cbk_data.lock);
- {
- cbk_data.reads_sent++;
- }
- pthread_mutex_unlock (&cbk_data.lock);
-
- if (offset) {
- off += size;
- }
-
- count -= size;
- }
-
- pthread_mutex_lock (&cbk_data.lock);
- {
- /*
- * if we've not received replies to all the reads we've sent,
- * wait for them
- */
- if (cbk_data.reads_sent > cbk_data.reads_completed) {
- pthread_cond_wait (&cbk_data.cond,
- &cbk_data.lock);
- }
- }
- pthread_mutex_unlock (&cbk_data.lock);
-
- if (offset != NULL) {
- *offset = off;
- }
-
- /* if we were able to stack_wind all the reads */
-
- if (ret == 0) {
- ret = cbk_data.op_ret;
- errno = cbk_data.op_errno;
- }
-
- if (non_block) {
- fcntl (out_fd, F_SETFL, flags);
- }
-
- return ret;
-}
-
-
-static int32_t
-libgf_client_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct flock *lock)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_lk_cbk_stub (frame, NULL, op_ret, op_errno,
- lock);
-
- LIBGF_REPLY_NOTIFY (local);
-
- return 0;
-}
-
-
-int
-libgf_client_lk (libglusterfs_client_ctx_t *ctx, fd_t *fd, int cmd,
- struct flock *lock)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP(ctx, stub, lk, local, fd, cmd, lock);
-
- op_ret = stub->args.lk_cbk.op_ret;
- errno = stub->args.lk_cbk.op_errno;
- if (op_ret == 0) {
- *lock = stub->args.lk_cbk.lock;
- }
-
- call_stub_destroy (stub);
- return op_ret;
-}
-
-
-int
-glusterfs_fcntl (glusterfs_file_t fd, int cmd, ...)
-{
- int ret = -1;
- struct flock *lock = NULL;
- va_list ap;
- libglusterfs_client_ctx_t *ctx = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- ctx = fd_ctx->ctx;
-
- switch (cmd) {
- case F_SETLK:
- case F_SETLKW:
- case F_GETLK:
-#if F_SETLK != F_SETLK64
- case F_SETLK64:
-#endif
-#if F_SETLKW != F_SETLKW64
- case F_SETLKW64:
-#endif
-#if F_GETLK != F_GETLK64
- case F_GETLK64:
-#endif
- va_start (ap, cmd);
- lock = va_arg (ap, struct flock *);
- va_end (ap);
-
- if (!lock) {
- errno = EINVAL;
- goto out;
- }
-
- ret = libgf_client_lk (ctx, fd, cmd, lock);
- break;
-
- default:
- errno = EINVAL;
- break;
- }
-
-out:
- return ret;
-}
-
-
-int
-libgf_client_chdir (const char *path)
-{
- int op_ret = 0;
- uint32_t resulting_cwd_len = 0;
-
- pthread_mutex_lock (&cwdlock);
- {
- if (!libgf_path_absolute (path)) {
- resulting_cwd_len = strlen (path) + strlen (cwd)
- + ((path[strlen (path) - 1] == '/')
- ? 0 : 1) + 1;
-
- if (resulting_cwd_len > PATH_MAX) {
- op_ret = -1;
- errno = ENAMETOOLONG;
- goto unlock;
- }
- strcat (cwd, path);
- } else {
- resulting_cwd_len = strlen (path)
- + ((path[strlen (path) - 1] == '/')
- ? 0 : 1) + 1;
-
- if (resulting_cwd_len > PATH_MAX) {
- op_ret = -1;
- errno = ENAMETOOLONG;
- goto unlock;
- }
-
- strcpy (cwd, path);
- }
-
- if (cwd[strlen (cwd) - 1] != '/') {
- strcat (cwd, "/");
- }
- }
-unlock:
- pthread_mutex_unlock (&cwdlock);
-
- return op_ret;
-}
-
-
-int
-glusterfs_fchdir (glusterfs_file_t fd)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- char vmp[PATH_MAX];
- char *res = NULL;
- libglusterfs_client_fd_ctx_t *fd_ctx = NULL;
- glusterfs_handle_t handle = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, fd, out);
-
- /* FIXME: there is a race-condition between glusterfs_fchdir and
- glusterfs_close. If two threads of application call glusterfs_fchdir
- and glusterfs_close on the same fd, there is a possibility of
- glusterfs_fchdir accessing freed memory of fd_ctx.
- */
-
- fd_ctx = libgf_get_fd_ctx (fd);
- if (!fd_ctx) {
- errno = EBADF;
- goto out;
- }
-
- pthread_mutex_lock (&fd_ctx->lock);
- {
- handle = fd_ctx->ctx;
- strcpy (vpath, fd_ctx->vpath);
- }
- pthread_mutex_unlock (&fd_ctx->lock);
-
- if (vpath[0] == '\0') {
- errno = ENOTDIR;
- goto out;
- }
-
- res = libgf_vmp_search_vmp (handle, vmp, PATH_MAX);
- if (res == NULL) {
- errno = EBADF;
- goto out;
- }
-
- /* both vmp and vpath are terminated with '/'. Also path starts with a
- '/'. Hence the extra '/' amounts to NULL character at the end of the
- string.
- */
- if ((strlen (vmp) + strlen (vpath)) > PATH_MAX) {
- errno = ENAMETOOLONG;
- goto out;
- }
-
- pthread_mutex_lock (&cwdlock);
- {
- strcpy (cwd, vmp);
- res = vpath;
- if (res[0] == '/') {
- res++;
- }
-
- strcat (cwd, res);
- }
- pthread_mutex_unlock (&cwdlock);
-
- op_ret = 0;
-out:
- return op_ret;
-}
-
-
-int
-glusterfs_chdir (const char *path)
-{
- int32_t op_ret = -1;
- glusterfs_handle_t handle = NULL;
- loc_t loc = {0, };
- char vpath[PATH_MAX];
-
- handle = libgf_resolved_path_handle (path, vpath);
-
- if (handle != NULL) {
- loc.path = strdup (vpath);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "Path compaction "
- "failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, handle, 0);
- }
-
- if ((handle == NULL) || (op_ret == 0)) {
- op_ret = libgf_client_chdir (path);
- }
-
-out:
- return op_ret;
-}
-
-
-char *
-glusterfs_getcwd (char *buf, size_t size)
-{
- char *res = NULL;
- size_t len = 0;
- loc_t loc = {0, };
- glusterfs_handle_t handle = NULL;
- char vpath[PATH_MAX];
- int32_t op_ret = 0;
-
- pthread_mutex_lock (&cwdlock);
- {
- if (!cwd_inited) {
- errno = ENODEV;
- goto unlock;
- }
-
- if (buf == NULL) {
- buf = CALLOC (1, len);
- if (buf == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,
- "out of memory");
- goto unlock;
- }
- } else {
- if (size == 0) {
- errno = EINVAL;
- goto unlock;
- }
-
- if (len > size) {
- errno = ERANGE;
- goto unlock;
- }
- }
-
- strcpy (buf, cwd);
- res = buf;
- }
-unlock:
- pthread_mutex_unlock (&cwdlock);
-
- if (res != NULL) {
- handle = libgf_resolved_path_handle (res, vpath);
-
- if (handle != NULL) {
- loc.path = strdup (vpath);
- if (loc.path == NULL) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR,
- "strdup failed");
- } else {
- op_ret = libgf_client_path_lookup (&loc, handle,
- 0);
- if (op_ret == -1) {
- res = NULL;
- }
- }
- }
- }
-
- return res;
-}
-
-int32_t
-libgf_client_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
-{
- libgf_client_local_t *local = frame->local;
-
- local->reply_stub = fop_truncate_cbk_stub (frame, NULL, op_ret,
- op_errno, prebuf, postbuf);
-
- LIBGF_REPLY_NOTIFY (local);
- return 0;
-}
-
-int32_t
-libgf_client_truncate (libglusterfs_client_ctx_t *ctx,
- loc_t *loc, off_t length)
-{
- call_stub_t *stub = NULL;
- int32_t op_ret = 0;
- libgf_client_local_t *local = NULL;
-
- LIBGF_CLIENT_FOP (ctx, stub, truncate, local, loc, length);
-
- op_ret = stub->args.truncate_cbk.op_ret;
- errno = stub->args.truncate_cbk.op_errno;
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path %s, status %d, errno %d",
- loc->path, op_ret, errno);
-
- if (op_ret == -1) {
- goto out;
- }
-
- libgf_transform_iattr (ctx, loc->inode,
- &stub->args.truncate_cbk.postbuf);
-
- libgf_update_iattr_cache (loc->inode, LIBGF_UPDATE_STAT,
- &stub->args.truncate_cbk.postbuf);
- call_stub_destroy (stub);
-
-out:
- return op_ret;
-}
-
-int
-glusterfs_glh_truncate (glusterfs_handle_t handle, const char *path,
- off_t length)
-{
- int32_t op_ret = -1;
- loc_t loc = {0, };
- libglusterfs_client_ctx_t *ctx = handle;
- char *name = NULL, *pathname = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, ctx, out);
- GF_VALIDATE_ABSOLUTE_PATH_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- loc.path = strdup (path);
- if (!loc.path) {
- gf_log (LIBGF_XL_NAME, GF_LOG_ERROR, "strdup failed");
- goto out;
- }
-
- op_ret = libgf_client_path_lookup (&loc, ctx, 1);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient", GF_LOG_ERROR,
- "path lookup failed for (%s)", loc.path);
- goto out;
- }
-
- pathname = strdup (loc.path);
- name = basename (pathname);
-
- op_ret = libgf_client_loc_fill (&loc, ctx, 0, loc.parent->ino, name);
- if (op_ret == -1) {
- gf_log ("libglusterfsclient",
- GF_LOG_ERROR,
- "libgf_client_loc_fill returned -1, returning EINVAL");
- errno = EINVAL;
- goto out;
- }
-
- op_ret = libgf_client_truncate (ctx, &loc, length);
-
-out:
- libgf_client_loc_wipe (&loc);
-
- return op_ret;
-}
-
-int
-glusterfs_truncate (const char *path, off_t length)
-{
- int op_ret = -1;
- char vpath[PATH_MAX];
- glusterfs_handle_t h = NULL;
-
- GF_VALIDATE_OR_GOTO (LIBGF_XL_NAME, path, out);
-
- gf_log (LIBGF_XL_NAME, GF_LOG_DEBUG, "path:%s length:%"PRIu64, path,
- length);
- h = libgf_resolved_path_handle (path, vpath);
- if (!h) {
- errno = ENODEV;
- goto out;
- }
-
- op_ret = glusterfs_glh_truncate (h, vpath, length);
-out:
- return op_ret;
-}
-
-static struct xlator_fops libgf_client_fops = {
-};
-
-static struct xlator_cbks libgf_client_cbks = {
- .forget = libgf_client_forget,
- .release = libgf_client_release,
- .releasedir = libgf_client_releasedir,
-};
-
-static inline xlator_t *
-libglusterfs_graph (xlator_t *graph)
-{
- int ret = 0;
- xlator_t *top = NULL;
- xlator_list_t *xlchild, *xlparent;
-
- top = CALLOC (1, sizeof (*top));
- ERR_ABORT (top);
-
- xlchild = CALLOC (1, sizeof(*xlchild));
- ERR_ABORT (xlchild);
- xlchild->xlator = graph;
- top->children = xlchild;
- top->ctx = graph->ctx;
- top->next = graph;
- top->name = strdup (LIBGF_XL_NAME);
-
- xlparent = CALLOC (1, sizeof(*xlparent));
- xlparent->xlator = top;
- graph->parents = xlparent;
- ret = asprintf (&top->type, LIBGF_XL_NAME);
- if (-1 == ret) {
- fprintf (stderr, "failed to set the top xl's type");
- }
-
- top->init = libgf_client_init;
- top->fops = &libgf_client_fops;
- top->mops = &libgf_client_mops;
- top->cbks = &libgf_client_cbks;
- top->notify = libgf_client_notify;
- top->fini = libgf_client_fini;
- // fill_defaults (top);
-
- return top;
-}
diff --git a/libglusterfsclient/src/libglusterfsclient.h b/libglusterfsclient/src/libglusterfsclient.h
deleted file mode 100755
index 1691a2faad3..00000000000
--- a/libglusterfsclient/src/libglusterfsclient.h
+++ /dev/null
@@ -1,1363 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _LIBGLUSTERFSCLIENT_H
-#define _LIBGLUSTERFSCLIENT_H
-
-#ifndef __BEGIN_DECLS
-#ifdef __cplusplus
-#define __BEGIN_DECLS extern "C" {
-#else
-#define __BEGIN_DECLS
-#endif
-#endif
-
-#ifndef __END_DECLS
-#ifdef __cplusplus
-#define __END_DECLS }
-#else
-#define __END_DECLS
-#endif
-#endif
-
-
-__BEGIN_DECLS
-
-#include <stdio.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <dirent.h>
-#include <sys/statfs.h>
-#include <sys/statvfs.h>
-#include <utime.h>
-#include <sys/time.h>
-#include <stdint.h>
-
-typedef struct {
- struct iovec *vector;
- int count;
- void *iobref;
- void *dictref;
-} glusterfs_iobuf_t;
-
-
-typedef
-int (*glusterfs_readv_cbk_t) (int op_ret, int op_errno, glusterfs_iobuf_t *buf,
- void *cbk_data);
-
-typedef
-int (*glusterfs_write_cbk_t) (int op_ret, int op_errno, void *cbk_data);
-
-typedef
-int (*glusterfs_get_cbk_t) (int op_ret, int op_errno, glusterfs_iobuf_t *buf,
- struct stat *stbuf, void *cbk_data);
-
-
-/* Data Interface
- * The first section describes the data structures required for
- * using libglusterfsclient.
- */
-
-/* This structure needs to be filled up and
- * passed to te glusterfs_init function which uses
- * the params passed herein to initialize a glusterfs
- * client context and then connect to a glusterfs server.
- */
-typedef struct {
- char *logfile; /* Path to the file which will store
- the log.
- */
- char *loglevel; /* The log level required for
- reporting various events within
- libglusterfsclient.
- */
- struct {
- char *specfile; /* Users can either open a volume or
- specfile and assign the pointer to
- specfp, or just refer to the volume
- /spec file path in specfile.
- */
- FILE *specfp;
- };
- char *volume_name; /* The volume file could describe many
- volumes but the specific volume
- within that file is chosen by
- specifying the volume name here.
- */
- unsigned long lookup_timeout; /* libglusterclient provides the inode
- numbers to be cached by the library.
- The duration for which these are
- cached are defined by lookup_timeout
- . In Seconds.
- */
- unsigned long stat_timeout; /* The file attributes received from
- a stat syscall can also be cached
- for the duration specified in this
- member. In Seconds.
- */
-} glusterfs_init_params_t;
-
-
-
-/* This is the handle returned by glusterfs_init
- * once the initialization is complete.
- * Users should treat this as an opaque handle.
- */
-typedef void * glusterfs_handle_t;
-
-
-
-/* These identifiers are used as handles for files and dirs.
- * Users of libglusterfsclient should not in anyway try to interpret
- * the actual structures these will point to.
- */
-typedef void * glusterfs_file_t;
-typedef void * glusterfs_dir_t;
-
-
-/* Function Call Interface */
-/* libglusterfsclient initialization function.
- * @ctx : the structure described above filled with required values.
- * @fakefsid: User generated fsid to be used to identify this
- * volume.
- *
- * Returns NULL on failure and the non-NULL pointer on success.
- * On failure, the error description might be present in the logfile
- * depending on the log level.
- */
-glusterfs_handle_t
-glusterfs_init (glusterfs_init_params_t *ctx, uint32_t fakefsid);
-
-
-
-/* Used to destroy a glusterfs client context and the
- * connection to the glusterfs server.
- *
- * @handle : The glusterfs handle returned by glusterfs_init.
- */
-int
-glusterfs_fini (glusterfs_handle_t handle);
-
-
-
-/* libglusterfs client provides two interfaces.
- * 1. handle-based interface
- * Functions that comprise the handle-based interface accept the
- * glusterfs_handle_t as the first argument. It specifies the
- * glusterfs client context over which to perform the operation.
- *
- * 2. Virtual Mount Point based interface:
- * Functions that do not require a handle to be given in order to
- * identify which client context to operate on. This interface
- * internally determines the corresponding client context for the
- * given path. The down-side is that a virtual mount point (VMP) needs to be
- * registered with the library. A VMP is just a string that maps to a
- * glusterfs_handle_t. The advantage of a VMP based interface is that
- * a user program using multiple client contexts does not need to
- * maintain its own mapping between paths and the corresponding
- * handles.
- */
-
-
-
-/* glusterfs_mount is the function that allows users to register a VMP
- * along with the parameters, which will be used to initialize a
- * context. Applications calling glusterfs_mount do not need to
- * initialized a context using the glusterfs_init interface.
- *
- * @vmp : The virtual mount point.
- * @ipars : Initialization parameters populated as described
- * earlier.
- *
- * Returns 0 on success, and -1 on failure.
- */
-int
-glusterfs_mount (char *vmp, glusterfs_init_params_t *ipars);
-
-
-
-/* glusterfs_umount is the VMP equivalent of glusterfs_fini.
- *
- * @vmp : The VMP which was initialized using glusterfs_mount.
- *
- * Returns 0 on sucess, and -1 on failure.
- */
-int
-glusterfs_umount (char *vmp);
-
-
-/* glusterfs_umount_all unmounts all the mounts */
-int
-glusterfs_umount_all (void);
-
-
-/* For smaller files, application can use just
- * glusterfs_get/glusterfs_get_async
- * to read the whole content. Limit of the file-sizes to be read in
- * glusterfs_get/glusterfs_get_async is passed in the size argument
- */
-
-/* glusterfs_glh_get:
- * @handle : glusterfs handle
- * @path : path to be looked upon
- * @size : upper limit of file-sizes to be read in lookup
- * @stbuf : attribute buffer
- */
-
-int
-glusterfs_glh_get (glusterfs_handle_t handle, const char *path, void *buf,
- size_t size, struct stat *stbuf);
-
-int
-glusterfs_get (const char *path, void *buf, size_t size, struct stat *stbuf);
-
-int
-glusterfs_get_async (glusterfs_handle_t handle, const char *path, size_t size,
- glusterfs_get_cbk_t cbk, void *cbk_data);
-
-
-
-/* Opens a file. Corresponds to the open syscall.
- *
- * @handle : Handle returned from glusterfs_init
- * @path : Path to the file or directory on the glusterfs
- * export. Must be absolute to the export on the server.
- * @flags : flags to control open behaviour.
- * @... : The mode_t argument that defines the mode for a new
- * file, in case a new file is being created using the
- * O_CREAT flag in @flags.
- *
- * Returns a non-NULL handle on success. NULL on failure and sets
- * errno accordingly.
- */
-glusterfs_file_t
-glusterfs_glh_open (glusterfs_handle_t handle, const char *path, int flags,
- ...);
-
-
-/* Opens a file without having to specify a handle.
- *
- * @path : Path to the file to open in the glusterfs export.
- * The path to the file in glusterfs export must be
- * pre-fixed with the VMP string registered with
- * glusterfs_mount.
- * @flags : flags to control open behaviour.
- * @... : The mode_t argument that defines the mode for a new
- * file, in case a new file is being created using the
- * O_CREAT flag in @flags.
- *
- * Returns 0 on success, -1 on failure with errno set accordingly.
- */
-glusterfs_file_t
-glusterfs_open (const char *path, int flags, ...);
-
-
-
-/* Creates a file. Corresponds to the creat syscall.
- *
- * @handle : Handle returned from glusterfs_init
- * @path : Path to the file that needs to be created in the
- * glusterfs export.
- * @mode : File creation mode.
- *
- * Returns the file handle on success. NULL on error with errno set as
- * required.
- */
-glusterfs_file_t
-glusterfs_glh_creat (glusterfs_handle_t handle, const char *path, mode_t mode);
-
-
-
-/* VMP-based creat.
- * @path : Path to the file to be created. Must be
- * pre-prepended with the VMP string registered with
- * glusterfs_mount.
- * @mode : File creation mode.
- *
- * Returns file handle on success. NULL handle on error with errno set
- * accordingly.
- */
-glusterfs_file_t
-glusterfs_creat (const char *path, mode_t mode);
-
-
-
-/* Close the file identified by the handle.
- *
- * @fd : Closes the file.
- *
- * Returns 0 on success, -1 on error with errno set accordingly.
- */
-int
-glusterfs_close (glusterfs_file_t fd);
-
-
-
-/* Get struct stat for the file in path.
- *
- * @handle : The handle that identifies a glusterfs client
- * context.
- * @path : The file for which we need to get struct stat.
- * @stbuf : The buffer into which the file's stat is copied.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_glh_stat (glusterfs_handle_t handle, const char *path,
- struct stat *stbuf);
-
-
-/* Get struct stat for file in path.
- *
- * @path : The file for which struct stat is required.
- * @sbuf : The buffer into which the stat structure is copied.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_stat (const char *path, struct stat *buf);
-
-
-
-/* Gets stat struct for the file.
- *
- * @handle : The handle identifying a glusterfs client context.
- * @path : Path to the file for which stat structure is
- * required. If path is a symlink, the symlink is
- * interpreted and the stat structure returned for the
- * target of the link.
- * @buf : The buffer into which the stat structure is copied.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_glh_lstat (glusterfs_handle_t handle, const char *path,
- struct stat *buf);
-
-
-
-/* Gets stat struct for a file.
- *
- * @path : The file to get the struct stat for.
- * @buf : The receiving struct stat buffer.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_lstat (const char *path, struct stat *buf);
-
-
-
-/* Get stat structure for a file.
- *
- * @fd : The file handle identifying a file on the glusterfs
- * server.
- * @stbuf : The buffer into which the stat data is copied.
- *
- * Returns 0 on success and -1 on error with errno set accordingly.
- */
-int
-glusterfs_fstat (glusterfs_file_t fd, struct stat *stbuf);
-
-int
-glusterfs_glh_setxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value,
- size_t size, int flags);
-
-int
-glusterfs_glh_lsetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, const void *value, size_t size,
- int flags);
-
-int
-glusterfs_setxattr (const char *path, const char *name, const void *value,
- size_t size, int flags);
-
-int
-glusterfs_lsetxattr (const char *path, const char *name, const void *value,
- size_t size, int flags);
-
-int
-glusterfs_fsetxattr (glusterfs_file_t fd, const char *name, const void *value,
- size_t size, int flags);
-
-ssize_t
-glusterfs_glh_getxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size);
-
-ssize_t
-glusterfs_glh_lgetxattr (glusterfs_handle_t handle, const char *path,
- const char *name, void *value, size_t size);
-
-ssize_t
-glusterfs_getxattr (const char *path, const char *name, void *value,
- size_t size);
-
-ssize_t
-glusterfs_lgetxattr (const char *path, const char *name, void *value,
- size_t size);
-
-ssize_t
-glusterfs_fgetxattr (glusterfs_file_t fd, const char *name, void *value,
- size_t size);
-
-ssize_t
-glusterfs_listxattr (glusterfs_handle_t handle, const char *path, char *list,
- size_t size);
-
-ssize_t
-glusterfs_llistxattr (glusterfs_handle_t handle, const char *path, char *list,
- size_t size);
-
-ssize_t
-glusterfs_flistxattr (glusterfs_file_t fd, char *list, size_t size);
-
-int
-glusterfs_removexattr (glusterfs_handle_t handle, const char *path,
- const char *name);
-
-int
-glusterfs_lremovexattr (glusterfs_handle_t handle, const char *path,
- const char *name);
-
-int
-glusterfs_fremovexattr (glusterfs_file_t fd, const char *name);
-
-
-
-/* Read data from a file.
- * @fd : Handle returned by glusterfs_open or
- * glusterfs_glh_open.
- * @buf : Buffer to read the data into.
- * @nbytes : Number of bytes to read.
- *
- * Returns number of bytes actually read on success or -1 on error
- * with errno set to the appropriate error number.
- */
-ssize_t
-glusterfs_read (glusterfs_file_t fd, void *buf, size_t nbytes);
-
-
-
-/* Read data into an array of buffers.
- *
- * @fd : File handle returned by glusterfs_open or
- * glusterfs_glh_open.
- * @vec : Array of buffers into which the data is read.
- * @count : Number of iovecs referred to by vec.
- *
- * Returns number of bytes read on success or -1 on error with errno
- * set appropriately.
- */
-ssize_t
-glusterfs_readv (glusterfs_file_t fd, const struct iovec *vec, int count);
-
-int
-glusterfs_read_async (glusterfs_file_t fd, size_t nbytes, off_t offset,
- glusterfs_readv_cbk_t readv_cbk, void *cbk_data);
-
-
-
-/* Write data into a file.
- *
- * @fd : File handle returned from glusterfs_open or
- * glusterfs_glh_open.
- * @buf : Buffer which is written to the file.
- * @nbytes : Number bytes of the @buf written to the file.
- *
- * On success, returns number of bytes written. On error, returns -1
- * with errno set appropriately.
- */
-ssize_t
-glusterfs_write (glusterfs_file_t fd, const void *buf, size_t nbytes);
-
-
-
-/* Writes an array of buffers into a file.
- *
- * @fd : The file handle returned from glusterfs_open or
- * glusterfs_glh_open.
- * @vector : Array of buffers to be written to the file.
- * @count : Number of separate buffers in the @vector array.
- *
- * Returns number of bytes written on success or -1 on error with
- * errno set approriately.
- */
-ssize_t
-glusterfs_writev (glusterfs_file_t fd, const struct iovec *vector, int count);
-
-int
-glusterfs_write_async (glusterfs_file_t fd, const void *buf, size_t nbytes,
- off_t offset, glusterfs_write_cbk_t write_cbk,
- void *cbk_data);
-
-int
-glusterfs_writev_async (glusterfs_file_t fd, const struct iovec *vector,
- int count, off_t offset,
- glusterfs_write_cbk_t write_cbk, void *cbk_data);
-
-
-
-/* Read from a file starting at a given offset.
- *
- * @fd : File handle returned from glusterfs_open or
- * glusterfs_glh_open.
- * @buf : Buffer to read the data into.
- * @nbytes : Number of bytes to read.
- * @offset : The offset to start reading @nbytes from.
- *
- * Returns number of bytes read on success or -1 on error with errno
- * set appropriately.
- */
-ssize_t
-glusterfs_pread (glusterfs_file_t fd, void *buf, size_t nbytes, off_t offset);
-
-
-
-/* Write to a file starting at a given offset.
- *
- * @fd : Flie handle returned from glusterfs_open or
- * glusterfs_glh_open.
- * @buf : Buffer that will be written to the file.
- * @nbytes : Number of bytes to write from @buf.
- * @offset : The starting offset from where @nbytes will be
- * written.
- *
- * Returns number of bytes written on success and -1 on error with
- * errno set appropriately.
- */
-ssize_t
-glusterfs_pwrite (glusterfs_file_t fd, const void *buf, size_t nbytes,
- off_t offset);
-
-
-
-/* Seek to an offset in the file.
- *
- * @fd : File handle in which to seek to. File handle
- * returned by glusterfs_open or glusterfs_glh_open.
- * @offset : Offset to seek to in the given file.
- * @whence : Determines how the offset is interpreted by this
- * syscall. The behaviour is similar to the options
- * provided by the POSIX lseek system call. See man lseek
- * for more details.
- *
- * On success, returns the resulting absolute offset in the file after the seek
- * operation is performed. ON error, returns -1 with errno set
- * appropriately.
- */
-off_t
-glusterfs_lseek (glusterfs_file_t fd, off_t offset, int whence);
-
-
-
-/* Create a directory.
- *
- * @handle : The handle of the glusterfs context in which the
- * directory needs to be created.
- * @path : The absolute path within the glusterfs context where
- * the directory needs to be created.
- * @mode : The mode bits for the newly created directory.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_mkdir (glusterfs_handle_t handle, const char *path, mode_t mode);
-
-
-
-/* Create a directory.
- *
- * @path : Path to the directory that needs to be created. This
- * path must be prefixed with the VMP of the particular glusterfs
- * context.
- * @mode : Mode flags for the newly created directory.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_mkdir (const char *path, mode_t mode);
-
-
-
-/* Remove a directory.
- *
- * @handle : Handle of the glusterfs context from which to remove
- * the directory.
- * @path : The path of the directory to be removed in the glusterfs
- * context.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_rmdir (glusterfs_handle_t handle, const char *path);
-
-
-
-/* Remove a directory.
- *
- * @path : The absolute path to the directory to be removed.
- * This path must be pre-fixed with the VMP of the
- * particular glusterfs context in which this directory
- * resides.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_rmdir (const char *path);
-
-
-
-/* Read directory entries.
- *
- * @fd : The handle of the directory to be read. This handle
- * is the one returned by opendir.
- *
- * Returns the directory entry on success and NULL pointer on error
- * with errno set appropriately.
- */
-void *
-glusterfs_readdir (glusterfs_dir_t dirfd);
-
-
-
-/* re-entrant version of glusterfs_readdir.
- *
- * @dirfd : The handle of directory to be read. This handle is the one
- * returned by opendir.
- * @entry : Pointer to storage to store a directory entry. The storage
- * pointed to by entry shall be large enough for a dirent with
- * an array of char d_name members containing at least
- * {NAME_MAX}+1 elements.
- * @result : Upon successful return, the pointer returned at *result shall
- * have the same value as the argument entry. Upon reaching the
- * end of the directory stream, this pointer shall have the
- * value NULL.
- */
-int
-glusterfs_readdir_r (glusterfs_dir_t dirfd, struct dirent *entry,
- struct dirent **result);
-
-/* Close a directory handle.
- *
- * @fd : The directory handle to be closed.
- *
- * Returns 0 on success and -1 on error with errno set to 0.
- */
-int
-glusterfs_closedir (glusterfs_dir_t dirfd);
-/* FIXME: remove getdents */
-int
-glusterfs_getdents (glusterfs_dir_t fd, struct dirent *dirp,
- unsigned int count);
-
-
-
-/* Create device node.
- *
- * @handle : glusterfs context in which to create the device
- * node.
- * @pathname : The absolute path of the device to be created in the
- * given glusterfs context.
- *
- * @mode : Mode flags to apply to the newly created node.
- * @dev : Device numbers that will apply to the node.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_mknod(glusterfs_handle_t handle, const char *pathname,
- mode_t mode, dev_t dev);
-
-
-
-/* Create a device node.
- *
- * @pathname : The full path of the node to be created. This path
- * should be pre-pended with the VMP of the glusterfs
- * context in which this node is to be created.
- * @mode : Mode flags that will be applied to the newly created
- * device file.
- * @dev : The device numbers that will be associated with the
- * device node.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_mknod(const char *pathname, mode_t mode, dev_t dev);
-
-
-
-/* Returns the real absolute path of the given path.
- *
- * @handle : The glusterfs context in which the path resides in.
- * @path : The path to be resolved.
- * @resolved_path : The resolved path is stored in this buffer
- * provided by the caller.
- *
- * Returns a pointer to resolved_path on success and NULL on error
- * with errno set appropriately.
- *
- * See man realpath for details.
- */
-char *
-glusterfs_glh_realpath (glusterfs_handle_t handle, const char *path,
- char *resolved_path);
-
-
-/* Returns the real absolute path of the given path.
- *
- * @path : The path to be resolved. This path must be
- * pre-fixed with the VMP of the glusterfs
- * context in which the file resides.
- *
- * @resolved_path : The resolved path is stored in this user
- * provided buffer.
- *
- * Returns a pointer to resolved_path on success, and NULL on error
- * with errno set appropriately.
- */
-char *
-glusterfs_realpath (const char *path, char *resolved_path);
-
-
-
-/* Change mode flags on a path.
- *
- * @handle : Handle of the glusterfs instance in which the path
- * resides.
- * @path : The path whose mode bits need to be changed.
- * @mode : The new mode bits.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_chmod (glusterfs_handle_t handle, const char *path, mode_t mode);
-
-
-
-/* Change mode flags on a path.
- *
- * @path : The path whose mode bits need to be changed. The
- * path should be pre-fixed with the VMP that identifies the
- * glusterfs context within which the path resides.
- * @mode : The new mode bits.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_chmod (const char *path, mode_t mode);
-
-
-
-/* Change the owner of a path.
- * If @path is a symlink, it is dereferenced and the ownership change
- * happens on the target.
- *
- * @handle : Handle of the glusterfs context in which the path
- * resides.
- * @path : The path whose owner needs to be changed.
- * @owner : ID of the new owner.
- * @group : ID of the new group.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_chown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group);
-
-
-
-/* Change the owner of a path.
- *
- * If @path is a symlink, it is dereferenced and the ownership change
- * happens on the target.
- * @path : The path whose owner needs to be changed. Path must
- * be pre-fixed with the VMP that identifies the
- * glusterfs context in which the path resides.
- * @owner : ID of the new owner.
- * @group : ID of the new group.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_chown (const char *path, uid_t owner, gid_t group);
-
-
-
-/* Change the owner of the file.
- *
- * @fd : Handle of the file whose owner needs to be changed.
- * @owner : ID of the new owner.
- * @group : ID of the new group.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_fchown (glusterfs_file_t fd, uid_t owner, gid_t group);
-
-
-
-/* Open a directory.
- *
- * @handle : Handle that identifies a glusterfs context.
- * @path : Path to the directory in the glusterfs context.
- *
- * Returns a non-NULL handle on success and NULL on failure with errno
- * set appropriately.
- */
-glusterfs_dir_t
-glusterfs_glh_opendir (glusterfs_handle_t handle, const char *path);
-
-
-
-/* Open a directory.
- *
- * @path : Path to the directory. The path must be prepended
- * with the VMP in order to identify the glusterfs
- * context in which path resides.
- *
- * Returns a non-NULL handle on success and NULL on failure with errno
- * set appropriately.
- */
-glusterfs_dir_t
-glusterfs_opendir (const char *path);
-
-
-
-/* Change the mode bits on an open file.
- *
- * @fd : The file whose mode bits need to be changed.
- * @mode : The new mode bits.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_fchmod (glusterfs_file_t fd, mode_t mode);
-
-
-
-/* Sync the file contents to storage.
- *
- * @fd : The file whose contents need to be sync'ed to
- * storage.
- *
- * Return 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_fsync (glusterfs_file_t *fd);
-
-
-
-/* Truncate an open file.
- *
- * @fd : The file to truncate.
- * @length : The length to truncate to.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_ftruncate (glusterfs_file_t fd, off_t length);
-
-
-
-/* Create a hard link between two paths.
- *
- * @handle : glusterfs context in which both paths should reside.
- * @oldpath : The existing path to link to.
- * @newpath : The new path which will be linked to @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_link (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath);
-
-
-
-/* Create a hard link between two paths.
- *
- * @oldpath : The existing path to link to.
- * @newpath : The new path which will be linked to @oldpath.
- *
- * Both paths should exist on the same glusterfs context and should be
- * prefixed with the same VMP.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_link (const char *oldpath, const char *newpath);
-
-
-
-/* Get stats about the underlying file system.
- *
- * @handle : Identifies the glusterfs context in which resides
- * the given path.
- * @path : stats are returned for the file system on which file
- * is located.
- * @buf : The buffer into which the stats are copied.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_statfs (glusterfs_handle_t handle, const char *path,
- struct statfs *buf);
-
-
-
-/* Get stats about the underlying file system.
- *
- * @path : stats are returned for the file system on which file
- * is located. @path must start with the VMP of the
- * glusterfs context on which the file reside.
- * @buf : The buffer into which the stats are copied.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_statfs (const char *path, struct statfs *buf);
-
-
-
-/* Get stats about the underlying file system.
- *
- * @handle : Identifies the glusterfs context in which resides
- * the given path.
- * @path : stats are returned for the file system on which file
- * is located.
- * @buf : The buffer into which the stats are copied.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_statvfs (glusterfs_handle_t handle, const char *path,
- struct statvfs *buf);
-
-
-
-/* Get stats about the underlying file system.
- *
- * @path : stats are returned for the file system on which file
- * is located. @path must start with the VMP of the
- * glusterfs context on which the file reside.
- * @buf : The buffer into which the stats are copied.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_statvfs (const char *path, struct statvfs *buf);
-
-
-
-/* Set the atime and mtime values for a given path.
- *
- * @handle : The handle identifying the glusterfs context.
- * @path : The path for which the times need to be changed.
- * @times : The array containing new time stamps for the file.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_utimes (glusterfs_handle_t handle, const char *path,
- const struct timeval times[2]);
-
-
-
-/* Set the atime and mtime values for a given path.
- *
- * @path : The path for which the times need to be changed.
- * @times : The array containing new time stamps for the file.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_utimes (const char *path, const struct timeval times[2]);
-
-
-
-/* Set the atime and mtime values for a given path.
- *
- * @handle : The handle identifying the glusterfs context.
- * @path : The path for which the times need to be changed.
- * @buf : The structure containing new time stamps for the file.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_utime (glusterfs_handle_t handle, const char *path,
- const struct utimbuf *buf);
-
-
-
-/* Set the atime and mtime values for a given path.
- *
- * @path : The path for which the times need to be changed.
- * @buf : The structure containing new time stamps for the file.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_utime (const char *path, const struct utimbuf *buf);
-
-
-
-/* Create FIFO at the given path.
- *
- * @handle : The glusterfs context in which to create that FIFO.
- * @path : The path within the context where the FIFO is to be
- * created.
- * @mode : The mode bits for the newly create FIFO.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_mkfifo (glusterfs_handle_t handle, const char *path,
- mode_t mode);
-
-
-
-/* Create FIFO at the given path.
- *
- * @path : The path within the context where the FIFO is to be
- * created. @path should begin with the VMP of the
- * glusterfs context in which the FIFO needs to be
- * created.
- * @mode : The mode bits for the newly create FIFO.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_mkfifo (const char *path, mode_t mode);
-
-
-
-/* Unlink a file.
- *
- * @handle : Handle that identifies a glusterfs instance.
- * @path : Path in the glusterfs instance that needs to be
- * unlinked.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_unlink (glusterfs_handle_t handle, const char *path);
-
-
-
-/* Unlink a file.
- *
- * @path : Path in the glusterfs instance that needs to be
- * unlinked.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_unlink (const char *path);
-
-
-
-/* Create a symbolic link.
- *
- * @handle : The handle identifying the glusterfs context.
- * @oldpath : The existing path to which a symlink needs to be
- * created.
- * @newpath : The new path which will be symlinked to the
- * @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_symlink (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath);
-
-
-
-/* Create a symbolic link.
- *
- * @oldpath : The existing path to which a symlink needs to be
- * created.
- * @newpath : The new path which will be symlinked to the
- * @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_symlink (const char *oldpath, const char *newpath);
-
-
-
-/* Read a symbolic link.
- *
- * @handle : Handle identifying the glusterfs context.
- * @path : The symlink that needs to be read.
- * @buf : The buffer into which the target of @path will be
- * stored.
- * @bufsize : Size of the buffer allocated to @buf.
- *
- * Returns number of bytes copied into @buf and -1 on error with errno
- * set appropriately.
- */
-ssize_t
-glusterfs_glh_readlink (glusterfs_handle_t handle, const char *path, char *buf,
- size_t bufsize);
-
-
-
-/* Read a symbolic link.
- *
- * @path : The symlink that needs to be read.
- * @buf : The buffer into which the target of @path will be
- * stored.
- * @bufsize : Size of the buffer allocated to @buf.
- *
- * Returns number of bytes copied into @buf and -1 on error with errno
- * set appropriately.
- */
-ssize_t
-glusterfs_readlink (const char *path, char *buf, size_t bufsize);
-
-
-
-/* Rename a file or directory.
- *
- * @handle : The identifier of a glusterfs context.
- * @oldpath : The path to be renamed.
- * @newpath : The new name for the @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_rename (glusterfs_handle_t handle, const char *oldpath,
- const char *newpath);
-
-
-
-/* Rename a file or directory.
- * @oldpath : The path to be renamed.
- * @newpath : The new name for the @oldpath.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_rename (const char *oldpath, const char *newpath);
-
-
-
-/* Remove a file or directory in the given glusterfs context.
- *
- * @handle : Handle identifying the glusterfs context.
- * @path : Path of the file or directory to be removed.
- *
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_remove (glusterfs_handle_t handle, const char *path);
-
-
-
-/* Remove a file or directory.
- *
- * @path : Path of the file or directory to be removed. The
- * path must be pre-fixed with the VMP.
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_remove (const char *path);
-
-
-
-/* Change the owner of the given path.
- *
- * If @path is a symlink, the ownership change happens on the symlink.
- *
- * @handle : Handle identifying the glusterfs client context.
- * @path : Path whose owner needs to be changed.
- * @owner : New owner ID
- * @group : New Group ID
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-int
-glusterfs_glh_lchown (glusterfs_handle_t handle, const char *path, uid_t owner,
- gid_t group);
-
-
-
-/* Change the owner of the given path.
- *
- * If @path is a symlink, the ownership change happens on the symlink.
- *
- * @path : Path whose owner needs to be changed.
- * @owner : New owner ID
- * @group : New Group ID
- *
- * Returns 0 on success and -1 on error with errno set appropriately.
- */
-
-int
-glusterfs_lchown (const char *path, uid_t owner, gid_t group);
-
-
-
-/* Rewind directory stream pointer to beginning of the directory.
- *
- * @dirfd : Directory handle returned by glusterfs_open on
- * glusterfs_opendir.
- *
- * Returns no value.
- */
-void
-glusterfs_rewinddir (glusterfs_dir_t dirfd);
-
-
-
-/* Seek to the given offset in the directory handle.
- *
- * @dirfd : Directory handle returned by glusterfs_open on
- * glusterfs_opendir.
- * @offset : The offset to seek to.
- *
- * Returns no value.
- */
-void
-glusterfs_seekdir (glusterfs_dir_t dirfd, off_t offset);
-
-
-
-/* Return the current offset in a directory stream.
- *
- * @dirfd : Directory handle returned by glusterfs_open on
- * glusterfs_opendir.
- *
- * Returns the offset in the directory or -1 on error with errno set
- * appropriately.
- */
-off_t
-glusterfs_telldir (glusterfs_dir_t dirfd);
-
-
-/* Write count bytes from in_fd to out_fd, starting at *offset.
- * glusterfs_sendfile aims at eliminating memory copy at the end of
- * each read from in_fd, copying the file directly to out_fd from the buffer
- * provided by glusterfs.
- *
- * @out_fd: file descriptor opened for writing
- *
- * @in_fd: glusterfs file handle to the file to be read from.
- *
- * @offset: If offset is not NULL, then it points to a variable holding the file
- * offset from which glusterfs_sendfile() will start reading data
- * from in_fd. When glusterfs_sendfile() returns, this variable will
- * be set to the offset of the byte following the last byte that was
- * read. If offset is not NULL, then glusterfs_sendfile() does not
- * modify the current file offset of in_fd; otherwise the current file
- * offset is adjusted to reflect the number of bytes read from in_fd.
- *
- * @count: number of bytes to copy between the file descriptors.
- */
-
-ssize_t
-glusterfs_sendfile (int out_fd, glusterfs_file_t in_fd, off_t *offset,
- size_t count);
-
-/* manipulate file descriptor
- * This api can have 3 forms similar to fcntl(2).
- *
- * int
- * glusterfs_fcntl (glusterfs_file_t fd, int cmd)
- *
- * int
- * glusterfs_fcntl (glusterfs_file_t fd, int cmd, long arg)
- *
- * int
- * glusterfs_fcntl (glusterfs_file_t fd, int cmd, struct flock *lock)
- *
- * @fd : file handle returned by glusterfs_open or glusterfs_create.
- * @cmd : Though the aim is to implement all possible commands supported by
- * fcntl(2), currently following commands are supported.
- * F_SETLK, F_SETLKW, F_GETLK - used to acquire, release, and test for
- * the existence of record locks (also
- * known as file-segment or file-region
- * locks). More detailed explanation is
- * found in 'man 2 fcntl'
- */
-
-int
-glusterfs_fcntl (glusterfs_file_t fd, int cmd, ...);
-
-/*
- * Change the current working directory to @path
- *
- * @path : path to change the current working directory to.
- *
- * Returns 0 on success and -1 on failure with errno set appropriately.
- */
-int
-glusterfs_chdir (const char *path);
-
-/*
- * Change the current working directory to the path @fd is opened on.
- *
- * @fd : current working directory will be changed to path @fd is opened on.
- *
- * Returns 0 on success and -1 on with errno set appropriately.
- */
-int
-glusterfs_fchdir (glusterfs_file_t fd);
-
-/* copies the current working directory into @buf if it is big enough
- *
- * @buf: buffer to copy into it. If @buf is NULL, a buffer will be allocated.
- * The size of the buffer will be @size if it is not zero, otherwise the
- * size will be big enough to hold the current working directory.
- * @size: size of the buffer.
- *
- * Returns the pointer to buffer holding current working directory on success
- * and NULL on failure.
- */
-
-char *
-glusterfs_getcwd (char *buf, size_t size);
-
-/*
- * Truncate the file to a specified length.
- *
- * @path : path to the file.
- * @length : length to which the file has to be truncated.
- *
- * Returns 0 on success and -1 on failure with errno set appropriately
- */
-
-int
-glusterfs_truncate (const char *path, off_t length);
-
-
-/* FIXME: review the need for these apis */
-/* added for log related initialization in booster fork implementation */
-void
-glusterfs_reset (void);
-
-void
-glusterfs_log_lock (void);
-
-void
-glusterfs_log_unlock (void);
-/* Used to free the glusterfs_read_buf passed to the application from
- glusterfs_read_async_cbk
-*/
-void
-glusterfs_free (glusterfs_iobuf_t *buf);
-
-__END_DECLS
-
-#endif /* !_LIBGLUSTERFSCLIENT_H */
diff --git a/mod_glusterfs/Makefile.am b/mod_glusterfs/Makefile.am
deleted file mode 100644
index 0abe8dcfcbb..00000000000
--- a/mod_glusterfs/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = apache lighttpd
-
-CLEANFILES =
diff --git a/mod_glusterfs/apache/1.3/src/Makefile.am b/mod_glusterfs/apache/1.3/src/Makefile.am
deleted file mode 100644
index 6bb3075f564..00000000000
--- a/mod_glusterfs/apache/1.3/src/Makefile.am
+++ /dev/null
@@ -1,30 +0,0 @@
-mod_glusterfs_PROGRAMS = mod_glusterfs.so
-mod_glusterfsdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/apache/1.3
-
-mod_glusterfs_so_SOURCES = mod_glusterfs.c
-
-all: mod_glusterfs.so
-
-mod_glusterfs.so: $(top_srcdir)/mod_glusterfs/apache/1.3/src/mod_glusterfs.c $(top_builddir)/libglusterfsclient/src/libglusterfsclient.la
- ln -sf $(top_srcdir)/mod_glusterfs/apache/1.3/src/mod_glusterfs.c $(top_builddir)/mod_glusterfs/apache/1.3/src/mod_glusterfs-build.c
- $(APXS) -c -Wc,-g3 -Wc,-O0 -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -D_GNU_SOURCE -I$(top_srcdir)/libglusterfsclient/src -Wl,-rpath,$(libdir) -Wl,-rpath,$(top_builddir)/libglusterfsclient/src/.libs/ $(top_builddir)/libglusterfsclient/src/.libs/libglusterfsclient.so mod_glusterfs-build.c -o $(top_builddir)/mod_glusterfs/apache/1.3/src/mod_glusterfs.so
-
-$(top_builddir)/libglusterfsclient/src/libglusterfsclient.la:
- $(MAKE) -C $(top_builddir)/libglusterfsclient/src/ all
-
-install-data-local:
- @echo ""
- @echo ""
- @echo "**********************************************************************************"
- @echo "* TO INSTALL MODGLUSTERFS, PLEASE USE, "
- @echo "* $(APXS) -n glusterfs -ia $(mod_glusterfsdir)/mod_glusterfs.so "
- @echo "**********************************************************************************"
- @echo ""
- @echo ""
-
-#install:
-# cp -fv mod_glusterfs.so $(HTTPD_LIBEXECDIR)
-# cp -fv httpd.conf $(HTTPD_CONF_DIR)
-
-clean:
- -rm -fv *.so *.o mod_glusterfs-build.c
diff --git a/mod_glusterfs/apache/1.3/src/README.txt b/mod_glusterfs/apache/1.3/src/README.txt
deleted file mode 100644
index 378a51d79ae..00000000000
--- a/mod_glusterfs/apache/1.3/src/README.txt
+++ /dev/null
@@ -1,107 +0,0 @@
-What is mod_glusterfs?
-======================
-* mod_glusterfs is a module for apache written for efficient serving of files from glusterfs.
- mod_glusterfs interfaces with glusterfs using apis provided by libglusterfsclient.
-
-* this README speaks about installation of apache-1.3.x, where x is any minor version.
-
-Prerequisites for mod_glusterfs
-===============================
-Though mod_glusterfs has been written as a module, with an intent of making no changes to the way apache has
-been built, currently following points have to be taken care of:
-
-* module "so" has to be enabled, for apache to support modules.
-* since glusterfs is compiled with _FILE_OFFSET_BITS=64 and __USE_FILE_OFFSET64 flags, mod_glusterfs and apache
- in turn have to be compiled with the above two flags.
-
- $ tar xzvf apache-1.3.9.tar.gz
- $ cd apache-1.3.9/
- $ # add -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 to EXTRA_CFLAGS in src/Configuration.
- $ ./configure --prefix=/usr --enable-module=so
- $ cd src
- $ ./Configure
- $ cd ../
- $ make install
- $ httpd -l | grep -i mod_so
- mod_so.c
-
-* if multiple apache installations are present, make sure to pass --with-apxs=/path/to/apxs/of/proper/version to configure script while building glusterfs.
-
-Build/Install mod_glusterfs
-===========================
-* mod_glusterfs is provided with glusterfs--mainline--3.0 and all releases from the same branch.
-
-* building glusterfs also builds mod_glusterfs. But 'make install' of glusterfs installs mod_glusterfs.so to
- glusterfs install directory instead of the apache modules directory.
-
-* 'make install' of glusterfs will print a message similar to the one given below, which is self explanatory.
- Make sure to use apxs of proper apache version in case of multiple apache installations. This will copy
- mod_glusterfs.so to modules directory of proper apache version and modify the appropriate httpd.conf to enable
- mod_glusterfs.
-
-**********************************************************************************************
-* TO INSTALL MODGLUSTERFS, PLEASE USE,
-* apxs -n mod_glusterfs -ia /usr/lib/glusterfs/1.4.0pre2/apache-1.3/mod_glusterfs.so
-**********************************************************************************************
-
-Configuration
-=============
-* Following configuration has to be added to httpd.conf.
-
- <Location "/glusterfs">
- GlusterfsLogfile "/var/log/glusterfs/glusterfs.log"
- GlusterfsLoglevel "warning"
- GlusterfsVolumeSpecfile "/etc/glusterfs/glusterfs-client.spec"
- GlusterfsCacheTimeout "600"
- GlusterfsXattrFileSize "65536"
- SetHandler "glusterfs-handler"
- </Location>
-
-* GlusterfsVolumeSpecfile (COMPULSORY)
- Path to the the glusterfs volume specification file.
-
-* GlusterfsLogfile (COMPULSORY)
- Path to the glusterfs logfile.
-
-* GlusterfsLoglevel (OPTIONAL, default = warning)
- Severity of messages that are to be logged. Allowed values are critical, error, warning, debug, none
- in the decreasing order of severity.
-
-* GlusterfsCacheTimeOut (OPTIONAL, default = 0)
- Timeout values for glusterfs stat and lookup cache.
-
-* GlusterfsXattrFileSize (OPTIONAL, default = 0)
- Files with sizes upto and including this value are fetched through the extended attribute interface of
- glusterfs rather than the usual open-read-close set of operations. For files of small sizes, it is recommended
- to use extended attribute interface.
-
-* With the above configuration all the requests to httpd of the form www.example.org/glusterfs/path/to/file are
- served from glusterfs.
-
-Miscellaneous points
-====================
-* httpd by default runs with username "nobody" and group "nogroup". Permissions of logfile and specfile have to
- be set suitably.
-
-* Since mod_glusterfs runs with permissions of nobody.nogroup, glusterfs has to use only login based
- authentication. See docs/authentication.txt for more details.
-
-* To copy the data served by httpd into glusterfs mountpoint, glusterfs can be started with the
- volume-specification file provided to mod_glusterfs. Any tool like cp can then be used.
-
-* To run in gdb, apache has to be compiled with -lpthread, since libglusterfsclient is multithreaded.
- If not on Linux gdb runs into errors like:
- "Error while reading shared library symbols:
- Cannot find new threads: generic error"
-
-* when used with ib-verbs transport, ib_verbs initialization fails.
- reason for this is that apache runs as non-privileged user and the amount of memory that can be
- locked by default is not sufficient for ib-verbs. to fix this, as root run,
-
- # ulimit -l unlimited
-
- and then start apache.
-
-TODO
-====
-* directory listing for the directories accessed through mod_glusterfs.
diff --git a/mod_glusterfs/apache/1.3/src/mod_glusterfs.c b/mod_glusterfs/apache/1.3/src/mod_glusterfs.c
deleted file mode 100644
index c1380a4fda6..00000000000
--- a/mod_glusterfs/apache/1.3/src/mod_glusterfs.c
+++ /dev/null
@@ -1,507 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef CORE_PRIVATE
-#define CORE_PRIVATE
-#endif
-
-#include <httpd.h>
-#include <http_config.h>
-#include <http_core.h>
-#include <http_request.h>
-#include <http_protocol.h>
-#include <http_log.h>
-#include <http_main.h>
-#include <util_script.h>
-#include <libglusterfsclient.h>
-#include <sys/uio.h>
-#include <pthread.h>
-
-#define GLUSTERFS_INVALID_LOGLEVEL "mod_glusterfs: Unrecognized log-level "\
- "\"%s\", possible values are \"DEBUG|"\
- "WARNING|ERROR|CRITICAL|NONE\"\n"
-
-#define GLUSTERFS_HANDLER "glusterfs-handler"
-#define GLUSTERFS_CHUNK_SIZE 131072
-
-module MODULE_VAR_EXPORT glusterfs_module;
-
-/*TODO: verify error returns to server core */
-
-typedef struct glusterfs_dir_config {
- char *logfile;
- char *loglevel;
- char *specfile;
- char *mount_dir;
- char *buf;
- size_t xattr_file_size;
- uint32_t cache_timeout;
-} glusterfs_dir_config_t;
-
-typedef struct glusterfs_async_local {
- int op_ret;
- int op_errno;
- char async_read_complete;
- off_t length;
- off_t read_bytes;
- glusterfs_iobuf_t *buf;
- request_rec *request;
- pthread_mutex_t lock;
- pthread_cond_t cond;
-}glusterfs_async_local_t;
-
-#define GLUSTERFS_CMD_PERMS ACCESS_CONF
-
-static glusterfs_dir_config_t *
-mod_glusterfs_dconfig(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL;
- if (r->per_dir_config != NULL) {
- dir_config = ap_get_module_config (r->per_dir_config,
- &glusterfs_module);
- }
-
- return dir_config;
-}
-
-static
-const char *add_xattr_file_size(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->xattr_file_size = atoi (arg);
- return NULL;
-}
-
-static
-const char *set_cache_timeout(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->cache_timeout = atoi (arg);
- return NULL;
-}
-
-static
-const char *set_loglevel(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- char *error = NULL;
- if (strncasecmp (arg, "DEBUG", strlen ("DEBUG"))
- && strncasecmp (arg, "WARNING", strlen ("WARNING"))
- && strncasecmp (arg, "CRITICAL", strlen ("CRITICAL"))
- && strncasecmp (arg, "NONE", strlen ("NONE"))
- && strncasecmp (arg, "ERROR", strlen ("ERROR")))
- error = GLUSTERFS_INVALID_LOGLEVEL;
- else
- dir_config->loglevel = arg;
-
- return error;
-}
-
-static
-const char *add_logfile(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->logfile = arg;
-
- return NULL;
-}
-
-static
-const char *add_specfile(cmd_parms *cmd, void *dummy, char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
-
- dir_config->specfile = arg;
-
- return NULL;
-}
-
-static void *
-mod_glusterfs_create_dir_config(pool *p, char *dirspec)
-{
- glusterfs_dir_config_t *dir_config = NULL;
-
- dir_config = (glusterfs_dir_config_t *) ap_pcalloc(p,
- sizeof(*dir_config));
-
- dir_config->mount_dir = dirspec;
- dir_config->logfile = dir_config->specfile = (char *)0;
- dir_config->loglevel = "warning";
- dir_config->cache_timeout = 0;
- dir_config->buf = NULL;
-
- return (void *) dir_config;
-}
-
-static void
-mod_glusterfs_child_init(server_rec *s, pool *p)
-{
- void **urls = NULL;
- int n, i;
- core_server_config *mod_core_config = ap_get_module_config (s->module_config,
- &core_module);
- glusterfs_dir_config_t *dir_config = NULL;
- glusterfs_init_params_t params = {0, };
-
- n = mod_core_config->sec_url->nelts;
- urls = (void **)mod_core_config->sec_url->elts;
- for (i = 0; i < n; i++) {
- dir_config = ap_get_module_config (urls[i], &glusterfs_module);
-
- if (dir_config) {
- memset (&params, 0, sizeof (params));
-
- params.logfile = dir_config->logfile;
- params.loglevel = dir_config->loglevel;
- params.lookup_timeout = dir_config->cache_timeout;
- params.stat_timeout = dir_config->cache_timeout;
- params.specfile = dir_config->specfile;
-
- glusterfs_mount (dir_config->mount_dir, &params);
- }
- dir_config = NULL;
- }
-}
-
-static void
-mod_glusterfs_child_exit(server_rec *s, pool *p)
-{
- void **urls = NULL;
- int n, i;
- core_server_config *mod_core_config = NULL;
- glusterfs_dir_config_t *dir_config = NULL;
-
- mod_core_config = ap_get_module_config (s->module_config, &core_module);
- n = mod_core_config->sec_url->nelts;
- urls = (void **)mod_core_config->sec_url->elts;
- for (i = 0; i < n; i++) {
- dir_config = ap_get_module_config (urls[i], &glusterfs_module);
- if (dir_config) {
- glusterfs_umount (dir_config->mount_dir);
- }
- }
-}
-
-static int mod_glusterfs_fixup(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL;
- int access_status;
- int ret;
- char *path = NULL;
-
- dir_config = mod_glusterfs_dconfig(r);
-
- if (dir_config && dir_config->mount_dir
- && !(strncmp (ap_pstrcat (r->pool, dir_config->mount_dir, "/",
- NULL),
- r->uri, strlen (dir_config->mount_dir) + 1)
- && !r->handler))
- r->handler = ap_pstrdup (r->pool, GLUSTERFS_HANDLER);
-
- if (!r->handler || (r->handler && strcmp (r->handler,
- GLUSTERFS_HANDLER)))
- return DECLINED;
-
- path = r->uri;
-
- memset (&r->finfo, 0, sizeof (r->finfo));
-
- dir_config->buf = calloc (1, dir_config->xattr_file_size);
- if (!dir_config->buf) {
- return HTTP_INTERNAL_SERVER_ERROR;
- }
-
- ret = glusterfs_get (path, dir_config->buf,
- dir_config->xattr_file_size, &r->finfo);
-
- if (ret == -1 || r->finfo.st_size > dir_config->xattr_file_size
- || S_ISDIR (r->finfo.st_mode)) {
- free (dir_config->buf);
- dir_config->buf = NULL;
-
- if (ret == -1) {
- int error = HTTP_NOT_FOUND;
- char *emsg = NULL;
- if (r->path_info == NULL) {
- emsg = ap_pstrcat(r->pool, strerror (errno),
- r->filename, NULL);
- }
- else {
- emsg = ap_pstrcat(r->pool, strerror (errno),
- r->filename, r->path_info,
- NULL);
- }
- ap_log_rerror(APLOG_MARK, APLOG_ERR|APLOG_NOERRNO, r,
- "%s", emsg);
- if (errno != ENOENT) {
- error = HTTP_INTERNAL_SERVER_ERROR;
- }
- return error;
- }
- }
-
- if (r->uri && strlen (r->uri) && r->uri[strlen(r->uri) - 1] == '/')
- r->handler = NULL;
-
- r->filename = ap_pstrcat (r->pool, r->filename, r->path_info, NULL);
-
- if ((access_status = ap_find_types(r)) != 0) {
- return DECLINED;
- }
-
- return OK;
-}
-
-
-int
-mod_glusterfs_readv_async_cbk (int32_t op_ret, int32_t op_errno,
- glusterfs_iobuf_t *buf, void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
-
- pthread_mutex_lock (&local->lock);
- {
- local->async_read_complete = 1;
- local->buf = buf;
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- pthread_cond_signal (&local->cond);
- }
- pthread_mutex_unlock (&local->lock);
-
- return 0;
-}
-
-/* use read_async just to avoid memcpy of read buffer in libglusterfsclient */
-static int
-mod_glusterfs_read_async (request_rec *r, glusterfs_file_t fd, off_t offset,
- off_t length)
-{
- glusterfs_async_local_t local;
- off_t end;
- int nbytes;
- int complete;
- pthread_cond_init (&local.cond, NULL);
- pthread_mutex_init (&local.lock, NULL);
-
- memset (&local, 0, sizeof (local));
- local.request = r;
-
- if (length > 0)
- end = offset + length;
-
- do {
- glusterfs_iobuf_t *buf;
- int i;
- if (length > 0) {
- nbytes = end - offset;
- if (nbytes > GLUSTERFS_CHUNK_SIZE)
- nbytes = GLUSTERFS_CHUNK_SIZE;
- } else
- nbytes = GLUSTERFS_CHUNK_SIZE;
-
- glusterfs_read_async(fd,
- nbytes,
- offset,
- mod_glusterfs_readv_async_cbk,
- (void *)&local);
-
- pthread_mutex_lock (&local.lock);
- {
- while (!local.async_read_complete) {
- pthread_cond_wait (&local.cond, &local.lock);
- }
-
- local.async_read_complete = 0;
- buf = local.buf;
-
- if (length < 0)
- complete = (local.op_ret <= 0);
- else {
- local.read_bytes += local.op_ret;
- complete = ((local.read_bytes == length)
- || (local.op_ret < 0));
- }
- }
- pthread_mutex_unlock (&local.lock);
-
- for (i = 0; i < buf->count; i++) {
- if (ap_rwrite (buf->vector[i].iov_base,
- buf->vector[i].iov_len, r) < 0) {
- local.op_ret = -1;
- complete = 1;
- break;
- }
- }
-
- glusterfs_free (buf);
-
- offset += nbytes;
- } while (!complete);
-
- return (local.op_ret < 0 ? SERVER_ERROR : OK);
-}
-
-static int
-mod_glusterfs_handler(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config;
- char *path = NULL;
- int error = OK;
- int rangestatus = 0;
- int errstatus = OK;
- glusterfs_file_t fd;
-
- if (!r->handler || (r->handler && strcmp (r->handler,
- GLUSTERFS_HANDLER)))
- return DECLINED;
-
- if (r->uri[0] == '\0' || r->uri[strlen(r->uri) - 1] == '/') {
- return DECLINED;
- }
-
- dir_config = mod_glusterfs_dconfig (r);
-
- if (r->method_number != M_GET) {
- return METHOD_NOT_ALLOWED;
- }
-
- ap_update_mtime(r, r->finfo.st_mtime);
- ap_set_last_modified(r);
- ap_set_etag(r);
- ap_table_setn(r->headers_out, "Accept-Ranges", "bytes");
- if (((errstatus = ap_meets_conditions(r)) != OK)
- || (errstatus = ap_set_content_length(r, r->finfo.st_size))) {
- return errstatus;
- }
- rangestatus = ap_set_byterange(r);
- ap_send_http_header(r);
-
- if (r->finfo.st_size <= dir_config->xattr_file_size && dir_config->buf) {
- if (!r->header_only) {
- error = OK;
- ap_log_rerror (APLOG_MARK, APLOG_NOTICE, r,
- "fetching data from glusterfs through "
- "xattr interface\n");
-
- if (!rangestatus) {
- if (ap_rwrite (dir_config->buf,
- r->finfo.st_size, r) < 0) {
- error = HTTP_INTERNAL_SERVER_ERROR;
- }
- } else {
- long offset, length;
- while (ap_each_byterange (r, &offset, &length)) {
- if (ap_rwrite (dir_config->buf + offset,
- length, r) < 0) {
- error = HTTP_INTERNAL_SERVER_ERROR;
- break;
- }
- }
- }
- }
-
- free (dir_config->buf);
- dir_config->buf = NULL;
-
- return error;
- }
-
- path = r->uri;
- fd = glusterfs_open (path , O_RDONLY, 0);
-
- if (fd == 0) {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, r,
- "file permissions deny server access: %s",
- r->filename);
- return FORBIDDEN;
- }
-
- if (!r->header_only) {
- if (!rangestatus) {
- mod_glusterfs_read_async (r, fd, 0, -1);
- } else {
- long offset, length;
- while (ap_each_byterange(r, &offset, &length)) {
- mod_glusterfs_read_async (r, fd, offset, length);
- }
- }
- }
-
- glusterfs_close (fd);
- return error;
-}
-
-static const command_rec mod_glusterfs_cmds[] =
-{
- {"GlusterfsLogfile", add_logfile, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Glusterfs Logfile"},
- {"GlusterfsLoglevel", set_loglevel, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Glusterfs Loglevel:anyone of none, critical, error, warning, debug"},
- {"GlusterfsCacheTimeout", set_cache_timeout, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Timeout value in seconds for caching lookups and stats"},
- {"GlusterfsVolumeSpecfile", add_specfile, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Glusterfs Specfile required to access contents of this directory"},
- {"GlusterfsXattrFileSize", add_xattr_file_size, NULL,
- GLUSTERFS_CMD_PERMS, TAKE1,
- "Maximum size of the file to be fetched using xattr interface of "
- "glusterfs"},
- {NULL}
-};
-
-static const handler_rec mod_glusterfs_handlers[] =
-{
- {GLUSTERFS_HANDLER, mod_glusterfs_handler},
- {NULL}
-};
-
-module glusterfs_module =
-{
- STANDARD_MODULE_STUFF,
- NULL,
- mod_glusterfs_create_dir_config, /* per-directory config creator */
- NULL,
- NULL, /* server config creator */
- NULL, /* server config merger */
- mod_glusterfs_cmds, /* command table */
- mod_glusterfs_handlers, /* [7] list of handlers */
- NULL, /* [2] filename-to-URI translation */
- NULL, /* [5] check/validate user_id */
- NULL, /* [6] check user_id is valid *here* */
- NULL, /* [4] check access by host address */
- NULL, /* [7] MIME type checker/setter */
- mod_glusterfs_fixup, /* [8] fixups */
- NULL, /* [10] logger */
-#if MODULE_MAGIC_NUMBER >= 19970103
- NULL, /* [3] header parser */
-#endif
-#if MODULE_MAGIC_NUMBER >= 19970719
- mod_glusterfs_child_init, /* process initializer */
-#endif
-#if MODULE_MAGIC_NUMBER >= 19970728
- mod_glusterfs_child_exit, /* process exit/cleanup */
-#endif
-#if MODULE_MAGIC_NUMBER >= 19970902
- NULL /* [1] post read_request handling */
-#endif
-};
diff --git a/mod_glusterfs/apache/2.2/src/Makefile.am b/mod_glusterfs/apache/2.2/src/Makefile.am
deleted file mode 100644
index 1e8f3a31ea0..00000000000
--- a/mod_glusterfs/apache/2.2/src/Makefile.am
+++ /dev/null
@@ -1,31 +0,0 @@
-mod_glusterfs_PROGRAMS = mod_glusterfs.so
-mod_glusterfsdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/apache/2.2
-
-mod_glusterfs_so_SOURCES = mod_glusterfs.c
-
-all: mod_glusterfs.so
-
-mod_glusterfs.so: $(top_srcdir)/mod_glusterfs/apache/2.2/src/mod_glusterfs.c $(top_builddir)/libglusterfsclient/src/libglusterfsclient.la
- ln -sf $(top_srcdir)/mod_glusterfs/apache/2.2/src/mod_glusterfs.c $(top_builddir)/mod_glusterfs/apache/2.2/src/mod_glusterfs-build.c
- $(APXS) -c -o mod_glusterfs.la -Wc,-g3 -Wc,-O0 -DLINUX=2 -D_REENTRANT -D_GNU_SOURCE -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 -I$(top_srcdir)/libglusterfsclient/src -L$(top_builddir)/libglusterfsclient/src/.libs/ -lglusterfsclient mod_glusterfs-build.c
- -ln -sf .libs/mod_glusterfs.so mod_glusterfs.so
-
-$(top_builddir)/libglusterfsclient/src/libglusterfsclient.la:
- $(MAKE) -C $(top_builddir)/libglusterfsclient/src/ all
-
-install-data-local:
- @echo ""
- @echo ""
- @echo "**********************************************************************************"
- @echo "* TO INSTALL MODGLUSTERFS, PLEASE USE, "
- @echo "* $(APXS) -n glusterfs -ia $(mod_glusterfsdir)/mod_glusterfs.so "
- @echo "**********************************************************************************"
- @echo ""
- @echo ""
-
-#install:
-# cp -fv mod_glusterfs.so $(HTTPD_LIBEXECDIR)
-# cp -fv httpd.conf $(HTTPD_CONF_DIR)
-
-clean:
- rm -fv *.so *.o
diff --git a/mod_glusterfs/apache/2.2/src/README.txt b/mod_glusterfs/apache/2.2/src/README.txt
deleted file mode 100644
index 214a2535b5a..00000000000
--- a/mod_glusterfs/apache/2.2/src/README.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-What is mod_glusterfs?
-======================
-* mod_glusterfs is a module for apache written for efficient serving of files from glusterfs.
- mod_glusterfs interfaces with glusterfs using apis provided by libglusterfsclient.
-
-* this README speaks about installing mod_glusterfs for httpd-2.2 and higher.
-
-Prerequisites for mod_glusterfs
-===============================
-Though mod_glusterfs has been written as a module, with an intent of making no changes to
-the way apache has been built, currently following points have to be taken care of:
-
-* since glusterfs is compiled with _FILE_OFFSET_BITS=64 and __USE_FILE_OFFSET64 flags, mod_glusterfs and apache
- in turn have to be compiled with the above two flags.
-
- $ tar xzf httpd-2.2.10.tar.gz
- $ cd httpd-2.2.10/
- $ export CFLAGS='-D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64'
- $ ./configure --prefix=/usr
- $ make
- $ make install
- $ httpd -l | grep -i mod_so
- mod_so.c
-
-* if multiple apache installations are present, make sure to pass --with-apxs=/path/to/apxs/of/proper/version
- to configure script while building glusterfs.
-
-Build/Install mod_glusterfs
-===========================
-* mod_glusterfs is provided with glusterfs--mainline--3.0 and all releases from the same branch.
-
-* building glusterfs also builds mod_glusterfs. But 'make install' of glusterfs installs mod_glusterfs.so to
- glusterfs install directory instead of the apache modules directory.
-
-* 'make install' of glusterfs will print a message similar to the one given below, which is self explanatory.
- Make sure to use apxs of proper apache version in case of multiple apache installations. This will copy
- mod_glusterfs.so to modules directory of proper apache version and modify the appropriate httpd.conf to enable
- mod_glusterfs.
-
-**********************************************************************************
-* TO INSTALL MODGLUSTERFS, PLEASE USE,
-* apxs -n glusterfs -ia /usr/lib/glusterfs/2.0.0rc4/apache/2.2/mod_glusterfs.so
-**********************************************************************************
-
-Configuration
-=============
-* Following configuration has to be added to httpd.conf.
-
- <Location "/glusterfs">
- GlusterfsLogfile "/var/log/glusterfs/glusterfs.log"
- GlusterfsLoglevel "warning"
- GlusterfsVolumeSpecfile "/etc/glusterfs/glusterfs-client.spec"
- GlusterfsCacheTimeout "600"
- GlusterfsXattrFileSize "65536"
- SetHandler "glusterfs-handler"
- </Location>
-
-* GlusterfsVolumeSpecfile (COMPULSORY)
- Path to the the glusterfs volume specification file.
-
-* GlusterfsLogfile (COMPULSORY)
- Path to the glusterfs logfile.
-
-* GlusterfsLoglevel (OPTIONAL, default = warning)
- Severity of messages that are to be logged. Allowed values are critical, error, warning, debug, none
- in the decreasing order of severity.
-
-* GlusterfsCacheTimeOut (OPTIONAL, default = 0)
- Timeout values for glusterfs stat and lookup cache.
-
-* GlusterfsXattrFileSize (OPTIONAL, default = 0)
- Files with sizes upto and including this value are fetched through the extended attribute interface of
- glusterfs rather than the usual open-read-close set of operations. For files of small sizes, it is recommended
- to use extended attribute interface.
-
-* With the above configuration all the requests to httpd of the form www.example.org/glusterfs/path/to/file are
- served from glusterfs.
-
-* mod_glusterfs also implements mod_dir and mod_autoindex behaviour for files under glusterfs mount.
- Hence it also takes the directives related to both of these modules. For more details, refer the
- documentation for both of these modules.
-
-Miscellaneous points
-====================
-* httpd by default runs with username "nobody" and group "nogroup". Permissions of logfile and specfile have to
- be set suitably.
-
-* Since mod_glusterfs runs with permissions of nobody.nogroup, glusterfs has to use only login based
- authentication. See docs/authentication.txt for more details.
-
-* To copy the data served by httpd into glusterfs mountpoint, glusterfs can be started with the
- volume-specification file provided to mod_glusterfs. Any tool like cp can then be used.
-
-* To run in gdb, apache has to be compiled with -lpthread, since libglusterfsclient is
- multithreaded. If not on Linux gdb runs into errors like:
- "Error while reading shared library symbols:
- Cannot find new threads: generic error"
-
-* when used with ib-verbs transport, ib_verbs initialization fails.
- reason for this is that apache runs as non-privileged user and the amount of memory that can be
- locked by default is not sufficient for ib-verbs. to fix this, as root run,
-
- # ulimit -l unlimited
-
- and then start apache.
diff --git a/mod_glusterfs/apache/2.2/src/mod_glusterfs.c b/mod_glusterfs/apache/2.2/src/mod_glusterfs.c
deleted file mode 100644
index d2b9f323266..00000000000
--- a/mod_glusterfs/apache/2.2/src/mod_glusterfs.c
+++ /dev/null
@@ -1,3627 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef CORE_PRIVATE
-#define CORE_PRIVATE
-#endif
-
-#ifndef NO_CONTENT_TYPE
-#define NO_CONTENT_TYPE "none"
-#endif
-
-#define BYTERANGE_FMT "%" APR_OFF_T_FMT "-%" APR_OFF_T_FMT "/%" APR_OFF_T_FMT
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include <httpd.h>
-#include <http_config.h>
-#include <http_core.h>
-#include <http_request.h>
-#include <http_protocol.h>
-#include <http_log.h>
-#include <http_main.h>
-#include <util_script.h>
-#include <util_filter.h>
-#include <libglusterfsclient.h>
-#include <sys/uio.h>
-#include <pthread.h>
-#include <apr.h>
-#include <apr_strings.h>
-#include <apr_buckets.h>
-#include <apr_fnmatch.h>
-#include <apr_lib.h>
-
-#define GLUSTERFS_INVALID_LOGLEVEL "mod_glfs: Unrecognized log-level \"%s\", "\
- " possible values are \"DEBUG|WARNING|"\
- "ERROR|CRITICAL|NONE\"\n"
-
-#define GLUSTERFS_HANDLER "glusterfs-handler"
-#define GLUSTERFS_CHUNK_SIZE 131072
-
-static char c_by_encoding, c_by_type, c_by_path;
-
-#define BY_ENCODING &c_by_encoding
-#define BY_TYPE &c_by_type
-#define BY_PATH &c_by_path
-
-module AP_MODULE_DECLARE_DATA glusterfs_module;
-extern module core_module;
-
-#define NO_OPTIONS (1 << 0) /* Indexing options */
-#define ICONS_ARE_LINKS (1 << 1)
-#define SCAN_HTML_TITLES (1 << 2)
-#define SUPPRESS_ICON (1 << 3)
-#define SUPPRESS_LAST_MOD (1 << 4)
-#define SUPPRESS_SIZE (1 << 5)
-#define SUPPRESS_DESC (1 << 6)
-#define SUPPRESS_PREAMBLE (1 << 7)
-#define SUPPRESS_COLSORT (1 << 8)
-#define SUPPRESS_RULES (1 << 9)
-#define FOLDERS_FIRST (1 << 10)
-#define VERSION_SORT (1 << 11)
-#define TRACK_MODIFIED (1 << 12)
-#define FANCY_INDEXING (1 << 13)
-#define TABLE_INDEXING (1 << 14)
-#define IGNORE_CLIENT (1 << 15)
-#define IGNORE_CASE (1 << 16)
-#define EMIT_XHTML (1 << 17)
-#define SHOW_FORBIDDEN (1 << 18)
-
-#define K_NOADJUST 0
-#define K_ADJUST 1
-#define K_UNSET 2
-
-/*
- * Define keys for sorting.
- */
-#define K_NAME 'N' /* Sort by file name (default) */
-#define K_LAST_MOD 'M' /* Last modification date */
-#define K_SIZE 'S' /* Size (absolute, not as displayed) */
-#define K_DESC 'D' /* Description */
-#define K_VALID "NMSD" /* String containing _all_ valid K_ opts */
-
-#define D_ASCENDING 'A'
-#define D_DESCENDING 'D'
-#define D_VALID "AD" /* String containing _all_ valid D_ opts */
-
-/*
- * These are the dimensions of the default icons supplied with Apache.
- */
-#define DEFAULT_ICON_WIDTH 20
-#define DEFAULT_ICON_HEIGHT 22
-
-/*
- * Other default dimensions.
- */
-#define DEFAULT_NAME_WIDTH 23
-#define DEFAULT_DESC_WIDTH 23
-
-struct mod_glfs_ai_item {
- char *type;
- char *apply_to;
- char *apply_path;
- char *data;
-};
-
-typedef struct mod_glfs_ai_desc_t {
- char *pattern;
- char *description;
- int full_path;
- int wildcards;
-} mod_glfs_ai_desc_t;
-
-typedef enum {
- SLASH_OFF = 0,
- SLASH_ON,
- SLASH_UNSET
-} mod_glfs_dir_slash_cfg;
-
-/* static ap_filter_rec_t *mod_glfs_output_filter_handle; */
-
-/*TODO: verify error returns to server core */
-
-typedef struct glusterfs_dir_config {
- char *logfile;
- char *loglevel;
- char *specfile;
- char *mount_dir;
- char *buf;
-
- size_t xattr_file_size;
- uint32_t cache_timeout;
-
- /* mod_dir options */
- apr_array_header_t *index_names;
- mod_glfs_dir_slash_cfg do_slash;
-
- /* autoindex options */
- char *default_icon;
- char *style_sheet;
- apr_int32_t opts;
- apr_int32_t incremented_opts;
- apr_int32_t decremented_opts;
- int name_width;
- int name_adjust;
- int desc_width;
- int desc_adjust;
- int icon_width;
- int icon_height;
- char default_keyid;
- char default_direction;
-
- apr_array_header_t *icon_list;
- apr_array_header_t *alt_list;
- apr_array_header_t *desc_list;
- apr_array_header_t *ign_list;
- apr_array_header_t *hdr_list;
- apr_array_header_t *rdme_list;
-
- char *ctype;
- char *charset;
-} glusterfs_dir_config_t;
-
-typedef struct glusterfs_async_local {
- int op_ret;
- int op_errno;
- char async_read_complete;
- off_t length;
- off_t read_bytes;
- glusterfs_iobuf_t *buf;
- request_rec *request;
- pthread_mutex_t lock;
- pthread_cond_t cond;
-}glusterfs_async_local_t;
-
-#define GLUSTERFS_CMD_PERMS ACCESS_CONF
-
-
-static glusterfs_dir_config_t *
-mod_glfs_dconfig (request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL;
- if (r->per_dir_config != NULL) {
- dir_config = ap_get_module_config (r->per_dir_config,
- &glusterfs_module);
- }
-
- return dir_config;
-}
-
-
-static const char *
-cmd_add_xattr_file_size (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->xattr_file_size = atoi (arg);
- return NULL;
-}
-
-
-static const char *
-cmd_set_cache_timeout (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->cache_timeout = atoi (arg);
- return NULL;
-}
-
-
-static const char *
-cmd_set_loglevel (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- char *error = NULL;
- if (strncasecmp (arg, "DEBUG", strlen ("DEBUG"))
- && strncasecmp (arg, "WARNING", strlen ("WARNING"))
- && strncasecmp (arg, "CRITICAL", strlen ("CRITICAL"))
- && strncasecmp (arg, "NONE", strlen ("NONE"))
- && strncasecmp (arg, "ERROR", strlen ("ERROR")))
- error = GLUSTERFS_INVALID_LOGLEVEL;
- else
- dir_config->loglevel = apr_pstrdup (cmd->pool, arg);
-
- return error;
-}
-
-static const char *
-cmd_add_logfile (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
- dir_config->logfile = apr_pstrdup (cmd->pool, arg);
-
- return NULL;
-}
-
-
-static const char *
-cmd_add_volume_specfile (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *dir_config = dummy;
-
- dir_config->specfile = apr_pstrdup (cmd->pool, arg);
-
- return NULL;
-}
-
-#define WILDCARDS_REQUIRED 0
-
-static const char *
-cmd_add_desc (cmd_parms *cmd, void *d, const char *desc,
- const char *to)
-{
- glusterfs_dir_config_t *dcfg = NULL;
- mod_glfs_ai_desc_t *desc_entry = NULL;
- char *prefix = "";
-
- dcfg = (glusterfs_dir_config_t *) d;
- desc_entry = (mod_glfs_ai_desc_t *) apr_array_push(dcfg->desc_list);
- desc_entry->full_path = (ap_strchr_c(to, '/') == NULL) ? 0 : 1;
- desc_entry->wildcards = (WILDCARDS_REQUIRED
- || desc_entry->full_path
- || apr_fnmatch_test(to));
- if (desc_entry->wildcards) {
- prefix = desc_entry->full_path ? "*/" : "*";
- desc_entry->pattern = apr_pstrcat(dcfg->desc_list->pool,
- prefix, to, "*", NULL);
- }
- else {
- desc_entry->pattern = apr_pstrdup(dcfg->desc_list->pool, to);
- }
- desc_entry->description = apr_pstrdup(dcfg->desc_list->pool, desc);
- return NULL;
-}
-
-
-static void push_item(apr_array_header_t *arr, char *type, const char *to,
- const char *path, const char *data)
-{
- struct mod_glfs_ai_item *p = NULL;
-
- p = (struct mod_glfs_ai_item *) apr_array_push(arr);
-
- if (!to) {
- to = "";
- }
- if (!path) {
- path = "";
- }
-
- p->type = type;
- p->data = data ? apr_pstrdup(arr->pool, data) : NULL;
- p->apply_path = apr_pstrcat(arr->pool, path, "*", NULL);
-
- if ((type == BY_PATH) && (!ap_is_matchexp(to))) {
- p->apply_to = apr_pstrcat(arr->pool, "*", to, NULL);
- }
- else if (to) {
- p->apply_to = apr_pstrdup(arr->pool, to);
- }
- else {
- p->apply_to = NULL;
- }
-}
-
-
-static const char *
-cmd_add_ignore (cmd_parms *cmd, void *d, const char *ext)
-{
- push_item(((glusterfs_dir_config_t *) d)->ign_list, 0, ext, cmd->path,
- NULL);
- return NULL;
-}
-
-
-static const char *
-cmd_add_header (cmd_parms *cmd, void *d, const char *name)
-{
- push_item(((glusterfs_dir_config_t *) d)->hdr_list, 0, NULL, cmd->path,
- name);
- return NULL;
-}
-
-
-static const char *
-cmd_add_readme (cmd_parms *cmd, void *d, const char *name)
-{
- push_item(((glusterfs_dir_config_t *) d)->rdme_list, 0, NULL, cmd->path,
- name);
- return NULL;
-}
-
-
-static const char *
-cmd_add_opts (cmd_parms *cmd, void *d, int argc, char *const argv[])
-{
- int i = 0, option = 0;
- char *w = NULL;
- apr_int32_t opts;
- apr_int32_t opts_add;
- apr_int32_t opts_remove;
- char action = 0;
- glusterfs_dir_config_t *d_cfg = (glusterfs_dir_config_t *) d;
-
- opts = d_cfg->opts;
- opts_add = d_cfg->incremented_opts;
- opts_remove = d_cfg->decremented_opts;
-
- for (i = 0; i < argc; i++) {
- w = argv[i];
-
- if ((*w == '+') || (*w == '-')) {
- action = *(w++);
- }
- else {
- action = '\0';
- }
- if (!strcasecmp(w, "FancyIndexing")) {
- option = FANCY_INDEXING;
- }
- else if (!strcasecmp(w, "FoldersFirst")) {
- option = FOLDERS_FIRST;
- }
- else if (!strcasecmp(w, "HTMLTable")) {
- option = TABLE_INDEXING;
- }
- else if (!strcasecmp(w, "IconsAreLinks")) {
- option = ICONS_ARE_LINKS;
- }
- else if (!strcasecmp(w, "IgnoreCase")) {
- option = IGNORE_CASE;
- }
- else if (!strcasecmp(w, "IgnoreClient")) {
- option = IGNORE_CLIENT;
- }
- else if (!strcasecmp(w, "ScanHTMLTitles")) {
- option = SCAN_HTML_TITLES;
- }
- else if (!strcasecmp(w, "SuppressColumnSorting")) {
- option = SUPPRESS_COLSORT;
- }
- else if (!strcasecmp(w, "SuppressDescription")) {
- option = SUPPRESS_DESC;
- }
- else if (!strcasecmp(w, "SuppressHTMLPreamble")) {
- option = SUPPRESS_PREAMBLE;
- }
- else if (!strcasecmp(w, "SuppressIcon")) {
- option = SUPPRESS_ICON;
- }
- else if (!strcasecmp(w, "SuppressLastModified")) {
- option = SUPPRESS_LAST_MOD;
- }
- else if (!strcasecmp(w, "SuppressSize")) {
- option = SUPPRESS_SIZE;
- }
- else if (!strcasecmp(w, "SuppressRules")) {
- option = SUPPRESS_RULES;
- }
- else if (!strcasecmp(w, "TrackModified")) {
- option = TRACK_MODIFIED;
- }
- else if (!strcasecmp(w, "VersionSort")) {
- option = VERSION_SORT;
- }
- else if (!strcasecmp(w, "XHTML")) {
- option = EMIT_XHTML;
- }
- else if (!strcasecmp(w, "ShowForbidden")) {
- option = SHOW_FORBIDDEN;
- }
- else if (!strcasecmp(w, "None")) {
- if (action != '\0') {
- return "Cannot combine '+' or '-' with 'None' "
- "keyword";
- }
- opts = NO_OPTIONS;
- opts_add = 0;
- opts_remove = 0;
- }
- else if (!strcasecmp(w, "IconWidth")) {
- if (action != '-') {
- d_cfg->icon_width = DEFAULT_ICON_WIDTH;
- }
- else {
- d_cfg->icon_width = 0;
- }
- }
- else if (!strncasecmp(w, "IconWidth=", 10)) {
- if (action == '-') {
- return "Cannot combine '-' with IconWidth=n";
- }
- d_cfg->icon_width = atoi(&w[10]);
- }
- else if (!strcasecmp(w, "IconHeight")) {
- if (action != '-') {
- d_cfg->icon_height = DEFAULT_ICON_HEIGHT;
- }
- else {
- d_cfg->icon_height = 0;
- }
- }
- else if (!strncasecmp(w, "IconHeight=", 11)) {
- if (action == '-') {
- return "Cannot combine '-' with IconHeight=n";
- }
- d_cfg->icon_height = atoi(&w[11]);
- }
- else if (!strcasecmp(w, "NameWidth")) {
- if (action != '-') {
- return "NameWidth with no value may only appear"
- " as "
- "'-NameWidth'";
- }
- d_cfg->name_width = DEFAULT_NAME_WIDTH;
- d_cfg->name_adjust = K_NOADJUST;
- }
- else if (!strncasecmp(w, "NameWidth=", 10)) {
- if (action == '-') {
- return "Cannot combine '-' with NameWidth=n";
- }
- if (w[10] == '*') {
- d_cfg->name_adjust = K_ADJUST;
- }
- else {
- int width = atoi(&w[10]);
-
- if (width && (width < 5)) {
- return "NameWidth value must be greater"
- " than 5";
- }
- d_cfg->name_width = width;
- d_cfg->name_adjust = K_NOADJUST;
- }
- }
- else if (!strcasecmp(w, "DescriptionWidth")) {
- if (action != '-') {
- return "DescriptionWidth with no value may only"
- " appear as "
- "'-DescriptionWidth'";
- }
- d_cfg->desc_width = DEFAULT_DESC_WIDTH;
- d_cfg->desc_adjust = K_NOADJUST;
- }
- else if (!strncasecmp(w, "DescriptionWidth=", 17)) {
- if (action == '-') {
- return "Cannot combine '-' with "
- "DescriptionWidth=n";
- }
- if (w[17] == '*') {
- d_cfg->desc_adjust = K_ADJUST;
- }
- else {
- int width = atoi(&w[17]);
-
- if (width && (width < 12)) {
- return "DescriptionWidth value must be "
- "greater than 12";
- }
- d_cfg->desc_width = width;
- d_cfg->desc_adjust = K_NOADJUST;
- }
- }
- else if (!strncasecmp(w, "Type=", 5)) {
- d_cfg->ctype = apr_pstrdup(cmd->pool, &w[5]);
- }
- else if (!strncasecmp(w, "Charset=", 8)) {
- d_cfg->charset = apr_pstrdup(cmd->pool, &w[8]);
- }
- else {
- return "Invalid directory indexing option";
- }
- if (action == '\0') {
- opts |= option;
- opts_add = 0;
- opts_remove = 0;
- }
- else if (action == '+') {
- opts_add |= option;
- opts_remove &= ~option;
- }
- else {
- opts_remove |= option;
- opts_add &= ~option;
- }
- }
- if ((opts & NO_OPTIONS) && (opts & ~NO_OPTIONS)) {
- return "Cannot combine other IndexOptions keywords with 'None'";
- }
- d_cfg->incremented_opts = opts_add;
- d_cfg->decremented_opts = opts_remove;
- d_cfg->opts = opts;
- return NULL;
-}
-
-
-static const char *
-cmd_set_default_order(cmd_parms *cmd, void *m,
- const char *direction, const char *key)
-{
- glusterfs_dir_config_t *d_cfg = (glusterfs_dir_config_t *) m;
-
- if (!strcasecmp(direction, "Ascending")) {
- d_cfg->default_direction = D_ASCENDING;
- }
- else if (!strcasecmp(direction, "Descending")) {
- d_cfg->default_direction = D_DESCENDING;
- }
- else {
- return "First keyword must be 'Ascending' or 'Descending'";
- }
-
- if (!strcasecmp(key, "Name")) {
- d_cfg->default_keyid = K_NAME;
- }
- else if (!strcasecmp(key, "Date")) {
- d_cfg->default_keyid = K_LAST_MOD;
- }
- else if (!strcasecmp(key, "Size")) {
- d_cfg->default_keyid = K_SIZE;
- }
- else if (!strcasecmp(key, "Description")) {
- d_cfg->default_keyid = K_DESC;
- }
- else {
- return "Second keyword must be 'Name', 'Date', 'Size', or "
- "'Description'";
- }
-
- return NULL;
-}
-
-
-static char c_by_encoding, c_by_type, c_by_path;
-
-#define BY_ENCODING &c_by_encoding
-#define BY_TYPE &c_by_type
-#define BY_PATH &c_by_path
-
-/*
- * This routine puts the standard HTML header at the top of the index page.
- * We include the DOCTYPE because we may be using features therefrom (i.e.,
- * HEIGHT and WIDTH attributes on the icons if we're FancyIndexing).
- */
-static void emit_preamble(request_rec *r, int xhtml, const char *title)
-{
- glusterfs_dir_config_t *d;
-
- d = (glusterfs_dir_config_t *) ap_get_module_config(r->per_dir_config,
- &glusterfs_module);
-
- if (xhtml) {
- ap_rvputs(r, DOCTYPE_XHTML_1_0T,
- "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n"
- " <head>\n <title>Index of ", title,
- "</title>\n", NULL);
- } else {
- ap_rvputs(r, DOCTYPE_HTML_3_2,
- "<html>\n <head>\n"
- " <title>Index of ", title,
- "</title>\n", NULL);
- }
-
- if (d->style_sheet != NULL) {
- ap_rvputs(r, " <link rel=\"stylesheet\" href=\"",
- d->style_sheet, "\" type=\"text/css\"",
- xhtml ? " />\n" : ">\n", NULL);
- }
- ap_rvputs(r, " </head>\n <body>\n", NULL);
-}
-
-
-static const char *cmd_add_alt(cmd_parms *cmd, void *d, const char *alt,
- const char *to)
-{
- if (cmd->info == BY_PATH) {
- if (!strcmp(to, "**DIRECTORY**")) {
- to = "^^DIRECTORY^^";
- }
- }
- if (cmd->info == BY_ENCODING) {
- char *tmp = apr_pstrdup(cmd->pool, to);
- ap_str_tolower(tmp);
- to = tmp;
- }
-
- push_item(((glusterfs_dir_config_t *) d)->alt_list, cmd->info, to,
- cmd->path, alt);
- return NULL;
-}
-
-static const char *cmd_add_icon(cmd_parms *cmd, void *d, const char *icon,
- const char *to)
-{
- char *iconbak = apr_pstrdup(cmd->pool, icon);
- char *alt = NULL, *cl = NULL, *tmp = NULL;
-
- if (icon[0] == '(') {
- cl = strchr(iconbak, ')');
-
- if (cl == NULL) {
- return "missing closing paren";
- }
- alt = ap_getword_nc(cmd->pool, &iconbak, ',');
- *cl = '\0'; /* Lose closing paren */
- cmd_add_alt(cmd, d, &alt[1], to);
- }
- if (cmd->info == BY_PATH) {
- if (!strcmp(to, "**DIRECTORY**")) {
- to = "^^DIRECTORY^^";
- }
- }
- if (cmd->info == BY_ENCODING) {
- tmp = apr_pstrdup(cmd->pool, to);
- ap_str_tolower(tmp);
- to = tmp;
- }
-
- push_item(((glusterfs_dir_config_t *) d)->icon_list, cmd->info, to,
- cmd->path, iconbak);
- return NULL;
-}
-
-
-static void *
-mod_glfs_create_dir_config(apr_pool_t *p, char *dirspec)
-{
- glusterfs_dir_config_t *dir_config = NULL;
-
- dir_config = (glusterfs_dir_config_t *) apr_pcalloc(p,
- sizeof(*dir_config));
-
- dir_config->mount_dir = dirspec;
- dir_config->logfile = dir_config->specfile = (char *)0;
- dir_config->loglevel = "warning";
- dir_config->cache_timeout = 0;
- dir_config->buf = NULL;
-
- /* mod_dir options init */
- dir_config->index_names = NULL;
- dir_config->do_slash = SLASH_UNSET;
-
- /* autoindex options init */
- dir_config->icon_width = 0;
- dir_config->icon_height = 0;
- dir_config->name_width = DEFAULT_NAME_WIDTH;
- dir_config->name_adjust = K_UNSET;
- dir_config->desc_width = DEFAULT_DESC_WIDTH;
- dir_config->desc_adjust = K_UNSET;
- dir_config->icon_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->alt_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->desc_list = apr_array_make(p, 4,
- sizeof(mod_glfs_ai_desc_t));
- dir_config->ign_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->hdr_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->rdme_list = apr_array_make(p, 4,
- sizeof(struct mod_glfs_ai_item));
- dir_config->opts = 0;
- dir_config->incremented_opts = 0;
- dir_config->decremented_opts = 0;
- dir_config->default_keyid = '\0';
- dir_config->default_direction = '\0';
-
- return (void *) dir_config;
-}
-
-
-static void *
-mod_glfs_merge_dir_config(apr_pool_t *p, void *parent_conf,
- void *newloc_conf)
-{
- glusterfs_dir_config_t *new = NULL;
- glusterfs_dir_config_t *add = NULL;
- glusterfs_dir_config_t *base = NULL;
-
- new = (glusterfs_dir_config_t *)
- apr_pcalloc(p, sizeof(glusterfs_dir_config_t));
- add = newloc_conf;
- base = parent_conf;
-
- if (add->logfile)
- new->logfile = apr_pstrdup (p, add->logfile);
-
- if (add->loglevel)
- new->loglevel = apr_pstrdup (p, add->loglevel);
-
- if (add->specfile)
- new->specfile = apr_pstrdup (p, add->specfile);
-
- if (add->mount_dir)
- new->mount_dir = apr_pstrdup (p, add->mount_dir);
-
- new->xattr_file_size = add->xattr_file_size;
- new->cache_timeout = add->cache_timeout;
- new->buf = add->buf;
-
- /* mod_dir */
- new->index_names = add->index_names ?
- add->index_names : base->index_names;
- new->do_slash =
- (add->do_slash == SLASH_UNSET) ? base->do_slash : add->do_slash;
-
- /* auto index */
- new->default_icon = add->default_icon ? add->default_icon
- : base->default_icon;
- new->style_sheet = add->style_sheet ? add->style_sheet
- : base->style_sheet;
- new->icon_height = add->icon_height ?
- add->icon_height : base->icon_height;
- new->icon_width = add->icon_width ? add->icon_width : base->icon_width;
-
- new->ctype = add->ctype ? add->ctype : base->ctype;
- new->charset = add->charset ? add->charset : base->charset;
-
- new->alt_list = apr_array_append(p, add->alt_list, base->alt_list);
- new->ign_list = apr_array_append(p, add->ign_list, base->ign_list);
- new->hdr_list = apr_array_append(p, add->hdr_list, base->hdr_list);
- new->desc_list = apr_array_append(p, add->desc_list, base->desc_list);
- new->icon_list = apr_array_append(p, add->icon_list, base->icon_list);
- new->rdme_list = apr_array_append(p, add->rdme_list, base->rdme_list);
- if (add->opts & NO_OPTIONS) {
- /*
- * If the current directory says 'no options' then we also
- * clear any incremental mods from being inheritable further down.
- */
- new->opts = NO_OPTIONS;
- new->incremented_opts = 0;
- new->decremented_opts = 0;
- }
- else {
- /*
- * If there were any nonincremental options selected for
- * this directory, they dominate and we don't inherit *anything.*
- * Contrariwise, we *do* inherit if the only settings here are
- * incremental ones.
- */
- if (add->opts == 0) {
- new->incremented_opts = (base->incremented_opts
- | add->incremented_opts)
- & ~add->decremented_opts;
- new->decremented_opts = (base->decremented_opts
- | add->decremented_opts);
- /*
- * We may have incremental settings, so make sure we
- * don't inadvertently inherit an IndexOptions None
- * from above.
- */
- new->opts = (base->opts & ~NO_OPTIONS);
- }
- else {
- /*
- * There are local nonincremental settings, which clear
- * all inheritance from above. They *are* the new
- * base settings.
- */
- new->opts = add->opts;;
- }
- /*
- * We're guaranteed that there'll be no overlap between
- * the add-options and the remove-options.
- */
- new->opts |= new->incremented_opts;
- new->opts &= ~new->decremented_opts;
- }
- /*
- * Inherit the NameWidth settings if there aren't any specific to
- * the new location; otherwise we'll end up using the defaults set
- * in the config-rec creation routine.
- */
- if (add->name_adjust == K_UNSET) {
- new->name_width = base->name_width;
- new->name_adjust = base->name_adjust;
- }
- else {
- new->name_width = add->name_width;
- new->name_adjust = add->name_adjust;
- }
-
- /*
- * Likewise for DescriptionWidth.
- */
- if (add->desc_adjust == K_UNSET) {
- new->desc_width = base->desc_width;
- new->desc_adjust = base->desc_adjust;
- }
- else {
- new->desc_width = add->desc_width;
- new->desc_adjust = add->desc_adjust;
- }
-
- new->default_keyid = add->default_keyid ? add->default_keyid
- : base->default_keyid;
- new->default_direction = add->default_direction ? add->default_direction
- : base->default_direction;
-
- return (void *) new;
-}
-
-
-static void
-mod_glfs_child_init(apr_pool_t *p, server_rec *s)
-{
- int i = 0, num_sec = 0, ret = 0;
- core_server_config *sconf = NULL;
- ap_conf_vector_t **sec_ent = NULL;
- glusterfs_dir_config_t *dir_config = NULL;
- glusterfs_init_params_t ctx = {0, };
-
- sconf = (core_server_config *) ap_get_module_config (s->module_config,
- &core_module);
- sec_ent = (ap_conf_vector_t **) sconf->sec_url->elts;
- num_sec = sconf->sec_url->nelts;
-
- for (i = 0; i < num_sec; i++) {
- dir_config = ap_get_module_config (sec_ent[i],
- &glusterfs_module);
-
- if (dir_config) {
- memset (&ctx, 0, sizeof (ctx));
-
- ctx.logfile = dir_config->logfile;
- ctx.loglevel = dir_config->loglevel;
- ctx.lookup_timeout = dir_config->cache_timeout;
- ctx.stat_timeout = dir_config->cache_timeout;
- ctx.specfile = dir_config->specfile;
-
- ret = glusterfs_mount (dir_config->mount_dir, &ctx);
- if (ret != 0) {
- ap_log_error(APLOG_MARK, APLOG_ERR,
- APR_EGENERAL, s,
- "mod_glfs_child_init: "
- "glusterfs_init failed, check "
- "glusterfs logfile %s for more "
- "details",
- dir_config->logfile);
- }
- }
- dir_config = NULL;
- }
-}
-
-
-static void
-mod_glfs_child_exit(server_rec *s, apr_pool_t *p)
-{
- int i = 0, num_sec = 0;
- core_server_config *sconf = NULL;
- ap_conf_vector_t **sec_ent = NULL;
- glusterfs_dir_config_t *dir_config = NULL;
- glusterfs_init_params_t ctx = {0, };
-
- sconf = ap_get_module_config(s->module_config,
- &core_module);
- sec_ent = (ap_conf_vector_t **) sconf->sec_url->elts;
- num_sec = sconf->sec_url->nelts;
- for (i = 0; i < num_sec; i++) {
- dir_config = ap_get_module_config (sec_ent[i],
- &glusterfs_module);
- if (dir_config) {
- glusterfs_umount (dir_config->mount_dir);
- }
- }
-}
-
-static apr_filetype_e filetype_from_mode(mode_t mode)
-{
- apr_filetype_e type = APR_NOFILE;
-
- if (S_ISREG(mode))
- type = APR_REG;
- else if (S_ISDIR(mode))
- type = APR_DIR;
- else if (S_ISCHR(mode))
- type = APR_CHR;
- else if (S_ISBLK(mode))
- type = APR_BLK;
- else if (S_ISFIFO(mode))
- type = APR_PIPE;
- else if (S_ISLNK(mode))
- type = APR_LNK;
- else if (S_ISSOCK(mode))
- type = APR_SOCK;
- else
- type = APR_UNKFILE;
- return type;
-}
-
-
-static void fill_out_finfo(apr_finfo_t *finfo, struct stat *info,
- apr_int32_t wanted)
-{
- finfo->valid = APR_FINFO_MIN | APR_FINFO_IDENT | APR_FINFO_NLINK
- | APR_FINFO_OWNER | APR_FINFO_PROT;
- finfo->protection = apr_unix_mode2perms(info->st_mode);
- finfo->filetype = filetype_from_mode(info->st_mode);
- finfo->user = info->st_uid;
- finfo->group = info->st_gid;
- finfo->size = info->st_size;
- finfo->device = info->st_dev;
- finfo->nlink = info->st_nlink;
-
- /* Check for overflow if storing a 64-bit st_ino in a 32-bit
- * apr_ino_t for LFS builds: */
- if (sizeof(apr_ino_t) >= sizeof(info->st_ino)
- || (apr_ino_t)info->st_ino == info->st_ino) {
- finfo->inode = info->st_ino;
- } else {
- finfo->valid &= ~APR_FINFO_INODE;
- }
-
- apr_time_ansi_put(&finfo->atime, info->st_atime);
-#ifdef HAVE_STRUCT_STAT_ST_ATIM_TV_NSEC
- finfo->atime += info->st_atim.tv_nsec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_ATIMENSEC)
- finfo->atime += info->st_atimensec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_ATIME_N)
- finfo->ctime += info->st_atime_n / APR_TIME_C(1000);
-#endif
-
- apr_time_ansi_put(&finfo->mtime, info->st_mtime);
-#ifdef HAVE_STRUCT_STAT_ST_MTIM_TV_NSEC
- finfo->mtime += info->st_mtim.tv_nsec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_MTIMENSEC)
- finfo->mtime += info->st_mtimensec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_MTIME_N)
- finfo->ctime += info->st_mtime_n / APR_TIME_C(1000);
-#endif
-
- apr_time_ansi_put(&finfo->ctime, info->st_ctime);
-#ifdef HAVE_STRUCT_STAT_ST_CTIM_TV_NSEC
- finfo->ctime += info->st_ctim.tv_nsec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_CTIMENSEC)
- finfo->ctime += info->st_ctimensec / APR_TIME_C(1000);
-#elif defined(HAVE_STRUCT_STAT_ST_CTIME_N)
- finfo->ctime += info->st_ctime_n / APR_TIME_C(1000);
-#endif
-
-#ifdef HAVE_STRUCT_STAT_ST_BLOCKS
-#ifdef DEV_BSIZE
- finfo->csize = (apr_off_t)info->st_blocks * (apr_off_t)DEV_BSIZE;
-#else
- finfo->csize = (apr_off_t)info->st_blocks * (apr_off_t)512;
-#endif
- finfo->valid |= APR_FINFO_CSIZE;
-#endif
-}
-
-
-static int
-mod_glfs_map_to_storage(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL, *tmp = NULL;
- int access_status = 0, ret = 0;
- char *path = NULL;
- struct stat st = {0, };
- core_server_config *sconf = NULL;
- ap_conf_vector_t **sec_ent = NULL;
- int num_sec = 0, i = 0;
-
- sconf = (core_server_config *) ap_get_module_config (r->server->module_config,
- &core_module);
- sec_ent = (ap_conf_vector_t **) sconf->sec_url->elts;
- num_sec = sconf->sec_url->nelts;
-
- for (i = 0; i < num_sec; i++) {
- tmp = ap_get_module_config (sec_ent[i], &glusterfs_module);
-
- if (tmp && !strncmp (tmp->mount_dir, r->uri,
- strlen (tmp->mount_dir))) {
- if (!dir_config ||
- strlen (tmp->mount_dir)
- > strlen (dir_config->mount_dir)) {
- dir_config = tmp;
- }
- }
-
- }
-
- if (dir_config && dir_config->mount_dir
- && !(strncmp (apr_pstrcat (r->pool, dir_config->mount_dir, "/",
- NULL), r->uri,
- strlen (dir_config->mount_dir) + 1)
- && !r->handler))
- r->handler = GLUSTERFS_HANDLER;
-
- if (!r->handler || (r->handler && strcmp (r->handler,
- GLUSTERFS_HANDLER)))
- return DECLINED;
-
- path = r->uri;
-
- memset (&r->finfo, 0, sizeof (r->finfo));
-
- dir_config->buf = calloc (1, dir_config->xattr_file_size);
- if (!dir_config->buf) {
- return HTTP_INTERNAL_SERVER_ERROR;
- }
-
- ret = glusterfs_get (path, dir_config->buf,
- dir_config->xattr_file_size, &st);
-
- if (ret == -1 || st.st_size > dir_config->xattr_file_size
- || S_ISDIR (st.st_mode)) {
- free (dir_config->buf);
- dir_config->buf = NULL;
-
- if (ret == -1) {
- int error = HTTP_NOT_FOUND;
- char *emsg = NULL;
- if (r->path_info == NULL) {
- emsg = apr_pstrcat(r->pool, strerror (errno),
- r->filename, NULL);
- }
- else {
- emsg = apr_pstrcat(r->pool, strerror (errno),
- r->filename, r->path_info,
- NULL);
- }
- ap_log_rerror(APLOG_MARK, APLOG_ERR|APLOG_NOERRNO, 0,
- r, "%s", emsg);
- if (errno != ENOENT) {
- error = HTTP_INTERNAL_SERVER_ERROR;
- }
- return error;
- }
- }
-
- r->finfo.pool = r->pool;
- r->finfo.fname = r->filename;
- fill_out_finfo (&r->finfo, &st,
- APR_FINFO_MIN | APR_FINFO_IDENT | APR_FINFO_NLINK |
- APR_FINFO_OWNER | APR_FINFO_PROT);
-
- /* allow core module to run directory_walk() and location_walk() */
- return DECLINED;
-}
-
-
-static int
-mod_glfs_readv_async_cbk (int32_t op_ret, int32_t op_errno,
- glusterfs_iobuf_t *buf, void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
-
- pthread_mutex_lock (&local->lock);
- {
- local->async_read_complete = 1;
- local->buf = buf;
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- pthread_cond_signal (&local->cond);
- }
- pthread_mutex_unlock (&local->lock);
-
- return 0;
-}
-
-/* use read_async just to avoid memcpy of read buffer in libglusterfsclient */
-static int
-mod_glfs_read_async (request_rec *r, apr_bucket_brigade *bb,
- glusterfs_file_t fd,
- apr_off_t offset, apr_off_t length)
-{
- glusterfs_async_local_t local = {0, };
- off_t end = 0;
- int nbytes = 0, complete = 0;
- conn_rec *c = r->connection;
- apr_bucket *e = NULL;
- apr_status_t status = APR_SUCCESS;
- glusterfs_iobuf_t *buf = NULL;
-
- if (length == 0) {
- return 0;
- }
-
- pthread_cond_init (&local.cond, NULL);
- pthread_mutex_init (&local.lock, NULL);
-
- memset (&local, 0, sizeof (local));
- local.request = r;
-
- if (length > 0)
- end = offset + length;
-
- do {
- if (length > 0) {
- nbytes = end - offset;
- if (nbytes > GLUSTERFS_CHUNK_SIZE)
- nbytes = GLUSTERFS_CHUNK_SIZE;
- } else
- nbytes = GLUSTERFS_CHUNK_SIZE;
-
- glusterfs_read_async(fd,
- nbytes,
- offset,
- mod_glfs_readv_async_cbk,
- (void *)&local);
-
- pthread_mutex_lock (&local.lock);
- {
- while (!local.async_read_complete) {
- pthread_cond_wait (&local.cond, &local.lock);
- }
-
- local.async_read_complete = 0;
- buf = local.buf;
-
- if (length < 0)
- complete = (local.op_ret <= 0);
- else {
- local.read_bytes += local.op_ret;
- complete = ((local.read_bytes == length) ||
- (local.op_ret < 0));
- }
- }
- pthread_mutex_unlock (&local.lock);
-
- if (!bb) {
- bb = apr_brigade_create (r->pool, c->bucket_alloc);
- }
- apr_brigade_writev (bb, NULL, NULL, buf->vector, buf->count);
-
- /*
- * make sure all the data is written out, since we call
- * glusterfs_free on buf once ap_pass_brigade returns
- */
- e = apr_bucket_flush_create (c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL (bb, e);
-
- status = ap_pass_brigade (r->output_filters, bb);
- if (status != APR_SUCCESS) {
- /* no way to know what type of error occurred */
- ap_log_rerror(APLOG_MARK, APLOG_DEBUG, status, r,
- "mod_glfs_handler: ap_pass_brigade "
- "returned %i",
- status);
- complete = 1;
- local.op_ret = -1;
- }
-
- glusterfs_free (buf);
-
- /*
- * bb has already been cleaned up by core_output_filter,
- * just being paranoid
- */
- apr_brigade_cleanup (bb);
-
- offset += nbytes;
- } while (!complete);
-
- return (local.op_ret < 0 ? HTTP_INTERNAL_SERVER_ERROR : OK);
-}
-
-static int
-parse_byterange(char *range, apr_off_t clength,
- apr_off_t *start, apr_off_t *end)
-{
- char *dash = NULL, *errp = NULL;
- apr_off_t number;
-
- dash = strchr(range, '-');
- if (!dash) {
- return 0;
- }
-
- if ((dash == range)) {
- /* In the form "-5" */
- if (apr_strtoff(&number, dash+1, &errp, 10) || *errp) {
- return 0;
- }
- *start = clength - number;
- *end = clength - 1;
- }
- else {
- *dash++ = '\0';
- if (apr_strtoff(&number, range, &errp, 10) || *errp) {
- return 0;
- }
- *start = number;
- if (*dash) {
- if (apr_strtoff(&number, dash, &errp, 10) || *errp) {
- return 0;
- }
- *end = number;
- }
- else { /* "5-" */
- *end = clength - 1;
- }
- }
-
- if (*start < 0) {
- *start = 0;
- }
-
- if (*end >= clength) {
- *end = clength - 1;
- }
-
- if (*start > *end) {
- return -1;
- }
-
- return (*start > 0 || *end < clength);
-}
-
-
-static int use_range_x(request_rec *r)
-{
- const char *ua = NULL;
- return (apr_table_get(r->headers_in, "Request-Range")
- || ((ua = apr_table_get(r->headers_in, "User-Agent"))
- && ap_strstr_c(ua, "MSIE 3")));
-}
-
-
-static int ap_set_byterange(request_rec *r)
-{
- const char *range = NULL, *if_range = NULL, *match = NULL, *ct = NULL;
- int num_ranges = 0;
-
- if (r->assbackwards) {
- return 0;
- }
-
- /* Check for Range request-header (HTTP/1.1) or Request-Range for
- * backwards-compatibility with second-draft Luotonen/Franks
- * byte-ranges (e.g. Netscape Navigator 2-3).
- *
- * We support this form, with Request-Range, and (farther down) we
- * send multipart/x-byteranges instead of multipart/byteranges for
- * Request-Range based requests to work around a bug in Netscape
- * Navigator 2-3 and MSIE 3.
- */
-
- if (!(range = apr_table_get(r->headers_in, "Range"))) {
- range = apr_table_get(r->headers_in, "Request-Range");
- }
-
- if (!range || strncasecmp(range, "bytes=", 6) || r->status != HTTP_OK) {
- return 0;
- }
-
- /* is content already a single range? */
- if (apr_table_get(r->headers_out, "Content-Range")) {
- return 0;
- }
-
- /* is content already a multiple range? */
- if ((ct = apr_table_get(r->headers_out, "Content-Type"))
- && (!strncasecmp(ct, "multipart/byteranges", 20)
- || !strncasecmp(ct, "multipart/x-byteranges", 22))) {
- return 0;
- }
-
- /* Check the If-Range header for Etag or Date.
- * Note that this check will return false (as required) if either
- * of the two etags are weak.
- */
- if ((if_range = apr_table_get(r->headers_in, "If-Range"))) {
- if (if_range[0] == '"') {
- if (!(match = apr_table_get(r->headers_out, "Etag"))
- || (strcmp(if_range, match) != 0)) {
- return 0;
- }
- }
- else if (!(match = apr_table_get(r->headers_out,
- "Last-Modified"))
- || (strcmp(if_range, match) != 0)) {
- return 0;
- }
- }
-
- if (!ap_strchr_c(range, ',')) {
- /* a single range */
- num_ranges = 1;
- }
- else {
- /* a multiple range */
- num_ranges = 2;
- }
-
- r->status = HTTP_PARTIAL_CONTENT;
- r->range = range + 6;
-
- return num_ranges;
-}
-
-
-static void
-mod_glfs_handle_byte_ranges (request_rec *r, glusterfs_file_t fd,
- int num_ranges)
-{
- conn_rec *c = r->connection;
- char *ts = NULL, *boundary = NULL, *bound_head = NULL;
- const char *orig_ct = NULL;
- char *current = NULL, *end = NULL;
- apr_bucket_brigade *bsend = NULL;
- apr_bucket *e = NULL;
- apr_off_t range_start, range_end;
- apr_status_t rv = APR_SUCCESS;
- char found = 0;
- apr_bucket *e2 = NULL, *ec = NULL;
-
- orig_ct = ap_make_content_type (r, r->content_type);
-
- if (num_ranges > 1) {
- boundary = apr_psprintf(r->pool, "%" APR_UINT64_T_HEX_FMT "%lx",
- (apr_uint64_t)r->request_time,
- (long) getpid());
-
- ap_set_content_type(r, apr_pstrcat(r->pool, "multipart",
- use_range_x(r) ? "/x-" : "/",
- "byteranges; boundary=",
- boundary, NULL));
-
- if (strcasecmp(orig_ct, NO_CONTENT_TYPE)) {
- bound_head = apr_pstrcat(r->pool,
- CRLF "--", boundary,
- CRLF "Content-type: ",
- orig_ct,
- CRLF "Content-range: bytes ",
- NULL);
- }
- else {
- /* if we have no type for the content, do our best */
- bound_head = apr_pstrcat(r->pool,
- CRLF "--", boundary,
- CRLF "Content-range: bytes ",
- NULL);
- }
- }
-
- while ((current = ap_getword(r->pool, &r->range, ','))
- && (rv = parse_byterange(current, r->finfo.size, &range_start,
- &range_end))) {
- bsend = NULL;
- if (rv == -1) {
- continue;
- }
-
- found = 1;
-
- /* For single range requests, we must produce Content-Range
- * header. Otherwise, we need to produce the multipart
- * boundaries.
- */
- if (num_ranges == 1) {
- apr_table_setn(r->headers_out, "Content-Range",
- apr_psprintf(r->pool,
- "bytes " BYTERANGE_FMT,
- range_start, range_end,
- r->finfo.size));
- }
- else {
- /* this brigade holds what we will be sending */
- bsend = apr_brigade_create(r->pool, c->bucket_alloc);
-
- e = apr_bucket_pool_create(bound_head,
- strlen(bound_head),
- r->pool, c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
-
- ts = apr_psprintf(r->pool, BYTERANGE_FMT CRLF CRLF,
- range_start, range_end,
- r->finfo.size);
- e = apr_bucket_pool_create(ts, strlen(ts), r->pool,
- c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
- }
- mod_glfs_read_async (r, bsend, fd, range_start,
- (range_end + 1 - range_start));
- }
-
- bsend = apr_brigade_create (r->pool, c->bucket_alloc);
-
- if (found == 0) {
- r->status = HTTP_OK;
- /* bsend is assumed to be empty if we get here. */
- e = ap_bucket_error_create(HTTP_RANGE_NOT_SATISFIABLE, NULL,
- r->pool, c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
- e = apr_bucket_eos_create(c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
- ap_pass_brigade (r->output_filters, bsend);
- return;
- }
-
- if (num_ranges > 1) {
- /* add the final boundary */
- end = apr_pstrcat(r->pool, CRLF "--", boundary, "--" CRLF,
- NULL);
-// ap_xlate_proto_to_ascii(end, strlen(end));
- e = apr_bucket_pool_create(end, strlen(end), r->pool,
- c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bsend, e);
- }
-
- ap_pass_brigade (r->output_filters, bsend);
-}
-
-
-
-/****************************************************************
- *
- * Looking things up in config entries...
- */
-
-/* Structure used to hold entries when we're actually building an index */
-
-struct ent {
- char *name;
- char *icon;
- char *alt;
- char *desc;
- apr_off_t size;
- apr_time_t lm;
- struct ent *next;
- int ascending, ignore_case, version_sort;
- char key;
- int isdir;
-};
-
-static char *find_item(request_rec *r, apr_array_header_t *list, int path_only)
-{
- const char *content_type = NULL;
- const char *content_encoding = NULL;
- char *path = NULL;
- int i = 0;
- struct mod_glfs_ai_item *items = NULL;
- struct mod_glfs_ai_item *p = NULL;
-
- content_type = ap_field_noparam(r->pool, r->content_type);
- content_encoding = r->content_encoding;
- path = r->filename;
- items = (struct mod_glfs_ai_item *) list->elts;
-
- for (i = 0; i < list->nelts; ++i) {
- p = &items[i];
- /* Special cased for ^^DIRECTORY^^ and ^^BLANKICON^^ */
- if ((path[0] == '^') || (!ap_strcmp_match(path,
- p->apply_path))) {
- if (!*(p->apply_to)) {
- return p->data;
- }
- else if (p->type == BY_PATH || path[0] == '^') {
- if (!ap_strcmp_match(path, p->apply_to)) {
- return p->data;
- }
- }
- else if (!path_only) {
- if (!content_encoding) {
- if (p->type == BY_TYPE) {
- if (content_type
- && !ap_strcasecmp_match(content_type,
- p->apply_to)) {
- return p->data;
- }
- }
- }
- else {
- if (p->type == BY_ENCODING) {
- if (!ap_strcasecmp_match(content_encoding,
- p->apply_to)) {
- return p->data;
- }
- }
- }
- }
- }
- }
- return NULL;
-}
-
-#define find_icon(d,p,t) find_item(p,d->icon_list,t)
-#define find_alt(d,p,t) find_item(p,d->alt_list,t)
-#define find_header(d,p) find_item(p,d->hdr_list,0)
-#define find_readme(d,p) find_item(p,d->rdme_list,0)
-
-static char *find_default_item(char *bogus_name, apr_array_header_t *list)
-{
- request_rec r;
- /* Bleah. I tried to clean up find_item, and it lead to this bit
- * of ugliness. Note that the fields initialized are precisely
- * those that find_item looks at...
- */
- r.filename = bogus_name;
- r.content_type = r.content_encoding = NULL;
- return find_item(&r, list, 1);
-}
-
-#define find_default_icon(d,n) find_default_item(n, d->icon_list)
-#define find_default_alt(d,n) find_default_item(n, d->alt_list)
-
-/*
- * Look through the list of pattern/description pairs and return the first one
- * if any) that matches the filename in the request. If multiple patterns
- * match, only the first one is used; since the order in the array is the
- * same as the order in which directives were processed, earlier matching
- * directives will dominate.
- */
-
-#ifdef CASE_BLIND_FILESYSTEM
-#define MATCH_FLAGS APR_FNM_CASE_BLIND
-#else
-#define MATCH_FLAGS 0
-#endif
-
-static char *find_desc(glusterfs_dir_config_t *dcfg, const char *filename_full)
-{
- int i = 0;
- mod_glfs_ai_desc_t *list = NULL;
- const char *filename_only = NULL, *filename = NULL;
- mod_glfs_ai_desc_t *tuple = &list[i];
- int found = 0;
-
- list = (mod_glfs_ai_desc_t *) dcfg->desc_list->elts;
- /*
- * If the filename includes a path, extract just the name itself
- * for the simple matches.
- */
- if ((filename_only = ap_strrchr_c(filename_full, '/')) == NULL) {
- filename_only = filename_full;
- }
- else {
- filename_only++;
- }
- for (i = 0; i < dcfg->desc_list->nelts; ++i) {
- /*
- * Only use the full-path filename if the pattern contains '/'s.
- */
- filename = (tuple->full_path) ? filename_full : filename_only;
- /*
- * Make the comparison using the cheapest method; only do
- * wildcard checking if we must.
- */
- if (tuple->wildcards) {
- found = (apr_fnmatch(tuple->pattern, filename,
- MATCH_FLAGS) == 0);
- }
- else {
- found = (ap_strstr_c(filename, tuple->pattern) != NULL);
- }
- if (found) {
- return tuple->description;
- }
- }
- return NULL;
-}
-
-static int ignore_entry(glusterfs_dir_config_t *d, char *path)
-{
- apr_array_header_t *list = d->ign_list;
- struct mod_glfs_ai_item *items = (struct mod_glfs_ai_item *) list->elts;
- char *tt = NULL, *ap = NULL;
- int i = 0;
- struct mod_glfs_ai_item *p = &items[i];
-
- if ((tt = strrchr(path, '/')) == NULL) {
- tt = path;
- }
- else {
- tt++;
- }
-
- for (i = 0; i < list->nelts; ++i) {
- p = &items[i];
- if ((ap = strrchr(p->apply_to, '/')) == NULL) {
- ap = p->apply_to;
- }
- else {
- ap++;
- }
-
-#ifndef CASE_BLIND_FILESYSTEM
- if (!ap_strcmp_match(path, p->apply_path)
- && !ap_strcmp_match(tt, ap)) {
- return 1;
- }
-#else /* !CASE_BLIND_FILESYSTEM */
- /*
- * On some platforms, the match must be case-blind. This is really
- * a factor of the filesystem involved, but we can't detect that
- * reliably - so we have to granularise at the OS level.
- */
- if (!ap_strcasecmp_match(path, p->apply_path)
- && !ap_strcasecmp_match(tt, ap)) {
- return 1;
- }
-#endif /* !CASE_BLIND_FILESYSTEM */
- }
- return 0;
-}
-
-/*****************************************************************
- *
- * Actually generating output
- */
-
-/*
- * Elements of the emitted document:
- * Preamble
- * Emitted unless SUPPRESS_PREAMBLE is set AND ap_run_sub_req
- * succeeds for the (content_type == text/html) header file.
- * Header file
- * Emitted if found (and able).
- * H1 tag line
- * Emitted if a header file is NOT emitted.
- * Directory stuff
- * Always emitted.
- * HR
- * Emitted if FANCY_INDEXING is set.
- * Readme file
- * Emitted if found (and able).
- * ServerSig
- * Emitted if ServerSignature is not Off AND a readme file
- * is NOT emitted.
- * Postamble
- * Emitted unless SUPPRESS_PREAMBLE is set AND ap_run_sub_req
- * succeeds for the (content_type == text/html) readme file.
- */
-
-
-/*
- * emit a plain text file
- */
-static void do_emit_plain(request_rec *r, apr_file_t *f)
-{
- char buf[AP_IOBUFSIZE + 1];
- int ch = 0;
- apr_size_t i = 0, c = 0, n = 0;
- apr_status_t rv = APR_SUCCESS;
-
- ap_rputs("<pre>\n", r);
- while (!apr_file_eof(f)) {
- do {
- n = sizeof(char) * AP_IOBUFSIZE;
- rv = apr_file_read(f, buf, &n);
- } while (APR_STATUS_IS_EINTR(rv));
- if (n == 0 || rv != APR_SUCCESS) {
- /* ###: better error here? */
- break;
- }
- buf[n] = '\0';
- c = 0;
- while (c < n) {
- for (i = c; i < n; i++) {
- if (buf[i] == '<' || buf[i] == '>'
- || buf[i] == '&') {
- break;
- }
- }
- ch = buf[i];
- buf[i] = '\0';
- ap_rputs(&buf[c], r);
- if (ch == '<') {
- ap_rputs("&lt;", r);
- }
- else if (ch == '>') {
- ap_rputs("&gt;", r);
- }
- else if (ch == '&') {
- ap_rputs("&amp;", r);
- }
- c = i + 1;
- }
- }
- ap_rputs("</pre>\n", r);
-}
-
-/*
- * Handle the preamble through the H1 tag line, inclusive. Locate
- * the file with a subrequests. Process text/html documents by actually
- * running the subrequest; text/xxx documents get copied verbatim,
- * and any other content type is ignored. This means that a non-text
- * document (such as HEADER.gif) might get multiviewed as the result
- * instead of a text document, meaning nothing will be displayed, but
- * oh well.
- */
-static void emit_head(request_rec *r, char *header_fname, int suppress_amble,
- int emit_xhtml, char *title)
-{
- apr_table_t *hdrs = r->headers_in;
- apr_file_t *f = NULL;
- request_rec *rr = NULL;
- int emit_amble = 1;
- int emit_H1 = 1;
- const char *r_accept = NULL;
- const char *r_accept_enc = NULL;
-
- /*
- * If there's a header file, send a subrequest to look for it. If it's
- * found and html do the subrequest, otherwise handle it
- */
- r_accept = apr_table_get(hdrs, "Accept");
- r_accept_enc = apr_table_get(hdrs, "Accept-Encoding");
- apr_table_setn(hdrs, "Accept", "text/html, text/plain");
- apr_table_unset(hdrs, "Accept-Encoding");
-
-
- if ((header_fname != NULL) && r->args) {
- header_fname = apr_pstrcat(r->pool, header_fname, "?", r->args,
- NULL);
- }
-
- if ((header_fname != NULL)
- && (rr = ap_sub_req_lookup_uri(header_fname, r, r->output_filters))
- && (rr->status == HTTP_OK)
- && (rr->filename != NULL)
- && (rr->finfo.filetype == APR_REG)) {
- /*
- * Check for the two specific cases we allow: text/html and
- * text/anything-else. The former is allowed to be processed for
- * SSIs.
- */
- if (rr->content_type != NULL) {
- if (!strcasecmp(ap_field_noparam(r->pool,
- rr->content_type),
- "text/html")) {
- ap_filter_t *f = NULL;
-
- /* Hope everything will work... */
- emit_amble = 0;
- emit_H1 = 0;
-
- if (! suppress_amble) {
- emit_preamble(r, emit_xhtml, title);
- }
- /* This is a hack, but I can't find any better
- * way to do this. The problem is that we have
- * already created the sub-request,
- * but we just inserted the OLD_WRITE filter,
- * and the sub-request needs to pass its data
- * through the OLD_WRITE filter, or things go
- * horribly wrong (missing data, data in
- * the wrong order, etc). To fix it, if you
- * create a sub-request and then insert the
- * OLD_WRITE filter before you run the request,
- * you need to make sure that the sub-request
- * data goes through the OLD_WRITE filter. Just
- * steal this code. The long-term solution is
- * to remove the ap_r* functions.
- */
- for (f=rr->output_filters;
- f->frec != ap_subreq_core_filter_handle;
- f = f->next);
- f->next = r->output_filters;
-
- /*
- * If there's a problem running the subrequest,
- * display the preamble if we didn't do it
- * before -- the header file didn't get displayed.
- */
- if (ap_run_sub_req(rr) != OK) {
- /* It didn't work */
- emit_amble = suppress_amble;
- emit_H1 = 1;
- }
- }
- else if (!strncasecmp("text/", rr->content_type, 5)) {
- /*
- * If we can open the file, prefix it with the
- * preamble regardless; since we'll be sending
- * a <pre> block around the file's contents,
- * any HTML header it had won't end up
- * where it belongs.
- */
- if (apr_file_open(&f, rr->filename, APR_READ,
- APR_OS_DEFAULT, r->pool)
- == APR_SUCCESS) {
- emit_preamble(r, emit_xhtml, title);
- emit_amble = 0;
- do_emit_plain(r, f);
- apr_file_close(f);
- emit_H1 = 0;
- }
- }
- }
- }
-
- if (r_accept) {
- apr_table_setn(hdrs, "Accept", r_accept);
- }
- else {
- apr_table_unset(hdrs, "Accept");
- }
-
- if (r_accept_enc) {
- apr_table_setn(hdrs, "Accept-Encoding", r_accept_enc);
- }
-
- if (emit_amble) {
- emit_preamble(r, emit_xhtml, title);
- }
- if (emit_H1) {
- ap_rvputs(r, "<h1>Index of ", title, "</h1>\n", NULL);
- }
- if (rr != NULL) {
- ap_destroy_sub_req(rr);
- }
-}
-
-
-/*
- * Handle the Readme file through the postamble, inclusive. Locate
- * the file with a subrequests. Process text/html documents by actually
- * running the subrequest; text/xxx documents get copied verbatim,
- * and any other content type is ignored. This means that a non-text
- * document (such as FOOTER.gif) might get multiviewed as the result
- * instead of a text document, meaning nothing will be displayed, but
- * oh well.
- */
-static void emit_tail(request_rec *r, char *readme_fname, int suppress_amble)
-{
- apr_file_t *f = NULL;
- request_rec *rr = NULL;
- int suppress_post = 0, suppress_sig = 0;
-
- /*
- * If there's a readme file, send a subrequest to look for it. If it's
- * found and a text file, handle it -- otherwise fall through and
- * pretend there's nothing there.
- */
- if ((readme_fname != NULL)
- && (rr = ap_sub_req_lookup_uri(readme_fname, r, r->output_filters))
- && (rr->status == HTTP_OK)
- && (rr->filename != NULL)
- && rr->finfo.filetype == APR_REG) {
- /*
- * Check for the two specific cases we allow: text/html and
- * text/anything-else. The former is allowed to be processed for
- * SSIs.
- */
- if (rr->content_type != NULL) {
- if (!strcasecmp(ap_field_noparam(r->pool,
- rr->content_type),
- "text/html")) {
- ap_filter_t *f;
- for (f=rr->output_filters;
- f->frec != ap_subreq_core_filter_handle;
- f = f->next);
- f->next = r->output_filters;
-
-
- if (ap_run_sub_req(rr) == OK) {
- /* worked... */
- suppress_sig = 1;
- suppress_post = suppress_amble;
- }
- }
- else if (!strncasecmp("text/", rr->content_type, 5)) {
- /*
- * If we can open the file, suppress the signature.
- */
- if (apr_file_open(&f, rr->filename, APR_READ,
- APR_OS_DEFAULT, r->pool)
- == APR_SUCCESS) {
- do_emit_plain(r, f);
- apr_file_close(f);
- suppress_sig = 1;
- }
- }
- }
- }
-
- if (!suppress_sig) {
- ap_rputs(ap_psignature("", r), r);
- }
- if (!suppress_post) {
- ap_rputs("</body></html>\n", r);
- }
- if (rr != NULL) {
- ap_destroy_sub_req(rr);
- }
-}
-
-
-static char *find_title(request_rec *r)
-{
- char titlebuf[MAX_STRING_LEN], *find = "<title>";
- apr_file_t *thefile = NULL;
- int x = 0, y = 0, p = 0;
- apr_size_t n;
-
- if (r->status != HTTP_OK) {
- return NULL;
- }
- if ((r->content_type != NULL)
- && (!strcasecmp(ap_field_noparam(r->pool, r->content_type),
- "text/html")
- || !strcmp(r->content_type, INCLUDES_MAGIC_TYPE))
- && !r->content_encoding) {
- if (apr_file_open(&thefile, r->filename, APR_READ,
- APR_OS_DEFAULT, r->pool) != APR_SUCCESS) {
- return NULL;
- }
- n = sizeof(char) * (MAX_STRING_LEN - 1);
- apr_file_read(thefile, titlebuf, &n);
- if (n <= 0) {
- apr_file_close(thefile);
- return NULL;
- }
- titlebuf[n] = '\0';
- for (x = 0, p = 0; titlebuf[x]; x++) {
- if (apr_tolower(titlebuf[x]) == find[p]) {
- if (!find[++p]) {
- if ((p = ap_ind(&titlebuf[++x], '<'))
- != -1) {
- titlebuf[x + p] = '\0';
- }
- /* Scan for line breaks for Tanmoy's
- secretary
- */
- for (y = x; titlebuf[y]; y++) {
- if ((titlebuf[y] == CR)
- || (titlebuf[y] == LF)) {
- if (y == x) {
- x++;
- }
- else {
- titlebuf[y] = ' ';
- }
- }
- }
- apr_file_close(thefile);
- return apr_pstrdup(r->pool,
- &titlebuf[x]);
- }
- }
- else {
- p = 0;
- }
- }
- apr_file_close(thefile);
- }
- return NULL;
-}
-
-static struct ent *make_parent_entry(apr_int32_t autoindex_opts,
- glusterfs_dir_config_t *d,
- request_rec *r, char keyid,
- char direction)
-{
- struct ent *p = NULL;
- char *testpath = NULL;
- /*
- * p->name is now the true parent URI.
- * testpath is a crafted lie, so that the syntax '/some/..'
- * (or simply '..')be used to describe 'up' from '/some/'
- * when processeing IndexIgnore, and Icon|Alt|Desc configs.
- */
-
- p = (struct ent *) apr_pcalloc(r->pool, sizeof(struct ent));
- /* The output has always been to the parent. Don't make ourself
- * our own parent (worthless cyclical reference).
- */
- if (!(p->name = ap_make_full_path(r->pool, r->uri, "../"))) {
- return (NULL);
- }
- ap_getparents(p->name);
- if (!*p->name) {
- return (NULL);
- }
-
- /* IndexIgnore has always compared "/thispath/.." */
- testpath = ap_make_full_path(r->pool, r->filename, "..");
- if (ignore_entry(d, testpath)) {
- return (NULL);
- }
-
- p->size = -1;
- p->lm = -1;
- p->key = apr_toupper(keyid);
- p->ascending = (apr_toupper(direction) == D_ASCENDING);
- p->version_sort = autoindex_opts & VERSION_SORT;
- if (autoindex_opts & FANCY_INDEXING) {
- if (!(p->icon = find_default_icon(d, testpath))) {
- p->icon = find_default_icon(d, "^^DIRECTORY^^");
- }
- if (!(p->alt = find_default_alt(d, testpath))) {
- if (!(p->alt = find_default_alt(d, "^^DIRECTORY^^"))) {
- p->alt = "DIR";
- }
- }
- p->desc = find_desc(d, testpath);
- }
- return p;
-}
-
-static struct ent *make_autoindex_entry(const apr_finfo_t *dirent,
- int autoindex_opts,
- glusterfs_dir_config_t *d,
- request_rec *r, char keyid,
- char direction,
- const char *pattern)
-{
- request_rec *rr = NULL;
- struct ent *p = NULL;
- int show_forbidden = 0;
-
- /* Dot is ignored, Parent is handled by make_parent_entry() */
- if ((dirent->name[0] == '.') && (!dirent->name[1]
- || ((dirent->name[1] == '.')
- && !dirent->name[2])))
- return (NULL);
-
- /*
- * On some platforms, the match must be case-blind. This is really
- * a factor of the filesystem involved, but we can't detect that
- * reliably - so we have to granularise at the OS level.
- */
- if (pattern && (apr_fnmatch(pattern, dirent->name,
- APR_FNM_NOESCAPE | APR_FNM_PERIOD
-#ifdef CASE_BLIND_FILESYSTEM
- | APR_FNM_CASE_BLIND
-#endif
- )
- != APR_SUCCESS)) {
- return (NULL);
- }
-
- if (ignore_entry(d, ap_make_full_path(r->pool,
- r->filename, dirent->name))) {
- return (NULL);
- }
-
- if (!(rr = ap_sub_req_lookup_dirent(dirent, r, AP_SUBREQ_NO_ARGS,
- NULL))) {
- return (NULL);
- }
-
- if((autoindex_opts & SHOW_FORBIDDEN)
- && (rr->status == HTTP_UNAUTHORIZED
- || rr->status == HTTP_FORBIDDEN)) {
- show_forbidden = 1;
- }
-
- if ((rr->finfo.filetype != APR_DIR && rr->finfo.filetype != APR_REG)
- || !(rr->status == OK || ap_is_HTTP_SUCCESS(rr->status)
- || ap_is_HTTP_REDIRECT(rr->status)
- || show_forbidden == 1)) {
- ap_destroy_sub_req(rr);
- return (NULL);
- }
-
- p = (struct ent *) apr_pcalloc(r->pool, sizeof(struct ent));
- if (dirent->filetype == APR_DIR) {
- p->name = apr_pstrcat(r->pool, dirent->name, "/", NULL);
- }
- else {
- p->name = apr_pstrdup(r->pool, dirent->name);
- }
- p->size = -1;
- p->icon = NULL;
- p->alt = NULL;
- p->desc = NULL;
- p->lm = -1;
- p->isdir = 0;
- p->key = apr_toupper(keyid);
- p->ascending = (apr_toupper(direction) == D_ASCENDING);
- p->version_sort = !!(autoindex_opts & VERSION_SORT);
- p->ignore_case = !!(autoindex_opts & IGNORE_CASE);
-
- if (autoindex_opts & (FANCY_INDEXING | TABLE_INDEXING)) {
- p->lm = rr->finfo.mtime;
- if (dirent->filetype == APR_DIR) {
- if (autoindex_opts & FOLDERS_FIRST) {
- p->isdir = 1;
- }
- rr->filename = ap_make_dirstr_parent (rr->pool,
- rr->filename);
-
- /* omit the trailing slash (1.3 compat) */
- rr->filename[strlen(rr->filename) - 1] = '\0';
-
- if (!(p->icon = find_icon(d, rr, 1))) {
- p->icon = find_default_icon(d, "^^DIRECTORY^^");
- }
- if (!(p->alt = find_alt(d, rr, 1))) {
- if (!(p->alt = find_default_alt(d,
- "^^DIRECTORY^^"))) {
- p->alt = "DIR";
- }
- }
- }
- else {
- p->icon = find_icon(d, rr, 0);
- p->alt = find_alt(d, rr, 0);
- p->size = rr->finfo.size;
- }
-
- p->desc = find_desc(d, rr->filename);
-
- if ((!p->desc) && (autoindex_opts & SCAN_HTML_TITLES)) {
- p->desc = apr_pstrdup(r->pool, find_title(rr));
- }
- }
- ap_destroy_sub_req(rr);
- /*
- * We don't need to take any special action for the file size key.
- * If we did, it would go here.
- */
- if (keyid == K_LAST_MOD) {
- if (p->lm < 0) {
- p->lm = 0;
- }
- }
- return (p);
-}
-
-static char *terminate_description(glusterfs_dir_config_t *d, char *desc,
- apr_int32_t autoindex_opts, int desc_width)
-{
- int maxsize = desc_width;
- register int x = 0;
-
- /*
- * If there's no DescriptionWidth in effect, default to the old
- * behaviour of adjusting the description size depending upon
- * what else is being displayed. Otherwise, stick with the
- * setting.
- */
- if (d->desc_adjust == K_UNSET) {
- if (autoindex_opts & SUPPRESS_ICON) {
- maxsize += 6;
- }
- if (autoindex_opts & SUPPRESS_LAST_MOD) {
- maxsize += 19;
- }
- if (autoindex_opts & SUPPRESS_SIZE) {
- maxsize += 7;
- }
- }
- for (x = 0; desc[x] && ((maxsize > 0) || (desc[x] == '<')); x++) {
- if (desc[x] == '<') {
- while (desc[x] != '>') {
- if (!desc[x]) {
- maxsize = 0;
- break;
- }
- ++x;
- }
- }
- else if (desc[x] == '&') {
- /* entities like &auml; count as one character */
- --maxsize;
- for ( ; desc[x] != ';'; ++x) {
- if (desc[x] == '\0') {
- maxsize = 0;
- break;
- }
- }
- }
- else {
- --maxsize;
- }
- }
- if (!maxsize && desc[x] != '\0') {
- desc[x - 1] = '>'; /* Grump. */
- desc[x] = '\0'; /* Double Grump! */
- }
- return desc;
-}
-
-/*
- * Emit the anchor for the specified field. If a field is the key for the
- * current request, the link changes its meaning to reverse the order when
- * selected again. Non-active fields always start in ascending order.
- */
-static void emit_link(request_rec *r, const char *anchor, char column,
- char curkey, char curdirection,
- const char *colargs, int nosort)
-{
- char qvalue[9];
-
- if (!nosort) {
-
- qvalue[0] = '?';
- qvalue[1] = 'C';
- qvalue[2] = '=';
- qvalue[3] = column;
- qvalue[4] = ';';
- qvalue[5] = 'O';
- qvalue[6] = '=';
- /* reverse? */
- qvalue[7] = ((curkey == column) && (curdirection == D_ASCENDING))
- ? D_DESCENDING : D_ASCENDING;
- qvalue[8] = '\0';
- ap_rvputs(r, "<a href=\"", qvalue, colargs ? colargs : "",
- "\">", anchor, "</a>", NULL);
- }
- else {
- ap_rputs(anchor, r);
- }
-}
-
-static void output_directories(struct ent **ar, int n,
- glusterfs_dir_config_t *d, request_rec *r,
- apr_int32_t autoindex_opts, char keyid,
- char direction, const char *colargs)
-{
- int x = 0;
- apr_size_t rv = APR_SUCCESS;
- char *name = NULL, *tp = NULL;
- int static_columns = 0;
- apr_pool_t *scratch = NULL;
- int name_width = 0, desc_width = 0, cols = 0;
- char *name_scratch = NULL, *pad_scratch = NULL, *breakrow = "";
- char *anchor = NULL, *t = NULL, *t2 = NULL;
- int nwidth = 0;
- char time_str[MAX_STRING_LEN];
- apr_time_exp_t ts = {0, };
- char buf[5];
-
- name = r->uri;
- static_columns = !!(autoindex_opts & SUPPRESS_COLSORT);
- apr_pool_create(&scratch, r->pool);
- if (name[0] == '\0') {
- name = "/";
- }
-
- name_width = d->name_width;
- desc_width = d->desc_width;
-
- if ((autoindex_opts & (FANCY_INDEXING | TABLE_INDEXING))
- == FANCY_INDEXING) {
- if (d->name_adjust == K_ADJUST) {
- for (x = 0; x < n; x++) {
- int t = 0;
- t = strlen(ar[x]->name);
- if (t > name_width) {
- name_width = t;
- }
- }
- }
-
- if (d->desc_adjust == K_ADJUST) {
- for (x = 0; x < n; x++) {
- if (ar[x]->desc != NULL) {
- int t = 0;
- t = strlen(ar[x]->desc);
- if (t > desc_width) {
- desc_width = t;
- }
- }
- }
- }
- }
- name_scratch = apr_palloc(r->pool, name_width + 1);
- pad_scratch = apr_palloc(r->pool, name_width + 1);
- memset(pad_scratch, ' ', name_width);
- pad_scratch[name_width] = '\0';
-
- if (autoindex_opts & TABLE_INDEXING) {
- cols = 1;
- ap_rputs("<table><tr>", r);
- if (!(autoindex_opts & SUPPRESS_ICON)) {
- ap_rputs("<th>", r);
- if ((tp = find_default_icon(d, "^^BLANKICON^^"))) {
- ap_rvputs(r, "<img src=\"",
- ap_escape_html(scratch, tp),
- "\" alt=\"[ICO]\"", NULL);
- if (d->icon_width) {
- ap_rprintf(r, " width=\"%d\"",
- d->icon_width);
- }
- if (d->icon_height) {
- ap_rprintf(r, " height=\"%d\"",
- d->icon_height);
- }
-
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs("></th>", r);
- }
- else {
- ap_rputs("&nbsp;</th>", r);
- }
-
- ++cols;
- }
- ap_rputs("<th>", r);
- emit_link(r, "Name", K_NAME, keyid, direction,
- colargs, static_columns);
- if (!(autoindex_opts & SUPPRESS_LAST_MOD)) {
- ap_rputs("</th><th>", r);
- emit_link(r, "Last modified", K_LAST_MOD, keyid,
- direction, colargs, static_columns);
- ++cols;
- }
- if (!(autoindex_opts & SUPPRESS_SIZE)) {
- ap_rputs("</th><th>", r);
- emit_link(r, "Size", K_SIZE, keyid, direction,
- colargs, static_columns);
- ++cols;
- }
- if (!(autoindex_opts & SUPPRESS_DESC)) {
- ap_rputs("</th><th>", r);
- emit_link(r, "Description", K_DESC, keyid, direction,
- colargs, static_columns);
- ++cols;
- }
- if (!(autoindex_opts & SUPPRESS_RULES)) {
- breakrow = apr_psprintf(r->pool,
- "<tr><th colspan=\"%d\">"
- "<hr%s></th></tr>\n", cols,
- (autoindex_opts & EMIT_XHTML)
- ? " /" : "");
- }
- ap_rvputs(r, "</th></tr>", breakrow, NULL);
- }
- else if (autoindex_opts & FANCY_INDEXING) {
- ap_rputs("<pre>", r);
- if (!(autoindex_opts & SUPPRESS_ICON)) {
- if ((tp = find_default_icon(d, "^^BLANKICON^^"))) {
- ap_rvputs(r, "<img src=\"",
- ap_escape_html(scratch, tp),
- "\" alt=\"Icon \"", NULL);
- if (d->icon_width) {
- ap_rprintf(r, " width=\"%d\"",
- d->icon_width);
- }
- if (d->icon_height) {
- ap_rprintf(r, " height=\"%d\"",
- d->icon_height);
- }
-
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs("> ", r);
- }
- else {
- ap_rputs(" ", r);
- }
- }
- emit_link(r, "Name", K_NAME, keyid, direction,
- colargs, static_columns);
- ap_rputs(pad_scratch + 4, r);
- /*
- * Emit the guaranteed-at-least-one-space-between-columns byte.
- */
- ap_rputs(" ", r);
- if (!(autoindex_opts & SUPPRESS_LAST_MOD)) {
- emit_link(r, "Last modified", K_LAST_MOD, keyid,
- direction, colargs, static_columns);
- ap_rputs(" ", r);
- }
- if (!(autoindex_opts & SUPPRESS_SIZE)) {
- emit_link(r, "Size", K_SIZE, keyid, direction,
- colargs, static_columns);
- ap_rputs(" ", r);
- }
- if (!(autoindex_opts & SUPPRESS_DESC)) {
- emit_link(r, "Description", K_DESC, keyid, direction,
- colargs, static_columns);
- }
- if (!(autoindex_opts & SUPPRESS_RULES)) {
- ap_rputs("<hr", r);
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs(">", r);
- }
- else {
- ap_rputc('\n', r);
- }
- }
- else {
- ap_rputs("<ul>", r);
- }
-
- for (x = 0; x < n; x++) {
- apr_pool_clear(scratch);
-
- t = ar[x]->name;
- anchor = ap_escape_html(scratch, ap_os_escape_path(scratch, t,
- 0));
-
- if (!x && t[0] == '/') {
- t2 = "Parent Directory";
- }
- else {
- t2 = t;
- }
-
- if (autoindex_opts & TABLE_INDEXING) {
- ap_rputs("<tr>", r);
- if (!(autoindex_opts & SUPPRESS_ICON)) {
- ap_rputs("<td valign=\"top\">", r);
- if (autoindex_opts & ICONS_ARE_LINKS) {
- ap_rvputs(r, "<a href=\"", anchor,
- "\">", NULL);
- }
- if ((ar[x]->icon) || d->default_icon) {
- ap_rvputs(r, "<img src=\"",
- ap_escape_html(scratch,
- ar[x]->icon ?
- ar[x]->icon
- : d->default_icon),
- "\" alt=\"[",
- (ar[x]->alt ?
- ar[x]->alt : " "),
- "]\"", NULL);
- if (d->icon_width) {
- ap_rprintf(r, " width=\"%d\"",
- d->icon_width);
- }
- if (d->icon_height) {
- ap_rprintf(r, " height=\"%d\"",
- d->icon_height);
- }
-
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs(">", r);
- }
- else {
- ap_rputs("&nbsp;", r);
- }
- if (autoindex_opts & ICONS_ARE_LINKS) {
- ap_rputs("</a></td>", r);
- }
- else {
- ap_rputs("</td>", r);
- }
- }
- if (d->name_adjust == K_ADJUST) {
- ap_rvputs(r, "<td><a href=\"", anchor, "\">",
- ap_escape_html(scratch, t2), "</a>",
- NULL);
- }
- else {
- nwidth = strlen(t2);
- if (nwidth > name_width) {
- memcpy(name_scratch, t2, name_width - 3);
- name_scratch[name_width - 3] = '.';
- name_scratch[name_width - 2] = '.';
- name_scratch[name_width - 1] = '>';
- name_scratch[name_width] = 0;
- t2 = name_scratch;
- nwidth = name_width;
- }
- ap_rvputs(r, "<td><a href=\"", anchor, "\">",
- ap_escape_html(scratch, t2),
- "</a>", pad_scratch + nwidth, NULL);
- }
- if (!(autoindex_opts & SUPPRESS_LAST_MOD)) {
- if (ar[x]->lm != -1) {
- char time_str[MAX_STRING_LEN];
- apr_time_exp_t ts;
- apr_time_exp_lt(&ts, ar[x]->lm);
- apr_strftime(time_str, &rv,
- MAX_STRING_LEN,
- "</td><td align=\"right\""
- ">%d-%b-%Y %H:%M ",
- &ts);
- ap_rputs(time_str, r);
- }
- else {
- ap_rputs("</td><td>&nbsp;", r);
- }
- }
- if (!(autoindex_opts & SUPPRESS_SIZE)) {
- ap_rvputs(r, "</td><td align=\"right\">",
- apr_strfsize(ar[x]->size, buf), NULL);
- }
- if (!(autoindex_opts & SUPPRESS_DESC)) {
- if (ar[x]->desc) {
- if (d->desc_adjust == K_ADJUST) {
- ap_rvputs(r, "</td><td>",
- ar[x]->desc, NULL);
- }
- else {
- ap_rvputs(r, "</td><td>",
- terminate_description(d, ar[x]->desc,
- autoindex_opts,
- desc_width), NULL);
- }
- }
- }
- else {
- ap_rputs("</td><td>&nbsp;", r);
- }
- ap_rputs("</td></tr>\n", r);
- }
- else if (autoindex_opts & FANCY_INDEXING) {
- if (!(autoindex_opts & SUPPRESS_ICON)) {
- if (autoindex_opts & ICONS_ARE_LINKS) {
- ap_rvputs(r, "<a href=\"", anchor,
- "\">", NULL);
- }
- if ((ar[x]->icon) || d->default_icon) {
- ap_rvputs(r, "<img src=\"",
- ap_escape_html(scratch,
- ar[x]->icon ?
- ar[x]->icon
- : d->default_icon),
- "\" alt=\"[",
- (ar[x]->alt ? ar[x]->alt
- : " "),
- "]\"", NULL);
- if (d->icon_width) {
- ap_rprintf(r, " width=\"%d\"",
- d->icon_width);
- }
- if (d->icon_height) {
- ap_rprintf(r, " height=\"%d\"",
- d->icon_height);
- }
-
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs(">", r);
- }
- else {
- ap_rputs(" ", r);
- }
- if (autoindex_opts & ICONS_ARE_LINKS) {
- ap_rputs("</a> ", r);
- }
- else {
- ap_rputc(' ', r);
- }
- }
- nwidth = strlen(t2);
- if (nwidth > name_width) {
- memcpy(name_scratch, t2, name_width - 3);
- name_scratch[name_width - 3] = '.';
- name_scratch[name_width - 2] = '.';
- name_scratch[name_width - 1] = '>';
- name_scratch[name_width] = 0;
- t2 = name_scratch;
- nwidth = name_width;
- }
- ap_rvputs(r, "<a href=\"", anchor, "\">",
- ap_escape_html(scratch, t2),
- "</a>", pad_scratch + nwidth, NULL);
- /*
- * The blank before the storm.. er, before the next
- * field.
- */
- ap_rputs(" ", r);
- if (!(autoindex_opts & SUPPRESS_LAST_MOD)) {
- if (ar[x]->lm != -1) {
- apr_time_exp_lt(&ts, ar[x]->lm);
- apr_strftime(time_str, &rv,
- MAX_STRING_LEN,
- "%d-%b-%Y %H:%M ", &ts);
- ap_rputs(time_str, r);
- }
- else {
- /* Length="22-Feb-1998 23:42 "
- * (see 4 lines above)
- */
- ap_rputs(" ", r);
- }
- }
- if (!(autoindex_opts & SUPPRESS_SIZE)) {
- ap_rputs(apr_strfsize(ar[x]->size, buf), r);
- ap_rputs(" ", r);
- }
- if (!(autoindex_opts & SUPPRESS_DESC)) {
- if (ar[x]->desc) {
- ap_rputs(terminate_description(d,
- ar[x]->desc,
- autoindex_opts,
- desc_width), r);
- }
- }
- ap_rputc('\n', r);
- }
- else {
- ap_rvputs(r, "<li><a href=\"", anchor, "\"> ",
- ap_escape_html(scratch, t2),
- "</a></li>\n", NULL);
- }
- }
- if (autoindex_opts & TABLE_INDEXING) {
- ap_rvputs(r, breakrow, "</table>\n", NULL);
- }
- else if (autoindex_opts & FANCY_INDEXING) {
- if (!(autoindex_opts & SUPPRESS_RULES)) {
- ap_rputs("<hr", r);
- if (autoindex_opts & EMIT_XHTML) {
- ap_rputs(" /", r);
- }
- ap_rputs("></pre>\n", r);
- }
- else {
- ap_rputs("</pre>\n", r);
- }
- }
- else {
- ap_rputs("</ul>\n", r);
- }
-}
-
-/*
- * Compare two file entries according to the sort criteria. The return
- * is essentially a signum function value.
- */
-
-static int dsortf(struct ent **e1, struct ent **e2)
-{
- struct ent *c1 = NULL, *c2 = NULL;
- int result = 0;
-
- /*
- * First, see if either of the entries is for the parent directory.
- * If so, that *always* sorts lower than anything else.
- */
- if ((*e1)->name[0] == '/') {
- return -1;
- }
- if ((*e2)->name[0] == '/') {
- return 1;
- }
- /*
- * Now see if one's a directory and one isn't, if we're set
- * isdir for FOLDERS_FIRST.
- */
- if ((*e1)->isdir != (*e2)->isdir) {
- return (*e1)->isdir ? -1 : 1;
- }
- /*
- * All of our comparisons will be of the c1 entry against the c2 one,
- * so assign them appropriately to take care of the ordering.
- */
- if ((*e1)->ascending) {
- c1 = *e1;
- c2 = *e2;
- }
- else {
- c1 = *e2;
- c2 = *e1;
- }
-
- switch (c1->key) {
- case K_LAST_MOD:
- if (c1->lm > c2->lm) {
- return 1;
- }
- else if (c1->lm < c2->lm) {
- return -1;
- }
- break;
- case K_SIZE:
- if (c1->size > c2->size) {
- return 1;
- }
- else if (c1->size < c2->size) {
- return -1;
- }
- break;
- case K_DESC:
- if (c1->version_sort) {
- result = apr_strnatcmp(c1->desc ? c1->desc : "",
- c2->desc ? c2->desc : "");
- }
- else {
- result = strcmp(c1->desc ? c1->desc : "",
- c2->desc ? c2->desc : "");
- }
- if (result) {
- return result;
- }
- break;
- }
-
- /* names may identical when treated case-insensitively,
- * so always fall back on strcmp() flavors to put entries
- * in deterministic order. This means that 'ABC' and 'abc'
- * will always appear in the same order, rather than
- * variably between 'ABC abc' and 'abc ABC' order.
- */
-
- if (c1->version_sort) {
- if (c1->ignore_case) {
- result = apr_strnatcasecmp (c1->name, c2->name);
- }
- if (!result) {
- result = apr_strnatcmp(c1->name, c2->name);
- }
- }
-
- /* The names may be identical in respects other other than
- * filename case when strnatcmp is used above, so fall back
- * to strcmp on conflicts so that fn1.01.zzz and fn1.1.zzz
- * are also sorted in a deterministic order.
- */
-
- if (!result && c1->ignore_case) {
- result = strcasecmp (c1->name, c2->name);
- }
-
- if (!result) {
- result = strcmp (c1->name, c2->name);
- }
-
- return result;
-}
-
-
-static int
-mod_glfs_index_directory (request_rec *r,
- glusterfs_dir_config_t *autoindex_conf)
-{
- char *title_name = NULL, *title_endp = NULL;
- char *pstring = NULL, *colargs = NULL;
- char *path = NULL, *fname = NULL, *charset = NULL;
- char *fullpath = NULL, *name = NULL, *ctype = NULL;
- apr_finfo_t dirent;
- glusterfs_file_t fd = NULL;
- apr_status_t status = APR_SUCCESS;
- int num_ent = 0, x;
- struct ent *head = NULL, *p = NULL;
- struct ent **ar = NULL;
- const char *qstring = NULL;
- apr_int32_t autoindex_opts = autoindex_conf->opts;
- char keyid, direction;
- apr_size_t dirpathlen;
- glusterfs_dir_config_t *dir_config = NULL;
- int ret = -1;
- struct dirent *entry = NULL;
- struct stat st = {0, };
-
- name = r->filename;
- title_name = ap_escape_html(r->pool, r->uri);
- ctype = "text/html";
- dir_config = mod_glfs_dconfig (r);
- if (dir_config == NULL) {
- return HTTP_INTERNAL_SERVER_ERROR;
- }
-
- path = r->uri;
- fd = glusterfs_open (path, O_RDONLY, 0);
- if (fd == 0) {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "file permissions deny server access: %s",
- r->filename);
- return HTTP_FORBIDDEN;
- }
-
- if (autoindex_conf->ctype) {
- ctype = autoindex_conf->ctype;
- }
- if (autoindex_conf->charset) {
- charset = autoindex_conf->charset;
- }
- else {
-#if APR_HAS_UNICODE_FS
- charset = "UTF-8";
-#else
- charset = "ISO-8859-1";
-#endif
- }
- if (*charset) {
- ap_set_content_type(r, apr_pstrcat(r->pool, ctype, ";charset=",
- charset, NULL));
- }
- else {
- ap_set_content_type(r, ctype);
- }
-
- if (autoindex_opts & TRACK_MODIFIED) {
- ap_update_mtime(r, r->finfo.mtime);
- ap_set_last_modified(r);
- ap_set_etag(r);
- }
- if (r->header_only) {
- glusterfs_close (fd);
- return 0;
- }
-
- /*
- * If there is no specific ordering defined for this directory,
- * default to ascending by filename.
- */
- keyid = autoindex_conf->default_keyid
- ? autoindex_conf->default_keyid : K_NAME;
- direction = autoindex_conf->default_direction
- ? autoindex_conf->default_direction : D_ASCENDING;
-
- /*
- * Figure out what sort of indexing (if any) we're supposed to use.
- *
- * If no QUERY_STRING was specified or client query strings have been
- * explicitly disabled.
- * If we are ignoring the client, suppress column sorting as well.
- */
- if (autoindex_opts & IGNORE_CLIENT) {
- qstring = NULL;
- autoindex_opts |= SUPPRESS_COLSORT;
- colargs = "";
- }
- else {
- char fval[5], vval[5], *ppre = "", *epattern = "";
- fval[0] = '\0'; vval[0] = '\0';
- qstring = r->args;
-
- while (qstring && *qstring) {
-
- /* C= First Sort key Column (N, M, S, D) */
- if ( qstring[0] == 'C' && qstring[1] == '='
- && qstring[2] && strchr(K_VALID, qstring[2])
- && ( qstring[3] == '&' || qstring[3] == ';'
- || !qstring[3])) {
- keyid = qstring[2];
- qstring += qstring[3] ? 4 : 3;
- }
-
- /* O= Sort order (A, D) */
- else if ( qstring[0] == 'O' && qstring[1] == '='
- && ( (qstring[2] == D_ASCENDING)
- || (qstring[2] == D_DESCENDING))
- && ( qstring[3] == '&'
- || qstring[3] == ';'
- || !qstring[3])) {
- direction = qstring[2];
- qstring += qstring[3] ? 4 : 3;
- }
-
- /* F= Output Format (0 plain, 1 fancy (pre), 2 table) */
- else if ( qstring[0] == 'F' && qstring[1] == '='
- && qstring[2] && strchr("012", qstring[2])
- && ( qstring[3] == '&' || qstring[3] == ';'
- || !qstring[3])) {
- if (qstring[2] == '0') {
- autoindex_opts &= ~(FANCY_INDEXING
- | TABLE_INDEXING);
- }
- else if (qstring[2] == '1') {
- autoindex_opts = (autoindex_opts
- | FANCY_INDEXING)
- & ~TABLE_INDEXING;
- }
- else if (qstring[2] == '2') {
- autoindex_opts |= FANCY_INDEXING
- | TABLE_INDEXING;
- }
- strcpy(fval, ";F= ");
- fval[3] = qstring[2];
- qstring += qstring[3] ? 4 : 3;
- }
-
- /* V= Version sort (0, 1) */
- else if ( qstring[0] == 'V' && qstring[1] == '='
- && (qstring[2] == '0' || qstring[2] == '1')
- && ( qstring[3] == '&' || qstring[3] == ';'
- || !qstring[3])) {
- if (qstring[2] == '0') {
- autoindex_opts &= ~VERSION_SORT;
- }
- else if (qstring[2] == '1') {
- autoindex_opts |= VERSION_SORT;
- }
- strcpy(vval, ";V= ");
- vval[3] = qstring[2];
- qstring += qstring[3] ? 4 : 3;
- }
-
- /* P= wildcard pattern (*.foo) */
- else if (qstring[0] == 'P' && qstring[1] == '=') {
- const char *eos = qstring += 2; /* for efficiency */
-
- while (*eos && *eos != '&' && *eos != ';') {
- ++eos;
- }
-
- if (eos == qstring) {
- pstring = NULL;
- }
- else {
- pstring = apr_pstrndup(r->pool, qstring,
- eos - qstring);
- if (ap_unescape_url(pstring) != OK) {
- /* ignore the pattern, if it's bad. */
- pstring = NULL;
- }
- else {
- ppre = ";P=";
- /* be correct */
- epattern = ap_escape_uri(r->pool,
- pstring);
- }
- }
-
- if (*eos && *++eos) {
- qstring = eos;
- }
- else {
- qstring = NULL;
- }
- }
-
- /* Syntax error? Ignore the remainder! */
- else {
- qstring = NULL;
- }
- }
- colargs = apr_pstrcat(r->pool, fval, vval, ppre, epattern,
- NULL);
- }
-
- /* Spew HTML preamble */
- title_endp = title_name + strlen(title_name) - 1;
-
- while (title_endp > title_name && *title_endp == '/') {
- *title_endp-- = '\0';
- }
-
- emit_head(r, find_header(autoindex_conf, r),
- autoindex_opts & SUPPRESS_PREAMBLE,
- autoindex_opts & EMIT_XHTML, title_name);
-
- /*
- * Since we don't know how many dir. entries there are, put them into a
- * linked list and then arrayificate them so qsort can use them.
- */
- head = NULL;
- p = make_parent_entry(autoindex_opts, autoindex_conf, r, keyid,
- direction);
- if (p != NULL) {
- p->next = head;
- head = p;
- num_ent++;
- }
- fullpath = apr_palloc(r->pool, APR_PATH_MAX);
- dirpathlen = strlen(name);
- memcpy(fullpath, name, dirpathlen);
-
- do {
- entry = glusterfs_readdir (fd);
- if (entry == NULL) {
- break;
- }
-
- fname = apr_pstrcat (r->pool, path, entry->d_name, NULL);
-
- ret = glusterfs_stat (fname, &st);
- if (ret != 0) {
- break;
- }
-
- dirent.fname = fname;
- dirent.name = apr_pstrdup (r->pool, entry->d_name);
- fill_out_finfo (&dirent, &st,
- APR_FINFO_MIN | APR_FINFO_IDENT
- | APR_FINFO_NLINK | APR_FINFO_OWNER
- | APR_FINFO_PROT);
-
- p = make_autoindex_entry(&dirent, autoindex_opts,
- autoindex_conf, r,
- keyid, direction, pstring);
- if (p != NULL) {
- p->next = head;
- head = p;
- num_ent++;
- }
- } while (1);
-
- if (num_ent > 0) {
- ar = (struct ent **) apr_palloc(r->pool,
- num_ent * sizeof(struct ent *));
- p = head;
- x = 0;
- while (p) {
- ar[x++] = p;
- p = p->next;
- }
-
- qsort((void *) ar, num_ent, sizeof(struct ent *),
- (int (*)(const void *, const void *)) dsortf);
- }
- output_directories(ar, num_ent, autoindex_conf, r, autoindex_opts,
- keyid, direction, colargs);
- glusterfs_close (fd);
-
- emit_tail(r, find_readme(autoindex_conf, r),
- autoindex_opts & SUPPRESS_PREAMBLE);
-
- return 0;
-}
-
-
-static int
-handle_autoindex(request_rec *r)
-{
- glusterfs_dir_config_t *dir_config = NULL;
- int allow_opts;
-
- allow_opts = ap_allow_options(r);
-
- r->allowed |= (AP_METHOD_BIT << M_GET);
- if (r->method_number != M_GET) {
- return DECLINED;
- }
-
- dir_config = mod_glfs_dconfig (r);
-
- /* OK, nothing easy. Trot out the heavy artillery... */
-
- if (allow_opts & OPT_INDEXES) {
- int errstatus;
-
- if ((errstatus = ap_discard_request_body(r)) != OK) {
- return errstatus;
- }
-
- /* KLUDGE --- make the sub_req lookups happen in the right
- * directory. Fixing this in the sub_req_lookup functions
- * themselves is difficult, and would probably break
- * virtual includes...
- */
-
- if (r->filename[strlen(r->filename) - 1] != '/') {
- r->filename = apr_pstrcat(r->pool, r->filename, "/",
- NULL);
- }
- return mod_glfs_index_directory(r, dir_config);
- } else {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "Directory index forbidden by "
- "Options directive: %s", r->filename);
- return HTTP_FORBIDDEN;
- }
-}
-
-
-static int
-mod_glfs_handler (request_rec *r)
-{
- conn_rec *c = r->connection;
- apr_bucket_brigade *bb;
- apr_bucket *e;
- core_dir_config *d;
- int errstatus;
- glusterfs_file_t fd = NULL;
- apr_status_t status;
- glusterfs_dir_config_t *dir_config = NULL;
- char *path = NULL;
- int num_ranges = 0;
- apr_size_t size = 0;
- apr_off_t range_start = 0, range_end = 0;
- char *current = NULL;
- apr_status_t rv = 0;
- core_request_config *req_cfg = NULL;
-
- /* XXX if/when somebody writes a content-md5 filter we either need to
- * remove this support or coordinate when to use the filter vs.
- * when to use this code
- * The current choice of when to compute the md5 here matches the 1.3
- * support fairly closely (unlike 1.3, we don't handle computing md5
- * when the charset is translated).
- */
-
- int bld_content_md5;
- if (!r->handler || (r->handler
- && strcmp (r->handler, GLUSTERFS_HANDLER)))
- return DECLINED;
-
- if (r->uri[0] == '\0') {
- return DECLINED;
- }
-
- if (r->finfo.filetype == APR_DIR) {
- return handle_autoindex (r);
- }
-
- dir_config = mod_glfs_dconfig (r);
-
- ap_allow_standard_methods(r, MERGE_ALLOW, M_GET, -1);
-
- /* We understood the (non-GET) method, but it might not be legal for
- this particular resource. Check to see if the 'deliver_script'
- flag is set. If so, then we go ahead and deliver the file since
- it isn't really content (only GET normally returns content).
-
- Note: based on logic further above, the only possible non-GET
- method at this point is POST. In the future, we should enable
- script delivery for all methods. */
- if (r->method_number != M_GET) {
- req_cfg = ap_get_module_config(r->request_config, &core_module);
- if (!req_cfg->deliver_script) {
- /* The flag hasn't been set for this request. Punt. */
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "This resource does not accept the %s "
- "method.",
- r->method);
- return HTTP_METHOD_NOT_ALLOWED;
- }
- }
-
- d = (core_dir_config *)ap_get_module_config(r->per_dir_config,
- &core_module);
- bld_content_md5 = (d->content_md5 & 1)
- && r->output_filters->frec->ftype != AP_FTYPE_RESOURCE;
-
- if ((errstatus = ap_discard_request_body(r)) != OK) {
- return errstatus;
- }
-
- if (r->finfo.filetype == 0) {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "File does not exist: %s", r->filename);
- return HTTP_NOT_FOUND;
- }
-
- if ((r->used_path_info != AP_REQ_ACCEPT_PATH_INFO) &&
- r->path_info && *r->path_info)
- {
- /* default to reject */
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "File does not exist: %s",
- apr_pstrcat(r->pool, r->filename, r->path_info,
- NULL));
- return HTTP_NOT_FOUND;
- }
-
- ap_update_mtime (r, r->finfo.mtime);
- ap_set_last_modified (r);
- ap_set_etag (r);
- apr_table_setn (r->headers_out, "Accept-Ranges", "bytes");
-
- num_ranges = ap_set_byterange(r);
- if (num_ranges == 0) {
- size = r->finfo.size;
- } else {
- char *tmp = apr_pstrdup (r->pool, r->range);
- while ((current = ap_getword(r->pool, (const char **)&tmp, ','))
- && (rv = parse_byterange(current, r->finfo.size,
- &range_start, &range_end))) {
- size += (range_end - range_start);
- }
- }
-
- ap_set_content_length (r, size);
-
- if ((errstatus = ap_meets_conditions(r)) != OK) {
- r->status = errstatus;
- }
-
- /*
- * file is small enough to have already got the content in
- * glusterfs_lookup
- */
- if (r->finfo.size <= dir_config->xattr_file_size && dir_config->buf) {
- if (bld_content_md5) {
- apr_table_setn (r->headers_out, "Content-MD5",
- (const char *)ap_md5_binary(r->pool,
- dir_config->buf
- , r->finfo.size));
- }
-
- ap_log_rerror (APLOG_MARK, APLOG_NOTICE, 0, r,
- "fetching data from glusterfs through xattr "
- "interface\n");
-
- bb = apr_brigade_create(r->pool, c->bucket_alloc);
-
- e = apr_bucket_heap_create (dir_config->buf, r->finfo.size,
- free, c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL (bb, e);
-
- e = apr_bucket_eos_create(c->bucket_alloc);
- APR_BRIGADE_INSERT_TAIL(bb, e);
-
- dir_config->buf = NULL;
-
- /* let the byterange_filter handle multipart requests */
- status = ap_pass_brigade(r->output_filters, bb);
- if (status == APR_SUCCESS
- || r->status != HTTP_OK
- || c->aborted) {
- return OK;
- }
- else {
- /* no way to know what type of error occurred */
- ap_log_rerror(APLOG_MARK, APLOG_DEBUG, status, r,
- "mod_glfs_handler: ap_pass_brigade "
- "returned %i",
- status);
- return HTTP_INTERNAL_SERVER_ERROR;
- }
- }
-
- /* do standard open/read/close to fetch content */
- path = r->uri;
-
- fd = glusterfs_open (path, O_RDONLY, 0);
- if (fd == 0) {
- ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
- "file permissions deny server access: %s",
- r->filename);
- return HTTP_FORBIDDEN;
- }
-
- /*
- * byterange_filter cannot handle range requests, since we are not
- * sending the whole data in a single brigade
- */
-
-
- if (num_ranges == 0) {
- mod_glfs_read_async (r, NULL, fd, 0, -1);
- } else {
- mod_glfs_handle_byte_ranges (r, fd, num_ranges);
- }
-
- glusterfs_close (fd);
-}
-
-
-#if 0
-static apr_status_t
-mod_glfs_output_filter (ap_filter_t *f,
- apr_bucket_brigade *b)
-{
- size_t size = 0;
- apr_bucket_t *e = NULL;
- size = atol (apr_table_get (r->notes, MOD_GLFS_SIZE));
-
- for (e = APR_BRIGADE_FIRST(b);
- e != APR_BRIGADE_SENTINEL(b);
- e = APR_BUCKET_NEXT(e))
- {
- /* FIXME: can there be more than one heap buckets? */
- if (e->type == &apr_bucket_type_heap) {
- break;
- }
- }
-
- if (e != APR_BRIGADE_SENTINEL(b)) {
- e->length = size;
- }
-
- return ap_pass_brigade (f->next, b);
-}
-#endif
-
-static int
-mod_glfs_fixup_dir(request_rec *r)
-{
- glusterfs_dir_config_t *d = NULL;
- char *dummy_ptr[1];
- char **names_ptr = NULL, *name_ptr = NULL;
- int num_names;
- int error_notfound = 0;
- char *ifile = NULL;
- request_rec *rr = NULL;
-
- /* only handle requests against directories */
- if (r->finfo.filetype != APR_DIR) {
- return DECLINED;
- }
-
- if (!r->handler || strcmp (r->handler, GLUSTERFS_HANDLER)) {
- return DECLINED;
- }
-
- /* Never tolerate path_info on dir requests */
- if (r->path_info && *r->path_info) {
- return DECLINED;
- }
-
- d = (glusterfs_dir_config_t *)ap_get_module_config(r->per_dir_config,
- &glusterfs_module);
-
- /* Redirect requests that are not '/' terminated */
- if (r->uri[0] == '\0' || r->uri[strlen(r->uri) - 1] != '/')
- {
- if (!d->do_slash) {
- return DECLINED;
- }
-
- /* Only redirect non-get requests if we have no note to warn
- * that this browser cannot handle redirs on non-GET requests
- * (such as Microsoft's WebFolders).
- */
- if ((r->method_number != M_GET)
- && apr_table_get(r->subprocess_env, "redirect-carefully")) {
- return DECLINED;
- }
-
- if (r->args != NULL) {
- ifile = apr_pstrcat(r->pool, ap_escape_uri(r->pool,
- r->uri),
- "/", "?", r->args, NULL);
- }
- else {
- ifile = apr_pstrcat(r->pool, ap_escape_uri(r->pool,
- r->uri),
- "/", NULL);
- }
-
- apr_table_setn(r->headers_out, "Location",
- ap_construct_url(r->pool, ifile, r));
- return HTTP_MOVED_PERMANENTLY;
- }
-
- if (d->index_names) {
- names_ptr = (char **)d->index_names->elts;
- num_names = d->index_names->nelts;
- }
- else {
- dummy_ptr[0] = AP_DEFAULT_INDEX;
- names_ptr = dummy_ptr;
- num_names = 1;
- }
-
- for (; num_names; ++names_ptr, --num_names) {
- /* XXX: Is this name_ptr considered escaped yet, or not??? */
- name_ptr = *names_ptr;
-
- /* Once upon a time args were handled _after_ the successful
- * redirect. But that redirect might then _refuse_ the
- * given r->args, creating a nasty tangle. It seems safer to
- * consider the r->args while we determine if name_ptr is our
- * viable index, and therefore set them up correctly on redirect.
- */
- if (r->args != NULL) {
- name_ptr = apr_pstrcat(r->pool, name_ptr, "?", r->args,
- NULL);
- }
-
- rr = ap_sub_req_lookup_uri(name_ptr, r, NULL);
-
- /* The sub request lookup is very liberal, and the core
- * map_to_storage handler will almost always result in HTTP_OK
- * as /foo/index.html may be /foo with PATH_INFO="/index.html",
- * or even / with PATH_INFO="/foo/index.html". To get around
- * this we insist that the the index be a regular filetype.
- *
- * Another reason is that the core handler also makes the
- * assumption that if r->finfo is still NULL by the time it
- * gets called, the file does not exist.
- */
- if (rr->status == HTTP_OK
- && ( (rr->handler && !strcmp(rr->handler, "proxy-server"))
- || rr->finfo.filetype == APR_REG)) {
- ap_internal_fast_redirect(rr, r);
- return OK;
- }
-
- /* If the request returned a redirect, propagate it to the
- * client
- */
-
- if (ap_is_HTTP_REDIRECT(rr->status)
- || (rr->status == HTTP_NOT_ACCEPTABLE && num_names == 1)
- || (rr->status == HTTP_UNAUTHORIZED && num_names == 1)) {
-
- apr_pool_join(r->pool, rr->pool);
- error_notfound = rr->status;
- r->notes = apr_table_overlay(r->pool, r->notes,
- rr->notes);
- r->headers_out = apr_table_overlay(r->pool,
- r->headers_out,
- rr->headers_out);
- r->err_headers_out = apr_table_overlay(r->pool,
- r->err_headers_out,
- rr->err_headers_out);
- return error_notfound;
- }
-
- /* If the request returned something other than 404 (or 200),
- * it means the module encountered some sort of problem. To be
- * secure, we should return the error, rather than allow
- * autoindex to create a (possibly unsafe) directory index.
- *
- * So we store the error, and if none of the listed files
- * exist, we return the last error response we got, instead
- * of a directory listing.
- */
- if (rr->status && rr->status != HTTP_NOT_FOUND
- && rr->status != HTTP_OK) {
- error_notfound = rr->status;
- }
-
- ap_destroy_sub_req(rr);
- }
-
- if (error_notfound) {
- return error_notfound;
- }
-
- /* nothing for us to do, pass on through */
- return DECLINED;
-}
-
-
-static void
-mod_glfs_register_hooks(apr_pool_t *p)
-{
- ap_hook_child_init (mod_glfs_child_init, NULL, NULL, APR_HOOK_MIDDLE);
- ap_hook_handler (mod_glfs_handler, NULL, NULL, APR_HOOK_REALLY_FIRST);
- ap_hook_map_to_storage (mod_glfs_map_to_storage, NULL, NULL,
- APR_HOOK_REALLY_FIRST);
- ap_hook_fixups(mod_glfs_fixup_dir,NULL,NULL,APR_HOOK_LAST);
-
-/* mod_glfs_output_filter_handle =
- ap_register_output_filter ("MODGLFS", mod_glfs_output_filter,
- NULL, AP_FTYPE_PROTOCOL); */
-}
-
-static const char *
-cmd_add_index (cmd_parms *cmd, void *dummy, const char *arg)
-{
- glusterfs_dir_config_t *d = dummy;
-
- if (!d->index_names) {
- d->index_names = apr_array_make(cmd->pool, 2, sizeof(char *));
- }
- *(const char **)apr_array_push(d->index_names) = arg;
- return NULL;
-}
-
-static const char *
-cmd_configure_slash (cmd_parms *cmd, void *d_, int arg)
-{
- glusterfs_dir_config_t *d = d_;
-
- d->do_slash = arg ? SLASH_ON : SLASH_OFF;
- return NULL;
-}
-
-#define DIR_CMD_PERMS OR_INDEXES
-
-static const
-command_rec mod_glfs_cmds[] =
-{
- AP_INIT_TAKE1(
- "GlusterfsLogfile",
- cmd_add_logfile,
- NULL,
- ACCESS_CONF, /*FIXME: allow overriding in .htaccess files */
- "Glusterfs logfile"
- ),
-
- AP_INIT_TAKE1(
- "GlusterfsLoglevel",
- cmd_set_loglevel,
- NULL,
- ACCESS_CONF,
- "Glusterfs loglevel:anyone of none, critical, error, warning, "
- "debug"
- ),
-
- AP_INIT_TAKE1(
- "GlusterfsCacheTimeout",
- cmd_set_cache_timeout,
- NULL,
- ACCESS_CONF,
- "Timeout value in seconds for lookup and stat cache of "
- "libglusterfsclient"
- ),
-
- AP_INIT_TAKE1(
- "GlusterfsVolumeSpecfile",
- cmd_add_volume_specfile,
- NULL,
- ACCESS_CONF,
- "Glusterfs Volume specfication file specifying filesystem "
- "under this directory"
- ),
-
- AP_INIT_TAKE1(
- "GlusterfsXattrFileSize",
- cmd_add_xattr_file_size,
- NULL,
- ACCESS_CONF,
- "Maximum size of the file that can be fetched through "
- "extended attribute interface of libglusterfsclient"
- ),
-
- /* mod_dir cmds */
- AP_INIT_ITERATE("DirectoryIndex", cmd_add_index,
- NULL, DIR_CMD_PERMS,
- "a list of file names"),
-
- AP_INIT_FLAG("DirectorySlash", cmd_configure_slash,
- NULL, DIR_CMD_PERMS,
- "On or Off"),
-
- /* autoindex cmds */
- AP_INIT_ITERATE2("AddIcon", cmd_add_icon,
- BY_PATH, DIR_CMD_PERMS,
- "an icon URL followed by one or more filenames"),
-
- AP_INIT_ITERATE2("AddIconByType", cmd_add_icon,
- BY_TYPE, DIR_CMD_PERMS,
- "an icon URL followed by one or more MIME types"),
-
- AP_INIT_ITERATE2("AddIconByEncoding", cmd_add_icon,
- BY_ENCODING, DIR_CMD_PERMS,
- "an icon URL followed by one or more content encodings"),
-
- AP_INIT_ITERATE2("AddAlt", cmd_add_alt, BY_PATH,
- DIR_CMD_PERMS,
- "alternate descriptive text followed by one or more "
- "filenames"),
-
- AP_INIT_ITERATE2("AddAltByType", cmd_add_alt,
- BY_TYPE, DIR_CMD_PERMS,
- "alternate descriptive text followed by one or more "
- "MIME types"),
-
- AP_INIT_ITERATE2("AddAltByEncoding", cmd_add_alt,
- BY_ENCODING, DIR_CMD_PERMS,
- "alternate descriptive text followed by one or more "
- "content encodings"),
-
- AP_INIT_TAKE_ARGV("IndexOptions", cmd_add_opts,
- NULL, DIR_CMD_PERMS,
- "one or more index options [+|-][]"),
-
- AP_INIT_TAKE2("IndexOrderDefault", cmd_set_default_order,
- NULL, DIR_CMD_PERMS,
- "{Ascending,Descending} {Name,Size,Description,Date}"),
-
- AP_INIT_ITERATE("IndexIgnore", cmd_add_ignore,
- NULL, DIR_CMD_PERMS,
- "one or more file extensions"),
-
- AP_INIT_ITERATE2("AddDescription", cmd_add_desc,
- BY_PATH, DIR_CMD_PERMS,
- "Descriptive text followed by one or more filenames"),
-
- AP_INIT_TAKE1("HeaderName", cmd_add_header,
- NULL, DIR_CMD_PERMS,
- "a filename"),
-
- AP_INIT_TAKE1("ReadmeName", cmd_add_readme,
- NULL, DIR_CMD_PERMS,
- "a filename"),
-
- AP_INIT_RAW_ARGS("FancyIndexing", ap_set_deprecated,
- NULL, OR_ALL,
- "The FancyIndexing directive is no longer supported. "
- "Use IndexOptions FancyIndexing."),
-
- AP_INIT_TAKE1("DefaultIcon", ap_set_string_slot,
- (void *)APR_OFFSETOF(glusterfs_dir_config_t,
- default_icon),
- DIR_CMD_PERMS, "an icon URL"),
-
- AP_INIT_TAKE1("IndexStyleSheet", ap_set_string_slot,
- (void *)APR_OFFSETOF(glusterfs_dir_config_t, style_sheet),
- DIR_CMD_PERMS, "URL to style sheet"),
-
- {NULL}
-};
-
-module AP_MODULE_DECLARE_DATA glusterfs_module =
-{
- STANDARD20_MODULE_STUFF,
- mod_glfs_create_dir_config,
- mod_glfs_merge_dir_config,
- NULL, //mod_glfs_create_server_config,
- NULL, //mod_glfs_merge_server_config,
- mod_glfs_cmds,
- mod_glfs_register_hooks,
-};
diff --git a/mod_glusterfs/apache/Makefile.am b/mod_glusterfs/apache/Makefile.am
deleted file mode 100644
index bda03931052..00000000000
--- a/mod_glusterfs/apache/Makefile.am
+++ /dev/null
@@ -1,10 +0,0 @@
-SUBDIRS = $(MOD_GLUSTERFS_HTTPD_VERSION)
-
-EXTRA_DIST = 1.3/Makefile.am 1.3/Makefile.in \
- 1.3/src/Makefile.am 1.3/src/Makefile.in \
- 1.3/src/mod_glusterfs.c \
- 1.3/src/README.txt \
- 2.2/Makefile.am 2.2/Makefile.in \
- 2.2/src/Makefile.am 2.2/src/Makefile.in \
- 2.2/src/mod_glusterfs.c
-CLEANFILES =
diff --git a/mod_glusterfs/lighttpd/1.4/Makefile.am b/mod_glusterfs/lighttpd/1.4/Makefile.am
deleted file mode 100644
index eda329111dc..00000000000
--- a/mod_glusterfs/lighttpd/1.4/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-EXTRA_DIST = Makefile.am.diff mod_glusterfs.c mod_glusterfs.h README.txt
-
-CLEANFILES =
diff --git a/mod_glusterfs/lighttpd/1.4/Makefile.am.diff b/mod_glusterfs/lighttpd/1.4/Makefile.am.diff
deleted file mode 100644
index 375696b5d8f..00000000000
--- a/mod_glusterfs/lighttpd/1.4/Makefile.am.diff
+++ /dev/null
@@ -1,29 +0,0 @@
---- lighttpd-1.4.19/src/Makefile.am 2008-04-16 18:42:18.000000000 +0400
-+++ lighttpd-1.4.19.mod/src/Makefile.am 2008-04-16 18:41:11.000000000 +0400
-@@ -1,4 +1,4 @@
--AM_CFLAGS = $(FAM_CFLAGS)
-+AM_CFLAGS = $(FAM_CFLAGS) -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64
-
- noinst_PROGRAMS=proc_open lemon # simple-fcgi #graphic evalo bench ajp ssl error_test adserver gen-license
- sbin_PROGRAMS=lighttpd lighttpd-angel
-@@ -241,6 +241,11 @@
- mod_accesslog_la_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined
- mod_accesslog_la_LIBADD = $(common_libadd)
-
-+lib_LTLIBRARIES += mod_glusterfs.la
-+mod_glusterfs_la_SOURCES = mod_glusterfs.c
-+mod_glusterfs_la_CFLAGS = $(AM_CFLAGS)
-+mod_glusterfs_la_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined -lglusterfsclient -lpthread
-+mod_glusterfs_la_LIBADD = $(common_libadd)
-
- hdr = server.h buffer.h network.h log.h keyvalue.h \
- response.h request.h fastcgi.h chunk.h \
-@@ -254,7 +259,7 @@
- configparser.h mod_ssi_exprparser.h \
- sys-mmap.h sys-socket.h mod_cml.h mod_cml_funcs.h \
- splaytree.h proc_open.h status_counter.h \
-- mod_magnet_cache.h
-+ mod_magnet_cache.h mod_glusterfs.h
-
- DEFS= @DEFS@ -DLIBRARY_DIR="\"$(libdir)\"" -DSBIN_DIR="\"$(sbindir)\""
-
diff --git a/mod_glusterfs/lighttpd/1.4/README.txt b/mod_glusterfs/lighttpd/1.4/README.txt
deleted file mode 100644
index 786a146e44d..00000000000
--- a/mod_glusterfs/lighttpd/1.4/README.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-Introduction
-============
-mod_glusterfs is a module written for lighttpd to speed up the access of files present on glusterfs. mod_glusterfs uses libglusterfsclient library provided for glusterfs and hence can be used without fuse (File System in User Space).
-
-Usage
-=====
-To use mod_glusterfs with lighttpd-1.4, copy mod_glusterfs.c and mod_glusterfs.h into src/ of lighttpd-1.4 source tree, and apply the Makefile.am.diff to src/Makefile.am. Re-run ./autogen.sh on the top level of the lighttpd-1.4 build tree and recompile.
-
-# cp mod_glusterfs.[ch] /home/glusterfs/lighttpd-1.4/src/
-# cp Makefile.am.diff /home/glusterfs/lighttpd-1.4/
-# cd /home/glusterfs/lighttpd-1.4
-# patch -p1 < Makefile.am.diff
-# ./autogen.sh
-# ./configure
-# make
-# make install
-
-Configuration
-=============
-* mod_glusterfs should be listed at the begining of the list server.modules in lighttpd.conf.
-
-Below is a snippet from lighttpd.conf concerning to mod_glusterfs.
-
-$HTTP["url"] =~ "^/glusterfs" {
- glusterfs.prefix = "/glusterfs"
- glusterfs.document-root = "/home/glusterfs/document-root"
- glusterfs.logfile = "/var/log/glusterfs-logfile"
- glusterfs.volume-specfile = "/etc/glusterfs/glusterfs.vol"
- glusterfs.loglevel = "error"
- glusterfs.cache-timeout = 300
- glusterfs.xattr-interface-size-limit = "65536"
-}
-
-* $HTTP["url"] =~ "^/glusterfs"
- A perl style regular expression used to match against the url. If regular expression matches the url, the url is handled by mod_glusterfs. Note that the pattern given here should match glusterfs.prefix.
-
-* glusterfs.prefix (COMPULSORY)
- A string to be present at the starting of the file path in the url so that the file would be handled by glusterfs.
- Eg., A GET request on the url http://www.example.com/glusterfs-prefix/some-dir/example-file will result in fetching of the file "/some-dir/example-file" from glusterfs mount if glusterfs.prefix is set to "/glusterfs-prefix".
-
-* glusterfs.volume-specfile (COMPULSORY)
- Path to the the glusterfs volume specification file.
-
-* glusterfs.logfile (COMPULSORY)
- Path to the glusterfs logfile.
-
-* glusterfs.loglevel (OPTIONAL, default = warning)
- Allowed values are critical, error, warning, debug, none in the decreasing order of severity of error conditions.
-
-* glusterfs.cache-timeout (OPTIONAL, default = 0)
- Timeout values for glusterfs stat and lookup cache.
-
-* glusterfs.document-root (COMPULSORY)
- An absolute path, relative to which all the files are fetched from glusterfs.
-
-* glusterfs.xattr-interface-size-limit (OPTIONAL, default = 0)
- Files with sizes upto and including this value are fetched through the extended attribute interface of glusterfs rather than the usual open-read-close set of operations. For files of small sizes, it is recommended to use extended attribute interface.
diff --git a/mod_glusterfs/lighttpd/1.4/mod_glusterfs.c b/mod_glusterfs/lighttpd/1.4/mod_glusterfs.c
deleted file mode 100644
index 295c9704c92..00000000000
--- a/mod_glusterfs/lighttpd/1.4/mod_glusterfs.c
+++ /dev/null
@@ -1,1820 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <ctype.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <pthread.h>
-#include <sys/types.h>
-#include <fcntl.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <errno.h>
-#include <unistd.h>
-#include <assert.h>
-
-#include "base.h"
-#include "log.h"
-#include "buffer.h"
-
-#include "plugin.h"
-
-#include "stat_cache.h"
-#include "mod_glusterfs.h"
-#include "etag.h"
-#include "http_chunk.h"
-#include "response.h"
-
-#include "fdevent.h"
-#include <libglusterfsclient.h>
-
-#ifdef HAVE_ATTR_ATTRIBUTES_H
-#include <attr/attributes.h>
-#endif
-
-#ifdef HAVE_FAM_H
-# include <fam.h>
-#endif
-
-#include "sys-mmap.h"
-
-/* NetBSD 1.3.x needs it */
-#ifndef MAP_FAILED
-# define MAP_FAILED -1
-#endif
-
-#ifndef O_LARGEFILE
-# define O_LARGEFILE 0
-#endif
-
-#ifndef HAVE_LSTAT
-#define lstat stat
-#endif
-
-#if 0
-/*
- enables debug code for testing if all nodes in the stat-cache as accessable
-*/
-#define DEBUG_STAT_CACHE
-#endif
-
-#ifdef HAVE_LSTAT
-#undef HAVE_LSTAT
-#endif
-
-#define GLUSTERFS_FILE_CHUNK (FILE_CHUNK + 1)
-
-/*
- Keep this value large. Each glusterfs_async_read of GLUSTERFS_CHUNK_SIZE
- results in a network_backend_write of the read data
-*/
-
-#define GLUSTERFS_CHUNK_SIZE 8192
-
-/**
- * this is a staticfile for a lighttpd plugin
- *
- */
-
-typedef struct glusterfs_async_local {
- int op_ret;
- int op_errno;
- char async_read_complete;
- off_t length;
- size_t read_bytes;
- glusterfs_iobuf_t *buf;
- pthread_mutex_t lock;
- pthread_cond_t cond;
-} glusterfs_async_local_t;
-
-
-typedef struct {
- glusterfs_file_t fd;
- void *buf;
- buffer *glusterfs_path;
- /* off_t response_content_length; */
- int prefix;
-}mod_glusterfs_ctx_t;
-
-/* plugin config for all request/connections */
-typedef struct {
- buffer *logfile;
- buffer *loglevel;
- buffer *specfile;
- buffer *prefix;
- buffer *xattr_file_size;
- buffer *document_root;
- array *exclude_exts;
- unsigned short cache_timeout;
- char mounted;
-} plugin_config;
-
-static int (*network_backend_write)(struct server *srv, connection *con, int fd,
- chunkqueue *cq);
-
-typedef struct {
- PLUGIN_DATA;
- buffer *range_buf;
- plugin_config **config_storage;
-
- plugin_config conf;
-} plugin_data;
-
-typedef struct {
- chunkqueue *cq;
- glusterfs_iobuf_t *buf;
- size_t length;
-}mod_glusterfs_chunkqueue;
-
-#ifdef HAVE_FAM_H
-typedef struct {
- FAMRequest *req;
- FAMConnection *fc;
-
- buffer *name;
-
- int version;
-} fam_dir_entry;
-#endif
-
-/* the directory name is too long to always compare on it
- * - we need a hash
- * - the hash-key is used as sorting criteria for a tree
- * - a splay-tree is used as we can use the caching effect of it
- */
-
-/* we want to cleanup the stat-cache every few seconds, let's say 10
- *
- * - remove entries which are outdated since 30s
- * - remove entries which are fresh but havn't been used since 60s
- * - if we don't have a stat-cache entry for a directory, release it from the
- * monitor
- */
-
-#ifdef DEBUG_STAT_CACHE
-typedef struct {
- int *ptr;
-
- size_t used;
- size_t size;
-} fake_keys;
-
-static fake_keys ctrl;
-#endif
-
-int
-mod_glusterfs_readv_async_cbk (int op_ret, int op_errno,
- glusterfs_iobuf_t *buf,
- void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
- pthread_mutex_lock (&local->lock);
- {
- local->async_read_complete = 1;
- local->buf = buf;
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- pthread_cond_signal (&local->cond);
- }
- pthread_mutex_unlock (&local->lock);
-
- return 0;
-}
-
-static int
-mod_glusterfs_read_async (server *srv, connection *con, chunk *glusterfs_chunk)
-{
- glusterfs_async_local_t local;
- off_t end = 0;
- int nbytes;
- int complete;
- chunkqueue *cq = NULL;
- chunk *c = NULL;
- off_t offset = glusterfs_chunk->file.start;
- size_t length = glusterfs_chunk->file.length;
- glusterfs_file_t fd = glusterfs_chunk->file.name;
-
- pthread_cond_init (&local.cond, NULL);
- pthread_mutex_init (&local.lock, NULL);
-
- //local.fd = fd;
- memset (&local, 0, sizeof (local));
-
- if (length > 0)
- end = offset + length;
-
- cq = chunkqueue_init ();
- if (!cq) {
- con->http_status = 500;
- return HANDLER_FINISHED;
- }
-
- do {
- glusterfs_iobuf_t *buf;
- int i;
- if (length > 0) {
- nbytes = end - offset;
- if (nbytes > GLUSTERFS_CHUNK_SIZE)
- nbytes = GLUSTERFS_CHUNK_SIZE;
- } else
- nbytes = GLUSTERFS_CHUNK_SIZE;
-
- glusterfs_read_async(fd,
- nbytes,
- offset,
- mod_glusterfs_readv_async_cbk,
- (void *)&local);
-
- pthread_mutex_lock (&local.lock);
- {
- while (!local.async_read_complete) {
- pthread_cond_wait (&local.cond, &local.lock);
- }
-
- local.async_read_complete = 0;
- buf = local.buf;
-
- if ((int)length < 0)
- complete = (local.op_ret <= 0);
- else {
- local.read_bytes += local.op_ret;
- complete = ((local.read_bytes == length)
- || (local.op_ret <= 0));
- }
- }
- pthread_mutex_unlock (&local.lock);
-
- if (local.op_ret > 0) {
- unsigned long check = 0;
- for (i = 0; i < buf->count; i++) {
- buffer *nw_write_buf = buffer_init ();
-
- check += buf->vector[i].iov_len;
-
- nw_write_buf->used = buf->vector[i].iov_len + 1;
- nw_write_buf->size = buf->vector[i].iov_len;
- nw_write_buf->ptr = buf->vector[i].iov_base;
-
- offset += local.op_ret;
- chunkqueue_append_buffer_weak(cq, nw_write_buf);
- }
-
- network_backend_write (srv, con, con->fd, cq);
-
- if (chunkqueue_written (cq) != local.op_ret) {
- mod_glusterfs_chunkqueue *gf_cq;
- glusterfs_chunk->file.start = offset;
- if ((int)glusterfs_chunk->file.length > 0)
- glusterfs_chunk->file.length -= local.read_bytes;
-
- gf_cq = calloc (1, sizeof (*gf_cq));
- /* ERR_ABORT (gf_cq); */
- gf_cq->cq = cq;
- gf_cq->buf = buf;
- gf_cq->length = local.op_ret;
- glusterfs_chunk->file.mmap.start =(char *)gf_cq;
- return local.read_bytes;
- }
-
- for (c = cq->first ; c; c = c->next)
- c->mem->ptr = NULL;
-
- chunkqueue_reset (cq);
- }
-
- glusterfs_free (buf);
- } while (!complete);
-
- chunkqueue_free (cq);
- glusterfs_close (fd);
-
- if (local.op_ret < 0)
- con->http_status = 500;
-
- return (local.op_ret < 0 ? HANDLER_FINISHED : HANDLER_GO_ON);
-}
-
-int mod_glusterfs_network_backend_write(struct server *srv, connection *con,
- int fd, chunkqueue *cq)
-{
- chunk *c, *prev, *first;
- int chunks_written = 0;
- int error = 0;
-
- for (first = prev = c = cq->first; c; c = c->next, chunks_written++) {
-
- if (c->type == MEM_CHUNK && c->mem->used && !c->mem->ptr) {
- if (cq->first != c) {
- prev->next = NULL;
-
- /* call stored network_backend_write */
- network_backend_write (srv, con, fd, cq);
-
- prev->next = c;
- }
- cq->first = c->next;
-
- if (c->file.fd < 0) {
- error = HANDLER_ERROR;
- break;
- }
-
- if (c->file.mmap.start) {
- chunk *tmp;
- mod_glusterfs_chunkqueue *gf_cq = NULL;
-
- gf_cq = (mod_glusterfs_chunkqueue *)c->file.mmap.start;
-
- network_backend_write (srv, con, fd, gf_cq->cq);
-
- if ((size_t)chunkqueue_written (gf_cq->cq)
- != gf_cq->length) {
- cq->first = first;
- return chunks_written;
- }
- for (tmp = gf_cq->cq->first ; tmp;
- tmp = tmp->next)
- tmp->mem->ptr = NULL;
-
- chunkqueue_free (gf_cq->cq);
- glusterfs_free (gf_cq->buf);
- free (gf_cq);
- c->file.mmap.start = NULL;
- }
-
- mod_glusterfs_read_async (srv, con, c);
- if (c->file.mmap.start) {
- /* pending chunkqueue from
- mod_glusterfs_read_async to be written to
- network */
- cq->first = first;
- return chunks_written;
- }
-
- buffer_free (c->mem);
- c->mem = NULL;
-
- c->type = FILE_CHUNK;
- c->offset = c->file.length = 0;
- c->file.name = NULL;
-
- if (first == c)
- first = c->next;
-
- if (cq->last == c)
- cq->last = NULL;
-
- prev->next = c->next;
-
- free(c);
- }
- prev = c;
- }
-
- network_backend_write (srv, con, fd, cq);
-
- cq->first = first;
-
- return chunks_written;
-}
-
-int chunkqueue_append_glusterfs_file (connection *con, glusterfs_file_t fd,
- off_t offset, size_t len, size_t buf_size)
-{
- chunk *c = NULL;
- c = chunkqueue_get_append_tempfile (con->write_queue);
-
- if (c->file.is_temp) {
- close (c->file.fd);
- unlink (c->file.name->ptr);
- }
-
- c->type = MEM_CHUNK;
-
- buffer_free (c->mem);
-
- c->mem = buffer_init ();
- c->mem->used = len + 1;
- c->mem->size = buf_size;
- c->mem->ptr = NULL;
- c->offset = 0;
-
- buffer_free (c->file.name);
-
- /* fd returned by libglusterfsclient is a pointer */
- c->file.name = (buffer *)fd;
- c->file.start = offset;
- c->file.length = len;
-
- //c->file.fd = fd;
- c->file.mmap.start = NULL;
- return 0;
-}
-
-/* init the plugin data */
-INIT_FUNC(mod_glusterfs_init) {
- plugin_data *p;
-
- p = calloc(1, sizeof(*p));
- network_backend_write = NULL;
-
- return p;
-}
-
-/* detroy the plugin data */
-FREE_FUNC(mod_glusterfs_free) {
- plugin_data *p = p_d;
-
- UNUSED (srv);
-
- if (!p) return HANDLER_GO_ON;
-
- if (p->config_storage) {
- size_t i;
- for (i = 0; i < srv->config_context->used; i++) {
- plugin_config *s = p->config_storage[i];
-
- buffer_free (s->logfile);
- buffer_free (s->loglevel);
- buffer_free (s->specfile);
- buffer_free (s->prefix);
- buffer_free (s->xattr_file_size);
- buffer_free (s->document_root);
- array_free (s->exclude_exts);
-
- free (s);
- }
- free (p->config_storage);
- }
- buffer_free (p->range_buf);
-
- free (p);
-
- return HANDLER_GO_ON;
-}
-
-SETDEFAULTS_FUNC(mod_glusterfs_set_defaults) {
- plugin_data *p = p_d;
- size_t i = 0;
-
- config_values_t cv[] = {
- { "glusterfs.logfile", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.loglevel", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.volume-specfile", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.cache-timeout", NULL, T_CONFIG_SHORT,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.exclude-extensions", NULL, T_CONFIG_ARRAY,
- T_CONFIG_SCOPE_CONNECTION },
-
- /*TODO: get the prefix from config_conext and
- remove glusterfs.prefix from conf file */
- { "glusterfs.prefix", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.xattr-interface-size-limit", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.document-root", NULL, T_CONFIG_STRING,
- T_CONFIG_SCOPE_CONNECTION },
-
- { NULL, NULL, T_CONFIG_UNSET,
- T_CONFIG_SCOPE_UNSET }
- };
-
- p->config_storage = calloc(1,
- srv->config_context->used
- * sizeof(specific_config *));
- /* ERR_ABORT (p->config_storage);*/
- p->range_buf = buffer_init ();
-
- for (i = 0; i < srv->config_context->used; i++) {
- plugin_config *s;
-
- s = calloc(1, sizeof(plugin_config));
- /* ERR_ABORT (s); */
- s->logfile = buffer_init ();
- s->loglevel = buffer_init ();
- s->specfile = buffer_init ();
- s->document_root = buffer_init ();
- s->exclude_exts = array_init ();
- s->prefix = buffer_init ();
- s->xattr_file_size = buffer_init ();
-
- cv[0].destination = s->logfile;
- cv[1].destination = s->loglevel;
- cv[2].destination = s->specfile;
- cv[3].destination = &s->cache_timeout;
- cv[4].destination = s->exclude_exts;
- cv[5].destination = s->prefix;
- cv[6].destination = s->xattr_file_size;
- cv[7].destination = s->document_root;
- p->config_storage[i] = s;
-
- if (0 != config_insert_values_global(srv,
- ((data_config *)srv->config_context->data[i])->value,
- cv)) {
- return HANDLER_FINISHED;
- }
- }
-
- return HANDLER_GO_ON;
-}
-
-#define PATCH(x) \
- p->conf.x = s->x;
-
-static int mod_glusterfs_patch_connection(server *srv, connection *con,
- plugin_data *p) {
- size_t i, j;
- plugin_config *s;
-
- /* skip the first, the global context */
- /*
- glusterfs related config can only occur inside
- $HTTP["url"] == "<glusterfs-prefix>"
- */
- p->conf.logfile = NULL;
- p->conf.loglevel = NULL;
- p->conf.specfile = NULL;
- p->conf.cache_timeout = 0;
- p->conf.exclude_exts = NULL;
- p->conf.prefix = NULL;
- p->conf.xattr_file_size = NULL;
- p->conf.document_root = NULL;
-
- for (i = 1; i < srv->config_context->used; i++) {
- data_config *dc = (data_config *)srv->config_context->data[i];
- s = p->config_storage[i];
-
- /* condition didn't match */
- if (!config_check_cond(srv, con, dc)) continue;
-
- /* merge config */
- for (j = 0; j < dc->value->used; j++) {
- data_unset *du = dc->value->data[j];
-
- if (buffer_is_equal_string (du->key,
- CONST_STR_LEN("glusterfs.logfile"))) {
- PATCH (logfile);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN("glusterfs.loglevel"))) {
- PATCH (loglevel);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.volume-specfile"))) {
- PATCH (specfile);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN("glusterfs.cache-timeout"))) {
- PATCH (cache_timeout);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.exclude-extensions"))) {
- PATCH (exclude_exts);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.prefix"))) {
- PATCH (prefix);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.xattr-interface-size-limit"))) {
- PATCH (xattr_file_size);
- } else if (buffer_is_equal_string (du->key,
- CONST_STR_LEN ("glusterfs.document-root"))) {
- PATCH (document_root);
- }
- }
- }
- return 0;
-}
-
-#undef PATCH
-
-static int http_response_parse_range(server *srv, connection *con,
- plugin_data *p) {
- int multipart = 0;
- int error;
- off_t start, end;
- const char *s, *minus;
- char *boundary = "fkj49sn38dcn3";
- data_string *ds;
- stat_cache_entry *sce = NULL;
- buffer *content_type = NULL;
- size_t size = 0;
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr) {
- size = atoi (p->conf.xattr_file_size->ptr);
- }
-
- if (HANDLER_ERROR == stat_cache_get_entry(srv, con, con->physical.path,
- &sce)) {
- SEGFAULT();
- }
-
- start = 0;
- end = sce->st.st_size - 1;
-
- con->response.content_length = 0;
-
- if (NULL != (ds = (data_string *)array_get_element(con->response.headers,
- "Content-Type"))) {
- content_type = ds->value;
- }
-
- for (s = con->request.http_range, error = 0;
- !error && *s && NULL != (minus = strchr(s, '-')); ) {
- char *err;
- off_t la, le;
-
- if (s == minus) {
- /* -<stop> */
-
- le = strtoll(s, &err, 10);
-
- if (le == 0) {
- /* RFC 2616 - 14.35.1 */
-
- con->http_status = 416;
- error = 1;
- } else if (*err == '\0') {
- /* end */
- s = err;
-
- end = sce->st.st_size - 1;
- start = sce->st.st_size + le;
- } else if (*err == ',') {
- multipart = 1;
- s = err + 1;
-
- end = sce->st.st_size - 1;
- start = sce->st.st_size + le;
- } else {
- error = 1;
- }
-
- } else if (*(minus+1) == '\0' || *(minus+1) == ',') {
- /* <start>- */
-
- la = strtoll(s, &err, 10);
-
- if (err == minus) {
- /* ok */
-
- if (*(err + 1) == '\0') {
- s = err + 1;
-
- end = sce->st.st_size - 1;
- start = la;
-
- } else if (*(err + 1) == ',') {
- multipart = 1;
- s = err + 2;
-
- end = sce->st.st_size - 1;
- start = la;
- } else {
- error = 1;
- }
- } else {
- /* error */
- error = 1;
- }
- } else {
- /* <start>-<stop> */
-
- la = strtoll(s, &err, 10);
-
- if (err == minus) {
- le = strtoll(minus+1, &err, 10);
-
- /* RFC 2616 - 14.35.1 */
- if (la > le) {
- error = 1;
- }
-
- if (*err == '\0') {
- /* ok, end*/
- s = err;
-
- end = le;
- start = la;
- } else if (*err == ',') {
- multipart = 1;
- s = err + 1;
-
- end = le;
- start = la;
- } else {
- /* error */
-
- error = 1;
- }
- } else {
- /* error */
-
- error = 1;
- }
- }
-
- if (!error) {
- if (start < 0) start = 0;
-
- /* RFC 2616 - 14.35.1 */
- if (end > sce->st.st_size - 1) end = sce->st.st_size - 1;
-
- if (start > sce->st.st_size - 1) {
- error = 1;
-
- con->http_status = 416;
- }
- }
-
- if (!error) {
- if (multipart) {
- /* write boundary-header */
- buffer *b;
-
- b = chunkqueue_get_append_buffer(con->write_queue);
-
- buffer_copy_string(b, "\r\n--");
- buffer_append_string(b, boundary);
-
- /* write Content-Range */
- buffer_append_string(b, "\r\nContent-Range: "
- "bytes ");
- buffer_append_off_t(b, start);
- buffer_append_string(b, "-");
- buffer_append_off_t(b, end);
- buffer_append_string(b, "/");
- buffer_append_off_t(b, sce->st.st_size);
-
- buffer_append_string(b, "\r\nContent-Type: ");
- buffer_append_string_buffer(b, content_type);
-
- /* write END-OF-HEADER */
- buffer_append_string(b, "\r\n\r\n");
-
- con->response.content_length += b->used - 1;
-
- }
-
- if ((size_t)sce->st.st_size >= size) {
- chunkqueue_append_glusterfs_file(con, ctx->fd,
- start,
- end - start,
- size);
- } else {
- if (!start) {
- buffer *mem = buffer_init ();
- mem->ptr = ctx->buf;
- mem->used = mem->size = sce->st.st_size;
- http_chunk_append_buffer (srv, con, mem);
- ctx->buf = NULL;
- } else {
- chunkqueue_append_mem (con->write_queue,
- ((char *)ctx->buf)
- + start,
- end - start + 1);
- }
- }
-
- con->response.content_length += end - start + 1;
- }
- }
-
- if (ctx->buf) {
- free (ctx->buf);
- ctx->buf = NULL;
- }
-
- /* something went wrong */
- if (error) return -1;
-
- if (multipart) {
- /* add boundary end */
- buffer *b;
-
- b = chunkqueue_get_append_buffer(con->write_queue);
-
- buffer_copy_string_len(b, "\r\n--", 4);
- buffer_append_string(b, boundary);
- buffer_append_string_len(b, "--\r\n", 4);
-
- con->response.content_length += b->used - 1;
-
- /* set header-fields */
-
- buffer_copy_string(p->range_buf, "multipart/byteranges; boundary=");
- buffer_append_string(p->range_buf, boundary);
-
- /* overwrite content-type */
- response_header_overwrite(srv, con, CONST_STR_LEN("Content-Type"),
- CONST_BUF_LEN(p->range_buf));
- } else {
- /* add Content-Range-header */
-
- buffer_copy_string(p->range_buf, "bytes ");
- buffer_append_off_t(p->range_buf, start);
- buffer_append_string(p->range_buf, "-");
- buffer_append_off_t(p->range_buf, end);
- buffer_append_string(p->range_buf, "/");
- buffer_append_off_t(p->range_buf, sce->st.st_size);
-
- response_header_insert(srv, con, CONST_STR_LEN("Content-Range"),
- CONST_BUF_LEN(p->range_buf));
- }
-
- /* ok, the file is set-up */
- return 0;
-}
-
-PHYSICALPATH_FUNC(mod_glusterfs_handle_physical) {
- plugin_data *p = p_d;
- stat_cache_entry *sce;
- mod_glusterfs_ctx_t *plugin_ctx = NULL;
- size_t size = 0;
- int ret = 0;
-
- if (con->http_status != 0) return HANDLER_GO_ON;
- if (con->uri.path->used == 0) return HANDLER_GO_ON;
- if (con->physical.path->used == 0) return HANDLER_GO_ON;
-
- if (con->mode != DIRECT) return HANDLER_GO_ON;
-
- /*
- network_backend_write = srv->network_backend_write;
- srv->network_backend_write = mod_glusterfs_network_backend_write;
- */
-
- switch (con->request.http_method) {
- case HTTP_METHOD_GET:
- case HTTP_METHOD_POST:
- case HTTP_METHOD_HEAD:
- break;
-
- default:
- return HANDLER_GO_ON;
- }
-
- mod_glusterfs_patch_connection(srv, con, p);
- if (!p->conf.prefix || p->conf.prefix->used == 0) {
- return HANDLER_GO_ON;
- }
-
- if (!p->conf.document_root || p->conf.document_root->used == 0) {
- log_error_write(srv, __FILE__, __LINE__, "s",
- "glusterfs.document-root is not specified");
- con->http_status = 500;
- return HANDLER_FINISHED;
- }
-
- if (!p->conf.mounted) {
- glusterfs_init_params_t ctx;
-
- if (!p->conf.specfile || p->conf.specfile->used == 0) {
- return HANDLER_GO_ON;
- }
- memset (&ctx, 0, sizeof (ctx));
-
- ctx.specfile = p->conf.specfile->ptr;
- ctx.logfile = p->conf.logfile->ptr;
- ctx.loglevel = p->conf.loglevel->ptr;
- ctx.lookup_timeout = ctx.stat_timeout = p->conf.cache_timeout;
-
- ret = glusterfs_mount (p->conf.prefix->ptr, &ctx);
- if (ret != 0) {
- con->http_status = 500;
- log_error_write(srv, __FILE__, __LINE__, "sbs",
- "glusterfs initialization failed, "
- "please check your configuration. "
- "Glusterfs logfile ",
- p->conf.logfile,
- "might contain details");
- return HANDLER_FINISHED;
- }
- p->conf.mounted = 1;
- }
-
- size = 0;
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if (!con->plugin_ctx[p->id]) {
-/* FIXME: what if multiple files are requested from a single connection? */
-/* TODO: check whether this works fine for HTTP protocol 1.1 */
-
- buffer *tmp_buf = buffer_init_buffer (con->physical.basedir);
-
- plugin_ctx = calloc (1, sizeof (*plugin_ctx));
- /* ERR_ABORT (plugin_ctx); */
- con->plugin_ctx[p->id] = plugin_ctx;
-
- buffer_append_string_buffer (tmp_buf, p->conf.prefix);
- buffer_path_simplify (tmp_buf, tmp_buf);
-
- plugin_ctx->prefix = tmp_buf->used - 1;
- if (tmp_buf->ptr[plugin_ctx->prefix - 1] == '/')
- plugin_ctx->prefix--;
-
- buffer_free (tmp_buf);
- } else
- /*FIXME: error!! error!! */
- plugin_ctx = con->plugin_ctx[p->id];
-
-
- if (size)
- {
- plugin_ctx->buf = malloc (size);
- /* ERR_ABORT (plugin_ctx->buf); */
- }
-
- plugin_ctx->glusterfs_path = buffer_init ();
- buffer_copy_string_buffer (plugin_ctx->glusterfs_path,
- p->conf.prefix);
- buffer_append_string (plugin_ctx->glusterfs_path,
- p->conf.document_root->ptr);
- buffer_append_string (plugin_ctx->glusterfs_path, "/");
- buffer_append_string (plugin_ctx->glusterfs_path,
- con->physical.path->ptr + plugin_ctx->prefix);
- buffer_path_simplify (plugin_ctx->glusterfs_path,
- plugin_ctx->glusterfs_path);
-
- if (glusterfs_stat_cache_get_entry (srv, con,
- plugin_ctx->glusterfs_path,
- con->physical.path, plugin_ctx->buf,
- size, &sce) == HANDLER_ERROR) {
- if (errno == ENOENT)
- con->http_status = 404;
- else
- con->http_status = 403;
-
- free (plugin_ctx->buf);
- buffer_free (plugin_ctx->glusterfs_path);
- plugin_ctx->glusterfs_path = NULL;
- plugin_ctx->buf = NULL;
-
- free (plugin_ctx);
- con->plugin_ctx[p->id] = NULL;
-
- return HANDLER_FINISHED;
- }
-
- if (!(S_ISREG (sce->st.st_mode) && (size_t)sce->st.st_size < size)) {
- free (plugin_ctx->buf);
- plugin_ctx->buf = NULL;
- }
-
- return HANDLER_GO_ON;
-}
-
-static int http_chunk_append_len(server *srv, connection *con, size_t len) {
- size_t i, olen = len, j;
- buffer *b;
-
- b = srv->tmp_chunk_len;
-
- if (len == 0) {
- buffer_copy_string(b, "0");
- } else {
- for (i = 0; i < 8 && len; i++) {
- len >>= 4;
- }
-
- /* i is the number of hex digits we have */
- buffer_prepare_copy(b, i + 1);
-
- for (j = i-1, len = olen; j+1 > 0; j--) {
- b->ptr[j] = (len & 0xf) + (((len & 0xf) <= 9) ?
- '0' : 'a' - 10);
- len >>= 4;
- }
- b->used = i;
- b->ptr[b->used++] = '\0';
- }
-
- buffer_append_string(b, "\r\n");
- chunkqueue_append_buffer(con->write_queue, b);
-
- return 0;
-}
-
-int http_chunk_append_glusterfs_file_chunk(server *srv, connection *con,
- glusterfs_file_t fd, off_t offset,
- off_t len, size_t buf_size) {
- chunkqueue *cq;
-
- if (!con) return -1;
-
- cq = con->write_queue;
-
- if (con->response.transfer_encoding & HTTP_TRANSFER_ENCODING_CHUNKED) {
- http_chunk_append_len(srv, con, len);
- }
-
- chunkqueue_append_glusterfs_file (con, fd, offset, len, buf_size);
-
- if ((con->response.transfer_encoding & HTTP_TRANSFER_ENCODING_CHUNKED)
- && (len > 0)) {
- chunkqueue_append_mem(cq, "\r\n", 2 + 1);
- }
-
- return 0;
-}
-
-int http_chunk_append_glusterfs_mem(server *srv, connection *con,
- char * mem, size_t len,
- size_t buf_size)
-{
- chunkqueue *cq = NULL;
- buffer *buf = NULL;
-
- if (!con) return -1;
-
- cq = con->write_queue;
-
- if (len == 0) {
- free (mem);
- if (con->response.transfer_encoding
- & HTTP_TRANSFER_ENCODING_CHUNKED) {
- chunkqueue_append_mem(cq, "0\r\n\r\n", 5 + 1);
- } else {
- chunkqueue_append_mem(cq, "", 1);
- }
- return 0;
- }
-
- if (con->response.transfer_encoding & HTTP_TRANSFER_ENCODING_CHUNKED) {
- http_chunk_append_len(srv, con, len - 1);
- }
-
- buf = buffer_init ();
-
- buf->used = len + 1;
- buf->size = buf_size;
- buf->ptr = (char *)mem;
- chunkqueue_append_buffer_weak (cq, buf);
-
- if (con->response.transfer_encoding & HTTP_TRANSFER_ENCODING_CHUNKED) {
- chunkqueue_append_mem(cq, "\r\n", 2 + 1);
- }
-
- return 0;
-}
-
-
-
-URIHANDLER_FUNC(mod_glusterfs_subrequest) {
- plugin_data *p = p_d;
- stat_cache_entry *sce = NULL;
- int s_len;
- char allow_caching = 1;
- size_t size = 0;
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
-
- /* someone else has done a decision for us */
- if (con->http_status != 0) return HANDLER_GO_ON;
- if (con->uri.path->used == 0) return HANDLER_GO_ON;
- if (con->physical.path->used == 0) return HANDLER_GO_ON;
-
- /* someone else has handled this request */
- if (con->mode != DIRECT) return HANDLER_GO_ON;
-
- /* we only handle GET, POST and HEAD */
- switch(con->request.http_method) {
- case HTTP_METHOD_GET:
- case HTTP_METHOD_POST:
- case HTTP_METHOD_HEAD:
- break;
- default:
- return HANDLER_GO_ON;
- }
-
- mod_glusterfs_patch_connection(srv, con, p);
-
- if (!p->conf.prefix || !p->conf.prefix->used)
- return HANDLER_GO_ON;
-
- s_len = con->uri.path->used - 1;
- /* ignore certain extensions */
- /*
- for (k = 0; k < p->conf.exclude_exts->used; k++) {
- data_string *ds;
- ds = (data_string *)p->conf.exclude_exts->data[k];
-
- if (ds->value->used == 0) continue;
-
- if (!strncmp (ds->value->ptr, con->uri.path->ptr,
- strlen (ds->value->ptr)))
- break;
- }
-
- if (k == p->conf.exclude_exts->used) {
- return HANDLER_GO_ON;
- }
- */
-
- if (con->conf.log_request_handling) {
- log_error_write(srv, __FILE__, __LINE__, "s",
- "-- serving file from glusterfs");
- }
-
- if (HANDLER_ERROR == stat_cache_get_entry(srv, con, con->physical.path,
- &sce)) {
- con->http_status = 403;
-
- log_error_write(srv, __FILE__, __LINE__, "sbsb",
- "not a regular file:", con->uri.path,
- "->", con->physical.path);
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
-
- return HANDLER_FINISHED;
- }
-
- if (con->uri.path->ptr[s_len] == '/' || !S_ISREG(sce->st.st_mode)) {
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if ((size_t)sce->st.st_size > size) {
- ctx->fd = glusterfs_open (ctx->glusterfs_path->ptr, O_RDONLY,
- 0);
-
- if (((long)ctx->fd) == 0) {
- con->http_status = 403;
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
- }
-
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
-
- /* we only handline regular files */
-#ifdef HAVE_LSTAT
- if ((sce->is_symlink == 1) && !con->conf.follow_symlink) {
- con->http_status = 403;
-
- if (con->conf.log_request_handling) {
- log_error_write(srv, __FILE__, __LINE__, "s",
- "-- access denied due symlink "
- "restriction");
- log_error_write(srv, __FILE__, __LINE__, "sb",
- "Path :", con->physical.path);
- }
-
- buffer_reset(con->physical.path);
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
-#endif
- if (!S_ISREG(sce->st.st_mode)) {
- con->http_status = 404;
-
- if (con->conf.log_file_not_found) {
- log_error_write(srv, __FILE__, __LINE__, "sbsb",
- "not a regular file:", con->uri.path,
- "->", sce->name);
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
-
- return HANDLER_FINISHED;
- }
-
- /* mod_compress might set several data directly, don't overwrite them */
-
- /* set response content-type, if not set already */
-
- if (NULL == array_get_element(con->response.headers, "Content-Type")) {
- if (buffer_is_empty(sce->content_type)) {
- /* we are setting application/octet-stream, but also "
- * announce that this header field might change in "
- * the seconds few requests. This should fix the
- * aggressive caching of FF and the script download
- * seen by the first installations
- */
- response_header_overwrite(srv, con,
- CONST_STR_LEN("Content-Type"),
- CONST_STR_LEN("application/"
- "octet-stream"));
-
- allow_caching = 0;
- } else {
- response_header_overwrite(srv, con,
- CONST_STR_LEN("Content-Type"),
- CONST_BUF_LEN(sce->content_type));
- }
- }
-
- if (con->conf.range_requests) {
- response_header_overwrite(srv, con,
- CONST_STR_LEN("Accept-Ranges"),
- CONST_STR_LEN("bytes"));
- }
-
- /* TODO: Allow Cachable requests */
-#if 0
- if (allow_caching) {
- if (p->conf.etags_used && con->etag_flags != 0
- && !buffer_is_empty(sce->etag)) {
- if (NULL == array_get_element(con->response.headers,
- "ETag")) {
- /* generate e-tag */
- etag_mutate(con->physical.etag, sce->etag);
-
- response_header_overwrite(srv, con,
- CONST_STR_LEN("ETag"),
- CONST_BUF_LEN(con->physical.etag));
- }
- }
-
- /* prepare header */
- if (NULL == (ds = (data_string *)array_get_element(con->response.headers,
- "Last-Modified"))) {
- mtime = strftime_cache_get(srv, sce->st.st_mtime);
- response_header_overwrite(srv, con, CONST_STR_LEN("Last-Modified"),
- CONST_BUF_LEN(mtime));
- } else {
- mtime = ds->value;
- }
-
- if (HANDLER_FINISHED == http_response_handle_cachable(srv, con,
- mtime)) {
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
- }
-#endif
-
- /*TODO: Read about etags */
- if (con->request.http_range && con->conf.range_requests) {
- int do_range_request = 1;
- data_string *ds = NULL;
- buffer *mtime = NULL;
- /* check if we have a conditional GET */
-
- /* prepare header */
- if (NULL == (ds = (data_string *)array_get_element(con->response.headers,
- "Last-Modified"))) {
- mtime = strftime_cache_get(srv, sce->st.st_mtime);
- response_header_overwrite(srv, con, CONST_STR_LEN("Last-Modified"),
- CONST_BUF_LEN(mtime));
- } else {
- mtime = ds->value;
- }
-
- if (NULL != (ds = (data_string *)array_get_element(con->request.headers,
- "If-Range"))) {
- /* if the value is the same as our ETag, we do a Range-request,
- * otherwise a full 200 */
-
- if (ds->value->ptr[0] == '"') {
- /**
- * client wants a ETag
- */
- if (!con->physical.etag) {
- do_range_request = 0;
- } else if (!buffer_is_equal(ds->value,
- con->physical.etag)) {
- do_range_request = 0;
- }
- } else if (!mtime) {
- /**
- * we don't have a Last-Modified and can match
- * the If-Range:
- *
- * sending all
- */
- do_range_request = 0;
- } else if (!buffer_is_equal(ds->value, mtime)) {
- do_range_request = 0;
- }
- }
-
- if (do_range_request) {
- /* content prepared, I'm done */
- con->file_finished = 1;
-
- if (0 == http_response_parse_range(srv, con, p)) {
- con->http_status = 206;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- return HANDLER_FINISHED;
- }
- }
-
- /* if we are still here, prepare body */
-
- /* we add it here for all requests
- * the HEAD request will drop it afterwards again
- */
- /*TODO check whether 1 should be subtracted */
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if (size <= (size_t)sce->st.st_size) {
- http_chunk_append_glusterfs_file_chunk (srv, con, ctx->fd, 0,
- sce->st.st_size, size);
- } else {
- http_chunk_append_glusterfs_mem (srv, con, ctx->buf,
- sce->st.st_size, size);
- }
-
- con->http_status = 200;
- con->file_finished = 1;
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
-
- return HANDLER_FINISHED;
-}
-
-#if 0
-URIHANDLER_FUNC(mod_glusterfs_request_done)
-{
- mod_glusterfs_iobuf_t *cur = first, *prev;
- while (cur) {
- prev = cur;
- glusterfs_free (cur->buf);
- cur = cur->next;
- free (prev);
- }
- first = NULL
- }
-#endif
-
-/* this function is called at dlopen() time and inits the callbacks */
-CONNECTION_FUNC(mod_glusterfs_connection_reset)
-{
- (void) p_d;
- (void) con;
- if (!network_backend_write)
- network_backend_write = srv->network_backend_write;
-
- srv->network_backend_write = mod_glusterfs_network_backend_write;
-
- return HANDLER_GO_ON;
-}
-
-int mod_glusterfs_plugin_init(plugin *p) {
- p->version = LIGHTTPD_VERSION_ID;
- p->name = buffer_init_string("glusterfs");
- p->init = mod_glusterfs_init;
- p->handle_physical = mod_glusterfs_handle_physical;
- p->handle_subrequest_start = mod_glusterfs_subrequest;
- // p->handle_request_done = mod_glusterfs_request_done;
- p->set_defaults = mod_glusterfs_set_defaults;
- p->connection_reset = mod_glusterfs_connection_reset;
- p->cleanup = mod_glusterfs_free;
-
- p->data = NULL;
-
- return 0;
-}
-
-
-/* mod_glusterfs_stat_cache */
-static stat_cache_entry * stat_cache_entry_init(void) {
- stat_cache_entry *sce = NULL;
-
- sce = calloc(1, sizeof(*sce));
- /* ERR_ABORT (sce); */
-
- sce->name = buffer_init();
- sce->etag = buffer_init();
- sce->content_type = buffer_init();
-
- return sce;
-}
-
-#ifdef HAVE_FAM_H
-static fam_dir_entry * fam_dir_entry_init(void) {
- fam_dir_entry *fam_dir = NULL;
-
- fam_dir = calloc(1, sizeof(*fam_dir));
- /* ERR_ABORT (fam_dir); */
-
- fam_dir->name = buffer_init();
-
- return fam_dir;
-}
-
-static void fam_dir_entry_free(void *data) {
- fam_dir_entry *fam_dir = data;
-
- if (!fam_dir) return;
-
- FAMCancelMonitor(fam_dir->fc, fam_dir->req);
-
- buffer_free(fam_dir->name);
- free(fam_dir->req);
-
- free(fam_dir);
-}
-#endif
-
-#ifdef HAVE_XATTR
-static int stat_cache_attr_get(buffer *buf, char *name) {
- int attrlen;
- int ret;
-
- attrlen = 1024;
- buffer_prepare_copy(buf, attrlen);
- attrlen--;
- if(0 == (ret = attr_get(name, "Content-Type", buf->ptr, &attrlen, 0))) {
- buf->used = attrlen + 1;
- buf->ptr[attrlen] = '\0';
- }
- return ret;
-}
-#endif
-
-/* the famous DJB hash function for strings */
-static uint32_t hashme(buffer *str) {
- uint32_t hash = 5381;
- const char *s;
- for (s = str->ptr; *s; s++) {
- hash = ((hash << 5) + hash) + *s;
- }
-
- hash &= ~(1 << 31); /* strip the highest bit */
-
- return hash;
-}
-
-
-#ifdef HAVE_LSTAT
-static int stat_cache_lstat(server *srv, buffer *dname, struct stat *lst) {
- if (lstat(dname->ptr, lst) == 0) {
- return S_ISLNK(lst->st_mode) ? 0 : 1;
- }
- else {
- log_error_write(srv, __FILE__, __LINE__, "sbs",
- "lstat failed for:",
- dname, strerror(errno));
- };
- return -1;
-}
-#endif
-
-/***
- *
- *
- *
- * returns:
- * - HANDLER_FINISHED on cache-miss (don't forget to reopen the file)
- * - HANDLER_ERROR on stat() failed -> see errno for problem
- */
-
-handler_t glusterfs_stat_cache_get_entry(server *srv,
- connection *con,
- buffer *glusterfs_path,
- buffer *name,
- void *buf,
- size_t size,
- stat_cache_entry **ret_sce)
-{
-#ifdef HAVE_FAM_H
- fam_dir_entry *fam_dir = NULL;
- int dir_ndx = -1;
- splay_tree *dir_node = NULL;
-#endif
- stat_cache_entry *sce = NULL;
- stat_cache *sc;
- struct stat st;
- size_t k;
-#ifdef DEBUG_STAT_CACHE
- size_t i;
-#endif
- int file_ndx;
- splay_tree *file_node = NULL;
-
- *ret_sce = NULL;
- memset (&st, 0, sizeof (st));
-
- /*
- * check if the directory for this file has changed
- */
-
- sc = srv->stat_cache;
-
- buffer_copy_string_buffer(sc->hash_key, name);
- buffer_append_long(sc->hash_key, con->conf.follow_symlink);
-
- file_ndx = hashme(sc->hash_key);
- sc->files = splaytree_splay(sc->files, file_ndx);
-
-#ifdef DEBUG_STAT_CACHE
- for (i = 0; i < ctrl.used; i++) {
- if (ctrl.ptr[i] == file_ndx) break;
- }
-#endif
-
- if (sc->files && (sc->files->key == file_ndx)) {
-#ifdef DEBUG_STAT_CACHE
- /* it was in the cache */
- assert(i < ctrl.used);
-#endif
-
- /* we have seen this file already and
- * don't stat() it again in the same second */
-
- file_node = sc->files;
-
- sce = file_node->data;
-
- /* check if the name is the same, we might have a collision */
-
- if (buffer_is_equal(name, sce->name)) {
- if (srv->srvconf.stat_cache_engine
- == STAT_CACHE_ENGINE_SIMPLE) {
- if (sce->stat_ts == srv->cur_ts && !buf) {
- *ret_sce = sce;
- return HANDLER_GO_ON;
- }
- }
- } else {
- /* oops, a collision,
- *
- * file_node is used by the FAM check below to see if
- * we know this file and if we can save a stat().
- *
- * BUT, the sce is not reset here as the entry into
- * the cache is ok, we it is just not pointing to
- * our requested file.
- */
-
- file_node = NULL;
- }
- } else {
-#ifdef DEBUG_STAT_CACHE
- if (i != ctrl.used) {
- fprintf(stderr, "%s.%d: %08x was already inserted "
- "but not found in cache, %s\n",
- __FILE__, __LINE__, file_ndx, name->ptr);
- }
- assert(i == ctrl.used);
-#endif
- }
- /*
- * *lol*
- * - open() + fstat() on a named-pipe results in a (intended) hang.
- * - stat() if regular file + open() to see if we can read from it is
- * better
- *
- * */
- if (-1 == glusterfs_get (glusterfs_path->ptr, buf, size, &st)) {
- return HANDLER_ERROR;
- }
-
- if (NULL == sce) {
- int osize = 0;
-
- if (sc->files) {
- osize = sc->files->size;
- }
-
- sce = stat_cache_entry_init();
- buffer_copy_string_buffer(sce->name, name);
-
- sc->files = splaytree_insert(sc->files, file_ndx, sce);
-#ifdef DEBUG_STAT_CACHE
- if (ctrl.size == 0) {
- ctrl.size = 16;
- ctrl.used = 0;
- ctrl.ptr = malloc(ctrl.size * sizeof(*ctrl.ptr));
- /* ERR_ABORT (ctrl.ptr); */
- } else if (ctrl.size == ctrl.used) {
- ctrl.size += 16;
- ctrl.ptr = realloc(ctrl.ptr,
- ctrl.size * sizeof(*ctrl.ptr));
- /* ERR_ABORT (ctrl.ptr); */
- }
-
- ctrl.ptr[ctrl.used++] = file_ndx;
-
- assert(sc->files);
- assert(sc->files->data == sce);
- assert(osize + 1 == splaytree_size(sc->files));
-#endif
- }
-
- sce->st = st;
- sce->stat_ts = srv->cur_ts;
-
- /* catch the obvious symlinks
- *
- * this is not a secure check as we still have a race-condition between
- * the stat() and the open. We can only solve this by
- * 1. open() the file
- * 2. fstat() the fd
- *
- * and keeping the file open for the rest of the time. But this can
- * only be done at network level.
- *
- * per default it is not a symlink
- * */
-#ifdef HAVE_LSTAT
- sce->is_symlink = 0;
-
- /* we want to only check for symlinks if we should block symlinks.
- */
- if (!con->conf.follow_symlink) {
- if (stat_cache_lstat(srv, name, &lst) == 0) {
-#ifdef DEBUG_STAT_CACHE
- log_error_write(srv, __FILE__, __LINE__, "sb",
- "found symlink", name);
-#endif
- sce->is_symlink = 1;
- }
-
- /*
- * we assume "/" can not be symlink, so
- * skip the symlink stuff if our path is /
- **/
- else if ((name->used > 2)) {
- buffer *dname;
- char *s_cur;
-
- dname = buffer_init();
- buffer_copy_string_buffer(dname, name);
-
- while ((s_cur = strrchr(dname->ptr,'/'))) {
- *s_cur = '\0';
- dname->used = s_cur - dname->ptr + 1;
- if (dname->ptr == s_cur) {
-#ifdef DEBUG_STAT_CACHE
- log_error_write(srv, __FILE__, __LINE__,
- "s", "reached /");
-#endif
- break;
- }
-#ifdef DEBUG_STAT_CACHE
- log_error_write(srv, __FILE__, __LINE__, "sbs",
- "checking if", dname, "is a "
- "symlink");
-#endif
- if (stat_cache_lstat(srv, dname, &lst) == 0) {
- sce->is_symlink = 1;
-#ifdef DEBUG_STAT_CACHE
- log_error_write(srv, __FILE__, __LINE__,
- "sb",
- "found symlink", dname);
-#endif
- break;
- };
- };
- buffer_free(dname);
- };
- };
-#endif
-
- if (S_ISREG(st.st_mode)) {
- /* determine mimetype */
- buffer_reset(sce->content_type);
-
- for (k = 0; k < con->conf.mimetypes->used; k++) {
- data_string *ds = NULL;
- buffer *type = NULL;
-
- ds = (data_string *)con->conf.mimetypes->data[k];
- type = ds->key;
- if (type->used == 0) continue;
-
- /* check if the right side is the same */
- if (type->used > name->used) continue;
-
- if (0 == strncasecmp(name->ptr + name->used - type->used,
- type->ptr, type->used - 1)) {
- buffer_copy_string_buffer(sce->content_type,
- ds->value);
- break;
- }
- }
- etag_create(sce->etag, &(sce->st), con->etag_flags);
-#ifdef HAVE_XATTR
- if (con->conf.use_xattr && buffer_is_empty(sce->content_type)) {
- stat_cache_attr_get(sce->content_type, name->ptr);
- }
-#endif
- } else if (S_ISDIR(st.st_mode)) {
- etag_create(sce->etag, &(sce->st), con->etag_flags);
- }
-
-#ifdef HAVE_FAM_H
- if (sc->fam &&
- (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM)) {
- /* is this directory already registered ? */
- if (!dir_node) {
- fam_dir = fam_dir_entry_init();
- fam_dir->fc = sc->fam;
-
- buffer_copy_string_buffer(fam_dir->name, sc->dir_name);
-
- fam_dir->version = 1;
-
- fam_dir->req = calloc(1, sizeof(FAMRequest));
- /* ERR_ABORT (fam_dir->req); */
-
- if (0 != FAMMonitorDirectory(sc->fam, fam_dir->name->ptr,
- fam_dir->req, fam_dir)) {
-
- log_error_write(srv, __FILE__, __LINE__, "sbsbs",
- "monitoring dir failed:",
- fam_dir->name,
- "file:", name,
- FamErrlist[FAMErrno]);
-
- fam_dir_entry_free(fam_dir);
- } else {
- int osize = 0;
-
- if (sc->dirs) {
- osize = sc->dirs->size;
- }
-
- sc->dirs = splaytree_insert(sc->dirs, dir_ndx,
- fam_dir);
- assert(sc->dirs);
- assert(sc->dirs->data == fam_dir);
- assert(osize == (sc->dirs->size - 1));
- }
- } else {
- fam_dir = dir_node->data;
- }
-
- /* bind the fam_fc to the stat() cache entry */
-
- if (fam_dir) {
- sce->dir_version = fam_dir->version;
- sce->dir_ndx = dir_ndx;
- }
- }
-#endif
-
- *ret_sce = sce;
-
- return HANDLER_GO_ON;
-}
-
-/**
- * remove stat() from cache which havn't been stat()ed for
- * more than 10 seconds
- *
- *
- * walk though the stat-cache, collect the ids which are too old
- * and remove them in a second loop
- */
-
-static int stat_cache_tag_old_entries(server *srv, splay_tree *t, int *keys,
- size_t *ndx) {
- stat_cache_entry *sce;
-
- if (!t) return 0;
-
- stat_cache_tag_old_entries(srv, t->left, keys, ndx);
- stat_cache_tag_old_entries(srv, t->right, keys, ndx);
-
- sce = t->data;
-
- if (srv->cur_ts - sce->stat_ts > 2) {
- keys[(*ndx)++] = t->key;
- }
-
- return 0;
-}
diff --git a/mod_glusterfs/lighttpd/1.4/mod_glusterfs.h b/mod_glusterfs/lighttpd/1.4/mod_glusterfs.h
deleted file mode 100644
index 9d73d6999ed..00000000000
--- a/mod_glusterfs/lighttpd/1.4/mod_glusterfs.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _MOD_GLUSTERFS_FILE_CACHE_H_
-#define _MOD_GLUSTERFS_FILE_CACHE_H_
-
-#include "stat_cache.h"
-#include <libglusterfsclient.h>
-#include "base.h"
-
-handler_t glusterfs_stat_cache_get_entry(server *srv, connection *con,
- buffer *glusterfs_path, buffer *name,
- void *buf, size_t size,
- stat_cache_entry **fce);
-
-#endif
diff --git a/mod_glusterfs/lighttpd/1.5/Makefile.am b/mod_glusterfs/lighttpd/1.5/Makefile.am
deleted file mode 100644
index eda329111dc..00000000000
--- a/mod_glusterfs/lighttpd/1.5/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-EXTRA_DIST = Makefile.am.diff mod_glusterfs.c mod_glusterfs.h README.txt
-
-CLEANFILES =
diff --git a/mod_glusterfs/lighttpd/1.5/Makefile.am.diff b/mod_glusterfs/lighttpd/1.5/Makefile.am.diff
deleted file mode 100644
index 375696b5d8f..00000000000
--- a/mod_glusterfs/lighttpd/1.5/Makefile.am.diff
+++ /dev/null
@@ -1,29 +0,0 @@
---- lighttpd-1.4.19/src/Makefile.am 2008-04-16 18:42:18.000000000 +0400
-+++ lighttpd-1.4.19.mod/src/Makefile.am 2008-04-16 18:41:11.000000000 +0400
-@@ -1,4 +1,4 @@
--AM_CFLAGS = $(FAM_CFLAGS)
-+AM_CFLAGS = $(FAM_CFLAGS) -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64
-
- noinst_PROGRAMS=proc_open lemon # simple-fcgi #graphic evalo bench ajp ssl error_test adserver gen-license
- sbin_PROGRAMS=lighttpd lighttpd-angel
-@@ -241,6 +241,11 @@
- mod_accesslog_la_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined
- mod_accesslog_la_LIBADD = $(common_libadd)
-
-+lib_LTLIBRARIES += mod_glusterfs.la
-+mod_glusterfs_la_SOURCES = mod_glusterfs.c
-+mod_glusterfs_la_CFLAGS = $(AM_CFLAGS)
-+mod_glusterfs_la_LDFLAGS = -module -export-dynamic -avoid-version -no-undefined -lglusterfsclient -lpthread
-+mod_glusterfs_la_LIBADD = $(common_libadd)
-
- hdr = server.h buffer.h network.h log.h keyvalue.h \
- response.h request.h fastcgi.h chunk.h \
-@@ -254,7 +259,7 @@
- configparser.h mod_ssi_exprparser.h \
- sys-mmap.h sys-socket.h mod_cml.h mod_cml_funcs.h \
- splaytree.h proc_open.h status_counter.h \
-- mod_magnet_cache.h
-+ mod_magnet_cache.h mod_glusterfs.h
-
- DEFS= @DEFS@ -DLIBRARY_DIR="\"$(libdir)\"" -DSBIN_DIR="\"$(sbindir)\""
-
diff --git a/mod_glusterfs/lighttpd/1.5/README.txt b/mod_glusterfs/lighttpd/1.5/README.txt
deleted file mode 100644
index bdbdfffbc50..00000000000
--- a/mod_glusterfs/lighttpd/1.5/README.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-Introduction
-============
-mod_glusterfs is a module written for lighttpd to speed up the access of files present on glusterfs. mod_glusterfs uses libglusterfsclient library provided for glusterfs and hence can be used without fuse (File System in User Space).
-
-Usage
-=====
-To use mod_glusterfs with lighttpd-1.5, copy mod_glusterfs.c and mod_glusterfs.h into src/ of lighttpd-1.5 source tree, and apply the Makefile.am.diff to src/Makefile.am. Re-run ./autogen.sh on the top level of the lighttpd-1.5 build tree and recompile.
-
-# cp mod_glusterfs.[ch] /home/glusterfs/lighttpd-1.5/src/
-# cp Makefile.am.diff /home/glusterfs/lighttpd-1.5/
-# cd /home/glusterfs/lighttpd-1.5
-# patch -p1 < Makefile.am.diff
-# ./autogen.sh
-# ./configure
-# make
-# make install
-
-Configuration
-=============
-* mod_glusterfs should be listed at the begining of the list server.modules in lighttpd.conf.
-
-Below is a snippet from lighttpd.conf concerning to mod_glusterfs.
-
-$HTTP["url"] =~ "^/glusterfs" {
- glusterfs.prefix = "/glusterfs"
- glusterfs.logfile = "/var/log/glusterfs-logfile"
- glusterfs.document-root = "/home/glusterfs/document-root"
- glusterfs.volume-specfile = "/etc/glusterfs/glusterfs.vol"
- glusterfs.loglevel = "error"
- glusterfs.cache-timeout = 300
- glusterfs.xattr-interface-size-limit = "65536"
-}
-
-* $HTTP["url"] =~ "^/glusterfs"
- A perl style regular expression used to match against the url. If regular expression matches the url, the url is handled by mod_glusterfs. Note that the pattern given here should match glusterfs.prefix.
-
-* glusterfs.prefix (COMPULSORY)
- A string to be present at the starting of the file path in the url so that the file would be handled by glusterfs.
- Eg., A GET request on the url http://www.example.com/glusterfs-prefix/some-dir/example-file will result in fetching of the file "/some-dir/example-file" from glusterfs mount if glusterfs.prefix is set to "/glusterfs-prefix".
-
-* glusterfs.volume-specfile (COMPULSORY)
- Path to the the glusterfs volume specification file.
-
-* glusterfs.logfile (COMPULSORY)
- Path to the glusterfs logfile.
-
-* glusterfs.loglevel (OPTIONAL, default = warning)
- Allowed values are critical, error, warning, debug, none in the decreasing order of severity of error conditions.
-
-* glusterfs.cache-timeout (OPTIONAL, default = 0)
- Timeout values for glusterfs stat and lookup cache.
-
-* glusterfs.document-root (COMPULSORY)
- An absolute path, relative to which all the files are fetched from glusterfs.
-
-* glusterfs.xattr-interface-size-limit (OPTIONAL, default = 0)
- Files with sizes upto and including this value are fetched through the extended attribute interface of glusterfs rather than the usual open-read-close set of operations. For files of small sizes, it is recommended to use extended attribute interface.
diff --git a/mod_glusterfs/lighttpd/1.5/mod_glusterfs.c b/mod_glusterfs/lighttpd/1.5/mod_glusterfs.c
deleted file mode 100644
index 67f7a7eacf5..00000000000
--- a/mod_glusterfs/lighttpd/1.5/mod_glusterfs.c
+++ /dev/null
@@ -1,1476 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <ctype.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <pthread.h>
-#include <sys/types.h>
-#include <fcntl.h>
-
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <errno.h>
-#include <unistd.h>
-#include <assert.h>
-
-#include "base.h"
-#include "log.h"
-#include "buffer.h"
-
-#include "plugin.h"
-
-#include "stat_cache.h"
-#include "mod_glusterfs.h"
-#include "etag.h"
-#include "response.h"
-
-#include "fdevent.h"
-#include "joblist.h"
-#include "http_req_range.h"
-#include "connections.h"
-#include "configfile.h"
-
-#include <libglusterfsclient.h>
-
-#ifdef HAVE_ATTR_ATTRIBUTES_H
-#include <attr/attributes.h>
-#endif
-
-#ifdef HAVE_FAM_H
-# include <fam.h>
-#endif
-
-#include "sys-mmap.h"
-
-/* NetBSD 1.3.x needs it */
-#ifndef MAP_FAILED
-# define MAP_FAILED -1
-#endif
-
-#ifndef O_LARGEFILE
-# define O_LARGEFILE 0
-#endif
-
-#ifndef HAVE_LSTAT
-#define lstat stat
-#endif
-
-#if 0
-/* enables debug code for testing if all nodes in the stat-cache as accessable */
-#define DEBUG_STAT_CACHE
-#endif
-
-#ifdef HAVE_LSTAT
-#undef HAVE_LSTAT
-#endif
-
-#define GLUSTERFS_FILE_CHUNK (FILE_CHUNK + 1)
-
-/* Keep this value large. Each glusterfs_async_read of GLUSTERFS_CHUNK_SIZE results in a network_backend_write of the read data*/
-
-#define GLUSTERFS_CHUNK_SIZE 8192
-
-/**
- * this is a staticfile for a lighttpd plugin
- *
- */
-
-
-/* plugin config for all request/connections */
-
-typedef struct {
- buffer *logfile;
- buffer *loglevel;
- buffer *specfile;
- buffer *prefix;
- buffer *xattr_file_size;
- buffer *document_root;
- array *exclude_exts;
- unsigned short cache_timeout;
-
- /* FIXME: its a pointer, hence cant be short */
- unsigned long handle;
-} plugin_config;
-
-static network_status_t (*network_backend_write)(struct server *srv, connection *con, iosocket *sock, chunkqueue *cq);
-
-typedef struct {
- PLUGIN_DATA;
- buffer *range_buf;
- plugin_config **config_storage;
- http_req_range *ranges;
- plugin_config conf;
-} plugin_data;
-
-typedef struct glusterfs_async_local {
- int op_ret;
- int op_errno;
- pthread_mutex_t lock;
- pthread_cond_t cond;
- connection *con;
- server *srv;
- plugin_data *p;
-
- union {
- struct {
- char async_read_complete;
- off_t length;
- size_t read_bytes;
- glusterfs_read_buf_t *buf;
- }readv;
-
- struct {
- buffer *name;
- buffer *hash_key;
- size_t size;
- }lookup;
- }fop;
-} glusterfs_async_local_t;
-
-typedef struct {
- unsigned long fd;
- buffer *glusterfs_path;
- void *buf;
- off_t response_content_length;
- int prefix;
-}mod_glusterfs_ctx_t;
-
-typedef struct {
- chunkqueue *cq;
- glusterfs_read_buf_t *buf;
- size_t length;
-}mod_glusterfs_chunkqueue;
-
-#ifdef HAVE_FAM_H
-typedef struct {
- FAMRequest *req;
- FAMConnection *fc;
-
- buffer *name;
-
- int version;
-} fam_dir_entry;
-#endif
-
-/* the directory name is too long to always compare on it
- * - we need a hash
- * - the hash-key is used as sorting criteria for a tree
- * - a splay-tree is used as we can use the caching effect of it
- */
-
-/* we want to cleanup the stat-cache every few seconds, let's say 10
- *
- * - remove entries which are outdated since 30s
- * - remove entries which are fresh but havn't been used since 60s
- * - if we don't have a stat-cache entry for a directory, release it from the monitor
- */
-
-#ifdef DEBUG_STAT_CACHE
-typedef struct {
- int *ptr;
-
- size_t used;
- size_t size;
-} fake_keys;
-
-static fake_keys ctrl;
-#endif
-
-static stat_cache_entry *
-stat_cache_entry_init(void)
-{
- stat_cache_entry *sce = NULL;
-
- sce = calloc(1, sizeof(*sce));
- /* ERR_ABORT (sce); */
-
- sce->name = buffer_init();
- sce->etag = buffer_init();
- sce->content_type = buffer_init();
-
- return sce;
-}
-
-int chunkqueue_append_glusterfs_mem (chunkqueue *cq, const char * mem, size_t len) {
- buffer *buf = NULL;
-
- buf = chunkqueue_get_append_buffer (cq);
-
- if (buf->ptr)
- free (buf->ptr);
-
- buf->used = len + 1;
- buf->ptr = (char *)mem;
- buf->size = len;
-
- return 0;
-}
-
-static int
-glusterfs_lookup_async_cbk (int op_ret,
- int op_errno,
- void *buf,
- struct stat *st,
- void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
-
- mod_glusterfs_ctx_t *ctx = NULL;
- ctx = local->con->plugin_ctx[local->p->id];
-
- assert (ctx->buf== buf);
-
- if (op_ret || !(S_ISREG (st->st_mode) && (size_t)st->st_size <= local->fop.lookup.size)) {
-
- free (ctx->buf);
- ctx->buf = NULL;
-
- if (op_ret) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- free (ctx);
- local->con->plugin_ctx[local->p->id] = NULL;
-
- if (op_errno == ENOENT)
- local->con->http_status = 404;
- else
- local->con->http_status = 403;
- }
- }
-
- if (!op_ret) {
- stat_cache_entry *sce = NULL;
- stat_cache *sc = local->srv->stat_cache;
-
- sce = (stat_cache_entry *)g_hash_table_lookup(sc->files, local->fop.lookup.hash_key);
-
- if (!sce) {
- sce = stat_cache_entry_init();
-
- buffer_copy_string_buffer(sce->name, local->fop.lookup.name);
- g_hash_table_insert(sc->files, buffer_init_string(BUF_STR(local->fop.lookup.hash_key)), sce);
- }
-
- sce->state = STAT_CACHE_ENTRY_STAT_FINISHED;
- sce->stat_ts = time (NULL);
- memcpy (&sce->st, st, sizeof (*st));
- }
-
- g_async_queue_push (local->srv->joblist_queue, local->con);
- /*
- joblist_append (local->srv, local->con);
- kill (getpid(), SIGUSR1);
- */
- free (local);
- return 0;
-}
-
-static handler_t
-glusterfs_stat_cache_get_entry_async (server *srv,
- connection *con,
- plugin_data *p,
- buffer *glusterfs_path,
- buffer *name,
- void *buf,
- size_t size,
- stat_cache_entry **ret_sce)
-{
- stat_cache_entry *sce = NULL;
- stat_cache *sc;
- glusterfs_async_local_t *local = NULL;
-
- *ret_sce = NULL;
-
- /*
- * check if the directory for this file has changed
- */
-
- sc = srv->stat_cache;
-
- buffer_copy_string_buffer(sc->hash_key, name);
- buffer_append_long(sc->hash_key, con->conf.follow_symlink);
-
- if ((sce = (stat_cache_entry *)g_hash_table_lookup(sc->files, sc->hash_key))) {
- /* know this entry already */
-
- if (sce->state == STAT_CACHE_ENTRY_STAT_FINISHED &&
- !buf) {
- /* verify that this entry is still fresh */
-
- *ret_sce = sce;
-
- return HANDLER_GO_ON;
- }
- }
-
-
- /*
- * *lol*
- * - open() + fstat() on a named-pipe results in a (intended) hang.
- * - stat() if regular file + open() to see if we can read from it is better
- *
- * */
-
- /* pass a job to the stat-queue */
-
- local = calloc (1, sizeof (*local));
- /* ERR_ABORT (local); */
- local->con = con;
- local->srv = srv;
- local->p = p;
- local->fop.lookup.name = buffer_init_buffer (name);
- local->fop.lookup.hash_key = buffer_init_buffer (sc->hash_key);
- local->fop.lookup.size = size;
-
- if (glusterfs_lookup_async ((libglusterfs_handle_t )p->conf.handle, glusterfs_path->ptr, buf, size, glusterfs_lookup_async_cbk, (void *) local)) {
- free (local);
- return HANDLER_ERROR;
- }
-
- return HANDLER_WAIT_FOR_EVENT;
-}
-
-int
-mod_glusterfs_readv_async_cbk (glusterfs_read_buf_t *buf,
- void *cbk_data)
-{
- glusterfs_async_local_t *local = cbk_data;
- pthread_mutex_lock (&local->lock);
- {
- local->fop.readv.async_read_complete = 1;
- local->fop.readv.buf = buf;
-
- pthread_cond_signal (&local->cond);
- }
- pthread_mutex_unlock (&local->lock);
-
- return 0;
-}
-
-network_status_t
-mod_glusterfs_read_async (server *srv, connection *con, chunk *glusterfs_chunk)
-{
- glusterfs_async_local_t local;
- off_t end = 0;
- int nbytes;
- int complete;
- chunkqueue *cq = NULL;
- chunk *c = NULL;
- off_t offset = glusterfs_chunk->file.start;
- size_t length = glusterfs_chunk->file.length;
- unsigned long fd = (unsigned long)glusterfs_chunk->file.name;
- network_status_t ret;
-
- pthread_cond_init (&local.cond, NULL);
- pthread_mutex_init (&local.lock, NULL);
-
- //local.fd = fd;
- memset (&local, 0, sizeof (local));
-
- if (length > 0)
- end = offset + length;
-
- cq = chunkqueue_init ();
- if (!cq) {
- con->http_status = 500;
- return NETWORK_STATUS_FATAL_ERROR;
- }
-
- do {
- glusterfs_read_buf_t *buf;
- int i;
- if (length > 0) {
- nbytes = end - offset;
- if (nbytes > GLUSTERFS_CHUNK_SIZE)
- nbytes = GLUSTERFS_CHUNK_SIZE;
- } else
- nbytes = GLUSTERFS_CHUNK_SIZE;
-
- glusterfs_read_async(fd,
- nbytes,
- offset,
- mod_glusterfs_readv_async_cbk,
- (void *)&local);
-
- pthread_mutex_lock (&local.lock);
- {
- while (!local.fop.readv.async_read_complete) {
- pthread_cond_wait (&local.cond, &local.lock);
- }
-
- local.op_ret = local.fop.readv.buf->op_ret;
- local.op_errno = local.fop.readv.buf->op_errno;
-
- local.fop.readv.async_read_complete = 0;
- buf = local.fop.readv.buf;
-
- if ((int)length < 0)
- complete = (local.fop.readv.buf->op_ret <= 0);
- else {
- local.fop.readv.read_bytes += local.fop.readv.buf->op_ret;
- complete = ((local.fop.readv.read_bytes == length) || (local.fop.readv.buf->op_ret <= 0));
- }
- }
- pthread_mutex_unlock (&local.lock);
-
- if (local.op_ret > 0) {
- for (i = 0; i < buf->count; i++) {
- buffer *nw_write_buf = chunkqueue_get_append_buffer (cq);
-
- nw_write_buf->used = nw_write_buf->size = buf->vector[i].iov_len + 1;
- nw_write_buf->ptr = buf->vector[i].iov_base;
-
- // buffer_copy_memory (nw_write_buf, buf->vector[i].iov_base, buf->vector[i].iov_len + 1);
- offset += local.op_ret;
- }
-
- ret = network_backend_write (srv, con, con->sock, cq);
-
- if (chunkqueue_written (cq) != local.op_ret) {
- mod_glusterfs_chunkqueue *gf_cq;
- glusterfs_chunk->file.start = offset;
- if ((int)glusterfs_chunk->file.length > 0)
- glusterfs_chunk->file.length -= local.fop.readv.read_bytes;
-
- gf_cq = calloc (1, sizeof (*gf_cq));
- /* ERR_ABORT (qf_cq); */
- gf_cq->cq = cq;
- gf_cq->buf = buf;
- gf_cq->length = local.op_ret;
- glusterfs_chunk->file.mmap.start = (char *)gf_cq;
- return ret;
- }
-
- for (c = cq->first ; c; c = c->next)
- c->mem->ptr = NULL;
-
- chunkqueue_reset (cq);
- }
-
- glusterfs_free (buf);
- } while (!complete);
-
- chunkqueue_free (cq);
- glusterfs_close (fd);
-
- if (local.op_ret < 0)
- con->http_status = 500;
-
- return (local.op_ret < 0 ? NETWORK_STATUS_FATAL_ERROR : NETWORK_STATUS_SUCCESS);
-}
-
-network_status_t mod_glusterfs_network_backend_write(struct server *srv, connection *con, iosocket *sock, chunkqueue *cq)
-{
- chunk *c, *prev, *first;
- int chunks_written = 0;
- int error = 0;
- network_status_t ret;
-
- for (first = prev = c = cq->first; c; c = c->next, chunks_written++) {
-
- if (c->type == MEM_CHUNK && c->mem->used && !c->mem->ptr) {
- if (cq->first != c) {
- prev->next = NULL;
-
- /* call stored network_backend_write */
- ret = network_backend_write (srv, con, sock, cq);
-
- prev->next = c;
- if (ret != NETWORK_STATUS_SUCCESS) {
- cq->first = first;
- return ret;
- }
- }
- cq->first = c->next;
-
- if (c->file.fd < 0) {
- error = HANDLER_ERROR;
- break;
- }
-
- if (c->file.mmap.start) {
- chunk *tmp;
- size_t len;
- mod_glusterfs_chunkqueue *gf_cq = (mod_glusterfs_chunkqueue *)c->file.mmap.start;
-
- ret = network_backend_write (srv, con, sock, gf_cq->cq);
-
- if ((len = (size_t)chunkqueue_written (gf_cq->cq)) != gf_cq->length) {
- gf_cq->length -= len;
- cq->first = first;
- chunkqueue_remove_finished_chunks (gf_cq->cq);
- return ret;
- }
-
- for (tmp = gf_cq->cq->first ; tmp; tmp = tmp->next)
- tmp->mem->ptr = NULL;
-
- chunkqueue_free (gf_cq->cq);
- glusterfs_free (gf_cq->buf);
- free (gf_cq);
- c->file.mmap.start = NULL;
- }
-
- ret = mod_glusterfs_read_async (srv, con, c); //c->file.fd, c->file.start, -1);//c->file.length);
- if (c->file.mmap.start) {
- /* pending chunkqueue from mod_glusterfs_read_async to be written to network */
- cq->first = first;
- return ret;
- }
-
- buffer_free (c->mem);
- c->mem = NULL;
-
- c->type = FILE_CHUNK;
- c->offset = c->file.length = 0;
- c->file.name = NULL;
-
- if (first == c)
- first = c->next;
-
- if (cq->last == c)
- cq->last = NULL;
-
- prev->next = c->next;
-
- free(c);
- }
- prev = c;
- }
-
- ret = network_backend_write (srv, con, sock, cq);
-
- cq->first = first;
-
- return ret;
-}
-
-#if 0
-int chunkqueue_append_glusterfs_file (chunkqueue *cq, unsigned long fd, off_t offset, off_t len)
-{
- chunk *c = NULL;
- c = chunkqueue_get_append_tempfile (cq);
-
- if (c->file.is_temp) {
- close (c->file.fd);
- unlink (c->file.name->ptr);
- }
-
- c->type = MEM_CHUNK;
-
- c->mem = buffer_init ();
- c->mem->used = len + 1;
- c->mem->ptr = NULL;
- c->offset = 0;
-
- /* buffer_copy_string_buffer (c->file.name, fn); */
- c->file.start = offset;
- c->file.length = len;
- /* buffer_free (c->file.name); */
-
- /* identify chunk as glusterfs related */
- c->file.mmap.start = MAP_FAILED;
- /* c->file.mmap.length = c->file.mmap.offset = len;*/
-
- return 0;
-}
-#endif
-
-int chunkqueue_append_dummy_mem_chunk (chunkqueue *cq, off_t len)
-{
- chunk *c = NULL;
- c = chunkqueue_get_append_tempfile (cq);
-
- if (c->file.is_temp) {
- close (c->file.fd);
- unlink (c->file.name->ptr);
- c->file.is_temp = 0;
- }
-
- c->type = MEM_CHUNK;
-
- c->mem->used = len + 1;
- c->offset = len;
- c->mem->ptr = NULL;
-
- return 0;
-}
-
-int chunkqueue_append_glusterfs_file (chunkqueue *cq, unsigned long fd, off_t offset, off_t len)
-{
- chunk *c = NULL;
- c = chunkqueue_get_append_tempfile (cq);
-
- if (c->file.is_temp) {
- close (c->file.fd);
- unlink (c->file.name->ptr);
- c->file.is_temp = 0;
- }
-
- c->type = MEM_CHUNK;
-
- c->mem = buffer_init ();
- c->mem->used = len + 1;
- c->mem->ptr = NULL;
- c->offset = 0;
-
- /* buffer_copy_string_buffer (c->file.name, fn); */
- buffer_free (c->file.name);
-
- /* fd returned by libglusterfsclient is a pointer */
- c->file.name = (buffer *)fd;
- c->file.start = offset;
- c->file.length = len;
-
- //c->file.fd = fd;
- c->file.mmap.start = NULL;
- return 0;
-}
-
-/* init the plugin data */
-INIT_FUNC(mod_glusterfs_init) {
- plugin_data *p;
-
- UNUSED (srv);
- p = calloc(1, sizeof(*p));
- /* ERR_ABORT (p); */
- network_backend_write = NULL;
- p->ranges = http_request_range_init();
- return p;
-}
-
-/* detroy the plugin data */
-FREE_FUNC(mod_glusterfs_free) {
- plugin_data *p = p_d;
-
- UNUSED (srv);
-
- if (!p) return HANDLER_GO_ON;
-
- if (p->config_storage) {
- size_t i;
- for (i = 0; i < srv->config_context->used; i++) {
- plugin_config *s = p->config_storage[i];
-
- buffer_free (s->logfile);
- buffer_free (s->loglevel);
- buffer_free (s->specfile);
- buffer_free (s->prefix);
- buffer_free (s->xattr_file_size);
- buffer_free (s->document_root);
- array_free (s->exclude_exts);
-
- free (s);
- }
- free (p->config_storage);
- }
- buffer_free (p->range_buf);
- http_request_range_free (p->ranges);
-
- free (p);
-
- return HANDLER_GO_ON;
-}
-
-SETDEFAULTS_FUNC(mod_glusterfs_set_defaults) {
- plugin_data *p = p_d;
- size_t i = 0;
-
- config_values_t cv[] = {
- { "glusterfs.logfile", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.loglevel", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
- { "glusterfs.volume-specfile", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
- { "glusterfs.cache-timeout", NULL, T_CONFIG_SHORT, T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.exclude-extensions", NULL, T_CONFIG_ARRAY, T_CONFIG_SCOPE_CONNECTION },
-
- /*TODO: get the prefix from config_conext and remove glusterfs.prefix from conf file */
- { "glusterfs.prefix", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.xattr-interface-size-limit", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
-
- { "glusterfs.document-root", NULL, T_CONFIG_STRING, T_CONFIG_SCOPE_CONNECTION },
-
- { NULL, NULL, T_CONFIG_UNSET, T_CONFIG_SCOPE_UNSET }
- };
-
- p->config_storage = calloc(1, srv->config_context->used * sizeof(specific_config *));
- /* ERR_ABORT (p->config_storage); */
- p->range_buf = buffer_init ();
-
- for (i = 0; i < srv->config_context->used; i++) {
- plugin_config *s;
-
- s = calloc(1, sizeof(plugin_config));
- /* ERR_ABORT (s); */
- s->logfile = buffer_init ();
- s->loglevel = buffer_init ();
- s->specfile = buffer_init ();
- s->exclude_exts = array_init ();
- s->prefix = buffer_init ();
- s->xattr_file_size = buffer_init ();
- s->document_root = buffer_init ();
-
- cv[0].destination = s->logfile;
- cv[1].destination = s->loglevel;
- cv[2].destination = s->specfile;
- cv[3].destination = &s->cache_timeout;
- cv[4].destination = s->exclude_exts;
- cv[5].destination = s->prefix;
- cv[6].destination = s->xattr_file_size;
- cv[7].destination = s->document_root;
-
- p->config_storage[i] = s;
-
- if (0 != config_insert_values_global(srv, ((data_config *)srv->config_context->data[i])->value, cv)) {
- return HANDLER_FINISHED;
- }
- }
-
- return HANDLER_GO_ON;
-}
-
-#define PATCH(x) \
- p->conf.x = s->x;
-
-static int mod_glusterfs_patch_connection(server *srv, connection *con, plugin_data *p) {
- size_t i, j;
- plugin_config *s;
-
- p->conf.logfile = NULL;
- p->conf.loglevel = NULL;
- p->conf.specfile = NULL;
- p->conf.cache_timeout = 0;
- p->conf.exclude_exts = NULL;
- p->conf.prefix = NULL;
- p->conf.xattr_file_size = NULL;
- p->conf.exclude_exts = NULL;
-
- /* skip the first, the global context */
- /* glusterfs related config can only occur inside $HTTP["url"] == "<glusterfs-prefix>" */
- for (i = 1; i < srv->config_context->used; i++) {
- data_config *dc = (data_config *)srv->config_context->data[i];
- s = p->config_storage[i];
-
- /* condition didn't match */
- if (!config_check_cond(srv, con, dc)) continue;
-
- /* merge config */
- for (j = 0; j < dc->value->used; j++) {
- data_unset *du = dc->value->data[j];
-
- if (buffer_is_equal_string (du->key, CONST_STR_LEN("glusterfs.logfile"))) {
- PATCH (logfile);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN("glusterfs.loglevel"))) {
- PATCH (loglevel);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.volume-specfile"))) {
- PATCH (specfile);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN("glusterfs.cache-timeout"))) {
- PATCH (cache_timeout);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.exclude-extensions"))) {
- PATCH (exclude_exts);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.prefix"))) {
- PATCH (prefix);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.xattr-interface-size-limit"))) {
- PATCH (xattr_file_size);
- } else if (buffer_is_equal_string (du->key, CONST_STR_LEN ("glusterfs.document-root"))) {
- PATCH (document_root);
- }
- }
- }
- return 0;
-}
-
-#undef PATCH
-
-static int http_response_parse_range(server *srv, connection *con, plugin_data *p) {
- int multipart = 0;
- char *boundary = "fkj49sn38dcn3";
- data_string *ds;
- stat_cache_entry *sce = NULL;
- buffer *content_type = NULL;
- buffer *range = NULL;
- http_req_range *ranges, *r;
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
- size_t size = 0;
-
- if (!ctx) {
- return -1;
- }
-
- if (NULL != (ds = (data_string *)array_get_element(con->request.headers, CONST_STR_LEN("Range")))) {
- range = ds->value;
- } else {
- /* we don't have a Range header */
-
- return -1;
- }
-
- if (HANDLER_ERROR == stat_cache_get_entry(srv, con, con->physical.path, &sce)) {
- SEGFAULT();
- }
-
- ctx->response_content_length = con->response.content_length = 0;
-
- if (NULL != (ds = (data_string *)array_get_element(con->response.headers, CONST_STR_LEN("Content-Type")))) {
- content_type = ds->value;
- }
-
- /* start the range-header parser
- * bytes=<num> */
-
- ranges = p->ranges;
- http_request_range_reset(ranges);
- switch (http_request_range_parse(range, ranges)) {
- case PARSE_ERROR:
- return -1; /* no range valid Range Header */
- case PARSE_SUCCESS:
- break;
- default:
- TRACE("%s", "foobar");
- return -1;
- }
-
- if (ranges->next) {
- multipart = 1;
- }
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr) {
- size = atoi (p->conf.xattr_file_size->ptr);
- }
-
- /* patch the '-1' */
- for (r = ranges; r; r = r->next) {
- if (r->start == -1) {
- /* -<end>
- *
- * the last <end> bytes */
- r->start = sce->st.st_size - r->end;
- r->end = sce->st.st_size - 1;
- }
- if (r->end == -1) {
- /* <start>-
- * all but the first <start> bytes */
-
- r->end = sce->st.st_size - 1;
- }
-
- if (r->end > sce->st.st_size - 1) {
- /* RFC 2616 - 14.35.1
- *
- * if last-byte-pos not present or > size-of-file
- * take the size-of-file
- *
- * */
- r->end = sce->st.st_size - 1;
- }
-
- if (r->start > sce->st.st_size - 1) {
- /* RFC 2616 - 14.35.1
- *
- * if first-byte-pos > file-size, 416
- */
-
- con->http_status = 416;
- return -1;
- }
-
- if (r->start > r->end) {
- /* RFC 2616 - 14.35.1
- *
- * if last-byte-pos is present, it has to be >= first-byte-pos
- *
- * invalid ranges have to be handle as no Range specified
- * */
-
- return -1;
- }
- }
-
- if (r) {
- /* we ran into an range violation */
- return -1;
- }
-
- if (multipart) {
- buffer *b;
- for (r = ranges; r; r = r->next) {
- /* write boundary-header */
-
- b = chunkqueue_get_append_buffer(con->send);
-
- buffer_copy_string(b, "\r\n--");
- buffer_append_string(b, boundary);
-
- /* write Content-Range */
- buffer_append_string(b, "\r\nContent-Range: bytes ");
- buffer_append_off_t(b, r->start);
- buffer_append_string(b, "-");
- buffer_append_off_t(b, r->end);
- buffer_append_string(b, "/");
- buffer_append_off_t(b, sce->st.st_size);
-
- buffer_append_string(b, "\r\nContent-Type: ");
- buffer_append_string_buffer(b, content_type);
-
- /* write END-OF-HEADER */
- buffer_append_string(b, "\r\n\r\n");
-
- con->response.content_length += b->used - 1;
- ctx->response_content_length += b->used - 1;
- con->send->bytes_in += b->used - 1;
-
- if ((size_t)sce->st.st_size > size) {
- chunkqueue_append_glusterfs_file(con->send_raw, ctx->fd, r->start, r->end - r->start + 1);
- con->send_raw->bytes_in += (r->end - r->start + 1);
- chunkqueue_append_dummy_mem_chunk (con->send, r->end - r->start + 1);
- } else {
- chunkqueue_append_mem (con->send, ((char *)ctx->buf) + r->start, r->end - r->start + 1);
- free (ctx->buf);
- ctx->buf = NULL;
- }
-
- con->response.content_length += r->end - r->start + 1;
- ctx->response_content_length += r->end - r->start + 1;
- con->send->bytes_in += r->end - r->start + 1;
- }
-
- /* add boundary end */
- b = chunkqueue_get_append_buffer(con->send);
-
- buffer_copy_string_len(b, "\r\n--", 4);
- buffer_append_string(b, boundary);
- buffer_append_string_len(b, "--\r\n", 4);
-
- con->response.content_length += b->used - 1;
- ctx->response_content_length += b->used - 1;
- con->send->bytes_in += b->used - 1;
-
- /* set header-fields */
-
- buffer_copy_string(p->range_buf, "multipart/byteranges; boundary=");
- buffer_append_string(p->range_buf, boundary);
-
- /* overwrite content-type */
- response_header_overwrite(srv, con, CONST_STR_LEN("Content-Type"), CONST_BUF_LEN(p->range_buf));
-
- } else {
- r = ranges;
-
- chunkqueue_append_glusterfs_file(con->send_raw, ctx->fd, r->start, r->end - r->start + 1);
- con->send_raw->bytes_in += (r->end - r->start + 1);
- chunkqueue_append_dummy_mem_chunk (con->send, r->end - r->start + 1);
- con->response.content_length += r->end - r->start + 1;
- ctx->response_content_length += r->end - r->start + 1;
- con->send->bytes_in += r->end - r->start + 1;
-
- buffer_copy_string(p->range_buf, "bytes ");
- buffer_append_off_t(p->range_buf, r->start);
- buffer_append_string(p->range_buf, "-");
- buffer_append_off_t(p->range_buf, r->end);
- buffer_append_string(p->range_buf, "/");
- buffer_append_off_t(p->range_buf, sce->st.st_size);
-
- response_header_insert(srv, con, CONST_STR_LEN("Content-Range"), CONST_BUF_LEN(p->range_buf));
- }
-
- /* ok, the file is set-up */
- return 0;
-}
-
-PHYSICALPATH_FUNC(mod_glusterfs_handle_physical) {
- plugin_data *p = p_d;
- stat_cache_entry *sce;
- size_t size = 0;
- handler_t ret = 0;
- mod_glusterfs_ctx_t *plugin_ctx = NULL;
-
- if (con->http_status != 0) return HANDLER_GO_ON;
- if (con->uri.path->used == 0) return HANDLER_GO_ON;
- if (con->physical.path->used == 0) return HANDLER_GO_ON;
-
- if (con->mode != DIRECT) return HANDLER_GO_ON;
-
- /*
- network_backend_write = srv->network_backend_write;
- srv->network_backend_write = mod_glusterfs_network_backend_write;
- */
-
- switch (con->request.http_method) {
- case HTTP_METHOD_GET:
- case HTTP_METHOD_POST:
- case HTTP_METHOD_HEAD:
- break;
-
- default:
- return HANDLER_GO_ON;
- }
-
- mod_glusterfs_patch_connection(srv, con, p);
-
- if (!p->conf.prefix || !p->conf.prefix->ptr) {
- return HANDLER_GO_ON;
- }
-
- if (!p->conf.document_root || p->conf.document_root->used == 0) {
- log_error_write(srv, __FILE__, __LINE__, "s", "glusterfs.document-root is not specified");
- con->http_status = 500;
- return HANDLER_FINISHED;
- }
-
- if (p->conf.handle <= 0) {
- glusterfs_init_ctx_t ctx;
-
- if (!p->conf.specfile || p->conf.specfile->used == 0) {
- return HANDLER_GO_ON;
- }
- memset (&ctx, 0, sizeof (ctx));
-
- ctx.specfile = p->conf.specfile->ptr;
- ctx.logfile = p->conf.logfile->ptr;
- ctx.loglevel = p->conf.loglevel->ptr;
- ctx.lookup_timeout = ctx.stat_timeout = p->conf.cache_timeout;
-
- p->conf.handle = (unsigned long)glusterfs_init (&ctx);
-
- if (p->conf.handle <= 0) {
- con->http_status = 500;
- log_error_write(srv, __FILE__, __LINE__, "sbs", "glusterfs initialization failed, please check your configuration. Glusterfs logfile ", p->conf.logfile, "might contain details");
- return HANDLER_FINISHED;
- }
- }
-
- size = 0;
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if (!con->plugin_ctx[p->id]) {
- buffer *tmp_buf = buffer_init_buffer (con->physical.basedir);
-
- plugin_ctx = calloc (1, sizeof (*plugin_ctx));
- /* ERR_ABORT (plugin_ctx); */
- con->plugin_ctx[p->id] = plugin_ctx;
-
- buffer_append_string_buffer (tmp_buf, p->conf.prefix);
- buffer_path_simplify (tmp_buf, tmp_buf);
-
- plugin_ctx->prefix = tmp_buf->used - 1;
- if (tmp_buf->ptr[plugin_ctx->prefix - 1] == '/')
- plugin_ctx->prefix--;
-
- buffer_free (tmp_buf);
- } else
- /*FIXME: error!! error!! */
- plugin_ctx = con->plugin_ctx[p->id];
-
-
- if (size)
- {
- plugin_ctx->buf = malloc (size);
- /* ERR_ABORT (plugin_ctx->buf); */
- }
-
- plugin_ctx->glusterfs_path = buffer_init ();
- buffer_copy_string_buffer (plugin_ctx->glusterfs_path, p->conf.document_root);
- buffer_append_string (plugin_ctx->glusterfs_path, "/");
- buffer_append_string (plugin_ctx->glusterfs_path, con->physical.path->ptr + plugin_ctx->prefix);
- buffer_path_simplify (plugin_ctx->glusterfs_path, plugin_ctx->glusterfs_path);
-
- ret = glusterfs_stat_cache_get_entry_async (srv, con, p, plugin_ctx->glusterfs_path, con->physical.path, plugin_ctx->buf, size, &sce);
-
- if (ret == HANDLER_ERROR) {
- free (plugin_ctx->buf);
- plugin_ctx->buf = NULL;
-
- buffer_free (plugin_ctx->glusterfs_path);
- plugin_ctx->glusterfs_path = NULL;
-
- free (plugin_ctx);
- con->plugin_ctx[p->id] = NULL;
-
- con->http_status = 500;
- ret = HANDLER_FINISHED;
- }
-
- return ret;
-}
-
-URIHANDLER_FUNC(mod_glusterfs_subrequest) {
- plugin_data *p = p_d;
- stat_cache_entry *sce = NULL;
- int s_len;
- unsigned long fd;
- char allow_caching = 1;
- size_t size = 0;
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
-
- /* someone else has done a decision for us */
- if (con->http_status != 0) return HANDLER_GO_ON;
- if (con->uri.path->used == 0) return HANDLER_GO_ON;
- if (con->physical.path->used == 0) return HANDLER_GO_ON;
-
- /* someone else has handled this request */
- if (con->mode != DIRECT) return HANDLER_GO_ON;
-
- /* we only handle GET, POST and HEAD */
- switch(con->request.http_method) {
- case HTTP_METHOD_GET:
- case HTTP_METHOD_POST:
- case HTTP_METHOD_HEAD:
- break;
- default:
- return HANDLER_GO_ON;
- }
-
- mod_glusterfs_patch_connection(srv, con, p);
-
- if (!p->conf.prefix || !p->conf.prefix->ptr)
- return HANDLER_GO_ON;
-
- if (!ctx) {
- con->http_status = 500;
- return HANDLER_FINISHED;
- }
-
- s_len = con->uri.path->used - 1;
- /* ignore certain extensions */
- /*
- for (k = 0; k < p->conf.exclude_exts->used; k++) {
- data_string *ds;
- ds = (data_string *)p->conf.exclude_exts->data[k];
-
- if (ds->value->used == 0) continue;
-
- if (!strncmp (ds->value->ptr, con->uri.path->ptr, strlen (ds->value->ptr)))
- break;
- }
-
- if (k == p->conf.exclude_exts->used) {
- return HANDLER_GO_ON;
- }
- */
-
- if (con->conf.log_request_handling) {
- log_error_write(srv, __FILE__, __LINE__, "s", "-- serving file from glusterfs");
- }
-
- if (HANDLER_ERROR == stat_cache_get_entry(srv, con, con->physical.path, &sce)) {
- con->http_status = 403;
-
- /* this might happen if the sce is removed from stat-cache after a successful glusterfs_lookup */
- if (ctx) {
- if (ctx->buf) {
- free (ctx->buf);
- ctx->buf = NULL;
- }
-
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- log_error_write(srv, __FILE__, __LINE__, "sbsb",
- "not a regular file:", con->uri.path,
- "->", con->physical.path);
-
- return HANDLER_FINISHED;
- }
-
- if (con->uri.path->ptr[s_len] == '/' || !S_ISREG(sce->st.st_mode)) {
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if ((size_t)sce->st.st_size > size) {
-
- fd = glusterfs_open ((libglusterfs_handle_t ) ((unsigned long)p->conf.handle), ctx->glusterfs_path->ptr, O_RDONLY, 0);
-
- if (!fd) {
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- con->http_status = 403;
- return HANDLER_FINISHED;
- }
- ctx->fd = fd;
- }
-
- /* we only handline regular files */
-#ifdef HAVE_LSTAT
- if ((sce->is_symlink == 1) && !con->conf.follow_symlink) {
- con->http_status = 403;
-
- if (con->conf.log_request_handling) {
- log_error_write(srv, __FILE__, __LINE__, "s", "-- access denied due symlink restriction");
- log_error_write(srv, __FILE__, __LINE__, "sb", "Path :", con->physical.path);
- }
-
- buffer_reset(con->physical.path);
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
-#endif
- if (!S_ISREG(sce->st.st_mode)) {
- con->http_status = 404;
-
- if (con->conf.log_file_not_found) {
- log_error_write(srv, __FILE__, __LINE__, "sbsb",
- "not a regular file:", con->uri.path,
- "->", sce->name);
- }
-
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
-
- /* mod_compress might set several data directly, don't overwrite them */
-
- /* set response content-type, if not set already */
-
- if (NULL == array_get_element(con->response.headers, CONST_STR_LEN("Content-Type"))) {
- if (buffer_is_empty(sce->content_type)) {
- /* we are setting application/octet-stream, but also announce that
- * this header field might change in the seconds few requests
- *
- * This should fix the aggressive caching of FF and the script download
- * seen by the first installations
- */
- response_header_overwrite(srv, con, CONST_STR_LEN("Content-Type"), CONST_STR_LEN("application/octet-stream"));
-
- allow_caching = 0;
- } else {
- response_header_overwrite(srv, con, CONST_STR_LEN("Content-Type"), CONST_BUF_LEN(sce->content_type));
- }
- }
-
- if (con->conf.range_requests) {
- response_header_overwrite(srv, con, CONST_STR_LEN("Accept-Ranges"), CONST_STR_LEN("bytes"));
- }
-
- /* TODO: Allow Cachable requests */
-#if 0
- if (allow_caching) {
- if (p->conf.etags_used && con->etag_flags != 0 && !buffer_is_empty(sce->etag)) {
- if (NULL == array_get_element(con->response.headers, "ETag")) {
- /* generate e-tag */
- etag_mutate(con->physical.etag, sce->etag);
-
- response_header_overwrite(srv, con, CONST_STR_LEN("ETag"), CONST_BUF_LEN(con->physical.etag));
- }
- }
-
- /* prepare header */
- if (NULL == (ds = (data_string *)array_get_element(con->response.headers, "Last-Modified"))) {
- mtime = strftime_cache_get(srv, sce->st.st_mtime);
- response_header_overwrite(srv, con, CONST_STR_LEN("Last-Modified"), CONST_BUF_LEN(mtime));
- } else {
- mtime = ds->value;
- }
-
- if (HANDLER_FINISHED == http_response_handle_cachable(srv, con, mtime)) {
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
-
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
- }
-#endif
-
- /*TODO: Read about etags */
- if (NULL != array_get_element(con->request.headers, CONST_STR_LEN("Range")) && con->conf.range_requests) {
- int do_range_request = 1;
- data_string *ds = NULL;
- buffer *mtime = NULL;
- /* check if we have a conditional GET */
-
- /* prepare header */
- if (NULL == (ds = (data_string *)array_get_element(con->response.headers, CONST_STR_LEN("Last-Modified")))) {
- mtime = strftime_cache_get(srv, sce->st.st_mtime);
- response_header_overwrite(srv, con, CONST_STR_LEN("Last-Modified"), CONST_BUF_LEN(mtime));
- } else {
- mtime = ds->value;
- }
-
- if (NULL != (ds = (data_string *)array_get_element(con->request.headers, CONST_STR_LEN("If-Range")))) {
- /* if the value is the same as our ETag, we do a Range-request,
- * otherwise a full 200 */
-
- if (ds->value->ptr[0] == '"') {
- /**
- * client wants a ETag
- */
- if (!con->physical.etag) {
- do_range_request = 0;
- } else if (!buffer_is_equal(ds->value, con->physical.etag)) {
- do_range_request = 0;
- }
- } else if (!mtime) {
- /**
- * we don't have a Last-Modified and can match the If-Range:
- *
- * sending all
- */
- do_range_request = 0;
- } else if (!buffer_is_equal(ds->value, mtime)) {
- do_range_request = 0;
- }
- }
-
- if (do_range_request) {
- /* content prepared, I'm done */
- con->send->is_closed = 1;
-
- if (0 == http_response_parse_range(srv, con, p)) {
- con->http_status = 206;
- }
- if (ctx) {
- if (ctx->glusterfs_path) {
- buffer_free (ctx->glusterfs_path);
- ctx->glusterfs_path = NULL;
- }
- free (ctx);
- con->plugin_ctx[p->id] = NULL;
- }
-
- return HANDLER_FINISHED;
- }
- }
-
- /* if we are still here, prepare body */
-
- /* we add it here for all requests
- * the HEAD request will drop it afterwards again
- */
-
- if (p->conf.xattr_file_size && p->conf.xattr_file_size->ptr)
- size = atoi (p->conf.xattr_file_size->ptr);
-
- if (size < (size_t)sce->st.st_size) {
- chunkqueue_append_glusterfs_file (con->send_raw, fd, 0, sce->st.st_size);
- con->send_raw->bytes_in += sce->st.st_size;
- chunkqueue_append_dummy_mem_chunk (con->send, sce->st.st_size);
- } else {
- if (!ctx->buf) {
- con->http_status = 404;
- return HANDLER_ERROR;
- }
- chunkqueue_append_glusterfs_mem (con->send, ctx->buf, sce->st.st_size);
- ctx->buf = NULL;
- }
- ctx->response_content_length = con->response.content_length = sce->st.st_size;
-
- con->send->is_closed = 1;
- con->send->bytes_in = sce->st.st_size;
-
- return HANDLER_FINISHED;
-}
-
-/* this function is called at dlopen() time and inits the callbacks */
-CONNECTION_FUNC(mod_glusterfs_connection_reset)
-{
- (void) p_d;
- (void) con;
- if (!network_backend_write)
- network_backend_write = srv->network_backend_write;
-
- srv->network_backend_write = mod_glusterfs_network_backend_write;
-
- return HANDLER_GO_ON;
-}
-
-URIHANDLER_FUNC(mod_glusterfs_response_done) {
- plugin_data *p = p_d;
- UNUSED (srv);
- mod_glusterfs_ctx_t *ctx = con->plugin_ctx[p->id];
-
- con->plugin_ctx[p->id] = NULL;
- if (ctx->glusterfs_path) {
- free (ctx->glusterfs_path);
- }
-
- free (ctx);
- return HANDLER_GO_ON;
-}
-
-int mod_glusterfs_plugin_init(plugin *p) {
- p->version = LIGHTTPD_VERSION_ID;
- p->name = buffer_init_string("glusterfs");
- p->init = mod_glusterfs_init;
- p->handle_physical = mod_glusterfs_handle_physical;
- p->handle_start_backend = mod_glusterfs_subrequest;
- p->handle_response_done = mod_glusterfs_response_done;
- p->set_defaults = mod_glusterfs_set_defaults;
- p->connection_reset = mod_glusterfs_connection_reset;
- p->cleanup = mod_glusterfs_free;
-
- p->data = NULL;
-
- return 0;
-}
diff --git a/mod_glusterfs/lighttpd/1.5/mod_glusterfs.h b/mod_glusterfs/lighttpd/1.5/mod_glusterfs.h
deleted file mode 100644
index 5f9bb2c5bf0..00000000000
--- a/mod_glusterfs/lighttpd/1.5/mod_glusterfs.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _MOD_GLUSTERFS_FILE_CACHE_H_
-#define _MOD_GLUSTERFS_FILE_CACHE_H_
-
-#include "stat_cache.h"
-#include <libglusterfsclient.h>
-#include "base.h"
-
-handler_t glusterfs_stat_cache_get_entry(server *srv, connection *con, libglusterfs_handle_t handle, buffer *glusterfs_path, buffer *name, void *buf, size_t size, stat_cache_entry **fce);
-
-#endif
diff --git a/mod_glusterfs/lighttpd/Makefile.am b/mod_glusterfs/lighttpd/Makefile.am
deleted file mode 100644
index c934412b3dc..00000000000
--- a/mod_glusterfs/lighttpd/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = 1.4 1.5
-
-CLEANFILES =
diff --git a/rfc.sh b/rfc.sh
index b06ac512d7d..e7faec9ea0f 100755
--- a/rfc.sh
+++ b/rfc.sh
@@ -1,21 +1,62 @@
#!/bin/sh -e
+# Since we run with '#!/bin/sh -e'
+# '$? != 0' will force an exit,
+# i.e. where we are interested in the result of a command,
+# we have to run the command in an if-statement.
+UPSTREAM=${GLUSTER_UPSTREAM}
+if [ "x$UPSTREAM" -eq "x" ]; then
+ for rmt in $(git remote); do
+ rmt_repo=$(git remote show $rmt -n | grep Fetch | awk '{ print $3 }');
+ if [ $rmt_repo -eq "git@github:gluster/glusterfs" ]; then
+ UPSTREAM=$rmt
+ echo "Picked $rmt as upstream remote"
+ break
+ fi
+ done
+fi
-branch="master";
+USER_REPO=${GLUSTER_USER_REPO:-origin}
+if [ "x${USER_REPO}" -eq "x${UPSTREAM}" ] ; then
+ echo "When you submit patches, it should get submitted to your fork, not to upstream directly"
+ echo "If you are not sure, check `for rmt in $(git remote); do git remote show $rmt -n; done`"
+ echo "And pick the correct remote you would like to push to and do `export GLUSTER_USER_REPO=$rmt`"
+ echo ""
+ echo "Exiting..."
+ exit 1
+fi
+
+while getopts "v" opt; do
+ case $opt in
+ v)
+ # Verbose mode
+ git () { >&2 echo "git $@" && `which git` $@; }
+ ;;
+ esac
+done
+# Move the positional arguments to the beginning
+shift $((OPTIND-1))
+
+
+branch="devel";
+
set_hooks_commit_msg()
{
f=".git/hooks/commit-msg";
- u="http://review.gluster.com/tools/hooks/commit-msg";
+ u="http://review.gluster.org/tools/hooks/commit-msg";
if [ -x "$f" ]; then
return;
fi
- curl -o $f $u || wget -O $f $u;
+ curl -L -o $f $u || wget -O $f $u;
+
+ chmod +x $f
- chmod +x .git/hooks/commit-msg;
+ # Let the 'Change-Id: ' header get assigned on first run of rfc.sh
+ GIT_EDITOR=true git commit --amend;
}
@@ -28,15 +69,120 @@ is_num()
[ -z "$(echo $num | sed -e 's/[0-9]//g')" ]
}
+backport_id_message()
+{
+ echo ""
+ echo "This commit is to a non-devel branch, and hence is treated as a backport."
+ echo ""
+ echo "For backports we would like to retain the same gerrit Change-Id across"
+ echo "branches. On auto inspection it is found that a gerrit Change-Id is"
+ echo "missing, or the Change-Id is not found on your local devel branch"
+ echo ""
+ echo "This could mean a few things:"
+ echo " 1. This is not a backport, hence choose Y on the prompt to proceed"
+ echo " 2. Your $USER_REPO/devel is not up to date, hence the script is unable"
+ echo " to find the corresponding Change-Id on devel. Either choose N,"
+ echo " 'git fetch', and try again, OR if you are sure you used the"
+ echo " same Change-Id, choose Y at the prompt to proceed"
+ echo " 3. You commented or removed the Change-Id in your commit message after"
+ echo " cherry picking the commit. Choose N, fix the commit message to"
+ echo " use the same Change-Id as 'devel' (git commit --amend), resubmit"
+ echo ""
+}
-rebase_changes()
+check_backport()
{
- git fetch origin;
+ moveon='N'
+
+ # Backports are never made to 'devel'
+ if [ $branch = "devel" ]; then
+ return;
+ fi
+
+ # Extract the change ID from the commit message
+ changeid=$(git log -n1 --format='%b' | grep -i '^Change-Id: ' | awk '{print $2}')
- GIT_EDITOR=$0 git rebase -i origin/$branch;
+ # If there is no change ID ask if we should continue
+ if [ -z "$changeid" ]; then
+ backport_id_message;
+ echo -n "Did not find a Change-Id for a possible backport. Continue (y/N): "
+ read moveon
+ else
+ # Search 'devel' for the same change ID (rebase_changes has run, so we
+ # should never not find a Change-Id)
+ mchangeid=$(git log $UPSTREAM/devel --format='%b' --grep="^Change-Id: ${changeid}" | grep ${changeid} | awk '{print $2}')
+
+ # Check if we found the change ID on 'devel', else throw a message to
+ # decide if we should continue.
+ # NOTE: If 'devel' was not rebased, we will not find the Change-ID and
+ # could hit a false positive case here (or if someone checks out some
+ # other branch as 'devel').
+ if [ "${mchangeid}" = "${changeid}" ]; then
+ moveon="Y"
+ else
+ backport_id_message;
+ echo "Change-Id of commit: $changeid"
+ echo "Change-Id on devel: $mchangeid"
+ echo -n "Did not find mentioned Change-Id on 'devel' for a possible backport. Continue (y/N): "
+ read moveon
+ fi
+ fi
+
+ if [ "${moveon}" = 'Y' ] || [ "${moveon}" = 'y' ]; then
+ return;
+ else
+ exit 1
+ fi
}
+rebase_changes()
+{
+ GIT_EDITOR=$0 git rebase -i $UPSTREAM/$branch;
+}
+
+
+# Regex elaborated:
+# grep options:
+# -w -> --word-regexp (from the man page)
+# Select only those lines containing matches that form whole words.
+# The test is that the matching substring must either be at the
+# beginning of the line, or preceded by a non-word constituent
+# character. Similarly, it must be either at the end of the line or
+# followed by a non-word constituent character. Word-constituent
+# characters are letters, digits, and the underscore.
+#
+# IOW, the above helps us find the pattern with leading or training
+# spaces or non word consituents like , or ;
+#
+# -i -> --ignore-case (case insensitive search)
+#
+# -o -> --only-matching (only print matching portion of the line)
+#
+# -E -> --extended-regexp (use extended regular expression)
+#
+# ^
+# The search begins at the start of each line
+#
+# [[:space:]]*
+# Any number of spaces is accepted
+#
+# (Fixes|Updates)
+# Finds 'Fixes' OR 'Updates' in any case combination
+#
+# (:)?
+# Followed by an optional : (colon)
+#
+# [[:space:]]+
+# Followed by 1 or more spaces
+#
+# #
+# Followed by #
+#
+# [[:digit:]]+
+# Followed by 1 or more digits
+REFRE="^[[:space:]]*(Fixes|Updates)(:)?[[:space:]]+#[[:digit:]]+"
+
editor_mode()
{
if [ $(basename "$1") = "git-rebase-todo" ]; then
@@ -45,22 +191,34 @@ editor_mode()
fi
if [ $(basename "$1") = "COMMIT_EDITMSG" ]; then
- if grep -qi '^BUG: ' $1; then
+ # see note above function warn_reference_missing for regex elaboration
+ # Lets first check for github issues
+ ref=$(git log -n1 --format='%b' | grep -iow -E "${REFRE}" | awk -F '#' '{print $2}');
+ if [ "x${ref}" != "x" ]; then
return;
fi
+
while true; do
echo Commit: "\"$(head -n 1 $1)\""
- echo -n "Enter Bug ID: "
- read bug
- if [ -z "$bug" ]; then
+ echo -n "Github Issue ID: "
+ read issue
+ if [ -z "$issue" ]; then
return;
fi
- if ! is_num "$bug"; then
- echo "Invalid Bug ID ($bug)!!!";
+ if ! is_num "$issue"; then
+ echo "Invalid Github Issue ID!!!";
continue;
fi
- sed "/^Change-Id:/{p; s/^.*$/BUG: $bug/;}" $1 > $1.new && \
+ echo "Select yes '(y)' if this patch fixes the issue/feature completely,"
+ echo -n "or is the last of the patchset which brings feature (Y/n): "
+ read fixes
+ fixes_string="Fixes"
+ if [ "${fixes}" = 'N' ] || [ "${fixes}" = 'n' ]; then
+ fixes_string="Updates"
+ fi
+
+ sed "/^Change-Id:/{p; s/^.*$/${fixes_string}: #${issue}/;}" $1 > $1.new && \
mv $1.new $1;
return;
done
@@ -76,24 +234,85 @@ EOF
assert_diverge()
{
- git diff origin/$branch..HEAD | grep -q .;
+ git diff $UPSTREAM/$branch..HEAD | grep -q .;
}
+warn_reference_missing()
+{
+ echo ""
+ echo "=== Missing a reference in commit! ==="
+ echo ""
+ echo "Gluster commits are made with a reference to a github issue"
+ echo ""
+ echo "A check on the commit message, reveals that there is no "
+ echo "github issue referenced in the commit message."
+ echo ""
+ echo "https://github.com/gluster/glusterfs/issues/new"
+ echo ""
+ echo "Please open an issue and reference the same in the commit message "
+ echo "using the following tags:"
+ echo ""
+ echo "\"Fixes: #NNNN\" OR \"Updates: #NNNN\","
+ echo "where NNNN is the issue id"
+ echo ""
+ echo "You may abort the submission choosing 'N' below and use"
+ echo "'git commit --amend' to add the issue reference before posting"
+ echo "to gerrit."
+ echo ""
+ echo -n "Missing reference to a github issue. Continue (y/N): "
+ read moveon
+ if [ "${moveon}" = 'Y' ] || [ "${moveon}" = 'y' ]; then
+ return;
+ else
+ exit 1
+ fi
+}
+
main()
{
+ set_hooks_commit_msg;
+
+ # rfc.sh calls itself from rebase_changes, which uses rfc.sh as the EDITOR
+ # thus, getting the commit message to work with in the editor_mode.
if [ -e "$1" ]; then
editor_mode "$@";
return;
fi
- set_hooks_commit_msg;
+ git fetch $UPSTREAM;
rebase_changes;
+ check_backport;
+
assert_diverge;
- bug=$(git show --format='%b' | grep -i '^BUG: ' | awk '{print $2}');
+ # see note above variable REFRE for regex elaboration
+ reference=$(git log -n1 --format='%b' | grep -iow -E "${REFRE}" | awk -F '#' '{print $2}');
+
+ # If this is a commit against 'devel' and does not have a github
+ # issue reference. Warn the contributor that one of the 2 is required
+ if [ -z "${reference}" ] && [ $branch = "devel" ]; then
+ warn_reference_missing;
+ fi
+
+ # TODO: add clang-format command here. It will after the changes are done everywhere else
+ clang_format=$(clang-format --version)
+ if [ ! -z "${clang_format}" ]; then
+ # Considering git show may not give any files as output matching the
+ # criteria, good to tell script not to fail on error
+ set +e
+ list_of_files=$(git show --pretty="format:" --name-only |
+ grep -v "contrib/" | egrep --color=never "*\.[ch]$");
+ if [ ! -z "${list_of_files}" ]; then
+ echo "${list_of_files}" | xargs clang-format -i
+ fi
+ set -e
+ else
+ echo "High probability of your patch not passing smoke due to coding standard check"
+ echo "Please install 'clang-format' to format the patch before submitting"
+ fi
if [ "$DRY_RUN" = 1 ]; then
drier='echo -e Please use the following command to send your commits to review:\n\n'
@@ -101,10 +320,10 @@ main()
drier=
fi
- if [ -z "$bug" ]; then
- $drier git push origin HEAD:refs/for/$branch/rfc;
+ if [ -z "${reference}" ]; then
+ $drier git push $USER_REPO HEAD:temp_${branch}/$(date +%Y-%m-%d_%s);
else
- $drier git push origin HEAD:refs/for/$branch/bug-$bug;
+ $drier git push $USER_REPO HEAD:issue${reference}_${branch};
fi
}
diff --git a/rpc/Makefile.am b/rpc/Makefile.am
index ffb76e901fc..183b7a0352f 100644
--- a/rpc/Makefile.am
+++ b/rpc/Makefile.am
@@ -1 +1 @@
-SUBDIRS = rpc-lib rpc-transport xdr
+SUBDIRS = xdr rpc-lib rpc-transport
diff --git a/rpc/rpc-lib/src/Makefile.am b/rpc/rpc-lib/src/Makefile.am
index 8b087301c63..35c9db07e7f 100644
--- a/rpc/rpc-lib/src/Makefile.am
+++ b/rpc/rpc-lib/src/Makefile.am
@@ -1,16 +1,29 @@
lib_LTLIBRARIES = libgfrpc.la
libgfrpc_la_SOURCES = auth-unix.c rpcsvc-auth.c rpcsvc.c auth-null.c \
- rpc-transport.c xdr-rpc.c xdr-rpcclnt.c rpc-clnt.c auth-glusterfs.c
+ rpc-transport.c xdr-rpc.c xdr-rpcclnt.c rpc-clnt.c auth-glusterfs.c \
+ rpc-drc.c rpc-clnt-ping.c \
+ autoscale-threads.c mgmt-pmap.c
-libgfrpc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+EXTRA_DIST = libgfrpc.sym
-noinst_HEADERS = rpcsvc.h rpc-transport.h xdr-common.h xdr-rpc.h xdr-rpcclnt.h \
- rpc-clnt.h rpcsvc-common.h protocol-common.h
+libgfrpc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la
+libgfrpc_la_LDFLAGS = -version-info $(LIBGFRPC_LT_VERSION) $(GF_LDFLAGS) \
+ -export-symbols $(top_srcdir)/rpc/rpc-lib/src/libgfrpc.sym
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
+libgfrpc_la_HEADERS = rpcsvc.h rpc-transport.h xdr-common.h xdr-rpc.h xdr-rpcclnt.h \
+ rpc-clnt.h rpcsvc-common.h protocol-common.h rpc-drc.h rpc-clnt-ping.h \
+ rpc-lib-messages.h
+
+libgfrpc_ladir = $(includedir)/glusterfs/rpc
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
-I$(top_srcdir)/rpc/xdr/src \
- -DRPC_TRANSPORTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/rpc-transport\"
+ -I$(top_builddir)/rpc/xdr/src \
+ -DRPC_TRANSPORTDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/rpc-transport\" \
+ -I$(top_srcdir)/contrib/rbtree
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES = *~
diff --git a/rpc/rpc-lib/src/auth-glusterfs.c b/rpc/rpc-lib/src/auth-glusterfs.c
index 5f41c829672..69a96f7512f 100644
--- a/rpc/rpc-lib/src/auth-glusterfs.c
+++ b/rpc/rpc-lib/src/auth-glusterfs.c
@@ -8,235 +8,379 @@
cases as published by the Free Software Foundation.
*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "rpcsvc.h"
-#include "list.h"
-#include "dict.h"
+#include <glusterfs/dict.h>
#include "xdr-rpc.h"
#include "xdr-common.h"
#include "rpc-common-xdr.h"
+#include "glusterfs4-xdr.h"
/* V1 */
ssize_t
-xdr_to_glusterfs_auth (char *buf, struct auth_glusterfs_parms *req)
+xdr_to_glusterfs_auth(char *buf, struct auth_glusterfs_parms *req)
{
- XDR xdr;
- ssize_t ret = -1;
-
- if ((!buf) || (!req))
- return -1;
-
- xdrmem_create (&xdr, buf, sizeof (struct auth_glusterfs_parms),
- XDR_DECODE);
- if (!xdr_auth_glusterfs_parms (&xdr, req)) {
- gf_log ("", GF_LOG_WARNING,
- "failed to decode glusterfs parameters");
- ret = -1;
- goto ret;
- }
-
- ret = (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base));
-ret:
- return ret;
+ XDR xdr;
+ ssize_t ret = -1;
+
+ if ((!buf) || (!req))
+ return -1;
+
+ xdrmem_create(&xdr, buf, sizeof(struct auth_glusterfs_parms), XDR_DECODE);
+ if (!xdr_auth_glusterfs_parms(&xdr, req)) {
+ gf_log("", GF_LOG_WARNING, "failed to decode glusterfs parameters");
+ ret = -1;
+ goto ret;
+ }
+ ret = (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base));
+ret:
+ return ret;
}
int
-auth_glusterfs_request_init (rpcsvc_request_t *req, void *priv)
+auth_glusterfs_request_init(rpcsvc_request_t *req, void *priv)
{
- if (!req)
- return -1;
- memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES);
- req->verf.datalen = 0;
- req->verf.flavour = AUTH_NULL;
-
- return 0;
+ return 0;
}
-int auth_glusterfs_authenticate (rpcsvc_request_t *req, void *priv)
+int
+auth_glusterfs_authenticate(rpcsvc_request_t *req, void *priv)
{
- struct auth_glusterfs_parms au = {0,};
-
- int ret = RPCSVC_AUTH_REJECT;
- int gidcount = 0;
- int j = 0;
- int i = 0;
-
- if (!req)
- return ret;
-
- ret = xdr_to_glusterfs_auth (req->cred.authdata, &au);
- if (ret == -1) {
- gf_log ("", GF_LOG_WARNING,
- "failed to decode glusterfs credentials");
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- req->pid = au.pid;
- req->uid = au.uid;
- req->gid = au.gid;
- req->lk_owner.len = 8;
- {
- for (i = 0; i < req->lk_owner.len; i++, j += 8)
- req->lk_owner.data[i] = (char)((au.lk_owner >> j) & 0xff);
- }
- req->auxgidcount = au.ngrps;
-
- if (req->auxgidcount > 16) {
- gf_log ("", GF_LOG_WARNING,
- "more than 16 aux gids found, failing authentication");
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- for (gidcount = 0; gidcount < au.ngrps; ++gidcount)
- req->auxgids[gidcount] = au.groups[gidcount];
-
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth Info: pid: %u, uid: %d"
- ", gid: %d, owner: %s",
- req->pid, req->uid, req->gid, lkowner_utoa (&req->lk_owner));
- ret = RPCSVC_AUTH_ACCEPT;
-err:
+ struct auth_glusterfs_parms au = {
+ 0,
+ };
+
+ int ret = RPCSVC_AUTH_REJECT;
+ int j = 0;
+ int i = 0;
+ int gidcount = 0;
+
+ if (!req)
return ret;
+
+ ret = xdr_to_glusterfs_auth(req->cred.authdata, &au);
+ if (ret == -1) {
+ gf_log("", GF_LOG_WARNING, "failed to decode glusterfs credentials");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ req->pid = au.pid;
+ req->uid = au.uid;
+ req->gid = au.gid;
+ req->lk_owner.len = 8;
+ {
+ for (i = 0; i < req->lk_owner.len; i++, j += 8)
+ req->lk_owner.data[i] = (char)((au.lk_owner >> j) & 0xff);
+ }
+ req->auxgidcount = au.ngrps;
+
+ if (req->auxgidcount > 16) {
+ gf_log("", GF_LOG_WARNING,
+ "more than 16 aux gids found, failing authentication");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ if (req->auxgidcount > SMALL_GROUP_COUNT) {
+ req->auxgidlarge = GF_CALLOC(req->auxgidcount, sizeof(req->auxgids[0]),
+ gf_common_mt_auxgids);
+ req->auxgids = req->auxgidlarge;
+ } else {
+ req->auxgids = req->auxgidsmall;
+ }
+
+ if (!req->auxgids) {
+ gf_log("auth-glusterfs", GF_LOG_WARNING, "cannot allocate gid list");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ for (gidcount = 0; gidcount < au.ngrps; ++gidcount)
+ req->auxgids[gidcount] = au.groups[gidcount];
+
+ gf_log(GF_RPCSVC, GF_LOG_TRACE,
+ "Auth Info: pid: %u, uid: %d"
+ ", gid: %d, owner: %s",
+ req->pid, req->uid, req->gid, lkowner_utoa(&req->lk_owner));
+ ret = RPCSVC_AUTH_ACCEPT;
+err:
+ return ret;
}
rpcsvc_auth_ops_t auth_glusterfs_ops = {
- .transport_init = NULL,
- .request_init = auth_glusterfs_request_init,
- .authenticate = auth_glusterfs_authenticate
-};
-
-rpcsvc_auth_t rpcsvc_auth_glusterfs = {
- .authname = "AUTH_GLUSTERFS",
- .authnum = AUTH_GLUSTERFS,
- .authops = &auth_glusterfs_ops,
- .authprivate = NULL
-};
+ .transport_init = NULL,
+ .request_init = auth_glusterfs_request_init,
+ .authenticate = auth_glusterfs_authenticate};
+rpcsvc_auth_t rpcsvc_auth_glusterfs = {.authname = "AUTH_GLUSTERFS",
+ .authnum = AUTH_GLUSTERFS,
+ .authops = &auth_glusterfs_ops,
+ .authprivate = NULL};
rpcsvc_auth_t *
-rpcsvc_auth_glusterfs_init (rpcsvc_t *svc, dict_t *options)
+rpcsvc_auth_glusterfs_init(rpcsvc_t *svc, dict_t *options)
{
- return &rpcsvc_auth_glusterfs;
+ return &rpcsvc_auth_glusterfs;
}
/* V2 */
ssize_t
-xdr_to_glusterfs_auth_v2 (char *buf, struct auth_glusterfs_parms_v2 *req)
+xdr_to_glusterfs_auth_v2(char *buf, struct auth_glusterfs_parms_v2 *req)
{
- XDR xdr;
- ssize_t ret = -1;
+ XDR xdr;
+ ssize_t ret = -1;
- if ((!buf) || (!req))
- return -1;
+ if ((!buf) || (!req))
+ return -1;
- xdrmem_create (&xdr, buf, GF_MAX_AUTH_BYTES, XDR_DECODE);
- if (!xdr_auth_glusterfs_parms_v2 (&xdr, req)) {
- gf_log ("", GF_LOG_WARNING,
- "failed to decode glusterfs v2 parameters");
- ret = -1;
- goto ret;
- }
+ xdrmem_create(&xdr, buf, GF_MAX_AUTH_BYTES, XDR_DECODE);
+ if (!xdr_auth_glusterfs_parms_v2(&xdr, req)) {
+ gf_log("", GF_LOG_WARNING, "failed to decode glusterfs v2 parameters");
+ ret = -1;
+ goto ret;
+ }
- ret = (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base));
+ ret = (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base));
ret:
- return ret;
-
+ return ret;
}
int
-auth_glusterfs_v2_request_init (rpcsvc_request_t *req, void *priv)
+auth_glusterfs_v2_request_init(rpcsvc_request_t *req, void *priv)
{
- if (!req)
- return -1;
- memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES);
- req->verf.datalen = 0;
- req->verf.flavour = AUTH_NULL;
-
- return 0;
+ return 0;
}
-int auth_glusterfs_v2_authenticate (rpcsvc_request_t *req, void *priv)
+int
+auth_glusterfs_v2_authenticate(rpcsvc_request_t *req, void *priv)
{
- struct auth_glusterfs_parms_v2 au = {0,};
- int ret = RPCSVC_AUTH_REJECT;
- int i = 0;
-
- if (!req)
- return ret;
-
- ret = xdr_to_glusterfs_auth_v2 (req->cred.authdata, &au);
- if (ret == -1) {
- gf_log ("", GF_LOG_WARNING,
- "failed to decode glusterfs credentials");
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- req->pid = au.pid;
- req->uid = au.uid;
- req->gid = au.gid;
- req->lk_owner.len = au.lk_owner.lk_owner_len;
- req->auxgidcount = au.groups.groups_len;
-
- if (req->auxgidcount > GF_MAX_AUX_GROUPS) {
- gf_log ("", GF_LOG_WARNING,
- "more than max aux gids found (%d) , truncating it "
- "to %d and continuing", au.groups.groups_len,
- GF_MAX_AUX_GROUPS);
- req->auxgidcount = GF_MAX_AUX_GROUPS;
- }
-
- if (req->lk_owner.len > GF_MAX_LOCK_OWNER_LEN) {
- gf_log ("", GF_LOG_WARNING,
- "lkowner field > 1k, failing authentication");
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- for (i = 0; i < req->auxgidcount; ++i)
- req->auxgids[i] = au.groups.groups_val[i];
-
- for (i = 0; i < au.lk_owner.lk_owner_len; ++i)
- req->lk_owner.data[i] = au.lk_owner.lk_owner_val[i];
-
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth Info: pid: %u, uid: %d"
- ", gid: %d, owner: %s",
- req->pid, req->uid, req->gid, lkowner_utoa (&req->lk_owner));
- ret = RPCSVC_AUTH_ACCEPT;
+ struct auth_glusterfs_parms_v2 au = {
+ 0,
+ };
+ int ret = RPCSVC_AUTH_REJECT;
+ int i = 0;
+ int max_groups = 0;
+ int max_lk_owner_len = 0;
+
+ if (!req)
+ return ret;
+
+ ret = xdr_to_glusterfs_auth_v2(req->cred.authdata, &au);
+ if (ret == -1) {
+ gf_log("", GF_LOG_WARNING, "failed to decode glusterfs credentials");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ req->pid = au.pid;
+ req->uid = au.uid;
+ req->gid = au.gid;
+ req->lk_owner.len = au.lk_owner.lk_owner_len;
+ req->auxgidcount = au.groups.groups_len;
+
+ /* the number of groups and size of lk_owner depend on each other */
+ max_groups = GF_AUTH_GLUSTERFS_MAX_GROUPS(req->lk_owner.len,
+ AUTH_GLUSTERFS_v2);
+ max_lk_owner_len = GF_AUTH_GLUSTERFS_MAX_LKOWNER(req->auxgidcount,
+ AUTH_GLUSTERFS_v2);
+
+ if (req->auxgidcount > max_groups) {
+ gf_log("", GF_LOG_WARNING,
+ "more than max aux gids found (%d) , truncating it "
+ "to %d and continuing",
+ au.groups.groups_len, max_groups);
+ req->auxgidcount = max_groups;
+ }
+
+ if (req->lk_owner.len > max_lk_owner_len) {
+ gf_log("", GF_LOG_WARNING,
+ "lkowner field to big (%d), depends on the number of "
+ "groups (%d), failing authentication",
+ req->lk_owner.len, req->auxgidcount);
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ if (req->auxgidcount > SMALL_GROUP_COUNT) {
+ req->auxgidlarge = GF_CALLOC(req->auxgidcount, sizeof(req->auxgids[0]),
+ gf_common_mt_auxgids);
+ req->auxgids = req->auxgidlarge;
+ } else {
+ req->auxgids = req->auxgidsmall;
+ }
+
+ if (!req->auxgids) {
+ gf_log("auth-glusterfs-v2", GF_LOG_WARNING, "cannot allocate gid list");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ for (i = 0; i < req->auxgidcount; ++i)
+ req->auxgids[i] = au.groups.groups_val[i];
+
+ for (i = 0; i < au.lk_owner.lk_owner_len; ++i)
+ req->lk_owner.data[i] = au.lk_owner.lk_owner_val[i];
+
+ gf_log(GF_RPCSVC, GF_LOG_TRACE,
+ "Auth Info: pid: %u, uid: %d"
+ ", gid: %d, owner: %s",
+ req->pid, req->uid, req->gid, lkowner_utoa(&req->lk_owner));
+ ret = RPCSVC_AUTH_ACCEPT;
err:
- /* TODO: instead use alloca() for these variables */
- if (au.groups.groups_val)
- free (au.groups.groups_val);
- if (au.lk_owner.lk_owner_val)
- free (au.lk_owner.lk_owner_val);
+ /* TODO: instead use alloca() for these variables */
+ free(au.groups.groups_val);
+ free(au.lk_owner.lk_owner_val);
- return ret;
+ return ret;
}
rpcsvc_auth_ops_t auth_glusterfs_ops_v2 = {
- .transport_init = NULL,
- .request_init = auth_glusterfs_v2_request_init,
- .authenticate = auth_glusterfs_v2_authenticate
-};
+ .transport_init = NULL,
+ .request_init = auth_glusterfs_v2_request_init,
+ .authenticate = auth_glusterfs_v2_authenticate};
+
+rpcsvc_auth_t rpcsvc_auth_glusterfs_v2 = {.authname = "AUTH_GLUSTERFS-v2",
+ .authnum = AUTH_GLUSTERFS_v2,
+ .authops = &auth_glusterfs_ops_v2,
+ .authprivate = NULL};
+
+rpcsvc_auth_t *
+rpcsvc_auth_glusterfs_v2_init(rpcsvc_t *svc, dict_t *options)
+{
+ return &rpcsvc_auth_glusterfs_v2;
+}
+
+/* V3 */
+
+ssize_t
+xdr_to_glusterfs_auth_v3(char *buf, struct auth_glusterfs_params_v3 *req)
+{
+ XDR xdr;
+ ssize_t ret = -1;
+
+ if ((!buf) || (!req))
+ return -1;
+
+ xdrmem_create(&xdr, buf, GF_MAX_AUTH_BYTES, XDR_DECODE);
+ if (!xdr_auth_glusterfs_params_v3(&xdr, req)) {
+ gf_log("", GF_LOG_WARNING, "failed to decode glusterfs v3 parameters");
+ ret = -1;
+ goto ret;
+ }
+
+ ret = (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base));
+ret:
+ return ret;
+}
+
+int
+auth_glusterfs_v3_request_init(rpcsvc_request_t *req, void *priv)
+{
+ return 0;
+}
+
+int
+auth_glusterfs_v3_authenticate(rpcsvc_request_t *req, void *priv)
+{
+ struct auth_glusterfs_params_v3 au = {
+ 0,
+ };
+ int ret = RPCSVC_AUTH_REJECT;
+ int i = 0;
+ int max_groups = 0;
+ int max_lk_owner_len = 0;
+
+ if (!req)
+ return ret;
+
+ ret = xdr_to_glusterfs_auth_v3(req->cred.authdata, &au);
+ if (ret == -1) {
+ gf_log("", GF_LOG_WARNING, "failed to decode glusterfs credentials");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ req->pid = au.pid;
+ req->uid = au.uid;
+ req->gid = au.gid;
+ req->lk_owner.len = au.lk_owner.lk_owner_len;
+ req->auxgidcount = au.groups.groups_len;
+
+ /* the number of groups and size of lk_owner depend on each other */
+ max_groups = GF_AUTH_GLUSTERFS_MAX_GROUPS(req->lk_owner.len,
+ AUTH_GLUSTERFS_v3);
+ max_lk_owner_len = GF_AUTH_GLUSTERFS_MAX_LKOWNER(req->auxgidcount,
+ AUTH_GLUSTERFS_v3);
+
+ if (req->auxgidcount > max_groups) {
+ gf_log("", GF_LOG_WARNING,
+ "more than max aux gids found (%d) , truncating it "
+ "to %d and continuing",
+ au.groups.groups_len, max_groups);
+ req->auxgidcount = max_groups;
+ }
+
+ if (req->lk_owner.len > max_lk_owner_len) {
+ gf_log("", GF_LOG_WARNING,
+ "lkowner field to big (%d), depends on the number of "
+ "groups (%d), failing authentication",
+ req->lk_owner.len, req->auxgidcount);
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ if (req->auxgidcount > SMALL_GROUP_COUNT) {
+ req->auxgidlarge = GF_CALLOC(req->auxgidcount, sizeof(req->auxgids[0]),
+ gf_common_mt_auxgids);
+ req->auxgids = req->auxgidlarge;
+ } else {
+ req->auxgids = req->auxgidsmall;
+ }
+
+ if (!req->auxgids) {
+ gf_log("auth-glusterfs-v2", GF_LOG_WARNING, "cannot allocate gid list");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ for (i = 0; i < req->auxgidcount; ++i)
+ req->auxgids[i] = au.groups.groups_val[i];
+
+ for (i = 0; i < au.lk_owner.lk_owner_len; ++i)
+ req->lk_owner.data[i] = au.lk_owner.lk_owner_val[i];
+
+ /* All new things, starting glusterfs-4.0.0 */
+ req->flags = au.flags;
+ req->ctime.tv_sec = au.ctime_sec;
+ req->ctime.tv_nsec = au.ctime_nsec;
+
+ gf_log(GF_RPCSVC, GF_LOG_TRACE,
+ "Auth Info: pid: %u, uid: %d"
+ ", gid: %d, owner: %s, flags: %d",
+ req->pid, req->uid, req->gid, lkowner_utoa(&req->lk_owner),
+ req->flags);
+ ret = RPCSVC_AUTH_ACCEPT;
+err:
+ /* TODO: instead use alloca() for these variables */
+ free(au.groups.groups_val);
+ free(au.lk_owner.lk_owner_val);
+
+ return ret;
+}
-rpcsvc_auth_t rpcsvc_auth_glusterfs_v2 = {
- .authname = "AUTH_GLUSTERFS-v2",
- .authnum = AUTH_GLUSTERFS_v2,
- .authops = &auth_glusterfs_ops_v2,
- .authprivate = NULL
-};
+rpcsvc_auth_ops_t auth_glusterfs_ops_v3 = {
+ .transport_init = NULL,
+ .request_init = auth_glusterfs_v3_request_init,
+ .authenticate = auth_glusterfs_v3_authenticate};
+rpcsvc_auth_t rpcsvc_auth_glusterfs_v3 = {.authname = "AUTH_GLUSTERFS-v3",
+ .authnum = AUTH_GLUSTERFS_v3,
+ .authops = &auth_glusterfs_ops_v3,
+ .authprivate = NULL};
rpcsvc_auth_t *
-rpcsvc_auth_glusterfs_v2_init (rpcsvc_t *svc, dict_t *options)
+rpcsvc_auth_glusterfs_v3_init(rpcsvc_t *svc, dict_t *options)
{
- return &rpcsvc_auth_glusterfs_v2;
+ return &rpcsvc_auth_glusterfs_v3;
}
diff --git a/rpc/rpc-lib/src/auth-null.c b/rpc/rpc-lib/src/auth-null.c
index ebdcc8ff8e7..6d059b9da50 100644
--- a/rpc/rpc-lib/src/auth-null.c
+++ b/rpc/rpc-lib/src/auth-null.c
@@ -8,54 +8,33 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "rpcsvc.h"
-#include "list.h"
-#include "dict.h"
-
+#include <glusterfs/dict.h>
int
-auth_null_request_init (rpcsvc_request_t *req, void *priv)
+auth_null_request_init(rpcsvc_request_t *req, void *priv)
{
- if (!req)
- return -1;
-
- memset (req->cred.authdata, 0, GF_MAX_AUTH_BYTES);
- req->cred.datalen = 0;
-
- memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES);
- req->verf.datalen = 0;
-
- return 0;
+ return 0;
}
-int auth_null_authenticate (rpcsvc_request_t *req, void *priv)
+int
+auth_null_authenticate(rpcsvc_request_t *req, void *priv)
{
- /* Always succeed. */
- return RPCSVC_AUTH_ACCEPT;
+ /* Always succeed. */
+ return RPCSVC_AUTH_ACCEPT;
}
-rpcsvc_auth_ops_t auth_null_ops = {
- .transport_init = NULL,
- .request_init = auth_null_request_init,
- .authenticate = auth_null_authenticate
-};
-
-rpcsvc_auth_t rpcsvc_auth_null = {
- .authname = "AUTH_NULL",
- .authnum = AUTH_NULL,
- .authops = &auth_null_ops,
- .authprivate = NULL
-};
+rpcsvc_auth_ops_t auth_null_ops = {.transport_init = NULL,
+ .request_init = auth_null_request_init,
+ .authenticate = auth_null_authenticate};
+rpcsvc_auth_t rpcsvc_auth_null = {.authname = "AUTH_NULL",
+ .authnum = AUTH_NULL,
+ .authops = &auth_null_ops,
+ .authprivate = NULL};
rpcsvc_auth_t *
-rpcsvc_auth_null_init (rpcsvc_t *svc, dict_t *options)
+rpcsvc_auth_null_init(rpcsvc_t *svc, dict_t *options)
{
- return &rpcsvc_auth_null;
+ return &rpcsvc_auth_null;
}
diff --git a/rpc/rpc-lib/src/auth-unix.c b/rpc/rpc-lib/src/auth-unix.c
index 6251d60a896..61d475a5e84 100644
--- a/rpc/rpc-lib/src/auth-unix.c
+++ b/rpc/rpc-lib/src/auth-unix.c
@@ -8,75 +8,59 @@
cases as published by the Free Software Foundation.
*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "rpcsvc.h"
-#include "list.h"
-#include "dict.h"
+#include <glusterfs/dict.h>
#include "xdr-rpc.h"
-
int
-auth_unix_request_init (rpcsvc_request_t *req, void *priv)
+auth_unix_request_init(rpcsvc_request_t *req, void *priv)
{
- if (!req)
- return -1;
- memset (req->verf.authdata, 0, GF_MAX_AUTH_BYTES);
- req->verf.datalen = 0;
- req->verf.flavour = AUTH_NULL;
-
- return 0;
+ return 0;
}
-int auth_unix_authenticate (rpcsvc_request_t *req, void *priv)
+int
+auth_unix_authenticate(rpcsvc_request_t *req, void *priv)
{
- int ret = RPCSVC_AUTH_REJECT;
- struct authunix_parms aup;
- char machname[MAX_MACHINE_NAME];
+ int ret = RPCSVC_AUTH_REJECT;
+ struct authunix_parms aup;
+ char machname[MAX_MACHINE_NAME];
- if (!req)
- return ret;
-
- ret = xdr_to_auth_unix_cred (req->cred.authdata, req->cred.datalen,
- &aup, machname, req->auxgids);
- if (ret == -1) {
- gf_log ("", GF_LOG_WARNING, "failed to decode unix credentials");
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- req->uid = aup.aup_uid;
- req->gid = aup.aup_gid;
- req->auxgidcount = aup.aup_len;
+ if (!req)
+ return ret;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth Info: machine name: %s, uid: %d"
- ", gid: %d", machname, req->uid, req->gid);
- ret = RPCSVC_AUTH_ACCEPT;
+ req->auxgids = req->auxgidsmall;
+ ret = xdr_to_auth_unix_cred(req->cred.authdata, req->cred.datalen, &aup,
+ machname, req->auxgids);
+ if (ret == -1) {
+ gf_log("", GF_LOG_WARNING, "failed to decode unix credentials");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ req->uid = aup.aup_uid;
+ req->gid = aup.aup_gid;
+ req->auxgidcount = aup.aup_len;
+
+ gf_log(GF_RPCSVC, GF_LOG_TRACE,
+ "Auth Info: machine name: %s, uid: %d"
+ ", gid: %d",
+ machname, req->uid, req->gid);
+ ret = RPCSVC_AUTH_ACCEPT;
err:
- return ret;
+ return ret;
}
-rpcsvc_auth_ops_t auth_unix_ops = {
- .transport_init = NULL,
- .request_init = auth_unix_request_init,
- .authenticate = auth_unix_authenticate
-};
-
-rpcsvc_auth_t rpcsvc_auth_unix = {
- .authname = "AUTH_UNIX",
- .authnum = AUTH_UNIX,
- .authops = &auth_unix_ops,
- .authprivate = NULL
-};
+rpcsvc_auth_ops_t auth_unix_ops = {.transport_init = NULL,
+ .request_init = auth_unix_request_init,
+ .authenticate = auth_unix_authenticate};
+rpcsvc_auth_t rpcsvc_auth_unix = {.authname = "AUTH_UNIX",
+ .authnum = AUTH_UNIX,
+ .authops = &auth_unix_ops,
+ .authprivate = NULL};
rpcsvc_auth_t *
-rpcsvc_auth_unix_init (rpcsvc_t *svc, dict_t *options)
+rpcsvc_auth_unix_init(rpcsvc_t *svc, dict_t *options)
{
- return &rpcsvc_auth_unix;
+ return &rpcsvc_auth_unix;
}
diff --git a/rpc/rpc-lib/src/autoscale-threads.c b/rpc/rpc-lib/src/autoscale-threads.c
new file mode 100644
index 00000000000..a954ae7a27a
--- /dev/null
+++ b/rpc/rpc-lib/src/autoscale-threads.c
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/gf-event.h>
+#include "rpcsvc.h"
+
+void
+rpcsvc_autoscale_threads(glusterfs_ctx_t *ctx, rpcsvc_t *rpc, int incr)
+{
+ struct event_pool *pool = ctx->event_pool;
+ int thread_count = pool->eventthreadcount;
+
+ pool->auto_thread_count += incr;
+ (void)gf_event_reconfigure_threads(pool, thread_count + incr);
+}
diff --git a/rpc/rpc-lib/src/libgfrpc.sym b/rpc/rpc-lib/src/libgfrpc.sym
new file mode 100644
index 00000000000..e026d80259b
--- /dev/null
+++ b/rpc/rpc-lib/src/libgfrpc.sym
@@ -0,0 +1,68 @@
+is_rpc_clnt_disconnected
+rpcclnt_cbk_program_register
+rpc_clnt_cleanup_and_start
+rpc_clnt_connection_cleanup
+rpc_clnt_disable
+rpc_clnt_new
+rpc_clnt_reconfig
+rpc_clnt_reconnect
+rpc_clnt_reconnect_cleanup
+rpc_clnt_ref
+rpc_clnt_register_notify
+rpc_clnt_start
+rpc_clnt_submit
+rpc_clnt_unref
+rpc_reply_to_xdr
+rpcsvc_auth_array
+rpcsvc_auth_check
+rpcsvc_auth_reconf
+rpcsvc_auth_unix_auxgids
+rpcsvc_callback_submit
+rpcsvc_create_listener
+rpcsvc_create_listeners
+rpcsvc_drc_init
+rpcsvc_drc_priv
+rpcsvc_drc_reconfigure
+rpcsvc_get_program_vector_sizer
+rpcsvc_init
+rpcsvc_destroy
+rpcsvc_init_options
+rpcsvc_listener_destroy
+rpcsvc_program_register
+rpcsvc_program_register_portmap
+rpcsvc_program_register_rpcbind6
+rpcsvc_program_unregister
+rpcsvc_program_unregister_portmap
+rpcsvc_program_unregister_rpcbind6
+rpcsvc_reconfigure_options
+rpcsvc_register_notify
+rpcsvc_register_portmap_enabled
+rpcsvc_request_submit
+rpcsvc_set_outstanding_rpc_limit
+rpcsvc_set_throttle_on
+rpcsvc_submit_generic
+rpcsvc_submit_message
+rpcsvc_transport_peeraddr
+rpcsvc_transport_peername
+rpcsvc_transport_privport_check
+rpcsvc_transport_unix_options_build
+rpcsvc_transport_volume_allowed
+rpcsvc_transport_connect
+rpcsvc_transport_getpeeraddr
+rpcsvc_unregister_notify
+rpcsvc_volume_allowed
+rpc_transport_count
+rpc_transport_connect
+rpc_transport_disconnect
+rpc_transport_get_peeraddr
+rpc_transport_inet_options_build
+rpc_transport_keepalive_options_set
+rpc_transport_notify
+rpc_transport_pollin_alloc
+rpc_transport_pollin_destroy
+rpc_transport_ref
+rpc_transport_unix_options_build
+rpc_transport_unref
+rpc_clnt_mgmt_pmap_signout
+rpcsvc_autoscale_threads
+rpcsvc_statedump
diff --git a/rpc/rpc-lib/src/mgmt-pmap.c b/rpc/rpc-lib/src/mgmt-pmap.c
new file mode 100644
index 00000000000..25a7148e5a3
--- /dev/null
+++ b/rpc/rpc-lib/src/mgmt-pmap.c
@@ -0,0 +1,147 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "portmap-xdr.h"
+#include "protocol-common.h"
+#include "rpc-clnt.h"
+#include "xdr-generic.h"
+
+/* Defining a minimal RPC client program for portmap signout
+ */
+char *clnt_pmap_signout_procs[GF_PMAP_MAXVALUE] = {
+ [GF_PMAP_SIGNOUT] = "SIGNOUT",
+};
+
+rpc_clnt_prog_t clnt_pmap_signout_prog = {
+ .progname = "Gluster Portmap",
+ .prognum = GLUSTER_PMAP_PROGRAM,
+ .progver = GLUSTER_PMAP_VERSION,
+ .procnames = clnt_pmap_signout_procs,
+};
+
+static int
+mgmt_pmap_signout_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ pmap_signout_rsp rsp = {
+ 0,
+ };
+ int ret = 0;
+ call_frame_t *frame = NULL;
+
+ frame = myframe;
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_pmap_signout_rsp);
+ if (ret < 0) {
+ gf_log(THIS->name, GF_LOG_ERROR, "XDR decoding failed");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ gf_log(THIS->name, GF_LOG_ERROR,
+ "failed to register the port with glusterd");
+ goto out;
+ }
+out:
+ if (frame) {
+ STACK_DESTROY(frame->root);
+ }
+
+ return 0;
+}
+
+int
+rpc_clnt_mgmt_pmap_signout(glusterfs_ctx_t *ctx, char *brickname)
+{
+ int ret = 0;
+ pmap_signout_req req = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ cmd_args_t *cmd_args = NULL;
+ char brick_name[PATH_MAX] = {
+ 0,
+ };
+ struct iovec iov = {
+ 0,
+ };
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ ssize_t xdr_size = 0;
+
+ frame = create_frame(THIS, ctx->pool);
+ cmd_args = &ctx->cmd_args;
+
+ if (!cmd_args->brick_port && (!cmd_args->brick_name || !brickname)) {
+ gf_log("fsd-mgmt", GF_LOG_DEBUG,
+ "portmapper signout arguments not given");
+ goto out;
+ }
+
+ if (cmd_args->volfile_server_transport &&
+ !strcmp(cmd_args->volfile_server_transport, "rdma")) {
+ snprintf(brick_name, sizeof(brick_name), "%s.rdma",
+ cmd_args->brick_name);
+ req.brick = brick_name;
+ } else {
+ if (brickname)
+ req.brick = brickname;
+ else
+ req.brick = cmd_args->brick_name;
+ }
+
+ req.port = cmd_args->brick_port;
+ req.rdma_port = cmd_args->brick_port2;
+
+ /* mgmt_submit_request is not available in libglusterfs.
+ * Need to serialize and submit manually.
+ */
+ iobref = iobref_new();
+ if (!iobref) {
+ goto out;
+ }
+
+ xdr_size = xdr_sizeof((xdrproc_t)xdr_pmap_signout_req, &req);
+ iobuf = iobuf_get2(ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ iobref_add(iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize(iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic(iov, &req, (xdrproc_t)xdr_pmap_signout_req);
+ if (ret == -1) {
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to create XDR payload");
+ goto out;
+ }
+ iov.iov_len = ret;
+
+ ret = rpc_clnt_submit(ctx->mgmt, &clnt_pmap_signout_prog, GF_PMAP_SIGNOUT,
+ mgmt_pmap_signout_cbk, &iov, 1, NULL, 0, iobref,
+ frame, NULL, 0, NULL, 0, NULL);
+out:
+ if (iobref)
+ iobref_unref(iobref);
+
+ if (iobuf)
+ iobuf_unref(iobuf);
+ return ret;
+}
diff --git a/rpc/rpc-lib/src/protocol-common.h b/rpc/rpc-lib/src/protocol-common.h
index e2815d8c129..0cb5862e9a9 100644
--- a/rpc/rpc-lib/src/protocol-common.h
+++ b/rpc/rpc-lib/src/protocol-common.h
@@ -12,217 +12,371 @@
#define _PROTOCOL_COMMON_H
enum gf_fop_procnum {
- GFS3_OP_NULL, /* 0 */
- GFS3_OP_STAT,
- GFS3_OP_READLINK,
- GFS3_OP_MKNOD,
- GFS3_OP_MKDIR,
- GFS3_OP_UNLINK,
- GFS3_OP_RMDIR,
- GFS3_OP_SYMLINK,
- GFS3_OP_RENAME,
- GFS3_OP_LINK,
- GFS3_OP_TRUNCATE,
- GFS3_OP_OPEN,
- GFS3_OP_READ,
- GFS3_OP_WRITE,
- GFS3_OP_STATFS,
- GFS3_OP_FLUSH,
- GFS3_OP_FSYNC,
- GFS3_OP_SETXATTR,
- GFS3_OP_GETXATTR,
- GFS3_OP_REMOVEXATTR,
- GFS3_OP_OPENDIR,
- GFS3_OP_FSYNCDIR,
- GFS3_OP_ACCESS,
- GFS3_OP_CREATE,
- GFS3_OP_FTRUNCATE,
- GFS3_OP_FSTAT,
- GFS3_OP_LK,
- GFS3_OP_LOOKUP,
- GFS3_OP_READDIR,
- GFS3_OP_INODELK,
- GFS3_OP_FINODELK,
- GFS3_OP_ENTRYLK,
- GFS3_OP_FENTRYLK,
- GFS3_OP_XATTROP,
- GFS3_OP_FXATTROP,
- GFS3_OP_FGETXATTR,
- GFS3_OP_FSETXATTR,
- GFS3_OP_RCHECKSUM,
- GFS3_OP_SETATTR,
- GFS3_OP_FSETATTR,
- GFS3_OP_READDIRP,
- GFS3_OP_RELEASE,
- GFS3_OP_RELEASEDIR,
- GFS3_OP_FREMOVEXATTR,
- GFS3_OP_MAXVALUE,
-} ;
+ GFS3_OP_NULL, /* 0 */
+ GFS3_OP_STAT,
+ GFS3_OP_READLINK,
+ GFS3_OP_MKNOD,
+ GFS3_OP_MKDIR,
+ GFS3_OP_UNLINK,
+ GFS3_OP_RMDIR,
+ GFS3_OP_SYMLINK,
+ GFS3_OP_RENAME,
+ GFS3_OP_LINK,
+ GFS3_OP_TRUNCATE,
+ GFS3_OP_OPEN,
+ GFS3_OP_READ,
+ GFS3_OP_WRITE,
+ GFS3_OP_STATFS,
+ GFS3_OP_FLUSH,
+ GFS3_OP_FSYNC,
+ GFS3_OP_SETXATTR,
+ GFS3_OP_GETXATTR,
+ GFS3_OP_REMOVEXATTR,
+ GFS3_OP_OPENDIR,
+ GFS3_OP_FSYNCDIR,
+ GFS3_OP_ACCESS,
+ GFS3_OP_CREATE,
+ GFS3_OP_FTRUNCATE,
+ GFS3_OP_FSTAT,
+ GFS3_OP_LK,
+ GFS3_OP_LOOKUP,
+ GFS3_OP_READDIR,
+ GFS3_OP_INODELK,
+ GFS3_OP_FINODELK,
+ GFS3_OP_ENTRYLK,
+ GFS3_OP_FENTRYLK,
+ GFS3_OP_XATTROP,
+ GFS3_OP_FXATTROP,
+ GFS3_OP_FGETXATTR,
+ GFS3_OP_FSETXATTR,
+ GFS3_OP_RCHECKSUM,
+ GFS3_OP_SETATTR,
+ GFS3_OP_FSETATTR,
+ GFS3_OP_READDIRP,
+ GFS3_OP_RELEASE,
+ GFS3_OP_RELEASEDIR,
+ GFS3_OP_FREMOVEXATTR,
+ GFS3_OP_FALLOCATE,
+ GFS3_OP_DISCARD,
+ GFS3_OP_ZEROFILL,
+ GFS3_OP_IPC,
+ GFS3_OP_SEEK,
+ GFS3_OP_COMPOUND,
+ GFS3_OP_LEASE,
+ GFS3_OP_GETACTIVELK,
+ GFS3_OP_SETACTIVELK,
+ GFS3_OP_ICREATE,
+ GFS3_OP_NAMELINK,
+ GFS3_OP_PUT,
+ GFS3_OP_COPY_FILE_RANGE,
+ GFS3_OP_MAXVALUE,
+};
enum gf_handshake_procnum {
- GF_HNDSK_NULL,
- GF_HNDSK_SETVOLUME,
- GF_HNDSK_GETSPEC,
- GF_HNDSK_PING,
- GF_HNDSK_SET_LK_VER,
- GF_HNDSK_EVENT_NOTIFY,
- GF_HNDSK_MAXVALUE,
+ GF_HNDSK_NULL,
+ GF_HNDSK_SETVOLUME,
+ GF_HNDSK_GETSPEC,
+ GF_HNDSK_PING,
+ GF_HNDSK_SET_LK_VER,
+ GF_HNDSK_EVENT_NOTIFY,
+ GF_HNDSK_GET_VOLUME_INFO,
+ GF_HNDSK_GET_SNAPSHOT_INFO,
+ GF_HNDSK_MAXVALUE,
};
enum gf_pmap_procnum {
- GF_PMAP_NULL = 0,
- GF_PMAP_PORTBYBRICK,
- GF_PMAP_BRICKBYPORT,
- GF_PMAP_SIGNUP,
- GF_PMAP_SIGNIN,
- GF_PMAP_SIGNOUT,
- GF_PMAP_MAXVALUE,
+ GF_PMAP_NULL = 0,
+ GF_PMAP_PORTBYBRICK,
+ GF_PMAP_BRICKBYPORT,
+ /*
+ * SIGNUP is not used, and shouldn't be used. It was kept here only
+ * to avoid changing the numbers for things that come after it in this
+ * list.
+ */
+ GF_PMAP_SIGNUP,
+ GF_PMAP_SIGNIN,
+ GF_PMAP_SIGNOUT,
+ GF_PMAP_MAXVALUE,
+};
+
+enum gf_aggregator_procnum {
+ GF_AGGREGATOR_NULL = 0,
+ GF_AGGREGATOR_LOOKUP,
+ GF_AGGREGATOR_GETLIMIT,
+ GF_AGGREGATOR_MAXVALUE,
};
enum gf_pmap_port_type {
- GF_PMAP_PORT_FREE = 0,
- GF_PMAP_PORT_FOREIGN,
- GF_PMAP_PORT_LEASED,
- GF_PMAP_PORT_NONE,
- GF_PMAP_PORT_BRICKSERVER,
+ GF_PMAP_PORT_FREE = 0,
+ GF_PMAP_PORT_FOREIGN, /* it actually means, not sure who is using it, but it
+ is in-use */
+ GF_PMAP_PORT_LEASED,
+ GF_PMAP_PORT_ANY,
+ GF_PMAP_PORT_BRICKSERVER, /* port used by brick process */
};
typedef enum gf_pmap_port_type gf_pmap_port_type_t;
enum gf_probe_resp {
- GF_PROBE_SUCCESS,
- GF_PROBE_LOCALHOST,
- GF_PROBE_FRIEND,
- GF_PROBE_ANOTHER_CLUSTER,
- GF_PROBE_VOLUME_CONFLICT,
- GF_PROBE_UNKNOWN_PEER,
- GF_PROBE_ADD_FAILED
+ GF_PROBE_SUCCESS,
+ GF_PROBE_LOCALHOST,
+ GF_PROBE_FRIEND,
+ GF_PROBE_ANOTHER_CLUSTER,
+ GF_PROBE_VOLUME_CONFLICT,
+ GF_PROBE_SAME_UUID,
+ GF_PROBE_UNKNOWN_PEER,
+ GF_PROBE_ADD_FAILED,
+ GF_PROBE_QUORUM_NOT_MET,
+ GF_PROBE_MISSED_SNAP_CONFLICT,
+ GF_PROBE_SNAP_CONFLICT,
+ GF_PROBE_FRIEND_DETACHING,
};
enum gf_deprobe_resp {
- GF_DEPROBE_SUCCESS,
- GF_DEPROBE_LOCALHOST,
- GF_DEPROBE_NOT_FRIEND,
- GF_DEPROBE_BRICK_EXIST,
- GF_DEPROBE_FRIEND_DOWN
+ GF_DEPROBE_SUCCESS,
+ GF_DEPROBE_LOCALHOST,
+ GF_DEPROBE_NOT_FRIEND,
+ GF_DEPROBE_BRICK_EXIST,
+ GF_DEPROBE_FRIEND_DOWN,
+ GF_DEPROBE_QUORUM_NOT_MET,
+ GF_DEPROBE_FRIEND_DETACHING,
+ GF_DEPROBE_SNAP_BRICK_EXIST,
};
enum gf_cbk_procnum {
- GF_CBK_NULL = 0,
- GF_CBK_FETCHSPEC,
- GF_CBK_INO_FLUSH,
- GF_CBK_EVENT_NOTIFY,
- GF_CBK_MAXVALUE,
+ GF_CBK_NULL = 0,
+ GF_CBK_FETCHSPEC,
+ GF_CBK_INO_FLUSH,
+ GF_CBK_EVENT_NOTIFY,
+ GF_CBK_GET_SNAPS,
+ GF_CBK_CACHE_INVALIDATION,
+ GF_CBK_CHILD_UP,
+ GF_CBK_CHILD_DOWN,
+ GF_CBK_RECALL_LEASE,
+ GF_CBK_STATEDUMP,
+ GF_CBK_INODELK_CONTENTION,
+ GF_CBK_ENTRYLK_CONTENTION,
+ GF_CBK_MAXVALUE,
};
enum gluster_cli_procnum {
- GLUSTER_CLI_NULL, /* 0 */
- GLUSTER_CLI_PROBE,
- GLUSTER_CLI_DEPROBE,
- GLUSTER_CLI_LIST_FRIENDS,
- GLUSTER_CLI_CREATE_VOLUME,
- GLUSTER_CLI_GET_VOLUME,
- GLUSTER_CLI_GET_NEXT_VOLUME,
- GLUSTER_CLI_DELETE_VOLUME,
- GLUSTER_CLI_START_VOLUME,
- GLUSTER_CLI_STOP_VOLUME,
- GLUSTER_CLI_RENAME_VOLUME,
- GLUSTER_CLI_DEFRAG_VOLUME,
- GLUSTER_CLI_SET_VOLUME,
- GLUSTER_CLI_ADD_BRICK,
- GLUSTER_CLI_REMOVE_BRICK,
- GLUSTER_CLI_REPLACE_BRICK,
- GLUSTER_CLI_LOG_ROTATE,
- GLUSTER_CLI_GETSPEC,
- GLUSTER_CLI_PMAP_PORTBYBRICK,
- GLUSTER_CLI_SYNC_VOLUME,
- GLUSTER_CLI_RESET_VOLUME,
- GLUSTER_CLI_FSM_LOG,
- GLUSTER_CLI_GSYNC_SET,
- GLUSTER_CLI_PROFILE_VOLUME,
- GLUSTER_CLI_QUOTA,
- GLUSTER_CLI_TOP_VOLUME,
- GLUSTER_CLI_GETWD,
- GLUSTER_CLI_STATUS_VOLUME,
- GLUSTER_CLI_STATUS_ALL,
- GLUSTER_CLI_MOUNT,
- GLUSTER_CLI_UMOUNT,
- GLUSTER_CLI_HEAL_VOLUME,
- GLUSTER_CLI_STATEDUMP_VOLUME,
- GLUSTER_CLI_LIST_VOLUME,
- GLUSTER_CLI_CLRLOCKS_VOLUME,
- GLUSTER_CLI_MAXVALUE,
+ GLUSTER_CLI_NULL, /* 0 */
+ GLUSTER_CLI_PROBE,
+ GLUSTER_CLI_DEPROBE,
+ GLUSTER_CLI_LIST_FRIENDS,
+ GLUSTER_CLI_CREATE_VOLUME,
+ GLUSTER_CLI_GET_VOLUME,
+ GLUSTER_CLI_GET_NEXT_VOLUME,
+ GLUSTER_CLI_DELETE_VOLUME,
+ GLUSTER_CLI_START_VOLUME,
+ GLUSTER_CLI_STOP_VOLUME,
+ GLUSTER_CLI_RENAME_VOLUME,
+ GLUSTER_CLI_DEFRAG_VOLUME,
+ GLUSTER_CLI_SET_VOLUME,
+ GLUSTER_CLI_ADD_BRICK,
+ GLUSTER_CLI_REMOVE_BRICK,
+ GLUSTER_CLI_REPLACE_BRICK,
+ GLUSTER_CLI_LOG_ROTATE,
+ GLUSTER_CLI_GETSPEC,
+ GLUSTER_CLI_PMAP_PORTBYBRICK,
+ GLUSTER_CLI_SYNC_VOLUME,
+ GLUSTER_CLI_RESET_VOLUME,
+ GLUSTER_CLI_FSM_LOG,
+ GLUSTER_CLI_GSYNC_SET,
+ GLUSTER_CLI_PROFILE_VOLUME,
+ GLUSTER_CLI_QUOTA,
+ GLUSTER_CLI_TOP_VOLUME,
+ GLUSTER_CLI_GETWD,
+ GLUSTER_CLI_STATUS_VOLUME,
+ GLUSTER_CLI_STATUS_ALL,
+ GLUSTER_CLI_MOUNT,
+ GLUSTER_CLI_UMOUNT,
+ GLUSTER_CLI_HEAL_VOLUME,
+ GLUSTER_CLI_STATEDUMP_VOLUME,
+ GLUSTER_CLI_LIST_VOLUME,
+ GLUSTER_CLI_CLRLOCKS_VOLUME,
+ GLUSTER_CLI_UUID_RESET,
+ GLUSTER_CLI_UUID_GET,
+ GLUSTER_CLI_COPY_FILE,
+ GLUSTER_CLI_SYS_EXEC,
+ GLUSTER_CLI_SNAP,
+ GLUSTER_CLI_BARRIER_VOLUME,
+ GLUSTER_CLI_GET_VOL_OPT,
+ GLUSTER_CLI_GANESHA,
+ GLUSTER_CLI_BITROT,
+ GLUSTER_CLI_ATTACH_TIER,
+ GLUSTER_CLI_TIER,
+ GLUSTER_CLI_GET_STATE,
+ GLUSTER_CLI_RESET_BRICK,
+ GLUSTER_CLI_REMOVE_TIER_BRICK,
+ GLUSTER_CLI_ADD_TIER_BRICK,
+ GLUSTER_CLI_MAXVALUE,
};
enum glusterd_mgmt_procnum {
- GLUSTERD_MGMT_NULL, /* 0 */
- GLUSTERD_MGMT_CLUSTER_LOCK,
- GLUSTERD_MGMT_CLUSTER_UNLOCK,
- GLUSTERD_MGMT_STAGE_OP,
- GLUSTERD_MGMT_COMMIT_OP,
- GLUSTERD_MGMT_MAXVALUE,
+ GLUSTERD_MGMT_NULL, /* 0 */
+ GLUSTERD_MGMT_CLUSTER_LOCK,
+ GLUSTERD_MGMT_CLUSTER_UNLOCK,
+ GLUSTERD_MGMT_STAGE_OP,
+ GLUSTERD_MGMT_COMMIT_OP,
+ GLUSTERD_MGMT_MAXVALUE,
};
enum glusterd_friend_procnum {
- GLUSTERD_FRIEND_NULL, /* 0 */
- GLUSTERD_PROBE_QUERY,
- GLUSTERD_FRIEND_ADD,
- GLUSTERD_FRIEND_REMOVE,
- GLUSTERD_FRIEND_UPDATE,
- GLUSTERD_FRIEND_MAXVALUE,
+ GLUSTERD_FRIEND_NULL, /* 0 */
+ GLUSTERD_PROBE_QUERY,
+ GLUSTERD_FRIEND_ADD,
+ GLUSTERD_FRIEND_REMOVE,
+ GLUSTERD_FRIEND_UPDATE,
+ GLUSTERD_FRIEND_MAXVALUE,
};
enum glusterd_brick_procnum {
- GLUSTERD_BRICK_NULL, /* 0 */
- GLUSTERD_BRICK_TERMINATE,
- GLUSTERD_BRICK_XLATOR_INFO,
- GLUSTERD_BRICK_XLATOR_OP,
- GLUSTERD_BRICK_STATUS,
- GLUSTERD_BRICK_OP,
- GLUSTERD_BRICK_XLATOR_DEFRAG,
- GLUSTERD_NODE_PROFILE,
- GLUSTERD_NODE_STATUS,
- GLUSTERD_BRICK_MAXVALUE,
+ GLUSTERD_BRICK_NULL, /* 0 */
+ GLUSTERD_BRICK_TERMINATE,
+ GLUSTERD_BRICK_XLATOR_INFO,
+ GLUSTERD_BRICK_XLATOR_OP,
+ GLUSTERD_BRICK_STATUS,
+ GLUSTERD_BRICK_OP,
+ GLUSTERD_BRICK_XLATOR_DEFRAG,
+ GLUSTERD_NODE_PROFILE,
+ GLUSTERD_NODE_STATUS,
+ GLUSTERD_VOLUME_BARRIER_OP,
+ GLUSTERD_BRICK_BARRIER,
+ GLUSTERD_NODE_BITROT,
+ GLUSTERD_BRICK_ATTACH,
+ GLUSTERD_DUMP_METRICS,
+ GLUSTERD_SVC_ATTACH,
+ GLUSTERD_SVC_DETACH,
+ GLUSTERD_BRICK_MAXVALUE,
+};
+
+enum glusterd_mgmt_hndsk_procnum {
+ GD_MGMT_HNDSK_NULL,
+ GD_MGMT_HNDSK_VERSIONS,
+ GD_MGMT_HNDSK_VERSIONS_ACK,
+ GD_MGMT_HNDSK_MAXVALUE,
};
typedef enum {
- GF_AFR_OP_INVALID,
- GF_AFR_OP_HEAL_INDEX,
- GF_AFR_OP_HEAL_FULL,
- GF_AFR_OP_INDEX_SUMMARY,
- GF_AFR_OP_HEALED_FILES,
- GF_AFR_OP_HEAL_FAILED_FILES,
- GF_AFR_OP_SPLIT_BRAIN_FILES
-} gf_xl_afr_op_t ;
+ GF_SHD_OP_INVALID,
+ GF_SHD_OP_HEAL_INDEX,
+ GF_SHD_OP_HEAL_FULL,
+ GF_SHD_OP_INDEX_SUMMARY,
+ GF_SHD_OP_HEALED_FILES,
+ GF_SHD_OP_HEAL_FAILED_FILES,
+ GF_SHD_OP_SPLIT_BRAIN_FILES,
+ GF_SHD_OP_STATISTICS,
+ GF_SHD_OP_STATISTICS_HEAL_COUNT,
+ GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA,
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE,
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK,
+ GF_SHD_OP_HEAL_ENABLE,
+ GF_SHD_OP_HEAL_DISABLE,
+ GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME,
+ GF_SHD_OP_GRANULAR_ENTRY_HEAL_ENABLE,
+ GF_SHD_OP_GRANULAR_ENTRY_HEAL_DISABLE,
+ GF_SHD_OP_HEAL_SUMMARY,
+} gf_xl_afr_op_t;
-enum gf_hdsk_event_notify_op {
- GF_EN_DEFRAG_STATUS,
- GF_EN_MAX,
+struct gf_gsync_detailed_status_ {
+ char node[NAME_MAX];
+ char master[NAME_MAX];
+ char brick[PATH_MAX];
+ char slave_user[NAME_MAX];
+ char slave[NAME_MAX];
+ char slave_node[NAME_MAX];
+ char worker_status[NAME_MAX];
+ char crawl_status[NAME_MAX];
+ char last_synced[NAME_MAX];
+ char last_synced_utc[NAME_MAX];
+ char entry[NAME_MAX];
+ char data[NAME_MAX];
+ char meta[NAME_MAX];
+ char failures[NAME_MAX];
+ char checkpoint_time[NAME_MAX];
+ char checkpoint_time_utc[NAME_MAX];
+ char checkpoint_completed[NAME_MAX];
+ char checkpoint_completion_time[NAME_MAX];
+ char checkpoint_completion_time_utc[NAME_MAX];
+ char brick_host_uuid[NAME_MAX];
+ char slavekey[NAME_MAX];
+ char session_slave[NAME_MAX];
};
-#define GLUSTER_HNDSK_PROGRAM 14398633 /* Completely random */
-#define GLUSTER_HNDSK_VERSION 2 /* 0.0.1 */
+enum glusterd_mgmt_v3_procnum {
+ GLUSTERD_MGMT_V3_NULL, /* 0 */
+ GLUSTERD_MGMT_V3_LOCK,
+ GLUSTERD_MGMT_V3_PRE_VALIDATE,
+ GLUSTERD_MGMT_V3_BRICK_OP,
+ GLUSTERD_MGMT_V3_COMMIT,
+ GLUSTERD_MGMT_V3_POST_COMMIT,
+ GLUSTERD_MGMT_V3_POST_VALIDATE,
+ GLUSTERD_MGMT_V3_UNLOCK,
+ GLUSTERD_MGMT_V3_MAXVALUE,
+};
+
+typedef struct gf_gsync_detailed_status_ gf_gsync_status_t;
-#define GLUSTER_PMAP_PROGRAM 34123456
-#define GLUSTER_PMAP_VERSION 1
+enum gf_get_volume_info_type {
+ GF_GET_VOLUME_NONE, /* 0 */
+ GF_GET_VOLUME_UUID
+};
-#define GLUSTER_CBK_PROGRAM 52743234 /* Completely random */
-#define GLUSTER_CBK_VERSION 1 /* 0.0.1 */
+typedef enum gf_get_volume_info_type gf_get_volume_info_type;
+
+enum gf_get_snapshot_info_type {
+ GF_GET_SNAPSHOT_LIST,
+};
+typedef enum gf_get_snapshot_info_type gf_get_snapshot_info_type;
-#define GLUSTER3_1_FOP_PROGRAM 1298437 /* Completely random */
-#define GLUSTER3_1_FOP_VERSION 330 /* 3.3.0 */
-#define GLUSTER3_1_FOP_PROCCNT GFS3_OP_MAXVALUE
+enum gf_getspec_flags_type { GF_GETSPEC_FLAG_SERVERS_LIST = 1 };
+typedef enum gf_getspec_flags_type gf_getspec_flags_type;
+
+#define GLUSTER_HNDSK_PROGRAM 14398633 /* Completely random */
+#define GLUSTER_HNDSK_VERSION 2 /* 0.0.2 */
+
+#define GLUSTER_PMAP_PROGRAM 34123456
+#define GLUSTER_PMAP_VERSION 1
+
+#define GLUSTER_CBK_PROGRAM 52743234 /* Completely random */
+#define GLUSTER_CBK_VERSION 1 /* 0.0.1 */
+
+#define GLUSTER_FOP_PROGRAM 1298437 /* Completely random */
+#define GLUSTER_FOP_VERSION 330 /* 3.3.0 */
+#define GLUSTER_FOP_PROCCNT GFS3_OP_MAXVALUE
+
+#define GLUSTER_FOP_VERSION_v2 400 /* 4.0.0 */
+
+/* Aggregator */
+#define GLUSTER_AGGREGATOR_PROGRAM 29852134 /* Completely random */
+#define GLUSTER_AGGREGATOR_VERSION 1
/* Second version */
-#define GD_MGMT_PROGRAM 1238433 /* Completely random */
-#define GD_MGMT_VERSION 2 /* 0.0.2 */
+#define GD_MGMT_PROGRAM 1238433 /* Completely random */
+#define GD_MGMT_VERSION 2 /* 0.0.2 */
+
+#define GD_FRIEND_PROGRAM 1238437 /* Completely random */
+#define GD_FRIEND_VERSION 2 /* 0.0.2 */
+
+#define GLUSTER_CLI_PROGRAM 1238463 /* Completely random */
+#define GLUSTER_CLI_VERSION 2 /* 0.0.2 */
+
+#define GD_BRICK_PROGRAM 4867634 /*Completely random*/
+#define GD_BRICK_VERSION 2
-#define GD_FRIEND_PROGRAM 1238437 /* Completely random */
-#define GD_FRIEND_VERSION 2 /* 0.0.2 */
+/* Third version */
+#define GD_MGMT_V3_VERSION 3
-#define GLUSTER_CLI_PROGRAM 1238463 /* Completely random */
-#define GLUSTER_CLI_VERSION 2 /* 0.0.2 */
+/* OP-VERSION handshake */
+#define GD_MGMT_HNDSK_PROGRAM 1239873 /* Completely random */
+#define GD_MGMT_HNDSK_VERSION 1
-#define GD_BRICK_PROGRAM 4867634 /*Completely random*/
-#define GD_BRICK_VERSION 2
+#define GD_VOLUME_NAME_MAX \
+ ((NAME_MAX + 1) - 5) /* Maximum size of volume name */
+#define GD_VOLUME_NAME_MAX_TIER \
+ (GD_VOLUME_NAME_MAX + 5) /* +5 needed for '-hot' \
+ and '-cold' suffixes*/
+#define GLUSTER_PROCESS_UUID_FMT \
+ "CTX_ID:%s-GRAPH_ID:%d-PID:%d-HOST:%s-PC_NAME:%s-RECON_NO:%s"
#endif /* !_PROTOCOL_COMMON_H */
diff --git a/rpc/rpc-lib/src/rpc-clnt-ping.c b/rpc/rpc-lib/src/rpc-clnt-ping.c
new file mode 100644
index 00000000000..31f17841bea
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-clnt-ping.c
@@ -0,0 +1,357 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "rpc-clnt.h"
+#include "rpc-clnt-ping.h"
+#include <glusterfs/byte-order.h>
+#include "xdr-rpcclnt.h"
+#include "rpc-transport.h"
+#include "protocol-common.h"
+#include <glusterfs/mem-pool.h>
+#include "xdr-rpc.h"
+#include "rpc-common-xdr.h"
+#include <glusterfs/timespec.h>
+
+char *clnt_ping_procs[GF_DUMP_MAXVALUE] = {
+ [GF_DUMP_PING] = "NULL",
+};
+struct rpc_clnt_program clnt_ping_prog = {
+ .progname = "GF-DUMP",
+ .prognum = GLUSTER_DUMP_PROGRAM,
+ .progver = GLUSTER_DUMP_VERSION,
+ .procnames = clnt_ping_procs,
+};
+
+struct ping_local {
+ struct rpc_clnt *rpc;
+ struct timespec submit_time;
+};
+
+/* Must be called under conn->lock */
+static int
+__rpc_clnt_rearm_ping_timer(struct rpc_clnt *rpc, gf_timer_cbk_t cbk)
+{
+ rpc_clnt_connection_t *conn = &rpc->conn;
+ rpc_transport_t *trans = conn->trans;
+ struct timespec timeout = {
+ 0,
+ };
+ gf_timer_t *timer = NULL;
+
+ if (conn->ping_timer) {
+ gf_log_callingfn("", GF_LOG_CRITICAL,
+ "%s: ping timer event already scheduled",
+ conn->trans->peerinfo.identifier);
+ return -1;
+ }
+
+ timeout.tv_sec = conn->ping_timeout;
+ timeout.tv_nsec = 0;
+
+ rpc_clnt_ref(rpc);
+ timer = gf_timer_call_after(rpc->ctx, timeout, cbk, (void *)rpc);
+ if (timer == NULL) {
+ gf_log(trans->name, GF_LOG_WARNING, "unable to setup ping timer");
+
+ /* This unref can't be the last. We just took a ref few lines
+ * above. So this can be performed under conn->lock. */
+ rpc_clnt_unref(rpc);
+ conn->ping_started = 0;
+ return -1;
+ }
+
+ conn->ping_timer = timer;
+ conn->ping_started = 1;
+ return 0;
+}
+
+/* Must be called under conn->lock */
+int
+rpc_clnt_remove_ping_timer_locked(struct rpc_clnt *rpc)
+{
+ rpc_clnt_connection_t *conn = &rpc->conn;
+ gf_timer_t *timer = NULL;
+
+ if (conn->ping_timer) {
+ timer = conn->ping_timer;
+ conn->ping_timer = NULL;
+ gf_timer_call_cancel(rpc->ctx, timer);
+ conn->ping_started = 0;
+ return 1;
+ }
+
+ /* This is to account for rpc_clnt_disable that might have set
+ * conn->trans to NULL. */
+ if (conn->trans)
+ gf_log_callingfn("", GF_LOG_DEBUG,
+ "%s: ping timer event "
+ "already removed",
+ conn->trans->peerinfo.identifier);
+
+ return 0;
+}
+
+static void
+rpc_clnt_start_ping(void *rpc_ptr);
+
+void
+rpc_clnt_ping_timer_expired(void *rpc_ptr)
+{
+ struct rpc_clnt *rpc = NULL;
+ rpc_transport_t *trans = NULL;
+ rpc_clnt_connection_t *conn = NULL;
+ int disconnect = 0;
+ struct timespec current = {
+ 0,
+ };
+ int unref = 0;
+
+ rpc = (struct rpc_clnt *)rpc_ptr;
+ conn = &rpc->conn;
+ trans = conn->trans;
+
+ if (!trans) {
+ gf_log("ping-timer", GF_LOG_WARNING, "transport not initialized");
+ goto out;
+ }
+
+ timespec_now_realtime(&current);
+ pthread_mutex_lock(&conn->lock);
+ {
+ unref = rpc_clnt_remove_ping_timer_locked(rpc);
+
+ if (((current.tv_sec - conn->last_received.tv_sec) <
+ conn->ping_timeout) ||
+ ((current.tv_sec - conn->last_sent.tv_sec) < conn->ping_timeout)) {
+ gf_log(trans->name, GF_LOG_TRACE,
+ "ping timer expired but transport activity "
+ "detected - not bailing transport");
+ if (__rpc_clnt_rearm_ping_timer(rpc, rpc_clnt_ping_timer_expired) ==
+ -1) {
+ gf_log(trans->name, GF_LOG_WARNING,
+ "unable to setup ping timer");
+ }
+ } else {
+ conn->ping_started = 0;
+ disconnect = 1;
+ }
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ if (unref)
+ rpc_clnt_unref(rpc);
+
+ if (disconnect) {
+ gf_log(trans->name, GF_LOG_CRITICAL,
+ "server %s has not responded in the last %d "
+ "seconds, disconnecting.",
+ trans->peerinfo.identifier, conn->ping_timeout);
+
+ rpc_transport_disconnect(conn->trans, _gf_false);
+ }
+
+out:
+ return;
+}
+
+int
+rpc_clnt_ping_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ struct ping_local *local = NULL;
+ xlator_t *this = NULL;
+ rpc_clnt_connection_t *conn = NULL;
+ call_frame_t *frame = NULL;
+ int unref = 0;
+ gf_boolean_t call_notify = _gf_false;
+
+ struct timespec now;
+ struct timespec delta;
+ int64_t latency_msec = 0;
+ int ret = 0;
+
+ if (!myframe) {
+ gf_log(THIS->name, GF_LOG_WARNING, "frame with the request is NULL");
+ goto out;
+ }
+
+ frame = myframe;
+ this = frame->this;
+ local = frame->local;
+ conn = &local->rpc->conn;
+
+ timespec_now(&now);
+ timespec_sub(&local->submit_time, &now, &delta);
+ latency_msec = delta.tv_sec * 1000 + delta.tv_nsec / 1000000;
+
+ gf_log(THIS->name, GF_LOG_DEBUG, "Ping latency is %" PRIu64 "ms",
+ latency_msec);
+ call_notify = _gf_true;
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ unref = rpc_clnt_remove_ping_timer_locked(local->rpc);
+ if (req->rpc_status == -1) {
+ conn->ping_started = 0;
+ pthread_mutex_unlock(&conn->lock);
+ if (unref) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "socket or ib related error");
+
+ } else {
+ /* timer expired and transport bailed out */
+ gf_log(this->name, GF_LOG_WARNING, "socket disconnected");
+ }
+ goto after_unlock;
+ }
+
+ if (__rpc_clnt_rearm_ping_timer(local->rpc, rpc_clnt_start_ping) ==
+ -1) {
+ /* unlock before logging error */
+ pthread_mutex_unlock(&conn->lock);
+ gf_log(this->name, GF_LOG_WARNING, "failed to set the ping timer");
+ } else {
+ /* just unlock the mutex */
+ pthread_mutex_unlock(&conn->lock);
+ }
+ }
+after_unlock:
+ if (call_notify) {
+ ret = local->rpc->notifyfn(local->rpc, this, RPC_CLNT_PING,
+ (void *)(uintptr_t)latency_msec);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING, "RPC_CLNT_PING notify failed");
+ }
+ }
+out:
+ if (unref)
+ rpc_clnt_unref(local->rpc);
+
+ if (frame) {
+ GF_FREE(frame->local);
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+ }
+ return 0;
+}
+
+int
+rpc_clnt_ping(struct rpc_clnt *rpc)
+{
+ call_frame_t *frame = NULL;
+ int32_t ret = -1;
+ rpc_clnt_connection_t *conn = NULL;
+ struct ping_local *local = NULL;
+
+ conn = &rpc->conn;
+ local = GF_MALLOC(sizeof(struct ping_local), gf_common_ping_local_t);
+ if (!local)
+ return ret;
+ frame = create_frame(THIS, THIS->ctx->pool);
+ if (!frame) {
+ GF_FREE(local);
+ return ret;
+ }
+
+ local->rpc = rpc;
+ timespec_now(&local->submit_time);
+ frame->local = local;
+
+ ret = rpc_clnt_submit(rpc, &clnt_ping_prog, GF_DUMP_PING, rpc_clnt_ping_cbk,
+ NULL, 0, NULL, 0, NULL, frame, NULL, 0, NULL, 0,
+ NULL);
+ if (ret) {
+ /* FIXME: should we free the frame here? Methinks so! */
+ gf_log(THIS->name, GF_LOG_ERROR, "failed to start ping timer");
+ } else {
+ /* ping successfully queued in list of saved frames
+ * for the connection*/
+ pthread_mutex_lock(&conn->lock);
+ conn->pingcnt++;
+ pthread_mutex_unlock(&conn->lock);
+ }
+
+ return ret;
+}
+
+static void
+rpc_clnt_start_ping(void *rpc_ptr)
+{
+ struct rpc_clnt *rpc = NULL;
+ rpc_clnt_connection_t *conn = NULL;
+ int frame_count = 0;
+ int unref = 0;
+
+ rpc = (struct rpc_clnt *)rpc_ptr;
+ conn = &rpc->conn;
+
+ if (conn->ping_timeout == 0) {
+ gf_log(THIS->name, GF_LOG_DEBUG,
+ "ping timeout is 0,"
+ " returning");
+ return;
+ }
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ unref = rpc_clnt_remove_ping_timer_locked(rpc);
+
+ if (conn->saved_frames) {
+ GF_ASSERT(conn->saved_frames->count >= 0);
+ /* treat the case where conn->saved_frames is NULL
+ as no pending frames */
+ frame_count = conn->saved_frames->count;
+ }
+
+ if ((frame_count == 0) || !conn->connected) {
+ gf_log(THIS->name, GF_LOG_DEBUG,
+ "returning as transport is already disconnected"
+ " OR there are no frames (%d || %d)",
+ !conn->connected, frame_count);
+
+ pthread_mutex_unlock(&conn->lock);
+ if (unref)
+ rpc_clnt_unref(rpc);
+ return;
+ }
+
+ if (__rpc_clnt_rearm_ping_timer(rpc, rpc_clnt_ping_timer_expired) ==
+ -1) {
+ gf_log(THIS->name, GF_LOG_WARNING, "unable to setup ping timer");
+ pthread_mutex_unlock(&conn->lock);
+ if (unref)
+ rpc_clnt_unref(rpc);
+ return;
+ }
+ }
+ pthread_mutex_unlock(&conn->lock);
+ if (unref)
+ rpc_clnt_unref(rpc);
+
+ rpc_clnt_ping(rpc);
+}
+
+void
+rpc_clnt_check_and_start_ping(struct rpc_clnt *rpc)
+{
+ char start_ping = 0;
+
+ pthread_mutex_lock(&rpc->conn.lock);
+ {
+ if (!rpc->conn.ping_started)
+ start_ping = 1;
+ }
+ pthread_mutex_unlock(&rpc->conn.lock);
+
+ if (start_ping)
+ rpc_clnt_start_ping((void *)rpc);
+
+ return;
+}
diff --git a/rpc/rpc-lib/src/rpc-clnt-ping.h b/rpc/rpc-lib/src/rpc-clnt-ping.h
new file mode 100644
index 00000000000..e5466a828c2
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-clnt-ping.h
@@ -0,0 +1,16 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+struct rpc_clnt;
+#define RPC_DEFAULT_PING_TIMEOUT 30
+void
+rpc_clnt_check_and_start_ping(struct rpc_clnt *rpc_ptr);
+int
+rpc_clnt_remove_ping_timer_locked(struct rpc_clnt *rpc);
diff --git a/rpc/rpc-lib/src/rpc-clnt.c b/rpc/rpc-lib/src/rpc-clnt.c
index 21066dfc596..517037c4a5d 100644
--- a/rpc/rpc-lib/src/rpc-clnt.c
+++ b/rpc/rpc-lib/src/rpc-clnt.c
@@ -8,507 +8,473 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#define RPC_CLNT_DEFAULT_REQUEST_COUNT 512
#include "rpc-clnt.h"
-#include "byte-order.h"
+#include "rpc-clnt-ping.h"
+#include <glusterfs/byte-order.h>
#include "xdr-rpcclnt.h"
#include "rpc-transport.h"
#include "protocol-common.h"
-#include "mem-pool.h"
+#include <glusterfs/mem-pool.h>
#include "xdr-rpc.h"
#include "rpc-common-xdr.h"
void
-rpc_clnt_reply_deinit (struct rpc_req *req, struct mem_pool *pool);
+rpc_clnt_reply_deinit(struct rpc_req *req, struct mem_pool *pool);
-uint64_t
-rpc_clnt_new_callid (struct rpc_clnt *clnt)
+struct saved_frame *
+__saved_frames_get_timedout(struct saved_frames *frames, uint32_t timeout,
+ struct timeval *current)
{
- uint64_t callid = 0;
+ struct saved_frame *bailout_frame = NULL, *tmp = NULL;
- pthread_mutex_lock (&clnt->lock);
- {
- callid = ++clnt->xid;
+ if (!list_empty(&frames->sf.list)) {
+ tmp = list_entry(frames->sf.list.next, typeof(*tmp), list);
+ if ((tmp->saved_at.tv_sec + timeout) <= current->tv_sec) {
+ bailout_frame = tmp;
+ list_del_init(&bailout_frame->list);
+ frames->count--;
}
- pthread_mutex_unlock (&clnt->lock);
+ }
- return callid;
-}
-
-
-struct saved_frame *
-__saved_frames_get_timedout (struct saved_frames *frames, uint32_t timeout,
- struct timeval *current)
-{
- struct saved_frame *bailout_frame = NULL, *tmp = NULL;
-
- if (!list_empty(&frames->sf.list)) {
- tmp = list_entry (frames->sf.list.next, typeof (*tmp), list);
- if ((tmp->saved_at.tv_sec + timeout) < current->tv_sec) {
- bailout_frame = tmp;
- list_del_init (&bailout_frame->list);
- frames->count--;
- }
- }
-
- return bailout_frame;
+ return bailout_frame;
}
static int
-_is_lock_fop (struct saved_frame *sframe)
+_is_lock_fop(struct saved_frame *sframe)
{
- int fop = 0;
+ int fop = 0;
- if (SFRAME_GET_PROGNUM (sframe) == GLUSTER3_1_FOP_PROGRAM &&
- SFRAME_GET_PROGVER (sframe) == GLUSTER3_1_FOP_VERSION)
- fop = SFRAME_GET_PROCNUM (sframe);
+ if (SFRAME_GET_PROGNUM(sframe) == GLUSTER_FOP_PROGRAM &&
+ SFRAME_GET_PROGVER(sframe) == GLUSTER_FOP_VERSION)
+ fop = SFRAME_GET_PROCNUM(sframe);
- return ((fop == GFS3_OP_LK) ||
- (fop == GFS3_OP_INODELK) ||
- (fop == GFS3_OP_FINODELK) ||
- (fop == GFS3_OP_ENTRYLK) ||
- (fop == GFS3_OP_FENTRYLK));
+ return ((fop == GFS3_OP_LK) || (fop == GFS3_OP_INODELK) ||
+ (fop == GFS3_OP_FINODELK) || (fop == GFS3_OP_ENTRYLK) ||
+ (fop == GFS3_OP_FENTRYLK));
}
-struct saved_frame *
-__saved_frames_put (struct saved_frames *frames, void *frame,
- struct rpc_req *rpcreq)
+static struct saved_frame *
+__saved_frames_put(struct saved_frames *frames, void *frame,
+ struct rpc_req *rpcreq)
{
- struct saved_frame *saved_frame = NULL;
+ struct saved_frame *saved_frame = mem_get(
+ rpcreq->conn->rpc_clnt->saved_frames_pool);
- saved_frame = mem_get (rpcreq->conn->rpc_clnt->saved_frames_pool);
- if (!saved_frame) {
- goto out;
- }
- /* THIS should be saved and set back */
+ if (!saved_frame) {
+ goto out;
+ }
+ /* THIS should be saved and set back */
- memset (saved_frame, 0, sizeof (*saved_frame));
- INIT_LIST_HEAD (&saved_frame->list);
+ INIT_LIST_HEAD(&saved_frame->list);
- saved_frame->capital_this = THIS;
- saved_frame->frame = frame;
- saved_frame->rpcreq = rpcreq;
- gettimeofday (&saved_frame->saved_at, NULL);
+ saved_frame->capital_this = THIS;
+ saved_frame->frame = frame;
+ saved_frame->rpcreq = rpcreq;
+ gettimeofday(&saved_frame->saved_at, NULL);
+ memset(&saved_frame->rsp, 0, sizeof(rpc_transport_rsp_t));
- if (_is_lock_fop (saved_frame))
- list_add_tail (&saved_frame->list, &frames->lk_sf.list);
- else
- list_add_tail (&saved_frame->list, &frames->sf.list);
+ if (_is_lock_fop(saved_frame))
+ list_add_tail(&saved_frame->list, &frames->lk_sf.list);
+ else
+ list_add_tail(&saved_frame->list, &frames->sf.list);
- frames->count++;
+ frames->count++;
out:
- return saved_frame;
+ return saved_frame;
}
-
-void
-saved_frames_delete (struct saved_frame *saved_frame,
- rpc_clnt_connection_t *conn)
-{
- GF_VALIDATE_OR_GOTO ("rpc-clnt", saved_frame, out);
- GF_VALIDATE_OR_GOTO ("rpc-clnt", conn, out);
-
- pthread_mutex_lock (&conn->lock);
- {
- list_del_init (&saved_frame->list);
- conn->saved_frames->count--;
- }
- pthread_mutex_unlock (&conn->lock);
-
- if (saved_frame->rpcreq != NULL) {
- rpc_clnt_reply_deinit (saved_frame->rpcreq,
- conn->rpc_clnt->reqpool);
- }
-
- mem_put (saved_frame);
-out:
- return;
-}
-
-
static void
-call_bail (void *data)
+call_bail(void *data)
{
- struct rpc_clnt *clnt = NULL;
- rpc_clnt_connection_t *conn = NULL;
- struct timeval current;
- struct list_head list;
- struct saved_frame *saved_frame = NULL;
- struct saved_frame *trav = NULL;
- struct saved_frame *tmp = NULL;
- char frame_sent[256] = {0,};
- struct timeval timeout = {0,};
- struct iovec iov = {0,};
-
- GF_VALIDATE_OR_GOTO ("client", data, out);
-
- clnt = data;
-
- conn = &clnt->conn;
-
- gettimeofday (&current, NULL);
- INIT_LIST_HEAD (&list);
-
- pthread_mutex_lock (&conn->lock);
- {
- /* Chaining to get call-always functionality from
- call-once timer */
- if (conn->timer) {
- timeout.tv_sec = 10;
- timeout.tv_usec = 0;
-
- gf_timer_call_cancel (clnt->ctx, conn->timer);
- conn->timer = gf_timer_call_after (clnt->ctx,
- timeout,
- call_bail,
- (void *) clnt);
-
- if (conn->timer == NULL) {
- gf_log (conn->trans->name, GF_LOG_WARNING,
- "Cannot create bailout timer");
- }
- }
-
- do {
- saved_frame =
- __saved_frames_get_timedout (conn->saved_frames,
- conn->frame_timeout,
- &current);
- if (saved_frame)
- list_add (&saved_frame->list, &list);
-
- } while (saved_frame);
- }
- pthread_mutex_unlock (&conn->lock);
-
- list_for_each_entry_safe (trav, tmp, &list, list) {
- gf_time_fmt (frame_sent, sizeof frame_sent,
- trav->saved_at.tv_sec, gf_timefmt_FT);
- snprintf (frame_sent + strlen (frame_sent),
- 256 - strlen (frame_sent),
- ".%"GF_PRI_SUSECONDS, trav->saved_at.tv_usec);
-
- gf_log (conn->trans->name, GF_LOG_ERROR,
- "bailing out frame type(%s) op(%s(%d)) xid = 0x%ux "
- "sent = %s. timeout = %d",
- trav->rpcreq->prog->progname,
- (trav->rpcreq->prog->procnames) ?
- trav->rpcreq->prog->procnames[trav->rpcreq->procnum] :
- "--",
- trav->rpcreq->procnum, trav->rpcreq->xid, frame_sent,
- conn->frame_timeout);
-
- clnt = rpc_clnt_ref (clnt);
- trav->rpcreq->rpc_status = -1;
- trav->rpcreq->cbkfn (trav->rpcreq, &iov, 1, trav->frame);
-
- rpc_clnt_reply_deinit (trav->rpcreq, clnt->reqpool);
- clnt = rpc_clnt_unref (clnt);
- list_del_init (&trav->list);
- mem_put (trav);
- }
+ rpc_transport_t *trans = NULL;
+ struct rpc_clnt *clnt = NULL;
+ rpc_clnt_connection_t *conn = NULL;
+ struct timeval current;
+ struct list_head list;
+ struct saved_frame *saved_frame = NULL;
+ struct saved_frame *trav = NULL;
+ struct saved_frame *tmp = NULL;
+ char frame_sent[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ struct timespec timeout = {
+ 0,
+ };
+ char peerid[UNIX_PATH_MAX] = {0};
+ gf_boolean_t need_unref = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("client", data, out);
+
+ clnt = data;
+
+ conn = &clnt->conn;
+ pthread_mutex_lock(&conn->lock);
+ {
+ trans = conn->trans;
+ if (trans) {
+ (void)snprintf(peerid, sizeof(peerid), "%s",
+ conn->trans->peerinfo.identifier);
+ }
+ }
+ pthread_mutex_unlock(&conn->lock);
+ /*rpc_clnt_connection_cleanup will be unwinding all saved frames,
+ * bailed or otherwise*/
+ if (!trans)
+ goto out;
+
+ gettimeofday(&current, NULL);
+ INIT_LIST_HEAD(&list);
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ /* Chaining to get call-always functionality from
+ call-once timer */
+ if (conn->timer) {
+ timeout.tv_sec = 10;
+ timeout.tv_nsec = 0;
+
+ /* Ref rpc as it's added to timer event queue */
+ rpc_clnt_ref(clnt);
+ gf_timer_call_cancel(clnt->ctx, conn->timer);
+ conn->timer = gf_timer_call_after(clnt->ctx, timeout, call_bail,
+ (void *)clnt);
+
+ if (conn->timer == NULL) {
+ gf_log(conn->name, GF_LOG_WARNING,
+ "Cannot create bailout timer for %s", peerid);
+ need_unref = _gf_true;
+ }
+ }
+
+ do {
+ saved_frame = __saved_frames_get_timedout(
+ conn->saved_frames, conn->frame_timeout, &current);
+ if (saved_frame)
+ list_add(&saved_frame->list, &list);
+
+ } while (saved_frame);
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ if (list_empty(&list))
+ goto out;
+
+ list_for_each_entry_safe(trav, tmp, &list, list)
+ {
+ gf_time_fmt_tv(frame_sent, sizeof frame_sent, &trav->saved_at,
+ gf_timefmt_FT);
+
+ gf_log(conn->name, GF_LOG_ERROR,
+ "bailing out frame type(%s), op(%s(%d)), xid = 0x%x, "
+ "unique = %" PRIu64 ", sent = %s, timeout = %d for %s",
+ trav->rpcreq->prog->progname,
+ (trav->rpcreq->prog->procnames)
+ ? trav->rpcreq->prog->procnames[trav->rpcreq->procnum]
+ : "--",
+ trav->rpcreq->procnum, trav->rpcreq->xid,
+ ((call_frame_t *)(trav->frame))->root->unique, frame_sent,
+ conn->frame_timeout, peerid);
+
+ clnt = rpc_clnt_ref(clnt);
+ trav->rpcreq->rpc_status = -1;
+ trav->rpcreq->cbkfn(trav->rpcreq, NULL, 0, trav->frame);
+
+ rpc_clnt_reply_deinit(trav->rpcreq, clnt->reqpool);
+ clnt = rpc_clnt_unref(clnt);
+ list_del_init(&trav->list);
+ mem_put(trav);
+ }
out:
- return;
+ rpc_clnt_unref(clnt);
+ if (need_unref)
+ rpc_clnt_unref(clnt);
+ return;
}
-
/* to be called with conn->lock held */
-struct saved_frame *
-__save_frame (struct rpc_clnt *rpc_clnt, call_frame_t *frame,
- struct rpc_req *rpcreq)
+static struct saved_frame *
+__save_frame(struct rpc_clnt *rpc_clnt, call_frame_t *frame,
+ struct rpc_req *rpcreq)
{
- rpc_clnt_connection_t *conn = NULL;
- struct timeval timeout = {0, };
- struct saved_frame *saved_frame = NULL;
-
- conn = &rpc_clnt->conn;
-
- saved_frame = __saved_frames_put (conn->saved_frames, frame, rpcreq);
-
- if (saved_frame == NULL) {
- goto out;
- }
-
- /* TODO: make timeout configurable */
- if (conn->timer == NULL) {
- timeout.tv_sec = 10;
- timeout.tv_usec = 0;
- conn->timer = gf_timer_call_after (rpc_clnt->ctx,
- timeout,
- call_bail,
- (void *) rpc_clnt);
- }
+ rpc_clnt_connection_t *conn = &rpc_clnt->conn;
+ struct timespec timeout = {
+ 0,
+ };
+ struct saved_frame *saved_frame = __saved_frames_put(conn->saved_frames,
+ frame, rpcreq);
+
+ if (saved_frame == NULL) {
+ goto out;
+ }
+
+ /* TODO: make timeout configurable */
+ if (conn->timer == NULL) {
+ timeout.tv_sec = 10;
+ timeout.tv_nsec = 0;
+ rpc_clnt_ref(rpc_clnt);
+ conn->timer = gf_timer_call_after(rpc_clnt->ctx, timeout, call_bail,
+ (void *)rpc_clnt);
+ }
out:
- return saved_frame;
+ return saved_frame;
}
-
struct saved_frames *
-saved_frames_new (void)
+saved_frames_new(void)
{
- struct saved_frames *saved_frames = NULL;
+ struct saved_frames *saved_frames = NULL;
- saved_frames = GF_CALLOC (1, sizeof (*saved_frames),
- gf_common_mt_rpcclnt_savedframe_t);
- if (!saved_frames) {
- return NULL;
- }
+ saved_frames = GF_CALLOC(1, sizeof(*saved_frames),
+ gf_common_mt_rpcclnt_savedframe_t);
+ if (!saved_frames) {
+ return NULL;
+ }
- INIT_LIST_HEAD (&saved_frames->sf.list);
- INIT_LIST_HEAD (&saved_frames->lk_sf.list);
+ INIT_LIST_HEAD(&saved_frames->sf.list);
+ INIT_LIST_HEAD(&saved_frames->lk_sf.list);
- return saved_frames;
+ return saved_frames;
}
-
int
-__saved_frame_copy (struct saved_frames *frames, int64_t callid,
- struct saved_frame *saved_frame)
+__saved_frame_copy(struct saved_frames *frames, int64_t callid,
+ struct saved_frame *saved_frame)
{
- struct saved_frame *tmp = NULL;
- int ret = -1;
+ struct saved_frame *tmp = NULL;
+ int ret = -1;
+
+ if (!saved_frame) {
+ ret = 0;
+ goto out;
+ }
- if (!saved_frame) {
- ret = 0;
- goto out;
+ list_for_each_entry(tmp, &frames->sf.list, list)
+ {
+ if (tmp->rpcreq->xid == callid) {
+ *saved_frame = *tmp;
+ ret = 0;
+ goto out;
}
+ }
- list_for_each_entry (tmp, &frames->sf.list, list) {
- if (tmp->rpcreq->xid == callid) {
- *saved_frame = *tmp;
- ret = 0;
- goto out;
- }
- }
-
- list_for_each_entry (tmp, &frames->lk_sf.list, list) {
- if (tmp->rpcreq->xid == callid) {
- *saved_frame = *tmp;
- ret = 0;
- goto out;
- }
- }
+ list_for_each_entry(tmp, &frames->lk_sf.list, list)
+ {
+ if (tmp->rpcreq->xid == callid) {
+ *saved_frame = *tmp;
+ ret = 0;
+ goto out;
+ }
+ }
out:
- return ret;
+ return ret;
}
-
struct saved_frame *
-__saved_frame_get (struct saved_frames *frames, int64_t callid)
+__saved_frame_get(struct saved_frames *frames, int64_t callid)
{
- struct saved_frame *saved_frame = NULL;
- struct saved_frame *tmp = NULL;
-
- list_for_each_entry (tmp, &frames->sf.list, list) {
- if (tmp->rpcreq->xid == callid) {
- list_del_init (&tmp->list);
- frames->count--;
- saved_frame = tmp;
- goto out;
- }
- }
-
- list_for_each_entry (tmp, &frames->lk_sf.list, list) {
- if (tmp->rpcreq->xid == callid) {
- list_del_init (&tmp->list);
- frames->count--;
- saved_frame = tmp;
- goto out;
- }
- }
+ struct saved_frame *saved_frame = NULL;
+ struct saved_frame *tmp = NULL;
-out:
- if (saved_frame) {
- THIS = saved_frame->capital_this;
+ list_for_each_entry(tmp, &frames->sf.list, list)
+ {
+ if (tmp->rpcreq->xid == callid) {
+ list_del_init(&tmp->list);
+ frames->count--;
+ saved_frame = tmp;
+ goto out;
}
+ }
- return saved_frame;
-}
+ list_for_each_entry(tmp, &frames->lk_sf.list, list)
+ {
+ if (tmp->rpcreq->xid == callid) {
+ list_del_init(&tmp->list);
+ frames->count--;
+ saved_frame = tmp;
+ goto out;
+ }
+ }
+out:
+ if (saved_frame) {
+ THIS = saved_frame->capital_this;
+ }
+
+ return saved_frame;
+}
void
-saved_frames_unwind (struct saved_frames *saved_frames)
+saved_frames_unwind(struct saved_frames *saved_frames)
{
- struct rpc_clnt *clnt = NULL;
- struct saved_frame *trav = NULL;
- struct saved_frame *tmp = NULL;
- char timestr[1024] = {0,};
-
- struct iovec iov = {0,};
-
- list_splice_init (&saved_frames->lk_sf.list, &saved_frames->sf.list);
-
- list_for_each_entry_safe (trav, tmp, &saved_frames->sf.list, list) {
- gf_time_fmt (timestr, sizeof timestr,
- trav->saved_at.tv_sec, gf_timefmt_FT);
- snprintf (timestr + strlen (timestr),
- sizeof(timestr) - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, trav->saved_at.tv_usec);
-
- if (!trav->rpcreq || !trav->rpcreq->prog)
- continue;
-
- gf_log_callingfn (trav->rpcreq->conn->trans->name,
- GF_LOG_ERROR,
- "forced unwinding frame type(%s) op(%s(%d)) "
- "called at %s (xid=0x%ux)",
- trav->rpcreq->prog->progname,
- ((trav->rpcreq->prog->procnames) ?
- trav->rpcreq->prog->procnames[trav->rpcreq->procnum]
- : "--"),
- trav->rpcreq->procnum, timestr,
- trav->rpcreq->xid);
- saved_frames->count--;
-
- clnt = rpc_clnt_ref (trav->rpcreq->conn->rpc_clnt);
- trav->rpcreq->rpc_status = -1;
- trav->rpcreq->cbkfn (trav->rpcreq, &iov, 1, trav->frame);
-
- rpc_clnt_reply_deinit (trav->rpcreq,
- trav->rpcreq->conn->rpc_clnt->reqpool);
-
- clnt = rpc_clnt_unref (clnt);
- list_del_init (&trav->list);
- mem_put (trav);
- }
+ struct saved_frame *trav = NULL;
+ struct saved_frame *tmp = NULL;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+
+ list_splice_init(&saved_frames->lk_sf.list, &saved_frames->sf.list);
+
+ list_for_each_entry_safe(trav, tmp, &saved_frames->sf.list, list)
+ {
+ gf_time_fmt_tv(timestr, sizeof timestr, &trav->saved_at, gf_timefmt_FT);
+
+ if (!trav->rpcreq || !trav->rpcreq->prog)
+ continue;
+
+ gf_log_callingfn(
+ trav->rpcreq->conn->name, GF_LOG_ERROR,
+ "forced unwinding frame type(%s) op(%s(%d)) "
+ "called at %s (xid=0x%x)",
+ trav->rpcreq->prog->progname,
+ ((trav->rpcreq->prog->procnames)
+ ? trav->rpcreq->prog->procnames[trav->rpcreq->procnum]
+ : "--"),
+ trav->rpcreq->procnum, timestr, trav->rpcreq->xid);
+ saved_frames->count--;
+
+ trav->rpcreq->rpc_status = -1;
+ trav->rpcreq->cbkfn(trav->rpcreq, NULL, 0, trav->frame);
+
+ rpc_clnt_reply_deinit(trav->rpcreq,
+ trav->rpcreq->conn->rpc_clnt->reqpool);
+
+ list_del_init(&trav->list);
+ mem_put(trav);
+ }
}
-
void
-saved_frames_destroy (struct saved_frames *frames)
+saved_frames_destroy(struct saved_frames *frames)
{
- if (!frames)
- return;
+ if (!frames)
+ return;
- saved_frames_unwind (frames);
+ saved_frames_unwind(frames);
- GF_FREE (frames);
+ GF_FREE(frames);
}
-
void
-rpc_clnt_reconnect (void *trans_ptr)
+rpc_clnt_reconnect(void *conn_ptr)
{
- rpc_transport_t *trans = NULL;
- rpc_clnt_connection_t *conn = NULL;
- struct timeval tv = {0, 0};
- int32_t ret = 0;
- struct rpc_clnt *clnt = NULL;
-
- trans = trans_ptr;
- if (!trans || !trans->mydata)
- return;
-
- conn = trans->mydata;
- clnt = conn->rpc_clnt;
-
- pthread_mutex_lock (&conn->lock);
- {
- if (conn->reconnect)
- gf_timer_call_cancel (clnt->ctx,
- conn->reconnect);
- conn->reconnect = 0;
-
- if (conn->connected == 0) {
- tv.tv_sec = 3;
-
- gf_log (trans->name, GF_LOG_TRACE,
- "attempting reconnect");
- ret = rpc_transport_connect (trans,
- conn->config.remote_port);
- /* Every time there is a disconnection, processes
- should try to connect to 'glusterd' (ie, default
- port) or whichever port given as 'option remote-port'
- in volume file. */
- /* Below code makes sure the (re-)configured port lasts
- for just one successful attempt */
- if (!ret)
- conn->config.remote_port = 0;
-
- conn->reconnect =
- gf_timer_call_after (clnt->ctx, tv,
- rpc_clnt_reconnect,
- trans);
- } else {
- gf_log (trans->name, GF_LOG_TRACE,
- "breaking reconnect chain");
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- if ((ret == -1) && (errno != EINPROGRESS) && (clnt->notifyfn)) {
- clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_DISCONNECT, NULL);
- }
-
- return;
+ rpc_transport_t *trans = NULL;
+ rpc_clnt_connection_t *conn = NULL;
+ struct timespec ts = {0, 0};
+ struct rpc_clnt *clnt = NULL;
+ gf_boolean_t need_unref = _gf_false;
+ gf_boolean_t canceled_unref = _gf_false;
+
+ conn = conn_ptr;
+ clnt = conn->rpc_clnt;
+ pthread_mutex_lock(&conn->lock);
+ {
+ trans = conn->trans;
+ if (!trans)
+ goto out_unlock;
+
+ if (conn->reconnect) {
+ if (!gf_timer_call_cancel(clnt->ctx, conn->reconnect))
+ canceled_unref = _gf_true;
+ }
+ conn->reconnect = 0;
+
+ if ((conn->connected == 0) && !clnt->disabled) {
+ ts.tv_sec = 3;
+ ts.tv_nsec = 0;
+
+ gf_log(conn->name, GF_LOG_TRACE, "attempting reconnect");
+ (void)rpc_transport_connect(trans, conn->config.remote_port);
+ rpc_clnt_ref(clnt);
+ conn->reconnect = gf_timer_call_after(clnt->ctx, ts,
+ rpc_clnt_reconnect, conn);
+ if (!conn->reconnect) {
+ need_unref = _gf_true;
+ gf_log(conn->name, GF_LOG_ERROR,
+ "Error adding to timer event queue");
+ }
+ } else {
+ gf_log(conn->name, GF_LOG_TRACE, "breaking reconnect chain");
+ }
+ }
+out_unlock:
+ pthread_mutex_unlock(&conn->lock);
+
+ rpc_clnt_unref(clnt);
+ if (need_unref)
+ rpc_clnt_unref(clnt);
+ if (canceled_unref)
+ rpc_clnt_unref(clnt);
+ return;
}
-
int
-rpc_clnt_fill_request_info (struct rpc_clnt *clnt, rpc_request_info_t *info)
+rpc_clnt_fill_request_info(struct rpc_clnt *clnt, rpc_request_info_t *info)
{
- struct saved_frame saved_frame = {{}, 0};
- int ret = -1;
-
- pthread_mutex_lock (&clnt->conn.lock);
- {
- ret = __saved_frame_copy (clnt->conn.saved_frames, info->xid,
- &saved_frame);
- }
- pthread_mutex_unlock (&clnt->conn.lock);
-
- if (ret == -1) {
- gf_log (clnt->conn.trans->name, GF_LOG_CRITICAL,
- "cannot lookup the saved "
- "frame corresponding to xid (%d)", info->xid);
- goto out;
- }
-
- info->prognum = saved_frame.rpcreq->prog->prognum;
- info->procnum = saved_frame.rpcreq->procnum;
- info->progver = saved_frame.rpcreq->prog->progver;
- info->rpc_req = saved_frame.rpcreq;
- info->rsp = saved_frame.rsp;
-
- ret = 0;
+ struct saved_frame saved_frame;
+ int ret = -1;
+
+ pthread_mutex_lock(&clnt->conn.lock);
+ {
+ ret = __saved_frame_copy(clnt->conn.saved_frames, info->xid,
+ &saved_frame);
+ }
+ pthread_mutex_unlock(&clnt->conn.lock);
+
+ if (ret == -1) {
+ gf_log(clnt->conn.name, GF_LOG_CRITICAL,
+ "cannot lookup the saved "
+ "frame corresponding to xid (%d)",
+ info->xid);
+ goto out;
+ }
+
+ info->prognum = saved_frame.rpcreq->prog->prognum;
+ info->procnum = saved_frame.rpcreq->procnum;
+ info->progver = saved_frame.rpcreq->prog->progver;
+ info->rpc_req = saved_frame.rpcreq;
+ info->rsp = saved_frame.rsp;
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-rpc_clnt_reconnect_cleanup (rpc_clnt_connection_t *conn)
+rpc_clnt_reconnect_cleanup(rpc_clnt_connection_t *conn)
{
- struct rpc_clnt *clnt = NULL;
+ struct rpc_clnt *clnt = NULL;
+ int ret = 0;
+ gf_boolean_t reconnect_unref = _gf_false;
- if (!conn) {
- goto out;
- }
-
- clnt = conn->rpc_clnt;
-
- pthread_mutex_lock (&conn->lock);
- {
+ if (!conn) {
+ goto out;
+ }
- if (conn->reconnect) {
- gf_timer_call_cancel (clnt->ctx, conn->reconnect);
- conn->reconnect = NULL;
- }
+ clnt = conn->rpc_clnt;
+ pthread_mutex_lock(&conn->lock);
+ {
+ if (conn->reconnect) {
+ ret = gf_timer_call_cancel(clnt->ctx, conn->reconnect);
+ if (!ret) {
+ reconnect_unref = _gf_true;
+ conn->cleanup_gen++;
+ }
+ conn->reconnect = NULL;
}
- pthread_mutex_unlock (&conn->lock);
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ if (reconnect_unref)
+ rpc_clnt_unref(clnt);
out:
- return 0;
+ return 0;
}
/*
@@ -517,45 +483,62 @@ out:
*
*/
int
-rpc_clnt_connection_cleanup (rpc_clnt_connection_t *conn)
+rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn)
{
- struct saved_frames *saved_frames = NULL;
- struct rpc_clnt *clnt = NULL;
-
- if (!conn) {
- goto out;
- }
-
- clnt = conn->rpc_clnt;
-
- gf_log (conn->trans->name, GF_LOG_TRACE,
- "cleaning up state in transport object %p", conn->trans);
-
- pthread_mutex_lock (&conn->lock);
- {
- saved_frames = conn->saved_frames;
- conn->saved_frames = saved_frames_new ();
-
- /* bailout logic cleanup */
- if (conn->timer) {
- gf_timer_call_cancel (clnt->ctx, conn->timer);
- conn->timer = NULL;
- }
-
- conn->connected = 0;
-
- if (conn->ping_timer) {
- gf_timer_call_cancel (clnt->ctx, conn->ping_timer);
- conn->ping_timer = NULL;
- conn->ping_started = 0;
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- saved_frames_destroy (saved_frames);
-
+ struct saved_frames *saved_frames = NULL;
+ struct rpc_clnt *clnt = NULL;
+ int unref = 0;
+ int ret = 0;
+ gf_boolean_t timer_unref = _gf_false;
+ gf_boolean_t reconnect_unref = _gf_false;
+
+ if (!conn) {
+ goto out;
+ }
+
+ clnt = conn->rpc_clnt;
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ saved_frames = conn->saved_frames;
+ conn->saved_frames = saved_frames_new();
+
+ /* bailout logic cleanup */
+ if (conn->timer) {
+ ret = gf_timer_call_cancel(clnt->ctx, conn->timer);
+ if (!ret)
+ timer_unref = _gf_true;
+ conn->timer = NULL;
+ }
+ if (conn->reconnect) {
+ ret = gf_timer_call_cancel(clnt->ctx, conn->reconnect);
+ if (!ret)
+ reconnect_unref = _gf_true;
+ conn->reconnect = NULL;
+ }
+
+ conn->connected = 0;
+ conn->disconnected = 1;
+
+ unref = rpc_clnt_remove_ping_timer_locked(clnt);
+ /*reset rpc msgs stats*/
+ conn->pingcnt = 0;
+ conn->msgcnt = 0;
+ conn->cleanup_gen++;
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ saved_frames_destroy(saved_frames);
+ if (unref)
+ rpc_clnt_unref(clnt);
+
+ if (timer_unref)
+ rpc_clnt_unref(clnt);
+
+ if (reconnect_unref)
+ rpc_clnt_unref(clnt);
out:
- return 0;
+ return 0;
}
/*
@@ -567,1162 +550,1430 @@ out:
*/
static struct saved_frame *
-lookup_frame (rpc_clnt_connection_t *conn, int64_t callid)
+lookup_frame(rpc_clnt_connection_t *conn, int64_t callid)
{
- struct saved_frame *frame = NULL;
+ struct saved_frame *frame = NULL;
- pthread_mutex_lock (&conn->lock);
- {
- frame = __saved_frame_get (conn->saved_frames, callid);
- }
- pthread_mutex_unlock (&conn->lock);
+ pthread_mutex_lock(&conn->lock);
+ {
+ frame = __saved_frame_get(conn->saved_frames, callid);
+ }
+ pthread_mutex_unlock(&conn->lock);
- return frame;
+ return frame;
}
-
int
-rpc_clnt_reply_fill (rpc_transport_pollin_t *msg,
- rpc_clnt_connection_t *conn,
- struct rpc_msg *replymsg, struct iovec progmsg,
- struct rpc_req *req,
- struct saved_frame *saved_frame)
+rpc_clnt_reply_fill(rpc_transport_pollin_t *msg, rpc_clnt_connection_t *conn,
+ struct rpc_msg *replymsg, struct iovec progmsg,
+ struct rpc_req *req, struct saved_frame *saved_frame)
{
- int ret = -1;
-
- if ((!conn) || (!replymsg)|| (!req) || (!saved_frame) || (!msg)) {
- goto out;
- }
-
- req->rpc_status = 0;
- if ((rpc_reply_status (replymsg) == MSG_DENIED)
- || (rpc_accepted_reply_status (replymsg) != SUCCESS)) {
- req->rpc_status = -1;
- }
-
- req->rsp[0] = progmsg;
- req->rsp_iobref = iobref_ref (msg->iobref);
-
- if (msg->vectored) {
- req->rsp[1] = msg->vector[1];
- req->rspcnt = 2;
- } else {
- req->rspcnt = 1;
- }
-
- /* By this time, the data bytes for the auth scheme would have already
- * been copied into the required sections of the req structure,
- * we just need to fill in the meta-data about it now.
+ int ret = -1;
+
+ if ((!conn) || (!replymsg) || (!req) || (!saved_frame) || (!msg)) {
+ goto out;
+ }
+
+ req->rpc_status = 0;
+ if ((rpc_reply_status(replymsg) == MSG_DENIED) ||
+ (rpc_accepted_reply_status(replymsg) != SUCCESS)) {
+ req->rpc_status = -1;
+ }
+
+ req->rsp[0] = progmsg;
+ req->rsp_iobref = iobref_ref(msg->iobref);
+
+ if (msg->vectored) {
+ req->rsp[1] = msg->vector[1];
+ req->rspcnt = 2;
+ } else {
+ req->rspcnt = 1;
+ }
+
+ /* By this time, the data bytes for the auth scheme would have already
+ * been copied into the required sections of the req structure,
+ * we just need to fill in the meta-data about it now.
+ */
+ if (req->rpc_status == 0) {
+ /*
+ * req->verf.flavour = rpc_reply_verf_flavour (replymsg);
+ * req->verf.datalen = rpc_reply_verf_len (replymsg);
*/
- if (req->rpc_status == 0) {
- /*
- * req->verf.flavour = rpc_reply_verf_flavour (replymsg);
- * req->verf.datalen = rpc_reply_verf_len (replymsg);
- */
- }
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
void
-rpc_clnt_reply_deinit (struct rpc_req *req, struct mem_pool *pool)
+rpc_clnt_reply_deinit(struct rpc_req *req, struct mem_pool *pool)
{
- if (!req) {
- goto out;
- }
+ if (!req) {
+ goto out;
+ }
- if (req->rsp_iobref) {
- iobref_unref (req->rsp_iobref);
- }
+ if (req->rsp_iobref) {
+ iobref_unref(req->rsp_iobref);
+ }
- mem_put (req);
+ mem_put(req);
out:
- return;
+ return;
}
-
/* TODO: use mem-pool for allocating requests */
int
-rpc_clnt_reply_init (rpc_clnt_connection_t *conn, rpc_transport_pollin_t *msg,
- struct rpc_req *req, struct saved_frame *saved_frame)
+rpc_clnt_reply_init(rpc_clnt_connection_t *conn, rpc_transport_pollin_t *msg,
+ struct rpc_req *req, struct saved_frame *saved_frame)
{
- char *msgbuf = NULL;
- struct rpc_msg rpcmsg;
- struct iovec progmsg; /* RPC Program payload */
- size_t msglen = 0;
- int ret = -1;
-
- msgbuf = msg->vector[0].iov_base;
- msglen = msg->vector[0].iov_len;
-
- ret = xdr_to_rpc_reply (msgbuf, msglen, &rpcmsg, &progmsg,
- req->verf.authdata);
- if (ret != 0) {
- gf_log (conn->trans->name, GF_LOG_WARNING,
- "RPC reply decoding failed");
- goto out;
- }
-
- ret = rpc_clnt_reply_fill (msg, conn, &rpcmsg, progmsg, req,
- saved_frame);
- if (ret != 0) {
- goto out;
- }
-
- gf_log (conn->trans->name, GF_LOG_TRACE,
- "received rpc message (RPC XID: 0x%ux"
- " Program: %s, ProgVers: %d, Proc: %d) from rpc-transport (%s)",
- saved_frame->rpcreq->xid,
- saved_frame->rpcreq->prog->progname,
- saved_frame->rpcreq->prog->progver,
- saved_frame->rpcreq->procnum, conn->trans->name);
+ char *msgbuf = NULL;
+ struct rpc_msg rpcmsg;
+ struct iovec progmsg; /* RPC Program payload */
+ size_t msglen = 0;
+ int ret = -1;
+
+ msgbuf = msg->vector[0].iov_base;
+ msglen = msg->vector[0].iov_len;
+
+ ret = xdr_to_rpc_reply(msgbuf, msglen, &rpcmsg, &progmsg,
+ req->verf.authdata);
+ if (ret != 0) {
+ gf_log(conn->name, GF_LOG_WARNING, "RPC reply decoding failed");
+ goto out;
+ }
+
+ ret = rpc_clnt_reply_fill(msg, conn, &rpcmsg, progmsg, req, saved_frame);
+ if (ret != 0) {
+ goto out;
+ }
+
+ gf_log(conn->name, GF_LOG_TRACE,
+ "received rpc message (RPC XID: 0x%x"
+ " Program: %s, ProgVers: %d, Proc: %d) from rpc-transport (%s)",
+ saved_frame->rpcreq->xid, saved_frame->rpcreq->prog->progname,
+ saved_frame->rpcreq->prog->progver, saved_frame->rpcreq->procnum,
+ conn->name);
out:
- if (ret != 0) {
- req->rpc_status = -1;
- }
+ if (ret != 0) {
+ req->rpc_status = -1;
+ }
- return ret;
+ return ret;
}
int
-rpc_clnt_handle_cbk (struct rpc_clnt *clnt, rpc_transport_pollin_t *msg)
+rpc_clnt_handle_cbk(struct rpc_clnt *clnt, rpc_transport_pollin_t *msg)
{
- char *msgbuf = NULL;
- rpcclnt_cb_program_t *program = NULL;
- struct rpc_msg rpcmsg;
- struct iovec progmsg; /* RPC Program payload */
- size_t msglen = 0;
- int found = 0;
- int ret = -1;
- int procnum = 0;
-
- msgbuf = msg->vector[0].iov_base;
- msglen = msg->vector[0].iov_len;
-
- clnt = rpc_clnt_ref (clnt);
- ret = xdr_to_rpc_call (msgbuf, msglen, &rpcmsg, &progmsg, NULL,NULL);
- if (ret == -1) {
- gf_log (clnt->conn.trans->name, GF_LOG_WARNING,
- "RPC call decoding failed");
- goto out;
- }
-
- gf_log (clnt->conn.trans->name, GF_LOG_TRACE,
- "received rpc message (XID: 0x%lx, "
- "Ver: %ld, Program: %ld, ProgVers: %ld, Proc: %ld) "
- "from rpc-transport (%s)", rpc_call_xid (&rpcmsg),
- rpc_call_rpcvers (&rpcmsg), rpc_call_program (&rpcmsg),
- rpc_call_progver (&rpcmsg), rpc_call_progproc (&rpcmsg),
- clnt->conn.trans->name);
-
- procnum = rpc_call_progproc (&rpcmsg);
-
- pthread_mutex_lock (&clnt->lock);
+ char *msgbuf = NULL;
+ rpcclnt_cb_program_t *program = NULL;
+ struct rpc_msg rpcmsg;
+ struct iovec progmsg; /* RPC Program payload */
+ size_t msglen = 0;
+ int found = 0;
+ int ret = -1;
+ int procnum = 0;
+
+ msgbuf = msg->vector[0].iov_base;
+ msglen = msg->vector[0].iov_len;
+
+ clnt = rpc_clnt_ref(clnt);
+ ret = xdr_to_rpc_call(msgbuf, msglen, &rpcmsg, &progmsg, NULL, NULL);
+ if (ret == -1) {
+ gf_log(clnt->conn.name, GF_LOG_WARNING, "RPC call decoding failed");
+ goto out;
+ }
+
+ gf_log(clnt->conn.name, GF_LOG_TRACE,
+ "receivd rpc message (XID: 0x%" GF_PRI_RPC_XID
+ ", "
+ "Ver: %" GF_PRI_RPC_VERSION ", Program: %" GF_PRI_RPC_PROG_ID
+ ", "
+ "ProgVers: %" GF_PRI_RPC_PROG_VERS ", Proc: %" GF_PRI_RPC_PROC
+ ") "
+ "from rpc-transport (%s)",
+ rpc_call_xid(&rpcmsg), rpc_call_rpcvers(&rpcmsg),
+ rpc_call_program(&rpcmsg), rpc_call_progver(&rpcmsg),
+ rpc_call_progproc(&rpcmsg), clnt->conn.name);
+
+ procnum = rpc_call_progproc(&rpcmsg);
+
+ pthread_mutex_lock(&clnt->lock);
+ {
+ list_for_each_entry(program, &clnt->programs, program)
{
- list_for_each_entry (program, &clnt->programs, program) {
- if ((program->prognum == rpc_call_program (&rpcmsg))
- && (program->progver
- == rpc_call_progver (&rpcmsg))) {
- found = 1;
- break;
- }
- }
+ if ((program->prognum == rpc_call_program(&rpcmsg)) &&
+ (program->progver == rpc_call_progver(&rpcmsg))) {
+ found = 1;
+ break;
+ }
}
- pthread_mutex_unlock (&clnt->lock);
+ }
+ pthread_mutex_unlock(&clnt->lock);
- if (found && (procnum < program->numactors) &&
- (program->actors[procnum].actor)) {
- program->actors[procnum].actor (&progmsg);
- }
+ if (found && (procnum < program->numactors) &&
+ (program->actors[procnum].actor)) {
+ program->actors[procnum].actor(clnt, program->mydata, &progmsg);
+ }
out:
- clnt = rpc_clnt_unref (clnt);
- return ret;
+ rpc_clnt_unref(clnt);
+ return ret;
}
int
-rpc_clnt_handle_reply (struct rpc_clnt *clnt, rpc_transport_pollin_t *pollin)
+rpc_clnt_handle_reply(struct rpc_clnt *clnt, rpc_transport_pollin_t *pollin)
{
- rpc_clnt_connection_t *conn = NULL;
- struct saved_frame *saved_frame = NULL;
- int ret = -1;
- struct rpc_req *req = NULL;
- uint32_t xid = 0;
-
- clnt = rpc_clnt_ref (clnt);
- conn = &clnt->conn;
-
- xid = ntoh32 (*((uint32_t *)pollin->vector[0].iov_base));
- saved_frame = lookup_frame (conn, xid);
- if (saved_frame == NULL) {
- gf_log (conn->trans->name, GF_LOG_ERROR,
- "cannot lookup the saved frame for reply with xid (%u)",
- xid);
- goto out;
- }
-
- req = saved_frame->rpcreq;
- if (req == NULL) {
- gf_log (conn->trans->name, GF_LOG_ERROR,
- "no request with frame for xid (%u)", xid);
- goto out;
- }
-
- ret = rpc_clnt_reply_init (conn, pollin, req, saved_frame);
- if (ret != 0) {
- req->rpc_status = -1;
- gf_log (conn->trans->name, GF_LOG_WARNING,
- "initialising rpc reply failed");
- }
-
- req->cbkfn (req, req->rsp, req->rspcnt, saved_frame->frame);
-
- if (req) {
- rpc_clnt_reply_deinit (req, conn->rpc_clnt->reqpool);
- }
+ rpc_clnt_connection_t *conn = NULL;
+ struct saved_frame *saved_frame = NULL;
+ int ret = -1;
+ struct rpc_req *req = NULL;
+ uint32_t xid = 0;
+
+ clnt = rpc_clnt_ref(clnt);
+ conn = &clnt->conn;
+
+ xid = ntoh32(*((uint32_t *)pollin->vector[0].iov_base));
+ saved_frame = lookup_frame(conn, xid);
+ if (saved_frame == NULL) {
+ gf_log(conn->name, GF_LOG_ERROR,
+ "cannot lookup the saved frame for reply with xid (%u)", xid);
+ goto out;
+ }
+
+ req = saved_frame->rpcreq;
+ if (req == NULL) {
+ gf_log(conn->name, GF_LOG_ERROR, "no request with frame for xid (%u)",
+ xid);
+ goto out;
+ }
+
+ ret = rpc_clnt_reply_init(conn, pollin, req, saved_frame);
+ if (ret != 0) {
+ req->rpc_status = -1;
+ gf_log(conn->name, GF_LOG_WARNING, "initialising rpc reply failed");
+ }
+
+ req->cbkfn(req, req->rsp, req->rspcnt, saved_frame->frame);
+
+ if (req) {
+ rpc_clnt_reply_deinit(req, conn->rpc_clnt->reqpool);
+ }
out:
- if (saved_frame) {
- mem_put (saved_frame);
- }
+ if (saved_frame) {
+ mem_put(saved_frame);
+ }
- clnt = rpc_clnt_unref (clnt);
- return ret;
+ rpc_clnt_unref(clnt);
+ return ret;
}
-
-inline void
-rpc_clnt_set_connected (rpc_clnt_connection_t *conn)
+gf_boolean_t
+is_rpc_clnt_disconnected(rpc_clnt_connection_t *conn)
{
- if (!conn) {
- goto out;
- }
+ gf_boolean_t disconnected = _gf_true;
- pthread_mutex_lock (&conn->lock);
- {
- conn->connected = 1;
- }
- pthread_mutex_unlock (&conn->lock);
+ if (!conn)
+ return disconnected;
-out:
- return;
+ pthread_mutex_lock(&conn->lock);
+ {
+ disconnected = conn->disconnected;
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ return disconnected;
}
+static void
+rpc_clnt_destroy(struct rpc_clnt *rpc);
-void
-rpc_clnt_unset_connected (rpc_clnt_connection_t *conn)
-{
- if (!conn) {
- goto out;
- }
+#define RPC_THIS_SAVE(xl) \
+ do { \
+ old_THIS = THIS; \
+ if (!old_THIS) \
+ gf_log_callingfn("rpc", GF_LOG_CRITICAL, \
+ "THIS is not initialised."); \
+ THIS = xl; \
+ } while (0)
- pthread_mutex_lock (&conn->lock);
- {
- conn->connected = 0;
- }
- pthread_mutex_unlock (&conn->lock);
+#define RPC_THIS_RESTORE (THIS = old_THIS)
-out:
- return;
+static int
+rpc_clnt_handle_disconnect(struct rpc_clnt *clnt, rpc_clnt_connection_t *conn)
+{
+ struct timespec ts = {
+ 0,
+ };
+ gf_boolean_t unref_clnt = _gf_false;
+ uint64_t pre_notify_gen = 0, post_notify_gen = 0;
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ pre_notify_gen = conn->cleanup_gen;
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ if (clnt->notifyfn)
+ clnt->notifyfn(clnt, clnt->mydata, RPC_CLNT_DISCONNECT, NULL);
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ post_notify_gen = conn->cleanup_gen;
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ if (pre_notify_gen == post_notify_gen) {
+ /* program didn't invoke cleanup, so rpc has to do it */
+ rpc_clnt_connection_cleanup(conn);
+ }
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ if (!conn->rpc_clnt->disabled && (conn->reconnect == NULL)) {
+ ts.tv_sec = 3;
+ ts.tv_nsec = 0;
+
+ rpc_clnt_ref(clnt);
+ conn->reconnect = gf_timer_call_after(clnt->ctx, ts,
+ rpc_clnt_reconnect, conn);
+ if (conn->reconnect == NULL) {
+ gf_log(conn->name, GF_LOG_WARNING,
+ "Cannot create rpc_clnt_reconnect timer");
+ unref_clnt = _gf_true;
+ }
+ }
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ if (unref_clnt)
+ rpc_clnt_unref(clnt);
+
+ return 0;
}
int
-rpc_clnt_notify (rpc_transport_t *trans, void *mydata,
- rpc_transport_event_t event, void *data, ...)
+rpc_clnt_notify(rpc_transport_t *trans, void *mydata,
+ rpc_transport_event_t event, void *data, ...)
{
- rpc_clnt_connection_t *conn = NULL;
- struct rpc_clnt *clnt = NULL;
- int ret = -1;
- rpc_request_info_t *req_info = NULL;
- rpc_transport_pollin_t *pollin = NULL;
- struct timeval tv = {0, };
-
- conn = mydata;
- if (conn == NULL) {
- goto out;
- }
- clnt = conn->rpc_clnt;
- if (!clnt)
- goto out;
-
- switch (event) {
- case RPC_TRANSPORT_DISCONNECT:
- {
- rpc_clnt_connection_cleanup (conn);
-
- pthread_mutex_lock (&conn->lock);
- {
- if (!conn->rpc_clnt->disabled
- && (conn->reconnect == NULL)) {
- tv.tv_sec = 10;
-
- conn->reconnect =
- gf_timer_call_after (clnt->ctx, tv,
- rpc_clnt_reconnect,
- conn->trans);
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
- if (clnt->notifyfn)
- ret = clnt->notifyfn (clnt, clnt->mydata,
- RPC_CLNT_DISCONNECT, NULL);
- break;
+ rpc_clnt_connection_t *conn = NULL;
+ struct rpc_clnt *clnt = NULL;
+ int ret = -1;
+ rpc_request_info_t *req_info = NULL;
+ rpc_transport_pollin_t *pollin = NULL;
+ void *clnt_mydata = NULL;
+ DECLARE_OLD_THIS;
+
+ conn = mydata;
+ if (conn == NULL) {
+ goto out;
+ }
+ clnt = conn->rpc_clnt;
+ if (!clnt)
+ goto out;
+
+ RPC_THIS_SAVE(clnt->owner);
+
+ switch (event) {
+ case RPC_TRANSPORT_DISCONNECT: {
+ rpc_clnt_handle_disconnect(clnt, conn);
+ /* The auth_value was being reset to AUTH_GLUSTERFS_v2.
+ * if (clnt->auth_value)
+ * clnt->auth_value = AUTH_GLUSTERFS_v2;
+ * It should not be reset here. The disconnect during
+ * portmap request can race with handshake. If handshake
+ * happens first and disconnect later, auth_value would set
+ * to default value and it never sets back to actual auth_value
+ * supported by server. But it's important to set to lower
+ * version supported in the case where the server downgrades.
+ * So moving this code to RPC_TRANSPORT_CONNECT. Note that
+ * CONNECT cannot race with handshake as by nature it is
+ * serialized with handhake. An handshake can happen only
+ * on a connected transport and hence its strictly serialized.
+ */
+ break;
}
case RPC_TRANSPORT_CLEANUP:
- /* this event should not be received on a client for, a
- * transport is only disconnected, but never destroyed.
- */
- ret = 0;
- break;
-
- case RPC_TRANSPORT_MAP_XID_REQUEST:
- {
- req_info = data;
- ret = rpc_clnt_fill_request_info (clnt, req_info);
- break;
- }
-
- case RPC_TRANSPORT_MSG_RECEIVED:
- {
- pthread_mutex_lock (&conn->lock);
- {
- gettimeofday (&conn->last_received, NULL);
+ if (clnt->notifyfn) {
+ clnt_mydata = clnt->mydata;
+ clnt->mydata = NULL;
+ ret = clnt->notifyfn(clnt, clnt_mydata, RPC_CLNT_DESTROY, NULL);
+ if (ret < 0) {
+ gf_log(trans->name, GF_LOG_WARNING,
+ "client notify handler returned error "
+ "while handling RPC_CLNT_DESTROY");
}
- pthread_mutex_unlock (&conn->lock);
-
- pollin = data;
- if (pollin->is_reply)
- ret = rpc_clnt_handle_reply (clnt, pollin);
- else
- ret = rpc_clnt_handle_cbk (clnt, pollin);
- /* ret = clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_MSG,
- * data);
- */
- break;
- }
-
- case RPC_TRANSPORT_MSG_SENT:
- {
- pthread_mutex_lock (&conn->lock);
- {
- gettimeofday (&conn->last_sent, NULL);
- }
- pthread_mutex_unlock (&conn->lock);
+ }
+ rpc_clnt_destroy(clnt);
+ ret = 0;
+ break;
+
+ case RPC_TRANSPORT_MAP_XID_REQUEST: {
+ req_info = data;
+ ret = rpc_clnt_fill_request_info(clnt, req_info);
+ break;
+ }
+
+ case RPC_TRANSPORT_MSG_RECEIVED: {
+ timespec_now_realtime(&conn->last_received);
+
+ pollin = data;
+ if (pollin->is_reply)
+ ret = rpc_clnt_handle_reply(clnt, pollin);
+ else
+ ret = rpc_clnt_handle_cbk(clnt, pollin);
+ /* ret = clnt->notifyfn (clnt, clnt->mydata, RPC_CLNT_MSG,
+ * data);
+ */
+ break;
+ }
+
+ case RPC_TRANSPORT_MSG_SENT: {
+ timespec_now_realtime(&conn->last_sent);
+ ret = 0;
+ break;
+ }
+
+ case RPC_TRANSPORT_CONNECT: {
+ pthread_mutex_lock(&conn->lock);
+ {
+ /* Every time there is a disconnection, processes
+ * should try to connect to 'glusterd' (ie, default
+ * port) or whichever port given as 'option remote-port'
+ * in volume file. */
+ /* Below code makes sure the (re-)configured port lasts
+ * for just one successful attempt */
+ conn->config.remote_port = 0;
+ conn->connected = 1;
+ conn->disconnected = 0;
+ pthread_cond_broadcast(&conn->cond);
+ }
+ pthread_mutex_unlock(&conn->lock);
- ret = 0;
- break;
- }
+ /* auth value should be set to lower version available
+ * and will be set to appropriate version supported by
+ * server after the handshake.
+ */
+ if (clnt->auth_value)
+ clnt->auth_value = AUTH_GLUSTERFS_v2;
+ if (clnt->notifyfn)
+ ret = clnt->notifyfn(clnt, clnt->mydata, RPC_CLNT_CONNECT,
+ NULL);
- case RPC_TRANSPORT_CONNECT:
- {
- if (clnt->notifyfn)
- ret = clnt->notifyfn (clnt, clnt->mydata,
- RPC_CLNT_CONNECT, NULL);
- break;
+ break;
}
case RPC_TRANSPORT_ACCEPT:
- /* only meaningful on a server, no need of handling this event
- * in a client.
- */
- ret = 0;
- break;
- }
+ /* only meaningful on a server, no need of handling this event
+ * in a client.
+ */
+ ret = 0;
+ break;
+
+ case RPC_TRANSPORT_EVENT_THREAD_DIED:
+ /* only meaningful on a server, no need of handling this event on a
+ * client */
+ ret = 0;
+ break;
+ }
out:
- return ret;
-}
-
-
-void
-rpc_clnt_connection_deinit (rpc_clnt_connection_t *conn)
-{
- return;
+ RPC_THIS_RESTORE;
+ return ret;
}
-
-inline int
-rpc_clnt_connection_init (struct rpc_clnt *clnt, glusterfs_ctx_t *ctx,
- dict_t *options, char *name)
+static int
+rpc_clnt_connection_init(struct rpc_clnt *clnt, glusterfs_ctx_t *ctx,
+ dict_t *options, char *name)
{
- int ret = -1;
- rpc_clnt_connection_t *conn = NULL;
-
- conn = &clnt->conn;
- pthread_mutex_init (&clnt->conn.lock, NULL);
-
- ret = dict_get_int32 (options, "frame-timeout",
- &conn->frame_timeout);
- if (ret >= 0) {
- gf_log (name, GF_LOG_INFO,
- "setting frame-timeout to %d", conn->frame_timeout);
- } else {
- gf_log (name, GF_LOG_DEBUG,
- "defaulting frame-timeout to 30mins");
- conn->frame_timeout = 1800;
- }
+ int ret = -1;
+ rpc_clnt_connection_t *conn = NULL;
+ rpc_transport_t *trans = NULL;
- conn->trans = rpc_transport_load (ctx, options, name);
- if (!conn->trans) {
- gf_log (name, GF_LOG_WARNING, "loading of new rpc-transport"
- " failed");
- ret = -1;
- goto out;
- }
+ conn = &clnt->conn;
+ pthread_mutex_init(&clnt->conn.lock, NULL);
+ pthread_cond_init(&clnt->conn.cond, NULL);
- rpc_transport_ref (conn->trans);
-
- conn->rpc_clnt = clnt;
-
- ret = rpc_transport_register_notify (conn->trans, rpc_clnt_notify,
- conn);
- if (ret == -1) {
- gf_log (name, GF_LOG_WARNING, "registering notify failed");
- rpc_clnt_connection_cleanup (conn);
- conn = NULL;
- goto out;
- }
-
- conn->saved_frames = saved_frames_new ();
- if (!conn->saved_frames) {
- gf_log (name, GF_LOG_WARNING, "creation of saved_frames "
- "failed");
- rpc_clnt_connection_cleanup (conn);
- goto out;
- }
+ conn->name = gf_strdup(name);
+ if (!conn->name) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_int32(options, "frame-timeout", &conn->frame_timeout);
+ if (ret >= 0) {
+ gf_log(name, GF_LOG_INFO, "setting frame-timeout to %d",
+ conn->frame_timeout);
+ } else {
+ gf_log(name, GF_LOG_DEBUG, "defaulting frame-timeout to 30mins");
+ conn->frame_timeout = 1800;
+ }
+ conn->rpc_clnt = clnt;
+
+ ret = dict_get_int32(options, "ping-timeout", &conn->ping_timeout);
+ if (ret >= 0) {
+ gf_log(name, GF_LOG_DEBUG, "setting ping-timeout to %d",
+ conn->ping_timeout);
+ } else {
+ /*TODO: Once the epoll thread model is fixed,
+ change the default ping-timeout to 30sec */
+ gf_log(name, GF_LOG_DEBUG, "disable ping-timeout");
+ conn->ping_timeout = 0;
+ }
+
+ trans = rpc_transport_load(ctx, options, name);
+ if (!trans) {
+ gf_log(name, GF_LOG_WARNING,
+ "loading of new rpc-transport"
+ " failed");
+ ret = -1;
+ goto out;
+ }
+ rpc_transport_ref(trans);
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ conn->trans = trans;
+ trans = NULL;
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ ret = rpc_transport_register_notify(conn->trans, rpc_clnt_notify, conn);
+ if (ret == -1) {
+ gf_log(name, GF_LOG_WARNING, "registering notify failed");
+ goto out;
+ }
+
+ conn->saved_frames = saved_frames_new();
+ if (!conn->saved_frames) {
+ gf_log(name, GF_LOG_WARNING,
+ "creation of saved_frames "
+ "failed");
+ ret = -1;
+ goto out;
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ if (ret) {
+ pthread_mutex_lock(&conn->lock);
+ {
+ trans = conn->trans;
+ conn->trans = NULL;
+ }
+ pthread_mutex_unlock(&conn->lock);
+ if (trans)
+ rpc_transport_unref(trans);
+ // conn cleanup needs to be done since we might have failed to
+ // register notification.
+ rpc_clnt_connection_cleanup(conn);
+ }
+ return ret;
}
struct rpc_clnt *
-rpc_clnt_new (dict_t *options, glusterfs_ctx_t *ctx, char *name,
- uint32_t reqpool_size)
+rpc_clnt_new(dict_t *options, xlator_t *owner, char *name,
+ uint32_t reqpool_size)
{
- int ret = -1;
- struct rpc_clnt *rpc = NULL;
-
- rpc = GF_CALLOC (1, sizeof (*rpc), gf_common_mt_rpcclnt_t);
- if (!rpc) {
- goto out;
- }
-
- pthread_mutex_init (&rpc->lock, NULL);
- rpc->ctx = ctx;
-
- if (!reqpool_size)
- reqpool_size = RPC_CLNT_DEFAULT_REQUEST_COUNT;
-
- rpc->reqpool = mem_pool_new (struct rpc_req, reqpool_size);
- if (rpc->reqpool == NULL) {
- pthread_mutex_destroy (&rpc->lock);
- GF_FREE (rpc);
- rpc = NULL;
- goto out;
- }
-
- rpc->saved_frames_pool = mem_pool_new (struct saved_frame,
- reqpool_size);
- if (rpc->saved_frames_pool == NULL) {
- pthread_mutex_destroy (&rpc->lock);
- mem_pool_destroy (rpc->reqpool);
- GF_FREE (rpc);
- rpc = NULL;
- goto out;
- }
-
- ret = rpc_clnt_connection_init (rpc, ctx, options, name);
- if (ret == -1) {
- pthread_mutex_destroy (&rpc->lock);
- mem_pool_destroy (rpc->reqpool);
- mem_pool_destroy (rpc->saved_frames_pool);
- GF_FREE (rpc);
- rpc = NULL;
- if (options)
- dict_unref (options);
- goto out;
- }
-
- rpc->auth_null = dict_get_str_boolean (options, "auth-null", 0);
-
- rpc = rpc_clnt_ref (rpc);
- INIT_LIST_HEAD (&rpc->programs);
+ int ret = -1;
+ struct rpc_clnt *rpc = NULL;
+ glusterfs_ctx_t *ctx = owner->ctx;
+
+ rpc = GF_CALLOC(1, sizeof(*rpc), gf_common_mt_rpcclnt_t);
+ if (!rpc) {
+ goto out;
+ }
+
+ pthread_mutex_init(&rpc->lock, NULL);
+ rpc->ctx = ctx;
+ rpc->owner = owner;
+ GF_ATOMIC_INIT(rpc->xid, 1);
+
+ if (!reqpool_size)
+ reqpool_size = RPC_CLNT_DEFAULT_REQUEST_COUNT;
+
+ rpc->reqpool = mem_pool_new(struct rpc_req, reqpool_size);
+ if (rpc->reqpool == NULL) {
+ pthread_mutex_destroy(&rpc->lock);
+ GF_FREE(rpc);
+ rpc = NULL;
+ goto out;
+ }
+
+ rpc->saved_frames_pool = mem_pool_new(struct saved_frame, reqpool_size);
+ if (rpc->saved_frames_pool == NULL) {
+ pthread_mutex_destroy(&rpc->lock);
+ mem_pool_destroy(rpc->reqpool);
+ GF_FREE(rpc);
+ rpc = NULL;
+ goto out;
+ }
+
+ ret = rpc_clnt_connection_init(rpc, ctx, options, name);
+ if (ret == -1) {
+ pthread_mutex_destroy(&rpc->lock);
+ mem_pool_destroy(rpc->reqpool);
+ mem_pool_destroy(rpc->saved_frames_pool);
+ GF_FREE(rpc);
+ rpc = NULL;
+ goto out;
+ }
+
+ /* This is handled to make sure we have modularity in getting the
+ auth data changed */
+ gf_boolean_t auth_null = dict_get_str_boolean(options, "auth-null", 0);
+
+ rpc->auth_value = (auth_null) ? 0 : AUTH_GLUSTERFS_v2;
+
+ rpc = rpc_clnt_ref(rpc);
+ INIT_LIST_HEAD(&rpc->programs);
out:
- return rpc;
+ return rpc;
}
-
int
-rpc_clnt_start (struct rpc_clnt *rpc)
+rpc_clnt_start(struct rpc_clnt *rpc)
{
- struct rpc_clnt_connection *conn = NULL;
+ struct rpc_clnt_connection *conn = NULL;
- if (!rpc)
- return -1;
+ if (!rpc)
+ return -1;
- conn = &rpc->conn;
+ conn = &rpc->conn;
- rpc_clnt_reconnect (conn->trans);
+ pthread_mutex_lock(&conn->lock);
+ {
+ rpc->disabled = 0;
+ }
+ pthread_mutex_unlock(&conn->lock);
+ /* Corresponding unref will be either on successful timer cancel or last
+ * rpc_clnt_reconnect fire event.
+ */
+ rpc_clnt_ref(rpc);
+ rpc_clnt_reconnect(conn);
- return 0;
+ return 0;
}
-
int
-rpc_clnt_register_notify (struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
- void *mydata)
+rpc_clnt_cleanup_and_start(struct rpc_clnt *rpc)
{
- rpc->mydata = mydata;
- rpc->notifyfn = fn;
+ struct rpc_clnt_connection *conn = NULL;
- return 0;
-}
+ if (!rpc)
+ return -1;
-ssize_t
-xdr_serialize_glusterfs_auth (char *dest, struct auth_glusterfs_parms_v2 *au)
-{
- ssize_t ret = -1;
- XDR xdr;
+ conn = &rpc->conn;
- if ((!dest) || (!au))
- return -1;
+ rpc_clnt_connection_cleanup(conn);
- xdrmem_create (&xdr, dest, GF_MAX_AUTH_BYTES, XDR_ENCODE);
+ pthread_mutex_lock(&conn->lock);
+ {
+ rpc->disabled = 0;
+ }
+ pthread_mutex_unlock(&conn->lock);
+ /* Corresponding unref will be either on successful timer cancel or last
+ * rpc_clnt_reconnect fire event.
+ */
+ rpc_clnt_ref(rpc);
+ rpc_clnt_reconnect(conn);
- if (!xdr_auth_glusterfs_parms_v2 (&xdr, au)) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to encode auth glusterfs elements");
- ret = -1;
- goto ret;
- }
-
- ret = (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base));
-
-ret:
- return ret;
+ return 0;
}
-
int
-rpc_clnt_fill_request (int prognum, int progver, int procnum,
- uint64_t xid, struct auth_glusterfs_parms_v2 *au,
- struct rpc_msg *request, char *auth_data)
+rpc_clnt_register_notify(struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
+ void *mydata)
{
- int ret = -1;
+ rpc->mydata = mydata;
+ rpc->notifyfn = fn;
- if (!request) {
- goto out;
- }
-
- memset (request, 0, sizeof (*request));
-
- request->rm_xid = xid;
- request->rm_direction = CALL;
-
- request->rm_call.cb_rpcvers = 2;
- request->rm_call.cb_prog = prognum;
- request->rm_call.cb_vers = progver;
- request->rm_call.cb_proc = procnum;
-
- /* TODO: Using AUTH_(GLUSTERFS/NULL) in a kludgy way for time-being.
- * Make it modular in future so it is easy to plug-in new
- * authentication schemes.
- */
- if (auth_data) {
- ret = xdr_serialize_glusterfs_auth (auth_data, au);
- if (ret == -1) {
- gf_log ("rpc-clnt", GF_LOG_DEBUG,
- "cannot encode credentials");
- goto out;
- }
+ return 0;
+}
- request->rm_call.cb_cred.oa_flavor = AUTH_GLUSTERFS_v2;
- request->rm_call.cb_cred.oa_base = auth_data;
- request->rm_call.cb_cred.oa_length = ret;
- } else {
- request->rm_call.cb_cred.oa_flavor = AUTH_NULL;
- request->rm_call.cb_cred.oa_base = NULL;
- request->rm_call.cb_cred.oa_length = 0;
- }
- request->rm_call.cb_verf.oa_flavor = AUTH_NONE;
- request->rm_call.cb_verf.oa_base = NULL;
- request->rm_call.cb_verf.oa_length = 0;
+/* used for GF_LOG_OCCASIONALLY() */
+static int gf_auth_max_groups_log = 0;
- ret = 0;
+static inline int
+setup_glusterfs_auth_param_v3(call_frame_t *frame, auth_glusterfs_params_v3 *au,
+ int lk_owner_len, char *owner_data)
+{
+ int ret = -1;
+ unsigned int max_groups = 0;
+ int max_lkowner_len = 0;
+
+ au->pid = frame->root->pid;
+ au->uid = frame->root->uid;
+ au->gid = frame->root->gid;
+
+ au->flags = frame->root->flags;
+ au->ctime_sec = frame->root->ctime.tv_sec;
+ au->ctime_nsec = frame->root->ctime.tv_nsec;
+
+ au->lk_owner.lk_owner_val = owner_data;
+ au->lk_owner.lk_owner_len = lk_owner_len;
+ au->groups.groups_val = frame->root->groups;
+ au->groups.groups_len = frame->root->ngrps;
+
+ /* The number of groups and the size of lk_owner depend on oneother.
+ * We can truncate the groups, but should not touch the lk_owner. */
+ max_groups = GF_AUTH_GLUSTERFS_MAX_GROUPS(lk_owner_len, AUTH_GLUSTERFS_v3);
+ if (au->groups.groups_len > max_groups) {
+ GF_LOG_OCCASIONALLY(gf_auth_max_groups_log, "rpc-auth", GF_LOG_WARNING,
+ "truncating grouplist "
+ "from %d to %d",
+ au->groups.groups_len, max_groups);
+
+ au->groups.groups_len = max_groups;
+ }
+
+ max_lkowner_len = GF_AUTH_GLUSTERFS_MAX_LKOWNER(au->groups.groups_len,
+ AUTH_GLUSTERFS_v3);
+ if (lk_owner_len > max_lkowner_len) {
+ gf_log("rpc-clnt", GF_LOG_ERROR,
+ "lkowner field is too "
+ "big (%d), it does not fit in the rpc-header",
+ au->lk_owner.lk_owner_len);
+ errno = E2BIG;
+ goto out;
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
+static inline int
+setup_glusterfs_auth_param_v2(call_frame_t *frame, auth_glusterfs_parms_v2 *au,
+ int lk_owner_len, char *owner_data)
+{
+ unsigned int max_groups = 0;
+ int max_lkowner_len = 0;
+ int ret = -1;
+
+ au->pid = frame->root->pid;
+ au->uid = frame->root->uid;
+ au->gid = frame->root->gid;
+
+ au->lk_owner.lk_owner_val = owner_data;
+ au->lk_owner.lk_owner_len = lk_owner_len;
+ au->groups.groups_val = frame->root->groups;
+ au->groups.groups_len = frame->root->ngrps;
+
+ /* The number of groups and the size of lk_owner depend on oneother.
+ * We can truncate the groups, but should not touch the lk_owner. */
+ max_groups = GF_AUTH_GLUSTERFS_MAX_GROUPS(lk_owner_len, AUTH_GLUSTERFS_v2);
+ if (au->groups.groups_len > max_groups) {
+ GF_LOG_OCCASIONALLY(gf_auth_max_groups_log, "rpc-auth", GF_LOG_WARNING,
+ "truncating grouplist "
+ "from %d to %d",
+ au->groups.groups_len, max_groups);
+
+ au->groups.groups_len = max_groups;
+ }
+
+ max_lkowner_len = GF_AUTH_GLUSTERFS_MAX_LKOWNER(au->groups.groups_len,
+ AUTH_GLUSTERFS_v2);
+ if (lk_owner_len > max_lkowner_len) {
+ gf_log("rpc-auth", GF_LOG_ERROR,
+ "lkowner field is too "
+ "big (%d), it does not fit in the rpc-header",
+ au->lk_owner.lk_owner_len);
+ errno = E2BIG;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
-struct iovec
-rpc_clnt_record_build_header (char *recordstart, size_t rlen,
- struct rpc_msg *request, size_t payload)
+static ssize_t
+xdr_serialize_glusterfs_auth(struct rpc_clnt *clnt, call_frame_t *frame,
+ char *dest)
{
- struct iovec requesthdr = {0, };
- struct iovec txrecord = {0, 0};
- int ret = -1;
- size_t fraglen = 0;
+ ssize_t ret = -1;
+ XDR xdr;
+ char owner[4] = {
+ 0,
+ };
+ int32_t pid = 0;
+ char *lk_owner_data = NULL;
+ int lk_owner_len = 0;
+
+ if ((!dest))
+ return -1;
+
+ xdrmem_create(&xdr, dest, GF_MAX_AUTH_BYTES, XDR_ENCODE);
+
+ if (frame->root->lk_owner.len) {
+ lk_owner_data = frame->root->lk_owner.data;
+ lk_owner_len = frame->root->lk_owner.len;
+ } else {
+ pid = frame->root->pid;
+ owner[0] = (char)(pid & 0xff);
+ owner[1] = (char)((pid >> 8) & 0xff);
+ owner[2] = (char)((pid >> 16) & 0xff);
+ owner[3] = (char)((pid >> 24) & 0xff);
+
+ lk_owner_data = owner;
+ lk_owner_len = 4;
+ }
+
+ if (clnt->auth_value == AUTH_GLUSTERFS_v2) {
+ auth_glusterfs_parms_v2 au_v2 = {
+ 0,
+ };
+
+ ret = setup_glusterfs_auth_param_v2(frame, &au_v2, lk_owner_len,
+ lk_owner_data);
+ if (ret)
+ goto out;
+ if (!xdr_auth_glusterfs_parms_v2(&xdr, &au_v2)) {
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "failed to encode auth glusterfs elements");
+ ret = -1;
+ goto out;
+ }
+ } else if (clnt->auth_value == AUTH_GLUSTERFS_v3) {
+ auth_glusterfs_params_v3 au_v3 = {
+ 0,
+ };
+
+ ret = setup_glusterfs_auth_param_v3(frame, &au_v3, lk_owner_len,
+ lk_owner_data);
+ if (ret)
+ goto out;
- ret = rpc_request_to_xdr (request, recordstart, rlen, &requesthdr);
- if (ret == -1) {
- gf_log ("rpc-clnt", GF_LOG_DEBUG,
- "Failed to create RPC request");
- goto out;
+ if (!xdr_auth_glusterfs_params_v3(&xdr, &au_v3)) {
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "failed to encode auth glusterfs elements");
+ ret = -1;
+ goto out;
}
+ } else {
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "failed to encode auth glusterfs elements");
+ ret = -1;
+ goto out;
+ }
- fraglen = payload + requesthdr.iov_len;
- gf_log ("rpc-clnt", GF_LOG_TRACE, "Request fraglen %zu, payload: %zu, "
- "rpc hdr: %zu", fraglen, payload, requesthdr.iov_len);
-
-
- txrecord.iov_base = recordstart;
-
- /* Remember, this is only the vec for the RPC header and does not
- * include the payload above. We needed the payload only to calculate
- * the size of the full fragment. This size is sent in the fragment
- * header.
- */
- txrecord.iov_len = requesthdr.iov_len;
+ ret = (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base));
out:
- return txrecord;
+ return ret;
}
-
-struct iobuf *
-rpc_clnt_record_build_record (struct rpc_clnt *clnt, int prognum, int progver,
- int procnum, size_t hdrsize, uint64_t xid,
- struct auth_glusterfs_parms_v2 *au,
- struct iovec *recbuf)
+int
+rpc_clnt_fill_request(struct rpc_clnt *clnt, int prognum, int progver,
+ int procnum, uint64_t xid, call_frame_t *fr,
+ struct rpc_msg *request, char *auth_data)
{
- struct rpc_msg request = {0, };
- struct iobuf *request_iob = NULL;
- char *record = NULL;
- struct iovec recordhdr = {0, };
- size_t pagesize = 0;
- int ret = -1;
- size_t xdr_size = 0;
- char auth_data[GF_MAX_AUTH_BYTES] = {0, };
-
- if ((!clnt) || (!recbuf) || (!au)) {
- goto out;
- }
+ int ret = -1;
- /* Fill the rpc structure and XDR it into the buffer got above. */
- if (clnt->auth_null)
- ret = rpc_clnt_fill_request (prognum, progver, procnum,
- xid, NULL, &request, NULL);
- else
- ret = rpc_clnt_fill_request (prognum, progver, procnum,
- xid, au, &request, auth_data);
+ if (!request) {
+ goto out;
+ }
- if (ret == -1) {
- gf_log (clnt->conn.trans->name, GF_LOG_WARNING,
- "cannot build a rpc-request xid (%"PRIu64")", xid);
- goto out;
- }
+ memset(request, 0, sizeof(*request));
- xdr_size = xdr_sizeof ((xdrproc_t)xdr_callmsg, &request);
+ request->rm_xid = xid;
+ request->rm_direction = CALL;
- /* First, try to get a pointer into the buffer which the RPC
- * layer can use.
- */
- request_iob = iobuf_get2 (clnt->ctx->iobuf_pool, (xdr_size + hdrsize));
- if (!request_iob) {
- goto out;
- }
-
- pagesize = iobuf_pagesize (request_iob);
-
- record = iobuf_ptr (request_iob); /* Now we have it. */
+ request->rm_call.cb_rpcvers = 2;
+ request->rm_call.cb_prog = prognum;
+ request->rm_call.cb_vers = progver;
+ request->rm_call.cb_proc = procnum;
- recordhdr = rpc_clnt_record_build_header (record, pagesize, &request,
- hdrsize);
-
- if (!recordhdr.iov_base) {
- gf_log (clnt->conn.trans->name, GF_LOG_ERROR,
- "Failed to build record header");
- iobuf_unref (request_iob);
- request_iob = NULL;
- recbuf->iov_base = NULL;
- goto out;
+ if (!clnt->auth_value) {
+ request->rm_call.cb_cred.oa_flavor = AUTH_NULL;
+ request->rm_call.cb_cred.oa_base = NULL;
+ request->rm_call.cb_cred.oa_length = 0;
+ } else {
+ ret = xdr_serialize_glusterfs_auth(clnt, fr, auth_data);
+ if (ret == -1) {
+ gf_log("rpc-clnt", GF_LOG_WARNING,
+ "cannot encode auth credentials");
+ goto out;
}
- recbuf->iov_base = recordhdr.iov_base;
- recbuf->iov_len = recordhdr.iov_len;
+ request->rm_call.cb_cred.oa_flavor = clnt->auth_value;
+ request->rm_call.cb_cred.oa_base = auth_data;
+ request->rm_call.cb_cred.oa_length = ret;
+ }
+ request->rm_call.cb_verf.oa_flavor = AUTH_NONE;
+ request->rm_call.cb_verf.oa_base = NULL;
+ request->rm_call.cb_verf.oa_length = 0;
+ ret = 0;
out:
- return request_iob;
+ return ret;
}
+struct iovec
+rpc_clnt_record_build_header(char *recordstart, size_t rlen,
+ struct rpc_msg *request, size_t payload)
+{
+ struct iovec requesthdr = {
+ 0,
+ };
+ struct iovec txrecord = {0, 0};
+ int ret = -1;
+ size_t fraglen = 0;
+
+ ret = rpc_request_to_xdr(request, recordstart, rlen, &requesthdr);
+ if (ret == -1) {
+ gf_log("rpc-clnt", GF_LOG_DEBUG, "Failed to create RPC request");
+ goto out;
+ }
+
+ fraglen = payload + requesthdr.iov_len;
+ gf_log("rpc-clnt", GF_LOG_TRACE,
+ "Request fraglen %zu, payload: %zu, "
+ "rpc hdr: %zu",
+ fraglen, payload, requesthdr.iov_len);
+
+ txrecord.iov_base = recordstart;
+
+ /* Remember, this is only the vec for the RPC header and does not
+ * include the payload above. We needed the payload only to calculate
+ * the size of the full fragment. This size is sent in the fragment
+ * header.
+ */
+ txrecord.iov_len = requesthdr.iov_len;
+
+out:
+ return txrecord;
+}
struct iobuf *
-rpc_clnt_record (struct rpc_clnt *clnt, call_frame_t *call_frame,
- rpc_clnt_prog_t *prog, int procnum, size_t hdrlen,
- struct iovec *rpchdr, uint64_t callid)
+rpc_clnt_record_build_record(struct rpc_clnt *clnt, call_frame_t *fr,
+ int prognum, int progver, int procnum,
+ size_t hdrsize, uint64_t xid, struct iovec *recbuf)
{
- struct auth_glusterfs_parms_v2 au = {0, };
- struct iobuf *request_iob = NULL;
- char owner[4] = {0,};
-
- if (!prog || !rpchdr || !call_frame) {
- goto out;
- }
+ struct rpc_msg request = {
+ 0,
+ };
+ struct iobuf *request_iob = NULL;
+ char *record = NULL;
+ struct iovec recordhdr = {
+ 0,
+ };
+ size_t pagesize = 0;
+ int ret = -1;
+ size_t xdr_size = 0;
+ char auth_data[GF_MAX_AUTH_BYTES] = {
+ 0,
+ };
+
+ if ((!clnt) || (!recbuf)) {
+ goto out;
+ }
+
+ /* Fill the rpc structure and XDR it into the buffer got above. */
+ ret = rpc_clnt_fill_request(clnt, prognum, progver, procnum, xid, fr,
+ &request, auth_data);
+
+ if (ret == -1) {
+ gf_log(clnt->conn.name, GF_LOG_WARNING,
+ "cannot build a rpc-request xid (%" PRIu64 ")", xid);
+ goto out;
+ }
+
+ xdr_size = xdr_sizeof((xdrproc_t)xdr_callmsg, &request);
+
+ /* First, try to get a pointer into the buffer which the RPC
+ * layer can use.
+ */
+ request_iob = iobuf_get2(clnt->ctx->iobuf_pool, (xdr_size + hdrsize));
+ if (!request_iob) {
+ goto out;
+ }
+
+ pagesize = iobuf_pagesize(request_iob);
+
+ record = iobuf_ptr(request_iob); /* Now we have it. */
+
+ recordhdr = rpc_clnt_record_build_header(record, pagesize, &request,
+ hdrsize);
+
+ if (!recordhdr.iov_base) {
+ gf_log(clnt->conn.name, GF_LOG_ERROR, "Failed to build record header");
+ iobuf_unref(request_iob);
+ request_iob = NULL;
+ recbuf->iov_base = NULL;
+ goto out;
+ }
+
+ recbuf->iov_base = recordhdr.iov_base;
+ recbuf->iov_len = recordhdr.iov_len;
- au.pid = call_frame->root->pid;
- au.uid = call_frame->root->uid;
- au.gid = call_frame->root->gid;
- au.groups.groups_len = call_frame->root->ngrps;
- au.lk_owner.lk_owner_len = call_frame->root->lk_owner.len;
-
- if (au.groups.groups_len)
- au.groups.groups_val = call_frame->root->groups;
-
- if (call_frame->root->lk_owner.len)
- au.lk_owner.lk_owner_val = call_frame->root->lk_owner.data;
- else {
- owner[0] = (char)(au.pid & 0xff);
- owner[1] = (char)((au.pid >> 8) & 0xff);
- owner[2] = (char)((au.pid >> 16) & 0xff);
- owner[3] = (char)((au.pid >> 24) & 0xff);
-
- au.lk_owner.lk_owner_val = owner;
- au.lk_owner.lk_owner_len = 4;
- }
+out:
+ return request_iob;
+}
- gf_log (clnt->conn.trans->name, GF_LOG_TRACE, "Auth Info: pid: %u, uid: %d"
- ", gid: %d, owner: %s", au.pid, au.uid, au.gid,
- lkowner_utoa (&call_frame->root->lk_owner));
-
- request_iob = rpc_clnt_record_build_record (clnt, prog->prognum,
- prog->progver,
- procnum, hdrlen,
- callid, &au,
- rpchdr);
- if (!request_iob) {
- gf_log (clnt->conn.trans->name, GF_LOG_WARNING,
- "cannot build rpc-record");
- goto out;
- }
+static inline struct iobuf *
+rpc_clnt_record(struct rpc_clnt *clnt, call_frame_t *call_frame,
+ rpc_clnt_prog_t *prog, int procnum, size_t hdrlen,
+ struct iovec *rpchdr, uint64_t callid)
+{
+ if (!prog || !rpchdr || !call_frame) {
+ return NULL;
+ }
-out:
- return request_iob;
+ return rpc_clnt_record_build_record(clnt, call_frame, prog->prognum,
+ prog->progver, procnum, hdrlen, callid,
+ rpchdr);
}
int
-rpcclnt_cbk_program_register (struct rpc_clnt *clnt,
- rpcclnt_cb_program_t *program)
+rpcclnt_cbk_program_register(struct rpc_clnt *clnt,
+ rpcclnt_cb_program_t *program, void *mydata)
{
- int ret = -1;
- char already_registered = 0;
- rpcclnt_cb_program_t *tmp = NULL;
+ int ret = -1;
+ char already_registered = 0;
+ rpcclnt_cb_program_t *tmp = NULL;
- if (!clnt)
- goto out;
+ if (!clnt)
+ goto out;
- if (program->actors == NULL)
- goto out;
+ if (program->actors == NULL)
+ goto out;
- pthread_mutex_lock (&clnt->lock);
+ pthread_mutex_lock(&clnt->lock);
+ {
+ list_for_each_entry(tmp, &clnt->programs, program)
{
- list_for_each_entry (tmp, &clnt->programs, program) {
- if ((program->prognum == tmp->prognum)
- && (program->progver == tmp->progver)) {
- already_registered = 1;
- break;
- }
- }
+ if ((program->prognum == tmp->prognum) &&
+ (program->progver == tmp->progver)) {
+ already_registered = 1;
+ break;
+ }
}
- pthread_mutex_unlock (&clnt->lock);
+ }
+ pthread_mutex_unlock(&clnt->lock);
- if (already_registered) {
- gf_log_callingfn (clnt->conn.trans->name, GF_LOG_DEBUG,
- "already registered");
- ret = 0;
- goto out;
- }
+ if (already_registered) {
+ gf_log_callingfn(clnt->conn.name, GF_LOG_DEBUG, "already registered");
+ ret = 0;
+ goto out;
+ }
- tmp = GF_CALLOC (1, sizeof (*tmp),
- gf_common_mt_rpcclnt_cb_program_t);
- if (tmp == NULL) {
- goto out;
- }
+ tmp = GF_MALLOC(sizeof(*tmp), gf_common_mt_rpcclnt_cb_program_t);
+ if (tmp == NULL) {
+ goto out;
+ }
- memcpy (tmp, program, sizeof (*tmp));
- INIT_LIST_HEAD (&tmp->program);
+ memcpy(tmp, program, sizeof(*tmp));
+ INIT_LIST_HEAD(&tmp->program);
- pthread_mutex_lock (&clnt->lock);
- {
- list_add_tail (&tmp->program, &clnt->programs);
- }
- pthread_mutex_unlock (&clnt->lock);
+ tmp->mydata = mydata;
- ret = 0;
- gf_log (clnt->conn.trans->name, GF_LOG_DEBUG,
- "New program registered: %s, Num: %d, Ver: %d",
- program->progname, program->prognum,
- program->progver);
+ pthread_mutex_lock(&clnt->lock);
+ {
+ list_add_tail(&tmp->program, &clnt->programs);
+ }
+ pthread_mutex_unlock(&clnt->lock);
-out:
- if (ret == -1) {
- gf_log (clnt->conn.trans->name, GF_LOG_ERROR,
- "Program registration failed:"
- " %s, Num: %d, Ver: %d", program->progname,
- program->prognum, program->progver);
- }
+ ret = 0;
+ gf_log(clnt->conn.name, GF_LOG_DEBUG,
+ "New program registered: %s, Num: %d, Ver: %d", program->progname,
+ program->prognum, program->progver);
- return ret;
+out:
+ if (ret == -1 && clnt) {
+ gf_log(clnt->conn.name, GF_LOG_ERROR,
+ "Program registration failed:"
+ " %s, Num: %d, Ver: %d",
+ program->progname, program->prognum, program->progver);
+ }
+
+ return ret;
}
-
int
-rpc_clnt_submit (struct rpc_clnt *rpc, rpc_clnt_prog_t *prog,
- int procnum, fop_cbk_fn_t cbkfn,
- struct iovec *proghdr, int proghdrcount,
- struct iovec *progpayload, int progpayloadcount,
- struct iobref *iobref, void *frame, struct iovec *rsphdr,
- int rsphdr_count, struct iovec *rsp_payload,
- int rsp_payload_count, struct iobref *rsp_iobref)
+rpc_clnt_submit(struct rpc_clnt *rpc, rpc_clnt_prog_t *prog, int procnum,
+ fop_cbk_fn_t cbkfn, struct iovec *proghdr, int proghdrcount,
+ struct iovec *progpayload, int progpayloadcount,
+ struct iobref *iobref, void *frame, struct iovec *rsphdr,
+ int rsphdr_count, struct iovec *rsp_payload,
+ int rsp_payload_count, struct iobref *rsp_iobref)
{
- rpc_clnt_connection_t *conn = NULL;
- struct iobuf *request_iob = NULL;
- struct iovec rpchdr = {0,};
- struct rpc_req *rpcreq = NULL;
- rpc_transport_req_t req;
- int ret = -1;
- int proglen = 0;
- char new_iobref = 0;
- uint64_t callid = 0;
-
- if (!rpc || !prog || !frame) {
- goto out;
- }
-
- conn = &rpc->conn;
-
- if (conn->trans == NULL) {
- goto out;
- }
-
- rpcreq = mem_get (rpc->reqpool);
- if (rpcreq == NULL) {
- goto out;
- }
-
- memset (rpcreq, 0, sizeof (*rpcreq));
- memset (&req, 0, sizeof (req));
-
+ rpc_clnt_connection_t *conn = NULL;
+ struct iobuf *request_iob = NULL;
+ struct iovec rpchdr = {
+ 0,
+ };
+ struct rpc_req *rpcreq = NULL;
+ rpc_transport_req_t req;
+ int ret = -1;
+ int proglen = 0;
+ char new_iobref = 0;
+ uint64_t callid = 0;
+ gf_boolean_t need_unref = _gf_false;
+ call_frame_t *cframe = frame;
+
+ if (!rpc || !prog || !frame) {
+ goto out;
+ }
+
+ conn = &rpc->conn;
+
+ rpcreq = mem_get(rpc->reqpool);
+ if (rpcreq == NULL) {
+ goto out;
+ }
+
+ memset(rpcreq, 0, sizeof(*rpcreq));
+ memset(&req, 0, sizeof(req));
+
+ if (!iobref) {
+ iobref = iobref_new();
if (!iobref) {
- iobref = iobref_new ();
- if (!iobref) {
- goto out;
- }
-
- new_iobref = 1;
- }
-
- callid = rpc_clnt_new_callid (rpc);
-
- rpcreq->prog = prog;
- rpcreq->procnum = procnum;
- rpcreq->conn = conn;
- rpcreq->xid = callid;
- rpcreq->cbkfn = cbkfn;
-
- ret = -1;
-
- if (proghdr) {
- proglen += iov_length (proghdr, proghdrcount);
- }
-
- request_iob = rpc_clnt_record (rpc, frame, prog,
- procnum, proglen,
- &rpchdr, callid);
- if (!request_iob) {
- gf_log (conn->trans->name, GF_LOG_WARNING,
- "cannot build rpc-record");
- goto out;
- }
-
- iobref_add (iobref, request_iob);
-
- req.msg.rpchdr = &rpchdr;
- req.msg.rpchdrcount = 1;
- req.msg.proghdr = proghdr;
- req.msg.proghdrcount = proghdrcount;
- req.msg.progpayload = progpayload;
- req.msg.progpayloadcount = progpayloadcount;
- req.msg.iobref = iobref;
-
- req.rsp.rsphdr = rsphdr;
- req.rsp.rsphdr_count = rsphdr_count;
- req.rsp.rsp_payload = rsp_payload;
- req.rsp.rsp_payload_count = rsp_payload_count;
- req.rsp.rsp_iobref = rsp_iobref;
- req.rpc_req = rpcreq;
-
- pthread_mutex_lock (&conn->lock);
- {
- if (conn->connected == 0) {
- ret = rpc_transport_connect (conn->trans,
- conn->config.remote_port);
- /* Below code makes sure the (re-)configured port lasts
- for just one successful connect attempt */
- if (!ret)
- conn->config.remote_port = 0;
- }
-
- ret = rpc_transport_submit_request (rpc->conn.trans,
- &req);
- if (ret == -1) {
- gf_log (conn->trans->name, GF_LOG_WARNING,
- "failed to submit rpc-request "
- "(XID: 0x%ux Program: %s, ProgVers: %d, "
- "Proc: %d) to rpc-transport (%s)", rpcreq->xid,
- rpcreq->prog->progname, rpcreq->prog->progver,
- rpcreq->procnum, rpc->conn.trans->name);
- }
-
- if ((ret >= 0) && frame) {
- gettimeofday (&conn->last_sent, NULL);
- /* Save the frame in queue */
- __save_frame (rpc, frame, rpcreq);
-
- gf_log ("rpc-clnt", GF_LOG_TRACE, "submitted request "
- "(XID: 0x%ux Program: %s, ProgVers: %d, "
- "Proc: %d) to rpc-transport (%s)", rpcreq->xid,
- rpcreq->prog->progname, rpcreq->prog->progver,
- rpcreq->procnum, rpc->conn.trans->name);
- }
- }
- pthread_mutex_unlock (&conn->lock);
-
+ goto out;
+ }
+
+ new_iobref = 1;
+ }
+
+ callid = GF_ATOMIC_INC(rpc->xid);
+
+ rpcreq->prog = prog;
+ rpcreq->procnum = procnum;
+ rpcreq->conn = conn;
+ rpcreq->xid = callid;
+ rpcreq->cbkfn = cbkfn;
+
+ ret = -1;
+
+ if (proghdr) {
+ proglen += iov_length(proghdr, proghdrcount);
+ }
+
+ request_iob = rpc_clnt_record(rpc, frame, prog, procnum, proglen, &rpchdr,
+ callid);
+ if (!request_iob) {
+ gf_log(conn->name, GF_LOG_WARNING, "cannot build rpc-record");
+ goto out;
+ }
+
+ iobref_add(iobref, request_iob);
+
+ req.msg.rpchdr = &rpchdr;
+ req.msg.rpchdrcount = 1;
+ req.msg.proghdr = proghdr;
+ req.msg.proghdrcount = proghdrcount;
+ req.msg.progpayload = progpayload;
+ req.msg.progpayloadcount = progpayloadcount;
+ req.msg.iobref = iobref;
+
+ req.rsp.rsphdr = rsphdr;
+ req.rsp.rsphdr_count = rsphdr_count;
+ req.rsp.rsp_payload = rsp_payload;
+ req.rsp.rsp_payload_count = rsp_payload_count;
+ req.rsp.rsp_iobref = rsp_iobref;
+ req.rpc_req = rpcreq;
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ if (conn->connected == 0) {
+ if (rpc->disabled)
+ goto unlock;
+ ret = rpc_transport_connect(conn->trans, conn->config.remote_port);
+ if (ret < 0) {
+ gf_log(conn->name,
+ (errno == EINPROGRESS) ? GF_LOG_DEBUG : GF_LOG_WARNING,
+ "error returned while attempting to "
+ "connect to host:%s, port:%d",
+ conn->config.remote_host, conn->config.remote_port);
+ goto unlock;
+ }
+ }
+
+ ret = rpc_transport_submit_request(conn->trans, &req);
if (ret == -1) {
- goto out;
- }
-
- ret = 0;
+ gf_log(conn->name, GF_LOG_WARNING,
+ "failed to submit rpc-request "
+ "(unique: %" PRIu64
+ ", XID: 0x%x Program: %s, "
+ "ProgVers: %d, Proc: %d) to rpc-transport (%s)",
+ cframe->root->unique, rpcreq->xid, rpcreq->prog->progname,
+ rpcreq->prog->progver, rpcreq->procnum, conn->name);
+ } else if ((ret >= 0) && frame) {
+ /* Save the frame in queue */
+ __save_frame(rpc, frame, rpcreq);
+
+ /* A ref on rpc-clnt object is taken while registering
+ * call_bail to timer in __save_frame. If it fails to
+ * register, it needs an unref and should happen outside
+ * conn->lock which otherwise leads to deadlocks */
+ if (conn->timer == NULL)
+ need_unref = _gf_true;
+
+ conn->msgcnt++;
+
+ gf_log("rpc-clnt", GF_LOG_TRACE,
+ "submitted request "
+ "(unique: %" PRIu64
+ ", XID: 0x%x, Program: %s, "
+ "ProgVers: %d, Proc: %d) to rpc-transport (%s)",
+ cframe->root->unique, rpcreq->xid, rpcreq->prog->progname,
+ rpcreq->prog->progver, rpcreq->procnum, conn->name);
+ }
+ }
+unlock:
+ pthread_mutex_unlock(&conn->lock);
+
+ if (need_unref)
+ rpc_clnt_unref(rpc);
+
+ if (ret == -1) {
+ goto out;
+ }
+
+ rpc_clnt_check_and_start_ping(rpc);
+ ret = 0;
out:
- if (request_iob) {
- iobuf_unref (request_iob);
- }
-
- if (new_iobref && iobref) {
- iobref_unref (iobref);
- }
-
- if (frame && (ret == -1)) {
- if (rpcreq) {
- rpcreq->rpc_status = -1;
- cbkfn (rpcreq, NULL, 0, frame);
- mem_put (rpcreq);
- }
- }
- return ret;
+ if (request_iob) {
+ iobuf_unref(request_iob);
+ }
+
+ if (new_iobref && iobref) {
+ iobref_unref(iobref);
+ }
+
+ if (frame && (ret == -1)) {
+ if (rpcreq) {
+ rpcreq->rpc_status = -1;
+ cbkfn(rpcreq, NULL, 0, frame);
+ mem_put(rpcreq);
+ }
+ }
+ return ret;
}
-
struct rpc_clnt *
-rpc_clnt_ref (struct rpc_clnt *rpc)
+rpc_clnt_ref(struct rpc_clnt *rpc)
{
- if (!rpc)
- return NULL;
- pthread_mutex_lock (&rpc->lock);
- {
- rpc->refcount++;
- }
- pthread_mutex_unlock (&rpc->lock);
- return rpc;
-}
+ if (!rpc)
+ return NULL;
+ GF_ATOMIC_INC(rpc->refcount);
+ return rpc;
+}
static void
-rpc_clnt_destroy (struct rpc_clnt *rpc)
+rpc_clnt_trigger_destroy(struct rpc_clnt *rpc)
{
- if (!rpc)
- return;
-
- if (rpc->conn.trans) {
- rpc_transport_unregister_notify (rpc->conn.trans);
- rpc_transport_disconnect (rpc->conn.trans);
- rpc_transport_unref (rpc->conn.trans);
- }
-
- rpc_clnt_reconnect_cleanup (&rpc->conn);
- saved_frames_destroy (rpc->conn.saved_frames);
- pthread_mutex_destroy (&rpc->lock);
- pthread_mutex_destroy (&rpc->conn.lock);
+ rpc_clnt_connection_t *conn = NULL;
+ rpc_transport_t *trans = NULL;
- /* mem-pool should be destroyed, otherwise,
- it will cause huge memory leaks */
- mem_pool_destroy (rpc->reqpool);
- mem_pool_destroy (rpc->saved_frames_pool);
-
- GF_FREE (rpc);
+ if (!rpc)
return;
-}
-
-struct rpc_clnt *
-rpc_clnt_unref (struct rpc_clnt *rpc)
-{
- int count = 0;
- if (!rpc)
- return NULL;
- pthread_mutex_lock (&rpc->lock);
- {
- count = --rpc->refcount;
- }
- pthread_mutex_unlock (&rpc->lock);
- if (!count) {
- rpc_clnt_destroy (rpc);
- return NULL;
- }
- return rpc;
+ /* reading conn->trans outside conn->lock is OK, since this is the last
+ * ref*/
+ conn = &rpc->conn;
+ trans = conn->trans;
+ rpc_clnt_disable(rpc);
+
+ /* This is to account for rpc_clnt_disable that might have been called
+ * before rpc_clnt_unref */
+ if (trans) {
+ /* set conn->trans to NULL before rpc_transport_unref
+ * as rpc_transport_unref can potentially free conn
+ */
+ conn->trans = NULL;
+ rpc_transport_unref(trans);
+ }
}
-
-void
-rpc_clnt_disable (struct rpc_clnt *rpc)
+static void
+rpc_clnt_destroy(struct rpc_clnt *rpc)
{
- rpc_clnt_connection_t *conn = NULL;
-
- if (!rpc) {
- goto out;
- }
-
- conn = &rpc->conn;
-
- pthread_mutex_lock (&conn->lock);
- {
- rpc->disabled = 1;
-
- if (conn->timer) {
- gf_timer_call_cancel (rpc->ctx, conn->timer);
- conn->timer = NULL;
- }
+ rpcclnt_cb_program_t *program = NULL;
+ rpcclnt_cb_program_t *tmp = NULL;
+ struct saved_frames *saved_frames = NULL;
+ rpc_clnt_connection_t *conn = NULL;
- if (conn->reconnect) {
- gf_timer_call_cancel (rpc->ctx, conn->reconnect);
- conn->reconnect = NULL;
- }
- conn->connected = 0;
-
- if (conn->ping_timer) {
- gf_timer_call_cancel (rpc->ctx, conn->ping_timer);
- conn->ping_timer = NULL;
- conn->ping_started = 0;
- }
-
- }
- pthread_mutex_unlock (&conn->lock);
-
- rpc_transport_disconnect (rpc->conn.trans);
-
-out:
+ if (!rpc)
return;
-}
+ conn = &rpc->conn;
+ GF_FREE(rpc->conn.name);
+ /* Access saved_frames in critical-section to avoid
+ crash in rpc_clnt_connection_cleanup at the time
+ of destroying saved frames
+ */
+ pthread_mutex_lock(&conn->lock);
+ {
+ saved_frames = conn->saved_frames;
+ conn->saved_frames = NULL;
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ saved_frames_destroy(saved_frames);
+ pthread_mutex_destroy(&rpc->lock);
+ pthread_mutex_destroy(&rpc->conn.lock);
+ pthread_cond_destroy(&rpc->conn.cond);
+
+ /* mem-pool should be destroyed, otherwise,
+ it will cause huge memory leaks */
+ mem_pool_destroy(rpc->reqpool);
+ mem_pool_destroy(rpc->saved_frames_pool);
+
+ list_for_each_entry_safe(program, tmp, &rpc->programs, program)
+ {
+ GF_FREE(program);
+ }
+
+ GF_FREE(rpc);
+ return;
+}
-void
-rpc_clnt_reconfig (struct rpc_clnt *rpc, struct rpc_clnt_config *config)
+struct rpc_clnt *
+rpc_clnt_unref(struct rpc_clnt *rpc)
{
- if (config->rpc_timeout) {
- if (config->rpc_timeout != rpc->conn.config.rpc_timeout)
- gf_log (rpc->conn.trans->name, GF_LOG_INFO,
- "changing timeout to %d (from %d)",
- config->rpc_timeout,
- rpc->conn.config.rpc_timeout);
- rpc->conn.config.rpc_timeout = config->rpc_timeout;
- }
-
- if (config->remote_port) {
- if (config->remote_port != rpc->conn.config.remote_port)
- gf_log (rpc->conn.trans->name, GF_LOG_INFO,
- "changing port to %d (from %d)",
- config->remote_port,
- rpc->conn.config.remote_port);
+ int count = 0;
- rpc->conn.config.remote_port = config->remote_port;
- }
+ if (!rpc)
+ return NULL;
- if (config->remote_host) {
- if (rpc->conn.config.remote_host) {
- if (strcmp (rpc->conn.config.remote_host,
- config->remote_host))
- gf_log (rpc->conn.trans->name, GF_LOG_INFO,
- "changing port to %s (from %s)",
- config->remote_host,
- rpc->conn.config.remote_host);
- FREE (rpc->conn.config.remote_host);
- } else {
- gf_log (rpc->conn.trans->name, GF_LOG_INFO,
- "setting hostname to %s",
- config->remote_host);
- }
+ count = GF_ATOMIC_DEC(rpc->refcount);
- rpc->conn.config.remote_host = gf_strdup (config->remote_host);
- }
+ if (!count) {
+ rpc_clnt_trigger_destroy(rpc);
+ return NULL;
+ }
+ return rpc;
}
int
-rpc_clnt_transport_unix_options_build (dict_t **options, char *filepath)
+rpc_clnt_disable(struct rpc_clnt *rpc)
{
- dict_t *dict = NULL;
- char *fpath = NULL;
- int ret = -1;
-
- GF_ASSERT (filepath);
- GF_ASSERT (options);
-
- dict = dict_new ();
- if (!dict)
- goto out;
-
- fpath = gf_strdup (filepath);
- if (!fpath) {
- ret = -1;
- goto out;
- }
-
- ret = dict_set_dynstr (dict, "transport.socket.connect-path", fpath);
- if (ret)
- goto out;
+ rpc_clnt_connection_t *conn = NULL;
+ rpc_transport_t *trans = NULL;
+ int unref = 0;
+ int ret = 0;
+ gf_boolean_t timer_unref = _gf_false;
+ gf_boolean_t reconnect_unref = _gf_false;
+
+ if (!rpc) {
+ goto out;
+ }
+
+ conn = &rpc->conn;
+
+ pthread_mutex_lock(&conn->lock);
+ {
+ rpc->disabled = 1;
+
+ if (conn->timer) {
+ ret = gf_timer_call_cancel(rpc->ctx, conn->timer);
+ /* If the event is not fired and it actually cancelled
+ * the timer, do the unref else registered call back
+ * function will take care of it.
+ */
+ if (!ret)
+ timer_unref = _gf_true;
+ conn->timer = NULL;
+ }
+
+ if (conn->reconnect) {
+ ret = gf_timer_call_cancel(rpc->ctx, conn->reconnect);
+ if (!ret)
+ reconnect_unref = _gf_true;
+ conn->reconnect = NULL;
+ }
+ conn->connected = 0;
+
+ unref = rpc_clnt_remove_ping_timer_locked(rpc);
+ trans = conn->trans;
+ }
+ pthread_mutex_unlock(&conn->lock);
+
+ ret = -1;
+ if (trans) {
+ ret = rpc_transport_disconnect(trans, _gf_true);
+ /* The auth_value was being reset to AUTH_GLUSTERFS_v2.
+ * if (clnt->auth_value)
+ * clnt->auth_value = AUTH_GLUSTERFS_v2;
+ * It should not be reset here. The disconnect during
+ * portmap request can race with handshake. If handshake
+ * happens first and disconnect later, auth_value would set
+ * to default value and it never sets back to actual auth_value
+ * supported by server. But it's important to set to lower
+ * version supported in the case where the server downgrades.
+ * So moving this code to RPC_TRANSPORT_CONNECT. Note that
+ * CONNECT cannot race with handshake as by nature it is
+ * serialized with handhake. An handshake can happen only
+ * on a connected transport and hence its strictly serialized.
+ */
+ }
+ if (unref)
+ rpc_clnt_unref(rpc);
- ret = dict_set_str (dict, "transport.address-family", "unix");
- if (ret)
- goto out;
+ if (timer_unref)
+ rpc_clnt_unref(rpc);
- ret = dict_set_str (dict, "transport.socket.nodelay", "off");
- if (ret)
- goto out;
+ if (reconnect_unref)
+ rpc_clnt_unref(rpc);
- ret = dict_set_str (dict, "transport-type", "socket");
- if (ret)
- goto out;
+out:
+ return ret;
+}
- ret = dict_set_str (dict, "transport.socket.keepalive", "off");
- if (ret)
- goto out;
+void
+rpc_clnt_reconfig(struct rpc_clnt *rpc, struct rpc_clnt_config *config)
+{
+ if (config->ping_timeout) {
+ if (config->ping_timeout != rpc->conn.ping_timeout)
+ gf_log(rpc->conn.name, GF_LOG_INFO,
+ "changing ping timeout to %d (from %d)",
+ config->ping_timeout, rpc->conn.ping_timeout);
- *options = dict;
-out:
- if (ret) {
- if (fpath)
- GF_FREE (fpath);
- if (dict)
- dict_unref (dict);
+ pthread_mutex_lock(&rpc->conn.lock);
+ {
+ rpc->conn.ping_timeout = config->ping_timeout;
+ }
+ pthread_mutex_unlock(&rpc->conn.lock);
+ }
+
+ if (config->rpc_timeout) {
+ if (config->rpc_timeout != rpc->conn.config.rpc_timeout)
+ gf_log(rpc->conn.name, GF_LOG_INFO,
+ "changing timeout to %d (from %d)", config->rpc_timeout,
+ rpc->conn.config.rpc_timeout);
+ rpc->conn.config.rpc_timeout = config->rpc_timeout;
+ }
+
+ if (config->remote_port) {
+ if (config->remote_port != rpc->conn.config.remote_port)
+ gf_log(rpc->conn.name, GF_LOG_INFO, "changing port to %d (from %d)",
+ config->remote_port, rpc->conn.config.remote_port);
+
+ rpc->conn.config.remote_port = config->remote_port;
+ }
+
+ if (config->remote_host) {
+ if (rpc->conn.config.remote_host) {
+ if (strcmp(rpc->conn.config.remote_host, config->remote_host))
+ gf_log(rpc->conn.name, GF_LOG_INFO,
+ "changing hostname to %s (from %s)", config->remote_host,
+ rpc->conn.config.remote_host);
+ GF_FREE(rpc->conn.config.remote_host);
+ } else {
+ gf_log(rpc->conn.name, GF_LOG_INFO, "setting hostname to %s",
+ config->remote_host);
}
- return ret;
+
+ rpc->conn.config.remote_host = gf_strdup(config->remote_host);
+ }
}
diff --git a/rpc/rpc-lib/src/rpc-clnt.h b/rpc/rpc-lib/src/rpc-clnt.h
index e7335e38839..2945265200b 100644
--- a/rpc/rpc-lib/src/rpc-clnt.h
+++ b/rpc/rpc-lib/src/rpc-clnt.h
@@ -11,82 +11,82 @@
#ifndef __RPC_CLNT_H
#define __RPC_CLNT_H
-#include "stack.h"
+#include <glusterfs/stack.h>
#include "rpc-transport.h"
-#include "timer.h"
+#include <glusterfs/timer.h>
#include "xdr-common.h"
+#include "glusterfs3.h"
typedef enum {
- RPC_CLNT_CONNECT,
- RPC_CLNT_DISCONNECT,
- RPC_CLNT_MSG
+ RPC_CLNT_CONNECT,
+ RPC_CLNT_DISCONNECT,
+ RPC_CLNT_PING,
+ RPC_CLNT_MSG,
+ RPC_CLNT_DESTROY
} rpc_clnt_event_t;
-
#define SFRAME_GET_PROGNUM(sframe) (sframe->rpcreq->prog->prognum)
#define SFRAME_GET_PROGVER(sframe) (sframe->rpcreq->prog->progver)
#define SFRAME_GET_PROCNUM(sframe) (sframe->rpcreq->procnum)
-struct xptr_clnt;
struct rpc_req;
struct rpc_clnt;
struct rpc_clnt_config;
struct rpc_clnt_program;
-typedef int (*rpc_clnt_notify_t) (struct rpc_clnt *rpc, void *mydata,
- rpc_clnt_event_t fn, void *data);
+typedef int (*rpc_clnt_notify_t)(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t fn, void *data);
-typedef int (*fop_cbk_fn_t) (struct rpc_req *req, struct iovec *iov, int count,
- void *myframe);
+typedef int (*fop_cbk_fn_t)(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe);
-typedef int (*clnt_fn_t) (call_frame_t *fr, xlator_t *xl, void *args);
+typedef int (*clnt_fn_t)(call_frame_t *fr, xlator_t *xl, void *args);
struct saved_frame {
- union {
- struct list_head list;
- struct {
- struct saved_frame *frame_next;
- struct saved_frame *frame_prev;
- };
- };
- void *capital_this;
- void *frame;
- struct timeval saved_at;
- struct rpc_req *rpcreq;
- rpc_transport_rsp_t rsp;
+ union {
+ struct list_head list;
+ struct {
+ struct saved_frame *frame_next;
+ struct saved_frame *frame_prev;
+ };
+ };
+ void *capital_this;
+ void *frame;
+ struct rpc_req *rpcreq;
+ struct timeval saved_at;
+ rpc_transport_rsp_t rsp;
};
struct saved_frames {
- int64_t count;
- struct saved_frame sf;
- struct saved_frame lk_sf;
+ int64_t count;
+ struct saved_frame sf;
+ struct saved_frame lk_sf;
};
-
/* Initialized by procnum */
typedef struct rpc_clnt_procedure {
- char *procname;
- clnt_fn_t fn;
+ char *procname;
+ clnt_fn_t fn;
} rpc_clnt_procedure_t;
typedef struct rpc_clnt_program {
- char *progname;
- int prognum;
- int progver;
- rpc_clnt_procedure_t *proctable;
- char **procnames;
- int numproc;
+ char *progname;
+ int prognum;
+ int progver;
+ rpc_clnt_procedure_t *proctable;
+ char **procnames;
+ int numproc;
} rpc_clnt_prog_t;
-typedef int (*rpcclnt_cb_fn) (void *data);
+typedef int (*rpcclnt_cb_fn)(struct rpc_clnt *rpc, void *mydata, void *data);
/* The descriptor for each procedure/actor that runs
* over the RPC service.
*/
typedef struct rpcclnt_actor_desc {
- char procname[32];
- int procnum;
- rpcclnt_cb_fn actor;
+ char procname[32];
+ rpcclnt_cb_fn actor;
+ int procnum;
} rpcclnt_cb_actor_t;
/* Describes a program and its version along with the function pointers
@@ -94,101 +94,113 @@ typedef struct rpcclnt_actor_desc {
* Never changed ever by any thread so no need for a lock.
*/
typedef struct rpcclnt_cb_program {
- char progname[32];
- int prognum;
- int progver;
- rpcclnt_cb_actor_t *actors; /* All procedure handlers */
- int numactors; /* Num actors in actor array */
+ char progname[32];
+ int prognum;
+ int progver;
+ rpcclnt_cb_actor_t *actors; /* All procedure handlers */
+ /* Program specific state handed to actors */
+ void *private;
- /* Program specific state handed to actors */
- void *private;
+ /* list member to link to list of registered services with rpc_clnt */
+ struct list_head program;
+ /* Needed for passing back in cb_actor */
+ void *mydata;
+ int numactors; /* Num actors in actor array */
- /* list member to link to list of registered services with rpc_clnt */
- struct list_head program;
} rpcclnt_cb_program_t;
-
-
typedef struct rpc_auth_data {
- int flavour;
- int datalen;
- char authdata[GF_MAX_AUTH_BYTES];
+ int flavour;
+ int datalen;
+ char authdata[GF_MAX_AUTH_BYTES];
} rpc_auth_data_t;
-
struct rpc_clnt_config {
- int rpc_timeout;
- int remote_port;
- char * remote_host;
+ int rpc_timeout;
+ int remote_port;
+ char *remote_host;
+ int ping_timeout;
};
-
-#define rpc_auth_flavour(au) ((au).flavour)
+#define rpc_auth_flavour(au) ((au).flavour)
struct rpc_clnt_connection {
- pthread_mutex_t lock;
- rpc_transport_t *trans;
- struct rpc_clnt_config config;
- gf_timer_t *reconnect;
- gf_timer_t *timer;
- gf_timer_t *ping_timer;
- struct rpc_clnt *rpc_clnt;
- char connected;
- struct saved_frames *saved_frames;
- int32_t frame_timeout;
- struct timeval last_sent;
- struct timeval last_received;
- int32_t ping_started;
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+ rpc_transport_t *trans;
+ struct rpc_clnt_config config;
+ gf_timer_t *reconnect;
+ gf_timer_t *timer;
+ gf_timer_t *ping_timer;
+ struct rpc_clnt *rpc_clnt;
+ struct saved_frames *saved_frames;
+ struct timespec last_sent;
+ struct timespec last_received;
+ uint64_t pingcnt;
+ uint64_t msgcnt;
+ uint64_t cleanup_gen;
+ char *name;
+ int32_t ping_started;
+ int32_t frame_timeout;
+ int32_t ping_timeout;
+ gf_boolean_t disconnected;
+ char connected;
};
typedef struct rpc_clnt_connection rpc_clnt_connection_t;
struct rpc_req {
- rpc_clnt_connection_t *conn;
- uint32_t xid;
- struct iovec req[2];
- int reqcnt;
- struct iobref *req_iobref;
- struct iovec rsp[2];
- int rspcnt;
- struct iobref *rsp_iobref;
- int rpc_status;
- rpc_auth_data_t verf;
- rpc_clnt_prog_t *prog;
- int procnum;
- fop_cbk_fn_t cbkfn;
- void *conn_private;
+ rpc_clnt_connection_t *conn;
+ struct iovec req[2];
+ struct iobref *req_iobref;
+ struct iovec rsp[2];
+ int rspcnt;
+ int reqcnt;
+ struct iobref *rsp_iobref;
+ rpc_clnt_prog_t *prog;
+ rpc_auth_data_t verf;
+ fop_cbk_fn_t cbkfn;
+ void *conn_private;
+ int procnum;
+ int rpc_status;
+ uint32_t xid;
};
typedef struct rpc_clnt {
- pthread_mutex_t lock;
- rpc_clnt_notify_t notifyfn;
- rpc_clnt_connection_t conn;
- void *mydata;
- uint64_t xid;
+ pthread_mutex_t lock;
+ rpc_clnt_notify_t notifyfn;
+ rpc_clnt_connection_t conn;
+ void *mydata;
+ gf_atomic_t xid;
- /* list of cb programs registered with rpc-clnt */
- struct list_head programs;
+ /* list of cb programs registered with rpc-clnt */
+ struct list_head programs;
- /* Memory pool for rpc_req_t */
- struct mem_pool *reqpool;
+ /* Memory pool for rpc_req_t */
+ struct mem_pool *reqpool;
- struct mem_pool *saved_frames_pool;
+ struct mem_pool *saved_frames_pool;
- glusterfs_ctx_t *ctx;
- int refcount;
- int auth_null;
- char disabled;
+ glusterfs_ctx_t *ctx;
+ gf_atomic_t refcount;
+ xlator_t *owner;
+ int auth_value;
+ char disabled;
} rpc_clnt_t;
+struct rpc_clnt *
+rpc_clnt_new(dict_t *options, xlator_t *owner, char *name,
+ uint32_t reqpool_size);
-struct rpc_clnt *rpc_clnt_new (dict_t *options, glusterfs_ctx_t *ctx,
- char *name, uint32_t reqpool_size);
+int
+rpc_clnt_start(struct rpc_clnt *rpc);
-int rpc_clnt_start (struct rpc_clnt *rpc);
+int
+rpc_clnt_cleanup_and_start(struct rpc_clnt *rpc);
-int rpc_clnt_register_notify (struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
- void *mydata);
+int
+rpc_clnt_register_notify(struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
+ void *mydata);
/* Some preconditions related to vectors holding responses.
* @rsphdr: should contain pointer to buffer which can hold response header
@@ -205,39 +217,44 @@ int rpc_clnt_register_notify (struct rpc_clnt *rpc, rpc_clnt_notify_t fn,
* of the header.
*/
-int rpc_clnt_submit (struct rpc_clnt *rpc, rpc_clnt_prog_t *prog,
- int procnum, fop_cbk_fn_t cbkfn,
- struct iovec *proghdr, int proghdrcount,
- struct iovec *progpayload, int progpayloadcount,
- struct iobref *iobref, void *frame, struct iovec *rsphdr,
- int rsphdr_count, struct iovec *rsp_payload,
- int rsp_payload_count, struct iobref *rsp_iobref);
+int
+rpc_clnt_submit(struct rpc_clnt *rpc, rpc_clnt_prog_t *prog, int procnum,
+ fop_cbk_fn_t cbkfn, struct iovec *proghdr, int proghdrcount,
+ struct iovec *progpayload, int progpayloadcount,
+ struct iobref *iobref, void *frame, struct iovec *rsphdr,
+ int rsphdr_count, struct iovec *rsp_payload,
+ int rsp_payload_count, struct iobref *rsp_iobref);
struct rpc_clnt *
-rpc_clnt_ref (struct rpc_clnt *rpc);
+rpc_clnt_ref(struct rpc_clnt *rpc);
struct rpc_clnt *
-rpc_clnt_unref (struct rpc_clnt *rpc);
-
-int rpc_clnt_connection_cleanup (rpc_clnt_connection_t *conn);
+rpc_clnt_unref(struct rpc_clnt *rpc);
-void rpc_clnt_set_connected (rpc_clnt_connection_t *conn);
+int
+rpc_clnt_connection_cleanup(rpc_clnt_connection_t *conn);
+int
+rpc_clnt_reconnect_cleanup(rpc_clnt_connection_t *conn);
+gf_boolean_t
+is_rpc_clnt_disconnected(rpc_clnt_connection_t *conn);
-void rpc_clnt_unset_connected (rpc_clnt_connection_t *conn);
-void rpc_clnt_reconnect (void *trans_ptr);
+void
+rpc_clnt_reconnect(void *trans_ptr);
-void rpc_clnt_reconfig (struct rpc_clnt *rpc, struct rpc_clnt_config *config);
+void
+rpc_clnt_reconfig(struct rpc_clnt *rpc, struct rpc_clnt_config *config);
/* All users of RPC services should use this API to register their
* procedure handlers.
*/
-int rpcclnt_cbk_program_register (struct rpc_clnt *svc,
- rpcclnt_cb_program_t *program);
+int
+rpcclnt_cbk_program_register(struct rpc_clnt *svc,
+ rpcclnt_cb_program_t *program, void *mydata);
int
-rpc_clnt_transport_unix_options_build (dict_t **options, char *filepath);
+rpc_clnt_disable(struct rpc_clnt *rpc);
-void
-rpc_clnt_disable (struct rpc_clnt *rpc);
+int
+rpc_clnt_mgmt_pmap_signout(glusterfs_ctx_t *ctx, char *brick_name);
#endif /* !_RPC_CLNT_H */
diff --git a/rpc/rpc-lib/src/rpc-drc.c b/rpc/rpc-lib/src/rpc-drc.c
new file mode 100644
index 00000000000..de8dc630626
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-drc.c
@@ -0,0 +1,855 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "rpcsvc.h"
+#ifndef RPC_DRC_H
+#include "rpc-drc.h"
+#endif
+#include <glusterfs/locking.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/mem-pool.h>
+
+#include <netinet/in.h>
+#include <unistd.h>
+
+/**
+ * rpcsvc_drc_op_destroy - Destroys the cached reply
+ *
+ * @param drc - the main drc structure
+ * @param reply - the cached reply to destroy
+ * @return NULL if reply is destroyed, reply otherwise
+ */
+static drc_cached_op_t *
+rpcsvc_drc_op_destroy(rpcsvc_drc_globals_t *drc, drc_cached_op_t *reply)
+{
+ GF_ASSERT(drc);
+ GF_ASSERT(reply);
+
+ if (reply->state == DRC_OP_IN_TRANSIT)
+ return reply;
+
+ iobref_unref(reply->msg.iobref);
+ if (reply->msg.rpchdr)
+ GF_FREE(reply->msg.rpchdr);
+ if (reply->msg.proghdr)
+ GF_FREE(reply->msg.proghdr);
+ if (reply->msg.progpayload)
+ GF_FREE(reply->msg.progpayload);
+
+ list_del(&reply->global_list);
+ reply->client->op_count--;
+ drc->op_count--;
+ mem_put(reply);
+ reply = NULL;
+
+ return reply;
+}
+
+/**
+ * rpcsvc_drc_op_rb_unref - This function is used in rb tree cleanup only
+ *
+ * @param reply - the cached reply to unref
+ * @param drc - the main drc structure
+ * @return void
+ */
+static void
+rpcsvc_drc_rb_op_destroy(void *reply, void *drc)
+{
+ rpcsvc_drc_op_destroy(drc, (drc_cached_op_t *)reply);
+}
+
+/**
+ * rpcsvc_remove_drc_client - Cleanup the drc client
+ *
+ * @param client - the drc client to be removed
+ * @return void
+ */
+static void
+rpcsvc_remove_drc_client(drc_client_t *client)
+{
+ rb_destroy(client->rbtree, rpcsvc_drc_rb_op_destroy);
+ list_del(&client->client_list);
+ GF_FREE(client);
+}
+
+/**
+ * rpcsvc_client_lookup - Given a sockaddr_storage, find the client if it exists
+ *
+ * @param drc - the main drc structure
+ * @param sockaddr - the network address of the client to be looked up
+ * @return drc client if it exists, NULL otherwise
+ */
+static drc_client_t *
+rpcsvc_client_lookup(rpcsvc_drc_globals_t *drc,
+ struct sockaddr_storage *sockaddr)
+{
+ drc_client_t *client = NULL;
+
+ GF_ASSERT(drc);
+ GF_ASSERT(sockaddr);
+
+ if (list_empty(&drc->clients_head))
+ return NULL;
+
+ list_for_each_entry(client, &drc->clients_head, client_list)
+ {
+ if (gf_sock_union_equal_addr(&client->sock_union,
+ (union gf_sock_union *)sockaddr))
+ return client;
+ }
+
+ return NULL;
+}
+
+/**
+ * drc_compare_reqs - Used by rbtree to determine if incoming req matches with
+ * an existing node(cached reply) in rbtree
+ *
+ * @param item - pointer to the incoming req
+ * @param rb_node_data - pointer to an rbtree node (cached reply)
+ * @param param - drc pointer - unused here, but used in *op_destroy
+ * @return 0 if req matches reply, else (req->xid - reply->xid)
+ */
+int
+drc_compare_reqs(const void *item, const void *rb_node_data, void *param)
+{
+ int ret = -1;
+ drc_cached_op_t *req = NULL;
+ drc_cached_op_t *reply = NULL;
+
+ GF_ASSERT(item);
+ GF_ASSERT(rb_node_data);
+ GF_ASSERT(param);
+
+ req = (drc_cached_op_t *)item;
+ reply = (drc_cached_op_t *)rb_node_data;
+
+ ret = req->xid - reply->xid;
+ if (ret != 0)
+ return ret;
+
+ if (req->prognum == reply->prognum && req->procnum == reply->procnum &&
+ req->progversion == reply->progversion)
+ return 0;
+
+ return 1;
+}
+
+/**
+ * drc_init_client_cache - initialize a drc client and its rb tree
+ *
+ * @param drc - the main drc structure
+ * @param client - the drc client to be initialized
+ * @return 0 on success, -1 on failure
+ */
+static int
+drc_init_client_cache(rpcsvc_drc_globals_t *drc, drc_client_t *client)
+{
+ GF_ASSERT(drc);
+ GF_ASSERT(client);
+
+ client->rbtree = rb_create(drc_compare_reqs, drc, NULL);
+ if (!client->rbtree) {
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "rb tree creation failed");
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * rpcsvc_get_drc_client - find the drc client with given sockaddr, else
+ * allocate and initialize a new drc client
+ *
+ * @param drc - the main drc structure
+ * @param sockaddr - network address of client
+ * @return drc client on success, NULL on failure
+ */
+static drc_client_t *
+rpcsvc_get_drc_client(rpcsvc_drc_globals_t *drc,
+ struct sockaddr_storage *sockaddr)
+{
+ drc_client_t *client = NULL;
+
+ GF_ASSERT(drc);
+ GF_ASSERT(sockaddr);
+
+ client = rpcsvc_client_lookup(drc, sockaddr);
+ if (client)
+ goto out;
+
+ /* if lookup fails, allocate cache for the new client */
+ client = GF_CALLOC(1, sizeof(drc_client_t), gf_common_mt_drc_client_t);
+ if (!client)
+ goto out;
+
+ GF_ATOMIC_INIT(client->ref, 0);
+ client->sock_union = (union gf_sock_union) * sockaddr;
+ client->op_count = 0;
+ INIT_LIST_HEAD(&client->client_list);
+
+ if (drc_init_client_cache(drc, client)) {
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "initialization of drc client failed");
+ GF_FREE(client);
+ client = NULL;
+ goto out;
+ }
+ drc->client_count++;
+
+ list_add(&client->client_list, &drc->clients_head);
+
+out:
+ return client;
+}
+
+/**
+ * rpcsvc_need_drc - Determine if a request needs DRC service
+ *
+ * @param req - incoming request
+ * @return 1 if DRC is needed for req, 0 otherwise
+ */
+int
+rpcsvc_need_drc(rpcsvc_request_t *req)
+{
+ rpcsvc_actor_t *actor = NULL;
+ rpcsvc_drc_globals_t *drc = NULL;
+
+ GF_ASSERT(req);
+ GF_ASSERT(req->svc);
+
+ drc = req->svc->drc;
+
+ if (!drc || drc->status == DRC_UNINITIATED)
+ return 0;
+
+ actor = rpcsvc_program_actor(req);
+ if (!actor)
+ return 0;
+
+ return (actor->op_type == DRC_NON_IDEMPOTENT && drc->type != DRC_TYPE_NONE);
+}
+
+/**
+ * rpcsvc_drc_client_ref - ref the drc client
+ *
+ * @param client - the drc client to ref
+ * @return client
+ */
+static drc_client_t *
+rpcsvc_drc_client_ref(drc_client_t *client)
+{
+ GF_ASSERT(client);
+ GF_ATOMIC_INC(client->ref);
+ return client;
+}
+
+/**
+ * rpcsvc_drc_client_unref - unref the drc client, and destroy
+ * the client on last unref
+ *
+ * @param drc - the main drc structure
+ * @param client - the drc client to unref
+ * @return NULL if it is the last unref, client otherwise
+ */
+static drc_client_t *
+rpcsvc_drc_client_unref(rpcsvc_drc_globals_t *drc, drc_client_t *client)
+{
+ uint32_t refcount;
+
+ GF_ASSERT(drc);
+
+ refcount = GF_ATOMIC_DEC(client->ref);
+ if (!refcount) {
+ drc->client_count--;
+ rpcsvc_remove_drc_client(client);
+ client = NULL;
+ }
+
+ return client;
+}
+
+/**
+ * rpcsvc_drc_lookup - lookup a request to see if it is already cached
+ *
+ * @param req - incoming request
+ * @return cached reply of req if found, NULL otherwise
+ */
+drc_cached_op_t *
+rpcsvc_drc_lookup(rpcsvc_request_t *req)
+{
+ drc_client_t *client = NULL;
+ drc_cached_op_t *reply = NULL;
+ drc_cached_op_t new = {
+ .xid = req->xid,
+ .prognum = req->prognum,
+ .progversion = req->progver,
+ .procnum = req->procnum,
+ };
+
+ GF_ASSERT(req);
+
+ if (!req->trans->drc_client) {
+ client = rpcsvc_get_drc_client(req->svc->drc,
+ &req->trans->peerinfo.sockaddr);
+ if (!client)
+ goto out;
+
+ req->trans->drc_client = rpcsvc_drc_client_ref(client);
+ }
+
+ client = req->trans->drc_client;
+
+ if (client->op_count == 0)
+ goto out;
+
+ reply = rb_find(client->rbtree, &new);
+
+out:
+ return reply;
+}
+
+/**
+ * rpcsvc_send_cached_reply - send the cached reply for the incoming request
+ *
+ * @param req - incoming request (which is a duplicate in this case)
+ * @param reply - the cached reply for req
+ * @return 0 on successful reply submission, -1 or other non-zero value
+ * otherwise
+ */
+int
+rpcsvc_send_cached_reply(rpcsvc_request_t *req, drc_cached_op_t *reply)
+{
+ int ret = 0;
+
+ GF_ASSERT(req);
+ GF_ASSERT(reply);
+
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "sending cached reply: xid: %d, "
+ "client: %s",
+ req->xid, req->trans->peerinfo.identifier);
+
+ rpcsvc_drc_client_ref(reply->client);
+ ret = rpcsvc_transport_submit(
+ req->trans, reply->msg.rpchdr, reply->msg.rpchdrcount,
+ reply->msg.proghdr, reply->msg.proghdrcount, reply->msg.progpayload,
+ reply->msg.progpayloadcount, reply->msg.iobref, req->trans_private);
+ rpcsvc_drc_client_unref(req->svc->drc, reply->client);
+
+ return ret;
+}
+
+/**
+ * rpcsvc_cache_reply - cache the reply for the processed request 'req'
+ *
+ * @param req - processed request
+ * @param iobref - iobref structure of the reply
+ * @param rpchdr - rpc header of the reply
+ * @param rpchdrcount - size of rpchdr
+ * @param proghdr - program header of the reply
+ * @param proghdrcount - size of proghdr
+ * @param payload - payload of the reply if any
+ * @param payloadcount - size of payload
+ * @return 0 on success, -1 on failure
+ */
+int
+rpcsvc_cache_reply(rpcsvc_request_t *req, struct iobref *iobref,
+ struct iovec *rpchdr, int rpchdrcount, struct iovec *proghdr,
+ int proghdrcount, struct iovec *payload, int payloadcount)
+{
+ int ret = -1;
+ drc_cached_op_t *reply = NULL;
+
+ GF_ASSERT(req);
+ GF_ASSERT(req->reply);
+
+ reply = req->reply;
+
+ reply->state = DRC_OP_CACHED;
+
+ reply->msg.iobref = iobref_ref(iobref);
+
+ reply->msg.rpchdrcount = rpchdrcount;
+ reply->msg.rpchdr = iov_dup(rpchdr, rpchdrcount);
+
+ reply->msg.proghdrcount = proghdrcount;
+ reply->msg.proghdr = iov_dup(proghdr, proghdrcount);
+
+ reply->msg.progpayloadcount = payloadcount;
+ if (payloadcount)
+ reply->msg.progpayload = iov_dup(payload, payloadcount);
+
+ // rpcsvc_drc_client_unref (req->svc->drc, req->trans->drc_client);
+ // rpcsvc_drc_op_unref (req->svc->drc, reply);
+ ret = 0;
+
+ return ret;
+}
+
+/**
+ * rpcsvc_vacate_drc_entries - free up some percentage of drc cache
+ * based on the lru factor
+ *
+ * @param drc - the main drc structure
+ * @return void
+ */
+static void
+rpcsvc_vacate_drc_entries(rpcsvc_drc_globals_t *drc)
+{
+ uint32_t i = 0;
+ uint32_t n = 0;
+ drc_cached_op_t *reply = NULL;
+ drc_cached_op_t *tmp = NULL;
+ drc_client_t *client = NULL;
+
+ GF_ASSERT(drc);
+
+ n = drc->global_cache_size / drc->lru_factor;
+
+ list_for_each_entry_safe_reverse(reply, tmp, &drc->cache_head, global_list)
+ {
+ /* Don't delete ops that are in transit */
+ if (reply->state == DRC_OP_IN_TRANSIT)
+ continue;
+
+ client = reply->client;
+
+ rb_delete(client->rbtree, reply);
+
+ rpcsvc_drc_op_destroy(drc, reply);
+ rpcsvc_drc_client_unref(drc, client);
+ i++;
+ if (i >= n)
+ break;
+ }
+}
+
+/**
+ * rpcsvc_add_op_to_cache - insert the cached op into the client rbtree and drc
+ * list
+ *
+ * @param drc - the main drc structure
+ * @param reply - the op to be inserted
+ * @return 0 on success, -1 on failure
+ */
+static int
+rpcsvc_add_op_to_cache(rpcsvc_drc_globals_t *drc, drc_cached_op_t *reply)
+{
+ drc_client_t *client = NULL;
+ drc_cached_op_t **tmp_reply = NULL;
+
+ GF_ASSERT(drc);
+ GF_ASSERT(reply);
+
+ client = reply->client;
+
+ /* cache is full, free up some space */
+ if (drc->op_count >= drc->global_cache_size)
+ rpcsvc_vacate_drc_entries(drc);
+
+ tmp_reply = (drc_cached_op_t **)rb_probe(client->rbtree, reply);
+ if (!tmp_reply) {
+ /* mem alloc failed */
+ return -1;
+ } else if (*tmp_reply != reply) {
+ /* should never happen */
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "DRC failed to detect duplicates");
+ return -1;
+ }
+
+ client->op_count++;
+ list_add(&reply->global_list, &drc->cache_head);
+ drc->op_count++;
+
+ return 0;
+}
+
+/**
+ * rpcsvc_cache_request - cache the in-transition incoming request
+ *
+ * @param req - incoming request
+ * @return 0 on success, -1 on failure
+ */
+int
+rpcsvc_cache_request(rpcsvc_request_t *req)
+{
+ int ret = -1;
+ drc_client_t *client = NULL;
+ drc_cached_op_t *reply = NULL;
+ rpcsvc_drc_globals_t *drc = NULL;
+
+ GF_ASSERT(req);
+
+ drc = req->svc->drc;
+
+ client = req->trans->drc_client;
+ if (!client) {
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "drc client is NULL");
+ goto out;
+ }
+
+ reply = mem_get0(drc->mempool);
+ if (!reply)
+ goto out;
+
+ reply->client = rpcsvc_drc_client_ref(client);
+ reply->xid = req->xid;
+ reply->prognum = req->prognum;
+ reply->progversion = req->progver;
+ reply->procnum = req->procnum;
+ reply->state = DRC_OP_IN_TRANSIT;
+ req->reply = reply;
+ INIT_LIST_HEAD(&reply->global_list);
+
+ ret = rpcsvc_add_op_to_cache(drc, reply);
+ if (ret) {
+ req->reply = NULL;
+ rpcsvc_drc_op_destroy(drc, reply);
+ rpcsvc_drc_client_unref(drc, client);
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "Failed to add op to drc cache");
+ }
+
+out:
+ return ret;
+}
+
+/**
+ *
+ * rpcsvc_drc_priv - function which dumps the drc state
+ *
+ * @param drc - the main drc structure
+ * @return 0 on success, -1 on failure
+ */
+int32_t
+rpcsvc_drc_priv(rpcsvc_drc_globals_t *drc)
+{
+ int i = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {0};
+ drc_client_t *client = NULL;
+ char ip[INET6_ADDRSTRLEN] = {0};
+
+ if (!drc || drc->status == DRC_UNINITIATED) {
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "DRC is "
+ "uninitialized, not dumping its state");
+ return 0;
+ }
+
+ gf_proc_dump_add_section("rpc.drc");
+
+ if (TRY_LOCK(&drc->lock))
+ return -1;
+
+ gf_proc_dump_build_key(key, "drc", "type");
+ gf_proc_dump_write(key, "%d", drc->type);
+
+ gf_proc_dump_build_key(key, "drc", "client_count");
+ gf_proc_dump_write(key, "%d", drc->client_count);
+
+ gf_proc_dump_build_key(key, "drc", "current_cache_size");
+ gf_proc_dump_write(key, "%d", drc->op_count);
+
+ gf_proc_dump_build_key(key, "drc", "max_cache_size");
+ gf_proc_dump_write(key, "%d", drc->global_cache_size);
+
+ gf_proc_dump_build_key(key, "drc", "lru_factor");
+ gf_proc_dump_write(key, "%d", drc->lru_factor);
+
+ gf_proc_dump_build_key(key, "drc", "duplicate_request_count");
+ gf_proc_dump_write(key, "%" PRIu64, drc->cache_hits);
+
+ gf_proc_dump_build_key(key, "drc", "in_transit_duplicate_requests");
+ gf_proc_dump_write(key, "%" PRIu64, drc->intransit_hits);
+
+ list_for_each_entry(client, &drc->clients_head, client_list)
+ {
+ gf_proc_dump_build_key(key, "client", "%d.ip-address", i);
+ memset(ip, 0, INET6_ADDRSTRLEN);
+ switch (client->sock_union.storage.ss_family) {
+ case AF_INET:
+ gf_proc_dump_write(
+ key, "%s",
+ inet_ntop(AF_INET, &client->sock_union.sin.sin_addr.s_addr,
+ ip, INET_ADDRSTRLEN));
+ break;
+ case AF_INET6:
+ gf_proc_dump_write(
+ key, "%s",
+ inet_ntop(AF_INET6, &client->sock_union.sin6.sin6_addr, ip,
+ INET6_ADDRSTRLEN));
+ break;
+ default:
+ gf_proc_dump_write(key, "%s", "N/A");
+ }
+
+ gf_proc_dump_build_key(key, "client", "%d.ref_count", i);
+ gf_proc_dump_write(key, "%" PRIu32, GF_ATOMIC_GET(client->ref));
+ gf_proc_dump_build_key(key, "client", "%d.op_count", i);
+ gf_proc_dump_write(key, "%d", client->op_count);
+ i++;
+ }
+
+ UNLOCK(&drc->lock);
+ return 0;
+}
+
+/**
+ * rpcsvc_drc_notify - function which is notified of RPC transport events
+ *
+ * @param svc - pointer to rpcsvc_t structure of the rpc
+ * @param xl - pointer to the xlator
+ * @param event - the event which triggered this notify
+ * @param data - the transport structure
+ * @return 0 on success, -1 on failure
+ */
+int
+rpcsvc_drc_notify(rpcsvc_t *svc, void *xl, rpcsvc_event_t event, void *data)
+{
+ int ret = -1;
+ rpc_transport_t *trans = NULL;
+ drc_client_t *client = NULL;
+ rpcsvc_drc_globals_t *drc = NULL;
+
+ GF_ASSERT(svc);
+ GF_ASSERT(svc->drc);
+ GF_ASSERT(data);
+
+ drc = svc->drc;
+
+ if (drc->status == DRC_UNINITIATED || drc->type == DRC_TYPE_NONE)
+ return 0;
+
+ LOCK(&drc->lock);
+ {
+ trans = (rpc_transport_t *)data;
+ client = rpcsvc_get_drc_client(drc, &trans->peerinfo.sockaddr);
+ if (!client)
+ goto unlock;
+
+ switch (event) {
+ case RPCSVC_EVENT_ACCEPT:
+ trans->drc_client = rpcsvc_drc_client_ref(client);
+ ret = 0;
+ break;
+
+ case RPCSVC_EVENT_DISCONNECT:
+ ret = 0;
+ if (list_empty(&drc->clients_head))
+ break;
+ /* should be the last unref */
+ trans->drc_client = NULL;
+ rpcsvc_drc_client_unref(drc, client);
+ break;
+
+ default:
+ break;
+ }
+ }
+unlock:
+ UNLOCK(&drc->lock);
+ return ret;
+}
+
+/**
+ * rpcsvc_drc_init - Initialize the duplicate request cache service
+ *
+ * @param svc - pointer to rpcsvc_t structure of the rpc
+ * @param options - the options dictionary which configures drc
+ * @return 0 on success, non-zero integer on failure
+ */
+int
+rpcsvc_drc_init(rpcsvc_t *svc, dict_t *options)
+{
+ int ret = 0;
+ uint32_t drc_type = 0;
+ uint32_t drc_size = 0;
+ uint32_t drc_factor = 0;
+ rpcsvc_drc_globals_t *drc = NULL;
+
+ GF_ASSERT(svc);
+ GF_ASSERT(options);
+
+ /* Toggle DRC on/off, when more drc types(persistent/cluster)
+ * are added, we shouldn't treat this as boolean. */
+ ret = dict_get_str_boolean(options, "nfs.drc", _gf_false);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_INFO, "drc user options need second look");
+ ret = _gf_false;
+ }
+
+ gf_log(GF_RPCSVC, GF_LOG_INFO, "DRC is turned %s", (ret ? "ON" : "OFF"));
+
+ /*DRC off, nothing to do */
+ if (ret == _gf_false)
+ return (0);
+
+ drc = GF_CALLOC(1, sizeof(rpcsvc_drc_globals_t),
+ gf_common_mt_drc_globals_t);
+ if (!drc)
+ return (-1);
+
+ LOCK_INIT(&drc->lock);
+ svc->drc = drc;
+
+ /* Specify type of DRC to be used */
+ ret = dict_get_uint32(options, "nfs.drc-type", &drc_type);
+ if (ret) {
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "drc type not set. Continuing with default");
+ drc_type = DRC_DEFAULT_TYPE;
+ }
+
+ /* Set the global cache size (no. of ops to cache) */
+ ret = dict_get_uint32(options, "nfs.drc-size", &drc_size);
+ if (ret) {
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "drc size not set. Continuing with default size");
+ drc_size = DRC_DEFAULT_CACHE_SIZE;
+ }
+
+ LOCK(&drc->lock);
+
+ drc->type = drc_type;
+ drc->global_cache_size = drc_size;
+
+ /* Mempool for cached ops */
+ drc->mempool = mem_pool_new(drc_cached_op_t, drc->global_cache_size);
+ if (!drc->mempool) {
+ UNLOCK(&drc->lock);
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Failed to get mempool for DRC, drc-size: %d", drc_size);
+ ret = -1;
+ goto post_unlock;
+ }
+
+ /* What percent of cache to be evicted whenever it fills up */
+ ret = dict_get_uint32(options, "nfs.drc-lru-factor", &drc_factor);
+ if (ret) {
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "drc lru factor not set. Continuing with policy default");
+ drc_factor = DRC_DEFAULT_LRU_FACTOR;
+ }
+
+ drc->lru_factor = (drc_lru_factor_t)drc_factor;
+
+ INIT_LIST_HEAD(&drc->clients_head);
+ INIT_LIST_HEAD(&drc->cache_head);
+
+ ret = rpcsvc_register_notify(svc, rpcsvc_drc_notify, THIS);
+ if (ret) {
+ UNLOCK(&drc->lock);
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "registration of drc_notify function failed");
+ goto post_unlock;
+ }
+
+ drc->status = DRC_INITIATED;
+ UNLOCK(&drc->lock);
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "drc init successful");
+post_unlock:
+ if (ret == -1) {
+ if (drc->mempool) {
+ mem_pool_destroy(drc->mempool);
+ drc->mempool = NULL;
+ }
+ GF_FREE(drc);
+ svc->drc = NULL;
+ }
+ return ret;
+}
+
+int
+rpcsvc_drc_deinit(rpcsvc_t *svc)
+{
+ rpcsvc_drc_globals_t *drc = NULL;
+
+ if (!svc)
+ return (-1);
+
+ drc = svc->drc;
+ if (!drc)
+ return (0);
+
+ LOCK(&drc->lock);
+ (void)rpcsvc_unregister_notify(svc, rpcsvc_drc_notify, THIS);
+ if (drc->mempool) {
+ mem_pool_destroy(drc->mempool);
+ drc->mempool = NULL;
+ }
+ UNLOCK(&drc->lock);
+
+ GF_FREE(drc);
+ svc->drc = NULL;
+
+ return (0);
+}
+
+int
+rpcsvc_drc_reconfigure(rpcsvc_t *svc, dict_t *options)
+{
+ int ret = -1;
+ gf_boolean_t enable_drc = _gf_false;
+ rpcsvc_drc_globals_t *drc = NULL;
+ uint32_t drc_size = 0;
+
+ /* Input sanitization */
+ if ((!svc) || (!options))
+ return (-1);
+
+ /* If DRC was not enabled before, Let rpcsvc_drc_init() to
+ * take care of DRC initialization part.
+ */
+ drc = svc->drc;
+ if (!drc) {
+ return rpcsvc_drc_init(svc, options);
+ }
+
+ /* DRC was already enabled before. Going to be reconfigured. Check
+ * if reconfigured options contain "nfs.drc" and "nfs.drc-size".
+ *
+ * NB: If DRC is "OFF", "drc-size" has no role to play.
+ * So, "drc-size" gets evaluated IFF DRC is "ON".
+ *
+ * If DRC is reconfigured,
+ * case 1: DRC is "ON"
+ * sub-case 1: drc-size remains same
+ * ACTION: Nothing to do.
+ * sub-case 2: drc-size just changed
+ * ACTION: rpcsvc_drc_deinit() followed by
+ * rpcsvc_drc_init().
+ *
+ * case 2: DRC is "OFF"
+ * ACTION: rpcsvc_drc_deinit()
+ */
+ ret = dict_get_str_boolean(options, "nfs.drc", _gf_false);
+ if (ret < 0)
+ ret = _gf_false;
+
+ enable_drc = ret;
+ gf_log(GF_RPCSVC, GF_LOG_INFO, "DRC is turned %s", (ret ? "ON" : "OFF"));
+
+ /* case 1: DRC is "ON"*/
+ if (enable_drc) {
+ /* Fetch drc-size if reconfigured */
+ if (dict_get_uint32(options, "nfs.drc-size", &drc_size))
+ drc_size = DRC_DEFAULT_CACHE_SIZE;
+
+ /* case 1: sub-case 1*/
+ if (drc->global_cache_size == drc_size)
+ return (0);
+
+ /* case 1: sub-case 2*/
+ (void)rpcsvc_drc_deinit(svc);
+ return rpcsvc_drc_init(svc, options);
+ }
+
+ /* case 2: DRC is "OFF" */
+ return rpcsvc_drc_deinit(svc);
+}
diff --git a/rpc/rpc-lib/src/rpc-drc.h b/rpc/rpc-lib/src/rpc-drc.h
new file mode 100644
index 00000000000..ce66430809b
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-drc.h
@@ -0,0 +1,97 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef RPC_DRC_H
+#define RPC_DRC_H
+
+#include "rpcsvc-common.h"
+#include "rpcsvc.h"
+#include <glusterfs/locking.h>
+#include <glusterfs/dict.h>
+#include "rb.h"
+
+/* per-client cache structure */
+struct drc_client {
+ union gf_sock_union sock_union;
+ /* pointers to the cache */
+ struct rb_table *rbtree;
+ /* no. of ops currently cached */
+ uint32_t op_count;
+ gf_atomic_uint32_t ref;
+ struct list_head client_list;
+};
+
+struct drc_cached_op {
+ drc_op_state_t state;
+ int prognum;
+ int progversion;
+ int procnum;
+ rpc_transport_msg_t msg;
+ drc_client_t *client;
+ struct list_head client_list;
+ struct list_head global_list;
+ int32_t ref;
+ uint32_t xid;
+};
+
+/* global drc definitions */
+enum drc_status { DRC_UNINITIATED, DRC_INITIATED };
+typedef enum drc_status drc_status_t;
+
+struct drc_globals {
+ /* allocator must be the first member since
+ * it is used so in gf_libavl_allocator
+ */
+ struct libavl_allocator allocator;
+ /* configurable size parameter */
+ gf_lock_t lock;
+ uint64_t cache_hits;
+ uint64_t intransit_hits;
+ struct mem_pool *mempool;
+ struct list_head cache_head;
+ struct list_head clients_head;
+ uint32_t op_count;
+ uint32_t client_count;
+ uint32_t global_cache_size;
+ drc_type_t type;
+ drc_lru_factor_t lru_factor;
+ drc_status_t status;
+};
+
+int
+rpcsvc_need_drc(rpcsvc_request_t *req);
+
+drc_cached_op_t *
+rpcsvc_drc_lookup(rpcsvc_request_t *req);
+
+int
+rpcsvc_send_cached_reply(rpcsvc_request_t *req, drc_cached_op_t *reply);
+
+int
+rpcsvc_cache_reply(rpcsvc_request_t *req, struct iobref *iobref,
+ struct iovec *rpchdr, int rpchdrcount, struct iovec *proghdr,
+ int proghdrcount, struct iovec *payload, int payloadcount);
+
+int
+rpcsvc_cache_request(rpcsvc_request_t *req);
+
+int32_t
+rpcsvc_drc_priv(rpcsvc_drc_globals_t *drc);
+
+int
+rpcsvc_drc_init(rpcsvc_t *svc, dict_t *options);
+
+int
+rpcsvc_drc_deinit(rpcsvc_t *svc);
+
+int
+rpcsvc_drc_reconfigure(rpcsvc_t *svc, dict_t *options);
+
+#endif /* RPC_DRC_H */
diff --git a/rpc/rpc-lib/src/rpc-lib-messages.h b/rpc/rpc-lib/src/rpc-lib-messages.h
new file mode 100644
index 00000000000..2c0b820dbf9
--- /dev/null
+++ b/rpc/rpc-lib/src/rpc-lib-messages.h
@@ -0,0 +1,34 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _RPC_LIB_MESSAGES_H_
+#define _RPC_LIB_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(RPC_LIB, TRANS_MSG_ADDR_FAMILY_NOT_SPECIFIED,
+ TRANS_MSG_UNKNOWN_ADDR_FAMILY, TRANS_MSG_REMOTE_HOST_ERROR,
+ TRANS_MSG_DNS_RESOL_FAILED, TRANS_MSG_LISTEN_PATH_ERROR,
+ TRANS_MSG_CONNECT_PATH_ERROR, TRANS_MSG_GET_ADDR_INFO_FAILED,
+ TRANS_MSG_PORT_BIND_FAILED, TRANS_MSG_INET_ERROR,
+ TRANS_MSG_GET_NAME_INFO_FAILED, TRANS_MSG_TRANSPORT_ERROR,
+ TRANS_MSG_TIMEOUT_EXCEEDED, TRANS_MSG_SOCKET_BIND_ERROR);
+
+#endif /* !_RPC_LIB_MESSAGES_H_ */
diff --git a/rpc/rpc-lib/src/rpc-transport.c b/rpc/rpc-lib/src/rpc-transport.c
index 4b693742449..a6e201a9b36 100644
--- a/rpc/rpc-lib/src/rpc-transport.c
+++ b/rpc/rpc-lib/src/rpc-transport.c
@@ -12,614 +12,661 @@
#include <stdlib.h>
#include <stdio.h>
#include <sys/poll.h>
-#include <fnmatch.h>
#include <stdint.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "logging.h"
#include "rpc-transport.h"
-#include "glusterfs.h"
-/* FIXME: xlator.h is needed for volume_option_t, need to define the datatype
- * in some other header
- */
-#include "xlator.h"
-#include "list.h"
#ifndef GF_OPTION_LIST_EMPTY
#define GF_OPTION_LIST_EMPTY(_opt) (_opt->value[0] == NULL)
#endif
+int32_t
+rpc_transport_count(const char *transport_type)
+{
+ char *transport_dup = NULL;
+ char *saveptr = NULL;
+ char *ptr = NULL;
+ int count = 0;
+
+ if (transport_type == NULL)
+ return -1;
+
+ transport_dup = gf_strdup(transport_type);
+ if (transport_dup == NULL) {
+ return -1;
+ }
+
+ ptr = strtok_r(transport_dup, ",", &saveptr);
+ while (ptr != NULL) {
+ count++;
+ ptr = strtok_r(NULL, ",", &saveptr);
+ }
+
+ GF_FREE(transport_dup);
+ return count;
+}
int
-rpc_transport_get_myaddr (rpc_transport_t *this, char *peeraddr, int addrlen,
- struct sockaddr_storage *sa, size_t salen)
+rpc_transport_get_myaddr(rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, size_t salen)
{
- int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", this, out);
+ int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO("rpc", this, out);
- ret = this->ops->get_myaddr (this, peeraddr, addrlen, sa, salen);
+ ret = this->ops->get_myaddr(this, peeraddr, addrlen, sa, salen);
out:
- return ret;
+ return ret;
}
int32_t
-rpc_transport_get_myname (rpc_transport_t *this, char *hostname, int hostlen)
+rpc_transport_get_peername(rpc_transport_t *this, char *hostname, int hostlen)
{
- int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", this, out);
+ int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO("rpc", this, out);
- ret = this->ops->get_myname (this, hostname, hostlen);
+ ret = this->ops->get_peername(this, hostname, hostlen);
out:
- return ret;
+ return ret;
}
-int32_t
-rpc_transport_get_peername (rpc_transport_t *this, char *hostname, int hostlen)
+int
+rpc_transport_throttle(rpc_transport_t *this, gf_boolean_t onoff)
{
- int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", this, out);
+ if (!this->ops->throttle)
+ return -ENOSYS;
- ret = this->ops->get_peername (this, hostname, hostlen);
-out:
- return ret;
+ return this->ops->throttle(this, onoff);
}
int32_t
-rpc_transport_get_peeraddr (rpc_transport_t *this, char *peeraddr, int addrlen,
- struct sockaddr_storage *sa, size_t salen)
+rpc_transport_get_peeraddr(rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, size_t salen)
{
- int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", this, out);
+ int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO("rpc", this, out);
- ret = this->ops->get_peeraddr (this, peeraddr, addrlen, sa, salen);
+ ret = this->ops->get_peeraddr(this, peeraddr, addrlen, sa, salen);
out:
- return ret;
+ return ret;
}
void
-rpc_transport_pollin_destroy (rpc_transport_pollin_t *pollin)
+rpc_transport_pollin_destroy(rpc_transport_pollin_t *pollin)
{
- GF_VALIDATE_OR_GOTO ("rpc", pollin, out);
-
- if (pollin->iobref) {
- iobref_unref (pollin->iobref);
- }
+ GF_VALIDATE_OR_GOTO("rpc", pollin, out);
- if (pollin->hdr_iobuf) {
- iobuf_unref (pollin->hdr_iobuf);
- }
+ if (pollin->iobref) {
+ iobref_unref(pollin->iobref);
+ }
- if (pollin->private) {
- /* */
- GF_FREE (pollin->private);
- }
+ if (pollin->private) {
+ /* */
+ GF_FREE(pollin->private);
+ }
- GF_FREE (pollin);
+ GF_FREE(pollin);
out:
- return;
+ return;
}
-
rpc_transport_pollin_t *
-rpc_transport_pollin_alloc (rpc_transport_t *this, struct iovec *vector,
- int count, struct iobuf *hdr_iobuf,
- struct iobref *iobref, void *private)
+rpc_transport_pollin_alloc(rpc_transport_t *this, struct iovec *vector,
+ int count, struct iobuf *hdr_iobuf,
+ struct iobref *iobref, void *private)
{
- rpc_transport_pollin_t *msg = NULL;
- msg = GF_CALLOC (1, sizeof (*msg), gf_common_mt_rpc_trans_pollin_t);
- if (!msg) {
- goto out;
- }
+ rpc_transport_pollin_t *msg = NULL;
+ msg = GF_CALLOC(1, sizeof(*msg), gf_common_mt_rpc_trans_pollin_t);
+ if (!msg) {
+ goto out;
+ }
- if (count > 1) {
- msg->vectored = 1;
- }
+ msg->trans = this;
- memcpy (msg->vector, vector, count * sizeof (*vector));
- msg->count = count;
- msg->iobref = iobref_ref (iobref);
- msg->private = private;
- if (hdr_iobuf)
- msg->hdr_iobuf = iobuf_ref (hdr_iobuf);
+ if (count > 1) {
+ msg->vectored = 1;
+ }
+
+ memcpy(msg->vector, vector, count * sizeof(*vector));
+ msg->count = count;
+ msg->iobref = iobref_ref(iobref);
+ msg->private = private;
+ if (hdr_iobuf)
+ iobref_add(iobref, hdr_iobuf);
out:
- return msg;
+ return msg;
}
+void
+rpc_transport_cleanup(rpc_transport_t *trans)
+{
+ if (!trans)
+ return;
+
+ if (trans->fini)
+ trans->fini(trans);
+
+ if (trans->options) {
+ dict_unref(trans->options);
+ trans->options = NULL;
+ }
+
+ GF_FREE(trans->name);
+ if (trans->xl)
+ pthread_mutex_destroy(&trans->lock);
+
+ if (trans->dl_handle)
+ dlclose(trans->dl_handle);
+
+ GF_FREE(trans);
+}
rpc_transport_t *
-rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
+rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *trans_name)
{
- struct rpc_transport *trans = NULL, *return_trans = NULL;
- char *name = NULL;
- void *handle = NULL;
- char *type = NULL;
- char str[] = "ERROR";
- int32_t ret = -1;
- int8_t is_tcp = 0, is_unix = 0, is_ibsdp = 0;
- volume_opt_list_t *vol_opt = NULL;
- gf_boolean_t bind_insecure = _gf_false;
-
- GF_VALIDATE_OR_GOTO("rpc-transport", options, fail);
- GF_VALIDATE_OR_GOTO("rpc-transport", ctx, fail);
- GF_VALIDATE_OR_GOTO("rpc-transport", trans_name, fail);
-
- trans = GF_CALLOC (1, sizeof (struct rpc_transport), gf_common_mt_rpc_trans_t);
- if (!trans)
- goto fail;
-
- trans->name = gf_strdup (trans_name);
- if (!trans->name)
- goto fail;
-
- trans->ctx = ctx;
- type = str;
-
- /* Backward compatibility */
- ret = dict_get_str (options, "transport-type", &type);
- if (ret < 0) {
- ret = dict_set_str (options, "transport-type", "socket");
- if (ret < 0)
- gf_log ("dict", GF_LOG_DEBUG,
- "setting transport-type failed");
- else
- gf_log ("rpc-transport", GF_LOG_WARNING,
- "missing 'option transport-type'. defaulting to "
- "\"socket\"");
- } else {
- {
- /* Backword compatibility to handle * /client,
- * * /server.
- */
- char *tmp = strchr (type, '/');
- if (tmp)
- *tmp = '\0';
- }
-
- is_tcp = strcmp (type, "tcp");
- is_unix = strcmp (type, "unix");
- is_ibsdp = strcmp (type, "ib-sdp");
- if ((is_tcp == 0) ||
- (is_unix == 0) ||
- (is_ibsdp == 0)) {
- if (is_unix == 0)
- ret = dict_set_str (options,
- "transport.address-family",
- "unix");
- if (is_ibsdp == 0)
- ret = dict_set_str (options,
- "transport.address-family",
- "inet-sdp");
-
- if (ret < 0)
- gf_log ("dict", GF_LOG_DEBUG,
- "setting address-family failed");
-
- ret = dict_set_str (options,
- "transport-type", "socket");
- if (ret < 0)
- gf_log ("dict", GF_LOG_DEBUG,
- "setting transport-type failed");
- }
- }
-
- /* client-bind-insecure is for clients protocol, and
- * bind-insecure for glusterd. Both mutually exclusive
- */
- ret = dict_get_str (options, "client-bind-insecure", &type);
- if (ret)
- ret = dict_get_str (options, "bind-insecure", &type);
- if (ret == 0) {
- ret = gf_string2boolean (type, &bind_insecure);
- if (ret < 0) {
- gf_log ("rcp-transport", GF_LOG_WARNING,
- "bind-insecure option %s is not a"
- " valid bool option", type);
- goto fail;
- }
- if (_gf_true == bind_insecure)
- trans->bind_insecure = 1;
- else
- trans->bind_insecure = 0;
- } else {
- trans->bind_insecure = 0;
+ struct rpc_transport *trans = NULL, *return_trans = NULL;
+ char *name = NULL;
+ void *handle = NULL;
+ char *type = NULL;
+ static char str[] = "ERROR";
+ int32_t ret = -1;
+ int is_tcp = 0, is_unix = 0, is_ibsdp = 0;
+ volume_opt_list_t *vol_opt = NULL;
+ gf_boolean_t bind_insecure = _gf_false;
+ xlator_t *this = NULL;
+ gf_boolean_t success = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("rpc-transport", options, fail);
+ GF_VALIDATE_OR_GOTO("rpc-transport", ctx, fail);
+ GF_VALIDATE_OR_GOTO("rpc-transport", trans_name, fail);
+
+ trans = GF_CALLOC(1, sizeof(struct rpc_transport),
+ gf_common_mt_rpc_trans_t);
+ if (!trans)
+ goto fail;
+
+ trans->name = gf_strdup(trans_name);
+ if (!trans->name)
+ goto fail;
+
+ trans->ctx = ctx;
+ type = str;
+
+ /* Backward compatibility */
+ ret = dict_get_str_sizen(options, "transport-type", &type);
+ if (ret < 0) {
+ ret = dict_set_str_sizen(options, "transport-type", "socket");
+ if (ret < 0)
+ gf_log("dict", GF_LOG_DEBUG, "setting transport-type failed");
+ else
+ gf_log("rpc-transport", GF_LOG_DEBUG,
+ "missing 'option transport-type'. defaulting to "
+ "\"socket\"");
+ } else {
+ {
+ /* Backward compatibility to handle * /client,
+ * * /server.
+ */
+ char *tmp = strchr(type, '/');
+ if (tmp)
+ *tmp = '\0';
}
- ret = dict_get_str (options, "transport-type", &type);
- if (ret < 0) {
- gf_log ("rpc-transport", GF_LOG_ERROR,
- "'option transport-type <xx>' missing in volume '%s'",
- trans_name);
- goto fail;
- }
-
- ret = gf_asprintf (&name, "%s/%s.so", RPC_TRANSPORTDIR, type);
- if (-1 == ret) {
- goto fail;
+ is_tcp = strcmp(type, "tcp");
+ is_unix = strcmp(type, "unix");
+ is_ibsdp = strcmp(type, "ib-sdp");
+ if ((is_tcp == 0) || (is_unix == 0) || (is_ibsdp == 0)) {
+ if (is_unix == 0)
+ ret = dict_set_str_sizen(options, "transport.address-family",
+ "unix");
+ if (is_ibsdp == 0)
+ ret = dict_set_str_sizen(options, "transport.address-family",
+ "inet-sdp");
+
+ if (ret < 0)
+ gf_log("dict", GF_LOG_DEBUG, "setting address-family failed");
+
+ ret = dict_set_str_sizen(options, "transport-type", "socket");
+ if (ret < 0)
+ gf_log("dict", GF_LOG_DEBUG, "setting transport-type failed");
}
-
- gf_log ("rpc-transport", GF_LOG_DEBUG,
- "attempt to load file %s", name);
-
- handle = dlopen (name, RTLD_NOW|RTLD_GLOBAL);
- if (handle == NULL) {
- gf_log ("rpc-transport", GF_LOG_ERROR, "%s", dlerror ());
- gf_log ("rpc-transport", GF_LOG_ERROR,
- "volume '%s': transport-type '%s' is not valid or "
- "not found on this machine",
- trans_name, type);
- goto fail;
- }
-
- trans->dl_handle = handle;
-
- trans->ops = dlsym (handle, "tops");
- if (trans->ops == NULL) {
- gf_log ("rpc-transport", GF_LOG_ERROR,
- "dlsym (rpc_transport_ops) on %s", dlerror ());
- goto fail;
- }
-
- trans->init = dlsym (handle, "init");
- if (trans->init == NULL) {
- gf_log ("rpc-transport", GF_LOG_ERROR,
- "dlsym (gf_rpc_transport_init) on %s", dlerror ());
- goto fail;
- }
-
- trans->fini = dlsym (handle, "fini");
- if (trans->fini == NULL) {
- gf_log ("rpc-transport", GF_LOG_ERROR,
- "dlsym (gf_rpc_transport_fini) on %s", dlerror ());
- goto fail;
- }
-
- trans->reconfigure = dlsym (handle, "reconfigure");
- if (trans->fini == NULL) {
- gf_log ("rpc-transport", GF_LOG_DEBUG,
- "dlsym (gf_rpc_transport_reconfigure) on %s", dlerror());
+ }
+
+ /* client-bind-insecure is for clients protocol, and
+ * bind-insecure for glusterd. Both mutually exclusive
+ */
+ ret = dict_get_str_sizen(options, "client-bind-insecure", &type);
+ if (ret)
+ ret = dict_get_str_sizen(options, "bind-insecure", &type);
+ if (ret == 0) {
+ ret = gf_string2boolean(type, &bind_insecure);
+ if (ret < 0) {
+ gf_log("rcp-transport", GF_LOG_WARNING,
+ "bind-insecure option %s is not a"
+ " valid bool option",
+ type);
+ goto fail;
}
-
- vol_opt = GF_CALLOC (1, sizeof (volume_opt_list_t),
- gf_common_mt_volume_opt_list_t);
- if (!vol_opt) {
- goto fail;
+ if (_gf_true == bind_insecure)
+ trans->bind_insecure = 1;
+ else
+ trans->bind_insecure = 0;
+ } else {
+ /* By default allow bind insecure */
+ trans->bind_insecure = 1;
+ }
+
+ ret = dict_get_str_sizen(options, "transport-type", &type);
+ if (ret < 0) {
+ gf_log("rpc-transport", GF_LOG_ERROR,
+ "'option transport-type <xx>' missing in volume '%s'",
+ trans_name);
+ goto fail;
+ }
+
+ ret = gf_asprintf(&name, "%s/%s.so", RPC_TRANSPORTDIR, type);
+ if (-1 == ret) {
+ goto fail;
+ }
+
+ if (dict_get_sizen(options, "notify-poller-death")) {
+ trans->notify_poller_death = 1;
+ }
+
+ gf_log("rpc-transport", GF_LOG_DEBUG, "attempt to load file %s", name);
+
+ handle = dlopen(name, RTLD_NOW);
+ if (handle == NULL) {
+ gf_log("rpc-transport", GF_LOG_ERROR, "%s", dlerror());
+ gf_log("rpc-transport", GF_LOG_WARNING,
+ "volume '%s': transport-type '%s' is not valid or "
+ "not found on this machine",
+ trans_name, type);
+ goto fail;
+ }
+
+ trans->dl_handle = handle;
+
+ trans->ops = dlsym(handle, "tops");
+ if (trans->ops == NULL) {
+ gf_log("rpc-transport", GF_LOG_ERROR, "dlsym (rpc_transport_ops) on %s",
+ dlerror());
+ goto fail;
+ }
+
+ *VOID(&(trans->init)) = dlsym(handle, "init");
+ if (trans->init == NULL) {
+ gf_log("rpc-transport", GF_LOG_ERROR,
+ "dlsym (gf_rpc_transport_init) on %s", dlerror());
+ goto fail;
+ }
+
+ *VOID(&(trans->fini)) = dlsym(handle, "fini");
+ if (trans->fini == NULL) {
+ gf_log("rpc-transport", GF_LOG_ERROR,
+ "dlsym (gf_rpc_transport_fini) on %s", dlerror());
+ goto fail;
+ }
+
+ *VOID(&(trans->reconfigure)) = dlsym(handle, "reconfigure");
+ if (trans->reconfigure == NULL) {
+ gf_log("rpc-transport", GF_LOG_DEBUG,
+ "dlsym (gf_rpc_transport_reconfigure) on %s", dlerror());
+ }
+
+ vol_opt = GF_CALLOC(1, sizeof(volume_opt_list_t),
+ gf_common_mt_volume_opt_list_t);
+ if (!vol_opt) {
+ goto fail;
+ }
+
+ this = THIS;
+ vol_opt->given_opt = dlsym(handle, "options");
+ if (vol_opt->given_opt == NULL) {
+ gf_log("rpc-transport", GF_LOG_DEBUG,
+ "volume option validation not specified");
+ } else {
+ INIT_LIST_HEAD(&vol_opt->list);
+ list_add_tail(&vol_opt->list, &(this->volume_options));
+ if (xlator_options_validate_list(this, options, vol_opt, NULL)) {
+ gf_log("rpc-transport", GF_LOG_ERROR,
+ "volume option validation failed");
+ goto fail;
}
+ }
- vol_opt->given_opt = dlsym (handle, "options");
- if (vol_opt->given_opt == NULL) {
- gf_log ("rpc-transport", GF_LOG_DEBUG,
- "volume option validation not specified");
- } else {
- INIT_LIST_HEAD (&vol_opt->list);
- list_add_tail (&vol_opt->list, &(THIS->volume_options));
- if (xlator_options_validate_list (THIS, options, vol_opt,
- NULL)) {
- gf_log ("rpc-transport", GF_LOG_ERROR,
- "volume option validation failed");
- goto fail;
- }
- vol_opt = NULL;
- }
-
- trans->options = options;
+ trans->options = dict_ref(options);
- pthread_mutex_init (&trans->lock, NULL);
- trans->xl = THIS;
+ pthread_mutex_init(&trans->lock, NULL);
+ trans->xl = this;
- ret = trans->init (trans);
- if (ret != 0) {
- gf_log ("rpc-transport", GF_LOG_ERROR,
- "'%s' initialization failed", type);
- goto fail;
- }
+ ret = trans->init(trans);
+ if (ret != 0) {
+ gf_log("rpc-transport", GF_LOG_WARNING, "'%s' initialization failed",
+ type);
+ goto fail;
+ }
- return_trans = trans;
+ INIT_LIST_HEAD(&trans->list);
+ GF_ATOMIC_INIT(trans->disconnect_progress, 0);
- if (name)
- GF_FREE (name);
+ return_trans = trans;
- if (vol_opt)
- GF_FREE (vol_opt);
+ GF_FREE(name);
- return return_trans;
+ success = _gf_true;
fail:
- if (trans) {
- if (trans->name) {
- GF_FREE (trans->name);
- }
+ if (!success) {
+ rpc_transport_cleanup(trans);
+ GF_FREE(name);
- if (trans->dl_handle)
- dlclose (trans->dl_handle);
+ return_trans = NULL;
+ }
- GF_FREE (trans);
+ if (vol_opt) {
+ if (!list_empty(&vol_opt->list)) {
+ list_del_init(&vol_opt->list);
}
+ GF_FREE(vol_opt);
+ }
- if (name)
- GF_FREE (name);
-
- if (vol_opt)
- GF_FREE (vol_opt);
-
- return NULL;
+ return return_trans;
}
-
int32_t
-rpc_transport_submit_request (rpc_transport_t *this, rpc_transport_req_t *req)
+rpc_transport_submit_request(rpc_transport_t *this, rpc_transport_req_t *req)
{
- int32_t ret = -1;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
- GF_VALIDATE_OR_GOTO("rpc_transport", this->ops, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this->ops, fail);
- ret = this->ops->submit_request (this, req);
+ ret = this->ops->submit_request(this, req);
fail:
- return ret;
+ return ret;
}
-
int32_t
-rpc_transport_submit_reply (rpc_transport_t *this, rpc_transport_reply_t *reply)
+rpc_transport_submit_reply(rpc_transport_t *this, rpc_transport_reply_t *reply)
{
- int32_t ret = -1;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
- GF_VALIDATE_OR_GOTO("rpc_transport", this->ops, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this->ops, fail);
- ret = this->ops->submit_reply (this, reply);
+ ret = this->ops->submit_reply(this, reply);
fail:
- return ret;
+ return ret;
}
-
int32_t
-rpc_transport_connect (rpc_transport_t *this, int port)
+rpc_transport_connect(rpc_transport_t *this, int port)
{
- int ret = -1;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
- ret = this->ops->connect (this, port);
+ ret = this->ops->connect(this, port);
fail:
- return ret;
+ return ret;
}
-
int32_t
-rpc_transport_listen (rpc_transport_t *this)
+rpc_transport_listen(rpc_transport_t *this)
{
- int ret = -1;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
- ret = this->ops->listen (this);
+ ret = this->ops->listen(this);
fail:
- return ret;
+ return ret;
}
-
int32_t
-rpc_transport_disconnect (rpc_transport_t *this)
+rpc_transport_disconnect(rpc_transport_t *this, gf_boolean_t wait)
{
- int32_t ret = -1;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
- GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ ret = this->ops->disconnect(this, wait);
- ret = this->ops->disconnect (this);
fail:
- return ret;
+ return ret;
}
-
-int32_t
-rpc_transport_destroy (rpc_transport_t *this)
+static void
+rpc_transport_destroy(rpc_transport_t *this)
{
- int32_t ret = -1;
+ struct dnscache6 *cache = NULL;
- GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ if (this->clnt_options)
+ dict_unref(this->clnt_options);
+ if (this->options)
+ dict_unref(this->options);
+ if (this->fini)
+ this->fini(this);
- if (this->options)
- dict_unref (this->options);
- if (this->fini)
- this->fini (this);
+ pthread_mutex_destroy(&this->lock);
- pthread_mutex_destroy (&this->lock);
+ GF_FREE(this->name);
- if (this->name)
- GF_FREE (this->name);
+ if (this->dl_handle)
+ dlclose(this->dl_handle);
- if (this->dl_handle)
- dlclose (this->dl_handle);
+ if (this->ssl_name) {
+ GF_FREE(this->ssl_name);
+ }
- GF_FREE (this);
-fail:
- return ret;
-}
+ if (this->dnscache) {
+ cache = this->dnscache;
+ if (cache->first)
+ freeaddrinfo(cache->first);
+ GF_FREE(this->dnscache);
+ }
+ GF_FREE(this);
+}
rpc_transport_t *
-rpc_transport_ref (rpc_transport_t *this)
+rpc_transport_ref(rpc_transport_t *this)
{
- rpc_transport_t *return_this = NULL;
+ rpc_transport_t *return_this = NULL;
- GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
- pthread_mutex_lock (&this->lock);
- {
- this->refcount ++;
- }
- pthread_mutex_unlock (&this->lock);
+ GF_ATOMIC_INC(this->refcount);
- return_this = this;
+ return_this = this;
fail:
- return return_this;
+ return return_this;
}
-
int32_t
-rpc_transport_unref (rpc_transport_t *this)
+rpc_transport_unref(rpc_transport_t *this)
{
- int32_t refcount = 0;
- int32_t ret = -1;
+ int32_t refcount = 0;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
+ GF_VALIDATE_OR_GOTO("rpc_transport", this, fail);
- pthread_mutex_lock (&this->lock);
- {
- refcount = --this->refcount;
- }
- pthread_mutex_unlock (&this->lock);
+ refcount = GF_ATOMIC_DEC(this->refcount);
- if (refcount == 0) {
- if (this->mydata)
- this->notify (this, this->mydata, RPC_TRANSPORT_CLEANUP,
- NULL);
- rpc_transport_destroy (this);
- }
+ if (refcount == 0) {
+ if (this->mydata)
+ this->notify(this, this->mydata, RPC_TRANSPORT_CLEANUP, NULL);
+ this->mydata = NULL;
+ this->notify = NULL;
+ rpc_transport_destroy(this);
+ }
- ret = 0;
+ ret = 0;
fail:
- return ret;
+ return ret;
}
-
int32_t
-rpc_transport_notify (rpc_transport_t *this, rpc_transport_event_t event,
- void *data, ...)
+rpc_transport_notify(rpc_transport_t *this, rpc_transport_event_t event,
+ void *data, ...)
{
- int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", this, out);
+ int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO("rpc", this, out);
- if (this->notify != NULL) {
- ret = this->notify (this, this->mydata, event, data);
- } else {
- ret = 0;
- }
+ if (this->notify != NULL) {
+ ret = this->notify(this, this->mydata, event, data);
+ } else {
+ ret = 0;
+ }
out:
- return ret;
+ return ret;
}
-
-
-inline int
-rpc_transport_register_notify (rpc_transport_t *trans,
- rpc_transport_notify_t notify, void *mydata)
+int
+rpc_transport_register_notify(rpc_transport_t *trans,
+ rpc_transport_notify_t notify, void *mydata)
{
- int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", trans, out);
+ int32_t ret = -1;
+ GF_VALIDATE_OR_GOTO("rpc", trans, out);
- trans->notify = notify;
- trans->mydata = mydata;
+ trans->notify = notify;
+ trans->mydata = mydata;
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
-inline int
-rpc_transport_unregister_notify (rpc_transport_t *trans)
+// give negative values to skip setting that value
+// this function asserts if both the values are negative.
+// why call it if you don't set it.
+int
+rpc_transport_keepalive_options_set(dict_t *options, int32_t interval,
+ int32_t time, int32_t timeout)
{
- GF_VALIDATE_OR_GOTO ("rpc-transport", trans, out);
+ int ret = -1;
+
+ GF_ASSERT(options);
+ GF_ASSERT((interval > 0) || (time > 0));
- trans->notify = NULL;
- trans->mydata = NULL;
+ ret = dict_set_int32_sizen(options, "transport.socket.keepalive-interval",
+ interval);
+ if (ret)
+ goto out;
+ ret = dict_set_int32_sizen(options, "transport.socket.keepalive-time",
+ time);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32_sizen(options, "transport.tcp-user-timeout", timeout);
+ if (ret)
+ goto out;
out:
- return 0;
+ return ret;
}
-
-//give negative values to skip setting that value
-//this function asserts if both the values are negative.
-//why call it if you dont set it.
int
-rpc_transport_keepalive_options_set (dict_t *options, int32_t interval,
- int32_t time)
+rpc_transport_unix_options_build(dict_t *dict, char *filepath,
+ int frame_timeout)
{
- int ret = -1;
-
- GF_ASSERT (options);
- GF_ASSERT ((interval > 0) || (time > 0));
-
- ret = dict_set_int32 (options,
- "transport.socket.keepalive-interval", interval);
+ char *fpath = NULL;
+ int ret = -1;
+
+ GF_ASSERT(filepath);
+ GF_VALIDATE_OR_GOTO("rpc-transport", dict, out);
+
+ fpath = gf_strdup(filepath);
+ if (!fpath) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr_sizen(dict, "transport.socket.connect-path", fpath);
+ if (ret) {
+ GF_FREE(fpath);
+ goto out;
+ }
+
+ ret = dict_set_str_sizen(dict, "transport.address-family", "unix");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str_sizen(dict, "transport.socket.nodelay", "off");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str_sizen(dict, "transport-type", "socket");
+ if (ret)
+ goto out;
+
+ ret = dict_set_str_sizen(dict, "transport.socket.keepalive", "off");
+ if (ret)
+ goto out;
+
+ if (frame_timeout > 0) {
+ ret = dict_set_int32_sizen(dict, "frame-timeout", frame_timeout);
if (ret)
- goto out;
-
- ret = dict_set_int32 (options,
- "transport.socket.keepalive-time", time);
- if (ret)
- goto out;
+ goto out;
+ }
out:
- return ret;
+ return ret;
}
int
-rpc_transport_inet_options_build (dict_t **options, const char *hostname,
- int port)
+rpc_transport_inet_options_build(dict_t *dict, const char *hostname, int port,
+ char *af)
{
- dict_t *dict = NULL;
- char *host = NULL;
- int ret = -1;
-
- GF_ASSERT (options);
- GF_ASSERT (hostname);
- GF_ASSERT (port >= 1024);
-
- dict = dict_new ();
- if (!dict)
- goto out;
-
- host = gf_strdup ((char*)hostname);
- if (!hostname) {
- ret = -1;
- goto out;
- }
-
- ret = dict_set_dynstr (dict, "remote-host", host);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set remote-host with %s", host);
- goto out;
- }
-
- ret = dict_set_int32 (dict, "remote-port", port);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set remote-port with %d", port);
- goto out;
- }
- ret = dict_set_str (dict, "transport.address-family", "inet");
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set addr-family with inet");
- goto out;
- }
-
- ret = dict_set_str (dict, "transport-type", "socket");
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set trans-type with socket");
- goto out;
- }
+ char *host = NULL;
+ int ret = -1;
+#ifdef IPV6_DEFAULT
+ static char *addr_family = "inet6";
+#else
+ static char *addr_family = "inet";
+#endif
- *options = dict;
+ GF_ASSERT(hostname);
+ GF_ASSERT(port >= 1024);
+ GF_VALIDATE_OR_GOTO("rpc-transport", dict, out);
+
+ host = gf_strdup((char *)hostname);
+ if (!host) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr_sizen(dict, "remote-host", host);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set remote-host with %s",
+ host);
+ GF_FREE(host);
+ goto out;
+ }
+
+ ret = dict_set_int32_sizen(dict, "remote-port", port);
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set remote-port with %d",
+ port);
+ goto out;
+ }
+
+ ret = dict_set_str_sizen(dict, "address-family",
+ (af != NULL ? af : addr_family));
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set address-family to %s",
+ addr_family);
+ goto out;
+ }
+
+ ret = dict_set_str_sizen(dict, "transport-type", "socket");
+ if (ret) {
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "failed to set trans-type with socket");
+ goto out;
+ }
out:
- if (ret) {
- if (host)
- GF_FREE (host);
- if (dict)
- dict_unref (dict);
- }
-
- return ret;
+ return ret;
}
diff --git a/rpc/rpc-lib/src/rpc-transport.h b/rpc/rpc-lib/src/rpc-transport.h
index 91d802220e2..c499f0bb955 100644
--- a/rpc/rpc-lib/src/rpc-transport.h
+++ b/rpc/rpc-lib/src/rpc-transport.h
@@ -11,12 +11,6 @@
#ifndef __RPC_TRANSPORT_H__
#define __RPC_TRANSPORT_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
#include <inttypes.h>
#ifdef GF_SOLARIS_HOST_OS
#include <rpc/auth.h>
@@ -26,7 +20,6 @@
#include <rpc/rpc_msg.h>
-
#ifndef MAX_IOVEC
#define MAX_IOVEC 16
#endif
@@ -48,39 +41,45 @@
*/
#define RPC_FRAGSIZE(fraghdr) ((uint32_t)(fraghdr & 0x7fffffffU))
-#define RPC_FRAGHDR_SIZE 4
-#define RPC_MSGTYPE_SIZE 8
+#define RPC_FRAGHDR_SIZE 4
+#define RPC_MSGTYPE_SIZE 8
/* size of the msg from the start of call-body till and including credlen */
-#define RPC_CALL_BODY_SIZE 24
+#define RPC_CALL_BODY_SIZE 24
-#define RPC_REPLY_STATUS_SIZE 4
+#define RPC_REPLY_STATUS_SIZE 4
#define RPC_AUTH_FLAVOUR_N_LENGTH_SIZE 8
-#define RPC_ACCEPT_STATUS_LEN 4
+#define RPC_ACCEPT_STATUS_LEN 4
struct rpc_transport_ops;
typedef struct rpc_transport rpc_transport_t;
-#include "dict.h"
-#include "compat.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/async.h>
#include "rpcsvc-common.h"
struct peer_info {
- struct sockaddr_storage sockaddr;
- socklen_t sockaddr_len;
- char identifier[UNIX_PATH_MAX];
+ // OP-VERSION of clients
+ uint32_t max_op_version;
+ uint32_t min_op_version;
+ struct sockaddr_storage sockaddr;
+ socklen_t sockaddr_len;
+ char identifier[UNIX_PATH_MAX];
+ // Volume mounted by client
+ char volname[NAME_MAX];
};
typedef struct peer_info peer_info_t;
typedef enum msg_type msg_type_t;
typedef enum {
- RPC_TRANSPORT_ACCEPT, /* New client has been accepted */
- RPC_TRANSPORT_DISCONNECT, /* Connection is disconnected */
- RPC_TRANSPORT_CLEANUP, /* connection is about to be freed */
- /*RPC_TRANSPORT_READ,*/ /* An event used to enable rpcsvc to instruct
+ RPC_TRANSPORT_ACCEPT, /* New client has been accepted */
+ RPC_TRANSPORT_DISCONNECT, /* Connection is disconnected */
+ RPC_TRANSPORT_CLEANUP, /* connection is about to be freed */
+ /*RPC_TRANSPORT_READ,*/ /* An event used to enable rpcsvc to instruct
* transport the number of bytes to read.
* This helps in reading large msgs, wherein
* the rpc actors might decide to place the
@@ -92,54 +91,55 @@ typedef enum {
* reading a single msg, this event may be
* delivered more than once.
*/
- RPC_TRANSPORT_MAP_XID_REQUEST, /* receiver of this event should send
- * the prognum and procnum corresponding
- * to xid.
- */
- RPC_TRANSPORT_MSG_RECEIVED, /* Complete rpc msg has been read */
- RPC_TRANSPORT_CONNECT, /* client is connected to server */
- RPC_TRANSPORT_MSG_SENT,
+ RPC_TRANSPORT_MAP_XID_REQUEST, /* receiver of this event should send
+ * the prognum and procnum corresponding
+ * to xid.
+ */
+ RPC_TRANSPORT_MSG_RECEIVED, /* Complete rpc msg has been read */
+ RPC_TRANSPORT_CONNECT, /* client is connected to server */
+ RPC_TRANSPORT_MSG_SENT,
+ RPC_TRANSPORT_EVENT_THREAD_DIED /* event-thread has died */
} rpc_transport_event_t;
struct rpc_transport_msg {
- struct iovec *rpchdr;
- int rpchdrcount;
- struct iovec *proghdr;
- int proghdrcount;
- struct iovec *progpayload;
- int progpayloadcount;
- struct iobref *iobref;
+ struct iovec *rpchdr;
+ struct iovec *proghdr;
+ int rpchdrcount;
+ int proghdrcount;
+ struct iovec *progpayload;
+ struct iobref *iobref;
+ int progpayloadcount;
};
typedef struct rpc_transport_msg rpc_transport_msg_t;
struct rpc_transport_rsp {
- struct iovec *rsphdr;
- int rsphdr_count;
- struct iovec *rsp_payload;
- int rsp_payload_count;
- struct iobref *rsp_iobref;
+ struct iovec *rsphdr;
+ struct iovec *rsp_payload;
+ int rsphdr_count;
+ int rsp_payload_count;
+ struct iobref *rsp_iobref;
};
typedef struct rpc_transport_rsp rpc_transport_rsp_t;
struct rpc_transport_req {
- rpc_transport_msg_t msg;
- rpc_transport_rsp_t rsp;
- struct rpc_req *rpc_req;
+ struct rpc_req *rpc_req;
+ rpc_transport_msg_t msg;
+ rpc_transport_rsp_t rsp;
};
typedef struct rpc_transport_req rpc_transport_req_t;
struct rpc_transport_reply {
- rpc_transport_msg_t msg;
- void *private;
+ void *private;
+ rpc_transport_msg_t msg;
};
typedef struct rpc_transport_reply rpc_transport_reply_t;
struct rpc_transport_data {
- char is_request;
- union {
- rpc_transport_req_t req;
- rpc_transport_reply_t reply;
- } data;
+ union {
+ rpc_transport_req_t req;
+ rpc_transport_reply_t reply;
+ } data;
+ char is_request;
};
typedef struct rpc_transport_data rpc_transport_data_t;
@@ -147,155 +147,166 @@ typedef struct rpc_transport_data rpc_transport_data_t;
* rpc_request, hence these should be removed from request_info
*/
struct rpc_request_info {
- uint32_t xid;
- int prognum;
- int progver;
- int procnum;
- void *rpc_req; /* struct rpc_req */
- rpc_transport_rsp_t rsp;
+ int prognum;
+ int progver;
+ void *rpc_req; /* struct rpc_req */
+ rpc_transport_rsp_t rsp;
+ int procnum;
+ uint32_t xid;
};
typedef struct rpc_request_info rpc_request_info_t;
+typedef int (*rpc_transport_notify_t)(rpc_transport_t *, void *mydata,
+ rpc_transport_event_t, void *data, ...);
-struct rpc_transport_pollin {
- struct iovec vector[2];
- int count;
- char vectored;
- void *private;
- struct iobref *iobref;
- struct iobuf *hdr_iobuf;
- char is_reply;
+struct rpc_transport {
+ struct rpc_transport_ops *ops;
+ rpc_transport_t *listener; /* listener transport to which
+ * request for creation of this
+ * transport came from. valid only
+ * on server process.
+ */
+
+ void *private;
+ struct _client *xl_private;
+ void *xl; /* Used for THIS */
+ void *mydata;
+ pthread_mutex_t lock;
+ gf_atomic_t refcount;
+ glusterfs_ctx_t *ctx;
+ dict_t *options;
+ char *name;
+ void *dnscache;
+ void *drc_client;
+ data_t *buf;
+ int32_t (*init)(rpc_transport_t *this);
+ void (*fini)(rpc_transport_t *this);
+ int (*reconfigure)(rpc_transport_t *this, dict_t *options);
+ rpc_transport_notify_t notify;
+ void *notify_data;
+ peer_info_t peerinfo;
+ peer_info_t myinfo;
+
+ uint64_t total_bytes_read;
+ uint64_t total_bytes_write;
+ uint32_t xid; /* RPC/XID used for callbacks */
+ int32_t outstanding_rpc_count;
+
+ struct list_head list;
+ void *dl_handle; /* handle of dlopen() */
+ char *ssl_name;
+ dict_t *clnt_options; /* store options received from
+ * client */
+ gf_atomic_t disconnect_progress;
+ int bind_insecure;
+ /* connect_failed: saves the connect() syscall status as socket_t
+ * member holding connect() status can't be accessed by higher gfapi
+ * layer or in client management notification handler functions
+ */
+ gf_boolean_t connect_failed;
+ char notify_poller_death;
+ char poller_death_accept;
};
-typedef struct rpc_transport_pollin rpc_transport_pollin_t;
-
-typedef int (*rpc_transport_notify_t) (rpc_transport_t *, void *mydata,
- rpc_transport_event_t, void *data, ...);
-
-struct rpc_transport {
- struct rpc_transport_ops *ops;
- rpc_transport_t *listener; /* listener transport to which
- * request for creation of this
- * transport came from. valid only
- * on server process.
- */
-
- void *private;
- void *xl_private;
- void *xl; /* Used for THIS */
- void *mydata;
- pthread_mutex_t lock;
- int32_t refcount;
-
- glusterfs_ctx_t *ctx;
- dict_t *options;
- char *name;
- void *dnscache;
- data_t *buf;
- int32_t (*init) (rpc_transport_t *this);
- void (*fini) (rpc_transport_t *this);
- int (*reconfigure) (rpc_transport_t *this, dict_t *options);
- rpc_transport_notify_t notify;
- void *notify_data;
- peer_info_t peerinfo;
- peer_info_t myinfo;
-
- uint64_t total_bytes_read;
- uint64_t total_bytes_write;
-
- struct list_head list;
- int bind_insecure;
- void *dl_handle; /* handle of dlopen() */
+struct rpc_transport_pollin {
+ struct rpc_transport *trans;
+ void *private;
+ struct iobref *iobref;
+ struct iovec vector[MAX_IOVEC];
+ gf_async_t async;
+ int count;
+ char is_reply;
+ char vectored;
};
+typedef struct rpc_transport_pollin rpc_transport_pollin_t;
struct rpc_transport_ops {
- /* no need of receive op, msg will be delivered through an event
- * notification
- */
- int32_t (*submit_request) (rpc_transport_t *this,
- rpc_transport_req_t *req);
- int32_t (*submit_reply) (rpc_transport_t *this,
- rpc_transport_reply_t *reply);
- int32_t (*connect) (rpc_transport_t *this, int port);
- int32_t (*listen) (rpc_transport_t *this);
- int32_t (*disconnect) (rpc_transport_t *this);
- int32_t (*get_peername) (rpc_transport_t *this, char *hostname,
- int hostlen);
- int32_t (*get_peeraddr) (rpc_transport_t *this, char *peeraddr,
- int addrlen, struct sockaddr_storage *sa,
- socklen_t sasize);
- int32_t (*get_myname) (rpc_transport_t *this, char *hostname,
- int hostlen);
- int32_t (*get_myaddr) (rpc_transport_t *this, char *peeraddr,
- int addrlen, struct sockaddr_storage *sa,
- socklen_t sasize);
+ /* no need of receive op, msg will be delivered through an event
+ * notification
+ */
+ int32_t (*submit_request)(rpc_transport_t *this, rpc_transport_req_t *req);
+ int32_t (*submit_reply)(rpc_transport_t *this,
+ rpc_transport_reply_t *reply);
+ int32_t (*connect)(rpc_transport_t *this, int port);
+ int32_t (*listen)(rpc_transport_t *this);
+ int32_t (*disconnect)(rpc_transport_t *this, gf_boolean_t wait);
+ int32_t (*get_peername)(rpc_transport_t *this, char *hostname, int hostlen);
+ int32_t (*get_peeraddr)(rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, socklen_t sasize);
+ int32_t (*get_myname)(rpc_transport_t *this, char *hostname, int hostlen);
+ int32_t (*get_myaddr)(rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, socklen_t sasize);
+ int32_t (*throttle)(rpc_transport_t *this, gf_boolean_t onoff);
};
-
int32_t
-rpc_transport_listen (rpc_transport_t *this);
+rpc_transport_count(const char *transport_type);
int32_t
-rpc_transport_connect (rpc_transport_t *this, int port);
+rpc_transport_listen(rpc_transport_t *this);
int32_t
-rpc_transport_disconnect (rpc_transport_t *this);
+rpc_transport_connect(rpc_transport_t *this, int port);
int32_t
-rpc_transport_destroy (rpc_transport_t *this);
+rpc_transport_disconnect(rpc_transport_t *this, gf_boolean_t wait);
int32_t
-rpc_transport_notify (rpc_transport_t *this, rpc_transport_event_t event,
- void *data, ...);
+rpc_transport_notify(rpc_transport_t *this, rpc_transport_event_t event,
+ void *data, ...);
int32_t
-rpc_transport_submit_request (rpc_transport_t *this, rpc_transport_req_t *req);
+rpc_transport_submit_request(rpc_transport_t *this, rpc_transport_req_t *req);
int32_t
-rpc_transport_submit_reply (rpc_transport_t *this,
- rpc_transport_reply_t *reply);
+rpc_transport_submit_reply(rpc_transport_t *this, rpc_transport_reply_t *reply);
rpc_transport_t *
-rpc_transport_load (glusterfs_ctx_t *ctx, dict_t *options, char *name);
+rpc_transport_load(glusterfs_ctx_t *ctx, dict_t *options, char *name);
rpc_transport_t *
-rpc_transport_ref (rpc_transport_t *trans);
+rpc_transport_ref(rpc_transport_t *trans);
int32_t
-rpc_transport_unref (rpc_transport_t *trans);
-
-int
-rpc_transport_register_notify (rpc_transport_t *trans, rpc_transport_notify_t,
- void *mydata);
+rpc_transport_unref(rpc_transport_t *trans);
int
-rpc_transport_unregister_notify (rpc_transport_t *trans);
+rpc_transport_register_notify(rpc_transport_t *trans, rpc_transport_notify_t,
+ void *mydata);
int32_t
-rpc_transport_get_peername (rpc_transport_t *this, char *hostname, int hostlen);
+rpc_transport_get_peername(rpc_transport_t *this, char *hostname, int hostlen);
int32_t
-rpc_transport_get_peeraddr (rpc_transport_t *this, char *peeraddr, int addrlen,
- struct sockaddr_storage *sa, size_t salen);
+rpc_transport_get_peeraddr(rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, size_t salen);
int32_t
-rpc_transport_get_myname (rpc_transport_t *this, char *hostname, int hostlen);
+rpc_transport_get_myaddr(rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, size_t salen);
-int32_t
-rpc_transport_get_myaddr (rpc_transport_t *this, char *peeraddr, int addrlen,
- struct sockaddr_storage *sa, size_t salen);
+int
+rpc_transport_throttle(rpc_transport_t *this, gf_boolean_t onoff);
rpc_transport_pollin_t *
-rpc_transport_pollin_alloc (rpc_transport_t *this, struct iovec *vector,
- int count, struct iobuf *hdr_iobuf,
- struct iobref *iobref, void *private);
+rpc_transport_pollin_alloc(rpc_transport_t *this, struct iovec *vector,
+ int count, struct iobuf *hdr_iobuf,
+ struct iobref *iobref, void *private);
void
-rpc_transport_pollin_destroy (rpc_transport_pollin_t *pollin);
+rpc_transport_pollin_destroy(rpc_transport_pollin_t *pollin);
+
+int
+rpc_transport_keepalive_options_set(dict_t *options, int32_t interval,
+ int32_t time, int32_t timeout);
int
-rpc_transport_keepalive_options_set (dict_t *options, int32_t interval,
- int32_t time);
+rpc_transport_unix_options_build(dict_t *options, char *filepath,
+ int frame_timeout);
int
-rpc_transport_inet_options_build (dict_t **options, const char *hostname, int port);
+rpc_transport_inet_options_build(dict_t *options, const char *hostname,
+ int port, char *af);
+
+void
+rpc_transport_cleanup(rpc_transport_t *);
#endif /* __RPC_TRANSPORT_H__ */
diff --git a/rpc/rpc-lib/src/rpcsvc-auth.c b/rpc/rpc-lib/src/rpcsvc-auth.c
index 3a46cc498e5..8e76b4188bb 100644
--- a/rpc/rpc-lib/src/rpcsvc-auth.c
+++ b/rpc/rpc-lib/src/rpcsvc-auth.c
@@ -9,447 +9,553 @@
*/
#include "rpcsvc.h"
-#include "logging.h"
-#include "dict.h"
+#include <glusterfs/dict.h>
extern rpcsvc_auth_t *
-rpcsvc_auth_null_init (rpcsvc_t *svc, dict_t *options);
+rpcsvc_auth_null_init(rpcsvc_t *svc, dict_t *options);
extern rpcsvc_auth_t *
-rpcsvc_auth_unix_init (rpcsvc_t *svc, dict_t *options);
+rpcsvc_auth_unix_init(rpcsvc_t *svc, dict_t *options);
extern rpcsvc_auth_t *
-rpcsvc_auth_glusterfs_init (rpcsvc_t *svc, dict_t *options);
+rpcsvc_auth_glusterfs_init(rpcsvc_t *svc, dict_t *options);
extern rpcsvc_auth_t *
-rpcsvc_auth_glusterfs_v2_init (rpcsvc_t *svc, dict_t *options);
+rpcsvc_auth_glusterfs_v2_init(rpcsvc_t *svc, dict_t *options);
+extern rpcsvc_auth_t *
+rpcsvc_auth_glusterfs_v3_init(rpcsvc_t *svc, dict_t *options);
int
-rpcsvc_auth_add_initer (struct list_head *list, char *idfier,
- rpcsvc_auth_initer_t init)
+rpcsvc_auth_add_initer(struct list_head *list, char *idfier,
+ rpcsvc_auth_initer_t init)
{
- struct rpcsvc_auth_list *new = NULL;
+ struct rpcsvc_auth_list *new = NULL;
- if ((!list) || (!init) || (!idfier))
- return -1;
+ if ((!list) || (!init) || (!idfier))
+ return -1;
- new = GF_CALLOC (1, sizeof (*new), gf_common_mt_rpcsvc_auth_list);
- if (!new) {
- return -1;
- }
+ new = GF_CALLOC(1, sizeof(*new), gf_common_mt_rpcsvc_auth_list);
+ if (!new) {
+ return -1;
+ }
- new->init = init;
- strcpy (new->name, idfier);
- INIT_LIST_HEAD (&new->authlist);
- list_add_tail (&new->authlist, list);
- return 0;
+ new->init = init;
+ strncpy(new->name, idfier, sizeof(new->name) - 1);
+ INIT_LIST_HEAD(&new->authlist);
+ list_add_tail(&new->authlist, list);
+ return 0;
}
-
-
int
-rpcsvc_auth_add_initers (rpcsvc_t *svc)
+rpcsvc_auth_add_initers(rpcsvc_t *svc)
{
- int ret = -1;
-
- ret = rpcsvc_auth_add_initer (&svc->authschemes, "auth-glusterfs",
- (rpcsvc_auth_initer_t)
- rpcsvc_auth_glusterfs_init);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_GLUSTERFS");
- goto err;
- }
-
-
- ret = rpcsvc_auth_add_initer (&svc->authschemes, "auth-glusterfs-v2",
- (rpcsvc_auth_initer_t)
- rpcsvc_auth_glusterfs_v2_init);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR,
- "Failed to add AUTH_GLUSTERFS-v2");
- goto err;
- }
-
- ret = rpcsvc_auth_add_initer (&svc->authschemes, "auth-unix",
- (rpcsvc_auth_initer_t)
- rpcsvc_auth_unix_init);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_UNIX");
- goto err;
- }
-
- ret = rpcsvc_auth_add_initer (&svc->authschemes, "auth-null",
- (rpcsvc_auth_initer_t)
- rpcsvc_auth_null_init);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_NULL");
- goto err;
- }
-
- ret = 0;
+ int ret = -1;
+
+ ret = rpcsvc_auth_add_initer(
+ &svc->authschemes, "auth-glusterfs",
+ (rpcsvc_auth_initer_t)rpcsvc_auth_glusterfs_init);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_GLUSTERFS");
+ goto err;
+ }
+
+ ret = rpcsvc_auth_add_initer(
+ &svc->authschemes, "auth-glusterfs-v2",
+ (rpcsvc_auth_initer_t)rpcsvc_auth_glusterfs_v2_init);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_GLUSTERFS-v2");
+ goto err;
+ }
+
+ ret = rpcsvc_auth_add_initer(
+ &svc->authschemes, "auth-glusterfs-v3",
+ (rpcsvc_auth_initer_t)rpcsvc_auth_glusterfs_v3_init);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_GLUSTERFS-v3");
+ goto err;
+ }
+
+ ret = rpcsvc_auth_add_initer(&svc->authschemes, "auth-unix",
+ (rpcsvc_auth_initer_t)rpcsvc_auth_unix_init);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_UNIX");
+ goto err;
+ }
+
+ ret = rpcsvc_auth_add_initer(&svc->authschemes, "auth-null",
+ (rpcsvc_auth_initer_t)rpcsvc_auth_null_init);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Failed to add AUTH_NULL");
+ goto err;
+ }
+
+ ret = 0;
err:
- return ret;
+ return ret;
}
-
int
-rpcsvc_auth_init_auth (rpcsvc_t *svc, dict_t *options,
- struct rpcsvc_auth_list *authitem)
+rpcsvc_auth_init_auth(rpcsvc_t *svc, dict_t *options,
+ struct rpcsvc_auth_list *authitem)
{
- int ret = -1;
-
- if ((!svc) || (!options) || (!authitem))
- return -1;
-
- if (!authitem->init) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "No init function defined");
- ret = -1;
- goto err;
- }
+ int ret = -1;
+
+ if ((!svc) || (!options) || (!authitem))
+ return -1;
+
+ if (!authitem->init) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "No init function defined");
+ ret = -1;
+ goto err;
+ }
+
+ authitem->auth = authitem->init(svc, options);
+ if (!authitem->auth) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Registration of auth failed:"
+ " %s",
+ authitem->name);
+ ret = -1;
+ goto err;
+ }
+
+ authitem->enable = 1;
+ gf_log(GF_RPCSVC, GF_LOG_TRACE, "Authentication enabled: %s",
+ authitem->auth->authname);
+
+ ret = 0;
+err:
+ return ret;
+}
- authitem->auth = authitem->init (svc, options);
- if (!authitem->auth) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Registration of auth failed:"
- " %s", authitem->name);
- ret = -1;
- goto err;
- }
+int
+rpcsvc_auth_init_auths(rpcsvc_t *svc, dict_t *options)
+{
+ int ret = -1;
+ struct rpcsvc_auth_list *auth = NULL;
+ struct rpcsvc_auth_list *tmp = NULL;
- authitem->enable = 1;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Authentication enabled: %s",
- authitem->auth->authname);
+ if (!svc)
+ return -1;
+ if (list_empty(&svc->authschemes)) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING, "No authentication!");
ret = 0;
+ goto err;
+ }
+
+ /* If auth null and sys are not disabled by the user, we must enable
+ * it by default. This is a globally default rule, the user is still
+ * allowed to disable the two for particular subvolumes.
+ */
+ if (!dict_get(options, "rpc-auth.auth-null")) {
+ ret = dict_set_str(options, "rpc-auth.auth-null", "on");
+ if (ret)
+ gf_log("rpc-auth", GF_LOG_DEBUG, "dict_set failed for 'auth-nill'");
+ }
+
+ if (!dict_get(options, "rpc-auth.auth-unix")) {
+ ret = dict_set_str(options, "rpc-auth.auth-unix", "on");
+ if (ret)
+ gf_log("rpc-auth", GF_LOG_DEBUG, "dict_set failed for 'auth-unix'");
+ }
+
+ if (!dict_get(options, "rpc-auth.auth-glusterfs")) {
+ ret = dict_set_str(options, "rpc-auth.auth-glusterfs", "on");
+ if (ret)
+ gf_log("rpc-auth", GF_LOG_DEBUG, "dict_set failed for 'auth-unix'");
+ }
+
+ list_for_each_entry_safe(auth, tmp, &svc->authschemes, authlist)
+ {
+ ret = rpcsvc_auth_init_auth(svc, options, auth);
+ if (ret == -1)
+ goto err;
+ }
+
+ ret = 0;
err:
- return ret;
+ return ret;
}
-
int
-rpcsvc_auth_init_auths (rpcsvc_t *svc, dict_t *options)
+rpcsvc_set_addr_namelookup(rpcsvc_t *svc, dict_t *options)
{
- int ret = -1;
- struct rpcsvc_auth_list *auth = NULL;
- struct rpcsvc_auth_list *tmp = NULL;
-
- if (!svc)
- return -1;
-
- if (list_empty (&svc->authschemes)) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "No authentication!");
- ret = 0;
- goto err;
- }
-
- /* If auth null and sys are not disabled by the user, we must enable
- * it by default. This is a globally default rule, the user is still
- * allowed to disable the two for particular subvolumes.
- */
- if (!dict_get (options, "rpc-auth.auth-null")) {
- ret = dict_set_str (options, "rpc-auth.auth-null", "on");
- if (ret)
- gf_log ("rpc-auth", GF_LOG_DEBUG,
- "dict_set failed for 'auth-nill'");
- }
+ int ret;
+ static char *addrlookup_key = "rpc-auth.addr.namelookup";
- if (!dict_get (options, "rpc-auth.auth-unix")) {
- ret = dict_set_str (options, "rpc-auth.auth-unix", "on");
- if (ret)
- gf_log ("rpc-auth", GF_LOG_DEBUG,
- "dict_set failed for 'auth-unix'");
- }
-
- if (!dict_get (options, "rpc-auth.auth-glusterfs")) {
- ret = dict_set_str (options, "rpc-auth.auth-glusterfs", "on");
- if (ret)
- gf_log ("rpc-auth", GF_LOG_DEBUG,
- "dict_set failed for 'auth-unix'");
- }
+ if (!svc || !options)
+ return (-1);
- list_for_each_entry_safe (auth, tmp, &svc->authschemes, authlist) {
- ret = rpcsvc_auth_init_auth (svc, options, auth);
- if (ret == -1)
- goto err;
- }
+ /* By default it's disabled */
+ ret = dict_get_str_boolean(options, addrlookup_key, _gf_false);
+ if (ret < 0) {
+ svc->addr_namelookup = _gf_false;
+ } else {
+ svc->addr_namelookup = ret;
+ }
- ret = 0;
-err:
- return ret;
+ if (svc->addr_namelookup)
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "Addr-Name lookup enabled");
+ return (0);
}
int
-rpcsvc_set_allow_insecure (rpcsvc_t *svc, dict_t *options)
+rpcsvc_set_allow_insecure(rpcsvc_t *svc, dict_t *options)
{
- int ret = -1;
- char *allow_insecure_str = NULL;
- gf_boolean_t is_allow_insecure = _gf_false;
+ int ret = -1;
+ char *allow_insecure_str = NULL;
+ gf_boolean_t is_allow_insecure = _gf_false;
- GF_ASSERT (svc);
- GF_ASSERT (options);
+ GF_ASSERT(svc);
+ GF_ASSERT(options);
- ret = dict_get_str (options, "rpc-auth-allow-insecure",
- &allow_insecure_str);
+ ret = dict_get_str(options, "rpc-auth-allow-insecure", &allow_insecure_str);
+ if (0 == ret) {
+ ret = gf_string2boolean(allow_insecure_str, &is_allow_insecure);
if (0 == ret) {
- ret = gf_string2boolean (allow_insecure_str,
- &is_allow_insecure);
- if (0 == ret) {
- if (_gf_true == is_allow_insecure)
- svc->allow_insecure = 1;
- else
- svc->allow_insecure = 0;
- }
+ if (_gf_true == is_allow_insecure)
+ svc->allow_insecure = 1;
+ else
+ svc->allow_insecure = 0;
}
+ } else {
+ /* By default set allow-insecure to true */
+ svc->allow_insecure = 1;
+
+ /* setting in options for the sake of functions that look
+ * configuration params for allow insecure, eg: gf_auth
+ */
+ ret = dict_set_str(options, "rpc-auth-allow-insecure", "on");
+ if (ret < 0)
+ gf_log("rpc-auth", GF_LOG_DEBUG,
+ "dict_set failed for 'allow-insecure'");
+ }
- return 0;
+ return ret;
}
int
-rpcsvc_auth_init (rpcsvc_t *svc, dict_t *options)
+rpcsvc_set_root_squash(rpcsvc_t *svc, dict_t *options)
{
- int ret = -1;
-
- if ((!svc) || (!options))
- return -1;
+ int ret = -1;
+ uid_t anonuid = -1;
+ gid_t anongid = -1;
+
+ GF_ASSERT(svc);
+ GF_ASSERT(options);
+
+ ret = dict_get_str_boolean(options, "root-squash", 0);
+ if (ret != -1)
+ svc->root_squash = ret;
+ else
+ svc->root_squash = _gf_false;
+
+ ret = dict_get_uint32(options, "anonuid", &anonuid);
+ if (!ret)
+ svc->anonuid = anonuid;
+ else
+ svc->anonuid = RPC_NOBODY_UID;
+
+ ret = dict_get_uint32(options, "anongid", &anongid);
+ if (!ret)
+ svc->anongid = anongid;
+ else
+ svc->anongid = RPC_NOBODY_GID;
+
+ if (svc->root_squash)
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "root squashing enabled "
+ "(uid=%d, gid=%d)",
+ svc->anonuid, svc->anongid);
+
+ return 0;
+}
- (void) rpcsvc_set_allow_insecure (svc, options);
- ret = rpcsvc_auth_add_initers (svc);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to add initers");
- goto out;
- }
+int
+rpcsvc_set_all_squash(rpcsvc_t *svc, dict_t *options)
+{
+ int ret = -1;
+
+ uid_t anonuid = -1;
+ gid_t anongid = -1;
+
+ GF_ASSERT(svc);
+ GF_ASSERT(options);
+
+ ret = dict_get_str_boolean(options, "all-squash", 0);
+ if (ret != -1)
+ svc->all_squash = ret;
+ else
+ svc->all_squash = _gf_false;
+
+ ret = dict_get_uint32(options, "anonuid", &anonuid);
+ if (!ret)
+ svc->anonuid = anonuid;
+ else
+ svc->anonuid = RPC_NOBODY_UID;
+
+ ret = dict_get_uint32(options, "anongid", &anongid);
+ if (!ret)
+ svc->anongid = anongid;
+ else
+ svc->anongid = RPC_NOBODY_GID;
+
+ if (svc->all_squash)
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "all squashing enabled "
+ "(uid=%d, gid=%d)",
+ svc->anonuid, svc->anongid);
+
+ return 0;
+}
- ret = rpcsvc_auth_init_auths (svc, options);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to init auth schemes");
- goto out;
- }
+int
+rpcsvc_auth_init(rpcsvc_t *svc, dict_t *options)
+{
+ int ret = -1;
+
+ if ((!svc) || (!options))
+ return -1;
+
+ (void)rpcsvc_set_allow_insecure(svc, options);
+ (void)rpcsvc_set_root_squash(svc, options);
+ (void)rpcsvc_set_all_squash(svc, options);
+ (void)rpcsvc_set_addr_namelookup(svc, options);
+ ret = rpcsvc_auth_add_initers(svc);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Failed to add initers");
+ goto out;
+ }
+
+ ret = rpcsvc_auth_init_auths(svc, options);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Failed to init auth schemes");
+ goto out;
+ }
out:
- return ret;
+ return ret;
}
-
-rpcsvc_auth_t *
-__rpcsvc_auth_get_handler (rpcsvc_request_t *req)
+int
+rpcsvc_auth_reconf(rpcsvc_t *svc, dict_t *options)
{
- struct rpcsvc_auth_list *auth = NULL;
- struct rpcsvc_auth_list *tmp = NULL;
- rpcsvc_t *svc = NULL;
+ int ret = 0;
- if (!req)
- return NULL;
+ if ((!svc) || (!options))
+ return (-1);
- svc = req->svc;
- if (!svc) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "!svc");
- goto err;
- }
+ ret = rpcsvc_set_allow_insecure(svc, options);
+ if (ret)
+ return (-1);
- if (list_empty (&svc->authschemes)) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "No authentication!");
- goto err;
- }
+ ret = rpcsvc_set_root_squash(svc, options);
+ if (ret)
+ return (-1);
- list_for_each_entry_safe (auth, tmp, &svc->authschemes, authlist) {
- if (!auth->enable)
- continue;
- if (auth->auth->authnum == req->cred.flavour)
- goto err;
+ ret = rpcsvc_set_all_squash(svc, options);
+ if (ret)
+ return (-1);
- }
+ return rpcsvc_set_addr_namelookup(svc, options);
+}
- auth = NULL;
+rpcsvc_auth_t *
+__rpcsvc_auth_get_handler(rpcsvc_request_t *req)
+{
+ struct rpcsvc_auth_list *auth = NULL;
+ struct rpcsvc_auth_list *tmp = NULL;
+ rpcsvc_t *svc = NULL;
+
+ if (!req)
+ return NULL;
+
+ svc = req->svc;
+ if (!svc) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "!svc");
+ goto err;
+ }
+
+ if (list_empty(&svc->authschemes)) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING, "No authentication!");
+ goto err;
+ }
+
+ list_for_each_entry_safe(auth, tmp, &svc->authschemes, authlist)
+ {
+ if (!auth->enable)
+ continue;
+ if (auth->auth->authnum == req->cred.flavour)
+ goto err;
+ }
+
+ auth = NULL;
err:
- if (auth)
- return auth->auth;
- else
- return NULL;
+ if (auth)
+ return auth->auth;
+ else
+ return NULL;
}
rpcsvc_auth_t *
-rpcsvc_auth_get_handler (rpcsvc_request_t *req)
+rpcsvc_auth_get_handler(rpcsvc_request_t *req)
{
- rpcsvc_auth_t *auth = NULL;
+ rpcsvc_auth_t *auth = NULL;
- auth = __rpcsvc_auth_get_handler (req);
- if (auth)
- goto ret;
+ auth = __rpcsvc_auth_get_handler(req);
+ if (auth)
+ goto ret;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "No auth handler: %d",
- req->cred.flavour);
+ gf_log(GF_RPCSVC, GF_LOG_TRACE, "No auth handler: %d", req->cred.flavour);
- /* The requested scheme was not available so fall back the to one
- * scheme that will always be present.
- */
- req->cred.flavour = AUTH_NULL;
- req->verf.flavour = AUTH_NULL;
- auth = __rpcsvc_auth_get_handler (req);
+ /* The requested scheme was not available so fall back the to one
+ * scheme that will always be present.
+ */
+ req->cred.flavour = AUTH_NULL;
+ req->verf.flavour = AUTH_NULL;
+ auth = __rpcsvc_auth_get_handler(req);
ret:
- return auth;
+ return auth;
}
-
int
-rpcsvc_auth_request_init (rpcsvc_request_t *req)
+rpcsvc_auth_request_init(rpcsvc_request_t *req, struct rpc_msg *callmsg)
{
- int ret = -1;
- rpcsvc_auth_t *auth = NULL;
-
- if (!req)
- return -1;
-
- auth = rpcsvc_auth_get_handler (req);
- if (!auth)
- goto err;
- ret = 0;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Auth handler: %s", auth->authname);
- if (!auth->authops->request_init)
- ret = auth->authops->request_init (req, auth->authprivate);
-
+ int32_t ret = 0;
+ rpcsvc_auth_t *auth = NULL;
+
+ if (!req || !callmsg) {
+ ret = -1;
+ goto err;
+ }
+
+ req->cred.flavour = rpc_call_cred_flavour(callmsg);
+ req->cred.datalen = rpc_call_cred_len(callmsg);
+ req->verf.flavour = rpc_call_verf_flavour(callmsg);
+ req->verf.datalen = rpc_call_verf_len(callmsg);
+
+ auth = rpcsvc_auth_get_handler(req);
+ if (!auth) {
+ ret = -1;
+ goto err;
+ }
+
+ gf_log(GF_RPCSVC, GF_LOG_TRACE, "Auth handler: %s", auth->authname);
+
+ if (auth->authops->request_init)
+ ret = auth->authops->request_init(req, auth->authprivate);
+
+ /* reset to auxgidlarge during
+ unsersialize if necessary */
+ req->auxgids = req->auxgidsmall;
+ req->auxgidlarge = NULL;
err:
- return ret;
+ return ret;
}
-
int
-rpcsvc_authenticate (rpcsvc_request_t *req)
+rpcsvc_authenticate(rpcsvc_request_t *req)
{
- int ret = RPCSVC_AUTH_REJECT;
- rpcsvc_auth_t *auth = NULL;
- int minauth = 0;
-
- if (!req)
- return ret;
-
- /* FIXME use rpcsvc_request_prog_minauth() */
- minauth = 0;
- if (minauth > rpcsvc_request_cred_flavour (req)) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "Auth too weak");
- rpcsvc_request_set_autherr (req, AUTH_TOOWEAK);
- goto err;
- }
+ int ret = RPCSVC_AUTH_REJECT;
+ rpcsvc_auth_t *auth = NULL;
+ int minauth = 0;
- auth = rpcsvc_auth_get_handler (req);
- if (!auth) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "No auth handler found");
- goto err;
- }
+ if (!req)
+ return ret;
+
+ /* FIXME use rpcsvc_request_prog_minauth() */
+ minauth = 0;
+ if (minauth > rpcsvc_request_cred_flavour(req)) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING, "Auth too weak");
+ rpcsvc_request_set_autherr(req, AUTH_TOOWEAK);
+ goto err;
+ }
+
+ auth = rpcsvc_auth_get_handler(req);
+ if (!auth) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING, "No auth handler found");
+ goto err;
+ }
- if (auth->authops->authenticate)
- ret = auth->authops->authenticate (req, auth->authprivate);
+ if (auth->authops->authenticate)
+ ret = auth->authops->authenticate(req, auth->authprivate);
err:
- return ret;
+ return ret;
}
int
-rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen)
+rpcsvc_auth_array(rpcsvc_t *svc, char *volname, int *autharr, int arrlen)
{
- int count = 0;
- int gen = RPCSVC_AUTH_REJECT;
- int spec = RPCSVC_AUTH_REJECT;
- int final = RPCSVC_AUTH_REJECT;
- char *srchstr = NULL;
- char *valstr = NULL;
- gf_boolean_t boolval = _gf_false;
- int ret = 0;
-
- struct rpcsvc_auth_list *auth = NULL;
- struct rpcsvc_auth_list *tmp = NULL;
-
- if ((!svc) || (!autharr) || (!volname))
- return -1;
-
- memset (autharr, 0, arrlen * sizeof(int));
- if (list_empty (&svc->authschemes)) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "No authentication!");
- goto err;
+ int count = 0;
+ int result = RPCSVC_AUTH_REJECT;
+ char *srchstr = NULL;
+ int ret = 0;
+
+ struct rpcsvc_auth_list *auth = NULL;
+ struct rpcsvc_auth_list *tmp = NULL;
+
+ if ((!svc) || (!autharr) || (!volname))
+ return -1;
+
+ memset(autharr, 0, arrlen * sizeof(int));
+ if (list_empty(&svc->authschemes)) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "No authentication!");
+ goto err;
+ }
+
+ list_for_each_entry_safe(auth, tmp, &svc->authschemes, authlist)
+ {
+ if (count >= arrlen)
+ break;
+
+ result = gf_asprintf(&srchstr, "rpc-auth.%s.%s", auth->name, volname);
+ if (result == -1) {
+ count = -1;
+ goto err;
}
- list_for_each_entry_safe (auth, tmp, &svc->authschemes, authlist) {
- if (count >= arrlen)
- break;
-
- gen = gf_asprintf (&srchstr, "rpc-auth.%s", auth->name);
- if (gen == -1) {
- count = -1;
- goto err;
- }
-
- gen = RPCSVC_AUTH_REJECT;
- if (dict_get (svc->options, srchstr)) {
- ret = dict_get_str (svc->options, srchstr, &valstr);
- if (ret == 0) {
- ret = gf_string2boolean (valstr, &boolval);
- if (ret == 0) {
- if (boolval == _gf_true)
- gen = RPCSVC_AUTH_ACCEPT;
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
- "d to read auth val");
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
- "d to read auth val");
- }
-
- GF_FREE (srchstr);
- spec = gf_asprintf (&srchstr, "rpc-auth.%s.%s", auth->name,
- volname);
- if (spec == -1) {
- count = -1;
- goto err;
- }
-
- spec = RPCSVC_AUTH_DONTCARE;
- if (dict_get (svc->options, srchstr)) {
- ret = dict_get_str (svc->options, srchstr, &valstr);
- if (ret == 0) {
- ret = gf_string2boolean (valstr, &boolval);
- if (ret == 0) {
- if (boolval == _gf_true)
- spec = RPCSVC_AUTH_ACCEPT;
- else
- spec = RPCSVC_AUTH_REJECT;
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
- "d to read auth val");
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Faile"
- "d to read auth val");
- }
-
- GF_FREE (srchstr);
- final = rpcsvc_combine_gen_spec_volume_checks (gen, spec);
- if (final == RPCSVC_AUTH_ACCEPT) {
- autharr[count] = auth->auth->authnum;
- ++count;
- }
+ ret = dict_get_str_boolean(svc->options, srchstr, 0xC00FFEE);
+ GF_FREE(srchstr);
+
+ switch (ret) {
+ case _gf_true:
+ autharr[count] = auth->auth->authnum;
+ ++count;
+ break;
+
+ default:
+ /* nothing to do */
+ break;
}
+ }
err:
- return count;
+ return count;
}
gid_t *
-rpcsvc_auth_unix_auxgids (rpcsvc_request_t *req, int *arrlen)
+rpcsvc_auth_unix_auxgids(rpcsvc_request_t *req, int *arrlen)
{
- if ((!req) || (!arrlen))
- return NULL;
+ if ((!req) || (!arrlen))
+ return NULL;
- /* In case of AUTH_NULL auxgids are not used */
- switch (req->cred.flavour) {
+ /* In case of AUTH_NULL auxgids are not used */
+ switch (req->cred.flavour) {
case AUTH_UNIX:
case AUTH_GLUSTERFS:
case AUTH_GLUSTERFS_v2:
- break;
+ case AUTH_GLUSTERFS_v3:
+ break;
default:
- gf_log ("rpc", GF_LOG_DEBUG, "auth type not unix or glusterfs");
- return NULL;
- }
+ gf_log("rpc", GF_LOG_DEBUG, "auth type not unix or glusterfs");
+ return NULL;
+ }
- *arrlen = req->auxgidcount;
- if (*arrlen == 0)
- return NULL;
+ *arrlen = req->auxgidcount;
+ if (*arrlen == 0)
+ return NULL;
- return &req->auxgids[0];
+ return &req->auxgids[0];
}
diff --git a/rpc/rpc-lib/src/rpcsvc-common.h b/rpc/rpc-lib/src/rpcsvc-common.h
index 81f79811612..6c4ec49a6ef 100644
--- a/rpc/rpc-lib/src/rpcsvc-common.h
+++ b/rpc/rpc-lib/src/rpcsvc-common.h
@@ -12,62 +12,102 @@
#define _RPCSVC_COMMON_H
#include <pthread.h>
-#include "list.h"
-#include "compat.h"
-#include "glusterfs.h"
-#include "dict.h"
+#include <glusterfs/compat.h>
+#include <glusterfs/dict.h>
typedef enum {
- RPCSVC_EVENT_ACCEPT,
- RPCSVC_EVENT_DISCONNECT,
- RPCSVC_EVENT_TRANSPORT_DESTROY,
- RPCSVC_EVENT_LISTENER_DEAD,
+ RPCSVC_EVENT_ACCEPT,
+ RPCSVC_EVENT_DISCONNECT,
+ RPCSVC_EVENT_TRANSPORT_DESTROY,
+ RPCSVC_EVENT_LISTENER_DEAD,
} rpcsvc_event_t;
-
struct rpcsvc_state;
-typedef int (*rpcsvc_notify_t) (struct rpcsvc_state *, void *mydata,
- rpcsvc_event_t, void *data);
+typedef int (*rpcsvc_notify_t)(struct rpcsvc_state *, void *mydata,
+ rpcsvc_event_t, void *data);
+struct drc_globals;
+typedef struct drc_globals rpcsvc_drc_globals_t;
/* Contains global state required for all the RPC services.
*/
typedef struct rpcsvc_state {
+ /* Contains list of (program, version) handlers.
+ * other options.
+ */
+
+ pthread_rwlock_t rpclock;
+
+ /* List of the authentication schemes available. */
+ struct list_head authschemes;
+
+ /* Reference to the options */
+ dict_t *options;
+
+ uid_t anonuid;
+ gid_t anongid;
+ glusterfs_ctx_t *ctx;
+
+ /* list of connections which will listen for incoming connections */
+ struct list_head listeners;
+
+ /* list of programs registered with rpcsvc */
+ struct list_head programs;
+
+ /* list of notification callbacks */
+ struct list_head notify;
+ int notify_count;
+
+ unsigned int memfactor;
+
+ xlator_t *xl; /* xlator */
+ void *mydata;
+ rpcsvc_notify_t notifyfn;
+ struct mem_pool *rxpool;
+ rpcsvc_drc_globals_t *drc;
+
+ /* per-client limit of outstanding rpc requests */
+ int outstanding_rpc_limit;
+ gf_boolean_t addr_namelookup;
+ /* determine whether throttling is needed, by default OFF */
+ gf_boolean_t throttle;
+ /* Allow insecure ports. */
+ gf_boolean_t allow_insecure;
+ gf_boolean_t register_portmap;
+ gf_boolean_t root_squash;
+ gf_boolean_t all_squash;
+} rpcsvc_t;
- /* Contains list of (program, version) handlers.
- * other options.
- */
-
- pthread_mutex_t rpclock;
-
- unsigned int memfactor;
-
- /* List of the authentication schemes available. */
- struct list_head authschemes;
+/* DRC START */
+enum drc_op_type { DRC_NA = 0, DRC_IDEMPOTENT = 1, DRC_NON_IDEMPOTENT = 2 };
+typedef enum drc_op_type drc_op_type_t;
- /* Reference to the options */
- dict_t *options;
+enum drc_type { DRC_TYPE_NONE = 0, DRC_TYPE_IN_MEMORY = 1 };
+typedef enum drc_type drc_type_t;
- /* Allow insecure ports. */
- int allow_insecure;
- gf_boolean_t register_portmap;
- glusterfs_ctx_t *ctx;
+enum drc_lru_factor {
+ DRC_LRU_5_PC = 20,
+ DRC_LRU_10_PC = 10,
+ DRC_LRU_25_PC = 4,
+ DRC_LRU_50_PC = 2
+};
+typedef enum drc_lru_factor drc_lru_factor_t;
- /* list of connections which will listen for incoming connections */
- struct list_head listeners;
+enum drc_xid_state { DRC_XID_MONOTONOUS = 0, DRC_XID_WRAPPED = 1 };
+typedef enum drc_xid_state drc_xid_state_t;
- /* list of programs registered with rpcsvc */
- struct list_head programs;
+enum drc_op_state { DRC_OP_IN_TRANSIT = 0, DRC_OP_CACHED = 1 };
+typedef enum drc_op_state drc_op_state_t;
- /* list of notification callbacks */
- struct list_head notify;
- int notify_count;
+enum drc_policy { DRC_LRU = 0 };
+typedef enum drc_policy drc_policy_t;
- void *mydata; /* This is xlator */
- rpcsvc_notify_t notifyfn;
- struct mem_pool *rxpool;
-} rpcsvc_t;
+/* Default policies for DRC */
+#define DRC_DEFAULT_TYPE DRC_TYPE_IN_MEMORY
+#define DRC_DEFAULT_CACHE_SIZE 0x20000
+#define DRC_DEFAULT_LRU_FACTOR DRC_LRU_25_PC
+/* DRC END */
#endif /* #ifndef _RPCSVC_COMMON_H */
diff --git a/rpc/rpc-lib/src/rpcsvc.c b/rpc/rpc-lib/src/rpcsvc.c
index ee9e1c72507..39910d481bf 100644
--- a/rpc/rpc-lib/src/rpcsvc.c
+++ b/rpc/rpc-lib/src/rpcsvc.c
@@ -8,25 +8,20 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "rpcsvc.h"
#include "rpc-transport.h"
-#include "dict.h"
-#include "logging.h"
-#include "byte-order.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "list.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/statedump.h>
#include "xdr-rpc.h"
-#include "iobuf.h"
-#include "globals.h"
+#include <glusterfs/iobuf.h>
#include "xdr-common.h"
#include "xdr-generic.h"
#include "rpc-common-xdr.h"
+#include <glusterfs/syncop.h>
+#include "rpc-drc.h"
+#include "protocol-common.h"
#include <errno.h>
#include <pthread.h>
@@ -38,901 +33,1364 @@
#include <fnmatch.h>
#include <stdarg.h>
#include <stdio.h>
+#include <dlfcn.h>
+
+#ifdef IPV6_DEFAULT
+#include <netconfig.h>
+#endif
#include "xdr-rpcclnt.h"
+#include <glusterfs/glusterfs-acl.h>
-#define ACL_PROGRAM 100227
+#ifndef PTHREAD_MUTEX_ADAPTIVE_NP
+#define PTHREAD_MUTEX_ADAPTIVE_NP PTHREAD_MUTEX_DEFAULT
+#endif
-struct rpcsvc_program gluster_dump_prog;
+static struct rpcsvc_program gluster_dump_prog;
-#define rpcsvc_alloc_request(svc, request) \
- do { \
- request = (rpcsvc_request_t *) mem_get ((svc)->rxpool); \
- memset (request, 0, sizeof (rpcsvc_request_t)); \
- } while (0)
+#define rpcsvc_alloc_request(svc, request) \
+ do { \
+ request = (rpcsvc_request_t *)mem_get((svc)->rxpool); \
+ if (request) { \
+ memset(request, 0, sizeof(rpcsvc_request_t)); \
+ } else { \
+ gf_log("rpcsvc", GF_LOG_ERROR, \
+ "error getting memory for rpc request"); \
+ } \
+ } while (0)
rpcsvc_listener_t *
-rpcsvc_get_listener (rpcsvc_t *svc, uint16_t port, rpc_transport_t *trans);
+rpcsvc_get_listener(rpcsvc_t *svc, uint16_t port, rpc_transport_t *trans);
int
-rpcsvc_notify (rpc_transport_t *trans, void *mydata,
- rpc_transport_event_t event, void *data, ...);
+rpcsvc_notify(rpc_transport_t *trans, void *mydata, rpc_transport_event_t event,
+ void *data, ...);
+void *
+rpcsvc_request_handler(void *arg);
-rpcsvc_notify_wrapper_t *
-rpcsvc_notify_wrapper_alloc (void)
+static int
+rpcsvc_match_subnet_v4(const char *addrtok, const char *ipaddr);
+
+static void
+rpcsvc_toggle_queue_status(rpcsvc_program_t *prog,
+ rpcsvc_request_queue_t *queue,
+ unsigned long status[])
{
- rpcsvc_notify_wrapper_t *wrapper = NULL;
+ unsigned queue_index = queue - prog->request_queue;
- wrapper = GF_CALLOC (1, sizeof (*wrapper), gf_common_mt_rpcsvc_wrapper_t);
- if (!wrapper) {
- goto out;
+ status[queue_index / __BITS_PER_LONG] ^= (1UL << (queue_index %
+ __BITS_PER_LONG));
+}
+
+int
+rpcsvc_get_free_queue_index(rpcsvc_program_t *prog)
+{
+ unsigned i, j = 0;
+
+ for (i = 0; i < EVENT_MAX_THREADS / __BITS_PER_LONG; i++)
+ if (prog->request_queue_status[i] != ULONG_MAX) {
+ j = __builtin_ctzl(~prog->request_queue_status[i]);
+ break;
}
- INIT_LIST_HEAD (&wrapper->list);
-out:
- return wrapper;
+ if (i == EVENT_MAX_THREADS / __BITS_PER_LONG)
+ return -1;
+
+ prog->request_queue_status[i] |= (1UL << j);
+ return i * __BITS_PER_LONG + j;
}
+rpcsvc_notify_wrapper_t *
+rpcsvc_notify_wrapper_alloc(void)
+{
+ rpcsvc_notify_wrapper_t *wrapper = NULL;
+
+ wrapper = GF_CALLOC(1, sizeof(*wrapper), gf_common_mt_rpcsvc_wrapper_t);
+ if (!wrapper) {
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&wrapper->list);
+out:
+ return wrapper;
+}
void
-rpcsvc_listener_destroy (rpcsvc_listener_t *listener)
+rpcsvc_listener_destroy(rpcsvc_listener_t *listener)
{
- rpcsvc_t *svc = NULL;
+ rpcsvc_t *svc = NULL;
- if (!listener) {
- goto out;
- }
+ if (!listener) {
+ goto out;
+ }
- svc = listener->svc;
- if (!svc) {
- goto listener_free;
- }
+ svc = listener->svc;
+ if (!svc) {
+ goto listener_free;
+ }
- pthread_mutex_lock (&svc->rpclock);
- {
- list_del_init (&listener->list);
- }
- pthread_mutex_unlock (&svc->rpclock);
+ pthread_rwlock_wrlock(&svc->rpclock);
+ {
+ list_del_init(&listener->list);
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
listener_free:
- GF_FREE (listener);
+ GF_FREE(listener);
out:
- return;
+ return;
}
rpcsvc_vector_sizer
-rpcsvc_get_program_vector_sizer (rpcsvc_t *svc, uint32_t prognum,
- uint32_t progver, uint32_t procnum)
+rpcsvc_get_program_vector_sizer(rpcsvc_t *svc, uint32_t prognum,
+ uint32_t progver, int procnum)
{
- rpcsvc_program_t *program = NULL;
- char found = 0;
+ rpcsvc_program_t *program = NULL;
+ char found = 0;
- if (!svc)
- return NULL;
+ if (!svc)
+ return NULL;
- pthread_mutex_lock (&svc->rpclock);
+ pthread_rwlock_rdlock(&svc->rpclock);
+ {
+ /* Find the matching RPC program from registered list */
+ list_for_each_entry(program, &svc->programs, program)
{
- list_for_each_entry (program, &svc->programs, program) {
- if ((program->prognum == prognum)
- && (program->progver == progver)) {
- found = 1;
- break;
- }
- }
+ if ((program->prognum == prognum) &&
+ (program->progver == progver)) {
+ found = 1;
+ break;
+ }
}
- pthread_mutex_unlock (&svc->rpclock);
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
- if (found)
- return program->actors[procnum].vector_sizer;
- else
- return NULL;
+ if (found) {
+ /* Make sure the requested procnum is supported by RPC prog */
+ if ((procnum < 0) || (procnum >= program->numactors)) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "RPC procedure %d not available for Program %s", procnum,
+ program->progname);
+ return NULL;
+ }
+
+ /* SUCCESS: Supported procedure */
+ return program->actors[procnum].vector_sizer;
+ }
+
+ return NULL; /* FAIL */
}
-/* This needs to change to returning errors, since
- * we need to return RPC specific error messages when some
- * of the pointers below are NULL.
- */
-rpcsvc_actor_t *
-rpcsvc_program_actor (rpcsvc_request_t *req)
+gf_boolean_t
+rpcsvc_can_outstanding_req_be_ignored(rpcsvc_request_t *req)
{
- rpcsvc_program_t *program = NULL;
- int err = SYSTEM_ERR;
- rpcsvc_actor_t *actor = NULL;
- rpcsvc_t *svc = NULL;
- char found = 0;
+ /*
+ * If outstanding_rpc_limit is reached because of blocked locks and
+ * throttling is attempted then no unlock requests will be received. So
+ * the outstanding request count will never change i.e. it will always
+ * be equal to the limit. This also leads to ping timer expiry on
+ * client.
+ */
+
+ /*
+ * This is a hack and a necessity until grantedlock == fop completion.
+ * Ideally if we get a blocking lock request which cannot be granted
+ * right now, we should unwind the fop saying “request registered, will
+ * notify you when grantedâ€, which is very hard to implement at the
+ * moment. Until we bring in such mechanism, we will need to live with
+ * not rate-limiting INODELK/ENTRYLK/LK fops
+ */
+
+ if ((req->prognum == GLUSTER_FOP_PROGRAM) &&
+ (req->progver == GLUSTER_FOP_VERSION)) {
+ if ((req->procnum == GFS3_OP_INODELK) ||
+ (req->procnum == GFS3_OP_FINODELK) ||
+ (req->procnum == GFS3_OP_ENTRYLK) ||
+ (req->procnum == GFS3_OP_FENTRYLK) || (req->procnum == GFS3_OP_LK))
+ return _gf_true;
+ }
+ return _gf_false;
+}
- if (!req)
- goto err;
+int
+rpcsvc_request_outstanding(rpcsvc_request_t *req, int delta)
+{
+ int ret = -1;
+ int old_count = 0;
+ int new_count = 0;
+ int limit = 0;
+ gf_boolean_t throttle = _gf_false;
- svc = req->svc;
- pthread_mutex_lock (&svc->rpclock);
- {
- list_for_each_entry (program, &svc->programs, program) {
- if (program->prognum == req->prognum) {
- err = PROG_MISMATCH;
- }
+ if (!req)
+ goto out;
- if ((program->prognum == req->prognum)
- && (program->progver == req->progver)) {
- found = 1;
- break;
- }
- }
- }
- pthread_mutex_unlock (&svc->rpclock);
-
- if (!found) {
- if (err != PROG_MISMATCH) {
- /* log in DEBUG when nfs clients try to see if
- * ACL requests are accepted by nfs server
- */
- gf_log (GF_RPCSVC, (req->prognum == ACL_PROGRAM) ?
- GF_LOG_DEBUG : GF_LOG_WARNING,
- "RPC program not available (req %u %u)",
- req->prognum, req->progver);
- err = PROG_UNAVAIL;
- goto err;
- }
+ throttle = rpcsvc_get_throttle(req->svc);
+ if (!throttle) {
+ ret = 0;
+ goto out;
+ }
- gf_log (GF_RPCSVC, GF_LOG_WARNING,
- "RPC program version not available (req %u %u)",
- req->prognum, req->progver);
- goto err;
- }
- req->prog = program;
- if (!program->actors) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING,
- "RPC Actor not found for program %s %d",
- program->progname, program->prognum);
- err = SYSTEM_ERR;
- goto err;
- }
+ if (rpcsvc_can_outstanding_req_be_ignored(req)) {
+ ret = 0;
+ goto out;
+ }
- if ((req->procnum < 0) || (req->procnum >= program->numactors)) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC Program procedure not"
- " available for procedure %d in %s", req->procnum,
- program->progname);
- err = PROC_UNAVAIL;
- goto err;
- }
+ pthread_mutex_lock(&req->trans->lock);
+ {
+ limit = req->svc->outstanding_rpc_limit;
+ if (!limit)
+ goto unlock;
- actor = &program->actors[req->procnum];
- if (!actor->actor) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC Program procedure not"
- " available for procedure %d in %s", req->procnum,
- program->progname);
- err = PROC_UNAVAIL;
- actor = NULL;
- goto err;
- }
+ old_count = req->trans->outstanding_rpc_count;
+ req->trans->outstanding_rpc_count += delta;
+ new_count = req->trans->outstanding_rpc_count;
- err = SUCCESS;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Actor found: %s - %s",
- program->progname, actor->procname);
-err:
- if (req)
- req->rpc_err = err;
+ if (old_count <= limit && new_count > limit)
+ ret = rpc_transport_throttle(req->trans, _gf_true);
+
+ if (old_count > limit && new_count <= limit)
+ ret = rpc_transport_throttle(req->trans, _gf_false);
+ }
+unlock:
+ pthread_mutex_unlock(&req->trans->lock);
- return actor;
+out:
+ return ret;
}
+/* This needs to change to returning errors, since
+ * we need to return RPC specific error messages when some
+ * of the pointers below are NULL.
+ */
+rpcsvc_actor_t *
+rpcsvc_program_actor(rpcsvc_request_t *req)
+{
+ rpcsvc_program_t *program = NULL;
+ int err = SYSTEM_ERR;
+ rpcsvc_actor_t *actor = NULL;
+ rpcsvc_t *svc = NULL;
+ char found = 0;
+ char *peername = NULL;
+
+ if (!req)
+ goto err;
+
+ svc = req->svc;
+ peername = req->trans->peerinfo.identifier;
+ pthread_rwlock_rdlock(&svc->rpclock);
+ {
+ list_for_each_entry(program, &svc->programs, program)
+ {
+ if (program->prognum == req->prognum) {
+ err = PROG_MISMATCH;
+ }
+
+ if ((program->prognum == req->prognum) &&
+ (program->progver == req->progver)) {
+ found = 1;
+ break;
+ }
+ }
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
+
+ if (!found) {
+ if (err != PROG_MISMATCH) {
+ /* log in DEBUG when nfs clients try to see if
+ * ACL requests are accepted by nfs server
+ */
+ gf_log(
+ GF_RPCSVC,
+ (req->prognum == ACL_PROGRAM) ? GF_LOG_DEBUG : GF_LOG_WARNING,
+ "RPC program not available (req %u %u) for %s", req->prognum,
+ req->progver, peername);
+ err = PROG_UNAVAIL;
+ goto err;
+ }
+
+ gf_log(GF_RPCSVC, GF_LOG_WARNING,
+ "RPC program version not available (req %u %u) for %s",
+ req->prognum, req->progver, peername);
+ goto err;
+ }
+ req->prog = program;
+ if (!program->actors) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING,
+ "RPC Actor not found for program %s %d for %s",
+ program->progname, program->prognum, peername);
+ err = SYSTEM_ERR;
+ goto err;
+ }
+
+ if ((req->procnum < 0) || (req->procnum >= program->numactors)) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "RPC Program procedure not"
+ " available for procedure %d in %s for %s",
+ req->procnum, program->progname, peername);
+ err = PROC_UNAVAIL;
+ goto err;
+ }
+
+ actor = &program->actors[req->procnum];
+ if (!actor->actor) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "RPC Program procedure not"
+ " available for procedure %d in %s for %s",
+ req->procnum, program->progname, peername);
+ err = PROC_UNAVAIL;
+ actor = NULL;
+ goto err;
+ }
+
+ if (svc->xl->ctx->measure_latency) {
+ timespec_now(&req->begin);
+ }
+
+ req->ownthread = program->ownthread;
+ req->synctask = program->synctask;
+
+ err = SUCCESS;
+ gf_log(GF_RPCSVC, GF_LOG_TRACE, "Actor found: %s - %s for %s",
+ program->progname, actor->procname, peername);
+err:
+ if (req)
+ req->rpc_err = err;
+
+ return actor;
+}
/* this procedure can only pass 4 arguments to registered notifyfn. To send more
* arguments call wrapper->notify directly.
*/
-inline void
-rpcsvc_program_notify (rpcsvc_listener_t *listener, rpcsvc_event_t event,
- void *data)
+static void
+rpcsvc_program_notify(rpcsvc_listener_t *listener, rpcsvc_event_t event,
+ void *data)
{
- rpcsvc_notify_wrapper_t *wrapper = NULL;
+ rpcsvc_notify_wrapper_t *wrapper = NULL;
- if (!listener) {
- goto out;
- }
+ if (!listener) {
+ goto out;
+ }
- list_for_each_entry (wrapper, &listener->svc->notify, list) {
- if (wrapper->notify) {
- wrapper->notify (listener->svc,
- wrapper->data,
- event, data);
- }
+ list_for_each_entry(wrapper, &listener->svc->notify, list)
+ {
+ if (wrapper->notify) {
+ wrapper->notify(listener->svc, wrapper->data, event, data);
}
+ }
out:
- return;
+ return;
}
-
-inline int
-rpcsvc_accept (rpcsvc_t *svc, rpc_transport_t *listen_trans,
- rpc_transport_t *new_trans)
+static int
+rpcsvc_accept(rpcsvc_t *svc, rpc_transport_t *listen_trans,
+ rpc_transport_t *new_trans)
{
- rpcsvc_listener_t *listener = NULL;
- int32_t ret = -1;
+ rpcsvc_listener_t *listener = NULL;
+ int32_t ret = -1;
- listener = rpcsvc_get_listener (svc, -1, listen_trans);
- if (listener == NULL) {
- goto out;
- }
+ listener = rpcsvc_get_listener(svc, -1, listen_trans);
+ if (listener == NULL) {
+ goto out;
+ }
- rpcsvc_program_notify (listener, RPCSVC_EVENT_ACCEPT, new_trans);
- ret = 0;
+ rpcsvc_program_notify(listener, RPCSVC_EVENT_ACCEPT, new_trans);
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
void
-rpcsvc_request_destroy (rpcsvc_request_t *req)
+rpcsvc_request_destroy(rpcsvc_request_t *req)
{
- if (!req) {
- goto out;
- }
+ if (!req) {
+ goto out;
+ }
- if (req->iobref) {
- iobref_unref (req->iobref);
- }
+ if (req->iobref) {
+ iobref_unref(req->iobref);
+ }
- rpc_transport_unref (req->trans);
+ /* This marks the "end" of an RPC request. Reply is
+ completely written to the socket and is on the way
+ to the client. It is time to decrement the
+ outstanding request counter by 1.
+ */
+ if (req->prognum) // Only for initialized requests
+ rpcsvc_request_outstanding(req, -1);
- mem_put (req);
+ rpc_transport_unref(req->trans);
+
+ GF_FREE(req->auxgidlarge);
+
+ mem_put(req);
out:
- return;
+ return;
}
+rpcsvc_request_t *
+rpcsvc_request_init(rpcsvc_t *svc, rpc_transport_t *trans,
+ struct rpc_msg *callmsg, struct iovec progmsg,
+ rpc_transport_pollin_t *msg, rpcsvc_request_t *req)
+{
+ int i = 0;
+
+ if ((!trans) || (!callmsg) || (!req) || (!msg))
+ return NULL;
+
+ /* We start a RPC request as always denied. */
+ req->rpc_status = MSG_DENIED;
+ req->xid = rpc_call_xid(callmsg);
+ req->prognum = rpc_call_program(callmsg);
+ req->progver = rpc_call_progver(callmsg);
+ req->procnum = rpc_call_progproc(callmsg);
+ req->trans = rpc_transport_ref(trans);
+ req->count = msg->count;
+ req->msg[0] = progmsg;
+ req->iobref = iobref_ref(msg->iobref);
+ if (msg->vectored) {
+ /* msg->vector[MAX_IOVEC] is defined in structure. prevent a
+ out of bound access */
+ for (i = 1; i < min(msg->count, MAX_IOVEC); i++) {
+ req->msg[i] = msg->vector[i];
+ }
+ }
+
+ req->svc = svc;
+ req->trans_private = msg->private;
+
+ INIT_LIST_HEAD(&req->txlist);
+ INIT_LIST_HEAD(&req->request_list);
+ req->payloadsize = 0;
+
+ /* By this time, the data bytes for the auth scheme would have already
+ * been copied into the required sections of the req structure,
+ * we just need to fill in the meta-data about it now.
+ */
+ rpcsvc_auth_request_init(req, callmsg);
+ return req;
+}
rpcsvc_request_t *
-rpcsvc_request_init (rpcsvc_t *svc, rpc_transport_t *trans,
- struct rpc_msg *callmsg,
- struct iovec progmsg, rpc_transport_pollin_t *msg,
- rpcsvc_request_t *req)
+rpcsvc_request_create(rpcsvc_t *svc, rpc_transport_t *trans,
+ rpc_transport_pollin_t *msg)
{
- int i = 0;
+ char *msgbuf = NULL;
+ struct rpc_msg rpcmsg;
+ struct iovec progmsg; /* RPC Program payload */
+ rpcsvc_request_t *req = NULL;
+ size_t msglen = 0;
+ int ret = -1;
+
+ if (!svc || !trans || !svc->rxpool)
+ return NULL;
+
+ /* We need to allocate the request before actually calling
+ * rpcsvc_request_init on the request so that we, can fill the auth
+ * data directly into the request structure from the message iobuf.
+ * This avoids a need to keep a temp buffer into which the auth data
+ * would've been copied otherwise.
+ */
+ rpcsvc_alloc_request(svc, req);
+ if (!req) {
+ goto err;
+ }
+
+ msgbuf = msg->vector[0].iov_base;
+ msglen = msg->vector[0].iov_len;
+
+ ret = xdr_to_rpc_call(msgbuf, msglen, &rpcmsg, &progmsg, req->cred.authdata,
+ req->verf.authdata);
+
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING, "RPC call decoding failed");
+ rpcsvc_request_seterr(req, GARBAGE_ARGS);
+ req->trans = rpc_transport_ref(trans);
+ req->svc = svc;
+ goto err;
+ }
+
+ ret = -1;
+ rpcsvc_request_init(svc, trans, &rpcmsg, progmsg, msg, req);
+
+ gf_log(GF_RPCSVC, GF_LOG_TRACE,
+ "received rpc-message "
+ "(XID: 0x%" GF_PRI_RPC_XID ", Ver: %" GF_PRI_RPC_VERSION
+ ", Program: %" GF_PRI_RPC_PROG_ID
+ ", "
+ "ProgVers: %" GF_PRI_RPC_PROG_VERS ", Proc: %" GF_PRI_RPC_PROC
+ ") "
+ "from rpc-transport (%s)",
+ rpc_call_xid(&rpcmsg), rpc_call_rpcvers(&rpcmsg),
+ rpc_call_program(&rpcmsg), rpc_call_progver(&rpcmsg),
+ rpc_call_progproc(&rpcmsg), trans->name);
+
+ /* We just received a new request from the wire. Account for
+ it in the outsanding request counter to make sure we don't
+ ingest too many concurrent requests from the same client.
+ */
+ if (req->prognum) // Only for initialized requests
+ ret = rpcsvc_request_outstanding(req, +1);
+
+ if (rpc_call_rpcvers(&rpcmsg) != 2) {
+ /* LOG- TODO: print rpc version, also print the peerinfo
+ from transport */
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "RPC version not supported "
+ "(XID: 0x%" GF_PRI_RPC_XID ", Ver: %" GF_PRI_RPC_VERSION
+ ", Program: %" GF_PRI_RPC_PROG_ID
+ ", "
+ "ProgVers: %" GF_PRI_RPC_PROG_VERS ", Proc: %" GF_PRI_RPC_PROC
+ ") "
+ "from trans (%s)",
+ rpc_call_xid(&rpcmsg), rpc_call_rpcvers(&rpcmsg),
+ rpc_call_program(&rpcmsg), rpc_call_progver(&rpcmsg),
+ rpc_call_progproc(&rpcmsg), trans->name);
+ rpcsvc_request_seterr(req, RPC_MISMATCH);
+ goto err;
+ }
+
+ ret = rpcsvc_authenticate(req);
+ if (ret == RPCSVC_AUTH_REJECT) {
+ /* No need to set auth_err, that is the responsibility of
+ * the authentication handler since only that know what exact
+ * error happened.
+ */
+ rpcsvc_request_seterr(req, AUTH_ERROR);
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "auth failed on request. "
+ "(XID: 0x%" GF_PRI_RPC_XID ", Ver: %" GF_PRI_RPC_VERSION
+ ", Program: %" GF_PRI_RPC_PROG_ID
+ ", "
+ "ProgVers: %" GF_PRI_RPC_PROG_VERS ", Proc: %" GF_PRI_RPC_PROC
+ ") "
+ "from trans (%s)",
+ rpc_call_xid(&rpcmsg), rpc_call_rpcvers(&rpcmsg),
+ rpc_call_program(&rpcmsg), rpc_call_progver(&rpcmsg),
+ rpc_call_progproc(&rpcmsg), trans->name);
+ ret = -1;
+ goto err;
+ }
+
+ /* If the error is not RPC_MISMATCH, we consider the call as accepted
+ * since we are not handling authentication failures for now.
+ */
+ req->rpc_status = MSG_ACCEPTED;
+ req->reply = NULL;
+ ret = 0;
+err:
+ if (ret == -1) {
+ ret = rpcsvc_error_reply(req);
+ if (ret)
+ gf_log("rpcsvc", GF_LOG_WARNING, "failed to queue error reply");
+ req = NULL;
+ }
- if ((!trans) || (!callmsg)|| (!req) || (!msg))
- return NULL;
+ return req;
+}
- /* We start a RPC request as always denied. */
- req->rpc_status = MSG_DENIED;
- req->xid = rpc_call_xid (callmsg);
- req->prognum = rpc_call_program (callmsg);
- req->progver = rpc_call_progver (callmsg);
- req->procnum = rpc_call_progproc (callmsg);
- req->trans = rpc_transport_ref (trans);
- req->count = msg->count;
- req->msg[0] = progmsg;
- req->iobref = iobref_ref (msg->iobref);
- if (msg->vectored) {
- /* msg->vector[2] is defined in structure. prevent a
- out of bound access */
- for (i = 1; i < min (msg->count, 2); i++) {
- req->msg[i] = msg->vector[i];
- }
- }
+int
+rpcsvc_check_and_reply_error(int ret, call_frame_t *frame, void *opaque)
+{
+ rpcsvc_request_t *req = NULL;
- req->svc = svc;
- req->trans_private = msg->private;
+ req = opaque;
- INIT_LIST_HEAD (&req->txlist);
- req->payloadsize = 0;
+ if (ret)
+ gf_log("rpcsvc", GF_LOG_ERROR,
+ "rpc actor (%d:%d:%d) failed to complete successfully",
+ req->prognum, req->progver, req->procnum);
- /* By this time, the data bytes for the auth scheme would have already
- * been copied into the required sections of the req structure,
- * we just need to fill in the meta-data about it now.
- */
- req->cred.flavour = rpc_call_cred_flavour (callmsg);
- req->cred.datalen = rpc_call_cred_len (callmsg);
- req->verf.flavour = rpc_call_verf_flavour (callmsg);
- req->verf.datalen = rpc_call_verf_len (callmsg);
+ if (ret == RPCSVC_ACTOR_ERROR) {
+ ret = rpcsvc_error_reply(req);
+ if (ret)
+ gf_log("rpcsvc", GF_LOG_WARNING, "failed to queue error reply");
+ }
- /* AUTH */
- rpcsvc_auth_request_init (req);
- return req;
+ return 0;
}
-
-rpcsvc_request_t *
-rpcsvc_request_create (rpcsvc_t *svc, rpc_transport_t *trans,
- rpc_transport_pollin_t *msg)
+void
+rpcsvc_queue_event_thread_death(rpcsvc_t *svc, rpcsvc_program_t *prog, int gen)
{
- char *msgbuf = NULL;
- struct rpc_msg rpcmsg;
- struct iovec progmsg; /* RPC Program payload */
- rpcsvc_request_t *req = NULL;
- size_t msglen = 0;
- int ret = -1;
-
- if (!svc || !trans)
- return NULL;
-
- /* We need to allocate the request before actually calling
- * rpcsvc_request_init on the request so that we, can fill the auth
- * data directly into the request structure from the message iobuf.
- * This avoids a need to keep a temp buffer into which the auth data
- * would've been copied otherwise.
- */
- rpcsvc_alloc_request (svc, req);
- if (!req) {
- goto err;
- }
+ rpcsvc_request_queue_t *queue = NULL;
+ int num = 0;
+ void *value = NULL;
+ rpcsvc_request_t *req = NULL;
+ char empty = 0;
+
+ value = pthread_getspecific(prog->req_queue_key);
+ if (value == NULL) {
+ return;
+ }
- msgbuf = msg->vector[0].iov_base;
- msglen = msg->vector[0].iov_len;
+ num = ((unsigned long)value) - 1;
- ret = xdr_to_rpc_call (msgbuf, msglen, &rpcmsg, &progmsg,
- req->cred.authdata,req->verf.authdata);
+ queue = &prog->request_queue[num];
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "RPC call decoding failed");
- rpcsvc_request_seterr (req, GARBAGE_ARGS);
- req->trans = rpc_transport_ref (trans);
- req->svc = svc;
- goto err;
- }
+ if (queue->gen == gen) {
+ /* duplicate event */
+ gf_log(GF_RPCSVC, GF_LOG_INFO,
+ "not queuing duplicate event thread death. "
+ "queue %d program %s",
+ num, prog->progname);
+ return;
+ }
- ret = -1;
- rpcsvc_request_init (svc, trans, &rpcmsg, progmsg, msg, req);
-
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "received rpc-message (XID: 0x%lx, "
- "Ver: %ld, Program: %ld, ProgVers: %ld, Proc: %ld) from"
- " rpc-transport (%s)", rpc_call_xid (&rpcmsg),
- rpc_call_rpcvers (&rpcmsg), rpc_call_program (&rpcmsg),
- rpc_call_progver (&rpcmsg), rpc_call_progproc (&rpcmsg),
- trans->name);
-
- if (rpc_call_rpcvers (&rpcmsg) != 2) {
- /* LOG- TODO: print rpc version, also print the peerinfo
- from transport */
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "RPC version not supported "
- "(XID: 0x%lx, Ver: %ld, Prog: %ld, ProgVers: %ld, "
- "Proc: %ld) from trans (%s)", rpc_call_xid (&rpcmsg),
- rpc_call_rpcvers (&rpcmsg), rpc_call_program (&rpcmsg),
- rpc_call_progver (&rpcmsg), rpc_call_progproc (&rpcmsg),
- trans->name);
- rpcsvc_request_seterr (req, RPC_MISMATCH);
- goto err;
- }
+ rpcsvc_alloc_request(svc, req);
+ req->prognum = RPCSVC_INFRA_PROGRAM;
+ req->procnum = RPCSVC_PROC_EVENT_THREAD_DEATH;
+ gf_log(GF_RPCSVC, GF_LOG_INFO,
+ "queuing event thread death request to queue %d of program %s", num,
+ prog->progname);
- ret = rpcsvc_authenticate (req);
- if (ret == RPCSVC_AUTH_REJECT) {
- /* No need to set auth_err, that is the responsibility of
- * the authentication handler since only that know what exact
- * error happened.
- */
- rpcsvc_request_seterr (req, AUTH_ERROR);
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "auth failed on request. "
- "(XID: 0x%lx, Ver: %ld, Prog: %ld, ProgVers: %ld, "
- "Proc: %ld) from trans (%s)", rpc_call_xid (&rpcmsg),
- rpc_call_rpcvers (&rpcmsg), rpc_call_program (&rpcmsg),
- rpc_call_progver (&rpcmsg), rpc_call_progproc (&rpcmsg),
- trans->name);
- ret = -1;
- goto err;
- }
+ pthread_mutex_lock(&queue->queue_lock);
+ {
+ empty = list_empty(&queue->request_queue);
+ list_add_tail(&req->request_list, &queue->request_queue);
+ queue->gen = gen;
- /* If the error is not RPC_MISMATCH, we consider the call as accepted
- * since we are not handling authentication failures for now.
- */
- req->rpc_status = MSG_ACCEPTED;
- ret = 0;
-err:
- if (ret == -1) {
- ret = rpcsvc_error_reply (req);
- if (ret)
- gf_log ("rpcsvc", GF_LOG_WARNING,
- "failed to queue error reply");
- req = NULL;
- }
+ if (empty && queue->waiting)
+ pthread_cond_signal(&queue->queue_cond);
+ }
+ pthread_mutex_unlock(&queue->queue_lock);
- return req;
+ return;
}
-
int
-rpcsvc_handle_rpc_call (rpcsvc_t *svc, rpc_transport_t *trans,
- rpc_transport_pollin_t *msg)
+rpcsvc_handle_event_thread_death(rpcsvc_t *svc, rpc_transport_t *trans, int gen)
{
- rpcsvc_actor_t *actor = NULL;
- rpcsvc_request_t *req = NULL;
- int ret = -1;
- uint16_t port = 0;
- gf_boolean_t is_unix = _gf_false;
- gf_boolean_t unprivileged = _gf_false;
+ rpcsvc_program_t *prog = NULL;
+
+ pthread_rwlock_rdlock(&svc->rpclock);
+ {
+ list_for_each_entry(prog, &svc->programs, program)
+ {
+ if (prog->ownthread)
+ rpcsvc_queue_event_thread_death(svc, prog, gen);
+ }
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
- if (!trans || !svc)
- return -1;
+ return 0;
+}
- switch (trans->peerinfo.sockaddr.ss_family) {
+int
+rpcsvc_handle_rpc_call(rpcsvc_t *svc, rpc_transport_t *trans,
+ rpc_transport_pollin_t *msg)
+{
+ rpcsvc_actor_t *actor = NULL;
+ rpcsvc_actor actor_fn = NULL;
+ rpcsvc_request_t *req = NULL;
+ int ret = -1;
+ uint16_t port = 0;
+ gf_boolean_t is_unix = _gf_false, empty = _gf_false;
+ gf_boolean_t unprivileged = _gf_false, spawn_request_handler = 0;
+ drc_cached_op_t *reply = NULL;
+ rpcsvc_drc_globals_t *drc = NULL;
+ rpcsvc_request_queue_t *queue = NULL;
+ long num = 0;
+ void *value = NULL;
+
+ if (!trans || !svc)
+ return -1;
+
+ switch (trans->peerinfo.sockaddr.ss_family) {
case AF_INET:
- port = ((struct sockaddr_in *)&trans->peerinfo.sockaddr)->sin_port;
- break;
+ port = ((struct sockaddr_in *)&trans->peerinfo.sockaddr)->sin_port;
+ break;
case AF_INET6:
- port = ((struct sockaddr_in6 *)&trans->peerinfo.sockaddr)->sin6_port;
- break;
+ port = ((struct sockaddr_in6 *)&trans->peerinfo.sockaddr)
+ ->sin6_port;
+ break;
case AF_UNIX:
- is_unix = _gf_true;
- break;
+ is_unix = _gf_true;
+ break;
default:
- gf_log (GF_RPCSVC, GF_LOG_ERROR,
- "invalid address family (%d)",
- trans->peerinfo.sockaddr.ss_family);
- return -1;
- }
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "invalid address family (%d)",
+ trans->peerinfo.sockaddr.ss_family);
+ return -1;
+ }
+ if (is_unix == _gf_false) {
+ port = ntohs(port);
+ gf_log("rpcsvc", GF_LOG_TRACE, "Client port: %d", (int)port);
- if (is_unix == _gf_false) {
- port = ntohs (port);
+ if (port >= 1024)
+ unprivileged = _gf_true;
+ }
- gf_log ("rpcsvc", GF_LOG_TRACE, "Client port: %d", (int)port);
+ req = rpcsvc_request_create(svc, trans, msg);
+ if (!req)
+ goto out;
- if (port > 1024)
- unprivileged = _gf_true;
- }
+ if (!rpcsvc_request_accepted(req))
+ goto err_reply;
- req = rpcsvc_request_create (svc, trans, msg);
- if (!req)
- goto err;
+ actor = rpcsvc_program_actor(req);
+ if (!actor)
+ goto err_reply;
- if (!rpcsvc_request_accepted (req))
- goto err_reply;
+ if (0 == svc->allow_insecure && unprivileged && !actor->unprivileged) {
+ /* Non-privileged user, fail request */
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Request received from non-"
+ "privileged port. Failing request for %s.",
+ req->trans->peerinfo.identifier);
+ req->rpc_status = MSG_DENIED;
+ req->rpc_err = AUTH_ERROR;
+ req->auth_err = RPCSVC_AUTH_REJECT;
+ goto err_reply;
+ }
- actor = rpcsvc_program_actor (req);
- if (!actor)
- goto err_reply;
+ /* DRC */
+ if (rpcsvc_need_drc(req)) {
+ drc = req->svc->drc;
- if (0 == svc->allow_insecure && unprivileged && !actor->unprivileged) {
- /* Non-privileged user, fail request */
- gf_log ("glusterd", GF_LOG_ERROR,
- "Request received from non-"
- "privileged port. Failing request");
- rpcsvc_request_destroy (req);
- return -1;
- }
+ LOCK(&drc->lock);
+ {
+ reply = rpcsvc_drc_lookup(req);
+
+ /* retransmission of completed request, send cached reply */
+ if (reply && reply->state == DRC_OP_CACHED) {
+ gf_log(GF_RPCSVC, GF_LOG_INFO,
+ "duplicate request:"
+ " XID: 0x%x",
+ req->xid);
+ ret = rpcsvc_send_cached_reply(req, reply);
+ drc->cache_hits++;
+ UNLOCK(&drc->lock);
+ goto out;
- if (req->rpc_err == SUCCESS) {
- /* Before going to xlator code, set the THIS properly */
- THIS = svc->mydata;
+ } /* retransmitted request, original op in transit, drop it */
+ else if (reply && reply->state == DRC_OP_IN_TRANSIT) {
+ gf_log(GF_RPCSVC, GF_LOG_INFO,
+ "op in transit,"
+ " discarding. XID: 0x%x",
+ req->xid);
+ ret = 0;
+ drc->intransit_hits++;
+ rpcsvc_request_destroy(req);
+ UNLOCK(&drc->lock);
+ goto out;
- if (req->count == 2) {
- if (actor->vector_actor) {
- ret = actor->vector_actor (req, &req->msg[1], 1,
- req->iobref);
+ } /* fresh request, cache it as in-transit and proceed */
+ else {
+ ret = rpcsvc_cache_request(req);
+ }
+ }
+ UNLOCK(&drc->lock);
+ }
+
+ if (req->rpc_err == SUCCESS) {
+ /* Before going to xlator code, set the THIS properly */
+ THIS = svc->xl;
+
+ actor_fn = actor->actor;
+
+ if (!actor_fn) {
+ rpcsvc_request_seterr(req, PROC_UNAVAIL);
+ /* LOG TODO: print more info about procnum,
+ prognum etc, also print transport info */
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "No vectored handler present");
+ ret = RPCSVC_ACTOR_ERROR;
+ goto err_reply;
+ }
+
+ if (req->synctask) {
+ ret = synctask_new(THIS->ctx->env, (synctask_fn_t)actor_fn,
+ rpcsvc_check_and_reply_error, NULL, req);
+ } else if (req->ownthread) {
+ value = pthread_getspecific(req->prog->req_queue_key);
+ if (value == NULL) {
+ pthread_mutex_lock(&req->prog->thr_lock);
+ {
+ num = rpcsvc_get_free_queue_index(req->prog);
+ if (num != -1) {
+ num++;
+ value = (void *)num;
+ ret = pthread_setspecific(req->prog->req_queue_key,
+ value);
+ if (ret < 0) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING,
+ "setting request queue in TLS failed");
+ rpcsvc_toggle_queue_status(
+ req->prog, &req->prog->request_queue[num - 1],
+ req->prog->request_queue_status);
+ num = -1;
} else {
- rpcsvc_request_seterr (req, PROC_UNAVAIL);
- /* LOG TODO: print more info about procnum,
- prognum etc, also print transport info */
- gf_log (GF_RPCSVC, GF_LOG_ERROR,
- "No vectored handler present");
- ret = RPCSVC_ACTOR_ERROR;
+ spawn_request_handler = 1;
}
- } else if (actor->actor) {
- ret = actor->actor (req);
+ }
}
- }
+ pthread_mutex_unlock(&req->prog->thr_lock);
+ }
+
+ if (num == -1)
+ goto noqueue;
+
+ num = ((unsigned long)value) - 1;
+
+ queue = &req->prog->request_queue[num];
+
+ if (spawn_request_handler) {
+ ret = gf_thread_create(&queue->thread, NULL,
+ rpcsvc_request_handler, queue,
+ "rpcrqhnd");
+ if (!ret) {
+ gf_log(GF_RPCSVC, GF_LOG_INFO,
+ "spawned a request handler thread for queue %d",
+ (int)num);
+
+ req->prog->threadcount++;
+ } else {
+ gf_log(
+ GF_RPCSVC, GF_LOG_INFO,
+ "spawning a request handler thread for queue %d failed",
+ (int)num);
+ ret = pthread_setspecific(req->prog->req_queue_key, 0);
+ if (ret < 0) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING,
+ "resetting request queue in TLS failed");
+ }
+
+ rpcsvc_toggle_queue_status(
+ req->prog, &req->prog->request_queue[num - 1],
+ req->prog->request_queue_status);
+
+ goto noqueue;
+ }
+ }
-err_reply:
- if (ret == RPCSVC_ACTOR_ERROR) {
- ret = rpcsvc_error_reply (req);
+ pthread_mutex_lock(&queue->queue_lock);
+ {
+ empty = list_empty(&queue->request_queue);
+
+ list_add_tail(&req->request_list, &queue->request_queue);
+
+ if (empty && queue->waiting)
+ pthread_cond_signal(&queue->queue_cond);
+ }
+ pthread_mutex_unlock(&queue->queue_lock);
+
+ ret = 0;
+ } else {
+ noqueue:
+ ret = actor_fn(req);
}
+ }
- if (ret)
- gf_log ("rpcsvc", GF_LOG_WARNING, "failed to queue error reply");
+err_reply:
- /* No need to propagate error beyond this function since the reply
- * has now been queued. */
- ret = 0;
+ ret = rpcsvc_check_and_reply_error(ret, NULL, req);
+ /* No need to propagate error beyond this function since the reply
+ * has now been queued. */
+ ret = 0;
-err:
- return ret;
+out:
+ return ret;
}
-
int
-rpcsvc_handle_disconnect (rpcsvc_t *svc, rpc_transport_t *trans)
+rpcsvc_handle_disconnect(rpcsvc_t *svc, rpc_transport_t *trans)
{
- rpcsvc_event_t event;
- rpcsvc_notify_wrapper_t *wrappers = NULL, *wrapper;
- int32_t ret = -1, i = 0, wrapper_count = 0;
- rpcsvc_listener_t *listener = NULL;
-
- event = (trans->listener == NULL) ? RPCSVC_EVENT_LISTENER_DEAD
- : RPCSVC_EVENT_DISCONNECT;
+ rpcsvc_event_t event;
+ rpcsvc_notify_wrapper_t *wrappers = NULL, *wrapper;
+ int32_t ret = -1, i = 0, wrapper_count = 0;
+ rpcsvc_listener_t *listener = NULL;
- pthread_mutex_lock (&svc->rpclock);
- {
- if (!svc->notify_count)
- goto unlock;
+ event = (trans->listener == NULL) ? RPCSVC_EVENT_LISTENER_DEAD
+ : RPCSVC_EVENT_DISCONNECT;
- wrappers = GF_CALLOC (svc->notify_count, sizeof (*wrapper),
- gf_common_mt_rpcsvc_wrapper_t);
- if (!wrappers) {
- goto unlock;
- }
+ pthread_rwlock_rdlock(&svc->rpclock);
+ {
+ if (!svc->notify_count)
+ goto unlock;
- list_for_each_entry (wrapper, &svc->notify, list) {
- if (wrapper->notify) {
- wrappers[i++] = *wrapper;
- }
- }
+ wrappers = GF_CALLOC(svc->notify_count, sizeof(*wrapper),
+ gf_common_mt_rpcsvc_wrapper_t);
+ if (!wrappers) {
+ goto unlock;
+ }
- wrapper_count = i;
+ list_for_each_entry(wrapper, &svc->notify, list)
+ {
+ if (wrapper->notify) {
+ wrappers[i++] = *wrapper;
+ }
}
-unlock:
- pthread_mutex_unlock (&svc->rpclock);
- if (wrappers) {
- for (i = 0; i < wrapper_count; i++) {
- wrappers[i].notify (svc, wrappers[i].data,
- event, trans);
- }
+ wrapper_count = i;
+ }
+unlock:
+ pthread_rwlock_unlock(&svc->rpclock);
- GF_FREE (wrappers);
+ if (wrappers) {
+ for (i = 0; i < wrapper_count; i++) {
+ wrappers[i].notify(svc, wrappers[i].data, event, trans);
}
- if (event == RPCSVC_EVENT_LISTENER_DEAD) {
- listener = rpcsvc_get_listener (svc, -1, trans->listener);
- rpcsvc_listener_destroy (listener);
- }
+ GF_FREE(wrappers);
+ }
- return ret;
-}
+ if (event == RPCSVC_EVENT_LISTENER_DEAD) {
+ listener = rpcsvc_get_listener(svc, -1, trans->listener);
+ rpcsvc_listener_destroy(listener);
+ }
+ return ret;
+}
int
-rpcsvc_notify (rpc_transport_t *trans, void *mydata,
- rpc_transport_event_t event, void *data, ...)
+rpcsvc_notify(rpc_transport_t *trans, void *mydata, rpc_transport_event_t event,
+ void *data, ...)
{
- int ret = -1;
- rpc_transport_pollin_t *msg = NULL;
- rpc_transport_t *new_trans = NULL;
- rpcsvc_t *svc = NULL;
- rpcsvc_listener_t *listener = NULL;
-
- svc = mydata;
- if (svc == NULL) {
- goto out;
- }
-
- switch (event) {
+ int ret = -1;
+ rpc_transport_pollin_t *msg = NULL;
+ rpc_transport_t *new_trans = NULL;
+ rpcsvc_t *svc = NULL;
+ rpcsvc_listener_t *listener = NULL;
+
+ svc = mydata;
+ if (svc == NULL) {
+ goto out;
+ }
+
+ switch (event) {
case RPC_TRANSPORT_ACCEPT:
- new_trans = data;
- ret = rpcsvc_accept (svc, trans, new_trans);
- break;
+ new_trans = data;
+ ret = rpcsvc_accept(svc, trans, new_trans);
+ break;
case RPC_TRANSPORT_DISCONNECT:
- ret = rpcsvc_handle_disconnect (svc, trans);
- break;
+ ret = rpcsvc_handle_disconnect(svc, trans);
+ break;
case RPC_TRANSPORT_MSG_RECEIVED:
- msg = data;
- ret = rpcsvc_handle_rpc_call (svc, trans, msg);
- break;
+ msg = data;
+ ret = rpcsvc_handle_rpc_call(svc, trans, msg);
+ break;
case RPC_TRANSPORT_MSG_SENT:
- ret = 0;
- break;
+ ret = 0;
+ break;
case RPC_TRANSPORT_CONNECT:
- /* do nothing, no need for rpcsvc to handle this, client should
- * handle this event
- */
- /* print info about transport too : LOG TODO */
- gf_log ("rpcsvc", GF_LOG_CRITICAL,
- "got CONNECT event, which should have not come");
- ret = 0;
- break;
+ /* do nothing, no need for rpcsvc to handle this, client should
+ * handle this event
+ */
+ /* print info about transport too : LOG TODO */
+ gf_log("rpcsvc", GF_LOG_CRITICAL,
+ "got CONNECT event, which should have not come");
+ ret = 0;
+ break;
case RPC_TRANSPORT_CLEANUP:
- listener = rpcsvc_get_listener (svc, -1, trans->listener);
- if (listener == NULL) {
- goto out;
- }
+ listener = rpcsvc_get_listener(svc, -1, trans->listener);
+ if (listener == NULL) {
+ goto out;
+ }
- rpcsvc_program_notify (listener, RPCSVC_EVENT_TRANSPORT_DESTROY,
- trans);
- ret = 0;
- break;
+ rpcsvc_program_notify(listener, RPCSVC_EVENT_TRANSPORT_DESTROY,
+ trans);
+ ret = 0;
+ break;
case RPC_TRANSPORT_MAP_XID_REQUEST:
- /* FIXME: think about this later */
- gf_log ("rpcsvc", GF_LOG_CRITICAL,
- "got MAP_XID event, which should have not come");
- ret = 0;
- break;
- }
+ /* FIXME: think about this later */
+ gf_log("rpcsvc", GF_LOG_CRITICAL,
+ "got MAP_XID event, which should have not come");
+ ret = 0;
+ break;
+
+ case RPC_TRANSPORT_EVENT_THREAD_DIED:
+ rpcsvc_handle_event_thread_death(svc, trans,
+ (int)(unsigned long)data);
+ ret = 0;
+ break;
+ }
out:
- return ret;
+ return ret;
}
-
/* Given the RPC reply structure and the payload handed by the RPC program,
* encode the RPC record header into the buffer pointed by recordstart.
*/
struct iovec
-rpcsvc_record_build_header (char *recordstart, size_t rlen,
- struct rpc_msg reply, size_t payload)
+rpcsvc_record_build_header(char *recordstart, size_t rlen, struct rpc_msg reply,
+ size_t payload)
{
- struct iovec replyhdr;
- struct iovec txrecord = {0, 0};
- size_t fraglen = 0;
- int ret = -1;
-
- /* After leaving aside the 4 bytes for the fragment header, lets
- * encode the RPC reply structure into the buffer given to us.
- */
- ret = rpc_reply_to_xdr (&reply, recordstart, rlen, &replyhdr);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "Failed to create RPC reply");
- goto err;
- }
-
- fraglen = payload + replyhdr.iov_len;
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Reply fraglen %zu, payload: %zu, "
- "rpc hdr: %zu", fraglen, payload, replyhdr.iov_len);
-
- txrecord.iov_base = recordstart;
-
- /* Remember, this is only the vec for the RPC header and does not
- * include the payload above. We needed the payload only to calculate
- * the size of the full fragment. This size is sent in the fragment
- * header.
- */
- txrecord.iov_len = replyhdr.iov_len;
+ struct iovec replyhdr;
+ struct iovec txrecord = {0, 0};
+ size_t fraglen = 0;
+ int ret = -1;
+
+ /* After leaving aside the 4 bytes for the fragment header, lets
+ * encode the RPC reply structure into the buffer given to us.
+ */
+ ret = rpc_reply_to_xdr(&reply, recordstart, rlen, &replyhdr);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING, "Failed to create RPC reply");
+ goto err;
+ }
+
+ fraglen = payload + replyhdr.iov_len;
+ gf_log(GF_RPCSVC, GF_LOG_TRACE,
+ "Reply fraglen %zu, payload: %zu, "
+ "rpc hdr: %zu",
+ fraglen, payload, replyhdr.iov_len);
+
+ txrecord.iov_base = recordstart;
+
+ /* Remember, this is only the vec for the RPC header and does not
+ * include the payload above. We needed the payload only to calculate
+ * the size of the full fragment. This size is sent in the fragment
+ * header.
+ */
+ txrecord.iov_len = replyhdr.iov_len;
err:
- return txrecord;
+ return txrecord;
}
-inline int
-rpcsvc_get_callid (rpcsvc_t *rpc)
+static uint32_t
+rpc_callback_new_callid(struct rpc_transport *trans)
{
- return GF_UNIVERSAL_ANSWER;
+ uint32_t callid = 0;
+
+ pthread_mutex_lock(&trans->lock);
+ {
+ callid = ++trans->xid;
+ }
+ pthread_mutex_unlock(&trans->lock);
+
+ return callid;
}
int
-rpcsvc_fill_callback (int prognum, int progver, int procnum, int payload,
- uint64_t xid, struct rpc_msg *request)
+rpcsvc_fill_callback(int prognum, int progver, int procnum, int payload,
+ uint32_t xid, struct rpc_msg *request)
{
- int ret = -1;
+ int ret = -1;
- if (!request) {
- goto out;
- }
+ if (!request) {
+ goto out;
+ }
- memset (request, 0, sizeof (*request));
+ memset(request, 0, sizeof(*request));
- request->rm_xid = xid;
- request->rm_direction = CALL;
+ request->rm_xid = xid;
+ request->rm_direction = CALL;
- request->rm_call.cb_rpcvers = 2;
- request->rm_call.cb_prog = prognum;
- request->rm_call.cb_vers = progver;
- request->rm_call.cb_proc = procnum;
+ request->rm_call.cb_rpcvers = 2;
+ request->rm_call.cb_prog = prognum;
+ request->rm_call.cb_vers = progver;
+ request->rm_call.cb_proc = procnum;
- request->rm_call.cb_cred.oa_flavor = AUTH_NONE;
- request->rm_call.cb_cred.oa_base = NULL;
- request->rm_call.cb_cred.oa_length = 0;
+ request->rm_call.cb_cred.oa_flavor = AUTH_NONE;
+ request->rm_call.cb_cred.oa_base = NULL;
+ request->rm_call.cb_cred.oa_length = 0;
- request->rm_call.cb_verf.oa_flavor = AUTH_NONE;
- request->rm_call.cb_verf.oa_base = NULL;
- request->rm_call.cb_verf.oa_length = 0;
+ request->rm_call.cb_verf.oa_flavor = AUTH_NONE;
+ request->rm_call.cb_verf.oa_base = NULL;
+ request->rm_call.cb_verf.oa_length = 0;
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
struct iovec
-rpcsvc_callback_build_header (char *recordstart, size_t rlen,
+rpcsvc_callback_build_header(char *recordstart, size_t rlen,
struct rpc_msg *request, size_t payload)
{
- struct iovec requesthdr = {0, };
- struct iovec txrecord = {0, 0};
- int ret = -1;
- size_t fraglen = 0;
-
- ret = rpc_request_to_xdr (request, recordstart, rlen, &requesthdr);
- if (ret == -1) {
- gf_log ("rpcsvc", GF_LOG_WARNING,
- "Failed to create RPC request");
- goto out;
- }
-
- fraglen = payload + requesthdr.iov_len;
- gf_log ("rpcsvc", GF_LOG_TRACE, "Request fraglen %zu, payload: %zu, "
- "rpc hdr: %zu", fraglen, payload, requesthdr.iov_len);
-
- txrecord.iov_base = recordstart;
-
- /* Remember, this is only the vec for the RPC header and does not
- * include the payload above. We needed the payload only to calculate
- * the size of the full fragment. This size is sent in the fragment
- * header.
- */
- txrecord.iov_len = requesthdr.iov_len;
+ struct iovec requesthdr = {
+ 0,
+ };
+ struct iovec txrecord = {0, 0};
+ int ret = -1;
+ size_t fraglen = 0;
+
+ ret = rpc_request_to_xdr(request, recordstart, rlen, &requesthdr);
+ if (ret == -1) {
+ gf_log("rpcsvc", GF_LOG_WARNING, "Failed to create RPC request");
+ goto out;
+ }
+
+ fraglen = payload + requesthdr.iov_len;
+ gf_log("rpcsvc", GF_LOG_TRACE,
+ "Request fraglen %zu, payload: %zu, "
+ "rpc hdr: %zu",
+ fraglen, payload, requesthdr.iov_len);
+
+ txrecord.iov_base = recordstart;
+
+ /* Remember, this is only the vec for the RPC header and does not
+ * include the payload above. We needed the payload only to calculate
+ * the size of the full fragment. This size is sent in the fragment
+ * header.
+ */
+ txrecord.iov_len = requesthdr.iov_len;
out:
- return txrecord;
+ return txrecord;
}
-struct iobuf *
-rpcsvc_callback_build_record (rpcsvc_t *rpc, int prognum, int progver,
- int procnum, size_t payload, uint64_t xid,
- struct iovec *recbuf)
-{
- struct rpc_msg request = {0, };
- struct iobuf *request_iob = NULL;
- char *record = NULL;
- struct iovec recordhdr = {0, };
- size_t pagesize = 0;
- size_t xdr_size = 0;
- int ret = -1;
-
- if ((!rpc) || (!recbuf)) {
- goto out;
- }
-
- /* Fill the rpc structure and XDR it into the buffer got above. */
- ret = rpcsvc_fill_callback (prognum, progver, procnum, payload, xid,
- &request);
- if (ret == -1) {
- gf_log ("rpcsvc", GF_LOG_WARNING, "cannot build a rpc-request "
- "xid (%"PRIu64")", xid);
- goto out;
- }
-
- /* First, try to get a pointer into the buffer which the RPC
- * layer can use.
- */
- xdr_size = xdr_sizeof ((xdrproc_t)xdr_callmsg, &request);
+static struct iobuf *
+rpcsvc_callback_build_record(rpcsvc_t *rpc, int prognum, int progver,
+ int procnum, size_t payload, u_long xid,
+ struct iovec *recbuf)
+{
+ struct rpc_msg request = {
+ 0,
+ };
+ struct iobuf *request_iob = NULL;
+ char *record = NULL;
+ struct iovec recordhdr = {
+ 0,
+ };
+ size_t pagesize = 0;
+ size_t xdr_size = 0;
+ int ret = -1;
+
+ if ((!rpc) || (!recbuf)) {
+ goto out;
+ }
+
+ /* Fill the rpc structure and XDR it into the buffer got above. */
+ ret = rpcsvc_fill_callback(prognum, progver, procnum, payload, xid,
+ &request);
+ if (ret == -1) {
+ gf_log("rpcsvc", GF_LOG_WARNING,
+ "cannot build a rpc-request "
+ "xid (%lu)",
+ xid);
+ goto out;
+ }
+
+ /* First, try to get a pointer into the buffer which the RPC
+ * layer can use.
+ */
+ xdr_size = xdr_sizeof((xdrproc_t)xdr_callmsg, &request);
+
+ request_iob = iobuf_get2(rpc->ctx->iobuf_pool, (xdr_size + payload));
+ if (!request_iob) {
+ goto out;
+ }
+
+ pagesize = iobuf_pagesize(request_iob);
+
+ record = iobuf_ptr(request_iob); /* Now we have it. */
+
+ recordhdr = rpcsvc_callback_build_header(record, pagesize, &request,
+ payload);
+
+ if (!recordhdr.iov_base) {
+ gf_log("rpc-clnt", GF_LOG_ERROR,
+ "Failed to build record "
+ " header");
+ iobuf_unref(request_iob);
+ request_iob = NULL;
+ recbuf->iov_base = NULL;
+ goto out;
+ }
+
+ recbuf->iov_base = recordhdr.iov_base;
+ recbuf->iov_len = recordhdr.iov_len;
- request_iob = iobuf_get2 (rpc->ctx->iobuf_pool, (xdr_size + payload));
- if (!request_iob) {
- goto out;
- }
+out:
+ return request_iob;
+}
- pagesize = iobuf_pagesize (request_iob);
+int
+rpcsvc_request_submit(rpcsvc_t *rpc, rpc_transport_t *trans,
+ rpcsvc_cbk_program_t *prog, int procnum, void *req,
+ glusterfs_ctx_t *ctx, xdrproc_t xdrproc)
+{
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {
+ 0,
+ };
+ struct iobuf *iobuf = NULL;
+ ssize_t xdr_size = 0;
+ struct iobref *iobref = NULL;
+
+ if (!req)
+ goto out;
+
+ xdr_size = xdr_sizeof(xdrproc, req);
+
+ iobuf = iobuf_get2(ctx->iobuf_pool, xdr_size);
+ if (!iobuf)
+ goto out;
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize(iobuf);
+
+ ret = xdr_serialize_generic(iov, req, xdrproc);
+ if (ret == -1) {
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to create XDR payload");
+ goto out;
+ }
+ iov.iov_len = ret;
+ count = 1;
+
+ iobref = iobref_new();
+ if (!iobref) {
+ ret = -1;
+ gf_log("rpcsvc", GF_LOG_WARNING, "Failed to create iobref");
+ goto out;
+ }
- record = iobuf_ptr (request_iob); /* Now we have it. */
+ iobref_add(iobref, iobuf);
- recordhdr = rpcsvc_callback_build_header (record, pagesize, &request,
- payload);
+ ret = rpcsvc_callback_submit(rpc, trans, prog, procnum, &iov, count,
+ iobref);
- if (!recordhdr.iov_base) {
- gf_log ("rpc-clnt", GF_LOG_ERROR, "Failed to build record "
- " header");
- iobuf_unref (request_iob);
- request_iob = NULL;
- recbuf->iov_base = NULL;
- goto out;
- }
+out:
+ if (iobuf)
+ iobuf_unref(iobuf);
- recbuf->iov_base = recordhdr.iov_base;
- recbuf->iov_len = recordhdr.iov_len;
+ if (iobref)
+ iobref_unref(iobref);
-out:
- return request_iob;
+ return ret;
}
int
-rpcsvc_callback_submit (rpcsvc_t *rpc, rpc_transport_t *trans,
- rpcsvc_cbk_program_t *prog, int procnum,
- struct iovec *proghdr, int proghdrcount)
-{
- struct iobuf *request_iob = NULL;
- struct iovec rpchdr = {0,};
- rpc_transport_req_t req;
- int ret = -1;
- int proglen = 0;
- uint64_t callid = 0;
-
- if (!rpc) {
- goto out;
- }
-
- memset (&req, 0, sizeof (req));
-
- callid = rpcsvc_get_callid (rpc);
-
- if (proghdr) {
- proglen += iov_length (proghdr, proghdrcount);
+rpcsvc_callback_submit(rpcsvc_t *rpc, rpc_transport_t *trans,
+ rpcsvc_cbk_program_t *prog, int procnum,
+ struct iovec *proghdr, int proghdrcount,
+ struct iobref *iobref)
+{
+ struct iobuf *request_iob = NULL;
+ struct iovec rpchdr = {
+ 0,
+ };
+ rpc_transport_req_t req;
+ int ret = -1;
+ int proglen = 0;
+ uint32_t xid = 0;
+ gf_boolean_t new_iobref = _gf_false;
+
+ if (!rpc) {
+ goto out;
+ }
+
+ memset(&req, 0, sizeof(req));
+
+ if (proghdr) {
+ proglen += iov_length(proghdr, proghdrcount);
+ }
+
+ xid = rpc_callback_new_callid(trans);
+
+ request_iob = rpcsvc_callback_build_record(
+ rpc, prog->prognum, prog->progver, procnum, proglen, xid, &rpchdr);
+ if (!request_iob) {
+ gf_log("rpcsvc", GF_LOG_WARNING, "cannot build rpc-record");
+ goto out;
+ }
+ if (!iobref) {
+ iobref = iobref_new();
+ if (!iobref) {
+ gf_log("rpcsvc", GF_LOG_WARNING, "Failed to create iobref");
+ goto out;
}
+ new_iobref = 1;
+ }
- request_iob = rpcsvc_callback_build_record (rpc, prog->prognum,
- prog->progver, procnum,
- proglen, callid,
- &rpchdr);
- if (!request_iob) {
- gf_log ("rpcsvc", GF_LOG_WARNING,
- "cannot build rpc-record");
- goto out;
- }
+ iobref_add(iobref, request_iob);
- req.msg.rpchdr = &rpchdr;
- req.msg.rpchdrcount = 1;
- req.msg.proghdr = proghdr;
- req.msg.proghdrcount = proghdrcount;
+ req.msg.rpchdr = &rpchdr;
+ req.msg.rpchdrcount = 1;
+ req.msg.proghdr = proghdr;
+ req.msg.proghdrcount = proghdrcount;
+ req.msg.iobref = iobref;
- ret = rpc_transport_submit_request (trans, &req);
- if (ret == -1) {
- gf_log ("rpcsvc", GF_LOG_WARNING,
- "transmission of rpc-request failed");
- goto out;
- }
+ ret = rpc_transport_submit_request(trans, &req);
+ if (ret == -1) {
+ gf_log("rpcsvc", GF_LOG_WARNING, "transmission of rpc-request failed");
+ goto out;
+ }
- ret = 0;
+ ret = 0;
out:
- iobuf_unref (request_iob);
+ iobuf_unref(request_iob);
- return ret;
+ if (new_iobref)
+ iobref_unref(iobref);
+
+ return ret;
}
-inline int
-rpcsvc_transport_submit (rpc_transport_t *trans, struct iovec *hdrvec,
- int hdrcount, struct iovec *proghdr, int proghdrcount,
- struct iovec *progpayload, int progpayloadcount,
- struct iobref *iobref, void *priv)
+int
+rpcsvc_transport_submit(rpc_transport_t *trans, struct iovec *rpchdr,
+ int rpchdrcount, struct iovec *proghdr,
+ int proghdrcount, struct iovec *progpayload,
+ int progpayloadcount, struct iobref *iobref, void *priv)
{
- int ret = -1;
- rpc_transport_reply_t reply = {{0, }};
-
- if ((!trans) || (!hdrvec) || (!hdrvec->iov_base)) {
- goto out;
- }
-
- reply.msg.rpchdr = hdrvec;
- reply.msg.rpchdrcount = hdrcount;
- reply.msg.proghdr = proghdr;
- reply.msg.proghdrcount = proghdrcount;
- reply.msg.progpayload = progpayload;
- reply.msg.progpayloadcount = progpayloadcount;
- reply.msg.iobref = iobref;
- reply.private = priv;
-
- ret = rpc_transport_submit_reply (trans, &reply);
+ int ret = -1;
+ rpc_transport_reply_t reply = {
+ 0,
+ };
+
+ if ((!trans) || (!rpchdr) || (!rpchdr->iov_base)) {
+ goto out;
+ }
+
+ reply.msg.rpchdr = rpchdr;
+ reply.msg.rpchdrcount = rpchdrcount;
+ reply.msg.proghdr = proghdr;
+ reply.msg.proghdrcount = proghdrcount;
+ reply.msg.progpayload = progpayload;
+ reply.msg.progpayloadcount = progpayloadcount;
+ reply.msg.iobref = iobref;
+ reply.private = priv;
+
+ ret = rpc_transport_submit_reply(trans, &reply);
out:
- return ret;
+ return ret;
}
-
int
-rpcsvc_fill_reply (rpcsvc_request_t *req, struct rpc_msg *reply)
+rpcsvc_fill_reply(rpcsvc_request_t *req, struct rpc_msg *reply)
{
- int ret = -1;
- rpcsvc_program_t *prog = NULL;
- if ((!req) || (!reply))
- goto out;
-
- ret = 0;
- rpc_fill_empty_reply (reply, req->xid);
- if (req->rpc_status == MSG_DENIED) {
- rpc_fill_denied_reply (reply, req->rpc_err, req->auth_err);
- goto out;
- }
-
- prog = rpcsvc_request_program (req);
-
- if (req->rpc_status == MSG_ACCEPTED)
- rpc_fill_accepted_reply (reply, req->rpc_err,
- (prog) ? prog->proglowvers : 0,
- (prog) ? prog->proghighvers: 0,
- req->verf.flavour, req->verf.datalen,
- req->verf.authdata);
- else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Invalid rpc_status value");
+ int ret = -1;
+ rpcsvc_program_t *prog = NULL;
+ if ((!req) || (!reply))
+ goto out;
+
+ ret = 0;
+ rpc_fill_empty_reply(reply, req->xid);
+ if (req->rpc_status == MSG_DENIED) {
+ rpc_fill_denied_reply(reply, req->rpc_err, req->auth_err);
+ goto out;
+ }
+
+ prog = rpcsvc_request_program(req);
+
+ if (req->rpc_status == MSG_ACCEPTED)
+ rpc_fill_accepted_reply(
+ reply, req->rpc_err, (prog) ? prog->proglowvers : 0,
+ (prog) ? prog->proghighvers : 0, req->verf.flavour,
+ req->verf.datalen, req->verf.authdata);
+ else
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Invalid rpc_status value");
out:
- return ret;
+ return ret;
}
-
/* Given a request and the reply payload, build a reply and encodes the reply
* into a record header. This record header is encoded into the vector pointed
* to be recbuf.
@@ -942,59 +1400,60 @@ out:
* we should account for the length of that buffer in the RPC fragment header.
*/
struct iobuf *
-rpcsvc_record_build_record (rpcsvc_request_t *req, size_t payload,
- size_t hdrlen, struct iovec *recbuf)
+rpcsvc_record_build_record(rpcsvc_request_t *req, size_t payload, size_t hdrlen,
+ struct iovec *recbuf)
{
- struct rpc_msg reply;
- struct iobuf *replyiob = NULL;
- char *record = NULL;
- struct iovec recordhdr = {0, };
- size_t pagesize = 0;
- size_t xdr_size = 0;
- rpcsvc_t *svc = NULL;
- int ret = -1;
-
- if ((!req) || (!req->trans) || (!req->svc) || (!recbuf))
- return NULL;
-
- svc = req->svc;
-
- /* Fill the rpc structure and XDR it into the buffer got above. */
- ret = rpcsvc_fill_reply (req, &reply);
- if (ret)
- goto err_exit;
-
- xdr_size = xdr_sizeof ((xdrproc_t)xdr_replymsg, &reply);
-
- /* Payload would include 'readv' size etc too, where as
- that comes as another payload iobuf */
- replyiob = iobuf_get2 (svc->ctx->iobuf_pool, (xdr_size + hdrlen));
- if (!replyiob) {
- goto err_exit;
- }
-
- pagesize = iobuf_pagesize (replyiob);
-
- record = iobuf_ptr (replyiob); /* Now we have it. */
-
- recordhdr = rpcsvc_record_build_header (record, pagesize, reply,
- payload);
- if (!recordhdr.iov_base) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to build record "
- " header");
- iobuf_unref (replyiob);
- replyiob = NULL;
- recbuf->iov_base = NULL;
- goto err_exit;
- }
-
- recbuf->iov_base = recordhdr.iov_base;
- recbuf->iov_len = recordhdr.iov_len;
+ struct rpc_msg reply;
+ struct iobuf *replyiob = NULL;
+ char *record = NULL;
+ struct iovec recordhdr = {
+ 0,
+ };
+ size_t pagesize = 0;
+ size_t xdr_size = 0;
+ rpcsvc_t *svc = NULL;
+ int ret = -1;
+
+ if ((!req) || (!req->trans) || (!req->svc) || (!recbuf))
+ return NULL;
+
+ svc = req->svc;
+
+ /* Fill the rpc structure and XDR it into the buffer got above. */
+ ret = rpcsvc_fill_reply(req, &reply);
+ if (ret)
+ goto err_exit;
+
+ xdr_size = xdr_sizeof((xdrproc_t)xdr_replymsg, &reply);
+
+ /* Payload would include 'readv' size etc too, where as
+ that comes as another payload iobuf */
+ replyiob = iobuf_get2(svc->ctx->iobuf_pool, (xdr_size + hdrlen));
+ if (!replyiob) {
+ goto err_exit;
+ }
+
+ pagesize = iobuf_pagesize(replyiob);
+
+ record = iobuf_ptr(replyiob); /* Now we have it. */
+
+ recordhdr = rpcsvc_record_build_header(record, pagesize, reply, payload);
+ if (!recordhdr.iov_base) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Failed to build record "
+ " header");
+ iobuf_unref(replyiob);
+ replyiob = NULL;
+ recbuf->iov_base = NULL;
+ goto err_exit;
+ }
+
+ recbuf->iov_base = recordhdr.iov_base;
+ recbuf->iov_len = recordhdr.iov_len;
err_exit:
- return replyiob;
+ return replyiob;
}
-
/*
* The function to submit a program message to the RPC service.
* This message is added to the transmission queue of the
@@ -1022,231 +1481,344 @@ err_exit:
*/
int
-rpcsvc_submit_generic (rpcsvc_request_t *req, struct iovec *proghdr,
- int hdrcount, struct iovec *payload, int payloadcount,
- struct iobref *iobref)
+rpcsvc_submit_generic(rpcsvc_request_t *req, struct iovec *proghdr,
+ int hdrcount, struct iovec *payload, int payloadcount,
+ struct iobref *iobref)
{
- int ret = -1, i = 0;
- struct iobuf *replyiob = NULL;
- struct iovec recordhdr = {0, };
- rpc_transport_t *trans = NULL;
- size_t msglen = 0;
- size_t hdrlen = 0;
- char new_iobref = 0;
-
- if ((!req) || (!req->trans))
- return -1;
-
- trans = req->trans;
-
- for (i = 0; i < hdrcount; i++) {
- msglen += proghdr[i].iov_len;
- }
-
- for (i = 0; i < payloadcount; i++) {
- msglen += payload[i].iov_len;
- }
-
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Tx message: %zu", msglen);
-
- /* Build the buffer containing the encoded RPC reply. */
- replyiob = rpcsvc_record_build_record (req, msglen, hdrlen, &recordhdr);
- if (!replyiob) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR,"Reply record creation failed");
- goto disconnect_exit;
- }
-
+ int ret = -1, i = 0;
+ struct iobuf *replyiob = NULL;
+ struct iovec recordhdr = {
+ 0,
+ };
+ rpc_transport_t *trans = NULL;
+ size_t msglen = 0;
+ size_t hdrlen = 0;
+ char new_iobref = 0;
+ rpcsvc_drc_globals_t *drc = NULL;
+ gf_latency_t *lat = NULL;
+
+ if ((!req) || (!req->trans))
+ return -1;
+
+ if (req->prog && req->begin.tv_sec) {
+ if ((req->procnum >= 0) && (req->procnum < req->prog->numactors)) {
+ timespec_now(&req->end);
+ lat = &req->prog->latencies[req->procnum];
+ gf_latency_update(lat, &req->begin, &req->end);
+ }
+ }
+ trans = req->trans;
+
+ for (i = 0; i < hdrcount; i++) {
+ msglen += proghdr[i].iov_len;
+ }
+
+ for (i = 0; i < payloadcount; i++) {
+ msglen += payload[i].iov_len;
+ }
+
+ gf_log(GF_RPCSVC, GF_LOG_TRACE, "Tx message: %zu", msglen);
+
+ /* Build the buffer containing the encoded RPC reply. */
+ replyiob = rpcsvc_record_build_record(req, msglen, hdrlen, &recordhdr);
+ if (!replyiob) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Reply record creation failed");
+ goto disconnect_exit;
+ }
+
+ if (!iobref) {
+ iobref = iobref_new();
if (!iobref) {
- iobref = iobref_new ();
- if (!iobref) {
- goto disconnect_exit;
- }
-
- new_iobref = 1;
+ goto disconnect_exit;
}
- iobref_add (iobref, replyiob);
+ new_iobref = 1;
+ }
- ret = rpcsvc_transport_submit (trans, &recordhdr, 1, proghdr, hdrcount,
- payload, payloadcount, iobref,
- req->trans_private);
+ iobref_add(iobref, replyiob);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "failed to submit message "
- "(XID: 0x%ux, Program: %s, ProgVers: %d, Proc: %d) to "
- "rpc-transport (%s)", req->xid,
- req->prog ? req->prog->progname : "(not matched)",
- req->prog ? req->prog->progver : 0,
- req->procnum, trans->name);
- } else {
- gf_log (GF_RPCSVC, GF_LOG_TRACE,
- "submitted reply for rpc-message (XID: 0x%ux, "
- "Program: %s, ProgVers: %d, Proc: %d) to rpc-transport "
- "(%s)", req->xid, req->prog ? req->prog->progname: "-",
- req->prog ? req->prog->progver : 0,
- req->procnum, trans->name);
- }
+ /* cache the request in the duplicate request cache for appropriate ops */
+ if ((req->reply) && (rpcsvc_need_drc(req))) {
+ drc = req->svc->drc;
+
+ LOCK(&drc->lock);
+ ret = rpcsvc_cache_reply(req, iobref, &recordhdr, 1, proghdr, hdrcount,
+ payload, payloadcount);
+ UNLOCK(&drc->lock);
+ if (ret < 0) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "failed to cache reply");
+ }
+ }
+
+ ret = rpcsvc_transport_submit(trans, &recordhdr, 1, proghdr, hdrcount,
+ payload, payloadcount, iobref,
+ req->trans_private);
+
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "failed to submit message "
+ "(XID: 0x%x, Program: %s, ProgVers: %d, Proc: %d) to "
+ "rpc-transport (%s)",
+ req->xid, req->prog ? req->prog->progname : "(not matched)",
+ req->prog ? req->prog->progver : 0, req->procnum,
+ trans ? trans->name : "");
+ } else {
+ gf_log(GF_RPCSVC, GF_LOG_TRACE,
+ "submitted reply for rpc-message (XID: 0x%x, "
+ "Program: %s, ProgVers: %d, Proc: %d) to rpc-transport "
+ "(%s)",
+ req->xid, req->prog ? req->prog->progname : "-",
+ req->prog ? req->prog->progver : 0, req->procnum,
+ trans ? trans->name : "");
+ }
disconnect_exit:
- if (replyiob) {
- iobuf_unref (replyiob);
- }
+ if (replyiob) {
+ iobuf_unref(replyiob);
+ }
- if (new_iobref) {
- iobref_unref (iobref);
- }
+ if (new_iobref) {
+ iobref_unref(iobref);
+ }
- rpcsvc_request_destroy (req);
+ rpcsvc_request_destroy(req);
- return ret;
+ return ret;
}
-
int
-rpcsvc_error_reply (rpcsvc_request_t *req)
+rpcsvc_error_reply(rpcsvc_request_t *req)
{
- struct iovec dummyvec = {0, };
+ struct iovec dummyvec = {
+ 0,
+ };
- if (!req)
- return -1;
+ if (!req)
+ return -1;
- gf_log_callingfn ("", GF_LOG_DEBUG, "sending a RPC error reply");
+ gf_log_callingfn("", GF_LOG_DEBUG, "sending a RPC error reply");
- /* At this point the req should already have been filled with the
- * appropriate RPC error numbers.
- */
- return rpcsvc_submit_generic (req, &dummyvec, 0, NULL, 0, NULL);
+ /* At this point the req should already have been filled with the
+ * appropriate RPC error numbers.
+ */
+ return rpcsvc_submit_generic(req, &dummyvec, 0, NULL, 0, NULL);
}
-
-/* Register the program with the local portmapper service. */
-inline int
-rpcsvc_program_register_portmap (rpcsvc_program_t *newprog, uint32_t port)
+#ifdef IPV6_DEFAULT
+int
+rpcsvc_program_register_rpcbind6(rpcsvc_program_t *newprog, uint32_t port)
{
- int ret = 0;
+ const int IP_BUF_LEN = 64;
+ char addr_buf[IP_BUF_LEN];
+
+ int err = 0;
+ bool_t success = 0;
+ struct netconfig *nc;
+ struct netbuf *nb;
+
+ if (!newprog) {
+ goto out;
+ }
+
+ nc = getnetconfigent("tcp6");
+ if (!nc) {
+ err = -1;
+ goto out;
+ }
+
+ err = sprintf(addr_buf, "::.%d.%d", port >> 8 & 0xff, port & 0xff);
+ if (err < 0) {
+ err = -1;
+ goto out;
+ }
+
+ nb = uaddr2taddr(nc, addr_buf);
+ if (!nb) {
+ err = -1;
+ goto out;
+ }
+
+ /* Force the unregistration of the program first.
+ * This call may fail if nothing has been registered,
+ * which is fine.
+ */
+ rpcsvc_program_unregister_rpcbind6(newprog);
+
+ success = rpcb_set(newprog->prognum, newprog->progver, nc, nb);
+ if (!success) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Could not register the IPv6"
+ " service with rpcbind");
+ }
+
+ err = 0;
- if (!newprog) {
- goto out;
- }
-
- if (!(pmap_set (newprog->prognum, newprog->progver, IPPROTO_TCP,
- port))) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Could not register with"
- " portmap");
- goto out;
- }
+out:
+ return err;
+}
- ret = 0;
+int
+rpcsvc_program_unregister_rpcbind6(rpcsvc_program_t *newprog)
+{
+ int err = 0;
+ bool_t success = 0;
+ struct netconfig *nc;
+
+ if (!newprog) {
+ goto out;
+ }
+
+ nc = getnetconfigent("tcp6");
+ if (!nc) {
+ err = -1;
+ goto out;
+ }
+
+ success = rpcb_unset(newprog->prognum, newprog->progver, nc);
+ if (!success) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Could not unregister the IPv6"
+ " service with rpcbind");
+ }
+
+ err = 0;
out:
- return ret;
+ return err;
}
+#endif
+/* Register the program with the local portmapper service. */
+int
+rpcsvc_program_register_portmap(rpcsvc_program_t *newprog, uint32_t port)
+{
+ int ret = -1; /* FAIL */
+
+ if (!newprog) {
+ goto out;
+ }
+
+ /* pmap_set() returns 0 for FAIL and 1 for SUCCESS */
+ if (!(pmap_set(newprog->prognum, newprog->progver, IPPROTO_TCP, port))) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Could not register with"
+ " portmap %d %d %u",
+ newprog->prognum, newprog->progver, port);
+ goto out;
+ }
+
+ ret = 0; /* SUCCESS */
+out:
+ return ret;
+}
-inline int
-rpcsvc_program_unregister_portmap (rpcsvc_program_t *prog)
+int
+rpcsvc_program_unregister_portmap(rpcsvc_program_t *prog)
{
- int ret = 0;
+ int ret = -1;
- if (!prog)
- goto out;
+ if (!prog)
+ goto out;
- if (!(pmap_unset(prog->prognum, prog->progver))) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Could not unregister with"
- " portmap");
- goto out;
- }
+ if (!(pmap_unset(prog->prognum, prog->progver))) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Could not unregister with"
+ " portmap");
+ goto out;
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-rpcsvc_register_portmap_enabled (rpcsvc_t *svc)
+rpcsvc_register_portmap_enabled(rpcsvc_t *svc)
{
- return svc->register_portmap;
+ return svc->register_portmap;
}
int32_t
-rpcsvc_get_listener_port (rpcsvc_listener_t *listener)
+rpcsvc_get_listener_port(rpcsvc_listener_t *listener)
{
- int32_t listener_port = -1;
+ int32_t listener_port = -1;
- if ((listener == NULL) || (listener->trans == NULL)) {
- goto out;
- }
+ if ((listener == NULL) || (listener->trans == NULL)) {
+ goto out;
+ }
- switch (listener->trans->myinfo.sockaddr.ss_family) {
+ switch (listener->trans->myinfo.sockaddr.ss_family) {
case AF_INET:
- listener_port = ((struct sockaddr_in *)&listener->trans->myinfo.sockaddr)->sin_port;
- break;
+ listener_port = ((struct sockaddr_in *)&listener->trans->myinfo
+ .sockaddr)
+ ->sin_port;
+ break;
case AF_INET6:
- listener_port = ((struct sockaddr_in6 *)&listener->trans->myinfo.sockaddr)->sin6_port;
- break;
+ listener_port = ((struct sockaddr_in6 *)&listener->trans->myinfo
+ .sockaddr)
+ ->sin6_port;
+ break;
default:
- gf_log (GF_RPCSVC, GF_LOG_DEBUG,
- "invalid address family (%d)",
- listener->trans->myinfo.sockaddr.ss_family);
- goto out;
- }
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "invalid address family (%d)",
+ listener->trans->myinfo.sockaddr.ss_family);
+ goto out;
+ }
- listener_port = ntohs (listener_port);
+ listener_port = ntohs(listener_port);
out:
- return listener_port;
+ return listener_port;
}
-
rpcsvc_listener_t *
-rpcsvc_get_listener (rpcsvc_t *svc, uint16_t port, rpc_transport_t *trans)
+rpcsvc_get_listener(rpcsvc_t *svc, uint16_t port, rpc_transport_t *trans)
{
- rpcsvc_listener_t *listener = NULL;
- char found = 0;
- uint32_t listener_port = 0;
-
- if (!svc) {
- goto out;
- }
-
- pthread_mutex_lock (&svc->rpclock);
+ rpcsvc_listener_t *listener = NULL;
+ char found = 0;
+ rpcsvc_listener_t *next = NULL;
+ uint32_t listener_port = 0;
+
+ if (!svc) {
+ goto out;
+ }
+
+ pthread_rwlock_rdlock(&svc->rpclock);
+ {
+ list_for_each_entry_safe(listener, next, &svc->listeners, list)
{
- list_for_each_entry (listener, &svc->listeners, list) {
- if (trans != NULL) {
- if (listener->trans == trans) {
- found = 1;
- break;
- }
-
- continue;
- }
+ if (trans != NULL) {
+ if (listener->trans == trans) {
+ found = 1;
+ break;
+ }
- listener_port = rpcsvc_get_listener_port (listener);
- if (listener_port == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR,
- "invalid port for listener %s",
- listener->trans->name);
- continue;
- }
+ continue;
+ }
- if (listener_port == port) {
- found = 1;
- break;
- }
- }
- }
- pthread_mutex_unlock (&svc->rpclock);
+ listener_port = rpcsvc_get_listener_port(listener);
+ if (listener_port == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "invalid port for listener %s",
+ listener->trans->name);
+ continue;
+ }
- if (!found) {
- listener = NULL;
+ if (listener_port == port) {
+ found = 1;
+ break;
+ }
}
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
+
+ if (!found) {
+ listener = NULL;
+ }
out:
- return listener;
+ return listener;
}
-
/* The only difference between the generic submit and this one is that the
* generic submit is also used for submitting RPC error replies in where there
* are no payloads so the msgvec and msgbuf can be NULL.
@@ -1254,1199 +1826,1470 @@ out:
* we must perform NULL checks before calling the generic submit.
*/
int
-rpcsvc_submit_message (rpcsvc_request_t *req, struct iovec *proghdr,
- int hdrcount, struct iovec *payload, int payloadcount,
- struct iobref *iobref)
+rpcsvc_submit_message(rpcsvc_request_t *req, struct iovec *proghdr,
+ int hdrcount, struct iovec *payload, int payloadcount,
+ struct iobref *iobref)
{
- if ((!req) || (!req->trans) || (!proghdr) || (!proghdr->iov_base))
- return -1;
+ if ((!req) || (!req->trans) || (!proghdr) || (!proghdr->iov_base))
+ return -1;
- return rpcsvc_submit_generic (req, proghdr, hdrcount, payload,
- payloadcount, iobref);
+ return rpcsvc_submit_generic(req, proghdr, hdrcount, payload, payloadcount,
+ iobref);
}
+void
+rpcsvc_program_destroy(rpcsvc_program_t *program)
+{
+ if (program) {
+ GF_FREE(program->latencies);
+ GF_FREE(program);
+ }
+}
int
-rpcsvc_program_unregister (rpcsvc_t *svc, rpcsvc_program_t *program)
+rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
{
- int ret = -1;
- rpcsvc_program_t *prog = NULL;
- if (!svc || !program) {
- goto out;
- }
-
- ret = rpcsvc_program_unregister_portmap (program);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "portmap unregistration of"
- " program failed");
- goto out;
- }
-
- pthread_mutex_lock (&svc->rpclock);
+ int ret = -1;
+ rpcsvc_program_t *prog = NULL;
+ if (!svc || !program) {
+ goto out;
+ }
+
+ pthread_rwlock_rdlock(&svc->rpclock);
+ {
+ list_for_each_entry(prog, &svc->programs, program)
{
- list_for_each_entry (prog, &svc->programs, program) {
- if ((prog->prognum == program->prognum)
- && (prog->progver == program->progver)) {
- break;
- }
- }
- }
- pthread_mutex_unlock (&svc->rpclock);
-
- if (prog == NULL) {
- ret = -1;
- goto out;
- }
-
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Program unregistered: %s, Num: %d,"
- " Ver: %d, Port: %d", prog->progname, prog->prognum,
- prog->progver, prog->progport);
+ if ((prog->prognum == program->prognum) &&
+ (prog->progver == program->progver)) {
+ break;
+ }
+ }
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
+
+ ret = rpcsvc_program_unregister_portmap(program);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "portmap unregistration of"
+ " program failed");
+ goto out;
+ }
+#ifdef IPV6_DEFAULT
+ ret = rpcsvc_program_unregister_rpcbind6(program);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "rpcbind (ipv6)"
+ " unregistration of program failed");
+ goto out;
+ }
+#endif
- pthread_mutex_lock (&svc->rpclock);
- {
- list_del_init (&prog->program);
- }
- pthread_mutex_unlock (&svc->rpclock);
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "Program unregistered: %s, Num: %d,"
+ " Ver: %d, Port: %d",
+ prog->progname, prog->prognum, prog->progver, prog->progport);
+ if (prog->ownthread) {
+ prog->alive = _gf_false;
ret = 0;
-out:
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Program unregistration failed"
- ": %s, Num: %d, Ver: %d, Port: %d", program->progname,
- program->prognum, program->progver, program->progport);
- }
-
- return ret;
-}
+ goto out;
+ }
+ pthread_rwlock_wrlock(&svc->rpclock);
+ {
+ list_del_init(&prog->program);
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
-inline int
-rpcsvc_transport_peername (rpc_transport_t *trans, char *hostname, int hostlen)
-{
- if (!trans) {
- return -1;
+ ret = 0;
+out:
+ rpcsvc_program_destroy(prog);
+
+ if (ret == -1) {
+ if (program) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Program "
+ "unregistration failed"
+ ": %s, Num: %d, Ver: %d, Port: %d",
+ program->progname, program->prognum, program->progver,
+ program->progport);
+ } else {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Program not found");
}
+ }
- return rpc_transport_get_peername (trans, hostname, hostlen);
+ return ret;
}
-
-inline int
-rpcsvc_transport_peeraddr (rpc_transport_t *trans, char *addrstr, int addrlen,
- struct sockaddr_storage *sa, socklen_t sasize)
+int
+rpcsvc_transport_peername(rpc_transport_t *trans, char *hostname, int hostlen)
{
- if (!trans) {
- return -1;
- }
+ if (!trans) {
+ return -1;
+ }
- return rpc_transport_get_peeraddr(trans, addrstr, addrlen, sa,
- sasize);
+ return rpc_transport_get_peername(trans, hostname, hostlen);
}
-
-rpc_transport_t *
-rpcsvc_transport_create (rpcsvc_t *svc, dict_t *options, char *name)
+int
+rpcsvc_transport_peeraddr(rpc_transport_t *trans, char *addrstr, int addrlen,
+ struct sockaddr_storage *sa, socklen_t sasize)
{
- int ret = -1;
- rpc_transport_t *trans = NULL;
-
- trans = rpc_transport_load (svc->ctx, options, name);
- if (!trans) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "cannot create listener, "
- "initing the transport failed");
- goto out;
- }
-
- ret = rpc_transport_listen (trans);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING,
- "listening on transport failed");
- goto out;
- }
-
- ret = rpc_transport_register_notify (trans, rpcsvc_notify, svc);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_WARNING, "registering notify failed");
- goto out;
- }
-
- ret = 0;
-out:
- if ((ret == -1) && (trans)) {
- rpc_transport_disconnect (trans);
- trans = NULL;
- }
+ if (!trans) {
+ return -1;
+ }
- return trans;
+ return rpc_transport_get_peeraddr(trans, addrstr, addrlen, sa, sasize);
}
rpcsvc_listener_t *
-rpcsvc_listener_alloc (rpcsvc_t *svc, rpc_transport_t *trans)
+rpcsvc_listener_alloc(rpcsvc_t *svc, rpc_transport_t *trans)
{
- rpcsvc_listener_t *listener = NULL;
+ rpcsvc_listener_t *listener = NULL;
- listener = GF_CALLOC (1, sizeof (*listener),
- gf_common_mt_rpcsvc_listener_t);
- if (!listener) {
- goto out;
- }
+ listener = GF_CALLOC(1, sizeof(*listener), gf_common_mt_rpcsvc_listener_t);
+ if (!listener) {
+ goto out;
+ }
- listener->trans = trans;
- listener->svc = svc;
+ listener->trans = trans;
+ listener->svc = svc;
- INIT_LIST_HEAD (&listener->list);
+ INIT_LIST_HEAD(&listener->list);
- pthread_mutex_lock (&svc->rpclock);
- {
- list_add_tail (&listener->list, &svc->listeners);
- }
- pthread_mutex_unlock (&svc->rpclock);
+ pthread_rwlock_wrlock(&svc->rpclock);
+ {
+ list_add_tail(&listener->list, &svc->listeners);
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
out:
- return listener;
+ return listener;
}
-
int32_t
-rpcsvc_create_listener (rpcsvc_t *svc, dict_t *options, char *name)
+rpcsvc_create_listener(rpcsvc_t *svc, dict_t *options, char *name)
{
- rpc_transport_t *trans = NULL;
- rpcsvc_listener_t *listener = NULL;
- int32_t ret = -1;
-
- if (!svc || !options) {
- goto out;
- }
-
- trans = rpcsvc_transport_create (svc, options, name);
- if (!trans) {
- /* LOG TODO */
- goto out;
- }
-
- listener = rpcsvc_listener_alloc (svc, trans);
- if (listener == NULL) {
- goto out;
- }
+ rpc_transport_t *trans = NULL;
+ rpcsvc_listener_t *listener = NULL;
+ int32_t ret = -1;
+
+ if (!svc || !options) {
+ goto out;
+ }
+
+ trans = rpc_transport_load(svc->ctx, options, name);
+ if (!trans) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING,
+ "cannot create listener, "
+ "initing the transport failed");
+ goto out;
+ }
+
+ ret = rpc_transport_listen(trans);
+ if (ret == -EADDRINUSE || ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING, "listening on transport failed");
+ goto out;
+ }
+
+ ret = rpc_transport_register_notify(trans, rpcsvc_notify, svc);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_WARNING, "registering notify failed");
+ goto out;
+ }
+
+ listener = rpcsvc_listener_alloc(svc, trans);
+ if (listener == NULL) {
+ ret = -1;
+ goto out;
+ }
- ret = 0;
+ ret = 0;
out:
- if (!listener && trans) {
- rpc_transport_disconnect (trans);
- }
+ if (!listener && trans) {
+ rpc_transport_disconnect(trans, _gf_true);
+ rpc_transport_cleanup(trans);
+ }
- return ret;
+ return ret;
}
-
int32_t
-rpcsvc_create_listeners (rpcsvc_t *svc, dict_t *options, char *name)
+rpcsvc_create_listeners(rpcsvc_t *svc, dict_t *options, char *name)
{
- int32_t ret = -1, count = 0;
- data_t *data = NULL;
- char *str = NULL, *ptr = NULL, *transport_name = NULL;
- char *transport_type = NULL, *saveptr = NULL, *tmp = NULL;
-
- if ((svc == NULL) || (options == NULL) || (name == NULL)) {
- goto out;
- }
-
- data = dict_get (options, "transport-type");
- if (data == NULL) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR,
- "option transport-type not set");
- goto out;
+ int32_t ret = -1, count = 0;
+ data_t *data = NULL;
+ char *str = NULL, *ptr = NULL, *transport_name = NULL;
+ char *transport_type = NULL, *saveptr = NULL, *tmp = NULL;
+
+ if ((svc == NULL) || (options == NULL) || (name == NULL)) {
+ goto out;
+ }
+
+ data = dict_get(options, "transport-type");
+ if (data == NULL) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "option transport-type not set");
+ goto out;
+ }
+
+ transport_type = data_to_str(data);
+ if (transport_type == NULL) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "option transport-type not set");
+ goto out;
+ }
+
+ /* duplicate transport_type, since following dict_set will free it */
+ transport_type = gf_strdup(transport_type);
+ if (transport_type == NULL) {
+ goto out;
+ }
+
+ str = gf_strdup(transport_type);
+ if (str == NULL) {
+ goto out;
+ }
+
+ ptr = strtok_r(str, ",", &saveptr);
+
+ while (ptr != NULL) {
+ tmp = gf_strdup(ptr);
+ if (tmp == NULL) {
+ goto out;
+ }
+
+ ret = gf_asprintf(&transport_name, "%s.%s", tmp, name);
+ if (ret == -1) {
+ goto out;
}
- transport_type = data_to_str (data);
- if (transport_type == NULL) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR,
- "option transport-type not set");
- goto out;
+ ret = dict_set_dynstr(options, "transport-type", tmp);
+ if (ret == -1) {
+ goto out;
}
- /* duplicate transport_type, since following dict_set will free it */
- transport_type = gf_strdup (transport_type);
- if (transport_type == NULL) {
- goto out;
- }
+ tmp = NULL;
+ ptr = strtok_r(NULL, ",", &saveptr);
- str = gf_strdup (transport_type);
- if (str == NULL) {
- goto out;
+ ret = rpcsvc_create_listener(svc, options, transport_name);
+ if (ret != 0) {
+ goto out;
}
- ptr = strtok_r (str, ",", &saveptr);
+ dict_del(options, "notify-poller-death");
+ GF_FREE(transport_name);
+ transport_name = NULL;
+ count++;
+ }
- while (ptr != NULL) {
- tmp = gf_strdup (ptr);
- if (tmp == NULL) {
- goto out;
- }
-
- ret = gf_asprintf (&transport_name, "%s.%s", tmp, name);
- if (ret == -1) {
- goto out;
- }
-
- ret = dict_set_dynstr (options, "transport-type", tmp);
- if (ret == -1) {
- goto out;
- }
-
- tmp = NULL;
- ptr = strtok_r (NULL, ",", &saveptr);
-
- ret = rpcsvc_create_listener (svc, options, transport_name);
- if (ret != 0) {
- goto out;
- }
-
- GF_FREE (transport_name);
- transport_name = NULL;
- count++;
- }
-
- ret = dict_set_dynstr (options, "transport-type", transport_type);
- if (ret == -1) {
- goto out;
- }
+ ret = dict_set_dynstr(options, "transport-type", transport_type);
+ if (ret == -1) {
+ goto out;
+ }
- transport_type = NULL;
+ transport_type = NULL;
out:
- if (str)
- GF_FREE (str);
+ GF_FREE(str);
- if (transport_type)
- GF_FREE (transport_type);
+ GF_FREE(transport_type);
- if (tmp)
- GF_FREE (tmp);
+ GF_FREE(tmp);
- if (transport_name)
- GF_FREE (transport_name);
+ GF_FREE(transport_name);
+ if (count > 0) {
return count;
+ } else {
+ return ret;
+ }
}
-
int
-rpcsvc_unregister_notify (rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata)
+rpcsvc_unregister_notify(rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata)
{
- rpcsvc_notify_wrapper_t *wrapper = NULL, *tmp = NULL;
- int ret = 0;
+ rpcsvc_notify_wrapper_t *wrapper = NULL, *tmp = NULL;
+ int ret = 0;
- if (!svc || !notify) {
- goto out;
- }
+ if (!svc || !notify) {
+ goto out;
+ }
- pthread_mutex_lock (&svc->rpclock);
+ pthread_rwlock_wrlock(&svc->rpclock);
+ {
+ list_for_each_entry_safe(wrapper, tmp, &svc->notify, list)
{
- list_for_each_entry_safe (wrapper, tmp, &svc->notify, list) {
- if ((wrapper->notify == notify)
- && (mydata == wrapper->data)) {
- list_del_init (&wrapper->list);
- GF_FREE (wrapper);
- ret++;
- }
- }
+ if ((wrapper->notify == notify) && (mydata == wrapper->data)) {
+ list_del_init(&wrapper->list);
+ GF_FREE(wrapper);
+ ret++;
+ }
}
- pthread_mutex_unlock (&svc->rpclock);
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
out:
- return ret;
+ return ret;
}
int
-rpcsvc_register_notify (rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata)
+rpcsvc_register_notify(rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata)
{
- rpcsvc_notify_wrapper_t *wrapper = NULL;
- int ret = -1;
-
- wrapper = rpcsvc_notify_wrapper_alloc ();
- if (!wrapper) {
- goto out;
- }
- svc->mydata = mydata; /* this_xlator */
- wrapper->data = mydata;
- wrapper->notify = notify;
-
- pthread_mutex_lock (&svc->rpclock);
- {
- list_add_tail (&wrapper->list, &svc->notify);
- svc->notify_count++;
- }
- pthread_mutex_unlock (&svc->rpclock);
-
- ret = 0;
+ rpcsvc_notify_wrapper_t *wrapper = NULL;
+ int ret = -1;
+
+ wrapper = rpcsvc_notify_wrapper_alloc();
+ if (!wrapper) {
+ goto out;
+ }
+ svc->mydata = mydata;
+ wrapper->data = mydata;
+ wrapper->notify = notify;
+
+ pthread_rwlock_wrlock(&svc->rpclock);
+ {
+ list_add_tail(&wrapper->list, &svc->notify);
+ svc->notify_count++;
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
-inline int
-rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t *program)
+void *
+rpcsvc_request_handler(void *arg)
{
- int ret = -1;
- rpcsvc_program_t *newprog = NULL;
- char already_registered = 0;
+ rpcsvc_request_queue_t *queue = NULL;
+ rpcsvc_program_t *program = NULL;
+ rpcsvc_request_t *req = NULL, *tmp_req = NULL;
+ rpcsvc_actor_t *actor = NULL;
+ gf_boolean_t done = _gf_false;
+ int ret = 0;
+ struct list_head tmp_list;
- if (!svc) {
- goto out;
- }
+ queue = arg;
+ program = queue->program;
- if (program->actors == NULL) {
- goto out;
- }
+ INIT_LIST_HEAD(&tmp_list);
- pthread_mutex_lock (&svc->rpclock);
+ if (!program)
+ return NULL;
+
+ while (1) {
+ pthread_mutex_lock(&queue->queue_lock);
{
- list_for_each_entry (newprog, &svc->programs, program) {
- if ((newprog->prognum == program->prognum)
- && (newprog->progver == program->progver)) {
- already_registered = 1;
- break;
- }
- }
- }
- pthread_mutex_unlock (&svc->rpclock);
+ if (!program->alive && list_empty(&queue->request_queue)) {
+ done = 1;
+ goto unlock;
+ }
- if (already_registered) {
- ret = 0;
- goto out;
+ while (list_empty(&queue->request_queue)) {
+ queue->waiting = _gf_true;
+ pthread_cond_wait(&queue->queue_cond, &queue->queue_lock);
+ }
+
+ queue->waiting = _gf_false;
+
+ if (!list_empty(&queue->request_queue)) {
+ INIT_LIST_HEAD(&tmp_list);
+ list_splice_init(&queue->request_queue, &tmp_list);
+ }
}
+ unlock:
+ pthread_mutex_unlock(&queue->queue_lock);
- newprog = GF_CALLOC (1, sizeof(*newprog),gf_common_mt_rpcsvc_program_t);
- if (newprog == NULL) {
- goto out;
+ list_for_each_entry_safe(req, tmp_req, &tmp_list, request_list)
+ {
+ if (req) {
+ list_del_init(&req->request_list);
+
+ if (req->prognum == RPCSVC_INFRA_PROGRAM) {
+ switch (req->procnum) {
+ case RPCSVC_PROC_EVENT_THREAD_DEATH:
+ gf_log(GF_RPCSVC, GF_LOG_INFO,
+ "event thread died, exiting request handler "
+ "thread for queue %d of program %s",
+ (int)(queue - &program->request_queue[0]),
+ program->progname);
+ done = 1;
+ pthread_mutex_lock(&program->thr_lock);
+ {
+ rpcsvc_toggle_queue_status(
+ program, queue,
+ program->request_queue_status);
+ program->threadcount--;
+ }
+ pthread_mutex_unlock(&program->thr_lock);
+ rpcsvc_request_destroy(req);
+ break;
+
+ default:
+ break;
+ }
+ } else {
+ THIS = req->svc->xl;
+ actor = rpcsvc_program_actor(req);
+ ret = actor->actor(req);
+
+ if (ret != 0) {
+ rpcsvc_check_and_reply_error(ret, NULL, req);
+ }
+ req = NULL;
+ }
+ }
}
- memcpy (newprog, program, sizeof (*program));
+ if (done)
+ break;
+ }
- INIT_LIST_HEAD (&newprog->program);
+ return NULL;
+}
- pthread_mutex_lock (&svc->rpclock);
+int
+rpcsvc_program_register(rpcsvc_t *svc, rpcsvc_program_t *program,
+ gf_boolean_t add_to_head)
+{
+ int ret = -1, i = 0;
+ rpcsvc_program_t *newprog = NULL;
+ char already_registered = 0;
+ pthread_mutexattr_t attr[EVENT_MAX_THREADS];
+ pthread_mutexattr_t thr_attr;
+
+ if (!svc) {
+ goto out;
+ }
+
+ if (program->actors == NULL) {
+ goto out;
+ }
+
+ pthread_rwlock_rdlock(&svc->rpclock);
+ {
+ list_for_each_entry(newprog, &svc->programs, program)
{
- list_add_tail (&newprog->program, &svc->programs);
+ if ((newprog->prognum == program->prognum) &&
+ (newprog->progver == program->progver)) {
+ already_registered = 1;
+ break;
+ }
}
- pthread_mutex_unlock (&svc->rpclock);
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
+ if (already_registered) {
ret = 0;
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "New program registered: %s, Num: %d,"
- " Ver: %d, Port: %d", newprog->progname, newprog->prognum,
- newprog->progver, newprog->progport);
+ goto out;
+ }
+
+ newprog = GF_CALLOC(1, sizeof(*newprog), gf_common_mt_rpcsvc_program_t);
+ if (newprog == NULL) {
+ goto out;
+ }
+
+ memcpy(newprog, program, sizeof(*program));
+ newprog->latencies = gf_latency_new(program->numactors);
+ if (!newprog->latencies) {
+ rpcsvc_program_destroy(newprog);
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&newprog->program);
+ pthread_mutexattr_init(&thr_attr);
+ pthread_mutexattr_settype(&thr_attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+
+ for (i = 0; i < EVENT_MAX_THREADS; i++) {
+ pthread_mutexattr_init(&attr[i]);
+ pthread_mutexattr_settype(&attr[i], PTHREAD_MUTEX_ADAPTIVE_NP);
+ INIT_LIST_HEAD(&newprog->request_queue[i].request_queue);
+ pthread_mutex_init(&newprog->request_queue[i].queue_lock, &attr[i]);
+ pthread_cond_init(&newprog->request_queue[i].queue_cond, NULL);
+ newprog->request_queue[i].program = newprog;
+ }
+
+ pthread_mutex_init(&newprog->thr_lock, &thr_attr);
+ pthread_cond_init(&newprog->thr_cond, NULL);
+
+ newprog->alive = _gf_true;
+
+ if (gf_async_ctrl.enabled) {
+ newprog->ownthread = _gf_false;
+ newprog->synctask = _gf_false;
+ }
+
+ /* make sure synctask gets priority over ownthread */
+ if (newprog->synctask)
+ newprog->ownthread = _gf_false;
+
+ if (newprog->ownthread) {
+ struct event_pool *ep = svc->ctx->event_pool;
+ newprog->eventthreadcount = ep->eventthreadcount;
+
+ pthread_key_create(&newprog->req_queue_key, NULL);
+ newprog->thr_queue = 1;
+ }
+
+ pthread_rwlock_wrlock(&svc->rpclock);
+ {
+ if (add_to_head)
+ list_add(&newprog->program, &svc->programs);
+ else
+ list_add_tail(&newprog->program, &svc->programs);
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
-out:
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Program registration failed:"
- " %s, Num: %d, Ver: %d, Port: %d", program->progname,
- program->prognum, program->progver, program->progport);
- }
+ ret = 0;
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "New program registered: %s, Num: %d,"
+ " Ver: %d, Port: %d",
+ newprog->progname, newprog->prognum, newprog->progver,
+ newprog->progport);
- return ret;
+out:
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Program registration failed:"
+ " %s, Num: %d, Ver: %d, Port: %d",
+ program->progname, program->prognum, program->progver,
+ program->progport);
+ }
+
+ return ret;
}
static void
-free_prog_details (gf_dump_rsp *rsp)
+free_prog_details(gf_dump_rsp *rsp)
{
- gf_prog_detail *prev = NULL;
- gf_prog_detail *trav = NULL;
-
- trav = rsp->prog;
- while (trav) {
- prev = trav;
- trav = trav->next;
- GF_FREE (prev);
- }
+ gf_prog_detail *prev = NULL;
+ gf_prog_detail *trav = NULL;
+
+ trav = rsp->prog;
+ while (trav) {
+ prev = trav;
+ trav = trav->next;
+ GF_FREE(prev);
+ }
}
static int
-build_prog_details (rpcsvc_request_t *req, gf_dump_rsp *rsp)
+build_prog_details(rpcsvc_request_t *req, gf_dump_rsp *rsp)
{
- int ret = -1;
- rpcsvc_program_t *program = NULL;
- gf_prog_detail *prog = NULL;
- gf_prog_detail *prev = NULL;
+ int ret = -1;
+ rpcsvc_program_t *program = NULL;
+ gf_prog_detail *prog = NULL;
+ gf_prog_detail *prev = NULL;
- if (!req || !req->trans || !req->svc)
- goto out;
+ if (!req || !req->trans || !req->svc)
+ goto out;
+
+ pthread_rwlock_rdlock(&req->svc->rpclock);
+ {
+ list_for_each_entry(program, &req->svc->programs, program)
+ {
+ prog = GF_CALLOC(1, sizeof(*prog), 0);
+ if (!prog)
+ goto unlock;
+
+ prog->progname = program->progname;
+ prog->prognum = program->prognum;
+ prog->progver = program->progver;
- list_for_each_entry (program, &req->svc->programs, program) {
- prog = GF_CALLOC (1, sizeof (*prog), 0);
- if (!prog)
- goto out;
- prog->progname = program->progname;
- prog->prognum = program->prognum;
- prog->progver = program->progver;
- if (!rsp->prog)
- rsp->prog = prog;
- if (prev)
- prev->next = prog;
- prev = prog;
+ if (!rsp->prog)
+ rsp->prog = prog;
+ if (prev)
+ prev->next = prog;
+ prev = prog;
}
if (prev)
- ret = 0;
+ ret = 0;
+ }
+unlock:
+ pthread_rwlock_unlock(&req->svc->rpclock);
out:
- return ret;
+ return ret;
}
static int
-rpcsvc_dump (rpcsvc_request_t *req)
+rpcsvc_ping(rpcsvc_request_t *req)
{
- char rsp_buf[8 * 1024] = {0,};
- gf_dump_rsp rsp = {0,};
- struct iovec iov = {0,};
- int op_errno = EINVAL;
- int ret = -1;
- uint32_t dump_rsp_len = 0;
+ char rsp_buf[8 * 1024] = {
+ 0,
+ };
+ gf_common_rsp rsp = {
+ 0,
+ };
+ struct iovec iov = {
+ 0,
+ };
+ int ret = -1;
+ uint32_t ping_rsp_len = 0;
+
+ ping_rsp_len = xdr_sizeof((xdrproc_t)xdr_gf_common_rsp, &rsp);
+
+ iov.iov_base = rsp_buf;
+ iov.iov_len = ping_rsp_len;
+
+ ret = xdr_serialize_generic(iov, &rsp, (xdrproc_t)xdr_gf_common_rsp);
+ if (ret < 0) {
+ ret = RPCSVC_ACTOR_ERROR;
+ } else {
+ rsp.op_ret = 0;
+ rpcsvc_submit_generic(req, &iov, 1, NULL, 0, NULL);
+ }
+
+ return 0;
+}
- if (!req)
- goto fail;
+static int
+rpcsvc_dump(rpcsvc_request_t *req)
+{
+ char rsp_buf[8 * 1024] = {
+ 0,
+ };
+ gf_dump_rsp rsp = {
+ 0,
+ };
+ struct iovec iov = {
+ 0,
+ };
+ int op_errno = EINVAL;
+ int ret = -1;
+ uint32_t dump_rsp_len = 0;
+
+ if (!req)
+ goto sendrsp;
+
+ ret = build_prog_details(req, &rsp);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto sendrsp;
+ }
+
+ op_errno = 0;
+
+sendrsp:
+ rsp.op_errno = gf_errno_to_error(op_errno);
+ rsp.op_ret = ret;
+
+ dump_rsp_len = xdr_sizeof((xdrproc_t)xdr_gf_dump_rsp, &rsp);
+
+ iov.iov_base = rsp_buf;
+ iov.iov_len = dump_rsp_len;
+
+ ret = xdr_serialize_generic(iov, &rsp, (xdrproc_t)xdr_gf_dump_rsp);
+ if (ret < 0) {
+ ret = RPCSVC_ACTOR_ERROR;
+ } else {
+ rpcsvc_submit_generic(req, &iov, 1, NULL, 0, NULL);
+ ret = 0;
+ }
- ret = build_prog_details (req, &rsp);
- if (ret < 0) {
- op_errno = -ret;
- goto fail;
- }
+ free_prog_details(&rsp);
-fail:
- rsp.op_errno = gf_errno_to_error (op_errno);
- rsp.op_ret = ret;
+ return ret;
+}
- dump_rsp_len = xdr_sizeof ((xdrproc_t) xdr_gf_dump_rsp,
- &rsp);
+int
+rpcsvc_init_options(rpcsvc_t *svc, dict_t *options)
+{
+ char *optstr = NULL;
+ int ret = -1;
+
+ if ((!svc) || (!options))
+ return -1;
- iov.iov_base = rsp_buf;
- iov.iov_len = dump_rsp_len;
+ svc->memfactor = RPCSVC_DEFAULT_MEMFACTOR;
+
+ svc->register_portmap = _gf_true;
+ if (dict_get(options, "rpc.register-with-portmap")) {
+ ret = dict_get_str(options, "rpc.register-with-portmap", &optstr);
+ if (ret < 0) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Failed to parse "
+ "dict");
+ goto out;
+ }
- ret = xdr_serialize_generic (iov, &rsp, (xdrproc_t)xdr_gf_dump_rsp);
+ ret = gf_string2boolean(optstr, &svc->register_portmap);
if (ret < 0) {
- if (req)
- req->rpc_err = GARBAGE_ARGS;
- op_errno = EINVAL;
- goto fail;
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Failed to parse bool "
+ "string");
+ goto out;
+ }
+ }
+
+ if (!svc->register_portmap)
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "Portmap registration "
+ "disabled");
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+rpcsvc_reconfigure_options(rpcsvc_t *svc, dict_t *options)
+{
+ xlator_t *xlator = NULL;
+ xlator_list_t *volentry = NULL;
+ char *srchkey = NULL;
+ char *keyval = NULL;
+ int ret = -1;
+
+ if ((!svc) || (!svc->options) || (!options))
+ return (-1);
+
+ /* Fetch the xlator from svc */
+ xlator = svc->xl;
+ if (!xlator)
+ return (-1);
+
+ /* Reconfigure the volume specific rpc-auth.addr allow part */
+ volentry = xlator->children;
+ while (volentry) {
+ ret = gf_asprintf(&srchkey, "rpc-auth.addr.%s.allow",
+ volentry->xlator->name);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ return (-1);
+ }
+
+ /* key-string: rpc-auth.addr.<volname>.allow
+ *
+ * IMP: Delete the OLD key/value pair from dict.
+ * And set the NEW key/value pair IFF the option is SET
+ * in reconfigured volfile.
+ *
+ * NB: If rpc-auth.addr.<volname>.allow is not SET explicitly,
+ * build_nfs_graph() sets it as "*" i.e. anonymous.
+ */
+ dict_del(svc->options, srchkey);
+ if (!dict_get_str(options, srchkey, &keyval)) {
+ ret = dict_set_dynstr_with_alloc(svc->options, srchkey, keyval);
+ if (ret < 0) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "dict_set_str error");
+ GF_FREE(srchkey);
+ return (-1);
+ }
+ }
+
+ GF_FREE(srchkey);
+ volentry = volentry->next;
+ }
+
+ /* Reconfigure the volume specific rpc-auth.addr reject part */
+ volentry = xlator->children;
+ while (volentry) {
+ ret = gf_asprintf(&srchkey, "rpc-auth.addr.%s.reject",
+ volentry->xlator->name);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ return (-1);
}
- ret = rpcsvc_submit_generic (req, &iov, 1, NULL, 0,
- NULL);
+ /* key-string: rpc-auth.addr.<volname>.reject
+ *
+ * IMP: Delete the OLD key/value pair from dict.
+ * And set the NEW key/value pair IFF the option is SET
+ * in reconfigured volfile.
+ *
+ * NB: No default value for reject key.
+ */
+ dict_del(svc->options, srchkey);
+ if (!dict_get_str(options, srchkey, &keyval)) {
+ ret = dict_set_dynstr_with_alloc(svc->options, srchkey, keyval);
+ if (ret < 0) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "dict_set_str error");
+ GF_FREE(srchkey);
+ return (-1);
+ }
+ }
- free_prog_details (&rsp);
+ GF_FREE(srchkey);
+ volentry = volentry->next;
+ }
- return ret;
+ ret = rpcsvc_init_options(svc, options);
+ if (ret)
+ return (-1);
+
+ return rpcsvc_auth_reconf(svc, options);
}
int
-rpcsvc_init_options (rpcsvc_t *svc, dict_t *options)
+rpcsvc_transport_unix_options_build(dict_t *dict, char *filepath)
{
- char *optstr = NULL;
- int ret = -1;
+ char *fpath = NULL;
+ int ret = -1;
- if ((!svc) || (!options))
- return -1;
+ GF_ASSERT(filepath);
+ GF_VALIDATE_OR_GOTO("rpcsvc", dict, out);
- svc->memfactor = RPCSVC_DEFAULT_MEMFACTOR;
+ fpath = gf_strdup(filepath);
+ if (!fpath) {
+ ret = -1;
+ goto out;
+ }
- svc->register_portmap = _gf_true;
- if (dict_get (options, "rpc.register-with-portmap")) {
- ret = dict_get_str (options, "rpc.register-with-portmap",
- &optstr);
- if (ret < 0) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to parse "
- "dict");
- goto out;
- }
+ ret = dict_set_dynstr(dict, "transport.socket.listen-path", fpath);
+ if (ret)
+ goto out;
- ret = gf_string2boolean (optstr, &svc->register_portmap);
- if (ret < 0) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to parse bool "
- "string");
- goto out;
- }
- }
+ ret = dict_set_str(dict, "transport.address-family", "unix");
+ if (ret)
+ goto out;
- if (!svc->register_portmap)
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Portmap registration "
- "disabled");
+ ret = dict_set_str(dict, "transport.socket.nodelay", "off");
+ if (ret)
+ goto out;
- ret = 0;
+ ret = dict_set_str(dict, "transport-type", "socket");
+ if (ret)
+ goto out;
out:
- return ret;
+ if (ret) {
+ GF_FREE(fpath);
+ }
+ return ret;
}
+/*
+ * Configure() the rpc.outstanding-rpc-limit param.
+ * If dict_get_int32() for dict-key "rpc.outstanding-rpc-limit" FAILS,
+ * it would set the value as "defvalue". Otherwise it would fetch the
+ * value and round up to multiple-of-8. defvalue must be +ve.
+ *
+ * NB: defval or set-value "0" is special which means unlimited/65536.
+ */
int
-rpcsvc_transport_unix_options_build (dict_t **options, char *filepath)
+rpcsvc_set_outstanding_rpc_limit(rpcsvc_t *svc, dict_t *options, int defvalue)
{
- dict_t *dict = NULL;
- char *fpath = NULL;
- int ret = -1;
-
- GF_ASSERT (filepath);
- GF_ASSERT (options);
-
- dict = dict_new ();
- if (!dict)
- goto out;
+ int ret = -1; /* FAILURE */
+ int rpclim = 0;
+ static char *rpclimkey = "rpc.outstanding-rpc-limit";
+
+ if ((!svc) || (!options))
+ return (-1);
+
+ if ((defvalue < RPCSVC_MIN_OUTSTANDING_RPC_LIMIT) ||
+ (defvalue > RPCSVC_MAX_OUTSTANDING_RPC_LIMIT)) {
+ return (-1);
+ }
+
+ /* Fetch the rpc.outstanding-rpc-limit from dict. */
+ ret = dict_get_int32(options, rpclimkey, &rpclim);
+ if (ret < 0) {
+ /* Fall back to default for FAILURE */
+ rpclim = defvalue;
+ }
+
+ /* Round up to multiple-of-8. It must not exceed
+ * RPCSVC_MAX_OUTSTANDING_RPC_LIMIT.
+ */
+ rpclim = ((rpclim + 8 - 1) >> 3) * 8;
+ if (rpclim > RPCSVC_MAX_OUTSTANDING_RPC_LIMIT) {
+ rpclim = RPCSVC_MAX_OUTSTANDING_RPC_LIMIT;
+ }
+
+ if (svc->outstanding_rpc_limit != rpclim) {
+ svc->outstanding_rpc_limit = rpclim;
+ gf_log(GF_RPCSVC, GF_LOG_INFO, "Configured %s with value %d", rpclimkey,
+ rpclim);
+ }
+
+ return (0);
+}
- fpath = gf_strdup (filepath);
- if (!fpath) {
- ret = -1;
- goto out;
- }
+/*
+ * Enable throttling for rpcsvc_t svc.
+ * Returns 0 on success, -1 otherwise.
+ */
+int
+rpcsvc_set_throttle_on(rpcsvc_t *svc)
+{
+ if (!svc)
+ return -1;
- ret = dict_set_dynstr (dict, "transport.socket.listen-path", fpath);
- if (ret)
- goto out;
+ svc->throttle = _gf_true;
- ret = dict_set_str (dict, "transport.address-family", "unix");
- if (ret)
- goto out;
+ return 0;
+}
- ret = dict_set_str (dict, "transport.socket.nodelay", "off");
- if (ret)
- goto out;
+/*
+ * Disable throttling for rpcsvc_t svc.
+ * Returns 0 on success, -1 otherwise.
+ */
+int
+rpcsvc_set_throttle_off(rpcsvc_t *svc)
+{
+ if (!svc)
+ return -1;
- ret = dict_set_str (dict, "transport-type", "socket");
- if (ret)
- goto out;
+ svc->throttle = _gf_false;
- *options = dict;
-out:
- if (ret) {
- if (fpath)
- GF_FREE (fpath);
- if (dict)
- dict_unref (dict);
- }
- return ret;
+ return 0;
}
-/* The global RPC service initializer.
+/*
+ * Get throttle state for rpcsvc_t svc.
+ * Returns value of attribute throttle on success, _gf_false otherwise.
*/
-rpcsvc_t *
-rpcsvc_init (xlator_t *xl, glusterfs_ctx_t *ctx, dict_t *options,
- uint32_t poolcount)
+gf_boolean_t
+rpcsvc_get_throttle(rpcsvc_t *svc)
{
- rpcsvc_t *svc = NULL;
- int ret = -1;
+ if (!svc)
+ return _gf_false;
- if ((!ctx) || (!options))
- return NULL;
-
- svc = GF_CALLOC (1, sizeof (*svc), gf_common_mt_rpcsvc_t);
- if (!svc)
- return NULL;
+ return svc->throttle;
+}
- pthread_mutex_init (&svc->rpclock, NULL);
- INIT_LIST_HEAD (&svc->authschemes);
- INIT_LIST_HEAD (&svc->notify);
- INIT_LIST_HEAD (&svc->listeners);
- INIT_LIST_HEAD (&svc->programs);
+/* Function call to cleanup resources for svc
+ */
+int
+rpcsvc_destroy(rpcsvc_t *svc)
+{
+ struct rpcsvc_auth_list *auth = NULL;
+ struct rpcsvc_auth_list *tmp = NULL;
+ rpcsvc_listener_t *listener = NULL;
+ rpcsvc_listener_t *next = NULL;
+ int ret = 0;
- ret = rpcsvc_init_options (svc, options);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to init options");
- goto free_svc;
- }
+ if (!svc)
+ return ret;
- if (!poolcount)
- poolcount = RPCSVC_POOLCOUNT_MULT * svc->memfactor;
+ list_for_each_entry_safe(listener, next, &svc->listeners, list)
+ {
+ rpcsvc_listener_destroy(listener);
+ }
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "rx pool: %d", poolcount);
- svc->rxpool = mem_pool_new (rpcsvc_request_t, poolcount);
- /* TODO: leak */
- if (!svc->rxpool) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "mem pool allocation failed");
- goto free_svc;
- }
+ list_for_each_entry_safe(auth, tmp, &svc->authschemes, authlist)
+ {
+ list_del_init(&auth->authlist);
+ GF_FREE(auth);
+ }
- ret = rpcsvc_auth_init (svc, options);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to init "
- "authentication");
- goto free_svc;
- }
+ rpcsvc_program_unregister(svc, &gluster_dump_prog);
+ if (svc->rxpool) {
+ mem_pool_destroy(svc->rxpool);
+ svc->rxpool = NULL;
+ }
- ret = -1;
- svc->options = options;
- svc->ctx = ctx;
- svc->mydata = xl;
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "RPC service inited.");
+ pthread_rwlock_destroy(&svc->rpclock);
+ GF_FREE(svc);
- gluster_dump_prog.options = options;
+ return ret;
+}
- ret = rpcsvc_program_register (svc, &gluster_dump_prog);
- if (ret) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR,
- "failed to register DUMP program");
- goto free_svc;
- }
- ret = 0;
+/* The global RPC service initializer.
+ */
+rpcsvc_t *
+rpcsvc_init(xlator_t *xl, glusterfs_ctx_t *ctx, dict_t *options,
+ uint32_t poolcount)
+{
+ rpcsvc_t *svc = NULL;
+ int ret = -1;
+
+ if ((!xl) || (!ctx) || (!options))
+ return NULL;
+
+ svc = GF_CALLOC(1, sizeof(*svc), gf_common_mt_rpcsvc_t);
+ if (!svc)
+ return NULL;
+
+ pthread_rwlock_init(&svc->rpclock, NULL);
+ INIT_LIST_HEAD(&svc->authschemes);
+ INIT_LIST_HEAD(&svc->notify);
+ INIT_LIST_HEAD(&svc->listeners);
+ INIT_LIST_HEAD(&svc->programs);
+
+ ret = rpcsvc_init_options(svc, options);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "Failed to init options");
+ goto free_svc;
+ }
+
+ if (!poolcount)
+ poolcount = RPCSVC_POOLCOUNT_MULT * svc->memfactor;
+
+ gf_log(GF_RPCSVC, GF_LOG_TRACE, "rx pool: %d", poolcount);
+ svc->rxpool = mem_pool_new(rpcsvc_request_t, poolcount);
+ /* TODO: leak */
+ if (!svc->rxpool) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "mem pool allocation failed");
+ goto free_svc;
+ }
+
+ ret = rpcsvc_auth_init(svc, options);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Failed to init "
+ "authentication");
+ goto free_svc;
+ }
+
+ ret = -1;
+ svc->options = options;
+ svc->ctx = ctx;
+ svc->xl = xl;
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "RPC service inited.");
+
+ gluster_dump_prog.options = options;
+
+ ret = rpcsvc_program_register(svc, &gluster_dump_prog, _gf_false);
+ if (ret) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "failed to register DUMP program");
+ goto free_svc;
+ }
+
+ ret = 0;
free_svc:
- if (ret == -1) {
- GF_FREE (svc);
- svc = NULL;
- }
+ if (ret == -1) {
+ GF_FREE(svc);
+ svc = NULL;
+ }
- return svc;
+ return svc;
}
-
int
-rpcsvc_transport_peer_check_search (dict_t *options, char *pattern, char *clstr)
+rpcsvc_transport_peer_check_search(dict_t *options, char *pattern, char *ip,
+ char *hostname)
{
- int ret = -1;
- char *addrtok = NULL;
- char *addrstr = NULL;
- char *dup_addrstr = NULL;
- char *svptr = NULL;
+ int ret = -1;
+ char *addrtok = NULL;
+ char *addrstr = NULL;
+ char *dup_addrstr = NULL;
+ char *svptr = NULL;
- if ((!options) || (!clstr))
- return -1;
+ if ((!options) || (!ip))
+ return -1;
- if (!dict_get (options, pattern))
- return -1;
-
- ret = dict_get_str (options, pattern, &addrstr);
- if (ret < 0) {
- ret = -1;
- goto err;
- }
+ ret = dict_get_str(options, pattern, &addrstr);
+ if (ret < 0) {
+ ret = -1;
+ goto err;
+ }
- if (!addrstr) {
- ret = -1;
- goto err;
- }
+ if (!addrstr) {
+ ret = -1;
+ goto err;
+ }
- dup_addrstr = gf_strdup (addrstr);
- addrtok = strtok_r (dup_addrstr, ",", &svptr);
- while (addrtok) {
+ dup_addrstr = gf_strdup(addrstr);
+ if (dup_addrstr == NULL) {
+ ret = -1;
+ goto err;
+ }
+ addrtok = strtok_r(dup_addrstr, ",", &svptr);
+ while (addrtok) {
+ /* CASEFOLD not present on Solaris */
+#ifdef FNM_CASEFOLD
+ ret = fnmatch(addrtok, ip, FNM_CASEFOLD);
+#else
+ ret = fnmatch(addrtok, ip, 0);
+#endif
+ if (ret == 0)
+ goto err;
- /* CASEFOLD not present on Solaris */
+ /* compare hostnames if applicable */
+ if (hostname) {
#ifdef FNM_CASEFOLD
- ret = fnmatch (addrtok, clstr, FNM_CASEFOLD);
+ ret = fnmatch(addrtok, hostname, FNM_CASEFOLD);
#else
- ret = fnmatch (addrtok, clstr, 0);
+ ret = fnmatch(addrtok, hostname, 0);
#endif
- if (ret == 0)
- goto err;
+ if (ret == 0)
+ goto err;
+ }
- addrtok = strtok_r (NULL, ",", &svptr);
+ /* Compare IPv4 subnetwork, TODO: IPv6 subnet support */
+ if (strchr(addrtok, '/')) {
+ ret = rpcsvc_match_subnet_v4(addrtok, ip);
+ if (ret == 0)
+ goto err;
}
- ret = -1;
+ addrtok = strtok_r(NULL, ",", &svptr);
+ }
+
+ ret = -1;
err:
- if (dup_addrstr)
- GF_FREE (dup_addrstr);
+ GF_FREE(dup_addrstr);
- return ret;
+ return ret;
}
-
-int
-rpcsvc_transport_peer_check_allow (dict_t *options, char *volname, char *clstr)
+static int
+rpcsvc_transport_peer_check_allow(dict_t *options, char *volname, char *ip,
+ char *hostname)
{
- int ret = RPCSVC_AUTH_DONTCARE;
- char *srchstr = NULL;
- char globalrule[] = "rpc-auth.addr.allow";
+ int ret = RPCSVC_AUTH_DONTCARE;
+ char *srchstr = NULL;
- if ((!options) || (!clstr))
- return ret;
+ if ((!options) || (!ip) || (!volname))
+ return ret;
- /* If volname is NULL, then we're searching for the general rule to
- * determine the current address in clstr is allowed or not for all
- * subvolumes.
- */
- if (volname) {
- ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.allow", volname);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
- ret = RPCSVC_AUTH_DONTCARE;
- goto out;
- }
- } else
- srchstr = globalrule;
+ ret = gf_asprintf(&srchstr, "rpc-auth.addr.%s.allow", volname);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ ret = RPCSVC_AUTH_DONTCARE;
+ goto out;
+ }
- ret = rpcsvc_transport_peer_check_search (options, srchstr, clstr);
- if (volname)
- GF_FREE (srchstr);
+ ret = rpcsvc_transport_peer_check_search(options, srchstr, ip, hostname);
+ GF_FREE(srchstr);
- if (ret == 0)
- ret = RPCSVC_AUTH_ACCEPT;
- else
- ret = RPCSVC_AUTH_DONTCARE;
+ if (ret == 0)
+ ret = RPCSVC_AUTH_ACCEPT;
+ else
+ ret = RPCSVC_AUTH_REJECT;
out:
- return ret;
+ return ret;
}
-int
-rpcsvc_transport_peer_check_reject (dict_t *options, char *volname, char *clstr)
-{
- int ret = RPCSVC_AUTH_DONTCARE;
- char *srchstr = NULL;
- char generalrule[] = "rpc-auth.addr.reject";
-
- if ((!options) || (!clstr))
- return ret;
-
- if (volname) {
- ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.reject",
- volname);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
- ret = RPCSVC_AUTH_REJECT;
- goto out;
- }
- } else
- srchstr = generalrule;
-
- ret = rpcsvc_transport_peer_check_search (options, srchstr, clstr);
- if (volname)
- GF_FREE (srchstr);
+static int
+rpcsvc_transport_peer_check_reject(dict_t *options, char *volname, char *ip,
+ char *hostname)
+{
+ int ret = RPCSVC_AUTH_DONTCARE;
+ char *srchstr = NULL;
- if (ret == 0)
- ret = RPCSVC_AUTH_REJECT;
- else
- ret = RPCSVC_AUTH_DONTCARE;
-out:
+ if ((!options) || (!ip) || (!volname))
return ret;
-}
+ ret = gf_asprintf(&srchstr, "rpc-auth.addr.%s.reject", volname);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ ret = RPCSVC_AUTH_REJECT;
+ goto out;
+ }
-/* This function tests the results of the allow rule and the reject rule to
- * combine them into a single result that can be used to determine if the
- * connection should be allowed to proceed.
- * Heres the test matrix we need to follow in this function.
- *
- * A - Allow, the result of the allow test. Never returns R.
- * R - Reject, result of the reject test. Never returns A.
- * Both can return D or dont care if no rule was given.
- *
- * | @allow | @reject | Result |
- * | A | R | R |
- * | D | D | D |
- * | A | D | A |
- * | D | R | R |
- */
-int
-rpcsvc_combine_allow_reject_volume_check (int allow, int reject)
-{
- int final = RPCSVC_AUTH_REJECT;
-
- /* If allowed rule allows but reject rule rejects, we stay cautious
- * and reject. */
- if ((allow == RPCSVC_AUTH_ACCEPT) && (reject == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- /* if both are dont care, that is user did not specify for either allow
- * or reject, we leave it up to the general rule to apply, in the hope
- * that there is one.
- */
- else if ((allow == RPCSVC_AUTH_DONTCARE) &&
- (reject == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_DONTCARE;
- /* If one is dont care, the other one applies. */
- else if ((allow == RPCSVC_AUTH_ACCEPT) &&
- (reject == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((allow == RPCSVC_AUTH_DONTCARE) &&
- (reject == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
+ ret = rpcsvc_transport_peer_check_search(options, srchstr, ip, hostname);
+ GF_FREE(srchstr);
- return final;
+ if (ret == 0)
+ ret = RPCSVC_AUTH_REJECT;
+ else
+ ret = RPCSVC_AUTH_DONTCARE;
+out:
+ return ret;
}
-
-/* Combines the result of the general rule test against, the specific rule
- * to determine final permission for the client's address.
- *
- * | @gen | @spec | Result |
- * | A | A | A |
- * | A | R | R |
- * | A | D | A |
- * | D | A | A |
- * | D | R | R |
- * | D | D | D |
- * | R | A | A |
- * | R | D | R |
- * | R | R | R |
+/* Combines rpc auth's allow and reject options.
+ * Order of checks is important.
+ * First, REJECT if either rejects.
+ * If neither rejects, ACCEPT if either accepts.
+ * If neither accepts, DONTCARE
*/
int
-rpcsvc_combine_gen_spec_addr_checks (int gen, int spec)
+rpcsvc_combine_allow_reject_volume_check(int allow, int reject)
{
- int final = RPCSVC_AUTH_REJECT;
-
- if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec== RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_DONTCARE;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
-
- return final;
-}
-
+ if (allow == RPCSVC_AUTH_REJECT || reject == RPCSVC_AUTH_REJECT)
+ return RPCSVC_AUTH_REJECT;
+ if (allow == RPCSVC_AUTH_ACCEPT || reject == RPCSVC_AUTH_ACCEPT)
+ return RPCSVC_AUTH_ACCEPT;
-/* Combines the result of the general rule test against, the specific rule
- * to determine final test for the connection coming in for a given volume.
- *
- * | @gen | @spec | Result |
- * | A | A | A |
- * | A | R | R |
- * | A | D | A |
- * | D | A | A |
- * | D | R | R |
- * | D | D | R |, special case, we intentionally disallow this.
- * | R | A | A |
- * | R | D | R |
- * | R | R | R |
- */
-int
-rpcsvc_combine_gen_spec_volume_checks (int gen, int spec)
-{
- int final = RPCSVC_AUTH_REJECT;
-
- if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_ACCEPT) && (spec == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
- /* On no rule, we reject. */
- else if ((gen == RPCSVC_AUTH_DONTCARE) && (spec== RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_ACCEPT))
- final = RPCSVC_AUTH_ACCEPT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_DONTCARE))
- final = RPCSVC_AUTH_REJECT;
- else if ((gen == RPCSVC_AUTH_REJECT) && (spec == RPCSVC_AUTH_REJECT))
- final = RPCSVC_AUTH_REJECT;
-
- return final;
+ return RPCSVC_AUTH_DONTCARE;
}
-
int
-rpcsvc_transport_peer_check_name (dict_t *options, char *volname,
- rpc_transport_t *trans)
+rpcsvc_auth_check(rpcsvc_t *svc, char *volname, char *ipaddr)
{
- int ret = RPCSVC_AUTH_REJECT;
- int aret = RPCSVC_AUTH_REJECT;
- int rjret = RPCSVC_AUTH_REJECT;
- char clstr[RPCSVC_PEER_STRLEN];
-
- if (!trans)
- return ret;
-
- ret = rpcsvc_transport_peername (trans, clstr, RPCSVC_PEER_STRLEN);
- if (ret != 0) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get remote addr: "
- "%s", gai_strerror (ret));
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
-
- aret = rpcsvc_transport_peer_check_allow (options, volname, clstr);
- rjret = rpcsvc_transport_peer_check_reject (options, volname, clstr);
-
- ret = rpcsvc_combine_allow_reject_volume_check (aret, rjret);
-
-err:
+ int ret = RPCSVC_AUTH_REJECT;
+ int accept = RPCSVC_AUTH_REJECT;
+ int reject = RPCSVC_AUTH_REJECT;
+ char *hostname = NULL;
+ char *allow_str = NULL;
+ char *reject_str = NULL;
+ char *srchstr = NULL;
+ dict_t *options = NULL;
+
+ if (!svc || !volname || !ipaddr)
return ret;
-}
+ /* Fetch the options from svc struct and validate */
+ options = svc->options;
+ if (!options)
+ return ret;
-int
-rpcsvc_transport_peer_check_addr (dict_t *options, char *volname,
- rpc_transport_t *trans)
-{
- int ret = RPCSVC_AUTH_REJECT;
- int aret = RPCSVC_AUTH_DONTCARE;
- int rjret = RPCSVC_AUTH_REJECT;
- char clstr[RPCSVC_PEER_STRLEN];
- char *tmp = NULL;
- struct sockaddr_storage sastorage = {0,};
- struct sockaddr *sockaddr = NULL;
-
- if (!trans)
- return ret;
-
- ret = rpcsvc_transport_peeraddr (trans, clstr, RPCSVC_PEER_STRLEN,
- &sastorage, sizeof (sastorage));
- if (ret != 0) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get remote addr: "
- "%s", gai_strerror (ret));
- ret = RPCSVC_AUTH_REJECT;
- goto err;
+ /* Accept if its the default case: Allow all, Reject none
+ * The default volfile always contains a 'allow *' rule
+ * for each volume. If allow rule is missing (which implies
+ * there is some bad volfile generating code doing this), we
+ * assume no one is allowed mounts, and thus, we reject mounts.
+ */
+ ret = gf_asprintf(&srchstr, "rpc-auth.addr.%s.allow", volname);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ return RPCSVC_AUTH_REJECT;
+ }
+
+ ret = dict_get_str(options, srchstr, &allow_str);
+ GF_FREE(srchstr);
+ if (ret < 0)
+ return RPCSVC_AUTH_REJECT;
+
+ ret = gf_asprintf(&srchstr, "rpc-auth.addr.%s.reject", volname);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ return RPCSVC_AUTH_REJECT;
+ }
+
+ ret = dict_get_str(options, srchstr, &reject_str);
+ GF_FREE(srchstr);
+
+ /*
+ * If "reject_str" is being set as '*' (anonymous), then NFS-server
+ * would reject everything. If the "reject_str" is not set and
+ * "allow_str" is set as '*' (anonymous), then NFS-server would
+ * accept mount requests from all clients.
+ */
+ if (reject_str != NULL) {
+ if (!strcmp("*", reject_str))
+ return RPCSVC_AUTH_REJECT;
+ } else {
+ if (!strcmp("*", allow_str))
+ return RPCSVC_AUTH_ACCEPT;
+ }
+
+ /* addr-namelookup check */
+ if (svc->addr_namelookup == _gf_true) {
+ ret = gf_get_hostname_from_ip(ipaddr, &hostname);
+ if (ret) {
+ if (hostname)
+ GF_FREE(hostname);
+ /* failed to get hostname, but hostname auth
+ * is enabled, so authentication will not be
+ * 100% correct. reject mounts
+ */
+ return RPCSVC_AUTH_REJECT;
}
+ }
- sockaddr = (struct sockaddr *) &sastorage;
- switch (sockaddr->sa_family) {
-
- case AF_INET:
- case AF_INET6:
- tmp = strrchr (clstr, ':');
- if (tmp)
- *tmp = '\0';
- break;
- }
+ accept = rpcsvc_transport_peer_check_allow(options, volname, ipaddr,
+ hostname);
- aret = rpcsvc_transport_peer_check_allow (options, volname, clstr);
- rjret = rpcsvc_transport_peer_check_reject (options, volname, clstr);
+ reject = rpcsvc_transport_peer_check_reject(options, volname, ipaddr,
+ hostname);
- ret = rpcsvc_combine_allow_reject_volume_check (aret, rjret);
-err:
- return ret;
+ if (hostname)
+ GF_FREE(hostname);
+ return rpcsvc_combine_allow_reject_volume_check(accept, reject);
}
-
int
-rpcsvc_transport_check_volume_specific (dict_t *options, char *volname,
- rpc_transport_t *trans)
-{
- int namechk = RPCSVC_AUTH_REJECT;
- int addrchk = RPCSVC_AUTH_REJECT;
- gf_boolean_t namelookup = _gf_false;
- char *namestr = NULL;
- int ret = 0;
-
- if ((!options) || (!volname) || (!trans))
- return RPCSVC_AUTH_REJECT;
-
- /* Disabled by default */
- if ((dict_get (options, "rpc-auth.addr.namelookup"))) {
- ret = dict_get_str (options, "rpc-auth.addr.namelookup"
- , &namestr);
- if (ret == 0)
- ret = gf_string2boolean (namestr, &namelookup);
- }
-
- /* We need two separate checks because the rules with addresses in them
- * can be network addresses which can be general and names can be
- * specific which will over-ride the network address rules.
- */
- if (namelookup)
- namechk = rpcsvc_transport_peer_check_name (options, volname,
- trans);
- addrchk = rpcsvc_transport_peer_check_addr (options, volname, trans);
-
- if (namelookup)
- ret = rpcsvc_combine_gen_spec_addr_checks (addrchk,
- namechk);
- else
- ret = addrchk;
+rpcsvc_transport_privport_check(rpcsvc_t *svc, char *volname, uint16_t port)
+{
+ int ret = RPCSVC_AUTH_REJECT;
+ char *srchstr = NULL;
+ char *valstr = NULL;
+ gf_boolean_t insecure = _gf_false;
+ if ((!svc) || (!volname))
return ret;
-}
+ gf_log(GF_RPCSVC, GF_LOG_TRACE, "Client port: %d", (int)port);
+ /* If the port is already a privileged one, don't bother with checking
+ * options.
+ */
+ if (port <= 1024) {
+ ret = RPCSVC_AUTH_ACCEPT;
+ goto err;
+ }
+
+ /* Disabled by default */
+ ret = gf_asprintf(&srchstr, "rpc-auth.ports.%s.insecure", volname);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ ret = RPCSVC_AUTH_REJECT;
+ goto err;
+ }
+
+ ret = dict_get_str(svc->options, srchstr, &valstr);
+ if (ret) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Failed to"
+ " read rpc-auth.ports.insecure value");
+ goto err;
+ }
+
+ ret = gf_string2boolean(valstr, &insecure);
+ if (ret) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR,
+ "Failed to"
+ " convert rpc-auth.ports.insecure value");
+ goto err;
+ }
+
+ ret = insecure ? RPCSVC_AUTH_ACCEPT : RPCSVC_AUTH_REJECT;
+
+ if (ret == RPCSVC_AUTH_ACCEPT)
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG, "Unprivileged port allowed");
+ else
+ gf_log(GF_RPCSVC, GF_LOG_DEBUG,
+ "Unprivileged port not"
+ " allowed");
-int
-rpcsvc_transport_check_volume_general (dict_t *options, rpc_transport_t *trans)
-{
- int addrchk = RPCSVC_AUTH_REJECT;
- int namechk = RPCSVC_AUTH_REJECT;
- gf_boolean_t namelookup = _gf_false;
- char *namestr = NULL;
- int ret = 0;
-
- if ((!options) || (!trans))
- return RPCSVC_AUTH_REJECT;
-
- /* Disabled by default */
- if ((dict_get (options, "rpc-auth.addr.namelookup"))) {
- ret = dict_get_str (options, "rpc-auth.addr.namelookup"
- , &namestr);
- if (ret == 0)
- ret = gf_string2boolean (namestr, &namelookup);
- }
-
- /* We need two separate checks because the rules with addresses in them
- * can be network addresses which can be general and names can be
- * specific which will over-ride the network address rules.
- */
- if (namelookup)
- namechk = rpcsvc_transport_peer_check_name (options, NULL, trans);
- addrchk = rpcsvc_transport_peer_check_addr (options, NULL, trans);
-
- if (namelookup)
- ret = rpcsvc_combine_gen_spec_addr_checks (addrchk, namechk);
- else
- ret = addrchk;
+err:
+ if (srchstr)
+ GF_FREE(srchstr);
- return ret;
+ return ret;
}
-int
-rpcsvc_transport_peer_check (dict_t *options, char *volname,
- rpc_transport_t *trans)
+char *
+rpcsvc_volume_allowed(dict_t *options, char *volname)
{
- int general_chk = RPCSVC_AUTH_REJECT;
- int specific_chk = RPCSVC_AUTH_REJECT;
+ char globalrule[] = "rpc-auth.addr.allow";
+ char *srchstr = NULL;
+ char *addrstr = NULL;
+ int ret = -1;
- if ((!options) || (!volname) || (!trans))
- return RPCSVC_AUTH_REJECT;
+ if ((!options) || (!volname))
+ return NULL;
- general_chk = rpcsvc_transport_check_volume_general (options, trans);
- specific_chk = rpcsvc_transport_check_volume_specific (options, volname,
- trans);
+ ret = gf_asprintf(&srchstr, "rpc-auth.addr.%s.allow", volname);
+ if (ret == -1) {
+ gf_log(GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
+ goto out;
+ }
- return rpcsvc_combine_gen_spec_volume_checks (general_chk,
- specific_chk);
-}
+ if (!dict_get(options, srchstr))
+ ret = dict_get_str(options, globalrule, &addrstr);
+ else
+ ret = dict_get_str(options, srchstr, &addrstr);
+out:
+ GF_FREE(srchstr);
-int
-rpcsvc_transport_privport_check (rpcsvc_t *svc, char *volname,
- rpc_transport_t *trans)
-{
- struct sockaddr_storage sastorage = {0,};
- struct sockaddr_in *sa = NULL;
- int ret = RPCSVC_AUTH_REJECT;
- socklen_t sasize = sizeof (sa);
- char *srchstr = NULL;
- char *valstr = NULL;
- int globalinsecure = RPCSVC_AUTH_REJECT;
- int exportinsecure = RPCSVC_AUTH_DONTCARE;
- uint16_t port = 0;
- gf_boolean_t insecure = _gf_false;
-
- if ((!svc) || (!volname) || (!trans))
- return ret;
-
- sa = (struct sockaddr_in*) &sastorage;
- ret = rpcsvc_transport_peeraddr (trans, NULL, 0, &sastorage,
- sasize);
- if (ret != 0) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to get peer addr: %s",
- gai_strerror (ret));
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
+ return addrstr;
+}
- port = ntohs (sa->sin_port);
- gf_log (GF_RPCSVC, GF_LOG_TRACE, "Client port: %d", (int)port);
- /* If the port is already a privileged one, dont bother with checking
- * options.
+/*
+ * rpcsvc_match_subnet_v4() takes subnetwork address pattern and checks
+ * if the target IPv4 address has the same network address with the help
+ * of network mask.
+ *
+ * Returns 0 for SUCCESS and -1 otherwise.
+ *
+ * NB: Validation of subnetwork address pattern is not required
+ * as it's already being done at the time of CLI SET.
+ */
+static int
+rpcsvc_match_subnet_v4(const char *addrtok, const char *ipaddr)
+{
+ char *slash = NULL;
+ char *netaddr = NULL;
+ int ret = -1;
+ uint32_t prefixlen = 0;
+ uint32_t shift = 0;
+ struct sockaddr_in sin1 = {
+ 0,
+ };
+ struct sockaddr_in sin2 = {
+ 0,
+ };
+ struct sockaddr_in mask = {
+ 0,
+ };
+
+ /* Copy the input */
+ netaddr = gf_strdup(addrtok);
+ if (netaddr == NULL) /* ENOMEM */
+ goto out;
+
+ /* Find the network socket addr of target */
+ if (inet_pton(AF_INET, ipaddr, &sin1.sin_addr) == 0)
+ goto out;
+
+ slash = strchr(netaddr, '/');
+ if (slash) {
+ *slash = '\0';
+ /*
+ * Find the IPv4 network mask in network byte order.
+ * IMP: String slash+1 is already validated, it can't have value
+ * more than IPv4_ADDR_SIZE (32).
*/
- if (port <= 1024) {
- ret = RPCSVC_AUTH_ACCEPT;
- goto err;
- }
+ prefixlen = (uint32_t)atoi(slash + 1);
+ if (prefixlen > 31)
+ goto out;
+ } else {
+ /* if there is no '/', then this function wouldn't be called */
+ goto out;
+ }
+
+ /* Need to do this after removing '/', as inet_pton() take IP address as
+ * second argument. Once we get sin2, then comparison is oranges to orange
+ */
+ if (inet_pton(AF_INET, netaddr, &sin2.sin_addr) == 0)
+ goto out;
+
+ shift = IPv4_ADDR_SIZE - prefixlen;
+ mask.sin_addr.s_addr = htonl((uint32_t)~0 << shift);
+
+ if (mask_match(sin1.sin_addr.s_addr, sin2.sin_addr.s_addr,
+ mask.sin_addr.s_addr)) {
+ ret = 0; /* SUCCESS */
+ }
+out:
+ GF_FREE(netaddr);
+ return ret;
+}
- /* Disabled by default */
- if ((dict_get (svc->options, "rpc-auth.ports.insecure"))) {
- ret = dict_get_str (svc->options, "rpc-auth.ports.insecure"
- , &srchstr);
- if (ret == 0) {
- ret = gf_string2boolean (srchstr, &insecure);
- if (ret == 0) {
- if (insecure == _gf_true)
- globalinsecure = RPCSVC_AUTH_ACCEPT;
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
- " read rpc-auth.ports.insecure value");
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
- " read rpc-auth.ports.insecure value");
- }
+void
+rpcsvc_program_dump(rpcsvc_program_t *prog)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i;
- /* Disabled by default */
- ret = gf_asprintf (&srchstr, "rpc-auth.ports.%s.insecure", volname);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
- ret = RPCSVC_AUTH_REJECT;
- goto err;
- }
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s", prog->progname);
+ gf_proc_dump_add_section("%s", key_prefix);
- if (dict_get (svc->options, srchstr)) {
- ret = dict_get_str (svc->options, srchstr, &valstr);
- if (ret == 0) {
- ret = gf_string2boolean (valstr, &insecure);
- if (ret == 0) {
- if (insecure == _gf_true)
- exportinsecure = RPCSVC_AUTH_ACCEPT;
- else
- exportinsecure = RPCSVC_AUTH_REJECT;
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
- " read rpc-auth.ports.insecure value");
- } else
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "Failed to"
- " read rpc-auth.ports.insecure value");
- }
+ gf_proc_dump_build_key(key, key_prefix, "program-number");
+ gf_proc_dump_write(key, "%d", prog->prognum);
- ret = rpcsvc_combine_gen_spec_volume_checks (globalinsecure,
- exportinsecure);
- if (ret == RPCSVC_AUTH_ACCEPT)
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Unprivileged port allowed");
- else
- gf_log (GF_RPCSVC, GF_LOG_DEBUG, "Unprivileged port not"
- " allowed");
+ gf_proc_dump_build_key(key, key_prefix, "program-version");
+ gf_proc_dump_write(key, "%d", prog->progver);
-err:
- if (srchstr)
- GF_FREE (srchstr);
+ strncat(key_prefix, ".latency",
+ sizeof(key_prefix) - strlen(key_prefix) - 1);
- return ret;
+ for (i = 0; i < prog->numactors; i++) {
+ gf_proc_dump_build_key(key, key_prefix, "%s", prog->actors[i].procname);
+ gf_latency_statedump_and_reset(key, &prog->latencies[i]);
+ }
}
-
-char *
-rpcsvc_volume_allowed (dict_t *options, char *volname)
+void
+rpcsvc_statedump(rpcsvc_t *svc)
{
- char globalrule[] = "rpc-auth.addr.allow";
- char *srchstr = NULL;
- char *addrstr = NULL;
- int ret = -1;
-
- if ((!options) || (!volname))
- return NULL;
-
- ret = gf_asprintf (&srchstr, "rpc-auth.addr.%s.allow", volname);
- if (ret == -1) {
- gf_log (GF_RPCSVC, GF_LOG_ERROR, "asprintf failed");
- goto out;
+ rpcsvc_program_t *prog = NULL;
+ int ret = 0;
+ ret = pthread_rwlock_tryrdlock(&svc->rpclock);
+ if (ret)
+ return;
+ {
+ list_for_each_entry(prog, &svc->programs, program)
+ {
+ rpcsvc_program_dump(prog);
}
-
- if (!dict_get (options, srchstr))
- ret = dict_get_str (options, globalrule, &addrstr);
- else
- ret = dict_get_str (options, srchstr, &addrstr);
-
-out:
- if (srchstr)
- GF_FREE (srchstr);
-
- return addrstr;
+ }
+ pthread_rwlock_unlock(&svc->rpclock);
}
-
-rpcsvc_actor_t gluster_dump_actors[] = {
- [GF_DUMP_NULL] = {"NULL", GF_DUMP_NULL, NULL, NULL, NULL, 0},
- [GF_DUMP_DUMP] = {"DUMP", GF_DUMP_DUMP, rpcsvc_dump, NULL, NULL, 0},
- [GF_DUMP_MAXVALUE] = {"MAXVALUE", GF_DUMP_MAXVALUE, NULL, NULL, NULL, 0},
+static rpcsvc_actor_t gluster_dump_actors[GF_DUMP_MAXVALUE] = {
+ [GF_DUMP_NULL] = {"NULL", NULL, NULL, GF_DUMP_NULL, DRC_NA, 0},
+ [GF_DUMP_DUMP] = {"DUMP", rpcsvc_dump, NULL, GF_DUMP_DUMP, DRC_NA, 0},
+ [GF_DUMP_PING] = {"PING", rpcsvc_ping, NULL, GF_DUMP_PING, DRC_NA, 0},
};
-
-struct rpcsvc_program gluster_dump_prog = {
- .progname = "GF-DUMP",
- .prognum = GLUSTER_DUMP_PROGRAM,
- .progver = GLUSTER_DUMP_VERSION,
- .actors = gluster_dump_actors,
- .numactors = 2,
+static struct rpcsvc_program gluster_dump_prog = {
+ .progname = "GF-DUMP",
+ .prognum = GLUSTER_DUMP_PROGRAM,
+ .progver = GLUSTER_DUMP_VERSION,
+ .actors = gluster_dump_actors,
+ .numactors = GF_DUMP_MAXVALUE,
};
diff --git a/rpc/rpc-lib/src/rpcsvc.h b/rpc/rpc-lib/src/rpcsvc.h
index 847ec1b336e..7b3030926c8 100644
--- a/rpc/rpc-lib/src/rpcsvc.h
+++ b/rpc/rpc-lib/src/rpcsvc.h
@@ -11,47 +11,57 @@
#ifndef _RPCSVC_H
#define _RPCSVC_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "event.h"
+#include <glusterfs/gf-event.h>
#include "rpc-transport.h"
-#include "logging.h"
-#include "dict.h"
-#include "mem-pool.h"
-#include "list.h"
-#include "iobuf.h"
+#include <glusterfs/dict.h>
#include "xdr-rpc.h"
-#include "glusterfs.h"
-#include "xlator.h"
#include "rpcsvc-common.h"
#include <pthread.h>
#include <sys/uio.h>
#include <inttypes.h>
#include <rpc/rpc_msg.h>
-#include "compat.h"
+#include <glusterfs/compat.h>
+#include <glusterfs/client_t.h>
#ifndef MAX_IOVEC
#define MAX_IOVEC 16
#endif
-#define GF_RPCSVC "rpc-service"
+/* TODO: we should store prognums at a centralized location to avoid conflict
+ or use a robust random number generator to avoid conflicts
+*/
+
+#define RPCSVC_INFRA_PROGRAM 7712846 /* random number */
+
+typedef enum {
+ RPCSVC_PROC_EVENT_THREAD_DEATH = 0,
+} rpcsvc_infra_procnum_t;
+
+#define RPCSVC_DEFAULT_OUTSTANDING_RPC_LIMIT \
+ 64 /* Default for protocol/server */
+#define RPCSVC_DEF_NFS_OUTSTANDING_RPC_LIMIT 16 /* Default for nfs/server */
+#define RPCSVC_MAX_OUTSTANDING_RPC_LIMIT 65536
+#define RPCSVC_MIN_OUTSTANDING_RPC_LIMIT 0 /* No limit i.e. Unlimited */
+
+#define GF_RPCSVC "rpc-service"
#define RPCSVC_THREAD_STACK_SIZE ((size_t)(1024 * GF_UNIT_KB))
-#define RPCSVC_FRAGHDR_SIZE 4 /* 4-byte RPC fragment header size */
-#define RPCSVC_DEFAULT_LISTEN_PORT GF_DEFAULT_BASE_PORT
-#define RPCSVC_DEFAULT_MEMFACTOR 8
-#define RPCSVC_EVENTPOOL_SIZE_MULT 1024
-#define RPCSVC_POOLCOUNT_MULT 64
-#define RPCSVC_CONN_READ (128 * GF_UNIT_KB)
-#define RPCSVC_PAGE_SIZE (128 * GF_UNIT_KB)
+#define RPCSVC_FRAGHDR_SIZE 4 /* 4-byte RPC fragment header size */
+#define RPCSVC_DEFAULT_LISTEN_PORT GF_DEFAULT_BASE_PORT
+#define RPCSVC_DEFAULT_MEMFACTOR 8
+#define RPCSVC_EVENTPOOL_SIZE_MULT 1024
+#define RPCSVC_POOLCOUNT_MULT 64
+#define RPCSVC_CONN_READ (128 * GF_UNIT_KB)
+#define RPCSVC_PAGE_SIZE (128 * GF_UNIT_KB)
+#define RPC_ROOT_UID 0
+#define RPC_ROOT_GID 0
+#define RPC_NOBODY_UID 65534
+#define RPC_NOBODY_GID 65534
/* RPC Record States */
-#define RPCSVC_READ_FRAGHDR 1
-#define RPCSVC_READ_FRAG 2
+#define RPCSVC_READ_FRAGHDR 1
+#define RPCSVC_READ_FRAG 2
/* The size in bytes, if crossed by a fragment will be handed over to the
* vectored actor so that it can allocate its buffers the way it wants.
* In our RPC layer, we assume that vectored RPC requests/records are never
@@ -59,21 +69,28 @@
* whether the record should be handled in RPC layer completely or handed to
* the vectored handler.
*/
-#define RPCSVC_VECTORED_FRAGSZ 4096
-#define RPCSVC_VECTOR_READCRED 1003
-#define RPCSVC_VECTOR_READVERFSZ 1004
-#define RPCSVC_VECTOR_READVERF 1005
-#define RPCSVC_VECTOR_IGNORE 1006
-#define RPCSVC_VECTOR_READVEC 1007
-#define RPCSVC_VECTOR_READPROCHDR 1008
-
-#define rpcsvc_record_vectored_baremsg(rs) (((rs)->state == RPCSVC_READ_FRAG) && (rs)->vecstate == 0)
-#define rpcsvc_record_vectored_cred(rs) ((rs)->vecstate == RPCSVC_VECTOR_READCRED)
-#define rpcsvc_record_vectored_verfsz(rs) ((rs)->vecstate == RPCSVC_VECTOR_READVERFSZ)
-#define rpcsvc_record_vectored_verfread(rs) ((rs)->vecstate == RPCSVC_VECTOR_READVERF)
-#define rpcsvc_record_vectored_ignore(rs) ((rs)->vecstate == RPCSVC_VECTOR_IGNORE)
-#define rpcsvc_record_vectored_readvec(rs) ((rs)->vecstate == RPCSVC_VECTOR_READVEC)
-#define rpcsvc_record_vectored_readprochdr(rs) ((rs)->vecstate == RPCSVC_VECTOR_READPROCHDR)
+#define RPCSVC_VECTORED_FRAGSZ 4096
+#define RPCSVC_VECTOR_READCRED 1003
+#define RPCSVC_VECTOR_READVERFSZ 1004
+#define RPCSVC_VECTOR_READVERF 1005
+#define RPCSVC_VECTOR_IGNORE 1006
+#define RPCSVC_VECTOR_READVEC 1007
+#define RPCSVC_VECTOR_READPROCHDR 1008
+
+#define rpcsvc_record_vectored_baremsg(rs) \
+ (((rs)->state == RPCSVC_READ_FRAG) && (rs)->vecstate == 0)
+#define rpcsvc_record_vectored_cred(rs) \
+ ((rs)->vecstate == RPCSVC_VECTOR_READCRED)
+#define rpcsvc_record_vectored_verfsz(rs) \
+ ((rs)->vecstate == RPCSVC_VECTOR_READVERFSZ)
+#define rpcsvc_record_vectored_verfread(rs) \
+ ((rs)->vecstate == RPCSVC_VECTOR_READVERF)
+#define rpcsvc_record_vectored_ignore(rs) \
+ ((rs)->vecstate == RPCSVC_VECTOR_IGNORE)
+#define rpcsvc_record_vectored_readvec(rs) \
+ ((rs)->vecstate == RPCSVC_VECTOR_READVEC)
+#define rpcsvc_record_vectored_readprochdr(rs) \
+ ((rs)->vecstate == RPCSVC_VECTOR_READPROCHDR)
#define rpcsvc_record_vectored(rs) ((rs)->fragsize > RPCSVC_VECTORED_FRAGSZ)
/* Includes bytes up to and including the credential length field. The credlen
* will be followed by @credlen bytes of credential data which will have to be
@@ -81,187 +98,232 @@
* verifier which will also have to be read separately including the 8 bytes of
* verf flavour and verflen.
*/
-#define RPCSVC_BARERPC_MSGSZ 32
-#define rpcsvc_record_readfraghdr(rs) ((rs)->state == RPCSVC_READ_FRAGHDR)
-#define rpcsvc_record_readfrag(rs) ((rs)->state == RPCSVC_READ_FRAG)
+#define RPCSVC_BARERPC_MSGSZ 32
+#define rpcsvc_record_readfraghdr(rs) ((rs)->state == RPCSVC_READ_FRAGHDR)
+#define rpcsvc_record_readfrag(rs) ((rs)->state == RPCSVC_READ_FRAG)
-#define RPCSVC_LOWVERS 2
+#define RPCSVC_LOWVERS 2
#define RPCSVC_HIGHVERS 2
-
#if 0
#error "defined in /usr/include/rpc/auth.h"
-#define AUTH_NONE 0 /* no authentication */
-#define AUTH_NULL 0 /* backward compatibility */
-#define AUTH_SYS 1 /* unix style (uid, gids) */
-#define AUTH_UNIX AUTH_SYS
-#define AUTH_SHORT 2 /* short hand unix style */
-#define AUTH_DES 3 /* des style (encrypted timestamps) */
-#define AUTH_DH AUTH_DES /* Diffie-Hellman (this is DES) */
-#define AUTH_KERB 4 /* kerberos style */
-#endif /* */
+#define AUTH_NONE 0 /* no authentication */
+#define AUTH_NULL 0 /* backward compatibility */
+#define AUTH_SYS 1 /* unix style (uid, gids) */
+#define AUTH_UNIX AUTH_SYS
+#define AUTH_SHORT 2 /* short hand unix style */
+#define AUTH_DES 3 /* des style (encrypted timestamps) */
+#define AUTH_DH AUTH_DES /* Diffie-Hellman (this is DES) */
+#define AUTH_KERB 4 /* kerberos style */
+#endif /* */
typedef struct rpcsvc_program rpcsvc_program_t;
struct rpcsvc_notify_wrapper {
- struct list_head list;
- void *data;
- rpcsvc_notify_t notify;
+ struct list_head list;
+ void *data;
+ rpcsvc_notify_t notify;
};
typedef struct rpcsvc_notify_wrapper rpcsvc_notify_wrapper_t;
-
typedef struct rpcsvc_request rpcsvc_request_t;
typedef struct {
- rpc_transport_t *trans;
- rpcsvc_t *svc;
- /* FIXME: remove address from this structure. Instead use get_myaddr
- * interface implemented by individual transports.
- */
- struct sockaddr_storage sa;
- struct list_head list;
+ rpc_transport_t *trans;
+ rpcsvc_t *svc;
+ /* FIXME: remove address from this structure. Instead use get_myaddr
+ * interface implemented by individual transports.
+ */
+ struct sockaddr_storage sa;
+ struct list_head list;
} rpcsvc_listener_t;
struct rpcsvc_config {
- int max_block_size;
+ int max_block_size;
};
-typedef struct rpcsvc_auth_data {
- int flavour;
- int datalen;
- char authdata[GF_MAX_AUTH_BYTES];
-} rpcsvc_auth_data_t;
+#define rpcsvc_auth_flavour(au) ((au).flavour)
-#define rpcsvc_auth_flavour(au) ((au).flavour)
+typedef struct drc_client drc_client_t;
+typedef struct drc_cached_op drc_cached_op_t;
/* The container for the RPC call handed up to an actor.
* Dynamically allocated. Lives till the call reply is completely
* transmitted.
* */
struct rpcsvc_request {
- /* connection over which this request came. */
- rpc_transport_t *trans;
-
- rpcsvc_t *svc;
-
- rpcsvc_program_t *prog;
-
- /* The identifier for the call from client.
- * Needed to pair the reply with the call.
- */
- uint32_t xid;
-
- int prognum;
-
- int progver;
-
- int procnum;
-
- int type;
-
- /* Uid and gid filled by the rpc-auth module during the authentication
- * phase.
- */
- uid_t uid;
- gid_t gid;
- pid_t pid;
-
- gf_lkowner_t lk_owner;
- uint64_t gfs_id;
+ /* connection over which this request came. */
+ rpc_transport_t *trans;
- /* Might want to move this to AUTH_UNIX specific state since this array
- * is not available for every authentication scheme.
- */
- gid_t auxgids[GF_MAX_AUX_GROUPS];
- int auxgidcount;
+ rpcsvc_t *svc;
+
+ rpcsvc_program_t *prog;
+ int prognum;
+
+ int progver;
- /* The RPC message payload, contains the data required
- * by the program actors. This is the buffer that will need to
- * be de-xdred by the actor.
- */
- struct iovec msg[MAX_IOVEC];
- int count;
-
- struct iobref *iobref;
-
- /* Status of the RPC call, whether it was accepted or denied. */
- int rpc_status;
-
- /* In case, the call was denied, the RPC error is stored here
- * till the reply is sent.
- */
- int rpc_err;
-
- /* In case the failure happened because of an authentication problem
- * , this value needs to be assigned the correct auth error number.
- */
- int auth_err;
-
- /* There can be cases of RPC requests where the reply needs to
- * be built from multiple sources. E.g. where even the NFS reply
- * can contain a payload, as in the NFSv3 read reply. Here the RPC header
- * ,NFS header and the read data are brought together separately from
- * different buffers, so we need to stage the buffers temporarily here
- * before all of them get added to the connection's transmission list.
- */
- struct list_head txlist;
-
- /* While the reply record is being built, this variable keeps track
- * of how many bytes have been added to the record.
- */
- size_t payloadsize;
-
- /* The credentials extracted from the rpc request */
- rpcsvc_auth_data_t cred;
-
- /* The verified extracted from the rpc request. In request side
- * processing this contains the verifier sent by the client, on reply
- * side processing, it is filled with the verified that will be
- * sent to the client.
- */
- rpcsvc_auth_data_t verf;
-
- /* Container for a RPC program wanting to store a temp
- * request-specific item.
- */
- void *private;
-
- /* Container for transport to store request-specific item */
- void *trans_private;
+ int procnum;
+
+ int type;
+
+ /* Uid and gid filled by the rpc-auth module during the authentication
+ * phase.
+ */
+ uid_t uid;
+ gid_t gid;
+ pid_t pid;
+
+ gf_lkowner_t lk_owner;
+ uint64_t gfs_id;
+
+ /* Might want to move this to AUTH_UNIX specific state since this array
+ * is not available for every authentication scheme.
+ */
+ gid_t *auxgids;
+ gid_t auxgidsmall[SMALL_GROUP_COUNT];
+ gid_t *auxgidlarge;
+ int auxgidcount;
+
+ /* The RPC message payload, contains the data required
+ * by the program actors. This is the buffer that will need to
+ * be de-xdred by the actor.
+ */
+ int count;
+ struct iovec msg[MAX_IOVEC];
+
+ struct iobref *iobref;
+
+ /* There can be cases of RPC requests where the reply needs to
+ * be built from multiple sources. E.g. where even the NFS reply
+ * can contain a payload, as in the NFSv3 read reply. Here the RPC header
+ * ,NFS header and the read data are brought together separately from
+ * different buffers, so we need to stage the buffers temporarily here
+ * before all of them get added to the connection's transmission list.
+ */
+ struct list_head txlist;
+
+ /* While the reply record is being built, this variable keeps track
+ * of how many bytes have been added to the record.
+ */
+ size_t payloadsize;
+
+ /* The credentials extracted from the rpc request */
+ client_auth_data_t cred;
+
+ /* The verified extracted from the rpc request. In request side
+ * processing this contains the verifier sent by the client, on reply
+ * side processing, it is filled with the verified that will be
+ * sent to the client.
+ */
+ client_auth_data_t verf;
+ /* Container for a RPC program wanting to store a temp
+ * request-specific item.
+ */
+ void *private;
+
+ /* Container for transport to store request-specific item */
+ void *trans_private;
+
+ /* pointer to cached reply for use in DRC */
+ drc_cached_op_t *reply;
+
+ /* request queue in rpcsvc */
+ struct list_head request_list;
+
+ /* Status of the RPC call, whether it was accepted or denied. */
+ int rpc_status;
+
+ /* In case, the call was denied, the RPC error is stored here
+ * till the reply is sent.
+ */
+ int rpc_err;
+
+ /* In case the failure happened because of an authentication problem
+ * , this value needs to be assigned the correct auth error number.
+ */
+ int auth_err;
+
+ /* Things passed to rpc layer from client */
+
+ /* @flags: Can be used for binary data passed in xdata to be
+ passed here instead */
+ unsigned int flags;
+
+ /* ctime: origin of time on the client side, ideally this is
+ the one we should consider for time */
+ struct timespec ctime;
+
+ /* The identifier for the call from client.
+ * Needed to pair the reply with the call.
+ */
+ uint32_t xid;
+
+ /* Execute this request's actor function in ownthread of program?*/
+ gf_boolean_t ownthread;
+
+ gf_boolean_t synctask;
+ struct timespec begin; /*req handling start time*/
+ struct timespec end; /*req handling end time*/
};
#define rpcsvc_request_program(req) ((rpcsvc_program_t *)((req)->prog))
#define rpcsvc_request_procnum(req) (((req)->procnum))
-#define rpcsvc_request_program_private(req) (((rpcsvc_program_t *)((req)->prog))->private)
-#define rpcsvc_request_accepted(req) ((req)->rpc_status == MSG_ACCEPTED)
+#define rpcsvc_request_program_private(req) \
+ (((rpcsvc_program_t *)((req)->prog))->private)
+#define rpcsvc_request_accepted(req) ((req)->rpc_status == MSG_ACCEPTED)
#define rpcsvc_request_accepted_success(req) ((req)->rpc_err == SUCCESS)
-#define rpcsvc_request_uid(req) ((req)->uid)
-#define rpcsvc_request_gid(req) ((req)->gid)
#define rpcsvc_request_prog_minauth(req) (rpcsvc_request_program(req)->min_auth)
#define rpcsvc_request_cred_flavour(req) (rpcsvc_auth_flavour(req->cred))
#define rpcsvc_request_verf_flavour(req) (rpcsvc_auth_flavour(req->verf))
-#define rpcsvc_request_service(req) ((req)->svc)
-#define rpcsvc_request_uid(req) ((req)->uid)
-#define rpcsvc_request_gid(req) ((req)->gid)
-#define rpcsvc_request_private(req) ((req)->private)
-#define rpcsvc_request_xid(req) ((req)->xid)
-#define rpcsvc_request_set_private(req,prv) (req)->private = (void *)(prv)
-#define rpcsvc_request_iobref_ref(req) (iobref_ref ((req)->iobref))
-#define rpcsvc_request_record_ref(req) (iobuf_ref ((req)->recordiob))
-#define rpcsvc_request_record_unref(req) (iobuf_unref ((req)->recordiob))
-#define rpcsvc_request_record_iob(req) ((req)->recordiob)
-#define rpcsvc_request_set_vecstate(req, state) ((req)->vecstate = state)
+#define rpcsvc_request_service(req) ((req)->svc)
+#define rpcsvc_request_uid(req) ((req)->uid)
+#define rpcsvc_request_gid(req) ((req)->gid)
+#define rpcsvc_request_private(req) ((req)->private)
+#define rpcsvc_request_xid(req) ((req)->xid)
+#define rpcsvc_request_set_private(req, prv) (req)->private = (void *)(prv)
+#define rpcsvc_request_iobref_ref(req) (iobref_ref((req)->iobref))
+#define rpcsvc_request_record_ref(req) (iobuf_ref((req)->recordiob))
+#define rpcsvc_request_record_unref(req) (iobuf_unref((req)->recordiob))
+#define rpcsvc_request_record_iob(req) ((req)->recordiob)
+#define rpcsvc_request_set_vecstate(req, state) ((req)->vecstate = state)
#define rpcsvc_request_vecstate(req) ((req)->vecstate)
#define rpcsvc_request_transport(req) ((req)->trans)
#define rpcsvc_request_transport_ref(req) (rpc_transport_ref((req)->trans))
-
-
-#define RPCSVC_ACTOR_SUCCESS 0
-#define RPCSVC_ACTOR_ERROR (-1)
-#define RPCSVC_ACTOR_IGNORE (-2)
+#define RPC_AUTH_ROOT_SQUASH(req) \
+ do { \
+ int gidcount = 0; \
+ if (req->svc->root_squash) { \
+ if (req->uid == RPC_ROOT_UID) \
+ req->uid = req->svc->anonuid; \
+ if (req->gid == RPC_ROOT_GID) \
+ req->gid = req->svc->anongid; \
+ \
+ for (gidcount = 0; gidcount < req->auxgidcount; ++gidcount) { \
+ if (!req->auxgids[gidcount]) \
+ req->auxgids[gidcount] = req->svc->anongid; \
+ } \
+ } \
+ } while (0);
+
+#define RPC_AUTH_ALL_SQUASH(req) \
+ do { \
+ int gidcount = 0; \
+ if (req->svc->all_squash) { \
+ req->uid = req->svc->anonuid; \
+ req->gid = req->svc->anongid; \
+ \
+ for (gidcount = 0; gidcount < req->auxgidcount; ++gidcount) { \
+ if (!req->auxgids[gidcount]) \
+ req->auxgids[gidcount] = req->svc->anongid; \
+ } \
+ } \
+ } while (0);
+
+#define RPCSVC_ACTOR_SUCCESS 0
+#define RPCSVC_ACTOR_ERROR (-1)
+#define RPCSVC_ACTOR_IGNORE (-2)
/* Functor for every type of protocol actor
* must be defined like this.
@@ -275,118 +337,146 @@ struct rpcsvc_request {
* should return RPCSVC_ACTOR_ERROR.
*
*/
-typedef int (*rpcsvc_actor) (rpcsvc_request_t *req);
-typedef int (*rpcsvc_vector_actor) (rpcsvc_request_t *req, struct iovec *vec,
- int count, struct iobref *iobref);
-typedef int (*rpcsvc_vector_sizer) (int state, ssize_t *readsize,
- char *base_addr, char *curr_addr);
+typedef int (*rpcsvc_actor)(rpcsvc_request_t *req);
+typedef int (*rpcsvc_vector_sizer)(int state, ssize_t *readsize,
+ char *base_addr, char *curr_addr);
/* Every protocol actor will also need to specify the function the RPC layer
* will use to serialize or encode the message into XDR format just before
* transmitting on the connection.
*/
-typedef void *(*rpcsvc_encode_reply) (void *msg);
+typedef void *(*rpcsvc_encode_reply)(void *msg);
/* Once the reply has been transmitted, the message will have to be de-allocated
* , so every actor will need to provide a function that deallocates the message
* it had allocated as a response.
*/
-typedef void (*rpcsvc_deallocate_reply) (void *msg);
-
+typedef void (*rpcsvc_deallocate_reply)(void *msg);
-#define RPCSVC_NAME_MAX 32
+#define RPCSVC_NAME_MAX 32
/* The descriptor for each procedure/actor that runs
* over the RPC service.
*/
typedef struct rpcsvc_actor_desc {
- char procname[RPCSVC_NAME_MAX];
- int procnum;
- rpcsvc_actor actor;
-
- /* Handler for cases where the RPC requests fragments are large enough
- * to benefit from being decoded into aligned memory addresses. While
- * decoding the request in a non-vectored manner, due to the nature of
- * the XDR scheme, RPC cannot guarantee memory aligned addresses for
- * the resulting message-specific structures. Allowing a specialized
- * handler for letting the RPC program read the data from the network
- * directly into its aligned buffers.
- */
- rpcsvc_vector_actor vector_actor;
- rpcsvc_vector_sizer vector_sizer;
-
- /* Can actor be ran on behalf an unprivileged requestor? */
- gf_boolean_t unprivileged;
+ char procname[RPCSVC_NAME_MAX];
+ rpcsvc_actor actor;
+
+ /* Handler for cases where the RPC requests fragments are large enough
+ * to benefit from being decoded into aligned memory addresses. While
+ * decoding the request in a non-vectored manner, due to the nature of
+ * the XDR scheme, RPC cannot guarantee memory aligned addresses for
+ * the resulting message-specific structures. Allowing a specialized
+ * handler for letting the RPC program read the data from the network
+ * directly into its aligned buffers.
+ */
+ rpcsvc_vector_sizer vector_sizer;
+
+ int procnum;
+
+ /* Can actor be ran on behalf an unprivileged requestor? */
+ drc_op_type_t op_type;
+ gf_boolean_t unprivileged;
} rpcsvc_actor_t;
+typedef struct rpcsvc_request_queue {
+ struct list_head request_queue;
+ pthread_mutex_t queue_lock;
+ pthread_cond_t queue_cond;
+ pthread_t thread;
+ struct rpcsvc_program *program;
+ int gen;
+ gf_boolean_t waiting;
+} rpcsvc_request_queue_t;
+
/* Describes a program and its version along with the function pointers
* required to handle the procedures/actors of each program/version.
* Never changed ever by any thread so no need for a lock.
*/
struct rpcsvc_program {
- char progname[RPCSVC_NAME_MAX];
- int prognum;
- int progver;
- /* FIXME */
- dict_t *options; /* An opaque dictionary
- * populated by the program
- * (probably from xl->options)
- * which contain enough
- * information for transport to
- * initialize. As a part of
- * cleanup, the members of
- * options which are of interest
- * to transport should be put
- * into a structure for better
- * readability and structure
- * should replace options member
- * here.
- */
- uint16_t progport; /* Registered with portmap */
+ char progname[RPCSVC_NAME_MAX];
+ int prognum;
+ int progver;
+ /* FIXME */
+ dict_t *options; /* An opaque dictionary
+ * populated by the program
+ * (probably from xl->options)
+ * which contain enough
+ * information for transport to
+ * initialize. As a part of
+ * cleanup, the members of
+ * options which are of interest
+ * to transport should be put
+ * into a structure for better
+ * readability and structure
+ * should replace options member
+ * here.
+ */
#if 0
int progaddrfamily; /* AF_INET or AF_INET6 */
char *proghost; /* Bind host, can be NULL */
#endif
- rpcsvc_actor_t *actors; /* All procedure handlers */
- int numactors; /* Num actors in actor array */
- int proghighvers; /* Highest ver for program
- supported by the system. */
- int proglowvers; /* Lowest ver */
-
- /* Program specific state handed to actors */
- void *private;
-
-
- /* This upcall is provided by the program during registration.
- * It is used to notify the program about events like connection being
- * destroyed etc. The rpc program may take appropriate actions, for eg.,
- * in the case of connection being destroyed, it should cleanup its
- * state stored in the connection.
- */
- rpcsvc_notify_t notify;
-
- /* An integer that identifies the min auth strength that is required
- * by this protocol, for eg. MOUNT3 needs AUTH_UNIX at least.
- * See RFC 1813, Section 5.2.1.
- */
- int min_auth;
-
- /* list member to link to list of registered services with rpcsvc */
- struct list_head program;
+ rpcsvc_actor_t *actors; /* All procedure handlers */
+ int numactors; /* Num actors in actor array */
+ int proghighvers; /* Highest ver for program
+ supported by the system. */
+ /* Program specific state handed to actors */
+ void *private;
+ gf_latency_t *latencies; /*Tracks latency statistics for the rpc call*/
+
+ /* This upcall is provided by the program during registration.
+ * It is used to notify the program about events like connection being
+ * destroyed etc. The rpc program may take appropriate actions, for eg.,
+ * in the case of connection being destroyed, it should cleanup its
+ * state stored in the connection.
+ */
+ rpcsvc_notify_t notify;
+
+ int proglowvers; /* Lowest ver */
+
+ /* An integer that identifies the min auth strength that is required
+ * by this protocol, for eg. MOUNT3 needs AUTH_UNIX at least.
+ * See RFC 1813, Section 5.2.1.
+ */
+ int min_auth;
+
+ /* list member to link to list of registered services with rpcsvc */
+ struct list_head program;
+ rpcsvc_request_queue_t request_queue[EVENT_MAX_THREADS];
+ pthread_mutex_t thr_lock;
+ pthread_cond_t thr_cond;
+ int threadcount;
+ int thr_queue;
+ pthread_key_t req_queue_key;
+
+ /* eventthreadcount is just a readonly copy of the actual value
+ * owned by the event sub-system
+ * It is used to control the scaling of rpcsvc_request_handler threads
+ */
+ int eventthreadcount;
+ uint16_t progport; /* Registered with portmap */
+ /* Execute actor function in program's own thread? This will reduce */
+ /* the workload on poller threads */
+ gf_boolean_t ownthread;
+ gf_boolean_t alive;
+
+ gf_boolean_t synctask;
+ unsigned long request_queue_status[EVENT_MAX_THREADS / __BITS_PER_LONG];
};
typedef struct rpcsvc_cbk_program {
- char *progname;
- int prognum;
- int progver;
+ char *progname;
+ int prognum;
+ int progver;
} rpcsvc_cbk_program_t;
/* All users of RPC services should use this API to register their
* procedure handlers.
*/
extern int
-rpcsvc_program_register (rpcsvc_t *svc, rpcsvc_program_t *program);
+rpcsvc_program_register(rpcsvc_t *svc, rpcsvc_program_t *program,
+ gf_boolean_t add_to_head);
extern int
-rpcsvc_program_unregister (rpcsvc_t *svc, rpcsvc_program_t *program);
+rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program);
/* This will create and add a listener to listener pool. Programs can
* use any of the listener in this pool. A single listener can be used by
@@ -397,127 +487,150 @@ rpcsvc_program_unregister (rpcsvc_t *svc, rpcsvc_program_t *program);
*/
/* FIXME: can multiple programs registered on same port? */
extern int32_t
-rpcsvc_create_listeners (rpcsvc_t *svc, dict_t *options, char *name);
+rpcsvc_create_listeners(rpcsvc_t *svc, dict_t *options, char *name);
void
-rpcsvc_listener_destroy (rpcsvc_listener_t *listener);
+rpcsvc_listener_destroy(rpcsvc_listener_t *listener);
extern int
-rpcsvc_program_register_portmap (rpcsvc_program_t *newprog, uint32_t port);
+rpcsvc_program_register_portmap(rpcsvc_program_t *newprog, uint32_t port);
+#ifdef IPV6_DEFAULT
+extern int
+rpcsvc_program_register_rpcbind6(rpcsvc_program_t *newprog, uint32_t port);
extern int
-rpcsvc_register_portmap_enabled (rpcsvc_t *svc);
+rpcsvc_program_unregister_rpcbind6(rpcsvc_program_t *newprog);
+#endif
+
+extern int
+rpcsvc_program_unregister_portmap(rpcsvc_program_t *newprog);
+
+extern int
+rpcsvc_register_portmap_enabled(rpcsvc_t *svc);
/* Inits the global RPC service data structures.
* Called in main.
*/
extern rpcsvc_t *
-rpcsvc_init (xlator_t *xl, glusterfs_ctx_t *ctx, dict_t *options,
- uint32_t poolcount);
+rpcsvc_init(xlator_t *xl, glusterfs_ctx_t *ctx, dict_t *options,
+ uint32_t poolcount);
+
+extern int
+rpcsvc_reconfigure_options(rpcsvc_t *svc, dict_t *options);
int
-rpcsvc_register_notify (rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata);
+rpcsvc_register_notify(rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata);
/* unregister a notification callback @notify with data @mydata from svc.
* returns the number of notification callbacks unregistered.
*/
int
-rpcsvc_unregister_notify (rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata);
+rpcsvc_unregister_notify(rpcsvc_t *svc, rpcsvc_notify_t notify, void *mydata);
int
-rpcsvc_submit_message (rpcsvc_request_t *req, struct iovec *proghdr,
- int hdrcount, struct iovec *payload, int payloadcount,
- struct iobref *iobref);
+rpcsvc_transport_submit(rpc_transport_t *trans, struct iovec *rpchdr,
+ int rpchdrcount, struct iovec *proghdr,
+ int proghdrcount, struct iovec *progpayload,
+ int progpayloadcount, struct iobref *iobref,
+ void *priv);
int
-rpcsvc_submit_generic (rpcsvc_request_t *req, struct iovec *proghdr,
- int hdrcount, struct iovec *payload, int payloadcount,
- struct iobref *iobref);
+rpcsvc_submit_message(rpcsvc_request_t *req, struct iovec *proghdr,
+ int hdrcount, struct iovec *payload, int payloadcount,
+ struct iobref *iobref);
+
+int
+rpcsvc_submit_generic(rpcsvc_request_t *req, struct iovec *proghdr,
+ int hdrcount, struct iovec *payload, int payloadcount,
+ struct iobref *iobref);
extern int
-rpcsvc_error_reply (rpcsvc_request_t *req);
+rpcsvc_error_reply(rpcsvc_request_t *req);
-#define RPCSVC_PEER_STRLEN 1024
-#define RPCSVC_AUTH_ACCEPT 1
-#define RPCSVC_AUTH_REJECT 2
-#define RPCSVC_AUTH_DONTCARE 3
+#define RPCSVC_PEER_STRLEN 1024
+#define RPCSVC_AUTH_ACCEPT 1
+#define RPCSVC_AUTH_REJECT 2
+#define RPCSVC_AUTH_DONTCARE 3
extern int
-rpcsvc_transport_peername (rpc_transport_t *trans, char *hostname, int hostlen);
+rpcsvc_transport_peername(rpc_transport_t *trans, char *hostname, int hostlen);
-extern inline int
-rpcsvc_transport_peeraddr (rpc_transport_t *trans, char *addrstr, int addrlen,
- struct sockaddr_storage *returnsa, socklen_t sasize);
+extern int
+rpcsvc_transport_peeraddr(rpc_transport_t *trans, char *addrstr, int addrlen,
+ struct sockaddr_storage *returnsa, socklen_t sasize);
extern int
-rpcsvc_transport_peer_check (dict_t *options, char *volname,
- rpc_transport_t *trans);
+rpcsvc_auth_check(rpcsvc_t *svc, char *volname, char *ipaddr);
extern int
-rpcsvc_transport_privport_check (rpcsvc_t *svc, char *volname,
- rpc_transport_t *trans);
-#define rpcsvc_request_seterr(req, err) (req)->rpc_err = err
-#define rpcsvc_request_set_autherr(req, err) (req)->auth_err = err
+rpcsvc_transport_privport_check(rpcsvc_t *svc, char *volname, uint16_t port);
-extern int rpcsvc_submit_vectors (rpcsvc_request_t *req);
+#define rpcsvc_request_seterr(req, err) ((req)->rpc_err = (int)(err))
+#define rpcsvc_request_set_autherr(req, err) ((req)->auth_err = (int)(err))
-extern int rpcsvc_request_attach_vector (rpcsvc_request_t *req,
- struct iovec msgvec, struct iobuf *iob,
- struct iobref *ioref, int finalvector);
+extern int
+rpcsvc_submit_vectors(rpcsvc_request_t *req);
+extern int
+rpcsvc_request_attach_vector(rpcsvc_request_t *req, struct iovec msgvec,
+ struct iobuf *iob, struct iobref *ioref,
+ int finalvector);
-typedef int (*auth_init_trans) (rpc_transport_t *trans, void *priv);
-typedef int (*auth_init_request) (rpcsvc_request_t *req, void *priv);
-typedef int (*auth_request_authenticate) (rpcsvc_request_t *req, void *priv);
+typedef int (*auth_init_trans)(rpc_transport_t *trans, void *priv);
+typedef int (*auth_init_request)(rpcsvc_request_t *req, void *priv);
+typedef int (*auth_request_authenticate)(rpcsvc_request_t *req, void *priv);
/* This structure needs to be registered by every authentication scheme.
* Our authentication schemes are stored per connection because
* each connection will end up using a different authentication scheme.
*/
typedef struct rpcsvc_auth_ops {
- auth_init_trans transport_init;
- auth_init_request request_init;
- auth_request_authenticate authenticate;
+ auth_init_trans transport_init;
+ auth_init_request request_init;
+ auth_request_authenticate authenticate;
} rpcsvc_auth_ops_t;
typedef struct rpcsvc_auth_flavour_desc {
- char authname[RPCSVC_NAME_MAX];
- int authnum;
- rpcsvc_auth_ops_t *authops;
- void *authprivate;
+ char authname[RPCSVC_NAME_MAX];
+ rpcsvc_auth_ops_t *authops;
+ void *authprivate;
+ int authnum;
} rpcsvc_auth_t;
-typedef void * (*rpcsvc_auth_initer_t) (rpcsvc_t *svc, dict_t *options);
+typedef void *(*rpcsvc_auth_initer_t)(rpcsvc_t *svc, dict_t *options);
struct rpcsvc_auth_list {
- struct list_head authlist;
- rpcsvc_auth_initer_t init;
- /* Should be the name with which we identify the auth scheme given
- * in the volfile options.
- * This should be different from the authname in rpc_auth_t
- * in way that makes it easier to specify this scheme in the volfile.
- * This is because the technical names of the schemes can be a bit
- * arcane.
- */
- char name[RPCSVC_NAME_MAX];
- rpcsvc_auth_t *auth;
- int enable;
+ struct list_head authlist;
+ rpcsvc_auth_initer_t init;
+ /* Should be the name with which we identify the auth scheme given
+ * in the volfile options.
+ * This should be different from the authname in rpc_auth_t
+ * in way that makes it easier to specify this scheme in the volfile.
+ * This is because the technical names of the schemes can be a bit
+ * arcane.
+ */
+ char name[RPCSVC_NAME_MAX];
+ rpcsvc_auth_t *auth;
+ int enable;
};
extern int
-rpcsvc_auth_request_init (rpcsvc_request_t *req);
+rpcsvc_auth_request_init(rpcsvc_request_t *req, struct rpc_msg *callmsg);
+
+extern int
+rpcsvc_auth_init(rpcsvc_t *svc, dict_t *options);
extern int
-rpcsvc_auth_init (rpcsvc_t *svc, dict_t *options);
+rpcsvc_auth_reconf(rpcsvc_t *svc, dict_t *options);
extern int
-rpcsvc_auth_transport_init (rpc_transport_t *xprt);
+rpcsvc_auth_transport_init(rpc_transport_t *xprt);
extern int
-rpcsvc_authenticate (rpcsvc_request_t *req);
+rpcsvc_authenticate(rpcsvc_request_t *req);
extern int
-rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen);
+rpcsvc_auth_array(rpcsvc_t *svc, char *volname, int *autharr, int arrlen);
/* If the request has been sent using AUTH_UNIX, this function returns the
* auxiliary gids as an array, otherwise, it returns NULL.
@@ -525,28 +638,57 @@ rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen);
* authentication code even further to support mode auth schemes.
*/
extern gid_t *
-rpcsvc_auth_unix_auxgids (rpcsvc_request_t *req, int *arrlen);
-
-extern int
-rpcsvc_combine_gen_spec_volume_checks (int gen, int spec);
+rpcsvc_auth_unix_auxgids(rpcsvc_request_t *req, int *arrlen);
extern char *
-rpcsvc_volume_allowed (dict_t *options, char *volname);
+rpcsvc_volume_allowed(dict_t *options, char *volname);
+
+int
+rpcsvc_request_submit(rpcsvc_t *rpc, rpc_transport_t *trans,
+ rpcsvc_cbk_program_t *prog, int procnum, void *req,
+ glusterfs_ctx_t *ctx, xdrproc_t xdrproc);
-int rpcsvc_callback_submit (rpcsvc_t *rpc, rpc_transport_t *trans,
- rpcsvc_cbk_program_t *prog, int procnum,
- struct iovec *proghdr, int proghdrcount);
+int
+rpcsvc_callback_submit(rpcsvc_t *rpc, rpc_transport_t *trans,
+ rpcsvc_cbk_program_t *prog, int procnum,
+ struct iovec *proghdr, int proghdrcount,
+ struct iobref *iobref);
+rpcsvc_actor_t *
+rpcsvc_program_actor(rpcsvc_request_t *req);
+
+int
+rpcsvc_transport_unix_options_build(dict_t *options, char *filepath);
int
-rpcsvc_transport_unix_options_build (dict_t **options, char *filepath);
+rpcsvc_set_allow_insecure(rpcsvc_t *svc, dict_t *options);
int
-rpcsvc_set_allow_insecure (rpcsvc_t *svc, dict_t *options);
+rpcsvc_set_addr_namelookup(rpcsvc_t *svc, dict_t *options);
+int
+rpcsvc_set_root_squash(rpcsvc_t *svc, dict_t *options);
+int
+rpcsvc_set_all_squash(rpcsvc_t *svc, dict_t *options);
+int
+rpcsvc_set_outstanding_rpc_limit(rpcsvc_t *svc, dict_t *options, int defvalue);
+
int
-rpcsvc_auth_array (rpcsvc_t *svc, char *volname, int *autharr, int arrlen);
-char *
-rpcsvc_volume_allowed (dict_t *options, char *volname);
+rpcsvc_set_throttle_on(rpcsvc_t *svc);
+
+int
+rpcsvc_set_throttle_off(rpcsvc_t *svc);
+
+gf_boolean_t
+rpcsvc_get_throttle(rpcsvc_t *svc);
+
+int
+rpcsvc_auth_array(rpcsvc_t *svc, char *volname, int *autharr, int arrlen);
rpcsvc_vector_sizer
-rpcsvc_get_program_vector_sizer (rpcsvc_t *svc, uint32_t prognum,
- uint32_t progver, uint32_t procnum);
+rpcsvc_get_program_vector_sizer(rpcsvc_t *svc, uint32_t prognum,
+ uint32_t progver, int procnum);
+void
+rpcsvc_autoscale_threads(glusterfs_ctx_t *ctx, rpcsvc_t *rpc, int incr);
+extern int
+rpcsvc_destroy(rpcsvc_t *svc);
+void
+rpcsvc_statedump(rpcsvc_t *svc);
#endif
diff --git a/rpc/rpc-lib/src/xdr-common.h b/rpc/rpc-lib/src/xdr-common.h
index 34dc9c6a228..752736b3d4d 100644
--- a/rpc/rpc-lib/src/xdr-common.h
+++ b/rpc/rpc-lib/src/xdr-common.h
@@ -11,14 +11,10 @@
#ifndef _XDR_COMMON_H_
#define _XDR_COMMON_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <rpc/types.h>
#include <sys/types.h>
#include <rpc/xdr.h>
+#include <rpc/auth.h>
#include <sys/uio.h>
#ifdef __NetBSD__
@@ -26,37 +22,76 @@
#endif /* __NetBSD__ */
enum gf_dump_procnum {
- GF_DUMP_NULL,
- GF_DUMP_DUMP,
- GF_DUMP_MAXVALUE,
+ GF_DUMP_NULL,
+ GF_DUMP_DUMP,
+ GF_DUMP_PING,
+ GF_DUMP_MAXVALUE,
};
#define GLUSTER_DUMP_PROGRAM 123451501 /* Completely random */
#define GLUSTER_DUMP_VERSION 1
-#define GF_MAX_AUTH_BYTES 2048
+/* MAX_AUTH_BYTES is restricted to 400 bytes, see
+ * http://tools.ietf.org/html/rfc5531#section-8.2 */
+#define GF_MAX_AUTH_BYTES MAX_AUTH_BYTES
+
+/* The size of an AUTH_GLUSTERFS_V2 structure:
+ *
+ * 1 | pid
+ * 1 | uid
+ * 1 | gid
+ * 1 | groups_len
+ * XX | groups_val (GF_MAX_AUX_GROUPS=65535)
+ * 1 | lk_owner_len
+ * YY | lk_owner_val (GF_MAX_LOCK_OWNER_LEN=1024)
+ * ----+-------------------------------------------
+ * 5 | total xdr-units
+ *
+ * one XDR-unit is defined as BYTES_PER_XDR_UNIT = 4 bytes
+ * MAX_AUTH_BYTES = 400 is the maximum, this is 100 xdr-units.
+ * XX + YY can be 95 to fill the 100 xdr-units.
+ *
+ * Note that the on-wire protocol has tighter requirements than the internal
+ * structures. It is possible for xlators to use more groups and a bigger
+ * lk_owner than that can be sent by a GlusterFS-client.
+ *
+ * -------
+ * On v3, there are 4 more units, and hence it will be 9 xdr-units
+ */
+#define GF_AUTH_GLUSTERFS_MAX_GROUPS(lk_len, type) \
+ ((type == AUTH_GLUSTERFS_v2) ? (95 - lk_len) : (91 - lk_len))
+#define GF_AUTH_GLUSTERFS_MAX_LKOWNER(groups_len, type) \
+ ((type == AUTH_GLUSTERFS_v2) ? (95 - groups_len) : (91 - groups_len))
+
+#ifdef GF_LINUX_HOST_OS
+#define xdr_u_int32_t xdr_uint32_t
+#define xdr_u_int64_t xdr_uint64_t
+unsigned long
+xdr_sizeof(xdrproc_t func, void *data);
+#endif
-#if GF_DARWIN_HOST_OS
+#ifdef GF_DARWIN_HOST_OS
#define xdr_u_quad_t xdr_u_int64_t
-#define xdr_quad_t xdr_int64_t
+#define xdr_quad_t xdr_int64_t
#define xdr_uint32_t xdr_u_int32_t
+#define xdr_uint64_t xdr_u_int64_t
#define uint64_t u_int64_t
+unsigned long
+xdr_sizeof(xdrproc_t func, void *data);
#endif
#if defined(__NetBSD__)
#define xdr_u_quad_t xdr_u_int64_t
-#define xdr_quad_t xdr_int64_t
+#define xdr_quad_t xdr_int64_t
#define xdr_uint32_t xdr_u_int32_t
#define xdr_uint64_t xdr_u_int64_t
#endif
-
-#if GF_SOLARIS_HOST_OS
+#ifdef GF_SOLARIS_HOST_OS
#define u_quad_t uint64_t
#define quad_t int64_t
#define xdr_u_quad_t xdr_uint64_t
-#define xdr_quad_t xdr_int64_t
-#define xdr_uint32_t xdr_uint32_t
+#define xdr_quad_t xdr_int64_t
#endif
/* Returns the address of the byte that follows the
@@ -64,16 +99,18 @@ enum gf_dump_procnum {
* E.g. once the RPC call for NFS has been decoded, the macro will return
* the address from which the NFS header starts.
*/
-#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
+#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
/* Returns the length of the remaining record after the previous decode
* operation completed.
*/
-#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
+#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
/* Returns the number of bytes used by the last encode operation. */
-#define xdr_encoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+#define xdr_encoded_length(xdr) \
+ (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
-#define xdr_decoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+#define xdr_decoded_length(xdr) \
+ (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
#endif
diff --git a/rpc/rpc-lib/src/xdr-rpc.c b/rpc/rpc-lib/src/xdr-rpc.c
index ef52764c3a8..4992dc5a7ce 100644
--- a/rpc/rpc-lib/src/xdr-rpc.c
+++ b/rpc/rpc-lib/src/xdr-rpc.c
@@ -8,205 +8,191 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <rpc/rpc.h>
-#include <rpc/pmap_clnt.h>
-#include <arpa/inet.h>
#include <rpc/xdr.h>
#include <sys/uio.h>
#include <rpc/auth_unix.h>
-#include "mem-pool.h"
#include "xdr-rpc.h"
#include "xdr-common.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
/* Decodes the XDR format in msgbuf into rpc_msg.
* The remaining payload is returned into payload.
*/
int
-xdr_to_rpc_call (char *msgbuf, size_t len, struct rpc_msg *call,
- struct iovec *payload, char *credbytes, char *verfbytes)
+xdr_to_rpc_call(char *msgbuf, size_t len, struct rpc_msg *call,
+ struct iovec *payload, char *credbytes, char *verfbytes)
{
- XDR xdr;
- char opaquebytes[MAX_AUTH_BYTES];
- struct opaque_auth *oa = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("rpc", msgbuf, out);
- GF_VALIDATE_OR_GOTO ("rpc", call, out);
-
- memset (call, 0, sizeof (*call));
-
- oa = &call->rm_call.cb_cred;
- if (!credbytes)
- oa->oa_base = opaquebytes;
- else
- oa->oa_base = credbytes;
-
- oa = &call->rm_call.cb_verf;
- if (!verfbytes)
- oa->oa_base = opaquebytes;
- else
- oa->oa_base = verfbytes;
-
- xdrmem_create (&xdr, msgbuf, len, XDR_DECODE);
- if (!xdr_callmsg (&xdr, call)) {
- gf_log ("rpc", GF_LOG_WARNING, "failed to decode call msg");
- goto out;
- }
-
- if (payload) {
- payload->iov_base = xdr_decoded_remaining_addr (xdr);
- payload->iov_len = xdr_decoded_remaining_len (xdr);
- }
-
- ret = 0;
+ XDR xdr;
+ char opaquebytes[GF_MAX_AUTH_BYTES];
+ struct opaque_auth *oa = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("rpc", msgbuf, out);
+ GF_VALIDATE_OR_GOTO("rpc", call, out);
+
+ memset(call, 0, sizeof(*call));
+
+ oa = &call->rm_call.cb_cred;
+ if (!credbytes)
+ oa->oa_base = opaquebytes;
+ else
+ oa->oa_base = credbytes;
+
+ oa = &call->rm_call.cb_verf;
+ if (!verfbytes)
+ oa->oa_base = opaquebytes;
+ else
+ oa->oa_base = verfbytes;
+
+ xdrmem_create(&xdr, msgbuf, len, XDR_DECODE);
+ if (!xdr_callmsg(&xdr, call)) {
+ gf_log("rpc", GF_LOG_WARNING, "failed to decode call msg");
+ goto out;
+ }
+
+ if (payload) {
+ payload->iov_base = xdr_decoded_remaining_addr(xdr);
+ payload->iov_len = xdr_decoded_remaining_len(xdr);
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
bool_t
-true_func (XDR *s, caddr_t *a)
+true_func(XDR *s, caddr_t *a)
{
- return TRUE;
+ return TRUE;
}
-
int
-rpc_fill_empty_reply (struct rpc_msg *reply, uint32_t xid)
+rpc_fill_empty_reply(struct rpc_msg *reply, uint32_t xid)
{
- int ret = -1;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", reply, out);
+ GF_VALIDATE_OR_GOTO("rpc", reply, out);
- /* Setting to 0 also results in reply verifier flavor to be
- * set to AUTH_NULL which is what we want right now.
- */
- memset (reply, 0, sizeof (*reply));
- reply->rm_xid = xid;
- reply->rm_direction = REPLY;
+ /* Setting to 0 also results in reply verifier flavor to be
+ * set to AUTH_NULL which is what we want right now.
+ */
+ memset(reply, 0, sizeof(*reply));
+ reply->rm_xid = xid;
+ reply->rm_direction = REPLY;
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-rpc_fill_denied_reply (struct rpc_msg *reply, int rjstat, int auth_err)
+rpc_fill_denied_reply(struct rpc_msg *reply, int rjstat, int auth_err)
{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("rpc", reply, out);
-
- reply->rm_reply.rp_stat = MSG_DENIED;
- reply->rjcted_rply.rj_stat = rjstat;
- if (rjstat == RPC_MISMATCH) {
- /* No problem with hardcoding
- * RPC version numbers. We only support
- * v2 anyway.
- */
- reply->rjcted_rply.rj_vers.low = 2;
- reply->rjcted_rply.rj_vers.high = 2;
- } else if (rjstat == AUTH_ERROR)
- reply->rjcted_rply.rj_why = auth_err;
-
- ret = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("rpc", reply, out);
+
+ reply->rm_reply.rp_stat = MSG_DENIED;
+ reply->rjcted_rply.rj_stat = rjstat;
+ if (rjstat == RPC_MISMATCH) {
+ /* No problem with hardcoding
+ * RPC version numbers. We only support
+ * v2 anyway.
+ */
+ reply->rjcted_rply.rj_vers.low = 2;
+ reply->rjcted_rply.rj_vers.high = 2;
+ } else if (rjstat == AUTH_ERROR)
+ reply->rjcted_rply.rj_why = auth_err;
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-rpc_fill_accepted_reply (struct rpc_msg *reply, int arstat, int proglow,
- int proghigh, int verf, int len, char *vdata)
+rpc_fill_accepted_reply(struct rpc_msg *reply, int arstat, int proglow,
+ int proghigh, int verf, int len, char *vdata)
{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("rpc", reply, out);
-
- reply->rm_reply.rp_stat = MSG_ACCEPTED;
- reply->acpted_rply.ar_stat = arstat;
-
- reply->acpted_rply.ar_verf.oa_flavor = verf;
- reply->acpted_rply.ar_verf.oa_length = len;
- reply->acpted_rply.ar_verf.oa_base = vdata;
- if (arstat == PROG_MISMATCH) {
- reply->acpted_rply.ar_vers.low = proglow;
- reply->acpted_rply.ar_vers.high = proghigh;
- } else if (arstat == SUCCESS) {
-
- /* This is a hack. I'd really like to build a custom
- * XDR library because Sun RPC interface is not very flexible.
- */
- reply->acpted_rply.ar_results.proc = (xdrproc_t)true_func;
- reply->acpted_rply.ar_results.where = NULL;
- }
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("rpc", reply, out);
+
+ reply->rm_reply.rp_stat = MSG_ACCEPTED;
+ reply->acpted_rply.ar_stat = arstat;
+
+ reply->acpted_rply.ar_verf.oa_flavor = verf;
+ reply->acpted_rply.ar_verf.oa_length = len;
+ reply->acpted_rply.ar_verf.oa_base = vdata;
+ if (arstat == PROG_MISMATCH) {
+ reply->acpted_rply.ar_vers.low = proglow;
+ reply->acpted_rply.ar_vers.high = proghigh;
+ } else if (arstat == SUCCESS) {
+ /* This is a hack. I'd really like to build a custom
+ * XDR library because Sun RPC interface is not very flexible.
+ */
+ reply->acpted_rply.ar_results.proc = (xdrproc_t)true_func;
+ reply->acpted_rply.ar_results.where = NULL;
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-rpc_reply_to_xdr (struct rpc_msg *reply, char *dest, size_t len,
- struct iovec *dst)
+rpc_reply_to_xdr(struct rpc_msg *reply, char *dest, size_t len,
+ struct iovec *dst)
{
- XDR xdr;
- int ret = -1;
+ XDR xdr;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", reply, out);
- GF_VALIDATE_OR_GOTO ("rpc", dest, out);
- GF_VALIDATE_OR_GOTO ("rpc", dst, out);
+ GF_VALIDATE_OR_GOTO("rpc", reply, out);
+ GF_VALIDATE_OR_GOTO("rpc", dest, out);
+ GF_VALIDATE_OR_GOTO("rpc", dst, out);
- xdrmem_create (&xdr, dest, len, XDR_ENCODE);
- if (!xdr_replymsg(&xdr, reply)) {
- gf_log ("rpc", GF_LOG_WARNING, "failed to encode reply msg");
- goto out;
- }
+ xdrmem_create(&xdr, dest, len, XDR_ENCODE);
+ if (!xdr_replymsg(&xdr, reply)) {
+ gf_log("rpc", GF_LOG_WARNING, "failed to encode reply msg");
+ goto out;
+ }
- dst->iov_base = dest;
- dst->iov_len = xdr_encoded_length (xdr);
+ dst->iov_base = dest;
+ dst->iov_len = xdr_encoded_length(xdr);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-xdr_to_auth_unix_cred (char *msgbuf, int msglen, struct authunix_parms *au,
- char *machname, gid_t *gids)
+xdr_to_auth_unix_cred(char *msgbuf, int msglen, struct authunix_parms *au,
+ char *machname, gid_t *gids)
{
- XDR xdr;
- int ret = -1;
+ XDR xdr;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", msgbuf, out);
- GF_VALIDATE_OR_GOTO ("rpc", machname, out);
- GF_VALIDATE_OR_GOTO ("rpc", gids, out);
- GF_VALIDATE_OR_GOTO ("rpc", au, out);
+ GF_VALIDATE_OR_GOTO("rpc", msgbuf, out);
+ GF_VALIDATE_OR_GOTO("rpc", machname, out);
+ GF_VALIDATE_OR_GOTO("rpc", gids, out);
+ GF_VALIDATE_OR_GOTO("rpc", au, out);
- au->aup_machname = machname;
-#ifdef GF_DARWIN_HOST_OS
- au->aup_gids = (int *)gids;
+ au->aup_machname = machname;
+#if defined(GF_DARWIN_HOST_OS) || defined(__FreeBSD__)
+ au->aup_gids = (int *)gids;
#else
- au->aup_gids = gids;
+ au->aup_gids = gids;
#endif
- xdrmem_create (&xdr, msgbuf, msglen, XDR_DECODE);
+ xdrmem_create(&xdr, msgbuf, msglen, XDR_DECODE);
- if (!xdr_authunix_parms (&xdr, au)) {
- gf_log ("rpc", GF_LOG_WARNING, "failed to decode auth unix parms");
- goto out;
- }
+ if (!xdr_authunix_parms(&xdr, au)) {
+ gf_log("rpc", GF_LOG_WARNING, "failed to decode auth unix parms");
+ goto out;
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
diff --git a/rpc/rpc-lib/src/xdr-rpc.h b/rpc/rpc-lib/src/xdr-rpc.h
index f5f4a941e92..7baed273846 100644
--- a/rpc/rpc-lib/src/xdr-rpc.h
+++ b/rpc/rpc-lib/src/xdr-rpc.h
@@ -11,11 +11,6 @@
#ifndef _XDR_RPC_H_
#define _XDR_RPC_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#ifndef GF_SOLARIS_HOST_OS
#include <rpc/rpc.h>
#endif
@@ -25,7 +20,6 @@
#include <rpc/auth_sys.h>
#endif
-//#include <rpc/pmap_clnt.h>
#include <arpa/inet.h>
#include <rpc/xdr.h>
#include <sys/uio.h>
@@ -33,49 +27,69 @@
#include "xdr-common.h"
typedef enum {
- AUTH_GLUSTERFS = 5,
- AUTH_GLUSTERFS_v2 = 390039, /* using a number from 'unused' range,
- from the list available in RFC5531 */
+ AUTH_GLUSTERFS = 5,
+ AUTH_GLUSTERFS_v2 = 390039, /* using a number from 'unused' range,
+ from the list available in RFC5531 */
+ AUTH_GLUSTERFS_v3 = 390040, /* this too is unused */
} gf_rpc_authtype_t;
/* Converts a given network buffer from its XDR format to a structure
* that contains everything an RPC call needs to work.
*/
extern int
-xdr_to_rpc_call (char *msgbuf, size_t len, struct rpc_msg *call,
- struct iovec *payload, char *credbytes, char *verfbytes);
+xdr_to_rpc_call(char *msgbuf, size_t len, struct rpc_msg *call,
+ struct iovec *payload, char *credbytes, char *verfbytes);
extern int
-rpc_fill_empty_reply (struct rpc_msg *reply, uint32_t xid);
+rpc_fill_empty_reply(struct rpc_msg *reply, uint32_t xid);
extern int
-rpc_fill_denied_reply (struct rpc_msg *reply, int rjstat, int auth_err);
+rpc_fill_denied_reply(struct rpc_msg *reply, int rjstat, int auth_err);
extern int
-rpc_fill_accepted_reply (struct rpc_msg *reply, int arstat, int proglow,
- int proghigh, int verf, int len, char *vdata);
+rpc_fill_accepted_reply(struct rpc_msg *reply, int arstat, int proglow,
+ int proghigh, int verf, int len, char *vdata);
extern int
-rpc_reply_to_xdr (struct rpc_msg *reply, char *dest, size_t len,
- struct iovec *dst);
+rpc_reply_to_xdr(struct rpc_msg *reply, char *dest, size_t len,
+ struct iovec *dst);
extern int
-xdr_to_auth_unix_cred (char *msgbuf, int msglen, struct authunix_parms *au,
- char *machname, gid_t *gids);
+xdr_to_auth_unix_cred(char *msgbuf, int msglen, struct authunix_parms *au,
+ char *machname, gid_t *gids);
/* Macros that simplify accessing the members of an RPC call structure. */
-#define rpc_call_xid(call) ((call)->rm_xid)
-#define rpc_call_direction(call) ((call)->rm_direction)
-#define rpc_call_rpcvers(call) ((call)->ru.RM_cmb.cb_rpcvers)
-#define rpc_call_program(call) ((call)->ru.RM_cmb.cb_prog)
-#define rpc_call_progver(call) ((call)->ru.RM_cmb.cb_vers)
-#define rpc_call_progproc(call) ((call)->ru.RM_cmb.cb_proc)
-#define rpc_opaque_auth_flavour(oa) ((oa)->oa_flavor)
-#define rpc_opaque_auth_len(oa) ((oa)->oa_length)
-
-#define rpc_call_cred_flavour(call) (rpc_opaque_auth_flavour ((&(call)->ru.RM_cmb.cb_cred)))
-#define rpc_call_cred_len(call) (rpc_opaque_auth_len ((&(call)->ru.RM_cmb.cb_cred)))
-
-
-#define rpc_call_verf_flavour(call) (rpc_opaque_auth_flavour ((&(call)->ru.RM_cmb.cb_verf)))
-#define rpc_call_verf_len(call) (rpc_opaque_auth_len ((&(call)->ru.RM_cmb.cb_verf)))
+#define rpc_call_xid(call) ((call)->rm_xid)
+#define rpc_call_direction(call) ((call)->rm_direction)
+#define rpc_call_rpcvers(call) ((call)->ru.RM_cmb.cb_rpcvers)
+#define rpc_call_program(call) ((call)->ru.RM_cmb.cb_prog)
+#define rpc_call_progver(call) ((call)->ru.RM_cmb.cb_vers)
+#define rpc_call_progproc(call) ((call)->ru.RM_cmb.cb_proc)
+#define rpc_opaque_auth_flavour(oa) ((oa)->oa_flavor)
+#define rpc_opaque_auth_len(oa) ((oa)->oa_length)
+
+#define rpc_call_cred_flavour(call) \
+ (rpc_opaque_auth_flavour((&(call)->ru.RM_cmb.cb_cred)))
+#define rpc_call_cred_len(call) \
+ (rpc_opaque_auth_len((&(call)->ru.RM_cmb.cb_cred)))
+
+#define rpc_call_verf_flavour(call) \
+ (rpc_opaque_auth_flavour((&(call)->ru.RM_cmb.cb_verf)))
+#define rpc_call_verf_len(call) \
+ (rpc_opaque_auth_len((&(call)->ru.RM_cmb.cb_verf)))
+
+#if defined(GF_DARWIN_HOST_OS) || !defined(HAVE_RPC_RPC_H)
+#define GF_PRI_RPC_XID PRIu32
+#define GF_PRI_RPC_VERSION PRIu32
+#define GF_PRI_RPC_PROG_ID PRIu32
+#define GF_PRI_RPC_PROG_VERS PRIu32
+#define GF_PRI_RPC_PROC PRIu32
+#define GF_PRI_RPC_PROC_VERSION PRIu32
+#else
+#define GF_PRI_RPC_XID PRIu64
+#define GF_PRI_RPC_VERSION PRIu64
+#define GF_PRI_RPC_PROG_ID PRIu64
+#define GF_PRI_RPC_PROG_VERS PRIu64
+#define GF_PRI_RPC_PROC PRIu64
+#define GF_PRI_RPC_PROC_VERSION PRIu64
+#endif
#endif
diff --git a/rpc/rpc-lib/src/xdr-rpcclnt.c b/rpc/rpc-lib/src/xdr-rpcclnt.c
index 810d1961b9d..8dcdcfeda83 100644
--- a/rpc/rpc-lib/src/xdr-rpcclnt.c
+++ b/rpc/rpc-lib/src/xdr-rpcclnt.c
@@ -8,110 +8,98 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <rpc/rpc.h>
-#include <rpc/pmap_clnt.h>
-#include <arpa/inet.h>
#include <rpc/xdr.h>
#include <sys/uio.h>
#include <rpc/auth_unix.h>
#include <errno.h>
-#include "mem-pool.h"
#include "xdr-rpc.h"
#include "xdr-common.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
/* Decodes the XDR format in msgbuf into rpc_msg.
* The remaining payload is returned into payload.
*/
int
-xdr_to_rpc_reply (char *msgbuf, size_t len, struct rpc_msg *reply,
- struct iovec *payload, char *verfbytes)
+xdr_to_rpc_reply(char *msgbuf, size_t len, struct rpc_msg *reply,
+ struct iovec *payload, char *verfbytes)
{
- XDR xdr;
- int ret = -EINVAL;
-
- GF_VALIDATE_OR_GOTO ("rpc", msgbuf, out);
- GF_VALIDATE_OR_GOTO ("rpc", reply, out);
-
- memset (reply, 0, sizeof (struct rpc_msg));
-
- reply->acpted_rply.ar_verf = _null_auth;
- reply->acpted_rply.ar_results.where = NULL;
- reply->acpted_rply.ar_results.proc = (xdrproc_t)(xdr_void);
-
- xdrmem_create (&xdr, msgbuf, len, XDR_DECODE);
- if (!xdr_replymsg (&xdr, reply)) {
- gf_log ("rpc", GF_LOG_WARNING, "failed to decode reply msg");
- ret = -errno;
- goto out;
- }
- if (payload) {
- payload->iov_base = xdr_decoded_remaining_addr (xdr);
- payload->iov_len = xdr_decoded_remaining_len (xdr);
- }
-
- ret = 0;
+ XDR xdr;
+ int ret = -EINVAL;
+
+ GF_VALIDATE_OR_GOTO("rpc", msgbuf, out);
+ GF_VALIDATE_OR_GOTO("rpc", reply, out);
+
+ memset(reply, 0, sizeof(struct rpc_msg));
+
+ reply->acpted_rply.ar_verf = _null_auth;
+ reply->acpted_rply.ar_results.where = NULL;
+ reply->acpted_rply.ar_results.proc = (xdrproc_t)(xdr_void);
+
+ xdrmem_create(&xdr, msgbuf, len, XDR_DECODE);
+ if (!xdr_replymsg(&xdr, reply)) {
+ gf_log("rpc", GF_LOG_WARNING, "failed to decode reply msg");
+ goto out;
+ }
+ if (payload) {
+ payload->iov_base = xdr_decoded_remaining_addr(xdr);
+ payload->iov_len = xdr_decoded_remaining_len(xdr);
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-rpc_request_to_xdr (struct rpc_msg *request, char *dest, size_t len,
- struct iovec *dst)
+rpc_request_to_xdr(struct rpc_msg *request, char *dest, size_t len,
+ struct iovec *dst)
{
- XDR xdr;
- int ret = -1;
+ XDR xdr;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", dest, out);
- GF_VALIDATE_OR_GOTO ("rpc", request, out);
- GF_VALIDATE_OR_GOTO ("rpc", dst, out);
+ GF_VALIDATE_OR_GOTO("rpc", dest, out);
+ GF_VALIDATE_OR_GOTO("rpc", request, out);
+ GF_VALIDATE_OR_GOTO("rpc", dst, out);
- xdrmem_create (&xdr, dest, len, XDR_ENCODE);
- if (!xdr_callmsg (&xdr, request)) {
- gf_log ("rpc", GF_LOG_WARNING, "failed to encode call msg");
- goto out;
- }
+ xdrmem_create(&xdr, dest, len, XDR_ENCODE);
+ if (!xdr_callmsg(&xdr, request)) {
+ gf_log("rpc", GF_LOG_WARNING, "failed to encode call msg");
+ goto out;
+ }
- dst->iov_base = dest;
- dst->iov_len = xdr_encoded_length (xdr);
+ dst->iov_base = dest;
+ dst->iov_len = xdr_encoded_length(xdr);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-auth_unix_cred_to_xdr (struct authunix_parms *au, char *dest, size_t len,
- struct iovec *iov)
+auth_unix_cred_to_xdr(struct authunix_parms *au, char *dest, size_t len,
+ struct iovec *iov)
{
- XDR xdr;
- int ret = -1;
+ XDR xdr;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("rpc", au, out);
- GF_VALIDATE_OR_GOTO ("rpc", dest, out);
- GF_VALIDATE_OR_GOTO ("rpc", iov, out);
+ GF_VALIDATE_OR_GOTO("rpc", au, out);
+ GF_VALIDATE_OR_GOTO("rpc", dest, out);
+ GF_VALIDATE_OR_GOTO("rpc", iov, out);
- xdrmem_create (&xdr, dest, len, XDR_DECODE);
+ xdrmem_create(&xdr, dest, len, XDR_DECODE);
- if (!xdr_authunix_parms (&xdr, au)) {
- gf_log ("rpc", GF_LOG_WARNING, "failed to decode authunix parms");
- goto out;
- }
+ if (!xdr_authunix_parms(&xdr, au)) {
+ gf_log("rpc", GF_LOG_WARNING, "failed to decode authunix parms");
+ goto out;
+ }
- iov->iov_base = dest;
- iov->iov_len = xdr_encoded_length (xdr);
+ iov->iov_base = dest;
+ iov->iov_len = xdr_encoded_length(xdr);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
diff --git a/rpc/rpc-lib/src/xdr-rpcclnt.h b/rpc/rpc-lib/src/xdr-rpcclnt.h
index c08d872f8c2..58eda4892a9 100644
--- a/rpc/rpc-lib/src/xdr-rpcclnt.h
+++ b/rpc/rpc-lib/src/xdr-rpcclnt.h
@@ -11,13 +11,6 @@
#ifndef _XDR_RPCCLNT_H
#define _XDR_RPCCLNT_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-//#include <rpc/rpc.h>
-//#include <rpc/pmap_clnt.h>
#include <arpa/inet.h>
#include <rpc/xdr.h>
#include <sys/uio.h>
@@ -25,18 +18,19 @@
#include <rpc/auth_unix.h>
/* Macros that simplify accessing the members of an RPC call structure. */
-#define rpc_reply_xid(reply) ((reply)->rm_xid)
-#define rpc_reply_status(reply) ((reply)->ru.RM_rmb.rp_stat)
-#define rpc_accepted_reply_status(reply) ((reply)->acpted_rply.ar_stat)
-#define rpc_reply_verf_flavour(reply) ((reply)->acpted_rply.ar_verf.oa_flavor)
+#define rpc_reply_xid(reply) ((reply)->rm_xid)
+#define rpc_reply_status(reply) ((reply)->ru.RM_rmb.rp_stat)
+#define rpc_accepted_reply_status(reply) ((reply)->acpted_rply.ar_stat)
+#define rpc_reply_verf_flavour(reply) ((reply)->acpted_rply.ar_verf.oa_flavor)
-int xdr_to_rpc_reply (char *msgbuf, size_t len, struct rpc_msg *reply,
- struct iovec *payload, char *verfbytes);
int
-rpc_request_to_xdr (struct rpc_msg *request, char *dest, size_t len,
- struct iovec *dst);
+xdr_to_rpc_reply(char *msgbuf, size_t len, struct rpc_msg *reply,
+ struct iovec *payload, char *verfbytes);
+int
+rpc_request_to_xdr(struct rpc_msg *request, char *dest, size_t len,
+ struct iovec *dst);
int
-auth_unix_cred_to_xdr (struct authunix_parms *au, char *dest, size_t len,
- struct iovec *iov);
+auth_unix_cred_to_xdr(struct authunix_parms *au, char *dest, size_t len,
+ struct iovec *iov);
#endif
diff --git a/rpc/rpc-transport/Makefile.am b/rpc/rpc-transport/Makefile.am
index 221fd640514..7dd9f026cfc 100644
--- a/rpc/rpc-transport/Makefile.am
+++ b/rpc/rpc-transport/Makefile.am
@@ -1 +1 @@
-SUBDIRS = socket $(RDMA_SUBDIR)
+SUBDIRS = socket
diff --git a/rpc/rpc-transport/rdma/src/Makefile.am b/rpc/rpc-transport/rdma/src/Makefile.am
deleted file mode 100644
index b4b940bca25..00000000000
--- a/rpc/rpc-transport/rdma/src/Makefile.am
+++ /dev/null
@@ -1,20 +0,0 @@
-# TODO : need to change transportdir
-
-transport_LTLIBRARIES = rdma.la
-transportdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/rpc-transport
-
-rdma_la_LDFLAGS = -module -avoidversion
-
-rdma_la_SOURCES = rdma.c name.c
-rdma_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- -libverbs
-
-noinst_HEADERS = rdma.h name.h
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src/ \
- -I$(top_srcdir)/xlators/protocol/lib/src/ -shared -nostartfiles $(GF_CFLAGS)
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src/ \
- -I$(top_srcdir)/rpc/xdr/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES = *~
diff --git a/rpc/rpc-transport/rdma/src/name.c b/rpc/rpc-transport/rdma/src/name.c
deleted file mode 100644
index 3718cd7648e..00000000000
--- a/rpc/rpc-transport/rdma/src/name.c
+++ /dev/null
@@ -1,694 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <errno.h>
-#include <netdb.h>
-#include <string.h>
-
-#ifdef CLIENT_PORT_CEILING
-#undef CLIENT_PORT_CEILING
-#endif
-
-#define CLIENT_PORT_CEILING 1024
-
-#ifndef AF_INET_SDP
-#define AF_INET_SDP 27
-#endif
-
-#include "rpc-transport.h"
-#include "rdma.h"
-
-int32_t
-gf_resolve_ip6 (const char *hostname,
- uint16_t port,
- int family,
- void **dnscache,
- struct addrinfo **addr_info);
-
-static int32_t
-af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr,
- socklen_t sockaddr_len, int ceiling)
-{
- int32_t ret = -1;
- /* struct sockaddr_in sin = {0, }; */
- uint16_t port = ceiling - 1;
-
- while (port)
- {
- switch (sockaddr->sa_family)
- {
- case AF_INET6:
- ((struct sockaddr_in6 *)sockaddr)->sin6_port = htons (port);
- break;
-
- case AF_INET_SDP:
- case AF_INET:
- ((struct sockaddr_in *)sockaddr)->sin_port = htons (port);
- break;
- }
-
- ret = bind (fd, sockaddr, sockaddr_len);
-
- if (ret == 0)
- break;
-
- if (ret == -1 && errno == EACCES)
- break;
-
- port--;
- }
-
- return ret;
-}
-
-static int32_t
-af_unix_client_bind (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t sockaddr_len,
- int sock)
-{
- data_t *path_data = NULL;
- struct sockaddr_un *addr = NULL;
- int32_t ret = -1;
-
- path_data = dict_get (this->options,
- "transport.rdma.bind-path");
- if (path_data) {
- char *path = data_to_str (path_data);
- if (!path || strlen (path) > UNIX_PATH_MAX) {
- gf_log (this->name, GF_LOG_DEBUG,
- "transport.rdma.bind-path not specified "
- "for unix socket, letting connect to assign "
- "default value");
- goto err;
- }
-
- addr = (struct sockaddr_un *) sockaddr;
- strcpy (addr->sun_path, path);
- ret = bind (sock, (struct sockaddr *)addr, sockaddr_len);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot bind to unix-domain socket %d (%s)",
- sock, strerror (errno));
- goto err;
- }
- }
-
-err:
- return ret;
-}
-
-static int32_t
-client_fill_address_family (rpc_transport_t *this, struct sockaddr *sockaddr)
-{
- data_t *address_family_data = NULL;
-
- address_family_data = dict_get (this->options,
- "transport.address-family");
- if (!address_family_data) {
- data_t *remote_host_data = NULL, *connect_path_data = NULL;
- remote_host_data = dict_get (this->options, "remote-host");
- connect_path_data = dict_get (this->options,
- "transport.rdma.connect-path");
-
- if (!(remote_host_data || connect_path_data) ||
- (remote_host_data && connect_path_data)) {
- gf_log (this->name, GF_LOG_ERROR,
- "address-family not specified and not able to "
- "determine the same from other options "
- "(remote-host:%s and connect-path:%s)",
- data_to_str (remote_host_data),
- data_to_str (connect_path_data));
- return -1;
- }
-
- if (remote_host_data) {
- gf_log (this->name, GF_LOG_DEBUG,
- "address-family not specified, guessing it "
- "to be inet/inet6");
- sockaddr->sa_family = AF_UNSPEC;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "address-family not specified, guessing it "
- "to be unix");
- sockaddr->sa_family = AF_UNIX;
- }
-
- } else {
- char *address_family = data_to_str (address_family_data);
- if (!strcasecmp (address_family, "unix")) {
- sockaddr->sa_family = AF_UNIX;
- } else if (!strcasecmp (address_family, "inet")) {
- sockaddr->sa_family = AF_INET;
- } else if (!strcasecmp (address_family, "inet6")) {
- sockaddr->sa_family = AF_INET6;
- } else if (!strcasecmp (address_family, "inet-sdp")) {
- sockaddr->sa_family = AF_INET_SDP;
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address-family (%s) specified",
- address_family);
- sockaddr->sa_family = AF_UNSPEC;
- return -1;
- }
- }
-
- return 0;
-}
-
-static int32_t
-af_inet_client_get_remote_sockaddr (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int16_t remote_port)
-{
- dict_t *options = this->options;
- data_t *remote_host_data = NULL;
- data_t *remote_port_data = NULL;
- char *remote_host = NULL;
- struct addrinfo *addr_info = NULL;
- int32_t ret = 0;
-
- remote_host_data = dict_get (options, "remote-host");
- if (remote_host_data == NULL)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "option remote-host missing in volume %s",
- this->name);
- ret = -1;
- goto err;
- }
-
- remote_host = data_to_str (remote_host_data);
- if (remote_host == NULL)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "option remote-host has data NULL in volume %s",
- this->name);
- ret = -1;
- goto err;
- }
-
- if (remote_port == 0) {
- remote_port_data = dict_get (options, "remote-port");
- if (remote_port_data == NULL)
- {
- gf_log (this->name, GF_LOG_DEBUG,
- "option remote-port missing in volume %s. "
- "Defaulting to %d",
- this->name, GF_DEFAULT_RDMA_LISTEN_PORT);
-
- remote_port = GF_DEFAULT_RDMA_LISTEN_PORT;
- }
- else
- {
- remote_port = data_to_uint16 (remote_port_data);
- }
- }
-
- if (remote_port == -1)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "option remote-port has invalid port in volume %s",
- this->name);
- ret = -1;
- goto err;
- }
-
- /* TODO: gf_resolve is a blocking call. kick in some
- non blocking dns techniques */
- ret = gf_resolve_ip6 (remote_host, remote_port,
- sockaddr->sa_family,
- &this->dnscache, &addr_info);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "DNS resolution failed on host %s", remote_host);
- goto err;
- }
-
- memcpy (sockaddr, addr_info->ai_addr, addr_info->ai_addrlen);
- *sockaddr_len = addr_info->ai_addrlen;
-
-err:
- return ret;
-}
-
-static int32_t
-af_unix_client_get_remote_sockaddr (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len)
-{
- struct sockaddr_un *sockaddr_un = NULL;
- char *connect_path = NULL;
- data_t *connect_path_data = NULL;
- int32_t ret = 0;
-
- connect_path_data = dict_get (this->options,
- "transport.rdma.connect-path");
- if (!connect_path_data) {
- gf_log (this->name, GF_LOG_ERROR,
- "option transport.rdma.connect-path not "
- "specified for address-family unix");
- ret = -1;
- goto err;
- }
-
- connect_path = data_to_str (connect_path_data);
- if (!connect_path) {
- gf_log (this->name, GF_LOG_ERROR,
- "connect-path is null-string");
- ret = -1;
- goto err;
- }
-
- if (strlen (connect_path) > UNIX_PATH_MAX) {
- gf_log (this->name, GF_LOG_ERROR,
- "connect-path value length %"GF_PRI_SIZET" > "
- "%d octets", strlen (connect_path), UNIX_PATH_MAX);
- ret = -1;
- goto err;
- }
-
- gf_log (this->name,
- GF_LOG_DEBUG,
- "using connect-path %s", connect_path);
- sockaddr_un = (struct sockaddr_un *)sockaddr;
- strcpy (sockaddr_un->sun_path, connect_path);
- *sockaddr_len = sizeof (struct sockaddr_un);
-
-err:
- return ret;
-}
-
-static int32_t
-af_unix_server_get_local_sockaddr (rpc_transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len)
-{
- data_t *listen_path_data = NULL;
- char *listen_path = NULL;
- int32_t ret = 0;
- struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
-
-
- listen_path_data = dict_get (this->options,
- "transport.rdma.listen-path");
- if (!listen_path_data) {
- gf_log (this->name, GF_LOG_ERROR,
- "missing option listen-path");
- ret = -1;
- goto err;
- }
-
- listen_path = data_to_str (listen_path_data);
-
-#ifndef UNIX_PATH_MAX
-#define UNIX_PATH_MAX 108
-#endif
-
- if (strlen (listen_path) > UNIX_PATH_MAX) {
- gf_log (this->name, GF_LOG_ERROR,
- "option listen-path has value length %"GF_PRI_SIZET" > %d",
- strlen (listen_path), UNIX_PATH_MAX);
- ret = -1;
- goto err;
- }
-
- sunaddr->sun_family = AF_UNIX;
- strcpy (sunaddr->sun_path, listen_path);
- *addr_len = sizeof (struct sockaddr_un);
-
-err:
- return ret;
-}
-
-static int32_t
-af_inet_server_get_local_sockaddr (rpc_transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len)
-{
- struct addrinfo hints, *res = 0;
- data_t *listen_port_data = NULL, *listen_host_data = NULL;
- uint16_t listen_port = -1;
- char service[NI_MAXSERV], *listen_host = NULL;
- dict_t *options = NULL;
- int32_t ret = 0;
-
- options = this->options;
-
- listen_port_data = dict_get (options, "transport.rdma.listen-port");
- listen_host_data = dict_get (options,
- "transport.rdma.bind-address");
-
- if (listen_port_data) {
- listen_port = data_to_uint16 (listen_port_data);
- } else {
- if (addr->sa_family == AF_INET6) {
- struct sockaddr_in6 *in = (struct sockaddr_in6 *) addr;
- in->sin6_addr = in6addr_any;
- in->sin6_port = htons(listen_port);
- *addr_len = sizeof(struct sockaddr_in6);
- goto out;
- } else if (addr->sa_family == AF_INET) {
- struct sockaddr_in *in = (struct sockaddr_in *) addr;
- in->sin_addr.s_addr = htonl(INADDR_ANY);
- in->sin_port = htons(listen_port);
- *addr_len = sizeof(struct sockaddr_in);
- goto out;
- }
- }
-
- if (listen_port == (uint16_t) -1)
- listen_port = GF_DEFAULT_RDMA_LISTEN_PORT;
-
-
- if (listen_host_data) {
- listen_host = data_to_str (listen_host_data);
- }
-
- memset (service, 0, sizeof (service));
- sprintf (service, "%d", listen_port);
-
- memset (&hints, 0, sizeof (hints));
- hints.ai_family = addr->sa_family;
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_flags = AI_ADDRCONFIG | AI_PASSIVE;
-
- ret = getaddrinfo(listen_host, service, &hints, &res);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "getaddrinfo failed for host %s, service %s (%s)",
- listen_host, service, gai_strerror (ret));
- ret = -1;
- goto out;
- }
-
- memcpy (addr, res->ai_addr, res->ai_addrlen);
- *addr_len = res->ai_addrlen;
-
- freeaddrinfo (res);
-
-out:
- return ret;
-}
-
-int32_t
-gf_rdma_client_bind (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int sock)
-{
- int ret = 0;
-
- *sockaddr_len = sizeof (struct sockaddr_in6);
- switch (sockaddr->sa_family)
- {
- case AF_INET_SDP:
- case AF_INET:
- *sockaddr_len = sizeof (struct sockaddr_in);
-
- case AF_INET6:
- ret = af_inet_bind_to_port_lt_ceiling (sock, sockaddr,
- *sockaddr_len,
- CLIENT_PORT_CEILING);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot bind inet socket (%d) to port "
- "less than %d (%s)",
- sock, CLIENT_PORT_CEILING, strerror (errno));
- ret = 0;
- }
- break;
-
- case AF_UNIX:
- *sockaddr_len = sizeof (struct sockaddr_un);
- ret = af_unix_client_bind (this, (struct sockaddr *)sockaddr,
- *sockaddr_len, sock);
- break;
-
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address family %d", sockaddr->sa_family);
- ret = -1;
- break;
- }
-
- return ret;
-}
-
-int32_t
-gf_rdma_client_get_remote_sockaddr (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int16_t remote_port)
-{
- int32_t ret = 0;
- char is_inet_sdp = 0;
-
- ret = client_fill_address_family (this, sockaddr);
- if (ret) {
- ret = -1;
- goto err;
- }
-
- switch (sockaddr->sa_family)
- {
- case AF_INET_SDP:
- sockaddr->sa_family = AF_INET;
- is_inet_sdp = 1;
-
- case AF_INET:
- case AF_INET6:
- case AF_UNSPEC:
- ret = af_inet_client_get_remote_sockaddr (this,
- sockaddr,
- sockaddr_len,
- remote_port);
-
- if (is_inet_sdp) {
- sockaddr->sa_family = AF_INET_SDP;
- }
-
- break;
-
- case AF_UNIX:
- ret = af_unix_client_get_remote_sockaddr (this,
- sockaddr,
- sockaddr_len);
- break;
-
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address-family %d", sockaddr->sa_family);
- ret = -1;
- }
-
-err:
- return ret;
-}
-
-int32_t
-gf_rdma_server_get_local_sockaddr (rpc_transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len)
-{
- data_t *address_family_data = NULL;
- int32_t ret = 0;
- char is_inet_sdp = 0;
-
- address_family_data = dict_get (this->options,
- "transport.address-family");
- if (address_family_data) {
- char *address_family = NULL;
- address_family = data_to_str (address_family_data);
-
- if (!strcasecmp (address_family, "inet")) {
- addr->sa_family = AF_INET;
- } else if (!strcasecmp (address_family, "inet6")) {
- addr->sa_family = AF_INET6;
- } else if (!strcasecmp (address_family, "inet-sdp")) {
- addr->sa_family = AF_INET_SDP;
- } else if (!strcasecmp (address_family, "unix")) {
- addr->sa_family = AF_UNIX;
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address family (%s) specified",
- address_family);
- addr->sa_family = AF_UNSPEC;
- ret = -1;
- goto err;
- }
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "option address-family not specified, defaulting "
- "to inet");
- addr->sa_family = AF_INET;
- }
-
- switch (addr->sa_family)
- {
- case AF_INET_SDP:
- is_inet_sdp = 1;
- addr->sa_family = AF_INET;
-
- case AF_INET:
- case AF_INET6:
- case AF_UNSPEC:
- ret = af_inet_server_get_local_sockaddr (this, addr, addr_len);
- if (is_inet_sdp && !ret) {
- addr->sa_family = AF_INET_SDP;
- }
- break;
-
- case AF_UNIX:
- ret = af_unix_server_get_local_sockaddr (this, addr, addr_len);
- break;
- }
-
-err:
- return ret;
-}
-
-int32_t
-fill_inet6_inet_identifiers (rpc_transport_t *this, struct sockaddr_storage *addr,
- int32_t addr_len, char *identifier)
-{
- int32_t ret = 0, tmpaddr_len = 0;
- char service[NI_MAXSERV], host[NI_MAXHOST];
- union gf_sock_union sock_union;
-
- memset (&sock_union, 0, sizeof (sock_union));
- sock_union.storage = *addr;
- tmpaddr_len = addr_len;
-
- if (sock_union.sa.sa_family == AF_INET6) {
- int32_t one_to_four, four_to_eight, twelve_to_sixteen;
- int16_t eight_to_ten, ten_to_twelve;
-
- one_to_four = four_to_eight = twelve_to_sixteen = 0;
- eight_to_ten = ten_to_twelve = 0;
-
- one_to_four = sock_union.sin6.sin6_addr.s6_addr32[0];
- four_to_eight = sock_union.sin6.sin6_addr.s6_addr32[1];
-#ifdef GF_SOLARIS_HOST_OS
- eight_to_ten = S6_ADDR16(sock_union.sin6.sin6_addr)[4];
-#else
- eight_to_ten = sock_union.sin6.sin6_addr.s6_addr16[4];
-#endif
-
-#ifdef GF_SOLARIS_HOST_OS
- ten_to_twelve = S6_ADDR16(sock_union.sin6.sin6_addr)[5];
-#else
- ten_to_twelve = sock_union.sin6.sin6_addr.s6_addr16[5];
-#endif
- twelve_to_sixteen = sock_union.sin6.sin6_addr.s6_addr32[3];
-
- /* ipv4 mapped ipv6 address has
- bits 0-80: 0
- bits 80-96: 0xffff
- bits 96-128: ipv4 address
- */
-
- if (one_to_four == 0 &&
- four_to_eight == 0 &&
- eight_to_ten == 0 &&
- ten_to_twelve == -1) {
- struct sockaddr_in *in_ptr = &sock_union.sin;
- memset (&sock_union, 0, sizeof (sock_union));
-
- in_ptr->sin_family = AF_INET;
- in_ptr->sin_port = ((struct sockaddr_in6 *)addr)->sin6_port;
- in_ptr->sin_addr.s_addr = twelve_to_sixteen;
- tmpaddr_len = sizeof (*in_ptr);
- }
- }
-
- ret = getnameinfo (&sock_union.sa,
- tmpaddr_len,
- host, sizeof (host),
- service, sizeof (service),
- NI_NUMERICHOST | NI_NUMERICSERV);
- if (ret != 0) {
- gf_log (this->name,
- GF_LOG_ERROR,
- "getnameinfo failed (%s)", gai_strerror (ret));
- }
-
- sprintf (identifier, "%s:%s", host, service);
-
- return ret;
-}
-
-int32_t
-gf_rdma_get_transport_identifiers (rpc_transport_t *this)
-{
- int32_t ret = 0;
- char is_inet_sdp = 0;
-
- switch (((struct sockaddr *) &this->myinfo.sockaddr)->sa_family)
- {
- case AF_INET_SDP:
- is_inet_sdp = 1;
- ((struct sockaddr *) &this->peerinfo.sockaddr)->sa_family = ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = AF_INET;
-
- case AF_INET:
- case AF_INET6:
- {
- ret = fill_inet6_inet_identifiers (this,
- &this->myinfo.sockaddr,
- this->myinfo.sockaddr_len,
- this->myinfo.identifier);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "can't fill inet/inet6 identifier for server");
- goto err;
- }
-
- ret = fill_inet6_inet_identifiers (this,
- &this->peerinfo.sockaddr,
- this->peerinfo.sockaddr_len,
- this->peerinfo.identifier);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "can't fill inet/inet6 identifier for client");
- goto err;
- }
-
- if (is_inet_sdp) {
- ((struct sockaddr *) &this->peerinfo.sockaddr)->sa_family = ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = AF_INET_SDP;
- }
- }
- break;
-
- case AF_UNIX:
- {
- struct sockaddr_un *sunaddr = NULL;
-
- sunaddr = (struct sockaddr_un *) &this->myinfo.sockaddr;
- strcpy (this->myinfo.identifier, sunaddr->sun_path);
-
- sunaddr = (struct sockaddr_un *) &this->peerinfo.sockaddr;
- strcpy (this->peerinfo.identifier, sunaddr->sun_path);
- }
- break;
-
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address family (%d)",
- ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family);
- ret = -1;
- break;
- }
-
-err:
- return ret;
-}
diff --git a/rpc/rpc-transport/rdma/src/name.h b/rpc/rpc-transport/rdma/src/name.h
deleted file mode 100644
index 114ed1661a3..00000000000
--- a/rpc/rpc-transport/rdma/src/name.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _IB_VERBS_NAME_H
-#define _IB_VERBS_NAME_H
-
-#include <sys/socket.h>
-#include <sys/un.h>
-
-#include "compat.h"
-
-int32_t
-gf_rdma_client_bind (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int sock);
-
-int32_t
-gf_rdma_client_get_remote_sockaddr (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int16_t remote_port);
-
-int32_t
-gf_rdma_server_get_local_sockaddr (rpc_transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len);
-
-int32_t
-gf_rdma_get_transport_identifiers (rpc_transport_t *this);
-
-#endif /* _IB_VERBS_NAME_H */
diff --git a/rpc/rpc-transport/rdma/src/rdma.c b/rpc/rpc-transport/rdma/src/rdma.c
deleted file mode 100644
index 3d4dfe3620f..00000000000
--- a/rpc/rpc-transport/rdma/src/rdma.c
+++ /dev/null
@@ -1,5094 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "dict.h"
-#include "glusterfs.h"
-#include "logging.h"
-#include "rdma.h"
-#include "name.h"
-#include "byte-order.h"
-#include "xlator.h"
-#include "xdr-rpc.h"
-#include <signal.h>
-
-#define GF_RDMA_LOG_NAME "rpc-transport/rdma"
-
-static int32_t
-__gf_rdma_ioq_churn (gf_rdma_peer_t *peer);
-
-gf_rdma_post_t *
-gf_rdma_post_ref (gf_rdma_post_t *post);
-
-int
-gf_rdma_post_unref (gf_rdma_post_t *post);
-
-int32_t
-gf_resolve_ip6 (const char *hostname,
- uint16_t port,
- int family,
- void **dnscache,
- struct addrinfo **addr_info);
-
-static uint16_t
-gf_rdma_get_local_lid (struct ibv_context *context,
- int32_t port)
-{
- struct ibv_port_attr attr;
-
- if (ibv_query_port (context, port, &attr))
- return 0;
-
- return attr.lid;
-}
-
-static const char *
-get_port_state_str(enum ibv_port_state pstate)
-{
- switch (pstate) {
- case IBV_PORT_DOWN: return "PORT_DOWN";
- case IBV_PORT_INIT: return "PORT_INIT";
- case IBV_PORT_ARMED: return "PORT_ARMED";
- case IBV_PORT_ACTIVE: return "PORT_ACTIVE";
- case IBV_PORT_ACTIVE_DEFER: return "PORT_ACTIVE_DEFER";
- default: return "invalid state";
- }
-}
-
-static int32_t
-ib_check_active_port (struct ibv_context *ctx, uint8_t port)
-{
- struct ibv_port_attr port_attr = {0, };
- int32_t ret = 0;
- const char *state_str = NULL;
-
- if (!ctx) {
- gf_log_callingfn (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "Error in supplied context");
- return -1;
- }
-
- ret = ibv_query_port (ctx, port, &port_attr);
-
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "Failed to query port %u properties", port);
- return -1;
- }
-
- state_str = get_port_state_str (port_attr.state);
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_TRACE,
- "Infiniband PORT: (%u) STATE: (%s)",
- port, state_str);
-
- if (port_attr.state == IBV_PORT_ACTIVE)
- return 0;
-
- return -1;
-}
-
-static int32_t
-ib_get_active_port (struct ibv_context *ib_ctx)
-{
- struct ibv_device_attr ib_device_attr = {{0, }, };
- int32_t ret = -1;
- uint8_t ib_port = 0;
-
- if (!ib_ctx) {
- gf_log_callingfn (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "Error in supplied context");
- return -1;
- }
- if (ibv_query_device (ib_ctx, &ib_device_attr)) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "Failed to query device properties");
- return -1;
- }
-
- for (ib_port = 1; ib_port <= ib_device_attr.phys_port_cnt; ++ib_port) {
- ret = ib_check_active_port (ib_ctx, ib_port);
- if (ret == 0)
- return ib_port;
-
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_TRACE,
- "Port:(%u) not active", ib_port);
- continue;
- }
- return ret;
-}
-
-
-static void
-gf_rdma_put_post (gf_rdma_queue_t *queue, gf_rdma_post_t *post)
-{
- post->ctx.is_request = 0;
-
- pthread_mutex_lock (&queue->lock);
- {
- if (post->prev) {
- queue->active_count--;
- post->prev->next = post->next;
- }
-
- if (post->next) {
- post->next->prev = post->prev;
- }
-
- post->prev = &queue->passive_posts;
- post->next = post->prev->next;
- post->prev->next = post;
- post->next->prev = post;
- queue->passive_count++;
- }
- pthread_mutex_unlock (&queue->lock);
-}
-
-
-static gf_rdma_post_t *
-gf_rdma_new_post (gf_rdma_device_t *device, int32_t len,
- gf_rdma_post_type_t type)
-{
- gf_rdma_post_t *post = NULL;
- int ret = -1;
-
- post = (gf_rdma_post_t *) GF_CALLOC (1, sizeof (*post),
- gf_common_mt_rdma_post_t);
- if (post == NULL) {
- goto out;
- }
-
- pthread_mutex_init (&post->lock, NULL);
-
- post->buf_size = len;
-
- post->buf = valloc (len);
- if (!post->buf) {
- gf_log_nomem (GF_RDMA_LOG_NAME, GF_LOG_ERROR, len);
- goto out;
- }
-
- post->mr = ibv_reg_mr (device->pd,
- post->buf,
- post->buf_size,
- IBV_ACCESS_LOCAL_WRITE);
- if (!post->mr) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "memory registration failed (%s)",
- strerror (errno));
- goto out;
- }
-
- post->device = device;
- post->type = type;
-
- ret = 0;
-out:
- if (ret != 0) {
- if (post->buf != NULL) {
- free (post->buf);
- }
-
- GF_FREE (post);
- post = NULL;
- }
-
- return post;
-}
-
-
-static gf_rdma_post_t *
-gf_rdma_get_post (gf_rdma_queue_t *queue)
-{
- gf_rdma_post_t *post = NULL;
-
- pthread_mutex_lock (&queue->lock);
- {
- post = queue->passive_posts.next;
- if (post == &queue->passive_posts)
- post = NULL;
-
- if (post) {
- if (post->prev)
- post->prev->next = post->next;
- if (post->next)
- post->next->prev = post->prev;
- post->prev = &queue->active_posts;
- post->next = post->prev->next;
- post->prev->next = post;
- post->next->prev = post;
- post->reused++;
- queue->active_count++;
- }
- }
- pthread_mutex_unlock (&queue->lock);
-
- return post;
-}
-
-void
-gf_rdma_destroy_post (gf_rdma_post_t *post)
-{
- ibv_dereg_mr (post->mr);
- free (post->buf);
- GF_FREE (post);
-}
-
-
-static int32_t
-__gf_rdma_quota_get (gf_rdma_peer_t *peer)
-{
- int32_t ret = -1;
- gf_rdma_private_t *priv = NULL;
-
- priv = peer->trans->private;
-
- if (priv->connected && peer->quota > 0) {
- ret = peer->quota--;
- }
-
- return ret;
-}
-
-/*
- static int32_t
- gf_rdma_quota_get (gf_rdma_peer_t *peer)
- {
- int32_t ret = -1;
- gf_rdma_private_t *priv = peer->trans->private;
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- ret = __gf_rdma_quota_get (peer);
- }
- pthread_mutex_unlock (&priv->write_mutex);
-
- return ret;
- }
-*/
-
-static void
-__gf_rdma_ioq_entry_free (gf_rdma_ioq_t *entry)
-{
- list_del_init (&entry->list);
-
- if (entry->iobref) {
- iobref_unref (entry->iobref);
- entry->iobref = NULL;
- }
-
- if (entry->msg.request.rsp_iobref) {
- iobref_unref (entry->msg.request.rsp_iobref);
- entry->msg.request.rsp_iobref = NULL;
- }
- mem_put (entry);
-}
-
-
-static void
-__gf_rdma_ioq_flush (gf_rdma_peer_t *peer)
-{
- gf_rdma_ioq_t *entry = NULL, *dummy = NULL;
-
- list_for_each_entry_safe (entry, dummy, &peer->ioq, list) {
- __gf_rdma_ioq_entry_free (entry);
- }
-}
-
-
-static int32_t
-__gf_rdma_disconnect (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- int32_t ret = 0;
-
- priv = this->private;
-
- if (priv->connected || priv->tcp_connected) {
- fcntl (priv->sock, F_SETFL, O_NONBLOCK);
- if (shutdown (priv->sock, SHUT_RDWR) != 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "shutdown () - error: %s",
- strerror (errno));
- ret = -errno;
- priv->tcp_connected = 0;
- priv->connected = 0;
- }
- }
-
- return ret;
-}
-
-
-static int32_t
-gf_rdma_post_send (struct ibv_qp *qp, gf_rdma_post_t *post, int32_t len)
-{
- struct ibv_sge list = {
- .addr = (unsigned long) post->buf,
- .length = len,
- .lkey = post->mr->lkey
- };
-
- struct ibv_send_wr wr = {
- .wr_id = (unsigned long) post,
- .sg_list = &list,
- .num_sge = 1,
- .opcode = IBV_WR_SEND,
- .send_flags = IBV_SEND_SIGNALED,
- }, *bad_wr;
-
- if (!qp)
- return EINVAL;
-
- return ibv_post_send (qp, &wr, &bad_wr);
-}
-
-int
-__gf_rdma_encode_error(gf_rdma_peer_t *peer, gf_rdma_reply_info_t *reply_info,
- struct iovec *rpchdr, gf_rdma_header_t *hdr,
- gf_rdma_errcode_t err)
-{
- struct rpc_msg *rpc_msg = NULL;
-
- if (reply_info != NULL) {
- hdr->rm_xid = hton32(reply_info->rm_xid);
- } else {
- rpc_msg = rpchdr[0].iov_base; /* assume rpchdr contains
- * only one vector.
- * (which is true)
- */
- hdr->rm_xid = rpc_msg->rm_xid;
- }
-
- hdr->rm_vers = hton32(GF_RDMA_VERSION);
- hdr->rm_credit = hton32(peer->send_count);
- hdr->rm_type = hton32(GF_RDMA_ERROR);
- hdr->rm_body.rm_error.rm_type = hton32(err);
- if (err == ERR_VERS) {
- hdr->rm_body.rm_error.rm_version.gf_rdma_vers_low
- = hton32(GF_RDMA_VERSION);
- hdr->rm_body.rm_error.rm_version.gf_rdma_vers_high
- = hton32(GF_RDMA_VERSION);
- }
-
- return sizeof (*hdr);
-}
-
-
-int32_t
-__gf_rdma_send_error (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post, gf_rdma_reply_info_t *reply_info,
- gf_rdma_errcode_t err)
-{
- int32_t ret = -1, len = 0;
-
- len = __gf_rdma_encode_error (peer, reply_info, entry->rpchdr,
- (gf_rdma_header_t *)post->buf, err);
- if (len == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "encode error returned -1");
- goto out;
- }
-
- gf_rdma_post_ref (post);
-
- ret = gf_rdma_post_send (peer->qp, post, len);
- if (!ret) {
- ret = len;
- } else {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "gf_rdma_post_send (to %s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror (ret) : "");
- gf_rdma_post_unref (post);
- __gf_rdma_disconnect (peer->trans);
- ret = -1;
- }
-
-out:
- return ret;
-}
-
-
-int32_t
-__gf_rdma_create_read_chunks_from_vector (gf_rdma_peer_t *peer,
- gf_rdma_read_chunk_t **readch_ptr,
- int32_t *pos, struct iovec *vector,
- int count,
- gf_rdma_request_context_t *request_ctx)
-{
- int i = 0;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
- struct ibv_mr *mr = NULL;
- gf_rdma_read_chunk_t *readch = NULL;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, readch_ptr, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, *readch_ptr, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, request_ctx, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, vector, out);
-
- priv = peer->trans->private;
- device = priv->device;
- readch = *readch_ptr;
-
- for (i = 0; i < count; i++) {
- readch->rc_discrim = hton32 (1);
- readch->rc_position = hton32 (*pos);
-
- mr = ibv_reg_mr (device->pd, vector[i].iov_base,
- vector[i].iov_len,
- IBV_ACCESS_REMOTE_READ);
- if (!mr) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "memory registration failed (%s) (peer:%s)",
- strerror (errno),
- peer->trans->peerinfo.identifier);
- goto out;
- }
-
- request_ctx->mr[request_ctx->mr_count++] = mr;
-
- readch->rc_target.rs_handle = hton32 (mr->rkey);
- readch->rc_target.rs_length
- = hton32 (vector[i].iov_len);
- readch->rc_target.rs_offset
- = hton64 ((uint64_t)(unsigned long)vector[i].iov_base);
-
- *pos = *pos + vector[i].iov_len;
- readch++;
- }
-
- *readch_ptr = readch;
-
- ret = 0;
-out:
- return ret;
-}
-
-
-int32_t
-__gf_rdma_create_read_chunks (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_chunktype_t type, uint32_t **ptr,
- gf_rdma_request_context_t *request_ctx)
-{
- int32_t ret = -1;
- int pos = 0;
-
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, entry, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, ptr, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, *ptr, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, request_ctx, out);
-
- request_ctx->iobref = iobref_ref (entry->iobref);
-
- if (type == gf_rdma_areadch) {
- pos = 0;
- ret = __gf_rdma_create_read_chunks_from_vector (peer,
- (gf_rdma_read_chunk_t **)ptr,
- &pos,
- entry->rpchdr,
- entry->rpchdr_count,
- request_ctx);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "cannot create read chunks from vector "
- "entry->rpchdr");
- goto out;
- }
-
- ret = __gf_rdma_create_read_chunks_from_vector (peer,
- (gf_rdma_read_chunk_t **)ptr,
- &pos,
- entry->proghdr,
- entry->proghdr_count,
- request_ctx);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "cannot create read chunks from vector "
- "entry->proghdr");
- }
-
- if (entry->prog_payload_count != 0) {
- ret = __gf_rdma_create_read_chunks_from_vector (peer,
- (gf_rdma_read_chunk_t **)ptr,
- &pos,
- entry->prog_payload,
- entry->prog_payload_count,
- request_ctx);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "cannot create read chunks from vector"
- " entry->prog_payload");
- }
- }
- } else {
- pos = iov_length (entry->rpchdr, entry->rpchdr_count);
- ret = __gf_rdma_create_read_chunks_from_vector (peer,
- (gf_rdma_read_chunk_t **)ptr,
- &pos,
- entry->prog_payload,
- entry->prog_payload_count,
- request_ctx);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "cannot create read chunks from vector "
- "entry->prog_payload");
- }
- }
-
- /* terminate read-chunk list*/
- **ptr = 0;
- *ptr = *ptr + 1;
-out:
- return ret;
-}
-
-
-int32_t
-__gf_rdma_create_write_chunks_from_vector (gf_rdma_peer_t *peer,
- gf_rdma_write_chunk_t **writech_ptr,
- struct iovec *vector, int count,
- gf_rdma_request_context_t *request_ctx)
-{
- int i = 0;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
- struct ibv_mr *mr = NULL;
- gf_rdma_write_chunk_t *writech = NULL;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, writech_ptr, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, *writech_ptr, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, request_ctx, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, vector, out);
-
- writech = *writech_ptr;
-
- priv = peer->trans->private;
- device = priv->device;
-
- for (i = 0; i < count; i++) {
- mr = ibv_reg_mr (device->pd, vector[i].iov_base,
- vector[i].iov_len,
- IBV_ACCESS_REMOTE_WRITE
- | IBV_ACCESS_LOCAL_WRITE);
- if (!mr) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "memory registration failed (%s) (peer:%s)",
- strerror (errno),
- peer->trans->peerinfo.identifier);
- goto out;
- }
-
- request_ctx->mr[request_ctx->mr_count++] = mr;
-
- writech->wc_target.rs_handle = hton32 (mr->rkey);
- writech->wc_target.rs_length = hton32 (vector[i].iov_len);
- writech->wc_target.rs_offset
- = hton64 (((uint64_t)(unsigned long)vector[i].iov_base));
-
- writech++;
- }
-
- *writech_ptr = writech;
-
- ret = 0;
-out:
- return ret;
-}
-
-
-int32_t
-__gf_rdma_create_write_chunks (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_chunktype_t chunk_type, uint32_t **ptr,
- gf_rdma_request_context_t *request_ctx)
-{
- int32_t ret = -1;
- gf_rdma_write_array_t *warray = NULL;
-
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, ptr, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, *ptr, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, request_ctx, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, entry, out);
-
- if ((chunk_type == gf_rdma_replych)
- && ((entry->msg.request.rsphdr_count != 1) ||
- (entry->msg.request.rsphdr_vec[0].iov_base == NULL))) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- (entry->msg.request.rsphdr_count == 1)
- ? "chunktype specified as reply chunk but the vector "
- "specifying the buffer to be used for holding reply"
- " header is not correct" :
- "chunktype specified as reply chunk, but more than one "
- "buffer provided for holding reply");
- goto out;
- }
-
-/*
- if ((chunk_type == gf_rdma_writech)
- && ((entry->msg.request.rsphdr_count == 0)
- || (entry->msg.request.rsphdr_vec[0].iov_base == NULL))) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "vector specifying buffer to hold the program's reply "
- "header should also be provided when buffers are "
- "provided for holding the program's payload in reply");
- goto out;
- }
-*/
-
- if (chunk_type == gf_rdma_writech) {
- warray = (gf_rdma_write_array_t *)*ptr;
- warray->wc_discrim = hton32 (1);
- warray->wc_nchunks
- = hton32 (entry->msg.request.rsp_payload_count);
-
- *ptr = (uint32_t *)&warray->wc_array[0];
-
- ret = __gf_rdma_create_write_chunks_from_vector (peer,
- (gf_rdma_write_chunk_t **)ptr,
- entry->msg.request.rsp_payload,
- entry->msg.request.rsp_payload_count,
- request_ctx);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "cannot create write chunks from vector "
- "entry->rpc_payload");
- goto out;
- }
-
- /* terminate write chunklist */
- **ptr = 0;
- *ptr = *ptr + 1;
-
- /* no reply chunklist */
- **ptr = 0;
- *ptr = *ptr + 1;
- } else {
- /* no write chunklist */
- **ptr = 0;
- *ptr = *ptr + 1;
-
- warray = (gf_rdma_write_array_t *)*ptr;
- warray->wc_discrim = hton32 (1);
- warray->wc_nchunks = hton32 (entry->msg.request.rsphdr_count);
-
- *ptr = (uint32_t *)&warray->wc_array[0];
-
- ret = __gf_rdma_create_write_chunks_from_vector (peer,
- (gf_rdma_write_chunk_t **)ptr,
- entry->msg.request.rsphdr_vec,
- entry->msg.request.rsphdr_count,
- request_ctx);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "cannot create write chunks from vector "
- "entry->rpchdr");
- goto out;
- }
-
- /* terminate reply chunklist */
- **ptr = 0;
- *ptr = *ptr + 1;
- }
-
-out:
- return ret;
-}
-
-
-inline void
-__gf_rdma_deregister_mr (struct ibv_mr **mr, int count)
-{
- int i = 0;
-
- if (mr == NULL) {
- goto out;
- }
-
- for (i = 0; i < count; i++) {
- ibv_dereg_mr (mr[i]);
- }
-
-out:
- return;
-}
-
-
-static int32_t
-__gf_rdma_quota_put (gf_rdma_peer_t *peer)
-{
- int32_t ret = 0;
-
- peer->quota++;
- ret = peer->quota;
-
- if (!list_empty (&peer->ioq)) {
- ret = __gf_rdma_ioq_churn (peer);
- }
-
- return ret;
-}
-
-
-static int32_t
-gf_rdma_quota_put (gf_rdma_peer_t *peer)
-{
- int32_t ret = 0;
- gf_rdma_private_t *priv = NULL;
-
- priv = peer->trans->private;
- pthread_mutex_lock (&priv->write_mutex);
- {
- ret = __gf_rdma_quota_put (peer);
- }
- pthread_mutex_unlock (&priv->write_mutex);
-
- return ret;
-}
-
-
-/* to be called with priv->mutex held */
-void
-__gf_rdma_request_context_destroy (gf_rdma_request_context_t *context)
-{
- gf_rdma_peer_t *peer = NULL;
- gf_rdma_private_t *priv = NULL;
- int32_t ret = 0;
-
- if (context == NULL) {
- goto out;
- }
-
- peer = context->peer;
-
- __gf_rdma_deregister_mr (context->mr, context->mr_count);
-
- priv = peer->trans->private;
-
- if (priv->connected) {
- ret = __gf_rdma_quota_put (peer);
- if (ret < 0) {
- gf_log ("rdma", GF_LOG_DEBUG,
- "failed to send "
- "message");
- mem_put (context);
- __gf_rdma_disconnect (peer->trans);
- goto out;
- }
- }
-
- if (context->iobref != NULL) {
- iobref_unref (context->iobref);
- context->iobref = NULL;
- }
-
- if (context->rsp_iobref != NULL) {
- iobref_unref (context->rsp_iobref);
- context->rsp_iobref = NULL;
- }
-
- mem_put (context);
-
-out:
- return;
-}
-
-
-void
-gf_rdma_post_context_destroy (gf_rdma_post_context_t *ctx)
-{
- if (ctx == NULL) {
- goto out;
- }
-
- __gf_rdma_deregister_mr (ctx->mr, ctx->mr_count);
-
- if (ctx->iobref != NULL) {
- iobref_unref (ctx->iobref);
- }
-
- if (ctx->hdr_iobuf != NULL) {
- iobuf_unref (ctx->hdr_iobuf);
- }
-
- memset (ctx, 0, sizeof (*ctx));
-out:
- return;
-}
-
-
-static int32_t
-gf_rdma_post_recv (struct ibv_srq *srq,
- gf_rdma_post_t *post)
-{
- struct ibv_sge list = {
- .addr = (unsigned long) post->buf,
- .length = post->buf_size,
- .lkey = post->mr->lkey
- };
-
- struct ibv_recv_wr wr = {
- .wr_id = (unsigned long) post,
- .sg_list = &list,
- .num_sge = 1,
- }, *bad_wr;
-
- gf_rdma_post_ref (post);
-
- return ibv_post_srq_recv (srq, &wr, &bad_wr);
-}
-
-
-int
-gf_rdma_post_unref (gf_rdma_post_t *post)
-{
- int refcount = -1;
-
- if (post == NULL) {
- goto out;
- }
-
- pthread_mutex_lock (&post->lock);
- {
- refcount = --post->refcount;
- }
- pthread_mutex_unlock (&post->lock);
-
- if (refcount == 0) {
- gf_rdma_post_context_destroy (&post->ctx);
- if (post->type == GF_RDMA_SEND_POST) {
- gf_rdma_put_post (&post->device->sendq, post);
- } else {
- gf_rdma_post_recv (post->device->srq, post);
- }
- }
-out:
- return refcount;
-}
-
-
-int
-gf_rdma_post_get_refcount (gf_rdma_post_t *post)
-{
- int refcount = -1;
-
- if (post == NULL) {
- goto out;
- }
-
- pthread_mutex_lock (&post->lock);
- {
- refcount = post->refcount;
- }
- pthread_mutex_unlock (&post->lock);
-
-out:
- return refcount;
-}
-
-gf_rdma_post_t *
-gf_rdma_post_ref (gf_rdma_post_t *post)
-{
- if (post == NULL) {
- goto out;
- }
-
- pthread_mutex_lock (&post->lock);
- {
- post->refcount++;
- }
- pthread_mutex_unlock (&post->lock);
-
-out:
- return post;
-}
-
-
-int32_t
-__gf_rdma_ioq_churn_request (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post)
-{
- gf_rdma_chunktype_t rtype = gf_rdma_noch;
- gf_rdma_chunktype_t wtype = gf_rdma_noch;
- uint64_t send_size = 0;
- gf_rdma_header_t *hdr = NULL;
- struct rpc_msg *rpc_msg = NULL;
- uint32_t *chunkptr = NULL;
- char *buf = NULL;
- int32_t ret = 0;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
- int chunk_count = 0;
- gf_rdma_request_context_t *request_ctx = NULL;
- uint32_t prog_payload_length = 0, len = 0;
- struct rpc_req *rpc_req = NULL;
-
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, entry, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, post, out);
-
- if ((entry->msg.request.rsphdr_count != 0)
- && (entry->msg.request.rsp_payload_count != 0)) {
- ret = -1;
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "both write-chunklist and reply-chunk cannot be "
- "present");
- goto out;
- }
-
- post->ctx.is_request = 1;
- priv = peer->trans->private;
- device = priv->device;
-
- hdr = (gf_rdma_header_t *)post->buf;
-
- send_size = iov_length (entry->rpchdr, entry->rpchdr_count)
- + iov_length (entry->proghdr, entry->proghdr_count)
- + GLUSTERFS_RDMA_MAX_HEADER_SIZE;
-
- if (entry->prog_payload_count != 0) {
- prog_payload_length
- = iov_length (entry->prog_payload,
- entry->prog_payload_count);
- }
-
- if (send_size > GLUSTERFS_RDMA_INLINE_THRESHOLD) {
- rtype = gf_rdma_areadch;
- } else if ((send_size + prog_payload_length)
- < GLUSTERFS_RDMA_INLINE_THRESHOLD) {
- rtype = gf_rdma_noch;
- } else if (entry->prog_payload_count != 0) {
- rtype = gf_rdma_readch;
- }
-
- if (entry->msg.request.rsphdr_count != 0) {
- wtype = gf_rdma_replych;
- } else if (entry->msg.request.rsp_payload_count != 0) {
- wtype = gf_rdma_writech;
- }
-
- if (rtype == gf_rdma_readch) {
- chunk_count += entry->prog_payload_count;
- } else if (rtype == gf_rdma_areadch) {
- chunk_count += entry->rpchdr_count;
- chunk_count += entry->proghdr_count;
- }
-
- if (wtype == gf_rdma_writech) {
- chunk_count += entry->msg.request.rsp_payload_count;
- } else if (wtype == gf_rdma_replych) {
- chunk_count += entry->msg.request.rsphdr_count;
- }
-
- if (chunk_count > GF_RDMA_MAX_SEGMENTS) {
- ret = -1;
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "chunk count(%d) exceeding maximum allowed RDMA "
- "segment count(%d)", chunk_count, GF_RDMA_MAX_SEGMENTS);
- goto out;
- }
-
- if ((wtype != gf_rdma_noch) || (rtype != gf_rdma_noch)) {
- request_ctx = mem_get (device->request_ctx_pool);
- if (request_ctx == NULL) {
- ret = -1;
- goto out;
- }
-
- memset (request_ctx, 0, sizeof (*request_ctx));
-
- request_ctx->pool = device->request_ctx_pool;
- request_ctx->peer = peer;
-
- entry->msg.request.rpc_req->conn_private = request_ctx;
-
- if (entry->msg.request.rsp_iobref != NULL) {
- request_ctx->rsp_iobref
- = iobref_ref (entry->msg.request.rsp_iobref);
- }
- }
-
- rpc_msg = (struct rpc_msg *) entry->rpchdr[0].iov_base;
-
- hdr->rm_xid = rpc_msg->rm_xid; /* no need of hton32(rpc_msg->rm_xid),
- * since rpc_msg->rm_xid is already
- * hton32ed value of actual xid
- */
- hdr->rm_vers = hton32 (GF_RDMA_VERSION);
- hdr->rm_credit = hton32 (peer->send_count);
- if (rtype == gf_rdma_areadch) {
- hdr->rm_type = hton32 (GF_RDMA_NOMSG);
- } else {
- hdr->rm_type = hton32 (GF_RDMA_MSG);
- }
-
- chunkptr = &hdr->rm_body.rm_chunks[0];
- if (rtype != gf_rdma_noch) {
- ret = __gf_rdma_create_read_chunks (peer, entry, rtype,
- &chunkptr,
- request_ctx);
- if (ret != 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "creation of read chunks failed");
- goto out;
- }
- } else {
- *chunkptr++ = 0; /* no read chunks */
- }
-
- if (wtype != gf_rdma_noch) {
- ret = __gf_rdma_create_write_chunks (peer, entry, wtype,
- &chunkptr,
- request_ctx);
- if (ret != 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "creation of write/reply chunk failed");
- goto out;
- }
- } else {
- *chunkptr++ = 0; /* no write chunks */
- *chunkptr++ = 0; /* no reply chunk */
- }
-
- buf = (char *)chunkptr;
-
- if (rtype != gf_rdma_areadch) {
- iov_unload (buf, entry->rpchdr, entry->rpchdr_count);
- buf += iov_length (entry->rpchdr, entry->rpchdr_count);
-
- iov_unload (buf, entry->proghdr, entry->proghdr_count);
- buf += iov_length (entry->proghdr, entry->proghdr_count);
-
- if (rtype != gf_rdma_readch) {
- iov_unload (buf, entry->prog_payload,
- entry->prog_payload_count);
- buf += iov_length (entry->prog_payload,
- entry->prog_payload_count);
- }
- }
-
- len = buf - post->buf;
-
- gf_rdma_post_ref (post);
-
- ret = gf_rdma_post_send (peer->qp, post, len);
- if (!ret) {
- ret = len;
- } else {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "gf_rdma_post_send (to %s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror (ret) : "");
- gf_rdma_post_unref (post);
- __gf_rdma_disconnect (peer->trans);
- ret = -1;
- }
-
-out:
- if (ret == -1) {
- rpc_req = entry->msg.request.rpc_req;
-
- if (request_ctx != NULL) {
- __gf_rdma_request_context_destroy (rpc_req->conn_private);
- }
-
- rpc_req->conn_private = NULL;
- }
-
- return ret;
-}
-
-
-inline void
-__gf_rdma_fill_reply_header (gf_rdma_header_t *header, struct iovec *rpchdr,
- gf_rdma_reply_info_t *reply_info, int credits)
-{
- struct rpc_msg *rpc_msg = NULL;
-
- if (reply_info != NULL) {
- header->rm_xid = hton32 (reply_info->rm_xid);
- } else {
- rpc_msg = rpchdr[0].iov_base; /* assume rpchdr contains
- * only one vector.
- * (which is true)
- */
- header->rm_xid = rpc_msg->rm_xid;
- }
-
- header->rm_type = hton32 (GF_RDMA_MSG);
- header->rm_vers = hton32 (GF_RDMA_VERSION);
- header->rm_credit = hton32 (credits);
-
- header->rm_body.rm_chunks[0] = 0; /* no read chunks */
- header->rm_body.rm_chunks[1] = 0; /* no write chunks */
- header->rm_body.rm_chunks[2] = 0; /* no reply chunks */
-
- return;
-}
-
-
-int32_t
-__gf_rdma_send_reply_inline (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post,
- gf_rdma_reply_info_t *reply_info)
-{
- gf_rdma_header_t *header = NULL;
- int32_t send_size = 0, ret = 0;
- char *buf = NULL;
-
- send_size = iov_length (entry->rpchdr, entry->rpchdr_count)
- + iov_length (entry->proghdr, entry->proghdr_count)
- + iov_length (entry->prog_payload, entry->prog_payload_count)
- + sizeof (gf_rdma_header_t); /*
- * remember, no chunklists in the
- * reply
- */
-
- if (send_size > GLUSTERFS_RDMA_INLINE_THRESHOLD) {
- ret = __gf_rdma_send_error (peer, entry, post, reply_info,
- ERR_CHUNK);
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "msg size (%d) is greater than maximum size "
- "of msg that can be sent inlined (%d)",
- send_size, GLUSTERFS_RDMA_INLINE_THRESHOLD);
- goto out;
- }
-
- header = (gf_rdma_header_t *)post->buf;
-
- __gf_rdma_fill_reply_header (header, entry->rpchdr, reply_info,
- peer->send_count);
-
- buf = (char *)&header->rm_body.rm_chunks[3];
-
- if (entry->rpchdr_count != 0) {
- iov_unload (buf, entry->rpchdr, entry->rpchdr_count);
- buf += iov_length (entry->rpchdr, entry->rpchdr_count);
- }
-
- if (entry->proghdr_count != 0) {
- iov_unload (buf, entry->proghdr, entry->proghdr_count);
- buf += iov_length (entry->proghdr, entry->proghdr_count);
- }
-
- if (entry->prog_payload_count != 0) {
- iov_unload (buf, entry->prog_payload,
- entry->prog_payload_count);
- buf += iov_length (entry->prog_payload,
- entry->prog_payload_count);
- }
-
- gf_rdma_post_ref (post);
-
- ret = gf_rdma_post_send (peer->qp, post, (buf - post->buf));
- if (!ret) {
- ret = send_size;
- } else {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "posting send (to %s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror (ret) : "");
- gf_rdma_post_unref (post);
- __gf_rdma_disconnect (peer->trans);
- ret = -1;
- }
-
-out:
- return ret;
-}
-
-
-int32_t
-__gf_rdma_reply_encode_write_chunks (gf_rdma_peer_t *peer,
- uint32_t payload_size,
- gf_rdma_post_t *post,
- gf_rdma_reply_info_t *reply_info,
- uint32_t **ptr)
-{
- uint32_t chunk_size = 0;
- int32_t ret = -1;
- gf_rdma_write_array_t *target_array = NULL;
- int i = 0;
-
- target_array = (gf_rdma_write_array_t *)*ptr;
-
- for (i = 0; i < reply_info->wc_array->wc_nchunks; i++) {
- chunk_size +=
- reply_info->wc_array->wc_array[i].wc_target.rs_length;
- }
-
- if (chunk_size < payload_size) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "length of payload (%d) is exceeding the total "
- "write chunk length (%d)", payload_size, chunk_size);
- goto out;
- }
-
- target_array->wc_discrim = hton32 (1);
- for (i = 0; (i < reply_info->wc_array->wc_nchunks)
- && (payload_size != 0);
- i++) {
- target_array->wc_array[i].wc_target.rs_offset
- = hton64 (reply_info->wc_array->wc_array[i].wc_target.rs_offset);
-
- target_array->wc_array[i].wc_target.rs_length
- = hton32 (min (payload_size,
- reply_info->wc_array->wc_array[i].wc_target.rs_length));
- }
-
- target_array->wc_nchunks = hton32 (i);
- target_array->wc_array[i].wc_target.rs_handle = 0; /* terminate
- chunklist */
-
- ret = 0;
-
- *ptr = &target_array->wc_array[i].wc_target.rs_length;
-out:
- return ret;
-}
-
-
-inline int32_t
-__gf_rdma_register_local_mr_for_rdma (gf_rdma_peer_t *peer,
- struct iovec *vector, int count,
- gf_rdma_post_context_t *ctx)
-{
- int i = 0;
- int32_t ret = -1;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
-
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, ctx, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, vector, out);
-
- priv = peer->trans->private;
- device = priv->device;
-
- for (i = 0; i < count; i++) {
- /* what if the memory is registered more than once?
- * Assume that a single write buffer is passed to afr, which
- * then passes it to its children. If more than one children
- * happen to use rdma, then the buffer is registered more than
- * once.
- * Ib-verbs specification says that multiple registrations of
- * same memory location is allowed. Refer to 10.6.3.8 of
- * Infiniband Architecture Specification Volume 1
- * (Release 1.2.1)
- */
- ctx->mr[ctx->mr_count] = ibv_reg_mr (device->pd,
- vector[i].iov_base,
- vector[i].iov_len,
- IBV_ACCESS_LOCAL_WRITE);
- if (ctx->mr[ctx->mr_count] == NULL) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "registering memory for IBV_ACCESS_LOCAL_WRITE "
- "failed (%s)", strerror (errno));
- goto out;
- }
-
- ctx->mr_count++;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-/* 1. assumes xfer_len of data is pointed by vector(s) starting from vec[*idx]
- * 2. modifies vec
- */
-int32_t
-__gf_rdma_write (gf_rdma_peer_t *peer, gf_rdma_post_t *post, struct iovec *vec,
- uint32_t xfer_len, int *idx, gf_rdma_write_chunk_t *writech)
-{
- int size = 0, num_sge = 0, i = 0;
- int32_t ret = -1;
- struct ibv_sge *sg_list = NULL;
- struct ibv_send_wr wr = {
- .opcode = IBV_WR_RDMA_WRITE,
- .send_flags = IBV_SEND_SIGNALED,
- }, *bad_wr;
-
- if ((peer == NULL) || (writech == NULL) || (idx == NULL)
- || (post == NULL) || (vec == NULL) || (xfer_len == 0)) {
- goto out;
- }
-
- for (i = *idx; size < xfer_len; i++) {
- size += vec[i].iov_len;
- }
-
- num_sge = i - *idx;
-
- sg_list = GF_CALLOC (num_sge, sizeof (struct ibv_sge),
- gf_common_mt_sge);
- if (sg_list == NULL) {
- ret = -1;
- goto out;
- }
-
- for ((i = *idx), (num_sge = 0); (xfer_len != 0); i++, num_sge++) {
- size = min (xfer_len, vec[i].iov_len);
-
- sg_list [num_sge].addr = (unsigned long)vec[i].iov_base;
- sg_list [num_sge].length = size;
- sg_list [num_sge].lkey = post->ctx.mr[i]->lkey;
-
- xfer_len -= size;
- }
-
- *idx = i;
-
- if (size < vec[i - 1].iov_len) {
- vec[i - 1].iov_base += size;
- vec[i - 1].iov_len -= size;
- *idx = i - 1;
- }
-
- wr.sg_list = sg_list;
- wr.num_sge = num_sge;
- wr.wr_id = (unsigned long) gf_rdma_post_ref (post);
- wr.wr.rdma.rkey = writech->wc_target.rs_handle;
- wr.wr.rdma.remote_addr = writech->wc_target.rs_offset;
-
- ret = ibv_post_send(peer->qp, &wr, &bad_wr);
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "rdma write to "
- "client (%s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror (ret) : "");
- ret = -1;
- }
-
- GF_FREE (sg_list);
-out:
- return ret;
-}
-
-
-int32_t
-__gf_rdma_do_gf_rdma_write (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- struct iovec *vector, int count,
- struct iobref *iobref,
- gf_rdma_reply_info_t *reply_info)
-{
- int i = 0, payload_idx = 0;
- uint32_t payload_size = 0, xfer_len = 0;
- int32_t ret = -1;
-
- if (count != 0) {
- payload_size = iov_length (vector, count);
- }
-
- if (payload_size == 0) {
- ret = 0;
- goto out;
- }
-
- ret = __gf_rdma_register_local_mr_for_rdma (peer, vector, count,
- &post->ctx);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "registering memory region for rdma failed");
- goto out;
- }
-
- post->ctx.iobref = iobref_ref (iobref);
-
- for (i = 0; (i < reply_info->wc_array->wc_nchunks)
- && (payload_size != 0);
- i++) {
- xfer_len = min (payload_size,
- reply_info->wc_array->wc_array[i].wc_target.rs_length);
-
- ret = __gf_rdma_write (peer, post, vector, xfer_len,
- &payload_idx,
- &reply_info->wc_array->wc_array[i]);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "rdma write to client (%s) failed",
- peer->trans->peerinfo.identifier);
- goto out;
- }
-
- payload_size -= xfer_len;
- }
-
- ret = 0;
-out:
-
- return ret;
-}
-
-
-int32_t
-__gf_rdma_send_reply_type_nomsg (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post,
- gf_rdma_reply_info_t *reply_info)
-{
- gf_rdma_header_t *header = NULL;
- char *buf = NULL;
- uint32_t payload_size = 0;
- int count = 0, i = 0;
- int32_t ret = 0;
- struct iovec vector[MAX_IOVEC];
-
- header = (gf_rdma_header_t *)post->buf;
-
- __gf_rdma_fill_reply_header (header, entry->rpchdr, reply_info,
- peer->send_count);
-
- header->rm_type = hton32 (GF_RDMA_NOMSG);
-
- payload_size = iov_length (entry->rpchdr, entry->rpchdr_count) +
- iov_length (entry->proghdr, entry->proghdr_count);
-
- /* encode reply chunklist */
- buf = (char *)&header->rm_body.rm_chunks[2];
- ret = __gf_rdma_reply_encode_write_chunks (peer, payload_size, post,
- reply_info,
- (uint32_t **)&buf);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "encoding write chunks failed");
- ret = __gf_rdma_send_error (peer, entry, post, reply_info,
- ERR_CHUNK);
- goto out;
- }
-
- gf_rdma_post_ref (post);
-
- for (i = 0; i < entry->rpchdr_count; i++) {
- vector[count++] = entry->rpchdr[i];
- }
-
- for (i = 0; i < entry->proghdr_count; i++) {
- vector[count++] = entry->proghdr[i];
- }
-
- ret = __gf_rdma_do_gf_rdma_write (peer, post, vector, count,
- entry->iobref, reply_info);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "rdma write to peer (%s) failed",
- peer->trans->peerinfo.identifier);
- gf_rdma_post_unref (post);
- goto out;
- }
-
- ret = gf_rdma_post_send (peer->qp, post, (buf - post->buf));
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "posting a send request to client (%s) failed with "
- "ret = %d (%s)", peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror (ret) : "");
- ret = -1;
- gf_rdma_post_unref (post);
- } else {
- ret = payload_size;
- }
-
-out:
- return ret;
-}
-
-
-int32_t
-__gf_rdma_send_reply_type_msg (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post,
- gf_rdma_reply_info_t *reply_info)
-{
- gf_rdma_header_t *header = NULL;
- int32_t send_size = 0, ret = 0;
- char *ptr = NULL;
- uint32_t payload_size = 0;
-
- send_size = iov_length (entry->rpchdr, entry->rpchdr_count)
- + iov_length (entry->proghdr, entry->proghdr_count)
- + GLUSTERFS_RDMA_MAX_HEADER_SIZE;
-
- if (send_size > GLUSTERFS_RDMA_INLINE_THRESHOLD) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "client has provided only write chunks, but the "
- "combined size of rpc and program header (%d) is "
- "exceeding the size of msg that can be sent using "
- "RDMA send (%d)", send_size,
- GLUSTERFS_RDMA_INLINE_THRESHOLD);
-
- ret = __gf_rdma_send_error (peer, entry, post, reply_info,
- ERR_CHUNK);
- goto out;
- }
-
- header = (gf_rdma_header_t *)post->buf;
-
- __gf_rdma_fill_reply_header (header, entry->rpchdr, reply_info,
- peer->send_count);
-
- payload_size = iov_length (entry->prog_payload,
- entry->prog_payload_count);
- ptr = (char *)&header->rm_body.rm_chunks[1];
-
- ret = __gf_rdma_reply_encode_write_chunks (peer, payload_size, post,
- reply_info,
- (uint32_t **)&ptr);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "encoding write chunks failed");
- ret = __gf_rdma_send_error (peer, entry, post, reply_info,
- ERR_CHUNK);
- goto out;
- }
-
- *(uint32_t *)ptr = 0; /* terminate reply chunklist */
- ptr += sizeof (uint32_t);
-
- gf_rdma_post_ref (post);
-
- ret = __gf_rdma_do_gf_rdma_write (peer, post, entry->prog_payload,
- entry->prog_payload_count,
- entry->iobref, reply_info);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING, "rdma write to peer "
- "(%s) failed", peer->trans->peerinfo.identifier);
- gf_rdma_post_unref (post);
- goto out;
- }
-
- iov_unload (ptr, entry->rpchdr, entry->rpchdr_count);
- ptr += iov_length (entry->rpchdr, entry->rpchdr_count);
-
- iov_unload (ptr, entry->proghdr, entry->proghdr_count);
- ptr += iov_length (entry->proghdr, entry->proghdr_count);
-
- ret = gf_rdma_post_send (peer->qp, post, (ptr - post->buf));
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "rdma send to client (%s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier, ret,
- (ret > 0) ? strerror (ret) : "");
- gf_rdma_post_unref (post);
- ret = -1;
- } else {
- ret = send_size + payload_size;
- }
-
-out:
- return ret;
-}
-
-
-void
-gf_rdma_reply_info_destroy (gf_rdma_reply_info_t *reply_info)
-{
- if (reply_info == NULL) {
- goto out;
- }
-
- if (reply_info->wc_array != NULL) {
- GF_FREE (reply_info->wc_array);
- reply_info->wc_array = NULL;
- }
-
- mem_put (reply_info);
-out:
- return;
-}
-
-
-gf_rdma_reply_info_t *
-gf_rdma_reply_info_alloc (gf_rdma_peer_t *peer)
-{
- gf_rdma_reply_info_t *reply_info = NULL;
- gf_rdma_private_t *priv = NULL;
-
- priv = peer->trans->private;
-
- reply_info = mem_get (priv->device->reply_info_pool);
- if (reply_info == NULL) {
- goto out;
- }
-
- memset (reply_info, 0, sizeof (*reply_info));
- reply_info->pool = priv->device->reply_info_pool;
-
-out:
- return reply_info;
-}
-
-
-int32_t
-__gf_rdma_ioq_churn_reply (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry,
- gf_rdma_post_t *post)
-{
- gf_rdma_reply_info_t *reply_info = NULL;
- int32_t ret = -1;
- gf_rdma_chunktype_t type = gf_rdma_noch;
-
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, peer, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, entry, out);
- GF_VALIDATE_OR_GOTO (GF_RDMA_LOG_NAME, post, out);
-
- reply_info = entry->msg.reply_info;
- if (reply_info != NULL) {
- type = reply_info->type;
- }
-
- switch (type) {
- case gf_rdma_noch:
- ret = __gf_rdma_send_reply_inline (peer, entry, post,
- reply_info);
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "failed to send reply to peer (%s) as an "
- "inlined rdma msg",
- peer->trans->peerinfo.identifier);
- }
- break;
-
- case gf_rdma_replych:
- ret = __gf_rdma_send_reply_type_nomsg (peer, entry, post,
- reply_info);
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "failed to send reply to peer (%s) as "
- "RDMA_NOMSG", peer->trans->peerinfo.identifier);
- }
- break;
-
- case gf_rdma_writech:
- ret = __gf_rdma_send_reply_type_msg (peer, entry, post,
- reply_info);
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "failed to send reply with write chunks "
- "to peer (%s)",
- peer->trans->peerinfo.identifier);
- }
- break;
-
- default:
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "invalid chunktype (%d) specified for sending reply "
- " (peer:%s)", type, peer->trans->peerinfo.identifier);
- break;
- }
-
- if (reply_info != NULL) {
- gf_rdma_reply_info_destroy (reply_info);
- }
-out:
- return ret;
-}
-
-
-int32_t
-__gf_rdma_ioq_churn_entry (gf_rdma_peer_t *peer, gf_rdma_ioq_t *entry)
-{
- int32_t ret = 0, quota = 0;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
- gf_rdma_options_t *options = NULL;
- gf_rdma_post_t *post = NULL;
-
- priv = peer->trans->private;
- options = &priv->options;
- device = priv->device;
-
- quota = __gf_rdma_quota_get (peer);
- if (quota > 0) {
- post = gf_rdma_get_post (&device->sendq);
- if (post == NULL) {
- post = gf_rdma_new_post (device,
- (options->send_size + 2048),
- GF_RDMA_SEND_POST);
- }
-
- if (post == NULL) {
- ret = -1;
- gf_log_callingfn (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "not able to get a post to send msg");
- goto out;
- }
-
- if (entry->is_request) {
- ret = __gf_rdma_ioq_churn_request (peer, entry, post);
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "failed to process request ioq entry "
- "to peer(%s)",
- peer->trans->peerinfo.identifier);
- }
- } else {
- ret = __gf_rdma_ioq_churn_reply (peer, entry, post);
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "failed to process reply ioq entry "
- "to peer (%s)",
- peer->trans->peerinfo.identifier);
- }
- }
-
- if (ret != 0) {
- __gf_rdma_ioq_entry_free (entry);
- }
- } else {
- ret = 0;
- }
-
-out:
- return ret;
-}
-
-
-static int32_t
-__gf_rdma_ioq_churn (gf_rdma_peer_t *peer)
-{
- gf_rdma_ioq_t *entry = NULL;
- int32_t ret = 0;
-
- while (!list_empty (&peer->ioq))
- {
- /* pick next entry */
- entry = peer->ioq_next;
-
- ret = __gf_rdma_ioq_churn_entry (peer, entry);
-
- if (ret <= 0)
- break;
- }
-
- /*
- list_for_each_entry_safe (entry, dummy, &peer->ioq, list) {
- ret = __gf_rdma_ioq_churn_entry (peer, entry);
- if (ret <= 0) {
- break;
- }
- }
- */
-
- return ret;
-}
-
-
-static int32_t
-gf_rdma_writev (rpc_transport_t *this, gf_rdma_ioq_t *entry)
-{
- int32_t ret = 0, need_append = 1;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_peer_t *peer = NULL;
-
- priv = this->private;
- pthread_mutex_lock (&priv->write_mutex);
- {
- if (!priv->connected) {
- gf_log (this->name, GF_LOG_WARNING,
- "rdma is not connected to peer (%s)",
- this->peerinfo.identifier);
- ret = -1;
- goto unlock;
- }
-
- peer = &priv->peer;
- if (list_empty (&peer->ioq)) {
- ret = __gf_rdma_ioq_churn_entry (peer, entry);
- if (ret != 0) {
- need_append = 0;
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "processing ioq entry destined "
- "to (%s) failed",
- this->peerinfo.identifier);
- }
- }
- }
-
- if (need_append) {
- list_add_tail (&entry->list, &peer->ioq);
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
- return ret;
-}
-
-
-gf_rdma_ioq_t *
-gf_rdma_ioq_new (rpc_transport_t *this, rpc_transport_data_t *data)
-{
- gf_rdma_ioq_t *entry = NULL;
- int count = 0, i = 0;
- rpc_transport_msg_t *msg = NULL;
- gf_rdma_private_t *priv = NULL;
-
- if ((data == NULL) || (this == NULL)) {
- goto out;
- }
-
- priv = this->private;
-
- entry = mem_get (priv->device->ioq_pool);
- if (entry == NULL) {
- goto out;
- }
- memset (entry, 0, sizeof (*entry));
- entry->pool = priv->device->ioq_pool;
-
- if (data->is_request) {
- msg = &data->data.req.msg;
- if (data->data.req.rsp.rsphdr_count != 0) {
- for (i = 0; i < data->data.req.rsp.rsphdr_count; i++) {
- entry->msg.request.rsphdr_vec[i]
- = data->data.req.rsp.rsphdr[i];
- }
-
- entry->msg.request.rsphdr_count =
- data->data.req.rsp.rsphdr_count;
- }
-
- if (data->data.req.rsp.rsp_payload_count != 0) {
- for (i = 0; i < data->data.req.rsp.rsp_payload_count;
- i++) {
- entry->msg.request.rsp_payload[i]
- = data->data.req.rsp.rsp_payload[i];
- }
-
- entry->msg.request.rsp_payload_count =
- data->data.req.rsp.rsp_payload_count;
- }
-
- entry->msg.request.rpc_req = data->data.req.rpc_req;
-
- if (data->data.req.rsp.rsp_iobref != NULL) {
- entry->msg.request.rsp_iobref
- = iobref_ref (data->data.req.rsp.rsp_iobref);
- }
- } else {
- msg = &data->data.reply.msg;
- entry->msg.reply_info = data->data.reply.private;
- }
-
- entry->is_request = data->is_request;
-
- count = msg->rpchdrcount + msg->proghdrcount + msg->progpayloadcount;
-
- GF_ASSERT (count <= MAX_IOVEC);
-
- if (msg->rpchdr != NULL) {
- memcpy (&entry->rpchdr[0], msg->rpchdr,
- sizeof (struct iovec) * msg->rpchdrcount);
- entry->rpchdr_count = msg->rpchdrcount;
- }
-
- if (msg->proghdr != NULL) {
- memcpy (&entry->proghdr[0], msg->proghdr,
- sizeof (struct iovec) * msg->proghdrcount);
- entry->proghdr_count = msg->proghdrcount;
- }
-
- if (msg->progpayload != NULL) {
- memcpy (&entry->prog_payload[0], msg->progpayload,
- sizeof (struct iovec) * msg->progpayloadcount);
- entry->prog_payload_count = msg->progpayloadcount;
- }
-
- if (msg->iobref != NULL) {
- entry->iobref = iobref_ref (msg->iobref);
- }
-
- INIT_LIST_HEAD (&entry->list);
-
-out:
- return entry;
-}
-
-
-int32_t
-gf_rdma_submit_request (rpc_transport_t *this, rpc_transport_req_t *req)
-{
- int32_t ret = 0;
- gf_rdma_ioq_t *entry = NULL;
- rpc_transport_data_t data = {0, };
-
- if (req == NULL) {
- goto out;
- }
-
- data.is_request = 1;
- data.data.req = *req;
-
- entry = gf_rdma_ioq_new (this, &data);
- if (entry == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "getting a new ioq entry failed (peer:%s)",
- this->peerinfo.identifier);
- goto out;
- }
-
- ret = gf_rdma_writev (this, entry);
-
- if (ret > 0) {
- ret = 0;
- } else if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "sending request to peer (%s) failed",
- this->peerinfo.identifier);
- rpc_transport_disconnect (this);
- }
-
-out:
- return ret;
-}
-
-int32_t
-gf_rdma_submit_reply (rpc_transport_t *this, rpc_transport_reply_t *reply)
-{
- int32_t ret = 0;
- gf_rdma_ioq_t *entry = NULL;
- rpc_transport_data_t data = {0, };
-
- if (reply == NULL) {
- goto out;
- }
-
- data.data.reply = *reply;
-
- entry = gf_rdma_ioq_new (this, &data);
- if (entry == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "getting a new ioq entry failed (peer:%s)",
- this->peerinfo.identifier);
- goto out;
- }
-
- ret = gf_rdma_writev (this, entry);
- if (ret > 0) {
- ret = 0;
- } else if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "sending request to peer (%s) failed",
- this->peerinfo.identifier);
- rpc_transport_disconnect (this);
- }
-
-out:
- return ret;
-}
-
-#if 0
-static int
-gf_rdma_receive (rpc_transport_t *this, char **hdr_p, size_t *hdrlen_p,
- struct iobuf **iobuf_p)
-{
- gf_rdma_private_t *priv = this->private;
- /* TODO: return error if !priv->connected, check with locks */
- /* TODO: boundry checks for data_ptr/offset */
- char *copy_from = NULL;
- gf_rdma_header_t *header = NULL;
- uint32_t size1, size2, data_len = 0;
- char *hdr = NULL;
- struct iobuf *iobuf = NULL;
- int32_t ret = 0;
-
- pthread_mutex_lock (&priv->recv_mutex);
- {
-/*
- while (!priv->data_ptr)
- pthread_cond_wait (&priv->recv_cond, &priv->recv_mutex);
-*/
-
- copy_from = priv->data_ptr + priv->data_offset;
-
- priv->data_ptr = NULL;
- data_len = priv->data_len;
- pthread_cond_broadcast (&priv->recv_cond);
- }
- pthread_mutex_unlock (&priv->recv_mutex);
-
- header = (gf_rdma_header_t *)copy_from;
- if (strcmp (header->colonO, ":O")) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "%s: corrupt header received", this->name);
- ret = -1;
- goto err;
- }
-
- size1 = ntoh32 (header->size1);
- size2 = ntoh32 (header->size2);
-
- if (data_len != (size1 + size2 + sizeof (*header))) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "%s: sizeof data read from transport is not equal "
- "to the size specified in the header",
- this->name);
- ret = -1;
- goto err;
- }
-
- copy_from += sizeof (*header);
-
- if (size1) {
- hdr = GF_CALLOC (1, size1, gf_common_mt_char);
- if (!hdr) {
- gf_log (this->name, GF_LOG_ERROR,
- "unable to allocate header for peer %s",
- this->peerinfo.identifier);
- ret = -ENOMEM;
- goto err;
- }
- memcpy (hdr, copy_from, size1);
- copy_from += size1;
- *hdr_p = hdr;
- }
- *hdrlen_p = size1;
-
- if (size2) {
- iobuf = iobuf_get2 (this->ctx->iobuf_pool, size2);
- if (!iobuf) {
- gf_log (this->name, GF_LOG_ERROR,
- "unable to allocate IO buffer for peer %s",
- this->peerinfo.identifier);
- ret = -ENOMEM;
- goto err;
- }
- memcpy (iobuf->ptr, copy_from, size2);
- *iobuf_p = iobuf;
- }
-
-err:
- return ret;
-}
-#endif
-
-
-static void
-gf_rdma_destroy_cq (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
-
- priv = this->private;
- device = priv->device;
-
- if (device->recv_cq)
- ibv_destroy_cq (device->recv_cq);
- device->recv_cq = NULL;
-
- if (device->send_cq)
- ibv_destroy_cq (device->send_cq);
- device->send_cq = NULL;
-
- return;
-}
-
-
-static int32_t
-gf_rdma_create_cq (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- gf_rdma_device_t *device = NULL;
- uint64_t send_cqe = 0;
- int32_t ret = 0;
- struct ibv_device_attr device_attr = {{0}, };
-
- priv = this->private;
- options = &priv->options;
- device = priv->device;
-
- device->recv_cq = ibv_create_cq (priv->device->context,
- options->recv_count * 2,
- device,
- device->recv_chan,
- 0);
- if (!device->recv_cq) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: creation of CQ for device %s failed",
- this->name, device->device_name);
- ret = -1;
- goto out;
- } else if (ibv_req_notify_cq (device->recv_cq, 0)) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: ibv_req_notify_cq on recv CQ of device %s failed",
- this->name, device->device_name);
- ret = -1;
- goto out;
- }
-
- do {
- ret = ibv_query_device (priv->device->context, &device_attr);
- if (ret != 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: ibv_query_device on %s returned %d (%s)",
- this->name, priv->device->device_name, ret,
- (ret > 0) ? strerror (ret) : "");
- ret = -1;
- goto out;
- }
-
- send_cqe = options->send_count * 128;
- send_cqe = (send_cqe > device_attr.max_cqe)
- ? device_attr.max_cqe : send_cqe;
-
- /* TODO: make send_cq size dynamically adaptive */
- device->send_cq = ibv_create_cq (priv->device->context,
- send_cqe, device,
- device->send_chan, 0);
- if (!device->send_cq) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: creation of send_cq for device %s failed",
- this->name, device->device_name);
- ret = -1;
- goto out;
- }
-
- if (ibv_req_notify_cq (device->send_cq, 0)) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: ibv_req_notify_cq on send_cq for device %s"
- " failed", this->name, device->device_name);
- ret = -1;
- goto out;
- }
- } while (0);
-
-out:
- if (ret != 0)
- gf_rdma_destroy_cq (this);
-
- return ret;
-}
-
-
-static int
-gf_rdma_register_peer (gf_rdma_device_t *device, int32_t qp_num,
- gf_rdma_peer_t *peer)
-{
- struct _qpent *ent = NULL;
- gf_rdma_qpreg_t *qpreg = NULL;
- int32_t hash = 0;
- int ret = -1;
-
- qpreg = &device->qpreg;
- hash = qp_num % 42;
-
- pthread_mutex_lock (&qpreg->lock);
- {
- ent = qpreg->ents[hash].next;
- while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num)) {
- ent = ent->next;
- }
-
- if (ent->qp_num == qp_num) {
- ret = 0;
- goto unlock;
- }
-
- ent = (struct _qpent *) GF_CALLOC (1, sizeof (*ent),
- gf_common_mt_qpent);
- if (ent == NULL) {
- goto unlock;
- }
-
- /* TODO: ref reg->peer */
- ent->peer = peer;
- ent->next = &qpreg->ents[hash];
- ent->prev = ent->next->prev;
- ent->next->prev = ent;
- ent->prev->next = ent;
- ent->qp_num = qp_num;
- qpreg->count++;
- ret = 0;
- }
-unlock:
- pthread_mutex_unlock (&qpreg->lock);
-
- return ret;
-}
-
-
-static void
-gf_rdma_unregister_peer (gf_rdma_device_t *device, int32_t qp_num)
-{
- struct _qpent *ent = NULL;
- gf_rdma_qpreg_t *qpreg = NULL;
- int32_t hash = 0;
-
- qpreg = &device->qpreg;
- hash = qp_num % 42;
-
- pthread_mutex_lock (&qpreg->lock);
- {
- ent = qpreg->ents[hash].next;
- while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num))
- ent = ent->next;
- if (ent->qp_num != qp_num) {
- pthread_mutex_unlock (&qpreg->lock);
- return;
- }
- ent->prev->next = ent->next;
- ent->next->prev = ent->prev;
- /* TODO: unref reg->peer */
- GF_FREE (ent);
- qpreg->count--;
- }
- pthread_mutex_unlock (&qpreg->lock);
-}
-
-
-static gf_rdma_peer_t *
-__gf_rdma_lookup_peer (gf_rdma_device_t *device, int32_t qp_num)
-{
- struct _qpent *ent = NULL;
- gf_rdma_peer_t *peer = NULL;
- gf_rdma_qpreg_t *qpreg = NULL;
- int32_t hash = 0;
-
- qpreg = &device->qpreg;
- hash = qp_num % 42;
- ent = qpreg->ents[hash].next;
- while ((ent != &qpreg->ents[hash]) && (ent->qp_num != qp_num))
- ent = ent->next;
-
- if (ent != &qpreg->ents[hash]) {
- peer = ent->peer;
- }
-
- return peer;
-}
-
-/*
- static gf_rdma_peer_t *
- gf_rdma_lookup_peer (gf_rdma_device_t *device,
- int32_t qp_num)
- {
- gf_rdma_qpreg_t *qpreg = NULL;
- gf_rdma_peer_t *peer = NULL;
-
- qpreg = &device->qpreg;
- pthread_mutex_lock (&qpreg->lock);
- {
- peer = __gf_rdma_lookup_peer (device, qp_num);
- }
- pthread_mutex_unlock (&qpreg->lock);
-
- return peer;
- }
-*/
-
-
-static void
-__gf_rdma_destroy_qp (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
-
- priv = this->private;
- if (priv->peer.qp) {
- gf_rdma_unregister_peer (priv->device, priv->peer.qp->qp_num);
- ibv_destroy_qp (priv->peer.qp);
- }
- priv->peer.qp = NULL;
-
- return;
-}
-
-
-static int32_t
-gf_rdma_create_qp (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- gf_rdma_device_t *device = NULL;
- int32_t ret = 0;
- gf_rdma_peer_t *peer = NULL;
-
- priv = this->private;
- options = &priv->options;
- device = priv->device;
-
- peer = &priv->peer;
-
- struct ibv_qp_init_attr init_attr = {
- .send_cq = device->send_cq,
- .recv_cq = device->recv_cq,
- .srq = device->srq,
- .cap = {
- .max_send_wr = peer->send_count,
- .max_recv_wr = peer->recv_count,
- .max_send_sge = 2,
- .max_recv_sge = 1
- },
- .qp_type = IBV_QPT_RC
- };
-
- struct ibv_qp_attr attr = {
- .qp_state = IBV_QPS_INIT,
- .pkey_index = 0,
- .port_num = options->port,
- .qp_access_flags
- = IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE
- };
-
- peer->qp = ibv_create_qp (device->pd, &init_attr);
- if (!peer->qp) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_CRITICAL,
- "%s: could not create QP",
- this->name);
- ret = -1;
- goto out;
- } else if (ibv_modify_qp (peer->qp, &attr,
- IBV_QP_STATE |
- IBV_QP_PKEY_INDEX |
- IBV_QP_PORT |
- IBV_QP_ACCESS_FLAGS)) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_ERROR,
- "%s: failed to modify QP to INIT state",
- this->name);
- ret = -1;
- goto out;
- }
-
- peer->local_lid = gf_rdma_get_local_lid (device->context,
- options->port);
- peer->local_qpn = peer->qp->qp_num;
- peer->local_psn = lrand48 () & 0xffffff;
-
- ret = gf_rdma_register_peer (device, peer->qp->qp_num, peer);
-
-out:
- if (ret == -1)
- __gf_rdma_destroy_qp (this);
-
- return ret;
-}
-
-
-static void
-gf_rdma_destroy_posts (rpc_transport_t *this)
-{
-
-}
-
-
-static int32_t
-__gf_rdma_create_posts (rpc_transport_t *this, int32_t count, int32_t size,
- gf_rdma_queue_t *q, gf_rdma_post_type_t type)
-{
- int32_t i = 0;
- int32_t ret = 0;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_device_t *device = NULL;
-
- priv = this->private;
- device = priv->device;
-
- for (i=0 ; i<count ; i++) {
- gf_rdma_post_t *post = NULL;
-
- post = gf_rdma_new_post (device, size + 2048, type);
- if (!post) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: post creation failed",
- this->name);
- ret = -1;
- break;
- }
-
- gf_rdma_put_post (q, post);
- }
- return ret;
-}
-
-
-static int32_t
-gf_rdma_create_posts (rpc_transport_t *this)
-{
- int32_t i = 0, ret = 0;
- gf_rdma_post_t *post = NULL;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- gf_rdma_device_t *device = NULL;
-
- priv = this->private;
- options = &priv->options;
- device = priv->device;
-
- ret = __gf_rdma_create_posts (this, options->send_count,
- options->send_size,
- &device->sendq, GF_RDMA_SEND_POST);
- if (!ret)
- ret = __gf_rdma_create_posts (this, options->recv_count,
- options->recv_size,
- &device->recvq,
- GF_RDMA_RECV_POST);
-
- if (!ret) {
- for (i=0 ; i<options->recv_count ; i++) {
- post = gf_rdma_get_post (&device->recvq);
- if (gf_rdma_post_recv (device->srq, post) != 0) {
- ret = -1;
- break;
- }
- }
- }
-
- if (ret)
- gf_rdma_destroy_posts (this);
-
- return ret;
-}
-
-
-static int32_t
-gf_rdma_connect_qp (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = this->private;
- gf_rdma_options_t *options = &priv->options;
- struct ibv_qp_attr attr = {
- .qp_state = IBV_QPS_RTR,
- .path_mtu = options->mtu,
- .dest_qp_num = priv->peer.remote_qpn,
- .rq_psn = priv->peer.remote_psn,
- .max_dest_rd_atomic = 1,
- .min_rnr_timer = 12,
- .qp_access_flags
- = IBV_ACCESS_REMOTE_READ | IBV_ACCESS_REMOTE_WRITE,
- .ah_attr = {
- .is_global = 0,
- .dlid = priv->peer.remote_lid,
- .sl = 0,
- .src_path_bits = 0,
- .port_num = options->port
- }
- };
- if (ibv_modify_qp (priv->peer.qp, &attr,
- IBV_QP_STATE |
- IBV_QP_AV |
- IBV_QP_PATH_MTU |
- IBV_QP_DEST_QPN |
- IBV_QP_RQ_PSN |
- IBV_QP_MAX_DEST_RD_ATOMIC |
- IBV_QP_MIN_RNR_TIMER)) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_CRITICAL,
- "Failed to modify QP to RTR\n");
- return -1;
- }
-
- attr.qp_state = IBV_QPS_RTS;
- attr.timeout = options->attr_timeout;
- attr.retry_cnt = options->attr_retry_cnt;
- attr.rnr_retry = options->attr_rnr_retry;
- attr.sq_psn = priv->peer.local_psn;
- attr.max_rd_atomic = 1;
- if (ibv_modify_qp (priv->peer.qp, &attr,
- IBV_QP_STATE |
- IBV_QP_TIMEOUT |
- IBV_QP_RETRY_CNT |
- IBV_QP_RNR_RETRY |
- IBV_QP_SQ_PSN |
- IBV_QP_MAX_QP_RD_ATOMIC)) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_CRITICAL,
- "Failed to modify QP to RTS\n");
- return -1;
- }
-
- return 0;
-}
-
-static int32_t
-__gf_rdma_teardown (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
-
- priv = this->private;
- __gf_rdma_destroy_qp (this);
-
- if (!list_empty (&priv->peer.ioq)) {
- __gf_rdma_ioq_flush (&priv->peer);
- }
-
- /* TODO: decrement cq size */
- return 0;
-}
-
-/*
- * return value:
- * 0 = success (completed)
- * -1 = error
- * > 0 = incomplete
- */
-
-static int
-__tcp_rwv (rpc_transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count,
- int write)
-{
- gf_rdma_private_t *priv = NULL;
- int sock = -1;
- int ret = -1;
- struct iovec *opvector = NULL;
- int opcount = 0;
- int moved = 0;
-
- priv = this->private;
- sock = priv->sock;
- opvector = vector;
- opcount = count;
-
- while (opcount)
- {
- if (write)
- {
- ret = writev (sock, opvector, opcount);
-
- if (ret == 0 || (ret == -1 && errno == EAGAIN))
- {
- /* done for now */
- break;
- }
- }
- else
- {
- ret = readv (sock, opvector, opcount);
-
- if (ret == -1 && errno == EAGAIN)
- {
- /* done for now */
- break;
- }
- }
-
- if (ret == 0)
- {
- gf_log (this->name, GF_LOG_DEBUG,
- "EOF from peer %s", this->peerinfo.identifier);
- opcount = -1;
- errno = ENOTCONN;
- break;
- }
-
- if (ret == -1)
- {
- if (errno == EINTR)
- continue;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%s failed (%s)", write ? "writev" : "readv",
- strerror (errno));
- if (write && !priv->connected &&
- (errno == ECONNREFUSED))
- gf_log (this->name, GF_LOG_ERROR,
- "possible mismatch of 'rpc-transport-type'"
- " in protocol server and client. "
- "check volume file");
- opcount = -1;
- break;
- }
-
- moved = 0;
-
- while (moved < ret)
- {
- if ((ret - moved) >= opvector[0].iov_len)
- {
- moved += opvector[0].iov_len;
- opvector++;
- opcount--;
- }
- else
- {
- opvector[0].iov_len -= (ret - moved);
- opvector[0].iov_base += (ret - moved);
- moved += (ret - moved);
- }
- while (opcount && !opvector[0].iov_len)
- {
- opvector++;
- opcount--;
- }
- }
- }
-
- if (pending_vector)
- *pending_vector = opvector;
-
- if (pending_count)
- *pending_count = opcount;
-
- return opcount;
-}
-
-
-static int
-__tcp_readv (rpc_transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count)
-{
- int ret = -1;
-
- ret = __tcp_rwv (this, vector, count,
- pending_vector, pending_count, 0);
-
- return ret;
-}
-
-
-static int
-__tcp_writev (rpc_transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count)
-{
- int ret = -1;
- gf_rdma_private_t *priv = NULL;
-
- priv = this->private;
-
- ret = __tcp_rwv (this, vector, count, pending_vector,
- pending_count, 1);
-
- if (ret > 0) {
- /* TODO: Avoid multiple calls when socket is already
- registered for POLLOUT */
- priv->idx = event_select_on (this->ctx->event_pool,
- priv->sock, priv->idx, -1, 1);
- } else if (ret == 0) {
- priv->idx = event_select_on (this->ctx->event_pool,
- priv->sock,
- priv->idx, -1, 0);
- }
-
- return ret;
-}
-
-
-/*
- * allocates new memory to hold write-chunklist. New memory is needed since
- * write-chunklist will be used while sending reply and the post holding initial
- * write-chunklist sent from client will be put back to srq before a pollin
- * event is sent to upper layers.
- */
-int32_t
-gf_rdma_get_write_chunklist (char **ptr, gf_rdma_write_array_t **write_ary)
-{
- gf_rdma_write_array_t *from = NULL, *to = NULL;
- int32_t ret = -1, size = 0, i = 0;
-
- from = (gf_rdma_write_array_t *) *ptr;
- if (from->wc_discrim == 0) {
- ret = 0;
- goto out;
- }
-
- from->wc_nchunks = ntoh32 (from->wc_nchunks);
-
- size = sizeof (*from)
- + (sizeof (gf_rdma_write_chunk_t) * from->wc_nchunks);
-
- to = GF_CALLOC (1, size, gf_common_mt_char);
- if (to == NULL) {
- ret = -1;
- goto out;
- }
-
- to->wc_discrim = ntoh32 (from->wc_discrim);
- to->wc_nchunks = from->wc_nchunks;
-
- for (i = 0; i < to->wc_nchunks; i++) {
- to->wc_array[i].wc_target.rs_handle
- = ntoh32 (from->wc_array[i].wc_target.rs_handle);
- to->wc_array[i].wc_target.rs_length
- = ntoh32 (from->wc_array[i].wc_target.rs_length);
- to->wc_array[i].wc_target.rs_offset
- = ntoh64 (from->wc_array[i].wc_target.rs_offset);
- }
-
- *write_ary = to;
- ret = 0;
- *ptr = (char *)&from->wc_array[i].wc_target.rs_handle;
-out:
- return ret;
-}
-
-
-/*
- * does not allocate new memory to hold read-chunklist. New memory is not
- * needed, since post is not put back to srq till we've completed all the
- * rdma-reads and hence readchunk-list can point to memory held by post.
- */
-int32_t
-gf_rdma_get_read_chunklist (char **ptr, gf_rdma_read_chunk_t **readch)
-{
- int32_t ret = -1;
- gf_rdma_read_chunk_t *chunk = NULL;
- int i = 0;
-
- chunk = (gf_rdma_read_chunk_t *)*ptr;
- if (chunk[0].rc_discrim == 0) {
- ret = 0;
- goto out;
- }
-
- for (i = 0; chunk[i].rc_discrim != 0; i++) {
- chunk[i].rc_discrim = ntoh32 (chunk[i].rc_discrim);
- chunk[i].rc_position = ntoh32 (chunk[i].rc_position);
- chunk[i].rc_target.rs_handle
- = ntoh32 (chunk[i].rc_target.rs_handle);
- chunk[i].rc_target.rs_length
- = ntoh32 (chunk[i].rc_target.rs_length);
- chunk[i].rc_target.rs_offset
- = ntoh64 (chunk[i].rc_target.rs_offset);
- }
-
- *readch = &chunk[0];
- ret = 0;
- *ptr = (char *)&chunk[i].rc_discrim;
-out:
- return ret;
-}
-
-
-inline int32_t
-gf_rdma_decode_error_msg (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- size_t bytes_in_post)
-{
- gf_rdma_header_t *header = NULL;
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- int32_t ret = -1;
- struct rpc_msg rpc_msg = {0, };
-
- header = (gf_rdma_header_t *)post->buf;
- header->rm_body.rm_error.rm_type
- = ntoh32 (header->rm_body.rm_error.rm_type);
- if (header->rm_body.rm_error.rm_type == ERR_VERS) {
- header->rm_body.rm_error.rm_version.gf_rdma_vers_low =
- ntoh32 (header->rm_body.rm_error.rm_version.gf_rdma_vers_low);
- header->rm_body.rm_error.rm_version.gf_rdma_vers_high =
- ntoh32 (header->rm_body.rm_error.rm_version.gf_rdma_vers_high);
- }
-
- rpc_msg.rm_xid = header->rm_xid;
- rpc_msg.rm_direction = REPLY;
- rpc_msg.rm_reply.rp_stat = MSG_DENIED;
-
- iobuf = iobuf_get2 (peer->trans->ctx->iobuf_pool, bytes_in_post);
- if (iobuf == NULL) {
- ret = -1;
- goto out;
- }
-
- post->ctx.iobref = iobref = iobref_new ();
- if (iobref == NULL) {
- ret = -1;
- goto out;
- }
-
- iobref_add (iobref, iobuf);
- iobuf_unref (iobuf);
-
- ret = rpc_reply_to_xdr (&rpc_msg, iobuf_ptr (iobuf),
- iobuf_pagesize (iobuf), &post->ctx.vector[0]);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "Failed to create RPC reply");
- goto out;
- }
-
- post->ctx.count = 1;
-
- iobuf = NULL;
- iobref = NULL;
-
-out:
- if (ret == -1) {
- if (iobuf != NULL) {
- iobuf_unref (iobuf);
- }
-
- if (iobref != NULL) {
- iobref_unref (iobref);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-gf_rdma_decode_msg (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- gf_rdma_read_chunk_t **readch, size_t bytes_in_post)
-{
- int32_t ret = -1;
- gf_rdma_header_t *header = NULL;
- gf_rdma_reply_info_t *reply_info = NULL;
- char *ptr = NULL;
- gf_rdma_write_array_t *write_ary = NULL;
- size_t header_len = 0;
-
- header = (gf_rdma_header_t *)post->buf;
-
- ptr = (char *)&header->rm_body.rm_chunks[0];
-
- ret = gf_rdma_get_read_chunklist (&ptr, readch);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "cannot get read chunklist from msg");
- goto out;
- }
-
- /* skip terminator of read-chunklist */
- ptr = ptr + sizeof (uint32_t);
-
- ret = gf_rdma_get_write_chunklist (&ptr, &write_ary);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "cannot get write chunklist from msg");
- goto out;
- }
-
- /* skip terminator of write-chunklist */
- ptr = ptr + sizeof (uint32_t);
-
- if (write_ary != NULL) {
- reply_info = gf_rdma_reply_info_alloc (peer);
- if (reply_info == NULL) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "reply_info_alloc failed");
- ret = -1;
- goto out;
- }
-
- reply_info->type = gf_rdma_writech;
- reply_info->wc_array = write_ary;
- reply_info->rm_xid = header->rm_xid;
- } else {
- ret = gf_rdma_get_write_chunklist (&ptr, &write_ary);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "cannot get reply chunklist from msg");
- goto out;
- }
-
- if (write_ary != NULL) {
- reply_info = gf_rdma_reply_info_alloc (peer);
- if (reply_info == NULL) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "reply_info_alloc_failed");
- ret = -1;
- goto out;
- }
-
- reply_info->type = gf_rdma_replych;
- reply_info->wc_array = write_ary;
- reply_info->rm_xid = header->rm_xid;
- }
- }
-
- /* skip terminator of reply chunk */
- ptr = ptr + sizeof (uint32_t);
- if (header->rm_type != GF_RDMA_NOMSG) {
- header_len = (long)ptr - (long)post->buf;
- post->ctx.vector[0].iov_len = (bytes_in_post - header_len);
-
- post->ctx.hdr_iobuf = iobuf_get2 (peer->trans->ctx->iobuf_pool,
- (bytes_in_post - header_len));
- if (post->ctx.hdr_iobuf == NULL) {
- ret = -1;
- goto out;
- }
-
- post->ctx.vector[0].iov_base = iobuf_ptr (post->ctx.hdr_iobuf);
- memcpy (post->ctx.vector[0].iov_base, ptr,
- post->ctx.vector[0].iov_len);
- post->ctx.count = 1;
- }
-
- post->ctx.reply_info = reply_info;
-out:
- if (ret == -1) {
- if (*readch != NULL) {
- GF_FREE (*readch);
- *readch = NULL;
- }
-
- if (write_ary != NULL) {
- GF_FREE (write_ary);
- }
- }
-
- return ret;
-}
-
-
-/* Assumes only one of either write-chunklist or a reply chunk is present */
-int32_t
-gf_rdma_decode_header (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- gf_rdma_read_chunk_t **readch, size_t bytes_in_post)
-{
- int32_t ret = -1;
- gf_rdma_header_t *header = NULL;
-
- header = (gf_rdma_header_t *)post->buf;
-
- header->rm_xid = ntoh32 (header->rm_xid);
- header->rm_vers = ntoh32 (header->rm_vers);
- header->rm_credit = ntoh32 (header->rm_credit);
- header->rm_type = ntoh32 (header->rm_type);
-
- switch (header->rm_type) {
- case GF_RDMA_MSG:
- case GF_RDMA_NOMSG:
- ret = gf_rdma_decode_msg (peer, post, readch, bytes_in_post);
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "cannot decode msg of type (%d)",
- header->rm_type);
- }
-
- break;
-
- case GF_RDMA_MSGP:
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "rdma msg of msg-type GF_RDMA_MSGP should not have "
- "been received");
- ret = -1;
- break;
-
- case GF_RDMA_DONE:
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "rdma msg of msg-type GF_RDMA_DONE should not have "
- "been received");
- ret = -1;
- break;
-
- case GF_RDMA_ERROR:
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "recieved a msg of type RDMA_ERROR");
- ret = gf_rdma_decode_error_msg (peer, post, bytes_in_post);
- break;
-
- default:
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "unknown rdma msg-type (%d)", header->rm_type);
- }
-
- return ret;
-}
-
-
-int32_t
-__gf_rdma_read (gf_rdma_peer_t *peer, gf_rdma_post_t *post, struct iovec *to,
- gf_rdma_read_chunk_t *readch)
-{
- int32_t ret = -1;
- struct ibv_sge list = {0, };
- struct ibv_send_wr wr = {0, }, *bad_wr = NULL;
-
- ret = __gf_rdma_register_local_mr_for_rdma (peer, to, 1, &post->ctx);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "registering local memory for rdma read failed");
- goto out;
- }
-
- list.addr = (unsigned long) to->iov_base;
- list.length = to->iov_len;
- list.lkey = post->ctx.mr[post->ctx.mr_count - 1]->lkey;
-
- wr.wr_id = (unsigned long) gf_rdma_post_ref (post);
- wr.sg_list = &list;
- wr.num_sge = 1;
- wr.opcode = IBV_WR_RDMA_READ;
- wr.send_flags = IBV_SEND_SIGNALED;
- wr.wr.rdma.remote_addr = readch->rc_target.rs_offset;
- wr.wr.rdma.rkey = readch->rc_target.rs_handle;
-
- ret = ibv_post_send (peer->qp, &wr, &bad_wr);
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "rdma read from client "
- "(%s) failed with ret = %d (%s)",
- peer->trans->peerinfo.identifier,
- ret, (ret > 0) ? strerror (ret) : "");
- ret = -1;
- gf_rdma_post_unref (post);
- }
-out:
- return ret;
-}
-
-
-int32_t
-gf_rdma_do_reads (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- gf_rdma_read_chunk_t *readch)
-{
- int32_t ret = -1, i = 0, count = 0;
- size_t size = 0;
- char *ptr = NULL;
- struct iobuf *iobuf = NULL;
- gf_rdma_private_t *priv = NULL;
-
- priv = peer->trans->private;
-
- for (i = 0; readch[i].rc_discrim != 0; i++) {
- size += readch[i].rc_target.rs_length;
- }
-
- if (i == 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "message type specified as rdma-read but there are no "
- "rdma read-chunks present");
- goto out;
- }
-
- post->ctx.gf_rdma_reads = i;
-
- iobuf = iobuf_get2 (peer->trans->ctx->iobuf_pool, size);
- if (iobuf == NULL) {
- goto out;
- }
-
- if (post->ctx.iobref == NULL) {
- post->ctx.iobref = iobref_new ();
- if (post->ctx.iobref == NULL) {
- iobuf_unref (iobuf);
- goto out;
- }
- }
-
- iobref_add (post->ctx.iobref, iobuf);
- iobuf_unref (iobuf);
-
- ptr = iobuf_ptr (iobuf);
- iobuf = NULL;
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- if (!priv->connected) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "transport not connected to peer (%s), "
- "not doing rdma reads",
- peer->trans->peerinfo.identifier);
- goto unlock;
- }
-
- for (i = 0; readch[i].rc_discrim != 0; i++) {
- count = post->ctx.count++;
- post->ctx.vector[count].iov_base = ptr;
- post->ctx.vector[count].iov_len
- = readch[i].rc_target.rs_length;
-
- ret = __gf_rdma_read (peer, post,
- &post->ctx.vector[count],
- &readch[i]);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "rdma read from peer (%s) failed",
- peer->trans->peerinfo.identifier);
- goto unlock;
- }
-
- ptr += readch[i].rc_target.rs_length;
- }
-
- ret = 0;
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
-out:
-
- if (ret == -1) {
- if (iobuf != NULL) {
- iobuf_unref (iobuf);
- }
- }
-
- return ret;
-}
-
-
-int32_t
-gf_rdma_pollin_notify (gf_rdma_peer_t *peer, gf_rdma_post_t *post)
-{
- int32_t ret = -1;
- enum msg_type msg_type = 0;
- struct rpc_req *rpc_req = NULL;
- gf_rdma_request_context_t *request_context = NULL;
- rpc_request_info_t request_info = {0, };
- gf_rdma_private_t *priv = NULL;
- uint32_t *ptr = NULL;
- rpc_transport_pollin_t *pollin = NULL;
-
- if ((peer == NULL) || (post == NULL)) {
- goto out;
- }
-
- if (post->ctx.iobref == NULL) {
- post->ctx.iobref = iobref_new ();
- if (post->ctx.iobref == NULL) {
- goto out;
- }
-
- /* handling the case where both hdr and payload of
- * GF_FOP_READ_CBK were received in a single iobuf
- * because of server sending entire msg as inline without
- * doing rdma writes.
- */
- if (post->ctx.hdr_iobuf)
- iobref_add (post->ctx.iobref, post->ctx.hdr_iobuf);
- }
-
- pollin = rpc_transport_pollin_alloc (peer->trans,
- post->ctx.vector,
- post->ctx.count,
- post->ctx.hdr_iobuf,
- post->ctx.iobref,
- post->ctx.reply_info);
- if (pollin == NULL) {
- goto out;
- }
-
- ptr = (uint32_t *)pollin->vector[0].iov_base;
-
- request_info.xid = ntoh32 (*ptr);
- msg_type = ntoh32 (*(ptr + 1));
-
- if (msg_type == REPLY) {
- ret = rpc_transport_notify (peer->trans,
- RPC_TRANSPORT_MAP_XID_REQUEST,
- &request_info);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "cannot get request information from rpc "
- "layer");
- goto out;
- }
-
- rpc_req = request_info.rpc_req;
- if (rpc_req == NULL) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "rpc request structure not found");
- ret = -1;
- goto out;
- }
-
- request_context = rpc_req->conn_private;
- rpc_req->conn_private = NULL;
-
- priv = peer->trans->private;
- if (request_context != NULL) {
- pthread_mutex_lock (&priv->write_mutex);
- {
- __gf_rdma_request_context_destroy (request_context);
- }
- pthread_mutex_unlock (&priv->write_mutex);
- } else {
- gf_rdma_quota_put (peer);
- }
-
- pollin->is_reply = 1;
- }
-
- ret = rpc_transport_notify (peer->trans, RPC_TRANSPORT_MSG_RECEIVED,
- pollin);
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "transport_notify failed");
- }
-
-out:
- if (pollin != NULL) {
- pollin->private = NULL;
- rpc_transport_pollin_destroy (pollin);
- }
-
- return ret;
-}
-
-
-int32_t
-gf_rdma_recv_reply (gf_rdma_peer_t *peer, gf_rdma_post_t *post)
-{
- int32_t ret = -1;
- gf_rdma_header_t *header = NULL;
- gf_rdma_reply_info_t *reply_info = NULL;
- gf_rdma_write_array_t *wc_array = NULL;
- int i = 0;
- uint32_t *ptr = NULL;
- gf_rdma_request_context_t *ctx = NULL;
- rpc_request_info_t request_info = {0, };
- struct rpc_req *rpc_req = NULL;
-
- header = (gf_rdma_header_t *)post->buf;
- reply_info = post->ctx.reply_info;
-
- /* no write chunklist, just notify upper layers */
- if (reply_info == NULL) {
- ret = 0;
- goto out;
- }
-
- wc_array = reply_info->wc_array;
-
- if (header->rm_type == GF_RDMA_NOMSG) {
- post->ctx.vector[0].iov_base
- = (void *)(long)wc_array->wc_array[0].wc_target.rs_offset;
- post->ctx.vector[0].iov_len
- = wc_array->wc_array[0].wc_target.rs_length;
-
- post->ctx.count = 1;
- } else {
- for (i = 0; i < wc_array->wc_nchunks; i++) {
- post->ctx.vector[i + 1].iov_base
- = (void *)(long)wc_array->wc_array[i].wc_target.rs_offset;
- post->ctx.vector[i + 1].iov_len
- = wc_array->wc_array[i].wc_target.rs_length;
- }
-
- post->ctx.count += wc_array->wc_nchunks;
- }
-
- ptr = (uint32_t *)post->ctx.vector[0].iov_base;
- request_info.xid = ntoh32 (*ptr);
-
- ret = rpc_transport_notify (peer->trans,
- RPC_TRANSPORT_MAP_XID_REQUEST,
- &request_info);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "cannot get request information (peer:%s) from rpc "
- "layer", peer->trans->peerinfo.identifier);
- goto out;
- }
-
- rpc_req = request_info.rpc_req;
- if (rpc_req == NULL) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "rpc request structure not found");
- ret = -1;
- goto out;
- }
-
- ctx = rpc_req->conn_private;
- if ((post->ctx.iobref == NULL) && ctx->rsp_iobref) {
- post->ctx.iobref = iobref_ref (ctx->rsp_iobref);
- }
-
- ret = 0;
-
- gf_rdma_reply_info_destroy (reply_info);
-
-out:
- if (ret == 0) {
- ret = gf_rdma_pollin_notify (peer, post);
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "pollin notify failed");
- }
- }
-
- return ret;
-}
-
-
-inline int32_t
-gf_rdma_recv_request (gf_rdma_peer_t *peer, gf_rdma_post_t *post,
- gf_rdma_read_chunk_t *readch)
-{
- int32_t ret = -1;
-
- if (readch != NULL) {
- ret = gf_rdma_do_reads (peer, post, readch);
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "rdma read from peer (%s) failed",
- peer->trans->peerinfo.identifier);
- }
- } else {
- ret = gf_rdma_pollin_notify (peer, post);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "pollin notification failed");
- }
- }
-
- return ret;
-}
-
-void
-gf_rdma_process_recv (gf_rdma_peer_t *peer, struct ibv_wc *wc)
-{
- gf_rdma_post_t *post = NULL;
- gf_rdma_read_chunk_t *readch = NULL;
- int ret = -1;
- uint32_t *ptr = NULL;
- enum msg_type msg_type = 0;
- gf_rdma_header_t *header = NULL;
-
- post = (gf_rdma_post_t *) (long) wc->wr_id;
- if (post == NULL) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "no post found in successful work completion element");
- goto out;
- }
-
- ret = gf_rdma_decode_header (peer, post, &readch, wc->byte_len);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "decoding of header failed");
- goto out;
- }
-
- header = (gf_rdma_header_t *)post->buf;
-
- switch (header->rm_type) {
- case GF_RDMA_MSG:
- ptr = (uint32_t *)post->ctx.vector[0].iov_base;
- msg_type = ntoh32 (*(ptr + 1));
- break;
-
- case GF_RDMA_NOMSG:
- if (readch != NULL) {
- msg_type = CALL;
- } else {
- msg_type = REPLY;
- }
- break;
-
- case GF_RDMA_ERROR:
- if (header->rm_body.rm_error.rm_type == ERR_CHUNK) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "peer (%s), couldn't encode or decode the msg "
- "properly or write chunks were not provided "
- "for replies that were bigger than "
- "RDMA_INLINE_THRESHOLD (%d)",
- peer->trans->peerinfo.identifier,
- GLUSTERFS_RDMA_INLINE_THRESHOLD);
- ret = gf_rdma_pollin_notify (peer, post);
- if (ret == -1) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "pollin notification failed");
- }
- goto out;
- } else {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "an error has happened while transmission of "
- "msg, disconnecting the transport");
- ret = -1;
- goto out;
- }
-
- default:
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "invalid rdma msg-type (%d)", header->rm_type);
- goto out;
- }
-
- if (msg_type == CALL) {
- ret = gf_rdma_recv_request (peer, post, readch);
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "receiving a request from peer (%s) failed",
- peer->trans->peerinfo.identifier);
- }
- } else {
- ret = gf_rdma_recv_reply (peer, post);
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "receiving a reply from peer (%s) failed",
- peer->trans->peerinfo.identifier);
- }
- }
-
-out:
- if (ret == -1) {
- rpc_transport_disconnect (peer->trans);
- }
-
- return;
-}
-
-
-static void *
-gf_rdma_recv_completion_proc (void *data)
-{
- struct ibv_comp_channel *chan = NULL;
- gf_rdma_device_t *device = NULL;;
- gf_rdma_post_t *post = NULL;
- gf_rdma_peer_t *peer = NULL;
- struct ibv_cq *event_cq = NULL;
- struct ibv_wc wc = {0, };
- void *event_ctx = NULL;
- int32_t ret = 0;
-
- chan = data;
-
- while (1) {
- ret = ibv_get_cq_event (chan, &event_cq, &event_ctx);
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "ibv_get_cq_event failed, terminating recv "
- "thread %d (%d)", ret, errno);
- continue;
- }
-
- device = event_ctx;
-
- ret = ibv_req_notify_cq (event_cq, 0);
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "ibv_req_notify_cq on %s failed, terminating "
- "recv thread: %d (%d)",
- device->device_name, ret, errno);
- continue;
- }
-
- device = (gf_rdma_device_t *) event_ctx;
-
- while ((ret = ibv_poll_cq (event_cq, 1, &wc)) > 0) {
- post = (gf_rdma_post_t *) (long) wc.wr_id;
-
- pthread_mutex_lock (&device->qpreg.lock);
- {
- peer = __gf_rdma_lookup_peer (device,
- wc.qp_num);
-
- /*
- * keep a refcount on transport so that it
- * does not get freed because of some error
- * indicated by wc.status till we are done
- * with usage of peer and thereby that of trans.
- */
- if (peer != NULL) {
- rpc_transport_ref (peer->trans);
- }
- }
- pthread_mutex_unlock (&device->qpreg.lock);
-
- if (wc.status != IBV_WC_SUCCESS) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "recv work request on `%s' returned "
- "error (%d)", device->device_name,
- wc.status);
- if (peer) {
- rpc_transport_unref (peer->trans);
- rpc_transport_disconnect (peer->trans);
- }
-
- if (post) {
- gf_rdma_post_unref (post);
- }
- continue;
- }
-
- if (peer) {
- gf_rdma_process_recv (peer, &wc);
- rpc_transport_unref (peer->trans);
- } else {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_DEBUG,
- "could not lookup peer for qp_num: %d",
- wc.qp_num);
- }
-
- gf_rdma_post_unref (post);
- }
-
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_ERROR,
- "ibv_poll_cq on `%s' returned error "
- "(ret = %d, errno = %d)",
- device->device_name, ret, errno);
- continue;
- }
- ibv_ack_cq_events (event_cq, 1);
- }
-
- return NULL;
-}
-
-
-void
-gf_rdma_handle_failed_send_completion (gf_rdma_peer_t *peer, struct ibv_wc *wc)
-{
- gf_rdma_post_t *post = NULL;
- gf_rdma_device_t *device = NULL;
- gf_rdma_private_t *priv = NULL;
-
- if (peer != NULL) {
- priv = peer->trans->private;
- if (priv != NULL) {
- device = priv->device;
- }
- }
-
-
- post = (gf_rdma_post_t *) (long) wc->wr_id;
-
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "send work request on `%s' returned error "
- "wc.status = %d, wc.vendor_err = %d, post->buf = %p, "
- "wc.byte_len = %d, post->reused = %d",
- (device != NULL) ? device->device_name : NULL, wc->status,
- wc->vendor_err, post->buf, wc->byte_len, post->reused);
-
- if (wc->status == IBV_WC_RETRY_EXC_ERR) {
- gf_log ("rdma", GF_LOG_ERROR, "connection between client and"
- " server not working. check by running "
- "'ibv_srq_pingpong'. also make sure subnet manager"
- " is running (eg: 'opensm'), or check if rdma port is "
- "valid (or active) by running 'ibv_devinfo'. contact "
- "Gluster Support Team if the problem persists.");
- }
-
- if (peer) {
- rpc_transport_disconnect (peer->trans);
- }
-
- return;
-}
-
-
-void
-gf_rdma_handle_successful_send_completion (gf_rdma_peer_t *peer,
- struct ibv_wc *wc)
-{
- gf_rdma_post_t *post = NULL;
- int reads = 0, ret = 0;
- gf_rdma_header_t *header = NULL;
-
- if (wc->opcode != IBV_WC_RDMA_READ) {
- goto out;
- }
-
- post = (gf_rdma_post_t *)(long) wc->wr_id;
-
- pthread_mutex_lock (&post->lock);
- {
- reads = --post->ctx.gf_rdma_reads;
- }
- pthread_mutex_unlock (&post->lock);
-
- if (reads != 0) {
- /* if it is not the last rdma read, we've got nothing to do */
- goto out;
- }
-
- header = (gf_rdma_header_t *)post->buf;
-
- if (header->rm_type == GF_RDMA_NOMSG) {
- post->ctx.count = 1;
- post->ctx.vector[0].iov_len += post->ctx.vector[1].iov_len;
- }
-
- ret = gf_rdma_pollin_notify (peer, post);
- if ((ret == -1) && (peer != NULL)) {
- rpc_transport_disconnect (peer->trans);
- }
-
-out:
- return;
-}
-
-
-static void *
-gf_rdma_send_completion_proc (void *data)
-{
- struct ibv_comp_channel *chan = NULL;
- gf_rdma_post_t *post = NULL;
- gf_rdma_peer_t *peer = NULL;
- struct ibv_cq *event_cq = NULL;
- void *event_ctx = NULL;
- gf_rdma_device_t *device = NULL;
- struct ibv_wc wc = {0, };
- char is_request = 0;
- int32_t ret = 0, quota_ret = 0;
-
- chan = data;
- while (1) {
- ret = ibv_get_cq_event (chan, &event_cq, &event_ctx);
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "ibv_get_cq_event on failed, terminating "
- "send thread: %d (%d)", ret, errno);
- continue;
- }
-
- device = event_ctx;
-
- ret = ibv_req_notify_cq (event_cq, 0);
- if (ret) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "ibv_req_notify_cq on %s failed, terminating "
- "send thread: %d (%d)",
- device->device_name, ret, errno);
- continue;
- }
-
- while ((ret = ibv_poll_cq (event_cq, 1, &wc)) > 0) {
- post = (gf_rdma_post_t *) (long) wc.wr_id;
-
- pthread_mutex_lock (&device->qpreg.lock);
- {
- peer = __gf_rdma_lookup_peer (device, wc.qp_num);
-
- /*
- * keep a refcount on transport so that it
- * does not get freed because of some error
- * indicated by wc.status, till we are done
- * with usage of peer and thereby that of trans.
- */
- if (peer != NULL) {
- rpc_transport_ref (peer->trans);
- }
- }
- pthread_mutex_unlock (&device->qpreg.lock);
-
- if (wc.status != IBV_WC_SUCCESS) {
- gf_rdma_handle_failed_send_completion (peer, &wc);
- } else {
- gf_rdma_handle_successful_send_completion (peer,
- &wc);
- }
-
- if (post) {
- is_request = post->ctx.is_request;
-
- ret = gf_rdma_post_unref (post);
- if ((ret == 0)
- && (wc.status == IBV_WC_SUCCESS)
- && !is_request
- && (post->type == GF_RDMA_SEND_POST)
- && (peer != NULL)) {
- /* An GF_RDMA_RECV_POST can end up in
- * gf_rdma_send_completion_proc for
- * rdma-reads, and we do not take
- * quota for getting an GF_RDMA_RECV_POST.
- */
-
- /*
- * if it is request, quota is returned
- * after reply has come.
- */
- quota_ret = gf_rdma_quota_put (peer);
- if (quota_ret < 0) {
- gf_log ("rdma", GF_LOG_DEBUG,
- "failed to send "
- "message");
- }
- }
- }
-
- if (peer) {
- rpc_transport_unref (peer->trans);
- } else {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_DEBUG,
- "could not lookup peer for qp_num: %d",
- wc.qp_num);
- }
- }
-
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "ibv_poll_cq on `%s' returned error (ret = %d,"
- " errno = %d)",
- device->device_name, ret, errno);
- continue;
- }
-
- ibv_ack_cq_events (event_cq, 1);
- }
-
- return NULL;
-}
-
-
-static void
-gf_rdma_options_init (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- int32_t mtu = 0;
- data_t *temp = NULL;
-
- /* TODO: validate arguments from options below */
-
- priv = this->private;
- options = &priv->options;
- options->send_size = GLUSTERFS_RDMA_INLINE_THRESHOLD;/*this->ctx->page_size * 4; 512 KB*/
- options->recv_size = GLUSTERFS_RDMA_INLINE_THRESHOLD;/*this->ctx->page_size * 4; 512 KB*/
- options->send_count = 4096;
- options->recv_count = 4096;
- options->attr_timeout = GF_RDMA_TIMEOUT;
- options->attr_retry_cnt = GF_RDMA_RETRY_CNT;
- options->attr_rnr_retry = GF_RDMA_RNR_RETRY;
-
- temp = dict_get (this->options,
- "transport.rdma.work-request-send-count");
- if (temp)
- options->send_count = data_to_int32 (temp);
-
- temp = dict_get (this->options,
- "transport.rdma.work-request-recv-count");
- if (temp)
- options->recv_count = data_to_int32 (temp);
-
- temp = dict_get (this->options, "transport.rdma.attr-timeout");
-
- if (temp)
- options->attr_timeout = data_to_uint8 (temp);
-
- temp = dict_get (this->options, "transport.rdma.attr-retry-cnt");
-
- if (temp)
- options->attr_retry_cnt = data_to_uint8 (temp);
-
- temp = dict_get (this->options, "transport.rdma.attr-rnr-retry");
-
- if (temp)
- options->attr_rnr_retry = data_to_uint8 (temp);
-
- options->port = 1;
- temp = dict_get (this->options,
- "transport.rdma.port");
- if (temp)
- options->port = data_to_uint64 (temp);
-
- options->mtu = mtu = IBV_MTU_2048;
- temp = dict_get (this->options,
- "transport.rdma.mtu");
- if (temp)
- mtu = data_to_int32 (temp);
- switch (mtu) {
- case 256: options->mtu = IBV_MTU_256;
- break;
- case 512: options->mtu = IBV_MTU_512;
- break;
- case 1024: options->mtu = IBV_MTU_1024;
- break;
- case 2048: options->mtu = IBV_MTU_2048;
- break;
- case 4096: options->mtu = IBV_MTU_4096;
- break;
- default:
- if (temp)
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "%s: unrecognized MTU value '%s', defaulting "
- "to '2048'", this->name,
- data_to_str (temp));
- else
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_TRACE,
- "%s: defaulting MTU to '2048'",
- this->name);
- options->mtu = IBV_MTU_2048;
- break;
- }
-
- temp = dict_get (this->options,
- "transport.rdma.device-name");
- if (temp)
- options->device_name = gf_strdup (temp->data);
-
- return;
-}
-
-static void
-gf_rdma_queue_init (gf_rdma_queue_t *queue)
-{
- pthread_mutex_init (&queue->lock, NULL);
-
- queue->active_posts.next = &queue->active_posts;
- queue->active_posts.prev = &queue->active_posts;
- queue->passive_posts.next = &queue->passive_posts;
- queue->passive_posts.prev = &queue->passive_posts;
-}
-
-
-static gf_rdma_device_t *
-gf_rdma_get_device (rpc_transport_t *this, struct ibv_context *ibctx)
-{
- glusterfs_ctx_t *ctx = NULL;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- char *device_name = NULL;
- uint32_t port = 0;
- uint8_t active_port = 0;
- int32_t ret = 0;
- int32_t i = 0;
- gf_rdma_device_t *trav = NULL;
-
- priv = this->private;
- options = &priv->options;
- device_name = priv->options.device_name;
- ctx = this->ctx;
- trav = ctx->ib;
- port = priv->options.port;
-
- while (trav) {
- if ((!strcmp (trav->device_name, device_name)) &&
- (trav->port == port))
- break;
- trav = trav->next;
- }
-
- if (!trav) {
-
- trav = GF_CALLOC (1, sizeof (*trav),
- gf_common_mt_rdma_device_t);
- if (trav == NULL) {
- return NULL;
- }
-
- priv->device = trav;
-
- trav->context = ibctx;
-
- ret = ib_get_active_port (trav->context);
-
- if (ret < 0) {
- if (!port) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "Failed to find any active ports and "
- "none specified in volume file,"
- " exiting");
- GF_FREE (trav);
- return NULL;
- }
- }
-
- trav->request_ctx_pool = mem_pool_new (gf_rdma_request_context_t,
- GF_RDMA_POOL_SIZE);
- if (trav->request_ctx_pool == NULL) {
- return NULL;
- }
-
- trav->ioq_pool = mem_pool_new (gf_rdma_ioq_t, GF_RDMA_POOL_SIZE);
- if (trav->ioq_pool == NULL) {
- mem_pool_destroy (trav->request_ctx_pool);
- return NULL;
- }
-
- trav->reply_info_pool = mem_pool_new (gf_rdma_reply_info_t,
- GF_RDMA_POOL_SIZE);
- if (trav->reply_info_pool == NULL) {
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->ioq_pool);
- return NULL;
- }
-
-
- active_port = ret;
-
- if (port) {
- ret = ib_check_active_port (trav->context, port);
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "On device %s: provided port:%u is "
- "found to be offline, continuing to "
- "use the same port", device_name, port);
- }
- } else {
- priv->options.port = active_port;
- port = active_port;
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_TRACE,
- "Port unspecified in volume file using active "
- "port: %u", port);
- }
-
- trav->device_name = gf_strdup (device_name);
- trav->port = port;
-
- trav->next = ctx->ib;
- ctx->ib = trav;
-
- trav->send_chan = ibv_create_comp_channel (trav->context);
- if (!trav->send_chan) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: could not create send completion channel",
- device_name);
- mem_pool_destroy (trav->ioq_pool);
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->reply_info_pool);
- GF_FREE ((char *)trav->device_name);
- GF_FREE (trav);
- return NULL;
- }
-
- trav->recv_chan = ibv_create_comp_channel (trav->context);
- if (!trav->recv_chan) {
- mem_pool_destroy (trav->ioq_pool);
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->reply_info_pool);
- ibv_destroy_comp_channel (trav->send_chan);
- GF_FREE ((char *)trav->device_name);
- GF_FREE (trav);
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "could not create recv completion channel");
- /* TODO: cleanup current mess */
- return NULL;
- }
-
- if (gf_rdma_create_cq (this) < 0) {
- mem_pool_destroy (trav->ioq_pool);
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->reply_info_pool);
- ibv_destroy_comp_channel (trav->recv_chan);
- ibv_destroy_comp_channel (trav->send_chan);
- GF_FREE ((char *)trav->device_name);
- GF_FREE (trav);
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: could not create CQ",
- this->name);
- return NULL;
- }
-
- /* protection domain */
- trav->pd = ibv_alloc_pd (trav->context);
-
- if (!trav->pd) {
- mem_pool_destroy (trav->ioq_pool);
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->reply_info_pool);
- gf_rdma_destroy_cq (this);
- ibv_destroy_comp_channel (trav->recv_chan);
- ibv_destroy_comp_channel (trav->send_chan);
- GF_FREE ((char *)trav->device_name);
- GF_FREE (trav);
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: could not allocate protection domain",
- this->name);
- return NULL;
- }
-
- struct ibv_srq_init_attr attr = {
- .attr = {
- .max_wr = options->recv_count,
- .max_sge = 1
- }
- };
- trav->srq = ibv_create_srq (trav->pd, &attr);
-
- if (!trav->srq) {
- mem_pool_destroy (trav->ioq_pool);
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->reply_info_pool);
- ibv_dealloc_pd (trav->pd);
- gf_rdma_destroy_cq (this);
- ibv_destroy_comp_channel (trav->recv_chan);
- ibv_destroy_comp_channel (trav->send_chan);
- GF_FREE ((char *)trav->device_name);
- GF_FREE (trav);
-
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: could not create SRQ",
- this->name);
- return NULL;
- }
-
- /* queue init */
- gf_rdma_queue_init (&trav->sendq);
- gf_rdma_queue_init (&trav->recvq);
-
- if (gf_rdma_create_posts (this) < 0) {
- mem_pool_destroy (trav->ioq_pool);
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->reply_info_pool);
- ibv_dealloc_pd (trav->pd);
- gf_rdma_destroy_cq (this);
- ibv_destroy_comp_channel (trav->recv_chan);
- ibv_destroy_comp_channel (trav->send_chan);
- GF_FREE ((char *)trav->device_name);
- GF_FREE (trav);
-
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: could not allocate posts",
- this->name);
- return NULL;
- }
-
- /* completion threads */
- ret = pthread_create (&trav->send_thread,
- NULL,
- gf_rdma_send_completion_proc,
- trav->send_chan);
- if (ret) {
- gf_rdma_destroy_posts (this);
- mem_pool_destroy (trav->ioq_pool);
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->reply_info_pool);
- ibv_dealloc_pd (trav->pd);
- gf_rdma_destroy_cq (this);
- ibv_destroy_comp_channel (trav->recv_chan);
- ibv_destroy_comp_channel (trav->send_chan);
- GF_FREE ((char *)trav->device_name);
- GF_FREE (trav);
-
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "could not create send completion thread");
- return NULL;
- }
-
- ret = pthread_create (&trav->recv_thread,
- NULL,
- gf_rdma_recv_completion_proc,
- trav->recv_chan);
- if (ret) {
- gf_rdma_destroy_posts (this);
- mem_pool_destroy (trav->ioq_pool);
- mem_pool_destroy (trav->request_ctx_pool);
- mem_pool_destroy (trav->reply_info_pool);
- ibv_dealloc_pd (trav->pd);
- gf_rdma_destroy_cq (this);
- ibv_destroy_comp_channel (trav->recv_chan);
- ibv_destroy_comp_channel (trav->send_chan);
- GF_FREE ((char *)trav->device_name);
- GF_FREE (trav);
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "could not create recv completion thread");
- return NULL;
- }
-
- /* qpreg */
- pthread_mutex_init (&trav->qpreg.lock, NULL);
- for (i=0; i<42; i++) {
- trav->qpreg.ents[i].next = &trav->qpreg.ents[i];
- trav->qpreg.ents[i].prev = &trav->qpreg.ents[i];
- }
- }
- return trav;
-}
-
-static int32_t
-gf_rdma_init (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- struct ibv_device **dev_list;
- struct ibv_context *ib_ctx = NULL;
- int32_t ret = 0;
-
- priv = this->private;
- options = &priv->options;
-
- ibv_fork_init ();
- gf_rdma_options_init (this);
-
- {
- dev_list = ibv_get_device_list (NULL);
-
- if (!dev_list) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_CRITICAL,
- "Failed to get IB devices");
- ret = -1;
- goto cleanup;
- }
-
- if (!*dev_list) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_CRITICAL,
- "No IB devices found");
- ret = -1;
- goto cleanup;
- }
-
- if (!options->device_name) {
- if (*dev_list) {
- options->device_name =
- gf_strdup (ibv_get_device_name (*dev_list));
- } else {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_CRITICAL,
- "IB device list is empty. Check for "
- "'ib_uverbs' module");
- return -1;
- goto cleanup;
- }
- }
-
- while (*dev_list) {
- if (!strcmp (ibv_get_device_name (*dev_list),
- options->device_name)) {
- ib_ctx = ibv_open_device (*dev_list);
-
- if (!ib_ctx) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_ERROR,
- "Failed to get infiniband"
- "device context");
- ret = -1;
- goto cleanup;
- }
- break;
- }
- ++dev_list;
- }
-
- priv->device = gf_rdma_get_device (this, ib_ctx);
-
- if (!priv->device) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "could not create rdma device for %s",
- options->device_name);
- ret = -1;
- goto cleanup;
- }
- }
-
- priv->peer.trans = this;
- INIT_LIST_HEAD (&priv->peer.ioq);
-
- pthread_mutex_init (&priv->read_mutex, NULL);
- pthread_mutex_init (&priv->write_mutex, NULL);
- pthread_mutex_init (&priv->recv_mutex, NULL);
- pthread_cond_init (&priv->recv_cond, NULL);
-
-cleanup:
- if (-1 == ret) {
- if (ib_ctx)
- ibv_close_device (ib_ctx);
- }
-
- if (dev_list)
- ibv_free_device_list (dev_list);
-
- return ret;
-}
-
-
-static int32_t
-gf_rdma_disconnect (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- int32_t ret = 0;
-
- priv = this->private;
- gf_log_callingfn (this->name, GF_LOG_WARNING,
- "disconnect called (peer:%s)",
- this->peerinfo.identifier);
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- ret = __gf_rdma_disconnect (this);
- }
- pthread_mutex_unlock (&priv->write_mutex);
-
- return ret;
-}
-
-
-static int32_t
-__tcp_connect_finish (int fd)
-{
- int ret = -1;
- int optval = 0;
- socklen_t optlen = sizeof (int);
-
- ret = getsockopt (fd, SOL_SOCKET, SO_ERROR,
- (void *)&optval, &optlen);
-
- if (ret == 0 && optval)
- {
- errno = optval;
- ret = -1;
- }
-
- return ret;
-}
-
-static inline void
-gf_rdma_fill_handshake_data (char *buf, struct gf_rdma_nbio *nbio,
- gf_rdma_private_t *priv)
-{
- sprintf (buf,
- "QP1:RECV_BLKSIZE=%08x:SEND_BLKSIZE=%08x\n"
- "QP1:LID=%04x:QPN=%06x:PSN=%06x\n",
- priv->peer.recv_size,
- priv->peer.send_size,
- priv->peer.local_lid,
- priv->peer.local_qpn,
- priv->peer.local_psn);
-
- nbio->vector.iov_base = buf;
- nbio->vector.iov_len = strlen (buf) + 1;
- nbio->count = 1;
- return;
-}
-
-static inline void
-gf_rdma_fill_handshake_ack (char *buf, struct gf_rdma_nbio *nbio)
-{
- sprintf (buf, "DONE\n");
- nbio->vector.iov_base = buf;
- nbio->vector.iov_len = strlen (buf) + 1;
- nbio->count = 1;
- return;
-}
-
-static int
-gf_rdma_handshake_pollin (rpc_transport_t *this)
-{
- int ret = 0;
- gf_rdma_private_t *priv = NULL;
- char *buf = NULL;
- int32_t recv_buf_size = 0, send_buf_size;
- socklen_t sock_len = 0;
-
- priv = this->private;
- buf = priv->handshake.incoming.buf;
-
- if (priv->handshake.incoming.state == GF_RDMA_HANDSHAKE_COMPLETE) {
- return -1;
- }
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- while (priv->handshake.incoming.state != GF_RDMA_HANDSHAKE_COMPLETE)
- {
- switch (priv->handshake.incoming.state)
- {
- case GF_RDMA_HANDSHAKE_START:
- buf = priv->handshake.incoming.buf = GF_CALLOC (1, 256, gf_common_mt_char);
- gf_rdma_fill_handshake_data (buf, &priv->handshake.incoming, priv);
- buf[0] = 0;
- priv->handshake.incoming.state = GF_RDMA_HANDSHAKE_RECEIVING_DATA;
- break;
-
- case GF_RDMA_HANDSHAKE_RECEIVING_DATA:
- ret = __tcp_readv (this,
- &priv->handshake.incoming.vector,
- priv->handshake.incoming.count,
- &priv->handshake.incoming.pending_vector,
- &priv->handshake.incoming.pending_count);
- if (ret == -1) {
- goto unlock;
- }
-
- if (ret > 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "partial header read on NB socket. continue later");
- goto unlock;
- }
-
- if (!ret) {
- priv->handshake.incoming.state = GF_RDMA_HANDSHAKE_RECEIVED_DATA;
- }
- break;
-
- case GF_RDMA_HANDSHAKE_RECEIVED_DATA:
- ret = sscanf (buf,
- "QP1:RECV_BLKSIZE=%08x:SEND_BLKSIZE=%08x\n"
- "QP1:LID=%04x:QPN=%06x:PSN=%06x\n",
- &recv_buf_size,
- &send_buf_size,
- &priv->peer.remote_lid,
- &priv->peer.remote_qpn,
- &priv->peer.remote_psn);
-
- if ((ret != 5) && (strncmp (buf, "QP1:", 4))) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_CRITICAL,
- "%s: remote-host(%s)'s "
- "transport type is different",
- this->name,
- this->peerinfo.identifier);
- ret = -1;
- goto unlock;
- }
-
- if (recv_buf_size < priv->peer.recv_size)
- priv->peer.recv_size = recv_buf_size;
- if (send_buf_size < priv->peer.send_size)
- priv->peer.send_size = send_buf_size;
-
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_TRACE,
- "%s: transacted recv_size=%d "
- "send_size=%d",
- this->name, priv->peer.recv_size,
- priv->peer.send_size);
-
- priv->peer.quota = priv->peer.send_count;
-
- if (gf_rdma_connect_qp (this)) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_ERROR,
- "%s: failed to connect with "
- "remote QP", this->name);
- ret = -1;
- goto unlock;
- }
- gf_rdma_fill_handshake_ack (buf, &priv->handshake.incoming);
- buf[0] = 0;
- priv->handshake.incoming.state = GF_RDMA_HANDSHAKE_RECEIVING_ACK;
- break;
-
- case GF_RDMA_HANDSHAKE_RECEIVING_ACK:
- ret = __tcp_readv (this,
- &priv->handshake.incoming.vector,
- priv->handshake.incoming.count,
- &priv->handshake.incoming.pending_vector,
- &priv->handshake.incoming.pending_count);
- if (ret == -1) {
- goto unlock;
- }
-
- if (ret > 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "partial header read on NB "
- "socket. continue later");
- goto unlock;
- }
-
- if (!ret) {
- priv->handshake.incoming.state = GF_RDMA_HANDSHAKE_RECEIVED_ACK;
- }
- break;
-
- case GF_RDMA_HANDSHAKE_RECEIVED_ACK:
- if (strncmp (buf, "DONE", 4)) {
- gf_log (GF_RDMA_LOG_NAME,
- GF_LOG_DEBUG,
- "%s: handshake-3 did not "
- "return 'DONE' (%s)",
- this->name, buf);
- ret = -1;
- goto unlock;
- }
- ret = 0;
- priv->connected = 1;
- sock_len = sizeof (struct sockaddr_storage);
- getpeername (priv->sock,
- (struct sockaddr *) &this->peerinfo.sockaddr,
- &sock_len);
-
- GF_FREE (priv->handshake.incoming.buf);
- priv->handshake.incoming.buf = NULL;
- priv->handshake.incoming.state = GF_RDMA_HANDSHAKE_COMPLETE;
- }
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
-
- if (ret == -1) {
- rpc_transport_disconnect (this);
- } else {
- ret = 0;
- }
-
-
- if (!ret && priv->connected) {
- if (priv->is_server) {
- ret = rpc_transport_notify (priv->listener,
- RPC_TRANSPORT_ACCEPT,
- this);
- } else {
- ret = rpc_transport_notify (this, RPC_TRANSPORT_CONNECT,
- this);
- }
- }
-
- return ret;
-}
-
-static int
-gf_rdma_handshake_pollout (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- char *buf = NULL;
- int32_t ret = 0;
-
- priv = this->private;
- buf = priv->handshake.outgoing.buf;
-
- if (priv->handshake.outgoing.state == GF_RDMA_HANDSHAKE_COMPLETE) {
- return 0;
- }
-
- pthread_mutex_unlock (&priv->write_mutex);
- {
- while (priv->handshake.outgoing.state
- != GF_RDMA_HANDSHAKE_COMPLETE)
- {
- switch (priv->handshake.outgoing.state)
- {
- case GF_RDMA_HANDSHAKE_START:
- buf = priv->handshake.outgoing.buf
- = GF_CALLOC (1, 256, gf_common_mt_char);
- gf_rdma_fill_handshake_data (buf,
- &priv->handshake.outgoing, priv);
- priv->handshake.outgoing.state
- = GF_RDMA_HANDSHAKE_SENDING_DATA;
- break;
-
- case GF_RDMA_HANDSHAKE_SENDING_DATA:
- ret = __tcp_writev (this,
- &priv->handshake.outgoing.vector,
- priv->handshake.outgoing.count,
- &priv->handshake.outgoing.pending_vector,
- &priv->handshake.outgoing.pending_count);
- if (ret == -1) {
- goto unlock;
- }
-
- if (ret > 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "partial header read on NB "
- "socket. continue later");
- goto unlock;
- }
-
- if (!ret) {
- priv->handshake.outgoing.state
- = GF_RDMA_HANDSHAKE_SENT_DATA;
- }
- break;
-
- case GF_RDMA_HANDSHAKE_SENT_DATA:
- gf_rdma_fill_handshake_ack (buf,
- &priv->handshake.outgoing);
- priv->handshake.outgoing.state
- = GF_RDMA_HANDSHAKE_SENDING_ACK;
- break;
-
- case GF_RDMA_HANDSHAKE_SENDING_ACK:
- ret = __tcp_writev (this,
- &priv->handshake.outgoing.vector,
- priv->handshake.outgoing.count,
- &priv->handshake.outgoing.pending_vector,
- &priv->handshake.outgoing.pending_count);
-
- if (ret == -1) {
- goto unlock;
- }
-
- if (ret > 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "partial header read on NB "
- "socket. continue later");
- goto unlock;
- }
-
- if (!ret) {
- GF_FREE (priv->handshake.outgoing.buf);
- priv->handshake.outgoing.buf = NULL;
- priv->handshake.outgoing.state
- = GF_RDMA_HANDSHAKE_COMPLETE;
- }
- break;
- }
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
-
- if (ret == -1) {
- rpc_transport_disconnect (this);
- } else {
- ret = 0;
- }
-
- return ret;
-}
-
-static int
-gf_rdma_handshake_pollerr (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = this->private;
- char need_unref = 0, connected = 0;
-
- gf_log_callingfn (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "%s: peer (%s) disconnected, cleaning up",
- this->name, this->peerinfo.identifier);
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- __gf_rdma_teardown (this);
-
- connected = priv->connected;
- if (priv->sock != -1) {
- event_unregister (this->ctx->event_pool,
- priv->sock, priv->idx);
- need_unref = 1;
-
- if (close (priv->sock) != 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "close () - error: %s",
- strerror (errno));
- }
- priv->tcp_connected = priv->connected = 0;
- priv->sock = -1;
- }
-
- if (priv->handshake.incoming.buf) {
- GF_FREE (priv->handshake.incoming.buf);
- priv->handshake.incoming.buf = NULL;
- }
-
- priv->handshake.incoming.state = GF_RDMA_HANDSHAKE_START;
-
- if (priv->handshake.outgoing.buf) {
- GF_FREE (priv->handshake.outgoing.buf);
- priv->handshake.outgoing.buf = NULL;
- }
-
- priv->handshake.outgoing.state = GF_RDMA_HANDSHAKE_START;
- }
- pthread_mutex_unlock (&priv->write_mutex);
-
- if (connected) {
- rpc_transport_notify (this, RPC_TRANSPORT_DISCONNECT, this);
- }
-
- if (need_unref)
- rpc_transport_unref (this);
-
- return 0;
-}
-
-
-static int
-tcp_connect_finish (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
- int error = 0, ret = 0;
-
- priv = this->private;
- pthread_mutex_lock (&priv->write_mutex);
- {
- ret = __tcp_connect_finish (priv->sock);
-
- if (!ret) {
- this->myinfo.sockaddr_len =
- sizeof (this->myinfo.sockaddr);
- ret = getsockname (priv->sock,
- (struct sockaddr *)&this->myinfo.sockaddr,
- &this->myinfo.sockaddr_len);
- if (ret == -1)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "getsockname on new client-socket %d "
- "failed (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- error = 1;
- goto unlock;
- }
-
- gf_rdma_get_transport_identifiers (this);
- priv->tcp_connected = 1;
- }
-
- if (ret == -1 && errno != EINPROGRESS) {
- gf_log (this->name, GF_LOG_ERROR,
- "tcp connect to %s failed (%s)",
- this->peerinfo.identifier, strerror (errno));
- error = 1;
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
-
- if (error) {
- rpc_transport_disconnect (this);
- }
-
- return ret;
-}
-
-static int
-gf_rdma_event_handler (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- rpc_transport_t *this = NULL;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_options_t *options = NULL;
- int ret = 0;
-
- this = data;
- priv = this->private;
- if (!priv->tcp_connected) {
- ret = tcp_connect_finish (this);
- if (priv->tcp_connected) {
- options = &priv->options;
-
- priv->peer.send_count = options->send_count;
- priv->peer.recv_count = options->recv_count;
- priv->peer.send_size = options->send_size;
- priv->peer.recv_size = options->recv_size;
-
- if ((ret = gf_rdma_create_qp (this)) < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: could not create QP",
- this->name);
- rpc_transport_disconnect (this);
- }
- }
- }
-
- if (!ret && poll_out && priv->tcp_connected) {
- ret = gf_rdma_handshake_pollout (this);
- }
-
- if (!ret && !poll_err && poll_in && priv->tcp_connected) {
- if (priv->handshake.incoming.state
- == GF_RDMA_HANDSHAKE_COMPLETE) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: pollin received on tcp socket (peer: %s) "
- "after handshake is complete",
- this->name, this->peerinfo.identifier);
- gf_rdma_handshake_pollerr (this);
- return 0;
- }
-
- ret = gf_rdma_handshake_pollin (this);
- if (ret < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_WARNING,
- "handshake pollin failed");
- }
- }
-
- if (ret < 0 || poll_err) {
- ret = gf_rdma_handshake_pollerr (this);
- }
-
- return 0;
-}
-
-static int
-__tcp_nonblock (int fd)
-{
- int flags = 0;
- int ret = -1;
-
- flags = fcntl (fd, F_GETFL);
-
- if (flags != -1)
- ret = fcntl (fd, F_SETFL, flags | O_NONBLOCK);
-
- return ret;
-}
-
-static int32_t
-gf_rdma_connect (struct rpc_transport *this, int port)
-{
- gf_rdma_private_t *priv = NULL;
- int32_t ret = 0;
- gf_boolean_t non_blocking = 1;
- union gf_sock_union sock_union = {{0, }, };
- socklen_t sockaddr_len = 0;
-
- priv = this->private;
-
- ret = gf_rdma_client_get_remote_sockaddr (this,
- &sock_union.sa,
- &sockaddr_len, port);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "cannot get remote address to connect");
- return ret;
- }
-
-
- pthread_mutex_lock (&priv->write_mutex);
- {
- if (priv->sock != -1) {
- ret = 0;
- goto unlock;
- }
-
- priv->sock = socket (sock_union.sa.sa_family, SOCK_STREAM, 0);
-
- if (priv->sock == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "socket () - error: %s", strerror (errno));
- ret = -errno;
- goto unlock;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "socket fd = %d", priv->sock);
-
- memcpy (&this->peerinfo.sockaddr, &sock_union.storage,
- sockaddr_len);
- this->peerinfo.sockaddr_len = sockaddr_len;
-
- if (port > 0)
- sock_union.sin.sin_port = htons (port);
-
- ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family =
- ((struct sockaddr *)&this->peerinfo.sockaddr)->sa_family;
-
- if (non_blocking)
- {
- ret = __tcp_nonblock (priv->sock);
-
- if (ret == -1)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set socket %d to non "
- "blocking mode (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
- }
-
- ret = gf_rdma_client_bind (this,
- (struct sockaddr *)&this->myinfo.sockaddr,
- &this->myinfo.sockaddr_len,
- priv->sock);
- if (ret == -1)
- {
- gf_log (this->name, GF_LOG_WARNING,
- "client bind failed: %s", strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
-
- ret = connect (priv->sock,
- (struct sockaddr *)&this->peerinfo.sockaddr,
- this->peerinfo.sockaddr_len);
- if (ret == -1 && errno != EINPROGRESS)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "connection attempt failed (%s)",
- strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
-
- priv->tcp_connected = priv->connected = 0;
-
- rpc_transport_ref (this);
-
- priv->handshake.incoming.state = GF_RDMA_HANDSHAKE_START;
- priv->handshake.outgoing.state = GF_RDMA_HANDSHAKE_START;
-
- priv->idx = event_register (this->ctx->event_pool,
- priv->sock, gf_rdma_event_handler,
- this, 1, 1);
- }
-unlock:
- pthread_mutex_unlock (&priv->write_mutex);
-
- return ret;
-}
-
-static int
-gf_rdma_server_event_handler (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- int32_t main_sock = -1;
- rpc_transport_t *this = NULL, *trans = NULL;
- gf_rdma_private_t *priv = NULL;
- gf_rdma_private_t *trans_priv = NULL;
- gf_rdma_options_t *options = NULL;
-
- if (!poll_in) {
- return 0;
- }
-
- trans = data;
- trans_priv = (gf_rdma_private_t *) trans->private;
-
- this = GF_CALLOC (1, sizeof (rpc_transport_t),
- gf_common_mt_rpc_transport_t);
- if (this == NULL) {
- return -1;
- }
-
- this->listener = trans;
-
- priv = GF_CALLOC (1, sizeof (gf_rdma_private_t),
- gf_common_mt_rdma_private_t);
- if (priv == NULL) {
- GF_FREE (priv);
- return -1;
- }
- this->private = priv;
- /* Copy all the rdma related values in priv, from trans_priv
- as other than QP, all the values remain same */
- priv->device = trans_priv->device;
- priv->options = trans_priv->options;
- priv->is_server = 1;
- priv->listener = trans;
-
- options = &priv->options;
-
- this->ops = trans->ops;
- this->init = trans->init;
- this->fini = trans->fini;
- this->ctx = trans->ctx;
- this->name = gf_strdup (trans->name);
- this->notify = trans->notify;
- this->mydata = trans->mydata;
-
- memcpy (&this->myinfo.sockaddr, &trans->myinfo.sockaddr,
- trans->myinfo.sockaddr_len);
- this->myinfo.sockaddr_len = trans->myinfo.sockaddr_len;
-
- main_sock = (trans_priv)->sock;
- this->peerinfo.sockaddr_len = sizeof (this->peerinfo.sockaddr);
- priv->sock = accept (main_sock,
- (struct sockaddr *)&this->peerinfo.sockaddr,
- &this->peerinfo.sockaddr_len);
- if (priv->sock == -1) {
- gf_log ("rdma/server", GF_LOG_ERROR,
- "accept() failed: %s",
- strerror (errno));
- GF_FREE (this->private);
- GF_FREE (this);
- return -1;
- }
-
- priv->peer.trans = this;
- rpc_transport_ref (this);
-
- gf_rdma_get_transport_identifiers (this);
-
- priv->tcp_connected = 1;
- priv->handshake.incoming.state = GF_RDMA_HANDSHAKE_START;
- priv->handshake.outgoing.state = GF_RDMA_HANDSHAKE_START;
-
- priv->peer.send_count = options->send_count;
- priv->peer.recv_count = options->recv_count;
- priv->peer.send_size = options->send_size;
- priv->peer.recv_size = options->recv_size;
- INIT_LIST_HEAD (&priv->peer.ioq);
-
- if (gf_rdma_create_qp (this) < 0) {
- gf_log (GF_RDMA_LOG_NAME, GF_LOG_ERROR,
- "%s: could not create QP",
- this->name);
- rpc_transport_disconnect (this);
- return -1;
- }
-
- priv->idx = event_register (this->ctx->event_pool, priv->sock,
- gf_rdma_event_handler, this, 1, 1);
-
- pthread_mutex_init (&priv->read_mutex, NULL);
- pthread_mutex_init (&priv->write_mutex, NULL);
- pthread_mutex_init (&priv->recv_mutex, NULL);
- /* pthread_cond_init (&priv->recv_cond, NULL); */
- return 0;
-}
-
-static int32_t
-gf_rdma_listen (rpc_transport_t *this)
-{
- union gf_sock_union sock_union = {{0, }, };
- socklen_t sockaddr_len = 0;
- gf_rdma_private_t *priv = NULL;
- int opt = 1, ret = 0;
- char service[NI_MAXSERV], host[NI_MAXHOST];
-
- priv = this->private;
- memset (&sock_union, 0, sizeof (sock_union));
- ret = gf_rdma_server_get_local_sockaddr (this,
- &sock_union.sa,
- &sockaddr_len);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "cannot find network address of server to bind to");
- goto err;
- }
-
- priv->sock = socket (sock_union.sa.sa_family, SOCK_STREAM, 0);
- if (priv->sock == -1) {
- gf_log ("rdma/server", GF_LOG_CRITICAL,
- "init: failed to create socket, error: %s",
- strerror (errno));
- GF_FREE (this->private);
- ret = -1;
- goto err;
- }
-
- memcpy (&this->myinfo.sockaddr, &sock_union.storage, sockaddr_len);
- this->myinfo.sockaddr_len = sockaddr_len;
-
- ret = getnameinfo ((struct sockaddr *)&this->myinfo.sockaddr,
- this->myinfo.sockaddr_len,
- host, sizeof (host),
- service, sizeof (service),
- NI_NUMERICHOST);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "getnameinfo failed (%s)", gai_strerror (ret));
- goto err;
- }
- sprintf (this->myinfo.identifier, "%s:%s", host, service);
-
- setsockopt (priv->sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof (opt));
- if (bind (priv->sock, &sock_union.sa, sockaddr_len) != 0) {
- ret = -1;
- gf_log ("rdma/server", GF_LOG_ERROR,
- "init: failed to bind to socket for %s (%s)",
- this->myinfo.identifier, strerror (errno));
- goto err;
- }
-
- if (listen (priv->sock, 10) != 0) {
- gf_log ("rdma/server", GF_LOG_ERROR,
- "init: listen () failed on socket for %s (%s)",
- this->myinfo.identifier, strerror (errno));
- ret = -1;
- goto err;
- }
-
- /* Register the main socket */
- priv->idx = event_register (this->ctx->event_pool, priv->sock,
- gf_rdma_server_event_handler,
- rpc_transport_ref (this), 1, 0);
-
-err:
- return ret;
-}
-
-struct rpc_transport_ops tops = {
- .submit_request = gf_rdma_submit_request,
- .submit_reply = gf_rdma_submit_reply,
- .connect = gf_rdma_connect,
- .disconnect = gf_rdma_disconnect,
- .listen = gf_rdma_listen,
-};
-
-int32_t
-init (rpc_transport_t *this)
-{
- gf_rdma_private_t *priv = NULL;
-
- priv = GF_CALLOC (1, sizeof (*priv), gf_common_mt_rdma_private_t);
- if (!priv)
- return -1;
-
- this->private = priv;
- priv->sock = -1;
-
- if (gf_rdma_init (this)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to initialize IB Device");
- return -1;
- }
-
- return 0;
-}
-
-void
-fini (struct rpc_transport *this)
-{
- /* TODO: verify this function does graceful finish */
- gf_rdma_private_t *priv = NULL;
-
- priv = this->private;
-
- this->private = NULL;
-
- if (priv) {
- pthread_mutex_destroy (&priv->recv_mutex);
- pthread_mutex_destroy (&priv->write_mutex);
- pthread_mutex_destroy (&priv->read_mutex);
-
- /* pthread_cond_destroy (&priv->recv_cond); */
- if (priv->sock != -1) {
- event_unregister (this->ctx->event_pool,
- priv->sock, priv->idx);
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "called fini on transport: %p", this);
- GF_FREE (priv);
- }
- return;
-}
-
-/* TODO: expand each option */
-struct volume_options options[] = {
- { .key = {"transport.rdma.port",
- "rdma-port"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 4,
- .description = "check the option by 'ibv_devinfo'"
- },
- { .key = {"transport.rdma.mtu",
- "rdma-mtu"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"transport.rdma.device-name",
- "rdma-device-name"},
- .type = GF_OPTION_TYPE_ANY,
- .description = "check by 'ibv_devinfo'"
- },
- { .key = {"transport.rdma.work-request-send-count",
- "rdma-work-request-send-count"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"transport.rdma.work-request-recv-count",
- "rdma-work-request-recv-count"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"remote-port",
- "transport.remote-port",
- "transport.rdma.remote-port"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.rdma.attr-timeout",
- "rdma-attr-timeout"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.rdma.attr-retry-cnt",
- "rdma-attr-retry-cnt"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.rdma.attr-rnr-retry",
- "rdma-attr-rnr-retry"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.rdma.listen-port", "listen-port"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.rdma.connect-path", "connect-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.rdma.bind-path", "bind-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.rdma.listen-path", "listen-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.address-family",
- "address-family"},
- .value = {"inet", "inet6", "inet/inet6", "inet6/inet",
- "unix", "inet-sdp" },
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"transport.socket.lowlat"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {NULL} }
-};
diff --git a/rpc/rpc-transport/rdma/src/rdma.h b/rpc/rpc-transport/rdma/src/rdma.h
deleted file mode 100644
index 687d6005feb..00000000000
--- a/rpc/rpc-transport/rdma/src/rdma.h
+++ /dev/null
@@ -1,403 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef _XPORT_RDMA_H
-#define _XPORT_RDMA_H
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef MAX_IOVEC
-#define MAX_IOVEC 16
-#endif /* MAX_IOVEC */
-
-#include "rpc-clnt.h"
-#include "rpc-transport.h"
-#include "xlator.h"
-#include "event.h"
-#include <stdio.h>
-#include <list.h>
-#include <arpa/inet.h>
-#include <infiniband/verbs.h>
-
-/* FIXME: give appropriate values to these macros */
-#define GF_DEFAULT_RDMA_LISTEN_PORT (GF_DEFAULT_BASE_PORT + 1)
-
-/* If you are changing GF_RDMA_MAX_SEGMENTS, please make sure to update
- * GLUSTERFS_GF_RDMA_MAX_HEADER_SIZE defined in glusterfs.h .
- */
-#define GF_RDMA_MAX_SEGMENTS 8
-
-#define GF_RDMA_VERSION 1
-#define GF_RDMA_POOL_SIZE 512
-
-/* Additional attributes */
-#define GF_RDMA_TIMEOUT 14
-#define GF_RDMA_RETRY_CNT 7
-#define GF_RDMA_RNR_RETRY 7
-
-typedef enum gf_rdma_errcode {
- ERR_VERS = 1,
- ERR_CHUNK = 2
-}gf_rdma_errcode_t;
-
-struct gf_rdma_err_vers {
- uint32_t gf_rdma_vers_low; /* Version range supported by peer */
- uint32_t gf_rdma_vers_high;
-}__attribute__ ((packed));
-typedef struct gf_rdma_err_vers gf_rdma_err_vers_t;
-
-typedef enum gf_rdma_proc {
- GF_RDMA_MSG = 0, /* An RPC call or reply msg */
- GF_RDMA_NOMSG = 1, /* An RPC call or reply msg - separate body */
- GF_RDMA_MSGP = 2, /* An RPC call or reply msg with padding */
- GF_RDMA_DONE = 3, /* Client signals reply completion */
- GF_RDMA_ERROR = 4 /* An RPC RDMA encoding error */
-}gf_rdma_proc_t;
-
-typedef enum gf_rdma_chunktype {
- gf_rdma_noch = 0, /* no chunk */
- gf_rdma_readch, /* some argument through rdma read */
- gf_rdma_areadch, /* entire request through rdma read */
- gf_rdma_writech, /* some result through rdma write */
- gf_rdma_replych /* entire reply through rdma write */
-}gf_rdma_chunktype_t;
-
-/* If you are modifying __gf_rdma_header, please make sure to change
- * GLUSTERFS_GF_RDMA_MAX_HEADER_SIZE defined in glusterfs.h to reflect your changes
- */
-struct __gf_rdma_header {
- uint32_t rm_xid; /* Mirrors the RPC header xid */
- uint32_t rm_vers; /* Version of this protocol */
- uint32_t rm_credit; /* Buffers requested/granted */
- uint32_t rm_type; /* Type of message (enum gf_rdma_proc) */
- union {
- struct { /* no chunks */
- uint32_t rm_empty[3]; /* 3 empty chunk lists */
- }__attribute__((packed)) rm_nochunks;
-
- struct { /* no chunks and padded */
- uint32_t rm_align; /* Padding alignment */
- uint32_t rm_thresh; /* Padding threshold */
- uint32_t rm_pempty[3]; /* 3 empty chunk lists */
- }__attribute__((packed)) rm_padded;
-
- struct {
- uint32_t rm_type;
- gf_rdma_err_vers_t rm_version;
- }__attribute__ ((packed)) rm_error;
-
- uint32_t rm_chunks[0]; /* read, write and reply chunks */
- }__attribute__ ((packed)) rm_body;
-} __attribute__((packed));
-typedef struct __gf_rdma_header gf_rdma_header_t;
-
-/* If you are modifying __gf_rdma_segment or __gf_rdma_read_chunk, please make sure
- * to change GLUSTERFS_GF_RDMA_MAX_HEADER_SIZE defined in glusterfs.h to reflect
- * your changes.
- */
-struct __gf_rdma_segment {
- uint32_t rs_handle; /* Registered memory handle */
- uint32_t rs_length; /* Length of the chunk in bytes */
- uint64_t rs_offset; /* Chunk virtual address or offset */
-} __attribute__((packed));
-typedef struct __gf_rdma_segment gf_rdma_segment_t;
-
-/* read chunk(s), encoded as a linked list. */
-struct __gf_rdma_read_chunk {
- uint32_t rc_discrim; /* 1 indicates presence */
- uint32_t rc_position; /* Position in XDR stream */
- gf_rdma_segment_t rc_target;
-} __attribute__((packed));
-typedef struct __gf_rdma_read_chunk gf_rdma_read_chunk_t;
-
-/* write chunk, and reply chunk. */
-struct __gf_rdma_write_chunk {
- gf_rdma_segment_t wc_target;
-} __attribute__((packed));
-typedef struct __gf_rdma_write_chunk gf_rdma_write_chunk_t;
-
-/* write chunk(s), encoded as a counted array. */
-struct __gf_rdma_write_array {
- uint32_t wc_discrim; /* 1 indicates presence */
- uint32_t wc_nchunks; /* Array count */
- struct __gf_rdma_write_chunk wc_array[0];
-} __attribute__((packed));
-typedef struct __gf_rdma_write_array gf_rdma_write_array_t;
-
-/* options per transport end point */
-struct __gf_rdma_options {
- int32_t port;
- char *device_name;
- enum ibv_mtu mtu;
- int32_t send_count;
- int32_t recv_count;
- uint64_t recv_size;
- uint64_t send_size;
- uint8_t attr_timeout;
- uint8_t attr_retry_cnt;
- uint8_t attr_rnr_retry;
-};
-typedef struct __gf_rdma_options gf_rdma_options_t;
-
-struct __gf_rdma_reply_info {
- uint32_t rm_xid; /* xid in network endian */
- gf_rdma_chunktype_t type; /*
- * can be either gf_rdma_replych
- * or gf_rdma_writech.
- */
- gf_rdma_write_array_t *wc_array;
- struct mem_pool *pool;
-};
-typedef struct __gf_rdma_reply_info gf_rdma_reply_info_t;
-
-struct __gf_rdma_ioq {
- union {
- struct list_head list;
- struct {
- struct __gf_rdma_ioq *next;
- struct __gf_rdma_ioq *prev;
- };
- };
-
- char is_request;
- struct iovec rpchdr[MAX_IOVEC];
- int rpchdr_count;
- struct iovec proghdr[MAX_IOVEC];
- int proghdr_count;
- struct iovec prog_payload[MAX_IOVEC];
- int prog_payload_count;
-
- struct iobref *iobref;
-
- union {
- struct __gf_rdma_ioq_request {
- /* used to build reply_chunk for GF_RDMA_NOMSG type msgs */
- struct iovec rsphdr_vec[MAX_IOVEC];
- int rsphdr_count;
-
- /*
- * used to build write_array during operations like
- * read.
- */
- struct iovec rsp_payload[MAX_IOVEC];
- int rsp_payload_count;
-
- struct rpc_req *rpc_req; /* FIXME: hack! hack! should be
- * cleaned up later
- */
- struct iobref *rsp_iobref;
- }request;
-
- gf_rdma_reply_info_t *reply_info;
- }msg;
-
- struct mem_pool *pool;
-};
-typedef struct __gf_rdma_ioq gf_rdma_ioq_t;
-
-typedef enum __gf_rdma_send_post_type {
- GF_RDMA_SEND_POST_NO_CHUNKLIST, /* post which is sent using rdma-send
- * and the msg carries no
- * chunklists.
- */
- GF_RDMA_SEND_POST_READ_CHUNKLIST, /* post which is sent using rdma-send
- * and the msg carries only read
- * chunklist.
- */
- GF_RDMA_SEND_POST_WRITE_CHUNKLIST, /* post which is sent using
- * rdma-send and the msg carries
- * only write chunklist.
- */
- GF_RDMA_SEND_POST_READ_WRITE_CHUNKLIST, /* post which is sent using
- * rdma-send and the msg
- * carries both read and
- * write chunklists.
- */
- GF_RDMA_SEND_POST_GF_RDMA_READ, /* RDMA read */
- GF_RDMA_SEND_POST_GF_RDMA_WRITE, /* RDMA write */
-}gf_rdma_send_post_type_t;
-
-/* represents one communication peer, two per transport_t */
-struct __gf_rdma_peer {
- rpc_transport_t *trans;
- struct ibv_qp *qp;
-
- int32_t recv_count;
- int32_t send_count;
- int32_t recv_size;
- int32_t send_size;
-
- int32_t quota;
- union {
- struct list_head ioq;
- struct {
- gf_rdma_ioq_t *ioq_next;
- gf_rdma_ioq_t *ioq_prev;
- };
- };
-
- /* QP attributes, needed to connect with remote QP */
- int32_t local_lid;
- int32_t local_psn;
- int32_t local_qpn;
- int32_t remote_lid;
- int32_t remote_psn;
- int32_t remote_qpn;
-};
-typedef struct __gf_rdma_peer gf_rdma_peer_t;
-
-struct __gf_rdma_post_context {
- struct ibv_mr *mr[GF_RDMA_MAX_SEGMENTS];
- int mr_count;
- struct iovec vector[MAX_IOVEC];
- int count;
- struct iobref *iobref;
- struct iobuf *hdr_iobuf;
- char is_request;
- int gf_rdma_reads;
- gf_rdma_reply_info_t *reply_info;
-};
-typedef struct __gf_rdma_post_context gf_rdma_post_context_t;
-
-typedef enum {
- GF_RDMA_SEND_POST,
- GF_RDMA_RECV_POST
-} gf_rdma_post_type_t;
-
-struct __gf_rdma_post {
- struct __gf_rdma_post *next, *prev;
- struct ibv_mr *mr;
- char *buf;
- int32_t buf_size;
- char aux;
- int32_t reused;
- struct __gf_rdma_device *device;
- gf_rdma_post_type_t type;
- gf_rdma_post_context_t ctx;
- int refcount;
- pthread_mutex_t lock;
-};
-typedef struct __gf_rdma_post gf_rdma_post_t;
-
-struct __gf_rdma_queue {
- gf_rdma_post_t active_posts, passive_posts;
- int32_t active_count, passive_count;
- pthread_mutex_t lock;
-};
-typedef struct __gf_rdma_queue gf_rdma_queue_t;
-
-struct __gf_rdma_qpreg {
- pthread_mutex_t lock;
- int32_t count;
- struct _qpent {
- struct _qpent *next, *prev;
- int32_t qp_num;
- gf_rdma_peer_t *peer;
- } ents[42];
-};
-typedef struct __gf_rdma_qpreg gf_rdma_qpreg_t;
-
-/* context per device, stored in global glusterfs_ctx_t->ib */
-struct __gf_rdma_device {
- struct __gf_rdma_device *next;
- const char *device_name;
- struct ibv_context *context;
- int32_t port;
- struct ibv_pd *pd;
- struct ibv_srq *srq;
- gf_rdma_qpreg_t qpreg;
- struct ibv_comp_channel *send_chan, *recv_chan;
- struct ibv_cq *send_cq, *recv_cq;
- gf_rdma_queue_t sendq, recvq;
- pthread_t send_thread, recv_thread;
- struct mem_pool *request_ctx_pool;
- struct mem_pool *ioq_pool;
- struct mem_pool *reply_info_pool;
-};
-typedef struct __gf_rdma_device gf_rdma_device_t;
-
-typedef enum {
- GF_RDMA_HANDSHAKE_START = 0,
- GF_RDMA_HANDSHAKE_SENDING_DATA,
- GF_RDMA_HANDSHAKE_RECEIVING_DATA,
- GF_RDMA_HANDSHAKE_SENT_DATA,
- GF_RDMA_HANDSHAKE_RECEIVED_DATA,
- GF_RDMA_HANDSHAKE_SENDING_ACK,
- GF_RDMA_HANDSHAKE_RECEIVING_ACK,
- GF_RDMA_HANDSHAKE_RECEIVED_ACK,
- GF_RDMA_HANDSHAKE_COMPLETE,
-} gf_rdma_handshake_state_t;
-
-struct gf_rdma_nbio {
- int state;
- char *buf;
- int count;
- struct iovec vector;
- struct iovec *pending_vector;
- int pending_count;
-};
-
-struct __gf_rdma_request_context {
- struct ibv_mr *mr[GF_RDMA_MAX_SEGMENTS];
- int mr_count;
- struct mem_pool *pool;
- gf_rdma_peer_t *peer;
- struct iobref *iobref;
- struct iobref *rsp_iobref;
-};
-typedef struct __gf_rdma_request_context gf_rdma_request_context_t;
-
-struct __gf_rdma_private {
- int32_t sock;
- int32_t idx;
- unsigned char connected;
- unsigned char tcp_connected;
- unsigned char ib_connected;
- in_addr_t addr;
- unsigned short port;
-
- /* IB Verbs Driver specific variables, pointers */
- gf_rdma_peer_t peer;
- struct __gf_rdma_device *device;
- gf_rdma_options_t options;
-
- /* Used by trans->op->receive */
- char *data_ptr;
- int32_t data_offset;
- int32_t data_len;
-
- /* Mutex */
- pthread_mutex_t read_mutex;
- pthread_mutex_t write_mutex;
- pthread_barrier_t handshake_barrier;
- char handshake_ret;
- char is_server;
- rpc_transport_t *listener;
-
- pthread_mutex_t recv_mutex;
- pthread_cond_t recv_cond;
-
- /* used during gf_rdma_handshake */
- struct {
- struct gf_rdma_nbio incoming;
- struct gf_rdma_nbio outgoing;
- int state;
- gf_rdma_header_t header;
- char *buf;
- size_t size;
- } handshake;
-};
-typedef struct __gf_rdma_private gf_rdma_private_t;
-
-#endif /* _XPORT_GF_RDMA_H */
diff --git a/rpc/rpc-transport/socket/src/Makefile.am b/rpc/rpc-transport/socket/src/Makefile.am
index 2c918c7e313..7b488583771 100644
--- a/rpc/rpc-transport/socket/src/Makefile.am
+++ b/rpc/rpc-transport/socket/src/Makefile.am
@@ -1,15 +1,22 @@
-noinst_HEADERS = socket.h name.h
+noinst_HEADERS = socket.h name.h socket-mem-types.h
rpctransport_LTLIBRARIES = socket.la
rpctransportdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/rpc-transport
-socket_la_LDFLAGS = -module -avoidversion
+socket_la_LDFLAGS = -module -avoid-version
socket_la_SOURCES = socket.c name.c
-socket_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+socket_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ -lssl
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src/ \
- -I$(top_srcdir)/rpc/xdr/src/ -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src/ \
+ -I$(top_srcdir)/rpc/xdr/src/ \
+ -I$(top_builddir)/rpc/xdr/src/
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES = *~
diff --git a/rpc/rpc-transport/socket/src/name.c b/rpc/rpc-transport/socket/src/name.c
index d37c83e1857..9286bbb236d 100644
--- a/rpc/rpc-transport/socket/src/name.c
+++ b/rpc/rpc-transport/socket/src/name.c
@@ -15,723 +15,754 @@
#include <netdb.h>
#include <string.h>
-#ifdef CLIENT_PORT_CEILING
-#undef CLIENT_PORT_CEILING
-#endif
-
-#define CLIENT_PORT_CEILING 1024
-
#ifndef AF_INET_SDP
#define AF_INET_SDP 27
#endif
#include "rpc-transport.h"
#include "socket.h"
-#include "common-utils.h"
-
-int32_t
-gf_resolve_ip6 (const char *hostname,
- uint16_t port,
- int family,
- void **dnscache,
- struct addrinfo **addr_info);
+#include <glusterfs/common-utils.h>
-static int32_t
-af_inet_bind_to_port_lt_ceiling (int fd, struct sockaddr *sockaddr,
- socklen_t sockaddr_len, int ceiling)
+static void
+_assign_port(struct sockaddr *sockaddr, uint16_t port)
{
- int32_t ret = -1;
- /* struct sockaddr_in sin = {0, }; */
- uint16_t port = ceiling - 1;
-
- while (port)
- {
- switch (sockaddr->sa_family)
- {
- case AF_INET6:
- ((struct sockaddr_in6 *)sockaddr)->sin6_port = htons (port);
- break;
-
- case AF_INET_SDP:
- case AF_INET:
- ((struct sockaddr_in *)sockaddr)->sin_port = htons (port);
- break;
- }
-
- ret = bind (fd, sockaddr, sockaddr_len);
-
- if (ret == 0)
- break;
-
- if (ret == -1 && errno == EACCES)
- break;
-
- port--;
- }
+ switch (sockaddr->sa_family) {
+ case AF_INET6:
+ ((struct sockaddr_in6 *)sockaddr)->sin6_port = htons(port);
+ break;
- return ret;
+ case AF_INET_SDP:
+ case AF_INET:
+ ((struct sockaddr_in *)sockaddr)->sin_port = htons(port);
+ break;
+ }
}
static int32_t
-af_unix_client_bind (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t sockaddr_len,
- int sock)
+af_inet_bind_to_port_lt_ceiling(int fd, struct sockaddr *sockaddr,
+ socklen_t sockaddr_len, uint32_t ceiling)
{
- data_t *path_data = NULL;
- struct sockaddr_un *addr = NULL;
- int32_t ret = 0;
-
- path_data = dict_get (this->options, "transport.socket.bind-path");
- if (path_data) {
- char *path = data_to_str (path_data);
- if (!path || strlen (path) > UNIX_PATH_MAX) {
- gf_log (this->name, GF_LOG_TRACE,
- "bind-path not specified for unix socket, "
- "letting connect to assign default value");
- goto err;
- }
+#if GF_DISABLE_PRIVPORT_TRACKING
+ _assign_port(sockaddr, 0);
+ return bind(fd, sockaddr, sockaddr_len);
+#else
+ int32_t ret = -1;
+ uint16_t port = ceiling - 1;
+ unsigned char ports[GF_PORT_ARRAY_SIZE] = {
+ 0,
+ };
+ int i = 0;
- addr = (struct sockaddr_un *) sockaddr;
- strcpy (addr->sun_path, path);
- ret = bind (sock, (struct sockaddr *)addr, sockaddr_len);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot bind to unix-domain socket %d (%s)",
- sock, strerror (errno));
- goto err;
- }
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "bind-path not specified for unix socket, "
- "letting connect to assign default value");
+loop:
+ ret = gf_process_reserved_ports(ports, ceiling);
+
+ while (port) {
+ if (port == GF_CLIENT_PORT_CEILING) {
+ ret = -1;
+ break;
}
-err:
- return ret;
-}
+ /* ignore the reserved ports */
+ if (BIT_VALUE(ports, port)) {
+ port--;
+ continue;
+ }
-int32_t
-client_fill_address_family (rpc_transport_t *this, sa_family_t *sa_family)
-{
- data_t *address_family_data = NULL;
- int32_t ret = -1;
+ _assign_port(sockaddr, port);
- if (sa_family == NULL) {
- gf_log_callingfn ("", GF_LOG_WARNING,
- "sa_family argument is NULL");
- goto out;
- }
+ ret = bind(fd, sockaddr, sockaddr_len);
- address_family_data = dict_get (this->options,
- "transport.address-family");
- if (!address_family_data) {
- data_t *remote_host_data = NULL, *connect_path_data = NULL;
- remote_host_data = dict_get (this->options, "remote-host");
- connect_path_data = dict_get (this->options,
- "transport.socket.connect-path");
-
- if (!(remote_host_data || connect_path_data) ||
- (remote_host_data && connect_path_data)) {
- gf_log (this->name, GF_LOG_ERROR,
- "transport.address-family not specified. "
- "Could not guess default value from (remote-host:%s or "
- "transport.unix.connect-path:%s) options",
- data_to_str (remote_host_data),
- data_to_str (connect_path_data));
- *sa_family = AF_UNSPEC;
- goto out;
- }
+ if (ret == 0)
+ break;
- if (remote_host_data) {
- gf_log (this->name, GF_LOG_DEBUG,
- "address-family not specified, guessing it "
- "to be inet from (remote-host: %s)", data_to_str (remote_host_data));
- *sa_family = AF_INET;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "address-family not specified, guessing it "
- "to be unix from (transport.unix.connect-path: %s)", data_to_str (connect_path_data));
- *sa_family = AF_UNIX;
- }
+ if (ret == -1 && errno == EACCES)
+ break;
- } else {
- char *address_family = data_to_str (address_family_data);
- if (!strcasecmp (address_family, "unix")) {
- *sa_family = AF_UNIX;
- } else if (!strcasecmp (address_family, "inet")) {
- *sa_family = AF_INET;
- } else if (!strcasecmp (address_family, "inet6")) {
- *sa_family = AF_INET6;
- } else if (!strcasecmp (address_family, "inet-sdp")) {
- *sa_family = AF_INET_SDP;
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address-family (%s) specified",
- address_family);
- *sa_family = AF_UNSPEC;
- goto out;
- }
- }
+ port--;
+ }
- ret = 0;
+ /* In case if all the secure ports are exhausted, we are no more
+ * binding to secure ports, hence instead of getting a random
+ * port, lets define the range to restrict it from getting from
+ * ports reserved for bricks i.e from range of 49152 - 65535
+ * which further may lead to port clash */
+ if (!port) {
+ ceiling = port = GF_CLNT_INSECURE_PORT_CEILING;
+ for (i = 0; i <= ceiling; i++)
+ BIT_CLEAR(ports, i);
+ goto loop;
+ }
-out:
- return ret;
+ return ret;
+#endif /* GF_DISABLE_PRIVPORT_TRACKING */
}
static int32_t
-af_inet_client_get_remote_sockaddr (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len)
+af_unix_client_bind(rpc_transport_t *this, struct sockaddr *sockaddr,
+ socklen_t sockaddr_len, int sock)
{
- dict_t *options = this->options;
- data_t *remote_host_data = NULL;
- data_t *remote_port_data = NULL;
- char *remote_host = NULL;
- uint16_t remote_port = 0;
- struct addrinfo *addr_info = NULL;
- int32_t ret = 0;
-
- remote_host_data = dict_get (options, "remote-host");
- if (remote_host_data == NULL)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "option remote-host missing in volume %s", this->name);
- ret = -1;
- goto err;
- }
-
- remote_host = data_to_str (remote_host_data);
- if (remote_host == NULL)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "option remote-host has data NULL in volume %s", this->name);
- ret = -1;
- goto err;
- }
-
- remote_port_data = dict_get (options, "remote-port");
- if (remote_port_data == NULL)
- {
- gf_log (this->name, GF_LOG_TRACE,
- "option remote-port missing in volume %s. Defaulting to %d",
- this->name, GF_DEFAULT_SOCKET_LISTEN_PORT);
-
- remote_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
- }
- else
- {
- remote_port = data_to_uint16 (remote_port_data);
- }
-
- if (remote_port == (uint16_t)-1)
- {
- gf_log (this->name, GF_LOG_ERROR,
- "option remote-port has invalid port in volume %s",
- this->name);
- ret = -1;
- goto err;
- }
-
- /* TODO: gf_resolve is a blocking call. kick in some
- non blocking dns techniques */
- ret = gf_resolve_ip6 (remote_host, remote_port,
- sockaddr->sa_family, &this->dnscache, &addr_info);
+ data_t *path_data = NULL;
+ struct sockaddr_un *addr = NULL;
+ int32_t ret = 0;
+
+ path_data = dict_get_sizen(this->options, "transport.socket.bind-path");
+ if (path_data) {
+ char *path = data_to_str(path_data);
+ if (!path || path_data->len > 108) { /* 108 = addr->sun_path length */
+ gf_log(this->name, GF_LOG_TRACE,
+ "bind-path not specified for unix socket, "
+ "letting connect to assign default value");
+ goto err;
+ }
+
+ addr = (struct sockaddr_un *)sockaddr;
+ strcpy(addr->sun_path, path);
+ ret = bind(sock, (struct sockaddr *)addr, sockaddr_len);
if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "DNS resolution failed on host %s", remote_host);
- goto err;
+ gf_log(this->name, GF_LOG_ERROR,
+ "cannot bind to unix-domain socket %d (%s)", sock,
+ strerror(errno));
+ goto err;
}
-
- memcpy (sockaddr, addr_info->ai_addr, addr_info->ai_addrlen);
- *sockaddr_len = addr_info->ai_addrlen;
+ } else {
+ gf_log(this->name, GF_LOG_TRACE,
+ "bind-path not specified for unix socket, "
+ "letting connect to assign default value");
+ }
err:
- return ret;
+ return ret;
}
static int32_t
-af_unix_client_get_remote_sockaddr (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len)
+client_fill_address_family(rpc_transport_t *this, sa_family_t *sa_family)
{
- struct sockaddr_un *sockaddr_un = NULL;
- char *connect_path = NULL;
- data_t *connect_path_data = NULL;
- int32_t ret = 0;
-
- connect_path_data = dict_get (this->options,
- "transport.socket.connect-path");
- if (!connect_path_data) {
- gf_log (this->name, GF_LOG_ERROR,
- "option transport.unix.connect-path not specified for "
- "address-family unix");
- ret = -1;
- goto err;
+ data_t *address_family_data = NULL;
+ int32_t ret = -1;
+
+ if (sa_family == NULL) {
+ gf_log_callingfn("", GF_LOG_WARNING, "sa_family argument is NULL");
+ goto out;
+ }
+
+ address_family_data = dict_get_sizen(this->options,
+ "transport.address-family");
+ if (!address_family_data) {
+ data_t *remote_host_data = NULL, *connect_path_data = NULL;
+ remote_host_data = dict_get_sizen(this->options, "remote-host");
+ connect_path_data = dict_get_sizen(this->options,
+ "transport.socket.connect-path");
+
+ if (!(remote_host_data || connect_path_data) ||
+ (remote_host_data && connect_path_data)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "transport.address-family not specified. "
+ "Could not guess default value from (remote-host:%s or "
+ "transport.unix.connect-path:%s) options",
+ data_to_str(remote_host_data),
+ data_to_str(connect_path_data));
+ *sa_family = AF_UNSPEC;
+ goto out;
+ }
+
+ if (remote_host_data) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "address-family not specified, marking it as unspec "
+ "for getaddrinfo to resolve from (remote-host: %s)",
+ data_to_str(remote_host_data));
+ *sa_family = AF_UNSPEC;
+ } else {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "address-family not specified, guessing it "
+ "to be unix from (transport.unix.connect-path: %s)",
+ data_to_str(connect_path_data));
+ *sa_family = AF_UNIX;
+ }
+
+ } else {
+ const char *address_family = data_to_str(address_family_data);
+ if (!strcasecmp(address_family, "unix")) {
+ *sa_family = AF_UNIX;
+ } else if (!strcasecmp(address_family, "inet")) {
+ *sa_family = AF_INET;
+ } else if (!strcasecmp(address_family, "inet6")) {
+ *sa_family = AF_INET6;
+ } else if (!strcasecmp(address_family, "inet-sdp")) {
+ *sa_family = AF_INET_SDP;
+ } else {
+ gf_log(this->name, GF_LOG_ERROR,
+ "unknown address-family (%s) specified", address_family);
+ *sa_family = AF_UNSPEC;
+ goto out;
}
+ }
- connect_path = data_to_str (connect_path_data);
- if (!connect_path) {
- gf_log (this->name, GF_LOG_ERROR,
- "transport.unix.connect-path is null-string");
- ret = -1;
- goto err;
- }
+ ret = 0;
- if (strlen (connect_path) > UNIX_PATH_MAX) {
- gf_log (this->name, GF_LOG_ERROR,
- "connect-path value length %"GF_PRI_SIZET" > %d octets",
- strlen (connect_path), UNIX_PATH_MAX);
- ret = -1;
- goto err;
- }
+out:
+ return ret;
+}
- gf_log (this->name, GF_LOG_TRACE,
- "using connect-path %s", connect_path);
- sockaddr_un = (struct sockaddr_un *)sockaddr;
- strcpy (sockaddr_un->sun_path, connect_path);
- *sockaddr_len = sizeof (struct sockaddr_un);
+static int32_t
+af_inet_client_get_remote_sockaddr(rpc_transport_t *this,
+ struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len)
+{
+ dict_t *options = this->options;
+ data_t *remote_host_data = NULL;
+ data_t *remote_port_data = NULL;
+ char *remote_host = NULL;
+ uint16_t remote_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
+ struct addrinfo *addr_info = NULL;
+ int32_t ret = 0;
+ struct in6_addr serveraddr;
+
+ remote_host_data = dict_get_sizen(options, "remote-host");
+ if (remote_host_data == NULL) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "option remote-host missing in volume %s", this->name);
+ ret = -1;
+ goto err;
+ }
+
+ remote_host = data_to_str(remote_host_data);
+ if (remote_host == NULL) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "option remote-host has data NULL in volume %s", this->name);
+ ret = -1;
+ goto err;
+ }
+
+ remote_port_data = dict_get_sizen(options, "remote-port");
+ if (remote_port_data == NULL) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "option remote-port missing in volume %s. Defaulting to %d",
+ this->name, GF_DEFAULT_SOCKET_LISTEN_PORT);
+ } else {
+ remote_port = data_to_uint16(remote_port_data);
+ if (remote_port == (uint16_t)-1) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "option remote-port has invalid port in volume %s",
+ this->name);
+ ret = -1;
+ goto err;
+ }
+ }
+
+ /* Need to update transport-address family if address-family is not provided
+ to command-line arguments
+ */
+ if (inet_pton(AF_INET6, remote_host, &serveraddr)) {
+ sockaddr->sa_family = AF_INET6;
+ }
+
+ /* TODO: gf_resolve is a blocking call. kick in some
+ non blocking dns techniques */
+ ret = gf_resolve_ip6(remote_host, remote_port, sockaddr->sa_family,
+ &this->dnscache, &addr_info);
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_ERROR, "DNS resolution failed on host %s",
+ remote_host);
+ goto err;
+ }
+
+ memcpy(sockaddr, addr_info->ai_addr, addr_info->ai_addrlen);
+ *sockaddr_len = addr_info->ai_addrlen;
err:
- return ret;
+ return ret;
}
static int32_t
-af_unix_server_get_local_sockaddr (rpc_transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len)
+af_unix_client_get_remote_sockaddr(rpc_transport_t *this,
+ struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len)
{
- data_t *listen_path_data = NULL;
- char *listen_path = NULL;
- int32_t ret = 0;
- struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
-
-
- listen_path_data = dict_get (this->options,
- "transport.socket.listen-path");
- if (!listen_path_data) {
- gf_log (this->name, GF_LOG_ERROR,
- "missing option transport.socket.listen-path");
- ret = -1;
- goto err;
- }
+ struct sockaddr_un *sockaddr_un = NULL;
+ char *connect_path = NULL;
+ data_t *connect_path_data = NULL;
+ int32_t ret = -1;
+
+ connect_path_data = dict_get_sizen(this->options,
+ "transport.socket.connect-path");
+ if (!connect_path_data) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "option transport.unix.connect-path not specified for "
+ "address-family unix");
+ goto err;
+ }
+
+ /* 108 = sockaddr_un->sun_path length */
+ if ((connect_path_data->len + 1) > 108) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "connect-path value length %d > %d octets",
+ connect_path_data->len + 1, UNIX_PATH_MAX);
+ goto err;
+ }
+
+ connect_path = data_to_str(connect_path_data);
+ if (!connect_path) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "transport.unix.connect-path is null-string");
+ goto err;
+ }
+
+ gf_log(this->name, GF_LOG_TRACE, "using connect-path %s", connect_path);
+ sockaddr_un = (struct sockaddr_un *)sockaddr;
+ strcpy(sockaddr_un->sun_path, connect_path);
+ *sockaddr_len = sizeof(struct sockaddr_un);
+
+ ret = 0;
+err:
+ return ret;
+}
- listen_path = data_to_str (listen_path_data);
+static int32_t
+af_unix_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
+ socklen_t *addr_len)
+{
+ data_t *listen_path_data = NULL;
+ char *listen_path = NULL;
+ int32_t ret = 0;
+ struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
+
+ listen_path_data = dict_get_sizen(this->options,
+ "transport.socket.listen-path");
+ if (!listen_path_data) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "missing option transport.socket.listen-path");
+ ret = -1;
+ goto err;
+ }
+
+ listen_path = data_to_str(listen_path_data);
#ifndef UNIX_PATH_MAX
#define UNIX_PATH_MAX 108
#endif
- if (strlen (listen_path) > UNIX_PATH_MAX) {
- gf_log (this->name, GF_LOG_ERROR,
- "option transport.unix.listen-path has value length "
- "%"GF_PRI_SIZET" > %d",
- strlen (listen_path), UNIX_PATH_MAX);
- ret = -1;
- goto err;
- }
+ if ((listen_path_data->len + 1) > UNIX_PATH_MAX) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "option transport.unix.listen-path has value length "
+ "%" GF_PRI_SIZET " > %d",
+ strlen(listen_path), UNIX_PATH_MAX);
+ ret = -1;
+ goto err;
+ }
- sunaddr->sun_family = AF_UNIX;
- strcpy (sunaddr->sun_path, listen_path);
- *addr_len = sizeof (struct sockaddr_un);
+ sunaddr->sun_family = AF_UNIX;
+ strcpy(sunaddr->sun_path, listen_path);
+ *addr_len = sizeof(struct sockaddr_un);
err:
- return ret;
+ return ret;
}
static int32_t
-af_inet_server_get_local_sockaddr (rpc_transport_t *this,
- struct sockaddr *addr,
- socklen_t *addr_len)
+af_inet_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
+ socklen_t *addr_len)
{
- struct addrinfo hints, *res = 0, *rp = NULL;
- data_t *listen_port_data = NULL, *listen_host_data = NULL;
- uint16_t listen_port = -1;
- char service[NI_MAXSERV], *listen_host = NULL;
- dict_t *options = NULL;
- int32_t ret = 0;
-
- options = this->options;
-
- listen_port_data = dict_get (options, "transport.socket.listen-port");
- listen_host_data = dict_get (options, "transport.socket.bind-address");
-
- if (listen_port_data)
- {
- listen_port = data_to_uint16 (listen_port_data);
- }
-
- if (listen_port == (uint16_t) -1)
- listen_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
-
-
- if (listen_host_data)
- {
- listen_host = data_to_str (listen_host_data);
+ struct addrinfo hints, *res = 0, *rp = NULL;
+ data_t *listen_port_data = NULL, *listen_host_data = NULL;
+ uint16_t listen_port = 0;
+ char service[NI_MAXSERV], *listen_host = NULL;
+ dict_t *options = NULL;
+ int32_t ret = 0;
+
+ /* initializes addr_len */
+ *addr_len = 0;
+
+ options = this->options;
+
+ listen_port_data = dict_get_sizen(options, "transport.socket.listen-port");
+ if (listen_port_data) {
+ listen_port = data_to_uint16(listen_port_data);
+ } else {
+ listen_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
+ }
+
+ listen_host_data = dict_get_sizen(options, "transport.socket.bind-address");
+ if (listen_host_data) {
+ listen_host = data_to_str(listen_host_data);
+ } else {
+ if (addr->sa_family == AF_INET6) {
+ struct sockaddr_in6 *in = (struct sockaddr_in6 *)addr;
+ in->sin6_addr = in6addr_any;
+ in->sin6_port = htons(listen_port);
+ *addr_len = sizeof(struct sockaddr_in6);
+ goto out;
+ } else if (addr->sa_family == AF_INET) {
+ struct sockaddr_in *in = (struct sockaddr_in *)addr;
+ in->sin_addr.s_addr = htonl(INADDR_ANY);
+ in->sin_port = htons(listen_port);
+ *addr_len = sizeof(struct sockaddr_in);
+ goto out;
+ }
+ }
+
+ sprintf(service, "%d", listen_port);
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = addr->sa_family;
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_flags = AI_PASSIVE;
+
+ ret = getaddrinfo(listen_host, service, &hints, &res);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "getaddrinfo failed for host %s, service %s (%s)", listen_host,
+ service, gai_strerror(ret));
+ ret = -1;
+ goto out;
+ }
+ /* IPV6 server can handle both ipv4 and ipv6 clients */
+ for (rp = res; rp != NULL; rp = rp->ai_next) {
+ if (rp->ai_addr == NULL)
+ continue;
+ if (rp->ai_family == AF_INET6) {
+ memcpy(addr, rp->ai_addr, rp->ai_addrlen);
+ *addr_len = rp->ai_addrlen;
+ }
+ }
+
+ if (!(*addr_len)) {
+ if (res && res->ai_addr) {
+ memcpy(addr, res->ai_addr, res->ai_addrlen);
+ *addr_len = res->ai_addrlen;
} else {
- if (addr->sa_family == AF_INET6) {
- struct sockaddr_in6 *in = (struct sockaddr_in6 *) addr;
- in->sin6_addr = in6addr_any;
- in->sin6_port = htons(listen_port);
- *addr_len = sizeof(struct sockaddr_in6);
- goto out;
- } else if (addr->sa_family == AF_INET) {
- struct sockaddr_in *in = (struct sockaddr_in *) addr;
- in->sin_addr.s_addr = htonl(INADDR_ANY);
- in->sin_port = htons(listen_port);
- *addr_len = sizeof(struct sockaddr_in);
- goto out;
- }
+ ret = -1;
}
+ }
- memset (service, 0, sizeof (service));
- sprintf (service, "%d", listen_port);
-
- memset (&hints, 0, sizeof (hints));
- hints.ai_family = addr->sa_family;
- hints.ai_socktype = SOCK_STREAM;
- hints.ai_flags = AI_PASSIVE | AI_ADDRCONFIG;
-
- ret = getaddrinfo(listen_host, service, &hints, &res);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "getaddrinfo failed for host %s, service %s (%s)",
- listen_host, service, gai_strerror (ret));
- ret = -1;
- goto out;
- }
- /* IPV6 server can handle both ipv4 and ipv6 clients */
- for (rp = res; rp != NULL; rp = rp->ai_next) {
- if (rp->ai_addr == NULL)
- continue;
- if (rp->ai_family == AF_INET6) {
- memcpy (addr, rp->ai_addr, rp->ai_addrlen);
- *addr_len = rp->ai_addrlen;
- }
- }
-
- if (!(*addr_len)) {
- memcpy (addr, res->ai_addr, res->ai_addrlen);
- *addr_len = res->ai_addrlen;
- }
-
- freeaddrinfo (res);
+ freeaddrinfo(res);
out:
- return ret;
+ return ret;
}
int32_t
-client_bind (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int sock)
+client_bind(rpc_transport_t *this, struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len, int sock)
{
- int ret = 0;
+ int ret = 0;
- *sockaddr_len = sizeof (struct sockaddr_in6);
- switch (sockaddr->sa_family)
- {
+ *sockaddr_len = sizeof(struct sockaddr_in6);
+ switch (sockaddr->sa_family) {
case AF_INET_SDP:
case AF_INET:
- *sockaddr_len = sizeof (struct sockaddr_in);
-
+ *sockaddr_len = sizeof(struct sockaddr_in);
+ /* Fall through */
case AF_INET6:
- if (!this->bind_insecure) {
- ret = af_inet_bind_to_port_lt_ceiling (sock, sockaddr,
- *sockaddr_len, CLIENT_PORT_CEILING);
+ if (!this->bind_insecure) {
+ ret = af_inet_bind_to_port_lt_ceiling(
+ sock, sockaddr, *sockaddr_len, GF_CLIENT_PORT_CEILING);
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "cannot bind inet socket (%d) "
+ "to port less than %d (%s)",
+ sock, GF_CLIENT_PORT_CEILING, strerror(errno));
+ ret = 0;
}
+ } else {
+ ret = af_inet_bind_to_port_lt_ceiling(
+ sock, sockaddr, *sockaddr_len, GF_IANA_PRIV_PORTS_START);
if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "cannot bind inet socket (%d) to port less than %d (%s)",
- sock, CLIENT_PORT_CEILING, strerror (errno));
- ret = 0;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "failed while binding to less than "
+ "%d (%s)",
+ GF_IANA_PRIV_PORTS_START, strerror(errno));
+ ret = 0;
}
- break;
+ }
+ break;
case AF_UNIX:
- *sockaddr_len = sizeof (struct sockaddr_un);
- ret = af_unix_client_bind (this, (struct sockaddr *)sockaddr,
- *sockaddr_len, sock);
- break;
+ *sockaddr_len = sizeof(struct sockaddr_un);
+ ret = af_unix_client_bind(this, (struct sockaddr *)sockaddr,
+ *sockaddr_len, sock);
+ break;
default:
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address family %d", sockaddr->sa_family);
- ret = -1;
- break;
- }
+ gf_log(this->name, GF_LOG_ERROR, "unknown address family %d",
+ sockaddr->sa_family);
+ ret = -1;
+ break;
+ }
- return ret;
+ return ret;
}
int32_t
-socket_client_get_remote_sockaddr (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- sa_family_t *sa_family)
+socket_client_get_remote_sockaddr(rpc_transport_t *this,
+ struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len,
+ sa_family_t *sa_family)
{
- int32_t ret = 0;
+ int32_t ret = 0;
- GF_VALIDATE_OR_GOTO ("socket", sockaddr, err);
- GF_VALIDATE_OR_GOTO ("socket", sockaddr_len, err);
- GF_VALIDATE_OR_GOTO ("socket", sa_family, err);
+ GF_VALIDATE_OR_GOTO("socket", sockaddr, err);
+ GF_VALIDATE_OR_GOTO("socket", sockaddr_len, err);
+ GF_VALIDATE_OR_GOTO("socket", sa_family, err);
- ret = client_fill_address_family (this, &sockaddr->sa_family);
- if (ret) {
- ret = -1;
- goto err;
- }
+ ret = client_fill_address_family(this, &sockaddr->sa_family);
+ if (ret) {
+ ret = -1;
+ goto err;
+ }
- *sa_family = sockaddr->sa_family;
+ *sa_family = sockaddr->sa_family;
- switch (sockaddr->sa_family)
- {
+ switch (sockaddr->sa_family) {
case AF_INET_SDP:
- sockaddr->sa_family = AF_INET;
-
+ sockaddr->sa_family = AF_INET;
+ /* Fall through */
case AF_INET:
case AF_INET6:
case AF_UNSPEC:
- ret = af_inet_client_get_remote_sockaddr (this, sockaddr,
- sockaddr_len);
- break;
+ ret = af_inet_client_get_remote_sockaddr(this, sockaddr,
+ sockaddr_len);
+ break;
case AF_UNIX:
- ret = af_unix_client_get_remote_sockaddr (this, sockaddr,
- sockaddr_len);
- break;
+ ret = af_unix_client_get_remote_sockaddr(this, sockaddr,
+ sockaddr_len);
+ break;
default:
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address-family %d", sockaddr->sa_family);
- ret = -1;
- }
-
- if (*sa_family == AF_UNSPEC) {
- *sa_family = sockaddr->sa_family;
- }
+ gf_log(this->name, GF_LOG_ERROR, "unknown address-family %d",
+ sockaddr->sa_family);
+ ret = -1;
+ }
+
+ /* Address-family is updated based on remote_host in
+ af_inet_client_get_remote_sockaddr
+ */
+ if (*sa_family != sockaddr->sa_family) {
+ *sa_family = sockaddr->sa_family;
+ }
err:
- return ret;
+ return ret;
}
-
-int32_t
-server_fill_address_family (rpc_transport_t *this, sa_family_t *sa_family)
+static int32_t
+server_fill_address_family(rpc_transport_t *this, sa_family_t *sa_family)
{
- data_t *address_family_data = NULL;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO ("socket", sa_family, out);
-
- address_family_data = dict_get (this->options,
- "transport.address-family");
- if (address_family_data) {
- char *address_family = NULL;
- address_family = data_to_str (address_family_data);
-
- if (!strcasecmp (address_family, "inet")) {
- *sa_family = AF_INET;
- } else if (!strcasecmp (address_family, "inet6")) {
- *sa_family = AF_INET6;
- } else if (!strcasecmp (address_family, "inet-sdp")) {
- *sa_family = AF_INET_SDP;
- } else if (!strcasecmp (address_family, "unix")) {
- *sa_family = AF_UNIX;
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address family (%s) specified", address_family);
- *sa_family = AF_UNSPEC;
- goto out;
- }
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "option address-family not specified, defaulting to inet");
- *sa_family = AF_INET;
- }
+ data_t *address_family_data = NULL;
+ int32_t ret = -1;
- ret = 0;
+#ifdef IPV6_DEFAULT
+ const char *addr_family = "inet6";
+ sa_family_t default_family = AF_INET6;
+#else
+ const char *addr_family = "inet";
+ sa_family_t default_family = AF_INET;
+#endif
+
+ GF_VALIDATE_OR_GOTO("socket", sa_family, out);
+
+ address_family_data = dict_get_sizen(this->options,
+ "transport.address-family");
+ if (address_family_data) {
+ char *address_family = NULL;
+ address_family = data_to_str(address_family_data);
+
+ if (!strcasecmp(address_family, "inet")) {
+ *sa_family = AF_INET;
+ } else if (!strcasecmp(address_family, "inet6")) {
+ *sa_family = AF_INET6;
+ } else if (!strcasecmp(address_family, "inet-sdp")) {
+ *sa_family = AF_INET_SDP;
+ } else if (!strcasecmp(address_family, "unix")) {
+ *sa_family = AF_UNIX;
+ } else {
+ gf_log(this->name, GF_LOG_ERROR,
+ "unknown address family (%s) specified", address_family);
+ *sa_family = AF_UNSPEC;
+ goto out;
+ }
+ } else {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "option address-family not specified, "
+ "defaulting to %s",
+ addr_family);
+ *sa_family = default_family;
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int32_t
-socket_server_get_local_sockaddr (rpc_transport_t *this, struct sockaddr *addr,
- socklen_t *addr_len, sa_family_t *sa_family)
+socket_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
+ socklen_t *addr_len, sa_family_t *sa_family)
{
- int32_t ret = -1;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("socket", sa_family, err);
- GF_VALIDATE_OR_GOTO ("socket", addr, err);
- GF_VALIDATE_OR_GOTO ("socket", addr_len, err);
+ GF_VALIDATE_OR_GOTO("socket", sa_family, err);
+ GF_VALIDATE_OR_GOTO("socket", addr, err);
+ GF_VALIDATE_OR_GOTO("socket", addr_len, err);
- ret = server_fill_address_family (this, &addr->sa_family);
- if (ret == -1) {
- goto err;
- }
+ ret = server_fill_address_family(this, &addr->sa_family);
+ if (ret == -1) {
+ goto err;
+ }
- *sa_family = addr->sa_family;
+ *sa_family = addr->sa_family;
- switch (addr->sa_family)
- {
+ switch (addr->sa_family) {
case AF_INET_SDP:
- addr->sa_family = AF_INET;
-
+ addr->sa_family = AF_INET;
+ /* Fall through */
case AF_INET:
case AF_INET6:
case AF_UNSPEC:
- ret = af_inet_server_get_local_sockaddr (this, addr, addr_len);
- break;
+ ret = af_inet_server_get_local_sockaddr(this, addr, addr_len);
+ break;
case AF_UNIX:
- ret = af_unix_server_get_local_sockaddr (this, addr, addr_len);
- break;
- }
+ ret = af_unix_server_get_local_sockaddr(this, addr, addr_len);
+ break;
+ }
- if (*sa_family == AF_UNSPEC) {
- *sa_family = addr->sa_family;
- }
+ if (*sa_family == AF_UNSPEC) {
+ *sa_family = addr->sa_family;
+ }
err:
- return ret;
+ return ret;
}
-int32_t
-fill_inet6_inet_identifiers (rpc_transport_t *this, struct sockaddr_storage *addr,
- int32_t addr_len, char *identifier)
+static int32_t
+fill_inet6_inet_identifiers(rpc_transport_t *this,
+ struct sockaddr_storage *addr, int32_t addr_len,
+ char *identifier)
{
- union gf_sock_union sock_union;
-
- char service[NI_MAXSERV] = {0,};
- char host[NI_MAXHOST] = {0,};
- int32_t ret = 0;
- int32_t tmpaddr_len = 0;
- int32_t one_to_four = 0;
- int32_t four_to_eight = 0;
- int32_t twelve_to_sixteen = 0;
- int16_t eight_to_ten = 0;
- int16_t ten_to_twelve = 0;
-
- memset (&sock_union, 0, sizeof (sock_union));
- sock_union.storage = *addr;
- tmpaddr_len = addr_len;
-
- if (sock_union.sa.sa_family == AF_INET6) {
- one_to_four = sock_union.sin6.sin6_addr.s6_addr32[0];
- four_to_eight = sock_union.sin6.sin6_addr.s6_addr32[1];
+ union gf_sock_union sock_union;
+
+ char service[NI_MAXSERV] = {
+ 0,
+ };
+ char host[NI_MAXHOST] = {
+ 0,
+ };
+ int32_t ret = 0;
+ int32_t tmpaddr_len = 0;
+ int32_t one_to_four = 0;
+ int32_t four_to_eight = 0;
+ int32_t twelve_to_sixteen = 0;
+ int16_t eight_to_ten = 0;
+ int16_t ten_to_twelve = 0;
+
+ memset(&sock_union, 0, sizeof(sock_union));
+ sock_union.storage = *addr;
+ tmpaddr_len = addr_len;
+
+ if (sock_union.sa.sa_family == AF_INET6) {
+ one_to_four = sock_union.sin6.sin6_addr.s6_addr32[0];
+ four_to_eight = sock_union.sin6.sin6_addr.s6_addr32[1];
#ifdef GF_SOLARIS_HOST_OS
- eight_to_ten = S6_ADDR16(sock_union.sin6.sin6_addr)[4];
+ eight_to_ten = S6_ADDR16(sock_union.sin6.sin6_addr)[4];
#else
- eight_to_ten = sock_union.sin6.sin6_addr.s6_addr16[4];
+ eight_to_ten = sock_union.sin6.sin6_addr.s6_addr16[4];
#endif
#ifdef GF_SOLARIS_HOST_OS
- ten_to_twelve = S6_ADDR16(sock_union.sin6.sin6_addr)[5];
+ ten_to_twelve = S6_ADDR16(sock_union.sin6.sin6_addr)[5];
#else
- ten_to_twelve = sock_union.sin6.sin6_addr.s6_addr16[5];
+ ten_to_twelve = sock_union.sin6.sin6_addr.s6_addr16[5];
#endif
- twelve_to_sixteen = sock_union.sin6.sin6_addr.s6_addr32[3];
-
- /* ipv4 mapped ipv6 address has
- bits 0-80: 0
- bits 80-96: 0xffff
- bits 96-128: ipv4 address
- */
-
- if (one_to_four == 0 &&
- four_to_eight == 0 &&
- eight_to_ten == 0 &&
- ten_to_twelve == -1) {
- struct sockaddr_in *in_ptr = &sock_union.sin;
- memset (&sock_union, 0, sizeof (sock_union));
-
- in_ptr->sin_family = AF_INET;
- in_ptr->sin_port = ((struct sockaddr_in6 *)addr)->sin6_port;
- in_ptr->sin_addr.s_addr = twelve_to_sixteen;
- tmpaddr_len = sizeof (*in_ptr);
- }
- }
+ twelve_to_sixteen = sock_union.sin6.sin6_addr.s6_addr32[3];
+
+ /* ipv4 mapped ipv6 address has
+ bits 0-80: 0
+ bits 80-96: 0xffff
+ bits 96-128: ipv4 address
+ */
- ret = getnameinfo (&sock_union.sa,
- tmpaddr_len,
- host, sizeof (host),
- service, sizeof (service),
- NI_NUMERICHOST | NI_NUMERICSERV);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "getnameinfo failed (%s)", gai_strerror (ret));
+ if (one_to_four == 0 && four_to_eight == 0 && eight_to_ten == 0 &&
+ ten_to_twelve == -1) {
+ struct sockaddr_in *in_ptr = &sock_union.sin;
+ memset(&sock_union, 0, sizeof(sock_union));
+
+ in_ptr->sin_family = AF_INET;
+ in_ptr->sin_port = ((struct sockaddr_in6 *)addr)->sin6_port;
+ in_ptr->sin_addr.s_addr = twelve_to_sixteen;
+ tmpaddr_len = sizeof(*in_ptr);
}
+ }
+
+ ret = getnameinfo(&sock_union.sa, tmpaddr_len, host, sizeof(host), service,
+ sizeof(service), NI_NUMERICHOST | NI_NUMERICSERV);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "getnameinfo failed (%s)",
+ gai_strerror(ret));
+ }
- sprintf (identifier, "%s:%s", host, service);
+ sprintf(identifier, "%s:%s", host, service);
- return ret;
+ return ret;
}
int32_t
-get_transport_identifiers (rpc_transport_t *this)
+get_transport_identifiers(rpc_transport_t *this)
{
- int32_t ret = 0;
- char is_inet_sdp = 0;
+ int32_t ret = 0;
+ char is_inet_sdp = 0;
- switch (((struct sockaddr *) &this->myinfo.sockaddr)->sa_family)
- {
+ switch (((struct sockaddr *)&this->myinfo.sockaddr)->sa_family) {
case AF_INET_SDP:
- is_inet_sdp = 1;
- ((struct sockaddr *) &this->peerinfo.sockaddr)->sa_family = ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = AF_INET;
-
+ is_inet_sdp = 1;
+ ((struct sockaddr *)&this->peerinfo.sockaddr)
+ ->sa_family = ((struct sockaddr *)&this->myinfo.sockaddr)
+ ->sa_family = AF_INET;
+ /* Fall through */
case AF_INET:
- case AF_INET6:
- {
- ret = fill_inet6_inet_identifiers (this,
- &this->myinfo.sockaddr,
- this->myinfo.sockaddr_len,
- this->myinfo.identifier);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot fill inet/inet6 identifier for server");
- goto err;
- }
-
- ret = fill_inet6_inet_identifiers (this,
- &this->peerinfo.sockaddr,
- this->peerinfo.sockaddr_len,
- this->peerinfo.identifier);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "cannot fill inet/inet6 identifier for client");
- goto err;
- }
+ case AF_INET6: {
+ ret = fill_inet6_inet_identifiers(this, &this->myinfo.sockaddr,
+ this->myinfo.sockaddr_len,
+ this->myinfo.identifier);
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "cannot fill inet/inet6 identifier for server");
+ goto err;
+ }
+
+ ret = fill_inet6_inet_identifiers(this, &this->peerinfo.sockaddr,
+ this->peerinfo.sockaddr_len,
+ this->peerinfo.identifier);
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "cannot fill inet/inet6 identifier for client");
+ goto err;
+ }
- if (is_inet_sdp) {
- ((struct sockaddr *) &this->peerinfo.sockaddr)->sa_family = ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family = AF_INET_SDP;
- }
- }
- break;
+ if (is_inet_sdp) {
+ ((struct sockaddr *)&this->peerinfo.sockaddr)
+ ->sa_family = ((struct sockaddr *)&this->myinfo.sockaddr)
+ ->sa_family = AF_INET_SDP;
+ }
+ } break;
- case AF_UNIX:
- {
- struct sockaddr_un *sunaddr = NULL;
+ case AF_UNIX: {
+ struct sockaddr_un *sunaddr = NULL;
- sunaddr = (struct sockaddr_un *) &this->myinfo.sockaddr;
- strcpy (this->myinfo.identifier, sunaddr->sun_path);
+ sunaddr = (struct sockaddr_un *)&this->myinfo.sockaddr;
+ strcpy(this->myinfo.identifier, sunaddr->sun_path);
- sunaddr = (struct sockaddr_un *) &this->peerinfo.sockaddr;
- strcpy (this->peerinfo.identifier, sunaddr->sun_path);
- }
- break;
+ sunaddr = (struct sockaddr_un *)&this->peerinfo.sockaddr;
+ strcpy(this->peerinfo.identifier, sunaddr->sun_path);
+ } break;
default:
- gf_log (this->name, GF_LOG_ERROR,
- "unknown address family (%d)",
- ((struct sockaddr *) &this->myinfo.sockaddr)->sa_family);
- ret = -1;
- break;
- }
+ gf_log(this->name, GF_LOG_ERROR, "unknown address family (%d)",
+ ((struct sockaddr *)&this->myinfo.sockaddr)->sa_family);
+ ret = -1;
+ break;
+ }
err:
- return ret;
+ return ret;
}
diff --git a/rpc/rpc-transport/socket/src/name.h b/rpc/rpc-transport/socket/src/name.h
index 0a13d8a9624..080c7588f5a 100644
--- a/rpc/rpc-transport/socket/src/name.h
+++ b/rpc/rpc-transport/socket/src/name.h
@@ -11,25 +11,23 @@
#ifndef _SOCKET_NAME_H
#define _SOCKET_NAME_H
-#include "compat.h"
+#include <glusterfs/compat.h>
int32_t
-client_bind (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- int sock);
+client_bind(rpc_transport_t *this, struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len, int sock);
int32_t
-socket_client_get_remote_sockaddr (rpc_transport_t *this,
- struct sockaddr *sockaddr,
- socklen_t *sockaddr_len,
- sa_family_t *sa_family);
+socket_client_get_remote_sockaddr(rpc_transport_t *this,
+ struct sockaddr *sockaddr,
+ socklen_t *sockaddr_len,
+ sa_family_t *sa_family);
int32_t
-socket_server_get_local_sockaddr (rpc_transport_t *this, struct sockaddr *addr,
- socklen_t *addr_len, sa_family_t *sa_family);
+socket_server_get_local_sockaddr(rpc_transport_t *this, struct sockaddr *addr,
+ socklen_t *addr_len, sa_family_t *sa_family);
int32_t
-get_transport_identifiers (rpc_transport_t *this);
+get_transport_identifiers(rpc_transport_t *this);
#endif /* _SOCKET_NAME_H */
diff --git a/rpc/rpc-transport/socket/src/socket-mem-types.h b/rpc/rpc-transport/socket/src/socket-mem-types.h
new file mode 100644
index 00000000000..241ce67f670
--- /dev/null
+++ b/rpc/rpc-transport/socket/src/socket-mem-types.h
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2008-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __SOCKET_MEM_TYPES_H__
+#define __SOCKET_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+typedef enum gf_sock_mem_types_ {
+ gf_sock_connect_error_state_t = gf_common_mt_end + 1,
+ gf_sock_mt_lock_array,
+ gf_sock_mt_end
+} gf_sock_mem_types_t;
+
+#endif
diff --git a/rpc/rpc-transport/socket/src/socket.c b/rpc/rpc-transport/socket/src/socket.c
index 0fee6cb4166..ed8b473be23 100644
--- a/rpc/rpc-transport/socket/src/socket.c
+++ b/rpc/rpc-transport/socket/src/socket.c
@@ -8,792 +8,1463 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "socket.h"
#include "name.h"
-#include "dict.h"
-#include "rpc-transport.h"
-#include "logging.h"
-#include "xlator.h"
-#include "byte-order.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-
+#include <glusterfs/dict.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/compat-errno.h>
+#include "socket-mem-types.h"
/* ugly #includes below */
#include "protocol-common.h"
#include "glusterfs3-xdr.h"
-#include "xdr-nfs3.h"
+#include "glusterfs4-xdr.h"
#include "rpcsvc.h"
-#include <fcntl.h>
-#include <errno.h>
+/* for TCP_USER_TIMEOUT */
+#if !defined(TCP_USER_TIMEOUT) && defined(GF_LINUX_HOST_OS)
+#include <linux/tcp.h>
+#else
#include <netinet/tcp.h>
+#endif
+
+#include <errno.h>
#include <rpc/xdr.h>
+#include <sys/ioctl.h>
#define GF_LOG_ERRNO(errno) ((errno == ENOTCONN) ? GF_LOG_DEBUG : GF_LOG_ERROR)
#define SA(ptr) ((struct sockaddr *)ptr)
+#define SSL_ENABLED_OPT "transport.socket.ssl-enabled"
+#define SSL_OWN_CERT_OPT "transport.socket.ssl-own-cert"
+#define SSL_PRIVATE_KEY_OPT "transport.socket.ssl-private-key"
+#define SSL_CA_LIST_OPT "transport.socket.ssl-ca-list"
+#define SSL_CERT_DEPTH_OPT "transport.socket.ssl-cert-depth"
+#define SSL_CIPHER_LIST_OPT "transport.socket.ssl-cipher-list"
+#define SSL_DH_PARAM_OPT "transport.socket.ssl-dh-param"
+#define SSL_EC_CURVE_OPT "transport.socket.ssl-ec-curve"
+#define SSL_CRL_PATH_OPT "transport.socket.ssl-crl-path"
+#define OWN_THREAD_OPT "transport.socket.own-thread"
+
+/* TBD: do automake substitutions etc. (ick) to set these. */
+#if !defined(DEFAULT_ETC_SSL)
+#ifdef GF_LINUX_HOST_OS
+#define DEFAULT_ETC_SSL "/etc/ssl"
+#endif
+#ifdef GF_BSD_HOST_OS
+#define DEFAULT_ETC_SSL "/etc/openssl"
+#endif
+#ifdef GF_DARWIN_HOST_OS
+#define DEFAULT_ETC_SSL "/usr/local/etc/openssl"
+#endif
+#if !defined(DEFAULT_ETC_SSL)
+#define DEFAULT_ETC_SSL "/etc/ssl"
+#endif
+#endif
-#define __socket_proto_reset_pending(priv) do { \
- memset (&priv->incoming.frag.vector, 0, \
- sizeof (priv->incoming.frag.vector)); \
- priv->incoming.frag.pending_vector = \
- &priv->incoming.frag.vector; \
- priv->incoming.frag.pending_vector->iov_base = \
- priv->incoming.frag.fragcurrent; \
- priv->incoming.pending_vector = \
- priv->incoming.frag.pending_vector; \
- } while (0);
-
-
-#define __socket_proto_update_pending(priv) \
- do { \
- uint32_t remaining_fragsize = 0; \
- if (priv->incoming.frag.pending_vector->iov_len == 0) { \
- remaining_fragsize = RPC_FRAGSIZE (priv->incoming.fraghdr) \
- - priv->incoming.frag.bytes_read; \
- \
- priv->incoming.frag.pending_vector->iov_len = \
- remaining_fragsize > priv->incoming.frag.remaining_size \
- ? priv->incoming.frag.remaining_size : remaining_fragsize; \
- \
- priv->incoming.frag.remaining_size -= \
- priv->incoming.frag.pending_vector->iov_len; \
- } \
- } while (0);
-
-#define __socket_proto_update_priv_after_read(priv, ret, bytes_read) \
- { \
- priv->incoming.frag.fragcurrent += bytes_read; \
- priv->incoming.frag.bytes_read += bytes_read; \
- \
- if ((ret > 0) || (priv->incoming.frag.remaining_size != 0)) { \
- if (priv->incoming.frag.remaining_size != 0 && ret == 0) { \
- __socket_proto_reset_pending (priv); \
- } \
- \
- gf_log (this->name, GF_LOG_TRACE, "partial read on non-blocking socket"); \
- \
- break; \
- } \
- }
-
-#define __socket_proto_init_pending(priv, size) \
- do { \
- uint32_t remaining_fragsize = 0; \
- remaining_fragsize = RPC_FRAGSIZE (priv->incoming.fraghdr) \
- - priv->incoming.frag.bytes_read; \
- \
- __socket_proto_reset_pending (priv); \
- \
- priv->incoming.frag.pending_vector->iov_len = \
- remaining_fragsize > size ? size : remaining_fragsize; \
- \
- priv->incoming.frag.remaining_size = \
- size - priv->incoming.frag.pending_vector->iov_len; \
- \
- } while (0);
-
+#if !defined(DEFAULT_CERT_PATH)
+#define DEFAULT_CERT_PATH DEFAULT_ETC_SSL "/glusterfs.pem"
+#endif
+#if !defined(DEFAULT_KEY_PATH)
+#define DEFAULT_KEY_PATH DEFAULT_ETC_SSL "/glusterfs.key"
+#endif
+#if !defined(DEFAULT_CA_PATH)
+#define DEFAULT_CA_PATH DEFAULT_ETC_SSL "/glusterfs.ca"
+#endif
+#if !defined(DEFAULT_VERIFY_DEPTH)
+#define DEFAULT_VERIFY_DEPTH 1
+#endif
+#define DEFAULT_CIPHER_LIST "EECDH:EDH:HIGH:!3DES:!RC4:!DES:!MD5:!aNULL:!eNULL"
+#define DEFAULT_DH_PARAM DEFAULT_ETC_SSL "/dhparam.pem"
+#define DEFAULT_EC_CURVE "prime256v1"
+
+#define POLL_MASK_INPUT (POLLIN | POLLPRI)
+#define POLL_MASK_OUTPUT (POLLOUT)
+#define POLL_MASK_ERROR (POLLERR | POLLHUP | POLLNVAL)
+
+typedef int
+SSL_unary_func(SSL *);
+typedef int
+SSL_trinary_func(SSL *, void *, int);
+static int
+ssl_setup_connection_params(rpc_transport_t *this);
+
+#define __socket_proto_reset_pending(priv) \
+ do { \
+ struct gf_sock_incoming_frag *frag; \
+ frag = &priv->incoming.frag; \
+ \
+ memset(&frag->vector, 0, sizeof(frag->vector)); \
+ frag->pending_vector = &frag->vector; \
+ frag->pending_vector->iov_base = frag->fragcurrent; \
+ priv->incoming.pending_vector = frag->pending_vector; \
+ } while (0)
+
+#define __socket_proto_update_pending(priv) \
+ do { \
+ uint32_t remaining; \
+ struct gf_sock_incoming_frag *frag; \
+ frag = &priv->incoming.frag; \
+ if (frag->pending_vector->iov_len == 0) { \
+ remaining = (RPC_FRAGSIZE(priv->incoming.fraghdr) - \
+ frag->bytes_read); \
+ \
+ frag->pending_vector->iov_len = (remaining > frag->remaining_size) \
+ ? frag->remaining_size \
+ : remaining; \
+ \
+ frag->remaining_size -= frag->pending_vector->iov_len; \
+ } \
+ } while (0)
+
+#define __socket_proto_update_priv_after_read(priv, ret, bytes_read) \
+ { \
+ struct gf_sock_incoming_frag *frag; \
+ frag = &priv->incoming.frag; \
+ \
+ frag->fragcurrent += bytes_read; \
+ frag->bytes_read += bytes_read; \
+ \
+ if ((ret > 0) || (frag->remaining_size != 0)) { \
+ if (frag->remaining_size != 0 && ret == 0) { \
+ __socket_proto_reset_pending(priv); \
+ } \
+ \
+ gf_log(this->name, GF_LOG_TRACE, \
+ "partial read on non-blocking socket"); \
+ ret = 0; \
+ break; \
+ } \
+ }
+
+#define __socket_proto_init_pending(priv, size) \
+ do { \
+ uint32_t remaining = 0; \
+ struct gf_sock_incoming_frag *frag; \
+ frag = &priv->incoming.frag; \
+ \
+ remaining = (RPC_FRAGSIZE(priv->incoming.fraghdr) - frag->bytes_read); \
+ \
+ __socket_proto_reset_pending(priv); \
+ \
+ frag->pending_vector->iov_len = (remaining > size) ? size : remaining; \
+ \
+ frag->remaining_size = (size - frag->pending_vector->iov_len); \
+ \
+ } while (0)
/* This will be used in a switch case and breaks from the switch case if all
* the pending data is not read.
*/
-#define __socket_proto_read(priv, ret) \
- { \
- size_t bytes_read = 0; \
- \
- __socket_proto_update_pending (priv); \
- \
- ret = __socket_readv (this, \
- priv->incoming.pending_vector, 1, \
- &priv->incoming.pending_vector, \
- &priv->incoming.pending_count, \
- &bytes_read); \
- if (ret == -1) { \
- gf_log (this->name, GF_LOG_WARNING, \
- "reading from socket failed. Error (%s), " \
- "peer (%s)", strerror (errno), \
- this->peerinfo.identifier); \
- break; \
- } \
- __socket_proto_update_priv_after_read (priv, ret, bytes_read); \
- }
-
-
-int socket_init (rpc_transport_t *this);
+#define __socket_proto_read(priv, ret) \
+ { \
+ size_t bytes_read = 0; \
+ struct gf_sock_incoming *in; \
+ in = &priv->incoming; \
+ \
+ __socket_proto_update_pending(priv); \
+ \
+ ret = __socket_readv(this, in->pending_vector, 1, &in->pending_vector, \
+ &in->pending_count, &bytes_read); \
+ if (ret < 0) \
+ break; \
+ __socket_proto_update_priv_after_read(priv, ret, bytes_read); \
+ }
+
+struct socket_connect_error_state_ {
+ xlator_t *this;
+ rpc_transport_t *trans;
+ gf_boolean_t refd;
+};
+typedef struct socket_connect_error_state_ socket_connect_error_state_t;
-/*
- * return value:
- * 0 = success (completed)
- * -1 = error
- * > 0 = incomplete
- */
+static int
+socket_init(rpc_transport_t *this);
+static int
+__socket_nonblock(int fd);
-int
-__socket_rwv (rpc_transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count, size_t *bytes,
- int write)
+static void
+socket_dump_info(struct sockaddr *sa, int is_server, int is_ssl, int sock,
+ char *log_domain, char *log_label)
{
- socket_private_t *priv = NULL;
- int sock = -1;
- int ret = -1;
- struct iovec *opvector = NULL;
- int opcount = 0;
- int moved = 0;
+ char addr_buf[INET6_ADDRSTRLEN + 1] = {
+ 0,
+ };
+ char *addr = NULL;
+ const char *peer_type = NULL;
+ int af = sa->sa_family;
+ int so_error = -1;
+ socklen_t slen = sizeof(so_error);
+
+ if (af == AF_UNIX) {
+ addr = ((struct sockaddr_un *)(sa))->sun_path;
+ } else {
+ if (af == AF_INET6) {
+ struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)(sa);
+
+ inet_ntop(af, &sin6->sin6_addr, addr_buf, sizeof(addr_buf));
+ addr = addr_buf;
+ } else {
+ struct sockaddr_in *sin = (struct sockaddr_in *)(sa);
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ inet_ntop(af, &sin->sin_addr, addr_buf, sizeof(addr_buf));
+ addr = addr_buf;
+ }
+ }
+ if (is_server)
+ peer_type = "server";
+ else
+ peer_type = "client";
+
+ (void)getsockopt(sock, SOL_SOCKET, SO_ERROR, &so_error, &slen);
+
+ gf_log(log_domain, GF_LOG_TRACE,
+ "$$$ %s: %s (af:%d,sock:%d) %s %s (errno:%d:%s)", peer_type,
+ log_label, af, sock, addr, (is_ssl ? "SSL" : "non-SSL"), so_error,
+ strerror(so_error));
+}
- priv = this->private;
- sock = priv->sock;
+static void
+ssl_dump_error_stack(const char *caller)
+{
+ unsigned long errnum = 0;
+ char errbuf[120] = {
+ 0,
+ };
- opvector = vector;
- opcount = count;
+ /* OpenSSL docs explicitly give 120 as the error-string length. */
- if (bytes != NULL) {
- *bytes = 0;
+ while ((errnum = ERR_get_error())) {
+ ERR_error_string(errnum, errbuf);
+ gf_log(caller, GF_LOG_ERROR, " %s", errbuf);
+ }
+}
+
+static int
+ssl_do(rpc_transport_t *this, void *buf, size_t len, SSL_trinary_func *func)
+{
+ int r = (-1);
+ socket_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (buf) {
+ if (priv->connected == -1) {
+ /*
+ * Fields in the SSL structure (especially
+ * the BIO pointers) are not valid at this
+ * point, so we'll segfault if we pass them
+ * to SSL_read/SSL_write.
+ */
+ gf_log(this->name, GF_LOG_INFO, "lost connection in %s", __func__);
+ return -1;
}
+ r = func(priv->ssl_ssl, buf, len);
+ } else {
+ /* This should be treated as error */
+ gf_log(this->name, GF_LOG_ERROR, "buffer is empty %s", __func__);
+ goto out;
+ }
+ switch (SSL_get_error(priv->ssl_ssl, r)) {
+ case SSL_ERROR_NONE:
+ /* fall through */
+ case SSL_ERROR_WANT_READ:
+ /* fall through */
+ case SSL_ERROR_WANT_WRITE:
+ errno = EAGAIN;
+ return r;
+
+ case SSL_ERROR_SYSCALL:
+ /* Sometimes SSL_ERROR_SYSCALL returns errno as
+ * EAGAIN. In such a case we should reattempt operation
+ * So, for now, just return the return value and the
+ * errno as is.
+ */
+ gf_log(this->name, GF_LOG_DEBUG,
+ "syscall error (probably remote disconnect) "
+ "errno:%d:%s",
+ errno, strerror(errno));
+ return r;
+ default:
+ errno = EIO;
+ goto out; /* "break" would just loop again */
+ }
+out:
+ return -1;
+}
- while (opcount) {
- if (write) {
- ret = writev (sock, opvector, opcount);
+#define ssl_read_one(t, b, l) \
+ ssl_do((t), (b), (l), (SSL_trinary_func *)SSL_read)
+#define ssl_write_one(t, b, l) \
+ ssl_do((t), (b), (l), (SSL_trinary_func *)SSL_write)
+
+/* set crl verify flags only for server */
+/* see man X509_VERIFY_PARAM_SET_FLAGS(3)
+ * X509_V_FLAG_CRL_CHECK enables CRL checking for the certificate chain
+ * leaf certificate. An error occurs if a suitable CRL cannot be found.
+ * Since we're never going to revoke a gluster node cert, we better disable
+ * CRL check for server certs to avoid getting error and failed connection
+ * attempts.
+ */
+static void
+ssl_clear_crl_verify_flags(SSL_CTX *ssl_ctx)
+{
+#ifdef X509_V_FLAG_CRL_CHECK_ALL
+#ifdef HAVE_SSL_CTX_GET0_PARAM
+ X509_VERIFY_PARAM *vpm;
+
+ vpm = SSL_CTX_get0_param(ssl_ctx);
+ if (vpm) {
+ X509_VERIFY_PARAM_clear_flags(
+ vpm, (X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL));
+ }
+#else
+ /* CRL verify flag need not be cleared for rhel6 kind of clients */
+#endif
+#else
+ gf_log(this->name, GF_LOG_ERROR, "OpenSSL version does not support CRL");
+#endif
+ return;
+}
- if (ret == 0 || (ret == -1 && errno == EAGAIN)) {
- /* done for now */
- break;
- }
- this->total_bytes_write += ret;
- } else {
- ret = readv (sock, opvector, opcount);
- if (ret == -1 && errno == EAGAIN) {
- /* done for now */
- break;
- }
- this->total_bytes_read += ret;
- }
+/* set crl verify flags only for server */
+static void
+ssl_set_crl_verify_flags(SSL_CTX *ssl_ctx)
+{
+#ifdef X509_V_FLAG_CRL_CHECK_ALL
+#ifdef HAVE_SSL_CTX_GET0_PARAM
+ X509_VERIFY_PARAM *vpm;
+
+ vpm = SSL_CTX_get0_param(ssl_ctx);
+ if (vpm) {
+ unsigned long flags;
+
+ flags = X509_VERIFY_PARAM_get_flags(vpm);
+ flags |= (X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL);
+ X509_VERIFY_PARAM_set_flags(vpm, flags);
+ }
+#else
+ X509_STORE *x509store;
- if (ret == 0) {
- /* Mostly due to 'umount' in client */
+ x509store = SSL_CTX_get_cert_store(ssl_ctx);
+ X509_STORE_set_flags(x509store,
+ X509_V_FLAG_CRL_CHECK | X509_V_FLAG_CRL_CHECK_ALL);
+#endif
+#else
+ gf_log(this->name, GF_LOG_ERROR, "OpenSSL version does not support CRL");
+#endif
+}
- gf_log (this->name, GF_LOG_DEBUG,
- "EOF from peer %s", this->peerinfo.identifier);
- opcount = -1;
- errno = ENOTCONN;
- break;
- }
- if (ret == -1) {
- if (errno == EINTR)
- continue;
-
- gf_log (this->name, GF_LOG_WARNING,
- "%s failed (%s)", write ? "writev" : "readv",
- strerror (errno));
- opcount = -1;
- break;
- }
+static int
+ssl_setup_connection_prefix(rpc_transport_t *this, gf_boolean_t server)
+{
+ int ret = -1;
+ socket_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (ssl_setup_connection_params(this) < 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "+ ssl_setup_connection_params() failed!");
+ goto done;
+ } else {
+ gf_log(this->name, GF_LOG_TRACE,
+ "+ ssl_setup_connection_params() done!");
+ }
+
+ priv->ssl_error_required = SSL_ERROR_NONE;
+ priv->ssl_connected = _gf_false;
+ priv->ssl_accepted = _gf_false;
+ priv->ssl_context_created = _gf_false;
+
+ if (!server && priv->crl_path)
+ ssl_clear_crl_verify_flags(priv->ssl_ctx);
+
+ priv->ssl_ssl = SSL_new(priv->ssl_ctx);
+ if (!priv->ssl_ssl) {
+ gf_log(this->name, GF_LOG_ERROR, "SSL_new failed");
+ ssl_dump_error_stack(this->name);
+ goto done;
+ }
+
+ priv->ssl_sbio = BIO_new_socket(priv->sock, BIO_NOCLOSE);
+ if (!priv->ssl_sbio) {
+ gf_log(this->name, GF_LOG_ERROR, "BIO_new_socket failed");
+ ssl_dump_error_stack(this->name);
+ goto free_ssl;
+ }
+
+ SSL_set_bio(priv->ssl_ssl, priv->ssl_sbio, priv->ssl_sbio);
+ ret = 0;
+ goto done;
+
+free_ssl:
+ SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
+done:
+ return ret;
+}
- if (bytes != NULL) {
- *bytes += ret;
- }
+static char *
+ssl_setup_connection_postfix(rpc_transport_t *this)
+{
+ X509 *peer = NULL;
+ char peer_CN[256] = "";
+ socket_private_t *priv = NULL;
+
+ priv = this->private;
+
+ /* Make sure _SSL verification_ succeeded, yielding an identity. */
+ if (SSL_get_verify_result(priv->ssl_ssl) != X509_V_OK) {
+ goto ssl_error;
+ }
+ peer = SSL_get_peer_certificate(priv->ssl_ssl);
+ if (!peer) {
+ goto ssl_error;
+ }
+
+ SSL_set_mode(priv->ssl_ssl, SSL_MODE_ENABLE_PARTIAL_WRITE);
+
+ /* Finally, everything seems OK. */
+ X509_NAME_get_text_by_NID(X509_get_subject_name(peer), NID_commonName,
+ peer_CN, sizeof(peer_CN) - 1);
+ peer_CN[sizeof(peer_CN) - 1] = '\0';
+ gf_log(this->name, GF_LOG_DEBUG, "peer CN = %s", peer_CN);
+ gf_log(this->name, GF_LOG_DEBUG,
+ "SSL verification succeeded (client: %s) (server: %s)",
+ this->peerinfo.identifier, this->myinfo.identifier);
+ X509_free(peer);
+ return gf_strdup(peer_CN);
+
+ /* Error paths. */
+ssl_error:
+ gf_log(this->name, GF_LOG_ERROR,
+ "SSL connect error (client: %s) (server: %s)",
+ this->peerinfo.identifier, this->myinfo.identifier);
+ ssl_dump_error_stack(this->name);
+
+ SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
+ return NULL;
+}
- moved = 0;
-
- while (moved < ret) {
- if ((ret - moved) >= opvector[0].iov_len) {
- moved += opvector[0].iov_len;
- opvector++;
- opcount--;
- } else {
- opvector[0].iov_len -= (ret - moved);
- opvector[0].iov_base += (ret - moved);
- moved += (ret - moved);
- }
- while (opcount && !opvector[0].iov_len) {
- opvector++;
- opcount--;
- }
+static int
+ssl_complete_connection(rpc_transport_t *this)
+{
+ int ret = -1; /* 1 : implies go back to epoll_wait()
+ * 0 : implies successful ssl connection
+ * -1: implies continue processing current event
+ * as if EPOLLERR has been encountered
+ */
+ char *cname = NULL;
+ int r = -1;
+ int ssl_error = -1;
+ socket_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->is_server) {
+ r = SSL_accept(priv->ssl_ssl);
+ } else {
+ r = SSL_connect(priv->ssl_ssl);
+ }
+
+ ssl_error = SSL_get_error(priv->ssl_ssl, r);
+ priv->ssl_error_required = ssl_error;
+
+ switch (ssl_error) {
+ case SSL_ERROR_NONE:
+ cname = ssl_setup_connection_postfix(this);
+ if (!cname) {
+ /* we've failed to get the cname so
+ * we must close the connection
+ *
+ * treat this as EPOLLERR
+ */
+ gf_log(this->name, GF_LOG_TRACE, "error getting cname");
+ errno = ECONNRESET;
+ ret = -1;
+ } else {
+ this->ssl_name = cname;
+ if (priv->is_server) {
+ priv->ssl_accepted = _gf_true;
+ gf_log(this->name, GF_LOG_TRACE, "ssl_accepted!");
+ } else {
+ priv->ssl_connected = _gf_true;
+ gf_log(this->name, GF_LOG_TRACE, "ssl_connected!");
}
- }
-
- if (pending_vector)
- *pending_vector = opvector;
-
- if (pending_count)
- *pending_count = opcount;
+ ret = 0;
+ }
+ break;
+
+ case SSL_ERROR_WANT_READ:
+ /* fall through */
+ case SSL_ERROR_WANT_WRITE:
+ errno = EAGAIN;
+ break;
+
+ case SSL_ERROR_SYSCALL:
+ /* Sometimes SSL_ERROR_SYSCALL returns with errno as EAGAIN
+ * So, we should retry the operation.
+ * So, for now, we just return the return value and errno as is.
+ */
+ break;
+
+ case SSL_ERROR_SSL:
+ /* treat this as EPOLLERR */
+ ret = -1;
+ break;
+
+ default:
+ /* treat this as EPOLLERR */
+ errno = EIO;
+ ret = -1;
+ break;
+ }
+ return ret;
+}
-out:
- return opcount;
+static void
+ssl_teardown_connection(socket_private_t *priv)
+{
+ if (priv->ssl_ssl) {
+ SSL_shutdown(priv->ssl_ssl);
+ SSL_clear(priv->ssl_ssl);
+ SSL_free(priv->ssl_ssl);
+ SSL_CTX_free(priv->ssl_ctx);
+ priv->ssl_ssl = NULL;
+ priv->ssl_ctx = NULL;
+ if (priv->ssl_private_key) {
+ GF_FREE(priv->ssl_private_key);
+ priv->ssl_private_key = NULL;
+ }
+ if (priv->ssl_own_cert) {
+ GF_FREE(priv->ssl_own_cert);
+ priv->ssl_own_cert = NULL;
+ }
+ if (priv->ssl_ca_list) {
+ GF_FREE(priv->ssl_ca_list);
+ priv->ssl_ca_list = NULL;
+ }
+ }
+ priv->use_ssl = _gf_false;
}
+static ssize_t
+__socket_ssl_readv(rpc_transport_t *this, struct iovec *opvector, int opcount)
+{
+ socket_private_t *priv = NULL;
+ int sock = -1;
+ int ret = -1;
+
+ priv = this->private;
+ sock = priv->sock;
+
+ if (priv->use_ssl) {
+ gf_log(this->name, GF_LOG_TRACE, "***** reading over SSL");
+ ret = ssl_read_one(this, opvector->iov_base, opvector->iov_len);
+ } else {
+ gf_log(this->name, GF_LOG_TRACE, "***** reading over non-SSL");
+ ret = sys_readv(sock, opvector, IOV_MIN(opcount));
+ }
+
+ return ret;
+}
-int
-__socket_readv (rpc_transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count,
- size_t *bytes)
+static ssize_t
+__socket_ssl_read(rpc_transport_t *this, void *buf, size_t count)
{
- int ret = -1;
+ struct iovec iov = {
+ 0,
+ };
+ int ret = -1;
- ret = __socket_rwv (this, vector, count,
- pending_vector, pending_count, bytes, 0);
+ iov.iov_base = buf;
+ iov.iov_len = count;
- return ret;
+ ret = __socket_ssl_readv(this, &iov, 1);
+
+ return ret;
}
+static int
+__socket_cached_read(rpc_transport_t *this, struct iovec *opvector, int opcount)
+{
+ socket_private_t *priv = NULL;
+ struct gf_sock_incoming *in = NULL;
+ int req_len = -1;
+ int ret = -1;
+
+ priv = this->private;
+ in = &priv->incoming;
+ req_len = iov_length(opvector, opcount);
+
+ if (in->record_state == SP_STATE_READING_FRAGHDR) {
+ in->ra_read = 0;
+ in->ra_served = 0;
+ in->ra_max = 0;
+ in->ra_buf = NULL;
+ goto uncached;
+ }
+
+ if (!in->ra_max) {
+ /* first call after passing SP_STATE_READING_FRAGHDR */
+ in->ra_max = min(RPC_FRAGSIZE(in->fraghdr), GF_SOCKET_RA_MAX);
+ /* Note that the in->iobuf is the primary iobuf into which
+ headers are read into, and in->frag.fragcurrent points to
+ some position in the buffer. By using this itself as our
+ read-ahead cache, we can avoid memory copies in iov_load
+ */
+ in->ra_buf = in->frag.fragcurrent;
+ }
+
+ /* fill read-ahead */
+ if (in->ra_read < in->ra_max) {
+ ret = __socket_ssl_read(this, &in->ra_buf[in->ra_read],
+ (in->ra_max - in->ra_read));
+ if (ret > 0)
+ in->ra_read += ret;
+
+ /* we proceed to test if there is still cached data to
+ be served even if readahead could not progress */
+ }
+
+ /* serve cached */
+ if (in->ra_served < in->ra_read) {
+ ret = iov_load(opvector, opcount, &in->ra_buf[in->ra_served],
+ min(req_len, (in->ra_read - in->ra_served)));
+
+ in->ra_served += ret;
+ /* Do not read uncached and cached in the same call */
+ goto out;
+ }
+
+ if (in->ra_read < in->ra_max)
+ /* If there was no cached data to be served, (and we are
+ guaranteed to have already performed an attempt to progress
+ readahead above), and we have not yet read out the full
+ readahead capacity, then bail out for now without doing
+ the uncached read below (as that will overtake future cached
+ read)
+ */
+ goto out;
+uncached:
+ ret = __socket_ssl_readv(this, opvector, opcount);
+out:
+ return ret;
+}
-int
-__socket_writev (rpc_transport_t *this, struct iovec *vector, int count,
- struct iovec **pending_vector, int *pending_count)
+static gf_boolean_t
+__does_socket_rwv_error_need_logging(socket_private_t *priv, int write)
{
- int ret = -1;
+ int read = !write;
- ret = __socket_rwv (this, vector, count,
- pending_vector, pending_count, NULL, 1);
+ if (priv->connected == -1) /* Didn't even connect, of course it fails */
+ return _gf_false;
- return ret;
+ if (read && (priv->read_fail_log == _gf_false))
+ return _gf_false;
+
+ return _gf_true;
}
+/*
+ * return value:
+ * 0 = success (completed)
+ * -1 = error
+ * > 0 = incomplete
+ */
-int
-__socket_disconnect (rpc_transport_t *this)
+static int
+__socket_rwv(rpc_transport_t *this, struct iovec *vector, int count,
+ struct iovec **pending_vector, int *pending_count, size_t *bytes,
+ int write)
{
- socket_private_t *priv = NULL;
- int ret = -1;
+ socket_private_t *priv = NULL;
+ int sock = -1;
+ int ret = -1;
+ struct iovec *opvector = NULL;
+ int opcount = 0;
+ int moved = 0;
+
+ GF_VALIDATE_OR_GOTO("socket", this->private, out);
+
+ priv = this->private;
+ sock = priv->sock;
+
+ opvector = vector;
+ opcount = count;
+
+ if (bytes != NULL) {
+ *bytes = 0;
+ }
+
+ while (opcount > 0) {
+ if (opvector->iov_len == 0) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "would have passed zero length to read/write");
+ ++opvector;
+ --opcount;
+ continue;
+ }
+ if (priv->use_ssl && !priv->ssl_ssl) {
+ /*
+ * We could end up here with priv->ssl_ssl still NULL
+ * if (a) the connection failed and (b) some fool
+ * called other socket functions anyway. Demoting to
+ * non-SSL might be insecure, so just fail it outright.
+ */
+ ret = -1;
+ gf_log(this->name, GF_LOG_TRACE,
+ "### no priv->ssl_ssl yet; ret = -1;");
+ } else if (write) {
+ if (priv->use_ssl) {
+ ret = ssl_write_one(this, opvector->iov_base,
+ opvector->iov_len);
+ } else {
+ ret = sys_writev(sock, opvector, IOV_MIN(opcount));
+ }
+
+ if ((ret == 0) || ((ret < 0) && (errno == EAGAIN))) {
+ /* done for now */
+ break;
+ } else if (ret > 0)
+ this->total_bytes_write += ret;
+ } else {
+ ret = __socket_cached_read(this, opvector, opcount);
+ if (ret == 0) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "EOF on socket %d (errno:%d:%s); returning ENODATA",
+ sock, errno, strerror(errno));
+
+ errno = ENODATA;
+ ret = -1;
+ }
+ if ((ret < 0) && (errno == EAGAIN)) {
+ /* done for now */
+ break;
+ } else if (ret > 0)
+ this->total_bytes_read += ret;
+ }
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ if (ret == 0) {
+ /* Mostly due to 'umount' in client */
- priv = this->private;
+ gf_log(this->name, GF_LOG_DEBUG, "EOF from peer %s",
+ this->peerinfo.identifier);
+ opcount = -1;
+ errno = ENOTCONN;
+ break;
+ }
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+
+ if (__does_socket_rwv_error_need_logging(priv, write)) {
+ GF_LOG_OCCASIONALLY(priv->log_ctr, this->name, GF_LOG_WARNING,
+ "%s on %s failed (%s)",
+ write ? "writev" : "readv",
+ this->peerinfo.identifier, strerror(errno));
+ }
+
+ if (priv->use_ssl && priv->ssl_ssl) {
+ ssl_dump_error_stack(this->name);
+ }
+ opcount = -1;
+ break;
+ }
- if (priv->sock != -1) {
- priv->connected = -1;
- ret = shutdown (priv->sock, SHUT_RDWR);
- if (ret) {
- /* its already disconnected.. no need to understand
- why it failed to shutdown in normal cases */
- gf_log (this->name, GF_LOG_DEBUG,
- "shutdown() returned %d. %s",
- ret, strerror (errno));
- }
+ if (bytes != NULL) {
+ *bytes += ret;
+ }
+
+ moved = 0;
+
+ while (moved < ret) {
+ if (!opcount) {
+ gf_log(this->name, GF_LOG_DEBUG, "ran out of iov, moved %d/%d",
+ moved, ret);
+ goto ran_out;
+ }
+ if (!opvector[0].iov_len) {
+ opvector++;
+ opcount--;
+ continue;
+ }
+ if ((ret - moved) >= opvector[0].iov_len) {
+ moved += opvector[0].iov_len;
+ opvector++;
+ opcount--;
+ } else {
+ opvector[0].iov_len -= (ret - moved);
+ opvector[0].iov_base += (ret - moved);
+ moved += (ret - moved);
+ }
}
+ }
+
+ran_out:
+
+ if (pending_vector)
+ *pending_vector = opvector;
+
+ if (pending_count)
+ *pending_count = opcount;
out:
- return ret;
+ return opcount;
}
+static int
+__socket_readv(rpc_transport_t *this, struct iovec *vector, int count,
+ struct iovec **pending_vector, int *pending_count, size_t *bytes)
+{
+ return __socket_rwv(this, vector, count, pending_vector, pending_count,
+ bytes, 0);
+}
-int
-__socket_server_bind (rpc_transport_t *this)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
- int opt = 1;
- int reuse_check_sock = -1;
- struct sockaddr_storage unix_addr = {0};
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
-
- ret = setsockopt (priv->sock, SOL_SOCKET, SO_REUSEADDR,
- &opt, sizeof (opt));
-
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "setsockopt() for SO_REUSEADDR failed (%s)",
- strerror (errno));
- }
-
- /* reuse-address doesn't work for unix type sockets */
- if (AF_UNIX == SA (&this->myinfo.sockaddr)->sa_family) {
- memcpy (&unix_addr, SA (&this->myinfo.sockaddr),
- this->myinfo.sockaddr_len);
- reuse_check_sock = socket (AF_UNIX, SOCK_STREAM, 0);
- if (reuse_check_sock >= 0) {
- ret = connect (reuse_check_sock, SA (&unix_addr),
- this->myinfo.sockaddr_len);
- if ((ret == -1) && (ECONNREFUSED == errno)) {
- unlink (((struct sockaddr_un*)&unix_addr)->sun_path);
- }
- close (reuse_check_sock);
- }
+static int
+__socket_writev(rpc_transport_t *this, struct iovec *vector, int count,
+ struct iovec **pending_vector, int *pending_count)
+{
+ return __socket_rwv(this, vector, count, pending_vector, pending_count,
+ NULL, 1);
+}
+
+static int
+__socket_shutdown(rpc_transport_t *this)
+{
+ int ret = -1;
+ socket_private_t *priv = this->private;
+
+ priv->connected = -1;
+ ret = shutdown(priv->sock, SHUT_RDWR);
+ if (ret) {
+ /* its already disconnected.. no need to understand
+ why it failed to shutdown in normal cases */
+ gf_log(this->name, GF_LOG_DEBUG, "shutdown() returned %d. %s", ret,
+ strerror(errno));
+ } else {
+ GF_LOG_OCCASIONALLY(priv->shutdown_log_ctr, this->name, GF_LOG_INFO,
+ "intentional socket shutdown(%d)", priv->sock);
+ }
+
+ return ret;
+}
+
+static int
+__socket_teardown_connection(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->use_ssl)
+ ssl_teardown_connection(priv);
+
+ return __socket_shutdown(this);
+}
+
+static int
+__socket_disconnect(rpc_transport_t *this)
+{
+ int ret = -1;
+ socket_private_t *priv = NULL;
+
+ priv = this->private;
+
+ gf_log(this->name, GF_LOG_TRACE, "disconnecting %p, sock=%d", this,
+ priv->sock);
+
+ if (priv->sock >= 0) {
+ gf_log_callingfn(this->name, GF_LOG_TRACE,
+ "tearing down socket connection");
+ ret = __socket_teardown_connection(this);
+ if (ret) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "__socket_teardown_connection () failed: %s",
+ strerror(errno));
}
+ }
- ret = bind (priv->sock, (struct sockaddr *)&this->myinfo.sockaddr,
- this->myinfo.sockaddr_len);
+ return ret;
+}
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "binding to %s failed: %s",
- this->myinfo.identifier, strerror (errno));
+static int
+__socket_server_bind(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
+ struct sockaddr_storage unix_addr = {0};
+ int ret = -1;
+ int opt = 1;
+ int reuse_check_sock = -1;
+ uint16_t sin_port = 0;
+ int retries = 0;
+
+ priv = this->private;
+ ctx = this->ctx;
+ cmd_args = &ctx->cmd_args;
+
+ ret = setsockopt(priv->sock, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
+
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setsockopt() for SO_REUSEADDR failed (%s)", strerror(errno));
+ }
+
+ /* reuse-address doesn't work for unix type sockets */
+ if (AF_UNIX == SA(&this->myinfo.sockaddr)->sa_family) {
+ memcpy(&unix_addr, SA(&this->myinfo.sockaddr),
+ this->myinfo.sockaddr_len);
+ reuse_check_sock = sys_socket(AF_UNIX, SOCK_STREAM, 0);
+ if (reuse_check_sock >= 0) {
+ ret = connect(reuse_check_sock, SA(&unix_addr),
+ this->myinfo.sockaddr_len);
+ if ((ret != 0) && (ECONNREFUSED == errno)) {
+ sys_unlink(((struct sockaddr_un *)&unix_addr)->sun_path);
+ }
+ gf_log(this->name, GF_LOG_INFO,
+ "closing (AF_UNIX) reuse check socket %d", reuse_check_sock);
+ sys_close(reuse_check_sock);
+ }
+ }
+
+ if (AF_UNIX != SA(&this->myinfo.sockaddr)->sa_family) {
+ sin_port = (int)ntohs(
+ ((struct sockaddr_in *)&this->myinfo.sockaddr)->sin_port);
+ if (!sin_port) {
+ sin_port = GF_DEFAULT_SOCKET_LISTEN_PORT;
+ ((struct sockaddr_in *)&this->myinfo.sockaddr)->sin_port = htons(
+ sin_port);
+ }
+ retries = 10;
+ while (retries) {
+ ret = bind(priv->sock, (struct sockaddr *)&this->myinfo.sockaddr,
+ this->myinfo.sockaddr_len);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "binding to %s failed: %s",
+ this->myinfo.identifier, strerror(errno));
if (errno == EADDRINUSE) {
- gf_log (this->name, GF_LOG_ERROR,
- "Port is already in use");
+ gf_log(this->name, GF_LOG_ERROR, "Port is already in use");
+ sleep(1);
+ retries--;
+ } else {
+ break;
}
+ } else {
+ break;
+ }
+ }
+ } else {
+ ret = bind(priv->sock, (struct sockaddr *)&this->myinfo.sockaddr,
+ this->myinfo.sockaddr_len);
+
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "binding to %s failed: %s",
+ this->myinfo.identifier, strerror(errno));
+ if (errno == EADDRINUSE) {
+ gf_log(this->name, GF_LOG_ERROR, "Port is already in use");
+ }
+ }
+ }
+ if (AF_UNIX != SA(&this->myinfo.sockaddr)->sa_family) {
+ if (getsockname(priv->sock, SA(&this->myinfo.sockaddr),
+ &this->myinfo.sockaddr_len) != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "getsockname on (%d) failed (%s)", priv->sock,
+ strerror(errno));
+ ret = -1;
+ goto out;
}
+ if (!cmd_args->brick_port) {
+ cmd_args->brick_port = (int)ntohs(
+ ((struct sockaddr_in *)&this->myinfo.sockaddr)->sin_port);
+ gf_log(this->name, GF_LOG_INFO,
+ "process started listening on port (%d)",
+ cmd_args->brick_port);
+ }
+ }
out:
- return ret;
+ return ret;
}
-
-int
-__socket_nonblock (int fd)
+static int
+__socket_nonblock(int fd)
{
- int flags = 0;
- int ret = -1;
+ int flags = 0;
+ int ret = -1;
- flags = fcntl (fd, F_GETFL);
+ flags = fcntl(fd, F_GETFL);
- if (flags != -1)
- ret = fcntl (fd, F_SETFL, flags | O_NONBLOCK);
+ if (flags >= 0)
+ ret = fcntl(fd, F_SETFL, flags | O_NONBLOCK);
- return ret;
+ return ret;
}
-
-int
-__socket_nodelay (int fd)
+static int
+__socket_nodelay(int fd)
{
- int on = 1;
- int ret = -1;
+ int on = 1;
+ int ret = -1;
- ret = setsockopt (fd, IPPROTO_TCP, TCP_NODELAY,
- &on, sizeof (on));
- if (!ret)
- gf_log (THIS->name, GF_LOG_TRACE,
- "NODELAY enabled for socket %d", fd);
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &on, sizeof(on));
+ if (!ret)
+ gf_log(THIS->name, GF_LOG_TRACE, "NODELAY enabled for socket %d", fd);
- return ret;
+ return ret;
}
-
static int
-__socket_keepalive (int fd, int keepalive_intvl, int keepalive_idle)
+__socket_keepalive(int fd, int family, int keepaliveintvl, int keepaliveidle,
+ int keepalivecnt, int timeout)
{
- int on = 1;
- int ret = -1;
+ int on = 1;
+ int ret = -1;
+#if defined(TCP_USER_TIMEOUT)
+ int timeout_ms = timeout * 1000;
+#endif
- ret = setsockopt (fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof (on));
- if (ret == -1) {
- gf_log ("socket", GF_LOG_WARNING,
- "failed to set keep alive option on socket %d", fd);
- goto err;
- }
+ ret = setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &on, sizeof(on));
+ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set keep alive option on socket %d", fd);
+ goto err;
+ }
- if (keepalive_intvl == GF_USE_DEFAULT_KEEPALIVE)
- goto done;
+ if (keepaliveintvl == GF_USE_DEFAULT_KEEPALIVE)
+ goto done;
#if !defined(GF_LINUX_HOST_OS) && !defined(__NetBSD__)
-#ifdef GF_SOLARIS_HOST_OS
- ret = setsockopt (fd, SOL_SOCKET, SO_KEEPALIVE, &keepalive_intvl,
- sizeof (keepalive_intvl));
+#if defined(GF_SOLARIS_HOST_OS) || defined(__FreeBSD__)
+ ret = setsockopt(fd, SOL_SOCKET, SO_KEEPALIVE, &keepaliveintvl,
+ sizeof(keepaliveintvl));
#else
- ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPALIVE, &keepalive_intvl,
- sizeof (keepalive_intvl));
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPALIVE, &keepaliveintvl,
+ sizeof(keepaliveintvl));
#endif
- if (ret == -1) {
- gf_log ("socket", GF_LOG_WARNING,
- "failed to set keep alive interval on socket %d", fd);
- goto err;
- }
+ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set keep alive interval on socket %d", fd);
+ goto err;
+ }
#else
- ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPIDLE, &keepalive_idle,
- sizeof (keepalive_intvl));
- if (ret == -1) {
- gf_log ("socket", GF_LOG_WARNING,
- "failed to set keep idle on socket %d", fd);
- goto err;
- }
- ret = setsockopt (fd, IPPROTO_TCP, TCP_KEEPINTVL, &keepalive_intvl,
- sizeof (keepalive_intvl));
- if (ret == -1) {
- gf_log ("socket", GF_LOG_WARNING,
- "failed to set keep alive interval on socket %d", fd);
- goto err;
- }
+ if (family != AF_INET && family != AF_INET6)
+ goto done;
+
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPIDLE, &keepaliveidle,
+ sizeof(keepaliveidle));
+ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set keep idle %d on socket %d, %s", keepaliveidle, fd,
+ strerror(errno));
+ goto err;
+ }
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPINTVL, &keepaliveintvl,
+ sizeof(keepaliveintvl));
+ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set keep interval %d on socket %d, %s",
+ keepaliveintvl, fd, strerror(errno));
+ goto err;
+ }
+
+#if defined(TCP_USER_TIMEOUT)
+ if (timeout_ms < 0)
+ goto done;
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_USER_TIMEOUT, &timeout_ms,
+ sizeof(timeout_ms));
+ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set "
+ "TCP_USER_TIMEOUT %d on socket %d, %s",
+ timeout_ms, fd, strerror(errno));
+ goto err;
+ }
+#endif
+#if defined(TCP_KEEPCNT)
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_KEEPCNT, &keepalivecnt,
+ sizeof(keepalivecnt));
+ if (ret != 0) {
+ gf_log("socket", GF_LOG_WARNING,
+ "failed to set "
+ "TCP_KEEPCNT %d on socket %d, %s",
+ keepalivecnt, fd, strerror(errno));
+ goto err;
+ }
+#endif
#endif
done:
- gf_log (THIS->name, GF_LOG_TRACE, "Keep-alive enabled for socket %d, interval "
- "%d, idle: %d", fd, keepalive_intvl, keepalive_idle);
+ gf_log(THIS->name, GF_LOG_TRACE,
+ "Keep-alive enabled for socket: %d, "
+ "(idle: %d, interval: %d, max-probes: %d, timeout: %d)",
+ fd, keepaliveidle, keepaliveintvl, keepalivecnt, timeout);
err:
- return ret;
+ return ret;
}
-
-int
-__socket_connect_finish (int fd)
+static int
+__socket_connect_finish(int fd)
{
- int ret = -1;
- int optval = 0;
- socklen_t optlen = sizeof (int);
+ int ret = -1;
+ int optval = 0;
+ socklen_t optlen = sizeof(int);
- ret = getsockopt (fd, SOL_SOCKET, SO_ERROR, (void *)&optval, &optlen);
+ ret = getsockopt(fd, SOL_SOCKET, SO_ERROR, (void *)&optval, &optlen);
- if (ret == 0 && optval) {
- errno = optval;
- ret = -1;
- }
+ if (ret == 0 && optval) {
+ errno = optval;
+ ret = -1;
+ }
- return ret;
+ return ret;
}
-
-void
-__socket_reset (rpc_transport_t *this)
+static void
+__socket_reset(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
-
- /* TODO: use mem-pool on incoming data */
-
- if (priv->incoming.iobref) {
- iobref_unref (priv->incoming.iobref);
- priv->incoming.iobref = NULL;
- }
+ socket_private_t *priv = NULL;
- if (priv->incoming.iobuf) {
- iobuf_unref (priv->incoming.iobuf);
- }
-
- if (priv->incoming.request_info != NULL) {
- GF_FREE (priv->incoming.request_info);
- }
-
- memset (&priv->incoming, 0, sizeof (priv->incoming));
+ priv = this->private;
- event_unregister (this->ctx->event_pool, priv->sock, priv->idx);
+ /* TODO: use mem-pool on incoming data */
- close (priv->sock);
- priv->sock = -1;
- priv->idx = -1;
- priv->connected = -1;
+ if (priv->incoming.iobref) {
+ iobref_unref(priv->incoming.iobref);
+ priv->incoming.iobref = NULL;
+ }
-out:
- return;
+ if (priv->incoming.iobuf) {
+ iobuf_unref(priv->incoming.iobuf);
+ priv->incoming.iobuf = NULL;
+ }
+
+ GF_FREE(priv->incoming.request_info);
+
+ memset(&priv->incoming, 0, sizeof(priv->incoming));
+
+ gf_event_unregister_close(this->ctx->event_pool, priv->sock, priv->idx);
+ if (priv->use_ssl && priv->ssl_ssl) {
+ SSL_clear(priv->ssl_ssl);
+ SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
+ }
+ if (priv->ssl_ctx) {
+ SSL_CTX_free(priv->ssl_ctx);
+ priv->ssl_ctx = NULL;
+ }
+ priv->sock = -1;
+ priv->idx = -1;
+ priv->connected = -1;
+ priv->ssl_connected = _gf_false;
+ priv->ssl_accepted = _gf_false;
+ priv->ssl_context_created = _gf_false;
+
+ if (priv->ssl_private_key) {
+ GF_FREE(priv->ssl_private_key);
+ priv->ssl_private_key = NULL;
+ }
+ if (priv->ssl_own_cert) {
+ GF_FREE(priv->ssl_own_cert);
+ priv->ssl_own_cert = NULL;
+ }
+ if (priv->ssl_ca_list) {
+ GF_FREE(priv->ssl_ca_list);
+ priv->ssl_ca_list = NULL;
+ }
}
-
-void
-socket_set_lastfrag (uint32_t *fragsize) {
- (*fragsize) |= 0x80000000U;
+static void
+socket_set_lastfrag(uint32_t *fragsize)
+{
+ (*fragsize) |= 0x80000000U;
}
-
-void
-socket_set_frag_header_size (uint32_t size, char *haddr)
+static void
+socket_set_frag_header_size(uint32_t size, char *haddr)
{
- size = htonl (size);
- memcpy (haddr, &size, sizeof (size));
+ size = htonl(size);
+ memcpy(haddr, &size, sizeof(size));
}
-
-void
-socket_set_last_frag_header_size (uint32_t size, char *haddr)
+static void
+socket_set_last_frag_header_size(uint32_t size, char *haddr)
{
- socket_set_lastfrag (&size);
- socket_set_frag_header_size (size, haddr);
+ socket_set_lastfrag(&size);
+ socket_set_frag_header_size(size, haddr);
}
-struct ioq *
-__socket_ioq_new (rpc_transport_t *this, rpc_transport_msg_t *msg)
+static struct ioq *
+__socket_ioq_new(rpc_transport_t *this, rpc_transport_msg_t *msg)
{
- struct ioq *entry = NULL;
- int count = 0;
- uint32_t size = 0;
+ struct ioq *entry = NULL;
+ int count = 0;
+ uint32_t size = 0;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
+ /* TODO: use mem-pool */
+ entry = GF_CALLOC(1, sizeof(*entry), gf_common_mt_ioq);
+ if (!entry)
+ return NULL;
- /* TODO: use mem-pool */
- entry = GF_CALLOC (1, sizeof (*entry), gf_common_mt_ioq);
- if (!entry)
- return NULL;
+ count = msg->rpchdrcount + msg->proghdrcount + msg->progpayloadcount;
- count = msg->rpchdrcount + msg->proghdrcount + msg->progpayloadcount;
+ GF_ASSERT(count <= (MAX_IOVEC - 1));
- GF_ASSERT (count <= (MAX_IOVEC - 1));
+ size = iov_length(msg->rpchdr, msg->rpchdrcount) +
+ iov_length(msg->proghdr, msg->proghdrcount) +
+ iov_length(msg->progpayload, msg->progpayloadcount);
- size = iov_length (msg->rpchdr, msg->rpchdrcount)
- + iov_length (msg->proghdr, msg->proghdrcount)
- + iov_length (msg->progpayload, msg->progpayloadcount);
+ if (size > RPC_MAX_FRAGMENT_SIZE) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "msg size (%u) bigger than the maximum allowed size on "
+ "sockets (%u)",
+ size, RPC_MAX_FRAGMENT_SIZE);
+ GF_FREE(entry);
+ return NULL;
+ }
- if (size > RPC_MAX_FRAGMENT_SIZE) {
- gf_log (this->name, GF_LOG_ERROR,
- "msg size (%u) bigger than the maximum allowed size on "
- "sockets (%u)", size, RPC_MAX_FRAGMENT_SIZE);
- GF_FREE (entry);
- return NULL;
- }
+ socket_set_last_frag_header_size(size, (char *)&entry->fraghdr);
- socket_set_last_frag_header_size (size, (char *)&entry->fraghdr);
+ entry->vector[0].iov_base = (char *)&entry->fraghdr;
+ entry->vector[0].iov_len = sizeof(entry->fraghdr);
+ entry->count = 1;
- entry->vector[0].iov_base = (char *)&entry->fraghdr;
- entry->vector[0].iov_len = sizeof (entry->fraghdr);
- entry->count = 1;
+ if (msg->rpchdr != NULL) {
+ memcpy(&entry->vector[1], msg->rpchdr,
+ sizeof(struct iovec) * msg->rpchdrcount);
+ entry->count += msg->rpchdrcount;
+ }
- if (msg->rpchdr != NULL) {
- memcpy (&entry->vector[1], msg->rpchdr,
- sizeof (struct iovec) * msg->rpchdrcount);
- entry->count += msg->rpchdrcount;
- }
+ if (msg->proghdr != NULL) {
+ memcpy(&entry->vector[entry->count], msg->proghdr,
+ sizeof(struct iovec) * msg->proghdrcount);
+ entry->count += msg->proghdrcount;
+ }
- if (msg->proghdr != NULL) {
- memcpy (&entry->vector[entry->count], msg->proghdr,
- sizeof (struct iovec) * msg->proghdrcount);
- entry->count += msg->proghdrcount;
- }
+ if (msg->progpayload != NULL) {
+ memcpy(&entry->vector[entry->count], msg->progpayload,
+ sizeof(struct iovec) * msg->progpayloadcount);
+ entry->count += msg->progpayloadcount;
+ }
- if (msg->progpayload != NULL) {
- memcpy (&entry->vector[entry->count], msg->progpayload,
- sizeof (struct iovec) * msg->progpayloadcount);
- entry->count += msg->progpayloadcount;
- }
-
- entry->pending_vector = entry->vector;
- entry->pending_count = entry->count;
+ entry->pending_vector = entry->vector;
+ entry->pending_count = entry->count;
- if (msg->iobref != NULL)
- entry->iobref = iobref_ref (msg->iobref);
+ if (msg->iobref != NULL)
+ entry->iobref = iobref_ref(msg->iobref);
- INIT_LIST_HEAD (&entry->list);
+ INIT_LIST_HEAD(&entry->list);
-out:
- return entry;
+ return entry;
}
-
-void
-__socket_ioq_entry_free (struct ioq *entry)
+static void
+__socket_ioq_entry_free(struct ioq *entry)
{
- GF_VALIDATE_OR_GOTO ("socket", entry, out);
+ GF_VALIDATE_OR_GOTO("socket", entry, out);
- list_del_init (&entry->list);
- if (entry->iobref)
- iobref_unref (entry->iobref);
+ list_del_init(&entry->list);
+ if (entry->iobref)
+ iobref_unref(entry->iobref);
- /* TODO: use mem-pool */
- GF_FREE (entry);
+ /* TODO: use mem-pool */
+ GF_FREE(entry);
out:
- return;
+ return;
}
-
-void
-__socket_ioq_flush (rpc_transport_t *this)
+static void
+__socket_ioq_flush(socket_private_t *priv)
{
- socket_private_t *priv = NULL;
- struct ioq *entry = NULL;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
+ struct ioq *entry = NULL;
- while (!list_empty (&priv->ioq)) {
- entry = priv->ioq_next;
- __socket_ioq_entry_free (entry);
- }
-
-out:
- return;
+ while (!list_empty(&priv->ioq)) {
+ entry = priv->ioq_next;
+ __socket_ioq_entry_free(entry);
+ }
}
-
-int
-__socket_ioq_churn_entry (rpc_transport_t *this, struct ioq *entry)
+static int
+__socket_ioq_churn_entry(rpc_transport_t *this, struct ioq *entry)
{
- int ret = -1;
+ int ret = -1;
- ret = __socket_writev (this, entry->pending_vector,
- entry->pending_count,
- &entry->pending_vector,
- &entry->pending_count);
+ ret = __socket_writev(this, entry->pending_vector, entry->pending_count,
+ &entry->pending_vector, &entry->pending_count);
- if (ret == 0) {
- /* current entry was completely written */
- GF_ASSERT (entry->pending_count == 0);
- __socket_ioq_entry_free (entry);
- }
+ if (ret == 0) {
+ /* current entry was completely written */
+ GF_ASSERT(entry->pending_count == 0);
+ __socket_ioq_entry_free(entry);
+ }
- return ret;
+ return ret;
}
-
-int
-__socket_ioq_churn (rpc_transport_t *this)
+static int
+__socket_ioq_churn(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
- int ret = 0;
- struct ioq *entry = NULL;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ socket_private_t *priv = NULL;
+ int ret = 0;
+ struct ioq *entry = NULL;
- priv = this->private;
+ priv = this->private;
- while (!list_empty (&priv->ioq)) {
- /* pick next entry */
- entry = priv->ioq_next;
+ while (!list_empty(&priv->ioq)) {
+ /* pick next entry */
+ entry = priv->ioq_next;
- ret = __socket_ioq_churn_entry (this, entry);
+ ret = __socket_ioq_churn_entry(this, entry);
- if (ret != 0)
- break;
- }
+ if (ret != 0)
+ break;
+ }
- if (list_empty (&priv->ioq)) {
- /* all pending writes done, not interested in POLLOUT */
- priv->idx = event_select_on (this->ctx->event_pool,
- priv->sock, priv->idx, -1, 0);
- }
+ if (list_empty(&priv->ioq)) {
+ /* all pending writes done, not interested in POLLOUT */
+ priv->idx = gf_event_select_on(this->ctx->event_pool, priv->sock,
+ priv->idx, -1, 0);
+ }
-out:
- return ret;
+ return ret;
}
-
-int
-socket_event_poll_err (rpc_transport_t *this)
+static gf_boolean_t
+socket_event_poll_err(rpc_transport_t *this, int gen, int idx)
{
- socket_private_t *priv = NULL;
- int ret = -1;
+ socket_private_t *priv = NULL;
+ gf_boolean_t socket_closed = _gf_false;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ priv = this->private;
- priv = this->private;
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ if ((priv->gen == gen) && (priv->idx == idx) && (priv->sock >= 0)) {
+ __socket_ioq_flush(priv);
+ __socket_reset(this);
+ socket_closed = _gf_true;
+ }
+ }
+ pthread_mutex_unlock(&priv->out_lock);
- pthread_mutex_lock (&priv->lock);
+ if (socket_closed) {
+ pthread_mutex_lock(&priv->notify.lock);
{
- __socket_ioq_flush (this);
- __socket_reset (this);
+ while (priv->notify.in_progress)
+ pthread_cond_wait(&priv->notify.cond, &priv->notify.lock);
}
- pthread_mutex_unlock (&priv->lock);
+ pthread_mutex_unlock(&priv->notify.lock);
- rpc_transport_notify (this, RPC_TRANSPORT_DISCONNECT, this);
+ rpc_transport_notify(this, RPC_TRANSPORT_DISCONNECT, this);
+ }
-out:
- return ret;
+ return socket_closed;
}
-
-int
-socket_event_poll_out (rpc_transport_t *this)
+static int
+socket_event_poll_out(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
-
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->connected == 1) {
- ret = __socket_ioq_churn (this);
-
- if (ret == -1) {
- __socket_disconnect (this);
- }
- }
+ socket_private_t *priv = NULL;
+ int ret = -1;
+
+ priv = this->private;
+
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ if (priv->connected == 1) {
+ ret = __socket_ioq_churn(this);
+
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "__socket_ioq_churn returned -1; "
+ "disconnecting socket");
+ __socket_disconnect(this);
+ }
}
- pthread_mutex_unlock (&priv->lock);
+ }
+ pthread_mutex_unlock(&priv->out_lock);
- ret = rpc_transport_notify (this, RPC_TRANSPORT_MSG_SENT, NULL);
+ if (ret == 0)
+ rpc_transport_notify(this, RPC_TRANSPORT_MSG_SENT, NULL);
-out:
- return ret;
-}
+ if (ret > 0)
+ ret = 0;
+ return ret;
+}
-inline int
-__socket_read_simple_msg (rpc_transport_t *this)
+static int
+__socket_read_simple_msg(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
- int ret = 0;
- uint32_t remaining_size = 0;
- size_t bytes_read = 0;
+ int ret = 0;
+ uint32_t remaining_size = 0;
+ size_t bytes_read = 0;
+ socket_private_t *priv = NULL;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", this->private, out);
- priv = this->private;
+ priv = this->private;
- switch (priv->incoming.frag.simple_state) {
+ in = &priv->incoming;
+ frag = &in->frag;
+ switch (frag->simple_state) {
case SP_STATE_SIMPLE_MSG_INIT:
- remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
- - priv->incoming.frag.bytes_read;
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
- __socket_proto_init_pending (priv, remaining_size);
+ __socket_proto_init_pending(priv, remaining_size);
- priv->incoming.frag.simple_state =
- SP_STATE_READING_SIMPLE_MSG;
+ frag->simple_state = SP_STATE_READING_SIMPLE_MSG;
- /* fall through */
+ /* fall through */
case SP_STATE_READING_SIMPLE_MSG:
- ret = 0;
+ ret = 0;
- remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
- - priv->incoming.frag.bytes_read;
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
- if (remaining_size > 0) {
- ret = __socket_readv (this,
- priv->incoming.pending_vector, 1,
- &priv->incoming.pending_vector,
- &priv->incoming.pending_count,
- &bytes_read);
- }
+ if (remaining_size > 0) {
+ ret = __socket_readv(this, in->pending_vector, 1,
+ &in->pending_vector, &in->pending_count,
+ &bytes_read);
+ }
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "reading from socket failed. Error (%s), "
- "peer (%s)", strerror (errno),
- this->peerinfo.identifier);
- break;
- }
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "reading from socket failed. Error (%s), "
+ "peer (%s)",
+ strerror(errno), this->peerinfo.identifier);
+ break;
+ }
- priv->incoming.frag.bytes_read += bytes_read;
- priv->incoming.frag.fragcurrent += bytes_read;
+ frag->bytes_read += bytes_read;
+ frag->fragcurrent += bytes_read;
- if (ret > 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "partial read on non-blocking socket.");
- break;
- }
+ if (ret > 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "partial read on non-blocking socket.");
+ ret = 0;
+ break;
+ }
- if (ret == 0) {
- priv->incoming.frag.simple_state
- = SP_STATE_SIMPLE_MSG_INIT;
- }
- }
+ if (ret == 0) {
+ frag->simple_state = SP_STATE_SIMPLE_MSG_INIT;
+ }
+ }
out:
- return ret;
+ return ret;
}
-
-inline int
-__socket_read_simple_request (rpc_transport_t *this)
-{
- return __socket_read_simple_msg (this);
-}
-
-
#define rpc_cred_addr(buf) (buf + RPC_MSGTYPE_SIZE + RPC_CALL_BODY_SIZE - 4)
#define rpc_verf_addr(fragcurrent) (fragcurrent - 4)
@@ -804,2061 +1475,3327 @@ __socket_read_simple_request (rpc_transport_t *this)
#define rpc_progver_addr(buf) (buf + RPC_MSGTYPE_SIZE + 8)
#define rpc_procnum_addr(buf) (buf + RPC_MSGTYPE_SIZE + 12)
-inline int
-__socket_read_vectored_request (rpc_transport_t *this, rpcsvc_vector_sizer vector_sizer)
+static int
+__socket_read_vectored_request(rpc_transport_t *this,
+ rpcsvc_vector_sizer vector_sizer)
{
- socket_private_t *priv = NULL;
- int ret = 0;
- uint32_t credlen = 0, verflen = 0;
- char *addr = NULL;
- struct iobuf *iobuf = NULL;
- uint32_t remaining_size = 0;
- ssize_t readsize = 0;
- size_t size = 0;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
-
- switch (priv->incoming.frag.call_body.request.vector_state) {
+ socket_private_t *priv = NULL;
+ int ret = 0;
+ uint32_t credlen = 0, verflen = 0;
+ char *addr = NULL;
+ struct iobuf *iobuf = NULL;
+ uint32_t remaining_size = 0;
+ ssize_t readsize = 0;
+ size_t size = 0;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+ sp_rpcfrag_request_state_t *request = NULL;
+
+ priv = this->private;
+
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+ request = &frag->call_body.request;
+
+ switch (request->vector_state) {
case SP_STATE_VECTORED_REQUEST_INIT:
- priv->incoming.frag.call_body.request.vector_sizer_state = 0;
- addr = rpc_cred_addr (iobuf_ptr (priv->incoming.iobuf));
+ request->vector_sizer_state = 0;
- /* also read verf flavour and verflen */
- credlen = ntoh32 (*((uint32_t *)addr))
- + RPC_AUTH_FLAVOUR_N_LENGTH_SIZE;
+ addr = rpc_cred_addr(iobuf_ptr(in->iobuf));
- __socket_proto_init_pending (priv, credlen);
+ /* also read verf flavour and verflen */
+ credlen = ntoh32(*((uint32_t *)addr)) +
+ RPC_AUTH_FLAVOUR_N_LENGTH_SIZE;
- priv->incoming.frag.call_body.request.vector_state =
- SP_STATE_READING_CREDBYTES;
+ __socket_proto_init_pending(priv, credlen);
- /* fall through */
+ request->vector_state = SP_STATE_READING_CREDBYTES;
+
+ /* fall through */
case SP_STATE_READING_CREDBYTES:
- __socket_proto_read (priv, ret);
+ __socket_proto_read(priv, ret);
- priv->incoming.frag.call_body.request.vector_state =
- SP_STATE_READ_CREDBYTES;
+ request->vector_state = SP_STATE_READ_CREDBYTES;
- /* fall through */
+ /* fall through */
case SP_STATE_READ_CREDBYTES:
- addr = rpc_verf_addr (priv->incoming.frag.fragcurrent);
- verflen = ntoh32 (*((uint32_t *)addr));
+ addr = rpc_verf_addr(frag->fragcurrent);
+ verflen = ntoh32(*((uint32_t *)addr));
- if (verflen == 0) {
- priv->incoming.frag.call_body.request.vector_state
- = SP_STATE_READ_VERFBYTES;
- goto sp_state_read_verfbytes;
- }
- __socket_proto_init_pending (priv, verflen);
+ if (verflen == 0) {
+ request->vector_state = SP_STATE_READ_VERFBYTES;
+ goto sp_state_read_verfbytes;
+ }
+ __socket_proto_init_pending(priv, verflen);
- priv->incoming.frag.call_body.request.vector_state
- = SP_STATE_READING_VERFBYTES;
+ request->vector_state = SP_STATE_READING_VERFBYTES;
- /* fall through */
+ /* fall through */
case SP_STATE_READING_VERFBYTES:
- __socket_proto_read (priv, ret);
+ __socket_proto_read(priv, ret);
- priv->incoming.frag.call_body.request.vector_state =
- SP_STATE_READ_VERFBYTES;
+ request->vector_state = SP_STATE_READ_VERFBYTES;
- /* fall through */
+ /* fall through */
case SP_STATE_READ_VERFBYTES:
-sp_state_read_verfbytes:
- /* set the base_addr 'persistently' across multiple calls
- into the state machine */
- priv->incoming.proghdr_base_addr = priv->incoming.frag.fragcurrent;
-
- priv->incoming.frag.call_body.request.vector_sizer_state =
- vector_sizer (priv->incoming.frag.call_body.request.vector_sizer_state,
- &readsize, priv->incoming.proghdr_base_addr,
- priv->incoming.frag.fragcurrent);
- __socket_proto_init_pending (priv, readsize);
- priv->incoming.frag.call_body.request.vector_state
- = SP_STATE_READING_PROGHDR;
+ sp_state_read_verfbytes:
+ /* set the base_addr 'persistently' across multiple calls
+ into the state machine */
+ in->proghdr_base_addr = frag->fragcurrent;
- /* fall through */
+ request->vector_sizer_state = vector_sizer(
+ request->vector_sizer_state, &readsize, in->proghdr_base_addr,
+ frag->fragcurrent);
+ __socket_proto_init_pending(priv, readsize);
+
+ request->vector_state = SP_STATE_READING_PROGHDR;
+
+ /* fall through */
case SP_STATE_READING_PROGHDR:
- __socket_proto_read (priv, ret);
- priv->incoming.frag.call_body.request.vector_state =
- SP_STATE_READ_PROGHDR;
-
- /* fall through */
-
- case SP_STATE_READ_PROGHDR:
-sp_state_read_proghdr:
- priv->incoming.frag.call_body.request.vector_sizer_state =
- vector_sizer (priv->incoming.frag.call_body.request.vector_sizer_state,
- &readsize,
- priv->incoming.proghdr_base_addr,
- priv->incoming.frag.fragcurrent);
- if (readsize == 0) {
- priv->incoming.frag.call_body.request.vector_state =
- SP_STATE_READ_PROGHDR_XDATA;
- goto sp_state_read_proghdr_xdata;
- }
+ __socket_proto_read(priv, ret);
- __socket_proto_init_pending (priv, readsize);
+ request->vector_state = SP_STATE_READ_PROGHDR;
- priv->incoming.frag.call_body.request.vector_state =
- SP_STATE_READING_PROGHDR_XDATA;
+ /* fall through */
- /* fall through */
+ case SP_STATE_READ_PROGHDR:
+ sp_state_read_proghdr:
+ request->vector_sizer_state = vector_sizer(
+ request->vector_sizer_state, &readsize, in->proghdr_base_addr,
+ frag->fragcurrent);
+ if (readsize == 0) {
+ request->vector_state = SP_STATE_READ_PROGHDR_XDATA;
+ goto sp_state_read_proghdr_xdata;
+ }
- case SP_STATE_READING_PROGHDR_XDATA:
- __socket_proto_read (priv, ret);
+ __socket_proto_init_pending(priv, readsize);
- priv->incoming.frag.call_body.request.vector_state =
- SP_STATE_READ_PROGHDR;
- /* check if the vector_sizer() has more to say */
- goto sp_state_read_proghdr;
+ request->vector_state = SP_STATE_READING_PROGHDR_XDATA;
- case SP_STATE_READ_PROGHDR_XDATA:
-sp_state_read_proghdr_xdata:
- if (priv->incoming.payload_vector.iov_base == NULL) {
-
- size = RPC_FRAGSIZE (priv->incoming.fraghdr) -
- priv->incoming.frag.bytes_read;
- iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
- if (!iobuf) {
- ret = -1;
- break;
- }
+ /* fall through */
- if (priv->incoming.iobref == NULL) {
- priv->incoming.iobref = iobref_new ();
- if (priv->incoming.iobref == NULL) {
- ret = -1;
- iobuf_unref (iobuf);
- break;
- }
- }
+ case SP_STATE_READING_PROGHDR_XDATA:
+ __socket_proto_read(priv, ret);
- iobref_add (priv->incoming.iobref, iobuf);
- iobuf_unref (iobuf);
+ request->vector_state = SP_STATE_READ_PROGHDR;
+ /* check if the vector_sizer() has more to say */
+ goto sp_state_read_proghdr;
- priv->incoming.payload_vector.iov_base
- = iobuf_ptr (iobuf);
+ case SP_STATE_READ_PROGHDR_XDATA:
+ sp_state_read_proghdr_xdata:
+ if (in->payload_vector.iov_base == NULL) {
+ size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, size);
+ if (!iobuf) {
+ ret = -1;
+ break;
+ }
- priv->incoming.frag.fragcurrent = iobuf_ptr (iobuf);
+ if (in->iobref == NULL) {
+ in->iobref = iobref_new();
+ if (in->iobref == NULL) {
+ ret = -1;
+ iobuf_unref(iobuf);
+ break;
+ }
}
- priv->incoming.frag.call_body.request.vector_state =
- SP_STATE_READING_PROG;
+ iobref_add(in->iobref, iobuf);
- /* fall through */
+ in->payload_vector.iov_base = iobuf_ptr(iobuf);
+ frag->fragcurrent = iobuf_ptr(iobuf);
- case SP_STATE_READING_PROG:
- /* now read the remaining rpc msg into buffer pointed by
- * fragcurrent
- */
+ iobuf_unref(iobuf);
+ }
- ret = __socket_read_simple_msg (this);
-
- remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
- - priv->incoming.frag.bytes_read;
-
- if ((ret == -1)
- || ((ret == 0)
- && (remaining_size == 0)
- && RPC_LASTFRAG (priv->incoming.fraghdr))) {
- priv->incoming.frag.call_body.request.vector_state
- = SP_STATE_VECTORED_REQUEST_INIT;
- priv->incoming.payload_vector.iov_len
- = (unsigned long)priv->incoming.frag.fragcurrent
- - (unsigned long)
- priv->incoming.payload_vector.iov_base;
- }
- break;
- }
+ request->vector_state = SP_STATE_READING_PROG;
-out:
- return ret;
-}
+ /* fall through */
-inline int
-__socket_read_request (rpc_transport_t *this)
-{
- socket_private_t *priv = NULL;
- uint32_t prognum = 0, procnum = 0, progver = 0;
- uint32_t remaining_size = 0;
- int ret = -1;
- char *buf = NULL;
- rpcsvc_vector_sizer vector_sizer = NULL;
+ case SP_STATE_READING_PROG:
+ /* now read the remaining rpc msg into buffer pointed by
+ * fragcurrent
+ */
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ ret = __socket_read_simple_msg(this);
- priv = this->private;
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
- switch (priv->incoming.frag.call_body.request.header_state) {
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ RPC_LASTFRAG(in->fraghdr))) {
+ request->vector_state = SP_STATE_VECTORED_REQUEST_INIT;
+ in->payload_vector.iov_len = ((unsigned long)frag->fragcurrent -
+ (unsigned long)
+ in->payload_vector.iov_base);
+ }
+ break;
+ }
+ return ret;
+}
+
+static int
+__socket_read_request(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ uint32_t prognum = 0, procnum = 0, progver = 0;
+ uint32_t remaining_size = 0;
+ int ret = -1;
+ char *buf = NULL;
+ rpcsvc_vector_sizer vector_sizer = NULL;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+ sp_rpcfrag_request_state_t *request = NULL;
+
+ priv = this->private;
+
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+ request = &frag->call_body.request;
+
+ switch (request->header_state) {
case SP_STATE_REQUEST_HEADER_INIT:
- __socket_proto_init_pending (priv, RPC_CALL_BODY_SIZE);
+ __socket_proto_init_pending(priv, RPC_CALL_BODY_SIZE);
- priv->incoming.frag.call_body.request.header_state
- = SP_STATE_READING_RPCHDR1;
+ request->header_state = SP_STATE_READING_RPCHDR1;
- /* fall through */
+ /* fall through */
case SP_STATE_READING_RPCHDR1:
- __socket_proto_read (priv, ret);
+ __socket_proto_read(priv, ret);
- priv->incoming.frag.call_body.request.header_state =
- SP_STATE_READ_RPCHDR1;
+ request->header_state = SP_STATE_READ_RPCHDR1;
- /* fall through */
+ /* fall through */
case SP_STATE_READ_RPCHDR1:
- buf = rpc_prognum_addr (iobuf_ptr (priv->incoming.iobuf));
- prognum = ntoh32 (*((uint32_t *)buf));
+ buf = rpc_prognum_addr(iobuf_ptr(in->iobuf));
+ prognum = ntoh32(*((uint32_t *)buf));
- buf = rpc_progver_addr (iobuf_ptr (priv->incoming.iobuf));
- progver = ntoh32 (*((uint32_t *)buf));
+ buf = rpc_progver_addr(iobuf_ptr(in->iobuf));
+ progver = ntoh32(*((uint32_t *)buf));
- buf = rpc_procnum_addr (iobuf_ptr (priv->incoming.iobuf));
- procnum = ntoh32 (*((uint32_t *)buf));
+ buf = rpc_procnum_addr(iobuf_ptr(in->iobuf));
+ procnum = ntoh32(*((uint32_t *)buf));
- if (this->listener) {
- /* this check is needed as rpcsvc and rpc-clnt actor structures are
- * not same */
- vector_sizer = rpcsvc_get_program_vector_sizer ((rpcsvc_t *)this->mydata,
- prognum, progver, procnum);
- }
+ if (priv->is_server) {
+ /* this check is needed as rpcsvc and rpc-clnt
+ * actor structures are not same */
+ vector_sizer = rpcsvc_get_program_vector_sizer(
+ (rpcsvc_t *)this->mydata, prognum, progver, procnum);
+ }
- if (vector_sizer) {
- ret = __socket_read_vectored_request (this, vector_sizer);
- } else {
- ret = __socket_read_simple_request (this);
- }
+ if (vector_sizer) {
+ ret = __socket_read_vectored_request(this, vector_sizer);
+ } else {
+ ret = __socket_read_simple_msg(this);
+ }
- remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
- - priv->incoming.frag.bytes_read;
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
- if ((ret == -1)
- || ((ret == 0)
- && (remaining_size == 0)
- && (RPC_LASTFRAG (priv->incoming.fraghdr)))) {
- priv->incoming.frag.call_body.request.header_state =
- SP_STATE_REQUEST_HEADER_INIT;
- }
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ (RPC_LASTFRAG(in->fraghdr)))) {
+ request->header_state = SP_STATE_REQUEST_HEADER_INIT;
+ }
- break;
- }
+ break;
+ }
-out:
- return ret;
+ return ret;
}
-
-inline int
-__socket_read_accepted_successful_reply (rpc_transport_t *this)
+static int
+__socket_read_accepted_successful_reply(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
- int ret = 0;
- struct iobuf *iobuf = NULL;
- gfs3_read_rsp read_rsp = {0, };
- ssize_t size = 0;
- ssize_t default_read_size = 0;
- char *proghdr_buf = NULL;
- XDR xdr;
+ socket_private_t *priv = NULL;
+ int ret = 0;
+ struct iobuf *iobuf = NULL;
+ gfs3_read_rsp read_rsp = {
+ 0,
+ };
+ ssize_t size = 0;
+ ssize_t default_read_size = 0;
+ XDR xdr;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+ uint32_t remaining_size = 0;
+
+ priv = this->private;
+
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ switch (frag->call_body.reply.accepted_success_state) {
+ case SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT:
+ default_read_size = xdr_sizeof((xdrproc_t)xdr_gfs3_read_rsp,
+ &read_rsp);
+
+ /* We need to store the current base address because we will
+ * need it after a partial read. */
+ in->proghdr_base_addr = frag->fragcurrent;
+
+ __socket_proto_init_pending(priv, default_read_size);
+
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READING_PROC_HEADER;
+
+ /* fall through */
+
+ case SP_STATE_READING_PROC_HEADER:
+ __socket_proto_read(priv, ret);
+
+ /* there can be 'xdata' in read response, figure it out */
+ default_read_size = frag->fragcurrent - in->proghdr_base_addr;
+ xdrmem_create(&xdr, in->proghdr_base_addr, default_read_size,
+ XDR_DECODE);
+
+ /* This will fail if there is xdata sent from server, if not,
+ well and good, we don't need to worry about */
+ xdr_gfs3_read_rsp(&xdr, &read_rsp);
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ free(read_rsp.xdata.xdata_val);
- priv = this->private;
+ /* need to round off to proper gf_roof (%4), as XDR packing pads
+ the end of opaque object with '0' */
+ size = gf_roof(read_rsp.xdata.xdata_len, 4);
- switch (priv->incoming.frag.call_body.reply.accepted_success_state) {
+ if (!size) {
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READ_PROC_OPAQUE;
+ goto read_proc_opaque;
+ }
+ __socket_proto_init_pending(priv, size);
+
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READING_PROC_OPAQUE;
+ /* fall through */
+
+ case SP_STATE_READING_PROC_OPAQUE:
+ __socket_proto_read(priv, ret);
+
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READ_PROC_OPAQUE;
+ /* fall through */
+
+ case SP_STATE_READ_PROC_OPAQUE:
+ read_proc_opaque:
+ if (in->payload_vector.iov_base == NULL) {
+ size = (RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read);
+
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, size);
+ if (iobuf == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ if (in->iobref == NULL) {
+ in->iobref = iobref_new();
+ if (in->iobref == NULL) {
+ ret = -1;
+ iobuf_unref(iobuf);
+ goto out;
+ }
+ }
+
+ ret = iobref_add(in->iobref, iobuf);
+ iobuf_unref(iobuf);
+ if (ret < 0) {
+ goto out;
+ }
+
+ in->payload_vector.iov_base = iobuf_ptr(iobuf);
+ in->payload_vector.iov_len = size;
+ }
+
+ frag->fragcurrent = in->payload_vector.iov_base;
+
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READ_PROC_HEADER;
+
+ /* fall through */
+
+ case SP_STATE_READ_PROC_HEADER:
+ /* now read the entire remaining msg into new iobuf */
+ ret = __socket_read_simple_msg(this);
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ RPC_LASTFRAG(in->fraghdr))) {
+ frag->call_body.reply.accepted_success_state =
+ SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT;
+ }
+
+ break;
+ }
+
+out:
+ return ret;
+}
+
+static int
+__socket_read_accepted_successful_reply_v2(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ int ret = 0;
+ struct iobuf *iobuf = NULL;
+ gfx_read_rsp read_rsp = {
+ 0,
+ };
+ ssize_t size = 0;
+ ssize_t default_read_size = 0;
+ XDR xdr;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+ uint32_t remaining_size = 0;
+
+ priv = this->private;
+
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ switch (frag->call_body.reply.accepted_success_state) {
case SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT:
- default_read_size = xdr_sizeof ((xdrproc_t) xdr_gfs3_read_rsp,
- &read_rsp);
+ default_read_size = xdr_sizeof((xdrproc_t)xdr_gfx_read_rsp,
+ &read_rsp);
- proghdr_buf = priv->incoming.frag.fragcurrent;
+ /* We need to store the current base address because we will
+ * need it after a partial read. */
+ in->proghdr_base_addr = frag->fragcurrent;
- __socket_proto_init_pending (priv, default_read_size);
+ __socket_proto_init_pending(priv, default_read_size);
- priv->incoming.frag.call_body.reply.accepted_success_state
- = SP_STATE_READING_PROC_HEADER;
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READING_PROC_HEADER;
- /* fall through */
+ /* fall through */
case SP_STATE_READING_PROC_HEADER:
- __socket_proto_read (priv, ret);
+ __socket_proto_read(priv, ret);
- /* there can be 'xdata' in read response, figure it out */
- xdrmem_create (&xdr, proghdr_buf, default_read_size,
- XDR_DECODE);
+ /* there can be 'xdata' in read response, figure it out */
+ default_read_size = frag->fragcurrent - in->proghdr_base_addr;
- /* This will fail if there is xdata sent from server, if not,
- well and good, we don't need to worry about */
- xdr_gfs3_read_rsp (&xdr, &read_rsp);
+ xdrmem_create(&xdr, in->proghdr_base_addr, default_read_size,
+ XDR_DECODE);
- if (read_rsp.xdata.xdata_val)
- free (read_rsp.xdata.xdata_val);
+ /* This will fail if there is xdata sent from server, if not,
+ well and good, we don't need to worry about */
+ xdr_gfx_read_rsp(&xdr, &read_rsp);
- /* need to round off to proper roof (%4), as XDR packing pads
- the end of opaque object with '0' */
- size = roof (read_rsp.xdata.xdata_len, 4);
+ free(read_rsp.xdata.pairs.pairs_val);
- if (!size) {
- priv->incoming.frag.call_body.reply.accepted_success_state
- = SP_STATE_READ_PROC_OPAQUE;
- goto read_proc_opaque;
- }
+ /* need to round off to proper gf_roof (%4), as XDR packing pads
+ the end of opaque object with '0' */
+ size = gf_roof(read_rsp.xdata.xdr_size, 4);
+
+ if (!size) {
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READ_PROC_OPAQUE;
+ goto read_proc_opaque;
+ }
- __socket_proto_init_pending (priv, size);
+ __socket_proto_init_pending(priv, size);
- priv->incoming.frag.call_body.reply.accepted_success_state
- = SP_STATE_READING_PROC_OPAQUE;
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READING_PROC_OPAQUE;
+ /* fall through */
case SP_STATE_READING_PROC_OPAQUE:
- __socket_proto_read (priv, ret);
+ __socket_proto_read(priv, ret);
- priv->incoming.frag.call_body.reply.accepted_success_state
- = SP_STATE_READ_PROC_OPAQUE;
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READ_PROC_OPAQUE;
+ /* fall through */
case SP_STATE_READ_PROC_OPAQUE:
read_proc_opaque:
- if (priv->incoming.payload_vector.iov_base == NULL) {
-
- size = (RPC_FRAGSIZE (priv->incoming.fraghdr) -
- priv->incoming.frag.bytes_read);
-
- iobuf = iobuf_get2 (this->ctx->iobuf_pool, size);
- if (iobuf == NULL) {
- ret = -1;
- goto out;
- }
-
- if (priv->incoming.iobref == NULL) {
- priv->incoming.iobref = iobref_new ();
- if (priv->incoming.iobref == NULL) {
- ret = -1;
- iobuf_unref (iobuf);
- goto out;
- }
- }
+ if (in->payload_vector.iov_base == NULL) {
+ size = (RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read);
- iobref_add (priv->incoming.iobref, iobuf);
- iobuf_unref (iobuf);
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, size);
+ if (iobuf == NULL) {
+ ret = -1;
+ goto out;
+ }
- priv->incoming.payload_vector.iov_base
- = iobuf_ptr (iobuf);
+ if (in->iobref == NULL) {
+ in->iobref = iobref_new();
+ if (in->iobref == NULL) {
+ ret = -1;
+ iobuf_unref(iobuf);
+ goto out;
+ }
+ }
- priv->incoming.payload_vector.iov_len = size;
+ ret = iobref_add(in->iobref, iobuf);
+ iobuf_unref(iobuf);
+ if (ret < 0) {
+ goto out;
}
- priv->incoming.frag.fragcurrent
- = priv->incoming.payload_vector.iov_base;
+ in->payload_vector.iov_base = iobuf_ptr(iobuf);
+ in->payload_vector.iov_len = size;
+ }
- priv->incoming.frag.call_body.reply.accepted_success_state
- = SP_STATE_READ_PROC_HEADER;
+ frag->fragcurrent = in->payload_vector.iov_base;
- /* fall through */
+ frag->call_body.reply
+ .accepted_success_state = SP_STATE_READ_PROC_HEADER;
- case SP_STATE_READ_PROC_HEADER:
- /* now read the entire remaining msg into new iobuf */
- ret = __socket_read_simple_msg (this);
- if ((ret == -1)
- || ((ret == 0)
- && RPC_LASTFRAG (priv->incoming.fraghdr))) {
- priv->incoming.frag.call_body.reply.accepted_success_state
- = SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT;
- }
+ /* fall through */
- break;
- }
+ case SP_STATE_READ_PROC_HEADER:
+ /* now read the entire remaining msg into new iobuf */
+ ret = __socket_read_simple_msg(this);
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ RPC_LASTFRAG(in->fraghdr))) {
+ frag->call_body.reply.accepted_success_state =
+ SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT;
+ }
+
+ break;
+ }
out:
- return ret;
+ return ret;
}
#define rpc_reply_verflen_addr(fragcurrent) ((char *)fragcurrent - 4)
#define rpc_reply_accept_status_addr(fragcurrent) ((char *)fragcurrent - 4)
-inline int
-__socket_read_accepted_reply (rpc_transport_t *this)
+static int
+__socket_read_accepted_reply(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
- int ret = -1;
- char *buf = NULL;
- uint32_t verflen = 0, len = 0;
- uint32_t remaining_size = 0;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
-
- switch (priv->incoming.frag.call_body.reply.accepted_state) {
-
+ socket_private_t *priv = NULL;
+ int ret = -1;
+ char *buf = NULL;
+ uint32_t verflen = 0, len = 0;
+ uint32_t remaining_size = 0;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+
+ priv = this->private;
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ switch (frag->call_body.reply.accepted_state) {
case SP_STATE_ACCEPTED_REPLY_INIT:
- __socket_proto_init_pending (priv,
- RPC_AUTH_FLAVOUR_N_LENGTH_SIZE);
+ __socket_proto_init_pending(priv, RPC_AUTH_FLAVOUR_N_LENGTH_SIZE);
- priv->incoming.frag.call_body.reply.accepted_state
- = SP_STATE_READING_REPLY_VERFLEN;
+ frag->call_body.reply
+ .accepted_state = SP_STATE_READING_REPLY_VERFLEN;
- /* fall through */
+ /* fall through */
case SP_STATE_READING_REPLY_VERFLEN:
- __socket_proto_read (priv, ret);
+ __socket_proto_read(priv, ret);
- priv->incoming.frag.call_body.reply.accepted_state
- = SP_STATE_READ_REPLY_VERFLEN;
+ frag->call_body.reply.accepted_state = SP_STATE_READ_REPLY_VERFLEN;
- /* fall through */
+ /* fall through */
case SP_STATE_READ_REPLY_VERFLEN:
- buf = rpc_reply_verflen_addr (priv->incoming.frag.fragcurrent);
+ buf = rpc_reply_verflen_addr(frag->fragcurrent);
- verflen = ntoh32 (*((uint32_t *) buf));
+ verflen = ntoh32(*((uint32_t *)buf));
- /* also read accept status along with verf data */
- len = verflen + RPC_ACCEPT_STATUS_LEN;
+ /* also read accept status along with verf data */
+ len = verflen + RPC_ACCEPT_STATUS_LEN;
- __socket_proto_init_pending (priv, len);
+ __socket_proto_init_pending(priv, len);
- priv->incoming.frag.call_body.reply.accepted_state
- = SP_STATE_READING_REPLY_VERFBYTES;
+ frag->call_body.reply
+ .accepted_state = SP_STATE_READING_REPLY_VERFBYTES;
- /* fall through */
+ /* fall through */
case SP_STATE_READING_REPLY_VERFBYTES:
- __socket_proto_read (priv, ret);
+ __socket_proto_read(priv, ret);
- priv->incoming.frag.call_body.reply.accepted_state
- = SP_STATE_READ_REPLY_VERFBYTES;
+ frag->call_body.reply
+ .accepted_state = SP_STATE_READ_REPLY_VERFBYTES;
- buf = rpc_reply_accept_status_addr (priv->incoming.frag.fragcurrent);
+ buf = rpc_reply_accept_status_addr(frag->fragcurrent);
- priv->incoming.frag.call_body.reply.accept_status
- = ntoh32 (*(uint32_t *) buf);
+ frag->call_body.reply.accept_status = ntoh32(*(uint32_t *)buf);
- /* fall through */
+ /* fall through */
case SP_STATE_READ_REPLY_VERFBYTES:
- if (priv->incoming.frag.call_body.reply.accept_status
- == SUCCESS) {
- ret = __socket_read_accepted_successful_reply (this);
+ if (frag->call_body.reply.accept_status == SUCCESS) {
+ /* Need two different methods here for different protocols
+ Mainly because the exact XDR is used to calculate the
+ size of response */
+ if ((in->request_info->procnum == GFS3_OP_READ) &&
+ (in->request_info->prognum == GLUSTER_FOP_PROGRAM) &&
+ (in->request_info->progver == GLUSTER_FOP_VERSION_v2)) {
+ ret = __socket_read_accepted_successful_reply_v2(this);
} else {
- /* read entire remaining msg into buffer pointed to by
- * fragcurrent
- */
- ret = __socket_read_simple_msg (this);
+ ret = __socket_read_accepted_successful_reply(this);
}
+ } else {
+ /* read entire remaining msg into buffer pointed to by
+ * fragcurrent
+ */
+ ret = __socket_read_simple_msg(this);
+ }
- remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
- - priv->incoming.frag.bytes_read;
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
- if ((ret == -1)
- || ((ret == 0)
- && (remaining_size == 0)
- && (RPC_LASTFRAG (priv->incoming.fraghdr)))) {
- priv->incoming.frag.call_body.reply.accepted_state
- = SP_STATE_ACCEPTED_REPLY_INIT;
- }
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ (RPC_LASTFRAG(in->fraghdr)))) {
+ frag->call_body.reply
+ .accepted_state = SP_STATE_ACCEPTED_REPLY_INIT;
+ }
- break;
- }
+ break;
+ }
-out:
- return ret;
+ return ret;
}
-
-inline int
-__socket_read_denied_reply (rpc_transport_t *this)
+static int
+__socket_read_denied_reply(rpc_transport_t *this)
{
- return __socket_read_simple_msg (this);
+ return __socket_read_simple_msg(this);
}
-
#define rpc_reply_status_addr(fragcurrent) ((char *)fragcurrent - 4)
-
-inline int
-__socket_read_vectored_reply (rpc_transport_t *this)
+static int
+__socket_read_vectored_reply(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
- int ret = 0;
- char *buf = NULL;
- uint32_t remaining_size = 0;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
-
- switch (priv->incoming.frag.call_body.reply.status_state) {
-
+ socket_private_t *priv = NULL;
+ int ret = 0;
+ char *buf = NULL;
+ uint32_t remaining_size = 0;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+
+ priv = this->private;
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ switch (frag->call_body.reply.status_state) {
case SP_STATE_ACCEPTED_REPLY_INIT:
- __socket_proto_init_pending (priv, RPC_REPLY_STATUS_SIZE);
+ __socket_proto_init_pending(priv, RPC_REPLY_STATUS_SIZE);
- priv->incoming.frag.call_body.reply.status_state
- = SP_STATE_READING_REPLY_STATUS;
+ frag->call_body.reply.status_state = SP_STATE_READING_REPLY_STATUS;
- /* fall through */
+ /* fall through */
case SP_STATE_READING_REPLY_STATUS:
- __socket_proto_read (priv, ret);
+ __socket_proto_read(priv, ret);
- buf = rpc_reply_status_addr (priv->incoming.frag.fragcurrent);
+ buf = rpc_reply_status_addr(frag->fragcurrent);
- priv->incoming.frag.call_body.reply.accept_status
- = ntoh32 (*((uint32_t *) buf));
+ frag->call_body.reply.accept_status = ntoh32(*((uint32_t *)buf));
- priv->incoming.frag.call_body.reply.status_state
- = SP_STATE_READ_REPLY_STATUS;
+ frag->call_body.reply.status_state = SP_STATE_READ_REPLY_STATUS;
- /* fall through */
+ /* fall through */
case SP_STATE_READ_REPLY_STATUS:
- if (priv->incoming.frag.call_body.reply.accept_status
- == MSG_ACCEPTED) {
- ret = __socket_read_accepted_reply (this);
- } else {
- ret = __socket_read_denied_reply (this);
- }
-
- remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
- - priv->incoming.frag.bytes_read;
-
- if ((ret == -1)
- || ((ret == 0)
- && (remaining_size == 0)
- && (RPC_LASTFRAG (priv->incoming.fraghdr)))) {
- priv->incoming.frag.call_body.reply.status_state
- = SP_STATE_ACCEPTED_REPLY_INIT;
- priv->incoming.payload_vector.iov_len
- = (unsigned long)priv->incoming.frag.fragcurrent
- - (unsigned long)
- priv->incoming.payload_vector.iov_base;
- }
- break;
- }
-
-out:
- return ret;
+ if (frag->call_body.reply.accept_status == MSG_ACCEPTED) {
+ ret = __socket_read_accepted_reply(this);
+ } else {
+ ret = __socket_read_denied_reply(this);
+ }
+
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
+
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ (RPC_LASTFRAG(in->fraghdr)))) {
+ frag->call_body.reply
+ .status_state = SP_STATE_VECTORED_REPLY_STATUS_INIT;
+ in->payload_vector.iov_len = (unsigned long)frag->fragcurrent -
+ (unsigned long)
+ in->payload_vector.iov_base;
+ }
+ break;
+ }
+
+ return ret;
}
-
-inline int
-__socket_read_simple_reply (rpc_transport_t *this)
+static int
+__socket_read_simple_reply(rpc_transport_t *this)
{
- return __socket_read_simple_msg (this);
+ return __socket_read_simple_msg(this);
}
#define rpc_xid_addr(buf) (buf)
-inline int
-__socket_read_reply (rpc_transport_t *this)
+static int
+__socket_read_reply(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
- char *buf = NULL;
- int32_t ret = -1;
- rpc_request_info_t *request_info = NULL;
- char map_xid = 0;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
-
- buf = rpc_xid_addr (iobuf_ptr (priv->incoming.iobuf));
+ socket_private_t *priv = NULL;
+ char *buf = NULL;
+ int32_t ret = -1;
+ rpc_request_info_t *request_info = NULL;
+ char map_xid = 0;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+
+ priv = this->private;
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ buf = rpc_xid_addr(iobuf_ptr(in->iobuf));
+
+ if (in->request_info == NULL) {
+ in->request_info = GF_CALLOC(1, sizeof(*request_info),
+ gf_common_mt_rpc_trans_reqinfo_t);
+ if (in->request_info == NULL) {
+ goto out;
+ }
- if (priv->incoming.request_info == NULL) {
- priv->incoming.request_info = GF_CALLOC (1,
- sizeof (*request_info),
- gf_common_mt_rpc_trans_reqinfo_t);
- if (priv->incoming.request_info == NULL) {
- goto out;
- }
+ map_xid = 1;
+ }
- map_xid = 1;
- }
+ request_info = in->request_info;
- request_info = priv->incoming.request_info;
+ if (map_xid) {
+ request_info->xid = ntoh32(*((uint32_t *)buf));
- if (map_xid) {
- request_info->xid = ntoh32 (*((uint32_t *) buf));
+ /* release priv->lock, so as to avoid deadlock b/w conn->lock
+ * and priv->lock, since we are doing an upcall here.
+ */
+ frag->state = SP_STATE_NOTIFYING_XID;
+ ret = rpc_transport_notify(this, RPC_TRANSPORT_MAP_XID_REQUEST,
+ in->request_info);
- /* release priv->lock, so as to avoid deadlock b/w conn->lock
- * and priv->lock, since we are doing an upcall here.
- */
- pthread_mutex_unlock (&priv->lock);
- {
- ret = rpc_transport_notify (this,
- RPC_TRANSPORT_MAP_XID_REQUEST,
- priv->incoming.request_info);
- }
- pthread_mutex_lock (&priv->lock);
+ /* Transition back to externally visible state. */
+ frag->state = SP_STATE_READ_MSGTYPE;
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "notify for event MAP_XID failed");
- goto out;
- }
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "notify for event MAP_XID failed for %s",
+ this->peerinfo.identifier);
+ goto out;
}
+ }
- if ((request_info->prognum == GLUSTER3_1_FOP_PROGRAM)
- && (request_info->procnum == GF_FOP_READ)) {
- if (map_xid && request_info->rsp.rsp_payload_count != 0) {
- priv->incoming.iobref
- = iobref_ref (request_info->rsp.rsp_iobref);
- priv->incoming.payload_vector
- = *request_info->rsp.rsp_payload;
- }
-
- ret = __socket_read_vectored_reply (this);
- } else {
- ret = __socket_read_simple_reply (this);
+ if ((request_info->prognum == GLUSTER_FOP_PROGRAM) &&
+ (request_info->procnum == GF_FOP_READ)) {
+ if (map_xid && request_info->rsp.rsp_payload_count != 0) {
+ in->iobref = iobref_ref(request_info->rsp.rsp_iobref);
+ in->payload_vector = *request_info->rsp.rsp_payload;
}
+
+ ret = __socket_read_vectored_reply(this);
+ } else {
+ ret = __socket_read_simple_reply(this);
+ }
out:
- return ret;
+ return ret;
}
-
/* returns the number of bytes yet to be read in a fragment */
-inline int
-__socket_read_frag (rpc_transport_t *this)
+static int
+__socket_read_frag(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
- int32_t ret = 0;
- char *buf = NULL;
- uint32_t remaining_size = 0;
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
-
- switch (priv->incoming.frag.state) {
+ socket_private_t *priv = NULL;
+ int32_t ret = 0;
+ char *buf = NULL;
+ uint32_t remaining_size = 0;
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
+
+ priv = this->private;
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
+
+ switch (frag->state) {
case SP_STATE_NADA:
- __socket_proto_init_pending (priv, RPC_MSGTYPE_SIZE);
+ __socket_proto_init_pending(priv, RPC_MSGTYPE_SIZE);
- priv->incoming.frag.state = SP_STATE_READING_MSGTYPE;
+ frag->state = SP_STATE_READING_MSGTYPE;
- /* fall through */
+ /* fall through */
case SP_STATE_READING_MSGTYPE:
- __socket_proto_read (priv, ret);
+ __socket_proto_read(priv, ret);
- priv->incoming.frag.state = SP_STATE_READ_MSGTYPE;
- /* fall through */
+ frag->state = SP_STATE_READ_MSGTYPE;
+ /* fall through */
case SP_STATE_READ_MSGTYPE:
- buf = rpc_msgtype_addr (iobuf_ptr (priv->incoming.iobuf));
- priv->incoming.msg_type = ntoh32 (*((uint32_t *)buf));
-
- if (priv->incoming.msg_type == CALL) {
- ret = __socket_read_request (this);
- } else if (priv->incoming.msg_type == REPLY) {
- ret = __socket_read_reply (this);
- } else if (priv->incoming.msg_type == GF_UNIVERSAL_ANSWER) {
- gf_log ("rpc", GF_LOG_ERROR,
- "older version of protocol/process trying to "
- "connect from %s. use newer version on that node",
- this->peerinfo.identifier);
- } else {
- gf_log ("rpc", GF_LOG_ERROR,
- "wrong MSG-TYPE (%d) received from %s",
- priv->incoming.msg_type,
- this->peerinfo.identifier);
- ret = -1;
- }
+ buf = rpc_msgtype_addr(iobuf_ptr(in->iobuf));
+ in->msg_type = ntoh32(*((uint32_t *)buf));
+
+ if (in->msg_type == CALL) {
+ ret = __socket_read_request(this);
+ } else if (in->msg_type == REPLY) {
+ ret = __socket_read_reply(this);
+ } else if (in->msg_type == (msg_type_t)GF_UNIVERSAL_ANSWER) {
+ gf_log("rpc", GF_LOG_ERROR,
+ "older version of protocol/process trying to "
+ "connect from %s. use newer version on that node",
+ this->peerinfo.identifier);
+ } else {
+ gf_log("rpc", GF_LOG_ERROR,
+ "wrong MSG-TYPE (%d) received from %s", in->msg_type,
+ this->peerinfo.identifier);
+ ret = -1;
+ }
- remaining_size = RPC_FRAGSIZE (priv->incoming.fraghdr)
- - priv->incoming.frag.bytes_read;
+ remaining_size = RPC_FRAGSIZE(in->fraghdr) - frag->bytes_read;
- if ((ret == -1)
- || ((ret == 0)
- && (remaining_size == 0)
- && (RPC_LASTFRAG (priv->incoming.fraghdr)))) {
- priv->incoming.frag.state = SP_STATE_NADA;
- }
+ if ((ret < 0) || ((ret == 0) && (remaining_size == 0) &&
+ (RPC_LASTFRAG(in->fraghdr)))) {
+ /* frag->state = SP_STATE_NADA; */
+ frag->state = SP_STATE_RPCFRAG_INIT;
+ }
- break;
- }
+ break;
-out:
- return ret;
-}
+ case SP_STATE_NOTIFYING_XID:
+ /* Another epoll thread is notifying higher layers
+ *of reply's xid. */
+ errno = EAGAIN;
+ return -1;
+ break;
+ }
+ return ret;
+}
-inline
-void __socket_reset_priv (socket_private_t *priv)
+static void
+__socket_reset_priv(socket_private_t *priv)
{
- if (priv->incoming.iobref) {
- iobref_unref (priv->incoming.iobref);
- priv->incoming.iobref = NULL;
- }
+ struct gf_sock_incoming *in = NULL;
- if (priv->incoming.iobuf) {
- iobuf_unref (priv->incoming.iobuf);
- }
+ /* used to reduce the indirection */
+ in = &priv->incoming;
- if (priv->incoming.request_info != NULL) {
- GF_FREE (priv->incoming.request_info);
- priv->incoming.request_info = NULL;
- }
+ if (in->iobref) {
+ iobref_unref(in->iobref);
+ in->iobref = NULL;
+ }
- memset (&priv->incoming.payload_vector, 0,
- sizeof (priv->incoming.payload_vector));
+ if (in->iobuf) {
+ iobuf_unref(in->iobuf);
+ in->iobuf = NULL;
+ }
- priv->incoming.iobuf = NULL;
+ if (in->request_info != NULL) {
+ GF_FREE(in->request_info);
+ in->request_info = NULL;
+ }
+
+ memset(&in->payload_vector, 0, sizeof(in->payload_vector));
}
+static int
+__socket_proto_state_machine(rpc_transport_t *this,
+ rpc_transport_pollin_t **pollin)
+{
+ int ret = -1;
+ socket_private_t *priv = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec vector[2];
+ struct gf_sock_incoming *in = NULL;
+ struct gf_sock_incoming_frag *frag = NULL;
-int
-__socket_proto_state_machine (rpc_transport_t *this,
- rpc_transport_pollin_t **pollin)
-{
- int ret = -1;
- socket_private_t *priv = NULL;
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- struct iovec vector[2];
-
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
-
- priv = this->private;
- while (priv->incoming.record_state != SP_STATE_COMPLETE) {
- switch (priv->incoming.record_state) {
-
- case SP_STATE_NADA:
- priv->incoming.total_bytes_read = 0;
- priv->incoming.payload_vector.iov_len = 0;
-
- priv->incoming.pending_vector = priv->incoming.vector;
- priv->incoming.pending_vector->iov_base =
- &priv->incoming.fraghdr;
-
- priv->incoming.pending_vector->iov_len =
- sizeof (priv->incoming.fraghdr);
-
- priv->incoming.record_state = SP_STATE_READING_FRAGHDR;
-
- /* fall through */
-
- case SP_STATE_READING_FRAGHDR:
- ret = __socket_readv (this,
- priv->incoming.pending_vector, 1,
- &priv->incoming.pending_vector,
- &priv->incoming.pending_count,
- NULL);
- if (ret == -1) {
- if (priv->read_fail_log == 1) {
- gf_log (this->name,
- ((priv->connected == 1) ?
- GF_LOG_WARNING : GF_LOG_DEBUG),
- "reading from socket failed. Error (%s)"
- ", peer (%s)", strerror (errno),
- this->peerinfo.identifier);
- }
- goto out;
- }
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", this->private, out);
- if (ret > 0) {
- gf_log (this->name, GF_LOG_TRACE, "partial "
- "fragment header read");
- goto out;
- }
+ priv = this->private;
+ /* used to reduce the indirection */
+ in = &priv->incoming;
+ frag = &in->frag;
- if (ret == 0) {
- priv->incoming.record_state =
- SP_STATE_READ_FRAGHDR;
- }
- /* fall through */
-
- case SP_STATE_READ_FRAGHDR:
-
- priv->incoming.fraghdr = ntoh32 (priv->incoming.fraghdr);
- priv->incoming.total_bytes_read
- += RPC_FRAGSIZE(priv->incoming.fraghdr);
- iobuf = iobuf_get2 (this->ctx->iobuf_pool,
- priv->incoming.total_bytes_read +
- sizeof (priv->incoming.fraghdr));
- if (!iobuf) {
- ret = -ENOMEM;
- goto out;
- }
+ while (in->record_state != SP_STATE_COMPLETE) {
+ switch (in->record_state) {
+ case SP_STATE_NADA:
+ in->total_bytes_read = 0;
+ in->payload_vector.iov_len = 0;
- priv->incoming.iobuf = iobuf;
- priv->incoming.iobuf_size = 0;
- priv->incoming.frag.fragcurrent = iobuf_ptr (iobuf);
- priv->incoming.record_state = SP_STATE_READING_FRAG;
- /* fall through */
+ in->pending_vector = in->vector;
+ in->pending_vector->iov_base = &in->fraghdr;
- case SP_STATE_READING_FRAG:
- ret = __socket_read_frag (this);
+ in->pending_vector->iov_len = sizeof(in->fraghdr);
- if ((ret == -1)
- || (priv->incoming.frag.bytes_read !=
- RPC_FRAGSIZE (priv->incoming.fraghdr))) {
- goto out;
- }
+ in->record_state = SP_STATE_READING_FRAGHDR;
- priv->incoming.frag.bytes_read = 0;
+ /* fall through */
- if (!RPC_LASTFRAG (priv->incoming.fraghdr)) {
- priv->incoming.record_state =
- SP_STATE_READING_FRAGHDR;
- break;
- }
+ case SP_STATE_READING_FRAGHDR:
+ ret = __socket_readv(this, in->pending_vector, 1,
+ &in->pending_vector, &in->pending_count,
+ NULL);
+ if (ret < 0)
+ goto out;
- /* we've read the entire rpc record, notify the
- * upper layers.
- */
- if (pollin != NULL) {
- int count = 0;
- priv->incoming.iobuf_size
- = priv->incoming.total_bytes_read
- - priv->incoming.payload_vector.iov_len;
-
- memset (vector, 0, sizeof (vector));
-
- if (priv->incoming.iobref == NULL) {
- priv->incoming.iobref = iobref_new ();
- if (priv->incoming.iobref == NULL) {
- ret = -1;
- goto out;
- }
- }
-
- vector[count].iov_base
- = iobuf_ptr (priv->incoming.iobuf);
- vector[count].iov_len
- = priv->incoming.iobuf_size;
-
- iobref = priv->incoming.iobref;
-
- count++;
-
- if (priv->incoming.payload_vector.iov_base
- != NULL) {
- vector[count]
- = priv->incoming.payload_vector;
- count++;
- }
-
- *pollin = rpc_transport_pollin_alloc (this,
- vector,
- count,
- priv->incoming.iobuf,
- iobref,
- priv->incoming.request_info);
- iobuf_unref (priv->incoming.iobuf);
- priv->incoming.iobuf = NULL;
-
- if (*pollin == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "transport pollin allocation failed");
- ret = -1;
- goto out;
- }
- if (priv->incoming.msg_type == REPLY)
- (*pollin)->is_reply = 1;
-
- priv->incoming.request_info = NULL;
- }
- priv->incoming.record_state = SP_STATE_COMPLETE;
- break;
+ if (ret > 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "partial "
+ "fragment header read");
+ ret = 0;
+ goto out;
+ }
- case SP_STATE_COMPLETE:
- /* control should not reach here */
- gf_log (this->name, GF_LOG_WARNING, "control reached to "
- "SP_STATE_COMPLETE, which should not have "
- "happened");
- break;
+ if (ret == 0) {
+ in->record_state = SP_STATE_READ_FRAGHDR;
}
- }
+ /* fall through */
- if (priv->incoming.record_state == SP_STATE_COMPLETE) {
- priv->incoming.record_state = SP_STATE_NADA;
- __socket_reset_priv (priv);
- }
+ case SP_STATE_READ_FRAGHDR:
-out:
- if ((ret == -1) && (errno == EAGAIN)) {
- ret = 0;
- }
- return ret;
-}
+ in->fraghdr = ntoh32(in->fraghdr);
+ in->total_bytes_read += RPC_FRAGSIZE(in->fraghdr);
+ if (in->total_bytes_read >= GF_UNIT_GB) {
+ ret = -1;
+ goto out;
+ }
-int
-socket_proto_state_machine (rpc_transport_t *this,
- rpc_transport_pollin_t **pollin)
-{
- socket_private_t *priv = NULL;
- int ret = 0;
+ iobuf = iobuf_get2(
+ this->ctx->iobuf_pool,
+ (in->total_bytes_read + sizeof(in->fraghdr)));
+ if (!iobuf) {
+ ret = -1;
+ goto out;
+ }
+
+ if (in->iobuf == NULL) {
+ /* first fragment */
+ frag->fragcurrent = iobuf_ptr(iobuf);
+ } else {
+ /* second or further fragment */
+ memcpy(iobuf_ptr(iobuf), iobuf_ptr(in->iobuf),
+ in->total_bytes_read - RPC_FRAGSIZE(in->fraghdr));
+ iobuf_unref(in->iobuf);
+ frag->fragcurrent = (char *)iobuf_ptr(iobuf) +
+ in->total_bytes_read -
+ RPC_FRAGSIZE(in->fraghdr);
+ frag->pending_vector->iov_base = frag->fragcurrent;
+ in->pending_vector = frag->pending_vector;
+ }
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ in->iobuf = iobuf;
+ in->iobuf_size = 0;
+ in->record_state = SP_STATE_READING_FRAG;
+ /* fall through */
- priv = this->private;
+ case SP_STATE_READING_FRAG:
+ ret = __socket_read_frag(this);
- pthread_mutex_lock (&priv->lock);
- {
- ret = __socket_proto_state_machine (this, pollin);
+ if ((ret < 0) ||
+ (frag->bytes_read != RPC_FRAGSIZE(in->fraghdr))) {
+ goto out;
+ }
+
+ frag->bytes_read = 0;
+
+ if (!RPC_LASTFRAG(in->fraghdr)) {
+ in->pending_vector = in->vector;
+ in->pending_vector->iov_base = &in->fraghdr;
+ in->pending_vector->iov_len = sizeof(in->fraghdr);
+ in->record_state = SP_STATE_READING_FRAGHDR;
+ break;
+ }
+
+ /* we've read the entire rpc record, notify the
+ * upper layers.
+ */
+ if (pollin != NULL) {
+ int count = 0;
+ in->iobuf_size = (in->total_bytes_read -
+ in->payload_vector.iov_len);
+
+ memset(vector, 0, sizeof(vector));
+
+ if (in->iobref == NULL) {
+ in->iobref = iobref_new();
+ if (in->iobref == NULL) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ vector[count].iov_base = iobuf_ptr(in->iobuf);
+ vector[count].iov_len = in->iobuf_size;
+
+ iobref = in->iobref;
+
+ count++;
+
+ if (in->payload_vector.iov_base != NULL) {
+ vector[count] = in->payload_vector;
+ count++;
+ }
+
+ *pollin = rpc_transport_pollin_alloc(this, vector, count,
+ in->iobuf, iobref,
+ in->request_info);
+ iobuf_unref(in->iobuf);
+ in->iobuf = NULL;
+
+ if (*pollin == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "transport pollin allocation failed");
+ ret = -1;
+ goto out;
+ }
+ if (in->msg_type == REPLY)
+ (*pollin)->is_reply = 1;
+
+ in->request_info = NULL;
+ }
+ in->record_state = SP_STATE_COMPLETE;
+ break;
+
+ case SP_STATE_COMPLETE:
+ /* control should not reach here */
+ gf_log(this->name, GF_LOG_WARNING,
+ "control reached to "
+ "SP_STATE_COMPLETE, which should not have "
+ "happened");
+ break;
}
- pthread_mutex_unlock (&priv->lock);
+ }
+
+ if (in->record_state == SP_STATE_COMPLETE) {
+ in->record_state = SP_STATE_NADA;
+ __socket_reset_priv(priv);
+ }
out:
- return ret;
+ return ret;
}
+static int
+socket_proto_state_machine(rpc_transport_t *this,
+ rpc_transport_pollin_t **pollin)
+{
+ return __socket_proto_state_machine(this, pollin);
+}
-int
-socket_event_poll_in (rpc_transport_t *this)
+static void
+socket_event_poll_in_async(xlator_t *xl, gf_async_t *async)
{
- int ret = -1;
- rpc_transport_pollin_t *pollin = NULL;
+ rpc_transport_pollin_t *pollin;
+ rpc_transport_t *this;
+ socket_private_t *priv;
- ret = socket_proto_state_machine (this, &pollin);
+ pollin = caa_container_of(async, rpc_transport_pollin_t, async);
+ this = pollin->trans;
+ priv = this->private;
- if (pollin != NULL) {
- ret = rpc_transport_notify (this, RPC_TRANSPORT_MSG_RECEIVED,
- pollin);
+ rpc_transport_notify(this, RPC_TRANSPORT_MSG_RECEIVED, pollin);
- rpc_transport_pollin_destroy (pollin);
- }
+ rpc_transport_unref(this);
- return ret;
-}
+ rpc_transport_pollin_destroy(pollin);
+ pthread_mutex_lock(&priv->notify.lock);
+ {
+ --priv->notify.in_progress;
-int
-socket_connect_finish (rpc_transport_t *this)
+ if (!priv->notify.in_progress)
+ pthread_cond_signal(&priv->notify.cond);
+ }
+ pthread_mutex_unlock(&priv->notify.lock);
+}
+
+static int
+socket_event_poll_in(rpc_transport_t *this, gf_boolean_t notify_handled)
{
- int ret = -1;
- socket_private_t *priv = NULL;
- rpc_transport_event_t event = 0;
- char notify_rpc = 0;
+ int ret = -1;
+ rpc_transport_pollin_t *pollin = NULL;
+ socket_private_t *priv = this->private;
+ glusterfs_ctx_t *ctx = NULL;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ ctx = this->ctx;
- priv = this->private;
+ ret = socket_proto_state_machine(this, &pollin);
- pthread_mutex_lock (&priv->lock);
+ if (pollin) {
+ pthread_mutex_lock(&priv->notify.lock);
{
- if (priv->connected)
- goto unlock;
+ priv->notify.in_progress++;
+ }
+ pthread_mutex_unlock(&priv->notify.lock);
+ }
- ret = __socket_connect_finish (priv->sock);
+ if (notify_handled && (ret >= 0))
+ gf_event_handled(ctx->event_pool, priv->sock, priv->idx, priv->gen);
- if (ret == -1 && errno == EINPROGRESS)
- ret = 1;
+ if (pollin) {
+ rpc_transport_ref(this);
+ gf_async(&pollin->async, THIS, socket_event_poll_in_async);
+ }
- if (ret == -1 && errno != EINPROGRESS) {
- if (!priv->connect_finish_log) {
- gf_log (this->name, GF_LOG_ERROR,
- "connection to %s failed (%s)",
- this->peerinfo.identifier,
- strerror (errno));
- priv->connect_finish_log = 1;
- }
- __socket_disconnect (this);
- notify_rpc = 1;
- event = RPC_TRANSPORT_DISCONNECT;
- goto unlock;
- }
+ return ret;
+}
- if (ret == 0) {
- notify_rpc = 1;
-
- this->myinfo.sockaddr_len =
- sizeof (this->myinfo.sockaddr);
-
- ret = getsockname (priv->sock,
- SA (&this->myinfo.sockaddr),
- &this->myinfo.sockaddr_len);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "getsockname on (%d) failed (%s)",
- priv->sock, strerror (errno));
- __socket_disconnect (this);
- event = GF_EVENT_POLLERR;
- goto unlock;
- }
+static int
+socket_connect_finish(rpc_transport_t *this)
+{
+ int ret = -1;
+ socket_private_t *priv = NULL;
+ rpc_transport_event_t event = 0;
+ char notify_rpc = 0;
+
+ priv = this->private;
+
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ if (priv->connected != 0)
+ goto unlock;
+
+ get_transport_identifiers(this);
+
+ ret = __socket_connect_finish(priv->sock);
+
+ if ((ret < 0) && (errno == EINPROGRESS))
+ ret = 1;
+
+ if ((ret < 0) && (errno != EINPROGRESS)) {
+ if (!priv->connect_finish_log) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "connection to %s failed (%s); "
+ "disconnecting socket",
+ this->peerinfo.identifier, strerror(errno));
+ priv->connect_finish_log = 1;
+ }
+ __socket_disconnect(this);
+ goto unlock;
+ }
- priv->connected = 1;
- priv->connect_finish_log = 0;
- event = RPC_TRANSPORT_CONNECT;
- get_transport_identifiers (this);
- }
+ if (ret == 0) {
+ notify_rpc = 1;
+
+ this->myinfo.sockaddr_len = sizeof(this->myinfo.sockaddr);
+
+ ret = getsockname(priv->sock, SA(&this->myinfo.sockaddr),
+ &this->myinfo.sockaddr_len);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "getsockname on (%d) failed (%s) - "
+ "disconnecting socket",
+ priv->sock, strerror(errno));
+ __socket_disconnect(this);
+ event = RPC_TRANSPORT_DISCONNECT;
+ goto unlock;
+ }
+
+ priv->connected = 1;
+ priv->connect_finish_log = 0;
+ event = RPC_TRANSPORT_CONNECT;
}
+ }
unlock:
- pthread_mutex_unlock (&priv->lock);
+ pthread_mutex_unlock(&priv->out_lock);
- if (notify_rpc) {
- rpc_transport_notify (this, event, this);
- }
-out:
- return 0;
+ if (notify_rpc) {
+ rpc_transport_notify(this, event, this);
+ }
+
+ return ret;
}
+static int
+socket_disconnect(rpc_transport_t *this, gf_boolean_t wait);
-/* reads rpc_requests during pollin */
-int
-socket_event_handler (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
+/* socket_is_connected() is for use only in socket_event_handler() */
+static inline gf_boolean_t
+socket_is_connected(socket_private_t *priv)
{
- rpc_transport_t *this = NULL;
- socket_private_t *priv = NULL;
- int ret = 0;
-
- this = data;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
- GF_VALIDATE_OR_GOTO ("socket", this->xl, out);
-
- THIS = this->xl;
- priv = this->private;
-
+ if (priv->use_ssl) {
+ return priv->is_server ? priv->ssl_accepted : priv->ssl_connected;
+ } else {
+ return priv->is_server ? priv->accepted : priv->connected;
+ }
+}
- pthread_mutex_lock (&priv->lock);
- {
- priv->idx = idx;
- }
- pthread_mutex_unlock (&priv->lock);
+static void
+ssl_rearm_event_fd(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int idx = -1;
+ int gen = -1;
+ int fd = -1;
+
+ priv = this->private;
+ ctx = this->ctx;
+
+ idx = priv->idx;
+ gen = priv->gen;
+ fd = priv->sock;
+
+ if (priv->ssl_error_required == SSL_ERROR_WANT_READ)
+ gf_event_select_on(ctx->event_pool, fd, idx, 1, -1);
+ if (priv->ssl_error_required == SSL_ERROR_WANT_WRITE)
+ gf_event_select_on(ctx->event_pool, fd, idx, -1, 1);
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
+}
- if (!priv->connected) {
- ret = socket_connect_finish (this);
+static int
+ssl_handle_server_connection_attempt(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int idx = -1;
+ int gen = -1;
+ int ret = -1;
+ int fd = -1;
+
+ priv = this->private;
+ ctx = this->ctx;
+
+ idx = priv->idx;
+ gen = priv->gen;
+ fd = priv->sock;
+
+ if (!priv->ssl_context_created) {
+ ret = ssl_setup_connection_prefix(this, _gf_true);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "> ssl_setup_connection_prefix() failed!");
+ ret = -1;
+ goto out;
+ } else {
+ priv->ssl_context_created = _gf_true;
}
-
- if (!ret && poll_out) {
- ret = socket_event_poll_out (this);
+ }
+ ret = ssl_complete_connection(this);
+ if (ret == 0) {
+ /* nothing to do */
+ gf_event_select_on(ctx->event_pool, fd, idx, 1, 0);
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
+ ret = 1;
+ } else {
+ if (errno == EAGAIN) {
+ ssl_rearm_event_fd(this);
+ ret = 1;
+ } else {
+ ret = -1;
+ gf_log(this->name, GF_LOG_TRACE,
+ "ssl_complete_connection returned error");
}
+ }
+out:
+ return ret;
+}
- if (!ret && poll_in) {
- ret = socket_event_poll_in (this);
+static int
+ssl_handle_client_connection_attempt(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int idx = -1;
+ int ret = -1;
+ int fd = -1;
+
+ priv = this->private;
+ ctx = this->ctx;
+
+ idx = priv->idx;
+ fd = priv->sock;
+
+ /* SSL client */
+ if (priv->connect_failed) {
+ gf_log(this->name, GF_LOG_TRACE, ">>> disconnecting SSL socket");
+ (void)socket_disconnect(this, _gf_false);
+ /* Force ret to be -1, as we are officially done with
+ this socket */
+ ret = -1;
+ } else {
+ if (!priv->ssl_context_created) {
+ ret = ssl_setup_connection_prefix(this, _gf_false);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "> ssl_setup_connection_prefix() "
+ "failed!");
+ ret = -1;
+ goto out;
+ } else {
+ priv->ssl_context_created = _gf_true;
+ }
}
-
- if ((ret < 0) || poll_err) {
- /* Logging has happened already in earlier cases */
- gf_log ("transport", ((ret >= 0) ? GF_LOG_INFO : GF_LOG_DEBUG),
- "disconnecting now");
- socket_event_poll_err (this);
- rpc_transport_unref (this);
+ ret = ssl_complete_connection(this);
+ if (ret == 0) {
+ ret = socket_connect_finish(this);
+ gf_event_select_on(ctx->event_pool, fd, idx, 1, 0);
+ gf_log(this->name, GF_LOG_TRACE, ">>> completed client connect");
+ } else {
+ if (errno == EAGAIN) {
+ gf_log(this->name, GF_LOG_TRACE,
+ ">>> retrying client connect 2");
+ ssl_rearm_event_fd(this);
+ ret = 1;
+ } else {
+ /* this is a connection failure */
+ (void)socket_connect_finish(this);
+ gf_log(this->name, GF_LOG_TRACE,
+ "ssl_complete_connection "
+ "returned error");
+ ret = -1;
+ }
}
-
+ }
out:
- return 0;
+ return ret;
}
+static int
+socket_handle_client_connection_attempt(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int idx = -1;
+ int gen = -1;
+ int ret = -1;
+ int fd = -1;
+
+ priv = this->private;
+ ctx = this->ctx;
+
+ idx = priv->idx;
+ gen = priv->gen;
+ fd = priv->sock;
+
+ /* non-SSL client */
+ if (priv->connect_failed) {
+ /* connect failed with some other error than
+ EINPROGRESS or ENOENT, so nothing more to
+ do, fail reading/writing anything even if
+ poll_in or poll_out
+ is set
+ */
+ gf_log("transport", GF_LOG_DEBUG,
+ "connect failed with some other error "
+ "than EINPROGRESS or ENOENT, so "
+ "nothing more to do; disconnecting "
+ "socket");
+ (void)socket_disconnect(this, _gf_false);
+
+ /* Force ret to be -1, as we are officially
+ * done with this socket
+ */
+ ret = -1;
+ } else {
+ ret = socket_connect_finish(this);
+ gf_log(this->name, GF_LOG_TRACE, "socket_connect_finish() returned %d",
+ ret);
+ if (ret == 0 || ret == 1) {
+ /* we don't want to do any reads or
+ * writes on the connection yet in
+ * socket_event_handler, so just
+ * return 1
+ */
+ ret = 1;
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
+ }
+ }
+ return ret;
+}
-int
-socket_server_event_handler (int fd, int idx, void *data,
- int poll_in, int poll_out, int poll_err)
-{
- rpc_transport_t *this = NULL;
- socket_private_t *priv = NULL;
- int ret = 0;
- int new_sock = -1;
- rpc_transport_t *new_trans = NULL;
- struct sockaddr_storage new_sockaddr = {0, };
- socklen_t addrlen = sizeof (new_sockaddr);
- socket_private_t *new_priv = NULL;
- glusterfs_ctx_t *ctx = NULL;
-
- this = data;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
- GF_VALIDATE_OR_GOTO ("socket", this->xl, out);
-
- THIS = this->xl;
- priv = this->private;
- ctx = this->ctx;
-
- pthread_mutex_lock (&priv->lock);
- {
- priv->idx = idx;
+static int
+socket_complete_connection(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int idx = -1;
+ int gen = -1;
+ int ret = -1;
+ int fd = -1;
+
+ priv = this->private;
+ ctx = this->ctx;
+
+ idx = priv->idx;
+ gen = priv->gen;
+ fd = priv->sock;
+
+ if (priv->use_ssl) {
+ if (priv->is_server) {
+ ret = ssl_handle_server_connection_attempt(this);
+ } else {
+ ret = ssl_handle_client_connection_attempt(this);
+ }
+ } else {
+ if (priv->is_server) {
+ /* non-SSL server: nothing much to do
+ * connection has already been accepted in
+ * socket_server_event_handler()
+ */
+ priv->accepted = _gf_true;
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
+ ret = 1;
+ } else {
+ ret = socket_handle_client_connection_attempt(this);
+ }
+ }
+ return ret;
+}
- if (poll_in) {
- new_sock = accept (priv->sock, SA (&new_sockaddr),
- &addrlen);
+/* reads rpc_requests during pollin */
+static void
+socket_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+ int poll_out, int poll_err, char event_thread_died)
+{
+ rpc_transport_t *this = NULL;
+ socket_private_t *priv = NULL;
+ int ret = -1;
+ glusterfs_ctx_t *ctx = NULL;
+ gf_boolean_t socket_closed = _gf_false, notify_handled = _gf_false;
- if (new_sock == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "accept on %d failed (%s)",
- priv->sock, strerror (errno));
- goto unlock;
- }
+ this = data;
- if (!priv->bio) {
- ret = __socket_nonblock (new_sock);
+ if (event_thread_died) {
+ /* to avoid duplicate notifications, notify only for listener sockets */
+ return;
+ }
+
+ /* At this point we are sure no other thread is using the transport because
+ * we cannot receive more events until we call gf_event_handled(). However
+ * this function may call gf_event_handled() in some cases. When this is
+ * done, the transport may be destroyed at any moment if another thread
+ * handled an error event. To prevent that we take a reference here. */
+ rpc_transport_ref(this);
+
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", this->private, out);
+ GF_VALIDATE_OR_GOTO("socket", this->xl, out);
+
+ THIS = this->xl;
+ priv = this->private;
+ ctx = this->ctx;
+
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ priv->idx = idx;
+ priv->gen = gen;
+ }
+ pthread_mutex_unlock(&priv->out_lock);
+
+ gf_log(this->name, GF_LOG_TRACE, "%s (sock:%d) in:%d, out:%d, err:%d",
+ (priv->is_server ? "server" : "client"), priv->sock, poll_in,
+ poll_out, poll_err);
+
+ if (!poll_err) {
+ if (!socket_is_connected(priv)) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (sock:%d) socket is not connected, "
+ "completing connection",
+ (priv->is_server ? "server" : "client"), priv->sock);
+
+ ret = socket_complete_connection(this);
+
+ gf_log(this->name, GF_LOG_TRACE,
+ "(sock:%d) "
+ "socket_complete_connection() returned %d",
+ priv->sock, ret);
+
+ if (ret > 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "(sock:%d) returning to wait on socket", priv->sock);
+ goto out;
+ }
+ } else {
+ char *sock_type = (priv->is_server ? "Server" : "Client");
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "NBIO on %d failed (%s)",
- new_sock, strerror (errno));
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s socket (%d) is already connected", sock_type,
+ priv->sock);
+ ret = 0;
+ }
+ }
+
+ if (!ret && poll_out) {
+ ret = socket_event_poll_out(this);
+ gf_log(this->name, GF_LOG_TRACE,
+ "(sock:%d) "
+ "socket_event_poll_out returned %d",
+ priv->sock, ret);
+ }
+
+ if (!ret && poll_in) {
+ ret = socket_event_poll_in(this, !poll_err);
+ gf_log(this->name, GF_LOG_TRACE,
+ "(sock:%d) "
+ "socket_event_poll_in returned %d",
+ priv->sock, ret);
+ notify_handled = _gf_true;
+ }
+
+ if ((ret < 0) || poll_err) {
+ struct sockaddr *sa = SA(&this->peerinfo.sockaddr);
+
+ if (priv->is_server &&
+ SA(&this->myinfo.sockaddr)->sa_family == AF_UNIX) {
+ sa = SA(&this->myinfo.sockaddr);
+ }
- close (new_sock);
- goto unlock;
- }
- }
+ socket_dump_info(sa, priv->is_server, priv->use_ssl, priv->sock,
+ this->name, "disconnecting from");
- if (priv->nodelay) {
- ret = __socket_nodelay (new_sock);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "setsockopt() failed for "
- "NODELAY (%s)",
- strerror (errno));
- }
- }
+ /* Dump the SSL error stack to clear any errors that may otherwise
+ * resurface in the future.
+ */
+ if (priv->use_ssl && priv->ssl_ssl) {
+ ssl_dump_error_stack(this->name);
+ }
- if (priv->keepalive) {
- ret = __socket_keepalive (new_sock,
- priv->keepaliveintvl,
- priv->keepaliveidle);
- if (ret == -1)
- gf_log (this->name, GF_LOG_WARNING,
- "Failed to set keep-alive: %s",
- strerror (errno));
- }
+ /* Logging has happened already in earlier cases */
+ gf_log("transport", ((ret >= 0) ? GF_LOG_INFO : GF_LOG_DEBUG),
+ "EPOLLERR - disconnecting (sock:%d) (%s)", priv->sock,
+ (priv->use_ssl ? "SSL" : "non-SSL"));
- new_trans = GF_CALLOC (1, sizeof (*new_trans),
- gf_common_mt_rpc_trans_t);
- if (!new_trans)
- goto unlock;
-
- new_trans->name = gf_strdup (this->name);
-
- memcpy (&new_trans->peerinfo.sockaddr, &new_sockaddr,
- addrlen);
- new_trans->peerinfo.sockaddr_len = addrlen;
-
- new_trans->myinfo.sockaddr_len =
- sizeof (new_trans->myinfo.sockaddr);
-
- ret = getsockname (new_sock,
- SA (&new_trans->myinfo.sockaddr),
- &new_trans->myinfo.sockaddr_len);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "getsockname on %d failed (%s)",
- new_sock, strerror (errno));
- close (new_sock);
- goto unlock;
- }
+ socket_closed = socket_event_poll_err(this, gen, idx);
- get_transport_identifiers (new_trans);
- socket_init (new_trans);
- new_trans->ops = this->ops;
- new_trans->init = this->init;
- new_trans->fini = this->fini;
- new_trans->ctx = ctx;
- new_trans->xl = this->xl;
- new_trans->mydata = this->mydata;
- new_trans->notify = this->notify;
- new_trans->listener = this;
- new_priv = new_trans->private;
-
- pthread_mutex_lock (&new_priv->lock);
- {
- new_priv->sock = new_sock;
- new_priv->connected = 1;
- rpc_transport_ref (new_trans);
-
- new_priv->idx =
- event_register (ctx->event_pool,
- new_sock,
- socket_event_handler,
- new_trans, 1, 0);
-
- if (new_priv->idx == -1)
- ret = -1;
- }
- pthread_mutex_unlock (&new_priv->lock);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to register the socket with event");
- goto unlock;
- }
+ if (socket_closed)
+ rpc_transport_unref(this);
- ret = rpc_transport_notify (this, RPC_TRANSPORT_ACCEPT,
- new_trans);
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->lock);
+ } else if (!notify_handled) {
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
+ }
out:
- return ret;
+ rpc_transport_unref(this);
}
-
-int
-socket_disconnect (rpc_transport_t *this)
+static void
+socket_server_event_handler(int fd, int idx, int gen, void *data, int poll_in,
+ int poll_out, int poll_err, char event_thread_died)
{
- socket_private_t *priv = NULL;
- int ret = -1;
+ rpc_transport_t *this = NULL;
+ socket_private_t *priv = NULL;
+ int ret = 0;
+ int new_sock = -1;
+ rpc_transport_t *new_trans = NULL;
+ struct sockaddr_storage new_sockaddr = {
+ 0,
+ };
+ socklen_t addrlen = sizeof(new_sockaddr);
+ socket_private_t *new_priv = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ this = data;
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", this->private, out);
+ GF_VALIDATE_OR_GOTO("socket", this->xl, out);
+ GF_VALIDATE_OR_GOTO("socket", this->ctx, out);
+
+ THIS = this->xl;
+ priv = this->private;
+ ctx = this->ctx;
+
+ if (event_thread_died) {
+ rpc_transport_notify(this, RPC_TRANSPORT_EVENT_THREAD_DIED,
+ (void *)(unsigned long)gen);
+ return;
+ }
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ /* NOTE:
+ * We have done away with the critical section in this function. since
+ * there's little that it helps with. There's no other code that
+ * attempts to unref the listener socket/transport from any other
+ * thread context while we are using it here.
+ */
+ priv->idx = idx;
+ priv->gen = gen;
- priv = this->private;
+ if (poll_err) {
+ socket_event_poll_err(this, gen, idx);
+ goto out;
+ }
- pthread_mutex_lock (&priv->lock);
- {
- ret = __socket_disconnect (this);
- }
- pthread_mutex_unlock (&priv->lock);
+ if (poll_in) {
+ int aflags = 0;
-out:
- return ret;
-}
+ if (!priv->bio)
+ aflags = O_NONBLOCK;
+ new_sock = sys_accept(priv->sock, SA(&new_sockaddr), &addrlen, aflags);
-int
-socket_connect (rpc_transport_t *this, int port)
-{
- int ret = -1;
- int sock = -1;
- socket_private_t *priv = NULL;
- socklen_t sockaddr_len = 0;
- glusterfs_ctx_t *ctx = NULL;
- sa_family_t sa_family = {0, };
- union gf_sock_union sock_union;
+ gf_event_handled(ctx->event_pool, fd, idx, gen);
- GF_VALIDATE_OR_GOTO ("socket", this, err);
- GF_VALIDATE_OR_GOTO ("socket", this->private, err);
+ if (new_sock < 0) {
+ gf_log(this->name, GF_LOG_WARNING, "accept on %d failed (%s)",
+ priv->sock, strerror(errno));
+ goto out;
+ }
- priv = this->private;
- ctx = this->ctx;
+ if (new_sockaddr.ss_family != AF_UNIX) {
+ if (priv->nodelay) {
+ ret = __socket_nodelay(new_sock);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "setsockopt() failed for "
+ "NODELAY (%s)",
+ strerror(errno));
+ }
+ }
- if (!priv) {
- gf_log_callingfn (this->name, GF_LOG_WARNING,
- "connect() called on uninitialized transport");
- goto err;
+ if (priv->keepalive) {
+ ret = __socket_keepalive(
+ new_sock, new_sockaddr.ss_family, priv->keepaliveintvl,
+ priv->keepaliveidle, priv->keepalivecnt, priv->timeout);
+ if (ret != 0)
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to set keep-alive: %s", strerror(errno));
+ }
}
- pthread_mutex_lock (&priv->lock);
- {
- sock = priv->sock;
+ new_trans = GF_CALLOC(1, sizeof(*new_trans), gf_common_mt_rpc_trans_t);
+ if (!new_trans) {
+ sys_close(new_sock);
+ gf_log(this->name, GF_LOG_WARNING,
+ "transport object allocation failure; "
+ "closed newly accepted socket %d",
+ new_sock);
+ goto out;
}
- pthread_mutex_unlock (&priv->lock);
- if (sock != -1) {
- gf_log_callingfn (this->name, GF_LOG_TRACE,
- "connect () called on transport already connected");
- errno = EINPROGRESS;
- ret = -1;
- goto err;
+ ret = pthread_mutex_init(&new_trans->lock, NULL);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "pthread_mutex_init() failed: %s; closing newly accepted "
+ "socket %d",
+ strerror(errno), new_sock);
+ sys_close(new_sock);
+ GF_FREE(new_trans);
+ goto out;
+ }
+ INIT_LIST_HEAD(&new_trans->list);
+
+ new_trans->name = gf_strdup(this->name);
+
+ memcpy(&new_trans->peerinfo.sockaddr, &new_sockaddr, addrlen);
+ new_trans->peerinfo.sockaddr_len = addrlen;
+
+ new_trans->myinfo.sockaddr_len = sizeof(new_trans->myinfo.sockaddr);
+
+ ret = getsockname(new_sock, SA(&new_trans->myinfo.sockaddr),
+ &new_trans->myinfo.sockaddr_len);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "getsockname on socket %d "
+ "failed (errno:%s); closing newly accepted socket",
+ new_sock, strerror(errno));
+ sys_close(new_sock);
+ GF_FREE(new_trans->name);
+ GF_FREE(new_trans);
+ goto out;
}
- ret = socket_client_get_remote_sockaddr (this, &sock_union.sa,
- &sockaddr_len, &sa_family);
- if (ret == -1) {
- /* logged inside client_get_remote_sockaddr */
- goto err;
+ get_transport_identifiers(new_trans);
+ gf_log(this->name, GF_LOG_TRACE, "XXX server:%s, client:%s",
+ new_trans->myinfo.identifier, new_trans->peerinfo.identifier);
+
+ /* Make options available to local socket_init() to create new
+ * SSL_CTX per transport. A separate SSL_CTX per transport is
+ * required to avoid setting crl checking options for client
+ * connections. The verification options eventually get copied
+ * to the SSL object. Unfortunately, there's no way to identify
+ * whether socket_init() is being called after a client-side
+ * connect() or a server-side accept(). Although, we could pass
+ * a flag from the transport init() to the socket_init() and
+ * from this place, this doesn't identify the case where the
+ * server-side transport loading is done for the first time.
+ * Also, SSL doesn't apply for UNIX sockets.
+ */
+ if (new_sockaddr.ss_family != AF_UNIX)
+ new_trans->options = dict_ref(this->options);
+ new_trans->ctx = this->ctx;
+
+ ret = socket_init(new_trans);
+
+ /* reset options to NULL to avoid double free */
+ if (new_sockaddr.ss_family != AF_UNIX) {
+ dict_unref(new_trans->options);
+ new_trans->options = NULL;
}
- if (port > 0) {
- sock_union.sin.sin_port = htons (port);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "initialization of new_trans "
+ "failed; closing newly accepted socket %d",
+ new_sock);
+ sys_close(new_sock);
+ GF_FREE(new_trans->name);
+ GF_FREE(new_trans);
+ goto out;
+ }
+ new_trans->ops = this->ops;
+ new_trans->init = this->init;
+ new_trans->fini = this->fini;
+ new_trans->ctx = ctx;
+ new_trans->xl = this->xl;
+ new_trans->mydata = this->mydata;
+ new_trans->notify = this->notify;
+ new_trans->listener = this;
+ new_trans->notify_poller_death = this->poller_death_accept;
+ new_priv = new_trans->private;
+
+ if (new_sockaddr.ss_family == AF_UNIX) {
+ new_priv->use_ssl = _gf_false;
+ } else {
+ switch (priv->srvr_ssl) {
+ case MGMT_SSL_ALWAYS:
+ /* Glusterd with secure_mgmt. */
+ new_priv->use_ssl = _gf_true;
+ break;
+ case MGMT_SSL_COPY_IO:
+ /* Glusterfsd. */
+ new_priv->use_ssl = priv->ssl_enabled;
+ break;
+ default:
+ new_priv->use_ssl = _gf_false;
+ }
}
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->sock != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "connect() -- already connected");
- goto unlock;
- }
- memcpy (&this->peerinfo.sockaddr, &sock_union.storage,
- sockaddr_len);
- this->peerinfo.sockaddr_len = sockaddr_len;
+ new_priv->sock = new_sock;
- priv->sock = socket (sa_family, SOCK_STREAM, 0);
- if (priv->sock == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "socket creation failed (%s)",
- strerror (errno));
- goto unlock;
- }
+ new_priv->ssl_enabled = priv->ssl_enabled;
+ new_priv->connected = 1;
+ new_priv->is_server = _gf_true;
- /* Cant help if setting socket options fails. We can continue
- * working nonetheless.
- */
- if (priv->windowsize != 0) {
- if (setsockopt (priv->sock, SOL_SOCKET, SO_RCVBUF,
- &priv->windowsize,
- sizeof (priv->windowsize)) < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting receive window "
- "size failed: %d: %d: %s",
- priv->sock, priv->windowsize,
- strerror (errno));
- }
+ /*
+ * This is the first ref on the newly accepted
+ * transport.
+ */
+ rpc_transport_ref(new_trans);
- if (setsockopt (priv->sock, SOL_SOCKET, SO_SNDBUF,
- &priv->windowsize,
- sizeof (priv->windowsize)) < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting send window size "
- "failed: %d: %d: %s",
- priv->sock, priv->windowsize,
- strerror (errno));
- }
+ {
+ /* Take a ref on the new_trans to avoid
+ * getting deleted when event_register()
+ * causes socket_event_handler() to race
+ * ahead of this path to eventually find
+ * a disconnect and unref the transport
+ */
+ rpc_transport_ref(new_trans);
+
+ /* Send a notification to RPCSVC layer
+ * to save the new_trans in its service
+ * list before we register the new_sock
+ * with epoll to begin receiving notifications
+ * for data handling.
+ */
+ ret = rpc_transport_notify(this, RPC_TRANSPORT_ACCEPT, new_trans);
+
+ if (ret >= 0) {
+ new_priv->idx = gf_event_register(
+ ctx->event_pool, new_sock, socket_event_handler, new_trans,
+ 1, 0, new_trans->notify_poller_death);
+ if (new_priv->idx == -1) {
+ ret = -1;
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to register the socket "
+ "with event");
+
+ /* event_register() could have failed for some
+ * reason, implying that the new_sock cannot be
+ * added to the epoll set. If we won't get any
+ * more notifications for new_sock from epoll,
+ * then we better remove the corresponding
+ * new_trans object from the RPCSVC service list.
+ * Since we've notified RPC service of new_trans
+ * before we attempted event_register(), we better
+ * unlink the new_trans from the RPCSVC service list
+ * to cleanup the stateby sending out a DISCONNECT
+ * notification.
+ */
+ rpc_transport_notify(this, RPC_TRANSPORT_DISCONNECT,
+ new_trans);
}
+ }
+
+ /* this rpc_transport_unref() is for managing race between
+ * 1. socket_server_event_handler and
+ * 2. socket_event_handler
+ * trying to add and remove new_trans from the rpcsvc
+ * service list
+ * now that we are done with the notifications, lets
+ * reduce the reference
+ */
+ rpc_transport_unref(new_trans);
+ }
- if (priv->nodelay) {
- ret = __socket_nodelay (priv->sock);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING, "closing newly accepted socket");
+ sys_close(new_sock);
+ /* this unref is to actually cause the destruction of
+ * the new_trans since we've failed at everything so far
+ */
+ rpc_transport_unref(new_trans);
+ }
+ }
+out:
+ return;
+}
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "NODELAY on %d failed (%s)",
- priv->sock, strerror (errno));
- }
- }
+static int
+socket_disconnect(rpc_transport_t *this, gf_boolean_t wait)
+{
+ socket_private_t *priv = NULL;
+ int ret = -1;
- if (!priv->bio) {
- ret = __socket_nonblock (priv->sock);
+ priv = this->private;
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "NBIO on %d failed (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
- }
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ ret = __socket_disconnect(this);
+ }
+ pthread_mutex_unlock(&priv->out_lock);
- if (priv->keepalive) {
- ret = __socket_keepalive (priv->sock,
- priv->keepaliveintvl,
- priv->keepaliveidle);
- if (ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to set keep-alive: %s",
- strerror (errno));
- }
+ return ret;
+}
- SA (&this->myinfo.sockaddr)->sa_family =
- SA (&this->peerinfo.sockaddr)->sa_family;
-
- ret = client_bind (this, SA (&this->myinfo.sockaddr),
- &this->myinfo.sockaddr_len, priv->sock);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "client bind failed: %s", strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
+void *
+socket_connect_error_cbk(void *opaque)
+{
+ socket_connect_error_state_t *arg;
- ret = connect (priv->sock, SA (&this->peerinfo.sockaddr),
- this->peerinfo.sockaddr_len);
+ GF_ASSERT(opaque);
- if (ret == -1 && ((errno != EINPROGRESS) && (errno != ENOENT))) {
- gf_log (this->name, GF_LOG_ERROR,
- "connection attempt failed (%s)",
- strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
+ arg = opaque;
+ THIS = arg->this;
- priv->connected = 0;
+ rpc_transport_notify(arg->trans, RPC_TRANSPORT_DISCONNECT, arg->trans);
- rpc_transport_ref (this);
+ if (arg->refd)
+ rpc_transport_unref(arg->trans);
- priv->idx = event_register (ctx->event_pool, priv->sock,
- socket_event_handler, this, 1, 1);
- if (priv->idx == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to register the event");
- ret = -1;
- }
- }
-unlock:
- pthread_mutex_unlock (&priv->lock);
+ GF_FREE(opaque);
+ return NULL;
+}
-err:
- return ret;
+static void
+socket_fix_ssl_opts(rpc_transport_t *this, socket_private_t *priv,
+ uint16_t port)
+{
+ if (port == GF_DEFAULT_SOCKET_LISTEN_PORT) {
+ gf_log(this->name, GF_LOG_DEBUG, "%s SSL for portmapper connection",
+ priv->mgmt_ssl ? "enabling" : "disabling");
+ priv->use_ssl = priv->mgmt_ssl;
+ } else if (priv->ssl_enabled && !priv->use_ssl) {
+ gf_log(this->name, GF_LOG_DEBUG, "re-enabling SSL for I/O connection");
+ priv->use_ssl = _gf_true;
+ }
}
+/*
+ * If we might just be trying to connect prematurely, e.g. to a brick that's
+ * slow coming up, all we need is a simple retry. Don't worry about sleeping
+ * in some arbitrary thread. The connect(2) could already have the exact same
+ * effect, and we deal with it in that case so we can deal with it for sleep(2)
+ * as well.
+ */
+static int
+connect_loop(int sockfd, const struct sockaddr *addr, socklen_t addrlen)
+{
+ int ret;
+ int connect_fails = 0;
-int
-socket_listen (rpc_transport_t *this)
+ for (;;) {
+ ret = connect(sockfd, addr, addrlen);
+ if (ret >= 0) {
+ break;
+ }
+ if ((errno != ENOENT) || (++connect_fails >= 5)) {
+ break;
+ }
+ sleep(1);
+ }
+
+ return ret;
+}
+
+static int
+socket_connect(rpc_transport_t *this, int port)
{
- socket_private_t * priv = NULL;
- int ret = -1;
- int sock = -1;
- struct sockaddr_storage sockaddr;
- socklen_t sockaddr_len = 0;
- peer_info_t *myinfo = NULL;
- glusterfs_ctx_t *ctx = NULL;
- sa_family_t sa_family = {0, };
+ int ret = -1;
+ int th_ret = -1;
+ int sock = -1;
+ socket_private_t *priv = NULL;
+ socklen_t sockaddr_len = 0;
+ glusterfs_ctx_t *ctx = NULL;
+ sa_family_t sa_family = {
+ 0,
+ };
+ char *local_addr = NULL;
+ union gf_sock_union sock_union;
+ struct sockaddr_in *addr = NULL;
+ gf_boolean_t refd = _gf_false;
+ socket_connect_error_state_t *arg = NULL;
+ pthread_t th_id = {
+ 0,
+ };
+ gf_boolean_t ign_enoent = _gf_false;
+ gf_boolean_t connect_attempted = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("socket", this, err);
+ GF_VALIDATE_OR_GOTO("socket", this->private, err);
+
+ priv = this->private;
+ ctx = this->ctx;
+
+ if (!priv) {
+ gf_log_callingfn(this->name, GF_LOG_WARNING,
+ "connect() called on uninitialized transport");
+ goto err;
+ }
+
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ if (priv->sock >= 0) {
+ gf_log_callingfn(this->name, GF_LOG_TRACE,
+ "connect () called on transport "
+ "already connected");
+ errno = EINPROGRESS;
+ ret = -1;
+ goto unlock;
+ }
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ gf_log(this->name, GF_LOG_TRACE, "connecting %p, sock=%d", this,
+ priv->sock);
- priv = this->private;
- myinfo = &this->myinfo;
- ctx = this->ctx;
+ ret = socket_client_get_remote_sockaddr(this, &sock_union.sa,
+ &sockaddr_len, &sa_family);
+ if (ret < 0) {
+ /* logged inside client_get_remote_sockaddr */
+ goto unlock;
+ }
- pthread_mutex_lock (&priv->lock);
- {
- sock = priv->sock;
+ if (sa_family == AF_UNIX) {
+ priv->ssl_enabled = _gf_false;
+ priv->mgmt_ssl = _gf_false;
+ } else {
+ if (port > 0) {
+ sock_union.sin.sin_port = htons(port);
+ }
+ socket_fix_ssl_opts(this, priv, ntohs(sock_union.sin.sin_port));
}
- pthread_mutex_unlock (&priv->lock);
- if (sock != -1) {
- gf_log_callingfn (this->name, GF_LOG_DEBUG,
- "already listening");
- return ret;
+ memcpy(&this->peerinfo.sockaddr, &sock_union.storage, sockaddr_len);
+ this->peerinfo.sockaddr_len = sockaddr_len;
+
+ priv->sock = sys_socket(sa_family, SOCK_STREAM, 0);
+ if (priv->sock < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "socket creation failed (%s)",
+ strerror(errno));
+ ret = -1;
+ goto unlock;
}
- ret = socket_server_get_local_sockaddr (this, SA (&sockaddr),
- &sockaddr_len, &sa_family);
- if (ret == -1) {
- return ret;
+ /* Can't help if setting socket options fails. We can continue
+ * working nonetheless.
+ */
+ if (priv->windowsize != 0) {
+ if (setsockopt(priv->sock, SOL_SOCKET, SO_RCVBUF, &priv->windowsize,
+ sizeof(priv->windowsize)) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setting receive window "
+ "size failed: %d: %d: %s",
+ priv->sock, priv->windowsize, strerror(errno));
+ }
+
+ if (setsockopt(priv->sock, SOL_SOCKET, SO_SNDBUF, &priv->windowsize,
+ sizeof(priv->windowsize)) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setting send window size "
+ "failed: %d: %d: %s",
+ priv->sock, priv->windowsize, strerror(errno));
+ }
}
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->sock != -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "already listening");
- goto unlock;
+ /* Make sure we are not vulnerable to someone setting
+ * net.ipv6.bindv6only to 1 so that gluster services are
+ * available over IPv4 & IPv6.
+ */
+#ifdef IPV6_DEFAULT
+ int disable_v6only = 0;
+ if (setsockopt(priv->sock, IPPROTO_IPV6, IPV6_V6ONLY,
+ (void *)&disable_v6only, sizeof(disable_v6only)) < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Error disabling sockopt IPV6_V6ONLY: \"%s\"",
+ strerror(errno));
+ }
+#endif
+
+ if (sa_family != AF_UNIX) {
+ if (priv->nodelay) {
+ ret = __socket_nodelay(priv->sock);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "NODELAY on %d failed (%s)", priv->sock,
+ strerror(errno));
}
+ }
- memcpy (&myinfo->sockaddr, &sockaddr, sockaddr_len);
- myinfo->sockaddr_len = sockaddr_len;
+ if (priv->keepalive) {
+ ret = __socket_keepalive(
+ priv->sock, sa_family, priv->keepaliveintvl,
+ priv->keepaliveidle, priv->keepalivecnt, priv->timeout);
+ if (ret != 0)
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to set keep-alive: %s", strerror(errno));
+ }
+ }
- priv->sock = socket (sa_family, SOCK_STREAM, 0);
+ SA(&this->myinfo.sockaddr)->sa_family = SA(&this->peerinfo.sockaddr)
+ ->sa_family;
- if (priv->sock == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "socket creation failed (%s)",
- strerror (errno));
- goto unlock;
- }
+ /* If a source addr is explicitly specified, use it */
+ ret = dict_get_str_sizen(this->options, "transport.socket.source-addr",
+ &local_addr);
+ if (!ret && SA(&this->myinfo.sockaddr)->sa_family == AF_INET) {
+ addr = (struct sockaddr_in *)(&this->myinfo.sockaddr);
+ ret = inet_pton(AF_INET, local_addr, &(addr->sin_addr.s_addr));
+ }
- /* Cant help if setting socket options fails. We can continue
- * working nonetheless.
- */
- if (priv->windowsize != 0) {
- if (setsockopt (priv->sock, SOL_SOCKET, SO_RCVBUF,
- &priv->windowsize,
- sizeof (priv->windowsize)) < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting receive window size "
- "failed: %d: %d: %s", priv->sock,
- priv->windowsize,
- strerror (errno));
- }
+ /* If client wants ENOENT to be ignored */
+ ign_enoent = dict_get_str_boolean(
+ this->options, "transport.socket.ignore-enoent", _gf_false);
- if (setsockopt (priv->sock, SOL_SOCKET, SO_SNDBUF,
- &priv->windowsize,
- sizeof (priv->windowsize)) < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "setting send window size failed:"
- " %d: %d: %s", priv->sock,
- priv->windowsize,
- strerror (errno));
- }
- }
+ ret = client_bind(this, SA(&this->myinfo.sockaddr),
+ &this->myinfo.sockaddr_len, priv->sock);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING, "client bind failed: %s",
+ strerror(errno));
+ goto handler;
+ }
- if (priv->nodelay) {
- ret = __socket_nodelay (priv->sock);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "setsockopt() failed for NODELAY (%s)",
- strerror (errno));
- }
- }
+ /* make socket non-blocking for all types of sockets */
+ if (!priv->bio) {
+ ret = __socket_nonblock(priv->sock);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "NBIO on %d failed (%s)",
+ priv->sock, strerror(errno));
+ goto handler;
+ } else {
+ gf_log(this->name, GF_LOG_TRACE,
+ ">>> connect() with non-blocking IO for ALL");
+ }
+ }
+ this->connect_failed = _gf_false;
+ priv->connect_failed = 0;
+ priv->connected = 0;
- if (!priv->bio) {
- ret = __socket_nonblock (priv->sock);
+ socket_dump_info(SA(&this->peerinfo.sockaddr), priv->is_server,
+ priv->use_ssl, priv->sock, this->name,
+ "connecting to");
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "NBIO on %d failed (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
- }
+ if (ign_enoent) {
+ ret = connect_loop(priv->sock, SA(&this->peerinfo.sockaddr),
+ this->peerinfo.sockaddr_len);
+ } else {
+ ret = connect(priv->sock, SA(&this->peerinfo.sockaddr),
+ this->peerinfo.sockaddr_len);
+ }
- ret = __socket_server_bind (this);
+ connect_attempted = _gf_true;
- if (ret == -1) {
- /* logged inside __socket_server_bind() */
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
+ if ((ret != 0) && (errno == ENOENT) && ign_enoent) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Ignore failed connection attempt on %s, (%s) ",
+ this->peerinfo.identifier, strerror(errno));
- if (priv->backlog)
- ret = listen (priv->sock, priv->backlog);
- else
- ret = listen (priv->sock, 10);
-
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set socket %d to listen mode (%s)",
- priv->sock, strerror (errno));
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
+ /* connect failed with some other error than EINPROGRESS
+ so, getsockopt (... SO_ERROR ...), will not catch any
+ errors and return them to us, we need to remember this
+ state, and take actions in socket_event_handler
+ appropriately */
+ /* TBD: What about ENOENT, we will do getsockopt there
+ as well, so how is that exempt from such a problem? */
+ priv->connect_failed = 1;
+ this->connect_failed = _gf_true;
- rpc_transport_ref (this);
+ goto handler;
+ }
- priv->idx = event_register (ctx->event_pool, priv->sock,
- socket_server_event_handler,
- this, 1, 0);
+ if ((ret != 0) && (errno != EINPROGRESS) && (errno != ENOENT)) {
+ /* For unix path based sockets, the socket path is
+ * cryptic (md5sum of path) and may not be useful for
+ * the user in debugging so log it in DEBUG
+ */
+ gf_log(this->name,
+ ((sa_family == AF_UNIX) ? GF_LOG_DEBUG : GF_LOG_ERROR),
+ "connection attempt on %s failed, (%s)",
+ this->peerinfo.identifier, strerror(errno));
+
+ /* connect failed with some other error than EINPROGRESS
+ so, getsockopt (... SO_ERROR ...), will not catch any
+ errors and return them to us, we need to remember this
+ state, and take actions in socket_event_handler
+ appropriately */
+ /* TBD: What about ENOENT, we will do getsockopt there
+ as well, so how is that exempt from such a problem? */
+ priv->connect_failed = 1;
+
+ goto handler;
+ } else {
+ /* reset connect_failed so that any previous attempts
+ state is not carried forward */
+ priv->connect_failed = 0;
+ ret = 0;
+ }
- if (priv->idx == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "could not register socket %d with events",
- priv->sock);
- ret = -1;
- close (priv->sock);
- priv->sock = -1;
- goto unlock;
- }
+ handler:
+ if (ret < 0 && !connect_attempted) {
+ /* Ignore error from connect. epoll events
+ should be handled in the socket handler. shutdown(2)
+ will result in EPOLLERR, so cleanup is done in
+ socket_event_handler or socket_poller */
+ shutdown(priv->sock, SHUT_RDWR);
+ ret = 0;
+ gf_log(this->name, GF_LOG_INFO,
+ "intentional client shutdown(%d, SHUT_RDWR)", priv->sock);
}
-unlock:
- pthread_mutex_unlock (&priv->lock);
-out:
- return ret;
-}
+ priv->connected = 0;
+ priv->is_server = _gf_false;
+ rpc_transport_ref(this);
+ refd = _gf_true;
+
+ this->listener = this;
+ priv->idx = gf_event_register(ctx->event_pool, priv->sock,
+ socket_event_handler, this, 1, 1,
+ this->notify_poller_death);
+ if (priv->idx == -1) {
+ gf_log("", GF_LOG_WARNING,
+ "failed to register the event; "
+ "closing socket %d",
+ priv->sock);
+ sys_close(priv->sock);
+ priv->sock = -1;
+ ret = -1;
+ }
+ unlock:
+ sock = priv->sock;
+ }
+ pthread_mutex_unlock(&priv->out_lock);
-int32_t
-socket_submit_request (rpc_transport_t *this, rpc_transport_req_t *req)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
- char need_poll_out = 0;
- char need_append = 1;
- struct ioq *entry = NULL;
- glusterfs_ctx_t *ctx = NULL;
+err:
+ /* if sock >= 0, then cleanup is done from the event handler */
+ if ((ret < 0) && (sock < 0)) {
+ /* Cleaup requires to send notification to upper layer which
+ intern holds the big_lock. There can be dead-lock situation
+ if big_lock is already held by the current thread.
+ So transfer the ownership to separate thread for cleanup.
+ */
+ arg = GF_CALLOC(1, sizeof(*arg), gf_sock_connect_error_state_t);
+ arg->this = THIS;
+ arg->trans = this;
+ arg->refd = refd;
+ th_ret = gf_thread_create_detached(&th_id, socket_connect_error_cbk,
+ arg, "scleanup");
+ if (th_ret) {
+ /* Error will be logged by gf_thread_create_attached */
+ gf_log(this->name, GF_LOG_ERROR,
+ "Thread creation "
+ "failed");
+ GF_FREE(arg);
+ GF_ASSERT(0);
+ }
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ ret = 0;
+ }
- priv = this->private;
- ctx = this->ctx;
+ return ret;
+}
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->connected != 1) {
- if (!priv->submit_log && !priv->connect_finish_log) {
- gf_log (this->name, GF_LOG_INFO,
- "not connected (priv->connected = %d)",
- priv->connected);
- priv->submit_log = 1;
- }
- goto unlock;
- }
+static int
+socket_listen(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ int ret = -1;
+ int sock = -1;
+ struct sockaddr_storage sockaddr;
+ socklen_t sockaddr_len = 0;
+ peer_info_t *myinfo = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ sa_family_t sa_family = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", this->private, out);
+
+ priv = this->private;
+ myinfo = &this->myinfo;
+ ctx = this->ctx;
+
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ sock = priv->sock;
+ }
+ pthread_mutex_unlock(&priv->out_lock);
- priv->submit_log = 0;
- entry = __socket_ioq_new (this, &req->msg);
- if (!entry)
- goto unlock;
+ if (sock >= 0) {
+ gf_log_callingfn(this->name, GF_LOG_DEBUG, "already listening");
+ return ret;
+ }
- if (list_empty (&priv->ioq)) {
- ret = __socket_ioq_churn_entry (this, entry);
+ ret = socket_server_get_local_sockaddr(this, SA(&sockaddr), &sockaddr_len,
+ &sa_family);
+ if (ret < 0) {
+ return ret;
+ }
- if (ret == 0)
- need_append = 0;
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ if (priv->sock >= 0) {
+ gf_log(this->name, GF_LOG_DEBUG, "already listening");
+ goto unlock;
+ }
- if (ret > 0)
- need_poll_out = 1;
- }
+ memcpy(&myinfo->sockaddr, &sockaddr, sockaddr_len);
+ myinfo->sockaddr_len = sockaddr_len;
- if (need_append) {
- list_add_tail (&entry->list, &priv->ioq);
- ret = 0;
- }
+ priv->sock = sys_socket(sa_family, SOCK_STREAM, 0);
- if (need_poll_out) {
- /* first entry to wait. continue writing on POLLOUT */
- priv->idx = event_select_on (ctx->event_pool,
- priv->sock,
- priv->idx, -1, 1);
- }
+ if (priv->sock < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "socket creation failed (%s)",
+ strerror(errno));
+ goto unlock;
}
-unlock:
- pthread_mutex_unlock (&priv->lock);
-
-out:
- return ret;
-}
+ /* Can't help if setting socket options fails. We can continue
+ * working nonetheless.
+ */
+ if (priv->windowsize != 0) {
+ if (setsockopt(priv->sock, SOL_SOCKET, SO_RCVBUF, &priv->windowsize,
+ sizeof(priv->windowsize)) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setting receive window size "
+ "failed: %d: %d: %s",
+ priv->sock, priv->windowsize, strerror(errno));
+ }
+
+ if (setsockopt(priv->sock, SOL_SOCKET, SO_SNDBUF, &priv->windowsize,
+ sizeof(priv->windowsize)) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setting send window size failed:"
+ " %d: %d: %s",
+ priv->sock, priv->windowsize, strerror(errno));
+ }
+ }
-int32_t
-socket_submit_reply (rpc_transport_t *this, rpc_transport_reply_t *reply)
-{
- socket_private_t *priv = NULL;
- int ret = -1;
- char need_poll_out = 0;
- char need_append = 1;
- struct ioq *entry = NULL;
- glusterfs_ctx_t *ctx = NULL;
+ if (priv->nodelay && (sa_family != AF_UNIX)) {
+ ret = __socket_nodelay(priv->sock);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "setsockopt() failed for NODELAY (%s)", strerror(errno));
+ }
+ }
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ if (!priv->bio) {
+ ret = __socket_nonblock(priv->sock);
+
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "NBIO on socket %d failed "
+ "(errno:%s); closing socket",
+ priv->sock, strerror(errno));
+ sys_close(priv->sock);
+ priv->sock = -1;
+ goto unlock;
+ }
+ }
- priv = this->private;
- ctx = this->ctx;
+ /* coverity[SLEEP] */
+ ret = __socket_server_bind(this);
+
+ if (ret < 0) {
+ /* logged inside __socket_server_bind() */
+ gf_log(this->name, GF_LOG_ERROR,
+ "__socket_server_bind failed;"
+ "closing socket %d",
+ priv->sock);
+ sys_close(priv->sock);
+ priv->sock = -1;
+ goto unlock;
+ }
- pthread_mutex_lock (&priv->lock);
- {
- if (priv->connected != 1) {
- if (!priv->submit_log && !priv->connect_finish_log) {
- gf_log (this->name, GF_LOG_INFO,
- "not connected (priv->connected = %d)",
- priv->connected);
- priv->submit_log = 1;
- }
- goto unlock;
- }
- priv->submit_log = 0;
- entry = __socket_ioq_new (this, &reply->msg);
- if (!entry)
- goto unlock;
- if (list_empty (&priv->ioq)) {
- ret = __socket_ioq_churn_entry (this, entry);
-
- if (ret == 0)
- need_append = 0;
-
- if (ret > 0)
- need_poll_out = 1;
- }
+ socket_dump_info(SA(&this->myinfo.sockaddr), priv->is_server,
+ priv->use_ssl, priv->sock, this->name, "listening on");
- if (need_append) {
- list_add_tail (&entry->list, &priv->ioq);
- ret = 0;
- }
+ ret = listen(priv->sock, priv->backlog);
- if (need_poll_out) {
- /* first entry to wait. continue writing on POLLOUT */
- priv->idx = event_select_on (ctx->event_pool,
- priv->sock,
- priv->idx, -1, 1);
- }
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "could not set socket %d to listen mode (errno:%s); "
+ "closing socket",
+ priv->sock, strerror(errno));
+ sys_close(priv->sock);
+ priv->sock = -1;
+ goto unlock;
}
+ rpc_transport_ref(this);
+
+ priv->idx = gf_event_register(ctx->event_pool, priv->sock,
+ socket_server_event_handler, this, 1, 0,
+ this->notify_poller_death);
+
+ if (priv->idx == -1) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "could not register socket %d with events; "
+ "closing socket",
+ priv->sock);
+ ret = -1;
+ sys_close(priv->sock);
+ priv->sock = -1;
+ goto unlock;
+ }
+ }
unlock:
- pthread_mutex_unlock (&priv->lock);
+ pthread_mutex_unlock(&priv->out_lock);
out:
- return ret;
+ return ret;
}
-
-int32_t
-socket_getpeername (rpc_transport_t *this, char *hostname, int hostlen)
+static int32_t
+socket_submit_outgoing_msg(rpc_transport_t *this, rpc_transport_msg_t *msg)
{
- int32_t ret = -1;
+ int ret = -1;
+ char need_poll_out = 0;
+ char need_append = 1;
+ struct ioq *entry = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ socket_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", this->private, out);
+
+ priv = this->private;
+ ctx = this->ctx;
+
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ if (priv->connected != 1) {
+ if (!priv->submit_log && !priv->connect_finish_log) {
+ gf_log(this->name, GF_LOG_INFO,
+ "not connected (priv->connected = %d)", priv->connected);
+ priv->submit_log = 1;
+ }
+ goto unlock;
+ }
+
+ priv->submit_log = 0;
+ entry = __socket_ioq_new(this, msg);
+ if (!entry)
+ goto unlock;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", hostname, out);
+ if (list_empty(&priv->ioq)) {
+ ret = __socket_ioq_churn_entry(this, entry);
- if (hostlen < (strlen (this->peerinfo.identifier) + 1)) {
- goto out;
+ if (ret == 0) {
+ need_append = 0;
+ }
+ if (ret > 0) {
+ need_poll_out = 1;
+ }
}
- strcpy (hostname, this->peerinfo.identifier);
- ret = 0;
+ if (need_append) {
+ list_add_tail(&entry->list, &priv->ioq);
+ ret = 0;
+ }
+ if (need_poll_out) {
+ /* first entry to wait. continue writing on POLLOUT */
+ priv->idx = gf_event_select_on(ctx->event_pool, priv->sock,
+ priv->idx, -1, 1);
+ }
+ }
+unlock:
+ pthread_mutex_unlock(&priv->out_lock);
+
out:
- return ret;
+ return ret;
}
+static int32_t
+socket_submit_request(rpc_transport_t *this, rpc_transport_req_t *req)
+{
+ return socket_submit_outgoing_msg(this, &req->msg);
+}
-int32_t
-socket_getpeeraddr (rpc_transport_t *this, char *peeraddr, int addrlen,
- struct sockaddr_storage *sa, socklen_t salen)
+static int32_t
+socket_submit_reply(rpc_transport_t *this, rpc_transport_reply_t *reply)
{
- int32_t ret = -1;
+ return socket_submit_outgoing_msg(this, &reply->msg);
+}
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", sa, out);
+static int32_t
+socket_getpeername(rpc_transport_t *this, char *hostname, int hostlen)
+{
+ int32_t ret = -1;
- *sa = this->peerinfo.sockaddr;
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", hostname, out);
- if (peeraddr != NULL) {
- ret = socket_getpeername (this, peeraddr, addrlen);
- }
- ret = 0;
+ if (hostlen < (strlen(this->peerinfo.identifier) + 1)) {
+ goto out;
+ }
+ strcpy(hostname, this->peerinfo.identifier);
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
-int32_t
-socket_getmyname (rpc_transport_t *this, char *hostname, int hostlen)
+static int32_t
+socket_getpeeraddr(rpc_transport_t *this, char *peeraddr, int addrlen,
+ struct sockaddr_storage *sa, socklen_t salen)
{
- int32_t ret = -1;
+ int32_t ret = -1;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", hostname, out);
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", sa, out);
+ ret = 0;
- if (hostlen < (strlen (this->myinfo.identifier) + 1)) {
- goto out;
- }
+ *sa = this->peerinfo.sockaddr;
- strcpy (hostname, this->myinfo.identifier);
- ret = 0;
+ if (peeraddr != NULL) {
+ ret = socket_getpeername(this, peeraddr, addrlen);
+ }
out:
- return ret;
+ return ret;
}
+static int32_t
+socket_getmyname(rpc_transport_t *this, char *hostname, int hostlen)
+{
+ int32_t ret = -1;
-int32_t
-socket_getmyaddr (rpc_transport_t *this, char *myaddr, int addrlen,
- struct sockaddr_storage *sa, socklen_t salen)
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", hostname, out);
+
+ if (hostlen < (strlen(this->myinfo.identifier) + 1)) {
+ goto out;
+ }
+
+ strcpy(hostname, this->myinfo.identifier);
+ ret = 0;
+out:
+ return ret;
+}
+
+static int32_t
+socket_getmyaddr(rpc_transport_t *this, char *myaddr, int addrlen,
+ struct sockaddr_storage *sa, socklen_t salen)
{
- int32_t ret = 0;
+ int32_t ret = 0;
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", sa, out);
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", sa, out);
- *sa = this->myinfo.sockaddr;
+ *sa = this->myinfo.sockaddr;
- if (myaddr != NULL) {
- ret = socket_getmyname (this, myaddr, addrlen);
- }
+ if (myaddr != NULL) {
+ ret = socket_getmyname(this, myaddr, addrlen);
+ }
out:
- return ret;
+ return ret;
}
+static int
+socket_throttle(rpc_transport_t *this, gf_boolean_t onoff)
+{
+ socket_private_t *priv = NULL;
+
+ priv = this->private;
+
+ /* The way we implement throttling is by taking off
+ POLLIN event from the polled flags. This way we
+ never get called with the POLLIN event and therefore
+ will never read() any more data until throttling
+ is turned off.
+ */
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ /* Throttling is useless on a disconnected transport. In fact,
+ * it's dangerous since priv->idx and priv->sock are set to -1
+ * on a disconnected transport, which breaks epoll's event to
+ * registered fd mapping. */
+
+ if (priv->connected == 1)
+ priv->idx = gf_event_select_on(this->ctx->event_pool, priv->sock,
+ priv->idx, (int)!onoff, -1);
+ }
+ pthread_mutex_unlock(&priv->out_lock);
+ return 0;
+}
struct rpc_transport_ops tops = {
- .listen = socket_listen,
- .connect = socket_connect,
- .disconnect = socket_disconnect,
- .submit_request = socket_submit_request,
- .submit_reply = socket_submit_reply,
- .get_peername = socket_getpeername,
- .get_peeraddr = socket_getpeeraddr,
- .get_myname = socket_getmyname,
- .get_myaddr = socket_getmyaddr,
+ .listen = socket_listen,
+ .connect = socket_connect,
+ .disconnect = socket_disconnect,
+ .submit_request = socket_submit_request,
+ .submit_reply = socket_submit_reply,
+ .get_peername = socket_getpeername,
+ .get_peeraddr = socket_getpeeraddr,
+ .get_myname = socket_getmyname,
+ .get_myaddr = socket_getmyaddr,
+ .throttle = socket_throttle,
};
int
-reconfigure (rpc_transport_t *this, dict_t *options)
+reconfigure(rpc_transport_t *this, dict_t *options)
{
- socket_private_t *priv = NULL;
- gf_boolean_t tmp_bool = _gf_false;
- char *optstr = NULL;
- int ret = 0;
- uint64_t windowsize = 0;
+ socket_private_t *priv = NULL;
+ gf_boolean_t tmp_bool = _gf_false;
+ char *optstr = NULL;
+ int ret = -1;
+ uint32_t backlog = 0;
+ uint64_t windowsize = 0;
+ data_t *data;
+
+ GF_VALIDATE_OR_GOTO("socket", this, out);
+ GF_VALIDATE_OR_GOTO("socket", this->private, out);
+
+ priv = this->private;
+
+ if (dict_get_str_sizen(options, "transport.socket.keepalive", &optstr) ==
+ 0) {
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'transport.socket.keepalive' takes only "
+ "boolean options, not taking any action");
+ priv->keepalive = 1;
+ goto out;
+ }
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.socket.keepalive");
- GF_VALIDATE_OR_GOTO ("socket", this, out);
- GF_VALIDATE_OR_GOTO ("socket", this->private, out);
+ priv->keepalive = tmp_bool;
+ } else
+ priv->keepalive = 1;
- if (!this || !this->private) {
- ret =-1;
- goto out;
+ if (dict_get_int32_sizen(options, "transport.tcp-user-timeout",
+ &(priv->timeout)) != 0)
+ priv->timeout = GF_NETWORK_TIMEOUT;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.tcp-user-timeout=%d", priv->timeout);
+
+ if (dict_get_uint32(options, "transport.listen-backlog", &backlog) == 0) {
+ priv->backlog = backlog;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.listen-backlog=%d", priv->backlog);
+ }
+
+ if (priv->keepalive) {
+ if (dict_get_int32_sizen(options, "transport.socket.keepalive-time",
+ &(priv->keepaliveidle)) != 0)
+ priv->keepaliveidle = GF_KEEPALIVE_TIME;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.socket.keepalive-time=%d",
+ priv->keepaliveidle);
+
+ if (dict_get_int32_sizen(options, "transport.socket.keepalive-interval",
+ &(priv->keepaliveintvl)) != 0)
+ priv->keepaliveintvl = GF_KEEPALIVE_INTERVAL;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.socket.keepalive-interval=%d",
+ priv->keepaliveintvl);
+
+ if (dict_get_int32_sizen(options, "transport.socket.keepalive-count",
+ &(priv->keepalivecnt)) != 0)
+ priv->keepalivecnt = GF_KEEPALIVE_COUNT;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.socket.keepalive-count=%d",
+ priv->keepalivecnt);
+ }
+
+ optstr = NULL;
+ if (dict_get_str_sizen(options, "tcp-window-size", &optstr) == 0) {
+ if (gf_string2uint64(optstr, &windowsize) != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "invalid number format: %s",
+ optstr);
+ goto out;
}
+ }
- priv = this->private;
+ priv->windowsize = (int)windowsize;
- if (dict_get_str (this->options, "transport.socket.keepalive",
- &optstr) == 0) {
- if (gf_string2boolean (optstr, &tmp_bool) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'transport.socket.keepalive' takes only "
- "boolean options, not taking any action");
- priv->keepalive = 1;
- ret = -1;
- goto out;
- }
- gf_log (this->name, GF_LOG_DEBUG, "Reconfigured transport.socket.keepalive");
+ data = dict_get_sizen(options, "non-blocking-io");
+ if (data) {
+ optstr = data_to_str(data);
- priv->keepalive = tmp_bool;
- }
- else
- priv->keepalive = 1;
-
- optstr = NULL;
- if (dict_get_str (this->options, "tcp-window-size",
- &optstr) == 0) {
- if (gf_string2bytesize (optstr, &windowsize) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format: %s", optstr);
- goto out;
- }
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'non-blocking-io' takes only boolean options,"
+ " not taking any action");
+ tmp_bool = 1;
}
- priv->windowsize = (int)windowsize;
+ if (!tmp_bool) {
+ priv->bio = 1;
+ gf_log(this->name, GF_LOG_WARNING, "disabling non-blocking IO");
+ }
+ }
+
+ if (!priv->bio) {
+ ret = __socket_nonblock(priv->sock);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING, "NBIO on %d failed (%s)",
+ priv->sock, strerror(errno));
+ goto out;
+ }
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
+}
+#if OPENSSL_VERSION_NUMBER < 0x1010000f
+static pthread_mutex_t *lock_array = NULL;
+
+static void
+locking_func(int mode, int type, const char *file, int line)
+{
+ if (mode & CRYPTO_UNLOCK) {
+ pthread_mutex_unlock(&lock_array[type]);
+ } else {
+ pthread_mutex_lock(&lock_array[type]);
+ }
}
-int
-socket_init (rpc_transport_t *this)
+#if OPENSSL_VERSION_NUMBER >= 0x1000000f
+static void
+threadid_func(CRYPTO_THREADID *id)
+{
+ /*
+ * We're not supposed to know whether a pthread_t is a number or a
+ * pointer, but we definitely need an unsigned long. Even though it
+ * happens to be an unsigned long already on Linux, do the cast just in
+ * case that's not so on another platform. Note that this can still
+ * break if any platforms are left where a pointer is larger than an
+ * unsigned long. In that case there's not much we can do; hopefully
+ * anyone porting to such a platform will be aware enough to notice the
+ * compile warnings about truncating the pointer value.
+ */
+ CRYPTO_THREADID_set_numeric(id, (unsigned long)pthread_self());
+}
+#else /* older openssl */
+static unsigned long
+legacy_threadid_func(void)
{
- socket_private_t *priv = NULL;
- gf_boolean_t tmp_bool = 0;
- uint64_t windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE;
- char *optstr = NULL;
- uint32_t keepalive = 0;
- uint32_t backlog = 0;
+ /* See comments above, it applies here too. */
+ return (unsigned long)pthread_self();
+}
+#endif /* OPENSSL_VERSION_NUMBER >= 0x1000000f */
+#endif /* OPENSSL_VERSION_NUMBER < 0x1010000f */
+
+static void
+init_openssl_mt(void)
+{
+ static gf_boolean_t initialized = _gf_false;
+
+ if (initialized) {
+ /* this only needs to be initialized once GLOBALLY no
+ matter how many translators/sockets we end up with. */
+ return;
+ }
+
+ SSL_library_init();
+ SSL_load_error_strings();
+
+ initialized = _gf_true;
- if (this->private) {
- gf_log_callingfn (this->name, GF_LOG_ERROR,
- "double init attempted");
- return -1;
+#if OPENSSL_VERSION_NUMBER < 0x1010000f
+ int num_locks = CRYPTO_num_locks();
+ int i;
+
+ lock_array = GF_CALLOC(num_locks, sizeof(pthread_mutex_t),
+ gf_sock_mt_lock_array);
+ if (lock_array) {
+ for (i = 0; i < num_locks; ++i) {
+ pthread_mutex_init(&lock_array[i], NULL);
}
+#if OPENSSL_VERSION_NUMBER >= 0x1000000f
+ CRYPTO_THREADID_set_callback(threadid_func);
+#else /* older openssl */
+ CRYPTO_set_id_callback(legacy_threadid_func);
+#endif
+ CRYPTO_set_locking_callback(locking_func);
+ }
+#endif
+}
+
+static void __attribute__((destructor)) fini_openssl_mt(void)
+{
+#if OPENSSL_VERSION_NUMBER < 0x1010000f
+ int i;
+
+ if (!lock_array) {
+ return;
+ }
+
+ CRYPTO_set_locking_callback(NULL);
+#if OPENSSL_VERSION_NUMBER >= 0x1000000f
+ CRYPTO_THREADID_set_callback(NULL);
+#else /* older openssl */
+ CRYPTO_set_id_callback(NULL);
+#endif
+
+ for (i = 0; i < CRYPTO_num_locks(); ++i) {
+ pthread_mutex_destroy(&lock_array[i]);
+ }
+
+ GF_FREE(lock_array);
+ lock_array = NULL;
+#endif
+
+ ERR_free_strings();
+}
- priv = GF_CALLOC (1, sizeof (*priv), gf_common_mt_socket_private_t);
- if (!priv) {
- return -1;
+/* The function returns 0 if AES bit is enabled on the CPU */
+static int
+ssl_check_aes_bit(void)
+{
+ FILE *fp = fopen("/proc/cpuinfo", "r");
+ int ret = 1;
+ size_t len = 0;
+ char *line = NULL;
+ char *match = NULL;
+
+ GF_ASSERT(fp != NULL);
+
+ while (getline(&line, &len, fp) > 0) {
+ if (!strncmp(line, "flags", 5)) {
+ match = strstr(line, " aes");
+ if ((match != NULL) && ((match[4] == ' ') || (match[4] == 0))) {
+ ret = 0;
+ break;
+ }
}
+ }
- pthread_mutex_init (&priv->lock, NULL);
+ free(line);
+ fclose(fp);
- priv->sock = -1;
- priv->idx = -1;
- priv->connected = -1;
- priv->nodelay = 1;
- priv->bio = 0;
- priv->windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE;
- INIT_LIST_HEAD (&priv->ioq);
+ return ret;
+}
- /* All the below section needs 'this->options' to be present */
- if (!this->options)
- goto out;
+static int
+ssl_setup_connection_params(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ char *optstr = NULL;
+ static int session_id = 1;
+ int32_t cert_depth = DEFAULT_VERIFY_DEPTH;
+ char *cipher_list = DEFAULT_CIPHER_LIST;
+ char *dh_param = DEFAULT_DH_PARAM;
+ char *ec_curve = DEFAULT_EC_CURVE;
+ gf_boolean_t dh_flag = _gf_false;
+
+ priv = this->private;
+
+ if (priv->ssl_ctx != NULL) {
+ gf_log(this->name, GF_LOG_TRACE, "found old SSL context!");
+ return 0;
+ }
- if (dict_get (this->options, "non-blocking-io")) {
- optstr = data_to_str (dict_get (this->options,
- "non-blocking-io"));
+ if (!priv->ssl_enabled && !priv->mgmt_ssl) {
+ return 0;
+ }
+
+ if (!ssl_check_aes_bit()) {
+ cipher_list = "AES128:" DEFAULT_CIPHER_LIST;
+ }
+
+ priv->ssl_own_cert = DEFAULT_CERT_PATH;
+ if (dict_get_str_sizen(this->options, SSL_OWN_CERT_OPT, &optstr) == 0) {
+ if (!priv->ssl_enabled) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s specified without %s (ignored)", SSL_OWN_CERT_OPT,
+ SSL_ENABLED_OPT);
+ }
+ priv->ssl_own_cert = optstr;
+ }
+ priv->ssl_own_cert = gf_strdup(priv->ssl_own_cert);
+
+ priv->ssl_private_key = DEFAULT_KEY_PATH;
+ if (dict_get_str_sizen(this->options, SSL_PRIVATE_KEY_OPT, &optstr) == 0) {
+ if (!priv->ssl_enabled) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s specified without %s (ignored)", SSL_PRIVATE_KEY_OPT,
+ SSL_ENABLED_OPT);
+ }
+ priv->ssl_private_key = optstr;
+ }
+ priv->ssl_private_key = gf_strdup(priv->ssl_private_key);
+
+ priv->ssl_ca_list = DEFAULT_CA_PATH;
+ if (dict_get_str_sizen(this->options, SSL_CA_LIST_OPT, &optstr) == 0) {
+ if (!priv->ssl_enabled) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s specified without %s (ignored)", SSL_CA_LIST_OPT,
+ SSL_ENABLED_OPT);
+ }
+ priv->ssl_ca_list = optstr;
+ }
+ priv->ssl_ca_list = gf_strdup(priv->ssl_ca_list);
+
+ optstr = NULL;
+ if (dict_get_str_sizen(this->options, SSL_CRL_PATH_OPT, &optstr) == 0) {
+ if (!priv->ssl_enabled) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s specified without %s (ignored)", SSL_CRL_PATH_OPT,
+ SSL_ENABLED_OPT);
+ }
+ if (strcasecmp(optstr, "NULL") == 0)
+ priv->crl_path = NULL;
+ else
+ priv->crl_path = gf_strdup(optstr);
+ }
- if (gf_string2boolean (optstr, &tmp_bool) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'non-blocking-io' takes only boolean options,"
- " not taking any action");
- tmp_bool = 1;
- }
+ if (!priv->mgmt_ssl) {
+ if (!dict_get_int32_sizen(this->options, SSL_CERT_DEPTH_OPT,
+ &cert_depth)) {
+ }
+ } else {
+ cert_depth = this->ctx->ssl_cert_depth;
+ }
+ gf_log(this->name, priv->ssl_enabled ? GF_LOG_INFO : GF_LOG_DEBUG,
+ "SSL support for MGMT is %s IO path is %s certificate depth is %d "
+ "for peer %s",
+ (priv->mgmt_ssl ? "ENABLED" : "NOT enabled"),
+ (priv->ssl_enabled ? "ENABLED" : "NOT enabled"), cert_depth,
+ this->peerinfo.identifier);
+
+ if (!dict_get_str_sizen(this->options, SSL_CIPHER_LIST_OPT, &cipher_list)) {
+ gf_log(this->name, GF_LOG_INFO, "using cipher list %s", cipher_list);
+ }
+ if (!dict_get_str_sizen(this->options, SSL_DH_PARAM_OPT, &dh_param)) {
+ dh_flag = _gf_true;
+ gf_log(this->name, GF_LOG_INFO, "using DH parameters %s", dh_param);
+ }
+ if (!dict_get_str_sizen(this->options, SSL_EC_CURVE_OPT, &ec_curve)) {
+ gf_log(this->name, GF_LOG_INFO, "using EC curve %s", ec_curve);
+ }
+
+ if (priv->ssl_enabled || priv->mgmt_ssl) {
+ BIO *bio = NULL;
+
+#if HAVE_TLS_METHOD
+ priv->ssl_meth = (SSL_METHOD *)TLS_method();
+#elif HAVE_TLSV1_2_METHOD
+ priv->ssl_meth = (SSL_METHOD *)TLSv1_2_method();
+#else
+/*
+ * Nobody should use an OpenSSL so old it does not support TLS 1.2.
+ * If that is really required, build with -DUSE_INSECURE_OPENSSL
+ */
+#ifndef USE_INSECURE_OPENSSL
+#error Old and insecure OpenSSL, use -DUSE_INSECURE_OPENSSL to use it anyway
+#endif
+ /* SSLv23_method uses highest available protocol */
+ priv->ssl_meth = SSLv23_method();
+#endif
+ priv->ssl_ctx = SSL_CTX_new(priv->ssl_meth);
- if (!tmp_bool) {
- priv->bio = 1;
- gf_log (this->name, GF_LOG_WARNING,
- "disabling non-blocking IO");
- }
+ SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_NO_SSLv2);
+ SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_NO_SSLv3);
+#ifdef SSL_OP_NO_TICKET
+ SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_NO_TICKET);
+#endif
+#ifdef SSL_OP_NO_COMPRESSION
+ SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_NO_COMPRESSION);
+#endif
+ /* Upload file to bio wrapper only if dh param is configured
+ */
+ if (dh_flag) {
+ if ((bio = BIO_new_file(dh_param, "r")) == NULL) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to open %s, "
+ "DH ciphers are disabled",
+ dh_param);
+ }
}
- optstr = NULL;
+ if (bio != NULL) {
+#ifdef HAVE_OPENSSL_DH_H
+ DH *dh;
+ unsigned long err;
+
+ dh = PEM_read_bio_DHparams(bio, NULL, NULL, NULL);
+ BIO_free(bio);
+ if (dh != NULL) {
+ SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_SINGLE_DH_USE);
+ SSL_CTX_set_tmp_dh(priv->ssl_ctx, dh);
+ DH_free(dh);
+ } else {
+ err = ERR_get_error();
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to read DH param from %s: %s "
+ "DH ciphers are disabled.",
+ dh_param, ERR_error_string(err, NULL));
+ }
+#else /* HAVE_OPENSSL_DH_H */
+ BIO_free(bio);
+ gf_log(this->name, GF_LOG_ERROR, "OpenSSL has no DH support");
+#endif /* HAVE_OPENSSL_DH_H */
+ }
- // By default, we enable NODELAY
- if (dict_get (this->options, "transport.socket.nodelay")) {
- optstr = data_to_str (dict_get (this->options,
- "transport.socket.nodelay"));
+ if (ec_curve != NULL) {
+#ifdef HAVE_OPENSSL_ECDH_H
+ EC_KEY *ecdh = NULL;
+ int nid;
+ unsigned long err;
+
+ nid = OBJ_sn2nid(ec_curve);
+ if (nid != 0)
+ ecdh = EC_KEY_new_by_curve_name(nid);
+
+ if (ecdh != NULL) {
+ SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_SINGLE_ECDH_USE);
+ SSL_CTX_set_tmp_ecdh(priv->ssl_ctx, ecdh);
+ EC_KEY_free(ecdh);
+ } else {
+ err = ERR_get_error();
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to load EC curve %s: %s. "
+ "ECDH ciphers are disabled.",
+ ec_curve, ERR_error_string(err, NULL));
+ }
+#else /* HAVE_OPENSSL_ECDH_H */
+ gf_log(this->name, GF_LOG_ERROR, "OpenSSL has no ECDH support");
+#endif /* HAVE_OPENSSL_ECDH_H */
+ }
- if (gf_string2boolean (optstr, &tmp_bool) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'transport.socket.nodelay' takes only "
- "boolean options, not taking any action");
- tmp_bool = 1;
- }
- if (!tmp_bool) {
- priv->nodelay = 0;
- gf_log (this->name, GF_LOG_DEBUG,
- "disabling nodelay");
- }
+ /* This must be done after DH and ECDH setups */
+ if (SSL_CTX_set_cipher_list(priv->ssl_ctx, cipher_list) == 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to find any valid ciphers");
+ goto err;
}
- optstr = NULL;
- if (dict_get_str (this->options, "tcp-window-size",
- &optstr) == 0) {
- if (gf_string2bytesize (optstr, &windowsize) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format: %s", optstr);
- return -1;
- }
+ SSL_CTX_set_options(priv->ssl_ctx, SSL_OP_CIPHER_SERVER_PREFERENCE);
+
+ if (!SSL_CTX_use_certificate_chain_file(priv->ssl_ctx,
+ priv->ssl_own_cert)) {
+ gf_log(this->name, GF_LOG_ERROR, "could not load our cert at %s",
+ priv->ssl_own_cert);
+ ssl_dump_error_stack(this->name);
+ goto err;
}
- priv->windowsize = (int)windowsize;
+ if (!SSL_CTX_use_PrivateKey_file(priv->ssl_ctx, priv->ssl_private_key,
+ SSL_FILETYPE_PEM)) {
+ gf_log(this->name, GF_LOG_ERROR, "could not load private key at %s",
+ priv->ssl_private_key);
+ ssl_dump_error_stack(this->name);
+ goto err;
+ }
- optstr = NULL;
- /* Enable Keep-alive by default. */
- priv->keepalive = 1;
- priv->keepaliveintvl = 2;
- priv->keepaliveidle = 20;
- if (dict_get_str (this->options, "transport.socket.keepalive",
- &optstr) == 0) {
- if (gf_string2boolean (optstr, &tmp_bool) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'transport.socket.keepalive' takes only "
- "boolean options, not taking any action");
- tmp_bool = 1;
- }
+ if (!SSL_CTX_load_verify_locations(priv->ssl_ctx, priv->ssl_ca_list,
+ priv->crl_path)) {
+ gf_log(this->name, GF_LOG_ERROR, "could not load CA list");
+ goto err;
+ }
+
+ SSL_CTX_set_verify_depth(priv->ssl_ctx, cert_depth);
+
+ if (priv->crl_path)
+ ssl_set_crl_verify_flags(priv->ssl_ctx);
+
+ priv->ssl_session_id = session_id++;
+ SSL_CTX_set_session_id_context(priv->ssl_ctx,
+ (void *)&priv->ssl_session_id,
+ sizeof(priv->ssl_session_id));
+
+ SSL_CTX_set_verify(priv->ssl_ctx, SSL_VERIFY_PEER, 0);
+
+ /*
+ * Since glusterfs shares the same settings for client-side
+ * and server-side of SSL, we need to ignore any certificate
+ * usage specification (SSL client vs SSL server), otherwise
+ * SSL connexions will fail with 'unsupported cerritifcate"
+ */
+ SSL_CTX_set_purpose(priv->ssl_ctx, X509_PURPOSE_ANY);
+ }
+ return 0;
+
+err:
+ return -1;
+}
- if (!tmp_bool)
- priv->keepalive = 0;
+static int
+socket_init(rpc_transport_t *this)
+{
+ socket_private_t *priv = NULL;
+ gf_boolean_t tmp_bool = 0;
+ uint64_t windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE;
+ char *optstr = NULL;
+ data_t *data;
+
+ if (this->private) {
+ gf_log_callingfn(this->name, GF_LOG_ERROR, "double init attempted");
+ return -1;
+ }
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_common_mt_socket_private_t);
+ if (!priv) {
+ return -1;
+ }
+
+ this->private = priv;
+ pthread_mutex_init(&priv->out_lock, NULL);
+ pthread_mutex_init(&priv->cond_lock, NULL);
+ pthread_cond_init(&priv->cond, NULL);
+
+ /*GF_REF_INIT (priv, socket_poller_mayday);*/
+
+ priv->sock = -1;
+ priv->idx = -1;
+ priv->connected = -1;
+ priv->nodelay = 1;
+ priv->bio = 0;
+ priv->ssl_accepted = _gf_false;
+ priv->ssl_connected = _gf_false;
+ priv->windowsize = GF_DEFAULT_SOCKET_WINDOW_SIZE;
+ INIT_LIST_HEAD(&priv->ioq);
+ pthread_mutex_init(&priv->notify.lock, NULL);
+ pthread_cond_init(&priv->notify.cond, NULL);
+
+ /* All the below section needs 'this->options' to be present */
+ if (!this->options)
+ goto out;
+
+ data = dict_get_sizen(this->options, "non-blocking-io");
+ if (data) {
+ optstr = data_to_str(data);
+
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'non-blocking-io' takes only boolean options,"
+ " not taking any action");
+ tmp_bool = 1;
}
- if (dict_get_uint32 (this->options,
- "transport.socket.keepalive-interval",
- &keepalive) == 0) {
- priv->keepaliveintvl = keepalive;
+ if (!tmp_bool) {
+ priv->bio = 1;
+ gf_log(this->name, GF_LOG_WARNING, "disabling non-blocking IO");
}
+ }
- if (dict_get_uint32 (this->options,
- "transport.socket.keepalive-time",
- &keepalive) == 0) {
- priv->keepaliveidle = keepalive;
+ optstr = NULL;
+
+ /* By default, we enable NODELAY */
+ data = dict_get_sizen(this->options, "transport.socket.nodelay");
+ if (data) {
+ optstr = data_to_str(data);
+
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'transport.socket.nodelay' takes only "
+ "boolean options, not taking any action");
+ tmp_bool = 1;
+ }
+ if (!tmp_bool) {
+ priv->nodelay = 0;
+ gf_log(this->name, GF_LOG_DEBUG, "disabling nodelay");
+ }
+ }
+
+ optstr = NULL;
+ if (dict_get_str_sizen(this->options, "tcp-window-size", &optstr) == 0) {
+ if (gf_string2uint64(optstr, &windowsize) != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "invalid number format: %s",
+ optstr);
+ return -1;
+ }
+ }
+
+ priv->windowsize = (int)windowsize;
+
+ optstr = NULL;
+ /* Enable Keep-alive by default. */
+ priv->keepalive = 1;
+ priv->keepaliveintvl = GF_KEEPALIVE_INTERVAL;
+ priv->keepaliveidle = GF_KEEPALIVE_TIME;
+ priv->keepalivecnt = GF_KEEPALIVE_COUNT;
+ if (dict_get_str_sizen(this->options, "transport.socket.keepalive",
+ &optstr) == 0) {
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'transport.socket.keepalive' takes only "
+ "boolean options, not taking any action");
+ tmp_bool = 1;
}
- if (dict_get_uint32 (this->options,
- "transport.socket.listen-backlog",
- &backlog) == 0) {
- priv->backlog = backlog;
+ if (!tmp_bool)
+ priv->keepalive = 0;
+ }
+
+ if (dict_get_int32_sizen(this->options, "transport.tcp-user-timeout",
+ &(priv->timeout)) != 0)
+ priv->timeout = GF_NETWORK_TIMEOUT;
+ gf_log(this->name, GF_LOG_DEBUG, "Configured transport.tcp-user-timeout=%d",
+ priv->timeout);
+
+ if (priv->keepalive) {
+ if (dict_get_int32_sizen(this->options,
+ "transport.socket.keepalive-time",
+ &(priv->keepaliveidle)) != 0) {
+ priv->keepaliveidle = GF_KEEPALIVE_TIME;
}
- optstr = NULL;
+ if (dict_get_int32_sizen(this->options,
+ "transport.socket.keepalive-interval",
+ &(priv->keepaliveintvl)) != 0) {
+ priv->keepaliveintvl = GF_KEEPALIVE_INTERVAL;
+ }
- /* Check if socket read failures are to be logged */
- priv->read_fail_log = 1;
- if (dict_get (this->options, "transport.socket.read-fail-log")) {
- optstr = data_to_str (dict_get (this->options, "transport.socket.read-fail-log"));
- if (gf_string2boolean (optstr, &tmp_bool) == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "'transport.socket.read-fail-log' takes only "
- "boolean options; logging socket read fails");
- }
- else if (tmp_bool == _gf_false) {
- priv->read_fail_log = 0;
- }
+ if (dict_get_int32_sizen(this->options,
+ "transport.socket.keepalive-count",
+ &(priv->keepalivecnt)) != 0)
+ priv->keepalivecnt = GF_KEEPALIVE_COUNT;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Reconfigured transport.keepalivecnt=%d", priv->keepalivecnt);
+ }
+
+ if (dict_get_uint32(this->options, "transport.listen-backlog",
+ &(priv->backlog)) != 0) {
+ priv->backlog = GLUSTERFS_SOCKET_LISTEN_BACKLOG;
+ }
+
+ optstr = NULL;
+
+ /* Check if socket read failures are to be logged */
+ priv->read_fail_log = 1;
+ data = dict_get_sizen(this->options, "transport.socket.read-fail-log");
+ if (data) {
+ optstr = data_to_str(data);
+ if (gf_string2boolean(optstr, &tmp_bool) != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "'transport.socket.read-fail-log' takes only "
+ "boolean options; logging socket read fails");
+ } else if (tmp_bool == _gf_false) {
+ priv->read_fail_log = 0;
}
+ }
- optstr = NULL;
-out:
- this->private = priv;
+ priv->windowsize = (int)windowsize;
- return 0;
-}
+ priv->ssl_enabled = _gf_false;
+ if (dict_get_str_sizen(this->options, SSL_ENABLED_OPT, &optstr) == 0) {
+ if (gf_string2boolean(optstr, &priv->ssl_enabled) != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "invalid value given for ssl-enabled boolean");
+ }
+ }
+ priv->mgmt_ssl = this->ctx->secure_mgmt;
+ priv->srvr_ssl = this->ctx->secure_srvr;
+ ssl_setup_connection_params(this);
+out:
+ this->private = priv;
+ return 0;
+}
void
-fini (rpc_transport_t *this)
+fini(rpc_transport_t *this)
{
- socket_private_t *priv = NULL;
+ socket_private_t *priv = NULL;
- if (!this)
- return;
+ if (!this)
+ return;
- priv = this->private;
- if (priv) {
- if (priv->sock != -1) {
- pthread_mutex_lock (&priv->lock);
- {
- __socket_ioq_flush (this);
- __socket_reset (this);
- }
- pthread_mutex_unlock (&priv->lock);
- }
- gf_log (this->name, GF_LOG_TRACE,
- "transport %p destroyed", this);
+ priv = this->private;
+ if (priv) {
+ if (priv->sock >= 0) {
+ pthread_mutex_lock(&priv->out_lock);
+ {
+ __socket_ioq_flush(priv);
+ __socket_reset(this);
+ }
+ pthread_mutex_unlock(&priv->out_lock);
+ }
+ gf_log(this->name, GF_LOG_TRACE, "transport %p destroyed", this);
+
+ pthread_mutex_destroy(&priv->out_lock);
+ pthread_mutex_destroy(&priv->cond_lock);
+ pthread_cond_destroy(&priv->cond);
- pthread_mutex_destroy (&priv->lock);
- GF_FREE (priv);
+ GF_ASSERT(priv->notify.in_progress == 0);
+ pthread_mutex_destroy(&priv->notify.lock);
+ pthread_cond_destroy(&priv->notify.cond);
+
+ if (priv->use_ssl && priv->ssl_ssl) {
+ SSL_clear(priv->ssl_ssl);
+ SSL_free(priv->ssl_ssl);
+ priv->ssl_ssl = NULL;
+ }
+ if (priv->ssl_ctx) {
+ SSL_CTX_free(priv->ssl_ctx);
+ priv->ssl_ctx = NULL;
}
- this->private = NULL;
-}
+ if (priv->ssl_private_key) {
+ GF_FREE(priv->ssl_private_key);
+ }
+ if (priv->ssl_own_cert) {
+ GF_FREE(priv->ssl_own_cert);
+ }
+ if (priv->ssl_ca_list) {
+ GF_FREE(priv->ssl_ca_list);
+ }
+ GF_FREE(priv);
+ }
+ this->private = NULL;
+}
int32_t
-init (rpc_transport_t *this)
+init(rpc_transport_t *this)
{
- int ret = -1;
+ int ret = -1;
- ret = socket_init (this);
+ init_openssl_mt();
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "socket_init() failed");
- }
+ ret = socket_init(this);
- return ret;
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_DEBUG, "socket_init() failed");
+ }
+
+ return ret;
}
struct volume_options options[] = {
- { .key = {"remote-port",
- "transport.remote-port",
- "transport.socket.remote-port"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.socket.listen-port", "listen-port"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.socket.bind-address", "bind-address" },
- .type = GF_OPTION_TYPE_INTERNET_ADDRESS
- },
- { .key = {"transport.socket.connect-path", "connect-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.socket.bind-path", "bind-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"transport.socket.listen-path", "listen-path"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = { "transport.address-family",
- "address-family" },
- .value = {"inet", "inet6", "unix", "inet-sdp" },
- .type = GF_OPTION_TYPE_STR
- },
-
- { .key = {"non-blocking-io"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"tcp-window-size"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = GF_MIN_SOCKET_WINDOW_SIZE,
- .max = GF_MAX_SOCKET_WINDOW_SIZE
- },
- { .key = {"transport.socket.nodelay"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"transport.socket.lowlat"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"transport.socket.keepalive"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"transport.socket.keepalive-interval"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.socket.keepalive-time"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.socket.listen-backlog"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {"transport.socket.read-fail-log"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {NULL} }
-};
+ {.key = {"remote-port", "transport.remote-port",
+ "transport.socket.remote-port"},
+ .type = GF_OPTION_TYPE_INT},
+ {.key = {"transport.socket.listen-port", "listen-port"},
+ .type = GF_OPTION_TYPE_INT},
+ {.key = {"transport.socket.bind-address", "bind-address"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS},
+ {.key = {"transport.socket.connect-path", "connect-path"},
+ .type = GF_OPTION_TYPE_ANY},
+ {.key = {"transport.socket.bind-path", "bind-path"},
+ .type = GF_OPTION_TYPE_ANY},
+ {.key = {"transport.socket.listen-path", "listen-path"},
+ .type = GF_OPTION_TYPE_ANY},
+ {.key = {"transport.address-family", "address-family"},
+ .value = {"inet", "inet6", "unix", "inet-sdp"},
+ .op_version = {GD_OP_VERSION_3_7_4},
+ .type = GF_OPTION_TYPE_STR},
+ {.key = {"non-blocking-io"}, .type = GF_OPTION_TYPE_BOOL},
+ {.key = {"tcp-window-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Option to set TCP SEND/RECV BUFFER SIZE",
+ .min = GF_MIN_SOCKET_WINDOW_SIZE,
+ .max = GF_MAX_SOCKET_WINDOW_SIZE},
+ {
+ .key = {"transport.listen-backlog"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .op_version = {GD_OP_VERSION_3_11_1},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "This option uses the value of backlog argument that "
+ "defines the maximum length to which the queue of "
+ "pending connections for socket fd may grow.",
+ .default_value = "1024",
+ },
+ {.key = {"transport.tcp-user-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {GD_OP_VERSION_3_10_2},
+ .default_value = TOSTRING(GF_NETWORK_TIMEOUT)},
+ {.key = {"transport.socket.nodelay"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "1"},
+ {.key = {"transport.socket.keepalive"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .op_version = {1},
+ .default_value = "1"},
+ {.key = {"transport.socket.keepalive-interval"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {GD_OP_VERSION_3_10_2},
+ .default_value = "2"},
+ {.key = {"transport.socket.keepalive-time"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {GD_OP_VERSION_3_10_2},
+ .default_value = "20"},
+ {.key = {"transport.socket.keepalive-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {GD_OP_VERSION_3_10_2},
+ .default_value = "9"},
+ {.key = {"transport.socket.read-fail-log"}, .type = GF_OPTION_TYPE_BOOL},
+ {.key = {SSL_ENABLED_OPT}, .type = GF_OPTION_TYPE_BOOL},
+ {.key = {SSL_OWN_CERT_OPT}, .type = GF_OPTION_TYPE_STR},
+ {.key = {SSL_PRIVATE_KEY_OPT}, .type = GF_OPTION_TYPE_STR},
+ {.key = {SSL_CA_LIST_OPT}, .type = GF_OPTION_TYPE_STR},
+ {.key = {SSL_CERT_DEPTH_OPT}, .type = GF_OPTION_TYPE_STR},
+ {.key = {SSL_CIPHER_LIST_OPT}, .type = GF_OPTION_TYPE_STR},
+ {.key = {SSL_DH_PARAM_OPT}, .type = GF_OPTION_TYPE_STR},
+ {.key = {SSL_EC_CURVE_OPT}, .type = GF_OPTION_TYPE_STR},
+ {.key = {SSL_CRL_PATH_OPT}, .type = GF_OPTION_TYPE_STR},
+ {.key = {OWN_THREAD_OPT}, .type = GF_OPTION_TYPE_BOOL},
+ {.key = {"ssl-own-cert"},
+ .op_version = {GD_OP_VERSION_3_7_4},
+ .flags = OPT_FLAG_SETTABLE,
+ .type = GF_OPTION_TYPE_STR,
+ .description = "SSL certificate. Ignored if SSL is not enabled."},
+ {.key = {"ssl-private-key"},
+ .op_version = {GD_OP_VERSION_3_7_4},
+ .flags = OPT_FLAG_SETTABLE,
+ .type = GF_OPTION_TYPE_STR,
+ .description = "SSL private key. Ignored if SSL is not enabled."},
+ {.key = {"ssl-ca-list"},
+ .op_version = {GD_OP_VERSION_3_7_4},
+ .flags = OPT_FLAG_SETTABLE,
+ .type = GF_OPTION_TYPE_STR,
+ .description = "SSL CA list. Ignored if SSL is not enabled."},
+ {.key = {"ssl-cert-depth"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {GD_OP_VERSION_3_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Maximum certificate-chain depth. If zero, the "
+ "peer's certificate itself must be in the local "
+ "certificate list. Otherwise, there may be up to N "
+ "signing certificates between the peer's and the "
+ "local list. Ignored if SSL is not enabled."},
+ {.key = {"ssl-cipher-list"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_3_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Allowed SSL ciphers. Ignored if SSL is not enabled."},
+ {.key = {"ssl-dh-param"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_3_7_4},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "DH parameters file. Ignored if SSL is not enabled."},
+ {.key = {"ssl-ec-curve"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_3_7_4},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "ECDH curve name. Ignored if SSL is not enabled."},
+ {.key = {"ssl-crl-path"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_3_7_4},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Path to directory containing CRL. "
+ "Ignored if SSL is not enabled."},
+ {.key = {NULL}}};
diff --git a/rpc/rpc-transport/socket/src/socket.h b/rpc/rpc-transport/socket/src/socket.h
index 0304f1db1eb..8a2eda70605 100644
--- a/rpc/rpc-transport/socket/src/socket.h
+++ b/rpc/rpc-transport/socket/src/socket.h
@@ -11,24 +11,24 @@
#ifndef _SOCKET_H
#define _SOCKET_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
+#include <openssl/ssl.h>
+#include <openssl/err.h>
+#include <openssl/x509v3.h>
+#include <openssl/x509_vfy.h>
+#ifdef HAVE_OPENSSL_DH_H
+#include <openssl/dh.h>
+#endif
+#ifdef HAVE_OPENSSL_ECDH_H
+#include <openssl/ecdh.h>
#endif
-#include "event.h"
#include "rpc-transport.h"
-#include "logging.h"
-#include "dict.h"
-#include "mem-pool.h"
-#include "globals.h"
#ifndef MAX_IOVEC
#define MAX_IOVEC 16
#endif /* MAX_IOVEC */
-#define GF_DEFAULT_SOCKET_LISTEN_PORT GF_DEFAULT_BASE_PORT
+#define GF_DEFAULT_SOCKET_LISTEN_PORT GF_DEFAULT_BASE_PORT
#define RPC_MAX_FRAGMENT_SIZE 0x7fffffff
@@ -40,162 +40,237 @@
* setsockopt will fail. Having larger values might be beneficial for
* IB links.
*/
-#define GF_DEFAULT_SOCKET_WINDOW_SIZE (0)
-#define GF_MAX_SOCKET_WINDOW_SIZE (1 * GF_UNIT_MB)
-#define GF_MIN_SOCKET_WINDOW_SIZE (0)
-#define GF_USE_DEFAULT_KEEPALIVE (-1)
+#define GF_DEFAULT_SOCKET_WINDOW_SIZE (0)
+#define GF_MAX_SOCKET_WINDOW_SIZE (1 * GF_UNIT_MB)
+#define GF_MIN_SOCKET_WINDOW_SIZE (0)
+#define GF_USE_DEFAULT_KEEPALIVE (-1)
+
+#define GF_KEEPALIVE_TIME (20)
+#define GF_KEEPALIVE_INTERVAL (2)
+#define GF_KEEPALIVE_COUNT (9)
typedef enum {
- SP_STATE_NADA = 0,
- SP_STATE_COMPLETE,
- SP_STATE_READING_FRAGHDR,
- SP_STATE_READ_FRAGHDR,
- SP_STATE_READING_FRAG,
+ SP_STATE_NADA = 0,
+ SP_STATE_COMPLETE,
+ SP_STATE_READING_FRAGHDR,
+ SP_STATE_READ_FRAGHDR,
+ SP_STATE_READING_FRAG,
} sp_rpcrecord_state_t;
typedef enum {
- SP_STATE_RPCFRAG_INIT,
- SP_STATE_READING_MSGTYPE,
- SP_STATE_READ_MSGTYPE,
+ SP_STATE_RPCFRAG_INIT,
+ SP_STATE_READING_MSGTYPE,
+ SP_STATE_READ_MSGTYPE,
+ SP_STATE_NOTIFYING_XID
} sp_rpcfrag_state_t;
typedef enum {
- SP_STATE_SIMPLE_MSG_INIT,
- SP_STATE_READING_SIMPLE_MSG,
+ SP_STATE_SIMPLE_MSG_INIT,
+ SP_STATE_READING_SIMPLE_MSG,
} sp_rpcfrag_simple_msg_state_t;
typedef enum {
- SP_STATE_VECTORED_REQUEST_INIT,
- SP_STATE_READING_CREDBYTES,
- SP_STATE_READ_CREDBYTES, /* read credential data. */
- SP_STATE_READING_VERFBYTES,
- SP_STATE_READ_VERFBYTES, /* read verifier data */
- SP_STATE_READING_PROGHDR,
- SP_STATE_READ_PROGHDR,
- SP_STATE_READING_PROGHDR_XDATA,
- SP_STATE_READ_PROGHDR_XDATA, /* It's a bad "name" in the generic
- RPC state machine, but greatly
- aids code review (and xdata is
- the only "consumer" of this state)
- */
- SP_STATE_READING_PROG,
+ SP_STATE_VECTORED_REQUEST_INIT,
+ SP_STATE_READING_CREDBYTES,
+ SP_STATE_READ_CREDBYTES, /* read credential data. */
+ SP_STATE_READING_VERFBYTES,
+ SP_STATE_READ_VERFBYTES, /* read verifier data */
+ SP_STATE_READING_PROGHDR,
+ SP_STATE_READ_PROGHDR,
+ SP_STATE_READING_PROGHDR_XDATA,
+ SP_STATE_READ_PROGHDR_XDATA, /* It's a bad "name" in the generic
+ RPC state machine, but greatly
+ aids code review (and xdata is
+ the only "consumer" of this state)
+ */
+ SP_STATE_READING_PROG,
} sp_rpcfrag_vectored_request_state_t;
typedef enum {
- SP_STATE_REQUEST_HEADER_INIT,
- SP_STATE_READING_RPCHDR1,
- SP_STATE_READ_RPCHDR1, /* read msg from beginning till and
- * including credlen
- */
+ SP_STATE_REQUEST_HEADER_INIT,
+ SP_STATE_READING_RPCHDR1,
+ SP_STATE_READ_RPCHDR1, /* read msg from beginning till and
+ * including credlen
+ */
} sp_rpcfrag_request_header_state_t;
struct ioq {
- union {
- struct list_head list;
- struct {
- struct ioq *next;
- struct ioq *prev;
- };
+ union {
+ struct list_head list;
+ struct {
+ struct ioq *next;
+ struct ioq *prev;
};
+ };
- uint32_t fraghdr;
- struct iovec vector[MAX_IOVEC];
- int count;
- struct iovec *pending_vector;
- int pending_count;
- struct iobref *iobref;
+ struct iovec vector[MAX_IOVEC];
+ struct iovec *pending_vector;
+ int count;
+ int pending_count;
+ struct iobref *iobref;
+ uint32_t fraghdr;
+ char _pad[4];
};
typedef struct {
- sp_rpcfrag_request_header_state_t header_state;
- sp_rpcfrag_vectored_request_state_t vector_state;
- int vector_sizer_state;
+ sp_rpcfrag_request_header_state_t header_state;
+ sp_rpcfrag_vectored_request_state_t vector_state;
+ int vector_sizer_state;
} sp_rpcfrag_request_state_t;
typedef enum {
- SP_STATE_VECTORED_REPLY_STATUS_INIT,
- SP_STATE_READING_REPLY_STATUS,
- SP_STATE_READ_REPLY_STATUS,
+ SP_STATE_VECTORED_REPLY_STATUS_INIT,
+ SP_STATE_READING_REPLY_STATUS,
+ SP_STATE_READ_REPLY_STATUS,
} sp_rpcfrag_vectored_reply_status_state_t;
typedef enum {
- SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT,
- SP_STATE_READING_PROC_HEADER,
- SP_STATE_READING_PROC_OPAQUE,
- SP_STATE_READ_PROC_OPAQUE,
- SP_STATE_READ_PROC_HEADER,
+ SP_STATE_ACCEPTED_SUCCESS_REPLY_INIT,
+ SP_STATE_READING_PROC_HEADER,
+ SP_STATE_READING_PROC_OPAQUE,
+ SP_STATE_READ_PROC_OPAQUE,
+ SP_STATE_READ_PROC_HEADER,
} sp_rpcfrag_vectored_reply_accepted_success_state_t;
typedef enum {
- SP_STATE_ACCEPTED_REPLY_INIT,
- SP_STATE_READING_REPLY_VERFLEN,
- SP_STATE_READ_REPLY_VERFLEN,
- SP_STATE_READING_REPLY_VERFBYTES,
- SP_STATE_READ_REPLY_VERFBYTES,
+ SP_STATE_ACCEPTED_REPLY_INIT,
+ SP_STATE_READING_REPLY_VERFLEN,
+ SP_STATE_READ_REPLY_VERFLEN,
+ SP_STATE_READING_REPLY_VERFBYTES,
+ SP_STATE_READ_REPLY_VERFBYTES,
} sp_rpcfrag_vectored_reply_accepted_state_t;
typedef struct {
- uint32_t accept_status;
- sp_rpcfrag_vectored_reply_status_state_t status_state;
- sp_rpcfrag_vectored_reply_accepted_state_t accepted_state;
- sp_rpcfrag_vectored_reply_accepted_success_state_t accepted_success_state;
+ uint32_t accept_status;
+ sp_rpcfrag_vectored_reply_status_state_t status_state;
+ sp_rpcfrag_vectored_reply_accepted_state_t accepted_state;
+ sp_rpcfrag_vectored_reply_accepted_success_state_t accepted_success_state;
} sp_rpcfrag_vectored_reply_state_t;
+struct gf_sock_incoming_frag {
+ char *fragcurrent;
+ uint32_t bytes_read;
+ uint32_t remaining_size;
+ struct iovec vector;
+ struct iovec *pending_vector;
+ union {
+ sp_rpcfrag_request_state_t request;
+ sp_rpcfrag_vectored_reply_state_t reply;
+ } call_body;
+
+ sp_rpcfrag_simple_msg_state_t simple_state;
+ sp_rpcfrag_state_t state;
+};
+
+#define GF_SOCKET_RA_MAX 1024
+
+struct gf_sock_incoming {
+ char *proghdr_base_addr;
+ struct iobuf *iobuf;
+ size_t iobuf_size;
+ struct gf_sock_incoming_frag frag;
+ struct iovec vector[2];
+ struct iovec payload_vector;
+ struct iobref *iobref;
+ rpc_request_info_t *request_info;
+ struct iovec *pending_vector;
+ int count;
+ int pending_count;
+ size_t total_bytes_read;
+
+ size_t ra_read;
+ size_t ra_max;
+ size_t ra_served;
+ char *ra_buf;
+ uint32_t fraghdr;
+ msg_type_t msg_type;
+ sp_rpcrecord_state_t record_state;
+ char _pad[4];
+};
+
typedef struct {
- int32_t sock;
- int32_t idx;
- unsigned char connected; // -1 = not connected. 0 = in progress. 1 = connected
- char bio;
- char connect_finish_log;
- char submit_log;
- union {
- struct list_head ioq;
- struct {
- struct ioq *ioq_next;
- struct ioq *ioq_prev;
- };
- };
+ union {
+ struct list_head ioq;
struct {
- sp_rpcrecord_state_t record_state;
- struct {
- char *fragcurrent;
- uint32_t bytes_read;
- uint32_t remaining_size;
- struct iovec vector;
- struct iovec *pending_vector;
- union {
- sp_rpcfrag_request_state_t request;
- sp_rpcfrag_vectored_reply_state_t reply;
- } call_body;
-
- sp_rpcfrag_simple_msg_state_t simple_state;
- sp_rpcfrag_state_t state;
- } frag;
- char *proghdr_base_addr;
- struct iobuf *iobuf;
- size_t iobuf_size;
- struct iovec vector[2];
- int count;
- struct iovec payload_vector;
- struct iobref *iobref;
- rpc_request_info_t *request_info;
- struct iovec *pending_vector;
- int pending_count;
- uint32_t fraghdr;
- char complete_record;
- msg_type_t msg_type;
- size_t total_bytes_read;
- } incoming;
- pthread_mutex_t lock;
- int windowsize;
- char lowlat;
- char nodelay;
- int keepalive;
- int keepaliveidle;
- int keepaliveintvl;
- uint32_t backlog;
- gf_boolean_t read_fail_log;
-} socket_private_t;
+ struct ioq *ioq_next;
+ struct ioq *ioq_prev;
+ };
+ };
+ pthread_mutex_t out_lock;
+ pthread_mutex_t cond_lock;
+ pthread_cond_t cond;
+ int windowsize;
+ int keepalive;
+ int keepaliveidle;
+ int keepaliveintvl;
+ int keepalivecnt;
+ int timeout;
+ int log_ctr;
+ int shutdown_log_ctr;
+ /* ssl_error_required is used only during the SSL connection setup
+ * phase.
+ * It holds the error code returned by SSL_get_error() and is used to
+ * arm the epoll event set for the required event for the specific fd.
+ */
+ int ssl_error_required;
+ int ssl_session_id;
+ GF_REF_DECL; /* refcount to keep track of socket_poller
+ threads */
+ struct {
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+ uint64_t in_progress;
+ } notify;
+ int32_t sock;
+ int32_t idx;
+ int32_t gen;
+ uint32_t backlog;
+ SSL_METHOD *ssl_meth;
+ SSL_CTX *ssl_ctx;
+ BIO *ssl_sbio;
+ SSL *ssl_ssl;
+ char *ssl_own_cert;
+ char *ssl_private_key;
+ char *ssl_ca_list;
+ char *crl_path;
+ struct gf_sock_incoming incoming;
+ mgmt_ssl_t srvr_ssl;
+ /* -1 = not connected. 0 = in progress. 1 = connected */
+ char connected;
+ /* 1 = connect failed for reasons other than EINPROGRESS/ENOENT
+ see socket_connect for details */
+ char connect_failed;
+ char bio;
+ char connect_finish_log;
+ char submit_log;
+ char nodelay;
+ gf_boolean_t read_fail_log;
+ gf_boolean_t ssl_enabled; /* outbound I/O */
+ gf_boolean_t mgmt_ssl; /* outbound mgmt */
+ gf_boolean_t is_server;
+ gf_boolean_t use_ssl;
+ gf_boolean_t ssl_accepted; /* To indicate SSL_accept() */
+ gf_boolean_t ssl_connected; /* or SSL_connect() has been
+ * been completed on this socket.
+ * These are valid only when
+ * use_ssl is true.
+ */
+ /* SSL_CTX is created for each transport. Since we are now using non-
+ * blocking mechanism for SSL_accept() and SSL_connect(), the SSL
+ * context is created on the first EPOLLIN event which may lead to
+ * SSL_ERROR_WANT_READ/SSL_ERROR_WANT_WRITE and may not complete the
+ * SSL connection at the first attempt.
+ * ssl_context_created is a flag to note that we've created the SSL
+ * context for the connection so that we don't blindly create any more
+ * while !ssl_accepted or !ssl_connected.
+ */
+ gf_boolean_t ssl_context_created;
+ gf_boolean_t accepted; /* explicit flag to be set in
+ * socket_event_handler() for
+ * newly accepted socket
+ */
+ char _pad[4];
+} socket_private_t;
#endif
diff --git a/rpc/xdr/src/.gitignore b/rpc/xdr/src/.gitignore
new file mode 100644
index 00000000000..a0c8b7ca2b6
--- /dev/null
+++ b/rpc/xdr/src/.gitignore
@@ -0,0 +1,25 @@
+acl3-xdr.c
+acl3-xdr.h
+changelog-xdr.c
+changelog-xdr.h
+cli1-xdr.c
+cli1-xdr.h
+glusterd1-xdr.c
+glusterd1-xdr.h
+glusterfs3-xdr.c
+glusterfs3-xdr.h
+glusterfs4-xdr.c
+glusterfs4-xdr.h
+mount3udp.c
+mount3udp.h
+nlm4-xdr.c
+nlm4-xdr.h
+nlmcbk-xdr.c
+nlmcbk-xdr.h
+nsm-xdr.c
+nsm-xdr.h
+portmap-xdr.c
+portmap-xdr.h
+rpc-common-xdr.c
+rpc-common-xdr.h
+*-e
diff --git a/rpc/xdr/src/Makefile.am b/rpc/xdr/src/Makefile.am
index 7174815b841..0e9c377ec93 100644
--- a/rpc/xdr/src/Makefile.am
+++ b/rpc/xdr/src/Makefile.am
@@ -1,26 +1,85 @@
+if BUILD_GNFS
+ NFS_XDRS = nlm4-xdr.x nsm-xdr.x acl3-xdr.x mount3udp.x
+ NFS_SRCS = xdr-nfs3.c msg-nfs3.c
+ NFS_HDRS = xdr-nfs3.h msg-nfs3.h
+else
+ NFS_EXTRA_XDRS = nlm4-xdr.x nsm-xdr.x acl3-xdr.x mount3udp.x
+endif
+
+XDRGENFILES = glusterfs3-xdr.x glusterfs4-xdr.x cli1-xdr.x \
+ rpc-common-xdr.x glusterd1-xdr.x changelog-xdr.x \
+ portmap-xdr.x ${NFS_XDRS}
+
+XDRHEADERS = $(XDRGENFILES:.x=.h)
+XDRSOURCES = $(XDRGENFILES:.x=.c)
+
+EXTRA_DIST = $(XDRGENFILES) libgfxdr.sym ${NFS_EXTRA_XDRS}
+
lib_LTLIBRARIES = libgfxdr.la
-libgfxdr_la_CFLAGS = -fPIC -Wall -g -shared -nostartfiles $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS)
-
-libgfxdr_la_CPPFLAGS = -D_FILE_OFFSET_BITS=64 -D__USE_FILE_OFFSET64 \
- -D_GNU_SOURCE -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src
-
-libgfxdr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
-
-libgfxdr_la_SOURCES = xdr-generic.c rpc-common-xdr.c \
- glusterfs3-xdr.c \
- cli1-xdr.c \
- glusterd1-xdr.c \
- portmap-xdr.c \
- nlm4-xdr.c xdr-nfs3.c msg-nfs3.c nsm-xdr.c \
- nlmcbk-xdr.c
-
-noinst_HEADERS = xdr-generic.h rpc-common-xdr.h \
- glusterfs3-xdr.h glusterfs3.h \
- cli1-xdr.h \
- glusterd1-xdr.h \
- portmap-xdr.h \
- nlm4-xdr.h xdr-nfs3.h msg-nfs3.h nsm-xdr.h \
- nlmcbk-xdr.h
+libgfxdr_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS)
+
+libgfxdr_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_builddir)/rpc/xdr/src
+
+libgfxdr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+libgfxdr_la_LDFLAGS = -version-info $(LIBGFXDR_LT_VERSION) $(GF_LDFLAGS) \
+ -export-symbols $(top_srcdir)/rpc/xdr/src/libgfxdr.sym
+
+libgfxdr_la_SOURCES = xdr-generic.c ${NFS_SRCS}
+nodist_libgfxdr_la_SOURCES = $(XDRSOURCES)
+
+libgfxdr_la_HEADERS = xdr-generic.h glusterfs3.h rpc-pragmas.h ${NFS_HDRS}
+nodist_libgfxdr_la_HEADERS = $(XDRHEADERS)
+
+libgfxdr_ladir = $(includedir)/glusterfs/rpc
+
+CLEANFILES = $(XDRSOURCES) $(XDRHEADERS)
+
+# trick automake into doing BUILT_SOURCES magic
+BUILT_SOURCES = $(XDRHEADERS) $(XDRSOURCES)
+
+xdrsrc=$(top_srcdir)/rpc/xdr/src
+xdrdst=$(top_builddir)/rpc/xdr/src
+
+# make's dependency resolution may mean that it decides to run
+# rpcgen again (unnecessarily), but as the .c file already exists,
+# rpcgen will exit with an error, resulting in a build error. We
+# could use a '-' (i.e. -@rpcgen ...) and suffer with noisy warnings
+# in the build. Or we do this crufty thing instead.
+$(XDRSOURCES): $(XDRGENFILES)
+ @if [ ! -e $(xdrdst)/$@ -o $(@:.c=.x) -nt $(xdrdst)/$@ ]; then \
+ rpcgen -c -o $(xdrdst)/$@ $(@:.c=.x) ;\
+ fi
+
+# d*mn sed in netbsd6 doesn't do -i (inline)
+# (why are we still running smoke on netbsd6 and not netbsd7?)
+$(XDRHEADERS): $(XDRGENFILES)
+ @if [ ! -e $(xdrdst)/$@ -o $(@:.h=.x) -nt $(xdrdst)/$@ ]; then \
+ rpcgen -h -o $(@:.h=.tmp) $(@:.h=.x) && \
+ sed -e '/#ifndef/ s/-/_/g' -e '/#define/ s/-/_/g' \
+ -e '/#endif/ s/-/_/' -e 's/TMP_/H_/g' \
+ $(@:.h=.tmp) > $(xdrdst)/$@ && \
+ rm -f $(@:.h=.tmp) ; \
+ fi
+
+
+# link .x files when doing out-of-tree builds
+# have to use .PHONY here to force it; all versions of make
+# will think the file already exists "here" by virtue of the
+# VPATH. And we have to have the .x file in $cwd in order to
+# have rpcgen generate "nice" #include directives
+# i.e. (nice):
+# #include "acl3-xdr.h"
+# versus (not nice):
+# #include "../../../../foo/src/rpc/xdr/src/acl3-xdr.h"
+.PHONY : $(XDRGENFILES)
+$(XDRGENFILES):
+ @if [ ! -e $@ ]; then ln -s $(xdrsrc)/$@ . ; fi;
+
+clean-local:
+ @if [ $(top_builddir) != $(top_srcdir) ]; then \
+ rm -f $(xdrdst)/*.x; \
+ fi
diff --git a/rpc/xdr/src/acl3-xdr.x b/rpc/xdr/src/acl3-xdr.x
new file mode 100644
index 00000000000..7f7364971e6
--- /dev/null
+++ b/rpc/xdr/src/acl3-xdr.x
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/compat.h>
+%#include "xdr-nfs3.h"
+
+struct aclentry {
+ int type;
+ int uid;
+ int perm;
+};
+
+struct getaclargs {
+ netobj fh;
+ int mask;
+};
+
+struct getaclreply {
+ int status;
+ int attr_follows;
+ fattr3 attr;
+ int mask;
+ int aclcount;
+ aclentry aclentry<>;
+ int daclcount;
+ aclentry daclentry<>;
+};
+
+struct setaclargs {
+ netobj fh;
+ int mask;
+ int aclcount;
+ aclentry aclentry<>;
+ int daclcount;
+ aclentry daclentry<>;
+};
+
+struct setaclreply {
+ int status;
+ int attr_follows;
+ fattr3 attr;
+};
diff --git a/rpc/xdr/src/changelog-xdr.x b/rpc/xdr/src/changelog-xdr.x
new file mode 100644
index 00000000000..5956245d5ce
--- /dev/null
+++ b/rpc/xdr/src/changelog-xdr.x
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/compat.h>
+
+/* XDR: libgfchangelog -> changelog */
+
+struct changelog_probe_req {
+ unsigned int filter;
+ char sock[UNIX_PATH_MAX];
+};
+
+struct changelog_probe_rsp {
+ int op_ret;
+};
+
+/* XDR: changelog -> libgfchangelog */
+struct changelog_event_req {
+ /* sequence number for the buffer */
+ unsigned hyper seq;
+
+ /* time of dispatch */
+ unsigned hyper tv_sec;
+ unsigned hyper tv_usec;
+};
+
+struct changelog_event_rsp {
+ int op_ret;
+
+ /* ack'd buffers sequence number */
+ unsigned hyper seq;
+};
diff --git a/rpc/xdr/src/cli1-xdr.c b/rpc/xdr/src/cli1-xdr.c
deleted file mode 100644
index 53e1807658a..00000000000
--- a/rpc/xdr/src/cli1-xdr.c
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#include "cli1-xdr.h"
-
-bool_t
-xdr_gf_cli_defrag_type (XDR *xdrs, gf_cli_defrag_type *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_defrag_status_t (XDR *xdrs, gf_defrag_status_t *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cluster_type (XDR *xdrs, gf1_cluster_type *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_replace_op (XDR *xdrs, gf1_cli_replace_op *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_op_commands (XDR *xdrs, gf1_op_commands *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_quota_type (XDR *xdrs, gf_quota_type *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_friends_list (XDR *xdrs, gf1_cli_friends_list *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_get_volume (XDR *xdrs, gf1_cli_get_volume *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_sync_volume (XDR *xdrs, gf1_cli_sync_volume *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_op_flags (XDR *xdrs, gf1_cli_op_flags *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_gsync_set (XDR *xdrs, gf1_cli_gsync_set *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_stats_op (XDR *xdrs, gf1_cli_stats_op *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_top_op (XDR *xdrs, gf1_cli_top_op *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_cli_status_type (XDR *xdrs, gf_cli_status_type *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_cli_req (XDR *xdrs, gf_cli_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_cli_rsp (XDR *xdrs, gf_cli_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_probe_req (XDR *xdrs, gf1_cli_probe_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_probe_rsp (XDR *xdrs, gf1_cli_probe_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- IXDR_PUT_LONG(buf, objp->port);
- }
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
-
- } else {
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- objp->port = IXDR_GET_LONG(buf);
- }
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_deprobe_req (XDR *xdrs, gf1_cli_deprobe_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- if (!xdr_int (xdrs, &objp->flags))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_deprobe_rsp (XDR *xdrs, gf1_cli_deprobe_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_peer_list_req (XDR *xdrs, gf1_cli_peer_list_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_peer_list_rsp (XDR *xdrs, gf1_cli_peer_list_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->friends.friends_val, (u_int *) &objp->friends.friends_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_fsm_log_req (XDR *xdrs, gf1_cli_fsm_log_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_fsm_log_rsp (XDR *xdrs, gf1_cli_fsm_log_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->fsm_log.fsm_log_val, (u_int *) &objp->fsm_log.fsm_log_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_getwd_req (XDR *xdrs, gf1_cli_getwd_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->unused))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_getwd_rsp (XDR *xdrs, gf1_cli_getwd_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_string (xdrs, &objp->wd, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_mount_req (XDR *xdrs, gf1_cli_mount_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->label, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_mount_rsp (XDR *xdrs, gf1_cli_mount_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_umount_req (XDR *xdrs, gf1_cli_umount_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->lazy))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf1_cli_umount_rsp (XDR *xdrs, gf1_cli_umount_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- return TRUE;
-}
diff --git a/rpc/xdr/src/cli1-xdr.h b/rpc/xdr/src/cli1-xdr.h
deleted file mode 100644
index b30fd740ae2..00000000000
--- a/rpc/xdr/src/cli1-xdr.h
+++ /dev/null
@@ -1,369 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#ifndef _CLI1_XDR_H_RPCGEN
-#define _CLI1_XDR_H_RPCGEN
-
-#include <rpc/rpc.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-enum gf_cli_defrag_type {
- GF_DEFRAG_CMD_START = 1,
- GF_DEFRAG_CMD_STOP = 1 + 1,
- GF_DEFRAG_CMD_STATUS = 1 + 2,
- GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
- GF_DEFRAG_CMD_START_FORCE = 1 + 4,
-};
-typedef enum gf_cli_defrag_type gf_cli_defrag_type;
-
-enum gf_defrag_status_t {
- GF_DEFRAG_STATUS_NOT_STARTED = 0,
- GF_DEFRAG_STATUS_STARTED = 1,
- GF_DEFRAG_STATUS_STOPPED = 2,
- GF_DEFRAG_STATUS_COMPLETE = 3,
- GF_DEFRAG_STATUS_FAILED = 4,
-};
-typedef enum gf_defrag_status_t gf_defrag_status_t;
-
-enum gf1_cluster_type {
- GF_CLUSTER_TYPE_NONE = 0,
- GF_CLUSTER_TYPE_STRIPE = 0 + 1,
- GF_CLUSTER_TYPE_REPLICATE = 0 + 2,
- GF_CLUSTER_TYPE_STRIPE_REPLICATE = 0 + 3,
-};
-typedef enum gf1_cluster_type gf1_cluster_type;
-
-enum gf1_cli_replace_op {
- GF_REPLACE_OP_NONE = 0,
- GF_REPLACE_OP_START = 0 + 1,
- GF_REPLACE_OP_COMMIT = 0 + 2,
- GF_REPLACE_OP_PAUSE = 0 + 3,
- GF_REPLACE_OP_ABORT = 0 + 4,
- GF_REPLACE_OP_STATUS = 0 + 5,
- GF_REPLACE_OP_COMMIT_FORCE = 0 + 6,
-};
-typedef enum gf1_cli_replace_op gf1_cli_replace_op;
-
-enum gf1_op_commands {
- GF_OP_CMD_NONE = 0,
- GF_OP_CMD_START = 0 + 1,
- GF_OP_CMD_COMMIT = 0 + 2,
- GF_OP_CMD_STOP = 0 + 3,
- GF_OP_CMD_STATUS = 0 + 4,
- GF_OP_CMD_COMMIT_FORCE = 0 + 5,
-};
-typedef enum gf1_op_commands gf1_op_commands;
-
-enum gf_quota_type {
- GF_QUOTA_OPTION_TYPE_NONE = 0,
- GF_QUOTA_OPTION_TYPE_ENABLE = 0 + 1,
- GF_QUOTA_OPTION_TYPE_DISABLE = 0 + 2,
- GF_QUOTA_OPTION_TYPE_LIMIT_USAGE = 0 + 3,
- GF_QUOTA_OPTION_TYPE_REMOVE = 0 + 4,
- GF_QUOTA_OPTION_TYPE_LIST = 0 + 5,
- GF_QUOTA_OPTION_TYPE_VERSION = 0 + 6,
-};
-typedef enum gf_quota_type gf_quota_type;
-
-enum gf1_cli_friends_list {
- GF_CLI_LIST_ALL = 1,
-};
-typedef enum gf1_cli_friends_list gf1_cli_friends_list;
-
-enum gf1_cli_get_volume {
- GF_CLI_GET_VOLUME_ALL = 1,
- GF_CLI_GET_VOLUME = 1 + 1,
- GF_CLI_GET_NEXT_VOLUME = 1 + 2,
-};
-typedef enum gf1_cli_get_volume gf1_cli_get_volume;
-
-enum gf1_cli_sync_volume {
- GF_CLI_SYNC_ALL = 1,
-};
-typedef enum gf1_cli_sync_volume gf1_cli_sync_volume;
-
-enum gf1_cli_op_flags {
- GF_CLI_FLAG_OP_FORCE = 1,
-};
-typedef enum gf1_cli_op_flags gf1_cli_op_flags;
-
-enum gf1_cli_gsync_set {
- GF_GSYNC_OPTION_TYPE_NONE = 0,
- GF_GSYNC_OPTION_TYPE_START = 1,
- GF_GSYNC_OPTION_TYPE_STOP = 2,
- GF_GSYNC_OPTION_TYPE_CONFIG = 3,
- GF_GSYNC_OPTION_TYPE_STATUS = 4,
- GF_GSYNC_OPTION_TYPE_ROTATE = 5,
-};
-typedef enum gf1_cli_gsync_set gf1_cli_gsync_set;
-
-enum gf1_cli_stats_op {
- GF_CLI_STATS_NONE = 0,
- GF_CLI_STATS_START = 1,
- GF_CLI_STATS_STOP = 2,
- GF_CLI_STATS_INFO = 3,
- GF_CLI_STATS_TOP = 4,
-};
-typedef enum gf1_cli_stats_op gf1_cli_stats_op;
-
-enum gf1_cli_top_op {
- GF_CLI_TOP_NONE = 0,
- GF_CLI_TOP_OPEN = 0 + 1,
- GF_CLI_TOP_READ = 0 + 2,
- GF_CLI_TOP_WRITE = 0 + 3,
- GF_CLI_TOP_OPENDIR = 0 + 4,
- GF_CLI_TOP_READDIR = 0 + 5,
- GF_CLI_TOP_READ_PERF = 0 + 6,
- GF_CLI_TOP_WRITE_PERF = 0 + 7,
-};
-typedef enum gf1_cli_top_op gf1_cli_top_op;
-
-enum gf_cli_status_type {
- GF_CLI_STATUS_NONE = 0x0000,
- GF_CLI_STATUS_MEM = 0x0001,
- GF_CLI_STATUS_CLIENTS = 0x0002,
- GF_CLI_STATUS_INODE = 0x0004,
- GF_CLI_STATUS_FD = 0x0008,
- GF_CLI_STATUS_CALLPOOL = 0x0010,
- GF_CLI_STATUS_DETAIL = 0x0020,
- GF_CLI_STATUS_MASK = 0x00FF,
- GF_CLI_STATUS_VOL = 0x0100,
- GF_CLI_STATUS_ALL = 0x0200,
- GF_CLI_STATUS_BRICK = 0x0400,
- GF_CLI_STATUS_NFS = 0x0800,
- GF_CLI_STATUS_SHD = 0x1000,
-};
-typedef enum gf_cli_status_type gf_cli_status_type;
-
-struct gf_cli_req {
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf_cli_req gf_cli_req;
-
-struct gf_cli_rsp {
- int op_ret;
- int op_errno;
- char *op_errstr;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf_cli_rsp gf_cli_rsp;
-
-struct gf1_cli_probe_req {
- char *hostname;
- int port;
-};
-typedef struct gf1_cli_probe_req gf1_cli_probe_req;
-
-struct gf1_cli_probe_rsp {
- int op_ret;
- int op_errno;
- int port;
- char *hostname;
- char *op_errstr;
-};
-typedef struct gf1_cli_probe_rsp gf1_cli_probe_rsp;
-
-struct gf1_cli_deprobe_req {
- char *hostname;
- int port;
- int flags;
-};
-typedef struct gf1_cli_deprobe_req gf1_cli_deprobe_req;
-
-struct gf1_cli_deprobe_rsp {
- int op_ret;
- int op_errno;
- char *hostname;
- char *op_errstr;
-};
-typedef struct gf1_cli_deprobe_rsp gf1_cli_deprobe_rsp;
-
-struct gf1_cli_peer_list_req {
- int flags;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf1_cli_peer_list_req gf1_cli_peer_list_req;
-
-struct gf1_cli_peer_list_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int friends_len;
- char *friends_val;
- } friends;
-};
-typedef struct gf1_cli_peer_list_rsp gf1_cli_peer_list_rsp;
-
-struct gf1_cli_fsm_log_req {
- char *name;
-};
-typedef struct gf1_cli_fsm_log_req gf1_cli_fsm_log_req;
-
-struct gf1_cli_fsm_log_rsp {
- int op_ret;
- int op_errno;
- char *op_errstr;
- struct {
- u_int fsm_log_len;
- char *fsm_log_val;
- } fsm_log;
-};
-typedef struct gf1_cli_fsm_log_rsp gf1_cli_fsm_log_rsp;
-
-struct gf1_cli_getwd_req {
- int unused;
-};
-typedef struct gf1_cli_getwd_req gf1_cli_getwd_req;
-
-struct gf1_cli_getwd_rsp {
- int op_ret;
- int op_errno;
- char *wd;
-};
-typedef struct gf1_cli_getwd_rsp gf1_cli_getwd_rsp;
-
-struct gf1_cli_mount_req {
- char *label;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf1_cli_mount_req gf1_cli_mount_req;
-
-struct gf1_cli_mount_rsp {
- int op_ret;
- int op_errno;
- char *path;
-};
-typedef struct gf1_cli_mount_rsp gf1_cli_mount_rsp;
-
-struct gf1_cli_umount_req {
- int lazy;
- char *path;
-};
-typedef struct gf1_cli_umount_req gf1_cli_umount_req;
-
-struct gf1_cli_umount_rsp {
- int op_ret;
- int op_errno;
-};
-typedef struct gf1_cli_umount_rsp gf1_cli_umount_rsp;
-
-/* the xdr functions */
-
-#if defined(__STDC__) || defined(__cplusplus)
-extern bool_t xdr_gf_cli_defrag_type (XDR *, gf_cli_defrag_type*);
-extern bool_t xdr_gf_defrag_status_t (XDR *, gf_defrag_status_t*);
-extern bool_t xdr_gf1_cluster_type (XDR *, gf1_cluster_type*);
-extern bool_t xdr_gf1_cli_replace_op (XDR *, gf1_cli_replace_op*);
-extern bool_t xdr_gf1_op_commands (XDR *, gf1_op_commands*);
-extern bool_t xdr_gf_quota_type (XDR *, gf_quota_type*);
-extern bool_t xdr_gf1_cli_friends_list (XDR *, gf1_cli_friends_list*);
-extern bool_t xdr_gf1_cli_get_volume (XDR *, gf1_cli_get_volume*);
-extern bool_t xdr_gf1_cli_sync_volume (XDR *, gf1_cli_sync_volume*);
-extern bool_t xdr_gf1_cli_op_flags (XDR *, gf1_cli_op_flags*);
-extern bool_t xdr_gf1_cli_gsync_set (XDR *, gf1_cli_gsync_set*);
-extern bool_t xdr_gf1_cli_stats_op (XDR *, gf1_cli_stats_op*);
-extern bool_t xdr_gf1_cli_top_op (XDR *, gf1_cli_top_op*);
-extern bool_t xdr_gf_cli_status_type (XDR *, gf_cli_status_type*);
-extern bool_t xdr_gf_cli_req (XDR *, gf_cli_req*);
-extern bool_t xdr_gf_cli_rsp (XDR *, gf_cli_rsp*);
-extern bool_t xdr_gf1_cli_probe_req (XDR *, gf1_cli_probe_req*);
-extern bool_t xdr_gf1_cli_probe_rsp (XDR *, gf1_cli_probe_rsp*);
-extern bool_t xdr_gf1_cli_deprobe_req (XDR *, gf1_cli_deprobe_req*);
-extern bool_t xdr_gf1_cli_deprobe_rsp (XDR *, gf1_cli_deprobe_rsp*);
-extern bool_t xdr_gf1_cli_peer_list_req (XDR *, gf1_cli_peer_list_req*);
-extern bool_t xdr_gf1_cli_peer_list_rsp (XDR *, gf1_cli_peer_list_rsp*);
-extern bool_t xdr_gf1_cli_fsm_log_req (XDR *, gf1_cli_fsm_log_req*);
-extern bool_t xdr_gf1_cli_fsm_log_rsp (XDR *, gf1_cli_fsm_log_rsp*);
-extern bool_t xdr_gf1_cli_getwd_req (XDR *, gf1_cli_getwd_req*);
-extern bool_t xdr_gf1_cli_getwd_rsp (XDR *, gf1_cli_getwd_rsp*);
-extern bool_t xdr_gf1_cli_mount_req (XDR *, gf1_cli_mount_req*);
-extern bool_t xdr_gf1_cli_mount_rsp (XDR *, gf1_cli_mount_rsp*);
-extern bool_t xdr_gf1_cli_umount_req (XDR *, gf1_cli_umount_req*);
-extern bool_t xdr_gf1_cli_umount_rsp (XDR *, gf1_cli_umount_rsp*);
-
-#else /* K&R C */
-extern bool_t xdr_gf_cli_defrag_type ();
-extern bool_t xdr_gf_defrag_status_t ();
-extern bool_t xdr_gf1_cluster_type ();
-extern bool_t xdr_gf1_cli_replace_op ();
-extern bool_t xdr_gf1_op_commands ();
-extern bool_t xdr_gf_quota_type ();
-extern bool_t xdr_gf1_cli_friends_list ();
-extern bool_t xdr_gf1_cli_get_volume ();
-extern bool_t xdr_gf1_cli_sync_volume ();
-extern bool_t xdr_gf1_cli_op_flags ();
-extern bool_t xdr_gf1_cli_gsync_set ();
-extern bool_t xdr_gf1_cli_stats_op ();
-extern bool_t xdr_gf1_cli_top_op ();
-extern bool_t xdr_gf_cli_status_type ();
-extern bool_t xdr_gf_cli_req ();
-extern bool_t xdr_gf_cli_rsp ();
-extern bool_t xdr_gf1_cli_probe_req ();
-extern bool_t xdr_gf1_cli_probe_rsp ();
-extern bool_t xdr_gf1_cli_deprobe_req ();
-extern bool_t xdr_gf1_cli_deprobe_rsp ();
-extern bool_t xdr_gf1_cli_peer_list_req ();
-extern bool_t xdr_gf1_cli_peer_list_rsp ();
-extern bool_t xdr_gf1_cli_fsm_log_req ();
-extern bool_t xdr_gf1_cli_fsm_log_rsp ();
-extern bool_t xdr_gf1_cli_getwd_req ();
-extern bool_t xdr_gf1_cli_getwd_rsp ();
-extern bool_t xdr_gf1_cli_mount_req ();
-extern bool_t xdr_gf1_cli_mount_rsp ();
-extern bool_t xdr_gf1_cli_umount_req ();
-extern bool_t xdr_gf1_cli_umount_rsp ();
-
-#endif /* K&R C */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_CLI1_XDR_H_RPCGEN */
diff --git a/rpc/xdr/src/cli1-xdr.x b/rpc/xdr/src/cli1-xdr.x
index cb22080cce3..777cb0046a2 100644
--- a/rpc/xdr/src/cli1-xdr.x
+++ b/rpc/xdr/src/cli1-xdr.x
@@ -1,35 +1,76 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/compat.h>
+
enum gf_cli_defrag_type {
- GF_DEFRAG_CMD_START = 1,
+ GF_DEFRAG_CMD_NONE = 0,
+ GF_DEFRAG_CMD_START,
GF_DEFRAG_CMD_STOP,
GF_DEFRAG_CMD_STATUS,
GF_DEFRAG_CMD_START_LAYOUT_FIX,
- GF_DEFRAG_CMD_START_FORCE /* used by remove-brick data migration */
-} ;
+ GF_DEFRAG_CMD_START_FORCE, /* used by remove-brick data migration */
+ GF_DEFRAG_CMD_START_TIER,
+ GF_DEFRAG_CMD_STATUS_TIER,
+ GF_DEFRAG_CMD_START_DETACH_TIER,
+ GF_DEFRAG_CMD_STOP_DETACH_TIER,
+ GF_DEFRAG_CMD_PAUSE_TIER,
+ GF_DEFRAG_CMD_RESUME_TIER,
+ GF_DEFRAG_CMD_DETACH_STATUS,
+ GF_DEFRAG_CMD_STOP_TIER,
+ GF_DEFRAG_CMD_DETACH_START,
+ GF_DEFRAG_CMD_DETACH_COMMIT,
+ GF_DEFRAG_CMD_DETACH_COMMIT_FORCE,
+ GF_DEFRAG_CMD_DETACH_STOP,
+ GF_DEFRAG_CMD_TYPE_MAX
+};
enum gf_defrag_status_t {
GF_DEFRAG_STATUS_NOT_STARTED,
GF_DEFRAG_STATUS_STARTED,
GF_DEFRAG_STATUS_STOPPED,
GF_DEFRAG_STATUS_COMPLETE,
- GF_DEFRAG_STATUS_FAILED
-} ;
+ GF_DEFRAG_STATUS_FAILED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED,
+ GF_DEFRAG_STATUS_MAX
+};
- enum gf1_cluster_type {
+enum gf1_cluster_type {
GF_CLUSTER_TYPE_NONE = 0,
GF_CLUSTER_TYPE_STRIPE,
GF_CLUSTER_TYPE_REPLICATE,
- GF_CLUSTER_TYPE_STRIPE_REPLICATE
-} ;
-
- enum gf1_cli_replace_op {
- GF_REPLACE_OP_NONE = 0,
- GF_REPLACE_OP_START,
- GF_REPLACE_OP_COMMIT,
- GF_REPLACE_OP_PAUSE,
- GF_REPLACE_OP_ABORT,
- GF_REPLACE_OP_STATUS,
- GF_REPLACE_OP_COMMIT_FORCE
-} ;
+ GF_CLUSTER_TYPE_STRIPE_REPLICATE,
+ GF_CLUSTER_TYPE_DISPERSE,
+ GF_CLUSTER_TYPE_TIER,
+ GF_CLUSTER_TYPE_MAX
+};
+
+enum gf_bitrot_type {
+ GF_BITROT_OPTION_TYPE_NONE = 0,
+ GF_BITROT_OPTION_TYPE_ENABLE,
+ GF_BITROT_OPTION_TYPE_DISABLE,
+ GF_BITROT_OPTION_TYPE_SCRUB_THROTTLE,
+ GF_BITROT_OPTION_TYPE_SCRUB_FREQ,
+ GF_BITROT_OPTION_TYPE_SCRUB,
+ GF_BITROT_OPTION_TYPE_EXPIRY_TIME,
+ GF_BITROT_CMD_SCRUB_STATUS,
+ GF_BITROT_CMD_SCRUB_ONDEMAND,
+ GF_BITROT_OPTION_TYPE_SIGNER_THREADS,
+ GF_BITROT_OPTION_TYPE_MAX
+};
enum gf1_op_commands {
GF_OP_CMD_NONE = 0,
@@ -37,8 +78,12 @@
GF_OP_CMD_COMMIT,
GF_OP_CMD_STOP,
GF_OP_CMD_STATUS,
- GF_OP_CMD_COMMIT_FORCE
-} ;
+ GF_OP_CMD_COMMIT_FORCE,
+ GF_OP_CMD_DETACH_START,
+ GF_OP_CMD_DETACH_COMMIT,
+ GF_OP_CMD_DETACH_COMMIT_FORCE,
+ GF_OP_CMD_STOP_DETACH_TIER
+};
enum gf_quota_type {
GF_QUOTA_OPTION_TYPE_NONE = 0,
@@ -47,22 +92,34 @@ enum gf_quota_type {
GF_QUOTA_OPTION_TYPE_LIMIT_USAGE,
GF_QUOTA_OPTION_TYPE_REMOVE,
GF_QUOTA_OPTION_TYPE_LIST,
- GF_QUOTA_OPTION_TYPE_VERSION
+ GF_QUOTA_OPTION_TYPE_VERSION,
+ GF_QUOTA_OPTION_TYPE_ALERT_TIME,
+ GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT,
+ GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT,
+ GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT,
+ GF_QUOTA_OPTION_TYPE_VERSION_OBJECTS,
+ GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS,
+ GF_QUOTA_OPTION_TYPE_LIST_OBJECTS,
+ GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS,
+ GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS,
+ GF_QUOTA_OPTION_TYPE_UPGRADE,
+ GF_QUOTA_OPTION_TYPE_MAX
};
enum gf1_cli_friends_list {
- GF_CLI_LIST_ALL = 1
-} ;
+ GF_CLI_LIST_PEERS = 1,
+ GF_CLI_LIST_POOL_NODES = 2
+};
enum gf1_cli_get_volume {
GF_CLI_GET_VOLUME_ALL = 1,
GF_CLI_GET_VOLUME,
GF_CLI_GET_NEXT_VOLUME
-} ;
+};
enum gf1_cli_sync_volume {
GF_CLI_SYNC_ALL = 1
-} ;
+};
enum gf1_cli_op_flags {
GF_CLI_FLAG_OP_FORCE = 1
@@ -74,7 +131,11 @@ enum gf1_cli_gsync_set {
GF_GSYNC_OPTION_TYPE_STOP,
GF_GSYNC_OPTION_TYPE_CONFIG,
GF_GSYNC_OPTION_TYPE_STATUS,
- GF_GSYNC_OPTION_TYPE_ROTATE
+ GF_GSYNC_OPTION_TYPE_ROTATE,
+ GF_GSYNC_OPTION_TYPE_CREATE,
+ GF_GSYNC_OPTION_TYPE_DELETE,
+ GF_GSYNC_OPTION_TYPE_PAUSE,
+ GF_GSYNC_OPTION_TYPE_RESUME
};
enum gf1_cli_stats_op {
@@ -85,6 +146,19 @@ enum gf1_cli_stats_op {
GF_CLI_STATS_TOP = 4
};
+enum gf1_cli_info_op {
+ GF_CLI_INFO_NONE = 0,
+ GF_CLI_INFO_ALL = 1,
+ GF_CLI_INFO_INCREMENTAL = 2,
+ GF_CLI_INFO_CUMULATIVE = 3,
+ GF_CLI_INFO_CLEAR = 4
+};
+
+enum gf_cli_get_state_op {
+ GF_CLI_GET_STATE_DETAIL = 1,
+ GF_CLI_GET_STATE_VOLOPTS = 2
+};
+
enum gf1_cli_top_op {
GF_CLI_TOP_NONE = 0,
GF_CLI_TOP_OPEN,
@@ -99,68 +173,95 @@ enum gf1_cli_top_op {
/* The unconventional hex numbers help us perform
bit-wise operations which reduces complexity */
enum gf_cli_status_type {
- GF_CLI_STATUS_NONE = 0x0000,
- GF_CLI_STATUS_MEM = 0x0001, /*0000000000001*/
- GF_CLI_STATUS_CLIENTS = 0x0002, /*0000000000010*/
- GF_CLI_STATUS_INODE = 0x0004, /*0000000000100*/
- GF_CLI_STATUS_FD = 0x0008, /*0000000001000*/
- GF_CLI_STATUS_CALLPOOL = 0x0010, /*0000000010000*/
- GF_CLI_STATUS_DETAIL = 0x0020, /*0000000100000*/
- GF_CLI_STATUS_MASK = 0x00FF, /*0000011111111 Used to get the op*/
- GF_CLI_STATUS_VOL = 0x0100, /*0000100000000*/
- GF_CLI_STATUS_ALL = 0x0200, /*0001000000000*/
- GF_CLI_STATUS_BRICK = 0x0400, /*0010000000000*/
- GF_CLI_STATUS_NFS = 0x0800, /*0100000000000*/
- GF_CLI_STATUS_SHD = 0x1000 /*1000000000000*/
-};
-
- struct gf_cli_req {
- opaque dict<>;
-} ;
+ GF_CLI_STATUS_NONE = 0x000000,
+ GF_CLI_STATUS_MEM = 0x000001, /*000000000000001*/
+ GF_CLI_STATUS_CLIENTS = 0x000002, /*000000000000010*/
+ GF_CLI_STATUS_INODE = 0x000004, /*000000000000100*/
+ GF_CLI_STATUS_FD = 0x000008, /*000000000001000*/
+ GF_CLI_STATUS_CALLPOOL = 0x000010, /*000000000010000*/
+ GF_CLI_STATUS_DETAIL = 0x000020, /*000000000100000*/
+ GF_CLI_STATUS_TASKS = 0x000040, /*00000001000000*/
+ GF_CLI_STATUS_CLIENT_LIST = 0x000080, /*00000010000000*/
+ GF_CLI_STATUS_MASK = 0x0000FF, /*000000011111111 Used to get the op*/
+ GF_CLI_STATUS_VOL = 0x000100, /*00000000100000000*/
+ GF_CLI_STATUS_ALL = 0x000200, /*00000001000000000*/
+ GF_CLI_STATUS_BRICK = 0x000400, /*00000010000000000*/
+ GF_CLI_STATUS_NFS = 0x000800, /*00000100000000000*/
+ GF_CLI_STATUS_SHD = 0x001000, /*00001000000000000*/
+ GF_CLI_STATUS_QUOTAD = 0x002000, /*00010000000000000*/
+ GF_CLI_STATUS_SNAPD = 0x004000, /*00100000000000000*/
+ GF_CLI_STATUS_BITD = 0x008000, /*01000000000000000*/
+ GF_CLI_STATUS_SCRUB = 0x010000, /*10000000000000000*/
+ GF_CLI_STATUS_TIERD = 0x020000 /*100000000000000000*/
+};
- struct gf_cli_rsp {
- int op_ret;
- int op_errno;
- string op_errstr<>;
- opaque dict<>;
-} ;
+/* Identifiers for snapshot clis */
+enum gf1_cli_snapshot {
+ GF_SNAP_OPTION_TYPE_NONE = 0,
+ GF_SNAP_OPTION_TYPE_CREATE,
+ GF_SNAP_OPTION_TYPE_DELETE,
+ GF_SNAP_OPTION_TYPE_RESTORE,
+ GF_SNAP_OPTION_TYPE_ACTIVATE,
+ GF_SNAP_OPTION_TYPE_DEACTIVATE,
+ GF_SNAP_OPTION_TYPE_LIST,
+ GF_SNAP_OPTION_TYPE_STATUS,
+ GF_SNAP_OPTION_TYPE_CONFIG,
+ GF_SNAP_OPTION_TYPE_CLONE,
+ GF_SNAP_OPTION_TYPE_INFO
+};
- struct gf1_cli_probe_req {
- string hostname<>;
- int port;
-} ;
+enum gf1_cli_snapshot_info {
+ GF_SNAP_INFO_TYPE_ALL = 0,
+ GF_SNAP_INFO_TYPE_SNAP,
+ GF_SNAP_INFO_TYPE_VOL
+};
- struct gf1_cli_probe_rsp {
- int op_ret;
- int op_errno;
- int port;
- string hostname<>;
- string op_errstr<>;
-} ;
+enum gf1_cli_snapshot_config {
+ GF_SNAP_CONFIG_TYPE_NONE = 0,
+ GF_SNAP_CONFIG_TYPE_SET,
+ GF_SNAP_CONFIG_DISPLAY
+};
- struct gf1_cli_deprobe_req {
- string hostname<>;
- int port;
- int flags;
-} ;
+enum gf1_cli_snapshot_status {
+ GF_SNAP_STATUS_TYPE_ALL = 0,
+ GF_SNAP_STATUS_TYPE_SNAP,
+ GF_SNAP_STATUS_TYPE_VOL,
+ GF_SNAP_STATUS_TYPE_ITER
+};
+
+/* Changing order of GF_SNAP_DELETE_TYPE_VOL *
+ * and GF_SNAP_DELETE_TYPE_SNAP so that they don't *
+ * overlap with the enums of GF_SNAP_STATUS_TYPE_SNAP, *
+ * and GF_SNAP_STATUS_TYPE_VOL *
+ */
+enum gf1_cli_snapshot_delete {
+ GF_SNAP_DELETE_TYPE_ALL = 0,
+ GF_SNAP_DELETE_TYPE_VOL = 1,
+ GF_SNAP_DELETE_TYPE_SNAP = 2,
+ GF_SNAP_DELETE_TYPE_ITER = 3
+};
- struct gf1_cli_deprobe_rsp {
+struct gf_cli_req {
+ opaque dict<>;
+};
+
+ struct gf_cli_rsp {
int op_ret;
int op_errno;
- string hostname<>;
string op_errstr<>;
-} ;
+ opaque dict<>;
+};
struct gf1_cli_peer_list_req {
int flags;
opaque dict<>;
-} ;
+};
struct gf1_cli_peer_list_rsp {
int op_ret;
int op_errno;
opaque friends<>;
-} ;
+};
struct gf1_cli_fsm_log_req {
string name<>;
@@ -175,7 +276,7 @@ struct gf1_cli_fsm_log_rsp {
struct gf1_cli_getwd_req {
int unused;
-} ;
+};
struct gf1_cli_getwd_rsp {
int op_ret;
diff --git a/rpc/xdr/src/glusterd1-xdr.c b/rpc/xdr/src/glusterd1-xdr.c
deleted file mode 100644
index 28ab49b6f73..00000000000
--- a/rpc/xdr/src/glusterd1-xdr.c
+++ /dev/null
@@ -1,502 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#include "glusterd1-xdr.h"
-
-bool_t
-xdr_glusterd_volume_status (XDR *xdrs, glusterd_volume_status *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_probe_req (XDR *xdrs, gd1_mgmt_probe_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_probe_rsp (XDR *xdrs, gd1_mgmt_probe_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->port);
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- }
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
-
- } else {
- objp->port = IXDR_GET_LONG(buf);
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- }
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_friend_req (XDR *xdrs, gd1_mgmt_friend_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->vols.vols_val, (u_int *) &objp->vols.vols_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_friend_rsp (XDR *xdrs, gd1_mgmt_friend_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_unfriend_req (XDR *xdrs, gd1_mgmt_unfriend_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_unfriend_rsp (XDR *xdrs, gd1_mgmt_unfriend_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_string (xdrs, &objp->hostname, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_cluster_lock_req (XDR *xdrs, gd1_mgmt_cluster_lock_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_cluster_lock_rsp (XDR *xdrs, gd1_mgmt_cluster_lock_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_cluster_unlock_req (XDR *xdrs, gd1_mgmt_cluster_unlock_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_cluster_unlock_rsp (XDR *xdrs, gd1_mgmt_cluster_unlock_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_stage_op_req (XDR *xdrs, gd1_mgmt_stage_op_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->buf.buf_val, (u_int *) &objp->buf.buf_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_stage_op_rsp (XDR *xdrs, gd1_mgmt_stage_op_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->op);
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- }
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
-
- } else {
- objp->op = IXDR_GET_LONG(buf);
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- }
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_commit_op_req (XDR *xdrs, gd1_mgmt_commit_op_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->buf.buf_val, (u_int *) &objp->buf.buf_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_commit_op_rsp (XDR *xdrs, gd1_mgmt_commit_op_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->op);
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- }
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
-
- } else {
- objp->op = IXDR_GET_LONG(buf);
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- }
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_friend_update (XDR *xdrs, gd1_mgmt_friend_update *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->friends.friends_val, (u_int *) &objp->friends.friends_len, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_friend_update_rsp (XDR *xdrs, gd1_mgmt_friend_update_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_vector (xdrs, (char *)objp->uuid, 16,
- sizeof (u_char), (xdrproc_t) xdr_u_char))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_brick_op_req (XDR *xdrs, gd1_mgmt_brick_op_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->input.input_val, (u_int *) &objp->input.input_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gd1_mgmt_brick_op_rsp (XDR *xdrs, gd1_mgmt_brick_op_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->output.output_val, (u_int *) &objp->output.output_len, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->op_errstr, ~0))
- return FALSE;
- return TRUE;
-}
diff --git a/rpc/xdr/src/glusterd1-xdr.h b/rpc/xdr/src/glusterd1-xdr.h
deleted file mode 100644
index 89de04ebef2..00000000000
--- a/rpc/xdr/src/glusterd1-xdr.h
+++ /dev/null
@@ -1,264 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#ifndef _GLUSTERD1_XDR_H_RPCGEN
-#define _GLUSTERD1_XDR_H_RPCGEN
-
-#include <rpc/rpc.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-enum glusterd_volume_status {
- GLUSTERD_STATUS_NONE = 0,
- GLUSTERD_STATUS_STARTED = 0 + 1,
- GLUSTERD_STATUS_STOPPED = 0 + 2,
-};
-typedef enum glusterd_volume_status glusterd_volume_status;
-
-struct gd1_mgmt_probe_req {
- u_char uuid[16];
- char *hostname;
- int port;
-};
-typedef struct gd1_mgmt_probe_req gd1_mgmt_probe_req;
-
-struct gd1_mgmt_probe_rsp {
- u_char uuid[16];
- char *hostname;
- int port;
- int op_ret;
- int op_errno;
- char *op_errstr;
-};
-typedef struct gd1_mgmt_probe_rsp gd1_mgmt_probe_rsp;
-
-struct gd1_mgmt_friend_req {
- u_char uuid[16];
- char *hostname;
- int port;
- struct {
- u_int vols_len;
- char *vols_val;
- } vols;
-};
-typedef struct gd1_mgmt_friend_req gd1_mgmt_friend_req;
-
-struct gd1_mgmt_friend_rsp {
- u_char uuid[16];
- char *hostname;
- int op_ret;
- int op_errno;
- int port;
-};
-typedef struct gd1_mgmt_friend_rsp gd1_mgmt_friend_rsp;
-
-struct gd1_mgmt_unfriend_req {
- u_char uuid[16];
- char *hostname;
- int port;
-};
-typedef struct gd1_mgmt_unfriend_req gd1_mgmt_unfriend_req;
-
-struct gd1_mgmt_unfriend_rsp {
- u_char uuid[16];
- char *hostname;
- int op_ret;
- int op_errno;
- int port;
-};
-typedef struct gd1_mgmt_unfriend_rsp gd1_mgmt_unfriend_rsp;
-
-struct gd1_mgmt_cluster_lock_req {
- u_char uuid[16];
-};
-typedef struct gd1_mgmt_cluster_lock_req gd1_mgmt_cluster_lock_req;
-
-struct gd1_mgmt_cluster_lock_rsp {
- u_char uuid[16];
- int op_ret;
- int op_errno;
-};
-typedef struct gd1_mgmt_cluster_lock_rsp gd1_mgmt_cluster_lock_rsp;
-
-struct gd1_mgmt_cluster_unlock_req {
- u_char uuid[16];
-};
-typedef struct gd1_mgmt_cluster_unlock_req gd1_mgmt_cluster_unlock_req;
-
-struct gd1_mgmt_cluster_unlock_rsp {
- u_char uuid[16];
- int op_ret;
- int op_errno;
-};
-typedef struct gd1_mgmt_cluster_unlock_rsp gd1_mgmt_cluster_unlock_rsp;
-
-struct gd1_mgmt_stage_op_req {
- u_char uuid[16];
- int op;
- struct {
- u_int buf_len;
- char *buf_val;
- } buf;
-};
-typedef struct gd1_mgmt_stage_op_req gd1_mgmt_stage_op_req;
-
-struct gd1_mgmt_stage_op_rsp {
- u_char uuid[16];
- int op;
- int op_ret;
- int op_errno;
- char *op_errstr;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gd1_mgmt_stage_op_rsp gd1_mgmt_stage_op_rsp;
-
-struct gd1_mgmt_commit_op_req {
- u_char uuid[16];
- int op;
- struct {
- u_int buf_len;
- char *buf_val;
- } buf;
-};
-typedef struct gd1_mgmt_commit_op_req gd1_mgmt_commit_op_req;
-
-struct gd1_mgmt_commit_op_rsp {
- u_char uuid[16];
- int op;
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- char *op_errstr;
-};
-typedef struct gd1_mgmt_commit_op_rsp gd1_mgmt_commit_op_rsp;
-
-struct gd1_mgmt_friend_update {
- u_char uuid[16];
- struct {
- u_int friends_len;
- char *friends_val;
- } friends;
- int port;
-};
-typedef struct gd1_mgmt_friend_update gd1_mgmt_friend_update;
-
-struct gd1_mgmt_friend_update_rsp {
- u_char uuid[16];
- int op;
- int op_ret;
- int op_errno;
-};
-typedef struct gd1_mgmt_friend_update_rsp gd1_mgmt_friend_update_rsp;
-
-struct gd1_mgmt_brick_op_req {
- char *name;
- int op;
- struct {
- u_int input_len;
- char *input_val;
- } input;
-};
-typedef struct gd1_mgmt_brick_op_req gd1_mgmt_brick_op_req;
-
-struct gd1_mgmt_brick_op_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int output_len;
- char *output_val;
- } output;
- char *op_errstr;
-};
-typedef struct gd1_mgmt_brick_op_rsp gd1_mgmt_brick_op_rsp;
-
-/* the xdr functions */
-
-#if defined(__STDC__) || defined(__cplusplus)
-extern bool_t xdr_glusterd_volume_status (XDR *, glusterd_volume_status*);
-extern bool_t xdr_gd1_mgmt_probe_req (XDR *, gd1_mgmt_probe_req*);
-extern bool_t xdr_gd1_mgmt_probe_rsp (XDR *, gd1_mgmt_probe_rsp*);
-extern bool_t xdr_gd1_mgmt_friend_req (XDR *, gd1_mgmt_friend_req*);
-extern bool_t xdr_gd1_mgmt_friend_rsp (XDR *, gd1_mgmt_friend_rsp*);
-extern bool_t xdr_gd1_mgmt_unfriend_req (XDR *, gd1_mgmt_unfriend_req*);
-extern bool_t xdr_gd1_mgmt_unfriend_rsp (XDR *, gd1_mgmt_unfriend_rsp*);
-extern bool_t xdr_gd1_mgmt_cluster_lock_req (XDR *, gd1_mgmt_cluster_lock_req*);
-extern bool_t xdr_gd1_mgmt_cluster_lock_rsp (XDR *, gd1_mgmt_cluster_lock_rsp*);
-extern bool_t xdr_gd1_mgmt_cluster_unlock_req (XDR *, gd1_mgmt_cluster_unlock_req*);
-extern bool_t xdr_gd1_mgmt_cluster_unlock_rsp (XDR *, gd1_mgmt_cluster_unlock_rsp*);
-extern bool_t xdr_gd1_mgmt_stage_op_req (XDR *, gd1_mgmt_stage_op_req*);
-extern bool_t xdr_gd1_mgmt_stage_op_rsp (XDR *, gd1_mgmt_stage_op_rsp*);
-extern bool_t xdr_gd1_mgmt_commit_op_req (XDR *, gd1_mgmt_commit_op_req*);
-extern bool_t xdr_gd1_mgmt_commit_op_rsp (XDR *, gd1_mgmt_commit_op_rsp*);
-extern bool_t xdr_gd1_mgmt_friend_update (XDR *, gd1_mgmt_friend_update*);
-extern bool_t xdr_gd1_mgmt_friend_update_rsp (XDR *, gd1_mgmt_friend_update_rsp*);
-extern bool_t xdr_gd1_mgmt_brick_op_req (XDR *, gd1_mgmt_brick_op_req*);
-extern bool_t xdr_gd1_mgmt_brick_op_rsp (XDR *, gd1_mgmt_brick_op_rsp*);
-
-#else /* K&R C */
-extern bool_t xdr_glusterd_volume_status ();
-extern bool_t xdr_gd1_mgmt_probe_req ();
-extern bool_t xdr_gd1_mgmt_probe_rsp ();
-extern bool_t xdr_gd1_mgmt_friend_req ();
-extern bool_t xdr_gd1_mgmt_friend_rsp ();
-extern bool_t xdr_gd1_mgmt_unfriend_req ();
-extern bool_t xdr_gd1_mgmt_unfriend_rsp ();
-extern bool_t xdr_gd1_mgmt_cluster_lock_req ();
-extern bool_t xdr_gd1_mgmt_cluster_lock_rsp ();
-extern bool_t xdr_gd1_mgmt_cluster_unlock_req ();
-extern bool_t xdr_gd1_mgmt_cluster_unlock_rsp ();
-extern bool_t xdr_gd1_mgmt_stage_op_req ();
-extern bool_t xdr_gd1_mgmt_stage_op_rsp ();
-extern bool_t xdr_gd1_mgmt_commit_op_req ();
-extern bool_t xdr_gd1_mgmt_commit_op_rsp ();
-extern bool_t xdr_gd1_mgmt_friend_update ();
-extern bool_t xdr_gd1_mgmt_friend_update_rsp ();
-extern bool_t xdr_gd1_mgmt_brick_op_req ();
-extern bool_t xdr_gd1_mgmt_brick_op_rsp ();
-
-#endif /* K&R C */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_GLUSTERD1_XDR_H_RPCGEN */
diff --git a/rpc/xdr/src/glusterd1-xdr.x b/rpc/xdr/src/glusterd1-xdr.x
index fc1bb58b4a8..b631dea3502 100644
--- a/rpc/xdr/src/glusterd1-xdr.x
+++ b/rpc/xdr/src/glusterd1-xdr.x
@@ -1,3 +1,18 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/compat.h>
+
enum glusterd_volume_status {
GLUSTERD_STATUS_NONE = 0,
GLUSTERD_STATUS_STARTED,
@@ -117,6 +132,7 @@ struct gd1_mgmt_brick_op_req {
string name<>;
int op;
opaque input<>;
+ opaque dict<>;
} ;
struct gd1_mgmt_brick_op_rsp {
@@ -125,3 +141,109 @@ struct gd1_mgmt_brick_op_rsp {
opaque output<>;
string op_errstr<>;
} ;
+
+struct gd1_mgmt_v3_lock_req {
+ unsigned char uuid[16];
+ unsigned char txn_id[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_lock_rsp {
+ unsigned char uuid[16];
+ unsigned char txn_id[16];
+ opaque dict<>;
+ int op_ret;
+ int op_errno;
+} ;
+
+struct gd1_mgmt_v3_pre_val_req {
+ unsigned char uuid[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_pre_val_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ string op_errstr<>;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_brick_op_req {
+ unsigned char uuid[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_brick_op_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ string op_errstr<>;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_commit_req {
+ unsigned char uuid[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_commit_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ opaque dict<>;
+ string op_errstr<>;
+} ;
+
+struct gd1_mgmt_v3_post_commit_req {
+ unsigned char uuid[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_post_commit_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ opaque dict<>;
+ string op_errstr<>;
+} ;
+
+struct gd1_mgmt_v3_post_val_req {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_post_val_rsp {
+ unsigned char uuid[16];
+ int op;
+ int op_ret;
+ int op_errno;
+ string op_errstr<>;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_unlock_req {
+ unsigned char uuid[16];
+ unsigned char txn_id[16];
+ int op;
+ opaque dict<>;
+} ;
+
+struct gd1_mgmt_v3_unlock_rsp {
+ unsigned char uuid[16];
+ unsigned char txn_id[16];
+ opaque dict<>;
+ int op_ret;
+ int op_errno;
+} ;
diff --git a/rpc/xdr/src/glusterfs3-xdr.c b/rpc/xdr/src/glusterfs3-xdr.c
deleted file mode 100644
index dd8281ee223..00000000000
--- a/rpc/xdr/src/glusterfs3-xdr.c
+++ /dev/null
@@ -1,1922 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#include "glusterfs3-xdr.h"
-
-bool_t
-xdr_gf_statfs (XDR *xdrs, gf_statfs *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_u_quad_t (xdrs, &objp->bsize))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->frsize))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->blocks))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->bfree))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->bavail))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->files))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ffree))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->favail))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->fsid))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->flag))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->namemax))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_proto_flock (XDR *xdrs, gf_proto_flock *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->whence))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->start))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->len))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->pid))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->lk_owner.lk_owner_val, (u_int *) &objp->lk_owner.lk_owner_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_iatt (XDR *xdrs, gf_iatt *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_opaque (xdrs, objp->ia_gfid, 16))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_dev))
- return FALSE;
- buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_nlink))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_gid))
- return FALSE;
-
- } else {
- IXDR_PUT_U_LONG(buf, objp->mode);
- IXDR_PUT_U_LONG(buf, objp->ia_nlink);
- IXDR_PUT_U_LONG(buf, objp->ia_uid);
- IXDR_PUT_U_LONG(buf, objp->ia_gid);
- }
- if (!xdr_u_quad_t (xdrs, &objp->ia_rdev))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_size))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_blksize))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_blocks))
- return FALSE;
- buf = XDR_INLINE (xdrs, 6 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->ia_atime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_atime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime_nsec))
- return FALSE;
- } else {
- IXDR_PUT_U_LONG(buf, objp->ia_atime);
- IXDR_PUT_U_LONG(buf, objp->ia_atime_nsec);
- IXDR_PUT_U_LONG(buf, objp->ia_mtime);
- IXDR_PUT_U_LONG(buf, objp->ia_mtime_nsec);
- IXDR_PUT_U_LONG(buf, objp->ia_ctime);
- IXDR_PUT_U_LONG(buf, objp->ia_ctime_nsec);
- }
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_opaque (xdrs, objp->ia_gfid, 16))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_dev))
- return FALSE;
- buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_nlink))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_gid))
- return FALSE;
-
- } else {
- objp->mode = IXDR_GET_U_LONG(buf);
- objp->ia_nlink = IXDR_GET_U_LONG(buf);
- objp->ia_uid = IXDR_GET_U_LONG(buf);
- objp->ia_gid = IXDR_GET_U_LONG(buf);
- }
- if (!xdr_u_quad_t (xdrs, &objp->ia_rdev))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_size))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_blksize))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_blocks))
- return FALSE;
- buf = XDR_INLINE (xdrs, 6 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->ia_atime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_atime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime_nsec))
- return FALSE;
- } else {
- objp->ia_atime = IXDR_GET_U_LONG(buf);
- objp->ia_atime_nsec = IXDR_GET_U_LONG(buf);
- objp->ia_mtime = IXDR_GET_U_LONG(buf);
- objp->ia_mtime_nsec = IXDR_GET_U_LONG(buf);
- objp->ia_ctime = IXDR_GET_U_LONG(buf);
- objp->ia_ctime_nsec = IXDR_GET_U_LONG(buf);
- }
- return TRUE;
- }
-
- if (!xdr_opaque (xdrs, objp->ia_gfid, 16))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_dev))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_nlink))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_gid))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_rdev))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_size))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_blksize))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->ia_blocks))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_atime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_atime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_mtime_nsec))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ia_ctime_nsec))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_stat_req (XDR *xdrs, gfs3_stat_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_stat_rsp (XDR *xdrs, gfs3_stat_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readlink_req (XDR *xdrs, gfs3_readlink_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readlink_rsp (XDR *xdrs, gfs3_readlink_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->buf))
- return FALSE;
- if (!xdr_string (xdrs, &objp->path, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_mknod_req (XDR *xdrs, gfs3_mknod_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->dev))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->umask))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_mknod_rsp (XDR *xdrs, gfs3_mknod_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_mkdir_req (XDR *xdrs, gfs3_mkdir_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->umask))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_mkdir_rsp (XDR *xdrs, gfs3_mkdir_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_unlink_req (XDR *xdrs, gfs3_unlink_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->xflags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_unlink_rsp (XDR *xdrs, gfs3_unlink_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rmdir_req (XDR *xdrs, gfs3_rmdir_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- if (!xdr_int (xdrs, &objp->xflags))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rmdir_rsp (XDR *xdrs, gfs3_rmdir_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_symlink_req (XDR *xdrs, gfs3_symlink_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->umask))
- return FALSE;
- if (!xdr_string (xdrs, &objp->linkname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_symlink_rsp (XDR *xdrs, gfs3_symlink_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rename_req (XDR *xdrs, gfs3_rename_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->oldgfid, 16))
- return FALSE;
- if (!xdr_opaque (xdrs, objp->newgfid, 16))
- return FALSE;
- if (!xdr_string (xdrs, &objp->oldbname, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->newbname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rename_rsp (XDR *xdrs, gfs3_rename_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preoldparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postoldparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->prenewparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postnewparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_link_req (XDR *xdrs, gfs3_link_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->oldgfid, 16))
- return FALSE;
- if (!xdr_opaque (xdrs, objp->newgfid, 16))
- return FALSE;
- if (!xdr_string (xdrs, &objp->newbname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_link_rsp (XDR *xdrs, gfs3_link_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_truncate_req (XDR *xdrs, gfs3_truncate_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_truncate_rsp (XDR *xdrs, gfs3_truncate_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->prestat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->poststat))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_open_req (XDR *xdrs, gfs3_open_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_open_rsp (XDR *xdrs, gfs3_open_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_read_req (XDR *xdrs, gfs3_read_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flag))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_read_rsp (XDR *xdrs, gfs3_read_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_lookup_req (XDR *xdrs, gfs3_lookup_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_lookup_rsp (XDR *xdrs, gfs3_lookup_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_write_req (XDR *xdrs, gfs3_write_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flag))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_write_rsp (XDR *xdrs, gfs3_write_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->prestat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->poststat))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_statfs_req (XDR *xdrs, gfs3_statfs_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_statfs_rsp (XDR *xdrs, gfs3_statfs_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_statfs (xdrs, &objp->statfs))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_lk_req (XDR *xdrs, gfs3_lk_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->cmd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_gf_proto_flock (xdrs, &objp->flock))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_lk_rsp (XDR *xdrs, gfs3_lk_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_proto_flock (xdrs, &objp->flock))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_inodelk_req (XDR *xdrs, gfs3_inodelk_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->cmd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_gf_proto_flock (xdrs, &objp->flock))
- return FALSE;
- if (!xdr_string (xdrs, &objp->volume, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_finodelk_req (XDR *xdrs, gfs3_finodelk_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->cmd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_gf_proto_flock (xdrs, &objp->flock))
- return FALSE;
- if (!xdr_string (xdrs, &objp->volume, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_flush_req (XDR *xdrs, gfs3_flush_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsync_req (XDR *xdrs, gfs3_fsync_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->data))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsync_rsp (XDR *xdrs, gfs3_fsync_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->prestat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->poststat))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_setxattr_req (XDR *xdrs, gfs3_setxattr_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsetxattr_req (XDR *xdrs, gfs3_fsetxattr_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_xattrop_req (XDR *xdrs, gfs3_xattrop_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_xattrop_rsp (XDR *xdrs, gfs3_xattrop_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fxattrop_req (XDR *xdrs, gfs3_fxattrop_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fxattrop_rsp (XDR *xdrs, gfs3_fxattrop_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_getxattr_req (XDR *xdrs, gfs3_getxattr_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->namelen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_getxattr_rsp (XDR *xdrs, gfs3_getxattr_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fgetxattr_req (XDR *xdrs, gfs3_fgetxattr_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->namelen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fgetxattr_rsp (XDR *xdrs, gfs3_fgetxattr_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_removexattr_req (XDR *xdrs, gfs3_removexattr_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fremovexattr_req (XDR *xdrs, gfs3_fremovexattr_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_opendir_req (XDR *xdrs, gfs3_opendir_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_opendir_rsp (XDR *xdrs, gfs3_opendir_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsyncdir_req (XDR *xdrs, gfs3_fsyncdir_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_int (xdrs, &objp->data))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readdir_req (XDR *xdrs, gfs3_readdir_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readdirp_req (XDR *xdrs, gfs3_readdirp_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->size))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_setvolume_req (XDR *xdrs, gf_setvolume_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_setvolume_rsp (XDR *xdrs, gf_setvolume_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_access_req (XDR *xdrs, gfs3_access_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mask))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_create_req (XDR *xdrs, gfs3_create_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->umask))
- return FALSE;
-
- } else {
- IXDR_PUT_U_LONG(buf, objp->flags);
- IXDR_PUT_U_LONG(buf, objp->mode);
- IXDR_PUT_U_LONG(buf, objp->umask);
- }
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->umask))
- return FALSE;
-
- } else {
- objp->flags = IXDR_GET_U_LONG(buf);
- objp->mode = IXDR_GET_U_LONG(buf);
- objp->umask = IXDR_GET_U_LONG(buf);
- }
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_opaque (xdrs, objp->pargfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->umask))
- return FALSE;
- if (!xdr_string (xdrs, &objp->bname, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_create_rsp (XDR *xdrs, gfs3_create_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->preparent))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->postparent))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_ftruncate_req (XDR *xdrs, gfs3_ftruncate_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_ftruncate_rsp (XDR *xdrs, gfs3_ftruncate_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->prestat))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->poststat))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fstat_req (XDR *xdrs, gfs3_fstat_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fstat_rsp (XDR *xdrs, gfs3_fstat_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_entrylk_req (XDR *xdrs, gfs3_entrylk_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->cmd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->namelen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->volume, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fentrylk_req (XDR *xdrs, gfs3_fentrylk_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->cmd))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->type))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->namelen))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_string (xdrs, &objp->volume, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_setattr_req (XDR *xdrs, gfs3_setattr_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stbuf))
- return FALSE;
- if (!xdr_int (xdrs, &objp->valid))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_setattr_rsp (XDR *xdrs, gfs3_setattr_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->statpre))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->statpost))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsetattr_req (XDR *xdrs, gfs3_fsetattr_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stbuf))
- return FALSE;
- if (!xdr_int (xdrs, &objp->valid))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_fsetattr_rsp (XDR *xdrs, gfs3_fsetattr_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->statpre))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->statpost))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rchecksum_req (XDR *xdrs, gfs3_rchecksum_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->len))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_rchecksum_rsp (XDR *xdrs, gfs3_rchecksum_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->weak_checksum))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- IXDR_PUT_U_LONG(buf, objp->weak_checksum);
- }
- if (!xdr_bytes (xdrs, (char **)&objp->strong_checksum.strong_checksum_val, (u_int *) &objp->strong_checksum.strong_checksum_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->weak_checksum))
- return FALSE;
-
- } else {
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- objp->weak_checksum = IXDR_GET_U_LONG(buf);
- }
- if (!xdr_bytes (xdrs, (char **)&objp->strong_checksum.strong_checksum_val, (u_int *) &objp->strong_checksum.strong_checksum_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->weak_checksum))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->strong_checksum.strong_checksum_val, (u_int *) &objp->strong_checksum.strong_checksum_len, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_getspec_req (XDR *xdrs, gf_getspec_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_string (xdrs, &objp->key, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_getspec_rsp (XDR *xdrs, gf_getspec_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_string (xdrs, &objp->spec, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_log_req (XDR *xdrs, gf_log_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_bytes (xdrs, (char **)&objp->msg.msg_val, (u_int *) &objp->msg.msg_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_notify_req (XDR *xdrs, gf_notify_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_string (xdrs, &objp->buf, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_notify_rsp (XDR *xdrs, gf_notify_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- IXDR_PUT_U_LONG(buf, objp->flags);
- }
- if (!xdr_string (xdrs, &objp->buf, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
-
- } else {
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- objp->flags = IXDR_GET_U_LONG(buf);
- }
- if (!xdr_string (xdrs, &objp->buf, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->flags))
- return FALSE;
- if (!xdr_string (xdrs, &objp->buf, ~0))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_releasedir_req (XDR *xdrs, gfs3_releasedir_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_release_req (XDR *xdrs, gfs3_release_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_opaque (xdrs, objp->gfid, 16))
- return FALSE;
- if (!xdr_quad_t (xdrs, &objp->fd))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_common_rsp (XDR *xdrs, gf_common_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_dirlist (XDR *xdrs, gfs3_dirlist *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_u_quad_t (xdrs, &objp->d_ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->d_off))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->d_len))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->d_type))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (gfs3_dirlist), (xdrproc_t) xdr_gfs3_dirlist))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readdir_rsp (XDR *xdrs, gfs3_readdir_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->reply, sizeof (gfs3_dirlist), (xdrproc_t) xdr_gfs3_dirlist))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_dirplist (XDR *xdrs, gfs3_dirplist *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_u_quad_t (xdrs, &objp->d_ino))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->d_off))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->d_len))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->d_type))
- return FALSE;
- if (!xdr_string (xdrs, &objp->name, ~0))
- return FALSE;
- if (!xdr_gf_iatt (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (gfs3_dirplist), (xdrproc_t) xdr_gfs3_dirplist))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gfs3_readdirp_rsp (XDR *xdrs, gfs3_readdirp_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->reply, sizeof (gfs3_dirplist), (xdrproc_t) xdr_gfs3_dirplist))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->xdata.xdata_val, (u_int *) &objp->xdata.xdata_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_set_lk_ver_rsp (XDR *xdrs, gf_set_lk_ver_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->lk_ver))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_set_lk_ver_req (XDR *xdrs, gf_set_lk_ver_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->uid, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->lk_ver))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_event_notify_req (XDR *xdrs, gf_event_notify_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_event_notify_rsp (XDR *xdrs, gf_event_notify_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->dict.dict_val, (u_int *) &objp->dict.dict_len, ~0))
- return FALSE;
- return TRUE;
-}
diff --git a/rpc/xdr/src/glusterfs3-xdr.h b/rpc/xdr/src/glusterfs3-xdr.h
deleted file mode 100644
index ed29deea271..00000000000
--- a/rpc/xdr/src/glusterfs3-xdr.h
+++ /dev/null
@@ -1,1295 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#ifndef _GLUSTERFS3_XDR_H_RPCGEN
-#define _GLUSTERFS3_XDR_H_RPCGEN
-
-#include <rpc/rpc.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-struct gf_statfs {
- u_quad_t bsize;
- u_quad_t frsize;
- u_quad_t blocks;
- u_quad_t bfree;
- u_quad_t bavail;
- u_quad_t files;
- u_quad_t ffree;
- u_quad_t favail;
- u_quad_t fsid;
- u_quad_t flag;
- u_quad_t namemax;
-};
-typedef struct gf_statfs gf_statfs;
-
-struct gf_proto_flock {
- u_int type;
- u_int whence;
- u_quad_t start;
- u_quad_t len;
- u_int pid;
- struct {
- u_int lk_owner_len;
- char *lk_owner_val;
- } lk_owner;
-};
-typedef struct gf_proto_flock gf_proto_flock;
-
-struct gf_iatt {
- char ia_gfid[16];
- u_quad_t ia_ino;
- u_quad_t ia_dev;
- u_int mode;
- u_int ia_nlink;
- u_int ia_uid;
- u_int ia_gid;
- u_quad_t ia_rdev;
- u_quad_t ia_size;
- u_int ia_blksize;
- u_quad_t ia_blocks;
- u_int ia_atime;
- u_int ia_atime_nsec;
- u_int ia_mtime;
- u_int ia_mtime_nsec;
- u_int ia_ctime;
- u_int ia_ctime_nsec;
-};
-typedef struct gf_iatt gf_iatt;
-
-struct gfs3_stat_req {
- char gfid[16];
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_stat_req gfs3_stat_req;
-
-struct gfs3_stat_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_stat_rsp gfs3_stat_rsp;
-
-struct gfs3_readlink_req {
- char gfid[16];
- u_int size;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_readlink_req gfs3_readlink_req;
-
-struct gfs3_readlink_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt buf;
- char *path;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_readlink_rsp gfs3_readlink_rsp;
-
-struct gfs3_mknod_req {
- char pargfid[16];
- u_quad_t dev;
- u_int mode;
- u_int umask;
- char *bname;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_mknod_req gfs3_mknod_req;
-
-struct gfs3_mknod_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_mknod_rsp gfs3_mknod_rsp;
-
-struct gfs3_mkdir_req {
- char pargfid[16];
- u_int mode;
- u_int umask;
- char *bname;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_mkdir_req gfs3_mkdir_req;
-
-struct gfs3_mkdir_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_mkdir_rsp gfs3_mkdir_rsp;
-
-struct gfs3_unlink_req {
- char pargfid[16];
- char *bname;
- u_int xflags;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_unlink_req gfs3_unlink_req;
-
-struct gfs3_unlink_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_unlink_rsp gfs3_unlink_rsp;
-
-struct gfs3_rmdir_req {
- char pargfid[16];
- int xflags;
- char *bname;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_rmdir_req gfs3_rmdir_req;
-
-struct gfs3_rmdir_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_rmdir_rsp gfs3_rmdir_rsp;
-
-struct gfs3_symlink_req {
- char pargfid[16];
- char *bname;
- u_int umask;
- char *linkname;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_symlink_req gfs3_symlink_req;
-
-struct gfs3_symlink_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_symlink_rsp gfs3_symlink_rsp;
-
-struct gfs3_rename_req {
- char oldgfid[16];
- char newgfid[16];
- char *oldbname;
- char *newbname;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_rename_req gfs3_rename_req;
-
-struct gfs3_rename_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preoldparent;
- struct gf_iatt postoldparent;
- struct gf_iatt prenewparent;
- struct gf_iatt postnewparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_rename_rsp gfs3_rename_rsp;
-
-struct gfs3_link_req {
- char oldgfid[16];
- char newgfid[16];
- char *newbname;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_link_req gfs3_link_req;
-
-struct gfs3_link_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_link_rsp gfs3_link_rsp;
-
-struct gfs3_truncate_req {
- char gfid[16];
- u_quad_t offset;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_truncate_req gfs3_truncate_req;
-
-struct gfs3_truncate_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_truncate_rsp gfs3_truncate_rsp;
-
-struct gfs3_open_req {
- char gfid[16];
- u_int flags;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_open_req gfs3_open_req;
-
-struct gfs3_open_rsp {
- int op_ret;
- int op_errno;
- quad_t fd;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_open_rsp gfs3_open_rsp;
-
-struct gfs3_read_req {
- char gfid[16];
- quad_t fd;
- u_quad_t offset;
- u_int size;
- u_int flag;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_read_req gfs3_read_req;
-
-struct gfs3_read_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- u_int size;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_read_rsp gfs3_read_rsp;
-
-struct gfs3_lookup_req {
- char gfid[16];
- char pargfid[16];
- u_int flags;
- char *bname;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_lookup_req gfs3_lookup_req;
-
-struct gfs3_lookup_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct gf_iatt postparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_lookup_rsp gfs3_lookup_rsp;
-
-struct gfs3_write_req {
- char gfid[16];
- quad_t fd;
- u_quad_t offset;
- u_int size;
- u_int flag;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_write_req gfs3_write_req;
-
-struct gfs3_write_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_write_rsp gfs3_write_rsp;
-
-struct gfs3_statfs_req {
- char gfid[16];
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_statfs_req gfs3_statfs_req;
-
-struct gfs3_statfs_rsp {
- int op_ret;
- int op_errno;
- struct gf_statfs statfs;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_statfs_rsp gfs3_statfs_rsp;
-
-struct gfs3_lk_req {
- char gfid[16];
- quad_t fd;
- u_int cmd;
- u_int type;
- struct gf_proto_flock flock;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_lk_req gfs3_lk_req;
-
-struct gfs3_lk_rsp {
- int op_ret;
- int op_errno;
- struct gf_proto_flock flock;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_lk_rsp gfs3_lk_rsp;
-
-struct gfs3_inodelk_req {
- char gfid[16];
- u_int cmd;
- u_int type;
- struct gf_proto_flock flock;
- char *volume;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_inodelk_req gfs3_inodelk_req;
-
-struct gfs3_finodelk_req {
- char gfid[16];
- quad_t fd;
- u_int cmd;
- u_int type;
- struct gf_proto_flock flock;
- char *volume;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_finodelk_req gfs3_finodelk_req;
-
-struct gfs3_flush_req {
- char gfid[16];
- quad_t fd;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_flush_req gfs3_flush_req;
-
-struct gfs3_fsync_req {
- char gfid[16];
- quad_t fd;
- u_int data;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fsync_req gfs3_fsync_req;
-
-struct gfs3_fsync_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fsync_rsp gfs3_fsync_rsp;
-
-struct gfs3_setxattr_req {
- char gfid[16];
- u_int flags;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_setxattr_req gfs3_setxattr_req;
-
-struct gfs3_fsetxattr_req {
- char gfid[16];
- quad_t fd;
- u_int flags;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fsetxattr_req gfs3_fsetxattr_req;
-
-struct gfs3_xattrop_req {
- char gfid[16];
- u_int flags;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_xattrop_req gfs3_xattrop_req;
-
-struct gfs3_xattrop_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_xattrop_rsp gfs3_xattrop_rsp;
-
-struct gfs3_fxattrop_req {
- char gfid[16];
- quad_t fd;
- u_int flags;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fxattrop_req gfs3_fxattrop_req;
-
-struct gfs3_fxattrop_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fxattrop_rsp gfs3_fxattrop_rsp;
-
-struct gfs3_getxattr_req {
- char gfid[16];
- u_int namelen;
- char *name;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_getxattr_req gfs3_getxattr_req;
-
-struct gfs3_getxattr_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_getxattr_rsp gfs3_getxattr_rsp;
-
-struct gfs3_fgetxattr_req {
- char gfid[16];
- quad_t fd;
- u_int namelen;
- char *name;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fgetxattr_req gfs3_fgetxattr_req;
-
-struct gfs3_fgetxattr_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fgetxattr_rsp gfs3_fgetxattr_rsp;
-
-struct gfs3_removexattr_req {
- char gfid[16];
- char *name;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_removexattr_req gfs3_removexattr_req;
-
-struct gfs3_fremovexattr_req {
- char gfid[16];
- quad_t fd;
- char *name;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fremovexattr_req gfs3_fremovexattr_req;
-
-struct gfs3_opendir_req {
- char gfid[16];
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_opendir_req gfs3_opendir_req;
-
-struct gfs3_opendir_rsp {
- int op_ret;
- int op_errno;
- quad_t fd;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_opendir_rsp gfs3_opendir_rsp;
-
-struct gfs3_fsyncdir_req {
- char gfid[16];
- quad_t fd;
- int data;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fsyncdir_req gfs3_fsyncdir_req;
-
-struct gfs3_readdir_req {
- char gfid[16];
- quad_t fd;
- u_quad_t offset;
- u_int size;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_readdir_req gfs3_readdir_req;
-
-struct gfs3_readdirp_req {
- char gfid[16];
- quad_t fd;
- u_quad_t offset;
- u_int size;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gfs3_readdirp_req gfs3_readdirp_req;
-
-struct gf_setvolume_req {
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf_setvolume_req gf_setvolume_req;
-
-struct gf_setvolume_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf_setvolume_rsp gf_setvolume_rsp;
-
-struct gfs3_access_req {
- char gfid[16];
- u_int mask;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_access_req gfs3_access_req;
-
-struct gfs3_create_req {
- char pargfid[16];
- u_int flags;
- u_int mode;
- u_int umask;
- char *bname;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_create_req gfs3_create_req;
-
-struct gfs3_create_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- u_quad_t fd;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_create_rsp gfs3_create_rsp;
-
-struct gfs3_ftruncate_req {
- char gfid[16];
- quad_t fd;
- u_quad_t offset;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_ftruncate_req gfs3_ftruncate_req;
-
-struct gfs3_ftruncate_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_ftruncate_rsp gfs3_ftruncate_rsp;
-
-struct gfs3_fstat_req {
- char gfid[16];
- quad_t fd;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fstat_req gfs3_fstat_req;
-
-struct gfs3_fstat_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt stat;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fstat_rsp gfs3_fstat_rsp;
-
-struct gfs3_entrylk_req {
- char gfid[16];
- u_int cmd;
- u_int type;
- u_quad_t namelen;
- char *name;
- char *volume;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_entrylk_req gfs3_entrylk_req;
-
-struct gfs3_fentrylk_req {
- char gfid[16];
- quad_t fd;
- u_int cmd;
- u_int type;
- u_quad_t namelen;
- char *name;
- char *volume;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fentrylk_req gfs3_fentrylk_req;
-
-struct gfs3_setattr_req {
- char gfid[16];
- struct gf_iatt stbuf;
- int valid;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_setattr_req gfs3_setattr_req;
-
-struct gfs3_setattr_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt statpre;
- struct gf_iatt statpost;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_setattr_rsp gfs3_setattr_rsp;
-
-struct gfs3_fsetattr_req {
- quad_t fd;
- struct gf_iatt stbuf;
- int valid;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fsetattr_req gfs3_fsetattr_req;
-
-struct gfs3_fsetattr_rsp {
- int op_ret;
- int op_errno;
- struct gf_iatt statpre;
- struct gf_iatt statpost;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_fsetattr_rsp gfs3_fsetattr_rsp;
-
-struct gfs3_rchecksum_req {
- quad_t fd;
- u_quad_t offset;
- u_int len;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_rchecksum_req gfs3_rchecksum_req;
-
-struct gfs3_rchecksum_rsp {
- int op_ret;
- int op_errno;
- u_int weak_checksum;
- struct {
- u_int strong_checksum_len;
- char *strong_checksum_val;
- } strong_checksum;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_rchecksum_rsp gfs3_rchecksum_rsp;
-
-struct gf_getspec_req {
- u_int flags;
- char *key;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gf_getspec_req gf_getspec_req;
-
-struct gf_getspec_rsp {
- int op_ret;
- int op_errno;
- char *spec;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gf_getspec_rsp gf_getspec_rsp;
-
-struct gf_log_req {
- struct {
- u_int msg_len;
- char *msg_val;
- } msg;
-};
-typedef struct gf_log_req gf_log_req;
-
-struct gf_notify_req {
- u_int flags;
- char *buf;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gf_notify_req gf_notify_req;
-
-struct gf_notify_rsp {
- int op_ret;
- int op_errno;
- u_int flags;
- char *buf;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gf_notify_rsp gf_notify_rsp;
-
-struct gfs3_releasedir_req {
- char gfid[16];
- quad_t fd;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_releasedir_req gfs3_releasedir_req;
-
-struct gfs3_release_req {
- char gfid[16];
- quad_t fd;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_release_req gfs3_release_req;
-
-struct gf_common_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gf_common_rsp gf_common_rsp;
-
-struct gfs3_dirlist {
- u_quad_t d_ino;
- u_quad_t d_off;
- u_int d_len;
- u_int d_type;
- char *name;
- struct gfs3_dirlist *nextentry;
-};
-typedef struct gfs3_dirlist gfs3_dirlist;
-
-struct gfs3_readdir_rsp {
- int op_ret;
- int op_errno;
- struct gfs3_dirlist *reply;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_readdir_rsp gfs3_readdir_rsp;
-
-struct gfs3_dirplist {
- u_quad_t d_ino;
- u_quad_t d_off;
- u_int d_len;
- u_int d_type;
- char *name;
- struct gf_iatt stat;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
- struct gfs3_dirplist *nextentry;
-};
-typedef struct gfs3_dirplist gfs3_dirplist;
-
-struct gfs3_readdirp_rsp {
- int op_ret;
- int op_errno;
- struct gfs3_dirplist *reply;
- struct {
- u_int xdata_len;
- char *xdata_val;
- } xdata;
-};
-typedef struct gfs3_readdirp_rsp gfs3_readdirp_rsp;
-
-struct gf_set_lk_ver_rsp {
- int op_ret;
- int op_errno;
- int lk_ver;
-};
-typedef struct gf_set_lk_ver_rsp gf_set_lk_ver_rsp;
-
-struct gf_set_lk_ver_req {
- char *uid;
- int lk_ver;
-};
-typedef struct gf_set_lk_ver_req gf_set_lk_ver_req;
-
-struct gf_event_notify_req {
- int op;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf_event_notify_req gf_event_notify_req;
-
-struct gf_event_notify_rsp {
- int op_ret;
- int op_errno;
- struct {
- u_int dict_len;
- char *dict_val;
- } dict;
-};
-typedef struct gf_event_notify_rsp gf_event_notify_rsp;
-
-/* the xdr functions */
-
-#if defined(__STDC__) || defined(__cplusplus)
-extern bool_t xdr_gf_statfs (XDR *, gf_statfs*);
-extern bool_t xdr_gf_proto_flock (XDR *, gf_proto_flock*);
-extern bool_t xdr_gf_iatt (XDR *, gf_iatt*);
-extern bool_t xdr_gfs3_stat_req (XDR *, gfs3_stat_req*);
-extern bool_t xdr_gfs3_stat_rsp (XDR *, gfs3_stat_rsp*);
-extern bool_t xdr_gfs3_readlink_req (XDR *, gfs3_readlink_req*);
-extern bool_t xdr_gfs3_readlink_rsp (XDR *, gfs3_readlink_rsp*);
-extern bool_t xdr_gfs3_mknod_req (XDR *, gfs3_mknod_req*);
-extern bool_t xdr_gfs3_mknod_rsp (XDR *, gfs3_mknod_rsp*);
-extern bool_t xdr_gfs3_mkdir_req (XDR *, gfs3_mkdir_req*);
-extern bool_t xdr_gfs3_mkdir_rsp (XDR *, gfs3_mkdir_rsp*);
-extern bool_t xdr_gfs3_unlink_req (XDR *, gfs3_unlink_req*);
-extern bool_t xdr_gfs3_unlink_rsp (XDR *, gfs3_unlink_rsp*);
-extern bool_t xdr_gfs3_rmdir_req (XDR *, gfs3_rmdir_req*);
-extern bool_t xdr_gfs3_rmdir_rsp (XDR *, gfs3_rmdir_rsp*);
-extern bool_t xdr_gfs3_symlink_req (XDR *, gfs3_symlink_req*);
-extern bool_t xdr_gfs3_symlink_rsp (XDR *, gfs3_symlink_rsp*);
-extern bool_t xdr_gfs3_rename_req (XDR *, gfs3_rename_req*);
-extern bool_t xdr_gfs3_rename_rsp (XDR *, gfs3_rename_rsp*);
-extern bool_t xdr_gfs3_link_req (XDR *, gfs3_link_req*);
-extern bool_t xdr_gfs3_link_rsp (XDR *, gfs3_link_rsp*);
-extern bool_t xdr_gfs3_truncate_req (XDR *, gfs3_truncate_req*);
-extern bool_t xdr_gfs3_truncate_rsp (XDR *, gfs3_truncate_rsp*);
-extern bool_t xdr_gfs3_open_req (XDR *, gfs3_open_req*);
-extern bool_t xdr_gfs3_open_rsp (XDR *, gfs3_open_rsp*);
-extern bool_t xdr_gfs3_read_req (XDR *, gfs3_read_req*);
-extern bool_t xdr_gfs3_read_rsp (XDR *, gfs3_read_rsp*);
-extern bool_t xdr_gfs3_lookup_req (XDR *, gfs3_lookup_req*);
-extern bool_t xdr_gfs3_lookup_rsp (XDR *, gfs3_lookup_rsp*);
-extern bool_t xdr_gfs3_write_req (XDR *, gfs3_write_req*);
-extern bool_t xdr_gfs3_write_rsp (XDR *, gfs3_write_rsp*);
-extern bool_t xdr_gfs3_statfs_req (XDR *, gfs3_statfs_req*);
-extern bool_t xdr_gfs3_statfs_rsp (XDR *, gfs3_statfs_rsp*);
-extern bool_t xdr_gfs3_lk_req (XDR *, gfs3_lk_req*);
-extern bool_t xdr_gfs3_lk_rsp (XDR *, gfs3_lk_rsp*);
-extern bool_t xdr_gfs3_inodelk_req (XDR *, gfs3_inodelk_req*);
-extern bool_t xdr_gfs3_finodelk_req (XDR *, gfs3_finodelk_req*);
-extern bool_t xdr_gfs3_flush_req (XDR *, gfs3_flush_req*);
-extern bool_t xdr_gfs3_fsync_req (XDR *, gfs3_fsync_req*);
-extern bool_t xdr_gfs3_fsync_rsp (XDR *, gfs3_fsync_rsp*);
-extern bool_t xdr_gfs3_setxattr_req (XDR *, gfs3_setxattr_req*);
-extern bool_t xdr_gfs3_fsetxattr_req (XDR *, gfs3_fsetxattr_req*);
-extern bool_t xdr_gfs3_xattrop_req (XDR *, gfs3_xattrop_req*);
-extern bool_t xdr_gfs3_xattrop_rsp (XDR *, gfs3_xattrop_rsp*);
-extern bool_t xdr_gfs3_fxattrop_req (XDR *, gfs3_fxattrop_req*);
-extern bool_t xdr_gfs3_fxattrop_rsp (XDR *, gfs3_fxattrop_rsp*);
-extern bool_t xdr_gfs3_getxattr_req (XDR *, gfs3_getxattr_req*);
-extern bool_t xdr_gfs3_getxattr_rsp (XDR *, gfs3_getxattr_rsp*);
-extern bool_t xdr_gfs3_fgetxattr_req (XDR *, gfs3_fgetxattr_req*);
-extern bool_t xdr_gfs3_fgetxattr_rsp (XDR *, gfs3_fgetxattr_rsp*);
-extern bool_t xdr_gfs3_removexattr_req (XDR *, gfs3_removexattr_req*);
-extern bool_t xdr_gfs3_fremovexattr_req (XDR *, gfs3_fremovexattr_req*);
-extern bool_t xdr_gfs3_opendir_req (XDR *, gfs3_opendir_req*);
-extern bool_t xdr_gfs3_opendir_rsp (XDR *, gfs3_opendir_rsp*);
-extern bool_t xdr_gfs3_fsyncdir_req (XDR *, gfs3_fsyncdir_req*);
-extern bool_t xdr_gfs3_readdir_req (XDR *, gfs3_readdir_req*);
-extern bool_t xdr_gfs3_readdirp_req (XDR *, gfs3_readdirp_req*);
-extern bool_t xdr_gf_setvolume_req (XDR *, gf_setvolume_req*);
-extern bool_t xdr_gf_setvolume_rsp (XDR *, gf_setvolume_rsp*);
-extern bool_t xdr_gfs3_access_req (XDR *, gfs3_access_req*);
-extern bool_t xdr_gfs3_create_req (XDR *, gfs3_create_req*);
-extern bool_t xdr_gfs3_create_rsp (XDR *, gfs3_create_rsp*);
-extern bool_t xdr_gfs3_ftruncate_req (XDR *, gfs3_ftruncate_req*);
-extern bool_t xdr_gfs3_ftruncate_rsp (XDR *, gfs3_ftruncate_rsp*);
-extern bool_t xdr_gfs3_fstat_req (XDR *, gfs3_fstat_req*);
-extern bool_t xdr_gfs3_fstat_rsp (XDR *, gfs3_fstat_rsp*);
-extern bool_t xdr_gfs3_entrylk_req (XDR *, gfs3_entrylk_req*);
-extern bool_t xdr_gfs3_fentrylk_req (XDR *, gfs3_fentrylk_req*);
-extern bool_t xdr_gfs3_setattr_req (XDR *, gfs3_setattr_req*);
-extern bool_t xdr_gfs3_setattr_rsp (XDR *, gfs3_setattr_rsp*);
-extern bool_t xdr_gfs3_fsetattr_req (XDR *, gfs3_fsetattr_req*);
-extern bool_t xdr_gfs3_fsetattr_rsp (XDR *, gfs3_fsetattr_rsp*);
-extern bool_t xdr_gfs3_rchecksum_req (XDR *, gfs3_rchecksum_req*);
-extern bool_t xdr_gfs3_rchecksum_rsp (XDR *, gfs3_rchecksum_rsp*);
-extern bool_t xdr_gf_getspec_req (XDR *, gf_getspec_req*);
-extern bool_t xdr_gf_getspec_rsp (XDR *, gf_getspec_rsp*);
-extern bool_t xdr_gf_log_req (XDR *, gf_log_req*);
-extern bool_t xdr_gf_notify_req (XDR *, gf_notify_req*);
-extern bool_t xdr_gf_notify_rsp (XDR *, gf_notify_rsp*);
-extern bool_t xdr_gfs3_releasedir_req (XDR *, gfs3_releasedir_req*);
-extern bool_t xdr_gfs3_release_req (XDR *, gfs3_release_req*);
-extern bool_t xdr_gf_common_rsp (XDR *, gf_common_rsp*);
-extern bool_t xdr_gfs3_dirlist (XDR *, gfs3_dirlist*);
-extern bool_t xdr_gfs3_readdir_rsp (XDR *, gfs3_readdir_rsp*);
-extern bool_t xdr_gfs3_dirplist (XDR *, gfs3_dirplist*);
-extern bool_t xdr_gfs3_readdirp_rsp (XDR *, gfs3_readdirp_rsp*);
-extern bool_t xdr_gf_set_lk_ver_rsp (XDR *, gf_set_lk_ver_rsp*);
-extern bool_t xdr_gf_set_lk_ver_req (XDR *, gf_set_lk_ver_req*);
-extern bool_t xdr_gf_event_notify_req (XDR *, gf_event_notify_req*);
-extern bool_t xdr_gf_event_notify_rsp (XDR *, gf_event_notify_rsp*);
-
-#else /* K&R C */
-extern bool_t xdr_gf_statfs ();
-extern bool_t xdr_gf_proto_flock ();
-extern bool_t xdr_gf_iatt ();
-extern bool_t xdr_gfs3_stat_req ();
-extern bool_t xdr_gfs3_stat_rsp ();
-extern bool_t xdr_gfs3_readlink_req ();
-extern bool_t xdr_gfs3_readlink_rsp ();
-extern bool_t xdr_gfs3_mknod_req ();
-extern bool_t xdr_gfs3_mknod_rsp ();
-extern bool_t xdr_gfs3_mkdir_req ();
-extern bool_t xdr_gfs3_mkdir_rsp ();
-extern bool_t xdr_gfs3_unlink_req ();
-extern bool_t xdr_gfs3_unlink_rsp ();
-extern bool_t xdr_gfs3_rmdir_req ();
-extern bool_t xdr_gfs3_rmdir_rsp ();
-extern bool_t xdr_gfs3_symlink_req ();
-extern bool_t xdr_gfs3_symlink_rsp ();
-extern bool_t xdr_gfs3_rename_req ();
-extern bool_t xdr_gfs3_rename_rsp ();
-extern bool_t xdr_gfs3_link_req ();
-extern bool_t xdr_gfs3_link_rsp ();
-extern bool_t xdr_gfs3_truncate_req ();
-extern bool_t xdr_gfs3_truncate_rsp ();
-extern bool_t xdr_gfs3_open_req ();
-extern bool_t xdr_gfs3_open_rsp ();
-extern bool_t xdr_gfs3_read_req ();
-extern bool_t xdr_gfs3_read_rsp ();
-extern bool_t xdr_gfs3_lookup_req ();
-extern bool_t xdr_gfs3_lookup_rsp ();
-extern bool_t xdr_gfs3_write_req ();
-extern bool_t xdr_gfs3_write_rsp ();
-extern bool_t xdr_gfs3_statfs_req ();
-extern bool_t xdr_gfs3_statfs_rsp ();
-extern bool_t xdr_gfs3_lk_req ();
-extern bool_t xdr_gfs3_lk_rsp ();
-extern bool_t xdr_gfs3_inodelk_req ();
-extern bool_t xdr_gfs3_finodelk_req ();
-extern bool_t xdr_gfs3_flush_req ();
-extern bool_t xdr_gfs3_fsync_req ();
-extern bool_t xdr_gfs3_fsync_rsp ();
-extern bool_t xdr_gfs3_setxattr_req ();
-extern bool_t xdr_gfs3_fsetxattr_req ();
-extern bool_t xdr_gfs3_xattrop_req ();
-extern bool_t xdr_gfs3_xattrop_rsp ();
-extern bool_t xdr_gfs3_fxattrop_req ();
-extern bool_t xdr_gfs3_fxattrop_rsp ();
-extern bool_t xdr_gfs3_getxattr_req ();
-extern bool_t xdr_gfs3_getxattr_rsp ();
-extern bool_t xdr_gfs3_fgetxattr_req ();
-extern bool_t xdr_gfs3_fgetxattr_rsp ();
-extern bool_t xdr_gfs3_removexattr_req ();
-extern bool_t xdr_gfs3_fremovexattr_req ();
-extern bool_t xdr_gfs3_opendir_req ();
-extern bool_t xdr_gfs3_opendir_rsp ();
-extern bool_t xdr_gfs3_fsyncdir_req ();
-extern bool_t xdr_gfs3_readdir_req ();
-extern bool_t xdr_gfs3_readdirp_req ();
-extern bool_t xdr_gf_setvolume_req ();
-extern bool_t xdr_gf_setvolume_rsp ();
-extern bool_t xdr_gfs3_access_req ();
-extern bool_t xdr_gfs3_create_req ();
-extern bool_t xdr_gfs3_create_rsp ();
-extern bool_t xdr_gfs3_ftruncate_req ();
-extern bool_t xdr_gfs3_ftruncate_rsp ();
-extern bool_t xdr_gfs3_fstat_req ();
-extern bool_t xdr_gfs3_fstat_rsp ();
-extern bool_t xdr_gfs3_entrylk_req ();
-extern bool_t xdr_gfs3_fentrylk_req ();
-extern bool_t xdr_gfs3_setattr_req ();
-extern bool_t xdr_gfs3_setattr_rsp ();
-extern bool_t xdr_gfs3_fsetattr_req ();
-extern bool_t xdr_gfs3_fsetattr_rsp ();
-extern bool_t xdr_gfs3_rchecksum_req ();
-extern bool_t xdr_gfs3_rchecksum_rsp ();
-extern bool_t xdr_gf_getspec_req ();
-extern bool_t xdr_gf_getspec_rsp ();
-extern bool_t xdr_gf_log_req ();
-extern bool_t xdr_gf_notify_req ();
-extern bool_t xdr_gf_notify_rsp ();
-extern bool_t xdr_gfs3_releasedir_req ();
-extern bool_t xdr_gfs3_release_req ();
-extern bool_t xdr_gf_common_rsp ();
-extern bool_t xdr_gfs3_dirlist ();
-extern bool_t xdr_gfs3_readdir_rsp ();
-extern bool_t xdr_gfs3_dirplist ();
-extern bool_t xdr_gfs3_readdirp_rsp ();
-extern bool_t xdr_gf_set_lk_ver_rsp ();
-extern bool_t xdr_gf_set_lk_ver_req ();
-extern bool_t xdr_gf_event_notify_req ();
-extern bool_t xdr_gf_event_notify_rsp ();
-
-#endif /* K&R C */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_GLUSTERFS3_XDR_H_RPCGEN */
diff --git a/rpc/xdr/src/glusterfs3-xdr.x b/rpc/xdr/src/glusterfs3-xdr.x
index ad261423d2e..1c99099a721 100644
--- a/rpc/xdr/src/glusterfs3-xdr.x
+++ b/rpc/xdr/src/glusterfs3-xdr.x
@@ -1,40 +1,63 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/glusterfs-fops.h>
+%#include "rpc-common-xdr.h"
+
#define GF_REQUEST_MAXGROUPS 16
struct gf_statfs {
- unsigned hyper bsize;
- unsigned hyper frsize;
- unsigned hyper blocks;
- unsigned hyper bfree;
- unsigned hyper bavail;
- unsigned hyper files;
- unsigned hyper ffree;
- unsigned hyper favail;
- unsigned hyper fsid;
- unsigned hyper flag;
- unsigned hyper namemax;
+ u_quad_t bsize;
+ u_quad_t frsize;
+ u_quad_t blocks;
+ u_quad_t bfree;
+ u_quad_t bavail;
+ u_quad_t files;
+ u_quad_t ffree;
+ u_quad_t favail;
+ u_quad_t fsid;
+ u_quad_t flag;
+ u_quad_t namemax;
};
+
struct gf_proto_flock {
unsigned int type;
unsigned int whence;
- unsigned hyper start;
- unsigned hyper len;
+ u_quad_t start;
+ u_quad_t len;
unsigned int pid;
opaque lk_owner<>;
} ;
+struct gf_proto_lease {
+ unsigned int cmd;
+ unsigned int lease_type;
+ opaque lease_id[16];
+ unsigned int lease_flags;
+} ;
struct gf_iatt {
opaque ia_gfid[16];
- unsigned hyper ia_ino; /* inode number */
- unsigned hyper ia_dev; /* backing device ID */
+ u_quad_t ia_ino; /* inode number */
+ u_quad_t ia_dev; /* backing device ID */
unsigned int mode; /* mode (type + protection )*/
unsigned int ia_nlink; /* Link count */
unsigned int ia_uid; /* user ID of owner */
unsigned int ia_gid; /* group ID of owner */
- unsigned hyper ia_rdev; /* device ID (if special file) */
- unsigned hyper ia_size; /* file size in bytes */
+ u_quad_t ia_rdev; /* device ID (if special file) */
+ u_quad_t ia_size; /* file size in bytes */
unsigned int ia_blksize; /* blocksize for filesystem I/O */
- unsigned hyper ia_blocks; /* number of 512B blocks allocated */
+ u_quad_t ia_blocks; /* number of 512B blocks allocated */
unsigned int ia_atime; /* last access time */
unsigned int ia_atime_nsec;
unsigned int ia_mtime; /* last modification time */
@@ -43,6 +66,22 @@ struct gf_iatt {
unsigned int ia_ctime_nsec;
};
+
+struct gfs3_cbk_cache_invalidation_req {
+ string gfid<>;
+ unsigned int event_type; /* Upcall event type */
+ unsigned int flags; /* or mask of events incase of inotify */
+ unsigned int expire_time_attr; /* the amount of time which client
+ * can cache this entry */
+ gf_iatt stat; /* Updated/current stat of the file/dir */
+ gf_iatt parent_stat; /* Updated stat of the parent dir
+ * needed in case of create, mkdir,
+ * unlink, rmdir, rename fops */
+ gf_iatt oldparent_stat; /* Updated stat of the oldparent dir
+ needed in case of rename fop */
+ opaque xdata<>; /* Extra data */
+};
+
struct gfs3_stat_req {
opaque gfid[16];
opaque xdata<>; /* Extra data */
@@ -50,7 +89,7 @@ struct gfs3_stat_req {
struct gfs3_stat_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
+ gf_iatt stat;
opaque xdata<>; /* Extra data */
} ;
@@ -63,7 +102,7 @@ struct gfs3_readlink_req {
struct gfs3_readlink_rsp {
int op_ret;
int op_errno;
- struct gf_iatt buf;
+ gf_iatt buf;
string path<>; /* NULL terminated */
opaque xdata<>; /* Extra data */
} ;
@@ -71,7 +110,7 @@ struct gfs3_readlink_req {
struct gfs3_mknod_req {
opaque pargfid[16];
- unsigned hyper dev;
+ u_quad_t dev;
unsigned int mode;
unsigned int umask;
string bname<>; /* NULL terminated */
@@ -80,9 +119,9 @@ struct gfs3_readlink_req {
struct gfs3_mknod_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
+ gf_iatt stat;
+ gf_iatt preparent;
+ gf_iatt postparent;
opaque xdata<>; /* Extra data */
};
@@ -97,9 +136,9 @@ struct gfs3_readlink_req {
struct gfs3_mkdir_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
+ gf_iatt stat;
+ gf_iatt preparent;
+ gf_iatt postparent;
opaque xdata<>; /* Extra data */
} ;
@@ -113,8 +152,8 @@ struct gfs3_readlink_req {
struct gfs3_unlink_rsp {
int op_ret;
int op_errno;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
+ gf_iatt preparent;
+ gf_iatt postparent;
opaque xdata<>; /* Extra data */
};
@@ -128,8 +167,8 @@ struct gfs3_readlink_req {
struct gfs3_rmdir_rsp {
int op_ret;
int op_errno;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
+ gf_iatt preparent;
+ gf_iatt postparent;
opaque xdata<>; /* Extra data */
};
@@ -144,9 +183,9 @@ struct gfs3_readlink_req {
struct gfs3_symlink_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
+ gf_iatt stat;
+ gf_iatt preparent;
+ gf_iatt postparent;
opaque xdata<>; /* Extra data */
};
@@ -161,11 +200,11 @@ struct gfs3_readlink_req {
struct gfs3_rename_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preoldparent;
- struct gf_iatt postoldparent;
- struct gf_iatt prenewparent;
- struct gf_iatt postnewparent;
+ gf_iatt stat;
+ gf_iatt preoldparent;
+ gf_iatt postoldparent;
+ gf_iatt prenewparent;
+ gf_iatt postnewparent;
opaque xdata<>; /* Extra data */
};
@@ -179,22 +218,22 @@ struct gfs3_readlink_req {
struct gfs3_link_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
+ gf_iatt stat;
+ gf_iatt preparent;
+ gf_iatt postparent;
opaque xdata<>; /* Extra data */
};
struct gfs3_truncate_req {
opaque gfid[16];
- unsigned hyper offset;
+ u_quad_t offset;
opaque xdata<>; /* Extra data */
};
struct gfs3_truncate_rsp {
int op_ret;
int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
+ gf_iatt prestat;
+ gf_iatt poststat;
opaque xdata<>; /* Extra data */
};
@@ -207,15 +246,15 @@ struct gfs3_readlink_req {
struct gfs3_open_rsp {
int op_ret;
int op_errno;
- hyper fd;
+ quad_t fd;
opaque xdata<>; /* Extra data */
};
struct gfs3_read_req {
opaque gfid[16];
- hyper fd;
- unsigned hyper offset;
+ quad_t fd;
+ u_quad_t offset;
unsigned int size;
unsigned int flag;
opaque xdata<>; /* Extra data */
@@ -223,7 +262,7 @@ struct gfs3_readlink_req {
struct gfs3_read_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
+ gf_iatt stat;
unsigned int size;
opaque xdata<>; /* Extra data */
} ;
@@ -238,8 +277,8 @@ struct gfs3_lookup_req {
struct gfs3_lookup_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
- struct gf_iatt postparent;
+ gf_iatt stat;
+ gf_iatt postparent;
opaque xdata<>; /* Extra data */
} ;
@@ -247,8 +286,8 @@ struct gfs3_lookup_req {
struct gfs3_write_req {
opaque gfid[16];
- hyper fd;
- unsigned hyper offset;
+ quad_t fd;
+ u_quad_t offset;
unsigned int size;
unsigned int flag;
opaque xdata<>; /* Extra data */
@@ -256,8 +295,8 @@ struct gfs3_lookup_req {
struct gfs3_write_rsp {
int op_ret;
int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
+ gf_iatt prestat;
+ gf_iatt poststat;
opaque xdata<>; /* Extra data */
} ;
@@ -269,40 +308,60 @@ struct gfs3_lookup_req {
struct gfs3_statfs_rsp {
int op_ret;
int op_errno;
- struct gf_statfs statfs;
+ gf_statfs statfs;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_lk_req {
opaque gfid[16];
- hyper fd;
+ int64_t fd;
unsigned int cmd;
unsigned int type;
- struct gf_proto_flock flock;
+ gf_proto_flock flock;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_lk_rsp {
int op_ret;
int op_errno;
- struct gf_proto_flock flock;
+ gf_proto_flock flock;
opaque xdata<>; /* Extra data */
} ;
+struct gfs3_lease_req {
+ opaque gfid[16];
+ gf_proto_lease lease;
+ opaque xdata<>; /* Extra data */
+} ;
+
+struct gfs3_lease_rsp {
+ int op_ret;
+ int op_errno;
+ gf_proto_lease lease;
+ opaque xdata<>; /* Extra data */
+} ;
+
+struct gfs3_recall_lease_req {
+ opaque gfid[16];
+ unsigned int lease_type;
+ opaque tid[16];
+ opaque xdata<>; /* Extra data */
+} ;
+
struct gfs3_inodelk_req {
opaque gfid[16];
unsigned int cmd;
unsigned int type;
- struct gf_proto_flock flock;
+ gf_proto_flock flock;
string volume<>;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_finodelk_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
unsigned int cmd;
unsigned int type;
- struct gf_proto_flock flock;
+ gf_proto_flock flock;
string volume<>;
opaque xdata<>; /* Extra data */
} ;
@@ -310,22 +369,22 @@ struct gfs3_finodelk_req {
struct gfs3_flush_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_fsync_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
unsigned int data;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_fsync_rsp {
int op_ret;
int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
+ gf_iatt prestat;
+ gf_iatt poststat;
opaque xdata<>; /* Extra data */
} ;
@@ -341,7 +400,7 @@ struct gfs3_finodelk_req {
struct gfs3_fsetxattr_req {
opaque gfid[16];
- hyper fd;
+ int64_t fd;
unsigned int flags;
opaque dict<>;
opaque xdata<>; /* Extra data */
@@ -366,7 +425,7 @@ struct gfs3_finodelk_req {
struct gfs3_fxattrop_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
unsigned int flags;
opaque dict<>;
opaque xdata<>; /* Extra data */
@@ -396,7 +455,7 @@ struct gfs3_finodelk_req {
struct gfs3_fgetxattr_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
unsigned int namelen;
string name<>;
opaque xdata<>; /* Extra data */
@@ -417,7 +476,7 @@ struct gfs3_finodelk_req {
struct gfs3_fremovexattr_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
string name<>;
opaque xdata<>; /* Extra data */
} ;
@@ -431,44 +490,35 @@ struct gfs3_finodelk_req {
struct gfs3_opendir_rsp {
int op_ret;
int op_errno;
- hyper fd;
+ quad_t fd;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_fsyncdir_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
int data;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_readdir_req {
opaque gfid[16];
- hyper fd;
- unsigned hyper offset;
+ quad_t fd;
+ u_quad_t offset;
unsigned int size;
opaque xdata<>; /* Extra data */
};
struct gfs3_readdirp_req {
opaque gfid[16];
- hyper fd;
- unsigned hyper offset;
+ quad_t fd;
+ u_quad_t offset;
unsigned int size;
opaque dict<>;
} ;
- struct gf_setvolume_req {
- opaque dict<>;
-} ;
- struct gf_setvolume_rsp {
- int op_ret;
- int op_errno;
- opaque dict<>;
-} ;
-
struct gfs3_access_req {
opaque gfid[16];
unsigned int mask;
@@ -487,10 +537,10 @@ struct gfs3_create_req {
struct gfs3_create_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
- unsigned hyper fd;
- struct gf_iatt preparent;
- struct gf_iatt postparent;
+ gf_iatt stat;
+ u_quad_t fd;
+ gf_iatt preparent;
+ gf_iatt postparent;
opaque xdata<>; /* Extra data */
} ;
@@ -498,28 +548,28 @@ struct gfs3_create_rsp {
struct gfs3_ftruncate_req {
opaque gfid[16];
- hyper fd;
- unsigned hyper offset;
+ quad_t fd;
+ u_quad_t offset;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_ftruncate_rsp {
int op_ret;
int op_errno;
- struct gf_iatt prestat;
- struct gf_iatt poststat;
+ gf_iatt prestat;
+ gf_iatt poststat;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_fstat_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_fstat_rsp {
int op_ret;
int op_errno;
- struct gf_iatt stat;
+ gf_iatt stat;
opaque xdata<>; /* Extra data */
} ;
@@ -529,7 +579,7 @@ struct gfs3_fstat_req {
opaque gfid[16];
unsigned int cmd;
unsigned int type;
- unsigned hyper namelen;
+ u_quad_t namelen;
string name<>;
string volume<>;
opaque xdata<>; /* Extra data */
@@ -537,10 +587,10 @@ struct gfs3_fstat_req {
struct gfs3_fentrylk_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
unsigned int cmd;
unsigned int type;
- unsigned hyper namelen;
+ u_quad_t namelen;
string name<>;
string volume<>;
opaque xdata<>; /* Extra data */
@@ -549,35 +599,85 @@ struct gfs3_fstat_req {
struct gfs3_setattr_req {
opaque gfid[16];
- struct gf_iatt stbuf;
+ gf_iatt stbuf;
int valid;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_setattr_rsp {
int op_ret;
int op_errno;
- struct gf_iatt statpre;
- struct gf_iatt statpost;
+ gf_iatt statpre;
+ gf_iatt statpost;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_fsetattr_req {
- hyper fd;
- struct gf_iatt stbuf;
+ quad_t fd;
+ gf_iatt stbuf;
int valid;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_fsetattr_rsp {
int op_ret;
int op_errno;
- struct gf_iatt statpre;
- struct gf_iatt statpost;
+ gf_iatt statpre;
+ gf_iatt statpost;
opaque xdata<>; /* Extra data */
} ;
+ struct gfs3_fallocate_req {
+ opaque gfid[16];
+ quad_t fd;
+ unsigned int flags;
+ u_quad_t offset;
+ u_quad_t size;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_fallocate_rsp {
+ int op_ret;
+ int op_errno;
+ gf_iatt statpre;
+ gf_iatt statpost;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_discard_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ u_quad_t size;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_discard_rsp {
+ int op_ret;
+ int op_errno;
+ gf_iatt statpre;
+ gf_iatt statpost;
+ opaque xdata<>; /* Extra data */
+} ;
+
+ struct gfs3_zerofill_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ u_quad_t size;
+ opaque xdata<>;
+} ;
+
+ struct gfs3_zerofill_rsp {
+ int op_ret;
+ int op_errno;
+ gf_iatt statpre;
+ gf_iatt statpost;
+ opaque xdata<>;
+} ;
+
+
struct gfs3_rchecksum_req {
- hyper fd;
- unsigned hyper offset;
+ quad_t fd;
+ u_quad_t offset;
unsigned int len;
opaque xdata<>; /* Extra data */
} ;
@@ -590,6 +690,44 @@ struct gfs3_fstat_req {
} ;
+struct gfs3_ipc_req {
+ int op;
+ opaque xdata<>;
+};
+
+struct gfs3_ipc_rsp {
+ int op_ret;
+ int op_errno;
+ opaque xdata<>;
+};
+
+
+struct gfs3_seek_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ int what;
+ opaque xdata<>;
+};
+
+struct gfs3_seek_rsp {
+ int op_ret;
+ int op_errno;
+ u_quad_t offset;
+ opaque xdata<>;
+};
+
+
+ struct gf_setvolume_req {
+ opaque dict<>;
+} ;
+ struct gf_setvolume_rsp {
+ int op_ret;
+ int op_errno;
+ opaque dict<>;
+} ;
+
+
struct gf_getspec_req {
unsigned int flags;
string key<>;
@@ -602,10 +740,29 @@ struct gfs3_fstat_req {
opaque xdata<>; /* Extra data */
} ;
+ struct gf_get_volume_info_req {
+ opaque dict<>; /* Extra data */
+} ;
+ struct gf_get_volume_info_rsp {
+ int op_ret;
+ int op_errno;
+ string op_errstr<>;
+ opaque dict<>; /* Extra data */
+} ;
+
+ struct gf_mgmt_hndsk_req {
+ opaque hndsk<>;
+} ;
+
+ struct gf_mgmt_hndsk_rsp {
+ int op_ret;
+ int op_errno;
+ opaque hndsk<>;
+} ;
struct gf_log_req {
- opaque msg<>;
-};
+ opaque msg<>;
+} ;
struct gf_notify_req {
unsigned int flags;
@@ -622,54 +779,48 @@ struct gfs3_fstat_req {
struct gfs3_releasedir_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
opaque xdata<>; /* Extra data */
} ;
struct gfs3_release_req {
opaque gfid[16];
- hyper fd;
+ quad_t fd;
opaque xdata<>; /* Extra data */
} ;
-struct gf_common_rsp {
- int op_ret;
- int op_errno;
- opaque xdata<>; /* Extra data */
-} ;
-
struct gfs3_dirlist {
- unsigned hyper d_ino;
- unsigned hyper d_off;
+ u_quad_t d_ino;
+ u_quad_t d_off;
unsigned int d_len;
unsigned int d_type;
string name<>;
- struct gfs3_dirlist *nextentry;
+ gfs3_dirlist *nextentry;
};
struct gfs3_readdir_rsp {
int op_ret;
int op_errno;
- struct gfs3_dirlist *reply;
+ gfs3_dirlist *reply;
opaque xdata<>; /* Extra data */
};
struct gfs3_dirplist {
- unsigned hyper d_ino;
- unsigned hyper d_off;
+ u_quad_t d_ino;
+ u_quad_t d_off;
unsigned int d_len;
unsigned int d_type;
string name<>;
- struct gf_iatt stat;
+ gf_iatt stat;
opaque dict<>;
- struct gfs3_dirplist *nextentry;
+ gfs3_dirplist *nextentry;
};
struct gfs3_readdirp_rsp {
int op_ret;
int op_errno;
- struct gfs3_dirplist *reply;
+ gfs3_dirplist *reply;
opaque xdata<>; /* Extra data */
};
@@ -694,3 +845,46 @@ struct gf_event_notify_rsp {
int op_errno;
opaque dict<>;
};
+
+
+struct gf_getsnap_name_uuid_req {
+ opaque dict<>;
+};
+
+struct gf_getsnap_name_uuid_rsp {
+ int op_ret;
+ int op_errno;
+ string op_errstr<>;
+ opaque dict<>;
+};
+
+struct gfs3_locklist {
+ gf_proto_flock flock;
+ string client_uid<>;
+ unsigned int lk_flags;
+ gfs3_locklist *nextentry;
+};
+
+struct gfs3_getactivelk_rsp {
+ int op_ret;
+ int op_errno;
+ gfs3_locklist *reply;
+ opaque xdata<>;
+};
+
+struct gfs3_getactivelk_req {
+ opaque gfid[16];
+ opaque xdata<>;
+};
+
+struct gfs3_setactivelk_rsp {
+ int op_ret;
+ int op_errno;
+ opaque xdata<>;
+};
+
+struct gfs3_setactivelk_req {
+ opaque gfid[16];
+ gfs3_locklist *request;
+ opaque xdata<>;
+};
diff --git a/rpc/xdr/src/glusterfs3.h b/rpc/xdr/src/glusterfs3.h
index 82a9e200189..86b3a4c0e5d 100644
--- a/rpc/xdr/src/glusterfs3.h
+++ b/rpc/xdr/src/glusterfs3.h
@@ -1,23 +1,13 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _GLUSTERFS3_H
#define _GLUSTERFS3_H
@@ -25,252 +15,943 @@
#include "xdr-generic.h"
#include "glusterfs3-xdr.h"
-#include "iatt.h"
-
-#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
-#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
-#define xdr_encoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
-#define xdr_decoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
-
-
-#define GF_O_ACCMODE 003
-#define GF_O_RDONLY 00
-#define GF_O_WRONLY 01
-#define GF_O_RDWR 02
-#define GF_O_CREAT 0100
-#define GF_O_EXCL 0200
-#define GF_O_NOCTTY 0400
-#define GF_O_TRUNC 01000
-#define GF_O_APPEND 02000
-#define GF_O_NONBLOCK 04000
-#define GF_O_SYNC 010000
-#define GF_O_ASYNC 020000
-
-#define GF_O_DIRECT 040000
-#define GF_O_DIRECTORY 0200000
-#define GF_O_NOFOLLOW 0400000
-#define GF_O_NOATIME 01000000
-#define GF_O_CLOEXEC 02000000
-
-#define GF_O_LARGEFILE 0100000
-
-#define XLATE_BIT(from, to, bit) do { \
- if (from & bit) \
- to = to | GF_##bit; \
- } while (0)
-
-#define UNXLATE_BIT(from, to, bit) do { \
- if (from & GF_##bit) \
- to = to | bit; \
- } while (0)
-
-#define XLATE_ACCESSMODE(from, to) do { \
- switch (from & O_ACCMODE) { \
- case O_RDONLY: to |= GF_O_RDONLY; \
- break; \
- case O_WRONLY: to |= GF_O_WRONLY; \
- break; \
- case O_RDWR: to |= GF_O_RDWR; \
- break; \
- } \
- } while (0)
-
-#define UNXLATE_ACCESSMODE(from, to) do { \
- switch (from & GF_O_ACCMODE) { \
- case GF_O_RDONLY: to |= O_RDONLY; \
- break; \
- case GF_O_WRONLY: to |= O_WRONLY; \
- break; \
- case GF_O_RDWR: to |= O_RDWR; \
- break; \
- } \
- } while (0)
+#include "glusterfs4-xdr.h"
+#include <glusterfs/iatt.h>
+#include "protocol-common.h"
+#include <glusterfs/upcall-utils.h>
+
+#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
+#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
+#define xdr_encoded_length(xdr) \
+ (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+#define xdr_decoded_length(xdr) \
+ (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+
+#define GF_O_ACCMODE 003
+#define GF_O_RDONLY 00
+#define GF_O_WRONLY 01
+#define GF_O_RDWR 02
+#define GF_O_CREAT 0100
+#define GF_O_EXCL 0200
+#define GF_O_NOCTTY 0400
+#define GF_O_TRUNC 01000
+#define GF_O_APPEND 02000
+#define GF_O_NONBLOCK 04000
+#define GF_O_SYNC 010000
+#define GF_O_ASYNC 020000
+
+#define GF_O_DIRECT 040000
+#define GF_O_DIRECTORY 0200000
+#define GF_O_NOFOLLOW 0400000
+#define GF_O_NOATIME 01000000
+#define GF_O_CLOEXEC 02000000
+
+#define GF_O_LARGEFILE 0100000
+
+#define GF_O_FMODE_EXEC 040
+
+#define XLATE_BIT(from, to, bit) \
+ do { \
+ if (from & bit) \
+ to = to | GF_##bit; \
+ } while (0)
+
+#define UNXLATE_BIT(from, to, bit) \
+ do { \
+ if (from & GF_##bit) \
+ to = to | bit; \
+ } while (0)
+
+#define XLATE_ACCESSMODE(from, to) \
+ do { \
+ switch (from & O_ACCMODE) { \
+ case O_RDONLY: \
+ to |= GF_O_RDONLY; \
+ break; \
+ case O_WRONLY: \
+ to |= GF_O_WRONLY; \
+ break; \
+ case O_RDWR: \
+ to |= GF_O_RDWR; \
+ break; \
+ } \
+ } while (0)
+
+#define UNXLATE_ACCESSMODE(from, to) \
+ do { \
+ switch (from & GF_O_ACCMODE) { \
+ case GF_O_RDONLY: \
+ to |= O_RDONLY; \
+ break; \
+ case GF_O_WRONLY: \
+ to |= O_WRONLY; \
+ break; \
+ case GF_O_RDWR: \
+ to |= O_RDWR; \
+ break; \
+ } \
+ } while (0)
static inline uint32_t
-gf_flags_from_flags (uint32_t flags)
+gf_flags_from_flags(uint32_t flags)
{
- uint32_t gf_flags = 0;
-
- XLATE_ACCESSMODE (flags, gf_flags);
-
- XLATE_BIT (flags, gf_flags, O_CREAT);
- XLATE_BIT (flags, gf_flags, O_EXCL);
- XLATE_BIT (flags, gf_flags, O_NOCTTY);
- XLATE_BIT (flags, gf_flags, O_TRUNC);
- XLATE_BIT (flags, gf_flags, O_APPEND);
- XLATE_BIT (flags, gf_flags, O_NONBLOCK);
- XLATE_BIT (flags, gf_flags, O_SYNC);
- XLATE_BIT (flags, gf_flags, O_ASYNC);
-
- XLATE_BIT (flags, gf_flags, O_DIRECT);
- XLATE_BIT (flags, gf_flags, O_DIRECTORY);
- XLATE_BIT (flags, gf_flags, O_NOFOLLOW);
+ uint32_t gf_flags = 0;
+
+ XLATE_ACCESSMODE(flags, gf_flags);
+
+ XLATE_BIT(flags, gf_flags, O_CREAT);
+ XLATE_BIT(flags, gf_flags, O_EXCL);
+ XLATE_BIT(flags, gf_flags, O_NOCTTY);
+ XLATE_BIT(flags, gf_flags, O_TRUNC);
+ XLATE_BIT(flags, gf_flags, O_APPEND);
+ XLATE_BIT(flags, gf_flags, O_NONBLOCK);
+ XLATE_BIT(flags, gf_flags, O_SYNC);
+ XLATE_BIT(flags, gf_flags, O_ASYNC);
+
+ XLATE_BIT(flags, gf_flags, O_DIRECT);
+ XLATE_BIT(flags, gf_flags, O_DIRECTORY);
+ XLATE_BIT(flags, gf_flags, O_NOFOLLOW);
#ifdef O_NOATIME
- XLATE_BIT (flags, gf_flags, O_NOATIME);
+ XLATE_BIT(flags, gf_flags, O_NOATIME);
#endif
#ifdef O_CLOEXEC
- XLATE_BIT (flags, gf_flags, O_CLOEXEC);
+ XLATE_BIT(flags, gf_flags, O_CLOEXEC);
#endif
- XLATE_BIT (flags, gf_flags, O_LARGEFILE);
+ XLATE_BIT(flags, gf_flags, O_LARGEFILE);
+ XLATE_BIT(flags, gf_flags, O_FMODE_EXEC);
- return gf_flags;
+ return gf_flags;
}
static inline uint32_t
-gf_flags_to_flags (uint32_t gf_flags)
+gf_flags_to_flags(uint32_t gf_flags)
{
- uint32_t flags = 0;
-
- UNXLATE_ACCESSMODE (gf_flags, flags);
-
- UNXLATE_BIT (gf_flags, flags, O_CREAT);
- UNXLATE_BIT (gf_flags, flags, O_EXCL);
- UNXLATE_BIT (gf_flags, flags, O_NOCTTY);
- UNXLATE_BIT (gf_flags, flags, O_TRUNC);
- UNXLATE_BIT (gf_flags, flags, O_APPEND);
- UNXLATE_BIT (gf_flags, flags, O_NONBLOCK);
- UNXLATE_BIT (gf_flags, flags, O_SYNC);
- UNXLATE_BIT (gf_flags, flags, O_ASYNC);
-
- UNXLATE_BIT (gf_flags, flags, O_DIRECT);
- UNXLATE_BIT (gf_flags, flags, O_DIRECTORY);
- UNXLATE_BIT (gf_flags, flags, O_NOFOLLOW);
+ uint32_t flags = 0;
+
+ UNXLATE_ACCESSMODE(gf_flags, flags);
+
+ UNXLATE_BIT(gf_flags, flags, O_CREAT);
+ UNXLATE_BIT(gf_flags, flags, O_EXCL);
+ UNXLATE_BIT(gf_flags, flags, O_NOCTTY);
+ UNXLATE_BIT(gf_flags, flags, O_TRUNC);
+ UNXLATE_BIT(gf_flags, flags, O_APPEND);
+ UNXLATE_BIT(gf_flags, flags, O_NONBLOCK);
+ UNXLATE_BIT(gf_flags, flags, O_SYNC);
+ UNXLATE_BIT(gf_flags, flags, O_ASYNC);
+
+ UNXLATE_BIT(gf_flags, flags, O_DIRECT);
+ UNXLATE_BIT(gf_flags, flags, O_DIRECTORY);
+ UNXLATE_BIT(gf_flags, flags, O_NOFOLLOW);
#ifdef O_NOATIME
- UNXLATE_BIT (gf_flags, flags, O_NOATIME);
+ UNXLATE_BIT(gf_flags, flags, O_NOATIME);
#endif
#ifdef O_CLOEXEC
- UNXLATE_BIT (gf_flags, flags, O_CLOEXEC);
+ UNXLATE_BIT(gf_flags, flags, O_CLOEXEC);
#endif
- UNXLATE_BIT (gf_flags, flags, O_LARGEFILE);
+ UNXLATE_BIT(gf_flags, flags, O_LARGEFILE);
+ UNXLATE_BIT(gf_flags, flags, O_FMODE_EXEC);
- return flags;
+ return flags;
}
+static inline void
+gf_statfs_to_statfs(struct gf_statfs *gf_stat, struct statvfs *stat)
+{
+ if (!stat || !gf_stat)
+ return;
+
+ stat->f_bsize = (gf_stat->bsize);
+ stat->f_frsize = (gf_stat->frsize);
+ stat->f_blocks = (gf_stat->blocks);
+ stat->f_bfree = (gf_stat->bfree);
+ stat->f_bavail = (gf_stat->bavail);
+ stat->f_files = (gf_stat->files);
+ stat->f_ffree = (gf_stat->ffree);
+ stat->f_favail = (gf_stat->favail);
+ stat->f_fsid = (gf_stat->fsid);
+ stat->f_flag = (gf_stat->flag);
+ stat->f_namemax = (gf_stat->namemax);
+}
static inline void
-gf_statfs_to_statfs (struct gf_statfs *gf_stat, struct statvfs *stat)
+gf_statfs_from_statfs(struct gf_statfs *gf_stat, struct statvfs *stat)
{
- if (!stat || !gf_stat)
- return;
-
- stat->f_bsize = (gf_stat->bsize);
- stat->f_frsize = (gf_stat->frsize);
- stat->f_blocks = (gf_stat->blocks);
- stat->f_bfree = (gf_stat->bfree);
- stat->f_bavail = (gf_stat->bavail);
- stat->f_files = (gf_stat->files);
- stat->f_ffree = (gf_stat->ffree);
- stat->f_favail = (gf_stat->favail);
- stat->f_fsid = (gf_stat->fsid);
- stat->f_flag = (gf_stat->flag);
- stat->f_namemax = (gf_stat->namemax);
+ if (!stat || !gf_stat)
+ return;
+
+ gf_stat->bsize = stat->f_bsize;
+ gf_stat->frsize = stat->f_frsize;
+ gf_stat->blocks = stat->f_blocks;
+ gf_stat->bfree = stat->f_bfree;
+ gf_stat->bavail = stat->f_bavail;
+ gf_stat->files = stat->f_files;
+ gf_stat->ffree = stat->f_ffree;
+ gf_stat->favail = stat->f_favail;
+ gf_stat->fsid = stat->f_fsid;
+ gf_stat->flag = stat->f_flag;
+ gf_stat->namemax = stat->f_namemax;
}
+static inline void
+gf_proto_lease_to_lease(struct gf_proto_lease *gf_proto_lease,
+ struct gf_lease *gf_lease)
+{
+ if (!gf_lease || !gf_proto_lease)
+ return;
+
+ gf_lease->cmd = gf_proto_lease->cmd;
+ gf_lease->lease_type = gf_proto_lease->lease_type;
+ memcpy(gf_lease->lease_id, gf_proto_lease->lease_id, LEASE_ID_SIZE);
+}
static inline void
-gf_statfs_from_statfs (struct gf_statfs *gf_stat, struct statvfs *stat)
+gf_proto_lease_from_lease(struct gf_proto_lease *gf_proto_lease,
+ struct gf_lease *gf_lease)
{
- if (!stat || !gf_stat)
- return;
-
- gf_stat->bsize = stat->f_bsize;
- gf_stat->frsize = stat->f_frsize;
- gf_stat->blocks = stat->f_blocks;
- gf_stat->bfree = stat->f_bfree;
- gf_stat->bavail = stat->f_bavail;
- gf_stat->files = stat->f_files;
- gf_stat->ffree = stat->f_ffree;
- gf_stat->favail = stat->f_favail;
- gf_stat->fsid = stat->f_fsid;
- gf_stat->flag = stat->f_flag;
- gf_stat->namemax = stat->f_namemax;
+ if (!gf_lease || !gf_proto_lease)
+ return;
+
+ gf_proto_lease->cmd = gf_lease->cmd;
+ gf_proto_lease->lease_type = gf_lease->lease_type;
+ memcpy(gf_proto_lease->lease_id, gf_lease->lease_id, LEASE_ID_SIZE);
+}
+
+static inline int
+gf_proto_recall_lease_to_upcall(struct gfs3_recall_lease_req *recall_lease,
+ struct gf_upcall *gf_up_data)
+{
+ struct gf_upcall_recall_lease *tmp = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, recall_lease, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, gf_up_data, out);
+
+ tmp = (struct gf_upcall_recall_lease *)gf_up_data->data;
+ tmp->lease_type = recall_lease->lease_type;
+ memcpy(gf_up_data->gfid, recall_lease->gfid, 16);
+ memcpy(tmp->tid, recall_lease->tid, 16);
+
+ GF_PROTOCOL_DICT_UNSERIALIZE(
+ THIS, tmp->dict, (recall_lease->xdata).xdata_val,
+ (recall_lease->xdata).xdata_len, ret, errno, out);
+out:
+ return ret;
+}
+
+static inline int
+gf_proto_recall_lease_from_upcall(xlator_t *this,
+ struct gfs3_recall_lease_req *recall_lease,
+ struct gf_upcall *gf_up_data)
+{
+ struct gf_upcall_recall_lease *tmp = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO(this->name, recall_lease, out);
+ GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+ tmp = (struct gf_upcall_recall_lease *)gf_up_data->data;
+ recall_lease->lease_type = tmp->lease_type;
+ memcpy(recall_lease->gfid, gf_up_data->gfid, 16);
+ memcpy(recall_lease->tid, tmp->tid, 16);
+
+ GF_PROTOCOL_DICT_SERIALIZE(this, tmp->dict,
+ &(recall_lease->xdata).xdata_val,
+ (recall_lease->xdata).xdata_len, ret, out);
+out:
+ return ret;
}
static inline void
-gf_proto_flock_to_flock (struct gf_proto_flock *gf_proto_flock, struct gf_flock *gf_flock)
+gf_proto_flock_to_flock(struct gf_proto_flock *gf_proto_flock,
+ struct gf_flock *gf_flock)
{
- if (!gf_flock || !gf_proto_flock)
- return;
-
- gf_flock->l_type = gf_proto_flock->type;
- gf_flock->l_whence = gf_proto_flock->whence;
- gf_flock->l_start = gf_proto_flock->start;
- gf_flock->l_len = gf_proto_flock->len;
- gf_flock->l_pid = gf_proto_flock->pid;
- gf_flock->l_owner.len = gf_proto_flock->lk_owner.lk_owner_len;
- if (gf_flock->l_owner.len &&
- (gf_flock->l_owner.len < GF_MAX_LOCK_OWNER_LEN))
- memcpy (gf_flock->l_owner.data, gf_proto_flock->lk_owner.lk_owner_val,
- gf_flock->l_owner.len);
+ if (!gf_flock || !gf_proto_flock)
+ return;
+
+ gf_flock->l_type = gf_proto_flock->type;
+ gf_flock->l_whence = gf_proto_flock->whence;
+ gf_flock->l_start = gf_proto_flock->start;
+ gf_flock->l_len = gf_proto_flock->len;
+ gf_flock->l_pid = gf_proto_flock->pid;
+ gf_flock->l_owner.len = gf_proto_flock->lk_owner.lk_owner_len;
+ if (gf_flock->l_owner.len &&
+ (gf_flock->l_owner.len < GF_MAX_LOCK_OWNER_LEN))
+ memcpy(gf_flock->l_owner.data, gf_proto_flock->lk_owner.lk_owner_val,
+ gf_flock->l_owner.len);
}
+static inline void
+gf_proto_flock_from_flock(struct gf_proto_flock *gf_proto_flock,
+ struct gf_flock *gf_flock)
+{
+ if (!gf_flock || !gf_proto_flock)
+ return;
+
+ gf_proto_flock->type = (gf_flock->l_type);
+ gf_proto_flock->whence = (gf_flock->l_whence);
+ gf_proto_flock->start = (gf_flock->l_start);
+ gf_proto_flock->len = (gf_flock->l_len);
+ gf_proto_flock->pid = (gf_flock->l_pid);
+ gf_proto_flock->lk_owner.lk_owner_len = gf_flock->l_owner.len;
+ if (gf_flock->l_owner.len)
+ gf_proto_flock->lk_owner.lk_owner_val = gf_flock->l_owner.data;
+}
static inline void
-gf_proto_flock_from_flock (struct gf_proto_flock *gf_proto_flock, struct gf_flock *gf_flock)
+gf_stat_to_iatt(struct gf_iatt *gf_stat, struct iatt *iatt)
{
- if (!gf_flock || !gf_proto_flock)
- return;
-
- gf_proto_flock->type = (gf_flock->l_type);
- gf_proto_flock->whence = (gf_flock->l_whence);
- gf_proto_flock->start = (gf_flock->l_start);
- gf_proto_flock->len = (gf_flock->l_len);
- gf_proto_flock->pid = (gf_flock->l_pid);
- gf_proto_flock->lk_owner.lk_owner_len = gf_flock->l_owner.len;
- if (gf_flock->l_owner.len)
- gf_proto_flock->lk_owner.lk_owner_val = gf_flock->l_owner.data;
+ if (!iatt || !gf_stat)
+ return;
+
+ memcpy(iatt->ia_gfid, gf_stat->ia_gfid, 16);
+ iatt->ia_ino = gf_stat->ia_ino;
+ iatt->ia_dev = gf_stat->ia_dev;
+ iatt->ia_type = ia_type_from_st_mode(gf_stat->mode);
+ iatt->ia_prot = ia_prot_from_st_mode(gf_stat->mode);
+ iatt->ia_nlink = gf_stat->ia_nlink;
+ iatt->ia_uid = gf_stat->ia_uid;
+ iatt->ia_gid = gf_stat->ia_gid;
+ iatt->ia_rdev = gf_stat->ia_rdev;
+ iatt->ia_size = gf_stat->ia_size;
+ iatt->ia_blksize = gf_stat->ia_blksize;
+ iatt->ia_blocks = gf_stat->ia_blocks;
+ iatt->ia_atime = gf_stat->ia_atime;
+ iatt->ia_atime_nsec = gf_stat->ia_atime_nsec;
+ iatt->ia_mtime = gf_stat->ia_mtime;
+ iatt->ia_mtime_nsec = gf_stat->ia_mtime_nsec;
+ iatt->ia_ctime = gf_stat->ia_ctime;
+ iatt->ia_ctime_nsec = gf_stat->ia_ctime_nsec;
}
static inline void
-gf_stat_to_iatt (struct gf_iatt *gf_stat, struct iatt *iatt)
+gf_stat_from_iatt(struct gf_iatt *gf_stat, struct iatt *iatt)
+{
+ if (!iatt || !gf_stat)
+ return;
+
+ memcpy(gf_stat->ia_gfid, iatt->ia_gfid, 16);
+ gf_stat->ia_ino = iatt->ia_ino;
+ gf_stat->ia_dev = iatt->ia_dev;
+ gf_stat->mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
+ gf_stat->ia_nlink = iatt->ia_nlink;
+ gf_stat->ia_uid = iatt->ia_uid;
+ gf_stat->ia_gid = iatt->ia_gid;
+ gf_stat->ia_rdev = iatt->ia_rdev;
+ gf_stat->ia_size = iatt->ia_size;
+ gf_stat->ia_blksize = iatt->ia_blksize;
+ gf_stat->ia_blocks = iatt->ia_blocks;
+ gf_stat->ia_atime = iatt->ia_atime;
+ gf_stat->ia_atime_nsec = iatt->ia_atime_nsec;
+ gf_stat->ia_mtime = iatt->ia_mtime;
+ gf_stat->ia_mtime_nsec = iatt->ia_mtime_nsec;
+ gf_stat->ia_ctime = iatt->ia_ctime;
+ gf_stat->ia_ctime_nsec = iatt->ia_ctime_nsec;
+}
+
+static inline int
+gf_proto_cache_invalidation_from_upcall(
+ xlator_t *this, gfs3_cbk_cache_invalidation_req *gf_c_req,
+ struct gf_upcall *gf_up_data)
{
- if (!iatt || !gf_stat)
- return;
-
- memcpy (iatt->ia_gfid, gf_stat->ia_gfid, 16);
- iatt->ia_ino = gf_stat->ia_ino ;
- iatt->ia_dev = gf_stat->ia_dev ;
- iatt->ia_type = ia_type_from_st_mode (gf_stat->mode) ;
- iatt->ia_prot = ia_prot_from_st_mode (gf_stat->mode) ;
- iatt->ia_nlink = gf_stat->ia_nlink ;
- iatt->ia_uid = gf_stat->ia_uid ;
- iatt->ia_gid = gf_stat->ia_gid ;
- iatt->ia_rdev = gf_stat->ia_rdev ;
- iatt->ia_size = gf_stat->ia_size ;
- iatt->ia_blksize = gf_stat->ia_blksize ;
- iatt->ia_blocks = gf_stat->ia_blocks ;
- iatt->ia_atime = gf_stat->ia_atime ;
- iatt->ia_atime_nsec = gf_stat->ia_atime_nsec ;
- iatt->ia_mtime = gf_stat->ia_mtime ;
- iatt->ia_mtime_nsec = gf_stat->ia_mtime_nsec ;
- iatt->ia_ctime = gf_stat->ia_ctime ;
- iatt->ia_ctime_nsec = gf_stat->ia_ctime_nsec ;
+ struct gf_upcall_cache_invalidation *gf_c_data = NULL;
+ int is_cache_inval = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(this->name, gf_c_req, out);
+ GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+ is_cache_inval = ((gf_up_data->event_type == GF_UPCALL_CACHE_INVALIDATION)
+ ? 1
+ : 0);
+ GF_VALIDATE_OR_GOTO(this->name, is_cache_inval, out);
+
+ gf_c_data = (struct gf_upcall_cache_invalidation *)gf_up_data->data;
+ GF_VALIDATE_OR_GOTO(this->name, gf_c_data, out);
+
+ gf_c_req->gfid = uuid_utoa(gf_up_data->gfid);
+ gf_c_req->event_type = gf_up_data->event_type;
+ gf_c_req->flags = gf_c_data->flags;
+ gf_c_req->expire_time_attr = gf_c_data->expire_time_attr;
+ gf_stat_from_iatt(&gf_c_req->stat, &gf_c_data->stat);
+ gf_stat_from_iatt(&gf_c_req->parent_stat, &gf_c_data->p_stat);
+ gf_stat_from_iatt(&gf_c_req->oldparent_stat, &gf_c_data->oldp_stat);
+
+ ret = 0;
+ GF_PROTOCOL_DICT_SERIALIZE(this, gf_c_data->dict,
+ &(gf_c_req->xdata).xdata_val,
+ (gf_c_req->xdata).xdata_len, ret, out);
+out:
+ return ret;
}
+static inline int
+gf_proto_cache_invalidation_to_upcall(xlator_t *this,
+ gfs3_cbk_cache_invalidation_req *gf_c_req,
+ struct gf_upcall *gf_up_data)
+{
+ struct gf_upcall_cache_invalidation *gf_c_data = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(this->name, gf_c_req, out);
+ GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+ gf_c_data = (struct gf_upcall_cache_invalidation *)gf_up_data->data;
+ GF_VALIDATE_OR_GOTO(this->name, gf_c_data, out);
+
+ ret = gf_uuid_parse(gf_c_req->gfid, gf_up_data->gfid);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING, "gf_uuid_parse(%s) failed",
+ gf_c_req->gfid);
+ gf_up_data->event_type = GF_UPCALL_EVENT_NULL;
+ goto out;
+ }
+
+ gf_up_data->event_type = gf_c_req->event_type;
+
+ gf_c_data->flags = gf_c_req->flags;
+ gf_c_data->expire_time_attr = gf_c_req->expire_time_attr;
+ gf_stat_to_iatt(&gf_c_req->stat, &gf_c_data->stat);
+ gf_stat_to_iatt(&gf_c_req->parent_stat, &gf_c_data->p_stat);
+ gf_stat_to_iatt(&gf_c_req->oldparent_stat, &gf_c_data->oldp_stat);
+
+ ret = 0;
+ GF_PROTOCOL_DICT_UNSERIALIZE(this, gf_c_data->dict,
+ (gf_c_req->xdata).xdata_val,
+ (gf_c_req->xdata).xdata_len, ret, ret, out);
+
+ /* If no dict was sent, create an empty dict, so that each xlator
+ * need not check if empty then create new dict. Will be unref'd by the
+ * caller */
+ if (!gf_c_data->dict)
+ gf_c_data->dict = dict_new();
+out:
+ return ret;
+}
+
+static inline int
+gf_proto_inodelk_contention_to_upcall(struct gfs4_inodelk_contention_req *lc,
+ struct gf_upcall *gf_up_data)
+{
+ struct gf_upcall_inodelk_contention *tmp = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ this = THIS;
+
+ GF_VALIDATE_OR_GOTO(this->name, lc, out);
+ GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+ tmp = (struct gf_upcall_inodelk_contention *)gf_up_data->data;
+
+ gf_uuid_copy(gf_up_data->gfid, (unsigned char *)lc->gfid);
+
+ gf_proto_flock_to_flock(&lc->flock, &tmp->flock);
+ tmp->pid = lc->pid;
+ tmp->domain = lc->domain;
+ if ((tmp->domain != NULL) && (*tmp->domain == 0)) {
+ tmp->domain = NULL;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE(this, tmp->xdata, lc->xdata.xdata_val,
+ lc->xdata.xdata_len, ret, op_errno, out);
+
+ ret = 0;
+
+out:
+ if (ret < 0) {
+ ret = -op_errno;
+ }
+
+ return ret;
+}
+
+static inline int
+gf_proto_inodelk_contention_from_upcall(xlator_t *this,
+ struct gfs4_inodelk_contention_req *lc,
+ struct gf_upcall *gf_up_data)
+{
+ struct gf_upcall_inodelk_contention *tmp = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ GF_VALIDATE_OR_GOTO(this->name, lc, out);
+ GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+ tmp = (struct gf_upcall_inodelk_contention *)gf_up_data->data;
+
+ gf_uuid_copy((unsigned char *)lc->gfid, gf_up_data->gfid);
+
+ gf_proto_flock_from_flock(&lc->flock, &tmp->flock);
+ lc->pid = tmp->pid;
+ lc->domain = (char *)tmp->domain;
+ if (lc->domain == NULL) {
+ lc->domain = "";
+ }
+
+ GF_PROTOCOL_DICT_SERIALIZE(this, tmp->xdata, &lc->xdata.xdata_val,
+ lc->xdata.xdata_len, op_errno, out);
+
+ ret = 0;
+
+out:
+ if (ret < 0) {
+ ret = -op_errno;
+ }
+
+ return ret;
+}
+
+static inline int
+gf_proto_entrylk_contention_to_upcall(struct gfs4_entrylk_contention_req *lc,
+ struct gf_upcall *gf_up_data)
+{
+ struct gf_upcall_entrylk_contention *tmp = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ this = THIS;
+
+ GF_VALIDATE_OR_GOTO(this->name, lc, out);
+ GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+ tmp = (struct gf_upcall_entrylk_contention *)gf_up_data->data;
+
+ gf_uuid_copy(gf_up_data->gfid, (unsigned char *)lc->gfid);
+
+ tmp->type = lc->type;
+ tmp->name = lc->name;
+ if ((tmp->name != NULL) && (*tmp->name == 0)) {
+ tmp->name = NULL;
+ }
+ tmp->pid = lc->pid;
+ tmp->domain = lc->domain;
+ if ((tmp->domain != NULL) && (*tmp->domain == 0)) {
+ tmp->domain = NULL;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE(this, tmp->xdata, lc->xdata.xdata_val,
+ lc->xdata.xdata_len, ret, op_errno, out);
+
+ ret = 0;
+
+out:
+ if (ret < 0) {
+ ret = -op_errno;
+ }
+
+ return ret;
+}
+
+static inline int
+gf_proto_entrylk_contention_from_upcall(xlator_t *this,
+ struct gfs4_entrylk_contention_req *lc,
+ struct gf_upcall *gf_up_data)
+{
+ struct gf_upcall_entrylk_contention *tmp = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ GF_VALIDATE_OR_GOTO(this->name, lc, out);
+ GF_VALIDATE_OR_GOTO(this->name, gf_up_data, out);
+
+ tmp = (struct gf_upcall_entrylk_contention *)gf_up_data->data;
+
+ gf_uuid_copy((unsigned char *)lc->gfid, gf_up_data->gfid);
+
+ lc->type = tmp->type;
+ lc->name = (char *)tmp->name;
+ if (lc->name == NULL) {
+ lc->name = "";
+ }
+ lc->pid = tmp->pid;
+ lc->domain = (char *)tmp->domain;
+ if (lc->domain == NULL) {
+ lc->domain = "";
+ }
+
+ GF_PROTOCOL_DICT_SERIALIZE(this, tmp->xdata, &lc->xdata.xdata_val,
+ lc->xdata.xdata_len, op_errno, out);
+
+ ret = 0;
+
+out:
+ if (ret < 0) {
+ ret = -op_errno;
+ }
+
+ return ret;
+}
+
+static inline void
+gfx_mdata_iatt_to_mdata_iatt(struct gfx_mdata_iatt *gf_mdata_iatt,
+ struct mdata_iatt *mdata_iatt)
+{
+ if (!mdata_iatt || !gf_mdata_iatt)
+ return;
+ mdata_iatt->ia_atime = gf_mdata_iatt->ia_atime;
+ mdata_iatt->ia_atime_nsec = gf_mdata_iatt->ia_atime_nsec;
+ mdata_iatt->ia_mtime = gf_mdata_iatt->ia_mtime;
+ mdata_iatt->ia_mtime_nsec = gf_mdata_iatt->ia_mtime_nsec;
+ mdata_iatt->ia_ctime = gf_mdata_iatt->ia_ctime;
+ mdata_iatt->ia_ctime_nsec = gf_mdata_iatt->ia_ctime_nsec;
+}
+
+static inline void
+gfx_mdata_iatt_from_mdata_iatt(struct gfx_mdata_iatt *gf_mdata_iatt,
+ struct mdata_iatt *mdata_iatt)
+{
+ if (!mdata_iatt || !gf_mdata_iatt)
+ return;
+ gf_mdata_iatt->ia_atime = mdata_iatt->ia_atime;
+ gf_mdata_iatt->ia_atime_nsec = mdata_iatt->ia_atime_nsec;
+ gf_mdata_iatt->ia_mtime = mdata_iatt->ia_mtime;
+ gf_mdata_iatt->ia_mtime_nsec = mdata_iatt->ia_mtime_nsec;
+ gf_mdata_iatt->ia_ctime = mdata_iatt->ia_ctime;
+ gf_mdata_iatt->ia_ctime_nsec = mdata_iatt->ia_ctime_nsec;
+}
+
+static inline void
+gfx_stat_to_iattx(struct gfx_iattx *gf_stat, struct iatt *iatt)
+{
+ if (!iatt || !gf_stat)
+ return;
+
+ memcpy(iatt->ia_gfid, gf_stat->ia_gfid, 16);
+
+ iatt->ia_flags = gf_stat->ia_flags;
+ iatt->ia_ino = gf_stat->ia_ino;
+ iatt->ia_dev = gf_stat->ia_dev;
+ iatt->ia_rdev = gf_stat->ia_rdev;
+ iatt->ia_size = gf_stat->ia_size;
+ iatt->ia_nlink = gf_stat->ia_nlink;
+ iatt->ia_uid = gf_stat->ia_uid;
+ iatt->ia_gid = gf_stat->ia_gid;
+ iatt->ia_blksize = gf_stat->ia_blksize;
+ iatt->ia_blocks = gf_stat->ia_blocks;
+ iatt->ia_atime = gf_stat->ia_atime;
+ iatt->ia_atime_nsec = gf_stat->ia_atime_nsec;
+ iatt->ia_mtime = gf_stat->ia_mtime;
+ iatt->ia_mtime_nsec = gf_stat->ia_mtime_nsec;
+ iatt->ia_ctime = gf_stat->ia_ctime;
+ iatt->ia_ctime_nsec = gf_stat->ia_ctime_nsec;
+ iatt->ia_btime = gf_stat->ia_btime;
+ iatt->ia_btime_nsec = gf_stat->ia_btime_nsec;
+ iatt->ia_attributes = gf_stat->ia_attributes;
+ iatt->ia_attributes_mask = gf_stat->ia_attributes_mask;
+
+ iatt->ia_type = ia_type_from_st_mode(gf_stat->mode);
+ iatt->ia_prot = ia_prot_from_st_mode(gf_stat->mode);
+}
static inline void
-gf_stat_from_iatt (struct gf_iatt *gf_stat, struct iatt *iatt)
+gfx_stat_from_iattx(struct gfx_iattx *gf_stat, struct iatt *iatt)
+{
+ if (!iatt || !gf_stat)
+ return;
+
+ memcpy(gf_stat->ia_gfid, iatt->ia_gfid, 16);
+ gf_stat->ia_ino = iatt->ia_ino;
+ gf_stat->ia_dev = iatt->ia_dev;
+
+ gf_stat->ia_nlink = iatt->ia_nlink;
+ gf_stat->ia_uid = iatt->ia_uid;
+ gf_stat->ia_gid = iatt->ia_gid;
+ gf_stat->ia_rdev = iatt->ia_rdev;
+ gf_stat->ia_size = iatt->ia_size;
+ gf_stat->ia_blksize = iatt->ia_blksize;
+ gf_stat->ia_blocks = iatt->ia_blocks;
+ gf_stat->ia_atime = iatt->ia_atime;
+ gf_stat->ia_atime_nsec = iatt->ia_atime_nsec;
+ gf_stat->ia_mtime = iatt->ia_mtime;
+ gf_stat->ia_mtime_nsec = iatt->ia_mtime_nsec;
+ gf_stat->ia_ctime = iatt->ia_ctime;
+ gf_stat->ia_ctime_nsec = iatt->ia_ctime_nsec;
+
+ gf_stat->ia_flags = iatt->ia_flags;
+ gf_stat->ia_btime = iatt->ia_btime;
+ gf_stat->ia_btime_nsec = iatt->ia_btime_nsec;
+ gf_stat->ia_attributes = iatt->ia_attributes;
+ gf_stat->ia_attributes_mask = iatt->ia_attributes_mask;
+
+ gf_stat->mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
+}
+
+/* dict_to_xdr () */
+static inline int
+dict_to_xdr(dict_t *this, gfx_dict *dict)
+{
+ int ret = -1;
+ int i = 0;
+ int index = 0;
+ data_pair_t *dpair = NULL;
+ gfx_dict_pair *xpair = NULL;
+ ssize_t size = 0;
+
+ /* This is a failure as we expect destination to be valid */
+ if (!dict)
+ goto out;
+
+ /* This is OK as dictionary can be null, in which case, destination
+ should also know that it is NULL. */
+ if (!this) {
+ /* encode special meaning data here,
+ while decoding, you know it is NULL dict */
+ dict->count = -1;
+ /* everything else is normal */
+ dict->pairs.pairs_len = 0;
+ ret = 0;
+ goto out;
+ }
+
+ /* Do the whole operation in locked region */
+ LOCK(&this->lock);
+
+ dict->pairs.pairs_val = GF_CALLOC(1, (this->count * sizeof(gfx_dict_pair)),
+ gf_common_mt_char);
+ if (!dict->pairs.pairs_val)
+ goto out;
+
+ dpair = this->members_list;
+ for (i = 0; i < this->count; i++) {
+ xpair = &dict->pairs.pairs_val[index];
+
+ xpair->key.key_val = dpair->key;
+ xpair->key.key_len = strlen(dpair->key) + 1;
+ xpair->value.type = dpair->value->data_type;
+ switch (dpair->value->data_type) {
+ /* Add more type here */
+ case GF_DATA_TYPE_INT:
+ index++;
+ xpair->value.gfx_value_u.value_int = strtoll(dpair->value->data,
+ NULL, 0);
+ break;
+ case GF_DATA_TYPE_UINT:
+ index++;
+ xpair->value.gfx_value_u.value_uint = strtoull(
+ dpair->value->data, NULL, 0);
+ break;
+ case GF_DATA_TYPE_DOUBLE:
+ index++;
+ xpair->value.gfx_value_u.value_dbl = strtod(dpair->value->data,
+ NULL);
+ break;
+ case GF_DATA_TYPE_STR:
+ index++;
+ xpair->value.gfx_value_u.val_string
+ .val_string_val = dpair->value->data;
+ xpair->value.gfx_value_u.val_string
+ .val_string_len = dpair->value->len;
+ break;
+ case GF_DATA_TYPE_IATT:
+ index++;
+ gfx_stat_from_iattx(&xpair->value.gfx_value_u.iatt,
+ (struct iatt *)dpair->value->data);
+ break;
+ case GF_DATA_TYPE_MDATA:
+ index++;
+ gfx_mdata_iatt_from_mdata_iatt(
+ &xpair->value.gfx_value_u.mdata_iatt,
+ (struct mdata_iatt *)dpair->value->data);
+ break;
+ case GF_DATA_TYPE_GFUUID:
+ index++;
+ memcpy(&xpair->value.gfx_value_u.uuid, dpair->value->data,
+ sizeof(uuid_t));
+ break;
+
+ case GF_DATA_TYPE_PTR:
+ case GF_DATA_TYPE_STR_OLD:
+ index++;
+ /* Ideally, each type of data stored in dictionary
+ should have type. A pointer type shouldn't be
+ sent on wire */
+
+ /* This is done for backward compatibility as dict is
+ heavily used for transporting data over wire.
+ Ideally, wherever there is an issue, fix and
+ move on */
+ xpair->value.gfx_value_u.other.other_val = dpair->value->data;
+ xpair->value.gfx_value_u.other.other_len = dpair->value->len;
+
+ /* Change this to INFO, after taking the above down */
+ gf_msg("dict", GF_LOG_DEBUG, EINVAL, LG_MSG_DICT_SERIAL_FAILED,
+ "key '%s' would not be sent on wire in the future",
+ dpair->key);
+ break;
+ default:
+ /* Unknown type and ptr type is not sent on wire */
+ gf_msg("dict", GF_LOG_WARNING, EINVAL,
+ LG_MSG_DICT_SERIAL_FAILED,
+ "key '%s' is not sent on wire", dpair->key);
+ break;
+ }
+ dpair = dpair->next;
+ }
+
+ dict->pairs.pairs_len = index;
+ dict->count = index;
+
+ /* This is required mainly in the RPC layer to understand the
+ boundary for proper payload. Hence only send the size of
+ variable XDR size. ie, the formula should be:
+ xdr_size = total size - (xdr_size + count + pairs.pairs_len)) */
+ size = xdr_sizeof((xdrproc_t)xdr_gfx_dict, dict);
+
+ dict->xdr_size = (size > 12) ? (size - 12) : 0;
+
+ ret = 0;
+out:
+ /* this can be null here, so unlock only if its not null */
+ if (this)
+ UNLOCK(&this->lock);
+
+ return ret;
+}
+
+static inline int
+xdr_to_dict(gfx_dict *dict, dict_t **to)
{
- if (!iatt || !gf_stat)
- return;
-
- memcpy (gf_stat->ia_gfid, iatt->ia_gfid, 16);
- gf_stat->ia_ino = iatt->ia_ino ;
- gf_stat->ia_dev = iatt->ia_dev ;
- gf_stat->mode = st_mode_from_ia (iatt->ia_prot, iatt->ia_type);
- gf_stat->ia_nlink = iatt->ia_nlink ;
- gf_stat->ia_uid = iatt->ia_uid ;
- gf_stat->ia_gid = iatt->ia_gid ;
- gf_stat->ia_rdev = iatt->ia_rdev ;
- gf_stat->ia_size = iatt->ia_size ;
- gf_stat->ia_blksize = iatt->ia_blksize ;
- gf_stat->ia_blocks = iatt->ia_blocks ;
- gf_stat->ia_atime = iatt->ia_atime ;
- gf_stat->ia_atime_nsec = iatt->ia_atime_nsec ;
- gf_stat->ia_mtime = iatt->ia_mtime ;
- gf_stat->ia_mtime_nsec = iatt->ia_mtime_nsec ;
- gf_stat->ia_ctime = iatt->ia_ctime ;
- gf_stat->ia_ctime_nsec = iatt->ia_ctime_nsec ;
+ int ret = -1;
+ int index = 0;
+ char *key = NULL;
+ char *value = NULL;
+ gfx_dict_pair *xpair = NULL;
+ dict_t *this = NULL;
+ unsigned char *uuid = NULL;
+ struct iatt *iatt = NULL;
+ struct mdata_iatt *mdata_iatt = NULL;
+
+ if (!to || !dict)
+ goto out;
+
+ if (dict->count < 0) {
+ /* indicates NULL dict was passed for encoding */
+ ret = 0;
+ goto out;
+ }
+
+ this = dict_new();
+ if (!this)
+ goto out;
+
+ for (index = 0; index < dict->pairs.pairs_len; index++) {
+ ret = -1;
+ xpair = &dict->pairs.pairs_val[index];
+
+ key = xpair->key.key_val;
+ switch (xpair->value.type) {
+ /* Add more type here */
+ case GF_DATA_TYPE_INT:
+ ret = dict_set_int64(this, key,
+ xpair->value.gfx_value_u.value_int);
+ break;
+ case GF_DATA_TYPE_UINT:
+ ret = dict_set_uint64(this, key,
+ xpair->value.gfx_value_u.value_uint);
+ break;
+ case GF_DATA_TYPE_DOUBLE:
+ ret = dict_set_double(this, key,
+ xpair->value.gfx_value_u.value_dbl);
+ break;
+ case GF_DATA_TYPE_STR:
+ value = GF_MALLOC(
+ xpair->value.gfx_value_u.val_string.val_string_len + 1,
+ gf_common_mt_char);
+ if (!value) {
+ errno = ENOMEM;
+ goto out;
+ }
+ memcpy(value,
+ xpair->value.gfx_value_u.val_string.val_string_val,
+ xpair->value.gfx_value_u.val_string.val_string_len);
+ value[xpair->value.gfx_value_u.val_string.val_string_len] =
+ '\0';
+ free(xpair->value.gfx_value_u.val_string.val_string_val);
+ ret = dict_set_dynstr(this, key, value);
+ break;
+ case GF_DATA_TYPE_GFUUID:
+ uuid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!uuid) {
+ errno = ENOMEM;
+ goto out;
+ }
+ memcpy(uuid, xpair->value.gfx_value_u.uuid, sizeof(uuid_t));
+ ret = dict_set_gfuuid(this, key, uuid, false);
+ break;
+ case GF_DATA_TYPE_IATT:
+ iatt = GF_CALLOC(1, sizeof(struct iatt), gf_common_mt_char);
+ if (!iatt) {
+ errno = ENOMEM;
+ goto out;
+ }
+ gfx_stat_to_iattx(&xpair->value.gfx_value_u.iatt, iatt);
+ ret = dict_set_iatt(this, key, iatt, false);
+ break;
+ case GF_DATA_TYPE_MDATA:
+ mdata_iatt = GF_CALLOC(1, sizeof(struct mdata_iatt),
+ gf_common_mt_char);
+ if (!mdata_iatt) {
+ errno = ENOMEM;
+ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, LG_MSG_NO_MEMORY,
+ "failed to allocate memory. key: %s", key);
+ ret = -1;
+ goto out;
+ }
+ gfx_mdata_iatt_to_mdata_iatt(
+ &xpair->value.gfx_value_u.mdata_iatt, mdata_iatt);
+ ret = dict_set_mdata(this, key, mdata_iatt, false);
+ if (ret != 0) {
+ GF_FREE(mdata_iatt);
+ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM,
+ LG_MSG_DICT_SET_FAILED,
+ "failed to set the key (%s)"
+ " into dict",
+ key);
+ ret = -1;
+ goto out;
+ }
+ break;
+ case GF_DATA_TYPE_PTR:
+ case GF_DATA_TYPE_STR_OLD:
+ value = GF_MALLOC(xpair->value.gfx_value_u.other.other_len + 1,
+ gf_common_mt_char);
+ if (!value) {
+ errno = ENOMEM;
+ goto out;
+ }
+ memcpy(value, xpair->value.gfx_value_u.other.other_val,
+ xpair->value.gfx_value_u.other.other_len);
+ value[xpair->value.gfx_value_u.other.other_len] = '\0';
+ free(xpair->value.gfx_value_u.other.other_val);
+ ret = dict_set_dynptr(this, key, value,
+ xpair->value.gfx_value_u.other.other_len);
+ break;
+ default:
+ ret = 0;
+ /* Unknown type and ptr type is not sent on wire */
+ break;
+ }
+ if (ret) {
+ gf_msg_debug(THIS->name, ENOMEM,
+ "failed to set the key (%s) into dict", key);
+ }
+ free(xpair->key.key_val);
+ }
+
+ free(dict->pairs.pairs_val);
+ ret = 0;
+
+ /* If everything is fine, assign the dictionary to target */
+ *to = this;
+ this = NULL;
+
+out:
+ if (this)
+ dict_unref(this);
+
+ return ret;
}
#endif /* !_GLUSTERFS3_H */
diff --git a/rpc/xdr/src/glusterfs4-xdr.x b/rpc/xdr/src/glusterfs4-xdr.x
new file mode 100644
index 00000000000..d3b1d0dfaf0
--- /dev/null
+++ b/rpc/xdr/src/glusterfs4-xdr.x
@@ -0,0 +1,797 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/glusterfs-fops.h>
+%#include "glusterfs3-xdr.h"
+
+/* Need to consume iattx and new dict in all the fops */
+struct gfx_iattx {
+ opaque ia_gfid[16];
+
+ unsigned hyper ia_flags;
+ unsigned hyper ia_ino; /* inode number */
+ unsigned hyper ia_dev; /* backing device ID */
+ unsigned hyper ia_rdev; /* device ID (if special file) */
+ unsigned hyper ia_size; /* file size in bytes */
+ unsigned hyper ia_blocks; /* number of 512B blocks allocated */
+ unsigned hyper ia_attributes; /* chattr related:compressed, immutable,
+ * append only, encrypted etc.*/
+ unsigned hyper ia_attributes_mask; /* Mask for the attributes */
+
+ hyper ia_atime; /* last access time */
+ hyper ia_mtime; /* last modification time */
+ hyper ia_ctime; /* last status change time */
+ hyper ia_btime; /* creation time. Fill using statx */
+
+ unsigned int ia_atime_nsec;
+ unsigned int ia_mtime_nsec;
+ unsigned int ia_ctime_nsec;
+ unsigned int ia_btime_nsec;
+ unsigned int ia_nlink; /* Link count */
+ unsigned int ia_uid; /* user ID of owner */
+ unsigned int ia_gid; /* group ID of owner */
+ unsigned int ia_blksize; /* blocksize for filesystem I/O */
+ unsigned int mode; /* type of file and rwx mode */
+};
+
+struct gfx_mdata_iatt {
+ hyper ia_atime; /* last access time */
+ hyper ia_mtime; /* last modification time */
+ hyper ia_ctime; /* last status change time */
+
+ unsigned int ia_atime_nsec;
+ unsigned int ia_mtime_nsec;
+ unsigned int ia_ctime_nsec;
+};
+
+union gfx_value switch (int type) {
+ case GF_DATA_TYPE_INT:
+ hyper value_int;
+ case GF_DATA_TYPE_UINT:
+ unsigned hyper value_uint;
+ case GF_DATA_TYPE_DOUBLE:
+ double value_dbl;
+ case GF_DATA_TYPE_STR:
+ opaque val_string<>;
+ case GF_DATA_TYPE_IATT:
+ gfx_iattx iatt;
+ case GF_DATA_TYPE_GFUUID:
+ opaque uuid[16];
+ case GF_DATA_TYPE_PTR:
+ case GF_DATA_TYPE_STR_OLD:
+ opaque other<>;
+ case GF_DATA_TYPE_MDATA:
+ gfx_mdata_iatt mdata_iatt;
+};
+
+/* AUTH */
+/* This is used in the rpc header part itself, And not program payload.
+ Avoid sending large data load here. Allowed maximum is 400 bytes.
+ Ref: http://tools.ietf.org/html/rfc5531#section-8.2
+ this is also handled in xdr-common.h
+*/
+struct auth_glusterfs_params_v3 {
+ int pid;
+ unsigned int uid;
+ unsigned int gid;
+
+ /* flags */
+ /* Makes sense to use it for each bits */
+ /* 0x1 == IS_INTERNAL? */
+ /* Another 31 bits are reserved */
+ unsigned int flags;
+
+ /* birth time of the frame / call */
+ unsigned int ctime_nsec; /* good to have 32bit for this */
+ unsigned hyper ctime_sec;
+
+ unsigned int groups<>;
+ opaque lk_owner<>;
+};
+
+struct gfx_dict_pair {
+ opaque key<>;
+ gfx_value value;
+};
+
+struct gfx_dict {
+ unsigned int xdr_size;
+ int count;
+ gfx_dict_pair pairs<>;
+};
+
+/* FOPS */
+struct gfx_common_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_common_iatt_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata;
+ gfx_iattx stat;
+};
+
+struct gfx_common_2iatt_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata;
+ gfx_iattx prestat;
+ gfx_iattx poststat;
+};
+
+struct gfx_common_3iatt_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gfx_iattx stat;
+ gfx_iattx preparent;
+ gfx_iattx postparent;
+};
+
+struct gfx_fsetattr_req {
+ opaque gfid[16];
+ hyper fd;
+ gfx_iattx stbuf;
+ int valid;
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_rchecksum_req {
+ opaque gfid[16];
+ hyper fd;
+ unsigned hyper offset;
+ unsigned int len;
+ unsigned int flags;
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_icreate_req {
+ opaque gfid[16];
+ unsigned int mode;
+ gfx_dict xdata;
+};
+
+struct gfx_put_req {
+ opaque pargfid[16];
+ string bname<>;
+ unsigned int mode;
+ unsigned int umask;
+ unsigned int flag;
+ u_quad_t offset;
+ unsigned int size;
+ gfx_dict xattr;
+ gfx_dict xdata;
+};
+
+struct gfx_namelink_req {
+ opaque pargfid[16];
+ string bname<>;
+ gfx_dict xdata;
+};
+
+/* Define every fops */
+/* Changes from Version 3:
+ 1. Dict has its own type instead of being opaque
+ 2. Iattx instead of iatt on wire
+ 3. gfid has 4 extra bytes so it can be used for future
+*/
+struct gfx_stat_req {
+ opaque gfid[16];
+ gfx_dict xdata;
+};
+
+struct gfx_readlink_req {
+ opaque gfid[16];
+ unsigned int size;
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_readlink_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gfx_iattx buf;
+ string path<>; /* NULL terminated */
+};
+
+struct gfx_mknod_req {
+ opaque pargfid[16];
+ u_quad_t dev;
+ unsigned int mode;
+ unsigned int umask;
+ string bname<>; /* NULL terminated */
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_mkdir_req {
+ opaque pargfid[16];
+ unsigned int mode;
+ unsigned int umask;
+ string bname<>; /* NULL terminated */
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_unlink_req {
+ opaque pargfid[16];
+ string bname<>; /* NULL terminated */
+ unsigned int xflags;
+ gfx_dict xdata; /* Extra data */
+};
+
+
+struct gfx_rmdir_req {
+ opaque pargfid[16];
+ int xflags;
+ string bname<>; /* NULL terminated */
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_symlink_req {
+ opaque pargfid[16];
+ string bname<>;
+ unsigned int umask;
+ string linkname<>;
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_rename_req {
+ opaque oldgfid[16];
+ opaque newgfid[16];
+ string oldbname<>; /* NULL terminated */
+ string newbname<>; /* NULL terminated */
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_rename_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gfx_iattx stat;
+ gfx_iattx preoldparent;
+ gfx_iattx postoldparent;
+ gfx_iattx prenewparent;
+ gfx_iattx postnewparent;
+};
+
+
+ struct gfx_link_req {
+ opaque oldgfid[16];
+ opaque newgfid[16];
+ string newbname<>;
+ gfx_dict xdata; /* Extra data */
+};
+
+ struct gfx_truncate_req {
+ opaque gfid[16];
+ u_quad_t offset;
+ gfx_dict xdata; /* Extra data */
+};
+
+ struct gfx_open_req {
+ opaque gfid[16];
+ unsigned int flags;
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_open_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ quad_t fd;
+};
+
+struct gfx_opendir_req {
+ opaque gfid[16];
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+ struct gfx_read_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ unsigned int size;
+ unsigned int flag;
+ gfx_dict xdata; /* Extra data */
+};
+ struct gfx_read_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_iattx stat;
+ unsigned int size;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_lookup_req {
+ opaque gfid[16];
+ opaque pargfid[16];
+ unsigned int flags;
+ string bname<>;
+ gfx_dict xdata; /* Extra data */
+};
+
+
+ struct gfx_write_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ unsigned int size;
+ unsigned int flag;
+ gfx_dict xdata; /* Extra data */
+};
+
+ struct gfx_statfs_req {
+ opaque gfid[16];
+ gfx_dict xdata; /* Extra data */
+} ;
+ struct gfx_statfs_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gf_statfs statfs;
+} ;
+
+ struct gfx_lk_req {
+ opaque gfid[16];
+ int64_t fd;
+ unsigned int cmd;
+ unsigned int type;
+ gf_proto_flock flock;
+ gfx_dict xdata; /* Extra data */
+} ;
+ struct gfx_lk_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gf_proto_flock flock;
+} ;
+
+struct gfx_lease_req {
+ opaque gfid[16];
+ gf_proto_lease lease;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_lease_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gf_proto_lease lease;
+} ;
+
+struct gfx_recall_lease_req {
+ opaque gfid[16];
+ unsigned int lease_type;
+ opaque tid[16];
+ gfx_dict xdata; /* Extra data */
+} ;
+
+ struct gfx_inodelk_req {
+ opaque gfid[16];
+ unsigned int cmd;
+ unsigned int type;
+ gf_proto_flock flock;
+ string volume<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_finodelk_req {
+ opaque gfid[16];
+ quad_t fd;
+ unsigned int cmd;
+ unsigned int type;
+ gf_proto_flock flock;
+ string volume<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+ struct gfx_flush_req {
+ opaque gfid[16];
+ quad_t fd;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+ struct gfx_fsync_req {
+ opaque gfid[16];
+ quad_t fd;
+ unsigned int data;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+ struct gfx_setxattr_req {
+ opaque gfid[16];
+ unsigned int flags;
+ gfx_dict dict;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+
+ struct gfx_fsetxattr_req {
+ opaque gfid[16];
+ int64_t fd;
+ unsigned int flags;
+ gfx_dict dict;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+
+ struct gfx_xattrop_req {
+ opaque gfid[16];
+ unsigned int flags;
+ gfx_dict dict;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_common_dict_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gfx_dict dict;
+ gfx_iattx prestat;
+ gfx_iattx poststat;
+};
+
+
+ struct gfx_fxattrop_req {
+ opaque gfid[16];
+ quad_t fd;
+ unsigned int flags;
+ gfx_dict dict;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+ struct gfx_getxattr_req {
+ opaque gfid[16];
+ unsigned int namelen;
+ string name<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+ struct gfx_fgetxattr_req {
+ opaque gfid[16];
+ quad_t fd;
+ unsigned int namelen;
+ string name<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+ struct gfx_removexattr_req {
+ opaque gfid[16];
+ string name<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+ struct gfx_fremovexattr_req {
+ opaque gfid[16];
+ quad_t fd;
+ string name<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+ struct gfx_fsyncdir_req {
+ opaque gfid[16];
+ quad_t fd;
+ int data;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+ struct gfx_readdir_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ unsigned int size;
+ gfx_dict xdata; /* Extra data */
+};
+
+ struct gfx_readdirp_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ unsigned int size;
+ gfx_dict xdata;
+} ;
+
+
+struct gfx_access_req {
+ opaque gfid[16];
+ unsigned int mask;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+struct gfx_create_req {
+ opaque pargfid[16];
+ unsigned int flags;
+ unsigned int mode;
+ unsigned int umask;
+ string bname<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+struct gfx_create_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gfx_iattx stat;
+ u_quad_t fd;
+ gfx_iattx preparent;
+ gfx_iattx postparent;
+} ;
+
+struct gfx_ftruncate_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+struct gfx_fstat_req {
+ opaque gfid[16];
+ quad_t fd;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+struct gfx_entrylk_req {
+ opaque gfid[16];
+ unsigned int cmd;
+ unsigned int type;
+ u_quad_t namelen;
+ string name<>;
+ string volume<>;
+ gfx_dict xdata; /* Extra data */
+};
+
+struct gfx_fentrylk_req {
+ opaque gfid[16];
+ quad_t fd;
+ unsigned int cmd;
+ unsigned int type;
+ u_quad_t namelen;
+ string name<>;
+ string volume<>;
+ gfx_dict xdata; /* Extra data */
+};
+
+ struct gfx_setattr_req {
+ opaque gfid[16];
+ gfx_iattx stbuf;
+ int valid;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+ struct gfx_fallocate_req {
+ opaque gfid[16];
+ quad_t fd;
+ unsigned int flags;
+ u_quad_t offset;
+ u_quad_t size;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_discard_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ u_quad_t size;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_zerofill_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ u_quad_t size;
+ gfx_dict xdata;
+} ;
+
+struct gfx_rchecksum_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ unsigned int flags;
+ unsigned int weak_checksum;
+ opaque strong_checksum<>;
+} ;
+
+
+struct gfx_ipc_req {
+ int op;
+ gfx_dict xdata;
+};
+
+
+struct gfx_seek_req {
+ opaque gfid[16];
+ quad_t fd;
+ u_quad_t offset;
+ int what;
+ gfx_dict xdata;
+};
+
+struct gfx_seek_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata;
+ u_quad_t offset;
+};
+
+
+ struct gfx_setvolume_req {
+ gfx_dict dict;
+} ;
+
+ struct gfx_copy_file_range_req {
+ opaque gfid1[16];
+ opaque gfid2[16];
+ quad_t fd_in;
+ quad_t fd_out;
+ u_quad_t off_in;
+ u_quad_t off_out;
+ unsigned int size;
+ unsigned int flag;
+ gfx_dict xdata; /* Extra data */
+};
+
+ struct gfx_setvolume_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict dict;
+} ;
+
+
+ struct gfx_getspec_req {
+ unsigned int flags;
+ string key<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+ struct gfx_getspec_rsp {
+ int op_ret;
+ int op_errno;
+ string spec<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+
+ struct gfx_notify_req {
+ unsigned int flags;
+ string buf<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+ struct gfx_notify_rsp {
+ int op_ret;
+ int op_errno;
+ unsigned int flags;
+ string buf<>;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_releasedir_req {
+ opaque gfid[16];
+ quad_t fd;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_release_req {
+ opaque gfid[16];
+ quad_t fd;
+ gfx_dict xdata; /* Extra data */
+} ;
+
+struct gfx_dirlist {
+ u_quad_t d_ino;
+ u_quad_t d_off;
+ unsigned int d_len;
+ unsigned int d_type;
+ string name<>;
+ gfx_dirlist *nextentry;
+};
+
+
+struct gfx_readdir_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gfx_dirlist *reply;
+};
+
+struct gfx_dirplist {
+ u_quad_t d_ino;
+ u_quad_t d_off;
+ unsigned int d_len;
+ unsigned int d_type;
+ string name<>;
+ gfx_iattx stat;
+ gfx_dict dict;
+ gfx_dirplist *nextentry;
+};
+
+struct gfx_readdirp_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata; /* Extra data */
+ gfx_dirplist *reply;
+};
+
+struct gfx_set_lk_ver_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata;
+ int lk_ver;
+};
+
+struct gfx_set_lk_ver_req {
+ string uid<>;
+ int lk_ver;
+};
+
+struct gfx_event_notify_req {
+ int op;
+ gfx_dict dict;
+};
+
+
+struct gfx_getsnap_name_uuid_req {
+ gfx_dict dict;
+};
+
+struct gfx_getsnap_name_uuid_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict dict;
+ string op_errstr<>;
+};
+
+struct gfx_getactivelk_rsp {
+ int op_ret;
+ int op_errno;
+ gfx_dict xdata;
+ gfs3_locklist *reply;
+};
+
+struct gfx_getactivelk_req {
+ opaque gfid[16];
+ gfx_dict xdata;
+};
+
+struct gfx_setactivelk_req {
+ opaque gfid[16];
+ gfs3_locklist *request;
+ gfx_dict xdata;
+};
+
+struct gfs4_inodelk_contention_req {
+ opaque gfid[16];
+ struct gf_proto_flock flock;
+ unsigned int pid;
+ string domain<>;
+ opaque xdata<>;
+};
+
+struct gfs4_entrylk_contention_req {
+ opaque gfid[16];
+ unsigned int type;
+ unsigned int pid;
+ string name<>;
+ string domain<>;
+ opaque xdata<>;
+};
diff --git a/rpc/xdr/src/libgfxdr.sym b/rpc/xdr/src/libgfxdr.sym
new file mode 100644
index 00000000000..8fa0e0ddd8a
--- /dev/null
+++ b/rpc/xdr/src/libgfxdr.sym
@@ -0,0 +1,350 @@
+xdr_auth_glusterfs_parms
+xdr_auth_glusterfs_parms_v2
+xdr_auth_glusterfs_params_v3
+xdr_changelog_event_req
+xdr_changelog_event_rsp
+xdr_changelog_probe_req
+xdr_changelog_probe_rsp
+xdr_dirpath
+xdr_free_exports_list
+xdr_free_mountlist
+xdr_gd1_mgmt_brick_op_req
+xdr_gd1_mgmt_brick_op_rsp
+xdr_gd1_mgmt_cluster_lock_req
+xdr_gd1_mgmt_cluster_lock_rsp
+xdr_gd1_mgmt_cluster_unlock_req
+xdr_gd1_mgmt_cluster_unlock_rsp
+xdr_gd1_mgmt_commit_op_req
+xdr_gd1_mgmt_commit_op_rsp
+xdr_gd1_mgmt_friend_req
+xdr_gd1_mgmt_friend_rsp
+xdr_gd1_mgmt_friend_update
+xdr_gd1_mgmt_friend_update_rsp
+xdr_gd1_mgmt_probe_req
+xdr_gd1_mgmt_probe_rsp
+xdr_gd1_mgmt_stage_op_req
+xdr_gd1_mgmt_stage_op_rsp
+xdr_gd1_mgmt_v3_brick_op_req
+xdr_gd1_mgmt_v3_brick_op_rsp
+xdr_gd1_mgmt_v3_commit_req
+xdr_gd1_mgmt_v3_commit_rsp
+xdr_gd1_mgmt_v3_post_commit_req
+xdr_gd1_mgmt_v3_post_commit_rsp
+xdr_gd1_mgmt_v3_lock_req
+xdr_gd1_mgmt_v3_lock_rsp
+xdr_gd1_mgmt_v3_post_val_req
+xdr_gd1_mgmt_v3_post_val_rsp
+xdr_gd1_mgmt_v3_pre_val_req
+xdr_gd1_mgmt_v3_pre_val_rsp
+xdr_gd1_mgmt_v3_unlock_req
+xdr_gd1_mgmt_v3_unlock_rsp
+xdr_gf1_cli_fsm_log_req
+xdr_gf1_cli_fsm_log_rsp
+xdr_gf1_cli_getwd_req
+xdr_gf1_cli_getwd_rsp
+xdr_gf1_cli_mount_req
+xdr_gf1_cli_mount_rsp
+xdr_gf1_cli_peer_list_req
+xdr_gf1_cli_peer_list_rsp
+xdr_gf1_cli_umount_req
+xdr_gf1_cli_umount_rsp
+xdr_gf_cli_req
+xdr_gf_cli_rsp
+xdr_gf_common_rsp
+xdr_gf_dump_req
+xdr_gf_dump_rsp
+xdr_gf_event_notify_req
+xdr_gf_event_notify_rsp
+xdr_gf_getsnap_name_uuid_req
+xdr_gf_getsnap_name_uuid_rsp
+xdr_gf_getspec_req
+xdr_gf_getspec_rsp
+xdr_gf_get_volume_info_req
+xdr_gf_get_volume_info_rsp
+xdr_gf_mgmt_hndsk_req
+xdr_gf_mgmt_hndsk_rsp
+xdr_gfs3_access_req
+xdr_gfs3_cbk_cache_invalidation_req
+xdr_gfs3_compound_req
+xdr_gfs3_compound_rsp
+xdr_gfs3_create_req
+xdr_gfs3_create_rsp
+xdr_gfs3_discard_req
+xdr_gfs3_discard_rsp
+xdr_gfs3_entrylk_req
+xdr_gfs3_fallocate_req
+xdr_gfs3_fallocate_rsp
+xdr_gfs3_fentrylk_req
+xdr_gfs3_fgetxattr_req
+xdr_gfs3_fgetxattr_rsp
+xdr_gfs3_finodelk_req
+xdr_gfs3_flush_req
+xdr_gfs3_fremovexattr_req
+xdr_gfs3_fsetattr_req
+xdr_gfs3_fsetattr_req_v2
+xdr_gfs3_fsetattr_rsp
+xdr_gfs3_fsetxattr_req
+xdr_gfs3_fstat_req
+xdr_gfs3_fstat_rsp
+xdr_gfs3_fsyncdir_req
+xdr_gfs3_fsync_req
+xdr_gfs3_fsync_rsp
+xdr_gfs3_ftruncate_req
+xdr_gfs3_ftruncate_rsp
+xdr_gfs3_fxattrop_req
+xdr_gfs3_fxattrop_rsp
+xdr_gfs3_getactivelk_req
+xdr_gfs3_getactivelk_rsp
+xdr_gfs3_getxattr_req
+xdr_gfs3_getxattr_rsp
+xdr_gfs3_inodelk_req
+xdr_gfs3_ipc_req
+xdr_gfs3_ipc_rsp
+xdr_gfs3_lease_req
+xdr_gfs3_lease_rsp
+xdr_gfs3_link_req
+xdr_gfs3_link_rsp
+xdr_gfs3_lk_req
+xdr_gfs3_lk_rsp
+xdr_gfs3_lookup_req
+xdr_gfs3_lookup_rsp
+xdr_gfs3_mkdir_req
+xdr_gfs3_mkdir_rsp
+xdr_gfs3_mknod_req
+xdr_gfs3_mknod_rsp
+xdr_gfs3_opendir_req
+xdr_gfs3_opendir_rsp
+xdr_gfs3_open_req
+xdr_gfs3_open_rsp
+xdr_gfs3_rchecksum_req
+xdr_gfs3_rchecksum_req_v2
+xdr_gfs3_rchecksum_rsp
+xdr_gfs3_readdirp_req
+xdr_gfs3_readdirp_rsp
+xdr_gfs3_readdir_req
+xdr_gfs3_readdir_rsp
+xdr_gfs3_readlink_req
+xdr_gfs3_readlink_rsp
+xdr_gfs3_read_req
+xdr_gfs3_read_rsp
+xdr_gfs3_recall_lease_req
+xdr_gfs3_releasedir_req
+xdr_gfs3_release_req
+xdr_gfs3_removexattr_req
+xdr_gfs3_rename_req
+xdr_gfs3_rename_rsp
+xdr_gfs3_rmdir_req
+xdr_gfs3_rmdir_rsp
+xdr_gfs3_seek_req
+xdr_gfs3_seek_rsp
+xdr_gfs3_setactivelk_req
+xdr_gfs3_setactivelk_rsp
+xdr_gfs3_setattr_req
+xdr_gfs3_setattr_rsp
+xdr_gfs3_setxattr_req
+xdr_gfs3_statfs_req
+xdr_gfs3_statfs_rsp
+xdr_gfs3_stat_req
+xdr_gfs3_stat_rsp
+xdr_gfs3_symlink_req
+xdr_gfs3_symlink_rsp
+xdr_gfs3_truncate_req
+xdr_gfs3_truncate_rsp
+xdr_gfs3_unlink_req
+xdr_gfs3_unlink_rsp
+xdr_gfs3_write_req
+xdr_gfs3_write_rsp
+xdr_gfs3_xattrop_req
+xdr_gfs3_xattrop_rsp
+xdr_gfs3_zerofill_req
+xdr_gfs3_zerofill_rsp
+xdr_gfs4_entrylk_contention_req
+xdr_gfs4_entrylk_contention_rsp
+xdr_gfs4_icreate_req
+xdr_gfs4_icreate_rsp
+xdr_gfs4_inodelk_contention_req
+xdr_gfs4_inodelk_contention_rsp
+xdr_gfs4_namelink_req
+xdr_gfs4_namelink_rsp
+xdr_gf_set_lk_ver_req
+xdr_gf_set_lk_ver_rsp
+xdr_gf_setvolume_req
+xdr_gf_setvolume_rsp
+xdr_gf_statedump
+xdr_length_round_up
+xdr_mon
+xdr_mountres3
+xdr_mountstat3
+xdr_nlm_sm_status
+xdr_pmap_brick_by_port_req
+xdr_pmap_brick_by_port_rsp
+xdr_pmap_port_by_brick_req
+xdr_pmap_port_by_brick_rsp
+xdr_pmap_signin_req
+xdr_pmap_signin_rsp
+xdr_pmap_signout_req
+xdr_pmap_signout_rsp
+xdr_serialize_access3res
+xdr_serialize_commit3res
+xdr_serialize_create3res
+xdr_serialize_exports
+xdr_serialize_fsinfo3res
+xdr_serialize_fsstat3res
+xdr_serialize_generic
+xdr_serialize_getaclreply
+xdr_serialize_getattr3res
+xdr_serialize_link3res
+xdr_serialize_lookup3res
+xdr_serialize_mkdir3res
+xdr_serialize_mknod3res
+xdr_serialize_mountlist
+xdr_serialize_mountres3
+xdr_serialize_mountstat3
+xdr_serialize_nlm4_res
+xdr_serialize_nlm4_shareres
+xdr_serialize_nlm4_testargs
+xdr_serialize_nlm4_testres
+xdr_serialize_pathconf3res
+xdr_serialize_read3res
+xdr_serialize_read3res_nocopy
+xdr_serialize_readdir3res
+xdr_serialize_readdirp3res
+xdr_serialize_readlink3res
+xdr_serialize_remove3res
+xdr_serialize_rename3res
+xdr_serialize_rmdir3res
+xdr_serialize_setaclreply
+xdr_serialize_setattr3res
+xdr_serialize_symlink3res
+xdr_serialize_write3res
+xdr_sm_stat
+xdr_sm_stat_res
+xdr_to_access3args
+xdr_to_commit3args
+xdr_to_create3args
+xdr_to_fsinfo3args
+xdr_to_fsstat3args
+xdr_to_generic
+xdr_to_getaclargs
+xdr_to_getattr3args
+xdr_to_link3args
+xdr_to_lookup3args
+xdr_to_mkdir3args
+xdr_to_mknod3args
+xdr_to_mountpath
+xdr_to_nlm4_cancelargs
+xdr_to_nlm4_freeallargs
+xdr_to_nlm4_lockargs
+xdr_to_nlm4_shareargs
+xdr_to_nlm4_testargs
+xdr_to_nlm4_unlockargs
+xdr_to_pathconf3args
+xdr_to_read3args
+xdr_to_readdir3args
+xdr_to_readdirp3args
+xdr_to_readlink3args
+xdr_to_remove3args
+xdr_to_rename3args
+xdr_to_rmdir3args
+xdr_to_setaclargs
+xdr_to_setattr3args
+xdr_to_symlink3args
+xdr_to_write3args
+xdr_vector_round_up
+xdr_gfx_read_rsp
+xdr_gfx_iattx
+xdr_gfx_mdata_iatt
+xdr_gfx_value
+xdr_gfx_dict_pair
+xdr_gfx_dict
+xdr_gfx_common_rsp
+xdr_gfx_common_iatt_rsp
+xdr_gfx_common_2iatt_rsp
+xdr_gfx_common_3iatt_rsp
+xdr_gfx_fsetattr_req
+xdr_gfx_rchecksum_req
+xdr_gfx_icreate_req
+xdr_gfx_namelink_req
+xdr_gfx_stat_req
+xdr_gfx_readlink_req
+xdr_gfx_readlink_rsp
+xdr_gfx_mknod_req
+xdr_gfx_mkdir_req
+xdr_gfx_unlink_req
+xdr_gfx_rmdir_req
+xdr_gfx_symlink_req
+xdr_gfx_rename_req
+xdr_gfx_rename_rsp
+xdr_gfx_link_req
+xdr_gfx_truncate_req
+xdr_gfx_open_req
+xdr_gfx_open_rsp
+xdr_gfx_opendir_req
+xdr_gfx_read_req
+xdr_gfx_read_rsp
+xdr_gfx_lookup_req
+xdr_gfx_write_req
+xdr_gfx_statfs_req
+xdr_gfx_statfs_rsp
+xdr_gfx_lk_req
+xdr_gfx_lk_rsp
+xdr_gfx_lease_req
+xdr_gfx_lease_rsp
+xdr_gfx_recall_lease_req
+xdr_gfx_inodelk_req
+xdr_gfx_finodelk_req
+xdr_gfx_flush_req
+xdr_gfx_fsync_req
+xdr_gfx_setxattr_req
+xdr_gfx_fsetxattr_req
+xdr_gfx_xattrop_req
+xdr_gfx_common_dict_rsp
+xdr_gfx_fxattrop_req
+xdr_gfx_getxattr_req
+xdr_gfx_fgetxattr_req
+xdr_gfx_removexattr_req
+xdr_gfx_fremovexattr_req
+xdr_gfx_fsyncdir_req
+xdr_gfx_readdir_req
+xdr_gfx_readdirp_req
+xdr_gfx_access_req
+xdr_gfx_create_req
+xdr_gfx_create_rsp
+xdr_gfx_ftruncate_req
+xdr_gfx_fstat_req
+xdr_gfx_entrylk_req
+xdr_gfx_fentrylk_req
+xdr_gfx_setattr_req
+xdr_gfx_fallocate_req
+xdr_gfx_discard_req
+xdr_gfx_zerofill_req
+xdr_gfx_rchecksum_rsp
+xdr_gfx_ipc_req
+xdr_gfx_seek_req
+xdr_gfx_seek_rsp
+xdr_gfx_setvolume_req
+xdr_gfx_setvolume_rsp
+xdr_gfx_getspec_req
+xdr_gfx_getspec_rsp
+xdr_gfx_notify_req
+xdr_gfx_notify_rsp
+xdr_gfx_releasedir_req
+xdr_gfx_release_req
+xdr_gfx_dirlist
+xdr_gfx_readdir_rsp
+xdr_gfx_dirplist
+xdr_gfx_readdirp_rsp
+xdr_gfx_set_lk_ver_rsp
+xdr_gfx_set_lk_ver_req
+xdr_gfx_event_notify_req
+xdr_gfx_getsnap_name_uuid_req
+xdr_gfx_getsnap_name_uuid_rsp
+xdr_gfx_getactivelk_rsp
+xdr_gfx_getactivelk_req
+xdr_gfx_setactivelk_req
+xdr_gfx_put_req
+xdr_compound_req_v2
+xdr_gfx_compound_req
+xdr_compound_rsp_v2
+xdr_gfx_compound_rsp
+xdr_gfx_copy_file_range_req
diff --git a/rpc/xdr/src/mount3udp.x b/rpc/xdr/src/mount3udp.x
index 90b5b741abe..e8366df400c 100644
--- a/rpc/xdr/src/mount3udp.x
+++ b/rpc/xdr/src/mount3udp.x
@@ -1,26 +1,24 @@
/*
- Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/compat.h>
+
/* This is used by rpcgen to auto generate the rpc stubs.
* mount3udp_svc.c is heavily modified though
*/
+%#include "xdr-nfs3.h"
+
const MNTUDPPATHLEN = 1024;
typedef string mntudpdirpath<MNTPATHLEN>;
diff --git a/rpc/xdr/src/msg-nfs3.c b/rpc/xdr/src/msg-nfs3.c
index 8078f23f097..d14a731b62a 100644
--- a/rpc/xdr/src/msg-nfs3.c
+++ b/rpc/xdr/src/msg-nfs3.c
@@ -1,27 +1,13 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <sys/uio.h>
#include <rpc/rpc.h>
#include <rpc/xdr.h>
@@ -32,35 +18,34 @@
#include "xdr-generic.h"
#include "xdr-common.h"
-
/* Decode the mount path from the network message in inmsg
* into the memory referenced by outpath.iov_base.
* The size allocated for outpath.iov_base is outpath.iov_len.
* The size of the path extracted from the message is returned.
*/
ssize_t
-xdr_to_mountpath (struct iovec outpath, struct iovec inmsg)
+xdr_to_mountpath(struct iovec outpath, struct iovec inmsg)
{
- XDR xdr;
- ssize_t ret = -1;
- char *mntpath = NULL;
+ XDR xdr;
+ ssize_t ret = -1;
+ char *mntpath = NULL;
- if ((!outpath.iov_base) || (!inmsg.iov_base))
- return -1;
+ if ((!outpath.iov_base) || (!inmsg.iov_base))
+ return -1;
- xdrmem_create (&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
- XDR_DECODE);
+ xdrmem_create(&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
+ XDR_DECODE);
- mntpath = outpath.iov_base;
- if (!xdr_dirpath (&xdr, (dirpath *)&mntpath)) {
- ret = -1;
- goto ret;
- }
+ mntpath = outpath.iov_base;
+ if (!xdr_dirpath(&xdr, (dirpath *)&mntpath)) {
+ ret = -1;
+ goto ret;
+ }
- ret = xdr_decoded_length (xdr);
+ ret = xdr_decoded_length(xdr);
ret:
- return ret;
+ return ret;
}
/* Translate the mountres3 structure in res into XDR format into memory
@@ -68,485 +53,429 @@ ret:
* Returns the number of bytes used in encoding into XDR format.
*/
ssize_t
-xdr_serialize_mountres3 (struct iovec outmsg, mountres3 *res)
+xdr_serialize_mountres3(struct iovec outmsg, mountres3 *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_mountres3);
+ return xdr_serialize_generic(outmsg, (void *)res, (xdrproc_t)xdr_mountres3);
}
-
ssize_t
-xdr_serialize_mountbody (struct iovec outmsg, mountbody *mb)
+xdr_serialize_mountbody(struct iovec outmsg, mountbody *mb)
{
- return xdr_serialize_generic (outmsg, (void *)mb,
- (xdrproc_t)xdr_mountbody);
+ return xdr_serialize_generic(outmsg, (void *)mb, (xdrproc_t)xdr_mountbody);
}
ssize_t
-xdr_serialize_mountlist (struct iovec outmsg, mountlist *ml)
+xdr_serialize_mountlist(struct iovec outmsg, mountlist *ml)
{
- return xdr_serialize_generic (outmsg, (void *)ml,
- (xdrproc_t)xdr_mountlist);
+ return xdr_serialize_generic(outmsg, (void *)ml, (xdrproc_t)xdr_mountlist);
}
-
ssize_t
-xdr_serialize_mountstat3 (struct iovec outmsg, mountstat3 *m)
+xdr_serialize_mountstat3(struct iovec outmsg, mountstat3 *m)
{
- return xdr_serialize_generic (outmsg, (void *)m,
- (xdrproc_t)xdr_mountstat3);
+ return xdr_serialize_generic(outmsg, (void *)m, (xdrproc_t)xdr_mountstat3);
}
-
ssize_t
-xdr_to_getattr3args (struct iovec inmsg, getattr3args *ga)
+xdr_to_getattr3args(struct iovec inmsg, getattr3args *ga)
{
- return xdr_to_generic (inmsg, (void *)ga,
- (xdrproc_t)xdr_getattr3args);
+ return xdr_to_generic(inmsg, (void *)ga, (xdrproc_t)xdr_getattr3args);
}
-
ssize_t
-xdr_serialize_getattr3res (struct iovec outmsg, getattr3res *res)
+xdr_serialize_getattr3res(struct iovec outmsg, getattr3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_getattr3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_getattr3res);
}
-
ssize_t
-xdr_serialize_setattr3res (struct iovec outmsg, setattr3res *res)
+xdr_serialize_setattr3res(struct iovec outmsg, setattr3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_setattr3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_setattr3res);
}
-
ssize_t
-xdr_to_setattr3args (struct iovec inmsg, setattr3args *sa)
+xdr_to_setattr3args(struct iovec inmsg, setattr3args *sa)
{
- return xdr_to_generic (inmsg, (void *)sa,
- (xdrproc_t)xdr_setattr3args);
+ return xdr_to_generic(inmsg, (void *)sa, (xdrproc_t)xdr_setattr3args);
}
-
ssize_t
-xdr_serialize_lookup3res (struct iovec outmsg, lookup3res *res)
+xdr_serialize_lookup3res(struct iovec outmsg, lookup3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_lookup3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_lookup3res);
}
-
ssize_t
-xdr_to_lookup3args (struct iovec inmsg, lookup3args *la)
+xdr_to_lookup3args(struct iovec inmsg, lookup3args *la)
{
- return xdr_to_generic (inmsg, (void *)la,
- (xdrproc_t)xdr_lookup3args);
+ return xdr_to_generic(inmsg, (void *)la, (xdrproc_t)xdr_lookup3args);
}
-
ssize_t
-xdr_to_access3args (struct iovec inmsg, access3args *ac)
+xdr_to_access3args(struct iovec inmsg, access3args *ac)
{
- return xdr_to_generic (inmsg,(void *)ac,
- (xdrproc_t)xdr_access3args);
+ return xdr_to_generic(inmsg, (void *)ac, (xdrproc_t)xdr_access3args);
}
-
ssize_t
-xdr_serialize_access3res (struct iovec outmsg, access3res *res)
+xdr_serialize_access3res(struct iovec outmsg, access3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_access3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_access3res);
}
-
ssize_t
-xdr_to_readlink3args (struct iovec inmsg, readlink3args *ra)
+xdr_to_readlink3args(struct iovec inmsg, readlink3args *ra)
{
- return xdr_to_generic (inmsg, (void *)ra,
- (xdrproc_t)xdr_readlink3args);
+ return xdr_to_generic(inmsg, (void *)ra, (xdrproc_t)xdr_readlink3args);
}
-
ssize_t
-xdr_serialize_readlink3res (struct iovec outmsg, readlink3res *res)
+xdr_serialize_readlink3res(struct iovec outmsg, readlink3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_readlink3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_readlink3res);
}
-
ssize_t
-xdr_to_read3args (struct iovec inmsg, read3args *ra)
+xdr_to_read3args(struct iovec inmsg, read3args *ra)
{
- return xdr_to_generic (inmsg, (void *)ra, (xdrproc_t)xdr_read3args);
+ return xdr_to_generic(inmsg, (void *)ra, (xdrproc_t)xdr_read3args);
}
-
ssize_t
-xdr_serialize_read3res (struct iovec outmsg, read3res *res)
+xdr_serialize_read3res(struct iovec outmsg, read3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_read3res);
+ return xdr_serialize_generic(outmsg, (void *)res, (xdrproc_t)xdr_read3res);
}
ssize_t
-xdr_serialize_read3res_nocopy (struct iovec outmsg, read3res *res)
+xdr_serialize_read3res_nocopy(struct iovec outmsg, read3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_read3res_nocopy);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_read3res_nocopy);
}
-
ssize_t
-xdr_to_write3args (struct iovec inmsg, write3args *wa)
+xdr_to_write3args(struct iovec inmsg, write3args *wa)
{
- return xdr_to_generic (inmsg, (void *)wa,(xdrproc_t)xdr_write3args);
+ return xdr_to_generic(inmsg, (void *)wa, (xdrproc_t)xdr_write3args);
}
-
ssize_t
-xdr_to_write3args_nocopy (struct iovec inmsg, write3args *wa,
- struct iovec *payload)
+xdr_to_write3args_nocopy(struct iovec inmsg, write3args *wa,
+ struct iovec *payload)
{
- return xdr_to_generic_payload (inmsg, (void *)wa,
- (xdrproc_t)xdr_write3args, payload);
+ return xdr_to_generic_payload(inmsg, (void *)wa, (xdrproc_t)xdr_write3args,
+ payload);
}
-
ssize_t
-xdr_serialize_write3res (struct iovec outmsg, write3res *res)
+xdr_serialize_write3res(struct iovec outmsg, write3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_write3res);
+ return xdr_serialize_generic(outmsg, (void *)res, (xdrproc_t)xdr_write3res);
}
-
ssize_t
-xdr_to_create3args (struct iovec inmsg, create3args *ca)
+xdr_to_create3args(struct iovec inmsg, create3args *ca)
{
- return xdr_to_generic (inmsg, (void *)ca,
- (xdrproc_t)xdr_create3args);
+ return xdr_to_generic(inmsg, (void *)ca, (xdrproc_t)xdr_create3args);
}
-
ssize_t
-xdr_serialize_create3res (struct iovec outmsg, create3res *res)
+xdr_serialize_create3res(struct iovec outmsg, create3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_create3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_create3res);
}
-
ssize_t
-xdr_serialize_mkdir3res (struct iovec outmsg, mkdir3res *res)
+xdr_serialize_mkdir3res(struct iovec outmsg, mkdir3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_mkdir3res);
+ return xdr_serialize_generic(outmsg, (void *)res, (xdrproc_t)xdr_mkdir3res);
}
-
ssize_t
-xdr_to_mkdir3args (struct iovec inmsg, mkdir3args *ma)
+xdr_to_mkdir3args(struct iovec inmsg, mkdir3args *ma)
{
- return xdr_to_generic (inmsg, (void *)ma,
- (xdrproc_t)xdr_mkdir3args);
+ return xdr_to_generic(inmsg, (void *)ma, (xdrproc_t)xdr_mkdir3args);
}
-
ssize_t
-xdr_to_symlink3args (struct iovec inmsg, symlink3args *sa)
+xdr_to_symlink3args(struct iovec inmsg, symlink3args *sa)
{
- return xdr_to_generic (inmsg, (void *)sa,
- (xdrproc_t)xdr_symlink3args);
+ return xdr_to_generic(inmsg, (void *)sa, (xdrproc_t)xdr_symlink3args);
}
-
ssize_t
-xdr_serialize_symlink3res (struct iovec outmsg, symlink3res *res)
+xdr_serialize_symlink3res(struct iovec outmsg, symlink3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_symlink3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_symlink3res);
}
-
ssize_t
-xdr_to_mknod3args (struct iovec inmsg, mknod3args *ma)
+xdr_to_mknod3args(struct iovec inmsg, mknod3args *ma)
{
- return xdr_to_generic (inmsg, (void *)ma,
- (xdrproc_t)xdr_mknod3args);
+ return xdr_to_generic(inmsg, (void *)ma, (xdrproc_t)xdr_mknod3args);
}
-
ssize_t
-xdr_serialize_mknod3res (struct iovec outmsg, mknod3res *res)
+xdr_serialize_mknod3res(struct iovec outmsg, mknod3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_mknod3res);
+ return xdr_serialize_generic(outmsg, (void *)res, (xdrproc_t)xdr_mknod3res);
}
-
ssize_t
-xdr_to_remove3args (struct iovec inmsg, remove3args *ra)
+xdr_to_remove3args(struct iovec inmsg, remove3args *ra)
{
- return xdr_to_generic (inmsg, (void *)ra,
- (xdrproc_t)xdr_remove3args);
+ return xdr_to_generic(inmsg, (void *)ra, (xdrproc_t)xdr_remove3args);
}
-
ssize_t
-xdr_serialize_remove3res (struct iovec outmsg, remove3res *res)
+xdr_serialize_remove3res(struct iovec outmsg, remove3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_remove3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_remove3res);
}
-
ssize_t
-xdr_to_rmdir3args (struct iovec inmsg, rmdir3args *ra)
+xdr_to_rmdir3args(struct iovec inmsg, rmdir3args *ra)
{
- return xdr_to_generic (inmsg, (void *)ra,
- (xdrproc_t)xdr_rmdir3args);
+ return xdr_to_generic(inmsg, (void *)ra, (xdrproc_t)xdr_rmdir3args);
}
-
ssize_t
-xdr_serialize_rmdir3res (struct iovec outmsg, rmdir3res *res)
+xdr_serialize_rmdir3res(struct iovec outmsg, rmdir3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_rmdir3res);
+ return xdr_serialize_generic(outmsg, (void *)res, (xdrproc_t)xdr_rmdir3res);
}
-
ssize_t
-xdr_serialize_rename3res (struct iovec outmsg, rename3res *res)
+xdr_serialize_rename3res(struct iovec outmsg, rename3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_rename3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_rename3res);
}
-
ssize_t
-xdr_to_rename3args (struct iovec inmsg, rename3args *ra)
+xdr_to_rename3args(struct iovec inmsg, rename3args *ra)
{
- return xdr_to_generic (inmsg, (void *)ra,
- (xdrproc_t)xdr_rename3args);
+ return xdr_to_generic(inmsg, (void *)ra, (xdrproc_t)xdr_rename3args);
}
-
ssize_t
-xdr_serialize_link3res (struct iovec outmsg, link3res *li)
+xdr_serialize_link3res(struct iovec outmsg, link3res *li)
{
- return xdr_serialize_generic (outmsg, (void *)li,
- (xdrproc_t)xdr_link3res);
+ return xdr_serialize_generic(outmsg, (void *)li, (xdrproc_t)xdr_link3res);
}
-
ssize_t
-xdr_to_link3args (struct iovec inmsg, link3args *la)
+xdr_to_link3args(struct iovec inmsg, link3args *la)
{
- return xdr_to_generic (inmsg, (void *)la, (xdrproc_t)xdr_link3args);
+ return xdr_to_generic(inmsg, (void *)la, (xdrproc_t)xdr_link3args);
}
-
ssize_t
-xdr_to_readdir3args (struct iovec inmsg, readdir3args *rd)
+xdr_to_readdir3args(struct iovec inmsg, readdir3args *rd)
{
- return xdr_to_generic (inmsg, (void *)rd,
- (xdrproc_t)xdr_readdir3args);
+ return xdr_to_generic(inmsg, (void *)rd, (xdrproc_t)xdr_readdir3args);
}
-
ssize_t
-xdr_serialize_readdir3res (struct iovec outmsg, readdir3res *res)
+xdr_serialize_readdir3res(struct iovec outmsg, readdir3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_readdir3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_readdir3res);
}
-
ssize_t
-xdr_to_readdirp3args (struct iovec inmsg, readdirp3args *rp)
+xdr_to_readdirp3args(struct iovec inmsg, readdirp3args *rp)
{
- return xdr_to_generic (inmsg, (void *)rp,
- (xdrproc_t)xdr_readdirp3args);
+ return xdr_to_generic(inmsg, (void *)rp, (xdrproc_t)xdr_readdirp3args);
}
-
ssize_t
-xdr_serialize_readdirp3res (struct iovec outmsg, readdirp3res *res)
+xdr_serialize_readdirp3res(struct iovec outmsg, readdirp3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_readdirp3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_readdirp3res);
}
-
ssize_t
-xdr_to_fsstat3args (struct iovec inmsg, fsstat3args *fa)
+xdr_to_fsstat3args(struct iovec inmsg, fsstat3args *fa)
{
- return xdr_to_generic (inmsg, (void *)fa,
- (xdrproc_t)xdr_fsstat3args);
+ return xdr_to_generic(inmsg, (void *)fa, (xdrproc_t)xdr_fsstat3args);
}
-
ssize_t
-xdr_serialize_fsstat3res (struct iovec outmsg, fsstat3res *res)
+xdr_serialize_fsstat3res(struct iovec outmsg, fsstat3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_fsstat3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_fsstat3res);
}
ssize_t
-xdr_to_fsinfo3args (struct iovec inmsg, fsinfo3args *fi)
+xdr_to_fsinfo3args(struct iovec inmsg, fsinfo3args *fi)
{
- return xdr_to_generic (inmsg, (void *)fi,
- (xdrproc_t)xdr_fsinfo3args);
+ return xdr_to_generic(inmsg, (void *)fi, (xdrproc_t)xdr_fsinfo3args);
}
-
ssize_t
-xdr_serialize_fsinfo3res (struct iovec outmsg, fsinfo3res *res)
+xdr_serialize_fsinfo3res(struct iovec outmsg, fsinfo3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_fsinfo3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_fsinfo3res);
}
-
ssize_t
-xdr_to_pathconf3args (struct iovec inmsg, pathconf3args *pc)
+xdr_to_pathconf3args(struct iovec inmsg, pathconf3args *pc)
{
- return xdr_to_generic (inmsg, (void *)pc,
- (xdrproc_t)xdr_pathconf3args);}
-
+ return xdr_to_generic(inmsg, (void *)pc, (xdrproc_t)xdr_pathconf3args);
+}
ssize_t
-xdr_serialize_pathconf3res (struct iovec outmsg, pathconf3res *res)
+xdr_serialize_pathconf3res(struct iovec outmsg, pathconf3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_pathconf3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_pathconf3res);
}
-
ssize_t
-xdr_to_commit3args (struct iovec inmsg, commit3args *ca)
+xdr_to_commit3args(struct iovec inmsg, commit3args *ca)
{
- return xdr_to_generic (inmsg, (void *)ca,
- (xdrproc_t)xdr_commit3args);
+ return xdr_to_generic(inmsg, (void *)ca, (xdrproc_t)xdr_commit3args);
}
-
ssize_t
-xdr_serialize_commit3res (struct iovec outmsg, commit3res *res)
+xdr_serialize_commit3res(struct iovec outmsg, commit3res *res)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_commit3res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_commit3res);
}
-
ssize_t
-xdr_serialize_exports (struct iovec outmsg, exports *elist)
+xdr_serialize_exports(struct iovec outmsg, exports *elist)
{
- XDR xdr;
- ssize_t ret = -1;
+ XDR xdr;
+ ssize_t ret = -1;
- if ((!outmsg.iov_base) || (!elist))
- return -1;
+ if ((!outmsg.iov_base) || (!elist))
+ return -1;
- xdrmem_create (&xdr, outmsg.iov_base, (unsigned int)outmsg.iov_len,
- XDR_ENCODE);
+ xdrmem_create(&xdr, outmsg.iov_base, (unsigned int)outmsg.iov_len,
+ XDR_ENCODE);
- if (!xdr_exports (&xdr, elist))
- goto ret;
+ if (!xdr_exports(&xdr, elist))
+ goto ret;
- ret = xdr_decoded_length (xdr);
+ ret = xdr_decoded_length(xdr);
ret:
- return ret;
+ return ret;
+}
+
+ssize_t
+xdr_serialize_nfsstat3(struct iovec outmsg, nfsstat3 *s)
+{
+ return xdr_serialize_generic(outmsg, (void *)s, (xdrproc_t)xdr_nfsstat3);
+}
+
+ssize_t
+xdr_to_nlm4_testargs(struct iovec inmsg, nlm4_testargs *args)
+{
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_nlm4_testargs);
+}
+
+ssize_t
+xdr_serialize_nlm4_testres(struct iovec outmsg, nlm4_testres *res)
+{
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_nlm4_testres);
}
+ssize_t
+xdr_to_nlm4_lockargs(struct iovec inmsg, nlm4_lockargs *args)
+{
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_nlm4_lockargs);
+}
ssize_t
-xdr_serialize_nfsstat3 (struct iovec outmsg, nfsstat3 *s)
+xdr_serialize_nlm4_res(struct iovec outmsg, nlm4_res *res)
{
- return xdr_serialize_generic (outmsg, (void *)s,
- (xdrproc_t)xdr_nfsstat3);
+ return xdr_serialize_generic(outmsg, (void *)res, (xdrproc_t)xdr_nlm4_res);
}
ssize_t
-xdr_to_nlm4_testargs (struct iovec inmsg, nlm4_testargs *args)
+xdr_to_nlm4_cancelargs(struct iovec inmsg, nlm4_cancargs *args)
{
- return xdr_to_generic (inmsg, (void*)args,
- (xdrproc_t)xdr_nlm4_testargs);
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_nlm4_cancargs);
}
ssize_t
-xdr_serialize_nlm4_testres (struct iovec outmsg, nlm4_testres *res)
+xdr_to_nlm4_unlockargs(struct iovec inmsg, nlm4_unlockargs *args)
{
- return xdr_serialize_generic (outmsg, (void*)res,
- (xdrproc_t)xdr_nlm4_testres);
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_nlm4_unlockargs);
}
ssize_t
-xdr_to_nlm4_lockargs (struct iovec inmsg, nlm4_lockargs *args)
+xdr_to_nlm4_shareargs(struct iovec inmsg, nlm4_shareargs *args)
{
- return xdr_to_generic (inmsg, (void*)args,
- (xdrproc_t)xdr_nlm4_lockargs);
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_nlm4_shareargs);
}
ssize_t
-xdr_serialize_nlm4_res (struct iovec outmsg, nlm4_res *res)
+xdr_serialize_nlm4_shareres(struct iovec outmsg, nlm4_shareres *res)
{
- return xdr_serialize_generic (outmsg, (void*)res,
- (xdrproc_t)xdr_nlm4_res);
+ return xdr_serialize_generic(outmsg, (void *)res,
+ (xdrproc_t)xdr_nlm4_shareres);
}
ssize_t
-xdr_to_nlm4_cancelargs (struct iovec inmsg, nlm4_cancargs *args)
+xdr_serialize_nlm4_testargs(struct iovec outmsg, nlm4_testargs *args)
{
- return xdr_to_generic (inmsg, (void*)args,
- (xdrproc_t)xdr_nlm4_cancargs);
+ return xdr_serialize_generic(outmsg, (void *)args,
+ (xdrproc_t)xdr_nlm4_testargs);
}
ssize_t
-xdr_to_nlm4_unlockargs (struct iovec inmsg, nlm4_unlockargs *args)
+xdr_to_nlm4_res(struct iovec inmsg, nlm4_res *args)
{
- return xdr_to_generic (inmsg, (void*)args,
- (xdrproc_t)xdr_nlm4_unlockargs);
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_nlm4_res);
}
ssize_t
-xdr_to_nlm4_shareargs (struct iovec inmsg, nlm4_shareargs *args)
+xdr_to_nlm4_freeallargs(struct iovec inmsg, nlm4_freeallargs *args)
{
- return xdr_to_generic (inmsg, (void*)args,
- (xdrproc_t)xdr_nlm4_shareargs);
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_nlm4_freeallargs);
}
ssize_t
-xdr_serialize_nlm4_shareres (struct iovec outmsg, nlm4_shareres *res)
+xdr_to_getaclargs(struct iovec inmsg, getaclargs *args)
{
- return xdr_serialize_generic (outmsg, (void *)res,
- (xdrproc_t)xdr_nlm4_shareres);
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_getaclargs);
}
ssize_t
-xdr_serialize_nlm4_testargs (struct iovec outmsg, nlm4_testargs *args)
+xdr_to_setaclargs(struct iovec inmsg, setaclargs *args)
{
- return xdr_serialize_generic (outmsg, (void*)args,
- (xdrproc_t)xdr_nlm4_testargs);
+ return xdr_to_generic(inmsg, (void *)args, (xdrproc_t)xdr_setaclargs);
}
ssize_t
-xdr_to_nlm4_res (struct iovec inmsg, nlm4_res *args)
+xdr_serialize_getaclreply(struct iovec inmsg, getaclreply *res)
{
- return xdr_to_generic (inmsg, (void*)args,
- (xdrproc_t)xdr_nlm4_res);
+ return xdr_serialize_generic(inmsg, (void *)res,
+ (xdrproc_t)xdr_getaclreply);
}
ssize_t
-xdr_to_nlm4_freeallargs (struct iovec inmsg, nlm4_freeallargs *args)
+xdr_serialize_setaclreply(struct iovec inmsg, setaclreply *res)
{
- return xdr_to_generic (inmsg, (void*)args,
- (xdrproc_t)xdr_nlm4_freeallargs);
+ return xdr_serialize_generic(inmsg, (void *)res,
+ (xdrproc_t)xdr_setaclreply);
}
diff --git a/rpc/xdr/src/msg-nfs3.h b/rpc/xdr/src/msg-nfs3.h
index 701df300a6e..869ddc3524a 100644
--- a/rpc/xdr/src/msg-nfs3.h
+++ b/rpc/xdr/src/msg-nfs3.h
@@ -1,220 +1,219 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _MSG_NFS3_H_
#define _MSG_NFS3_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "xdr-nfs3.h"
#include "nlm4-xdr.h"
+#include "acl3-xdr.h"
#include <sys/types.h>
#include <sys/uio.h>
extern ssize_t
-xdr_to_mountpath (struct iovec outpath, struct iovec inmsg);
+xdr_to_mountpath(struct iovec outpath, struct iovec inmsg);
+
+extern ssize_t
+xdr_serialize_mountres3(struct iovec outmsg, mountres3 *res);
+
+extern ssize_t
+xdr_serialize_mountbody(struct iovec outmsg, mountbody *mb);
+
+extern ssize_t
+xdr_to_getattr3args(struct iovec inmsg, getattr3args *ga);
+
+extern ssize_t
+xdr_serialize_getattr3res(struct iovec outmsg, getattr3res *res);
extern ssize_t
-xdr_serialize_mountres3 (struct iovec outmsg, mountres3 *res);
+xdr_serialize_setattr3res(struct iovec outmsg, setattr3res *res);
extern ssize_t
-xdr_serialize_mountbody (struct iovec outmsg, mountbody *mb);
+xdr_to_setattr3args(struct iovec inmsg, setattr3args *sa);
extern ssize_t
-xdr_to_getattr3args (struct iovec inmsg, getattr3args *ga);
+xdr_serialize_lookup3res(struct iovec outmsg, lookup3res *res);
extern ssize_t
-xdr_serialize_getattr3res (struct iovec outmsg, getattr3res *res);
+xdr_to_lookup3args(struct iovec inmsg, lookup3args *la);
extern ssize_t
-xdr_serialize_setattr3res (struct iovec outmsg, setattr3res *res);
+xdr_to_access3args(struct iovec inmsg, access3args *ac);
extern ssize_t
-xdr_to_setattr3args (struct iovec inmsg, setattr3args *sa);
+xdr_serialize_access3res(struct iovec outmsg, access3res *res);
extern ssize_t
-xdr_serialize_lookup3res (struct iovec outmsg, lookup3res *res);
+xdr_to_readlink3args(struct iovec inmsg, readlink3args *ra);
extern ssize_t
-xdr_to_lookup3args (struct iovec inmsg, lookup3args *la);
+xdr_serialize_readlink3res(struct iovec outmsg, readlink3res *res);
extern ssize_t
-xdr_to_access3args (struct iovec inmsg, access3args *ac);
+xdr_to_read3args(struct iovec inmsg, read3args *ra);
extern ssize_t
-xdr_serialize_access3res (struct iovec outmsg, access3res *res);
+xdr_serialize_read3res(struct iovec outmsg, read3res *res);
extern ssize_t
-xdr_to_readlink3args (struct iovec inmsg, readlink3args *ra);
+xdr_serialize_read3res_nocopy(struct iovec outmsg, read3res *res);
extern ssize_t
-xdr_serialize_readlink3res (struct iovec outmsg, readlink3res *res);
+xdr_to_write3args(struct iovec inmsg, write3args *wa);
extern ssize_t
-xdr_to_read3args (struct iovec inmsg, read3args *ra);
+xdr_to_write3args_nocopy(struct iovec inmsg, write3args *wa,
+ struct iovec *payload);
extern ssize_t
-xdr_serialize_read3res (struct iovec outmsg, read3res *res);
+xdr_serialize_write3res(struct iovec outmsg, write3res *res);
extern ssize_t
-xdr_serialize_read3res_nocopy (struct iovec outmsg, read3res *res);
+xdr_to_create3args(struct iovec inmsg, create3args *ca);
extern ssize_t
-xdr_to_write3args (struct iovec inmsg, write3args *wa);
+xdr_serialize_create3res(struct iovec outmsg, create3res *res);
extern ssize_t
-xdr_to_write3args_nocopy (struct iovec inmsg, write3args *wa,
- struct iovec *payload);
+xdr_serialize_mkdir3res(struct iovec outmsg, mkdir3res *res);
extern ssize_t
-xdr_serialize_write3res (struct iovec outmsg, write3res *res);
+xdr_to_mkdir3args(struct iovec inmsg, mkdir3args *ma);
extern ssize_t
-xdr_to_create3args (struct iovec inmsg, create3args *ca);
+xdr_to_symlink3args(struct iovec inmsg, symlink3args *sa);
extern ssize_t
-xdr_serialize_create3res (struct iovec outmsg, create3res *res);
+xdr_serialize_symlink3res(struct iovec outmsg, symlink3res *res);
extern ssize_t
-xdr_serialize_mkdir3res (struct iovec outmsg, mkdir3res *res);
+xdr_to_mknod3args(struct iovec inmsg, mknod3args *ma);
extern ssize_t
-xdr_to_mkdir3args (struct iovec inmsg, mkdir3args *ma);
+xdr_serialize_mknod3res(struct iovec outmsg, mknod3res *res);
extern ssize_t
-xdr_to_symlink3args (struct iovec inmsg, symlink3args *sa);
+xdr_to_remove3args(struct iovec inmsg, remove3args *ra);
extern ssize_t
-xdr_serialize_symlink3res (struct iovec outmsg, symlink3res *res);
+xdr_serialize_remove3res(struct iovec outmsg, remove3res *res);
extern ssize_t
-xdr_to_mknod3args (struct iovec inmsg, mknod3args *ma);
+xdr_to_rmdir3args(struct iovec inmsg, rmdir3args *ra);
extern ssize_t
-xdr_serialize_mknod3res (struct iovec outmsg, mknod3res *res);
+xdr_serialize_rmdir3res(struct iovec outmsg, rmdir3res *res);
extern ssize_t
-xdr_to_remove3args (struct iovec inmsg, remove3args *ra);
+xdr_serialize_rename3res(struct iovec outmsg, rename3res *res);
extern ssize_t
-xdr_serialize_remove3res (struct iovec outmsg, remove3res *res);
+xdr_to_rename3args(struct iovec inmsg, rename3args *ra);
extern ssize_t
-xdr_to_rmdir3args (struct iovec inmsg, rmdir3args *ra);
+xdr_serialize_link3res(struct iovec outmsg, link3res *li);
extern ssize_t
-xdr_serialize_rmdir3res (struct iovec outmsg, rmdir3res *res);
+xdr_to_link3args(struct iovec inmsg, link3args *la);
extern ssize_t
-xdr_serialize_rename3res (struct iovec outmsg, rename3res *res);
+xdr_to_readdir3args(struct iovec inmsg, readdir3args *rd);
extern ssize_t
-xdr_to_rename3args (struct iovec inmsg, rename3args *ra);
+xdr_serialize_readdir3res(struct iovec outmsg, readdir3res *res);
extern ssize_t
-xdr_serialize_link3res (struct iovec outmsg, link3res *li);
+xdr_to_readdirp3args(struct iovec inmsg, readdirp3args *rp);
extern ssize_t
-xdr_to_link3args (struct iovec inmsg, link3args *la);
+xdr_serialize_readdirp3res(struct iovec outmsg, readdirp3res *res);
extern ssize_t
-xdr_to_readdir3args (struct iovec inmsg, readdir3args *rd);
+xdr_to_fsstat3args(struct iovec inmsg, fsstat3args *fa);
extern ssize_t
-xdr_serialize_readdir3res (struct iovec outmsg, readdir3res *res);
+xdr_serialize_fsstat3res(struct iovec outmsg, fsstat3res *res);
extern ssize_t
-xdr_to_readdirp3args (struct iovec inmsg, readdirp3args *rp);
+xdr_to_fsinfo3args(struct iovec inmsg, fsinfo3args *fi);
extern ssize_t
-xdr_serialize_readdirp3res (struct iovec outmsg, readdirp3res *res);
+xdr_serialize_fsinfo3res(struct iovec outmsg, fsinfo3res *res);
extern ssize_t
-xdr_to_fsstat3args (struct iovec inmsg, fsstat3args *fa);
+xdr_to_pathconf3args(struct iovec inmsg, pathconf3args *pc);
extern ssize_t
-xdr_serialize_fsstat3res (struct iovec outmsg, fsstat3res *res);
+xdr_serialize_pathconf3res(struct iovec outmsg, pathconf3res *res);
extern ssize_t
-xdr_to_fsinfo3args (struct iovec inmsg, fsinfo3args *fi);
+xdr_to_commit3args(struct iovec inmsg, commit3args *ca);
extern ssize_t
-xdr_serialize_fsinfo3res (struct iovec outmsg, fsinfo3res *res);
+xdr_serialize_commit3res(struct iovec outmsg, commit3res *res);
extern ssize_t
-xdr_to_pathconf3args (struct iovec inmsg, pathconf3args *pc);
+xdr_serialize_exports(struct iovec outmsg, exports *elist);
extern ssize_t
-xdr_serialize_pathconf3res (struct iovec outmsg, pathconf3res *res);
+xdr_serialize_mountlist(struct iovec outmsg, mountlist *ml);
extern ssize_t
-xdr_to_commit3args (struct iovec inmsg, commit3args *ca);
+xdr_serialize_mountstat3(struct iovec outmsg, mountstat3 *m);
extern ssize_t
-xdr_serialize_commit3res (struct iovec outmsg, commit3res *res);
+xdr_serialize_nfsstat3(struct iovec outmsg, nfsstat3 *s);
extern ssize_t
-xdr_serialize_exports (struct iovec outmsg, exports *elist);
+xdr_to_nlm4_testargs(struct iovec inmsg, nlm4_testargs *args);
extern ssize_t
-xdr_serialize_mountlist (struct iovec outmsg, mountlist *ml);
+xdr_serialize_nlm4_testres(struct iovec outmsg, nlm4_testres *res);
extern ssize_t
-xdr_serialize_mountstat3 (struct iovec outmsg, mountstat3 *m);
+xdr_to_nlm4_lockargs(struct iovec inmsg, nlm4_lockargs *args);
extern ssize_t
-xdr_serialize_nfsstat3 (struct iovec outmsg, nfsstat3 *s);
+xdr_serialize_nlm4_res(struct iovec outmsg, nlm4_res *res);
extern ssize_t
-xdr_to_nlm4_testargs (struct iovec inmsg, nlm4_testargs *args);
+xdr_to_nlm4_cancelargs(struct iovec inmsg, nlm4_cancargs *args);
extern ssize_t
-xdr_serialize_nlm4_testres (struct iovec outmsg, nlm4_testres *res);
+xdr_to_nlm4_unlockargs(struct iovec inmsg, nlm4_unlockargs *args);
extern ssize_t
-xdr_to_nlm4_lockargs (struct iovec inmsg, nlm4_lockargs *args);
+xdr_to_nlm4_shareargs(struct iovec inmsg, nlm4_shareargs *args);
extern ssize_t
-xdr_serialize_nlm4_res (struct iovec outmsg, nlm4_res *res);
+xdr_serialize_nlm4_shareres(struct iovec outmsg, nlm4_shareres *res);
extern ssize_t
-xdr_to_nlm4_cancelargs (struct iovec inmsg, nlm4_cancargs *args);
+xdr_serialize_nlm4_testargs(struct iovec outmsg, nlm4_testargs *args);
extern ssize_t
-xdr_to_nlm4_unlockargs (struct iovec inmsg, nlm4_unlockargs *args);
+xdr_to_nlm4_res(struct iovec inmsg, nlm4_res *args);
extern ssize_t
-xdr_to_nlm4_shareargs (struct iovec inmsg, nlm4_shareargs *args);
+xdr_to_nlm4_freeallargs(struct iovec inmsg, nlm4_freeallargs *args);
extern ssize_t
-xdr_serialize_nlm4_shareres (struct iovec outmsg, nlm4_shareres *res);
+xdr_to_getaclargs(struct iovec inmsg, getaclargs *args);
extern ssize_t
-xdr_serialize_nlm4_testargs (struct iovec outmsg, nlm4_testargs *args);
+xdr_to_setaclargs(struct iovec inmsg, setaclargs *args);
extern ssize_t
-xdr_to_nlm4_res (struct iovec inmsg, nlm4_res *args);
+xdr_serialize_getaclreply(struct iovec inmsg, getaclreply *res);
extern ssize_t
-xdr_to_nlm4_freeallargs (struct iovec inmsg, nlm4_freeallargs *args);
+xdr_serialize_setaclreply(struct iovec inmsg, setaclreply *res);
#endif
diff --git a/rpc/xdr/src/nlm4-xdr.c b/rpc/xdr/src/nlm4-xdr.c
deleted file mode 100644
index 4cf6a51e8fd..00000000000
--- a/rpc/xdr/src/nlm4-xdr.c
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#include "nlm4-xdr.h"
-
-bool_t
-xdr_netobj (XDR *xdrs, netobj *objp)
-{
- if (!xdr_bytes (xdrs, (char **)&objp->n_bytes, (u_int *) &objp->n_len, MAXNETOBJ_SZ))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_fsh_mode (XDR *xdrs, fsh_mode *objp)
-{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_fsh_access (XDR *xdrs, fsh_access *objp)
-{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_stats (XDR *xdrs, nlm4_stats *objp)
-{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_stat (XDR *xdrs, nlm4_stat *objp)
-{
- if (!xdr_nlm4_stats (xdrs, &objp->stat))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_holder (XDR *xdrs, nlm4_holder *objp)
-{
- if (!xdr_bool (xdrs, &objp->exclusive))
- return FALSE;
- if (!xdr_uint32_t (xdrs, &objp->svid))
- return FALSE;
- if (!xdr_netobj (xdrs, &objp->oh))
- return FALSE;
- if (!xdr_uint64_t (xdrs, &objp->l_offset))
- return FALSE;
- if (!xdr_uint64_t (xdrs, &objp->l_len))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_lock (XDR *xdrs, nlm4_lock *objp)
-{
- if (!xdr_string (xdrs, &objp->caller_name, MAXNAMELEN))
- return FALSE;
- if (!xdr_netobj (xdrs, &objp->fh))
- return FALSE;
- if (!xdr_netobj (xdrs, &objp->oh))
- return FALSE;
- if (!xdr_uint32_t (xdrs, &objp->svid))
- return FALSE;
- if (!xdr_uint64_t (xdrs, &objp->l_offset))
- return FALSE;
- if (!xdr_uint64_t (xdrs, &objp->l_len))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_share (XDR *xdrs, nlm4_share *objp)
-{
- if (!xdr_string (xdrs, &objp->caller_name, MAXNAMELEN))
- return FALSE;
- if (!xdr_netobj (xdrs, &objp->fh))
- return FALSE;
- if (!xdr_netobj (xdrs, &objp->oh))
- return FALSE;
- if (!xdr_fsh_mode (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_fsh_access (xdrs, &objp->access))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_testrply (XDR *xdrs, nlm4_testrply *objp)
-{
- if (!xdr_nlm4_stats (xdrs, &objp->stat))
- return FALSE;
- switch (objp->stat) {
- case nlm4_denied:
- if (!xdr_nlm4_holder (xdrs, &objp->nlm4_testrply_u.holder))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_testres (XDR *xdrs, nlm4_testres *objp)
-{
- if (!xdr_netobj (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_nlm4_testrply (xdrs, &objp->stat))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_testargs (XDR *xdrs, nlm4_testargs *objp)
-{
- if (!xdr_netobj (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->exclusive))
- return FALSE;
- if (!xdr_nlm4_lock (xdrs, &objp->alock))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_res (XDR *xdrs, nlm4_res *objp)
-{
- if (!xdr_netobj (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_nlm4_stat (xdrs, &objp->stat))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_lockargs (XDR *xdrs, nlm4_lockargs *objp)
-{
- if (!xdr_netobj (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->block))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->exclusive))
- return FALSE;
- if (!xdr_nlm4_lock (xdrs, &objp->alock))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->reclaim))
- return FALSE;
- if (!xdr_int (xdrs, &objp->state))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_cancargs (XDR *xdrs, nlm4_cancargs *objp)
-{
- if (!xdr_netobj (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->block))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->exclusive))
- return FALSE;
- if (!xdr_nlm4_lock (xdrs, &objp->alock))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_unlockargs (XDR *xdrs, nlm4_unlockargs *objp)
-{
- if (!xdr_netobj (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_nlm4_lock (xdrs, &objp->alock))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_shareargs (XDR *xdrs, nlm4_shareargs *objp)
-{
- if (!xdr_netobj (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_nlm4_share (xdrs, &objp->share))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->reclaim))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_shareres (XDR *xdrs, nlm4_shareres *objp)
-{
- if (!xdr_netobj (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_nlm4_stats (xdrs, &objp->stat))
- return FALSE;
- if (!xdr_int (xdrs, &objp->sequence))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nlm4_freeallargs (XDR *xdrs, nlm4_freeallargs *objp)
-{
- if (!xdr_string (xdrs, &objp->name, LM_MAXSTRLEN))
- return FALSE;
- if (!xdr_uint32_t (xdrs, &objp->state))
- return FALSE;
- return TRUE;
-}
-
-
-/*
-bool_t
-xdr_nlm_sm_status (XDR *xdrs, nlm_sm_status *objp)
-{
- if (!xdr_string (xdrs, &objp->mon_name, LM_MAXSTRLEN))
- return FALSE;
- if (!xdr_int (xdrs, &objp->state))
- return FALSE;
- if (!xdr_opaque (xdrs, objp->priv, 16))
- return FALSE;
- return TRUE;
-}
-*/
diff --git a/rpc/xdr/src/nlm4-xdr.h b/rpc/xdr/src/nlm4-xdr.h
deleted file mode 100644
index 3799f6b1d98..00000000000
--- a/rpc/xdr/src/nlm4-xdr.h
+++ /dev/null
@@ -1,267 +0,0 @@
-/*
- Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#ifndef _NLM_H_RPCGEN
-#define _NLM_H_RPCGEN
-
-#include <rpc/rpc.h>
-
-#if defined(__NetBSD__)
-#define xdr_u_quad_t xdr_u_int64_t
-#define xdr_quad_t xdr_int64_t
-#define xdr_uint32_t xdr_u_int32_t
-#define xdr_uint64_t xdr_u_int64_t
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define MAXNETOBJ_SZ 1024
-#define LM_MAXSTRLEN 1024
-#define MAXNAMELEN 1025
-
-#if defined(__NetBSD__)
-#define xdr_u_quad_t xdr_u_int64_t
-#define xdr_quad_t xdr_int64_t
-#define xdr_uint32_t xdr_u_int32_t
-#define xdr_uint64_t xdr_u_int64_t
-#endif
-
-/*
- * The following enums are actually bit encoded for efficient
- * boolean algebra.... DON'T change them.....
- */
-
-enum fsh_mode {
- fsm_DN = 0,
- fsm_DR = 1,
- fsm_DW = 2,
- fsm_DRW = 3,
-};
-typedef enum fsh_mode fsh_mode;
-
-enum fsh_access {
- fsa_NONE = 0,
- fsa_R = 1,
- fsa_W = 2,
- fsa_RW = 3,
-};
-typedef enum fsh_access fsh_access;
-/* definitions for NLM version 4 */
-
-enum nlm4_stats {
- nlm4_granted = 0,
- nlm4_denied = 1,
- nlm4_denied_nolock = 2,
- nlm4_blocked = 3,
- nlm4_denied_grace_period = 4,
- nlm4_deadlck = 5,
- nlm4_rofs = 6,
- nlm4_stale_fh = 7,
- nlm4_fbig = 8,
- nlm4_failed = 9,
-};
-typedef enum nlm4_stats nlm4_stats;
-
-struct nlm4_stat {
- nlm4_stats stat;
-};
-typedef struct nlm4_stat nlm4_stat;
-
-struct nlm4_holder {
- bool_t exclusive;
- u_int32_t svid;
- netobj oh;
- u_int64_t l_offset;
- u_int64_t l_len;
-};
-typedef struct nlm4_holder nlm4_holder;
-
-struct nlm4_lock {
- char *caller_name;
- netobj fh;
- netobj oh;
- u_int32_t svid;
- u_int64_t l_offset;
- u_int64_t l_len;
-};
-typedef struct nlm4_lock nlm4_lock;
-
-struct nlm4_share {
- char *caller_name;
- netobj fh;
- netobj oh;
- fsh_mode mode;
- fsh_access access;
-};
-typedef struct nlm4_share nlm4_share;
-
-struct nlm4_testrply {
- nlm4_stats stat;
- union {
- struct nlm4_holder holder;
- } nlm4_testrply_u;
-};
-typedef struct nlm4_testrply nlm4_testrply;
-
-struct nlm4_testres {
- netobj cookie;
- nlm4_testrply stat;
-};
-typedef struct nlm4_testres nlm4_testres;
-
-struct nlm4_testargs {
- netobj cookie;
- bool_t exclusive;
- struct nlm4_lock alock;
-};
-typedef struct nlm4_testargs nlm4_testargs;
-
-struct nlm4_res {
- netobj cookie;
- nlm4_stat stat;
-};
-typedef struct nlm4_res nlm4_res;
-
-struct nlm4_lockargs {
- netobj cookie;
- bool_t block;
- bool_t exclusive;
- struct nlm4_lock alock;
- bool_t reclaim;
- int state;
-};
-typedef struct nlm4_lockargs nlm4_lockargs;
-
-struct nlm4_cancargs {
- netobj cookie;
- bool_t block;
- bool_t exclusive;
- struct nlm4_lock alock;
-};
-typedef struct nlm4_cancargs nlm4_cancargs;
-
-struct nlm4_unlockargs {
- netobj cookie;
- struct nlm4_lock alock;
-};
-typedef struct nlm4_unlockargs nlm4_unlockargs;
-
-struct nlm4_shareargs {
- netobj cookie;
- nlm4_share share;
- bool_t reclaim;
-};
-typedef struct nlm4_shareargs nlm4_shareargs;
-
-struct nlm4_shareres {
- netobj cookie;
- nlm4_stats stat;
- int sequence;
-};
-typedef struct nlm4_shareres nlm4_shareres;
-
-struct nlm4_freeallargs {
- char *name;
- u_int32_t state;
-};
-typedef struct nlm4_freeallargs nlm4_freeallargs;
-
-
-#define NLM4_NULL 0
-#define NLM4_TEST 1
-#define NLM4_LOCK 2
-#define NLM4_CANCEL 3
-#define NLM4_UNLOCK 4
-#define NLM4_GRANTED 5
-#define NLM4_TEST_MSG 6
-#define NLM4_LOCK_MSG 7
-#define NLM4_CANCEL_MSG 8
-#define NLM4_UNLOCK_MSG 9
-#define NLM4_GRANTED_MSG 10
-#define NLM4_TEST_RES 11
-#define NLM4_LOCK_RES 12
-#define NLM4_CANCEL_RES 13
-#define NLM4_UNLOCK_RES 14
-#define NLM4_GRANTED_RES 15
-#define NLM4_SM_NOTIFY 16
-#define NLM4_SEVENTEEN 17
-#define NLM4_EIGHTEEN 18
-#define NLM4_NINETEEN 19
-#define NLM4_SHARE 20
-#define NLM4_UNSHARE 21
-#define NLM4_NM_LOCK 22
-#define NLM4_FREE_ALL 23
-#define NLM4_PROC_COUNT 24
-
-/* the xdr functions */
-
-#if defined(__STDC__) || defined(__cplusplus)
-extern bool_t xdr_netobj (XDR *, netobj*);
-extern bool_t xdr_fsh_mode (XDR *, fsh_mode*);
-extern bool_t xdr_fsh_access (XDR *, fsh_access*);
-extern bool_t xdr_nlm4_stats (XDR *, nlm4_stats*);
-extern bool_t xdr_nlm4_stat (XDR *, nlm4_stat*);
-extern bool_t xdr_nlm4_holder (XDR *, nlm4_holder*);
-extern bool_t xdr_nlm4_lock (XDR *, nlm4_lock*);
-extern bool_t xdr_nlm4_share (XDR *, nlm4_share*);
-extern bool_t xdr_nlm4_testrply (XDR *, nlm4_testrply*);
-extern bool_t xdr_nlm4_testres (XDR *, nlm4_testres*);
-extern bool_t xdr_nlm4_testargs (XDR *, nlm4_testargs*);
-extern bool_t xdr_nlm4_res (XDR *, nlm4_res*);
-extern bool_t xdr_nlm4_lockargs (XDR *, nlm4_lockargs*);
-extern bool_t xdr_nlm4_cancargs (XDR *, nlm4_cancargs*);
-extern bool_t xdr_nlm4_unlockargs (XDR *, nlm4_unlockargs*);
-extern bool_t xdr_nlm4_shareargs (XDR *, nlm4_shareargs*);
-extern bool_t xdr_nlm4_shareres (XDR *, nlm4_shareres*);
-extern bool_t xdr_nlm4_freeallargs (XDR *, nlm4_freeallargs*);
-
-#else /* K&R C */
-extern bool_t xdr_netobj ();
-extern bool_t xdr_fsh_mode ();
-extern bool_t xdr_fsh_access ();
-extern bool_t xdr_nlm4_stats ();
-extern bool_t xdr_nlm4_stat ();
-extern bool_t xdr_nlm4_holder ();
-extern bool_t xdr_nlm4_lock ();
-extern bool_t xdr_nlm4_share ();
-extern bool_t xdr_nlm4_testrply ();
-extern bool_t xdr_nlm4_testres ();
-extern bool_t xdr_nlm4_testargs ();
-extern bool_t xdr_nlm4_res ();
-extern bool_t xdr_nlm4_lockargs ();
-extern bool_t xdr_nlm4_cancargs ();
-extern bool_t xdr_nlm4_unlockargs ();
-extern bool_t xdr_nlm4_shareargs ();
-extern bool_t xdr_nlm4_shareres ();
-extern bool_t xdr_nlm4_freeallargs ();
-
-#endif /* K&R C */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_NLM_H_RPCGEN */
diff --git a/rpc/xdr/src/nlm4.x b/rpc/xdr/src/nlm4-xdr.x
index a6de6f32bf4..847b0e64491 100644
--- a/rpc/xdr/src/nlm4.x
+++ b/rpc/xdr/src/nlm4-xdr.x
@@ -1,29 +1,27 @@
/*
- Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/compat.h>
+
/* .x file defined as according to the RFC */
+%#include "xdr-common.h"
+
const MAXNETOBJ_SZ = 1024;
const LM_MAXSTRLEN = 1024;
const MAXNAMELEN = 1025;
-typedef opaque netobj<MAXNETOBJ_SZ>;
+typedef opaque nlm4_netobj<MAXNETOBJ_SZ>;
#ifdef RPC_HDR
%/*
@@ -68,15 +66,15 @@ struct nlm4_stat {
struct nlm4_holder {
bool exclusive;
u_int32_t svid;
- netobj oh;
+ nlm4_netobj oh;
u_int64_t l_offset;
u_int64_t l_len;
};
struct nlm4_lock {
string caller_name<LM_MAXSTRLEN>;
- netobj fh;
- netobj oh;
+ nlm4_netobj fh;
+ nlm4_netobj oh;
u_int32_t svid;
u_int64_t l_offset;
u_int64_t l_len;
@@ -84,71 +82,71 @@ struct nlm4_lock {
struct nlm4_share {
string caller_name<LM_MAXSTRLEN>;
- netobj fh;
- netobj oh;
+ nlm4_netobj fh;
+ nlm4_netobj oh;
fsh_mode mode;
fsh_access access;
};
union nlm4_testrply switch (nlm4_stats stat) {
- case nlm_denied:
- struct nlm4_holder holder;
+ case nlm4_denied:
+ nlm4_holder holder;
default:
void;
};
struct nlm4_testres {
- netobj cookie;
+ nlm4_netobj cookie;
nlm4_testrply stat;
};
struct nlm4_testargs {
- netobj cookie;
+ nlm4_netobj cookie;
bool exclusive;
- struct nlm4_lock alock;
+ nlm4_lock alock;
};
struct nlm4_res {
- netobj cookie;
+ nlm4_netobj cookie;
nlm4_stat stat;
};
struct nlm4_lockargs {
- netobj cookie;
+ nlm4_netobj cookie;
bool block;
bool exclusive;
- struct nlm4_lock alock;
+ nlm4_lock alock;
bool reclaim; /* used for recovering locks */
- int state; /* specify local status monitor state */
+ int32_t state; /* specify local status monitor state */
};
struct nlm4_cancargs {
- netobj cookie;
+ nlm4_netobj cookie;
bool block;
bool exclusive;
- struct nlm4_lock alock;
+ nlm4_lock alock;
};
struct nlm4_unlockargs {
- netobj cookie;
- struct nlm4_lock alock;
+ nlm4_netobj cookie;
+ nlm4_lock alock;
};
struct nlm4_shareargs {
- netobj cookie;
+ nlm4_netobj cookie;
nlm4_share share;
bool reclaim;
};
struct nlm4_shareres {
- netobj cookie;
+ nlm4_netobj cookie;
nlm4_stats stat;
- int sequence;
+ int32_t sequence;
};
struct nlm4_freeallargs {
string name<LM_MAXSTRLEN>; /* client hostname */
- uint32 state; /* unused */
+ uint32_t state; /* unused */
};
/*
@@ -161,3 +159,9 @@ struct nlm_sm_status {
int state; /* new state */
opaque priv[16]; /* private data */
};
+
+program NLMCBK_PROGRAM {
+ version NLMCBK_V1 {
+ void NLMCBK_SM_NOTIFY(nlm_sm_status) = 16;
+ } = 1;
+} = 100021;
diff --git a/rpc/xdr/src/nlmcbk-xdr.c b/rpc/xdr/src/nlmcbk-xdr.c
deleted file mode 100644
index 26446ab1bf4..00000000000
--- a/rpc/xdr/src/nlmcbk-xdr.c
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#include "nlmcbk-xdr.h"
-
-bool_t
-xdr_nlm_sm_status (XDR *xdrs, nlm_sm_status *objp)
-{
- if (!xdr_string (xdrs, &objp->mon_name, LM_MAXSTRLEN))
- return FALSE;
- if (!xdr_int (xdrs, &objp->state))
- return FALSE;
- if (!xdr_opaque (xdrs, objp->priv, 16))
- return FALSE;
- return TRUE;
-}
diff --git a/rpc/xdr/src/nlmcbk-xdr.h b/rpc/xdr/src/nlmcbk-xdr.h
deleted file mode 100644
index 4d6d670ab38..00000000000
--- a/rpc/xdr/src/nlmcbk-xdr.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#ifndef _NLMCBK_H_RPCGEN
-#define _NLMCBK_H_RPCGEN
-
-#include <rpc/rpc.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define LM_MAXSTRLEN 1024
-
-struct nlm_sm_status {
- char *mon_name;
- int state;
- char priv[16];
-};
-typedef struct nlm_sm_status nlm_sm_status;
-
-#define NLMCBK_PROGRAM 100021
-#define NLMCBK_V1 1
-
-#if defined(__STDC__) || defined(__cplusplus)
-#define NLMCBK_SM_NOTIFY 16
-extern void * nlmcbk_sm_notify_0(struct nlm_sm_status *, CLIENT *);
-extern void * nlmcbk_sm_notify_0_svc(struct nlm_sm_status *, struct svc_req *);
-extern int nlmcbk_program_0_freeresult (SVCXPRT *, xdrproc_t, caddr_t);
-
-#else /* K&R C */
-#define NLMCBK_SM_NOTIFY 16
-extern void * nlmcbk_sm_notify_0();
-extern void * nlmcbk_sm_notify_0_svc();
-extern int nlmcbk_program_0_freeresult ();
-#endif /* K&R C */
-
-/* the xdr functions */
-
-#if defined(__STDC__) || defined(__cplusplus)
-extern bool_t xdr_nlm_sm_status (XDR *, nlm_sm_status*);
-
-#else /* K&R C */
-extern bool_t xdr_nlm_sm_status ();
-
-#endif /* K&R C */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_NLMCBK_H_RPCGEN */
diff --git a/rpc/xdr/src/nlmcbk.x b/rpc/xdr/src/nlmcbk.x
deleted file mode 100644
index 49901047eb3..00000000000
--- a/rpc/xdr/src/nlmcbk.x
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-const LM_MAXSTRLEN = 1024;
-
-struct nlm_sm_status {
- string mon_name<LM_MAXSTRLEN>; /* name of host */
- int state; /* new state */
- opaque priv[16]; /* private data */
-};
-
-program NLMCBK_PROGRAM {
- version NLMCBK_V0 {
- void NLMCBK_SM_NOTIFY(struct nlm_sm_status) = 1;
- } = 0;
-} = 1238477;
-
diff --git a/rpc/xdr/src/nsm-xdr.c b/rpc/xdr/src/nsm-xdr.c
deleted file mode 100644
index 8c41b4365a2..00000000000
--- a/rpc/xdr/src/nsm-xdr.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/*
- Copyright (c) 2012 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#include "nsm-xdr.h"
-
-bool_t
-xdr_sm_name (XDR *xdrs, sm_name *objp)
-{
- if (!xdr_string (xdrs, &objp->mon_name, SM_MAXSTRLEN))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_res (XDR *xdrs, res *objp)
-{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_sm_stat_res (XDR *xdrs, sm_stat_res *objp)
-{
- if (!xdr_res (xdrs, &objp->res_stat))
- return FALSE;
- if (!xdr_int (xdrs, &objp->state))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_sm_stat (XDR *xdrs, sm_stat *objp)
-{
- if (!xdr_int (xdrs, &objp->state))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_my_id (XDR *xdrs, my_id *objp)
-{
- if (!xdr_string (xdrs, &objp->my_name, SM_MAXSTRLEN))
- return FALSE;
- if (!xdr_int (xdrs, &objp->my_prog))
- return FALSE;
- if (!xdr_int (xdrs, &objp->my_vers))
- return FALSE;
- if (!xdr_int (xdrs, &objp->my_proc))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_mon_id (XDR *xdrs, mon_id *objp)
-{
- if (!xdr_string (xdrs, &objp->mon_name, SM_MAXSTRLEN))
- return FALSE;
- if (!xdr_my_id (xdrs, &objp->my_id))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_mon (XDR *xdrs, mon *objp)
-{
- if (!xdr_mon_id (xdrs, &objp->mon_id))
- return FALSE;
- if (!xdr_opaque (xdrs, objp->priv, 16))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_nsm_callback_status (XDR *xdrs, nsm_callback_status *objp)
-{
- if (!xdr_string (xdrs, &objp->mon_name, SM_MAXSTRLEN))
- return FALSE;
- if (!xdr_int (xdrs, &objp->state))
- return FALSE;
- if (!xdr_opaque (xdrs, objp->priv, 16))
- return FALSE;
- return TRUE;
-}
diff --git a/rpc/xdr/src/nsm-xdr.h b/rpc/xdr/src/nsm-xdr.h
deleted file mode 100644
index 9de642c1572..00000000000
--- a/rpc/xdr/src/nsm-xdr.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#ifndef _NSM_H_RPCGEN
-#define _NSM_H_RPCGEN
-
-#include <rpc/rpc.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#define SM_MAXSTRLEN 1024
-
-struct sm_name {
- char *mon_name;
-};
-typedef struct sm_name sm_name;
-
-enum res {
- STAT_SUCC = 0,
- STAT_FAIL = 1,
-};
-typedef enum res res;
-
-struct sm_stat_res {
- res res_stat;
- int state;
-};
-typedef struct sm_stat_res sm_stat_res;
-
-struct sm_stat {
- int state;
-};
-typedef struct sm_stat sm_stat;
-
-struct my_id {
- char *my_name;
- int my_prog;
- int my_vers;
- int my_proc;
-};
-typedef struct my_id my_id;
-
-struct mon_id {
- char *mon_name;
- struct my_id my_id;
-};
-typedef struct mon_id mon_id;
-
-struct mon {
- struct mon_id mon_id;
- char priv[16];
-};
-typedef struct mon mon;
-
-struct nsm_callback_status {
- char *mon_name;
- int state;
- char priv[16];
-};
-typedef struct nsm_callback_status nsm_callback_status;
-
-/* the xdr functions */
-
-#if defined(__STDC__) || defined(__cplusplus)
-extern bool_t xdr_sm_name (XDR *, sm_name*);
-extern bool_t xdr_res (XDR *, res*);
-extern bool_t xdr_sm_stat_res (XDR *, sm_stat_res*);
-extern bool_t xdr_sm_stat (XDR *, sm_stat*);
-extern bool_t xdr_my_id (XDR *, my_id*);
-extern bool_t xdr_mon_id (XDR *, mon_id*);
-extern bool_t xdr_mon (XDR *, mon*);
-extern bool_t xdr_nsm_callback_status (XDR *, nsm_callback_status*);
-
-#else /* K&R C */
-extern bool_t xdr_sm_name ();
-extern bool_t xdr_res ();
-extern bool_t xdr_sm_stat_res ();
-extern bool_t xdr_sm_stat ();
-extern bool_t xdr_my_id ();
-extern bool_t xdr_mon_id ();
-extern bool_t xdr_mon ();
-extern bool_t xdr_nsm_callback_status ();
-
-#endif /* K&R C */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_NSM_H_RPCGEN */
diff --git a/rpc/xdr/src/nsm.x b/rpc/xdr/src/nsm-xdr.x
index 8f97b1aaa1f..7c16a741f1d 100644
--- a/rpc/xdr/src/nsm.x
+++ b/rpc/xdr/src/nsm-xdr.x
@@ -1,4 +1,19 @@
/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/compat.h>
+
+/*
* This defines the maximum length of the string
* identifying the caller.
*/
@@ -31,11 +46,11 @@ struct my_id {
struct mon_id {
string mon_name<SM_MAXSTRLEN>; /* name of the host to be monitored */
- struct my_id my_id;
+ my_id my_id;
};
struct mon {
- struct mon_id mon_id;
+ mon_id mon_id;
opaque priv[16]; /* private information */
};
diff --git a/rpc/xdr/src/portmap-xdr.c b/rpc/xdr/src/portmap-xdr.c
deleted file mode 100644
index 7033213c0bd..00000000000
--- a/rpc/xdr/src/portmap-xdr.c
+++ /dev/null
@@ -1,246 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#include "portmap-xdr.h"
-
-bool_t
-xdr_pmap_port_by_brick_req (XDR *xdrs, pmap_port_by_brick_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->brick, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_port_by_brick_rsp (XDR *xdrs, pmap_port_by_brick_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->status))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- } else {
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- IXDR_PUT_LONG(buf, objp->status);
- IXDR_PUT_LONG(buf, objp->port);
- }
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->status))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- } else {
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- objp->status = IXDR_GET_LONG(buf);
- objp->port = IXDR_GET_LONG(buf);
- }
- return TRUE;
- }
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->status))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_brick_by_port_req (XDR *xdrs, pmap_brick_by_port_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_brick_by_port_rsp (XDR *xdrs, pmap_brick_by_port_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->status))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->op_ret);
- IXDR_PUT_LONG(buf, objp->op_errno);
- IXDR_PUT_LONG(buf, objp->status);
- }
- if (!xdr_string (xdrs, &objp->brick, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->status))
- return FALSE;
-
- } else {
- objp->op_ret = IXDR_GET_LONG(buf);
- objp->op_errno = IXDR_GET_LONG(buf);
- objp->status = IXDR_GET_LONG(buf);
- }
- if (!xdr_string (xdrs, &objp->brick, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_int (xdrs, &objp->status))
- return FALSE;
- if (!xdr_string (xdrs, &objp->brick, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_signup_req (XDR *xdrs, pmap_signup_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->brick, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_signup_rsp (XDR *xdrs, pmap_signup_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_signin_req (XDR *xdrs, pmap_signin_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->brick, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_signin_rsp (XDR *xdrs, pmap_signin_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_signout_req (XDR *xdrs, pmap_signout_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->brick, ~0))
- return FALSE;
- if (!xdr_int (xdrs, &objp->port))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pmap_signout_rsp (XDR *xdrs, pmap_signout_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- return TRUE;
-}
diff --git a/rpc/xdr/src/portmap-xdr.h b/rpc/xdr/src/portmap-xdr.h
deleted file mode 100644
index 2686da287b4..00000000000
--- a/rpc/xdr/src/portmap-xdr.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#ifndef _PORTMAP_XDR_H_RPCGEN
-#define _PORTMAP_XDR_H_RPCGEN
-
-#include <rpc/rpc.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-struct pmap_port_by_brick_req {
- char *brick;
-};
-typedef struct pmap_port_by_brick_req pmap_port_by_brick_req;
-
-struct pmap_port_by_brick_rsp {
- int op_ret;
- int op_errno;
- int status;
- int port;
-};
-typedef struct pmap_port_by_brick_rsp pmap_port_by_brick_rsp;
-
-struct pmap_brick_by_port_req {
- int port;
-};
-typedef struct pmap_brick_by_port_req pmap_brick_by_port_req;
-
-struct pmap_brick_by_port_rsp {
- int op_ret;
- int op_errno;
- int status;
- char *brick;
-};
-typedef struct pmap_brick_by_port_rsp pmap_brick_by_port_rsp;
-
-struct pmap_signup_req {
- char *brick;
- int port;
-};
-typedef struct pmap_signup_req pmap_signup_req;
-
-struct pmap_signup_rsp {
- int op_ret;
- int op_errno;
-};
-typedef struct pmap_signup_rsp pmap_signup_rsp;
-
-struct pmap_signin_req {
- char *brick;
- int port;
-};
-typedef struct pmap_signin_req pmap_signin_req;
-
-struct pmap_signin_rsp {
- int op_ret;
- int op_errno;
-};
-typedef struct pmap_signin_rsp pmap_signin_rsp;
-
-struct pmap_signout_req {
- char *brick;
- int port;
-};
-typedef struct pmap_signout_req pmap_signout_req;
-
-struct pmap_signout_rsp {
- int op_ret;
- int op_errno;
-};
-typedef struct pmap_signout_rsp pmap_signout_rsp;
-
-/* the xdr functions */
-
-#if defined(__STDC__) || defined(__cplusplus)
-extern bool_t xdr_pmap_port_by_brick_req (XDR *, pmap_port_by_brick_req*);
-extern bool_t xdr_pmap_port_by_brick_rsp (XDR *, pmap_port_by_brick_rsp*);
-extern bool_t xdr_pmap_brick_by_port_req (XDR *, pmap_brick_by_port_req*);
-extern bool_t xdr_pmap_brick_by_port_rsp (XDR *, pmap_brick_by_port_rsp*);
-extern bool_t xdr_pmap_signup_req (XDR *, pmap_signup_req*);
-extern bool_t xdr_pmap_signup_rsp (XDR *, pmap_signup_rsp*);
-extern bool_t xdr_pmap_signin_req (XDR *, pmap_signin_req*);
-extern bool_t xdr_pmap_signin_rsp (XDR *, pmap_signin_rsp*);
-extern bool_t xdr_pmap_signout_req (XDR *, pmap_signout_req*);
-extern bool_t xdr_pmap_signout_rsp (XDR *, pmap_signout_rsp*);
-
-#else /* K&R C */
-extern bool_t xdr_pmap_port_by_brick_req ();
-extern bool_t xdr_pmap_port_by_brick_rsp ();
-extern bool_t xdr_pmap_brick_by_port_req ();
-extern bool_t xdr_pmap_brick_by_port_rsp ();
-extern bool_t xdr_pmap_signup_req ();
-extern bool_t xdr_pmap_signup_rsp ();
-extern bool_t xdr_pmap_signin_req ();
-extern bool_t xdr_pmap_signin_rsp ();
-extern bool_t xdr_pmap_signout_req ();
-extern bool_t xdr_pmap_signout_rsp ();
-
-#endif /* K&R C */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_PORTMAP_XDR_H_RPCGEN */
diff --git a/rpc/xdr/src/portmap-xdr.x b/rpc/xdr/src/portmap-xdr.x
index f60dcc76c8e..23515572b9f 100644
--- a/rpc/xdr/src/portmap-xdr.x
+++ b/rpc/xdr/src/portmap-xdr.x
@@ -1,3 +1,17 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/compat.h>
struct pmap_port_by_brick_req {
string brick<>;
@@ -23,20 +37,10 @@ struct pmap_brick_by_port_rsp {
};
-struct pmap_signup_req {
- string brick<>;
- int port;
-};
-
-struct pmap_signup_rsp {
- int op_ret;
- int op_errno;
-};
-
-
struct pmap_signin_req {
string brick<>;
int port;
+ int pid;
};
struct pmap_signin_rsp {
@@ -47,6 +51,7 @@ struct pmap_signin_rsp {
struct pmap_signout_req {
string brick<>;
int port;
+ int rdma_port;
};
struct pmap_signout_rsp {
diff --git a/rpc/xdr/src/rpc-common-xdr.c b/rpc/xdr/src/rpc-common-xdr.c
deleted file mode 100644
index 14ddea71537..00000000000
--- a/rpc/xdr/src/rpc-common-xdr.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#include "rpc-common-xdr.h"
-
-bool_t
-xdr_auth_glusterfs_parms_v2 (XDR *xdrs, auth_glusterfs_parms_v2 *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->pid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->gid))
- return FALSE;
-
- } else {
- IXDR_PUT_LONG(buf, objp->pid);
- IXDR_PUT_U_LONG(buf, objp->uid);
- IXDR_PUT_U_LONG(buf, objp->gid);
- }
- if (!xdr_array (xdrs, (char **)&objp->groups.groups_val, (u_int *) &objp->groups.groups_len, ~0,
- sizeof (u_int), (xdrproc_t) xdr_u_int))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->lk_owner.lk_owner_val, (u_int *) &objp->lk_owner.lk_owner_len, ~0))
- return FALSE;
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- buf = XDR_INLINE (xdrs, 3 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_int (xdrs, &objp->pid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->gid))
- return FALSE;
-
- } else {
- objp->pid = IXDR_GET_LONG(buf);
- objp->uid = IXDR_GET_U_LONG(buf);
- objp->gid = IXDR_GET_U_LONG(buf);
- }
- if (!xdr_array (xdrs, (char **)&objp->groups.groups_val, (u_int *) &objp->groups.groups_len, ~0,
- sizeof (u_int), (xdrproc_t) xdr_u_int))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->lk_owner.lk_owner_val, (u_int *) &objp->lk_owner.lk_owner_len, ~0))
- return FALSE;
- return TRUE;
- }
-
- if (!xdr_int (xdrs, &objp->pid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->gid))
- return FALSE;
- if (!xdr_array (xdrs, (char **)&objp->groups.groups_val, (u_int *) &objp->groups.groups_len, ~0,
- sizeof (u_int), (xdrproc_t) xdr_u_int))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->lk_owner.lk_owner_val, (u_int *) &objp->lk_owner.lk_owner_len, ~0))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_auth_glusterfs_parms (XDR *xdrs, auth_glusterfs_parms *objp)
-{
- register int32_t *buf;
- int i;
- buf = NULL;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_u_quad_t (xdrs, &objp->lk_owner))
- return FALSE;
- buf = XDR_INLINE (xdrs, (4 + 16 )* BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->pid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->gid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ngrps))
- return FALSE;
- if (!xdr_vector (xdrs, (char *)objp->groups, 16,
- sizeof (u_int), (xdrproc_t) xdr_u_int))
- return FALSE;
- } else {
- IXDR_PUT_U_LONG(buf, objp->pid);
- IXDR_PUT_U_LONG(buf, objp->uid);
- IXDR_PUT_U_LONG(buf, objp->gid);
- IXDR_PUT_U_LONG(buf, objp->ngrps);
- {
- register u_int *genp;
-
- for (i = 0, genp = objp->groups;
- i < 16; ++i) {
- IXDR_PUT_U_LONG(buf, *genp++);
- }
- }
- }
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_u_quad_t (xdrs, &objp->lk_owner))
- return FALSE;
- buf = XDR_INLINE (xdrs, (4 + 16 )* BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_u_int (xdrs, &objp->pid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->gid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ngrps))
- return FALSE;
- if (!xdr_vector (xdrs, (char *)objp->groups, 16,
- sizeof (u_int), (xdrproc_t) xdr_u_int))
- return FALSE;
- } else {
- objp->pid = IXDR_GET_U_LONG(buf);
- objp->uid = IXDR_GET_U_LONG(buf);
- objp->gid = IXDR_GET_U_LONG(buf);
- objp->ngrps = IXDR_GET_U_LONG(buf);
- {
- register u_int *genp;
-
- for (i = 0, genp = objp->groups;
- i < 16; ++i) {
- *genp++ = IXDR_GET_U_LONG(buf);
- }
- }
- }
- return TRUE;
- }
-
- if (!xdr_u_quad_t (xdrs, &objp->lk_owner))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->pid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->gid))
- return FALSE;
- if (!xdr_u_int (xdrs, &objp->ngrps))
- return FALSE;
- if (!xdr_vector (xdrs, (char *)objp->groups, 16,
- sizeof (u_int), (xdrproc_t) xdr_u_int))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_dump_req (XDR *xdrs, gf_dump_req *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_prog_detail (XDR *xdrs, gf_prog_detail *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_string (xdrs, &objp->progname, ~0))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->prognum))
- return FALSE;
- if (!xdr_u_quad_t (xdrs, &objp->progver))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->next, sizeof (gf_prog_detail), (xdrproc_t) xdr_gf_prog_detail))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_gf_dump_rsp (XDR *xdrs, gf_dump_rsp *objp)
-{
- register int32_t *buf;
- buf = NULL;
-
- if (!xdr_u_quad_t (xdrs, &objp->gfs_id))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_ret))
- return FALSE;
- if (!xdr_int (xdrs, &objp->op_errno))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->prog, sizeof (gf_prog_detail), (xdrproc_t) xdr_gf_prog_detail))
- return FALSE;
- return TRUE;
-}
diff --git a/rpc/xdr/src/rpc-common-xdr.h b/rpc/xdr/src/rpc-common-xdr.h
deleted file mode 100644
index 66d9c3772fe..00000000000
--- a/rpc/xdr/src/rpc-common-xdr.h
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xdr-common.h"
-#include "compat.h"
-
-#if defined(__GNUC__)
-#if __GNUC__ >= 4
-#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
-#endif
-#endif
-
-/*
- * Please do not edit this file.
- * It was generated using rpcgen.
- */
-
-#ifndef _RPC_COMMON_XDR_H_RPCGEN
-#define _RPC_COMMON_XDR_H_RPCGEN
-
-#include <rpc/rpc.h>
-
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-
-struct auth_glusterfs_parms_v2 {
- int pid;
- u_int uid;
- u_int gid;
- struct {
- u_int groups_len;
- u_int *groups_val;
- } groups;
- struct {
- u_int lk_owner_len;
- char *lk_owner_val;
- } lk_owner;
-};
-typedef struct auth_glusterfs_parms_v2 auth_glusterfs_parms_v2;
-
-struct auth_glusterfs_parms {
- u_quad_t lk_owner;
- u_int pid;
- u_int uid;
- u_int gid;
- u_int ngrps;
- u_int groups[16];
-};
-typedef struct auth_glusterfs_parms auth_glusterfs_parms;
-
-struct gf_dump_req {
- u_quad_t gfs_id;
-};
-typedef struct gf_dump_req gf_dump_req;
-
-struct gf_prog_detail {
- char *progname;
- u_quad_t prognum;
- u_quad_t progver;
- struct gf_prog_detail *next;
-};
-typedef struct gf_prog_detail gf_prog_detail;
-
-struct gf_dump_rsp {
- u_quad_t gfs_id;
- int op_ret;
- int op_errno;
- struct gf_prog_detail *prog;
-};
-typedef struct gf_dump_rsp gf_dump_rsp;
-
-/* the xdr functions */
-
-#if defined(__STDC__) || defined(__cplusplus)
-extern bool_t xdr_auth_glusterfs_parms_v2 (XDR *, auth_glusterfs_parms_v2*);
-extern bool_t xdr_auth_glusterfs_parms (XDR *, auth_glusterfs_parms*);
-extern bool_t xdr_gf_dump_req (XDR *, gf_dump_req*);
-extern bool_t xdr_gf_prog_detail (XDR *, gf_prog_detail*);
-extern bool_t xdr_gf_dump_rsp (XDR *, gf_dump_rsp*);
-
-#else /* K&R C */
-extern bool_t xdr_auth_glusterfs_parms_v2 ();
-extern bool_t xdr_auth_glusterfs_parms ();
-extern bool_t xdr_gf_dump_req ();
-extern bool_t xdr_gf_prog_detail ();
-extern bool_t xdr_gf_dump_rsp ();
-
-#endif /* K&R C */
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif /* !_RPC_COMMON_XDR_H_RPCGEN */
diff --git a/rpc/xdr/src/rpc-common-xdr.x b/rpc/xdr/src/rpc-common-xdr.x
index ca28f38b5db..baf8b4313c8 100644
--- a/rpc/xdr/src/rpc-common-xdr.x
+++ b/rpc/xdr/src/rpc-common-xdr.x
@@ -1,6 +1,23 @@
+/*
+ * Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifdef RPC_XDR
+%#include "rpc-pragmas.h"
+#endif
+%#include <glusterfs/glusterfs-fops.h>
+
/* This file has definition of few XDR structures which are
* not captured in any section specific file */
+%#include "xdr-common.h"
+
struct auth_glusterfs_parms_v2 {
int pid;
unsigned int uid;
@@ -10,7 +27,7 @@ struct auth_glusterfs_parms_v2 {
};
struct auth_glusterfs_parms {
- unsigned hyper lk_owner;
+ u_quad_t lk_owner;
unsigned int pid;
unsigned int uid;
unsigned int gid;
@@ -19,21 +36,31 @@ struct auth_glusterfs_parms {
};
struct gf_dump_req {
- unsigned hyper gfs_id;
+ u_quad_t gfs_id;
};
+struct gf_statedump {
+ unsigned int pid;
+};
struct gf_prog_detail {
string progname<>;
- unsigned hyper prognum;
- unsigned hyper progver;
+ u_quad_t prognum;
+ u_quad_t progver;
struct gf_prog_detail *next;
};
struct gf_dump_rsp {
- unsigned hyper gfs_id;
+ u_quad_t gfs_id;
int op_ret;
int op_errno;
struct gf_prog_detail *prog;
};
+
+
+struct gf_common_rsp {
+ int op_ret;
+ int op_errno;
+ opaque xdata<>; /* Extra data */
+} ;
diff --git a/rpc/xdr/src/rpc-pragmas.h b/rpc/xdr/src/rpc-pragmas.h
new file mode 100644
index 00000000000..4c54cf6f1df
--- /dev/null
+++ b/rpc/xdr/src/rpc-pragmas.h
@@ -0,0 +1,28 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef GLUSTERFS_RPC_PRAGMAS_H
+#define GLUSTERFS_RPC_PRAGMAS_H
+
+#if defined(__GNUC__)
+#if __GNUC__ >= 4
+#if !defined(__clang__)
+#if !defined(__NetBSD__)
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+#else
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wunused-value"
+#endif
+#endif
+#endif
+
+#endif /* GLUSTERFS_RPC_PRAGMAS_H */
diff --git a/rpc/xdr/src/xdr-generic.c b/rpc/xdr/src/xdr-generic.c
index 5d5cf7197fb..20b54eb0a8a 100644
--- a/rpc/xdr/src/xdr-generic.c
+++ b/rpc/xdr/src/xdr-generic.c
@@ -1,134 +1,120 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#include "xdr-generic.h"
-
ssize_t
-xdr_serialize_generic (struct iovec outmsg, void *res, xdrproc_t proc)
+xdr_serialize_generic(struct iovec outmsg, void *res, xdrproc_t proc)
{
- ssize_t ret = -1;
- XDR xdr;
+ ssize_t ret = -1;
+ XDR xdr;
- if ((!outmsg.iov_base) || (!res) || (!proc))
- return -1;
+ if ((!outmsg.iov_base) || (!res) || (!proc))
+ return -1;
- xdrmem_create (&xdr, outmsg.iov_base, (unsigned int)outmsg.iov_len,
- XDR_ENCODE);
+ xdrmem_create(&xdr, outmsg.iov_base, (unsigned int)outmsg.iov_len,
+ XDR_ENCODE);
- if (!proc (&xdr, res)) {
- ret = -1;
- goto ret;
- }
+ if (!PROC(&xdr, res)) {
+ ret = -1;
+ goto ret;
+ }
- ret = xdr_encoded_length (xdr);
+ ret = xdr_encoded_length(xdr);
ret:
- return ret;
+ return ret;
}
-
ssize_t
-xdr_to_generic (struct iovec inmsg, void *args, xdrproc_t proc)
+xdr_to_generic(struct iovec inmsg, void *args, xdrproc_t proc)
{
- XDR xdr;
- ssize_t ret = -1;
+ XDR xdr;
+ ssize_t ret = -1;
- if ((!inmsg.iov_base) || (!args) || (!proc))
- return -1;
+ if ((!inmsg.iov_base) || (!args) || (!proc))
+ return -1;
- xdrmem_create (&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
- XDR_DECODE);
+ xdrmem_create(&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
+ XDR_DECODE);
- if (!proc (&xdr, args)) {
- ret = -1;
- goto ret;
- }
+ if (!PROC(&xdr, args)) {
+ ret = -1;
+ goto ret;
+ }
- ret = xdr_decoded_length (xdr);
+ ret = xdr_decoded_length(xdr);
ret:
- return ret;
+ return ret;
}
-
ssize_t
-xdr_to_generic_payload (struct iovec inmsg, void *args, xdrproc_t proc,
- struct iovec *pendingpayload)
+xdr_to_generic_payload(struct iovec inmsg, void *args, xdrproc_t proc,
+ struct iovec *pendingpayload)
{
- XDR xdr;
- ssize_t ret = -1;
+ XDR xdr;
+ ssize_t ret = -1;
- if ((!inmsg.iov_base) || (!args) || (!proc))
- return -1;
+ if ((!inmsg.iov_base) || (!args) || (!proc))
+ return -1;
- xdrmem_create (&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
- XDR_DECODE);
+ xdrmem_create(&xdr, inmsg.iov_base, (unsigned int)inmsg.iov_len,
+ XDR_DECODE);
- if (!proc (&xdr, args)) {
- ret = -1;
- goto ret;
- }
+ if (!PROC(&xdr, args)) {
+ ret = -1;
+ goto ret;
+ }
- ret = xdr_decoded_length (xdr);
+ ret = xdr_decoded_length(xdr);
- if (pendingpayload) {
- pendingpayload->iov_base = xdr_decoded_remaining_addr (xdr);
- pendingpayload->iov_len = xdr_decoded_remaining_len (xdr);
- }
+ if (pendingpayload) {
+ pendingpayload->iov_base = xdr_decoded_remaining_addr(xdr);
+ pendingpayload->iov_len = xdr_decoded_remaining_len(xdr);
+ }
ret:
- return ret;
+ return ret;
}
ssize_t
-xdr_length_round_up (size_t len, size_t bufsize)
+xdr_length_round_up(size_t len, size_t bufsize)
{
- int roundup = 0;
+ int roundup = 0;
- roundup = len % XDR_BYTES_PER_UNIT;
- if (roundup > 0)
- roundup = XDR_BYTES_PER_UNIT - roundup;
+ roundup = len % XDR_BYTES_PER_UNIT;
+ if (roundup > 0)
+ roundup = XDR_BYTES_PER_UNIT - roundup;
- if ((roundup > 0) && ((roundup + len) <= bufsize))
- len += roundup;
+ if ((roundup > 0) && ((roundup + len) <= bufsize))
+ len += roundup;
- return len;
+ return len;
}
int
-xdr_bytes_round_up (struct iovec *vec, size_t bufsize)
+xdr_bytes_round_up(struct iovec *vec, size_t bufsize)
{
- vec->iov_len = xdr_length_round_up (vec->iov_len, bufsize);
- return 0;
+ vec->iov_len = xdr_length_round_up(vec->iov_len, bufsize);
+ return 0;
}
-
void
-xdr_vector_round_up (struct iovec *vec, int vcount, uint32_t count)
+xdr_vector_round_up(struct iovec *vec, int vcount, uint32_t count)
{
- uint32_t round_count = 0;
+ uint32_t round_count = 0;
- round_count = xdr_length_round_up (count, 1048576);
- round_count -= count;
- if (round_count == 0)
- return;
+ round_count = xdr_length_round_up(count, 1048576);
+ round_count -= count;
+ if (round_count == 0 || vcount <= 0)
+ return;
- vec[vcount-1].iov_len += round_count;
+ vec[vcount - 1].iov_len += round_count;
}
diff --git a/rpc/xdr/src/xdr-generic.h b/rpc/xdr/src/xdr-generic.h
index 24054e11c75..794dda508cc 100644
--- a/rpc/xdr/src/xdr-generic.h
+++ b/rpc/xdr/src/xdr-generic.h
@@ -1,58 +1,76 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _XDR_GENERIC_H
#define _XDR_GENERIC_H
#include <sys/uio.h>
-//#include <rpc/rpc.h>
#include <rpc/types.h>
#include <rpc/xdr.h>
-#include "compat.h"
+#include <glusterfs/compat.h>
-#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
-#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
-#define xdr_encoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
-#define xdr_decoded_length(xdr) (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+#define xdr_decoded_remaining_addr(xdr) ((&xdr)->x_private)
+#define xdr_decoded_remaining_len(xdr) ((&xdr)->x_handy)
+#define xdr_encoded_length(xdr) \
+ (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
+#define xdr_decoded_length(xdr) \
+ (((size_t)(&xdr)->x_private) - ((size_t)(&xdr)->x_base))
-#define XDR_BYTES_PER_UNIT 4
+#define XDR_BYTES_PER_UNIT 4
-ssize_t
-xdr_serialize_generic (struct iovec outmsg, void *res, xdrproc_t proc);
+/*
+ On OSX > 10.9
+ -------------
+ typedef bool_t (*xdrproc_t)(XDR *, void *, unsigned int);
+
+ On OSX < 10.9
+ ------------
+ typedef bool_t (*xdrproc_t)(XDR *, ...);
+
+ FreeBSD all versions
+ ------------
+ typedef bool_t (*xdrproc_t)(XDR *, ...);
+
+ NetBSD 6.1.4
+ -----------
+ typedef bool_t (*xdrproc_t)(XDR *, const void *);
+
+ Linux all versions
+ -----------
+ typedef bool_t (*xdrproc_t)(XDR *, void *,...);
+*/
+
+#if defined(__NetBSD__)
+#define PROC(xdr, res) proc(xdr, res)
+#else
+#define PROC(xdr, res) proc(xdr, res, 0)
+#endif
ssize_t
-xdr_to_generic (struct iovec inmsg, void *args, xdrproc_t proc);
+xdr_serialize_generic(struct iovec outmsg, void *res, xdrproc_t proc);
ssize_t
-xdr_to_generic_payload (struct iovec inmsg, void *args, xdrproc_t proc,
- struct iovec *pendingpayload);
+xdr_to_generic(struct iovec inmsg, void *args, xdrproc_t proc);
+ssize_t
+xdr_to_generic_payload(struct iovec inmsg, void *args, xdrproc_t proc,
+ struct iovec *pendingpayload);
extern int
-xdr_bytes_round_up (struct iovec *vec, size_t bufsize);
+xdr_bytes_round_up(struct iovec *vec, size_t bufsize);
extern ssize_t
-xdr_length_round_up (size_t len, size_t bufsize);
+xdr_length_round_up(size_t len, size_t bufsize);
void
-xdr_vector_round_up (struct iovec *vec, int vcount, uint32_t count);
+xdr_vector_round_up(struct iovec *vec, int vcount, uint32_t count);
#endif /* !_XDR_GENERIC_H */
diff --git a/rpc/xdr/src/xdr-nfs3.c b/rpc/xdr/src/xdr-nfs3.c
index 35fca59ff80..cfccaaa89b8 100644
--- a/rpc/xdr/src/xdr-nfs3.c
+++ b/rpc/xdr/src/xdr-nfs3.c
@@ -1,1899 +1,1907 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#if defined(__GNUC__)
+#if __GNUC__ >= 4
+#if !defined(__clang__)
+#if !defined(__NetBSD__)
+#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
+#pragma GCC diagnostic ignored "-Wunused-variable"
+#endif
+#else
+#pragma clang diagnostic ignored "-Wunused-variable"
+#pragma clang diagnostic ignored "-Wunused-value"
+#endif
+#endif
+#endif
+
#include "xdr-nfs3.h"
-#include "mem-pool.h"
+#include <glusterfs/mem-pool.h>
#include "xdr-common.h"
-#if GF_DARWIN_HOST_OS
-#define xdr_u_quad_t xdr_u_int64_t
-#define xdr_quad_t xdr_int64_t
-#define xdr_uint32_t xdr_u_int32_t
-#define xdr_uint64_t xdr_u_int64_t
-#endif
-
bool_t
-xdr_uint64 (XDR *xdrs, uint64 *objp)
+xdr_uint64(XDR *xdrs, uint64 *objp)
{
- if (!xdr_uint64_t (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint64_t(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_int64 (XDR *xdrs, int64 *objp)
+xdr_int64(XDR *xdrs, int64 *objp)
{
- if (!xdr_int64_t (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_int64_t(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_uint32 (XDR *xdrs, uint32 *objp)
+xdr_uint32(XDR *xdrs, uint32 *objp)
{
- if (!xdr_uint32_t (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint32_t(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_int32 (XDR *xdrs, int32 *objp)
+xdr_int32(XDR *xdrs, int32 *objp)
{
- if (!xdr_int32_t (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_int32_t(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_filename3 (XDR *xdrs, filename3 *objp)
+xdr_filename3(XDR *xdrs, filename3 *objp)
{
- if (!xdr_string (xdrs, objp, ~0))
- return FALSE;
- return TRUE;
+ if (!xdr_string(xdrs, objp, ~0))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_nfspath3 (XDR *xdrs, nfspath3 *objp)
+xdr_nfspath3(XDR *xdrs, nfspath3 *objp)
{
- if (!xdr_string (xdrs, objp, ~0))
- return FALSE;
- return TRUE;
+ if (!xdr_string(xdrs, objp, ~0))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_fileid3 (XDR *xdrs, fileid3 *objp)
+xdr_fileid3(XDR *xdrs, fileid3 *objp)
{
- if (!xdr_uint64 (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint64(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_cookie3 (XDR *xdrs, cookie3 *objp)
+xdr_cookie3(XDR *xdrs, cookie3 *objp)
{
- if (!xdr_uint64 (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint64(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_cookieverf3 (XDR *xdrs, cookieverf3 objp)
+xdr_cookieverf3(XDR *xdrs, cookieverf3 objp)
{
- if (!xdr_opaque (xdrs, objp, NFS3_COOKIEVERFSIZE))
- return FALSE;
- return TRUE;
+ if (!xdr_opaque(xdrs, objp, NFS3_COOKIEVERFSIZE))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_createverf3 (XDR *xdrs, createverf3 objp)
+xdr_createverf3(XDR *xdrs, createverf3 objp)
{
- if (!xdr_opaque (xdrs, objp, NFS3_CREATEVERFSIZE))
- return FALSE;
- return TRUE;
+ if (!xdr_opaque(xdrs, objp, NFS3_CREATEVERFSIZE))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_writeverf3 (XDR *xdrs, writeverf3 objp)
+xdr_writeverf3(XDR *xdrs, writeverf3 objp)
{
- if (!xdr_opaque (xdrs, objp, NFS3_WRITEVERFSIZE))
- return FALSE;
- return TRUE;
+ if (!xdr_opaque(xdrs, objp, NFS3_WRITEVERFSIZE))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_uid3 (XDR *xdrs, uid3 *objp)
+xdr_uid3(XDR *xdrs, uid3 *objp)
{
- if (!xdr_uint32 (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint32(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_gid3 (XDR *xdrs, gid3 *objp)
+xdr_gid3(XDR *xdrs, gid3 *objp)
{
- if (!xdr_uint32 (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint32(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_size3 (XDR *xdrs, size3 *objp)
+xdr_size3(XDR *xdrs, size3 *objp)
{
- if (!xdr_uint64 (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint64(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_offset3 (XDR *xdrs, offset3 *objp)
+xdr_offset3(XDR *xdrs, offset3 *objp)
{
- if (!xdr_uint64 (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint64(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mode3 (XDR *xdrs, mode3 *objp)
+xdr_mode3(XDR *xdrs, mode3 *objp)
{
- if (!xdr_uint32 (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint32(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_count3 (XDR *xdrs, count3 *objp)
+xdr_count3(XDR *xdrs, count3 *objp)
{
- if (!xdr_uint32 (xdrs, objp))
- return FALSE;
- return TRUE;
+ if (!xdr_uint32(xdrs, objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_nfsstat3 (XDR *xdrs, nfsstat3 *objp)
+xdr_nfsstat3(XDR *xdrs, nfsstat3 *objp)
{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
+ if (!xdr_enum(xdrs, (enum_t *)objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_ftype3 (XDR *xdrs, ftype3 *objp)
+xdr_ftype3(XDR *xdrs, ftype3 *objp)
{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
+ if (!xdr_enum(xdrs, (enum_t *)objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_specdata3 (XDR *xdrs, specdata3 *objp)
+xdr_specdata3(XDR *xdrs, specdata3 *objp)
{
- if (!xdr_uint32 (xdrs, &objp->specdata1))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->specdata2))
- return FALSE;
- return TRUE;
+ if (!xdr_uint32(xdrs, &objp->specdata1))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->specdata2))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_nfs_fh3 (XDR *xdrs, nfs_fh3 *objp)
+xdr_nfs_fh3(XDR *xdrs, nfs_fh3 *objp)
{
- if (!xdr_bytes (xdrs, (char **)&objp->data.data_val, (u_int *) &objp->data.data_len, NFS3_FHSIZE))
- return FALSE;
- return TRUE;
+ if (!xdr_bytes(xdrs, (char **)&objp->data.data_val,
+ (u_int *)&objp->data.data_len, NFS3_FHSIZE))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_nfstime3 (XDR *xdrs, nfstime3 *objp)
+xdr_nfstime3(XDR *xdrs, nfstime3 *objp)
{
- if (!xdr_uint32 (xdrs, &objp->seconds))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->nseconds))
- return FALSE;
- return TRUE;
+ if (!xdr_uint32(xdrs, &objp->seconds))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->nseconds))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_fattr3 (XDR *xdrs, fattr3 *objp)
+xdr_fattr3(XDR *xdrs, fattr3 *objp)
{
- if (!xdr_ftype3 (xdrs, &objp->type))
- return FALSE;
- if (!xdr_mode3 (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->nlink))
- return FALSE;
- if (!xdr_uid3 (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_gid3 (xdrs, &objp->gid))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->size))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->used))
- return FALSE;
- if (!xdr_specdata3 (xdrs, &objp->rdev))
- return FALSE;
- if (!xdr_uint64 (xdrs, &objp->fsid))
- return FALSE;
- if (!xdr_fileid3 (xdrs, &objp->fileid))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->atime))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->mtime))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->ctime))
- return FALSE;
- return TRUE;
+ if (!xdr_ftype3(xdrs, &objp->type))
+ return FALSE;
+ if (!xdr_mode3(xdrs, &objp->mode))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->nlink))
+ return FALSE;
+ if (!xdr_uid3(xdrs, &objp->uid))
+ return FALSE;
+ if (!xdr_gid3(xdrs, &objp->gid))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->used))
+ return FALSE;
+ if (!xdr_specdata3(xdrs, &objp->rdev))
+ return FALSE;
+ if (!xdr_uint64(xdrs, &objp->fsid))
+ return FALSE;
+ if (!xdr_fileid3(xdrs, &objp->fileid))
+ return FALSE;
+ if (!xdr_nfstime3(xdrs, &objp->atime))
+ return FALSE;
+ if (!xdr_nfstime3(xdrs, &objp->mtime))
+ return FALSE;
+ if (!xdr_nfstime3(xdrs, &objp->ctime))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_post_op_attr (XDR *xdrs, post_op_attr *objp)
+xdr_post_op_attr(XDR *xdrs, post_op_attr *objp)
{
- if (!xdr_bool (xdrs, &objp->attributes_follow))
- return FALSE;
- switch (objp->attributes_follow) {
- case TRUE:
- if (!xdr_fattr3 (xdrs, &objp->post_op_attr_u.attributes))
- return FALSE;
- break;
- case FALSE:
- break;
- default:
- return FALSE;
- }
- return TRUE;
+ if (!xdr_bool(xdrs, &objp->attributes_follow))
+ return FALSE;
+ switch (objp->attributes_follow) {
+ case TRUE:
+ if (!xdr_fattr3(xdrs, &objp->post_op_attr_u.attributes))
+ return FALSE;
+ break;
+ case FALSE:
+ break;
+ default:
+ return FALSE;
+ }
+ return TRUE;
}
bool_t
-xdr_wcc_attr (XDR *xdrs, wcc_attr *objp)
+xdr_wcc_attr(XDR *xdrs, wcc_attr *objp)
{
- if (!xdr_size3 (xdrs, &objp->size))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->mtime))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->ctime))
- return FALSE;
- return TRUE;
+ if (!xdr_size3(xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_nfstime3(xdrs, &objp->mtime))
+ return FALSE;
+ if (!xdr_nfstime3(xdrs, &objp->ctime))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_pre_op_attr (XDR *xdrs, pre_op_attr *objp)
+xdr_pre_op_attr(XDR *xdrs, pre_op_attr *objp)
{
- if (!xdr_bool (xdrs, &objp->attributes_follow))
- return FALSE;
- switch (objp->attributes_follow) {
- case TRUE:
- if (!xdr_wcc_attr (xdrs, &objp->pre_op_attr_u.attributes))
- return FALSE;
- break;
- case FALSE:
- break;
- default:
- return FALSE;
- }
- return TRUE;
+ if (!xdr_bool(xdrs, &objp->attributes_follow))
+ return FALSE;
+ switch (objp->attributes_follow) {
+ case TRUE:
+ if (!xdr_wcc_attr(xdrs, &objp->pre_op_attr_u.attributes))
+ return FALSE;
+ break;
+ case FALSE:
+ break;
+ default:
+ return FALSE;
+ }
+ return TRUE;
}
bool_t
-xdr_wcc_data (XDR *xdrs, wcc_data *objp)
+xdr_wcc_data(XDR *xdrs, wcc_data *objp)
{
- if (!xdr_pre_op_attr (xdrs, &objp->before))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->after))
- return FALSE;
- return TRUE;
+ if (!xdr_pre_op_attr(xdrs, &objp->before))
+ return FALSE;
+ if (!xdr_post_op_attr(xdrs, &objp->after))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_post_op_fh3 (XDR *xdrs, post_op_fh3 *objp)
-{
- if (!xdr_bool (xdrs, &objp->handle_follows))
- return FALSE;
- switch (objp->handle_follows) {
- case TRUE:
- if (!xdr_nfs_fh3 (xdrs, &objp->post_op_fh3_u.handle))
- return FALSE;
- break;
- case FALSE:
- break;
- default:
- return FALSE;
- }
- return TRUE;
-}
-
-bool_t
-xdr_time_how (XDR *xdrs, time_how *objp)
-{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
+xdr_post_op_fh3(XDR *xdrs, post_op_fh3 *objp)
+{
+ if (!xdr_bool(xdrs, &objp->handle_follows))
+ return FALSE;
+ switch (objp->handle_follows) {
+ case TRUE:
+ if (!xdr_nfs_fh3(xdrs, &objp->post_op_fh3_u.handle))
+ return FALSE;
+ break;
+ case FALSE:
+ break;
+ default:
+ return FALSE;
+ }
+ return TRUE;
+}
+
+bool_t
+xdr_time_how(XDR *xdrs, time_how *objp)
+{
+ if (!xdr_enum(xdrs, (enum_t *)objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_set_mode3 (XDR *xdrs, set_mode3 *objp)
+xdr_set_mode3(XDR *xdrs, set_mode3 *objp)
{
- if (!xdr_bool (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case TRUE:
- if (!xdr_mode3 (xdrs, &objp->set_mode3_u.mode))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_bool(xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case TRUE:
+ if (!xdr_mode3(xdrs, &objp->set_mode3_u.mode))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_set_uid3 (XDR *xdrs, set_uid3 *objp)
+xdr_set_uid3(XDR *xdrs, set_uid3 *objp)
{
- if (!xdr_bool (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case TRUE:
- if (!xdr_uid3 (xdrs, &objp->set_uid3_u.uid))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_bool(xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case TRUE:
+ if (!xdr_uid3(xdrs, &objp->set_uid3_u.uid))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_set_gid3 (XDR *xdrs, set_gid3 *objp)
+xdr_set_gid3(XDR *xdrs, set_gid3 *objp)
{
- if (!xdr_bool (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case TRUE:
- if (!xdr_gid3 (xdrs, &objp->set_gid3_u.gid))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_bool(xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case TRUE:
+ if (!xdr_gid3(xdrs, &objp->set_gid3_u.gid))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_set_size3 (XDR *xdrs, set_size3 *objp)
+xdr_set_size3(XDR *xdrs, set_size3 *objp)
{
- if (!xdr_bool (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case TRUE:
- if (!xdr_size3 (xdrs, &objp->set_size3_u.size))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_bool(xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case TRUE:
+ if (!xdr_size3(xdrs, &objp->set_size3_u.size))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_set_atime (XDR *xdrs, set_atime *objp)
+xdr_set_atime(XDR *xdrs, set_atime *objp)
{
- if (!xdr_time_how (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case SET_TO_CLIENT_TIME:
- if (!xdr_nfstime3 (xdrs, &objp->set_atime_u.atime))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_time_how(xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case SET_TO_CLIENT_TIME:
+ if (!xdr_nfstime3(xdrs, &objp->set_atime_u.atime))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_set_mtime (XDR *xdrs, set_mtime *objp)
+xdr_set_mtime(XDR *xdrs, set_mtime *objp)
{
- if (!xdr_time_how (xdrs, &objp->set_it))
- return FALSE;
- switch (objp->set_it) {
- case SET_TO_CLIENT_TIME:
- if (!xdr_nfstime3 (xdrs, &objp->set_mtime_u.mtime))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_time_how(xdrs, &objp->set_it))
+ return FALSE;
+ switch (objp->set_it) {
+ case SET_TO_CLIENT_TIME:
+ if (!xdr_nfstime3(xdrs, &objp->set_mtime_u.mtime))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_sattr3 (XDR *xdrs, sattr3 *objp)
+xdr_sattr3(XDR *xdrs, sattr3 *objp)
{
- if (!xdr_set_mode3 (xdrs, &objp->mode))
- return FALSE;
- if (!xdr_set_uid3 (xdrs, &objp->uid))
- return FALSE;
- if (!xdr_set_gid3 (xdrs, &objp->gid))
- return FALSE;
- if (!xdr_set_size3 (xdrs, &objp->size))
- return FALSE;
- if (!xdr_set_atime (xdrs, &objp->atime))
- return FALSE;
- if (!xdr_set_mtime (xdrs, &objp->mtime))
- return FALSE;
- return TRUE;
+ if (!xdr_set_mode3(xdrs, &objp->mode))
+ return FALSE;
+ if (!xdr_set_uid3(xdrs, &objp->uid))
+ return FALSE;
+ if (!xdr_set_gid3(xdrs, &objp->gid))
+ return FALSE;
+ if (!xdr_set_size3(xdrs, &objp->size))
+ return FALSE;
+ if (!xdr_set_atime(xdrs, &objp->atime))
+ return FALSE;
+ if (!xdr_set_mtime(xdrs, &objp->mtime))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_diropargs3 (XDR *xdrs, diropargs3 *objp)
+xdr_diropargs3(XDR *xdrs, diropargs3 *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->dir))
- return FALSE;
- if (!xdr_filename3 (xdrs, &objp->name))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->dir))
+ return FALSE;
+ if (!xdr_filename3(xdrs, &objp->name))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_getattr3args (XDR *xdrs, getattr3args *objp)
+xdr_getattr3args(XDR *xdrs, getattr3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->object))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->object))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_getattr3resok (XDR *xdrs, getattr3resok *objp)
+xdr_getattr3resok(XDR *xdrs, getattr3resok *objp)
{
- if (!xdr_fattr3 (xdrs, &objp->obj_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_fattr3(xdrs, &objp->obj_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_getattr3res (XDR *xdrs, getattr3res *objp)
+xdr_getattr3res(XDR *xdrs, getattr3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_getattr3resok (xdrs, &objp->getattr3res_u.resok))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_getattr3resok(xdrs, &objp->getattr3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_sattrguard3 (XDR *xdrs, sattrguard3 *objp)
+xdr_sattrguard3(XDR *xdrs, sattrguard3 *objp)
{
- if (!xdr_bool (xdrs, &objp->check))
- return FALSE;
- switch (objp->check) {
- case TRUE:
- if (!xdr_nfstime3 (xdrs, &objp->sattrguard3_u.obj_ctime))
- return FALSE;
- break;
- case FALSE:
- break;
- default:
- return FALSE;
- }
- return TRUE;
+ if (!xdr_bool(xdrs, &objp->check))
+ return FALSE;
+ switch (objp->check) {
+ case TRUE:
+ if (!xdr_nfstime3(xdrs, &objp->sattrguard3_u.obj_ctime))
+ return FALSE;
+ break;
+ case FALSE:
+ break;
+ default:
+ return FALSE;
+ }
+ return TRUE;
}
bool_t
-xdr_setattr3args (XDR *xdrs, setattr3args *objp)
+xdr_setattr3args(XDR *xdrs, setattr3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->object))
- return FALSE;
- if (!xdr_sattr3 (xdrs, &objp->new_attributes))
- return FALSE;
- if (!xdr_sattrguard3 (xdrs, &objp->guard))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->object))
+ return FALSE;
+ if (!xdr_sattr3(xdrs, &objp->new_attributes))
+ return FALSE;
+ if (!xdr_sattrguard3(xdrs, &objp->guard))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_setattr3resok (XDR *xdrs, setattr3resok *objp)
+xdr_setattr3resok(XDR *xdrs, setattr3resok *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->obj_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->obj_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_setattr3resfail (XDR *xdrs, setattr3resfail *objp)
+xdr_setattr3resfail(XDR *xdrs, setattr3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->obj_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->obj_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_setattr3res (XDR *xdrs, setattr3res *objp)
+xdr_setattr3res(XDR *xdrs, setattr3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_setattr3resok (xdrs, &objp->setattr3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_setattr3resfail (xdrs, &objp->setattr3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_setattr3resok(xdrs, &objp->setattr3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_setattr3resfail(xdrs, &objp->setattr3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_lookup3args (XDR *xdrs, lookup3args *objp)
+xdr_lookup3args(XDR *xdrs, lookup3args *objp)
{
- if (!xdr_diropargs3 (xdrs, &objp->what))
- return FALSE;
- return TRUE;
+ if (!xdr_diropargs3(xdrs, &objp->what))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_lookup3resok (XDR *xdrs, lookup3resok *objp)
+xdr_lookup3resok(XDR *xdrs, lookup3resok *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->object))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->object))
+ return FALSE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_post_op_attr(xdrs, &objp->dir_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_lookup3resfail (XDR *xdrs, lookup3resfail *objp)
+xdr_lookup3resfail(XDR *xdrs, lookup3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->dir_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_lookup3res (XDR *xdrs, lookup3res *objp)
+xdr_lookup3res(XDR *xdrs, lookup3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_lookup3resok (xdrs, &objp->lookup3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_lookup3resfail (xdrs, &objp->lookup3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_lookup3resok(xdrs, &objp->lookup3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_lookup3resfail(xdrs, &objp->lookup3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_access3args (XDR *xdrs, access3args *objp)
+xdr_access3args(XDR *xdrs, access3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->object))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->access))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->object))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->access))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_access3resok (XDR *xdrs, access3resok *objp)
+xdr_access3resok(XDR *xdrs, access3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->access))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->access))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_access3resfail (XDR *xdrs, access3resfail *objp)
+xdr_access3resfail(XDR *xdrs, access3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_access3res (XDR *xdrs, access3res *objp)
+xdr_access3res(XDR *xdrs, access3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_access3resok (xdrs, &objp->access3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_access3resfail (xdrs, &objp->access3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_access3resok(xdrs, &objp->access3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_access3resfail(xdrs, &objp->access3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_readlink3args (XDR *xdrs, readlink3args *objp)
+xdr_readlink3args(XDR *xdrs, readlink3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->symlink))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->symlink))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readlink3resok (XDR *xdrs, readlink3resok *objp)
+xdr_readlink3resok(XDR *xdrs, readlink3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->symlink_attributes))
- return FALSE;
- if (!xdr_nfspath3 (xdrs, &objp->data))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->symlink_attributes))
+ return FALSE;
+ if (!xdr_nfspath3(xdrs, &objp->data))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readlink3resfail (XDR *xdrs, readlink3resfail *objp)
+xdr_readlink3resfail(XDR *xdrs, readlink3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->symlink_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->symlink_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readlink3res (XDR *xdrs, readlink3res *objp)
+xdr_readlink3res(XDR *xdrs, readlink3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_readlink3resok (xdrs, &objp->readlink3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_readlink3resfail (xdrs, &objp->readlink3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_readlink3resok(xdrs, &objp->readlink3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_readlink3resfail(xdrs, &objp->readlink3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_read3args (XDR *xdrs, read3args *objp)
+xdr_read3args(XDR *xdrs, read3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->file))
- return FALSE;
- if (!xdr_offset3 (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->file))
+ return FALSE;
+ if (!xdr_offset3(xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->count))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_read3resok_nocopy (XDR *xdrs, read3resok *objp)
+xdr_read3resok_nocopy(XDR *xdrs, read3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->eof))
- return FALSE;
- if (!xdr_u_int (xdrs, (u_int *) &objp->data.data_len))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->file_attributes))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->count))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->eof))
+ return FALSE;
+ if (!xdr_u_int(xdrs, (u_int *)&objp->data.data_len))
+ return FALSE;
+ return TRUE;
}
-
bool_t
-xdr_read3resok (XDR *xdrs, read3resok *objp)
+xdr_read3resok(XDR *xdrs, read3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->eof))
- return FALSE;
- if (!xdr_bytes (xdrs, (char **)&objp->data.data_val, (u_int *) &objp->data.data_len, ~0))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->file_attributes))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->count))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->eof))
+ return FALSE;
+ if (!xdr_bytes(xdrs, (char **)&objp->data.data_val,
+ (u_int *)&objp->data.data_len, ~0))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_read3resfail (XDR *xdrs, read3resfail *objp)
+xdr_read3resfail(XDR *xdrs, read3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->file_attributes))
+ return FALSE;
+ return TRUE;
}
-
bool_t
-xdr_read3res_nocopy (XDR *xdrs, read3res *objp)
+xdr_read3res_nocopy(XDR *xdrs, read3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_read3resok_nocopy (xdrs, &objp->read3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_read3resfail (xdrs, &objp->read3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_read3resok_nocopy(xdrs, &objp->read3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_read3resfail(xdrs, &objp->read3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
-
bool_t
-xdr_read3res (XDR *xdrs, read3res *objp)
+xdr_read3res(XDR *xdrs, read3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_read3resok (xdrs, &objp->read3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_read3resfail (xdrs, &objp->read3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_read3resok(xdrs, &objp->read3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_read3resfail(xdrs, &objp->read3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_stable_how (XDR *xdrs, stable_how *objp)
+xdr_stable_how(XDR *xdrs, stable_how *objp)
{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
+ if (!xdr_enum(xdrs, (enum_t *)objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_write3args (XDR *xdrs, write3args *objp)
+xdr_write3args(XDR *xdrs, write3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->file))
- return FALSE;
- if (!xdr_offset3 (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- if (!xdr_stable_how (xdrs, &objp->stable))
- return FALSE;
+ if (!xdr_nfs_fh3(xdrs, &objp->file))
+ return FALSE;
+ if (!xdr_offset3(xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->count))
+ return FALSE;
+ if (!xdr_stable_how(xdrs, &objp->stable))
+ return FALSE;
- /* Added specifically to avoid copies from the xdr buffer into
- * the write3args structure, which will also require an already
- * allocated buffer. That is not optimal.
- */
- if (!xdr_u_int (xdrs, (u_int *) &objp->data.data_len))
- return FALSE;
+ /* Added specifically to avoid copies from the xdr buffer into
+ * the write3args structure, which will also require an already
+ * allocated buffer. That is not optimal.
+ */
+ if (!xdr_u_int(xdrs, (u_int *)&objp->data.data_len))
+ return FALSE;
- /* The remaining bytes in the xdr buffer are the bytes that need to be
- * written. See how these bytes are extracted in the xdr_to_write3args
- * code path. Be careful, while using the write3args structure, since
- * only the data.data_len has been filled. The actual data is
- * extracted in xdr_to_write3args path.
- */
+ /* The remaining bytes in the xdr buffer are the bytes that need to be
+ * written. See how these bytes are extracted in the xdr_to_write3args
+ * code path. Be careful, while using the write3args structure, since
+ * only the data.data_len has been filled. The actual data is
+ * extracted in xdr_to_write3args path.
+ */
- /* if (!xdr_bytes (xdrs, (char **)&objp->data.data_val, (u_int *) &objp->data.data_len, ~0))
- return FALSE;
- */
- return TRUE;
+ /* if (!xdr_bytes (xdrs, (char **)&objp->data.data_val, (u_int *)
+ &objp->data.data_len, ~0)) return FALSE;
+ */
+ return TRUE;
}
bool_t
-xdr_write3resok (XDR *xdrs, write3resok *objp)
+xdr_write3resok(XDR *xdrs, write3resok *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->file_wcc))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- if (!xdr_stable_how (xdrs, &objp->committed))
- return FALSE;
- if (!xdr_writeverf3 (xdrs, objp->verf))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->file_wcc))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->count))
+ return FALSE;
+ if (!xdr_stable_how(xdrs, &objp->committed))
+ return FALSE;
+ if (!xdr_writeverf3(xdrs, objp->verf))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_write3resfail (XDR *xdrs, write3resfail *objp)
+xdr_write3resfail(XDR *xdrs, write3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->file_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->file_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_write3res (XDR *xdrs, write3res *objp)
+xdr_write3res(XDR *xdrs, write3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_write3resok (xdrs, &objp->write3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_write3resfail (xdrs, &objp->write3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_write3resok(xdrs, &objp->write3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_write3resfail(xdrs, &objp->write3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_createmode3 (XDR *xdrs, createmode3 *objp)
+xdr_createmode3(XDR *xdrs, createmode3 *objp)
{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
+ if (!xdr_enum(xdrs, (enum_t *)objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_createhow3 (XDR *xdrs, createhow3 *objp)
+xdr_createhow3(XDR *xdrs, createhow3 *objp)
{
- if (!xdr_createmode3 (xdrs, &objp->mode))
- return FALSE;
- switch (objp->mode) {
- case UNCHECKED:
- case GUARDED:
- if (!xdr_sattr3 (xdrs, &objp->createhow3_u.obj_attributes))
- return FALSE;
- break;
- case EXCLUSIVE:
- if (!xdr_createverf3 (xdrs, objp->createhow3_u.verf))
- return FALSE;
- break;
- default:
- return FALSE;
- }
- return TRUE;
+ if (!xdr_createmode3(xdrs, &objp->mode))
+ return FALSE;
+ switch (objp->mode) {
+ case UNCHECKED:
+ case GUARDED:
+ if (!xdr_sattr3(xdrs, &objp->createhow3_u.obj_attributes))
+ return FALSE;
+ break;
+ case EXCLUSIVE:
+ if (!xdr_createverf3(xdrs, objp->createhow3_u.verf))
+ return FALSE;
+ break;
+ default:
+ return FALSE;
+ }
+ return TRUE;
}
bool_t
-xdr_create3args (XDR *xdrs, create3args *objp)
+xdr_create3args(XDR *xdrs, create3args *objp)
{
- if (!xdr_diropargs3 (xdrs, &objp->where))
- return FALSE;
- if (!xdr_createhow3 (xdrs, &objp->how))
- return FALSE;
- return TRUE;
+ if (!xdr_diropargs3(xdrs, &objp->where))
+ return FALSE;
+ if (!xdr_createhow3(xdrs, &objp->how))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_create3resok (XDR *xdrs, create3resok *objp)
+xdr_create3resok(XDR *xdrs, create3resok *objp)
{
- if (!xdr_post_op_fh3 (xdrs, &objp->obj))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_fh3(xdrs, &objp->obj))
+ return FALSE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_create3resfail (XDR *xdrs, create3resfail *objp)
+xdr_create3resfail(XDR *xdrs, create3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_create3res (XDR *xdrs, create3res *objp)
+xdr_create3res(XDR *xdrs, create3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_create3resok (xdrs, &objp->create3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_create3resfail (xdrs, &objp->create3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_create3resok(xdrs, &objp->create3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_create3resfail(xdrs, &objp->create3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_mkdir3args (XDR *xdrs, mkdir3args *objp)
+xdr_mkdir3args(XDR *xdrs, mkdir3args *objp)
{
- if (!xdr_diropargs3 (xdrs, &objp->where))
- return FALSE;
- if (!xdr_sattr3 (xdrs, &objp->attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_diropargs3(xdrs, &objp->where))
+ return FALSE;
+ if (!xdr_sattr3(xdrs, &objp->attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mkdir3resok (XDR *xdrs, mkdir3resok *objp)
+xdr_mkdir3resok(XDR *xdrs, mkdir3resok *objp)
{
- if (!xdr_post_op_fh3 (xdrs, &objp->obj))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_fh3(xdrs, &objp->obj))
+ return FALSE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mkdir3resfail (XDR *xdrs, mkdir3resfail *objp)
+xdr_mkdir3resfail(XDR *xdrs, mkdir3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mkdir3res (XDR *xdrs, mkdir3res *objp)
+xdr_mkdir3res(XDR *xdrs, mkdir3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_mkdir3resok (xdrs, &objp->mkdir3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_mkdir3resfail (xdrs, &objp->mkdir3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_mkdir3resok(xdrs, &objp->mkdir3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_mkdir3resfail(xdrs, &objp->mkdir3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_symlinkdata3 (XDR *xdrs, symlinkdata3 *objp)
+xdr_symlinkdata3(XDR *xdrs, symlinkdata3 *objp)
{
- if (!xdr_sattr3 (xdrs, &objp->symlink_attributes))
- return FALSE;
- if (!xdr_nfspath3 (xdrs, &objp->symlink_data))
- return FALSE;
- return TRUE;
+ if (!xdr_sattr3(xdrs, &objp->symlink_attributes))
+ return FALSE;
+ if (!xdr_nfspath3(xdrs, &objp->symlink_data))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_symlink3args (XDR *xdrs, symlink3args *objp)
+xdr_symlink3args(XDR *xdrs, symlink3args *objp)
{
- if (!xdr_diropargs3 (xdrs, &objp->where))
- return FALSE;
- if (!xdr_symlinkdata3 (xdrs, &objp->symlink))
- return FALSE;
- return TRUE;
+ if (!xdr_diropargs3(xdrs, &objp->where))
+ return FALSE;
+ if (!xdr_symlinkdata3(xdrs, &objp->symlink))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_symlink3resok (XDR *xdrs, symlink3resok *objp)
+xdr_symlink3resok(XDR *xdrs, symlink3resok *objp)
{
- if (!xdr_post_op_fh3 (xdrs, &objp->obj))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_fh3(xdrs, &objp->obj))
+ return FALSE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_symlink3resfail (XDR *xdrs, symlink3resfail *objp)
+xdr_symlink3resfail(XDR *xdrs, symlink3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_symlink3res (XDR *xdrs, symlink3res *objp)
+xdr_symlink3res(XDR *xdrs, symlink3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_symlink3resok (xdrs, &objp->symlink3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_symlink3resfail (xdrs, &objp->symlink3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_symlink3resok(xdrs, &objp->symlink3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_symlink3resfail(xdrs, &objp->symlink3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_devicedata3 (XDR *xdrs, devicedata3 *objp)
+xdr_devicedata3(XDR *xdrs, devicedata3 *objp)
{
- if (!xdr_sattr3 (xdrs, &objp->dev_attributes))
- return FALSE;
- if (!xdr_specdata3 (xdrs, &objp->spec))
- return FALSE;
- return TRUE;
+ if (!xdr_sattr3(xdrs, &objp->dev_attributes))
+ return FALSE;
+ if (!xdr_specdata3(xdrs, &objp->spec))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mknoddata3 (XDR *xdrs, mknoddata3 *objp)
+xdr_mknoddata3(XDR *xdrs, mknoddata3 *objp)
{
- if (!xdr_ftype3 (xdrs, &objp->type))
- return FALSE;
- switch (objp->type) {
- case NF3CHR:
- case NF3BLK:
- if (!xdr_devicedata3 (xdrs, &objp->mknoddata3_u.device))
- return FALSE;
- break;
- case NF3SOCK:
- case NF3FIFO:
- if (!xdr_sattr3 (xdrs, &objp->mknoddata3_u.pipe_attributes))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_ftype3(xdrs, &objp->type))
+ return FALSE;
+ switch (objp->type) {
+ case NF3CHR:
+ case NF3BLK:
+ if (!xdr_devicedata3(xdrs, &objp->mknoddata3_u.device))
+ return FALSE;
+ break;
+ case NF3SOCK:
+ case NF3FIFO:
+ if (!xdr_sattr3(xdrs, &objp->mknoddata3_u.pipe_attributes))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_mknod3args (XDR *xdrs, mknod3args *objp)
+xdr_mknod3args(XDR *xdrs, mknod3args *objp)
{
- if (!xdr_diropargs3 (xdrs, &objp->where))
- return FALSE;
- if (!xdr_mknoddata3 (xdrs, &objp->what))
- return FALSE;
- return TRUE;
+ if (!xdr_diropargs3(xdrs, &objp->where))
+ return FALSE;
+ if (!xdr_mknoddata3(xdrs, &objp->what))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mknod3resok (XDR *xdrs, mknod3resok *objp)
+xdr_mknod3resok(XDR *xdrs, mknod3resok *objp)
{
- if (!xdr_post_op_fh3 (xdrs, &objp->obj))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_fh3(xdrs, &objp->obj))
+ return FALSE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mknod3resfail (XDR *xdrs, mknod3resfail *objp)
+xdr_mknod3resfail(XDR *xdrs, mknod3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mknod3res (XDR *xdrs, mknod3res *objp)
+xdr_mknod3res(XDR *xdrs, mknod3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_mknod3resok (xdrs, &objp->mknod3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_mknod3resfail (xdrs, &objp->mknod3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_mknod3resok(xdrs, &objp->mknod3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_mknod3resfail(xdrs, &objp->mknod3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_remove3args (XDR *xdrs, remove3args *objp)
+xdr_remove3args(XDR *xdrs, remove3args *objp)
{
- if (!xdr_diropargs3 (xdrs, &objp->object))
- return FALSE;
- return TRUE;
+ if (!xdr_diropargs3(xdrs, &objp->object))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_remove3resok (XDR *xdrs, remove3resok *objp)
+xdr_remove3resok(XDR *xdrs, remove3resok *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_remove3resfail (XDR *xdrs, remove3resfail *objp)
+xdr_remove3resfail(XDR *xdrs, remove3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_remove3res (XDR *xdrs, remove3res *objp)
+xdr_remove3res(XDR *xdrs, remove3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_remove3resok (xdrs, &objp->remove3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_remove3resfail (xdrs, &objp->remove3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_remove3resok(xdrs, &objp->remove3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_remove3resfail(xdrs, &objp->remove3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_rmdir3args (XDR *xdrs, rmdir3args *objp)
+xdr_rmdir3args(XDR *xdrs, rmdir3args *objp)
{
- if (!xdr_diropargs3 (xdrs, &objp->object))
- return FALSE;
- return TRUE;
+ if (!xdr_diropargs3(xdrs, &objp->object))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_rmdir3resok (XDR *xdrs, rmdir3resok *objp)
+xdr_rmdir3resok(XDR *xdrs, rmdir3resok *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_rmdir3resfail (XDR *xdrs, rmdir3resfail *objp)
+xdr_rmdir3resfail(XDR *xdrs, rmdir3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->dir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->dir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_rmdir3res (XDR *xdrs, rmdir3res *objp)
+xdr_rmdir3res(XDR *xdrs, rmdir3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_rmdir3resok (xdrs, &objp->rmdir3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_rmdir3resfail (xdrs, &objp->rmdir3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_rmdir3resok(xdrs, &objp->rmdir3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_rmdir3resfail(xdrs, &objp->rmdir3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_rename3args (XDR *xdrs, rename3args *objp)
+xdr_rename3args(XDR *xdrs, rename3args *objp)
{
- if (!xdr_diropargs3 (xdrs, &objp->from))
- return FALSE;
- if (!xdr_diropargs3 (xdrs, &objp->to))
- return FALSE;
- return TRUE;
+ if (!xdr_diropargs3(xdrs, &objp->from))
+ return FALSE;
+ if (!xdr_diropargs3(xdrs, &objp->to))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_rename3resok (XDR *xdrs, rename3resok *objp)
+xdr_rename3resok(XDR *xdrs, rename3resok *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->fromdir_wcc))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->todir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->fromdir_wcc))
+ return FALSE;
+ if (!xdr_wcc_data(xdrs, &objp->todir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_rename3resfail (XDR *xdrs, rename3resfail *objp)
+xdr_rename3resfail(XDR *xdrs, rename3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->fromdir_wcc))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->todir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->fromdir_wcc))
+ return FALSE;
+ if (!xdr_wcc_data(xdrs, &objp->todir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_rename3res (XDR *xdrs, rename3res *objp)
+xdr_rename3res(XDR *xdrs, rename3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_rename3resok (xdrs, &objp->rename3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_rename3resfail (xdrs, &objp->rename3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_rename3resok(xdrs, &objp->rename3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_rename3resfail(xdrs, &objp->rename3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_link3args (XDR *xdrs, link3args *objp)
+xdr_link3args(XDR *xdrs, link3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->file))
- return FALSE;
- if (!xdr_diropargs3 (xdrs, &objp->link))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->file))
+ return FALSE;
+ if (!xdr_diropargs3(xdrs, &objp->link))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_link3resok (XDR *xdrs, link3resok *objp)
+xdr_link3resok(XDR *xdrs, link3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->linkdir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->file_attributes))
+ return FALSE;
+ if (!xdr_wcc_data(xdrs, &objp->linkdir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_link3resfail (XDR *xdrs, link3resfail *objp)
+xdr_link3resfail(XDR *xdrs, link3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->file_attributes))
- return FALSE;
- if (!xdr_wcc_data (xdrs, &objp->linkdir_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->file_attributes))
+ return FALSE;
+ if (!xdr_wcc_data(xdrs, &objp->linkdir_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_link3res (XDR *xdrs, link3res *objp)
+xdr_link3res(XDR *xdrs, link3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_link3resok (xdrs, &objp->link3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_link3resfail (xdrs, &objp->link3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_link3resok(xdrs, &objp->link3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_link3resfail(xdrs, &objp->link3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_readdir3args (XDR *xdrs, readdir3args *objp)
+xdr_readdir3args(XDR *xdrs, readdir3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->dir))
- return FALSE;
- if (!xdr_cookie3 (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_cookieverf3 (xdrs, objp->cookieverf))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->dir))
+ return FALSE;
+ if (!xdr_cookie3(xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_cookieverf3(xdrs, objp->cookieverf))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->count))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_entry3 (XDR *xdrs, entry3 *objp)
+xdr_entry3(XDR *xdrs, entry3 *objp)
{
- if (!xdr_fileid3 (xdrs, &objp->fileid))
- return FALSE;
- if (!xdr_filename3 (xdrs, &objp->name))
- return FALSE;
- if (!xdr_cookie3 (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (entry3), (xdrproc_t) xdr_entry3))
- return FALSE;
- return TRUE;
+ if (!xdr_fileid3(xdrs, &objp->fileid))
+ return FALSE;
+ if (!xdr_filename3(xdrs, &objp->name))
+ return FALSE;
+ if (!xdr_cookie3(xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_pointer(xdrs, (char **)&objp->nextentry, sizeof(entry3),
+ (xdrproc_t)xdr_entry3))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_dirlist3 (XDR *xdrs, dirlist3 *objp)
+xdr_dirlist3(XDR *xdrs, dirlist3 *objp)
{
- if (!xdr_pointer (xdrs, (char **)&objp->entries, sizeof (entry3), (xdrproc_t) xdr_entry3))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->eof))
- return FALSE;
- return TRUE;
+ if (!xdr_pointer(xdrs, (char **)&objp->entries, sizeof(entry3),
+ (xdrproc_t)xdr_entry3))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->eof))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readdir3resok (XDR *xdrs, readdir3resok *objp)
+xdr_readdir3resok(XDR *xdrs, readdir3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- if (!xdr_cookieverf3 (xdrs, objp->cookieverf))
- return FALSE;
- if (!xdr_dirlist3 (xdrs, &objp->reply))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->dir_attributes))
+ return FALSE;
+ if (!xdr_cookieverf3(xdrs, objp->cookieverf))
+ return FALSE;
+ if (!xdr_dirlist3(xdrs, &objp->reply))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readdir3resfail (XDR *xdrs, readdir3resfail *objp)
+xdr_readdir3resfail(XDR *xdrs, readdir3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->dir_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readdir3res (XDR *xdrs, readdir3res *objp)
+xdr_readdir3res(XDR *xdrs, readdir3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_readdir3resok (xdrs, &objp->readdir3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_readdir3resfail (xdrs, &objp->readdir3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_readdir3resok(xdrs, &objp->readdir3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_readdir3resfail(xdrs, &objp->readdir3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_readdirp3args (XDR *xdrs, readdirp3args *objp)
+xdr_readdirp3args(XDR *xdrs, readdirp3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->dir))
- return FALSE;
- if (!xdr_cookie3 (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_cookieverf3 (xdrs, objp->cookieverf))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->dircount))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->maxcount))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->dir))
+ return FALSE;
+ if (!xdr_cookie3(xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_cookieverf3(xdrs, objp->cookieverf))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->dircount))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->maxcount))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_entryp3 (XDR *xdrs, entryp3 *objp)
+xdr_entryp3(XDR *xdrs, entryp3 *objp)
{
- if (!xdr_fileid3 (xdrs, &objp->fileid))
- return FALSE;
- if (!xdr_filename3 (xdrs, &objp->name))
- return FALSE;
- if (!xdr_cookie3 (xdrs, &objp->cookie))
- return FALSE;
- if (!xdr_post_op_attr (xdrs, &objp->name_attributes))
- return FALSE;
- if (!xdr_post_op_fh3 (xdrs, &objp->name_handle))
- return FALSE;
- if (!xdr_pointer (xdrs, (char **)&objp->nextentry, sizeof (entryp3), (xdrproc_t) xdr_entryp3))
- return FALSE;
- return TRUE;
+ if (!xdr_fileid3(xdrs, &objp->fileid))
+ return FALSE;
+ if (!xdr_filename3(xdrs, &objp->name))
+ return FALSE;
+ if (!xdr_cookie3(xdrs, &objp->cookie))
+ return FALSE;
+ if (!xdr_post_op_attr(xdrs, &objp->name_attributes))
+ return FALSE;
+ if (!xdr_post_op_fh3(xdrs, &objp->name_handle))
+ return FALSE;
+ if (!xdr_pointer(xdrs, (char **)&objp->nextentry, sizeof(entryp3),
+ (xdrproc_t)xdr_entryp3))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_dirlistp3 (XDR *xdrs, dirlistp3 *objp)
+xdr_dirlistp3(XDR *xdrs, dirlistp3 *objp)
{
- if (!xdr_pointer (xdrs, (char **)&objp->entries, sizeof (entryp3), (xdrproc_t) xdr_entryp3))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->eof))
- return FALSE;
- return TRUE;
+ if (!xdr_pointer(xdrs, (char **)&objp->entries, sizeof(entryp3),
+ (xdrproc_t)xdr_entryp3))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->eof))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readdirp3resok (XDR *xdrs, readdirp3resok *objp)
+xdr_readdirp3resok(XDR *xdrs, readdirp3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- if (!xdr_cookieverf3 (xdrs, objp->cookieverf))
- return FALSE;
- if (!xdr_dirlistp3 (xdrs, &objp->reply))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->dir_attributes))
+ return FALSE;
+ if (!xdr_cookieverf3(xdrs, objp->cookieverf))
+ return FALSE;
+ if (!xdr_dirlistp3(xdrs, &objp->reply))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readdirp3resfail (XDR *xdrs, readdirp3resfail *objp)
+xdr_readdirp3resfail(XDR *xdrs, readdirp3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->dir_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->dir_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_readdirp3res (XDR *xdrs, readdirp3res *objp)
+xdr_readdirp3res(XDR *xdrs, readdirp3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_readdirp3resok (xdrs, &objp->readdirp3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_readdirp3resfail (xdrs, &objp->readdirp3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_readdirp3resok(xdrs, &objp->readdirp3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_readdirp3resfail(xdrs, &objp->readdirp3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_fsstat3args (XDR *xdrs, fsstat3args *objp)
+xdr_fsstat3args(XDR *xdrs, fsstat3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->fsroot))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->fsroot))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_fsstat3resok (XDR *xdrs, fsstat3resok *objp)
+xdr_fsstat3resok(XDR *xdrs, fsstat3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->tbytes))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->fbytes))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->abytes))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->tfiles))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->ffiles))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->afiles))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->invarsec))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->tbytes))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->fbytes))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->abytes))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->tfiles))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->ffiles))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->afiles))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->invarsec))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_fsstat3resfail (XDR *xdrs, fsstat3resfail *objp)
+xdr_fsstat3resfail(XDR *xdrs, fsstat3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_fsstat3res (XDR *xdrs, fsstat3res *objp)
+xdr_fsstat3res(XDR *xdrs, fsstat3res *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_fsstat3resok (xdrs, &objp->fsstat3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_fsstat3resfail (xdrs, &objp->fsstat3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_fsstat3resok(xdrs, &objp->fsstat3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_fsstat3resfail(xdrs, &objp->fsstat3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_fsinfo3args (XDR *xdrs, fsinfo3args *objp)
+xdr_fsinfo3args(XDR *xdrs, fsinfo3args *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->fsroot))
- return FALSE;
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->fsroot))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_fsinfo3resok (XDR *xdrs, fsinfo3resok *objp)
+xdr_fsinfo3resok(XDR *xdrs, fsinfo3resok *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->rtmax))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->rtpref))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->rtmult))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->wtmax))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->wtpref))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->wtmult))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->dtpref))
- return FALSE;
- if (!xdr_size3 (xdrs, &objp->maxfilesize))
- return FALSE;
- if (!xdr_nfstime3 (xdrs, &objp->time_delta))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->properties))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_fsinfo3resfail (XDR *xdrs, fsinfo3resfail *objp)
-{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_fsinfo3res (XDR *xdrs, fsinfo3res *objp)
-{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_fsinfo3resok (xdrs, &objp->fsinfo3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_fsinfo3resfail (xdrs, &objp->fsinfo3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
-}
-
-bool_t
-xdr_pathconf3args (XDR *xdrs, pathconf3args *objp)
-{
- if (!xdr_nfs_fh3 (xdrs, &objp->object))
- return FALSE;
- return TRUE;
-}
-
-bool_t
-xdr_pathconf3resok (XDR *xdrs, pathconf3resok *objp)
-{
- register int32_t *buf;
-
-
- if (xdrs->x_op == XDR_ENCODE) {
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->linkmax))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->name_max))
- return FALSE;
- buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_bool (xdrs, &objp->no_trunc))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->chown_restricted))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_insensitive))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_preserving))
- return FALSE;
- } else {
- IXDR_PUT_BOOL(buf, objp->no_trunc);
- IXDR_PUT_BOOL(buf, objp->chown_restricted);
- IXDR_PUT_BOOL(buf, objp->case_insensitive);
- IXDR_PUT_BOOL(buf, objp->case_preserving);
- }
- return TRUE;
- } else if (xdrs->x_op == XDR_DECODE) {
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->linkmax))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->name_max))
- return FALSE;
- buf = XDR_INLINE (xdrs, 4 * BYTES_PER_XDR_UNIT);
- if (buf == NULL) {
- if (!xdr_bool (xdrs, &objp->no_trunc))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->chown_restricted))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_insensitive))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_preserving))
- return FALSE;
- } else {
- objp->no_trunc = IXDR_GET_BOOL(buf);
- objp->chown_restricted = IXDR_GET_BOOL(buf);
- objp->case_insensitive = IXDR_GET_BOOL(buf);
- objp->case_preserving = IXDR_GET_BOOL(buf);
- }
- return TRUE;
- }
-
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->linkmax))
- return FALSE;
- if (!xdr_uint32 (xdrs, &objp->name_max))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->no_trunc))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->chown_restricted))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_insensitive))
- return FALSE;
- if (!xdr_bool (xdrs, &objp->case_preserving))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->rtmax))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->rtpref))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->rtmult))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->wtmax))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->wtpref))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->wtmult))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->dtpref))
+ return FALSE;
+ if (!xdr_size3(xdrs, &objp->maxfilesize))
+ return FALSE;
+ if (!xdr_nfstime3(xdrs, &objp->time_delta))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->properties))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_pathconf3resfail (XDR *xdrs, pathconf3resfail *objp)
+xdr_fsinfo3resfail(XDR *xdrs, fsinfo3resfail *objp)
{
- if (!xdr_post_op_attr (xdrs, &objp->obj_attributes))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ return TRUE;
+}
+
+bool_t
+xdr_fsinfo3res(XDR *xdrs, fsinfo3res *objp)
+{
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_fsinfo3resok(xdrs, &objp->fsinfo3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_fsinfo3resfail(xdrs, &objp->fsinfo3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_pathconf3res (XDR *xdrs, pathconf3res *objp)
+xdr_pathconf3args(XDR *xdrs, pathconf3args *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_pathconf3resok (xdrs, &objp->pathconf3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_pathconf3resfail (xdrs, &objp->pathconf3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->object))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_commit3args (XDR *xdrs, commit3args *objp)
+xdr_pathconf3resok(XDR *xdrs, pathconf3resok *objp)
{
- if (!xdr_nfs_fh3 (xdrs, &objp->file))
- return FALSE;
- if (!xdr_offset3 (xdrs, &objp->offset))
- return FALSE;
- if (!xdr_count3 (xdrs, &objp->count))
- return FALSE;
- return TRUE;
+ register int32_t *buf;
+
+ if (xdrs->x_op == XDR_ENCODE) {
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->linkmax))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->name_max))
+ return FALSE;
+ buf = XDR_INLINE(xdrs, 4 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_bool(xdrs, &objp->no_trunc))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->chown_restricted))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->case_insensitive))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->case_preserving))
+ return FALSE;
+ } else {
+ IXDR_PUT_BOOL(buf, objp->no_trunc);
+ IXDR_PUT_BOOL(buf, objp->chown_restricted);
+ IXDR_PUT_BOOL(buf, objp->case_insensitive);
+ IXDR_PUT_BOOL(buf, objp->case_preserving);
+ }
+ return TRUE;
+ } else if (xdrs->x_op == XDR_DECODE) {
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->linkmax))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->name_max))
+ return FALSE;
+ buf = XDR_INLINE(xdrs, 4 * BYTES_PER_XDR_UNIT);
+ if (buf == NULL) {
+ if (!xdr_bool(xdrs, &objp->no_trunc))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->chown_restricted))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->case_insensitive))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->case_preserving))
+ return FALSE;
+ } else {
+ objp->no_trunc = IXDR_GET_BOOL(buf);
+ objp->chown_restricted = IXDR_GET_BOOL(buf);
+ objp->case_insensitive = IXDR_GET_BOOL(buf);
+ objp->case_preserving = IXDR_GET_BOOL(buf);
+ }
+ return TRUE;
+ }
+
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->linkmax))
+ return FALSE;
+ if (!xdr_uint32(xdrs, &objp->name_max))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->no_trunc))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->chown_restricted))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->case_insensitive))
+ return FALSE;
+ if (!xdr_bool(xdrs, &objp->case_preserving))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_commit3resok (XDR *xdrs, commit3resok *objp)
+xdr_pathconf3resfail(XDR *xdrs, pathconf3resfail *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->file_wcc))
- return FALSE;
- if (!xdr_writeverf3 (xdrs, objp->verf))
- return FALSE;
- return TRUE;
+ if (!xdr_post_op_attr(xdrs, &objp->obj_attributes))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_commit3resfail (XDR *xdrs, commit3resfail *objp)
+xdr_pathconf3res(XDR *xdrs, pathconf3res *objp)
{
- if (!xdr_wcc_data (xdrs, &objp->file_wcc))
- return FALSE;
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_pathconf3resok(xdrs, &objp->pathconf3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_pathconf3resfail(xdrs, &objp->pathconf3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_commit3res (XDR *xdrs, commit3res *objp)
+xdr_commit3args(XDR *xdrs, commit3args *objp)
{
- if (!xdr_nfsstat3 (xdrs, &objp->status))
- return FALSE;
- switch (objp->status) {
- case NFS3_OK:
- if (!xdr_commit3resok (xdrs, &objp->commit3res_u.resok))
- return FALSE;
- break;
- default:
- if (!xdr_commit3resfail (xdrs, &objp->commit3res_u.resfail))
- return FALSE;
- break;
- }
- return TRUE;
+ if (!xdr_nfs_fh3(xdrs, &objp->file))
+ return FALSE;
+ if (!xdr_offset3(xdrs, &objp->offset))
+ return FALSE;
+ if (!xdr_count3(xdrs, &objp->count))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_fhandle3 (XDR *xdrs, fhandle3 *objp)
+xdr_commit3resok(XDR *xdrs, commit3resok *objp)
{
- if (!xdr_bytes (xdrs, (char **)&objp->fhandle3_val, (u_int *) &objp->fhandle3_len, FHSIZE3))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->file_wcc))
+ return FALSE;
+ if (!xdr_writeverf3(xdrs, objp->verf))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_dirpath (XDR *xdrs, dirpath *objp)
+xdr_commit3resfail(XDR *xdrs, commit3resfail *objp)
{
- if (!xdr_string (xdrs, objp, MNTPATHLEN))
- return FALSE;
- return TRUE;
+ if (!xdr_wcc_data(xdrs, &objp->file_wcc))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_name (XDR *xdrs, name *objp)
+xdr_commit3res(XDR *xdrs, commit3res *objp)
{
- if (!xdr_string (xdrs, objp, MNTNAMLEN))
- return FALSE;
- return TRUE;
+ if (!xdr_nfsstat3(xdrs, &objp->status))
+ return FALSE;
+ switch (objp->status) {
+ case NFS3_OK:
+ if (!xdr_commit3resok(xdrs, &objp->commit3res_u.resok))
+ return FALSE;
+ break;
+ default:
+ if (!xdr_commit3resfail(xdrs, &objp->commit3res_u.resfail))
+ return FALSE;
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_mountstat3 (XDR *xdrs, mountstat3 *objp)
+xdr_fhandle3(XDR *xdrs, fhandle3 *objp)
{
- if (!xdr_enum (xdrs, (enum_t *) objp))
- return FALSE;
- return TRUE;
+ if (!xdr_bytes(xdrs, (char **)&objp->fhandle3_val,
+ (u_int *)&objp->fhandle3_len, FHSIZE3))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mountres3_ok (XDR *xdrs, mountres3_ok *objp)
+xdr_dirpath(XDR *xdrs, dirpath *objp)
{
- if (!xdr_fhandle3 (xdrs, &objp->fhandle))
- return FALSE;
- if (!xdr_array (xdrs, (char **)&objp->auth_flavors.auth_flavors_val, (u_int *) &objp->auth_flavors.auth_flavors_len, ~0,
- sizeof (int), (xdrproc_t) xdr_int))
- return FALSE;
- return TRUE;
+ if (!xdr_string(xdrs, objp, MNTPATHLEN))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mountres3 (XDR *xdrs, mountres3 *objp)
+xdr_name(XDR *xdrs, name *objp)
{
- if (!xdr_mountstat3 (xdrs, &objp->fhs_status))
- return FALSE;
- switch (objp->fhs_status) {
- case MNT3_OK:
- if (!xdr_mountres3_ok (xdrs, &objp->mountres3_u.mountinfo))
- return FALSE;
- break;
- default:
- break;
- }
- return TRUE;
+ if (!xdr_string(xdrs, objp, MNTNAMLEN))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mountlist (XDR *xdrs, mountlist *objp)
+xdr_mountstat3(XDR *xdrs, mountstat3 *objp)
{
- if (!xdr_pointer (xdrs, (char **)objp, sizeof (struct mountbody), (xdrproc_t) xdr_mountbody))
- return FALSE;
- return TRUE;
+ if (!xdr_enum(xdrs, (enum_t *)objp))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_mountbody (XDR *xdrs, mountbody *objp)
+xdr_mountres3_ok(XDR *xdrs, mountres3_ok *objp)
{
- if (!xdr_name (xdrs, &objp->ml_hostname))
- return FALSE;
- if (!xdr_dirpath (xdrs, &objp->ml_directory))
- return FALSE;
- if (!xdr_mountlist (xdrs, &objp->ml_next))
- return FALSE;
- return TRUE;
+ if (!xdr_fhandle3(xdrs, &objp->fhandle))
+ return FALSE;
+ if (!xdr_array(xdrs, (char **)&objp->auth_flavors.auth_flavors_val,
+ (u_int *)&objp->auth_flavors.auth_flavors_len, ~0,
+ sizeof(int), (xdrproc_t)xdr_int))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_groups (XDR *xdrs, groups *objp)
+xdr_mountres3(XDR *xdrs, mountres3 *objp)
{
- if (!xdr_pointer (xdrs, (char **)objp, sizeof (struct groupnode), (xdrproc_t) xdr_groupnode))
- return FALSE;
- return TRUE;
+ if (!xdr_mountstat3(xdrs, &objp->fhs_status))
+ return FALSE;
+ switch (objp->fhs_status) {
+ case MNT3_OK:
+ if (!xdr_mountres3_ok(xdrs, &objp->mountres3_u.mountinfo))
+ return FALSE;
+ break;
+ default:
+ break;
+ }
+ return TRUE;
}
bool_t
-xdr_groupnode (XDR *xdrs, groupnode *objp)
+xdr_mountlist(XDR *xdrs, mountlist *objp)
{
- if (!xdr_name (xdrs, &objp->gr_name))
- return FALSE;
- if (!xdr_groups (xdrs, &objp->gr_next))
- return FALSE;
- return TRUE;
+ if (!xdr_pointer(xdrs, (char **)objp, sizeof(struct mountbody),
+ (xdrproc_t)xdr_mountbody))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_exports (XDR *xdrs, exports *objp)
+xdr_mountbody(XDR *xdrs, mountbody *objp)
{
- if (!xdr_pointer (xdrs, (char **)objp, sizeof (struct exportnode), (xdrproc_t) xdr_exportnode))
- return FALSE;
- return TRUE;
+ if (!xdr_name(xdrs, &objp->ml_hostname))
+ return FALSE;
+ if (!xdr_dirpath(xdrs, &objp->ml_directory))
+ return FALSE;
+ if (!xdr_mountlist(xdrs, &objp->ml_next))
+ return FALSE;
+ return TRUE;
}
bool_t
-xdr_exportnode (XDR *xdrs, exportnode *objp)
+xdr_groups(XDR *xdrs, groups *objp)
{
- if (!xdr_dirpath (xdrs, &objp->ex_dir))
- return FALSE;
- if (!xdr_groups (xdrs, &objp->ex_groups))
- return FALSE;
- if (!xdr_exports (xdrs, &objp->ex_next))
- return FALSE;
- return TRUE;
+ if (!xdr_pointer(xdrs, (char **)objp, sizeof(struct groupnode),
+ (xdrproc_t)xdr_groupnode))
+ return FALSE;
+ return TRUE;
}
-void
-xdr_free_exports_list (struct exportnode *first)
+bool_t
+xdr_groupnode(XDR *xdrs, groupnode *objp)
{
- struct exportnode *elist = NULL;
+ if (!xdr_name(xdrs, &objp->gr_name))
+ return FALSE;
+ if (!xdr_groups(xdrs, &objp->gr_next))
+ return FALSE;
+ return TRUE;
+}
- if (!first)
- return;
+bool_t
+xdr_exports(XDR *xdrs, exports *objp)
+{
+ if (!xdr_pointer(xdrs, (char **)objp, sizeof(struct exportnode),
+ (xdrproc_t)xdr_exportnode))
+ return FALSE;
+ return TRUE;
+}
- while (first) {
- elist = first->ex_next;
- if (first->ex_dir)
- GF_FREE (first->ex_dir);
+bool_t
+xdr_exportnode(XDR *xdrs, exportnode *objp)
+{
+ if (!xdr_dirpath(xdrs, &objp->ex_dir))
+ return FALSE;
+ if (!xdr_groups(xdrs, &objp->ex_groups))
+ return FALSE;
+ if (!xdr_exports(xdrs, &objp->ex_next))
+ return FALSE;
+ return TRUE;
+}
- if (first->ex_groups) {
- if (first->ex_groups->gr_name)
- GF_FREE (first->ex_groups->gr_name);
- GF_FREE (first->ex_groups);
- }
+static void
+xdr_free_groupnode(struct groupnode *group)
+{
+ if (!group)
+ return;
- GF_FREE (first);
- first = elist;
- }
+ if (group->gr_next)
+ xdr_free_groupnode(group->gr_next);
+ GF_FREE(group->gr_name);
+ GF_FREE(group);
}
-
void
-xdr_free_mountlist (mountlist ml)
+xdr_free_exports_list(struct exportnode *first)
{
- struct mountbody *next = NULL;
+ struct exportnode *elist = NULL;
- if (!ml)
- return;
+ if (!first)
+ return;
- while (ml) {
- GF_FREE (ml->ml_hostname);
- GF_FREE (ml->ml_directory);
- next = ml->ml_next;
- GF_FREE (ml);
- ml = next;
- }
+ while (first) {
+ elist = first->ex_next;
+ GF_FREE(first->ex_dir);
- return;
+ xdr_free_groupnode(first->ex_groups);
+
+ GF_FREE(first);
+ first = elist;
+ }
}
+void
+xdr_free_mountlist(mountlist ml)
+{
+ struct mountbody *next = NULL;
+
+ if (!ml)
+ return;
+
+ while (ml) {
+ GF_FREE(ml->ml_hostname);
+ GF_FREE(ml->ml_directory);
+ next = ml->ml_next;
+ GF_FREE(ml);
+ ml = next;
+ }
+
+ return;
+}
/* Free statements are based on the way sunrpc xdr decoding
* code performs memory allocations.
*/
void
-xdr_free_write3args_nocopy (write3args *wa)
+xdr_free_write3args_nocopy(write3args *wa)
{
- if (!wa)
- return;
+ if (!wa)
+ return;
- FREE (wa->file.data.data_val);
+ FREE(wa->file.data.data_val);
}
-
-
diff --git a/rpc/xdr/src/xdr-nfs3.h b/rpc/xdr/src/xdr-nfs3.h
index 9e5ccd186ca..b7f5abefffd 100644
--- a/rpc/xdr/src/xdr-nfs3.h
+++ b/rpc/xdr/src/xdr-nfs3.h
@@ -1,20 +1,11 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
#ifndef _XDR_NFS3_H
@@ -23,25 +14,27 @@
#include <rpc/rpc.h>
#include <sys/types.h>
-#define NFS3_FHSIZE 64
-#define NFS3_COOKIEVERFSIZE 8
-#define NFS3_CREATEVERFSIZE 8
-#define NFS3_WRITEVERFSIZE 8
+#define NFS3_FHSIZE 64
+#define NFS3_COOKIEVERFSIZE 8
+#define NFS3_CREATEVERFSIZE 8
+#define NFS3_WRITEVERFSIZE 8
-#define NFS3_ENTRY3_FIXED_SIZE 24
-#define NFS3_POSTOPATTR_SIZE 88
-#define NFS3_READDIR_RESOK_SIZE (NFS3_POSTOPATTR_SIZE + sizeof (bool_t) + NFS3_COOKIEVERFSIZE)
+#define NFS3_ENTRY3_FIXED_SIZE 24
+#define NFS3_POSTOPATTR_SIZE 88
+#define NFS3_READDIR_RESOK_SIZE \
+ (NFS3_POSTOPATTR_SIZE + sizeof(bool_t) + NFS3_COOKIEVERFSIZE)
/* In size of post_op_fh3, the length of the file handle will have to be
* included separately since we have variable length fh. Here we only account
* for the field for handle_follows and for the file handle length field.
*/
-#define NFS3_POSTOPFH3_FIXED_SIZE (sizeof (bool_t) + sizeof (uint32_t))
+#define NFS3_POSTOPFH3_FIXED_SIZE (sizeof(bool_t) + sizeof(uint32_t))
/* Similarly, the size of the entry will have to include the variable length
* file handle and the length of the entry name.
*/
-#define NFS3_ENTRYP3_FIXED_SIZE (NFS3_ENTRY3_FIXED_SIZE + NFS3_POSTOPATTR_SIZE + NFS3_POSTOPFH3_FIXED_SIZE)
+#define NFS3_ENTRYP3_FIXED_SIZE \
+ (NFS3_ENTRY3_FIXED_SIZE + NFS3_POSTOPATTR_SIZE + NFS3_POSTOPFH3_FIXED_SIZE)
typedef uint64_t uint64;
typedef int64_t int64;
@@ -61,283 +54,284 @@ typedef uint64 offset3;
typedef uint32 mode3;
typedef uint32 count3;
-#define NFS3MODE_SETXUID 0x00800
-#define NFS3MODE_SETXGID 0x00400
-#define NFS3MODE_SAVESWAPTXT 0x00200
-#define NFS3MODE_ROWNER 0x00100
-#define NFS3MODE_WOWNER 0x00080
-#define NFS3MODE_XOWNER 0x00040
-#define NFS3MODE_RGROUP 0x00020
-#define NFS3MODE_WGROUP 0x00010
-#define NFS3MODE_XGROUP 0x00008
-#define NFS3MODE_ROTHER 0x00004
-#define NFS3MODE_WOTHER 0x00002
-#define NFS3MODE_XOTHER 0x00001
+#define NFS3MODE_SETXUID 0x00800
+#define NFS3MODE_SETXGID 0x00400
+#define NFS3MODE_SAVESWAPTXT 0x00200
+#define NFS3MODE_ROWNER 0x00100
+#define NFS3MODE_WOWNER 0x00080
+#define NFS3MODE_XOWNER 0x00040
+#define NFS3MODE_RGROUP 0x00020
+#define NFS3MODE_WGROUP 0x00010
+#define NFS3MODE_XGROUP 0x00008
+#define NFS3MODE_ROTHER 0x00004
+#define NFS3MODE_WOTHER 0x00002
+#define NFS3MODE_XOTHER 0x00001
enum nfsstat3 {
- NFS3_OK = 0,
- NFS3ERR_PERM = 1,
- NFS3ERR_NOENT = 2,
- NFS3ERR_IO = 5,
- NFS3ERR_NXIO = 6,
- NFS3ERR_ACCES = 13,
- NFS3ERR_EXIST = 17,
- NFS3ERR_XDEV = 18,
- NFS3ERR_NODEV = 19,
- NFS3ERR_NOTDIR = 20,
- NFS3ERR_ISDIR = 21,
- NFS3ERR_INVAL = 22,
- NFS3ERR_FBIG = 27,
- NFS3ERR_NOSPC = 28,
- NFS3ERR_ROFS = 30,
- NFS3ERR_MLINK = 31,
- NFS3ERR_NAMETOOLONG = 63,
- NFS3ERR_NOTEMPTY = 66,
- NFS3ERR_DQUOT = 69,
- NFS3ERR_STALE = 70,
- NFS3ERR_REMOTE = 71,
- NFS3ERR_BADHANDLE = 10001,
- NFS3ERR_NOT_SYNC = 10002,
- NFS3ERR_BAD_COOKIE = 10003,
- NFS3ERR_NOTSUPP = 10004,
- NFS3ERR_TOOSMALL = 10005,
- NFS3ERR_SERVERFAULT = 10006,
- NFS3ERR_BADTYPE = 10007,
- NFS3ERR_JUKEBOX = 10008,
+ NFS3_OK = 0,
+ NFS3ERR_PERM = 1,
+ NFS3ERR_NOENT = 2,
+ NFS3ERR_IO = 5,
+ NFS3ERR_NXIO = 6,
+ NFS3ERR_ACCES = 13,
+ NFS3ERR_EXIST = 17,
+ NFS3ERR_XDEV = 18,
+ NFS3ERR_NODEV = 19,
+ NFS3ERR_NOTDIR = 20,
+ NFS3ERR_ISDIR = 21,
+ NFS3ERR_INVAL = 22,
+ NFS3ERR_FBIG = 27,
+ NFS3ERR_NOSPC = 28,
+ NFS3ERR_ROFS = 30,
+ NFS3ERR_MLINK = 31,
+ NFS3ERR_NAMETOOLONG = 63,
+ NFS3ERR_NOTEMPTY = 66,
+ NFS3ERR_DQUOT = 69,
+ NFS3ERR_STALE = 70,
+ NFS3ERR_REMOTE = 71,
+ NFS3ERR_BADHANDLE = 10001,
+ NFS3ERR_NOT_SYNC = 10002,
+ NFS3ERR_BAD_COOKIE = 10003,
+ NFS3ERR_NOTSUPP = 10004,
+ NFS3ERR_TOOSMALL = 10005,
+ NFS3ERR_SERVERFAULT = 10006,
+ NFS3ERR_BADTYPE = 10007,
+ NFS3ERR_JUKEBOX = 10008,
+ NFS3ERR_END_OF_LIST = -1,
};
typedef enum nfsstat3 nfsstat3;
enum ftype3 {
- NF3REG = 1,
- NF3DIR = 2,
- NF3BLK = 3,
- NF3CHR = 4,
- NF3LNK = 5,
- NF3SOCK = 6,
- NF3FIFO = 7,
+ NF3REG = 1,
+ NF3DIR = 2,
+ NF3BLK = 3,
+ NF3CHR = 4,
+ NF3LNK = 5,
+ NF3SOCK = 6,
+ NF3FIFO = 7,
};
typedef enum ftype3 ftype3;
struct specdata3 {
- uint32 specdata1;
- uint32 specdata2;
+ uint32 specdata1;
+ uint32 specdata2;
};
typedef struct specdata3 specdata3;
struct nfs_fh3 {
- struct {
- u_int data_len;
- char *data_val;
- } data;
+ struct {
+ u_int data_len;
+ char *data_val;
+ } data;
};
typedef struct nfs_fh3 nfs_fh3;
struct nfstime3 {
- uint32 seconds;
- uint32 nseconds;
+ uint32 seconds;
+ uint32 nseconds;
};
typedef struct nfstime3 nfstime3;
struct fattr3 {
- ftype3 type;
- mode3 mode;
- uint32 nlink;
- uid3 uid;
- gid3 gid;
- size3 size;
- size3 used;
- specdata3 rdev;
- uint64 fsid;
- fileid3 fileid;
- nfstime3 atime;
- nfstime3 mtime;
- nfstime3 ctime;
+ ftype3 type;
+ mode3 mode;
+ uint32 nlink;
+ uid3 uid;
+ gid3 gid;
+ size3 size;
+ size3 used;
+ specdata3 rdev;
+ uint64 fsid;
+ fileid3 fileid;
+ nfstime3 atime;
+ nfstime3 mtime;
+ nfstime3 ctime;
};
typedef struct fattr3 fattr3;
struct post_op_attr {
- bool_t attributes_follow;
- union {
- fattr3 attributes;
- } post_op_attr_u;
+ bool_t attributes_follow;
+ union {
+ fattr3 attributes;
+ } post_op_attr_u;
};
typedef struct post_op_attr post_op_attr;
struct wcc_attr {
- size3 size;
- nfstime3 mtime;
- nfstime3 ctime;
+ size3 size;
+ nfstime3 mtime;
+ nfstime3 ctime;
};
typedef struct wcc_attr wcc_attr;
struct pre_op_attr {
- bool_t attributes_follow;
- union {
- wcc_attr attributes;
- } pre_op_attr_u;
+ bool_t attributes_follow;
+ union {
+ wcc_attr attributes;
+ } pre_op_attr_u;
};
typedef struct pre_op_attr pre_op_attr;
struct wcc_data {
- pre_op_attr before;
- post_op_attr after;
+ pre_op_attr before;
+ post_op_attr after;
};
typedef struct wcc_data wcc_data;
struct post_op_fh3 {
- bool_t handle_follows;
- union {
- nfs_fh3 handle;
- } post_op_fh3_u;
+ bool_t handle_follows;
+ union {
+ nfs_fh3 handle;
+ } post_op_fh3_u;
};
typedef struct post_op_fh3 post_op_fh3;
enum time_how {
- DONT_CHANGE = 0,
- SET_TO_SERVER_TIME = 1,
- SET_TO_CLIENT_TIME = 2,
+ DONT_CHANGE = 0,
+ SET_TO_SERVER_TIME = 1,
+ SET_TO_CLIENT_TIME = 2,
};
typedef enum time_how time_how;
struct set_mode3 {
- bool_t set_it;
- union {
- mode3 mode;
- } set_mode3_u;
+ bool_t set_it;
+ union {
+ mode3 mode;
+ } set_mode3_u;
};
typedef struct set_mode3 set_mode3;
struct set_uid3 {
- bool_t set_it;
- union {
- uid3 uid;
- } set_uid3_u;
+ bool_t set_it;
+ union {
+ uid3 uid;
+ } set_uid3_u;
};
typedef struct set_uid3 set_uid3;
struct set_gid3 {
- bool_t set_it;
- union {
- gid3 gid;
- } set_gid3_u;
+ bool_t set_it;
+ union {
+ gid3 gid;
+ } set_gid3_u;
};
typedef struct set_gid3 set_gid3;
struct set_size3 {
- bool_t set_it;
- union {
- size3 size;
- } set_size3_u;
+ bool_t set_it;
+ union {
+ size3 size;
+ } set_size3_u;
};
typedef struct set_size3 set_size3;
struct set_atime {
- time_how set_it;
- union {
- nfstime3 atime;
- } set_atime_u;
+ time_how set_it;
+ union {
+ nfstime3 atime;
+ } set_atime_u;
};
typedef struct set_atime set_atime;
struct set_mtime {
- time_how set_it;
- union {
- nfstime3 mtime;
- } set_mtime_u;
+ time_how set_it;
+ union {
+ nfstime3 mtime;
+ } set_mtime_u;
};
typedef struct set_mtime set_mtime;
struct sattr3 {
- set_mode3 mode;
- set_uid3 uid;
- set_gid3 gid;
- set_size3 size;
- set_atime atime;
- set_mtime mtime;
+ set_mode3 mode;
+ set_uid3 uid;
+ set_gid3 gid;
+ set_size3 size;
+ set_atime atime;
+ set_mtime mtime;
};
typedef struct sattr3 sattr3;
struct diropargs3 {
- nfs_fh3 dir;
- filename3 name;
+ nfs_fh3 dir;
+ filename3 name;
};
typedef struct diropargs3 diropargs3;
struct getattr3args {
- nfs_fh3 object;
+ nfs_fh3 object;
};
typedef struct getattr3args getattr3args;
struct getattr3resok {
- fattr3 obj_attributes;
+ fattr3 obj_attributes;
};
typedef struct getattr3resok getattr3resok;
struct getattr3res {
- nfsstat3 status;
- union {
- getattr3resok resok;
- } getattr3res_u;
+ nfsstat3 status;
+ union {
+ getattr3resok resok;
+ } getattr3res_u;
};
typedef struct getattr3res getattr3res;
struct sattrguard3 {
- bool_t check;
- union {
- nfstime3 obj_ctime;
- } sattrguard3_u;
+ bool_t check;
+ union {
+ nfstime3 obj_ctime;
+ } sattrguard3_u;
};
typedef struct sattrguard3 sattrguard3;
struct setattr3args {
- nfs_fh3 object;
- sattr3 new_attributes;
- sattrguard3 guard;
+ nfs_fh3 object;
+ sattr3 new_attributes;
+ sattrguard3 guard;
};
typedef struct setattr3args setattr3args;
struct setattr3resok {
- wcc_data obj_wcc;
+ wcc_data obj_wcc;
};
typedef struct setattr3resok setattr3resok;
struct setattr3resfail {
- wcc_data obj_wcc;
+ wcc_data obj_wcc;
};
typedef struct setattr3resfail setattr3resfail;
struct setattr3res {
- nfsstat3 status;
- union {
- setattr3resok resok;
- setattr3resfail resfail;
- } setattr3res_u;
+ nfsstat3 status;
+ union {
+ setattr3resok resok;
+ setattr3resfail resfail;
+ } setattr3res_u;
};
typedef struct setattr3res setattr3res;
struct lookup3args {
- diropargs3 what;
+ diropargs3 what;
};
typedef struct lookup3args lookup3args;
struct lookup3resok {
- nfs_fh3 object;
- post_op_attr obj_attributes;
- post_op_attr dir_attributes;
+ nfs_fh3 object;
+ post_op_attr obj_attributes;
+ post_op_attr dir_attributes;
};
typedef struct lookup3resok lookup3resok;
struct lookup3resfail {
- post_op_attr dir_attributes;
+ post_op_attr dir_attributes;
};
typedef struct lookup3resfail lookup3resfail;
struct lookup3res {
- nfsstat3 status;
- union {
- lookup3resok resok;
- lookup3resfail resfail;
- } lookup3res_u;
+ nfsstat3 status;
+ union {
+ lookup3resok resok;
+ lookup3resfail resfail;
+ } lookup3res_u;
};
typedef struct lookup3res lookup3res;
#define ACCESS3_READ 0x0001
@@ -348,104 +342,104 @@ typedef struct lookup3res lookup3res;
#define ACCESS3_EXECUTE 0x0020
struct access3args {
- nfs_fh3 object;
- uint32 access;
+ nfs_fh3 object;
+ uint32 access;
};
typedef struct access3args access3args;
struct access3resok {
- post_op_attr obj_attributes;
- uint32 access;
+ post_op_attr obj_attributes;
+ uint32 access;
};
typedef struct access3resok access3resok;
struct access3resfail {
- post_op_attr obj_attributes;
+ post_op_attr obj_attributes;
};
typedef struct access3resfail access3resfail;
struct access3res {
- nfsstat3 status;
- union {
- access3resok resok;
- access3resfail resfail;
- } access3res_u;
+ nfsstat3 status;
+ union {
+ access3resok resok;
+ access3resfail resfail;
+ } access3res_u;
};
typedef struct access3res access3res;
struct readlink3args {
- nfs_fh3 symlink;
+ nfs_fh3 symlink;
};
typedef struct readlink3args readlink3args;
struct readlink3resok {
- post_op_attr symlink_attributes;
- nfspath3 data;
+ post_op_attr symlink_attributes;
+ nfspath3 data;
};
typedef struct readlink3resok readlink3resok;
struct readlink3resfail {
- post_op_attr symlink_attributes;
+ post_op_attr symlink_attributes;
};
typedef struct readlink3resfail readlink3resfail;
struct readlink3res {
- nfsstat3 status;
- union {
- readlink3resok resok;
- readlink3resfail resfail;
- } readlink3res_u;
+ nfsstat3 status;
+ union {
+ readlink3resok resok;
+ readlink3resfail resfail;
+ } readlink3res_u;
};
typedef struct readlink3res readlink3res;
struct read3args {
- nfs_fh3 file;
- offset3 offset;
- count3 count;
+ nfs_fh3 file;
+ offset3 offset;
+ count3 count;
};
typedef struct read3args read3args;
struct read3resok {
- post_op_attr file_attributes;
- count3 count;
- bool_t eof;
- struct {
- u_int data_len;
- char *data_val;
- } data;
+ post_op_attr file_attributes;
+ count3 count;
+ bool_t eof;
+ struct {
+ u_int data_len;
+ char *data_val;
+ } data;
};
typedef struct read3resok read3resok;
struct read3resfail {
- post_op_attr file_attributes;
+ post_op_attr file_attributes;
};
typedef struct read3resfail read3resfail;
struct read3res {
- nfsstat3 status;
- union {
- read3resok resok;
- read3resfail resfail;
- } read3res_u;
+ nfsstat3 status;
+ union {
+ read3resok resok;
+ read3resfail resfail;
+ } read3res_u;
};
typedef struct read3res read3res;
enum stable_how {
- UNSTABLE = 0,
- DATA_SYNC = 1,
- FILE_SYNC = 2,
+ UNSTABLE = 0,
+ DATA_SYNC = 1,
+ FILE_SYNC = 2,
};
typedef enum stable_how stable_how;
struct write3args {
- nfs_fh3 file;
- offset3 offset;
- count3 count;
- stable_how stable;
- struct {
- u_int data_len;
- char *data_val;
- } data;
+ nfs_fh3 file;
+ offset3 offset;
+ count3 count;
+ stable_how stable;
+ struct {
+ u_int data_len;
+ char *data_val;
+ } data;
};
typedef struct write3args write3args;
@@ -454,393 +448,395 @@ typedef struct write3args write3args;
* sizeof (nfs_fh3) rather than first trying to extract the fh size of the
* network followed by a sized-read of the file handle.
*/
-#define NFS3_WRITE3ARGS_SIZE (sizeof (uint32_t) + NFS3_FHSIZE + sizeof (offset3) + sizeof (count3) + sizeof (uint32_t))
+#define NFS3_WRITE3ARGS_SIZE \
+ (sizeof(uint32_t) + NFS3_FHSIZE + sizeof(offset3) + sizeof(count3) + \
+ sizeof(uint32_t))
struct write3resok {
- wcc_data file_wcc;
- count3 count;
- stable_how committed;
- writeverf3 verf;
+ wcc_data file_wcc;
+ count3 count;
+ stable_how committed;
+ writeverf3 verf;
};
typedef struct write3resok write3resok;
struct write3resfail {
- wcc_data file_wcc;
+ wcc_data file_wcc;
};
typedef struct write3resfail write3resfail;
struct write3res {
- nfsstat3 status;
- union {
- write3resok resok;
- write3resfail resfail;
- } write3res_u;
+ nfsstat3 status;
+ union {
+ write3resok resok;
+ write3resfail resfail;
+ } write3res_u;
};
typedef struct write3res write3res;
enum createmode3 {
- UNCHECKED = 0,
- GUARDED = 1,
- EXCLUSIVE = 2,
+ UNCHECKED = 0,
+ GUARDED = 1,
+ EXCLUSIVE = 2,
};
typedef enum createmode3 createmode3;
struct createhow3 {
- createmode3 mode;
- union {
- sattr3 obj_attributes;
- createverf3 verf;
- } createhow3_u;
+ createmode3 mode;
+ union {
+ sattr3 obj_attributes;
+ createverf3 verf;
+ } createhow3_u;
};
typedef struct createhow3 createhow3;
struct create3args {
- diropargs3 where;
- createhow3 how;
+ diropargs3 where;
+ createhow3 how;
};
typedef struct create3args create3args;
struct create3resok {
- post_op_fh3 obj;
- post_op_attr obj_attributes;
- wcc_data dir_wcc;
+ post_op_fh3 obj;
+ post_op_attr obj_attributes;
+ wcc_data dir_wcc;
};
typedef struct create3resok create3resok;
struct create3resfail {
- wcc_data dir_wcc;
+ wcc_data dir_wcc;
};
typedef struct create3resfail create3resfail;
struct create3res {
- nfsstat3 status;
- union {
- create3resok resok;
- create3resfail resfail;
- } create3res_u;
+ nfsstat3 status;
+ union {
+ create3resok resok;
+ create3resfail resfail;
+ } create3res_u;
};
typedef struct create3res create3res;
struct mkdir3args {
- diropargs3 where;
- sattr3 attributes;
+ diropargs3 where;
+ sattr3 attributes;
};
typedef struct mkdir3args mkdir3args;
struct mkdir3resok {
- post_op_fh3 obj;
- post_op_attr obj_attributes;
- wcc_data dir_wcc;
+ post_op_fh3 obj;
+ post_op_attr obj_attributes;
+ wcc_data dir_wcc;
};
typedef struct mkdir3resok mkdir3resok;
struct mkdir3resfail {
- wcc_data dir_wcc;
+ wcc_data dir_wcc;
};
typedef struct mkdir3resfail mkdir3resfail;
struct mkdir3res {
- nfsstat3 status;
- union {
- mkdir3resok resok;
- mkdir3resfail resfail;
- } mkdir3res_u;
+ nfsstat3 status;
+ union {
+ mkdir3resok resok;
+ mkdir3resfail resfail;
+ } mkdir3res_u;
};
typedef struct mkdir3res mkdir3res;
struct symlinkdata3 {
- sattr3 symlink_attributes;
- nfspath3 symlink_data;
+ sattr3 symlink_attributes;
+ nfspath3 symlink_data;
};
typedef struct symlinkdata3 symlinkdata3;
struct symlink3args {
- diropargs3 where;
- symlinkdata3 symlink;
+ diropargs3 where;
+ symlinkdata3 symlink;
};
typedef struct symlink3args symlink3args;
struct symlink3resok {
- post_op_fh3 obj;
- post_op_attr obj_attributes;
- wcc_data dir_wcc;
+ post_op_fh3 obj;
+ post_op_attr obj_attributes;
+ wcc_data dir_wcc;
};
typedef struct symlink3resok symlink3resok;
struct symlink3resfail {
- wcc_data dir_wcc;
+ wcc_data dir_wcc;
};
typedef struct symlink3resfail symlink3resfail;
struct symlink3res {
- nfsstat3 status;
- union {
- symlink3resok resok;
- symlink3resfail resfail;
- } symlink3res_u;
+ nfsstat3 status;
+ union {
+ symlink3resok resok;
+ symlink3resfail resfail;
+ } symlink3res_u;
};
typedef struct symlink3res symlink3res;
struct devicedata3 {
- sattr3 dev_attributes;
- specdata3 spec;
+ sattr3 dev_attributes;
+ specdata3 spec;
};
typedef struct devicedata3 devicedata3;
struct mknoddata3 {
- ftype3 type;
- union {
- devicedata3 device;
- sattr3 pipe_attributes;
- } mknoddata3_u;
+ ftype3 type;
+ union {
+ devicedata3 device;
+ sattr3 pipe_attributes;
+ } mknoddata3_u;
};
typedef struct mknoddata3 mknoddata3;
struct mknod3args {
- diropargs3 where;
- mknoddata3 what;
+ diropargs3 where;
+ mknoddata3 what;
};
typedef struct mknod3args mknod3args;
struct mknod3resok {
- post_op_fh3 obj;
- post_op_attr obj_attributes;
- wcc_data dir_wcc;
+ post_op_fh3 obj;
+ post_op_attr obj_attributes;
+ wcc_data dir_wcc;
};
typedef struct mknod3resok mknod3resok;
struct mknod3resfail {
- wcc_data dir_wcc;
+ wcc_data dir_wcc;
};
typedef struct mknod3resfail mknod3resfail;
struct mknod3res {
- nfsstat3 status;
- union {
- mknod3resok resok;
- mknod3resfail resfail;
- } mknod3res_u;
+ nfsstat3 status;
+ union {
+ mknod3resok resok;
+ mknod3resfail resfail;
+ } mknod3res_u;
};
typedef struct mknod3res mknod3res;
struct remove3args {
- diropargs3 object;
+ diropargs3 object;
};
typedef struct remove3args remove3args;
struct remove3resok {
- wcc_data dir_wcc;
+ wcc_data dir_wcc;
};
typedef struct remove3resok remove3resok;
struct remove3resfail {
- wcc_data dir_wcc;
+ wcc_data dir_wcc;
};
typedef struct remove3resfail remove3resfail;
struct remove3res {
- nfsstat3 status;
- union {
- remove3resok resok;
- remove3resfail resfail;
- } remove3res_u;
+ nfsstat3 status;
+ union {
+ remove3resok resok;
+ remove3resfail resfail;
+ } remove3res_u;
};
typedef struct remove3res remove3res;
struct rmdir3args {
- diropargs3 object;
+ diropargs3 object;
};
typedef struct rmdir3args rmdir3args;
struct rmdir3resok {
- wcc_data dir_wcc;
+ wcc_data dir_wcc;
};
typedef struct rmdir3resok rmdir3resok;
struct rmdir3resfail {
- wcc_data dir_wcc;
+ wcc_data dir_wcc;
};
typedef struct rmdir3resfail rmdir3resfail;
struct rmdir3res {
- nfsstat3 status;
- union {
- rmdir3resok resok;
- rmdir3resfail resfail;
- } rmdir3res_u;
+ nfsstat3 status;
+ union {
+ rmdir3resok resok;
+ rmdir3resfail resfail;
+ } rmdir3res_u;
};
typedef struct rmdir3res rmdir3res;
struct rename3args {
- diropargs3 from;
- diropargs3 to;
+ diropargs3 from;
+ diropargs3 to;
};
typedef struct rename3args rename3args;
struct rename3resok {
- wcc_data fromdir_wcc;
- wcc_data todir_wcc;
+ wcc_data fromdir_wcc;
+ wcc_data todir_wcc;
};
typedef struct rename3resok rename3resok;
struct rename3resfail {
- wcc_data fromdir_wcc;
- wcc_data todir_wcc;
+ wcc_data fromdir_wcc;
+ wcc_data todir_wcc;
};
typedef struct rename3resfail rename3resfail;
struct rename3res {
- nfsstat3 status;
- union {
- rename3resok resok;
- rename3resfail resfail;
- } rename3res_u;
+ nfsstat3 status;
+ union {
+ rename3resok resok;
+ rename3resfail resfail;
+ } rename3res_u;
};
typedef struct rename3res rename3res;
struct link3args {
- nfs_fh3 file;
- diropargs3 link;
+ nfs_fh3 file;
+ diropargs3 link;
};
typedef struct link3args link3args;
struct link3resok {
- post_op_attr file_attributes;
- wcc_data linkdir_wcc;
+ post_op_attr file_attributes;
+ wcc_data linkdir_wcc;
};
typedef struct link3resok link3resok;
struct link3resfail {
- post_op_attr file_attributes;
- wcc_data linkdir_wcc;
+ post_op_attr file_attributes;
+ wcc_data linkdir_wcc;
};
typedef struct link3resfail link3resfail;
struct link3res {
- nfsstat3 status;
- union {
- link3resok resok;
- link3resfail resfail;
- } link3res_u;
+ nfsstat3 status;
+ union {
+ link3resok resok;
+ link3resfail resfail;
+ } link3res_u;
};
typedef struct link3res link3res;
struct readdir3args {
- nfs_fh3 dir;
- cookie3 cookie;
- cookieverf3 cookieverf;
- count3 count;
+ nfs_fh3 dir;
+ cookie3 cookie;
+ cookieverf3 cookieverf;
+ count3 count;
};
typedef struct readdir3args readdir3args;
struct entry3 {
- fileid3 fileid;
- filename3 name;
- cookie3 cookie;
- struct entry3 *nextentry;
+ fileid3 fileid;
+ filename3 name;
+ cookie3 cookie;
+ struct entry3 *nextentry;
};
typedef struct entry3 entry3;
struct dirlist3 {
- entry3 *entries;
- bool_t eof;
+ entry3 *entries;
+ bool_t eof;
};
typedef struct dirlist3 dirlist3;
struct readdir3resok {
- post_op_attr dir_attributes;
- cookieverf3 cookieverf;
- dirlist3 reply;
+ post_op_attr dir_attributes;
+ cookieverf3 cookieverf;
+ dirlist3 reply;
};
typedef struct readdir3resok readdir3resok;
struct readdir3resfail {
- post_op_attr dir_attributes;
+ post_op_attr dir_attributes;
};
typedef struct readdir3resfail readdir3resfail;
struct readdir3res {
- nfsstat3 status;
- union {
- readdir3resok resok;
- readdir3resfail resfail;
- } readdir3res_u;
+ nfsstat3 status;
+ union {
+ readdir3resok resok;
+ readdir3resfail resfail;
+ } readdir3res_u;
};
typedef struct readdir3res readdir3res;
struct readdirp3args {
- nfs_fh3 dir;
- cookie3 cookie;
- cookieverf3 cookieverf;
- count3 dircount;
- count3 maxcount;
+ nfs_fh3 dir;
+ cookie3 cookie;
+ cookieverf3 cookieverf;
+ count3 dircount;
+ count3 maxcount;
};
typedef struct readdirp3args readdirp3args;
struct entryp3 {
- fileid3 fileid;
- filename3 name;
- cookie3 cookie;
- post_op_attr name_attributes;
- post_op_fh3 name_handle;
- struct entryp3 *nextentry;
+ fileid3 fileid;
+ filename3 name;
+ cookie3 cookie;
+ post_op_attr name_attributes;
+ post_op_fh3 name_handle;
+ struct entryp3 *nextentry;
};
typedef struct entryp3 entryp3;
struct dirlistp3 {
- entryp3 *entries;
- bool_t eof;
+ entryp3 *entries;
+ bool_t eof;
};
typedef struct dirlistp3 dirlistp3;
struct readdirp3resok {
- post_op_attr dir_attributes;
- cookieverf3 cookieverf;
- dirlistp3 reply;
+ post_op_attr dir_attributes;
+ cookieverf3 cookieverf;
+ dirlistp3 reply;
};
typedef struct readdirp3resok readdirp3resok;
struct readdirp3resfail {
- post_op_attr dir_attributes;
+ post_op_attr dir_attributes;
};
typedef struct readdirp3resfail readdirp3resfail;
struct readdirp3res {
- nfsstat3 status;
- union {
- readdirp3resok resok;
- readdirp3resfail resfail;
- } readdirp3res_u;
+ nfsstat3 status;
+ union {
+ readdirp3resok resok;
+ readdirp3resfail resfail;
+ } readdirp3res_u;
};
typedef struct readdirp3res readdirp3res;
struct fsstat3args {
- nfs_fh3 fsroot;
+ nfs_fh3 fsroot;
};
typedef struct fsstat3args fsstat3args;
struct fsstat3resok {
- post_op_attr obj_attributes;
- size3 tbytes;
- size3 fbytes;
- size3 abytes;
- size3 tfiles;
- size3 ffiles;
- size3 afiles;
- uint32 invarsec;
+ post_op_attr obj_attributes;
+ size3 tbytes;
+ size3 fbytes;
+ size3 abytes;
+ size3 tfiles;
+ size3 ffiles;
+ size3 afiles;
+ uint32 invarsec;
};
typedef struct fsstat3resok fsstat3resok;
struct fsstat3resfail {
- post_op_attr obj_attributes;
+ post_op_attr obj_attributes;
};
typedef struct fsstat3resfail fsstat3resfail;
struct fsstat3res {
- nfsstat3 status;
- union {
- fsstat3resok resok;
- fsstat3resfail resfail;
- } fsstat3res_u;
+ nfsstat3 status;
+ union {
+ fsstat3resok resok;
+ fsstat3resfail resfail;
+ } fsstat3res_u;
};
typedef struct fsstat3res fsstat3res;
#define FSF3_LINK 0x0001
@@ -849,93 +845,93 @@ typedef struct fsstat3res fsstat3res;
#define FSF3_CANSETTIME 0x0010
struct fsinfo3args {
- nfs_fh3 fsroot;
+ nfs_fh3 fsroot;
};
typedef struct fsinfo3args fsinfo3args;
struct fsinfo3resok {
- post_op_attr obj_attributes;
- uint32 rtmax;
- uint32 rtpref;
- uint32 rtmult;
- uint32 wtmax;
- uint32 wtpref;
- uint32 wtmult;
- uint32 dtpref;
- size3 maxfilesize;
- nfstime3 time_delta;
- uint32 properties;
+ post_op_attr obj_attributes;
+ uint32 rtmax;
+ uint32 rtpref;
+ uint32 rtmult;
+ uint32 wtmax;
+ uint32 wtpref;
+ uint32 wtmult;
+ uint32 dtpref;
+ size3 maxfilesize;
+ nfstime3 time_delta;
+ uint32 properties;
};
typedef struct fsinfo3resok fsinfo3resok;
struct fsinfo3resfail {
- post_op_attr obj_attributes;
+ post_op_attr obj_attributes;
};
typedef struct fsinfo3resfail fsinfo3resfail;
struct fsinfo3res {
- nfsstat3 status;
- union {
- fsinfo3resok resok;
- fsinfo3resfail resfail;
- } fsinfo3res_u;
+ nfsstat3 status;
+ union {
+ fsinfo3resok resok;
+ fsinfo3resfail resfail;
+ } fsinfo3res_u;
};
typedef struct fsinfo3res fsinfo3res;
struct pathconf3args {
- nfs_fh3 object;
+ nfs_fh3 object;
};
typedef struct pathconf3args pathconf3args;
struct pathconf3resok {
- post_op_attr obj_attributes;
- uint32 linkmax;
- uint32 name_max;
- bool_t no_trunc;
- bool_t chown_restricted;
- bool_t case_insensitive;
- bool_t case_preserving;
+ post_op_attr obj_attributes;
+ uint32 linkmax;
+ uint32 name_max;
+ bool_t no_trunc;
+ bool_t chown_restricted;
+ bool_t case_insensitive;
+ bool_t case_preserving;
};
typedef struct pathconf3resok pathconf3resok;
struct pathconf3resfail {
- post_op_attr obj_attributes;
+ post_op_attr obj_attributes;
};
typedef struct pathconf3resfail pathconf3resfail;
struct pathconf3res {
- nfsstat3 status;
- union {
- pathconf3resok resok;
- pathconf3resfail resfail;
- } pathconf3res_u;
+ nfsstat3 status;
+ union {
+ pathconf3resok resok;
+ pathconf3resfail resfail;
+ } pathconf3res_u;
};
typedef struct pathconf3res pathconf3res;
struct commit3args {
- nfs_fh3 file;
- offset3 offset;
- count3 count;
+ nfs_fh3 file;
+ offset3 offset;
+ count3 count;
};
typedef struct commit3args commit3args;
struct commit3resok {
- wcc_data file_wcc;
- writeverf3 verf;
+ wcc_data file_wcc;
+ writeverf3 verf;
};
typedef struct commit3resok commit3resok;
struct commit3resfail {
- wcc_data file_wcc;
+ wcc_data file_wcc;
};
typedef struct commit3resfail commit3resfail;
struct commit3res {
- nfsstat3 status;
- union {
- commit3resok resok;
- commit3resfail resfail;
- } commit3res_u;
+ nfsstat3 status;
+ union {
+ commit3resok resok;
+ commit3resfail resfail;
+ } commit3res_u;
};
typedef struct commit3res commit3res;
#define MNTPATHLEN 1024
@@ -943,8 +939,8 @@ typedef struct commit3res commit3res;
#define FHSIZE3 NFS3_FHSIZE
typedef struct {
- u_int fhandle3_len;
- char *fhandle3_val;
+ u_int fhandle3_len;
+ char *fhandle3_val;
} fhandle3;
typedef char *dirpath;
@@ -952,255 +948,404 @@ typedef char *dirpath;
typedef char *name;
enum mountstat3 {
- MNT3_OK = 0,
- MNT3ERR_PERM = 1,
- MNT3ERR_NOENT = 2,
- MNT3ERR_IO = 5,
- MNT3ERR_ACCES = 13,
- MNT3ERR_NOTDIR = 20,
- MNT3ERR_INVAL = 22,
- MNT3ERR_NAMETOOLONG = 63,
- MNT3ERR_NOTSUPP = 10004,
- MNT3ERR_SERVERFAULT = 10006,
+ MNT3_OK = 0,
+ MNT3ERR_PERM = 1,
+ MNT3ERR_NOENT = 2,
+ MNT3ERR_IO = 5,
+ MNT3ERR_ACCES = 13,
+ MNT3ERR_NOTDIR = 20,
+ MNT3ERR_INVAL = 22,
+ MNT3ERR_NAMETOOLONG = 63,
+ MNT3ERR_NOTSUPP = 10004,
+ MNT3ERR_SERVERFAULT = 10006,
};
typedef enum mountstat3 mountstat3;
struct mountres3_ok {
- fhandle3 fhandle;
- struct {
- u_int auth_flavors_len;
- int *auth_flavors_val;
- } auth_flavors;
+ fhandle3 fhandle;
+ struct {
+ u_int auth_flavors_len;
+ int *auth_flavors_val;
+ } auth_flavors;
};
typedef struct mountres3_ok mountres3_ok;
struct mountres3 {
- mountstat3 fhs_status;
- union {
- mountres3_ok mountinfo;
- } mountres3_u;
+ mountstat3 fhs_status;
+ union {
+ mountres3_ok mountinfo;
+ } mountres3_u;
};
typedef struct mountres3 mountres3;
typedef struct mountbody *mountlist;
struct mountbody {
- name ml_hostname;
- dirpath ml_directory;
- mountlist ml_next;
+ name ml_hostname;
+ dirpath ml_directory;
+ mountlist ml_next;
};
typedef struct mountbody mountbody;
typedef struct groupnode *groups;
struct groupnode {
- name gr_name;
- groups gr_next;
+ name gr_name;
+ groups gr_next;
};
typedef struct groupnode groupnode;
typedef struct exportnode *exports;
struct exportnode {
- dirpath ex_dir;
- groups ex_groups;
- exports ex_next;
+ dirpath ex_dir;
+ groups ex_groups;
+ exports ex_next;
};
typedef struct exportnode exportnode;
-#define NFS_PROGRAM 100003
-#define NFS_V3 3
-
-#define NFS3_NULL 0
-#define NFS3_GETATTR 1
-#define NFS3_SETATTR 2
-#define NFS3_LOOKUP 3
-#define NFS3_ACCESS 4
-#define NFS3_READLINK 5
-#define NFS3_READ 6
-#define NFS3_WRITE 7
-#define NFS3_CREATE 8
-#define NFS3_MKDIR 9
-#define NFS3_SYMLINK 10
-#define NFS3_MKNOD 11
-#define NFS3_REMOVE 12
-#define NFS3_RMDIR 13
-#define NFS3_RENAME 14
-#define NFS3_LINK 15
-#define NFS3_READDIR 16
-#define NFS3_READDIRP 17
-#define NFS3_FSSTAT 18
-#define NFS3_FSINFO 19
-#define NFS3_PATHCONF 20
-#define NFS3_COMMIT 21
-#define NFS3_PROC_COUNT 22
-
-#define MOUNT_PROGRAM 100005
-#define MOUNT_V3 3
-#define MOUNT_V1 1
-
-#define MOUNT3_NULL 0
-#define MOUNT3_MNT 1
-#define MOUNT3_DUMP 2
-#define MOUNT3_UMNT 3
-#define MOUNT3_UMNTALL 4
-#define MOUNT3_EXPORT 5
-#define MOUNT3_PROC_COUNT 6
-
-#define MOUNT1_NULL 0
-#define MOUNT1_DUMP 2
-#define MOUNT1_UMNT 3
-#define MOUNT1_EXPORT 5
-#define MOUNT1_PROC_COUNT 6
+#define NFS_PROGRAM 100003
+#define NFS_V3 3
+
+#define NFS3_NULL 0
+#define NFS3_GETATTR 1
+#define NFS3_SETATTR 2
+#define NFS3_LOOKUP 3
+#define NFS3_ACCESS 4
+#define NFS3_READLINK 5
+#define NFS3_READ 6
+#define NFS3_WRITE 7
+#define NFS3_CREATE 8
+#define NFS3_MKDIR 9
+#define NFS3_SYMLINK 10
+#define NFS3_MKNOD 11
+#define NFS3_REMOVE 12
+#define NFS3_RMDIR 13
+#define NFS3_RENAME 14
+#define NFS3_LINK 15
+#define NFS3_READDIR 16
+#define NFS3_READDIRP 17
+#define NFS3_FSSTAT 18
+#define NFS3_FSINFO 19
+#define NFS3_PATHCONF 20
+#define NFS3_COMMIT 21
+#define NFS3_PROC_COUNT 22
+
+#define MOUNT_PROGRAM 100005
+#define MOUNT_V3 3
+#define MOUNT_V1 1
+
+#define MOUNT3_NULL 0
+#define MOUNT3_MNT 1
+#define MOUNT3_DUMP 2
+#define MOUNT3_UMNT 3
+#define MOUNT3_UMNTALL 4
+#define MOUNT3_EXPORT 5
+#define MOUNT3_PROC_COUNT 6
+
+#define MOUNT1_NULL 0
+#define MOUNT1_MNT 1
+#define MOUNT1_DUMP 2
+#define MOUNT1_UMNT 3
+#define MOUNT1_UMNTALL 4
+#define MOUNT1_EXPORT 5
+#define MOUNT1_PROC_COUNT 6
/* the xdr functions */
-extern bool_t xdr_uint64 (XDR *, uint64*);
-extern bool_t xdr_int64 (XDR *, int64*);
-extern bool_t xdr_uint32 (XDR *, uint32*);
-extern bool_t xdr_int32 (XDR *, int32*);
-extern bool_t xdr_filename3 (XDR *, filename3*);
-extern bool_t xdr_nfspath3 (XDR *, nfspath3*);
-extern bool_t xdr_fileid3 (XDR *, fileid3*);
-extern bool_t xdr_cookie3 (XDR *, cookie3*);
-extern bool_t xdr_cookieverf3 (XDR *, cookieverf3);
-extern bool_t xdr_createverf3 (XDR *, createverf3);
-extern bool_t xdr_writeverf3 (XDR *, writeverf3);
-extern bool_t xdr_uid3 (XDR *, uid3*);
-extern bool_t xdr_gid3 (XDR *, gid3*);
-extern bool_t xdr_size3 (XDR *, size3*);
-extern bool_t xdr_offset3 (XDR *, offset3*);
-extern bool_t xdr_mode3 (XDR *, mode3*);
-extern bool_t xdr_count3 (XDR *, count3*);
-extern bool_t xdr_nfsstat3 (XDR *, nfsstat3*);
-extern bool_t xdr_ftype3 (XDR *, ftype3*);
-extern bool_t xdr_specdata3 (XDR *, specdata3*);
-extern bool_t xdr_nfs_fh3 (XDR *, nfs_fh3*);
-extern bool_t xdr_nfstime3 (XDR *, nfstime3*);
-extern bool_t xdr_fattr3 (XDR *, fattr3*);
-extern bool_t xdr_post_op_attr (XDR *, post_op_attr*);
-extern bool_t xdr_wcc_attr (XDR *, wcc_attr*);
-extern bool_t xdr_pre_op_attr (XDR *, pre_op_attr*);
-extern bool_t xdr_wcc_data (XDR *, wcc_data*);
-extern bool_t xdr_post_op_fh3 (XDR *, post_op_fh3*);
-extern bool_t xdr_time_how (XDR *, time_how*);
-extern bool_t xdr_set_mode3 (XDR *, set_mode3*);
-extern bool_t xdr_set_uid3 (XDR *, set_uid3*);
-extern bool_t xdr_set_gid3 (XDR *, set_gid3*);
-extern bool_t xdr_set_size3 (XDR *, set_size3*);
-extern bool_t xdr_set_atime (XDR *, set_atime*);
-extern bool_t xdr_set_mtime (XDR *, set_mtime*);
-extern bool_t xdr_sattr3 (XDR *, sattr3*);
-extern bool_t xdr_diropargs3 (XDR *, diropargs3*);
-extern bool_t xdr_getattr3args (XDR *, getattr3args*);
-extern bool_t xdr_getattr3resok (XDR *, getattr3resok*);
-extern bool_t xdr_getattr3res (XDR *, getattr3res*);
-extern bool_t xdr_sattrguard3 (XDR *, sattrguard3*);
-extern bool_t xdr_setattr3args (XDR *, setattr3args*);
-extern bool_t xdr_setattr3resok (XDR *, setattr3resok*);
-extern bool_t xdr_setattr3resfail (XDR *, setattr3resfail*);
-extern bool_t xdr_setattr3res (XDR *, setattr3res*);
-extern bool_t xdr_lookup3args (XDR *, lookup3args*);
-extern bool_t xdr_lookup3resok (XDR *, lookup3resok*);
-extern bool_t xdr_lookup3resfail (XDR *, lookup3resfail*);
-extern bool_t xdr_lookup3res (XDR *, lookup3res*);
-extern bool_t xdr_access3args (XDR *, access3args*);
-extern bool_t xdr_access3resok (XDR *, access3resok*);
-extern bool_t xdr_access3resfail (XDR *, access3resfail*);
-extern bool_t xdr_access3res (XDR *, access3res*);
-extern bool_t xdr_readlink3args (XDR *, readlink3args*);
-extern bool_t xdr_readlink3resok (XDR *, readlink3resok*);
-extern bool_t xdr_readlink3resfail (XDR *, readlink3resfail*);
-extern bool_t xdr_readlink3res (XDR *, readlink3res*);
-extern bool_t xdr_read3args (XDR *, read3args*);
-extern bool_t xdr_read3resok (XDR *, read3resok*);
-extern bool_t xdr_read3resfail (XDR *, read3resfail*);
-extern bool_t xdr_read3res (XDR *, read3res*);
-extern bool_t xdr_read3res_nocopy (XDR *xdrs, read3res *objp);
-extern bool_t xdr_stable_how (XDR *, stable_how*);
-extern bool_t xdr_write3args (XDR *, write3args*);
-extern bool_t xdr_write3resok (XDR *, write3resok*);
-extern bool_t xdr_write3resfail (XDR *, write3resfail*);
-extern bool_t xdr_write3res (XDR *, write3res*);
-extern bool_t xdr_createmode3 (XDR *, createmode3*);
-extern bool_t xdr_createhow3 (XDR *, createhow3*);
-extern bool_t xdr_create3args (XDR *, create3args*);
-extern bool_t xdr_create3resok (XDR *, create3resok*);
-extern bool_t xdr_create3resfail (XDR *, create3resfail*);
-extern bool_t xdr_create3res (XDR *, create3res*);
-extern bool_t xdr_mkdir3args (XDR *, mkdir3args*);
-extern bool_t xdr_mkdir3resok (XDR *, mkdir3resok*);
-extern bool_t xdr_mkdir3resfail (XDR *, mkdir3resfail*);
-extern bool_t xdr_mkdir3res (XDR *, mkdir3res*);
-extern bool_t xdr_symlinkdata3 (XDR *, symlinkdata3*);
-extern bool_t xdr_symlink3args (XDR *, symlink3args*);
-extern bool_t xdr_symlink3resok (XDR *, symlink3resok*);
-extern bool_t xdr_symlink3resfail (XDR *, symlink3resfail*);
-extern bool_t xdr_symlink3res (XDR *, symlink3res*);
-extern bool_t xdr_devicedata3 (XDR *, devicedata3*);
-extern bool_t xdr_mknoddata3 (XDR *, mknoddata3*);
-extern bool_t xdr_mknod3args (XDR *, mknod3args*);
-extern bool_t xdr_mknod3resok (XDR *, mknod3resok*);
-extern bool_t xdr_mknod3resfail (XDR *, mknod3resfail*);
-extern bool_t xdr_mknod3res (XDR *, mknod3res*);
-extern bool_t xdr_remove3args (XDR *, remove3args*);
-extern bool_t xdr_remove3resok (XDR *, remove3resok*);
-extern bool_t xdr_remove3resfail (XDR *, remove3resfail*);
-extern bool_t xdr_remove3res (XDR *, remove3res*);
-extern bool_t xdr_rmdir3args (XDR *, rmdir3args*);
-extern bool_t xdr_rmdir3resok (XDR *, rmdir3resok*);
-extern bool_t xdr_rmdir3resfail (XDR *, rmdir3resfail*);
-extern bool_t xdr_rmdir3res (XDR *, rmdir3res*);
-extern bool_t xdr_rename3args (XDR *, rename3args*);
-extern bool_t xdr_rename3resok (XDR *, rename3resok*);
-extern bool_t xdr_rename3resfail (XDR *, rename3resfail*);
-extern bool_t xdr_rename3res (XDR *, rename3res*);
-extern bool_t xdr_link3args (XDR *, link3args*);
-extern bool_t xdr_link3resok (XDR *, link3resok*);
-extern bool_t xdr_link3resfail (XDR *, link3resfail*);
-extern bool_t xdr_link3res (XDR *, link3res*);
-extern bool_t xdr_readdir3args (XDR *, readdir3args*);
-extern bool_t xdr_entry3 (XDR *, entry3*);
-extern bool_t xdr_dirlist3 (XDR *, dirlist3*);
-extern bool_t xdr_readdir3resok (XDR *, readdir3resok*);
-extern bool_t xdr_readdir3resfail (XDR *, readdir3resfail*);
-extern bool_t xdr_readdir3res (XDR *, readdir3res*);
-extern bool_t xdr_readdirp3args (XDR *, readdirp3args*);
-extern bool_t xdr_entryp3 (XDR *, entryp3*);
-extern bool_t xdr_dirlistp3 (XDR *, dirlistp3*);
-extern bool_t xdr_readdirp3resok (XDR *, readdirp3resok*);
-extern bool_t xdr_readdirp3resfail (XDR *, readdirp3resfail*);
-extern bool_t xdr_readdirp3res (XDR *, readdirp3res*);
-extern bool_t xdr_fsstat3args (XDR *, fsstat3args*);
-extern bool_t xdr_fsstat3resok (XDR *, fsstat3resok*);
-extern bool_t xdr_fsstat3resfail (XDR *, fsstat3resfail*);
-extern bool_t xdr_fsstat3res (XDR *, fsstat3res*);
-extern bool_t xdr_fsinfo3args (XDR *, fsinfo3args*);
-extern bool_t xdr_fsinfo3resok (XDR *, fsinfo3resok*);
-extern bool_t xdr_fsinfo3resfail (XDR *, fsinfo3resfail*);
-extern bool_t xdr_fsinfo3res (XDR *, fsinfo3res*);
-extern bool_t xdr_pathconf3args (XDR *, pathconf3args*);
-extern bool_t xdr_pathconf3resok (XDR *, pathconf3resok*);
-extern bool_t xdr_pathconf3resfail (XDR *, pathconf3resfail*);
-extern bool_t xdr_pathconf3res (XDR *, pathconf3res*);
-extern bool_t xdr_commit3args (XDR *, commit3args*);
-extern bool_t xdr_commit3resok (XDR *, commit3resok*);
-extern bool_t xdr_commit3resfail (XDR *, commit3resfail*);
-extern bool_t xdr_commit3res (XDR *, commit3res*);
-extern bool_t xdr_fhandle3 (XDR *, fhandle3*);
-extern bool_t xdr_dirpath (XDR *, dirpath*);
-extern bool_t xdr_name (XDR *, name*);
-extern bool_t xdr_mountstat3 (XDR *, mountstat3*);
-extern bool_t xdr_mountres3_ok (XDR *, mountres3_ok*);
-extern bool_t xdr_mountres3 (XDR *, mountres3*);
-extern bool_t xdr_mountlist (XDR *, mountlist*);
-extern bool_t xdr_mountbody (XDR *, mountbody*);
-extern bool_t xdr_groups (XDR *, groups*);
-extern bool_t xdr_groupnode (XDR *, groupnode*);
-extern bool_t xdr_exports (XDR *, exports*);
-extern bool_t xdr_exportnode (XDR *, exportnode*);
-
-extern void xdr_free_exports_list (struct exportnode *first);
-extern void xdr_free_mountlist (mountlist ml);
-
-extern void xdr_free_write3args_nocopy (write3args *wa);
+extern bool_t
+xdr_uint64(XDR *, uint64 *);
+extern bool_t
+xdr_int64(XDR *, int64 *);
+extern bool_t
+xdr_uint32(XDR *, uint32 *);
+extern bool_t
+xdr_int32(XDR *, int32 *);
+extern bool_t
+xdr_filename3(XDR *, filename3 *);
+extern bool_t
+xdr_nfspath3(XDR *, nfspath3 *);
+extern bool_t
+xdr_fileid3(XDR *, fileid3 *);
+extern bool_t
+xdr_cookie3(XDR *, cookie3 *);
+extern bool_t
+xdr_cookieverf3(XDR *, cookieverf3);
+extern bool_t
+xdr_createverf3(XDR *, createverf3);
+extern bool_t
+xdr_writeverf3(XDR *, writeverf3);
+extern bool_t
+xdr_uid3(XDR *, uid3 *);
+extern bool_t
+xdr_gid3(XDR *, gid3 *);
+extern bool_t
+xdr_size3(XDR *, size3 *);
+extern bool_t
+xdr_offset3(XDR *, offset3 *);
+extern bool_t
+xdr_mode3(XDR *, mode3 *);
+extern bool_t
+xdr_count3(XDR *, count3 *);
+extern bool_t
+xdr_nfsstat3(XDR *, nfsstat3 *);
+extern bool_t
+xdr_ftype3(XDR *, ftype3 *);
+extern bool_t
+xdr_specdata3(XDR *, specdata3 *);
+extern bool_t
+xdr_nfs_fh3(XDR *, nfs_fh3 *);
+extern bool_t
+xdr_nfstime3(XDR *, nfstime3 *);
+extern bool_t
+xdr_fattr3(XDR *, fattr3 *);
+extern bool_t
+xdr_post_op_attr(XDR *, post_op_attr *);
+extern bool_t
+xdr_wcc_attr(XDR *, wcc_attr *);
+extern bool_t
+xdr_pre_op_attr(XDR *, pre_op_attr *);
+extern bool_t
+xdr_wcc_data(XDR *, wcc_data *);
+extern bool_t
+xdr_post_op_fh3(XDR *, post_op_fh3 *);
+extern bool_t
+xdr_time_how(XDR *, time_how *);
+extern bool_t
+xdr_set_mode3(XDR *, set_mode3 *);
+extern bool_t
+xdr_set_uid3(XDR *, set_uid3 *);
+extern bool_t
+xdr_set_gid3(XDR *, set_gid3 *);
+extern bool_t
+xdr_set_size3(XDR *, set_size3 *);
+extern bool_t
+xdr_set_atime(XDR *, set_atime *);
+extern bool_t
+xdr_set_mtime(XDR *, set_mtime *);
+extern bool_t
+xdr_sattr3(XDR *, sattr3 *);
+extern bool_t
+xdr_diropargs3(XDR *, diropargs3 *);
+extern bool_t
+xdr_getattr3args(XDR *, getattr3args *);
+extern bool_t
+xdr_getattr3resok(XDR *, getattr3resok *);
+extern bool_t
+xdr_getattr3res(XDR *, getattr3res *);
+extern bool_t
+xdr_sattrguard3(XDR *, sattrguard3 *);
+extern bool_t
+xdr_setattr3args(XDR *, setattr3args *);
+extern bool_t
+xdr_setattr3resok(XDR *, setattr3resok *);
+extern bool_t
+xdr_setattr3resfail(XDR *, setattr3resfail *);
+extern bool_t
+xdr_setattr3res(XDR *, setattr3res *);
+extern bool_t
+xdr_lookup3args(XDR *, lookup3args *);
+extern bool_t
+xdr_lookup3resok(XDR *, lookup3resok *);
+extern bool_t
+xdr_lookup3resfail(XDR *, lookup3resfail *);
+extern bool_t
+xdr_lookup3res(XDR *, lookup3res *);
+extern bool_t
+xdr_access3args(XDR *, access3args *);
+extern bool_t
+xdr_access3resok(XDR *, access3resok *);
+extern bool_t
+xdr_access3resfail(XDR *, access3resfail *);
+extern bool_t
+xdr_access3res(XDR *, access3res *);
+extern bool_t
+xdr_readlink3args(XDR *, readlink3args *);
+extern bool_t
+xdr_readlink3resok(XDR *, readlink3resok *);
+extern bool_t
+xdr_readlink3resfail(XDR *, readlink3resfail *);
+extern bool_t
+xdr_readlink3res(XDR *, readlink3res *);
+extern bool_t
+xdr_read3args(XDR *, read3args *);
+extern bool_t
+xdr_read3resok(XDR *, read3resok *);
+extern bool_t
+xdr_read3resfail(XDR *, read3resfail *);
+extern bool_t
+xdr_read3res(XDR *, read3res *);
+extern bool_t
+xdr_read3res_nocopy(XDR *xdrs, read3res *objp);
+extern bool_t
+xdr_stable_how(XDR *, stable_how *);
+extern bool_t
+xdr_write3args(XDR *, write3args *);
+extern bool_t
+xdr_write3resok(XDR *, write3resok *);
+extern bool_t
+xdr_write3resfail(XDR *, write3resfail *);
+extern bool_t
+xdr_write3res(XDR *, write3res *);
+extern bool_t
+xdr_createmode3(XDR *, createmode3 *);
+extern bool_t
+xdr_createhow3(XDR *, createhow3 *);
+extern bool_t
+xdr_create3args(XDR *, create3args *);
+extern bool_t
+xdr_create3resok(XDR *, create3resok *);
+extern bool_t
+xdr_create3resfail(XDR *, create3resfail *);
+extern bool_t
+xdr_create3res(XDR *, create3res *);
+extern bool_t
+xdr_mkdir3args(XDR *, mkdir3args *);
+extern bool_t
+xdr_mkdir3resok(XDR *, mkdir3resok *);
+extern bool_t
+xdr_mkdir3resfail(XDR *, mkdir3resfail *);
+extern bool_t
+xdr_mkdir3res(XDR *, mkdir3res *);
+extern bool_t
+xdr_symlinkdata3(XDR *, symlinkdata3 *);
+extern bool_t
+xdr_symlink3args(XDR *, symlink3args *);
+extern bool_t
+xdr_symlink3resok(XDR *, symlink3resok *);
+extern bool_t
+xdr_symlink3resfail(XDR *, symlink3resfail *);
+extern bool_t
+xdr_symlink3res(XDR *, symlink3res *);
+extern bool_t
+xdr_devicedata3(XDR *, devicedata3 *);
+extern bool_t
+xdr_mknoddata3(XDR *, mknoddata3 *);
+extern bool_t
+xdr_mknod3args(XDR *, mknod3args *);
+extern bool_t
+xdr_mknod3resok(XDR *, mknod3resok *);
+extern bool_t
+xdr_mknod3resfail(XDR *, mknod3resfail *);
+extern bool_t
+xdr_mknod3res(XDR *, mknod3res *);
+extern bool_t
+xdr_remove3args(XDR *, remove3args *);
+extern bool_t
+xdr_remove3resok(XDR *, remove3resok *);
+extern bool_t
+xdr_remove3resfail(XDR *, remove3resfail *);
+extern bool_t
+xdr_remove3res(XDR *, remove3res *);
+extern bool_t
+xdr_rmdir3args(XDR *, rmdir3args *);
+extern bool_t
+xdr_rmdir3resok(XDR *, rmdir3resok *);
+extern bool_t
+xdr_rmdir3resfail(XDR *, rmdir3resfail *);
+extern bool_t
+xdr_rmdir3res(XDR *, rmdir3res *);
+extern bool_t
+xdr_rename3args(XDR *, rename3args *);
+extern bool_t
+xdr_rename3resok(XDR *, rename3resok *);
+extern bool_t
+xdr_rename3resfail(XDR *, rename3resfail *);
+extern bool_t
+xdr_rename3res(XDR *, rename3res *);
+extern bool_t
+xdr_link3args(XDR *, link3args *);
+extern bool_t
+xdr_link3resok(XDR *, link3resok *);
+extern bool_t
+xdr_link3resfail(XDR *, link3resfail *);
+extern bool_t
+xdr_link3res(XDR *, link3res *);
+extern bool_t
+xdr_readdir3args(XDR *, readdir3args *);
+extern bool_t
+xdr_entry3(XDR *, entry3 *);
+extern bool_t
+xdr_dirlist3(XDR *, dirlist3 *);
+extern bool_t
+xdr_readdir3resok(XDR *, readdir3resok *);
+extern bool_t
+xdr_readdir3resfail(XDR *, readdir3resfail *);
+extern bool_t
+xdr_readdir3res(XDR *, readdir3res *);
+extern bool_t
+xdr_readdirp3args(XDR *, readdirp3args *);
+extern bool_t
+xdr_entryp3(XDR *, entryp3 *);
+extern bool_t
+xdr_dirlistp3(XDR *, dirlistp3 *);
+extern bool_t
+xdr_readdirp3resok(XDR *, readdirp3resok *);
+extern bool_t
+xdr_readdirp3resfail(XDR *, readdirp3resfail *);
+extern bool_t
+xdr_readdirp3res(XDR *, readdirp3res *);
+extern bool_t
+xdr_fsstat3args(XDR *, fsstat3args *);
+extern bool_t
+xdr_fsstat3resok(XDR *, fsstat3resok *);
+extern bool_t
+xdr_fsstat3resfail(XDR *, fsstat3resfail *);
+extern bool_t
+xdr_fsstat3res(XDR *, fsstat3res *);
+extern bool_t
+xdr_fsinfo3args(XDR *, fsinfo3args *);
+extern bool_t
+xdr_fsinfo3resok(XDR *, fsinfo3resok *);
+extern bool_t
+xdr_fsinfo3resfail(XDR *, fsinfo3resfail *);
+extern bool_t
+xdr_fsinfo3res(XDR *, fsinfo3res *);
+extern bool_t
+xdr_pathconf3args(XDR *, pathconf3args *);
+extern bool_t
+xdr_pathconf3resok(XDR *, pathconf3resok *);
+extern bool_t
+xdr_pathconf3resfail(XDR *, pathconf3resfail *);
+extern bool_t
+xdr_pathconf3res(XDR *, pathconf3res *);
+extern bool_t
+xdr_commit3args(XDR *, commit3args *);
+extern bool_t
+xdr_commit3resok(XDR *, commit3resok *);
+extern bool_t
+xdr_commit3resfail(XDR *, commit3resfail *);
+extern bool_t
+xdr_commit3res(XDR *, commit3res *);
+extern bool_t
+xdr_fhandle3(XDR *, fhandle3 *);
+extern bool_t
+xdr_dirpath(XDR *, dirpath *);
+extern bool_t
+xdr_name(XDR *, name *);
+extern bool_t
+xdr_mountstat3(XDR *, mountstat3 *);
+extern bool_t
+xdr_mountres3_ok(XDR *, mountres3_ok *);
+extern bool_t
+xdr_mountres3(XDR *, mountres3 *);
+extern bool_t
+xdr_mountlist(XDR *, mountlist *);
+extern bool_t
+xdr_mountbody(XDR *, mountbody *);
+extern bool_t
+xdr_groups(XDR *, groups *);
+extern bool_t
+xdr_groupnode(XDR *, groupnode *);
+extern bool_t
+xdr_exports(XDR *, exports *);
+extern bool_t
+xdr_exportnode(XDR *, exportnode *);
+
+extern void
+xdr_free_exports_list(struct exportnode *first);
+extern void
+xdr_free_mountlist(mountlist ml);
+
+extern void
+xdr_free_write3args_nocopy(write3args *wa);
#endif
diff --git a/run-tests-in-vagrant.sh b/run-tests-in-vagrant.sh
new file mode 100755
index 00000000000..a3f2ac7c72d
--- /dev/null
+++ b/run-tests-in-vagrant.sh
@@ -0,0 +1,311 @@
+#!/bin/bash
+
+###############################################################################
+# TODO: Support other OSes. #
+###############################################################################
+
+ORIGIN_DIR=$PWD
+autostart="no"
+destroy_after_test="no"
+os="fedora"
+destroy_now="no"
+run_tests_args=""
+redirect=">/dev/null 2>&1"
+ssh="no"
+custom_cflags=""
+
+
+pushd () {
+ command pushd "$@" >/dev/null
+}
+
+popd () {
+ command popd "$@" >/dev/null
+}
+
+usage() {
+ echo "Usage: $0 [...]"
+ echo ''
+ echo 'The options that this script accepts are:'
+ echo ''
+ echo '-a, --autostart configure the testVM to autostart on boot'
+ echo '--destroy-now cleanup the testVM'
+ echo '--destroy-after-test cleanup once the tests finishes'
+ echo '-h, --help show this help text'
+ echo '--os=<flavor> select the OS for the testVM (fedora, centos6)'
+ echo '--ssh ssh into the testVM'
+ echo '--verbose show what commands in the testVM are executed'
+ echo ''
+}
+
+function parse_args () {
+ args=`getopt \
+ --options ah \
+ --long autostart,os:,destroy-now,destroy-after-test,verbose,ssh,help \
+ -n 'run-tests-in-vagrant.sh' \
+ -- "$@"`
+ eval set -- "$args"
+ while true; do
+ case "$1" in
+ -a|--autostart) autostart="yes"; shift ;;
+ --destroy-after-test) destroy_after_test="yes"; shift ;;
+ --destroy-now) destroy_now="yes"; shift ;;
+ -h|--help) usage ; exit 0 ;;
+ --ssh) sshvm="yes"; shift ;;
+ --os)
+ case "$2" in
+ "") shift 2 ;;
+ *) os="$2" ; shift 2 ;;
+ esac ;;
+ --verbose) redirect=""; shift ;;
+ --) shift ; break ;;
+ *) echo "Internal error!" ; exit 1;;
+ esac
+ done
+ run_tests_args="$@"
+}
+
+function force_location()
+{
+ current_dir=$(dirname $0);
+
+ if [ ! -f ${current_dir}/tests/vagrant/vagrant-template-fedora/Vagrantfile ]; then
+ echo "Aborting."
+ echo "The tests/vagrant subdirectory seems to be missing."
+ echo "Please correct the problem and try again."
+ exit 1
+ fi
+}
+
+function vagrant_check()
+{
+ vagrant -v >/dev/null 2>&1;
+
+ if [ $? -ne 0 ]; then
+ echo "Aborting"
+ echo "Vagrant not found. Please install Vagrant and try again."
+ echo "On Fedora, run "dnf install vagrant vagrant-libvirt" "
+ exit 1
+ fi
+}
+
+function ansible_check()
+{
+ ansible --version >/dev/null 2>&1 ;
+
+ if [ $? -ne 0 ]; then
+ echo "Aborting"
+ echo "Ansible not found. Please install Ansible and try again."
+ echo "On Fedora, run "dnf install ansible" "
+ exit 1
+ fi
+}
+
+function set_branchname_from_git_branch()
+{
+ BRANCHNAME=`git rev-parse --abbrev-ref HEAD`
+ if [ $? -ne 0 ]; then
+ echo "Could not get branch name from git, will exit"
+ exit 1
+ fi
+}
+
+
+function destroy_vm()
+{
+ local retval=0
+
+ echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!CAUTION!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
+ echo "This will destroy VM and delete tests/vagrant/${BRANCHNAME} dir"
+ echo
+ while true; do
+ read -p "Do you want to continue?" yn
+ case $yn in
+ [Yy]* ) break;;
+ * ) echo "Did not get an yes, exiting."; exit 1 ;;
+ esac
+ done
+ if [ -d "tests/vagrant/${BRANCHNAME}" ]; then
+ pushd "tests/vagrant/${BRANCHNAME}"
+ eval vagrant destroy $redirect
+ popd
+ rm -rf "tests/vagrant/${BRANCHNAME}"
+ else
+ echo "Could not find vagrant dir for corresponding git branch, exiting"
+ retval=1
+ fi
+
+ return ${retval}
+}
+
+
+function create_vagrant_dir()
+{
+ mkdir -p tests/vagrant/$BRANCHNAME
+ if [ -d "tests/vagrant/vagrant-template-${os}" ]; then
+ echo "Copying tests/vagrant/vagrant-template-${os} dir to tests/vagrant/${BRANCHNAME} ...."
+ cp -R tests/vagrant/vagrant-template-${os}/* tests/vagrant/$BRANCHNAME
+ else
+ echo "Could not find template files for requested os $os, exiting"
+ exit 1
+ fi
+}
+
+
+function start_vm()
+{
+ echo "Doing vagrant up...."
+ pushd "tests/vagrant/${BRANCHNAME}"
+ eval vagrant up $redirect
+ if [ $? -eq 0 ]
+ then
+ popd
+ else
+ echo "Vagrant up failed, exiting....";
+ popd
+ exit 1
+ fi
+}
+
+function set_vm_attributes()
+{
+ if [ "x$autostart" == "xyes" ] ; then
+ virsh autostart ${BRANCHNAME}_vagrant-testVM
+ fi
+}
+
+function copy_source_code()
+{
+ echo "Copying source code from host machine to VM...."
+ pushd "tests/vagrant/${BRANCHNAME}"
+ vagrant ssh-config > ssh_config
+ rsync -az -e "ssh -F ssh_config" --rsync-path="sudo rsync" "$ORIGIN_DIR/." vagrant-testVM:/home/vagrant/glusterfs
+ if [ $? -eq 0 ]
+ then
+ popd
+ else
+ echo "Copy failed, exiting...."
+ popd
+ exit 1
+ fi
+}
+
+function compile_gluster()
+{
+ echo "Source compile and install Gluster...."
+ pushd "tests/vagrant/${BRANCHNAME}"
+ vagrant ssh -c "cd /home/vagrant/glusterfs ; sudo make clean $redirect" -- -t
+ vagrant ssh -c "cd /home/vagrant/glusterfs ; sudo ./autogen.sh $redirect" -- -t
+ if [ $? -ne 0 ]
+ then
+ echo "autogen failed, exiting...."
+ popd
+ exit 1
+ fi
+
+ # GCC on fedora complains about uninitialized variables and
+ # GCC on centos6 does not under don't warn on uninitialized variables flag.
+ if [ "x$os" == "fedora" ] ; then
+ custom_cflags="CFLAGS='-g -O0 -Werror -Wall -Wno-error=cpp -Wno-error=maybe-uninitialized'"
+ else
+ custom_cflags="CFLAGS='-g -O0 -Werror -Wall'"
+ fi
+
+
+ custom_cflags=
+ vagrant ssh -c "cd /home/vagrant/glusterfs ; \
+ sudo \
+ $custom_cflags \
+ ./configure \
+ --prefix=/usr \
+ --exec-prefix=/usr \
+ --bindir=/usr/bin \
+ --sbindir=/usr/sbin \
+ --sysconfdir=/etc \
+ --datadir=/usr/share \
+ --includedir=/usr/include \
+ --libdir=/usr/lib64 \
+ --libexecdir=/usr/libexec \
+ --localstatedir=/var \
+ --sharedstatedir=/var/lib \
+ --mandir=/usr/share/man \
+ --infodir=/usr/share/info \
+ --libdir=/usr/lib64 \
+ --enable-gnfs \
+ --enable-debug $redirect" -- -t
+ if [ $? -ne 0 ]
+ then
+ echo "configure failed, exiting...."
+ popd
+ exit 1
+ fi
+ # Test for missing dependencies based on the BuildRequires in the
+ # glusterfs.spec. If anything is missing, install them (and only then, dnf
+ # cache is a large download).
+ vagrant ssh -c "cd /home/vagrant/glusterfs; ( sudo dnf -C -y builddep --spec glusterfs.spec || sudo dnf -y builddep --spec glusterfs.spec ) $redirect" -- -t
+ vagrant ssh -c "cd /home/vagrant/glusterfs; sudo make -j install $redirect" -- -t
+ if [ $? -ne 0 ]
+ then
+ echo "make failed, exiting...."
+ popd
+ exit 1
+ fi
+ popd
+}
+
+function run_tests()
+{
+ local retval=0
+
+ pushd "tests/vagrant/${BRANCHNAME}"
+ vagrant ssh -c "cd /home/vagrant/glusterfs; sudo ./run-tests.sh $run_tests_args" -- -t
+ retval=$?
+ popd
+
+ return ${retval}
+}
+
+function ssh_into_vm_using_exec()
+{
+ pushd "tests/vagrant/${BRANCHNAME}"
+ exec vagrant ssh
+ popd
+}
+
+echo
+parse_args "$@"
+
+# Check environment for dependencies
+force_location
+vagrant_check
+ansible_check
+
+# We have one vm per git branch, query git branch
+set_branchname_from_git_branch
+
+if [ "x$destroy_now" == "xyes" ] ; then
+ destroy_vm
+ exit $?
+fi
+
+if [ "x$sshvm" == "xyes" ] ; then
+ ssh_into_vm_using_exec
+fi
+
+
+create_vagrant_dir
+start_vm
+set_vm_attributes
+
+
+
+copy_source_code
+compile_gluster
+run_tests
+RET=$?
+
+if [ "x$destroy_after_test" == "xyes" ] ; then
+ destroy_vm
+fi
+
+exit ${RET}
diff --git a/run-tests.sh b/run-tests.sh
new file mode 100755
index 00000000000..e2a1655d8e0
--- /dev/null
+++ b/run-tests.sh
@@ -0,0 +1,615 @@
+#!/bin/bash
+# Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+#
+
+# As many tests are designed to take values of variables from 'env.rc',
+# it is good to source the file. While it is also required to source the
+# file individually in each tests (as it should be possible to run the
+# tests separately), exporting variables from env.rc is not harmful if
+# done here
+
+source ./tests/env.rc
+
+export TZ=UTC
+force="no"
+head="yes"
+retry="yes"
+tests=""
+exit_on_failure="yes"
+skip_bad_tests="yes"
+skip_known_bugs="yes"
+result_output="/tmp/gluster_regression.txt"
+section_separator="========================================"
+run_timeout=200
+kill_after_time=5
+nfs_tests=$RUN_NFS_TESTS
+
+# Option below preserves log tarballs for each run of a test separately
+# named: <test>-iteration-<n>.tar
+# If set to any other value, then log tarball is just named after the test and
+# overwritten in each iteration (saves space)
+# named: <test>.tar
+# Use option -p to override default behavior
+skip_preserve_logs="yes"
+
+OSTYPE=$(uname -s)
+
+# Function for use in generating filenames with increasing "-<n>" index
+# In:
+# $1 basepath: Directory where file needs to be created
+# $2 filename: Name of the file sans extension
+# $3 extension: Extension string that would be appended to the generated
+# filename
+# Out:
+# string of next available filename with appended "-<n>"
+# Example:
+# Interested routines that want to create a file name, say foo-<n>.txt at
+# location /var/log/gluster would pass in "/var/log/gluster" "foo" "txt"
+# and be returned next available foo-<n> filename to create.
+# Notes:
+# Function will not accept empty extension, and will return the same name
+# over and over (which can be fixed when there is a need for it)
+function get_next_filename()
+{
+ local basepath=$1
+ local filename=$2
+ local extension=$3
+ local next=1
+ local tfilename="${filename}-${next}"
+ while [ -e "${basepath}/${tfilename}.${extension}" ]; do
+ next=$((next+1))
+ tfilename="${filename}-${next}"
+ done
+
+ echo "$tfilename"
+}
+
+# Tar the gluster logs and generate a tarball named after the first parameter
+# passed in to the function. Ideally the test name is passed to this function
+# to generate the required tarball.
+# Tarball name is further controlled by the variable skip_preserve_logs
+function tar_logs()
+{
+ t=$1
+
+ logdir=$(gluster --print-logdir)
+ basetarname=$(basename "$t" .t)
+
+ if [ -n "$logdir" ]
+ then
+ if [[ $skip_preserve_logs == "yes" ]]; then
+ savetarname=$(get_next_filename "${logdir}" \
+ "${basetarname}-iteration" "tar" \
+ | tail -1)
+ else
+ savetarname="$basetarname"
+ fi
+
+ # Can't use --exclude here because NetBSD doesn't have it.
+ # However, both it and Linux have -X to take patterns from
+ # a file, so use that.
+ (echo '*.tar'; echo .notar) > "${logdir}"/.notar \
+ && \
+ tar -cf "${logdir}"/"${savetarname}".tar -X "${logdir}"/.notar \
+ "${logdir}"/* 2> /dev/null \
+ && \
+ find "$logdir"/* -maxdepth 0 -name '*.tar' -prune \
+ -o -exec rm -rf '{}' ';'
+
+ echo "Logs preserved in tarball $savetarname.tar"
+ else
+ echo "Logs not preserved, as logdir is not set"
+ fi
+}
+
+function check_dependencies()
+{
+ ## Check all dependencies are present
+ MISSING=""
+
+ # Check for dbench
+ env dbench --usage > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING dbench"
+ fi
+
+ # Check for git
+ env git --version > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING git"
+ fi
+
+ # Check for nfs-utils (Linux-only: built-in NetBSD with different name)
+ if [ "x`uname -s`" = "xLinux" ] ; then
+ env mount.nfs -V > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING nfs-utils"
+ fi
+ fi
+
+ # Check for netstat
+ env netstat --version > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING netstat"
+ fi
+
+ # Check for the Perl Test Harness
+ env prove --version > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING perl-Test-Harness"
+ fi
+
+ which json_verify > /dev/null
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING json_verify"
+ fi
+
+ # Check for XFS programs (Linux Only: NetBSD does without)
+ if [ "x`uname -s`" = "xLinux" ] ; then
+ env mkfs.xfs -V > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING xfsprogs"
+ fi
+ fi
+
+ # Check for attr
+ env getfattr --version > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING attr"
+ fi
+
+ # Check for pidof
+ pidof pidof > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING pidof"
+ fi
+
+ # Check for netstat
+ env netstat --version > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING netstat"
+ fi
+
+ # check for psutil python package
+ test `uname -s` == "Darwin" || test `uname -s` == "FreeBSD" && {
+ pip show psutil | grep -q psutil >/dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ MISSING="$MISSING psutil"
+ fi
+ }
+
+ ## If dependencies are missing, warn the user and abort
+ if [ "x$MISSING" != "x" ]; then
+ test "x${force}" != "xyes" && echo "Aborting."
+ echo
+ echo "The following required tools are missing:"
+ echo
+ for pkg in $MISSING; do
+ echo " * $pkg"
+ done
+ echo
+ test "x${force}" = "xyes" && return
+ echo "Please install them and try again."
+ echo
+ exit 2
+ fi
+}
+
+function check_location()
+{
+ regression_testsdir=$(dirname $0);
+
+ if [ ! -f ${regression_testsdir}/tests/include.rc ]; then
+ echo "Aborting."
+ echo
+ echo "The tests/ subdirectory seems to be missing."
+ echo
+ echo "Please correct the problem and try again."
+ echo
+ exit 1
+ fi
+}
+
+function check_user()
+{
+ # If we're not running as root, warn the user and abort
+ MYUID=`/usr/bin/id -u`
+ if [ 0${MYUID} -ne 0 ]; then
+ echo "Aborting."
+ echo
+ echo "The GlusterFS Test Framework must be run as root."
+ echo
+ echo "Please change to the root user and try again."
+ echo
+ exit 3
+ fi
+}
+
+function match()
+{
+ # Patterns considered valid:
+ # 0. Empty means everything
+ # "" matches ** i.e all
+ # 1. full or partial file/directory names
+ # basic matches tests/basic
+ # basic/afr matches tests/basic/afr
+ # 2. globs
+ # basic/* matches all files and directories in basic
+ # basic/*/ matches subdirectories in basic (afr|ec)
+ # 3. numbered bug matching
+ # 884455 matches bugs/bug-884455.t
+ # 859927 matches bugs/859927, bugs/bug-859927.t
+ # 1015990 matches /bugs/bug-1015990-rep.t, bug-1015990.t
+ # ...lots of other cases accepted as well, since globbing is tricky.
+ local t=$1
+ shift
+ local a
+ local match=1
+ if [ -z "$@" ]; then
+ match=0
+ return $match
+ fi
+ for a in $@ ; do
+ case "$t" in
+ *$a*)
+ match=0
+ ;;
+ esac
+ done
+ return $match
+}
+
+# Tests can have comment lines with some comma separated values within them.
+# Key names used to determine test status are
+# G_TESTDEF_TEST_STATUS_CENTOS6
+# G_TESTDEF_TEST_STATUS_NETBSD7
+# Some examples:
+# G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=123456
+# G_TESTDEF_TEST_STATUS_CENTOS6=BRICK_MUX_BAD_TEST,BUG=123456
+# G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=4444444
+# G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=123456;555555
+# G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TESTS,BUG=1385758
+# You can change status of test to enabled or delete the line only if all the
+# bugs are closed or modified or if the patch fixes it.
+function get_test_status ()
+{
+ local test_name=$1
+ local host_os=""
+ local result=""
+
+ host_os=$(uname -s)
+
+ case "$host_os" in
+ # Leaving out the logic to determine the particular distro and version
+ # for later. Why does the key have the distro and version then?
+ # Because changing the key in all test files would be very big process
+ # updating just this function with a better logic much simpler.
+ Linux)
+ result=$(grep -e "^#G_TESTDEF_TEST_STATUS_CENTOS6" $test_name | \
+ awk -F"," {'print $1'} | awk -F"=" {'print $2'}) ;;
+ NetBSD)
+ result=$(grep -e "^#G_TESTDEF_TEST_STATUS_NETBSD7" $test_name | \
+ awk -F"," {'print $1'} | awk -F"=" {'print $2'}) ;;
+ *)
+ result="ENABLED" ;;
+ esac
+
+ echo "$result"
+
+}
+
+function get_bug_list_for_disabled_test ()
+{
+ local test_name=$1
+ local host_os=""
+ local result=""
+
+ host_os=$(uname -s)
+
+ case "$host_os" in
+ # Leaving out the logic to determine the particular distro and version
+ # for later. Why does the key have the distro and version then?
+ # Because changing the key in all test files would be very big process
+ # updating just this function with a better logic much simpler.
+ Linux)
+ result=$(grep -e "^#G_TESTDEF_TEST_STATUS_CENTOS6" $test_name | \
+ awk -F"," {'print $2'} | awk -F"=" {'print $2'}) ;;
+ NetBSD)
+ result=$(grep -e "^#G_TESTDEF_TEST_STATUS_NETBSD7" $test_name | \
+ awk -F"," {'print $2'} | awk -F"=" {'print $2'}) ;;
+ *)
+ result="0000000" ;;
+ esac
+
+ echo "$result"
+
+}
+
+function run_tests()
+{
+ RES=0
+ FLAKY=''
+ FAILED=''
+ TESTS_NEEDED_RETRY=''
+ GENERATED_CORE=''
+ total_tests=0
+ selected_tests=0
+ skipped_bad_tests=0
+ skipped_known_issue_tests=0
+ total_run_tests=0
+
+ # key = path of .t file; value = time taken to run the .t file
+ declare -A ELAPSEDTIMEMAP
+
+ # Test if -k is supported for timeout command
+ # This is not supported on centos6, but spuported on centos7
+ # The flags is required for running the command in both flavors
+ timeout_cmd_exists="yes"
+ timeout -k 1 10 echo "testing 'timeout' command"
+ if [ $? -ne 0 ]; then
+ timeout_cmd_exists="no"
+ fi
+
+ all_tests=($(find ${regression_testsdir}/tests -name '*.t' | sort))
+ all_tests_cnt=${#all_tests[@]}
+ for t in "${all_tests[@]}" ; do
+ old_cores=$(ls /*-*.core 2> /dev/null | wc -l)
+ total_tests=$((total_tests+1))
+ if match $t "$@" ; then
+ selected_tests=$((selected_tests+1))
+ echo
+ echo $section_separator "(${total_tests} / ${all_tests_cnt})" $section_separator
+ if [[ $(get_test_status $t) =~ "BAD_TEST" ]] && \
+ [[ $skip_bad_tests == "yes" ]]
+ then
+ skipped_bad_tests=$((skipped_bad_tests+1))
+ echo "Skipping bad test file $t"
+ echo "Reason: bug(s):" $(get_bug_list_for_disabled_test $t)
+ echo $section_separator$section_separator
+ echo
+ continue
+ fi
+ if [[ $(get_test_status $t) == "KNOWN_ISSUE" ]] && \
+ [[ $skip_known_bugs == "yes" ]]
+ then
+ skipped_known_issue_tests=$((skipped_known_issue_tests+1))
+ echo "Skipping test file $t due to known issue"
+ echo "Reason: bug(s):" $(get_bug_list_for_disabled_test $t)
+ echo $section_separator$section_separator
+ echo
+ continue
+ fi
+ if [[ $(get_test_status $t) == "NFS_TEST" ]] && \
+ [[ $nfs_tests == "no" ]]
+ then
+ echo "Skipping nfs test file $t"
+ echo $section_separator$section_separator
+ echo
+ continue
+ fi
+ total_run_tests=$((total_run_tests+1))
+ echo "[$(date +%H:%M:%S)] Running tests in file $t"
+ starttime="$(date +%s)"
+
+ local cmd_timeout=$run_timeout;
+ if [ ${timeout_cmd_exists} == "yes" ]; then
+ if [ $(grep -c "SCRIPT_TIMEOUT=" ${t}) == 1 ] ; then
+ cmd_timeout=$(grep "SCRIPT_TIMEOUT=" ${t} | cut -f2 -d'=');
+ echo "Timeout set is ${cmd_timeout}, default ${run_timeout}"
+ fi
+ timeout --foreground -k ${kill_after_time} ${cmd_timeout} prove -vmfe '/bin/bash' ${t}
+ else
+ prove -vmfe '/bin/bash' ${t}
+ fi
+ TMP_RES=$?
+ ELAPSEDTIMEMAP[$t]=`expr $(date +%s) - $starttime`
+ tar_logs "$t"
+
+ # timeout always return 124 if it is actually a timeout.
+ if ((${TMP_RES} == 124)); then
+ echo "${t} timed out after ${cmd_timeout} seconds"
+ fi
+
+ if [ ${TMP_RES} -ne 0 ] && [ "x${retry}" = "xyes" ] ; then
+ echo "$t: bad status $TMP_RES"
+ echo ""
+ echo " *********************************"
+ echo " * REGRESSION FAILED *"
+ echo " * Retrying failed tests in case *"
+ echo " * we got some spurious failures *"
+ echo " *********************************"
+ echo ""
+
+ if [ ${timeout_cmd_exists} == "yes" ]; then
+ timeout --foreground -k ${kill_after_time} ${cmd_timeout} prove -vmfe '/bin/bash' ${t}
+ else
+ prove -vmfe '/bin/bash' ${t}
+ fi
+ TMP_RES=$?
+ tar_logs "$t"
+
+ if ((${TMP_RES} == 124)); then
+ echo "${t} timed out after ${cmd_timeout} seconds"
+ fi
+
+ TESTS_NEEDED_RETRY="${TESTS_NEEDED_RETRY}${t} "
+ fi
+
+
+ if [ ${TMP_RES} -ne 0 ] ; then
+ if [[ "$t" == *"tests/000-flaky/"* ]]; then
+ FLAKY="${FLAKY}${t} "
+ echo "FAILURE -> SUCCESS: Flaky test"
+ TMP_RES=0
+ else
+ RES=${TMP_RES}
+ FAILED="${FAILED}${t} "
+ fi
+ fi
+
+ new_cores=$(ls /*-*.core 2> /dev/null | wc -l)
+ if [ x"$new_cores" != x"$old_cores" ]; then
+ core_diff=$((new_cores-old_cores))
+ echo "$t: $core_diff new core files"
+ RES=1
+ GENERATED_CORE="${GENERATED_CORE}${t} "
+ fi
+ echo "End of test $t"
+ echo $section_separator$section_separator
+ echo
+ if [ $RES -ne 0 ] && [ x"$exit_on_failure" = "xyes" ] ; then
+ break;
+ fi
+ fi
+ done
+ echo
+ echo "Run complete"
+ echo $section_separator$section_separator
+ echo "Number of tests found: $total_tests"
+ echo "Number of tests selected for run based on pattern: $selected_tests"
+ echo "Number of tests skipped as they were marked bad: $skipped_bad_tests"
+ echo "Number of tests skipped because of known_issues: $skipped_known_issue_tests"
+ echo "Number of tests that were run: $total_run_tests"
+
+ echo
+ echo "Tests ordered by time taken, slowest to fastest: "
+ echo $section_separator$section_separator
+ for key in "${!ELAPSEDTIMEMAP[@]}"
+ do
+ echo "$key - ${ELAPSEDTIMEMAP["$key"]} second"
+ done | sort -rn -k3
+
+ # initialize the output file
+ echo > "${result_output}"
+
+ # Output the errors into a file
+ if [ ${RES} -ne 0 ] ; then
+ FAILED=$( echo ${FAILED} | tr ' ' '\n' | sort -u )
+ FAILED_COUNT=$( echo -n "${FAILED}" | grep -c '^' )
+ echo -e "\n$FAILED_COUNT test(s) failed \n${FAILED}" >> "${result_output}"
+ GENERATED_CORE=$( echo ${GENERATED_CORE} | tr ' ' '\n' | sort -u )
+ GENERATED_CORE_COUNT=$( echo -n "${GENERATED_CORE}" | grep -c '^' )
+ echo -e "\n$GENERATED_CORE_COUNT test(s) generated core \n${GENERATED_CORE}" >> "${result_output}"
+ cat "${result_output}"
+ fi
+ TESTS_NEEDED_RETRY=$( echo ${TESTS_NEEDED_RETRY} | tr ' ' '\n' | sort -u )
+ RETRY_COUNT=$( echo -n "${TESTS_NEEDED_RETRY}" | grep -c '^' )
+ if [ ${RETRY_COUNT} -ne 0 ] ; then
+ echo -e "\n${RETRY_COUNT} test(s) needed retry \n${TESTS_NEEDED_RETRY}" >> "${result_output}"
+ fi
+
+ FLAKY_TESTS_FAILED=$( echo ${FLAKY} | tr ' ' '\n' | sort -u )
+ RETRY_COUNT=$( echo -n "${FLAKY_TESTS_FAILED}" | grep -c '^' )
+ if [ ${RETRY_COUNT} -ne 0 ] ; then
+ echo -e "\n${RETRY_COUNT} flaky test(s) marked as success even though they failed \n${FLAKY_TESTS_FAILED}" >> "${result_output}"
+ fi
+
+ echo
+ echo "Result is $RES"
+ echo
+ return ${RES}
+}
+
+function run_head_tests()
+{
+ [ -d ${regression_testsdir}/.git ] || return 0
+
+ # The git command needs $cwd to be within the repository, but run_tests
+ # needs it to be back where we started.
+ pushd $regression_testsdir
+ git_cmd="git diff-tree --no-commit-id --name-only --diff-filter=ACMRTUXB"
+ htests=$($git_cmd -r HEAD tests | grep '.t$')
+ popd
+ [ -n "$htests" ] || return 0
+
+ # Perhaps it's not ideal that we'll end up re-running these tests, but the
+ # gains from letting them fail fast in the first round should outweigh the
+ # losses from running them again in the second. OTOH, with so many of our
+ # tests being non-deterministic, maybe it doesn't hurt to give the newest
+ # tests an extra workout.
+ run_tests "$htests"
+}
+
+function show_usage ()
+{
+ cat <<EOF
+Usage: $0 <opts> [<glob>|<bzid>]...
+
+Options:
+
+-f force
+-h skip tests altering from HEAD
+-H run only tests altering from HEAD
+-r retry failed tests
+-R do not retry failed tests
+-c dont't exit on failure
+-b don't skip bad tests
+-k don't skip known bugs
+-p don't keep logs from preceding runs
+-o OUTPUT
+-t TIMEOUT
+-n skip NFS tests
+--help
+EOF
+}
+
+usage="no"
+
+function parse_args ()
+{
+ args=`getopt -u -l help frRcbkphHno:t: "$@"`
+ if ! [ $? -eq 0 ]; then
+ show_usage
+ exit 1
+ fi
+ set -- $args
+ while [ $# -gt 0 ]; do
+ case "$1" in
+ -f) force="yes" ;;
+ -h) head="no" ;;
+ -H) head="only" ;;
+ -r) retry="yes" ;;
+ -R) retry="no" ;;
+ -c) exit_on_failure="no" ;;
+ -b) skip_bad_tests="no" ;;
+ -k) skip_known_bugs="no" ;;
+ -p) skip_preserve_logs="no" ;;
+ -o) result_output="$2"; shift;;
+ -t) run_timeout="$2"; shift;;
+ -n) nfs_tests="no";;
+ --help) usage="yes" ;;
+ --) shift; break;;
+ esac
+ shift
+ done
+ tests="$@"
+}
+
+
+echo
+echo ... GlusterFS Test Framework ...
+echo
+
+# Get user options
+parse_args "$@"
+if [ x"$usage" == x"yes" ]; then
+ show_usage
+ exit 0
+fi
+
+# Make sure we're running as the root user
+check_user
+
+# Make sure the needed programs are available
+check_dependencies
+
+# Check we're running from the right location
+check_location
+
+# Run the tests
+if [ x"$head" != x"no" ]; then
+ run_head_tests || exit 1
+fi
+if [ x"$head" != x"only" ]; then
+ run_tests "$tests" || exit 1
+fi
diff --git a/scheduler/Makefile.am b/scheduler/Makefile.am
deleted file mode 100644
index 618fa7dd8b8..00000000000
--- a/scheduler/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = alu random nufa rr switch
-
-CLEANFILES =
diff --git a/scheduler/alu/src/Makefile.am b/scheduler/alu/src/Makefile.am
deleted file mode 100644
index eb7d0db07e0..00000000000
--- a/scheduler/alu/src/Makefile.am
+++ /dev/null
@@ -1,14 +0,0 @@
-sched_LTLIBRARIES = alu.la
-scheddir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler
-
-alu_la_LDFLAGS = -module -avoidversion
-
-alu_la_SOURCES = alu.c
-alu_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = alu.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/scheduler/alu/src/alu-mem-types.h b/scheduler/alu/src/alu-mem-types.h
deleted file mode 100644
index 92702f28690..00000000000
--- a/scheduler/alu/src/alu-mem-types.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __ALU_MEM_TYPES_H__
-#define __ALU_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_alu_mem_types_ {
- gf_alu_mt_alu_threshold = gf_common_mt_end + 1,
- gf_alu_mt_alu_sched,
- gf_alu_mt_alu_limits,
- gf_alu_mt_alu_sched_struct,
- gf_alu_mt_alu_sched_node,
- gf_alu_mt_end
-};
-#endif
-
diff --git a/scheduler/alu/src/alu.c b/scheduler/alu/src/alu.c
deleted file mode 100644
index 58bef60ae9f..00000000000
--- a/scheduler/alu/src/alu.c
+++ /dev/null
@@ -1,1019 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-
-/* ALU code needs a complete re-write. This is one of the most important
- * part of GlusterFS and so needs more and more reviews and testing
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/time.h>
-#include <stdint.h>
-#include "stack.h"
-#include "alu.h"
-#include "alu-mem-types.h"
-
-#define ALU_DISK_USAGE_ENTRY_THRESHOLD_DEFAULT (1 * GF_UNIT_GB)
-#define ALU_DISK_USAGE_EXIT_THRESHOLD_DEFAULT (512 * GF_UNIT_MB)
-
-#define ALU_WRITE_USAGE_ENTRY_THRESHOLD_DEFAULT 25
-#define ALU_WRITE_USAGE_EXIT_THRESHOLD_DEFAULT 5
-
-#define ALU_READ_USAGE_ENTRY_THRESHOLD_DEFAULT 25
-#define ALU_READ_USAGE_EXIT_THRESHOLD_DEFAULT 5
-
-#define ALU_OPEN_FILES_USAGE_ENTRY_THRESHOLD_DEFAULT 1000
-#define ALU_OPEN_FILES_USAGE_EXIT_THRESHOLD_DEFAULT 100
-
-#define ALU_LIMITS_TOTAL_DISK_SIZE_DEFAULT 100
-
-#define ALU_REFRESH_INTERVAL_DEFAULT 5
-#define ALU_REFRESH_CREATE_COUNT_DEFAULT 5
-
-
-static int64_t
-get_stats_disk_usage (struct xlator_stats *this)
-{
- return this->disk_usage;
-}
-
-static int64_t
-get_stats_write_usage (struct xlator_stats *this)
-{
- return this->write_usage;
-}
-
-static int64_t
-get_stats_read_usage (struct xlator_stats *this)
-{
- return this->read_usage;
-}
-
-static int64_t
-get_stats_disk_speed (struct xlator_stats *this)
-{
- return this->disk_speed;
-}
-
-static int64_t
-get_stats_file_usage (struct xlator_stats *this)
-{
- /* Avoid warning "defined but not used" */
- (void) &get_stats_file_usage;
-
- return this->nr_files;
-}
-
-static int64_t
-get_stats_free_disk (struct xlator_stats *this)
-{
- if (this->total_disk_size > 0)
- return (this->free_disk * 100) / this->total_disk_size;
- return 0;
-}
-
-static int64_t
-get_max_diff_write_usage (struct xlator_stats *max, struct xlator_stats *min)
-{
- return (max->write_usage - min->write_usage);
-}
-
-static int64_t
-get_max_diff_read_usage (struct xlator_stats *max, struct xlator_stats *min)
-{
- return (max->read_usage - min->read_usage);
-}
-
-static int64_t
-get_max_diff_disk_usage (struct xlator_stats *max, struct xlator_stats *min)
-{
- return (max->disk_usage - min->disk_usage);
-}
-
-static int64_t
-get_max_diff_disk_speed (struct xlator_stats *max, struct xlator_stats *min)
-{
- return (max->disk_speed - min->disk_speed);
-}
-
-static int64_t
-get_max_diff_file_usage (struct xlator_stats *max, struct xlator_stats *min)
-{
- return (max->nr_files - min->nr_files);
-}
-
-
-int
-alu_parse_options (xlator_t *xl, struct alu_sched *alu_sched)
-{
- data_t *order = dict_get (xl->options, "scheduler.alu.order");
- if (!order) {
- gf_log (xl->name, GF_LOG_ERROR,
- "option 'scheduler.alu.order' not specified");
- return -1;
- }
- struct alu_threshold *_threshold_fn;
- struct alu_threshold *tmp_threshold;
- data_t *entry_fn = NULL;
- data_t *exit_fn = NULL;
- char *tmp_str = NULL;
- char *order_str = strtok_r (order->data, ":", &tmp_str);
- /* Get the scheduling priority order, specified by the user. */
- while (order_str) {
- gf_log ("alu", GF_LOG_DEBUG,
- "alu_init: order string: %s",
- order_str);
- if (strcmp (order_str, "disk-usage") == 0) {
- /* Disk usage */
- _threshold_fn =
- GF_CALLOC (1,
- sizeof (struct alu_threshold),
- gf_alu_mt_alu_threshold);
- ERR_ABORT (_threshold_fn);
- _threshold_fn->diff_value = get_max_diff_disk_usage;
- _threshold_fn->sched_value = get_stats_disk_usage;
- entry_fn =
- dict_get (xl->options,
- "scheduler.alu.disk-usage.entry-threshold");
- if (entry_fn) {
- if (gf_string2bytesize (entry_fn->data,
- &alu_sched->entry_limit.disk_usage) != 0) {
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "disk-usage.entry-threshold\"",
- entry_fn->data);
- return -1;
- }
- } else {
- alu_sched->entry_limit.disk_usage = ALU_DISK_USAGE_ENTRY_THRESHOLD_DEFAULT;
- }
- _threshold_fn->entry_value = get_stats_disk_usage;
- exit_fn = dict_get (xl->options,
- "scheduler.alu.disk-usage.exit-threshold");
- if (exit_fn) {
- if (gf_string2bytesize (exit_fn->data, &alu_sched->exit_limit.disk_usage) != 0) {
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "disk-usage.exit-threshold\"",
- exit_fn->data);
- return -1;
- }
- } else {
- alu_sched->exit_limit.disk_usage = ALU_DISK_USAGE_EXIT_THRESHOLD_DEFAULT;
- }
- _threshold_fn->exit_value = get_stats_disk_usage;
- tmp_threshold = alu_sched->threshold_fn;
- if (!tmp_threshold) {
- alu_sched->threshold_fn = _threshold_fn;
- } else {
- while (tmp_threshold->next) {
- tmp_threshold = tmp_threshold->next;
- }
- tmp_threshold->next = _threshold_fn;
- }
- gf_log ("alu",
- GF_LOG_DEBUG, "alu_init: = %"PRId64",%"PRId64"",
- alu_sched->entry_limit.disk_usage,
- alu_sched->exit_limit.disk_usage);
-
- } else if (strcmp (order_str, "write-usage") == 0) {
- /* Handle "write-usage" */
-
- _threshold_fn = GF_CALLOC (1, sizeof (struct alu_threshold), gf_alu_mt_alu_threshold);
- ERR_ABORT (_threshold_fn);
- _threshold_fn->diff_value = get_max_diff_write_usage;
- _threshold_fn->sched_value = get_stats_write_usage;
- entry_fn = dict_get (xl->options,
- "scheduler.alu.write-usage.entry-threshold");
- if (entry_fn) {
- if (gf_string2bytesize (entry_fn->data,
- &alu_sched->entry_limit.write_usage) != 0) {
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of option scheduler.alu."
- "write-usage.entry-threshold",
- entry_fn->data);
- return -1;
- }
- } else {
- alu_sched->entry_limit.write_usage = ALU_WRITE_USAGE_ENTRY_THRESHOLD_DEFAULT;
- }
- _threshold_fn->entry_value = get_stats_write_usage;
- exit_fn = dict_get (xl->options,
- "scheduler.alu.write-usage.exit-threshold");
- if (exit_fn) {
- if (gf_string2bytesize (exit_fn->data,
- &alu_sched->exit_limit.write_usage) != 0) {
- gf_log (xl->name, GF_LOG_ERROR,
- "invalid number format \"%s\""
- " of \"option scheduler.alu."
- "write-usage.exit-threshold\"",
- exit_fn->data);
- return -1;
- }
- } else {
- alu_sched->exit_limit.write_usage = ALU_WRITE_USAGE_EXIT_THRESHOLD_DEFAULT;
- }
- _threshold_fn->exit_value = get_stats_write_usage;
- tmp_threshold = alu_sched->threshold_fn;
- if (!tmp_threshold) {
- alu_sched->threshold_fn = _threshold_fn;
- } else {
- while (tmp_threshold->next) {
- tmp_threshold = tmp_threshold->next;
- }
- tmp_threshold->next = _threshold_fn;
- }
- gf_log (xl->name, GF_LOG_DEBUG,
- "alu_init: = %"PRId64",%"PRId64"",
- alu_sched->entry_limit.write_usage,
- alu_sched->exit_limit.write_usage);
-
- } else if (strcmp (order_str, "read-usage") == 0) {
- /* Read usage */
-
- _threshold_fn = GF_CALLOC (1, sizeof (struct alu_threshold), gf_alu_mt_alu_threshold);
- ERR_ABORT (_threshold_fn);
- _threshold_fn->diff_value = get_max_diff_read_usage;
- _threshold_fn->sched_value = get_stats_read_usage;
- entry_fn = dict_get (xl->options,
- "scheduler.alu.read-usage.entry-threshold");
- if (entry_fn) {
- if (gf_string2bytesize (entry_fn->data,
- &alu_sched->entry_limit.read_usage) != 0) {
- gf_log (xl->name,
- GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "read-usage.entry-threshold\"",
- entry_fn->data);
- return -1;
- }
- } else {
- alu_sched->entry_limit.read_usage = ALU_READ_USAGE_ENTRY_THRESHOLD_DEFAULT;
- }
- _threshold_fn->entry_value = get_stats_read_usage;
- exit_fn = dict_get (xl->options,
- "scheduler.alu.read-usage.exit-threshold");
- if (exit_fn)
- {
- if (gf_string2bytesize (exit_fn->data,
- &alu_sched->exit_limit.read_usage) != 0)
- {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "read-usage.exit-threshold\"",
- exit_fn->data);
- return -1;
- }
- }
- else
- {
- alu_sched->exit_limit.read_usage = ALU_READ_USAGE_EXIT_THRESHOLD_DEFAULT;
- }
- _threshold_fn->exit_value = get_stats_read_usage;
- tmp_threshold = alu_sched->threshold_fn;
- if (!tmp_threshold) {
- alu_sched->threshold_fn = _threshold_fn;
- }
- else {
- while (tmp_threshold->next) {
- tmp_threshold = tmp_threshold->next;
- }
- tmp_threshold->next = _threshold_fn;
- }
- gf_log ("alu", GF_LOG_DEBUG,
- "alu_init: = %"PRId64",%"PRId64"",
- alu_sched->entry_limit.read_usage,
- alu_sched->exit_limit.read_usage);
-
- } else if (strcmp (order_str, "open-files-usage") == 0) {
- /* Open files counter */
-
- _threshold_fn = GF_CALLOC (1, sizeof (struct alu_threshold), gf_alu_mt_alu_threshold);
- ERR_ABORT (_threshold_fn);
- _threshold_fn->diff_value = get_max_diff_file_usage;
- _threshold_fn->sched_value = get_stats_file_usage;
- entry_fn = dict_get (xl->options,
- "scheduler.alu.open-files-usage.entry-threshold");
- if (entry_fn) {
- if (gf_string2uint64 (entry_fn->data,
- &alu_sched->entry_limit.nr_files) != 0)
- {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "open-files-usage.entry-"
- "threshold\"", entry_fn->data);
- return -1;
- }
- }
- else
- {
- alu_sched->entry_limit.nr_files = ALU_OPEN_FILES_USAGE_ENTRY_THRESHOLD_DEFAULT;
- }
- _threshold_fn->entry_value = get_stats_file_usage;
- exit_fn = dict_get (xl->options,
- "scheduler.alu.open-files-usage.exit-threshold");
- if (exit_fn)
- {
- if (gf_string2uint64 (exit_fn->data,
- &alu_sched->exit_limit.nr_files) != 0)
- {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.alu."
- "open-files-usage.exit-"
- "threshold\"", exit_fn->data);
- return -1;
- }
- }
- else
- {
- alu_sched->exit_limit.nr_files = ALU_OPEN_FILES_USAGE_EXIT_THRESHOLD_DEFAULT;
- }
- _threshold_fn->exit_value = get_stats_file_usage;
- tmp_threshold = alu_sched->threshold_fn;
- if (!tmp_threshold) {
- alu_sched->threshold_fn = _threshold_fn;
- }
- else {
- while (tmp_threshold->next) {
- tmp_threshold = tmp_threshold->next;
- }
- tmp_threshold->next = _threshold_fn;
- }
- gf_log ("alu", GF_LOG_DEBUG,
- "alu.c->alu_init: = %"PRIu64",%"PRIu64"",
- alu_sched->entry_limit.nr_files,
- alu_sched->exit_limit.nr_files);
-
- } else if (strcmp (order_str, "disk-speed-usage") == 0) {
- /* Disk speed */
-
- _threshold_fn = GF_CALLOC (1, sizeof (struct alu_threshold), gf_alu_mt_alu_threshold);
- ERR_ABORT (_threshold_fn);
- _threshold_fn->diff_value = get_max_diff_disk_speed;
- _threshold_fn->sched_value = get_stats_disk_speed;
- entry_fn = dict_get (xl->options,
- "scheduler.alu.disk-speed-usage.entry-threshold");
- if (entry_fn) {
- gf_log ("alu", GF_LOG_DEBUG,
- "entry-threshold is given, "
- "value is constant");
- }
- _threshold_fn->entry_value = NULL;
- exit_fn = dict_get (xl->options,
- "scheduler.alu.disk-speed-usage.exit-threshold");
- if (exit_fn) {
- gf_log ("alu", GF_LOG_DEBUG,
- "exit-threshold is given, "
- "value is constant");
- }
- _threshold_fn->exit_value = NULL;
- tmp_threshold = alu_sched->threshold_fn;
- if (!tmp_threshold) {
- alu_sched->threshold_fn = _threshold_fn;
- }
- else {
- while (tmp_threshold->next) {
- tmp_threshold = tmp_threshold->next;
- }
- tmp_threshold->next = _threshold_fn;
- }
-
- } else {
- gf_log ("alu", GF_LOG_DEBUG,
- "%s, unknown option provided to scheduler",
- order_str);
- }
- order_str = strtok_r (NULL, ":", &tmp_str);
- }
-
- return 0;
-}
-
-static int32_t
-alu_init (xlator_t *xl)
-{
- struct alu_sched *alu_sched = NULL;
- struct alu_limits *_limit_fn = NULL;
- struct alu_limits *tmp_limits = NULL;
- uint32_t min_free_disk = 0;
- data_t *limits = NULL;
-
- alu_sched = GF_CALLOC (1, sizeof (struct alu_sched),
- gf_alu_mt_alu_sched);
- ERR_ABORT (alu_sched);
-
- {
- alu_parse_options (xl, alu_sched);
- }
-
- /* Get the limits */
-
- limits = dict_get (xl->options,
- "scheduler.limits.min-free-disk");
- if (limits) {
- _limit_fn = GF_CALLOC (1, sizeof (struct alu_limits),
- gf_alu_mt_alu_limits);
- ERR_ABORT (_limit_fn);
- _limit_fn->min_value = get_stats_free_disk;
- _limit_fn->cur_value = get_stats_free_disk;
- tmp_limits = alu_sched->limits_fn ;
- _limit_fn->next = tmp_limits;
- alu_sched->limits_fn = _limit_fn;
-
- if (gf_string2percent (limits->data,
- &min_free_disk) != 0) {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" "
- "of \"option scheduler.limits."
- "min-free-disk\"", limits->data);
- return -1;
- }
- alu_sched->spec_limit.free_disk = min_free_disk;
-
- if (alu_sched->spec_limit.free_disk >= 100) {
- gf_log ("alu", GF_LOG_ERROR,
- "check the \"option scheduler."
- "limits.min-free-disk\", it should "
- "be percentage value");
- return -1;
- }
- alu_sched->spec_limit.total_disk_size = ALU_LIMITS_TOTAL_DISK_SIZE_DEFAULT; /* Its in % */
- gf_log ("alu", GF_LOG_DEBUG,
- "alu.limit.min-disk-free = %"PRId64"",
- _limit_fn->cur_value (&(alu_sched->spec_limit)));
- }
-
- limits = dict_get (xl->options,
- "scheduler.limits.max-open-files");
- if (limits) {
- // Update alu_sched->priority properly
- _limit_fn = GF_CALLOC (1, sizeof (struct alu_limits),
- gf_alu_mt_alu_limits);
- ERR_ABORT (_limit_fn);
- _limit_fn->max_value = get_stats_file_usage;
- _limit_fn->cur_value = get_stats_file_usage;
- tmp_limits = alu_sched->limits_fn ;
- _limit_fn->next = tmp_limits;
- alu_sched->limits_fn = _limit_fn;
- if (gf_string2uint64_base10 (limits->data,
- &alu_sched->spec_limit.nr_files) != 0)
- {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format '%s' of option "
- "scheduler.limits.max-open-files",
- limits->data);
- return -1;
- }
-
- gf_log ("alu", GF_LOG_DEBUG,
- "alu_init: limit.max-open-files = %"PRId64"",
- _limit_fn->cur_value (&(alu_sched->spec_limit)));
- }
-
-
- /* Stats refresh options */
- limits = dict_get (xl->options,
- "scheduler.refresh-interval");
- if (limits) {
- if (gf_string2time (limits->data,
- &alu_sched->refresh_interval) != 0) {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "option scheduler.refresh-interval",
- limits->data);
- return -1;
- }
- } else {
- alu_sched->refresh_interval = ALU_REFRESH_INTERVAL_DEFAULT;
- }
- gettimeofday (&(alu_sched->last_stat_fetch), NULL);
-
-
- limits = dict_get (xl->options,
- "scheduler.alu.stat-refresh.num-file-create");
- if (limits) {
- if (gf_string2uint32 (limits->data,
- &alu_sched->refresh_create_count) != 0)
- {
- gf_log ("alu", GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "alu.stat-refresh.num-file-create\"",
- limits->data);
- return -1;
- }
- } else {
- alu_sched->refresh_create_count = ALU_REFRESH_CREATE_COUNT_DEFAULT;
- }
-
- {
- /* Build an array of child_nodes */
- struct alu_sched_struct *sched_array = NULL;
- xlator_list_t *trav_xl = xl->children;
- data_t *data = NULL;
- int32_t index = 0;
-
- while (trav_xl) {
- index++;
- trav_xl = trav_xl->next;
- }
- alu_sched->child_count = index;
- sched_array = GF_CALLOC (index, sizeof (struct alu_sched_struct), gf_alu_mt_alu_sched_struct);
- ERR_ABORT (sched_array);
- trav_xl = xl->children;
- index = 0;
- while (trav_xl) {
- sched_array[index].xl = trav_xl->xlator;
- sched_array[index].eligible = 1;
- index++;
- trav_xl = trav_xl->next;
- }
- alu_sched->array = sched_array;
-
- data = dict_get (xl->options,
- "scheduler.read-only-subvolumes");
- if (data) {
- char *child = NULL;
- char *tmp = NULL;
- char *childs_data = gf_strdup (data->data);
-
- child = strtok_r (childs_data, ",", &tmp);
- while (child) {
- for (index = 1; index < alu_sched->child_count; index++) {
- if (strcmp (alu_sched->array[index -1].xl->name, child) == 0) {
- memcpy (&(alu_sched->array[index -1]),
- &(alu_sched->array[alu_sched->child_count -1]),
- sizeof (struct alu_sched_struct));
- alu_sched->child_count--;
- break;
- }
- }
- child = strtok_r (NULL, ",", &tmp);
- }
- }
- }
-
- *((long *)xl->private) = (long)alu_sched;
-
- /* Initialize all the alu_sched structure's elements */
- {
- alu_sched->sched_nodes_pending = 0;
-
- alu_sched->min_limit.free_disk = 0x00FFFFFF;
- alu_sched->min_limit.disk_usage = 0xFFFFFFFF;
- alu_sched->min_limit.total_disk_size = 0xFFFFFFFF;
- alu_sched->min_limit.disk_speed = 0xFFFFFFFF;
- alu_sched->min_limit.write_usage = 0xFFFFFFFF;
- alu_sched->min_limit.read_usage = 0xFFFFFFFF;
- alu_sched->min_limit.nr_files = 0xFFFFFFFF;
- alu_sched->min_limit.nr_clients = 0xFFFFFFFF;
- }
-
- pthread_mutex_init (&alu_sched->alu_mutex, NULL);
- return 0;
-}
-
-static void
-alu_fini (xlator_t *xl)
-{
- if (!xl)
- return;
- struct alu_sched *alu_sched = (struct alu_sched *)*((long *)xl->private);
- struct alu_limits *limit = alu_sched->limits_fn;
- struct alu_threshold *threshold = alu_sched->threshold_fn;
- void *tmp = NULL;
- pthread_mutex_destroy (&alu_sched->alu_mutex);
- GF_FREE (alu_sched->array);
- while (limit) {
- tmp = limit;
- limit = limit->next;
- GF_FREE (tmp);
- }
- while (threshold) {
- tmp = threshold;
- threshold = threshold->next;
- GF_FREE (tmp);
- }
- GF_FREE (alu_sched);
-}
-
-static int32_t
-update_stat_array_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *xl,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *trav_stats)
-{
- struct alu_sched *alu_sched = (struct alu_sched *)*((long *)xl->private);
- struct alu_limits *limits_fn = alu_sched->limits_fn;
- int32_t idx = 0;
-
- pthread_mutex_lock (&alu_sched->alu_mutex);
- for (idx = 0; idx < alu_sched->child_count; idx++) {
- if (alu_sched->array[idx].xl == (xlator_t *)cookie)
- break;
- }
- pthread_mutex_unlock (&alu_sched->alu_mutex);
-
- if (op_ret == -1) {
- alu_sched->array[idx].eligible = 0;
- } else {
- memcpy (&(alu_sched->array[idx].stats), trav_stats, sizeof (struct xlator_stats));
-
- /* Get stats from all the child node */
- /* Here check the limits specified by the user to
- consider the nodes to be used by scheduler */
- alu_sched->array[idx].eligible = 1;
- limits_fn = alu_sched->limits_fn;
- while (limits_fn){
- if (limits_fn->max_value &&
- (limits_fn->cur_value (trav_stats) >
- limits_fn->max_value (&(alu_sched->spec_limit)))) {
- alu_sched->array[idx].eligible = 0;
- }
- if (limits_fn->min_value &&
- (limits_fn->cur_value (trav_stats) <
- limits_fn->min_value (&(alu_sched->spec_limit)))) {
- alu_sched->array[idx].eligible = 0;
- }
- limits_fn = limits_fn->next;
- }
-
- /* Select minimum and maximum disk_usage */
- if (trav_stats->disk_usage > alu_sched->max_limit.disk_usage) {
- alu_sched->max_limit.disk_usage = trav_stats->disk_usage;
- }
- if (trav_stats->disk_usage < alu_sched->min_limit.disk_usage) {
- alu_sched->min_limit.disk_usage = trav_stats->disk_usage;
- }
-
- /* Select minimum and maximum disk_speed */
- if (trav_stats->disk_speed > alu_sched->max_limit.disk_speed) {
- alu_sched->max_limit.disk_speed = trav_stats->disk_speed;
- }
- if (trav_stats->disk_speed < alu_sched->min_limit.disk_speed) {
- alu_sched->min_limit.disk_speed = trav_stats->disk_speed;
- }
-
- /* Select minimum and maximum number of open files */
- if (trav_stats->nr_files > alu_sched->max_limit.nr_files) {
- alu_sched->max_limit.nr_files = trav_stats->nr_files;
- }
- if (trav_stats->nr_files < alu_sched->min_limit.nr_files) {
- alu_sched->min_limit.nr_files = trav_stats->nr_files;
- }
-
- /* Select minimum and maximum write-usage */
- if (trav_stats->write_usage > alu_sched->max_limit.write_usage) {
- alu_sched->max_limit.write_usage = trav_stats->write_usage;
- }
- if (trav_stats->write_usage < alu_sched->min_limit.write_usage) {
- alu_sched->min_limit.write_usage = trav_stats->write_usage;
- }
-
- /* Select minimum and maximum read-usage */
- if (trav_stats->read_usage > alu_sched->max_limit.read_usage) {
- alu_sched->max_limit.read_usage = trav_stats->read_usage;
- }
- if (trav_stats->read_usage < alu_sched->min_limit.read_usage) {
- alu_sched->min_limit.read_usage = trav_stats->read_usage;
- }
-
- /* Select minimum and maximum free-disk */
- if (trav_stats->free_disk > alu_sched->max_limit.free_disk) {
- alu_sched->max_limit.free_disk = trav_stats->free_disk;
- }
- if (trav_stats->free_disk < alu_sched->min_limit.free_disk) {
- alu_sched->min_limit.free_disk = trav_stats->free_disk;
- }
- }
-
- STACK_DESTROY (frame->root);
-
- return 0;
-}
-
-static void
-update_stat_array (xlator_t *xl)
-{
- /* This function schedules the file in one of the child nodes */
- struct alu_sched *alu_sched = (struct alu_sched *)*((long *)xl->private);
- int32_t idx = 0;
- call_frame_t *frame = NULL;
- call_pool_t *pool = xl->ctx->pool;
-
- for (idx = 0 ; idx < alu_sched->child_count; idx++) {
- frame = create_frame (xl, pool);
-
- STACK_WIND_COOKIE (frame,
- update_stat_array_cbk,
- alu_sched->array[idx].xl, //cookie
- alu_sched->array[idx].xl,
- (alu_sched->array[idx].xl)->mops->stats,
- 0); //flag
- }
- return;
-}
-
-static void
-alu_update (xlator_t *xl)
-{
- struct timeval tv;
- struct alu_sched *alu_sched = (struct alu_sched *)*((long *)xl->private);
-
- gettimeofday (&tv, NULL);
- if (tv.tv_sec > (alu_sched->refresh_interval + alu_sched->last_stat_fetch.tv_sec)) {
- /* Update the stats from all the server */
- update_stat_array (xl);
- alu_sched->last_stat_fetch.tv_sec = tv.tv_sec;
- }
-}
-
-static xlator_t *
-alu_scheduler (xlator_t *xl, const void *path)
-{
- /* This function schedules the file in one of the child nodes */
- struct alu_sched *alu_sched = (struct alu_sched *)*((long *)xl->private);
- int32_t sched_index = 0;
- int32_t sched_index_orig = 0;
- int32_t idx = 0;
-
- alu_update (xl);
-
- /* Now check each threshold one by one if some nodes are classified */
- {
- struct alu_threshold *trav_threshold = alu_sched->threshold_fn;
- struct alu_threshold *tmp_threshold = alu_sched->sched_method;
- struct alu_sched_node *tmp_sched_node;
-
- /* This pointer 'trav_threshold' contains function pointers according to spec file
- give by user, */
- while (trav_threshold) {
- /* This check is needed for seeing if already there are nodes in this criteria
- to be scheduled */
- if (!alu_sched->sched_nodes_pending) {
- for (idx = 0; idx < alu_sched->child_count; idx++) {
- if (!alu_sched->array[idx].eligible) {
- continue;
- }
- if (trav_threshold->entry_value) {
- if (trav_threshold->diff_value (&(alu_sched->max_limit),
- &(alu_sched->array[idx].stats)) <
- trav_threshold->entry_value (&(alu_sched->entry_limit))) {
- continue;
- }
- }
- tmp_sched_node = GF_CALLOC (1, sizeof (struct alu_sched_node), gf_alu_mt_alu_sched_node);
- ERR_ABORT (tmp_sched_node);
- tmp_sched_node->index = idx;
- if (!alu_sched->sched_node) {
- alu_sched->sched_node = tmp_sched_node;
- } else {
- pthread_mutex_lock (&alu_sched->alu_mutex);
- tmp_sched_node->next = alu_sched->sched_node;
- alu_sched->sched_node = tmp_sched_node;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- }
- alu_sched->sched_nodes_pending++;
- }
- } /* end of if (sched_nodes_pending) */
-
- /* This loop is required to check the eligible nodes */
- struct alu_sched_node *trav_sched_node;
- while (alu_sched->sched_nodes_pending) {
- trav_sched_node = alu_sched->sched_node;
- sched_index = trav_sched_node->index;
- if (alu_sched->array[sched_index].eligible)
- break;
- alu_sched->sched_node = trav_sched_node->next;
- GF_FREE (trav_sched_node);
- alu_sched->sched_nodes_pending--;
- }
- if (alu_sched->sched_nodes_pending) {
- /* There are some node in this criteria to be scheduled, no need
- * to sort and check other methods
- */
- if (tmp_threshold && tmp_threshold->exit_value) {
- /* verify the exit value && whether node is eligible or not */
- if (tmp_threshold->diff_value (&(alu_sched->max_limit),
- &(alu_sched->array[sched_index].stats)) >
- tmp_threshold->exit_value (&(alu_sched->exit_limit))) {
- /* Free the allocated info for the node :) */
- pthread_mutex_lock (&alu_sched->alu_mutex);
- alu_sched->sched_node = trav_sched_node->next;
- GF_FREE (trav_sched_node);
- trav_sched_node = alu_sched->sched_node;
- alu_sched->sched_nodes_pending--;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- }
- } else {
- /* if there is no exit value, then exit after scheduling once */
- pthread_mutex_lock (&alu_sched->alu_mutex);
- alu_sched->sched_node = trav_sched_node->next;
- GF_FREE (trav_sched_node);
- trav_sched_node = alu_sched->sched_node;
- alu_sched->sched_nodes_pending--;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- }
-
- alu_sched->sched_method = tmp_threshold; /* this is the method used for selecting */
-
- /* */
- if (trav_sched_node) {
- tmp_sched_node = trav_sched_node;
- while (trav_sched_node->next) {
- trav_sched_node = trav_sched_node->next;
- }
- if (tmp_sched_node->next) {
- pthread_mutex_lock (&alu_sched->alu_mutex);
- alu_sched->sched_node = tmp_sched_node->next;
- tmp_sched_node->next = NULL;
- trav_sched_node->next = tmp_sched_node;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- }
- }
- /* return the scheduled node */
- return alu_sched->array[sched_index].xl;
- } /* end of if (pending_nodes) */
-
- tmp_threshold = trav_threshold;
- trav_threshold = trav_threshold->next;
- }
- }
-
- /* This is used only when there is everything seems ok, or no eligible nodes */
- sched_index_orig = alu_sched->sched_index;
- alu_sched->sched_method = NULL;
- while (1) {
- //lock
- pthread_mutex_lock (&alu_sched->alu_mutex);
- sched_index = alu_sched->sched_index++;
- alu_sched->sched_index = alu_sched->sched_index % alu_sched->child_count;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- //unlock
- if (alu_sched->array[sched_index].eligible)
- break;
- if (sched_index_orig == (sched_index + 1) % alu_sched->child_count) {
- gf_log ("alu", GF_LOG_WARNING, "No node is eligible to schedule");
- //lock
- pthread_mutex_lock (&alu_sched->alu_mutex);
- alu_sched->sched_index++;
- alu_sched->sched_index = alu_sched->sched_index % alu_sched->child_count;
- pthread_mutex_unlock (&alu_sched->alu_mutex);
- //unlock
- break;
- }
- }
- return alu_sched->array[sched_index].xl;
-}
-
-/**
- * notify
- */
-void
-alu_notify (xlator_t *xl, int32_t event, void *data)
-{
- struct alu_sched *alu_sched = NULL;
- int32_t idx = 0;
-
- alu_sched = (struct alu_sched *)*((long *)xl->private);
- if (!alu_sched)
- return;
-
- for (idx = 0; idx < alu_sched->child_count; idx++) {
- if (alu_sched->array[idx].xl == (xlator_t *)data)
- break;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- //alu_sched->array[idx].eligible = 1;
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- alu_sched->array[idx].eligible = 0;
- }
- break;
- default:
- {
- ;
- }
- break;
- }
-
-}
-
-int32_t
-alu_mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_alu_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-struct sched_ops sched = {
- .init = alu_init,
- .fini = alu_fini,
- .update = alu_update,
- .schedule = alu_scheduler,
- .notify = alu_notify,
- .mem_acct_init = alu_mem_acct_init,
-};
-
-struct volume_options options[] = {
- { .key = { "scheduler.alu.order", "alu.order" },
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = { "scheduler.alu.disk-usage.entry-threshold",
- "alu.disk-usage.entry-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.disk-usage.exit-threshold",
- "alu.disk-usage.exit-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.write-usage.entry-threshold",
- "alu.write-usage.entry-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.write-usage.exit-threshold",
- "alu.write-usage.exit-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.read-usage.entry-threshold",
- "alu.read-usage.entry-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.read-usage.exit-threshold",
- "alu.read-usage.exit-threshold" },
- .type = GF_OPTION_TYPE_SIZET
- },
- { .key = { "scheduler.alu.open-files-usage.entry-threshold",
- "alu.open-files-usage.entry-threshold" },
- .type = GF_OPTION_TYPE_INT
- },
- { .key = { "scheduler.alu.open-files-usage.exit-threshold",
- "alu.open-files-usage.exit-threshold" },
- .type = GF_OPTION_TYPE_INT
- },
- { .key = { "scheduler.read-only-subvolumes",
- "alu.read-only-subvolumes" },
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = { "scheduler.refresh-interval",
- "alu.refresh-interval",
- "alu.stat-refresh.interval" },
- .type = GF_OPTION_TYPE_TIME
- },
- { .key = { "scheduler.limits.min-free-disk",
- "alu.limits.min-free-disk" },
- .type = GF_OPTION_TYPE_PERCENT
- },
- { .key = { "scheduler.alu.stat-refresh.num-file-create"
- "alu.stat-refresh.num-file-create"},
- .type = GF_OPTION_TYPE_INT
- },
- { .key = {NULL}, }
-};
diff --git a/scheduler/alu/src/alu.h b/scheduler/alu/src/alu.h
deleted file mode 100644
index c716ad8e5e7..00000000000
--- a/scheduler/alu/src/alu.h
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _ALU_H
-#define _ALU_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "scheduler.h"
-
-struct alu_sched;
-
-struct alu_sched_struct {
- xlator_t *xl;
- struct xlator_stats stats;
- unsigned char eligible;
-};
-
-// Write better name for these functions
-struct alu_limits {
- struct alu_limits *next;
- int64_t (*max_value) (struct xlator_stats *); /* Max limit, specified by the user */
- int64_t (*min_value) (struct xlator_stats *); /* Min limit, specified by the user */
- int64_t (*cur_value) (struct xlator_stats *); /* Current values of variables got from stats call */
-};
-
-struct alu_threshold {
- struct alu_threshold *next;
- int64_t (*diff_value) (struct xlator_stats *max, struct xlator_stats *min); /* Diff b/w max and min */
- int64_t (*entry_value) (struct xlator_stats *); /* Limit specified user */
- int64_t (*exit_value) (struct xlator_stats *); /* Exit point for the limit */
- int64_t (*sched_value) (struct xlator_stats *); /* This will return the index of the child area */
-};
-
-struct alu_sched_node {
- struct alu_sched_node *next;
- int32_t index;
-};
-
-struct alu_sched {
- struct alu_limits *limits_fn;
- struct alu_threshold *threshold_fn;
- struct alu_sched_struct *array;
- struct alu_sched_node *sched_node;
- struct alu_threshold *sched_method;
- struct xlator_stats max_limit;
- struct xlator_stats min_limit;
- struct xlator_stats entry_limit;
- struct xlator_stats exit_limit;
- struct xlator_stats spec_limit; /* User given limit */
-
- pthread_mutex_t alu_mutex;
- struct timeval last_stat_fetch;
- uint32_t refresh_interval; /* in seconds */
- uint32_t refresh_create_count; /* num-file-create */
-
- int32_t sched_nodes_pending;
-
- int32_t sched_index; /* used for round robin scheduling in case of many nodes getting the criteria match. */
- int32_t child_count;
-
-};
-
-struct _alu_local_t {
- int32_t call_count;
-};
-
-typedef struct _alu_local_t alu_local_t;
-
-#endif /* _ALU_H */
diff --git a/scheduler/nufa/src/Makefile.am b/scheduler/nufa/src/Makefile.am
deleted file mode 100644
index 6eb3d39f1e2..00000000000
--- a/scheduler/nufa/src/Makefile.am
+++ /dev/null
@@ -1,12 +0,0 @@
-sched_LTLIBRARIES = nufa.la
-scheddir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler
-
-nufa_la_LDFLAGS = -module -avoidversion
-
-nufa_la_SOURCES = nufa.c
-nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/scheduler/nufa/src/nufa-mem-types.h b/scheduler/nufa/src/nufa-mem-types.h
deleted file mode 100644
index 945dafa7c31..00000000000
--- a/scheduler/nufa/src/nufa-mem-types.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __NUFA_MEM_TYPES_H__
-#define __NUFA_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_locks_mem_types_ {
- gf_nufa_mt_nufa_struct = gf_common_mt_end + 1,
- gf_nufa_mt_nufa_sched_struct,
- gf_nufa_mt_int32_t,
- gf_nufa_mt_end
-};
-#endif
-
diff --git a/scheduler/nufa/src/nufa.c b/scheduler/nufa/src/nufa.c
deleted file mode 100644
index 1bf477bdc2c..00000000000
--- a/scheduler/nufa/src/nufa.c
+++ /dev/null
@@ -1,429 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/time.h>
-
-#include "scheduler.h"
-#include "common-utils.h"
-#include "nufa-mem-types.h"
-
-struct nufa_sched_struct {
- xlator_t *xl;
- struct timeval last_stat_fetch;
- int64_t free_disk;
- int32_t refresh_interval;
- unsigned char eligible;
-};
-
-struct nufa_struct {
- struct nufa_sched_struct *array;
- struct timeval last_stat_fetch;
-
- int32_t *local_array; /* Used to keep the index of the local xlators */
- int32_t local_xl_index; /* index in the above array */
- int32_t local_xl_count; /* Count of the local subvolumes */
-
- uint32_t refresh_interval;
- uint32_t min_free_disk;
-
- gf_lock_t nufa_lock;
- int32_t child_count;
- int32_t sched_index;
-};
-
-#define NUFA_LIMITS_MIN_FREE_DISK_DEFAULT 15
-#define NUFA_REFRESH_INTERVAL_DEFAULT 30
-
-static int32_t
-nufa_init (xlator_t *xl)
-{
- int32_t index = 0;
- data_t *local_name = NULL;
- data_t *data = NULL;
- xlator_list_t *trav_xl = xl->children;
- struct nufa_struct *nufa_buf = NULL;
-
- nufa_buf = GF_CALLOC (1, sizeof (struct nufa_struct),
- gf_nufa_mt_nufa_struct);
- ERR_ABORT (nufa_buf);
-
- data = dict_get (xl->options, "scheduler.limits.min-free-disk");
- if (data) {
- if (gf_string2percent (data->data,
- &nufa_buf->min_free_disk) != 0) {
- gf_log ("nufa", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "\"option scheduler.limits.min-free-disk\"",
- data->data);
- return -1;
- }
- if (nufa_buf->min_free_disk >= 100) {
- gf_log ("nufa", GF_LOG_ERROR,
- "check \"option scheduler.limits.min-free-disk"
- "\", it should be percentage value");
- return -1;
- }
- } else {
- gf_log ("nufa", GF_LOG_WARNING,
- "No option for limit min-free-disk given, "
- "defaulting it to 15%%");
- nufa_buf->min_free_disk = NUFA_LIMITS_MIN_FREE_DISK_DEFAULT;
- }
- data = dict_get (xl->options, "scheduler.refresh-interval");
- if (data != NULL) {
- if (gf_string2time (data->data,
- &nufa_buf->refresh_interval) != 0) {
- gf_log ("nufa", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "\"option scheduler.refresh-interval\"",
- data->data);
- return -1;
- }
- } else {
- gf_log ("nufa", GF_LOG_WARNING,
- "No option for scheduler.refresh-interval given, "
- "defaulting it to 30");
- nufa_buf->refresh_interval = NUFA_REFRESH_INTERVAL_DEFAULT;
- }
-
- /* Get the array built */
- while (trav_xl) {
- index++;
- trav_xl = trav_xl->next;
- }
- nufa_buf->child_count = index;
- nufa_buf->sched_index = 0;
- nufa_buf->array = GF_CALLOC (index, sizeof (struct nufa_sched_struct),
- gf_nufa_mt_nufa_sched_struct);
- ERR_ABORT (nufa_buf->array);
- nufa_buf->local_array = GF_CALLOC (index, sizeof (int32_t),
- gf_nufa_mt_int32_t);
- ERR_ABORT (nufa_buf->array);
- trav_xl = xl->children;
-
- local_name = dict_get (xl->options, "scheduler.local-volume-name");
- if (!local_name) {
- /* Error */
- gf_log ("nufa", GF_LOG_ERROR,
- "No 'local-volume-name' option given in volume file");
- GF_FREE (nufa_buf->array);
- GF_FREE (nufa_buf->local_array);
- GF_FREE (nufa_buf);
- return -1;
- }
-
- /* Get the array properly */
- index = 0;
- trav_xl = xl->children;
- while (trav_xl) {
- nufa_buf->array[index].xl = trav_xl->xlator;
- nufa_buf->array[index].eligible = 1;
- nufa_buf->array[index].free_disk = nufa_buf->min_free_disk;
- nufa_buf->array[index].refresh_interval =
- nufa_buf->refresh_interval;
-
- trav_xl = trav_xl->next;
- index++;
- }
-
- {
- int32_t array_index = 0;
- char *child = NULL;
- char *tmp = NULL;
- char *childs_data = gf_strdup (local_name->data);
-
- child = strtok_r (childs_data, ",", &tmp);
- while (child) {
- /* Check if the local_volume specified is proper
- subvolume of unify */
- trav_xl = xl->children;
- index=0;
- while (trav_xl) {
- if (strcmp (child, trav_xl->xlator->name) == 0)
- break;
- trav_xl = trav_xl->next;
- index++;
- }
-
- if (!trav_xl) {
- /* entry for 'local-volume-name' is wrong,
- not present in subvolumes */
- gf_log ("nufa", GF_LOG_ERROR,
- "option 'scheduler.local-volume-name' "
- "%s is wrong", child);
- GF_FREE (nufa_buf->array);
- GF_FREE (nufa_buf->local_array);
- GF_FREE (nufa_buf);
- return -1;
- } else {
- nufa_buf->local_array[array_index++] = index;
- nufa_buf->local_xl_count++;
- }
- child = strtok_r (NULL, ",", &tmp);
- }
- GF_FREE (childs_data);
- }
-
- LOCK_INIT (&nufa_buf->nufa_lock);
- *((long *)xl->private) = (long)nufa_buf; // put it at the proper place
- return 0;
-}
-
-static void
-nufa_fini (xlator_t *xl)
-{
- struct nufa_struct *nufa_buf =
- (struct nufa_struct *)*((long *)xl->private);
-
- LOCK_DESTROY (&nufa_buf->nufa_lock);
- GF_FREE (nufa_buf->local_array);
- GF_FREE (nufa_buf->array);
- GF_FREE (nufa_buf);
-}
-
-static int32_t
-update_stat_array_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *xl,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *trav_stats)
-{
- struct nufa_struct *nufa_struct = NULL;
- int32_t idx = 0;
- int32_t percent = 0;
-
- nufa_struct = (struct nufa_struct *)*((long *)xl->private);
- LOCK (&nufa_struct->nufa_lock);
- for (idx = 0; idx < nufa_struct->child_count; idx++) {
- if (nufa_struct->array[idx].xl->name == (char *)cookie)
- break;
- }
- UNLOCK (&nufa_struct->nufa_lock);
-
- if (op_ret == 0) {
- percent = ((trav_stats->free_disk * 100) /
- trav_stats->total_disk_size);
- if (nufa_struct->array[idx].free_disk > percent) {
- if (nufa_struct->array[idx].eligible)
- gf_log ("nufa", GF_LOG_CRITICAL,
- "node \"%s\" is _almost_ (%d %%) full",
- nufa_struct->array[idx].xl->name,
- 100 - percent);
- nufa_struct->array[idx].eligible = 0;
- } else {
- nufa_struct->array[idx].eligible = 1;
- }
- } else {
- nufa_struct->array[idx].eligible = 0;
- }
-
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-static void
-update_stat_array (xlator_t *xl)
-{
- /* This function schedules the file in one of the child nodes */
- int32_t idx;
- struct nufa_struct *nufa_buf =
- (struct nufa_struct *)*((long *)xl->private);
- call_frame_t *frame = NULL;
- call_pool_t *pool = xl->ctx->pool;
-
- for (idx = 0; idx < nufa_buf->child_count; idx++) {
- frame = create_frame (xl, pool);
-
- STACK_WIND_COOKIE (frame,
- update_stat_array_cbk,
- nufa_buf->array[idx].xl->name,
- nufa_buf->array[idx].xl,
- (nufa_buf->array[idx].xl)->mops->stats,
- 0); //flag
- }
-
- return;
-}
-
-static void
-nufa_update (xlator_t *xl)
-{
- struct nufa_struct *nufa_buf =
- (struct nufa_struct *)*((long *)xl->private);
- struct timeval tv;
- gettimeofday (&tv, NULL);
- if (tv.tv_sec > (nufa_buf->refresh_interval
- + nufa_buf->last_stat_fetch.tv_sec)) {
- /* Update the stats from all the server */
- update_stat_array (xl);
- nufa_buf->last_stat_fetch.tv_sec = tv.tv_sec;
- }
-}
-
-static xlator_t *
-nufa_schedule (xlator_t *xl, const void *path)
-{
- struct nufa_struct *nufa_buf =
- (struct nufa_struct *)*((long *)xl->private);
- int32_t nufa_orig = nufa_buf->local_xl_index;
- int32_t rr;
-
- nufa_update (xl);
-
- while (1) {
- LOCK (&nufa_buf->nufa_lock);
- rr = nufa_buf->local_xl_index++;
- nufa_buf->local_xl_index %= nufa_buf->local_xl_count;
- UNLOCK (&nufa_buf->nufa_lock);
-
- /* if 'eligible' or there are _no_ eligible nodes */
- if (nufa_buf->array[nufa_buf->local_array[rr]].eligible) {
- /* Return the local node */
- return nufa_buf->array[nufa_buf->local_array[rr]].xl;
- }
- if ((rr + 1) % nufa_buf->local_xl_count == nufa_orig) {
- gf_log ("nufa", GF_LOG_CRITICAL,
- "No free space available on any local "
- "volumes, using RR scheduler");
- LOCK (&nufa_buf->nufa_lock);
- nufa_buf->local_xl_index++;
- nufa_buf->local_xl_index %= nufa_buf->local_xl_count;
- UNLOCK (&nufa_buf->nufa_lock);
- break;
- }
- }
-
- nufa_orig = nufa_buf->sched_index;
- while (1) {
- LOCK (&nufa_buf->nufa_lock);
- rr = nufa_buf->sched_index++;
- nufa_buf->sched_index = (nufa_buf->sched_index %
- nufa_buf->child_count);
- UNLOCK (&nufa_buf->nufa_lock);
-
- /* if 'eligible' or there are _no_ eligible nodes */
- if (nufa_buf->array[rr].eligible) {
- break;
- }
- if ((rr + 1) % nufa_buf->child_count == nufa_orig) {
- gf_log ("nufa", GF_LOG_CRITICAL,
- "No free space available on any server, "
- "using RR scheduler.");
- LOCK (&nufa_buf->nufa_lock);
- nufa_buf->sched_index++;
- nufa_buf->sched_index = (nufa_buf->sched_index %
- nufa_buf->child_count);
- UNLOCK (&nufa_buf->nufa_lock);
- break;
- }
- }
- return nufa_buf->array[rr].xl;
-}
-
-
-/**
- * notify
- */
-void
-nufa_notify (xlator_t *xl, int32_t event, void *data)
-{
- struct nufa_struct *nufa_buf =
- (struct nufa_struct *)*((long *)xl->private);
- int32_t idx = 0;
-
- if (!nufa_buf)
- return;
-
- for (idx = 0; idx < nufa_buf->child_count; idx++) {
- if (strcmp (nufa_buf->array[idx].xl->name,
- ((xlator_t *)data)->name) == 0)
- break;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- //nufa_buf->array[idx].eligible = 1;
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- nufa_buf->array[idx].eligible = 0;
- }
- break;
- default:
- {
- ;
- }
- break;
- }
-
-}
-
-int32_t
-nufa_mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_nufa_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-struct sched_ops sched = {
- .init = nufa_init,
- .fini = nufa_fini,
- .update = nufa_update,
- .schedule = nufa_schedule,
- .notify = nufa_notify,
- .mem_acct_init = nufa_mem_acct_init,
-};
-
-struct volume_options options[] = {
- { .key = { "scheduler.refresh-interval",
- "nufa.refresh-interval" },
- .type = GF_OPTION_TYPE_TIME
- },
- { .key = { "scheduler.limits.min-free-disk",
- "nufa.limits.min-free-disk" },
- .type = GF_OPTION_TYPE_PERCENT
- },
- { .key = { "scheduler.local-volume-name",
- "nufa.local-volume-name" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {NULL} }
-};
-
diff --git a/scheduler/random/src/Makefile.am b/scheduler/random/src/Makefile.am
deleted file mode 100644
index 572181336c2..00000000000
--- a/scheduler/random/src/Makefile.am
+++ /dev/null
@@ -1,14 +0,0 @@
-sched_LTLIBRARIES = random.la
-scheddir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler
-
-random_la_LDFLAGS = -module -avoidversion
-
-random_la_SOURCES = random.c
-random_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = random.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/scheduler/random/src/random-mem-types.h b/scheduler/random/src/random-mem-types.h
deleted file mode 100644
index ff30d9244fc..00000000000
--- a/scheduler/random/src/random-mem-types.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __RANDOM_MEM_TYPES_H__
-#define __RANDOM_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_random_mem_types_ {
- gf_random_mt_random_struct = gf_common_mt_end + 1,
- gf_random_mt_random_sched_struct,
- gf_random_mt_end
-};
-#endif
-
diff --git a/scheduler/random/src/random.c b/scheduler/random/src/random.c
deleted file mode 100644
index 03a284b66bd..00000000000
--- a/scheduler/random/src/random.c
+++ /dev/null
@@ -1,305 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <stdlib.h>
-#include <sys/time.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "random.h"
-#include "random-mem-types.h"
-
-#define RANDOM_LIMITS_MIN_FREE_DISK_DEFAULT 15
-#define RANDOM_REFRESH_INTERVAL_DEFAULT 10
-
-
-static int32_t
-random_init (xlator_t *xl)
-{
- struct random_struct *random_buf = NULL;
- xlator_list_t *trav_xl = xl->children;
- data_t *limit = NULL;
- int32_t index = 0;
-
- random_buf = GF_CALLOC (1, sizeof (struct random_struct),
- gf_random_mt_random_struct);
- ERR_ABORT (random_buf);
-
- /* Set the seed for the 'random' function */
- srandom ((uint32_t) time (NULL));
-
- limit = dict_get (xl->options, "scheduler.limits.min-free-disk");
- if (limit) {
- if (gf_string2percent (data_to_str (limit),
- &random_buf->min_free_disk) != 0) {
- gf_log ("random", GF_LOG_ERROR,
- "invalid number format \"%s\" of \"option "
- "scheduler.limits.min-free-disk\"",
- limit->data);
- return -1;
- }
- if (random_buf->min_free_disk >= 100) {
- gf_log ("random", GF_LOG_ERROR,
- "check the \"option random.limits.min-free"
- "-disk\", it should be percentage value");
- return -1;
- }
-
- } else {
- gf_log ("random", GF_LOG_WARNING,
- "No option for limit min-free-disk given, "
- "defaulting it to 5%%");
- random_buf->min_free_disk =
- RANDOM_LIMITS_MIN_FREE_DISK_DEFAULT;
- }
-
- limit = dict_get (xl->options, "scheduler.refresh-interval");
- if (limit) {
- if (gf_string2time (data_to_str (limit),
- &random_buf->refresh_interval) != 0) {
- gf_log ("random", GF_LOG_ERROR,
- "invalid number format \"%s\" of "
- "\"option random.refresh-interval\"",
- limit->data);
- return -1;
- }
- } else {
- random_buf->refresh_interval = RANDOM_REFRESH_INTERVAL_DEFAULT;
- }
-
- while (trav_xl) {
- index++;
- trav_xl = trav_xl->next;
- }
- random_buf->child_count = index;
- random_buf->array = GF_CALLOC (index,
- sizeof (struct random_sched_struct),
- gf_random_mt_random_sched_struct);
- ERR_ABORT (random_buf->array);
- trav_xl = xl->children;
- index = 0;
-
- while (trav_xl) {
- random_buf->array[index].xl = trav_xl->xlator;
- random_buf->array[index].eligible = 1;
- trav_xl = trav_xl->next;
- index++;
- }
- pthread_mutex_init (&random_buf->random_mutex, NULL);
-
- // put it at the proper place
- *((long *)xl->private) = (long)random_buf;
- return 0;
-}
-
-static void
-random_fini (xlator_t *xl)
-{
- struct random_struct *random_buf = NULL;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
- pthread_mutex_destroy (&random_buf->random_mutex);
- GF_FREE (random_buf->array);
- GF_FREE (random_buf);
-}
-
-
-static int32_t
-update_stat_array_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *xl,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *trav_stats)
-{
- int32_t idx = 0;
- int32_t percent = 0;
- struct random_struct *random_buf = NULL;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
-
- pthread_mutex_lock (&random_buf->random_mutex);
- for (idx = 0; idx < random_buf->child_count; idx++) {
- if (strcmp (random_buf->array[idx].xl->name,
- (char *)cookie) == 0)
- break;
- }
- pthread_mutex_unlock (&random_buf->random_mutex);
-
- if (op_ret == 0) {
- percent = ((trav_stats->free_disk *100) /
- trav_stats->total_disk_size);
- if (random_buf->min_free_disk > percent) {
- random_buf->array[idx].eligible = 0;
- } else {
- random_buf->array[idx].eligible = 1;
- }
- } else {
- random_buf->array[idx].eligible = 0;
- }
-
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-static void
-update_stat_array (xlator_t *xl)
-{
- int32_t idx;
- struct random_struct *random_buf = NULL;
- call_frame_t *frame = NULL;
- call_pool_t *pool = xl->ctx->pool;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
- for (idx = 0; idx < random_buf->child_count; idx++) {
- frame = create_frame (xl, pool);
-
- STACK_WIND_COOKIE (frame,
- update_stat_array_cbk,
- random_buf->array[idx].xl->name,
- random_buf->array[idx].xl,
- (random_buf->array[idx].xl)->mops->stats,
- 0);
- }
- return ;
-}
-
-static void
-random_update (xlator_t *xl)
-{
- struct timeval tv;
- struct random_struct *random_buf = NULL;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
-
- gettimeofday(&tv, NULL);
- if (tv.tv_sec > (random_buf->refresh_interval +
- random_buf->last_stat_entry.tv_sec)) {
- update_stat_array (xl);
- random_buf->last_stat_entry.tv_sec = tv.tv_sec;
- }
-}
-
-static xlator_t *
-random_schedule (xlator_t *xl, const void *path)
-{
- struct random_struct *random_buf = NULL;
- int32_t rand = 0;
- int32_t try = 0;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
-
- rand = (int32_t) (1.0*random_buf->child_count *
- (random() / (RAND_MAX + 1.0)));
-
- random_update (xl);
-
- while (!random_buf->array[rand].eligible) {
- if (try++ > 100) {
- /* there is a chance of this becoming a
- infinite loop otherwise. */
- break;
- }
- rand = (int32_t) (1.0*random_buf->child_count *
- (random() / (RAND_MAX + 1.0)));
- }
- return random_buf->array[rand].xl;
-}
-
-int32_t
-random_mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_random_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- " failed");
- return ret;
- }
-
- return ret;
-}
-
-/**
- * notify
- */
-void
-random_notify (xlator_t *xl, int32_t event, void *data)
-{
- struct random_struct *random_buf = NULL;
- int32_t idx = 0;
-
- random_buf = (struct random_struct *)*((long *)xl->private);
- if (!random_buf)
- return;
-
- for (idx = 0; idx < random_buf->child_count; idx++) {
- if (random_buf->array[idx].xl == (xlator_t *)data)
- break;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- //random_buf->array[idx].eligible = 1;
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- random_buf->array[idx].eligible = 0;
- }
- break;
- default:
- {
- ;
- }
- break;
- }
-
-}
-
-struct sched_ops sched = {
- .init = random_init,
- .fini = random_fini,
- .update = random_update,
- .schedule = random_schedule,
- .notify = random_notify,
- .mem_acct_init = random_mem_acct_init,
-};
-
-struct volume_options options[] = {
- { .key = { "scheduler.refresh-interval",
- "random.refresh-interval" },
- .type = GF_OPTION_TYPE_TIME
- },
- { .key = { "scheduler.limits.min-free-disk",
- "random.limits.min-free-disk" },
- .type = GF_OPTION_TYPE_PERCENT
- },
- { .key = {NULL} }
-};
diff --git a/scheduler/random/src/random.h b/scheduler/random/src/random.h
deleted file mode 100644
index 154b1bdfb24..00000000000
--- a/scheduler/random/src/random.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _RANDOM_H
-#define _RANDOM_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include <sys/time.h>
-#include "scheduler.h"
-
-struct random_sched_struct {
- xlator_t *xl;
- unsigned char eligible;
-};
-
-struct random_struct {
- int32_t child_count;
- uint32_t refresh_interval;
- uint32_t min_free_disk;
- struct timeval last_stat_entry;
- struct random_sched_struct *array;
- pthread_mutex_t random_mutex;
-};
-
-#endif /* _RANDOM_H */
diff --git a/scheduler/rr/Makefile.am b/scheduler/rr/Makefile.am
deleted file mode 100644
index d471a3f9243..00000000000
--- a/scheduler/rr/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/scheduler/rr/src/Makefile.am b/scheduler/rr/src/Makefile.am
deleted file mode 100644
index 7e911c0eda8..00000000000
--- a/scheduler/rr/src/Makefile.am
+++ /dev/null
@@ -1,13 +0,0 @@
-sched_LTLIBRARIES = rr.la
-scheddir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler
-
-rr_la_LDFLAGS = -module -avoidversion
-
-rr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-rr_la_SOURCES = rr.c rr-options.c
-noinst_HEADERS = rr.h rr-options.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/scheduler/rr/src/rr-mem-types.h b/scheduler/rr/src/rr-mem-types.h
deleted file mode 100644
index cb05323cd77..00000000000
--- a/scheduler/rr/src/rr-mem-types.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __RR_MEM_TYPES_H__
-#define __RR_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_rr_mem_types_ {
- gf_rr_mt_rr_t = gf_common_mt_end + 1,
- gf_rr_mt_rr_subvolume_t,
- gf_rr_mt_end
-};
-#endif
-
diff --git a/scheduler/rr/src/rr-options.c b/scheduler/rr/src/rr-options.c
deleted file mode 100644
index 505b1713e33..00000000000
--- a/scheduler/rr/src/rr-options.c
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "scheduler.h"
-#include "rr-options.h"
-
-#define RR_LIMITS_MIN_FREE_DISK_OPTION_STRING "scheduler.limits.min-free-disk"
-#define RR_LIMITS_MIN_FREE_DISK_VALUE_DEFAULT 15
-#define RR_LIMITS_MIN_FREE_DISK_VALUE_MIN 0
-#define RR_LIMITS_MIN_FREE_DISK_VALUE_MAX 100
-
-#define RR_REFRESH_INTERVAL_OPTION_STRING "scheduler.refresh-interval"
-#define RR_REFRESH_INTERVAL_VALUE_DEFAULT 10
-
-#define RR_READ_ONLY_SUBVOLUMES_OPTION_STRING "scheduler.read-only-subvolumes"
-
-#define LOG_ERROR(args...) gf_log ("rr-options", GF_LOG_ERROR, ##args)
-#define LOG_WARNING(args...) gf_log ("rr-options", GF_LOG_WARNING, ##args)
-
-static int
-_rr_options_min_free_disk_validate (const char *value_string, uint32_t *n)
-{
- uint32_t value = 0;
-
- if (value_string == NULL)
- {
- return -1;
- }
-
- if (gf_string2percent (value_string, &value) != 0)
- {
- gf_log ("rr",
- GF_LOG_ERROR,
- "invalid number format [%s] of option [%s]",
- value_string,
- RR_LIMITS_MIN_FREE_DISK_OPTION_STRING);
- return -1;
- }
-
- if ((value <= RR_LIMITS_MIN_FREE_DISK_VALUE_MIN) ||
- (value >= RR_LIMITS_MIN_FREE_DISK_VALUE_MAX))
- {
- gf_log ("rr",
- GF_LOG_ERROR,
- "out of range [%d] of option [%s]. Allowed range is 0 to 100.",
- value,
- RR_LIMITS_MIN_FREE_DISK_OPTION_STRING);
- return -1;
- }
-
- *n = value;
-
- return 0;
-}
-
-static int
-_rr_options_refresh_interval_validate (const char *value_string, uint32_t *n)
-{
- uint32_t value = 0;
-
- if (value_string == NULL)
- {
- return -1;
- }
-
- if (gf_string2time (value_string, &value) != 0)
- {
- gf_log ("rr",
- GF_LOG_ERROR,
- "invalid number format [%s] of option [%s]",
- value_string,
- RR_REFRESH_INTERVAL_OPTION_STRING);
- return -1;
- }
-
- *n = value;
-
- return 0;
-}
-
-static int
-_rr_options_read_only_subvolumes_validate (const char *value_string,
- char ***volume_list,
- uint64_t *volume_count)
-{
- char **vlist = NULL;
- int vcount = 0;
- int i = 0;
-
- if (value_string == NULL || volume_list == NULL || volume_count)
- {
- return -1;
- }
-
- if (gf_strsplit (value_string,
- ", ",
- &vlist,
- &vcount) != 0)
- {
- gf_log ("rr",
- GF_LOG_ERROR,
- "invalid subvolume list [%s] of option [%s]",
- value_string,
- RR_READ_ONLY_SUBVOLUMES_OPTION_STRING);
- return -1;
- }
-
- for (i = 0; i < vcount; i++)
- {
- if (gf_volume_name_validate (vlist[i]) != 0)
- {
- gf_log ("rr",
- GF_LOG_ERROR,
- "invalid subvolume name [%s] in [%s] of option [%s]",
- vlist[i],
- value_string,
- RR_READ_ONLY_SUBVOLUMES_OPTION_STRING);
- goto free_exit;
- }
- }
-
- *volume_list = vlist;
- *volume_count = vcount;
-
- return 0;
-
- free_exit:
- for (i = 0; i < vcount; i++)
- {
- GF_FREE (vlist[i]);
- }
- GF_FREE (vlist);
-
- return -1;
-}
-
-int
-rr_options_validate (dict_t *options, rr_options_t *rr_options)
-{
- char *value_string = NULL;
-
- if (options == NULL || rr_options == NULL)
- {
- return -1;
- }
-
- if (dict_get (options, RR_LIMITS_MIN_FREE_DISK_OPTION_STRING))
- if (data_to_str (dict_get (options, RR_LIMITS_MIN_FREE_DISK_OPTION_STRING)))
- value_string = data_to_str (dict_get (options,
- RR_LIMITS_MIN_FREE_DISK_OPTION_STRING));
- if (value_string != NULL)
- {
- if (_rr_options_min_free_disk_validate (value_string,
- &rr_options->min_free_disk) != 0)
- {
- return -1;
- }
-
- gf_log ("rr",
- GF_LOG_WARNING,
- "using %s = %d",
- RR_LIMITS_MIN_FREE_DISK_OPTION_STRING,
- rr_options->min_free_disk);
- }
- else
- {
- rr_options->min_free_disk = RR_LIMITS_MIN_FREE_DISK_VALUE_DEFAULT;
-
- gf_log ("rr", GF_LOG_DEBUG,
- "using %s = %d [default]",
- RR_LIMITS_MIN_FREE_DISK_OPTION_STRING,
- rr_options->min_free_disk);
- }
-
- value_string = NULL;
- if (dict_get (options, RR_REFRESH_INTERVAL_OPTION_STRING))
- value_string = data_to_str (dict_get (options,
- RR_REFRESH_INTERVAL_OPTION_STRING));
- if (value_string != NULL)
- {
- if (_rr_options_refresh_interval_validate (value_string,
- &rr_options->refresh_interval) != 0)
- {
- return -1;
- }
-
- gf_log ("rr",
- GF_LOG_WARNING,
- "using %s = %d",
- RR_REFRESH_INTERVAL_OPTION_STRING,
- rr_options->refresh_interval);
- }
- else
- {
- rr_options->refresh_interval = RR_REFRESH_INTERVAL_VALUE_DEFAULT;
-
- gf_log ("rr", GF_LOG_DEBUG,
- "using %s = %d [default]",
- RR_REFRESH_INTERVAL_OPTION_STRING,
- rr_options->refresh_interval);
- }
-
- value_string = NULL;
- if (dict_get (options, RR_READ_ONLY_SUBVOLUMES_OPTION_STRING))
- value_string = data_to_str (dict_get (options,
- RR_READ_ONLY_SUBVOLUMES_OPTION_STRING));
- if (value_string != NULL)
- {
- if (_rr_options_read_only_subvolumes_validate (value_string,
- &rr_options->read_only_subvolume_list,
- &rr_options->read_only_subvolume_count) != 0)
- {
- return -1;
- }
-
- gf_log ("rr",
- GF_LOG_WARNING,
- "using %s = [%s]",
- RR_READ_ONLY_SUBVOLUMES_OPTION_STRING,
- value_string);
- }
-
- return 0;
-}
-
-struct volume_options options[] = {
- { .key = { "scheduler.refresh-interval",
- "rr.refresh-interval" },
- .type = GF_OPTION_TYPE_TIME
- },
- { .key = { "scheduler.limits.min-free-disk",
- "rr.limits.min-free-disk" },
- .type = GF_OPTION_TYPE_PERCENT
- },
- { .key = { "scheduler.read-only-subvolumes",
- "rr.read-only-subvolumes" },
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {NULL} }
-};
diff --git a/scheduler/rr/src/rr-options.h b/scheduler/rr/src/rr-options.h
deleted file mode 100644
index 1b0a1ef3d90..00000000000
--- a/scheduler/rr/src/rr-options.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _RR_OPTIONS_H
-#define _RR_OPTIONS_H
-
-struct rr_options
-{
- uint32_t min_free_disk;
- uint32_t refresh_interval;
- char **read_only_subvolume_list;
- uint64_t read_only_subvolume_count;
-};
-typedef struct rr_options rr_options_t;
-
-int rr_options_validate (dict_t *options, rr_options_t *rr_options);
-
-#endif
diff --git a/scheduler/rr/src/rr.c b/scheduler/rr/src/rr.c
deleted file mode 100644
index e7b556e671b..00000000000
--- a/scheduler/rr/src/rr.c
+++ /dev/null
@@ -1,567 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/time.h>
-#include <stdlib.h>
-
-#include <stdint.h>
-
-#include "scheduler.h"
-
-#include "rr-options.h"
-#include "rr.h"
-#include "rr-mem-types.h"
-
-#define RR_MIN_FREE_DISK_NOT_REACHED 0
-#define RR_MIN_FREE_DISK_REACHED 1
-
-#define RR_SUBVOLUME_OFFLINE 0
-#define RR_SUBVOLUME_ONLINE 1
-
-#define LOG_ERROR(args...) gf_log ("rr", GF_LOG_ERROR, ##args)
-#define LOG_WARNING(args...) gf_log ("rr", GF_LOG_WARNING, ##args)
-#define LOG_CRITICAL(args...) gf_log ("rr", GF_LOG_CRITICAL, ##args)
-
-#define ROUND_ROBIN(index, count) ((index + 1) % count)
-
-static int
-_cleanup_rr (rr_t *rr)
-{
- int i;
-
- if (rr == NULL)
- {
- return -1;
- }
-
- if (rr->options.read_only_subvolume_list != NULL)
- {
- for (i = 0; i < rr->options.read_only_subvolume_count; i++)
- {
- GF_FREE (rr->options.read_only_subvolume_list[i]);
- }
- GF_FREE (rr->options.read_only_subvolume_list);
- }
-
- GF_FREE (rr->subvolume_list);
-
- GF_FREE (rr);
-
- return 0;
-}
-
-int
-rr_init (xlator_t *this_xl)
-{
- rr_t *rr = NULL;
- dict_t *options = NULL;
- xlator_list_t *children = NULL;
- uint64_t children_count = 0;
- int i = 0;
- int j = 0;
-
- if (this_xl == NULL)
- {
- return -1;
- }
-
- if ((options = this_xl->options) == NULL)
- {
- return -1;
- }
-
- if ((children = this_xl->children) == NULL)
- {
- return -1;
- }
-
- if ((rr = GF_CALLOC (1, sizeof (rr_t), gf_rr_mt_rr_t)) == NULL)
- {
- return -1;
- }
-
- if (rr_options_validate (options, &rr->options) != 0)
- {
- GF_FREE (rr);
- return -1;
- }
-
- for (i = 0; i < rr->options.read_only_subvolume_count; i++)
- {
- char found = 0;
-
- for (children = this_xl->children;
- children != NULL;
- children = children->next)
- {
- if (strcmp (rr->options.read_only_subvolume_list[i],
- children->xlator->name) == 0)
- {
- found = 1;
- break;
- }
- }
-
- if (!found)
- {
- LOG_ERROR ("read-only subvolume [%s] not found in volume list",
- rr->options.read_only_subvolume_list[i]);
- _cleanup_rr (rr);
- return -1;
- }
- }
-
- for (children = this_xl->children;
- children != NULL;
- children = children->next)
- {
- children_count++;
- }
-
- /* bala: excluding read_only_subvolumes */
- if ((rr->subvolume_count = children_count -
- rr->options.read_only_subvolume_count) == 0)
- {
- LOG_ERROR ("no writable volumes found for scheduling");
- _cleanup_rr (rr);
- return -1;
- }
-
- if ((rr->subvolume_list = GF_CALLOC (rr->subvolume_count,
- sizeof (rr_subvolume_t),
- gf_rr_mt_rr_subvolume_t)) == NULL)
- {
- _cleanup_rr (rr);
- return -1;
- }
-
- i = 0;
- j = 0;
- for (children = this_xl->children;
- children != NULL;
- children = children->next)
- {
- char found = 0;
-
- for (j = 0; j < rr->options.read_only_subvolume_count; j++)
- {
- if (strcmp (rr->options.read_only_subvolume_list[i],
- children->xlator->name) == 0)
- {
- found = 1;
- break;
- }
- }
-
- if (!found)
- {
- rr_subvolume_t *subvolume = NULL;
-
- subvolume = &rr->subvolume_list[i];
-
- subvolume->xl = children->xlator;
- subvolume->free_disk_status = RR_MIN_FREE_DISK_NOT_REACHED;
- subvolume->status = RR_SUBVOLUME_ONLINE;
-
- i++;
- }
- }
-
- rr->schedule_index = UINT64_MAX;
- rr->last_stat_fetched_time.tv_sec = 0;
- rr->last_stat_fetched_time.tv_usec = 0;
- pthread_mutex_init (&rr->mutex, NULL);
-
- *((long *)this_xl->private) = (long)rr;
-
- return 0;
-}
-
-void
-rr_fini (xlator_t *this_xl)
-{
- rr_t *rr = NULL;
-
- if (this_xl == NULL)
- {
- return;
- }
-
- if ((rr = (rr_t *) *((long *)this_xl->private)) != NULL)
- {
- pthread_mutex_destroy (&rr->mutex);
- _cleanup_rr (rr);
- this_xl->private = NULL;
- }
-
- return;
-}
-
-xlator_t *
-rr_schedule (xlator_t *this_xl, const void *path)
-{
- rr_t *rr = NULL;
- uint64_t next_schedule_index = 0;
- int i = 0;
-
- if (this_xl == NULL || path == NULL)
- {
- return NULL;
- }
-
- rr = (rr_t *) *((long *)this_xl->private);
- next_schedule_index = ROUND_ROBIN (rr->schedule_index,
- rr->subvolume_count);
-
- rr_update (this_xl);
-
- for (i = next_schedule_index; i < rr->subvolume_count; i++)
- {
- if (rr->subvolume_list[i].status == RR_SUBVOLUME_ONLINE &&
- rr->subvolume_list[i].status == RR_MIN_FREE_DISK_NOT_REACHED)
- {
- pthread_mutex_lock (&rr->mutex);
- rr->schedule_index = i;
- pthread_mutex_unlock (&rr->mutex);
- return rr->subvolume_list[i].xl;
- }
- }
-
- for (i = 0; i < next_schedule_index; i++)
- {
- if (rr->subvolume_list[i].status == RR_SUBVOLUME_ONLINE &&
- rr->subvolume_list[i].status == RR_MIN_FREE_DISK_NOT_REACHED)
- {
- pthread_mutex_lock (&rr->mutex);
- rr->schedule_index = i;
- pthread_mutex_unlock (&rr->mutex);
- return rr->subvolume_list[i].xl;
- }
- }
-
- for (i = next_schedule_index; i < rr->subvolume_count; i++)
- {
- if (rr->subvolume_list[i].status == RR_SUBVOLUME_ONLINE)
- {
- pthread_mutex_lock (&rr->mutex);
- rr->schedule_index = i;
- pthread_mutex_unlock (&rr->mutex);
- return rr->subvolume_list[i].xl;
- }
- }
-
- for (i = 0; i < next_schedule_index; i++)
- {
- if (rr->subvolume_list[i].status == RR_SUBVOLUME_ONLINE)
- {
- pthread_mutex_lock (&rr->mutex);
- rr->schedule_index = i;
- pthread_mutex_unlock (&rr->mutex);
- return rr->subvolume_list[i].xl;
- }
- }
-
- return NULL;
-}
-
-void
-rr_update (xlator_t *this_xl)
-{
- rr_t *rr = NULL;
- struct timeval ctime = {0, 0};
- int i = 0;
-
- if (this_xl == NULL)
- {
- return ;
- }
-
- if ((rr = (rr_t *) *((long *)this_xl->private)) == NULL)
- {
- return ;
- }
-
- if (gettimeofday (&ctime, NULL) != 0)
- {
- return ;
- }
-
- if (ctime.tv_sec > (rr->options.refresh_interval +
- rr->last_stat_fetched_time.tv_sec))
- {
- pthread_mutex_lock (&rr->mutex);
- rr->last_stat_fetched_time = ctime;
- pthread_mutex_unlock (&rr->mutex);
-
- for (i = 0; i < rr->subvolume_count; i++)
- {
- xlator_t *subvolume_xl = NULL;
- call_frame_t *frame = NULL;
- call_pool_t *pool = NULL;
-
- subvolume_xl = rr->subvolume_list[i].xl;
-
- pool = this_xl->ctx->pool;
-
- frame = create_frame (this_xl, pool);
-
- STACK_WIND_COOKIE (frame,
- rr_update_cbk,
- subvolume_xl->name,
- subvolume_xl,
- subvolume_xl->mops->stats,
- 0);
- }
- }
-
- return ;
-}
-
-int
-rr_update_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this_xl,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *stats)
-{
- rr_t *rr = NULL;
- rr_subvolume_t *subvolume = NULL;
- uint8_t free_disk_percent = 0;
- int i = 0;
-
- if (frame == NULL)
- {
- return -1;
- }
-
- if (cookie == NULL || this_xl == NULL)
- {
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- if (op_ret == 0 && stats == NULL)
- {
- LOG_CRITICAL ("fatal! op_ret is 0 and stats is NULL. "
- "Please report this to <gluster-devel@nongnu.org>");
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- if ((rr = (rr_t *) *((long *)this_xl->private)) == NULL)
- {
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- for (i = 0; i < rr->subvolume_count; i++)
- {
- if (rr->subvolume_list[i].xl->name == (char *) cookie)
- {
- subvolume = &rr->subvolume_list[i];
- break;
- }
- }
-
- if (subvolume == NULL)
- {
- LOG_ERROR ("unknown cookie [%s]", (char *) cookie);
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- if (op_ret == 0)
- {
- free_disk_percent = (stats->free_disk * 100) / stats->total_disk_size;
- if (free_disk_percent > rr->options.min_free_disk)
- {
- if (subvolume->free_disk_status != RR_MIN_FREE_DISK_NOT_REACHED)
- {
- pthread_mutex_lock (&rr->mutex);
- subvolume->free_disk_status = RR_MIN_FREE_DISK_NOT_REACHED;
- pthread_mutex_unlock (&rr->mutex);
- LOG_WARNING ("subvolume [%s] is available with free space for scheduling",
- subvolume->xl->name);
- }
- }
- else
- {
- if (subvolume->free_disk_status != RR_MIN_FREE_DISK_REACHED)
- {
- pthread_mutex_lock (&rr->mutex);
- subvolume->free_disk_status = RR_MIN_FREE_DISK_REACHED;
- pthread_mutex_unlock (&rr->mutex);
- LOG_WARNING ("subvolume [%s] reached minimum disk space requirement",
- subvolume->xl->name);
- }
- }
- }
- else
- {
- pthread_mutex_lock (&rr->mutex);
- subvolume->status = RR_SUBVOLUME_OFFLINE;
- pthread_mutex_unlock (&rr->mutex);
- LOG_ERROR ("unable to get subvolume [%s] status information and "
- "scheduling is disabled",
- subvolume->xl->name);
- }
-
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-void
-rr_notify (xlator_t *this_xl, int32_t event, void *data)
-{
- rr_t *rr = NULL;
- rr_subvolume_t *subvolume = NULL;
- xlator_t *subvolume_xl = NULL;
- int i = 0, ret = 0;
- call_frame_t *frame = NULL;
- call_pool_t *pool = NULL;
- dict_t *xattr = get_new_dict ();
- int32_t version[1] = {1};
-
- if (this_xl == NULL || data == NULL) {
- return ;
- }
-
- if ((rr = (rr_t *) *((long *)this_xl->private)) == NULL) {
- return ;
- }
-
- subvolume_xl = (xlator_t *) data;
-
- for (i = 0; i < rr->subvolume_count; i++) {
- if (rr->subvolume_list[i].xl == subvolume_xl) {
- subvolume = &rr->subvolume_list[i];
- break;
- }
- }
-
- switch (event) {
- case GF_EVENT_CHILD_UP:
- /* Seeding, to be done only once */
- if (rr->first_time && (i == rr->subvolume_count)) {
- loc_t loc = {0,};
- xlator_t *trav = NULL;
-
- pool = this_xl->ctx->pool;
- frame = create_frame (this_xl, pool);
- ret = dict_set_bin (xattr, "trusted.glusterfs.scheduler.rr",
- version, sizeof (int32_t));
- if (-1 == ret) {
- gf_log (this_xl->name, GF_LOG_ERROR, "rr seed setting failed");
- }
- if (xattr)
- dict_ref (xattr);
-
- loc.path = gf_strdup ("/");
- for (trav = this_xl->parents->xlator; trav; trav = trav->parents->xlator) {
- if (trav->itable) {
- loc.inode = trav->itable->root;
- break;
- }
- }
- STACK_WIND (frame,
- rr_notify_cbk,
- (xlator_t *)data,
- ((xlator_t *)data)->fops->xattrop,
- &loc,
- GF_XATTROP_ADD_ARRAY,
- xattr);
-
- if (xattr)
- dict_unref (xattr);
-
- rr->first_time = 0;
- }
- if (subvolume) {
- pthread_mutex_lock (&rr->mutex);
- subvolume->status = RR_SUBVOLUME_ONLINE;
- pthread_mutex_unlock (&rr->mutex);
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- if (subvolume) {
- pthread_mutex_lock (&rr->mutex);
- subvolume->status = RR_SUBVOLUME_OFFLINE;
- pthread_mutex_unlock (&rr->mutex);
- }
- break;
- }
-
- return ;
-}
-
-int
-rr_notify_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this_xl,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *xattr)
-{
- rr_t *rr = NULL;
- int32_t *index = NULL;
- int32_t ret = -1;
- void *tmp_index_ptr = NULL;
-
- if (frame == NULL)
- {
- return -1;
- }
-
- if ((this_xl == NULL) || (op_ret == -1))
- {
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- if ((rr = (rr_t *) *((long *)this_xl->private)) == NULL)
- {
- STACK_DESTROY (frame->root);
- return -1;
- }
-
- ret = dict_get_bin (xattr, "trusted.glusterfs.scheduler.rr", &tmp_index_ptr);
- index = tmp_index_ptr;
- if (ret == 0)
- rr->schedule_index = (index[0] % rr->subvolume_count);
- else
- rr->schedule_index = 0;
-
- STACK_DESTROY (frame->root);
- return 0;
-}
-
-struct sched_ops sched = {
- .init = rr_init,
- .fini = rr_fini,
- .update = rr_update,
- .schedule = rr_schedule,
- .notify = rr_notify
-};
-
diff --git a/scheduler/rr/src/rr.h b/scheduler/rr/src/rr.h
deleted file mode 100644
index 3bd95929b61..00000000000
--- a/scheduler/rr/src/rr.h
+++ /dev/null
@@ -1,70 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _RR_H
-#define _RR_H
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "scheduler.h"
-#include <stdint.h>
-#include <sys/time.h>
-
-struct rr_subvolume
-{
- xlator_t *xl;
- uint8_t free_disk_status;
- uint8_t status;
-};
-typedef struct rr_subvolume rr_subvolume_t;
-
-struct rr
-{
- rr_options_t options;
- rr_subvolume_t *subvolume_list;
- uint64_t subvolume_count;
- uint64_t schedule_index;
- struct timeval last_stat_fetched_time;
- pthread_mutex_t mutex;
- char first_time;
-};
-typedef struct rr rr_t;
-
-int rr_init (xlator_t *this_xl);
-void rr_fini (xlator_t *this_xl);
-xlator_t *rr_schedule (xlator_t *this_xl, const void *path);
-void rr_update (xlator_t *this_xl);
-int rr_update_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this_xl,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *stats);
-void rr_notify (xlator_t *this_xl, int32_t event, void *data);
-int rr_notify_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this_xl,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *xattr);
-
-#endif /* _RR_H */
diff --git a/scheduler/switch/Makefile.am b/scheduler/switch/Makefile.am
deleted file mode 100644
index d471a3f9243..00000000000
--- a/scheduler/switch/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/scheduler/switch/src/Makefile.am b/scheduler/switch/src/Makefile.am
deleted file mode 100644
index dc7d16d40d2..00000000000
--- a/scheduler/switch/src/Makefile.am
+++ /dev/null
@@ -1,12 +0,0 @@
-sched_LTLIBRARIES = switch.la
-scheddir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/scheduler
-
-switch_la_LDFLAGS = -module -avoidversion
-
-switch_la_SOURCES = switch.c
-switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
diff --git a/scheduler/switch/src/switch-mem-types.h b/scheduler/switch/src/switch-mem-types.h
deleted file mode 100644
index 7039b035d0f..00000000000
--- a/scheduler/switch/src/switch-mem-types.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __SWITCH_MEM_TYPES_H__
-#define __SWITCH_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_switch_mem_types_ {
- gf_switch_mt_switch_struct = gf_common_mt_end + 1,
- gf_switch_mt_switch_sched_struct,
- gf_switch_mt_switch_sched_array,
- gf_switch_mt_end
-};
-#endif
-
diff --git a/scheduler/switch/src/switch.c b/scheduler/switch/src/switch.c
deleted file mode 100644
index 1362e5cc994..00000000000
--- a/scheduler/switch/src/switch.c
+++ /dev/null
@@ -1,451 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <sys/time.h>
-#include <stdlib.h>
-#include <fnmatch.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "scheduler.h"
-#include "switch-mem-types.h"
-
-struct switch_sched_array {
- xlator_t *xl;
- int32_t eligible;
- int32_t considered;
-};
-
-/* Select one of this struct based on the path's pattern match */
-struct switch_sched_struct {
- struct switch_sched_struct *next;
- struct switch_sched_array *array;
- char path_pattern[256];
- int32_t node_index; /* Index of the node in
- this pattern. */
- int32_t num_child; /* Total num of child nodes
- with this pattern. */
-};
-
-struct switch_struct {
- struct switch_sched_struct *cond;
- struct switch_sched_array *array;
- pthread_mutex_t switch_mutex;
- int32_t child_count;
-};
-
-/* This function should return child node as '*:subvolumes' is inserterd */
-static xlator_t *
-switch_get_matching_xl (const char *path, struct switch_sched_struct *cond)
-{
- struct switch_sched_struct *trav = cond;
- char *pathname = gf_strdup (path);
- int index = 0;
-
- while (trav) {
- if (fnmatch (trav->path_pattern,
- pathname, FNM_NOESCAPE) == 0) {
- GF_FREE (pathname);
- trav->node_index %= trav->num_child;
- index = (trav->node_index++) % trav->num_child;
- return trav->array[index].xl;
- }
- trav = trav->next;
- }
- GF_FREE (pathname);
- return NULL;
-}
-
-static int32_t
-gf_unify_valid_child (const char *child,
- xlator_t *xl)
-{
- xlator_list_t *children = NULL, *prev = NULL;
- int32_t ret = 0;
-
- children = xl->children;
- do {
- if (!strcmp (child, children->xlator->name)) {
- ret = 1;
- break;
- }
-
- prev = children;
- children = prev->next;
- } while (children);
-
- return ret;
-}
-
-static int32_t
-switch_init (xlator_t *xl)
-{
- int32_t index = 0;
- data_t *data = NULL;
- char *child = NULL;
- char *tmp = NULL;
- char *childs_data = NULL;
- xlator_list_t *trav_xl = xl->children;
- struct switch_struct *switch_buf = NULL;
-
- switch_buf = GF_CALLOC (1, sizeof (struct switch_struct),
- gf_switch_mt_switch_struct);
- ERR_ABORT (switch_buf);
-
- while (trav_xl) {
- index++;
- trav_xl = trav_xl->next;
- }
- switch_buf->child_count = index;
- switch_buf->array = GF_CALLOC (index + 1,
- sizeof (struct switch_sched_struct),
- gf_switch_mt_switch_sched_struct);
- ERR_ABORT (switch_buf->array);
- trav_xl = xl->children;
- index = 0;
-
- while (trav_xl) {
- switch_buf->array[index].xl = trav_xl->xlator;
- switch_buf->array[index].eligible = 1;
- trav_xl = trav_xl->next;
- index++;
- }
-
- data = dict_get (xl->options, "scheduler.read-only-subvolumes");
- if (data) {
- childs_data = gf_strdup (data->data);
- child = strtok_r (childs_data, ",", &tmp);
- while (child) {
- for (index = 1;
- index < switch_buf->child_count; index++) {
- if (strcmp (switch_buf->array[index - 1].xl->name, child) == 0) {
- gf_log ("switch", GF_LOG_DEBUG,
- "Child '%s' is read-only",
- child);
- memcpy (&(switch_buf->array[index-1]),
- &(switch_buf->array[switch_buf->child_count - 1]),
- sizeof (struct switch_sched_array));
- switch_buf->child_count--;
- break;
- }
- }
- child = strtok_r (NULL, ",", &tmp);
- }
- GF_FREE (childs_data);
- }
-
- data = dict_get (xl->options, "scheduler.local-volume-name");
- if (data) {
- /* Means, give preference to that node first */
- gf_log ("switch", GF_LOG_DEBUG,
- "local volume defined as %s", data->data);
-
- /* TODO: parse it properly, have an extra index to
- specify that first */
- }
-
- /* *jpg:child1,child2;*mpg:child3;*:child4,child5,child6 */
- data = dict_get (xl->options, "scheduler.switch.case");
- if (data) {
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *dup_str = NULL;
- char *switch_str = NULL;
- char *pattern = NULL;
- char *childs = NULL;
- struct switch_sched_struct *switch_opt = NULL;
- struct switch_sched_struct *trav = NULL;
- /* Get the pattern for considering switch case.
- "option block-size *avi:10MB" etc */
- switch_str = strtok_r (data->data, ";", &tmp_str);
- while (switch_str) {
- dup_str = gf_strdup (switch_str);
- switch_opt =
- GF_CALLOC (1, sizeof (struct switch_sched_struct), gf_switch_mt_switch_sched_struct);
- ERR_ABORT (switch_opt);
-
- /* Link it to the main structure */
- if (switch_buf->cond) {
- /* there are already few entries */
- trav = switch_buf->cond;
- while (trav->next)
- trav = trav->next;
- trav->next = switch_opt;
- } else {
- /* First entry */
- switch_buf->cond = switch_opt;
- }
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- childs = strtok_r (NULL, ":", &tmp_str1);
- if (strncmp (pattern, "*", 2) == 0) {
- gf_log ("switch", GF_LOG_WARNING,
- "'*' pattern will be taken by default "
- "for all the unconfigured child nodes,"
- " hence neglecting current option");
- switch_str = strtok_r (NULL, ";", &tmp_str);
- GF_FREE (dup_str);
- continue;
- }
- memcpy (switch_opt->path_pattern,
- pattern, strlen (pattern));
- if (childs) {
- int32_t idx = 0;
- char *tmp1 = NULL;
- char *dup_childs = NULL;
- /* TODO: get the list of child nodes for
- the given pattern */
- dup_childs = gf_strdup (childs);
- child = strtok_r (dup_childs, ",", &tmp);
- while (child) {
- if (gf_unify_valid_child (child, xl)) {
- idx++;
- child = strtok_r (NULL, ",", &tmp);
- } else {
- gf_log ("switch", GF_LOG_ERROR,
- "%s is not a subvolume "
- "of %s. pattern can "
- "only be scheduled only"
- " to a subvolume of %s",
- child, xl->name,
- xl->name);
- return -1;
- }
- }
- GF_FREE (dup_childs);
- child = strtok_r (childs, ",", &tmp1);
- switch_opt->num_child = idx;
- switch_opt->array =
- GF_CALLOC (1, idx * sizeof (struct switch_sched_array), gf_switch_mt_switch_sched_array);
- ERR_ABORT (switch_opt->array);
- idx = 0;
- child = strtok_r (childs, ",", &tmp);
- while (child) {
- for (index = 1;
- index < switch_buf->child_count;
- index++) {
- if (strcmp (switch_buf->array[index - 1].xl->name,
- child) == 0) {
- gf_log ("switch",
- GF_LOG_DEBUG,
- "'%s' pattern will be scheduled to \"%s\"",
- switch_opt->path_pattern, child);
- /*
- if (switch_buf->array[index-1].considered) {
- gf_log ("switch", GF_LOG_DEBUG,
- "ambiguity found, exiting");
- return -1;
- }
- */
- switch_opt->array[idx].xl = switch_buf->array[index-1].xl;
- switch_buf->array[index-1].considered = 1;
- idx++;
- break;
- }
- }
- child = strtok_r (NULL, ",", &tmp1);
- }
- } else {
- /* error */
- gf_log ("switch", GF_LOG_ERROR,
- "Check \"scheduler.switch.case\" "
- "option in unify volume. Exiting");
- GF_FREE (switch_buf->array);
- GF_FREE (switch_buf);
- return -1;
- }
- GF_FREE (dup_str);
- switch_str = strtok_r (NULL, ";", &tmp_str);
- }
- }
- /* Now, all the pattern based considerations done, so for all the
- * remaining pattern, '*' to all the remaining child nodes
- */
- {
- struct switch_sched_struct *switch_opt = NULL;
- int32_t flag = 0;
- int32_t index = 0;
- for (index=0; index < switch_buf->child_count; index++) {
- /* check for considered flag */
- if (switch_buf->array[index].considered)
- continue;
- flag++;
- }
- if (!flag) {
- gf_log ("switch", GF_LOG_ERROR,
- "No nodes left for pattern '*'. Exiting.");
- return -1;
- }
- switch_opt = GF_CALLOC (1, sizeof (struct switch_sched_struct), gf_switch_mt_switch_sched_struct);
- ERR_ABORT (switch_opt);
- if (switch_buf->cond) {
- /* there are already few entries */
- struct switch_sched_struct *trav = switch_buf->cond;
- while (trav->next)
- trav = trav->next;
- trav->next = switch_opt;
- } else {
- /* First entry */
- switch_buf->cond = switch_opt;
- }
- /* Add the '*' pattern to the array */
- memcpy (switch_opt->path_pattern, "*", 2);
- switch_opt->num_child = flag;
- switch_opt->array =
- GF_CALLOC (1, flag * sizeof (struct switch_sched_array), gf_switch_mt_switch_sched_array);
- ERR_ABORT (switch_opt->array);
- flag = 0;
- for (index=0; index < switch_buf->child_count; index++) {
- /* check for considered flag */
- if (switch_buf->array[index].considered)
- continue;
- gf_log ("switch", GF_LOG_DEBUG,
- "'%s' pattern will be scheduled to \"%s\"",
- switch_opt->path_pattern,
- switch_buf->array[index].xl->name);
- switch_opt->array[flag].xl =
- switch_buf->array[index].xl;
- switch_buf->array[index].considered = 1;
- flag++;
- }
- }
-
- pthread_mutex_init (&switch_buf->switch_mutex, NULL);
-
- // put it at the proper place
- *((long *)xl->private) = (long)switch_buf;
-
- return 0;
-}
-
-static void
-switch_fini (xlator_t *xl)
-{
- /* TODO: free all the allocated entries */
- struct switch_struct *switch_buf = NULL;
- switch_buf = (struct switch_struct *)*((long *)xl->private);
-
- pthread_mutex_destroy (&switch_buf->switch_mutex);
- GF_FREE (switch_buf->array);
- GF_FREE (switch_buf);
-}
-
-static xlator_t *
-switch_schedule (xlator_t *xl, const void *path)
-{
- struct switch_struct *switch_buf = NULL;
- switch_buf = (struct switch_struct *)*((long *)xl->private);
-
- return switch_get_matching_xl (path, switch_buf->cond);
-}
-
-
-/**
- * notify
- */
-void
-switch_notify (xlator_t *xl, int32_t event, void *data)
-{
- /* TODO: This should be checking in switch_sched_struct */
-#if 0
- struct switch_struct *switch_buf = NULL;
- int32_t idx = 0;
-
- switch_buf = (struct switch_struct *)*((long *)xl->private);
- if (!switch_buf)
- return;
-
- for (idx = 0; idx < switch_buf->child_count; idx++) {
- if (switch_buf->array[idx].xl == (xlator_t *)data)
- break;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- switch_buf->array[idx].eligible = 1;
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- switch_buf->array[idx].eligible = 0;
- }
- break;
- default:
- {
- ;
- }
- break;
- }
-#endif
-}
-
-static void
-switch_update (xlator_t *xl)
-{
- return;
-}
-
-int32_t
-switch_mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_switch_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- " failed");
- return ret;
- }
-
- return ret;
-}
-
-struct sched_ops sched = {
- .init = switch_init,
- .fini = switch_fini,
- .update = switch_update,
- .schedule = switch_schedule,
- .notify = switch_notify,
- .mem_acct_init = switch_mem_acct_init,
-};
-
-struct volume_options options[] = {
- { .key = { "scheduler.read-only-subvolumes" ,
- "switch.read-only-subvolumes"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = { "scheduler.local-volume-name",
- "switch.nufa.local-volume-name" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = { "scheduler.switch.case",
- "switch.case" },
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {NULL} }
-};
diff --git a/site.h.in b/site.h.in
new file mode 100644
index 00000000000..eb2f062e60c
--- /dev/null
+++ b/site.h.in
@@ -0,0 +1,44 @@
+/*
+ * Guidelines for using this file vs. configure.ac
+ *
+ * (1) If it already exists in configure.ac, leave it there.
+ *
+ * (2) If it needs to take effect at configure (not compile) time, it *needs*
+ * to go in configure.ac.
+ *
+ * (3) If it affects file paths, which are the things most likely to be based
+ * on an OS or distribution's generic filesystem hierarchy and not on a
+ * particular package's definition (e.g. an RPM specfile), it should probably
+ * go in configure.ac.
+ *
+ * (4) If it affects default sizes, limits, thresholds, or modes of operation
+ * (e.g. IPv4 vs. IPv6), it should probably go here.
+ *
+ * (5) For anything else, is it more like the things in 3 or the things in 4?
+ * Which approach is more convenient for the people who are likely to use the
+ * new option(s)? Make your best guesses, confirm with others, and go with
+ * what works.
+ */
+
+#define SITE_H_ENABLE_LEAST_PRIORITY "on"
+#define SITE_H_MD_CACHE_TIMEOUT "1"
+#define SITE_H_NFS_DISABLE "on"
+
+/*
+ * As an example of how to use this file, here's what the Facebook version looks
+ * like:
+
+#define SITE_H_ENABLE_LEAST_PRIORITY "off"
+#define SITE_H_MD_CACHE_TIMEOUT "180"
+#define SITE_H_NFS_DISABLE "off"
+
+ * Each time we add a value here, we lessen the risk of values being
+ * inconsistent across production automation, test automation, and manual
+ * developer testing. We also save effort compared to updating values for each
+ * kind of external automation. To do the same thing with configure scripts or
+ * specfiles, we'd have to make much more complicated and less discoverable
+ * changes there.
+ *
+ * Other orgs are likely to have the same issues regarding their preferred
+ * settings, and likewise should add their favorites here as well.
+ */
diff --git a/smoke.sh b/smoke.sh
deleted file mode 100755
index a87908d794e..00000000000
--- a/smoke.sh
+++ /dev/null
@@ -1,83 +0,0 @@
-#!/bin/bash
-
-set -e;
-
-M=/mnt;
-P=/build;
-H=$(hostname);
-T=600;
-V=patchy;
-
-
-function cleanup()
-{
- killall -15 glusterfs glusterfsd glusterd glusterd 2>&1 || true;
- killall -9 glusterfs glusterfsd glusterd glusterd 2>&1 || true;
- umount -l $M 2>&1 || true;
- rm -rf /var/lib/glusterd /etc/glusterd $P/export;
-}
-
-function start_fs()
-{
- mkdir -p $P/export;
- chmod 0755 $P/export;
-
- glusterd;
- gluster --mode=script volume create $V replica 2 $H:$P/export/export{1,2,3,4};
- gluster volume start $V;
- glusterfs -s $H --volfile-id $V $M;
-# mount -t glusterfs $H:/$V $M;
-}
-
-
-function run_tests()
-{
- cd $M;
-
- (sleep 1; dbench -s -t 60 10 >/dev/null) &
-
- (sleep 1; /opt/qa/tools/posix_compliance.sh) &
-
- wait %2
- wait %3
-
- rm -rf clients;
-
- cd -;
-}
-
-
-function watchdog ()
-{
- # insurance against hangs during the test
-
- sleep $1;
-
- echo "Kicking in watchdog after $1 secs";
-
- cleanup;
-}
-
-
-function finish ()
-{
- cleanup;
- kill %1;
-}
-
-function main ()
-{
- cleanup;
-
- watchdog $T &
-
- trap finish EXIT;
-
- set -x;
-
- start_fs;
-
- run_tests;
-}
-
-main "$@";
diff --git a/submit-for-review.sh b/submit-for-review.sh
new file mode 120000
index 00000000000..a21c0e2869a
--- /dev/null
+++ b/submit-for-review.sh
@@ -0,0 +1 @@
+rfc.sh \ No newline at end of file
diff --git a/swift/1.4.8/README b/swift/1.4.8/README
deleted file mode 100644
index 0ea91535ef7..00000000000
--- a/swift/1.4.8/README
+++ /dev/null
@@ -1,22 +0,0 @@
-Gluster Unified File and Object Storage allows files and directories created
-via gluster-native/nfs mount to be accessed as containers and objects. It is
-a plugin for OpenStack Swift project.
-
-Install
-* Clone the swift repo from git://github.com/openstack/swift.git
-* Apply the swift.diff present in glusterfs.git/swift/1.4.8 to the swift repo.
-* Create a directory named "plugins" under swift.git/swift directory.
-* Copy the contents of glusterfs.git/swift/1.4.8/plugins/ under swift.git/swift/
- except the conf directory.
-* Copy the contents of glusterfs.git/swift/1.4.5/plugins/conf under /etc/swift/.
-* Run python setup.py install
-
-Once this is done, you can access the GlusterFS volumes as Swift accounts.
-Add the Volume names with the user-name and its corresponding password to the
-/etc/swift/proxy-server.conf (follow the syntax used in the sample conf file).
-
-Command to start the servers
- swift-init main start
-
-Command to stop the servers
- swift-init main stop
diff --git a/swift/1.4.8/gluster-swift-plugin.spec b/swift/1.4.8/gluster-swift-plugin.spec
deleted file mode 100644
index 746f75c5f5d..00000000000
--- a/swift/1.4.8/gluster-swift-plugin.spec
+++ /dev/null
@@ -1,60 +0,0 @@
-############################################################################################################
-# Command to build rpms.#
-# $ rpmbuild -ta %{name}-%{version}-%{release}.tar.gz #
-############################################################################################################
-# Setting up the environment. #
-# * Create a directory %{name}-%{version} under $HOME/rpmbuild/SOURCES #
-# * Copy the contents of plugins directory into $HOME/rpmbuild/SOURCES/%{name}-%{version} #
-# * tar zcvf %{name}-%{version}-%{release}.tar.gz $HOME/rpmbuild/SOURCES/%{name}-%{version} %{name}.spec #
-# For more information refer #
-# http://fedoraproject.org/wiki/How_to_create_an_RPM_package #
-############################################################################################################
-
-%define _confdir /etc/swift
-%define _swiftdir /usr/lib/python2.6/site-packages/swift
-%define _ufo_version 1.0
-%define _ufo_release 3
-
-Summary : GlusterFS Unified File and Object Storage.
-Name : gluster-swift-plugin
-Version : %{_ufo_version}
-Release : %{_ufo_release}
-Group : Application/File
-Vendor : Red Hat Inc.
-Source0 : %{name}-%{version}-%{release}.tar.gz
-Packager : gluster-users@gluster.org
-License : Apache
-BuildArch: noarch
-Requires : memcached
-Requires : openssl
-Requires : python
-Requires : gluster-swift
-
-%description
-Gluster Unified File and Object Storage unifies NAS and object storage
-technology. This provides a system for data storage that enables users to access
-the same data as an object and as a file, simplifying management and controlling
-storage costs.
-
-%prep
-%setup -q
-
-%install
-rm -rf %{buildroot}
-
-mkdir -p %{buildroot}/%{_swiftdir}/plugins
-mkdir -p %{buildroot}/%{_confdir}/
-
-cp constraints.py %{buildroot}/%{_swiftdir}/plugins
-cp DiskDir.py %{buildroot}/%{_swiftdir}/plugins
-cp DiskFile.py %{buildroot}/%{_swiftdir}/plugins
-cp Glusterfs.py %{buildroot}/%{_swiftdir}/plugins
-cp __init__.py %{buildroot}/%{_swiftdir}/plugins
-cp utils.py %{buildroot}/%{_swiftdir}/plugins
-
-cp -r conf/* %{buildroot}/%{_confdir}/
-
-%files
-%defattr(-,root,root)
-%{_swiftdir}/plugins
-%{_confdir}/
diff --git a/swift/1.4.8/gluster-swift.spec b/swift/1.4.8/gluster-swift.spec
deleted file mode 100644
index 640b944e8cb..00000000000
--- a/swift/1.4.8/gluster-swift.spec
+++ /dev/null
@@ -1,396 +0,0 @@
-%if ! (0%{?fedora} > 12 || 0%{?rhel} > 5)
-%{!?python_sitelib: %global python_sitelib %(%{__python} -c "from distutils.sysconfig import get_python_lib; print(get_python_lib())")}
-%endif
-
-Name: gluster-swift
-Version: 1.4.8
-Release: 3%{?dist}
-Summary: OpenStack Object Storage (swift)
-
-Group: Development/Languages
-License: ASL 2.0
-URL: http://launchpad.net/swift
-Source0: http://launchpad.net/swift/essex/%{version}/+download/swift-%{version}.tar.gz
-Source1: %{name}-functions
-Source2: %{name}-account.init
-Source4: %{name}-container.init
-Source5: %{name}-object.init
-Source6: %{name}-proxy.init
-Patch0: openstack-swift-newdeps.patch
-Patch1: openstack-swift-docmod.patch
-Patch2: openstack-swift-nonet.patch
-Patch3: gluster.patch
-
-BuildRoot: %{_tmppath}/swift-%{version}-%{release}-root-%(%{__id_u} -n)
-
-BuildArch: noarch
-BuildRequires: dos2unix
-BuildRequires: python-devel
-BuildRequires: python-setuptools
-BuildRequires: python-netifaces
-BuildRequires: python-paste-deploy
-Requires: python-configobj
-Requires: python-eventlet >= 0.9.8
-Requires: python-greenlet >= 0.3.1
-Requires: python-paste-deploy
-Requires: python-simplejson
-Requires: python-webob1.0
-Requires: pyxattr
-Requires: python-setuptools
-Requires: python-netifaces
-Requires: python-netifaces
-
-Conflicts: openstack-swift
-
-Requires(post): chkconfig
-Requires(postun): initscripts
-Requires(preun): chkconfig
-Requires(pre): shadow-utils
-Obsoletes: openstack-swift-auth <= 1.4.0
-
-%description
-OpenStack Object Storage (swift) aggregates commodity servers to work together
-in clusters for reliable, redundant, and large-scale storage of static objects.
-Objects are written to multiple hardware devices in the data center, with the
-OpenStack software responsible for ensuring data replication and integrity
-across the cluster. Storage clusters can scale horizontally by adding new nodes,
-which are automatically configured. Should a node fail, OpenStack works to
-replicate its content from other active nodes. Because OpenStack uses software
-logic to ensure data replication and distribution across different devices,
-inexpensive commodity hard drives and servers can be used in lieu of more
-expensive equipment.
-
-%package account
-Summary: A swift account server
-Group: Applications/System
-
-Requires: %{name} = %{version}-%{release}
-
-%description account
-OpenStack Object Storage (swift) aggregates commodity servers to work together
-in clusters for reliable, redundant, and large-scale storage of static objects.
-
-This package contains the %{name} account server.
-
-%package container
-Summary: A swift container server
-Group: Applications/System
-
-Requires: %{name} = %{version}-%{release}
-
-%description container
-OpenStack Object Storage (swift) aggregates commodity servers to work together
-in clusters for reliable, redundant, and large-scale storage of static objects.
-
-This package contains the %{name} container server.
-
-%package object
-Summary: A swift object server
-Group: Applications/System
-
-Requires: %{name} = %{version}-%{release}
-Requires: rsync >= 3.0
-
-%description object
-OpenStack Object Storage (swift) aggregates commodity servers to work together
-in clusters for reliable, redundant, and large-scale storage of static objects.
-
-This package contains the %{name} object server.
-
-%package proxy
-Summary: A swift proxy server
-Group: Applications/System
-
-Requires: %{name} = %{version}-%{release}
-
-%description proxy
-OpenStack Object Storage (swift) aggregates commodity servers to work together
-in clusters for reliable, redundant, and large-scale storage of static objects.
-
-This package contains the %{name} proxy server.
-
-%package doc
-Summary: Documentation for %{name}
-Group: Documentation
-#%if 0%{?rhel} >= 6
-#BuildRequires: python-sphinx10 >= 1.0
-#%endif
-%if 0%{?fedora} >= 14
-BuildRequires: python-sphinx >= 1.0
-%endif
-# Required for generating docs
-BuildRequires: python-eventlet
-BuildRequires: python-simplejson
-BuildRequires: python-webob1.0
-BuildRequires: pyxattr
-
-%description doc
-OpenStack Object Storage (swift) aggregates commodity servers to work together
-in clusters for reliable, redundant, and large-scale storage of static objects.
-
-This package contains documentation files for %{name}.
-
-%prep
-%setup -q -n swift-%{version}
-%patch0 -p1 -b .newdeps
-%patch1 -p1 -b .docmod
-%patch2 -p1 -b .nonet
-%patch3 -p1 -b .gluster
-# Fix wrong-file-end-of-line-encoding warning
-dos2unix LICENSE
-
-%build
-%{__python} setup.py build
-# Fails unless we create the build directory
-mkdir -p doc/build
-# Build docs
-%if 0%{?fedora} >= 14
-%{__python} setup.py build_sphinx
-%endif
-#%if 0%{?rhel} >= 6
-#export PYTHONPATH="$( pwd ):$PYTHONPATH"
-#SPHINX_DEBUG=1 sphinx-1.0-build -b html doc/source doc/build/html
-#SPHINX_DEBUG=1 sphinx-1.0-build -b man doc/source doc/build/man
-#%endif
-# Fix hidden-file-or-dir warning
-#rm doc/build/html/.buildinfo
-
-%install
-rm -rf %{buildroot}
-%{__python} setup.py install -O1 --skip-build --root %{buildroot}
-# Init helper functions
-install -p -D -m 644 %{SOURCE1} %{buildroot}%{_datarootdir}/%{name}/functions
-# Init scripts
-install -p -D -m 755 %{SOURCE2} %{buildroot}%{_initrddir}/%{name}-account
-install -p -D -m 755 %{SOURCE4} %{buildroot}%{_initrddir}/%{name}-container
-install -p -D -m 755 %{SOURCE5} %{buildroot}%{_initrddir}/%{name}-object
-install -p -D -m 755 %{SOURCE6} %{buildroot}%{_initrddir}/%{name}-proxy
-# Remove tests
-rm -fr %{buildroot}/%{python_sitelib}/test
-# Misc other
-install -d -m 755 %{buildroot}%{_sysconfdir}/swift
-install -d -m 755 %{buildroot}%{_sysconfdir}/swift/account-server
-install -d -m 755 %{buildroot}%{_sysconfdir}/swift/container-server
-install -d -m 755 %{buildroot}%{_sysconfdir}/swift/object-server
-install -d -m 755 %{buildroot}%{_sysconfdir}/swift/proxy-server
-# Install pid directory
-install -d -m 755 %{buildroot}%{_localstatedir}/run/swift
-install -d -m 755 %{buildroot}%{_localstatedir}/run/swift/account-server
-install -d -m 755 %{buildroot}%{_localstatedir}/run/swift/container-server
-install -d -m 755 %{buildroot}%{_localstatedir}/run/swift/object-server
-install -d -m 755 %{buildroot}%{_localstatedir}/run/swift/proxy-server
-
-%clean
-rm -rf %{buildroot}
-
-%pre
-getent group swift >/dev/null || groupadd -r swift -g 160
-getent passwd swift >/dev/null || \
-useradd -r -g swift -u 160 -d %{_sharedstatedir}/swift -s /sbin/nologin \
--c "OpenStack Swift Daemons" swift
-exit 0
-
-%post account
-/sbin/chkconfig --add %{name}-account
-
-%preun account
-if [ $1 = 0 ] ; then
- /sbin/service %{name}-account stop >/dev/null 2>&1
- /sbin/chkconfig --del %{name}-account
-fi
-
-%postun account
-if [ "$1" -ge "1" ] ; then
- /sbin/service %{name}-account condrestart >/dev/null 2>&1 || :
-fi
-
-%post container
-/sbin/chkconfig --add %{name}-container
-
-%preun container
-if [ $1 = 0 ] ; then
- /sbin/service %{name}-container stop >/dev/null 2>&1
- /sbin/chkconfig --del %{name}-container
-fi
-
-%postun container
-if [ "$1" -ge "1" ] ; then
- /sbin/service %{name}-container condrestart >/dev/null 2>&1 || :
-fi
-
-%post object
-/sbin/chkconfig --add %{name}-object
-
-%preun object
-if [ $1 = 0 ] ; then
- /sbin/service %{name}-object stop >/dev/null 2>&1
- /sbin/chkconfig --del %{name}-object
-fi
-
-%postun object
-if [ "$1" -ge "1" ] ; then
- /sbin/service %{name}-object condrestart >/dev/null 2>&1 || :
-fi
-
-%post proxy
-/sbin/chkconfig --add %{name}-proxy
-
-%preun proxy
-if [ $1 = 0 ] ; then
- /sbin/service %{name}-proxy stop >/dev/null 2>&1
- /sbin/chkconfig --del %{name}-proxy
-fi
-
-%postun proxy
-if [ "$1" -ge "1" ] ; then
- /sbin/service %{name}-proxy condrestart >/dev/null 2>&1 || :
-fi
-
-%files
-%defattr(-,root,root,-)
-%doc AUTHORS LICENSE README
-%doc etc/dispersion.conf-sample etc/drive-audit.conf-sample etc/object-expirer.conf-sample
-%doc etc/swift.conf-sample
-%dir %{_datarootdir}/%{name}/functions
-%dir %attr(0755, swift, swift) %{_localstatedir}/run/swift
-%dir %{_sysconfdir}/swift
-%dir %{python_sitelib}/swift
-%{_bindir}/swift
-%{_bindir}/swift-account-audit
-%{_bindir}/swift-bench
-%{_bindir}/swift-drive-audit
-%{_bindir}/swift-get-nodes
-%{_bindir}/swift-init
-%{_bindir}/swift-ring-builder
-%{_bindir}/swift-dispersion-populate
-%{_bindir}/swift-dispersion-report
-%{_bindir}/swift-recon*
-%{_bindir}/swift-object-expirer
-%{_bindir}/swift-oldies
-%{_bindir}/swift-orphans
-%{_bindir}/swift-form-signature
-%{_bindir}/swift-temp-url
-%{python_sitelib}/swift/*.py*
-%{python_sitelib}/swift/common
-%{python_sitelib}/swift-%{version}-*.egg-info
-
-%files account
-%defattr(-,root,root,-)
-%doc etc/account-server.conf-sample
-%dir %{_initrddir}/%{name}-account
-%dir %attr(0755, swift, swift) %{_localstatedir}/run/swift/account-server
-%dir %{_sysconfdir}/swift/account-server
-%{_bindir}/swift-account-auditor
-%{_bindir}/swift-account-reaper
-%{_bindir}/swift-account-replicator
-%{_bindir}/swift-account-server
-%{python_sitelib}/swift/account
-
-
-%files container
-%defattr(-,root,root,-)
-%doc etc/container-server.conf-sample
-%dir %{_initrddir}/%{name}-container
-%dir %attr(0755, swift, swift) %{_localstatedir}/run/swift/container-server
-%dir %{_sysconfdir}/swift/container-server
-%{_bindir}/swift-container-auditor
-%{_bindir}/swift-container-server
-%{_bindir}/swift-container-replicator
-%{_bindir}/swift-container-updater
-%{_bindir}/swift-container-sync
-%{python_sitelib}/swift/container
-
-%files object
-%defattr(-,root,root,-)
-%doc etc/object-server.conf-sample etc/rsyncd.conf-sample
-%dir %{_initrddir}/%{name}-object
-%dir %attr(0755, swift, swift) %{_localstatedir}/run/swift/object-server
-%dir %{_sysconfdir}/swift/object-server
-%{_bindir}/swift-object-auditor
-%{_bindir}/swift-object-info
-%{_bindir}/swift-object-replicator
-%{_bindir}/swift-object-server
-%{_bindir}/swift-object-updater
-%{python_sitelib}/swift/obj
-
-%files proxy
-%defattr(-,root,root,-)
-%doc etc/proxy-server.conf-sample
-%dir %{_initrddir}/%{name}-proxy
-%dir %attr(0755, swift, swift) %{_localstatedir}/run/swift/proxy-server
-%dir %{_sysconfdir}/swift/proxy-server
-%{_bindir}/swift-proxy-server
-%{python_sitelib}/swift/proxy
-
-%files doc
-%defattr(-,root,root,-)
-%doc LICENSE
-#%doc doc/build/html
-
-%changelog
-* Thu Apr 26 2012 Anthony Towns <atowns@redhat.com> 1.4.8-2
-- Apply gluster patches
-- Rename to gluster-swift
-
-* Thu Mar 22 2012 Alan Pevec <apevec@redhat.com> 1.4.8-1
-- Update to 1.4.8
-
-* Fri Mar 09 2012 Alan Pevec <apevec@redhat.com> 1.4.7-1
-- Update to 1.4.7
-
-* Mon Feb 13 2012 Alan Pevec <apevec@redhat.com> 1.4.6-1
-- Update to 1.4.6
-
-* Thu Jan 12 2012 Alan Pevec <apevec@redhat.com> 1.4.4-2
-- add back /var/run/swift for el6
-
-* Wed Jan 04 2012 Alan Pevec <apevec@redhat.com> 1.4.4-1
-- Use updated parallel install versions of epel packages (pbrady)
-- Ensure the docs aren't built with the system glance module (pbrady)
-- Ensure we don't access the net when building docs (pbrady)
-- Update to 1.4.4
-
-* Wed Nov 23 2011 David Nalley <david@gnsa.us> -1.4.3-2
-* fixed some missing requires
-
-* Sat Nov 05 2011 David Nalley <david@gnsa.us> - 1.4.3-1
-- Update to 1.4.3
-- fix init script add, registration, deletion BZ 685155
-- fixing BR to facilitate epel6 building
-
-* Tue Aug 23 2011 David Nalley <david@gnsa.us> - 1.4.0-2
-- adding uid:gid for bz 732693
-
-* Wed Jun 22 2011 David Nalley <david@gnsa.us> - 1.4.1-1
-- Update to 1.4.0
-- change the name of swift binary from st to swift
-
-* Sat Jun 04 2011 David Nalley <david@gnsa.us> - 1.4.0-1
-- Update to 1.4.0
-
-* Fri May 20 2011 David Nalley <david@gnsa.us> - 1.3.0-1
-- Update to 1.3.0
-
-* Tue Feb 08 2011 Fedora Release Engineering <rel-eng@lists.fedoraproject.org> - 1.1.0-2
-- Rebuilt for https://fedoraproject.org/wiki/Fedora_15_Mass_Rebuild
-
-* Sun Dec 05 2010 Silas Sewell <silas@sewell.ch> - 1.1.0-1
-- Update to 1.1.0
-
-* Sun Aug 08 2010 Silas Sewell <silas@sewell.ch> - 1.0.2-5
-- Update for new Python macro guidelines
-- Use dos2unix instead of sed
-- Make gecos field more descriptive
-
-* Wed Jul 28 2010 Silas Sewell <silas@sewell.ch> - 1.0.2-4
-- Rename to openstack-swift
-
-* Wed Jul 28 2010 Silas Sewell <silas@sewell.ch> - 1.0.2-3
-- Fix return value in swift-functions
-
-* Tue Jul 27 2010 Silas Sewell <silas@sewell.ch> - 1.0.2-2
-- Add swift user
-- Update init scripts
-
-* Sun Jul 18 2010 Silas Sewell <silas@sewell.ch> - 1.0.2-1
-- Initial build
diff --git a/swift/1.4.8/plugins/DiskDir.py b/swift/1.4.8/plugins/DiskDir.py
deleted file mode 100644
index 28671be3cd9..00000000000
--- a/swift/1.4.8/plugins/DiskDir.py
+++ /dev/null
@@ -1,484 +0,0 @@
-# Copyright (c) 2011 Red Hat, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-
-from swift.plugins.utils import clean_metadata, dir_empty, rmdirs, mkdirs, \
- validate_account, validate_container, check_valid_account, is_marker, \
- get_container_details, get_account_details, create_container_metadata, \
- create_account_metadata, DEFAULT_GID, DEFAULT_UID, get_account_details, \
- validate_object, create_object_metadata, read_metadata, write_metadata
-
-from swift.common.constraints import CONTAINER_LISTING_LIMIT, \
- check_mount
-
-from swift.plugins.utils import X_CONTENT_TYPE, X_CONTENT_LENGTH, X_TIMESTAMP,\
- X_PUT_TIMESTAMP, X_TYPE, X_ETAG, X_OBJECTS_COUNT, X_BYTES_USED, \
- X_CONTAINER_COUNT, CONTAINER
-
-from swift import plugins
-def strip_obj_storage_path(path, string='/mnt/gluster-object'):
- """
- strip /mnt/gluster-object
- """
- return path.replace(string, '').strip('/')
-
-DATADIR = 'containers'
-
-
-class DiskCommon(object):
- def is_deleted(self):
- return not os.path.exists(self.datadir)
-
- def filter_prefix(self, objects, prefix):
- """
- Accept sorted list.
- """
- found = 0
- filtered_objs = []
- for object_name in objects:
- if object_name.startswith(prefix):
- filtered_objs.append(object_name)
- found = 1
- else:
- if found:
- break
- return filtered_objs
-
- def filter_delimiter(self, objects, delimiter, prefix):
- """
- Accept sorted list.
- Objects should start with prefix.
- """
- filtered_objs=[]
- for object_name in objects:
- tmp_obj = object_name.replace(prefix, '', 1)
- sufix = tmp_obj.split(delimiter, 1)
- new_obj = prefix + sufix[0]
- if new_obj and new_obj not in filtered_objs:
- filtered_objs.append(new_obj)
-
- return filtered_objs
-
- def filter_marker(self, objects, marker):
- """
- TODO: We can traverse in reverse order to optimize.
- Accept sorted list.
- """
- filtered_objs=[]
- found = 0
- if objects[-1] < marker:
- return filtered_objs
- for object_name in objects:
- if object_name > marker:
- filtered_objs.append(object_name)
-
- return filtered_objs
-
- def filter_end_marker(self, objects, end_marker):
- """
- Accept sorted list.
- """
- filtered_objs=[]
- for object_name in objects:
- if object_name < end_marker:
- filtered_objs.append(object_name)
- else:
- break
-
- return filtered_objs
-
- def filter_limit(self, objects, limit):
- filtered_objs=[]
- for i in range(0, limit):
- filtered_objs.append(objects[i])
-
- return filtered_objs
-
- def update_account(self, metadata):
- acc_path = self.datadir
- write_metadata(acc_path, metadata)
- self.metadata = metadata
-
-class DiskDir(DiskCommon):
- """
- Manage object files on disk.
-
- :param path: path to devices on the node
- :param device: device name
- :param partition: partition on the device the object lives in
- :param account: account name for the object
- :param container: container name for the object
- :param obj: object name for the object
- :param keep_data_fp: if True, don't close the fp, otherwise close it
- :param disk_chunk_Size: size of chunks on file reads
- """
-
- def __init__(self, path, device, partition, account, container, logger,
- uid=DEFAULT_UID, gid=DEFAULT_GID, fs_object=None):
- self.root = path
- device = account
- if container:
- self.name = container
- else:
- self.name = None
- if self.name:
- self.datadir = os.path.join(path, account, self.name)
- else:
- self.datadir = os.path.join(path, device)
- self.account = account
- self.device_path = os.path.join(path, device)
- if not check_mount(path, device):
- check_valid_account(account, fs_object)
- self.logger = logger
- self.metadata = {}
- self.uid = int(uid)
- self.gid = int(gid)
- # Create a dummy db_file in /etc/swift
- self.db_file = '/etc/swift/db_file.db'
- if not os.path.exists(self.db_file):
- file(self.db_file, 'w+')
- self.dir_exists = os.path.exists(self.datadir)
- if self.dir_exists:
- try:
- self.metadata = read_metadata(self.datadir)
- except EOFError:
- create_container_metadata(self.datadir)
- else:
- return
- if container:
- if not self.metadata:
- create_container_metadata(self.datadir)
- self.metadata = read_metadata(self.datadir)
- else:
- if not validate_container(self.metadata):
- create_container_metadata(self.datadir)
- self.metadata = read_metadata(self.datadir)
- else:
- if not self.metadata:
- create_account_metadata(self.datadir)
- self.metadata = read_metadata(self.datadir)
- else:
- if not validate_account(self.metadata):
- create_account_metadata(self.datadir)
- self.metadata = read_metadata(self.datadir)
-
- def empty(self):
- return dir_empty(self.datadir)
-
- def delete(self):
- if self.empty():
- #For delete account.
- if os.path.ismount(self.datadir):
- clean_metadata(self.datadir)
- else:
- rmdirs(self.datadir)
- self.dir_exists = False
-
-
- def put_metadata(self, metadata):
- """
- Write metadata to directory/container.
- """
- write_metadata(self.datadir, metadata)
- self.metadata = metadata
-
- def put(self, metadata):
- """
- Create and write metatdata to directory/container.
- :param metadata: Metadata to write.
- """
- if not self.dir_exists:
- mkdirs(self.datadir)
-
- os.chown(self.datadir, self.uid, self.gid)
- write_metadata(self.datadir, metadata)
- self.metadata = metadata
- self.dir_exists = True
-
- def put_obj(self, content_length, timestamp):
- self.metadata[X_OBJECTS_COUNT] = int(self.metadata[X_OBJECTS_COUNT]) + 1
- self.metadata[X_PUT_TIMESTAMP] = timestamp
- self.metadata[X_BYTES_USED] = int(self.metadata[X_BYTES_USED]) + int(content_length)
- #TODO: define update_metadata instad of writing whole metadata again.
- self.put_metadata(self.metadata)
-
- def delete_obj(self, content_length):
- self.metadata[X_OBJECTS_COUNT] = int(self.metadata[X_OBJECTS_COUNT]) - 1
- self.metadata[X_BYTES_USED] = int(self.metadata[X_BYTES_USED]) - int(content_length)
- self.put_metadata(self.metadata)
-
- def put_container(self, container, put_timestamp, del_timestamp, object_count, bytes_used):
- """
- For account server.
- """
- self.metadata[X_OBJECTS_COUNT] = 0
- self.metadata[X_BYTES_USED] = 0
- self.metadata[X_CONTAINER_COUNT] = int(self.metadata[X_CONTAINER_COUNT]) + 1
- self.metadata[X_PUT_TIMESTAMP] = 1
- self.put_metadata(self.metadata)
-
- def delete_container(self, object_count, bytes_used):
- """
- For account server.
- """
- self.metadata[X_OBJECTS_COUNT] = 0
- self.metadata[X_BYTES_USED] = 0
- self.metadata[X_CONTAINER_COUNT] = int(self.metadata[X_CONTAINER_COUNT]) - 1
- self.put_metadata(self.metadata)
-
- def unlink(self):
- """
- Remove directory/container if empty.
- """
- if dir_empty(self.datadir):
- rmdirs(self.datadir)
-
- def list_objects_iter(self, limit, marker, end_marker,
- prefix, delimiter, path):
- """
- Returns tuple of name, created_at, size, content_type, etag.
- """
- if path:
- prefix = path = path.rstrip('/') + '/'
- delimiter = '/'
- if delimiter and not prefix:
- prefix = ''
-
- objects = []
- object_count = 0
- bytes_used = 0
- container_list = []
-
- objects, object_count, bytes_used = get_container_details(self.datadir)
-
- if int(self.metadata[X_OBJECTS_COUNT]) != object_count or \
- int(self.metadata[X_BYTES_USED]) != bytes_used:
- self.metadata[X_OBJECTS_COUNT] = object_count
- self.metadata[X_BYTES_USED] = bytes_used
- self.update_container(self.metadata)
-
- if objects:
- objects.sort()
-
- if objects and prefix:
- objects = self.filter_prefix(objects, prefix)
-
- if objects and delimiter:
- objects = self.filter_delimiter(objects, delimiter, prefix)
-
- if objects and marker:
- objects = self.filter_marker(objects, marker)
-
- if objects and end_marker:
- objects = self.filter_end_marker(objects, end_marker)
-
- if objects and limit:
- if len(objects) > limit:
- objects = self.filter_limit(objects, limit)
-
- if objects:
- for obj in objects:
- list_item = []
- list_item.append(obj)
- metadata = read_metadata(self.datadir + '/' + obj)
- if not metadata or not validate_object(metadata):
- metadata = create_object_metadata(self.datadir + '/' + obj)
- if metadata:
- list_item.append(metadata[X_TIMESTAMP])
- list_item.append(int(metadata[X_CONTENT_LENGTH]))
- list_item.append(metadata[X_CONTENT_TYPE])
- list_item.append(metadata[X_ETAG])
- container_list.append(list_item)
-
- return container_list
-
- def update_container(self, metadata):
- cont_path = self.datadir
- write_metadata(cont_path, metadata)
- self.metadata = metadata
-
- def update_object_count(self):
- objects = []
- object_count = 0
- bytes_used = 0
- objects, object_count, bytes_used = get_container_details(self.datadir)
-
-
- if int(self.metadata[X_OBJECTS_COUNT]) != object_count or \
- int(self.metadata[X_BYTES_USED]) != bytes_used:
- self.metadata[X_OBJECTS_COUNT] = object_count
- self.metadata[X_BYTES_USED] = bytes_used
- self.update_container(self.metadata)
-
- def update_container_count(self):
- containers = []
- container_count = 0
-
- containers, container_count = get_account_details(self.datadir)
-
- if int(self.metadata[X_CONTAINER_COUNT]) != container_count:
- self.metadata[X_CONTAINER_COUNT] = container_count
- self.update_account(self.metadata)
-
- def get_info(self, include_metadata=False):
- """
- Get global data for the container.
- :returns: dict with keys: account, container, created_at,
- put_timestamp, delete_timestamp, object_count, bytes_used,
- reported_put_timestamp, reported_delete_timestamp,
- reported_object_count, reported_bytes_used, hash, id,
- x_container_sync_point1, and x_container_sync_point2.
- If include_metadata is set, metadata is included as a key
- pointing to a dict of tuples of the metadata
- """
- # TODO: delete_timestamp, reported_put_timestamp
- # reported_delete_timestamp, reported_object_count,
- # reported_bytes_used, created_at
-
- metadata = {}
- if os.path.exists(self.datadir):
- metadata = read_metadata(self.datadir)
-
- data = {'account' : self.account, 'container' : self.name,
- 'object_count' : metadata.get(X_OBJECTS_COUNT, '0'),
- 'bytes_used' : metadata.get(X_BYTES_USED, '0'),
- 'hash': '', 'id' : '', 'created_at' : '1',
- 'put_timestamp' : metadata.get(X_PUT_TIMESTAMP, '0'),
- 'delete_timestamp' : '1',
- 'reported_put_timestamp' : '1', 'reported_delete_timestamp' : '1',
- 'reported_object_count' : '1', 'reported_bytes_used' : '1'}
- if include_metadata:
- data['metadata'] = metadata
- return data
-
- def put_object(self, name, timestamp, size, content_type,
- etag, deleted=0):
- # TODO: Implement the specifics of this func.
- pass
-
- def initialize(self, timestamp):
- pass
-
- def update_put_timestamp(self, timestamp):
- """
- Create the container if it doesn't exist and update the timestamp
- """
- if not os.path.exists(self.datadir):
- self.put(self.metadata)
-
- def delete_object(self, name, timestamp):
- # TODO: Implement the delete object
- pass
-
- def delete_db(self, timestamp):
- """
- Delete the container
- """
- self.unlink()
-
- def update_metadata(self, metadata):
- self.metadata.update(metadata)
- write_metadata(self.datadir, self.metadata)
-
-
-class DiskAccount(DiskDir):
- def __init__(self, root, account, fs_object = None):
- self.root = root
- self.account = account
- self.datadir = os.path.join(self.root, self.account)
- if not check_mount(root, account):
- check_valid_account(account, fs_object)
- self.metadata = read_metadata(self.datadir)
- if not self.metadata or not validate_account(self.metadata):
- self.metadata = create_account_metadata(self.datadir)
-
- def list_containers_iter(self, limit, marker, end_marker,
- prefix, delimiter):
- """
- Return tuple of name, object_count, bytes_used, 0(is_subdir).
- Used by account server.
- """
- if delimiter and not prefix:
- prefix = ''
- containers = []
- container_count = 0
- account_list = []
-
- containers, container_count = get_account_details(self.datadir)
-
- if int(self.metadata[X_CONTAINER_COUNT]) != container_count:
- self.metadata[X_CONTAINER_COUNT] = container_count
- self.update_account(self.metadata)
-
- if containers:
- containers.sort()
-
- if containers and prefix:
- containers = self.filter_prefix(containers, prefix)
-
- if containers and delimiter:
- containers = self.filter_delimiter(containers, delimiter, prefix)
-
- if containers and marker:
- containers = self.filter_marker(containers, marker)
-
- if containers and end_marker:
- containers = self.filter_end_marker(containers, end_marker)
-
- if containers and limit:
- if len(containers) > limit:
- containers = self.filter_limit(containers, limit)
-
- if containers:
- for cont in containers:
- list_item = []
- metadata = None
- list_item.append(cont)
- metadata = read_metadata(self.datadir + '/' + cont)
- if not metadata or not validate_container(metadata):
- metadata = create_container_metadata(self.datadir + '/' + cont)
-
- if metadata:
- list_item.append(metadata[X_OBJECTS_COUNT])
- list_item.append(metadata[X_BYTES_USED])
- list_item.append(0)
- account_list.append(list_item)
-
- return account_list
-
- def get_info(self, include_metadata=False):
- """
- Get global data for the account.
- :returns: dict with keys: account, created_at, put_timestamp,
- delete_timestamp, container_count, object_count,
- bytes_used, hash, id
- """
- metadata = {}
- if (os.path.exists(self.datadir)):
- metadata = read_metadata(self.datadir)
- if not metadata:
- metadata = create_account_metadata(self.datadir)
-
- data = {'account' : self.account, 'created_at' : '1',
- 'put_timestamp' : '1', 'delete_timestamp' : '1',
- 'container_count' : metadata.get(X_CONTAINER_COUNT, 0),
- 'object_count' : metadata.get(X_OBJECTS_COUNT, 0),
- 'bytes_used' : metadata.get(X_BYTES_USED, 0),
- 'hash' : '', 'id' : ''}
-
- if include_metadata:
- data['metadata'] = metadata
- return data
diff --git a/swift/1.4.8/plugins/DiskFile.py b/swift/1.4.8/plugins/DiskFile.py
deleted file mode 100644
index 6f77eaaa57a..00000000000
--- a/swift/1.4.8/plugins/DiskFile.py
+++ /dev/null
@@ -1,316 +0,0 @@
-# Copyright (c) 2011 Red Hat, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-from eventlet import tpool
-from tempfile import mkstemp
-from contextlib import contextmanager
-from swift.common.utils import normalize_timestamp, renamer
-from swift.plugins.utils import mkdirs, rmdirs, validate_object, \
- check_valid_account, create_object_metadata, do_open, \
- do_close, do_unlink, do_chown, do_stat, do_listdir, read_metadata,\
- write_metadata
-from swift.common.constraints import check_mount
-from swift.plugins.utils import X_CONTENT_TYPE, X_CONTENT_LENGTH, X_TIMESTAMP,\
- X_PUT_TIMESTAMP, X_TYPE, X_ETAG, X_OBJECTS_COUNT, X_BYTES_USED, \
- X_OBJECT_TYPE, FILE, DIR, MARKER_DIR, OBJECT, \
- DIR_TYPE, FILE_TYPE, DEFAULT_UID, DEFAULT_GID
-
-import logging
-from swift.obj.server import DiskFile
-
-
-DATADIR = 'objects'
-ASYNCDIR = 'async_pending'
-KEEP_CACHE_SIZE = (5 * 1024 * 1024)
-# keep these lower-case
-DISALLOWED_HEADERS = set('content-length content-type deleted etag'.split())
-
-
-class Gluster_DiskFile(DiskFile):
- """
- Manage object files on disk.
-
- :param path: path to devices on the node/mount path for UFO.
- :param device: device name/account_name for UFO.
- :param partition: partition on the device the object lives in
- :param account: account name for the object
- :param container: container name for the object
- :param obj: object name for the object
- :param keep_data_fp: if True, don't close the fp, otherwise close it
- :param disk_chunk_Size: size of chunks on file reads
- """
-
- def __init__(self, path, device, partition, account, container, obj,
- logger, keep_data_fp=False, disk_chunk_size=65536,
- uid=DEFAULT_UID, gid=DEFAULT_GID, fs_object = None):
- self.disk_chunk_size = disk_chunk_size
- device = account
- #Don't support obj_name ending/begining with '/', like /a, a/, /a/b/ etc
- obj = obj.strip('/')
- if '/' in obj:
- self.obj_path, self.obj = obj.rsplit('/', 1)
- else:
- self.obj_path = ''
- self.obj = obj
-
- if self.obj_path:
- self.name = '/'.join((container, self.obj_path))
- else:
- self.name = container
- #Absolute path for obj directory.
- self.datadir = os.path.join(path, device, self.name)
-
- self.device_path = os.path.join(path, device)
- if not check_mount(path, device):
- check_valid_account(account, fs_object)
-
- self.container_path = os.path.join(path, device, container)
- self.tmpdir = os.path.join(path, device, 'tmp')
- self.logger = logger
- self.metadata = {}
- self.meta_file = None
- self.data_file = None
- self.fp = None
- self.iter_etag = None
- self.started_at_0 = False
- self.read_to_eof = False
- self.quarantined_dir = None
- self.keep_cache = False
- self.is_dir = False
- self.is_valid = True
- self.uid = int(uid)
- self.gid = int(gid)
- if not os.path.exists(self.datadir + '/' + self.obj):
- return
-
- self.data_file = os.path.join(self.datadir, self.obj)
- self.metadata = read_metadata(self.datadir + '/' + self.obj)
- if not self.metadata:
- create_object_metadata(self.datadir + '/' + self.obj)
- self.metadata = read_metadata(self.datadir + '/' + self.obj)
-
- if not validate_object(self.metadata):
- create_object_metadata(self.datadir + '/' + self.obj)
- self.metadata = read_metadata(self.datadir + '/' +
- self.obj)
-
- self.filter_metadata()
-
- if os.path.isdir(self.datadir + '/' + self.obj):
- self.is_dir = True
- else:
- self.fp = do_open(self.data_file, 'rb')
- if not keep_data_fp:
- self.close(verify_file=False)
-
- def close(self, verify_file=True):
- """
- Close the file. Will handle quarantining file if necessary.
-
- :param verify_file: Defaults to True. If false, will not check
- file to see if it needs quarantining.
- """
- #Marker directory
- if self.is_dir:
- return
- if self.fp:
- do_close(self.fp)
- self.fp = None
-
- def is_deleted(self):
- """
- Check if the file is deleted.
-
- :returns: True if the file doesn't exist or has been flagged as
- deleted.
- """
- return not self.data_file
-
- def create_dir_object(self, dir_path):
- #TODO: if object already exists???
- if os.path.exists(dir_path) and not os.path.isdir(dir_path):
- self.logger.error("Deleting file %s", dir_path)
- do_unlink(dir_path)
- #If dir aleady exist just override metadata.
- mkdirs(dir_path)
- do_chown(dir_path, self.uid, self.gid)
- create_object_metadata(dir_path)
- return True
-
-
-
- def put_metadata(self, metadata):
- obj_path = self.datadir + '/' + self.obj
- write_metadata(obj_path, metadata)
- self.metadata = metadata
-
-
- def put(self, fd, tmppath, metadata, extension=''):
- """
- Finalize writing the file on disk, and renames it from the temp file to
- the real location. This should be called after the data has been
- written to the temp file.
-
- :params fd: file descriptor of the temp file
- :param tmppath: path to the temporary file being used
- :param metadata: dictionary of metadata to be written
- :param extention: extension to be used when making the file
- """
- #Marker dir.
- if extension == '.ts':
- return True
- if extension == '.meta':
- self.put_metadata(metadata)
- return True
- else:
- extension = ''
- if metadata[X_OBJECT_TYPE] == MARKER_DIR:
- self.create_dir_object(os.path.join(self.datadir, self.obj))
- self.put_metadata(metadata)
- self.data_file = self.datadir + '/' + self.obj
- return True
- #Check if directory already exists.
- if self.is_dir:
- self.logger.error('Directory already exists %s/%s' % \
- (self.datadir , self.obj))
- return False
- #metadata['name'] = self.name
- timestamp = normalize_timestamp(metadata[X_TIMESTAMP])
- write_metadata(tmppath, metadata)
- if X_CONTENT_LENGTH in metadata:
- self.drop_cache(fd, 0, int(metadata[X_CONTENT_LENGTH]))
- tpool.execute(os.fsync, fd)
- if self.obj_path:
- dir_objs = self.obj_path.split('/')
- tmp_path = ''
- if len(dir_objs):
- for dir_name in dir_objs:
- if tmp_path:
- tmp_path = tmp_path + '/' + dir_name
- else:
- tmp_path = dir_name
- if not self.create_dir_object(os.path.join(self.container_path,
- tmp_path)):
- self.logger.error("Failed in subdir %s",\
- os.path.join(self.container_path,tmp_path))
- return False
-
- renamer(tmppath, os.path.join(self.datadir,
- self.obj + extension))
- do_chown(os.path.join(self.datadir, self.obj + extension), \
- self.uid, self.gid)
- self.metadata = metadata
- #self.logger.error("Meta %s", self.metadata)
- self.data_file = self.datadir + '/' + self.obj + extension
- return True
-
-
- def unlinkold(self, timestamp):
- """
- Remove any older versions of the object file. Any file that has an
- older timestamp than timestamp will be deleted.
-
- :param timestamp: timestamp to compare with each file
- """
- if self.metadata and self.metadata['X-Timestamp'] != timestamp:
- self.unlink()
-
- def unlink(self):
- """
- Remove the file.
- """
- #Marker dir.
- if self.is_dir:
- rmdirs(os.path.join(self.datadir, self.obj))
- if not os.path.isdir(os.path.join(self.datadir, self.obj)):
- self.metadata = {}
- self.data_file = None
- else:
- logging.error('Unable to delete dir %s' % os.path.join(self.datadir, self.obj))
- return
-
- for fname in do_listdir(self.datadir):
- if fname == self.obj:
- try:
- do_unlink(os.path.join(self.datadir, fname))
- except OSError, err:
- if err.errno != errno.ENOENT:
- raise
-
- #Remove entire path for object.
- #remove_dir_path(self.obj_path, self.container_path)
-
- self.metadata = {}
- self.data_file = None
-
- def get_data_file_size(self):
- """
- Returns the os.path.getsize for the file. Raises an exception if this
- file does not match the Content-Length stored in the metadata. Or if
- self.data_file does not exist.
-
- :returns: file size as an int
- :raises DiskFileError: on file size mismatch.
- :raises DiskFileNotExist: on file not existing (including deleted)
- """
- #Marker directory.
- if self.is_dir:
- return 0
- try:
- file_size = 0
- if self.data_file:
- file_size = os.path.getsize(self.data_file)
- if X_CONTENT_LENGTH in self.metadata:
- metadata_size = int(self.metadata[X_CONTENT_LENGTH])
- if file_size != metadata_size:
- self.metadata[X_CONTENT_LENGTH] = file_size
- self.update_object(self.metadata)
-
- return file_size
- except OSError, err:
- if err.errno != errno.ENOENT:
- raise
- raise DiskFileNotExist('Data File does not exist.')
-
- def update_object(self, metadata):
- obj_path = self.datadir + '/' + self.obj
- write_metadata(obj_path, metadata)
- self.metadata = metadata
-
- def filter_metadata(self):
- if X_TYPE in self.metadata:
- self.metadata.pop(X_TYPE)
- if X_OBJECT_TYPE in self.metadata:
- self.metadata.pop(X_OBJECT_TYPE)
-
- @contextmanager
- def mkstemp(self):
- """Contextmanager to make a temporary file."""
-
- if not os.path.exists(self.tmpdir):
- mkdirs(self.tmpdir)
- fd, tmppath = mkstemp(dir=self.tmpdir)
- try:
- yield fd, tmppath
- finally:
- try:
- os.close(fd)
- except OSError:
- pass
- try:
- os.unlink(tmppath)
- except OSError:
- pass
diff --git a/swift/1.4.8/plugins/Glusterfs.py b/swift/1.4.8/plugins/Glusterfs.py
deleted file mode 100644
index 5e191e1bd8f..00000000000
--- a/swift/1.4.8/plugins/Glusterfs.py
+++ /dev/null
@@ -1,131 +0,0 @@
-# Copyright (c) 2011 Red Hat, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import logging
-import os, fcntl, time
-from ConfigParser import ConfigParser
-from swift.common.utils import TRUE_VALUES
-from hashlib import md5
-from swift.plugins.utils import mkdirs
-
-class Glusterfs(object):
- def __init__(self):
- self.name = 'glusterfs'
- self.fs_conf = ConfigParser()
- self.fs_conf.read(os.path.join('/etc/swift', 'fs.conf'))
- self.mount_path = self.fs_conf.get('DEFAULT', 'mount_path', '/mnt/gluster-object')
- self.auth_account = self.fs_conf.get('DEFAULT', 'auth_account', 'auth')
- self.mount_ip = self.fs_conf.get('DEFAULT', 'mount_ip', 'localhost')
- self.remote_cluster = self.fs_conf.get('DEFAULT', 'remote_cluster', False) in TRUE_VALUES
- self.object_only = self.fs_conf.get('DEFAULT', 'object_only', "no") in TRUE_VALUES
-
- def busy_wait(self, mount_path):
- # Iterate for definite number of time over a given
- # interval for successful mount
- for i in range(0, 5):
- if os.path.ismount(os.path.join(mount_path)):
- return True
- time.sleep(2)
- return False
-
- def mount(self, account):
- mount_path = os.path.join(self.mount_path, account)
- export = self.get_export_from_account_id(account)
-
- pid_dir = "/var/lib/glusterd/vols/%s/run/" %export
- pid_file = os.path.join(pid_dir, 'swift.pid');
-
- if not os.path.exists(pid_dir):
- mkdirs(pid_dir)
-
- fd = os.open(pid_file, os.O_CREAT|os.O_RDWR)
- with os.fdopen(fd, 'r+b') as f:
- try:
- fcntl.lockf(f, fcntl.LOCK_EX|fcntl.LOCK_NB)
- except:
- ex = sys.exc_info()[1]
- if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
- # This means that some other process is mounting the
- # filesystem, so wait for the mount process to complete
- return self.busy_wait(mount_path)
-
- mnt_cmd = 'mount -t glusterfs %s:%s %s' % (self.mount_ip, export, \
- mount_path)
- if os.system(mnt_cmd) or not self.busy_wait(mount_path):
- raise Exception('Mount failed %s: %s' % (self.name, mnt_cmd))
- return False
- return True
-
- def unmount(self, mount_path):
- umnt_cmd = 'umount %s 2>> /dev/null' % mount_path
- if os.system(umnt_cmd):
- logging.error('Unable to unmount %s %s' % (mount_path, self.name))
-
- def get_export_list_local(self):
- export_list = []
- cmnd = 'gluster volume info'
-
- if os.system(cmnd + ' >> /dev/null'):
- raise Exception('Getting volume failed %s', self.name)
- return export_list
-
- fp = os.popen(cmnd)
- while True:
- item = fp.readline()
- if not item:
- break
- item = item.strip('\n').strip(' ')
- if item.lower().startswith('volume name:'):
- export_list.append(item.split(':')[1].strip(' '))
-
- return export_list
-
-
- def get_export_list_remote(self):
- export_list = []
- cmnd = 'ssh %s gluster volume info' % self.mount_ip
-
- if os.system(cmnd + ' >> /dev/null'):
- raise Exception('Getting volume info failed %s, make sure to have \
- passwordless ssh on %s', self.name, self.mount_ip)
- return export_list
-
- fp = os.popen(cmnd)
- while True:
- item = fp.readline()
- if not item:
- break
- item = item.strip('\n').strip(' ')
- if item.lower().startswith('volume name:'):
- export_list.append(item.split(':')[1].strip(' '))
-
- return export_list
-
- def get_export_list(self):
- if self.remote_cluster:
- return self.get_export_list_remote()
- else:
- return self.get_export_list_local()
-
- def get_export_from_account_id(self, account):
- if not account:
- print 'account is none, returning'
- raise AttributeError
-
- for export in self.get_export_list():
- if account == 'AUTH_' + export:
- return export
-
- raise Exception('No export found %s %s' % (account, self.name))
- return None
diff --git a/swift/1.4.8/plugins/__init__.py b/swift/1.4.8/plugins/__init__.py
deleted file mode 100644
index 3d98c960c14..00000000000
--- a/swift/1.4.8/plugins/__init__.py
+++ /dev/null
@@ -1,16 +0,0 @@
-# Copyright (c) 2011 Red Hat, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from Glusterfs import Glusterfs
diff --git a/swift/1.4.8/plugins/conf/account-server/1.conf b/swift/1.4.8/plugins/conf/account-server/1.conf
deleted file mode 100644
index 54cbf654003..00000000000
--- a/swift/1.4.8/plugins/conf/account-server/1.conf
+++ /dev/null
@@ -1,22 +0,0 @@
-[DEFAULT]
-devices = /srv/1/node
-mount_check = false
-bind_port = 6012
-user = root
-log_facility = LOG_LOCAL2
-
-[pipeline:main]
-pipeline = gluster account-server
-
-[app:account-server]
-use = egg:swift#account
-
-[filter:gluster]
-use = egg:swift#gluster
-
-[account-replicator]
-vm_test_mode = yes
-
-[account-auditor]
-
-[account-reaper]
diff --git a/swift/1.4.8/plugins/conf/account.builder b/swift/1.4.8/plugins/conf/account.builder
deleted file mode 100644
index 2943b9cfb49..00000000000
--- a/swift/1.4.8/plugins/conf/account.builder
+++ /dev/null
Binary files differ
diff --git a/swift/1.4.8/plugins/conf/account.ring.gz b/swift/1.4.8/plugins/conf/account.ring.gz
deleted file mode 100644
index e1a5e6ae25e..00000000000
--- a/swift/1.4.8/plugins/conf/account.ring.gz
+++ /dev/null
Binary files differ
diff --git a/swift/1.4.8/plugins/conf/container-server/1.conf b/swift/1.4.8/plugins/conf/container-server/1.conf
deleted file mode 100644
index 9e776838f58..00000000000
--- a/swift/1.4.8/plugins/conf/container-server/1.conf
+++ /dev/null
@@ -1,24 +0,0 @@
-[DEFAULT]
-devices = /srv/1/node
-mount_check = false
-bind_port = 6011
-user = root
-log_facility = LOG_LOCAL2
-
-[pipeline:main]
-pipeline = gluster container-server
-
-[app:container-server]
-use = egg:swift#container
-
-[filter:gluster]
-use = egg:swift#gluster
-
-[container-replicator]
-vm_test_mode = yes
-
-[container-updater]
-
-[container-auditor]
-
-[container-sync]
diff --git a/swift/1.4.8/plugins/conf/container.builder b/swift/1.4.8/plugins/conf/container.builder
deleted file mode 100644
index 6031d79df60..00000000000
--- a/swift/1.4.8/plugins/conf/container.builder
+++ /dev/null
Binary files differ
diff --git a/swift/1.4.8/plugins/conf/container.ring.gz b/swift/1.4.8/plugins/conf/container.ring.gz
deleted file mode 100644
index fdbcb18b2bf..00000000000
--- a/swift/1.4.8/plugins/conf/container.ring.gz
+++ /dev/null
Binary files differ
diff --git a/swift/1.4.8/plugins/conf/fs.conf b/swift/1.4.8/plugins/conf/fs.conf
deleted file mode 100644
index b6ec5121f9f..00000000000
--- a/swift/1.4.8/plugins/conf/fs.conf
+++ /dev/null
@@ -1,9 +0,0 @@
-[DEFAULT]
-mount_path = /mnt/gluster-object
-auth_account = auth
-#ip of the fs server.
-mount_ip = localhost
-#fs server need not be local, remote server can also be used,
-#set remote_cluster=yes for using remote server.
-remote_cluster = no
-object_only = no \ No newline at end of file
diff --git a/swift/1.4.8/plugins/conf/object-server/1.conf b/swift/1.4.8/plugins/conf/object-server/1.conf
deleted file mode 100644
index f191cefcf01..00000000000
--- a/swift/1.4.8/plugins/conf/object-server/1.conf
+++ /dev/null
@@ -1,22 +0,0 @@
-[DEFAULT]
-devices = /srv/1/node
-mount_check = false
-bind_port = 6010
-user = root
-log_facility = LOG_LOCAL2
-
-[pipeline:main]
-pipeline = gluster object-server
-
-[app:object-server]
-use = egg:swift#object
-
-[filter:gluster]
-use = egg:swift#gluster
-
-[object-replicator]
-vm_test_mode = yes
-
-[object-updater]
-
-[object-auditor]
diff --git a/swift/1.4.8/plugins/conf/object.builder b/swift/1.4.8/plugins/conf/object.builder
deleted file mode 100644
index ce45350595d..00000000000
--- a/swift/1.4.8/plugins/conf/object.builder
+++ /dev/null
Binary files differ
diff --git a/swift/1.4.8/plugins/conf/object.ring.gz b/swift/1.4.8/plugins/conf/object.ring.gz
deleted file mode 100644
index 73e88d58918..00000000000
--- a/swift/1.4.8/plugins/conf/object.ring.gz
+++ /dev/null
Binary files differ
diff --git a/swift/1.4.8/plugins/conf/proxy-server.conf b/swift/1.4.8/plugins/conf/proxy-server.conf
deleted file mode 100644
index 1fcde8e0d3a..00000000000
--- a/swift/1.4.8/plugins/conf/proxy-server.conf
+++ /dev/null
@@ -1,21 +0,0 @@
-[DEFAULT]
-bind_port = 8080
-user = root
-log_facility = LOG_LOCAL1
-
-[pipeline:main]
-pipeline = healthcheck cache tempauth proxy-server
-
-[app:proxy-server]
-use = egg:swift#proxy
-allow_account_management = true
-account_autocreate = true
-
-[filter:tempauth]
-use = egg:swift#tempauth
-
-[filter:healthcheck]
-use = egg:swift#healthcheck
-
-[filter:cache]
-use = egg:swift#memcache
diff --git a/swift/1.4.8/plugins/conf/swift.conf b/swift/1.4.8/plugins/conf/swift.conf
deleted file mode 100644
index f9864e35231..00000000000
--- a/swift/1.4.8/plugins/conf/swift.conf
+++ /dev/null
@@ -1,7 +0,0 @@
-[DEFAULT]
-Enable_plugin = yes
-
-[swift-hash]
-# random unique string that can never change (DO NOT LOSE)
-swift_hash_path_suffix = gluster
-
diff --git a/swift/1.4.8/plugins/constraints.py b/swift/1.4.8/plugins/constraints.py
deleted file mode 100644
index 6be853629fb..00000000000
--- a/swift/1.4.8/plugins/constraints.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright (c) 2011 Red Hat, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import os
-import logging
-from swift.common.constraints import check_utf8, check_metadata
-
-from webob.exc import HTTPBadRequest, HTTPLengthRequired, \
- HTTPRequestEntityTooLarge
-
-
-#: Max file size allowed for objects
-MAX_FILE_SIZE = 0xffffffffffffffff
-#: Max length of the name of a key for metadata
-MAX_META_NAME_LENGTH = 128
-#: Max length of the value of a key for metadata
-MAX_META_VALUE_LENGTH = 256
-#: Max number of metadata items
-MAX_META_COUNT = 90
-#: Max overall size of metadata
-MAX_META_OVERALL_SIZE = 4096
-#: Max object name length
-MAX_OBJECT_NAME_LENGTH = 255
-#: Max object list length of a get request for a container
-CONTAINER_LISTING_LIMIT = 10000
-#: Max container list length of a get request for an account
-ACCOUNT_LISTING_LIMIT = 10000
-MAX_ACCOUNT_NAME_LENGTH = 255
-MAX_CONTAINER_NAME_LENGTH = 255
-
-def validate_obj_name(obj):
- if len(obj) > MAX_OBJECT_NAME_LENGTH:
- logging.error('Object name too long %s' % obj)
- return False
- if obj == '.' or obj == '..':
- logging.error('Object name cannot be . or .. %s' % obj)
- return False
-
- return True
-
-def check_object_creation(req, object_name):
- """
- Check to ensure that everything is alright about an object to be created.
-
- :param req: HTTP request object
- :param object_name: name of object to be created
- :raises HTTPRequestEntityTooLarge: the object is too large
- :raises HTTPLengthRequered: missing content-length header and not
- a chunked request
- :raises HTTPBadRequest: missing or bad content-type header, or
- bad metadata
- """
- if req.content_length and req.content_length > MAX_FILE_SIZE:
- return HTTPRequestEntityTooLarge(body='Your request is too large.',
- request=req, content_type='text/plain')
- if req.content_length is None and \
- req.headers.get('transfer-encoding') != 'chunked':
- return HTTPLengthRequired(request=req)
- if 'X-Copy-From' in req.headers and req.content_length:
- return HTTPBadRequest(body='Copy requests require a zero byte body',
- request=req, content_type='text/plain')
- for obj in object_name.split('/'):
- if not validate_obj_name(obj):
- return HTTPBadRequest(body='Invalid object name %s' %
- (obj), request=req,
- content_type='text/plain')
- if 'Content-Type' not in req.headers:
- return HTTPBadRequest(request=req, content_type='text/plain',
- body='No content type')
- if not check_utf8(req.headers['Content-Type']):
- return HTTPBadRequest(request=req, body='Invalid Content-Type',
- content_type='text/plain')
- if 'x-object-manifest' in req.headers:
- value = req.headers['x-object-manifest']
- container = prefix = None
- try:
- container, prefix = value.split('/', 1)
- except ValueError:
- pass
- if not container or not prefix or '?' in value or '&' in value or \
- prefix[0] == '/':
- return HTTPBadRequest(request=req,
- body='X-Object-Manifest must in the format container/prefix')
- return check_metadata(req, 'object')
-
diff --git a/swift/1.4.8/plugins/utils.py b/swift/1.4.8/plugins/utils.py
deleted file mode 100644
index d8fc7a43615..00000000000
--- a/swift/1.4.8/plugins/utils.py
+++ /dev/null
@@ -1,679 +0,0 @@
-# Copyright (c) 2011 Red Hat, Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-# implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import logging
-import os
-import errno
-from hashlib import md5
-from swift.common.utils import normalize_timestamp
-from xattr import setxattr, removexattr, getxattr, removexattr
-import cPickle as pickle
-
-X_CONTENT_TYPE = 'Content-Type'
-X_CONTENT_LENGTH = 'Content-Length'
-X_TIMESTAMP = 'X-Timestamp'
-X_PUT_TIMESTAMP = 'X-PUT-Timestamp'
-X_TYPE = 'X-Type'
-X_ETAG = 'ETag'
-X_OBJECTS_COUNT = 'X-Object-Count'
-X_BYTES_USED = 'X-Bytes-Used'
-X_CONTAINER_COUNT = 'X-Container-Count'
-X_OBJECT_TYPE = 'X-Object-Type'
-DIR_TYPE = 'application/directory'
-ACCOUNT = 'Account'
-MOUNT_PATH = '/mnt/gluster-object'
-METADATA_KEY = 'user.swift.metadata'
-CONTAINER = 'container'
-DIR = 'dir'
-MARKER_DIR = 'marker_dir'
-FILE = 'file'
-DIR_TYPE = 'application/directory'
-FILE_TYPE = 'application/octet-stream'
-OBJECT = 'Object'
-OBJECT_TYPE = 'application/octet-stream'
-DEFAULT_UID = -1
-DEFAULT_GID = -1
-PICKLE_PROTOCOL = 2
-CHUNK_SIZE = 65536
-
-
-def mkdirs(path):
- """
- Ensures the path is a directory or makes it if not. Errors if the path
- exists but is a file or on permissions failure.
-
- :param path: path to create
- """
- if not os.path.isdir(path):
- try:
- do_makedirs(path)
- except OSError, err:
- #TODO: check, isdir will fail if mounted and volume stopped.
- #if err.errno != errno.EEXIST or not os.path.isdir(path)
- if err.errno != errno.EEXIST:
- raise
-
-def rmdirs(path):
- if os.path.isdir(path) and dir_empty(path):
- do_rmdir(path)
- else:
- logging.error("rmdirs failed dir may not be empty or not valid dir")
- return False
-
-def strip_obj_storage_path(path, string='/mnt/gluster-object'):
- """
- strip /mnt/gluster-object
- """
- return path.replace(string, '').strip('/')
-
-def do_mkdir(path):
- try:
- os.mkdir(path)
- except Exception, err:
- logging.exception("Mkdir failed on %s err: %s", path, str(err))
- if err.errno != errno.EEXIST:
- raise
- return True
-
-def do_makedirs(path):
- try:
- os.makedirs(path)
- except Exception, err:
- logging.exception("Makedirs failed on %s err: %s", path, str(err))
- if err.errno != errno.EEXIST:
- raise
- return True
-
-
-def do_listdir(path):
- try:
- buf = os.listdir(path)
- except Exception, err:
- logging.exception("Listdir failed on %s err: %s", path, str(err))
- raise
- return buf
-
-def do_chown(path, uid, gid):
- try:
- os.chown(path, uid, gid)
- except Exception, err:
- logging.exception("Chown failed on %s err: %s", path, str(err))
- raise
- return True
-
-def do_stat(path):
- try:
- #Check for fd.
- if isinstance(path, int):
- buf = os.fstat(path)
- else:
- buf = os.stat(path)
- except Exception, err:
- logging.exception("Stat failed on %s err: %s", path, str(err))
- raise
-
- return buf
-
-def do_open(path, mode):
- try:
- fd = open(path, mode)
- except Exception, err:
- logging.exception("Open failed on %s err: %s", path, str(err))
- raise
- return fd
-
-def do_close(fd):
- #fd could be file or int type.
- try:
- if isinstance(fd, int):
- os.close(fd)
- else:
- fd.close()
- except Exception, err:
- logging.exception("Close failed on %s err: %s", fd, str(err))
- raise
- return True
-
-def do_unlink(path, log = True):
- try:
- os.unlink(path)
- except Exception, err:
- if log:
- logging.exception("Unlink failed on %s err: %s", path, str(err))
- if err.errno != errno.ENOENT:
- raise
- return True
-
-def do_rmdir(path):
- try:
- os.rmdir(path)
- except Exception, err:
- logging.exception("Rmdir failed on %s err: %s", path, str(err))
- if err.errno != errno.ENOENT:
- raise
- return True
-
-def do_rename(old_path, new_path):
- try:
- os.rename(old_path, new_path)
- except Exception, err:
- logging.exception("Rename failed on %s to %s err: %s", old_path, new_path, \
- str(err))
- raise
- return True
-
-def do_setxattr(path, key, value):
- fd = None
- if not os.path.isdir(path):
- fd = do_open(path, 'rb')
- else:
- fd = path
- if fd or os.path.isdir(path):
- try:
- setxattr(fd, key, value)
- except Exception, err:
- logging.exception("setxattr failed on %s key %s err: %s", path, key, str(err))
- raise
- finally:
- if fd and not os.path.isdir(path):
- do_close(fd)
- else:
- logging.error("Open failed path %s", path)
- return False
- return True
-
-
-
-def do_getxattr(path, key, log = True):
- fd = None
- if not os.path.isdir(path):
- fd = do_open(path, 'rb')
- else:
- fd = path
- if fd or os.path.isdir(path):
- try:
- value = getxattr(fd, key)
- except Exception, err:
- if log:
- logging.exception("getxattr failed on %s key %s err: %s", path, key, str(err))
- raise
- finally:
- if fd and not os.path.isdir(path):
- do_close(fd)
- else:
- logging.error("Open failed path %s", path)
- return False
- return value
-
-def do_removexattr(path, key):
- fd = None
- if not os.path.isdir(path):
- fd = do_open(path, 'rb')
- else:
- fd = path
- if fd or os.path.isdir(path):
- try:
- removexattr(fd, key)
- except Exception, err:
- logging.exception("removexattr failed on %s key %s err: %s", path, key, str(err))
- raise
- finally:
- if fd and not os.path.isdir(path):
- do_close(fd)
- else:
- logging.error("Open failed path %s", path)
- return False
- return True
-
-def read_metadata(path):
- """
- Helper function to read the pickled metadata from a File/Directory .
-
- :param path: File/Directory to read metadata from.
-
- :returns: dictionary of metadata
- """
- metadata = ''
- key = 0
- while True:
- try:
- metadata += do_getxattr(path, '%s%s' % (METADATA_KEY, (key or '')),
- log = False)
- except Exception:
- break
- key += 1
- if metadata:
- return pickle.loads(metadata)
- else:
- return {}
-
-
-def write_metadata(path, metadata):
- """
- Helper function to write pickled metadata for a File/Directory.
-
- :param path: File/Directory path to write the metadata
- :param metadata: metadata to write
- """
- metastr = pickle.dumps(metadata, PICKLE_PROTOCOL)
- key = 0
- while metastr:
- do_setxattr(path, '%s%s' % (METADATA_KEY, key or ''), metastr[:254])
- metastr = metastr[254:]
- key += 1
-
-def clean_metadata(path):
- key = 0
- while True:
- value = do_getxattr(path, '%s%s' % (METADATA_KEY, (key or '')))
- do_removexattr(path, '%s%s' % (METADATA_KEY, (key or '')))
- key += 1
-
-
-def dir_empty(path):
- """
- Return true if directory/container is empty.
- :param path: Directory path.
- :returns: True/False.
- """
- if os.path.isdir(path):
- try:
- files = do_listdir(path)
- except Exception, err:
- logging.exception("listdir failed on %s err: %s", path, str(err))
- raise
- if not files:
- return True
- else:
- return False
- else:
- if not os.path.exists(path):
- return True
-
-
-def get_device_from_account(account):
- if account.startswith(RESELLER_PREFIX):
- device = account.replace(RESELLER_PREFIX, '', 1)
- return device
-
-def check_user_xattr(path):
- if not os.path.exists(path):
- return False
- do_setxattr(path, 'user.test.key1', 'value1')
- try:
- removexattr(path, 'user.test.key1')
- except Exception, err:
- logging.exception("removexattr failed on %s err: %s", path, str(err))
- #Remove xattr may fail in case of concurrent remove.
- return True
-
-
-def _check_valid_account(account, fs_object):
- mount_path = getattr(fs_object, 'mount_path', MOUNT_PATH)
-
- if os.path.ismount(os.path.join(mount_path, account)):
- return True
-
- if not check_account_exists(fs_object.get_export_from_account_id(account), fs_object):
- logging.error('Account not present %s', account)
- return False
-
- if not os.path.isdir(os.path.join(mount_path, account)):
- mkdirs(os.path.join(mount_path, account))
-
- if fs_object:
- if not fs_object.mount(account):
- return False
-
- return True
-
-def check_valid_account(account, fs_object):
- return _check_valid_account(account, fs_object)
-
-def validate_container(metadata):
- if not metadata:
- logging.error('No metadata')
- return False
-
- if X_TYPE not in metadata.keys() or \
- X_TIMESTAMP not in metadata.keys() or \
- X_PUT_TIMESTAMP not in metadata.keys() or \
- X_OBJECTS_COUNT not in metadata.keys() or \
- X_BYTES_USED not in metadata.keys():
- #logging.error('Container error %s' % metadata)
- return False
-
- if metadata[X_TYPE] == CONTAINER:
- return True
-
- logging.error('Container error %s' % metadata)
- return False
-
-def validate_account(metadata):
- if not metadata:
- logging.error('No metadata')
- return False
-
- if X_TYPE not in metadata.keys() or \
- X_TIMESTAMP not in metadata.keys() or \
- X_PUT_TIMESTAMP not in metadata.keys() or \
- X_OBJECTS_COUNT not in metadata.keys() or \
- X_BYTES_USED not in metadata.keys() or \
- X_CONTAINER_COUNT not in metadata.keys():
- #logging.error('Account error %s' % metadata)
- return False
-
- if metadata[X_TYPE] == ACCOUNT:
- return True
-
- logging.error('Account error %s' % metadata)
- return False
-
-def validate_object(metadata):
- if not metadata:
- logging.error('No metadata')
- return False
-
- if X_TIMESTAMP not in metadata.keys() or \
- X_CONTENT_TYPE not in metadata.keys() or \
- X_ETAG not in metadata.keys() or \
- X_CONTENT_LENGTH not in metadata.keys() or \
- X_TYPE not in metadata.keys() or \
- X_OBJECT_TYPE not in metadata.keys():
- #logging.error('Object error %s' % metadata)
- return False
-
- if metadata[X_TYPE] == OBJECT:
- return True
-
- logging.error('Object error %s' % metadata)
- return False
-
-def is_marker(metadata):
- if not metadata:
- logging.error('No metadata')
- return False
-
- if X_OBJECT_TYPE not in metadata.keys():
- logging.error('X_OBJECT_TYPE missing %s' % metadata)
- return False
-
- if metadata[X_OBJECT_TYPE] == MARKER_DIR:
- return True
- else:
- return False
-
-def _update_list(path, const_path, src_list, reg_file=True, object_count=0,
- bytes_used=0, obj_list=[]):
- obj_path = strip_obj_storage_path(path, const_path)
-
- for i in src_list:
- if obj_path:
- obj_list.append(os.path.join(obj_path, i))
- else:
- obj_list.append(i)
-
- object_count += 1
-
- if reg_file:
- bytes_used += os.path.getsize(path + '/' + i)
-
- return object_count, bytes_used
-
-def update_list(path, const_path, dirs=[], files=[], object_count=0,
- bytes_used=0, obj_list=[]):
- object_count, bytes_used = _update_list (path, const_path, files, True,
- object_count, bytes_used,
- obj_list)
- object_count, bytes_used = _update_list (path, const_path, dirs, False,
- object_count, bytes_used,
- obj_list)
- return object_count, bytes_used
-
-def get_container_details_from_fs(cont_path, const_path,
- memcache=None):
- """
- get container details by traversing the filesystem
- """
- bytes_used = 0
- object_count = 0
- obj_list=[]
- dir_list = []
-
- if os.path.isdir(cont_path):
- for (path, dirs, files) in os.walk(cont_path):
- object_count, bytes_used = update_list(path, const_path, dirs, files,
- object_count, bytes_used,
- obj_list)
-
- dir_list.append(path + ':' + str(do_stat(path).st_mtime))
-
- if memcache:
- memcache.set(strip_obj_storage_path(cont_path), obj_list)
- memcache.set(strip_obj_storage_path(cont_path) + '-dir_list',
- ','.join(dir_list))
- memcache.set(strip_obj_storage_path(cont_path) + '-cont_meta',
- [object_count, bytes_used])
-
- return obj_list, object_count, bytes_used
-
-def get_container_details_from_memcache(cont_path, const_path,
- memcache):
- """
- get container details stored in memcache
- """
-
- bytes_used = 0
- object_count = 0
- obj_list=[]
-
- dir_contents = memcache.get(strip_obj_storage_path(cont_path) + '-dir_list')
- if not dir_contents:
- return get_container_details_from_fs(cont_path, const_path,
- memcache=memcache)
-
- for i in dir_contents.split(','):
- path, mtime = i.split(':')
- if mtime != str(do_stat(path).st_mtime):
- return get_container_details_from_fs(cont_path, const_path,
- memcache=memcache)
-
- obj_list = memcache.get(strip_obj_storage_path(cont_path))
-
- object_count, bytes_used = memcache.get(strip_obj_storage_path(cont_path) + '-cont_meta')
-
- return obj_list, object_count, bytes_used
-
-def get_container_details(cont_path, memcache=None):
- """
- Return object_list, object_count and bytes_used.
- """
- if memcache:
- object_list, object_count, bytes_used = get_container_details_from_memcache(cont_path, cont_path,
- memcache=memcache)
- else:
- object_list, object_count, bytes_used = get_container_details_from_fs(cont_path, cont_path)
-
- return object_list, object_count, bytes_used
-
-def get_account_details_from_fs(acc_path, memcache=None):
- container_list = []
- container_count = 0
-
- if os.path.isdir(acc_path):
- for name in do_listdir(acc_path):
- if not os.path.isdir(acc_path + '/' + name) or \
- name.lower() == 'tmp' or name.lower() == 'async_pending':
- continue
- container_count += 1
- container_list.append(name)
-
- if memcache:
- memcache.set(strip_obj_storage_path(acc_path) + '_container_list', container_list)
- memcache.set(strip_obj_storage_path(acc_path)+'_mtime', str(do_stat(acc_path).st_mtime))
- memcache.set(strip_obj_storage_path(acc_path)+'_container_count', container_count)
-
- return container_list, container_count
-
-def get_account_details_from_memcache(acc_path, memcache=None):
- if memcache:
- mtime = memcache.get(strip_obj_storage_path(acc_path)+'_mtime')
- if not mtime or mtime != str(do_stat(acc_path).st_mtime):
- return get_account_details_from_fs(acc_path, memcache)
- container_list = memcache.get(strip_obj_storage_path(acc_path) + '_container_list')
- container_count = memcache.get(strip_obj_storage_path(acc_path)+'_container_count')
- return container_list, container_count
-
-
-def get_account_details(acc_path, memcache=None):
- """
- Return container_list and container_count.
- """
- if memcache:
- return get_account_details_from_memcache(acc_path, memcache)
- else:
- return get_account_details_from_fs(acc_path, memcache)
-
-
-
-def get_etag(path):
- etag = None
- if os.path.exists(path):
- etag = md5()
- if not os.path.isdir(path):
- fp = open(path, 'rb')
- if fp:
- while True:
- chunk = fp.read(CHUNK_SIZE)
- if chunk:
- etag.update(chunk)
- else:
- break
- fp.close()
-
- etag = etag.hexdigest()
-
- return etag
-
-
-def get_object_metadata(obj_path):
- """
- Return metadata of object.
- """
- metadata = {}
- if os.path.exists(obj_path):
- if not os.path.isdir(obj_path):
- metadata = {
- X_TIMESTAMP: normalize_timestamp(os.path.getctime(obj_path)),
- X_CONTENT_TYPE: FILE_TYPE,
- X_ETAG: get_etag(obj_path),
- X_CONTENT_LENGTH: os.path.getsize(obj_path),
- X_TYPE: OBJECT,
- X_OBJECT_TYPE: FILE,
- }
- else:
- metadata = {
- X_TIMESTAMP: normalize_timestamp(os.path.getctime(obj_path)),
- X_CONTENT_TYPE: DIR_TYPE,
- X_ETAG: get_etag(obj_path),
- X_CONTENT_LENGTH: 0,
- X_TYPE: OBJECT,
- X_OBJECT_TYPE: DIR,
- }
-
- return metadata
-
-def get_container_metadata(cont_path, memcache=None):
- objects = []
- object_count = 0
- bytes_used = 0
- objects, object_count, bytes_used = get_container_details(cont_path,
- memcache=memcache)
- metadata = {X_TYPE: CONTAINER,
- X_TIMESTAMP: normalize_timestamp(os.path.getctime(cont_path)),
- X_PUT_TIMESTAMP: normalize_timestamp(os.path.getmtime(cont_path)),
- X_OBJECTS_COUNT: object_count,
- X_BYTES_USED: bytes_used}
- return metadata
-
-def get_account_metadata(acc_path, memcache=None):
- containers = []
- container_count = 0
- containers, container_count = get_account_details(acc_path, memcache)
- metadata = {X_TYPE: ACCOUNT,
- X_TIMESTAMP: normalize_timestamp(os.path.getctime(acc_path)),
- X_PUT_TIMESTAMP: normalize_timestamp(os.path.getmtime(acc_path)),
- X_OBJECTS_COUNT: 0,
- X_BYTES_USED: 0,
- X_CONTAINER_COUNT: container_count}
- return metadata
-
-def restore_object(obj_path, metadata):
- meta = read_metadata(obj_path)
- if meta:
- meta.update(metadata)
- write_metadata(obj_path, meta)
- else:
- write_metadata(obj_path, metadata)
-
-def restore_container(cont_path, metadata):
- meta = read_metadata(cont_path)
- if meta:
- meta.update(metadata)
- write_metadata(cont_path, meta)
- else:
- write_metadata(cont_path, metadata)
-
-def restore_account(acc_path, metadata):
- meta = read_metadata(acc_path)
- if meta:
- meta.update(metadata)
- write_metadata(acc_path, meta)
- else:
- write_metadata(acc_path, metadata)
-
-def create_object_metadata(obj_path):
- meta = get_object_metadata(obj_path)
- restore_object(obj_path, meta)
- return meta
-
-def create_container_metadata(cont_path, memcache=None):
- meta = get_container_metadata(cont_path, memcache)
- restore_container(cont_path, meta)
- return meta
-
-def create_account_metadata(acc_path, memcache=None):
- meta = get_account_metadata(acc_path, memcache)
- restore_account(acc_path, meta)
- return meta
-
-
-def check_account_exists(account, fs_object):
- if account not in get_account_list(fs_object):
- logging.error('Account not exists %s' % account)
- return False
- else:
- return True
-
-def get_account_list(fs_object):
- account_list = []
- if fs_object:
- account_list = fs_object.get_export_list()
- return account_list
-
-
-def get_account_id(account):
- return RESELLER_PREFIX + md5(account + HASH_PATH_SUFFIX).hexdigest()
-
diff --git a/swift/1.4.8/swift.diff b/swift/1.4.8/swift.diff
deleted file mode 100644
index 8ed5070e11a..00000000000
--- a/swift/1.4.8/swift.diff
+++ /dev/null
@@ -1,797 +0,0 @@
-diff --git a/setup.py b/setup.py
-index d195d34..b5b5ca2 100644
---- a/setup.py
-+++ b/setup.py
-@@ -1,5 +1,6 @@
- #!/usr/bin/python
- # Copyright (c) 2010-2012 OpenStack, LLC.
-+# Copyright (c) 2011 Red Hat, Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
-@@ -94,6 +95,7 @@ setup(
- 'tempurl=swift.common.middleware.tempurl:filter_factory',
- 'formpost=swift.common.middleware.formpost:filter_factory',
- 'name_check=swift.common.middleware.name_check:filter_factory',
-+ 'gluster=swift.common.middleware.gluster:filter_factory',
- ],
- },
- )
-diff --git a/swift/account/server.py b/swift/account/server.py
-index 800b3c0..cb17970 100644
---- a/swift/account/server.py
-+++ b/swift/account/server.py
-@@ -1,4 +1,5 @@
- # Copyright (c) 2010-2012 OpenStack, LLC.
-+# Copyright (c) 2011 Red Hat, Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
-@@ -31,7 +32,7 @@ import simplejson
-
- from swift.common.db import AccountBroker
- from swift.common.utils import get_logger, get_param, hash_path, \
-- normalize_timestamp, split_path, storage_directory
-+ normalize_timestamp, split_path, storage_directory, plugin_enabled
- from swift.common.constraints import ACCOUNT_LISTING_LIMIT, \
- check_mount, check_float, check_utf8
- from swift.common.db_replicator import ReplicatorRpc
-@@ -39,6 +40,8 @@ from swift.common.db_replicator import ReplicatorRpc
-
- DATADIR = 'accounts'
-
-+if plugin_enabled():
-+ from swift.plugins.DiskDir import DiskAccount
-
- class AccountController(object):
- """WSGI controller for the account server."""
-@@ -52,8 +55,12 @@ class AccountController(object):
- self.mount_check, logger=self.logger)
- self.auto_create_account_prefix = \
- conf.get('auto_create_account_prefix') or '.'
-+ self.fs_object = None
-
- def _get_account_broker(self, drive, part, account):
-+ if self.fs_object:
-+ return DiskAccount(self.root, account, self.fs_object);
-+
- hsh = hash_path(account)
- db_dir = storage_directory(DATADIR, part, hsh)
- db_path = os.path.join(self.root, drive, db_dir, hsh + '.db')
-@@ -121,9 +128,15 @@ class AccountController(object):
- if broker.is_deleted():
- return HTTPConflict(request=req)
- metadata = {}
-- metadata.update((key, (value, timestamp))
-- for key, value in req.headers.iteritems()
-- if key.lower().startswith('x-account-meta-'))
-+ if not self.fs_object:
-+ metadata.update((key, (value, timestamp))
-+ for key, value in req.headers.iteritems()
-+ if key.lower().startswith('x-account-meta-'))
-+ else:
-+ metadata.update((key, value)
-+ for key, value in req.headers.iteritems()
-+ if key.lower().startswith('x-account-meta-'))
-+
- if metadata:
- broker.update_metadata(metadata)
- if created:
-@@ -153,6 +166,9 @@ class AccountController(object):
- broker.stale_reads_ok = True
- if broker.is_deleted():
- return HTTPNotFound(request=req)
-+ if self.fs_object and not self.fs_object.object_only:
-+ broker.list_containers_iter(None, None,None,
-+ None, None)
- info = broker.get_info()
- headers = {
- 'X-Account-Container-Count': info['container_count'],
-@@ -164,9 +180,16 @@ class AccountController(object):
- container_ts = broker.get_container_timestamp(container)
- if container_ts is not None:
- headers['X-Container-Timestamp'] = container_ts
-- headers.update((key, value)
-- for key, (value, timestamp) in broker.metadata.iteritems()
-- if value != '')
-+ if not self.fs_object:
-+ headers.update((key, value)
-+ for key, (value, timestamp) in broker.metadata.iteritems()
-+ if value != '')
-+ else:
-+ headers.update((key, value)
-+ for key, value in broker.metadata.iteritems()
-+ if value != '')
-+
-+
- return HTTPNoContent(request=req, headers=headers)
-
- def GET(self, req):
-@@ -190,9 +213,15 @@ class AccountController(object):
- 'X-Account-Bytes-Used': info['bytes_used'],
- 'X-Timestamp': info['created_at'],
- 'X-PUT-Timestamp': info['put_timestamp']}
-- resp_headers.update((key, value)
-- for key, (value, timestamp) in broker.metadata.iteritems()
-- if value != '')
-+ if not self.fs_object:
-+ resp_headers.update((key, value)
-+ for key, (value, timestamp) in broker.metadata.iteritems()
-+ if value != '')
-+ else:
-+ resp_headers.update((key, value)
-+ for key, value in broker.metadata.iteritems()
-+ if value != '')
-+
- try:
- prefix = get_param(req, 'prefix')
- delimiter = get_param(req, 'delimiter')
-@@ -224,6 +253,7 @@ class AccountController(object):
- content_type='text/plain', request=req)
- account_list = broker.list_containers_iter(limit, marker, end_marker,
- prefix, delimiter)
-+
- if out_content_type == 'application/json':
- json_pattern = ['"name":%s', '"count":%s', '"bytes":%s']
- json_pattern = '{' + ','.join(json_pattern) + '}'
-@@ -298,15 +328,29 @@ class AccountController(object):
- return HTTPNotFound(request=req)
- timestamp = normalize_timestamp(req.headers['x-timestamp'])
- metadata = {}
-- metadata.update((key, (value, timestamp))
-- for key, value in req.headers.iteritems()
-- if key.lower().startswith('x-account-meta-'))
-+ if not self.fs_object:
-+ metadata.update((key, (value, timestamp))
-+ for key, value in req.headers.iteritems()
-+ if key.lower().startswith('x-account-meta-'))
-+ else:
-+ metadata.update((key, value)
-+ for key, value in req.headers.iteritems()
-+ if key.lower().startswith('x-account-meta-'))
- if metadata:
- broker.update_metadata(metadata)
- return HTTPNoContent(request=req)
-
-+ def plugin(self, env):
-+ if env.get('Gluster_enabled', False):
-+ self.fs_object = env.get('fs_object')
-+ self.root = env.get('root')
-+ self.mount_check = False
-+ else:
-+ self.fs_object = None
-+
- def __call__(self, env, start_response):
- start_time = time.time()
-+ self.plugin(env)
- req = Request(env)
- self.logger.txn_id = req.headers.get('x-trans-id', None)
- if not check_utf8(req.path_info):
-diff --git a/swift/common/middleware/gluster.py b/swift/common/middleware/gluster.py
-new file mode 100644
-index 0000000..341285d
---- /dev/null
-+++ b/swift/common/middleware/gluster.py
-@@ -0,0 +1,55 @@
-+# Copyright (c) 2011 Red Hat, Inc.
-+#
-+# Licensed under the Apache License, Version 2.0 (the "License");
-+# you may not use this file except in compliance with the License.
-+# You may obtain a copy of the License at
-+#
-+# http://www.apache.org/licenses/LICENSE-2.0
-+#
-+# Unless required by applicable law or agreed to in writing, software
-+# distributed under the License is distributed on an "AS IS" BASIS,
-+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-+# implied.
-+# See the License for the specific language governing permissions and
-+# limitations under the License.
-+
-+from swift.common.utils import get_logger, plugin_enabled
-+from swift import plugins
-+from ConfigParser import ConfigParser
-+
-+class Gluster_plugin(object):
-+ """
-+ Update the environment with keys that reflect Gluster_plugin enabled
-+ """
-+
-+ def __init__(self, app, conf):
-+ self.app = app
-+ self.conf = conf
-+ self.fs_name = 'Glusterfs'
-+ self.logger = get_logger(conf, log_route='gluster')
-+
-+ def __call__(self, env, start_response):
-+ if not plugin_enabled():
-+ return self.app(env, start_response)
-+ env['Gluster_enabled'] =True
-+ fs_object = getattr(plugins, self.fs_name, False)
-+ if not fs_object:
-+ raise Exception('%s plugin not found', self.fs_name)
-+
-+ env['fs_object'] = fs_object()
-+ fs_conf = ConfigParser()
-+ if fs_conf.read('/etc/swift/fs.conf'):
-+ try:
-+ env['root'] = fs_conf.get ('DEFAULT', 'mount_path')
-+ except NoSectionError, NoOptionError:
-+ self.logger.exception(_('ERROR mount_path not present'))
-+ return self.app(env, start_response)
-+
-+def filter_factory(global_conf, **local_conf):
-+ """Returns a WSGI filter app for use with paste.deploy."""
-+ conf = global_conf.copy()
-+ conf.update(local_conf)
-+
-+ def gluster_filter(app):
-+ return Gluster_plugin(app, conf)
-+ return gluster_filter
-diff --git a/swift/common/utils.py b/swift/common/utils.py
-index 47edce8..03701ce 100644
---- a/swift/common/utils.py
-+++ b/swift/common/utils.py
-@@ -1,4 +1,5 @@
- # Copyright (c) 2010-2012 OpenStack, LLC.
-+# Copyright (c) 2011 Red Hat, Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
-@@ -1138,3 +1139,11 @@ def streq_const_time(s1, s2):
- for (a, b) in zip(s1, s2):
- result |= ord(a) ^ ord(b)
- return result == 0
-+
-+def plugin_enabled():
-+ swift_conf = ConfigParser()
-+ swift_conf.read(os.path.join('/etc/swift', 'swift.conf'))
-+ try:
-+ return swift_conf.get('DEFAULT', 'Enable_plugin', 'no') in TRUE_VALUES
-+ except NoOptionError, NoSectionError:
-+ return False
-diff --git a/swift/container/server.py b/swift/container/server.py
-index 8a18cfd..93943a3 100644
---- a/swift/container/server.py
-+++ b/swift/container/server.py
-@@ -1,4 +1,5 @@
- # Copyright (c) 2010-2012 OpenStack, LLC.
-+# Copyright (c) 2011 Red Hat, Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
-@@ -31,7 +32,8 @@ from webob.exc import HTTPAccepted, HTTPBadRequest, HTTPConflict, \
-
- from swift.common.db import ContainerBroker
- from swift.common.utils import get_logger, get_param, hash_path, \
-- normalize_timestamp, storage_directory, split_path, validate_sync_to
-+ normalize_timestamp, storage_directory, split_path, validate_sync_to, \
-+ plugin_enabled
- from swift.common.constraints import CONTAINER_LISTING_LIMIT, \
- check_mount, check_float, check_utf8
- from swift.common.bufferedhttp import http_connect
-@@ -40,6 +42,9 @@ from swift.common.db_replicator import ReplicatorRpc
-
- DATADIR = 'containers'
-
-+if plugin_enabled():
-+ from swift.plugins.DiskDir import DiskDir
-+
-
- class ContainerController(object):
- """WSGI Controller for the container server."""
-@@ -62,6 +67,7 @@ class ContainerController(object):
- ContainerBroker, self.mount_check, logger=self.logger)
- self.auto_create_account_prefix = \
- conf.get('auto_create_account_prefix') or '.'
-+ self.fs_object = None
-
- def _get_container_broker(self, drive, part, account, container):
- """
-@@ -73,6 +79,11 @@ class ContainerController(object):
- :param container: container name
- :returns: ContainerBroker object
- """
-+ if self.fs_object:
-+ return DiskDir(self.root, drive, part, account,
-+ container, self.logger,
-+ fs_object = self.fs_object)
-+
- hsh = hash_path(account, container)
- db_dir = storage_directory(DATADIR, part, hsh)
- db_path = os.path.join(self.root, drive, db_dir, hsh + '.db')
-@@ -211,10 +222,18 @@ class ContainerController(object):
- if broker.is_deleted():
- return HTTPConflict(request=req)
- metadata = {}
-- metadata.update((key, (value, timestamp))
-- for key, value in req.headers.iteritems()
-- if key.lower() in self.save_headers or
-- key.lower().startswith('x-container-meta-'))
-+ #Note: check the structure of req.headers
-+ if not self.fs_object:
-+ metadata.update((key, (value, timestamp))
-+ for key, value in req.headers.iteritems()
-+ if key.lower() in self.save_headers or
-+ key.lower().startswith('x-container-meta-'))
-+ else:
-+ metadata.update((key, value)
-+ for key, value in req.headers.iteritems()
-+ if key.lower() in self.save_headers or
-+ key.lower().startswith('x-container-meta-'))
-+
- if metadata:
- if 'X-Container-Sync-To' in metadata:
- if 'X-Container-Sync-To' not in broker.metadata or \
-@@ -222,6 +241,7 @@ class ContainerController(object):
- broker.metadata['X-Container-Sync-To'][0]:
- broker.set_x_container_sync_points(-1, -1)
- broker.update_metadata(metadata)
-+
- resp = self.account_update(req, account, container, broker)
- if resp:
- return resp
-@@ -245,6 +265,11 @@ class ContainerController(object):
- broker.stale_reads_ok = True
- if broker.is_deleted():
- return HTTPNotFound(request=req)
-+
-+ if self.fs_object and not self.fs_object.object_only:
-+ broker.list_objects_iter(None, None, None, None,
-+ None, None)
-+
- info = broker.get_info()
- headers = {
- 'X-Container-Object-Count': info['object_count'],
-@@ -252,10 +277,17 @@ class ContainerController(object):
- 'X-Timestamp': info['created_at'],
- 'X-PUT-Timestamp': info['put_timestamp'],
- }
-- headers.update((key, value)
-- for key, (value, timestamp) in broker.metadata.iteritems()
-- if value != '' and (key.lower() in self.save_headers or
-- key.lower().startswith('x-container-meta-')))
-+ if not self.fs_object:
-+ headers.update((key, value)
-+ for key, (value, timestamp) in broker.metadata.iteritems()
-+ if value != '' and (key.lower() in self.save_headers or
-+ key.lower().startswith('x-container-meta-')))
-+ else:
-+ headers.update((key, value)
-+ for key, value in broker.metadata.iteritems()
-+ if value != '' and (key.lower() in self.save_headers or
-+ key.lower().startswith('x-container-meta-')))
-+
- return HTTPNoContent(request=req, headers=headers)
-
- def GET(self, req):
-@@ -268,6 +300,7 @@ class ContainerController(object):
- request=req)
- if self.mount_check and not check_mount(self.root, drive):
- return Response(status='507 %s is not mounted' % drive)
-+
- broker = self._get_container_broker(drive, part, account, container)
- broker.pending_timeout = 0.1
- broker.stale_reads_ok = True
-@@ -280,10 +313,17 @@ class ContainerController(object):
- 'X-Timestamp': info['created_at'],
- 'X-PUT-Timestamp': info['put_timestamp'],
- }
-- resp_headers.update((key, value)
-- for key, (value, timestamp) in broker.metadata.iteritems()
-- if value != '' and (key.lower() in self.save_headers or
-- key.lower().startswith('x-container-meta-')))
-+ if not self.fs_object:
-+ resp_headers.update((key, value)
-+ for key, (value, timestamp) in broker.metadata.iteritems()
-+ if value != '' and (key.lower() in self.save_headers or
-+ key.lower().startswith('x-container-meta-')))
-+ else:
-+ resp_headers.update((key, value)
-+ for key, value in broker.metadata.iteritems()
-+ if value != '' and (key.lower() in self.save_headers or
-+ key.lower().startswith('x-container-meta-')))
-+
- try:
- path = get_param(req, 'path')
- prefix = get_param(req, 'prefix')
-@@ -414,10 +454,17 @@ class ContainerController(object):
- return HTTPNotFound(request=req)
- timestamp = normalize_timestamp(req.headers['x-timestamp'])
- metadata = {}
-- metadata.update((key, (value, timestamp))
-- for key, value in req.headers.iteritems()
-- if key.lower() in self.save_headers or
-- key.lower().startswith('x-container-meta-'))
-+ if not self.fs_object:
-+ metadata.update((key, (value, timestamp))
-+ for key, value in req.headers.iteritems()
-+ if key.lower() in self.save_headers or
-+ key.lower().startswith('x-container-meta-'))
-+ else:
-+ metadata.update((key, value)
-+ for key, value in req.headers.iteritems()
-+ if key.lower() in self.save_headers or
-+ key.lower().startswith('x-container-meta-'))
-+
- if metadata:
- if 'X-Container-Sync-To' in metadata:
- if 'X-Container-Sync-To' not in broker.metadata or \
-@@ -427,8 +474,19 @@ class ContainerController(object):
- broker.update_metadata(metadata)
- return HTTPNoContent(request=req)
-
-+ def plugin(self, env):
-+ if env.get('Gluster_enabled', False):
-+ self.fs_object = env.get('fs_object')
-+ if not self.fs_object:
-+ raise NoneTypeError
-+ self.root = env.get('root')
-+ self.mount_check = False
-+ else:
-+ self.fs_object = None
-+
- def __call__(self, env, start_response):
- start_time = time.time()
-+ self.plugin(env)
- req = Request(env)
- self.logger.txn_id = req.headers.get('x-trans-id', None)
- if not check_utf8(req.path_info):
-diff --git a/swift/obj/server.py b/swift/obj/server.py
-index 9cca16b..a45daff 100644
---- a/swift/obj/server.py
-+++ b/swift/obj/server.py
-@@ -1,4 +1,5 @@
- # Copyright (c) 2010-2012 OpenStack, LLC.
-+# Copyright (c) 2011 Red Hat, Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
-@@ -26,6 +27,7 @@ from hashlib import md5
- from tempfile import mkstemp
- from urllib import unquote
- from contextlib import contextmanager
-+from ConfigParser import ConfigParser
-
- from webob import Request, Response, UTC
- from webob.exc import HTTPAccepted, HTTPBadRequest, HTTPCreated, \
-@@ -37,16 +39,23 @@ from eventlet import sleep, Timeout, tpool
-
- from swift.common.utils import mkdirs, normalize_timestamp, \
- storage_directory, hash_path, renamer, fallocate, \
-- split_path, drop_buffer_cache, get_logger, write_pickle
-+ split_path, drop_buffer_cache, get_logger, write_pickle, \
-+ plugin_enabled
- from swift.common.bufferedhttp import http_connect
--from swift.common.constraints import check_object_creation, check_mount, \
-- check_float, check_utf8
-+if plugin_enabled():
-+ from swift.plugins.constraints import check_object_creation
-+ from swift.plugins.utils import X_TYPE, X_OBJECT_TYPE, FILE, DIR, MARKER_DIR, \
-+ OBJECT, DIR_TYPE, FILE_TYPE
-+else:
-+ from swift.common.constraints import check_object_creation
-+
-+from swift.common.constraints import check_mount, check_float, check_utf8
-+
- from swift.common.exceptions import ConnectionTimeout, DiskFileError, \
- DiskFileNotExist
- from swift.obj.replicator import tpooled_get_hashes, invalidate_hash, \
- quarantine_renamer
-
--
- DATADIR = 'objects'
- ASYNCDIR = 'async_pending'
- PICKLE_PROTOCOL = 2
-@@ -339,6 +348,9 @@ class DiskFile(object):
- raise
- raise DiskFileNotExist('Data File does not exist.')
-
-+if plugin_enabled():
-+ from swift.plugins.DiskFile import Gluster_DiskFile
-+
-
- class ObjectController(object):
- """Implements the WSGI application for the Swift Object Server."""
-@@ -377,6 +389,17 @@ class ObjectController(object):
- 'expiring_objects'
- self.expiring_objects_container_divisor = \
- int(conf.get('expiring_objects_container_divisor') or 86400)
-+ self.fs_object = None
-+
-+ def get_DiskFile_obj(self, path, device, partition, account, container, obj,
-+ logger, keep_data_fp=False, disk_chunk_size=65536):
-+ if self.fs_object:
-+ return Gluster_DiskFile(path, device, partition, account, container,
-+ obj, logger, keep_data_fp,
-+ disk_chunk_size, fs_object = self.fs_object);
-+ else:
-+ return DiskFile(path, device, partition, account, container,
-+ obj, logger, keep_data_fp, disk_chunk_size)
-
- def async_update(self, op, account, container, obj, host, partition,
- contdevice, headers_out, objdevice):
-@@ -493,7 +516,7 @@ class ObjectController(object):
- content_type='text/plain')
- if self.mount_check and not check_mount(self.devices, device):
- return Response(status='507 %s is not mounted' % device)
-- file = DiskFile(self.devices, device, partition, account, container,
-+ file = self.get_DiskFile_obj(self.devices, device, partition, account, container,
- obj, self.logger, disk_chunk_size=self.disk_chunk_size)
-
- if 'X-Delete-At' in file.metadata and \
-@@ -548,7 +571,7 @@ class ObjectController(object):
- if new_delete_at and new_delete_at < time.time():
- return HTTPBadRequest(body='X-Delete-At in past', request=request,
- content_type='text/plain')
-- file = DiskFile(self.devices, device, partition, account, container,
-+ file = self.get_DiskFile_obj(self.devices, device, partition, account, container,
- obj, self.logger, disk_chunk_size=self.disk_chunk_size)
- orig_timestamp = file.metadata.get('X-Timestamp')
- upload_expiration = time.time() + self.max_upload_time
-@@ -580,12 +603,29 @@ class ObjectController(object):
- if 'etag' in request.headers and \
- request.headers['etag'].lower() != etag:
- return HTTPUnprocessableEntity(request=request)
-- metadata = {
-- 'X-Timestamp': request.headers['x-timestamp'],
-- 'Content-Type': request.headers['content-type'],
-- 'ETag': etag,
-- 'Content-Length': str(os.fstat(fd).st_size),
-- }
-+ content_type = request.headers['content-type']
-+ if self.fs_object and not content_type:
-+ content_type = FILE_TYPE
-+ if not self.fs_object:
-+ metadata = {
-+ 'X-Timestamp': request.headers['x-timestamp'],
-+ 'Content-Type': request.headers['content-type'],
-+ 'ETag': etag,
-+ 'Content-Length': str(os.fstat(fd).st_size),
-+ }
-+ else:
-+ metadata = {
-+ 'X-Timestamp': request.headers['x-timestamp'],
-+ 'Content-Type': request.headers['content-type'],
-+ 'ETag': etag,
-+ 'Content-Length': str(os.fstat(fd).st_size),
-+ X_TYPE: OBJECT,
-+ X_OBJECT_TYPE: FILE,
-+ }
-+
-+ if self.fs_object and \
-+ request.headers['content-type'].lower() == DIR_TYPE:
-+ metadata.update({X_OBJECT_TYPE: MARKER_DIR})
- metadata.update(val for val in request.headers.iteritems()
- if val[0].lower().startswith('x-object-meta-') and
- len(val[0]) > 14)
-@@ -612,7 +652,7 @@ class ObjectController(object):
- 'x-timestamp': file.metadata['X-Timestamp'],
- 'x-etag': file.metadata['ETag'],
- 'x-trans-id': request.headers.get('x-trans-id', '-')},
-- device)
-+ (self.fs_object and account) or device)
- resp = HTTPCreated(request=request, etag=etag)
- return resp
-
-@@ -626,9 +666,9 @@ class ObjectController(object):
- content_type='text/plain')
- if self.mount_check and not check_mount(self.devices, device):
- return Response(status='507 %s is not mounted' % device)
-- file = DiskFile(self.devices, device, partition, account, container,
-- obj, self.logger, keep_data_fp=True,
-- disk_chunk_size=self.disk_chunk_size)
-+ file = self.get_DiskFile_obj(self.devices, device, partition, account, container,
-+ obj, self.logger, keep_data_fp=True,
-+ disk_chunk_size=self.disk_chunk_size)
- if file.is_deleted() or ('X-Delete-At' in file.metadata and
- int(file.metadata['X-Delete-At']) <= time.time()):
- if request.headers.get('if-match') == '*':
-@@ -702,7 +742,7 @@ class ObjectController(object):
- return resp
- if self.mount_check and not check_mount(self.devices, device):
- return Response(status='507 %s is not mounted' % device)
-- file = DiskFile(self.devices, device, partition, account, container,
-+ file = self.get_DiskFile_obj(self.devices, device, partition, account, container,
- obj, self.logger, disk_chunk_size=self.disk_chunk_size)
- if file.is_deleted() or ('X-Delete-At' in file.metadata and
- int(file.metadata['X-Delete-At']) <= time.time()):
-@@ -744,7 +784,7 @@ class ObjectController(object):
- if self.mount_check and not check_mount(self.devices, device):
- return Response(status='507 %s is not mounted' % device)
- response_class = HTTPNoContent
-- file = DiskFile(self.devices, device, partition, account, container,
-+ file = self.get_DiskFile_obj(self.devices, device, partition, account, container,
- obj, self.logger, disk_chunk_size=self.disk_chunk_size)
- if 'x-if-delete-at' in request.headers and \
- int(request.headers['x-if-delete-at']) != \
-@@ -797,9 +837,18 @@ class ObjectController(object):
- raise hashes
- return Response(body=pickle.dumps(hashes))
-
-+ def plugin(self, env):
-+ if env.get('Gluster_enabled', False):
-+ self.fs_object = env.get('fs_object')
-+ self.devices = env.get('root')
-+ self.mount_check = False
-+ else:
-+ self.fs_object = None
-+
- def __call__(self, env, start_response):
- """WSGI Application entry point for the Swift Object Server."""
- start_time = time.time()
-+ self.plugin(env)
- req = Request(env)
- self.logger.txn_id = req.headers.get('x-trans-id', None)
- if not check_utf8(req.path_info):
-diff --git a/swift/proxy/server.py b/swift/proxy/server.py
-index 17613b8..d277d28 100644
---- a/swift/proxy/server.py
-+++ b/swift/proxy/server.py
-@@ -1,4 +1,5 @@
- # Copyright (c) 2010-2012 OpenStack, LLC.
-+# Copyright (c) 2011 Red Hat, Inc.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
-@@ -53,11 +54,20 @@ from webob import Request, Response
-
- from swift.common.ring import Ring
- from swift.common.utils import cache_from_env, ContextPool, get_logger, \
-- get_remote_client, normalize_timestamp, split_path, TRUE_VALUES
-+ get_remote_client, normalize_timestamp, split_path, TRUE_VALUES, \
-+ plugin_enabled
- from swift.common.bufferedhttp import http_connect
--from swift.common.constraints import check_metadata, check_object_creation, \
-- check_utf8, CONTAINER_LISTING_LIMIT, MAX_ACCOUNT_NAME_LENGTH, \
-- MAX_CONTAINER_NAME_LENGTH, MAX_FILE_SIZE
-+
-+if plugin_enabled():
-+ from swift.plugins.constraints import check_object_creation, \
-+ MAX_ACCOUNT_NAME_LENGTH, MAX_CONTAINER_NAME_LENGTH, MAX_FILE_SIZE
-+else:
-+ from swift.common.constraints import check_object_creation, \
-+ MAX_ACCOUNT_NAME_LENGTH, MAX_CONTAINER_NAME_LENGTH, MAX_FILE_SIZE
-+
-+from swift.common.constraints import check_metadata, check_utf8, \
-+ CONTAINER_LISTING_LIMIT
-+
- from swift.common.exceptions import ChunkReadTimeout, \
- ChunkWriteTimeout, ConnectionTimeout
-
-diff --git a/test/__init__.py b/test/__init__.py
-index ef2ce31..363a051 100644
---- a/test/__init__.py
-+++ b/test/__init__.py
-@@ -6,8 +6,16 @@ import sys
- import os
- from ConfigParser import MissingSectionHeaderError
- from StringIO import StringIO
--
- from swift.common.utils import readconf
-+from swift.common.utils import plugin_enabled
-+if plugin_enabled():
-+ from swift.plugins.constraints import MAX_OBJECT_NAME_LENGTH, \
-+ MAX_CONTAINER_NAME_LENGTH, MAX_ACCOUNT_NAME_LENGTH, \
-+ MAX_FILE_SIZE
-+else:
-+ from swift.common.constraints import MAX_OBJECT_NAME_LENGTH, \
-+ MAX_CONTAINER_NAME_LENGTH, MAX_ACCOUNT_NAME_LENGTH, \
-+ MAX_FILE_SIZE
-
- setattr(__builtin__, '_', lambda x: x)
-
-diff --git a/test/functional/tests.py b/test/functional/tests.py
-index b25b4fd..8d12f58 100644
---- a/test/functional/tests.py
-+++ b/test/functional/tests.py
-@@ -31,6 +31,16 @@ import urllib
- from test import get_config
- from swift import Account, AuthenticationFailed, Connection, Container, \
- File, ResponseError
-+from test import plugin_enabled
-+if plugin_enabled():
-+ from test import MAX_OBJECT_NAME_LENGTH, \
-+ MAX_CONTAINER_NAME_LENGTH, MAX_ACCOUNT_NAME_LENGTH, \
-+ MAX_FILE_SIZE
-+else:
-+ from test import MAX_OBJECT_NAME_LENGTH, \
-+ MAX_CONTAINER_NAME_LENGTH, MAX_ACCOUNT_NAME_LENGTH, \
-+ MAX_FILE_SIZE
-+
-
- config = get_config()
-
-@@ -361,7 +371,7 @@ class TestContainer(Base):
- set_up = False
-
- def testContainerNameLimit(self):
-- limit = 256
-+ limit = MAX_CONTAINER_NAME_LENGTH
-
- for l in (limit-100, limit-10, limit-1, limit,
- limit+1, limit+10, limit+100):
-@@ -949,7 +959,7 @@ class TestFile(Base):
- self.assert_status(404)
-
- def testNameLimit(self):
-- limit = 1024
-+ limit = MAX_OBJECT_NAME_LENGTH
-
- for l in (1, 10, limit/2, limit-1, limit, limit+1, limit*2):
- file = self.env.container.file('a'*l)
-@@ -1093,7 +1103,7 @@ class TestFile(Base):
- self.assert_(file.read(hdrs={'Range': r}) == data[0:1000])
-
- def testFileSizeLimit(self):
-- limit = 5*2**30 + 2
-+ limit = MAX_FILE_SIZE
- tsecs = 3
-
- for i in (limit-100, limit-10, limit-1, limit, limit+1, limit+10,
-diff --git a/test/unit/obj/test_server.py b/test/unit/obj/test_server.py
-index 075700e..5b6f32d 100644
---- a/test/unit/obj/test_server.py
-+++ b/test/unit/obj/test_server.py
-@@ -1355,7 +1355,7 @@ class TestObjectController(unittest.TestCase):
-
- def test_max_object_name_length(self):
- timestamp = normalize_timestamp(time())
-- req = Request.blank('/sda1/p/a/c/' + ('1' * 1024),
-+ req = Request.blank('/sda1/p/a/c/' + ('1' * MAX_OBJECT_NAME_LENGTH),
- environ={'REQUEST_METHOD': 'PUT'},
- headers={'X-Timestamp': timestamp,
- 'Content-Length': '4',
-diff --git a/test/unit/proxy/test_server.py b/test/unit/proxy/test_server.py
-index 364370e..c17fe59 100644
---- a/test/unit/proxy/test_server.py
-+++ b/test/unit/proxy/test_server.py
-@@ -21,7 +21,6 @@ import os
- import sys
- import unittest
- from nose import SkipTest
--from ConfigParser import ConfigParser
- from contextlib import contextmanager
- from cStringIO import StringIO
- from gzip import GzipFile
-@@ -44,8 +43,18 @@ from swift.account import server as account_server
- from swift.container import server as container_server
- from swift.obj import server as object_server
- from swift.common import ring
--from swift.common.constraints import MAX_META_NAME_LENGTH, \
-- MAX_META_VALUE_LENGTH, MAX_META_COUNT, MAX_META_OVERALL_SIZE, MAX_FILE_SIZE
-+from swift.common.utils import plugin_enabled
-+if plugin_enabled():
-+ from swift.plugins.constraints import MAX_META_NAME_LENGTH, \
-+ MAX_META_VALUE_LENGTH, MAX_META_COUNT, MAX_META_OVERALL_SIZE, \
-+ MAX_FILE_SIZE, MAX_ACCOUNT_NAME_LENGTH, MAX_CONTAINER_NAME_LENGTH, \
-+ MAX_OBJECT_NAME_LENGTH
-+else:
-+ from swift.plugins.constraints import MAX_META_NAME_LENGTH, \
-+ MAX_META_VALUE_LENGTH, MAX_META_COUNT, MAX_META_OVERALL_SIZE, \
-+ MAX_FILE_SIZE, MAX_ACCOUNT_NAME_LENGTH, MAX_CONTAINER_NAME_LENGTH, \
-+ MAX_OBJECT_NAME_LENGTH
-+
- from swift.common.utils import mkdirs, normalize_timestamp, NullLogger
- from swift.common.wsgi import monkey_patch_mimetools
-
-@@ -3207,7 +3216,8 @@ class TestContainerController(unittest.TestCase):
- def test_PUT_max_container_name_length(self):
- with save_globals():
- controller = proxy_server.ContainerController(self.app, 'account',
-- '1' * 256)
-+ '1' *
-+ MAX_CONTAINER_NAME_LENGTH,)
- self.assert_status_map(controller.PUT,
- (200, 200, 200, 201, 201, 201), 201,
- missing_container=True)
-@@ -3813,7 +3823,8 @@ class TestAccountController(unittest.TestCase):
- def test_PUT_max_account_name_length(self):
- with save_globals():
- self.app.allow_account_management = True
-- controller = proxy_server.AccountController(self.app, '1' * 256)
-+ controller = proxy_server.AccountController(self.app, '1' *
-+ MAX_ACCOUNT_NAME_LENGTH)
- self.assert_status_map(controller.PUT, (201, 201, 201), 201)
- controller = proxy_server.AccountController(self.app, '2' * 257)
- self.assert_status_map(controller.PUT, (201, 201, 201), 400)
diff --git a/tests/00-geo-rep/00-georep-verify-non-root-setup.t b/tests/00-geo-rep/00-georep-verify-non-root-setup.t
new file mode 100644
index 00000000000..a55fd3e5e6a
--- /dev/null
+++ b/tests/00-geo-rep/00-georep-verify-non-root-setup.t
@@ -0,0 +1,294 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=600
+
+### Basic Non-root geo-rep setup test with Distribute Replicate volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+##User and group to be used for non-root geo-rep setup
+usr="nroot"
+grp="ggroup"
+
+slave_url=$usr@$slave
+slave_vol=$GSV0
+ssh_url=$usr@$SH0
+
+############################################################
+#SETUP VOLUMES AND VARIABLES
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
+TEST $CLI volume start $GSV0
+
+##Mount master
+#TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+#TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+
+##########################################################
+#TEST FUNCTIONS
+
+function distribute_key_non_root()
+{
+ ${GLUSTER_LIBEXECDIR}/set_geo_rep_pem_keys.sh $usr $master $slave_vol
+ echo $?
+}
+
+
+function check_status_non_root()
+{
+ local search_key=$1
+ $GEOREP_CLI $master $slave_url status | grep -F "$search_key" | wc -l
+}
+
+
+function check_and_clean_group()
+{
+ if [ $(getent group $grp) ]
+ then
+ groupdel $grp;
+ echo $?
+ else
+ echo 0
+ fi
+}
+
+function clean_lock_files()
+{
+ if [ ! -f /etc/passwd.lock ];
+ then
+ rm -rf /etc/passwd.lock;
+ fi
+
+ if [ ! -f /etc/group.lock ];
+ then
+ rm -rf /etc/group.lock;
+ fi
+
+ if [ ! -f /etc/shadow.lock ];
+ then
+ rm -rf /etc/shadow.lock;
+ fi
+
+ if [ ! -f /etc/gshadow.lock ];
+ then
+ rm -rf /etc/gshadow.lock;
+ fi
+}
+
+
+###########################################################
+#SETUP NON-ROOT GEO REPLICATION
+
+##Create ggroup group
+##First test if group exists and then create new one
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_and_clean_group
+
+##cleanup *.lock files
+
+clean_lock_files
+
+TEST /usr/sbin/groupadd $grp
+
+clean_lock_files
+##Del if exists and create non-root user and assign it to newly created group
+userdel -r -f $usr
+TEST /usr/sbin/useradd -G $grp $usr
+
+##Modify password for non-root user to have control over distributing ssh-key
+echo "$usr:pass" | chpasswd
+
+##Set up mountbroker root
+TEST gluster-mountbroker setup /var/mountbroker-root $grp
+
+##Associate volume and non-root user to the mountbroker
+TEST gluster-mountbroker add $slave_vol $usr
+
+##Check ssh setting for clear text passwords
+sed '/^PasswordAuthentication /{s/no/yes/}' -i /etc/ssh/sshd_config && grep '^PasswordAuthentication ' /etc/ssh/sshd_config && service sshd restart
+
+
+##Restart glusterd to reflect mountbroker changages
+TEST killall_gluster;
+TEST glusterd;
+TEST pidof glusterd;
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "3" brick_count ${META_VOL}
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "4" brick_count $GMV0
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "4" brick_count $GSV0
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+## Check status of mount-broker
+TEST gluster-mountbroker status
+
+
+##Setup password-less ssh for non-root user
+#sshpass -p "pass" ssh-copy-id -i ~/.ssh/id_rsa.pub $ssh_url
+##Run ssh agent
+eval "$(ssh-agent -s)"
+PASS="pass"
+
+
+##Create a temp script to echo the SSH password, used by SSH_ASKPASS
+
+SSH_ASKPASS_SCRIPT=/tmp/ssh-askpass-script
+cat > ${SSH_ASKPASS_SCRIPT} <<EOL
+#!/bin/bash
+echo "${PASS}"
+EOL
+chmod u+x ${SSH_ASKPASS_SCRIPT}
+
+##set no display, necessary for ssh to use with setsid and SSH_ASKPASS
+export DISPLAY
+
+export SSH_ASKPASS=${SSH_ASKPASS_SCRIPT}
+
+DISPLAY=: setsid ssh-copy-id -o 'PreferredAuthentications=password' -o 'StrictHostKeyChecking=no' -i ~/.ssh/id_rsa.pub $ssh_url
+
+##Setting up PATH for gluster binaries in case of source installation
+##ssh -oNumberOfPasswordPrompts=0 -oStrictHostKeyChecking=no $ssh_url "echo "export PATH=$PATH:/usr/local/sbin" >> ~/.bashrc"
+
+##Creating secret pem pub file
+TEST gluster-georep-sshkey generate
+
+##Create geo-rep non-root setup
+
+TEST $GEOREP_CLI $master $slave_url create push-pem
+
+#check for session creation
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_non_root "Created"
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave_url config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave_url config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+## Test for key distribution
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 distribute_key_non_root
+
+##Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave_url start
+
+## Meta volume is enabled so looking for 2 Active and 2 Passive sessions
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_non_root "Active"
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_non_root "Passive"
+
+#Pause geo-replication session
+TEST $GEOREP_CLI $master $slave_url pause
+
+#Resume geo-replication session
+TEST $GEOREP_CLI $master $slave_url resume
+
+#Validate failure of volume stop when geo-rep is running
+TEST ! $CLI volume stop $GMV0
+
+#Negative test for ssh-port
+#Port should be integer and between 1-65535 range
+
+TEST ! $GEOREP_CLI $master $slave_url config ssh-port -22
+
+TEST ! $GEOREP_CLI $master $slave_url config ssh-port abc
+
+TEST ! $GEOREP_CLI $master $slave_url config ssh-port 6875943
+
+TEST ! $GEOREP_CLI $master $slave_url config ssh-port 4.5
+
+TEST ! $GEOREP_CLI $master $slave_url config ssh-port 22a
+
+#Config Set ssh-port to validate int validation
+TEST $GEOREP_CLI $master $slave config ssh-port 22
+
+#Hybrid directory rename test BZ#1763439
+
+TEST $GEOREP_CLI $master $slave_url config change_detector xsync
+#verify master and slave mount
+
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT "^1$" check_mounted ${master_mnt}
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT "^1$" check_mounted ${slave_mnt}
+
+#Create test data for hybrid crawl
+TEST mkdir ${master_mnt}/dir1
+TEST mkdir ${master_mnt}/dir1/dir2
+TEST mkdir ${master_mnt}/dir1/dir3
+TEST mkdir ${master_mnt}/hybrid_d1
+
+mv ${master_mnt}/hybrid_d1 ${master_mnt}/hybrid_rn_d1
+mv ${master_mnt}/dir1/dir2 ${master_mnt}/rn_dir2
+mv ${master_mnt}/dir1/dir3 ${master_mnt}/dir1/rn_dir3
+
+#Verify hybrid crawl data on slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_rn_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/rn_dir2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/dir1/rn_dir3
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave_url stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave_url delete
+
+#Cleanup authorized_keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' /home/$usr/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' /home/$usr/.ssh/authorized_keys
+
+#clear mountbroker
+gluster-mountbroker remove --user $usr
+gluster-mountbroker remove --volume $slave_vol
+
+#delete group and user created for non-root setup
+TEST userdel -r -f $usr
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_and_clean_group
+
+##password script cleanup
+rm -rf /tmp/ssh-askpass-script
+
+
+cleanup;
+
diff --git a/tests/00-geo-rep/00-georep-verify-setup.t b/tests/00-geo-rep/00-georep-verify-setup.t
new file mode 100644
index 00000000000..0d46c04102d
--- /dev/null
+++ b/tests/00-geo-rep/00-georep-verify-setup.t
@@ -0,0 +1,110 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=400
+GEO_REP_TIMEOUT=200
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
+TEST $CLI volume start $GSV0
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Test invalid slave url
+TEST ! $GEOREP_CLI $master ${SH0}:${GSV0} create push-pem
+TEST ! $GEOREP_CLI $master ${SH0}:::${GSV0} create push-pem
+
+#Create geo-rep session
+TEST create_georep_session $master $slave
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+#get-state commamd shouldn't crash glusterd when geo-rep session is configured
+TEST $CLI get-state
+TEST pidof glusterd
+
+TEST $CLI get-state detail
+TEST pidof glusterd
+
+#Pause geo-replication session
+TEST $GEOREP_CLI $master $slave pause
+
+#Resume geo-replication session
+TEST $GEOREP_CLI $master $slave resume
+
+#Validate failure of volume stop when geo-rep is running
+TEST ! $CLI volume stop $GMV0
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/01-georep-glusterd-tests.t b/tests/00-geo-rep/01-georep-glusterd-tests.t
new file mode 100644
index 00000000000..47d5116af26
--- /dev/null
+++ b/tests/00-geo-rep/01-georep-glusterd-tests.t
@@ -0,0 +1,213 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=300
+
+#Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+#Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+slave1=root@${SH0}::${GSV1}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+#create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 3 $H0:$B0/${GMV0}{1,2,3};
+
+#Negative testase: Create geo-rep session, master is not started
+TEST ! $GEOREP_CLI $master $slave create push-pem
+
+TEST $CLI volume start $GMV0
+
+#create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 3 $H0:$B0/${GSV0}{1,2,3};
+
+#Negative testcase: Create geo-rep session, slave is not started
+TEST ! $GEOREP_CLI $master $slave create push-pem
+
+TEST $CLI volume start $GSV0
+
+#create_and_start_slave1_volume
+TEST $CLI volume create $GSV1 replica 3 $H0:$B0/${GSV1}{1,2,3};
+TEST $CLI volume start $GSV1
+
+#Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+#Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+#Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION GLUSTERD TESTS WITH FANOUT SETUP
+############################################################
+
+#Negative testcase: Test invalid master
+TEST ! $GEOREP_CLI master1 ${SH0}::${GSV0} create push-pem
+
+#Negatvie testcase: Test invalid slave
+TEST ! $GEOREP_CLI $master ${SH0}::slave3 create push-pem
+
+##------------------- Session 1 Creation Begin-----------------##
+#Create geo-rep session
+TEST create_georep_session $master $slave
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+##------------------- Session 1 Creation End-----------------##
+
+##------------------- Session 2 Creation Begin-----------------##
+#Create geo-rep session2
+TEST $GEOREP_CLI $master $slave1 create ssh-port 22 no-verify
+
+#Config gluster-command-dir for session2
+TEST $GEOREP_CLI $master $slave1 config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir for session2
+TEST $GEOREP_CLI $master $slave1 config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume for session2
+TEST $GEOREP_CLI $master $slave1 config use_meta_volume true
+##------------------- Session 2 Creation End-----------------##
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+#check geo-rep status without specifying master and slave volumes
+TEST $GEOREP_CLI status
+
+#Start_georep force
+TEST $GEOREP_CLI $master $slave1 start force
+
+#Negative testcase: Create the same session after start, fails
+#With root@ prefix
+TEST ! $GEOREP_CLI $master $slave1 create push-pem
+#Without root@ prefix
+TEST ! $GEOREP_CLI $master ${SH0}::${GSV1} create push-pem
+TEST $GEOREP_CLI $master $slave1 create push-pem force
+
+##------------------- Fanout status testcases Begin --------------##
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_fanout_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_fanout_status_num_rows "Passive"
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_fanout_status_detail_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_fanout_status_detail_num_rows "Passive"
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_all_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_all_status_num_rows "Passive"
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_all_status_detail_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_all_status_detail_num_rows "Passive"
+
+##------------------- Fanout status testcases End --------------##
+
+##------Checkpoint Testcase Begin---------------##
+#Write I/O
+echo "test data" > $M0/file1
+TEST $GEOREP_CLI $master $slave config checkpoint now
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_checkpoint_met $master $slave
+touch $M0
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 verify_checkpoint_met $master $slave
+##------Checkpoint Testcase End---------------##
+
+##------------------ Geo-rep config testcases Begin--------------------##
+TEST $GEOREP_CLI $master $slave config
+TEST ! $GEOREP_CLI $master $slave config arsync-options '-W'
+TEST $GEOREP_CLI $master $slave config rsync-options '-W'
+TEST $GEOREP_CLI $master $slave config rsync-options
+TEST $GEOREP_CLI $master $slave config \!rsync-options
+TEST $GEOREP_CLI $master $slave config sync-xattrs false
+##------------------ Geo-rep config testcases End --------------------##
+
+##---------------- Pause/Resume testcase Begin-------------##
+#Negative testcase: Resume geo-replication session when not paused
+TEST ! $GEOREP_CLI $master $slave1 resume
+TEST $GEOREP_CLI $master $slave1 resume force
+
+#Pause geo-replication session with root@
+TEST $GEOREP_CLI $master $slave1 pause force
+
+#Resume geo-replication session with root@
+TEST $GEOREP_CLI $master $slave1 resume force
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave1 stop force
+
+#Negative testcase: Resume geo-replication session after geo-rep stop
+TEST ! $GEOREP_CLI $master $slave1 resume
+##---------------- Pause/Resume testcase End-------------##
+
+##-----------------glusterd slave key/value upgrade testcase Begin ---------##
+#Upgrade test of slave key stored in glusterd info file
+src=$(grep slave2 /var/lib/glusterd/vols/$master/info)
+#Remove slave uuuid (last part after divided by : )
+dst=${src%:*}
+
+#Update glusterd info file with old slave format
+sed -i "s|$src|$dst|g" /var/lib/glusterd/vols/$master/info
+TEST ! grep $src /var/lib/glusterd/vols/$master/info
+
+#Restart glusterd to update in-memory volinfo
+TEST pkill glusterd
+TEST glusterd;
+TEST pidof glusterd
+
+#Start geo-rep and validate slave format is updated
+TEST $GEOREP_CLI $master $slave1 start force
+TEST grep $src /var/lib/glusterd/vols/$master/info
+##-----------------glusted slave key/value upgrade testcase End ---------##
+
+#Negative testcase: Delete Geo-rep 2 fails as geo-rep is running
+TEST ! $GEOREP_CLI $master $slave1 delete
+
+#Stop and Delete Geo-rep 2
+TEST $GEOREP_CLI $master $slave1 stop force
+TEST $GEOREP_CLI $master $slave1 delete reset-sync-time
+
+#Stop and Delete Geo-rep 1
+TEST $GEOREP_CLI $master $slave stop
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/bug-1600145.t b/tests/00-geo-rep/bug-1600145.t
new file mode 100644
index 00000000000..1d38bf92682
--- /dev/null
+++ b/tests/00-geo-rep/bug-1600145.t
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+### Basic Tests with Distribute Replicate volumes
+
+##Cleanup and start glusterd
+cleanup;
+SCRIPT_TIMEOUT=600
+TEST glusterd;
+TEST pidof glusterd
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2};
+gluster v set all cluster.brick-multiplex on
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2};
+TEST $CLI volume start $GSV0
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Create geo-rep session
+TEST create_georep_session $master $slave
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Count no. of changelog socket
+brick_pid=`ps -aef | grep glusterfsd | grep -v "shared_storage" | grep -v grep | awk -F " " '{print $2}'`
+n=$(grep -Fc "changelog" /proc/$brick_pid/net/unix)
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Passive"
+
+#Count no. of changelog socket
+brick_pid=`ps -aef | grep glusterfsd | grep -v "shared_storage" | grep -v grep | awk -F " " '{print $2}'`
+c=$(grep -Fc "changelog" /proc/$brick_pid/net/unix)
+let expected=n+2
+TEST [ "$c" -eq "$expected" ]
+
+#Kill the "Active" brick
+brick=$($GEOREP_CLI $master $slave status | grep -F "Active" | awk {'print $3'})
+cat /proc/$brick_pid/net/unix | grep "changelog"
+TEST kill_brick $GMV0 $H0 $brick
+#Expect geo-rep status to be "Faulty"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Faulty"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+
+#Count no. of changelog socket
+brick_pid=`ps -aef | grep glusterfsd | grep -v "shared_storage" | grep -v grep | awk -F " " '{print $2}'`
+cat /proc/$brick_pid/net/unix | grep "changelog"
+ls -lrth /proc/$brick_pid/fd | grep "socket"
+c=$(grep -Fc "changelog" /proc/$brick_pid/net/unix)
+TEST [ "$c" -eq "$n" ]
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
diff --git a/tests/00-geo-rep/bug-1708603.t b/tests/00-geo-rep/bug-1708603.t
new file mode 100644
index 00000000000..26913f1d318
--- /dev/null
+++ b/tests/00-geo-rep/bug-1708603.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=300
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="gluster volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
+TEST $CLI volume start $GSV0
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+#Create geo-rep session
+TEST create_georep_session $master $slave
+
+echo n | $GEOREP_CLI $master $slave config ignore-deletes true >/dev/null 2>&1
+EXPECT "false" echo $($GEOREP_CLI $master $slave config ignore-deletes)
+echo y | $GEOREP_CLI $master $slave config ignore-deletes true
+EXPECT "true" echo $($GEOREP_CLI $master $slave config ignore-deletes)
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
new file mode 100644
index 00000000000..c45d2ff62ce
--- /dev/null
+++ b/tests/00-geo-rep/georep-basic-dr-rsync-arbiter.t
@@ -0,0 +1,234 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=500
+
+AREQUAL_PATH=$(dirname $0)/../utils
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+
+### Basic Tests with Distribute Replicate volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=4
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 3 arbiter 1 $H0:$B0/${GMV0}{1,2,3,4,5,6};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 3 arbiter 1 $H0:$B0/${GSV0}{1,2,3,4,5,6};
+TEST $CLI volume start $GSV0
+TEST $CLI volume set $GSV0 performance.stat-prefetch off
+TEST $CLI volume set $GSV0 performance.quick-read off
+TEST $CLI volume set $GSV0 performance.readdir-ahead off
+TEST $CLI volume set $GSV0 performance.read-ahead off
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Check Hybrid Crawl
+TEST create_data "hybrid"
+TEST create_georep_session $master $slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 6 check_status_num_rows "Created"
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Set changelog roll-over time to 3 secs
+TEST $CLI volume set $GMV0 changelog.rollover-time 3
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "hybrid"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/hybrid_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/hybrid_f3 ${slave_mnt}/hybrid_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/hybrid_d3 ${slave_mnt}/hybrid_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok hybrid_f1 ${slave_mnt}/hybrid_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/hybrid_f1 ${slave_mnt}/hybrid_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/hybrid_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1
+
+#Check History Crawl.
+TEST $GEOREP_CLI $master $slave stop
+TEST create_data "history"
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "history"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/history_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/history_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/history_f3 ${slave_mnt}/history_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/history_d3 ${slave_mnt}/history_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok history_f1 ${slave_mnt}/history_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/history_f1 ${slave_mnt}/history_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/history_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/history_chown_f1
+
+#Check Changelog Crawl.
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Changelog Crawl"
+TEST create_data "changelog"
+
+# logrotate test
+logrotate_file=${master_mnt}/logrotate/lg_test_file
+TEST mkdir -p ${master_mnt}/logrotate
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+
+# CREATE + RENAME
+create_rename ${master_mnt}/rename_test_file
+
+# hard-link rename
+hardlink_rename ${master_mnt}/hardlink_rename_test_file
+
+#SYNC CHECK
+#data_tests "changelog"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/changelog_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/changelog_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/changelog_f3 ${slave_mnt}/changelog_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/changelog_d3 ${slave_mnt}/changelog_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok changelog_f1 ${slave_mnt}/changelog_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/changelog_f1 ${slave_mnt}/changelog_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/changelog_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+#logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+#CREATE+RENAME
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+
+#hardlink rename
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Symlink testcase: Rename symlink and create dir with same name
+TEST create_symlink_rename_mkdir_data
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+#TEST create_hardlink_rename_data
+
+#rsnapshot usecase
+TEST create_rsnapshot_data
+
+#Start Geo-rep
+TEST $GEOREP_CLI $master $slave start
+
+#Wait for geo-rep to come up
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#Check for hardlink rename case. BUG: 1296174
+#It should not create src file again on changelog reprocessing
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Symlink testcase: Rename symlink and create dir with same name
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/symlink_test1
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
+
+#rsnapshot usecase
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+
+#Test rsync-options set BUG:1629561
+TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file"
+TEST "echo sampledata > $master_mnt/rsync_option_test_file"
+
+#rename with existing destination case BUG:1694820
+TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Pause geo-replication session
+TEST $GEOREP_CLI $master $slave pause force
+
+#Resume geo-replication session
+TEST $GEOREP_CLI $master $slave resume force
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup are-equal binary
+TEST rm $AREQUAL_PATH/arequal-checksum
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-basic-dr-rsync.t b/tests/00-geo-rep/georep-basic-dr-rsync.t
new file mode 100644
index 00000000000..d785aa59fc9
--- /dev/null
+++ b/tests/00-geo-rep/georep-basic-dr-rsync.t
@@ -0,0 +1,258 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=500
+
+AREQUAL_PATH=$(dirname $0)/../utils
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+
+### Basic Tests with Distribute Replicate volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
+TEST $CLI volume start $GSV0
+TEST $CLI volume set $GSV0 performance.stat-prefetch off
+TEST $CLI volume set $GSV0 performance.quick-read off
+TEST $CLI volume set $GSV0 performance.readdir-ahead off
+TEST $CLI volume set $GSV0 performance.read-ahead off
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Check Hybrid Crawl
+TEST create_data "hybrid"
+TEST create_georep_session $master $slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Created"
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Negative test for ssh-port
+#Port should be integer and between 1-65535 range
+
+TEST ! $GEOREP_CLI $master $slave config ssh-port -22
+
+TEST ! $GEOREP_CLI $master $slave config ssh-port abc
+
+TEST ! $GEOREP_CLI $master $slave config ssh-port 6875943
+
+TEST ! $GEOREP_CLI $master $slave config ssh-port 4.5
+
+TEST ! $GEOREP_CLI $master $slave config ssh-port 22a
+
+#Config Set ssh-port to validate int validation
+TEST $GEOREP_CLI $master $slave config ssh-port 22
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Set changelog roll-over time to 3 secs
+TEST $CLI volume set $GMV0 changelog.rollover-time 3
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+#data_tests "hybrid"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/hybrid_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/hybrid_f3 ${slave_mnt}/hybrid_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/hybrid_d3 ${slave_mnt}/hybrid_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok hybrid_f1 ${slave_mnt}/hybrid_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/hybrid_f1 ${slave_mnt}/hybrid_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/hybrid_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1
+
+#Check History Crawl.
+TEST $GEOREP_CLI $master $slave stop
+TEST create_data "history"
+TEST create_rename_symlink_case
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+#data_tests "history"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/history_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/history_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/history_f3 ${slave_mnt}/history_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/history_d3 ${slave_mnt}/history_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok history_f1 ${slave_mnt}/history_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/history_f1 ${slave_mnt}/history_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/history_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/history_chown_f1
+
+#Check Changelog Crawl.
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Changelog Crawl"
+TEST create_data "changelog"
+
+# logrotate test
+logrotate_file=${master_mnt}/logrotate/lg_test_file
+TEST mkdir -p ${master_mnt}/logrotate
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+
+# CREATE + RENAME
+create_rename ${master_mnt}/rename_test_file
+
+# hard-link rename
+hardlink_rename ${master_mnt}/hardlink_rename_test_file
+
+#SYNC CHECK
+#data_tests "changelog"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/changelog_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/changelog_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/changelog_f3 ${slave_mnt}/changelog_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/changelog_d3 ${slave_mnt}/changelog_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok changelog_f1 ${slave_mnt}/changelog_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/changelog_f1 ${slave_mnt}/changelog_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/changelog_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+#logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+#CREATE+RENAME
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+
+#hardlink rename
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Symlink testcase: Rename symlink and create dir with same name
+TEST create_symlink_rename_mkdir_data
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+TEST create_hardlink_rename_data
+
+#rsnapshot usecase
+TEST create_rsnapshot_data
+
+#Start Geo-rep
+TEST $GEOREP_CLI $master $slave start
+
+#Wait for geo-rep to come up
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+#Check for hardlink rename case. BUG: 1296174
+#It should not create src file again on changelog reprocessing
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Symlink testcase: Rename symlink and create dir with same name
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/symlink_test1
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
+
+#rsnapshot usecase
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+
+#Test rsync-options set BUG:1629561
+TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file"
+TEST "echo sampledata > $master_mnt/rsync_option_test_file"
+
+#rename with existing destination case BUG:1694820
+TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Test config upgrade BUG: 1707731
+config_file=$GLUSTERD_WORKDIR/geo-replication/${GMV0}_${SH0}_${GSV0}/gsyncd.conf
+cat >> $config_file<<EOL
+[peers ${GMV0} ${GSV0}]
+use_tarssh = true
+timeout = 1
+EOL
+TEST $GEOREP_CLI $master $slave stop
+TEST $GEOREP_CLI $master $slave start
+#verify that the config file is updated
+EXPECT "1" echo $(grep -Fc "vars" $config_file)
+EXPECT "1" echo $(grep -Fc "sync-method = tarssh" $config_file)
+EXPECT "1" echo $(grep -Fc "slave-timeout = 1" $config_file)
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup are-equal binary
+TEST rm $AREQUAL_PATH/arequal-checksum
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
new file mode 100644
index 00000000000..8fed929ffca
--- /dev/null
+++ b/tests/00-geo-rep/georep-basic-dr-tarssh-arbiter.t
@@ -0,0 +1,227 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=500
+
+AREQUAL_PATH=$(dirname $0)/../utils
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+
+### Basic Tests with Distribute Replicate volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=4
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 3 arbiter 1 $H0:$B0/${GMV0}{1,2,3,4,5,6};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 3 arbiter 1 $H0:$B0/${GSV0}{1,2,3,4,5,6};
+TEST $CLI volume start $GSV0
+TEST $CLI volume set $GSV0 performance.stat-prefetch off
+TEST $CLI volume set $GSV0 performance.quick-read off
+TEST $CLI volume set $GSV0 performance.readdir-ahead off
+TEST $CLI volume set $GSV0 performance.read-ahead off
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Check Hybrid Crawl
+TEST create_data "hybrid"
+TEST create_georep_session $master $slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 6 check_status_num_rows "Created"
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Set changelog roll-over time to 3 secs
+TEST $CLI volume set $GMV0 changelog.rollover-time 3
+
+#Config tarssh as sync-engine
+TEST $GEOREP_CLI $master $slave config sync-method tarssh
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "hybrid"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/hybrid_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/hybrid_f3 ${slave_mnt}/hybrid_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/hybrid_d3 ${slave_mnt}/hybrid_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok hybrid_f1 ${slave_mnt}/hybrid_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/hybrid_f1 ${slave_mnt}/hybrid_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/hybrid_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1
+
+#Check History Crawl.
+TEST $GEOREP_CLI $master $slave stop
+TEST create_data "history"
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "history"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/history_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/history_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/history_f3 ${slave_mnt}/history_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/history_d3 ${slave_mnt}/history_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok history_f1 ${slave_mnt}/history_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/history_f1 ${slave_mnt}/history_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/history_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/history_chown_f1
+
+#Check Changelog Crawl.
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Changelog Crawl"
+TEST create_data "changelog"
+
+# logrotate test
+logrotate_file=${master_mnt}/logrotate/lg_test_file
+TEST mkdir -p ${master_mnt}/logrotate
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+
+# CREATE + RENAME
+create_rename ${master_mnt}/rename_test_file
+
+# hard-link rename
+hardlink_rename ${master_mnt}/hardlink_rename_test_file
+
+#SYNC CHECK
+#data_tests "changelog"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/changelog_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/changelog_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/changelog_f3 ${slave_mnt}/changelog_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/changelog_d3 ${slave_mnt}/changelog_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok changelog_f1 ${slave_mnt}/changelog_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/changelog_f1 ${slave_mnt}/changelog_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/changelog_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+#logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+#CREATE+RENAME
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+
+#hardlink rename
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Symlink testcase: Rename symlink and create dir with same name
+TEST create_symlink_rename_mkdir_data
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+#TEST create_hardlink_rename_data
+
+#rsnapshot usecase
+TEST create_rsnapshot_data
+
+#Start Geo-rep
+TEST $GEOREP_CLI $master $slave start
+
+#Wait for geo-rep to come up
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#Check for hardlink rename case. BUG: 1296174
+#It should not create src file again on changelog reprocessing
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Symlink testcase: Rename symlink and create dir with same name
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/symlink_test1
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
+
+#rsnapshot usecase
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+
+#rename with existing destination case BUG:1694820
+TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup are-equal binary
+TEST rm $AREQUAL_PATH/arequal-checksum
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-basic-dr-tarssh.t b/tests/00-geo-rep/georep-basic-dr-tarssh.t
new file mode 100644
index 00000000000..feb2de74c90
--- /dev/null
+++ b/tests/00-geo-rep/georep-basic-dr-tarssh.t
@@ -0,0 +1,227 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=500
+
+AREQUAL_PATH=$(dirname $0)/../utils
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+
+### Basic Tests with Distribute Replicate volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
+TEST $CLI volume start $GSV0
+TEST $CLI volume set $GSV0 performance.stat-prefetch off
+TEST $CLI volume set $GSV0 performance.quick-read off
+TEST $CLI volume set $GSV0 performance.readdir-ahead off
+TEST $CLI volume set $GSV0 performance.read-ahead off
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Check Hybrid Crawl
+TEST create_data "hybrid"
+TEST create_georep_session $master $slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Created"
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Set changelog roll-over time to 3 secs
+TEST $CLI volume set $GMV0 changelog.rollover-time 3
+
+#Config tarssh as sync-engine
+TEST $GEOREP_CLI $master $slave config sync-method tarssh
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+#data_tests "hybrid"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/hybrid_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/hybrid_f3 ${slave_mnt}/hybrid_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/hybrid_d3 ${slave_mnt}/hybrid_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok hybrid_f1 ${slave_mnt}/hybrid_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/hybrid_f1 ${slave_mnt}/hybrid_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/hybrid_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1
+
+#Check History Crawl.
+TEST $GEOREP_CLI $master $slave stop
+TEST create_data "history"
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+#data_tests "history"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/history_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/history_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/history_f3 ${slave_mnt}/history_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/history_d3 ${slave_mnt}/history_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok history_f1 ${slave_mnt}/history_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/history_f1 ${slave_mnt}/history_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/history_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/history_chown_f1
+
+#Check Changelog Crawl.
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Changelog Crawl"
+TEST create_data "changelog"
+
+# logrotate test
+logrotate_file=${master_mnt}/logrotate/lg_test_file
+TEST mkdir -p ${master_mnt}/logrotate
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+
+# CREATE + RENAME
+create_rename ${master_mnt}/rename_test_file
+
+# hard-link rename
+hardlink_rename ${master_mnt}/hardlink_rename_test_file
+
+#SYNC CHECK
+#data_tests "changelog"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/changelog_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/changelog_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/changelog_f3 ${slave_mnt}/changelog_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/changelog_d3 ${slave_mnt}/changelog_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok changelog_f1 ${slave_mnt}/changelog_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/changelog_f1 ${slave_mnt}/changelog_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/changelog_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+#logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+#CREATE+RENAME
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+
+#hardlink rename
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Symlink testcase: Rename symlink and create dir with same name
+TEST create_symlink_rename_mkdir_data
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+TEST create_hardlink_rename_data
+
+#rsnapshot usecase
+TEST create_rsnapshot_data
+
+#Start Geo-rep
+TEST $GEOREP_CLI $master $slave start
+
+#Wait for geo-rep to come up
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+#Check for hardlink rename case. BUG: 1296174
+#It should not create src file again on changelog reprocessing
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Symlink testcase: Rename symlink and create dir with same name
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/symlink_test1
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
+
+#rsnapshot usecase
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+
+#rename with existing destination case BUG:1694820
+TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup are-equal binary
+TEST rm $AREQUAL_PATH/arequal-checksum
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-basic-rsync-ec.t b/tests/00-geo-rep/georep-basic-rsync-ec.t
new file mode 100644
index 00000000000..dd1f94edbc9
--- /dev/null
+++ b/tests/00-geo-rep/georep-basic-rsync-ec.t
@@ -0,0 +1,224 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=500
+
+AREQUAL_PATH=$(dirname $0)/../utils
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+
+### Basic Tests with Distributed Disperse volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=10
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 disperse 3 redundancy 1 $H0:$B0/${GMV0}{0..5};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 disperse 3 redundancy 1 $H0:$B0/${GSV0}{0..5};
+TEST $CLI volume start $GSV0
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Check Hybrid Crawl
+TEST create_data "hybrid"
+TEST create_georep_session $master $slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 6 check_status_num_rows "Created"
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Set changelog roll-over time to 3 secs
+TEST $CLI volume set $GMV0 changelog.rollover-time 3
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "hybrid"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/hybrid_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/hybrid_f3 ${slave_mnt}/hybrid_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/hybrid_d3 ${slave_mnt}/hybrid_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok hybrid_f1 ${slave_mnt}/hybrid_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/hybrid_f1 ${slave_mnt}/hybrid_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/hybrid_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1
+
+#Check History Crawl.
+TEST $GEOREP_CLI $master $slave stop
+TEST create_data "history"
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "history"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/history_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/history_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/history_f3 ${slave_mnt}/history_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/history_d3 ${slave_mnt}/history_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok history_f1 ${slave_mnt}/history_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/history_f1 ${slave_mnt}/history_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/history_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/history_chown_f1
+
+#Check Changelog Crawl.
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Changelog Crawl"
+TEST create_data "changelog"
+
+# logrotate test
+logrotate_file=${master_mnt}/logrotate/lg_test_file
+TEST mkdir -p ${master_mnt}/logrotate
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+
+# CREATE + RENAME
+create_rename ${master_mnt}/rename_test_file
+
+# hard-link rename
+hardlink_rename ${master_mnt}/hardlink_rename_test_file
+
+#SYNC CHECK
+#data_tests "changelog"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/changelog_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/changelog_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/changelog_f3 ${slave_mnt}/changelog_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/changelog_d3 ${slave_mnt}/changelog_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok changelog_f1 ${slave_mnt}/changelog_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/changelog_f1 ${slave_mnt}/changelog_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/changelog_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+#logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+#CREATE+RENAME
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+
+#hardlink rename
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Symlink testcase: Rename symlink and create dir with same name
+TEST create_symlink_rename_mkdir_data
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+TEST create_hardlink_rename_data
+
+#rsnapshot usecase
+#TEST create_rsnapshot_data
+
+#Start Geo-rep
+TEST $GEOREP_CLI $master $slave start
+
+#Wait for geo-rep to come up
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#Check for hardlink rename case. BUG: 1296174
+#It should not create src file again on changelog reprocessing
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Symlink testcase: Rename symlink and create dir with same name
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/symlink_test1
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
+
+#rsnapshot usecase
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+
+#Test rsync-options set BUG:1629561
+TEST gluster volume geo-rep $master $slave config rsync-options "--whole-file"
+TEST "echo sampledata > $master_mnt/rsync_option_test_file"
+
+#rename with existing destination case BUG:1694820
+#TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup are-equal binary
+TEST rm $AREQUAL_PATH/arequal-checksum
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-basic-tarssh-ec.t b/tests/00-geo-rep/georep-basic-tarssh-ec.t
new file mode 100644
index 00000000000..987bd9391c8
--- /dev/null
+++ b/tests/00-geo-rep/georep-basic-tarssh-ec.t
@@ -0,0 +1,223 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=500
+
+AREQUAL_PATH=$(dirname $0)/../utils
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+
+### Basic Tests with Distributed Disperse volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=10
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 disperse 3 redundancy 1 $H0:$B0/${GMV0}{0..5};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 disperse 3 redundancy 1 $H0:$B0/${GSV0}{0..5};
+TEST $CLI volume start $GSV0
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Check Hybrid Crawl
+TEST create_data "hybrid"
+TEST create_georep_session $master $slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 6 check_status_num_rows "Created"
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Set changelog roll-over time to 3 secs
+TEST $CLI volume set $GMV0 changelog.rollover-time 3
+
+#Config tarssh as sync-engine
+TEST $GEOREP_CLI $master $slave config sync-method tarssh
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Verify "features.read-only" Option
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_slave_read_only $GSV0
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "hybrid"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/hybrid_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/hybrid_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/hybrid_f3 ${slave_mnt}/hybrid_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/hybrid_d3 ${slave_mnt}/hybrid_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok hybrid_f1 ${slave_mnt}/hybrid_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/hybrid_f1 ${slave_mnt}/hybrid_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/hybrid_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/hybrid_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/hybrid_chown_f1
+
+#Check History Crawl.
+TEST $GEOREP_CLI $master $slave stop
+TEST create_data "history"
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#data_tests "history"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/history_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/history_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/history_f3 ${slave_mnt}/history_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/history_d3 ${slave_mnt}/history_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok history_f1 ${slave_mnt}/history_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/history_f1 ${slave_mnt}/history_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/history_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/history_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/history_chown_f1
+
+#Check Changelog Crawl.
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Changelog Crawl"
+TEST create_data "changelog"
+
+# logrotate test
+logrotate_file=${master_mnt}/logrotate/lg_test_file
+TEST mkdir -p ${master_mnt}/logrotate
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+logrotate_simulate $logrotate_file 2
+
+# CREATE + RENAME
+create_rename ${master_mnt}/rename_test_file
+
+# hard-link rename
+hardlink_rename ${master_mnt}/hardlink_rename_test_file
+
+#SYNC CHECK
+#data_tests "changelog"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 regular_file_ok ${slave_mnt}/changelog_f1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/changelog_d1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_file_ok ${slave_mnt}/changelog_f3 ${slave_mnt}/changelog_f4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 rename_dir_ok ${slave_mnt}/changelog_d3 ${slave_mnt}/changelog_d4
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 symlink_ok changelog_f1 ${slave_mnt}/changelog_sl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_file_ok ${slave_mnt}/changelog_f1 ${slave_mnt}/changelog_hl1
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_f2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 unlink_ok ${slave_mnt}/changelog_d2
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 data_ok ${slave_mnt}/changelog_f1 "HelloWorld!"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 chown_file_ok ${slave_mnt}/changelog_chown_f1
+
+#logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 directory_ok ${slave_mnt}/logrotate
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt}/logrotate ${slave_mnt}/logrotate
+
+#CREATE+RENAME
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 create_rename_ok ${slave_mnt}/create_rename_test_file
+
+#hardlink rename
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Symlink testcase: Rename symlink and create dir with same name
+TEST create_symlink_rename_mkdir_data
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+TEST create_hardlink_rename_data
+
+#rsnapshot usecase
+#TEST create_rsnapshot_data
+
+#Start Geo-rep
+TEST $GEOREP_CLI $master $slave start
+
+#Wait for geo-rep to come up
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 4 check_status_num_rows "Passive"
+
+#Check for hardlink rename case. BUG: 1296174
+#It should not create src file again on changelog reprocessing
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 hardlink_rename_ok ${slave_mnt}/hardlink_rename_test_file
+
+#Symlink testcase: Rename symlink and create dir with same name
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_symlink_rename_mkdir_data ${slave_mnt}/symlink_test1
+
+#hardlink-rename-unlink usecase. Sonatype Nexus3 Usecase. BUG:1512483
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_hardlink_rename_data ${slave_mnt}
+
+#rsnapshot usecase
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rsnapshot_data ${slave_mnt}
+
+#rename with existing destination case BUG:1694820
+#TEST create_rename_with_existing_destination ${master_mnt}
+#verify rename with existing destination case BUG:1694820
+#EXPECT_WITHIN $GEO_REP_TIMEOUT 0 verify_rename_with_existing_destination ${slave_mnt}
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup are-equal binary
+TEST rm $AREQUAL_PATH/arequal-checksum
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-config-upgrade.t b/tests/00-geo-rep/georep-config-upgrade.t
new file mode 100644
index 00000000000..557461cd9c4
--- /dev/null
+++ b/tests/00-geo-rep/georep-config-upgrade.t
@@ -0,0 +1,132 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=300
+OLD_CONFIG_PATH=$(dirname $0)/gsyncd.conf.old
+WORKING_DIR=/var/lib/glusterd/geo-replication/master_127.0.0.1_slave
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
+TEST $CLI volume start $GSV0
+
+##Create, start and mount meta_volume
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+#Create geo-rep session
+TEST create_georep_session $master $slave
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Enable_metavolume
+TEST $GEOREP_CLI $master $slave config use_meta_volume true
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 2 check_status_num_rows "Passive"
+
+TEST $GEOREP_CLI $master $slave config sync-method tarssh
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Copy old config file
+mv -f $WORKING_DIR/gsyncd.conf $WORKING_DIR/gsyncd.conf.org
+cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
+
+#Check if config get all updates config_file
+TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
+TEST $GEOREP_CLI $master $slave config
+TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
+
+#Check if config get updates config_file
+rm -f $WORKING_DIR/gsyncd.conf
+cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
+TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
+TEST $GEOREP_CLI $master $slave config sync-method
+TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
+
+#Check if config set updates config_file
+rm -f $WORKING_DIR/gsyncd.conf
+cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
+TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
+TEST $GEOREP_CLI $master $slave config sync-xattrs false
+TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
+
+#Check if config reset updates config_file
+rm -f $WORKING_DIR/gsyncd.conf
+cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
+TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
+TEST $GEOREP_CLI $master $slave config \!sync-xattrs
+TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
+
+#Check if geo-rep start updates config_file
+rm -f $WORKING_DIR/gsyncd.conf
+cp -p $OLD_CONFIG_PATH $WORKING_DIR/gsyncd.conf
+TEST ! grep "sync-method" $WORKING_DIR/gsyncd.conf
+TEST $GEOREP_CLI $master $slave start
+TEST grep "sync-method" $WORKING_DIR/gsyncd.conf
+
+#Stop geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-stderr-hang.t b/tests/00-geo-rep/georep-stderr-hang.t
new file mode 100644
index 00000000000..496f0e6577d
--- /dev/null
+++ b/tests/00-geo-rep/georep-stderr-hang.t
@@ -0,0 +1,128 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../geo-rep.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=500
+
+AREQUAL_PATH=$(dirname $0)/../utils
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+
+### Basic Tests with Distribute Replicate volumes
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+
+##Variables
+GEOREP_CLI="$CLI volume geo-replication"
+master=$GMV0
+SH0="127.0.0.1"
+slave=${SH0}::${GSV0}
+num_active=2
+num_passive=2
+master_mnt=$M0
+slave_mnt=$M1
+
+############################################################
+#SETUP VOLUMES AND GEO-REPLICATION
+############################################################
+
+##create_and_start_master_volume
+TEST $CLI volume create $GMV0 $H0:$B0/${GMV0}1;
+TEST $CLI volume start $GMV0
+
+##create_and_start_slave_volume
+TEST $CLI volume create $GSV0 $H0:$B0/${GSV0}1;
+TEST $CLI volume start $GSV0
+TEST $CLI volume set $GSV0 performance.stat-prefetch off
+TEST $CLI volume set $GSV0 performance.quick-read off
+TEST $CLI volume set $GSV0 performance.readdir-ahead off
+TEST $CLI volume set $GSV0 performance.read-ahead off
+
+##Mount master
+TEST glusterfs -s $H0 --volfile-id $GMV0 $M0
+
+##Mount slave
+TEST glusterfs -s $H0 --volfile-id $GSV0 $M1
+
+############################################################
+#BASIC GEO-REPLICATION TESTS
+############################################################
+
+TEST create_georep_session $master $slave
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Created"
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Config gluster-command-dir
+TEST $GEOREP_CLI $master $slave config slave-gluster-command-dir ${GLUSTER_CMD_DIR}
+
+#Set changelog roll-over time to 45 secs
+TEST $CLI volume set $GMV0 changelog.rollover-time 45
+
+#Wait for common secret pem file to be created
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_common_secret_file
+
+#Verify the keys are distributed
+EXPECT_WITHIN $GEO_REP_TIMEOUT 0 check_keys_distributed
+
+#Set sync-jobs to 1
+TEST $GEOREP_CLI $master $slave config sync-jobs 1
+
+#Start_georep
+TEST $GEOREP_CLI $master $slave start
+
+touch $M0
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Changelog Crawl"
+
+#Check History Crawl.
+TEST $GEOREP_CLI $master $slave stop
+TEST create_data_hang "rsync_hang"
+TEST create_data "history_rsync"
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Config tarssh as sync-engine
+TEST $GEOREP_CLI $master $slave config sync-method tarssh
+
+#Create tarssh hang data
+TEST create_data_hang "tarssh_hang"
+TEST create_data "history_tar"
+
+TEST $GEOREP_CLI $master $slave start
+EXPECT_WITHIN $GEO_REP_TIMEOUT 1 check_status_num_rows "Active"
+
+#Verify arequal for whole volume
+EXPECT_WITHIN $GEO_REP_TIMEOUT "x0" arequal_checksum ${master_mnt} ${slave_mnt}
+
+#Stop Geo-rep
+TEST $GEOREP_CLI $master $slave stop
+
+#Delete Geo-rep
+TEST $GEOREP_CLI $master $slave delete
+
+#Cleanup are-equal binary
+TEST rm $AREQUAL_PATH/arequal-checksum
+
+#Cleanup authorized keys
+sed -i '/^command=.*SSH_ORIGINAL_COMMAND#.*/d' ~/.ssh/authorized_keys
+sed -i '/^command=.*gsyncd.*/d' ~/.ssh/authorized_keys
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/00-geo-rep/georep-upgrade.t b/tests/00-geo-rep/georep-upgrade.t
new file mode 100644
index 00000000000..7523068ed50
--- /dev/null
+++ b/tests/00-geo-rep/georep-upgrade.t
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+SCRIPT_TIMEOUT=500
+
+###############################################################################################
+#Before upgrade
+###############################################################################################
+brick=/bricks/brick1
+epoch1=$(date '+%s')
+sleep 1
+epoch2=$(date '+%s')
+mkdir -p /bricks/brick1/.glusterfs/changelogs/htime
+mkdir -p /bricks/brick1/.glusterfs/changelogs
+
+#multiple htime files(changelog enable/disable scenario)
+TEST touch /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch1
+TEST touch /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch2
+
+#changelog files
+TEST touch /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch1
+TEST touch /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch2
+
+htime_file1=/bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch1
+htime_file2=/bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch2
+
+#data inside htime files before upgrade
+data1=/bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch1
+data2=/bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch2
+
+#data inside htime files after upgrade
+updated_data1=/bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m/%d')`/CHANGELOG.$epoch1
+updated_data2=/bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m/%d')`/CHANGELOG.$epoch2
+
+echo -n $data1>$htime_file1
+echo -n $data2>$htime_file2
+
+echo "Before upgrade:"
+EXPECT '1' echo $(grep $data1 $htime_file1 | wc -l)
+EXPECT '1' echo $(grep $data2 $htime_file2 | wc -l)
+
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch1 | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch1 | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch2 | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch2 | wc -l)
+###############################################################################################
+#Upgrade
+###############################################################################################
+### This needed to be fixed as this very vague finding a file with name in '/'
+### multiple file with same name can exist
+### for temp fix picking only 1st result
+TEST upgrade_script=$(find / -type f -name glusterfs-georep-upgrade.py -print | head -n 1)
+TEST python3 $upgrade_script $brick
+
+###############################################################################################
+#After upgrade
+###############################################################################################
+echo "After upgrade:"
+EXPECT '1' echo $(grep $updated_data1 $htime_file1 | wc -l)
+EXPECT '1' echo $(grep $updated_data2 $htime_file2 | wc -l)
+
+#Check directory structure inside changelogs
+TEST ! ls /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch1
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch1 | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch1.bak | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y')` | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m')` | wc -l)
+EXPECT '2' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m/%d')` | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m/%d')`/CHANGELOG.$epoch1 | wc -l)
+
+TEST ! ls /bricks/brick1/.glusterfs/changelogs/CHANGELOG.$epoch2
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch2 | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/htime/HTIME.$epoch2.bak | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y')` | wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m')`| wc -l)
+EXPECT '1' echo $(ls /bricks/brick1/.glusterfs/changelogs/`echo $(date '+%Y/%m/%d')`/CHANGELOG.$epoch2 | wc -l)
+
+TEST rm -rf /bricks
diff --git a/tests/00-geo-rep/gsyncd.conf.old b/tests/00-geo-rep/gsyncd.conf.old
new file mode 100644
index 00000000000..519acaf8f3e
--- /dev/null
+++ b/tests/00-geo-rep/gsyncd.conf.old
@@ -0,0 +1,47 @@
+[__meta__]
+version = 2.0
+
+[peersrx . .]
+remote_gsyncd = /usr/local/libexec/glusterfs/gsyncd
+georep_session_working_dir = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/
+ssh_command_tar = ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/geo-replication/tar_ssh.pem
+changelog_log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}${local_id}-changes.log
+working_dir = /var/lib/misc/glusterfsd/${mastervol}/${eSlave}
+ignore_deletes = false
+pid_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.pid
+state_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.status
+gluster_command_dir = /usr/local/sbin/
+gluster_params = aux-gfid-mount acl
+ssh_command = ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no -i /var/lib/glusterd/geo-replication/secret.pem
+state_detail_file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status
+state_socket_unencoded = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/${eSlave}.socket
+socketdir = /var/run/gluster
+log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}.log
+gluster_log_file = /var/log/glusterfs/geo-replication/${mastervol}/${eSlave}${local_id}.gluster.log
+special_sync_mode = partial
+change_detector = changelog
+pid-file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.pid
+state-file = /var/lib/glusterd/geo-replication/${mastervol}_${remotehost}_${slavevol}/monitor.status
+
+[__section_order__]
+peersrx . . = 0
+peersrx . %5essh%3a = 2
+peersrx . = 3
+peers master slave = 4
+
+[peersrx . %5Essh%3A]
+remote_gsyncd = /nonexistent/gsyncd
+
+[peersrx .]
+gluster_command_dir = /usr/local/sbin/
+gluster_params = aux-gfid-mount acl
+log_file = /var/log/glusterfs/geo-replication-slaves/${session_owner}:${local_node}${local_id}.${slavevol}.log
+log_file_mbr = /var/log/glusterfs/geo-replication-slaves/mbr/${session_owner}:${local_node}${local_id}.${slavevol}.log
+gluster_log_file = /var/log/glusterfs/geo-replication-slaves/${session_owner}:${local_node}${local_id}.${slavevol}.gluster.log
+
+[peers master slave]
+session_owner = 0732cbd1-3ec5-4920-ab0d-aa5a896d5214
+master.stime_xattr_name = trusted.glusterfs.0732cbd1-3ec5-4920-ab0d-aa5a896d5214.07a9005c-ace4-4f67-b3c0-73938fb236c4.stime
+volume_id = 0732cbd1-3ec5-4920-ab0d-aa5a896d5214
+use_tarssh = true
+
diff --git a/tests/000-flaky/basic_afr_split-brain-favorite-child-policy.t b/tests/000-flaky/basic_afr_split-brain-favorite-child-policy.t
new file mode 100644
index 00000000000..77d82a4996f
--- /dev/null
+++ b/tests/000-flaky/basic_afr_split-brain-favorite-child-policy.t
@@ -0,0 +1,203 @@
+#!/bin/bash
+
+#Test the split-brain resolution CLI commands.
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create replica 2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST touch $M0/file
+
+############ Healing using favorite-child-policy = ctime #################
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+#file still in split-brain
+cat $M0/file > /dev/null
+EXPECT "1" echo $?
+
+# Umount to prevent further FOPS on the file, then find the brick with latest ctime.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+ctime1=`stat -c "%.Z" $B0/${V0}0/file`
+ctime2=`stat -c "%.Z" $B0/${V0}1/file`
+if (( $(echo "$ctime1 > $ctime2" | bc -l) )); then
+ LATEST_CTIME_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1)
+else
+ LATEST_CTIME_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1)
+fi
+TEST $CLI volume set $V0 cluster.favorite-child-policy ctime
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+B0_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1)
+B1_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1)
+TEST [ "$LATEST_CTIME_MD5" == "$B0_MD5" ]
+TEST [ "$LATEST_CTIME_MD5" == "$B1_MD5" ]
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+cat $M0/file > /dev/null
+EXPECT "0" echo $?
+
+############ Healing using favorite-child-policy = mtime #################
+TEST $CLI volume set $V0 cluster.favorite-child-policy none
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+#file still in split-brain
+cat $M0/file > /dev/null
+EXPECT "1" echo $?
+
+#We know that the second brick has latest mtime.
+LATEST_CTIME_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1)
+TEST $CLI volume set $V0 cluster.favorite-child-policy mtime
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+cat $M0/file > /dev/null
+EXPECT "0" echo $?
+HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1)
+TEST [ "$LATEST_CTIME_MD5" == "$HEALED_MD5" ]
+
+############ Healing using favorite-child-policy = size #################
+TEST $CLI volume set $V0 cluster.favorite-child-policy none
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=10240
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+#file still in split-brain
+cat $M0/file > /dev/null
+EXPECT "1" echo $?
+
+#We know that the second brick has the bigger size file.
+BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1)
+TEST $CLI volume set $V0 cluster.favorite-child-policy size
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+cat $M0/file > /dev/null
+EXPECT "0" echo $?
+HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1)
+TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5" ]
+
+############ Healing using favorite-child-policy = majority on replica-3 #################
+
+#Convert volume to replica-3
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST $CLI volume set $V0 cluster.quorum-type none
+TEST $CLI volume set $V0 cluster.favorite-child-policy none
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=10240
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+
+#file still in split-brain
+cat $M0/file > /dev/null
+EXPECT "1" echo $?
+
+#We know that the second and third bricks agree with each other. Pick any one of them.
+MAJORITY_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1)
+TEST $CLI volume set $V0 cluster.favorite-child-policy majority
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+cat $M0/file > /dev/null
+EXPECT "0" echo $?
+HEALED_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1)
+TEST [ "$MAJORITY_MD5" == "$HEALED_MD5" ]
+
+TEST force_umount $M0
+cleanup
diff --git a/tests/000-flaky/basic_changelog_changelog-snapshot.t b/tests/000-flaky/basic_changelog_changelog-snapshot.t
new file mode 100644
index 00000000000..f6cd0b04d47
--- /dev/null
+++ b/tests/000-flaky/basic_changelog_changelog-snapshot.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../snapshot.rc
+
+cleanup;
+ROLLOVER_TIME=3
+
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+BRICK_LOG=$(echo "$L1" | tr / - | sed 's/^-//g')
+TEST $CLI volume start $V0
+
+#Enable changelog
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#Create snapshot
+S1="${V0}-snap1"
+
+mkdir $M0/RENAME
+mkdir $M0/LINK
+mkdir $M0/UNLINK
+mkdir $M0/RMDIR
+mkdir $M0/SYMLINK
+
+for i in {1..400} ; do touch $M0/RENAME/file$i; done
+for i in {1..400} ; do touch $M0/LINK/file$i; done
+for i in {1..400} ; do touch $M0/UNLINK/file$i; done
+for i in {1..400} ; do mkdir $M0/RMDIR/dir$i; done
+for i in {1..400} ; do touch $M0/SYMLINK/file$i; done
+
+#Write I/O in background
+for i in {1..400} ; do touch $M0/file$i 2>/dev/null; done &
+for i in {1..400} ; do mknod $M0/mknod-file$i p 2>/dev/null; done &
+for i in {1..400} ; do mkdir $M0/dir$i 2>/dev/null; done & 2>/dev/null
+for i in {1..400} ; do mv $M0/RENAME/file$i $M0/RENAME/rn-file$i 2>/dev/null; done &
+for i in {1..400} ; do ln $M0/LINK/file$i $M0/LINK/ln-file$i 2>/dev/null; done &
+for i in {1..400} ; do rm -f $M0/UNLINK/file$i 2>/dev/null; done &
+for i in {1..400} ; do rmdir $M0/RMDIR/dir$i 2>/dev/null; done &
+for i in {1..400} ; do ln -s $M0/SYMLINK/file$i $M0/SYMLINK/sym-file$i 2>/dev/null; done &
+
+sleep 1
+TEST $CLI snapshot create $S1 $V0 no-timestamp
+TEST snapshot_exists 0 $S1
+
+TEST grep '"Enabled changelog barrier"' /var/log/glusterfs/bricks/$BRICK_LOG.log
+TEST grep '"Disabled changelog barrier"' /var/log/glusterfs/bricks/$BRICK_LOG.log
+
+TEST glusterfs -s $H0 --volfile-id=/snaps/$S1/$V0 $M1
+
+#Clean up
+TEST $CLI volume stop $V0 force
+cleanup;
diff --git a/tests/000-flaky/basic_distribute_rebal-all-nodes-migrate.t b/tests/000-flaky/basic_distribute_rebal-all-nodes-migrate.t
new file mode 100644
index 00000000000..eb5d3305ac1
--- /dev/null
+++ b/tests/000-flaky/basic_distribute_rebal-all-nodes-migrate.t
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+. $(dirname $0)/../dht.rc
+
+
+# Check if every single rebalance process migrated some files
+
+function cluster_rebal_all_nodes_migrated_files {
+ val=0
+ a=$($CLI_1 volume rebalance $V0 status | grep "completed" | awk '{print $2}');
+ b=($a)
+ for i in "${b[@]}"
+ do
+ if [ "$i" -eq "0" ]; then
+ echo "false";
+ val=1;
+ fi
+ done
+ echo $val
+}
+
+cleanup
+
+TEST launch_cluster 3;
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+
+#Start with a pure distribute volume (multiple bricks on the same node)
+TEST $CLI_1 volume create $V0 $H1:$B1/dist1 $H1:$B1/dist2 $H2:$B2/dist3 $H2:$B2/dist4
+
+TEST $CLI_1 volume start $V0
+$CLI_1 volume info $V0
+
+#TEST $CLI_1 volume set $V0 client-log-level DEBUG
+
+## Mount FUSE
+TEST glusterfs -s $H1 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/dir1 2>/dev/null;
+TEST touch $M0/dir1/file-{1..500}
+
+## Add-brick and run rebalance to force file migration
+TEST $CLI_1 volume add-brick $V0 $H1:$B1/dist5 $H2:$B2/dist6
+
+#Start a rebalance
+TEST $CLI_1 volume rebalance $V0 start force
+
+#volume rebalance status should work
+#TEST $CLI_1 volume rebalance $V0 status
+#$CLI_1 volume rebalance $V0 status
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" cluster_rebalance_completed
+EXPECT "0" cluster_rebal_all_nodes_migrated_files
+$CLI_1 volume rebalance $V0 status
+
+
+TEST umount -f $M0
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume delete $V0
+
+
+##############################################################
+
+# Next, a dist-rep volume
+TEST $CLI_1 volume create $V0 replica 2 $H1:$B1/drep1 $H2:$B2/drep1 $H1:$B1/drep2 $H2:$B2/drep2
+
+TEST $CLI_1 volume start $V0
+$CLI_1 volume info $V0
+
+#TEST $CLI_1 volume set $V0 client-log-level DEBUG
+
+## Mount FUSE
+TEST glusterfs -s $H1 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/dir1 2>/dev/null;
+TEST touch $M0/dir1/file-{1..500}
+
+## Add-brick and run rebalance to force file migration
+TEST $CLI_1 volume add-brick $V0 replica 2 $H1:$B1/drep3 $H2:$B2/drep3
+
+#Start a rebalance
+TEST $CLI_1 volume rebalance $V0 start force
+
+#volume rebalance status should work
+#TEST $CLI_1 volume rebalance $V0 status
+#$CLI_1 volume rebalance $V0 status
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" cluster_rebalance_completed
+#EXPECT "0" cluster_rebal_all_nodes_migrated_files
+$CLI_1 volume rebalance $V0 status
+
+
+TEST umount -f $M0
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume delete $V0
+
+##############################################################
+
+# Next, a disperse volume
+TEST $CLI_1 volume create $V0 disperse 3 $H1:$B1/ec1 $H2:$B1/ec2 $H3:$B1/ec3 force
+
+TEST $CLI_1 volume start $V0
+$CLI_1 volume info $V0
+
+#TEST $CLI_1 volume set $V0 client-log-level DEBUG
+
+## Mount FUSE
+TEST glusterfs -s $H1 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/dir1 2>/dev/null;
+TEST touch $M0/dir1/file-{1..500}
+
+## Add-brick and run rebalance to force file migration
+TEST $CLI_1 volume add-brick $V0 $H1:$B2/ec4 $H2:$B2/ec5 $H3:$B2/ec6
+
+#Start a rebalance
+TEST $CLI_1 volume rebalance $V0 start force
+
+#volume rebalance status should work
+#TEST $CLI_1 volume rebalance $V0 status
+#$CLI_1 volume rebalance $V0 status
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" cluster_rebalance_completed
+
+# this will not work unless EC is changed to return all node-uuids
+# comment this out once that patch is ready
+#EXPECT "0" cluster_rebal_all_nodes_migrated_files
+$CLI_1 volume rebalance $V0 status
+
+
+TEST umount -f $M0
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume delete $V0
+
+##############################################################
+
+cleanup
+#G_TESTDEF_TEST_STATUS_NETBSD7=1501388
diff --git a/tests/000-flaky/basic_ec_ec-quorum-count-partial-failure.t b/tests/000-flaky/basic_ec_ec-quorum-count-partial-failure.t
new file mode 100644
index 00000000000..42808ce0c0e
--- /dev/null
+++ b/tests/000-flaky/basic_ec_ec-quorum-count-partial-failure.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#This test checks that partial failure of fop results in main fop failure only
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume create $V1 $H0:$B0/${V1}{0..5}
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=/$V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+TEST dd if=/dev/urandom of=$M0/a bs=12347 count=1
+TEST dd if=/dev/urandom of=$M0/b bs=12347 count=1
+TEST cp $M0/b $M0/c
+TEST fallocate -p -l 101 $M0/c
+TEST $CLI volume stop $V0
+TEST $CLI volume set $V0 debug.delay-gen posix;
+TEST $CLI volume set $V0 delay-gen.delay-duration 10000000;
+TEST $CLI volume set $V0 delay-gen.enable WRITE;
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 disperse.quorum-count 6
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+cksum=$(dd if=$M0/a bs=12345 count=1 | md5sum | awk '{print $1}')
+truncate -s 12345 $M0/a & #While write is waiting for 5 seconds, introduce failure
+fallocate -p -l 101 $M0/b &
+sleep 1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST wait
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+EXPECT "12345" stat --format=%s $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+cksum_after_heal=$(dd if=$M0/a | md5sum | awk '{print $1}')
+TEST [[ $cksum == $cksum_after_heal ]]
+cksum=$(dd if=$M0/c | md5sum | awk '{print $1}')
+cksum_after_heal=$(dd if=$M0/b | md5sum | awk '{print $1}')
+TEST [[ $cksum == $cksum_after_heal ]]
+
+cleanup;
diff --git a/tests/000-flaky/basic_mount-nfs-auth.t b/tests/000-flaky/basic_mount-nfs-auth.t
new file mode 100644
index 00000000000..3d4a9cff00b
--- /dev/null
+++ b/tests/000-flaky/basic_mount-nfs-auth.t
@@ -0,0 +1,342 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+# Our mount timeout must be as long as the time for a regular configuration
+# change to be acted upon *plus* AUTH_REFRESH_TIMEOUT, not one replacing the
+# other. Otherwise this process races vs. the one making the change we're
+# trying to test, which leads to spurious failures.
+MY_MOUNT_TIMEOUT=$((CONFIG_UPDATE_TIMEOUT+AUTH_REFRESH_INTERVAL))
+
+cleanup;
+## Check whether glusterd is running
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+H0IP=$(ip addr show |grep -w inet |grep -v 127.0.0.1|awk '{ print $2 }'| cut -d "/" -f 1)
+H0IP6=$(host $HOSTNAME | grep IPv6 | awk '{print $NF}')
+
+# Export variables for allow & deny
+EXPORT_ALLOW="/$V0 $H0(sec=sys,rw,anonuid=0) @ngtop(sec=sys,rw,anonuid=0)"
+EXPORT_ALLOW_SLASH="/$V0/ $H0(sec=sys,rw,anonuid=0) @ngtop(sec=sys,rw,anonuid=0)"
+EXPORT_DENY="/$V0 1.2.3.4(sec=sys,rw,anonuid=0) @ngtop(sec=sys,rw,anonuid=0)"
+
+# Netgroup variables for allow & deny
+NETGROUP_ALLOW="ngtop ng1000\nng1000 ng999\nng999 ng1\nng1 ng2\nng2 ($H0,,)"
+NETGROUP_DENY="ngtop ng1000\nng1000 ng999\nng999 ng1\nng1 ng2\nng2 (1.2.3.4,,)"
+
+V0L1="$V0/L1"
+V0L2="$V0L1/L2"
+V0L3="$V0L2/L3"
+
+# Other variations for allow & deny
+EXPORT_ALLOW_RO="/$V0 $H0(sec=sys,ro,anonuid=0) @ngtop(sec=sys,ro,anonuid=0)"
+EXPORT_ALLOW_L1="/$V0L1 $H0(sec=sys,rw,anonuid=0) @ngtop(sec=sys,rw,anonuid=0)"
+EXPORT_WILDCARD="/$V0 *(sec=sys,rw,anonuid=0) @ngtop(sec=sys,rw,anonuid=0)"
+
+function build_dirs () {
+ mkdir -p $B0/b{0,1,2}/L1/L2/L3
+}
+
+function export_allow_this_host_ipv6 () {
+ printf "$EXPORT_ALLOW6\n" > "$GLUSTERD_WORKDIR"/nfs/exports
+}
+
+function export_allow_this_host () {
+ printf "$EXPORT_ALLOW\n" > ${NFSDIR}/exports
+}
+
+function export_allow_this_host_with_slash () {
+ printf "$EXPORT_ALLOW_SLASH\n" > ${NFSDIR}/exports
+}
+
+function export_deny_this_host () {
+ printf "$EXPORT_DENY\n" > ${NFSDIR}/exports
+}
+
+function export_allow_this_host_l1 () {
+ printf "$EXPORT_ALLOW_L1\n" >> ${NFSDIR}/exports
+}
+
+function export_allow_wildcard () {
+ printf "$EXPORT_WILDCARD\n" > ${NFSDIR}/exports
+}
+
+function export_allow_this_host_ro () {
+ printf "$EXPORT_ALLOW_RO\n" > ${NFSDIR}/exports
+}
+
+function netgroup_allow_this_host () {
+ printf "$NETGROUP_ALLOW\n" > ${NFSDIR}/netgroups
+}
+
+function netgroup_deny_this_host () {
+ printf "$NETGROUP_DENY\n" > ${NFSDIR}/netgroups
+}
+
+function create_vol () {
+ $CLI vol create $V0 $H0:$B0/b0
+}
+
+function setup_cluster() {
+ build_dirs # Build directories
+ export_allow_this_host # Allow this host in the exports file
+ netgroup_allow_this_host # Allow this host in the netgroups file
+
+ glusterd
+ create_vol # Create the volume
+}
+
+function check_mount_success {
+ mount_nfs $H0:/$1 $N0 nolock
+ if [ $? -eq 0 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function check_mount_failure {
+ mount_nfs $H0:/$1 $N0 nolock
+ if [ $? -ne 0 ]; then
+ echo "Y"
+ else
+ local timeout=$UMOUNT_TIMEOUT
+ while ! umount_nfs $N0 && [$timeout -ne 0] ; do
+ timeout=$(( $timeout - 1 ))
+ sleep 1
+ done
+ fi
+}
+
+function small_write () {
+ dd if=/dev/zero of=$N0/test-small-write count=1 bs=1k 2>&1
+ if [ $? -ne 0 ]; then
+ echo "N"
+ else
+ echo "Y"
+ fi
+}
+
+function bg_write () {
+ dd if=/dev/zero of=$N0/test-bg-write count=1 bs=1k &
+ BG_WRITE_PID=$!
+}
+
+function big_write() {
+ dd if=/dev/zero of=$N0/test-big-write count=500 bs=1024k
+}
+
+function create () {
+ touch $N0/create-test
+}
+
+function stat_nfs () {
+ ls $N0/
+}
+
+# Restarts the NFS server
+function restart_nfs () {
+ local NFS_PID=$(cat $GLUSTERD_PIDFILEDIR/nfs/nfs.pid)
+
+ # kill the NFS-server if it is running
+ while ps -q ${NFS_PID} 2>&1 > /dev/null; do
+ kill ${NFS_PID}
+ sleep 0.5
+ done
+
+ # start-force starts the NFS-server again
+ $CLI vol start patchy force
+}
+
+setup_cluster
+
+# run preliminary tests
+TEST $CLI vol set $V0 nfs.disable off
+TEST $CLI vol start $V0
+
+# Get NFS state directory
+NFSDIR=$( $CLI volume get patchy nfs.mount-rmtab | \
+ awk '/^nfs.mount-rmtab/{print $2}' | \
+ xargs dirname )
+
+## Wait for volume to register with rpc.mountd
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+## NFS server starts with auth disabled
+## Do some tests to verify that.
+
+EXPECT "Y" check_mount_success $V0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Disallow host
+TEST export_deny_this_host
+TEST netgroup_deny_this_host
+
+## Technically deauthorized this host, but since auth is disabled we should be
+## able to do mounts, writes, etc.
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0
+EXPECT "Y" small_write
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Reauthorize this host
+export_allow_this_host
+netgroup_allow_this_host
+
+## Restart NFS with auth enabled
+$CLI vol stop $V0
+TEST $CLI vol set $V0 nfs.exports-auth-enable on
+$CLI vol start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+## Mount NFS
+EXPECT "Y" check_mount_success $V0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount NFS using the IPv6 export
+export_allow_this_host_ipv6
+EXPECT "Y" check_mount_success $V0
+
+## Disallow host
+TEST export_deny_this_host
+TEST netgroup_deny_this_host
+
+## Writes should not be allowed, host is not authorized
+EXPECT_WITHIN $AUTH_REFRESH_INTERVAL "N" small_write
+
+## Unmount so we can test mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Subsequent ounts should not be allowed, host is not authorized
+EXPECT "Y" check_mount_failure $V0
+
+## Reauthorize host
+TEST export_allow_this_host
+TEST netgroup_allow_this_host
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Allow host in netgroups but not in exports, host should be allowed
+TEST export_deny_this_host
+TEST netgroup_allow_this_host
+
+# wait for the mount authentication to rebuild
+sleep $[$AUTH_REFRESH_INTERVAL + 1]
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0
+EXPECT "Y" small_write
+TEST big_write
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Allow host in exports but not in netgroups, host should be allowed
+TEST export_allow_this_host
+TEST netgroup_deny_this_host
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Finally, reauth the host in export and netgroup, test mount & write
+TEST export_allow_this_host_l1
+TEST netgroup_allow_this_host
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0L1
+EXPECT "Y" small_write
+
+## Failover test: Restarting NFS and then doing a write should pass
+bg_write
+TEST restart_nfs
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+TEST wait $BG_WRITE_PID
+EXPECT "Y" small_write
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Test deep mounts
+EXPECT "Y" check_mount_success $V0L1
+EXPECT "Y" small_write
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+TEST export_allow_this_host_ro
+TEST netgroup_deny_this_host
+
+## Restart the nfs server to avoid spurious failure(BZ1256352)
+restart_nfs
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0
+EXPECT "N" small_write # Writes should not be allowed
+TEST ! create # Create should not be allowed
+TEST stat_nfs # Stat should be allowed
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+TEST export_deny_this_host
+TEST netgroup_deny_this_host
+TEST export_allow_this_host_l1 # Allow this host at L1
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_failure $V0 #V0 shouldnt be allowed
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0L1 #V0L1 should be
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Test wildcard hosts
+TEST export_allow_wildcard
+
+# the $MY_MOUNT_TIMEOUT might not be long enough? restart should do
+restart_nfs
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0
+EXPECT_WITHIN $AUTH_REFRESH_INTERVAL "Y" small_write
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Test if path is parsed correctly
+## by mounting host:vol/ instead of host:vol
+EXPECT "Y" check_mount_success $V0/
+EXPECT "Y" small_write
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+TEST export_allow_this_host_with_slash
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0
+EXPECT "Y" small_write
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+EXPECT "Y" check_mount_success $V0/
+EXPECT "Y" small_write
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+
+## Turn off exports authentication
+$CLI vol stop $V0
+TEST $CLI vol set $V0 nfs.exports-auth-enable off
+$CLI vol start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+TEST export_deny_this_host # Deny the host
+TEST netgroup_deny_this_host
+
+EXPECT_WITHIN $MY_MOUNT_TIMEOUT "Y" check_mount_success $V0 # Do a mount & test
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Turn back on the exports authentication
+$CLI vol stop $V0
+TEST $CLI vol set $V0 nfs.exports-auth-enable on
+$CLI vol start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+## Do a simple test to set the refresh time to 20 seconds
+TEST $CLI vol set $V0 nfs.auth-refresh-interval-sec 20
+
+## Do a simple test to see if the volume option exists
+TEST $CLI vol set $V0 nfs.auth-cache-ttl-sec 400
+
+## Finish up
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup
diff --git a/tests/000-flaky/bugs_core_multiplex-limit-issue-151.t b/tests/000-flaky/bugs_core_multiplex-limit-issue-151.t
new file mode 100644
index 00000000000..5a88f97d726
--- /dev/null
+++ b/tests/000-flaky/bugs_core_multiplex-limit-issue-151.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../traps.rc
+. $(dirname $0)/../volume.rc
+
+function count_up_bricks {
+ $CLI --xml volume status all | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+function count_brick_pids {
+ $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+ | grep -v "N/A" | sort | uniq | wc -l
+}
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume set all cluster.brick-multiplex on
+TEST ! $CLI volume set all cluster.max-bricks-per-process -1
+TEST ! $CLI volume set all cluster.max-bricks-per-process foobar
+TEST $CLI volume set all cluster.max-bricks-per-process 3
+
+TEST $CLI volume create $V0 $H0:$B0/brick{0..5}
+TEST $CLI volume start $V0
+
+EXPECT 2 count_brick_processes
+EXPECT 2 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
+
+pkill gluster
+TEST glusterd
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
+
+TEST $CLI volume add-brick $V0 $H0:$B0/brick6
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 7 count_up_bricks
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 start
+TEST $CLI volume remove-brick $V0 $H0:$B0/brick3 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 count_up_bricks
+
+cleanup;
diff --git a/tests/000-flaky/bugs_distribute_bug-1117851.t b/tests/000-flaky/bugs_distribute_bug-1117851.t
new file mode 100644
index 00000000000..5980bf2fd4b
--- /dev/null
+++ b/tests/000-flaky/bugs_distribute_bug-1117851.t
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=250
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+create_files () {
+ for i in {1..1000}; do
+ orig=$(printf %s/abc%04d $1 $i)
+ real=$(printf %s/src%04d $1 $i)
+ # Make sure lots of these have linkfiles.
+ echo "This is file $i" > $orig
+ mv $orig $real
+ done
+ sync
+}
+
+move_files_inner () {
+ sfile=$M0/status_$(basename $1)
+ for i in {1..1000}; do
+ src=$(printf %s/src%04d $1 $i)
+ dst=$(printf %s/dst%04d $1 $i)
+ mv $src $dst 2> /dev/null
+ done
+ echo "done" > $sfile
+}
+
+move_files () {
+ #Create the status file here to prevent spurious failures
+ #caused by the file not being created in time by the
+ #background process
+ sfile=$M0/status_$(basename $1)
+ echo "running" > $sfile
+ move_files_inner $* &
+}
+
+check_files () {
+ errors=0
+ for i in {1..1000}; do
+ if [ ! -f $(printf %s/dst%04d $1 $i) ]; then
+ if [ -f $(printf %s/src%04d $1 $i) ]; then
+ echo "file $i didnt get moved" > /dev/stderr
+ else
+ echo "file $i is MISSING" > /dev/stderr
+ errors=$((errors+1))
+ fi
+ fi
+ done
+ if [ $((errors)) != 0 ]; then
+ : ls -l $1 > /dev/stderr
+ fi
+ return $errors
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE with caching disabled (read-write)
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+TEST create_files $M0
+
+## Mount FUSE with caching disabled (read-write) again
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M1;
+
+TEST move_files $M0
+TEST move_files $M1
+
+# It's regrettable that renaming 1000 files might take more than 30 seconds,
+# but on our test systems sometimes it does, so double the time from what we'd
+# use otherwise. There still seem to be some spurious failures, 1 in 20 when
+# this does not complete, added an additional 60 seconds to take false reports
+# out of the system, during test runs, especially on slower test systems.
+EXPECT_WITHIN 120 "done" cat $M0/status_0
+EXPECT_WITHIN 120 "done" cat $M1/status_1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+TEST check_files $M0
+
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/000-flaky/bugs_distribute_bug-1122443.t b/tests/000-flaky/bugs_distribute_bug-1122443.t
new file mode 100644
index 00000000000..abd37082b33
--- /dev/null
+++ b/tests/000-flaky/bugs_distribute_bug-1122443.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+
+make_files() {
+ mkdir $1 && \
+ ln -s ../ $1/symlink && \
+ mknod $1/special_b b 1 2 && \
+ mknod $1/special_c c 3 4 && \
+ mknod $1/special_u u 5 6 && \
+ mknod $1/special_p p && \
+ touch -h --date=@1 $1/symlink && \
+ touch -h --date=@2 $1/special_b &&
+ touch -h --date=@3 $1/special_c &&
+ touch -h --date=@4 $1/special_u &&
+ touch -h --date=@5 $1/special_p
+}
+
+bug_1113050_workaround() {
+ # Test if graph change has settled (bug-1113050?)
+ test=$(stat -c "%n:%Y" $1 2>&1 | tr '\n' ',')
+ if [ $? -eq 0 ] ; then
+ echo RECONNECTED
+ else
+ echo WAITING
+ fi
+ return 0
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume start $V0
+
+# Mount FUSE and create symlink
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST make_files $M0/subdir
+
+# Get mtime before migration
+BEFORE="$(stat -c %n:%Y $M0/subdir/* | sort | tr '\n' ',')"
+echo $BEFORE
+# Migrate brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}0 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}0"
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}0 commit
+
+# Get mtime after migration
+EXPECT_WITHIN 30 RECONNECTED bug_1113050_workaround $M0/subdir/symlink
+sleep 3
+AFTER="$(stat -c %n:%Y $M0/subdir/* | sort | tr '\n' ',')"
+echo $AFTER
+# Check if mtime is unchanged
+TEST [ "$AFTER" == "$BEFORE" ]
+
+cleanup
diff --git a/tests/000-flaky/bugs_glusterd_bug-857330/common.rc b/tests/000-flaky/bugs_glusterd_bug-857330/common.rc
new file mode 100644
index 00000000000..bd122eff18c
--- /dev/null
+++ b/tests/000-flaky/bugs_glusterd_bug-857330/common.rc
@@ -0,0 +1,57 @@
+. $(dirname $0)/../../include.rc
+
+UUID_REGEX='[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}'
+
+TASK_ID=""
+COMMAND=""
+PATTERN=""
+
+function check-and-store-task-id()
+{
+ TASK_ID=""
+
+ local task_id=$($CLI $COMMAND | grep $PATTERN | grep -o -E "$UUID_REGEX")
+
+ if [ -z "$task_id" ] && [ "${task_id+asdf}" = "asdf" ]; then
+ return 1
+ fi
+
+ TASK_ID=$task_id
+ return 0;
+}
+
+function get-task-id()
+{
+ $CLI $COMMAND | grep $PATTERN | grep -o -E "$UUID_REGEX" | tail -n1
+
+}
+
+function check-and-store-task-id-xml()
+{
+ TASK_ID=""
+
+ local task_id=$($CLI $COMMAND --xml | xmllint --format - | grep $PATTERN | grep -o -E "$UUID_REGEX")
+
+ if [ -z "$task_id" ] && [ "${task_id+asdf}" = "asdf" ]; then
+ return 1
+ fi
+
+ TASK_ID=$task_id
+ return 0;
+}
+
+function get-task-id-xml()
+{
+ $CLI $COMMAND --xml | xmllint --format - | grep $PATTERN | grep -o -E "$UUID_REGEX"
+}
+
+function get-task-status()
+{
+ pattern=$1
+ val=1
+ test=$(gluster $COMMAND | grep -o $pattern 2>&1)
+ if [ $? -eq 0 ]; then
+ val=0
+ fi
+ echo $val
+}
diff --git a/tests/000-flaky/bugs_glusterd_bug-857330/normal.t b/tests/000-flaky/bugs_glusterd_bug-857330/normal.t
new file mode 100755
index 00000000000..6c1cf54ec3c
--- /dev/null
+++ b/tests/000-flaky/bugs_glusterd_bug-857330/normal.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+. $(dirname $0)/common.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}1 $H0:$B0/${V0}2;
+TEST $CLI volume info $V0;
+TEST $CLI volume start $V0;
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0;
+
+TEST $PYTHON $(dirname $0)/../../utils/create-files.py \
+ --multi -b 10 -d 10 -n 10 $M0;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+###############
+## Rebalance ##
+###############
+TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}3 $H0:$B0/${V0}4;
+
+COMMAND="volume rebalance $V0 start"
+PATTERN="ID:"
+TEST check-and-store-task-id
+
+COMMAND="volume status $V0"
+PATTERN="ID"
+EXPECT $TASK_ID get-task-id
+
+COMMAND="volume rebalance $V0 status"
+PATTERN="completed"
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" get-task-status $PATTERN
+
+###################
+## Replace-brick ##
+###################
+REP_BRICK_PAIR="$H0:$B0/${V0}2 $H0:$B0/${V0}5"
+
+TEST $CLI volume replace-brick $V0 $REP_BRICK_PAIR commit force;
+
+##################
+## Remove-brick ##
+##################
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5
+
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}5 start"
+PATTERN="ID:"
+TEST check-and-store-task-id
+
+COMMAND="volume status $V0"
+PATTERN="ID"
+EXPECT $TASK_ID get-task-id
+
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}5 status"
+PATTERN="completed"
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" get-task-status $PATTERN
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}5 commit
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/000-flaky/bugs_glusterd_bug-857330/xml.t b/tests/000-flaky/bugs_glusterd_bug-857330/xml.t
new file mode 100755
index 00000000000..11785adacdb
--- /dev/null
+++ b/tests/000-flaky/bugs_glusterd_bug-857330/xml.t
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+. $(dirname $0)/common.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}1 $H0:$B0/${V0}2;
+TEST $CLI volume info $V0;
+TEST $CLI volume start $V0;
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0;
+
+TEST $PYTHON $(dirname $0)/../../utils/create-files.py \
+ --multi -b 10 -d 10 -n 10 $M0;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+
+###############
+## Rebalance ##
+###############
+TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}3 $H0:$B0/${V0}4;
+
+COMMAND="volume rebalance $V0 start"
+PATTERN="task-id"
+TEST check-and-store-task-id-xml
+
+COMMAND="volume status $V0"
+PATTERN="id"
+EXPECT $TASK_ID get-task-id-xml
+
+COMMAND="volume rebalance $V0 status"
+PATTERN="task-id"
+EXPECT $TASK_ID get-task-id-xml
+
+## TODO: Add tests for rebalance stop
+
+COMMAND="volume rebalance $V0 status"
+PATTERN="completed"
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" get-task-status $PATTERN
+
+###################
+## Replace-brick ##
+###################
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}4 $H0:$B0/${V0}5 commit force
+
+##################
+## Remove-brick ##
+##################
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5
+
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 $H0:$B0/${V0}5 start"
+PATTERN="task-id"
+TEST check-and-store-task-id-xml
+
+COMMAND="volume status $V0"
+PATTERN="id"
+EXPECT $TASK_ID get-task-id-xml
+
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 $H0:$B0/${V0}5 status"
+PATTERN="task-id"
+EXPECT $TASK_ID get-task-id-xml
+
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 $H0:$B0/${V0}5 status"
+PATTERN="completed"
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" get-task-status $PATTERN
+
+## TODO: Add tests for remove-brick stop
+
+COMMAND="volume remove-brick $V0 $H0:$B0/${V0}3 $H0:$B0/${V0}5 commit"
+PATTERN="task-id"
+EXPECT $TASK_ID get-task-id-xml
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/000-flaky/bugs_glusterd_quorum-value-check.t b/tests/000-flaky/bugs_glusterd_quorum-value-check.t
new file mode 100644
index 00000000000..a431b8c4fd4
--- /dev/null
+++ b/tests/000-flaky/bugs_glusterd_quorum-value-check.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+function check_quorum_nfs() {
+ local qnfs="$(less /var/lib/glusterd/nfs/nfs-server.vol | grep "quorum-count"| awk '{print $3}')"
+ local qinfo="$($CLI volume info $V0| grep "cluster.quorum-count"| awk '{print $2}')"
+
+ if [ $qnfs = $qinfo ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 cluster.quorum-count 1
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs
+TEST $CLI volume set $V0 cluster.quorum-count 2
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs
+TEST $CLI volume set $V0 cluster.quorum-count 3
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "Y" check_quorum_nfs
+
+cleanup;
diff --git a/tests/000-flaky/bugs_nfs_bug-1116503.t b/tests/000-flaky/bugs_nfs_bug-1116503.t
new file mode 100644
index 00000000000..fc50021acc7
--- /dev/null
+++ b/tests/000-flaky/bugs_nfs_bug-1116503.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+#
+# Verify that mounting NFS over UDP (MOUNT service only) works.
+#
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume set $V0 nfs.mount-udp on
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+TEST mount_nfs $H0:/$V0 $N0 nolock,mountproto=udp,proto=tcp;
+TEST mkdir -p $N0/foo/bar
+TEST ls $N0/foo
+TEST ls $N0/foo/bar
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0/foo $N0 nolock,mountproto=udp,proto=tcp;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST $CLI volume set $V0 nfs.addr-namelookup on
+TEST $CLI volume set $V0 nfs.rpc-auth-allow $H0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST $CLI volume set $V0 nfs.rpc-auth-reject $H0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST ! mount_nfs $H0:/$V0/foo/bar $N0 nolock,mountproto=udp,proto=tcp;
+
+cleanup;
diff --git a/tests/000-flaky/features_lock-migration_lkmigration-set-option.t b/tests/000-flaky/features_lock-migration_lkmigration-set-option.t
new file mode 100644
index 00000000000..1327ef3579f
--- /dev/null
+++ b/tests/000-flaky/features_lock-migration_lkmigration-set-option.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+# Test to check
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+#Check lock-migration set option sanity
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 lock-migration on
+EXPECT "on" echo `$CLI volume info | grep lock-migration | awk '{print $2}'`
+TEST $CLI volume set $V0 lock-migration off
+EXPECT "off" echo `$CLI volume info | grep lock-migration | awk '{print $2}'`
+TEST ! $CLI volume set $V0 lock-migration garbage
+#make sure it is still off
+EXPECT "off" echo `$CLI volume info | grep lock-migration | awk '{print $2}'`
+
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+
+#create a afr volume and make sure option setting fails
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2
+TEST $CLI volume start $V0
+
+TEST ! $CLI volume set $V0 lock-migration on
+
+cleanup;
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 00000000000..09c98576987
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,43 @@
+Regression tests framework for GlusterFS
+========================================
+
+## Prereq
+- Build and install the version of glusterfs with your changes. Make
+ sure the installed version is accessible from $PATH.
+
+## Prereq for geo-rep regression tests.
+- Passwordless ssh on the test system to itself
+- arequal-checksum installed on the test-system.
+ You can find the repo here - https://github.com/raghavendrabhat/arequal
+
+## How-To
+- To mount glusterfs, NEVER use 'mount -t glusterfs', instead use
+ 'glusterfs -s ' method. This is because with the patch build setup
+ doesnot install the /sbin/mount.glusterfs necessary, where as the
+ glusterfs binary will be accessible with $PATH, and will pick the
+ right version.
+- (optional) Set environment variables to specify location of
+ export directories and mount points. Unless you have special
+ requirements, the defaults should just work. The variables
+ themselves can be found at the top of tests/include.rc. All
+ of them can be overriden with environment variables.
+
+## Usage
+- Execute `/usr/share/glusterfs/run-tests.sh` as root.
+
+- If you want to run individual tests located in `/usr/share/glusterfs/tests`
+ as opposed to the full test-suite, invoke it as
+ `/usr/share/glusterfs/run-tests.sh [pattern]*`, where pattern can be:
+ - the trailing parts of the full path of a test,
+ e.g. `tests/basic/mount.t`
+ - the name of a file or directory, e.g `self-heal.t` or `basic/`
+ - bug number, which will match against numbered bugs in the
+ `tests/bugs/` directory.
+ - a glob pattern (see `man 7 glob` for mor info on globs)
+
+- To execute single ".t" file, use "prove -vf /path/to/.t"
+- If some test cases fail, report to GlusterFS community at
+ `gluster-devel@gluster.org`.
+
+## Reminder
+- BE WARNED THAT THE TEST CASES DELETE ``GLUSTERD_WORKDIR`` * !!!
diff --git a/tests/afr.rc b/tests/afr.rc
new file mode 100644
index 00000000000..241789903ba
--- /dev/null
+++ b/tests/afr.rc
@@ -0,0 +1,123 @@
+#!/bin/bash
+
+function create_brick_xattrop_entry {
+ local xattrop_dir=$(afr_get_index_path $1)
+ local base_entry=`ls $xattrop_dir|grep xattrop`
+ local gfid_str
+ local params=`echo "$@" | cut -d' ' -f2-`
+ echo $params
+
+ for file in $params
+ do
+ gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $1/$file))
+ ln $xattrop_dir/$base_entry $xattrop_dir/$gfid_str
+ done
+}
+
+function diff_dirs {
+ diff <(ls $1 | sort) <(ls $2 | sort)
+}
+
+function heal_status {
+ local f1_path="${1}/${3}"
+ local f2_path="${2}/${3}"
+ local zero_xattr="000000000000000000000000"
+ local insync=""
+ diff_dirs $f1_path $f2_path
+ if [ $? -eq 0 ];
+ then
+ insync="Y"
+ else
+ insync="N"
+ fi
+ local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
+ local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
+ local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
+ local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
+ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
+ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
+ if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi
+ if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi
+ if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi
+ if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi
+ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
+ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
+ echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2}
+}
+# Check if given dir's self-heal is done
+function is_dir_heal_done {
+ local zero_xattr="000000000000000000000000"
+ if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+# Check if the given file's self-heal is done
+function is_file_heal_done {
+ local f1_path="${1}/${3}"
+ local f2_path="${2}/${3}"
+ local zxattr="000000000000000000000000"
+ local size1=$(stat -c "%s" $f1_path)
+ local size2=$(stat -c "%s" $f2_path)
+ local diff=$((size1-size2))
+ local x11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
+ local x12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
+ local x21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
+ local x22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
+ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
+ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
+ if [ -z $x11 ]; then x11="000000000000000000000000"; fi
+ if [ -z $x12 ]; then x12="000000000000000000000000"; fi
+ if [ -z $x21 ]; then x21="000000000000000000000000"; fi
+ if [ -z $x22 ]; then x22="000000000000000000000000"; fi
+ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
+ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
+ if [ "${diff}${x11}${x12}${x21}${x22}${dirty1}${dirty2}" == "0${zxattr}${zxattr}${zxattr}${zxattr}${zxattr}${zxattr}" ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+#count the number of entries marked for self-heal
+#in brick $1's index
+
+function count_index_entries()
+{
+ ls $1/.glusterfs/indices/xattrop | wc -l
+}
+
+function afr_up_status()
+{
+ local v=$1
+ local m=$2
+ local replica_id=$3
+ grep -E "^up = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'='
+}
+
+function get_quorum_type()
+{
+ local m="$1"
+ local v="$2"
+ local repl_id="$3"
+ cat $m/.meta/graphs/active/$v-replicate-$repl_id/private|grep quorum-type|awk '{print $3}'
+}
+
+function afr_private_key_value()
+{
+ local v=$1
+ local m=$2
+ local replica_id=$3
+ local key=$4
+#xargs at the end will strip leading spaces
+ grep -E "^${key} = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'=' | xargs
+}
+
+function afr_anon_entry_count()
+{
+ local b=$1
+ ls $b/.glusterfs-anonymous-inode* | wc -l
+}
diff --git a/tests/basic/0symbol-check.t b/tests/basic/0symbol-check.t
new file mode 100755
index 00000000000..69485a516af
--- /dev/null
+++ b/tests/basic/0symbol-check.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+#
+
+. $(dirname $0)/../include.rc
+
+buildscratch=""
+
+case $OSTYPE in
+Linux)
+ ;;
+*)
+ echo "Skip Linux specific test" >&2
+ SKIP_TESTS
+ exit 0
+ ;;
+esac
+
+# look in the usual places for the build tree
+if [ -d /build/scratch ]; then
+ buildscratch="/build/scratch"
+else
+ # might be in developer's tree
+ if [ -d ./libglusterfs/src/.libs ]; then
+ buildscratch="."
+ elif [ -d ../libglusterfs/src/.libs ]; then
+ buildscratch=".."
+ fi
+fi
+
+if [ -z ${buildscratch} ]; then
+ echo "could find build tree in /build/scratch, . or .." >&2
+ SKIP_TESTS
+ exit 0
+fi
+
+# check symbols
+
+rm -f ./.symbol-check-errors
+
+TEST find ${buildscratch} -name \*.o -exec ./tests/basic/symbol-check.sh {} \\\;
+
+TEST [ ! -e ./.symbol-check-errors ]
+
+rm -f ./.symbol-check-errors
+
+cleanup
diff --git a/tests/basic/afr/add-brick-self-heal.t b/tests/basic/afr/add-brick-self-heal.t
new file mode 100644
index 00000000000..c847e22977f
--- /dev/null
+++ b/tests/basic/afr/add-brick-self-heal.t
@@ -0,0 +1,74 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+# Create files
+for i in {1..5}
+do
+ echo $i > $M0/file$i.txt
+done
+
+# Metadata changes
+TEST setfattr -n user.test -v qwerty $M0/file5.txt
+
+# Add brick1
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+
+# New-brick should accuse the old-bricks (Simulating case for data-loss)
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}2/
+
+# Check if pending xattr and dirty-xattr are set for newly-added-brick
+EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
+EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Check if entry-heal has happened
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}2 | sort)
+TEST diff <(ls $B0/${V0}1 | sort) <(ls $B0/${V0}2 | sort)
+
+# Test if data was healed
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}2/file1.txt
+
+# Test if metadata was healed and exists on both the bricks
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}2/file5.txt
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt
+
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}2
+
+cleanup;
diff --git a/tests/basic/afr/afr-anon-inode-no-quorum.t b/tests/basic/afr/afr-anon-inode-no-quorum.t
new file mode 100644
index 00000000000..896ba0c9b2c
--- /dev/null
+++ b/tests/basic/afr/afr-anon-inode-no-quorum.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+#Test that anon-inode entry is not cleaned up as long as there exists at least
+#one valid entry
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST touch $M0/a $M0/b
+
+gfid_a=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a))
+gfid_b=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b))
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/a $M0/a-new
+TEST mv $M0/b $M0/b-new
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST ! ls $M0/a
+TEST ! ls $M0/b
+anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
+#Make sure index heal doesn't happen after enabling heal
+TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
+TEST rm -f $B0/${V0}1/.glusterfs/indices/xattrop/*
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+#Allow time for a scan
+sleep 5
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_a
+TEST stat $B0/${V0}0/$anon_inode_name/$gfid_b
+inum_b=$(STAT_INO $B0/${V0}0/$anon_inode_name/$gfid_b)
+TEST rm -f $M0/a-new
+TEST stat $M0/b-new
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
+EXPECT "$inum_b" STAT_INO $B0/${V0}0/b-new
+
+cleanup
diff --git a/tests/basic/afr/afr-anon-inode.t b/tests/basic/afr/afr-anon-inode.t
new file mode 100644
index 00000000000..f4cf37a2fa0
--- /dev/null
+++ b/tests/basic/afr/afr-anon-inode.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+#Tests that afr-anon-inode test cases work fine as expected
+#These are cases where in entry-heal/name-heal we dont know entry for an inode
+#so these inodes are kept in a special directory
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST $CLI volume set $V0 cluster.use-anonymous-inode no
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST $CLI volume set $V0 cluster.use-anonymous-inode yes
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "use-anonymous-inode"
+TEST mkdir -p $M0/d1/b $M0/d2/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/d2/a $M0/d1
+TEST mv $M0/d1/b $M0/d2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+anon_inode_name=$(ls -a $B0/${V0}0 | grep glusterfs-anonymous-inode)
+TEST [[ -d $B0/${V0}1/$anon_inode_name ]]
+TEST [[ -d $B0/${V0}2/$anon_inode_name ]]
+anon_gfid=$(gf_get_gfid_xattr $B0/${V0}0/$anon_inode_name)
+EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}1/$anon_inode_name
+EXPECT "$anon_gfid" gf_get_gfid_xattr $B0/${V0}2/$anon_inode_name
+
+TEST ! ls $M0/$anon_inode_name
+EXPECT "^4$" echo $(ls -a $M0 | wc -l)
+
+#Test purging code path by shd
+TEST $CLI volume heal $V0 disable
+TEST mkdir $M0/l0 $M0/l1 $M0/l2
+TEST touch $M0/del-file $M0/del-file-nolink $M0/l0/file
+TEST ln $M0/del-file $M0/del-file-link
+TEST ln $M0/l0/file $M0/l1/file-link1
+TEST ln $M0/l0/file $M0/l2/file-link2
+TEST mkdir -p $M0/del-recursive-dir/d1
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST rm -f $M0/del-file $M0/del-file-nolink
+TEST rm -rf $M0/del-recursive-dir
+TEST mv $M0/d1/a $M0/d2
+TEST mv $M0/l0/file $M0/l0/renamed-file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 0
+
+nolink_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file-nolink))
+link_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-file))
+dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/del-recursive-dir))
+rename_dir_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/d1/a))
+rename_file_gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/l0/file))
+TEST ! stat $M0/del-file
+TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
+TEST ! stat $M0/del-file-nolink
+TEST ! stat $B0/${V0}0/$anon_inode_name/$nolink_gfid
+TEST ! stat $M0/del-recursive-dir
+TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
+TEST ! stat $M0/d1/a
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
+TEST ! stat $M0/l0/file
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_file_gfid
+
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST mv $M0/l1/file-link1 $M0/l1/renamed-file-link1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
+TEST ! stat $M0/l1/file-link1
+TEST stat $B0/${V0}1/$anon_inode_name/$rename_file_gfid
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST mv $M0/l2/file-link2 $M0/l2/renamed-file-link2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 2
+TEST ! stat $M0/l2/file-link2
+TEST stat $B0/${V0}2/$anon_inode_name/$rename_file_gfid
+
+#Simulate only anon-inodes present in all bricks
+TEST rm -f $M0/l0/renamed-file $M0/l1/renamed-file-link1 $M0/l2/renamed-file-link2
+
+#Test that shd doesn't cleanup anon-inodes when some bricks are down
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST $CLI volume heal $V0 enable
+$CLI volume heal $V0
+sleep 5 #Allow time for completion of one scan
+TEST stat $B0/${V0}0/$anon_inode_name/$link_gfid
+TEST stat $B0/${V0}0/$anon_inode_name/$rename_dir_gfid
+TEST stat $B0/${V0}0/$anon_inode_name/$dir_gfid
+rename_dir_inum=$(STAT_INO $B0/${V0}0/$anon_inode_name/$rename_dir_gfid)
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status $V0 1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/${V0}2
+
+#Test that rename indeed happened instead of rmdir/mkdir
+renamed_dir_inum=$(STAT_INO $B0/${V0}0/d2/a)
+EXPECT "$rename_dir_inum" echo $renamed_dir_inum
+cleanup;
diff --git a/tests/basic/afr/afr-no-fsync.t b/tests/basic/afr/afr-no-fsync.t
new file mode 100644
index 00000000000..0966d9b0a11
--- /dev/null
+++ b/tests/basic/afr/afr-no-fsync.t
@@ -0,0 +1,20 @@
+#!/bin/bash
+#Tests that sequential write workload doesn't lead to FSYNCs
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST dd if=/dev/zero of=$M0/a bs=1M count=500
+TEST ! "$CLI volume profile $V0 info incremental | grep FSYNC"
+
+cleanup;
diff --git a/tests/basic/afr/afr-read-hash-mode.t b/tests/basic/afr/afr-read-hash-mode.t
new file mode 100644
index 00000000000..eeff10d8ebd
--- /dev/null
+++ b/tests/basic/afr/afr-read-hash-mode.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+function reads_brick_count {
+ $CLI volume profile $V0 info incremental | grep -w READ | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0..2}
+
+TEST $CLI volume set $V0 cluster.choose-local off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+
+# Disable all caching
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+TEST dd if=/dev/urandom of=$M0/FILE bs=1M count=8
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# TEST if the option gives the intended behavior. The way we perform this test
+# is by performing reads from the mount and write to /dev/null. If the
+# read-hash-mode is 3, then for a given file, more than 1 brick should serve the
+# read-fops where as with the default read-hash-mode (i.e. 1), only 1 brick will.
+
+# read-hash-mode=1
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT "1" mount_get_option_value $M0 $V0-replicate-0 read-hash-mode
+TEST $CLI volume profile $V0 start
+TEST dd if=$M0/FILE of=/dev/null bs=1M
+count=`reads_brick_count`
+TEST [ $count -eq 1 ]
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# read-hash-mode=3
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+TEST $CLI volume set $V0 cluster.read-hash-mode 3
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "3" mount_get_option_value $M0 $V0-replicate-0 read-hash-mode
+TEST $CLI volume profile $V0 info clear
+TEST dd if=$M0/FILE of=/dev/null bs=1M
+count=`reads_brick_count`
+TEST [ $count -eq 2 ]
+
+# Check that the arbiter did not serve any reads
+arbiter_reads=$($CLI volume top $V0 read brick $H0:$B0/${V0}2|grep FILE|awk '{print $1}')
+TEST [ -z $arbiter_reads ]
+
+cleanup;
diff --git a/tests/basic/afr/afr-seek.t b/tests/basic/afr/afr-seek.t
new file mode 100644
index 00000000000..c12ee011660
--- /dev/null
+++ b/tests/basic/afr/afr-seek.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+SEEK=$(dirname $0)/seek
+build_tester $(dirname $0)/../seek.c -o ${SEEK}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST mkdir -p $B0/${V0}{0..2}
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
+
+TEST $CLI volume start $V0
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST ${SEEK} create ${M0}/test 0 1 1048576 1
+# Determine underlying filesystem allocation block size
+BSIZE="$(($(${SEEK} scan ${M0}/test hole 0) * 2))"
+
+TEST ${SEEK} create ${M0}/test 0 ${BSIZE} $((${BSIZE} * 4 + 512)) ${BSIZE}
+
+EXPECT "^0$" ${SEEK} scan ${M0}/test data 0
+EXPECT "^$((${BSIZE} / 2))$" ${SEEK} scan ${M0}/test data $((${BSIZE} / 2))
+EXPECT "^$((${BSIZE} - 1))$" ${SEEK} scan ${M0}/test data $((${BSIZE} - 1))
+EXPECT "^$((${BSIZE} * 4))$" ${SEEK} scan ${M0}/test data ${BSIZE}
+EXPECT "^$((${BSIZE} * 4))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 4))
+EXPECT "^$((${BSIZE} * 5))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5))
+EXPECT "^$((${BSIZE} * 5 + 511))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5 + 511))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5 + 512))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 6))
+
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole 0
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole $((${BSIZE} / 2))
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole $((${BSIZE} - 1))
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole ${BSIZE}
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 4))
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5))
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5 + 511))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5 + 512))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 6))
+
+rm -f ${SEEK}
+cleanup
+
+# Centos6 regression slaves seem to not support SEEK_DATA/SEEK_HOLE
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/basic/afr/afr-up.t b/tests/basic/afr/afr-up.t
new file mode 100644
index 00000000000..428aac875e0
--- /dev/null
+++ b/tests/basic/afr/afr-up.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+#Tests that afr up/down works as expected
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,3,4,5,6}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "1" afr_up_status $V0 $M0 0
+EXPECT "1" afr_up_status $V0 $M0 1
+
+#kill two bricks in first replica and check that afr_up_status is 0 for it
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_up_status $V0 $M0 0
+EXPECT "1" afr_up_status $V0 $M0 1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_up_status $V0 $M0 0
+EXPECT "1" afr_up_status $V0 $M0 1
+cleanup;
diff --git a/tests/basic/afr/arbiter-add-brick.t b/tests/basic/afr/arbiter-add-brick.t
new file mode 100644
index 00000000000..77b93d9a210
--- /dev/null
+++ b/tests/basic/afr/arbiter-add-brick.t
@@ -0,0 +1,86 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create replica 2 volume and create file/dir.
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST mkdir $M0/dir1
+TEST dd if=/dev/urandom of=$M0/file1 bs=1024 count=1
+
+#Kill second brick and perform I/O to have pending heals.
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST mkdir $M0/dir2
+TEST dd if=/dev/urandom of=$M0/file1 bs=1024 count=1024
+
+
+#convert replica 2 to arbiter volume
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#syntax check for add-brick.
+TEST ! $CLI volume add-brick $V0 replica 2 arbiter 1 $H0:$B0/${V0}2
+TEST ! $CLI volume add-brick $V0 replica 3 arbiter 2 $H0:$B0/${V0}2
+
+TEST $CLI volume add-brick $V0 replica 3 arbiter 1 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+#Trigger name heals from client. If we just rely on index heal, the first index
+#crawl on B0 fails for /, dir2 and /file either due to lock collision or files
+#not being present on the other 2 bricks yet. It is getting healed only in the
+#next crawl after priv->shd.timeout (600 seconds) or by manually launching
+#index heal again.
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST stat $M0/dir1
+TEST stat $M0/dir2
+TEST stat $M0/file1
+
+#Heal files
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#Perform I/O after add-brick
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST mkdir $M0/dir3
+TEST dd if=/dev/urandom of=$M0/file2 bs=1024 count=1024
+
+# File hierarchy must be same in all 3 bricks.
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}2 | sort)
+TEST diff <(ls $B0/${V0}1 | sort) <(ls $B0/${V0}2 | sort)
+
+#Mount serves the correct file size
+EXPECT "1048576" stat -c %s $M0/file1
+EXPECT "1048576" stat -c %s $M0/file2
+
+#Check file size in arbiter brick
+EXPECT "0" stat -c %s $B0/${V0}2/file1
+EXPECT "0" stat -c %s $B0/${V0}2/file2
+
+#Increasing replica count of arbiter volumes must not be allowed.
+TEST ! $CLI volume add-brick $V0 replica 4 $H0:$B0/${V0}3
+TEST ! $CLI volume add-brick $V0 replica 4 arbiter 1 $H0:$B0/${V0}3
+
+#Adding another distribute leg should succeed.
+TEST $CLI volume add-brick $V0 replica 3 arbiter 1 $H0:$B0/${V0}{3..5}
+TEST force_umount $M0
+cleanup;
diff --git a/tests/basic/afr/arbiter-cli.t b/tests/basic/afr/arbiter-cli.t
new file mode 100644
index 00000000000..ad79de79d02
--- /dev/null
+++ b/tests/basic/afr/arbiter-cli.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+# Negative test cases for arbiter volume creation should not crash.
+
+TEST glusterd;
+TEST pidof glusterd
+
+# No replica count.
+TEST ! $CLI volume create $V0 arbiter 3 $H0:$B0/${V0}{0,1,2}
+
+# replica count given after arbiter count.
+TEST ! $CLI volume create $V0 arbiter 1 replica 3 $H0:$B0/${V0}{0,1,2}
+
+# Incorrect values for replica and arbiter count.
+TEST ! $CLI volume create $V0 replica 3 arbiter 2 $H0:$B0/${V0}{0,1,2}
+
+# Correct setup
+# Only documented value is replica=2 and arbiter=1.
+TEST $CLI volume create $V0 replica 2 arbiter 1 $H0:$B0/${V0}{0,1,2}
+
+# Earlier documents mentioned 'replica 3 arbiter 1' as the valid option
+# Preserve backward compatibility till Oct, 2019.
+TEST $CLI volume create ${V0}-old replica 3 arbiter 1 $H0:$B0/${V0}-old{0,1,2}
+
+cleanup
diff --git a/tests/basic/afr/arbiter-mount.t b/tests/basic/afr/arbiter-mount.t
new file mode 100644
index 00000000000..404d334d2f9
--- /dev/null
+++ b/tests/basic/afr/arbiter-mount.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+#Check that mounting fails when only arbiter brick is up.
+
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+# Doing `mount -t glusterfs $H0:$V0 $M0` fails right away but doesn't work on NetBSD
+# So check that stat <mount> fails instead.
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST ! stat $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+mount_nfs $H0:/$V0 $N0
+TEST [ $? -ne 0 ]
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST stat $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+mount_nfs $H0:/$V0 $N0
+TEST [ $? -eq 0 ]
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+cleanup
diff --git a/tests/basic/afr/arbiter-remove-brick.t b/tests/basic/afr/arbiter-remove-brick.t
new file mode 100644
index 00000000000..ec93c8758e4
--- /dev/null
+++ b/tests/basic/afr/arbiter-remove-brick.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create arbiter volume.
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#syntax check for remove-brick.
+TEST ! $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}0 force
+TEST ! $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}1 force
+
+#convert to replica 2 volume
+TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 force
+EXPECT "1 x 2 = 2" volinfo_field $V0 "Number of Bricks"
+
+TEST mkdir $M0/dir
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort)
+
+#Mount serves the correct file size
+EXPECT "1048576" stat -c %s $M0/file
+
+#Check file size in bricks
+EXPECT "1048576" stat -c %s $B0/${V0}0/file
+EXPECT "1048576" stat -c %s $B0/${V0}1/file
+
+TEST force_umount $M0
+cleanup;
diff --git a/tests/basic/afr/arbiter-statfs.t b/tests/basic/afr/arbiter-statfs.t
new file mode 100644
index 00000000000..61cb9e1d04f
--- /dev/null
+++ b/tests/basic/afr/arbiter-statfs.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+#Test that statfs is not served from the arbiter brick.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+EXIT_EARLY=1
+TEST glusterd
+
+#Create brick partitions
+TEST truncate -s 1G $B0/brick1
+TEST truncate -s 1G $B0/brick2
+#Arbiter brick is of a lesser size.
+TEST truncate -s 90M $B0/brick3
+LO1=`SETUP_LOOP $B0/brick1`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO1
+LO2=`SETUP_LOOP $B0/brick2`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO2
+LO3=`SETUP_LOOP $B0/brick3`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO3
+TEST mkdir -p $B0/${V0}1 $B0/${V0}2 $B0/${V0}3
+TEST MOUNT_LOOP $LO1 $B0/${V0}1
+TEST MOUNT_LOOP $LO2 $B0/${V0}2
+TEST MOUNT_LOOP $LO3 $B0/${V0}3
+
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{1,2,3};
+TEST $CLI volume start $V0
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+free_space=$(df -P $M0 | tail -1 | awk '{ print $4}')
+TEST [ $free_space -gt 100000 ]
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete $V0;
+UMOUNT_LOOP ${B0}/${V0}{1,2,3}
+rm -f ${B0}/brick{1,2,3}
+cleanup;
diff --git a/tests/basic/afr/arbiter.t b/tests/basic/afr/arbiter.t
new file mode 100644
index 00000000000..7c92a9fe6c9
--- /dev/null
+++ b/tests/basic/afr/arbiter.t
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd
+
+# Non arbiter replica 3 volumes should not have arbiter-count option enabled.
+TEST mkdir -p $B0/${V0}{0,1,2}
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST ! stat $M0/.meta/graphs/active/$V0-replicate-0/options/arbiter-count
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+# Make sure we clean up *all the way* so we don't get "brick X is already part
+# of a volume" errors.
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+# Create and mount a replica 3 arbiter volume.
+TEST mkdir -p $B0/${V0}{0,1,2}
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST stat $M0/.meta/graphs/active/$V0-replicate-0/options/arbiter-count
+EXPECT "1" cat $M0/.meta/graphs/active/$V0-replicate-0/options/arbiter-count
+
+# Write data and metadata
+TEST `echo hello >> $M0/file`
+TEST setfattr -n user.name -v value1 $M0/file
+
+# Data I/O will fail if arbiter is the only source.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST `echo "B0 is down, B1 and B2 are sources" >> $M0/file`
+TEST setfattr -n user.name -v value2 $M0/file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "B2 is down, B3 is the only source, writes will fail" >> $M0/file
+EXPECT_NOT "0" echo $?
+TEST ! cat $M0/file
+# Though metadata IO could have been served from arbiter, we do not allow it
+# anymore as FOPS like getfattr could be overloaded to return iatt buffers for
+# use by other translators.
+TEST ! getfattr -n user.name $M0/file
+TEST ! setfattr -n user.name -v value3 $M0/file
+
+#shd should not data self-heal from arbiter to the sinks.
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+$CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT '1' echo $(count_sh_entries $B0/$V0"1")
+EXPECT_WITHIN $HEAL_TIMEOUT '1' echo $(count_sh_entries $B0/$V0"2")
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST $CLI volume heal $V0
+EXPECT 0 get_pending_heal_count $V0
+
+# I/O can resume again.
+TEST cat $M0/file
+TEST getfattr -n user.name $M0/file
+TEST `echo append>> $M0/file`
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup
diff --git a/tests/basic/afr/client-side-heal.t b/tests/basic/afr/client-side-heal.t
new file mode 100755
index 00000000000..1e9336184b5
--- /dev/null
+++ b/tests/basic/afr/client-side-heal.t
@@ -0,0 +1,95 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+echo "some data" > $M0/datafile
+EXPECT 0 echo $?
+TEST touch $M0/mdatafile
+TEST touch $M0/mdatafile-backend-direct-modify
+TEST mkdir $M0/dir
+
+#Kill a brick and perform I/O to have pending heals.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" afr_child_up_status $V0 0
+
+#pending data heal
+echo "some more data" >> $M0/datafile
+EXPECT 0 echo $?
+
+#pending metadata heal
+TEST chmod +x $M0/mdatafile
+TEST chmod +x $B0/${V0}0/mdatafile-backend-direct-modify
+
+#pending entry heal. Also causes pending metadata/data heals on file{1..5}
+TEST touch $M0/dir/file{1..5}
+
+EXPECT 8 get_pending_heal_count $V0
+
+#After brick comes back up, access from client should not trigger heals
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#Medatada heal via explicit lookup must not happen
+TEST getfattr -d -m. -e hex $M0/mdatafile
+TEST ls $M0/mdatafile-backend-direct-modify
+
+TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" != "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]]
+
+#Inode refresh must not trigger data metadata and entry heals.
+#To trigger inode refresh for sure, the volume is unmounted and mounted each time.
+#Check that data heal does not happen.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST cat $M0/datafile
+#Check that entry heal does not happen.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST ls $M0/dir
+#No heal must have happened
+EXPECT 8 get_pending_heal_count $V0
+
+#Enable heal client side heal options and trigger heals
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+#Inode refresh must trigger data metadata and entry heals.
+#To trigger inode refresh for sure, the volume is unmounted and mounted each time.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST ls $M0/mdatafile-backend-direct-modify
+
+TEST [[ "$(stat -c %A $B0/${V0}0/mdatafile-backend-direct-modify)" == "$(stat -c %A $B0/${V0}1/mdatafile-backend-direct-modify)" ]]
+
+
+TEST getfattr -d -m. -e hex $M0/mdatafile
+EXPECT_WITHIN $HEAL_TIMEOUT 7 get_pending_heal_count $V0
+
+TEST cat $M0/datafile
+EXPECT_WITHIN $HEAL_TIMEOUT 6 get_pending_heal_count $V0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST ls $M0/dir
+EXPECT_WITHIN $HEAL_TIMEOUT 5 get_pending_heal_count $V0
+
+TEST cat $M0/dir/file1
+TEST cat $M0/dir/file2
+TEST cat $M0/dir/file3
+TEST cat $M0/dir/file4
+TEST cat $M0/dir/file5
+
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
+cleanup;
diff --git a/tests/basic/afr/data-self-heal.t b/tests/basic/afr/data-self-heal.t
new file mode 100644
index 00000000000..0f417b4a0ba
--- /dev/null
+++ b/tests/basic/afr/data-self-heal.t
@@ -0,0 +1,209 @@
+#!/bin/bash
+#Self-heal tests
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+function create_xattrop_entry {
+ local xattrop_dir0=$(afr_get_index_path $B0/brick0)
+ local xattrop_dir1=$(afr_get_index_path $B0/brick1)
+ local base_entry_b0=`ls $xattrop_dir0`
+ local base_entry_b1=`ls $xattrop_dir1`
+ local gfid_str
+
+ for file in "$@"
+ do
+ gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/brick0/$file))
+ ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+ ln $xattrop_dir1/$base_entry_b1 $xattrop_dir1/$gfid_str
+ done
+}
+
+function is_heal_done {
+ local f1_path="${1}/${3}"
+ local f2_path="${2}/${3}"
+ local zero_xattr="000000000000000000000000"
+ local size1=$(stat -c "%s" $f1_path)
+ local size2=$(stat -c "%s" $f2_path)
+ local diff=$((size1-size2))
+ local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
+ local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
+ local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
+ local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
+ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
+ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
+ if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi
+ if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi
+ if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi
+ if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi
+ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
+ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
+ if [ "${diff}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2}" == "0${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function print_pending_heals {
+ local result=":"
+ for i in "$@";
+ do
+ if [ "N" == $(is_heal_done $B0/brick0 $B0/brick1 $i) ];
+ then
+ result="$result:$i"
+ fi
+ done
+#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names
+ if [ $result == ":" ]; then result="~"; fi
+ echo $result
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.open-behind off
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+cd $M0
+TEST touch pending-changelog biggest-file-source.txt biggest-file-more-prio-than-changelog.txt same-size-more-prio-to-changelog.txt size-and-witness-same.txt self-accusing-vs-source.txt self-accusing-both.txt self-accusing-vs-innocent.txt self-accusing-bigger-exists.txt size-more-prio-than-self-accused.txt v1-dirty.txt split-brain.txt split-brain-all-dirty.txt split-brain-with-dirty.txt
+
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/brick0/pending-changelog
+TEST "echo abc > $B0/brick1/pending-changelog"
+
+TEST "echo abc > $B0/brick0/biggest-file-source.txt"
+TEST "echo abcd > $B0/brick1/biggest-file-source.txt"
+
+TEST "echo abc > $B0/brick0/biggest-file-more-prio-than-changelog.txt"
+TEST "echo abcd > $B0/brick1/biggest-file-more-prio-than-changelog.txt"
+TEST setfattr -n trusted.afr.dirty -v 0x000000200000000000000000 $B0/brick0/biggest-file-more-prio-than-changelog.txt
+
+TEST "echo abc > $B0/brick0/same-size-more-prio-to-changelog.txt"
+TEST "echo def > $B0/brick1/same-size-more-prio-to-changelog.txt"
+TEST setfattr -n trusted.afr.dirty -v 0x000000200000000000000000 $B0/brick0/same-size-more-prio-to-changelog.txt
+
+TEST "echo abc > $B0/brick0/size-and-witness-same.txt"
+TEST "echo def > $B0/brick1/size-and-witness-same.txt"
+TEST setfattr -n trusted.afr.dirty -v 0x000000200000000000000000 $B0/brick0/size-and-witness-same.txt
+TEST setfattr -n trusted.afr.dirty -v 0x000000200000000000000000 $B0/brick1/size-and-witness-same.txt
+
+TEST "echo abc > $B0/brick0/split-brain.txt"
+TEST "echo def > $B0/brick1/split-brain.txt"
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick0/split-brain.txt
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick1/split-brain.txt
+
+TEST "echo abc > $B0/brick0/split-brain-all-dirty.txt"
+TEST "echo def > $B0/brick1/split-brain-all-dirty.txt"
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick0/split-brain-all-dirty.txt
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick1/split-brain-all-dirty.txt
+TEST setfattr -n trusted.afr.dirty -v 0x000000200000000000000000 $B0/brick0/split-brain-all-dirty.txt
+TEST setfattr -n trusted.afr.dirty -v 0x000000200000000000000000 $B0/brick1/split-brain-all-dirty.txt
+
+TEST "echo abc > $B0/brick0/split-brain-with-dirty.txt"
+TEST "echo def > $B0/brick1/split-brain-with-dirty.txt"
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick0/split-brain-with-dirty.txt
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick1/split-brain-with-dirty.txt
+TEST setfattr -n trusted.afr.dirty -v 0x000000200000000000000000 $B0/brick1/split-brain-with-dirty.txt
+
+TEST "echo def > $B0/brick1/self-accusing-vs-source.txt"
+TEST "echo abc > $B0/brick0/self-accusing-vs-source.txt"
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick1/self-accusing-vs-source.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick1/self-accusing-vs-source.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick0/self-accusing-vs-source.txt
+
+TEST "echo abc > $B0/brick0/self-accusing-both.txt"
+TEST "echo def > $B0/brick1/self-accusing-both.txt"
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick0/self-accusing-both.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick0/self-accusing-both.txt
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick1/self-accusing-both.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick1/self-accusing-both.txt
+
+TEST "echo abc > $B0/brick0/self-accusing-vs-innocent.txt"
+TEST "echo def > $B0/brick1/self-accusing-vs-innocent.txt"
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick1/self-accusing-vs-innocent.txt
+
+TEST "echo abc > $B0/brick0/self-accusing-bigger-exists.txt"
+TEST "echo def > $B0/brick1/self-accusing-bigger-exists.txt"
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick0/self-accusing-bigger-exists.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000300000000000000000 $B0/brick0/self-accusing-bigger-exists.txt
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick1/self-accusing-bigger-exists.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick1/self-accusing-bigger-exists.txt
+
+TEST "echo abc > $B0/brick0/size-more-prio-than-self-accused.txt"
+TEST "echo defg > $B0/brick1/size-more-prio-than-self-accused.txt"
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick0/size-more-prio-than-self-accused.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000300000000000000000 $B0/brick0/size-more-prio-than-self-accused.txt
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick1/size-more-prio-than-self-accused.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000200000000000000000 $B0/brick1/size-more-prio-than-self-accused.txt
+
+TEST "echo abc > $B0/brick0/v1-dirty.txt"
+TEST "echo def > $B0/brick1/v1-dirty.txt"
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000200000000000000000 $B0/brick0/v1-dirty.txt
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000100000000000000000 $B0/brick1/v1-dirty.txt
+
+#Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+rm -f $M0/FILE
+EXPECT "1" count_index_entries $B0/brick0
+EXPECT "1" count_index_entries $B0/brick1
+cd -
+
+#Create gfid hard links for all files before triggering index heals.
+create_xattrop_entry pending-changelog biggest-file-source.txt biggest-file-more-prio-than-changelog.txt same-size-more-prio-to-changelog.txt size-and-witness-same.txt self-accusing-vs-source.txt self-accusing-both.txt self-accusing-vs-innocent.txt self-accusing-bigger-exists.txt size-more-prio-than-self-accused.txt v1-dirty.txt
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals pending-changelog biggest-file-source.txt biggest-file-more-prio-than-changelog.txt same-size-more-prio-to-changelog.txt size-and-witness-same.txt self-accusing-vs-source.txt self-accusing-both.txt self-accusing-vs-innocent.txt self-accusing-bigger-exists.txt size-more-prio-than-self-accused.txt v1-dirty.txt
+EXPECT "N" is_heal_done $B0/brick0 $B0/brick1 split-brain.txt
+EXPECT "N" is_heal_done $B0/brick0 $B0/brick1 split-brain-all-dirty.txt
+EXPECT "N" is_heal_done $B0/brick0 $B0/brick1 split-brain-with-dirty.txt
+
+EXPECT "0" stat -c "%s" $M0/pending-changelog
+TEST cmp $B0/brick0/pending-changelog $B0/brick1/pending-changelog
+
+EXPECT "abcd" cat $M0/biggest-file-source.txt
+TEST cmp $B0/brick0/biggest-file-source.txt $B0/brick1/biggest-file-source.txt
+
+EXPECT "abcd" cat $M0/biggest-file-more-prio-than-changelog.txt
+TEST cmp $B0/brick0/biggest-file-more-prio-than-changelog.txt $B0/brick1/biggest-file-more-prio-than-changelog.txt
+
+EXPECT "abc" cat $M0/same-size-more-prio-to-changelog.txt
+TEST cmp $B0/brick0/same-size-more-prio-to-changelog.txt $B0/brick1/same-size-more-prio-to-changelog.txt
+
+EXPECT "(abc|def)" cat $M0/size-and-witness-same.txt
+TEST cmp $B0/brick0/size-and-witness-same.txt $B0/brick1/size-and-witness-same.txt
+
+TEST ! cat $M0/split-brain.txt
+TEST ! cat $M0/split-brain-all-dirty.txt
+TEST ! cat $M0/split-brain-with-dirty.txt
+
+EXPECT "abc" cat $M0/self-accusing-vs-source.txt
+TEST cmp $B0/brick0/self-accusing-vs-source.txt $B0/brick1/self-accusing-vs-source.txt
+
+EXPECT "(abc|def)" cat $M0/self-accusing-both.txt
+TEST cmp $B0/brick0/self-accusing-both.txt $B0/brick1/self-accusing-both.txt
+
+EXPECT "def" cat $M0/self-accusing-vs-innocent.txt
+TEST cmp $B0/brick0/self-accusing-vs-innocent.txt $B0/brick1/self-accusing-vs-innocent.txt
+
+EXPECT "abc" cat $M0/self-accusing-bigger-exists.txt
+TEST cmp $B0/brick0/self-accusing-bigger-exists.txt $B0/brick1/self-accusing-bigger-exists.txt
+
+EXPECT "defg" cat $M0/size-more-prio-than-self-accused.txt
+TEST cmp $B0/brick0/size-more-prio-than-self-accused.txt $B0/brick1/size-more-prio-than-self-accused.txt
+
+EXPECT "abc" cat $M0/v1-dirty.txt
+TEST cmp $B0/brick0/v1-dirty.txt $B0/brick1/v1-dirty.txt
+cleanup;
diff --git a/tests/basic/afr/durability-off.t b/tests/basic/afr/durability-off.t
new file mode 100644
index 00000000000..6e0f18b88f8
--- /dev/null
+++ b/tests/basic/afr/durability-off.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+#This test tests that self-heals don't perform fsync when durability is turned
+#off
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST $CLI volume set $V0 cluster.ensure-durability off
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST dd of=$M0/a.txt if=/dev/zero bs=1024k count=1
+#NetBSD sends FSYNC so the counts go for a toss. Stop and start the volume.
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+EXPECT "^0$" echo $($CLI volume profile $V0 info | grep -w FSYNC | wc -l)
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+#Test that fsyncs happen when durability is on
+TEST $CLI volume set $V0 cluster.ensure-durability on
+TEST $CLI volume set $V0 performance.strict-write-ordering on
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST dd of=$M0/a.txt if=/dev/zero bs=1024k count=1
+#NetBSD sends FSYNC so the counts go for a toss. Stop and start the volume.
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+EXPECT "^2$" echo $($CLI volume profile $V0 info | grep -w FSYNC | wc -l)
+
+cleanup;
diff --git a/tests/basic/afr/entry-self-heal-anon-dir-off.t b/tests/basic/afr/entry-self-heal-anon-dir-off.t
new file mode 100644
index 00000000000..7bb6ee14193
--- /dev/null
+++ b/tests/basic/afr/entry-self-heal-anon-dir-off.t
@@ -0,0 +1,459 @@
+#!/bin/bash
+
+#This file checks if missing entry self-heal and entry self-heal are working
+#as expected.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+function get_file_type {
+ stat -c "%a:%F:%g:%t:%T:%u" $1
+}
+
+function diff_dirs {
+ diff <(ls $1 | sort) <(ls $2 | sort)
+}
+
+function heal_status {
+ local f1_path="${1}/${3}"
+ local f2_path="${2}/${3}"
+ local insync=""
+ diff_dirs $f1_path $f2_path
+ if [ $? -eq 0 ];
+ then
+ insync="Y"
+ else
+ insync="N"
+ fi
+ local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
+ local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
+ local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
+ local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
+ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
+ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
+ if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi
+ if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi
+ if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi
+ if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi
+ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
+ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
+ echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2}
+}
+
+function is_heal_done {
+ local zero_xattr="000000000000000000000000"
+ if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function print_pending_heals {
+ local result=":"
+ for i in "$@";
+ do
+ if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ];
+ then
+ result="$result:$i"
+ fi
+ done
+#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names
+ if [ $result == ":" ]; then result="~"; fi
+ echo $result
+}
+
+zero_xattr="000000000000000000000000"
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 cluster.use-anonymous-inode off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0
+cd $M0
+#_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens
+#spb is split-brain, fool is all fool
+
+#source_self_accusing means there exists source and a sink which self-accuses.
+#This simulates failures where fops failed on the bricks without it going down.
+#Something like EACCESS/EDQUOT etc
+
+TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing
+TEST mkfifo source_deletions_heal/fifo
+TEST mknod source_deletions_heal/block b 4 5
+TEST mknod source_deletions_heal/char c 1 5
+TEST touch source_deletions_heal/file
+TEST ln -s source_deletions_heal/file source_deletions_heal/slink
+TEST mkdir source_deletions_heal/dir1
+TEST mkdir source_deletions_heal/dir1/dir2
+
+TEST mkfifo source_deletions_me/fifo
+TEST mknod source_deletions_me/block b 4 5
+TEST mknod source_deletions_me/char c 1 5
+TEST touch source_deletions_me/file
+TEST ln -s source_deletions_me/file source_deletions_me/slink
+TEST mkdir source_deletions_me/dir1
+TEST mkdir source_deletions_me/dir1/dir2
+
+TEST mkfifo source_self_accusing/fifo
+TEST mknod source_self_accusing/block b 4 5
+TEST mknod source_self_accusing/char c 1 5
+TEST touch source_self_accusing/file
+TEST ln -s source_self_accusing/file source_self_accusing/slink
+TEST mkdir source_self_accusing/dir1
+TEST mkdir source_self_accusing/dir1/dir2
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0
+TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1
+TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1
+TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1
+
+#Test that the files are deleted
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/slink
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
+TEST ! stat $B0/${V0}1/source_deletions_me/block
+TEST ! stat $B0/${V0}1/source_deletions_me/char
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/slink
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
+TEST ! stat $B0/${V0}1/source_self_accusing/block
+TEST ! stat $B0/${V0}1/source_self_accusing/char
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/slink
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
+
+
+TEST mkfifo source_creations_heal/fifo
+TEST mknod source_creations_heal/block b 4 5
+TEST mknod source_creations_heal/char c 1 5
+TEST touch source_creations_heal/file
+TEST ln -s source_creations_heal/file source_creations_heal/slink
+TEST mkdir source_creations_heal/dir1
+TEST mkdir source_creations_heal/dir1/dir2
+
+TEST mkfifo source_creations_me/fifo
+TEST mknod source_creations_me/block b 4 5
+TEST mknod source_creations_me/char c 1 5
+TEST touch source_creations_me/file
+TEST ln -s source_creations_me/file source_creations_me/slink
+TEST mkdir source_creations_me/dir1
+TEST mkdir source_creations_me/dir1/dir2
+
+$CLI volume stop $V0
+
+#simulate fool fool scenario for fool_* dirs
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
+
+#Simulate v1-dirty(self-accusing but no pending ops on others) scenario for v1-dirty
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me}
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me}
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1
+
+$CLI volume stop $V0
+
+#simulate fool fool scenario for fool_* dirs
+setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me}
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
+
+#simulate self-accusing for source_self_accusing
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+# Check if conservative merges happened correctly on _me_ dirs
+TEST stat spb_me_heal/1
+TEST stat $B0/${V0}0/spb_me_heal/1
+TEST stat $B0/${V0}1/spb_me_heal/1
+
+TEST stat spb_me_heal/0
+TEST stat $B0/${V0}0/spb_me_heal/0
+TEST stat $B0/${V0}1/spb_me_heal/0
+
+TEST stat fool_me/1
+TEST stat $B0/${V0}0/fool_me/1
+TEST stat $B0/${V0}1/fool_me/1
+
+TEST stat fool_me/0
+TEST stat $B0/${V0}0/fool_me/0
+TEST stat $B0/${V0}1/fool_me/0
+
+TEST stat v1_fool_me/0
+TEST stat $B0/${V0}0/v1_fool_me/0
+TEST stat $B0/${V0}1/v1_fool_me/0
+
+TEST stat v1_fool_me/1
+TEST stat $B0/${V0}0/v1_fool_me/1
+TEST stat $B0/${V0}1/v1_fool_me/1
+
+TEST stat v1_dirty_me/0
+TEST stat $B0/${V0}0/v1_dirty_me/0
+TEST stat $B0/${V0}1/v1_dirty_me/0
+
+#Check if files that have gfid-mismatches in _me_ are giving EIO
+TEST ! stat spb_me/0
+
+#Check if stale files are deleted on access
+TEST ! stat source_deletions_me/fifo
+TEST ! stat $B0/${V0}0/source_deletions_me/fifo
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
+TEST ! stat source_deletions_me/block
+TEST ! stat $B0/${V0}0/source_deletions_me/block
+TEST ! stat $B0/${V0}1/source_deletions_me/block
+TEST ! stat source_deletions_me/char
+TEST ! stat $B0/${V0}0/source_deletions_me/char
+TEST ! stat $B0/${V0}1/source_deletions_me/char
+TEST ! stat source_deletions_me/file
+TEST ! stat $B0/${V0}0/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat source_deletions_me/file
+TEST ! stat $B0/${V0}0/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat source_deletions_me/dir1/dir2
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2
+TEST ! stat source_deletions_me/dir1
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
+
+#Test if the files created as part of access are healed correctly
+r=$(get_file_type source_creations_me/fifo)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo
+TEST [ -p source_creations_me/fifo ]
+
+r=$(get_file_type source_creations_me/block)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block
+TEST [ -b source_creations_me/block ]
+
+r=$(get_file_type source_creations_me/char)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char
+TEST [ -c source_creations_me/char ]
+
+r=$(get_file_type source_creations_me/file)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/file
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file
+TEST [ -f source_creations_me/file ]
+
+r=$(get_file_type source_creations_me/slink)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink
+TEST [ -h source_creations_me/slink ]
+
+r=$(get_file_type source_creations_me/dir1/dir2)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2
+TEST [ -d source_creations_me/dir1/dir2 ]
+
+r=$(get_file_type source_creations_me/dir1)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1
+TEST [ -d source_creations_me/dir1 ]
+
+#Trigger heal and check _heal dirs are healed properly
+#Trigger change in event generation number. That way inodes would get refreshed during lookup
+TEST kill_brick $V0 $H0 $B0/${V0}1
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST stat spb_heal
+TEST stat spb_me_heal
+TEST stat fool_heal
+TEST stat fool_me
+TEST stat v1_fool_heal
+TEST stat v1_fool_me
+TEST stat source_deletions_heal
+TEST stat source_deletions_me
+TEST stat source_self_accusing
+TEST stat source_creations_heal
+TEST stat source_creations_me
+TEST stat v1_dirty_heal
+TEST stat v1_dirty_me
+TEST $CLI volume stop $V0
+TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/*
+
+$CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+rm -f $M0/FILE
+EXPECT "1" count_index_entries $B0/${V0}0
+EXPECT "1" count_index_entries $B0/${V0}1
+
+TEST $CLI volume stop $V0;
+
+#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal
+create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1
+
+$CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+$CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0;
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing
+
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_self_accusing
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me
+
+#Don't access the files/dirs from mount point as that may cause self-heals
+# Check if conservative merges happened correctly on heal dirs
+TEST stat $B0/${V0}0/spb_heal/1
+TEST stat $B0/${V0}1/spb_heal/1
+
+TEST stat $B0/${V0}0/spb_heal/0
+TEST stat $B0/${V0}1/spb_heal/0
+
+TEST stat $B0/${V0}0/fool_heal/1
+TEST stat $B0/${V0}1/fool_heal/1
+
+TEST stat $B0/${V0}0/fool_heal/0
+TEST stat $B0/${V0}1/fool_heal/0
+
+TEST stat $B0/${V0}0/v1_fool_heal/0
+TEST stat $B0/${V0}1/v1_fool_heal/0
+
+TEST stat $B0/${V0}0/v1_fool_heal/1
+TEST stat $B0/${V0}1/v1_fool_heal/1
+
+TEST stat $B0/${V0}0/v1_dirty_heal/0
+TEST stat $B0/${V0}1/v1_dirty_heal/0
+
+#Check if files that have gfid-mismatches in spb are giving EIO
+TEST ! stat spb/0
+
+#Check if stale files are deleted on access
+TEST ! stat $B0/${V0}0/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}0/source_deletions_heal/block
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
+TEST ! stat $B0/${V0}0/source_deletions_heal/char
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1/dir2
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
+
+#Check if stale files are deleted on access
+TEST ! stat $B0/${V0}0/source_self_accusing/fifo
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
+TEST ! stat $B0/${V0}0/source_self_accusing/block
+TEST ! stat $B0/${V0}1/source_self_accusing/block
+TEST ! stat $B0/${V0}0/source_self_accusing/char
+TEST ! stat $B0/${V0}1/source_self_accusing/char
+TEST ! stat $B0/${V0}0/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}0/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1/dir2
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
+
+#Test if the files created as part of full self-heal correctly
+r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo
+TEST [ -p $B0/${V0}0/source_creations_heal/fifo ]
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/block)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/char)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/file)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file
+TEST [ -f $B0/${V0}0/source_creations_heal/file ]
+
+r=$(get_file_type source_creations_heal/file $B0/${V0}0/slink)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file slink
+TEST [ -h $B0/${V0}0/source_creations_heal/slink ]
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ]
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ]
+
+cd -
+
+#Anonymous directory shouldn't be created
+TEST mkdir $M0/rename-dir
+before_rename=$(STAT_INO $B0/${V0}1/rename-dir)
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST mv $M0/rename-dir $M0/new-name
+TEST $CLI volume start $V0 force
+#'spb' is in split-brain so pending-heal-count will be 2
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+after_rename=$(STAT_INO $B0/${V0}1/new-name)
+EXPECT "0" echo $(ls -a $B0/${V0}0/ | grep anonymous-inode | wc -l)
+EXPECT "0" echo $(ls -a $B0/${V0}1/ | grep anonymous-inode | wc -l)
+EXPECT_NOT "$before_rename" echo $after_rename
+cleanup
diff --git a/tests/basic/afr/entry-self-heal.t b/tests/basic/afr/entry-self-heal.t
new file mode 100644
index 00000000000..0c1da7d211e
--- /dev/null
+++ b/tests/basic/afr/entry-self-heal.t
@@ -0,0 +1,447 @@
+#!/bin/bash
+
+#This file checks if missing entry self-heal and entry self-heal are working
+#as expected.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+function get_file_type {
+ stat -c "%a:%F:%g:%t:%T:%u" $1
+}
+
+function diff_dirs {
+ diff <(ls $1 | sort) <(ls $2 | sort)
+}
+
+function heal_status {
+ local f1_path="${1}/${3}"
+ local f2_path="${2}/${3}"
+ local zero_xattr="000000000000000000000000"
+ local insync=""
+ diff_dirs $f1_path $f2_path
+ if [ $? -eq 0 ];
+ then
+ insync="Y"
+ else
+ insync="N"
+ fi
+ local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
+ local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
+ local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
+ local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
+ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
+ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
+ if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi
+ if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi
+ if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi
+ if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi
+ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
+ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
+ echo ${insync}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2}
+}
+
+function is_heal_done {
+ local zero_xattr="000000000000000000000000"
+ if [ "$(heal_status $@)" == "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function print_pending_heals {
+ local result=":"
+ for i in "$@";
+ do
+ if [ "N" == $(is_heal_done $B0/${V0}0 $B0/${V0}1 $i) ];
+ then
+ result="$result:$i"
+ fi
+ done
+#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names
+ if [ $result == ":" ]; then result="~"; fi
+ echo $result
+}
+
+zero_xattr="000000000000000000000000"
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0
+cd $M0
+#_me_ is dir on which missing entry self-heal happens, _heal is where dir self-heal happens
+#spb is split-brain, fool is all fool
+
+#source_self_accusing means there exists source and a sink which self-accuses.
+#This simulates failures where fops failed on the bricks without it going down.
+#Something like EACCESS/EDQUOT etc
+
+TEST mkdir spb_heal spb spb_me_heal spb_me fool_heal fool_me v1_fool_heal v1_fool_me source_creations_heal source_deletions_heal source_creations_me source_deletions_me v1_dirty_me v1_dirty_heal source_self_accusing
+TEST mkfifo source_deletions_heal/fifo
+TEST mknod source_deletions_heal/block b 4 5
+TEST mknod source_deletions_heal/char c 1 5
+TEST touch source_deletions_heal/file
+TEST ln -s source_deletions_heal/file source_deletions_heal/slink
+TEST mkdir source_deletions_heal/dir1
+TEST mkdir source_deletions_heal/dir1/dir2
+
+TEST mkfifo source_deletions_me/fifo
+TEST mknod source_deletions_me/block b 4 5
+TEST mknod source_deletions_me/char c 1 5
+TEST touch source_deletions_me/file
+TEST ln -s source_deletions_me/file source_deletions_me/slink
+TEST mkdir source_deletions_me/dir1
+TEST mkdir source_deletions_me/dir1/dir2
+
+TEST mkfifo source_self_accusing/fifo
+TEST mknod source_self_accusing/block b 4 5
+TEST mknod source_self_accusing/char c 1 5
+TEST touch source_self_accusing/file
+TEST ln -s source_self_accusing/file source_self_accusing/slink
+TEST mkdir source_self_accusing/dir1
+TEST mkdir source_self_accusing/dir1/dir2
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST touch spb_heal/0 spb/0 spb_me_heal/0 spb_me/0 fool_heal/0 fool_me/0 v1_fool_heal/0 v1_fool_me/0 v1_dirty_heal/0 v1_dirty_me/0
+TEST rm -rf source_deletions_heal/fifo source_deletions_heal/block source_deletions_heal/char source_deletions_heal/file source_deletions_heal/slink source_deletions_heal/dir1
+TEST rm -rf source_deletions_me/fifo source_deletions_me/block source_deletions_me/char source_deletions_me/file source_deletions_me/slink source_deletions_me/dir1
+TEST rm -rf source_self_accusing/fifo source_self_accusing/block source_self_accusing/char source_self_accusing/file source_self_accusing/slink source_self_accusing/dir1
+
+#Test that the files are deleted
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/slink
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
+TEST ! stat $B0/${V0}1/source_deletions_me/block
+TEST ! stat $B0/${V0}1/source_deletions_me/char
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/slink
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
+TEST ! stat $B0/${V0}1/source_self_accusing/block
+TEST ! stat $B0/${V0}1/source_self_accusing/char
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/slink
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
+
+
+TEST mkfifo source_creations_heal/fifo
+TEST mknod source_creations_heal/block b 4 5
+TEST mknod source_creations_heal/char c 1 5
+TEST touch source_creations_heal/file
+TEST ln -s source_creations_heal/file source_creations_heal/slink
+TEST mkdir source_creations_heal/dir1
+TEST mkdir source_creations_heal/dir1/dir2
+
+TEST mkfifo source_creations_me/fifo
+TEST mknod source_creations_me/block b 4 5
+TEST mknod source_creations_me/char c 1 5
+TEST touch source_creations_me/file
+TEST ln -s source_creations_me/file source_creations_me/slink
+TEST mkdir source_creations_me/dir1
+TEST mkdir source_creations_me/dir1/dir2
+
+$CLI volume stop $V0
+
+#simulate fool fool scenario for fool_* dirs
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
+
+#Simulate v1-dirty(self-accusing but no pending ops on others) scenario for v1-dirty
+setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1/v1_dirty_{heal,me}
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}1/v1_dirty_{heal,me}
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+TEST touch spb_heal/1 spb/0 spb_me_heal/1 spb_me/0 fool_heal/1 fool_me/1 v1_fool_heal/1 v1_fool_me/1
+
+$CLI volume stop $V0
+
+#simulate fool fool scenario for fool_* dirs
+setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0/{fool_heal,fool_me}
+setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/{fool_heal,fool_me}
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/{v1_fool_heal,v1_fool_me}
+
+#simulate self-accusing for source_self_accusing
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000006 $B0/${V0}0/source_self_accusing
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+# Check if conservative merges happened correctly on _me_ dirs
+TEST stat spb_me_heal/1
+TEST stat $B0/${V0}0/spb_me_heal/1
+TEST stat $B0/${V0}1/spb_me_heal/1
+
+TEST stat spb_me_heal/0
+TEST stat $B0/${V0}0/spb_me_heal/0
+TEST stat $B0/${V0}1/spb_me_heal/0
+
+TEST stat fool_me/1
+TEST stat $B0/${V0}0/fool_me/1
+TEST stat $B0/${V0}1/fool_me/1
+
+TEST stat fool_me/0
+TEST stat $B0/${V0}0/fool_me/0
+TEST stat $B0/${V0}1/fool_me/0
+
+TEST stat v1_fool_me/0
+TEST stat $B0/${V0}0/v1_fool_me/0
+TEST stat $B0/${V0}1/v1_fool_me/0
+
+TEST stat v1_fool_me/1
+TEST stat $B0/${V0}0/v1_fool_me/1
+TEST stat $B0/${V0}1/v1_fool_me/1
+
+TEST stat v1_dirty_me/0
+TEST stat $B0/${V0}0/v1_dirty_me/0
+TEST stat $B0/${V0}1/v1_dirty_me/0
+
+#Check if files that have gfid-mismatches in _me_ are giving EIO
+TEST ! stat spb_me/0
+
+#Check if stale files are deleted on access
+TEST ! stat source_deletions_me/fifo
+TEST ! stat $B0/${V0}0/source_deletions_me/fifo
+TEST ! stat $B0/${V0}1/source_deletions_me/fifo
+TEST ! stat source_deletions_me/block
+TEST ! stat $B0/${V0}0/source_deletions_me/block
+TEST ! stat $B0/${V0}1/source_deletions_me/block
+TEST ! stat source_deletions_me/char
+TEST ! stat $B0/${V0}0/source_deletions_me/char
+TEST ! stat $B0/${V0}1/source_deletions_me/char
+TEST ! stat source_deletions_me/file
+TEST ! stat $B0/${V0}0/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat source_deletions_me/file
+TEST ! stat $B0/${V0}0/source_deletions_me/file
+TEST ! stat $B0/${V0}1/source_deletions_me/file
+TEST ! stat source_deletions_me/dir1/dir2
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1/dir2
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1/dir2
+TEST ! stat source_deletions_me/dir1
+TEST ! stat $B0/${V0}0/source_deletions_me/dir1
+TEST ! stat $B0/${V0}1/source_deletions_me/dir1
+
+#Test if the files created as part of access are healed correctly
+r=$(get_file_type source_creations_me/fifo)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/fifo
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/fifo
+TEST [ -p source_creations_me/fifo ]
+
+r=$(get_file_type source_creations_me/block)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/block
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/block
+TEST [ -b source_creations_me/block ]
+
+r=$(get_file_type source_creations_me/char)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/char
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_me/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_me/char
+TEST [ -c source_creations_me/char ]
+
+r=$(get_file_type source_creations_me/file)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/file
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/file
+TEST [ -f source_creations_me/file ]
+
+r=$(get_file_type source_creations_me/slink)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/slink
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/slink
+TEST [ -h source_creations_me/slink ]
+
+r=$(get_file_type source_creations_me/dir1/dir2)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1/dir2
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1/dir2
+TEST [ -d source_creations_me/dir1/dir2 ]
+
+r=$(get_file_type source_creations_me/dir1)
+EXPECT "$r" get_file_type $B0/${V0}0/source_creations_me/dir1
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_me/dir1
+TEST [ -d source_creations_me/dir1 ]
+
+#Trigger heal and check _heal dirs are healed properly
+#Trigger change in event generation number. That way inodes would get refreshed during lookup
+TEST kill_brick $V0 $H0 $B0/${V0}1
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST stat spb_heal
+TEST stat spb_me_heal
+TEST stat fool_heal
+TEST stat fool_me
+TEST stat v1_fool_heal
+TEST stat v1_fool_me
+TEST stat source_deletions_heal
+TEST stat source_deletions_me
+TEST stat source_self_accusing
+TEST stat source_creations_heal
+TEST stat source_creations_me
+TEST stat v1_dirty_heal
+TEST stat v1_dirty_me
+TEST $CLI volume stop $V0
+TEST rm -rf $B0/${V0}{0,1}/.glusterfs/indices/xattrop/*
+
+$CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+rm -f $M0/FILE
+EXPECT "1" count_index_entries $B0/${V0}0
+EXPECT "1" count_index_entries $B0/${V0}1
+
+TEST $CLI volume stop $V0;
+
+#Create entries for fool_heal and fool_me to ensure they are fully healed and dirty xattrs erased, before triggering index heal
+create_brick_xattrop_entry $B0/${V0}0 fool_heal fool_me source_creations_heal/dir1
+
+$CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+$CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0;
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals spb_heal spb_me_heal fool_heal fool_me v1_fool_heal v1_fool_me source_deletions_heal source_deletions_me source_creations_heal source_creations_me v1_dirty_heal v1_dirty_me source_self_accusing
+
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 spb_me_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 fool_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_fool_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_deletions_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_self_accusing
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 source_creations_me
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_heal
+EXPECT "Y${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" heal_status $B0/${V0}0 $B0/${V0}1 v1_dirty_me
+
+#Don't access the files/dirs from mount point as that may cause self-heals
+# Check if conservative merges happened correctly on heal dirs
+TEST stat $B0/${V0}0/spb_heal/1
+TEST stat $B0/${V0}1/spb_heal/1
+
+TEST stat $B0/${V0}0/spb_heal/0
+TEST stat $B0/${V0}1/spb_heal/0
+
+TEST stat $B0/${V0}0/fool_heal/1
+TEST stat $B0/${V0}1/fool_heal/1
+
+TEST stat $B0/${V0}0/fool_heal/0
+TEST stat $B0/${V0}1/fool_heal/0
+
+TEST stat $B0/${V0}0/v1_fool_heal/0
+TEST stat $B0/${V0}1/v1_fool_heal/0
+
+TEST stat $B0/${V0}0/v1_fool_heal/1
+TEST stat $B0/${V0}1/v1_fool_heal/1
+
+TEST stat $B0/${V0}0/v1_dirty_heal/0
+TEST stat $B0/${V0}1/v1_dirty_heal/0
+
+#Check if files that have gfid-mismatches in spb are giving EIO
+TEST ! stat spb/0
+
+#Check if stale files are deleted on access
+TEST ! stat $B0/${V0}0/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}1/source_deletions_heal/fifo
+TEST ! stat $B0/${V0}0/source_deletions_heal/block
+TEST ! stat $B0/${V0}1/source_deletions_heal/block
+TEST ! stat $B0/${V0}0/source_deletions_heal/char
+TEST ! stat $B0/${V0}1/source_deletions_heal/char
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}0/source_deletions_heal/file
+TEST ! stat $B0/${V0}1/source_deletions_heal/file
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1/dir2
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1/dir2
+TEST ! stat $B0/${V0}0/source_deletions_heal/dir1
+TEST ! stat $B0/${V0}1/source_deletions_heal/dir1
+
+#Check if stale files are deleted on access
+TEST ! stat $B0/${V0}0/source_self_accusing/fifo
+TEST ! stat $B0/${V0}1/source_self_accusing/fifo
+TEST ! stat $B0/${V0}0/source_self_accusing/block
+TEST ! stat $B0/${V0}1/source_self_accusing/block
+TEST ! stat $B0/${V0}0/source_self_accusing/char
+TEST ! stat $B0/${V0}1/source_self_accusing/char
+TEST ! stat $B0/${V0}0/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}0/source_self_accusing/file
+TEST ! stat $B0/${V0}1/source_self_accusing/file
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1/dir2
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1/dir2
+TEST ! stat $B0/${V0}0/source_self_accusing/dir1
+TEST ! stat $B0/${V0}1/source_self_accusing/dir1
+
+#Test if the files created as part of full self-heal correctly
+r=$(get_file_type $B0/${V0}0/source_creations_heal/fifo)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/fifo
+TEST [ -p $B0/${V0}0/source_creations_heal/fifo ]
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/block
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/block
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/block)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/block
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/char)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}1/source_creations_heal/char
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}0/source_creations_heal/char
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/file)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file
+TEST [ -f $B0/${V0}0/source_creations_heal/file ]
+
+r=$(get_file_type source_creations_heal/file $B0/${V0}0/slink)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/file slink
+TEST [ -h $B0/${V0}0/source_creations_heal/slink ]
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1/dir2)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1/dir2
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1/dir2 ]
+
+r=$(get_file_type $B0/${V0}0/source_creations_heal/dir1)
+EXPECT "$r" get_file_type $B0/${V0}1/source_creations_heal/dir1
+TEST [ -d $B0/${V0}0/source_creations_heal/dir1 ]
+
+cd -
+
+cleanup
diff --git a/tests/basic/afr/gfid-heal.t b/tests/basic/afr/gfid-heal.t
new file mode 100644
index 00000000000..5e26e3307eb
--- /dev/null
+++ b/tests/basic/afr/gfid-heal.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+#gfid self-heal test on distributed replica. Make sure all the gfids are same
+#and the gfid exists on all the bricks
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function get_gfid_count {
+ getfattr -d -m. -e hex $B0/brick{0,1,2,3,4,5}/$1 2>&1 | grep trusted.gfid | grep -v gfid2path | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1,2,3,4,5}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST mkdir $B0/brick{0,1,2,3}/{0..9}
+sleep 2 #to prevent is_fresh_file code path
+TEST stat $M0/{0..9}
+EXPECT 6 get_gfid_count 0
+EXPECT 6 get_gfid_count 1
+EXPECT 6 get_gfid_count 2
+EXPECT 6 get_gfid_count 3
+EXPECT 6 get_gfid_count 4
+EXPECT 6 get_gfid_count 5
+EXPECT 6 get_gfid_count 6
+EXPECT 6 get_gfid_count 7
+EXPECT 6 get_gfid_count 8
+EXPECT 6 get_gfid_count 9
+cleanup;
diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-cli.t b/tests/basic/afr/gfid-mismatch-resolution-with-cli.t
new file mode 100644
index 00000000000..b739ddc49cc
--- /dev/null
+++ b/tests/basic/afr/gfid-mismatch-resolution-with-cli.t
@@ -0,0 +1,168 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+cd $M0
+
+##### Healing from latest mtime ######
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "Sink based on mtime" > f1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Source based on mtime" > f1
+
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f1)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1)
+TEST [ "$gfid_0" != "$gfid_1" ]
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#We know that first brick has the latest mtime
+LATEST_MTIME_MD5=$(md5sum $B0/${V0}0/f1 | awk '{print $1}')
+
+TEST $CLI volume heal $V0 split-brain latest-mtime /f1
+
+#gfid split-brain should be resolved
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1)
+TEST [ "$gfid_0" == "$gfid_1" ]
+
+#Heal the data and check the md5sum
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+HEALED_MD5=$(md5sum $B0/${V0}1/f1 | awk '{print $1}')
+TEST [ "$LATEST_MTIME_MD5" == "$HEALED_MD5" ]
+
+
+##### Healing from bigger file ######
+
+TEST mkdir test
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "Bigger file" > test/f2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Small file" > test/f2
+
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/test/f2)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/test/f2)
+TEST [ "$gfid_0" != "$gfid_1" ]
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#We know that second brick has the bigger file
+BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/test/f2 | awk '{print $1}')
+
+TEST $CLI volume heal $V0 split-brain bigger-file /test/f2
+
+#gfid split-brain should be resolved
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/test/f2)
+TEST [ "$gfid_0" == "$gfid_1" ]
+
+#Heal the data and check the md5sum
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+HEALED_MD5=$(md5sum $B0/${V0}0/test/f2 | awk '{print $1}')
+TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5" ]
+
+
+#Add one more brick, and heal.
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+
+##### Healing from source brick ######
+
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.quorum-type none
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "We will consider these as sinks" > test/f3
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "We will take this as source" > test/f3
+
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/test/f3)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/test/f3)
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/test/f3)
+TEST [ "$gfid_0" != "$gfid_1" ]
+TEST [ "$gfid_1" == "$gfid_2" ]
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+#We will try to heal the split-brain with bigger file option.
+#It should fail, since we have same file size in bricks 1 & 2.
+EXPECT "No bigger file for file /test/f3" $CLI volume heal $V0 split-brain bigger-file /test/f3
+
+#Now heal from taking the brick 0 as the source
+SOURCE_MD5=$(md5sum $B0/${V0}0/test/f3 | awk '{print $1}')
+
+TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}0 /test/f3
+
+#gfid split-brain should be resolved
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/test/f3)
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/test/f3)
+TEST [ "$gfid_0" == "$gfid_1" ]
+TEST [ "$gfid_0" == "$gfid_2" ]
+
+#Heal the data and check the md5sum
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+HEALED_MD5_1=$(md5sum $B0/${V0}1/test/f3 | awk '{print $1}')
+HEALED_MD5_2=$(md5sum $B0/${V0}2/test/f3 | awk '{print $1}')
+TEST [ "$SOURCE_MD5" == "$HEALED_MD5_1" ]
+TEST [ "$SOURCE_MD5" == "$HEALED_MD5_2" ]
+
+cd -
+cleanup;
diff --git a/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
new file mode 100644
index 00000000000..35e295dc170
--- /dev/null
+++ b/tests/basic/afr/gfid-mismatch-resolution-with-fav-child-policy.t
@@ -0,0 +1,229 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+
+##### Healing with favorite-child-policy = mtime ######
+##### and self-heal-daemon ######
+
+TEST $CLI volume set $V0 favorite-child-policy mtime
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "Sink based on mtime" > $M0/f1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Source based on mtime" > $M0/f1
+
+#Gfids of file f1 on bricks 0 & 1 should differ
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f1)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1)
+TEST [ "$gfid_0" != "$gfid_1" ]
+
+TEST $CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#We know that first brick has the latest mtime
+LATEST_MTIME_MD5=$(md5sum $B0/${V0}0/f1 | cut -d\ -f1)
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#gfid split-brain should be resolved
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f1)
+TEST [ "$gfid_0" == "$gfid_1" ]
+
+HEALED_MD5=$(md5sum $B0/${V0}1/f1 | cut -d\ -f1)
+TEST [ "$LATEST_MTIME_MD5" == "$HEALED_MD5" ]
+
+TEST $CLI volume set $V0 self-heal-daemon off
+
+
+##### Healing with favorite-child-policy = ctime ######
+##### and self-heal-daemon ######
+
+#gfid split-brain resolution should work even when the granular-enrty-heal is
+#enabled
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST $CLI volume set $V0 favorite-child-policy ctime
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Sink based on ctime" > $M0/f2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "Source based on ctime" > $M0/f2
+
+#Gfids of file f2 on bricks 0 & 1 should differ
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f2)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f2)
+TEST [ "$gfid_0" != "$gfid_1" ]
+
+TEST $CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#We know that second brick has the latest ctime
+LATEST_CTIME_MD5=$(md5sum $B0/${V0}1/f2 | cut -d\ -f1)
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#gfid split-brain should be resolved
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f2)
+TEST [ "$gfid_0" == "$gfid_1" ]
+
+HEALED_MD5=$(md5sum $B0/${V0}0/f2 | cut -d\ -f1)
+TEST [ "$LATEST_CTIME_MD5" == "$HEALED_MD5" ]
+
+
+#Add one more brick, and heal.
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST $CLI volume set $V0 self-heal-daemon off
+
+
+##### Healing using favorite-child-policy = size #####
+##### and client side heal #####
+
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+#Set the quorum-type to none, and create a gfid split brain
+TEST $CLI volume set $V0 cluster.quorum-type none
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Smallest file" > $M0/f3
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "Second smallest file" > $M0/f3
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "Biggest among the three files" > $M0/f3
+
+#Bring back the down bricks.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+#Gfids of file f3 on all the bricks should differ
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f3)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f3)
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/f3)
+TEST [ "$gfid_0" != "$gfid_1" ]
+TEST [ "$gfid_0" != "$gfid_2" ]
+TEST [ "$gfid_1" != "$gfid_2" ]
+
+#We know that second brick has the bigger size file
+BIGGER_FILE_MD5=$(md5sum $B0/${V0}1/f3 | cut -d\ -f1)
+
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
+TEST cat $M0/f3 #Trigger data heal via readv inode refresh
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#gfid split-brain should be resolved
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f3)
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/f3)
+TEST [ "$gfid_0" == "$gfid_1" ]
+TEST [ "$gfid_2" == "$gfid_1" ]
+
+HEALED_MD5_1=$(md5sum $B0/${V0}0/f3 | cut -d\ -f1)
+HEALED_MD5_2=$(md5sum $B0/${V0}2/f3 | cut -d\ -f1)
+TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5_1" ]
+TEST [ "$BIGGER_FILE_MD5" == "$HEALED_MD5_2" ]
+
+
+##### Healing using favorite-child-policy = majority #####
+##### and client side heal #####
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "Does not agree with bricks 0 & 1" > $M0/f4
+
+TEST $CLI v start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "Agree on bricks 0 & 1" > $M0/f4
+
+#Gfids of file f4 on bricks 0 & 1 should be same and bricks 0 & 2 should differ
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/f4)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/f4)
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/f4)
+TEST [ "$gfid_0" == "$gfid_1" ]
+TEST [ "$gfid_0" != "$gfid_2" ]
+
+#We know that first and second bricks agree with each other. Pick any one of
+#them as source
+MAJORITY_MD5=$(md5sum $B0/${V0}0/f4 | cut -d\ -f1)
+
+#Bring back the down brick and heal.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST ls $M0 #Trigger entry heal via readdir inode refresh
+TEST cat $M0/f4 #Trigger data heal via readv inode refresh
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#gfid split-brain should be resolved
+gfid_2=$(gf_get_gfid_xattr $B0/${V0}2/f4)
+TEST [ "$gfid_0" == "$gfid_2" ]
+
+HEALED_MD5=$(md5sum $B0/${V0}2/f4 | cut -d\ -f1)
+TEST [ "$MAJORITY_MD5" == "$HEALED_MD5" ]
+
+cleanup;
diff --git a/tests/basic/afr/gfid-mismatch.t b/tests/basic/afr/gfid-mismatch.t
new file mode 100644
index 00000000000..fc15793cf5a
--- /dev/null
+++ b/tests/basic/afr/gfid-mismatch.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+#Test that GFID mismatches result in EIO
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+#Init
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+# We can't count on brick0 getting a copy of the file immediately without this,
+# because (especially with multiplexing) it might not have *come up*
+# immediately.
+TEST $CLI volume set $V0 cluster.quorum-type auto
+TEST $GFS --volfile-id=$V0 -s $H0 $M0;
+
+#Test
+TEST touch $M0/file
+TEST setfattr -n trusted.gfid -v 0sBfz5vAdHTEK1GZ99qjqTIg== $B0/brick0/file
+TEST ! "find $M0/file"
+TEST ! "stat $M0/file"
+
+#Cleanup
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+TEST rm -rf $B0/*
diff --git a/tests/basic/afr/gfid-self-heal.t b/tests/basic/afr/gfid-self-heal.t
new file mode 100644
index 00000000000..5a530681186
--- /dev/null
+++ b/tests/basic/afr/gfid-self-heal.t
@@ -0,0 +1,145 @@
+#!/bin/bash
+
+#Tests for files without gfids
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 nfs.disable on
+TEST touch $B0/${V0}{0,1}/{1,2,3,4}
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+#Test that readdir returns entries even when no gfids are present
+EXPECT 4 echo $(ls $M0 | grep -v '^\.' | wc -l)
+sleep 2;
+#stat the files and check that the files have same gfids on the bricks now
+TEST stat $M0/1
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/1)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/1)
+TEST "[[ ! -z $gfid_0 ]]"
+EXPECT $gfid_0 echo $gfid_1
+
+TEST stat $M0/2
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/2)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/2)
+TEST "[[ ! -z $gfid_0 ]]"
+EXPECT $gfid_0 echo $gfid_1
+
+TEST stat $M0/3
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/3)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/3)
+TEST "[[ ! -z $gfid_0 ]]"
+EXPECT $gfid_0 echo $gfid_1
+
+TEST stat $M0/4
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/4)
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/4)
+TEST "[[ ! -z $gfid_0 ]]"
+EXPECT $gfid_0 echo $gfid_1
+
+#Check gfid self-heal happens from one brick to other when a file has missing
+#gfid
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/a
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/a)
+TEST touch $B0/${V0}0/a
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 1
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST stat $M0/a
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/a)
+EXPECT $gfid_1 echo $gfid_0
+
+#Check gfid self-heal doesn't happen from one brick to other when type mismatch
+#is present for a name, without any xattrs
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/b
+TEST mkdir $B0/${V0}0/b
+TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 1
+$CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST ! stat $M0/b
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/b)
+TEST "[[ -z \"$gfid_0\" ]]"
+
+#Check gfid assigning doesn't happen when there is type mismatch
+TEST touch $B0/${V0}1/c
+TEST mkdir $B0/${V0}0/c
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 1
+TEST ! stat $M0/c
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/c)
+gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/c)
+TEST "[[ -z \"$gfid_1\" ]]"
+TEST "[[ -z \"$gfid_0\" ]]"
+
+#Check gfid assigning doesn't happen only when even one brick is down to prevent
+# gfid split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $B0/${V0}1/d
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 1
+TEST ! stat $M0/d
+gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/d)
+TEST "[[ -z \"$gfid_1\" ]]"
+TEST $CLI volume start $V0 force;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#Check gfid self-heal doesn't happen from one brick to other when type mismatch
+#is present for a name without any pending xattrs
+#TEST kill_brick $V0 $H0 $B0/${V0}0
+#TEST touch $M0/e
+#TEST $CLI volume start $V0 force;
+#EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+#TEST kill_brick $V0 $H0 $B0/${V0}1
+#TEST mkdir $M0/e
+#TEST $CLI volume stop $V0 force;
+#TEST setfattr -x trusted.gfid $B0/${V0}1/e
+#TEST setfattr -x trusted.gfid $B0/${V0}0/e
+#TEST $CLI volume set $V0 cluster.entry-self-heal off
+#$CLI volume start $V0 force
+#EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+#EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+#TEST ! stat $M0/e
+#gfid_1=$(gf_get_gfid_xattr $B0/${V0}1/e)
+#gfid_0=$(gf_get_gfid_xattr $B0/${V0}0/e)
+#TEST "[[ -z \"$gfid_1\" ]]"
+#TEST "[[ -z \"$gfid_0\" ]]"
+
+#Check if lookup fails with gfid-mismatch of a file
+#is present for a name without any pending xattrs
+#TEST kill_brick $V0 $H0 $B0/${V0}0
+#TEST touch $M0/f
+#$CLI volume start $V0 force
+#EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+#TEST kill_brick $V0 $H0 $B0/${V0}1
+#TEST touch $M0/f
+#simulate no pending changelog
+#$CLI volume stop $V0 force
+#TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
+#TEST setfattr -x trusted.afr.$V0-client-1 $B0/${V0}0
+#$CLI volume start $V0 force
+#EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+#EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+#TEST ! stat $M0/f
+
+cleanup
diff --git a/tests/basic/afr/granular-esh/add-brick.t b/tests/basic/afr/granular-esh/add-brick.t
new file mode 100644
index 00000000000..270cf1d32a6
--- /dev/null
+++ b/tests/basic/afr/granular-esh/add-brick.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../afr.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Create files
+for i in {1..5}
+do
+ echo $i > $M0/file$i.txt
+done
+
+# Metadata changes
+TEST setfattr -n user.test -v qwerty $M0/file5.txt
+
+# Add brick1
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
+
+# New-brick should accuse the old-bricks (Simulating case for data-loss)
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}2/
+
+# Check if pending data, metadata and entry xattrs are set for newly-added-brick
+EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
+EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
+
+# Also ensure we are not mistakenly tampering with the new-brick's changelog xattrs
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2
+
+# Check if dirty xattr is set for newly-added-brick
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}2
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Check if entry-heal has happened
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}2 | sort)
+TEST diff <(ls $B0/${V0}1 | sort) <(ls $B0/${V0}2 | sort)
+
+# Test if data was healed
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}2/file1.txt
+
+# Test if metadata was healed and exists on both the bricks
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}2/file5.txt
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt
+
+# Ensure all changelog xattrs are now back to zero.
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}2
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2
+
+cleanup
diff --git a/tests/basic/afr/granular-esh/cli.t b/tests/basic/afr/granular-esh/cli.t
new file mode 100644
index 00000000000..10b6c6398da
--- /dev/null
+++ b/tests/basic/afr/granular-esh/cli.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../afr.rc
+
+cleanup
+
+TESTS_EXPECTED_IN_LOOP=4
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+# Test that enabling the option should work on a newly created volume
+TEST $CLI volume set $V0 cluster.granular-entry-heal on
+TEST $CLI volume set $V0 cluster.granular-entry-heal off
+
+#########################
+##### DISPERSE TEST #####
+#########################
+# Execute the same command on a disperse volume and make sure it fails.
+TEST $CLI volume create $V1 disperse 3 redundancy 1 $H0:$B0/${V1}{0,1,2}
+TEST $CLI volume start $V1
+TEST ! $CLI volume heal $V1 granular-entry-heal enable
+TEST ! $CLI volume heal $V1 granular-entry-heal disable
+
+######################
+### REPLICATE TEST ###
+######################
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 self-heal-daemon off
+# Test that the volume-set way of enabling the option is disallowed
+TEST ! $CLI volume set $V0 granular-entry-heal on
+# Test that the volume-heal way of enabling the option is allowed
+TEST $CLI volume heal $V0 granular-entry-heal enable
+# Volume-reset of the option should be allowed
+TEST $CLI volume reset $V0 granular-entry-heal
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+EXPECT "enable" volume_option $V0 cluster.granular-entry-heal
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Kill brick-0.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+# Disabling the option should work even when one or more bricks are down
+TEST $CLI volume heal $V0 granular-entry-heal disable
+# When a brick is down, 'enable' attempt should be failed
+TEST ! $CLI volume heal $V0 granular-entry-heal enable
+
+# Restart the killed brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+# When all bricks are up, it should be possible to enable the option
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+# Kill brick-0 again
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+# Create files under root
+for i in {1..2}
+do
+ echo $i > $M0/f$i
+done
+
+# Test that the index associated with '/' is created on B1.
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID
+
+# Check for successful creation of granular entry indices
+for i in {1..2}
+do
+ TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f$i
+done
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST gluster volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Test if data was healed
+for i in {1..2}
+do
+ TEST_IN_LOOP diff $B0/${V0}0/f$i $B0/${V0}1/f$i
+done
+
+# Now verify that there are no name indices left after self-heal
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID
+
+# Perform a volume-reset-all-options operation
+TEST $CLI volume reset $V0
+# Ensure that granular entry heal is also disabled
+EXPECT "no" volume_get_field $V0 cluster.granular-entry-heal
+EXPECT "off" volume_get_field $V0 cluster.entry-self-heal
+
+cleanup
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1399038
diff --git a/tests/basic/afr/granular-esh/conservative-merge.t b/tests/basic/afr/granular-esh/conservative-merge.t
new file mode 100644
index 00000000000..b170e47e0cb
--- /dev/null
+++ b/tests/basic/afr/granular-esh/conservative-merge.t
@@ -0,0 +1,138 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../afr.rc
+
+cleanup
+
+TESTS_EXPECTED_IN_LOOP=4
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST $GFS --volfile-id=$V0 -s $H0 $M0
+
+TEST mkdir $M0/dir
+gfid_dir=$(get_gfid_string $M0/dir)
+
+echo "1" > $M0/f1
+echo "2" > $M0/f2
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST mkdir $M0/dir2
+gfid_dir2=$(get_gfid_string $M0/dir2)
+TEST unlink $M0/f1
+echo "3" > $M0/f3
+TEST mkdir $M0/dir/subdir
+gfid_subdir=$(get_gfid_string $M0/dir/subdir)
+echo "dir2-1" > $M0/dir2/f1
+echo "subdir-1" > $M0/dir/subdir/f1
+
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/dir2
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f1
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f3
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir2
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir2/f1
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir/subdir
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_subdir
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_subdir/f1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+TEST mkdir $M0/dir3
+gfid_dir3=$(get_gfid_string $M0/dir3)
+# Root is now in split-brain.
+TEST mkdir $M0/dir/subdir2
+gfid_subdir2=$(get_gfid_string $M0/dir/subdir2)
+# /dir is now in split-brain.
+echo "4" > $M0/f4
+TEST unlink $M0/f2
+echo "dir3-1" > $M0/dir3/f1
+echo "subdir2-1" > $M0/dir/subdir2/f1
+
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$ROOT_GFID
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$ROOT_GFID/dir3
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$ROOT_GFID/f2
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$ROOT_GFID/f4
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$gfid_dir
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$gfid_dir/subdir2
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$gfid_dir3
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$gfid_dir3/f1
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$gfid_subdir2
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$gfid_subdir2/f1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Make sure entry self-heal did the right thing in terms of impunging deleted
+# files in the event of a split-brain.
+TEST stat $M0/f1
+TEST stat $M0/f2
+
+# Test if data was healed
+for i in {1..4}
+do
+ TEST_IN_LOOP diff $B0/${V0}0/f$i $B0/${V0}1/f$i
+done
+
+TEST diff $B0/${V0}0/dir2/f1 $B0/${V0}1/dir2/f1
+EXPECT "dir2-1" cat $M0/dir2/f1
+
+TEST diff $B0/${V0}0/dir/subdir/f1 $B0/${V0}1/dir/subdir/f1
+EXPECT "subdir-1" cat $M0/dir/subdir/f1
+
+TEST diff $B0/${V0}0/dir3/f1 $B0/${V0}1/dir3/f1
+EXPECT "dir3-1" cat $M0/dir3/f1
+
+TEST diff $B0/${V0}0/dir/subdir2/f1 $B0/${V0}1/dir/subdir2/f1
+EXPECT "subdir2-1" cat $M0/dir/subdir2/f1
+
+# Verify that all name indices have been removed after a successful heal.
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/dir2
+TEST ! stat $B0/${V0}0/.glusterfs/indices/entry-changes/$ROOT_GFID/dir3
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f1
+TEST ! stat $B0/${V0}0/.glusterfs/indices/entry-changes/$ROOT_GFID/f2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f3
+TEST ! stat $B0/${V0}0/.glusterfs/indices/entry-changes/$ROOT_GFID/f4
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir2/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir/subdir
+TEST ! stat $B0/${V0}0/.glusterfs/indices/entry-changes/$gfid_dir/subdir2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_subdir/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir3/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_subdir2/f1
+
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID
+TEST ! stat $B0/${V0}0/.glusterfs/indices/entry-changes/$ROOT_GFID
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir
+TEST ! stat $B0/${V0}0/.glusterfs/indices/entry-changes/$gfid_dir
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_subdir
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir3
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_subdir2
+
+cleanup
diff --git a/tests/basic/afr/granular-esh/granular-esh.t b/tests/basic/afr/granular-esh/granular-esh.t
new file mode 100644
index 00000000000..de0e8f4290b
--- /dev/null
+++ b/tests/basic/afr/granular-esh/granular-esh.t
@@ -0,0 +1,168 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../afr.rc
+
+cleanup
+
+TESTS_EXPECTED_IN_LOOP=12
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Create files under root
+for i in {1..4}
+do
+ echo $i > $M0/f$i
+done
+
+# Create a directory and few files under it
+TEST mkdir $M0/dir
+gfid_dir=$(get_gfid_string $M0/dir)
+
+for i in {1..3}
+do
+ echo $i > $M0/dir/f$i
+done
+
+# Kill brick-0.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+# Create more files
+for i in {5..6}
+do
+ echo $i > $M0/f$i
+done
+
+# Test that the index associated with '/' is created on B1.
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID
+
+# Check for successful creation of granular entry indices
+for i in {5..6}
+do
+ TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f$i
+done
+
+# Delete an existing file
+TEST unlink $M0/f1
+
+# Rename an existing file
+TEST mv $M0/f2 $M0/f2_renamed
+
+# Create a hard link on f3
+TEST ln $M0/f3 $M0/link
+
+# Create a symlink on f4
+TEST ln -s $M0/f4 $M0/symlink
+
+# Check for successful creation of granular entry indices
+for i in {1..2}
+do
+ TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f$i
+done
+
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f2_renamed
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/link
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/symlink
+
+# Create a file and also delete it. This is to test deletion of stale indices during heal.
+TEST touch $M0/file_stale
+TEST unlink $M0/file_stale
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/file_stale
+
+# Create a directory and create its subdirs and files while a brick is down
+TEST mkdir -p $M0/newdir/newsubdir
+
+for i in {1..3}
+do
+ echo $i > $M0/newdir/f$i
+ echo $i > $M0/newdir/newsubdir/f$i
+done
+
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/newdir
+gfid_newdir=$(get_gfid_string $M0/newdir)
+gfid_newsubdir=$(get_gfid_string $M0/newdir/newsubdir)
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newdir/newsubdir
+# Check if 'data' segment of the changelog is set for the newly created directories 'newdir' and 'newsubdir'
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}1/newdir trusted.afr.$V0-client-0 data
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}1/newdir/newsubdir trusted.afr.$V0-client-0 data
+
+# Test that removal of an entire sub-tree in the hierarchy works.
+TEST rm -rf $M0/dir
+
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/dir
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir/f1
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir/f2
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir/f3
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST gluster volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Test if data was healed
+for i in {5..6}
+do
+ TEST_IN_LOOP diff $B0/${V0}0/f$i $B0/${V0}1/f$i
+done
+
+for i in {1..3}
+do
+ TEST_IN_LOOP diff $B0/${V0}0/newdir/f$i $B0/${V0}1/newdir/f$i
+ TEST_IN_LOOP diff $B0/${V0}0/newdir/newsubdir/f$i $B0/${V0}1/newdir/newsubdir/f$i
+done
+
+# Verify that all the deleted names have been removed on the sink brick too by self-heal.
+TEST ! stat $B0/${V0}0/f1
+TEST ! stat $B0/${V0}0/f2
+TEST stat $B0/${V0}0/f2_renamed
+TEST stat $B0/${V0}0/symlink
+EXPECT "3" get_hard_link_count $B0/${V0}0/f3
+EXPECT "f4" readlink $B0/${V0}0/symlink
+TEST ! stat $B0/${V0}0/dir
+
+# Now verify that there are no name indices left after self-heal
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f2_renamed
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/link
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/symlink
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/dir
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/newdir
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/file_stale
+
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir/f2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir/f3
+
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newdir/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newdir/f2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newdir/f3
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newsubdir/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newsubdir/f2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newsubdir/f3
+
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newdir
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_newsubdir
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$gfid_dir
+
+cleanup
diff --git a/tests/basic/afr/granular-esh/granular-indices-but-non-granular-heal.t b/tests/basic/afr/granular-esh/granular-indices-but-non-granular-heal.t
new file mode 100644
index 00000000000..1b5421bf4b6
--- /dev/null
+++ b/tests/basic/afr/granular-esh/granular-indices-but-non-granular-heal.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../afr.rc
+
+cleanup
+
+TESTS_EXPECTED_IN_LOOP=4
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Kill brick-0.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+# Create files under root
+for i in {1..2}
+do
+ echo $i > $M0/f$i
+done
+
+# Test that the index associated with '/' is created on B1.
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID
+
+# Check for successful creation of granular entry indices
+for i in {1..2}
+do
+ TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f$i
+done
+
+# Now disable granular-entry-heal
+TEST $CLI volume heal $V0 granular-entry-heal disable
+
+# Start the brick that was down
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+# Enable shd
+TEST gluster volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+# Now the indices created are granular but the heal is going to be of the
+# normal kind. We test to make sure that heal still completes fine and that
+# the stale granular indices are going to be deleted
+
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Test if data was healed
+for i in {1..2}
+do
+ TEST_IN_LOOP diff $B0/${V0}0/f$i $B0/${V0}1/f$i
+done
+
+# Now verify that there are no name indices left after self-heal
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f1
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID/f2
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$ROOT_GFID
+
+cleanup
diff --git a/tests/basic/afr/granular-esh/replace-brick.t b/tests/basic/afr/granular-esh/replace-brick.t
new file mode 100644
index 00000000000..5fc7811a8d8
--- /dev/null
+++ b/tests/basic/afr/granular-esh/replace-brick.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+# Create files
+for i in {1..5}
+do
+ echo $i > $M0/file$i.txt
+done
+
+# Metadata changes
+TEST setfattr -n user.test -v qwerty $M0/file5.txt
+
+# Replace brick1
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit force
+
+# Replaced-brick should accuse the non-replaced-brick (Simulating case for data-loss)
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/
+
+# Check if data, metadata and entry segments of changelog are set for replaced-brick
+EXPECT "000000010000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+
+# Also ensure we don't mistakenly tamper with the new brick's changelog xattrs
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1_new
+
+# Ensure the dirty xattr is set on the new brick.
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+# Check if entry-heal has happened
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1_new | sort)
+
+# To make sure that files were not lost from brick0
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort)
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+
+# Test if data was healed
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}1_new/file1.txt
+# To make sure that data was not lost from brick0
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}1/file1.txt
+
+# Test if metadata was healed and exists on both the bricks
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}1_new/file5.txt
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt
+
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1_new
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new
+
+cleanup
diff --git a/tests/basic/afr/halo.t b/tests/basic/afr/halo.t
new file mode 100644
index 00000000000..3f61f5a0402
--- /dev/null
+++ b/tests/basic/afr/halo.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+#Tests that halo basic functionality works as expected
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+function get_up_child()
+{
+ if [ "1" == $(afr_private_key_value $V0 $M0 0 "child_up\[0\]") ];
+ then
+ echo 0
+ elif [ "1" == $(afr_private_key_value $V0 $M0 0 "child_up\[1\]") ]
+ then
+ echo 1
+ fi
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.halo-enabled yes
+TEST $CLI volume set $V0 cluster.halo-max-replicas 1
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[0\]"
+EXPECT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[1\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[0\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[1\]"
+
+up_id=$(get_up_child)
+TEST [[ ! -z "$up_id" ]]
+
+down_id=$((1-up_id))
+
+TEST kill_brick $V0 $H0 $B0/${V0}${up_id}
+#As max-replicas is configured to be 1, down_child should be up now
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${down_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "child_up\[${down_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "child_up\[${up_id}\]"
+EXPECT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[${up_id}\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[${down_id}\]"
+
+#Bring the brick back up and the state should be restored
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]"
+
+up_id=$(get_up_child)
+TEST [[ ! -z "$up_id" ]]
+down_id=$((1-up_id))
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${down_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" afr_private_key_value $V0 $M0 0 "child_up\[${down_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "halo_child_up\[${up_id}\]"
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" afr_private_key_value $V0 $M0 0 "child_up\[${up_id}\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[0\]"
+EXPECT_NOT "^-1$" afr_private_key_value $V0 $M0 0 "child_latency\[1\]"
+
+cleanup;
diff --git a/tests/basic/afr/heal-info.t b/tests/basic/afr/heal-info.t
new file mode 100644
index 00000000000..46d65007c88
--- /dev/null
+++ b/tests/basic/afr/heal-info.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+#Test that parallel heal-info command execution doesn't result in spurious
+#entries with locking-scheme granular
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function heal_info_to_file {
+ while [ -f $M0/b.txt ]; do
+ $CLI volume heal $V0 info | grep -i number | grep -v 0 >> $1
+ done
+}
+
+function write_and_del_file {
+ dd of=$M0/a.txt if=/dev/zero bs=1024k count=100
+ rm -f $M0/b.txt
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 locking-scheme granular
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/a.txt $M0/b.txt
+write_and_del_file &
+touch $B0/f1 $B0/f2
+heal_info_to_file $B0/f1 &
+heal_info_to_file $B0/f2 &
+wait
+EXPECT "^$" cat $B0/f1
+EXPECT "^$" cat $B0/f2
+
+cleanup;
diff --git a/tests/basic/afr/heal-quota.t b/tests/basic/afr/heal-quota.t
new file mode 100644
index 00000000000..96e23363da8
--- /dev/null
+++ b/tests/basic/afr/heal-quota.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+#This file tests that heal succeeds even when quota is exceeded
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 10MB
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 hard-timeout 0
+
+TEST touch $M0/a $M0/b
+dd if=/dev/zero of=$M0/b bs=1M count=7
+TEST kill_brick $V0 $H0 $B0/${V0}0
+dd if=/dev/zero of=$M0/a bs=1M count=12 #This shall fail
+TEST $CLI volume start $V0 force
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+cleanup
diff --git a/tests/basic/afr/inodelk.t b/tests/basic/afr/inodelk.t
new file mode 100644
index 00000000000..a32aa8531b5
--- /dev/null
+++ b/tests/basic/afr/inodelk.t
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+#This test tests that inodelk fails when quorum is not met. Also tests the
+#success case where inodelk is obtained and unlocks are done correctly.
+
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 $M0
+
+#Test success case
+TEST mkdir $M0/dir1
+TEST mv $M0/dir1 $M0/dir2
+
+#If there is a problem with inodelk unlocking the following would hang.
+TEST mv $M0/dir2 $M0/dir1
+
+#Test failure case by bringing two of the bricks down
+#Test that the directory is not moved partially on some bricks but successful
+#on other subvol where quorum meets. Do that for both set of bricks
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! mv $M0/dir1 $M0/dir2
+
+TEST stat $B0/${V0}0/dir1
+TEST stat $B0/${V0}1/dir1
+TEST stat $B0/${V0}2/dir1
+TEST stat $B0/${V0}3/dir1
+TEST stat $B0/${V0}4/dir1
+TEST stat $B0/${V0}5/dir1
+TEST ! stat $B0/${V0}0/dir2
+TEST ! stat $B0/${V0}1/dir2
+TEST ! stat $B0/${V0}2/dir2
+TEST ! stat $B0/${V0}3/dir2
+TEST ! stat $B0/${V0}4/dir2
+TEST ! stat $B0/${V0}5/dir2
+
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST kill_brick $V0 $H0 $B0/${V0}4
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST ! mv $M0/dir1 $M0/dir2
+TEST stat $B0/${V0}0/dir1
+TEST stat $B0/${V0}1/dir1
+TEST stat $B0/${V0}2/dir1
+TEST stat $B0/${V0}3/dir1
+TEST stat $B0/${V0}4/dir1
+TEST stat $B0/${V0}5/dir1
+TEST ! stat $B0/${V0}0/dir2
+TEST ! stat $B0/${V0}1/dir2
+TEST ! stat $B0/${V0}2/dir2
+TEST ! stat $B0/${V0}3/dir2
+TEST ! stat $B0/${V0}4/dir2
+TEST ! stat $B0/${V0}5/dir2
+
+#Bring the bricks back up and try mv once more, it should succeed.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 4
+TEST mv $M0/dir1 $M0/dir2
+cleanup;
+#Do similar tests on replica 2
+TEST glusterd;
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..3}
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 $M0
+TEST mkdir $M0/dir1
+TEST mv $M0/dir1 $M0/dir2
+#Because we don't know hashed subvol, do the same test twice bringing 1 brick
+#from each down, quorum calculation should allow it.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/dir2 $M0/dir1
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST mv $M0/dir1 $M0/dir2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST mv $M0/dir2 $M0/dir1
+cleanup
diff --git a/tests/basic/afr/lk-quorum.t b/tests/basic/afr/lk-quorum.t
new file mode 100644
index 00000000000..3364d8a6a1b
--- /dev/null
+++ b/tests/basic/afr/lk-quorum.t
@@ -0,0 +1,257 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=300
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd
+
+#Tests for quorum-type option for replica 2
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1};
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
+
+TEST touch $M0/a
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#Check locking behavior with quorum 'fixed' and quorum-count 2
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+TEST $CLI volume set $V0 cluster.quorum-count 2
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^fixed$" mount_get_option_value $M0 $V0-replicate-0 quorum-type
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^2$" mount_get_option_value $M0 $V0-replicate-0 quorum-count
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#When any of the bricks is down lock/unlock should fail
+#kill first brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST fd_close $fd1
+
+#kill 2nd brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#Check locking behavior with quorum 'fixed' and quorum-count 1
+TEST $CLI volume set $V0 cluster.quorum-count 1
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^1$" mount_get_option_value $M0 $V0-replicate-0 quorum-count
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#When any of the bricks is down lock/unlock should succeed
+#kill first brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST fd_close $fd1
+
+#kill 2nd brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#Check locking behavior with quorum 'auto'
+TEST $CLI volume set $V0 cluster.quorum-type auto
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^auto$" mount_get_option_value $M0 $V0-replicate-0 quorum-type
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#When first brick is down lock/unlock should fail
+#kill first brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST fd_close $fd1
+
+#When second brick is down lock/unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+#Tests for replica 3
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
+
+TEST touch $M0/a
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST fd_close $fd1
+
+#When any of the bricks is down lock/unlock should succeed
+#kill first brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST fd_close $fd1
+
+#kill 2nd brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#kill 3rd brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST fd_close $fd1
+
+#When any two of the bricks are down lock/unlock should fail
+#kill first,second bricks
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST fd_close $fd1
+
+#kill 2nd,3rd bricks
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST fd_close $fd1
+
+#kill 1st,3rd brick
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST fd_close $fd1
+
+cleanup
diff --git a/tests/basic/afr/metadata-self-heal.t b/tests/basic/afr/metadata-self-heal.t
new file mode 100644
index 00000000000..275aecd2175
--- /dev/null
+++ b/tests/basic/afr/metadata-self-heal.t
@@ -0,0 +1,133 @@
+#!/bin/bash
+#Self-heal tests
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+function is_heal_done {
+ local f1_path="${1}/${3}"
+ local f2_path="${2}/${3}"
+ local zero_xattr="000000000000000000000000"
+ local iatt1=$(stat -c "%g:%u:%A" $f1_path)
+ local iatt2=$(stat -c "%g:%u:%A" $f2_path)
+ local diff="1"
+ if [ "$iatt1" == "$iatt2" ]; then diff=0; fi
+ local xattr11=$(get_hex_xattr trusted.afr.$V0-client-0 $f1_path)
+ local xattr12=$(get_hex_xattr trusted.afr.$V0-client-1 $f1_path)
+ local xattr21=$(get_hex_xattr trusted.afr.$V0-client-0 $f2_path)
+ local xattr22=$(get_hex_xattr trusted.afr.$V0-client-1 $f2_path)
+ local dirty1=$(get_hex_xattr trusted.afr.dirty $f1_path)
+ local dirty2=$(get_hex_xattr trusted.afr.dirty $f2_path)
+ if [ -z $xattr11 ]; then xattr11="000000000000000000000000"; fi
+ if [ -z $xattr12 ]; then xattr12="000000000000000000000000"; fi
+ if [ -z $xattr21 ]; then xattr21="000000000000000000000000"; fi
+ if [ -z $xattr22 ]; then xattr22="000000000000000000000000"; fi
+ if [ -z $dirty1 ]; then dirty1="000000000000000000000000"; fi
+ if [ -z $dirty2 ]; then dirty2="000000000000000000000000"; fi
+ if [ "${diff}${xattr11}${xattr12}${xattr21}${xattr22}${dirty1}${dirty2}" == "0${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}${zero_xattr}" ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function print_pending_heals {
+ local result=":"
+ for i in "$@";
+ do
+ if [ "N" == $(is_heal_done $B0/brick0 $B0/brick1 $i) ];
+ then
+ result="$result:$i"
+ fi
+ done
+#To prevent any match for EXPECT_WITHIN, print a char non-existent in file-names
+ if [ $result == ":" ]; then result="~"; fi
+ echo $result
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+cd $M0
+
+TEST touch a
+
+#Test heal with pending xattrs
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST chmod 777 a
+TEST chown 100:100 a
+TEST setfattr -n trusted.abc -v 0x616263 a
+TEST setfattr -n trusted.def -v 0x646566 a
+permissions=$(stat -c "%A" a)
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals a
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+EXPECT $permissions stat -c "%A" a
+EXPECT $permissions stat -c "%A" $B0/brick0/a
+EXPECT $permissions stat -c "%A" $B0/brick1/a
+
+EXPECT 100 stat -c "%g" a
+EXPECT 100 stat -c "%g" $B0/brick0/a
+EXPECT 100 stat -c "%g" $B0/brick1/a
+
+EXPECT 100 stat -c "%u" a
+EXPECT 100 stat -c "%u" $B0/brick0/a
+EXPECT 100 stat -c "%u" $B0/brick1/a
+
+EXPECT 616263 get_hex_xattr trusted.abc a
+EXPECT 616263 get_hex_xattr trusted.abc $B0/brick0/a
+EXPECT 616263 get_hex_xattr trusted.abc $B0/brick1/a
+
+EXPECT 646566 get_hex_xattr trusted.def a
+EXPECT 646566 get_hex_xattr trusted.def $B0/brick0/a
+EXPECT 646566 get_hex_xattr trusted.def $B0/brick1/a
+
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST setfattr -n trusted.abc -v 0x646566 a
+TEST setfattr -x trusted.def a
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals a
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+EXPECT 646566 get_hex_xattr trusted.abc a
+EXPECT 646566 get_hex_xattr trusted.abc $B0/brick0/a
+EXPECT 646566 get_hex_xattr trusted.abc $B0/brick1/a
+
+TEST ! getfattr -n trusted.def a
+TEST ! getfattr -n trusted.def $B0/brick0/a
+TEST ! getfattr -n trusted.def $B0/brick1/a
+
+#Test split-brain && iatt mismatch without any xattrs (this will be simulated)
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST touch b
+TEST touch c
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST chmod 777 b
+TEST chmod 777 c
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST chown 100:100 b
+TEST chown 100:100 c
+TEST $CLI volume stop $V0
+TEST setfattr -x trusted.afr.$V0-client-0 $B0/brick1/c
+TEST setfattr -x trusted.afr.$V0-client-1 $B0/brick0/c
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "~" print_pending_heals c
+EXPECT "N" is_heal_done $B0/brick0 $B0/brick1 b
+
+cd -
+cleanup;
diff --git a/tests/basic/afr/name-self-heal.t b/tests/basic/afr/name-self-heal.t
new file mode 100644
index 00000000000..50fc2ecc6c2
--- /dev/null
+++ b/tests/basic/afr/name-self-heal.t
@@ -0,0 +1,112 @@
+#!/bin/bash
+#Self-heal tests
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+#Check that when quorum is not enabled name-heal happens correctly
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST touch $M0/a
+TEST touch $M0/c
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST touch $M0/b
+TEST rm -f $M0/a
+TEST rm -f $M0/c
+TEST touch $M0/c #gfid mismatch case
+c_gfid=$(gf_get_gfid_xattr $B0/brick1/c)
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST ! stat $M0/a
+TEST ! stat $B0/brick0/a
+TEST ! stat $B0/brick1/a
+
+TEST stat $M0/b
+TEST stat $B0/brick0/b
+TEST stat $B0/brick1/b
+TEST [[ "$(gf_get_gfid_xattr $B0/brick0/b)" == "$(gf_get_gfid_xattr $B0/brick1/b)" ]]
+
+TEST stat $M0/c
+TEST stat $B0/brick0/c
+TEST stat $B0/brick1/c
+TEST [[ "$(gf_get_gfid_xattr $B0/brick0/c)" == "$c_gfid" ]]
+
+cleanup;
+
+#Check that when quorum is enabled name-heal happens as expected
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST touch $M0/a
+TEST touch $M0/c
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST touch $M0/b
+TEST rm -f $M0/a
+TEST rm -f $M0/c
+TEST touch $M0/c #gfid mismatch case
+c_gfid=$(gf_get_gfid_xattr $B0/brick1/c)
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST ! stat $M0/a
+TEST ! stat $B0/brick0/a
+TEST ! stat $B0/brick1/a
+TEST ! stat $B0/brick2/a
+
+TEST stat $M0/b
+TEST ! stat $B0/brick0/b #Name heal shouldn't be triggered
+TEST stat $B0/brick1/b
+TEST stat $B0/brick2/b
+
+TEST stat $M0/c
+TEST stat $B0/brick0/c
+TEST stat $B0/brick1/c
+TEST stat $B0/brick2/c
+TEST [[ "$(gf_get_gfid_xattr $B0/brick0/c)" == "$c_gfid" ]]
+
+TEST $CLI volume set $V0 cluster.quorum-type none
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "none" get_quorum_type $M0 $V0 0
+TEST stat $M0/b
+TEST stat $B0/brick0/b #Name heal should be triggered
+TEST stat $B0/brick1/b
+TEST stat $B0/brick2/b
+TEST [[ "$(gf_get_gfid_xattr $B0/brick0/b)" == "$(gf_get_gfid_xattr $B0/brick1/b)" ]]
+TEST $CLI volume set $V0 cluster.quorum-type auto
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "auto" get_quorum_type $M0 $V0 0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#Missing parent xattrs cases
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST $CLI volume heal $V0 disable
+#In cases where a good parent doesn't have pending xattrs and a file,
+#name-heal will be triggered
+TEST gf_rm_file_and_gfid_link $B0/brick1 c
+TEST stat $M0/c
+TEST stat $B0/brick0/c
+TEST stat $B0/brick1/c
+TEST stat $B0/brick2/c
+TEST [[ "$(gf_get_gfid_xattr $B0/brick0/c)" == "$c_gfid" ]]
+cleanup
diff --git a/tests/basic/afr/quorum.t b/tests/basic/afr/quorum.t
new file mode 100644
index 00000000000..58116ba49f5
--- /dev/null
+++ b/tests/basic/afr/quorum.t
@@ -0,0 +1,97 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd
+
+function test_write {
+ dd of=$M0/a if=/dev/urandom bs=1k count=1 2>&1 > /dev/null
+}
+
+#Tests for quorum-type option for replica 2
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
+
+touch $M0/a
+echo abc > $M0/b
+
+TEST ! $CLI volume set $V0 cluster.quorum-type ""
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+EXPECT fixed volume_option $V0 cluster.quorum-type
+TEST $CLI volume set $V0 cluster.quorum-count 2
+TEST test_write
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! test_write
+TEST ! cat $M0/b
+
+TEST $CLI volume set $V0 cluster.quorum-type auto
+EXPECT auto volume_option $V0 cluster.quorum-type
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST test_write
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! test_write
+TEST ! cat $M0/b
+
+TEST $CLI volume set $V0 cluster.quorum-type none
+EXPECT none volume_option $V0 cluster.quorum-type
+TEST test_write
+#Default is 'none' for even number of bricks in replication
+TEST $CLI volume reset $V0 cluster.quorum-type
+TEST test_write
+EXPECT "abc" cat $M0/b
+
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+#Tests for quorum-type option for replica 3
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3};
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
+
+touch $M0/a
+echo abc > $M0/b
+
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+EXPECT fixed volume_option $V0 cluster.quorum-type
+TEST $CLI volume set $V0 cluster.quorum-count 3
+TEST test_write
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! test_write
+TEST ! cat $M0/b
+
+TEST $CLI volume set $V0 cluster.quorum-type auto
+EXPECT auto volume_option $V0 cluster.quorum-type
+TEST test_write
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST ! test_write
+TEST ! cat $M0/b
+
+TEST $CLI volume set $V0 cluster.quorum-type none
+EXPECT none volume_option $V0 cluster.quorum-type
+TEST test_write
+#Default is 'auto' for odd number of bricks in replication
+TEST $CLI volume reset $V0 cluster.quorum-type
+EXPECT "^$" volume_option $V0 cluster.quorum-type
+TEST ! test_write
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST test_write
+cleanup;
diff --git a/tests/basic/afr/read-subvol-data.t b/tests/basic/afr/read-subvol-data.t
new file mode 100644
index 00000000000..39f43a15028
--- /dev/null
+++ b/tests/basic/afr/read-subvol-data.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+#Test if the source is selected based on data transaction for a regular file.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+#Init
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#Test
+TEST $CLI volume set $V0 cluster.read-subvolume $V0-client-1
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST dd if=/dev/urandom of=$M0/afr_success_5.txt bs=1024k count=1
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST dd if=/dev/urandom of=$M0/afr_success_5.txt bs=1024k count=10
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "10485760" echo `ls -l $M0/afr_success_5.txt | awk '{ print $5}'`
+
+#Cleanup
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+TEST rm -rf $B0/*
diff --git a/tests/basic/afr/read-subvol-entry.t b/tests/basic/afr/read-subvol-entry.t
new file mode 100644
index 00000000000..76b2dcf85b0
--- /dev/null
+++ b/tests/basic/afr/read-subvol-entry.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+#Test if the read child is selected based on entry transaction for directory
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+#Init
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#Test
+TEST mkdir -p $M0/abc/def
+
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+
+TEST kill_brick $V0 $H0 $B0/brick0
+
+TEST touch $M0/abc/def/ghi
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "ghi" echo `ls $M0/abc/def/`
+
+#Cleanup
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+TEST rm -rf $B0/*
diff --git a/tests/basic/afr/rename-data-loss.t b/tests/basic/afr/rename-data-loss.t
new file mode 100644
index 00000000000..256ee2aafce
--- /dev/null
+++ b/tests/basic/afr/rename-data-loss.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+#Self-heal tests
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 write-behind off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+cd $M0
+TEST `echo "line1" >> file1`
+TEST mkdir dir1
+TEST mkdir dir2
+TEST mkdir -p dir1/dira/dirb
+TEST `echo "line1">>dir1/dira/dirb/file1`
+TEST mkdir delete_me
+TEST `echo "line1" >> delete_me/file1`
+
+#brick0 has witnessed the second write while brick1 is down.
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST `echo "line2" >> file1`
+TEST `echo "line2" >> dir1/dira/dirb/file1`
+TEST `echo "line2" >> delete_me/file1`
+
+#Toggle the bricks that are up/down.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/brick0
+
+#Rename when the 'source' brick0 for data-selfheals is down.
+mv file1 file2
+mv dir1/dira dir2
+
+#Delete a dir when brick0 is down.
+rm -rf delete_me
+cd -
+
+#Bring everything up and trigger heal
+TEST $CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_anon_entry_count $B0/brick1
+
+#Remount to avoid reading from caches
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "line2" tail -1 $M0/file2
+EXPECT "line2" tail -1 $M0/dir2/dira/dirb/file1
+TEST ! stat $M0/delete_me/file1
+TEST ! stat $M0/delete_me
+
+anon_inode_name=$(ls -a $B0/brick0 | grep glusterfs-anonymous-inode)
+TEST [[ -d $B0/brick0/$anon_inode_name ]]
+TEST [[ -d $B0/brick1/$anon_inode_name ]]
+cleanup
diff --git a/tests/basic/afr/replace-brick-self-heal.t b/tests/basic/afr/replace-brick-self-heal.t
new file mode 100644
index 00000000000..0360db71a2f
--- /dev/null
+++ b/tests/basic/afr/replace-brick-self-heal.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+# Create files
+for i in {1..5}
+do
+ echo $i > $M0/file$i.txt
+done
+
+# Metadata changes
+TEST setfattr -n user.test -v qwerty $M0/file5.txt
+
+# Replace brick1
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit force
+
+# Replaced-brick should accuse the non-replaced-brick (Simulating case for data-loss)
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1_new/
+
+# Check if pending xattr and dirty-xattr are set for replaced-brick
+EXPECT "000000000000000100000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.dirty $B0/${V0}1_new
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+# Check if entry-heal has happened
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1_new | sort)
+
+# To make sure that files were not lost from brick0
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort)
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+
+# Test if data was healed
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}1_new/file1.txt
+# To make sure that data was not lost from brick0
+TEST diff $B0/${V0}0/file1.txt $B0/${V0}1/file1.txt
+
+# Test if metadata was healed and exists on both the bricks
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}1_new/file5.txt
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file5.txt
+
+cleanup;
diff --git a/tests/basic/afr/resolve.t b/tests/basic/afr/resolve.t
new file mode 100644
index 00000000000..a741eee6e5e
--- /dev/null
+++ b/tests/basic/afr/resolve.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#Check that operations succeed after changing the disk of the brick while
+#a brick is down
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1;
+TEST cd $M0
+TEST mkdir -p a/b/c/d/e
+TEST cd a/b/c/d/e
+echo abc > g
+
+#Simulate disk replacement
+TEST kill_brick $V0 $H0 $B0/${V0}0
+rm -rf $B0/${V0}0/.glusterfs $B0/${V0}0/a
+
+#Ideally, disk replacement is done using reset-brick or replace-brick gluster CLI
+#which will create .glusterfs folder.
+mkdir $B0/${V0}0/.glusterfs && chmod 600 $B0/${V0}0/.glusterfs
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+#Test that the lookup returns ENOENT instead of ESTALE
+#If lookup returns ESTALE this command will fail with ESTALE
+TEST touch f
+
+#Test that ESTALE is ignored when there is a good copy
+EXPECT abc cat g
+
+#Simulate file changing only one mount
+#create the file on first mount
+echo ghi > $M0/b
+
+#re-create the file on other mount while one of the bricks is down.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST rm -f $M1/b
+echo jkl > $M1/b
+#Clear the extended attributes on the directory to create a scenario where
+#gfid-mismatch happened. This should result in EIO
+TEST setfattr -x trusted.afr.$V0-client-0 $B0/${V0}1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+# The kernel knows nothing about the tricks done to the volume, and the file
+# may still be in page cache. Wait for timeout.
+EXPECT_WITHIN 10 "^$" cat $M0/b
+cleanup
diff --git a/tests/basic/afr/root-squash-self-heal.t b/tests/basic/afr/root-squash-self-heal.t
new file mode 100644
index 00000000000..6e12098465a
--- /dev/null
+++ b/tests/basic/afr/root-squash-self-heal.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 server.root-squash on
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --no-root-squash=yes --use-readdirp=no $M0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo abc > $M0/a
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+find $M0 | xargs stat > /dev/null
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+cleanup
diff --git a/tests/basic/afr/self-heal.t b/tests/basic/afr/self-heal.t
new file mode 100644
index 00000000000..10fb152d046
--- /dev/null
+++ b/tests/basic/afr/self-heal.t
@@ -0,0 +1,244 @@
+#!/bin/bash
+#Self-heal tests
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+#Init
+AREQUAL_PATH=$(dirname $0)/../../utils
+AREQUAL_BIN=$AREQUAL_PATH/arequal-checksum
+CFLAGS=""
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+TEST [ -e $AREQUAL_BIN ]
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+###############################################################################
+#1.Test successful data, metadata and entry self-heal
+
+#Test
+TEST mkdir -p $M0/abc/def $M0/abc/ghi
+TEST dd if=/dev/urandom of=$M0/abc/file_abc.txt bs=1024k count=2 2>/dev/null
+TEST dd if=/dev/urandom of=$M0/abc/def/file_abc_def_1.txt bs=1024k count=2 2>/dev/null
+TEST dd if=/dev/urandom of=$M0/abc/def/file_abc_def_2.txt bs=1024k count=3 2>/dev/null
+TEST dd if=/dev/urandom of=$M0/abc/ghi/file_abc_ghi.txt bs=1024k count=4 2>/dev/null
+
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST truncate -s 0 $M0/abc/def/file_abc_def_1.txt
+NEW_UID=36
+NEW_GID=36
+TEST chown $NEW_UID:$NEW_GID $M0/abc/def/file_abc_def_2.txt
+TEST rm -rf $M0/abc/ghi
+TEST mkdir -p $M0/def/ghi $M0/jkl/mno
+TEST dd if=/dev/urandom of=$M0/def/ghi/file1.txt bs=1024k count=2 2>/dev/null
+TEST dd if=/dev/urandom of=$M0/def/ghi/file2.txt bs=1024k count=3 2>/dev/null
+TEST dd if=/dev/urandom of=$M0/jkl/mno/file.txt bs=1024k count=4 2>/dev/null
+TEST chown $NEW_UID:$NEW_GID $M0/def/ghi/file2.txt
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#check all files created/deleted on brick1 are also replicated on brick 0
+#(i.e. no reverse heal has happened)
+TEST ls $B0/brick0/def/ghi/file1.txt
+TEST ls $B0/brick0/def/ghi/file2.txt
+TEST ls $B0/brick0/jkl/mno/file.txt
+TEST ! ls $B0/brick0/abc/ghi
+EXPECT "$NEW_UID$NEW_GID" stat -c %u%g $B0/brick0/abc/def/file_abc_def_2.txt
+TEST diff <($AREQUAL_BIN -p $B0/brick0 -i .glusterfs) <($AREQUAL_BIN -p $B0/brick1 -i .glusterfs)
+
+#Cleanup
+TEST rm -rf $M0/*
+###############################################################################
+
+#2.Test successful self-heal of different file types.
+
+#Test
+TEST touch $M0/file
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST rm -f $M0/file
+TEST mkdir $M0/file
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#check heal has happened in the correct direction
+TEST test -d $B0/brick0/file
+TEST diff <($AREQUAL_BIN -p $B0/brick0 -i .glusterfs) <($AREQUAL_BIN -p $B0/brick1 -i .glusterfs)
+
+#Cleanup
+TEST rm -rf $M0/*
+###############################################################################
+
+#3.Test successful self-heal of file permissions.
+
+#Test
+TEST touch $M0/file
+TEST chmod 666 $M0/file
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST chmod 777 $M0/file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#check heal has happened in the correct direction
+EXPECT "777" stat -c %a $B0/brick0/file
+TEST diff <($AREQUAL_BIN -p $B0/brick0 -i .glusterfs) <($AREQUAL_BIN -p $B0/brick1 -i .glusterfs)
+
+#Cleanup
+TEST rm -rf $M0/*
+###############################################################################
+
+#4.Test successful self-heal of file ownership
+
+#Test
+TEST touch $M0/file
+TEST kill_brick $V0 $H0 $B0/brick0
+NEW_UID=36
+NEW_GID=36
+TEST chown $NEW_UID:$NEW_GID $M0/file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#check heal has happened in the correct direction
+EXPECT "$NEW_UID$NEW_GID" stat -c %u%g $B0/brick0/file
+TEST diff <($AREQUAL_BIN -p $B0/brick0 -i .glusterfs) <($AREQUAL_BIN -p $B0/brick1 -i .glusterfs)
+
+#Cleanup
+TEST rm -rf $M0/*
+###############################################################################
+
+#5.File size test
+
+#Test
+TEST touch $M0/file
+TEST `echo "write1">$M0/file`
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST `echo "write2">>$M0/file`
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST truncate -s 0 $M0/file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#check heal has happened in the correct direction
+EXPECT 0 stat -c %s $B0/brick1/file
+TEST diff <($AREQUAL_BIN -p $B0/brick0 -i .glusterfs) <($AREQUAL_BIN -p $B0/brick1 -i .glusterfs)
+
+#Cleanup
+TEST rm -rf $M0/*
+###############################################################################
+
+#6.GFID heal
+
+#Test
+TEST touch $M0/file
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST rm -f $M0/file
+TEST touch $M0/file
+GFID=$(gf_get_gfid_xattr $B0/brick1/file)
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#check heal has happened in the correct direction
+EXPECT "$GFID" gf_get_gfid_xattr $B0/brick0/file
+
+#Cleanup
+TEST rm -rf $M0/*
+###############################################################################
+
+#7. Link/symlink heal
+
+#Test
+TEST touch $M0/file
+TEST ln $M0/file $M0/link_to_file
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST rm -f $M0/link_to_file
+TEST ln -s $M0/file $M0/link_to_file
+TEST ln $M0/file $M0/hard_link_to_file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#check heal has happened in the correct direction
+TEST test -f $B0/brick0/hard_link_to_file
+TEST test -h $B0/brick0/link_to_file
+TEST diff <($AREQUAL_BIN -p $B0/brick0 -i .glusterfs) <($AREQUAL_BIN -p $B0/brick1 -i .glusterfs)
+
+#Cleanup
+TEST rm -rf $M0/*
+###############################################################################
+
+#8. Heal xattrs set by application
+
+#Test
+TEST touch $M0/file
+TEST setfattr -n user.myattr_1 -v My_attribute_1 $M0/file
+TEST setfattr -n user.myattr_2 -v "My_attribute_2" $M0/file
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST setfattr -n user.myattr_1 -v "My_attribute_1_modified" $M0/file
+TEST setfattr -n user.myattr_3 -v "My_attribute_3" $M0/file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+TEST diff <(echo "user.myattr_1=\"My_attribute_1_modified\"") <(getfattr -n user.myattr_1 $B0/brick1/file|grep user.myattr_1)
+TEST diff <(echo "user.myattr_3=\"My_attribute_3\"") <(getfattr -n user.myattr_3 $B0/brick1/file|grep user.myattr_3)
+
+#Cleanup
+TEST rm -rf $M0/*
+###############################################################################
+
+TEST rm -rf $AREQUAL_BIN
+cleanup;
diff --git a/tests/basic/afr/self-heald.t b/tests/basic/afr/self-heald.t
new file mode 100644
index 00000000000..24c82777921
--- /dev/null
+++ b/tests/basic/afr/self-heald.t
@@ -0,0 +1,193 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+START_TIMESTAMP=`date +%s`
+
+function kill_multiple_bricks {
+ local vol=$1
+ local host=$2
+ local brickpath=$3
+
+ if [ $decide_kill == 0 ]
+ then
+ for ((i=0; i<=4; i=i+2)) do
+ TEST kill_brick $vol $host $brickpath/${vol}$i
+ done
+ else
+ for ((i=1; i<=5; i=i+2)) do
+ TEST kill_brick $vol $host $brickpath/${vol}$i
+ done
+ fi
+}
+function check_bricks_up {
+ local vol=$1
+ if [ $decide_kill == 0 ]
+ then
+ for ((i=0; i<=4; i=i+2)) do
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_in_shd $vol $i
+ done
+ else
+ for ((i=1; i<=5; i=i+2)) do
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_in_shd $vol $i
+ done
+ fi
+}
+
+function disconnected_brick_count {
+ local vol=$1
+ $CLI volume heal $vol info | \
+ egrep -i '(transport|Socket is not connected)' | wc -l
+}
+
+TESTS_EXPECTED_IN_LOOP=20
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+decide_kill=$((`date +"%j"|sed 's/^0*//'` % 2 ))
+
+kill_multiple_bricks $V0 $H0 $B0
+cd $M0
+HEAL_FILES=0
+for i in {1..10}
+do
+ dd if=/dev/urandom of=f bs=1024k count=10
+ HEAL_FILES=$(($HEAL_FILES+1)) #+1 for data/metadata self-heal of 'f'
+ mkdir a; cd a;
+ #+3 for metadata self-heal of 'a' one per subvolume of DHT
+ HEAL_FILES=$(($HEAL_FILES+3))
+done
+#+3 represents entry sh on "/", one per subvolume of DHT?
+HEAL_FILES=$(($HEAL_FILES + 3))
+
+cd ~
+EXPECT "$HEAL_FILES" get_pending_heal_count $V0
+
+#When bricks are down, it says Transport End point Not connected for them
+EXPECT "3" disconnected_brick_count $V0
+
+#Create some stale indices and verify that they are not counted in heal info
+#TO create stale index create and delete files when one brick is down in
+#replica pair.
+for i in {11..20}; do echo abc > $M0/$i; done
+HEAL_FILES=$(($HEAL_FILES + 10)) #count extra 10 files
+EXPECT "$HEAL_FILES" get_pending_heal_count $V0
+#delete the files now, so that stale indices will remain.
+for i in {11..20}; do rm -f $M0/$i; done
+#After deleting files they should not appear in heal info
+HEAL_FILES=$(($HEAL_FILES - 10))
+EXPECT "$HEAL_FILES" get_pending_heal_count $V0
+
+
+TEST ! $CLI volume heal $V0
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST ! $CLI volume heal $V0
+TEST ! $CLI volume heal $V0 full
+TEST $CLI volume start $V0 force
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+
+check_bricks_up $V0
+
+TEST $CLI volume heal $V0
+sleep 5 #Until the heal-statistics command implementation
+#check that this heals the contents partially
+TEST [ $HEAL_FILES -gt $(get_pending_heal_count $V0) ]
+
+TEST $CLI volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#Test that ongoing IO is not considered as Pending heal
+(dd if=/dev/zero of=$M0/file1 bs=1k 2>/dev/null 1>/dev/null)&
+back_pid1=$!;
+(dd if=/dev/zero of=$M0/file2 bs=1k 2>/dev/null 1>/dev/null)&
+back_pid2=$!;
+(dd if=/dev/zero of=$M0/file3 bs=1k 2>/dev/null 1>/dev/null)&
+back_pid3=$!;
+(dd if=/dev/zero of=$M0/file4 bs=1k 2>/dev/null 1>/dev/null)&
+back_pid4=$!;
+(dd if=/dev/zero of=$M0/file5 bs=1k 2>/dev/null 1>/dev/null)&
+back_pid5=$!;
+EXPECT 0 get_pending_heal_count $V0
+kill -SIGTERM $back_pid1;
+kill -SIGTERM $back_pid2;
+kill -SIGTERM $back_pid3;
+kill -SIGTERM $back_pid4;
+kill -SIGTERM $back_pid5;
+wait >/dev/null 2>&1;
+
+#Test that volume heal info reports files even when self-heal
+#options are disabled
+TEST touch $M0/f
+TEST mkdir $M0/d
+#DATA
+TEST $CLI volume set $V0 cluster.data-self-heal off
+EXPECT "off" volume_option $V0 cluster.data-self-heal
+kill_multiple_bricks $V0 $H0 $B0
+echo abc > $M0/f
+EXPECT 1 get_pending_heal_count $V0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+check_bricks_up $V0
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+TEST $CLI volume set $V0 cluster.data-self-heal on
+
+#METADATA
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+EXPECT "off" volume_option $V0 cluster.metadata-self-heal
+kill_multiple_bricks $V0 $H0 $B0
+
+TEST chmod 777 $M0/f
+EXPECT 1 get_pending_heal_count $V0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+check_bricks_up $V0
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+
+#ENTRY
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+EXPECT "off" volume_option $V0 cluster.entry-self-heal
+kill_multiple_bricks $V0 $H0 $B0
+TEST touch $M0/d/a
+# 4 if mtime/ctime is modified for d in bricks without a
+# 2 otherwise
+PENDING=$( get_pending_heal_count $V0 )
+TEST test $PENDING -eq 2 -o $PENDING -eq 4
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+check_bricks_up $V0
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+#Negative test cases
+#Fail volume does not exist case
+TEST ! $CLI volume heal fail info
+
+#Fail volume stopped case
+TEST $CLI volume stop $V0
+TEST ! $CLI volume heal $V0 info
+
+#Fail non-replicate volume info
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 $H0:$B0/${V0}{6}
+TEST $CLI volume start $V0
+TEST ! $CLI volume heal $V0 info
+
+# Check for non Linux systems that we did not mess with directory offsets
+TEST ! log_newer $START_TIMESTAMP "offset reused from another DIR"
+
+cleanup
diff --git a/tests/basic/afr/sparse-file-self-heal.t b/tests/basic/afr/sparse-file-self-heal.t
new file mode 100644
index 00000000000..04b77c41de1
--- /dev/null
+++ b/tests/basic/afr/sparse-file-self-heal.t
@@ -0,0 +1,181 @@
+#!/bin/bash
+
+#This file checks if self-heal of files with holes is working properly or not
+#bigger is 2M, big is 1M, small is anything less
+#Also tests if non-sparse files with zeroes in it are healed correctly w.r.t
+#disk usage.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 data-self-heal-algorithm full
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k
+TEST dd if=/dev/urandom of=$M0/bigger2big count=1 bs=2048k
+TEST dd if=/dev/urandom of=$M0/big2bigger count=1 bs=1024k
+TEST truncate -s 1G $M0/FILE
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+#File with >128k size hole
+TEST truncate -s 1M $M0/big
+big_md5sum=$(md5sum $M0/big | awk '{print $1}')
+
+#File with <128k hole
+TEST truncate -s 0 $M0/small
+TEST truncate -s 64k $M0/small
+small_md5sum=$(md5sum $M0/small | awk '{print $1}')
+
+#Bigger file truncated to big size hole.
+TEST truncate -s 0 $M0/bigger2big
+TEST truncate -s 1M $M0/bigger2big
+bigger2big_md5sum=$(md5sum $M0/bigger2big | awk '{print $1}')
+
+#Big file truncated to Bigger size hole
+TEST truncate -s 2M $M0/big2bigger
+big2bigger_md5sum=$(md5sum $M0/big2bigger | awk '{print $1}')
+
+#Write data to file and restore its sparseness
+TEST dd if=/dev/urandom of=$M0/FILE count=1 bs=131072
+TEST truncate -s 1G $M0/FILE
+
+#Create a non-sparse file containing zeroes.
+TEST dd if=/dev/zero of=$M0/zeroedfile bs=1024 count=1024
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST gluster volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#If the file system of bricks is XFS and speculative preallocation is on,
+#dropping cahce should be done to free speculatively pre-allocated blocks
+#by XFS.
+drop_cache $M0
+
+big_md5sum_0=$(md5sum $B0/${V0}0/big | awk '{print $1}')
+small_md5sum_0=$(md5sum $B0/${V0}0/small | awk '{print $1}')
+bigger2big_md5sum_0=$(md5sum $B0/${V0}0/bigger2big | awk '{print $1}')
+big2bigger_md5sum_0=$(md5sum $B0/${V0}0/big2bigger | awk '{print $1}')
+
+EXPECT $big_md5sum echo $big_md5sum_0
+EXPECT $small_md5sum echo $small_md5sum_0
+EXPECT $big2bigger_md5sum echo $big2bigger_md5sum_0
+EXPECT $bigger2big_md5sum echo $bigger2big_md5sum_0
+
+
+EXPECT "1" has_holes $B0/${V0}0/big
+#Because self-heal writes the final chunk hole should not be there for
+#files < 128K
+EXPECT "0" has_holes $B0/${V0}0/small
+# Since source is smaller than sink, self-heal does blind copy so no holes will
+# be present
+EXPECT "0" has_holes $B0/${V0}0/bigger2big
+EXPECT "1" has_holes $B0/${V0}0/big2bigger
+
+#Check that self-heal has not written 0s to sink and made it non-sparse.
+USED_KB=`du -s $B0/${V0}0/FILE|cut -f1`
+TEST [ $USED_KB -lt 1000000 ]
+
+#Check that the non-sparse file has the same file size on both bricks and that
+#the disk usage is greater than or equal to the file size. We could have checked
+#that the disk usage is just equal to the file size but XFS does speculative
+#preallocation due to which disk usage can be more than the file size.
+STAT_SIZE1=$(stat -c "%s" $B0/${V0}0/zeroedfile)
+STAT_SIZE2=$(stat -c "%s" $B0/${V0}1/zeroedfile)
+TEST [ $STAT_SIZE1 -eq $STAT_SIZE2 ]
+USED_KB1="$((`stat -c %b $B0/${V0}0/zeroedfile` * `stat -c %B $B0/${V0}0/zeroedfile`))"
+TEST [ $USED_KB1 -ge $STAT_SIZE1 ]
+USED_KB2="$((`stat -c %b $B0/${V0}1/zeroedfile` * `stat -c %B $B0/${V0}1/zeroedfile`))"
+TEST [ $USED_KB2 -ge $STAT_SIZE2 ]
+
+TEST rm -f $M0/*
+
+#check the same tests with diff self-heal
+TEST $CLI volume set $V0 data-self-heal-algorithm diff
+
+TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k
+TEST dd if=/dev/urandom of=$M0/big2bigger count=1 bs=1024k
+TEST dd if=/dev/urandom of=$M0/bigger2big count=1 bs=2048k
+TEST truncate -s 1G $M0/FILE
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+#File with >128k size hole
+TEST truncate -s 1M $M0/big
+big_md5sum=$(md5sum $M0/big | awk '{print $1}')
+
+#File with <128k hole
+TEST truncate -s 0 $M0/small
+TEST truncate -s 64k $M0/small
+small_md5sum=$(md5sum $M0/small | awk '{print $1}')
+
+#Bigger file truncated to big size hole
+TEST truncate -s 0 $M0/bigger2big
+TEST truncate -s 1M $M0/bigger2big
+bigger2big_md5sum=$(md5sum $M0/bigger2big | awk '{print $1}')
+
+#Big file truncated to Bigger size hole
+TEST truncate -s 2M $M0/big2bigger
+big2bigger_md5sum=$(md5sum $M0/big2bigger | awk '{print $1}')
+
+#Write data to file and restore its sparseness
+TEST dd if=/dev/urandom of=$M0/FILE count=1 bs=131072
+TEST truncate -s 1G $M0/FILE
+
+#Create a non-sparse file containing zeroes.
+TEST dd if=/dev/zero of=$M0/zeroedfile bs=1024 count=1024
+
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST gluster volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#If the file system of bricks is XFS and speculative preallocation is on,
+#dropping cahce should be done to free speculatively pre-allocated blocks
+#by XFS.
+drop_cache $M0
+
+big_md5sum_0=$(md5sum $B0/${V0}0/big | awk '{print $1}')
+small_md5sum_0=$(md5sum $B0/${V0}0/small | awk '{print $1}')
+bigger2big_md5sum_0=$(md5sum $B0/${V0}0/bigger2big | awk '{print $1}')
+big2bigger_md5sum_0=$(md5sum $B0/${V0}0/big2bigger | awk '{print $1}')
+
+EXPECT $big_md5sum echo $big_md5sum_0
+EXPECT $small_md5sum echo $small_md5sum_0
+EXPECT $big2bigger_md5sum echo $big2bigger_md5sum_0
+EXPECT $bigger2big_md5sum echo $bigger2big_md5sum_0
+
+EXPECT "1" has_holes $B0/${V0}0/big
+EXPECT "1" has_holes $B0/${V0}0/big2bigger
+EXPECT "0" has_holes $B0/${V0}0/bigger2big
+EXPECT "0" has_holes $B0/${V0}0/small
+
+#Check that self-heal has not written 0s to sink and made it non-sparse.
+USED_KB=`du -s $B0/${V0}0/FILE|cut -f1`
+TEST [ $USED_KB -lt 1000000 ]
+
+#Check that the non-sparse file has the same file size on both bricks and that
+#the disk usage is greater than or equal to the file size. We could have checked
+#that the disk usage is just equal to the file size but XFS does speculative
+#preallocation due to which disk usage can be more than the file size.
+STAT_SIZE1=$(stat -c "%s" $B0/${V0}0/zeroedfile)
+STAT_SIZE2=$(stat -c "%s" $B0/${V0}1/zeroedfile)
+TEST [ $STAT_SIZE1 -eq $STAT_SIZE2 ]
+USED_KB1="$((`stat -c %b $B0/${V0}0/zeroedfile` * `stat -c %B $B0/${V0}0/zeroedfile`))"
+TEST [ $USED_KB1 -ge $STAT_SIZE1 ]
+USED_KB2="$((`stat -c %b $B0/${V0}1/zeroedfile` * `stat -c %B $B0/${V0}1/zeroedfile`))"
+TEST [ $USED_KB2 -ge $STAT_SIZE2 ]
+
+cleanup
diff --git a/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t b/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t
new file mode 100644
index 00000000000..7c249c4bcbd
--- /dev/null
+++ b/tests/basic/afr/split-brain-favorite-child-policy-client-side-healing.t
@@ -0,0 +1,124 @@
+#!/bin/bash
+
+#Test the client side split-brain resolution
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+GET_MDATA_PATH=$(dirname $0)/../../utils
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
+
+TEST glusterd
+TEST pidof glusterd
+
+count_files () {
+ ls $1 | wc -l
+}
+
+#Create replica 2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+TEST $CLI volume set $V0 cluster.quorum-count 1
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST mkdir $M0/data
+TEST touch $M0/data/file
+
+
+############ Client side healing using favorite-child-policy = mtime #################
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/data/file bs=1024 count=1024
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/data/file bs=1024 count=1024
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+mtime1=$(get_mtime $B0/${V0}0/data/file)
+mtime2=$(get_mtime $B0/${V0}1/data/file)
+if (( $(echo "$mtime1 > $mtime2" | bc -l) )); then
+ LATEST_MTIME_MD5=$(md5sum $B0/${V0}0/data/file | cut -d\ -f1)
+else
+ LATEST_MTIME_MD5=$(md5sum $B0/${V0}1/data/file | cut -d\ -f1)
+fi
+
+#file will be in split-brain
+cat $M0/data/file > /dev/null
+EXPECT "1" echo $?
+
+TEST $CLI volume set $V0 cluster.favorite-child-policy mtime
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" afr_get_split_brain_count $V0
+cat $M0/data/file > /dev/null
+EXPECT "0" echo $?
+M0_MD5=$(md5sum $M0/data/file | cut -d\ -f1)
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_get_split_brain_count $V0
+TEST [ "$LATEST_MTIME_MD5" == "$M0_MD5" ]
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+B0_MD5=$(md5sum $B0/${V0}0/data/file | cut -d\ -f1)
+B1_MD5=$(md5sum $B0/${V0}1/data/file | cut -d\ -f1)
+TEST [ "$LATEST_MTIME_MD5" == "$B0_MD5" ]
+TEST [ "$LATEST_MTIME_MD5" == "$B1_MD5" ]
+
+############ Client side directory conservative merge #################
+TEST $CLI volume reset $V0 cluster.favorite-child-policy
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/data/test
+files=$(count_files $M0/data)
+EXPECT "2" echo $files
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST touch $M0/data/test1
+files=$(count_files $M0/data)
+EXPECT "2" echo $files
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#data dir will be in entry split-brain
+ls $M0/data > /dev/null
+EXPECT "2" echo $?
+
+TEST $CLI volume set $V0 cluster.favorite-child-policy mtime
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" afr_get_split_brain_count $V0
+
+
+ls $M0/data > /dev/null
+EXPECT "0" echo $?
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" afr_get_split_brain_count $V0
+#Entry Split-brain is gone, but data self-heal is pending on the files
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+
+cat $M0/data/test > /dev/null
+cat $M0/data/test1 > /dev/null
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+files=$(count_files $M0/data)
+EXPECT "3" echo $files
+
+TEST force_umount $M0
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
+
+cleanup
diff --git a/tests/basic/afr/split-brain-heal-info.t b/tests/basic/afr/split-brain-heal-info.t
new file mode 100644
index 00000000000..2e4742fff08
--- /dev/null
+++ b/tests/basic/afr/split-brain-heal-info.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function volume_start_force()
+{
+ local vol=$1
+ TEST $CLI volume start $vol force
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $vol 0
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $vol 1
+}
+
+TESTS_EXPECTED_IN_LOOP=15
+SPB_FILES=0
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dspb
+TEST mkdir $M0/mspb
+TEST mkdir $M0/espb
+TEST touch $M0/dspb/file
+
+#### Simlulate data-split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST `echo "abc" > $M0/dspb/file`
+volume_start_force $V0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST `echo "def" > $M0/dspb/file`
+volume_start_force $V0
+SPB_FILES=$(($SPB_FILES + 1))
+
+### Simulate metadata-split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST chmod 757 $M0/mspb
+volume_start_force $V0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST chmod 747 $M0/mspb
+volume_start_force $V0
+SPB_FILES=$(($SPB_FILES + 1))
+
+#### Simulate entry-split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 0
+TEST touch $M0/espb/a
+volume_start_force $V0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 1
+TEST mkdir $M0/espb/a
+volume_start_force $V0
+SPB_FILES=$(($SPB_FILES + 1))
+
+#Multiply by 2, for each brick in replica pair
+SPB_FILES=$(($SPB_FILES * 2))
+EXPECT "$SPB_FILES" afr_get_split_brain_count $V0
+cleanup;
diff --git a/tests/basic/afr/split-brain-healing-ctime.t b/tests/basic/afr/split-brain-healing-ctime.t
new file mode 100644
index 00000000000..676788fce3f
--- /dev/null
+++ b/tests/basic/afr/split-brain-healing-ctime.t
@@ -0,0 +1,252 @@
+#!/bin/bash
+
+#Test the split-brain resolution CLI commands.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function get_replicate_subvol_number {
+ local filename=$1
+ #get_backend_paths
+ if [ -f $B0/${V0}1/$filename ]
+ then
+ echo 0
+ elif [ -f $B0/${V0}3/$filename ]
+ then echo 1
+ else
+ echo -1
+ fi
+}
+
+cleanup;
+
+AREQUAL_PATH=$(dirname $0)/../../utils
+GET_MDATA_PATH=$(dirname $0)/../../utils
+CFLAGS=""
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+cd $M0
+for i in {1..10}
+do
+ echo "Initial content">>file$i
+done
+
+replica_0_files_list=(`ls $B0/${V0}1|grep -v '^\.'`)
+replica_1_files_list=(`ls $B0/${V0}3|grep -v '^\.'`)
+
+############ Create data split-brain in the files. ###########################
+TEST kill_brick $V0 $H0 $B0/${V0}1
+for file in ${!replica_0_files_list[*]}
+do
+ echo "B1 is down">>${replica_0_files_list[$file]}
+done
+TEST kill_brick $V0 $H0 $B0/${V0}3
+for file in ${!replica_1_files_list[*]}
+do
+ echo "B3 is down">>${replica_1_files_list[$file]}
+done
+
+SMALLER_FILE_SIZE=$(stat -c %s file1)
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+for file in ${!replica_0_files_list[*]}
+do
+ echo "B2 is down">>${replica_0_files_list[$file]}
+ echo "appending more content to make it the bigger file">>${replica_0_files_list[$file]}
+done
+TEST kill_brick $V0 $H0 $B0/${V0}4
+for file in ${!replica_1_files_list[*]}
+do
+ echo "B4 is down">>${replica_1_files_list[$file]}
+ echo "appending more content to make it the bigger file">>${replica_1_files_list[$file]}
+done
+
+BIGGER_FILE_SIZE=$(stat -c %s file1)
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
+
+
+############### Acessing the files should now give EIO. ###############################
+TEST ! cat file1
+TEST ! cat file2
+TEST ! cat file3
+TEST ! cat file4
+TEST ! cat file5
+TEST ! cat file6
+TEST ! cat file7
+TEST ! cat file8
+TEST ! cat file9
+TEST ! cat file10
+###################
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
+
+################ Heal file1 using the bigger-file option ##############
+$CLI volume heal $V0 split-brain bigger-file /file1
+EXPECT "0" echo $?
+EXPECT $BIGGER_FILE_SIZE stat -c %s file1
+
+################ Heal file2 using the bigger-file option and its gfid ##############
+subvolume=$(get_replicate_subvol_number file2)
+if [ $subvolume == 0 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file2)
+elif [ $subvolume == 1 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file2)
+fi
+GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+$CLI volume heal $V0 split-brain bigger-file $GFIDSTR
+EXPECT "0" echo $?
+
+################ Heal file3 using the source-brick option ##############
+################ Use the brick having smaller file size as source #######
+subvolume=$(get_replicate_subvol_number file3)
+if [ $subvolume == 0 ]
+then
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 /file3
+elif [ $subvolume == 1 ]
+then
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3
+fi
+EXPECT "0" echo $?
+EXPECT $SMALLER_FILE_SIZE stat -c %s file3
+
+################ Heal file4 using the source-brick option and it's gfid ##############
+################ Use the brick having smaller file size as source #######
+subvolume=$(get_replicate_subvol_number file4)
+if [ $subvolume == 0 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file4)
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 $GFIDSTR
+elif [ $subvolume == 1 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file4)
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 $GFIDSTR
+fi
+EXPECT "0" echo $?
+EXPECT $SMALLER_FILE_SIZE stat -c %s file4
+
+# With ctime enabled, the ctime xattr ("trusted.glusterfs.mdata") gets healed
+# as part of metadata heal. So mtime would be same, hence it can't be healed
+# using 'latest-mtime' policy, use 'source-brick' option instead.
+################ Heal file5 using the source-brick option ##############
+subvolume=$(get_replicate_subvol_number file5)
+if [ $subvolume == 0 ]
+then
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /file5
+elif [ $subvolume == 1 ]
+then
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 /file5
+fi
+EXPECT "0" echo $?
+
+if [ $subvolume == 0 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}1/file5)
+ mtime2_after_heal=$(get_mtime $B0/${V0}2/file5)
+elif [ $subvolume == 1 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}3/file5)
+ mtime2_after_heal=$(get_mtime $B0/${V0}4/file5)
+fi
+
+#TODO: To below comparisons on full sub-second resolution
+
+TEST [ $mtime1_after_heal -eq $mtime2_after_heal ]
+
+mtime_mount_after_heal=$(stat -c %Y file5)
+
+TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ]
+
+################ Heal file6 using the source-brick option and its gfid ##############
+subvolume=$(get_replicate_subvol_number file6)
+if [ $subvolume == 0 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6)
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 $GFIDSTR
+elif [ $subvolume == 1 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6)
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3 $GFIDSTR
+fi
+EXPECT "0" echo $?
+
+if [ $subvolume == 0 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}1/file6)
+ mtime2_after_heal=$(get_mtime $B0/${V0}2/file6)
+elif [ $subvolume == 1 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}3/file6)
+ mtime2_after_heal=$(get_mtime $B0/${V0}4/file6)
+fi
+
+#TODO: To below comparisons on full sub-second resolution
+
+TEST [ $mtime1_after_heal -eq $mtime2_after_heal ]
+
+mtime_mount_after_heal=$(stat -c %Y file6)
+
+TEST [ $mtime1_after_heal -eq $mtime_mount_after_heal ]
+
+################ Heal remaining SB'ed files of replica_0 using B1 as source ##############
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1
+EXPECT "0" echo $?
+
+################ Heal remaining SB'ed files of replica_1 using B3 as source ##############
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3
+EXPECT "0" echo $?
+
+############### Reading the files should now succeed. ###############################
+TEST cat file1
+TEST cat file2
+TEST cat file3
+TEST cat file4
+TEST cat file5
+TEST cat file6
+TEST cat file7
+TEST cat file8
+TEST cat file9
+TEST cat file10
+
+################ File contents on the bricks must be same. ################################
+TEST diff <(arequal-checksum -p $B0/$V01 -i .glusterfs) <(arequal-checksum -p $B0/$V02 -i .glusterfs)
+TEST diff <(arequal-checksum -p $B0/$V03 -i .glusterfs) <(arequal-checksum -p $B0/$V04 -i .glusterfs)
+
+############### Trying to heal files not in SB should fail. ###############################
+$CLI volume heal $V0 split-brain bigger-file /file1
+EXPECT "1" echo $?
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3
+EXPECT "1" echo $?
+
+cd -
+TEST rm $AREQUAL_PATH/arequal-checksum
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
+cleanup
diff --git a/tests/basic/afr/split-brain-healing.t b/tests/basic/afr/split-brain-healing.t
new file mode 100644
index 00000000000..315e815eb7e
--- /dev/null
+++ b/tests/basic/afr/split-brain-healing.t
@@ -0,0 +1,261 @@
+#!/bin/bash
+
+#Test the split-brain resolution CLI commands.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function get_replicate_subvol_number {
+ local filename=$1
+ #get_backend_paths
+ if [ -f $B0/${V0}1/$filename ]
+ then
+ echo 0
+ elif [ -f $B0/${V0}3/$filename ]
+ then echo 1
+ else
+ echo -1
+ fi
+}
+
+cleanup;
+
+AREQUAL_PATH=$(dirname $0)/../../utils
+GET_MDATA_PATH=$(dirname $0)/../../utils
+CFLAGS=""
+test "`uname -s`" != "Linux" && {
+ CFLAGS="$CFLAGS -lintl";
+}
+build_tester $AREQUAL_PATH/arequal-checksum.c $CFLAGS
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 ctime off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+cd $M0
+for i in {1..10}
+do
+ echo "Initial content">>file$i
+done
+
+replica_0_files_list=(`ls $B0/${V0}1|grep -v '^\.'`)
+replica_1_files_list=(`ls $B0/${V0}3|grep -v '^\.'`)
+
+############ Create data split-brain in the files. ###########################
+TEST kill_brick $V0 $H0 $B0/${V0}1
+for file in ${!replica_0_files_list[*]}
+do
+ echo "B1 is down">>${replica_0_files_list[$file]}
+done
+TEST kill_brick $V0 $H0 $B0/${V0}3
+for file in ${!replica_1_files_list[*]}
+do
+ echo "B3 is down">>${replica_1_files_list[$file]}
+done
+
+SMALLER_FILE_SIZE=$(stat -c %s file1)
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+for file in ${!replica_0_files_list[*]}
+do
+ echo "B2 is down">>${replica_0_files_list[$file]}
+ echo "appending more content to make it the bigger file">>${replica_0_files_list[$file]}
+done
+TEST kill_brick $V0 $H0 $B0/${V0}4
+for file in ${!replica_1_files_list[*]}
+do
+ echo "B4 is down">>${replica_1_files_list[$file]}
+ echo "appending more content to make it the bigger file">>${replica_1_files_list[$file]}
+done
+
+BIGGER_FILE_SIZE=$(stat -c %s file1)
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
+
+
+############### Acessing the files should now give EIO. ###############################
+TEST ! cat file1
+TEST ! cat file2
+TEST ! cat file3
+TEST ! cat file4
+TEST ! cat file5
+TEST ! cat file6
+TEST ! cat file7
+TEST ! cat file8
+TEST ! cat file9
+TEST ! cat file10
+###################
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
+
+################ Heal file1 using the bigger-file option ##############
+$CLI volume heal $V0 split-brain bigger-file /file1
+EXPECT "0" echo $?
+EXPECT $BIGGER_FILE_SIZE stat -c %s file1
+
+################ Heal file2 using the bigger-file option and its gfid ##############
+subvolume=$(get_replicate_subvol_number file2)
+if [ $subvolume == 0 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file2)
+elif [ $subvolume == 1 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file2)
+fi
+GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+$CLI volume heal $V0 split-brain bigger-file $GFIDSTR
+EXPECT "0" echo $?
+
+################ Heal file3 using the source-brick option ##############
+################ Use the brick having smaller file size as source #######
+subvolume=$(get_replicate_subvol_number file3)
+if [ $subvolume == 0 ]
+then
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 /file3
+elif [ $subvolume == 1 ]
+then
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3
+fi
+EXPECT "0" echo $?
+EXPECT $SMALLER_FILE_SIZE stat -c %s file3
+
+################ Heal file4 using the source-brick option and it's gfid ##############
+################ Use the brick having smaller file size as source #######
+subvolume=$(get_replicate_subvol_number file4)
+if [ $subvolume == 0 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file4)
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 $GFIDSTR
+elif [ $subvolume == 1 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file4)
+ GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+ $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 $GFIDSTR
+fi
+EXPECT "0" echo $?
+EXPECT $SMALLER_FILE_SIZE stat -c %s file4
+
+################ Heal file5 using the latest-mtime option ##############
+subvolume=$(get_replicate_subvol_number file5)
+if [ $subvolume == 0 ]
+then
+ mtime1=$(get_mtime $B0/${V0}1/file5)
+ mtime2=$(get_mtime $B0/${V0}2/file5)
+ LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
+elif [ $subvolume == 1 ]
+then
+ mtime1=$(get_mtime $B0/${V0}3/file5)
+ mtime2=$(get_mtime $B0/${V0}4/file5)
+ LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
+fi
+$CLI volume heal $V0 split-brain latest-mtime /file5
+EXPECT "0" echo $?
+
+if [ $subvolume == 0 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}1/file5)
+ mtime2_after_heal=$(get_mtime $B0/${V0}2/file5)
+elif [ $subvolume == 1 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}3/file5)
+ mtime2_after_heal=$(get_mtime $B0/${V0}4/file5)
+fi
+
+#TODO: To below comparisons on full sub-second resolution
+
+TEST [ $LATEST_MTIME -eq $mtime1_after_heal ]
+TEST [ $LATEST_MTIME -eq $mtime2_after_heal ]
+
+mtime_mount_after_heal=$(stat -c %Y file5)
+
+TEST [ $LATEST_MTIME -eq $mtime_mount_after_heal ]
+
+################ Heal file6 using the latest-mtime option and its gfid ##############
+subvolume=$(get_replicate_subvol_number file6)
+if [ $subvolume == 0 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}1/file6)
+ mtime1=$(get_mtime $B0/${V0}1/file6)
+ mtime2=$(get_mtime $B0/${V0}2/file6)
+ LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
+elif [ $subvolume == 1 ]
+then
+ GFID=$(gf_get_gfid_xattr $B0/${V0}3/file6)
+ mtime1=$(get_mtime $B0/${V0}3/file6)
+ mtime2=$(get_mtime $B0/${V0}4/file6)
+ LATEST_MTIME=$(($mtime1 > $mtime2 ? $mtime1:$mtime2))
+fi
+GFIDSTR="gfid:$(gf_gfid_xattr_to_str $GFID)"
+$CLI volume heal $V0 split-brain latest-mtime $GFIDSTR
+EXPECT "0" echo $?
+
+if [ $subvolume == 0 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}1/file6)
+ mtime2_after_heal=$(get_mtime $B0/${V0}2/file6)
+elif [ $subvolume == 1 ]
+then
+ mtime1_after_heal=$(get_mtime $B0/${V0}3/file6)
+ mtime2_after_heal=$(get_mtime $B0/${V0}4/file6)
+fi
+
+#TODO: To below comparisons on full sub-second resolution
+
+TEST [ $LATEST_MTIME -eq $mtime1_after_heal ]
+TEST [ $LATEST_MTIME -eq $mtime2_after_heal ]
+
+mtime_mount_after_heal=$(stat -c %Y file6)
+
+TEST [ $LATEST_MTIME -eq $mtime_mount_after_heal ]
+
+################ Heal remaining SB'ed files of replica_0 using B1 as source ##############
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1
+EXPECT "0" echo $?
+
+################ Heal remaining SB'ed files of replica_1 using B3 as source ##############
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}3
+EXPECT "0" echo $?
+
+############### Reading the files should now succeed. ###############################
+TEST cat file1
+TEST cat file2
+TEST cat file3
+TEST cat file4
+TEST cat file5
+TEST cat file6
+TEST cat file7
+TEST cat file8
+TEST cat file9
+TEST cat file10
+
+################ File contents on the bricks must be same. ################################
+TEST diff <(arequal-checksum -p $B0/$V01 -i .glusterfs) <(arequal-checksum -p $B0/$V02 -i .glusterfs)
+TEST diff <(arequal-checksum -p $B0/$V03 -i .glusterfs) <(arequal-checksum -p $B0/$V04 -i .glusterfs)
+
+############### Trying to heal files not in SB should fail. ###############################
+$CLI volume heal $V0 split-brain bigger-file /file1
+EXPECT "1" echo $?
+$CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}4 /file3
+EXPECT "1" echo $?
+
+cd -
+TEST rm $AREQUAL_PATH/arequal-checksum
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
+cleanup
diff --git a/tests/basic/afr/split-brain-open.t b/tests/basic/afr/split-brain-open.t
new file mode 100644
index 00000000000..9b2f2856047
--- /dev/null
+++ b/tests/basic/afr/split-brain-open.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+
+#Disable self-heal-daemon
+TEST $CLI volume heal $V0 disable
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+TEST touch $M0/data-split-brain.txt
+
+#Create data split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+`echo "brick1_alive" > $M0/data-split-brain.txt`
+TEST [ $? == 0 ];
+
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+`echo "brick0_alive" > $M0/data-split-brain.txt`
+TEST [ $? == 0 ];
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+echo "all-alive" >> $M0/data-split-brain.txt
+TEST [ $? != 0 ];
+
+cleanup;
diff --git a/tests/basic/afr/split-brain-resolution.t b/tests/basic/afr/split-brain-resolution.t
new file mode 100644
index 00000000000..834237c96ec
--- /dev/null
+++ b/tests/basic/afr/split-brain-resolution.t
@@ -0,0 +1,105 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+function get_split_brain_status {
+ local path=$1
+ echo `getfattr -n replica.split-brain-status $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//'
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume start $V0
+
+#Disable self-heal-daemon
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST `echo "some-data" > $M0/data-split-brain.txt`
+TEST `echo "some-data" > $M0/metadata-split-brain.txt`
+
+#Create data and metadata split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST `echo "brick1_alive" > $M0/data-split-brain.txt`
+TEST setfattr -n user.test -v brick1 $M0/metadata-split-brain.txt
+
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST `echo "brick0_alive" > $M0/data-split-brain.txt`
+TEST setfattr -n user.test -v brick0 $M0/metadata-split-brain.txt
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+EXPECT 4 get_pending_heal_count $V0
+
+TEST ! cat $M0/data-split-brain.txt
+TEST ! getfattr -n user.test $M0/metadata-split-brain.txt
+
+#Inspect file in data-split-brain
+EXPECT "data-split-brain:yes metadata-split-brain:no Choices:patchy-client-0,patchy-client-1" get_split_brain_status $M0/data-split-brain.txt
+TEST setfattr -n replica.split-brain-choice -v $V0-client-0 $M0/data-split-brain.txt
+
+#Should now be able to read the contents of data-split-brain.txt
+EXPECT "brick0_alive" cat $M0/data-split-brain.txt
+
+TEST setfattr -n replica.split-brain-choice-timeout -v 10 $M0/
+TEST setfattr -n replica.split-brain-choice -v $V0-client-1 $M0/data-split-brain.txt
+
+#Should now be able to read the contents of data-split-brain.txt
+EXPECT "brick1_alive" cat $M0/data-split-brain.txt
+
+#Inspect the file in metadata-split-brain
+EXPECT "data-split-brain:no metadata-split-brain:yes Choices:patchy-client-0,patchy-client-1" get_split_brain_status $M0/metadata-split-brain.txt
+TEST setfattr -n replica.split-brain-choice -v $V0-client-0 $M0/metadata-split-brain.txt
+
+EXPECT "brick0" get_text_xattr user.test $M0/metadata-split-brain.txt
+
+TEST setfattr -n replica.split-brain-choice -v $V0-client-1 $M0/metadata-split-brain.txt
+EXPECT "brick1" get_text_xattr user.test $M0/metadata-split-brain.txt
+
+#Check that setting split-brain-choice to "none" results in EIO again
+TEST setfattr -n replica.split-brain-choice -v none $M0/metadata-split-brain.txt
+TEST setfattr -n replica.split-brain-choice -v none $M0/data-split-brain.txt
+TEST ! getfattr -n user.test $M0/metadata-split-brain.txt
+TEST ! cat $M0/data-split-brain.txt
+
+#Check that after timeout fops result in EIO again.
+#Set one minute timeout
+TEST setfattr -n replica.split-brain-choice-timeout -v 1 $M0/
+TEST setfattr -n replica.split-brain-choice -v $V0-client-1 $M0/data-split-brain.txt
+EXPECT "brick1_alive" cat $M0/data-split-brain.txt
+TEST setfattr -n replica.split-brain-choice -v $V0-client-0 $M0/metadata-split-brain.txt
+EXPECT "brick0" get_text_xattr user.test $M0/metadata-split-brain.txt
+#Wait until timeout completes and test that the fops fail again
+sleep 62
+TEST ! getfattr -n user.test $M0/metadata-split-brain.txt
+TEST ! cat $M0/data-split-brain.txt
+
+#Negative test cases should fail
+TEST ! setfattr -n replica.split-brain-choice -v $V0-client-4 $M0/data-split-brain.txt
+TEST ! setfattr -n replica.split-brain-heal-finalize -v $V0-client-4 $M0/metadata-split-brain.txt
+
+#Heal the files
+TEST setfattr -n replica.split-brain-heal-finalize -v $V0-client-0 $M0/metadata-split-brain.txt
+TEST setfattr -n replica.split-brain-heal-finalize -v $V0-client-1 $M0/data-split-brain.txt
+
+EXPECT "brick0" get_text_xattr user.test $M0/metadata-split-brain.txt
+EXPECT "brick1_alive" cat $M0/data-split-brain.txt
+
+EXPECT 0 get_pending_heal_count $V0
+
+cleanup;
+
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/basic/afr/stale-file-lookup.t b/tests/basic/afr/stale-file-lookup.t
new file mode 100644
index 00000000000..f30422fd009
--- /dev/null
+++ b/tests/basic/afr/stale-file-lookup.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+#This file checks if stale file lookup fails or not.
+#A file is deleted when a brick was down. Before self-heal could happen to it
+#the file is accessed. It should fail.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST rm -f $M0/a
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST stat $B0/${V0}0/a
+TEST ! stat $B0/${V0}1/a
+TEST ! ls -l $M0/a
+
+cleanup
diff --git a/tests/basic/afr/ta-check-locks.t b/tests/basic/afr/ta-check-locks.t
new file mode 100644
index 00000000000..c0102c35b7b
--- /dev/null
+++ b/tests/basic/afr/ta-check-locks.t
@@ -0,0 +1,68 @@
+#!/bin/bash
+#This test checks if all the locks on
+#ta file are being held and released properly
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+
+function get_lock_count_on_ta()
+{
+ tapid=`cat $B0/ta.pid`
+ local sfile=$(generate_statedump $tapid)
+ count=$(grep "inodelk-count" $sfile | cut -f2 -d'=' | tail -1)
+ ncount=$(grep "inodelk.inodelk" $sfile | grep "len=1" | wc -l)
+ echo "count = $count : ncount = $ncount"
+ if [ "$count" = "" ]
+ then
+ count=0
+ fi
+
+ if [ "$count" -eq "$ncount" ]
+ then
+ echo "$count"
+ else
+ echo "-1"
+ fi
+}
+
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+TEST ta_start_mount_process $M0
+TEST ta_start_mount_process $M1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M1 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST ta_create_shd_volfile brick0 brick1 ta
+TEST ta_start_shd_process glustershd
+shd_pid=$(cat $B0/glustershd.pid)
+
+TEST touch $M0/a.txt
+echo "Hello" >> $M0/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "0" get_lock_count_on_ta
+
+TEST ta_kill_brick brick0
+echo "Hello" >> $M0/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "1" get_lock_count_on_ta
+
+echo "Hello" >> $M1/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "2" get_lock_count_on_ta
+
+echo "xyz" >> $M0/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "2" get_lock_count_on_ta
+
+chmod 0666 $M0/a.txt
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "2" get_lock_count_on_ta
+
+TEST ta_start_brick_process brick0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_lock_count_on_ta
+
+cleanup;
diff --git a/tests/basic/afr/ta-read.t b/tests/basic/afr/ta-read.t
new file mode 100644
index 00000000000..3cfc16b9b8a
--- /dev/null
+++ b/tests/basic/afr/ta-read.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+# Test read transaction logic for thin-arbiter.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST touch $M0/FILE
+TEST ls $B0/brick0/FILE
+TEST ls $B0/brick1/FILE
+TEST ! ls $B0/ta/FILE
+
+# Kill one brick and write to FILE.
+TEST ta_kill_brick brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0
+echo "brick0 down">> $M0/FILE
+TEST [ $? -eq 0 ]
+EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/FILE
+EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.patchy-ta-2
+
+#Umount and mount to remove cached data.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
+# Read must be allowed since good brick is up.
+TEST cat $M0/FILE
+
+#Umount and mount to remove cached data.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+# Toggle good and bad data brick processes.
+TEST ta_start_brick_process brick0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
+TEST ta_kill_brick brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
+# Read must now fail.
+TEST ! cat $M0/FILE
+
+# Bring all data bricks up, and kill TA.
+TEST ta_start_brick_process brick1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
+TA_PID=$(ta_get_pid_by_brick_name ta)
+TEST [ -n $TA_PID ]
+TEST ta_kill_brick ta
+TA_PID=$(ta_get_pid_by_brick_name ta)
+TEST [ -z $TA_PID ]
+# Read must now succeed.
+TEST cat $M0/FILE
+cleanup;
diff --git a/tests/basic/afr/ta-shd.t b/tests/basic/afr/ta-shd.t
new file mode 100644
index 00000000000..96ecfc678e0
--- /dev/null
+++ b/tests/basic/afr/ta-shd.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+#Self-heal tests
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST ta_create_shd_volfile brick0 brick1 ta
+TEST ta_start_shd_process glustershd
+
+TEST touch $M0/a.txt
+TEST ta_kill_brick brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0
+echo "Hello" >> $M0/a.txt
+EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/a.txt
+EXPECT "000000010000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.$V0-ta-2
+
+#TODO: After the write txn changes are merged, take statedump of TA process and
+#check whether AFR_TA_DOM_NOTIFY lock is held by the client here. Take the
+#statedump again after line #38 to check AFR_TA_DOM_NOTIFY lock is released by
+#the SHD process.
+
+TEST ta_start_brick_process brick0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/a.txt
+EXPECT_WITHIN $HEAL_TIMEOUT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.$V0-ta-2
+
+#Kill the previously up brick and try reading from other brick. Since the heal
+#has happened file content should be same.
+TEST ta_kill_brick brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
+#Umount and mount to remove cached data.
+TEST umount $M0
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT "Hello" cat $M0/a.txt
+cleanup;
diff --git a/tests/basic/afr/ta-write-on-bad-brick.t b/tests/basic/afr/ta-write-on-bad-brick.t
new file mode 100644
index 00000000000..096ca9f47cf
--- /dev/null
+++ b/tests/basic/afr/ta-write-on-bad-brick.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST touch $M0/a.txt
+TEST ls $B0/brick0/a.txt
+TEST ls $B0/brick1/a.txt
+TEST ! ls $B0/ta/a.txt
+
+TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5
+
+#Good Data brick is down. TA and bad brick are UP
+
+TEST ta_kill_brick brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
+TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5
+TEST ta_kill_brick brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 0
+TEST ta_start_brick_process brick1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
+TEST ! dd if=/dev/zero of=$M0/a.txt bs=1M count=5
+
+# Good Data brick is UP. Bad and TA are down
+TEST ta_kill_brick brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ta_mount_child_up_status $M0 $V0 1
+TEST ta_start_brick_process brick0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
+TEST ta_kill_brick ta
+TEST ! dd if=/dev/zero of=$M0/a.txt bs=1M count=5
+
+# Good and Bad data bricks are UP. TA is down
+TEST ta_start_brick_process brick1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_mount_child_up_status $M0 $V0 0
+TEST dd if=/dev/zero of=$M0/a.txt bs=1M count=5
+
+cleanup;
diff --git a/tests/basic/afr/ta.t b/tests/basic/afr/ta.t
new file mode 100644
index 00000000000..05d48431c95
--- /dev/null
+++ b/tests/basic/afr/ta.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+#Self-heal tests
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST touch $M0/a.txt
+TEST ls $B0/brick0/a.txt
+TEST ls $B0/brick1/a.txt
+TEST ! ls $B0/ta/a.txt
+
+TEST ta_kill_brick brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST touch $M0/b.txt
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1
+EXPECT "000000010000000200000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/brick1/b.txt
+#New entry mark lead to pending data on the file and on ta
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.patchy-ta-2
+TEST ! ls $B0/brick0/b.txt
+TEST ls $B0/brick1/b.txt
+
+#Try to create an entry while good brick is down and bad brick is UP. Should not create
+TEST ta_start_brick_process brick0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST ta_kill_brick brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 1
+TEST ! touch $M0/d.txt
+EXPECT "000000010000000100000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/ta/trusted.afr.patchy-ta-2
+
+TEST ta_start_brick_process brick1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+TEST ta_kill_brick brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" afr_child_up_status_meta $M0 $V0-replicate-0 0
+
+TEST ta_kill_brick ta
+# Entry create must fail if only one brick is UP, even if that is a good brick.
+TEST ! touch $M0/c.txt
+TEST ! ls $B0/brick0/c.txt
+TEST ! ls $B0/brick1/c.txt
+
+cleanup;
diff --git a/tests/basic/afr/tarissue.t b/tests/basic/afr/tarissue.t
new file mode 100644
index 00000000000..83f7463130c
--- /dev/null
+++ b/tests/basic/afr/tarissue.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+TESTS_EXPECTED_IN_LOOP=10
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+
+#Create a distributed-replicate volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..6};
+TEST $CLI volume set $V0 cluster.consistent-metadata on
+TEST $CLI volume set $V0 cluster.post-op-delay-secs 0
+TEST $CLI volume set $V0 nfs.rdirplus off
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+# Mount NFS
+mount_nfs $H0:/$V0 $N0 vers=3
+
+#Create files
+TEST mkdir -p $N0/nfs/dir1/dir2
+for i in {1..10}; do
+ TEST_IN_LOOP dd if=/dev/urandom of=$N0/nfs/dir1/dir2/file$i bs=1024k count=1
+done
+TEST tar cvf /tmp/dir1.tar.gz $N0/nfs/dir1
+
+TEST rm -f /tmp/dir1.tar.gz
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+cleanup;
diff --git a/tests/basic/all_squash.t b/tests/basic/all_squash.t
new file mode 100644
index 00000000000..29766c50af7
--- /dev/null
+++ b/tests/basic/all_squash.t
@@ -0,0 +1,74 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+# random uid/gid
+uid=22162
+gid=5845
+
+TEST $CLI volume set $V0 server.anonuid $uid;
+TEST $CLI volume set $V0 server.anongid $gid;
+
+# Ensure server.all-squash is disabled
+TEST $CLI volume set $V0 server.all-squash disable;
+
+# Tests for the fuse mount
+mkdir $M0/other;
+chown $uid:$gid $M0/other;
+
+TEST $CLI volume set $V0 server.all-squash enable;
+
+touch $M0/file 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $M0/dir 2>/dev/null;
+TEST [ $? -ne 0 ]
+
+TEST touch $M0/other/file 2>/dev/null;
+TEST [ "$(stat -c %u:%g $M0/other/file)" = "$uid:$gid" ];
+TEST mkdir $M0/other/dir 2>/dev/null;
+TEST [ "$(stat -c %u:%g $M0/other/dir)" = "$uid:$gid" ];
+
+TEST $CLI volume set $V0 server.all-squash disable;
+TEST rm -rf $M0/other;
+
+sleep 1;
+
+# tests for nfs mount
+mkdir $N0/other;
+chown $uid:$gid $N0/other;
+
+TEST $CLI volume set $V0 server.all-squash enable;
+
+touch $N0/file 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $N0/dir 2>/dev/null;
+TEST [ $? -ne 0 ]
+
+TEST touch $N0/other/file 2>/dev/null;
+TEST [ "$(stat -c %u:%g $N0/other/file)" = "$uid:$gid" ];
+TEST mkdir $N0/other/dir 2>/dev/null;
+TEST [ "$(stat -c %u:%g $N0/other/dir)" = "$uid:$gid" ];
+
+TEST $CLI volume set $V0 server.all-squash disable;
+TEST rm -rf $N0/other;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/basic/cdc.t b/tests/basic/cdc.t
new file mode 100755
index 00000000000..8653a77207a
--- /dev/null
+++ b/tests/basic/cdc.t
@@ -0,0 +1,148 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+function file_mime_type () {
+ mime_type=$(file --mime $1 2>/dev/null | sed '/^[^:]*: /s///')
+ echo $mime_type
+}
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a volume with one brick
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '1' brick_count $V0
+
+## Turn off performance translators
+## This is required for testing readv calls
+TEST $CLI volume set $V0 performance.io-cache off
+EXPECT 'off' volinfo_field $V0 'performance.io-cache'
+TEST $CLI volume set $V0 performance.quick-read off
+EXPECT 'off' volinfo_field $V0 'performance.quick-read'
+
+TEST $CLI volume set $V0 performance.strict-write-ordering on
+EXPECT 'on' volinfo_field $V0 'performance.strict-write-ordering'
+
+## Turn on cdc xlator by setting network.compression to on
+TEST $CLI volume set $V0 network.compression on
+EXPECT 'on' volinfo_field $V0 'network.compression'
+
+## Make sure that user cannot change network.compression.mode
+## This would break the cdc xlator if allowed!
+TEST ! $CLI volume set $V0 network.compression.mode client
+
+## Turn on network.compression.debug option
+## This will dump compressed data onto disk as gzip file
+## This is used to check if compression actually happened
+TEST $CLI volume set $V0 network.compression.debug on
+EXPECT 'on' volinfo_field $V0 'network.compression.debug'
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+sleep 2
+## Mount FUSE with caching disabled
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+####################
+## Testing writev ##
+####################
+
+## Create a 1K file locally and find the md5sum
+TEST dd if=/dev/zero of=/tmp/cdc-orig count=1 bs=1k 2>/dev/null
+checksum[original-file]=`md5sum /tmp/cdc-orig | cut -d' ' -f1`
+
+## Copy the file to mountpoint and find its md5sum on brick
+TEST dd if=/tmp/cdc-orig of=$M0/cdc-server count=1 bs=1k 2>/dev/null
+checksum[brick-file]=`md5sum $B0/${V0}1/cdc-server | cut -d' ' -f1`
+
+## Uncompress the gzip dump file and find its md5sum
+# mime outputs for gzip are different for file version > 5.14
+TEST touch /tmp/gzipfile
+TEST gzip /tmp/gzipfile
+GZIP_MIME_TYPE=$(file_mime_type /tmp/gzipfile.gz)
+
+TEST rm -f /tmp/gzipfile.gz
+
+EXPECT "$GZIP_MIME_TYPE" echo $(file_mime_type /tmp/cdcdump.gz)
+
+TEST gunzip -f /tmp/cdcdump.gz
+checksum[dump-file-writev]=`md5sum /tmp/cdcdump | cut -d' ' -f1`
+
+## Check if all 3 checksums are same
+TEST test ${checksum[original-file]} = ${checksum[brick-file]}
+TEST test ${checksum[brick-file]} = ${checksum[dump-file-writev]}
+
+## Cleanup files
+TEST rm -f /tmp/cdcdump.gz
+
+###################
+## Testing readv ##
+###################
+
+## Copy file from mount point to client and find checksum
+TEST dd if=$M0/cdc-server of=/tmp/cdc-client count=1 bs=1k 2>/dev/null
+checksum[client-file]=`md5sum /tmp/cdc-client | cut -d' ' -f1`
+
+## Uncompress the gzip dump file and find its md5sum
+# mime outputs for gzip are different for file version > 5.14
+EXPECT "$GZIP_MIME_TYPE" echo $(file_mime_type /tmp/cdcdump.gz)
+
+TEST gunzip -f /tmp/cdcdump.gz
+checksum[dump-file-readv]=`md5sum /tmp/cdcdump | cut -d' ' -f1`
+
+## Check if all 3 checksums are same
+TEST test ${checksum[brick-file]} = ${checksum[client-file]}
+TEST test ${checksum[client-file]} = ${checksum[dump-file-readv]}
+
+## Cleanup files and unmount
+TEST rm -f /tmp/cdc* $M0/cdc*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+## Stop the volume
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+## Turn on network.compression.min-size and set it to 100 bytes
+## Compression should not take place if file size
+## is less than 100 bytes
+TEST $CLI volume set $V0 network.compression.min-size 100
+EXPECT '100' volinfo_field $V0 'network.compression.min-size'
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE with caching disabled
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+## Create a file of size 99 bytes on mountpoint
+## This is should not be compressed
+TEST dd if=/dev/zero of=$M0/cdc-small count=1 bs=99 2>/dev/null
+TEST ! test -e /tmp/cdcdump.gz
+
+## Cleanup files and unmount
+TEST rm -f /tmp/cdc* $M0/cdc*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+## Stop the volume
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+## Reset the network.compression options
+TEST $CLI volume reset $V0 network.compression.debug
+TEST $CLI volume reset $V0 network.compression.min-size
+TEST $CLI volume reset $V0 network.compression
+
+## Delete the volume
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/basic/changelog/changelog-api.t b/tests/basic/changelog/changelog-api.t
new file mode 100644
index 00000000000..516c2f2f60d
--- /dev/null
+++ b/tests/basic/changelog/changelog-api.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../env.rc
+
+cleanup;
+
+CHANGELOG_BIN_PATH=$(dirname $0)/../../utils/changelog
+build_tester $CHANGELOG_BIN_PATH/test-changelog-api.c -lgfchangelog
+
+CHANGELOG_PATH_0="$B0/${V0}0/.glusterfs/changelogs"
+ROLLOVER_TIME=2
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+sleep 3;
+
+#Listen to changelog journal notifcations
+$CHANGELOG_BIN_PATH/test-changelog-api &
+for i in {1..12};do echo "data" > $M0/file$i 2>/dev/null; sleep 1;done &
+
+#Wait for changelogs to be in .processed directory
+sleep 12
+
+EXPECT "Y" processed_changelogs "/tmp/scratch_v1/.processed"
+TEST rm $CHANGELOG_BIN_PATH/test-changelog-api
+rm -rf /tmp/scratch_v1
+
+cleanup;
diff --git a/tests/basic/changelog/changelog-history.t b/tests/basic/changelog/changelog-history.t
new file mode 100644
index 00000000000..ea952619652
--- /dev/null
+++ b/tests/basic/changelog/changelog-history.t
@@ -0,0 +1,91 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../env.rc
+
+cleanup;
+
+SCRIPT_TIMEOUT=300
+HISTORY_BIN_PATH=$(dirname $0)/../../utils/changelog
+build_tester $HISTORY_BIN_PATH/get-history.c -lgfchangelog
+
+time_before_enable1=$(date '+%s')
+CHANGELOG_PATH_0="$B0/${V0}0/.glusterfs/changelogs"
+ROLLOVER_TIME=2
+
+TEST glusterd
+TEST pidof glusterd
+
+sleep 3
+time_before_enable2=$(date '+%s')
+
+sleep 3
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+
+sleep 3
+time_after_enable1=$(date '+%s')
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+touch $M0/file{1..10}
+
+sleep 3
+time_after_enable2=$(date '+%s')
+
+let time_future=time_after_enable2+600
+
+#Fails as start falls before changelog enable
+EXPECT "-3" $HISTORY_BIN_PATH/get-history $time_before_enable1 $time_before_enable2
+
+#Fails as start falls before changelog enable
+EXPECT "-3" $HISTORY_BIN_PATH/get-history $time_before_enable2 $time_after_enable1
+
+#Passes as start and end falls in same htime file
+EXPECT "0" $HISTORY_BIN_PATH/get-history $time_after_enable1 $time_after_enable2
+
+#Passes, gives the changelogs till continuous changelogs are available
+# but returns 1
+EXPECT "1" $HISTORY_BIN_PATH/get-history $time_after_enable2 $time_future
+
+#Disable and enable changelog
+TEST $CLI volume set $V0 changelog.changelog off
+sleep 6
+time_between_htime=$(date '+%s')
+sleep 6
+TEST $CLI volume set $V0 changelog.changelog on
+
+sleep 6
+touch $M0/test{1..10}
+time_in_sec_htime1=$(date '+%s')
+
+sleep 6
+touch $M0/test1{1..10}
+time_in_sec_htime2=$(date '+%s')
+
+sleep 3
+TEST $CLI volume set $V0 changelog.changelog off
+sleep 3
+time_after_disable=$(date '+%s')
+
+TEST $CLI volume set $V0 changelog.changelog on
+sleep 5
+
+#Passes, gives the changelogs till continuous changelogs are available
+# but returns 1
+EXPECT_WITHIN 10 "1" $HISTORY_BIN_PATH/get-history $time_after_enable1 $time_in_sec_htime2
+
+#Fails as start falls between htime files
+EXPECT_WITHIN 10 "-3" $HISTORY_BIN_PATH/get-history $time_between_htime $time_in_sec_htime1
+
+#Passes as start and end falls in same htime file
+EXPECT_WITHIN 10 "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime1 $time_in_sec_htime2
+
+#Passes, gives the changelogs till continuous changelogs are available
+EXPECT_WITHIN 10 "0" $HISTORY_BIN_PATH/get-history $time_in_sec_htime2 $time_after_disable
+
+TEST rm $HISTORY_BIN_PATH/get-history
+rm -rf /tmp/scratch_v1/*
+
+cleanup;
diff --git a/tests/basic/changelog/changelog-rename.t b/tests/basic/changelog/changelog-rename.t
new file mode 100644
index 00000000000..9a0ef527b5b
--- /dev/null
+++ b/tests/basic/changelog/changelog-rename.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+CHANGELOG_PATH_0="$B0/${V0}0/.glusterfs/changelogs"
+ROLLOVER_TIME=30
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+touch $M0/file1
+mv $M0/file1 $M0/rn_file1
+mkdir $M0/dir1
+mv $M0/dir1 $M0/rn_dir1
+
+EXPECT "2" check_changelog_op ${CHANGELOG_PATH_0} "RENAME"
+
+cleanup;
+
+#####Test on multiple subvolume#####
+#==========================================#
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+touch $M0/gluster_file
+mv $M0/gluster_file $M0/rn_gluster_file
+mkdir $M0/dir1
+mv $M0/dir1 $M0/rn_dir1
+
+EXPECT "2" check_changelog_op ${CHANGELOG_PATH_0} "RENAME"
+
+cleanup;
diff --git a/tests/basic/changelog/history-api.t b/tests/basic/changelog/history-api.t
new file mode 100644
index 00000000000..9e63118cef9
--- /dev/null
+++ b/tests/basic/changelog/history-api.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../env.rc
+
+cleanup;
+
+HISTORY_BIN_PATH=$(dirname $0)/../../utils/changelog
+build_tester $HISTORY_BIN_PATH/test-history-api.c -lgfchangelog
+
+CHANGELOG_PATH_0="$B0/${V0}0/.glusterfs/changelogs"
+ROLLOVER_TIME=2
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+
+sleep 3
+start=$(date '+%s')
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+touch $M0/file{1..10}
+
+for i in {1..12};do echo "data" > $M0/file$i; sleep 1;done
+end=$(date '+%s')
+sleep 2
+
+#Passes as start and end falls in same htime file
+EXPECT "0" $HISTORY_BIN_PATH/test-history-api $start $end
+
+#Wait for changelogs to be in .processed directory
+sleep 2
+
+EXPECT "Y" processed_changelogs "/tmp/scratch_v1/.history/.processed"
+TEST rm $HISTORY_BIN_PATH/test-history-api
+rm -rf /tmp/scratch_v1
+
+cleanup;
diff --git a/tests/basic/cloudsync-sanity.t b/tests/basic/cloudsync-sanity.t
new file mode 100644
index 00000000000..834ba96430c
--- /dev/null
+++ b/tests/basic/cloudsync-sanity.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6,7,8,9};
+TEST $CLI volume set $V0 features.cloudsync enable;
+TEST $CLI volume start $V0;
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+# This test covers lookup, mkdir, mknod, symlink, link, rename,
+# create operations
+TEST $(dirname $0)/rpc-coverage.sh $M1
+
+
+TEST cp $(dirname ${0})/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+./glfsxmp $V0 $H0
+cleanup_tester ./glfsxmp
+rm ./glfsxmp.c
+
+cleanup;
diff --git a/tests/basic/ctime/ctime-ec-heal.t b/tests/basic/ctime/ctime-ec-heal.t
new file mode 100644
index 00000000000..142237c5014
--- /dev/null
+++ b/tests/basic/ctime/ctime-ec-heal.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+#
+# This will test self healing of ctime xattr 'trusted.glusterfs.mdata'
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{1..3}
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+
+# Create files
+mkdir $M0/dir1
+echo "Initial content" > $M0/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+# Kill brick
+TEST kill_brick $V0 $H0 $B0/${V0}3
+
+echo "B3 is down" >> $M0/file1
+echo "Change dir1 time attributes" > $M0/dir1/dir1_file1
+echo "Entry heal file" > $M0/entry_heal_file1
+mkdir $M0/entry_heal_dir1
+
+# Check xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
+
+TEST $CLI volume start $V0 force
+$CLI volume heal $V0
+
+# Check xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
+
+cleanup;
diff --git a/tests/basic/ctime/ctime-ec-rebalance.t b/tests/basic/ctime/ctime-ec-rebalance.t
new file mode 100644
index 00000000000..2b73bcdd103
--- /dev/null
+++ b/tests/basic/ctime/ctime-ec-rebalance.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# This will test healing of ctime xattr 'trusted.glusterfs.mdata' after add-brick and rebalance
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+# Create files
+mkdir $M0/dir1
+echo "test data" > $M0/dir1/file1
+
+# Add brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..8}
+
+#Trigger rebalance
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+#Verify ctime xattr heal on directory
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}6/dir1"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}7/dir1"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}8/dir1"
+
+b6_mdata=$(get_mdata "$B0/${V0}6/dir1")
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}7/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}8/dir1
+
+cleanup;
diff --git a/tests/basic/ctime/ctime-glfs-init.c b/tests/basic/ctime/ctime-glfs-init.c
new file mode 100644
index 00000000000..e4f197b8f30
--- /dev/null
+++ b/tests/basic/ctime/ctime-glfs-init.c
@@ -0,0 +1,68 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ glfs_t *fs = NULL;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile_server failed");
+ goto err;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto err;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto err;
+ }
+
+ glfs_fini(fs);
+ fs = NULL;
+ return 0;
+err:
+ glfs_fini(fs);
+ fs = NULL;
+
+ return -1;
+}
diff --git a/tests/basic/ctime/ctime-glfs-init.t b/tests/basic/ctime/ctime-glfs-init.t
new file mode 100644
index 00000000000..56d7d6caee0
--- /dev/null
+++ b/tests/basic/ctime/ctime-glfs-init.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{1,2,3};
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0;
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/ctime-glfs-init.c -lgfapi -lpthread
+
+TEST ./$(dirname $0)/ctime-glfs-init ${H0} $V0 $logdir/ctime-glfs-init.log
+
+cleanup_tester $(dirname $0)/ctime-glfs-init
+
+cleanup;
+
diff --git a/tests/basic/ctime/ctime-heal-symlinks.t b/tests/basic/ctime/ctime-heal-symlinks.t
new file mode 100644
index 00000000000..547b1807e94
--- /dev/null
+++ b/tests/basic/ctime/ctime-heal-symlinks.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+###############################################################################
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+TEST "echo hello_world > FILE"
+TEST ln -s FILE SOFTLINK
+
+# Remove symlink only (not the .glusterfs entry) and trigger named heal.
+TEST rm -f $B0/${V0}2/SOFTLINK
+TEST stat SOFTLINK
+
+# To heal and clear new-entry mark on source bricks.
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT 2 stat -c %h $B0/${V0}2/SOFTLINK
+EXPECT "hello_world" cat $B0/${V0}2/SOFTLINK
+
+cd -
+cleanup
+###############################################################################
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+TEST "echo hello_world > FILE"
+TEST ln -s FILE SOFTLINK
+
+# Remove symlink only (not the .glusterfs entry) and trigger named heal.
+TEST rm -f $B0/${V0}2/SOFTLINK
+TEST stat SOFTLINK
+
+# To heal and clear new-entry mark on source bricks.
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT 2 stat -c %h $B0/${V0}2/SOFTLINK
+TEST kill_brick $V0 $H0 $B0/${V0}0
+cd -
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+cd $M0
+EXPECT "hello_world" cat SOFTLINK
+
+cd -
+cleanup
+###############################################################################
diff --git a/tests/basic/ctime/ctime-mdata-legacy-files.t b/tests/basic/ctime/ctime-mdata-legacy-files.t
new file mode 100644
index 00000000000..2e782d5c99d
--- /dev/null
+++ b/tests/basic/ctime/ctime-mdata-legacy-files.t
@@ -0,0 +1,83 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+###############################################################################
+#Replica volume
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+#Disable ctime and create file, file doesn't contain "trusted.glusterfs.mdata" xattr
+TEST $CLI volume set $V0 ctime off
+
+TEST "mkdir $M0/DIR"
+TEST "echo hello_world > $M0/DIR/FILE"
+
+#Verify absence of xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR/FILE"
+
+#Enable ctime
+TEST $CLI volume set $V0 ctime on
+sleep 3
+TEST stat $M0/DIR/FILE
+
+#Verify presence "trusted.glusterfs.mdata" xattr on backend
+#The lookup above should have created xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}0/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}1/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}2/DIR/FILE"
+
+###############################################################################
+#Disperse Volume
+
+TEST $CLI volume create $V1 disperse 3 redundancy 1 $H0:$B0/${V1}{0,1,2}
+TEST $CLI volume set $V1 performance.stat-prefetch off
+TEST $CLI volume start $V1
+
+TEST glusterfs --volfile-id=$V1 --volfile-server=$H0 --entry-timeout=0 $M1;
+
+#Disable ctime and create file, file doesn't contain "trusted.glusterfs.mdata" xattr
+TEST $CLI volume set $V1 ctime off
+TEST "mkdir $M1/DIR"
+TEST "echo hello_world > $M1/DIR/FILE"
+
+#Verify absence of xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "" check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR/FILE"
+
+#Enable ctime
+TEST $CLI volume set $V1 ctime on
+sleep 3
+TEST stat $M1/DIR/FILE
+
+#Verify presence "trusted.glusterfs.mdata" xattr on backend
+#The lookup above should have created xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}0/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}1/DIR/FILE"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V1}2/DIR/FILE"
+
+cleanup;
+###############################################################################
diff --git a/tests/basic/ctime/ctime-noatime.t b/tests/basic/ctime/ctime-noatime.t
new file mode 100644
index 00000000000..609ccbd72c1
--- /dev/null
+++ b/tests/basic/ctime/ctime-noatime.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+function atime_compare {
+ local atime=$1
+ local file_name=$2
+ local atime1=$(stat -c "%X" $file_name)
+
+ if [ $atime == $atime1 ]
+ then
+ echo "0"
+ else
+ echo "1"
+ fi
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.read-after-open off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+TEST "echo hello_world > FILE"
+atime1=$(stat -c "%X" FILE)
+
+TEST "cat FILE > /dev/null"
+EXPECT "0" atime_compare $atime1 FILE
+
+sleep 1
+
+TEST $CLI volume set $V0 noatime off
+TEST "cat FILE > /dev/null"
+EXPECT "1" atime_compare $atime1 FILE
+
+cd -
+cleanup
diff --git a/tests/basic/ctime/ctime-readdir.c b/tests/basic/ctime/ctime-readdir.c
new file mode 100644
index 00000000000..8760db29ae8
--- /dev/null
+++ b/tests/basic/ctime/ctime-readdir.c
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include <dirent.h>
+#include <string.h>
+#include <assert.h>
+
+int
+main(int argc, char **argv)
+{
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ int ret = 0;
+ char *path = NULL;
+
+ assert(argc == 2);
+ path = argv[1];
+
+ dir = opendir(path);
+ if (!dir) {
+ printf("opendir(%s) failed.\n", path);
+ return -1;
+ }
+
+ while ((entry = readdir(dir)) != NULL) {
+ }
+ if (dir)
+ closedir(dir);
+
+ return ret;
+}
diff --git a/tests/basic/ctime/ctime-readdir.t b/tests/basic/ctime/ctime-readdir.t
new file mode 100644
index 00000000000..4564fc1b667
--- /dev/null
+++ b/tests/basic/ctime/ctime-readdir.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{1,2,3};
+TEST $CLI volume set $V0 performance.stat-prefetch on
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+TEST mkdir $M0/dir0
+TEST "echo hello_world > $M0/dir0/FILE"
+
+ctime1=$(stat -c %Z $M0/dir0/FILE)
+echo "Mount change time: $ctime1"
+
+sleep 2
+
+#Write to back end directly to modify ctime of backend file
+TEST "echo write_from_backend >> $B0/brick1/dir0/FILE"
+TEST "echo write_from_backend >> $B0/brick2/dir0/FILE"
+TEST "echo write_from_backend >> $B0/brick3/dir0/FILE"
+echo "Backend change time"
+echo "brick1: $(stat -c %Z $B0/brick1/dir0/FILE)"
+echo "brick2: $(stat -c %Z $B0/brick2/dir0/FILE)"
+echo "brick3: $(stat -c %Z $B0/brick3/dir0/FILE)"
+
+#Stop and start to hit the case of no inode for readdir
+TEST umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+TEST build_tester $(dirname $0)/ctime-readdir.c
+
+#Do readdir
+TEST ./$(dirname $0)/ctime-readdir $M0/dir0
+
+EXPECT "$ctime1" stat -c %Z $M0/dir0/FILE
+echo "Mount change time after readdir $(stat -c %Z $M0/dir0/FILE)"
+
+cleanup_tester $(dirname $0)/ctime-readdir
+
+cleanup;
diff --git a/tests/basic/ctime/ctime-rep-heal.t b/tests/basic/ctime/ctime-rep-heal.t
new file mode 100644
index 00000000000..20517c74971
--- /dev/null
+++ b/tests/basic/ctime/ctime-rep-heal.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+#
+# This will test self healing of ctime xattr 'trusted.glusterfs.mdata'
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+
+# Create files
+mkdir $M0/dir1
+echo "Initial content" > $M0/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+# Kill brick
+TEST kill_brick $V0 $H0 $B0/${V0}3
+
+echo "B3 is down" >> $M0/file1
+echo "Change dir1 time attributes" > $M0/dir1/dir1_file1
+echo "Entry heal file" > $M0/entry_heal_file1
+mkdir $M0/entry_heal_dir1
+
+# Check xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '2' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
+
+TEST $CLI volume start $V0 force
+$CLI volume heal $V0
+
+# Check xattr
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/dir1/dir1_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/dir1/dir1_file1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_file1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '3' get_mdata_count $B0/${V0}{1..3}/entry_heal_dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '1' get_mdata_uniq_count $B0/${V0}{1..3}/entry_heal_dir1
+
+cleanup;
diff --git a/tests/basic/ctime/ctime-rep-rebalance.t b/tests/basic/ctime/ctime-rep-rebalance.t
new file mode 100644
index 00000000000..866cf87e6cb
--- /dev/null
+++ b/tests/basic/ctime/ctime-rep-rebalance.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+#
+# This will test healing of ctime xattr 'trusted.glusterfs.mdata' after add-brick and rebalance
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+
+# Create files
+mkdir $M0/dir1
+
+# Add brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..8}
+
+#Trigger rebalance
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+#Verify ctime xattr heal on directory
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}6/dir1"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}7/dir1"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.glusterfs.mdata' check_for_xattr 'trusted.glusterfs.mdata' "$B0/${V0}8/dir1"
+
+b6_mdata=$(get_mdata "$B0/${V0}6/dir1")
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}7/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "${b6_mdata}" get_mdata $B0/${V0}8/dir1
+
+cleanup;
diff --git a/tests/basic/ctime/ctime-utimesat.t b/tests/basic/ctime/ctime-utimesat.t
new file mode 100644
index 00000000000..540e57aec83
--- /dev/null
+++ b/tests/basic/ctime/ctime-utimesat.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.read-after-open off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+touch $M0/FILE
+
+atime=$(stat -c "%.X" $M0/FILE)
+EXPECT $atime stat -c "%.Y" $M0/FILE
+EXPECT $atime stat -c "%.Z" $M0/FILE
+
+cleanup
diff --git a/tests/basic/distribute/brick-down.t b/tests/basic/distribute/brick-down.t
new file mode 100644
index 00000000000..522ccc07210
--- /dev/null
+++ b/tests/basic/distribute/brick-down.t
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../common-utils.rc
+. $(dirname $0)/../../dht.rc
+
+# Test 1 overview:
+# ----------------
+# Test whether lookups are sent after a brick comes up again
+#
+# 1. Create a 3 brick pure distribute volume
+# 2. Fuse mount the volume so the layout is set on the root
+# 3. Kill one brick and try to create a directory which hashes to that brick.
+# It should fail with EIO.
+# 4. Restart the brick that was killed.
+# 5. Do not remount the volume. Try to create the same directory as in step 3.
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0-{1..3}
+TEST $CLI volume start $V0
+
+# We want the lookup to reach DHT
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+# Mount using FUSE and lookup the mount so a layout is set on the brick root
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+ls $M0/
+
+TEST mkdir $M0/level1
+
+# Find a dirname that will hash to the brick we are going to kill
+hashed=$V0-client-1
+TEST dht_first_filename_with_hashsubvol "$hashed" $M0 "dir-"
+roottestdir=$fn_return_val
+
+hashed=$V0-client-1
+TEST dht_first_filename_with_hashsubvol "$hashed" $M0/level1 "dir-"
+level1testdir=$fn_return_val
+
+
+TEST kill_brick $V0 $H0 $B0/$V0-2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-2
+
+TEST $CLI volume status $V0
+
+
+# Unmount and mount the volume again so dht has an incomplete in memory layout
+
+umount -f $M0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+
+mkdir $M0/$roottestdir
+TEST [ $? -ne 0 ]
+
+mkdir $M0/level1/$level1testdir
+TEST [ $? -ne 0 ]
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-2
+
+#$CLI volume status
+
+# It takes a while for the client to reconnect to the brick
+sleep 5
+
+
+mkdir $M0/$roottestdir
+TEST [ $? -eq 0 ]
+
+mkdir $M0/$level1/level1testdir
+TEST [ $? -eq 0 ]
+
+# Cleanup
+cleanup
+
+
diff --git a/tests/basic/distribute/bug-1265677-use-readdirp.t b/tests/basic/distribute/bug-1265677-use-readdirp.t
new file mode 100644
index 00000000000..eef8affc8b9
--- /dev/null
+++ b/tests/basic/distribute/bug-1265677-use-readdirp.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks use-readdirp disable/enable for dht
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0..1}
+TEST $CLI volume set $V0 nfs.disable yes
+TEST $CLI volume set $V0 dht.force-readdirp yes
+TEST $CLI volume set $V0 performance.readdir-ahead off
+TEST $CLI volume set $V0 performance.force-readdirp no
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --use-readdirp=no $M0;
+TEST mkdir $M0/d
+TEST touch $M0/d/{1..10}
+
+TEST $CLI volume profile $V0 start
+#Clear all the fops till now
+TEST $CLI volume profile $V0 info
+
+EXPECT "^10$" echo $(ls $M0/d | wc -l)
+EXPECT_NOT "^0$" echo $($CLI volume profile $V0 info incremental | grep -w READDIRP | wc -l)
+EXPECT "^10$" echo $(ls $M0/d | wc -l)
+EXPECT "^0$" echo $($CLI volume profile $V0 info incremental | grep -w READDIR | wc -l)
+
+TEST $CLI volume set $V0 dht.force-readdirp no
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-dht use-readdirp
+
+EXPECT "^10$" echo $(ls $M0/d | wc -l)
+EXPECT "^0$" echo $($CLI volume profile $V0 info incremental | grep -w READDIRP | wc -l)
+EXPECT "^10$" echo $(ls $M0/d | wc -l)
+EXPECT_NOT "^0$" echo $($CLI volume profile $V0 info incremental | grep -w READDIR | wc -l)
+
+cleanup
diff --git a/tests/basic/distribute/debug-xattrs.t b/tests/basic/distribute/debug-xattrs.t
new file mode 100644
index 00000000000..6d87c0e8671
--- /dev/null
+++ b/tests/basic/distribute/debug-xattrs.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../common-utils.rc
+
+# Test overview: Test the virtual xattrs dht provides for debugging
+
+# Test 1 : "dht.file.hashed-subvol.<filename>"
+# Get the hashed subvolume for file1 in dir1 using xattr
+# Create file1 in dir1
+# Check if the file is created in the brick returned by xattr
+
+hashdebugxattr="dht.file.hashed-subvol."
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0-{0..3}
+TEST $CLI volume start $V0
+
+# Mount using FUSE and create a file
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+# Test 1 : "dht.file.hashed-subvol.<filename>"
+# Get the hashed subvolume for file1 in dir1 using xattr
+# Create file1 in dir1
+# Check if the file is created in the brick returned by xattr
+# Create a directory on $M0
+
+TEST mkdir $M0/dir1
+
+xattrname=$hashdebugxattr"file1"
+
+hashed=$(getfattr --only-values -n "$xattrname" $M0/dir1)
+
+# Get the brick path for $hashed
+brickpath=$(cat "$M0/.meta/graphs/active/$hashed/options/remote-subvolume")
+brickpath=$brickpath"/dir1/file1"
+
+# Create the file for which we checked the xattr
+TEST touch $M0/dir1/file1
+TEST stat $brickpath
+
+# Non-existent directory
+TEST ! getfattr --only-values -n "$xattrname" $M0/dir2
+
+
+# Cleanup
+cleanup
+
diff --git a/tests/basic/distribute/dir-heal.t b/tests/basic/distribute/dir-heal.t
new file mode 100644
index 00000000000..851f765b245
--- /dev/null
+++ b/tests/basic/distribute/dir-heal.t
@@ -0,0 +1,145 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../common-utils.rc
+
+# Test 1 overview:
+# ----------------
+#
+# 1. Kill one brick of the volume.
+# 2. Create directories and change directory properties.
+# 3. Bring up the brick and access the directory
+# 4. Check the permissions and xattrs on the backend
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0-{1..3}
+TEST $CLI volume start $V0
+
+# We want the lookup to reach DHT
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+# Mount using FUSE , kill a brick and create directories
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+ls $M0/
+cd $M0
+
+TEST kill_brick $V0 $H0 $B0/$V0-1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-1
+
+TEST mkdir dir{1..4}
+
+# No change for dir1
+# Change permissions for dir2
+# Set xattr on dir3
+# Change permissions and set xattr on dir4
+
+TEST chmod 777 $M0/dir2
+
+TEST setfattr -n "user.test" -v "test" $M0/dir3
+
+TEST chmod 777 $M0/dir4
+TEST setfattr -n "user.test" -v "test" $M0/dir4
+
+
+# Start all bricks
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-1
+
+#$CLI volume status
+
+# It takes a while for the client to reconnect to the brick
+sleep 5
+
+stat $M0/dir* > /dev/null
+
+# Check that directories have been created on the brick that was killed
+
+TEST ls $B0/$V0-1/dir1
+
+TEST ls $B0/$V0-1/dir2
+EXPECT "777" stat -c "%a" $B0/$V0-1/dir2
+
+TEST ls $B0/$V0-1/dir3
+EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir3
+
+
+TEST ls $B0/$V0-1/dir4
+EXPECT "777" stat -c "%a" $B0/$V0-1/dir4
+EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir4
+
+
+TEST rm -rf $M0/*
+
+cd
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+
+# Test 2 overview:
+# ----------------
+# 1. Create directories with all bricks up.
+# 2. Kill a brick and change directory properties and set user xattr.
+# 2. Bring up the brick and access the directory
+# 3. Check the permissions and xattrs on the backend
+
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+ls $M0/
+cd $M0
+TEST mkdir dir{1..4}
+
+TEST kill_brick $V0 $H0 $B0/$V0-1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" brick_up_status $V0 $H0 $B0/$V0-1
+
+# No change for dir1
+# Change permissions for dir2
+# Set xattr on dir3
+# Change permissions and set xattr on dir4
+
+TEST chmod 777 $M0/dir2
+
+TEST setfattr -n "user.test" -v "test" $M0/dir3
+
+TEST chmod 777 $M0/dir4
+TEST setfattr -n "user.test" -v "test" $M0/dir4
+
+
+# Start all bricks
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/$V0-1
+
+#$CLI volume status
+
+# It takes a while for the client to reconnect to the brick
+sleep 5
+
+stat $M0/dir* > /dev/null
+
+# Check directories on the brick that was killed
+
+TEST ls $B0/$V0-1/dir2
+EXPECT "777" stat -c "%a" $B0/$V0-1/dir2
+
+TEST ls $B0/$V0-1/dir3
+EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir3
+
+
+TEST ls $B0/$V0-1/dir4
+EXPECT "777" stat -c "%a" $B0/$V0-1/dir4
+EXPECT "test" getfattr -n "user.test" --absolute-names --only-values $B0/$V0-1/dir4
+cd
+
+
+# Cleanup
+cleanup
+
diff --git a/tests/basic/distribute/file-create.t b/tests/basic/distribute/file-create.t
new file mode 100644
index 00000000000..41b662eefe2
--- /dev/null
+++ b/tests/basic/distribute/file-create.t
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../common-utils.rc
+. $(dirname $0)/../../dht.rc
+
+# Test overview: Test file creation in various scenarios
+
+
+# Test 1 : "dht.file.hashed-subvol.<filename>"
+# Get the hashed subvolume for file1 in dir1 using xattr
+# Create file1 in dir1
+# Check if the file is created in the brick returned by xattr
+
+hashdebugxattr="dht.file.hashed-subvol."
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+# We want fixed size bricks to test min-free-disk
+
+# Create 2 loop devices, one per brick.
+TEST truncate -s 25M $B0/brick1
+TEST truncate -s 25M $B0/brick2
+
+TEST L1=`SETUP_LOOP $B0/brick1`
+TEST MKFS_LOOP $L1
+
+TEST L2=`SETUP_LOOP $B0/brick2`
+TEST MKFS_LOOP $L2
+
+
+TEST mkdir -p $B0/${V0}{1,2}
+
+TEST MOUNT_LOOP $L1 $B0/${V0}1
+TEST MOUNT_LOOP $L2 $B0/${V0}2
+
+
+# Create a plain distribute volume with 2 subvols.
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V0;
+EXPECT "Started" volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 cluster.min-free-disk 40%
+#TEST $CLI volume set $V0 client-log-level DEBUG
+
+# Mount using FUSE and create a file
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST mkdir $M0/dir1
+
+######################################################
+# Test 1 : Test file creation on correct hashed subvol
+######################################################
+
+hashed="$V0-client-0"
+TEST dht_first_filename_with_hashsubvol "$hashed" $M0/dir1 "big-"
+firstfile=$fn_return_val
+
+#Create a large file to fill up $hashed past the min-free-disk limits
+TEST dd if=/dev/zero of=$M0/dir1/$firstfile bs=1M count=15
+
+brickpath_0=$(cat "$M0/.meta/graphs/active/$hashed/options/remote-subvolume")
+brickpath_1=$(cat "$M0/.meta/graphs/active/$V0-client-1/options/remote-subvolume")
+
+TEST stat "$brickpath_0/dir1/$firstfile"
+EXPECT "0" is_dht_linkfile "$brickpath_0/dir1/$firstfile"
+
+
+######################################################
+# Test 2: Create a file which hashes to the subvol which has crossed
+# the min-free-disk limit. It should be created on the other subvol
+######################################################
+
+# DHT only checks disk usage every second. Create a new file and introduce a
+# delay here to ensure DHT updates the in memory disk usage
+sleep 2
+TEST dd if=/dev/zero of=$M0/dir1/file-2 bs=1024 count=1
+
+# Find a file that will hash to $hash_subvol
+TEST dht_first_filename_with_hashsubvol $hashed $M0/dir1 "newfile-"
+newfile=$fn_return_val
+echo $newfile
+
+# Create $newfile - it should be created on the other subvol as its hash subvol
+# has crossed the min-free-disk limit
+TEST dd if=/dev/zero of=$M0/dir1/$newfile bs=1024 count=20
+TEST stat "$brickpath_0/dir1/$newfile"
+EXPECT "1" is_dht_linkfile "$brickpath_0/dir1/$newfile"
+
+
+#TEST rm -rf $M0/dir1/$firstfile
+#TEST rm -rf $M0/dir1/$newfile
+
+
+######################################################
+# Test 3: Test dht_filter_loc_subvol_key
+######################################################
+
+TEST dht_first_filename_with_hashsubvol $V0-client-1 $M0/dir1 "filter-"
+newfile=$fn_return_val
+echo $newfile
+TEST dd if=/dev/zero of="$M0/dir1/$newfile@$V0-dht:$hashed" bs=1024 count=20
+TEST stat $M0/dir1/$newfile
+TEST stat "$brickpath_0/dir1/$newfile"
+EXPECT "1" is_dht_linkfile "$brickpath_1/dir1/$newfile"
+
+
+force_umount $M0
+TEST $CLI volume stop $V0
+UMOUNT_LOOP ${B0}/${V0}{1,2}
+rm -f ${B0}/brick{1,2}
+
+
+# Cleanup
+cleanup
+
diff --git a/tests/basic/distribute/file-rename.t b/tests/basic/distribute/file-rename.t
new file mode 100644
index 00000000000..63111b8ad8f
--- /dev/null
+++ b/tests/basic/distribute/file-rename.t
@@ -0,0 +1,1021 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../common-utils.rc
+
+# Test overview:
+# Test all combinations of src-hashed/src-cached/dst-hashed/dst-cached
+
+hashdebugxattr="dht.file.hashed-subvol."
+
+function get_brick_index {
+ local inpath=$1
+ brickroot=$(getfattr -m . -n trusted.glusterfs.pathinfo $inpath | tr ' ' '\n' | sed -n 's/<POSIX(\(.*\)):.*:.*>.*/\1/p')
+ echo ${brickroot:(-1)}
+}
+
+function get_brick_path_for_subvol {
+ local in_subvol=$1
+ local in_brickpath
+
+ in_brickpath=$(cat "$M0/.meta/graphs/active/$in_subvol/options/remote-subvolume")
+ echo $in_brickpath
+
+}
+
+#Checks that file exists only on hashed and/or cached
+function file_existence_check
+{
+ local in_file_path=$1
+ local in_hashed=$2
+ local in_cached=$3
+ local in_client_subvol
+ local in_brickpath
+ local ret
+
+ for i in {0..3}
+ do
+ in_client_subvol="$V0-client-$i"
+ in_brickpath=$(cat "$M0/.meta/graphs/active/$in_client_subvol/options/remote-subvolume")
+ stat "$in_brickpath/$in_file_path" 2>/dev/null
+ ret=$?
+ # Either the linkto or the data file must exist on the hashed
+ if [ "$in_client_subvol" == "$in_hashed" ]; then
+ if [ $ret -ne 0 ]; then
+ return 1
+ fi
+ continue
+ fi
+
+ # If the cached is non-null, we expect the file to exist on it
+ if [ "$in_client_subvol" == "$in_cached" ]; then
+ if [ $ret -ne 0 ]; then
+ return 1
+ fi
+ continue
+ fi
+
+ if [ $ret -eq 0 ]; then
+ return 2
+ fi
+ done
+ return 0
+}
+
+
+# Check if file exists on any of the bricks of the volume
+function file_does_not_exist
+{
+ local inpath=$1
+ for i in `seq 0 3`
+ do
+ file_path=$B0/$V0-$i/$inpath
+ if [ -f "$file_path" ]; then
+ echo "1"
+ return 1
+ fi
+ done
+ return 0
+}
+
+
+# Input: filename dirpath
+function get_hash_subvol
+{
+ hash_subvol=$(getfattr --only-values -n "$hashdebugxattr$1" $2 2>/dev/null)
+}
+
+
+
+# Find the first filename that hashes to a subvol
+# other than $1
+
+function first_filename_with_diff_hashsubvol
+{
+ local in_subvol=$1
+ local in_path=$2
+ local file_pattern=$3
+ local in_hash_subvol
+
+ for i in {1..100}
+ do
+ dstfilename="$file_pattern$i"
+ in_hash_subvol=$(get_hash_subvol "$dstfilename" "$in_path")
+ echo $in_hash_subvol
+ if [ "$in_subvol" != "$in_hash_subvol" ]; then
+ return 0
+ fi
+ done
+ return 1
+}
+
+# Find the first filename that hashes to the same subvol
+# as $1
+function first_filename_with_same_hashsubvol
+{
+ local in_subvol=$1
+ local in_path=$2
+ local in_hash_subvol
+ local file_pattern=$3
+
+ for i in {1..100}
+ do
+ dstfilename="$file_pattern$i"
+ get_hash_subvol "$dstfilename" "$in_path"
+ in_hash_subvol=$hash_subvol
+# echo $in_hash_subvol
+ if [ "$in_subvol" == "$in_hash_subvol" ]; then
+ return 0
+ fi
+ done
+ return 1
+}
+
+function file_is_linkto
+{
+ local brick_filepath=$1
+
+ test=$(stat $brick_filepath 2>&1)
+ if [ $? -ne 0 ]; then
+ echo "2"
+ return
+ fi
+
+ test=$(getfattr -n trusted.glusterfs.dht.linkto -e text $brick_filepath 2>&1)
+
+ if [ $? -eq 0 ]; then
+ echo "1"
+ else
+ echo "0"
+ fi
+}
+
+
+
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+
+# We need at least 4 bricks to test all combinations of hashed and
+# cached files
+
+TEST $CLI volume create $V0 $H0:$B0/$V0-{0..3}
+TEST $CLI volume start $V0
+
+# Mount using FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+
+################################################################
+# The first set of tests are those where the Dst file does not exist
+# dst-cached = NULL
+#
+###############################################################
+
+################### Test 1 ####################################
+#
+# src-hashed = src-cached = dst-hashed
+# dst-cached = null
+# src-file = src-1
+
+echo " **** Test 1 **** "
+
+src_file="src-1"
+
+TEST mkdir $M0/test-1
+TEST touch $M0/test-1/$src_file
+
+TEST get_hash_subvol $src_file $M0/test-1
+src_hashed=$hash_subvol
+#echo "Hashed subvol for $src_file: " $src_hashed
+
+# Find a file name that hashes to the same subvol as $src_file
+TEST first_filename_with_same_hashsubvol "$src_hashed" "$M0/test-1" "dst-"
+#echo "dst-file name: " $dstfilename
+dst_hashed=$src_hashed
+
+src_hash_brick=$(get_brick_path_for_subvol $src_hashed)
+
+echo "Renaming $src_file to $dstfilename"
+
+TEST mv $M0/test-1/$src_file $M0/test-1/$dstfilename
+
+# Expected:
+# dst file is accessible from the mount point
+# dst file exists only on the hashed brick.
+# no linkto files on any bricks
+# src files do not exist
+
+
+TEST stat $M0/test-1/$dstfilename 2>/dev/null
+TEST file_existence_check test-1/$dstfilename $src_hashed
+TEST file_does_not_exist test-1/$src_file
+EXPECT "0" file_is_linkto $src_hash_brick/test-1/$dstfilename
+
+
+################### Test 2 ####################################
+
+# src-hashed = src-cached != dst-hashed
+# dst-cached = null
+
+echo " **** Test 2 **** "
+
+src_file="src-1"
+
+TEST mkdir $M0/test-2
+TEST touch $M0/test-2/$src_file
+
+TEST get_hash_subvol $src_file $M0/test-2
+src_hashed=$hash_subvol
+#echo "Hashed subvol for $src_file: " $src_hashed
+
+# Find a file name that hashes to a diff hashed subvol than $src_file
+TEST first_filename_with_diff_hashsubvol "$src_hashed" "$M0/test-2" "dst-"
+echo "dst-file name: " $dstfilename
+TEST get_hash_subvol $dstfilename $M0/test-2
+dst_hashed=$hash_subvol
+
+src_hash_brick=$(get_brick_path_for_subvol $src_hashed)
+dst_hash_brick=$(get_brick_path_for_subvol $dst_hashed)
+
+echo "Renaming $src_file to $dstfilename"
+
+TEST mv $M0/test-2/$src_file $M0/test-2/$dstfilename
+
+
+# Expected:
+# dst file is accessible from the mount point
+# dst data file on src_hashed and dst linkto file on dst_hashed
+# src files do not exist
+
+
+TEST stat $M0/test-2/$dstfilename 2>/dev/null
+TEST file_existence_check test-2/$dstfilename $dst_hashed $src_hashed
+TEST file_does_not_exist test-2/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-2/$dstfilename
+EXPECT "0" file_is_linkto $src_hash_brick/test-2/$dstfilename
+
+################### Test 3 ####################################
+
+# src-hashed = dst-hashed != src-cached
+
+echo " **** Test 3 **** "
+
+src_file0="abc-1"
+
+# 1. Create src file with src_cached != src_hashed
+TEST mkdir $M0/test-3
+TEST touch $M0/test-3/$src_file0
+
+TEST get_hash_subvol $src_file0 $M0/test-3
+src_cached=$hash_subvol
+#echo "Hashed subvol for $src_file0: " $src_cached
+
+# Find a file name that hashes to a diff hashed subvol than $src_file0
+TEST first_filename_with_diff_hashsubvol "$src_cached" "$M0/test-3" "src-"
+echo "dst-file name: " $dstfilename
+src_file=$dstfilename
+
+TEST mv $M0/test-3/$src_file0 $M0/test-3/$src_file
+
+TEST get_hash_subvol $src_file $M0/test-3
+src_hashed=$hash_subvol
+
+
+# 2. Rename src to dst
+TEST first_filename_with_same_hashsubvol "$src_hashed" "$M0/test-3" "dst-"
+#echo "dst-file name: " $dstfilename
+
+src_hash_brick=$(get_brick_path_for_subvol $src_hashed)
+src_cached_brick=$(get_brick_path_for_subvol $src_cached)
+
+echo "Renaming $src_file to $dstfilename"
+
+TEST mv $M0/test-3/$src_file $M0/test-3/$dstfilename
+
+
+# Expected:
+# dst file is accessible from the mount point
+TEST stat $M0/test-3/$dstfilename 2>/dev/null
+
+# src file does not exist
+TEST file_does_not_exist test-3/$src_file
+
+# dst linkto file on src_hashed and dst data file on src_cached
+TEST file_existence_check test-3/$dstfilename $src_hashed $src_cached
+
+EXPECT "1" file_is_linkto $src_hash_brick/test-3/$dstfilename
+EXPECT "0" file_is_linkto $src_cached_brick/test-3/$dstfilename
+
+
+
+################### Test 4 ####################################
+
+# src-cached = dst-hashed != src-hashed
+
+echo " **** Test 4 **** "
+
+src_file0="abc-1"
+
+# 1. Create src file with src_cached != src_hashed
+TEST mkdir $M0/test-4
+TEST touch $M0/test-4/$src_file0
+
+TEST get_hash_subvol $src_file0 $M0/test-4
+src_cached=$hash_subvol
+#echo "Hashed subvol for $src_file0: " $src_cached
+
+# Find a file name that hashes to a diff hashed subvol than $src_file0
+TEST first_filename_with_diff_hashsubvol "$src_cached" "$M0/test-4" "src-"
+src_file=$dstfilename
+
+TEST mv $M0/test-4/$src_file0 $M0/test-4/$src_file
+
+TEST get_hash_subvol $src_file $M0/test-4
+src_hashed=$hash_subvol
+
+
+# 2. Rename src to dst
+TEST first_filename_with_same_hashsubvol "$src_cached" "$M0/test-4" "dst-"
+#echo "dst-file name: " $dstfilename
+
+src_hash_brick=$(get_brick_path_for_subvol $src_hashed)
+src_cached_brick=$(get_brick_path_for_subvol $src_cached)
+
+echo "Renaming $src_file to $dstfilename"
+
+TEST mv $M0/test-4/$src_file $M0/test-4/$dstfilename
+
+# Expected:
+# dst file is accessible from the mount point
+TEST stat $M0/test-4/$dstfilename 2>/dev/null
+
+# src file does not exist
+TEST file_does_not_exist test-4/$src_file
+
+# dst linkto file on src_hashed and dst data file on src_cached
+TEST file_existence_check test-4/$dstfilename $src_cached
+
+EXPECT "0" file_is_linkto $src_cached_brick/test-4/$dstfilename
+
+
+################### Test 5 ####################################
+
+# src-cached != src-hashed
+# src-hashed != dst-hashed
+# src-cached != dst-hashed
+
+
+echo " **** Test 5 **** "
+
+# 1. Create src and dst files
+
+TEST mkdir $M0/test-5
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-5" "abc-"
+src_file0=$dstfilename
+
+TEST touch $M0/test-5/$src_file0
+
+TEST get_hash_subvol $src_file0 $M0/test-5
+src_cached=$hash_subvol
+#echo "Hashed subvol for $src_file0: " $src_cached
+
+# Find a file name that hashes to a diff hashed subvol than $src_file0
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-5" "src-"
+src_file=$dstfilename
+
+TEST mv $M0/test-5/$src_file0 $M0/test-5/$src_file
+
+TEST get_hash_subvol $src_file $M0/test-5
+src_hashed=$hash_subvol
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-5" "dst-"
+#echo "dst-file name: " $dstfilename
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-2")
+src_cached_brick=$(get_brick_path_for_subvol $src_cached)
+
+
+# 2. Rename src to dst
+echo "Renaming $src_file to $dstfilename"
+
+TEST mv $M0/test-5/$src_file $M0/test-5/$dstfilename
+
+
+# 3. Validate
+
+# Expected:
+# dst file is accessible from the mount point
+TEST stat $M0/test-5/$dstfilename 2>/dev/null
+
+# src file does not exist
+TEST file_does_not_exist test-5/$src_file
+
+# dst linkto file on src_hashed and dst data file on src_cached
+
+EXPECT "0" file_is_linkto $src_cached_brick/test-5/$dstfilename
+EXPECT "1" file_is_linkto $dst_hash_brick/test-5/$dstfilename
+
+
+########################################################################
+#
+# The Dst file exists
+#
+########################################################################
+
+################### Test 6 ####################################
+
+# src_hash = src_cached
+# dst_hash = dst_cached
+# dst_hash = src_hash
+
+
+TEST mkdir $M0/test-6
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-6" "src-"
+src_file=$dstfilename
+
+TEST touch $M0/test-6/$src_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-6" "dst-"
+dst_file=$dstfilename
+
+TEST touch $M0/test-6/$dst_file
+
+
+# 2. Rename src to dst
+
+TEST mv $M0/test-6/$src_file $M0/test-6/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-6/$dst_file 2>/dev/null
+TEST file_existence_check test-6/$dst_file "$V0-client-0"
+TEST file_does_not_exist test-6/$src_file
+EXPECT "0" file_is_linkto $dst_hash_brick/test-6/$dst_file
+
+
+################### Test 7 ####################################
+
+# src_hash = src_cached
+# dst_hash = dst_cached
+# dst_hash != src_hash
+
+
+echo " **** Test 7 **** "
+
+TEST mkdir $M0/test-7
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-7" "src-"
+src_file=$dstfilename
+
+TEST touch $M0/test-7/$src_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-7" "dst-"
+dst_file=$dstfilename
+
+TEST touch $M0/test-7/$dst_file
+
+
+# 2. Rename src to dst
+
+TEST mv $M0/test-7/$src_file $M0/test-7/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-1")
+src_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-7/$dst_file 2>/dev/null
+TEST file_existence_check test-7/$dst_file "$V0-client-1" "$V0-client-0"
+TEST file_does_not_exist test-7/$src_file
+
+EXPECT "0" file_is_linkto $src_hash_brick/test-7/$dst_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-7/$dst_file
+
+
+################### Test 8 ####################################
+
+# src_hash = src_cached
+# dst_hash != dst_cached
+# dst_hash != src_hash
+# dst_cached != src_hash
+
+echo " **** Test 8 **** "
+
+TEST mkdir $M0/test-8
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-8" "src-"
+src_file=$dstfilename
+TEST touch $M0/test-8/$src_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-8" "dst0-"
+dst_file0=$dstfilename
+TEST touch $M0/test-8/$dst_file0
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-8" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-8/$dst_file0 $M0/test-8/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-8/$src_file $M0/test-8/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-2")
+src_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-8/$dst_file 2>/dev/null
+TEST file_existence_check test-8/$dst_file "$V0-client-2" "$V0-client-0"
+TEST file_does_not_exist test-8/$src_file
+
+EXPECT "0" file_is_linkto $src_hash_brick/test-8/$dst_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-8/$dst_file
+
+################### Test 9 ####################################
+
+# src_hash = src_cached = dst_hash
+# dst_hash != dst_cached
+
+echo " **** Test 9 **** "
+
+TEST mkdir $M0/test-9
+
+
+# 1. Create src and dst files
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-9" "src-"
+src_file=$dstfilename
+TEST touch $M0/test-9/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-9" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-9/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-9" "dst-"
+dst_file=$dstfilename
+
+TEST mv $M0/test-9/$dst0_file $M0/test-9/$dst_file
+
+# 2. Rename the file
+
+mv $M0/test-9/$src_file $M0/test-9/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-9/$dst_file 2>/dev/null
+TEST file_existence_check test-9/$dst_file "$V0-client-0"
+TEST file_does_not_exist test-9/$src_file
+EXPECT "0" file_is_linkto $dst_hash_brick/test-9/$dst_file
+
+
+################### Test 10 ####################################
+
+# src_hash = src_cached = dst_cached
+# dst_hash != dst_cached
+
+echo " **** Test 10 **** "
+
+TEST mkdir $M0/test-10
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-10" "src-"
+src_file=$dstfilename
+TEST touch $M0/test-10/$src_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-10" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-10/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-10" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-10/$dst0_file $M0/test-10/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-10/$src_file $M0/test-10/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-1")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-10/$dst_file 2>/dev/null
+TEST file_existence_check test-10/$dst_file "$V0-client-1" "$V0-client-0"
+TEST file_does_not_exist test-10/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-10/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-10/$dst_file
+
+
+################### Test 11 ####################################
+
+# src_hash != src_cached
+# dst_hash = dst_cached = src_cached
+
+echo " **** Test 11 **** "
+
+TEST mkdir $M0/test-11
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-11" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-11/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-11" "src-"
+src_file=$dstfilename
+
+mv $M0/test-11/$src0_file $M0/test-11/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-11" "dst-"
+dst_file=$dstfilename
+TEST touch $M0/test-11/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-11/$src_file $M0/test-11/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-11/$dst_file 2>/dev/null
+TEST file_existence_check test-11/$dst_file "$V0-client-0"
+TEST file_does_not_exist test-11/$src_file
+EXPECT "0" file_is_linkto $dst_hash_brick/test-11/$dst_file
+
+
+################### Test 12 ####################################
+
+# src_hash != src_cached
+# dst_hash = dst_cached = src_hash
+
+echo " **** Test 12 **** "
+
+TEST mkdir $M0/test-12
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-12" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-12/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-12" "src-"
+src_file=$dstfilename
+
+mv $M0/test-12/$src0_file $M0/test-12/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-12" "dst-"
+dst_file=$dstfilename
+TEST touch $M0/test-12/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-12/$src_file $M0/test-12/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-1")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-12/$dst_file 2>/dev/null
+TEST file_existence_check test-12/$dst_file "$V0-client-1" "$V0-client-0"
+TEST file_does_not_exist test-12/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-12/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-12/$dst_file
+
+################### Test 13 ####################################
+
+# src_hash != src_cached
+# dst_hash = dst_cached
+# dst_hash != src_cached
+# dst_hash != src_hash
+
+echo " **** Test 13 **** "
+
+TEST mkdir $M0/test-13
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-13" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-13/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-13" "src-"
+src_file=$dstfilename
+
+mv $M0/test-13/$src0_file $M0/test-13/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-13" "dst-"
+dst_file=$dstfilename
+TEST touch $M0/test-13/$dst_file
+
+# 2. Rename the file
+
+mv $M0/test-13/$src_file $M0/test-13/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-2")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-13/$dst_file 2>/dev/null
+TEST file_existence_check test-13/$dst_file "$V0-client-2" "$V0-client-0"
+TEST file_does_not_exist test-13/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-13/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-13/$dst_file
+
+
+################### Test 14 ####################################
+
+# src_hash != src_cached
+# dst_hash = src_hash
+# dst_cached = src_cached
+
+echo " **** Test 14 **** "
+
+TEST mkdir $M0/test-14
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-14" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-14/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-14" "src-"
+src_file=$dstfilename
+
+mv $M0/test-14/$src0_file $M0/test-14/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-14" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-14/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-14" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-14/$dst0_file $M0/test-14/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-14/$src_file $M0/test-14/$dst_file
+
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-1")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-14/$dst_file 2>/dev/null
+TEST file_existence_check test-14/$dst_file "$V0-client-1" "$V0-client-0"
+TEST file_does_not_exist test-14/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-14/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-14/$dst_file
+
+################### Test 15 ####################################
+
+# src_hash != src_cached
+# dst_hash != src_hash
+# dst_hash != src_cached
+# dst_cached = src_cached
+
+echo " **** Test 15 **** "
+
+TEST mkdir $M0/test-15
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-15" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-15/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-15" "src-"
+src_file=$dstfilename
+
+mv $M0/test-15/$src0_file $M0/test-15/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-15" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-15/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-15" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-15/$dst0_file $M0/test-15/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-15/$src_file $M0/test-15/$dst_file
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-2")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-15/$dst_file 2>/dev/null
+TEST file_existence_check test-15/$dst_file "$V0-client-2" "$V0-client-0"
+TEST file_does_not_exist test-15/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-15/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-15/$dst_file
+
+
+
+################### Test 16 ####################################
+
+# src_hash != src_cached
+# dst_hash = src_cached
+# dst_cached = src_hash
+
+echo " **** Test 16 **** "
+
+TEST mkdir $M0/test-16
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-16" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-16/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-16" "src-"
+src_file=$dstfilename
+
+mv $M0/test-16/$src0_file $M0/test-16/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-16" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-16/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-16" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-16/$dst0_file $M0/test-16/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-16/$src_file $M0/test-16/$dst_file
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-16/$dst_file 2>/dev/null
+TEST file_existence_check test-16/$dst_file "$V0-client-0"
+TEST file_does_not_exist test-16/$src_file
+EXPECT "0" file_is_linkto $dst_hash_brick/test-16/$dst_file
+
+
+################### Test 17 ####################################
+
+# src_hash != src_cached
+# dst_hash != dst_cached
+# dst_hash != src_hash != src_cached
+# dst_cached = src_hash
+
+
+echo " **** Test 17 **** "
+
+TEST mkdir $M0/test-17
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-17" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-17/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-17" "src-"
+src_file=$dstfilename
+
+mv $M0/test-17/$src0_file $M0/test-17/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-17" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-17/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-17" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-17/$dst0_file $M0/test-17/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-17/$src_file $M0/test-17/$dst_file
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-2")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-17/$dst_file 2>/dev/null
+TEST file_existence_check test-17/$dst_file "$V0-client-2" "$V0-client-0"
+TEST file_does_not_exist test-17/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-17/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-17/$dst_file
+
+
+################### Test 18 ####################################
+
+# src_hash != src_cached
+# dst_hash != dst_cached
+# dst_hash != src_hash != src_cached != dst_cached
+
+
+echo " **** Test 18 **** "
+
+TEST mkdir $M0/test-18
+
+
+# 1. Create src and dst files
+
+TEST first_filename_with_same_hashsubvol "$V0-client-0" "$M0/test-18" "src0-"
+src0_file=$dstfilename
+TEST touch $M0/test-18/$src0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-1" "$M0/test-18" "src-"
+src_file=$dstfilename
+
+mv $M0/test-18/$src0_file $M0/test-18/$src_file
+
+
+TEST first_filename_with_same_hashsubvol "$V0-client-2" "$M0/test-18" "dst0-"
+dst0_file=$dstfilename
+TEST touch $M0/test-18/$dst0_file
+
+TEST first_filename_with_same_hashsubvol "$V0-client-3" "$M0/test-18" "dst-"
+dst_file=$dstfilename
+
+mv $M0/test-18/$dst0_file $M0/test-18/$dst_file
+
+
+# 2. Rename the file
+
+mv $M0/test-18/$src_file $M0/test-18/$dst_file
+
+# 3. Validate
+
+dst_hash_brick=$(get_brick_path_for_subvol "$V0-client-3")
+dst_cached_brick=$(get_brick_path_for_subvol "$V0-client-0")
+
+TEST stat $M0/test-18/$dst_file 2>/dev/null
+TEST file_existence_check test-18/$dst_file "$V0-client-3" "$V0-client-0"
+TEST file_does_not_exist test-18/$src_file
+EXPECT "1" file_is_linkto $dst_hash_brick/test-18/$dst_file
+EXPECT "0" file_is_linkto $dst_cached_brick/test-18/$dst_file
+
+
+# Cleanup
+cleanup
+
diff --git a/tests/basic/distribute/force-migration.t b/tests/basic/distribute/force-migration.t
new file mode 100644
index 00000000000..f6c4997a505
--- /dev/null
+++ b/tests/basic/distribute/force-migration.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This tests checks if the file migration fails with force-migration
+#option set to off.
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST touch $M0/file
+#This rename creates a link file for tile in the other brick.
+TEST mv $M0/file $M0/tile
+#Lets keep writing to the file which will have a open fd
+dd if=/dev/zero of=$M0/tile bs=1b &
+bg_pid=$!
+#Now rebalance will try to skip the file
+TEST $CLI volume set $V0 force-migration off
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+skippedcount=`gluster v rebalance $V0 status | awk 'NR==3{print $6}'`
+TEST [[ $skippedcount -eq 1 ]]
+#file should be migrated now
+TEST $CLI volume set $V0 force-migration on
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+skippedcount=`gluster v rebalance $V0 status | awk 'NR==3{print $6}'`
+rebalancedcount=`gluster v rebalance $V0 status | awk 'NR==3{print $2}'`
+TEST [[ $skippedcount -eq 0 ]]
+TEST [[ $rebalancedcount -eq 1 ]]
+kill -9 $bg_pid > /dev/null 2>&1
+wait > /dev/null 2>&1
+cleanup
+#Bad test because we are not sure writes are happening at the time of
+#rebalance. We need to write a test case which makes sure client
+#writes happen during rebalance. One way would be to set S+T bits on
+#src and write to file from client and then start rebalance. Currently
+#marking this as bad test.
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
+
diff --git a/tests/basic/distribute/lookup.t b/tests/basic/distribute/lookup.t
new file mode 100644
index 00000000000..f757bd99fd9
--- /dev/null
+++ b/tests/basic/distribute/lookup.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../common-utils.rc
+
+# Test overview:
+# Check that non-privileged users can also clean up stale linkto files
+#
+# 1. Use the current parallel-readdir behaviour of changing the DHT child subvols
+# in the graph to generate stale linkto files
+# 2. Access the file with the stale linkto file as a non-root user
+# 3. This should now succeed (returned EIO before commit 3fb1df7870e03c9de)
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0-{1..3}
+TEST $CLI volume start $V0
+
+# Mount using FUSE and create a file
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST glusterfs -s $H0 --volfile-id $V0 $M1
+
+ls $M0/FILE-1
+EXPECT "2" echo $?
+
+
+# Create a file and a directory on $M0
+TEST dd if=/dev/urandom of=$M0/FILE-1 count=1 bs=16k
+TEST mkdir $M0/dir1
+
+ls $M0/FILE-1
+EXPECT "0" echo $?
+
+ls $M0/dir1
+EXPECT "0" echo $?
+
+#Use a fresh mount so as to trigger a fresh lookup
+TEST glusterfs -s $H0 --volfile-id $V0 $M1
+
+TEST ls $M1/FILE-1
+EXPECT "0" echo $?
+
+
+ls $M1/dir1
+EXPECT "0" echo $?
+
+# Cleanup
+cleanup
+
diff --git a/tests/basic/distribute/non-root-unlink-stale-linkto.t b/tests/basic/distribute/non-root-unlink-stale-linkto.t
new file mode 100644
index 00000000000..d6c866ffc8e
--- /dev/null
+++ b/tests/basic/distribute/non-root-unlink-stale-linkto.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../common-utils.rc
+
+# Test overview:
+# Check that non-privileged users can also clean up stale linkto files
+#
+# 1. Use the current parallel-readdir behaviour of changing the DHT child subvols
+# in the graph to generate stale linkto files
+# 2. Access the file with the stale linkto file as a non-root user
+# 3. This should now succeed (returned EIO before commit 3fb1df7870e03c9de)
+
+USERNAME=user11
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0-{1,2}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 performance.parallel-readdir on
+
+# Mount using FUSE and create a file
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+# Create a file for testing
+TEST dd if=/dev/urandom of=$M0/FILE-1 count=1 bs=16k
+
+#Rename to create a linkto file
+TEST mv $M0/FILE-1 $M0/FILE-2
+
+# This should change the graph and cause the linkto values to become stale
+TEST $CLI volume set $V0 performance.parallel-readdir off
+
+$CLI volume set $V0 allow-insecure on
+
+
+TEST useradd -m $USERNAME
+
+#Use a fresh mount so as to trigger a lookup everywhere
+TEST glusterfs -s $H0 --volfile-id $V0 $M1
+TEST run_cmd_as_user $USERNAME "ls $M1/FILE-2"
+
+
+# Cleanup
+TEST userdel --force $USERNAME
+cleanup
+
diff --git a/tests/basic/distribute/spare_file_rebalance.t b/tests/basic/distribute/spare_file_rebalance.t
new file mode 100644
index 00000000000..061c02f7392
--- /dev/null
+++ b/tests/basic/distribute/spare_file_rebalance.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+# Initialize
+#------------------------------------------------------------
+cleanup;
+
+# Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+# Create a volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+# Verify volume creation
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Start volume and verify successful start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+#------------------------------------------------------------
+
+# Test case - Create sparse files on MP and verify
+# file info after rebalance
+#------------------------------------------------------------
+
+# Create some sparse files and get their size
+TEST cd $M0;
+dd if=/dev/urandom of=sparse_file bs=10k count=1 seek=2M
+cp --sparse=always sparse_file sparse_file_3;
+
+# Add a 3rd brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3;
+
+# Trigger rebalance
+TEST $CLI volume rebalance $V0 start force;
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed;
+
+# Compare original and rebalanced files
+TEST cd $B0/${V0}2
+TEST cmp sparse_file $B0/${V0}3/sparse_file_3
+EXPECT_WITHIN 30 "";
+
+cleanup;
diff --git a/tests/basic/distribute/throttle-rebal.t b/tests/basic/distribute/throttle-rebal.t
new file mode 100644
index 00000000000..f4823cf4f21
--- /dev/null
+++ b/tests/basic/distribute/throttle-rebal.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+# Test to check
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#Check rebal-throttle set option sanity
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2
+TEST $CLI volume start $V0
+
+function set_throttle {
+ local level=$1
+ $CLI volume set $V0 cluster.rebal-throttle $level 2>&1 |grep -oE 'success|failed'
+}
+
+#Determine number of cores
+cores=$(cat /proc/cpuinfo | grep processor | wc -l)
+if [ "$cores" == "" ]; then
+ echo "Could not get number of cores available"
+fi
+
+THROTTLE_LEVEL="lazy"
+EXPECT "success" set_throttle $THROTTLE_LEVEL
+EXPECT "$THROTTLE_LEVEL" echo `$CLI volume info | grep rebal-throttle | awk '{print $2}'`
+
+THROTTLE_LEVEL="normal"
+EXPECT "success" set_throttle $THROTTLE_LEVEL
+EXPECT "$THROTTLE_LEVEL" echo `$CLI volume info | grep rebal-throttle | awk '{print $2}'`
+
+
+THROTTLE_LEVEL="aggressive"
+EXPECT "success" set_throttle $THROTTLE_LEVEL
+EXPECT "$THROTTLE_LEVEL" echo `$CLI volume info | grep rebal-throttle | awk '{print $2}'`
+
+THROTTLE_LEVEL="garbage"
+EXPECT "failed" set_throttle $THROTTLE_LEVEL
+
+#check if throttle-level is still aggressive
+EXPECT "aggressive" echo `$CLI volume info | grep rebal-throttle | awk '{print $2}'`
+
+EXPECT "success" set_throttle $cores
+
+#Setting thorttle number to be more than the number of cores should fail
+THORTTLE_LEVEL=$((cores+1))
+TEST echo $THORTTLE_LEVEL
+EXPECT "failed" set_throttle $THROTTLE_LEVEL
+EXPECT "$cores" echo `$CLI volume info | grep rebal-throttle | awk '{print $2}'`
+
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/basic/ec/dht-rename.t b/tests/basic/ec/dht-rename.t
new file mode 100644
index 00000000000..81b41ff4c78
--- /dev/null
+++ b/tests/basic/ec/dht-rename.t
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks notify part of ec
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+TEST touch $M0/1
+TEST mv $M0/1 $M0/10
+cleanup
diff --git a/tests/basic/ec/ec-1468261.t b/tests/basic/ec/ec-1468261.t
new file mode 100644
index 00000000000..77d704cf880
--- /dev/null
+++ b/tests/basic/ec/ec-1468261.t
@@ -0,0 +1,95 @@
+#!/bin/bash
+#
+# This test case verifies handling node down scenario with optimistic
+# changelog enabled on EC volume.
+###
+
+SCRIPT_TIMEOUT=300
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume set $V0 disperse.optimistic-change-log on
+TEST $CLI volume set $V0 disperse.other-eager-lock on
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Verify that all is good
+TEST mkdir $M0/test_dir
+TEST touch $M0/test_dir/file
+sleep 2
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}0/test_dir
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}1/test_dir
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}2/test_dir
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}3/test_dir
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}4/test_dir
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^$" get_hex_xattr trusted.ec.dirty $B0/${V0}5/test_dir
+
+#Kill two bricks and touch a file
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+TEST touch $M0/test_dir/new_file
+sleep 2
+
+#Dirty should be set on up bricks
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}2/test_dir
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}3/test_dir
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}4/test_dir
+EXPECT_WITHIN $IO_WAIT_TIMEOUT "^00000000000000010000000000000001$" get_hex_xattr trusted.ec.dirty $B0/${V0}5/test_dir
+
+#Bring up the down bricks
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#remove mount point contents
+TEST rm -rf $M0"/*" 2>/dev/null
+
+# unmount and remount the volume
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+#Create a tar file
+TEST mkdir /tmp/test_dir
+seq 1 3000 | xargs -n 1 -P 20 -I {} dd if=/dev/urandom of=/tmp/test_dir/file-{} bs=10K count=1
+tar -cf /tmp/test_dir.tar /tmp/test_dir/ 2>/dev/null
+rm -rf /tmp/test_dir/
+
+#Untar the tar file
+tar -C $M0 -xf /tmp/test_dir.tar 2>/dev/null&
+
+#Kill 1st and 2nd brick
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#Stop untaring
+TEST kill %1
+rm -f /tmp/test_dir.tar
+
+#Bring up the down bricks
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#Kill 3rd and 4th brick
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST kill_brick $V0 $H0 $B0/${V0}4
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#remove mount point contents
+#this will fail if things are wrong
+TEST rm -rf $M0"/*" 2>/dev/null
+
+cleanup
diff --git a/tests/basic/ec/ec-3-1.t b/tests/basic/ec/ec-3-1.t
new file mode 100644
index 00000000000..511ca6420a2
--- /dev/null
+++ b/tests/basic/ec/ec-3-1.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks basic dispersed volume functionality and cli interface
+
+DISPERSE=3
+REDUNDANCY=1
+
+# This must be equal to 36 * $DISPERSE + 109
+TESTS_EXPECTED_IN_LOOP=217
+
+. $(dirname $0)/ec-common
diff --git a/tests/basic/ec/ec-4-1.t b/tests/basic/ec/ec-4-1.t
new file mode 100644
index 00000000000..3f0d0c72e44
--- /dev/null
+++ b/tests/basic/ec/ec-4-1.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks basic dispersed volume functionality and cli interface
+
+DISPERSE=4
+REDUNDANCY=1
+
+# This must be equal to 36 * $DISPERSE + 109
+TESTS_EXPECTED_IN_LOOP=253
+
+. $(dirname $0)/ec-common
diff --git a/tests/basic/ec/ec-5-2.t b/tests/basic/ec/ec-5-2.t
new file mode 100644
index 00000000000..6d9e91b0f58
--- /dev/null
+++ b/tests/basic/ec/ec-5-2.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks basic dispersed volume functionality and cli interface
+
+DISPERSE=5
+REDUNDANCY=2
+
+# This must be equal to 36 * $DISPERSE + 109
+TESTS_EXPECTED_IN_LOOP=289
+
+. $(dirname $0)/ec-common
diff --git a/tests/basic/ec/ec-6-2.t b/tests/basic/ec/ec-6-2.t
new file mode 100644
index 00000000000..b4451f905a1
--- /dev/null
+++ b/tests/basic/ec/ec-6-2.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks basic dispersed volume functionality and cli interface
+
+DISPERSE=6
+REDUNDANCY=2
+
+# This must be equal to 36 * $DISPERSE + 109
+TESTS_EXPECTED_IN_LOOP=325
+
+. $(dirname $0)/ec-common
diff --git a/tests/basic/ec/ec-anonymous-fd.t b/tests/basic/ec/ec-anonymous-fd.t
new file mode 100644
index 00000000000..a61628182ef
--- /dev/null
+++ b/tests/basic/ec/ec-anonymous-fd.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup
+function num_entries {
+ ls -l $1 | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/file1
+TEST fd_write $fd1 testing
+TEST cat $M0/file1
+TEST rm -rf $M0/file1
+TEST fd_write $fd1 testing
+TEST fd_write $fd1 testing
+TEST fd_write $fd1 testing
+
+EXPECT "^2$" num_entries $B0/${V0}0/.glusterfs/unlink/
+EXPECT "^2$" num_entries $B0/${V0}1/.glusterfs/unlink/
+EXPECT "^2$" num_entries $B0/${V0}2/.glusterfs/unlink/
+EXPECT "^2$" num_entries $B0/${V0}3/.glusterfs/unlink/
+EXPECT "^2$" num_entries $B0/${V0}4/.glusterfs/unlink/
+EXPECT "^2$" num_entries $B0/${V0}5/.glusterfs/unlink/
+TEST fd_close $fd1;
+EXPECT_WITHIN $UNLINK_TIMEOUT "^1$" num_entries $B0/${V0}0/.glusterfs/unlink/
+EXPECT_WITHIN $UNLINK_TIMEOUT "^1$" num_entries $B0/${V0}1/.glusterfs/unlink/
+EXPECT_WITHIN $UNLINK_TIMEOUT "^1$" num_entries $B0/${V0}2/.glusterfs/unlink/
+EXPECT_WITHIN $UNLINK_TIMEOUT "^1$" num_entries $B0/${V0}3/.glusterfs/unlink/
+EXPECT_WITHIN $UNLINK_TIMEOUT "^1$" num_entries $B0/${V0}4/.glusterfs/unlink/
+EXPECT_WITHIN $UNLINK_TIMEOUT "^1$" num_entries $B0/${V0}5/.glusterfs/unlink/
+
+cleanup;
diff --git a/tests/basic/ec/ec-background-heals.t b/tests/basic/ec/ec-background-heals.t
new file mode 100644
index 00000000000..29778a4f818
--- /dev/null
+++ b/tests/basic/ec/ec-background-heals.t
@@ -0,0 +1,105 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks background heals option
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume set $V0 disperse.eager-lock off
+TEST $CLI volume set $V0 disperse.other-eager-lock off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+TEST touch $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}2
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo abc > $M0/a
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+#Accessing file shouldn't heal the file
+EXPECT "abc" cat $M0/a
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
+TEST $CLI volume set $V0 disperse.background-heals 1
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "128" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+#Accessing file should heal the file now
+EXPECT "abc" cat $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#Test above test cases with reset instead of setting background-heals to 1
+TEST $CLI volume set $V0 disperse.heal-wait-qlength 1024
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1024" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+TEST $CLI volume set $V0 disperse.background-heals 0
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+TEST $CLI volume set $V0 disperse.heal-wait-qlength 200 #Changing qlength shouldn't affect anything now
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo abc > $M0/a
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+#Accessing file shouldn't heal the file
+EXPECT "abc" cat $M0/a
+sleep 3
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
+TEST $CLI volume reset $V0 disperse.background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "8" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "200" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+#Accessing file should heal the file now
+EXPECT "abc" cat $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#Test that disabling background-heals still drains the queue
+TEST $CLI volume set $V0 disperse.background-heals 1
+TEST touch $M0/{a,b,c,d}
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "200" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+TEST truncate -s 1GB $M0/a
+echo abc > $M0/b
+echo abc > $M0/c
+echo abc > $M0/d
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+TEST chown root:root $M0/{a,b,c,d}
+TEST $CLI volume set $V0 disperse.background-heals 0
+EXPECT_NOT "0" mount_get_option_value $M0 $V0-disperse-0 heal-waiters
+TEST truncate -s 0 $M0/a # This completes the heal fast ;-)
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#Test that background heals get rejected on meeting background-qlen limit
+TEST $CLI volume set $V0 disperse.background-heals 1
+TEST $CLI volume set $V0 disperse.heal-wait-qlength 0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "1" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+TEST truncate -s 1GB $M0/a
+echo abc > $M0/b
+echo abc > $M0/c
+echo abc > $M0/d
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+TEST chown root:root $M0/{a,b,c,d}
+EXPECT "0" mount_get_option_value $M0 $V0-disperse-0 heal-waiters
+cleanup
diff --git a/tests/basic/ec/ec-badfd.c b/tests/basic/ec/ec-badfd.c
new file mode 100644
index 00000000000..8be23c10eaf
--- /dev/null
+++ b/tests/basic/ec/ec-badfd.c
@@ -0,0 +1,124 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int
+fill_iov(struct iovec *iov, char fillchar, int count)
+{
+ int ret = -1;
+
+ iov->iov_base = malloc(count + 1);
+ if (iov->iov_base == NULL) {
+ return ret;
+ } else {
+ iov->iov_len = count;
+ ret = 0;
+ }
+ memset(iov->iov_base, fillchar, count);
+ memset(iov->iov_base + count, '\0', 1);
+
+ return ret;
+}
+
+int
+write_sync(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ struct iovec iov = {0};
+
+ ret = fill_iov(&iov, 'a', char_count);
+ if (ret) {
+ fprintf(stderr, "failed to create iov");
+ goto out;
+ }
+
+ ret = glfs_pwritev(glfd, &iov, 1, 0, flags);
+out:
+ if (ret < 0) {
+ fprintf(stderr, "glfs_pwritev failed, %d", errno);
+ }
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+ int ret = 1;
+ char volume_cmd[4096] = {0};
+
+ if (argc != 4) {
+ fprintf(stderr, "Syntax: %s <host> <volname> <file>\n", argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_set_logging(fs, "/tmp/ec-badfd.log", 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ fd = glfs_open(fs, argv[3], O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+
+ ret = write_sync(fs, fd, 16);
+ if (ret < 0) {
+ fprintf(stderr, "write_sync failed\n");
+ }
+
+ snprintf(volume_cmd, sizeof(volume_cmd),
+ "gluster --mode=script volume stop %s", argv[2]);
+ /*Stop the volume so that update-size-version fails*/
+ system(volume_cmd);
+ sleep(8); /* 3 seconds more than eager-lock-timeout*/
+ snprintf(volume_cmd, sizeof(volume_cmd),
+ "gluster --mode=script volume start %s", argv[2]);
+ system(volume_cmd);
+ sleep(8); /*wait for bricks to come up*/
+ ret = glfs_fsync(fd, NULL, NULL);
+ if (ret == 0) {
+ fprintf(stderr, "fsync succeeded on a BADFD\n");
+ exit(1);
+ }
+
+ ret = glfs_close(fd);
+ if (ret == 0) {
+ fprintf(stderr, "flush succeeded on a BADFD\n");
+ exit(1);
+ }
+ ret = 0;
+
+out:
+ unlink("/tmp/ec-badfd.log");
+ glfs_fini(fs);
+
+ return ret;
+}
diff --git a/tests/basic/ec/ec-badfd.t b/tests/basic/ec/ec-badfd.t
new file mode 100755
index 00000000000..56feb47f115
--- /dev/null
+++ b/tests/basic/ec/ec-badfd.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{1..6}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 disperse.eager-lock-timeout 5
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+TEST touch $M0/file
+
+TEST build_tester $(dirname $0)/ec-badfd.c -lgfapi -Wall -O2
+TEST $(dirname $0)/ec-badfd $H0 $V0 /file
+cleanup_tester $(dirname ${0})/ec-badfd
+
+cleanup;
diff --git a/tests/basic/ec/ec-common b/tests/basic/ec/ec-common
new file mode 100644
index 00000000000..152e3b51236
--- /dev/null
+++ b/tests/basic/ec/ec-common
@@ -0,0 +1,145 @@
+SIZE_LIST="1048576 1000 12345 0"
+
+LAST_BRICK=$(($DISPERSE - 1))
+
+CHUNK_SIZE=512
+
+function fragment_size
+{
+ local fragments=$(($DISPERSE - $REDUNDANCY))
+ local block_size=$(($CHUNK_SIZE * $fragments))
+ local size=$(($1 + $block_size - 1))
+
+ echo $((( $size - ( $size ) % $block_size ) / $fragments))
+}
+
+cleanup
+
+tmp=`mktemp -d -t ${0##*/}.XXXXXX`
+if [ ! -d $tmp ]; then
+ exit 1
+fi
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy $REDUNDANCY $H0:$B0/${V0}{0..$LAST_BRICK}
+EXPECT 'Created' volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "$DISPERSE" ec_child_up_count $V0 0
+
+TEST dd if=/dev/urandom of=$tmp/small bs=1024 count=1
+TEST dd if=/dev/urandom of=$tmp/big bs=1024 count=4096
+
+cs_small=$(sha1sum $tmp/small | awk '{ print $1 }')
+cs_big=$(sha1sum $tmp/big | awk '{ print $1 }')
+cp $tmp/small $tmp/small1
+for size in $SIZE_LIST; do
+ truncate -s $size $tmp/small1
+ eval cs_small_truncate[$size]=$(sha1sum $tmp/small1 | awk '{ print $1 }')
+done
+cp $tmp/big $tmp/big1
+for size in $SIZE_LIST; do
+ truncate -s $size $tmp/big1
+ eval cs_big_truncate[$size]=$(sha1sum $tmp/big1 | awk '{ print $1 }')
+done
+
+TEST df -h $M0
+TEST stat $M0
+
+for idx in `seq 0 $LAST_BRICK`; do
+ brick[$idx]=$(gf_get_gfid_backend_file_path $B0/$V0$idx)
+done
+
+TEST stat $M0/
+TEST mkdir $M0/dir1
+TEST [ -d $M0/dir1 ]
+TEST touch $M0/file1
+TEST [ -f $M0/file1 ]
+
+for dir in . dir1; do
+ TEST cp $tmp/small $M0/$dir/small
+ TEST [ -f $M0/$dir/small ]
+ fsize=$(fragment_size 1024)
+ EXPECT "1024" stat -c "%s" $M0/$dir/small
+ for idx in `seq 0 $LAST_BRICK`; do
+ EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/small
+ done
+
+ EXPECT "$cs_small" echo $(sha1sum $M0/$dir/small | awk '{ print $1 }')
+
+ TEST cp $tmp/big $M0/$dir/big
+ TEST [ -f $M0/$dir/big ]
+ fsize=$(fragment_size 4194304)
+ EXPECT "4194304" stat -c "%s" $M0/$dir/big
+ for idx in `seq 0 $LAST_BRICK`; do
+ EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/big
+ done
+
+ EXPECT "$cs_big" echo $(sha1sum $M0/$dir/big | awk '{ print $1 }')
+
+# Give enough time for current operations to complete. Otherwise the
+# following kill_brick can cause data corruption and self-heal will be
+# needed, but this script is not prepared to handle self-healing.
+ sleep 2
+
+ for idx in `seq 0 $LAST_BRICK`; do
+ TEST kill_brick $V0 $H0 $B0/$V0$idx
+
+ EXPECT "1024" stat -c "%s" $M0/$dir/small
+ EXPECT "4194304" stat -c "%s" $M0/$dir/big
+
+ TEST $CLI volume start $V0 force
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "$DISPERSE" ec_child_up_count $V0 0
+ done
+
+ for size in $SIZE_LIST; do
+ TEST truncate -s $size $M0/$dir/small
+ TEST [ -f $M0/$dir/small ]
+ fsize=$(fragment_size $size)
+ EXPECT "$size" stat -c "%s" $M0/$dir/small
+ for idx in `seq 0 $LAST_BRICK`; do
+ EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/small
+ done
+
+ EXPECT "${cs_small_truncate[$size]}" echo $(sha1sum $M0/$dir/small | awk '{ print $1 }')
+
+ TEST truncate -s $size $M0/$dir/big
+ TEST [ -f $M0/$dir/big ]
+ EXPECT "$size" stat -c "%s" $M0/$dir/big
+ for idx in `seq 0 $LAST_BRICK`; do
+ EXPECT "$fsize" stat -c "%s" ${brick[$idx]}/$dir/big
+ done
+
+ EXPECT "${cs_big_truncate[$size]}" echo $(sha1sum $M0/$dir/big | awk '{ print $1 }')
+ done
+
+ TEST rm -f $M0/$dir/small
+ TEST [ ! -e $M0/$dir/small ]
+ for idx in `seq 0 $LAST_BRICK`; do
+ TEST [ ! -e ${brick[$idx]}/$dir/small ]
+ done
+
+ TEST rm -f $M0/$dir/big
+ TEST [ ! -e $M0/$dir/big ]
+ for idx in `seq 0 $LAST_BRICK`; do
+ TEST [ ! -e ${brick[$idx]}/$dir/big ]
+ done
+done
+
+TEST rmdir $M0/dir1
+TEST [ ! -e $M0/dir1 ]
+for idx in `seq 0 $LAST_BRICK`; do
+ TEST [ ! -e ${brick[$idx]}/dir1 ]
+done
+
+TEST rm -f $M0/file1
+TEST [ ! -e $M0/file1 ]
+for idx in `seq 0 $LAST_BRICK`; do
+ TEST [ ! -e ${brick[$idx]}/file1 ]
+done
+
+rm -rf $tmp
+
+cleanup
diff --git a/tests/basic/ec/ec-cpu-extensions.t b/tests/basic/ec/ec-cpu-extensions.t
new file mode 100644
index 00000000000..c9af27ea234
--- /dev/null
+++ b/tests/basic/ec/ec-cpu-extensions.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+DISPERSE=18
+REDUNDANCY=2
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TESTS_EXPECTED_IN_LOOP=96
+
+function check_contents
+{
+ local src=$1
+ local cs=$2
+
+ TEST cp $src $M0/file
+ TEST [ -f $M0/file ]
+
+ for ext in none x64 sse avx; do
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+ TEST $CLI volume set $V0 disperse.cpu-extensions $ext
+ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "$DISPERSE" ec_child_up_count $V0 0
+
+ EXPECT "$cs" echo $(sha1sum $M0/file | awk '{ print $1 }')
+ done
+
+ TEST rm -f $M0/file
+}
+
+cleanup
+
+tmp=`mktemp -p ${LOGDIR} -d -t ${0##*/}.XXXXXX`
+if [ ! -d $tmp ]; then
+ exit 1
+fi
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy $REDUNDANCY $H0:$B0/${V0}{1..$DISPERSE}
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume set $V0 disperse.read-policy round-robin
+EXPECT 'Created' volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+
+TEST dd if=/dev/urandom of=$tmp/file bs=1048576 count=1
+cs_file=$(sha1sum $tmp/file | awk '{ print $1 }')
+
+for ext in none x64 sse avx; do
+ TEST $CLI volume set $V0 disperse.cpu-extensions $ext
+ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "$DISPERSE" ec_child_up_count $V0 0
+
+ check_contents $tmp/file $cs_file
+
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+done
+
+TEST rm -rf $tmp
+
+cleanup
diff --git a/tests/basic/ec/ec-data-heal.t b/tests/basic/ec/ec-data-heal.t
new file mode 100755
index 00000000000..2672661c6b1
--- /dev/null
+++ b/tests/basic/ec/ec-data-heal.t
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This test checks data corruption after heal while IO is going on
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+############ Start IO ###########
+TEST touch $M0/file
+#start background IO on file
+dd if=/dev/urandom of=$M0/file conv=fdatasync &
+iopid=$(echo $!)
+
+
+############ Kill and start brick0 for heal ###########
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+#sleep so that data can be written which will be healed later
+sleep 10
+TEST $CLI volume start $V0 force
+##wait for heal info to become 0 and kill IO
+EXPECT_WITHIN $IO_HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+kill $iopid
+EXPECT_WITHIN $IO_HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############### Check md5sum #########################
+
+## unmount and mount get md5sum after killing brick0
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+mdsum0=`md5sum $M0/file | awk '{print $1}'`
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+## unmount and mount get md5sum after killing brick1
+
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+mdsum1=`md5sum $M0/file | awk '{print $1}'`
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+## unmount and mount get md5sum after killing brick2
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+mdsum2=`md5sum $M0/file | awk '{print $1}'`
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+# compare all the three md5sums
+EXPECT "$mdsum0" echo $mdsum1
+EXPECT "$mdsum0" echo $mdsum2
+EXPECT "$mdsum1" echo $mdsum2
+
+cleanup
diff --git a/tests/basic/ec/ec-dirty-flags.t b/tests/basic/ec/ec-dirty-flags.t
new file mode 100644
index 00000000000..68e66103f08
--- /dev/null
+++ b/tests/basic/ec/ec-dirty-flags.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This checks if the fop keeps the dirty flags settings correctly after
+# finishing the fop.
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+cd $M0
+for i in {1..1000}; do dd if=/dev/zero of=file-${i} bs=512k count=2; done
+cd -
+EXPECT "^0$" get_pending_heal_count $V0
+
+cleanup
diff --git a/tests/basic/ec/ec-discard.t b/tests/basic/ec/ec-discard.t
new file mode 100644
index 00000000000..001f4498c86
--- /dev/null
+++ b/tests/basic/ec/ec-discard.t
@@ -0,0 +1,205 @@
+#!/bin/bash
+#
+# Test discard functionality
+#
+# Test that basic discard (hole punch) functionality works via the fallocate
+# command line tool. Hole punch deallocates a region of a file, creating a hole
+# and a zero-filled data region. We verify that hole punch works, frees blocks
+# and that subsequent reads do not read stale data (caches are invalidated).
+#
+# NOTE: fuse fallocate is known to be broken with regard to cache invalidation
+# up to 3.9.0 kernels. Therefore, FOPEN_KEEP_CACHE is not used in this
+# test (opens will invalidate the fuse cache).
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../fallocate.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume set $V0 disperse.optimistic-change-log on
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Check for fallocate and hole punch support
+require_fallocate -l 1m $M0/file
+require_fallocate -p -l 512k $M0/file && rm -f $M0/file
+
+#Write some data, punch a hole and verify the file content changes
+TEST dd if=/dev/urandom of=$M0/file bs=1024k count=1
+TEST cp $M0/file $M0/file.copy.pre
+TEST fallocate -p -o 512k -l 128k $M0/file
+TEST ! cmp $M0/file.copy.pre $M0/file
+TEST rm -f $M0/file $M0/file.copy.pre
+
+#Allocate some blocks, punch a hole and verify block allocation
+TEST fallocate -l 1m $M0/file
+blksz=`stat -c %B $M0/file`
+nblks=`stat -c %b $M0/file`
+TEST [ $(($blksz * $nblks)) -ge 1048576 ]
+TEST fallocate -p -o 512k -l 128k $M0/file
+nblks=`stat -c %b $M0/file`
+TEST [ $(($blksz * $nblks)) -lt $((933889)) ]
+TEST unlink $M0/file
+
+###Punch hole test cases without fallocate
+##With write
+#Touching starting boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 0 -l 500 $B0/test_file
+TEST fallocate -p -o 0 -l 500 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Touching boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 500 -l 1548 $B0/test_file
+TEST fallocate -p -o 500 -l 1548 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Not touching boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 500 -l 1000 $B0/test_file
+TEST fallocate -p -o 500 -l 1000 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Over boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 1500 -l 1000 $B0/test_file
+TEST fallocate -p -o 1500 -l 1000 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+###Punch hole test cases with fallocate
+##Without write
+
+#Zero size
+TEST dd if=/dev/urandom of=$M0/test_file bs=1024 count=8
+TEST ! fallocate -p -o 1500 -l 0 $M0/test_file
+
+#Negative size
+TEST ! fallocate -p -o 1500 -l -100 $M0/test_file
+TEST rm -f $M0/test_file
+
+#Touching boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 2048 -l 2048 $B0/test_file
+TEST fallocate -p -o 2048 -l 2048 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Touching boundary,multiple stripe
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 2048 -l 4096 $B0/test_file
+TEST fallocate -p -o 2048 -l 4096 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+##With write
+
+#Size ends in boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 600 -l 3496 $B0/test_file
+TEST fallocate -p -o 600 -l 3496 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Offset at boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 2048 -l 3072 $B0/test_file
+TEST fallocate -p -o 2048 -l 3072 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Offset and Size not at boundary covering a stripe
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 1500 -l 3000 $B0/test_file
+TEST fallocate -p -o 1500 -l 3000 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+TEST rm -f $B0/test_file $M0/test_file
+
+#Offset and Size not at boundary
+TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=8
+TEST cp $B0/test_file $M0/test_file
+TEST fallocate -p -o 1000 -l 3072 $B0/test_file
+TEST fallocate -p -o 1000 -l 3072 $M0/test_file
+TEST md5_sum=`get_md5_sum $B0/test_file`
+EXPECT $md5_sum get_md5_sum $M0/test_file
+
+#Data Corruption Tests
+#Kill brick1 and brick2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#verify md5 sum
+EXPECT $md5_sum get_md5_sum $M0/test_file
+
+#Bring up the bricks
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Kill brick3 and brick4
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}3
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#verify md5 sum
+EXPECT $md5_sum get_md5_sum $M0/test_file
+
+#Bring up the bricks
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Kill brick5 and brick6
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick $V0 $H0 $B0/${V0}5
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+#verify md5 sum
+EXPECT $md5_sum get_md5_sum $M0/test_file
+
+cleanup
diff --git a/tests/basic/ec/ec-fallocate.t b/tests/basic/ec/ec-fallocate.t
new file mode 100644
index 00000000000..1b827eed7df
--- /dev/null
+++ b/tests/basic/ec/ec-fallocate.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+#
+# Run several commands to verify basic fallocate functionality. We verify that
+# fallocate creates and allocates blocks to a file. We also verify that the keep
+# size option does not modify the file size.
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+# check for fallocate support before continuing the test
+require_fallocate -l 1m -n $M0/file && rm -f $M0/file
+
+# fallocate a file and verify blocks are allocated
+TEST fallocate -l 1m $M0/file
+blksz=`stat -c %b $M0/file`
+nblks=`stat -c %B $M0/file`
+TEST [ $(($blksz * $nblks)) -eq 1048576 ]
+
+TEST unlink $M0/file
+
+# truncate a file to a fixed size, fallocate and verify that the size does not
+# change
+TEST truncate -s 1M $M0/file
+TEST fallocate -l 2m -n $M0/file
+blksz=`stat -c %b $M0/file`
+nblks=`stat -c %B $M0/file`
+sz=`stat -c %s $M0/file`
+TEST [ $sz -eq 1048576 ]
+# Note that gluster currently incorporates a hack to limit the number of blocks
+# reported as allocated to the file by the file size. We have allocated beyond the
+# file size here. Just check for non-zero allocation to avoid setting a land mine
+# for if/when that behavior might change.
+TEST [ ! $(($blksz * $nblks)) -eq 0 ]
+TEST unlink $M0/file
+
+# write some data, fallocate within and outside the range
+# and check for data corruption.
+TEST dd if=/dev/urandom of=$M0/file bs=1024k count=1
+TEST cp $M0/file $M0/file.copy.pre
+TEST fallocate -o 512k -l 128k $M0/file
+TEST cp $M0/file $M0/file.copy.post
+TEST cmp $M0/file.copy.pre $M0/file.copy.post
+TEST fallocate -o 1000k -l 128k $M0/file
+TEST cp $M0/file $M0/file.copy.post2
+TEST ! cmp $M0/file.copy.pre $M0/file.copy.post2
+TEST truncate -s 1M $M0/file.copy.post2
+TEST cmp $M0/file.copy.pre $M0/file.copy.post2
+TEST unlink $M0/file
+
+#Make sure offset/size are modified so that 3 blocks are allocated
+TEST touch $M0/f1
+TEST fallocate -o 1280 -l 1024 $M0/f1
+EXPECT "^2304$" stat -c "%s" $M0/f1
+EXPECT "^1536$" stat -c "%s" $B0/${V0}0/f1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup;
diff --git a/tests/basic/ec/ec-fast-fgetxattr.c b/tests/basic/ec/ec-fast-fgetxattr.c
new file mode 100644
index 00000000000..bf982151861
--- /dev/null
+++ b/tests/basic/ec/ec-fast-fgetxattr.c
@@ -0,0 +1,129 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int cbk_complete = 0;
+ssize_t cbk_ret_val = 0;
+int
+fill_iov(struct iovec *iov, char fillchar, int count)
+{
+ int ret = -1;
+
+ iov->iov_base = malloc(count + 1);
+ if (iov->iov_base == NULL) {
+ return ret;
+ } else {
+ iov->iov_len = count;
+ ret = 0;
+ }
+ memset(iov->iov_base, fillchar, count);
+ memset(iov->iov_base + count, '\0', 1);
+
+ return ret;
+}
+
+void
+write_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ fprintf(stderr, "glfs_write failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+write_async(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ struct iovec iov = {0};
+
+ ret = fill_iov(&iov, 'a', char_count);
+ if (ret) {
+ fprintf(stderr, "failed to create iov");
+ goto out;
+ }
+
+ ret = glfs_pwritev_async(glfd, &iov, 1, 0, flags, write_async_cbk, NULL);
+out:
+ if (ret < 0) {
+ fprintf(stderr, "glfs_pwritev async failed");
+ }
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+ int ret = 1;
+ char buf[1024] = {0};
+
+ if (argc != 4) {
+ fprintf(stderr, "Syntax: %s <host> <volname> <file>\n", argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_set_logging(fs, "/tmp/ec-fgetxattr.log", 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ fd = glfs_open(fs, argv[3], O_RDWR | O_TRUNC);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+
+ ret = write_async(fs, fd, 16);
+ if (ret) {
+ fprintf(stderr, "write_async failed\n");
+ }
+
+ sleep(1);
+ ret = glfs_fgetxattr(fd, "trusted.glusterfs.abc", buf, sizeof buf);
+ while (cbk_complete != 1) {
+ /* ret will be -ve as xattr doesn't exist, and fgetxattr should
+ * return waaaayyy before writev */
+ ret = 0;
+ sleep(1);
+ }
+ if (cbk_ret_val < 0) {
+ fprintf(stderr, "cbk_ret_val is -ve\n");
+ ret = -1;
+ }
+ glfs_close(fd);
+
+out:
+ unlink("/tmp/ec-fgetxattr.log");
+ glfs_fini(fs);
+
+ return ret;
+}
diff --git a/tests/basic/ec/ec-fast-fgetxattr.t b/tests/basic/ec/ec-fast-fgetxattr.t
new file mode 100755
index 00000000000..eb12fa4a0ba
--- /dev/null
+++ b/tests/basic/ec/ec-fast-fgetxattr.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{1..6}
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.client-io-threads off
+TEST $CLI volume set $V0 brick-log-level DEBUG
+TEST $CLI volume set $V0 delay-gen posix
+TEST $CLI volume set $V0 delay-gen.delay-duration 10000000
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.enable read,write
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+TEST touch $M0/file
+
+# Perform two writes to make sure io-threads have enough threads to perform
+# things in parallel when the test execution happens.
+echo abc > $M0/file1 &
+echo abc > $M0/file2 &
+wait
+
+TEST build_tester $(dirname $0)/ec-fast-fgetxattr.c -lgfapi -Wall -O2
+TEST $(dirname $0)/ec-fast-fgetxattr $H0 $V0 /file
+cleanup_tester $(dirname ${0})/ec-fast-fgetxattr
+
+cleanup;
diff --git a/tests/basic/ec/ec-fix-openfd.t b/tests/basic/ec/ec-fix-openfd.t
new file mode 100644
index 00000000000..04fdd802c62
--- /dev/null
+++ b/tests/basic/ec/ec-fix-openfd.t
@@ -0,0 +1,111 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+# This test checks for open fd heal on EC
+
+#Create Volume
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 performance.read-after-open yes
+TEST $CLI volume set $V0 performance.lazy-open no
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Touch a file
+TEST touch "$M0/test_file"
+
+#Kill a brick
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Open the file in write mode
+TEST fd=`fd_available`
+TEST fd_open $fd 'rw' "$M0/test_file"
+
+#Bring up the killed brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+sleep 1
+
+#Test the fd count
+EXPECT "0" get_fd_count $V0 $H0 $B0/${V0}0 test_file
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}1 test_file
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}2 test_file
+
+#Write to file
+dd iflag=fullblock if=/dev/urandom bs=1024 count=2 >&$fd 2>/dev/null
+
+#Test the fd count
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}0 test_file
+
+#Close fd
+TEST fd_close $fd
+
+#Stop the volume
+TEST $CLI volume stop $V0
+
+#Start the volume
+TEST $CLI volume start $V0
+
+#Kill brick1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Calculate md5 sum
+md5sum0=`get_md5_sum "$M0/test_file"`
+
+#Bring up the brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Kill brick2
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Calculate md5 sum
+md5sum1=`get_md5_sum "$M0/test_file"`
+
+#Bring up the brick
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+#Kill brick3
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Unmount and mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+
+#Calculate md5 sum
+md5sum2=`get_md5_sum "$M0/test_file"`
+
+#compare the md5sum
+EXPECT "$md5sum0" echo $md5sum1
+EXPECT "$md5sum0" echo $md5sum2
+EXPECT "$md5sum1" echo $md5sum2
+
+cleanup
diff --git a/tests/basic/ec/ec-internal-xattrs.t b/tests/basic/ec/ec-internal-xattrs.t
new file mode 100644
index 00000000000..5ef0502e016
--- /dev/null
+++ b/tests/basic/ec/ec-internal-xattrs.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks internal xattr handling in ec
+TESTS_EXPECTED_IN_LOOP=11
+
+cleanup
+function get_ec_xattrs
+{
+ getfattr -d -m. -e hex $1 | grep trusted.ec
+}
+
+function get_xattr_count
+{
+ getfattr -d -m. -e hex $1 | grep "trusted" | wc -l
+}
+
+declare -a xattrs=("trusted.ec.config" "trusted.ec.size" "trusted.ec.version" "trusted.ec.heal")
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+# Wait until all 6 children have been recognized by the ec xlator
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+TEST touch $M0/a
+
+#Check that internal xattrs are not modifiable or readable
+for x in "${xattrs[@]}"; do
+ TEST_IN_LOOP ! setfattr -n $x "abc" $M0/a
+ TEST_IN_LOOP ! setfattr -x $x "abc" $M0/a
+ if [ $x != "trusted.ec.heal" ];
+ then
+ TEST_IN_LOOP ! getfattr -n $x $M0/a
+ fi
+done
+
+TEST ! get_ec_xattrs $M0/a
+TEST setfattr -n trusted.abc -v 0x616263 $M0/a
+EXPECT "616263" get_hex_xattr trusted.abc $M0/a
+EXPECT "1" get_xattr_count $M0/a
+cleanup
diff --git a/tests/basic/ec/ec-new-entry.t b/tests/basic/ec/ec-new-entry.t
new file mode 100644
index 00000000000..be97aecd8e2
--- /dev/null
+++ b/tests/basic/ec/ec-new-entry.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+function num_entries {
+ ls -l $1 | wc -l
+}
+
+function get_md5sum {
+ md5sum $1 | awk '{print $1}'
+}
+
+#after replace-brick immediately trusted.ec.version will be absent, so if it
+#is present we can assume that heal attempted on root
+function root_heal_attempted {
+ if [ -z $(get_hex_xattr trusted.ec.version $1) ];
+ then
+ echo "N";
+ else
+ echo "Y";
+ fi
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+touch $M0/{1..10}
+touch $M0/11
+TEST mknod $M0/char c 1 5
+TEST mknod $M0/block b 4 5
+for i in {1..10}; do dd if=/dev/zero of=$M0/$i bs=1M count=1; done
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}5 $H0:$B0/${V0}6 commit force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count_shd $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" root_heal_attempted $B0/${V0}6
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+#ls -l gives "Total" line so number of lines will be 1 more
+EXPECT "^14$" num_entries $B0/${V0}6
+EXPECT "^1 5$" stat -c "%t %T" $B0/${V0}6/char
+EXPECT "^4 5$" stat -c "%t %T" $B0/${V0}6/block
+ec_version=$(get_hex_xattr trusted.ec.version $B0/${V0}0)
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}1
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}2
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}3
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}4
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}6
+file_md5sum=$(get_md5sum $M0/1)
+empty_md5sum=$(get_md5sum $M0/11)
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo $file_md5sum
+EXPECT "$file_md5sum" get_md5sum $M0/1
+EXPECT "$file_md5sum" get_md5sum $M0/2
+EXPECT "$file_md5sum" get_md5sum $M0/3
+EXPECT "$file_md5sum" get_md5sum $M0/4
+EXPECT "$file_md5sum" get_md5sum $M0/5
+EXPECT "$file_md5sum" get_md5sum $M0/6
+EXPECT "$file_md5sum" get_md5sum $M0/7
+EXPECT "$file_md5sum" get_md5sum $M0/8
+EXPECT "$file_md5sum" get_md5sum $M0/9
+EXPECT "$file_md5sum" get_md5sum $M0/10
+EXPECT "$empty_md5sum" get_md5sum $M0/11
+
+cleanup;
diff --git a/tests/basic/ec/ec-notify.t b/tests/basic/ec/ec-notify.t
new file mode 100644
index 00000000000..53290b7c798
--- /dev/null
+++ b/tests/basic/ec/ec-notify.t
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks notify part of ec
+
+# We *know* some of these mounts will succeed but not be actually usable
+# (terrible idea IMO), so speed things up and eliminate some noise by
+# overriding this function.
+_GFS () {
+ glusterfs "$@"
+}
+
+ec_up_brick_count () {
+ local bricknum
+ for bricknum in $(seq 0 2); do
+ brick_up_status $V0 $H0 $B0/$V0$bricknum
+ done | grep -E '^1$' | wc -l
+}
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" ec_up_brick_count
+
+#First time mount tests.
+# When all the bricks are up, mount should succeed and up-children
+# count should be 3
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+TEST stat $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# When the volume is stopped mount succeeds and up-children will be 0
+TEST $CLI volume stop $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+# Wait for 5 seconds even after that up_count should show 0
+sleep 5;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "0" ec_child_up_count $V0 0
+TEST ! stat $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# When 2 bricks are up, mount should succeed and up-children
+# count should be 2
+
+TEST $CLI volume start $V0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" ec_up_brick_count
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+TEST stat $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# When only 1 brick is up mount should fail.
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ec_up_brick_count
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+# Wait for 5 seconds even after that up_count should show 1
+sleep 5
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ec_child_up_count $V0 0
+TEST ! stat $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Mount already succeeded. Test that the brick up down are leading to correct
+# state changes in ec.
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" ec_up_brick_count
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+TEST touch $M0/a
+
+# kill 1 brick and the up_count should become 2, fops should still succeed
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" ec_up_brick_count
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+TEST touch $M0/b
+
+# kill one more brick and the up_count should become 1, fops should fail
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ec_up_brick_count
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ec_child_up_count $V0 0
+TEST ! touch $M0/c
+
+# kill one more brick and the up_count should become 0, fops should still fail
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" ec_up_brick_count
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "0" ec_child_up_count $V0 0
+TEST ! touch $M0/c
+
+# Bring up all the bricks up and see that up_count is 3 and fops are succeeding
+# again.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" ec_up_brick_count
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+TEST touch $M0/c
+
+cleanup
diff --git a/tests/basic/ec/ec-optimistic-changelog.t b/tests/basic/ec/ec-optimistic-changelog.t
new file mode 100644
index 00000000000..a372cd39a64
--- /dev/null
+++ b/tests/basic/ec/ec-optimistic-changelog.t
@@ -0,0 +1,153 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks optimistic-change-log option
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume heal $V0 disable
+
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume set $V0 disperse.optimistic-change-log off
+TEST $CLI volume set $V0 disperse.eager-lock off
+TEST $CLI volume set $V0 disperse.other-eager-lock off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 background-heals
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "0" mount_get_option_value $M0 $V0-disperse-0 heal-wait-qlength
+
+TEST $CLI volume set $V0 disperse.background-heals 1
+TEST touch $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" count_sh_entries $B0/${V0}2
+
+
+
+### optimistic-change-log = off ; All bricks good. Test file operation
+echo abc > $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = off ; Kill one brick . Test file operation
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo abc > $M0/a
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+#Accessing file should heal the file now
+EXPECT "abc" cat $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = off ; All bricks good. Test entry operation
+TEST touch $M0/b
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = off ; All bricks good. Test metadata operation
+TEST chmod 0777 $M0/b
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = off ; Kill one brick. Test entry operation
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST touch $M0/c
+EXPECT 4 get_pending_heal_count $V0 #two for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+getfattr -d -m. -e hex $M0 2>&1 > /dev/null
+getfattr -d -m. -e hex $M0/c 2>&1 > /dev/null
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = off ; Kill one brick. Test metadata operation
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST chmod 0777 $M0/c
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+getfattr -d -m. -e hex $M0/c 2>&1 > /dev/null
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+TEST $CLI volume set $V0 disperse.optimistic-change-log on
+
+### optimistic-change-log = on ; All bricks good. Test file operation
+
+echo abc > $M0/aa
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = on ; Kill one brick. Test file operation
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo abc > $M0/aa
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+#Accessing file should heal the file now
+getfattr -d -m. -e hex $M0/aa 2>&1 > /dev/null
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = on ; All bricks good. Test entry operation
+
+TEST touch $M0/bb
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = on ; All bricks good. Test metadata operation
+
+TEST chmod 0777 $M0/bb
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = on ; Kill one brick. Test entry operation
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST touch $M0/cc
+EXPECT 4 get_pending_heal_count $V0 #two for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+getfattr -d -m. -e hex $M0 2>&1 > /dev/null
+getfattr -d -m. -e hex $M0/cc 2>&1 > /dev/null
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+## optimistic-change-log = on ; Kill one brick. Test metadata operation
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST chmod 0777 $M0/cc
+EXPECT 2 get_pending_heal_count $V0 #One for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+getfattr -d -m. -e hex $M0/cc 2>&1 > /dev/null
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+############################################################
+
+cleanup
diff --git a/tests/basic/ec/ec-quorum-count.t b/tests/basic/ec/ec-quorum-count.t
new file mode 100644
index 00000000000..9310ebbb8f2
--- /dev/null
+++ b/tests/basic/ec/ec-quorum-count.t
@@ -0,0 +1,167 @@
+ #!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../ec.rc
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume create $V1 $H0:$B0/${V1}{0..5}
+TEST $CLI volume set $V0 disperse.eager-lock-timeout 5
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume set $V0 disperse.heal-wait-qlength 0
+
+#Should fail on non-disperse volume
+TEST ! $CLI volume set $V1 disperse.quorum-count 5
+
+#Should succeed on a valid range
+TEST ! $CLI volume set $V0 disperse.quorum-count 0
+TEST ! $CLI volume set $V0 disperse.quorum-count -0
+TEST ! $CLI volume set $V0 disperse.quorum-count abc
+TEST ! $CLI volume set $V0 disperse.quorum-count 10abc
+TEST ! $CLI volume set $V0 disperse.quorum-count 1
+TEST ! $CLI volume set $V0 disperse.quorum-count 2
+TEST ! $CLI volume set $V0 disperse.quorum-count 3
+TEST $CLI volume set $V0 disperse.quorum-count 4
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Test that the option is reflected in the mount
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^4$" ec_option_value $V0 $M0 0 quorum-count
+TEST $CLI volume reset $V0 disperse.quorum-count
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" ec_option_value $V0 $M0 0 quorum-count
+TEST $CLI volume set $V0 disperse.quorum-count 6
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^6$" ec_option_value $V0 $M0 0 quorum-count
+
+TEST touch $M0/a
+TEST touch $M0/data
+TEST setfattr -n trusted.def -v def $M0/a
+TEST touch $M0/src
+TEST touch $M0/del-me
+TEST mkdir $M0/dir1
+TEST dd if=/dev/zero of=$M0/read-file bs=1M count=1 oflag=direct
+TEST dd if=/dev/zero of=$M0/del-file bs=1M count=1 oflag=direct
+TEST gf_rm_file_and_gfid_link $B0/${V0}0 del-file
+#modify operations should fail as the file is not in quorum
+TEST ! dd if=/dev/zero of=$M0/del-file bs=1M count=1 oflag=direct
+TEST kill_brick $V0 $H0 $B0/${V0}0
+#Read should succeed even when quorum-count is not met
+TEST dd if=$M0/read-file of=/dev/null iflag=direct
+TEST ! touch $M0/a2
+TEST ! mkdir $M0/dir2
+TEST ! mknod $M0/b2 b 4 5
+TEST ! ln -s $M0/a $M0/symlink
+TEST ! ln $M0/a $M0/link
+TEST ! mv $M0/src $M0/dst
+TEST ! rm -f $M0/del-me
+TEST ! rmdir $M0/dir1
+TEST ! dd if=/dev/zero of=$M0/a bs=1M count=1 conv=notrunc
+TEST ! dd if=/dev/zero of=$M0/data bs=1M count=1 conv=notrunc
+TEST ! truncate -s 0 $M0/a
+TEST ! setfattr -n trusted.abc -v abc $M0/a
+TEST ! setfattr -x trusted.def $M0/a
+TEST ! chmod +x $M0/a
+TEST ! fallocate -l 2m -n $M0/a
+TEST ! fallocate -p -l 512k $M0/a
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+
+# reset the option and check whether the default redundancy count is
+# accepted or not.
+TEST $CLI volume reset $V0 disperse.quorum-count
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^0$" ec_option_value $V0 $M0 0 quorum-count
+TEST touch $M0/a1
+TEST touch $M0/data1
+TEST setfattr -n trusted.def -v def $M0/a1
+TEST touch $M0/src1
+TEST touch $M0/del-me1
+TEST mkdir $M0/dir11
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST touch $M0/a21
+TEST mkdir $M0/dir21
+TEST mknod $M0/b21 b 4 5
+TEST ln -s $M0/a1 $M0/symlink1
+TEST ln $M0/a1 $M0/link1
+TEST mv $M0/src1 $M0/dst1
+TEST rm -f $M0/del-me1
+TEST rmdir $M0/dir11
+TEST dd if=/dev/zero of=$M0/a1 bs=1M count=1 conv=notrunc
+TEST dd if=/dev/zero of=$M0/data1 bs=1M count=1 conv=notrunc
+TEST truncate -s 0 $M0/a1
+TEST setfattr -n trusted.abc -v abc $M0/a1
+TEST setfattr -x trusted.def $M0/a1
+TEST chmod +x $M0/a1
+TEST fallocate -l 2m -n $M0/a1
+TEST fallocate -p -l 512k $M0/a1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+TEST touch $M0/a2
+TEST touch $M0/data2
+TEST setfattr -n trusted.def -v def $M0/a1
+TEST touch $M0/src2
+TEST touch $M0/del-me2
+TEST mkdir $M0/dir12
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST ! touch $M0/a22
+TEST ! mkdir $M0/dir22
+TEST ! mknod $M0/b22 b 4 5
+TEST ! ln -s $M0/a2 $M0/symlink2
+TEST ! ln $M0/a2 $M0/link2
+TEST ! mv $M0/src2 $M0/dst2
+TEST ! rm -f $M0/del-me2
+TEST ! rmdir $M0/dir12
+TEST ! dd if=/dev/zero of=$M0/a2 bs=1M count=1 conv=notrunc
+TEST ! dd if=/dev/zero of=$M0/data2 bs=1M count=1 conv=notrunc
+TEST ! truncate -s 0 $M0/a2
+TEST ! setfattr -n trusted.abc -v abc $M0/a2
+TEST ! setfattr -x trusted.def $M0/a2
+TEST ! chmod +x $M0/a2
+TEST ! fallocate -l 2m -n $M0/a2
+TEST ! fallocate -p -l 512k $M0/a2
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+
+# Set quorum-count to 5 and kill 1 brick and the fops should pass
+TEST $CLI volume set $V0 disperse.quorum-count 5
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "^5$" ec_option_value $V0 $M0 0 quorum-count
+TEST touch $M0/a3
+TEST touch $M0/data3
+TEST setfattr -n trusted.def -v def $M0/a3
+TEST touch $M0/src3
+TEST touch $M0/del-me3
+TEST mkdir $M0/dir13
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/a31
+TEST mkdir $M0/dir31
+TEST mknod $M0/b31 b 4 5
+TEST ln -s $M0/a3 $M0/symlink3
+TEST ln $M0/a3 $M0/link3
+TEST mv $M0/src3 $M0/dst3
+TEST rm -f $M0/del-me3
+TEST rmdir $M0/dir13
+TEST dd if=/dev/zero of=$M0/a3 bs=1M count=1 conv=notrunc
+TEST dd if=/dev/zero of=$M0/data3 bs=1M count=1 conv=notrunc
+TEST truncate -s 0 $M0/a3
+TEST setfattr -n trusted.abc -v abc $M0/a3
+TEST setfattr -x trusted.def $M0/a3
+TEST chmod +x $M0/a3
+TEST fallocate -l 2m -n $M0/a3
+TEST fallocate -p -l 512k $M0/a3
+TEST dd if=/dev/urandom of=$M0/heal-file bs=1M count=1 oflag=direct
+cksum_before_heal="$(md5sum $M0/heal-file | awk '{print $1}')"
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick $V0 $H0 $B0/${V0}5
+cksum_after_heal=$(dd if=$M0/heal-file iflag=direct | md5sum | awk '{print $1}')
+TEST [[ $cksum_before_heal == $cksum_after_heal ]]
+cleanup;
diff --git a/tests/basic/ec/ec-read-mask.t b/tests/basic/ec/ec-read-mask.t
new file mode 100644
index 00000000000..ddb556f2973
--- /dev/null
+++ b/tests/basic/ec/ec-read-mask.t
@@ -0,0 +1,114 @@
+ #!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../ec.rc
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+
+#Empty read-mask should fail
+TEST ! $GFS --xlator-option=*.ec-read-mask="" -s $H0 --volfile-id $V0 $M0
+
+#Less than 4 number of bricks should fail
+TEST ! $GFS --xlator-option="*.ec-read-mask=0" -s $H0 --volfile-id $V0 $M0
+TEST ! $GFS --xlator-option="*.ec-read-mask=0:1" -s $H0 --volfile-id $V0 $M0
+TEST ! $GFS --xlator-option=*.ec-read-mask="0:1:2" -s $H0 --volfile-id $V0 $M0
+
+#ids greater than 5 should fail
+TEST ! $GFS --xlator-option="*.ec-read-mask=0:1:2:6" -s $H0 --volfile-id $V0 $M0
+
+#ids less than 0 should fail
+TEST ! $GFS --xlator-option="*.ec-read-mask=0:-1:2:5" -s $H0 --volfile-id $V0 $M0
+
+#read-mask with non-alphabet or comma should fail
+TEST ! $GFS --xlator-option="*.ec-read-mask=0:1:2:5:abc" -s $H0 --volfile-id $V0 $M0
+TEST ! $GFS --xlator-option="*.ec-read-mask=0:1:2:5a" -s $H0 --volfile-id $V0 $M0
+
+#mount with at least 4 read-mask-ids and all of them valid should pass
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5:4:3" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^111111$" ec_option_value $V0 $M0 0 read-mask
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+
+TEST dd if=/dev/urandom of=$M0/a bs=1M count=1
+md5=$(md5sum $M0/a | awk '{print $1}')
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#Read on the file should fail if any of the read-mask is down when number of
+#ids is data-count
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST ! dd if=$M0/a of=/dev/null
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST ! dd if=$M0/a of=/dev/null
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST ! dd if=$M0/a of=/dev/null
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}5
+TEST ! dd if=$M0/a of=/dev/null
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+#Read on file should succeed when non-read-mask bricks are down
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}3
+EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}')
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}4
+EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}')
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST kill_brick $V0 $H0 $B0/${V0}4
+EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}')
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume start $V0 force
+
+#Deliberately corrupt chunks 3: 4 and check that reads still give correct data
+TEST dd if=/dev/zero of=$B0/${V0}3/a bs=256k count=1
+TEST dd if=/dev/zero of=$B0/${V0}4/a bs=256k count=1
+TEST $GFS --xlator-option="*.ec-read-mask=0:1:2:5" -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT "^100111$" ec_option_value $V0 $M0 0 read-mask
+EXPECT "^$md5$" echo $(dd if=$M0/a | md5sum | awk '{print $1}')
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup;
diff --git a/tests/basic/ec/ec-read-policy.t b/tests/basic/ec/ec-read-policy.t
new file mode 100644
index 00000000000..fe6fe6576e7
--- /dev/null
+++ b/tests/basic/ec/ec-read-policy.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+
+#Disable all caching
+TEST glusterfs --direct-io-mode=yes --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+#TEST volume operations work fine
+
+EXPECT "gfid-hash" mount_get_option_value $M0 $V0-disperse-0 read-policy
+TEST $CLI volume set $V0 disperse.read-policy round-robin
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "round-robin" mount_get_option_value $M0 $V0-disperse-0 read-policy
+
+#TEST if the option gives the intended behavior. The way we perform this test
+#is by performing reads from the mount and write to /dev/null. If the
+#read-policy is round-robin, then all bricks should have read-fop where as
+#with gfid-hash number of bricks with reads should be equal to (num-bricks - redundancy)
+#count
+
+TEST $CLI volume profile $V0 start
+TEST dd if=/dev/zero of=$M0/1 bs=1M count=4
+#Perform reads now from file on the mount, this only tests dispatch_min
+TEST dd if=$M0/1 of=/dev/null bs=1M count=4
+#TEST that reads are executed on all bricks
+rr_reads=$($CLI volume profile $V0 info cumulative| grep -w READ | wc -l)
+EXPECT "^6$" echo $rr_reads
+TEST $CLI volume profile $V0 info clear
+
+TEST $CLI volume set $V0 disperse.read-policy gfid-hash
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "gfid-hash" mount_get_option_value $M0 $V0-disperse-0 read-policy
+
+#Perform reads now from file on the mount, this only tests dispatch_min
+TEST dd if=$M0/1 of=/dev/null bs=1M count=4
+#TEST that reads are executed on all bricks
+gh_reads=$($CLI volume profile $V0 info cumulative| grep -w READ | wc -l)
+EXPECT "^4$" echo $gh_reads
+
+cleanup;
diff --git a/tests/basic/ec/ec-readdir.t b/tests/basic/ec/ec-readdir.t
new file mode 100644
index 00000000000..fad101bcabe
--- /dev/null
+++ b/tests/basic/ec/ec-readdir.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks that readdir works fine on distributed disperse volume
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..5}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 1
+
+TEST mkdir $M0/d
+TEST touch $M0/d/{1..100}
+EXPECT "100" echo $(ls $M0/d/* | wc -l)
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST rm -rf $M0/d/{1..100}
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 1
+#Do it 3 times so that even with all load balancing, readdir never falls
+#on stale bricks
+EXPECT "0" echo $(ls $M0/d/ | wc -l)
+EXPECT "0" echo $(ls $M0/d/ | wc -l)
+EXPECT "0" echo $(ls $M0/d/ | wc -l)
+#Do the same test above for creation of entries
+TEST mkdir $M0/d1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST touch $M0/d1/{1..100}
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 1
+#Do it 3 times so that even with all load balancing, readdir never falls
+#on stale bricks
+EXPECT "100" echo $(ls $M0/d1/ | wc -l)
+EXPECT "100" echo $(ls $M0/d1/ | wc -l)
+EXPECT "100" echo $(ls $M0/d1/ | wc -l)
+cleanup
diff --git a/tests/basic/ec/ec-rebalance.t b/tests/basic/ec/ec-rebalance.t
new file mode 100644
index 00000000000..6cda3a3e4be
--- /dev/null
+++ b/tests/basic/ec/ec-rebalance.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+#
+# This will test the rebalance failure reported in 1447559
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+#cleate and start volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 lookup-optimize on
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+# Create files
+for i in {1..10}
+do
+ dd if=/dev/urandom of=$M0/file$i bs=1024k count=1
+done
+
+md5_1=$(md5sum $M0/file1 | awk '{print $1}')
+md5_2=$(md5sum $M0/file2 | awk '{print $1}')
+md5_3=$(md5sum $M0/file3 | awk '{print $1}')
+md5_4=$(md5sum $M0/file4 | awk '{print $1}')
+md5_5=$(md5sum $M0/file5 | awk '{print $1}')
+md5_6=$(md5sum $M0/file6 | awk '{print $1}')
+md5_7=$(md5sum $M0/file7 | awk '{print $1}')
+md5_8=$(md5sum $M0/file8 | awk '{print $1}')
+md5_9=$(md5sum $M0/file9 | awk '{print $1}')
+md5_10=$(md5sum $M0/file10 | awk '{print $1}')
+# Add brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{3..5}
+
+#Trigger rebalance
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+#Remount to avoid any caches
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+EXPECT "$md5_1" echo $(md5sum $M0/file1 | awk '{print $1}')
+EXPECT "$md5_2" echo $(md5sum $M0/file2 | awk '{print $1}')
+EXPECT "$md5_3" echo $(md5sum $M0/file3 | awk '{print $1}')
+EXPECT "$md5_4" echo $(md5sum $M0/file4 | awk '{print $1}')
+EXPECT "$md5_5" echo $(md5sum $M0/file5 | awk '{print $1}')
+EXPECT "$md5_6" echo $(md5sum $M0/file6 | awk '{print $1}')
+EXPECT "$md5_7" echo $(md5sum $M0/file7 | awk '{print $1}')
+EXPECT "$md5_8" echo $(md5sum $M0/file8 | awk '{print $1}')
+EXPECT "$md5_9" echo $(md5sum $M0/file9 | awk '{print $1}')
+EXPECT "$md5_10" echo $(md5sum $M0/file10 | awk '{print $1}')
+
+cleanup;
diff --git a/tests/basic/ec/ec-reset-brick.t b/tests/basic/ec/ec-reset-brick.t
new file mode 100644
index 00000000000..f1a625df4ff
--- /dev/null
+++ b/tests/basic/ec/ec-reset-brick.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+function num_entries {
+ ls -l $1 | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+mkdir $M0/dir
+touch $M0/dir/{1..10}
+
+mkdir $M0/dir/dir1
+touch $M0/dir/dir1/{1..10}
+
+#kill brick process
+TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}5 start
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "5" ec_child_up_count $V0 0
+
+#reset-brick by removing all the data and create dir again
+rm -rf $B0/${V0}5
+mkdir $B0/${V0}5
+
+#start brick process and heal by commiting reset-brick
+TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}5 $H0:$B0/${V0}5 commit force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count_shd $V0 0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+
+EXPECT "^12$" num_entries $B0/${V0}5/dir
+EXPECT "^11$" num_entries $B0/${V0}5/dir/dir1
+
+ec_version=$(get_hex_xattr trusted.ec.version $B0/${V0}0)
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}1
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}2
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}3
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}4
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}5
+
+cleanup;
diff --git a/tests/basic/ec/ec-root-heal.t b/tests/basic/ec/ec-root-heal.t
new file mode 100644
index 00000000000..11ea7cdf9d4
--- /dev/null
+++ b/tests/basic/ec/ec-root-heal.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+function num_entries {
+ ls -l $1 | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+touch $M0/{1..10}
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}5 $H0:$B0/${V0}6 commit force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count_shd $V0 0
+
+# active heal
+TEST $CLI volume heal $V0 full
+#ls -l gives "Total" line so number of lines will be 1 more
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}
+EXPECT "^11$" num_entries $B0/${V0}6
+ec_version=$(get_hex_xattr trusted.ec.version $B0/${V0}0)
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}1
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}2
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}3
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}4
+EXPECT "$ec_version" get_hex_xattr trusted.ec.version $B0/${V0}6
+
+cleanup;
diff --git a/tests/basic/ec/ec-seek.t b/tests/basic/ec/ec-seek.t
new file mode 100644
index 00000000000..5a7d31b9f8f
--- /dev/null
+++ b/tests/basic/ec/ec-seek.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+SEEK=$(dirname $0)/seek
+build_tester $(dirname $0)/../seek.c -o ${SEEK}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST mkdir -p $B0/${V0}{0..2}
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Created' volinfo_field $V0 'Status'
+EXPECT '3' brick_count $V0
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+TEST ${SEEK} create ${M0}/test 0 1 1048576 1
+# Determine underlying filesystem allocation block size
+BSIZE="$(($(${SEEK} scan ${M0}/test hole 0) * 2))"
+
+TEST ${SEEK} create ${M0}/test 0 ${BSIZE} $((${BSIZE} * 4 + 512)) ${BSIZE}
+
+EXPECT "^0$" ${SEEK} scan ${M0}/test data 0
+EXPECT "^$((${BSIZE} / 2))$" ${SEEK} scan ${M0}/test data $((${BSIZE} / 2))
+EXPECT "^$((${BSIZE} - 1))$" ${SEEK} scan ${M0}/test data $((${BSIZE} - 1))
+EXPECT "^$((${BSIZE} * 4))$" ${SEEK} scan ${M0}/test data ${BSIZE}
+EXPECT "^$((${BSIZE} * 4))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 4))
+EXPECT "^$((${BSIZE} * 5))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5))
+EXPECT "^$((${BSIZE} * 5 + 511))$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5 + 511))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 5 + 512))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test data $((${BSIZE} * 6))
+
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole 0
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole $((${BSIZE} / 2))
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole $((${BSIZE} - 1))
+EXPECT "^${BSIZE}$" ${SEEK} scan ${M0}/test hole ${BSIZE}
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 4))
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5))
+EXPECT "^$((${BSIZE} * 5 + 512))$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5 + 511))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 5 + 512))
+EXPECT "^ENXIO$" ${SEEK} scan ${M0}/test hole $((${BSIZE} * 6))
+
+rm -f ${SEEK}
+cleanup
+
+# Centos6 regression slaves seem to not support SEEK_DATA/SEEK_HOLE
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/basic/ec/ec-stripe.t b/tests/basic/ec/ec-stripe.t
new file mode 100644
index 00000000000..98b92294feb
--- /dev/null
+++ b/tests/basic/ec/ec-stripe.t
@@ -0,0 +1,227 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# These tests will check the stripe cache functionality of
+# disperse volume
+
+test_index=0
+stripe_count=4
+loop_test=0
+
+TESTS_EXPECTED_IN_LOOP=182
+
+function get_mount_stripe_cache {
+ local sd=$1
+ local field=$2
+ local val=$(grep "$field" $sd | cut -f2 -d'=' | tail -1)
+ echo $val
+}
+
+function get_stripes_in_cache {
+ local target=$1
+ local count=$2
+ local c=0
+ for (( c=0; c<$count; c++ ))
+ do
+ let x=102+$c*1024
+ echo yy | dd of=$target oflag=seek_bytes,sync seek=$x conv=notrunc
+ if [ $? != 0 ]
+ then
+ break
+ fi
+ done
+ echo "$c"
+}
+# tests in this loop = 7
+function mount_get_test_files {
+ let test_index+=1
+ let loop_test+=7
+ echo "Test Case $test_index"
+ local stripe_count=$1
+ TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+ EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+ TEST dd if=/dev/urandom of=$B0/test_file bs=1024 count=20
+ TEST cp $B0/test_file $M0/test_file
+ TEST dd if=/dev/urandom of=$B0/misc_file bs=1024 count=20
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "$stripe_count" get_stripes_in_cache $B0/test_file $stripe_count
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "$stripe_count" get_stripes_in_cache $M0/test_file $stripe_count
+}
+
+#check_statedump_md5sum (hitcount misscount)
+#tests in this loop = 4
+function check_statedump_md5sum {
+ statedump=$(generate_mount_statedump $V0)
+ let loop_test+=4
+ sleep 1
+ nhits=$(get_mount_stripe_cache $statedump "hits")
+ nmisses=$(get_mount_stripe_cache $statedump "misses")
+ EXPECT "$1" echo $nhits
+ EXPECT "$2" echo $nmisses
+ TEST md5_sum=`get_md5_sum $B0/test_file`
+ EXPECT $md5_sum get_md5_sum $M0/test_file
+}
+
+#tests in this loop = 2
+function clean_file_unmount {
+ let loop_test+=2
+ TEST rm -f $B0/test_file $M0/test_file $B0/misc_file
+ cleanup_mount_statedump $V0
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+}
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume set $V0 disperse.eager-lock on
+TEST $CLI volume set $V0 disperse.other-eager-lock on
+TEST $CLI volume set $V0 disperse.stripe-cache 8
+TEST $CLI volume start $V0
+
+### 1 - offset and size in one stripes ####
+
+mount_get_test_files $stripe_count
+# This should have 4 hits on cached stripes
+get_stripes_in_cache $M0/test_file $stripe_count
+check_statedump_md5sum 4 4
+clean_file_unmount
+
+### 2 - Length less than a stripe size, covering two stripes ####
+
+mount_get_test_files $stripe_count
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1022 count=1 oflag=seek_bytes,sync seek=102 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1022 count=1 oflag=seek_bytes,sync seek=102 conv=notrunc
+check_statedump_md5sum 2 4
+clean_file_unmount
+
+### 3 -Length exactly equal to the stripe size, covering a single stripe ####
+
+mount_get_test_files $stripe_count
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1024 count=1 oflag=seek_bytes,sync seek=0 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1024 count=1 oflag=seek_bytes,sync seek=0 conv=notrunc
+check_statedump_md5sum 0 4
+clean_file_unmount
+
+### 4 - Length exactly equal to the stripe size, covering two stripes ####
+
+mount_get_test_files $stripe_count
+TEST dd if=$B0/misc_file of=$B0/test_file bs=2048 count=1 oflag=seek_bytes,sync seek=1024 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=2048 count=1 oflag=seek_bytes,sync seek=1024 conv=notrunc
+check_statedump_md5sum 0 4
+clean_file_unmount
+
+### 5 - Length greater than a stripe size, covering two stripes ####
+
+mount_get_test_files $stripe_count
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1030 count=1 oflag=seek_bytes,sync seek=500 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1030 count=1 oflag=seek_bytes,sync seek=500 conv=notrunc
+check_statedump_md5sum 2 4
+clean_file_unmount
+
+### 6 - Length greater than a stripe size, covering three stripes ####
+
+mount_get_test_files $stripe_count
+TEST dd if=$B0/misc_file of=$B0/test_file bs=2078 count=1 oflag=seek_bytes,sync seek=1000 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=2078 count=1 oflag=seek_bytes,sync seek=1000 conv=notrunc
+check_statedump_md5sum 2 4
+clean_file_unmount
+
+### 7 - Discard range - all stripe from cache should be invalidated complete stripes ####
+
+mount_get_test_files $stripe_count
+TEST fallocate -p -o 0 -l 5120 $B0/test_file
+TEST fallocate -p -o 0 -l 5120 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1024 count=6 oflag=seek_bytes,sync seek=1030 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1024 count=6 oflag=seek_bytes,sync seek=1030 conv=notrunc
+check_statedump_md5sum 5 11
+clean_file_unmount
+
+### 8 - Discard range - starts in the middle of stripe, ends on the middle of next stripe####
+
+mount_get_test_files $stripe_count
+TEST fallocate -p -o 500 -l 1024 $B0/test_file
+TEST fallocate -p -o 500 -l 1024 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1024 count=5 oflag=seek_bytes,sync seek=500 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1024 count=5 oflag=seek_bytes,sync seek=500 conv=notrunc
+check_statedump_md5sum 10 6
+clean_file_unmount
+
+### 9 - Discard range - starts in the middle of stripe, ends on the middle of 3rd stripe#####
+
+mount_get_test_files $stripe_count
+TEST fallocate -p -o 500 -l 2048 $B0/test_file
+TEST fallocate -p -o 500 -l 2048 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1024 count=5 oflag=seek_bytes,sync seek=500 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1024 count=5 oflag=seek_bytes,sync seek=500 conv=notrunc
+check_statedump_md5sum 9 7
+clean_file_unmount
+
+### 10 - Discard range - starts and end within one stripe ####
+
+mount_get_test_files $stripe_count
+TEST fallocate -p -o 500 -l 100 $B0/test_file
+TEST fallocate -p -o 500 -l 100 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1024 count=1 oflag=seek_bytes,sync seek=0 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1024 count=1 oflag=seek_bytes,sync seek=0 conv=notrunc
+check_statedump_md5sum 1 4
+clean_file_unmount
+
+### 11 - Discard range - starts and end in one complete stripe ####
+
+mount_get_test_files $stripe_count
+TEST fallocate -p -o 0 -l 1024 $B0/test_file
+TEST fallocate -p -o 0 -l 1024 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1024 count=1 oflag=seek_bytes,sync seek=512 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1024 count=1 oflag=seek_bytes,sync seek=512 conv=notrunc
+check_statedump_md5sum 1 5
+clean_file_unmount
+
+### 12 - Discard range - starts and end two complete stripe ####
+
+mount_get_test_files $stripe_count
+TEST fallocate -p -o 0 -l 2048 $B0/test_file
+TEST fallocate -p -o 0 -l 2048 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1024 count=4 oflag=seek_bytes,sync seek=300 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1024 count=4 oflag=seek_bytes,sync seek=300 conv=notrunc
+check_statedump_md5sum 5 7
+clean_file_unmount
+
+### 13 - Truncate to invalidate all the stripe in cache ####
+
+mount_get_test_files $stripe_count
+TEST truncate -s 0 $B0/test_file
+TEST truncate -s 0 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1022 count=5 oflag=seek_bytes,sync seek=400 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1022 count=5 oflag=seek_bytes,sync seek=400 conv=notrunc
+check_statedump_md5sum 4 4
+clean_file_unmount
+
+### 14 - Truncate to invalidate all but one the stripe in cache ####
+
+mount_get_test_files $stripe_count
+TEST truncate -s 500 $B0/test_file
+TEST truncate -s 500 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1024 count=1 oflag=seek_bytes,sync seek=525 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1024 count=1 oflag=seek_bytes,sync seek=525 conv=notrunc
+check_statedump_md5sum 2 4
+clean_file_unmount
+
+### 15 - Truncate to invalidate all but one the stripe in cache ####
+mount_get_test_files $stripe_count
+TEST truncate -s 2148 $B0/test_file
+TEST truncate -s 2148 $M0/test_file
+TEST dd if=$B0/misc_file of=$B0/test_file bs=1000 count=1 oflag=seek_bytes,sync seek=2050 conv=notrunc
+TEST dd if=$B0/misc_file of=$M0/test_file bs=1000 count=1 oflag=seek_bytes,sync seek=2050 conv=notrunc
+check_statedump_md5sum 2 4
+clean_file_unmount
+echo "Total loop tests $loop_test"
+cleanup
diff --git a/tests/basic/ec/ec-up.t b/tests/basic/ec/ec-up.t
new file mode 100644
index 00000000000..d54e7e1d022
--- /dev/null
+++ b/tests/basic/ec/ec-up.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+#Tests that ec subvolume is up/down correctly
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../ec.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse-data 2 redundancy 1 $H0:$B0/${V0}{0,1,3,4,5,6}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+EXPECT "1" ec_up_status $V0 $M0 0
+EXPECT "1" ec_up_status $V0 $M0 1
+
+#kill two bricks in first disperse subvolume and check that ec_up_status is 0 for it
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" ec_up_status $V0 $M0 0
+EXPECT "1" ec_up_status $V0 $M0 1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ec_up_status $V0 $M0 0
+EXPECT "1" ec_up_status $V0 $M0 1
+cleanup;
diff --git a/tests/basic/ec/ec.t b/tests/basic/ec/ec.t
new file mode 100644
index 00000000000..cc882771501
--- /dev/null
+++ b/tests/basic/ec/ec.t
@@ -0,0 +1,259 @@
+#!/bin/bash
+
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TEST_USER=test-ec-user
+TEST_UID=27341
+
+function my_getfattr {
+ getfattr --only-values -e text $* 2> /dev/null
+}
+
+function get_rep_count {
+ v=$(my_getfattr -n trusted.jbr.rep-count $1)
+ #echo $v > /dev/tty
+ echo $v
+}
+
+function create_file {
+ dd if=/dev/urandom of=$1 bs=4k count=$2 conv=sync 2> /dev/null
+}
+
+function setup_perm_file {
+ mkdir $1/perm_dir || return 1
+ chown ${TEST_USER} $1/perm_dir || return 1
+ su ${TEST_USER} -c "touch $1/perm_dir/perm_file" || return 1
+ return 0
+}
+
+# Functions to check repair for specific operation types.
+
+function check_create_write {
+ for b in $*; do
+ cmp $tmpdir/create-write $b/create-write || return 1
+ done
+ return 0
+}
+
+function check_truncate {
+ truncate -s 8192 $tmpdir/truncate
+ for b in $*; do
+ cmp $tmpdir/truncate $b/truncate || return 1
+ done
+ return 0
+}
+
+function check_hard_link {
+ stat $M0/hard-link-1
+ stat $M0/hard-link-2
+ for b in $*; do
+ inum1=$(ls -i $b/hard-link-1 | cut -d' ' -f1)
+ inum2=$(ls -i $b/hard-link-2 | cut -d' ' -f1)
+ if [ "$inum1" != "$inum2" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_soft_link {
+ stat $M0/soft-link
+ for b in $*; do
+ if [ "$(readlink $b/soft-link)" != "soft-link-tgt" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_unlink {
+ stat $M0/unlink
+ for b in $*; do
+ if [ -e $b/unlink ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_mkdir {
+ getfattr -m. -d $M0/mkdir
+ for b in $*; do
+ if [ ! -d $b/mkdir ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_rmdir {
+ getfattr -m. -d $M0/rmdir
+ for b in $*; do
+ if [ -e $b/rmdir ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_setxattr {
+ getfattr -d -m. -e hex $M0/setxattr
+ for b in $*; do
+ v=$(my_getfattr -n user.foo $b/setxattr)
+ if [ "$v" != "ash_nazg_durbatuluk" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_removexattr {
+ getfattr -d -m. -e hex $M0/removexattr
+ for b in $*; do
+ my_getfattr -n user.bar $b/removexattr 2> /dev/null
+ if [ $? -eq 0 ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_perm_file {
+ stat $M0/perm_dir/perm_file
+ getfattr -m. -d $M0/perm_dir
+ b1=$1
+ shift 1
+ ftext=$(stat -c "%u %g %a" $b1/perm_dir/perm_file)
+ #echo "first u/g/a = $ftext" > /dev/tty
+ for b in $*; do
+ btext=$(stat -c "%u %g %a" $b/perm_dir/perm_file)
+ #echo " next u/a/a = $btext" > /dev/tty
+ if [ x"$btext" != x"$ftext" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+cleanup
+
+TEST useradd -o -M -u ${TEST_UID} ${TEST_USER}
+push_trapfunc "userdel --force ${TEST_USER}"
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST mkdir -p $B0/${V0}{0,1,2,3,4,5,6,7,8,9}
+TEST $CLI volume create $V0 disperse 10 redundancy 2 $H0:$B0/${V0}{0,1,2,3,4,5,6,7,8,9}
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Created' volinfo_field $V0 'Status'
+EXPECT '10' brick_count $V0
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+
+# Mount FUSE with caching disabled
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "10" ec_child_up_count $V0 0
+
+# Create local files for comparisons etc.
+tmpdir=$(mktemp -d -t ${0##*/}.XXXXXX)
+push_trapfunc "rm -rf $tmpdir"
+TEST create_file $tmpdir/create-write 10
+TEST create_file $tmpdir/truncate 10
+
+# Prepare files and directories we'll need later.
+TEST cp $tmpdir/truncate $M0/
+TEST touch $M0/hard-link-1
+TEST touch $M0/unlink
+TEST mkdir $M0/rmdir
+TEST touch $M0/setxattr
+TEST touch $M0/removexattr
+TEST setfattr -n user.bar -v "ash_nazg_gimbatul" $M0/removexattr
+
+sleep 2
+
+# Kill a couple of bricks and allow some time for things to settle.
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST kill_brick $V0 $H0 $B0/${V0}8
+sleep 10
+
+# Test create+write
+TEST cp $tmpdir/create-write $M0/
+# Test truncate
+TEST truncate -s 8192 $M0/truncate
+# Test hard link
+TEST ln $M0/hard-link-1 $M0/hard-link-2
+# Test soft link
+TEST ln -s soft-link-tgt $M0/soft-link
+# Test unlink
+TEST rm $M0/unlink
+# Test rmdir
+TEST rmdir $M0/rmdir
+# Test mkdir
+TEST mkdir $M0/mkdir
+# Test setxattr
+TEST setfattr -n user.foo -v "ash_nazg_durbatuluk" $M0/setxattr
+# Test removexattr
+TEST setfattr -x user.bar $M0/removexattr
+# Test uid/gid behavior
+TEST setup_perm_file $M0
+
+sleep 2
+
+# Unmount/remount so that create/write and truncate don't see cached data.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS -s $H0 --volfile-id $V0 $M1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "8" ec_child_up_count $V0 0
+
+# Test create/write and truncate *before* the bricks are brought back.
+TEST check_create_write $M1
+TEST check_truncate $M1
+
+# Restart the bricks and allow repair to occur.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "10" ec_child_up_count $V0 0
+
+# Unmount/remount again, same reason as before.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "10" ec_child_up_count $V0 0
+
+# Make sure everything is as it should be. Most tests check for consistency
+# between the bricks and the front end. This is not valid for disperse, so we
+# check the mountpoint state instead.
+
+TEST check_create_write $M0
+TEST check_truncate $M0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_hard_link $B0/${V0}{0..9}
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_soft_link $B0/${V0}{0..9}
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_unlink $B0/${V0}{0..9}
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_rmdir $B0/${V0}{0..9}
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_mkdir $B0/${V0}{0..9}
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_setxattr $B0/${V0}{0..9}
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_removexattr $B0/${V0}{0..9}
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_perm_file $B0/${V0}{0..9}
+
+cleanup
diff --git a/tests/basic/ec/gfapi-ec-open-truncate.c b/tests/basic/ec/gfapi-ec-open-truncate.c
new file mode 100644
index 00000000000..fb16807003a
--- /dev/null
+++ b/tests/basic/ec/gfapi-ec-open-truncate.c
@@ -0,0 +1,171 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+int
+fill_iov(struct iovec *iov, char fillchar, int count)
+{
+ int ret = -1;
+
+ iov->iov_base = calloc(count + 1, sizeof(fillchar));
+ if (iov->iov_base == NULL) {
+ return ret;
+ } else {
+ iov->iov_len = count;
+ ret = 0;
+ }
+ memset(iov->iov_base, fillchar, count);
+ memset(iov->iov_base + count, '\0', 1);
+
+ return ret;
+}
+
+glfs_t *
+init_glfs(const char *hostname, const char *volname, const char *logfile)
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return NULL;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile_server failed");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ return fs;
+}
+
+int
+main(int argc, char *argv[])
+{
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ glfs_t *fs = NULL;
+ glfs_fd_t *glfd = NULL;
+ int ret = 0;
+ int i = 0;
+ int count = 200;
+ struct iovec iov = {0};
+ int flags = O_RDWR;
+ int bricksup = 0;
+ int fdopen = 0;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = init_glfs(hostname, volname, logfile);
+ if (fs == NULL) {
+ LOG_ERR("init_glfs failed");
+ return -1;
+ }
+
+ /* Brick is down and we are opening a file to trigger fd heal. */
+ /* Bypass Write-behind */
+ glfd = glfs_open(fs, "a", O_WRONLY | O_TRUNC | O_SYNC);
+ if (glfd == NULL) {
+ LOG_ERR("glfs_open_truncate failed");
+ exit(1);
+ }
+ system("gluster --mode=script volume start patchy force");
+ /*CHILD_UP_TIMEOUT is 20 seconds*/
+ for (i = 0; i < 20; i++) {
+ ret = system(
+ "[ $(gluster --mode=script volume status patchy | "
+ "grep \" Y \" | awk '{print $(NF-1)}' | wc -l) == 3 ]");
+ if (WIFEXITED(ret) && WEXITSTATUS(ret)) {
+ printf("Ret value of system: %d\n, ifexited: %d, exitstatus: %d",
+ ret, WIFEXITED(ret), WEXITSTATUS(ret));
+ sleep(1);
+ continue;
+ }
+ printf("Number of loops: %d\n", i);
+ bricksup = 1;
+ break;
+ }
+ if (!bricksup) {
+ system("gluster --mode=script volume status patchy");
+ LOG_ERR("Bricks didn't come up\n");
+ exit(1);
+ }
+
+ /*Not sure how to check that the child-up reached EC, so sleep 3 for now*/
+ sleep(3);
+ ret = fill_iov(&iov, 'a', 200);
+ if (ret) {
+ LOG_ERR("failed to create iov");
+ exit(1);
+ }
+
+ /*write will trigger re-open*/
+ ret = glfs_pwritev(glfd, &iov, 1, 0, flags);
+ if (ret < 0) {
+ LOG_ERR("glfs_test_function failed");
+ exit(1);
+ }
+ /*Check reopen happened by checking for open-fds on the brick*/
+ for (i = 0; i < 20; i++) {
+ ret = system(
+ "[ $(for i in $(pgrep glusterfsd); do ls -l /proc/$i/fd | grep "
+ "\"[.]glusterfs\" | grep -v \".glusterfs/[0-9a-f][0-9a-f]\" | grep "
+ "-v health_check; done | wc -l) == 3 ]");
+ if (WIFEXITED(ret) && WEXITSTATUS(ret)) {
+ printf("Ret value of system: %d\n, ifexited: %d, exitstatus: %d",
+ ret, WIFEXITED(ret), WEXITSTATUS(ret));
+ sleep(1);
+ continue;
+ }
+ fdopen = 1;
+ break;
+ }
+
+ if (!fdopen) {
+ LOG_ERR("fd reopen didn't succeed");
+ exit(1);
+ }
+
+ return 0;
+}
diff --git a/tests/basic/ec/gfapi-ec-open-truncate.t b/tests/basic/ec/gfapi-ec-open-truncate.t
new file mode 100644
index 00000000000..e22562c6ea3
--- /dev/null
+++ b/tests/basic/ec/gfapi-ec-open-truncate.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This .t tests the functionality of open-fd-heal when opened with O_TRUNC.
+#If re-open is not done with O_TRUNC then the test will pass.
+
+cleanup
+
+TEST glusterd
+
+TEST $CLI volume create $V0 disperse 3 ${H0}:$B0/brick{1,2,3}
+EXPECT 'Created' volinfo_field $V0 'Status'
+#Disable heals to prevent any chance of heals masking the problem
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume set $V0 disperse.heal-wait-qlength 0
+TEST $CLI volume set $V0 performance.write-behind off
+
+#We need truncate fop to go through before pre-op completes for the write-fop
+#which triggers open-fd heal. Otherwise truncate won't be allowed on 'bad' brick
+TEST $CLI volume set $V0 delay-gen posix
+TEST $CLI volume set $V0 delay-gen.enable fxattrop
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.delay-duration 1000000
+
+TEST $CLI volume heal $V0 disable
+
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+EXPECT 'Started' volinfo_field $V0 'Status'
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+TEST touch $M0/a
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST kill_brick $V0 $H0 $B0/brick1
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/gfapi-ec-open-truncate.c -lgfapi
+
+TEST $CLI volume profile $V0 info clear
+TEST ./$(dirname $0)/gfapi-ec-open-truncate ${H0} $V0 $logdir/gfapi-ec-open-truncate.log
+
+EXPECT "^2$" echo $($CLI volume profile $V0 info incremental | grep -i truncate | wc -l)
+cleanup_tester $(dirname $0)/gfapi-ec-open-truncate
+
+cleanup
diff --git a/tests/basic/ec/heal-info.t b/tests/basic/ec/heal-info.t
new file mode 100644
index 00000000000..1549d5fcdb0
--- /dev/null
+++ b/tests/basic/ec/heal-info.t
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks if heal info works as expected or not
+
+function create_files {
+ for i in {21..1000};
+ do
+ dd if=/dev/zero of=$M0/$i bs=1M count=1 2>&1 > /dev/null;
+ done
+ rm -f $M0/lock
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume set $V0 client-log-level DEBUG
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --direct-io-mode=yes $M0;
+# Wait until all 6 childs have been recognized by the ec xlator
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#heal info should give zero entries to be healed when I/O is going on
+dd if=/dev/zero of=$M0/a bs=1M count=2048 &
+dd_pid=$!
+sleep 3 #Wait for I/O to proceed for some time
+EXPECT "^0$" get_pending_heal_count $V0
+kill -9 $dd_pid
+touch $M0/lock
+create_files &
+
+total_heal_count=0
+while [ -f $M0/lock ];
+do
+ heal_count=$(get_pending_heal_count $V0)
+ total_heal_count=$((heal_count+total_heal_count))
+done
+EXPECT "^0$" echo $total_heal_count
+
+#When only data heal is required it should print it
+#There is no easy way to create this using commands so assigning xattrs directly
+TEST setfattr -n trusted.ec.version -v 0x00000000000000020000000000000000 $B0/${V0}0/1000
+TEST setfattr -n trusted.ec.version -v 0x00000000000000020000000000000000 $B0/${V0}1/1000
+TEST setfattr -n trusted.ec.version -v 0x00000000000000020000000000000000 $B0/${V0}2/1000
+TEST setfattr -n trusted.ec.version -v 0x00000000000000020000000000000000 $B0/${V0}3/1000
+TEST setfattr -n trusted.ec.version -v 0x00000000000000020000000000000000 $B0/${V0}4/1000
+TEST setfattr -n trusted.ec.version -v 0x00000000000000010000000000000000 $B0/${V0}5/1000
+index_path=$B0/${V0}5/.glusterfs/indices/xattrop/$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}5/1000))
+while [ -f $index_path ]; do :; done
+TEST touch $index_path
+EXPECT "^1$" get_pending_heal_count $V0
+TEST rm -f $M0/1000
+
+#When files/directories need heal test that it prints them
+TEST touch $M0/{1..10}
+TEST kill_brick $V0 $H0 $B0/${V0}0
+for i in {11..20};
+do
+ echo abc > $M0/$i #Data + entry + metadata heal
+done
+for i in {1..10};
+do
+ chmod +x $M0/$i;
+done
+
+EXPECT "^105$" get_pending_heal_count $V0
+
+cleanup
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1533815
diff --git a/tests/basic/ec/lock-contention.t b/tests/basic/ec/lock-contention.t
new file mode 100644
index 00000000000..8f86cee16ad
--- /dev/null
+++ b/tests/basic/ec/lock-contention.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# This test verifies that when 'lock-notify-contention' option is enabled,
+# locks xlator actually sends an upcall notification that causes the acquired
+# lock from one client to be released before it's supposed to when another
+# client accesses the file.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function elapsed_time() {
+ local start="`date +%s`"
+
+ if [[ "test" == `cat "$1"` ]]; then
+ echo "$((`date +%s` - ${start}))"
+ fi
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 features.locks-notify-contention off
+TEST $CLI volume set $V0 disperse.eager-lock on
+TEST $CLI volume set $V0 disperse.eager-lock-timeout 6
+TEST $CLI volume set $V0 disperse.other-eager-lock on
+TEST $CLI volume set $V0 disperse.other-eager-lock-timeout 6
+TEST $CLI volume start $V0
+
+TEST $GFS --direct-io-mode=yes --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M0
+
+TEST $GFS --direct-io-mode=yes --volfile-id=/$V0 --volfile-server=$H0 $M1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0 $M1
+
+TEST $(echo "test" >$M0/file)
+
+# With locks-notify-contention set to off, accessing the file from another
+# client should take 6 seconds. Checking against 3 seconds to be safe.
+elapsed="$(elapsed_time $M1/file)"
+TEST [[ ${elapsed} -ge 3 ]]
+
+elapsed="$(elapsed_time $M0/file)"
+TEST [[ ${elapsed} -ge 3 ]]
+
+TEST $CLI volume set $V0 features.locks-notify-contention on
+
+# With locks-notify-contention set to on, accessing the file from another
+# client should be fast. Checking against 3 seconds to be safe.
+elapsed="$(elapsed_time $M1/file)"
+TEST [[ ${elapsed} -le 3 ]]
+
+elapsed="$(elapsed_time $M0/file)"
+TEST [[ ${elapsed} -le 3 ]]
+
+cleanup
diff --git a/tests/basic/ec/nfs.t b/tests/basic/ec/nfs.t
new file mode 100755
index 00000000000..3f51a640ef7
--- /dev/null
+++ b/tests/basic/ec/nfs.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
+EXPECT "Created" volinfo_field $V0 'Status'
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+
+# The test below fails with "bs=1024k count=1k", but passes when "oflag=direct"
+# is used. There also does not seem to be an issue on systems with sufficient
+# memory. Reducing the "count" prevents hangs too.
+TEST dd if=/dev/zero of=$N0/test bs=1024k count=32
+
+## Before killing daemon to avoid deadlocks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+cleanup
diff --git a/tests/basic/ec/quota.t b/tests/basic/ec/quota.t
new file mode 100755
index 00000000000..c9612c8b76a
--- /dev/null
+++ b/tests/basic/ec/quota.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../quota.c -o $QDD
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse $H0:$B0/${V0}{0..2}
+EXPECT 'Created' volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+TEST mkdir -p $M0/test
+
+TEST $CLI volume quota $V0 enable
+
+TEST $CLI volume quota $V0 limit-usage /test 10MB
+
+EXPECT "10.0MB" quota_hard_limit "/test";
+EXPECT "80%" quota_soft_limit "/test";
+
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 hard-timeout 0
+
+TEST ! $QDD $M0/test/file1.txt 256 48
+TEST rm $M0/test/file1.txt
+
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "0Bytes" quotausage "/test"
+
+TEST $QDD $M0/test/file2.txt 256 32
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "8.0MB" quotausage "/test"
+
+TEST rm $M0/test/file2.txt
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "0Bytes" quotausage "/test"
+TEST $CLI volume stop $V0
+
+rm -f $QDD
+cleanup;
diff --git a/tests/basic/ec/self-heal-read-write-fail.t b/tests/basic/ec/self-heal-read-write-fail.t
new file mode 100644
index 00000000000..0ba591b5bb2
--- /dev/null
+++ b/tests/basic/ec/self-heal-read-write-fail.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This test verifies that self-heal fails when read/write fails as part of heal
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+TEST touch $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo abc >> $M0/a
+
+# Umount the volume to force all pending writes to reach the bricks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#Load error-gen and fail read fop and test that heal fails
+TEST $CLI volume stop $V0 #Stop volume so that error-gen can be loaded
+TEST $CLI volume set $V0 debug.error-gen posix
+TEST $CLI volume set $V0 debug.error-fops read
+TEST $CLI volume set $V0 debug.error-number EBADF
+TEST $CLI volume set $V0 debug.error-failure 100
+
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+TEST ! getfattr -n trusted.ec.heal $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+
+#fail write fop and test that heal fails
+TEST $CLI volume stop $V0
+TEST $CLI volume set $V0 debug.error-fops write
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+TEST ! getfattr -n trusted.ec.heal $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+
+TEST $CLI volume stop $V0 #Stop volume so that error-gen can be disabled
+TEST $CLI volume reset $V0 debug.error-gen
+TEST $CLI volume reset $V0 debug.error-fops
+TEST $CLI volume reset $V0 debug.error-number
+TEST $CLI volume reset $V0 debug.error-failure
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+TEST getfattr -n trusted.ec.heal $M0/a
+EXPECT "^0$" get_pending_heal_count $V0
+
+#Test that heal worked as expected by forcing read from brick0
+#remount to make sure data is not served from any cache
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT "abc" cat $M0/a
+
+cleanup
diff --git a/tests/basic/ec/self-heal.t b/tests/basic/ec/self-heal.t
new file mode 100644
index 00000000000..6329bb60248
--- /dev/null
+++ b/tests/basic/ec/self-heal.t
@@ -0,0 +1,235 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=300
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks self-healing feature of dispersed volumes
+
+cleanup
+
+function check_mount_dir
+{
+ getfattr -d -m. -e hex $M0 2>&1 > /dev/null
+ for i in {1..20}; do
+ ls -l $M0/ | grep "dir1"
+ if [ $? -ne 0 ]; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+function check_size
+{
+ cat $M0/$1 2>&1 > /dev/null
+ for i in "${brick[@]}"; do
+ res=`stat -c "%s" $i/$1`
+ if [ "$res" != "$2" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_mode
+{
+ getfattr -d -m. -e hex $M0/$1 2>&1 > /dev/null
+ for i in "${brick[@]}"; do
+ res=`stat -c "%A" $i/$1`
+ if [ "$res" != "$2" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_date
+{
+ getfattr -d -m. -e hex $M0/$1 2>&1 > /dev/null
+ for i in "${brick[@]}"; do
+ res=`stat -c "%Y" $i/$1`
+ if [ "$res" != "$2" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_xattr
+{
+ getfattr -d -m. -e hex $M0/$1 2>&1 > /dev/null
+ for i in "${brick[@]}"; do
+ getfattr -n $2 $i/$1 2>/dev/null
+ if [ $? -eq 0 ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_dir
+{
+ getfattr -m. -d $M0/dir1 2>&1 > /dev/null
+ for i in "${brick[@]}"; do
+ if [ ! -d $i/dir1 ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_soft_link
+{
+ getfattr -d -m. -e hex $M0/test3 2>&1 > /dev/null
+ for i in "${brick[@]}"; do
+ if [ ! -h $i/test3 ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+function check_hard_link
+{
+ getfattr -d -m. -e hex $M0/test4 2>&1 > /dev/null
+ for i in "${brick[@]}"; do
+ res=`stat -c "%h" $i/test4`
+ if [ "$res" != "3" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+tmp=`mktemp -d -t ${0##*/}.XXXXXX`
+if [ ! -d $tmp ]; then
+ exit 1
+fi
+
+TESTS_EXPECTED_IN_LOOP=194
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume set $V0 client-log-level DEBUG
+#Write-behind has a bug where lookup can race over write which leads to size mismatch on the mount after a 'cp'
+TEST $CLI volume set $V0 performance.write-behind off
+#md-cache can return stale stat due to default timeout being 1 sec
+TEST $CLI volume set $V0 performance.stat-prefetch off
+EXPECT "Created" volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+#direct-io-mode is to make sure 'cat' leads to READ fop which triggers heal
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --direct-io-mode=yes $M0;
+# Wait until all 6 childs have been recognized by the ec xlator
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+TEST dd if=/dev/urandom of=$tmp/test bs=1024 count=1024
+
+cs=$(sha1sum $tmp/test | awk '{ print $1 }')
+
+TEST df -h $M0
+TEST stat $M0
+
+for idx in {0..5}; do
+ brick[$idx]=$(gf_get_gfid_backend_file_path $B0/$V0$idx)
+done
+
+TEST cp $tmp/test $M0/test
+TEST chmod 644 $M0/test
+TEST touch -d "@946681200" $M0/test
+EXPECT "-rw-r--r--" stat -c "%A" $M0/test
+EXPECT "946681200" stat -c "%Y" $M0/test
+
+for idx1 in {0..5}; do
+ TEST chmod 666 ${brick[$idx1]}/test
+ TEST truncate -s 0 ${brick[$idx1]}/test
+ TEST setfattr -n user.test -v "test1" ${brick[$idx1]}/test
+ EXPECT "-rw-r--r--" stat -c "%A" $M0/test
+ EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_size test "262144"
+ EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_mode test "-rw-r--r--"
+ EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_date test "946681200"
+ EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_xattr test "user.test"
+done
+
+for idx1 in {0..4}; do
+ for idx2 in `seq $(($idx1 + 1)) 5`; do
+ if [ $idx1 -ne $idx2 ]; then
+ TEST chmod 666 ${brick[$idx1]}/test
+ TEST chmod 600 ${brick[$idx2]}/test
+ TEST truncate -s 0 ${brick[$idx1]}/test
+ TEST truncate -s 2097152 ${brick[$idx2]}/test
+ TEST setfattr -n user.test -v "test1" ${brick[$idx1]}/test
+ TEST setfattr -n user.test -v "test2" ${brick[$idx2]}/test
+ EXPECT "-rw-r--r--" stat -c "%A" $M0/test
+ EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_size test "262144"
+ EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_mode test "-rw-r--r--"
+ EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_date test "946681200"
+ EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_xattr test "user.test"
+ fi
+ done
+done
+
+sleep 2
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST cp $tmp/test $M0/test2
+EXPECT "1048576" stat -c "%s" $M0/test2
+TEST chmod 777 $M0/test2
+EXPECT "-rwxrwxrwx" stat -c "%A" $M0/test2
+
+TEST mkdir $M0/dir1
+TEST ls -al $M0/dir1
+
+TEST ln -s test2 $M0/test3
+TEST [ -h $M0/test3 ]
+
+TEST ln $M0/test2 $M0/test4
+TEST [ -f $M0/test4 ]
+EXPECT "2" stat -c "%h" $M0/test2
+EXPECT "2" stat -c "%h" $M0/test4
+
+sleep 2
+
+TEST $CLI volume start $V0 force
+# Wait until the killed bricks have been started and recognized by the ec
+# xlator
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+TEST check_mount_dir
+
+EXPECT "1048576" stat -c "%s" $M0/test2
+EXPECT "-rwxrwxrwx" stat -c "%A" $M0/test2
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_size test2 "262144"
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_mode test2 "-rwxrwxrwx"
+
+TEST ls -al $M0/dir1
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_dir
+
+TEST [ -h $M0/test3 ]
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_soft_link
+
+EXPECT "2" stat -c "%h" $M0/test4
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" check_hard_link
+
+TEST rm -rf $tmp
+
+cleanup
diff --git a/tests/basic/ec/statedump.t b/tests/basic/ec/statedump.t
new file mode 100644
index 00000000000..8d311ec3d6d
--- /dev/null
+++ b/tests/basic/ec/statedump.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 1 $H0:$B0/${V0}{0..2}
+EXPECT "Created" volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ec_child_up_status $V0 0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ec_child_up_status $V0 0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ec_child_up_status $V0 0 2
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "2" ec_child_up_count $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "0" ec_child_up_status $V0 0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ec_child_up_status $V0 0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" ec_child_up_status $V0 0 2
+
+cleanup
diff --git a/tests/basic/exports_parsing.t b/tests/basic/exports_parsing.t
new file mode 100644
index 00000000000..da88bbcb2cc
--- /dev/null
+++ b/tests/basic/exports_parsing.t
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+EXP_FILES=$(dirname $0)/../configfiles
+
+cleanup
+
+function test_good_file ()
+{
+ glusterfsd --print-exports $1
+}
+
+function test_long_netgroup()
+{
+ glusterfsd --print-exports $1 2>&1 | sed -n 1p
+}
+
+function test_bad_line ()
+{
+ glusterfsd --print-exports $1 2>&1 | sed -n 1p
+}
+
+function test_big_file ()
+{
+ glusterfsd --print-exports $1 | sed -n 3p
+}
+
+function test_bad_opt ()
+{
+ glusterfsd --print-exports $1 2>&1 | sed -n 1p
+}
+
+function check_export_line() {
+ if [ "$1" == "$2" ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+ return
+}
+
+export_result=$(test_good_file $EXP_FILES/exports)
+EXPECT "Y" check_export_line '/test @test(rw,anonuid=0,sec=sys,) 10.35.11.31(rw,anonuid=0,sec=sys,) ' "$export_result"
+
+export_result=$(test_good_file $EXP_FILES/exports-v6)
+EXPECT "Y" check_export_line '/test @test(rw,anonuid=0,sec=sys,) 2401:db00:11:1:face:0:3d:0(rw,anonuid=0,sec=sys,) ' "$export_result"
+
+EXPECT_KEYWORD "Error parsing netgroups for:" test_bad_line $EXP_FILES/bad_exports
+EXPECT_KEYWORD "Error parsing netgroups for:" test_long_netgroup $EXP_FILES/bad_exports
+
+EXPECT_KEYWORD "HDCDTY43SXOAH1TNUKB23MO9DE574W(rw,anonuid=0,sec=sys,)" test_big_file $EXP_FILES/big_exports
+
+EXPECT_KEYWORD "Could not find any valid options" test_bad_opt $EXP_FILES/exports_bad_opt
+
+cleanup
diff --git a/tests/basic/fencing/afr-lock-heal-advanced.c b/tests/basic/fencing/afr-lock-heal-advanced.c
new file mode 100644
index 00000000000..e202ccd5b29
--- /dev/null
+++ b/tests/basic/fencing/afr-lock-heal-advanced.c
@@ -0,0 +1,227 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <signal.h>
+#include <unistd.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define GF_ENFORCE_MANDATORY_LOCK "trusted.glusterfs.enforce-mandatory-lock"
+
+FILE *logfile_fp;
+
+#define LOG_ERR(func, err) \
+ do { \
+ if (!logfile_fp) { \
+ fprintf(stderr, "%\n%d %s : returned error (%s)\n", __LINE__, \
+ func, strerror(err)); \
+ fflush(stderr); \
+ } else { \
+ fprintf(logfile_fp, "\n%d %s : returned error (%s)\n", __LINE__, \
+ func, strerror(err)); \
+ fflush(logfile_fp); \
+ } \
+ } while (0)
+
+glfs_t *
+setup_client(char *hostname, char *volname, char *log_file)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(logfile_fp, "\nglfs_new: returned NULL (%s)\n",
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_set_volfile_server failed ret:%d (%s)\n",
+ ret, strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_file, 7);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_set_logging failed with ret: %d (%s)\n",
+ ret, strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+out:
+ return fs;
+error:
+ return NULL;
+}
+
+glfs_fd_t *
+open_file(glfs_t *fs, char *fname)
+{
+ glfs_fd_t *fd = NULL;
+
+ fd = glfs_creat(fs, fname, O_CREAT, 0644);
+ if (!fd) {
+ LOG_ERR("glfs_creat", errno);
+ goto out;
+ }
+out:
+ return fd;
+}
+
+int
+acquire_mandatory_lock(glfs_t *fs, glfs_fd_t *fd)
+{
+ struct flock lock;
+ int ret = 0;
+
+ /* initialize lock */
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 100;
+
+ ret = glfs_fsetxattr(fd, GF_ENFORCE_MANDATORY_LOCK, "set", 8, 0);
+ if (ret < 0) {
+ LOG_ERR("glfs_fsetxattr", errno);
+ ret = -1;
+ goto out;
+ }
+
+ /* take a write mandatory lock */
+ ret = glfs_file_lock(fd, F_SETLKW, &lock, GLFS_LK_MANDATORY);
+ if (ret) {
+ LOG_ERR("glfs_file_lock", errno);
+ ret = -1;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int
+perform_test(glfs_t *fs, char *file1, char *file2)
+{
+ int ret = 0;
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ char *buf = "0123456789";
+
+ fd1 = open_file(fs, file1);
+ if (!fd1) {
+ ret = -1;
+ goto out;
+ }
+ fd2 = open_file(fs, file2);
+ if (!fd2) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Kill one brick from the .t.*/
+ pause();
+
+ ret = acquire_mandatory_lock(fs, fd1);
+ if (ret) {
+ goto out;
+ }
+ ret = acquire_mandatory_lock(fs, fd2);
+ if (ret) {
+ goto out;
+ }
+
+ /* Bring the brick up and let the locks heal. */
+ pause();
+ /*At this point, the .t would have killed and brought back 2 bricks, marking
+ * the fd bad.*/
+
+ ret = glfs_write(fd1, buf, 10, 0);
+ if (ret > 0) {
+ /* Write is supposed to fail with EBADFD*/
+ LOG_ERR("glfs_write", ret);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (fd1)
+ glfs_close(fd1);
+ if (fd2)
+ glfs_close(fd2);
+ return ret;
+}
+
+static void
+sigusr1_handler(int signo)
+{
+ /*Signal caught. Just continue with the execution.*/
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+ char *volname = NULL;
+ char log_file[100];
+ char *hostname = NULL;
+ char *fname1 = NULL;
+ char *fname2 = NULL;
+
+ if (argc != 7) {
+ fprintf(stderr,
+ "Expect following args %s <host> <volname> <file1> <file2> "
+ "<log file "
+ "location> <log_file_suffix>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ fname1 = argv[3];
+ fname2 = argv[4];
+
+ /*Use SIGUSR1 and pause()as a means of hitting break-points this program
+ *when signalled from the .t test case.*/
+ if (signal(SIGUSR1, sigusr1_handler) == SIG_ERR) {
+ LOG_ERR("SIGUSR1 handler error", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ sprintf(log_file, "%s/%s.%s.%s", argv[5], "lock-heal.c", argv[6], "log");
+ logfile_fp = fopen(log_file, "w");
+ if (!logfile_fp) {
+ fprintf(stderr, "\nfailed to open %s\n", log_file);
+ fflush(stderr);
+ return -1;
+ }
+
+ sprintf(log_file, "%s/%s.%s.%s", argv[5], "glfs-client", argv[6], "log");
+ fs = setup_client(hostname, volname, log_file);
+ if (!fs) {
+ LOG_ERR("setup_client", errno);
+ return -1;
+ }
+
+ ret = perform_test(fs, fname1, fname2);
+
+error:
+ if (fs) {
+ /*glfs_fini(fs)*/; // glfs fini path is racy and crashes the program
+ }
+
+ fclose(logfile_fp);
+
+ return ret;
+}
diff --git a/tests/basic/fencing/afr-lock-heal-advanced.t b/tests/basic/fencing/afr-lock-heal-advanced.t
new file mode 100644
index 00000000000..8a5b5989b5e
--- /dev/null
+++ b/tests/basic/fencing/afr-lock-heal-advanced.t
@@ -0,0 +1,115 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+PROCESS_UP_TIMEOUT=90
+
+function is_gfapi_program_alive()
+{
+ pid=$1
+ ps -p $pid
+ if [ $? -eq 0 ]
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function get_active_lock_count {
+ brick=$1
+ i1=$2
+ i2=$3
+ pattern="ACTIVE.*client-${brick: -1}"
+
+ sdump=$(generate_brick_statedump $V0 $H0 $brick)
+ lock_count1="$(egrep "$i1" $sdump -A3| egrep "$pattern"|uniq|wc -l)"
+ lock_count2="$(egrep "$i2" $sdump -A3| egrep "$pattern"|uniq|wc -l)"
+ echo "$((lock_count1+lock_count2))"
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 locks.mandatory-locking forced
+TEST $CLI volume set $V0 enforce-mandatory-lock on
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+TEST build_tester $(dirname $0)/afr-lock-heal-advanced.c -lgfapi -ggdb
+
+#------------------------------------------------------------------------------
+# Use more than 1 fd from same client so that list_for_each_* loops are executed more than once.
+$(dirname $0)/afr-lock-heal-advanced $H0 $V0 "/FILE1" "/FILE2" $logdir C1&
+client_pid=$!
+TEST [ $client_pid ]
+
+TEST sleep 5 # By now, the client would have opened an fd on FILE1 and FILE2 and waiting for a SIGUSR1.
+EXPECT "Y" is_gfapi_program_alive $client_pid
+
+gfid_str1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/FILE1))
+inode1="FILE1|gfid:$gfid_str1"
+gfid_str2=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/FILE2))
+inode2="FILE2|gfid:$gfid_str2"
+
+# Kill brick-3 and let client-1 take lock on both files.
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill -SIGUSR1 $client_pid
+# If program is still alive, glfs_file_lock() was a success.
+EXPECT "Y" is_gfapi_program_alive $client_pid
+
+# Check lock is present on brick-1 and brick-2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_active_lock_count $B0/${V0}0 $inode1 $inode2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_active_lock_count $B0/${V0}1 $inode1 $inode2
+
+# Restart brick-3 and check that the lock has healed on it.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST sleep 10 #Needed for client to re-open fd? Otherwise client_pre_lk_v2() fails with EBADFD for remote-fd.
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_active_lock_count $B0/${V0}2 $inode1 $inode2
+
+#------------------------------------------------------------------------------
+# Kill same brick before heal completes the first time and check it completes the second time.
+TEST $CLI volume set $V0 delay-gen locks
+TEST $CLI volume set $V0 delay-gen.delay-duration 5000000
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.enable finodelk
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST $CLI volume reset $V0 delay-gen
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_active_lock_count $B0/${V0}0 $inode1 $inode2
+
+#------------------------------------------------------------------------------
+# Kill 2 bricks and bring it back. The fds must be marked bad.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+# TODO: `gluster v statedump $V0 client localhost:$client_pid` is not working,
+# so sleep for 20 seconds for the client to connect to connect to the bricks.
+TEST sleep $CHILD_UP_TIMEOUT
+
+# Try to write to FILE1 from the .c; it must fail.
+TEST kill -SIGUSR1 $client_pid
+wait $client_pid
+ret=$?
+TEST [ $ret == 0 ]
+
+cleanup_tester $(dirname $0)/afr-lock-heal-advanced
+cleanup;
diff --git a/tests/basic/fencing/afr-lock-heal-basic.c b/tests/basic/fencing/afr-lock-heal-basic.c
new file mode 100644
index 00000000000..768c9e57181
--- /dev/null
+++ b/tests/basic/fencing/afr-lock-heal-basic.c
@@ -0,0 +1,182 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <signal.h>
+#include <unistd.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define GF_ENFORCE_MANDATORY_LOCK "trusted.glusterfs.enforce-mandatory-lock"
+
+FILE *logfile_fp;
+
+#define LOG_ERR(func, err) \
+ do { \
+ if (!logfile_fp) { \
+ fprintf(stderr, "%\n%d %s : returned error (%s)\n", __LINE__, \
+ func, strerror(err)); \
+ fflush(stderr); \
+ } else { \
+ fprintf(logfile_fp, "\n%d %s : returned error (%s)\n", __LINE__, \
+ func, strerror(err)); \
+ fflush(logfile_fp); \
+ } \
+ } while (0)
+
+glfs_t *
+setup_client(char *hostname, char *volname, char *log_file)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(logfile_fp, "\nglfs_new: returned NULL (%s)\n",
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_set_volfile_server failed ret:%d (%s)\n",
+ ret, strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_file, 7);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_set_logging failed with ret: %d (%s)\n",
+ ret, strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(logfile_fp, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+out:
+ return fs;
+error:
+ return NULL;
+}
+
+int
+acquire_mandatory_lock(glfs_t *fs, char *fname)
+{
+ struct flock lock;
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+
+ fd = glfs_creat(fs, fname, O_CREAT, 0644);
+ if (!fd) {
+ if (errno != EEXIST) {
+ LOG_ERR("glfs_creat", errno);
+ ret = -1;
+ goto out;
+ }
+ fd = glfs_open(fs, fname, O_RDWR | O_NONBLOCK);
+ if (!fd) {
+ LOG_ERR("glfs_open", errno);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* initialize lock */
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 100;
+
+ ret = glfs_fsetxattr(fd, GF_ENFORCE_MANDATORY_LOCK, "set", 8, 0);
+ if (ret < 0) {
+ LOG_ERR("glfs_fsetxattr", errno);
+ ret = -1;
+ goto out;
+ }
+
+ pause();
+
+ /* take a write mandatory lock */
+ ret = glfs_file_lock(fd, F_SETLKW, &lock, GLFS_LK_MANDATORY);
+ if (ret) {
+ LOG_ERR("glfs_file_lock", errno);
+ goto out;
+ }
+
+ pause();
+
+out:
+ if (fd) {
+ glfs_close(fd);
+ }
+
+ return ret;
+}
+
+static void
+sigusr1_handler(int signo)
+{
+ /*Signal caught. Just continue with the execution.*/
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+ char *volname = NULL;
+ char log_file[100];
+ char *hostname = NULL;
+ char *fname = NULL;
+
+ if (argc != 6) {
+ fprintf(stderr,
+ "Expect following args %s <host> <volname> <file> <log file "
+ "location> <log_file_suffix>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ fname = argv[3];
+
+ /*Use SIGUSR1 and pause()as a means of hitting break-points this program
+ *when signalled from the .t test case.*/
+ if (signal(SIGUSR1, sigusr1_handler) == SIG_ERR) {
+ LOG_ERR("SIGUSR1 handler error", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ sprintf(log_file, "%s/%s.%s.%s", argv[4], "lock-heal-basic.c", argv[5],
+ "log");
+ logfile_fp = fopen(log_file, "w");
+ if (!logfile_fp) {
+ fprintf(stderr, "\nfailed to open %s\n", log_file);
+ fflush(stderr);
+ return -1;
+ }
+
+ sprintf(log_file, "%s/%s.%s.%s", argv[4], "glfs-client", argv[5], "log");
+ fs = setup_client(hostname, volname, log_file);
+ if (!fs) {
+ LOG_ERR("setup_client", errno);
+ return -1;
+ }
+
+ ret = acquire_mandatory_lock(fs, fname);
+
+error:
+ if (fs) {
+ /*glfs_fini(fs)*/; // glfs fini path is racy and crashes the program
+ }
+
+ fclose(logfile_fp);
+
+ return ret;
+}
diff --git a/tests/basic/fencing/afr-lock-heal-basic.t b/tests/basic/fencing/afr-lock-heal-basic.t
new file mode 100644
index 00000000000..69131af085d
--- /dev/null
+++ b/tests/basic/fencing/afr-lock-heal-basic.t
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function is_gfapi_program_alive()
+{
+ pid=$1
+ ps -p $pid
+ if [ $? -eq 0 ]
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 locks.mandatory-locking forced
+TEST $CLI volume set $V0 enforce-mandatory-lock on
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+TEST build_tester $(dirname $0)/afr-lock-heal-basic.c -lgfapi -ggdb
+
+$(dirname $0)/afr-lock-heal-basic $H0 $V0 "/FILE" $logdir C1&
+client1_pid=$!
+TEST [ $client1_pid ]
+
+$(dirname $0)/afr-lock-heal-basic $H0 $V0 "/FILE" $logdir C2&
+client2_pid=$!
+TEST [ $client2_pid ]
+
+TEST sleep 5 # By now, the 2 clients would have opened an fd on FILE and waiting for a SIGUSR1.
+EXPECT "Y" is_gfapi_program_alive $client1_pid
+EXPECT "Y" is_gfapi_program_alive $client2_pid
+
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/FILE))
+inode="FILE|gfid:$gfid_str"
+
+# Kill brick-3 and let client-1 take lock on the file.
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill -SIGUSR1 $client1_pid
+# If program is still alive, glfs_file_lock() was a success.
+EXPECT "Y" is_gfapi_program_alive $client1_pid
+
+# Check lock is present on brick-1 and brick-2
+b1_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}0)
+c1_lock_on_b1="$(egrep "$inode" $b1_sdump -A3| egrep 'ACTIVE.*client-0'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+b2_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}1)
+c1_lock_on_b2="$(egrep "$inode" $b2_sdump -A3| egrep 'ACTIVE.*client-1'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+TEST [ "$c1_lock_on_b1" == "$c1_lock_on_b2" ]
+
+# Restart brick-3 and check that the lock has healed on it.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST sleep 10 #Needed for client to re-open fd? Otherwise client_pre_lk_v2() fails with EBADFD for remote-fd. Also wait for lock heal.
+
+b3_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}2)
+c1_lock_on_b3="$(egrep "$inode" $b3_sdump -A3| egrep 'ACTIVE.*client-2'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+TEST [ "$c1_lock_on_b1" == "$c1_lock_on_b3" ]
+
+# Kill brick-1 and let client-2 preempt the lock on bricks 2 and 3.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill -SIGUSR1 $client2_pid
+# If program is still alive, glfs_file_lock() was a success.
+EXPECT "Y" is_gfapi_program_alive $client2_pid
+
+# Restart brick-1 and let lock healing complete.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+TEST sleep 10 #Needed for client to re-open fd? Otherwise client_pre_lk_v2() fails with EBADFD for remote-fd. Also wait for lock heal.
+
+# Check that all bricks now have locks from client 2 only.
+b1_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}0)
+c2_lock_on_b1="$(egrep "$inode" $b1_sdump -A3| egrep 'ACTIVE.*client-0'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+b2_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}1)
+c2_lock_on_b2="$(egrep "$inode" $b2_sdump -A3| egrep 'ACTIVE.*client-1'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+b3_sdump=$(generate_brick_statedump $V0 $H0 $B0/${V0}2)
+c2_lock_on_b3="$(egrep "$inode" $b3_sdump -A3| egrep 'ACTIVE.*client-2'| uniq| awk '{print $1,$2,$3,S4,$5,$6,$7,$8}'|tr -d '(,), ,')"
+TEST [ "$c2_lock_on_b1" == "$c2_lock_on_b2" ]
+TEST [ "$c2_lock_on_b1" == "$c2_lock_on_b3" ]
+TEST [ "$c2_lock_on_b1" != "$c1_lock_on_b1" ]
+
+#Let the client programs run and exit.
+TEST kill -SIGUSR1 $client1_pid
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" is_gfapi_program_alive $client1_pid
+TEST kill -SIGUSR1 $client2_pid
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" is_gfapi_program_alive $client2_pid
+
+cleanup_tester $(dirname $0)/afr-lock-heal-basic
+cleanup;
diff --git a/tests/basic/fencing/fence-basic.c b/tests/basic/fencing/fence-basic.c
new file mode 100644
index 00000000000..4aa452e19b0
--- /dev/null
+++ b/tests/basic/fencing/fence-basic.c
@@ -0,0 +1,229 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define NO_INIT 1
+#define GF_ENFORCE_MANDATORY_LOCK "trusted.glusterfs.enforce-mandatory-lock"
+
+FILE *fp;
+char *buf = "0123456789";
+
+#define LOG_ERR(func, err) \
+ do { \
+ if (!fp) { \
+ fprintf(stderr, "%\n%d %s : returned error (%s)\n", __LINE__, \
+ func, strerror(err)); \
+ fflush(stderr); \
+ } else { \
+ fprintf(fp, "\n%d %s : returned error (%s)\n", __LINE__, func, \
+ strerror(err)); \
+ fflush(fp); \
+ } \
+ } while (0)
+
+glfs_t *
+setup_new_client(char *hostname, char *volname, char *log_file, int flag)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(fp, "\nglfs_new: returned NULL (%s)\n", strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(fp, "\nglfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_file, 7);
+ if (ret < 0) {
+ fprintf(fp, "\nglfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ if (flag == NO_INIT)
+ goto out;
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(fp, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+out:
+ return fs;
+error:
+ return NULL;
+}
+
+/* test plan
+ *
+ * - take mandatory lock from client 1
+ * - preempt mandatory lock from client 2
+ * - write from client 1 which should fail
+ */
+
+int
+test(glfs_t *fs1, glfs_t *fs2, char *fname)
+{
+ struct flock lock;
+ int ret = 0;
+ glfs_fd_t *fd1, *fd2 = NULL;
+
+ fd1 = glfs_creat(fs1, fname, O_RDWR, 0777);
+ if (ret) {
+ LOG_ERR("glfs_creat", errno);
+ ret = -1;
+ goto out;
+ }
+
+ fd2 = glfs_open(fs2, fname, O_RDWR | O_NONBLOCK);
+ if (ret) {
+ LOG_ERR("glfs_open", errno);
+ ret = -1;
+ goto out;
+ }
+
+ /* initialize lock */
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 100;
+
+ ret = glfs_fsetxattr(fd1, GF_ENFORCE_MANDATORY_LOCK, "set", 8, 0);
+ if (ret < 0) {
+ LOG_ERR("glfs_fsetxattr", errno);
+ ret = -1;
+ goto out;
+ }
+
+ /* take a write mandatory lock */
+ ret = glfs_file_lock(fd1, F_SETLKW, &lock, GLFS_LK_MANDATORY);
+ if (ret) {
+ LOG_ERR("glfs_file_lock", errno);
+ goto out;
+ }
+
+ ret = glfs_write(fd1, buf, 10, 0);
+ if (ret != 10) {
+ LOG_ERR("glfs_write", errno);
+ ret = -1;
+ goto out;
+ }
+
+ /* write should fail */
+ ret = glfs_write(fd2, buf, 10, 0);
+ if (ret != -1) {
+ LOG_ERR("glfs_write", errno);
+ ret = -1;
+ goto out;
+ }
+
+ /* preempt mandatory lock from client 1*/
+ ret = glfs_file_lock(fd2, F_SETLKW, &lock, GLFS_LK_MANDATORY);
+ if (ret) {
+ LOG_ERR("glfs_file_lock", errno);
+ goto out;
+ }
+
+ /* write should succeed from client 2 */
+ ret = glfs_write(fd2, buf, 10, 0);
+ if (ret == -1) {
+ LOG_ERR("glfs_write", errno);
+ goto out;
+ }
+
+ /* write should fail from client 1 */
+ ret = glfs_write(fd1, buf, 10, 0);
+ if (ret == 10) {
+ LOG_ERR("glfs_write", errno);
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (fd1) {
+ glfs_close(fd1);
+ }
+
+ if (fd2) {
+ glfs_close(fd2);
+ }
+
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs1 = NULL;
+ glfs_t *fs2 = NULL;
+ char *volname = NULL;
+ char log_file[100];
+ char *hostname = NULL;
+ char *fname = "/file";
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+
+ if (argc != 4) {
+ fprintf(
+ stderr,
+ "Expect following args %s <hostname> <Vol> <log file location>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+
+ sprintf(log_file, "%s/%s", argv[3], "fence-basic.log");
+ fp = fopen(log_file, "w");
+ if (!fp) {
+ fprintf(stderr, "\nfailed to open %s\n", log_file);
+ fflush(stderr);
+ return -1;
+ }
+
+ sprintf(log_file, "%s/%s", argv[3], "glfs-client-1.log");
+ fs1 = setup_new_client(hostname, volname, log_file, 0);
+ if (!fs1) {
+ LOG_ERR("setup_new_client", errno);
+ return -1;
+ }
+
+ sprintf(log_file, "%s/%s", argv[3], "glfs-client-2.log");
+ fs2 = setup_new_client(hostname, volname, log_file, 0);
+ if (!fs2) {
+ LOG_ERR("setup_new_client", errno);
+ ret = -1;
+ goto error;
+ }
+
+ ret = test(fs1, fs2, fname);
+
+error:
+ if (fs1) {
+ glfs_fini(fs1);
+ }
+
+ if (fs2) {
+ glfs_fini(fs2);
+ }
+
+ fclose(fp);
+
+ return ret;
+}
diff --git a/tests/basic/fencing/fence-basic.t b/tests/basic/fencing/fence-basic.t
new file mode 100755
index 00000000000..30f379e7b20
--- /dev/null
+++ b/tests/basic/fencing/fence-basic.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 diagnostics.client-log-flush-timeout 30
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 locks.mandatory-locking forced
+TEST $CLI volume set $V0 enforce-mandatory-lock on
+
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/fence-basic.c -lgfapi -ggdb
+TEST $(dirname $0)/fence-basic $H0 $V0 $logdir
+
+cleanup_tester $(dirname $0)/fence-basic
+
+cleanup; \ No newline at end of file
diff --git a/tests/basic/fencing/fencing-crash-conistency.t b/tests/basic/fencing/fencing-crash-conistency.t
new file mode 100644
index 00000000000..0c69411e90c
--- /dev/null
+++ b/tests/basic/fencing/fencing-crash-conistency.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+# with lock enforcement flag write should fail with out lock
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 performance.write-behind off
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST touch $M0/file
+
+#write should pass
+TEST "echo "test" > $M0/file"
+TEST "truncate -s 0 $M0/file"
+
+#enable mandatory locking
+TEST $CLI volume set $V0 locks.mandatory-locking forced
+TEST $CLI volume set $V0 enforce-mandatory-lock on
+
+#write should pass
+TEST "echo "test" >> $M0/file"
+TEST "truncate -s 0 $M0/file"
+
+#enforce lock on the file
+TEST setfattr -n trusted.glusterfs.enforce-mandatory-lock -v 1 $M0/file
+
+#write should fail
+TEST ! "echo "test" >> $M0/file"
+TEST ! "truncate -s 0 $M0/file"
+
+#remove lock enforcement flag
+TEST setfattr -x trusted.glusterfs.enforce-mandatory-lock $M0/file
+
+#write should pass
+TEST "echo "test" >> $M0/file"
+TEST "truncate -s 0 $M0/file"
+
+#enforce lock on the file
+TEST setfattr -n trusted.glusterfs.enforce-mandatory-lock -v 1 $M0/file
+#kill brick
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+TEST $CLI volume start $V0 force
+
+# wait one second for the brick to come online
+sleep 2
+#write should fail (lock xlator gets lock enforcement info from disk)
+TEST ! "echo "test" >> $M0/file"
+TEST ! "truncate -s 0 $M0/file"
+
+cleanup; \ No newline at end of file
diff --git a/tests/basic/fencing/test-fence-option.t b/tests/basic/fencing/test-fence-option.t
new file mode 100644
index 00000000000..115cbe7dbdf
--- /dev/null
+++ b/tests/basic/fencing/test-fence-option.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+# with lock enforcement flag write should fail with out lock
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST touch $M0/file
+
+#setfattr for mandatory-enforcement will fail
+TEST ! setfattr -n trusted.glusterfs.enforce-mandatory-lock -v 1 $M0/file
+
+#enable mandatory locking
+TEST $CLI volume set $V0 locks.mandatory-locking forced
+
+#setfattr will fail
+TEST ! setfattr -n trusted.glusterfs.enforce-mandatory-lock -v 1 $M0/file
+
+#set lock-enforcement option
+TEST $CLI volume set $V0 enforce-mandatory-lock on
+
+#setfattr should succeed
+TEST setfattr -n trusted.glusterfs.enforce-mandatory-lock -v 1 $M0/file
+
+cleanup; \ No newline at end of file
diff --git a/tests/basic/fop-sampling.t b/tests/basic/fop-sampling.t
new file mode 100644
index 00000000000..cea8aa737c0
--- /dev/null
+++ b/tests/basic/fop-sampling.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+#
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+SAMPLE_FILE="$(gluster --print-logdir)/samples/glusterfs_${V0}.samp"
+
+function print_cnt() {
+ local FOP_TYPE=$1
+ local FOP_CNT=$(grep ,${FOP_TYPE} ${SAMPLE_FILE} | wc -l)
+ echo $FOP_CNT
+}
+
+# Verify we got non-zero counts for stats/lookup/readdir
+check_samples() {
+ STAT_CNT=$(print_cnt STAT)
+ if [ "$STAT_CNT" -le "0" ]; then
+ echo "STAT count is zero"
+ return
+ fi
+
+ LOOKUP_CNT=$(print_cnt LOOKUP)
+ if [ "$LOOKUP_CNT" -le "0" ]; then
+ echo "LOOKUP count is zero"
+ return
+ fi
+
+ READDIR_CNT=$(print_cnt READDIR)
+ if [ "$READDIR_CNT" -le "0" ]; then
+ echo "READDIR count is zero"
+ return
+ fi
+
+ echo "OK"
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 diagnostics.latency-measurement on
+TEST $CLI volume set $V0 diagnostics.count-fop-hits on
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 2
+TEST $CLI volume set $V0 diagnostics.fop-sample-buf-size 65535
+TEST $CLI volume set $V0 diagnostics.fop-sample-interval 1
+TEST $CLI volume set $V0 diagnostics.stats-dnscache-ttl-sec 3600
+
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+for i in {1..5}
+do
+ dd if=/dev/zero of=${M0}/testfile$i bs=4k count=1
+done
+
+TEST ls -l $M0
+EXPECT_WITHIN 6 "OK" check_samples
+
+cleanup
diff --git a/tests/basic/fops-sanity.c b/tests/basic/fops-sanity.c
new file mode 100644
index 00000000000..ef00aa0f088
--- /dev/null
+++ b/tests/basic/fops-sanity.c
@@ -0,0 +1,1033 @@
+/* Copyright (c) 2014 Red Hat, Inc. All rights reserved.
+
+* This copyrighted material is made available to anyone wishing
+* to use, modify, copy, or redistribute it subject to the terms
+* and conditions of the GNU General Public License version 2.
+
+* This program is distributed in the hope that it will be
+* useful, but WITHOUT ANY WARRANTY; without even the implied
+* warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+* PURPOSE. See the GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public
+* License along with this program; if not, write to the Free
+* Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+* Boston, MA 02110-1301, USA.
+*/
+
+/* Filesystem basic sanity check, tests all (almost) fops. */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/xattr.h>
+#include <errno.h>
+#include <string.h>
+#include <dirent.h>
+#include <sys/sysmacros.h>
+
+#ifndef linux
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#endif
+
+/* for fd based fops after unlink */
+int
+fd_based_fops_1(char *filename);
+/* for fd based fops before unlink */
+int
+fd_based_fops_2(char *filename);
+/* fops based on fd after dup */
+int
+dup_fd_based_fops(char *filename);
+/* for fops based on path */
+int
+path_based_fops(char *filename);
+/* for fops which operate on directory */
+int
+dir_based_fops(char *filename);
+/* for fops which operate in link files (symlinks) */
+int
+link_based_fops(char *filename);
+/* to test open syscall with open modes available. */
+int
+test_open_modes(char *filename);
+/* generic function which does open write and read. */
+int
+generic_open_read_write(char *filename, int flag, mode_t mode);
+
+#define OPEN_MODE 0666
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ int result = 0;
+ char filename[255] = {
+ 0,
+ };
+
+ if (argc > 1)
+ strcpy(filename, argv[1]);
+ else
+ strcpy(filename, "temp-xattr-test-file");
+
+ ret = fd_based_fops_1(strcat(filename, "_1"));
+ if (ret < 0) {
+ fprintf(stderr, "fd based file operation 1 failed\n");
+ result |= ret;
+ } else {
+ fprintf(stdout, "fd based file operation 1 passed\n");
+ }
+
+ ret = fd_based_fops_2(strcat(filename, "_2"));
+ if (ret < 0) {
+ result |= ret;
+ fprintf(stderr, "fd based file operation 2 failed\n");
+ } else {
+ fprintf(stdout, "fd based file operation 2 passed\n");
+ }
+
+ ret = dup_fd_based_fops(strcat(filename, "_3"));
+ if (ret < 0) {
+ result |= ret;
+ fprintf(stderr, "dup fd based file operation failed\n");
+ } else {
+ fprintf(stdout, "dup fd based file operation passed\n");
+ }
+
+ ret = path_based_fops(strcat(filename, "_4"));
+ if (ret < 0) {
+ result |= ret;
+ fprintf(stderr, "path based file operation failed\n");
+ } else {
+ fprintf(stdout, "path based file operation passed\n");
+ }
+
+ ret = dir_based_fops(strcat(filename, "_5"));
+ if (ret < 0) {
+ result |= ret;
+ fprintf(stderr, "directory based file operation failed\n");
+ } else {
+ fprintf(stdout, "directory based file operation passed\n");
+ }
+
+ ret = link_based_fops(strcat(filename, "_5"));
+ if (ret < 0) {
+ result |= ret;
+ fprintf(stderr, "link based file operation failed\n");
+ } else {
+ fprintf(stdout, "link based file operation passed\n");
+ }
+
+ ret = test_open_modes(strcat(filename, "_5"));
+ if (ret < 0) {
+ result |= ret;
+ fprintf(stderr, "testing modes of `open' call failed\n");
+ } else {
+ fprintf(stdout, "testing modes of `open' call passed\n");
+ }
+ return result;
+}
+
+/* Execute all possible fops on a fd which is unlinked */
+int
+fd_based_fops_1(char *filename)
+{
+ int fd = 0;
+ int ret = -1;
+ int result = 0;
+ struct stat stbuf = {
+ 0,
+ };
+ char wstr[50] = {
+ 0,
+ };
+ char rstr[50] = {
+ 0,
+ };
+
+ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
+ if (fd < 0) {
+ fprintf(stderr, "open failed : %s\n", strerror(errno));
+ return ret;
+ }
+
+ ret = unlink(filename);
+ if (ret < 0) {
+ fprintf(stderr, "unlink failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ strcpy(wstr, "This is my string\n");
+ ret = write(fd, wstr, strlen(wstr));
+ if (ret <= 0) {
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = lseek(fd, 0, SEEK_SET);
+ if (ret < 0) {
+ fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = read(fd, rstr, strlen(wstr));
+ if (ret <= 0) {
+ fprintf(stderr, "read failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = memcmp(rstr, wstr, strlen(wstr));
+ if (ret != 0) {
+ fprintf(stderr, "read returning junk\n");
+ result |= ret;
+ }
+
+ ret = ftruncate(fd, 0);
+ if (ret < 0) {
+ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fstat(fd, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fsync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fdatasync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ /*
+ * These metadata operations fail at the moment because kernel doesn't
+ * pass the client fd in the operation.
+ * The following bug tracks this change.
+ * https://bugzilla.redhat.com/show_bug.cgi?id=1084422
+ * ret = fchmod (fd, 0640);
+ * if (ret < 0) {
+ * fprintf (stderr, "fchmod failed : %s\n", strerror (errno));
+ * result |= ret;
+ * }
+
+ * ret = fchown (fd, 10001, 10001);
+ * if (ret < 0) {
+ * fprintf (stderr, "fchown failed : %s\n", strerror (errno));
+ * result |= ret;
+ * }
+
+ * ret = fsetxattr (fd, "trusted.xattr-test", "working", 8, 0);
+ * if (ret < 0) {
+ * fprintf (stderr, "fsetxattr failed : %s\n", strerror
+ (errno));
+ * result |= ret;
+ * }
+
+ * ret = flistxattr (fd, NULL, 0);
+ * if (ret <= 0) {
+ * fprintf (stderr, "flistxattr failed : %s\n", strerror
+ (errno));
+ * result |= ret;
+ * }
+
+ * ret = fgetxattr (fd, "trusted.xattr-test", NULL, 0);
+ * if (ret <= 0) {
+ * fprintf (stderr, "fgetxattr failed : %s\n", strerror
+ (errno));
+ * result |= ret;
+ * }
+
+ * ret = fremovexattr (fd, "trusted.xattr-test");
+ * if (ret < 0) {
+ * fprintf (stderr, "fremovexattr failed : %s\n", strerror
+ (errno));
+ * result |= ret;
+ * }
+ */
+
+ if (fd)
+ close(fd);
+ return result;
+}
+
+int
+fd_based_fops_2(char *filename)
+{
+ int fd = 0;
+ int ret = -1;
+ int result = 0;
+ struct stat stbuf = {
+ 0,
+ };
+ char wstr[50] = {
+ 0,
+ };
+ char rstr[50] = {
+ 0,
+ };
+
+ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
+ if (fd < 0) {
+ fprintf(stderr, "open failed : %s\n", strerror(errno));
+ return ret;
+ }
+
+ ret = ftruncate(fd, 0);
+ if (ret < 0) {
+ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ strcpy(wstr, "This is my second string\n");
+ ret = write(fd, wstr, strlen(wstr));
+ if (ret < 0) {
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+ if (ret < 0) {
+ fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = read(fd, rstr, strlen(wstr));
+ if (ret <= 0) {
+ fprintf(stderr, "read failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = memcmp(rstr, wstr, strlen(wstr));
+ if (ret != 0) {
+ fprintf(stderr, "read returning junk\n");
+ result |= ret;
+ }
+
+ ret = fstat(fd, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fchmod(fd, 0640);
+ if (ret < 0) {
+ fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fchown(fd, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "fchown failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fsync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fsetxattr(fd, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fdatasync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = flistxattr(fd, NULL, 0);
+ if (ret <= 0) {
+ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fgetxattr(fd, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fremovexattr(fd, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ if (fd)
+ close(fd);
+ unlink(filename);
+
+ return result;
+}
+
+int
+path_based_fops(char *filename)
+{
+ int ret = -1;
+ int fd = 0;
+ int result = 0;
+ struct stat stbuf = {
+ 0,
+ };
+ char newfilename[255] = {
+ 0,
+ };
+ char *hardlink = "linkfile-hard.txt";
+ char *symlnk = "linkfile-soft.txt";
+ char buf[1024] = {
+ 0,
+ };
+
+ fd = creat(filename, 0644);
+ if (fd < 0) {
+ fprintf(stderr, "creat failed: %s\n", strerror(errno));
+ return ret;
+ }
+
+ ret = truncate(filename, 0);
+ if (ret < 0) {
+ fprintf(stderr, "truncate failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = stat(filename, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "stat failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = chmod(filename, 0640);
+ if (ret < 0) {
+ fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = chown(filename, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "chown failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = setxattr(filename, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "setxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = listxattr(filename, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "listxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = getxattr(filename, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ fprintf(stderr, "getxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = removexattr(filename, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "removexattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = access(filename, R_OK | W_OK);
+ if (ret < 0) {
+ fprintf(stderr, "access failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = link(filename, hardlink);
+ if (ret < 0) {
+ fprintf(stderr, "link failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+ unlink(hardlink);
+
+ ret = symlink(filename, symlnk);
+ if (ret < 0) {
+ fprintf(stderr, "symlink failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = readlink(symlnk, buf, sizeof(buf));
+ if (ret < 0) {
+ fprintf(stderr, "readlink failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+ unlink(symlnk);
+
+ /* Create a character special file */
+ ret = mknod("cspecial", S_IFCHR | S_IRWXU | S_IRWXG, makedev(2, 3));
+ if (ret < 0) {
+ fprintf(stderr, "cpsecial mknod failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+ unlink("cspecial");
+
+ ret = mknod("bspecial", S_IFBLK | S_IRWXU | S_IRWXG, makedev(4, 5));
+ if (ret < 0) {
+ fprintf(stderr, "bspecial mknod failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+ unlink("bspecial");
+
+#ifdef linux
+ ret = mknod("fifo", S_IFIFO | S_IRWXU | S_IRWXG, 0);
+#else
+ ret = mkfifo("fifo", 0);
+#endif
+ if (ret < 0) {
+ fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+ unlink("fifo");
+
+#ifdef linux
+ ret = mknod("sock", S_IFSOCK | S_IRWXU | S_IRWXG, 0);
+ if (ret < 0) {
+ fprintf(stderr, "sock mknod failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+#else
+ {
+ int s;
+ const char *pathname = "sock";
+ struct sockaddr_un addr;
+
+ s = socket(PF_LOCAL, SOCK_STREAM, 0);
+ memset(&addr, 0, sizeof(addr));
+ strncpy(addr.sun_path, pathname, sizeof(addr.sun_path));
+ ret = bind(s, (const struct sockaddr *)&addr, SUN_LEN(&addr));
+ if (ret < 0) {
+ fprintf(stderr, "fifo mknod failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+ close(s);
+ }
+#endif
+ unlink("sock");
+
+ strcpy(newfilename, filename);
+ strcat(newfilename, "_new");
+ ret = rename(filename, newfilename);
+ if (ret < 0) {
+ fprintf(stderr, "rename failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+ unlink(newfilename);
+
+ if (fd)
+ close(fd);
+
+ unlink(filename);
+ return result;
+}
+
+int
+dup_fd_based_fops(char *filename)
+{
+ int fd = 0;
+ int result = 0;
+ int newfd = 0;
+ int ret = -1;
+ struct stat stbuf = {
+ 0,
+ };
+ char wstr[50] = {
+ 0,
+ };
+ char rstr[50] = {
+ 0,
+ };
+
+ fd = open(filename, O_RDWR | O_CREAT, OPEN_MODE);
+ if (fd < 0) {
+ fprintf(stderr, "open failed : %s\n", strerror(errno));
+ return ret;
+ }
+
+ newfd = dup(fd);
+ if (newfd < 0) {
+ fprintf(stderr, "dup failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ close(fd);
+
+ strcpy(wstr, "This is my string\n");
+ ret = write(newfd, wstr, strlen(wstr));
+ if (ret <= 0) {
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = lseek(newfd, 0, SEEK_SET);
+ if (ret < 0) {
+ fprintf(stderr, "lseek failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = read(newfd, rstr, strlen(wstr));
+ if (ret <= 0) {
+ fprintf(stderr, "read failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = memcmp(rstr, wstr, strlen(wstr));
+ if (ret != 0) {
+ fprintf(stderr, "read returning junk\n");
+ result |= ret;
+ }
+
+ ret = ftruncate(newfd, 0);
+ if (ret < 0) {
+ fprintf(stderr, "ftruncate failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fstat(newfd, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fchmod(newfd, 0640);
+ if (ret < 0) {
+ fprintf(stderr, "fchmod failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fchown(newfd, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "fchown failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fsync(newfd);
+ if (ret < 0) {
+ fprintf(stderr, "fsync failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fsetxattr(newfd, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fdatasync(newfd);
+ if (ret < 0) {
+ fprintf(stderr, "fdatasync failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = flistxattr(newfd, NULL, 0);
+ if (ret <= 0) {
+ fprintf(stderr, "flistxattr failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fgetxattr(newfd, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ fprintf(stderr, "fgetxattr failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = fremovexattr(newfd, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "fremovexattr failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ if (newfd)
+ close(newfd);
+ ret = unlink(filename);
+ if (ret < 0) {
+ fprintf(stderr, "unlink failed : %s\n", strerror(errno));
+ result |= ret;
+ }
+ return result;
+}
+
+int
+dir_based_fops(char *dirname)
+{
+ int ret = -1;
+ int result = 0;
+ DIR *dp = NULL;
+ char buff[255] = {
+ 0,
+ };
+ struct dirent *dbuff = {
+ 0,
+ };
+ struct stat stbuff = {
+ 0,
+ };
+ char newdname[255] = {
+ 0,
+ };
+ char *cwd = NULL;
+
+ ret = mkdir(dirname, 0755);
+ if (ret < 0) {
+ fprintf(stderr, "mkdir failed: %s\n", strerror(errno));
+ return ret;
+ }
+
+ dp = opendir(dirname);
+ if (dp == NULL) {
+ fprintf(stderr, "opendir failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ dbuff = readdir(dp);
+ if (NULL == dbuff) {
+ fprintf(stderr, "readdir failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = closedir(dp);
+ if (ret < 0) {
+ fprintf(stderr, "closedir failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = stat(dirname, &stbuff);
+ if (ret < 0) {
+ fprintf(stderr, "stat failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = chmod(dirname, 0744);
+ if (ret < 0) {
+ fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = chown(dirname, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "chmod failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = setxattr(dirname, "trusted.xattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "setxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = listxattr(dirname, NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "listxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = getxattr(dirname, "trusted.xattr-test", NULL, 0);
+ if (ret <= 0) {
+ ret = -1;
+ fprintf(stderr, "getxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = removexattr(dirname, "trusted.xattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "removexattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ strcpy(newdname, dirname);
+ strcat(newdname, "/../");
+ ret = chdir(newdname);
+ if (ret < 0) {
+ fprintf(stderr, "chdir failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ cwd = getcwd(buff, 255);
+ if (NULL == cwd) {
+ fprintf(stderr, "getcwd failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ strcpy(newdname, dirname);
+ strcat(newdname, "new");
+ ret = rename(dirname, newdname);
+ if (ret < 0) {
+ fprintf(stderr, "rename failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = rmdir(newdname);
+ if (ret < 0) {
+ fprintf(stderr, "rmdir failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ rmdir(dirname);
+ return result;
+}
+
+int
+link_based_fops(char *filename)
+{
+ int ret = -1;
+ int result = 0;
+ int fd = 0;
+ char newname[255] = {
+ 0,
+ };
+ char linkname[255] = {
+ 0,
+ };
+ struct stat lstbuf = {
+ 0,
+ };
+
+ fd = creat(filename, 0644);
+ if (fd < 0) {
+ fd = 0;
+ fprintf(stderr, "creat failed: %s\n", strerror(errno));
+ return ret;
+ }
+
+ strcpy(newname, filename);
+ strcat(newname, "_hlink");
+ ret = link(filename, newname);
+ if (ret < 0) {
+ fprintf(stderr, "link failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = unlink(filename);
+ if (ret < 0) {
+ fprintf(stderr, "unlink failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ strcpy(linkname, filename);
+ strcat(linkname, "_slink");
+ ret = symlink(newname, linkname);
+ if (ret < 0) {
+ fprintf(stderr, "symlink failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = lstat(linkname, &lstbuf);
+ if (ret < 0) {
+ fprintf(stderr, "lstbuf failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = lchown(linkname, 10001, 10001);
+ if (ret < 0) {
+ fprintf(stderr, "lchown failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = lsetxattr(linkname, "trusted.lxattr-test", "working", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "lsetxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = llistxattr(linkname, NULL, 0);
+ if (ret < 0) {
+ ret = -1;
+ fprintf(stderr, "llistxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = lgetxattr(linkname, "trusted.lxattr-test", NULL, 0);
+ if (ret < 0) {
+ ret = -1;
+ fprintf(stderr, "lgetxattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ ret = lremovexattr(linkname, "trusted.lxattr-test");
+ if (ret < 0) {
+ fprintf(stderr, "lremovexattr failed: %s\n", strerror(errno));
+ result |= ret;
+ }
+
+ if (fd)
+ close(fd);
+ unlink(linkname);
+ unlink(newname);
+ return result;
+}
+
+int
+test_open_modes(char *filename)
+{
+ int ret = -1;
+ int result = 0;
+
+ ret = generic_open_read_write(filename, O_CREAT | O_WRONLY, OPEN_MODE);
+ if (ret != 0) {
+ fprintf(stderr, "flag O_CREAT|O_WRONLY failed: \n");
+ result |= ret;
+ }
+
+ ret = generic_open_read_write(filename, O_CREAT | O_RDWR, OPEN_MODE);
+ if (ret != 0) {
+ fprintf(stderr, "flag O_CREAT|O_RDWR failed\n");
+ result |= ret;
+ }
+
+ ret = generic_open_read_write(filename, O_CREAT | O_RDONLY, OPEN_MODE);
+ if (ret != 0) {
+ fprintf(stderr, "flag O_CREAT|O_RDONLY failed\n");
+ result |= ret;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_WRONLY, 0);
+ if (ret != 0) {
+ fprintf(stderr, "flag O_WRONLY failed\n");
+ result |= ret;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_RDWR, 0);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_RDWR failed\n");
+ result |= ret;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_RDONLY, 0);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_RDONLY failed\n");
+ result |= ret;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_TRUNC | O_WRONLY, 0);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_TRUNC|O_WRONLY failed\n");
+ result |= ret;
+ }
+
+#if 0 /* undefined behaviour, unable to reliably test */
+ ret = creat (filename, 0644);
+ close (ret);
+ ret = generic_open_read_write (filename, O_TRUNC|O_RDONLY, 0);
+ if (0 != ret) {
+ fprintf (stderr, "flag O_TRUNC|O_RDONLY failed\n");
+ result |= ret;
+ }
+#endif
+
+ ret = generic_open_read_write(filename, O_CREAT | O_RDWR | O_SYNC,
+ OPEN_MODE);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_CREAT|O_RDWR|O_SYNC failed\n");
+ result |= ret;
+ }
+
+ ret = creat(filename, 0644);
+ close(ret);
+ ret = generic_open_read_write(filename, O_CREAT | O_EXCL, OPEN_MODE);
+ if (0 != ret) {
+ fprintf(stderr, "flag O_CREAT|O_EXCL failed\n");
+ result |= ret;
+ }
+
+ return result;
+}
+
+int
+generic_open_read_write(char *filename, int flag, mode_t mode)
+{
+ int fd = 0;
+ int ret = -1;
+ char wstring[50] = {
+ 0,
+ };
+ char rstring[50] = {
+ 0,
+ };
+
+ fd = open(filename, flag, mode);
+ if (fd < 0) {
+ if (flag == (O_CREAT | O_EXCL) && errno == EEXIST) {
+ unlink(filename);
+ return 0;
+ } else {
+ fprintf(stderr, "open failed: %s\n", strerror(errno));
+ return -1;
+ }
+ }
+
+ strcpy(wstring, "My string to write\n");
+ ret = write(fd, wstring, strlen(wstring));
+ if (ret <= 0) {
+ if (errno != EBADF) {
+ fprintf(stderr, "write failed: %s\n", strerror(errno));
+ close(fd);
+ unlink(filename);
+ return ret;
+ }
+ }
+
+ ret = lseek(fd, 0, SEEK_SET);
+ if (ret < 0) {
+ close(fd);
+ unlink(filename);
+ return ret;
+ }
+
+ ret = read(fd, rstring, strlen(wstring));
+ if (ret < 0 && flag != (O_CREAT | O_WRONLY) && flag != O_WRONLY &&
+ flag != (O_TRUNC | O_WRONLY)) {
+ close(fd);
+ unlink(filename);
+ return ret;
+ }
+
+ /* Compare the rstring with wstring. But we do not want to return
+ * error when the flag is either O_RDONLY, O_CREAT|O_RDONLY or
+ * O_TRUNC|O_RDONLY. Because in that case we are not writing
+ * anything to the file.*/
+
+ ret = memcmp(wstring, rstring, strlen(wstring));
+ if (0 != ret && flag != (O_TRUNC | O_WRONLY) && flag != O_WRONLY &&
+ flag != (O_CREAT | O_WRONLY) &&
+ !(flag == (O_CREAT | O_RDONLY) || flag == O_RDONLY ||
+ flag == (O_TRUNC | O_RDONLY))) {
+ fprintf(stderr, "read is returning junk\n");
+ close(fd);
+ unlink(filename);
+ return ret;
+ }
+
+ close(fd);
+ unlink(filename);
+ return 0;
+}
diff --git a/tests/basic/fops-sanity.t b/tests/basic/fops-sanity.t
new file mode 100755
index 00000000000..8962f3a2fba
--- /dev/null
+++ b/tests/basic/fops-sanity.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+#mount on a random dir
+TEST glusterfs --entry-timeout=3600 --attribute-timeout=3600 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=yes
+
+build_tester $(dirname $0)/fops-sanity.c
+
+TEST cp $(dirname $0)/fops-sanity $M0
+cd $M0
+TEST ./fops-sanity $V0
+cd -
+TEST rm -f $(dirname $0)/fops-sanity
+cleanup;
diff --git a/tests/basic/fuse/Makefile b/tests/basic/fuse/Makefile
new file mode 100644
index 00000000000..c446d253228
--- /dev/null
+++ b/tests/basic/fuse/Makefile
@@ -0,0 +1,12 @@
+CFLAGS = -Wall -g
+LDFLAGS =
+
+BINARIES = seek
+
+%: %.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
+
+all: $(BINARIES)
+
+clean:
+ -$(RM) $(BINARIES)
diff --git a/tests/basic/fuse/active-io-graph-switch.t b/tests/basic/fuse/active-io-graph-switch.t
new file mode 100644
index 00000000000..6ec3e1fcbfa
--- /dev/null
+++ b/tests/basic/fuse/active-io-graph-switch.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TESTS_EXPECTED_IN_LOOP=12
+
+function perform_io_on_mount {
+ local m="$1"
+ local f="$2"
+ local lockfile="$3"
+ while [ -f "$m/$lockfile" ];
+ do
+ dd if=/dev/zero of=$m/$f bs=1M count=1
+ done
+}
+
+function perform_graph_switch {
+ for i in {1..3}
+ do
+ TEST_IN_LOOP $CLI volume set $V0 performance.stat-prefetch off
+ sleep 3
+ TEST_IN_LOOP $CLI volume set $V0 performance.stat-prefetch on
+ sleep 3
+ done
+}
+
+function count_files {
+ ls $M0 | wc -l
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 flush-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST touch $M0/lock
+for i in {1..100}; do perform_io_on_mount $M0 $i lock & done
+EXPECT_WITHIN 5 "101" count_files
+
+perform_graph_switch
+TEST rm -f $M0/lock
+wait
+EXPECT "100" count_files
+TEST rm -f $M0/{1..100}
+EXPECT "0" count_files
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#Repeat the tests with reader-thread-count
+TEST $GFS --reader-thread-count=10 --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST touch $M0/lock
+for i in {1..100}; do perform_io_on_mount $M0 $i lock & done
+EXPECT_WITHIN 5 "101" count_files
+
+perform_graph_switch
+TEST rm -f $M0/lock
+wait
+EXPECT "100" count_files
+TEST rm -f $M0/{1..100}
+EXPECT "0" count_files
+
+cleanup
diff --git a/tests/basic/fuse/seek.c b/tests/basic/fuse/seek.c
new file mode 100644
index 00000000000..30943ad0f33
--- /dev/null
+++ b/tests/basic/fuse/seek.c
@@ -0,0 +1,82 @@
+/* seek.c - use lseek() to find holes in a file
+ *
+ * Author: Niels de Vos <ndevos@redhat.com>
+ */
+
+/* needed for SEEK_HOLE/SEEK_DATA */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int
+main(int argc, char **argv)
+{
+ int ret = EXIT_SUCCESS;
+ int fd = -1;
+ char *filename = NULL;
+ struct stat st = {
+ 0,
+ };
+ off_t hole_start = 0;
+ off_t hole_end = 0;
+
+ if (argc != 2) {
+ fprintf(stderr, "Invalid argument, use %s <file>\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ filename = argv[1];
+
+ fd = open(filename, O_RDONLY);
+ if (fd <= 0) {
+ perror("open");
+ return EXIT_FAILURE;
+ }
+
+ if (fstat(fd, &st)) {
+ perror("fstat");
+ return EXIT_FAILURE;
+ }
+
+ while (hole_end < st.st_size) {
+ hole_start = lseek(fd, hole_end, SEEK_HOLE);
+ if (hole_start == -1 && errno == ENXIO) {
+ /* no more holes */
+ break;
+ } else if (hole_start == -1 && errno == ENOTSUP) {
+ /* SEEK_HOLE is not supported */
+ perror("lseek(SEEK_HOLE)");
+ ret = EXIT_FAILURE;
+ break;
+ } else if (hole_start == -1) {
+ perror("no more holes");
+ break;
+ }
+
+ hole_end = lseek(fd, hole_start, SEEK_DATA);
+ if (hole_end == -1 && errno == ENXIO) {
+ /* no more data */
+ break;
+ } else if (hole_end == -1 && errno == ENOTSUP) {
+ /* SEEK_DATA is not supported */
+ perror("lseek(SEEK_DATA)");
+ ret = EXIT_FAILURE;
+ break;
+ }
+
+ printf("HOLE found: %ld - %ld%s\n", hole_start, hole_end,
+ (hole_end == st.st_size) ? " (EOF)" : "");
+ }
+
+ close(fd);
+
+ return ret;
+}
diff --git a/tests/basic/geo-replication/marker-xattrs.t b/tests/basic/geo-replication/marker-xattrs.t
new file mode 100755
index 00000000000..7e5ea8eebec
--- /dev/null
+++ b/tests/basic/geo-replication/marker-xattrs.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TEST glusterd
+TEST pidof glusterd
+
+## Start and create a replicated volume
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}-{0,1,2,3,4,5}
+
+TEST $CLI volume set $V0 indexing on
+
+TEST $CLI volume start $V0;
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+## Mount client-pid=-1
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 --client-pid=-1 $M1
+
+TEST touch $M0
+
+vol_uuid=$(get_volume_mark $M1)
+xtime=trusted.glusterfs.$vol_uuid.xtime
+
+TEST "getfattr -n $xtime $B0/${V0}-1 | grep -q ${xtime}="
+
+TEST kill_brick $V0 $H0 $B0/${V0}-0
+
+TEST "getfattr -n $xtime $B0/${V0}-1 | grep -q ${xtime}="
+
+TEST getfattr -d -m. -e hex $M1
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+## Start and create a disperse volume
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}-{0,1,2}
+
+TEST $CLI volume set $V0 indexing on
+
+TEST $CLI volume start $V0;
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" mount_get_option_value $M0 $V0-disperse-0 childs_up
+
+## Mount client-pid=-1
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 --client-pid=-1 $M1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" mount_get_option_value $M1 $V0-disperse-0 childs_up
+
+TEST touch $M0
+
+vol_uuid=$(get_volume_mark $M1)
+xtime=trusted.glusterfs.$vol_uuid.xtime
+stime=trusted.glusterfs.$vol_uuid.stime
+
+stime_val=$(getfattr -e hex -n $xtime $B0/${V0}-1 | grep ${xtime}= | cut -f2 -d'=')
+TEST "setfattr -n $stime -v $stime_val $B0/${V0}-1"
+TEST "getfattr -n $xtime $B0/${V0}-1 | grep -q ${xtime}="
+
+TEST kill_brick $V0 $H0 $B0/${V0}-0
+
+TEST "getfattr -n $xtime $B0/${V0}-1 | grep -q ${xtime}="
+TEST "getfattr -n $stime $M1 | grep -q ${stime}="
+
+TEST getfattr -d -m. -e hex $M1
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup
diff --git a/tests/basic/gfapi/Makefile b/tests/basic/gfapi/Makefile
new file mode 100644
index 00000000000..1c5cf03ca3d
--- /dev/null
+++ b/tests/basic/gfapi/Makefile
@@ -0,0 +1,22 @@
+## compiles against the *system* version of libgfapi,
+## but not the libgfapi for the testcases
+
+CFLAGS = -Wall -g $(shell pkg-config --cflags glusterfs-api)
+LDFLAGS = $(shell pkg-config --libs glusterfs-api)
+
+BINARIES = upcall-cache-invalidate libgfapi-fini-hang anonymous_fd seek \
+ bug1283983 bug1291259 gfapi-ssl-test gfapi-load-volfile \
+ mandatory-lock-optimal
+
+%: %.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
+
+all: check-pkgconfig $(BINARIES)
+
+clean:
+ -$(RM) $(BINARIES)
+
+.phony: check-pkgconfig
+
+check-pkgconfig:
+ pkg-config --exists glusterfs-api
diff --git a/tests/basic/gfapi/anonymous_fd.t b/tests/basic/gfapi/anonymous_fd.t
new file mode 100755
index 00000000000..bc0fb0fa2f0
--- /dev/null
+++ b/tests/basic/gfapi/anonymous_fd.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/anonymous_fd_read_write.c -lgfapi -o $(dirname $0)/anonymous_fd
+TEST ./$(dirname $0)/anonymous_fd $H0 $V0 $logdir/anonymous_fd.log
+
+cleanup_tester $(dirname $0)/anonymous_fd
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/anonymous_fd_read_write.c b/tests/basic/gfapi/anonymous_fd_read_write.c
new file mode 100644
index 00000000000..fc276ca4310
--- /dev/null
+++ b/tests/basic/gfapi/anonymous_fd_read_write.c
@@ -0,0 +1,106 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto out; \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+ struct glfs_object *root = NULL, *file_obj = NULL;
+ struct stat sb = {
+ 0,
+ };
+ char readbuf[32], writebuf[32];
+ char *filename = "file.txt";
+ char *logfile = NULL;
+ char *volname = NULL;
+ char *hostname = NULL;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ ret = -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("glfs_init", ret);
+
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr, "glfs_h_lookupat: error on lookup of / ,%s\n",
+ strerror(errno));
+ goto out;
+ }
+
+ file_obj = glfs_h_creat(fs, root, filename, O_CREAT, 0644, &sb);
+ if (file_obj == NULL) {
+ fprintf(stderr, "glfs_h_creat: error on create of %s: from (%p),%s\n",
+ filename, root, strerror(errno));
+ goto out;
+ }
+
+ /* test read/write based on anonymous fd */
+ memcpy(writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+
+ ret = glfs_h_anonymous_write(fs, file_obj, writebuf, 32, 0);
+ if (ret < 0)
+ LOG_ERR("glfs_h_anonymous_write", ret);
+
+ ret = glfs_h_anonymous_read(fs, file_obj, readbuf, 32, 0);
+ if (ret < 0)
+ LOG_ERR("glfs_h_anonymous_read", ret);
+
+ if (memcmp(readbuf, writebuf, 32)) {
+ fprintf(stderr, "Failed to read what I wrote: %s %s\n", readbuf,
+ writebuf);
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (file_obj)
+ glfs_h_close(file_obj);
+
+ if (fs) {
+ ret = glfs_fini(fs);
+ fprintf(stderr, "glfs_fini(fs) returned %d \n", ret);
+ }
+ if (ret)
+ exit(1);
+ exit(0);
+}
diff --git a/tests/basic/gfapi/bug-1241104.c b/tests/basic/gfapi/bug-1241104.c
new file mode 100644
index 00000000000..78c87595a71
--- /dev/null
+++ b/tests/basic/gfapi/bug-1241104.c
@@ -0,0 +1,93 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <sys/wait.h>
+
+int gfapi = 1;
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto out; \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0, i, status = 0;
+ glfs_fd_t *fd = NULL;
+ char *filename = "file_tmp";
+ char *volname = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+ struct flock lock = {
+ 0,
+ };
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("glfs_init", ret);
+
+ fd = glfs_creat(fs, filename, O_RDWR | O_SYNC, 0644);
+ if (fd <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_creat", ret);
+ }
+ fprintf(stderr, "glfs-create fd - %d\n", fd);
+
+ /* validate locks for negative range */
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 10;
+ lock.l_len = -9;
+
+ ret = glfs_posix_lock(fd, F_SETLK, &lock);
+ LOG_ERR("glfs_posix_lock", ret);
+
+err:
+ glfs_close(fd);
+ LOG_ERR("glfs_close", ret);
+
+out:
+ if (fs) {
+ ret = glfs_fini(fs);
+ fprintf(stderr, "glfs_fini(fs) returned %d \n", ret);
+ }
+
+ if (ret)
+ exit(1);
+ exit(0);
+}
diff --git a/tests/basic/gfapi/bug-1241104.t b/tests/basic/gfapi/bug-1241104.t
new file mode 100755
index 00000000000..e7f4759c3d5
--- /dev/null
+++ b/tests/basic/gfapi/bug-1241104.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+## Enable Upcall cache-invalidation feature
+TEST $CLI volume set $V0 features.cache-invalidation on;
+
+TEST build_tester $(dirname $0)/bug-1241104.c -lgfapi
+
+TEST ./$(dirname $0)/bug-1241104 $H0 $V0 $logdir/bug-1241104.log
+
+cleanup_tester $(dirname $0)/bug-1241104
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/bug-1507896.c b/tests/basic/gfapi/bug-1507896.c
new file mode 100644
index 00000000000..1cc20849c2b
--- /dev/null
+++ b/tests/basic/gfapi/bug-1507896.c
@@ -0,0 +1,49 @@
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define VALIDATE_AND_GOTO_LABEL_ON_ERROR(func, ret, label) \
+ do { \
+ if (ret < 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs)
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_new(fs)", ret, out);
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_volfile_server(fs)", ret, out);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_logging(fs)", ret, out);
+
+ ret = glfs_init(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_init(fs)", ret, out);
+
+out:
+ if (fs) {
+ ret = glfs_fini(fs);
+ if (ret)
+ fprintf(stderr, "glfs_fini(fs) returned %d\n", ret);
+ }
+ return ret;
+}
diff --git a/tests/basic/gfapi/bug-1507896.t b/tests/basic/gfapi/bug-1507896.t
new file mode 100644
index 00000000000..4764e650232
--- /dev/null
+++ b/tests/basic/gfapi/bug-1507896.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/bug-1507896.c -lgfapi
+
+TEST ./$(dirname $0)/bug-1507896 $H0 $V0 $logdir/bug-1507896.log
+
+#volume name precedding with '/'
+TEST ! ./$(dirname $0)/bug-1507896 $H0 /$V0 $logdir/bug-1507896.log
+
+#volume name passed with any special characters
+TEST ! ./$(dirname $0)/bug-1507896 $H0 test@_$V0 $logdir/bug-1507896.log
+
+cleanup_tester $(dirname $0)/bug-1507896
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/bug1283983.c b/tests/basic/gfapi/bug1283983.c
new file mode 100644
index 00000000000..b920013d0e0
--- /dev/null
+++ b/tests/basic/gfapi/bug1283983.c
@@ -0,0 +1,122 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+int gfapi = 1;
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error ret(%d), errno(%d)\n", func, \
+ ret, errno); \
+ exit(1); \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+#define LOG_IF_NO_ERR(func, ret) \
+ do { \
+ if (ret == 0) { \
+ fprintf(stderr, "%s : hasn't returned error %d\n", func, ret); \
+ exit(1); \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0, i;
+ glfs_fd_t *fd = NULL;
+ char *filename = "/a1";
+ char *filename2 = "/a2";
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_upcall *cbk = NULL;
+ char *logfile = NULL;
+ char *volname = NULL;
+ int cnt = 1;
+ struct glfs_upcall_inode *in_arg = NULL;
+ struct glfs_object *root = NULL, *leaf = NULL;
+
+ fprintf(stderr, "Starting libgfapi_fini\n");
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1] volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("glfs_init", ret);
+
+ sleep(2);
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (!root) {
+ ret = -1;
+ LOG_ERR("glfs_h_lookupat root", ret);
+ }
+ leaf = glfs_h_lookupat(fs, root, filename, &sb, 0);
+ if (!leaf) {
+ ret = -1;
+ LOG_IF_NO_ERR("glfs_h_lookupat leaf", ret);
+ }
+
+ leaf = glfs_h_creat(fs, root, filename, O_RDWR, 0644, &sb);
+ if (!leaf) {
+ ret = -1;
+ LOG_ERR("glfs_h_lookupat leaf", ret);
+ }
+ fprintf(stderr, "glfs_h_create leaf - %p\n", leaf);
+
+ leaf = glfs_h_lookupat(fs, root, filename2, &sb, 0);
+ if (!leaf) {
+ ret = -1;
+ LOG_IF_NO_ERR("glfs_h_lookupat leaf", ret);
+ }
+
+ ret = glfs_h_rename(fs, root, filename, root, filename2);
+ LOG_ERR("glfs_rename", ret);
+
+ while (cnt++ < 5) {
+ ret = glfs_h_poll_upcall(fs, &cbk);
+ LOG_ERR("glfs_h_poll_upcall", ret);
+
+ /* There should not be any upcalls sent */
+ if (glfs_upcall_get_reason(cbk) != GLFS_UPCALL_EVENT_NULL) {
+ fprintf(stderr, "Error: Upcall received(%d)\n",
+ glfs_upcall_get_reason(cbk));
+ exit(1);
+ }
+
+ glfs_free(cbk);
+ }
+
+ ret = glfs_fini(fs);
+ LOG_ERR("glfs_fini", ret);
+
+ fprintf(stderr, "End of libgfapi_fini\n");
+
+ exit(0);
+}
diff --git a/tests/basic/gfapi/bug1283983.sh b/tests/basic/gfapi/bug1283983.sh
new file mode 100755
index 00000000000..931a0b55935
--- /dev/null
+++ b/tests/basic/gfapi/bug1283983.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+## Enable Upcall cache-invalidation feature
+TEST $CLI volume set $V0 features.cache-invalidation on;
+
+build_tester $(dirname $0)/bug1283983.c -lgfapi
+
+TEST ./$(dirname $0)/bug1283983 $H0 $V0 $logdir/bug1283983.log
+
+## There shouldn't be any NULL gfid messages logged
+TEST ! cat $logdir/bug1283983.log | grep "upcall" | grep "00000000-0000-0000-0000-000000000000"
+
+cleanup_tester $(dirname $0)/bug1283983
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/bug1291259.c b/tests/basic/gfapi/bug1291259.c
new file mode 100644
index 00000000000..cd7bc65268b
--- /dev/null
+++ b/tests/basic/gfapi/bug1291259.c
@@ -0,0 +1,181 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+int gfapi = 1;
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error ret(%d), errno(%d)\n", func, \
+ ret, errno); \
+ exit(1); \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+#define LOG_IF_NO_ERR(func, ret) \
+ do { \
+ if (ret == 0) { \
+ fprintf(stderr, "%s : hasn't returned error %d\n", func, ret); \
+ exit(1); \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+
+#define GLAPI_UUID_LENGTH 16
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_t *fs2 = NULL;
+ int ret = 0, i;
+ glfs_fd_t *fd = NULL;
+ char *filename = "/a1";
+ char *filename2 = "/a2";
+ struct stat sb = {
+ 0,
+ };
+ char *logfile = NULL;
+ char *volname = NULL;
+ char *hostname = NULL;
+ int cnt = 1;
+ int upcall_received = 0;
+ struct glfs_upcall *cbk = NULL;
+ struct glfs_object *root = NULL, *leaf = NULL;
+ unsigned char globjhdl[GFAPI_HANDLE_LENGTH];
+ unsigned char globjhdl2[GFAPI_HANDLE_LENGTH];
+
+ fprintf(stderr, "Starting libgfapi_fini\n");
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("glfs_init", ret);
+
+ /* This does not block, but enables caching of events. Real
+ * applications like NFS-Ganesha run this in a thread before activity
+ * on the fs (through this instance) happens. */
+ ret = glfs_h_poll_upcall(fs, &cbk);
+ LOG_ERR("glfs_h_poll_upcall", ret);
+
+ fs2 = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs2, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs2, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs2);
+ LOG_ERR("glfs_init", ret);
+
+ sleep(2);
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (!root) {
+ ret = -1;
+ LOG_ERR("glfs_h_lookupat root", ret);
+ }
+ leaf = glfs_h_lookupat(fs, root, filename, &sb, 0);
+ if (!leaf) {
+ ret = -1;
+ LOG_IF_NO_ERR("glfs_h_lookupat leaf", ret);
+ }
+
+ root = glfs_h_lookupat(fs2, NULL, "/", &sb, 0);
+ if (!root) {
+ ret = -1;
+ LOG_ERR("glfs_h_lookupat root", ret);
+ }
+ leaf = glfs_h_creat(fs2, root, filename, O_RDWR, 0644, &sb);
+ if (!leaf) {
+ ret = -1;
+ LOG_ERR("glfs_h_lookupat leaf", ret);
+ }
+ fprintf(stderr, "glfs_h_create leaf - %p\n", leaf);
+
+ while (cnt++ < 5 && !upcall_received) {
+ enum glfs_upcall_reason reason = 0;
+ struct glfs_upcall_inode *in_arg = NULL;
+
+ ret = glfs_h_poll_upcall(fs, &cbk);
+ LOG_ERR("glfs_h_poll_upcall", ret);
+ if (ret)
+ goto retry;
+
+ reason = glfs_upcall_get_reason(cbk);
+ fprintf(stderr, "Upcall received(%d)\n", reason);
+
+ if (reason == GLFS_UPCALL_INODE_INVALIDATE) {
+ struct glfs_object *object = NULL;
+
+ in_arg = glfs_upcall_get_event(cbk);
+ object = glfs_upcall_inode_get_object(in_arg);
+
+ ret = glfs_h_extract_handle(root, globjhdl + GLAPI_UUID_LENGTH,
+ GFAPI_HANDLE_LENGTH);
+ LOG_ERR("glfs_h_extract_handle", (ret != 16));
+
+ ret = glfs_h_extract_handle(object, globjhdl2 + GLAPI_UUID_LENGTH,
+ GFAPI_HANDLE_LENGTH);
+ LOG_ERR("glfs_h_extract_handle", (ret != 16));
+
+ if (memcmp(globjhdl + GLAPI_UUID_LENGTH,
+ globjhdl2 + GLAPI_UUID_LENGTH, 16)) {
+ fprintf(stderr, "Error: gfid mismatch\n");
+ exit(1);
+ }
+ upcall_received = 1;
+ }
+
+ retry:
+ if (!upcall_received)
+ sleep(1); /* glfs_h_poll_upcall() does not block */
+
+ if (!ret) {
+ glfs_free(cbk);
+ cbk = NULL;
+ }
+ }
+
+ if (!upcall_received) {
+ fprintf(stderr, "Error: Upcall not received\n");
+ exit(1);
+ }
+
+ ret = glfs_fini(fs);
+ LOG_ERR("glfs_fini", ret);
+
+ fprintf(stderr, "End of libgfapi_fini\n");
+
+ exit(0);
+}
diff --git a/tests/basic/gfapi/bug1291259.t b/tests/basic/gfapi/bug1291259.t
new file mode 100755
index 00000000000..999cda2da3a
--- /dev/null
+++ b/tests/basic/gfapi/bug1291259.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+## Enable Upcall cache-invalidation feature
+TEST $CLI volume set $V0 features.cache-invalidation on;
+
+TEST build_tester $(dirname $0)/bug1291259.c -lgfapi
+
+TEST ./$(dirname $0)/bug1291259 $H0 $V0 $logdir/bug1291259.log
+
+cleanup_tester $(dirname $0)/bug1291259
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1405301
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1405301
diff --git a/tests/basic/gfapi/bug1613098.c b/tests/basic/gfapi/bug1613098.c
new file mode 100644
index 00000000000..ee67e97a034
--- /dev/null
+++ b/tests/basic/gfapi/bug1613098.c
@@ -0,0 +1,96 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define ACL_TYPE_ACCESS (0x8000)
+
+#define VALIDATE_AND_GOTO_LABEL_ON_ERROR(func, ret, label) \
+ do { \
+ if (ret < 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ int flags = O_RDWR | O_SYNC;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ const char *filename = "file_tmp";
+ struct glfs_object *object = NULL;
+ acl_t acl = NULL;
+ struct stat sb;
+
+ if (argc != 3) {
+ fprintf(stderr, "Invalid argument\n");
+ return 1;
+ }
+
+ volname = argv[1];
+ logfile = argv[2];
+
+ fs = glfs_new(volname);
+ if (!fs)
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_new", ret, out);
+
+ ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_volfile_server", ret, out);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_logging", ret, out);
+
+ ret = glfs_init(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_init", ret, out);
+
+ fd = glfs_creat(fs, filename, flags, 0044);
+ if (fd == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_creat", ret, out);
+ }
+ glfs_close(fd);
+
+ object = glfs_h_lookupat(fs, NULL, filename, NULL, 0);
+ if (object == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_h_lookupat", ret, out);
+ }
+
+ ret = glfs_chown(fs, filename, 99, 99);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_chown", ret, out);
+
+ ret = glfs_setfsuid(99);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_setfsuid", ret, out);
+
+ ret = glfs_setfsgid(99);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_setfsgid", ret, out);
+
+ acl = glfs_h_acl_get(fs, object, ACL_TYPE_ACCESS);
+ if (acl == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_h_acl_get", ret, out);
+ }
+
+ ret = glfs_h_acl_set(fs, object, ACL_TYPE_ACCESS, acl);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_h_acl_get", ret, out);
+out:
+ glfs_setfsuid(0);
+ glfs_setfsgid(0);
+
+ if (object)
+ glfs_h_close(object);
+
+ if (fs)
+ glfs_fini(fs);
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/bug1613098.t b/tests/basic/gfapi/bug1613098.t
new file mode 100755
index 00000000000..e4acc2b76bf
--- /dev/null
+++ b/tests/basic/gfapi/bug1613098.t
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TEST glusterd
+
+TEST $CLI volume create $V0 ${H0}:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+build_tester $(dirname $0)/bug1613098.c -lgfapi
+
+TEST ./$(dirname $0)/bug1613098 $V0 $logdir/bug1613098.log
+
+cleanup_tester $(dirname $0)/bug1613098
+
+cleanup;
diff --git a/tests/basic/gfapi/gfapi-async-calls-test.c b/tests/basic/gfapi/gfapi-async-calls-test.c
new file mode 100644
index 00000000000..55835b14709
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-async-calls-test.c
@@ -0,0 +1,494 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+int cbk_complete = 0;
+int cbk_ret_val = -1;
+
+void
+cbk_check()
+{
+ while (cbk_complete != 1) {
+ sleep(1);
+ }
+ if (cbk_ret_val < 0) {
+ fprintf(stderr, "cbk_ret_val is -ve\n");
+ }
+}
+
+int
+fill_iov(struct iovec *iov, char fillchar, int count)
+{
+ int ret = -1;
+
+ iov->iov_base = malloc(count + 1);
+ if (iov->iov_base == NULL) {
+ return ret;
+ } else {
+ iov->iov_len = count;
+ ret = 0;
+ }
+ memset(iov->iov_base, fillchar, count);
+ memset(iov->iov_base + count, '\0', 1);
+
+ return ret;
+}
+
+glfs_t *
+init_glfs(const char *hostname, const char *volname, const char *logfile)
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return NULL;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile_server failed");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ return fs;
+}
+
+void
+pwritev_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_pwritev failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+pwritev_async(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ struct iovec iov = {0};
+ void *write_cookie = NULL;
+
+ ret = fill_iov(&iov, 'a', char_count);
+ if (ret) {
+ LOG_ERR("failed to create iov");
+ goto out;
+ }
+
+ write_cookie = strdup("write_cookie");
+ ret = glfs_pwritev_async(glfd, &iov, 1, 0, flags, pwritev_async_cbk,
+ &write_cookie);
+out:
+ if (ret < 0) {
+ LOG_ERR("glfs_pwritev async failed");
+ }
+ return ret;
+}
+
+void
+pwrite_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_pwrite_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+pwrite_async(glfs_fd_t *glfd)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ char buf1[10];
+ char *buf2 = "ten bytes!";
+ void *write_cookie = strdup("write_cookie");
+ ret = glfs_pwrite_async(glfd, buf1, 10, 0, flags, pwrite_async_cbk,
+ &write_cookie);
+
+ if (ret < 0) {
+ LOG_ERR("glfs_pwrite_async failed");
+ }
+ return ret;
+}
+
+void
+writev_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_writev_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+writev_async(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ struct iovec iov = {0};
+ void *write_cookie = NULL;
+
+ ret = fill_iov(&iov, 'a', char_count);
+ if (ret) {
+ LOG_ERR("failed to create iov");
+ goto out;
+ }
+
+ write_cookie = strdup("write_cookie");
+ ret = glfs_writev_async(glfd, &iov, 1, flags, writev_async_cbk,
+ &write_cookie);
+out:
+ if (ret < 0) {
+ LOG_ERR("glfs_writev_async failed");
+ }
+ return ret;
+}
+
+void
+write_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_write_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+write_async(glfs_fd_t *glfd)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ char buf1[10];
+ char *buf2 = "ten bytes!";
+ void *write_cookie = strdup("write_cookie");
+ ret = glfs_write_async(glfd, buf1, 10, flags, write_async_cbk,
+ &write_cookie);
+
+ if (ret < 0) {
+ LOG_ERR("glfs_write_async failed");
+ }
+ return ret;
+}
+
+void
+preadv_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_preadv_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+preadv_async(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ struct iovec iov = {0};
+ void *read_cookie = NULL;
+
+ ret = fill_iov(&iov, 'a', char_count);
+ if (ret) {
+ LOG_ERR("failed to create iov");
+ goto out;
+ }
+
+ read_cookie = strdup("preadv_cookie");
+ ret = glfs_preadv_async(glfd, &iov, 1, 0, flags, preadv_async_cbk,
+ &read_cookie);
+out:
+ if (ret < 0) {
+ LOG_ERR("glfs_preadv async failed");
+ }
+ return ret;
+}
+
+void
+pread_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_pread_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+pread_async(glfs_fd_t *glfd)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ char buf1[10];
+ void *read_cookie = strdup("read_cookie");
+ ret = glfs_pread_async(glfd, buf1, 10, 0, flags, pread_async_cbk,
+ &read_cookie);
+ if (ret < 0) {
+ LOG_ERR("glfs_pread_async failed");
+ }
+
+ return ret;
+}
+
+void
+readv_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_readv_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+readv_async(glfs_t *fs, glfs_fd_t *glfd, int char_count)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ struct iovec iov = {0};
+ void *read_cookie = NULL;
+
+ ret = fill_iov(&iov, 'a', char_count);
+ if (ret) {
+ LOG_ERR("failed to create iov");
+ goto out;
+ }
+
+ read_cookie = strdup("read_cookie");
+ ret = glfs_readv_async(glfd, &iov, 1, flags, readv_async_cbk, &read_cookie);
+out:
+ if (ret < 0) {
+ LOG_ERR("glfs_readv_async failed");
+ }
+ return ret;
+}
+
+void
+read_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_read_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+read_async(glfs_fd_t *glfd)
+{
+ ssize_t ret = -1;
+ int flags = O_RDWR;
+ char buf1[10];
+ void *read_cookie = strdup("read_cookie");
+ ret = glfs_read_async(glfd, buf1, 10, flags, read_async_cbk, &read_cookie);
+
+ if (ret < 0) {
+ LOG_ERR("glfs_read_async failed");
+ }
+ return ret;
+}
+
+void
+fsync_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_fsync_async_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+void
+fdatasync_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_fdatasync_async_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+void
+ftruncate_async_cbk(glfs_fd_t *fd, ssize_t ret, struct stat *prestat,
+ struct stat *poststat, void *cookie)
+{
+ if (ret < 0) {
+ LOG_ERR("glfs_ftruncate_async_cbk failed");
+ }
+ cbk_ret_val = ret;
+ cbk_complete = 1;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ glfs_t *fs = NULL;
+ const char *filename = "glfs_test.txt";
+ int flags = (O_RDWR | O_CREAT);
+ glfs_fd_t *glfd = NULL;
+ int count = 200;
+ void *data = strdup("Sample_text");
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = init_glfs(hostname, volname, logfile);
+ if (fs == NULL) {
+ LOG_ERR("init_glfs failed");
+ return -1;
+ }
+
+ glfd = glfs_creat(fs, filename, flags, 0644);
+ if (glfd == NULL) {
+ LOG_ERR("glfs_creat failed");
+ exit(1);
+ }
+
+ ret = pwritev_async(fs, glfd, count);
+ if (ret) {
+ LOG_ERR("glfs_pwritev_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = writev_async(fs, glfd, count);
+ if (ret) {
+ LOG_ERR("glfs_writev_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = write_async(glfd);
+ if (ret) {
+ LOG_ERR("glfs_write_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = preadv_async(fs, glfd, count);
+ if (ret) {
+ LOG_ERR("glfs_preadv_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = pread_async(glfd);
+ if (ret) {
+ LOG_ERR("glfs_pread_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = readv_async(fs, glfd, count);
+ if (ret) {
+ LOG_ERR("glfs_readv_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = read_async(glfd);
+ if (ret) {
+ LOG_ERR("glfs_read_async_test failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = glfs_fsync(glfd, NULL, NULL);
+ if (ret < 0) {
+ LOG_ERR("glfs_fsync failed");
+ exit(1);
+ }
+
+ ret = glfs_fdatasync(glfd, NULL, NULL);
+ if (ret < 0) {
+ LOG_ERR("glfs_fdatasync failed");
+ exit(1);
+ }
+
+ ret = glfs_fsync_async(glfd, fsync_async_cbk, data);
+ if (ret < 0) {
+ LOG_ERR("glfs_fsync_async failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = glfs_fdatasync_async(glfd, fdatasync_async_cbk, data);
+ if (ret < 0) {
+ LOG_ERR("glfs_fdatasync_async failed");
+ exit(1);
+ }
+ cbk_check();
+
+ ret = glfs_ftruncate_async(glfd, 4, ftruncate_async_cbk, data);
+ if (ret < 0) {
+ LOG_ERR("glfs_ftruncate_async failed");
+ exit(1);
+ }
+
+ ret = glfs_close(glfd);
+ if (ret < 0) {
+ LOG_ERR("glfs close failed");
+ }
+
+ ret = glfs_fini(fs);
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-async-calls-test.t b/tests/basic/gfapi/gfapi-async-calls-test.t
new file mode 100644
index 00000000000..9e5cd7c92cc
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-async-calls-test.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+EXIT_EARLY=1;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 ${H0}:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/gfapi-async-calls-test.c -lgfapi
+
+
+TEST ./$(dirname $0)/gfapi-async-calls-test ${H0} $V0 $logdir/gfapi-async-calls-test.log
+
+cleanup_tester $(dirname $0)/gfapi-async-calls-test
+
+cleanup;
diff --git a/tests/basic/gfapi/gfapi-copy-file-range.t b/tests/basic/gfapi/gfapi-copy-file-range.t
new file mode 100644
index 00000000000..a56d3a58e07
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-copy-file-range.t
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+mkfs.xfs 2>&1 | grep reflink
+if [ $? -ne 0 ]; then
+ SKIP_TESTS
+ exit
+fi
+
+
+TEST glusterd
+
+TEST truncate -s 2G $B0/xfs_image
+# for now, a xfs filesystem with reflink support is created.
+# In future, better to make changes in MKFS_LOOP so that,
+# once can create a xfs filesystem with reflink enabled in
+# generic and simple way, instead of doing below steps each
+# time.
+TEST mkfs.xfs -f -i size=512 -m reflink=1 $B0/xfs_image;
+
+TEST mkdir $B0/bricks
+TEST mount -t xfs -o loop $B0/xfs_image $B0/bricks
+
+# Just a single brick volume. More test cases need to be
+# added in future for distribute, replicate,
+# distributed replicate and distributed replicated sharded
+# volumes.
+TEST $CLI volume create $V0 $H0:$B0/bricks/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST dd if=/dev/urandom of=$M0/file bs=1M count=555;
+
+# check for the existence of the created file
+TEST stat $M0/file;
+
+# grab the size of the file
+SRC_SIZE=$(stat -c %s $M0/file);
+
+logdir=`gluster --print-logdir`
+
+# TODO:
+# For now, do not call copy-file-range utility. This is because,
+# the regression machines are centos-7 based which does not have
+# copy_file_range API available. So, instead of this testcase
+# causing regression failures, for now, this is just a dummy test
+# case. Uncomment the below tests (until volume stop) when there
+# is support for copy_file_range in the regression machines.
+#
+
+TEST build_tester $(dirname $0)/glfs-copy-file-range.c -lgfapi
+
+TEST ./$(dirname $0)/glfs-copy-file-range $H0 $V0 $logdir/gfapi-copy-file-range.log /file /new
+
+# check whether the destination file is created or not
+TEST stat $M0/new
+
+# check the size of the destination file
+DST_SIZE=$(stat -c %s $M0/new);
+
+# The sizes of the source and destination should be same.
+# Atleast it ensures that, copy_file_range API is working
+# as expected. Whether the actual cloning happened via reflink
+# or a read/write happened is different matter.
+TEST [ $SRC_SIZE == $DST_SIZE ];
+
+cleanup_tester $(dirname $0)/glfs-copy-file-range
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+UMOUNT_LOOP $B0/bricks;
+
+cleanup;
diff --git a/tests/basic/gfapi/gfapi-dup.c b/tests/basic/gfapi/gfapi-dup.c
new file mode 100644
index 00000000000..028108e4590
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-dup.c
@@ -0,0 +1,84 @@
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define VALIDATE_AND_GOTO_LABEL_ON_ERROR(func, ret, label) \
+ do { \
+ if (ret < 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ int flags = O_RDWR | O_SYNC;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+ const char *filename = "file_tmp";
+ const char *buff =
+ "An opinion should be the result of thought, "
+ "not a substitute for it.";
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ return 1;
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs)
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_new", ret, out);
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_volfile_server", ret, out);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_logging", ret, out);
+
+ ret = glfs_init(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_init", ret, out);
+
+ fd1 = glfs_creat(fs, filename, flags, 0644);
+ if (fd1 == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_creat", ret, out);
+ }
+
+ ret = glfs_write(fd1, buff, strlen(buff), flags);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_write", ret, out);
+
+ fd2 = glfs_dup(fd1);
+ if (fd2 == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_dup", ret, out);
+ }
+
+ ret = glfs_lseek(fd2, 0, SEEK_SET);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_lseek", ret, out);
+
+out:
+ if (fd1 != NULL)
+ glfs_close(fd1);
+ if (fd2 != NULL)
+ glfs_close(fd2);
+ if (fs) {
+ ret = glfs_fini(fs);
+ if (ret)
+ fprintf(stderr, "glfs_fini(fs) returned %d\n", ret);
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-dup.t b/tests/basic/gfapi/gfapi-dup.t
new file mode 100755
index 00000000000..849b106f90f
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-dup.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/gfapi-dup.c -lgfapi
+
+TEST ./$(dirname $0)/gfapi-dup $H0 $V0 $logdir/gfapi-dup.log
+
+cleanup_tester $(dirname $0)/gfapi-dup
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/gfapi-graph-switch-open-fd.t b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t
new file mode 100644
index 00000000000..2e666be7ec7
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-graph-switch-open-fd.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{0..2};
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/sync
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/gfapi-keep-writing.c -lgfapi
+
+
+#Launch a program to keep doing writes on an fd
+./$(dirname $0)/gfapi-keep-writing ${H0} $V0 $logdir/gfapi-async-calls-test.log sync &
+p=$!
+sleep 1 #Let some writes go through
+#Check if graph switch will lead to any pending markers for ever
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+
+
+TEST rm -f $M0/sync #Make sure the glfd is closed
+TEST wait #Wait for background process to die
+#Goal is to check if there is permanent FOOL changelog
+sleep 5
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick0/glfs_test.txt trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick1/glfs_test.txt trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick2/glfs_test.txt trusted.afr.dirty
+
+cleanup_tester $(dirname $0)/gfapi-async-calls-test
+
+cleanup;
diff --git a/tests/basic/gfapi/gfapi-keep-writing.c b/tests/basic/gfapi/gfapi-keep-writing.c
new file mode 100644
index 00000000000..91b59cea02b
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-keep-writing.c
@@ -0,0 +1,129 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+glfs_t *
+init_glfs(const char *hostname, const char *volname, const char *logfile)
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return NULL;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile_server failed");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ return fs;
+}
+
+int
+glfs_test_function(const char *hostname, const char *volname,
+ const char *logfile, const char *syncfile)
+{
+ int ret = -1;
+ int flags = O_CREAT | O_RDWR;
+ glfs_t *fs = NULL;
+ glfs_fd_t *glfd = NULL;
+ const char *buff = "This is from my prog\n";
+ const char *filename = "glfs_test.txt";
+ struct stat buf = {0};
+
+ fs = init_glfs(hostname, volname, logfile);
+ if (fs == NULL) {
+ LOG_ERR("init_glfs failed");
+ return -1;
+ }
+
+ glfd = glfs_creat(fs, filename, flags, 0644);
+ if (glfd == NULL) {
+ LOG_ERR("glfs_creat failed");
+ goto out;
+ }
+
+ while (glfs_stat(fs, syncfile, &buf) == 0) {
+ ret = glfs_write(glfd, buff, strlen(buff), flags);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+ }
+
+ ret = glfs_close(glfd);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+
+out:
+ ret = glfs_fini(fs);
+ if (ret) {
+ LOG_ERR("glfs_fini failed");
+ }
+
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ char *syncfile = NULL;
+
+ if (argc != 5) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+ syncfile = argv[4];
+
+ ret = glfs_test_function(hostname, volname, logfile, syncfile);
+ if (ret) {
+ LOG_ERR("glfs_test_function failed");
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-load-volfile.c b/tests/basic/gfapi/gfapi-load-volfile.c
new file mode 100644
index 00000000000..fbfc6045cd7
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-load-volfile.c
@@ -0,0 +1,65 @@
+/*
+ * Create a glfs instance based on a .vol file
+ *
+ * This is used to measure memory leaks by initializing a graph through a .vol
+ * file and destroying it again.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <glusterfs/api/glfs.h>
+
+#define PROGNAME "gfapi-load-volfile"
+
+void
+usage(FILE *output)
+{
+ fprintf(output, "Usage: " PROGNAME " <volfile>\n");
+}
+
+void
+main(int argc, char **argv)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ if (argc != 2) {
+ usage(stderr);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "-h")) {
+ usage(stdout);
+ exit(EXIT_SUCCESS);
+ }
+
+ fs = glfs_new(PROGNAME);
+ if (!fs) {
+ perror("glfs_new failed");
+ exit(EXIT_FAILURE);
+ }
+
+ glfs_set_logging(fs, PROGNAME ".log", 9);
+
+ ret = glfs_set_volfile(fs, argv[1]);
+ if (ret) {
+ perror("glfs_set_volfile failed");
+ ret = EXIT_FAILURE;
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret) {
+ perror("glfs_init failed");
+ ret = EXIT_FAILURE;
+ goto out;
+ }
+
+ ret = EXIT_SUCCESS;
+out:
+ glfs_fini(fs);
+
+ exit(ret);
+}
diff --git a/tests/basic/gfapi/gfapi-load-volfile.t b/tests/basic/gfapi/gfapi-load-volfile.t
new file mode 100644
index 00000000000..d914cacd819
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-load-volfile.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+
+TEST $CLI volume create ${V0} ${H0}:${B0}/brick0
+EXPECT 'Created' volinfo_field ${V0} 'Status'
+
+TEST $CLI volume start ${V0}
+EXPECT 'Started' volinfo_field ${V0} 'Status'
+
+TEST build_tester $(dirname ${0})/gfapi-load-volfile.c -lgfapi
+
+sed -e "s,@@HOSTNAME@@,${H0},g" -e "s,@@BRICKPATH@@,${B0}/brick0,g" \
+ $(dirname ${0})/protocol-client.vol.in \
+ > $(dirname ${0})/protocol-client.vol
+
+TEST ./$(dirname ${0})/gfapi-load-volfile \
+ $(dirname ${0})/protocol-client.vol
+
+cleanup_tester $(dirname ${0})/gfapi-load-volfile
+cleanup_tester $(dirname ${0})/protocol-client.vol
+
+cleanup
diff --git a/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c
new file mode 100644
index 00000000000..7beb8dd1fe4
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.c
@@ -0,0 +1,127 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+glfs_t *
+init_glfs(const char *hostname, const char *volname, const char *volfile,
+ const char *logfile)
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return NULL;
+ }
+
+ ret = glfs_set_volfile(fs, volfile);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile failed");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ return fs;
+}
+
+int
+glfs_test_function(const char *hostname, const char *volname,
+ const char *volfile, const char *logfile)
+{
+ int ret = -1;
+ int flags = O_CREAT | O_RDWR;
+ glfs_t *fs = NULL;
+ glfs_fd_t *glfd = NULL;
+ const char *buff = "This is from my prog\n";
+ const char *filename = "glfs_test.txt";
+
+ fs = init_glfs(hostname, volname, volfile, logfile);
+ if (fs == NULL) {
+ LOG_ERR("init_glfs failed");
+ return -1;
+ }
+
+ glfd = glfs_creat(fs, filename, flags, 0644);
+ if (glfd == NULL) {
+ LOG_ERR("glfs_creat failed");
+ goto out;
+ }
+
+ ret = glfs_write(glfd, buff, strlen(buff), flags);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+
+ ret = glfs_close(glfd);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+
+out:
+ ret = glfs_fini(fs);
+ if (ret) {
+ LOG_ERR("glfs_fini failed");
+ }
+
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *volfile = NULL;
+ char *logfile = NULL;
+
+ if (argc != 5) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ volfile = argv[3];
+ logfile = argv[4];
+
+ ret = glfs_test_function(hostname, volname, volfile, logfile);
+ if (ret) {
+ LOG_ERR("glfs_test_function failed");
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t
new file mode 100755
index 00000000000..8e94df9d321
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-ssl-load-volfile-test.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../ssl.rc
+
+cleanup;
+
+sed -e "s,@@HOSTNAME@@,${H0},g" -e "s,@@BRICKPATH@@,${B0}/brick1,g" \
+ -e "s,@@SSL@@,off,g" \
+ $(dirname ${0})/protocol-client-ssl.vol.in \
+ > $(dirname ${0})/protocol-client-ssl.vol
+
+TEST create_self_signed_certs
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/gfapi-ssl-load-volfile-test.c -lgfapi
+
+# Run test without I/O or management encryption
+TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \
+ $(dirname ${0})/protocol-client-ssl.vol \
+ $logdir/gfapi-ssl-load-volfile-test.log
+
+# Enable management encryption
+touch $GLUSTERD_WORKDIR/secure-access
+
+killall_gluster
+
+TEST glusterd
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+# Run test with management encryption (No I/O encryption)
+TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \
+ $(dirname ${0})/protocol-client-ssl.vol \
+ $logdir/gfapi-ssl-load-volfile-test.log
+
+# Enable I/O encryption
+TEST $CLI volume set $V0 server.ssl on
+
+killall_gluster
+
+sed -e "s,@@HOSTNAME@@,${H0},g" -e "s,@@BRICKPATH@@,${B0}/brick1,g" \
+ -e "s,@@SSL@@,on,g" \
+ $(dirname ${0})/protocol-client-ssl.vol.in \
+ > $(dirname ${0})/protocol-client-ssl.vol
+
+TEST glusterd
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+# Run test without I/O or management encryption
+TEST $(dirname $0)/gfapi-ssl-load-volfile-test $H0 $V0 \
+ $(dirname ${0})/protocol-client-ssl.vol \
+ $logdir/gfapi-ssl-load-volfile-test.log
+
+cleanup_tester $(dirname $0)/gfapi-ssl-load-volfile-test
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
+
+# NetBSD build scripts are not up to date therefore this test
+# is failing in NetBSD. Therefore skipping the test in NetBSD
+# as of now.
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
diff --git a/tests/basic/gfapi/gfapi-ssl-test.c b/tests/basic/gfapi/gfapi-ssl-test.c
new file mode 100644
index 00000000000..a27b5233702
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-ssl-test.c
@@ -0,0 +1,124 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+glfs_t *
+init_glfs(const char *hostname, const char *volname, const char *logfile)
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return NULL;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile_server failed");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ return fs;
+}
+
+int
+glfs_test_function(const char *hostname, const char *volname,
+ const char *logfile)
+{
+ int ret = -1;
+ int flags = O_CREAT | O_RDWR;
+ glfs_t *fs = NULL;
+ glfs_fd_t *glfd = NULL;
+ const char *buff = "This is from my prog\n";
+ const char *filename = "glfs_test.txt";
+
+ fs = init_glfs(hostname, volname, logfile);
+ if (fs == NULL) {
+ LOG_ERR("init_glfs failed");
+ return -1;
+ }
+
+ glfd = glfs_creat(fs, filename, flags, 0644);
+ if (glfd == NULL) {
+ LOG_ERR("glfs_creat failed");
+ goto out;
+ }
+
+ ret = glfs_write(glfd, buff, strlen(buff), flags);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+
+ ret = glfs_close(glfd);
+ if (ret < 0) {
+ LOG_ERR("glfs_write failed");
+ goto out;
+ }
+
+out:
+ ret = glfs_fini(fs);
+ if (ret) {
+ LOG_ERR("glfs_fini failed");
+ }
+
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ ret = glfs_test_function(hostname, volname, logfile);
+ if (ret) {
+ LOG_ERR("glfs_test_function failed");
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-ssl-test.t b/tests/basic/gfapi/gfapi-ssl-test.t
new file mode 100755
index 00000000000..937fcc83a4c
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-ssl-test.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../ssl.rc
+
+cleanup;
+
+TEST create_self_signed_certs
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/gfapi-ssl-test.c -lgfapi
+
+# Run test without I/O or management encryption
+TEST ./$(dirname $0)/gfapi-ssl-test $H0 $V0 $logdir/gfapi-ssl-test.log
+
+# Enable management encryption
+touch $GLUSTERD_WORKDIR/secure-access
+
+killall_gluster
+
+TEST glusterd
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+# Run test with management encryption (No I/O encryption)
+TEST ./$(dirname $0)/gfapi-ssl-test $H0 $V0 $logdir/gfapi-ssl-test.log
+
+# Enable I/O encryption
+TEST $CLI volume set $V0 client.ssl on
+TEST $CLI volume set $V0 server.ssl on
+
+killall_gluster
+
+TEST glusterd
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+# Run test without I/O or management encryption
+TEST ./$(dirname $0)/gfapi-ssl-test $H0 $V0 $logdir/gfapi-ssl-test.log
+
+cleanup_tester $(dirname $0)/gfapi-ssl-test
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
+
+# NetBSD build scripts are not up to date therefore this test
+# is failing in NetBSD. Therefore skipping the test in NetBSD
+# as of now.
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
diff --git a/tests/basic/gfapi/gfapi-statx-basic.c b/tests/basic/gfapi/gfapi-statx-basic.c
new file mode 100644
index 00000000000..a4943fa0fd1
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-statx-basic.c
@@ -0,0 +1,184 @@
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <glusterfs/api/glfs.h>
+
+#define VALIDATE_AND_GOTO_LABEL_ON_ERROR(func, ret, label) \
+ do { \
+ if (ret < 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+#define GOTO_LABEL_ON_FALSE(compstr, ret, label) \
+ do { \
+ if (ret == false) { \
+ fprintf(stderr, "%s : comparison failed!\n", compstr); \
+ goto label; \
+ } \
+ } while (0)
+
+#define WRITE_SIZE 513
+#define TRUNC_SIZE 4096
+
+/* Using private function and hence providing a forward declation in sync with
+code in glfs-internal.h */
+int
+glfs_statx(struct glfs *fs, const char *path, unsigned int mask,
+ struct glfs_stat *statxbuf);
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ int flags = O_RDWR | O_SYNC;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd1 = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ const char *filename = "file_tmp";
+ const char buff[WRITE_SIZE];
+ struct stat sb;
+ unsigned int mask;
+ struct glfs_stat statx;
+ bool bret;
+
+ if (argc != 3) {
+ fprintf(stderr, "Invalid argument\n");
+ fprintf(stderr, "Usage: %s <volname> <logfile>\n", argv[0]);
+ return 1;
+ }
+
+ volname = argv[1];
+ logfile = argv[2];
+
+ fs = glfs_new(volname);
+ if (!fs)
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_new", ret, out);
+
+ ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_volfile_server", ret, out);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_logging", ret, out);
+
+ ret = glfs_init(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_init", ret, out);
+
+ fd1 = glfs_creat(fs, filename, flags, 0644);
+ if (fd1 == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_creat", ret, out);
+ }
+
+ ret = glfs_truncate(fs, filename, TRUNC_SIZE);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_truncate", ret, out);
+
+ ret = glfs_write(fd1, buff, WRITE_SIZE, flags);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_write", ret, out);
+
+ ret = glfs_fstat(fd1, &sb);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_fstat", ret, out);
+
+ if (sb.st_size != TRUNC_SIZE) {
+ fprintf(stderr, "wrong size %jd should be %jd\n", (intmax_t)sb.st_size,
+ (intmax_t)2048);
+ ret = -1;
+ goto out;
+ }
+
+ glfs_close(fd1);
+ fd1 = NULL;
+
+ /* TEST 1: Invalid mask to statx */
+ mask = 0xfafadbdb;
+ ret = glfs_statx(fs, filename, mask, NULL);
+ if (ret == 0 || ((ret == -1) && (errno != EINVAL))) {
+ fprintf(stderr,
+ "Invalid args passed, but error returned is"
+ " incorrect (ret - %d, errno - %d)\n",
+ ret, errno);
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+ /* TEST 2: Call statx and validate fields against prior fstat data */
+ /* NOTE: This fails, as iatt->ia_flags are not carried through the stack,
+ * for example if mdc_to_iatt is invoked to serve cached stat, we will loose
+ * the flags. */
+ mask = GLFS_STAT_ALL;
+ ret = glfs_statx(fs, filename, mask, &statx);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_statx", ret, out);
+
+ if ((statx.glfs_st_mask & GLFS_STAT_BASIC_STATS) != GLFS_STAT_BASIC_STATS) {
+ fprintf(stderr, "Invalid glfs_st_mask, expecting 0x%x got 0x%x\n",
+ GLFS_STAT_ALL, statx.glfs_st_mask);
+ ret = -1;
+ goto out;
+ }
+
+ bret = (sb.st_ino == statx.glfs_st_ino);
+ GOTO_LABEL_ON_FALSE("(sb.st_ino == statx.glfs_st_ino)", bret, out);
+
+ bret = (sb.st_mode == statx.glfs_st_mode);
+ GOTO_LABEL_ON_FALSE("(sb.st_mode == statx.glfs_st_mode)", bret, out);
+
+ bret = (sb.st_nlink == statx.glfs_st_nlink);
+ GOTO_LABEL_ON_FALSE("(sb.st_nlink == statx.glfs_st_nlink)", bret, out);
+
+ bret = (sb.st_uid == statx.glfs_st_uid);
+ GOTO_LABEL_ON_FALSE("(sb.st_uid == statx.glfs_st_uid)", bret, out);
+
+ bret = (sb.st_gid == statx.glfs_st_gid);
+ GOTO_LABEL_ON_FALSE("(sb.st_gid == statx.glfs_st_gid)", bret, out);
+
+ bret = (sb.st_size == statx.glfs_st_size);
+ GOTO_LABEL_ON_FALSE("(sb.st_size == statx.glfs_st_size)", bret, out);
+
+ bret = (sb.st_blksize == statx.glfs_st_blksize);
+ GOTO_LABEL_ON_FALSE("(sb.st_blksize == statx.glfs_st_blksize)", bret, out);
+
+ bret = (sb.st_blocks == statx.glfs_st_blocks);
+ GOTO_LABEL_ON_FALSE("(sb.st_blocks == statx.glfs_st_blocks)", bret, out);
+
+ bret = (!memcmp(&sb.st_atim, &statx.glfs_st_atime,
+ sizeof(struct timespec)));
+ GOTO_LABEL_ON_FALSE("(sb.st_atim == statx.glfs_st_atime)", bret, out);
+
+ bret = (!memcmp(&sb.st_mtim, &statx.glfs_st_mtime,
+ sizeof(struct timespec)));
+ GOTO_LABEL_ON_FALSE("(sb.st_mtim == statx.glfs_st_mtime)", bret, out);
+
+ bret = (!memcmp(&sb.st_ctim, &statx.glfs_st_ctime,
+ sizeof(struct timespec)));
+ GOTO_LABEL_ON_FALSE("(sb.st_ctim == statx.glfs_st_ctime)", bret, out);
+
+ /* TEST 3: Check if partial masks are accepted */
+ mask = GLFS_STAT_TYPE | GLFS_STAT_UID | GLFS_STAT_GID;
+ ret = glfs_statx(fs, filename, mask, &statx);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_statx", ret, out);
+
+ /* We currently still return all stats, as is acceptable based on the API
+ * definition in the header (and in statx as well) */
+ if ((statx.glfs_st_mask & GLFS_STAT_BASIC_STATS) != GLFS_STAT_BASIC_STATS) {
+ fprintf(stderr, "Invalid glfs_st_mask, expecting 0x%x got 0x%x\n",
+ GLFS_STAT_ALL, statx.glfs_st_mask);
+ ret = -1;
+ goto out;
+ }
+out:
+ if (fd1 != NULL)
+ glfs_close(fd1);
+ if (fs) {
+ (void)glfs_fini(fs);
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-statx-basic.t b/tests/basic/gfapi/gfapi-statx-basic.t
new file mode 100755
index 00000000000..d9acbce2f99
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-statx-basic.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 ${H0}:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+# NOTE: Test is passing due to very specific volume configuration
+# Disable md-cache, as it does not save and return ia_flags from iatt
+# This is possibly going to be true of other xlators as well (ec/afr), need to
+# ensure these are fixed, or hack statx to return all basic attrs anyway.
+TEST $CLI volume set $V0 performance.md-cache-timeout 0
+
+logdir=`gluster --print-logdir`
+
+build_tester $(dirname $0)/gfapi-statx-basic.c -lgfapi
+
+TEST ./$(dirname $0)/gfapi-statx-basic $V0 $logdir/gfapi-statx-basic.log
+
+cleanup_tester $(dirname $0)/gfapi-statx-basic
+
+cleanup;
diff --git a/tests/basic/gfapi/gfapi-trunc.c b/tests/basic/gfapi/gfapi-trunc.c
new file mode 100644
index 00000000000..769f6cfa1d9
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-trunc.c
@@ -0,0 +1,89 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define VALIDATE_AND_GOTO_LABEL_ON_ERROR(func, ret, label) \
+ do { \
+ if (ret < 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+#define WRITE_SIZE 4096
+#define TRUNC_SIZE 1234
+/* Make sure TRUNC_SIZE is smaller than WRITE_SIZE at compile time. */
+typedef char _size_check[WRITE_SIZE - TRUNC_SIZE];
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ int flags = O_RDWR | O_SYNC;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd1 = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ const char *filename = "file_tmp";
+ const char buff[WRITE_SIZE];
+ struct stat sb;
+
+ if (argc != 3) {
+ fprintf(stderr, "Invalid argument\n");
+ return 1;
+ }
+
+ volname = argv[1];
+ logfile = argv[2];
+
+ fs = glfs_new(volname);
+ if (!fs)
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_new", ret, out);
+
+ ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_volfile_server", ret, out);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_logging", ret, out);
+
+ ret = glfs_init(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_init", ret, out);
+
+ fd1 = glfs_creat(fs, filename, flags, 0644);
+ if (fd1 == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_creat", ret, out);
+ }
+
+ ret = glfs_write(fd1, buff, WRITE_SIZE, flags);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_write", ret, out);
+
+ ret = glfs_truncate(fs, filename, TRUNC_SIZE);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_truncate", ret, out);
+
+ ret = glfs_fstat(fd1, &sb);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_fstat", ret, out);
+
+ if (sb.st_size != TRUNC_SIZE) {
+ fprintf(stderr, "wrong size %jd should be %jd\n", (intmax_t)sb.st_size,
+ (intmax_t)2048);
+ ret = -1;
+ }
+
+out:
+ if (fd1 != NULL)
+ glfs_close(fd1);
+ if (fs) {
+ /*
+ * If this fails (as it does on Special Snowflake NetBSD for no
+ * good reason), it shouldn't affect the result of the test.
+ */
+ (void)glfs_fini(fs);
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/gfapi-trunc.t b/tests/basic/gfapi/gfapi-trunc.t
new file mode 100644
index 00000000000..4943a6be898
--- /dev/null
+++ b/tests/basic/gfapi/gfapi-trunc.t
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TEST glusterd
+
+TEST $CLI volume create $V0 ${H0}:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+build_tester $(dirname $0)/gfapi-trunc.c -lgfapi
+
+TEST ./$(dirname $0)/gfapi-trunc $V0 $logdir/gfapi-trunc.log
+
+cleanup_tester $(dirname $0)/gfapi-trunc
+
+cleanup;
diff --git a/tests/basic/gfapi/glfd-lkowner.c b/tests/basic/gfapi/glfd-lkowner.c
new file mode 100644
index 00000000000..ec0429dc3c4
--- /dev/null
+++ b/tests/basic/gfapi/glfd-lkowner.c
@@ -0,0 +1,214 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+
+int gfapi = 1;
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto out; \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+
+char lownera[8] = "lownera", lownerb[8] = "lownerb";
+char lownerc[8] = "lownerc";
+
+int
+lock_test(glfs_fd_t *glfd1, glfs_fd_t *glfd2, bool should_fail, int l1_start,
+ int l1_len, char *l1_owner, int lo1_len, int l2_start, int l2_len,
+ char *l2_owner, int lo2_len)
+{
+ int ret = -1, f_ret = -1;
+ struct flock lock1 =
+ {
+ 0,
+ },
+ lock2 = {
+ 0,
+ };
+
+lock1:
+ if (!glfd1)
+ goto lock2;
+
+ /* lock on glfd1 */
+ lock1.l_type = F_WRLCK;
+ lock1.l_whence = SEEK_SET;
+ lock1.l_start = l1_start;
+ lock1.l_len = l1_len;
+
+ ret = glfs_fd_set_lkowner(glfd1, l1_owner, lo1_len);
+ LOG_ERR("glfs_fd_set_lkowner on glfd1", ret);
+
+ ret = glfs_posix_lock(glfd1, F_SETLK, &lock1);
+ LOG_ERR("glfs_posix_lock on glfd1", ret);
+
+lock2:
+ if (!glfd2)
+ goto out;
+
+ /* lock on glfd2 */
+ lock2.l_type = F_WRLCK;
+ lock2.l_whence = SEEK_SET;
+ lock2.l_start = l2_start;
+ lock2.l_len = l2_len;
+
+ ret = glfs_fd_set_lkowner(glfd2, l2_owner, lo2_len);
+ LOG_ERR("glfs_fd_set_lkowner on glfd2", ret);
+
+ ret = glfs_posix_lock(glfd2, F_SETLK, &lock2);
+
+ if (should_fail && ret) {
+ f_ret = 0;
+ } else if (!ret && !should_fail) {
+ f_ret = 0;
+ } else {
+ f_ret = -1;
+ }
+out:
+ fprintf(stderr,
+ "Lock test on glfd1 (start(%d), len(%d),"
+ " lk_owner(%s)) and glfd2 (start(%d), len(%d), "
+ "lk_owner(%s)) - expected(%s) - result(%s)\n",
+ l1_start, l1_len, l1_owner, l2_start, l2_len, l2_owner,
+ (should_fail ? "FAIL" : "SUCCESS"), (ret ? "FAIL" : "SUCCESS"));
+ return f_ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0, i, status = 0;
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ glfs_fd_t *fd3 = NULL;
+ char *filename = "file_tmp";
+ char *volname = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("glfs_init", ret);
+
+ fd1 = glfs_creat(fs, filename, O_RDWR | O_SYNC, 0644);
+ if (fd1 <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_creat", ret);
+ }
+ fprintf(stderr, "glfs-create fd1 - %d\n", fd1);
+
+ fd2 = glfs_dup(fd1);
+ fprintf(stderr, "glfs-dup fd2 - %d\n", fd2);
+
+ fd3 = glfs_open(fs, filename, O_RDWR | O_SYNC);
+ if (fd2 <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_open", ret);
+ }
+ fprintf(stderr, "glfs-open fd3 - %d\n", fd3);
+
+ /* TEST 1: Conflicting ranges, same lk_owner
+ * lock1 (0, 10, lownera)
+ * lock2 (5, 10, lownera)
+ * Expected: should not fail but get merged
+ */
+ ret = lock_test(fd1, fd2, false, 0, 10, lownera, 8, 5, 10, lownera, 8);
+ LOG_ERR("==== glfs_lock_test_1", ret);
+
+ /* TEST 2: Conflicting ranges, different lk_owner
+ * lock1 (0, 10, lownera) - already taken
+ * lock2 (5, 10, lownerb)
+ * Expected: should fail and not get merged
+ */
+ ret = lock_test(NULL, fd2, true, 0, 10, lownera, 8, 5, 10, lownerb, 8);
+ LOG_ERR("==== glfs_lock_test_2", ret);
+
+ /* TEST 3: Different ranges, same lk_owner
+ * lock1 (0, 10, lownera) - already taken
+ * lock2 (30, 10, lownera)
+ * Expected: should not fail
+ */
+ ret = lock_test(NULL, fd2, false, 0, 10, lownera, 8, 30, 10, lownera, 8);
+ LOG_ERR("==== glfs_lock_test_3", ret);
+
+ /* TEST 4: Conflicting ranges, different lk_owner
+ * lock1 (0, 10, lownera) - already taken
+ * lock2 (50, 10, lownerb)
+ * Expected: should not fail
+ */
+ ret = lock_test(NULL, fd2, false, 0, 10, lownera, 8, 50, 10, lownerb, 8);
+ LOG_ERR("==== glfs_lock_test_4", ret);
+
+ /* TEST 5: Close fd1 & retry TEST2
+ * lock1 (not applicable)
+ * lock2 (5, 10, lownerb)
+ * Expected: should succeed now
+ */
+ ret = glfs_close(fd1);
+ LOG_ERR("glfs_close", ret);
+
+ ret = lock_test(NULL, fd2, false, 0, 10, lownera, 8, 5, 10, lownerb, 8);
+ LOG_ERR("==== glfs_lock_test_5", ret);
+
+ /* TEST 6: Check closing fd1 doesn't flush fd2 locks
+ * retry TEST 4 but with fd2 and fd3.
+ * lock1 (50, 10, lownerb) - already taken
+ * lock2 (55, 10, lownerc)
+ * Expected: should fail
+ */
+ ret = lock_test(NULL, fd3, true, 50, 10, lownerb, 8, 55, 10, lownerc, 8);
+ LOG_ERR("==== glfs_lock_test_6", ret);
+
+err:
+ ret = glfs_close(fd2);
+ LOG_ERR("glfs_close", ret);
+
+ ret = glfs_close(fd3);
+ LOG_ERR("glfs_close", ret);
+
+out:
+ if (fs) {
+ ret = glfs_fini(fs);
+ fprintf(stderr, "glfs_fini(fs) returned %d\n", ret);
+ }
+
+ if (ret)
+ exit(1);
+ exit(0);
+}
diff --git a/tests/basic/gfapi/glfd-lkowner.t b/tests/basic/gfapi/glfd-lkowner.t
new file mode 100755
index 00000000000..ad7b0260a14
--- /dev/null
+++ b/tests/basic/gfapi/glfd-lkowner.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/glfd-lkowner.c -lgfapi
+
+TEST ./$(dirname $0)/glfd-lkowner $H0 $V0 $logdir/glfd-lkowner.log
+
+cleanup_tester $(dirname $0)/glfd-lkowner
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/glfs-copy-file-range.c b/tests/basic/gfapi/glfs-copy-file-range.c
new file mode 100644
index 00000000000..1c5fd81fc87
--- /dev/null
+++ b/tests/basic/gfapi/glfs-copy-file-range.c
@@ -0,0 +1,180 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <string.h>
+#include <time.h>
+#include <libgen.h>
+
+static void
+cleanup(glfs_t *fs)
+{
+ if (!fs)
+ return;
+#if 0
+ /* glfs fini path is still racy and crashing the program. Since
+ * this program any way has to die, we are not going to call fini
+ * in the released versions. i.e. final builds. For all
+ * internal testing lets enable this so that glfs_fini code
+ * path becomes stable. */
+ glfs_fini (fs);
+#endif
+}
+
+int
+main(int argc, char **argv)
+{
+ glfs_t *fs = NULL;
+ int ret = -1;
+ char *volname = NULL;
+ char *logfilepath = NULL;
+ char *path_src = NULL;
+ char *path_dst = NULL;
+ glfs_fd_t *glfd_in = NULL;
+ glfs_fd_t *glfd_out = NULL;
+ char *volfile_server = NULL;
+
+ struct stat stbuf = {
+ 0,
+ };
+ struct glfs_stat stat_src = {
+ 0,
+ };
+ struct glfs_stat prestat_dst = {
+ 0,
+ };
+ struct glfs_stat poststat_dst = {
+ 0,
+ };
+ size_t len;
+
+ if (argc < 6) {
+ printf("%s <volume> <log file path> <source> <destination>", argv[0]);
+ ret = -1;
+ goto out;
+ }
+
+ volfile_server = argv[1];
+ volname = argv[2];
+ logfilepath = argv[3];
+ path_src = argv[4];
+ path_dst = argv[5];
+
+ if (path_src[0] != '/') {
+ fprintf(stderr, "source path %s is not absolute", path_src);
+ errno = EINVAL;
+ goto out;
+ }
+
+ if (path_dst[0] != '/') {
+ fprintf(stderr, "destination path %s is not absolute", path_dst);
+ errno = EINVAL;
+ goto out;
+ }
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ ret = -errno;
+ fprintf(stderr, "Not able to initialize volume '%s'", volname);
+ goto out;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", volfile_server, 24007);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr,
+ "Failed to set the volfile server, "
+ "%s",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfilepath, 7);
+ if (ret < 0) {
+ ret = -errno;
+ fprintf(stderr,
+ "Failed to set the log file path, "
+ "%s",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ ret = -errno;
+ if (errno == ENOENT) {
+ fprintf(stderr, "Volume %s does not exist", volname);
+ } else {
+ fprintf(stderr,
+ "%s: Not able to fetch "
+ "volfile from glusterd",
+ volname);
+ }
+ goto out;
+ }
+
+ glfd_in = glfs_open(fs, path_src, O_RDONLY | O_NONBLOCK);
+ if (!glfd_in) {
+ ret = -errno;
+ goto out;
+ } else {
+ printf("OPEN_SRC: opening %s is success\n", path_src);
+ }
+
+ glfd_out = glfs_creat(fs, path_dst, O_RDWR, 0644);
+ if (!glfd_out) {
+ fprintf(stderr,
+ "FAILED_DST_OPEN: failed to "
+ "open (create) %s (%s)\n",
+ path_dst, strerror(errno));
+ ret = -errno;
+ goto out;
+ } else {
+ printf("OPEN_DST: opening %s is success\n", path_dst);
+ }
+
+ ret = glfs_fstat(glfd_in, &stbuf);
+ if (ret < 0) {
+ ret = -errno;
+ goto out;
+ } else {
+ printf("FSTAT_SRC: fstat on %s is success\n", path_dst);
+ }
+
+ len = stbuf.st_size;
+
+ do {
+ ret = glfs_copy_file_range(glfd_in, NULL, glfd_out, NULL, len, 0,
+ &stat_src, &prestat_dst, &poststat_dst);
+ if (ret == -1) {
+ fprintf(stderr, "copy_file_range failed with %s\n",
+ strerror(errno));
+ ret = -errno;
+ break;
+ } else {
+ printf("copy_file_range successful\n");
+ len -= ret;
+ }
+ } while (len > 0);
+
+out:
+ if (glfd_in)
+ glfs_close(glfd_in);
+ if (glfd_out)
+ glfs_close(glfd_out);
+
+ cleanup(fs);
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/glfs_h_creat_open.c b/tests/basic/gfapi/glfs_h_creat_open.c
new file mode 100644
index 00000000000..7672561e73f
--- /dev/null
+++ b/tests/basic/gfapi/glfs_h_creat_open.c
@@ -0,0 +1,118 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error ret(%d), errno(%d)\n", func, \
+ ret, errno); \
+ exit(1); \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+#define LOG_IF_NO_ERR(func, ret) \
+ do { \
+ if (ret == 0) { \
+ fprintf(stderr, "%s : hasn't returned error %d\n", func, ret); \
+ exit(1); \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ struct glfs_object *root = NULL, *leaf = NULL;
+ glfs_fd_t *fd = NULL;
+ char *filename = "/ro-file";
+ struct stat sb = {
+ 0,
+ };
+ char *logfile = NULL;
+ char *volname = NULL;
+ char *hostname = NULL;
+ char buf[32] = "abcdefghijklmnopqrstuvwxyz012345";
+
+ fprintf(stderr, "Starting glfs_h_creat_open\n");
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("glfs_init", ret);
+
+ sleep(2);
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (!root) {
+ ret = -1;
+ LOG_ERR("glfs_h_lookupat root", ret);
+ }
+ leaf = glfs_h_lookupat(fs, root, filename, &sb, 0);
+ if (!leaf) {
+ ret = -1;
+ LOG_IF_NO_ERR("glfs_h_lookupat leaf", ret);
+ }
+
+ leaf = glfs_h_creat_open(fs, root, filename, O_RDONLY, 00444, &sb, &fd);
+ if (!leaf || !fd) {
+ ret = -1;
+ LOG_ERR("glfs_h_creat leaf", ret);
+ }
+ fprintf(stderr, "glfs_h_create_open leaf - %p\n", leaf);
+
+ ret = glfs_write(fd, buf, 32, 0);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_write: error writing to file %s, %s\n", filename,
+ strerror(errno));
+ goto out;
+ }
+
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ LOG_ERR("glfs_h_getattrs", ret);
+
+ if (sb.st_size != 32) {
+ fprintf(stderr, "glfs_write: post size mismatch\n");
+ goto out;
+ }
+
+ fprintf(stderr, "Successfully opened and written to a read-only file \n");
+out:
+ if (fd)
+ glfs_close(fd);
+
+ ret = glfs_fini(fs);
+ LOG_ERR("glfs_fini", ret);
+
+ fprintf(stderr, "End of libgfapi_fini\n");
+
+ exit(0);
+}
diff --git a/tests/basic/gfapi/glfs_h_creat_open.t b/tests/basic/gfapi/glfs_h_creat_open.t
new file mode 100755
index 00000000000..f24ae7395be
--- /dev/null
+++ b/tests/basic/gfapi/glfs_h_creat_open.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/glfs_h_creat_open.c -lgfapi
+
+TEST ./$(dirname $0)/glfs_h_creat_open $H0 $V0 $logdir/glfs.log
+
+cleanup_tester $(dirname $0)/glfs_h_creat_open
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/glfs_sysrq.c b/tests/basic/gfapi/glfs_sysrq.c
new file mode 100644
index 00000000000..13e06be6df2
--- /dev/null
+++ b/tests/basic/gfapi/glfs_sysrq.c
@@ -0,0 +1,60 @@
+/** glfs_sysrq.c
+ *
+ * Simple test application to run all glfs_syqrq() debugging calls.
+ *
+ * Usage: ./glfs_sysrq <host> <volume> <logfile>
+ */
+#include <errno.h>
+#include <stdio.h>
+
+#include <glusterfs/api/glfs.h>
+
+int
+main(int argc, char *argv[])
+{
+ /* cmdline arguments */
+ char *host = NULL;
+ char *volume = NULL;
+ char *logfile = NULL;
+
+ /* other variables */
+ glfs_t *fs = NULL;
+ int ret = 0;
+
+ if (argc != 4) {
+ fprintf(stderr, "Usage: %s <host> <volume> <logfile>\n", argv[0]);
+ return -1;
+ }
+
+ host = argv[1];
+ volume = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volume);
+ if (!fs) {
+ return -1;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", host, 24007);
+ if (ret < 0) {
+ return -1;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ return -1;
+ }
+
+ /* checking of the results is easier in the script running this test */
+ glfs_sysrq(fs, GLFS_SYSRQ_HELP);
+ glfs_sysrq(fs, GLFS_SYSRQ_STATEDUMP);
+
+ glfs_fini(fs);
+
+ return 0;
+}
diff --git a/tests/basic/gfapi/glfs_sysrq.t b/tests/basic/gfapi/glfs_sysrq.t
new file mode 100755
index 00000000000..d1a0e9bc248
--- /dev/null
+++ b/tests/basic/gfapi/glfs_sysrq.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+#
+# Run glfs_sysrq, a gfapi applications calling all glfs_sysrq() commands.
+# Each command generates a specific log message, or something else that can be
+# tested for existance.
+#
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick1
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+logdir=$(gluster --print-logdir)
+
+# clear all statedumps
+cleanup_statedump
+TEST ! test -e $statedumpdir/*.dump.*
+# vim friendly command */
+
+build_tester $(dirname $0)/glfs_sysrq.c -lgfapi
+TEST $(dirname $0)/glfs_sysrq $H0 $V0 $logdir/glfs_sysrq.log
+
+# check for the help message in the log
+TEST grep -q '"(H)elp"' $logdir/glfs_sysrq.log
+
+# see if there is a statedump
+TEST test -e $statedumpdir/*.dump.*
+# vim friendly command */
+
+cleanup_tester $(dirname $0)/glfs_sysrq
+cleanup
diff --git a/tests/basic/gfapi/glfs_xreaddirplus_r.c b/tests/basic/gfapi/glfs_xreaddirplus_r.c
new file mode 100644
index 00000000000..0c4c79123eb
--- /dev/null
+++ b/tests/basic/gfapi/glfs_xreaddirplus_r.c
@@ -0,0 +1,242 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <time.h>
+
+#define VALIDATE_AND_GOTO_LABEL_ON_ERROR(func, ret, label) \
+ do { \
+ if (ret < 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+#define VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR(func, bool_var, ret, label) \
+ do { \
+ if (!bool_var) { \
+ fprintf(stderr, "%s : returned error (%s)\n", func, \
+ strerror(errno)); \
+ ret = -1; \
+ goto label; \
+ } \
+ } while (0)
+
+#define MAX_FILES_CREATE 10
+#define MAXPATHNAME 512
+
+void
+assimilatetime(struct timespec *ts, struct timespec ts_st,
+ struct timespec ts_ed)
+{
+ if ((ts_ed.tv_nsec - ts_st.tv_nsec) < 0) {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec - 1;
+ ts->tv_nsec += 1000000000 + ts_ed.tv_nsec - ts_st.tv_nsec;
+ } else {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec;
+ ts->tv_nsec += ts_ed.tv_nsec - ts_st.tv_nsec;
+ }
+
+ if (ts->tv_nsec > 1000000000) {
+ ts->tv_nsec = ts->tv_nsec - 1000000000;
+ ts->tv_sec += 1;
+ }
+
+ return;
+}
+
+/*
+ * Returns '%' difference between ts1 & ts2
+ */
+int
+comparetime(struct timespec ts1, struct timespec ts2)
+{
+ uint64_t ts1_n, ts2_n;
+ int pct = 0;
+
+ ts1_n = (ts1.tv_sec * 1000000000) + ts1.tv_nsec;
+ ts2_n = (ts2.tv_sec * 1000000000) + ts2.tv_nsec;
+
+ pct = ((ts1_n - ts2_n) * 100) / ts1_n;
+
+ return pct;
+}
+
+int
+old_readdir(glfs_t *fs)
+{
+ struct glfs_object *root = NULL;
+ struct glfs_fd *fd = NULL;
+ struct stat *sb = NULL;
+ char buf[512];
+ struct dirent *entry = NULL;
+ int ret = -1;
+ struct glfs_object *glhandle = NULL;
+
+ if (!fs)
+ return -1;
+
+ root = glfs_h_lookupat(fs, NULL, "/", sb, 0);
+ VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR("glfs_h_lookupat", !!root, ret, out);
+
+ fd = glfs_opendir(fs, "/");
+ VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR("glfs_opendir", !!fd, ret, out);
+
+ while (glfs_readdir_r(fd, (struct dirent *)buf, &entry), entry) {
+ if (strcmp(entry->d_name, ".") && strcmp(entry->d_name, "..")) {
+ glhandle = glfs_h_lookupat(fs, root, "/", sb, 0);
+ VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR("glfs_h_lookupat", !!glhandle,
+ ret, out);
+ }
+ }
+
+ glfs_closedir(fd);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+new_xreaddirplus(glfs_t *fs)
+{
+ struct glfs_fd *fd = NULL;
+ struct stat *sb = NULL;
+ int ret = -1;
+ uint32_t rflags = (GFAPI_XREADDIRP_STAT | GFAPI_XREADDIRP_HANDLE);
+ struct glfs_xreaddirp_stat *xstat = NULL;
+ struct dirent de;
+ struct dirent *pde = NULL;
+ struct glfs_object *glhandle = NULL;
+
+ if (!fs)
+ return -1;
+
+ fd = glfs_opendir(fs, "/");
+ VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR("glfs_opendir", !!fd, ret, out);
+
+ ret = glfs_xreaddirplus_r(fd, rflags, &xstat, &de, &pde);
+ while (ret > 0 && pde != NULL) {
+ if (xstat) {
+ sb = glfs_xreaddirplus_get_stat(xstat);
+ VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR("glfs_xreaddirplus_get_stat",
+ !!sb, ret, out);
+
+ if (strcmp(de.d_name, ".") && strcmp(de.d_name, "..")) {
+ glhandle = glfs_xreaddirplus_get_object(xstat);
+ VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR(
+ "glfs_xreaddirplus_get_object", !!glhandle, ret, out);
+ }
+ }
+
+ if (xstat) {
+ glfs_free(xstat);
+ xstat = NULL;
+ }
+
+ ret = glfs_xreaddirplus_r(fd, rflags, &xstat, &de, &pde);
+
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_xreaddirp_r", ret, out);
+ }
+
+ if (xstat)
+ glfs_free(xstat);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+ char *my_file = "file_";
+ char my_file_name[MAXPATHNAME];
+ uint32_t flags = O_RDWR | O_SYNC;
+ struct glfs_fd *fd = NULL;
+ int i = 0;
+ int pct = 0;
+ struct timespec timestamp = {0, 0}, st_timestamp, ed_timestamp;
+ struct timespec otimestamp = {0, 0}, ost_timestamp, oed_timestamp;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ return 1;
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR("glfs_new", !!fs, ret, out);
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_volfile_server", ret, out);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_logging", ret, out);
+
+ ret = glfs_init(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_init", ret, out);
+
+ for (i = 0; i < MAX_FILES_CREATE; i++) {
+ sprintf(my_file_name, "%s%d", my_file, i);
+
+ fd = glfs_creat(fs, my_file_name, flags, 0644);
+ VALIDATE_BOOL_AND_GOTO_LABEL_ON_ERROR("glfs_creat", !!fd, ret, out);
+
+ glfs_close(fd);
+ }
+
+ /* measure performance using old readdir call and new xreaddirplus call and
+ * compare */
+ ret = clock_gettime(CLOCK_REALTIME, &ost_timestamp);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("clock_gettime", ret, out);
+
+ ret = old_readdir(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("old_readdir", ret, out);
+
+ ret = clock_gettime(CLOCK_REALTIME, &oed_timestamp);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("clock_gettime", ret, out);
+
+ assimilatetime(&otimestamp, ost_timestamp, oed_timestamp);
+
+ printf("\tOverall time using readdir:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ otimestamp.tv_sec, otimestamp.tv_nsec);
+
+ ret = clock_gettime(CLOCK_REALTIME, &st_timestamp);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("clock_gettime", ret, out);
+
+ ret = new_xreaddirplus(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("new_xreaddirplus", ret, out);
+
+ ret = clock_gettime(CLOCK_REALTIME, &ed_timestamp);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("clock_gettime", ret, out);
+
+ assimilatetime(&timestamp, st_timestamp, ed_timestamp);
+
+ printf("\tOverall time using xreaddirplus:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ timestamp.tv_sec, timestamp.tv_nsec);
+
+ pct = comparetime(otimestamp, timestamp);
+ printf("There is improvement by %d%%\n", pct);
+
+ ret = 0;
+out:
+ if (fs) {
+ ret = glfs_fini(fs);
+ if (ret)
+ fprintf(stderr, "glfs_fini(fs) returned %d\n", ret);
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/glfs_xreaddirplus_r.t b/tests/basic/gfapi/glfs_xreaddirplus_r.t
new file mode 100755
index 00000000000..d21a00c66f2
--- /dev/null
+++ b/tests/basic/gfapi/glfs_xreaddirplus_r.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/glfs_xreaddirplus_r.c -lgfapi
+
+TEST $(dirname $0)/glfs_xreaddirplus_r $H0 $V0 $logdir/glfs_xreaddirplus_r.log
+
+cleanup_tester $(dirname $0)/glfs_xreaddirplus_r
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/basic/gfapi/glfsxmp-coverage.c b/tests/basic/gfapi/glfsxmp-coverage.c
new file mode 100644
index 00000000000..51650023efd
--- /dev/null
+++ b/tests/basic/gfapi/glfsxmp-coverage.c
@@ -0,0 +1,1900 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <string.h>
+#include <time.h>
+
+#define TEST_STR_LEN 2048
+
+int
+test_dirops(glfs_t *fs)
+{
+ glfs_fd_t *fd = NULL;
+ char buf[2048];
+ struct dirent *entry = NULL;
+
+ fd = glfs_opendir(fs, "/");
+ if (!fd) {
+ fprintf(stderr, "/: %s\n", strerror(errno));
+ return -1;
+ }
+
+ fprintf(stderr, "Entries:\n");
+ while (glfs_readdir_r(fd, (struct dirent *)buf, &entry), entry) {
+ fprintf(stderr, "%s: %lu\n", entry->d_name, glfs_telldir(fd));
+ }
+
+ /* Should internally call fsyncdir(), hopefully */
+ glfs_fsync(fd, NULL, NULL);
+
+ glfs_closedir(fd);
+ return 0;
+}
+
+int
+test_xattr(glfs_t *fs)
+{
+ char *filename = "/filename2";
+ char *linkfile = "/linkfile";
+ glfs_fd_t *fd = NULL;
+ char buf[512];
+ char *ptr;
+ int ret;
+
+ ret = glfs_setxattr(fs, filename, "user.testkey", "testval", 8, 0);
+ fprintf(stderr, "setxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_setxattr(fs, filename, "user.testkey2", "testval", 8, 0);
+ fprintf(stderr, "setxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_getxattr(fs, filename, "user.testkey", buf, 512);
+ fprintf(stderr, "getxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_listxattr(fs, filename, buf, 512);
+ fprintf(stderr, "listxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_symlink(fs, "filename", linkfile);
+ fprintf(stderr, "symlink(%s %s): %s\n", filename, linkfile,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_readlink(fs, linkfile, buf, 512);
+ fprintf(stderr, "readlink(%s) : %d (%s)\n", filename, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_lsetxattr(fs, filename, "user.testkey3", "testval", 8, 0);
+ fprintf(stderr, "lsetxattr(%s) : %d (%s)\n", linkfile, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_llistxattr(fs, linkfile, buf, 512);
+ fprintf(stderr, "llistxattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_lgetxattr(fs, filename, "user.testkey3", buf, 512);
+ fprintf(stderr, "lgetxattr(%s): %d (%s)\n", linkfile, ret, strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ for (ptr = buf; ptr < buf + ret; ptr++) {
+ printf("key=%s\n", ptr);
+ ptr += strlen(ptr);
+ }
+
+ ret = glfs_removexattr(fs, filename, "user.testkey2");
+ fprintf(stderr, "removexattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ fd = glfs_open(fs, filename, O_RDWR);
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ ret = glfs_fsetxattr(fd, "user.testkey2", "testval", 8, 0);
+ fprintf(stderr, "fsetxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_fgetxattr(fd, "user.testkey2", buf, 512);
+ fprintf(stderr, "fgetxattr(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_flistxattr(fd, buf, 512);
+ fprintf(stderr, "flistxattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ if (ret < 0)
+ return -1;
+
+ for (ptr = buf; ptr < buf + ret; ptr++) {
+ printf("key=%s\n", ptr);
+ ptr += strlen(ptr);
+ }
+
+ ret = glfs_fremovexattr(fd, "user.testkey2");
+ fprintf(stderr, "fremovexattr(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ glfs_close(fd);
+
+ return 0;
+}
+
+int
+test_chdir(glfs_t *fs)
+{
+ int ret = -1;
+ char *dir = "/dir";
+ char *topdir = "/topdir";
+ char *linkdir = "/linkdir";
+ char *linkdir2 = "/linkdir2";
+ char *subdir = "./subdir";
+ char *respath = NULL;
+ char pathbuf[4096];
+
+ ret = glfs_mkdir(fs, topdir, 0755);
+ fprintf(stderr, "mkdir(%s): %s\n", topdir, strerror(errno));
+ if (ret)
+ return -1;
+
+ ret = glfs_mkdir(fs, dir, 0755);
+ fprintf(stderr, "mkdir(%s): %s\n", dir, strerror(errno));
+ if (ret)
+ return -1;
+
+ respath = glfs_getcwd(fs, pathbuf, 4096);
+ fprintf(stdout, "getcwd() = %s\n", respath);
+
+ ret = glfs_symlink(fs, "topdir", linkdir);
+ if (ret) {
+ fprintf(stderr, "symlink(%s, %s): %s\n", topdir, linkdir,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_chdir(fs, linkdir);
+ if (ret) {
+ fprintf(stderr, "chdir(%s): %s\n", linkdir, strerror(errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd(fs, pathbuf, 4096);
+ fprintf(stdout, "getcwd() = %s\n", respath);
+
+ respath = glfs_realpath(fs, subdir, pathbuf);
+ if (respath) {
+ fprintf(stderr, "realpath(%s) worked unexpectedly: %s\n", subdir,
+ respath);
+ return -1;
+ }
+
+ ret = glfs_mkdir(fs, subdir, 0755);
+ if (ret) {
+ fprintf(stderr, "mkdir(%s): %s\n", subdir, strerror(errno));
+ return -1;
+ }
+
+ respath = glfs_realpath(fs, subdir, pathbuf);
+ if (!respath) {
+ fprintf(stderr, "realpath(%s): %s\n", subdir, strerror(errno));
+ } else {
+ fprintf(stdout, "realpath(%s) = %s\n", subdir, respath);
+ }
+
+ ret = glfs_chdir(fs, subdir);
+ if (ret) {
+ fprintf(stderr, "chdir(%s): %s\n", subdir, strerror(errno));
+ return -1;
+ }
+
+ respath = glfs_getcwd(fs, pathbuf, 4096);
+ fprintf(stdout, "getcwd() = %s\n", respath);
+
+ respath = glfs_realpath(fs, "/linkdir/subdir", pathbuf);
+ if (!respath) {
+ fprintf(stderr, "realpath(/linkdir/subdir): %s\n", strerror(errno));
+ } else {
+ fprintf(stdout, "realpath(/linkdir/subdir) = %s\n", respath);
+ }
+
+ return 0;
+}
+
+#ifdef DEBUG
+static void
+peek_stat(struct stat *sb)
+{
+ printf("Dumping stat information:\n");
+ printf("File type: ");
+
+ switch (sb->st_mode & S_IFMT) {
+ case S_IFBLK:
+ printf("block device\n");
+ break;
+ case S_IFCHR:
+ printf("character device\n");
+ break;
+ case S_IFDIR:
+ printf("directory\n");
+ break;
+ case S_IFIFO:
+ printf("FIFO/pipe\n");
+ break;
+ case S_IFLNK:
+ printf("symlink\n");
+ break;
+ case S_IFREG:
+ printf("regular file\n");
+ break;
+ case S_IFSOCK:
+ printf("socket\n");
+ break;
+ default:
+ printf("unknown?\n");
+ break;
+ }
+
+ printf("I-node number: %ld\n", (long)sb->st_ino);
+
+ printf("Mode: %lo (octal)\n",
+ (unsigned long)sb->st_mode);
+
+ printf("Link count: %ld\n", (long)sb->st_nlink);
+ printf("Ownership: UID=%ld GID=%ld\n", (long)sb->st_uid,
+ (long)sb->st_gid);
+
+ printf("Preferred I/O block size: %ld bytes\n", (long)sb->st_blksize);
+ printf("File size: %lld bytes\n", (long long)sb->st_size);
+ printf("Blocks allocated: %lld\n", (long long)sb->st_blocks);
+
+ printf("Last status change: %s", ctime(&sb->st_ctime));
+ printf("Last file access: %s", ctime(&sb->st_atime));
+ printf("Last file modification: %s", ctime(&sb->st_mtime));
+
+ return;
+}
+
+static void
+peek_handle(unsigned char *glid)
+{
+ int i;
+
+ for (i = 0; i < GFAPI_HANDLE_LENGTH; i++) {
+ printf(":%02x:", glid[i]);
+ }
+ printf("\n");
+}
+#else /* DEBUG */
+static void
+peek_stat(struct stat *sb)
+{
+ return;
+}
+
+static void
+peek_handle(unsigned char *id)
+{
+ return;
+}
+#endif /* DEBUG */
+
+glfs_t *fs = NULL;
+char *full_parent_name = "/testdir", *parent_name = "testdir";
+
+void
+test_h_unlink(void)
+{
+ char *my_dir = "unlinkdir";
+ char *my_file = "file.txt";
+ char *my_subdir = "dir1";
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL,
+ *subdir = NULL, *subleaf = NULL;
+ struct stat sb;
+ int ret;
+
+ printf("glfs_h_unlink tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
+ if (dir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_creat(fs, dir, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ subdir = glfs_h_mkdir(fs, dir, my_subdir, 0755, &sb);
+ if (subdir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_subdir, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ subleaf = glfs_h_creat(fs, subdir, my_file, O_CREAT, 0644, &sb);
+ if (subleaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, subdir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non empty directory */
+ ret = glfs_h_unlink(fs, dir, my_subdir);
+ if ((ret && errno != ENOTEMPTY) || (ret == 0)) {
+ fprintf(stderr,
+ "glfs_h_unlink: error unlinking %s: it is non empty: %s\n",
+ my_subdir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink regular file */
+ ret = glfs_h_unlink(fs, subdir, my_file);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_file, subdir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink directory */
+ ret = glfs_h_unlink(fs, dir, my_subdir);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_subdir, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink regular file */
+ ret = glfs_h_unlink(fs, dir, my_file);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non-existent regular file */
+ ret = glfs_h_unlink(fs, dir, my_file);
+ if ((ret && errno != ENOENT) || (ret == 0)) {
+ fprintf(stderr,
+ "glfs_h_unlink: error unlinking non-existent %s: invalid errno "
+ ",%d, %s\n",
+ my_file, ret, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink non-existent directory */
+ ret = glfs_h_unlink(fs, dir, my_subdir);
+ if ((ret && errno != ENOENT) || (ret == 0)) {
+ fprintf(stderr,
+ "glfs_h_unlink: error unlinking non-existent %s: invalid "
+ "errno ,%d, %s\n",
+ my_subdir, ret, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ /* unlink directory */
+ ret = glfs_h_unlink(fs, parent, my_dir);
+ if (ret) {
+ fprintf(stderr, "glfs_h_unlink: error unlinking %s: from (%p),%s\n",
+ my_dir, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+
+ printf("glfs_h_unlink tests: PASSED\n");
+
+out:
+ if (dir)
+ glfs_h_close(dir);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (subdir)
+ glfs_h_close(subdir);
+ if (subleaf)
+ glfs_h_close(subleaf);
+ if (parent)
+ glfs_h_close(parent);
+
+ return;
+}
+
+void
+test_h_getsetattrs(void)
+{
+ char *my_dir = "attrdir";
+ char *my_file = "attrfile.txt";
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL;
+ struct stat sb, retsb;
+ int ret, valid;
+ struct timespec timestamp;
+
+ printf("glfs_h_getattrs and setattrs tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
+ if (dir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, dir, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_unlink tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ ret = glfs_h_getattrs(fs, dir, &retsb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error %s: from (%p),%s\n", my_dir,
+ dir, strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&retsb);
+ /* TODO: Compare stat information */
+
+ retsb.st_mode = 00666;
+ retsb.st_uid = 1000;
+ retsb.st_gid = 1001;
+ ret = clock_gettime(CLOCK_REALTIME, &timestamp);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ retsb.st_atim = timestamp;
+ retsb.st_mtim = timestamp;
+ valid = GFAPI_SET_ATTR_MODE | GFAPI_SET_ATTR_UID | GFAPI_SET_ATTR_GID |
+ GFAPI_SET_ATTR_ATIME | GFAPI_SET_ATTR_MTIME;
+ peek_stat(&retsb);
+
+ ret = glfs_h_setattrs(fs, dir, &retsb, valid);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_setattrs: error %s: from (%p),%s\n", my_dir,
+ dir, strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ memset(&retsb, 0, sizeof(struct stat));
+ ret = glfs_h_stat(fs, dir, &retsb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_stat: error %s: from (%p),%s\n", my_dir, dir,
+ strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&retsb);
+
+ printf("glfs_h_getattrs and setattrs tests: PASSED\n");
+out:
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (dir)
+ glfs_h_close(dir);
+
+ return;
+}
+
+void
+test_h_truncate(void)
+{
+ char *my_dir = "truncatedir";
+ char *my_file = "file.txt";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL;
+ struct stat sb;
+ glfs_fd_t *fd = NULL;
+ char buf[32];
+ off_t offset = 0;
+ int ret = 0;
+
+ printf("glfs_h_truncate tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
+ if (parent == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ fd = glfs_h_open(fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_h_open: error on open of %s: %s\n", my_file,
+ strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ memcpy(buf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write(fd, buf, 32, 0);
+
+ /* run tests */
+ /* truncate lower */
+ offset = 30;
+ ret = glfs_h_truncate(fs, leaf, offset);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error for %s (%p),%s\n", my_file,
+ leaf, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf(stderr, "glfs_h_truncate: post size mismatch\n");
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ /* truncate higher */
+ offset = 32;
+ ret = glfs_h_truncate(fs, leaf, offset);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error for %s (%p),%s\n", my_file,
+ leaf, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf(stderr, "glfs_h_truncate: post size mismatch\n");
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ /* truncate equal */
+ offset = 30;
+ ret = glfs_h_truncate(fs, leaf, offset);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_truncate: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ ret = glfs_h_getattrs(fs, leaf, &sb);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_getattrs: error for %s (%p),%s\n", my_file,
+ leaf, strerror(errno));
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+ if (sb.st_size != offset) {
+ fprintf(stderr, "glfs_h_truncate: post size mismatch\n");
+ printf("glfs_h_truncate tests: FAILED\n");
+ goto out;
+ }
+
+ printf("glfs_h_truncate tests: PASSED\n");
+out:
+ if (fd)
+ glfs_close(fd);
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+
+ return;
+}
+
+void
+test_h_links(void)
+{
+ char *my_dir = "linkdir";
+ char *my_file = "file.txt";
+ char *my_symlnk = "slnk.txt";
+ char *my_lnk = "lnk.txt";
+ char *linksrc_dir = "dir1";
+ char *linktgt_dir = "dir2";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+ *dirsrc = NULL, *dirtgt = NULL, *dleaf = NULL;
+ struct glfs_object *ln1 = NULL;
+ struct stat sb;
+ int ret;
+ char *buf = NULL;
+
+ printf("glfs_h_link(s) tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
+ if (parent == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirsrc = glfs_h_mkdir(fs, parent, linksrc_dir, 0755, &sb);
+ if (dirsrc == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ linksrc_dir, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirtgt = glfs_h_mkdir(fs, parent, linktgt_dir, 0755, &sb);
+ if (dirtgt == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ linktgt_dir, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dleaf = glfs_h_creat(fs, dirsrc, my_file, O_CREAT, 0644, &sb);
+ if (dleaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dirsrc, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* run tests */
+ /* sym link: /testdir/linkdir/file.txt to ./slnk.txt */
+ ln1 = glfs_h_symlink(fs, parent, my_symlnk, "./file.txt", &sb);
+ if (ln1 == NULL) {
+ fprintf(stderr, "glfs_h_symlink: error creating %s: from (%p),%s\n",
+ my_symlnk, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ buf = calloc(1024, sizeof(char));
+ if (buf == NULL) {
+ fprintf(stderr, "Error allocating memory\n");
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+
+ ret = glfs_h_readlink(fs, ln1, buf, 1024);
+ if (ret <= 0) {
+ fprintf(stderr, "glfs_h_readlink: error reading %s: from (%p),%s\n",
+ my_symlnk, ln1, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ if (!(strncmp(buf, my_symlnk, strlen(my_symlnk)))) {
+ fprintf(stderr,
+ "glfs_h_readlink: error mismatch in link name: actual %s: "
+ "retrieved %s\n",
+ my_symlnk, buf);
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+
+ /* link: /testdir/linkdir/file.txt to ./lnk.txt */
+ ret = glfs_h_link(fs, leaf, parent, my_lnk);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_link: error creating %s: from (%p),%s\n",
+ my_lnk, parent, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ /* TODO: Should write content to a file and read from the link */
+
+ /* link: /testdir/linkdir/dir1/file.txt to ../dir2/slnk.txt */
+ ret = glfs_h_link(fs, dleaf, dirtgt, my_lnk);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_link: error creating %s: from (%p),%s\n",
+ my_lnk, dirtgt, strerror(errno));
+ printf("glfs_h_link(s) tests: FAILED\n");
+ goto out;
+ }
+ /* TODO: Should write content to a file and read from the link */
+
+ printf("glfs_h_link(s) tests: PASSED\n");
+
+out:
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (dirsrc)
+ glfs_h_close(dirsrc);
+ if (dirtgt)
+ glfs_h_close(dirtgt);
+ if (dleaf)
+ glfs_h_close(dleaf);
+ if (ln1)
+ glfs_h_close(ln1);
+ if (buf)
+ free(buf);
+
+ return;
+}
+
+void
+test_h_rename(void)
+{
+ char *my_dir = "renamedir";
+ char *my_file = "file.txt";
+ char *src_dir = "dir1";
+ char *tgt_dir = "dir2";
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL,
+ *dirsrc = NULL, *dirtgt = NULL, *dleaf = NULL;
+ struct stat sb;
+ int ret;
+
+ printf("glfs_h_rename tests: In Progress\n");
+
+ /* Prepare tests */
+ root = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_mkdir(fs, root, my_dir, 0755, &sb);
+ if (parent == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, root, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, my_file, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, parent, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirsrc = glfs_h_mkdir(fs, parent, src_dir, 0755, &sb);
+ if (dirsrc == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ src_dir, parent, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dirtgt = glfs_h_mkdir(fs, parent, tgt_dir, 0755, &sb);
+ if (dirtgt == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ tgt_dir, parent, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ dleaf = glfs_h_creat(fs, dirsrc, my_file, O_CREAT, 0644, &sb);
+ if (dleaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dirsrc, strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* run tests */
+ /* Rename file.txt -> file1.txt */
+ ret = glfs_h_rename(fs, parent, "file.txt", parent, "file1.txt");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "file.txt", "file1.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir1/file.txt -> file.txt */
+ ret = glfs_h_rename(fs, dirsrc, "file.txt", parent, "file.txt");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s/%s to %s (%s)\n",
+ src_dir, "file.txt", "file.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename file1.txt -> file.txt (exists) */
+ ret = glfs_h_rename(fs, parent, "file1.txt", parent, "file.txt");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n",
+ "file.txt", "file.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir1 -> dir3 */
+ ret = glfs_h_rename(fs, parent, "dir1", parent, "dir3");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n", "dir1",
+ "dir3", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir2 ->dir3 (exists) */
+ ret = glfs_h_rename(fs, parent, "dir2", parent, "dir3");
+ if (ret != 0) {
+ fprintf(stderr, "glfs_h_rename: error renaming %s to %s (%s)\n", "dir2",
+ "dir3", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename file.txt -> dir3 (fail) */
+ ret = glfs_h_rename(fs, parent, "file.txt", parent, "dir3");
+ if (ret == 0) {
+ fprintf(stderr, "glfs_h_rename: NO error renaming %s to %s (%s)\n",
+ "file.txt", "dir3", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ /* rename dir3 -> file.txt (fail) */
+ ret = glfs_h_rename(fs, parent, "dir3", parent, "file.txt");
+ if (ret == 0) {
+ fprintf(stderr, "glfs_h_rename: NO error renaming %s to %s (%s)\n",
+ "dir3", "file.txt", strerror(errno));
+ printf("glfs_h_rename tests: FAILED\n");
+ goto out;
+ }
+
+ printf("glfs_h_rename tests: PASSED\n");
+
+out:
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+ if (dirsrc)
+ glfs_h_close(dirsrc);
+ if (dirtgt)
+ glfs_h_close(dirtgt);
+ if (dleaf)
+ glfs_h_close(dleaf);
+
+ return;
+}
+
+void
+assimilatetime(struct timespec *ts, struct timespec ts_st,
+ struct timespec ts_ed)
+{
+ if ((ts_ed.tv_nsec - ts_st.tv_nsec) < 0) {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec - 1;
+ ts->tv_nsec += 1000000000 + ts_ed.tv_nsec - ts_st.tv_nsec;
+ } else {
+ ts->tv_sec += ts_ed.tv_sec - ts_st.tv_sec;
+ ts->tv_nsec += ts_ed.tv_nsec - ts_st.tv_nsec;
+ }
+
+ if (ts->tv_nsec > 1000000000) {
+ ts->tv_nsec = ts->tv_nsec - 1000000000;
+ ts->tv_sec += 1;
+ }
+
+ return;
+}
+
+#define MAX_FILES_CREATE 10
+#define MAXPATHNAME 512
+void
+test_h_performance(void)
+{
+ char *my_dir = "perftest", *full_dir_path = "/testdir/perftest";
+ char *my_file = "file_", my_file_name[MAXPATHNAME];
+ struct glfs_object *parent = NULL, *leaf = NULL, *dir = NULL;
+ struct stat sb;
+ int ret, i;
+ struct glfs_fd *fd;
+ struct timespec c_ts = {0, 0}, c_ts_st, c_ts_ed;
+ struct timespec o_ts = {0, 0}, o_ts_st, o_ts_ed;
+
+ printf("glfs_h_performance tests: In Progress\n");
+
+ /* Prepare tests */
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, NULL, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ dir = glfs_h_mkdir(fs, parent, my_dir, 0755, &sb);
+ if (dir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error creating %s: from (%p),%s\n",
+ my_dir, parent, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* create performance */
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ for (i = 0; i < MAX_FILES_CREATE; i++) {
+ sprintf(my_file_name, "%s%d", my_file, i);
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_lookupat(fs, dir, my_file_name, &sb, 0);
+ if (leaf != NULL) {
+ fprintf(stderr, "glfs_h_lookup: exists %s\n", my_file_name);
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ leaf = glfs_h_creat(fs, dir, my_file_name, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&c_ts, c_ts_st, c_ts_ed);
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&o_ts, o_ts_st, o_ts_ed);
+
+ printf("Creation performance (handle based):\n\t# empty files:%d\n",
+ MAX_FILES_CREATE);
+ printf("\tOverall time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n", o_ts.tv_sec,
+ o_ts.tv_nsec);
+ printf("\tcreate call time time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ c_ts.tv_sec, c_ts.tv_nsec);
+
+ /* create using path */
+ c_ts.tv_sec = o_ts.tv_sec = 0;
+ c_ts.tv_nsec = o_ts.tv_nsec = 0;
+
+ sprintf(my_file_name, "%s1", full_dir_path);
+ ret = glfs_mkdir(fs, my_file_name, 0755);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_mkdir: error creating %s: from (%p),%s\n", my_dir,
+ parent, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ for (i = 0; i < MAX_FILES_CREATE; i++) {
+ sprintf(my_file_name, "%s1/%sn%d", full_dir_path, my_file, i);
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_st);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ ret = glfs_stat(fs, my_file_name, &sb);
+ if (ret == 0) {
+ fprintf(stderr, "glfs_stat: exists %s\n", my_file_name);
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ fd = glfs_creat(fs, my_file_name, O_CREAT, 0644);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_creat: error creating %s: from (%p),%s\n",
+ my_file, dir, strerror(errno));
+ printf("glfs_h_performance tests: FAILED\n");
+ goto out;
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &c_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&c_ts, c_ts_st, c_ts_ed);
+ glfs_close(fd);
+ }
+
+ ret = clock_gettime(CLOCK_REALTIME, &o_ts_ed);
+ if (ret != 0) {
+ fprintf(stderr, "clock_gettime: error %s\n", strerror(errno));
+ printf("glfs_h_getattrs and setattrs tests: FAILED\n");
+ goto out;
+ }
+
+ assimilatetime(&o_ts, o_ts_st, o_ts_ed);
+
+ printf("Creation performance (path based):\n\t# empty files:%d\n",
+ MAX_FILES_CREATE);
+ printf("\tOverall time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n", o_ts.tv_sec,
+ o_ts.tv_nsec);
+ printf("\tcreate call time time:\n\t\tSecs:%ld\n\t\tnSecs:%ld\n",
+ c_ts.tv_sec, c_ts.tv_nsec);
+out:
+ return;
+}
+
+int
+test_handleops(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_object *root = NULL, *parent = NULL, *leaf = NULL, *tmp = NULL;
+ char readbuf[32], writebuf[32];
+ unsigned char leaf_handle[GFAPI_HANDLE_LENGTH];
+
+ char *full_leaf_name = "/testdir/testfile.txt", *leaf_name = "testfile.txt",
+ *relative_leaf_name = "testdir/testfile.txt";
+ char *leaf_name1 = "testfile1.txt";
+ char *full_newparent_name = "/testdir/dir1", *newparent_name = "dir1";
+ char *full_newnod_name = "/testdir/nod1", *newnod_name = "nod1";
+
+ /* Initialize test area */
+ ret = glfs_mkdir(fs, full_parent_name, 0755);
+ if (ret != 0 && errno != EEXIST) {
+ fprintf(stderr, "%s: (%p) %s\n", full_parent_name, fd, strerror(errno));
+ printf("Test initialization failed on volume %s\n", argv[1]);
+ goto out;
+ } else if (ret != 0) {
+ printf("Found test directory %s to be existing\n", full_parent_name);
+ printf("Cleanup test directory and restart tests\n");
+ goto out;
+ }
+
+ fd = glfs_creat(fs, full_leaf_name, O_CREAT, 0644);
+ if (fd == NULL) {
+ fprintf(stderr, "%s: (%p) %s\n", full_leaf_name, fd, strerror(errno));
+ printf("Test initialization failed on volume %s\n", argv[1]);
+ goto out;
+ }
+ glfs_close(fd);
+
+ printf("Initialized the test area, within volume %s\n", argv[1]);
+
+ /* Handle based APIs test area */
+
+ /* glfs_lookupat test */
+ printf("glfs_h_lookupat tests: In Progress\n");
+ /* start at root of the volume */
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n", "/",
+ NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* lookup a parent within root */
+ parent = glfs_h_lookupat(fs, root, parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ parent_name, root, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* lookup a leaf/child within the parent */
+ leaf = glfs_h_lookupat(fs, parent, leaf_name, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ leaf_name, parent, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* reset */
+ glfs_h_close(root);
+ root = NULL;
+ glfs_h_close(leaf);
+ leaf = NULL;
+ glfs_h_close(parent);
+ parent = NULL;
+
+ /* check absolute paths */
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n", "/",
+ NULL, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_lookupat(fs, NULL, full_leaf_name, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_leaf_name, parent, strerror(errno));
+ printf("glfs_h_lookupat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* reset */
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ /* check multiple component paths */
+ leaf = glfs_h_lookupat(fs, root, relative_leaf_name, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ relative_leaf_name, parent, strerror(errno));
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* reset */
+ glfs_h_close(root);
+ root = NULL;
+ glfs_h_close(parent);
+ parent = NULL;
+
+ /* check symlinks in path */
+
+ /* TODO: -ve test cases */
+ /* parent invalid
+ * path invalid
+ * path does not exist after some components
+ * no parent, but relative path
+ * parent and full path? -ve?
+ */
+
+ printf("glfs_h_lookupat tests: PASSED\n");
+
+ /* glfs_openat test */
+ printf("glfs_h_open tests: In Progress\n");
+ fd = glfs_h_open(fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_h_open: error on open of %s: %s\n",
+ full_leaf_name, strerror(errno));
+ printf("glfs_h_open tests: FAILED\n");
+ goto out;
+ }
+
+ /* test read/write based on fd */
+ memcpy(writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write(fd, writebuf, 32, 0);
+
+ glfs_lseek(fd, 10, SEEK_SET);
+
+ ret = glfs_read(fd, readbuf, 32, 0);
+ if (memcmp(readbuf, writebuf, 32)) {
+ printf("Failed to read what I wrote: %s %s\n", readbuf, writebuf);
+ glfs_close(fd);
+ printf("glfs_h_open tests: FAILED\n");
+ goto out;
+ }
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+ glfs_close(fd);
+
+ printf("glfs_h_open tests: PASSED\n");
+
+ /* Create tests */
+ printf("glfs_h_creat tests: In Progress\n");
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_creat(fs, parent, leaf_name1, O_CREAT, 0644, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_creat: error on create of %s: from (%p),%s\n",
+ leaf_name1, parent, strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_creat(fs, parent, leaf_name1, O_CREAT | O_EXCL, 0644, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf(stderr,
+ "glfs_h_creat: existing file, leaf = (%p), errno = %s\n", leaf,
+ strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+ }
+
+ tmp = glfs_h_creat(fs, root, parent_name, O_CREAT, 0644, &sb);
+ if (tmp != NULL || !(errno == EISDIR || errno == EINVAL)) {
+ fprintf(stderr, "glfs_h_creat: dir create, tmp = (%p), errno = %s\n",
+ leaf, strerror(errno));
+ printf("glfs_h_creat tests: FAILED\n");
+ if (tmp != NULL) {
+ glfs_h_close(tmp);
+ tmp = NULL;
+ }
+ }
+
+ /* TODO: Other combinations and -ve cases as applicable */
+ printf("glfs_h_creat tests: PASSED\n");
+
+ /* extract handle and create from handle test */
+ printf(
+ "glfs_h_extract_handle and glfs_h_create_from_handle tests: In "
+ "Progress\n");
+ /* TODO: Change the lookup to create below for a GIFD recovery failure,
+ * that needs to be fixed */
+ leaf = glfs_h_lookupat(fs, parent, leaf_name1, &sb, 0);
+ if (leaf == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ leaf_name1, parent, strerror(errno));
+ printf("glfs_h_extract_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ ret = glfs_h_extract_handle(leaf, leaf_handle, GFAPI_HANDLE_LENGTH);
+ if (ret < 0) {
+ fprintf(stderr,
+ "glfs_h_extract_handle: error extracting handle of %s: %s\n",
+ full_leaf_name, strerror(errno));
+ printf("glfs_h_extract_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_handle(leaf_handle);
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_create_from_handle(fs, leaf_handle, GFAPI_HANDLE_LENGTH, &sb);
+ if (leaf == NULL) {
+ fprintf(
+ stderr,
+ "glfs_h_create_from_handle: error on create of %s: from (%p),%s\n",
+ leaf_name1, leaf_handle, strerror(errno));
+ printf("glfs_h_create_from_handle tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ fd = glfs_h_open(fs, leaf, O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_h_open: error on open of %s: %s\n",
+ full_leaf_name, strerror(errno));
+ printf("glfs_h_create_from_handle tests: FAILED\n");
+ goto out;
+ }
+
+ /* test read/write based on fd */
+ memcpy(writebuf, "abcdefghijklmnopqrstuvwxyz012345", 32);
+ ret = glfs_write(fd, writebuf, 32, 0);
+
+ glfs_lseek(fd, 0, SEEK_SET);
+
+ ret = glfs_read(fd, readbuf, 32, 0);
+ if (memcmp(readbuf, writebuf, 32)) {
+ printf("Failed to read what I wrote: %s %s\n", writebuf, writebuf);
+ printf("glfs_h_create_from_handle tests: FAILED\n");
+ glfs_close(fd);
+ goto out;
+ }
+
+ glfs_close(fd);
+ glfs_h_close(leaf);
+ leaf = NULL;
+ glfs_h_close(parent);
+ parent = NULL;
+
+ printf(
+ "glfs_h_extract_handle and glfs_h_create_from_handle tests: PASSED\n");
+
+ /* Mkdir tests */
+ printf("glfs_h_mkdir tests: In Progress\n");
+
+ ret = glfs_rmdir(fs, full_newparent_name);
+ if (ret && errno != ENOENT) {
+ fprintf(stderr, "glfs_rmdir: Failed for %s: %s\n", full_newparent_name,
+ strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_mkdir(fs, parent, newparent_name, 0755, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error on mkdir of %s: from (%p),%s\n",
+ newparent_name, parent, strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_mkdir(fs, parent, newparent_name, 0755, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf(stderr,
+ "glfs_h_mkdir: existing directory, leaf = (%p), errno = %s\n",
+ leaf, strerror(errno));
+ printf("glfs_h_mkdir tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+ }
+
+ glfs_h_close(parent);
+ parent = NULL;
+
+ printf("glfs_h_mkdir tests: PASSED\n");
+
+ /* Mknod tests */
+ printf("glfs_h_mknod tests: In Progress\n");
+ ret = glfs_unlink(fs, full_newnod_name);
+ if (ret && errno != ENOENT) {
+ fprintf(stderr, "glfs_unlink: Failed for %s: %s\n", full_newnod_name,
+ strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+
+ parent = glfs_h_lookupat(fs, NULL, full_parent_name, &sb, 0);
+ if (parent == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on lookup of %s: from (%p),%s\n",
+ full_parent_name, root, strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ leaf = glfs_h_mknod(fs, parent, newnod_name, S_IFIFO, 0, &sb);
+ if (leaf == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error on mkdir of %s: from (%p),%s\n",
+ newnod_name, parent, strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ goto out;
+ }
+ peek_stat(&sb);
+
+ /* TODO: create op on a FIFO node hangs, need to check and fix
+ tmp = glfs_h_creat (fs, parent, newnod_name, O_CREAT, 0644, &sb);
+ if (tmp != NULL || errno != EINVAL) {
+ fprintf (stderr, "glfs_h_creat: node create, tmp = (%p), errno =
+ %s\n", tmp, strerror (errno)); printf ("glfs_h_creat/mknod tests:
+ FAILED\n"); if (tmp != NULL) { glfs_h_close(tmp); tmp = NULL;
+ }
+ } */
+
+ glfs_h_close(leaf);
+ leaf = NULL;
+
+ leaf = glfs_h_mknod(fs, parent, newnod_name, 0644, 0, &sb);
+ if (leaf != NULL || errno != EEXIST) {
+ fprintf(stderr,
+ "glfs_h_mknod: existing node, leaf = (%p), errno = %s\n", leaf,
+ strerror(errno));
+ printf("glfs_h_mknod tests: FAILED\n");
+ if (leaf != NULL) {
+ glfs_h_close(leaf);
+ leaf = NULL;
+ }
+ }
+
+ glfs_h_close(parent);
+ parent = NULL;
+
+ printf("glfs_h_mknod tests: PASSED\n");
+
+ /* unlink tests */
+ test_h_unlink();
+
+ /* TODO: opendir tests */
+
+ /* getattr tests */
+ test_h_getsetattrs();
+
+ /* TODO: setattr tests */
+
+ /* truncate tests */
+ test_h_truncate();
+
+ /* link tests */
+ test_h_links();
+
+ /* rename tests */
+ test_h_rename();
+
+ /* performance tests */
+ test_h_performance();
+
+ /* END: New APIs test area */
+
+out:
+ /* Cleanup glfs handles */
+ if (root)
+ glfs_h_close(root);
+ if (parent)
+ glfs_h_close(parent);
+ if (leaf)
+ glfs_h_close(leaf);
+
+ return ret;
+}
+
+int
+test_write_apis(glfs_t *fs)
+{
+ /* Add more content here */
+ /* Some apis we can get are */
+ /*
+ 0. glfs_set_xlator_option()
+
+ Read/Write combinations:
+ . glfs_{p,}readv/{p,}writev
+ . glfs_pread/pwrite
+
+ tests/basic/gfapi/gfapi-async-calls-test.c
+ . glfs_read_async/write_async
+ . glfs_pread_async/pwrite_async
+ . glfs_readv_async/writev_async
+ . glfs_preadv_async/pwritev_async
+
+ . ftruncate/ftruncate_async
+ . fsync/fsync_async
+ . fdatasync/fdatasync_async
+
+ */
+
+ glfs_fd_t *fd = NULL;
+ char *filename = "/filename2";
+ int flags = O_RDWR;
+ char *buf = "some bytes!";
+ char writestr[TEST_STR_LEN];
+ struct iovec iov = {&writestr, TEST_STR_LEN};
+ int ret, i;
+
+ for (i = 0; i < TEST_STR_LEN; i++)
+ writestr[i] = 0x11;
+
+ fd = glfs_open(fs, filename, flags);
+ if (!fd)
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ ret = glfs_writev(fd, &iov, 1, flags);
+ if (ret < 0) {
+ fprintf(stderr, "writev(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ ret = glfs_pwrite(fd, buf, 10, 4, flags, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "pwrite(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ ret = glfs_pwritev(fd, &iov, 1, 4, flags);
+ if (ret < 0) {
+ fprintf(stderr, "pwritev(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ ret = glfs_fsync(fd, NULL, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "fsync(%s): %d (%s)\n", filename, ret, strerror(errno));
+ }
+
+ glfs_close(fd);
+
+ return 0;
+}
+
+int
+test_metadata_ops(glfs_t *fs, glfs_t *fs2)
+{
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd2 = NULL;
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_stat gsb = {
+ 0,
+ };
+ struct statvfs sfs;
+ char readbuf[32];
+ char writebuf[11] = "helloworld";
+
+ char *filename = "/filename2";
+ int ret;
+
+ ret = glfs_lstat(fs, filename, &sb);
+ fprintf(stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ fd = glfs_creat(fs, filename, O_RDWR, 0644);
+ if (!fd)
+ fprintf(stderr, "creat(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ fd2 = glfs_open(fs2, filename, O_RDWR);
+ if (!fd2)
+ fprintf(stderr, "open(%s): (%p) %s\n", filename, fd, strerror(errno));
+
+ ret = glfs_lstat(fs, filename, &sb);
+ if (ret)
+ fprintf(stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_write(fd, writebuf, 11, 0);
+ if (ret < 0) {
+ fprintf(stderr, "writev(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ }
+
+ glfs_fsync(fd, NULL, NULL);
+
+ glfs_lseek(fd2, 5, SEEK_SET);
+
+ ret = glfs_read(fd2, readbuf, 32, 0);
+
+ printf("read %d, %s", ret, readbuf);
+
+ /* get stat */
+ ret = glfs_fstat(fd2, &sb);
+ if (ret)
+ fprintf(stderr, "fstat(%s): %d (%s)\n", filename, ret, strerror(errno));
+
+ ret = glfs_access(fs, filename, R_OK);
+ if (ret)
+ fprintf(stderr, "access(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ ret = glfs_fallocate(fd2, 1024, 1024, 1024);
+ if (ret)
+ fprintf(stderr, "fallocate(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ ret = glfs_discard(fd2, 1024, 512);
+ if (ret)
+ fprintf(stderr, "discard(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ ret = glfs_zerofill(fd2, 2048, 1024);
+ if (ret)
+ fprintf(stderr, "zerofill(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+
+ /* set stat */
+ /* TODO: got some errors, need to fix */
+ ret = glfs_fsetattr(fd2, &gsb);
+
+ glfs_close(fd);
+ glfs_close(fd2);
+
+ filename = "/filename3";
+ ret = glfs_mknod(fs, filename, S_IFIFO, 0);
+ if (ret)
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_lstat(fs, filename, &sb);
+ if (ret)
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_rename(fs, filename, "/filename4");
+ if (ret)
+ fprintf(stderr, "rename(%s): (%d) %s\n", filename, ret,
+ strerror(errno));
+
+ ret = glfs_unlink(fs, "/filename4");
+ if (ret)
+ fprintf(stderr, "unlink(%s): (%d) %s\n", "/filename4", ret,
+ strerror(errno));
+
+ filename = "/dirname2";
+ ret = glfs_mkdir(fs, filename, 0);
+ if (ret)
+ fprintf(stderr, "%s: (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_lstat(fs, filename, &sb);
+ if (ret)
+ fprintf(stderr, "lstat(%s): (%d) %s\n", filename, ret, strerror(errno));
+
+ ret = glfs_rmdir(fs, filename);
+ if (ret)
+ fprintf(stderr, "rmdir(%s): (%d) %s\n", filename, ret, strerror(errno));
+}
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs2 = NULL;
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd2 = NULL;
+ struct stat sb = {
+ 0,
+ };
+ struct glfs_stat gsb = {
+ 0,
+ };
+ struct statvfs sfs;
+ char readbuf[32];
+ char writebuf[32];
+ char volumeid[64];
+
+ char *filename = "/filename2";
+
+ if ((argc < 2) || (argc > 3)) {
+ printf("Usage:\n\t%s <volname> <hostname>\n\t%s <volfile-path>",
+ argv[0], argv[0]);
+ return -1;
+ }
+
+ if (argc == 2) {
+ /* Generally glfs_new() requires volume name as an argument */
+ fs = glfs_new("test-only");
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+ ret = glfs_set_volfile(fs, argv[1]);
+ if (ret)
+ fprintf(stderr, "glfs_set_volfile failed\n");
+ } else {
+ fs = glfs_new(argv[1]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+ // ret = glfs_set_volfile_server (fs, "unix", "/tmp/gluster.sock", 0);
+ ret = glfs_set_volfile_server(fs, "tcp", argv[2], 24007);
+ if (ret)
+ fprintf(stderr, "glfs_set_volfile_server failed\n");
+ }
+
+ /* Change this to relevant file when running locally */
+ ret = glfs_set_logging(fs, "/dev/stderr", 5);
+ if (ret)
+ fprintf(stderr, "glfs_set_logging failed\n");
+
+ ret = glfs_init(fs);
+ if (ret)
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+
+ if (ret)
+ goto out;
+
+ /* no major use for getting the volume id in this test, done for coverage */
+ ret = glfs_get_volumeid(fs, volumeid, 64);
+ if (ret) {
+ fprintf(stderr, "glfs_get_volumeid: returned %d\n", ret);
+ }
+
+ sleep(2);
+
+ if (argc == 2) {
+ /* Generally glfs_new() requires volume name as an argument */
+ fs2 = glfs_new("test_only_volume");
+ if (!fs2) {
+ fprintf(stderr, "glfs_new(fs2): returned NULL\n");
+ return 1;
+ }
+ ret = glfs_set_volfile(fs2, argv[1]);
+ if (ret)
+ fprintf(stderr, "glfs_set_volfile failed(fs2)\n");
+ } else {
+ fs2 = glfs_new(argv[1]);
+ if (!fs2) {
+ fprintf(stderr, "glfs_new(fs2): returned NULL\n");
+ return 1;
+ }
+ ret = glfs_set_volfile_server(fs2, "tcp", argv[2], 24007);
+ if (ret)
+ fprintf(stderr, "glfs_set_volfile_server failed(fs2)\n");
+ }
+
+ ret = glfs_set_statedump_path(fs2, "/tmp");
+ if (ret) {
+ fprintf(stderr, "glfs_set_statedump_path: %s\n", strerror(errno));
+ }
+
+ ret = glfs_init(fs2);
+ if (ret)
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+
+ test_metadata_ops(fs, fs2);
+
+ test_dirops(fs);
+
+ test_xattr(fs);
+
+ test_chdir(fs);
+
+ test_handleops(argc, argv);
+ // done
+
+ /* Test some extra apis */
+ test_write_apis(fs);
+
+ glfs_statvfs(fs, "/", &sfs);
+
+ glfs_unset_volfile_server(fs, "tcp", argv[2], 24007);
+
+ glfs_fini(fs);
+ glfs_fini(fs2);
+
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/tests/basic/gfapi/glfsxmp.t b/tests/basic/gfapi/glfsxmp.t
new file mode 100644
index 00000000000..b3e6645c0f5
--- /dev/null
+++ b/tests/basic/gfapi/glfsxmp.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2}
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+$CLI system getspec $V0 > fubar.vol
+
+TEST cp $(dirname $0)/glfsxmp-coverage.c ./glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+
+TEST ./glfsxmp fubar.vol
+
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/basic/gfapi/libgfapi-fini-hang.c b/tests/basic/gfapi/libgfapi-fini-hang.c
new file mode 100644
index 00000000000..37800e3188b
--- /dev/null
+++ b/tests/basic/gfapi/libgfapi-fini-hang.c
@@ -0,0 +1,62 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error %d\n", func, ret); \
+ exit(1); \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0, i;
+ glfs_fd_t *fd = NULL;
+ char readbuf[32];
+ char *logname = NULL;
+ char *hostname = NULL;
+ char *volname = NULL;
+
+ fprintf(stderr, "Starting libgfapi_fini\n");
+
+ if (argc < 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logname = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ exit(1);
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 0);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logname, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ /* Do not call glfs_init.
+ * glfs_fini() shouldn't hang in that case*/
+ ret = glfs_fini(fs);
+ LOG_ERR("glfs_fini", ret);
+ fprintf(stderr, "End of libgfapi_fini\n");
+
+ exit(0);
+}
diff --git a/tests/basic/gfapi/libgfapi-fini-hang.t b/tests/basic/gfapi/libgfapi-fini-hang.t
new file mode 100755
index 00000000000..ba262a943ee
--- /dev/null
+++ b/tests/basic/gfapi/libgfapi-fini-hang.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+function check_process () {
+ ps -p $1
+ if [ $? -eq 1 ] ; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=$(gluster --print-logdir)
+
+TEST build_tester $(dirname $0)/libgfapi-fini-hang.c -o $M0/libgfapi-fini-hang -lgfapi
+TEST cd $M0
+ ./libgfapi-fini-hang $H0 $V0 $logdir/libgfapi-fini-hang.log &
+PID=$!
+
+# check if the process "libgfapi-fini-hang" exits with in $PROCESS_UP_TIMEOUT
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_process $PID
+
+# Kill the process if present
+TEST ! kill -9 $PID
+
+TEST rm -f $M0/libgfapi-fini-hang
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/mandatory-lock-optimal.c b/tests/basic/gfapi/mandatory-lock-optimal.c
new file mode 100644
index 00000000000..34fef8d0b80
--- /dev/null
+++ b/tests/basic/gfapi/mandatory-lock-optimal.c
@@ -0,0 +1,532 @@
+/* Pre-requisites:-
+ *
+ * 1. Make sure that performance translators are switched off while running this
+ * test.
+ * 2. Perform the following volume set operation:
+ * # gluster volume set <VOLNAME> locks.mandatory-locking optimal
+ * 3. For installation under non-standard paths, export LD_LIBRARY_PATH to
+ * automatically load exact libgfapi.so and compile this C file as follows:
+ * $ gcc mandatory-lock-optimal.c -lgfapi -I <include path for api/glfs.h> -L
+ * <include path for libgfapi shared library>
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <glusterfs/api/glfs.h>
+
+#define TOTAL_TEST_COUNT 8
+
+/* C1 = Client 1 : C2 = Client 2 : C3 = Client 3 :
+ * fs1, fd1 are associated with C1. Similarly fs2, fd2 for C2
+ * and fs3, fd3 for C3 */
+
+FILE *fp;
+glfs_t *fs1, *fs2, *fs3;
+glfs_fd_t *fd, *fd1, *fd2, *fd3;
+struct flock lock;
+char buf1[10], *buf2 = "ten bytes!", *fname = "/mand.lock";
+int ret, test_count;
+off_t offset;
+
+/* run_test_1 () : C1 takes byte range mandatory read lock.
+ C2 attempts to read from a conflicting range.
+ Expected result : Read from C2 should pass.
+
+ * run_test_2 () : C1 takes byte range mandatory read lock.
+ C2 attempts write to a conflicting range.
+ Expected result : Write from C2 should fail with EAGAIN.
+
+ * run_test_3 () : C1 takes byte range advisory write lock.
+ C2 attempts to read from a conflicting range.
+ Expected result : Read from C2 should pass.
+
+ * run_test_4 () : C1 takes byte range advisory write lock.
+ C2 attempts write to a conflicting range.
+ Expected result : Write from C2 should pass.
+
+ * run_test_5 () : C1 takes byte range advisory read lock.
+ C2 attempts to open the same file with O_TRUNC.
+ Expected result : Open from C2 should pass.
+
+ * run_test_6 () : C1 takes byte range mandatory read lock.
+ C2 attempts to open the same file with O_TRUNC.
+ Expected result : Open from C2 should fail with EAGAIN.
+
+ * run_test_7 () : C1 takes byte range mandatory read lock.
+ C2 attempts ftruncate on a conflicting range.
+ Expected result : Write from C2 should fail with EAGAIN.
+
+ * run_test_8 () : C1 takes byte range advisory read lock.
+ C2 takes byte range mandatory read lock
+ within the byte range for which C1 already
+ holds an advisory lock so as to perform a
+ basic split/merge. C3 repositions fd3 to
+ start of C2's byte range mandatory lock
+ offset and attempts a write. Then it again
+ repositions fd3 to one byte past C2's byte
+ range mandatoy lock and again attempts a write.
+ Expected result : First write should fail with EAGAIN.
+ Second write should pass. */
+
+#define LOG_ERR(func, err) \
+ do { \
+ if (!fp) \
+ fprintf(stderr, "\n%s : returned error (%s)\n", func, \
+ strerror(err)); \
+ else \
+ fprintf(fp, "\n%s : returned error (%s)\n", func, strerror(err)); \
+ cleanup_and_exit(err); \
+ } while (0)
+
+void
+cleanup_and_exit(int exit_status)
+{
+ if (exit_status || test_count != TOTAL_TEST_COUNT) {
+ fprintf(fp, "\nAborting due to some test failures.\n");
+ exit_status = 1;
+ } else
+ fprintf(fp, "\nAll tests ran successfully.\n");
+ if (fp)
+ fclose(fp);
+ if (fd)
+ glfs_close(fd);
+ if (fd1)
+ glfs_close(fd1);
+ if (fd2)
+ glfs_close(fd2);
+
+ glfs_unlink(fs1, fname);
+
+ if (fs1)
+ glfs_fini(fs1);
+ if (fs2)
+ glfs_fini(fs2);
+
+ exit(exit_status);
+}
+
+glfs_t *
+new_client_create(char *hostname, char *volname, char *logfile_name)
+{
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs)
+ LOG_ERR("glfs_new", errno);
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret)
+ LOG_ERR("glfs_set_volfile_server", errno);
+
+ ret = glfs_set_logging(fs, logfile_name, 7);
+ if (ret)
+ LOG_ERR("glfs_set_logging", errno);
+
+ ret = glfs_init(fs);
+ if (ret)
+ LOG_ERR("glfs_init", errno);
+
+ return fs;
+}
+
+void
+run_test_1(int i)
+{
+ fprintf(fp, "\nRunning Test-%d . . . ", i);
+
+ fd1 = glfs_open(fs1, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd1)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 5L;
+
+ ret = glfs_file_lock(fd1, F_SETLK, &lock, GLFS_LK_MANDATORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd2 = glfs_open(fs2, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd2)
+ LOG_ERR("glfs_open", errno);
+
+ /* On successful read, 0 is returned as there is no content inside the
+ * file
+ */
+ ret = glfs_read(fd2, buf1, 10, 0);
+ if (ret)
+ LOG_ERR("glfs_read", errno);
+
+ ret = glfs_close(fd1);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd1 = NULL;
+
+ ret = glfs_close(fd2);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd2 = NULL;
+
+ test_count++;
+ fprintf(fp, "OK\n", i);
+}
+
+void
+run_test_2(int i)
+{
+ fprintf(fp, "\nRunning Test-%d . . . ", i);
+
+ fd1 = glfs_open(fs1, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd1)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 5L;
+
+ ret = glfs_file_lock(fd1, F_SETLK, &lock, GLFS_LK_MANDATORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd2 = glfs_open(fs2, fname, O_WRONLY | O_NONBLOCK);
+ if (!fd2)
+ LOG_ERR("glfs_open", errno);
+
+ ret = glfs_write(fd2, buf2, 10, 0);
+ if (ret == 10 || errno != EAGAIN)
+ LOG_ERR("glfs_write", errno);
+
+ ret = glfs_close(fd1);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd1 = NULL;
+
+ ret = glfs_close(fd2);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd2 = NULL;
+
+ test_count++;
+ fprintf(fp, "OK\n", i);
+}
+
+void
+run_test_3(int i)
+{
+ fprintf(fp, "\nRunning Test-%d . . . ", i);
+
+ fd1 = glfs_open(fs1, fname, O_WRONLY | O_NONBLOCK);
+ if (!fd1)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 5L;
+
+ ret = glfs_file_lock(fd1, F_SETLK, &lock, GLFS_LK_ADVISORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd2 = glfs_open(fs2, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd2)
+ LOG_ERR("glfs_open", errno);
+
+ /* Still there is no content inside file. So following read should
+ * return 0
+ */
+ ret = glfs_read(fd2, buf1, 10, 0);
+ if (ret)
+ LOG_ERR("glfs_read", errno);
+
+ ret = glfs_close(fd1);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd1 = NULL;
+
+ ret = glfs_close(fd2);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd2 = NULL;
+
+ test_count++;
+ fprintf(fp, "OK\n", i);
+}
+
+void
+run_test_4(int i)
+{
+ fprintf(fp, "\nRunning Test-%d . . . ", i);
+
+ fd1 = glfs_open(fs1, fname, O_WRONLY | O_NONBLOCK);
+ if (!fd1)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 5L;
+
+ ret = glfs_file_lock(fd1, F_SETLK, &lock, GLFS_LK_ADVISORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd2 = glfs_open(fs2, fname, O_WRONLY | O_NONBLOCK);
+ if (!fd2)
+ LOG_ERR("glfs_open", errno);
+
+ ret = glfs_write(fd2, buf2, 10, 0);
+ if (ret != 10)
+ LOG_ERR("glfs_write", errno);
+
+ ret = glfs_close(fd1);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd1 = NULL;
+
+ ret = glfs_close(fd2);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd2 = NULL;
+
+ test_count++;
+ fprintf(fp, "OK\n", i);
+}
+
+void
+run_test_5(int i)
+{
+ fprintf(fp, "\nRunning Test-%d . . . ", i);
+
+ fd1 = glfs_open(fs1, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd1)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 5L;
+
+ ret = glfs_file_lock(fd1, F_SETLK, &lock, GLFS_LK_ADVISORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd2 = glfs_open(fs2, fname, O_RDONLY | O_NONBLOCK | O_TRUNC);
+ if (!fd2)
+ LOG_ERR("glfs_open", errno);
+
+ ret = glfs_close(fd1);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd1 = NULL;
+
+ ret = glfs_close(fd2);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd2 = NULL;
+
+ test_count++;
+ fprintf(fp, "OK\n", i);
+}
+
+void
+run_test_6(int i)
+{
+ fprintf(fp, "\nRunning Test-%d . . . ", i);
+
+ fd1 = glfs_open(fs1, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd1)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 5L;
+
+ ret = glfs_file_lock(fd1, F_SETLK, &lock, GLFS_LK_MANDATORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd2 = glfs_open(fs2, fname, O_RDONLY | O_NONBLOCK | O_TRUNC);
+ if (fd2)
+ LOG_ERR("glfs_open", errno);
+
+ ret = glfs_close(fd1);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd1 = NULL;
+
+ test_count++;
+ fprintf(fp, "OK\n", i);
+}
+
+void
+run_test_7(int i)
+{
+ fprintf(fp, "\nRunning Test-%d . . . ", i);
+
+ fd1 = glfs_open(fs1, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd1)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 5L;
+
+ ret = glfs_file_lock(fd1, F_SETLK, &lock, GLFS_LK_MANDATORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd2 = glfs_open(fs2, fname, O_RDWR | O_NONBLOCK);
+ if (!fd2)
+ LOG_ERR("glfs_open", errno);
+
+ ret = glfs_ftruncate(fd2, 4, NULL, NULL);
+ if (ret == 0 || errno != EAGAIN)
+ LOG_ERR("glfs_ftruncate", errno);
+
+ ret = glfs_close(fd1);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd1 = NULL;
+
+ ret = glfs_close(fd2);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd2 = NULL;
+
+ test_count++;
+ fprintf(fp, "OK\n", i);
+}
+
+void
+run_test_8(int i)
+{
+ fprintf(fp, "\nRunning Test-%d . . . ", i);
+
+ fd1 = glfs_open(fs1, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd1)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 10L;
+
+ ret = glfs_file_lock(fd1, F_SETLK, &lock, GLFS_LK_ADVISORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd2 = glfs_open(fs2, fname, O_RDONLY | O_NONBLOCK);
+ if (!fd2)
+ LOG_ERR("glfs_open", errno);
+
+ lock.l_type = F_RDLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 5L;
+ lock.l_len = 2L;
+
+ ret = glfs_file_lock(fd2, F_SETLK, &lock, GLFS_LK_MANDATORY);
+ if (ret)
+ LOG_ERR("glfs_file_lock", errno);
+
+ fd3 = glfs_open(fs3, fname, O_RDWR | O_NONBLOCK);
+ if (!fd3)
+ LOG_ERR("glfs_open", errno);
+
+ offset = glfs_lseek(fd3, 5L, SEEK_SET);
+ if (offset != 5)
+ LOG_ERR("glfs_lseek", errno);
+
+ ret = glfs_write(fd3, buf2, 10, 0);
+ if (ret == 10 || errno != EAGAIN)
+ LOG_ERR("glfs_write", errno);
+
+ offset = glfs_lseek(fd3, 8L, SEEK_SET);
+ if (offset != 8)
+ LOG_ERR("glfs_lseek", errno);
+
+ ret = glfs_write(fd3, buf2, 10, 0);
+ if (ret != 10)
+ LOG_ERR("glfs_write", errno);
+
+ ret = glfs_close(fd1);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd1 = NULL;
+
+ ret = glfs_close(fd2);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd2 = NULL;
+
+ ret = glfs_close(fd3);
+ if (ret)
+ LOG_ERR("glfs_close", errno);
+ fd3 = NULL;
+
+ test_count++;
+ fprintf(fp, "OK\n", i);
+}
+
+int
+main(int argc, char *argv[])
+{
+ char logfile[50];
+
+ if (argc != 4) {
+ fprintf(stderr,
+ "Usage: %s <server ip/hostname> <volume name> <test log "
+ "directory>\n",
+ argv[0]);
+ return 0;
+ }
+
+ sprintf(logfile, "%s/%s", argv[3], "mandatory-lock-optimal-test.log");
+ fp = fopen(logfile, "w");
+ if (!fp) {
+ fprintf(stderr, "\n%s\n", logfile);
+ LOG_ERR("Log file creation", errno);
+ }
+
+ sprintf(logfile, "%s/%s", argv[3], "glfs-client-1.log");
+ fs1 = new_client_create(argv[1], argv[2], logfile);
+ if (!fs1)
+ LOG_ERR("client-1 creation", EINVAL);
+
+ sprintf(logfile, "%s/%s", argv[3], "glfs-client-2.log");
+ fs2 = new_client_create(argv[1], argv[2], logfile);
+ if (!fs2)
+ LOG_ERR("client-2 creation", EINVAL);
+
+ sprintf(logfile, "%s/%s", argv[3], "glfs-client-3.log");
+ fs3 = new_client_create(argv[1], argv[2], logfile);
+ if (!fs3)
+ LOG_ERR("client-3 creation", EINVAL);
+
+ fd = glfs_creat(fs1, fname, O_RDWR, 0644);
+ if (!fd)
+ LOG_ERR("glfs_creat", errno);
+
+ test_count = 0;
+
+ run_test_1(1);
+ run_test_2(2);
+ run_test_3(3);
+ run_test_4(4);
+ run_test_5(5);
+ run_test_6(6);
+ run_test_7(7);
+ run_test_8(8);
+
+ cleanup_and_exit(0);
+
+ return 0;
+}
diff --git a/tests/basic/gfapi/mandatory-lock-optimal.t b/tests/basic/gfapi/mandatory-lock-optimal.t
new file mode 100644
index 00000000000..27062e1f6c2
--- /dev/null
+++ b/tests/basic/gfapi/mandatory-lock-optimal.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+
+# Create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+logdir=`gluster --print-logdir`
+
+# Switch off performance translators
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.readdir-ahead off
+
+# Enable optimal mandatory-locking mode and restart the volume
+TEST $CLI volume set $V0 locks.mandatory-locking optimal
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+
+# Compile and run the test program
+TEST build_tester $(dirname $0)/mandatory-lock-optimal.c -lgfapi
+TEST ./$(dirname $0)/mandatory-lock-optimal $H0 $V0 $logdir
+
+# Cleanup the environment
+cleanup_tester $(dirname $0)/mandatory-lock-optimal
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/basic/gfapi/protocol-client-ssl.vol.in b/tests/basic/gfapi/protocol-client-ssl.vol.in
new file mode 100644
index 00000000000..cdc0c9d0671
--- /dev/null
+++ b/tests/basic/gfapi/protocol-client-ssl.vol.in
@@ -0,0 +1,15 @@
+#
+# This .vol file expects that there is
+#
+# 1. GlusterD listening on @@HOSTNAME@@
+# 2. a volume that provides a brick on @@BRICKPATH@@
+# 3. the volume with the brick has been started
+#
+volume test
+ type protocol/client
+ option remote-host @@HOSTNAME@@
+ option remote-subvolume @@BRICKPATH@@
+ option transport-type socket
+ option transport.socket.ssl-enabled @@SSL@@
+end-volume
+
diff --git a/tests/basic/gfapi/protocol-client.vol.in b/tests/basic/gfapi/protocol-client.vol.in
new file mode 100644
index 00000000000..ef35001e29f
--- /dev/null
+++ b/tests/basic/gfapi/protocol-client.vol.in
@@ -0,0 +1,14 @@
+#
+# This .vol file expects that there is
+#
+# 1. GlusterD listening on @@HOSTNAME@@
+# 2. a volume that provides a brick on @@BRICKPATH@@
+# 3. the volume with the brick has been started
+#
+volume test
+ type protocol/client
+ option remote-host @@HOSTNAME@@
+ option remote-subvolume @@BRICKPATH@@
+ option transport-type socket
+end-volume
+
diff --git a/tests/basic/gfapi/seek.c b/tests/basic/gfapi/seek.c
new file mode 100644
index 00000000000..85ea9b88141
--- /dev/null
+++ b/tests/basic/gfapi/seek.c
@@ -0,0 +1,99 @@
+/* seek.c - use glfs_lseek() to find holes in a file
+ *
+ * Author: Niels de Vos <ndevos@redhat.com>
+ */
+
+/* needed for SEEK_HOLE/SEEK_DATA */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#include <sys/types.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int
+main(int argc, char **argv)
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ char *filename = NULL;
+ char *volname = NULL;
+ char *hostname = NULL;
+ struct stat st = {
+ 0,
+ };
+ off_t hole_start = 0;
+ off_t hole_end = 0;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument, use %s <hostname> <vol> <file>\n",
+ argv[0]);
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ filename = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ perror("glfs_new() returned NULL");
+ return 1;
+ }
+
+ if (glfs_set_volfile_server(fs, "tcp", hostname, 24007)) {
+ perror("glfs_set_volfile_server");
+ return 1;
+ }
+
+ if (glfs_init(fs)) {
+ perror("glfs_init");
+ return 1;
+ }
+
+ fd = glfs_open(fs, filename, O_RDONLY);
+ if (fd <= 0) {
+ perror("glfs_open");
+ return 1;
+ }
+
+ if (glfs_fstat(fd, &st)) {
+ perror("glfs_fstat");
+ return 1;
+ }
+
+ while (hole_end < st.st_size) {
+ hole_start = glfs_lseek(fd, hole_end, SEEK_HOLE);
+ if (hole_start == -1 && errno == ENXIO)
+ /* no more holes */
+ break;
+ if (hole_start == -1) {
+ perror("no more holes");
+ break;
+ }
+
+ hole_end = glfs_lseek(fd, hole_start, SEEK_DATA);
+ if (hole_end == -1 && errno == ENXIO) {
+ /* no more data */
+ break;
+ }
+
+ printf("HOLE found: %ld - %ld%s\n", hole_start, hole_end,
+ (hole_end == st.st_size) ? " (EOF)" : "");
+ }
+
+ glfs_close(fd);
+
+ if (fs) {
+ glfs_fini(fs);
+ }
+
+ return ret;
+}
diff --git a/tests/basic/gfapi/sink.t b/tests/basic/gfapi/sink.t
new file mode 100644
index 00000000000..53af2ecf62d
--- /dev/null
+++ b/tests/basic/gfapi/sink.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST build_tester $(dirname ${0})/gfapi-load-volfile.c -lgfapi
+TEST ./$(dirname ${0})/gfapi-load-volfile $(dirname $0)/sink.vol
+
+cleanup_tester $(dirname ${0})/gfapi-load-volfile
+
+cleanup
diff --git a/tests/basic/gfapi/sink.vol b/tests/basic/gfapi/sink.vol
new file mode 100644
index 00000000000..d1c92261448
--- /dev/null
+++ b/tests/basic/gfapi/sink.vol
@@ -0,0 +1,24 @@
+#
+# The sink xlator does not do any memory allocations. It only passes the FOPs
+# through to the next xlator.
+#
+# For testing, there is no next xlator needed, we are only interested in the
+# resource usage of the Gluster core when gfapi is used.
+#
+# Note: The sink xlator does not handle any calls. Mounting is possible, but
+# any I/O needs additional functionality in the sink xlator.
+#
+volume sink
+ type debug/sink
+ # an option is required, otherwise the graph parsing fails
+ option an-option-is-required yes
+end-volume
+
+#
+# It is possible to test the resource usage of other xlators by adding them in
+# the graph before the "sink".
+#
+#volume mdcache-sink
+# type performance/md-cache
+# subvolumes sink
+#end-volume
diff --git a/tests/basic/gfapi/upcall-cache-invalidate.c b/tests/basic/gfapi/upcall-cache-invalidate.c
new file mode 100644
index 00000000000..078286a8956
--- /dev/null
+++ b/tests/basic/gfapi/upcall-cache-invalidate.c
@@ -0,0 +1,209 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto out; \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_t *fs2 = NULL;
+ glfs_t *fs_tmp = NULL;
+ glfs_t *fs_tmp2 = NULL;
+ int ret = 0, i;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd2 = NULL;
+ glfs_fd_t *fd_tmp = NULL;
+ glfs_fd_t *fd_tmp2 = NULL;
+ char readbuf[32];
+ char *filename = "file_tmp";
+ char *writebuf = NULL;
+ char *vol_id = NULL;
+ unsigned int cnt = 1;
+ struct glfs_upcall *cbk = NULL;
+ char *logfile = NULL;
+ char *volname = NULL;
+ char *hostname = NULL;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("glfs_init", ret);
+
+ /* This does not block, but enables caching of events. Real
+ * applications like NFS-Ganesha run this in a thread before activity
+ * on the fs (through this instance) happens. */
+ ret = glfs_h_poll_upcall(fs_tmp, &cbk);
+ LOG_ERR("glfs_h_poll_upcall", ret);
+
+ fs2 = glfs_new(volname);
+ if (!fs2) {
+ fprintf(stderr, "glfs_new fs2: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs2, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server-fs2", ret);
+
+ ret = glfs_set_logging(fs2, logfile, 7);
+ LOG_ERR("glfs_set_logging-fs2", ret);
+
+ ret = glfs_init(fs2);
+ LOG_ERR("glfs_init-fs2", ret);
+
+ fd = glfs_creat(fs, filename, O_RDWR | O_SYNC, 0644);
+ if (fd <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_creat", ret);
+ }
+ fprintf(stderr, "glfs-create fd - %d\n", fd);
+
+ fd2 = glfs_open(fs2, filename, O_SYNC | O_RDWR | O_CREAT);
+ if (fd2 <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_open-fs2", ret);
+ }
+ fprintf(stderr, "glfs-open fd2 - %d\n", fd2);
+
+ do {
+ if (cnt % 2) {
+ fd_tmp = fd;
+ fs_tmp = fs;
+ fd_tmp2 = fd2;
+ fs_tmp2 = fs2;
+ } else {
+ fd_tmp = fd2;
+ fs_tmp = fs2;
+ fd_tmp2 = fd;
+ fs_tmp2 = fs;
+ }
+
+ /* WRITE on fd_tmp */
+ writebuf = malloc(10);
+ if (writebuf) {
+ memcpy(writebuf, "abcd", 4);
+ ret = glfs_write(fd_tmp, writebuf, 4, 0);
+ if (ret <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_write", ret);
+ } else {
+ fprintf(stderr, "glfs_write succeeded\n");
+ }
+ free(writebuf);
+ } else {
+ fprintf(stderr, "Could not allocate writebuf\n");
+ return -1;
+ }
+
+ /* READ on fd_tmp2 */
+ ret = glfs_lseek(fd_tmp2, 0, SEEK_SET);
+ LOG_ERR("glfs_lseek", ret);
+
+ memset(readbuf, 0, sizeof(readbuf));
+ ret = glfs_pread(fd_tmp2, readbuf, 4, 0, 0, NULL);
+
+ if (ret <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_pread", ret);
+ } else {
+ fprintf(stderr, "glfs_read: %s\n", readbuf);
+ }
+
+ /* Open() fops seem to be not performed on server side until
+ * there are I/Os on that fd
+ */
+ if (cnt > 2) {
+ struct glfs_upcall_inode *in_arg = NULL;
+ enum glfs_upcall_reason reason = 0;
+ struct glfs_object *object = NULL;
+ uint64_t flags = 0;
+ uint64_t expire = 0;
+
+ ret = glfs_h_poll_upcall(fs_tmp, &cbk);
+ LOG_ERR("glfs_h_poll_upcall", ret);
+
+ reason = glfs_upcall_get_reason(cbk);
+
+ /* Expect 'GLFS_INODE_INVALIDATE' upcall event. */
+ if (reason == GLFS_UPCALL_INODE_INVALIDATE) {
+ in_arg = glfs_upcall_get_event(cbk);
+
+ object = glfs_upcall_inode_get_object(in_arg);
+ flags = glfs_upcall_inode_get_flags(in_arg);
+ expire = glfs_upcall_inode_get_expire(in_arg);
+
+ fprintf(stderr,
+ " upcall event type - %d,"
+ " object(%p), flags(%d), "
+ " expire_time_attr(%d)\n",
+ reason, object, flags, expire);
+ } else {
+ fprintf(stderr, "Didn't receive upcall notify event");
+ ret = -1;
+ goto err;
+ }
+
+ glfs_free(cbk);
+ }
+
+ sleep(5);
+ } while (++cnt < 5);
+
+err:
+ glfs_close(fd);
+ LOG_ERR("glfs_close", ret);
+
+ glfs_close(fd2);
+ LOG_ERR("glfs_close-fd2", ret);
+
+out:
+ if (fs) {
+ ret = glfs_fini(fs);
+ fprintf(stderr, "glfs_fini(fs) returned %d \n", ret);
+ }
+
+ if (fs2) {
+ ret = glfs_fini(fs2);
+ fprintf(stderr, "glfs_fini(fs2) returned %d \n", ret);
+ }
+
+ if (ret)
+ exit(1);
+ exit(0);
+}
diff --git a/tests/basic/gfapi/upcall-cache-invalidate.t b/tests/basic/gfapi/upcall-cache-invalidate.t
new file mode 100755
index 00000000000..5fd6a3332e7
--- /dev/null
+++ b/tests/basic/gfapi/upcall-cache-invalidate.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+## Enable Upcall cache-invalidation feature
+TEST $CLI volume set $V0 features.cache-invalidation on;
+
+TEST build_tester $(dirname $0)/upcall-cache-invalidate.c -lgfapi
+
+TEST ./$(dirname $0)/upcall-cache-invalidate $H0 $V0 $logdir/upcall-cache-invalidate.log
+
+cleanup_tester $(dirname $0)/upcall-cache-invalidate
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfapi/upcall-register-api.c b/tests/basic/gfapi/upcall-register-api.c
new file mode 100644
index 00000000000..53ce0ecdb68
--- /dev/null
+++ b/tests/basic/gfapi/upcall-register-api.c
@@ -0,0 +1,286 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto out; \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+
+int upcall_recv = 0;
+
+void
+up_async_invalidate(struct glfs_upcall *up_arg, void *data)
+{
+ struct glfs_upcall_inode *in_arg = NULL;
+ enum glfs_upcall_reason reason = 0;
+ struct glfs_object *object = NULL;
+ uint64_t flags = 0;
+ uint64_t expire = 0;
+
+ if (!up_arg)
+ return;
+
+ reason = glfs_upcall_get_reason(up_arg);
+
+ /* Expect 'GLFS_INODE_INVALIDATE' upcall event. */
+
+ if (reason == GLFS_UPCALL_INODE_INVALIDATE) {
+ in_arg = glfs_upcall_get_event(up_arg);
+
+ object = glfs_upcall_inode_get_object(in_arg);
+ flags = glfs_upcall_inode_get_flags(in_arg);
+ expire = glfs_upcall_inode_get_expire(in_arg);
+
+ fprintf(stderr,
+ " upcall event type - %d,"
+ " object(%p), flags(%d), "
+ " expire_time_attr(%d)\n",
+ reason, object, flags, expire);
+ upcall_recv++;
+ }
+
+ glfs_free(up_arg);
+ return;
+}
+
+int
+perform_io(glfs_t *fs, glfs_t *fs2, int cnt)
+{
+ glfs_t *fs_tmp = NULL;
+ glfs_t *fs_tmp2 = NULL;
+ glfs_fd_t *fd_tmp = NULL;
+ glfs_fd_t *fd_tmp2 = NULL;
+ char readbuf[32];
+ char *writebuf = NULL;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd2 = NULL;
+ char *filename = "file_tmp";
+ int ret = -1;
+
+ if (!fs || !fs2)
+ return -1;
+
+ /* Create file from fs and open it from fs2 */
+ fd = glfs_creat(fs, filename, O_RDWR | O_SYNC, 0644);
+ if (fd <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_creat", ret);
+ }
+
+ fd2 = glfs_open(fs2, filename, O_SYNC | O_RDWR | O_CREAT);
+ if (fd2 <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_open-fs2", ret);
+ }
+
+ do {
+ if (cnt % 2) {
+ fd_tmp = fd;
+ fs_tmp = fs;
+ fd_tmp2 = fd2;
+ fs_tmp2 = fs2;
+ } else {
+ fd_tmp = fd2;
+ fs_tmp = fs2;
+ fd_tmp2 = fd;
+ fs_tmp2 = fs;
+ }
+
+ /* WRITE on fd_tmp */
+ writebuf = malloc(10);
+ if (writebuf) {
+ memcpy(writebuf, "abcd", 4);
+ ret = glfs_write(fd_tmp, writebuf, 4, 0);
+ if (ret <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_write", ret);
+ }
+ free(writebuf);
+ } else {
+ fprintf(stderr, "Could not allocate writebuf\n");
+ return -1;
+ }
+
+ /* READ on fd_tmp2 */
+ ret = glfs_lseek(fd_tmp2, 0, SEEK_SET);
+ LOG_ERR("glfs_lseek", ret);
+
+ memset(readbuf, 0, sizeof(readbuf));
+ ret = glfs_pread(fd_tmp2, readbuf, 4, 0, 0, NULL);
+
+ if (ret <= 0) {
+ ret = -1;
+ LOG_ERR("glfs_pread", ret);
+ }
+
+ sleep(2);
+ } while (--cnt > 0);
+
+ sleep(2);
+
+ ret = 0;
+err:
+ glfs_close(fd);
+
+ glfs_close(fd2);
+
+out:
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_t *fs2 = NULL;
+ int ret = 0, i;
+ char *vol_id = NULL;
+ unsigned int cnt = 5;
+ struct glfs_upcall *cbk = NULL;
+ char *logfile = NULL;
+ char *volname = NULL;
+ char *hostname = NULL;
+ int up_events = GLFS_EVENT_ANY;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ /* Initialize fs */
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("glfs_init", ret);
+
+ /* Initialize fs2 */
+ fs2 = glfs_new(volname);
+ if (!fs2) {
+ fprintf(stderr, "glfs_new fs2: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs2, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server-fs2", ret);
+
+ ret = glfs_set_logging(fs2, logfile, 7);
+ LOG_ERR("glfs_set_logging-fs2", ret);
+
+ ret = glfs_init(fs2);
+ LOG_ERR("glfs_init-fs2", ret);
+
+ /* Register Upcalls */
+ ret = glfs_upcall_register(fs, up_events, up_async_invalidate, NULL);
+
+ /* Check if the return mask contains the event */
+ if (!(ret & GLFS_EVENT_INODE_INVALIDATE)) {
+ fprintf(stderr,
+ "glfs_upcall_register return doesn't contain"
+ " upcall event\n");
+ return -1;
+ }
+
+ ret = glfs_upcall_register(fs2, up_events, up_async_invalidate, NULL);
+
+ /* Check if the return mask contains the event */
+ if ((ret < 0) || !(ret & GLFS_EVENT_INODE_INVALIDATE)) {
+ fprintf(stderr,
+ "glfs_upcall_register return doesn't contain"
+ " upcall event\n");
+ return -1;
+ }
+
+ /* Perform I/O */
+ ret = perform_io(fs, fs2, cnt);
+ LOG_ERR("perform_io", ret);
+
+ if (upcall_recv == 0) {
+ fprintf(stderr, "Upcalls are not received.\n");
+ ret = -1;
+ } else {
+ fprintf(stderr, "Received %d upcalls as expected\n", upcall_recv);
+ ret = 0;
+ }
+
+ sleep(5); /* to flush out previous upcalls if any */
+
+ /* Now unregister and check there are no upcall events received */
+ ret = glfs_upcall_unregister(fs, up_events);
+
+ /* Check if the return mask contains the event */
+ if ((ret < 0) || !(ret & GLFS_EVENT_INODE_INVALIDATE)) {
+ fprintf(stderr,
+ "glfs_upcall_unregister return doesn't contain"
+ " upcall event\n");
+ return -1;
+ }
+
+ ret = glfs_upcall_unregister(fs2, up_events);
+
+ /* Check if the return mask contains the event */
+ if ((ret < 0) || !(ret & GLFS_EVENT_INODE_INVALIDATE)) {
+ fprintf(stderr,
+ "glfs_upcall_unregister return doesn't contain"
+ " upcall event\n");
+ return -1;
+ }
+
+ upcall_recv = 0;
+
+ ret = perform_io(fs, fs2, cnt);
+ LOG_ERR("perform_io", ret);
+
+ if (upcall_recv != 0) {
+ fprintf(stderr, "%d upcalls received even after unregister.\n",
+ upcall_recv);
+ ret = -1;
+ } else {
+ fprintf(stderr,
+ "Post unregister, no upcalls received as"
+ " expected\n");
+ ret = 0;
+ }
+
+out:
+ if (fs) {
+ ret = glfs_fini(fs);
+ fprintf(stderr, "glfs_fini(fs) returned %d\n", ret);
+ }
+
+ if (fs2) {
+ ret = glfs_fini(fs2);
+ fprintf(stderr, "glfs_fini(fs2) returned %d\n", ret);
+ }
+
+ if (ret)
+ exit(1);
+ exit(0);
+}
diff --git a/tests/basic/gfapi/upcall-register-api.t b/tests/basic/gfapi/upcall-register-api.t
new file mode 100755
index 00000000000..a46234ed7af
--- /dev/null
+++ b/tests/basic/gfapi/upcall-register-api.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+## Enable Upcall cache-invalidation feature
+TEST $CLI volume set $V0 features.cache-invalidation on;
+
+TEST build_tester $(dirname $0)/upcall-register-api.c -lgfapi
+
+TEST ./$(dirname $0)/upcall-register-api $H0 $V0 $logdir/upcall-register-api.log
+
+cleanup_tester $(dirname $0)/upcall-register-api
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/gfid-access.t b/tests/basic/gfid-access.t
new file mode 100644
index 00000000000..19b6564e676
--- /dev/null
+++ b/tests/basic/gfid-access.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 --aux-gfid-mount;
+TEST mkdir $M0/a
+TEST touch $M0/b
+a_gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/a))
+b_gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/b))
+
+#Operations on Directory
+TEST setfattr -n trusted.abc -v abc $M0/a
+EXPECT "abc" echo $(getfattr -n trusted.abc --only-values $M0/a)
+EXPECT "abc" echo $(getfattr -n trusted.abc --only-values $M0/.gfid/$a_gfid_str)
+TEST setfattr -x trusted.abc $M0/a
+TEST ! getfattr -n trusted.abc $M0/a
+TEST ! getfattr -n trusted.abc $M0/.gfid/$a_gfid_str
+TEST chmod 0777 $M0/a
+EXPECT "777" stat -c "%a" $M0/a
+EXPECT "777" stat -c "%a" $M0/.gfid/$a_gfid_str
+
+TEST setfattr -n trusted.abc -v def $M0/.gfid/$a_gfid_str
+EXPECT "def" echo $(getfattr -n trusted.abc --only-values $M0/a)
+EXPECT "def" echo $(getfattr -n trusted.abc --only-values $M0/.gfid/$a_gfid_str)
+TEST setfattr -x trusted.abc $M0/.gfid/$a_gfid_str
+TEST ! getfattr -n trusted.abc $M0/a
+TEST ! getfattr -n trusted.abc $M0/.gfid/$a_gfid_str
+TEST chmod 0777 $M0/.gfid/$a_gfid_str
+EXPECT "777" stat -c "%a" $M0/a
+EXPECT "777" stat -c "%a" $M0/.gfid/$a_gfid_str
+
+#Entry operations on directory
+#Test that virtual directories are not allowed to be deleted.
+TEST ! mkdir $M0/.gfid
+TEST ! rmdir $M0/.gfid
+TEST ! touch $M0/.gfid
+TEST ! rm -f $M0/.gfid
+TEST ! mv $M0/.gfid $M0/dont-rename
+TEST ! ln -s $M0/symlink $M0/.gfid
+TEST ! ln $M0/.gfid $M0/hlink
+TEST ! mknod $M0/.gfid b 0 0
+
+#Test that first level directory/file creations inside .gfid are not allowed.
+tmpfile=$(mktemp)
+TEST ! mkdir $M0/.gfid/a
+TEST ! touch $M0/.gfid/a
+TEST ! mv $tmpfile $M0/.gfid
+TEST ! mv $M0/a $M0/.gfid
+TEST ! mknod $M0/.gfid/b b 0 0
+rm -f $tmpfile
+
+#Operations on File
+TEST setfattr -n trusted.abc -v abc $M0/b
+EXPECT "abc" echo $(getfattr -n trusted.abc --only-values $M0/b)
+EXPECT "abc" echo $(getfattr -n trusted.abc --only-values $M0/.gfid/$b_gfid_str)
+TEST setfattr -x trusted.abc $M0/b
+TEST ! getfattr -n trusted.abc $M0/b
+TEST ! getfattr -n trusted.abc $M0/.gfid/$b_gfid_str
+TEST chmod 0777 $M0/b
+EXPECT "777" stat -c "%a" $M0/b
+EXPECT "777" stat -c "%a" $M0/.gfid/$b_gfid_str
+
+TEST setfattr -n trusted.abc -v def $M0/.gfid/$b_gfid_str
+EXPECT "def" echo $(getfattr -n trusted.abc --only-values $M0/b)
+EXPECT "def" echo $(getfattr -n trusted.abc --only-values $M0/.gfid/$b_gfid_str)
+TEST setfattr -x trusted.abc $M0/.gfid/$b_gfid_str
+TEST ! getfattr -n trusted.abc $M0/b
+TEST ! getfattr -n trusted.abc $M0/.gfid/$b_gfid_str
+TEST chmod 0777 $M0/.gfid/$b_gfid_str
+EXPECT "777" stat -c "%a" $M0/b
+EXPECT "777" stat -c "%a" $M0/.gfid/$b_gfid_str
+
+cleanup
diff --git a/tests/basic/gfproxy.t b/tests/basic/gfproxy.t
new file mode 100755
index 00000000000..7aa8b70b793
--- /dev/null
+++ b/tests/basic/gfproxy.t
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+function file_exists
+{
+ if [ -f $1 ]; then echo "Y"; else echo "N"; fi
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 config.gfproxyd enable
+TEST $CLI volume set $V0 failover-hosts "127.0.0.1,192.168.122.215,192.168.122.90"
+TEST $CLI volume set $V0 client-log-level TRACE
+TEST $CLI volume start $V0
+
+sleep 2
+
+REGULAR_CLIENT_VOLFILE="/var/lib/glusterd/vols/${V0}/trusted-${V0}.tcp-fuse.vol"
+GFPROXY_CLIENT_VOLFILE="/var/lib/glusterd/vols/${V0}/trusted-${V0}.tcp-gfproxy-fuse.vol"
+GFPROXYD_VOLFILE="/var/lib/glusterd/vols/${V0}/${V0}.gfproxyd.vol"
+
+# Client volfile must exist
+TEST [ -f $GFPROXY_CLIENT_VOLFILE ]
+
+# write-behind translators must exist
+TEST grep "performance/write-behind" $GFPROXY_CLIENT_VOLFILE
+
+# Make sure we didn't screw up the existing client
+TEST grep "performance/write-behind" $REGULAR_CLIENT_VOLFILE
+TEST grep "cluster/replicate" $REGULAR_CLIENT_VOLFILE
+TEST grep "cluster/distribute" $REGULAR_CLIENT_VOLFILE
+
+TEST [ -f $GFPROXYD_VOLFILE ]
+
+TEST grep "cluster/replicate" $GFPROXYD_VOLFILE
+TEST grep "cluster/distribute" $GFPROXYD_VOLFILE
+
+# write-behind must *not* exist
+TEST ! grep "performance/write-behind" $GFPROXYD_VOLFILE
+
+# Test that we can start the server and the client
+TEST glusterfs --thin-client --volfile-id=patchy --volfile-server=$H0 -l /var/log/glusterfs/${V0}-gfproxy-client.log $M0
+sleep 2
+TEST grep gfproxy-client/${V0} /proc/mounts
+
+# Write data to the mount and checksum it
+TEST dd if=/dev/urandom bs=1M count=10 of=/tmp/testfile1
+md5=$(md5sum /tmp/testfile1 | awk '{print $1}')
+TEST cp -v /tmp/testfile1 $M0/testfile1
+TEST [ "$(md5sum $M0/testfile1 | awk '{print $1}')" == "$md5" ]
+
+rm /tmp/testfile1
+
+dd if=/dev/zero of=$M0/bigfile bs=1K count=10240 &
+BG_STRESS_PID=$!
+
+TEST wait $BG_STRESS_PID
+
+# Perform graph change and make sure the gfproxyd restarts
+TEST $CLI volume set $V0 stat-prefetch off
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/bigfile
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=1501392
diff --git a/tests/basic/global-threading.t b/tests/basic/global-threading.t
new file mode 100644
index 00000000000..f7d34044b09
--- /dev/null
+++ b/tests/basic/global-threading.t
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+# Test if the given process has a number of threads of a given type between
+# min and max.
+function check_threads() {
+ local pid="${1}"
+ local pattern="${2}"
+ local min="${3}"
+ local max="${4-}"
+ local count
+
+ count="$(ps hH -o comm ${pid} | grep "${pattern}" | wc -l)"
+ if [[ ${min} -gt ${count} ]]; then
+ return 1
+ fi
+ if [[ ! -z "${max}" && ${max} -lt ${count} ]]; then
+ return 1
+ fi
+
+ return 0
+}
+
+cleanup
+
+TEST glusterd
+
+# Glusterd shouldn't use any thread
+TEST check_threads $(get_glusterd_pid) glfs_tpw 0 0
+TEST check_threads $(get_glusterd_pid) glfs_iotwr 0 0
+
+TEST pkill -9 glusterd
+
+TEST glusterd --global-threading
+
+# Glusterd shouldn't use global threads, even if enabled
+TEST check_threads $(get_glusterd_pid) glfs_tpw 0 0
+TEST check_threads $(get_glusterd_pid) glfs_iotwr 0 0
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/b{0,1}
+
+# Normal configuration using io-threads on bricks
+TEST $CLI volume set $V0 config.global-threading off
+TEST $CLI volume set $V0 performance.iot-pass-through off
+TEST $CLI volume set $V0 performance.client-io-threads off
+TEST $CLI volume start $V0
+
+# There shouldn't be global threads
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b0) glfs_tpw 0 0
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b1) glfs_tpw 0 0
+
+# There should be at least 1 io-thread
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b0) glfs_iotwr 1
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b1) glfs_iotwr 1
+
+# Self-heal should be using global threads
+TEST check_threads $(get_shd_process_pid) glfs_tpw 1
+TEST check_threads $(get_shd_process_pid) glfs_iotwr 0 0
+
+TEST $CLI volume stop $V0
+
+# Configuration with global threads on bricks
+TEST $CLI volume set $V0 config.global-threading on
+TEST $CLI volume set $V0 performance.iot-pass-through on
+TEST $CLI volume start $V0
+
+# There should be at least 1 global thread
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b0) glfs_tpw 1
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b1) glfs_tpw 1
+
+# There shouldn't be any io-thread worker threads
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b0) glfs_iotwr 0 0
+TEST check_threads $(get_brick_pid $V0 $H0 $B0/b1) glfs_iotwr 0 0
+
+# Normal configuration using io-threads on clients
+TEST $CLI volume set $V0 performance.iot-pass-through off
+TEST $CLI volume set $V0 performance.client-io-threads on
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+# There shouldn't be global threads
+TEST check_threads $(get_mount_process_pid $V0 $M0) glfs_tpw 0 0
+
+# There should be at least 1 io-thread
+TEST check_threads $(get_mount_process_pid $V0 $M0) glfs_iotwr 1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Configuration with global threads on clients
+TEST $CLI volume set $V0 performance.client-io-threads off
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --global-threading $M0
+
+# There should be at least 1 global thread
+TEST check_threads $(get_mount_process_pid $V0 $M0) glfs_tpw 1
+
+# There shouldn't be io-threads
+TEST check_threads $(get_mount_process_pid $V0 $M0) glfs_iotwr 0 0
+
+# Some basic volume access checks with global-threading enabled everywhere
+TEST mkdir ${M0}/dir
+TEST dd if=/dev/zero of=${M0}/dir/file bs=128k count=8
+
+cleanup
diff --git a/tests/basic/glusterd-restart-shd-mux.t b/tests/basic/glusterd-restart-shd-mux.t
new file mode 100644
index 00000000000..46d0dac2fce
--- /dev/null
+++ b/tests/basic/glusterd-restart-shd-mux.t
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=20
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+
+for i in $(seq 1 3); do
+ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_afr$i
+ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+#Stop the glusterd
+TEST pkill glusterd
+#Only stopping glusterd, so there will be one shd
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^1$" shd_count
+TEST glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^24$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+shd_pid=$(get_shd_mux_pid $V0)
+for i in $(seq 1 3); do
+ afr_path="/var/run/gluster/shd/${V0}_afr$i/${V0}_afr$i-shd.pid"
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $afr_path
+ ec_path="/var/run/gluster/shd/${V0}_ec$i/${V0}_ec${i}-shd.pid"
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $ec_path
+done
+
+#Reboot a node scenario
+TEST pkill gluster
+#Only stopped glusterd, so there will be one shd
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+TEST glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^24$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+shd_pid=$(get_shd_mux_pid $V0)
+for i in $(seq 1 3); do
+ afr_path="/var/run/gluster/shd/${V0}_afr$i/${V0}_afr$i-shd.pid"
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $afr_path
+ ec_path="/var/run/gluster/shd/${V0}_ec$i/${V0}_ec${i}-shd.pid"
+ EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" cat $ec_path
+done
+
+for i in $(seq 1 3); do
+ TEST $CLI volume stop ${V0}_afr$i
+ TEST $CLI volume stop ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}3
+
+TEST touch $M0/foo{1..100}
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^204$" get_pending_heal_count $V0
+
+TEST $CLI volume start ${V0} force
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST rm -rf $M0/*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^0$" shd_count
+
+cleanup
diff --git a/tests/basic/glusterd/arbiter-volume-probe.t b/tests/basic/glusterd/arbiter-volume-probe.t
new file mode 100644
index 00000000000..cb05f4ada42
--- /dev/null
+++ b/tests/basic/glusterd/arbiter-volume-probe.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+#This tests if the arbiter-count is transferred to the other peer.
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+
+kill_glusterd 2
+$CLI_1 volume create $V0 replica 3 arbiter 1 $H0:$B0/b{1..3}
+TEST $glusterd_2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field_1 $V0 "Number of Bricks"
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field_2 $V0 "Number of Bricks"
+
+cleanup;
diff --git a/tests/basic/glusterd/check-cloudsync-ancestry.t b/tests/basic/glusterd/check-cloudsync-ancestry.t
new file mode 100644
index 00000000000..ff6ffee8db7
--- /dev/null
+++ b/tests/basic/glusterd/check-cloudsync-ancestry.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# When shard and cloudsync xlators enabled on a volume, shard xlator
+# should be an ancestor of cloudsync. This testcase is to check this condition.
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+
+volfile=$(gluster system:: getwd)"/vols/$V0/trusted-$V0.tcp-fuse.vol"
+
+#Test that both shard and cloudsync are not loaded
+EXPECT "N" volgen_volume_exists $volfile $V0-shard features shard
+EXPECT "N" volgen_volume_exists $volfile $V0-cloudsync features cloudsync
+
+#Enable shard and cloudsync in that order and check if volfile is correct
+TEST $CLI volume set $V0 shard on
+TEST $CLI volume set $V0 cloudsync on
+
+#Test that both shard and cloudsync are loaded
+EXPECT "Y" volgen_volume_exists $volfile $V0-shard features shard
+EXPECT "Y" volgen_volume_exists $volfile $V0-cloudsync features cloudsync
+
+EXPECT "Y" volgen_check_ancestry $volfile features shard features cloudsync
+
+#Disable shard and cloudsync
+TEST $CLI volume set $V0 shard off
+TEST $CLI volume set $V0 cloudsync off
+
+#Test that both shard and cloudsync are not loaded
+EXPECT "N" volgen_volume_exists $volfile $V0-shard features shard
+EXPECT "N" volgen_volume_exists $volfile $V0-cloudsync features cloudsync
+
+#Enable cloudsync and shard in that order and check if volfile is correct
+TEST $CLI volume set $V0 cloudsync on
+TEST $CLI volume set $V0 shard on
+
+#Test that both shard and cloudsync are loaded
+EXPECT "Y" volgen_volume_exists $volfile $V0-shard features shard
+EXPECT "Y" volgen_volume_exists $volfile $V0-cloudsync features cloudsync
+
+EXPECT "Y" volgen_check_ancestry $volfile features shard features cloudsync
+
+cleanup;
diff --git a/tests/basic/glusterd/disperse-create.t b/tests/basic/glusterd/disperse-create.t
new file mode 100644
index 00000000000..db8a621d48e
--- /dev/null
+++ b/tests/basic/glusterd/disperse-create.t
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This command tests the volume create command validation for disperse volumes.
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 disperse 3 $H0:$B0/b4 $H0:$B0/b5 $H0:$B0/b6
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/b7 $H0:$B0/b8 $H0:$B0/b9
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 disperse-data 2 $H0:$B0/b10 $H0:$B0/b11 $H0:$B0/b12
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 redundancy 1 $H0:$B0/b10 $H0:$B0/b11 $H0:$B0/b12
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 disperse-data 2 redundancy 1 $H0:$B0/b11 $H0:$B0/b12 $H0:$B0/b13
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 disperse-data 2 redundancy 1 $H0:$B0/b11 $H0:$B0/b12 $H0:$B0/b13
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 disperse 3 disperse-data 2 $H0:$B0/b14 $H0:$B0/b15 $H0:$B0/b16
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+TEST $CLI volume create $V0 disperse 3 disperse-data 2 redundancy 1 $H0:$B0/b17 $H0:$B0/b18 $H0:$B0/b19
+EXPECT "1 x \(2 \+ 1\) = 3" volinfo_field $V0 "Number of Bricks"
+
+# -ve test cases
+#Key-words appearing more than once
+TEST ! $CLI volume create $V0 disperse 3 disperse 3 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+TEST ! $CLI volume create $V0 disperse-data 2 disperse-data 2 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+TEST ! $CLI volume create $V0 redundancy 1 redundancy 1 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+
+#Minimum counts test
+TEST ! $CLI volume create $V0 disperse 2 $H0:$B0/b20 $H0:$B0/b22
+TEST ! $CLI volume create $V0 disperse-data 1 redundancy 0 $H0:$B0/b20 $H0:$B0/b22
+TEST ! $CLI volume create $V0 disperse 4 disperse-data 4 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b23 $H0:$B0/b24
+TEST ! $CLI volume create $V0 redundancy 0 $H0:$B0/b20 $H0:$B0/b22
+
+#Wrong count n != k+m
+TEST ! $CLI volume create $V0 disperse 4 disperse-data 4 redundancy 2 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+#Num bricks is not multiple of disperse count
+TEST ! $CLI volume create $V0 disperse 6 disperse-data 4 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+#Redundancy > data
+TEST ! $CLI volume create $V0 disperse 6 disperse-data 2 redundancy 4 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+TEST ! $CLI volume create $V0 disperse 4 disperse-data 2 redundancy 2 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+#Replica + Disperse
+TEST ! $CLI volume create $V0 disperse 4 replica 2 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+TEST ! $CLI volume create $V0 disperse-data 2 replica 2 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22 $H0:$B0/b23
+TEST ! $CLI volume create $V0 redundancy 2 replica 2 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+TEST ! $CLI volume create $V0 replica 2 disperse 4 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+TEST ! $CLI volume create $V0 replica 2 disperse-data 2 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22 $H0:$B0/b23
+TEST ! $CLI volume create $V0 replica 2 redundancy 2 $H0:$B0/b20 $H0:$B0/b21 $H0:$B0/b22
+
+cleanup
diff --git a/tests/basic/glusterd/heald.t b/tests/basic/glusterd/heald.t
new file mode 100644
index 00000000000..7dae3c3f0fb
--- /dev/null
+++ b/tests/basic/glusterd/heald.t
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test contains volume heal commands handled by glusterd.
+# Covers enable/disable at the moment. Will be enhanced later to include
+# the other commands as well.
+
+function is_pid_running {
+ local pid=$1
+ num=`ps auxww | grep glustershd | grep $pid | grep -v grep | wc -l`
+ echo $num
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+#Commands should fail when volume doesn't exist
+TEST ! $CLI volume heal non-existent-volume enable
+TEST ! $CLI volume heal non-existent-volume disable
+
+# Glustershd shouldn't be running as long as there are no replicate/disperse
+# volumes
+TEST $CLI volume create dist $H0:$B0/dist
+TEST $CLI volume start dist
+TEST "[ -z $(get_shd_process_pid dist)]"
+TEST ! $CLI volume heal dist enable
+TEST ! $CLI volume heal dist disable
+
+# Commands should work on replicate/disperse volume.
+TEST $CLI volume create r2 replica 2 $H0:$B0/r2_0 $H0:$B0/r2_1
+TEST "[ -z $(get_shd_process_pid r2)]"
+TEST $CLI volume start r2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid r2
+TEST $CLI volume heal r2 enable
+EXPECT "enable" volume_option r2 "cluster.self-heal-daemon"
+volfiler2=$(gluster system:: getwd)"/vols/r2/r2-shd.vol"
+EXPECT "enable" volgen_volume_option $volfiler2 r2-replicate-0 cluster replicate self-heal-daemon
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid r2
+pid=$( get_shd_process_pid r2 )
+TEST $CLI volume heal r2 disable
+EXPECT "disable" volume_option r2 "cluster.self-heal-daemon"
+EXPECT "disable" volgen_volume_option $volfiler2 r2-replicate-0 cluster replicate self-heal-daemon
+EXPECT "1" is_pid_running $pid
+
+# Commands should work on disperse volume.
+TEST $CLI volume create ec2 disperse 3 redundancy 1 $H0:$B0/ec2_0 $H0:$B0/ec2_1 $H0:$B0/ec2_2
+TEST $CLI volume start ec2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ec2
+TEST $CLI volume heal ec2 enable
+EXPECT "enable" volume_option ec2 "cluster.disperse-self-heal-daemon"
+volfileec2=$(gluster system:: getwd)"/vols/ec2/ec2-shd.vol"
+EXPECT "enable" volgen_volume_option $volfileec2 ec2-disperse-0 cluster disperse self-heal-daemon
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "[0-9][0-9]*" get_shd_process_pid ec2
+pid=$(get_shd_process_pid ec2)
+TEST $CLI volume heal ec2 disable
+EXPECT "disable" volume_option ec2 "cluster.disperse-self-heal-daemon"
+EXPECT "disable" volgen_volume_option $volfileec2 ec2-disperse-0 cluster disperse self-heal-daemon
+EXPECT "1" is_pid_running $pid
+
+#Check that shd graph is rewritten correctly on volume stop/start
+EXPECT "Y" volgen_volume_exists $volfileec2 ec2-disperse-0 cluster disperse
+
+EXPECT "Y" volgen_volume_exists $volfiler2 r2-replicate-0 cluster replicate
+TEST $CLI volume stop r2
+EXPECT "Y" volgen_volume_exists $volfileec2 ec2-disperse-0 cluster disperse
+TEST $CLI volume stop ec2
+# When both the volumes are stopped glustershd volfile is not modified just the
+# process is stopped
+TEST "[ -z $(get_shd_process_pid dist) ]"
+TEST "[ -z $(get_shd_process_pid ec2) ]"
+
+TEST $CLI volume start r2
+EXPECT "Y" volgen_volume_exists $volfiler2 r2-replicate-0 cluster replicate
+
+TEST $CLI volume set r2 self-heal-daemon on
+TEST $CLI volume set r2 cluster.self-heal-daemon off
+TEST ! $CLI volume set ec2 self-heal-daemon off
+TEST ! $CLI volume set ec2 cluster.self-heal-daemon on
+TEST ! $CLI volume set dist self-heal-daemon off
+TEST ! $CLI volume set dist cluster.self-heal-daemon on
+
+TEST $CLI volume set ec2 disperse-self-heal-daemon off
+TEST $CLI volume set ec2 cluster.disperse-self-heal-daemon on
+TEST ! $CLI volume set r2 disperse-self-heal-daemon on
+TEST ! $CLI volume set r2 cluster.disperse-self-heal-daemon off
+TEST ! $CLI volume set dist disperse-self-heal-daemon off
+TEST ! $CLI volume set dist cluster.disperse-self-heal-daemon on
+
+cleanup
diff --git a/tests/basic/glusterd/thin-arbiter-volume-probe.t b/tests/basic/glusterd/thin-arbiter-volume-probe.t
new file mode 100644
index 00000000000..acc6943806d
--- /dev/null
+++ b/tests/basic/glusterd/thin-arbiter-volume-probe.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+#This tests if the thin-arbiter-count is transferred to the other peer.
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+
+kill_glusterd 2
+$CLI_1 volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b{1..3}
+TEST $glusterd_2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+EXPECT "1 x 2 = 2" volinfo_field_1 $V0 "Number of Bricks"
+EXPECT "1 x 2 = 2" volinfo_field_2 $V0 "Number of Bricks"
+
+cleanup;
diff --git a/tests/basic/glusterd/thin-arbiter-volume.t b/tests/basic/glusterd/thin-arbiter-volume.t
new file mode 100644
index 00000000000..4e813890a45
--- /dev/null
+++ b/tests/basic/glusterd/thin-arbiter-volume.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../ volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+
+#This command tests the volume create command validation for thin-arbiter volumes.
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+EXPECT "1 x 2 = 2" volinfo_field $V0 "Number of Bricks"
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST touch $M0/a.txt
+TEST ls $B0/b1/a.txt
+TEST ls $B0/b2/a.txt
+TEST ! ls $B0/b3/a.txt
+
+TEST umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+TEST $CLI volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b{4..8}
+EXPECT "2 x 2 = 4" volinfo_field $V0 "Number of Bricks"
+
+TEST $CLI volume delete $V0
+
+TEST rm -rf $B0/b{1..3}
+
+TEST $CLI volume create $V0 replica 2 thin-arbiter 1 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+EXPECT "1 x 2 = 2" volinfo_field $V0 "Number of Bricks"
+
+TEST killall -15 glusterd
+TEST glusterd
+TEST pidof glusterd
+EXPECT "1 x 2 = 2" volinfo_field $V0 "Number of Bricks"
+
+cleanup
+
diff --git a/tests/basic/glusterd/volfile_server_switch.t b/tests/basic/glusterd/volfile_server_switch.t
new file mode 100644
index 00000000000..e11cfed509a
--- /dev/null
+++ b/tests/basic/glusterd/volfile_server_switch.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+
+cleanup;
+
+# * How this test works ?
+# 1. create a 3 node cluster
+# 2. add them to trusted pool
+# 3. create a volume and start
+# 4. mount the volume with all 3 backup-volfile servers
+# 5. kill glusterd in node 1
+# 6. make changes to volume using node 2, using 'volume set' here
+# 7. check whether those notifications are received by client
+
+TEST launch_cluster 3;
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0
+
+TEST $CLI_1 volume start $V0
+
+TEST $CLI_1 volume status $V0;
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H1 --volfile-server=$H2 --volfile-server=$H3 $M0
+
+TEST kill_glusterd 1
+
+TEST $CLI_2 volume set $V0 performance.write-behind off
+
+# make sure by this time directory will be created
+# TODO: suggest ideal time to wait
+sleep 5
+
+count=$(find $M0/.meta/graphs/* -maxdepth 0 -type d -iname "*" | wc -l)
+TEST [ "$count" -gt "1" ]
+
+cleanup;
diff --git a/tests/basic/glusterd/volume-brick-count.t b/tests/basic/glusterd/volume-brick-count.t
new file mode 100644
index 00000000000..dc1a5278f4f
--- /dev/null
+++ b/tests/basic/glusterd/volume-brick-count.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function test_volume_config()
+{
+ volname=$1
+ type_string=$2
+ brickCount=$3
+ distCount=$4
+ replicaCount=$5
+ arbiterCount=$6
+ disperseCount=$7
+ redundancyCount=$8
+
+ EXPECT "$type_string" volinfo_field $volname "Number of Bricks"
+ EXPECT "$brickCount" get-xml "volume info $volname" "brickCount"
+ EXPECT "$distCount" get-xml "volume info $volname" "distCount"
+ EXPECT "$replicaCount" get-xml "volume info $volname" "replicaCount"
+ EXPECT "$arbiterCount" get-xml "volume info $volname" "arbiterCount"
+ EXPECT "$disperseCount" get-xml "volume info $volname" "disperseCount"
+ EXPECT "$redundancyCount" get-xml "volume info $volname" "redundancyCount"
+}
+
+# This command tests the volume create command and number of bricks for different volume types.
+cleanup;
+TESTS_EXPECTED_IN_LOOP=56
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create ${V0}_1 replica 3 arbiter 1 $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+test_volume_config "${V0}_1" "1 x \(2 \+ 1\) = 3" "3" "1" "3" "1" "0" "0"
+
+TEST $CLI volume create ${V0}_2 replica 3 arbiter 1 $H0:$B0/b{4..9}
+test_volume_config "${V0}_2" "2 x \(2 \+ 1\) = 6" "6" "2" "3" "1" "0" "0"
+
+
+TEST $CLI volume create ${V0}_3 replica 3 arbiter 1 $H0:$B0/b{10..12}
+test_volume_config "${V0}_3" "1 x \(2 \+ 1\) = 3" "3" "1" "3" "1" "0" "0"
+TEST killall -15 glusterd
+TEST glusterd
+TEST pidof glusterd
+test_volume_config "${V0}_3" "1 x \(2 \+ 1\) = 3" "3" "1" "3" "1" "0" "0"
+
+TEST $CLI volume create ${V0}_4 replica 3 $H0:$B0/b{13..15}
+test_volume_config "${V0}_4" "1 x 3 = 3" "3" "1" "3" "0" "0" "0"
+
+TEST $CLI volume create ${V0}_5 replica 3 $H0:$B0/b{16..21}
+test_volume_config "${V0}_5" "2 x 3 = 6" "6" "2" "3" "0" "0" "0"
+
+TEST $CLI volume create ${V0}_6 disperse 3 redundancy 1 $H0:$B0/b{22..24}
+test_volume_config "${V0}_6" "1 x \(2 \+ 1\) = 3" "3" "1" "1" "0" "3" "1"
+
+TEST $CLI volume create ${V0}_7 disperse 3 redundancy 1 $H0:$B0/b{25..30}
+test_volume_config "${V0}_7" "2 x \(2 \+ 1\) = 6" "6" "2" "1" "0" "3" "1"
+
+TEST $CLI volume create ${V0}_8 $H0:$B0/b{31..33}
+test_volume_config "${V0}_8" "3" "3" "3" "1" "0" "0" "0"
+
+cleanup
diff --git a/tests/basic/glusterfsd-args.t b/tests/basic/glusterfsd-args.t
new file mode 100644
index 00000000000..2dd84b8c29e
--- /dev/null
+++ b/tests/basic/glusterfsd-args.t
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+EXPECT $GLUSTER_LIBEXECDIR glusterfsd --print-libexecdir
diff --git a/tests/basic/graph-cleanup-brick-down-shd-mux.t b/tests/basic/graph-cleanup-brick-down-shd-mux.t
new file mode 100644
index 00000000000..3c621cdcc26
--- /dev/null
+++ b/tests/basic/graph-cleanup-brick-down-shd-mux.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=4
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+
+for i in $(seq 1 2); do
+ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_afr$i
+ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+#Check the thread count become to number of volumes*number of ec subvolume (2*6=12)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to number of volumes*number of afr subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#kill one brick and test cleanup
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+
+#kill an entire subvol and test cleanup
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+#wait for some time to create a race sceanrio
+sleep 1
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+
+#kill all bricks and test cleanup
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick $V0 $H0 $B0/${V0}5
+#wait for some time to create a race sceanrio
+sleep 2
+
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd ${V0}_afr1 "afr_shd_index_healer"
+
+cleanup
diff --git a/tests/basic/hardlink-limit.t b/tests/basic/hardlink-limit.t
new file mode 100644
index 00000000000..ee65c650b59
--- /dev/null
+++ b/tests/basic/hardlink-limit.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 storage.max-hardlinks 3
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST dd if=/dev/zero of=$M0/testfile count=1
+
+# max-hardlinks is 3, should be able to create 2 links.
+TEST link $M0/testfile $M0/testfile.link1
+TEST link $M0/testfile $M0/testfile.link2
+
+# But not 3.
+TEST ! link $M0/testfile $M0/testfile.link3
+# If we remove one...
+TEST rm $M0/testfile.link1
+# Now we can add one.
+TEST link $M0/testfile $M0/testfile.link3
+
+# But not another
+TEST ! link $M0/testfile $M0/testfile.link4
+
+# Unless we disable the limit...
+TEST $CLI volume set $V0 storage.max-hardlinks 0
+TEST link $M0/testfile $M0/testfile.link4
+
+cleanup;
diff --git a/tests/basic/inode-leak.t b/tests/basic/inode-leak.t
new file mode 100644
index 00000000000..e112fdddf8a
--- /dev/null
+++ b/tests/basic/inode-leak.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume start $V0
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+EXPECT "1" get_mount_active_size_value $V0 $M0
+EXPECT "0" get_mount_lru_size_value $V0 $M0
+
+TEST cp -rf /etc $M0
+TEST find $M0
+TEST rm -rf $M0/*
+
+EXPECT "1" get_mount_active_size_value $V0 $M0
+EXPECT "0" get_mount_lru_size_value $V0 $M0
+
+cleanup
+
+# Mainly marking it as known-issue as it is taking a *lot* of time.
+# Revert back if we are below an hour in regression runs.
+# Or consider running only in nightly regressions.
+
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=000000
diff --git a/tests/basic/inode-quota-enforcing.t b/tests/basic/inode-quota-enforcing.t
new file mode 100644
index 00000000000..d666395dab1
--- /dev/null
+++ b/tests/basic/inode-quota-enforcing.t
@@ -0,0 +1,100 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/quota.c -o $QDD
+
+TESTS_EXPECTED_IN_LOOP=9
+
+TEST glusterd
+
+# --------------------------------------------------
+# Create, start and mount a volume with single brick
+# --------------------------------------------------
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+
+TEST $CLI volume start $V0
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+TEST mkdir -p $M0/test_dir
+
+#--------------------------------------------------------
+# Enable quota of the volume and set hard and soft timeout
+#------------------------------------------------------
+
+TEST $CLI volume quota $V0 enable
+EXPECT 'on' volinfo_field $V0 'features.quota'
+TEST $CLI volume quota $V0 soft-timeout 0
+EXPECT '0' volinfo_field $V0 'features.soft-timeout'
+TEST $CLI volume quota $V0 hard-timeout 0
+EXPECT '0' volinfo_field $V0 'features.hard-timeout'
+
+#-------------------------------------------------------
+# Set quota limits on the directory and
+# verify if the limits are being reflected properly
+#------------------------------------------------------
+
+TEST $CLI volume quota $V0 limit-objects /test_dir 10
+EXPECT "10" quota_object_list_field "/test_dir" 2
+
+TEST $CLI volume quota $V0 limit-usage /test_dir 100MB
+EXPECT "100.0MB" quota_list_field "/test_dir" 2
+
+#------------------------------------------------------
+# Check the quota enforcement mechanism for object count
+#-------------------------------------------------------
+
+# Try creating 9 files and it should succeed as object limit
+# is set to 10, since directory where limit is set is accounted
+# as well.
+
+for i in {1..9}; do
+ #TEST_IN_LOOP touch $M0/test_dir/test$i.txt
+ TEST_IN_LOOP $QDD $M0/test_dir/test$i.txt 256 4
+done
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "9" quota_object_list_field "/test_dir" 4
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "9.0MB" quotausage "/test_dir"
+
+# Check available limit
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "0" quota_object_list_field "/test_dir" 6
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "91.0MB" quota_list_field "/test_dir" 5
+
+# Check if hard-limit exceeded
+EXPECT "Yes" quota_object_list_field "/test_dir" 8
+
+# Check if soft-limit exceeded
+EXPECT "Yes" quota_object_list_field "/test_dir" 7
+
+# Creation of 11th file should throw out an error
+TEST ! touch $M0/test_dir/test11.txt
+
+#-------------------------------------------------------
+# remove quota limits on the directory and
+# verify if the limit show 'N/A' and displayes only the usage
+#------------------------------------------------------
+TEST $CLI volume quota $V0 remove-objects /test_dir
+EXPECT "N/A" quota_object_list_field "/test_dir" 2
+EXPECT "9" quota_object_list_field "/test_dir" 4
+
+TEST $CLI volume quota $V0 remove /test_dir
+EXPECT "N/A" quota_list_field "/test_dir" 2
+EXPECT "9.0MB" quotausage "/test_dir" 4
+
+# Set back the limits
+TEST $CLI volume quota $V0 limit-objects /test_dir 10
+EXPECT "10" quota_object_list_field "/test_dir" 2
+
+# Remove all files and verify the file count
+TEST rm -rf $M0/test_dir/test*
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "0" quota_object_list_field "/test_dir" 4
+
+rm -f $QDD
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1332021
diff --git a/tests/basic/ios-dump.t b/tests/basic/ios-dump.t
new file mode 100644
index 00000000000..0cfbdc6ae7c
--- /dev/null
+++ b/tests/basic/ios-dump.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+function check_brick_inter_stats() {
+ local counter="$1"
+ local inter_cnt=""
+
+ inter_cnt=$(grep -h "\".*inter.*$counter\"" \
+ /var/lib/glusterd/stats/glusterfsd*.dump 2>/dev/null |
+ grep -v '\"0.0000\"' | wc -l)
+ if (( $inter_cnt == 3 )); then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 5
+TEST $CLI volume set $V0 diagnostics.count-fop-hits on
+TEST $CLI volume set $V0 diagnostics.latency-measurement on
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+# Generate some FOPs
+cd $M0
+for i in {1..10}; do
+ mkdir a
+ cd a
+ for g in {1..10}; do
+ dd if=/dev/zero of=test$g bs=128k count=1
+ done
+done
+
+EXPECT_WITHIN 30 "Y" check_brick_inter_stats fop.weighted_latency_ave_usec
+
+cleanup
diff --git a/tests/basic/jbr/jbr-volgen.t b/tests/basic/jbr/jbr-volgen.t
new file mode 100644
index 00000000000..f368710c158
--- /dev/null
+++ b/tests/basic/jbr/jbr-volgen.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+volfiles=${GLUSTERD_WORKDIR}/vols/${V0}/
+check_brick_volfiles () {
+ for vf in ${volfiles}${V0}.$(hostname).*.vol; do
+ grep -qs experimental/jbr $vf || return
+ # At least for now, nothing else would put a client translator
+ # in a brick volfile.
+ grep -qs protocol/client $vf || return
+ done
+ echo "OK"
+}
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+TEST $CLI volume set $V0 cluster.jbr on
+
+# Check that the client volfile got modified properly.
+TEST grep -qs experimental/jbrc ${volfiles}${V0}.tcp-fuse.vol
+
+# Check that the brick volfiles got modified as well.
+EXPECT "OK" check_brick_volfiles
+
+# Put things back and make sure the "undo" worked.
+TEST $CLI volume set $V0 cluster.jbr off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+echo hello > $M0/probe
+EXPECT hello cat ${B0}/${V0}1/probe
+EXPECT hello cat ${B0}/${V0}2/probe
+
+cleanup
+#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=1385758
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1385758
diff --git a/tests/basic/jbr/jbr.t b/tests/basic/jbr/jbr.t
new file mode 100755
index 00000000000..605344b5a7e
--- /dev/null
+++ b/tests/basic/jbr/jbr.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../fdl.rc
+
+cleanup;
+
+TEST verify_lvm_version;
+#Create cluster with 3 nodes
+TEST launch_cluster 3;
+TEST setup_lvm 3
+
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
+
+TEST $CLI_1 volume create $V0 replica 3 $H1:$L1 $H2:$L2 $H3:$L3
+TEST $CLI_1 volume set $V0 cluster.jbr on
+TEST $CLI_1 volume set $V0 cluster.jbr.quorum-percent 100
+TEST $CLI_1 volume set $V0 features.fdl on
+#TEST $CLI_1 volume set $V0 diagnostics.brick-log-level DEBUG
+TEST $CLI_1 volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H1 --entry-timeout=0 $M0;
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" jbrc_child_up_status $V0 0
+
+echo "file" > $M0/file1
+TEST stat $L1/file1
+TEST stat $L2/file1
+TEST stat $L3/file1
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=1385758
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1385758
diff --git a/tests/basic/logchecks-messages.h b/tests/basic/logchecks-messages.h
new file mode 100644
index 00000000000..bf364848ec7
--- /dev/null
+++ b/tests/basic/logchecks-messages.h
@@ -0,0 +1,105 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _LOGCHECKS_MESSAGES_H_
+#define _LOGCHECKS_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for redability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check acorss the code if the message ID macro in question is reused
+ * anywhere. If reused then then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_COMP_BASE 1000
+#define GLFS_NUM_MESSAGES 19
+#define GLFS_MSGID_END (GLFS_COMP_BASE + GLFS_NUM_MESSAGES + 1)
+/* Messaged with message IDs */
+#define glfs_msg_start_x GLFS_COMP_BASE, "Invalid: Start of messages"
+/*------------*/
+#define logchecks_msg_1 \
+ (GLFS_COMP_BASE + 1), \
+ "Informational: Testing logging" \
+ " in gluster"
+#define logchecks_msg_2 \
+ (GLFS_COMP_BASE + 2), \
+ "Informational: Format testing:" \
+ " %d:%s:%x"
+#define logchecks_msg_3 \
+ (GLFS_COMP_BASE + 3), \
+ "Critical: Testing logging" \
+ " in gluster"
+#define logchecks_msg_4 \
+ (GLFS_COMP_BASE + 4), \
+ "Critical: Format testing:" \
+ " %d:%s:%x"
+#define logchecks_msg_5 (GLFS_COMP_BASE + 5), "Critical: Rotated the log"
+#define logchecks_msg_6 (GLFS_COMP_BASE + 6), "Critical: Flushed the log"
+#define logchecks_msg_7 (GLFS_COMP_BASE + 7), "Informational: gf_msg_callingfn"
+#define logchecks_msg_8 \
+ (GLFS_COMP_BASE + 8), \
+ "Informational: " \
+ "gf_msg_callingfn: Format testing: %d:%s:%x"
+#define logchecks_msg_9 (GLFS_COMP_BASE + 9), "Critical: gf_msg_callingfn"
+#define logchecks_msg_10 \
+ (GLFS_COMP_BASE + 10), \
+ "Critical: " \
+ "gf_msg_callingfn: Format testing: %d:%s:%x"
+#define logchecks_msg_11 (GLFS_COMP_BASE + 11), "=========================="
+#define logchecks_msg_12 \
+ (GLFS_COMP_BASE + 12), \
+ "Test 1: Only stderr and" \
+ " partial syslog"
+#define logchecks_msg_13 \
+ (GLFS_COMP_BASE + 13), \
+ "Test 2: Only checklog and" \
+ " partial syslog"
+#define logchecks_msg_14 \
+ (GLFS_COMP_BASE + 14), \
+ "Test 5: Changing to" \
+ " traditional format"
+#define logchecks_msg_15 \
+ (GLFS_COMP_BASE + 15), \
+ "Test 6: Changing log level" \
+ " to critical and above"
+#define logchecks_msg_16 (GLFS_COMP_BASE + 16), "Test 7: Only to syslog"
+#define logchecks_msg_17 \
+ (GLFS_COMP_BASE + 17), \
+ "Test 8: Only to syslog," \
+ " traditional format"
+#define logchecks_msg_18 \
+ (GLFS_COMP_BASE + 18), \
+ "Test 9: Only to syslog," \
+ " only critical and above"
+#define logchecks_msg_19 \
+ (GLFS_COMP_BASE + 19), \
+ "Pre init message, not to be" \
+ " seen in logs"
+/*------------*/
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+
+#endif /* !_component_MESSAGES_H_ */ \ No newline at end of file
diff --git a/tests/basic/logchecks.c b/tests/basic/logchecks.c
new file mode 100644
index 00000000000..df0be28ace0
--- /dev/null
+++ b/tests/basic/logchecks.c
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/globals.h>
+#include <glusterfs/logging.h>
+
+#include "logchecks-messages.h"
+#include "../../libglusterfs/src/logging.h"
+
+glusterfs_ctx_t *ctx = NULL;
+
+#define TEST_FILENAME "/tmp/logchecks.log"
+#define GF_LOG_CONTROL_FILE "/etc/glusterfs/logger.conf"
+
+int
+go_log_vargs(gf_loglevel_t level, const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ gf_msg_vplain(level, fmt, ap);
+ va_end(ap);
+
+ return 0;
+}
+
+int
+go_log(void)
+{
+ /*** gf_msg ***/
+ gf_msg("logchecks", GF_LOG_INFO, 0, logchecks_msg_1);
+ gf_msg("logchecks", GF_LOG_INFO, 22, logchecks_msg_2, 42, "Forty-Two", 42);
+ /* change criticality */
+ gf_msg("logchecks", GF_LOG_CRITICAL, 0, logchecks_msg_3);
+ gf_msg("logchecks", GF_LOG_CRITICAL, 22, logchecks_msg_4, 42, "Forty-Two",
+ 42);
+
+ /*** msg_nomem ***/
+ gf_msg_nomem("logchecks", GF_LOG_ALERT, 555);
+ gf_msg_nomem("logchecks", GF_LOG_INFO, 555);
+
+ /*** msg_plain ***/
+ gf_msg_plain(GF_LOG_INFO,
+ "Informational: gf_msg_plain with"
+ " args %d:%s:%x",
+ 42, "Forty-Two", 42);
+ gf_msg_plain(GF_LOG_ALERT,
+ "Alert: gf_msg_plain with"
+ " args %d:%s:%x",
+ 42, "Forty-Two", 42);
+
+ /*** msg_vplain ***/
+ go_log_vargs(GF_LOG_INFO, "Informational: gf_msg_vplain: No args!!!");
+ go_log_vargs(GF_LOG_INFO,
+ "Informational: gf_msg_vplain: Some"
+ " args %d:%s:%x",
+ 42, "Forty-Two", 42);
+ go_log_vargs(GF_LOG_INFO, "Critical: gf_msg_vplain: No args!!!");
+ go_log_vargs(GF_LOG_INFO,
+ "Critical: gf_msg_vplain: Some"
+ " args %d:%s:%x",
+ 42, "Forty-Two", 42);
+
+ /*** msg_plain_nomem ***/
+ gf_msg_plain_nomem(GF_LOG_INFO, "Informational: gf_msg_plain_nomem");
+ gf_msg_plain_nomem(GF_LOG_ALERT, "Alert: gf_msg_plain_nomem");
+
+ /*** msg_backtrace_nomem ***/
+ // TODO: Need to create a stack depth and then call
+ gf_msg_backtrace_nomem(GF_LOG_INFO, 5);
+ gf_msg_backtrace_nomem(GF_LOG_ALERT, 5);
+
+ /*** gf_msg_callingfn ***/
+ // TODO: Need to create a stack depth and then call
+ gf_msg_callingfn("logchecks", GF_LOG_INFO, 0, logchecks_msg_7);
+ gf_msg_callingfn("logchecks", GF_LOG_INFO, 0, logchecks_msg_8, 42,
+ "Forty-Two", 42);
+ gf_msg_callingfn("logchecks", GF_LOG_CRITICAL, 0, logchecks_msg_9);
+ gf_msg_callingfn("logchecks", GF_LOG_CRITICAL, 0, logchecks_msg_10, 42,
+ "Forty-Two", 42);
+
+ /*** gf_msg_debug ***/
+ gf_msg_debug("logchecks", 0, "Debug: Hello World!!!");
+ gf_msg_debug("logchecks", 22, "Debug: With args %d:%s:%x", 42, "Forty-Two",
+ 42);
+
+ /*** gf_msg_trace ***/
+ gf_msg_trace("logchecks", 0, "Trace: Hello World!!!");
+ gf_msg_trace("logchecks", 22, "Trace: With args %d:%s:%x", 42, "Forty-Two",
+ 42);
+
+ /*** gf_msg_backtrace ***/
+ // TODO: Test with lower callstr values to check truncation
+
+ return 0;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+
+ unlink(GF_LOG_CONTROL_FILE);
+ creat(GF_LOG_CONTROL_FILE, O_RDONLY);
+ ctx = glusterfs_ctx_new();
+ if (!ctx)
+ return -1;
+
+ ret = glusterfs_globals_init(ctx);
+ if (ret) {
+ printf("Error from glusterfs_globals_init [%s]\n", strerror(errno));
+ return ret;
+ }
+
+ /* Pre init test, message should not be printed */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_19);
+
+ THIS->ctx = ctx;
+
+ /* TEST 1: messages before initializing the log, goes to stderr
+ * and syslog based on criticality */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_12);
+ go_log();
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ /* TEST 2: messages post initialization, goes to glusterlog and
+ * syslog based on severity */
+ ret = gf_log_init(ctx, TEST_FILENAME, "logchecks");
+ if (ret != 0) {
+ printf("Error from gf_log_init [%s]\n", strerror(errno));
+ return -1;
+ }
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_13);
+ go_log();
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ /* TEST 3: Test rotation */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_log_logrotate(0);
+ gf_msg("logchecks", GF_LOG_CRITICAL, 0, logchecks_msg_5);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ /* TEST 4: Check flush, nothing noticeable should occur :) */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_log_flush();
+ gf_msg("logchecks", GF_LOG_CRITICAL, 0, logchecks_msg_6);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ /* TEST 5: Change format */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_log_set_logformat(gf_logformat_traditional);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_14);
+ go_log();
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ /* TEST 6: Change level */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_log_set_loglevel(ctx, GF_LOG_CRITICAL);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_15);
+ go_log();
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ /* Reset to run with syslog */
+ gf_log_set_logformat(gf_logformat_withmsgid);
+ gf_log_set_loglevel(ctx, GF_LOG_INFO);
+
+ /* Run tests with logger changed to syslog */
+ /* TEST 7: No more gluster logs */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_log_set_logger(gf_logger_syslog);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_16);
+ go_log();
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ /* TEST 8: Change format */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_log_set_logformat(gf_logformat_traditional);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_14);
+ go_log();
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ /* TEST 9: Change level */
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+ gf_log_set_loglevel(ctx, GF_LOG_CRITICAL);
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_15);
+ go_log();
+ gf_msg("logchecks", GF_LOG_ALERT, 0, logchecks_msg_11);
+
+ // TODO: signal crash prints, but not yet feasible here
+ // TODO: Graph printing
+ // TODO: Multi threaded logging
+
+ /* Close out the logging */
+ gf_log_fini(ctx);
+ gf_log_globals_fini();
+
+ unlink(GF_LOG_CONTROL_FILE);
+ unlink(TEST_FILENAME);
+
+ return 0;
+}
diff --git a/tests/basic/md-cache/bug-1317785.t b/tests/basic/md-cache/bug-1317785.t
new file mode 100644
index 00000000000..5076e3612ac
--- /dev/null
+++ b/tests/basic/md-cache/bug-1317785.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 cache-swift-metadata on
+EXPECT 'on' volinfo_field $V0 'performance.cache-swift-metadata'
+
+TEST $CLI volume set $V0 cache-swift-metadata off
+EXPECT 'off' volinfo_field $V0 'performance.cache-swift-metadata'
+
+TEST $CLI volume set $V0 performance.cache-capability-xattrs off
+EXPECT 'off' volinfo_field $V0 'performance.cache-capability-xattrs'
+
+TEST $CLI volume set $V0 performance.cache-capability-xattrs on
+EXPECT 'on' volinfo_field $V0 'performance.cache-capability-xattrs'
+
+TEST $CLI volume set $V0 performance.cache-ima-xattrs off
+EXPECT 'off' volinfo_field $V0 'performance.cache-ima-xattrs'
+
+TEST $CLI volume set $V0 performance.cache-ima-xattrs on
+EXPECT 'on' volinfo_field $V0 'performance.cache-ima-xattrs'
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/md-cache/bug-1418249.t b/tests/basic/md-cache/bug-1418249.t
new file mode 100755
index 00000000000..85a4f58ec10
--- /dev/null
+++ b/tests/basic/md-cache/bug-1418249.t
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 group metadata-cache
+EXPECT 'on' volinfo_field $V0 'performance.cache-invalidation'
+EXPECT '600' volinfo_field $V0 'performance.md-cache-timeout'
+EXPECT 'on' volinfo_field $V0 'performance.stat-prefetch'
+EXPECT '600' volinfo_field $V0 'features.cache-invalidation-timeout'
+EXPECT 'on' volinfo_field $V0 'features.cache-invalidation'
+EXPECT '200000' volinfo_field $V0 'network.inode-lru-limit'
+cleanup;
diff --git a/tests/basic/meta.t b/tests/basic/meta.t
new file mode 100755
index 00000000000..0bac3c6797d
--- /dev/null
+++ b/tests/basic/meta.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..9};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+# verify json validity
+
+TEST json_verify < $M0/.meta/frames;
+
+TEST json_verify < $M0/.meta/cmdline;
+
+TEST json_verify < $M0/.meta/version;
+
+# default log level (INFO) is 7
+TEST grep -q 7 $M0/.meta/logging/loglevel;
+
+# check for attribute_timeout exposed through state dump
+TEST grep -q attribute_timeout $M0/.meta/master/private;
+
+# check for mount point specified as an option
+TEST grep -q $M0 $M0/.meta/master/options/mountpoint;
+
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/basic/metadisp/fsyncdir.c b/tests/basic/metadisp/fsyncdir.c
new file mode 100644
index 00000000000..62b532b9ce4
--- /dev/null
+++ b/tests/basic/metadisp/fsyncdir.c
@@ -0,0 +1,29 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <fcntl.h>
+
+int
+main(int argc, char **argv)
+{
+ int pfd;
+
+ pfd = open(argv[1], O_RDONLY | O_DIRECTORY);
+ if (pfd == (-1)) {
+ perror("open");
+ return EXIT_FAILURE;
+ }
+
+ if (rename(argv[2], argv[3]) == (-1)) {
+ perror("rename");
+ return EXIT_FAILURE;
+ }
+
+ if (fsync(pfd) == (-1)) {
+ perror("fsync");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tests/basic/metadisp/ftruncate.c b/tests/basic/metadisp/ftruncate.c
new file mode 100644
index 00000000000..c9185212c31
--- /dev/null
+++ b/tests/basic/metadisp/ftruncate.c
@@ -0,0 +1,34 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <fcntl.h>
+
+int
+main(int argc, char **argv)
+{
+ int pfd;
+
+ pfd = open(argv[1], O_RDWR);
+ if (pfd == (-1)) {
+ perror("open");
+ return EXIT_FAILURE;
+ }
+
+ if (ftruncate(pfd, 0) == (-1)) {
+ perror("ftruncate");
+ return EXIT_FAILURE;
+ }
+
+ if (write(pfd, "hello", 5) == (-1)) {
+ perror("write");
+ return EXIT_FAILURE;
+ }
+
+ if (fsync(pfd) == (-1)) {
+ perror("fsync");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tests/basic/metadisp/fxattr.c b/tests/basic/metadisp/fxattr.c
new file mode 100644
index 00000000000..e552057778a
--- /dev/null
+++ b/tests/basic/metadisp/fxattr.c
@@ -0,0 +1,107 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <fcntl.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+
+static char MY_XATTR[] = "user.fxtest";
+static char *PROGRAM;
+#define CONSUME(v) \
+ do { \
+ if (!argc) { \
+ fprintf(stderr, "missing argument\n"); \
+ return EXIT_FAILURE; \
+ } \
+ v = argv[0]; \
+ ++argv; \
+ --argc; \
+ } while (0)
+
+static int
+do_get(int argc, char **argv, int fd)
+{
+ char *value;
+ int ret;
+ char buf[1024];
+
+ CONSUME(value);
+
+ ret = fgetxattr(fd, MY_XATTR, buf, sizeof(buf));
+ if (ret == (-1)) {
+ perror("fgetxattr");
+ return EXIT_FAILURE;
+ }
+
+ if (strncmp(buf, value, ret) != 0) {
+ fprintf(stderr, "data mismatch\n");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
+
+static int
+do_set(int argc, char **argv, int fd)
+{
+ char *value;
+ int ret;
+
+ CONSUME(value);
+
+ ret = fsetxattr(fd, MY_XATTR, value, strlen(value), 0);
+ if (ret == (-1)) {
+ perror("fsetxattr");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
+
+static int
+do_remove(int argc, char **argv, int fd)
+{
+ int ret;
+
+ ret = fremovexattr(fd, MY_XATTR);
+ if (ret == (-1)) {
+ perror("femovexattr");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
+
+int
+main(int argc, char **argv)
+{
+ int fd;
+ char *path;
+ char *cmd;
+
+ CONSUME(PROGRAM);
+ CONSUME(path);
+ CONSUME(cmd);
+
+ fd = open(path, O_RDWR);
+ if (fd == (-1)) {
+ perror("open");
+ return EXIT_FAILURE;
+ }
+
+ if (strcmp(cmd, "get") == 0) {
+ return do_get(argc, argv, fd);
+ }
+
+ if (strcmp(cmd, "set") == 0) {
+ return do_set(argc, argv, fd);
+ }
+
+ if (strcmp(cmd, "remove") == 0) {
+ return do_remove(argc, argv, fd);
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tests/basic/metadisp/gfs-fsetxattr.c b/tests/basic/metadisp/gfs-fsetxattr.c
new file mode 100644
index 00000000000..63578bc528f
--- /dev/null
+++ b/tests/basic/metadisp/gfs-fsetxattr.c
@@ -0,0 +1,141 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int gfapi = 1;
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ int i = 0;
+ glfs_fd_t *fd = NULL;
+ char *topdir = "topdir", *filename = "file1";
+ char *buf = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+ char *basename = NULL;
+ char *dir1 = NULL, *dir2 = NULL, *filename1 = NULL, *filename2 = NULL;
+ struct stat sb = {
+ 0,
+ };
+
+ if (argc != 5) {
+ fprintf(
+ stderr,
+ "Expect following args %s <hostname> <Vol> <log file> <basename>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ logfile = argv[3];
+ basename = argv[4];
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL (%s)\n", strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = asprintf(&dir1, "%s-dir", basename);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ ret = glfs_mkdir(fs, dir1, 0755);
+ if (ret < 0) {
+ fprintf(stderr, "mkdir(%s): %s\n", dir1, strerror(errno));
+ return -1;
+ }
+
+ fd = glfs_opendir(fs, dir1);
+ if (!fd) {
+ fprintf(stderr, "/: %s\n", strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fsetxattr(fd, "user.dirfattr", "fsetxattr", 9, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr(%s): %d (%s)\n", dir1, ret, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_closedir(fd);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_closedir failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = asprintf(&filename1, "%s-file", basename);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ ret = asprintf(&filename2, "%s-file-renamed", basename);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ fd = glfs_creat(fs, filename1, O_RDWR, 0644);
+ if (!fd) {
+ fprintf(stderr, "%s: (%p) %s\n", filename1, fd, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_rename(fs, filename1, filename2);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_rename failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_lstat(fs, filename2, &sb);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_lstat failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fsetxattr(fd, "user.filefattr", "fsetxattr", 9, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr(%s): %d (%s)\n", dir1, ret, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_close(fd);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_close failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+}
diff --git a/tests/basic/metadisp/metadisp.t b/tests/basic/metadisp/metadisp.t
new file mode 100644
index 00000000000..894ffe07226
--- /dev/null
+++ b/tests/basic/metadisp/metadisp.t
@@ -0,0 +1,316 @@
+#!/usr/bin/env bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+
+# Considering `--enable-metadisp` is an option for `./configure`,
+# which is disabled by default, this test will never pass regression.
+# But to see the value of this test, run below after configuring
+# with above option :
+# `prove -vmfe '/bin/bash' tests/basic/metadisp/metadisp.t`
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST
+
+cleanup;
+
+TEST mkdir -p $B0/b0/{0,1}
+
+TEST setfattr -n trusted.glusterfs.volume-id -v 0xddab9eece7b64a95b07351a1f748f56f ${B0}/b0/0
+TEST setfattr -n trusted.glusterfs.volume-id -v 0xddab9eece7b64a95b07351a1f748f56f ${B0}/b0/1
+
+TEST $GFS --volfile=$(dirname $0)/metadisp.vol --volfile-id=$V0 $M0;
+
+NUM_FILES=40
+TEST touch $M0/{1..${NUM_FILES}}
+
+# each drive should get 40 files
+TEST [ $(dir -1 $B0/b0/0/ | wc -l) -eq $NUM_FILES ]
+TEST [ $(dir -1 $B0/b0/1/ | wc -l) -eq $NUM_FILES ]
+
+# now write some data to a file
+echo "hello" > $M0/3
+filename=$$
+echo "hello" > /tmp/metadisp-write-${filename}
+checksum=$(md5sum /tmp/metadisp-write-${filename} | awk '{print $1}')
+TEST [ "$(md5sum $M0/3 | awk '{print $1}')" == "$checksum" ]
+
+# check that the backend file exists on b1
+gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/b0/*/3))
+TEST [ $(dir -1 $B0/b0/1/$gfid | wc -l) -eq 1 ]
+
+# check that the backend file matches the frontend
+TEST [ "$(md5sum $B0/b0/1/$gfid | awk '{print $1}')" == "$checksum" ]
+
+# delete the file
+TEST rm $M0/3
+
+# ensure the frontend and backend files are cleaned up
+TEST ! -e $M0/3
+TEST ! [ stat $B0/b*/*/$gfid ]
+
+# Test TRUNCATE + WRITE flow
+echo "hello" | tee $M0/4
+echo "goo" | tee $M0/4
+filename=$$
+echo "goo" | tee /tmp/metadisp-truncate-${filename}
+checksum=$(md5sum /tmp/metadisp-truncate-${filename} | awk '{print $1}')
+TEST [ "$(md5sum $M0/4 | awk '{print $1}')" == "$checksum" ]
+
+# Test mkdir + rmdir.
+TEST mkdir $M0/rmdir_me
+nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+TEST rmdir $M0/rmdir_me
+nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+
+# Test rename.
+TEST touch $M0/rename_me
+nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+TEST mv $M0/rename_me $M0/such_rename
+nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+
+# Test rename of a file that doesn't exist.
+TEST ! mv $M0/does-not-exist $M0/neither-does-this
+
+
+# cleanup all the other files.
+TEST rm -v $M0/1 $M0/2 $M0/{4..${NUM_FILES}}
+TEST rm $M0/such_rename
+TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq 0 ]
+TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq 0 ]
+
+# Test CREATE flow
+NUM_FILES=40
+TEST touch $M0/{1..${NUM_FILES}}
+TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq $NUM_FILES ]
+TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq $NUM_FILES ]
+
+# Test UNLINK flow
+# No drives should have any files
+TEST rm -v $M0/{1..${NUM_FILES}}
+TEST [ $(ls /d/backends/b0/0/ | wc -l) -eq 0 ]
+TEST [ $(ls /d/backends/b0/1/ | wc -l) -eq 0 ]
+
+# Test CREATE + WRITE + READ flow
+filename=$$
+dd if=/dev/urandom of=/tmp/${filename} bs=1M count=10
+checksum=$(md5sum /tmp/${filename} | awk '{print $1}')
+TEST cp -v /tmp/${filename} $M0/1
+TEST cp -v /tmp/${filename} $M0/2
+TEST cp -v /tmp/${filename} $M0/3
+TEST cp -v /tmp/${filename} $M0/4
+TEST [ "$(md5sum $M0/1 | awk '{print $1}')" == "$checksum" ]
+TEST [ "$(md5sum $M0/2 | awk '{print $1}')" == "$checksum" ]
+TEST [ "$(md5sum $M0/3 | awk '{print $1}')" == "$checksum" ]
+TEST [ "$(md5sum $M0/4 | awk '{print $1}')" == "$checksum" ]
+
+# Test TRUNCATE + WRITE flow
+TEST dd if=/dev/zero of=$M0/1 bs=1M count=20
+
+# Check that readdir stats the files properly and we get the correct sizes
+TEST [ $(find $M0 -size +9M | wc -l) -eq 4 ];
+
+# Test mkdir + rmdir.
+TEST mkdir $M0/rmdir_me
+nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+TEST rmdir $M0/rmdir_me
+nfiles=$(ls -d $B0/b*/*/rmdir_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+
+# Test rename.
+# Still flaky, so disabled until it can be debugged.
+TEST touch $M0/rename_me
+nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+TEST mv $M0/rename_me $M0/such_rename
+nfiles=$(ls $B0/b*/*/rename_me 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "0" ]
+nfiles=$(ls $B0/b*/*/such_rename 2> /dev/null | wc -l)
+TEST [ "$nfiles" = "1" ]
+
+# Test rename of a file that doesn't exist.
+TEST ! mv $M0/does-not-exist $M0/neither-does-this
+
+# Test rename over an existing file.
+ok=yes
+for i in $(seq 0 9); do
+ echo foo > $M0/src$i
+ echo bar > $M0/dst$i
+done
+for i in $(seq 0 9); do
+ mv $M0/src$i $M0/dst$i
+done
+for i in $(seq 0 9); do
+ nfiles=$(cat $B0/b0/*/dst$i | wc -l)
+ if [ "$nfiles" = "2" ]; then
+ echo "COLLISION on dst$i"
+ (ls -l $B0/b0/*/dst$i; cat $B0/b0/*/dst$i) | sed "/^/s// /"
+ ok=no
+ fi
+done
+EXPECT "yes" echo $ok
+
+# Test rename of a directory.
+count_copies () {
+ ls -d $B0/b?/?/$1 2> /dev/null | wc -l
+}
+TEST mkdir $M0/foo_dir
+EXPECT 1 count_copies foo_dir
+EXPECT 0 count_copies bar_dir
+TEST mv $M0/foo_dir $M0/bar_dir
+EXPECT 0 count_copies foo_dir
+EXPECT 1 count_copies bar_dir
+
+for x in $(seq 0 99); do
+ touch $M0/target$x
+ ln -s $M0/target$x $M0/link$x
+done
+on_0=$(ls $B0/b*/0/link* | wc -l)
+on_1=$(ls $B0/b*/1/link* | wc -l)
+TEST [ "$on_0" -eq 100 ]
+TEST [ "$on_1" -eq 0 ]
+TEST [ "$(ls -l $M0/link* | wc -l)" = 100 ]
+
+# Test (hard) link.
+_test_hardlink () {
+ local b
+ local has_src
+ local has_dst
+ local src_inum
+ local dst_inum
+ touch $M0/hardsrc$1
+ ln $M0/hardsrc$1 $M0/harddst$1
+ for b in $B0/b{0}/{0,1}; do
+ [ -f $b/hardsrc$1 ]; has_src=$?
+ [ -f $b/harddst$1 ]; has_dst=$?
+ if [ "$has_src" != "$has_dst" ]; then
+ echo "MISSING $b/hardxxx$1 $has_src $has_dst"
+ return
+ fi
+ if [ "$has_src$has_dst" = "00" ]; then
+ src_inum=$(stat -c '%i' $b/hardsrc$1)
+ dst_inum=$(stat -c '%i' $b/harddst$1)
+ if [ "$dst_inum" != "$src_inum" ]; then
+ echo "MISMATCH $b/hardxx$i $src_inum $dst_inum"
+ return
+ fi
+ fi
+ done
+ echo "OK"
+}
+
+test_hardlink () {
+ local result=$(_test_hardlink $*)
+ # [ "$result" = "OK" ] || echo $result > /dev/tty
+ echo $result
+}
+
+# Do this multiple times to make sure colocation isn't a fluke.
+EXPECT "OK" test_hardlink 0
+EXPECT "OK" test_hardlink 1
+EXPECT "OK" test_hardlink 2
+EXPECT "OK" test_hardlink 3
+EXPECT "OK" test_hardlink 4
+EXPECT "OK" test_hardlink 5
+EXPECT "OK" test_hardlink 6
+EXPECT "OK" test_hardlink 7
+EXPECT "OK" test_hardlink 8
+EXPECT "OK" test_hardlink 9
+
+# Test remove hardlink source. ensure deleting one file
+# doesn't delete the data unless link-count is 1
+TEST mkdir $M0/hardlink
+TEST touch $M0/hardlink/fileA
+echo "data" >> $M0/hardlink/fileA
+checksum=$(md5sum $M0/hardlink/fileA | awk '{print $1}')
+TEST ln $M0/hardlink/fileA $M0/hardlink/fileB
+TEST [ $(dir -1 $M0/hardlink/ | wc -l) -eq 2 ]
+TEST rm $M0/hardlink/fileA
+TEST [ $(dir -1 $M0/hardlink/ | wc -l) -eq 1 ]
+TEST [ "$(md5sum $M0/hardlink/fileB | awk '{print $1}')" == "$checksum" ]
+
+#
+# FIXME: statfs values look ok but the test is bad
+#
+# Test statfs. If we're doing it right, the numbers for the mountpoint should be
+# double those for the brick filesystem times the number of bricks,
+# but unless we're on a completely idle
+# system (which never happens) the numbers can change even while this function
+# runs and that would trip us up. Do a sloppy comparison to deal with that.
+#compare_fields () {
+# val1=$(df $1 | grep / | awk "{print \$$3}")
+# val2=$(df $2 | grep / | awk "{print \$$3}")
+# [ "$val2" -gt "$(((val1/(29/10))*19/10))" -a "$val2" -lt "$(((val1/(31/10))*21/10))" ]
+#}
+
+#brick_df=$(df $B0 | grep /)
+#mount_df=$(df $M0 | grep /)
+#TEST compare_fields $B0 $M0 2 # Total blocks
+#TEST compare_fields $B0 $M0 3 # Used
+#TEST compare_fields $B0 $M0 4 # Available
+
+# Test removexattr.
+#RXATTR_FILE=$(get_file_not_on_disk0 rxtest)
+#TEST setfattr -n user.foo -v bar $M0/$RXATTR_FILE
+#TEST getfattr -n user.foo $B0/b0/1/$RXATTR_FILE
+#TEST setfattr -x user.foo $M0/$RXATTR_FILE
+#TEST ! getfattr -n user.foo $B0/b0/1/$RXATTR_FILE
+
+# Test fsyncdir. We can't really test whether it's doing the right thing,
+# but we can test that it doesn't fail and we can hand-check that it's calling
+# down to all of the disks instead of just one.
+#
+# P.S. There's no fsyncdir test in the rest of Gluster, so who even knows if
+# other translators are handling it correctly?
+
+#FSYNCDIR_EXE=$(dirname $0)/fsyncdir
+#build_tester ${FSYNCDIR_EXE}.c
+#TEST touch $M0/fsyncdir_src
+#TEST $FSYNCDIR_EXE $M0 $M0/fsyncdir_src $M0/fsyncdir_dst
+#TEST rm -f $FSYNCDIR_EXE
+
+# Test fsetxattr, fgetxattr, fremovexattr (in that order).
+FXATTR_FILE=$M0/fxfile1
+TEST touch $FXATTR_FILE
+FXATTR_EXE=$(dirname $0)/fxattr
+build_tester ${FXATTR_EXE}.c
+TEST ! getfattr -n user.fxtest $FXATTR_FILE
+TEST $FXATTR_EXE $FXATTR_FILE set value1
+TEST getfattr -n user.fxtest $FXATTR_FILE
+TEST setfattr -n user.fxtest -v value2 $FXATTR_FILE
+TEST $FXATTR_EXE $FXATTR_FILE get value2
+TEST $FXATTR_EXE $FXATTR_FILE remove
+TEST ! getfattr -n user.fxtest $FXATTR_FILE
+TEST rm -f $FXATTR_EXE
+
+# Test ftruncate
+FTRUNCATE_EXE=$(dirname $0)/ftruncate
+build_tester ${FTRUNCATE_EXE}.c
+FTRUNCATE_FILE=$M0/ftfile1
+TEST dd if=/dev/urandom of=$FTRUNCATE_FILE count=1 bs=1MB
+TEST $FTRUNCATE_EXE $FTRUNCATE_FILE
+#gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/b0/*/ftfile1))
+
+# Test fallocate, discard, zerofill. Actually we don't so much check that these
+# *work* as that they don't throw any errors (especially ENOENT because the
+# file's not on disk zero).
+FALLOC_FILE=fatest1
+TEST touch $M0/$FALLOC_FILE
+TEST fallocate -l $((4096*5)) $M0/$FALLOC_FILE
+TEST fallocate -p -o 4096 -l 4096 $M0/$FALLOC_FILE
+# This actually fails with "operation not supported" on most filesystems, so
+# don't leave it enabled except to test changes.
+#TEST fallocate -z -o $((4096*3)) -l 4096 $M0/$FALLOC_FILE
+
+#cleanup;
diff --git a/tests/basic/metadisp/metadisp.vol b/tests/basic/metadisp/metadisp.vol
new file mode 100644
index 00000000000..58ae2f6f2a8
--- /dev/null
+++ b/tests/basic/metadisp/metadisp.vol
@@ -0,0 +1,14 @@
+volume posix-0
+ type storage/posix
+ option directory /d/backends/b0/0
+end-volume
+
+volume posix-1
+ type storage/posix
+ option directory /d/backends/b0/1
+end-volume
+
+volume metadisp-0
+ type features/metadisp
+ subvolumes posix-0 posix-1
+end-volume
diff --git a/tests/basic/mgmt_v3-locks.t b/tests/basic/mgmt_v3-locks.t
new file mode 100644
index 00000000000..9d766f40602
--- /dev/null
+++ b/tests/basic/mgmt_v3-locks.t
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+function volume_count {
+ local cli=$1;
+ if [ $cli -eq '1' ] ; then
+ $CLI_1 volume info | grep 'Volume Name' | wc -l;
+ else
+ $CLI_2 volume info | grep 'Volume Name' | wc -l;
+ fi
+}
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI_1 volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+function two_diff_vols_create {
+ # Both volume creates should be successful
+ $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0 &
+ PID_1=$!
+
+ $CLI_2 volume create $V1 $H1:$B1/$V1 $H2:$B2/$V1 $H3:$B3/$V1 &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function two_diff_vols_start {
+ # Both volume starts should be successful
+ $CLI_1 volume start $V0 &
+ PID_1=$!
+
+ $CLI_2 volume start $V1 &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function two_diff_vols_stop_force {
+ # Force stop, so that if rebalance from the
+ # remove bricks is in progress, stop can
+ # still go ahead. Both volume stops should
+ # be successful
+ $CLI_1 volume stop $V0 force &
+ PID_1=$!
+
+ $CLI_2 volume stop $V1 force &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function same_vol_remove_brick {
+
+ # Running two same vol commands at the same time can result in
+ # two success', two failures, or one success and one failure, all
+ # of which are valid. The only thing that shouldn't happen is a
+ # glusterd crash.
+
+ local vol=$1
+ local brick=$2
+ $CLI_1 volume remove-brick $1 $2 start &
+ PID=$!
+ $CLI_2 volume remove-brick $1 $2 start
+ wait $PID
+}
+
+cleanup;
+
+TEST launch_cluster 3;
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers
+
+two_diff_vols_create
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Created' volinfo_field $V1 'Status';
+
+two_diff_vols_start
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 'Started' volinfo_field $V1 'Status';
+
+same_vol_remove_brick $V0 $H2:$B2/$V0
+# Checking glusterd crashed or not after same volume remove brick
+# on both nodes.
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers
+
+same_vol_remove_brick $V1 $H2:$B2/$V1
+# Checking glusterd crashed or not after same volume remove brick
+# on both nodes.
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers
+
+$CLI_1 volume set $V0 diagnostics.client-log-level DEBUG &
+PID=$!
+$CLI_1 volume set $V1 diagnostics.client-log-level DEBUG
+wait $PID
+kill_glusterd 3
+$CLI_1 volume status $V0
+$CLI_2 volume status $V1
+$CLI_1 peer status
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 'Started' volinfo_field $V1 'Status';
+
+TEST $glusterd_3
+$CLI_1 volume status $V0
+$CLI_2 volume status $V1
+$CLI_1 peer status
+cleanup;
diff --git a/tests/basic/mount-options.disabled b/tests/basic/mount-options.disabled
new file mode 100644
index 00000000000..a04c8686276
--- /dev/null
+++ b/tests/basic/mount-options.disabled
@@ -0,0 +1,143 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd --xlator-option=*.rpc-auth-allow-insecure=on
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 server.allow-insecure on
+TEST $CLI volume start $V0
+
+#test all the options available to see if the mount succeeds with those options
+#or not. This does not test functionality. This is added to prevent options
+#being removed in future breaking backward-compatibility.
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --entry-timeout=0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile=$GLUSTERD_WORKDIR/vols/$V0/${V0}-fuse.vol $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 --log-file=/tmp/a.txt --log-level=DEBUG $M0
+EXPECT_NOT "0" wc -l /tmp/a.txt
+TEST grep " D " /tmp/a.txt
+TEST rm -f /tmp/a.txt
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --acl
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --aux-gfid-mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --enable-ino32
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --fopen-keep-cache=yes
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --fopen-keep-cache=no
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --fopen-keep-cache=fail
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --mac-compat=yes
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --mac-compat=no
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --mac-compat=fail
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --use-readdirp=yes
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --use-readdirp=no
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --use-readdirp=fail
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --direct-io-mode=yes
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --direct-io-mode=no
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --direct-io-mode=fail
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --read-only
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --selinux
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --worm
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-check
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --dump-fuse=/tmp/a.txt
+EXPECT "0" stat /tmp/a.txt
+TEST rm -f /tmp/a.txt
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --gid-timeout=0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --gid-timeout=-1
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --gid-timeout=abc
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --background-qlen=16
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --background-qlen=abc
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --background-qlen=-1
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --congestion-threshold=12
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --congestion-threshold=abc
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --congestion=threshold=-1
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --negative-timeout=10
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --negative-timeout=abc
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --negative-timeout=-1
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --pid-file=/tmp/a.txt
+EXPECT_NOT "0" wc -l /tmp/a.txt
+TEST rm -f /tmp/a.txt
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-server-port=24007
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-server-port=2400
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-server-transport=tcp
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-server-transport=ib-verbs
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --auto-invalidation=off
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volfile-server-port=socket
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volume-name=$V0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --volume-name=abcd
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --invalid-option
+
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --invalid-option=abc
+cleanup;
diff --git a/tests/basic/mount.t b/tests/basic/mount.t
new file mode 100755
index 00000000000..3a3d7cc9d8d
--- /dev/null
+++ b/tests/basic/mount.t
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6,7,8,9};
+TEST $CLI volume set $V0 nfs.disable false
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+
+## Make volume tightly consistent for metdata
+TEST $CLI volume set $V0 performance.stat-prefetch off;
+
+## Mount FUSE with caching disabled (read-write)
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+## Check consistent "rw" option
+TEST 'mount -t $MOUNT_TYPE_FUSE | grep -E "^$H0:$V0 "|$GREP_MOUNT_OPT_RW';
+TEST 'grep -E "^$H0:$V0 .+ ,?rw,?" /proc/mounts';
+
+## Mount FUSE with caching disabled (read-only)
+TEST $GFS --read-only -s $H0 --volfile-id $V0 $M1;
+
+## Check consistent "ro" option
+TEST 'mount -t $MOUNT_TYPE_FUSE | grep -E "^$H0:$V0 "|$GREP_MOUNT_OPT_RO';
+TEST 'grep -E "^$H0:$V0 .+ ,?ro(,.+)?" /proc/mounts';
+
+## Wait for volume to register with rpc.mountd
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+## Mount NFS
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+## Test for consistent views between NFS and FUSE mounts
+## write access to $M1 should fail
+TEST ! stat $M0/newfile;
+TEST ! touch $M1/newfile;
+TEST touch $M0/newfile;
+TEST stat $M1/newfile;
+TEST stat $N0/newfile;
+TEST ! rm -f $M1/newfile;
+TEST rm -f $N0/newfile;
+TEST ! stat $M0/newfile;
+TEST ! stat $M1/newfile;
+
+# No need to check for status here right now
+$(dirname $0)/rpc-coverage.sh $N0 >/dev/null
+
+## Before killing daemon to avoid deadlocks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/basic/mpx-compat.t b/tests/basic/mpx-compat.t
new file mode 100644
index 00000000000..baf629dbf9b
--- /dev/null
+++ b/tests/basic/mpx-compat.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+#This test tests that self-heals don't perform fsync when durability is turned
+#off
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../traps.rc
+. $(dirname $0)/../volume.rc
+
+function count_processes {
+ # It would generally be a good idea to use "pgrep -x" to ensure an
+ # exact match, but the version of pgrep we have on NetBSD (a.k.a.
+ # the worst operating system ever) doesn't support that option.
+ # Fortunately, "glusterfsd" isn't the prefix of any other name,
+ # so this works anyway. For now.
+ pgrep glusterfsd | wc -w
+}
+
+function count_brick_pids {
+ $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+ | grep -v "N/A" | sort | uniq | wc -l
+}
+
+cleanup
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex yes
+
+# Create two vanilla volumes.
+TEST $CLI volume create $V0 $H0:$B0/brick-${V0}-{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick-${V1}-{0,1}
+
+# Enable brick log-level to DEBUG
+gluster v set $V0 diagnostics.brick-log-level DEBUG
+
+# Start both.
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+
+# There should be only one process for compatible volumes. We can't use
+# EXPECT_WITHIN here because it could transiently see one process as two are
+# coming up, and yield a false positive.
+sleep $PROCESS_UP_TIMEOUT
+EXPECT "1" count_processes
+EXPECT 1 count_brick_pids
+
+# Make the second volume incompatible with the first.
+TEST $CLI volume stop $V1
+TEST $CLI volume set $V1 server.manage-gids no
+TEST $CLI volume start $V1
+
+# There should be two processes this time (can't share protocol/server).
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" count_processes
+
+cleanup; \ No newline at end of file
diff --git a/tests/basic/multiple-volume-shd-mux.t b/tests/basic/multiple-volume-shd-mux.t
new file mode 100644
index 00000000000..d7cfbaec85f
--- /dev/null
+++ b/tests/basic/multiple-volume-shd-mux.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=16
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume start $V0
+
+shd_pid=$(get_shd_mux_pid $V0)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+for i in $(seq 1 3); do
+ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_afr$i
+ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_ec$i
+done
+
+#Check the thread count become to number of volumes*number of ec subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^18$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to number of volumes*number of afr subvolume (4*6=24)
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^24$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+#Delete the volumes
+for i in $(seq 1 3); do
+ TEST $CLI volume stop ${V0}_afr$i
+ TEST $CLI volume stop ${V0}_ec$i
+ TEST $CLI volume delete ${V0}_afr$i
+ TEST $CLI volume delete ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+cleanup
diff --git a/tests/basic/multiplex.t b/tests/basic/multiplex.t
new file mode 100644
index 00000000000..2f558a6824b
--- /dev/null
+++ b/tests/basic/multiplex.t
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../traps.rc
+. $(dirname $0)/../volume.rc
+
+function count_up_bricks {
+ $CLI --xml volume status $V0 | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+function count_brick_pids {
+ $CLI --xml volume status $V0 | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+ | grep -v "N/A" | sort | uniq | wc -l
+}
+
+cleanup
+
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 features.trash enable
+
+TEST $CLI volume start $V0
+# Without multiplexing, there would be two.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks
+EXPECT 1 count_brick_processes
+
+TEST $CLI volume stop $V0
+#Testing the volume set command introduced for protocol/server
+TEST $CLI volume set $V0 transport.listen-backlog 1024
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 0 count_brick_processes
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks
+EXPECT 1 count_brick_processes
+
+TEST kill_brick $V0 $H0 $B0/brick1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 1 count_up_bricks
+# Make sure the whole process didn't go away.
+EXPECT 1 count_brick_processes
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks
+EXPECT 1 count_brick_processes
+
+# Killing the first brick is a bit more of a challenge due to socket-path
+# issues.
+TEST kill_brick $V0 $H0 $B0/brick0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 1 count_up_bricks
+EXPECT 1 count_brick_processes
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks
+EXPECT 1 count_brick_processes
+
+# Make sure that the two bricks show the same PID.
+EXPECT 1 count_brick_pids
+
+# Do a quick test to make sure that the bricks are acting as separate bricks
+# even though they're in the same process.
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+for i in $(seq 10 99); do
+ echo hello > $M0/file$i
+done
+nbrick0=$(ls $B0/brick0/file?? | wc -l)
+nbrick1=$(ls $B0/brick1/file?? | wc -l)
+TEST [ $((nbrick0 + nbrick1)) -eq 90 ]
+TEST [ $((nbrick0 * nbrick1)) -ne 0 ]
+
+pkill gluster
+TEST glusterd
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 1 count_brick_processes
+
+cleanup; \ No newline at end of file
diff --git a/tests/basic/namespace.t b/tests/basic/namespace.t
new file mode 100644
index 00000000000..d1bbe7eea29
--- /dev/null
+++ b/tests/basic/namespace.t
@@ -0,0 +1,131 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+# These hashes are a result of calling SuperFastHash
+# on the corresponding folder names.
+NAMESPACE_HASH=28153613
+NAMESPACE2_HASH=3926991974
+NAMESPACE3_HASH=3493960770
+
+function check_brick_multiplex() {
+ local ret=$($CLI volume info|grep "cluster.brick-multiplex"|cut -d" " -f2)
+ local cnt="$(ls /var/log/glusterfs/bricks|wc -l)"
+ local bcnt="$(brick_count)"
+
+ if [ $bcnt -ne 1 ]; then
+ if [ -z $ret ]; then
+ ret="no"
+ fi
+
+ if [ $ret = "on" ] || [ $cnt -eq 1 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+ else
+ echo "N"
+ fi
+}
+
+function check_samples() {
+ local FOP_TYPE=$1
+ local NS_HASH=$2
+ local FILE=$3
+ local BRICK=$4
+ local GFID="$(getfattr -n trusted.gfid -e text --only-values $B0/$BRICK$FILE | xxd -p)"
+ local val="$(check_brick_multiplex)"
+
+ if [ $val = "Y" ]; then
+ BRICK="${V0}0"
+ fi
+
+ grep -i "ns_$OP" /var/log/glusterfs/bricks/d-backends-$BRICK.log |
+ grep -- $NS_HASH | sed 's/\-//g' | grep -- $GFID
+ if [ $? -eq 0 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+cleanup;
+
+TEST mkdir -p $B0/${V0}{0,1,2,3,4,5,6,7,8,9}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5,6,7,8}
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.nfs.stat-prefetch off
+TEST $CLI volume set $V0 cluster.read-subvolume-index 0
+TEST $CLI volume set $V0 diagnostics.brick-log-level DEBUG
+TEST $CLI volume set $V0 features.tag-namespaces on
+TEST $CLI volume set $V0 storage.build-pgfid on
+TEST $CLI volume start $V0
+
+sleep 2
+
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+################################
+# Paths in the samples #
+################################
+
+mkdir -p $N0/namespace
+
+# subvol_1 = bar, subvol_2 = foo, subvol_3 = hey
+# Test create, write (tagged by loc, fd respectively).
+touch $N0/namespace/{bar,foo,hey}
+echo "garbage" > $N0/namespace/bar
+echo "garbage" > $N0/namespace/foo
+echo "garbage" > $N0/namespace/hey
+EXPECT_WITHIN 10 "Y" check_samples CREATE $NAMESPACE_HASH /namespace/bar patchy0
+EXPECT_WITHIN 10 "Y" check_samples CREATE $NAMESPACE_HASH /namespace/foo patchy3
+EXPECT_WITHIN 10 "Y" check_samples CREATE $NAMESPACE_HASH /namespace/hey patchy6
+EXPECT_WITHIN 10 "Y" check_samples WRITEV $NAMESPACE_HASH /namespace/bar patchy0
+EXPECT_WITHIN 10 "Y" check_samples WRITEV $NAMESPACE_HASH /namespace/foo patchy3
+EXPECT_WITHIN 10 "Y" check_samples WRITEV $NAMESPACE_HASH /namespace/hey patchy6
+
+# Test stat (tagged by loc)
+stat $N0/namespace/bar &> /dev/null
+stat $N0/namespace/foo &> /dev/null
+stat $N0/namespace/hey &> /dev/null
+EXPECT_WITHIN 10 "Y" check_samples STAT $NAMESPACE_HASH /namespace/bar patchy0
+EXPECT_WITHIN 10 "Y" check_samples STAT $NAMESPACE_HASH /namespace/foo patchy3
+EXPECT_WITHIN 10 "Y" check_samples STAT $NAMESPACE_HASH /namespace/hey patchy6
+
+EXPECT_WITHIN 10 "Y" umount_nfs $N0;
+sleep 1
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+cat $N0/namespace/bar &> /dev/null
+EXPECT_WITHIN 10 "Y" check_samples READ $NAMESPACE_HASH /namespace/bar patchy0
+
+dir $N0/namespace &> /dev/null
+EXPECT_WITHIN 10 "Y" check_samples LOOKUP $NAMESPACE_HASH /namespace patchy0
+
+mkdir -p $N0/namespace{2,3}
+EXPECT_WITHIN 10 "Y" check_samples MKDIR $NAMESPACE2_HASH /namespace2 patchy0
+EXPECT_WITHIN 10 "Y" check_samples MKDIR $NAMESPACE3_HASH /namespace3 patchy0
+
+touch $N0/namespace2/file
+touch $N0/namespace3/file
+EXPECT_WITHIN 10 "Y" check_samples CREATE $NAMESPACE2_HASH /namespace2/file patchy0
+EXPECT_WITHIN 10 "Y" check_samples CREATE $NAMESPACE3_HASH /namespace3/file patchy0
+
+truncate -s 0 $N0/namespace/bar
+EXPECT_WITHIN 10 "Y" check_samples TRUNCATE $NAMESPACE_HASH /namespace/bar patchy0
+
+ln -s $N0/namespace/foo $N0/namespace/foo_link
+EXPECT_WITHIN 10 "Y" check_samples SYMLINK $NAMESPACE_HASH /namespace/foo patchy3
+
+open $N0/namespace/hey
+EXPECT_WITHIN 10 "Y" check_samples OPEN $NAMESPACE_HASH /namespace/hey patchy6
+
+cleanup;
diff --git a/tests/basic/netgroup_parsing.t b/tests/basic/netgroup_parsing.t
new file mode 100644
index 00000000000..cf8d871f1f8
--- /dev/null
+++ b/tests/basic/netgroup_parsing.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+NG_FILES=$(dirname $0)/../configfiles
+cleanup;
+
+function test_ng_1 ()
+{
+ glusterfsd --print-netgroups $1 | sed -n 1p
+}
+
+function test_ng_2 ()
+{
+ glusterfsd --print-netgroups $1 | sed -n 2p
+}
+
+function test_ng_3 ()
+{
+ glusterfsd --print-netgroups $1 | sed -n 3p
+}
+
+function test_ng_4 ()
+{
+ glusterfsd --print-netgroups $1 | sed -n 4p
+}
+
+function test_bad_ng ()
+{
+ glusterfsd --print-netgroups $1 2>&1 | sed -n 1p
+}
+
+function test_large_file ()
+{
+ # The build system needs this path for the test to pass.
+ # This is an important test because this file is ~1800 lines
+ # longs and is a "real-world" netgroups file.
+ glusterfsd --print-netgroups ~/opsfiles/storage/netgroup/netgroup | sed -n 1p
+}
+
+function test_empty_ng ()
+{
+ glusterfsd --print-netgroups $1 2>&1 | sed -n 2p
+}
+
+EXPECT_KEYWORD "ng3 (dev-1763.prn-2.example.com,,)" test_ng_1 $NG_FILES/netgroups
+EXPECT_KEYWORD "ng2 (dev1763.prn2.example.com,,)" test_ng_2 $NG_FILES/netgroups
+EXPECT_KEYWORD "ng1 ng2 (dev1763.prn2.example.com,,)" test_ng_3 $NG_FILES/netgroups
+EXPECT_KEYWORD "asdf ng1 ng2 (dev1763.prn2.example.com,,)" test_ng_4 $NG_FILES/netgroups
+# TODO: get a real-world large netgroup file
+#EXPECT_KEYWORD "wikipedia001.07.prn1 (wikipedia003.prn1.example.com,,)(wikipedia002.prn1.example.com,,)(wikipedia001.prn1.example.com,,)" test_large_file
+EXPECT_KEYWORD "Parse error" test_bad_ng $NG_FILES/bad_netgroups
+EXPECT_KEYWORD "No netgroups were specified except for the parent" test_empty_ng $NG_FILES/bad_netgroups
+
+cleanup;
diff --git a/tests/basic/nl-cache.t b/tests/basic/nl-cache.t
new file mode 100755
index 00000000000..90c778c8a88
--- /dev/null
+++ b/tests/basic/nl-cache.t
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0..4}
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume set $V0 group nl-cache
+EXPECT '600' volinfo_field $V0 'performance.nl-cache-timeout'
+EXPECT 'on' volinfo_field $V0 'performance.nl-cache'
+EXPECT '600' volinfo_field $V0 'features.cache-invalidation-timeout'
+EXPECT 'on' volinfo_field $V0 'features.cache-invalidation'
+EXPECT '200000' volinfo_field $V0 'network.inode-lru-limit'
+TEST $CLI volume set $V0 nl-cache-positive-entry on
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+TEST ! ls $M0/file2
+TEST touch $M0/file1
+TEST ! ls $M0/file2
+TEST touch $M0/file2
+TEST ls $M0/file2
+TEST rm $M0/file2
+TEST rm $M0/file1
+
+TEST mkdir $M0/dir1
+TEST ! ls -l $M0/dir1/file
+TEST mkdir $M0/dir1/dir2
+TEST ! ls -l $M0/dir1/file
+TEST ! ls -l $M0/dir1/dir2/file
+TEST ls -l $M0/dir1/dir2
+TEST rmdir $M0/dir1/dir2
+TEST rmdir $M0/dir1
+
+TEST ! ls -l $M0/file2
+TEST touch $M1/file2
+TEST ls -l $M0/file2
+TEST rm $M1/file2
+
+TEST ! ls -l $M0/dir1
+TEST mkdir $M1/dir1
+TEST ls -l $M0/dir1
+TEST ! ls -l $M0/dir1/file1
+TEST mkdir $M1/dir1/dir2
+TEST ! ls -l $M0/dir1/file1
+TEST ls -l $M0/dir1/dir2
+TEST ! ls -l $M1/dir1/file1
+
+TEST touch $M0/dir1/file
+TEST ln $M0/dir1/file $M0/dir1/file_link
+TEST ls -l $M1/dir1/file
+TEST ls -l $M1/dir1/file_link
+TEST rm $M0/dir1/file
+TEST rm $M0/dir1/file_link
+TEST rmdir $M0/dir1/dir2
+TEST rmdir $M0/dir1
+
+#Check mknod
+TEST ! ls -l $M0/dir
+TEST mkdir $M0/dir
+TEST mknod -m 0666 $M0/dir/block b 4 5
+TEST mknod -m 0666 $M0/dir/char c 1 5
+TEST mknod -m 0666 $M0/dir/fifo p
+TEST rm $M0/dir/block
+TEST rm $M0/dir/char
+TEST rm $M0/dir/fifo
+
+#Check getxattr
+TEST touch $M0/file1
+TEST getfattr -d -m. -e hex $M0/file1
+TEST getfattr -n "glusterfs.get_real_filename:file1" $M0;
+TEST getfattr -n "glusterfs.get_real_filename:FILE1" $M0;
+TEST ! getfattr -n "glusterfs.get_real_filename:FILE2" $M0;
+
+#Check statedump
+TEST generate_mount_statedump $V0 $M0
+TEST cleanup_mount_statedump $V0
+
+#Check reconfigure
+TEST $CLI volume reset $V0 nl-cache-timeout
+TEST $CLI volume reset $V0 nl-cache-positive-entry
+TEST $CLI volume reset $V0 nl-cache-limit
+TEST $CLI volume reset $V0 nl-cache-pass-through
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/basic/nufa.t b/tests/basic/nufa.t
new file mode 100644
index 00000000000..cb09fc5bbbf
--- /dev/null
+++ b/tests/basic/nufa.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+TEST $CLI volume set $V0 nfs.disable false
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume set $V0 nufa on;
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE with caching disabled (read-only)
+TEST $GFS --read-only -s $H0 --volfile-id $V0 $M1;
+
+## Wait for volume to register with rpc.mountd
+sleep 5;
+
+##Wait for connection establishment between nfs server and brick process
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+## Mount NFS
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+## Before killing daemon to avoid deadlocks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+cleanup;
diff --git a/tests/basic/op_errnos.t b/tests/basic/op_errnos.t
new file mode 100755
index 00000000000..9c48d7a02ad
--- /dev/null
+++ b/tests/basic/op_errnos.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../snapshot.rc
+
+function get-op_errno-xml()
+{
+ $CLI $1 --xml | xmllint --format - | grep opErrno | sed 's/\(<opErrno>\|<\/opErrno>\)//g'
+}
+
+cleanup;
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+
+EXPECT 0 get-op_errno-xml "snapshot create snap1 $V0 no-timestamp"
+EXPECT 30806 get-op_errno-xml "snapshot create snap1 imaginary_volume"
+EXPECT 30807 get-op_errno-xml "snapshot delete imaginary_snap"
+EXPECT 30809 get-op_errno-xml "snapshot restore snap1"
+TEST $CLI volume stop $V0
+EXPECT 30810 get-op_errno-xml "snapshot create snap1 $V0"
+TEST $CLI volume start $V0
+EXPECT 30811 get-op_errno-xml "snapshot clone $V0 snap1"
+EXPECT 30812 get-op_errno-xml "snapshot create snap1 $V0 no-timestamp"
+
+EXPECT 0 get-op_errno-xml "snapshot delete snap1"
+TEST $CLI volume stop $V0
+
+cleanup;
diff --git a/tests/basic/open-behind/open-behind.t b/tests/basic/open-behind/open-behind.t
new file mode 100644
index 00000000000..5e865d602e2
--- /dev/null
+++ b/tests/basic/open-behind/open-behind.t
@@ -0,0 +1,183 @@
+#!/bin/bash
+
+WD="$(dirname "${0}")"
+
+. ${WD}/../../include.rc
+. ${WD}/../../volume.rc
+
+function assign() {
+ local _assign_var="${1}"
+ local _assign_value="${2}"
+
+ printf -v "${_assign_var}" "%s" "${_assign_value}"
+}
+
+function pipe_create() {
+ local _pipe_create_var="${1}"
+ local _pipe_create_name
+ local _pipe_create_fd
+
+ _pipe_create_name="$(mktemp -u)"
+ mkfifo "${_pipe_create_name}"
+ exec {_pipe_create_fd}<>"${_pipe_create_name}"
+ rm "${_pipe_create_name}"
+
+ assign "${_pipe_create_var}" "${_pipe_create_fd}"
+}
+
+function pipe_close() {
+ local _pipe_close_fd="${!1}"
+
+ exec {_pipe_close_fd}>&-
+}
+
+function tester_start() {
+ declare -ag tester
+ local tester_in
+ local tester_out
+
+ pipe_create tester_in
+ pipe_create tester_out
+
+ ${WD}/tester <&${tester_in} >&${tester_out} &
+
+ tester=("$!" "${tester_in}" "${tester_out}")
+}
+
+function tester_send() {
+ declare -ag tester
+ local tester_res
+ local tester_extra
+
+ echo "${*}" >&${tester[1]}
+
+ read -t 3 -u ${tester[2]} tester_res tester_extra
+ echo "${tester_res} ${tester_extra}"
+ if [[ "${tester_res}" == "OK" ]]; then
+ return 0
+ fi
+
+ return 1
+}
+
+function tester_stop() {
+ declare -ag tester
+ local tester_res
+
+ tester_send "quit"
+
+ tester_res=0
+ if ! wait ${tester[0]}; then
+ tester_res=$?
+ fi
+
+ unset tester
+
+ return ${tester_res}
+}
+
+function count_open() {
+ local file="$(realpath "${B0}/${V0}/${1}")"
+ local count="0"
+ local inode
+ local ref
+
+ inode="$(stat -c %i "${file}")"
+
+ for fd in /proc/${BRICK_PID}/fd/*; do
+ ref="$(readlink "${fd}")"
+ if [[ "${ref}" == "${B0}/${V0}/"* ]]; then
+ if [[ "$(stat -c %i "${ref}")" == "${inode}" ]]; then
+ count="$((${count} + 1))"
+ fi
+ fi
+ done
+
+ echo "${count}"
+}
+
+cleanup
+
+TEST build_tester ${WD}/tester.c ${WD}/tester-fd.c
+
+TEST glusterd
+TEST pidof glusterd
+TEST ${CLI} volume create ${V0} ${H0}:${B0}/${V0}
+TEST ${CLI} volume set ${V0} flush-behind off
+TEST ${CLI} volume set ${V0} write-behind off
+TEST ${CLI} volume set ${V0} quick-read off
+TEST ${CLI} volume set ${V0} stat-prefetch on
+TEST ${CLI} volume set ${V0} io-cache off
+TEST ${CLI} volume set ${V0} open-behind on
+TEST ${CLI} volume set ${V0} lazy-open off
+TEST ${CLI} volume set ${V0} read-after-open off
+TEST ${CLI} volume start ${V0}
+
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+BRICK_PID="$(get_brick_pid ${V0} ${H0} ${B0}/${V0})"
+
+TEST touch "${M0}/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+TEST tester_start
+
+TEST tester_send fd open 0 "${M0}/test"
+EXPECT_WITHIN 5 "1" count_open "/test"
+TEST tester_send fd close 0
+EXPECT_WITHIN 5 "0" count_open "/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ${CLI} volume set ${V0} lazy-open on
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+TEST tester_send fd open 0 "${M0}/test"
+sleep 2
+EXPECT "0" count_open "/test"
+TEST tester_send fd write 0 "test"
+EXPECT "1" count_open "/test"
+TEST tester_send fd close 0
+EXPECT_WITHIN 5 "0" count_open "/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+TEST tester_send fd open 0 "${M0}/test"
+EXPECT "0" count_open "/test"
+EXPECT "test" tester_send fd read 0 64
+# Even though read-after-open is disabled, use-anonymous-fd is also disabled,
+# so reads need to open the file first.
+EXPECT "1" count_open "/test"
+TEST tester_send fd close 0
+EXPECT "0" count_open "/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+TEST tester_send fd open 0 "${M0}/test"
+EXPECT "0" count_open "/test"
+TEST tester_send fd open 1 "${M0}/test"
+EXPECT "2" count_open "/test"
+TEST tester_send fd close 0
+EXPECT_WITHIN 5 "1" count_open "/test"
+TEST tester_send fd close 1
+EXPECT_WITHIN 5 "0" count_open "/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST ${CLI} volume set ${V0} read-after-open on
+TEST ${GFS} --volfile-id=/${V0} --volfile-server=${H0} ${M0};
+
+TEST tester_send fd open 0 "${M0}/test"
+EXPECT "0" count_open "/test"
+EXPECT "test" tester_send fd read 0 64
+EXPECT "1" count_open "/test"
+TEST tester_send fd close 0
+EXPECT_WITHIN 5 "0" count_open "/test"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST tester_stop
+
+cleanup
diff --git a/tests/basic/open-behind/tester-fd.c b/tests/basic/open-behind/tester-fd.c
new file mode 100644
index 00000000000..00f02bc5b0a
--- /dev/null
+++ b/tests/basic/open-behind/tester-fd.c
@@ -0,0 +1,99 @@
+/*
+ Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "tester.h"
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+static int32_t
+fd_open(context_t *ctx, command_t *cmd)
+{
+ obj_t *obj;
+ int32_t fd;
+
+ obj = cmd->args[0].obj.ref;
+
+ fd = open(cmd->args[1].str.data, O_RDWR);
+ if (fd < 0) {
+ return error(errno, "open() failed");
+ }
+
+ obj->type = OBJ_TYPE_FD;
+ obj->fd = fd;
+
+ out_ok("%d", fd);
+
+ return 0;
+}
+
+static int32_t
+fd_close(context_t *ctx, command_t *cmd)
+{
+ obj_t *obj;
+
+ obj = cmd->args[0].obj.ref;
+ obj->type = OBJ_TYPE_NONE;
+
+ if (close(obj->fd) != 0) {
+ return error(errno, "close() failed");
+ }
+
+ out_ok();
+
+ return 0;
+}
+
+static int32_t
+fd_write(context_t *ctx, command_t *cmd)
+{
+ ssize_t len, ret;
+
+ len = strlen(cmd->args[1].str.data);
+ ret = write(cmd->args[0].obj.ref->fd, cmd->args[1].str.data, len);
+ if (ret < 0) {
+ return error(errno, "write() failed");
+ }
+
+ out_ok("%zd", ret);
+
+ return 0;
+}
+
+static int32_t
+fd_read(context_t *ctx, command_t *cmd)
+{
+ char data[cmd->args[1].num.value + 1];
+ ssize_t ret;
+
+ ret = read(cmd->args[0].obj.ref->fd, data, cmd->args[1].num.value);
+ if (ret < 0) {
+ return error(errno, "read() failed");
+ }
+
+ data[ret] = 0;
+
+ out_ok("%zd %s", ret, data);
+
+ return 0;
+}
+
+command_t fd_commands[] = {
+ {"open", fd_open, CMD_ARGS(ARG_VAL(OBJ_TYPE_NONE), ARG_STR(1024))},
+ {"close", fd_close, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD))},
+ {"write", fd_write, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_STR(1024))},
+ {"read", fd_read, CMD_ARGS(ARG_VAL(OBJ_TYPE_FD), ARG_NUM(0, 1024))},
+ CMD_END};
diff --git a/tests/basic/open-behind/tester.c b/tests/basic/open-behind/tester.c
new file mode 100644
index 00000000000..b2da71c8385
--- /dev/null
+++ b/tests/basic/open-behind/tester.c
@@ -0,0 +1,444 @@
+/*
+ Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "tester.h"
+
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+
+static void *
+mem_alloc(size_t size)
+{
+ void *ptr;
+
+ ptr = malloc(size);
+ if (ptr == NULL) {
+ error(ENOMEM, "Failed to allocate memory (%zu bytes)", size);
+ }
+
+ return ptr;
+}
+
+static void
+mem_free(void *ptr)
+{
+ free(ptr);
+}
+
+static bool
+buffer_create(context_t *ctx, size_t size)
+{
+ ctx->buffer.base = mem_alloc(size);
+ if (ctx->buffer.base == NULL) {
+ return false;
+ }
+
+ ctx->buffer.size = size;
+ ctx->buffer.len = 0;
+ ctx->buffer.pos = 0;
+
+ return true;
+}
+
+static void
+buffer_destroy(context_t *ctx)
+{
+ mem_free(ctx->buffer.base);
+ ctx->buffer.size = 0;
+ ctx->buffer.len = 0;
+}
+
+static int32_t
+buffer_get(context_t *ctx)
+{
+ ssize_t len;
+
+ if (ctx->buffer.pos >= ctx->buffer.len) {
+ len = read(0, ctx->buffer.base, ctx->buffer.size);
+ if (len < 0) {
+ return error(errno, "read() failed");
+ }
+ if (len == 0) {
+ return 0;
+ }
+
+ ctx->buffer.len = len;
+ ctx->buffer.pos = 0;
+ }
+
+ return ctx->buffer.base[ctx->buffer.pos++];
+}
+
+static int32_t
+str_skip_spaces(context_t *ctx, int32_t current)
+{
+ while ((current > 0) && (current != '\n') && isspace(current)) {
+ current = buffer_get(ctx);
+ }
+
+ return current;
+}
+
+static int32_t
+str_token(context_t *ctx, char *buffer, uint32_t size, int32_t current)
+{
+ uint32_t len;
+
+ current = str_skip_spaces(ctx, current);
+
+ len = 0;
+ while ((size > 0) && (current > 0) && (current != '\n') &&
+ !isspace(current)) {
+ len++;
+ *buffer++ = current;
+ size--;
+ current = buffer_get(ctx);
+ }
+
+ if (len == 0) {
+ return error(ENODATA, "Expecting a token");
+ }
+
+ if (size == 0) {
+ return error(ENOBUFS, "Token too long");
+ }
+
+ *buffer = 0;
+
+ return current;
+}
+
+static int32_t
+str_number(context_t *ctx, uint64_t min, uint64_t max, uint64_t *value,
+ int32_t current)
+{
+ char text[32], *ptr;
+ uint64_t num;
+
+ current = str_token(ctx, text, sizeof(text), current);
+ if (current > 0) {
+ num = strtoul(text, &ptr, 0);
+ if ((*ptr != 0) || (num < min) || (num > max)) {
+ return error(ERANGE, "Invalid number");
+ }
+ *value = num;
+ }
+
+ return current;
+}
+
+static int32_t
+str_eol(context_t *ctx, int32_t current)
+{
+ current = str_skip_spaces(ctx, current);
+ if (current != '\n') {
+ return error(EINVAL, "Expecting end of command");
+ }
+
+ return current;
+}
+
+static void
+str_skip(context_t *ctx, int32_t current)
+{
+ while ((current > 0) && (current != '\n')) {
+ current = buffer_get(ctx);
+ }
+}
+
+static int32_t
+cmd_parse_obj(context_t *ctx, arg_t *arg, int32_t current)
+{
+ obj_t *obj;
+ uint64_t id;
+
+ current = str_number(ctx, 0, ctx->obj_count, &id, current);
+ if (current <= 0) {
+ return current;
+ }
+
+ obj = &ctx->objs[id];
+ if (obj->type != arg->obj.type) {
+ if (obj->type != OBJ_TYPE_NONE) {
+ return error(EBUSY, "Object is in use");
+ }
+ return error(ENOENT, "Object is not defined");
+ }
+
+ arg->obj.ref = obj;
+
+ return current;
+}
+
+static int32_t
+cmd_parse_num(context_t *ctx, arg_t *arg, int32_t current)
+{
+ return str_number(ctx, arg->num.min, arg->num.max, &arg->num.value,
+ current);
+}
+
+static int32_t
+cmd_parse_str(context_t *ctx, arg_t *arg, int32_t current)
+{
+ return str_token(ctx, arg->str.data, arg->str.size, current);
+}
+
+static int32_t
+cmd_parse_args(context_t *ctx, command_t *cmd, int32_t current)
+{
+ arg_t *arg;
+
+ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
+ switch (arg->type) {
+ case ARG_TYPE_OBJ:
+ current = cmd_parse_obj(ctx, arg, current);
+ break;
+ case ARG_TYPE_NUM:
+ current = cmd_parse_num(ctx, arg, current);
+ break;
+ case ARG_TYPE_STR:
+ current = cmd_parse_str(ctx, arg, current);
+ break;
+ default:
+ return error(EINVAL, "Unknown argument type");
+ }
+ }
+
+ if (current < 0) {
+ return current;
+ }
+
+ current = str_eol(ctx, current);
+ if (current <= 0) {
+ return error(EINVAL, "Syntax error");
+ }
+
+ return cmd->handler(ctx, cmd);
+}
+
+static int32_t
+cmd_parse(context_t *ctx, command_t *cmds)
+{
+ char text[32];
+ command_t *cmd;
+ int32_t current;
+
+ cmd = cmds;
+ do {
+ current = str_token(ctx, text, sizeof(text), buffer_get(ctx));
+ if (current <= 0) {
+ return current;
+ }
+
+ while (cmd->name != NULL) {
+ if (strcmp(cmd->name, text) == 0) {
+ if (cmd->handler != NULL) {
+ return cmd_parse_args(ctx, cmd, current);
+ }
+ cmd = cmd->cmds;
+ break;
+ }
+ cmd++;
+ }
+ } while (cmd->name != NULL);
+
+ str_skip(ctx, current);
+
+ return error(ENOTSUP, "Unknown command");
+}
+
+static void
+cmd_fini(context_t *ctx, command_t *cmds)
+{
+ command_t *cmd;
+ arg_t *arg;
+
+ for (cmd = cmds; cmd->name != NULL; cmd++) {
+ if (cmd->handler == NULL) {
+ cmd_fini(ctx, cmd->cmds);
+ } else {
+ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
+ switch (arg->type) {
+ case ARG_TYPE_STR:
+ mem_free(arg->str.data);
+ arg->str.data = NULL;
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+}
+
+static bool
+cmd_init(context_t *ctx, command_t *cmds)
+{
+ command_t *cmd;
+ arg_t *arg;
+
+ for (cmd = cmds; cmd->name != NULL; cmd++) {
+ if (cmd->handler == NULL) {
+ if (!cmd_init(ctx, cmd->cmds)) {
+ return false;
+ }
+ } else {
+ for (arg = cmd->args; arg->type != ARG_TYPE_NONE; arg++) {
+ switch (arg->type) {
+ case ARG_TYPE_STR:
+ arg->str.data = mem_alloc(arg->str.size);
+ if (arg->str.data == NULL) {
+ return false;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+static bool
+objs_create(context_t *ctx, uint32_t count)
+{
+ uint32_t i;
+
+ ctx->objs = mem_alloc(sizeof(obj_t) * count);
+ if (ctx->objs == NULL) {
+ return false;
+ }
+ ctx->obj_count = count;
+
+ for (i = 0; i < count; i++) {
+ ctx->objs[i].type = OBJ_TYPE_NONE;
+ }
+
+ return true;
+}
+
+static int32_t
+objs_destroy(context_t *ctx)
+{
+ uint32_t i;
+ int32_t err;
+
+ err = 0;
+ for (i = 0; i < ctx->obj_count; i++) {
+ if (ctx->objs[i].type != OBJ_TYPE_NONE) {
+ err = error(ENOTEMPTY, "Objects not destroyed");
+ break;
+ }
+ }
+
+ mem_free(ctx->objs);
+ ctx->objs = NULL;
+ ctx->obj_count = 0;
+
+ return err;
+}
+
+static context_t *
+init(size_t size, uint32_t objs, command_t *cmds)
+{
+ context_t *ctx;
+
+ ctx = mem_alloc(sizeof(context_t));
+ if (ctx == NULL) {
+ goto failed;
+ }
+
+ if (!buffer_create(ctx, size)) {
+ goto failed_ctx;
+ }
+
+ if (!objs_create(ctx, objs)) {
+ goto failed_buffer;
+ }
+
+ if (!cmd_init(ctx, cmds)) {
+ goto failed_objs;
+ }
+
+ ctx->active = true;
+
+ return ctx;
+
+failed_objs:
+ cmd_fini(ctx, cmds);
+ objs_destroy(ctx);
+failed_buffer:
+ buffer_destroy(ctx);
+failed_ctx:
+ mem_free(ctx);
+failed:
+ return NULL;
+}
+
+static int32_t
+fini(context_t *ctx, command_t *cmds)
+{
+ int32_t ret;
+
+ cmd_fini(ctx, cmds);
+ buffer_destroy(ctx);
+
+ ret = objs_destroy(ctx);
+
+ ctx->active = false;
+
+ return ret;
+}
+
+static int32_t
+exec_quit(context_t *ctx, command_t *cmd)
+{
+ ctx->active = false;
+
+ return 0;
+}
+
+static command_t commands[] = {{"fd", NULL, CMD_SUB(fd_commands)},
+ {"quit", exec_quit, CMD_ARGS()},
+ CMD_END};
+
+int32_t
+main(int32_t argc, char *argv[])
+{
+ context_t *ctx;
+ int32_t res;
+
+ ctx = init(1024, 16, commands);
+ if (ctx == NULL) {
+ return 1;
+ }
+
+ do {
+ res = cmd_parse(ctx, commands);
+ if (res < 0) {
+ out_err(-res);
+ }
+ } while (ctx->active);
+
+ res = fini(ctx, commands);
+ if (res >= 0) {
+ out_ok();
+ return 0;
+ }
+
+ out_err(-res);
+
+ return 1;
+}
diff --git a/tests/basic/open-behind/tester.h b/tests/basic/open-behind/tester.h
new file mode 100644
index 00000000000..64e940c78fc
--- /dev/null
+++ b/tests/basic/open-behind/tester.h
@@ -0,0 +1,145 @@
+/*
+ Copyright (c) 2020 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __TESTER_H__
+#define __TESTER_H__
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <stdbool.h>
+
+enum _obj_type;
+typedef enum _obj_type obj_type_t;
+
+enum _arg_type;
+typedef enum _arg_type arg_type_t;
+
+struct _buffer;
+typedef struct _buffer buffer_t;
+
+struct _obj;
+typedef struct _obj obj_t;
+
+struct _context;
+typedef struct _context context_t;
+
+struct _arg;
+typedef struct _arg arg_t;
+
+struct _command;
+typedef struct _command command_t;
+
+enum _obj_type { OBJ_TYPE_NONE, OBJ_TYPE_FD };
+
+enum _arg_type { ARG_TYPE_NONE, ARG_TYPE_OBJ, ARG_TYPE_NUM, ARG_TYPE_STR };
+
+struct _buffer {
+ char *base;
+ uint32_t size;
+ uint32_t len;
+ uint32_t pos;
+};
+
+struct _obj {
+ obj_type_t type;
+ union {
+ int32_t fd;
+ };
+};
+
+struct _context {
+ obj_t *objs;
+ buffer_t buffer;
+ uint32_t obj_count;
+ bool active;
+};
+
+struct _arg {
+ arg_type_t type;
+ union {
+ struct {
+ obj_type_t type;
+ obj_t *ref;
+ } obj;
+ struct {
+ uint64_t value;
+ uint64_t min;
+ uint64_t max;
+ } num;
+ struct {
+ uint32_t size;
+ char *data;
+ } str;
+ };
+};
+
+struct _command {
+ const char *name;
+ int32_t (*handler)(context_t *ctx, command_t *cmd);
+ union {
+ arg_t *args;
+ command_t *cmds;
+ };
+};
+
+#define msg(_stream, _fmt, _args...) \
+ do { \
+ fprintf(_stream, _fmt "\n", ##_args); \
+ fflush(_stream); \
+ } while (0)
+
+#define msg_out(_fmt, _args...) msg(stdout, _fmt, ##_args)
+#define msg_err(_err, _fmt, _args...) \
+ ({ \
+ int32_t __msg_err = (_err); \
+ msg(stderr, "[%4u:%-15s] " _fmt, __LINE__, __FUNCTION__, __msg_err, \
+ ##_args); \
+ -__msg_err; \
+ })
+
+#define error(_err, _fmt, _args...) msg_err(_err, "E(%4d) " _fmt, ##_args)
+#define warn(_err, _fmt, _args...) msg_err(_err, "W(%4d) " _fmt, ##_args)
+#define info(_err, _fmt, _args...) msg_err(_err, "I(%4d) " _fmt, ##_args)
+
+#define out_ok(_args...) msg_out("OK " _args)
+#define out_err(_err) msg_out("ERR %d", _err)
+
+#define ARG_END \
+ { \
+ ARG_TYPE_NONE \
+ }
+
+#define CMD_ARGS1(_x, _args...) \
+ .args = (arg_t[]) { _args }
+#define CMD_ARGS(_args...) CMD_ARGS1(, ##_args, ARG_END)
+
+#define CMD_SUB(_cmds) .cmds = _cmds
+
+#define CMD_END \
+ { \
+ NULL, NULL, CMD_SUB(NULL) \
+ }
+
+#define ARG_VAL(_type) \
+ { \
+ ARG_TYPE_OBJ, .obj = {.type = _type } \
+ }
+#define ARG_NUM(_min, _max) \
+ { \
+ ARG_TYPE_NUM, .num = {.min = _min, .max = _max } \
+ }
+#define ARG_STR(_size) \
+ { \
+ ARG_TYPE_STR, .str = {.size = _size } \
+ }
+
+extern command_t fd_commands[];
+
+#endif /* __TESTER_H__ */ \ No newline at end of file
diff --git a/tests/basic/open-fd-snap-delete.t b/tests/basic/open-fd-snap-delete.t
new file mode 100644
index 00000000000..a9f47cac19d
--- /dev/null
+++ b/tests/basic/open-fd-snap-delete.t
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../snapshot.rc
+. $(dirname $0)/../fileio.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+
+# start glusterd
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+TEST $CLI volume set $V0 nfs.disable false
+
+
+TEST $CLI volume start $V0;
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+for i in {1..10} ; do echo "file" > $M0/file$i ; done
+
+# Create file and directory
+TEST touch $M0/f1
+TEST mkdir $M0/dir
+
+TEST $CLI snapshot config activate-on-create enable
+TEST $CLI volume set $V0 features.uss enable;
+
+for i in {1..10} ; do echo "file" > $M0/dir/file$i ; done
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "file" > $M0/file$i ; done
+for i in {11..20} ; do echo "file" > $M0/dir/file$i ; done
+
+TEST $CLI snapshot create snap2 $V0 no-timestamp;
+
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'r' $M0/.snaps/snap2/dir/file11;
+TEST fd_cat $fd1
+
+TEST $CLI snapshot delete snap2;
+
+TEST ! fd_cat $fd1;
+
+# the return value of this command (i.e. fd_close) depetends
+# mainly on how the release operation on a file descriptor is
+# handled in snapview-server process. As of now snapview-server
+# returns 0 for the release operation. And it is similar to how
+# posix xlator does. So, as of now the expectation is to receive
+# success for the close operation.
+TEST fd_close $fd1;
+
+# This check is mainly to ensure that the snapshot daemon
+# (snapd) is up and running. If it is not running, the following
+# stat would receive ENOTCONN.
+
+TEST stat $M0/.snaps/snap1/dir/file1
+
+TEST $CLI snapshot delete snap1;
+
+TEST rm -rf $M0/*;
+
+TEST $CLI volume stop $V0;
+
+TEST $CLI volume delete $V0;
+
+cleanup
diff --git a/tests/basic/peer-parsing.t b/tests/basic/peer-parsing.t
new file mode 100644
index 00000000000..813b65e2ae1
--- /dev/null
+++ b/tests/basic/peer-parsing.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+PEER_DIR="$GLUSTERD_WORKDIR"/peers
+TEST mkdir -p $PEER_DIR
+
+declare -i HOST_NUM=100
+
+create_random_peer_files() {
+ for i in $(seq 0 9); do
+ local peer_uuid=$(uuidgen)
+ # The rules for quoting and variable substitution in
+ # here documents would force this to be even less
+ # readable that way.
+ (
+ echo "state=1"
+ echo "uuid=$peer_uuid"
+ echo "hostname=127.0.0.$HOST_NUM"
+ ) > $PEER_DIR/$peer_uuid
+ HOST_NUM+=1
+ done
+}
+
+create_non_peer_file() {
+ echo "random stuff" > $PEER_DIR/not_a_peer_file
+}
+
+create_malformed_peer_file() {
+ echo "more random stuff" > $PEER_DIR/$(uuidgen)
+}
+
+# We create lots of files, in batches, to ensure that our bogus ones are
+# properly interspersed with the valid ones.
+
+TEST create_random_peer_files
+TEST create_non_peer_file
+TEST create_random_peer_files
+TEST create_malformed_peer_file
+TEST create_random_peer_files
+
+# There should be 30 peers, not counting the two bogus files.
+TEST glusterd
+N_PEERS=$($CLI peer status | grep ^Uuid: | wc -l)
+TEST [ "$N_PEERS" = "30" ]
+
+# For extra credit, check the logs for messages about bogus files.
+
+cleanup
+
+
+
diff --git a/tests/basic/pgfid-feat.t b/tests/basic/pgfid-feat.t
new file mode 100644
index 00000000000..a7baeec7b7a
--- /dev/null
+++ b/tests/basic/pgfid-feat.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+function get_ancestry_path() {
+ local path=$1
+ local ancestry=$(getfattr --absolute-names -e text -n glusterfs.ancestry.path "$M0/$path" | grep "^glusterfs.ancestry.path" | cut -d"=" -f2 | tr -d \");
+ echo $ancestry;
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+TEST $CLI volume set $V0 build-pgfid on;
+
+TEST $CLI volume start $V0;
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/a;
+TEST touch $M0/a/b;
+
+EXPECT "/a/b" get_ancestry_path "/a/b";
+
+TEST $CLI volume set $V0 build-pgfid off;
+EXPECT "" get_ancestry_path "/a/b";
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/basic/playground/template-xlator-sanity.t b/tests/basic/playground/template-xlator-sanity.t
new file mode 100755
index 00000000000..1c665502bfe
--- /dev/null
+++ b/tests/basic/playground/template-xlator-sanity.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST mkdir -p $B0/single-brick
+cat > $B0/template.vol <<EOF
+volume posix
+ type storage/posix
+ option directory $B0/single-brick
+end-volume
+
+volume template
+ type playground/template
+ subvolumes posix
+ option dummy 13
+end-volume
+EOF
+
+TEST glusterfs -f $B0/template.vol $M0
+
+TEST $(dirname $0)/../rpc-coverage.sh --no-locks $M0
+
+# Take statedump to get maximum code coverage
+pid=$(ps auxww | grep glusterfs | grep -E "template.vol" | awk '{print $2}' | head -1)
+
+TEST generate_statedump $pid
+
+# For monitor output
+kill -USR2 $pid
+
+# Handle SIGHUP and reconfigure
+sed -i -e '/s/dummy 13/dummy 42/g' $B0/template.vol
+kill -HUP $pid
+
+# for calling 'fini()'
+kill -TERM $pid
+
+force_umount $M0
+
+cleanup;
diff --git a/tests/basic/posix/shared-statfs.t b/tests/basic/posix/shared-statfs.t
new file mode 100644
index 00000000000..0e4a1bb409f
--- /dev/null
+++ b/tests/basic/posix/shared-statfs.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+#Test that statfs is not served from posix backend FS.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+
+#Create brick partitions
+TEST truncate -s 100M $B0/brick1
+TEST truncate -s 100M $B0/brick2
+LO1=`SETUP_LOOP $B0/brick1`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO1
+LO2=`SETUP_LOOP $B0/brick2`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO2
+TEST mkdir -p $B0/${V0}1 $B0/${V0}2
+TEST MOUNT_LOOP $LO1 $B0/${V0}1
+TEST MOUNT_LOOP $LO2 $B0/${V0}2
+
+total_brick_blocks=$(df -P $B0/${V0}1 $B0/${V0}2 | tail -2 | awk '{sum = sum+$2}END{print sum}')
+#Account for rounding error
+brick_blocks_two_percent_less=$((total_brick_blocks*98/100))
+# Create a subdir in mountpoint and use that for volume.
+TEST $CLI volume create $V0 $H0:$B0/${V0}1/1 $H0:$B0/${V0}2/1;
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}')
+# Keeping the size less than 200M mainly because XFS will use
+# some storage in brick to keep its own metadata.
+TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ]
+
+
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+# From the same mount point, share another 2 bricks with the volume
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1/2 $H0:$B0/${V0}2/2 $H0:$B0/${V0}1/3 $H0:$B0/${V0}2/3
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+total_mount_blocks=$(df -P $M0 | tail -1 | awk '{ print $2}')
+TEST [ $total_mount_blocks -gt $brick_blocks_two_percent_less -a $total_mount_blocks -lt 200000 ]
+
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+
+UMOUNT_LOOP ${B0}/${V0}{1,2}
+rm -f ${B0}/brick{1,2}
+cleanup;
diff --git a/tests/basic/posix/zero-fill-enospace.c b/tests/basic/posix/zero-fill-enospace.c
new file mode 100644
index 00000000000..b1f142c6be9
--- /dev/null
+++ b/tests/basic/posix/zero-fill-enospace.c
@@ -0,0 +1,67 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+ int ret = 1;
+ off_t size = 0;
+
+ if (argc != 6) {
+ fprintf(stderr,
+ "Syntax: %s <host> <volname> <file-path> <log-file> <size>\n",
+ argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_set_logging(fs, argv[4], 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ fd = glfs_open(fs, argv[3], O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+
+ size = strtol(argv[5], NULL, 10);
+ if (size < 0) {
+ fprintf(stderr, "Wrong size %s", argv[5]);
+ goto out;
+ }
+ ret = glfs_zerofill(fd, 0, size);
+ if (ret <= 0) {
+ fprintf(stderr, "glfs_zerofill: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (fd)
+ glfs_close(fd);
+ glfs_fini(fs);
+ return ret;
+}
diff --git a/tests/basic/posix/zero-fill-enospace.t b/tests/basic/posix/zero-fill-enospace.t
new file mode 100644
index 00000000000..ac2e61b10cf
--- /dev/null
+++ b/tests/basic/posix/zero-fill-enospace.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST truncate -s 100M $B0/brick1
+
+TEST L1=`SETUP_LOOP $B0/brick1`
+TEST MKFS_LOOP $L1
+
+TEST mkdir -p $B0/${V0}1
+
+TEST MOUNT_LOOP $L1 $B0/${V0}1
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+
+TEST $CLI volume start $V0;
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+TEST touch $M0/foo
+TEST build_tester $(dirname $0)/zero-fill-enospace.c -lgfapi -Wall -O2
+TEST ! $(dirname $0)/zero-fill-enospace $H0 $V0 /foo `gluster --print-logdir`/glfs-$V0.log 104857600
+
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+UMOUNT_LOOP ${B0}/${V0}1
+rm -f ${B0}/brick1
+
+cleanup
diff --git a/tests/basic/posixonly.t b/tests/basic/posixonly.t
new file mode 100755
index 00000000000..4844818fcc3
--- /dev/null
+++ b/tests/basic/posixonly.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST mkdir -p $B0/posixonly
+cat > $B0/posixonly.vol <<EOF
+volume poisxonly
+ type storage/posix
+ option directory $B0/posixonly
+end-volume
+EOF
+
+TEST glusterfs -f $B0/posixonly.vol $M0;
+
+TEST touch $M0/filename;
+TEST stat $M0/filename;
+TEST mkdir $M0/dirname;
+TEST stat $M0/dirname;
+TEST touch $M0/dirname/filename;
+TEST stat $M0/dirname/filename;
+TEST ln $M0/dirname/filename $M0/dirname/linkname;
+TEST chown 100:100 $M0/dirname/filename;
+TEST chown 100:100 $M0/dirname;
+TEST rm -rf $M0/filename $M0/dirname;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup;
diff --git a/tests/basic/quick-read-with-upcall.t b/tests/basic/quick-read-with-upcall.t
new file mode 100644
index 00000000000..dfb751dfcdb
--- /dev/null
+++ b/tests/basic/quick-read-with-upcall.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+ #. $(dirname $0)/../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE without selinux:
+TEST glusterfs -s $H0 --volfile-id $V0 --direct-io-mode=enable $M0;
+TEST glusterfs -s $H0 --volfile-id $V0 --direct-io-mode=enable $M1;
+
+D0="test-message0";
+D1="test-message1";
+
+function write_to()
+{
+ local file="$1";
+ local data="$2";
+ echo "$data" > "$file";
+}
+
+
+TEST write_to "$M0/test.txt" "$D0"
+EXPECT "$D0" cat $M0/test.txt
+EXPECT "$D0" cat $M1/test.txt
+
+TEST write_to "$M0/test.txt" "$D1"
+EXPECT "$D1" cat $M0/test.txt
+EXPECT "$D0" cat $M1/test.txt
+
+sleep 1
+
+# TODO: This line normally fails
+EXPECT "$D1" cat $M1/test.txt
+
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 performance.quick-read-cache-timeout 15
+TEST $CLI volume set $V0 performance.md-cache-timeout 15
+
+TEST write_to "$M0/test1.txt" "$D0"
+EXPECT "$D0" cat $M0/test1.txt
+EXPECT "$D0" cat $M1/test1.txt
+
+TEST write_to "$M0/test1.txt" "$D1"
+EXPECT "$D1" cat $M0/test1.txt
+EXPECT "$D0" cat $M1/test1.txt
+
+sleep 1
+EXPECT "$D0" cat $M1/test1.txt
+
+sleep 30
+EXPECT "$D1" cat $M1/test1.txt
+
+TEST $CLI volume set $V0 performance.quick-read-cache-invalidation on
+TEST $CLI volume set $V0 performance.cache-invalidation on
+
+TEST write_to "$M0/test2.txt" "$D0"
+EXPECT "$D0" cat $M0/test2.txt
+EXPECT "$D0" cat $M1/test2.txt
+
+TEST write_to "$M0/test2.txt" "$D1"
+EXPECT "$D1" cat $M0/test2.txt
+EXPECT "$D1" cat $M1/test2.txt
diff --git a/tests/basic/quota-ancestry-building.t b/tests/basic/quota-ancestry-building.t
new file mode 100755
index 00000000000..fcb39ee31f5
--- /dev/null
+++ b/tests/basic/quota-ancestry-building.t
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fileio.rc
+
+cleanup;
+# This tests quota enforcing on an inode without any path information.
+# This should cover anon-fd type of workload as well.
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/quota.c -o $QDD
+
+TESTS_EXPECTED_IN_LOOP=8
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2;
+
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0;
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 1B
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 hard-timeout 0
+
+deep=/0/1/2/3/4/5/6/7/8/9
+TEST mkdir -p $M0/$deep
+
+TEST touch $M0/$deep/file1 $M0/$deep/file2 $M0/$deep/file3 $M0/$deep/file4
+
+TEST fd_open 3 'w' "$M0/$deep/file1"
+TEST fd_open 4 'w' "$M0/$deep/file2"
+TEST fd_open 5 'w' "$M0/$deep/file3"
+TEST fd_open 6 'w' "$M0/$deep/file4"
+
+# consume all quota
+TEST ! $QDD $M0/$deep/file 256 4
+
+# simulate name-less lookups for re-open where the parent information is lost.
+# Stopping and starting the bricks will trigger client re-open which happens on
+# a gfid without any parent information. Since no operations are performed on
+# the fds {3..6} every-xl will be under the impression that they are good fds
+
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+for i in $(seq 3 6); do
+# failing writes indicate that we are enforcing quota set on /
+TEST_IN_LOOP ! fd_write $i "content"
+TEST_IN_LOOP sync
+done
+
+exec 3>&-
+exec 4>&-
+exec 5>&-
+exec 6>&-
+
+TEST $CLI volume stop $V0
+
+rm -f $QDD
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1332020
diff --git a/tests/basic/quota-anon-fd-nfs.t b/tests/basic/quota-anon-fd-nfs.t
new file mode 100755
index 00000000000..9e6675af6ec
--- /dev/null
+++ b/tests/basic/quota-anon-fd-nfs.t
@@ -0,0 +1,117 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+. $(dirname $0)/../fileio.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/quota.c -o $QDD
+
+TESTS_EXPECTED_IN_LOOP=16
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 nfs.disable false
+
+
+# The test makes use of inode-lru-limit to hit a scenario, where we
+# find an inode whose ancestry is not there. Following is the
+# hypothesis (which is confirmed by seeing logs indicating that
+# codepath has been executed, but not through a good understanding of
+# NFS internals).
+
+# At the end of an fop, the reference count of an inode would be
+# zero. The inode (and its ancestry) persists in memory only
+# because of non-zero lookup count. These looked up inodes are put
+# in an lru queue of size 1 (here). So, there can be at most one
+# such inode in memory.
+
+# NFS Server makes use of anonymous fds. So, if it cannot find
+# valid fd, it does a nameless lookup. This gives us an inode
+# whose ancestry is NULL. When a write happens on this inode,
+# quota-enforcer/marker finds a NULL ancestry and asks
+# storage/posix to build it.
+
+TEST $CLI volume set $V0 network.inode-lru-limit 1
+TEST $CLI volume set $V0 performance.nfs.write-behind off
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 1
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 hard-timeout 0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 noac,soft,nolock,vers=3;
+deep=/0/1/2/3/4/5/6/7/8/9
+TEST mkdir -p $N0/$deep
+
+TEST touch $N0/$deep/file1 $N0/$deep/file2 $N0/$deep/file3 $N0/$deep/file4
+
+TEST fd_open 3 'w' "$N0/$deep/file1"
+TEST fd_open 4 'w' "$N0/$deep/file2"
+TEST fd_open 5 'w' "$N0/$deep/file3"
+TEST fd_open 6 'w' "$N0/$deep/file4"
+
+# consume all quota
+echo "Hello" > $N0/$deep/new_file_1
+echo "World" >> $N0/$deep/new_file_1
+echo 1 >> $N0/$deep/new_file_1
+echo 2 >> $N0/$deep/new_file_1
+
+# Try to create a 1M file which should fail
+TEST ! $QDD $N0/$deep/new_file_2 256 4
+
+
+# At the end of each fop in server, reference count of the
+# inode associated with each of the file above drops to zero and hence
+# put into lru queue. Since lru-limit is set to 1, an fop next file
+# will displace the current inode from itable. This will ensure that
+# when writes happens on same fd, fd resolution results in
+# nameless lookup from server and quota_writev encounters an fd
+# associated with an inode whose parent is not present in itable.
+
+for j in $(seq 1 2); do
+ for i in $(seq 3 6); do
+ # failing writes indicate that we are enforcing quota set on /
+ # even with anonymous fds.
+ TEST_IN_LOOP ! fd_write $i "content"
+ TEST_IN_LOOP sync
+ done
+done
+
+exec 3>&-
+exec 4>&-
+exec 5>&-
+exec 6>&-
+
+$CLI volume statedump $V0 all
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# This is ugly, but there seems to be a latent race between other actions and
+# stopping the volume. The visible symptom is that "umount -l" (run from
+# gf_umount_lazy in glusterd) hangs. This happens pretty consistently with the
+# new mem-pool code, though it's not really anything to do with memory pools -
+# just with changed timing. Adding the sleep here makes it work consistently.
+#
+# If anyone else wants to debug the race condition, feel free.
+sleep 3
+
+TEST $CLI volume stop $V0
+
+rm -f $QDD
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/basic/quota-nfs.t b/tests/basic/quota-nfs.t
new file mode 100755
index 00000000000..de94a950a7f
--- /dev/null
+++ b/tests/basic/quota-nfs.t
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/quota.c -o $QDD
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 nfs.disable false
+
+TEST $CLI volume set $V0 network.inode-lru-limit 1
+TEST $CLI volume set $V0 performance.nfs.write-behind off
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+TEST mount_nfs $H0:/$V0 $N0
+deep=/0/1/2/3/4/5/6/7/8/9
+TEST mkdir -p $N0/$deep
+
+TEST $QDD $N0/$deep/file 256 40
+
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 20MB
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 hard-timeout 0
+
+TEST $QDD $N0/$deep/newfile_1 256 20
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/$deep/file
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/$deep/newfile_1
+
+#Unmount and mount to flush the data
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+TEST mount_nfs $H0:/$V0 $N0
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/$deep/file
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/$deep/newfile_1
+
+# wait for write behind to complete.
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "15.0MB" quotausage "/"
+
+# Try to create a 100Mb file which should fail
+TEST ! $QDD $N0/$deep/newfile_2 256 400
+TEST rm -f $N0/$deep/newfile_2
+
+## Before killing daemon to avoid deadlocks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+TEST $CLI volume stop $V0
+
+rm -f $QDD
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/basic/quota-rename.t b/tests/basic/quota-rename.t
new file mode 100644
index 00000000000..37438689d9f
--- /dev/null
+++ b/tests/basic/quota-rename.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+# This regression test tries to ensure renaming a directory with content, and
+# no limit set, is accounted properly, when moved into a directory with quota
+# limit set.
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume start $V0;
+
+TEST $CLI volume quota $V0 enable;
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST $CLI volume quota $V0 hard-timeout 0
+TEST $CLI volume quota $V0 soft-timeout 0
+
+TEST mkdir -p $M0/dir/dir1
+TEST $CLI volume quota $V0 limit-objects /dir 20
+
+TEST mkdir $M0/dir/dir1/d{1..5}
+TEST touch $M0/dir/dir1/f{1..5}
+TEST mv $M0/dir/dir1 $M0/dir/dir2
+
+#Number of files under /dir is 5
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "5" quota_object_list_field "/dir" 4
+
+#Number of directories under /dir is 7
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "7" quota_object_list_field "/dir" 5
+
+cleanup;
diff --git a/tests/basic/quota.c b/tests/basic/quota.c
new file mode 100644
index 00000000000..809ceb8e54c
--- /dev/null
+++ b/tests/basic/quota.c
@@ -0,0 +1,89 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+ssize_t
+nwrite(int fd, const void *buf, size_t count)
+{
+ ssize_t ret = 0;
+ ssize_t written = 0;
+
+ for (written = 0; written != count; written += ret) {
+ ret = write(fd, buf + written, count - written);
+ if (ret < 0) {
+ if (errno == EINTR)
+ ret = 0;
+ else
+ goto out;
+ }
+ }
+
+ ret = written;
+out:
+ return ret;
+}
+
+int
+file_write(char *filename, int bs, int count)
+{
+ int fd = 0;
+ int ret = -1;
+ int i = 0;
+ char *buf = NULL;
+
+ bs = bs * 1024;
+
+ buf = (char *)malloc(bs);
+ if (buf == NULL)
+ goto out;
+
+ memset(buf, 0, bs);
+
+ fd = open(filename, O_RDWR | O_CREAT | O_SYNC, 0600);
+ while (i < count) {
+ ret = nwrite(fd, buf, bs);
+ if (ret == -1) {
+ close(fd);
+ goto out;
+ }
+ i++;
+ }
+
+ ret = fdatasync(fd);
+ if (ret) {
+ close(fd);
+ goto out;
+ }
+
+ ret = close(fd);
+ if (ret)
+ goto out;
+
+ ret = 0;
+
+out:
+ if (buf)
+ free(buf);
+ return ret;
+}
+
+int
+main(int argc, char **argv)
+{
+ if (argc != 4) {
+ printf("Usage: %s <filename> <block size in k> <count>\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ if (file_write(argv[1], atoi(argv[2]), atoi(argv[3])) < 0) {
+ perror("write failed");
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tests/basic/quota.t b/tests/basic/quota.t
new file mode 100755
index 00000000000..46d1bafff84
--- /dev/null
+++ b/tests/basic/quota.t
@@ -0,0 +1,236 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+. $(dirname $0)/../dht.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/quota.c -o $QDD
+
+TESTS_EXPECTED_IN_LOOP=19
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '4' brick_count $V0
+
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir -p $M0/test_dir/in_test_dir
+
+## --------------------------------------------------------------------------
+## Verify quota commands and check if quota-deem-statfs is enabled by default
+## --------------------------------------------------------------------------
+TEST $CLI volume quota $V0 enable
+EXPECT 'on' volinfo_field $V0 'features.quota'
+EXPECT 'on' volinfo_field $V0 'features.inode-quota'
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+
+TEST $CLI volume quota $V0 limit-usage /test_dir 100MB
+
+TEST $CLI volume quota $V0 limit-usage /test_dir/in_test_dir 150MB
+
+EXPECT "150.0MB" quota_hard_limit "/test_dir/in_test_dir";
+EXPECT "80%" quota_soft_limit "/test_dir/in_test_dir";
+
+TEST $CLI volume quota $V0 remove /test_dir/in_test_dir
+
+EXPECT "100.0MB" quota_hard_limit "/test_dir";
+
+TEST $CLI volume quota $V0 limit-usage /test_dir 10MB
+EXPECT "10.0MB" quota_hard_limit "/test_dir";
+EXPECT "80%" quota_soft_limit "/test_dir";
+
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 hard-timeout 0
+
+## ------------------------------
+## Verify quota enforcement
+## -----------------------------
+
+# Try to create a 12MB file which should fail
+TEST ! $QDD $M0/test_dir/1.txt 256 48
+TEST rm $M0/test_dir/1.txt
+
+# wait for marker's accounting to complete
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "0Bytes" quotausage "/test_dir"
+
+TEST $QDD $M0/test_dir/2.txt 256 32
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "8.0MB" quotausage "/test_dir"
+
+# Checking internal xattr
+# This confirms that pgfid is also filtered
+TEST ! "getfattr -d -e hex -m . $M0/test_dir/2.txt | grep pgfid ";
+# just check for quota xattr are visible or not
+TEST ! "getfattr -d -e hex -m . $M0/test_dir | grep quota";
+
+# setfattr should fail
+TEST ! setfattr -n trusted.glusterfs.quota.limit-set -v 10 $M0/test_dir;
+
+# remove xattr should fail
+TEST ! setfattr -x trusted.glusterfs.quota.limit-set $M0/test_dir;
+
+TEST rm $M0/test_dir/2.txt
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "0Bytes" quotausage "/test_dir"
+
+## rename tests
+TEST $QDD $M0/test_dir/2 256 32
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "8.0MB" quotausage "/test_dir"
+TEST mv $M0/test_dir/2 $M0/test_dir/0
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "8.0MB" quotausage "/test_dir"
+TEST rm $M0/test_dir/0
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "0Bytes" quotausage "/test_dir"
+
+## rename tests under different directories
+TEST mkdir -p $M0/1/2;
+TEST $CLI volume quota $V0 limit-usage /1/2 100MB 70%;
+
+# The corresponding write(3) should fail with EDQUOT ("Disk quota exceeded")
+TEST ! $QDD $M0/1/2/file 256 408
+
+TEST mkdir -p $M0/1/3;
+TEST $QDD $M0/1/3/file 256 408
+
+#The corresponding rename(3) should fail with EDQUOT ("Disk quota exceeded")
+TEST ! mv $M0/1/3/ $M0/1/2/3_mvd;
+
+## ---------------------------
+
+## ------------------------------
+## Check if presence of nfs mount results in ESTALE errors for I/O
+# on a fuse mount. Note: Quota command internally uses a fuse mount,
+# though this may change.
+## -----------------------------
+
+##Wait for connection establishment between nfs server and brick process
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+TEST $CLI volume quota $V0 limit-usage /test_dir 100MB
+
+TEST $CLI volume quota $V0 limit-usage /test_dir/in_test_dir 150MB
+
+EXPECT "150.0MB" quota_hard_limit "/test_dir/in_test_dir";
+## -----------------------------
+
+
+###################################################
+## ------------------------------------------------
+## <Test quota functionality in add-brick scenarios>
+## ------------------------------------------------
+###################################################
+QUOTALIMIT=100
+QUOTALIMITROOT=2048
+TESTDIR="addbricktest"
+
+rm -rf $M0/*;
+
+## <Create directories and test>
+## -----------------------------
+# 41-42
+TEST mkdir $M0/$TESTDIR
+TEST mkdir $M0/$TESTDIR/dir{1..10};
+
+
+# 43-52
+## <set limits>
+## -----------------------------
+TEST $CLI volume quota $V0 limit-usage / "$QUOTALIMITROOT"MB;
+for i in {1..10}; do
+ TEST_IN_LOOP $CLI volume quota $V0 limit-usage /$TESTDIR/dir$i \
+ "$QUOTALIMIT"MB;
+done
+## </Enable quota and set limits>
+
+#53-62
+for i in `seq 1 9`; do
+ TEST_IN_LOOP $QDD "$M0/$TESTDIR/dir1/10MBfile$i" 256 40
+done
+
+# 63-64
+## <Add brick and start rebalance>
+## -------------------------------
+TEST $CLI volume add-brick $V0 $H0:$B0/brick{3,4}
+TEST $CLI volume rebalance $V0 start;
+
+## Wait for rebalance
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
+
+## <Try creating data beyond limit>
+## --------------------------------
+for i in `seq 1 200`; do
+ $QDD of="$M0/$TESTDIR/dir1/1MBfile$i" 256 4\
+ 2>&1 | egrep -v '(No space left|Disc quota exceeded)'
+done
+
+# 65
+## <Test whether quota limit crossed more than 10% of limit>
+## ---------------------------------------------------------
+USED_KB=`du -ks $M0/$TESTDIR/dir1 | cut -f1`;
+USED_MB=$(($USED_KB/1024));
+TEST [ $USED_MB -le $((($QUOTALIMIT * 110) / 100)) ]
+
+# 66-67
+## <Test the xattrs healed to new brick>
+## -------------------------------------
+TEST getfattr -d -m "trusted.glusterfs.quota.limit-set" -e hex \
+ --absolute-names $B0/brick{3,4}/$TESTDIR/dir{1..10};
+# Test on root.
+TEST getfattr -d -m "trusted.glusterfs.quota.limit-set" -e hex \
+ --absolute-names $B0/brick{3,4};
+
+## -------------------------------------------------
+## </Test quota functionality in add-brick scenarios>
+## -------------------------------------------------
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+## ---------------------------
+## Test quota volume options
+## ---------------------------
+TEST $CLI volume reset $V0
+EXPECT 'on' volinfo_field $V0 'features.quota'
+EXPECT 'on' volinfo_field $V0 'features.inode-quota'
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume reset $V0 force
+EXPECT 'on' volinfo_field $V0 'features.quota'
+EXPECT 'on' volinfo_field $V0 'features.inode-quota'
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume reset $V0 features.quota-deem-statfs
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume set $V0 features.quota-deem-statfs off
+EXPECT 'off' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume set $V0 features.quota-deem-statfs on
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume quota $V0 disable
+EXPECT 'off' volinfo_field $V0 'features.quota'
+EXPECT 'off' volinfo_field $V0 'features.inode-quota'
+EXPECT '' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume stop $V0;
+
+rm -f $QDD
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1332045
diff --git a/tests/basic/quota_aux_mount.t b/tests/basic/quota_aux_mount.t
new file mode 100755
index 00000000000..78d7f47e373
--- /dev/null
+++ b/tests/basic/quota_aux_mount.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+##-------------------------------------------------------------
+## Tests to verify that aux mount is unmounted after each quota
+## command executes.
+##-------------------------------------------------------------
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '4' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir -p $M0/test_dir/
+
+TEST $CLI volume quota $V0 enable
+EXPECT 'on' volinfo_field $V0 'features.quota'
+EXPECT 'on' volinfo_field $V0 'features.inode-quota'
+
+TEST $CLI volume quota $V0 limit-usage /test_dir 150MB
+EXPECT "1" get_limit_aux
+TEST $CLI volume quota $V0 limit-objects /test_dir 10
+EXPECT "1" get_limit_aux
+EXPECT "150.0MB" quota_hard_limit "/test_dir";
+EXPECT "1" get_list_aux
+EXPECT "10" quota_object_hard_limit "/test_dir";
+EXPECT "1" get_list_aux
+
+TEST $CLI volume quota $V0 remove /test_dir/
+EXPECT "1" get_limit_aux
+TEST $CLI volume quota $V0 remove-objects /test_dir
+EXPECT "1" get_limit_aux
+
+TEST $CLI volume quota $V0 disable
+
+TEST $CLI volume stop $V0;
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1447344
diff --git a/extras/rpc-coverage.sh b/tests/basic/rpc-coverage.sh
index 9148a73e241..6203f0ac7cb 100755
--- a/extras/rpc-coverage.sh
+++ b/tests/basic/rpc-coverage.sh
@@ -47,8 +47,11 @@
#set -e;
set -o pipefail;
+# pull compatibility functions (e.g.: stat replacement if not running Linux)
+. $(dirname $0)/../include.rc
+
function fail() {
- echo "$*: failed.";
+ echo "$*: failed.";
exit 1;
}
@@ -71,8 +74,8 @@ function test_statfs()
{
local size;
- size=$(stat -f -c '%s' $PFX/dir/file);
- test "x$size" != "x0" || fail "statfs"
+ mode=$(stat -c '%a' $PFX/dir/file);
+ test "x$mode" == "x644" || fail "statfs"
}
@@ -85,8 +88,8 @@ function test_open()
function test_write()
{
- dd if=/dev/zero of=$PFX/dir/file bs=65536 count=16 2>/dev/null;
- test $(stat -c '%s' $PFX/dir/file) == 1048576 || fail "open"
+ dd if=/dev/zero of=$PFX/dir/file bs=65536 count=16
+ test $(stat -c '%s' $PFX/dir/file) == 1048576 || fail "write"
}
@@ -114,7 +117,9 @@ function test_fstat()
local msg;
export PFX;
- msg=$(sh -c 'tail -f $PFX/dir/file --pid=$$ & sleep 1 && echo hooha > $PFX/dir/file && sleep 1');
+ echo hooha > $PFX/dir/file
+ sleep 1
+ msg=$(sh -c 'tail $PFX/dir/file')
test "x$msg" == "xhooha" || fail "fstat"
}
@@ -139,7 +144,8 @@ function test_symlink()
{
local msg;
- ln -s $PFX/dir/file $PFX/dir/symlink;
+ ( cd $PFX/dir && ln -s file symlink; )
+
test "$(stat -c '%F' $PFX/dir/symlink)" == "symbolic link" || fail "Creation of symlink"
msg=$(cat $PFX/dir/symlink);
@@ -352,21 +358,21 @@ function test_utimes()
function test_locks()
{
- exec 200>$PFX/dir/lockfile || fail "exec"
+ exec 100>$PFX/dir/lockfile || fail "exec"
## exclusive locks test
- flock -e 200 || fail "flock -e"
- ! flock -n -e $PFX/dir/lockfile -c true || fail "! flock -n -e"
+ flock -x 100 || fail "flock -x"
+ ! flock -n -x $PFX/dir/lockfile -c true || fail "! flock -n -x"
! flock -n -s $PFX/dir/lockfile -c true || fail "! flock -n -s"
- flock -u 200 || fail "flock -u"
+ flock -u 100 || fail "flock -u"
## shared locks test
- flock -s 200 || fail "flock -s"
- ! flock -n -e $PFX/dir/lockfile -c true || fail "! flock -n -e"
+ flock -s 100 || fail "flock -s"
+ ! flock -n -x $PFX/dir/lockfile -c true || fail "! flock -n -x"
flock -n -s $PFX/dir/lockfile -c true || fail "! flock -n -s"
- flock -u 200 || fail "flock -u"
+ flock -u 100 || fail "flock -u"
- exec 200>&- || fail "exec"
+ exec 100>&- || fail "exec"
}
@@ -413,9 +419,15 @@ function test_rmdir()
rm -rf $PFX || fail "rm -rf"
}
+function test_statvfs()
+{
+ df $DIR 2>&1 || fail "df"
+}
+
function run_tests()
{
+ test_statvfs;
test_mkdir;
test_create;
test_statfs;
@@ -430,13 +442,15 @@ function run_tests()
test_rename;
test_chmod;
test_chown;
- test_utimes;
- test_locks;
test_readdir;
test_setxattr;
test_listxattr;
test_getxattr;
test_removexattr;
+ if [ "$run_lock_tests" = "1" ]; then
+ test_locks;
+ fi
+ test_utimes;
test_unlink;
test_rmdir;
}
@@ -447,14 +461,19 @@ function _init()
DIR=$(pwd);
}
-
+run_lock_tests=1
function parse_cmdline()
{
if [ "x$1" == "x" ] ; then
- echo "Usage: $0 /path/mount"
+ echo "Usage: $0 [--no-locks] /path/mount"
exit 1
fi
+ if [ "$1" == "--no-locks" ] ; then
+ run_lock_tests=0
+ shift
+ fi
+
DIR=$1;
if [ ! -d "$DIR" ] ; then
diff --git a/tests/basic/rpc-coverage.t b/tests/basic/rpc-coverage.t
new file mode 100755
index 00000000000..2c1bcd5a63a
--- /dev/null
+++ b/tests/basic/rpc-coverage.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6,7,8,9};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '9' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+TEST $(dirname $0)/rpc-coverage.sh $M1
+cleanup;
diff --git a/tests/basic/sdfs-sanity.t b/tests/basic/sdfs-sanity.t
new file mode 100644
index 00000000000..16d0bed866f
--- /dev/null
+++ b/tests/basic/sdfs-sanity.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6,7,8,9};
+TEST $CLI volume set $V0 features.sdfs enable;
+TEST $CLI volume start $V0;
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+# This test covers lookup, mkdir, mknod, symlink, link, rename,
+# create operations
+TEST $(dirname $0)/rpc-coverage.sh $M1
+
+TEST cp $(dirname ${0})/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+cleanup;
diff --git a/tests/basic/seek.c b/tests/basic/seek.c
new file mode 100644
index 00000000000..54fa6f463af
--- /dev/null
+++ b/tests/basic/seek.c
@@ -0,0 +1,182 @@
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <errno.h>
+
+static char buffer[65536];
+
+static int
+parse_int(const char *text, size_t *value)
+{
+ char *ptr;
+ size_t val;
+
+ val = strtoul(text, &ptr, 0);
+ if (*ptr != 0) {
+ return 0;
+ }
+
+ *value = val;
+
+ return 1;
+}
+
+static int
+fill_area(int fd, off_t offset, size_t size)
+{
+ size_t len;
+ ssize_t res;
+
+ while (size > 0) {
+ len = sizeof(buffer);
+ if (len > size) {
+ len = size;
+ }
+ res = pwrite(fd, buffer, len, offset);
+ if (res < 0) {
+ fprintf(stderr, "pwrite(%d, %p, %lu, %lu) failed: %d\n", fd, buffer,
+ size, offset, errno);
+ return 0;
+ }
+ if (res != len) {
+ fprintf(stderr,
+ "pwrite(%d, %p, %lu, %lu) didn't wrote all "
+ "data: %lu/%lu\n",
+ fd, buffer, size, offset, res, len);
+ return 0;
+ }
+ offset += len;
+ size -= len;
+ }
+
+ return 1;
+}
+
+static void
+syntax(void)
+{
+ fprintf(stderr, "Syntax: seek create <path> <offset> <size> [...]\n");
+ fprintf(stderr, " seek scan <path> data|hole <offset>\n");
+}
+
+static int
+seek_create(const char *path, int argc, char *argv[])
+{
+ size_t off, size;
+ int fd;
+ int ret = 1;
+
+ fd = open(path, O_CREAT | O_TRUNC | O_RDWR, 0644);
+ if (fd < 0) {
+ fprintf(stderr, "Failed to create the file\n");
+ goto out;
+ }
+
+ while (argc > 0) {
+ if (!parse_int(argv[0], &off) || !parse_int(argv[1], &size)) {
+ syntax();
+ goto out_close;
+ }
+ if (!fill_area(fd, off, size)) {
+ goto out_close;
+ }
+ argv += 2;
+ argc -= 2;
+ }
+
+ ret = 0;
+
+out_close:
+ close(fd);
+out:
+ return ret;
+}
+
+static int
+seek_scan(const char *path, const char *type, const char *pos)
+{
+ size_t off, res;
+ int fd, whence;
+ int ret = 1;
+
+ if (strcmp(type, "data") == 0) {
+ whence = SEEK_DATA;
+ } else if (strcmp(type, "hole") == 0) {
+ whence = SEEK_HOLE;
+ } else {
+ syntax();
+ goto out;
+ }
+
+ if (!parse_int(pos, &off)) {
+ syntax();
+ goto out;
+ }
+
+ fd = open(path, O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "Failed to open the file\n");
+ goto out;
+ }
+
+ res = lseek(fd, off, whence);
+ if (res == (off_t)-1) {
+ if (errno != ENXIO) {
+ fprintf(stderr, "seek(%d, %lu, %d) failed: %d\n", fd, off, whence,
+ errno);
+ goto out_close;
+ }
+ fprintf(stdout, "ENXIO\n");
+ } else {
+ fprintf(stdout, "%lu\n", res);
+ }
+
+ ret = 0;
+
+out_close:
+ close(fd);
+out:
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 1;
+
+ memset(buffer, 0x55, sizeof(buffer));
+
+ if (argc < 3) {
+ syntax();
+ goto out;
+ }
+
+ if (strcmp(argv[1], "create") == 0) {
+ if (((argc - 3) & 1) != 0) {
+ syntax();
+ goto out;
+ }
+ ret = seek_create(argv[2], argc - 3, argv + 3);
+ } else if (strcmp(argv[1], "scan") == 0) {
+ if (argc != 5) {
+ syntax();
+ goto out;
+ }
+ ret = seek_scan(argv[2], argv[3], argv[4]);
+ } else {
+ syntax();
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
diff --git a/tests/basic/shd-mux-afr.t b/tests/basic/shd-mux-afr.t
new file mode 100644
index 00000000000..cf300c148bb
--- /dev/null
+++ b/tests/basic/shd-mux-afr.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+shd_pid=$(get_shd_mux_pid $V0)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#Create a one more volume
+TEST $CLI volume create ${V0}_1 replica 3 $H0:$B0/${V0}_1{0,1,2,3,4,5}
+TEST $CLI volume start ${V0}_1
+
+#Check whether the shd has multiplexed or not
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}
+
+TEST $CLI volume set ${V0}_1 cluster.background-self-heal-count 0
+TEST $CLI volume set ${V0}_1 cluster.eager-lock off
+TEST $CLI volume set ${V0}_1 performance.flush-behind off
+TEST $GFS --volfile-id=/${V0}_1 --volfile-server=$H0 $M1
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick ${V0}_1 $H0 $B0/${V0}_10
+TEST kill_brick ${V0}_1 $H0 $B0/${V0}_14
+
+TEST touch $M0/foo{1..100}
+TEST touch $M1/foo{1..100}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count ${V0}_1
+
+TEST $CLI volume start ${V0} force
+TEST $CLI volume start ${V0}_1 force
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}_1
+
+TEST rm -rf $M0/*
+TEST rm -rf $M1/*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+#Stop the volume
+TEST $CLI volume stop ${V0}_1
+TEST $CLI volume delete ${V0}_1
+
+#Check the stop succeeded and detached the volume with out restarting it
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
+
+#Check the thread count become to earlier number after stopping
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+cleanup
diff --git a/tests/basic/shd-mux-ec.t b/tests/basic/shd-mux-ec.t
new file mode 100644
index 00000000000..ef4d65018d3
--- /dev/null
+++ b/tests/basic/shd-mux-ec.t
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+shd_pid=$(get_shd_mux_pid $V0)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#Now create a ec volume and check mux works
+TEST $CLI volume create ${V0}_2 disperse 6 redundancy 2 $H0:$B0/${V0}_2{0,1,2,3,4,5}
+TEST $CLI volume start ${V0}_2
+
+#Check whether the shd has multiplexed or not
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}_2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid ${V0}
+
+TEST $CLI volume set ${V0}_2 cluster.background-self-heal-count 0
+TEST $CLI volume set ${V0}_2 cluster.eager-lock off
+TEST $CLI volume set ${V0}_2 performance.flush-behind off
+TEST $GFS --volfile-id=/${V0}_2 --volfile-server=$H0 $M1
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST kill_brick ${V0}_2 $H0 $B0/${V0}_20
+TEST kill_brick ${V0}_2 $H0 $B0/${V0}_22
+
+TEST touch $M0/foo{1..100}
+TEST touch $M1/foo{1..100}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^404$" get_pending_heal_count ${V0}_2
+
+TEST $CLI volume start ${V0} force
+TEST $CLI volume start ${V0}_2 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count ${V0}_2
+
+TEST rm -rf $M0/*
+TEST rm -rf $M1/*
+
+
+#Stop the volume
+TEST $CLI volume stop ${V0}_2
+TEST $CLI volume delete ${V0}_2
+
+#Check the stop succeeded and detached the volume with out restarting it
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^${shd_pid}$" get_shd_mux_pid $V0
+
+#Check the thread count become to zero for ec related threads
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to earlier number after stopping
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+cleanup
diff --git a/tests/basic/stats-dump.t b/tests/basic/stats-dump.t
new file mode 100644
index 00000000000..ed73fd1d14a
--- /dev/null
+++ b/tests/basic/stats-dump.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 diagnostics.latency-measurement on
+TEST $CLI volume set $V0 diagnostics.count-fop-hits on
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 1
+TEST $CLI volume set $V0 performance.nfs.io-threads on
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST mount_nfs $H0:/$V0 $N0 nolock,soft,intr
+
+for i in {1..10};do
+ dd if=/dev/zero of=$M0/fuse_testfile$i bs=4k count=100
+done
+
+for i in {1..10};do
+ dd if=/dev/zero of=$N0/nfs_testfile$i bs=4k count=100
+done
+
+# Wait for one dump interval to be done, some seconds past 1 that is the dump
+# interval set
+sleep 2
+
+# Change the dump interval to 0, so that when reading the file contents we
+# do not get them truncated by the next interval that is overwriting the latest
+# stats data
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 0
+
+# Verify we have non-zero write counts from the bricks, gNFSd
+# and the FUSE mount.
+TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfs_nfsd.dump|tail -1|cut -d: -f2) != "0," ]
+TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfs_patchy.dump|tail -1|cut -d: -f2) != "0," ]
+TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy0.dump|tail -1|cut -d: -f2) != "0," ]
+TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy1.dump|tail -1|cut -d: -f2) != "0," ]
+TEST [ $(grep 'aggr.fop.write.count' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy2.dump|tail -1|cut -d: -f2) != "0," ]
+
+# Test that io-stats is getting queue sizes from io-threads
+TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfs_nfsd.dump
+TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy0.dump
+TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy1.dump
+TEST grep '.queue_size' ${GLUSTERD_WORKDIR}/stats/glusterfsd__d_backends_patchy2.dump
+
+cleanup;
diff --git a/tests/basic/symbol-check.sh b/tests/basic/symbol-check.sh
new file mode 100755
index 00000000000..0f8243ca731
--- /dev/null
+++ b/tests/basic/symbol-check.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+syscalls=$'access\nchmod\nchown\nclose\nclosedir\ncreat64\n\
+fallocate64\nfchmod\nfchown\nfdatasync\nfgetxattr\nflistxattr\n\
+fremovexattr\nfsetxattr\nfsync\nftruncate64\n__fxstat64\n\
+__fxstatat64\nlchown\nlgetxattr\nlinkat\nllistxattr\nlremovexattr\n\
+lseek64\nlsetxattr\n__lxstat64\nmkdir\nmkdirat\nopenat64\nopendir\n\
+pread64\npwrite64\npreadv64\npwritev64\nread\nreaddir64\nreadlink\n\
+readv\nrename\nrmdir\n statvfs64\nsymlink\n\truncate64\nunlink\n\
+utimeswrite\nwritev\n\__xmknod\n__xstat64'
+
+syscalls32=$'creat\nfallocate\nftruncate\n__fxstat\n__fxstatat\n\
+lseek\n__lxstat\nopenat\nreaddir\nstatvfs\ntruncate\nstat\n\
+preadv\npwritev\npread\npwrite'
+
+glibccalls=$'tmpfile'
+
+exclude_files=$'/libglusterfs/src/.libs/libglusterfs_la-syscall.o\n\
+/libglusterfs/src/.libs/libglusterfs_la-gen_uuid.o\n\
+/contrib/fuse-util/fusermount.o\n\
+/contrib/fuse-util/mount_util.o\n\
+/contrib/fuse-util/mount-common.o\n\
+/xlators/mount/fuse/src/.libs/mount.o\n\
+/xlators/mount/fuse/src/.libs/mount-common.o\n\
+/xlators/features/qemu-block/src/.libs/block.o\n\
+/xlators/features/qemu-block/src/.libs/cutils.o\n\
+/xlators/features/qemu-block/src/.libs/oslib-posix.o'
+
+function main()
+{
+ for exclude_file in ${exclude_files}; do
+ if [[ ${1} = *${exclude_file} ]]; then
+ exit 0
+ fi
+ done
+
+ local retval=0
+ local t
+ t=$(nm "${1}" | grep " U " | sed -e "s/ //g" -e "s/ U //g")
+
+ for symy in ${t}; do
+
+ for symx in ${syscalls}; do
+
+ if [[ ${symx} = "${symy}" ]]; then
+
+ case ${symx} in
+ "creat64") sym="creat";;
+ "fallocate64") sym="fallocate";;
+ "ftruncate64") sym="ftruncate";;
+ "lseek64") sym="lseek";;
+ "preadv64") sym="preadv";;
+ "pwritev64") sym="pwritev";;
+ "pread64") sym="pread";;
+ "pwrite64") sym="pwrite";;
+ "openat64") sym="openat";;
+ "readdir64") sym="readdir";;
+ "truncate64") sym="truncate";;
+ "__statvfs64") sym="statvfs";;
+ "__fxstat64") sym="fstat";;
+ "__fxstatat64") sym="fstatat";;
+ "__lxstat64") sym="lstat";;
+ "__xmknod") sym="mknod";;
+ "__xstat64") sym="stat";;
+ *) sym=${symx};;
+ esac
+
+ echo "${1} should call sys_${sym}, not ${sym}" >&2
+ retval=1
+ fi
+
+ done
+
+ for symx in ${syscalls32}; do
+
+ if [[ ${symx} = "${symy}" ]]; then
+
+ echo "${1} was not compiled with -D_FILE_OFFSET_BITS=64" >&2
+ retval=1
+ fi
+ done
+
+ symy_glibc=$(echo "${symy}" | sed -e "s/@@GLIBC.*//g")
+ # Eliminate false positives, check if we have a GLIBC symbol in 'y'
+ if [[ ${symy} != "${symy_glibc}" ]]; then
+ for symx in ${glibccalls}; do
+
+ if [[ ${symx} = "${symy_glibc}" ]]; then
+
+ case ${symx} in
+ "tmpfile") alt="mkstemp";;
+ *) alt="none";;
+ esac
+
+ if [[ ${alt} = "none" ]]; then
+ echo "${1} should not call ${symy_glibc}";
+ else
+ echo "${1} should use ${alt} instead of ${symy_glibc}" >&2;
+ fi
+
+ retval=1
+ fi
+ done
+ fi
+
+ done
+
+ if [ ${retval} = 1 ]; then
+ touch ./.symbol-check-errors
+ fi
+ exit ${retval}
+}
+
+main "$@"
diff --git a/tests/basic/trace.t b/tests/basic/trace.t
new file mode 100755
index 00000000000..01e7c9e0a25
--- /dev/null
+++ b/tests/basic/trace.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST mkdir -p $B0/single-brick
+cat > $B0/template.vol <<EOF
+volume posix
+ type storage/posix
+ option directory $B0/single-brick
+end-volume
+
+volume trace
+ type debug/trace
+ option log-file yes
+ option log-history yes
+ subvolumes posix
+end-volume
+EOF
+
+TEST glusterfs -f $B0/template.vol $M0
+
+TEST $(dirname $0)/rpc-coverage.sh --no-locks $M0
+
+# Take statedump to get maximum code coverage
+pid=$(ps auxww | grep glusterfs | grep -E "template.vol" | awk '{print $2}' | head -1)
+
+TEST generate_statedump $pid
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Now, use the glusterd way of enabling trace
+TEST glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+TEST $CLI volume set $V0 debug.trace marker
+TEST $CLI volume set $V0 debug.log-file yes
+#TEST $CLI volume set $V0 debug.log-history yes
+
+TEST $CLI volume start $V0;
+
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+TEST $(dirname $0)/rpc-coverage.sh --no-locks $M1
+cp $(dirname ${0})/gfapi/glfsxmp-coverage.c ./glfsxmp.c
+build_tester ./glfsxmp.c -lgfapi
+./glfsxmp $V0 $H0 > /dev/null
+cleanup_tester ./glfsxmp
+rm ./glfsxmp.c
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+cleanup;
diff --git a/tests/basic/uss.t b/tests/basic/uss.t
new file mode 100644
index 00000000000..09dd00ef995
--- /dev/null
+++ b/tests/basic/uss.t
@@ -0,0 +1,421 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../snapshot.rc
+. $(dirname $0)/../fileio.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+function check_readonly()
+{
+ $@ 2>&1 | grep -q 'Read-only file system'
+ return $?
+}
+
+function lookup()
+{
+ ls $1
+ if [ "$?" == "0" ]
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+cleanup;
+TESTS_EXPECTED_IN_LOOP=10
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+
+TEST $CLI volume set $V0 nfs.disable false
+
+
+TEST $CLI volume start $V0;
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+for i in {1..10} ; do echo "file" > $M0/file$i ; done
+
+# Create file and hard-links
+TEST touch $M0/f1
+TEST mkdir $M0/dir
+TEST ln $M0/f1 $M0/f2
+TEST ln $M0/f1 $M0/dir/f3
+
+TEST $CLI snapshot config activate-on-create enable
+TEST $CLI volume set $V0 features.uss enable;
+
+TEST ! $CLI snapshot create snap1 $V0 no-timestamp description "";
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "file" > $M0/file$i ; done
+
+TEST $CLI snapshot create snap2 $V0 no-timestamp;
+
+########### Test inode numbers ###########
+s1_f1_ino=$(STAT_INO $M0/.snaps/snap1/f1)
+TEST [ $s1_f1_ino != 0 ]
+
+# Inode number of f1 should be same as f2 f3 within snapshot
+EXPECT $s1_f1_ino STAT_INO $M0/.snaps/snap1/f2
+EXPECT $s1_f1_ino STAT_INO $M0/.snaps/snap1/dir/f3
+EXPECT $s1_f1_ino STAT_INO $M0/dir/.snaps/snap1/f3
+
+# Inode number of f1 in snap1 should be different from f1 in snap2
+tmp_ino=$(STAT_INO $M0/.snaps/snap2/f1)
+TEST [ $s1_f1_ino != $tmp_ino ]
+
+# Inode number of f1 in snap1 should be different from f1 in regular volume
+tmp_ino=$(STAT_INO $M0/f1)
+TEST [ $s1_f1_ino != $tmp_ino ]
+
+# Directory inode of snap1 should be different in each sub-dir
+s1_ino=$(STAT_INO $M0/.snaps/snap1)
+tmp_ino=$(STAT_INO $M0/dir/.snaps/snap1)
+TEST [ $s1_ino != $tmp_ino ]
+##########################################
+
+mkdir $M0/dir1;
+mkdir $M0/dir2;
+
+for i in {1..10} ; do echo "foo" > $M0/dir1/foo$i ; done
+for i in {1..10} ; do echo "foo" > $M0/dir2/foo$i ; done
+
+TEST $CLI snapshot create snap3 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "foo" > $M0/dir1/foo$i ; done
+for i in {11..20} ; do echo "foo" > $M0/dir2/foo$i ; done
+
+TEST $CLI snapshot create snap4 $V0 no-timestamp;
+## Test that features.uss takes only options enable/disable and throw error for
+## any other argument.
+for i in {1..10}; do
+ RANDOM_STRING=$(uuidgen | tr -dc 'a-zA-Z' | head -c 8)
+ TEST_IN_LOOP ! $CLI volume set $V0 features.uss $RANDOM_STRING
+done
+
+## Test that features.snapshot-directory:
+## contains only '0-9a-z-_'
+# starts with dot (.)
+# value cannot exceed 255 characters
+## and throws error for any other argument.
+TEST ! $CLI volume set $V0 features.snapshot-directory a/b
+TEST ! $CLI volume set $V0 features.snapshot-directory snaps
+TEST ! $CLI volume set $V0 features.snapshot-directory -a
+TEST ! $CLI volume set $V0 features.snapshot-directory .
+TEST ! $CLI volume set $V0 features.snapshot-directory ..
+TEST ! $CLI volume set $V0 features.snapshot-directory .123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+# test 15
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" count_snaps $M0
+
+NUM_SNAPS=$(ls $M0/.snaps | wc -l);
+
+TEST [ $NUM_SNAPS == 4 ]
+TEST ls $M0/.snaps/snap1;
+TEST ls $M0/.snaps/snap2;
+TEST ls $M0/.snaps/snap3;
+TEST ls $M0/.snaps/snap4;
+
+TEST ls $M0/.snaps/snap3/dir1;
+TEST ls $M0/.snaps/snap3/dir2;
+
+TEST ls $M0/.snaps/snap4/dir1;
+TEST ls $M0/.snaps/snap4/dir2;
+
+TEST ls $M0/dir1/.snaps/
+TEST ! ls $M0/dir1/.snaps/snap1;
+TEST ! ls $M0/dir2/.snaps/snap2;
+TEST ls $M0/dir1/.snaps/snap3;
+TEST ls $M0/dir2/.snaps/snap4;
+
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'r' $M0/.snaps/snap1/file1;
+TEST fd_cat $fd1
+
+# opening fd with in write mode for snapshot files should fail
+TEST fd2=`fd_available`
+TEST ! fd_open $fd1 'w' $M0/.snaps/snap1/file2;
+
+# lookup on .snaps in the snapshot world should fail
+TEST ! stat $M0/.snaps/snap1/.snaps
+
+# creating new entries in snapshots should fail
+TEST check_readonly mkdir $M0/.snaps/new
+TEST check_readonly touch $M0/.snaps/snap2/other;
+
+TEST fd3=`fd_available`
+TEST fd_open $fd3 'r' $M0/dir1/.snaps/snap3/foo1
+
+TEST fd_cat $fd3;
+
+TEST fd_close $fd1;
+TEST fd_close $fd2;
+TEST fd_close $fd3
+
+
+# similar tests on nfs mount
+##Wait for connection establishment between nfs server and brick process
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+#test 44
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+NUM_SNAPS=$(ls $N0/.snaps | wc -l);
+
+TEST [ $NUM_SNAPS == 4 ];
+
+TEST stat $N0/.snaps/snap1;
+TEST stat $N0/.snaps/snap2;
+
+TEST ls -l $N0/.snaps;
+
+# readdir + lookup on each entry
+TEST ls -l $N0/.snaps/snap1;
+TEST ls -l $N0/.snaps/snap2;
+
+# readdir + access each entry by doing stat. If snapview-server has not
+# filled the fs instance and handle in the inode context of the entry as
+# part of readdirp, then when stat comes (i.e fop comes directly without
+# a previous lookup), snapview-server should do a lookup of the entry via
+# gfapi call and fill in the fs instance + handle information in the inode
+# context
+TEST ls $N0/.snaps/snap3/;
+TEST stat $N0/.snaps/snap3/dir1;
+TEST stat $N0/.snaps/snap3/dir2;
+
+TEST ls -l $N0/.snaps/snap3/dir1;
+TEST ls -l $N0/.snaps/snap3/dir2;
+TEST ls -l $N0/.snaps/snap4/dir1;
+TEST ls -l $N0/.snaps/snap4/dir2;
+
+TEST ! ls -l $N0/dir1/.snaps/snap1;
+TEST ! ls -l $N0/dir2/.snaps/snap2;
+TEST ls -l $N0/dir1/.snaps/snap3;
+TEST ls -l $N0/dir2/.snaps/snap4;
+
+
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'r' $N0/.snaps/snap1/file1;
+TEST fd_cat $fd1
+
+TEST fd2=`fd_available`
+TEST ! fd_open $fd1 'w' $N0/.snaps/snap1/file2;
+
+TEST ! stat $N0/.snaps/snap1/.stat
+
+TEST check_readonly mkdir $N0/.snaps/new
+
+TEST check_readonly touch $N0/.snaps/snap2/other;
+
+TEST fd3=`fd_available`
+TEST fd_open $fd3 'r' $N0/dir1/.snaps/snap3/foo1
+
+TEST fd_cat $fd3;
+
+
+TEST fd_close $fd1;
+TEST fd_close $fd2;
+TEST fd_close $fd3;
+
+# test 73
+TEST $CLI volume set $V0 "features.snapshot-directory" .history
+
+#snapd client might take fraction of time to compare the volfile from glusterd
+#hence a EXPECT_WITHIN is a better choice here
+EXPECT_WITHIN 2 "Y" lookup "$M0/.history";
+
+NUM_SNAPS=$(ls $M0/.history | wc -l);
+
+TEST [ $NUM_SNAPS == 4 ]
+
+TEST ls $M0/.history/snap1;
+TEST ls $M0/.history/snap2;
+TEST ls $M0/.history/snap3;
+TEST ls $M0/.history/snap4;
+
+TEST ls $M0/.history/snap3/dir1;
+TEST ls $M0/.history/snap3/dir2;
+
+TEST ls $M0/.history/snap4/dir1;
+TEST ls $M0/.history/snap4/dir2;
+
+TEST ls $M0/dir1/.history/
+TEST ! ls $M0/dir1/.history/snap1;
+TEST ! ls $M0/dir2/.history/snap2;
+TEST ls $M0/dir1/.history/snap3;
+TEST ls $M0/dir2/.history/snap4;
+
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'r' $M0/.history/snap1/file1;
+TEST fd_cat $fd1
+
+# opening fd with in write mode for snapshot files should fail
+TEST fd2=`fd_available`
+TEST ! fd_open $fd1 'w' $M0/.history/snap1/file2;
+
+# lookup on .history in the snapshot world should fail
+TEST ! stat $M0/.history/snap1/.history
+
+# creating new entries in snapshots should fail
+TEST check_readonly mkdir $M0/.history/new
+TEST check_readonly touch $M0/.history/snap2/other;
+
+TEST fd3=`fd_available`
+TEST fd_open $fd3 'r' $M0/dir1/.history/snap3/foo1
+
+TEST fd_cat $fd3;
+
+TEST fd_close $fd1;
+TEST fd_close $fd2;
+TEST fd_close $fd3
+
+
+# similar tests on nfs mount
+# test 103
+TEST ls $N0/.history;
+
+NUM_SNAPS=$(ls $N0/.history | wc -l);
+
+TEST [ $NUM_SNAPS == 4 ];
+
+TEST ls -l $N0/.history/snap1;
+TEST ls -l $N0/.history/snap2;
+TEST ls -l $N0/.history/snap3;
+TEST ls -l $N0/.history/snap4;
+
+TEST ls -l $N0/.history/snap3/dir1;
+TEST ls -l $N0/.history/snap3/dir2;
+
+TEST ls -l $N0/.history/snap4/dir1;
+TEST ls -l $N0/.history/snap4/dir2;
+
+TEST ! ls -l $N0/dir1/.history/snap1;
+TEST ! ls -l $N0/dir2/.history/snap2;
+TEST ls -l $N0/dir1/.history/snap3;
+TEST ls -l $N0/dir2/.history/snap4;
+
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'r' $N0/.history/snap1/file1;
+TEST fd_cat $fd1
+
+TEST fd2=`fd_available`
+TEST ! fd_open $fd1 'w' $N0/.history/snap1/file2;
+
+TEST ! stat $N0/.history/snap1/.stat
+
+TEST check_readonly mkdir $N0/.history/new
+
+TEST check_readonly touch $N0/.history/snap2/other;
+
+TEST fd3=`fd_available`
+TEST fd_open $fd3 'r' $N0/dir1/.history/snap3/foo1
+
+TEST fd_cat $fd3;
+
+TEST fd_close $fd1;
+TEST fd_close $fd2;
+TEST fd_close $fd3;
+
+## Before killing daemon to avoid deadlocks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+#test 131
+TEST $CLI snapshot create snap5 $V0 no-timestamp
+TEST ls $M0/.history;
+
+function count_snaps
+{
+ local mount_point=$1;
+ local num_snaps;
+
+ num_snaps=$(ls $mount_point/.history | wc -l);
+
+ echo $num_snaps;
+}
+
+EXPECT_WITHIN 30 "5" count_snaps $M0;
+
+# deletion of a snapshot and creation of a new snapshot with same name
+# should not create problems. The data that was supposed to be present
+# in the deleted snapshot need not be present in the new snapshot just
+# because the name is same. Ex:
+# 1) Create a file "aaa"
+# 2) Create a snapshot snap6
+# 3) stat the file "aaa" in snap6 and it should succeed
+# 4) delete the file "aaa"
+# 5) Delete the snapshot snap6
+# 6) Create a snapshot snap6
+# 7) stat the file "aaa" in snap6 and it should fail now
+
+echo "aaa" > $M0/aaa;
+
+TEST $CLI snapshot create snap6 $V0 no-timestamp
+
+TEST ls $M0/.history;
+
+EXPECT_WITHIN 30 "6" count_snaps $M0;
+
+EXPECT_WITHIN 10 "Y" lookup $M0/.history/snap6/aaa
+
+TEST rm -f $M0/aaa;
+
+TEST $CLI snapshot delete snap6;
+
+# drop the caches so that, the dentry for "snap6" is
+# is forgotten from the client cache.
+drop_cache $M0
+
+EXPECT_WITHIN 30 "5" count_snaps $M0;
+
+# This should fail, as snap6 just got deleted.
+TEST ! stat $M0/.history/snap6
+
+TEST $CLI snapshot create snap6 $V0 no-timestamp
+
+TEST ls $M0/.history;
+
+EXPECT_WITHIN 30 "6" count_snaps $M0;
+
+TEST ls $M0/.history/snap6/;
+
+TEST ! stat $M0/.history/snap6/aaa;
+
+TEST stat $M0
+
+# done with the tests start cleaning up of things
+TEST $CLI volume set $V0 features.uss disable
+
+TEST $CLI snapshot delete snap6;
+
+TEST $CLI snapshot delete snap5;
+
+TEST $CLI snapshot delete snap4;
+
+TEST $CLI snapshot delete snap3;
+
+TEST $CLI snapshot delete snap2;
+
+TEST $CLI snapshot delete snap1;
+
+# nfs client has been already unmounted at line 333
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/basic/volfile-sanity.t b/tests/basic/volfile-sanity.t
new file mode 100644
index 00000000000..ef2f9344468
--- /dev/null
+++ b/tests/basic/volfile-sanity.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+
+killall glusterd
+
+# Client by default tries to connect to port 24007
+# So, start server on that port, and you can see
+# client successfully working.
+TEST $GFS --xlator-option "${V0}-server.transport.socket.listen-port=24007" \
+ -f /var/lib/glusterd/vols/${V0}/${V0}.${H0}.*.vol
+TEST $GFS -f /var/lib/glusterd/vols/${V0}/${V0}.tcp-fuse.vol $M0
+
+TEST $(df -h $M0 | grep -q ${V0})
+TEST $(cat /proc/mounts | grep -q $M0)
+
+TEST ! stat $M0/newfile;
+TEST touch $M0/newfile;
+TEST rm $M0/newfile;
+
+cleanup;
diff --git a/tests/basic/volume-scale-shd-mux.t b/tests/basic/volume-scale-shd-mux.t
new file mode 100644
index 00000000000..102de22468e
--- /dev/null
+++ b/tests/basic/volume-scale-shd-mux.t
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=6
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+
+for i in $(seq 1 2); do
+ TEST $CLI volume create ${V0}_afr$i replica 3 $H0:$B0/${V0}_afr${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_afr$i
+ TEST $CLI volume create ${V0}_ec$i disperse 6 redundancy 2 $H0:$B0/${V0}_ec${i}{0,1,2,3,4,5}
+ TEST $CLI volume start ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+#Check the thread count become to number of volumes*number of ec subvolume (2*6=12)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+#Check the thread count become to number of volumes*number of afr subvolume (3*6=18)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}{6,7,8};
+#Check the thread count become to number of volumes*number of afr subvolume plus 3 additional threads from newly added bricks (3*6+3=21)
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^21$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#Remove the brick and check the detach is successful
+$CLI volume remove-brick $V0 $H0:$B0/${V0}{6,7,8} force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "glusterfs_graph_cleanup"
+TEST $CLI volume add-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5};
+#Check the thread count become to number of volumes*number of ec subvolume plus 2 additional threads from newly added bricks (2*6+6=18)
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^18$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+
+#Remove the brick and check the detach is successful
+$CLI volume remove-brick ${V0}_ec1 $H0:$B0/${V0}_ec1_add{0,1,2,3,4,5} force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^12$" number_healer_threads_shd $V0 "ec_shd_index_healer"
+
+
+for i in $(seq 1 2); do
+ TEST $CLI volume stop ${V0}_afr$i
+ TEST $CLI volume stop ${V0}_ec$i
+done
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}4
+
+TEST touch $M0/foo{1..100}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^204$" get_pending_heal_count $V0
+
+TEST $CLI volume start ${V0} force
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST rm -rf $M0/*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+shd_pid=$(get_shd_mux_pid $V0)
+TEST $CLI volume create ${V0}_distribute1 $H0:$B0/${V0}_distribute10
+TEST $CLI volume start ${V0}_distribute1
+
+#Creating a non-replicate/non-ec volume should not have any effect in shd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+EXPECT "^${shd_pid}$" get_shd_mux_pid $V0
+
+TEST mkdir $B0/add/
+#Now convert the distributed volume to replicate
+TEST $CLI volume add-brick ${V0}_distribute1 replica 3 $H0:$B0/add/{2..3}
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^9$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#scale down the volume
+TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^6$" number_healer_threads_shd $V0 "afr_shd_index_healer"
+
+#Before stopping the process, make sure there is no pending clenup threads hanging
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" number_healer_threads_shd $V0 "glusterfs_graph_cleanup"
+
+TEST $CLI volume stop ${V0}
+TEST $CLI volume delete ${V0}
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+TEST rm -rf $B0/add/2 $B0/add/3
+
+#Now convert the distributed volume back to replicate and make sure that a new shd is spawned
+TEST $CLI volume add-brick ${V0}_distribute1 replica 3 $H0:$B0/add/{2..3};
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" shd_count
+EXPECT_WITHIN $HEAL_TIMEOUT "^3$" number_healer_threads_shd ${V0}_distribute1 "afr_shd_index_healer"
+
+#Now convert the replica volume to distribute again and make sure the shd is now stopped
+TEST $CLI volume remove-brick ${V0}_distribute1 replica 1 $H0:$B0/add/{2..3} force
+TEST rm -rf $B0/add/
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^0$" shd_count
+
+cleanup
+
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1708929
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1708929
diff --git a/tests/basic/volume-snap-scheduler.t b/tests/basic/volume-snap-scheduler.t
new file mode 100644
index 00000000000..a638c5cc46a
--- /dev/null
+++ b/tests/basic/volume-snap-scheduler.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+TEST $CLI volume start $V0
+
+## Create, start and mount meta_volume as
+## snap_scheduler expects shared storage to be enabled.
+## This test is very basic in nature not creating any snapshot
+## and purpose is to validate snap scheduling commands.
+
+TEST $CLI volume create $META_VOL replica 3 $H0:$B0/${META_VOL}{1,2,3};
+TEST $CLI volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H0 --volfile-id $META_VOL $META_MNT
+
+##function to check status
+function check_status_scheduler()
+{
+ local key=$1
+ snap_scheduler.py status | grep -F "$key" | wc -l
+}
+
+##Basic snap_scheduler command test init/enable/disable/list
+
+TEST snap_scheduler.py init
+
+TEST snap_scheduler.py enable
+
+EXPECT 1 check_status_scheduler "Enabled"
+
+TEST snap_scheduler.py disable
+
+EXPECT 1 check_status_scheduler "Disabled"
+
+TEST snap_scheduler.py list
+
+TEST $CLI volume stop $V0;
+
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/basic/volume-snapshot-clone.t b/tests/basic/volume-snapshot-clone.t
new file mode 100755
index 00000000000..e6da9d7ddca
--- /dev/null
+++ b/tests/basic/volume-snapshot-clone.t
@@ -0,0 +1,129 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../cluster.rc
+. $(dirname $0)/../snapshot.rc
+
+
+V1="patchy2"
+
+function create_volumes() {
+ $CLI_1 volume create $V0 $H1:$L1 &
+ PID_1=$!
+
+ $CLI_2 volume create $V1 $H2:$L2 $H3:$L3 &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function create_snapshots() {
+
+ $CLI_1 snapshot create $1 $2 no-timestamp&
+ PID_1=$!
+
+ wait $PID_1
+}
+
+
+
+function delete_snapshot() {
+ $CLI_1 snapshot delete $1 &
+ PID_1=$!
+
+ wait $PID_1
+}
+
+cleanup;
+
+TEST verify_lvm_version;
+#Create cluster with 3 nodes
+TEST launch_cluster 3;
+TEST setup_lvm 3
+
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
+
+create_volumes
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Created' volinfo_field $V1 'Status';
+
+start_volumes 2
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 'Started' volinfo_field $V1 'Status';
+
+TEST $CLI_1 snapshot config activate-on-create enable
+
+#Snapshot Operations
+create_snapshots ${V0}_snap ${V0};
+create_snapshots ${V1}_snap ${V1};
+
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+
+sleep 5
+TEST $CLI_1 snapshot clone ${V0}_clone ${V0}_snap
+TEST $CLI_1 snapshot clone ${V1}_clone ${V1}_snap
+
+EXPECT 'Created' volinfo_field ${V0}_clone 'Status';
+EXPECT 'Created' volinfo_field ${V1}_clone 'Status';
+
+TEST $CLI_1 volume start ${V0}_clone force;
+TEST $CLI_1 volume start ${V1}_clone force;
+
+EXPECT 'Started' volinfo_field ${V0}_clone 'Status';
+EXPECT 'Started' volinfo_field ${V1}_clone 'Status';
+
+
+TEST glusterfs -s $H1 --volfile-id=/${V0}_clone $M0
+TEST glusterfs -s $H2 --volfile-id=/${V1}_clone $M1
+
+TEST touch $M0/file1
+TEST touch $M1/file1
+
+TEST echo "Hello world" $M0/file1
+TEST echo "Hello world" $M1/file1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+TEST kill_glusterd 2;
+sleep 15
+TEST $glusterd_2;
+sleep 15
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field ${V0}_clone 'Status';
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field ${V1}_clone 'Status';
+
+TEST $CLI_1 volume stop ${V0}_clone
+TEST $CLI_1 volume stop ${V1}_clone
+
+TEST $CLI_1 volume delete ${V0}_clone
+TEST $CLI_1 volume delete ${V1}_clone
+
+TEST $CLI_1 snapshot clone ${V0}_clone ${V0}_snap
+TEST $CLI_1 snapshot clone ${V1}_clone ${V1}_snap
+
+EXPECT 'Created' volinfo_field ${V0}_clone 'Status';
+EXPECT 'Created' volinfo_field ${V1}_clone 'Status';
+
+#Clean up
+stop_force_volumes 2
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT 'Stopped' volinfo_field $V0 'Status';
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT 'Stopped' volinfo_field $V1 'Status';
+
+TEST delete_snapshot ${V0}_snap
+TEST delete_snapshot ${V1}_snap
+
+TEST ! snapshot_exists 1 ${V0}_snap
+TEST ! snapshot_exists 1 ${V1}_snap
+
+delete_volumes 2
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "N" volume_exists $V0
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "N" volume_exists $V1
+
+cleanup;
diff --git a/tests/basic/volume-snapshot-xml.t b/tests/basic/volume-snapshot-xml.t
new file mode 100755
index 00000000000..ff63b54538d
--- /dev/null
+++ b/tests/basic/volume-snapshot-xml.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+
+# Snapshot config xmls
+EXPECT "enable" get-xml "snapshot config activate-on-create enable" "activateOnCreate"
+EXPECT "100" get-xml "snapshot config $V0 snap-max-hard-limit 100" "newHardLimit"
+EXPECT "70" get-xml "snapshot config snap-max-soft-limit 70" "newSoftLimit"
+EXPECT "enable" get-xml "snapshot config auto-delete enable" "autoDelete"
+
+# Snapshot create, activate, deactivate xmls
+EXPECT "snap1" get-xml "snapshot create snap1 $V0 no-timestamp" "name"
+EXPECT "snap1" get-xml "snapshot deactivate snap1" "name"
+EXPECT "snap1" get-xml "snapshot activate snap1" "name"
+EXPECT "snap2" get-xml "snapshot create snap2 $V0 no-timestamp" "name"
+
+# Snapshot info xmls
+EXPECT "2" get-xml "snapshot info" "count"
+EXPECT "Started" get-xml "snapshot info" "status"
+EXPECT "2" get-xml "snapshot info volume $V0" "count"
+EXPECT "Started" get-xml "snapshot info volume $V0" "status"
+EXPECT "1" get-xml "snapshot info snap1" "count"
+EXPECT "2" get-xml "snapshot info snap1" "snapCount"
+EXPECT "Started" get-xml "snapshot info snap1" "status"
+
+# Snapshot list xmls
+EXPECT "2" get-xml "snapshot list" "count"
+EXPECT "snap2" get-xml "snapshot list $V0" "snapshot"
+
+# Snapshot status xmls
+EXPECT "snap2" get-xml "snapshot status" "name"
+EXPECT "snap2" get-xml "snapshot deactivate snap2" "name"
+#XPECT "N/A" get-xml "snapshot status" "pid"
+EXPECT "snap1" get-xml "snapshot status snap1" "name"
+EXPECT "Yes" get-xml "snapshot status snap1" "brick_running"
+
+# Snapshot restore xmls
+TEST $CLI volume stop $V0
+EXPECT "snap2" get-xml "snapshot restore snap2" "name"
+EXPECT "30807" get-xml "snapshot restore snap2" "opErrno"
+EXPECT "0" get-xml "snapshot restore snap1" "opErrno"
+
+# Snapshot delete xmls
+TEST $CLI volume start $V0 force
+EXPECT "snap1" get-xml "snapshot create snap1 $V0 no-timestamp" "name"
+EXPECT "snap2" get-xml "snapshot create snap2 $V0 no-timestamp" "name"
+EXPECT "snap3" get-xml "snapshot create snap3 $V0 no-timestamp" "name"
+EXPECT "Success" get-xml "snapshot delete snap3" "status"
+EXPECT "Success" get-xml "snapshot delete all" "status"
+EXPECT "0" get-xml "snapshot list" "count"
+#XPECT "snap1" get-xml "snapshot create snap1 $V0 no-timestamp" "name"
+#XPECT "snap2" get-xml "snapshot create snap2 $V0 no-timestamp" "name"
+#XPECT "snap3" get-xml "snapshot create snap3 $V0 no-timestamp" "name"
+#XPECT "Success" get-xml "snapshot delete volume $V0" "status"
+#XPECT "0" get-xml "snapshot list" "count"
+
+# Snapshot clone xmls
+# Snapshot clone xml is broken. Once it is fixed it will be added here.
+
+cleanup;
diff --git a/tests/basic/volume-snapshot.t b/tests/basic/volume-snapshot.t
new file mode 100755
index 00000000000..dd938b4064a
--- /dev/null
+++ b/tests/basic/volume-snapshot.t
@@ -0,0 +1,172 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../cluster.rc
+. $(dirname $0)/../snapshot.rc
+
+
+V1="patchy2"
+
+function create_volumes() {
+ $CLI_1 volume create $V0 $H1:$L1 &
+ PID_1=$!
+
+ $CLI_2 volume create $V1 $H2:$L2 $H3:$L3 &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function create_snapshots() {
+ $CLI_1 snapshot create ${V0}_snap ${V0} no-timestamp &
+ PID_1=$!
+
+ $CLI_1 snapshot create ${V1}_snap ${V1} no-timestamp &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function create_snapshots_with_timestamp() {
+ $CLI_1 snapshot create ${V0}_snap1 ${V0}&
+ PID_1=$!
+ $CLI_1 snapshot create ${V1}_snap1 ${V1}&
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+
+function activate_snapshots() {
+ $CLI_1 snapshot activate ${V0}_snap &
+ PID_1=$!
+
+ $CLI_1 snapshot activate ${V1}_snap &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function deactivate_snapshots() {
+ $CLI_1 snapshot deactivate ${V0}_snap &
+ PID_1=$!
+
+ $CLI_1 snapshot deactivate ${V1}_snap &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function delete_snapshots() {
+ $CLI_1 snapshot delete $1 &
+ PID_1=$!
+
+ $CLI_1 snapshot delete $2 &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function restore_snapshots() {
+ $CLI_1 snapshot restore ${V0}_snap &
+ PID_1=$!
+
+ $CLI_1 snapshot restore ${V1}_snap &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+cleanup;
+
+TEST verify_lvm_version;
+#Create cluster with 3 nodes
+TEST launch_cluster 3;
+TEST setup_lvm 3
+
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
+
+create_volumes
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Created' volinfo_field $V1 'Status';
+
+start_volumes 2
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT 'Started' volinfo_field $V1 'Status';
+
+TEST $CLI_1 snapshot config activate-on-create enable
+
+#Snapshot Operations
+create_snapshots
+
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+
+EXPECT '1' volinfo_field $V0 'Snapshot Count';
+EXPECT '1' volinfo_field $V1 'Snapshot Count';
+EXPECT "1" get-cmd-field-xml "volume info $V0" "snapshotCount"
+EXPECT "1" get-cmd-field-xml "volume info $V1" "snapshotCount"
+
+deactivate_snapshots
+
+EXPECT 'Stopped' snapshot_status ${V0}_snap;
+EXPECT 'Stopped' snapshot_status ${V1}_snap;
+
+activate_snapshots
+
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+
+deactivate_snapshots
+activate_snapshots
+
+TEST snapshot_exists 1 ${V0}_snap
+TEST snapshot_exists 1 ${V1}_snap
+TEST $CLI_1 snapshot config $V0 snap-max-hard-limit 100
+TEST $CLI_1 snapshot config $V1 snap-max-hard-limit 100
+
+TEST glusterfs -s $H1 --volfile-id=/snaps/${V0}_snap/${V0} $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs -s $H2 --volfile-id=/snaps/${V1}_snap/${V1} $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#create timestamp appended snaps
+create_snapshots_with_timestamp;
+new_name1=`$CLI_1 snapshot list ${V0} | grep ${V0}_snap1`;
+new_name2=`$CLI_1 snapshot list ${V1} | grep ${V1}_snap1`;
+
+EXPECT '2' volinfo_field $V0 'Snapshot Count';
+EXPECT '2' volinfo_field $V1 'Snapshot Count';
+EXPECT "2" get-cmd-field-xml "volume info $V0" "snapshotCount"
+EXPECT "2" get-cmd-field-xml "volume info $V1" "snapshotCount"
+
+EXPECT_NOT "{V0}_snap1" echo $new_name1;
+EXPECT_NOT "{V1}_snap1" echo $new_name1;
+delete_snapshots $new_name1 $new_name2;
+
+EXPECT '1' volinfo_field $V0 'Snapshot Count';
+EXPECT '1' volinfo_field $V1 'Snapshot Count';
+EXPECT "1" get-cmd-field-xml "volume info $V0" "snapshotCount"
+EXPECT "1" get-cmd-field-xml "volume info $V1" "snapshotCount"
+
+#Clean up
+stop_force_volumes 2
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+EXPECT 'Stopped' volinfo_field $V1 'Status';
+
+restore_snapshots
+TEST ! snapshot_exists 1 ${V0}_snap
+TEST ! snapshot_exists 1 ${V1}_snap
+
+EXPECT '0' volinfo_field $V0 'Snapshot Count';
+EXPECT '0' volinfo_field $V1 'Snapshot Count';
+EXPECT "0" get-cmd-field-xml "volume info $V0" "snapshotCount"
+EXPECT "0" get-cmd-field-xml "volume info $V1" "snapshotCount"
+
+delete_volumes 2
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "N" volume_exists $V0
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "N" volume_exists $V1
+
+cleanup;
diff --git a/tests/basic/volume-status.t b/tests/basic/volume-status.t
new file mode 100644
index 00000000000..01d7ebf6c07
--- /dev/null
+++ b/tests/basic/volume-status.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+function gluster_client_list_status () {
+ gluster volume status $V0 client-list | sed -n '/Name/','/total/'p | wc -l
+}
+
+function gluster_fd_status () {
+ gluster volume status $V0 fd | sed -n '/Brick :/ p' | wc -l
+}
+
+function gluster_inode_status () {
+ gluster volume status $V0 inode | sed -n '/Connection / p' | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+TEST $CLI volume set $V0 nfs.disable false
+
+TEST $CLI volume start $V0;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+TEST touch $M0/file{1..20}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" gluster_fd_status
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "768" gluster_inode_status
+
+##Disabling this test until the client-list command works for brick-multiplexing
+#EXPECT_WITHIN $PROCESS_UP_TIMEOUT "7" gluster_client_list_status
+
+##Wait for connection establishment between nfs server and brick process
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+## Mount NFS
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+TEST $CLI volume status all
+TEST $CLI volume status $V0
+
+function test_nfs_cmds () {
+ local ret=0
+ declare -a nfs_cmds=("clients" "mem" "inode" "callpool")
+ for cmd in ${nfs_cmds[@]}; do
+ $CLI volume status $V0 nfs $cmd
+ (( ret += $? ))
+ $CLI volume status $V0 nfs $cmd --xml
+ (( ret += $? ))
+ done
+ return $ret
+}
+
+function test_shd_cmds () {
+ local ret=0
+ declare -a shd_cmds=("mem" "inode" "callpool")
+ for cmd in ${shd_cmds[@]}; do
+ $CLI volume status $V0 shd $cmd
+ (( ret += $? ))
+ $CLI volume status $V0 shd $cmd --xml
+ (( ret += $? ))
+ done
+ return $ret
+}
+
+function test_brick_cmds () {
+ local ret=0
+ declare -a cmds=("detail" "clients" "mem" "inode" "fd" "callpool")
+ for cmd in ${cmds[@]}; do
+ for i in {1..2}; do
+ $CLI volume status $V0 $H0:$B0/${V0}$i $cmd
+ (( ret += $? ))
+ $CLI volume status $V0 $H0:$B0/${V0}$i $cmd --xml
+ (( ret += $? ))
+ done
+ done
+ return $ret
+}
+
+function test_status_cmds () {
+ local ret=0
+ declare -a cmds=("detail" "clients" "mem" "inode" "fd" "callpool" "tasks" "client-list")
+ for cmd in ${cmds[@]}; do
+ $CLI volume status $V0 $cmd
+ (( ret += $? ))
+ $CLI volume status $V0 $cmd --xml
+ (( ret += $? ))
+ done
+ return $ret
+}
+
+TEST test_shd_cmds;
+TEST test_nfs_cmds;
+TEST test_brick_cmds;
+TEST test_status_cmds;
+
+
+## Before killing daemon to avoid deadlocks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+cleanup;
diff --git a/tests/basic/volume.t b/tests/basic/volume.t
new file mode 100644
index 00000000000..27fe093d07d
--- /dev/null
+++ b/tests/basic/volume.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{9,10,11};
+EXPECT '9' brick_count $V0
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}{1,2,3} force;
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume top $V0 read-perf bs 4096 count 1000
+TEST $CLI volume top $V0 write-perf bs 1048576 count 2
+
+TEST touch $M0/foo
+
+# statedump path should be a directory, setting it to a file path should fail
+
+TEST ! $CLI v set $V0 server.statedump-path $M0/foo;
+EXPECT '/var/run/gluster' $CLI v get $V0 server.statedump-path
+
+#set the statedump path to an existing ditectory which should succeed
+TEST mkdir $D0/level;
+TEST $CLI v set $V0 server.statedump-path $D0/level
+EXPECT '/level' volinfo_field $V0 'server.statedump-path'
+
+ret=$(ls $D0/level | wc -l);
+TEST [ $ret == 0 ]
+TEST $CLI v statedump $V0;
+ret=$(ls $D0/level | wc -l);
+TEST ! [ $ret == 0 ]
+
+#set the statedump path to a non - existing directory which should fail
+TEST ! $CLI v set $V0 server.statedump-path /root/test
+EXPECT '/level' volinfo_field $V0 'server.statedump-path'
+
+TEST rm -rf $D0/level
+
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status'
+
+TEST $CLI volume delete $V0
+TEST ! $CLI volume info $V0
+
+cleanup;
diff --git a/tests/basic/xlator-pass-through-sanity.t b/tests/basic/xlator-pass-through-sanity.t
new file mode 100644
index 00000000000..e996be89260
--- /dev/null
+++ b/tests/basic/xlator-pass-through-sanity.t
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume set $V0 performance.io-cache-pass-through enable;
+TEST $CLI volume start $V0;
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+# This test covers lookup, mkdir, mknod, symlink, link, rename,
+# create operations
+TEST $(dirname $0)/rpc-coverage.sh $M1
+
+cleanup;
diff --git a/tests/bitrot/br-signer-threads-config-1797869.t b/tests/bitrot/br-signer-threads-config-1797869.t
new file mode 100644
index 00000000000..657ef3eedaf
--- /dev/null
+++ b/tests/bitrot/br-signer-threads-config-1797869.t
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../cluster.rc
+
+function get_bitd_count_1 {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H1 | wc -l
+}
+
+function get_bitd_count_2 {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H2 | wc -l
+}
+
+function get_bitd_pid_1 {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H1 | awk '{print $2}'
+}
+
+function get_bitd_pid_2 {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H2 | awk '{print $2}'
+}
+
+function get_signer_th_count_1 {
+ ps -eL | grep $(get_bitd_pid_1) | grep glfs_brpobj | wc -l
+}
+
+function get_signer_th_count_2 {
+ ps -eL | grep $(get_bitd_pid_2) | grep glfs_brpobj | wc -l
+}
+
+cleanup;
+
+TEST launch_cluster 2
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count;
+
+TEST $CLI_1 volume create $V0 $H1:$B1
+TEST $CLI_1 volume create $V1 $H2:$B2
+EXPECT 'Created' volinfo_field_1 $V0 'Status';
+EXPECT 'Created' volinfo_field_1 $V1 'Status';
+
+TEST $CLI_1 volume start $V0
+TEST $CLI_1 volume start $V1
+EXPECT 'Started' volinfo_field_1 $V0 'Status';
+EXPECT 'Started' volinfo_field_1 $V1 'Status';
+
+#Enable bitrot
+TEST $CLI_1 volume bitrot $V0 enable
+TEST $CLI_1 volume bitrot $V1 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_2
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_2
+
+old_bitd_pid_1=$(get_bitd_pid_1)
+old_bitd_pid_2=$(get_bitd_pid_2)
+TEST $CLI_1 volume bitrot $V0 signer-threads 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_signer_th_count_1
+EXPECT_NOT "$old_bitd_pid_1" get_bitd_pid_1;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" get_signer_th_count_2
+EXPECT "$old_bitd_pid_2" get_bitd_pid_2;
+
+old_bitd_pid_1=$(get_bitd_pid_1)
+old_bitd_pid_2=$(get_bitd_pid_2)
+TEST $CLI_1 volume bitrot $V1 signer-threads 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_signer_th_count_2
+EXPECT_NOT "$old_bitd_pid_2" get_bitd_pid_2;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_signer_th_count_1
+EXPECT "$old_bitd_pid_1" get_bitd_pid_1;
+
+cleanup;
diff --git a/tests/bitrot/br-state-check.t b/tests/bitrot/br-state-check.t
new file mode 100644
index 00000000000..2142275699e
--- /dev/null
+++ b/tests/bitrot/br-state-check.t
@@ -0,0 +1,83 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+cleanup;
+SCRIPT_TIMEOUT=350
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a distribute volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}2 $H0:$B0/${V0}3;
+TEST $CLI volume start $V0;
+
+## Enable bitrot on volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_scrubd_count
+
+## perform a series of scrub related state change tests. As of now, there'
+## no way to check if a given change has been correctly acknowledged by
+## the scrub process as there isn't an _interface_ to check scrub internal
+## state (yet). What's been verified here is scrub state machine execution
+## w.r.t. locking and faults.
+
+## 0x0: verify scrub rescheduling
+TEST $CLI volume bitrot $V0 scrub-frequency monthly
+TEST $CLI volume bitrot $V0 scrub-frequency daily
+TEST $CLI volume bitrot $V0 scrub-frequency hourly
+
+## 0x1: test reschedule after pause/resume
+TEST $CLI volume bitrot $V0 scrub pause
+TEST $CLI volume bitrot $V0 scrub-frequency daily
+TEST $CLI volume bitrot $V0 scrub resume
+
+## 0x2: test reschedule w/ an offline brick
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+TEST $CLI volume bitrot $V0 scrub-frequency hourly
+TEST $CLI volume bitrot $V0 scrub-throttle aggressive
+
+## 0x3: test pause/resume w/ an offline brick
+TEST $CLI volume bitrot $V0 scrub pause
+TEST $CLI volume bitrot $V0 scrub-frequency monthly
+TEST $CLI volume bitrot $V0 scrub resume
+
+## 0x4: test "start" from a paused scrub state
+
+TEST $CLI volume bitrot $V0 scrub pause
+TEST $CLI volume start $V0 force
+
+## 0x4a: try pausing an already paused scrub
+TEST ! $CLI volume bitrot $V0 scrub pause
+
+## 0x4b: perform configuration changes
+TEST $CLI volume bitrot $V0 scrub-frequency hourly
+TEST $CLI volume bitrot $V0 scrub-throttle lazy
+TEST $CLI volume bitrot $V0 scrub resume
+
+## 0x5: test cleanup upon brick going offline
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}3
+
+## 0x6: test cleanup upon brick going offline when srubber is paused
+## (initially paused and otherwise)
+
+## 0x6a: initially paused case
+TEST $CLI volume bitrot $V0 scrub pause
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}3
+TEST $CLI volume bitrot $V0 scrub resume
+
+## 0x6b: paused _after_ execution
+TEST $CLI volume start $V0 force
+TEST $CLI volume bitrot $V0 scrub pause
+TEST kill_brick $V0 $H0 $B0/${V0}2
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1332473
diff --git a/tests/bitrot/br-stub.c b/tests/bitrot/br-stub.c
new file mode 100644
index 00000000000..1111f710f59
--- /dev/null
+++ b/tests/bitrot/br-stub.c
@@ -0,0 +1,195 @@
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <errno.h>
+
+#include "bit-rot-object-version.h"
+
+/* NOTE: no size discovery */
+int
+brstub_validate_version(char *bpath, unsigned long version)
+{
+ int ret = 0;
+ int match = 0;
+ size_t xsize = 0;
+ br_version_t *xv = NULL;
+
+ xsize = sizeof(br_version_t);
+
+ xv = calloc(1, xsize);
+ if (!xv) {
+ match = -1;
+ goto err;
+ }
+
+ ret = getxattr(bpath, "trusted.bit-rot.version", xv, xsize);
+ if (ret < 0) {
+ if (errno == ENODATA)
+ match = -2;
+ goto err;
+ }
+
+ if (xv->ongoingversion != version) {
+ match = -3;
+ fprintf(stderr, "ongoingversion: %lu\n", xv->ongoingversion);
+ }
+ free(xv);
+
+err:
+ return match;
+}
+
+int
+brstub_write_validation(char *filp, char *bpath, unsigned long startversion)
+{
+ int fd1 = 0;
+ int fd2 = 0;
+ int ret = 0;
+ char *string = "string\n";
+
+ /* read only check */
+ fd1 = open(filp, O_RDONLY);
+ if (fd1 < 0)
+ goto err;
+ close(fd1);
+
+ ret = brstub_validate_version(bpath, startversion);
+ if (ret != -2)
+ goto err;
+
+ /* single open (write/) check */
+ fd1 = open(filp, O_RDWR);
+ if (fd1 < 0)
+ goto err;
+
+ ret = write(fd1, string, strlen(string));
+ if (ret <= 0)
+ goto err;
+ /**
+ * Fsync is done so that the write call has properly reached the
+ * disk. For fuse mounts write-behind xlator would have held the
+ * writes with itself and for nfs, client would have held the
+ * write in its cache. So write fop would not have triggered the
+ * versioning as it would have not reached the bit-rot-stub.
+ */
+ fsync(fd1);
+ ret = brstub_validate_version(bpath, startversion);
+ if (ret != 0)
+ goto err;
+ ret = write(fd1, string, strlen(string));
+ if (ret <= 0)
+ goto err;
+ fsync(fd1); /* let it reach the disk */
+
+ ret = brstub_validate_version(bpath, startversion);
+ if (ret != 0)
+ goto err;
+
+ close(fd1);
+
+ /**
+ * Well, this is not a _real_ test per se . For this test to pass
+ * the inode should not get a forget() in the interim. Therefore,
+ * perform this test asap.
+ */
+
+ /* multi open (write/) check */
+ fd1 = open(filp, O_RDWR);
+ if (fd1 < 0)
+ goto err;
+ fd2 = open(filp, O_WRONLY);
+ if (fd1 < 0)
+ goto err;
+
+ ret = write(fd1, string, strlen(string));
+ if (ret <= 0)
+ goto err;
+
+ ret = write(fd2, string, strlen(string));
+ if (ret <= 0)
+ goto err;
+
+ /* probably do a syncfs() */
+ fsync(fd1);
+ fsync(fd2);
+
+ close(fd1);
+ close(fd2);
+
+ /**
+ * incremented once per write()/write().../close()/close() sequence
+ */
+ ret = brstub_validate_version(bpath, startversion);
+ if (ret != 0)
+ goto err;
+
+ return 0;
+
+err:
+ return -1;
+}
+
+int
+brstub_new_object_validate(char *filp, char *brick)
+{
+ int ret = 0;
+ char *fname = NULL;
+ char bpath[PATH_MAX] = {
+ 0,
+ };
+
+ fname = basename(filp);
+ if (!fname)
+ goto err;
+
+ (void)snprintf(bpath, PATH_MAX, "%s/%s", brick, fname);
+
+ printf("Validating initial version..\n");
+ ret = brstub_validate_version(bpath, 2);
+ if (ret != -2) /* version _should_ be missing */
+ goto err;
+
+ printf("Validating version on modifications..\n");
+ ret = brstub_write_validation(filp, bpath, 2);
+ if (ret < 0)
+ goto err;
+
+ return 0;
+
+err:
+ return -1;
+}
+
+int
+main(int argc, char **argv)
+{
+ int ret = 0;
+ char *filp = NULL;
+ char *brick = NULL;
+
+ if (argc != 3) {
+ printf("Usage: %s <path> <brick>\n", argv[0]);
+ goto err;
+ }
+
+ filp = argv[1];
+ brick = argv[2];
+
+ printf("Validating object version [%s]\n", filp);
+ ret = brstub_new_object_validate(filp, brick);
+ if (ret < 0)
+ goto err;
+
+ return 0;
+
+err:
+ return -1;
+}
diff --git a/tests/bitrot/br-stub.t b/tests/bitrot/br-stub.t
new file mode 100644
index 00000000000..cc0319afac9
--- /dev/null
+++ b/tests/bitrot/br-stub.t
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+STUB_SOURCE=$(dirname $0)/br-stub.c
+STUB_EXEC=$(dirname $0)/br-stub
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a distribute volume (B=2)
+TEST $CLI volume create $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}2;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '2' brick_count $V0
+TEST $CLI volume set $V0 nfs.disable false
+
+## Turn off write-behind (write-behind clubs writes together)
+TEST $CLI volume set $V0 performance.write-behind off
+#EXPECT 'off' volinfo_field $V0 'performance.open-behind'
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## enable bitrot
+TEST $CLI volume bitrot $V0 enable;
+
+## Wait for gluster nfs to come up
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+## Build stub C source
+build_tester $STUB_SOURCE -o $STUB_EXEC -I$(dirname $0)/../../xlators/features/bit-rot/src/stub
+TEST [ -e $STUB_EXEC ]
+
+## create & check version
+fname="$M0/filezero"
+touch $fname;
+backpath=$(get_backend_paths $fname)
+
+TEST $STUB_EXEC $fname $(dirname $backpath)
+
+rm -f $fname;
+
+## test nfs
+fname="$N0/filezero"
+touch $fname; # backpath remains same..
+
+TEST $STUB_EXEC $fname $(dirname $backpath)
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+##cleanups..
+rm -f $STUB_EXEC
+
+cleanup;
diff --git a/tests/bitrot/bug-1207627-bitrot-scrub-status.t b/tests/bitrot/bug-1207627-bitrot-scrub-status.t
new file mode 100644
index 00000000000..a361986fdaf
--- /dev/null
+++ b/tests/bitrot/bug-1207627-bitrot-scrub-status.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+## Test case for bitrot scrub status BZ:1207627
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+## Enable bitrot for volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+## Setting scrubber frequency daily
+TEST $CLI volume bitrot $V0 scrub-frequency hourly
+
+## Setting scrubber throttle value lazy
+TEST $CLI volume bitrot $V0 scrub-throttle lazy
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'lazy' scrub_status $V0 'Scrub impact'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'hourly' scrub_status $V0 'Scrub frequency'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
+
+## Set expiry-timeout to 1 sec
+TEST $CLI volume set $V0 features.expiry-time 1
+
+##Mount $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+#Create sample file
+TEST `echo "1234" > $M0/FILE1`
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' "/$B0/${V0}1/FILE1"
+
+##Corrupt the file
+TEST `echo "corrupt" >> /$B0/${V0}1/FILE1`
+
+## Ondemand scrub
+TEST $CLI volume bitrot $V0 scrub ondemand
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.bad-file' check_for_xattr 'trusted.bit-rot.bad-file' "/$B0/${V0}1/FILE1"
+
+cleanup;
diff --git a/tests/bitrot/bug-1221914.t b/tests/bitrot/bug-1221914.t
new file mode 100644
index 00000000000..7f6c10c50df
--- /dev/null
+++ b/tests/bitrot/bug-1221914.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+STUB_SOURCE=$(dirname $0)/br-stub.c
+STUB_EXEC=$(dirname $0)/br-stub
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a distribute volume (B=2)
+TEST $CLI volume create $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}2;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '2' brick_count $V0
+
+## Turn off write-behind (write-behind clubs writes together)
+TEST $CLI volume set $V0 performance.write-behind off
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Enable bitrot
+TEST $CLI volume bitrot $V0 enable;
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+## Build stub C source
+build_tester $STUB_SOURCE -o $STUB_EXEC -I$(dirname $0)/../../xlators/features/bit-rot/src/stub
+TEST [ -e $STUB_EXEC ]
+
+## create & check version
+fname="$M0/filezero"
+$PYTHON -c "import os,stat; os.mknod('${fname}', stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR | stat.S_IROTH | stat.S_IRGRP)"
+
+backpath=$(get_backend_paths $fname)
+
+TEST $STUB_EXEC $fname $(dirname $backpath)
+
+rm -f $fname;
+
+##cleanups..
+rm -f $STUB_EXEC
+
+cleanup;
diff --git a/tests/bitrot/bug-1244613.t b/tests/bitrot/bug-1244613.t
new file mode 100644
index 00000000000..57b86a94ac0
--- /dev/null
+++ b/tests/bitrot/bug-1244613.t
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+. $(dirname $0)/../fileio.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=16
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 nfs.disable false
+
+# The test makes use of inode-lru-limit to hit a scenario, where we
+# find an inode whose ancestry is not there. Following is the
+# hypothesis (which is confirmed by seeing logs indicating that
+# codepath has been executed, but not through a good understanding of
+# NFS internals).
+
+# At the end of an fop, the reference count of an inode would be
+# zero. The inode (and its ancestry) persists in memory only
+# because of non-zero lookup count. These looked up inodes are put
+# in an lru queue of size 1 (here). So, there can be at most one
+# such inode in memory.
+
+# NFS Server makes use of anonymous fds. So, if it cannot find
+# valid fd, it does a nameless lookup. This gives us an inode
+# whose ancestry is NULL. When a write happens on this inode,
+# quota-enforcer/marker finds a NULL ancestry and asks
+# storage/posix to build it.
+
+TEST $CLI volume set $V0 network.inode-lru-limit 1
+TEST $CLI volume set $V0 performance.nfs.write-behind off
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Enable bitrot
+TEST $CLI volume bitrot $V0 enable;
+
+## Wait for gluster nfs to come up
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+TEST mount_nfs $H0:/$V0 $N0;
+deep=/0/1/2/3/4/5/6/7/8/9
+TEST mkdir -p $N0/$deep
+
+TEST touch $N0/$deep/file1 $N0/$deep/file2 $N0/$deep/file3 $N0/$deep/file4
+
+TEST fd_open 3 'w' "$N0/$deep/file1"
+TEST fd_open 4 'w' "$N0/$deep/file2"
+TEST fd_open 5 'w' "$N0/$deep/file3"
+TEST fd_open 6 'w' "$N0/$deep/file4"
+
+# consume all quota
+echo "Hello" > $N0/$deep/new_file_1
+echo "World" >> $N0/$deep/new_file_1
+echo 1 >> $N0/$deep/new_file_1
+echo 2 >> $N0/$deep/new_file_1
+
+
+# At the end of each fop in server, reference count of the
+# inode associated with each of the file above drops to zero and hence
+# put into lru queue. Since lru-limit is set to 1, an fop next file
+# will displace the current inode from itable. This will ensure that
+# when writes happens on same fd, fd resolution results in
+# nameless lookup from server and encounters an fd
+# associated with an inode whose parent is not present in itable.
+
+for j in $(seq 1 2); do
+ for i in $(seq 3 6); do
+ TEST_IN_LOOP fd_write $i "content"
+ TEST_IN_LOOP sync
+ done
+done
+
+exec 3>&-
+exec 4>&-
+exec 5>&-
+exec 6>&-
+
+$CLI volume statedump $V0 all
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST $CLI volume stop $V0
+
+cleanup;
diff --git a/tests/bitrot/bug-1294786.t b/tests/bitrot/bug-1294786.t
new file mode 100644
index 00000000000..5b4b6ddb4d3
--- /dev/null
+++ b/tests/bitrot/bug-1294786.t
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../cluster.rc
+
+function get_bitd_count_1 {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H1 | wc -l
+}
+
+function get_bitd_count_2 {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | grep $H2 | wc -l
+}
+
+function get_node_uuid {
+ getfattr -n trusted.glusterfs.node-uuid --only-values $M0/FILE1 2>/dev/null
+}
+
+cleanup;
+
+TEST launch_cluster 2
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count;
+
+TEST $CLI_1 volume create $V0 replica 2 $H1:$B1 $H2:$B2
+EXPECT 'Created' volinfo_field_1 $V0 'Status';
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field_1 $V0 'Status';
+
+uuid1=$($CLI_1 system:: uuid get | awk '{print $2}')
+uuid2=$($CLI_2 system:: uuid get | awk '{print $2}')
+
+##Mount $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H1 $M0
+
+#Enable bitrot
+TEST $CLI_1 volume bitrot $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_2
+
+#Create sample file
+TEST `echo "1234" > $M0/FILE1`
+TEST `echo "5678" > $M0/FILE2`
+gfid1=$(getfattr -n glusterfs.gfid.string --only-values $M0/FILE1)
+gfid2=$(getfattr -n glusterfs.gfid.string --only-values $M0/FILE2)
+
+EXPECT "$uuid1" get_node_uuid;
+
+#Corrupt file from back-end
+TEST stat $B1/FILE1
+TEST stat $B1/FILE2
+echo "Corrupted data" >> $B1/FILE1
+echo "Corrupted data" >> $B1/FILE2
+#Manually set bad-file xattr since we can't wait for an hour for scrubber.
+TEST setfattr -n trusted.bit-rot.bad-file -v 0x3100 $B1/FILE1
+TEST setfattr -n trusted.bit-rot.bad-file -v 0x3100 $B1/FILE2
+TEST touch "$B1/.glusterfs/quarantine/$gfid1"
+TEST chmod 000 "$B1/.glusterfs/quarantine/$gfid1"
+TEST touch "$B1/.glusterfs/quarantine/$gfid2"
+TEST chmod 000 "$B1/.glusterfs/quarantine/$gfid2"
+EXPECT "4" get_quarantine_count "$B1";
+
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field_1 $V0 'Status';
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count_2
+#Trigger lookup so that bitrot xlator marks file as bad in its inode context.
+TEST stat $M0/FILE1
+TEST stat $M0/FILE2
+
+EXPECT "$uuid2" get_node_uuid;
+
+#BUG 1308961
+#Remove bad files from mount, it should be removed from quarantine directory.
+TEST rm -f $M0/FILE1
+TEST ! stat "$B1/.glusterfs/quarantine/$gfid1"
+
+#BUG 1308961
+#Set network.inode-lru-limit to 5 and exceed the limit by creating 10 other files.
+#The bad entry from quarantine directory should not be removed.
+TEST $CLI_1 volume set $V0 network.inode-lru-limit 5
+for i in {1..10}
+do
+ echo "1234" > $M0/file_$i
+done
+TEST stat "$B1/.glusterfs/quarantine/$gfid2"
+
+cleanup;
diff --git a/tests/bitrot/bug-1373520.t b/tests/bitrot/bug-1373520.t
new file mode 100644
index 00000000000..6af5124e86e
--- /dev/null
+++ b/tests/bitrot/bug-1373520.t
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create a disperse volume
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+
+#Disable self heal daemon as it races in this test with lookup on volume
+#stop and start.
+$CLI volume set $V0 self-heal-daemon off
+
+#Disable few perf xlators to get the first lookup on the brick
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.force-readdirp off
+TEST $CLI volume set $V0 dht.force-readdirp off
+
+#Mount the volume
+TEST $GFS -s $H0 --use-readdirp=no --attribute-timeout=0 --entry-timeout=0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+#Enable bitrot
+TEST $CLI volume bitrot $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+#Create sample file
+TEST `echo "1234" > $M0/FILE1`
+#Create hardlink
+TEST `ln $M0/FILE1 $M0/HL_FILE1`
+
+#Corrupt file from back-end
+TEST stat $B0/${V0}5/FILE1
+SIZE=$(stat -c %s $B0/${V0}5/FILE1)
+echo "Corrupted data" >> $B0/${V0}5/FILE1
+gfid1=$(getfattr -n glusterfs.gfid.string --only-values $M0/FILE1)
+
+#Manually set bad-file xattr
+TEST setfattr -n trusted.bit-rot.bad-file -v 0x3100 $B0/${V0}5/FILE1
+TEST touch "$B0/${V0}5/.glusterfs/quarantine/$gfid1"
+TEST chmod 000 "$B0/${V0}5/.glusterfs/quarantine/$gfid1"
+EXPECT "3" get_quarantine_count "$B0/${V0}5";
+
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+#Delete file and all links from backend
+TEST rm -rf $(find $B0/${V0}5 -inum $(stat -c %i $B0/${V0}5/FILE1))
+
+#New mount for recovery
+TEST $GFS -s $H0 --use-readdirp=no --attribute-timeout=0 --entry-timeout=0 --volfile-id $V0 $M1
+
+$CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume heal $V0
+
+#Access files
+TEST cat $M1/FILE1
+EXPECT_WITHIN $HEAL_TIMEOUT "$SIZE" path_size $B0/${V0}5/FILE1
+TEST cat $M1/HL_FILE1
+EXPECT_WITHIN $HEAL_TIMEOUT "$SIZE" path_size $B0/${V0}5/HL_FILE1
+
+cleanup;
diff --git a/tests/bitrot/bug-1700078.t b/tests/bitrot/bug-1700078.t
new file mode 100644
index 00000000000..f27374211fe
--- /dev/null
+++ b/tests/bitrot/bug-1700078.t
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+## Enable bitrot for volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+## Turn off quick-read so that it wont cache the contents
+# of the file in lookup. For corrupted files, it might
+# end up in reads being served from the cache instead of
+# an error.
+TEST $CLI volume set $V0 performance.quick-read off
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
+
+## Set expiry-timeout to 1 sec
+TEST $CLI volume set $V0 features.expiry-time 1
+
+##Mount $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+## Turn off quick-read xlator so that, the contents are not served from the
+# quick-read cache.
+TEST $CLI volume set $V0 performance.quick-read off
+
+#Create sample file
+TEST `echo "1234" > $M0/FILE1`
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' "/$B0/${V0}1/FILE1"
+
+##disable bitrot
+TEST $CLI volume bitrot $V0 disable
+
+## modify the file
+TEST `echo "write" >> $M0/FILE1`
+
+# unmount and remount when the file has to be accessed.
+# This is to ensure that, when the remount happens,
+# and the file is read, its contents are served from the
+# brick instead of cache.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+##enable bitrot
+TEST $CLI volume bitrot $V0 enable
+
+# expiry time is set to 1 second. Hence sleep for 2 seconds for the
+# oneshot crawler to finish its crawling and sign the file properly.
+sleep 2
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Active' scrub_status $V0 'State of scrub'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/bitd.log' scrub_status $V0 'Bitrot error log location'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '/var/log/glusterfs/scrub.log' scrub_status $V0 'Scrubber error log location'
+
+## Ondemand scrub
+TEST $CLI volume bitrot $V0 scrub ondemand
+
+# the scrub ondemand CLI command, just ensures that
+# the scrubber has received the ondemand scrub directive
+# and started. sleep for 2 seconds for scrubber to finish
+# crawling and marking file(s) as bad (if if finds that
+# corruption has happened) which are filesystem operations.
+sleep 2
+
+TEST ! getfattr -n 'trusted.bit-rot.bad-file' $B0/${V0}1/FILE1
+
+##Mount $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+TEST cat $M0/FILE1
+
+cleanup;
diff --git a/tests/bitrot/bug-internal-xattrs-check-1243391.t b/tests/bitrot/bug-internal-xattrs-check-1243391.t
new file mode 100644
index 00000000000..bc9c12520b2
--- /dev/null
+++ b/tests/bitrot/bug-internal-xattrs-check-1243391.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a distribute volume (B=2)
+TEST $CLI volume create $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}2;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '2' brick_count $V0
+
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+echo "123" >> $M0/file;
+
+TEST ! setfattr -n "trusted.glusterfs.set-signature" -v "123" $M0/file;
+TEST ! setfattr -n "trusted.glusterfs.get-signature" -v "123" $M0/file;
+
+# sign xattr
+TEST ! setfattr -n "trusted.bit-rot.signature" -v "123" $M0/file;
+TEST ! setfattr -x "trusted.bit-rot.signature" $M0/file;
+
+# versioning xattr
+TEST ! setfattr -n "trusted.bit-rot.version" -v "123" $M0/file;
+TEST ! setfattr -x "trusted.bit-rot.version" $M0/file;
+
+# bad file xattr
+TEST ! setfattr -n "trusted.bit-rot.bad-file" -v "123" $M0/file;
+TEST ! setfattr -x "trusted.bit-rot.bad-file" $M0/file;
+
+cleanup;
diff --git a/tests/bugs/access-control/bug-1051896.c b/tests/bugs/access-control/bug-1051896.c
new file mode 100644
index 00000000000..31799d97a71
--- /dev/null
+++ b/tests/bugs/access-control/bug-1051896.c
@@ -0,0 +1,94 @@
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <assert.h>
+#include <sys/types.h>
+#include <utime.h>
+#include <sys/acl.h>
+
+int
+do_setfacl(const char *path, const char *options, const char *textacl)
+{
+ int r;
+ int type;
+ acl_t acl;
+ int dob;
+ int dok;
+ int dom;
+ struct stat st;
+ char textmode[30];
+
+ r = 0;
+ dob = strchr(options, 'b') != (char *)NULL;
+ dok = strchr(options, 'k') != (char *)NULL;
+ dom = strchr(options, 'm') != (char *)NULL;
+ if ((dom && !textacl) ||
+ (!dom && (textacl || (!dok && !dob) || strchr(options, 'd')))) {
+ errno = EBADRQC; /* "bad request" */
+ r = -1;
+ } else {
+ if (dob || dok) {
+ r = acl_delete_def_file(path);
+ }
+ if (dob && !r) {
+ if (!stat(path, &st)) {
+ sprintf(textmode, "u::%c%c%c,g::%c%c%c,o::%c%c%c",
+ (st.st_mode & 0400 ? 'r' : '-'),
+ (st.st_mode & 0200 ? 'w' : '-'),
+ (st.st_mode & 0100 ? 'x' : '-'),
+ (st.st_mode & 0040 ? 'r' : '-'),
+ (st.st_mode & 0020 ? 'w' : '-'),
+ (st.st_mode & 0010 ? 'x' : '-'),
+ (st.st_mode & 004 ? 'r' : '-'),
+ (st.st_mode & 002 ? 'w' : '-'),
+ (st.st_mode & 001 ? 'x' : '-'));
+ acl = acl_from_text(textmode);
+ if (acl) {
+ r = acl_set_file(path, ACL_TYPE_ACCESS, acl);
+ acl_free(acl);
+ } else
+ r = -1;
+ } else
+ r = -1;
+ }
+ if (!r && dom) {
+ if (strchr(options, 'd'))
+ type = ACL_TYPE_DEFAULT;
+ else
+ type = ACL_TYPE_ACCESS;
+ acl = acl_from_text(textacl);
+ if (acl) {
+ r = acl_set_file(path, type, acl);
+ acl_free(acl);
+ } else
+ r = -1;
+ }
+ }
+ if (r)
+ r = -errno;
+ return r;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int rc = 0;
+
+ if (argc != 4) {
+ fprintf(stderr, "usage: ./setfacl_test <path> <options> <textacl>\n");
+ return 0;
+ }
+ rc = do_setfacl(argv[1], argv[2], argv[3]);
+ if (rc != 0) {
+ fprintf(stderr, "do_setfacl failed: %s\n", strerror(errno));
+ return rc;
+ }
+ return 0;
+}
diff --git a/tests/bugs/access-control/bug-1051896.t b/tests/bugs/access-control/bug-1051896.t
new file mode 100644
index 00000000000..870ede7db21
--- /dev/null
+++ b/tests/bugs/access-control/bug-1051896.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+case $OSTYPE in
+NetBSD)
+ echo "Skip test on ACL which are not available on NetBSD" >&2
+ SKIP_TESTS
+ exit 0
+ ;;
+*)
+ ;;
+esac
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --acl -s $H0 --volfile-id $V0 $M0;
+
+TEST touch $M0/file1;
+
+TEST $CC $(dirname $0)/bug-1051896.c -o $(dirname $0)/bug-1051896 -lacl
+TEST ! $(dirname $0)/bug-1051896 $M0/file1 m 'u::r,u::w,g::r--,o::r--'
+TEST rm -f $(dirname $0)/bug-1051896
+
+cleanup
diff --git a/tests/bugs/access-control/bug-1387241.c b/tests/bugs/access-control/bug-1387241.c
new file mode 100644
index 00000000000..e2e843a2fda
--- /dev/null
+++ b/tests/bugs/access-control/bug-1387241.c
@@ -0,0 +1,18 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+int
+main(int argc, char *argv[])
+{
+ int ret = EXIT_FAILURE;
+ int fd = open(argv[1], O_RDONLY | O_TRUNC);
+
+ if (fd) {
+ ret = EXIT_SUCCESS;
+ close(fd);
+ }
+
+ return ret;
+}
diff --git a/tests/bugs/access-control/bug-1387241.t b/tests/bugs/access-control/bug-1387241.t
new file mode 100644
index 00000000000..2efd80547d6
--- /dev/null
+++ b/tests/bugs/access-control/bug-1387241.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+case $OSTYPE in
+NetBSD)
+ echo "Skip test on ACL which are not available on NetBSD" >&2
+ SKIP_TESTS
+ exit 0
+ ;;
+*)
+ ;;
+esac
+
+#cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --acl -s $H0 --volfile-id $V0 $M0;
+
+TEST touch $M0/file1;
+
+TEST $CC $(dirname $0)/bug-1387241.c -o $(dirname $0)/bug-1387241
+
+TEST $(dirname $0)/bug-1387241 $M0/file1
+
+TEST rm -f $(dirname $0)/bug-1387241
+
+#cleanup
diff --git a/tests/bugs/access-control/bug-887098-gmount-crash.t b/tests/bugs/access-control/bug-887098-gmount-crash.t
new file mode 100644
index 00000000000..ba9937bd5bf
--- /dev/null
+++ b/tests/bugs/access-control/bug-887098-gmount-crash.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id=$V0 --acl $M0
+MOUNT_PID=$(get_mount_process_pid $V0)
+
+for i in {1..25};
+do
+ mkdir $M0/tmp_$i && cat /etc/hosts > $M0/tmp_$i/file
+ cp -RPp $M0/tmp_$i $M0/newtmp_$i && cat /etc/hosts > $M0/newtmp_$i/newfile
+done
+
+EXPECT "$MOUNT_PID" get_mount_process_pid $V0
+TEST rm -rf $M0/*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/access-control/bug-958691.t b/tests/bugs/access-control/bug-958691.t
new file mode 100644
index 00000000000..8b70607bdbb
--- /dev/null
+++ b/tests/bugs/access-control/bug-958691.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+# Tests for the fuse mount
+TEST mkdir $M0/dir;
+TEST chmod 1777 $M0/dir;
+TEST touch $M0/dir/file{1,2};
+
+TEST $CLI volume set $V0 server.root-squash enable;
+
+mv $M0/dir/file1 $M0/dir/file11 2>/dev/null;
+TEST [ $? -ne 0 ];
+
+TEST $CLI volume set $V0 server.root-squash disable;
+TEST rm -rf $M0/dir;
+
+sleep 1;
+
+# tests for nfs mount
+TEST mkdir $N0/dir;
+TEST chmod 1777 $N0/dir;
+TEST touch $N0/dir/file{1,2};
+
+TEST $CLI volume set $V0 server.root-squash enable;
+
+mv $N0/dir/file1 $N0/dir/file11 2>/dev/null;
+TEST [ $? -ne 0 ];
+
+TEST $CLI volume set $V0 server.root-squash disable;
+TEST rm -rf $N0/dir;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/bitrot/1207029-bitrot-daemon-should-start-on-valid-node.t b/tests/bugs/bitrot/1207029-bitrot-daemon-should-start-on-valid-node.t
new file mode 100755
index 00000000000..691ebc303e4
--- /dev/null
+++ b/tests/bugs/bitrot/1207029-bitrot-daemon-should-start-on-valid-node.t
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+## Test case for bitrot
+## bitd daemon should not start on the node which dont have any brick
+
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+function get_bitd_count {
+ ps auxw | grep glusterfs | grep bitd.pid | grep -v grep | wc -l
+}
+
+## Start a 2 node virtual cluster
+TEST launch_cluster 2;
+
+## Peer probe server 2 from server 1 cli
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+## Creating a volume which is having brick only on one node
+TEST $CLI_1 volume create $V0 $H1:$B1/${V0}0 $H1:$B1/${V0}1
+
+## Start the volume
+TEST $CLI_1 volume start $V0
+
+## Enable bitrot on volume from 2nd node.
+TEST $CLI_2 volume bitrot $V0 enable
+
+## Bitd daemon should be running on the node which is having brick. Here node1
+## only have brick so bitrot daemon count value should be 1.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+## Bitd daemon should not run on 2nd node and it should not create bitrot
+## volfile on this node. Below test case it to check whether its creating bitrot
+## volfile or not for 2nd node which dont have any brick.
+## Get current working directory of 2nd node which dont have any brick and do
+## stat on bitrot volfile.
+
+cur_wrk_dir2=$($CLI_2 system:: getwd)
+TEST ! stat $cur_wrk_dir2/bitd/bitd-server.vol
+
+
+## Bitd daemon should run on 1st node and it should create bitrot
+## volfile on this node. Below test case it to check whether its creating bitrot
+## volfile or not for 1st node which is having brick.
+## Get current working directory of 1st node which have brick and do
+## stat on bitrot volfile.
+
+cur_wrk_dir1=$($CLI_1 system:: getwd)
+TEST stat $cur_wrk_dir1/bitd/bitd-server.vol
+
+cleanup;
diff --git a/tests/bugs/bitrot/1209751-bitrot-scrub-tunable-reset.t b/tests/bugs/bitrot/1209751-bitrot-scrub-tunable-reset.t
new file mode 100644
index 00000000000..919ffc3ba62
--- /dev/null
+++ b/tests/bugs/bitrot/1209751-bitrot-scrub-tunable-reset.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+## Test case for bitrot
+## On restarting glusterd should not reset bitrot tunable value to default
+
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+## Enable bitrot on volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+## Set bitrot scrub-throttle value to lazy
+TEST $CLI volume bitrot $V0 scrub-throttle lazy
+
+## Set bitrot scrub-frequency value to monthly
+TEST $CLI volume bitrot $V0 scrub-frequency monthly
+
+## Set bitrot scrubber to pause state
+TEST $CLI volume bitrot $V0 scrub pause
+
+## restart glusterd process
+pkill glusterd;
+TEST glusterd;
+TEST pidof glusterd;
+
+## All the bitrot scrub tunable value should come back again.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status';
+EXPECT 'lazy' volinfo_field $V0 'features.scrub-throttle';
+EXPECT 'monthly' volinfo_field $V0 'features.scrub-freq';
+EXPECT 'pause' volinfo_field $V0 'features.scrub';
+EXPECT 'on' volinfo_field $V0 'features.bitrot';
+
+cleanup;
diff --git a/tests/bugs/bitrot/1209752-volume-status-should-show-bitrot-scrub-info.t b/tests/bugs/bitrot/1209752-volume-status-should-show-bitrot-scrub-info.t
new file mode 100644
index 00000000000..6101910666c
--- /dev/null
+++ b/tests/bugs/bitrot/1209752-volume-status-should-show-bitrot-scrub-info.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+## Test case for bitrot
+## gluster volume status command should show status of bitrot daemon
+
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+
+## Start a 2 node virtual cluster
+TEST launch_cluster 2;
+
+## Peer probe server 2 from server 1 cli
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+## Lets create and start the volume
+TEST $CLI_1 volume create $V0 $H1:$B1/${V0}0 $H2:$B2/${V0}1
+TEST $CLI_1 volume start $V0
+
+## Enable bitrot on volume $V0
+TEST $CLI_1 volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_bitd_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_scrubd_count
+
+## From node 1 Gluster volume status command should show the status of bitrot
+## daemon of all the nodes. there are 2 nodes in a cluster with having brick
+## ${V0}1 and ${V0}2 . So there should be 2 bitrot daemon running.
+
+bitd=$($CLI_1 volume status $V0 | grep "Bitrot Daemon" | grep -v grep | wc -l)
+TEST [ "$bitd" -eq 2 ];
+
+
+
+## From node 2 Gluster volume status command should show the status of Scrubber
+## daemon of all the nodes. There are 2 nodes in a cluster with having brick
+## ${V0}1 and ${V0}2 . So there should be 2 Scrubber daemon running.
+
+scrub=$($CLI_2 volume status $V0 | grep "Scrubber Daemon" | grep -v grep | \
+ wc -l)
+TEST [ "$scrub" -eq 2 ];
+
+
+
+## From node 1 Gluster volume status command should print status of only
+## scrubber daemon. There should be total 2 scrubber daemon running, one daemon
+## for each node
+
+scrub=$($CLI_1 volume status $V0 scrub | grep "Scrubber Daemon" | \
+ grep -v grep | wc -l)
+TEST [ "$scrub" -eq 2 ];
+
+
+
+## From node 2 Gluster volume status command should print status of only
+## bitd daemon. There should be total 2 bitd daemon running, one daemon
+## for each node
+
+bitd=$($CLI_2 volume status $V0 bitd | grep "Bitrot Daemon" | \
+ grep -v grep | wc -l)
+TEST [ "$bitd" -eq 2 ];
+
+
+cleanup;
diff --git a/tests/bugs/bitrot/1209818-vol-info-show-scrub-process-properly.t b/tests/bugs/bitrot/1209818-vol-info-show-scrub-process-properly.t
new file mode 100644
index 00000000000..4fe02dc7f63
--- /dev/null
+++ b/tests/bugs/bitrot/1209818-vol-info-show-scrub-process-properly.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+## Test case for bitrot.
+## volume info should not show 'features.scrub: resume' if scrub process is
+## resumed from paused state.
+
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+## Enable bitrot on volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+## Set bitrot scrubber process to pause state
+TEST $CLI volume bitrot $V0 scrub pause
+
+## gluster volume info command should show scrub process pause.
+EXPECT 'pause' volinfo_field $V0 'features.scrub';
+
+
+## Resume scrub process on volume $V0
+TEST $CLI volume bitrot $V0 scrub resume
+
+## gluster volume info command should show scrub process Active
+EXPECT 'Active' volinfo_field $V0 'features.scrub';
+
+
+## Disable bitrot on volume $V0
+TEST $CLI volume bitrot $V0 disable
+
+## gluster volume info command should show scrub process Inactive
+EXPECT 'Inactive' volinfo_field $V0 'features.scrub';
+
+
+cleanup;
diff --git a/tests/bugs/bitrot/bug-1210684-scrub-pause-resume-error-handling.t b/tests/bugs/bitrot/bug-1210684-scrub-pause-resume-error-handling.t
new file mode 100644
index 00000000000..b15b908d21a
--- /dev/null
+++ b/tests/bugs/bitrot/bug-1210684-scrub-pause-resume-error-handling.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+## Test case for bitrot BZ:1210684
+## Bitrot scrub pause/resume option should give proper error if scrubber is
+## already pause/resume and admin try to perform same operation on a volume
+
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2}
+TEST $CLI volume start $V0
+
+## Enable bitrot for volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+## Pause scrubber operation on volume $V0
+TEST $CLI volume bitrot $V0 scrub pause
+
+## Pausing scrubber again should not success and should give error
+TEST ! $CLI volume bitrot $V0 scrub pause
+
+## Resume scrubber operation on volume $V0
+TEST $CLI volume bitrot $V0 scrub resume
+
+## Resuming scrubber again should not success and should give error
+TEST ! $CLI volume bitrot $V0 scrub resume
+
+cleanup;
diff --git a/tests/bugs/bitrot/bug-1227996.t b/tests/bugs/bitrot/bug-1227996.t
new file mode 100644
index 00000000000..121c7b5f279
--- /dev/null
+++ b/tests/bugs/bitrot/bug-1227996.t
@@ -0,0 +1,57 @@
+#!/bin/bash
+
+## Test case for bitrot
+## Tunable object signing waiting time value for bitrot.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+SLEEP_TIME=3
+
+cleanup;
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+## Enable bitrot on volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+# wait a bit for oneshot crawler to finish
+sleep $SLEEP_TIME
+
+## Set object expiry time value
+TEST $CLI volume bitrot $V0 signing-time $SLEEP_TIME
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+# create and check object signature
+fname="$M0/filezero"
+echo "ZZZ" > $fname
+
+# wait till the object is signed
+sleep `expr $SLEEP_TIME \* 2`
+
+backpath=$(get_backend_paths $fname)
+TEST getfattr -m . -n trusted.bit-rot.signature $backpath
+
+## for now just remove the signature xattr to test for signing
+## upon truncate()
+TEST setfattr -x trusted.bit-rot.signature $backpath
+
+## overwrite the file (truncate(), write())
+echo "XYX" > $fname
+
+# wait till the object is signed
+sleep `expr $SLEEP_TIME \* 2`
+
+# test for new signature
+TEST getfattr -m . -n trusted.bit-rot.signature $backpath
+
+cleanup;
diff --git a/tests/bugs/bitrot/bug-1228680.t b/tests/bugs/bitrot/bug-1228680.t
new file mode 100644
index 00000000000..23db9d5e208
--- /dev/null
+++ b/tests/bugs/bitrot/bug-1228680.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+## Test case for bitrot
+## Tunable object signing waiting time value for bitrot.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+SLEEP_TIME=3
+
+cleanup;
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+## Enable bitrot on volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+# wait a bit for oneshot crawler to finish
+sleep $SLEEP_TIME
+
+## negative test
+TEST ! $CLI volume bitrot $V0 signing-time -100
+
+## Set object expiry time value 5 second.
+TEST $CLI volume bitrot $V0 signing-time $SLEEP_TIME
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+# create and check object signature
+fname="$M0/filezero"
+echo "ZZZ" > $fname
+
+# wait till the object is signed
+sleep `expr $SLEEP_TIME \* 2`
+
+backpath=$(get_backend_paths $fname)
+TEST getfattr -m . -n trusted.bit-rot.signature $backpath
+
+cleanup;
diff --git a/tests/bugs/bitrot/bug-1229134-bitd-not-support-vol-set.t b/tests/bugs/bitrot/bug-1229134-bitd-not-support-vol-set.t
new file mode 100644
index 00000000000..471471f4b6b
--- /dev/null
+++ b/tests/bugs/bitrot/bug-1229134-bitd-not-support-vol-set.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+## Test case for bitrot BZ:1229134
+## gluster volume set <VOLNAME> bitrot * command succeeds,
+## which is not supported to enable bitrot.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2}
+TEST $CLI volume start $V0
+
+## 'gluster volume set <VOLNAME>' command for bitrot should failed.
+TEST ! $CLI volume set $V0 bitrot enable
+TEST ! $CLI volume set $V0 bitrot disable
+TEST ! $CLI volume set $V0 scrub-frequency daily
+TEST ! $CLI volume set $V0 scrub pause
+TEST ! $CLI volume set $V0 scrub-throttle lazy
+
+
+## 'gluster volume bitrot <VOLNAME> *' command for bitrot should succeeds.
+TEST $CLI volume bitrot $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+TEST $CLI volume bitrot $V0 scrub pause
+TEST $CLI volume bitrot $V0 scrub-frequency daily
+TEST $CLI volume bitrot $V0 scrub-throttle lazy
+
+cleanup;
+
diff --git a/tests/bugs/bitrot/bug-1245981.t b/tests/bugs/bitrot/bug-1245981.t
new file mode 100644
index 00000000000..f3955256b01
--- /dev/null
+++ b/tests/bugs/bitrot/bug-1245981.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+## Test case for bitrot
+## Tunable object signing waiting time value for bitrot.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+SLEEP_TIME=5
+
+cleanup;
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 network.inode-lru-limit 1
+## Enable bitrot on volume $V0
+TEST $CLI volume bitrot $V0 enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+# wait a bit for oneshot crawler to finish
+sleep 2;
+
+## Set object expiry time value
+TEST $CLI volume bitrot $V0 signing-time $SLEEP_TIME
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+# create and check object signature
+fname="$M0/filezero"
+echo "ZZZ" > $fname
+echo "123" > $M0/new_file;
+
+touch $M0/1
+touch $M0/2
+touch $M0/3
+touch $M0/4
+touch $M0/5
+
+# wait till the object is signed
+sleep `expr $SLEEP_TIME \* 2`
+
+backpath=$(get_backend_paths $fname)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' $backpath
+
+backpath=$(get_backend_paths $M0/new_file)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'trusted.bit-rot.signature' check_for_xattr 'trusted.bit-rot.signature' $backpath
+
+cleanup;
diff --git a/tests/bugs/bitrot/bug-1288490.t b/tests/bugs/bitrot/bug-1288490.t
new file mode 100644
index 00000000000..5f67f4a6ec5
--- /dev/null
+++ b/tests/bugs/bitrot/bug-1288490.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume start $V0
+
+TEST $CLI volume bitrot $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST dd if=/dev/urandom of=$M0/FILE bs=1024 count=1
+
+# corrupt data -- append 2 bytes
+echo -n "~~" >> $B0/brick0/FILE
+# manually set bad-file xattr
+TEST setfattr -n trusted.bit-rot.bad-file -v 0x3100 $B0/brick0/FILE
+
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+# trigger lookup
+TEST stat $M0/FILE
+
+# extend the file
+TEST dd if=/dev/urandom of=$M0/FILE bs=1024 count=1 oflag=append conv=notrunc
+
+# check backend file size
+EXPECT "1026" stat -c "%s" $B0/brick0/FILE
+EXPECT "2048" stat -c "%s" $B0/brick1/FILE
+
+# check file size on mount
+EXPECT "2048" stat -c "%s" $M0/FILE
+
+TEST umount $M0
+cleanup
diff --git a/tests/bugs/bug-1064147.t b/tests/bugs/bug-1064147.t
new file mode 100755
index 00000000000..27ffde4eb44
--- /dev/null
+++ b/tests/bugs/bug-1064147.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+# Initialize
+#------------------------------------------------------------
+cleanup;
+
+# Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+# Create a volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+
+# Verify volume creation
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Start volume and verify successful start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+#------------------------------------------------------------
+
+# Test case 1 - Subvolume down + Healing
+#------------------------------------------------------------
+# Kill 2nd brick process
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count
+
+# Change root permissions
+TEST chmod 444 $M0
+
+# Store permission for comparision
+TEST permission_new=`stat -c "%A" $M0`
+
+# Bring up the killed brick process
+TEST $CLI volume start $V0 force
+
+# Perform lookup
+sleep 5
+TEST ls $M0
+
+# Check brick permissions
+TEST brick_perm=`stat -c "%A" $B0/${V0}2`
+TEST [ ${brick_perm} = ${permission_new} ]
+#------------------------------------------------------------
+
+# Test case 2 - Add-brick + Healing
+#------------------------------------------------------------
+# Change root permissions
+TEST chmod 777 $M0
+
+# Store permission for comparision
+TEST permission_new_2=`stat -c "%A" $M0`
+
+# Add a 3rd brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
+
+# Perform lookup
+sleep 5
+TEST ls $M0
+
+# Check permissions on the new brick
+TEST brick_perm2=`stat -c "%A" $B0/${V0}3`
+
+TEST [ ${brick_perm2} = ${permission_new_2} ]
+
+cleanup;
diff --git a/tests/bugs/bug-1110262.t b/tests/bugs/bug-1110262.t
new file mode 100644
index 00000000000..90b101fc98d
--- /dev/null
+++ b/tests/bugs/bug-1110262.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../traps.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 nfs.disable false
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+#do some operation on mount, so that kill_brick is guaranteed to be
+#done _after_ first lookup on root and dht has a proper layout on
+#it. Otherwise mkdir done in later stages of script might fail due to
+#lack of layout on "/" as dht-self-heal won't proceed if any of its
+#subvolumes are down.
+TEST ls $M0
+#kill one of the brick process
+TEST kill_brick $V0 $H0 $B0/${V0}2
+
+cleanup_user_group () {
+ userdel --force dev
+ groupdel QA
+}
+push_trapfunc cleanup_user_group
+
+#create a user and group
+TEST useradd dev
+TEST groupadd QA
+
+#create a new directory now with special user, group and mode bits
+mkdir -m 7777 $M0/dironedown
+TEST chown dev $M0/dironedown
+TEST chgrp QA $M0/dironedown
+
+#store the permissions for comparision
+permission_onedown=`ls -l $M0 | grep dironedown | awk '{print $1}'`
+
+#Now bring up the brick process
+TEST $CLI volume start $V0 force
+
+#The updation of directory attrs happens on the revalidate path. Hence, atmax on
+#2 lookups the update will happen.
+sleep 5
+TEST ls $M0/dironedown;
+
+#check directory that was created post brick going down
+TEST brick_perm=`ls -l $B0/${V0}2 | grep dironedown | awk '{print $1}'`
+TEST echo $brick_perm;
+TEST [ ${brick_perm} = ${permission_onedown} ]
+uid=`ls -l $B0/${V0}2 | grep dironedown | awk '{print $3}'`
+TEST echo $uid
+TEST [ $uid = dev ]
+gid=`ls -l $B0/${V0}2 | grep dironedown | awk '{print $4}'`
+TEST echo $gid
+TEST [ $gid = QA ]
+
+cleanup
diff --git a/tests/bugs/bug-1138841.t b/tests/bugs/bug-1138841.t
new file mode 100644
index 00000000000..abec5e89d56
--- /dev/null
+++ b/tests/bugs/bug-1138841.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a volume and set auth.allow using cidr format ip
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 auth.allow 127.0.0.1/20
+TEST $CLI volume start $V0
+
+
+## mount the volume and create a file on the mount point
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+TEST touch $M0/tmp1
+
+## Stop the volume and do the cleanup
+
+TEST $CLI volume stop $V0
+cleanup
diff --git a/tests/bugs/bug-1258069.t b/tests/bugs/bug-1258069.t
new file mode 100755
index 00000000000..b87ecbf2fe8
--- /dev/null
+++ b/tests/bugs/bug-1258069.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 cluster.choose-local off
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+TEST mount_nfs $H0:/$V0 $N0 nolock
+TEST mkdir -p $N0/a/b/c
+TEST umount_nfs $N0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+rmdir $M0/a/b/c
+mkdir $M0/a/b/c
+TEST mount_nfs $H0:/$V0/a/b/c $N0 nolock
+TEST umount_nfs $N0
+TEST umount $M0
+
+cleanup
diff --git a/tests/bugs/bug-1368312.t b/tests/bugs/bug-1368312.t
new file mode 100644
index 00000000000..c60d562bbd7
--- /dev/null
+++ b/tests/bugs/bug-1368312.t
@@ -0,0 +1,86 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+cleanup;
+
+function compare_get_split_brain_status {
+ local path=$1
+ local choice=$2
+ echo `getfattr -n replica.split-brain-status $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//' | grep $choice
+ if [ $? -ne 0 ]
+ then
+ echo 1
+ else
+ echo 0
+ fi
+
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume start $V0
+
+#Disable self-heal-daemon
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+TEST mkdir $M0/tmp1
+
+#Create metadata split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST chmod 666 $M0/tmp1
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST chmod 757 $M0/tmp1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
+
+EXPECT 2 get_pending_heal_count $V0
+
+
+TEST kill_brick $V0 $H0 $B0/${V0}4
+TEST chmod 755 $M0/tmp1
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}5
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 4
+
+TEST chmod 766 $M0/tmp1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 4
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 5
+
+EXPECT 4 get_pending_heal_count $V0
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST chmod 765 $M0/tmp1
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST chmod 756 $M0/tmp1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+EXPECT 6 get_pending_heal_count $V0
+
+cd $M0
+EXPECT 0 compare_get_split_brain_status ./tmp1 patchy-client-0
+EXPECT 0 compare_get_split_brain_status ./tmp1 patchy-client-1
+EXPECT 0 compare_get_split_brain_status ./tmp1 patchy-client-2
+EXPECT 0 compare_get_split_brain_status ./tmp1 patchy-client-3
+EXPECT 0 compare_get_split_brain_status ./tmp1 patchy-client-4
+EXPECT 0 compare_get_split_brain_status ./tmp1 patchy-client-5
+
+cd -
+cleanup
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/bugs/bug-1371806.t b/tests/bugs/bug-1371806.t
new file mode 100644
index 00000000000..08180525650
--- /dev/null
+++ b/tests/bugs/bug-1371806.t
@@ -0,0 +1,81 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+cleanup;
+
+function get_getfattr {
+ local path=$1
+ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//'
+}
+
+function set_fattr {
+ for i in `seq 1 10`
+ do
+ setfattr -n user.foo -v "newabc" ./tmp${i}
+ if [ "$?" = "0" ]
+ then
+ succ=$((succ+1))
+ else
+ fail=$((fail+1))
+ fi
+ done
+}
+
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume start $V0
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "6" online_brick_count
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+TEST mkdir tmp{1..10}
+
+##First set user.foo xattr with value abc on all dirs
+
+TEST setfattr -n user.foo -v "abc" ./tmp{1..10}
+EXPECT "abc" get_getfattr ./tmp{1..10}
+EXPECT "abc" get_getfattr $B0/${V0}5/tmp{1..10}
+
+TEST kill_brick $V0 $H0 $B0/${V0}5
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "5" online_brick_count
+
+succ=fail=0
+## set user.foo xattr with value newabc after kill one brick
+set_fattr
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "6" online_brick_count
+
+cd -
+TEST umount $M0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+## At this point dht code will heal xattr on down brick only for those dirs
+## hashed subvol was up at the time of update xattr
+TEST stat ./tmp{1..10}
+
+## Count the user.foo xattr value with abc on mount point and compare with fail value
+count=`getfattr -n user.foo ./tmp{1..10} | grep "user.foo" | grep -iw "abc" | wc -l`
+EXPECT "$fail" echo $count
+
+## Count the user.foo xattr value with newabc on mount point and compare with succ value
+count=`getfattr -n user.foo ./tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l`
+EXPECT "$succ" echo $count
+
+## Count the user.foo xattr value with abc on brick and compare with succ value
+count=`getfattr -n user.foo $B0/${V0}5/tmp{1..10} | grep "user.foo" | grep -iw "abc" | wc -l`
+EXPECT "$fail" echo $count
+
+## Count the user.foo xattr value with newabc on brick and compare with succ value
+count=`getfattr -n user.foo $B0/${V0}5/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l`
+EXPECT "$succ" echo $count
+
+
+cd -
+cleanup
+exit
diff --git a/tests/bugs/bug-1371806_1.t b/tests/bugs/bug-1371806_1.t
new file mode 100644
index 00000000000..df19a8c1c2a
--- /dev/null
+++ b/tests/bugs/bug-1371806_1.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+cleanup;
+
+function get_getfattr {
+ local path=$1
+ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//'
+}
+
+function remove_mds_xattr {
+
+ for i in `seq 1 10`
+ do
+ setfattr -x trusted.glusterfs.dht.mds $1/tmp${i} 2> /dev/null
+ done
+}
+
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+cd $M0
+TEST mkdir tmp{1..10}
+
+##Remove internal mds xattr from all directory
+remove_mds_xattr $B0/${V0}0
+remove_mds_xattr $B0/${V0}1
+remove_mds_xattr $B0/${V0}2
+remove_mds_xattr $B0/${V0}3
+
+cd -
+umount $M0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+cd $M0
+
+TEST setfattr -n user.foo -v "abc" ./tmp{1..10}
+EXPECT "abc" get_getfattr ./tmp{1..10}
+
+cd -
+cleanup
diff --git a/tests/bugs/bug-1371806_2.t b/tests/bugs/bug-1371806_2.t
new file mode 100644
index 00000000000..e6aa8e7c1ad
--- /dev/null
+++ b/tests/bugs/bug-1371806_2.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+cleanup;
+
+function get_getfattr {
+ local path=$1
+ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//'
+}
+
+function remove_mds_xattr {
+
+ for i in `seq 1 10`
+ do
+ setfattr -x trusted.glusterfs.dht.mds $1/tmp${i} 2> /dev/null
+ done
+}
+
+
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 --attribute-timeout=0 $M0;
+cd $M0
+TEST mkdir tmp{1..10}
+
+##Remove internal mds xattr from all directory
+remove_mds_xattr $B0/${V0}0
+remove_mds_xattr $B0/${V0}1
+remove_mds_xattr $B0/${V0}2
+remove_mds_xattr $B0/${V0}3
+
+##First set user.foo xattr with value abc on all dirs
+
+TEST setfattr -n user.foo -v "abc" ./tmp{1..10}
+EXPECT "abc" get_getfattr ./tmp{1..10}
+EXPECT "abc" get_getfattr $B0/${V0}0/tmp{1..10}
+EXPECT "abc" get_getfattr $B0/${V0}1/tmp{1..10}
+EXPECT "abc" get_getfattr $B0/${V0}2/tmp{1..10}
+EXPECT "abc" get_getfattr $B0/${V0}3/tmp{1..10}
+
+cd -
+TEST umount $M0
+
+cd -
+cleanup
+exit
diff --git a/tests/bugs/bug-1371806_3.t b/tests/bugs/bug-1371806_3.t
new file mode 100644
index 00000000000..cb13f37c737
--- /dev/null
+++ b/tests/bugs/bug-1371806_3.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+cleanup;
+
+function get_getfattr {
+ local path=$1
+ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//'
+}
+
+function set_fattr {
+ for i in `seq 1 10`
+ do
+ setfattr -n user.foo -v "newabc" ./tmp${i}
+ if [ "$?" = "0" ]
+ then
+ succ=$((succ+1))
+ else
+ fail=$((fail+1))
+ fi
+ done
+}
+
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 --attribute-timeout=0 $M0;
+
+cd $M0
+TEST mkdir tmp{1..10}
+
+TEST kill_brick $V0 $H0 $B0/${V0}3
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "3" online_brick_count
+
+succ=fail=0
+## set user.foo xattr with value newabc after kill one brick
+set_fattr
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "4" online_brick_count
+
+cd -
+TEST umount $M0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 --attribute-timeout=0 $M0;
+
+cd $M0
+## At this point dht code will heal xattr on down brick only for those dirs
+## hashed subvol was up at the time of update xattr
+TEST stat ./tmp{1..10}
+
+
+## Count the user.foo xattr value with newabc on brick and compare with succ value
+count=`getfattr -n user.foo $B0/${V0}3/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l`
+EXPECT "$succ" echo $count
+
+
+cd -
+cleanup
+exit
diff --git a/tests/bugs/bug-1371806_acl.t b/tests/bugs/bug-1371806_acl.t
new file mode 100644
index 00000000000..c39165628cc
--- /dev/null
+++ b/tests/bugs/bug-1371806_acl.t
@@ -0,0 +1,96 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+TEST useradd tmpuser
+
+function set_facl_user {
+ for i in `seq 1 10`
+ do
+ setfacl -m u:tmpuser:rw ./tmp${i}
+ if [ "$?" = "0" ]
+ then
+ succ=$((succ+1))
+ else
+ fail=$((fail+1))
+ fi
+ done
+}
+
+function set_facl_default {
+ for i in `seq 1 10`
+ do
+ setfacl -m d:o:rw ./tmp${i}
+ if [ "$?" = "0" ]
+ then
+ succ1=$((succ1+1))
+ else
+ fail1=$((fail1+1))
+ fi
+ done
+}
+
+
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
+TEST $CLI volume start $V0
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "6" online_brick_count
+
+TEST glusterfs --volfile-id=$V0 --acl --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+TEST mkdir tmp{1..10}
+TEST setfacl -m u:tmpuser:rwx ./tmp{1..10}
+count=`getfacl -p $M0/tmp{1..10} | grep -c "user:tmpuser:rwx"`
+EXPECT "10" echo $count
+TEST setfacl -m d:o:rwx ./tmp{1..10}
+count=`getfacl -p $M0/tmp{1..10} | grep -c "default:other::rwx"`
+EXPECT "10" echo $count
+count=`getfacl -p $B0/${V0}5/tmp{1..10} | grep -c "user:tmpuser:rwx"`
+EXPECT "10" echo $count
+count=`getfacl -p $B0/${V0}5/tmp{1..10} | grep -c "default:other::rwx"`
+EXPECT "10" echo $count
+
+
+TEST kill_brick $V0 $H0 $B0/${V0}5
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "5" online_brick_count
+
+succ=fail=0
+## Update acl attributes on dir after kill one brick
+set_facl_user
+succ1=fail1=0
+set_facl_default
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "6" online_brick_count
+
+cd -
+TEST umount $M0
+TEST glusterfs --volfile-id=$V0 --acl --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+## At this point dht will heal xatts on down brick only for those hashed_subvol
+## was up at the time of updated xattrs
+TEST stat ./tmp{1..10}
+
+# Make sure to send a write and read on the file inside mount
+echo "helloworld" > ./tmp1/file
+TEST cat ./tmp1/file
+
+## Compare succ value with updated acl attributes
+count=`getfacl -p $B0/${V0}5/tmp{1..10} | grep -c "user:tmpuser:rw-"`
+EXPECT "$succ" echo $count
+
+
+count=`getfacl -p $B0/${V0}5/tmp{1..10} | grep -c "default:other::rw-"`
+EXPECT "$succ1" echo $count
+
+cd -
+userdel --force tmpuser
+
+cleanup
diff --git a/tests/bugs/bug-1584517.t b/tests/bugs/bug-1584517.t
new file mode 100644
index 00000000000..7f48015a034
--- /dev/null
+++ b/tests/bugs/bug-1584517.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+cleanup;
+#This test case verifies attributes (uid/gid/perm) for the
+#directory are healed after stop/start brick. To verify the same
+#test case change attributes of the directory after down a DHT subvolume
+#and one AFR children. After start the volume with force and run lookup
+#operation attributes should be healed on started bricks at the backend.
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume start $V0
+TEST useradd dev -M
+TEST groupadd QA
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST mkdir $M0/dironedown
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "5" online_brick_count
+
+TEST kill_brick $V0 $H0 $B0/${V0}3
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "4" online_brick_count
+
+TEST kill_brick $V0 $H0 $B0/${V0}4
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "3" online_brick_count
+
+TEST kill_brick $V0 $H0 $B0/${V0}5
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "2" online_brick_count
+
+TEST chown dev $M0/dironedown
+TEST chgrp QA $M0/dironedown
+TEST chmod 777 $M0/dironedown
+
+#store the permissions for comparision
+permission_onedown=`ls -l $M0 | grep dironedown | awk '{print $1}'`
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "6" online_brick_count
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#Run lookup two times to hit revalidate code path in dht
+# to heal user attr
+
+TEST ls $M0/dironedown
+
+#check attributes those were created post brick going down
+TEST brick_perm=`ls -l $B0/${V0}3 | grep dironedown | awk '{print $1}'`
+TEST echo $brick_perm
+TEST [ ${brick_perm} = ${permission_onedown} ]
+uid=`ls -l $B0/${V0}3 | grep dironedown | awk '{print $3}'`
+TEST echo $uid
+TEST [ $uid = dev ]
+gid=`ls -l $B0/${V0}3 | grep dironedown | awk '{print $4}'`
+TEST echo $gid
+TEST [ $gid = QA ]
+
+TEST umount $M0
+userdel --force dev
+groupdel QA
+
+cleanup
+exit
+
diff --git a/tests/bugs/bug-1620580.t b/tests/bugs/bug-1620580.t
new file mode 100644
index 00000000000..0c74d4a6089
--- /dev/null
+++ b/tests/bugs/bug-1620580.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+#do some operation on mount, so that kill_brick is guaranteed to be
+#done _after_ first lookup on root
+
+TEST ls $M0
+TEST touch $M0/file
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+# Case of Same volume name, but different bricks
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{3,4};
+TEST $CLI volume start $V0;
+
+# Give time for 'reconnect' to happen
+sleep 4
+
+TEST ! ls $M0
+TEST ! touch $M0/file1
+
+# Case of Same brick, but different volume (ie, recreated).
+TEST $CLI volume create $V1 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V1;
+
+# Give time for 'reconnect' to happen
+sleep 4
+TEST ! ls $M0
+TEST ! touch $M0/file2
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+TEST $CLI volume stop $V1
+TEST $CLI volume delete $V1
+
+# Case of Same brick, but different volume (but same volume name)
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+TEST $CLI volume start $V0;
+
+# Give time for 'reconnect' to happen
+sleep 4
+TEST ! ls $M0
+TEST ! touch $M0/file3
+
+
+cleanup
diff --git a/tests/bugs/bug-1694920.t b/tests/bugs/bug-1694920.t
new file mode 100644
index 00000000000..5bf93c92f94
--- /dev/null
+++ b/tests/bugs/bug-1694920.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=300
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fileio.rc
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0};
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id=$V0 $M0;
+
+TEST touch $M0/a
+
+#When all bricks are up, lock and unlock should succeed
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST fd_close $fd1
+
+#When all bricks are down, lock/unlock should fail
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST $CLI volume stop $V0
+TEST ! flock -x $fd1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST fd_close $fd1
+
+#When a brick goes down and comes back up operations on fd which had locks on it should succeed by default
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST $CLI volume stop $V0
+sleep 2
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST fd_write $fd1 "data"
+TEST fd_close $fd1
+
+#When a brick goes down and comes back up operations on fd which had locks on it should fail when client.strict-locks is on
+TEST $CLI volume set $V0 client.strict-locks on
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' $M0/a
+TEST flock -x $fd1
+TEST $CLI volume stop $V0
+sleep 2
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" client_connected_status_meta $M0 $V0-client-0
+TEST ! fd_write $fd1 "data"
+TEST fd_close $fd1
+
+cleanup
diff --git a/tests/bugs/bug-1702299.t b/tests/bugs/bug-1702299.t
new file mode 100644
index 00000000000..1cff2ed5d3d
--- /dev/null
+++ b/tests/bugs/bug-1702299.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+cleanup;
+
+function get_getfattr {
+ local path=$1
+ echo `getfattr -n user.foo $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//'
+}
+
+function set_fattr {
+ for i in `seq 1 10`
+ do
+ setfattr -n user.foo -v "newabc" ./tmp${i}
+ if [ "$?" = "0" ]
+ then
+ succ=$((succ+1))
+ else
+ fail=$((fail+1))
+ fi
+ done
+}
+
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 --attribute-timeout=0 $M0;
+
+cd $M0
+TEST mkdir tmp{1..10}
+
+succ=fail=0
+## set user.foo xattr with value newabc after kill one brick
+set_fattr
+count=10
+EXPECT "$succ" echo $count
+count=0
+EXPECT "$fail" echo $count
+
+cd -
+
+# Add-brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{4,5}
+
+cd $M0
+## At this point dht code will heal xattr on down brick only for those dirs
+## hashed subvol was up at the time of update xattr
+TEST stat ./tmp{1..10}
+
+
+## Count the user.foo xattr value with newabc on brick and compare with succ value
+count=`getfattr -n user.foo $B0/${V0}4/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l`
+EXPECT "$succ" echo $count
+
+## Count the user.foo xattr value with newabc on brick and compare with succ value
+count=`getfattr -n user.foo $B0/${V0}5/tmp{1..10} | grep "user.foo" | grep -iw "newabc" | wc -l`
+EXPECT "$succ" echo $count
+
+
+cd -
+TEST umount $M0
+cleanup
diff --git a/tests/bugs/changelog/bug-1208470.t b/tests/bugs/changelog/bug-1208470.t
new file mode 100755
index 00000000000..526f8f20612
--- /dev/null
+++ b/tests/bugs/changelog/bug-1208470.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+## Testcase:
+## Avoid creating any EMPTY changelog(over the changelog rollover time)
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../changelog.rc
+cleanup;
+
+## override current changelog rollover-time
+## to avoid sleeping for long duration.
+CL_RO_TIME=5
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 $H0:$B0/$V0"1";
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Set changelog ON
+TEST $CLI volume set $V0 changelog.changelog on;
+
+EXPECT 1 count_changelog_files $B0/${V0}1
+
+## Set changelog rollover time
+TEST $CLI volume set $V0 changelog.rollover-time $CL_RO_TIME;
+
+## Wait for changelog rollover time
+sleep $CL_RO_TIME
+
+## NO additional empty changelogs created
+EXPECT 1 count_changelog_files $B0/${V0}1
diff --git a/tests/bugs/changelog/bug-1211327.t b/tests/bugs/changelog/bug-1211327.t
new file mode 100644
index 00000000000..a849ec3981f
--- /dev/null
+++ b/tests/bugs/changelog/bug-1211327.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+#Testcase:
+#On brick restart, new HTIME.TSTAMP file should not be created.
+#But on changelog disable/enable HTIME.TSTAMP should be created.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../changelog.rc
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 $H0:$B0/$V0"1";
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 changelog.changelog on;
+##Let changelog init complete before killing gluster processes
+sleep 1
+
+TEST killall_gluster;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "0" online_brick_count
+
+TEST glusterd;
+TEST pidof glusterd;
+
+##Let the brick processes starts
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+##On brick restart only one HTIME should be found.
+EXPECT 1 count_htime_files;
+
+##On changelog disable/enable, new HTIME should be created.
+TEST $CLI volume set $V0 changelog.changelog off;
+TEST $CLI volume set $V0 changelog.changelog on;
+EXPECT 2 count_htime_files;
+
+cleanup;
diff --git a/tests/bugs/changelog/bug-1225542.t b/tests/bugs/changelog/bug-1225542.t
new file mode 100644
index 00000000000..a646aa88014
--- /dev/null
+++ b/tests/bugs/changelog/bug-1225542.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+#Testcase:
+#On snapshot, notify changelog reconfigure upon explicit rollover
+#irrespective of any failures and send error back to barrier if any.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+## Create a volume
+TEST $CLI volume create $V0 $H0:$L1
+
+## Start volume and verify
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 changelog.changelog on
+##Wait for changelog init to complete.
+sleep 1
+
+## Take snapshot
+TEST $CLI snapshot create snap1 $V0
+
+cleanup;
diff --git a/tests/bugs/changelog/bug-1321955.t b/tests/bugs/changelog/bug-1321955.t
new file mode 100644
index 00000000000..9e3752b1728
--- /dev/null
+++ b/tests/bugs/changelog/bug-1321955.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+#This file checks if missing entry self-heal and entry self-heal are working
+#as expected.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 changelog.changelog enable
+TEST $CLI volume set $V0 changelog.capture-del-path on
+TEST $CLI volume start $V0
+
+#Mount the volume
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+#Create files
+TEST touch $M0/file1
+TEST mkdir $M0/dir1
+TEST touch $M0/dir1/file1
+
+#Check for presence of files
+TEST stat $B0/${V0}0/dir1/file1
+TEST stat $B0/${V0}1/dir1/file1
+TEST stat $B0/${V0}0/file1
+TEST stat $B0/${V0}1/file1
+
+#Kill brick1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+#Del dir1/file1
+TEST rm -f $M0/dir1/file1
+
+#file1 should be present in brick1 and not in brick0
+TEST ! stat $B0/${V0}0/dir1/file1
+TEST stat $B0/${V0}1/dir1/file1
+
+#Bring up the brick which is down
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+#Initiate heal
+TEST $CLI volume heal $V0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#dir1/file1 in brick1 should be deleted
+TEST ! stat $B0/${V0}1/dir1/file1
+
+#file1 under root should not be deleted in brick1
+TEST stat $B0/${V0}1/file1
+
+cleanup;
diff --git a/tests/bugs/cli/bug-1004218.t b/tests/bugs/cli/bug-1004218.t
new file mode 100644
index 00000000000..ab8307d6405
--- /dev/null
+++ b/tests/bugs/cli/bug-1004218.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Test if only a single xml document is generated by 'status all'
+# when a volume is not started
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create ${V0}1 $H0:$B0/${V0}1{1,2}
+TEST $CLI volume create ${V0}2 $H0:$B0/${V0}2{1,2}
+
+TEST $CLI volume start ${V0}1
+
+function test_status_all ()
+{
+ $CLI volume status all --xml | xmllint -format -
+}
+
+TEST test_status_all
+
+TEST $CLI volume stop ${V0}1
+
+cleanup
diff --git a/tests/bugs/cli/bug-1022905.t b/tests/bugs/cli/bug-1022905.t
new file mode 100644
index 00000000000..ee629e970d9
--- /dev/null
+++ b/tests/bugs/cli/bug-1022905.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Create a volume
+TEST glusterd;
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1};
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Volume start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Enable a protected and a resettable/unprotected option
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
+
+## Reset cmd resets only unprotected option(s), succeeds.
+TEST $CLI volume reset $V0;
+
+## Set an unprotected option
+TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
+
+## Now 1 protected and 1 unprotected options are set
+## Reset force should succeed
+TEST $CLI volume reset $V0 force;
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/cli/bug-1030580.t b/tests/bugs/cli/bug-1030580.t
new file mode 100644
index 00000000000..ac8b1d8f6db
--- /dev/null
+++ b/tests/bugs/cli/bug-1030580.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function write_to_file {
+ dd of=$M0/1 if=/dev/zero bs=1024k count=128 oflag=append 2>&1 >/dev/null
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+# Increasing the json stats dump time interval, so that it doesn't mess with the test.
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 3600
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+# Clear the profile info uptill now.
+TEST $CLI volume profile $V0 info clear
+
+# Verify 'volume profile info' prints both cumulative and incremental stats
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info)
+EXPECT 2 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 0 '
+
+# Verify 'volume profile info incremental' prints incremental stats only
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info incremental)
+EXPECT 0 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 1 '
+
+# Verify 'volume profile info cumulative' prints cumulative stats only
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info cumulative)
+EXPECT 2 cumulative_stat_count "$output"
+EXPECT 0 incremental_stat_count "$output" '.*'
+
+# Verify the 'volume profile info cumulative' command above didn't alter
+# the interval id
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info incremental)
+EXPECT 0 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 2 '
+
+cleanup;
diff --git a/tests/bugs/cli/bug-1047378.t b/tests/bugs/cli/bug-1047378.t
new file mode 100644
index 00000000000..33ee6be3d8b
--- /dev/null
+++ b/tests/bugs/cli/bug-1047378.t
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST "echo volume list | $CLI --xml | xmllint --format -"
+
+cleanup
diff --git a/tests/bugs/cli/bug-1047416.t b/tests/bugs/cli/bug-1047416.t
new file mode 100644
index 00000000000..864301034c9
--- /dev/null
+++ b/tests/bugs/cli/bug-1047416.t
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function write_to_file {
+ dd of=$M0/1 if=/dev/zero bs=1024k count=128 oflag=append 2>&1 >/dev/null
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+# Increasing the json stats dump time interval, so that it doesn't mess with the test.
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 3600
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+# Clear the profile info uptill now.
+TEST $CLI volume profile $V0 info clear
+
+# Verify 'volume profile info' prints both cumulative and incremental stats
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info)
+EXPECT 2 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 0 '
+
+# Verify 'volume profile info peek' prints both cumulative and incremental stats
+# without clearing incremental stats
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info peek)
+EXPECT 2 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 1 '
+
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info peek)
+EXPECT 2 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 1 '
+
+# Verify 'volume profile info incremental peek' prints incremental stats only
+# without clearing incremental stats
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info incremental peek)
+EXPECT 0 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 1 '
+
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info incremental peek)
+EXPECT 0 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 1 '
+
+# Verify 'volume profile info clear' clears both incremental and cumulative stats
+write_to_file &
+wait
+output=$($CLI volume profile $V0 info clear)
+EXPECT 2 cleared_stat_count "$output"
+
+output=$($CLI volume profile $V0 info)
+EXPECT 2 cumulative_stat_count "$output"
+EXPECT 2 incremental_stat_count "$output" ' 0 '
+EXPECT 4 data_read_count "$output" ' 0 '
+EXPECT 4 data_written_count "$output" ' 0 '
+
+cleanup;
diff --git a/tests/bugs/cli/bug-1077682.t b/tests/bugs/cli/bug-1077682.t
new file mode 100644
index 00000000000..eab5d86d04b
--- /dev/null
+++ b/tests/bugs/cli/bug-1077682.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3,4}
+TEST $CLI volume start $V0
+TEST ! $CLI volume remove-brick $V0 $H0:$B0/${V0}1
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}3 start
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" \
+"$H0:$B0/${V0}3"
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}3 commit
+TEST killall glusterd
+TEST glusterd
+
+cleanup
diff --git a/tests/bugs/cli/bug-1087487.t b/tests/bugs/cli/bug-1087487.t
new file mode 100755
index 00000000000..0659ffab684
--- /dev/null
+++ b/tests/bugs/cli/bug-1087487.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function rebalance_start {
+ $CLI volume rebalance $1 start | head -1;
+}
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+
+TEST $CLI volume start $V0
+
+EXPECT "volume rebalance: $V0: success: Rebalance on $V0 has \
+been started successfully. Use rebalance status command to \
+check status of the rebalance process." rebalance_start $V0
+
+cleanup;
diff --git a/tests/bugs/cli/bug-1113476.t b/tests/bugs/cli/bug-1113476.t
new file mode 100644
index 00000000000..fc7dbca537d
--- /dev/null
+++ b/tests/bugs/cli/bug-1113476.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+function volinfo_validate ()
+{
+ local var=$1
+ $CLI volume info $V0 | grep "^$var" | sed 's/.*: //'
+}
+
+cleanup;
+
+TEST verify_lvm_version
+TEST glusterd
+TEST pidof glusterd
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+
+EXPECT '' volinfo_validate 'snap-max-hard-limit'
+EXPECT '' volinfo_validate 'snap-max-soft-limit'
+EXPECT '' volinfo_validate 'auto-delete'
+
+TEST $CLI snapshot config snap-max-hard-limit 100
+TEST $CLI snapshot config $V0 snap-max-hard-limit 50
+EXPECT '' volinfo_validate 'snap-max-hard-limit'
+EXPECT '' volinfo_validate 'snap-max-soft-limit'
+EXPECT '' volinfo_validate 'auto-delete'
+
+TEST $CLI snapshot config snap-max-soft-limit 50
+EXPECT '' volinfo_validate 'snap-max-hard-limit'
+EXPECT '' volinfo_validate 'snap-max-soft-limit'
+EXPECT '' volinfo_validate 'auto-delete'
+
+TEST $CLI snapshot config auto-delete enable
+EXPECT '' volinfo_validate 'snap-max-hard-limit'
+EXPECT '' volinfo_validate 'snap-max-soft-limit'
+EXPECT 'enable' volinfo_validate 'auto-delete'
+
+cleanup;
+
+
diff --git a/tests/bugs/cli/bug-1169302.c b/tests/bugs/cli/bug-1169302.c
new file mode 100644
index 00000000000..7c6b5fbf856
--- /dev/null
+++ b/tests/bugs/cli/bug-1169302.c
@@ -0,0 +1,79 @@
+#include <errno.h>
+#include <stdio.h>
+#include <signal.h>
+
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int keep_running = 1;
+
+void
+stop_running(int sig)
+{
+ if (sig == SIGTERM)
+ keep_running = 0;
+}
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ glfs_fd_t *fd = NULL;
+ char *filename = NULL;
+ char *logfile = NULL;
+ char *host = NULL;
+
+ if (argc != 5) {
+ return -1;
+ }
+
+ host = argv[2];
+ logfile = argv[3];
+ filename = argv[4];
+
+ /* setup signal handler for exiting */
+ signal(SIGTERM, stop_running);
+
+ fs = glfs_new(argv[1]);
+ if (!fs) {
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", host, 24007);
+ if (ret < 0) {
+ return -1;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ return -1;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ return -1;
+ }
+
+ fd = glfs_creat(fs, filename, O_RDWR, 0644);
+ if (!fd) {
+ return -1;
+ }
+
+ /* sleep until SIGTERM has been received */
+ while (keep_running) {
+ sleep(1);
+ }
+
+ ret = glfs_close(fd);
+ if (ret < 0) {
+ return -1;
+ }
+
+ ret = glfs_fini(fs);
+ if (ret < 0) {
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/tests/bugs/cli/bug-1169302.t b/tests/bugs/cli/bug-1169302.t
new file mode 100755
index 00000000000..19660e033a8
--- /dev/null
+++ b/tests/bugs/cli/bug-1169302.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+cleanup
+
+#setup cluster and test volume
+TEST launch_cluster 3; # start 3-node virtual cluster
+TEST $CLI_1 peer probe $H2; # peer probe server 2 from server 1 cli
+TEST $CLI_1 peer probe $H3; # peer probe server 3 from server 1 cli
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers;
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0
+TEST $CLI_1 volume start $V0
+
+# test CLI parameter acceptance
+TEST $CLI_1 volume statedump $V0
+TEST $CLI_2 volume statedump $V0
+TEST $CLI_3 volume statedump $V0
+TEST ! $CLI_1 volume statedump $V0 client $H2:0
+TEST ! $CLI_2 volume statedump $V0 client $H2:-1
+TEST $CLI_3 volume statedump $V0 client $H2:765
+TEST ! $CLI_1 volume statedump $V0 client $H2:
+TEST ! $CLI_2 volume statedump $V0 client
+TEST ! $CLI_3 volume statedump $V0 client $H2 $GFAPI_PID
+
+# build and run a gfapi appliction for triggering a statedump
+logdir=`gluster --print-logdir`
+STATEDUMP_TIMEOUT=60
+
+build_tester $(dirname $0)/bug-1169302.c -lgfapi
+$(dirname $0)/bug-1169302 $V0 $H1 $logdir/bug-1169302.log testfile & GFAPI_PID=$!
+
+cleanup_statedump
+
+# Take the statedump of the process connected to $H1, it should match the
+# hostname or IP-address with the connection from the bug-1169302 executable.
+# In our CI it seems not possible to use $H0, 'localhost', $(hostname --fqdn)
+# or even "127.0.0.1"....
+sleep 2
+host=`netstat -nap | grep $GFAPI_PID | grep 24007 | awk '{print $4}' | cut -d: -f1`
+TEST $CLI_3 volume statedump $V0 client $host:$GFAPI_PID
+EXPECT_WITHIN $STATEDUMP_TIMEOUT "Y" path_exists $statedumpdir/glusterdump.$GFAPI_PID*
+
+kill $GFAPI_PID
+
+cleanup_statedump
+cleanup_tester $(dirname $0)/bug-1169302
+cleanup \ No newline at end of file
diff --git a/tests/bugs/cli/bug-1320388.t b/tests/bugs/cli/bug-1320388.t
new file mode 100755
index 00000000000..e719fc59033
--- /dev/null
+++ b/tests/bugs/cli/bug-1320388.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test enables management ssl and then test the
+# heal info command.
+
+for d in /etc/ssl /etc/openssl /usr/local/etc/openssl ; do
+ if test -d $d ; then
+ SSL_BASE=$d
+ break
+ fi
+done
+
+SSL_KEY=$SSL_BASE/glusterfs.key
+SSL_CERT=$SSL_BASE/glusterfs.pem
+SSL_CA=$SSL_BASE/glusterfs.ca
+
+cleanup;
+rm -f $SSL_BASE/glusterfs.*
+touch "$GLUSTERD_WORKDIR"/secure-access
+
+TEST openssl genrsa -out $SSL_KEY 2048
+TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ln $SSL_CERT $SSL_CA
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume set $V0 disperse.eager-lock off
+TEST $CLI volume set $V0 disperse.other-eager-lock off
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^6$" ec_child_up_count $V0 0
+touch $M0/a
+TEST kill_brick $V0 $H0 $B0/${V0}5
+echo abc > $M0/a
+EXPECT_WITHIN $HEAL_TIMEOUT "^5$" get_pending_heal_count $V0 #One for each active brick
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^6$" ec_child_up_count $V0 0
+TEST gluster volume heal $V0 info
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0 #One for each active brick
+cleanup;
diff --git a/tests/bugs/cli/bug-1353156-get-state-cli-validations.t b/tests/bugs/cli/bug-1353156-get-state-cli-validations.t
new file mode 100644
index 00000000000..a4556c9c997
--- /dev/null
+++ b/tests/bugs/cli/bug-1353156-get-state-cli-validations.t
@@ -0,0 +1,147 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../traps.rc
+
+cleanup;
+
+ODIR="/var/tmp/gdstates/"
+NOEXDIR="/var/tmp/gdstatesfoo/"
+
+function get_daemon_not_supported_part {
+ echo $1
+}
+
+function get_usage_part {
+ echo $7
+}
+
+function get_directory_doesnt_exist_part {
+ echo $1
+}
+
+function get_parsing_arguments_part {
+ echo $1
+}
+
+function positive_test {
+ local text=$("$@")
+ echo $text > /dev/stderr
+ (echo -n $text | grep -qs ' state dumped to ') || return 1
+ local opath=$(echo -n $text | awk '{print $5}')
+ [ -r $opath ] || return 1
+ rm -f $opath
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST mkdir -p $ODIR
+
+push_trapfunc rm -rf $ODIR
+
+TEST $CLI volume create $V0 disperse $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+TEST $CLI volume start $V0
+
+TEST setup_lvm 1
+TEST $CLI volume create $V1 $H0:$L1;
+TEST $CLI volume start $V1
+
+TEST $CLI snapshot create ${V1}_snap $V1
+
+TEST positive_test $CLI get-state
+
+TEST positive_test $CLI get-state glusterd
+
+TEST ! $CLI get-state glusterfsd;
+ERRSTR=$($CLI get-state glusterfsd 2>&1 >/dev/null);
+EXPECT 'glusterd' get_daemon_not_supported_part $ERRSTR;
+EXPECT 'Usage:' get_usage_part $ERRSTR;
+
+TEST positive_test $CLI get-state file gdstate
+
+TEST positive_test $CLI get-state glusterd file gdstate
+
+TEST ! $CLI get-state glusterfsd file gdstate;
+ERRSTR=$($CLI get-state glusterfsd file gdstate 2>&1 >/dev/null);
+EXPECT 'glusterd' get_daemon_not_supported_part $ERRSTR;
+EXPECT 'Usage:' get_usage_part $ERRSTR;
+
+TEST positive_test $CLI get-state odir $ODIR
+
+TEST positive_test $CLI get-state glusterd odir $ODIR
+
+TEST positive_test $CLI get-state odir $ODIR file gdstate
+
+TEST positive_test $CLI get-state glusterd odir $ODIR file gdstate
+
+TEST positive_test $CLI get-state detail
+
+TEST positive_test $CLI get-state glusterd detail
+
+TEST positive_test $CLI get-state odir $ODIR detail
+
+TEST positive_test $CLI get-state glusterd odir $ODIR detail
+
+TEST positive_test $CLI get-state glusterd odir $ODIR file gdstate detail
+
+TEST positive_test $CLI get-state volumeoptions
+
+TEST positive_test $CLI get-state glusterd volumeoptions
+
+TEST positive_test $CLI get-state odir $ODIR volumeoptions
+
+TEST positive_test $CLI get-state glusterd odir $ODIR volumeoptions
+
+TEST positive_test $CLI get-state glusterd odir $ODIR file gdstate volumeoptions
+
+TEST ! $CLI get-state glusterfsd odir $ODIR;
+ERRSTR=$($CLI get-state glusterfsd odir $ODIR 2>&1 >/dev/null);
+EXPECT 'glusterd' get_daemon_not_supported_part $ERRSTR;
+EXPECT 'Usage:' get_usage_part $ERRSTR;
+
+TEST ! $CLI get-state glusterfsd odir $ODIR file gdstate;
+ERRSTR=$($CLI get-state glusterfsd odir $ODIR file gdstate 2>&1 >/dev/null);
+EXPECT 'glusterd' get_daemon_not_supported_part $ERRSTR;
+EXPECT 'Usage:' get_usage_part $ERRSTR;
+
+TEST ! $CLI get-state glusterfsd odir $NOEXDIR file gdstate;
+ERRSTR=$($CLI get-state glusterfsd odir $NOEXDIR file gdstate 2>&1 >/dev/null);
+EXPECT 'glusterd' get_daemon_not_supported_part $ERRSTR;
+EXPECT 'Usage:' get_usage_part $ERRSTR;
+
+TEST ! $CLI get-state odir $NOEXDIR;
+ERRSTR=$($CLI get-state odir $NOEXDIR 2>&1 >/dev/null);
+EXPECT 'Failed' get_directory_doesnt_exist_part $ERRSTR;
+
+TEST ! $CLI get-state odir $NOEXDIR file gdstate;
+ERRSTR=$($CLI get-state odir $NOEXDIR 2>&1 >/dev/null);
+EXPECT 'Failed' get_directory_doesnt_exist_part $ERRSTR;
+
+TEST ! $CLI get-state foo bar;
+ERRSTR=$($CLI get-state foo bar 2>&1 >/dev/null);
+EXPECT 'glusterd' get_daemon_not_supported_part $ERRSTR;
+EXPECT 'Usage:' get_usage_part $ERRSTR;
+
+TEST ! $CLI get-state glusterd foo bar;
+ERRSTR=$($CLI get-state glusterd foo bar 2>&1 >/dev/null);
+EXPECT 'Problem' get_parsing_arguments_part $ERRSTR;
+
+TEST ! $CLI get-state glusterd detail file gdstate;
+ERRSTR=$($CLI get-state glusterd foo bar 2>&1 >/dev/null);
+EXPECT 'Problem' get_parsing_arguments_part $ERRSTR;
+
+TEST ! $CLI get-state glusterd foo bar detail;
+ERRSTR=$($CLI get-state glusterd foo bar 2>&1 >/dev/null);
+EXPECT 'Problem' get_parsing_arguments_part $ERRSTR;
+
+TEST ! $CLI get-state glusterd volumeoptions file gdstate;
+ERRSTR=$($CLI get-state glusterd foo bar 2>&1 >/dev/null);
+EXPECT 'Problem' get_parsing_arguments_part $ERRSTR;
+
+TEST ! $CLI get-state glusterd foo bar volumeoptions;
+ERRSTR=$($CLI get-state glusterd foo bar 2>&1 >/dev/null);
+EXPECT 'Problem' get_parsing_arguments_part $ERRSTR;
+
+cleanup;
diff --git a/tests/bugs/cli/bug-1378842-volume-get-all.t b/tests/bugs/cli/bug-1378842-volume-get-all.t
new file mode 100644
index 00000000000..be41f25b000
--- /dev/null
+++ b/tests/bugs/cli/bug-1378842-volume-get-all.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume set all server-quorum-ratio 80
+
+# Execute volume get without having an explicit option, this should fail
+TEST ! $CLI volume get all
+
+# Execute volume get with an explicit global option
+TEST $CLI volume get all server-quorum-ratio
+EXPECT '80' volume_get_field all 'cluster.server-quorum-ratio'
+
+# Execute volume get with 'all'
+TEST $CLI volume get all all
+
+cleanup;
+
diff --git a/tests/bugs/cli/bug-764638.t b/tests/bugs/cli/bug-764638.t
new file mode 100644
index 00000000000..ffc613409d6
--- /dev/null
+++ b/tests/bugs/cli/bug-764638.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI pool list;
+TEST $CLI pool list --xml;
+
+cleanup;
diff --git a/tests/bugs/cli/bug-822830.t b/tests/bugs/cli/bug-822830.t
new file mode 100755
index 00000000000..a9904854110
--- /dev/null
+++ b/tests/bugs/cli/bug-822830.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting nfs.rpc-auth-reject as 192.{}.1.2
+TEST ! $CLI volume set $V0 nfs.rpc-auth-reject 192.{}.1.2
+EXPECT '' volinfo_field $V0 'nfs.rpc-auth-reject';
+
+# Setting nfs.rpc-auth-allow as a.a.
+TEST ! $CLI volume set $V0 nfs.rpc-auth-allow a.a.
+EXPECT '' volinfo_field $V0 'nfs.rpc-auth-allow';
+
+## Setting nfs.rpc-auth-reject as 192.*..*
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 192.*..*
+EXPECT '192.*..*' volinfo_field $V0 'nfs.rpc-auth-reject';
+
+# Setting nfs.rpc-auth-allow as a.a
+TEST $CLI volume set $V0 nfs.rpc-auth-allow a.a
+EXPECT 'a.a' volinfo_field $V0 'nfs.rpc-auth-allow';
+
+# Setting nfs.rpc-auth-allow as *.redhat.com
+TEST $CLI volume set $V0 nfs.rpc-auth-allow *.redhat.com
+EXPECT '\*.redhat.com' volinfo_field $V0 'nfs.rpc-auth-allow';
+
+# Setting nfs.rpc-auth-allow as 192.168.10.[1-5]
+TEST $CLI volume set $V0 nfs.rpc-auth-allow 192.168.10.[1-5]
+EXPECT '192.168.10.\[1-5]' volinfo_field $V0 'nfs.rpc-auth-allow';
+
+# Setting nfs.rpc-auth-allow as 192.168.70.?
+TEST $CLI volume set $V0 nfs.rpc-auth-allow 192.168.70.?
+EXPECT '192.168.70.?' volinfo_field $V0 'nfs.rpc-auth-allow';
+
+# Setting nfs.rpc-auth-reject as 192.168.10.5/16
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 192.168.10.5/16
+EXPECT '192.168.10.5/16' volinfo_field $V0 'nfs.rpc-auth-reject';
+
+## Setting nfs.rpc-auth-reject as 192.*.*
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 192.*.*
+EXPECT '192.*.*' volinfo_field $V0 'nfs.rpc-auth-reject';
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/cli/bug-867252.t b/tests/bugs/cli/bug-867252.t
new file mode 100644
index 00000000000..ccc33d82a0f
--- /dev/null
+++ b/tests/bugs/cli/bug-867252.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+function brick_count()
+{
+ local vol=$1;
+
+ $CLI volume info $vol | egrep "^Brick[0-9]+: " | wc -l;
+}
+
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '1' brick_count $V0
+
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}2;
+EXPECT '2' brick_count $V0
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force;
+EXPECT '1' brick_count $V0
+
+cleanup;
diff --git a/tests/bugs/cli/bug-921215.t b/tests/bugs/cli/bug-921215.t
new file mode 100755
index 00000000000..02532562cff
--- /dev/null
+++ b/tests/bugs/cli/bug-921215.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# This is test case for bug no 921215 "Can not create volume with a . in the name"
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST ! $CLI volume create $V0.temp replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+
+cleanup;
diff --git a/tests/bugs/cli/bug-949298.t b/tests/bugs/cli/bug-949298.t
new file mode 100644
index 00000000000..e0692f0c157
--- /dev/null
+++ b/tests/bugs/cli/bug-949298.t
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI --xml volume info $V0
+
+cleanup;
diff --git a/tests/bugs/cli/bug-961307.t b/tests/bugs/cli/bug-961307.t
new file mode 100644
index 00000000000..602a6e34bce
--- /dev/null
+++ b/tests/bugs/cli/bug-961307.t
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+REPLICA=2
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica $REPLICA $H0:$B0/${V0}-00 $H0:$B0/${V0}-01 $H0:$B0/${V0}-10 $H0:$B0/${V0}-11
+TEST $CLI volume start $V0
+
+var1=$($CLI volume remove-brick $H0:$B0/${V0}-00 $H0:$B0/${V0}-01 start 2>&1)
+var2="volume remove-brick start: failed: Volume $H0:$B0/${V0}-00 does not exist"
+
+EXPECT "$var2" echo "$var1"
+cleanup;
diff --git a/tests/bugs/cli/bug-969193.t b/tests/bugs/cli/bug-969193.t
new file mode 100755
index 00000000000..dd6d7cdf100
--- /dev/null
+++ b/tests/bugs/cli/bug-969193.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# Test that "system getspec" works without op_version problems.
+
+. $(dirname $0)/../../include.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+TEST $CLI volume create $V0 $H0:$B0/brick1
+TEST $CLI system getspec $V0
+cleanup;
diff --git a/tests/bugs/cli/bug-977246.t b/tests/bugs/cli/bug-977246.t
new file mode 100644
index 00000000000..bb8d6328e8b
--- /dev/null
+++ b/tests/bugs/cli/bug-977246.t
@@ -0,0 +1,21 @@
+#! /bin/bash
+
+# This test checks if address validation, correctly catches hostnames
+# with consective dots, such as 'example..org', as invalid
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+TEST $CLI volume info $V0
+TEST $CLI volume start $V0
+
+TEST ! $CLI volume set $V0 auth.allow example..org
+
+TEST $CLI volume stop $V0
+
+cleanup;
diff --git a/tests/bugs/cli/bug-982174.t b/tests/bugs/cli/bug-982174.t
new file mode 100644
index 00000000000..067e5b97c17
--- /dev/null
+++ b/tests/bugs/cli/bug-982174.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+# Test to check
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#Check if incorrect log-level keywords does not crash the CLI
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2
+TEST $CLI volume start $V0
+
+function set_log_level_status {
+ local level=$1
+ $CLI volume set $V0 diagnostics.client-log-level $level 2>&1 |grep -oE 'success|failed'
+}
+
+
+LOG_LEVEL="trace"
+EXPECT "failed" set_log_level_status $LOG_LEVEL
+
+
+LOG_LEVEL="error-gen"
+EXPECT "failed" set_log_level_status $LOG_LEVEL
+
+
+LOG_LEVEL="TRACE"
+EXPECT "success" set_log_level_status $LOG_LEVEL
+
+EXPECT "$LOG_LEVEL" echo `$CLI volume info | grep diagnostics | awk '{print $2}'`
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/cli/bug-983317-volume-get.t b/tests/bugs/cli/bug-983317-volume-get.t
new file mode 100644
index 00000000000..c793bbc9f0c
--- /dev/null
+++ b/tests/bugs/cli/bug-983317-volume-get.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Set a volume option
+TEST $CLI volume set $V0 open-behind on
+TEST $CLI volume start $V0
+
+TEST $CLI volume set all server-quorum-ratio 80
+
+TEST $CLI volume set $V0 user.metadata 'dummy'
+
+# Execute volume get without having an explicit option, this should fail
+TEST ! $CLI volume get $V0
+
+# Execute volume get with an explicit option
+TEST $CLI volume get $V0 open-behind
+EXPECT 'on' volume_get_field $V0 'open-behind'
+
+# Execute volume get with 'all"
+TEST $CLI volume get $V0 all
+
+# Check if volume get can display correct global options values as well
+EXPECT '80' volume_get_field $V0 'server-quorum-ratio'
+
+# Check user.* options can also be retrived using volume get
+EXPECT 'dummy' volume_get_field $V0 'user.metadata'
+
+TEST $CLI volume set all brick-multiplex enable
+EXPECT 'enable' volume_get_field $V0 'brick-multiplex'
+
+TEST $CLI volume set all brick-multiplex disable
+EXPECT 'disable' volume_get_field $V0 'brick-multiplex'
+
+#setting an cluster level option for single volume should fail
+TEST ! $CLI volume set $V0 brick-multiplex enable
+
diff --git a/tests/bugs/core/949327.t b/tests/bugs/core/949327.t
new file mode 100644
index 00000000000..6b8033a5c85
--- /dev/null
+++ b/tests/bugs/core/949327.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function tmp_file_count()
+{
+ echo $(ls -lh /tmp/tmp.* 2>/dev/null | wc -l)
+}
+
+
+old_count=$(tmp_file_count);
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+new_count=$(tmp_file_count);
+
+TEST [ "$old_count" -eq "$new_count" ]
+
+cleanup
diff --git a/tests/bugs/core/brick-mux-fd-cleanup.t b/tests/bugs/core/brick-mux-fd-cleanup.t
new file mode 100644
index 00000000000..de11c177b8a
--- /dev/null
+++ b/tests/bugs/core/brick-mux-fd-cleanup.t
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This .t tests that the fds from client are closed on brick when gluster volume
+#stop is executed in brick-mux setup.
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+function keep_fd_open {
+#This function has to be run as background job because opening the fd in
+#foreground and running commands is leading to flush calls on these fds
+#which is making it very difficult to create the race where fds will be left
+#open even after the brick dies.
+ exec 5>$M1/a
+ exec 6>$M1/b
+ while [ -f $M0/a ]; do sleep 1; done
+}
+
+function count_open_files {
+ local brick_pid="$1"
+ local pattern="$2"
+ ls -l /proc/$brick_pid/fd | grep -i "$pattern" | wc -l
+}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{2,3}
+#Have same configuration on both bricks so that they are multiplexed
+#Delay flush fop for a second
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume heal $V1 disable
+TEST $CLI volume set $V0 delay-gen posix
+TEST $CLI volume set $V0 delay-gen.enable flush
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.delay-duration 1000000
+TEST $CLI volume set $V1 delay-gen posix
+TEST $CLI volume set $V1 delay-gen.enable flush
+TEST $CLI volume set $V1 delay-gen.delay-percentage 100
+TEST $CLI volume set $V1 delay-gen.delay-duration 1000000
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+
+TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0
+TEST $GFS -s $H0 --volfile-id=$V1 --direct-io-mode=enable $M1
+
+TEST touch $M0/a
+keep_fd_open &
+TEST $CLI volume profile $V1 start
+brick_pid=$(get_brick_pid $V1 $H0 $B0/${V1}2)
+TEST count_open_files $brick_pid "$B0/${V1}2/a"
+TEST count_open_files $brick_pid "$B0/${V1}2/b"
+TEST count_open_files $brick_pid "$B0/${V1}3/a"
+TEST count_open_files $brick_pid "$B0/${V1}3/b"
+
+#If any other flush fops are introduced into the system other than the one at
+#cleanup it interferes with the race, so test for it
+EXPECT "^0$" echo "$($CLI volume profile $V1 info incremental | grep -i flush | wc -l)"
+#Stop the volume
+TEST $CLI volume stop $V1
+
+#Wait for cleanup resources or volume V1
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}2/a"
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}2/b"
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}3/a"
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "^0$" count_open_files $brick_pid "$B0/${V1}3/b"
+
+TEST rm -f $M0/a #Exit keep_fd_open()
+wait
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+cleanup
diff --git a/tests/bugs/core/bug-1110917.t b/tests/bugs/core/bug-1110917.t
new file mode 100644
index 00000000000..c4b04fbf2c7
--- /dev/null
+++ b/tests/bugs/core/bug-1110917.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+TEST $CLI volume set $V0 changelog on
+TEST $CLI volume set $V0 changelog.fsync-interval 1
+
+# perform I/O on the background
+f=$(basename `mktemp -t ${0##*/}.XXXXXX`)
+dd if=/dev/urandom of=$M0/$f count=100000 bs=4k &
+
+# this is the best we can do without inducing _error points_ in the code
+# without the patch reconfigre() would hang...
+TEST $CLI volume set $V0 changelog.rollover-time `expr $((RANDOM % 9)) + 1`
+TEST $CLI volume set $V0 changelog.rollover-time `expr $((RANDOM % 9)) + 1`
+
+TEST $CLI volume set $V0 changelog off
+TEST $CLI volume set $V0 changelog on
+TEST $CLI volume set $V0 changelog off
+TEST $CLI volume set $V0 changelog on
+
+TEST $CLI volume set $V0 changelog.rollover-time `expr $((RANDOM % 9)) + 1`
+TEST $CLI volume set $V0 changelog.rollover-time `expr $((RANDOM % 9)) + 1`
+
+# if there's a deadlock, this would hang
+wait;
+
+cleanup;
diff --git a/tests/bugs/core/bug-1111557.t b/tests/bugs/core/bug-1111557.t
new file mode 100644
index 00000000000..4ed45761bce
--- /dev/null
+++ b/tests/bugs/core/bug-1111557.t
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0}
+TEST $CLI volume set $V0 diagnostics.brick-log-buf-size 0
+TEST ! $CLI volume set $V0 diagnostics.brick-log-buf-size -0
+cleanup
diff --git a/tests/bugs/core/bug-1117951.t b/tests/bugs/core/bug-1117951.t
new file mode 100644
index 00000000000..b484fee2fe4
--- /dev/null
+++ b/tests/bugs/core/bug-1117951.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume start $V0
+
+# Running with a locale not using '.' as decimal separator should work
+export LC_NUMERIC=sv_SE.utf8
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# As should a locale using '.' as a decimal separator
+export LC_NUMERIC=C
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup
diff --git a/tests/bugs/core/bug-1119582.t b/tests/bugs/core/bug-1119582.t
new file mode 100644
index 00000000000..c30057c2b2c
--- /dev/null
+++ b/tests/bugs/core/bug-1119582.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../nfs.rc
+
+cleanup;
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+
+TEST $CLI volume set $V0 features.uss disable;
+
+TEST killall glusterd;
+
+rm -f $GLUSTERD_WORKDIR/vols/$V0/snapd.info
+
+TEST glusterd
+
+cleanup ;
diff --git a/tests/bugs/core/bug-1135514-allow-setxattr-with-null-value.t b/tests/bugs/core/bug-1135514-allow-setxattr-with-null-value.t
new file mode 100644
index 00000000000..d26aa561321
--- /dev/null
+++ b/tests/bugs/core/bug-1135514-allow-setxattr-with-null-value.t
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#Test
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST touch $M0/file
+TEST setfattr -n user.attribute1 $M0/file
+TEST getfattr -n user.attribute1 $M0/file
+cleanup
+
diff --git a/tests/bugs/core/bug-1168803-snapd-option-validation-fix.t b/tests/bugs/core/bug-1168803-snapd-option-validation-fix.t
new file mode 100755
index 00000000000..89b015d6374
--- /dev/null
+++ b/tests/bugs/core/bug-1168803-snapd-option-validation-fix.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+## Test case for BZ-1168803 - snapd option validation should not fail if the
+#snapd is not running
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 features.uss enable
+
+## Now set another volume option, this should not fail
+TEST $CLI volume set $V0 performance.io-cache off
+
+## start the volume
+TEST $CLI volume start $V0
+
+## Kill snapd daemon and then try to stop the volume which should not fail
+kill $(ps aux | grep glusterfsd | grep snapd | awk '{print $2}')
+
+TEST $CLI volume stop $V0
+
+cleanup;
diff --git a/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t
new file mode 100755
index 00000000000..a1b9a851bf7
--- /dev/null
+++ b/tests/bugs/core/bug-1402841.t-mt-dir-scan-race.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+FILE_COUNT=500
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.shd-wait-qlength 100
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+for i in `seq 1 $FILE_COUNT`; do touch $M0/file$i; done
+TEST kill_brick $V0 $H0 $B0/${V0}1
+for i in `seq 1 $FILE_COUNT`; do echo hello>$M0/file$i; chmod -x $M0/file$i; done
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+EXPECT "$FILE_COUNT" get_pending_heal_count $V0
+TEST $CLI volume set $V0 self-heal-daemon on
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+TEST $CLI volume set $V0 self-heal-daemon off
+EXPECT_NOT "^0$" get_pending_heal_count $V0
+TEST $CLI volume set $V0 self-heal-daemon on
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+TEST umount $M0
+cleanup;
diff --git a/tests/bugs/core/bug-1421721-mpx-toggle.t b/tests/bugs/core/bug-1421721-mpx-toggle.t
new file mode 100644
index 00000000000..231be5b81a0
--- /dev/null
+++ b/tests/bugs/core/bug-1421721-mpx-toggle.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+write_a_file () {
+ echo $1 > $2
+}
+
+TEST glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}[0,1]
+
+TEST $CLI volume set all cluster.brick-multiplex on
+TEST $CLI volume start $V0
+
+TEST $GFS -s $H0 --volfile-id=$V0 $M0
+TEST write_a_file "hello" $M0/a_file
+
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+
+TEST $CLI volume set all cluster.brick-multiplex off
+TEST $CLI volume start $V0
+
+cleanup
diff --git a/tests/bugs/core/bug-1432542-mpx-restart-crash.t b/tests/bugs/core/bug-1432542-mpx-restart-crash.t
new file mode 100644
index 00000000000..2793d7008e1
--- /dev/null
+++ b/tests/bugs/core/bug-1432542-mpx-restart-crash.t
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=800
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+
+cleanup;
+
+NUM_VOLS=15
+MOUNT_BASE=$(dirname $M0)
+
+# GlusterD reports that bricks are started when in fact their attach requests
+# might still need to be retried. That's a bit of a hack, but there's no
+# feasible way to wait at that point (in attach_brick) and the rest of the
+# code is unprepared to deal with transient errors so the whole "brick start"
+# would fail. Meanwhile, glusterfsd can only handle attach requests at a
+# rather slow rate. After GlusterD tries to start a couple of hundred bricks,
+# glusterfsd can fall behind and we start getting mount failures. Arguably,
+# those are spurious because we will eventually catch up. We're just not
+# ready *yet*. More to the point, even if the errors aren't spurious that's
+# not what we're testing right now. Therefore, we give glusterfsd a bit more
+# breathing room for this test than we would otherwise.
+MOUNT_TIMEOUT=15
+
+get_brick_base () {
+ printf "%s/vol%02d" $B0 $1
+}
+
+get_mount_point () {
+ printf "%s/vol%02d" $MOUNT_BASE $1
+}
+
+function count_up_bricks {
+ vol=$1;
+ $CLI --xml volume status $vol | grep '<status>1' | wc -l
+}
+
+create_volume () {
+
+ local vol_name=$(printf "%s-vol%02d" $V0 $1)
+
+ local brick_base=$(get_brick_base $1)
+ local cmd="$CLI volume create $vol_name replica 3"
+ local b
+ for b in $(seq 0 5); do
+ local this_brick=${brick_base}/brick$b
+ mkdir -p $this_brick
+ cmd="$cmd $H0:$this_brick"
+ done
+ TEST $cmd
+ TEST $CLI volume start $vol_name
+ # check for 6 bricks and 1 shd daemon to be up and running
+ EXPECT_WITHIN 120 7 count_up_bricks $vol_name
+ local mount_point=$(get_mount_point $1)
+ mkdir -p $mount_point
+ TEST $GFS -s $H0 --volfile-id=$vol_name $mount_point
+}
+
+cleanup_func () {
+ local v
+ for v in $(seq 1 $NUM_VOLS); do
+ local mount_point=$(get_mount_point $v)
+ force_umount $mount_point
+ rm -rf $mount_point
+ local vol_name=$(printf "%s-vol%02d" $V0 $v)
+ $CLI volume stop $vol_name
+ $CLI volume delete $vol_name
+ rm -rf $(get_brick_base $1) &
+ done &> /dev/null
+ wait
+}
+push_trapfunc cleanup_func
+
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+
+# Our infrastructure can't handle an arithmetic expression here. The formula
+# is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other
+# NUM_VOLS-1 and there are 5 such statements in each iteration.
+TESTS_EXPECTED_IN_LOOP=84
+for i in $(seq 1 $NUM_VOLS); do
+ starttime="$(date +%s)";
+
+ create_volume $i
+ TEST dd if=/dev/zero of=$(get_mount_point $i)/a_file bs=4k count=1
+ # Unmounting to reduce memory footprint on regression hosts
+ mnt_point=$(get_mount_point $i)
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $mnt_point
+ endtime=$(expr $(date +%s) - $starttime)
+
+ echo "Memory Used after $i volumes : $(pmap -x $(pgrep glusterfsd) | grep total)"
+ echo "Thread Count after $i volumes: $(ps -T -p $(pgrep glusterfsd) | wc -l)"
+ echo "Time taken : ${endtime} seconds"
+done
+
+echo "=========="
+echo "List of all the threads in the Brick process"
+ps -T -p $(pgrep glusterfsd)
+echo "=========="
+
+# Kill glusterd, and wait a bit for all traces to disappear.
+TEST killall -9 glusterd
+sleep 5
+TEST killall -9 glusterfsd
+sleep 5
+
+# Restart glusterd. This is where the brick daemon supposedly dumps core,
+# though I (jdarcy) have yet to see that. Again, give it a while to settle,
+# just to be sure.
+TEST glusterd
+
+cleanup_func
+trap - EXIT
+cleanup
diff --git a/tests/bugs/core/bug-1650403.t b/tests/bugs/core/bug-1650403.t
new file mode 100644
index 00000000000..43d09bc8bd9
--- /dev/null
+++ b/tests/bugs/core/bug-1650403.t
@@ -0,0 +1,113 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=500
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+
+cleanup;
+
+NUM_VOLS=5
+MOUNT_BASE=$(dirname $M0)
+
+# GlusterD reports that bricks are started when in fact their attach requests
+# might still need to be retried. That's a bit of a hack, but there's no
+# feasible way to wait at that point (in attach_brick) and the rest of the
+# code is unprepared to deal with transient errors so the whole "brick start"
+# would fail. Meanwhile, glusterfsd can only handle attach requests at a
+# rather slow rate. After GlusterD tries to start a couple of hundred bricks,
+# glusterfsd can fall behind and we start getting mount failures. Arguably,
+# those are spurious because we will eventually catch up. We're just not
+# ready *yet*. More to the point, even if the errors aren't spurious that's
+# not what we're testing right now. Therefore, we give glusterfsd a bit more
+# breathing room for this test than we would otherwise.
+MOUNT_TIMEOUT=15
+
+get_brick_base () {
+ printf "%s/vol%02d" $B0 $1
+}
+
+get_mount_point () {
+ printf "%s/vol%02d" $MOUNT_BASE $1
+}
+
+function count_up_bricks {
+ vol=$1;
+ $CLI --xml volume status $vol | grep '<status>1' | wc -l
+}
+
+create_volume () {
+
+ local vol_name=$(printf "%s-vol%02d" $V0 $1)
+
+ local brick_base=$(get_brick_base $1)
+ local cmd="$CLI volume create $vol_name replica 3"
+ local b
+ for b in $(seq 0 5); do
+ local this_brick=${brick_base}/brick$b
+ mkdir -p $this_brick
+ cmd="$cmd $H0:$this_brick"
+ done
+ TEST $cmd
+ TEST $CLI volume start $vol_name
+ # check for 6 bricks and 1 shd daemon to be up and running
+ EXPECT_WITHIN 120 7 count_up_bricks $vol_name
+ local mount_point=$(get_mount_point $1)
+ mkdir -p $mount_point
+ TEST $GFS -s $H0 --volfile-id=$vol_name $mount_point
+}
+
+cleanup_func () {
+ local v
+ for v in $(seq 1 $NUM_VOLS); do
+ local mount_point=$(get_mount_point $v)
+ force_umount $mount_point
+ rm -rf $mount_point
+ local vol_name=$(printf "%s-vol%02d" $V0 $v)
+ $CLI volume stop $vol_name
+ $CLI volume delete $vol_name
+ rm -rf $(get_brick_base $1) &
+ done &> /dev/null
+ wait
+}
+push_trapfunc cleanup_func
+
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+
+# Our infrastructure can't handle an arithmetic expression here. The formula
+# is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other
+# NUM_VOLS-1 and there are 5 such statements in each iteration.
+TESTS_EXPECTED_IN_LOOP=24
+for i in $(seq 1 $NUM_VOLS); do
+ create_volume $i
+ TEST dd if=/dev/zero of=$(get_mount_point $i)/a_file bs=4k count=1
+ # Unmounting to reduce memory footprint on regression hosts
+ mnt_point=$(get_mount_point $i)
+ EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $mnt_point
+done
+
+glustershd_pid=`ps auxwww | grep glustershd | grep -v grep | awk -F " " '{print $2}'`
+TEST [ $glustershd_pid != 0 ]
+start=`pmap -x $glustershd_pid | grep total | awk -F " " '{print $4}'`
+echo "Memory consumption for glustershd process"
+for i in $(seq 1 50); do
+ pmap -x $glustershd_pid | grep total
+ for j in $(seq 1 $NUM_VOLS); do
+ vol_name=$(printf "%s-vol%02d" $V0 $j)
+ gluster v set $vol_name cluster.self-heal-daemon off > /dev/null
+ gluster v set $vol_name cluster.self-heal-daemon on > /dev/null
+ done
+done
+
+end=`pmap -x $glustershd_pid | grep total | awk -F " " '{print $4}'`
+diff=$((end-start))
+
+# If memory consumption is more than 10M it means some leak in reconfigure
+# code path
+
+TEST [ $diff -lt 10000 ]
+
+trap - EXIT
+cleanup
diff --git a/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t b/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
new file mode 100644
index 00000000000..1acbaa8dc0b
--- /dev/null
+++ b/tests/bugs/core/bug-1699025-brick-mux-detach-brick-fd-issue.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+cleanup
+
+#bug-1444596 - validating brick mux
+
+TEST glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
+EXPECT 1 count_brick_processes
+
+TEST $CLI volume stop $V1
+# At the time initialize brick daemon it always keeps open
+# standard fd's (0, 1 , 2) so after stop 1 volume fd's should
+# be open
+nofds=$(ls -lrth /proc/`pgrep glusterfsd`/fd | grep dev/null | wc -l)
+TEST [ $((nofds)) -eq 3 ]
+
+cleanup
diff --git a/tests/bugs/core/bug-834465.c b/tests/bugs/core/bug-834465.c
new file mode 100644
index 00000000000..33dd270b112
--- /dev/null
+++ b/tests/bugs/core/bug-834465.c
@@ -0,0 +1,60 @@
+#include <sys/file.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+int
+main(int argc, char *argv[])
+{
+ int fd = -1;
+ char *filename = NULL;
+ struct flock lock = {
+ 0,
+ };
+ int i = 0;
+ int ret = -1;
+
+ if (argc != 2) {
+ fprintf(stderr, "Usage: %s <filename> ", argv[0]);
+ goto out;
+ }
+
+ filename = argv[1];
+
+ fd = open(filename, O_RDWR | O_CREAT, 0);
+ if (fd < 0) {
+ fprintf(stderr, "open (%s) failed (%s)\n", filename, strerror(errno));
+ goto out;
+ }
+
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 1;
+ lock.l_len = 1;
+
+ while (i < 100) {
+ lock.l_type = F_WRLCK;
+ ret = fcntl(fd, F_SETLK, &lock);
+ if (ret < 0) {
+ fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ lock.l_type = F_UNLCK;
+ ret = fcntl(fd, F_SETLK, &lock);
+ if (ret < 0) {
+ fprintf(stderr, "fcntl setlk failed (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ i++;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
diff --git a/tests/bugs/core/bug-834465.t b/tests/bugs/core/bug-834465.t
new file mode 100755
index 00000000000..996248d4116
--- /dev/null
+++ b/tests/bugs/core/bug-834465.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+MOUNTDIR=$M0;
+
+#memory-accounting is enabled by default
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $MOUNTDIR;
+
+sdump1=$(generate_mount_statedump $V0);
+nalloc1=0
+grep -A3 "fuse - usage-type gf_common_mt_fd_lk_ctx_node_t" $sdump1
+if [ $? -eq '0' ]
+then
+ nalloc1=`grep -A3 "fuse - usage-type gf_common_mt_fd_lk_ctx_node_t" $sdump1 | grep -E "^num_allocs" | cut -d '=' -f2`
+fi
+
+build_tester $(dirname $0)/bug-834465.c
+
+TEST $(dirname $0)/bug-834465 $M0/testfile
+
+sdump2=$(generate_mount_statedump $V0);
+nalloc2=0
+grep -A3 "fuse - usage-type gf_common_mt_fd_lk_ctx_node_t" $sdump2
+if [ $? -eq '0' ]
+then
+ nalloc2=`grep -A3 "fuse - usage-type gf_common_mt_fd_lk_ctx_node_t" $sdump2 | grep -E "^num_allocs" | cut -d '=' -f2`
+fi
+
+TEST [ $nalloc1 -eq $nalloc2 ];
+
+TEST rm -rf $MOUNTDIR/*
+TEST rm -rf $(dirname $0)/bug-834465
+cleanup_mount_statedump $V0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR
+
+cleanup;
diff --git a/tests/bugs/core/bug-845213.t b/tests/bugs/core/bug-845213.t
new file mode 100644
index 00000000000..136e4126d14
--- /dev/null
+++ b/tests/bugs/core/bug-845213.t
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Create and start a volume with aio enabled
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 remote-dio enable;
+TEST $CLI volume set $V0 network.remote-dio disable;
+
+cleanup;
+
diff --git a/tests/bugs/core/bug-903336.t b/tests/bugs/core/bug-903336.t
new file mode 100644
index 00000000000..b52c1a4758e
--- /dev/null
+++ b/tests/bugs/core/bug-903336.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST setfattr -n trusted.io-stats-dump -v /tmp $M0
+cleanup
diff --git a/tests/bugs/core/bug-908146.t b/tests/bugs/core/bug-908146.t
new file mode 100755
index 00000000000..327be6e54bc
--- /dev/null
+++ b/tests/bugs/core/bug-908146.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.flush-behind off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --direct-io-mode=enable
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1 --attribute-timeout=0 --entry-timeout=0 --direct-io-mode=enable
+
+TEST touch $M0/a
+
+exec 4>"$M0/a"
+exec 5>"$M1/a"
+EXPECT "2" get_fd_count $V0 $H0 $B0/${V0}0 a
+
+exec 4>&-
+EXPECT "1" get_fd_count $V0 $H0 $B0/${V0}0 a
+
+exec 5>&-
+EXPECT "0" get_fd_count $V0 $H0 $B0/${V0}0 a
+
+cleanup
diff --git a/tests/bugs/core/bug-913544.t b/tests/bugs/core/bug-913544.t
new file mode 100644
index 00000000000..af421722590
--- /dev/null
+++ b/tests/bugs/core/bug-913544.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#simulate a split-brain of a file and do truncate. This should not crash the mount point
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+cd $M0
+TEST touch a
+#simulate no-changelog data split-brain
+echo "abc" > $B0/${V0}1/a
+echo "abcd" > $B0/${V0}0/a
+TEST truncate -s 0 a
+TEST ls
+cd
+
+cleanup
diff --git a/tests/bugs/core/bug-924075.t b/tests/bugs/core/bug-924075.t
new file mode 100755
index 00000000000..61ce0f18286
--- /dev/null
+++ b/tests/bugs/core/bug-924075.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#FIXME: there is another patch which moves the following function into
+#include.rc
+function process_leak_count ()
+{
+ local pid=$1;
+ return $(ls -lh /proc/$pid/fd | grep "(deleted)" | wc -l)
+}
+
+TEST glusterd;
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+TEST $CLI volume start $V0;
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+mount_pid=$(get_mount_process_pid $V0);
+TEST process_leak_count $mount_pid;
+
+cleanup;
diff --git a/tests/bugs/core/bug-927616.t b/tests/bugs/core/bug-927616.t
new file mode 100755
index 00000000000..18257131ac7
--- /dev/null
+++ b/tests/bugs/core/bug-927616.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 performance.open-behind off;
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+TEST mkdir $M0/dir;
+
+mkdir $M0/other;
+cp /etc/passwd $M0/;
+cp $M0/passwd $M0/file;
+chmod 600 $M0/file;
+
+chown -R nfsnobody:nfsnobody $M0/dir;
+
+TEST $CLI volume set $V0 server.root-squash on;
+
+sleep 1;
+
+# tests should fail.
+touch $M0/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+touch $N0/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $M0/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $N0/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+
+TEST $CLI volume set $V0 server.root-squash off;
+
+sleep 1;
+
+# tests should pass.
+touch $M0/foo 2>/dev/null;
+TEST [ $? -eq 0 ]
+touch $N0/bar 2>/dev/null;
+TEST [ $? -eq 0 ]
+mkdir $M0/new 2>/dev/null;
+TEST [ $? -eq 0 ]
+mkdir $N0/old 2>/dev/null;
+TEST [ $? -eq 0 ]
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/core/bug-949242.t b/tests/bugs/core/bug-949242.t
new file mode 100644
index 00000000000..5e916cbdbe6
--- /dev/null
+++ b/tests/bugs/core/bug-949242.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+#
+# Bug 949242 - Test basic fallocate functionality.
+#
+# Run several commands to verify basic fallocate functionality. We verify that
+# fallocate creates and allocates blocks to a file. We also verify that the keep
+# size option does not modify the file size.
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../fallocate.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+# check for fallocate support before continuing the test
+require_fallocate -l 1m -n $M0/file && rm -f $M0/file
+
+# fallocate a file and verify blocks are allocated
+TEST fallocate -l 1m $M0/file
+blksz=`stat -c %b $M0/file`
+nblks=`stat -c %B $M0/file`
+TEST [ $(($blksz * $nblks)) -eq 1048576 ]
+
+TEST unlink $M0/file
+
+# truncate a file to a fixed size, fallocate and verify that the size does not
+# change
+TEST truncate -s 1M $M0/file
+TEST fallocate -l 2m -n $M0/file
+blksz=`stat -c %b $M0/file`
+nblks=`stat -c %B $M0/file`
+sz=`stat -c %s $M0/file`
+TEST [ $sz -eq 1048576 ]
+# Note that gluster currently incorporates a hack to limit the number of blocks
+# reported as allocated to the file by the file size. We have allocated beyond the
+# file size here. Just check for non-zero allocation to avoid setting a land mine
+# for if/when that behavior might change.
+TEST [ ! $(($blksz * $nblks)) -eq 0 ]
+
+TEST unlink $M0/file
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/core/bug-986429.t b/tests/bugs/core/bug-986429.t
new file mode 100644
index 00000000000..e512301775f
--- /dev/null
+++ b/tests/bugs/core/bug-986429.t
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+## This tests failover achieved by providing multiple
+## servers from the trusted pool for fetching volume
+## specification
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s non-existent -s $H0 --volfile-id=/$V0 $M0
+
+cleanup;
diff --git a/tests/bugs/core/io-stats-1322825.t b/tests/bugs/core/io-stats-1322825.t
new file mode 100755
index 00000000000..53f2d040daa
--- /dev/null
+++ b/tests/bugs/core/io-stats-1322825.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+# Test details:
+# This is to test that the io-stat-dump xattr is not set on the brick,
+# against the path that is used to trigger the stats dump.
+# Additionally it also tests if as many io-stat dumps are generated as there
+# are io-stat xlators in the graphs, which is 2 by default
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+# Covering replication and distribution in the test
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..4}
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+
+# Generate some activity for the stats to produce something useful
+TEST $CLI volume profile $V0 start
+TEST mkdir $M0/dir1
+
+# Generate the stat dump across the io-stat instances
+TEST setfattr -n trusted.io-stats-dump -v io-stats-1322825 $M0
+
+# Check if $M0 is clean w.r.t xattr information
+# TODO: if there are better ways to check we really get no attr error, please
+# correct the following.
+getfattr -n trusted.io-stats-dump $B0/${V0}1 2>&1 | grep -qi "no such attribute"
+ret=$(echo $?)
+EXPECT 0 echo $ret
+getfattr -n trusted.io-stats-dump $B0/${V0}2 2>&1 | grep -qi "no such attribute"
+ret=$(echo $?)
+EXPECT 0 echo $ret
+getfattr -n trusted.io-stats-dump $B0/${V0}3 2>&1 | grep -qi "no such attribute"
+ret=$(echo $?)
+EXPECT 0 echo $ret
+getfattr -n trusted.io-stats-dump $B0/${V0}4 2>&1 | grep -qi "no such attribute"
+ret=$(echo $?)
+EXPECT 0 echo $ret
+
+# Check if we have 5 io-stat files in /tmp
+EXPECT 5 ls -1 /var/run/gluster/io-stats-1322825*
+# Cleanup the 5 generated files
+rm -f /var/run/gluster/io-stats-1322825*
+
+# Rinse and repeat above for a directory
+TEST setfattr -n trusted.io-stats-dump -v io-stats-1322825 $M0/dir1
+getfattr -n trusted.io-stats-dump $B0/${V0}1/dir1 2>&1 | grep -qi "no such attribute"
+ret=$(echo $?)
+EXPECT 0 echo $ret
+getfattr -n trusted.io-stats-dump $B0/${V0}2/dir1 2>&1 | grep -qi "no such attribute"
+ret=$(echo $?)
+EXPECT 0 echo $ret
+getfattr -n trusted.io-stats-dump $B0/${V0}3/dir1 2>&1 | grep -qi "no such attribute"
+ret=$(echo $?)
+EXPECT 0 echo $ret
+getfattr -n trusted.io-stats-dump $B0/${V0}4/dir1 2>&1 | grep -qi "no such attribute"
+ret=$(echo $?)
+EXPECT 0 echo $ret
+
+EXPECT 5 ls -1 /var/run/gluster/io-stats-1322825*
+rm -f /var/run/gluster/io-stats-1322825*
+
+cleanup;
diff --git a/tests/bugs/core/log-bug-1362520.t b/tests/bugs/core/log-bug-1362520.t
new file mode 100755
index 00000000000..cde854c3349
--- /dev/null
+++ b/tests/bugs/core/log-bug-1362520.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+#. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1};
+TEST $CLI volume start $V0
+
+# Mount FUSE without selinux:
+TEST glusterfs -s $H0 --volfile-id $V0 $@ $M0
+
+#Get the client log file
+log_wd=$(gluster --print-logdir)
+log_id=${M0:1} # Remove initial slash
+log_id=${log_id//\//-} # Replace remaining slashes with dashes
+log_file=$log_wd/$log_id.log
+
+#Set the client xlator log-level to TRACE and check if the TRACE logs get
+#printed
+TEST setfattr -n trusted.glusterfs.$V0-client-0.set-log-level -v TRACE $M0
+TEST ! stat $M0/xyz
+grep -q " T \[rpc-clnt\.c" $log_file
+res=$?
+EXPECT '0' echo $res
+
+#Set the client xlator log-level to INFO and make sure the TRACE logs do
+#not get printed
+echo > $log_file
+TEST setfattr -n trusted.glusterfs.$V0-client-0.set-log-level -v INFO $M0
+TEST ! stat $M0/xyz
+grep -q " T \[rpc-clnt\.c" $log_file
+res=$?
+EXPECT_NOT '0' echo $res
+
+cleanup;
diff --git a/tests/bugs/ctime/issue-832.t b/tests/bugs/ctime/issue-832.t
new file mode 100755
index 00000000000..740f731ab73
--- /dev/null
+++ b/tests/bugs/ctime/issue-832.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+
+#Trigger trusted.glusterfs.mdata setting codepath and see things work as expected
+cleanup
+
+TEST_USER=test-ctime-user
+TEST_UID=27341
+
+TEST useradd -o -M -u ${TEST_UID} ${TEST_USER}
+push_trapfunc "userdel --force ${TEST_USER}"
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+
+$GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+echo abc > $M0/test
+TEST chmod 755 $M0/
+TEST chmod 744 $M0/test
+TEST setfattr -x trusted.glusterfs.mdata $B0/$V0/test
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+$GFS --volfile-id=/$V0 --volfile-server=$H0 $M0;
+su ${TEST_USER} -c "cat $M0/test"
+TEST getfattr -n trusted.glusterfs.mdata $B0/$V0/test
+
+cleanup
diff --git a/tests/bugs/distribute/bug-1042725.t b/tests/bugs/distribute/bug-1042725.t
new file mode 100644
index 00000000000..5497eb8bc00
--- /dev/null
+++ b/tests/bugs/distribute/bug-1042725.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+#Create files
+TEST mkdir $M0/foo
+TEST touch $M0/foo/{1..20}
+for file in {1..20}; do
+ ln $M0/foo/$file $M0/foo/${file}_linkfile;
+done
+
+#Stop one of the brick
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}1
+
+rm -rf $M0/foo 2>/dev/null
+TEST stat $M0/foo
+
+touch $M0/foo/{1..20} 2>/dev/null
+touch $M0/foo/{1..20}_linkfile 2>/dev/null
+
+TEST $CLI volume start $V0 force;
+sleep 5
+function verify_duplicate {
+ count=`ls $M0/foo | sort | uniq --repeated | grep [0-9] -c`
+ echo $count
+}
+EXPECT 0 verify_duplicate
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-1066798.t b/tests/bugs/distribute/bug-1066798.t
new file mode 100755
index 00000000000..03de970a637
--- /dev/null
+++ b/tests/bugs/distribute/bug-1066798.t
@@ -0,0 +1,88 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=200
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+############################################################
+#TEST_PLAN#
+#Create a file
+#Store the hashed brick information
+#Create hard links to it
+#Remove the hashed brick
+#Check now all the hardlinks are migrated in to "OTHERBRICK"
+#Check also in mount point for all the files
+#check there is no failures and skips for migration
+############################################################
+
+TEST touch $M0/file1;
+
+file_perm=`ls -l $M0/file1 | grep file1 | awk '{print $1}'`;
+
+if [ -f $B0/${V0}1/file1 ]
+then
+ HASHED=$B0/${V0}1
+ OTHER=$B0/${V0}2
+else
+ HASHED=$B0/${V0}2
+ OTHER=$B0/${V0}1
+fi
+
+#create hundred hard links
+for i in {1..50};
+do
+TEST_IN_LOOP ln $M0/file1 $M0/link$i;
+done
+
+
+TEST $CLI volume remove-brick $V0 $H0:${HASHED} start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:${HASHED}";
+
+#check consistency in mount point
+#And also check all the links are migrated to OTHER
+for i in {1..50}
+do
+TEST_IN_LOOP [ -f ${OTHER}/link${i} ];
+TEST_IN_LOOP [ -f ${M0}/link${i} ];
+done;
+
+#check in OTHER that all the files has proper permission (Means no
+#linkto files)
+
+for i in {1..50}
+do
+link_perm=`ls -l $OTHER | grep -w link${i} | awk '{print $1}'`;
+TEST_IN_LOOP [ "${file_perm}" == "${link_perm}" ]
+
+done
+
+#check that remove-brick status should not have any failed or skipped files
+
+var=`$CLI volume remove-brick $V0 $H0:${HASHED} status | grep completed`
+
+TEST [ `echo $var | awk '{print $5}'` = "0" ]
+TEST [ `echo $var | awk '{print $6}'` = "0" ]
+
+cleanup
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/bugs/distribute/bug-1086228.t b/tests/bugs/distribute/bug-1086228.t
new file mode 100755
index 00000000000..e14ea572b61
--- /dev/null
+++ b/tests/bugs/distribute/bug-1086228.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../dht.rc
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+TEST $CLI volume start $V0;
+TEST glusterfs --direct-io-mode=yes --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+echo "D" > $M0/file1;
+TEST chmod +st $M0/file1;
+
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}"3"
+TEST $CLI volume rebalance $V0 start force
+
+EXPECT_WITHIN "10" "0" rebalance_completed
+count=0
+for i in `ls $B0/$V0"3"`;
+ do
+ var=`stat -c %A $B0/$V0"3"/$i | cut -c 4`;
+ echo $B0/$V0"3"/$i $var
+ if [ "$var" != "S" ]; then
+ count=$((count + 1))
+ fi
+ done
+
+TEST [[ $count == 0 ]]
+cleanup
diff --git a/tests/bugs/distribute/bug-1088231.t b/tests/bugs/distribute/bug-1088231.t
new file mode 100755
index 00000000000..8d4d1db73a5
--- /dev/null
+++ b/tests/bugs/distribute/bug-1088231.t
@@ -0,0 +1,173 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../dht.rc
+
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.randomize-hash-range-by-gfid on
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --aux-gfid-mount --volfile-server=$H0 $M0
+TEST mkdir $M0/a
+
+
+## Bug Description: In case of dht_discover code path, which is triggered
+## when lookup done is nameless lookup, at the end of the lookup, even if
+## it finds that self-heal is needed to fix-the layout it wont heal because
+## healing code path is not added under nameless lookup.
+
+## What to test: With Patch, Even in case of nameless lookup, if layout
+## needs to be fixed, the it will be fixed wherever lookup is successful
+## and it will not create any directory for subvols having ENOENT as it is
+## nameless lookup.
+
+gfid_with_hyphen=`getfattr -n glusterfs.gfid.string $M0/a 2>/dev/null \
+ | grep glusterfs.gfid.string | cut -d '"' -f 2`
+
+TEST setfattr -x trusted.glusterfs.dht $B0/$V0"0"/a
+
+## new healing code don't attempt healing if inode is already
+## populated. So, unmount and remount before we do stat.
+TEST umount $M0
+TEST glusterfs --volfile-id=/$V0 --aux-gfid-mount --volfile-server=$H0 $M0
+
+TEST stat $M0/.gfid/$gfid_with_hyphen
+
+## Assuming that we have two bricks, we can have two permutations of layout
+## Case 1: Brick - A Brick - B
+## 0 - 50 51-100
+##
+## Case 2: Brick - A Brick - B
+## 51 - 100 0 - 50
+##
+## To ensure layout is assigned properly, the following tests should be
+## performed.
+##
+## Case 1: Layout_b0_s = 0; Layout_b0_e = 50, Layout_b1_s=51,
+## Layout_b1_e = 100;
+##
+## layout_b1_s = layout_b0_e + 1;
+## layout_b0_s = layout_b1_e + 1; but b0_s is 0, so change to 101
+## then compare
+## Case 2: Layout_b0_s = 51, Layout_b0_e = 100, Layout_b1_s=0,
+## Layout_b1_e = 51
+##
+## layout_b0_s = Layout_b1_e + 1;
+## layout_b1_s = Layout_b0_e + 1; but b1_s is 0, so chage to 101.
+
+
+##Extract Layout
+echo `get_layout $B0/$V0"0"/a`
+echo `get_layout $B0/$V0"1"/a`
+layout_b0_s=`get_layout $B0/$V0"0"/a | cut -c19-26`
+layout_b0_e=`get_layout $B0/$V0"0"/a | cut -c27-34`
+layout_b1_s=`get_layout $B0/$V0"1"/a | cut -c19-26`
+layout_b1_e=`get_layout $B0/$V0"1"/a | cut -c27-34`
+
+
+##Add 0X to perform Hex arithematic
+layout_b0_s="0x"$layout_b0_s
+layout_b0_e="0x"$layout_b0_e
+layout_b1_s="0x"$layout_b1_s
+layout_b1_e="0x"$layout_b1_e
+
+
+## Logic of converting starting layout "0" to "Max_value of layout + 1"
+comp1=$(($layout_b0_s + 0))
+if [ "$comp1" == "0" ];then
+ comp1=4294967296
+fi
+
+comp2=$(($layout_b1_s + 0))
+if [ "$comp2" == "0" ];then
+ comp2=4294967296
+fi
+
+diff1=$(($layout_b0_e + 1))
+diff2=$(($layout_b1_e + 1))
+
+
+healed=0
+
+if [ "$comp1" == "$diff1" ] && [ "$comp2" == "$diff2" ]; then
+ healed=$(($healed + 1))
+fi
+
+if [ "$comp1" == "$diff2" ] && [ "$comp2" == "$diff1" ]; then
+ healed=$(($healed + 1))
+fi
+
+TEST [ $healed == 1 ]
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.randomize-hash-range-by-gfid on
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --aux-gfid-mount --volfile-server=$H0 $M0
+TEST mkdir $M0/a
+
+gfid_with_hyphen=`getfattr -n glusterfs.gfid.string $M0/a 2>/dev/null \
+ | grep glusterfs.gfid.string | cut -d '"' -f 2`
+
+TEST setfattr -x trusted.glusterfs.dht $B0/$V0"0"/a
+TEST setfattr -x trusted.glusterfs.dht $B0/$V0"1"/a
+
+## new healing code don't attempt healing if inode is already
+## populated. So, unmount and remount before we do stat.
+TEST umount $M0
+TEST glusterfs --volfile-id=/$V0 --aux-gfid-mount --volfile-server=$H0 $M0
+
+TEST stat $M0/.gfid/$gfid_with_hyphen
+
+##Extract Layout
+
+layout_b0_s=`get_layout $B0/$V0"0"/a | cut -c19-26`
+layout_b0_e=`get_layout $B0/$V0"0"/a | cut -c27-34`
+layout_b1_s=`get_layout $B0/$V0"1"/a | cut -c19-26`
+layout_b1_e=`get_layout $B0/$V0"1"/a | cut -c27-34`
+
+
+##Add 0X to perform Hex arithematic
+layout_b0_s="0x"$layout_b0_s
+layout_b0_e="0x"$layout_b0_e
+layout_b1_s="0x"$layout_b1_s
+layout_b1_e="0x"$layout_b1_e
+
+
+
+## Logic of converting starting layout "0" to "Max_value of layout + 1"
+comp1=$(($layout_b0_s + 0))
+if [ "$comp1" == "0" ];then
+ comp1=4294967296
+fi
+
+comp2=$(($layout_b1_s + 0))
+if [ "$comp2" == "0" ];then
+ comp2=4294967296
+fi
+
+diff1=$(($layout_b0_e + 1))
+diff2=$(($layout_b1_e + 1))
+
+
+healed=0
+
+if [ "$comp1" == "$diff1" ] && [ "$comp2" == "$diff2" ]; then
+ healed=$(($healed + 1))
+fi
+
+if [ "$comp1" == "$diff2" ] && [ "$comp2" == "$diff1" ]; then
+ healed=$(($healed + 1))
+fi
+
+TEST [ $healed == 1 ]
+cleanup
+
diff --git a/tests/bugs/distribute/bug-1099890.t b/tests/bugs/distribute/bug-1099890.t
new file mode 100644
index 00000000000..1a19ba880c0
--- /dev/null
+++ b/tests/bugs/distribute/bug-1099890.t
@@ -0,0 +1,130 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+## TO-DO: Fix the following once the dht du refresh interval issue is fixed:
+## 1. Do away with sleep(1).
+## 2. Do away with creation of empty files.
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../../basic/quota.c -o $QDD
+
+TEST glusterd;
+TEST pidof glusterd;
+
+# Create 2 loop devices, one per brick.
+TEST truncate -s 100M $B0/brick1
+TEST truncate -s 100M $B0/brick2
+
+TEST L1=`SETUP_LOOP $B0/brick1`
+TEST MKFS_LOOP $L1
+
+TEST L2=`SETUP_LOOP $B0/brick2`
+TEST MKFS_LOOP $L2
+
+TEST mkdir -p $B0/${V0}{1,2}
+
+TEST MOUNT_LOOP $L1 $B0/${V0}1
+TEST MOUNT_LOOP $L2 $B0/${V0}2
+
+# Create a plain distribute volume with 2 subvols.
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+TEST $CLI volume start $V0;
+EXPECT "Started" volinfo_field $V0 'Status';
+
+TEST $CLI volume quota $V0 enable;
+
+TEST $CLI volume set $V0 features.quota-deem-statfs on
+
+TEST $CLI volume quota $V0 limit-usage / 150MB;
+
+TEST $CLI volume set $V0 cluster.min-free-disk 50%
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+# Make sure quota-deem-statfs is working as expected
+EXPECT "150M" echo `df -h $M0 -P | tail -1 | awk {'print $2'}`
+
+# Create a new file 'foo' under the root of the volume, which hashes to subvol-0
+# of DHT, that consumes 40M
+TEST $QDD $M0/foo 256 160
+
+TEST stat $B0/${V0}1/foo
+TEST ! stat $B0/${V0}2/foo
+
+# Create a new file 'bar' under the root of the volume, which hashes to subvol-1
+# of DHT, that consumes 40M
+TEST $QDD $M0/bar 256 160
+
+TEST ! stat $B0/${V0}1/bar
+TEST stat $B0/${V0}2/bar
+
+# Touch a zero-byte file on the root of the volume to make sure the statfs data
+# on DHT is refreshed
+sleep 1;
+TEST touch $M0/empty1;
+
+# At this point, the available space on each subvol {60M,60M} is greater than
+# their min-free-disk {50M,50M}, but if this bug still exists, then
+# the total available space on the volume as perceived by DHT should be less
+# than min-free-disk, i.e.,
+#
+# consumed space returned per subvol by quota = (40M + 40M) = 80M
+#
+# Therefore, consumed space per subvol computed by DHT WITHOUT the fix would be:
+# (80M/150M)*100 = 53%
+#
+# Available space per subvol as perceived by DHT with the bug = 47%
+# which is less than min-free-disk
+
+# Now I create a file that hashes to subvol-1 (counting from 0) of DHT.
+# If this bug still exists,then DHT should be routing this creation to subvol-0.
+# If this bug is fixed, then DHT should be routing the creation to subvol-1 only
+# as it has more than min-free-disk space available.
+
+TEST $QDD $M0/file 1 1
+sleep 1;
+TEST ! stat $B0/${V0}1/file
+TEST stat $B0/${V0}2/file
+
+# Touch another zero-byte file on the root of the volume to refresh statfs
+# values stored by DHT.
+
+TEST touch $M0/empty2;
+
+# Now I create a new file that hashes to subvol-0, at the end of which, there
+# will be less than min-free-disk space available on it.
+TEST $QDD $M0/fil 256 80
+sleep 1;
+TEST stat $B0/${V0}1/fil
+TEST ! stat $B0/${V0}2/fil
+
+# Touch to refresh statfs info cached by DHT
+
+TEST touch $M0/empty3;
+
+# Now I create a file that hashes to subvol-0 but since it has less than
+# min-free-disk space available, its data will be cached on subvol-1.
+
+TEST $QDD $M0/zz 256 20
+
+TEST stat $B0/${V0}1/zz
+TEST stat $B0/${V0}2/zz
+
+EXPECT "$V0-client-1" dht_get_linkto_target "$B0/${V0}1/zz"
+
+EXPECT "1" is_dht_linkfile "$B0/${V0}1/zz"
+
+force_umount $M0
+TEST $CLI volume stop $V0
+UMOUNT_LOOP ${B0}/${V0}{1,2}
+rm -f ${B0}/brick{1,2}
+
+rm -f $QDD
+cleanup
diff --git a/tests/bugs/distribute/bug-1125824.t b/tests/bugs/distribute/bug-1125824.t
new file mode 100755
index 00000000000..7e401092273
--- /dev/null
+++ b/tests/bugs/distribute/bug-1125824.t
@@ -0,0 +1,103 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+create_files () {
+ for i in {1..10}; do
+ orig=$(printf %s/file%04d $1 $i)
+ echo "This is file $i" > $orig
+ done
+ for i in {1..10}; do
+ mkdir $(printf %s/dir%04d $1 $i)
+ done
+ sync
+}
+
+create_dirs () {
+ for i in {1..10}; do
+ mkdir $(printf %s/dir%04d $1 $i)
+ create_files $(printf %s/dir%04d $1 $i)
+ done
+ sync
+}
+
+stat_files () {
+ for i in {1..10}; do
+ orig=$(printf %s/file%04d $1 $i)
+ stat $orig
+ done
+ for i in {1..10}; do
+ stat $(printf %s/dir%04d $1 $i)
+ done
+ sync
+}
+
+stat_dirs () {
+ for i in {1..10}; do
+ stat $(printf %s/dir%04d $1 $i)
+ stat_files $(printf %s/dir%04d $1 $i)
+ done
+ sync
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '4' brick_count $V0
+
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0
+
+# Create and poulate the NFS inode tables
+TEST create_dirs $N0
+TEST stat_dirs $N0
+
+# add-bricks changing the state of the volume where some bricks
+# would have some directories and others would not
+TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}{5,6,7,8}
+
+# Post this dht_access was creating a mess for directories which is fixed
+# with this commit. The issues could range from getting ENOENT or
+# ESTALE or entries missing to directories not having complete
+# layouts.
+TEST cd $N0
+TEST ls -lR
+
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+# tests to check post rebalance if layouts and entires are fine and
+# accessible by NFS to clear the volume
+TEST ls -lR
+rm -rf ./*
+# There are additional bugs where NFS+DHT does not delete all entries
+# on an rm -rf, so we do an additional rm -rf to ensure all is done
+# and we are facing this transient issue, rather than a bad directory
+# layout that is cached in memory
+TEST rm -rf ./*
+
+# Get out of the mount, so that umount can work
+TEST cd /
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-1161156.t b/tests/bugs/distribute/bug-1161156.t
new file mode 100755
index 00000000000..2b9e15407ca
--- /dev/null
+++ b/tests/bugs/distribute/bug-1161156.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../../basic/quota.c -o $QDD
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4,5,6};
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+# Testing with NFS for no particular reason
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+TEST mount_nfs $H0:/$V0 $N0
+mydir="dir"
+TEST mkdir -p $N0/$mydir
+TEST mkdir -p $N0/newdir
+
+TEST $QDD $N0/$mydir/file 256 40
+
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 20MB
+TEST $CLI volume quota $V0 limit-usage /newdir 5MB
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 hard-timeout 0
+
+TEST $QDD $N0/$mydir/newfile_1 256 20
+# wait for write behind to complete.
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "15.0MB" quotausage "/"
+TEST ! $QDD $N0/$mydir/newfile_2 256 40
+
+# Test rename within a directory. It should pass even when the
+# corresponding directory quota is filled.
+TEST mv $N0/dir/file $N0/dir/newfile_3
+
+# rename should fail here with disk quota exceeded
+TEST ! mv $N0/dir/newfile_3 $N0/newdir/
+
+umount_nfs $N0
+TEST $CLI volume stop $V0
+
+rm -f $QDD
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-1161311.t b/tests/bugs/distribute/bug-1161311.t
new file mode 100755
index 00000000000..62796068928
--- /dev/null
+++ b/tests/bugs/distribute/bug-1161311.t
@@ -0,0 +1,164 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=350
+
+# This tests for hard link preservation for files that are linked, when the
+# file is undergoing migration
+
+# --- Improvements and other tests ---
+## Fail rebalance of the large file for which links are created during P1/2
+### phases of migration
+## Start with multiple hard links to the file and then create more during P1/2
+### phases of migration
+## Test the same with NFS as the mount rather than FUSE
+## Create links when file is under P2 of migration specifically
+## Test with quota, to error out during hard link creation (if possible)
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+TEST truncate -s 10GB $B0/brick1
+TEST truncate -s 10GB $B0/brick2
+TEST truncate -s 10GB $B0/brick3
+
+TEST LO1=`SETUP_LOOP $B0/brick1`
+TEST MKFS_LOOP $LO1
+
+TEST LO2=`SETUP_LOOP $B0/brick2`
+TEST MKFS_LOOP $LO2
+
+TEST LO3=`SETUP_LOOP $B0/brick3`
+TEST MKFS_LOOP $LO3
+
+TEST mkdir -p $B0/${V0}1 $B0/${V0}2 $B0/${V0}3
+
+
+TEST MOUNT_LOOP $LO1 $B0/${V0}1
+TEST MOUNT_LOOP $LO2 $B0/${V0}2
+TEST MOUNT_LOOP $LO3 $B0/${V0}3
+
+checksticky () {
+ i=0;
+ while [ ! -k $1 ]; do
+ sleep 1
+ i=$((i+1));
+ # Try for 10 seconds to get the sticky bit state
+ # else fail the test, as we may never see it
+ if [[ $i == 10 ]]; then
+ return $i
+ fi
+ echo "Waiting... $i"
+ done
+ echo "Done... got out @ $i"
+ return 0
+}
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '3' brick_count $V0
+
+TEST $CLI volume set $V0 parallel-readdir on
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE with caching disabled (read-write)
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+# Create a directories to hold the links
+TEST mkdir $M0/dir1
+TEST mkdir -p $M0/dir2/dir3
+
+# Create a large file (8 GB), so that rebalance takes time
+# Since we really don't care about the contents of the file, we use fallocate
+# to generate the file much faster. We could also use truncate, which is even
+# faster, but rebalance could take advantage of an sparse file and migrate it
+# in an optimized way, but we don't want a fast migration.
+TEST fallocate -l 8G $M0/dir1/FILE2
+
+# Rename the file to create a linkto, for rebalance to
+# act on the file
+## FILE1 and FILE2 hashes are, 678b1c4a e22c1ada, so they fall
+## into separate bricks when brick count is 3
+TEST mv $M0/dir1/FILE2 $M0/dir1/FILE1
+
+brick_loc=$(get_backend_paths $M0/dir1/FILE1)
+
+# unmount and remount the volume
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+# Start the rebalance
+TEST $CLI volume rebalance $V0 start force
+
+# Wait for FILE to get the sticky bit on, so that file is under
+# active rebalance, before creating the links
+TEST checksticky $brick_loc
+
+# Create the links
+## FILE3 FILE5 FILE7 have hashes, c8c91469 566d26ce 22ce7eba
+## Which fall into separate bricks on a 3 brick layout
+cd $M0
+TEST ln ./dir1/FILE1 ./dir1/FILE7
+TEST ln ./dir1/FILE1 ./dir1/FILE5
+TEST ln ./dir1/FILE1 ./dir1/FILE3
+
+TEST ln ./dir1/FILE1 ./dir2/FILE7
+TEST ln ./dir1/FILE1 ./dir2/FILE5
+TEST ln ./dir1/FILE1 ./dir2/FILE3
+
+TEST ln ./dir1/FILE1 ./dir2/dir3/FILE7
+TEST ln ./dir1/FILE1 ./dir2/dir3/FILE5
+TEST ln ./dir1/FILE1 ./dir2/dir3/FILE3
+cd /
+
+# Ideally for this test to have done its job, the file should still be
+# under migration, so check the sticky bit again
+TEST checksticky $brick_loc
+
+# Wait for rebalance to complete
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+# Check if all files are clean and migrated right
+## stat on the original file should show linkcount of 10
+linkcountsrc=$(stat -c %h $M0/dir1/FILE1)
+TEST [[ $linkcountsrc == 10 ]]
+
+## inode and size of every file should be same as original file
+inodesrc=$(stat -c %i $M0/dir1/FILE1)
+TEST [[ $(stat -c %i $M0/dir1/FILE3) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir1/FILE5) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir1/FILE7) == $inodesrc ]]
+
+TEST [[ $(stat -c %i $M0/dir2/FILE3) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir2/FILE5) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir2/FILE7) == $inodesrc ]]
+
+TEST [[ $(stat -c %i $M0/dir2/dir3/FILE3) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir2/dir3/FILE5) == $inodesrc ]]
+TEST [[ $(stat -c %i $M0/dir2/dir3/FILE7) == $inodesrc ]]
+
+# Check, newer link creations
+cd $M0
+TEST ln ./dir1/FILE1 ./FILE1
+TEST ln ./dir2/FILE3 ./FILE3
+TEST ln ./dir2/dir3/FILE5 ./FILE5
+TEST ln ./dir1/FILE7 ./FILE7
+cd /
+linkcountsrc=$(stat -c %h $M0/dir1/FILE1)
+TEST [[ $linkcountsrc == 14 ]]
+
+
+# Stop the volume
+TEST $CLI volume stop $V0;
+
+UMOUNT_LOOP ${B0}/${V0}{1..3}
+rm -f ${B0}/brick{1..3}
+cleanup;
diff --git a/tests/bugs/distribute/bug-1190734.t b/tests/bugs/distribute/bug-1190734.t
new file mode 100644
index 00000000000..9256088f7a0
--- /dev/null
+++ b/tests/bugs/distribute/bug-1190734.t
@@ -0,0 +1,93 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+BRICK_COUNT=3
+FILE_COUNT=100
+FILE_COUNT_TIME=5
+
+function create_files {
+ rm -rf $2
+ mkdir $2
+ for i in `seq 1 $1`; do
+ touch $2/file_$i
+ done
+}
+
+function get_file_count {
+ ls $1/file_[0-9]* | wc -l
+}
+
+function reset {
+ $CLI volume stop $V0
+ ${UMOUNT_F} $1
+ $CLI volume delete $V0
+}
+
+function start_mount_fuse {
+ $CLI volume start $V0
+ [ $? -ne 0 ] && return 1
+
+ $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+ [ $? -ne 0 ] && return 1
+
+ create_files $FILE_COUNT $M0/$1
+ [ $? -ne 0 ] && return 1
+
+ return 0
+}
+
+function start_mount_nfs {
+ $CLI volume start $V0
+ [ $? -ne 0 ] && return 1
+
+ sleep 3
+ mount_nfs $H0:/$V0 $N0
+ [ $? -ne 0 ] && return 1
+
+ create_files $FILE_COUNT $N0/$1
+ [ $? -ne 0 ] && return 1
+
+ return 0
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+# Test 1-2 Create repliacted volume
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \
+ $H0:$B0/${V0}2 $H0:$B0/${V0}3 $H0:$B0/${V0}4 $H0:$B0/${V0}5
+TEST $CLI volume set $V0 nfs.disable false
+
+# ------- test 1: AFR, fuse + remove bricks
+
+TEST start_mount_fuse test1
+EXPECT_WITHIN $FILE_COUNT_TIME $FILE_COUNT get_file_count $M0/test1
+TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}{2,3} start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/${V0}2 $H0:$B0/${V0}3"
+TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}{2,3} commit
+EXPECT_WITHIN $FILE_COUNT_TIME $FILE_COUNT get_file_count $M0/test1
+reset $M0
+
+# ------- test 2: AFR, nfs + remove bricks
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \
+ $H0:$B0/${V0}2 $H0:$B0/${V0}3 $H0:$B0/${V0}4 $H0:$B0/${V0}5
+TEST $CLI volume set $V0 nfs.disable false
+
+TEST start_mount_nfs test2
+EXPECT_WITHIN $FILE_COUNT_TIME $FILE_COUNT get_file_count $N0/test2
+TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 $H0:$B0/${V0}3 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/${V0}2 $H0:$B0/${V0}3"
+TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}{2,3} commit
+EXPECT_WITHIN $FILE_COUNT_TIME $FILE_COUNT get_file_count $N0/test2
+reset $N0
+
+cleanup
diff --git a/tests/bugs/distribute/bug-1193636.c b/tests/bugs/distribute/bug-1193636.c
new file mode 100644
index 00000000000..ea3f79a4e06
--- /dev/null
+++ b/tests/bugs/distribute/bug-1193636.c
@@ -0,0 +1,68 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <fcntl.h>
+#include <string.h>
+
+#define MY_XATTR_NAME "user.ftest"
+#define MY_XATTR_VAL "ftestval"
+
+void
+usage(void)
+{
+ printf("Usage : bug-1193636 <filename> <xattr_name> <op>\n");
+ printf(" op : 0 - set, 1 - remove\n");
+}
+
+int
+main(int argc, char **argv)
+{
+ int fd;
+ int err = 0;
+ char *xattr_name = NULL;
+ int op = 0;
+
+ if (argc != 4) {
+ usage();
+ exit(1);
+ }
+
+ op = atoi(argv[3]);
+
+ if ((op != 0) && (op != 1)) {
+ printf("Invalid operation specified.\n");
+ usage();
+ exit(1);
+ }
+
+ xattr_name = argv[2];
+
+ fd = open(argv[1], O_RDWR);
+ if (fd == -1) {
+ printf("Failed to open file %s\n", argv[1]);
+ exit(1);
+ }
+
+ if (!op) {
+ err = fsetxattr(fd, xattr_name, MY_XATTR_VAL, strlen(MY_XATTR_VAL) + 1,
+ XATTR_CREATE);
+
+ if (err) {
+ printf("Failed to set xattr %s: %m\n", xattr_name);
+ exit(1);
+ }
+
+ } else {
+ err = fremovexattr(fd, xattr_name);
+
+ if (err) {
+ printf("Failed to remove xattr %s: %m\n", xattr_name);
+ exit(1);
+ }
+ }
+
+ close(fd);
+
+ return 0;
+}
diff --git a/tests/bugs/distribute/bug-1193636.t b/tests/bugs/distribute/bug-1193636.t
new file mode 100644
index 00000000000..b377910336e
--- /dev/null
+++ b/tests/bugs/distribute/bug-1193636.t
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+
+checksticky () {
+ i=0;
+ while [ ! -k $1 ]; do
+ sleep 1
+ i=$((i+1));
+ if [[ $i == 10 ]]; then
+ return $i
+ fi
+ echo "Waiting... $i"
+ done
+ echo "done ...got out @ $i"
+ return 0
+}
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3};
+TEST $CLI volume start $V0
+
+# Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST mkdir $M0/dir1
+
+# Create a large file (1GB), so that rebalance takes time
+dd if=/dev/zero of=$M0/dir1/FILE2 bs=64k count=10240
+
+# Rename the file to create a linkto, for rebalance to
+# act on the file
+TEST mv $M0/dir1/FILE2 $M0/dir1/FILE1
+
+brick_loc=$(get_backend_paths $M0/dir1/FILE1)
+
+build_tester $(dirname $0)/bug-1193636.c
+
+TEST $CLI volume rebalance $V0 start force
+
+TEST checksticky $brick_loc
+
+TEST setfattr -n "user.test1" -v "test1" $M0/dir1/FILE1
+TEST setfattr -n "user.test2" -v "test1" $M0/dir1/FILE1
+TEST setfattr -n "user.test3" -v "test1" $M0/dir1/FILE1
+
+TEST $(dirname $0)/bug-1193636 $M0/dir1/FILE1 user.fsetx 0
+TEST $(dirname $0)/bug-1193636 $M0/dir1/FILE1 user.fremx 0
+
+TEST getfattr -n "user.fremx" $M0/dir1/FILE1
+TEST setfattr -x "user.test2" $M0/dir1/FILE1
+
+
+TEST $(dirname $0)/bug-1193636 $M0/dir1/FILE1 user.fremx 1
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+TEST getfattr -n "user.fsetx" $M0/dir1/FILE1
+TEST getfattr -n "user.test1" $M0/dir1/FILE1
+TEST ! getfattr -n "user.test2" $M0/dir1/FILE1
+TEST ! getfattr -n "user.fremx" $M0/dir1/FILE1
+TEST getfattr -n "user.test3" $M0/dir1/FILE1
+
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-1204140.t b/tests/bugs/distribute/bug-1204140.t
new file mode 100755
index 00000000000..050c069ea30
--- /dev/null
+++ b/tests/bugs/distribute/bug-1204140.t
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../dht.rc
+
+cleanup;
+
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+TEST touch $M0/new.txt;
+TEST getfattr -n "glusterfs.get_real_filename:NEW.txt" $M0;
+TEST ! getfattr -n "glusterfs.get_realfilename:NEXT.txt" $M0;
+
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-1247563.t b/tests/bugs/distribute/bug-1247563.t
new file mode 100644
index 00000000000..a2fc722896f
--- /dev/null
+++ b/tests/bugs/distribute/bug-1247563.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+is_sticky_set () {
+ echo $1
+ if [ -k $1 ];
+ then
+ echo "yes"
+ else
+ echo "no"
+ fi
+}
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3};
+TEST $CLI volume start $V0
+
+# Mount FUSE
+TEST glusterfs --acl -s $H0 --volfile-id $V0 $M0
+
+TEST mkdir $M0/dir1
+
+echo "Testing pacls on rebalance" > $M0/dir1/FILE1
+
+FPATH1=`find $B0/ -name FILE1`
+
+# Rename the file to create a linkto, for rebalance to
+# act on the file
+
+TEST mv $M0/dir1/FILE1 $M0/dir1/FILE2
+FPATH2=`find $B0/ -perm 1000 -name FILE2`
+
+setfacl -m user:root:rwx $M0/dir1/FILE2
+
+# Run rebalance without the force option to skip
+# the file migration
+TEST $CLI volume rebalance $V0 start
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+#Check that the file has been skipped,i.e., the linkto still exists
+EXPECT "yes" is_sticky_set $FPATH2
+
+
+#The linkto file should not have any posix acls set
+COUNT=`getfacl $FPATH2 |grep -c "user:root:rwx"`
+EXPECT "0" echo $COUNT
+
+cleanup;
+
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/bugs/distribute/bug-1368012.t b/tests/bugs/distribute/bug-1368012.t
new file mode 100644
index 00000000000..0b626353aab
--- /dev/null
+++ b/tests/bugs/distribute/bug-1368012.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function get_permission {
+ stat -c "%A" $1
+}
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" online_brick_count
+TEST $CLI volume set $V0 performance.stat-prefetch off
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+##Test case: Add-brick
+#------------------------------------------------------------
+#change permission of both root
+TEST chmod 444 $M0
+
+#store permission for comparision
+TEST permission_root=`stat -c "%A" $M0`
+TEST echo $permission_root
+#Add-brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" online_brick_count
+
+#Allow one lookup to happen
+TEST ls $M0
+#Generate another lookup
+echo 3 > /proc/sys/vm/drop_caches
+TEST ls $M0
+#check root permission
+EXPECT_WITHIN "5" $permission_root get_permission $M0
+#check permission on the new-brick
+EXPECT $permission_root get_permission $B0/${V0}3
+cleanup
diff --git a/tests/bugs/distribute/bug-1389697.t b/tests/bugs/distribute/bug-1389697.t
new file mode 100644
index 00000000000..0d428b8d9d2
--- /dev/null
+++ b/tests/bugs/distribute/bug-1389697.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+
+cleanup
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$B1/b1 $H1:$B1/b2 $H2:$B2/b3
+TEST $CLI_1 volume start $V0
+
+#Start a fix-layout
+TEST $CLI_1 volume rebalance $V0 fix-layout start
+
+#volume rebalance status should work
+TEST $CLI_1 volume rebalance $V0 status
+$CLI_1 volume rebalance $V0 status
+
+val=$($CLI_1 volume rebalance $V0 status |grep "fix-layout" 2>&1)
+val=$?
+TEST [ $val -eq 0 ];
+
+#Start a remove brick for the brick on H2
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/b3 start
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/b3 status
+
+#Check remove brick status from H1
+$CLI_1 volume remove-brick $V0 $H2:$B2/b3 status |grep "fix-layout" 2>&1
+val=$?
+TEST [ $val -eq 1 ];
+
+$CLI_1 volume remove-brick $V0 $H2:$B2/b3 status
+$CLI_2 volume remove-brick $V0 $H2:$B2/b3 status
+
+
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/b3 stop
+
+cleanup
diff --git a/tests/bugs/distribute/bug-1543279.t b/tests/bugs/distribute/bug-1543279.t
new file mode 100644
index 00000000000..47b8b4a4a95
--- /dev/null
+++ b/tests/bugs/distribute/bug-1543279.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+TESTS_EXPECTED_IN_LOOP=44
+SCRIPT_TIMEOUT=600
+
+rename_files() {
+ MOUNT=$1
+ ITERATIONS=$2
+ for i in $(seq 1 $ITERATIONS); do uuid="`uuidgen`"; echo "some data" > $MOUNT/test$uuid; mv $MOUNT/test$uuid $MOUNT/test -f || return $?; done
+}
+
+run_test_for_volume() {
+ VOLUME=$1
+ ITERATIONS=$2
+ TEST_IN_LOOP $CLI volume start $VOLUME
+
+ TEST_IN_LOOP glusterfs -s $H0 --volfile-id $VOLUME $M0
+ TEST_IN_LOOP glusterfs -s $H0 --volfile-id $VOLUME $M1
+ TEST_IN_LOOP glusterfs -s $H0 --volfile-id $VOLUME $M2
+ TEST_IN_LOOP glusterfs -s $H0 --volfile-id $VOLUME $M3
+
+ rename_files $M0 $ITERATIONS &
+ M0_RENAME_PID=$!
+
+ rename_files $M1 $ITERATIONS &
+ M1_RENAME_PID=$!
+
+ rename_files $M2 $ITERATIONS &
+ M2_RENAME_PID=$!
+
+ rename_files $M3 $ITERATIONS &
+ M3_RENAME_PID=$!
+
+ TEST_IN_LOOP wait $M0_RENAME_PID
+ TEST_IN_LOOP wait $M1_RENAME_PID
+ TEST_IN_LOOP wait $M2_RENAME_PID
+ TEST_IN_LOOP wait $M3_RENAME_PID
+
+ TEST_IN_LOOP $CLI volume stop $VOLUME
+ TEST_IN_LOOP $CLI volume delete $VOLUME
+ umount $M0 $M1 $M2 $M3
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0..8} force
+run_test_for_volume $V0 200
+
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0..8} force
+run_test_for_volume $V0 200
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..8} force
+run_test_for_volume $V0 200
+
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5} force
+run_test_for_volume $V0 200
+
+cleanup
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/bugs/distribute/bug-1600379.t b/tests/bugs/distribute/bug-1600379.t
new file mode 100644
index 00000000000..8d2f6154100
--- /dev/null
+++ b/tests/bugs/distribute/bug-1600379.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# Initialize
+#------------------------------------------------------------
+cleanup;
+
+# Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+# Create a volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+
+# Verify volume creation
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Start volume and verify successful start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+#------------------------------------------------------------
+
+# Test case - Remove xattr from killed brick on lookup
+#------------------------------------------------------------
+# Create a dir and set custom xattr
+TEST mkdir $M0/testdir
+TEST setfattr -n user.attr -v val $M0/testdir
+xattr_val=`getfattr -d $B0/${V0}2/testdir | awk '{print $1}'`;
+TEST ${xattr_val}='user.attr="val"';
+
+# Kill 2nd brick process
+TEST kill_brick $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count
+
+# Remove custom xattr
+TEST setfattr -x user.attr $M0/testdir
+
+# Bring up the killed brick process
+TEST $CLI volume start $V0 force
+
+# Perform lookup
+sleep 5
+TEST ls $M0/testdir
+
+# Check brick xattrs
+xattr_val_2=`getfattr -d $B0/${V0}2/testdir`;
+TEST [ ${xattr_val_2} = ''] ;
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-1667804.t b/tests/bugs/distribute/bug-1667804.t
new file mode 100644
index 00000000000..3f7c43111d7
--- /dev/null
+++ b/tests/bugs/distribute/bug-1667804.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+function confirm_all_linkto_files ()
+{
+ inpath=$1
+ for infile in $inpath/*
+ do
+ echo $infile
+ ret1=$(is_dht_linkfile $infile)
+ if [ "$ret1" -eq 0 ]; then
+ echo "$infile is not a linkto file"
+ echo 0
+ return
+ fi
+ done
+ echo 1
+}
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+#Create files and rename them in order to create linkto files
+TEST mkdir -p $M0/dir0/dir1
+TEST touch $M0/dir0/dir1/file-{1..50}
+
+for i in {1..50}; do
+ mv $M0/dir0/dir1/file-$i $M0/dir0/dir1/nfile-$i;
+done
+
+#Remove the second brick to force the creation of linkto files
+#on the removed brick
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}2"
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 stop
+
+EXPECT "1" confirm_all_linkto_files $B0/${V0}2/dir0/dir1
+
+#Modify the xattrs of the linkto files on the removed brick to point to itself.
+
+target=$(cat $M0/.meta/graphs/active/$V0-dht/subvolumes/1/name)
+
+setfattr -n trusted.glusterfs.dht.linkto -v "$target\0" $B0/${V0}2/dir0/dir1/nfile*
+
+
+TEST rm -rf $M0/dir0
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-1786679.t b/tests/bugs/distribute/bug-1786679.t
new file mode 100755
index 00000000000..219ce51c8a9
--- /dev/null
+++ b/tests/bugs/distribute/bug-1786679.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=250
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+
+# create 2 subvols
+# create a dir
+# create a file
+# change layout
+# remove the file
+# execute create from a different mount
+# Without the patch, the file will be present on both of the bricks
+
+cleanup
+
+function get_layout () {
+
+layout=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1 | grep dht | gawk -F"=" '{print $2}'`
+
+echo $layout
+
+}
+
+function set_layout()
+{
+ setfattr -n "trusted.glusterfs.dht" -v $1 $2
+}
+
+TEST glusterd
+TEST pidof glusterd
+
+BRICK1=$B0/${V0}-0
+BRICK2=$B0/${V0}-1
+
+TEST $CLI volume create $V0 $H0:$BRICK1 $H0:$BRICK2
+TEST $CLI volume start $V0
+
+# Mount FUSE and create symlink
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file
+TEST ! stat "$BRICK1/dir/file"
+TEST stat "$BRICK2/dir/file"
+
+layout1="$(get_layout "$BRICK1/dir")"
+layout2="$(get_layout "$BRICK2/dir")"
+
+TEST set_layout $layout1 "$BRICK2/dir"
+TEST set_layout $layout2 "$BRICK1/dir"
+
+TEST rm $M0/dir/file -f
+TEST gluster v set $V0 client-log-level DEBUG
+
+#Without the patch in place, this client will create the file in $BRICK2
+#which will lead to two files being on both the bricks when a new client
+#create the file with the same name
+TEST touch $M0/dir/file
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M1
+TEST touch $M1/dir/file
+
+TEST stat "$BRICK1/dir/file"
+TEST ! stat "$BRICK2/dir/file"
+
+cleanup
diff --git a/tests/bugs/distribute/bug-853258.t b/tests/bugs/distribute/bug-853258.t
new file mode 100755
index 00000000000..6817d9e2cd3
--- /dev/null
+++ b/tests/bugs/distribute/bug-853258.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+mkdir -p $H0:$B0/${V0}0
+mkdir -p $H0:$B0/${V0}1
+mkdir -p $H0:$B0/${V0}2
+mkdir -p $H0:$B0/${V0}3
+
+# Create and start a volume.
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 cluster.weighted-rebalance off
+
+# Force assignment of initial ranges.
+TEST $CLI volume rebalance $V0 fix-layout start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "fix-layout completed" fix-layout_status_field $V0
+
+# Get the original values.
+xattrs=""
+for i in $(seq 0 2); do
+ xattrs="$xattrs $(dht_get_layout $B0/${V0}$i)"
+done
+
+# Expand the volume and force assignment of new ranges.
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" online_brick_count
+# Force assignment of initial ranges.
+TEST $CLI volume rebalance $V0 fix-layout start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "fix-layout completed" fix-layout_status_field $V0
+
+for i in $(seq 0 3); do
+ xattrs="$xattrs $(dht_get_layout $B0/${V0}$i)"
+done
+
+overlap=$( $PYTHON $(dirname $0)/overlap.py $xattrs)
+# 2863311531 = 0xaaaaaaab = 2/3 overlap
+TEST [ "$overlap" -ge 2863311531 ]
+
+cleanup
diff --git a/tests/bugs/distribute/bug-860663.c b/tests/bugs/distribute/bug-860663.c
new file mode 100644
index 00000000000..ca0c31ffe8f
--- /dev/null
+++ b/tests/bugs/distribute/bug-860663.c
@@ -0,0 +1,41 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+#include <err.h>
+#include <sys/param.h>
+
+int main(argc, argv) int argc;
+char **argv;
+{
+ char *basepath;
+ char path[MAXPATHLEN + 1];
+ unsigned int count;
+ int i, fd;
+
+ if (argc != 3)
+ errx(1, "usage: %s path count", argv[0]);
+
+ basepath = argv[1];
+ count = atoi(argv[2]);
+
+ if (count > 999999)
+ errx(1, "count too big");
+
+ if (strlen(basepath) > MAXPATHLEN - 6)
+ errx(1, "path too long");
+
+ for (i = 0; i < count; i++) {
+ (void)sprintf(path, "%s%06d", basepath, i);
+
+ fd = open(path, O_CREAT | O_RDWR, 0644);
+ if (fd == -1)
+ err(1, "create %s failed", path);
+
+ if (close(fd) != 0)
+ warn("close %s failed", path);
+ }
+
+ return 0;
+}
diff --git a/tests/bugs/distribute/bug-860663.t b/tests/bugs/distribute/bug-860663.t
new file mode 100644
index 00000000000..59b8223ef3f
--- /dev/null
+++ b/tests/bugs/distribute/bug-860663.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+function file_count()
+{
+ val=1
+
+ if [ "$1" == "$2" ]
+ then
+ val=0
+ fi
+ echo $val
+}
+
+BRICK_COUNT=3
+
+build_tester $(dirname $0)/bug-860663.c
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST $(dirname $0)/bug-860663 $M0/files 1000
+
+ORIG_FILE_COUNT=`ls -l $M0 | wc -l`;
+TEST [ $ORIG_FILE_COUNT -ge 1000 ]
+
+# Kill a brick process
+kill_brick $V0 $H0 $B0/${V0}1
+
+TEST $CLI volume rebalance $V0 fix-layout start
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "fix-layout failed" fix-layout_status_field $V0;
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+# Unmount and remount to make sure we're doing fresh lookups.
+TEST umount $M0
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+NEW_FILE_COUNT=`ls -l $M0 | wc -l`;
+
+EXPECT "0" file_count $ORIG_FILE_COUNT $NEW_FILE_COUNT
+
+rm -f $(dirname $0)/bug-860663
+cleanup;
diff --git a/tests/bugs/distribute/bug-862967.t b/tests/bugs/distribute/bug-862967.t
new file mode 100644
index 00000000000..2fb0848bd7c
--- /dev/null
+++ b/tests/bugs/distribute/bug-862967.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function uid_gid_compare()
+{
+ val=1
+
+ if [ "$1" == "$3" ]
+ then
+ if [ "$2" == "$4" ]
+ then
+ val=0
+ fi
+ fi
+ echo "$val"
+}
+
+BRICK_COUNT=3
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 --gid-timeout=-1 -s $H0 --volfile-id $V0 $M0;
+
+# change dir permissions
+mkdir $M0/dir;
+chown 1:1 $M0/dir;
+
+# Kill a brick process
+
+kill_brick $V0 $H0 $B0/${V0}2
+# change dir ownership
+NEW_UID=36;
+NEW_GID=36;
+chown $NEW_UID:$NEW_GID $M0/dir;
+
+# bring the brick back up
+TEST $CLI volume start $V0 force
+
+sleep 10;
+
+ls -l $M0/dir;
+
+# check if uid/gid is healed on backend brick which was taken down
+BACKEND_UID=`stat -c %u $B0/${V0}2/dir`;
+BACKEND_GID=`stat -c %g $B0/${V0}2/dir`;
+
+EXPECT "0" uid_gid_compare $NEW_UID $NEW_GID $BACKEND_UID $BACKEND_GID
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-882278.t b/tests/bugs/distribute/bug-882278.t
new file mode 100755
index 00000000000..8cb51474720
--- /dev/null
+++ b/tests/bugs/distribute/bug-882278.t
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup
+
+# Is there a good reason to require --fqdn elsewhere? It's worse than useless
+# here.
+H0=$(hostname -s)
+
+function recreate {
+ # The rm is necessary so we don't get fooled by leftovers from old runs.
+ rm -rf $1 && mkdir -p $1
+}
+
+function count_lines {
+ grep "$1" $2/* | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+TEST recreate ${B0}/${V0}-0
+TEST recreate ${B0}/${V0}-1
+TEST $CLI volume create $V0 $H0:$B0/${V0}-{0,1}
+TEST $CLI volume set $V0 cluster.nufa on
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount native
+special_option="--xlator-option ${V0}-dht.local-volume-name=${V0}-client-1"
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $special_option $M0
+
+## Create a bunch of test files.
+for i in $(seq 0 99); do
+ echo hello > $(printf $M0/file%02d $i)
+done
+
+## Make sure the files went to the right place. There might be link files in
+## the other brick, but they won't have any contents.
+EXPECT "0" count_lines hello ${B0}/${V0}-0
+EXPECT "100" count_lines hello ${B0}/${V0}-1
+
+if [ "$EXIT_EARLY" = "1" ]; then
+ exit 0;
+fi
+
+## Finish up
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-884455.t b/tests/bugs/distribute/bug-884455.t
new file mode 100755
index 00000000000..59413cd5408
--- /dev/null
+++ b/tests/bugs/distribute/bug-884455.t
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../dht.rc
+
+cleanup;
+
+function layout_compare()
+{
+ res=0
+
+ if [ "$1" == "$2" ]
+ then
+ res=1
+ fi
+ if [ "$1" == "$3" ]
+ then
+ res=1
+ fi
+ if [ "$2" == "$3" ]
+ then
+ res=1
+ fi
+
+ echo $res
+}
+
+function get_layout()
+{
+ layout1=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1|grep dht |cut -d = -f2`
+ layout2=`getfattr -n trusted.glusterfs.dht -e hex $2 2>&1|grep dht |cut -d = -f2`
+ layout3=`getfattr -n trusted.glusterfs.dht -e hex $3 2>&1|grep dht |cut -d = -f2`
+
+ ret=$(layout_compare $layout1 $layout2 $layout3)
+
+ if [ $ret -ne 0 ]
+ then
+ echo 1
+ else
+ echo 0
+ fi
+
+}
+
+BRICK_COUNT=3
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+## set subvols-per-dir option
+TEST $CLI volume set $V0 subvols-per-directory 2
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/dir{1..10} 2>/dev/null;
+
+## Add-brick n run rebalance to force re-write of layout
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}2
+sleep 5;
+
+## trigger dir self heal on client
+TEST ls -l $M0 2>/dev/null;
+
+TEST $CLI volume rebalance $V0 start force
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
+
+## check for layout overlaps.
+EXPECT "0" get_layout $B0/${V0}0 $B0/${V0}1 $B0/${V0}2
+EXPECT "0" get_layout $B0/${V0}0/dir1 $B0/${V0}1/dir1 $B0/${V0}2/dir1
+EXPECT "0" get_layout $B0/${V0}0/dir2 $B0/${V0}1/dir2 $B0/${V0}2/dir2
+EXPECT "0" get_layout $B0/${V0}0/dir3 $B0/${V0}1/dir3 $B0/${V0}2/dir3
+EXPECT "0" get_layout $B0/${V0}0/dir4 $B0/${V0}1/dir4 $B0/${V0}2/dir4
+EXPECT "0" get_layout $B0/${V0}0/dir5 $B0/${V0}1/dir5 $B0/${V0}2/dir5
+EXPECT "0" get_layout $B0/${V0}0/dir6 $B0/${V0}1/dir6 $B0/${V0}2/dir6
+EXPECT "0" get_layout $B0/${V0}0/dir7 $B0/${V0}1/dir7 $B0/${V0}2/dir7
+EXPECT "0" get_layout $B0/${V0}0/dir8 $B0/${V0}1/dir8 $B0/${V0}2/dir8
+EXPECT "0" get_layout $B0/${V0}0/dir9 $B0/${V0}1/dir9 $B0/${V0}2/dir9
+EXPECT "0" get_layout $B0/${V0}0/dir10 $B0/${V0}1/dir10 $B0/${V0}2/dir10
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-884597.t b/tests/bugs/distribute/bug-884597.t
new file mode 100755
index 00000000000..d6a2c65f370
--- /dev/null
+++ b/tests/bugs/distribute/bug-884597.t
@@ -0,0 +1,173 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../dht.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+BRICK_COUNT=3
+function uid_gid_compare()
+{
+ val=1
+
+ if [ "$1" == "$3" ]
+ then
+ if [ "$2" == "$4" ]
+ then
+ val=0
+ fi
+ fi
+ echo "$val"
+}
+
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+i=1
+NEW_UID=36
+NEW_GID=36
+
+TEST touch $M0/$i
+
+chown $NEW_UID:$NEW_GID $M0/$i
+## rename till file gets a linkfile
+
+has_link=0
+while [ $i -lt 100 ]
+do
+ mv $M0/$i $M0/$(( $i+1 ))
+ if [ $? -ne 0 ]
+ then
+ break
+ fi
+ let i++
+ file_has_linkfile $i
+ has_link=$?
+ if [ $has_link -eq 2 ]
+ then
+ break;
+ fi
+done
+
+TEST [ $has_link -eq 2 ]
+
+get_hashed_brick $i
+cached=$?
+
+# check if uid/gid on linkfile is created with correct uid/gid
+BACKEND_UID=`stat -c %u $B0/${V0}$cached/$i`;
+BACKEND_GID=`stat -c %g $B0/${V0}$cached/$i`;
+
+EXPECT "0" uid_gid_compare $NEW_UID $NEW_GID $BACKEND_UID $BACKEND_GID
+
+# remove linkfile from backend, and trigger a lookup heal. uid/gid should match
+rm -rf $B0/${V0}$cached/$i
+
+# without a unmount, we are not able to trigger a lookup based heal
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+## Mount FUSE
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+lookup=`ls -l $M0/$i 2>/dev/null`
+
+# check if uid/gid on linkfile is created with correct uid/gid
+BACKEND_UID=`stat -c %u $B0/${V0}$cached/$i`;
+BACKEND_GID=`stat -c %g $B0/${V0}$cached/$i`;
+
+EXPECT "0" uid_gid_compare $NEW_UID $NEW_GID $BACKEND_UID $BACKEND_GID
+# create hardlinks. Make sure a linkfile gets created
+
+i=1
+NEW_UID=36
+NEW_GID=36
+
+TEST touch $M0/file
+chown $NEW_UID:$NEW_GID $M0/file;
+
+## ln till file gets a linkfile
+
+has_link=0
+while [ $i -lt 100 ]
+do
+ ln $M0/file $M0/link$i
+ if [ $? -ne 0 ]
+ then
+ break
+ fi
+ file_has_linkfile link$i
+ has_link=$?
+ if [ $has_link -eq 2 ]
+ then
+ break;
+ fi
+ let i++
+done
+
+TEST [ $has_link -eq 2 ]
+
+get_hashed_brick link$i
+cached=$?
+
+# check if uid/gid on linkfile is created with correct uid/gid
+BACKEND_UID=`stat -c %u $B0/${V0}$cached/link$i`;
+BACKEND_GID=`stat -c %g $B0/${V0}$cached/link$i`;
+
+EXPECT "0" uid_gid_compare $NEW_UID $NEW_GID $BACKEND_UID $BACKEND_GID
+
+## UID/GID creation as different user
+i=1
+NEW_UID=36
+NEW_GID=36
+
+TEST touch $M0/user_file1
+TEST chown $NEW_UID:$NEW_GID $M0/user_file1;
+
+## Give permission on volume, so that different users can perform rename
+
+TEST chmod 0777 $M0
+
+## Add a user known as ABC and perform renames
+TEST `useradd -M ABC 2>/dev/null`
+
+TEST cd $M0
+## rename as different user till file gets a linkfile
+
+has_link=0
+while [ $i -lt 100 ]
+do
+ su -m ABC -c "mv $M0/user_file$i $M0/user_file$(( $i+1 ))"
+ if [ $? -ne 0 ]
+ then
+ break
+ fi
+ let i++
+ file_has_linkfile user_file$i
+ has_link=$?
+ if [ $has_link -eq 2 ]
+ then
+ break;
+ fi
+done
+
+TEST [ $has_link -eq 2 ]
+
+## del user ABC
+TEST userdel ABC
+
+get_hashed_brick user_file$i
+cached=$?
+
+# check if uid/gid on linkfile is created with correct uid/gid
+BACKEND_UID=`stat -c %u $B0/${V0}$cached/user_file$i`;
+BACKEND_GID=`stat -c %g $B0/${V0}$cached/user_file$i`;
+
+EXPECT "0" uid_gid_compare $NEW_UID $NEW_GID $BACKEND_UID $BACKEND_GID
+cleanup;
diff --git a/tests/bugs/distribute/bug-907072.t b/tests/bugs/distribute/bug-907072.t
new file mode 100755
index 00000000000..a4d98831380
--- /dev/null
+++ b/tests/bugs/distribute/bug-907072.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../dht.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1,2,3};
+TEST $CLI volume start $V0;
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/test;
+
+# Extract the layout sans the commit hash
+OLD_LAYOUT0=`get_layout $B0/${V0}0/test | cut -c11-34`;
+OLD_LAYOUT1=`get_layout $B0/${V0}1/test | cut -c11-34`;
+OLD_LAYOUT2=`get_layout $B0/${V0}2/test | cut -c11-34`;
+OLD_LAYOUT3=`get_layout $B0/${V0}3/test | cut -c11-34`;
+
+TEST killall glusterfsd;
+
+# Delete directory on one brick
+TEST rm -rf $B0/${V}1/test;
+
+# And only layout xattr on another brick
+TEST setfattr -x trusted.glusterfs.dht $B0/${V0}2/test;
+
+TEST $CLI volume start $V0 force;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+TEST stat $M0/test;
+
+# Extract the layout sans the commit hash
+NEW_LAYOUT0=`get_layout $B0/${V0}0/test | cut -c11-34`;
+NEW_LAYOUT1=`get_layout $B0/${V0}1/test | cut -c11-34`;
+NEW_LAYOUT2=`get_layout $B0/${V0}2/test | cut -c11-34`;
+NEW_LAYOUT3=`get_layout $B0/${V0}3/test | cut -c11-34`;
+
+EXPECT $OLD_LAYOUT0 echo $NEW_LAYOUT0;
+EXPECT $OLD_LAYOUT1 echo $NEW_LAYOUT1;
+EXPECT $OLD_LAYOUT2 echo $NEW_LAYOUT2;
+EXPECT $OLD_LAYOUT3 echo $NEW_LAYOUT3;
diff --git a/tests/bugs/distribute/bug-912564.t b/tests/bugs/distribute/bug-912564.t
new file mode 100755
index 00000000000..d437728f83b
--- /dev/null
+++ b/tests/bugs/distribute/bug-912564.t
@@ -0,0 +1,92 @@
+#!/bin/bash
+
+# Test that the rsync and "extra" regexes cause rename-in-place without
+# creating linkfiles, when they're supposed to. Without the regex we'd have a
+# 1/4 chance of each file being assigned to the right place, so with 16 files
+# we have a 1/2^32 chance of getting the correct result by accident.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function count_linkfiles {
+ local i
+ local count=0
+ for i in $(seq $2 $3); do
+ x=$(find $1$i -perm -1000 | wc -l)
+ # Divide by two because of the .glusterfs links.
+ count=$((count+x/2))
+ done
+ echo $count
+}
+
+# This function only exists to get around quoting difficulties in TEST.
+function set_regex {
+ $CLI volume set $1 cluster.extra-hash-regex '^foo(.+)bar$'
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+mkdir -p $H0:$B0/${V0}0
+mkdir -p $H0:$B0/${V0}1
+mkdir -p $H0:$B0/${V0}2
+mkdir -p $H0:$B0/${V0}3
+
+# Create and start a volume.
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 \
+ $H0:$B0/${V0}2 $H0:$B0/${V0}3
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status';
+
+# Mount it.
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+# Make sure the rsync regex works, by verifying that no linkfiles are
+# created.
+rm -f $M0/file*
+for i in $(seq 0 15); do
+ fn=$(printf file%x $i)
+ tmp_fn=$(printf .%s.%d $fn $RANDOM)
+ echo testing > $M0/$tmp_fn
+ mv $M0/$tmp_fn $M0/$fn
+done
+lf=$(count_linkfiles $B0/$V0 0 3)
+TEST [ "$lf" -eq "0" ]
+
+# Make sure that linkfiles *are* created for normal files.
+rm -f $M0/file*
+for i in $(seq 0 15); do
+ fn=$(printf file%x $i)
+ tmp_fn=$(printf foo%sbar $fn)
+ echo testing > $M0/$tmp_fn
+ mv $M0/$tmp_fn $M0/$fn
+done
+lf=$(count_linkfiles $B0/$V0 0 3)
+TEST [ "$lf" -ne "0" ]
+
+# Make sure that setting an extra regex suppresses the linkfiles.
+TEST set_regex $V0
+rm -f $M0/file*
+for i in $(seq 0 15); do
+ fn=$(printf file%x $i)
+ tmp_fn=$(printf foo%sbar $fn)
+ echo testing > $M0/$tmp_fn
+ mv $M0/$tmp_fn $M0/$fn
+done
+lf=$(count_linkfiles $B0/$V0 0 3)
+TEST [ "$lf" -eq "0" ]
+
+# Re-test the rsync regex, to make sure the extra one didn't break it.
+rm -f $M0/file*
+for i in $(seq 0 15); do
+ fn=$(printf file%x $i)
+ tmp_fn=$(printf .%s.%d $fn $RANDOM)
+ echo testing > $M0/$tmp_fn
+ mv $M0/$tmp_fn $M0/$fn
+done
+lf=$(count_linkfiles $B0/$V0 0 3)
+TEST [ "$lf" -eq "0" ]
+
+cleanup
diff --git a/tests/bugs/distribute/bug-915554.t b/tests/bugs/distribute/bug-915554.t
new file mode 100755
index 00000000000..1f59008c56f
--- /dev/null
+++ b/tests/bugs/distribute/bug-915554.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+#
+# Bug <915554>
+#
+# This test checks for a condition where a rebalance migrates a file and does
+# not preserve the original file size. This can occur due to hole preservation
+# logic in the file migration code. If a file size is aligned to a disk sector
+# boundary (512b) and the tail portion of the file is zero-filled, the file
+# may end up truncated to the end of the last data region in the file.
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../dht.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+BRICK_COUNT=3
+# create, start and mount a two brick DHT volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 --gid-timeout=-1 -s $H0 --volfile-id $V0 $M0;
+
+i=1
+# Write some data to a file and extend such that the file is sparse to a sector
+# aligned boundary.
+echo test > $M0/$i
+TEST truncate -s 1M $M0/$i
+
+# cache the original size
+SIZE1=`stat -c %s $M0/$i`
+
+# rename till file gets a linkfile
+
+while [ $i -ne 0 ]
+do
+ test=`mv $M0/$i $M0/$(( $i+1 )) 2>/dev/null`
+ if [ $? -ne 0 ]
+ then
+ echo "rename failed"
+ break
+ fi
+ let i++
+ file_has_linkfile $i
+ has_link=$?
+ if [ $has_link -eq 2 ]
+ then
+ break;
+ fi
+done
+
+# start a rebalance (force option to overide checks) to trigger migration of
+# file
+
+TEST $CLI volume rebalance $V0 start force
+
+# check if rebalance has completed for up to 15 secs
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
+
+# validate the file size after the migration
+SIZE2=`stat -c %s $M0/$i`
+
+TEST [ $SIZE1 -eq $SIZE2 ]
+
+TEST rm -f $M0/$i
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/distribute/bug-921408.t b/tests/bugs/distribute/bug-921408.t
new file mode 100755
index 00000000000..559114bb85a
--- /dev/null
+++ b/tests/bugs/distribute/bug-921408.t
@@ -0,0 +1,90 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../dht.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+wait_check_status ()
+{
+ n=0
+ while [ $n -lt $1 ]
+ do
+ ret=$(rebalance_completed)
+ if [ $ret == "0" ]
+ then
+ return 0;
+ else
+ sleep 1
+ n=`expr $n + 1`;
+ fi
+ done
+ return 1;
+}
+
+addbr_rebal_till_layout_change()
+{
+ val=1
+ l=$1
+ i=1
+ while [ $i -lt 5 ]
+ do
+ $CLI volume add-brick $V0 $H0:$B0/${V0}$l &>/dev/null
+ $CLI volume rebalance $V0 fix-layout start &>/dev/null
+ wait_check_status $REBALANCE_TIMEOUT
+ if [ $? -eq 1 ]
+ then
+ break
+ fi
+ NEW_LAYOUT=`get_layout $B0/${V0}0 | cut -c11-34`
+ if [ $OLD_LAYOUT == $NEW_LAYOUT ]
+ then
+ i=`expr $i + 1`;
+ l=`expr $l + 1`;
+ else
+ val=0
+ break
+ fi
+ done
+ return $val
+}
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 subvols-per-directory 1
+TEST $CLI volume start $V0
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/test
+TEST touch $M0/test/test
+
+fd=`fd_available`
+TEST fd_open $fd "rw" $M0/test/test
+
+OLD_LAYOUT=`get_layout $B0/${V0}0 | cut -c11-34`
+
+addbr_rebal_till_layout_change 1
+
+TEST [ $? -eq 0 ]
+
+for i in $(seq 1 1000)
+do
+ ls -l $M0/ >/dev/null
+ ret=$?
+ if [ $ret != 0 ]
+ then
+ break
+ fi
+done
+
+TEST [ $ret == 0 ];
+TEST fd_close $fd;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/distribute/bug-924265.t b/tests/bugs/distribute/bug-924265.t
new file mode 100755
index 00000000000..67c21de97cb
--- /dev/null
+++ b/tests/bugs/distribute/bug-924265.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# Test that setting cluster.dht-xattr-name works, and that DHT consistently
+# uses the specified name instead of the default.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# We only care about the exit code, so keep it quiet.
+function silent_getfattr {
+ getfattr $* &> /dev/null
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+mkdir -p $H0:$B0/${V0}0
+
+# Create a volume and set the option.
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 cluster.dht-xattr-name trusted.foo.bar
+
+# Start and mount the volume.
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+# Create a directory and make sure it has the right xattr.
+mkdir $M0/test
+TEST ! silent_getfattr -n trusted.glusterfs.dht $B0/${V0}0/test
+TEST silent_getfattr -n trusted.foo.bar $B0/${V0}0/test
+
+cleanup
diff --git a/tests/bugs/distribute/bug-961615.t b/tests/bugs/distribute/bug-961615.t
new file mode 100644
index 00000000000..00938e8fa9b
--- /dev/null
+++ b/tests/bugs/distribute/bug-961615.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This test tests that an extra fd_unref does not happen in rebalance
+#migration completion check code path in dht
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST touch $M0/1
+#This rename creates a link file for 10 in the other volume.
+TEST mv $M0/1 $M0/10
+#Lets keep writing to the file which will trigger rebalance completion check
+dd if=/dev/zero of=$M0/10 bs=1k &
+bg_pid=$!
+#Now rebalance force will migrate file '10'
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+#If the bug exists mount would have crashed by now
+TEST ls $M0
+kill -9 $bg_pid > /dev/null 2>&1
+wait > /dev/null 2>&1
+cleanup
diff --git a/tests/bugs/distribute/bug-973073.t b/tests/bugs/distribute/bug-973073.t
new file mode 100755
index 00000000000..cdb785abac0
--- /dev/null
+++ b/tests/bugs/distribute/bug-973073.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../dht.rc
+
+## Steps followed are one described in bugzilla
+
+cleanup;
+
+function get_layout()
+{
+ layout1=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1`
+
+ if [ $? -ne 0 ]
+ then
+ echo 1
+ else
+ echo 0
+ fi
+
+}
+
+BRICK_COUNT=3
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start
+
+## remove-brick status == rebalance_status
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" remove_brick_completed
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 stop
+
+TEST $CLI volume rebalance $V0 fix-layout start
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
+
+TEST mkdir $M0/dir 2>/dev/null;
+
+EXPECT "0" get_layout $B0/${V0}2/dir
+cleanup;
diff --git a/tests/bugs/distribute/issue-1327.t b/tests/bugs/distribute/issue-1327.t
new file mode 100755
index 00000000000..acd8c8c6614
--- /dev/null
+++ b/tests/bugs/distribute/issue-1327.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+SCRIPT_TIMEOUT=250
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+BRICK1=$B0/${V0}-0
+BRICK2=$B0/${V0}-1
+
+TEST $CLI volume create $V0 $H0:$BRICK1 $H0:$BRICK2
+TEST $CLI volume start $V0
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST mkdir $M0/dir
+
+#remove dir from one of the brick
+TEST rmdir $BRICK2/dir
+
+#safe cache timeout for lookup to be triggered
+sleep 2
+
+TEST ls $M0/dir
+
+TEST stat $BRICK2/dir
+
+cleanup
diff --git a/tests/bugs/distribute/overlap.py b/tests/bugs/distribute/overlap.py
new file mode 100755
index 00000000000..2813979787b
--- /dev/null
+++ b/tests/bugs/distribute/overlap.py
@@ -0,0 +1,59 @@
+
+from __future__ import print_function
+import sys
+
+def calculate_one (ov, nv):
+ old_start = int(ov[18:26], 16)
+ old_end = int(ov[26:34], 16)
+ new_start = int(nv[18:26], 16)
+ new_end = int(nv[26:34], 16)
+ if (new_end < old_start) or (new_start > old_end):
+ #print '%s, %s -> ZERO' % (ov, nv)
+ return 0
+ all_start = max(old_start, new_start)
+ all_end = min(old_end, new_end)
+ #print '%s, %s -> %08x' % (ov, nv, all_end - all_start + 1)
+ return all_end - all_start + 1
+
+def calculate_all (values):
+ total = 0
+ nv_index = len(values) // 2
+ for old_val in values[:nv_index]:
+ new_val = values[nv_index]
+ nv_index += 1
+ total += calculate_one(old_val, new_val)
+ return total
+
+"""
+test1_vals = [
+ '0x0000000000000000000000003fffffff', # first quarter
+ '0x0000000000000000400000007fffffff', # second quarter
+ '0x000000000000000080000000ffffffff', # second half
+ '0x00000000000000000000000055555554', # first third
+ '0x000000000000000055555555aaaaaaa9', # second third
+ '0x0000000000000000aaaaaaaaffffffff', # last third
+]
+
+test2_vals = [
+ '0x0000000000000000000000003fffffff', # first quarter
+ '0x0000000000000000400000007fffffff', # second quarter
+ '0x000000000000000080000000ffffffff', # second half
+ '0x00000000000000000000000055555554', # first third
+ # Next two are (incorrectly) swapped.
+ '0x0000000000000000aaaaaaaaffffffff', # last third
+ '0x000000000000000055555555aaaaaaa9', # second third
+]
+
+print '%08x' % calculate_one(test1_vals[0], test1_vals[3])
+print '%08x' % calculate_one(test1_vals[1], test1_vals[4])
+print '%08x' % calculate_one(test1_vals[2], test1_vals[5])
+print '= %08x' % calculate_all(test1_vals)
+print '%08x' % calculate_one(test2_vals[0], test2_vals[3])
+print '%08x' % calculate_one(test2_vals[1], test2_vals[4])
+print '%08x' % calculate_one(test2_vals[2], test2_vals[5])
+print '= %08x' % calculate_all(test2_vals)
+"""
+
+if __name__ == '__main__':
+ # Return decimal so bash can reason about it.
+ print('%d' % calculate_all(sys.argv[1:]))
diff --git a/tests/bugs/ec/bug-1161621.t b/tests/bugs/ec/bug-1161621.t
new file mode 100644
index 00000000000..84361e440dc
--- /dev/null
+++ b/tests/bugs/ec/bug-1161621.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 5 redundancy 2 $H0:$B0/${V0}{0..4}
+EXPECT "Created" volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "5" ec_child_up_count $V0 0
+
+tmpdir=$(mktemp -d -t ${0##*/}.XXXXXX)
+push_trapfunc "rm -rf $tmpdir"
+
+TEST dd if=/dev/urandom of=$tmpdir/file bs=1234 count=20
+cs=$(sha1sum $tmpdir/file | awk '{ print $1 }')
+# Test O_APPEND on create
+TEST dd if=$tmpdir/file of=$M0/file bs=1234 count=10 oflag=append
+# Test O_APPEND on open
+TEST dd if=$tmpdir/file of=$M0/file bs=1234 skip=10 oflag=append conv=notrunc
+EXPECT "$cs" echo $(sha1sum $M0/file | awk '{ print $1 }')
+
+# Fill a file with ff (I don't use 0's because empty holes created by an
+# incorrect offset will be returned as 0's and won't be detected)
+dd if=/dev/zero bs=24680 count=1000 | tr '\0' '\377' >$tmpdir/shared
+cs=$(sha1sum $tmpdir/shared | awk '{ print $1 }')
+# Test concurrent writes to the same file using O_APPEND
+dd if=$tmpdir/shared of=$M0/shared bs=123400 count=100 oflag=append conv=notrunc &
+dd if=$tmpdir/shared of=$M1/shared bs=123400 count=100 oflag=append conv=notrunc &
+wait
+
+EXPECT "24680000" stat -c "%s" $M0/shared
+EXPECT "$cs" echo $(sha1sum $M0/shared | awk '{ print $1 }')
+
+cleanup
diff --git a/tests/bugs/ec/bug-1161886.c b/tests/bugs/ec/bug-1161886.c
new file mode 100644
index 00000000000..1f12650ea6d
--- /dev/null
+++ b/tests/bugs/ec/bug-1161886.c
@@ -0,0 +1,53 @@
+
+#include <stdio.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+ int ret = 1;
+
+ if (argc != 4) {
+ fprintf(stderr, "Syntax: %s <host> <volname> <file>\n", argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_set_logging(fs, "/dev/null", 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ fd = glfs_open(fs, argv[3], O_RDWR | O_TRUNC);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+ glfs_close(fd);
+
+ ret = 0;
+
+out:
+ glfs_fini(fs);
+
+ return ret;
+}
diff --git a/tests/bugs/ec/bug-1161886.t b/tests/bugs/ec/bug-1161886.t
new file mode 100644
index 00000000000..e7322210a3e
--- /dev/null
+++ b/tests/bugs/ec/bug-1161886.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function check_ec_size
+{
+ local res
+
+ for i in {0..2}; do
+ res=$(( `getfattr -n trusted.ec.size -e hex $B0/$V0$i/$1 | sed -n "s/^trusted.ec.size=//p"` ))
+ if [ "x$res" == "x" -o "$res" != "$2" ]; then
+ echo "N"
+ return 0
+ fi
+ done
+ echo "Y"
+ return 0
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 1 $H0:$B0/${V0}{0..2}
+EXPECT "Created" volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+TEST dd if=/dev/zero of=$M0/file1 bs=4k count=1
+TEST mv $M0/file1 $M0/file2
+TEST ! stat $M0/file1
+TEST stat $M0/file2
+
+TEST build_tester $(dirname $0)/bug-1161886.c -lgfapi -Wall -O2
+TEST $(dirname $0)/bug-1161886 $H0 $V0 /file2
+EXPECT "^0$" stat -c "%s" $M0/file2
+EXPECT "^Y$" check_ec_size file2 0
+
+rm -f $(dirname $0)/bug-1161886
+
+cleanup
diff --git a/tests/bugs/ec/bug-1179050.t b/tests/bugs/ec/bug-1179050.t
new file mode 100644
index 00000000000..ea2d7b39838
--- /dev/null
+++ b/tests/bugs/ec/bug-1179050.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 1 $H0:$B0/${V0}{0..2}
+EXPECT "Created" volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" ec_child_up_count $V0 0
+
+TEST $CLI volume clear-locks $V0 / kind all inode
+
+sleep 1
+
+EXPECT "3" ec_child_up_count $V0 0
+
+cleanup
diff --git a/tests/bugs/ec/bug-1187474.t b/tests/bugs/ec/bug-1187474.t
new file mode 100644
index 00000000000..e6344c26e73
--- /dev/null
+++ b/tests/bugs/ec/bug-1187474.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+function check_dir()
+{
+ local count
+
+ count=`ls $1 | grep "dir.[0-9]*" | wc -l`
+ if [[ $count -eq 100 ]]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
+EXPECT "Created" volinfo_field $V0 'Status'
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+TEST mount_nfs $H0:/$V0 $N0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+TEST mkdir $M0/dir.{1..100}
+
+sleep 2
+
+EXPECT "Y" check_dir $N0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+cleanup
diff --git a/tests/bugs/ec/bug-1188145.t b/tests/bugs/ec/bug-1188145.t
new file mode 100644
index 00000000000..aa3a59bc62f
--- /dev/null
+++ b/tests/bugs/ec/bug-1188145.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function create_dirs()
+{
+ local stop=$1
+ local idx
+ local res
+
+ res=0
+ idx=1
+ while [[ -f ${stop} ]]; do
+ mkdir $M0/${idx}
+ if [[ $? -ne 0 ]]; then
+ res=1
+ break;
+ fi
+ idx=$(( idx + 1 ))
+ done
+
+ return ${res}
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 redundancy 2 $H0:$B0/${V0}{0..5}
+EXPECT "Created" volinfo_field $V0 'Status'
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+name=`mktemp -t ${0##*/}.XXXXXX`
+create_dirs ${name} &
+pid=$!
+
+sleep 2
+TEST $CLI volume set $V0 uss on
+sleep 5
+TEST $CLI volume set $V0 uss off
+sleep 5
+
+TEST rm -f ${name}
+TEST wait ${pid}
+
+cleanup
diff --git a/tests/bugs/ec/bug-1227869.t b/tests/bugs/ec/bug-1227869.t
new file mode 100644
index 00000000000..00fad825fae
--- /dev/null
+++ b/tests/bugs/ec/bug-1227869.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../../basic/quota.c -o $QDD
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 $H0:$B0/${V0}{1..3}
+EXPECT "Created" volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Started" volinfo_field $V0 'Status'
+
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 100MB
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST $QDD $M0/file 256 40
+
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "10.0MB" quotausage "/"
+
+EXPECT "0" echo $(df -k $M0 | grep -q '10240 '; echo $?)
+EXPECT "0" echo $(df -k $M0 | grep -q '92160 '; echo $?)
+
+rm -f $QDD
+cleanup
diff --git a/tests/bugs/ec/bug-1236065.t b/tests/bugs/ec/bug-1236065.t
new file mode 100644
index 00000000000..9181e73ec19
--- /dev/null
+++ b/tests/bugs/ec/bug-1236065.t
@@ -0,0 +1,95 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+SCRIPT_TIMEOUT=400
+
+cleanup
+
+ec_test_dir=$M0/test
+
+function ec_test_generate_src()
+{
+ mkdir -p $ec_test_dir
+ for i in `seq 0 19`; do
+ dd if=/dev/zero of=$ec_test_dir/$i.c bs=1024 count=2
+ done
+}
+
+function ec_test_make()
+{
+ for i in `ls *.c`; do
+ file=`basename $i`
+ filename=${file%.*}
+ cp $i $filename.o
+ done
+}
+
+## step 1
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 7 redundancy 3 $H0:$B0/${V0}{0..6}
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "7" ec_child_up_count $V0 0
+
+## step 2
+TEST ec_test_generate_src
+
+cd $ec_test_dir
+TEST ec_test_make
+
+## step 3
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT '5' online_brick_count
+
+TEST rm -f *.o
+TEST ec_test_make
+
+## step 4
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "7" online_brick_count
+
+# active heal
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+TEST $CLI volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST rm -f *.o
+TEST ec_test_make
+
+## step 5
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}3
+EXPECT '5' online_brick_count
+
+TEST rm -f *.o
+TEST ec_test_make
+
+EXPECT '5' online_brick_count
+
+## step 6
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "7" online_brick_count
+
+# self-healing
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+TEST $CLI volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST rm -f *.o
+TEST ec_test_make
+
+TEST pidof glusterd
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Started' volinfo_field $V0 'Status'
+EXPECT '7' online_brick_count
+## cleanup
+cd
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+TEST rm -rf $B0/*
+
+cleanup;
diff --git a/tests/bugs/ec/bug-1251446.t b/tests/bugs/ec/bug-1251446.t
new file mode 100644
index 00000000000..5779c2e973b
--- /dev/null
+++ b/tests/bugs/ec/bug-1251446.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 4 redundancy 1 $H0:$B0/${V0}{0..3}
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+TEST dd if=/dev/urandom of=$M0/test1 bs=1024k count=2
+cs=$(sha1sum $M0/test1 | awk '{ print $1 }')
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT '3' online_brick_count
+
+TEST cp $M0/test1 $M0/test2
+EXPECT "$cs" echo $(sha1sum $M0/test2 | awk '{ print $1 }')
+
+TEST $CLI volume start $V0 force
+EXPECT '4' online_brick_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+TEST $CLI volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT "699392" stat -c "%s" $B0/${V0}0/test2
+
+# force cache clear
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "4" ec_child_up_count $V0 0
+
+TEST kill_brick $V0 $H0 $B0/${V0}3
+EXPECT '3' online_brick_count
+
+EXPECT "$cs" echo $(sha1sum $M0/test2 | awk '{ print $1 }')
+
+## cleanup
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/ec/bug-1304988.t b/tests/bugs/ec/bug-1304988.t
new file mode 100755
index 00000000000..334e5c25932
--- /dev/null
+++ b/tests/bugs/ec/bug-1304988.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test renames files from dir to test and vice versa in an infinite loop
+# at the same time add-brick and rebalance starts which should NOT be hanged
+
+cleanup;
+
+function rename_files {
+while :
+do
+ for i in {1..100}; do mv $M0/dir/file-$i $M0/test/newfile-$i; done
+ for i in {1..100}; do mv $M0/test/newfile-$i $M0/dir/file-$i; done
+done
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+mkdir $M0/dir
+mkdir $M0/test
+touch $M0/dir/file-{1..100}
+rename_files &
+back_pid1=$!;
+echo "Started rename $back_pid1"
+
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{6..11}
+TEST $CLI volume rebalance $V0 start force
+
+#Test if rebalance completed with success.
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+echo "rebalance done..."
+kill $back_pid1
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1332022
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1332022
diff --git a/tests/bugs/ec/bug-1547662.t b/tests/bugs/ec/bug-1547662.t
new file mode 100644
index 00000000000..5748218587e
--- /dev/null
+++ b/tests/bugs/ec/bug-1547662.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# Immediately after replace-brick, trusted.ec.version will be absent, so if it
+# is present we can assume that heal was started on root
+function root_heal_attempted {
+ if [ -z $(get_hex_xattr trusted.ec.version $1) ]; then
+ echo "N"
+ else
+ echo "Y"
+ fi
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST ${CLI} volume create ${V0} disperse 6 redundancy 2 ${H0}:${B0}/${V0}{0..5}
+TEST ${CLI} volume start ${V0}
+TEST ${GFS} --volfile-server ${H0} --volfile-id ${V0} ${M0}
+EXPECT_WITHIN ${CHILD_UP_TIMEOUT} "6" ec_child_up_count ${V0} 0
+
+TEST mkdir ${M0}/base
+TEST mkdir ${M0}/base/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}
+TEST mkdir ${M0}/base/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}/dir.{1,2}
+
+TEST ${CLI} volume replace-brick ${V0} ${H0}:${B0}/${V0}5 ${H0}:${B0}/${V0}6 commit force
+EXPECT_WITHIN ${CHILD_UP_TIMEOUT} "6" ec_child_up_count ${V0} 0
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "Y" glustershd_up_status
+EXPECT_WITHIN ${CHILD_UP_TIMEOUT} "6" ec_child_up_count_shd ${V0} 0
+EXPECT_WITHIN ${HEAL_TIMEOUT} "Y" root_heal_attempted ${B0}/${V0}6
+EXPECT_WITHIN ${HEAL_TIMEOUT} "^0$" get_pending_heal_count ${V0}
+EXPECT "^127$" echo $(find ${B0}/${V0}6/base -type d | wc -l)
+
+cleanup;
diff --git a/tests/bugs/ec/bug-1699866-check-reopen-fd.t b/tests/bugs/ec/bug-1699866-check-reopen-fd.t
new file mode 100644
index 00000000000..4386d010318
--- /dev/null
+++ b/tests/bugs/ec/bug-1699866-check-reopen-fd.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume set $V0 disperse.background-heals 0
+TEST $CLI volume set $V0 write-behind off
+TEST $CLI volume set $V0 open-behind off
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+
+TEST mkdir -p $M0/dir
+
+fd="$(fd_available)"
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "5" ec_child_up_count $V0 0
+
+TEST fd_open ${fd} rw $M0/dir/test
+TEST fd_write ${fd} "test1"
+TEST $CLI volume replace-brick ${V0} $H0:$B0/${V0}0 $H0:$B0/${V0}0_1 commit force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0
+TEST fd_write ${fd} "test2"
+TEST fd_close ${fd}
+
+cleanup
diff --git a/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t b/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t
new file mode 100644
index 00000000000..67fdb184b46
--- /dev/null
+++ b/tests/bugs/ec/bug-1708156-honor-inodelk-contention-notify-on-partial-locks.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function do_ls() {
+ local dir="${1}"
+ local i
+
+ for i in {1..50}; do
+ ls -l $M0/${dir} >/dev/null &
+ ls -l $M1/${dir} >/dev/null &
+ ls -l $M2/${dir} >/dev/null &
+ ls -l $M3/${dir} >/dev/null &
+ done
+ wait
+}
+
+function measure_time() {
+ {
+ LC_ALL=C
+ time -p "${@}"
+ } 2>&1 | awk '/^real/ { print $2 * 1000 }'
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+
+TEST $CLI volume set $V0 disperse.eager-lock on
+TEST $CLI volume set $V0 disperse.other-eager-lock on
+TEST $CLI volume set $V0 features.locks-notify-contention on
+TEST $CLI volume set $V0 disperse.eager-lock-timeout 10
+TEST $CLI volume set $V0 disperse.other-eager-lock-timeout 10
+
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M1
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M2
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M3
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "6" ec_child_up_count $V0 0 $M3
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file.{1..10}
+
+# Run multiple 'ls' concurrently from multiple clients so that they collide and
+# cause partial locks.
+TEST [[ $(measure_time do_ls dir) -lt 10000 ]]
+
+cleanup
diff --git a/tests/bugs/error-gen/bug-767095.t b/tests/bugs/error-gen/bug-767095.t
new file mode 100755
index 00000000000..6cc254f559d
--- /dev/null
+++ b/tests/bugs/error-gen/bug-767095.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+dump_dir='/tmp/gerrit_glusterfs'
+TEST mkdir -p $dump_dir;
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 error-gen posix;
+TEST $CLI volume set $V0 server.statedump-path $dump_dir;
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST PID=`gluster --xml volume status patchy | grep -A 8 patchy1 | grep '<pid>' | cut -d '>' -f 2 | cut -d '<' -f 1`
+TEST kill -USR1 $PID;
+sleep 2;
+for file_name in $(ls $dump_dir)
+do
+ TEST grep "error-gen.priv" $dump_dir/$file_name;
+done
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+TEST rm -rf $dump_dir;
+
+cleanup;
diff --git a/tests/bugs/fuse/bug-1030208.t b/tests/bugs/fuse/bug-1030208.t
new file mode 100644
index 00000000000..526283cf101
--- /dev/null
+++ b/tests/bugs/fuse/bug-1030208.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+#Test case: Hardlink test
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+#Create a file and perform fop on a DIR
+TEST touch $M0/foo
+TEST ls $M0/
+
+#Create hardlink
+TEST ln $M0/foo $M0/bar
+
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/fuse/bug-1126048.c b/tests/bugs/fuse/bug-1126048.c
new file mode 100644
index 00000000000..19165ecf6f7
--- /dev/null
+++ b/tests/bugs/fuse/bug-1126048.c
@@ -0,0 +1,43 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <string.h>
+
+/*
+ * This function opens a file and to trigger migration failure, unlinks the
+ * file and performs graph switch (cmd passed in argv). If everything goes fine,
+ * fsync should fail without crashing the mount process.
+ */
+int
+main(int argc, char **argv)
+{
+ int ret = 0;
+ int fd = 0;
+ char *cmd = argv[1];
+ struct stat stbuf = {
+ 0,
+ };
+
+ printf("cmd is: %s\n", cmd);
+ fd = open("a.txt", O_CREAT | O_RDWR, 0644);
+ if (fd < 0)
+ printf("open failed: %s\n", strerror(errno));
+
+ ret = unlink("a.txt");
+ if (ret < 0)
+ printf("unlink failed: %s\n", strerror(errno));
+ if (write(fd, "abc", 3) < 0)
+ printf("Not able to print %s\n", strerror(errno));
+ system(cmd);
+ sleep(1); /* No way to confirm graph switch so sleep 1 */
+ ret = fstat(fd, &stbuf);
+ if (ret < 0)
+ printf("fstat failed %\n", strerror(errno));
+ ret = fsync(fd);
+ if (ret < 0)
+ printf("Not able to fsync %s\n", strerror(errno));
+ return 0;
+}
diff --git a/tests/bugs/fuse/bug-1126048.t b/tests/bugs/fuse/bug-1126048.t
new file mode 100755
index 00000000000..5e2ed293cd3
--- /dev/null
+++ b/tests/bugs/fuse/bug-1126048.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+function grep_for_ebadf {
+ $M0/bug-1126048 "gluster --mode=script --wignore volume add-brick $V0 $H0:$B0/brick2" | grep -i "Bad file descriptor"
+}
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=yes
+
+build_tester $(dirname $0)/bug-1126048.c
+
+TEST cp $(dirname $0)/bug-1126048 $M0
+cd $M0
+TEST grep_for_ebadf
+TEST ls -l $M0
+cd -
+TEST rm -f $(dirname $0)/bug-1126048
+cleanup;
diff --git a/tests/bugs/fuse/bug-1283103.t b/tests/bugs/fuse/bug-1283103.t
new file mode 100644
index 00000000000..56612534cb9
--- /dev/null
+++ b/tests/bugs/fuse/bug-1283103.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+#
+# https://bugzilla.redhat.com/show_bug.cgi?id=1283103
+#
+# Test that it is possible to set and get security.*
+# xattrs other thatn security.selinux irrespective of
+# whether the mount was done with --selinux. This is
+# for example important for Samba to be able to store
+# the Windows-level acls in the security.NTACL xattr
+# when the acl_xattr vfs module is used.
+#
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE without selinux:
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TESTFILE="$M0/testfile"
+TEST touch ${TESTFILE}
+
+TEST echo "setfattr -n security.foobar -v value ${TESTFILE}"
+TEST setfattr -n security.foobar -v value ${TESTFILE}
+TEST getfattr -n security.foobar ${TESTFILE}
+TEST setfattr -x security.foobar ${TESTFILE}
+
+# can not currently test the security.selinux xattrs
+# since the kernel intercepts them.
+# see https://bugzilla.redhat.com/show_bug.cgi?id=1272868
+#TEST ! getfattr -n security.selinux ${TESTFILE}
+#TEST ! setfattr -n security.selinux -v value ${TESTFILE}
+
+TEST umount $M0
+
+# Mount FUSE with selinux:
+TEST glusterfs -s $H0 --volfile-id $V0 --selinux $M0
+
+TEST setfattr -n security.foobar -v value ${TESTFILE}
+TEST getfattr -n security.foobar ${TESTFILE}
+TEST setfattr -x security.foobar ${TESTFILE}
+
+# can not currently test the security.selinux xattrs
+# since the kernel intercepts them.
+# see https://bugzilla.redhat.com/show_bug.cgi?id=1272868
+#TEST setfattr -n security.selinux -v value ${TESTFILE}
+#TEST getfattr -n security.selinux ${TESTFILE}
+
+cleanup;
diff --git a/tests/bugs/fuse/bug-1309462.t b/tests/bugs/fuse/bug-1309462.t
new file mode 100644
index 00000000000..975d72d82ed
--- /dev/null
+++ b/tests/bugs/fuse/bug-1309462.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+#
+# https://bugzilla.redhat.com/show_bug.cgi?id=1309462
+# Test the new fuse mount option --capability.
+# Set/get xattr on security.capability should be sent
+# down from fuse, only if --selinux or --capability option
+# is used for mounting.
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE without selinux:
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TESTFILE="$M0/testfile"
+TEST touch ${TESTFILE}
+
+TEST ! setfattr -n security.capability -v value ${TESTFILE}
+TEST ! getfattr -n security.capability ${TESTFILE}
+
+TEST umount $M0
+
+# Mount FUSE with selinux:
+TEST glusterfs -s $H0 --volfile-id $V0 --selinux $M0
+
+TEST setfattr -n security.capability -v value ${TESTFILE}
+TEST getfattr -n security.capability ${TESTFILE}
+TEST setfattr -x security.capability ${TESTFILE}
+
+TEST umount $M0
+
+# Mount FUSE with capability:
+TEST glusterfs -s $H0 --volfile-id $V0 --capability $M0
+
+TEST setfattr -n security.capability -v value ${TESTFILE}
+TEST getfattr -n security.capability ${TESTFILE}
+TEST setfattr -x security.capability ${TESTFILE}
+
+TEST umount $M0
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1581735
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1581735 \ No newline at end of file
diff --git a/tests/bugs/fuse/bug-1336818.t b/tests/bugs/fuse/bug-1336818.t
new file mode 100644
index 00000000000..53286521742
--- /dev/null
+++ b/tests/bugs/fuse/bug-1336818.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+#Test case: OOM score adjust
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+# Prepare
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+# Basic check
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST umount $M0
+
+# Check valid value (< 0)
+TEST glusterfs --oom-score-adj=-1000 -s $H0 --volfile-id $V0 $M0
+TEST umount $M0
+
+# Check valid value (> 0)
+TEST glusterfs --oom-score-adj=1000 -s $H0 --volfile-id $V0 $M0
+TEST umount $M0
+
+# Check valid value (= 0)
+TEST glusterfs --oom-score-adj=0 -s $H0 --volfile-id $V0 $M0
+TEST umount $M0
+
+# Check invalid value (no value given)
+TEST ! glusterfs --oom-score-adj -s $H0 --volfile-id $V0 $M0
+
+# Check invalid value (< OOM_SCORE_ADJ_MIN)
+TEST ! glusterfs --oom-score-adj=-1001 -s $H0 --volfile-id $V0 $M0
+
+# Check invalid value (> OOM_SCORE_ADJ_MAX)
+TEST ! glusterfs --oom-score-adj=1001 -s $H0 --volfile-id $V0 $M0
+
+# Check invalid value (float)
+TEST ! glusterfs --oom-score-adj=12.34 -s $H0 --volfile-id $V0 $M0
+
+# Check invalid value (non-integer string)
+TEST ! glusterfs --oom-score-adj=qwerty -s $H0 --volfile-id $V0 $M0
+
+cleanup;
diff --git a/tests/bugs/fuse/bug-858215.t b/tests/bugs/fuse/bug-858215.t
new file mode 100755
index 00000000000..95999f6ad24
--- /dev/null
+++ b/tests/bugs/fuse/bug-858215.t
@@ -0,0 +1,79 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+TEST $CLI volume set $V0 nfs.disable off
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --event-history=on -s $H0 --volfile-id $V0 $M0;
+
+## Test for checking whether the fops have been saved in the event-history
+TEST ! stat $M0/newfile;
+TEST touch $M0/newfile;
+TEST stat $M0/newfile;
+TEST rm $M0/newfile;
+
+nfs_pid=$(cat $GLUSTERD_PIDFILEDIR/nfs/nfs.pid || echo -1);
+glustershd_pid=`ps auxwww | grep glustershd | grep -v grep | awk -F " " '{print $2}'`
+TEST [ $glustershd_pid != 0 ];
+pids=$(pidof glusterfs);
+for i in $pids
+do
+ if [ $i -ne $nfs_pid ] && [ $i -ne $glustershd_pid ]; then
+ mount_pid=$i;
+ break;
+ fi
+done
+
+dump_dir='/tmp/gerrit_glusterfs'
+cat >$statedumpdir/glusterdump.options <<EOF
+all=yes
+path=$dump_dir
+EOF
+
+TEST mkdir -p $dump_dir;
+TEST kill -USR1 $mount_pid;
+sleep 2;
+for file_name in $(ls $dump_dir)
+do
+ TEST grep -q "xlator.mount.fuse.history" $dump_dir/$file_name;
+done
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+TEST rm -rf $dump_dir;
+TEST rm $statedumpdir/glusterdump.options;
+
+cleanup;
diff --git a/tests/bugs/fuse/bug-858488-min-free-disk.t b/tests/bugs/fuse/bug-858488-min-free-disk.t
new file mode 100644
index 00000000000..635dc04d1e6
--- /dev/null
+++ b/tests/bugs/fuse/bug-858488-min-free-disk.t
@@ -0,0 +1,108 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Lets create partitions for bricks
+TEST truncate -s 100M $B0/brick1
+TEST truncate -s 200M $B0/brick2
+TEST LO1=`SETUP_LOOP $B0/brick1`
+TEST MKFS_LOOP $LO1
+TEST LO2=`SETUP_LOOP $B0/brick2`
+TEST MKFS_LOOP $LO2
+TEST mkdir -p $B0/${V0}1 $B0/${V0}2
+TEST MOUNT_LOOP $LO1 $B0/${V0}1
+TEST MOUNT_LOOP $LO2 $B0/${V0}2
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs -s $H0 --volfile-id=$V0 --acl $M0
+## Real test starts here
+## ----------------------------------------------------------------------------
+
+MINFREEDISKVALUE=90%
+
+## Set min free disk to MINFREEDISKVALUE percent
+TEST $CLI volume set $V0 cluster.min-free-disk $MINFREEDISKVALUE
+
+## We need to have file name to brick map based on hash.
+## We will use this info in test case 0.
+i=1
+CONTINUE=2
+BRICK1FILE=0
+BRICK2FILE=0
+while [[ $CONTINUE -ne 0 ]]
+do
+ dd if=/dev/zero of=$M0/file$i.data bs=1024 count=1024 1>/dev/null 2>&1
+
+ if [[ -e $B0/${V0}1/file$i.data && $BRICK1FILE = "0" ]]
+ then
+ BRICK1FILE=file$i.data
+ CONTINUE=$(( $CONTINUE - 1 ))
+ fi
+
+ if [[ -e $B0/${V0}2/file$i.data && $BRICK2FILE = "0" ]]
+ then
+ BRICK2FILE=file$i.data
+ CONTINUE=$(( $CONTINUE - 1 ))
+ fi
+
+ rm $M0/file$i.data
+ let i++
+done
+
+
+## Bring free space on one of the bricks to less than minfree value by
+## creating one big file.
+dd if=/dev/zero of=$M0/fillonebrick.data bs=1024 count=25600 1>/dev/null 2>&1
+
+#Lets find out where it was created
+if [ -f $B0/${V0}1/fillonebrick.data ]
+then
+ FILETOCREATE=$BRICK1FILE
+ OTHERBRICK=$B0/${V0}2
+else
+ FILETOCREATE=$BRICK2FILE
+ OTHERBRICK=$B0/${V0}1
+fi
+
+##--------------------------------TEST CASE 0-----------------------------------
+## If we try to create a file which should go into full brick as per hash, it
+## should go into the other brick instead.
+
+## Before that let us create files just to make gluster refresh the stat
+## Using touch so it should not change the disk usage stats
+for k in {1..20};
+do
+ touch $M0/dummyfile$k
+done
+
+dd if=/dev/zero of=$M0/$FILETOCREATE bs=1024 count=2048 1>/dev/null 2>&1
+TEST [ -e $OTHERBRICK/$FILETOCREATE ]
+## Done testing, lets clean up
+TEST rm -rf $M0/*
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+$CLI volume delete $V0;
+
+UMOUNT_LOOP ${B0}/${V0}{1,2}
+rm -f ${B0}/brick{1,2}
+
+cleanup;
diff --git a/tests/bugs/fuse/bug-924726.t b/tests/bugs/fuse/bug-924726.t
new file mode 100755
index 00000000000..2d3c7680798
--- /dev/null
+++ b/tests/bugs/fuse/bug-924726.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+TESTS_EXPECTED_IN_LOOP=10
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function get_socket_count() {
+ netstat -nap | grep $1 | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST ls $M0
+
+GLFS_MNT_PID=`ps ax | grep "glusterfs -s $H0 \-\-volfile\-id $V0 $M0" | sed -e "s/^ *\([0-9]*\).*/\1/g"`
+
+SOCKETS_BEFORE_SWITCH=`netstat -nap | grep $GLFS_MNT_PID | grep ESTABLISHED | wc -l`
+
+for i in $(seq 1 5); do
+ TEST_IN_LOOP $CLI volume set $V0 performance.write-behind off;
+ sleep 1;
+ TEST_IN_LOOP $CLI volume set $V0 performance.write-behind on;
+ sleep 1;
+done
+
+SOCKETS_AFTER_SWITCH=`netstat -nap | grep $GLFS_MNT_PID | grep ESTABLISHED | wc -l`
+
+# currently active graph is not cleaned up till some operation on
+# mount-point. Hence there is one extra graph.
+TEST [ $SOCKETS_AFTER_SWITCH = `expr $SOCKETS_BEFORE_SWITCH + 1` ]
+
+cleanup;
diff --git a/tests/bugs/fuse/bug-963678.t b/tests/bugs/fuse/bug-963678.t
new file mode 100644
index 00000000000..006181f26e1
--- /dev/null
+++ b/tests/bugs/fuse/bug-963678.t
@@ -0,0 +1,57 @@
+#!/bin/bash
+#
+# Bug 963678 - Test discard functionality
+#
+# Test that basic discard (hole punch) functionality works via the fallocate
+# command line tool. Hole punch deallocates a region of a file, creating a hole
+# and a zero-filled data region. We verify that hole punch works, frees blocks
+# and that subsequent reads do not read stale data (caches are invalidated).
+#
+# NOTE: fuse fallocate is known to be broken with regard to cache invalidation
+# up to 3.9.0 kernels. Therefore, FOPEN_KEEP_CACHE is not used in this
+# test (opens will invalidate the fuse cache).
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../fallocate.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+# check for fallocate and hole punch support
+require_fallocate -l 1m $M0/file
+require_fallocate -p -l 512k $M0/file && rm -f $M0/file
+
+# allocate some blocks, punch a hole and verify block allocation
+TEST fallocate -l 1m $M0/file
+blksz=`stat -c %B $M0/file`
+nblks=`stat -c %b $M0/file`
+TEST [ $(($blksz * $nblks)) -ge 1048576 ]
+TEST fallocate -p -o 512k -l 128k $M0/file
+
+nblks=`stat -c %b $M0/file`
+# allow some room for xattr blocks
+TEST [ $(($blksz * $nblks)) -lt $((917504 + 16384)) ]
+TEST unlink $M0/file
+
+# write some data, punch a hole and verify the file content changes
+TEST dd if=/dev/urandom of=$M0/file bs=1024k count=1
+TEST cp $M0/file $M0/file.copy.pre
+TEST fallocate -p -o 512k -l 128k $M0/file
+TEST cp $M0/file $M0/file.copy.post
+TEST ! cmp $M0/file.copy.pre $M0/file.copy.post
+TEST unlink $M0/file
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/fuse/bug-983477.t b/tests/bugs/fuse/bug-983477.t
new file mode 100755
index 00000000000..41ddd9e55a9
--- /dev/null
+++ b/tests/bugs/fuse/bug-983477.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This script checks if use-readdirp option works as accepted in mount options
+
+function get_use_readdirp_value {
+ local vol=$1
+ local statedump=$(generate_mount_statedump $vol)
+ sleep 1
+ local val=$(grep "use_readdirp=" $statedump | cut -f2 -d'=' | tail -1)
+ rm -f $statedump
+ echo $val
+}
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume start $V0
+#If readdirp is enabled statedump should reflect it
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --use-readdirp=yes
+TEST cd $M0
+EXPECT "1" get_use_readdirp_value $V0
+TEST cd -
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#If readdirp is enabled statedump should reflect it
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --use-readdirp=no
+TEST cd $M0
+EXPECT "0" get_use_readdirp_value $V0
+TEST cd -
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#Since args are optional on this argument just specifying "--use-readdirp" should also turn it `on` not `off`
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --use-readdirp
+TEST cd $M0
+EXPECT "1" get_use_readdirp_value $V0
+TEST cd -
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#By default it is enabled.
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST cd $M0
+EXPECT "1" get_use_readdirp_value $V0
+TEST cd -
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#Invalid values for use-readdirp should not be accepted
+TEST ! glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --use-readdirp=please-fail
+
+cleanup
diff --git a/tests/bugs/fuse/bug-985074.t b/tests/bugs/fuse/bug-985074.t
new file mode 100644
index 00000000000..ffa6df54144
--- /dev/null
+++ b/tests/bugs/fuse/bug-985074.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+#
+# Bug 985074 - Verify stale inode/dentry mappings are cleaned out.
+#
+# This test verifies that an inode/dentry mapping for a file removed via a
+# separate mount point is cleaned up appropriately. We create a file and hard
+# link from client 1. Next we remove the link via client 2. Finally, from client
+# 1 we attempt to rename the original filename to the name of the just removed
+# hard link.
+#
+# If the inode is not unlinked properly, the removed directory entry can resolve
+# to an inode (on the client that never saw the rm) that ends up passed down
+# through the lookup call. If md-cache holds valid metadata on the inode (due to
+# a large timeout value or recent lookup on the valid name), it is tricked into
+# returning a successful lookup that should have returned ENOENT. This manifests
+# as an error from the mv command in the following test sequence because file
+# and file.link resolve to the same file:
+#
+# # mv /mnt/glusterfs/0/file /mnt/glusterfs/0/file.link
+# mv: `/mnt/glusterfs/0/file' and `/mnt/glusterfs/0/file.link' are the same file
+#
+###
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0 --entry-timeout=0 --attribute-timeout=0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1 --entry-timeout=0 --attribute-timeout=0
+
+TEST touch $M0/file
+TEST ln $M0/file $M0/file.link
+TEST ls -ali $M0 $M1
+TEST rm -f $M1/file.link
+TEST ls -ali $M0 $M1
+
+TEST mv $M0/file $M0/file.link
+TEST stat $M0/file.link
+TEST ! stat $M0/file
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/fuse/many-groups-for-acl.t b/tests/bugs/fuse/many-groups-for-acl.t
new file mode 100755
index 00000000000..a51b1bc7267
--- /dev/null
+++ b/tests/bugs/fuse/many-groups-for-acl.t
@@ -0,0 +1,123 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+cleanup
+
+# prepare the users and groups
+NEW_USER=bug1246275
+NEW_UID=1246275
+NEW_GID=1246275
+LAST_GID=1246403
+NEW_GIDS=${NEW_GID}
+
+# OS-specific overrides
+case $OSTYPE in
+NetBSD|Darwin)
+ # no ACLs, and only NGROUPS_MAX=16 secondary groups are supported
+ SKIP_TESTS
+ exit 0
+ ;;
+FreeBSD)
+ # NGROUPS_MAX=1023 (FreeBSD>=8.0), we can afford 200 groups
+ ;;
+Linux)
+ # NGROUPS_MAX=65536, we can afford 200 groups
+ ;;
+*)
+ ;;
+esac
+
+# create a user that belongs to many groups
+for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID})
+do
+ groupadd -o -g ${GID} ${NEW_USER}-${GID}
+ NEW_GIDS="${NEW_GIDS},${NEW_USER}-${GID}"
+done
+TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER}
+
+# Linux < 3.8 exports only first 32 gids of pid to userspace
+kernel_exports_few_gids=0
+if [ "$OSTYPE" = Linux ] && \
+ su -m ${NEW_USER} -c "grep ^Groups: /proc/self/status | wc -w | xargs -I@ expr @ - 1 '<' $LAST_GID - $NEW_GID + 1" > /dev/null; then
+ kernel_exports_few_gids=1
+fi
+
+# preparation done, start the tests
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create ${V0} ${H0}:${B0}/${V0}1
+TEST $CLI volume set $V0 nfs.disable off
+# disable manage-gids on the server-side for now, gets enabled later
+TEST $CLI volume set ${V0} server.manage-gids off
+TEST $CLI volume start ${V0}
+
+# This is just a synchronization hack to make sure the bricks are
+# up before going on.
+EXPECT_WITHIN ${NFS_EXPORT_TIMEOUT} "1" is_nfs_export_available
+
+# mount the volume with POSIX ACL support, without --resolve-gids
+TEST glusterfs --acl --volfile-id=/${V0} --volfile-server=${H0} ${M0}
+
+# create some directories for testing
+TEST mkdir ${M0}/first-32-gids-1
+TEST setfacl -m g:${NEW_UID}:rwx ${M0}/first-32-gids-1
+TEST mkdir ${M0}/first-32-gids-2
+TEST setfacl -m g:$[NEW_UID+16]:rwx ${M0}/first-32-gids-2
+TEST mkdir ${M0}/gid-64
+TEST setfacl -m g:$[NEW_UID+64]:rwx ${M0}/gid-64
+TEST mkdir ${M0}/gid-120
+TEST setfacl -m g:$[NEW_UID+120]:rwx ${M0}/gid-120
+
+su -m ${NEW_USER} -c "touch ${M0}/first-32-gids-1/success > /dev/null"
+TEST [ $? -eq 0 ]
+
+su -m ${NEW_USER} -c "touch ${M0}/first-32-gids-2/success > /dev/null"
+TEST [ $? -eq 0 ]
+
+su -m ${NEW_USER} -c "touch ${M0}/gid-64/success--if-all-gids-exported > /dev/null"
+TEST [ $? -eq $kernel_exports_few_gids ]
+
+su -m ${NEW_USER} -c "touch ${M0}/gid-120/failure > /dev/null"
+TEST [ $? -ne 0 ]
+
+# unmount and remount with --resolve-gids
+EXPECT_WITHIN ${UMOUNT_TIMEOUT} "Y" force_umount ${M0}
+TEST glusterfs --acl --resolve-gids --volfile-id=/${V0} --volfile-server=${H0} ${M0}
+
+su -m ${NEW_USER} -c "touch ${M0}/gid-64/success > /dev/null"
+TEST [ $? -eq 0 ]
+
+su -m ${NEW_USER} -c "touch ${M0}/gid-120/failure > /dev/null"
+TEST [ $? -ne 0 ]
+
+# enable server-side resolving of the groups
+# stopping and starting is not really needed, but it prevents races
+TEST $CLI volume stop ${V0}
+TEST $CLI volume set ${V0} server.manage-gids on
+TEST $CLI volume start ${V0}
+EXPECT_WITHIN ${NFS_EXPORT_TIMEOUT} "1" is_nfs_export_available
+
+# unmount and remount to prevent more race conditions on test systems
+EXPECT_WITHIN ${UMOUNT_TIMEOUT} "Y" force_umount ${M0}
+TEST glusterfs --acl --resolve-gids --volfile-id=/${V0} --volfile-server=${H0} ${M0}
+
+su -m ${NEW_USER} -c "touch ${M0}/gid-120/success > /dev/null"
+TEST [ $? -eq 0 ]
+
+# cleanup
+userdel --force ${NEW_USER}
+for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID})
+do
+ groupdel ${NEW_USER}-${GID}
+done
+
+EXPECT_WITHIN ${UMOUNT_TIMEOUT} "Y" force_umount ${M0}
+
+TEST ${CLI} volume stop ${V0}
+TEST ${CLI} volume delete ${V0}
+
+cleanup
diff --git a/tests/bugs/fuse/setup.sh b/tests/bugs/fuse/setup.sh
new file mode 100755
index 00000000000..d44d8dba027
--- /dev/null
+++ b/tests/bugs/fuse/setup.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+#. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE without selinux:
+TEST glusterfs -s $H0 --volfile-id $V0 $@ $M0
+
+echo "Gluster started and volume '$V0' mounted under '$M0'"
diff --git a/tests/bugs/fuse/teardown.sh b/tests/bugs/fuse/teardown.sh
new file mode 100755
index 00000000000..5d57974e1f9
--- /dev/null
+++ b/tests/bugs/fuse/teardown.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
diff --git a/tests/bugs/geo-replication/bug-1111490.t b/tests/bugs/geo-replication/bug-1111490.t
new file mode 100644
index 00000000000..9686fb5a0ef
--- /dev/null
+++ b/tests/bugs/geo-replication/bug-1111490.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume start $V0
+
+# mount with auxiliary gfid mount
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 --aux-gfid-mount
+
+# create file with specific gfid
+uuid=`uuidgen`
+TEST chown 10:10 $M0
+EXPECT "File creation OK" $PYTHON $(dirname $0)/../../utils/gfid-access.py \
+ $M0 ROOT file0 $uuid file 10 10 0644
+
+# check gfid
+EXPECT "$uuid" getfattr --only-values -n glusterfs.gfid.string $M0/file0
+
+# unmount and mount again so as to start with a fresh inode table
+# or use another mount...
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0 --aux-gfid-mount
+
+# touch the file again (gfid-access.py handles errno)
+EXPECT "File creation OK" $PYTHON $(dirname $0)/../../utils/gfid-access.py \
+ $M0 ROOT file0 $uuid file 10 10 0644
+
+cleanup;
diff --git a/tests/bugs/geo-replication/bug-1296496.t b/tests/bugs/geo-replication/bug-1296496.t
new file mode 100644
index 00000000000..a157be7849a
--- /dev/null
+++ b/tests/bugs/geo-replication/bug-1296496.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Start and create a replicated volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+
+TEST $CLI volume set $V0 indexing on
+
+TEST $CLI volume start $V0;
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+## Mount client-pid=-1
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 --client-pid=-1 $M1
+
+TEST touch $M0
+
+vol_uuid=$(gluster vol info $V0 | grep "Volume ID" | awk '{print $3}')
+
+xtime="trusted.glusterfs.$vol_uuid.xtime"
+
+#TEST xtime
+TEST ! getfattr -n $xtime $M0
+TEST getfattr -n $xtime $B0/${V0}-0
+TEST getfattr -n $xtime $B0/${V0}-1
+
+#TEST stime
+slave_uuid=$(uuidgen)
+stime="trusted.glusterfs.$vol_uuid.$slave_uuid.stime"
+TEST setfattr -n $stime -v "0xFFFE" $B0/${V0}-0
+TEST setfattr -n $stime -v "0xFFFF" $B0/${V0}-1
+
+TEST ! getfattr -n $stime $M0
+TEST getfattr -n $stime $M1
+
+cleanup;
diff --git a/tests/bugs/geo-replication/bug-877293.t b/tests/bugs/geo-replication/bug-877293.t
new file mode 100755
index 00000000000..c5205e8109e
--- /dev/null
+++ b/tests/bugs/geo-replication/bug-877293.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+TEST glusterd
+TEST pidof glusterd
+
+## Start and create a replicated volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+
+TEST $CLI volume set $V0 indexing on
+
+TEST $CLI volume start $V0;
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+## Mount client-pid=-1
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 --client-pid=-1 $M1
+
+TEST touch $M0
+
+vol_uuid=`getfattr -n trusted.glusterfs.volume-mark -ehex $M1 | sed -n 's/^trusted.glusterfs.volume-mark=0x//p' | cut -b5-36 | sed 's/\([a-f0-9]\{8\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)/\1-\2-\3-\4-/'`
+xtime=trusted.glusterfs.$vol_uuid.xtime
+
+TEST "getfattr -n $xtime $B0/${V0}-0 | grep -q ${xtime}="
+
+TEST kill_brick $V0 $H0 $B0/${V0}-0
+
+TEST "getfattr -n $xtime $B0/${V0}-1 | grep -q ${xtime}="
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup
diff --git a/tests/bugs/gfapi/bug-1032894.t b/tests/bugs/gfapi/bug-1032894.t
new file mode 100644
index 00000000000..88d110136e2
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1032894.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#Check stale indices are deleted as part of self-heal-daemon crawl.
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+cd $M0
+TEST mkdir a
+cd a
+TEST kill_brick $V0 $H0 $B0/${V0}0
+# Create stale indices
+for i in {1..10}; do echo abc > $i; done
+for i in {1..10}; do rm -f $i; done
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+
+#Since maximum depth of the directory structure that needs healin is 2
+#Trigger two self-heals. That should make sure the heal is complete
+TEST $CLI volume heal $V0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "0" afr_get_index_count $B0/${V0}1
+cleanup
diff --git a/tests/bugs/gfapi/bug-1093594.c b/tests/bugs/gfapi/bug-1093594.c
new file mode 100644
index 00000000000..f7a06dd5ba8
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1093594.c
@@ -0,0 +1,311 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define WRITE_SIZE (128 * 1024)
+#define READ_WRITE_LOOP 100
+#define FOP_LOOP_COUNT 20
+#define TEST_CASE_LOOP 20
+
+int gfapi = 1;
+static int extension = 1;
+
+static int
+large_number_of_fops(glfs_t *fs)
+{
+ int ret = 0;
+ int i = 0;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd1 = NULL;
+ char *dir1 = NULL, *dir2 = NULL, *filename1 = NULL, *filename2 = NULL;
+ char *buf = NULL;
+ struct stat sb = {
+ 0,
+ };
+
+ for (i = 0; i < FOP_LOOP_COUNT; i++) {
+ ret = asprintf(&dir1, "dir%d", extension);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ extension++;
+
+ ret = glfs_mkdir(fs, dir1, 0755);
+ if (ret < 0) {
+ fprintf(stderr, "mkdir(%s): %s\n", dir1, strerror(errno));
+ return -1;
+ }
+
+ fd = glfs_opendir(fs, dir1);
+ if (!fd) {
+ fprintf(stderr, "/: %s\n", strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fsetxattr(fd, "user.dirfattr", "fsetxattr", 8, 0);
+ if (ret < 0) {
+ fprintf(stderr, "fsetxattr(%s): %d (%s)\n", dir1, ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_closedir(fd);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_closedir failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_rmdir(fs, dir1);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_unlink failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = asprintf(&filename1, "file%d", extension);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ ret = asprintf(&filename2, "file-%d", extension);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ extension++;
+
+ fd = glfs_creat(fs, filename1, O_RDWR, 0644);
+ if (!fd) {
+ fprintf(stderr, "%s: (%p) %s\n", filename1, fd, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_rename(fs, filename1, filename2);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_rename failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_lstat(fs, filename2, &sb);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_lstat failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_close(fd);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_close failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_unlink(fs, filename2);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_unlink failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+ }
+}
+
+static int
+large_read_write(glfs_t *fs)
+{
+ int ret = 0;
+ int j = 0;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd1 = NULL;
+ char *filename = NULL;
+ char *buf = NULL;
+
+ ret = asprintf(&filename, "filerw%d", extension);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct filename (%s)", strerror(errno));
+ return ret;
+ }
+
+ extension++;
+
+ fd = glfs_creat(fs, filename, O_RDWR, 0644);
+ if (!fd) {
+ fprintf(stderr, "%s: (%p) %s\n", filename, fd, strerror(errno));
+ return -1;
+ }
+
+ buf = (char *)malloc(WRITE_SIZE);
+ memset(buf, '-', WRITE_SIZE);
+
+ for (j = 0; j < READ_WRITE_LOOP; j++) {
+ ret = glfs_write(fd, buf, WRITE_SIZE, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Write(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ return ret;
+ }
+ }
+
+ fd1 = glfs_open(fs, filename, O_RDWR);
+ if (fd1 < 0) {
+ fprintf(stderr, "Open(%s): %d (%s)\n", filename, ret, strerror(errno));
+ return -1;
+ }
+
+ glfs_lseek(fd1, 0, SEEK_SET);
+ for (j = 0; j < READ_WRITE_LOOP; j++) {
+ ret = glfs_read(fd1, buf, WRITE_SIZE, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Read(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ return ret;
+ }
+ }
+
+ for (j = 0; j < READ_WRITE_LOOP; j++) {
+ ret = glfs_write(fd1, buf, WRITE_SIZE, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Write(%s): %d (%s)\n", filename, ret,
+ strerror(errno));
+ return ret;
+ }
+ }
+
+ glfs_close(fd);
+ glfs_close(fd1);
+ ret = glfs_unlink(fs, filename);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_unlink failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ free(buf);
+ free(filename);
+}
+
+static int
+volfile_change(const char *volname)
+{
+ int ret = 0;
+ char *cmd = NULL, *cmd1 = NULL;
+
+ ret = asprintf(&cmd, "gluster volume set %s stat-prefetch off", volname);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct cli command string (%s)",
+ strerror(errno));
+ return ret;
+ }
+
+ ret = asprintf(&cmd1, "gluster volume set %s stat-prefetch on", volname);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct cli command string (%s)",
+ strerror(errno));
+ return ret;
+ }
+
+ ret = system(cmd);
+ if (ret < 0) {
+ fprintf(stderr, "stat-prefetch off on (%s) failed", volname);
+ return ret;
+ }
+
+ ret = system(cmd1);
+ if (ret < 0) {
+ fprintf(stderr, "stat-prefetch on on (%s) failed", volname);
+ return ret;
+ }
+
+ free(cmd);
+ free(cmd1);
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ int i = 0;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd1 = NULL;
+ char *topdir = "topdir", *filename = "file1";
+ char *buf = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+
+ if (argc != 4) {
+ fprintf(stderr,
+ "Expect following args %s <hostname> <Vol> <log file>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ logfile = argv[3];
+
+ for (i = 0; i < TEST_CASE_LOOP; i++) {
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL (%s)\n", strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = large_number_of_fops(fs);
+ if (ret < 0)
+ return -1;
+
+ ret = large_read_write(fs);
+ if (ret < 0)
+ return -1;
+
+ ret = volfile_change(argv[2]);
+ if (ret < 0)
+ return -1;
+
+ ret = large_number_of_fops(fs);
+ if (ret < 0)
+ return -1;
+
+ ret = large_read_write(fs);
+ if (ret < 0)
+ return -1;
+
+ ret = glfs_fini(fs);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fini failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+ }
+ return 0;
+}
diff --git a/tests/bugs/gfapi/bug-1093594.t b/tests/bugs/gfapi/bug-1093594.t
new file mode 100755
index 00000000000..0c220b3b007
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1093594.t
@@ -0,0 +1,22 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V0;
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/bug-1093594.c -lgfapi
+TEST $(dirname $0)/bug-1093594 $H0 $V0 $logdir/bug-1093594.log
+
+cleanup_tester $(dirname $0)/bug-1093594
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1371541
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1371541
diff --git a/tests/bugs/gfapi/bug-1319374-THIS-crash.t b/tests/bugs/gfapi/bug-1319374-THIS-crash.t
new file mode 100755
index 00000000000..8d3db421c88
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1319374-THIS-crash.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 diagnostics.client-log-flush-timeout 30
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/bug-1319374.c -lgfapi
+TEST $(dirname $0)/bug-1319374 $H0 $V0 $logdir/bug-1319374.log
+
+cleanup_tester $(dirname $0)/bug-1319374
+
+cleanup;
diff --git a/tests/bugs/gfapi/bug-1319374.c b/tests/bugs/gfapi/bug-1319374.c
new file mode 100644
index 00000000000..ea0dfb6b0f2
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1319374.c
@@ -0,0 +1,131 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define NO_INIT 1
+
+glfs_t *
+setup_new_client(char *hostname, char *volname, char *log_file, int flag)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "\nglfs_new: returned NULL (%s)\n", strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_file, 7);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ if (flag == NO_INIT)
+ goto out;
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+out:
+ return fs;
+error:
+ return NULL;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs1 = NULL;
+ glfs_t *fs2 = NULL;
+ glfs_t *fs3 = NULL;
+ char *volname = NULL;
+ char *log_file = NULL;
+ char *hostname = NULL;
+
+ if (argc != 4) {
+ fprintf(
+ stderr,
+ "Expect following args %s <hostname> <Vol> <log file location>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ log_file = argv[3];
+
+ fs1 = setup_new_client(hostname, volname, log_file, NO_INIT);
+ if (!fs1) {
+ fprintf(stderr, "\nsetup_new_client: returned NULL (%s)\n",
+ strerror(errno));
+ goto error;
+ }
+
+ fs2 = setup_new_client(hostname, volname, log_file, 0);
+ if (!fs2) {
+ fprintf(stderr, "\nsetup_new_client: returned NULL (%s)\n",
+ strerror(errno));
+ goto error;
+ }
+
+ fs3 = setup_new_client(hostname, volname, log_file, 0);
+ if (!fs3) {
+ fprintf(stderr, "\nsetup_new_client: returned NULL (%s)\n",
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_fini(fs3);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fini failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ /* The crash is seen in gf_log_flush_timeout_cbk(), and this gets
+ * triggered when 30s timer expires, hence the sleep of 31s
+ */
+ sleep(31);
+ ret = glfs_fini(fs2);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fini failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_init(fs1);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_fini(fs1);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fini failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ return 0;
+error:
+ return -1;
+}
diff --git a/tests/bugs/gfapi/bug-1447266/1460514.c b/tests/bugs/gfapi/bug-1447266/1460514.c
new file mode 100644
index 00000000000..c721559a668
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1447266/1460514.c
@@ -0,0 +1,150 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(func, ret) \
+ do { \
+ if (ret != 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto out; \
+ } else { \
+ fprintf(stderr, "%s : returned %d\n", func, ret); \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+ struct glfs_object *root = NULL, *dir = NULL, *subdir = NULL;
+ struct stat sb = {
+ 0,
+ };
+ char *dirname = "dir";
+ char *subdirname = "subdir";
+ char *logfile = NULL;
+ char *volname = NULL;
+ char *hostname = NULL;
+ unsigned char subdir_handle[GFAPI_HANDLE_LENGTH] = {'\0'};
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("first attempt glfs_init", ret);
+
+ root = glfs_h_lookupat(fs, NULL, "/", &sb, 0);
+ if (root == NULL) {
+ fprintf(stderr, "glfs_h_lookupat: error on lookup of / ,%s\n",
+ strerror(errno));
+ goto out;
+ }
+ dir = glfs_h_mkdir(fs, root, dirname, 0644, &sb);
+ if (dir == NULL) {
+ fprintf(stderr, "glfs_h_mkdir: error on directory creation dir ,%s\n",
+ strerror(errno));
+ goto out;
+ }
+ subdir = glfs_h_mkdir(fs, root, subdirname, 0644, &sb);
+ if (subdir == NULL) {
+ fprintf(stderr,
+ "glfs_h_mkdir: error on directory creation subdir ,%s\n",
+ strerror(errno));
+ goto out;
+ }
+ ret = glfs_h_extract_handle(subdir, subdir_handle, GFAPI_HANDLE_LENGTH);
+ if (ret < 0) {
+ fprintf(stderr,
+ "glfs_h_extract_handle: error extracting handle of %s: %s\n",
+ subdirname, strerror(errno));
+ goto out;
+ }
+
+ glfs_h_close(subdir);
+ subdir = NULL;
+ glfs_h_close(dir);
+ dir = NULL;
+
+ if (fs) {
+ ret = glfs_fini(fs);
+ fprintf(stderr, "glfs_fini(fs) returned %d \n", ret);
+ }
+
+ fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ LOG_ERR("glfs_set_volfile_server", ret);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ LOG_ERR("glfs_set_logging", ret);
+
+ ret = glfs_init(fs);
+ LOG_ERR("second attempt glfs_init", ret);
+
+ subdir = glfs_h_create_from_handle(fs, subdir_handle, GFAPI_HANDLE_LENGTH,
+ &sb);
+ if (subdir == NULL) {
+ fprintf(
+ stderr,
+ "glfs_h_create_from_handle: error on create of %s: from (%p),%s\n",
+ subdirname, subdir_handle, strerror(errno));
+ goto out;
+ }
+ dir = glfs_h_lookupat(fs, subdir, "..", &sb, 0);
+ if (dir == NULL) {
+ fprintf(stderr,
+ "glfs_h_lookupat: error on directory lookup dir using .. ,%s\n",
+ strerror(errno));
+ goto out;
+ }
+
+out:
+ if (subdir)
+ glfs_h_close(subdir);
+ if (dir)
+ glfs_h_close(dir);
+
+ if (fs) {
+ ret = glfs_fini(fs);
+ fprintf(stderr, "glfs_fini(fs) returned %d \n", ret);
+ }
+
+ if (ret)
+ exit(1);
+ exit(0);
+}
diff --git a/tests/bugs/gfapi/bug-1447266/1460514.t b/tests/bugs/gfapi/bug-1447266/1460514.t
new file mode 100644
index 00000000000..594af75cae2
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1447266/1460514.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/1460514.c -lgfapi -o $(dirname $0)/1460514
+TEST ./$(dirname $0)/1460514 $H0 $V0 $logdir/1460514.log
+
+cleanup_tester $(dirname $0)/1460514
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/gfapi/bug-1447266/bug-1447266.c b/tests/bugs/gfapi/bug-1447266/bug-1447266.c
new file mode 100644
index 00000000000..2b7e2d627fe
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1447266/bug-1447266.c
@@ -0,0 +1,107 @@
+#include <glusterfs/api/glfs.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#define TOTAL_ARGS 4
+int
+main(int argc, char *argv[])
+{
+ char *cwd = (char *)malloc(PATH_MAX * sizeof(char *));
+ char *resolved = NULL;
+ char *result = NULL;
+ char *buf = NULL;
+ struct stat st;
+ char *path = NULL;
+ int ret;
+
+ if (argc != TOTAL_ARGS) {
+ printf(
+ "Please give all required command line args.\n"
+ "Format : <volname> <server_ip> <path_name>\n");
+ goto out;
+ }
+
+ glfs_t *fs = glfs_new(argv[1]);
+
+ if (fs == NULL) {
+ printf("glfs_new: %s\n", strerror(errno));
+ /* No need to fail the test for this error */
+ ret = 0;
+ goto out;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[2], 24007);
+ if (ret) {
+ printf("glfs_set_volfile_server: %s\n", strerror(errno));
+ /* No need to fail the test for this error */
+ ret = 0;
+ goto out;
+ }
+
+ path = argv[3];
+
+ ret = glfs_set_logging(fs, "/tmp/gfapi.log", 7);
+ if (ret) {
+ printf("glfs_set_logging: %s\n", strerror(errno));
+ /* No need to fail the test for this error */
+ ret = 0;
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret) {
+ printf("glfs_init: %s\n", strerror(errno));
+ /* No need to fail the test for this error */
+ ret = 0;
+ goto out;
+ }
+
+ sleep(1);
+
+ ret = glfs_chdir(fs, path);
+ if (ret) {
+ printf("glfs_chdir: %s\n", strerror(errno));
+ goto out;
+ }
+
+ buf = glfs_getcwd(fs, cwd, PATH_MAX);
+ if (cwd == NULL) {
+ printf("glfs_getcwd: %s\n", strerror(errno));
+ goto out;
+ }
+
+ printf("\ncwd = %s\n\n", cwd);
+
+ result = glfs_realpath(fs, path, resolved);
+ if (result == NULL) {
+ printf("glfs_realpath: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = glfs_stat(fs, path, &st);
+ if (ret) {
+ printf("glfs_stat: %s\n", strerror(errno));
+ goto out;
+ }
+ if (cwd)
+ free(cwd);
+
+ result = glfs_realpath(fs, path, resolved);
+ if (result == NULL) {
+ printf("glfs_realpath: %s\n", strerror(errno));
+ goto out;
+ }
+
+ ret = glfs_fini(fs);
+ if (ret) {
+ printf("glfs_fini: %s\n", strerror(errno));
+ /* No need to fail the test for this error */
+ ret = 0;
+ goto out;
+ }
+
+ printf("\n");
+out:
+ return ret;
+}
diff --git a/tests/bugs/gfapi/bug-1447266/bug-1447266.t b/tests/bugs/gfapi/bug-1447266/bug-1447266.t
new file mode 100644
index 00000000000..45547f4f0e7
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1447266/bug-1447266.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../snapshot.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+TEST $CLI volume set $V0 nfs.disable false
+
+
+TEST $CLI volume start $V0;
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+for i in {1..10} ; do echo "file" > $M0/file$i ; done
+
+# Create file and hard-links
+TEST touch $M0/f1
+TEST mkdir $M0/dir
+TEST ln $M0/f1 $M0/f2
+TEST ln $M0/f1 $M0/dir/f3
+
+TEST $CLI snapshot config activate-on-create enable
+TEST $CLI volume set $V0 features.uss enable;
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "file" > $M0/file$i ; done
+
+TEST $CLI snapshot create snap2 $V0 no-timestamp;
+TEST build_tester $(dirname $0)/bug-1447266.c -lgfapi
+
+#Testing strts from here-->
+
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/.."
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/."
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/../."
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/../.."
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/dir/../."
+#Since dir1 is not present, this test should fail
+TEST ! $(dirname $0)/bug-1447266 $V0 $H0 "/dir/../dir1"
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/dir/.."
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/.snaps"
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/.snaps/."
+#Since snap3 is not present, this test should fail
+TEST ! $(dirname $0)/bug-1447266 $V0 $H0 "/.snaps/.././snap3"
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/.snaps/../."
+TEST $(dirname $0)/bug-1447266 $V0 $H0 "/.snaps/./snap1/./../snap1/dir/."
+
+cleanup_tester $(dirname $0)/bug-1447266
+cleanup;
diff --git a/tests/bugs/gfapi/bug-1630804/gfapi-bz1630804.c b/tests/bugs/gfapi/bug-1630804/gfapi-bz1630804.c
new file mode 100644
index 00000000000..d151784627c
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1630804/gfapi-bz1630804.c
@@ -0,0 +1,112 @@
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define VALIDATE_AND_GOTO_LABEL_ON_ERROR(func, ret, label) \
+ do { \
+ if (ret < 0) { \
+ fprintf(stderr, "%s : returned error %d (%s)\n", func, ret, \
+ strerror(errno)); \
+ goto label; \
+ } \
+ } while (0)
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ int flags = O_WRONLY | O_CREAT | O_TRUNC;
+ int do_write = 0;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ const char *dirname = "/some_dir1";
+ const char *filename = "/some_dir1/testfile";
+ const char *short_filename = "testfile";
+ struct stat sb;
+ char buf[512];
+ struct dirent *entry = NULL;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ fprintf(stderr, "Usage: %s <volname> <logfile> <do-write [0/1]\n",
+ argv[0]);
+ return 1;
+ }
+
+ volname = argv[1];
+ logfile = argv[2];
+ do_write = atoi(argv[3]);
+
+ fs = glfs_new(volname);
+ if (!fs)
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_new", ret, out);
+
+ ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_volfile_server", ret, out);
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_set_logging", ret, out);
+
+ ret = glfs_init(fs);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_init", ret, out);
+
+ ret = glfs_mkdir(fs, dirname, 0755);
+ if (ret && errno != EEXIST)
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_mkdir", ret, out);
+
+ fd1 = glfs_creat(fs, filename, flags, 0644);
+ if (fd1 == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_creat", ret, out);
+ }
+
+ if (do_write) {
+ ret = glfs_write(fd1, "hello world", 11, flags);
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_write", ret, out);
+ }
+
+ fd2 = glfs_opendir(fs, dirname);
+ if (fd2 == NULL) {
+ ret = -1;
+ VALIDATE_AND_GOTO_LABEL_ON_ERROR("glfs_opendir", ret, out);
+ }
+
+ do {
+ ret = glfs_readdirplus_r(fd2, &sb, (struct dirent *)buf, &entry);
+ if (entry != NULL) {
+ if (!strcmp(entry->d_name, short_filename)) {
+ if (sb.st_mode == 0) {
+ fprintf(
+ stderr,
+ "Mode bits are incorrect: d_name - %s, st_mode - %jd\n",
+ entry->d_name, (intmax_t)sb.st_mode);
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+ } while (entry != NULL);
+
+out:
+ if (fd1 != NULL)
+ glfs_close(fd1);
+ if (fd2 != NULL)
+ glfs_closedir(fd2);
+
+ if (fs) {
+ /*
+ * If this fails (as it does on Special Snowflake NetBSD for no
+ * good reason), it shouldn't affect the result of the test.
+ */
+ (void)glfs_fini(fs);
+ }
+
+ return ret;
+}
diff --git a/tests/bugs/gfapi/bug-1630804/gfapi-bz1630804.t b/tests/bugs/gfapi/bug-1630804/gfapi-bz1630804.t
new file mode 100644
index 00000000000..ac59aeeb47b
--- /dev/null
+++ b/tests/bugs/gfapi/bug-1630804/gfapi-bz1630804.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 ${H0}:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+build_tester $(dirname $0)/gfapi-bz1630804.c -lgfapi
+
+TEST ./$(dirname $0)/gfapi-bz1630804 $V0 $logdir/gfapi-bz1630804.log 0
+TEST ./$(dirname $0)/gfapi-bz1630804 $V0 $logdir/gfapi-bz1630804.log 1
+
+cleanup_tester $(dirname $0)/gfapi-trunc
+
+cleanup;
diff --git a/tests/bugs/gfapi/glfs_vol_set_IO_ERR.c b/tests/bugs/gfapi/glfs_vol_set_IO_ERR.c
new file mode 100644
index 00000000000..f38f01144d3
--- /dev/null
+++ b/tests/bugs/gfapi/glfs_vol_set_IO_ERR.c
@@ -0,0 +1,163 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define WRITE_SIZE (128)
+
+glfs_t *
+setup_new_client(char *hostname, char *volname, char *log_fileile)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "\nglfs_new: returned NULL (%s)\n", strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_fileile, 7);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+ return fs;
+error:
+ return NULL;
+}
+
+int
+write_something(glfs_t *fs)
+{
+ glfs_fd_t *fd = NULL;
+ char *buf = NULL;
+ int ret = 0;
+ int j = 0;
+
+ fd = glfs_creat(fs, "filename", O_RDWR, 0644);
+ if (!fd) {
+ fprintf(stderr, "%s: (%p) %s\n", "filename", fd, strerror(errno));
+ return -1;
+ }
+
+ buf = (char *)malloc(WRITE_SIZE);
+ memset(buf, '-', WRITE_SIZE);
+
+ for (j = 0; j < 4; j++) {
+ ret = glfs_write(fd, buf, WRITE_SIZE, 0);
+ if (ret < 0) {
+ fprintf(stderr, "Write(%s): %d (%s)\n", "filename", ret,
+ strerror(errno));
+ return ret;
+ }
+ glfs_lseek(fd, 0, SEEK_SET);
+ }
+ return 0;
+}
+
+static int
+volfile_change(const char *volname)
+{
+ int ret = 0;
+ char *cmd = NULL, *cmd1 = NULL;
+
+ ret = asprintf(&cmd, "gluster volume set %s quick-read on", volname);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct cli command string (%s)",
+ strerror(errno));
+ return ret;
+ }
+
+ ret = asprintf(&cmd1, "gluster volume set %s quick-read off", volname);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct cli command string (%s)",
+ strerror(errno));
+ return ret;
+ }
+
+ ret = system(cmd);
+ if (ret < 0) {
+ fprintf(stderr, "quick-read off on (%s) failed", volname);
+ return ret;
+ }
+
+ ret = system(cmd1);
+ if (ret < 0) {
+ fprintf(stderr, "quick-read on on (%s) failed", volname);
+ return ret;
+ }
+
+ ret = system(cmd);
+ if (ret < 0) {
+ fprintf(stderr, "quick-read off on (%s) failed", volname);
+ return ret;
+ }
+
+ free(cmd);
+ free(cmd1);
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+ char buf[100];
+ glfs_fd_t *fd = NULL;
+
+ if (argc != 4) {
+ fprintf(
+ stderr,
+ "Expect following args %s <hostname> <Vol> <log file location>\n",
+ argv[0]);
+ return -1;
+ }
+
+ fs = setup_new_client(argv[1], argv[2], argv[3]);
+ if (!fs)
+ goto error;
+
+ ret = volfile_change(argv[2]);
+ if (ret < 0)
+ goto error;
+
+ /* This is required as volfile change takes a while to reach this
+ * gfapi client and precess the graph change. Without this the issue
+ * cannot be reproduced as in cannot be tested.
+ */
+ sleep(10);
+
+ ret = write_something(fs);
+ if (ret < 0)
+ goto error;
+
+ ret = glfs_fini(fs);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fini failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ return 0;
+error:
+ return -1;
+}
diff --git a/tests/bugs/gfapi/glfs_vol_set_IO_ERR.t b/tests/bugs/gfapi/glfs_vol_set_IO_ERR.t
new file mode 100755
index 00000000000..5893ef273bd
--- /dev/null
+++ b/tests/bugs/gfapi/glfs_vol_set_IO_ERR.t
@@ -0,0 +1,20 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V0;
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/glfs_vol_set_IO_ERR.c -lgfapi
+TEST $(dirname $0)/glfs_vol_set_IO_ERR $H0 $V0 $logdir/glfs_vol_set_IO_ERR.log
+
+cleanup_tester $(dirname $0)/glfs_vol_set_IO_ERR
+cleanup;
diff --git a/tests/bugs/glusterd/859927/repl.t b/tests/bugs/glusterd/859927/repl.t
new file mode 100755
index 00000000000..6e7c23b5b1d
--- /dev/null
+++ b/tests/bugs/glusterd/859927/repl.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd
+
+#Tests for data-self-heal-algorithm option
+function create_setup_for_self_heal {
+ file=$1
+ kill_brick $V0 $H0 $B0/${V0}1
+ dd of=$file if=/dev/urandom bs=1024k count=1 2>&1 > /dev/null
+ $CLI volume start $V0 force
+}
+
+function test_write {
+ dd of=$M0/a if=/dev/urandom bs=1k count=1 2>&1 > /dev/null
+}
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 client-log-level DEBUG
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0;
+
+touch $M0/a
+
+TEST $CLI volume set $V0 cluster.data-self-heal-algorithm full
+EXPECT full volume_option $V0 cluster.data-self-heal-algorithm
+create_setup_for_self_heal $M0/a
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+cat $file > /dev/null 2>&1
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
+TEST cmp $B0/${V0}1/a $B0/${V0}2/a
+
+TEST $CLI volume set $V0 cluster.data-self-heal-algorithm diff
+EXPECT diff volume_option $V0 cluster.data-self-heal-algorithm
+create_setup_for_self_heal $M0/a
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+cat $file > /dev/null 2>&1
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
+TEST cmp $B0/${V0}1/a $B0/${V0}2/a
+
+TEST $CLI volume reset $V0 cluster.data-self-heal-algorithm
+create_setup_for_self_heal $M0/a
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+cat $file > /dev/null 2>&1
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
+TEST cmp $B0/${V0}1/a $B0/${V0}2/a
+
+TEST ! $CLI volume set $V0 cluster.data-self-heal-algorithm ""
+
+cleanup;
diff --git a/tests/bugs/glusterd/add-brick-and-validate-replicated-volume-options.t b/tests/bugs/glusterd/add-brick-and-validate-replicated-volume-options.t
new file mode 100644
index 00000000000..95d0eb69ac1
--- /dev/null
+++ b/tests/bugs/glusterd/add-brick-and-validate-replicated-volume-options.t
@@ -0,0 +1,110 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status';
+
+#bug-1102656 - validating volume top command
+
+TEST $CLI volume top $V0 open
+TEST ! $CLI volume top $V0 open brick $H0:/tmp/brick
+TEST $CLI volume top $V0 read
+
+TEST $CLI volume status
+
+#bug- 1002556
+EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks';
+
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}3
+EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks';
+
+TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}3 force
+EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks';
+
+TEST killall glusterd
+TEST glusterd
+
+EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks';
+
+#bug-1406411- fail-add-brick-when-replica-count-changes
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+#add-brick should fail
+TEST ! $CLI_NO_FORCE volume add-brick $V0 replica 3 $H0:$B0/${V0}3
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}3
+
+TEST $CLI volume create $V1 $H0:$B0/${V1}{1,2};
+TEST $CLI volume start $V1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}2
+TEST kill_brick $V1 $H0 $B0/${V1}1
+
+#add-brick should fail
+TEST ! $CLI_NO_FORCE volume add-brick $V1 replica 2 $H0:$B0/${V1}{3,4}
+
+TEST $CLI volume start $V1 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}2
+
+TEST $CLI volume add-brick $V1 replica 2 $H0:$B0/${V1}{3,4}
+
+#bug-905307 - validate cluster.post-op-delay-secs option
+
+#Strings should not be accepted.
+TEST ! $CLI volume set $V0 cluster.post-op-delay-secs abc
+
+#-ve ints should not be accepted.
+TEST ! $CLI volume set $V0 cluster.post-op-delay-secs -1
+
+#INT_MAX+1 should not be accepted.
+TEST ! $CLI volume set $V0 cluster.post-op-delay-secs 2147483648
+
+#floats should not be accepted.
+TEST ! $CLI volume set $V0 cluster.post-op-delay-secs 1.25
+
+#min val 0 should be accepted
+TEST $CLI volume set $V0 cluster.post-op-delay-secs 0
+EXPECT "0" volume_option $V0 cluster.post-op-delay-secs
+
+#max val 2147483647 should be accepted
+TEST $CLI volume set $V0 cluster.post-op-delay-secs 2147483647
+EXPECT "2147483647" volume_option $V0 cluster.post-op-delay-secs
+
+#some middle val in range 2147 should be accepted
+TEST $CLI volume set $V0 cluster.post-op-delay-secs 2147
+EXPECT "2147" volume_option $V0 cluster.post-op-delay-secs
+
+#bug-1265479 - validate-replica-volume-options
+
+#Setting data-self-heal option on for distribute-replicate volume
+TEST $CLI volume set $V1 data-self-heal on
+EXPECT 'on' volinfo_field $V1 'cluster.data-self-heal';
+TEST $CLI volume set $V1 cluster.data-self-heal on
+EXPECT 'on' volinfo_field $V1 'cluster.data-self-heal';
+
+#Setting metadata-self-heal option on for distribute-replicate volume
+TEST $CLI volume set $V1 metadata-self-heal on
+EXPECT 'on' volinfo_field $V1 'cluster.metadata-self-heal';
+TEST $CLI volume set $V1 cluster.metadata-self-heal on
+
+#Setting entry-self-heal option on for distribute-replicate volume
+TEST $CLI volume set $V1 entry-self-heal on
+EXPECT 'on' volinfo_field $V1 'cluster.entry-self-heal';
+TEST $CLI volume set $V1 cluster.entry-self-heal on
+EXPECT 'on' volinfo_field $V1 'cluster.entry-self-heal';
+
+cleanup
diff --git a/tests/bugs/glusterd/brick-mux-validation-in-cluster.t b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t
new file mode 100644
index 00000000000..b6af487a791
--- /dev/null
+++ b/tests/bugs/glusterd/brick-mux-validation-in-cluster.t
@@ -0,0 +1,108 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+function count_brick_pids {
+ $CLI_1 --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+ | grep -v "N/A" | sort | uniq | wc -l
+}
+
+function count_N/A_brick_pids {
+ $CLI_1 --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+ | grep -- '\-1' | sort | uniq | wc -l
+}
+
+function check_peers {
+ $CLI_2 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+TEST launch_cluster 3
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+TEST $CLI_1 volume set all cluster.brick-multiplex on
+#bug-1609163 - bricks of normal volume should not attach to bricks of gluster_shared_storage volume
+
+##Create, start and mount meta_volume i.e., shared_storage
+TEST $CLI_1 volume create $META_VOL replica 3 $H1:$B1/${META_VOL}1 $H2:$B2/${META_VOL}1 $H3:$B3/${META_VOL}1
+TEST $CLI_1 volume start $META_VOL
+TEST mkdir -p $META_MNT
+TEST glusterfs -s $H1 --volfile-id $META_VOL $META_MNT
+
+TEST $CLI_1 volume info gluster_shared_storage
+
+EXPECT 3 count_brick_processes
+
+#create and start a new volume
+TEST $CLI_1 volume create $V0 replica 3 $H1:$B1/${V0}{1..3} $H2:$B2/${V0}{1..3}
+TEST $CLI_1 volume start $V0
+
+# bricks of normal volume should not attach to bricks of gluster_shared_storage volume
+EXPECT 5 count_brick_processes
+
+#bug-1549996 - stale brick processes on the nodes after volume deletion
+
+TEST $CLI_1 volume create $V1 replica 3 $H1:$B1/${V1}{1..3} $H2:$B2/${V1}{1..3}
+TEST $CLI_1 volume start $V1
+
+EXPECT 5 count_brick_processes
+
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume stop $V1
+
+EXPECT 3 count_brick_processes
+
+TEST $CLI_1 volume stop $META_VOL
+
+TEST $CLI_1 volume delete $META_VOL
+TEST $CLI_1 volume delete $V0
+TEST $CLI_1 volume delete $V1
+
+#bug-1773856 - Brick process fails to come up with brickmux on
+
+TEST $CLI_1 volume create $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1 $H3:$B3/${V0}1 force
+TEST $CLI_1 volume start $V0
+
+
+EXPECT 3 count_brick_processes
+
+#create and start a new volume
+TEST $CLI_1 volume create $V1 $H1:$B1/${V1}2 $H2:$B2/${V1}2 $H3:$B3/${V1}2 force
+TEST $CLI_1 volume start $V1
+
+EXPECT 3 count_brick_processes
+
+V2=patchy2
+TEST $CLI_1 volume create $V2 $H1:$B1/${V2}3 $H2:$B2/${V2}3 $H3:$B3/${V2}3 force
+TEST $CLI_1 volume start $V2
+
+EXPECT 3 count_brick_processes
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_brick_pids
+
+TEST kill_node 1
+
+sleep 10
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers;
+
+$CLI_2 volume set $V0 performance.readdir-ahead on
+$CLI_2 volume set $V1 performance.readdir-ahead on
+
+TEST $glusterd_1;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 count_N/A_brick_pids
+
+cleanup;
diff --git a/tests/bugs/glusterd/brick-mux-validation.t b/tests/bugs/glusterd/brick-mux-validation.t
new file mode 100644
index 00000000000..61b0455f9a8
--- /dev/null
+++ b/tests/bugs/glusterd/brick-mux-validation.t
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../volume.rc
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+function count_brick_pids {
+ $CLI --xml volume status all | sed -n '/.*<pid>\([^<]*\).*/s//\1/p' \
+ | grep -v "N/A" | sort | uniq | wc -l
+}
+
+cleanup;
+
+#bug-1451248 - validate brick mux after glusterd reboot
+
+TEST glusterd
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3}
+TEST $CLI volume start $V0
+
+EXPECT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 online_brick_count
+
+pkill gluster
+TEST glusterd
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 online_brick_count
+
+TEST $CLI volume create $V1 $H0:$B0/${V1}{1..3}
+TEST $CLI volume start $V1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 online_brick_count
+
+#bug-1560957 - brick status goes offline after remove-brick followed by add-brick
+
+pkill glusterd
+TEST glusterd
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 force
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}1_new force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_processes
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 count_brick_pids
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 online_brick_count
+
+#bug-1446172 - reset brick with brick multiplexing enabled
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+# Create files
+for i in {1..5}
+do
+ echo $i > $M0/file$i.txt
+done
+
+TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}1_new start
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 5 online_brick_count
+EXPECT 1 count_brick_processes
+
+# Negative case with brick killed but volume-id xattr present
+TEST ! $CLI volume reset-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1 commit
+
+# reset-brick commit force should work and should bring up the brick
+TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}1_new $H0:$B0/${V0}1_new commit force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 online_brick_count
+EXPECT 1 count_brick_processes
+TEST glusterfs --volfile-id=$V1 --volfile-server=$H0 $M1;
+# Create files
+for i in {1..5}
+do
+ echo $i > $M1/file$i.txt
+done
+
+TEST $CLI volume reset-brick $V1 $H0:$B0/${V1}1 start
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 5 online_brick_count
+EXPECT 1 count_brick_processes
+
+# Simulate reset disk
+for i in {1..5}
+do
+ rm -rf $B0/${V1}1/file$i.txt
+done
+
+setfattr -x trusted.glusterfs.volume-id $B0/${V1}1
+setfattr -x trusted.gfid $B0/${V1}1
+
+# Test reset-brick commit. Using CLI_IGNORE_PARTITION since normal CLI uses
+# the --wignore flag that essentially makes the command act like "commit force"
+TEST $CLI_IGNORE_PARTITION volume reset-brick $V1 $H0:$B0/${V1}1 $H0:$B0/${V1}1 commit
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 6 online_brick_count
+EXPECT 1 count_brick_processes
+
+cleanup;
diff --git a/tests/bugs/glusterd/brick-mux.t b/tests/bugs/glusterd/brick-mux.t
new file mode 100644
index 00000000000..927940534c1
--- /dev/null
+++ b/tests/bugs/glusterd/brick-mux.t
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+cleanup
+
+#bug-1444596 - validating brick mux
+
+TEST glusterd -LDEBUG
+TEST $CLI volume create $V0 $H0:$B0/brick{0,1}
+TEST $CLI volume create $V1 $H0:$B0/brick{2,3}
+
+TEST $CLI volume set all cluster.brick-multiplex on
+
+TEST $CLI volume start $V0
+TEST $CLI volume start $V1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
+EXPECT 1 count_brick_processes
+
+#bug-1499509 - stop all the bricks when a brick process is killed
+kill -9 $(pgrep glusterfsd)
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 online_brick_count
+
+TEST $CLI volume start $V0 force
+TEST $CLI volume start $V1 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
+
+
+pkill glusterd
+TEST glusterd
+
+#Check brick status after restart glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
+EXPECT 1 count_brick_processes
+
+TEST $CLI volume set $V1 performance.io-cache-size 32MB
+TEST $CLI volume stop $V1
+TEST $CLI volume start $V1
+
+#Check No. of brick processes after change option
+EXPECT 2 count_brick_processes
+
+pkill glusterd
+TEST glusterd
+
+#Check brick status after restart glusterd should not be NA
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
+EXPECT 2 count_brick_processes
+
+pkill glusterd
+TEST glusterd
+
+#Check brick status after restart glusterd should not be NA
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 online_brick_count
+EXPECT 2 count_brick_processes
+
+#bug-1444596_brick_mux_posix_hlth_chk_status
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+TEST rm -rf $H0:$B0/brick{0,1}
+
+#Check No. of brick processes after remove brick from back-end
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 online_brick_count
+
+TEST glusterfs -s $H0 --volfile-id $V1 $M0
+TEST touch $M0/file{1..10}
+
+pkill glusterd
+TEST glusterd -LDEBUG
+sleep 5
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 online_brick_count
+
+cleanup
+
diff --git a/tests/bugs/glusterd/brick-order-check-add-brick.t b/tests/bugs/glusterd/brick-order-check-add-brick.t
new file mode 100644
index 00000000000..0be31dac768
--- /dev/null
+++ b/tests/bugs/glusterd/brick-order-check-add-brick.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST verify_lvm_version;
+#Create cluster with 3 nodes
+TEST launch_cluster 3 -NO_DEBUG -NO_FORCE
+TEST setup_lvm 3
+
+TEST $CLI_1 peer probe $H2
+TEST $CLI_1 peer probe $H3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+TEST $CLI_1 volume create $V0 replica 3 $H1:$L1/$V0 $H2:$L2/$V0 $H3:$L3/$V0
+EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks'
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+#add-brick with or without mentioning the replica count should not fail
+TEST $CLI_1 volume add-brick $V0 replica 3 $H1:$L1/${V0}_1 $H2:$L2/${V0}_1 $H3:$L3/${V0}_1
+EXPECT '2 x 3 = 6' volinfo_field $V0 'Number of Bricks'
+
+TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_2 $H2:$L2/${V0}_2 $H3:$L3/${V0}_2
+EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks'
+
+#adding bricks from same host should fail the brick order check
+TEST ! $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5
+EXPECT '3 x 3 = 9' volinfo_field $V0 'Number of Bricks'
+
+#adding bricks from same host with force should succeed
+TEST $CLI_1 volume add-brick $V0 $H1:$L1/${V0}_3 $H1:$L1/${V0}_4 $H1:$L1/${V0}_5 force
+EXPECT '4 x 3 = 12' volinfo_field $V0 'Number of Bricks'
+
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume delete $V0
+
+TEST $CLI_1 volume create $V0 replica 2 $H1:$L1/${V0}1 $H2:$L2/${V0}1
+EXPECT '1 x 2 = 2' volinfo_field $V0 'Number of Bricks'
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+#Add-brick with Increasing replica count
+TEST $CLI_1 volume add-brick $V0 replica 3 $H3:$L3/${V0}1
+EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks'
+
+#Add-brick with Increasing replica count from same host should fail
+TEST ! $CLI_1 volume add-brick $V0 replica 5 $H1:$L1/${V0}2 $H1:$L1/${V0}3
+
+#adding multiple bricks from same host should fail the brick order check
+TEST ! $CLI_1 volume add-brick $V0 replica 3 $H1:$L1/${V0}{4..6} $H2:$L2/${V0}{7..9}
+EXPECT '1 x 3 = 3' volinfo_field $V0 'Number of Bricks'
+
+cleanup
diff --git a/tests/bugs/glusterd/bug-1070734.t b/tests/bugs/glusterd/bug-1070734.t
new file mode 100755
index 00000000000..0afcb3b37b3
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1070734.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 nfs.disable false
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0;
+
+############################################################################
+#TEST-PLAN:
+#Create a directory DIR and a file inside DIR
+#check the hash brick of the file
+#delete the directory for recreating later after remove-brick
+#remove the brick where the files hashed to
+#After remove-brick status says complete go on creating the same directory \
+#DIR and file
+#Check if the file now falls into the other brick
+#Check if the other brick gets the full layout and the remove brick gets \
+#the zeroed layout
+############################################################################
+
+TEST mkdir $N0/DIR;
+
+TEST touch $N0/DIR/file;
+
+if [ -f $B0/${V0}1/DIR/file ]
+then
+ HASHED=$B0/${V0}1;
+ OTHERBRICK=$B0/${V0}2;
+else
+ HASHED=$B0/${V0}2;
+ OTHERBRICK=$B0/${V0}1;
+fi
+
+TEST rm -f $N0/DIR/file;
+TEST rmdir $N0/DIR;
+TEST $CLI volume remove-brick $V0 $H0:${HASHED} start;
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" \
+"$H0:${HASHED}";
+
+TEST mkdir $N0/DIR;
+TEST touch $N0/DIR/file;
+
+#Check now the file should fall in to OTHERBRICK
+TEST [ -f ${OTHERBRICK}/DIR/file ]
+
+#Check the DIR on HASHED should have got zeroed layout and the \
+#OTHERBRICK should have got full layout
+shorter_layout () {
+ dht_get_layout $1 | cut -c 19-
+}
+EXPECT "0000000000000000" shorter_layout $HASHED/DIR ;
+EXPECT "00000000ffffffff" shorter_layout $OTHERBRICK/DIR;
+
+## Before killing daemon to avoid deadlocks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+cleanup
diff --git a/tests/bugs/glusterd/bug-1085330-and-bug-916549.t b/tests/bugs/glusterd/bug-1085330-and-bug-916549.t
new file mode 100644
index 00000000000..892a30d74ea
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1085330-and-bug-916549.t
@@ -0,0 +1,93 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+STR="1234567890"
+volname="Vol"
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+#testcase: bug-1085330
+
+# Construct volname string such that its more than 256 characters
+for i in {1..30}
+do
+ volname+=$STR
+done
+# Now $volname is more than 256 chars
+
+TEST ! $CLI volume create $volname $H0:$B0/${volname}{1,2};
+
+TEST $CLI volume info;
+
+# Construct brick string such that its more than 256 characters
+volname="Vol1234"
+brick="brick"
+for i in {1..30}
+do
+ brick+=$STR
+done
+# Now $brick1 is more than 256 chars
+
+TEST ! $CLI volume create $volname $H0:$B0/$brick;
+
+TEST $CLI volume info;
+
+# Now try to create a volume with couple of bricks (strlen(volname) = 128 &
+# strlen(brick1) = 128
+# Command should still fail as strlen(volp path) > 256
+
+volname="Volume-0"
+brick="brick-00"
+STR="12345678"
+
+for i in {1..15}
+do
+ volname+=$STR
+ brick+=$STR
+done
+TEST ! $CLI volume create $volname $H0:$B0/$brick;
+
+TEST $CLI volume info;
+
+# test case with brick path as 255 and a trailing "/"
+brick=""
+STR1="12345678"
+volname="vol"
+
+for i in {1..31}
+do
+ brick+=$STR1
+done
+brick+="123456/"
+
+echo $brick | wc -c
+# Now $brick is exactly 255 chars, but at end a trailing space
+# This will still fail as volfpath exceeds more than _POSIX_MAX chars
+
+TEST ! $CLI volume create $volname $H0:$B0/$brick;
+
+TEST $CLI volume info;
+
+# Positive test case
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+
+TEST $CLI volume info;
+
+TEST $CLI volume start $V0;
+
+#testcase: bug-916549
+
+pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/);
+brick_pid=$(cat $GLUSTERD_PIDFILEDIR/vols/$V0/$pid_file);
+
+kill -SIGKILL $brick_pid;
+TEST $CLI volume start $V0 force;
+TEST process_leak_count $(pidof glusterd);
+
+cleanup
+
diff --git a/tests/bugs/glusterd/bug-1091935-brick-order-check-from-cli-to-glusterd.t b/tests/bugs/glusterd/bug-1091935-brick-order-check-from-cli-to-glusterd.t
new file mode 100755
index 00000000000..9ac9d8fedd9
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1091935-brick-order-check-from-cli-to-glusterd.t
@@ -0,0 +1,93 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+## Lets create partitions for bricks
+TEST truncate -s 100M $B0/brick1
+TEST truncate -s 200M $B0/brick2
+TEST truncate -s 200M $B0/brick3
+TEST truncate -s 200M $B0/brick4
+
+
+TEST LO1=`SETUP_LOOP $B0/brick1`
+TEST LO2=`SETUP_LOOP $B0/brick2`
+TEST LO3=`SETUP_LOOP $B0/brick3`
+TEST LO4=`SETUP_LOOP $B0/brick4`
+
+
+TEST MKFS_LOOP $LO1
+TEST MKFS_LOOP $LO2
+TEST MKFS_LOOP $LO3
+TEST MKFS_LOOP $LO4
+
+TEST mkdir -p ${B0}/${V0}{0..3}
+
+
+TEST MOUNT_LOOP $LO1 $B0/${V0}0
+TEST MOUNT_LOOP $LO2 $B0/${V0}1
+TEST MOUNT_LOOP $LO3 $B0/${V0}2
+TEST MOUNT_LOOP $LO4 $B0/${V0}3
+
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers;
+
+
+CLI_1_WITHOUT_WIGNORE=$(echo $CLI_1 | sed 's/ --wignore//')
+
+
+# Creating volume with non resolvable host name
+TEST ! $CLI_1_WITHOUT_WIGNORE volume create $V0 replica 2 \
+ $H1:$B0/${V0}0/brick redhat:$B0/${V0}1/brick \
+ $H1:$B0/${V0}2/brick redhat:$B0/${V0}3/brick
+
+
+#Workaround for Bug:1091935
+#Failure to create volume above leaves 1st brick with xattrs.
+rm -rf $B0/${V0}{0..3}/brick;
+
+
+# Creating distribute-replica volume with bad brick order. It will fail
+# due to bad brick order.
+TEST ! $CLI_1_WITHOUT_WIGNORE volume create $V0 replica 2 \
+ $H1:$B0/${V0}0/brick $H1:$B0/${V0}1/brick \
+ $H1:$B0/${V0}2/brick $H1:$B0/${V0}3/brick
+
+
+
+#Workaround for Bug:1091935
+#Failure to create volume above leaves 1st brick with xattrs.
+rm -rf $B0/${V0}{0..3}/brick;
+
+
+
+# Test for positive case, volume create should pass for
+# resolved hostnames and bricks in order.
+TEST $CLI_1_WITHOUT_WIGNORE volume create $V0 replica 2 \
+ $H1:$B0/${V0}0/brick $H2:$B0/${V0}1/brick \
+ $H1:$B0/${V0}2/brick $H2:$B0/${V0}3/brick
+
+# Delete the volume as we want to reuse bricks
+TEST $CLI_1_WITHOUT_WIGNORE volume delete $V0
+
+
+# Now with force at the end of command it will bypass brick-order check
+# for replicate or distribute-replicate volume. and it will create volume
+TEST $CLI_1_WITHOUT_WIGNORE volume create $V0 replica 2 \
+ $H1:$B0/${V0}0/brick $H1:$B0/${V0}1/brick \
+ $H1:$B0/${V0}2/brick $H1:$B0/${V0}3/brick force
+
+# Need to cleanup the loop back devices.
+UMOUNT_LOOP ${B0}/${V0}{0..3}
+rm -f ${B0}/brick{1..4}
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t b/tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t
new file mode 100644
index 00000000000..7be076caaf3
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1238706-daemons-stop-on-peer-cleanup.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+## Test case for stopping all running daemons service on peer detach.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+
+## Start a 2 node virtual cluster
+TEST launch_cluster 2;
+
+## Peer probe server 2 from server 1 cli
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+
+## Creating and starting volume
+TEST $CLI_1 volume create $V0 $H1:$B1/${V0}0 $H1:$B1/${V0}1
+TEST $CLI_1 volume start $V0
+
+TEST $CLI_1 volume set $V0 nfs.disable off
+
+## To Do: Add test case for quota and snapshot daemon. Currently quota
+## Daemon is not working in cluster framework. And sanpd daemon
+## Start only in one node in cluster framework. Add test case
+## once patch http://review.gluster.org/#/c/11666/ merged,
+
+## We are having 2 node "nfs" daemon should run on both node.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" get_nfs_count
+
+## Detach 2nd node from the cluster.
+TEST $CLI_1 peer detach $H2;
+
+
+## After detaching 2nd node we will have only 1 nfs and quota daemon running.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_nfs_count
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/bugs/glusterd/bug-1242875-do-not-pass-volinfo-quota.t b/tests/bugs/glusterd/bug-1242875-do-not-pass-volinfo-quota.t
new file mode 100644
index 00000000000..c229d4371b6
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1242875-do-not-pass-volinfo-quota.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Lets create volume V0 and start the volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+## Lets create volume V1 and start the volume
+TEST $CLI volume create $V1 $H0:$B0/${V0}2 $H0:$B0/${V0}3
+TEST $CLI volume start $V1
+
+## Enable quota on 2nd volume
+TEST $CLI volume quota $V1 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_quotad_count
+
+## Killing all gluster process
+pkill gluster;
+
+## there should not be any quota daemon running after killing quota process
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" get_quotad_count
+
+## Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+
+## Quotad daemon should start on restarting the glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_quotad_count
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-1482906-peer-file-blank-line.t b/tests/bugs/glusterd/bug-1482906-peer-file-blank-line.t
new file mode 100644
index 00000000000..967595e4dbb
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1482906-peer-file-blank-line.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+#Tests for add new line in peers file
+function add_new_line_to_peer_file {
+ UUID_NAME=$($CLI_1 peer status | grep Uuid)
+ PEER_ID=$(echo $UUID_NAME | cut -c 7-)
+ GD_WD=$($CLI_1 system getwd)
+ GD_WD+=/peers/
+ PATH_TO_PEER_FILE=$GD_WD$PEER_ID
+ sed -i '1s/^/\n/gm; $s/$/\n/gm' $PATH_TO_PEER_FILE
+}
+
+cleanup;
+
+TEST launch_cluster 2;
+
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+add_new_line_to_peer_file
+
+TEST kill_glusterd 1
+TEST $glusterd_1
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-1595320.t b/tests/bugs/glusterd/bug-1595320.t
new file mode 100644
index 00000000000..c10e11821a1
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1595320.t
@@ -0,0 +1,93 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup
+
+function count_up_bricks {
+ $CLI --xml volume status $V0 | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+# Setup 3 LVMS
+LVM_PREFIX="test"
+TEST init_n_bricks 3
+TEST setup_lvm 3
+
+# Start glusterd
+TEST glusterd
+TEST pidof glusterd
+
+# Create volume and enable brick multiplexing
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3
+TEST $CLI v set all cluster.brick-multiplex on
+
+# Start the volume
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+EXPECT 1 count_brick_processes
+
+# Kill volume ungracefully
+brick_pid=`pgrep glusterfsd`
+
+# Make sure every brick root should be consumed by a brick process
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+
+b1_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-1*.pid)
+b2_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-2*.pid)
+b3_pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/*d-backends-3*.pid)
+
+kill -9 $brick_pid
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 count_brick_processes
+
+# Unmount 3rd brick root from node
+brick_root=$L3
+_umount_lv 3
+
+# Start the volume only 2 brick should be start
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 2 count_up_bricks
+EXPECT 1 count_brick_processes
+
+brick_pid=`pgrep glusterfsd`
+
+# Make sure only two brick root should be consumed by a brick process
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 0 ]
+
+# Mount the brick root
+TEST mkdir -p $brick_root
+TEST mount -t xfs -o nouuid /dev/test_vg_3/brick_lvm $brick_root
+
+# Replace brick_pid file to test brick_attach code
+TEST cp $b1_pid_file $b3_pid_file
+
+# Start the volume all brick should be up
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 3 count_up_bricks
+EXPECT 1 count_brick_processes
+
+# Make sure every brick root should be consumed by a brick process
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L1 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L2 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+n=`ls -lrth /proc/$brick_pid/fd | grep -iw $L3 | grep -v ".glusterfs" | wc -l`
+TEST [ $n -eq 1 ]
+
+cleanup
diff --git a/tests/bugs/glusterd/bug-1696046.t b/tests/bugs/glusterd/bug-1696046.t
new file mode 100644
index 00000000000..e1c1eb2ceb9
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1696046.t
@@ -0,0 +1,113 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function count_up_bricks {
+ $CLI --xml volume status $1 | grep '<status>1' | wc -l
+}
+
+function count_brick_processes {
+ pgrep glusterfsd | wc -l
+}
+
+logdir=`gluster --print-logdir`
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume set all cluster.brick-multiplex on
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3};
+TEST $CLI volume create $V1 replica 3 $H0:$B0/${V1}{1,2,3};
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $CLI volume start $V1;
+EXPECT 'Started' volinfo_field $V1 'Status';
+
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_up_bricks $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 4 count_up_bricks $V1
+
+EXPECT 1 count_brick_processes
+
+# Mount V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+function client-log-file-name()
+{
+ logfilename=$M0".log"
+ echo ${logfilename:1} | tr / -
+}
+
+function brick-log-file-name()
+{
+ logfilename=$B0"/"$V0"1.log"
+ echo ${logfilename:1} | tr / -
+}
+
+log_file=$logdir"/"`client-log-file-name`
+nofdlog=$(cat $log_file | grep " D " | wc -l)
+TEST [ $((nofdlog)) -eq 0 ]
+
+brick_log_file=$logdir"/bricks/"`brick-log-file-name`
+nofdlog=$(cat $brick_log_file | grep " D " | wc -l)
+TEST [ $((nofdlog)) -eq 0 ]
+
+## Set brick-log-level to DEBUG
+TEST $CLI volume set $V0 diagnostics.brick-log-level DEBUG
+
+# Do some operation
+touch $M0/file1
+
+# Check debug message debug message should be exist only for V0
+# Server xlator is common in brick_mux so after enabling DEBUG log
+# some debug message should be available for other xlators like posix
+
+brick_log_file=$logdir"/bricks/"`brick-log-file-name`
+nofdlog=$(cat $brick_log_file | grep file1 | grep -v server | wc -l)
+TEST [ $((nofdlog)) -ne 0 ]
+
+#Check if any debug log exist in client-log file
+nofdlog=$(cat $log_file | grep " D " | wc -l)
+TEST [ $((nofdlog)) -eq 0 ]
+
+## Set brick-log-level to INFO
+TEST $CLI volume set $V0 diagnostics.brick-log-level INFO
+
+## Set client-log-level to DEBUG
+TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
+
+# Do some operation
+touch $M0/file2
+
+nofdlog=$(cat $brick_log_file | grep " D " | grep file2 | wc -l)
+TEST [ $((nofdlog)) -eq 0 ]
+
+nofdlog=$(cat $log_file | grep " D " | wc -l)
+TEST [ $((nofdlog)) -ne 0 ]
+
+# Unmount V0
+TEST umount $M0
+
+#Mount V1
+TEST glusterfs --volfile-id=$V1 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+#do some operation
+touch $M0/file3
+
+
+# DEBUG log level is enabled only for V0 so no debug message should be available
+# in log specific to file2 creation except for server xlator, server xlator is
+# common xlator in brick mulitplex
+nofdlog=$(cat $brick_log_file | grep file3 | grep -v server | wc -l)
+TEST [ $((nofdlog)) -eq 0 ]
+
+# Unmount V1
+TEST umount $M0
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-1699339.t b/tests/bugs/glusterd/bug-1699339.t
new file mode 100644
index 00000000000..bb8d4f46eb8
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1699339.t
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+NUM_VOLS=15
+
+
+get_brick_base () {
+ printf "%s/vol%02d" $B0 $1
+}
+
+function count_up_bricks {
+ vol=$1;
+ $CLI_1 --xml volume status $vol | grep '<status>1' | wc -l
+}
+
+create_volume () {
+
+ local vol_name=$(printf "%s-vol%02d" $V0 $1)
+
+ TEST $CLI_1 volume create $vol_name replica 3 $H1:$B1/${vol_name} $H2:$B2/${vol_name} $H3:$B3/${vol_name}
+ TEST $CLI_1 volume start $vol_name
+}
+
+TEST launch_cluster 3
+TEST $CLI_1 volume set all cluster.brick-multiplex on
+
+# The option accepts the value in the range from 5 to 200
+TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 210
+TEST ! $CLI_1 volume set all glusterd.vol_count_per_thread 4
+
+TEST $CLI_1 volume set all glusterd.vol_count_per_thread 5
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+# Our infrastructure can't handle an arithmetic expression here. The formula
+# is (NUM_VOLS-1)*5 because it sees each TEST/EXPECT once but needs the other
+# NUM_VOLS-1 and there are 5 such statements in each iteration.
+TESTS_EXPECTED_IN_LOOP=28
+for i in $(seq 1 $NUM_VOLS); do
+ starttime="$(date +%s)";
+ create_volume $i
+done
+
+TEST kill_glusterd 1
+
+TESTS_EXPECTED_IN_LOOP=4
+for i in `seq 1 3 15`
+do
+vol1=$(printf "%s-vol%02d" $V0 $i)
+TEST $CLI_2 volume set $vol1 performance.readdir-ahead on
+done
+
+# Bring back 1st glusterd
+TEST $glusterd_1
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+TESTS_EXPECTED_IN_LOOP=4
+for i in `seq 1 3 15`
+do
+vol1=$(printf "%s-vol%02d" $V0 $i)
+EXPECT_WITHIN $PROBE_TIMEOUT "on" volinfo_field_1 $vol1 performance.readdir-ahead
+done
+
+cleanup
diff --git a/tests/bugs/glusterd/bug-1720566.t b/tests/bugs/glusterd/bug-1720566.t
new file mode 100644
index 00000000000..99bcf6ff785
--- /dev/null
+++ b/tests/bugs/glusterd/bug-1720566.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+
+
+cleanup;
+V0="TestLongVolnamec363b7b536700ff06eedeae0dd9037fec363b7b536700ff06eedeae0dd9037fec363b7b536700ff06eedeae0dd9abcd"
+V1="TestLongVolname3102bd28a16c49440bd5210e4ec4d5d93102bd28a16c49440bd5210e4ec4d5d933102bd28a16c49440bd5210e4ebbcd"
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+$CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+EXPECT 'Created' cluster_volinfo_field 1 $V0 'Status';
+$CLI_1 volume create $V1 $H1:$B1/$V1 $H2:$B2/$V1
+EXPECT 'Created' cluster_volinfo_field 1 $V1 'Status';
+
+$CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+$CLI_1 volume start $V1
+EXPECT 'Started' cluster_volinfo_field 1 $V1 'Status';
+
+#Mount FUSE
+TEST glusterfs -s $H1 --volfile-id=$V0 $M0;
+
+
+#Mount FUSE
+TEST glusterfs -s $H1 --volfile-id=$V1 $M1;
+
+TEST mkdir $M0/dir{1..4};
+TEST touch $M0/dir{1..4}/files{1..4};
+
+TEST mkdir $M1/dir{1..4};
+TEST touch $M1/dir{1..4}/files{1..4};
+
+TEST $CLI_1 volume add-brick $V0 $H1:$B1/${V0}_1 $H2:$B2/${V0}_1
+TEST $CLI_1 volume add-brick $V1 $H1:$B1/${V1}_1 $H2:$B2/${V1}_1
+
+
+TEST $CLI_1 volume rebalance $V0 start
+TEST $CLI_1 volume rebalance $V1 start
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V1
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-824753-file-locker.c b/tests/bugs/glusterd/bug-824753-file-locker.c
new file mode 100644
index 00000000000..f5dababad30
--- /dev/null
+++ b/tests/bugs/glusterd/bug-824753-file-locker.c
@@ -0,0 +1,46 @@
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+int
+main(int argc, char *argv[])
+{
+ int fd = -1;
+ int ret = -1;
+ char command[2048] = "";
+ char filepath[255] = "";
+ struct flock fl;
+
+ fl.l_type = F_WRLCK;
+ fl.l_whence = SEEK_SET;
+ fl.l_start = 7;
+ fl.l_len = 1;
+ fl.l_pid = getpid();
+
+ snprintf(filepath, 255, "%s/%s", argv[4], argv[5]);
+
+ fd = open(filepath, O_RDWR);
+
+ if (fd == -1)
+ return -1;
+
+ if (fcntl(fd, F_SETLKW, &fl) == -1) {
+ return -1;
+ }
+
+ snprintf(command, sizeof(command),
+ "gluster volume clear-locks %s /%s kind all posix 0,7-1 |"
+ " grep %s | awk -F'..: ' '{print $1}' | grep %s:%s/%s",
+ argv[1], argv[5], argv[2], argv[2], argv[3], argv[1]);
+
+ ret = system(command);
+ close(fd);
+
+ if (ret)
+ return -1;
+ else
+ return 0;
+}
diff --git a/tests/bugs/glusterd/bug-824753.t b/tests/bugs/glusterd/bug-824753.t
new file mode 100755
index 00000000000..b969e28f35e
--- /dev/null
+++ b/tests/bugs/glusterd/bug-824753.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+touch $M0/file1;
+
+TEST $CC -g $(dirname $0)/bug-824753-file-locker.c -o $(dirname $0)/file-locker
+
+TEST $(dirname $0)/file-locker $V0 $H0 $B0 $M0 file1
+
+## Finish up
+TEST rm -f $(dirname $0)/file-locker
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-948729/bug-948729-force.t b/tests/bugs/glusterd/bug-948729/bug-948729-force.t
new file mode 100644
index 00000000000..f4f71f9a1e2
--- /dev/null
+++ b/tests/bugs/glusterd/bug-948729/bug-948729-force.t
@@ -0,0 +1,103 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+uuid1=`uuidgen`;
+uuid2=`uuidgen`;
+uuid3=`uuidgen`;
+
+V1=patchy1
+V2=patchy2
+
+TEST launch_cluster 2;
+
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers;
+
+B3=/d/backends/3
+B4=/d/backends/4
+B5=/d/backends/5
+B6=/d/backends/6
+
+mkdir -p $B3 $B4 $B5 $B6
+
+TEST truncate -s 16M $B1/brick1
+TEST truncate -s 16M $B2/brick2
+TEST truncate -s 16M $B3/brick3
+TEST truncate -s 16M $B4/brick4
+TEST truncate -s 16M $B5/brick5
+TEST truncate -s 16M $B6/brick6
+
+TEST LD1=`SETUP_LOOP $B1/brick1`
+TEST MKFS_LOOP $LD1
+TEST LD2=`SETUP_LOOP $B2/brick2`
+TEST MKFS_LOOP $LD2
+TEST LD3=`SETUP_LOOP $B3/brick3`
+TEST MKFS_LOOP $LD3
+TEST LD4=`SETUP_LOOP $B4/brick4`
+TEST MKFS_LOOP $LD4
+TEST LD5=`SETUP_LOOP $B5/brick5`
+TEST MKFS_LOOP $LD5
+TEST LD6=`SETUP_LOOP $B6/brick6`
+TEST MKFS_LOOP $LD6
+
+mkdir -p $B1/$V0 $B2/$V0 $B3/$V0 $B4/$V0 $B5/$V0 $B6/$V0
+
+TEST MOUNT_LOOP $LD1 $B1/$V0
+TEST MOUNT_LOOP $LD2 $B2/$V0
+TEST MOUNT_LOOP $LD3 $B3/$V0
+TEST MOUNT_LOOP $LD4 $B4/$V0
+TEST MOUNT_LOOP $LD5 $B5/$V0
+TEST MOUNT_LOOP $LD6 $B6/$V0
+
+#Case 0: Parent directory of the brick is absent
+TEST ! $CLI1 volume create $V0 $H1:$B1/$V0/nonexistent/b1 $H2:$B2/$V0/nonexistent/b2 force
+
+#Case 1: File system root is being used as brick directory
+TEST $CLI1 volume create $V0 $H1:$B5/$V0 $H2:$B6/$V0 force
+
+#Case 2: Brick directory contains only one component
+TEST $CLI1 volume create $V1 $H1:/$uuid1 $H2:/$uuid2 force
+
+#Case 3: Sub-directories of the backend FS being used as brick directory
+TEST $CLI1 volume create $V2 $H1:$B1/$V0/brick1 $H2:$B2/$V0/brick2 force
+
+#add-brick tests
+TEST ! $CLI1 volume add-brick $V0 $H1:$B3/$V0/nonexistent/brick3 force
+TEST $CLI1 volume add-brick $V0 $H1:$B3/$V0 force
+TEST $CLI1 volume add-brick $V1 $H1:/$uuid3 force
+TEST $CLI1 volume add-brick $V2 $H1:$B4/$V0/brick3 force
+
+#####replace-brick tests
+#FIX-ME: replace-brick does not work with the newly introduced cluster test
+#####framework
+
+rmdir /$uuid1 /$uuid2 /$uuid3;
+
+$CLI volume stop $V0
+$CLI volume stop $V1
+$CLI volume stop $V2
+
+UMOUNT_LOOP $B1/$V0
+UMOUNT_LOOP $B2/$V0
+UMOUNT_LOOP $B3/$V0
+UMOUNT_LOOP $B4/$V0
+UMOUNT_LOOP $B5/$V0
+UMOUNT_LOOP $B6/$V0
+
+rm -f $B1/brick1
+rm -f $B2/brick2
+rm -f $B3/brick3
+rm -f $B4/brick4
+rm -f $B5/brick5
+rm -f $B6/brick6
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-948729/bug-948729-mode-script.t b/tests/bugs/glusterd/bug-948729/bug-948729-mode-script.t
new file mode 100644
index 00000000000..18bf9a1c4b6
--- /dev/null
+++ b/tests/bugs/glusterd/bug-948729/bug-948729-mode-script.t
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+uuid1=`uuidgen`;
+uuid2=`uuidgen`;
+uuid3=`uuidgen`;
+
+TEST launch_cluster 2;
+
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers;
+
+B3=/d/backends/3
+mkdir -p $B3
+
+TEST truncate -s 16M $B1/brick1
+TEST truncate -s 16M $B2/brick2
+TEST truncate -s 16M $B3/brick3
+
+TEST LD1=`SETUP_LOOP $B1/brick1`
+TEST MKFS_LOOP $LD1
+TEST LD2=`SETUP_LOOP $B2/brick2`
+TEST MKFS_LOOP $LD2
+TEST LD3=`SETUP_LOOP $B3/brick3`
+TEST MKFS_LOOP $LD3
+
+mkdir -p $B1/$V0 $B2/$V0 $B3/$V0
+
+TEST MOUNT_LOOP $LD1 $B1/$V0
+TEST MOUNT_LOOP $LD2 $B2/$V0
+TEST MOUNT_LOOP $LD3 $B3/$V0
+
+cli1=$(echo $CLI1 | sed 's/ --wignore//')
+
+#Case 0: Parent directory of the brick is absent
+TEST ! $cli1 volume create $V0 $H1:$B1/$V0/nonexistent/b1 $H2:$B2/$V0/nonexistent/b2
+
+#Case 1: File system root being used as brick directory
+TEST ! $cli1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+
+#Case 2: Brick directory contains only one component
+TEST ! $cli1 volume create $V0 $H1:/$uuid1 $H2:/$uuid2
+
+#Case 3: Sub-directories of the backend FS being used as brick directory
+TEST $cli1 volume create $V0 $H1:$B1/$V0/brick1 $H2:$B2/$V0/brick2
+
+#add-brick tests
+TEST ! $cli1 volume add-brick $V0 $H1:$B3/$V0/nonexistent/brick3
+TEST ! $cli1 volume add-brick $V0 $H1:$B3/$V0
+TEST ! $cli1 volume add-brick $V0 $H1:/$uuid3
+TEST $cli1 volume add-brick $V0 $H1:$B3/$V0/brick3
+
+#####replace-brick tests
+#FIX-ME : replace-brick does not currently work in the newly introduced
+#####cluster test framework
+
+$CLI1 volume stop $V0
+
+UMOUNT_LOOP $B1/$V0
+UMOUNT_LOOP $B2/$V0
+UMOUNT_LOOP $B3/$V0
+
+rm -f $B1/brick1
+rm -f $B2/brick2
+rm -f $B3/brick3
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-948729/bug-948729.t b/tests/bugs/glusterd/bug-948729/bug-948729.t
new file mode 100644
index 00000000000..2b574aa1a14
--- /dev/null
+++ b/tests/bugs/glusterd/bug-948729/bug-948729.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+uuid1=`uuidgen`;
+uuid2=`uuidgen`;
+uuid3=`uuidgen`;
+
+TEST launch_cluster 2;
+
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers;
+
+B3=/d/backends/3
+
+mkdir -p $B3
+
+TEST truncate -s 16M $B1/brick1
+TEST truncate -s 16M $B2/brick2
+TEST truncate -s 16M $B3/brick3
+
+TEST LD1=`SETUP_LOOP $B1/brick1`
+TEST MKFS_LOOP $LD1
+TEST LD2=`SETUP_LOOP $B2/brick2`
+TEST MKFS_LOOP $LD2
+TEST LD3=`SETUP_LOOP $B3/brick3`
+TEST MKFS_LOOP $LD3
+
+mkdir -p $B1/$V0 $B2/$V0 $B3/$V0
+
+TEST MOUNT_LOOP $LD1 $B1/$V0
+TEST MOUNT_LOOP $LD2 $B2/$V0
+TEST MOUNT_LOOP $LD3 $B3/$V0
+
+#Tests without options 'mode=script' and 'wignore'
+cli1=$(echo $CLI1 | sed 's/ --mode=script//')
+cli1=$(echo $cli1 | sed 's/ --wignore//')
+#Case 0: Parent directory of the brick is absent
+TEST ! $cli1 volume create $V0 $H1:$B1/$V0/nonexistent/b1 $H2:$B2/$V0/nonexistent/b2
+
+#Case 1: File system root being used as brick directory
+TEST ! $cli1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+
+#Case 2: Brick directory contains only one component
+TEST ! $cli1 volume create $V0 $H1:/$uuid1 $H2:/$uuid2
+
+#Case 3: Sub-directories of the backend FS being used as brick directory
+TEST $cli1 volume create $V0 $H1:$B1/$V0/brick1 $H2:$B2/$V0/brick2
+
+#add-brick tests
+TEST ! $cli1 volume add-brick $V0 $H1:$B3/$V0/nonexistent/b3
+TEST ! $cli1 volume add-brick $V0 $H1:$B3/$V0
+TEST ! $cli1 volume add-brick $V0 $H1:/$uuid3
+TEST $cli1 volume add-brick $V0 $H1:$B3/$V0/brick3
+
+#####replace-brick tests
+#FIX-ME: Replace-brick does not work currently in the newly introduced cluster
+#####test framework.
+
+$CLI1 volume stop $V0
+
+UMOUNT_LOOP $B1/$V0
+UMOUNT_LOOP $B2/$V0
+UMOUNT_LOOP $B3/$V0
+
+rm -f $B1/brick1
+rm -f $B2/brick2
+rm -f $B3/brick3
+
+
+cleanup;
diff --git a/tests/bugs/glusterd/bug-949930.t b/tests/bugs/glusterd/bug-949930.t
new file mode 100644
index 00000000000..9a6d38fa37f
--- /dev/null
+++ b/tests/bugs/glusterd/bug-949930.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+V1=patchy2
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume start $V0;
+
+TEST $CLI volume create $V1 $H0:$B0/${V1}{1,2};
+TEST $CLI volume set $V1 nfs.disable off
+TEST $CLI volume start $V1;
+
+TEST ! $CLI volume set $V0 performance.nfs.read-ahead blah
+EXPECT '' volume_option $V0 performance.nfs.read-ahead
+
+TEST $CLI volume set $V0 performance.nfs.read-ahead on
+EXPECT "on" volume_option $V0 performance.nfs.read-ahead
+
+EXPECT '' volume_option $V1 performance.nfs.read-ahead
+
+cleanup;
+
diff --git a/tests/bugs/glusterd/check_elastic_server.t b/tests/bugs/glusterd/check_elastic_server.t
new file mode 100644
index 00000000000..41d2140aa2b
--- /dev/null
+++ b/tests/bugs/glusterd/check_elastic_server.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+
+function cluster_rebalance_status {
+ local vol=$1
+ $CLI_2 volume status | grep -iw "Rebalance" -A 5 | grep "Status" | sed 's/.*: //'
+}
+
+cleanup;
+TEST launch_cluster 4;
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+TEST $CLI_1 peer probe $H4;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 3 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+EXPECT 'Created' cluster_volinfo_field 1 $V0 'Status';
+
+$CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+#Mount invalid volume
+TEST ! glusterfs -s $H1 --volfile-id=$V0_NA $M0;
+
+#Mount FUSE
+TEST glusterfs -s $H1 --volfile-id=$V0 $M0;
+
+TEST mkdir $M0/dir{1..4};
+TEST touch $M0/dir{1..4}/files{1..4};
+
+TEST $CLI_1 volume remove-brick $V0 $H1:$B1/$V0 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_remove_brick_status_completed_field "$V0 $H1:$B1/$V0"
+
+TEST $CLI_1 volume remove-brick $V0 $H1:$B1/$V0 commit
+
+kill_glusterd 1
+
+total_files=`find $M0 -name "files*" | wc -l`
+TEST [ $total_files -eq 16 ];
+
+TEST $CLI_2 volume add-brick $V0 $H3:$B3/$V0
+
+TEST $CLI_2 volume rebalance $V0 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status $V0
+
+total_files=`find $M0 -name "files*" | wc -l`
+TEST [ $total_files -eq 16 ];
+
+TEST $CLI_2 volume add-brick $V0 $H4:$B4/$V0
+
+TEST $CLI_2 volume rebalance $V0 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status $V0
+kill_glusterd 2
+
+total_files=`find $M0 -name "files*" | wc -l`
+TEST [ $total_files -eq 16 ];
+
+cleanup;
+
diff --git a/tests/bugs/glusterd/daemon-log-level-option.t b/tests/bugs/glusterd/daemon-log-level-option.t
new file mode 100644
index 00000000000..66e55e3d758
--- /dev/null
+++ b/tests/bugs/glusterd/daemon-log-level-option.t
@@ -0,0 +1,93 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+function Info_messages_count() {
+ local shd_log=$1
+ cat $shd_log | grep " I " | wc -l
+}
+
+function Warning_messages_count() {
+ local shd_log=$1
+ cat $shd_log | grep " W " | wc -l
+}
+
+function Debug_messages_count() {
+ local shd_log=$1
+ cat $shd_log | grep " D " | wc -l
+}
+
+function Trace_messages_count() {
+ local shd_log=$1
+ cat $shd_log | grep " T " | wc -l
+}
+
+cleanup;
+
+# Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+# set cluster.daemon-log-level option to DEBUG
+TEST $CLI volume set all cluster.daemon-log-level DEBUG
+
+#Create a 3X2 distributed-replicate volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..6};
+TEST $CLI volume start $V0
+
+# log should not have any trace messages
+EXPECT 0 Trace_messages_count "/var/log/glusterfs/glustershd.log"
+
+# stop the volume and remove glustershd log
+TEST $CLI volume stop $V0
+rm -f /var/log/glusterfs/glustershd.log
+
+# set cluster.daemon-log-level option to INFO and start the volume
+TEST $CLI volume set all cluster.daemon-log-level INFO
+TEST $CLI volume start $V0
+
+# log should not have any debug messages
+EXPECT 0 Debug_messages_count "/var/log/glusterfs/glustershd.log"
+
+# log should not have any trace messages
+EXPECT 0 Trace_messages_count "/var/log/glusterfs/glustershd.log"
+
+# stop the volume and remove glustershd log
+TEST $CLI volume stop $V0
+rm -f /var/log/glusterfs/glustershd.log
+
+# set cluster.daemon-log-level option to WARNING and start the volume
+TEST $CLI volume set all cluster.daemon-log-level WARNING
+TEST $CLI volume start $V0
+
+# log should not have any info messages
+EXPECT 0 Info_messages_count "/var/log/glusterfs/glustershd.log"
+
+# log should not have any debug messages
+EXPECT 0 Debug_messages_count "/var/log/glusterfs/glustershd.log"
+
+# log should not have any trace messages
+EXPECT 0 Trace_messages_count "/var/log/glusterfs/glustershd.log"
+
+# stop the volume and remove glustershd log
+TEST $CLI volume stop $V0
+rm -f /var/log/glusterfs/glustershd.log
+
+# set cluster.daemon-log-level option to ERROR and start the volume
+TEST $CLI volume set all cluster.daemon-log-level ERROR
+TEST $CLI volume start $V0
+
+# log should not have any info messages
+EXPECT 0 Info_messages_count "/var/log/glusterfs/glustershd.log"
+
+# log should not have any warning messages
+EXPECT 0 Warning_messages_count "/var/log/glusterfs/glustershd.log"
+
+# log should not have any debug messages
+EXPECT 0 Debug_messages_count "/var/log/glusterfs/glustershd.log"
+
+# log should not have any trace messages
+EXPECT 0 Trace_messages_count "/var/log/glusterfs/glustershd.log"
+
+cleanup
diff --git a/tests/bugs/glusterd/df-results-post-replace-brick-operations.t b/tests/bugs/glusterd/df-results-post-replace-brick-operations.t
new file mode 100644
index 00000000000..04f75889388
--- /dev/null
+++ b/tests/bugs/glusterd/df-results-post-replace-brick-operations.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+TEST glusterd
+
+#Create brick partitions
+TEST truncate -s 100M $B0/brick1
+TEST truncate -s 100M $B0/brick2
+TEST truncate -s 100M $B0/brick3
+TEST truncate -s 100M $B0/brick4
+TEST truncate -s 100M $B0/brick5
+
+LO1=`SETUP_LOOP $B0/brick1`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO1
+
+LO2=`SETUP_LOOP $B0/brick2`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO2
+
+LO3=`SETUP_LOOP $B0/brick3`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO3
+
+LO4=`SETUP_LOOP $B0/brick4`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO4
+
+LO5=`SETUP_LOOP $B0/brick5`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO5
+
+TEST mkdir -p $B0/${V0}1 $B0/${V0}2 $B0/${V0}3 $B0/${V0}4 $B0/${V0}5
+TEST MOUNT_LOOP $LO1 $B0/${V0}1
+TEST MOUNT_LOOP $LO2 $B0/${V0}2
+TEST MOUNT_LOOP $LO3 $B0/${V0}3
+TEST MOUNT_LOOP $LO4 $B0/${V0}4
+TEST MOUNT_LOOP $LO5 $B0/${V0}5
+
+# create a subdirectory in mount point and use it for volume creation
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}1/brick1 $H0:$B0/${V0}2/brick1 $H0:$B0/${V0}3/brick1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" online_brick_count
+
+# mount the volume and check the size at mount point
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+total_space=$(df -P $M0 | tail -1 | awk '{ print $2}')
+
+# perform replace brick operations
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1/brick1 $H0:$B0/${V0}4/brick1 commit force
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}2/brick1 $H0:$B0/${V0}5/brick1 commit force
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+# check for the size at mount point, it should be same as previous
+total_space_new=$(df -P $M0 | tail -1 | awk '{ print $2}')
+TEST [ $total_space -eq $total_space_new ]
diff --git a/tests/bugs/glusterd/mgmt-handshake-and-volume-sync-post-glusterd-restart.t b/tests/bugs/glusterd/mgmt-handshake-and-volume-sync-post-glusterd-restart.t
new file mode 100644
index 00000000000..8001359e6b3
--- /dev/null
+++ b/tests/bugs/glusterd/mgmt-handshake-and-volume-sync-post-glusterd-restart.t
@@ -0,0 +1,71 @@
+#! /bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_peers {
+eval \$CLI_$1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup
+
+TEST launch_cluster 3
+
+TEST $CLI_1 peer probe $H2
+
+#bug-1109741 - validate mgmt handshake
+
+TEST ! $CLI_3 peer probe $H1
+
+GD1_WD=$($CLI_1 system getwd)
+OP_VERS_ORIG=$(grep 'operating-version' ${GD1_WD}/glusterd.info | cut -d '=' -f 2)
+
+TEST $CLI_3 system uuid get # Needed for glusterd.info to be created
+
+GD3_WD=$($CLI_3 system getwd)
+TEST sed -rnie "'s/(operating-version=)\w+/\130600/gip'" ${GD3_WD}/glusterd.info
+
+TEST kill_glusterd 3
+TEST start_glusterd 3
+
+TEST ! $CLI_3 peer probe $H1
+
+OP_VERS_NEW=$(grep 'operating-version' ${GD1_WD}/glusterd.info | cut -d '=' -f 2)
+TEST [[ $OP_VERS_ORIG == $OP_VERS_NEW ]]
+
+#bug-948686 - volume sync after bringing up the killed node
+
+TEST $CLI_1 peer probe $H3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 1
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 2
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 3
+
+TEST $CLI_1 volume create $V0 replica 2 $H1:$B1/$V0 $H1:$B1/${V0}_1 $H2:$B2/$V0 $H3:$B3/$V0
+TEST $CLI_1 volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field_1 $V0 'Status'
+TEST glusterfs --volfile-server=$H1 --volfile-id=$V0 $M0
+
+#kill a node
+TEST kill_node 3
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers 1
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers 2
+
+#modify volume config to see change in volume-sync
+TEST $CLI_1 volume set $V0 write-behind off
+#add some files to the volume to see effect of volume-heal cmd
+TEST touch $M0/{1..100};
+TEST $CLI_1 volume stop $V0;
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 'Stopped' volinfo_field_1 $V0 'Status'
+
+TEST $glusterd_3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 1
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 2
+EXPECT_WITHIN $PROBE_TIMEOUT 2 check_peers 3
+
+sleep 5
+TEST $CLI_3 volume start $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field_1 $V0 'Status'
+TEST $CLI_2 volume stop $V0;
+TEST $CLI_2 volume delete $V0;
+
+cleanup
diff --git a/tests/bugs/glusterd/optimized-basic-testcases-in-cluster.t b/tests/bugs/glusterd/optimized-basic-testcases-in-cluster.t
new file mode 100644
index 00000000000..99272e14245
--- /dev/null
+++ b/tests/bugs/glusterd/optimized-basic-testcases-in-cluster.t
@@ -0,0 +1,115 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+
+function peer_count {
+eval \$CLI_$1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+#bug-1454418 - Setting Port number in specific range
+sysctl net.ipv4.ip_local_reserved_ports="24007-24008,32765-32768,49152-49156"
+
+TEST launch_cluster 4;
+
+#bug-1223213
+
+# Fool the cluster to operate with 3.5 version even though binary's op-version
+# is > 3.5. This is to ensure 3.5 code path is hit to test that volume status
+# works when a node is upgraded from 3.5 to 3.7 or higher as mgmt_v3 lock is
+# been introduced in 3.6 version and onwards
+
+GD1_WD=$($CLI_1 system getwd)
+$CLI_1 system uuid get
+Old_op_version=$(cat ${GD1_WD}/glusterd.info | grep operating-version | cut -d '=' -f 2)
+
+TEST sed -rnie "'s/(operating-version=)\w+/\130500/gip'" ${GD1_WD}/glusterd.info
+
+TEST kill_glusterd 1
+TEST start_glusterd 1
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1
+
+TEST `sed -i "s/"30500"/${Old_op_version}/g" ${GD1_WD}/glusterd.info`
+
+TEST kill_glusterd 1
+TEST start_glusterd 1
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 2
+
+#bug-1454418
+sysctl net.ipv4.ip_local_reserved_ports="
+"
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+TEST $CLI_1 volume start $V0
+
+#bug-888752 - volume status --xml from peer in the cluster
+
+TEST $CLI_1 volume status $V0 $H2:$B2/$V0 --xml
+
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume delete $V0
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+TEST $CLI_1 volume create $V1 $H1:$B1/$V1
+
+# bug - 1635820
+# rebooting a node which doen't host bricks for any one volume
+# peer should not go into rejected state
+TEST kill_glusterd 2
+TEST start_glusterd 2
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 2
+
+TEST $CLI_1 volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field_1 $V0 'Status'
+
+TEST $CLI_1 volume start $V1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field_1 $V1 'Status'
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1
+
+TEST $CLI_1 peer probe $H4;
+EXPECT_WITHIN $PROBE_TIMEOUT 3 peer_count 1
+
+#bug-1173414 - validate mgmt-v3-remote-lock-failure
+
+for i in {1..20}
+do
+$CLI_1 volume set $V0 diagnostics.client-log-level DEBUG &
+$CLI_1 volume set $V1 barrier on
+$CLI_2 volume set $V0 diagnostics.client-log-level DEBUG &
+$CLI_2 volume set $V1 barrier on
+done
+
+EXPECT_WITHIN $PROBE_TIMEOUT 3 peer_count 1
+TEST $CLI_1 volume status
+TEST $CLI_2 volume status
+
+#bug-1293414 - validate peer detach
+
+# peers hosting bricks cannot be detached
+TEST ! $CLI_4 peer detach $H1
+EXPECT_WITHIN $PROBE_TIMEOUT 3 peer_count 1
+
+# peer not hosting bricks should be detachable
+TEST $CLI_4 peer detach $H3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1
+
+#bug-1344407 - deleting a volume when peer is down should fail
+
+#volume should be stopped before deletion
+TEST $CLI_1 volume stop $V0
+
+TEST kill_glusterd 2
+TEST ! $CLI_1 volume delete $V0
+
+cleanup
diff --git a/tests/bugs/glusterd/optimized-basic-testcases.t b/tests/bugs/glusterd/optimized-basic-testcases.t
new file mode 100644
index 00000000000..b89ca22415e
--- /dev/null
+++ b/tests/bugs/glusterd/optimized-basic-testcases.t
@@ -0,0 +1,305 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+function get_opret_value () {
+ local VOL=$1
+ $CLI volume info $VOL --xml | sed -ne 's/.*<opRet>\([-0-9]*\)<\/opRet>/\1/p'
+}
+
+function check_brick()
+{
+ vol=$1;
+ num=$2
+ $CLI volume info $V0 | grep "Brick$num" | awk '{print $2}';
+}
+
+function brick_count()
+{
+ local vol=$1;
+
+ $CLI volume info $vol | egrep "^Brick[0-9]+: " | wc -l;
+}
+
+function get_brick_host_uuid()
+{
+ local vol=$1;
+ local uuid_regex='[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}'
+ local host_uuid_list=$($CLI volume info $vol --xml | grep "brick.uuid" | grep -o -E "$uuid_regex");
+
+ echo $host_uuid_list | awk '{print $1}'
+}
+
+function generate_statedump_and_check_for_glusterd_info {
+ pid=`pidof glusterd`
+ #remove old stale statedumps
+ cleanup_statedump $pid
+ kill -USR1 $pid
+ #Wait till the statedump is generated
+ sleep 1
+ fname=$(ls $statedumpdir | grep -E "\.$pid\.dump\.")
+ cat $statedumpdir/$fname | grep "xlator.glusterd.priv" | wc -l
+}
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+#bug-1238135-lazy-daemon-initialization-on-demand
+
+GDWD=$($CLI system getwd)
+
+# glusterd.info file will be created on either first peer probe or volume
+# creation, hence we expect file to be not present in this case
+TEST ! -e $GDWD/glusterd.info
+
+#bug-913487 - setting volume options before creation of volume should fail
+
+TEST ! $CLI volume set $V0 performance.open-behind off;
+TEST pidof glusterd;
+
+#bug-1433578 - glusterd should not crash after probing a invalid peer
+
+TEST ! $CLI peer probe invalid-peer
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+#bug-1786478 - default volume option after volume reset
+addr_family=`volinfo_field $V0 'transport.address-family'`
+TEST $CLI volume reset $V0
+EXPECT $addr_family volinfo_field $V0 'transport.address-family'
+
+#bug-955588 - uuid validation
+
+uuid=`grep UUID $GLUSTERD_WORKDIR/glusterd.info | cut -f2 -d=`
+EXPECT $uuid get_brick_host_uuid $V0
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+#bug-958790 - set options from file
+
+touch $GLUSTERD_WORKDIR/groups/test
+echo "read-ahead=off" > $GLUSTERD_WORKDIR/groups/test
+echo "open-behind=off" >> $GLUSTERD_WORKDIR/groups/test
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 group test
+EXPECT "off" volume_option $V0 performance.read-ahead
+EXPECT "off" volume_option $V0 performance.open-behind
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+#bug-1321836 - validate opret value for non existing volume
+
+EXPECT 0 get_opret_value $V0
+EXPECT -1 get_opret_value "novol"
+
+EXPECT '2' brick_count $V0
+
+#bug-862834 - validate brick status
+
+EXPECT "$H0:$B0/${V0}1" check_brick $V0 '1';
+EXPECT "$H0:$B0/${V0}2" check_brick $V0 '2';
+
+TEST ! $CLI volume create $V1 $H0:$B0/${V1}0 $H0:$B0/${V0}1;
+
+#bug-1482344 - setting volume-option-at-cluster-level should not result in glusterd crash
+
+TEST ! $CLI volume set all transport.listen-backlog 128
+
+# Check the volume info output, if glusterd would have crashed then this command
+# will fail
+TEST $CLI volume info $V0;
+
+#bug-1002556 and bug-1199451 - command should retrieve current op-version of the node
+TEST $CLI volume get all cluster.op-version
+
+#bug-1315186 - reject-lowering-down-op-version
+
+OP_VERS_ORIG=$(grep 'operating-version' ${GDWD}/glusterd.info | cut -d '=' -f 2)
+OP_VERS_NEW=`expr $OP_VERS_ORIG-1`
+
+TEST ! $CLI volume set all $V0 cluster.op-version $OP_VERS_NEW
+
+#bug-1022055 - validate log rotate command
+
+TEST ! $CLI volume log rotate $V0;
+TEST $CLI volume log $V0 rotate;
+
+#bug-1092841 - validating barrier enable/disable
+
+TEST $CLI volume barrier $V0 enable;
+TEST ! $CLI volume barrier $V0 enable;
+
+TEST $CLI volume barrier $V0 disable;
+TEST ! $CLI volume barrier $V0 disable;
+
+#bug-1095097 - validate volume profile command
+
+TEST $CLI volume profile $V0 start
+TEST $CLI volume profile $V0 info
+
+#bug-839595 - validate server-quorum options
+
+TEST $CLI volume set $V0 cluster.server-quorum-type server
+EXPECT "server" volume_option $V0 cluster.server-quorum-type
+TEST $CLI volume set $V0 cluster.server-quorum-type none
+EXPECT "none" volume_option $V0 cluster.server-quorum-type
+TEST $CLI volume reset $V0 cluster.server-quorum-type
+TEST ! $CLI volume set $V0 cluster.server-quorum-type abc
+TEST ! $CLI volume set all cluster.server-quorum-type none
+TEST ! $CLI volume set $V0 cluster.server-quorum-ratio 100
+
+TEST ! $CLI volume set all cluster.server-quorum-ratio abc
+TEST ! $CLI volume set all cluster.server-quorum-ratio -1
+TEST ! $CLI volume set all cluster.server-quorum-ratio 100.0000005
+TEST $CLI volume set all cluster.server-quorum-ratio 0
+EXPECT "0" volume_option $V0 cluster.server-quorum-ratio
+TEST $CLI volume set all cluster.server-quorum-ratio 100
+EXPECT "100" volume_option $V0 cluster.server-quorum-ratio
+TEST $CLI volume set all cluster.server-quorum-ratio 0.0000005
+EXPECT "0.0000005" volume_option $V0 cluster.server-quorum-ratio
+TEST $CLI volume set all cluster.server-quorum-ratio 100%
+EXPECT "100%" volume_option $V0 cluster.server-quorum-ratio
+
+#bug-1265479 - validate-distributed-volume-options
+
+#Setting data-self-heal option on for distribute volume
+TEST ! $CLI volume set $V0 data-self-heal on
+EXPECT '' volinfo_field $V0 'cluster.data-self-heal';
+TEST ! $CLI volume set $V0 cluster.data-self-heal on
+EXPECT '' volinfo_field $V0 'cluster.data-self-heal';
+
+#Setting metadata-self-heal option on for distribute volume
+TEST ! $CLI volume set $V0 metadata-self-heal on
+EXPECT '' volinfo_field $V0 'cluster.metadata-self-heal';
+TEST ! $CLI volume set $V0 cluster.metadata-self-heal on
+EXPECT '' volinfo_field $V0 'cluster.metadata-self-heal';
+
+#Setting entry-self-heal option on for distribute volume
+TEST ! $CLI volume set $V0 entry-self-heal on
+EXPECT '' volinfo_field $V0 'cluster.entrydata-self-heal';
+TEST ! $CLI volume set $V0 cluster.entry-self-heal on
+EXPECT '' volinfo_field $V0 'cluster.entrydata-self-heal';
+
+#bug-1163108 - validate min-free-disk-option
+
+## Setting invalid value for option cluster.min-free-disk should fail
+TEST ! $CLI volume set $V0 min-free-disk ""
+TEST ! $CLI volume set $V0 min-free-disk 143.!/12
+TEST ! $CLI volume set $V0 min-free-disk 123%
+TEST ! $CLI volume set $V0 min-free-disk 194.34%
+
+## Setting fractional value as a size (unit is byte) for option
+## cluster.min-free-disk should fail
+TEST ! $CLI volume set $V0 min-free-disk 199.051
+TEST ! $CLI volume set $V0 min-free-disk 111.999
+
+## Setting valid value for option cluster.min-free-disk should pass
+TEST $CLI volume set $V0 min-free-disk 12%
+TEST $CLI volume set $V0 min-free-disk 56.7%
+TEST $CLI volume set $V0 min-free-disk 120
+TEST $CLI volume set $V0 min-free-disk 369.0000
+
+#bug-1179175-uss-option-validation
+
+## Set features.uss option with non-boolean value. These non-boolean value
+## for features.uss option should fail.
+TEST ! $CLI volume set $V0 features.uss abcd
+TEST ! $CLI volume set $V0 features.uss #$#$
+TEST ! $CLI volume set $V0 features.uss 2324
+
+## Setting other options with valid value. These options should succeed.
+TEST $CLI volume set $V0 barrier enable
+TEST $CLI volume set $V0 ping-timeout 60
+
+## Set features.uss option with valid boolean value. It should succeed.
+TEST $CLI volume set $V0 features.uss enable
+TEST $CLI volume set $V0 features.uss disable
+
+
+## Setting other options with valid value. These options should succeed.
+TEST $CLI volume set $V0 barrier enable
+TEST $CLI volume set $V0 ping-timeout 60
+
+#bug-1209329 - daemon-svcs-on-reset-volume
+
+##enable the bitrot and verify bitd is running or not
+TEST $CLI volume bitrot $V0 enable
+EXPECT 'on' volinfo_field $V0 'features.bitrot'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_bitd_count
+
+##Do reset force which set the bitrot options to default
+TEST $CLI volume reset $V0 force;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" get_bitd_count
+
+##enable the uss option and verify snapd is running or not
+TEST $CLI volume set $V0 features.uss on
+EXPECT 'on' volinfo_field $V0 'features.uss'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_snapd_count
+
+##Do reset force which set the uss options to default
+TEST $CLI volume reset $V0 force;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" get_snapd_count
+
+##verify initial nfs disabled by default
+EXPECT "0" get_nfs_count
+
+##enable nfs and verify
+TEST $CLI volume set $V0 nfs.disable off
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+EXPECT "1" get_nfs_count
+
+##Do reset force which set the nfs.option to default
+TEST $CLI volume reset $V0 force;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" get_nfs_count
+
+##enable the uss option and verify snapd is running or not
+TEST $CLI volume set $V0 features.uss on
+EXPECT 'on' volinfo_field $V0 'features.uss'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_snapd_count
+
+##Disable the uss option using set command and verify snapd
+TEST $CLI volume set $V0 features.uss off
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" get_snapd_count
+
+##enable nfs.disable and verify
+TEST $CLI volume set $V0 nfs.disable on
+EXPECT 'on' volinfo_field $V0 'nfs.disable'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" get_nfs_count
+
+## disable nfs.disable option using set command
+TEST $CLI volume set $V0 nfs.disable off
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" get_nfs_count
+
+TEST $CLI volume info;
+TEST $CLI volume create $V1 $H0:$B0/${V1}1
+TEST $CLI volume start $V1
+pkill glusterd;
+pkill glusterfsd;
+TEST glusterd
+TEST $CLI volume status $V1
+
+#bug-853601 - Avoid using /var/lib/glusterd as a brick
+TEST ! $CLI volume create "test" $H0:/var/lib/glusterd
+TEST ! $CLI volume create "test" $H0:/var/lib/glusterd force
+TEST ! $CLI volume create "test" $H0:/var/lib/glusterd/abc
+TEST ! $CLI volume create "test" $H0:/var/lib/glusterd/abc force
+mkdir -p /xyz/var/lib/glusterd/abc
+
+#bug 1716812 - volfile should be created with transport type both
+TEST $CLI volume create "test" transport tcp,rdma $H0:/xyz/var/lib/glusterd/abc
+EXPECT 'Created' volinfo_field "test" 'Status';
+
+#While taking a statedump, there is a TRY_LOCK on call_frame, which might may cause
+#failure. So Adding a EXPECT_WITHIN
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" generate_statedump_and_check_for_glusterd_info
+
+cleanup_statedump `pidof glusterd`
+cleanup
diff --git a/tests/bugs/glusterd/quorum-validation.t b/tests/bugs/glusterd/quorum-validation.t
new file mode 100644
index 00000000000..3cc3351b43b
--- /dev/null
+++ b/tests/bugs/glusterd/quorum-validation.t
@@ -0,0 +1,122 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+TEST launch_cluster 2
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$B1/${V0}0 $H2:$B2/${V0}1
+TEST $CLI_1 volume set $V0 cluster.server-quorum-type server
+TEST $CLI_1 volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}1
+
+#bug-1177132 - sync server quorum options when a node is brought up
+TEST $CLI_1 volume set all cluster.server-quorum-ratio 52
+
+#Bring down 2nd glusterd
+TEST kill_glusterd 2
+EXPECT_WITHIN $PROBE_TIMEOUT 0 peer_count
+
+#bug-1104642 - sync server quorum options when a node is brought up
+#set the volume all options from the 1st glusterd
+TEST $CLI_1 volume set all cluster.server-quorum-ratio 80
+
+# Now quorum is not meet. Add-brick, Remove-brick, volume-set command
+#(Command based on syncop framework)should fail
+TEST ! $CLI_1 volume add-brick $V0 $H1:$B1/${V0}2
+TEST ! $CLI_1 volume remove-brick $V0 $H1:$B1/${V0}0 start
+TEST ! $CLI_1 volume set $V0 barrier enable
+
+#quorum is not met, rebalance/profile start should fail
+TEST ! $CLI_1 volume rebalance $V0 start
+TEST ! $CLI_1 volume profile $V0 start
+
+#bug-1690753 - Volume stop when quorum not met is successful
+TEST ! $CLI_1 volume stop $V0
+
+#Bring back the 2nd glusterd
+TEST $glusterd_2
+
+#verify whether the value has been synced
+EXPECT_WITHIN $PROBE_TIMEOUT "80" volinfo_field_1 all cluster.server-quorum-ratio
+EXPECT_WITHIN $PROBE_TIMEOUT '1' peer_count
+EXPECT_WITHIN $PROBE_TIMEOUT "80" volinfo_field_2 all cluster.server-quorum-ratio
+
+# Now quorum is meet.
+# Add-brick, Remove-brick, volume-set command should success
+TEST $CLI_1 volume add-brick $V0 $H2:$B2/${V0}2
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0}2 start
+TEST $CLI_1 volume set $V0 barrier enable
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0}2 stop
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}1
+
+## Stop the volume
+TEST $CLI_1 volume stop $V0
+
+## Bring down 2nd glusterd
+TEST kill_glusterd 2
+
+## Now quorum is not meet. Starting volume on 1st node should not success
+TEST ! $CLI_1 volume start $V0
+
+## Bring back 2nd glusterd
+TEST $glusterd_2
+
+# After 2nd glusterd come back, there will be 2 nodes in a cluster
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count;
+
+## Now quorum is meet. Starting volume on 1st node should be success.
+TEST $CLI_1 volume start $V0
+
+# Now re-execute the same profile command and this time it should succeed
+TEST $CLI_1 volume profile $V0 start
+
+#bug-1352277
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}1
+
+TEST $CLI_1 volume set $V0 cluster.server-quorum-type none
+
+# Bring down all the gluster processes
+TEST killall_gluster
+
+#bring back 1st glusterd and check whether the brick process comes back
+TEST $glusterd_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0
+
+#enabling quorum should bring down the brick
+TEST $CLI_1 volume set $V0 cluster.server-quorum-type server
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0
+
+TEST $glusterd_2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}1
+
+#bug-1367478 - brick processes should not be up when quorum is not met
+TEST $CLI_1 volume create $V1 $H1:$B1/${V1}1 $H2:$B2/${V1}2
+TEST $CLI_1 volume start $V1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V1 $H1 $B1/${V1}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V1 $H2 $B2/${V1}2
+
+# Restart 2nd glusterd
+TEST kill_glusterd 2
+TEST $glusterd_2
+
+# Check if all bricks are up
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V1 $H1 $B1/${V1}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V1 $H2 $B2/${V1}2
+
+cleanup
diff --git a/tests/bugs/glusterd/rebalance-in-cluster.t b/tests/bugs/glusterd/rebalance-in-cluster.t
new file mode 100644
index 00000000000..469ec6cd48e
--- /dev/null
+++ b/tests/bugs/glusterd/rebalance-in-cluster.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+
+function rebalance_status_field_1 {
+ $CLI_1 volume rebalance $1 status | awk '{print $7}' | sed -n 3p
+}
+
+cleanup;
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+$CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+EXPECT 'Created' cluster_volinfo_field 1 $V0 'Status';
+
+$CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+#bug-1231437
+
+#Mount FUSE
+TEST glusterfs -s $H1 --volfile-id=$V0 $M0;
+
+TEST mkdir $M0/dir{1..4};
+TEST touch $M0/dir{1..4}/files{1..4};
+
+TEST $CLI_1 volume add-brick $V0 $H1:$B1/${V0}1 $H2:$B2/${V0}1
+
+TEST $CLI_1 volume rebalance $V0 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
+
+#bug - 1764119 - rebalance status should display detailed info when any of the node is dowm
+TEST kill_glusterd 2
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field_1 $V0
+
+TEST start_glusterd 2
+#bug-1245142
+
+$CLI_1 volume rebalance $V0 start &
+#kill glusterd2 after requst sent, so that call back is called
+#with rpc->status fail ,so roughly 1sec delay is introduced to get this scenario.
+sleep 1
+kill_glusterd 2
+#check glusterd commands are working after rebalance start command
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+cleanup;
+
diff --git a/tests/bugs/glusterd/rebalance-operations-in-single-node.t b/tests/bugs/glusterd/rebalance-operations-in-single-node.t
new file mode 100644
index 00000000000..ef85887f440
--- /dev/null
+++ b/tests/bugs/glusterd/rebalance-operations-in-single-node.t
@@ -0,0 +1,131 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function get_rebalanced_info()
+{
+ local rebal_info_key=$2
+ $CLI volume rebalance $1 status | awk '{print $'$rebal_info_key'}' |sed -n 3p| sed 's/ *$//g'
+}
+
+volname="StartMigrationDuringRebalanceTest"
+TEST glusterd
+TEST pidof glusterd;
+
+TEST $CLI volume info;
+TEST $CLI volume create $volname $H0:$B0/${volname}{1..4};
+TEST $CLI volume start $volname;
+
+#bug-1046308 - validate rebalance on a specified volume name
+TEST $CLI volume rebalance $volname start;
+
+#bug-1089668 - validation of rebalance status and remove brick status
+#bug-963541 - after remove brick start rebalance/remove brick start without commiting should fail
+
+TEST ! $CLI volume remove-brick $volname $H0:$B0/${volname}1 status
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $volname
+
+TEST $CLI volume remove-brick $volname $H0:$B0/${volname}1 start
+TEST ! $CLI volume rebalance $volname start
+TEST ! $CLI volume rebalance $volname status
+TEST ! $CLI volume remove-brick $volname $H0:$B0/${volname}2 start
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field \
+"$volname" "$H0:$B0/${volname}1"
+TEST $CLI volume remove-brick $volname $H0:$B0/${volname}1 commit
+
+TEST $CLI volume rebalance $volname start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $volname
+TEST $CLI volume rebalance $volname stop
+
+TEST $CLI volume remove-brick $volname $H0:$B0/${volname}2 start
+TEST $CLI volume remove-brick $volname $H0:$B0/${volname}2 stop
+
+#bug-1351021-rebalance-info-post-glusterd-restart
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..3};
+TEST $CLI volume start $V0;
+
+#Mount volume and create data
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+TEST mkdir $M0/dir{1..10}
+TEST touch $M0/dir{1..10}/file{1..10}
+
+# Add-brick and start rebalance
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}4
+TEST $CLI volume rebalance $V0 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+#Rebalance info before glusterd restart
+OLD_REBAL_FILES=$(get_rebalanced_info $V0 2)
+OLD_SIZE=$(get_rebalanced_info $V0 3)
+OLD_SCANNED=$(get_rebalanced_info $V0 4)
+OLD_FAILURES=$(get_rebalanced_info $V0 5)
+OLD_SKIPPED=$(get_rebalanced_info $V0 6)
+
+
+pkill glusterd;
+pkill glusterfsd;
+TEST glusterd
+
+#Rebalance info after glusterd restart
+NEW_REBAL_FILES=$(get_rebalanced_info $V0 2)
+NEW_SIZE=$(get_rebalanced_info $V0 3)
+NEW_SCANNED=$(get_rebalanced_info $V0 4)
+NEW_FAILURES=$(get_rebalanced_info $V0 5)
+NEW_SKIPPED=$(get_rebalanced_info $V0 6)
+#Check rebalance info before and after glusterd restart
+TEST [ $OLD_REBAL_FILES == $NEW_REBAL_FILES ]
+TEST [ $OLD_SIZE == $NEW_SIZE ]
+TEST [ $OLD_SCANNED == $NEW_SCANNED ]
+TEST [ $OLD_FAILURES == $NEW_FAILURES ]
+TEST [ $OLD_SKIPPED == $NEW_SKIPPED ]
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#bug-1004744 - validation of rebalance fix layout
+
+TEST $CLI volume start $V0 force
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+for i in `seq 11 20`;
+do
+ mkdir $M0/dir_$i
+ echo file>$M0/dir_$i/file_$i
+ for j in `seq 1 100`;
+ do
+ mkdir $M0/dir_$i/dir_$j
+ echo file>$M0/dir_$i/dir_$j/file_$j
+ done
+done
+
+#add 2 bricks
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{5,6};
+
+#perform rebalance fix-layout
+TEST $CLI volume rebalance $V0 fix-layout start
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "fix-layout completed" fix-layout_status_field $V0;
+
+#bug-1075087 - rebalance post add brick
+TEST mkdir $M0/dir{21..30};
+TEST touch $M0/dir{21..30}/files{1..10};
+
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{7,8}
+
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN 180 "completed" rebalance_status_field $V0
+
+TEST pkill gluster
+TEST glusterd
+TEST pidof glusterd
+
+# status should be "completed" immediate after glusterd has respawned.
+EXPECT_WITHIN 20 "completed" rebalance_status_field $V0
+
+cleanup
diff --git a/tests/bugs/glusterd/remove-brick-in-cluster.t b/tests/bugs/glusterd/remove-brick-in-cluster.t
new file mode 100644
index 00000000000..de94220a906
--- /dev/null
+++ b/tests/bugs/glusterd/remove-brick-in-cluster.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+TEST launch_cluster 2;
+
+#bug-1047955 - remove brick from new peer in cluster
+TEST $CLI_1 volume create $V0 replica 2 $H1:$B1/${V0}{1,2,3,4}
+TEST $CLI_1 volume start $V0;
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_2 volume remove-brick $V0 $H1:$B1/${V0}{3,4} start;
+TEST $CLI_2 volume info
+
+#bug-964059 - volume status post remove brick start
+TEST $CLI_1 volume create $V1 $H1:$B1/${V1}0 $H2:$B2/${V1}1
+TEST $CLI_1 volume start $V1
+TEST $CLI_1 volume remove-brick $V1 $H2:$B2/${V1}1 start
+TEST $CLI_1 volume status
+
+TEST $CLI_1 volume stop $V0
+TEST $CLI_1 volume delete $V0
+
+#bug-1230121 - decrease replica count by remove-brick and increse by add-brick
+## Creating a 2x3 replicate volume
+TEST $CLI_1 volume create $V0 replica 3 $H1:$B1/brick1 $H2:$B2/brick2 \
+ $H1:$B1/brick3 $H2:$B2/brick4 \
+ $H1:$B1/brick5 $H2:$B2/brick6
+
+## Start the volume
+TEST $CLI_1 volume start $V0
+
+## Shrinking volume replica 2x3 to 2x2 by performing remove-brick operation.
+TEST $CLI_1 volume remove-brick $V0 replica 2 $H1:$B1/brick1 $H2:$B2/brick6 force
+
+## Shrinking volume replica 2x2 to 1x2 by performing remove-brick operation
+TEST $CLI_1 volume remove-brick $V0 replica 2 $H1:$B1/brick3 $H2:$B2/brick2 force
+
+## Shrinking volume replica from 1x2 to 1x1 by performing remove-brick operation
+TEST $CLI_1 volume remove-brick $V0 replica 1 $H1:$B1/brick5 force
+
+
+### Expanding volume replica by performing add-brick operation.
+
+## Expend volume replica from 1x1 to 1x2 by performing add-brick operation
+TEST $CLI_1 volume add-brick $V0 replica 2 $H1:$B1/brick5 force
+
+## Expend volume replica from 1x2 to 2x2 by performing add-brick operation
+TEST $CLI_1 volume add-brick $V0 replica 2 $H1:$B1/brick3 $H2:$B2/brick2 force
+
+## Expend volume replica from 2x2 to 2x3 by performing add-brick operation
+TEST $CLI_1 volume add-brick $V0 replica 3 $H1:$B1/brick1 $H2:$B2/brick6 force
+
+cleanup
+
diff --git a/tests/bugs/glusterd/remove-brick-testcases.t b/tests/bugs/glusterd/remove-brick-testcases.t
new file mode 100644
index 00000000000..2f982d5266f
--- /dev/null
+++ b/tests/bugs/glusterd/remove-brick-testcases.t
@@ -0,0 +1,119 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+function brick_count()
+{
+ local vol=$1;
+
+ $CLI volume info $vol | egrep "^Brick[0-9]+: " | wc -l;
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..5}
+TEST $CLI volume start $V0
+
+#bug-1225716 - remove-brick on a brick which is down should fail
+#kill a brick process
+kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}1
+
+#remove-brick start should fail as the brick is down
+TEST ! $CLI volume remove-brick $V0 $H0:$B0/${V0}1 start
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+#remove-brick start should succeed as the brick is up
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 start
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}1"
+
+#kill a brick process
+kill_brick $V0 $H0 $B0/${V0}1
+
+#remove-brick commit should pass even if the brick is down
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}1 commit
+
+#bug-1121584 - brick-existing-validation-for-remove-brick-status-stop
+## Start remove-brick operation on the volume
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 start
+
+## By giving non existing brick for remove-brick status/stop command should
+## give error.
+TEST ! $CLI volume remove-brick $V0 $H0:$B0/ABCD status
+TEST ! $CLI volume remove-brick $V0 $H0:$B0/ABCD stop
+
+## By giving brick which is part of volume for remove-brick status/stop command
+## should print statistics of remove-brick operation or stop remove-brick
+## operation.
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 status
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 stop
+
+#bug-878004 - validate remove brick force
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force;
+EXPECT '3' brick_count $V0
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}3 force;
+EXPECT '2' brick_count $V0
+
+#bug-1027171 - Do not allow commit if the bricks are not decommissioned
+#Remove bricks and commit without starting
+function remove_brick_commit_status {
+ $CLI volume remove-brick $V0 \
+ $H0:$B0/${V0}4 commit 2>&1 |grep -oE "success|decommissioned"
+}
+EXPECT "decommissioned" remove_brick_commit_status;
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0;
+
+#Create a 2X3 distributed-replicate volume
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..6};
+TEST $CLI volume start $V0
+
+#Try to reduce replica count with start option
+function remove_brick_start_status {
+ $CLI volume remove-brick $V0 replica 2 \
+ $H0:$B0/${V0}3 $H0:$B0/${V0}6 start 2>&1 |grep -oE "success|failed"
+}
+EXPECT "failed" remove_brick_start_status;
+
+#Remove bricks with commit option
+function remove_brick_commit_status2 {
+ $CLI volume remove-brick $V0 replica 2 \
+ $H0:$B0/${V0}3 $H0:$B0/${V0}6 commit 2>&1 |
+ grep -oE "success|decommissioned"
+}
+EXPECT "decommissioned" remove_brick_commit_status2;
+TEST $CLI volume info $V0
+
+#bug-1040408 - reduce replica count of distributed replicate volume
+
+# Reduce to 2x2 volume by specifying bricks in reverse order
+function remove_brick_status {
+ $CLI volume remove-brick $V0 replica 2 \
+ $H0:$B0/${V0}6 $H0:$B0/${V0}3 force 2>&1 |grep -oE "success|failed"
+}
+EXPECT "success" remove_brick_status;
+TEST $CLI volume info $V0
+
+#bug-1120647 - remove brick validation
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}{4..5} start
+EXPECT_WITHIN 10 "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}5"
+EXPECT_WITHIN 10 "completed" remove_brick_status_completed_field "$V0 $H0:$B0/${V0}4"
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}{4..5} commit
+TEST $CLI volume remove-brick $V0 replica 1 $H0:$B0/${V0}2 force
+
+cleanup
diff --git a/tests/bugs/glusterd/remove-brick-validation.t b/tests/bugs/glusterd/remove-brick-validation.t
new file mode 100644
index 00000000000..a0ff4ff6a24
--- /dev/null
+++ b/tests/bugs/glusterd/remove-brick-validation.t
@@ -0,0 +1,68 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function peer_count {
+eval \$CLI_$1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+## start a 3 node virtual cluster
+TEST launch_cluster 3;
+
+## peer probe server 2 from server 1 cli
+TEST $CLI_1 peer probe $H2;
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1
+
+#testcase: bug-1245045-remove-brick-validation
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+TEST $CLI_1 volume start $V0
+
+kill_glusterd 2
+
+#remove-brick should fail as the peer hosting the brick is down
+TEST ! $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start
+
+TEST $glusterd_2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1
+
+#volume status should work
+TEST $CLI_2 volume status
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 3
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start
+kill_glusterd 2
+
+#remove-brick commit should fail as the peer hosting the brick is down
+TEST ! $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} commit
+
+TEST $glusterd_2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1
+
+#volume status should work
+TEST $CLI_2 volume status
+
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} stop
+
+kill_glusterd 3
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1
+
+TEST $CLI_1 volume remove-brick $V0 $H2:$B2/${V0} start
+
+TEST start_glusterd 3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1
+TEST $CLI_3 volume status
+
+cleanup
diff --git a/tests/bugs/glusterd/removing-multiple-bricks-in-single-remove-brick-command.t b/tests/bugs/glusterd/removing-multiple-bricks-in-single-remove-brick-command.t
new file mode 100644
index 00000000000..00beab59137
--- /dev/null
+++ b/tests/bugs/glusterd/removing-multiple-bricks-in-single-remove-brick-command.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a 3X2 distributed-replicate volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..6};
+TEST $CLI volume start $V0
+
+#bug-974007 - remove multiple replica pairs in a single brick command
+# Mount FUSE and create files
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST touch $M0/file{1..10}
+
+# Remove bricks from two sub-volumes to make it a 1x2 vol.
+# Bricks in question are given in a random order but from the same subvols.
+function remove_brick_start_status {
+ $CLI volume remove-brick $V0 \
+ $H0:$B0/${V0}6 $H0:$B0/${V0}1 \
+ $H0:$B0/${V0}2 $H0:$B0/${V0}5 start 2>&1 |grep -oE "success|failed"
+}
+EXPECT "success" remove_brick_start_status;
+
+# Wait for rebalance to complete
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/${V0}6 $H0:$B0/${V0}1 $H0:$B0/${V0}2 $H0:$B0/${V0}5"
+
+# Check commit status
+function remove_brick_commit_status {
+ $CLI volume remove-brick $V0 \
+ $H0:$B0/${V0}6 $H0:$B0/${V0}1 \
+ $H0:$B0/${V0}2 $H0:$B0/${V0}5 commit 2>&1 |grep -oE "success|failed"
+}
+EXPECT "success" remove_brick_commit_status;
+
+
+# Check the volume type
+EXPECT "Replicate" echo `$CLI volume info |grep Type |awk '{print $2}'`
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#bug-961669 - remove brick start should fail when reducing the replica count
+
+#Create a 3x3 dist-rep volume
+TEST $CLI volume create $V1 replica 3 $H0:$B0/${V1}{0,1,2,3,4,5,6,7,8};
+TEST $CLI volume start $V1
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "9" brick_count ${V1}
+
+# Mount FUSE and create file/directory
+TEST glusterfs -s $H0 --volfile-id $V1 $M0
+TEST touch $M0/zerobytefile.txt
+TEST mkdir $M0/test_dir
+TEST dd if=/dev/zero of=$M0/file bs=1024 count=1024
+
+function remove_brick_start {
+ $CLI volume remove-brick $V1 replica 2 $H0:$B0/${V1}{1,4,7} start 2>&1|grep -oE 'success|failed'
+}
+
+function remove_brick {
+ $CLI volume remove-brick $V1 replica 2 $H0:$B0/${V1}{1,4,7} force 2>&1|grep -oE 'success|failed'
+}
+
+#remove-brick start variant
+#Actual message displayed at cli is:
+#"volume remove-brick start: failed: Rebalancing not needed when reducing replica count. Try without the 'start' option"
+EXPECT "failed" remove_brick_start;
+
+#remove-brick commit-force
+#Actual message displayed at cli is:
+#"volume remove-brick commit force: success"
+EXPECT "success" remove_brick
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup;
diff --git a/tests/bugs/glusterd/replace-brick-operations.t b/tests/bugs/glusterd/replace-brick-operations.t
new file mode 100644
index 00000000000..044aa3d6c6d
--- /dev/null
+++ b/tests/bugs/glusterd/replace-brick-operations.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+## Test case for BZ: 1094119 Remove replace-brick support from gluster
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+# Start glusterd
+TEST glusterd
+TEST pidof glusterd
+
+## Lets create and start volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+TEST $CLI volume start $V0
+
+#bug-1094119-remove-replace-brick-support-from-glusterd
+
+## Now with this patch replace-brick only accept following commad
+## volume replace-brick <VOLNAME> <SOURCE-BRICK> <NEW-BRICK> {commit force}
+## Apart form this replace brick command will failed.
+
+TEST ! $CLI volume replace-brick $V0 $H0:$B0/${V0}2 $H0:$B0/${V0}3 start
+TEST ! $CLI volume replace-brick $V0 $H0:$B0/${V0}2 $H0:$B0/${V0}3 status
+TEST ! $CLI volume replace-brick $V0 $H0:$B0/${V0}2 $H0:$B0/${V0}3 abort
+
+
+## replace-brick commit force command should success
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}2 $H0:$B0/${V0}3 commit force
+
+#bug-1242543-replace-brick validation
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+# Replace brick1 without killing
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1_new commit force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST kill_brick $V0 $H0 $B0/${V0}1_new
+
+# Replace brick1 after killing the brick
+TEST $CLI volume replace-brick $V0 $H0:$B0/${V0}1_new $H0:$B0/${V0}1_newer commit force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+cleanup;
diff --git a/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t
new file mode 100644
index 00000000000..e6e65c48456
--- /dev/null
+++ b/tests/bugs/glusterd/reset-brick-and-daemons-follow-quorum.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function shd_up_status_1 {
+ $CLI_1 volume status | grep "localhost" | grep "Self-heal Daemon" | awk '{print $7}'
+}
+
+function shd_up_status_2 {
+ $CLI_2 volume status | grep "localhost" | grep "Self-heal Daemon" | awk '{print $7}'
+}
+
+function get_shd_pid_2 {
+ $CLI_2 volume status | grep "localhost" | grep "Self-heal Daemon" | awk '{print $8}'
+}
+
+cleanup;
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+TEST launch_cluster 3
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+
+TEST $CLI_1 volume create $V0 replica 2 $H1:$B0/${V0} $H2:$B0/${V0}
+TEST $CLI_1 volume start $V0
+
+#testcase: bug-1507466 - validate reset-brick commit force
+# Negative case with brick not killed && volume-id xattrs present
+TEST ! $CLI_1 volume reset-brick $V0 $H1:$B0/${V0} $H1:$B0/${V0} commit force
+
+TEST $CLI_1 volume reset-brick $V0 $H1:$B0/${V0} start
+# Now test if reset-brick commit force works
+TEST $CLI_1 volume reset-brick $V0 $H1:$B0/${V0} $H1:$B0/${V0} commit force
+
+#testcase: bug-1383893 - shd should not come up after restarting the peer glusterd
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B0/${V0}
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B0/${V0}
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_2
+
+# Bring down shd on 2nd node
+kill -15 $(get_shd_pid_2)
+
+# Bring down glusterd on 1st node
+TEST kill_glusterd 1
+
+#Bring back 1st glusterd
+TEST $glusterd_1
+
+# We need to wait till PROCESS_UP_TIMEOUT and then check shd service started
+#on node 2, because once glusterd regains quorum, it will restart all volume
+#level daemons
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" shd_up_status_2
+
+cleanup;
diff --git a/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t b/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t
new file mode 100644
index 00000000000..a871e112d87
--- /dev/null
+++ b/tests/bugs/glusterd/serialize-shd-manager-glusterd-restart.t
@@ -0,0 +1,54 @@
+#! /bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_peers {
+count=`$CLI_3 peer status | grep 'Peer in Cluster (Connected)' | wc -l`
+echo $count
+}
+
+function check_shd {
+ps aux | grep $1 | grep glustershd | wc -l
+}
+
+cleanup
+
+
+TEST launch_cluster 6
+
+TESTS_EXPECTED_IN_LOOP=25
+for i in $(seq 2 6); do
+ hostname="H$i"
+ TEST $CLI_1 peer probe ${!hostname}
+done
+
+
+EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers;
+for i in $(seq 1 5); do
+
+ TEST $CLI_1 volume create ${V0}_$i replica 3 $H1:$B1/${V0}_$i $H2:$B2/${V0}_$i $H3:$B3/${V0}_$i $H4:$B4/${V0}_$i $H5:$B5/${V0}_$i $H6:$B6/${V0}_$i
+ TEST $CLI_1 volume start ${V0}_$i force
+
+done
+
+#kill a node
+TEST kill_node 3
+
+TEST $glusterd_3;
+EXPECT_WITHIN $PROBE_TIMEOUT 5 check_peers
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 check_shd $H3
+
+for i in $(seq 1 5); do
+
+ TEST $CLI_1 volume stop ${V0}_$i
+ TEST $CLI_1 volume delete ${V0}_$i
+
+done
+
+for i in $(seq 1 6); do
+ hostname="H$i"
+ EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 0 check_shd ${!hostname}
+done
+cleanup
diff --git a/tests/bugs/glusterd/snapshot-operations.t b/tests/bugs/glusterd/snapshot-operations.t
new file mode 100644
index 00000000000..4705577d741
--- /dev/null
+++ b/tests/bugs/glusterd/snapshot-operations.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+
+cleanup;
+
+TEST verify_lvm_version
+TEST launch_cluster 3;
+TEST setup_lvm 3;
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume create $V0 replica 2 $H1:$L1 $H2:$L2
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+#bug-1318591 - skip-non-directories-inside-vols
+
+b="B1"
+TEST touch ${!b}/glusterd/vols/file
+
+TEST $CLI_1 snapshot create snap1 $V0 no-timestamp;
+
+TEST touch ${!b}/glusterd/snaps/snap1/file
+
+#bug-1322145 - peer hosting snapshotted bricks should not be detachable
+
+kill_glusterd 2
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume replace-brick $V0 $H2:$L2 $H3:$L3 commit force
+
+# peer hosting snapshotted bricks should not be detachable
+TEST ! $CLI_1 peer detach $H2
+
+TEST killall_gluster
+TEST $glusterd_1
+TEST $glusterd_2
+
+cleanup;
+
diff --git a/tests/bugs/glusterd/sync-post-glusterd-restart.t b/tests/bugs/glusterd/sync-post-glusterd-restart.t
new file mode 100644
index 00000000000..de3dff715ab
--- /dev/null
+++ b/tests/bugs/glusterd/sync-post-glusterd-restart.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function volume_get_field()
+{
+ local vol=$1
+ local field=$2
+ $CLI_2 volume get $vol $field | tail -1 | awk '{print $2}'
+}
+
+cleanup
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+TEST $CLI_1 volume start $V0
+
+TEST $CLI_1 volume set $V0 performance.readdir-ahead on
+
+# Bring down 2nd glusterd
+TEST kill_glusterd 2
+
+##bug-1420637 and bug-1323287 - sync post glusterd restart
+
+TEST $CLI_1 volume set all cluster.server-quorum-ratio 60
+TEST $CLI_1 volume set $V0 performance.readdir-ahead off
+TEST $CLI_1 volume set $V0 performance.write-behind off
+
+# Bring back 2nd glusterd
+TEST $glusterd_2
+
+# After 2nd glusterd come back, there will be 2 nodes in a cluster
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count;
+
+#bug-1420637-volume sync post glusterd restart
+
+EXPECT_WITHIN $PROBE_TIMEOUT "60" volinfo_field_2 all cluster.server-quorum-ratio
+EXPECT_WITHIN $PROBE_TIMEOUT "off" volinfo_field_2 $V0 performance.readdir-ahead
+
+#bug-1323287
+EXPECT_WITHIN $PROBE_TIMEOUT 'off' volume_get_field $V0 'write-behind'
+
+#bug-1213295 - volume stop should not crash glusterd post glusterd restart
+
+TEST $CLI_2 volume stop $V0
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume create $V1 $H1:$B1/$V1 $H2:$B2/$V1
+
+cleanup
diff --git a/tests/bugs/glusterd/validating-options-for-replicated-volume.t b/tests/bugs/glusterd/validating-options-for-replicated-volume.t
new file mode 100644
index 00000000000..ddc80b17870
--- /dev/null
+++ b/tests/bugs/glusterd/validating-options-for-replicated-volume.t
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+#bug-1314649 - validate group virt
+TEST $CLI volume set $V0 group virt;
+
+#bug-765230 - remove-quota-related-option-after-disabling-quota
+## setting soft-timeout as 20
+TEST $CLI volume set $V0 features.soft-timeout 20
+EXPECT '20' volinfo_field $V0 'features.soft-timeout';
+
+## enabling features.quota-deem-statfs
+TEST ! $CLI volume set $V0 features.quota-deem-statfs on
+EXPECT '' volinfo_field $V0 'features.quota-deem-statfs'
+
+## enabling quota
+TEST $CLI volume quota $V0 enable
+EXPECT 'on' volinfo_field $V0 'features.quota'
+
+## eetting soft-timeout as 20
+TEST $CLI volume set $V0 features.soft-timeout 20
+EXPECT '20' volinfo_field $V0 'features.soft-timeout';
+
+## enabling features.quota-deem-statfs
+TEST $CLI volume set $V0 features.quota-deem-statfs on
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+## disabling quota
+TEST $CLI volume quota $V0 disable
+EXPECT 'off' volinfo_field $V0 'features.quota'
+EXPECT '' volinfo_field $V0 'features.quota-deem-statfs'
+EXPECT '' volinfo_field $V0 'features.soft-timeout'
+
+## setting soft-timeout as 30
+TEST $CLI volume set $V0 features.soft-timeout 30
+EXPECT '30' volinfo_field $V0 'features.soft-timeout';
+
+## disabling features.quota-deem-statfs
+TEST ! $CLI volume set $V0 features.quota-deem-statfs off
+EXPECT '' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST ! $CLI volume set $V0 statedump-path ""
+TEST ! $CLI volume set $V0 statedump-path " "
+TEST $CLI volume set $V0 statedump-path "/home/"
+EXPECT "/home/" volume_option $V0 server.statedump-path
+
+TEST ! $CLI volume set $V0 background-self-heal-count ""
+TEST ! $CLI volume set $V0 background-self-heal-count " "
+TEST $CLI volume set $V0 background-self-heal-count 10
+EXPECT "10" volume_option $V0 cluster.background-self-heal-count
+
+TEST ! $CLI volume set $V0 io-cache-size ""
+TEST ! $CLI volume set $V0 io-cache-size " "
+TEST $CLI volume set $V0 io-cache-size 64MB
+EXPECT "64MB" volume_option $V0 performance.io-cache-size
+
+TEST ! $CLI volume set $V0 quick-read-cache-size ""
+TEST ! $CLI volume set $V0 quick-read-cache-size " "
+TEST $CLI volume set $V0 quick-read-cache-size 512MB
+EXPECT "512MB" volume_option $V0 performance.quick-read-cache-size
+
+TEST ! $CLI volume set $V0 self-heal-daemon ""
+TEST ! $CLI volume set $V0 self-heal-daemon " "
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT "on" volume_option $V0 cluster.self-heal-daemon
+
+TEST ! $CLI volume set $V0 read-subvolume ""
+TEST ! $CLI volume set $V0 read-subvolume " "
+TEST $CLI volume set $V0 read-subvolume $V0-client-0
+EXPECT "$V0-client-0" volume_option $V0 cluster.read-subvolume
+
+TEST ! $CLI volume set $V0 data-self-heal-algorithm ""
+TEST ! $CLI volume set $V0 data-self-heal-algorithm " "
+TEST ! $CLI volume set $V0 data-self-heal-algorithm on
+TEST $CLI volume set $V0 data-self-heal-algorithm full
+EXPECT "full" volume_option $V0 cluster.data-self-heal-algorithm
+
+TEST ! $CLI volume set $V0 min-free-inodes ""
+TEST ! $CLI volume set $V0 min-free-inodes " "
+TEST $CLI volume set $V0 min-free-inodes 60%
+EXPECT "60%" volume_option $V0 cluster.min-free-inodes
+
+TEST ! $CLI volume set $V0 min-free-disk ""
+TEST ! $CLI volume set $V0 min-free-disk " "
+TEST $CLI volume set $V0 min-free-disk 60%
+EXPECT "60%" volume_option $V0 cluster.min-free-disk
+
+TEST $CLI volume set $V0 min-free-disk 120
+EXPECT "120" volume_option $V0 cluster.min-free-disk
+
+TEST ! $CLI volume set $V0 frame-timeout ""
+TEST ! $CLI volume set $V0 frame-timeout " "
+TEST $CLI volume set $V0 frame-timeout 0
+EXPECT "0" volume_option $V0 network.frame-timeout
+
+TEST ! $CLI volume set $V0 auth.allow ""
+TEST ! $CLI volume set $V0 auth.allow " "
+TEST $CLI volume set $V0 auth.allow 192.168.122.1
+EXPECT "192.168.122.1" volume_option $V0 auth.allow
+
+#bug-782095 - validate performance cache min/max size value
+
+## setting performance cache min size as 2MB
+TEST $CLI volume set $V0 performance.cache-min-file-size 2MB
+EXPECT '2MB' volinfo_field $V0 'performance.cache-min-file-size';
+
+## setting performance cache max size as 20MB
+TEST $CLI volume set $V0 performance.cache-max-file-size 20MB
+EXPECT '20MB' volinfo_field $V0 'performance.cache-max-file-size';
+
+## trying to set performance cache min size as 25MB
+TEST ! $CLI volume set $V0 performance.cache-min-file-size 25MB
+EXPECT '2MB' volinfo_field $V0 'performance.cache-min-file-size';
+
+## able to set performance cache min size as long as its lesser than max size
+TEST $CLI volume set $V0 performance.cache-min-file-size 15MB
+EXPECT '15MB' volinfo_field $V0 'performance.cache-min-file-size';
+
+## trying it out with only cache-max-file-size in CLI as 10MB
+TEST ! $CLI volume set $V0 cache-max-file-size 10MB
+EXPECT '20MB' volinfo_field $V0 'performance.cache-max-file-size';
+
+## finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup
diff --git a/tests/bugs/glusterd/validating-server-quorum.t b/tests/bugs/glusterd/validating-server-quorum.t
new file mode 100644
index 00000000000..ae7d83fd81c
--- /dev/null
+++ b/tests/bugs/glusterd/validating-server-quorum.t
@@ -0,0 +1,125 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_fs {
+ df $1 &> /dev/null
+ echo $?
+}
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+TEST launch_cluster 3
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+# Lets create the volume
+TEST $CLI_1 volume create $V0 replica 3 $H1:$B1/${V0}1 $H2:$B2/${V0}2 $H3:$B3/${V0}3
+TEST $CLI_1 volume set $V0 cluster.server-quorum-type server
+
+# Start the volume
+TEST $CLI_1 volume start $V0
+
+#bug-1345727 - bricks should be down when quorum is not met
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H3 $B3/${V0}3
+
+# Bring down glusterd on 2nd node
+TEST kill_glusterd 2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST kill_glusterd 3
+EXPECT_WITHIN $PROBE_TIMEOUT 0 peer_count
+
+# Server quorum is not met. Brick on 1st node must be down
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1
+
+# Set quorum ratio 95. means 95 % or more than 95% nodes of total available node
+# should be available for performing volume operation.
+# i.e. Server-side quorum is met if the number of nodes that are available is
+# greater than or equal to 'quorum-ratio' times the number of nodes in the
+# cluster
+TEST $CLI_1 volume set all cluster.server-quorum-ratio 95
+
+#bug-1483058 - replace-brick should fail when quorum is not met
+TEST ! $CLI_1 volume replace-brick $V0 $H2:$B2/${V0}2 $H1:$B1/${V0}2_new commit force
+
+#Bring back 2nd glusterd
+TEST $glusterd_2
+
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+# Server quorum is still not met. Bricks should be down on 1st and 2nd nodes
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}2
+
+# Bring back 3rd glusterd
+TEST $glusterd_3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+# Server quorum is met now. Bricks should be up on all nodes
+# Check from 3rd instance of glusterd so that the 3rd node finishes all its
+# handshake and then report back the brick status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H1 $B1/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H2 $B2/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H3 $B3/${V0}3
+
+# Check from 1st instance of glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H1 $B1/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H2 $B2/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 3 $V0 $H3 $B3/${V0}3
+
+# TODO : Because commit fe71ee7 introduced a delay of 1 sec to wait for shd connect and
+# disconnect events to be serially processed during a restart of shd daemon,
+# this introduced a race where while releasing big lock, if any command sneaks
+# and acquires the big lock, it might be able to work on a volinfo which is
+# stale. We need to find a better way to fix this.
+
+sleep 3
+
+# quorum is met. replace-brick will execute successfully
+EXPECT_WITHIN $PEER_SYNC_TIMEOUT 0 attempt_replace_brick 1 $V0 $H2:$B2/${V0}2 $H2:$B2/${V0}2_new
+
+TEST $CLI_1 volume reset all
+TEST $CLI_1 volume set $V0 cluster.server-quorum-type server
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}2_new
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H3 $B3/${V0}3
+
+
+#bug-913555 - volume should become unwritable when quorum does not met
+
+TEST glusterfs --volfile-server=$H1 --volfile-id=$V0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 check_fs $M0;
+
+# Kill one pseudo-node, make sure the others survive and volume stays up.
+TEST kill_node 3;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H2 $B2/${V0}2_new
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 check_fs $M0;
+
+# Kill another pseudo-node, make sure the last one dies and volume goes down.
+TEST kill_node 2;
+EXPECT_WITHIN $PROBE_TIMEOUT 0 check_peers
+#two glusterfsds of the other two glusterds must be dead
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" cluster_brick_up_status 1 $V0 $H1 $B1/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 1 check_fs $M0;
+
+TEST $glusterd_2;
+TEST $glusterd_3;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 check_fs $M0;
+
+cleanup
diff --git a/tests/bugs/glusterfs-server/bug-852147.t b/tests/bugs/glusterfs-server/bug-852147.t
new file mode 100755
index 00000000000..75db2a26e05
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-852147.t
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+logdir=`gluster --print-logdir`"/bricks"
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+touch $M0/file1;
+
+TEST $CLI volume set $V0 performance.cache-max-file-size 20MB
+TEST $CLI volume set $V0 performance.cache-min-file-size 10MB
+
+EXPECT "20MB" volinfo_field $V0 'performance.cache-max-file-size';
+EXPECT "10MB" volinfo_field $V0 'performance.cache-min-file-size';
+
+#Performing volume reset and verifying.
+TEST $CLI volume reset $V0
+EXPECT "" volinfo_field $V0 'performance.cache-max-file-size';
+EXPECT "" volinfo_field $V0 'performance.cache-min-file-size';
+
+#Verifying vlolume-profile start, info and stop
+EXPECT "Starting volume profile on $V0 has been successful " $CLI volume profile $V0 start
+
+function vol_prof_info()
+{
+ $CLI volume profile $V0 info | grep Brick | wc -l
+}
+EXPECT "6" vol_prof_info
+
+EXPECT "Stopping volume profile on $V0 has been successful " $CLI volume profile $V0 stop
+
+function log-file-name()
+{
+ logfilename=$B0"/"$V0"1.log"
+ echo ${logfilename:1} | tr / -
+}
+
+function file-size()
+{
+ ls -lrt $1 | awk '{print $5}'
+}
+
+#Finding the current log file's size
+log_file=$logdir"/"`log-file-name`
+log_file_size=`file-size $log_file`
+
+#Removing the old backup log files
+ren_file=$log_file".*"
+rm -rf $ren_file
+
+#Initiating log rotate
+TEST $CLI volume log $V0 rotate
+
+#Capturing new log file's size
+new_file_size=`file-size $log_file`
+
+#Verifying the size of the new log file and the creation of the backup log file
+TEST ! [ $new_file_size -eq $log_file_size ]
+TEST ls -lrt $ren_file
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs-server/bug-861542.t b/tests/bugs/glusterfs-server/bug-861542.t
new file mode 100755
index 00000000000..60d1b132fb4
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-861542.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+# Distributed volume with a single brick was chosen solely for the ease of
+#implementing the test case (to be precise, for the ease of extracting the port number).
+TEST $CLI volume create $V0 $H0:$B0/brick0;
+
+TEST $CLI volume start $V0;
+
+function port_field()
+{
+ local vol=$1;
+ local opt=$2;
+ if [ $opt -eq '0' ]; then
+ $CLI volume status $vol | grep "brick0" | awk '{print $3}';
+ else
+ $CLI volume status $vol detail | grep "^TCP Port " | awk '{print $4}';
+ fi
+}
+
+function xml_port_field()
+{
+ local vol=$1;
+ local opt=$2;
+ $CLI --xml volume status $vol $opt | tr -d '\n' |\
+#Find the first occurrence of the string between <port> and </port>
+ sed -rn 's/<port>/&###/;s/<\/port>/###&/;s/^.*###(.*)###.*$/\1/p'
+}
+
+TEST $CLI volume status $V0;
+TEST $CLI volume status $V0 detail;
+TEST $CLI --xml volume status $V0;
+TEST $CLI --xml volume status $V0 detail;
+
+# Kill the brick process. After this, port number for the killed (in this case brick) process must be "N/A".
+kill `cat $GLUSTERD_PIDFILEDIR/vols/$V0/$H0-d-backends-brick0.pid`
+
+EXPECT "N/A" port_field $V0 '0'; # volume status
+EXPECT "N/A" port_field $V0 '1'; # volume status detail
+
+EXPECT "N/A" xml_port_field $V0 '';
+EXPECT "N/A" xml_port_field $V0 'detail';
+
+cleanup;
diff --git a/tests/bugs/glusterfs-server/bug-864222.t b/tests/bugs/glusterfs-server/bug-864222.t
new file mode 100755
index 00000000000..01a7a4e3afd
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-864222.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+cd $N0
+
+TEST ls
+
+TEST $CLI volume set $V0 nfs.enable-ino32 on
+# Main test. This should pass.
+TEST ls
+
+cd
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+cleanup
+
diff --git a/tests/bugs/glusterfs-server/bug-873549.t b/tests/bugs/glusterfs-server/bug-873549.t
new file mode 100644
index 00000000000..8b5534728fd
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-873549.t
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd -LDEBUG;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+TEST $CLI volume set $V0 performance.quick-read-cache-size 512MB
+TEST $CLI volume start $V0
+TEST $CLI volume statedump $V0 all
+
+cleanup;
diff --git a/tests/bugs/glusterfs-server/bug-877992.t b/tests/bugs/glusterfs-server/bug-877992.t
new file mode 100755
index 00000000000..300000bcf2c
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-877992.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+
+## Start and create a volume
+TEST glusterd -LDEBUG
+TEST pidof glusterd
+
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+function hooks_prep ()
+{
+ local event=$1
+ touch /tmp/pre.out /tmp/post.out
+ touch $GLUSTERD_WORKDIR/hooks/1/"$event"/pre/Spre.sh
+ touch $GLUSTERD_WORKDIR/hooks/1/"$event"/post/Spost.sh
+
+ printf "#! /bin/bash\necho "$event"Pre > /tmp/pre.out\n" > $GLUSTERD_WORKDIR/hooks/1/"$event"/pre/Spre.sh
+ printf "#! /bin/bash\necho "$event"Post > /tmp/post.out\n" > $GLUSTERD_WORKDIR/hooks/1/"$event"/post/Spost.sh
+ chmod a+x $GLUSTERD_WORKDIR/hooks/1/"$event"/pre/Spre.sh
+ chmod a+x $GLUSTERD_WORKDIR/hooks/1/"$event"/post/Spost.sh
+}
+
+function hooks_cleanup ()
+{
+ local event=$1
+ rm /tmp/pre.out /tmp/post.out
+ rm $GLUSTERD_WORKDIR/hooks/1/"$event"/pre/Spre.sh
+ rm $GLUSTERD_WORKDIR/hooks/1/"$event"/post/Spost.sh
+}
+
+## Verify volume is created and its hooks script ran
+hooks_prep 'create'
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'createPre' cat /tmp/pre.out;
+# Spost.sh comes after S10selinux-label-brick.sh under create post hook script
+# list. So consider the delay in setting SELinux context on bricks
+EXPECT_WITHIN 5 'createPost' cat /tmp/post.out;
+hooks_cleanup 'create'
+
+
+## Start volume and verify that its hooks script ran
+hooks_prep 'start'
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN 5 'startPre' cat /tmp/pre.out;
+EXPECT_WITHIN 5 'startPost' cat /tmp/post.out;
+hooks_cleanup 'start'
+
+cleanup;
diff --git a/tests/bugs/glusterfs-server/bug-887145.t b/tests/bugs/glusterfs-server/bug-887145.t
new file mode 100755
index 00000000000..db2cf3c050b
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-887145.t
@@ -0,0 +1,99 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 performance.open-behind off;
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+
+useradd tmp_user 2>/dev/null 1>/dev/null;
+mkdir $M0/dir;
+mkdir $M0/other;
+cp /etc/passwd $M0/;
+cp $M0/passwd $M0/file;
+chmod 600 $M0/file;
+
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+grep nfsnobody /etc/passwd > /dev/null
+if [ $? -eq 1 ]; then
+usr=nobody
+grp=nobody
+else
+usr=nfsnobody
+grp=nfsnobody
+fi
+chown -R $usr:$grp $M0/dir;
+chown -R tmp_user:tmp_user $M0/other;
+
+TEST $CLI volume set $V0 server.root-squash on;
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+# create files and directories in the root of the glusterfs and nfs mount
+# which is owned by root and hence the right behavior is getting EACCESS
+# as the fops are executed as nfsnobody/nobody.
+touch $M0/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+touch $N0/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $M0/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $N0/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+cp $M0/file $M0/tmp_file 2>/dev/null;
+TEST [ $? -ne 0 ]
+cp $N0/file $N0/tmp_file 2>/dev/null;
+TEST [ $? -ne 0 ]
+cat $M0/file 2>/dev/null;
+TEST [ $? -ne 0 ]
+# here read should be allowed because eventhough file "passwd" is owned
+# by root, the permissions if the file allow other users to read it.
+cat $M0/passwd 1>/dev/null;
+TEST [ $? -eq 0 ]
+cat $N0/passwd 1>/dev/null;
+TEST [ $? -eq 0 ]
+
+# create files and directories should succeed as the fops are being executed
+# inside the directory owned by nfsnobody/nobody
+TEST touch $M0/dir/file;
+TEST touch $N0/dir/foo;
+TEST mkdir $M0/dir/new;
+TEST mkdir $N0/dir/other;
+TEST rm -f $M0/dir/file $M0/dir/foo;
+TEST rmdir $N0/dir/*;
+
+# create files and directories here should fail as other directory is owned
+# by tmp_user.
+touch $M0/other/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+touch $N0/other/foo 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $M0/other/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $N0/other/new 2>/dev/null;
+TEST [ $? -ne 0 ]
+
+userdel tmp_user;
+rm -rf /home/tmp_user;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs-server/bug-889996.t b/tests/bugs/glusterfs-server/bug-889996.t
new file mode 100644
index 00000000000..d7d25c42933
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-889996.t
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+rm -rf $B0/${V0}1;
+
+TEST ! $CLI volume start $V0;
+EXPECT 0 online_brick_count;
+
+cleanup;
diff --git a/tests/bugs/glusterfs-server/bug-904300.t b/tests/bugs/glusterfs-server/bug-904300.t
new file mode 100755
index 00000000000..95d5d381c8b
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-904300.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+# 1-8
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0;
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST mount_nfs $H0:/$V0 $N0 nolock
+TEST mkdir $N0/dir1
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+#
+# Case 1: Allow "dir1" to be mounted only from 127.0.0.1
+# 9-12
+TEST $CLI volume set $V0 export-dir \""/dir1(127.0.0.1)"\"
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 2 is_nfs_export_available
+
+TEST mount_nfs localhost:/$V0/dir1 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+#
+# Case 2: Allow "dir1" to be mounted only from 8.8.8.8. This is
+# a negative test case therefore the mount should fail.
+# 13-16
+TEST $CLI volume set $V0 export-dir \""/dir1(8.8.8.8)"\"
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 2 is_nfs_export_available
+
+TEST ! mount_nfs $H0:/$V0/dir1 $N0 nolock
+TEST ! umount $N0
+
+
+# Case 3: Variation of test case1. Here we are checking with hostname
+# instead of ip address.
+# 17-20
+TEST $CLI volume set $V0 export-dir \""/dir1($H0)"\"
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 2 is_nfs_export_available
+
+TEST mount_nfs $H0:/$V0/dir1 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# Case 4: Variation of test case1. Here we are checking with IP range
+# 21-24
+TEST $CLI volume set $V0 export-dir \""/dir1(127.0.0.0/24)"\"
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 2 is_nfs_export_available
+
+TEST mount_nfs localhost:/$V0/dir1 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+## Finish up
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs-server/bug-905864.c b/tests/bugs/glusterfs-server/bug-905864.c
new file mode 100644
index 00000000000..f70003736e7
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-905864.c
@@ -0,0 +1,83 @@
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <pthread.h>
+
+pthread_t th[5] = {0};
+void
+flock_init(struct flock *f, short int type, off_t start, off_t len)
+{
+ f->l_type = type;
+ f->l_start = start;
+ f->l_len = len;
+}
+
+int
+flock_range_in_steps(int fd, int is_set, short l_type, int start, int end,
+ int step)
+{
+ int ret = 0;
+ int i = 0;
+ struct flock f = {
+ 0,
+ };
+
+ for (i = start; i + step < end; i += step) {
+ flock_init(&f, l_type, i, step);
+ ret = fcntl(fd, (is_set) ? F_SETLKW : F_GETLK, &f);
+ if (ret) {
+ perror("fcntl");
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+void *
+random_locker(void *arg)
+{
+ int fd = *(int *)arg;
+ int i = 0;
+ int is_set = 0;
+
+ /* use thread id to choose GETLK or SETLK operation*/
+ is_set = pthread_self() % 2;
+ (void)flock_range_in_steps(fd, is_set, F_WRLCK, 0, 400, 1);
+
+ return NULL;
+}
+
+int
+main(int argc, char **argv)
+{
+ int fd = -1;
+ int ret = 1;
+ int i = 0;
+ char *fname = NULL;
+
+ if (argc < 2)
+ goto out;
+
+ fname = argv[1];
+ fd = open(fname, O_RDWR);
+ if (fd == -1) {
+ perror("open");
+ goto out;
+ }
+
+ ret = flock_range_in_steps(fd, 1, F_WRLCK, 0, 2000, 2);
+ for (i = 0; i < 5; i++) {
+ pthread_create(&th[i], NULL, random_locker, (void *)&fd);
+ }
+ ret = flock_range_in_steps(fd, 1, F_WRLCK, 0, 2000, 2);
+ for (i = 0; i < 5; i++) {
+ pthread_join(th[i], NULL);
+ }
+out:
+ if (fd != -1)
+ close(fd);
+
+ return ret;
+}
diff --git a/tests/bugs/glusterfs-server/bug-905864.t b/tests/bugs/glusterfs-server/bug-905864.t
new file mode 100644
index 00000000000..44923a85333
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-905864.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M1;
+
+TEST touch $M0/file1;
+
+#following C program tries open up race(s) if any, in F_GETLK/F_SETLKW codepaths
+#of locks xlator
+TEST $CC -pthread -g3 $(dirname $0)/bug-905864.c -o $(dirname $0)/bug-905864
+
+$(dirname $0)/bug-905864 $M0/file1 &
+$(dirname $0)/bug-905864 $M1/file1;
+wait
+
+TEST rm -f $(dirname $0)/bug-905864
+EXPECT $(brick_count $V0) online_brick_count
+
+cleanup
diff --git a/tests/bugs/glusterfs-server/bug-912297.t b/tests/bugs/glusterfs-server/bug-912297.t
new file mode 100755
index 00000000000..08f5dcea9b9
--- /dev/null
+++ b/tests/bugs/glusterfs-server/bug-912297.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting owner-uid as -12
+TEST ! $CLI volume set $V0 owner-uid -12
+EXPECT '' volinfo_field $V0 'storage.owner-uid'
+
+## Setting owner-gid as -5
+TEST ! $CLI volume set $V0 owner-gid -5
+EXPECT '' volinfo_field $V0 'storage.owner-gid'
+
+## Setting owner-uid as 36
+TEST $CLI volume set $V0 owner-uid 36
+EXPECT '36' volinfo_field $V0 'storage.owner-uid'
+
+## Setting owner-gid as 36
+TEST $CLI volume set $V0 owner-gid 36
+EXPECT '36' volinfo_field $V0 'storage.owner-gid'
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-1482528.t b/tests/bugs/glusterfs/bug-1482528.t
new file mode 100644
index 00000000000..3adf260bdcd
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-1482528.t
@@ -0,0 +1,100 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2}
+TEST $CLI volume start $V0
+
+# Mount FUSE without selinux:
+TEST glusterfs -s $H0 --volfile-id $V0 $@ $M0
+
+TEST touch $M0/default.txt
+EXPECT "644" stat -c %a $M0/default.txt
+
+TEST chmod 0444 $M0/default.txt
+EXPECT "444" stat -c %a $M0/default.txt
+
+TEST mkdir $M0/default
+EXPECT "755" stat -c %a $M0/default
+
+TEST chmod 0444 $M0/default
+EXPECT "444" stat -c %a $M0/default
+
+TEST mkfifo $M0/mkfifo
+EXPECT "644" stat -c %a $M0/mkfifo
+
+TEST mknod $M0/dmknod b 4 5
+EXPECT "644" stat -c %a $M0/dmknod
+
+#Set the create-directory-mask and create-mask options
+TEST $CLI volume set $V0 storage.create-directory-mask 0444
+TEST $CLI volume set $V0 storage.create-mask 0444
+
+TEST mkdir $M0/create-directory
+EXPECT "444" stat -c %a $M0/create-directory
+
+TEST touch $M0/create-mask.txt
+EXPECT "444" stat -c %a $M0/create-mask.txt
+
+TEST chmod 0777 $M0/create-mask.txt
+EXPECT "444" stat -c %a $M0/create-mask.txt
+
+TEST chmod 0400 $M0/create-mask.txt
+EXPECT "400" stat -c %a $M0/create-mask.txt
+
+TEST chmod 0777 $M0/create-directory
+EXPECT "444" stat -c %a $M0/create-directory
+
+TEST chmod 0400 $M0/create-directory
+EXPECT "400" stat -c %a $M0/create-directory
+
+TEST mkfifo $M0/cfifo
+EXPECT "444" stat -c %a $M0/cfifo
+
+TEST chmod 0777 $M0/cfifo
+EXPECT "444" stat -c %a $M0/cfifo
+
+TEST mknod $M0/cmknod b 4 5
+EXPECT "444" stat -c %a $M0/cmknod
+
+#set force-create-mode and force-directory-mode options
+TEST $CLI volume set $V0 storage.force-create-mode 0777
+TEST $CLI volume set $V0 storage.force-directory-mode 0333
+
+TEST touch $M0/force-create-mode.txt
+EXPECT "777" stat -c %a $M0/force-create-mode.txt
+
+TEST mkdir $M0/force-directory
+EXPECT "777" stat -c %a $M0/force-directory
+
+TEST chmod 0222 $M0/force-create-mode.txt
+EXPECT "777" stat -c %a $M0/force-create-mode.txt
+
+TEST chmod 0222 $M0/force-directory
+EXPECT "333" stat -c %a $M0/force-directory
+
+TEST mkdir $M0/link
+TEST ln -s $M0/force-create-mode.txt $M0/link
+EXPECT "777" stat -c %a $M0/link/force-create-mode.txt
+
+TEST ln $M0/force-create-mode.txt $M0/link/fc.txt
+EXPECT "777" stat -c %a $M0/link/fc.txt
+
+TEST setfacl -m o:r $M0/force-create-mode.txt
+EXPECT "777" stat -c %a $M0/force-create-mode.txt
+
+TEST ln -s $M0/force-directory $M0/link
+EXPECT "777" stat -c %a $M0/link/force-directory
+
+TEST mkfifo $M0/ffifo
+EXPECT "777" stat -c %a $M0/ffifo
+
+TEST mknod $M0/mknod b 4 5
+EXPECT "777" stat -c %a $M0/mknod
diff --git a/tests/bugs/glusterfs/bug-811493.t b/tests/bugs/glusterfs/bug-811493.t
new file mode 100755
index 00000000000..98f7c121a02
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-811493.t
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI system uuid reset;
+
+uuid1=$(grep UUID $GLUSTERD_WORKDIR/glusterd.info | cut -f 2 -d "=");
+
+TEST $CLI system uuid reset;
+uuid2=$(grep UUID $GLUSTERD_WORKDIR/glusterd.info | cut -f 2 -d "=");
+
+TEST [ $uuid1 != $uuid2 ]
+
+cleanup
diff --git a/tests/bugs/glusterfs/bug-844688.t b/tests/bugs/glusterfs/bug-844688.t
new file mode 100755
index 00000000000..65f41b342a5
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-844688.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function check_callstack_log {
+ local text=$1
+ statedump_file=$(generate_mount_statedump $V0);
+ grep $text $statedump_file 2>/dev/null 1>/dev/null;
+ if [ $? -eq 0 ]; then
+ echo "1";
+ else
+ echo "0";
+ fi;
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick0
+TEST $CLI volume start $V0
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+mount_pid=$(get_mount_process_pid $V0);
+# enable dumping of call stack creation and frame creation times in statedump
+# monitoring is enabled by default
+
+# We want to make sure that there is a pending frame in gluster stack.
+# For that we are creating a blocking lock scenario.
+
+TEST touch $M0/lockfile;
+# Open two fd's on the same file
+exec 8>$M0/lockfile;
+exec 9>$M0/lockfile;
+
+# First flock will succeed and the second one will block, hence the background run.
+flock -x 8 ;
+flock -x 9 &
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" check_callstack_log "callstack-creation-time";
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" check_callstack_log "frame-creation-time";
+
+flock -u 8
+flock -u 9;
+
+# Closing the fd's
+exec 8>&-
+exec 9>&-
+
+TEST rm -f $M0/lockfile;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+rm -f $statedumpdir/glusterdump.$mount_pid.*;
+cleanup
diff --git a/tests/bugs/glusterfs/bug-848251.t b/tests/bugs/glusterfs/bug-848251.t
new file mode 100644
index 00000000000..69ffe680f7f
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-848251.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+
+TEST $CLI volume start $V0;
+
+#enable quota
+TEST $CLI volume quota $V0 enable;
+
+#mount on a random dir
+TEST MOUNTDIR="/tmp/$RANDOM"
+TEST mkdir $MOUNTDIR
+TEST glusterfs -s $H0 --volfile-id=$V0 $MOUNTDIR
+
+function set_quota(){
+ mkdir "$MOUNTDIR/$name"
+ $CLI volume quota $V0 limit-usage /$name 50KB
+}
+
+function quota_list(){
+ $CLI volume quota $V0 list | grep -- /$name | awk '{print $3}'
+}
+
+TEST name=":d1"
+#file name containing ':' in the start
+TEST set_quota
+EXPECT "80%" quota_list
+
+TEST name=":d1/d:1"
+#file name containing ':' in between
+TEST set_quota
+EXPECT "80%" quota_list
+
+TEST name=":d1/d:1/d1:"
+#file name containing ':' in the end
+TEST set_quota
+EXPECT "80%" quota_list
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR
+TEST rm -rf $MOUNTDIR
+TEST $CLI volume stop $V0
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-853690.t b/tests/bugs/glusterfs/bug-853690.t
new file mode 100755
index 00000000000..59facfcddb0
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-853690.t
@@ -0,0 +1,91 @@
+#!/bin/bash
+#
+# Bug 853690 - Test that short writes do not lead to corruption.
+#
+# Mismanagement of short writes in AFR leads to corruption and immediately
+# detectable split-brain. Write a file to a replica volume using error-gen
+# to cause short writes on one replica.
+#
+# Short writes are also possible during heal. If ignored, the files are marked
+# consistent and silently differ. After reading the file, cause a lookup, wait
+# for self-heal and verify that the afr xattrs do not match.
+#
+########
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST mkdir -p $B0/test{1,2}
+
+# Our graph is a two brick replica with 100% frequency of short writes on one
+# side of the replica. This guarantees a single write fop leads to an out-of-sync
+# situation.
+cat > $B0/test.vol <<EOF
+volume test-posix-0
+ type storage/posix
+ option directory $B0/test1
+end-volume
+
+volume test-error-0
+ type debug/error-gen
+ option failure 100
+ option enable writev
+ option error-no GF_ERROR_SHORT_WRITE
+ subvolumes test-posix-0
+end-volume
+
+volume test-locks-0
+ type features/locks
+ subvolumes test-error-0
+end-volume
+
+volume test-posix-1
+ type storage/posix
+ option directory $B0/test2
+end-volume
+
+volume test-locks-1
+ type features/locks
+ subvolumes test-posix-1
+end-volume
+
+volume test-replicate-0
+ type cluster/replicate
+ option background-self-heal-count 0
+ subvolumes test-locks-0 test-locks-1
+end-volume
+EOF
+
+TEST glusterd
+
+TEST glusterfs --volfile=$B0/test.vol --attribute-timeout=0 --entry-timeout=0 $M0
+
+# Send a single write, guaranteed to be short on one replica, and attempt to
+# read the data back. Failure to detect the short write results in different
+# file sizes and immediate split-brain (EIO).
+TEST dd if=/dev/urandom of=$M0/file bs=128k count=1
+TEST dd if=$M0/file of=/dev/null bs=128k count=1
+########
+#
+# Test self-heal with short writes...
+#
+########
+
+# Cause a lookup and wait a few seconds for posterity. This self-heal also fails
+# due to a short write.
+TEST ls $M0/file
+# Verify the attributes on the healthy replica do not reflect consistency with
+# the other replica.
+xa=`getfattr -n trusted.afr.test-locks-0 -e hex $B0/test2/file 2>&1 | grep = | cut -f2 -d=`
+EXPECT_NOT 0x000000000000000000000000 echo $xa
+
+TEST rm -f $M0/file
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+rm -f $B0/test.vol
+rm -rf $B0/test1 $B0/test2
+
+cleanup;
+
diff --git a/tests/bugs/glusterfs/bug-856455.t b/tests/bugs/glusterfs/bug-856455.t
new file mode 100644
index 00000000000..d02b39bda8e
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-856455.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+BRICK_COUNT=3
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+## Mount FUSE with caching disabled
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+function query_pathinfo()
+{
+ local path=$1;
+ local retval;
+
+ local pathinfo=$(getfattr -n trusted.glusterfs.pathinfo $path);
+ retval=$(echo $pathinfo | grep -o 'POSIX' | wc -l);
+ echo $retval
+}
+
+TEST touch $M0/f00f;
+TEST mkdir $M0/f00d;
+
+# verify pathinfo for a file and directory
+EXPECT 1 query_pathinfo $M0/f00f;
+EXPECT $BRICK_COUNT query_pathinfo $M0/f00d;
+
+# Kill a brick process and then query for pathinfo
+# for directories pathinfo should list backend patch from available (up) subvolumes
+
+kill_brick $V0 $H0 ${B0}/${V0}1
+
+EXPECT `expr $BRICK_COUNT - 1` query_pathinfo $M0/f00d;
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-860297.t b/tests/bugs/glusterfs/bug-860297.t
new file mode 100644
index 00000000000..c2d21553f68
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-860297.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+TEST $CLI volume create $V0 $H0:$B0/brick1
+setfattr -x trusted.glusterfs.volume-id $B0/brick1
+## If Extended attribute trusted.glusterfs.volume-id is not present
+## then volume should not be able to start
+TEST ! $CLI volume start $V0;
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-861015-index.t b/tests/bugs/glusterfs/bug-861015-index.t
new file mode 100644
index 00000000000..74ffc45bf2e
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-861015-index.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3,4,5}
+TEST $CLI volume set $V0 ensure-durability off
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}4
+cd $M0
+HEAL_FILES=0
+for i in {1..10}
+do
+ echo "abc" > $i
+ HEAL_FILES=$(($HEAL_FILES+1))
+done
+HEAL_FILES=$(($HEAL_FILES+3)) #count brick root distribute-subvol num of times
+
+cd ~
+EXPECT "$HEAL_FILES" get_pending_heal_count $V0
+TEST rm -f $M0/*
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume heal $V0 info
+#Only root dir should be present now in the indices
+EXPECT "1" afr_get_num_indices_in_brick $B0/${V0}1
+EXPECT "1" afr_get_num_indices_in_brick $B0/${V0}3
+EXPECT "1" afr_get_num_indices_in_brick $B0/${V0}5
+cleanup
diff --git a/tests/bugs/glusterfs/bug-861015-log.t b/tests/bugs/glusterfs/bug-861015-log.t
new file mode 100644
index 00000000000..2f3e0ad14f4
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-861015-log.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+log_wd=$(gluster --print-logdir)
+TEST glusterd
+TEST pidof glusterd
+rm -f $log_wd/glustershd.log
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+cd $M0
+for i in {1..10}
+do
+ dd if=/dev/urandom of=f bs=1024k count=10 2>/dev/null
+done
+
+cd ~
+TEST $CLI volume heal $V0 info
+function count_inode_link_failures {
+ logfile=$1
+ grep "inode link failed on the inode" $logfile | wc -l
+}
+EXPECT "0" count_inode_link_failures $log_wd/glustershd.log
+cleanup
diff --git a/tests/bugs/glusterfs/bug-866459.t b/tests/bugs/glusterfs/bug-866459.t
new file mode 100644
index 00000000000..a6c767b4b0e
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-866459.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Create and start a volume with aio enabled
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 linux-aio on
+TEST $CLI volume set $V0 background-self-heal-count 0
+TEST $CLI volume set $V0 performance.stat-prefetch off;
+TEST $CLI volume start $V0
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+dd of=$M0/a if=/dev/urandom bs=1024k count=1 2>&1 > /dev/null
+B0_hiphenated=`echo $B0 | tr '/' '-'`
+## Bring a brick down
+TEST kill_brick $V0 $H0 $B0/${V0}1
+## Rewrite the file
+dd of=$M0/a if=/dev/urandom bs=1024k count=1 2>&1 > /dev/null
+TEST $CLI volume start $V0 force
+## Wait for the brick to give CHILD_UP in client protocol
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+md5offile2=`md5sum $B0/${V0}2/a | awk '{print $1}'`
+
+##trigger self-heal
+ls -l $M0/a
+
+EXPECT "$md5offile2" echo `md5sum $B0/${V0}1/a | awk '{print $1}'`
+
+## Finish up
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-867253.t b/tests/bugs/glusterfs/bug-867253.t
new file mode 100644
index 00000000000..8c3c39baace
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-867253.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+function file_count()
+{
+ val=1
+
+ if [ "$1" == "0" ]
+ then
+ if [ "$2" == "0" ]
+ then
+ val=0
+ fi
+ fi
+ echo $val
+}
+
+BRICK_COUNT=2
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+## Mount nfs, with nocache option
+TEST mount_nfs $H0:/$V0 $M0 nolock,noac;
+
+touch $M0/files{1..1000};
+
+# Kill a brick process
+kill_brick $V0 $H0 $B0/${V0}0
+
+drop_cache $M0
+
+ls -l $M0 >/dev/null;
+
+NEW_FILE_COUNT=`echo $?`;
+
+TEST $CLI volume start $V0 force
+
+# Kill a brick process
+kill_brick $V0 $H0 $B0/${V0}1
+
+drop_cache $M0
+
+ls -l $M0 >/dev/null;
+
+NEW_FILE_COUNT1=`echo $?`;
+
+EXPECT "0" file_count $NEW_FILE_COUNT $NEW_FILE_COUNT1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup
diff --git a/tests/bugs/glusterfs/bug-869724.t b/tests/bugs/glusterfs/bug-869724.t
new file mode 100644
index 00000000000..ca5bb17081c
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-869724.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+
+## Make volume tightly consistent for metdata
+TEST $CLI volume set $V0 performance.stat-prefetch off;
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+touch $M0/test;
+build_tester $(dirname $0)/getlk_owner.c
+
+TEST $(dirname $0)/getlk_owner $M0/test;
+
+rm -f $(dirname $0)/getlk_owner
+cleanup;
+
diff --git a/tests/bugs/glusterfs/bug-872923.t b/tests/bugs/glusterfs/bug-872923.t
new file mode 100755
index 00000000000..00e02c89cbe
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-872923.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick0 $H0:$B0/brick1
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+
+cd $N0
+mkdir test_hardlink_self_heal;
+cd test_hardlink_self_heal;
+
+for i in `seq 1 5`;
+do
+ mkdir dir.$i;
+ for j in `seq 1 10`;
+ do
+ dd if=/dev/zero of=dir.$i/file.$j bs=1k count=$j > /dev/null 2>&1;
+ done;
+done;
+
+cd ..
+TEST kill_brick $V0 $H0 $B0/brick0
+cd test_hardlink_self_heal;
+
+RET=0
+for i in `seq 1 5`;
+do
+ for j in `seq 1 10`;
+ do
+ ln dir.$i/file.$j dir.$i/link_file.$j > /dev/null 2>&1;
+ RET=$?
+ if [ $RET -ne 0 ]; then
+ break;
+ fi
+ done ;
+ if [ $RET -ne 0 ]; then
+ break;
+ fi
+done;
+
+cd
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+EXPECT "0" echo $RET;
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-873962-spb.t b/tests/bugs/glusterfs/bug-873962-spb.t
new file mode 100644
index 00000000000..db71cc0f6fe
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-873962-spb.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+touch $M0/a
+
+exec 5<$M0/a
+
+kill_brick $V0 $H0 $B0/${V0}0
+echo "hi" > $M0/a
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+kill_brick $V0 $H0 $B0/${V0}1
+echo "bye" > $M0/a
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST ! cat $M0/a #To mark split-brain
+
+TEST ! read -u 5 line
+exec 5<&-
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-873962.t b/tests/bugs/glusterfs/bug-873962.t
new file mode 100755
index 00000000000..7faa9998159
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-873962.t
@@ -0,0 +1,107 @@
+#!/bin/bash
+
+#AFR TEST-IDENTIFIER SPLIT-BRAIN
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+B0_hiphenated=`echo $B0 | tr '/' '-'`
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+
+# If we allow self-heal to happen in the background, we'll get spurious
+# failures - especially at the point labeled "FAIL HERE" but
+# occasionally elsewhere. This behavior is very timing-dependent. It
+# doesn't show up in Jenkins, but it does on JD's and KP's machines, and
+# it got sharply worse because of an unrelated fsync change (6ae6f3d)
+# which changed timing. Putting anything at the FAIL HERE marker tends
+# to make it go away most of the time on affected machines, even if the
+# "anything" is unrelated.
+#
+# What's going on is that the I/O on the first mountpoint is allowed to
+# complete even though self-heal is still in progress and the state on
+# disk does not reflect its result. In fact, the state changes during
+# self-heal create the appearance of split brain when the second I/O
+# comes in, so that fails even though we haven't actually been in split
+# brain since the manual xattr operations. By disallowing background
+# self-heal, we ensure that the second I/O can't happen before self-heal
+# is complete, because it has to follow the first I/O which now has to
+# follow self-heal.
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+
+#Make sure self-heal is not triggered when the bricks are re-started
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+TEST touch $M0/a
+TEST touch $M0/b
+TEST touch $M0/c
+TEST touch $M0/d
+echo "1" > $M0/b
+echo "1" > $M0/d
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "1" > $M0/a
+echo "1" > $M0/c
+TEST setfattr -n trusted.mdata -v abc $M0/b
+TEST setfattr -n trusted.mdata -v abc $M0/d
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "2" > $M0/a
+echo "2" > $M0/c
+TEST setfattr -n trusted.mdata -v def $M0/b
+TEST setfattr -n trusted.mdata -v def $M0/d
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M1 --direct-io-mode=enable
+
+#Files are in split-brain, so open should fail
+TEST ! cat $M0/a;
+TEST ! cat $M1/a;
+TEST ! cat $M0/b;
+TEST ! cat $M1/b;
+
+#Reset split-brain status
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/a;
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/b;
+
+#The operations should do self-heal and give correct output
+EXPECT "2" cat $M0/a;
+# FAIL HERE - see comment about cluster.self-heal-background-count above.
+EXPECT "2" cat $M1/a;
+TEST dd if=$M0/b of=/dev/null bs=1024k
+EXPECT "def" getfattr -n trusted.mdata --only-values $M0/b 2>/dev/null
+EXPECT "def" getfattr -n trusted.mdata --only-values $M1/b 2>/dev/null
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M1 --direct-io-mode=enable
+
+#Files are in split-brain, so open should fail
+TEST ! cat $M0/c
+TEST ! cat $M1/c
+TEST ! cat $M0/d
+TEST ! cat $M1/d
+
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/c
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/d
+
+#The operations should NOT do self-heal but give correct output
+EXPECT "2" cat $M0/c
+EXPECT "2" cat $M1/c
+EXPECT "1" cat $M0/d
+EXPECT "1" cat $M1/d
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-879490.t b/tests/bugs/glusterfs/bug-879490.t
new file mode 100755
index 00000000000..fb8d4263919
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-879490.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+function peer_probe()
+{
+ $CLI peer probe a.b.c.d --xml | xmllint --format - | grep "<opErrstr>"
+}
+
+EXPECT " <opErrstr>Probe returned with Transport endpoint is not connected</opErrstr>" peer_probe
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-879494.t b/tests/bugs/glusterfs/bug-879494.t
new file mode 100755
index 00000000000..12ee466b33a
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-879494.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+function peer_probe()
+{
+ $CLI peer detach a.b.c.d --xml | xmllint --format - | grep "<opErrstr>"
+}
+
+EXPECT " <opErrstr>a.b.c.d is not part of cluster</opErrstr>" peer_probe
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-892730.t b/tests/bugs/glusterfs/bug-892730.t
new file mode 100755
index 00000000000..a76961134c5
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-892730.t
@@ -0,0 +1,77 @@
+#!/bin/bash
+#
+# Bug 892730 - Verify that afr handles EIO errors from the brick properly.
+#
+# The associated bug describes a problem where EIO errors returned from the
+# local filesystem of a brick that is part of a replica volume are exposed to
+# the user. This test simulates such failures and verifies that the volume
+# operates as expected.
+#
+########
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST mkdir -p $B0/test{1,2}
+
+# The graph is a two brick replica with error-gen enabled on the second brick
+# and configured to return EIO lookup errors 100% of the time. This simulates
+# a brick with a crashed or shut down local filesystem. Note that the order in
+# which errors occur is a factor in reproducing the original bug (error-gen
+# must be enabled in the second brick for this test to be effective).
+
+cat > $B0/test.vol <<EOF
+volume test-posix-0
+ type storage/posix
+ option directory $B0/test1
+end-volume
+
+volume test-locks-0
+ type features/locks
+ subvolumes test-posix-0
+end-volume
+
+volume test-posix-1
+ type storage/posix
+ option directory $B0/test2
+end-volume
+
+volume test-error-1
+ type debug/error-gen
+ option failure 100
+ option enable lookup
+ option error-no EIO
+ subvolumes test-posix-1
+end-volume
+
+volume test-locks-1
+ type features/locks
+ subvolumes test-error-1
+end-volume
+
+volume test-replicate-0
+ type cluster/replicate
+ option background-self-heal-count 0
+ subvolumes test-locks-0 test-locks-1
+end-volume
+EOF
+
+TEST glusterd
+
+TEST glusterfs --volfile=$B0/test.vol --attribute-timeout=0 --entry-timeout=0 $M0
+
+# We should be able to create and remove a file without interference from the
+# "broken" brick.
+
+TEST touch $M0/file
+TEST rm $M0/file
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+rm -f $B0/test.vol
+rm -rf $B0/test1 $B0/test2
+
+cleanup;
+
diff --git a/tests/bugs/glusterfs/bug-893338.t b/tests/bugs/glusterfs/bug-893338.t
new file mode 100644
index 00000000000..b915d3e791e
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-893338.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+## Test for symlink success
+TEST touch $M0/reg_file
+TEST ln -s $M0/reg_file $M0/symlink
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-893378.t b/tests/bugs/glusterfs/bug-893378.t
new file mode 100755
index 00000000000..444dafb44bc
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-893378.t
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+BRICK_COUNT=3
+
+function file_has_linkfile()
+{
+ i=0
+ j=0
+ while [ $i -lt $BRICK_COUNT ]
+ do
+ stat=`stat $B0/${V0}$i/$1 2>/dev/null`
+ if [ $? -eq 0 ]
+ then
+ let j++
+ let "BRICK${j}=$i"
+
+ fi
+ let i++
+ done
+ return $j
+}
+
+function get_cached_brick()
+{
+ i=1
+ while [ $i -lt 3 ]
+ do
+ test=`getfattr -n trusted.glusterfs.dht.linkto -e text $B0/${V0}$BRICK$i 2>&1`
+ if [ $? -eq 1 ]
+ then
+ cached=$BRICK"$i"
+ i=$(( $i+3 ))
+ fi
+ let i++
+ done
+
+ return $cached
+}
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+## create a linkfile on subvolume 0
+TEST touch $M0/1
+TEST mv $M0/1 $M0/2
+
+file_has_linkfile 2
+has_link=$?
+if [ $has_link -eq 2 ]
+then
+ get_cached_brick
+ CACHED=$?
+ # Kill a brick process
+ kill_brick $V0 $H0 $B0/${V0}$CACHED
+fi
+
+## trigger a lookup
+ls -l $M0/2 2>/dev/null
+
+## fail dd if file exists.
+
+dd if=/dev/zero of=$M0/2 bs=1 count=1 conv=excl 2>/dev/null
+EXPECT "1" echo $?
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-895235.t b/tests/bugs/glusterfs/bug-895235.t
new file mode 100644
index 00000000000..ac9caae9561
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-895235.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 ensure-durability off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+
+TEST gluster volume profile $V0 start
+TEST dd of=$M0/a if=/dev/zero bs=1024k count=1 oflag=append
+finodelk_max_latency=$($CLI volume profile $V0 info | grep FINODELK | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+
+TEST [ -z $finodelk_max_latency ]
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-896431.t b/tests/bugs/glusterfs/bug-896431.t
new file mode 100755
index 00000000000..61f71141713
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-896431.t
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting cluster.subvols-per-directory as -5
+TEST ! $CLI volume set $V0 cluster.subvols-per-directory -5
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+TEST ! $CLI volume set $V0 subvols-per-directory -5
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Setting cluster.subvols-per-directory as 0
+TEST ! $CLI volume set $V0 cluster.subvols-per-directory 0
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+TEST ! $CLI volume set $V0 subvols-per-directory 0
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Setting cluster.subvols-per-directory as 4 (the total number of bricks)
+TEST ! $CLI volume set $V0 cluster.subvols-per-directory 4
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+TEST ! $CLI volume set $V0 subvols-per-directory 4
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Setting cluster.subvols-per-directory as 2 (the total number of subvolumes)
+TEST $CLI volume set $V0 cluster.subvols-per-directory 2
+EXPECT '2' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Setting cluster.subvols-per-directory as 1
+TEST $CLI volume set $V0 subvols-per-directory 1
+EXPECT '1' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
+
+## Start and create a pure replicate volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 8 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT 'Replicate' volinfo_field $V0 'Type';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Setting cluster.subvols-per-directory as 8 for a replicate volume
+TEST ! $CLI volume set $V0 cluster.subvols-per-directory 8
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+TEST ! $CLI volume set $V0 subvols-per-directory 8
+EXPECT '' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Setting cluster.subvols-per-directory as 1 for a replicate volume
+TEST $CLI volume set $V0 cluster.subvols-per-directory 1
+EXPECT '1' volinfo_field $V0 'cluster.subvols-per-directory';
+TEST $CLI volume set $V0 subvols-per-directory 1
+EXPECT '1' volinfo_field $V0 'cluster.subvols-per-directory';
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-902610.t b/tests/bugs/glusterfs/bug-902610.t
new file mode 100755
index 00000000000..112c947e116
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-902610.t
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Layout-spread set to 3, but subvols up are 2. So layout should split 50-50
+function get_layout()
+{
+ layout1=`getfattr -n trusted.glusterfs.dht -e hex $1 2>&1|grep dht |cut -d = -f2`
+ layout1_s=$(echo $layout1 | cut -c 19-26)
+ layout1_e=$(echo $layout1 | cut -c 27-34)
+ #echo "layout1 from $layout1_s to $layout1_e" > /dev/tty
+ layout2=`getfattr -n trusted.glusterfs.dht -e hex $2 2>&1|grep dht |cut -d = -f2`
+ layout2_s=$(echo $layout2 | cut -c 19-26)
+ layout2_e=$(echo $layout2 | cut -c 27-34)
+ #echo "layout2 from $layout2_s to $layout2_e" > /dev/tty
+
+ if [ x"$layout2_s" = x"00000000" ]; then
+ # Reverse so we only have the real logic in one place.
+ tmp_s=$layout1_s
+ tmp_e=$layout1_e
+ layout1_s=$layout2_s
+ layout1_e=$layout2_e
+ layout2_s=$tmp_s
+ layout2_e=$tmp_e
+ fi
+
+ # Figure out where the join point is.
+ target=$( $PYTHON -c "print('%08x' % (0x$layout1_e + 1))")
+ #echo "target for layout2 = $target" > /dev/tty
+
+ # The second layout should cover everything that the first doesn't.
+ if [ x"$layout2_s" = x"$target" -a x"$layout2_e" = x"ffffffff" ]; then
+ return 0
+ fi
+
+ return 1
+}
+
+BRICK_COUNT=4
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2 $H0:$B0/${V0}3
+## set subvols-per-dir option
+TEST $CLI volume set $V0 subvols-per-directory 3
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0 --entry-timeout=0 --attribute-timeout=0;
+
+TEST ls -l $M0
+
+## kill 2 bricks to bring down available subvol < spread count
+kill_brick $V0 $H0 $B0/${V0}2
+kill_brick $V0 $H0 $B0/${V0}3
+
+mkdir $M0/dir1 2>/dev/null
+
+get_layout $B0/${V0}0/dir1 $B0/${V0}1/dir1
+EXPECT "0" echo $?
+
+cleanup;
diff --git a/tests/bugs/glusterfs/bug-906646.t b/tests/bugs/glusterfs/bug-906646.t
new file mode 100644
index 00000000000..37b8fe5c8eb
--- /dev/null
+++ b/tests/bugs/glusterfs/bug-906646.t
@@ -0,0 +1,97 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+REPLICA=2
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica $REPLICA $H0:$B0/${V0}-00 $H0:$B0/${V0}-01 $H0:$B0/${V0}-10 $H0:$B0/${V0}-11
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+
+## Mount FUSE with caching disabled
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+function xattr_query_check()
+{
+ local path=$1
+ local xa_name=$2
+
+ local ret=$(getfattr -n $xa_name $path 2>&1 | grep -o "$xa_name: No such attribute" | wc -l)
+ echo $ret
+}
+
+function set_xattr()
+{
+ local path=$1
+ local xa_name=$2
+ local xa_val=$3
+
+ setfattr -n $xa_name -v $xa_val $path
+ echo $?
+}
+
+function remove_xattr()
+{
+ local path=$1
+ local xa_name=$2
+
+ setfattr -x $xa_name $path
+ echo $?
+}
+
+f=f00f
+pth=$M0/$f
+
+TEST touch $pth
+
+# fetch backend paths
+backend_paths=`get_backend_paths $pth`
+
+# convert it into and array
+backend_paths_array=($backend_paths)
+
+# setxattr xattr for this file
+EXPECT 0 set_xattr $pth "trusted.name" "test"
+
+# confirm the set on backend
+EXPECT 0 xattr_query_check ${backend_paths_array[0]} "trusted.name"
+EXPECT 0 xattr_query_check ${backend_paths_array[1]} "trusted.name"
+
+brick_path=`echo ${backend_paths_array[0]} | sed -n 's/\(.*\)\/'$f'/\1/p'`
+brick_id=`$CLI volume info $V0 | grep "Brick[[:digit:]]" | grep -n $brick_path | cut -f1 -d:`
+
+# Kill a brick process
+TEST kill_brick $V0 $H0 $brick_path
+
+# remove the xattr from the mount point
+EXPECT 0 remove_xattr $pth "trusted.name"
+
+# we killed ${backend_paths[0]} - so expect the xattr to be there
+# on the backend there
+EXPECT 0 xattr_query_check ${backend_paths_array[0]} "trusted.name"
+EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name"
+
+# restart the brick process
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
+
+TEST $CLI volume heal $V0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+# check backends - xattr should not be present anywhere
+EXPECT 1 xattr_query_check ${backend_paths_array[0]} "trusted.name"
+EXPECT 1 xattr_query_check ${backend_paths_array[1]} "trusted.name"
+
+cleanup;
diff --git a/tests/bugs/glusterfs/getlk_owner.c b/tests/bugs/glusterfs/getlk_owner.c
new file mode 100644
index 00000000000..cbe277318c1
--- /dev/null
+++ b/tests/bugs/glusterfs/getlk_owner.c
@@ -0,0 +1,124 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+
+#define GETLK_OWNER_CHECK(f, cp, label) \
+ do { \
+ switch (f.l_type) { \
+ case F_RDLCK: \
+ case F_WRLCK: \
+ ret = 1; \
+ goto label; \
+ case F_UNLCK: \
+ if (!are_flocks_sane(&f, &cp)) { \
+ ret = 1; \
+ goto label; \
+ } \
+ break; \
+ } \
+ } while (0)
+
+void
+flock_init(struct flock *f, short int type, off_t start, off_t len)
+{
+ f->l_type = type;
+ f->l_start = start;
+ f->l_len = len;
+}
+
+int
+flock_cp(struct flock *dst, struct flock *src)
+{
+ memcpy((void *)dst, (void *)src, sizeof(struct flock));
+}
+
+int
+are_flocks_sane(struct flock *src, struct flock *cpy)
+{
+ return ((src->l_whence == cpy->l_whence) &&
+ (src->l_start == cpy->l_start) && (src->l_len == cpy->l_len));
+}
+
+/*
+ * Test description:
+ * SETLK (0,3), F_WRLCK
+ * SETLK (3,3), F_WRLCK
+ *
+ * the following GETLK requests must return flock struct unmodified
+ * except for l_type to F_UNLCK
+ * GETLK (3,3), F_WRLCK
+ * GETLK (3,3), F_RDLCK
+ *
+ * */
+
+int
+main(int argc, char **argv)
+{
+ int fd = -1;
+ int ret = 1;
+ char *fname = NULL;
+ struct flock f = {
+ 0,
+ };
+ struct flock cp = {
+ 0,
+ };
+
+ if (argc < 2)
+ goto out;
+
+ fname = argv[1];
+ fd = open(fname, O_RDWR);
+ if (fd == -1) {
+ perror("open");
+ goto out;
+ }
+
+ flock_init(&f, F_WRLCK, 0, 3);
+ flock_cp(&cp, &f);
+ ret = fcntl(fd, F_SETLK, &f);
+ if (ret) {
+ perror("fcntl");
+ goto out;
+ }
+ if (!are_flocks_sane(&f, &cp)) {
+ ret = 1;
+ goto out;
+ }
+
+ flock_init(&f, F_WRLCK, 3, 3);
+ flock_cp(&cp, &f);
+ ret = fcntl(fd, F_SETLK, &f);
+ if (ret) {
+ perror("fcntl");
+ goto out;
+ }
+ if (!are_flocks_sane(&f, &cp)) {
+ ret = 1;
+ goto out;
+ }
+
+ flock_init(&f, F_WRLCK, 3, 3);
+ flock_cp(&cp, &f);
+ ret = fcntl(fd, F_GETLK, &f);
+ if (ret) {
+ perror("fcntl");
+ return 1;
+ }
+ GETLK_OWNER_CHECK(f, cp, out);
+
+ flock_init(&f, F_RDLCK, 3, 3);
+ flock_cp(&cp, &f);
+ ret = fcntl(fd, F_GETLK, &f);
+ if (ret) {
+ perror("fcntl");
+ return 1;
+ }
+ GETLK_OWNER_CHECK(f, cp, out);
+
+out:
+ if (fd != -1)
+ close(fd);
+ return ret;
+}
diff --git a/tests/bugs/heal-symlinks.t b/tests/bugs/heal-symlinks.t
new file mode 100644
index 00000000000..ecd2b525be1
--- /dev/null
+++ b/tests/bugs/heal-symlinks.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../afr.rc
+cleanup;
+
+###############################################################################
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+TEST "echo hello_world > FILE"
+TEST ln -s FILE SOFTLINK
+
+# Remove symlink only (not the .glusterfs entry) and trigger named heal.
+TEST rm -f $B0/${V0}2/SOFTLINK
+TEST stat SOFTLINK
+
+# To heal and clear new-entry mark on source bricks.
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT 2 stat -c %h $B0/${V0}2/SOFTLINK
+EXPECT "hello_world" cat $B0/${V0}2/SOFTLINK
+
+cd -
+cleanup
+###############################################################################
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 3 redundancy 1 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+cd $M0
+TEST "echo hello_world > FILE"
+TEST ln -s FILE SOFTLINK
+
+# Remove symlink only (not the .glusterfs entry) and trigger named heal.
+TEST rm -f $B0/${V0}2/SOFTLINK
+TEST stat SOFTLINK
+
+# To heal and clear new-entry mark on source bricks.
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT 2 stat -c %h $B0/${V0}2/SOFTLINK
+TEST kill_brick $V0 $H0 $B0/${V0}0
+cd -
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+cd $M0
+EXPECT "hello_world" cat SOFTLINK
+
+cd -
+cleanup
+###############################################################################
diff --git a/tests/bugs/index/bug-1559004-EMLINK-handling.t b/tests/bugs/index/bug-1559004-EMLINK-handling.t
new file mode 100644
index 00000000000..5596fa56c4c
--- /dev/null
+++ b/tests/bugs/index/bug-1559004-EMLINK-handling.t
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+function create_fake_links() {
+ local dst="$1"
+ local dir="$2"
+ local end=0
+ local start=0
+ local src
+
+ src="$(ls ${dst}/.glusterfs/indices/${dir}/${dir}-* | head -1)"
+ mkdir -p ${dst}/.glusterfs/dummy/${dir}
+ while ln ${src} ${dst}/.glusterfs/dummy/${dir}/link-${end}; do
+ end="$((${end} + 1))"
+ done
+
+ if [[ ${end} -gt 50 ]]; then
+ start="$((${end} - 50))"
+ fi
+ if [[ ${end} -gt 0 ]]; then
+ end="$((${end} - 1))"
+ fi
+
+ for i in $(seq ${start} ${end}); do
+ rm -f ${dst}/.glusterfs/dummy/${dir}/link-${i}
+ done
+}
+
+function count_fake_links() {
+ local dst="$1"
+ local dir="$2"
+
+ echo "$(find ${dst}/.glusterfs/dummy/${dir}/ -name "link-*" | wc -l)"
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST touch $B0/ext4-1
+TEST touch $B0/ext4-2
+TEST touch $B0/ext4-3
+TEST truncate -s 2GB $B0/ext4-1
+TEST truncate -s 2GB $B0/ext4-2
+TEST truncate -s 2GB $B0/ext4-3
+
+TEST mkfs.ext4 -F $B0/ext4-1
+TEST mkfs.ext4 -F $B0/ext4-2
+TEST mkfs.ext4 -F $B0/ext4-3
+TEST mkdir $B0/ext41
+TEST mkdir $B0/ext42
+TEST mkdir $B0/ext43
+TEST mount -t ext4 -o loop $B0/ext4-1 $B0/ext41
+TEST mount -t ext4 -o loop $B0/ext4-2 $B0/ext42
+TEST mount -t ext4 -o loop $B0/ext4-3 $B0/ext43
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/ext4{1,2,3}
+TEST $CLI volume start $V0
+TEST $CLI volume heal $V0 granular-entry-heal enable
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST kill_brick $V0 $H0 $B0/ext41
+
+# Make sure indices exist and are initialized
+TEST touch $M0/dummy
+
+# Create enough hard links on bricks to make it fail faster. This is much
+# faster than creating ~70000 files on a volume.
+create_fake_links $B0/ext42 xattrop &
+create_fake_links $B0/ext42 entry-changes &
+wait
+count_xattrop="$(count_fake_links $B0/ext42 xattrop)"
+count_entry="$(count_fake_links $B0/ext42 entry-changes)"
+
+TEST mkdir $M0/d{1..10}
+TEST touch $M0/d{1..10}/{1..10}
+
+#On ext4 max number of hardlinks is ~65k, so there should be 2 base index files
+EXPECT "^2$" echo $(ls $B0/ext42/.glusterfs/indices/xattrop | grep xattrop | wc -l)
+EXPECT "^2$" echo $(ls $B0/ext42/.glusterfs/indices/entry-changes | grep entry-changes | wc -l)
+
+#Number of hardlinks: count_xattrop/count_entry for fake links, 101 for files,
+# 10 for dirs and 2 for base-indices and root-dir for xattrop
+EXPECT "$((${count_xattrop} + 114))" echo $(ls -l $B0/ext42/.glusterfs/indices/xattrop | grep xattrop | awk '{sum+=$2} END{print sum}')
+EXPECT "$((${count_entry} + 113))" echo $(ls -l $B0/ext42/.glusterfs/indices/entry-changes | grep entry-changes | awk '{sum+=$2} END{print sum}')
+
+cleanup
diff --git a/tests/bugs/io-cache/bug-858242.c b/tests/bugs/io-cache/bug-858242.c
new file mode 100644
index 00000000000..ac87a15533e
--- /dev/null
+++ b/tests/bugs/io-cache/bug-858242.c
@@ -0,0 +1,84 @@
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int
+main(int argc, char *argv[])
+{
+ char *filename = NULL, *volname = NULL, *cmd = NULL;
+ char buffer[1024] = {
+ 0,
+ };
+ int fd = -1;
+ int ret = -1;
+ struct stat statbuf = {
+ 0,
+ };
+
+ if (argc != 3) {
+ fprintf(stderr, "usage: %s <file-name> <volname>\n", argv[0]);
+ goto out;
+ }
+
+ filename = argv[1];
+ volname = argv[2];
+
+ fd = open(filename, O_RDWR | O_CREAT, 0);
+ if (fd < 0) {
+ fprintf(stderr, "open (%s) failed (%s)\n", filename, strerror(errno));
+ goto out;
+ }
+
+ ret = write(fd, "test-content", 12);
+ if (ret < 0) {
+ fprintf(stderr, "write failed (%s)", strerror(errno));
+ goto out;
+ }
+
+ ret = fsync(fd);
+ if (ret < 0) {
+ fprintf(stderr, "fsync failed (%s)", strerror(errno));
+ goto out;
+ }
+
+ ret = fstat(fd, &statbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat failed (%s)", strerror(errno));
+ goto out;
+ }
+
+ ret = asprintf(&cmd, "gluster --mode=script volume stop %s force", volname);
+ if (ret < 0) {
+ fprintf(stderr, "cannot construct cli command string (%s)",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = system(cmd);
+ if (ret < 0) {
+ fprintf(stderr, "stopping volume (%s) failed", volname);
+ goto out;
+ }
+
+ sleep(3);
+
+ ret = read(fd, buffer, 1024);
+ if (ret >= 0) {
+ fprintf(stderr,
+ "read should've returned error, "
+ "but is successful\n");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/tests/bugs/io-cache/bug-858242.t b/tests/bugs/io-cache/bug-858242.t
new file mode 100755
index 00000000000..0c8ffb6ab30
--- /dev/null
+++ b/tests/bugs/io-cache/bug-858242.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 performance.quick-read off
+
+#mount on a random dir
+TEST glusterfs --entry-timeout=3600 --attribute-timeout=3600 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=yes
+
+build_tester $(dirname $0)/bug-858242.c
+
+TEST $(dirname $0)/bug-858242 $M0/testfile $V0
+
+TEST rm -rf $(dirname $0)/858242
+cleanup;
+
diff --git a/tests/bugs/io-cache/bug-read-hang.c b/tests/bugs/io-cache/bug-read-hang.c
new file mode 100644
index 00000000000..e1fae97e7e8
--- /dev/null
+++ b/tests/bugs/io-cache/bug-read-hang.c
@@ -0,0 +1,125 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define NO_INIT 1
+
+int count = 0;
+void
+read_cbk(glfs_fd_t *fd, ssize_t ret, void *data)
+{
+ count++;
+}
+
+glfs_t *
+setup_new_client(char *hostname, char *volname, char *log_file, int flag)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(stderr, "\nglfs_new: returned NULL (%s)\n", strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_file, 7);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ if (flag == NO_INIT)
+ goto out;
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(stderr, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+out:
+ return fs;
+error:
+ return NULL;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+ struct glfs_fd *fd = NULL;
+ char *volname = NULL;
+ char *log_file = NULL;
+ char *hostname = NULL;
+ char *buf = NULL;
+ struct stat stat;
+
+ if (argc != 4) {
+ fprintf(
+ stderr,
+ "Expect following args %s <hostname> <Vol> <log file location>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ log_file = argv[3];
+
+ fs = setup_new_client(hostname, volname, log_file, 0);
+ if (!fs) {
+ fprintf(stderr, "\nsetup_new_client: returned NULL (%s)\n",
+ strerror(errno));
+ goto error;
+ }
+
+ fd = glfs_opendir(fs, "/");
+ if (!fd) {
+ fprintf(stderr, "/: %s\n", strerror(errno));
+ return -1;
+ }
+
+ glfs_readdirplus(fd, &stat);
+
+ fd = glfs_open(fs, "/test", O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto error;
+ }
+
+ buf = (char *)malloc(5);
+
+ ret = glfs_pread(fd, buf, 5, 0, 0, NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Read(%s): %d (%s)\n", "test", ret, strerror(errno));
+ return ret;
+ }
+
+ free(buf);
+ glfs_close(fd);
+
+ ret = glfs_fini(fs);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fini failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ return 0;
+error:
+ return -1;
+}
diff --git a/tests/bugs/io-cache/bug-read-hang.t b/tests/bugs/io-cache/bug-read-hang.t
new file mode 100755
index 00000000000..f8efe281723
--- /dev/null
+++ b/tests/bugs/io-cache/bug-read-hang.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+#. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..2};
+
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+TEST $CLI volume set $V0 performance.cache-invalidation on
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.cache-samba-metadata on
+TEST $CLI volume set $V0 open-behind off
+
+logdir=`gluster --print-logdir`
+
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+echo "Hello" > $M0/test
+
+TEST build_tester $(dirname $0)/bug-read-hang.c -lgfapi
+TEST $(dirname $0)/bug-read-hang $H0 $V0 $logdir/bug-read-hang.log
+
+cleanup_tester $(dirname $0)/bug-read-hang
+
+cleanup;
diff --git a/tests/bugs/io-stats/bug-1598548.t b/tests/bugs/io-stats/bug-1598548.t
new file mode 100755
index 00000000000..19b0c053d08
--- /dev/null
+++ b/tests/bugs/io-stats/bug-1598548.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+checkdumpthread () {
+ local brick_pid=$(get_brick_pid $1 $2 $3)
+ local thread_count=$(gstack $brick_pid | grep -c _ios_dump_thread)
+ echo $thread_count
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume start $V0
+
+TEST $CLI volume profile $V0 start
+EXPECT 0 checkdumpthread $V0 $H0 $B0/${V0}0
+
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 3
+EXPECT 1 checkdumpthread $V0 $H0 $B0/${V0}0
+
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 10
+EXPECT 1 checkdumpthread $V0 $H0 $B0/${V0}0
+
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 0
+EXPECT 0 checkdumpthread $V0 $H0 $B0/${V0}0
+
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 7
+EXPECT 1 checkdumpthread $V0 $H0 $B0/${V0}0
+
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 0
+EXPECT 0 checkdumpthread $V0 $H0 $B0/${V0}0
+
+TEST $CLI volume set $V0 diagnostics.stats-dump-interval 11
+EXPECT 1 checkdumpthread $V0 $H0 $B0/${V0}0
+
+cleanup;
diff --git a/tests/bugs/logging/bug-823081.t b/tests/bugs/logging/bug-823081.t
new file mode 100755
index 00000000000..bd1965d2d49
--- /dev/null
+++ b/tests/bugs/logging/bug-823081.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+cmd_log_history="cmd_history.log"
+V1=patchy2
+
+TEST glusterd
+TEST pidof glusterd
+
+logdir=`gluster --print-logdir`
+function set_tail ()
+{
+ vol=$1;
+ tail_success="volume create $vol $H0:$B0/${vol}1 $H0:$B0/${vol}2 : SUCCESS"
+ tail_failure="volume create $vol $H0:$B0/${vol}1 $H0:$B0/${vol}2 : FAILED : Volume $vol already exists"
+ tail_success_force="volume create $vol $H0:$B0/${vol}1 $H0:$B0/${vol}2 force : SUCCESS"
+ tail_failure_force="volume create $vol $H0:$B0/${vol}1 $H0:$B0/${vol}2 force : FAILED : Volume $vol already exists"
+}
+
+set_tail $V0;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+tail=`tail -n 1 $logdir/$cmd_log_history | cut -d " " -f 6-`
+TEST [[ \"$tail\" == \"$tail_success\" ]]
+
+TEST ! $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+tail=`tail -n 1 $logdir/$cmd_log_history | cut -d " " -f 6-`
+TEST [[ \"$tail\" == \"$tail_failure\" ]]
+
+set_tail $V1;
+TEST gluster volume create $V1 $H0:$B0/${V1}{1,2} force;
+tail=`tail -n 1 $logdir/$cmd_log_history | cut -d " " -f 6-`
+TEST [[ \"$tail\" == \"$tail_success_force\" ]]
+
+TEST ! gluster volume create $V1 $H0:$B0/${V1}{1,2} force;
+tail=`tail -n 1 $logdir/$cmd_log_history | cut -d " " -f 6-`
+TEST [[ \"$tail\" == \"$tail_failure_force\" ]]
+
+cleanup;
diff --git a/tests/bugs/md-cache/afr-stale-read.t b/tests/bugs/md-cache/afr-stale-read.t
new file mode 100755
index 00000000000..7cee5afe27e
--- /dev/null
+++ b/tests/bugs/md-cache/afr-stale-read.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+#. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..2};
+
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+TEST $CLI volume set $V0 performance.cache-invalidation on
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.cache-samba-metadata on
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 read-subvolume $V0-client-0
+TEST $CLI volume set $V0 performance.quick-read off
+
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+#Write some data from M0 and read it from M1,
+#so that M1 selects a read subvol, and caches the lookup
+TEST `echo "one" > $M0/file1`
+EXPECT "one" cat $M1/file1
+
+#Fail few writes from M0 on brick-0, as a result of this failure
+#upcall in brick-0 will invalidate the read subvolume of M1.
+TEST chattr +i $B0/${V0}1/file1
+TEST `echo "two" > $M0/file1`
+TEST `echo "three" > $M0/file1`
+TEST `echo "four" > $M0/file1`
+TEST `echo "five" > $M0/file1`
+
+EXPECT_WITHIN $MDC_TIMEOUT "five" cat $M1/file1
+TEST chattr -i $B0/${V0}1/file1
+cleanup;
diff --git a/tests/bugs/md-cache/bug-1211863.t b/tests/bugs/md-cache/bug-1211863.t
new file mode 100755
index 00000000000..ba9bde9fee8
--- /dev/null
+++ b/tests/bugs/md-cache/bug-1211863.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## 1. Start glusterd
+TEST glusterd;
+
+## 2. Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+## 3. Start the volume
+TEST $CLI volume start $V0
+
+## 4. Enable the upcall xlator, and increase the md-cache timeout to max
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.xattr-cache-list "user.*"
+
+## 6. Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+## 8. Create a file
+TEST touch $M0/file1
+
+## 9. Setxattr from mount-0
+TEST "setfattr -n user.DOSATTRIB -v "abc" $M0/file1"
+## 10. Getxattr from mount-1, this should return the correct value as it is a fresh getxattr
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc"
+
+## 11. Now modify the same xattr from mount-0 again
+TEST "setfattr -n user.DOSATTRIB -v "xyz" $M0/file1"
+## 12. Since the xattr is already cached in mount-1 it returns the old xattr
+ #value, until the timeout (600)
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc"
+
+## 13. Unmount to clean all the cache
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+TEST $CLI volume set $V0 performance.cache-invalidation on
+
+## 19. Restart the volume to restart the bick process
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+
+## 21. Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+## 23. Repeat the tests 11-14, but this time since cache invalidation is on,
+ #the getxattr will reflect the new value
+TEST "setfattr -n user.DOSATTRIB -v "abc" $M0/file1"
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc"
+TEST "setfattr -n user.DOSATTRIB -v "xyz" $M0/file1"
+sleep 2; #There can be a very very small window where the next getxattr
+ #reaches md-cache, before the cache-invalidation caused by previous
+ #setxattr, reaches md-cache. Hence sleeping for 2 sec.
+ #Also it should not be > 600.
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q xyz"
+
+TEST $CLI volume set $V0 cache-samba-metadata off
+EXPECT 'off' volinfo_field $V0 'performance.cache-samba-metadata'
+
+cleanup;
diff --git a/tests/bugs/md-cache/bug-1211863_unlink.t b/tests/bugs/md-cache/bug-1211863_unlink.t
new file mode 100755
index 00000000000..34392ed919f
--- /dev/null
+++ b/tests/bugs/md-cache/bug-1211863_unlink.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+## Start the volume
+TEST $CLI volume start $V0
+
+## Enable the upcall xlator, and increase the md-cache timeout to max
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+TEST $CLI volume set $V0 performance.cache-invalidation on
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.cache-samba-metadata on
+
+## Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+## Create files and directories from M0
+TEST mkdir $M0/dir1
+TEST touch $M0/dir1/file{1..5}
+
+## Lookup few files from M1, so that md-cache cahces
+TEST ls -l $M1/dir1/file2
+TEST ls -l $M1/dir1/file3
+
+## Remove the looked up file from M0
+TEST rm $M0/dir1/file2
+TEST mv $M0/dir1/file3 $M0/dir1/file6
+
+## Check if the files are not visible from both M0 and M1
+EXPECT_WITHIN $MDC_TIMEOUT "N" path_exists $M0/dir1/file2
+EXPECT_WITHIN $MDC_TIMEOUT "N" path_exists $M0/dir1/file3
+EXPECT_WITHIN $MDC_TIMEOUT "Y" path_exists $M0/dir1/file6
+
+EXPECT_WITHIN $MDC_TIMEOUT "N" path_exists $M1/dir1/file2
+EXPECT_WITHIN $MDC_TIMEOUT "N" path_exists $M1/dir1/file3
+EXPECT_WITHIN $MDC_TIMEOUT "Y" path_exists $M1/dir1/file6
+
+cleanup;
diff --git a/tests/bugs/md-cache/bug-1476324.t b/tests/bugs/md-cache/bug-1476324.t
new file mode 100644
index 00000000000..c34f412a15e
--- /dev/null
+++ b/tests/bugs/md-cache/bug-1476324.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.cache-samba-metadata on
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST touch $M0/file1
+
+TEST "setfattr -n user.DOSATTRIB -v 0sAAOW $M0/file1"
+TEST "getfattr -n user.DOSATTRIB $M0/file1 -e base64 | grep -q 0sAAOW"
+
+TEST "setfattr -n user.DOSATTRIB -v 0x00ff $M0/file1"
+TEST "getfattr -n user.DOSATTRIB $M0/file1 -e hex | grep -q 0x00ff"
+
+cleanup;
diff --git a/tests/bugs/md-cache/bug-1632503.t b/tests/bugs/md-cache/bug-1632503.t
new file mode 100755
index 00000000000..aeb57f65639
--- /dev/null
+++ b/tests/bugs/md-cache/bug-1632503.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TESTS_EXPECTED_IN_LOOP=5
+
+TEST glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.md-cache-statfs on
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+for i in $(seq 1 5); do
+ TEST_IN_LOOP df $M0;
+done
+
+cleanup;
diff --git a/tests/bugs/md-cache/bug-1726205.t b/tests/bugs/md-cache/bug-1726205.t
new file mode 100644
index 00000000000..795130e9bd8
--- /dev/null
+++ b/tests/bugs/md-cache/bug-1726205.t
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 group samba
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST touch $M0/file
+TEST "setfattr -n "user.DosStream.Zone.Identifier:\$DATA" -v '\0' $M0/file"
+TEST "getfattr -n "user.DosStream.Zone.Identifier:\$DATA" -e hex $M0/file | grep -q 0x00"
+
+cleanup;
diff --git a/tests/bugs/md-cache/setxattr-prepoststat.t b/tests/bugs/md-cache/setxattr-prepoststat.t
new file mode 100755
index 00000000000..01fa768299c
--- /dev/null
+++ b/tests/bugs/md-cache/setxattr-prepoststat.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## 1. Start glusterd
+TEST glusterd;
+
+## 2. Lets create volume
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+
+TEST $CLI volume set $V0 group metadata-cache
+TEST $CLI volume set $V0 performance.xattr-cache-list "user.*"
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+TEST touch $M0/file1
+TEST `echo "abakjshdjahskjdhakjhdskjac" >> $M0/file1`
+size=`stat -c '%s' $M0/file1`
+
+## Setxattr from mount-0
+TEST "setfattr -n user.DOSATTRIB -v "abc" $M0/file1"
+EXPECT $size stat -c '%s' $M0/file1
+
+## Getxattr from mount-1, this should return the correct value
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc"
+
+TEST "setfattr -x user.DOSATTRIB $M1/file1"
+EXPECT $size stat -c '%s' $M1/file1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+cleanup;
diff --git a/tests/bugs/nfs/bug-1053579.t b/tests/bugs/nfs/bug-1053579.t
new file mode 100755
index 00000000000..2f53172e24c
--- /dev/null
+++ b/tests/bugs/nfs/bug-1053579.t
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup
+
+# prepare the users and groups
+NEW_USER=bug1053579
+NEW_UID=1053579
+NEW_GID=1053579
+LAST_GID=1053779
+NEW_GIDS=${NEW_GID}
+
+# OS-specific overrides
+case $OSTYPE in
+NetBSD|Darwin)
+ # only NGROUPS_MAX=16 secondary groups are supported
+ LAST_GID=1053593
+ ;;
+FreeBSD)
+ # NGROUPS_MAX=1023 (FreeBSD>=8.0), we can afford 200 groups
+ ;;
+Linux)
+ # NGROUPS_MAX=65536, we can afford 200 groups
+ ;;
+*)
+ ;;
+esac
+
+# create a user that belongs to many groups
+for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID})
+do
+ groupadd -o -g ${GID} ${NEW_USER}-${GID}
+ NEW_GIDS="${NEW_GIDS},${NEW_USER}-${GID}"
+done
+TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -G ${NEW_USER}-${NEW_GIDS} ${NEW_USER}
+
+# preparation done, start the tests
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume set $V0 nfs.server-aux-gids on
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+# mount the volume
+TEST mount_nfs $H0:/$V0 $N0 nolock
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+# the actual test, this used to crash
+su -m ${NEW_USER} -c "stat $N0/. > /dev/null"
+TEST [ $? -eq 0 ]
+
+# create a file that only a user in a high-group can access
+echo 'Hello World!' > $N0/README
+chgrp ${LAST_GID} $N0/README
+chmod 0640 $N0/README
+
+#su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null"
+su -m ${NEW_USER} -c "cat $N0/README"
+ret=$?
+
+case $OSTYPE in
+Linux) # Linux NFS fails with big GID
+ if [ $ret -ne 0 ] ; then
+ res="Y"
+ else
+ res="N"
+ fi
+ ;;
+*) # Other systems should cope better
+ if [ $ret -eq 0 ] ; then
+ res="Y"
+ else
+ res="N"
+ fi
+ ;;
+esac
+TEST [ "x$res" = "xY" ]
+
+# This passes only on build.gluster.org, not reproducible on other machines?!
+#su -m ${NEW_USER} -c "cat $M0/README 2>&1 > /dev/null"
+#TEST [ $? -ne 0 ]
+
+# enable server.manage-gids and things should work
+TEST $CLI volume set $V0 server.manage-gids on
+
+su -m ${NEW_USER} -c "cat $N0/README 2>&1 > /dev/null"
+TEST [ $? -eq 0 ]
+su -m ${NEW_USER} -c "cat $M0/README 2>&1 > /dev/null"
+TEST [ $? -eq 0 ]
+
+# cleanup
+userdel --force ${NEW_USER}
+for GID in $(seq -f '%6.0f' ${NEW_GID} ${LAST_GID})
+do
+ groupdel ${NEW_USER}-${GID}
+done
+
+rm -f $N0/README
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t b/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t
new file mode 100644
index 00000000000..c360db4c91c
--- /dev/null
+++ b/tests/bugs/nfs/bug-1143880-fix-gNFSd-auth-crash.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST mount_nfs $H0:/$V0 $N0 nolock
+TEST mkdir -p $N0/foo
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+TEST mount_nfs $H0:/$V0/foo $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+cleanup
diff --git a/tests/bugs/nfs/bug-1157223-symlink-mounting.t b/tests/bugs/nfs/bug-1157223-symlink-mounting.t
new file mode 100644
index 00000000000..dea609ed193
--- /dev/null
+++ b/tests/bugs/nfs/bug-1157223-symlink-mounting.t
@@ -0,0 +1,126 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume info;
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0;
+
+## Wait for volume to register with rpc.mountd
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+## Mount NFS
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+mkdir $N0/dir1;
+mkdir $N0/dir2;
+pushd $N0/ ;
+
+##link created using relative path
+ln -s dir1 symlink1;
+
+##relative path contains ".."
+ln -s ../dir1 dir2/symlink2;
+
+##link created using absolute path
+ln -s $N0/dir1 symlink3;
+
+##link pointing to another symlinks
+ln -s symlink1 symlink4
+ln -s symlink3 symlink5
+
+##dead links
+ln -s does/not/exist symlink6
+
+##link which contains ".." points out of glusterfs
+ln -s ../../ symlink7
+
+##links pointing to unauthorized area
+ln -s .glusterfs symlink8
+
+popd ;
+
+##Umount the volume
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount and umount NFS via directory
+TEST mount_nfs $H0:/$V0/dir1 $N0 nolock;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount and umount NFS via symlink1
+TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount and umount NFS via symlink2
+TEST mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount NFS via symlink3 should fail
+TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock;
+
+## Mount and umount NFS via symlink4
+TEST mount_nfs $H0:/$V0/symlink4 $N0 nolock;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount NFS via symlink5 should fail
+TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock;
+
+## Mount NFS via symlink6 should fail
+TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock;
+
+## Mount NFS via symlink7 should fail
+TEST ! mount_nfs $H0:/$V0/symlink7 $N0 nolock;
+
+## Mount NFS via symlink8 should fail
+TEST ! mount_nfs $H0:/$V0/symlink8 $N0 nolock;
+
+##Similar check for udp mount
+$CLI volume stop $V0
+TEST $CLI volume set $V0 nfs.mount-udp on
+$CLI volume start $V0
+
+## Wait for volume to register with rpc.mountd
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+## Mount and umount NFS via directory
+TEST mount_nfs $H0:/$V0/dir1 $N0 nolock,mountproto=udp,proto=tcp;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount and umount NFS via symlink1
+TEST mount_nfs $H0:/$V0/symlink1 $N0 nolock,mountproto=udp,proto=tcp;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount and umount NFS via symlink2
+TEST mount_nfs $H0:/$V0/dir2/symlink2 $N0 nolock,mountproto=udp,proto=tcp;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount NFS via symlink3 should fail
+TEST ! mount_nfs $H0:/$V0/symlink3 $N0 nolock,mountproto=udp,proto=tcp;
+
+## Mount and umount NFS via symlink4
+TEST mount_nfs $H0:/$V0/symlink4 $N0 nolock,mountproto=udp,proto=tcp;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+## Mount NFS via symlink5 should fail
+TEST ! mount_nfs $H0:/$V0/symlink5 $N0 nolock,mountproto=udp,proto=tcp;
+
+## Mount NFS via symlink6 should fail
+TEST ! mount_nfs $H0:/$V0/symlink6 $N0 nolock,mountproto=udp,proto=tcp;
+
+##symlink7 is not check here, because in udp mount ../../ resolves into root '/'
+
+## Mount NFS via symlink8 should fail
+TEST ! mount_nfs $H0:/$V0/symlink8 $N0 nolock,mountproto=udp,proto=tcp;
+
+rm -rf $H0:$B0/
+cleanup;
diff --git a/tests/bugs/nfs/bug-1161092-nfs-acls.t b/tests/bugs/nfs/bug-1161092-nfs-acls.t
new file mode 100644
index 00000000000..45a22e79336
--- /dev/null
+++ b/tests/bugs/nfs/bug-1161092-nfs-acls.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 nfs.disable false
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+TEST mount_nfs $H0:/$V0 $N0
+
+TEST touch $N0/file1
+TEST chmod 700 $N0/file1
+TEST getfacl $N0/file1
+
+TEST $CLI volume set $V0 root-squash on
+TEST getfacl $N0/file1
+
+TEST umount_nfs $H0:/$V0 $N0
+TEST mount_nfs $H0:/$V0 $N0
+TEST getfacl $N0/file1
+
+## Before killing daemon to avoid deadlocks
+umount_nfs $N0
+
+cleanup;
+
diff --git a/tests/bugs/nfs/bug-1166862.t b/tests/bugs/nfs/bug-1166862.t
new file mode 100755
index 00000000000..c4f51a2d446
--- /dev/null
+++ b/tests/bugs/nfs/bug-1166862.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+#
+# When nfs.mount-rmtab is disabled, it should not get updated.
+#
+# Based on: bug-904065.t
+#
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+# count the lines of a file, return 0 if the file does not exist
+function count_lines()
+{
+ if [ -n "$1" ]
+ then
+ $@ 2>/dev/null | wc -l
+ else
+ echo 0
+ fi
+}
+
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1
+EXPECT 'Created' volinfo_field $V0 'Status'
+TEST $CLI volume set $V0 nfs.disable false
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# glusterfs/nfs needs some time to start up in the background
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+# disable the rmtab by settting it to the magic "/-" value
+TEST $CLI volume set $V0 nfs.mount-rmtab /-
+
+# before mounting the rmtab should be empty
+EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+
+TEST mount_nfs $H0:/$V0 $N0 nolock
+EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+
+# showmount should list one client
+EXPECT '1' count_lines showmount --no-headers $H0
+
+# unmount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# after resetting the option, the rmtab should get updated again
+TEST $CLI volume reset $V0 nfs.mount-rmtab
+
+# before mounting the rmtab should be empty
+EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+
+TEST mount_nfs $H0:/$V0 $N0 nolock
+EXPECT '2' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+
+# removing a mount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+EXPECT '0' count_lines cat $GLUSTERD_WORKDIR/nfs/rmtab
+
+cleanup
diff --git a/tests/bugs/nfs/bug-1210338.c b/tests/bugs/nfs/bug-1210338.c
new file mode 100644
index 00000000000..d4099244176
--- /dev/null
+++ b/tests/bugs/nfs/bug-1210338.c
@@ -0,0 +1,31 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+int
+main(int argc, char *argv[])
+{
+ int ret = -1;
+ int fd = -1;
+
+ fd = open(argv[1], O_CREAT | O_EXCL, 0644);
+
+ if (fd == -1) {
+ fprintf(stderr, "creation of the file %s failed (%s)\n", argv[1],
+ strerror(errno));
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (fd > 0)
+ close(fd);
+
+ return ret;
+}
diff --git a/tests/bugs/nfs/bug-1210338.t b/tests/bugs/nfs/bug-1210338.t
new file mode 100644
index 00000000000..b5c9245affd
--- /dev/null
+++ b/tests/bugs/nfs/bug-1210338.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+NFS_SOURCE=$(dirname $0)/bug-1210338.c
+NFS_EXEC=$(dirname $0)/excl_create
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+
+build_tester $NFS_SOURCE -o $NFS_EXEC
+TEST [ -e $NFS_EXEC ]
+
+TEST $NFS_EXEC $N0/my_file
+
+rm -f $NFS_EXEC;
+
+cleanup
diff --git a/tests/bugs/nfs/bug-1302948.t b/tests/bugs/nfs/bug-1302948.t
new file mode 100755
index 00000000000..a2fb0e68ff0
--- /dev/null
+++ b/tests/bugs/nfs/bug-1302948.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+# TEST the nfs.rdirplus option
+. $(dirname $0)/../../include.rc
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 nfs.rdirplus off
+TEST $CLI volume set $V0 nfs.rdirplus on
+cleanup
diff --git a/tests/bugs/nfs/bug-847622.t b/tests/bugs/nfs/bug-847622.t
new file mode 100755
index 00000000000..5ccee722ed9
--- /dev/null
+++ b/tests/bugs/nfs/bug-847622.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+case $OSTYPE in
+NetBSD)
+ echo "Skip test on ACL which are not available on NetBSD" >&2
+ SKIP_TESTS
+ exit 0
+ ;;
+*)
+ ;;
+esac
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/brick0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+cd $N0
+
+# simple getfacl setfacl commands
+TEST touch testfile
+TEST setfacl -m u:14:r testfile
+TEST getfacl testfile
+
+cd
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+cleanup
+
diff --git a/tests/bugs/nfs/bug-877885.t b/tests/bugs/nfs/bug-877885.t
new file mode 100755
index 00000000000..dca315a3d01
--- /dev/null
+++ b/tests/bugs/nfs/bug-877885.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick0 $H0:$B0/brick1
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 \
+$M0;
+
+TEST touch $M0/file
+TEST mkdir $M0/dir
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+cd $N0
+
+rm -rf * &
+
+TEST mount_nfs $H0:/$V0 $N1 retry=0,nolock;
+
+cd;
+
+kill %1;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N1
+
+cleanup
diff --git a/tests/bugs/nfs/bug-904065.t b/tests/bugs/nfs/bug-904065.t
new file mode 100755
index 00000000000..0eba86e7ee8
--- /dev/null
+++ b/tests/bugs/nfs/bug-904065.t
@@ -0,0 +1,100 @@
+#!/bin/bash
+#
+# This test does not use 'showmount' from the nfs-utils package, it would
+# require setting up a portmapper (either rpcbind or portmap, depending on the
+# Linux distribution used for testing). The persistancy of the rmtab should not
+# affect the current showmount outputs, so existing regression tests should be
+# sufficient.
+#
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+# count the lines of a file, return 0 if the file does not exist
+function count_lines()
+{
+ if [ -e "$1" ]
+ then
+ wc -l < $1
+ else
+ echo 0
+ fi
+}
+
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1
+EXPECT 'Created' volinfo_field $V0 'Status'
+TEST $CLI volume set $V0 nfs.disable false
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# glusterfs/nfs needs some time to start up in the background
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+# before mounting the rmtab should be empty
+EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+
+TEST mount_nfs $H0:/$V0 $N0 nolock
+# the output would looks similar to:
+#
+# hostname-0=172.31.122.104
+# mountpoint-0=/ufo
+#
+EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+
+# duplicate mounts should not be recorded (client could have crashed)
+TEST mount_nfs $H0:/$V0 $N1 nolock
+EXPECT '2' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+
+# removing a mount should (even if there are two) should remove the entry
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N1
+EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+
+# unmounting the other mount should work flawlessly
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+EXPECT '0' count_lines $GLUSTERD_WORKDIR/nfs/rmtab
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $M0
+
+# we'll create a fake rmtab here, similar to how an other storage server would do
+# using an invalid IP address to prevent (unlikely) collisions on the test-machine
+cat << EOF > $M0/rmtab
+hostname-0=127.0.0.256
+mountpoint-0=/ufo
+EOF
+EXPECT '2' count_lines $M0/rmtab
+
+# reconfigure merges the rmtab with the one on the volume
+TEST gluster volume set $V0 nfs.mount-rmtab $M0/rmtab
+
+# glusterfs/nfs needs some time to restart
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+# Apparently "is_nfs_export_available" might return even if the export is
+# not, in fact, available. (eyeroll) Give it a bit of extra time.
+#
+# TBD: fix the broken shell function instead of working around it here
+sleep 5
+
+# a new mount should be added to the rmtab, not overwrite exiting ones
+TEST mount_nfs $H0:/$V0 $N0 nolock
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT '4' count_lines $M0/rmtab
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+EXPECT '2' count_lines $M0/rmtab
+
+# TODO: nfs/reconfigure() is never called and is therefor disabled. When the
+# NFS-server supports reloading and does not get restarted anymore, we should
+# add a test that includes the merging of entries in the old rmtab with the new
+# rmtab.
+
+cleanup
diff --git a/tests/bugs/nfs/bug-915280.t b/tests/bugs/nfs/bug-915280.t
new file mode 100755
index 00000000000..bd279157c25
--- /dev/null
+++ b/tests/bugs/nfs/bug-915280.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 nfs.disable false
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+MOUNTDIR=$N0;
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock,timeo=30,retrans=1
+TEST touch $N0/testfile
+
+TEST $CLI volume set $V0 debug.error-gen client
+TEST $CLI volume set $V0 debug.error-fops stat
+TEST $CLI volume set $V0 debug.error-failure 100
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+pid_file=$(read_nfs_pidfile);
+
+getfacl $N0/testfile 2>/dev/null
+
+nfs_pid=$(get_nfs_pid);
+if [ ! $nfs_pid ]
+then
+ nfs_pid=0;
+fi
+
+TEST [ $nfs_pid -eq $pid_file ]
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR
+
+cleanup;
diff --git a/tests/bugs/nfs/bug-970070.t b/tests/bugs/nfs/bug-970070.t
new file mode 100755
index 00000000000..61be4844e51
--- /dev/null
+++ b/tests/bugs/nfs/bug-970070.t
@@ -0,0 +1,13 @@
+#!/bin/bash
+# TEST the nfs.acl option
+. $(dirname $0)/../../include.rc
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 nfs.acl off
+TEST $CLI volume set $V0 nfs.acl on
+cleanup
diff --git a/tests/bugs/nfs/bug-974972.t b/tests/bugs/nfs/bug-974972.t
new file mode 100755
index 00000000000..975c46f85a4
--- /dev/null
+++ b/tests/bugs/nfs/bug-974972.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+#This script checks that nfs mount does not fail lookup on files with split-brain
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0
+TEST touch $N0/1
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}1
+echo abc > $N0/1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1
+
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}0
+echo def > $N0/1
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" nfs_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_nfs $V0 1
+
+#Lookup should not fail
+TEST ls $N0/1
+TEST ! cat $N0/1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+cleanup
diff --git a/tests/bugs/nfs/showmount-many-clients.t b/tests/bugs/nfs/showmount-many-clients.t
new file mode 100644
index 00000000000..c6c9c35d60a
--- /dev/null
+++ b/tests/bugs/nfs/showmount-many-clients.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+#
+# The nfs.rpc-auth-allow volume option is used to generate the list of clients
+# that are displayed as able to mount the export. The "group" in the export
+# should be a list of all clients, identified by "name". In previous versions,
+# the "name" was the copied string from nfs.rpc-auth-allow. This is not
+# correct, as the volume option should be parsed and split into different
+# groups.
+#
+# When the single string is passed, this testcase fails when the
+# nfs.rpc-auth-allow volume option is longer than 256 characters. By splitting
+# the groups into their own structures, this testcase passes.
+#
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1
+EXPECT 'Created' volinfo_field $V0 'Status'
+TEST $CLI volume set $V0 nfs.disable false
+
+CLIENTS=$(echo 127.0.0.{1..128} | tr ' ' ,)
+TEST $CLI volume set $V0 nfs.rpc-auth-allow ${CLIENTS}
+TEST $CLI volume set $V0 nfs.rpc-auth-reject all
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# glusterfs/nfs needs some time to start up in the background
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+# showmount should not timeout (no reply is sent on error)
+TEST showmount -e $H0
+
+cleanup
diff --git a/tests/bugs/nfs/socket-as-fifo.py b/tests/bugs/nfs/socket-as-fifo.py
new file mode 100755
index 00000000000..eb507e1d30b
--- /dev/null
+++ b/tests/bugs/nfs/socket-as-fifo.py
@@ -0,0 +1,33 @@
+#
+# Create a unix domain socket and test if it is a socket (and not a fifo/pipe).
+#
+# Author: Niels de Vos <ndevos@redhat.com>
+#
+
+from __future__ import print_function
+import os
+import stat
+import sys
+import socket
+
+ret = 1
+
+if len(sys.argv) != 2:
+ print('Usage: %s <socket>' % (sys.argv[0]))
+ sys.exit(ret)
+
+path = sys.argv[1]
+
+sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+sock.bind(path)
+
+stbuf = os.stat(path)
+mode = stbuf.st_mode
+
+if stat.S_ISSOCK(mode):
+ ret = 0
+
+sock.close()
+os.unlink(path)
+
+sys.exit(ret)
diff --git a/tests/bugs/nfs/socket-as-fifo.t b/tests/bugs/nfs/socket-as-fifo.t
new file mode 100644
index 00000000000..d9b9e959ce3
--- /dev/null
+++ b/tests/bugs/nfs/socket-as-fifo.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+
+# this is the actual test
+TEST $PYTHON $(dirname $0)/socket-as-fifo.py $N0/not-a-fifo.socket
+
+TEST umount_nfs $N0
+
+cleanup
diff --git a/tests/bugs/nfs/subdir-trailing-slash.t b/tests/bugs/nfs/subdir-trailing-slash.t
new file mode 100644
index 00000000000..6a114877ac7
--- /dev/null
+++ b/tests/bugs/nfs/subdir-trailing-slash.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Verify that mounting a subdir over NFS works, even with a trailing /
+#
+# For example:
+# mount -t nfs server.example.com:/volume/subdir/
+#
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable false
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+TEST mount_nfs $H0:/$V0 $N0 nolock
+TEST mkdir -p $N0/subdir
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST mount_nfs $H0:/$V0/subdir/ $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+cleanup
diff --git a/tests/bugs/nfs/zero-atime.t b/tests/bugs/nfs/zero-atime.t
new file mode 100755
index 00000000000..2a940091ad9
--- /dev/null
+++ b/tests/bugs/nfs/zero-atime.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+#
+# posix_do_utimes() sets atime and mtime to the values in the passed IATT. If
+# not set, these values are 0 and cause a atime/mtime set to the Epoch.
+#
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+
+# create a file for testing
+TEST dd if=/dev/urandom of=$M0/small count=1 bs=1024k
+
+# timezone in UTC results in atime=0 if not set correctly
+TEST TZ=UTC dd if=/dev/urandom of=$M0/small bs=64k count=1 conv=nocreat
+TEST [ "$(stat --format=%X $M0/small)" != "0" ]
+
+TEST rm $M0/small
+
+cleanup
diff --git a/tests/bugs/nl-cache/bug-1451588.t b/tests/bugs/nl-cache/bug-1451588.t
new file mode 100755
index 00000000000..cf07d04c5cc
--- /dev/null
+++ b/tests/bugs/nl-cache/bug-1451588.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0..4}
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume set $V0 performance.nl-cache on
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs --volfile-id=/$V0 --aux-gfid-mount --volfile-server=$H0 $M0
+
+TEST ! stat $M0/.gfid/1901b1a0-c612-46ee-b45a-e8345d5a0b48
+
+cleanup;
+G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/bugs/posix/bug-1034716.t b/tests/bugs/posix/bug-1034716.t
new file mode 100644
index 00000000000..d36f8b598f4
--- /dev/null
+++ b/tests/bugs/posix/bug-1034716.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+#Create a file and perform fop on a DIR
+TEST touch $M0/foo
+
+function xattr_query_check() {
+ local path=$1
+
+ local ret=`getfattr -m . -d $path 2>&1 | grep -c 'trusted.glusterfs'`
+ echo $ret
+}
+
+function set_xattr() {
+ local path=$1
+ local xa_name=$2
+ local xa_val=$3
+
+ setfattr -n $xa_name -v $xa_val $path
+ echo $?
+}
+
+function remove_xattr() {
+ local path=$1
+ local xa_name=$2
+
+ setfattr -x $xa_name $path
+ echo $?
+}
+
+EXPECT 0 xattr_query_check $M0/
+EXPECT 0 xattr_query_check $M0/foo
+
+EXPECT 1 set_xattr $M0/ 'trusted.glusterfs.volume-id' 'foo'
+EXPECT 1 remove_xattr $M0/ 'trusted.glusterfs.volume-id'
+
+
+## Finish up
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/posix/bug-1040275-brick-uid-reset-on-volume-restart.t b/tests/bugs/posix/bug-1040275-brick-uid-reset-on-volume-restart.t
new file mode 100755
index 00000000000..3839c6e3380
--- /dev/null
+++ b/tests/bugs/posix/bug-1040275-brick-uid-reset-on-volume-restart.t
@@ -0,0 +1,60 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function get_uid() {
+ stat -c '%u' $1;
+}
+
+function get_gid() {
+ stat -c '%g' $1;
+}
+
+function check_stat() {
+ stat $1
+ echo $?
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+EXPECT 0 get_uid $M0;
+EXPECT 0 get_gid $M0;
+
+TEST chown 100:101 $M0;
+
+EXPECT 100 get_uid $M0;
+EXPECT 101 get_gid $M0;
+
+TEST $CLI volume stop $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" online_brick_count
+
+TEST $CLI volume start $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "6" online_brick_count
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 4
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 5
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" check_stat $M0
+EXPECT 100 get_uid $M0;
+EXPECT 101 get_gid $M0;
+
+cleanup;
diff --git a/tests/bugs/posix/bug-1113960.t b/tests/bugs/posix/bug-1113960.t
new file mode 100755
index 00000000000..ee42de2a092
--- /dev/null
+++ b/tests/bugs/posix/bug-1113960.t
@@ -0,0 +1,98 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+NUM_DIRS=50
+NUM_FILES=10
+
+create_dirs () {
+ for (( i=1; i<=$NUM_DIRS; i+=1));do
+ mkdir $1/olddir$i
+ for (( j=0; j<$NUM_FILES; j+=1));do
+ echo "This is file $j in dir $i" > $1/olddir$i/file$j;
+ done
+ done
+ echo "0" > $M0/status
+}
+
+move_dirs () {
+ old_path="$1"
+ new_path="$1"
+
+ #Create a deep directory
+ for (( i=$NUM_DIRS; i>=2; i-=1));do
+ mv $1/olddir$i $1/olddir`expr $i - 1` > /dev/null 2>&1;
+ done
+
+ #Start renaming files and dirs so the paths change for the
+ #posix_handle_path calculations
+
+ for (( i=1; i<=$NUM_DIRS; i+=1));do
+ old_path="$new_path/olddir$i"
+ new_path="$new_path/longernamedir$i"
+ mv $old_path $new_path;
+
+ for (( j=0; j<$NUM_FILES; j+=1));do
+ mv $new_path/file$j $new_path/newfile$j > /dev/null 2>&1;
+ done
+ done
+ echo "done" > $M0/status
+}
+
+ls_loop () {
+ #Loop until the move_dirs function is done
+ for (( i=0; i<=500; i+=1 )); do
+ ls -lR $1 > /dev/null 2>&1
+ done
+}
+
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3,4};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '4' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount FUSE with caching disabled (read-write)
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+TEST create_dirs $M0
+
+## Mount FUSE with caching disabled (read-write) again
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M1;
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M2;
+
+(ls_loop $M1)&
+ls_pid1=$!
+
+(ls_loop $M2)&
+ls_pid2=$!
+
+
+#Start moving/renaming the directories so the paths change
+TEST move_dirs $M0
+
+EXPECT_WITHIN 180 "done" cat $M0/status
+
+#Kill the ls processes
+
+kill -SIGTERM $ls_pid1
+kill -SIGTERM $ls_pid2
+
+
+#Check if all bricks are still up
+EXPECT '4' online_brick_count $V0
+
+cleanup;
diff --git a/tests/bugs/posix/bug-1122028.t b/tests/bugs/posix/bug-1122028.t
new file mode 100755
index 00000000000..492668cf1dc
--- /dev/null
+++ b/tests/bugs/posix/bug-1122028.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+# Create a 1x1 distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+# Start volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# Mount volume over FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST mkdir $M0/dir
+TEST touch $M0/dir/a
+TEST ln $M0/dir/a $M0/dir/b
+
+# Confirm hardlinks
+inum1=$(ls -i $M0/dir/a | cut -d' ' -f1)
+inum2=$(ls -i $M0/dir/b | cut -d' ' -f1)
+TEST [ "$inum1" = "$inum2" ]
+
+# Turn on build-pgfid
+TEST $CLI volume set $V0 build-pgfid on
+EXPECT 'on' volinfo_field $V0 'storage.build-pgfid'
+
+# Unlink files
+TEST unlink $M0/dir/a
+TEST unlink $M0/dir/b
+
+# Unmount
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Stop the volume
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+# Delete the volume
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup
diff --git a/tests/bugs/posix/bug-1175711.c b/tests/bugs/posix/bug-1175711.c
new file mode 100644
index 00000000000..8ab193c4014
--- /dev/null
+++ b/tests/bugs/posix/bug-1175711.c
@@ -0,0 +1,37 @@
+#include <stdio.h>
+#include <dirent.h>
+#include <string.h>
+#include <assert.h>
+
+int
+main(int argc, char **argv)
+{
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ int ret = 0;
+ char *path = NULL;
+
+ assert(argc == 2);
+ path = argv[1];
+
+ dir = opendir(path);
+ if (!dir) {
+ printf("opendir(%s) failed.\n", path);
+ return -1;
+ }
+
+#ifdef _DIRENT_HAVE_D_TYPE
+ while ((entry = readdir(dir)) != NULL) {
+ if (entry->d_type == DT_UNKNOWN) {
+ printf("d_type found to be DT_UNKNOWN\n");
+ ret = -1;
+ break;
+ }
+ }
+#endif
+
+ if (dir)
+ closedir(dir);
+
+ return ret;
+}
diff --git a/tests/bugs/posix/bug-1175711.t b/tests/bugs/posix/bug-1175711.t
new file mode 100755
index 00000000000..f4162544d92
--- /dev/null
+++ b/tests/bugs/posix/bug-1175711.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+# Create, start and mount the volume.
+TEST glusterd;
+TEST $CLI volume create $V0 $H0:$B0/$V0;
+TEST $CLI volume start $V0;
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+# Compile the test program
+TEST $CC -Wall $(dirname $0)/bug-1175711.c -o $(dirname $0)/bug-1175711
+
+# Create directory and some entries inside them.
+mkdir -p $M0/dir-bug-1175711
+mkdir -p $M0/dir-bug-1175711/DT_DIR
+touch $M0/dir-bug-1175711/DT_REG
+
+# Invoke the test program and pass path of directory to it.
+TEST $(dirname $0)/bug-1175711 $M0/dir-bug-1175711
+
+# Unmount, stop and delete the volume
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/posix/bug-1360679.t b/tests/bugs/posix/bug-1360679.t
new file mode 100644
index 00000000000..fbb9d027ddb
--- /dev/null
+++ b/tests/bugs/posix/bug-1360679.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup
+function num_entries {
+ ls -l $1 | wc -l
+}
+
+function create_unlink_entry {
+ for i in {0..1}
+ do
+ mkdir -p $B0/${V0}$i/.glusterfs/unlink/{1..3}/{1..10}/1
+ dd if=/dev/urandom of=$B0/${V0}$i/.glusterfs/unlink/file-1 bs=1M count=1
+ dd if=/dev/urandom of=$B0/${V0}$i/.glusterfs/unlink/file-2 bs=1M count=1
+ dd if=/dev/urandom of=$B0/${V0}$i/.glusterfs/unlink/1/file-1 bs=1M count=1
+ dd if=/dev/urandom of=$B0/${V0}$i/.glusterfs/unlink/2/file-1 bs=1M count=1
+ dd if=/dev/urandom of=$B0/${V0}$i/.glusterfs/unlink/3/file-1 bs=1M count=1
+ ln $B0/${V0}$i/.glusterfs/unlink/file-1 $B0/${V0}$i/.glusterfs/unlink/file-link
+ ln -s $B0/${V0}$i/.glusterfs/unlink/1 $B0/${V0}$i/.glusterfs/unlink/link
+ ln -s $B0/${V0}$i/.glusterfs/unlink/2 $B0/${V0}$i/.glusterfs/unlink/link-2
+ done
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume start $V0
+TEST $CLI volume stop $V0
+create_unlink_entry
+TEST $CLI volume start $V0
+EXPECT_WITHIN $UNLINK_TIMEOUT "^1$" num_entries $B0/${V0}0/.glusterfs/unlink/
+EXPECT_WITHIN $UNLINK_TIMEOUT "^1$" num_entries $B0/${V0}1/.glusterfs/unlink/
+cleanup;
diff --git a/tests/bugs/posix/bug-1619720.t b/tests/bugs/posix/bug-1619720.t
new file mode 100755
index 00000000000..bfd304dc809
--- /dev/null
+++ b/tests/bugs/posix/bug-1619720.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../dht.rc
+
+cleanup;
+
+
+# Test steps:
+# The test checks to make sure that the trusted.pgfid.xx xattr is set on
+# both the linkto and data files post the final rename.
+# The test creates files file-1 and file-3 so that src_hashed = dst_hashed,
+# src_cached = dst_cached and xxx_hashed != xxx_cached.
+# It then renames file-1 to file-3 which triggers the posix_mknod call
+# which updates the trusted.pgfid.xx xattr.
+
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 storage.build-pgfid on
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/tmp
+
+
+
+# Not the best way to do this but I need files which hash to the same subvol and
+# whose cached subvols are the same.
+# In a 2 subvol distributed volume, file-{1,3} hash to the same subvol.
+# file-2 will hash to the other subvol
+
+TEST touch $M0/tmp/file-2
+pgfid_xattr_name=$(getfattr -m "trusted.pgfid.*" $B0/${V0}1/tmp/file-2 | grep "trusted.pgfid")
+echo $pgfid_xattr_name
+
+
+TEST mv $M0/tmp/file-2 $M0/tmp/file-1
+TEST touch $M0/tmp/file-2
+TEST mv $M0/tmp/file-2 $M0/tmp/file-3
+
+# At this point, both the file-1 and file-3 data files exist on one subvol
+# and both linkto files on the other
+
+TEST mv -f $M0/tmp/file-1 $M0/tmp/file-3
+
+
+TEST getfattr -n $pgfid_xattr_name $B0/${V0}0/tmp/file-3
+TEST getfattr -n $pgfid_xattr_name $B0/${V0}1/tmp/file-3
+
+# Not required for the test but an extra check if required.
+# The linkto file was not renamed Without the fix.
+#TEST mv $M0/tmp/file-3 $M0/tmp/file-6
+cleanup;
diff --git a/tests/bugs/posix/bug-1651445.t b/tests/bugs/posix/bug-1651445.t
new file mode 100644
index 00000000000..4d08b69b9b0
--- /dev/null
+++ b/tests/bugs/posix/bug-1651445.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup
+
+TEST verify_lvm_version
+TEST glusterd
+TEST pidof glusterd
+TEST init_n_bricks 3
+TEST setup_lvm 3
+
+TEST $CLI volume create $V0 replica 3 $H0:$L{1,2,3}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+#Setting the size in bytes
+TEST $CLI volume set $V0 storage.reserve 40MB
+
+#wait 5s to reset disk_space_full flag
+sleep 5
+
+TEST dd if=/dev/zero of=$M0/a bs=100M count=1
+TEST dd if=/dev/zero of=$M0/b bs=10M count=1
+
+# Wait 5s to update disk_space_full flag because thread check disk space
+# after every 5s
+
+sleep 5
+# setup_lvm create lvm partition of 150M and 40M are reserve so after
+# consuming more than 110M next dd should fail
+TEST ! dd if=/dev/zero of=$M0/c bs=5M count=1
+TEST dd if=/dev/urandom of=$M0/a bs=1022 count=1 oflag=seek_bytes,sync seek=102 conv=notrunc
+
+rm -rf $M0/*
+
+#Setting the size in percent and repeating the above steps
+TEST $CLI volume set $V0 storage.reserve 40
+
+sleep 5
+
+TEST dd if=/dev/zero of=$M0/a bs=80M count=1
+TEST dd if=/dev/zero of=$M0/b bs=10M count=1
+
+sleep 5
+TEST ! dd if=/dev/zero of=$M0/c bs=5M count=1
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/posix/bug-765380.t b/tests/bugs/posix/bug-765380.t
new file mode 100644
index 00000000000..384b8022a42
--- /dev/null
+++ b/tests/bugs/posix/bug-765380.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+REPLICA=2
+
+TEST $CLI volume create $V0 replica $REPLICA $H0:$B0/${V0}00 $H0:$B0/${V0}01 $H0:$B0/${V0}10 $H0:$B0/${V0}11
+TEST $CLI volume start $V0
+
+## Mount FUSE with caching disabled
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+function count_hostname_or_uuid_from_pathinfo()
+{
+ pathinfo=$(getfattr -n trusted.glusterfs.pathinfo $M0/f00f)
+ echo $pathinfo | grep -o $1 | wc -l
+}
+
+TEST touch $M0/f00f
+
+EXPECT $REPLICA count_hostname_or_uuid_from_pathinfo $H0
+
+# turn on node-uuid-pathinfo option
+TEST $CLI volume set $V0 node-uuid-pathinfo on
+
+# do not expext hostname as part of the pathinfo string
+EXPECT 0 count_hostname_or_uuid_from_pathinfo $H0
+
+uuid=$(grep UUID $GLUSTERD_WORKDIR/glusterd.info | cut -f2 -d=)
+
+# ... but expect the uuid $REPLICA times
+EXPECT $REPLICA count_hostname_or_uuid_from_pathinfo $uuid
+
+cleanup;
diff --git a/tests/bugs/posix/bug-990028.t b/tests/bugs/posix/bug-990028.t
new file mode 100755
index 00000000000..bef36a8897d
--- /dev/null
+++ b/tests/bugs/posix/bug-990028.t
@@ -0,0 +1,157 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=153
+
+function __init()
+{
+ TEST glusterd
+ TEST pidof glusterd
+ TEST $CLI volume info;
+
+ TEST $CLI volume create $V0 $H0:$B0/brick
+
+ EXPECT 'Created' volinfo_field $V0 'Status';
+
+ TEST $CLI volume start $V0
+
+ TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+ TEST $CLI volume quota $V0 enable
+}
+
+#CASE-1
+#checking pgfid under same directory
+function links_in_same_directory()
+{
+ # create a file file1
+ TEST touch $M0/file1
+
+ # create 50 hardlinks for file1
+ for i in `seq 2 50`; do
+ TEST_IN_LOOP ln $M0/file1 $M0/file$i
+ done
+
+ # store the pgfid of file1 in PGFID_FILE1 [should be 50 now (0x000000032)]
+ PGFID_FILE1=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/file1 2>&1 | grep "trusted.pgfid" | gawk -F '=' '{print $2}'`
+
+ # compare the pgfid(link value ) of each hard links are equal or not
+ for i in `seq 2 50`; do
+ TEMP=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/file$i 2>&1 | grep "trusted.pgfid" | gawk -F '=' '{print $2}'`
+ TEST_IN_LOOP [ $PGFID_FILE1 = $TEMP ]
+ done
+
+ # check if no of links value is 50 or not
+ TEST [ $PGFID_FILE1 = "0x00000032" ]
+
+ # unlink file 2 to 50
+ for i in `seq 2 50`; do
+ TEST_IN_LOOP unlink $M0/file$i;
+ done
+
+ # now check if pgfid value is 1 or not
+ PGFID_FILE1=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/file1 2>&1 | grep "trusted.pgfid" | gawk -F '=' '{print $2}'`;
+
+ TEST [ $PGFID_FILE1 = "0x00000001" ]
+
+ TEST rm -f $M0/*
+}
+
+##checking pgfid under diff directories
+function links_across_directories()
+{
+ TEST mkdir $M0/dir1 $M0/dir2;
+
+ # create a file in dir1
+ TEST touch $M0/dir1/file1;
+
+ # create hard link for file1 in dir2
+ TEST ln $M0/dir1/file1 $M0/dir2/file2;
+
+ #first check is to find whether there are two pgfids or not
+ LINES=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/dir1/file1 2>&1 | grep "trusted.pgfid" | wc -l`
+ TEST [ $LINES = 2 ]
+
+ for i in $(seq 1 2); do
+ HL=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/dir$i/file$i 2>&1 | grep "trusted.pgfid" | awk -v n=$i 'NR==n' | cut -d'=' -f2`
+ TEST_IN_LOOP [ $HL = "0x00000001" ]
+ done
+
+ #now unlink file2 and check the pgfid of file1
+ #1. no. of pgfid should be one
+ #2. no. of hard link should be one
+ TEST unlink $M0/dir2/file2
+
+ LINES=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/dir1/file1 2>&1 | grep "trusted.pgfid" | wc -l`
+ TEST [ $LINES == 1 ]
+
+ #next to check is to whether they contain hard link value of one or not
+ HL=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/dir1/file1 2>&1 | grep "trusted.pgfid" | cut -d'=' -f2`
+ TEST [ $HL = "0x00000001" ]
+
+ #rename file under same directory
+
+ TEST touch $M0/r_file1
+ PGFID_rfile1=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/r_file1 2>&1 | grep "trusted.pgfid"`
+
+ #cross check whether hard link count is one
+ HL=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/r_file1 2>&1 | grep "trusted.pgfid" | cut -d'=' -f2`
+
+ TEST [ $HL = "0x00000001" ]
+
+ #now rename the file to r_file1
+ TEST mv $M0/r_file1 $M0/r_file2
+
+ #now check the pgfid hard link count is still one or not
+ HL=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/r_file2 2>&1 | grep "trusted.pgfid" | cut -d'=' -f2`
+
+ TEST [ $HL = "0x00000001" ]
+
+ #now move the file to a different directory where it has no hard link and check
+ TEST mkdir $M0/dir3;
+ TEST mv $M0/r_file2 $M0/dir3;
+
+ #now check the pgfid has changed or not and hard limit is one or not
+ PGFID_newDir=`getfattr -m "trusted.pgfid.*" -de hex $B0/brick/dir3/r_file2 2>&1 | grep "trusted.pgfid"`
+
+ #now the older pgfid and new pgfid shouldn't match
+ TEST [ $PGFID_rfile1 != $PGFID_newDir ]
+
+ HL=`getfattr -m "trusted.pgfid" -de hex $B0/brick/dir3/r_file2 2>&1 | grep "trusted.pgfid" | cut -d'=' -f2`
+ TEST [ $HL = "0x00000001" ]
+
+ TEST touch $M0/dir1/rl_file_1
+ ln $M0/dir1/rl_file_1 $M0/dir2/rl_file_2
+ mv $M0/dir1/rl_file_1 $M0/dir2
+
+ #now the there should be just one pgfid for both files
+ for i in $(seq 1 2); do
+ NL=`getfattr -m "trusted.pgfid" -de hex $B0/brick/dir2/rl_file_$i 2>&1 | grep "trusted.pgfid"|wc -l `
+ TEST_IN_LOOP [ $HL = "0x00000001" ]
+ done
+
+ #now pgfid of both files should match
+ P_rl_file_1=`getfattr -m "trusted.pgfid" -de hex $B0/brick/dir2/rl_file_1 2>&1 | grep "trusted.pgfid"`
+ P_rl_file_2=`getfattr -m "trusted.pgfid" -de hex $B0/brick/dir2/rl_file_2 2>&1 | grep "trusted.pgfid"`
+ TEST [ $P_rl_file_1 = $P_rl_file_2 ]
+
+ #now the no of hard link should be two for both rl_file_1 and rl_file_2
+ for i in $(seq 1 2); do
+ HL=`getfattr -m "trusted.pgfid" -de hex $B0/brick/dir2/rl_file_$i 2>&1 | grep "trusted.pgfid" | cut -d'=' -f2`
+ TEST_IN_LOOP [ $HL = "0x00000002" ]
+ done
+
+ TEST rm -rf $M0/*
+}
+
+__init;
+links_in_same_directory;
+links_across_directories;
+TEST $CLI volume stop $V0
+
+cleanup
diff --git a/tests/bugs/posix/bug-gfid-path.t b/tests/bugs/posix/bug-gfid-path.t
new file mode 100644
index 00000000000..1bbbe9f0670
--- /dev/null
+++ b/tests/bugs/posix/bug-gfid-path.t
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# This test case is for the bug where, even though a file is
+# created when gfid2path option is turned off (default is ON),
+# getfattr of "glusterfs.gfidtopath" was succeeding for that
+# file. Ideally the getfattr should fail, as the file does not
+# have its path(s) stored as a extended attribute (because it
+# was created when gfid2path option was off)
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3,4};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '4' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/dir
+TEST mkdir $M0/new
+TEST mkdir $M0/3
+
+TEST touch $M0/dir/file
+
+# except success as by default gfid2path is enabled
+# and the glusterfs.gfidtopath xattr should give the
+# path of the object as the value
+
+TEST getfattr -n glusterfs.gfidtopath $M0/dir/file
+
+# turn off gfid2path feature
+TEST $CLI volume set $V0 storage.gfid2path off
+
+TEST touch $M0/new/foo
+
+# again enable gfid2path. This has to be enabled before
+# trying the getfattr. Because, glusterfs.gfidtopath xattr
+# request is handled only if gfid2path is enabled. If not,
+# then getxattr on glusterfs.gfid2path fails anyways. In this
+# context we want getfattr to fail, because the file was created
+# when gfid2path feature was disabled and not because gfid2path
+# feature itself is disabled.
+TEST $CLI volume set $V0 storage.gfid2path on
+
+# getfattr should fail as it is attempted on a file
+# which does not have its path stored as a xattr
+# (because file got created after disabling gfid2path)
+TEST ! getfattr -n glusterfs.gfidtopath $M0/new/foo;
+
+
+
+TEST touch $M0/3/new
+
+# should be successful
+TEST getfattr -n glusterfs.gfidtopath $M0/3/new
+
+TEST rm -rf $M0/*
+
+cleanup;
diff --git a/tests/bugs/posix/disallow-gfid-volumeid-fremovexattr.c b/tests/bugs/posix/disallow-gfid-volumeid-fremovexattr.c
new file mode 100644
index 00000000000..4ed3181d48f
--- /dev/null
+++ b/tests/bugs/posix/disallow-gfid-volumeid-fremovexattr.c
@@ -0,0 +1,104 @@
+#include <glusterfs/api/glfs.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ int ret = 0;
+ int i = 0;
+ glfs_fd_t *fd = NULL;
+ char *logfile = NULL;
+ char *hostname = NULL;
+
+ if (argc != 4) {
+ fprintf(stderr,
+ "Expect following args %s <hostname> <Vol> <log file>\n",
+ argv[0]);
+ return -1;
+ }
+
+ hostname = argv[1];
+ logfile = argv[3];
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL (%s)\n", strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+
+ fd = glfs_opendir(fs, "/");
+ if (!fd) {
+ fprintf(stderr, "glfs_opendir failed with (%s)\n", strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fremovexattr(fd, "trusted.gfid");
+ if (ret == 0 || errno != EPERM) {
+ fprintf(stderr,
+ "glfs_fremovexattr gfid exited with ret: "
+ "%d (%s)\n",
+ ret, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fremovexattr(fd, "trusted.glusterfs.volume-id");
+ if (ret == 0 || errno != EPERM) {
+ fprintf(stderr,
+ "glfs_fremovexattr volume-id exited with ret: "
+ "%d (%s)\n",
+ ret, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fsetxattr(fd, "trusted.abc", "abc", 3, 0);
+ if (ret < 0) {
+ fprintf(stderr,
+ "glfs_fsetxattr trusted.abc exited with ret: "
+ "%d (%s)\n",
+ ret, strerror(errno));
+ return -1;
+ }
+
+ ret = glfs_fremovexattr(fd, "trusted.abc");
+ if (ret < 0) {
+ fprintf(stderr,
+ "glfs_fremovexattr trusted.abc exited with "
+ "ret: %d (%s)\n",
+ ret, strerror(errno));
+ return -1;
+ }
+
+ (void)glfs_closedir(fd);
+ ret = glfs_fini(fs);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fini failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ return -1;
+ }
+ return 0;
+}
diff --git a/tests/bugs/posix/disallow-gfid-volumeid-fremovexattr.t b/tests/bugs/posix/disallow-gfid-volumeid-fremovexattr.t
new file mode 100755
index 00000000000..b9fd44ae0d7
--- /dev/null
+++ b/tests/bugs/posix/disallow-gfid-volumeid-fremovexattr.t
@@ -0,0 +1,21 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0};
+TEST $CLI volume start $V0;
+logdir=`gluster --print-logdir`
+
+
+TEST build_tester $(dirname $0)/disallow-gfid-volumeid-fremovexattr.c -lgfapi
+TEST $(dirname $0)/disallow-gfid-volumeid-fremovexattr $H0 $V0 $logdir/disallow-gfid-volumeid-fremovexattr.log
+
+cleanup_tester $(dirname $0)/disallow-gfid-volumeid-fremovexattr
+cleanup;
diff --git a/tests/bugs/posix/disallow-gfid-volumeid-removexattr.t b/tests/bugs/posix/disallow-gfid-volumeid-removexattr.t
new file mode 100644
index 00000000000..d26eb21ccc5
--- /dev/null
+++ b/tests/bugs/posix/disallow-gfid-volumeid-removexattr.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+#This test checks that gfid/volume-id removexattrs are not allowed.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Basic checks
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+#Create a distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0
+
+# Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST ! setfattr -x trusted.gfid $M0
+TEST ! setfattr -x trusted.glusterfs.volume-id $M0
+TEST setfattr -n trusted.abc -v abc $M0
+TEST setfattr -x trusted.abc $M0
+
+cleanup;
diff --git a/tests/bugs/protocol/bug-1321578.t b/tests/bugs/protocol/bug-1321578.t
new file mode 100644
index 00000000000..83904817467
--- /dev/null
+++ b/tests/bugs/protocol/bug-1321578.t
@@ -0,0 +1,82 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+check_mounted () {
+ df | grep $1 | wc -l
+}
+
+CHECK_MOUNT_TIMEOUT=7
+
+TEST glusterd
+TEST $CLI volume create $V0 $H0:$B0/$V0
+
+# Set auth.allow to dummy hostname so it *doesn't* include ourselves.
+TEST $CLI volume set $V0 auth.allow example.org
+TEST $CLI volume start $V0
+
+# "System getspec" will include the username and password if the request comes
+# from a server (which we are). Unfortunately, this will cause authentication
+# to succeed in auth.login regardless of whether auth.addr is working properly
+# or not, which is useless to us. To get a proper test, strip out those lines.
+$CLI system getspec $V0 | sed -e /username/d -e /password/d > fubar.vol
+
+# This mount should fail because auth.allow doesn't include us.
+TEST $GFS -f fubar.vol $M0
+
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT 0 check_mounted $M0
+
+# Add tests when only username is present, but not password
+# "System getspec" will include the username and password if the request comes
+# from a server (which we are). Unfortunately, this will cause authentication
+# to succeed in auth.login regardless of whether auth.addr is working properly
+# or not, which is useless to us. To get a proper test, strip out those lines.
+$CLI system getspec $V0 | sed -e /password/d > fubar.vol
+
+# This mount should fail because auth.allow doesn't include our password.
+TEST $GFS -f fubar.vol $M0
+
+# If we had DONT_EXPECT_WITHIN we could use that, but we don't.
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT 0 check_mounted $M0
+
+# Now, add a test for login failure when server doesn't have the password entry
+# Add tests when only username is present, but not password
+# "System getspec" will include the username and password if the request comes
+# from a server (which we are). Unfortunately, this will cause authentication
+# to succeed in auth.login regardless of whether auth.addr is working properly
+# or not, which is useless to us. To get a proper test, strip out those lines.
+$CLI system getspec $V0 > fubar.vol
+TEST $CLI volume stop $V0
+
+sed -i -e '/password /d' /var/lib/glusterd/vols/$V0/$V0.*$V0.vol
+
+TEST $CLI volume start $V0
+
+# This mount should fail because auth.allow doesn't include our password.
+TEST $GFS -f fubar.vol $M0
+
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT 0 check_mounted $M0
+
+# Set auth.allow to include us. This mount should therefore succeed.
+TEST $CLI volume set $V0 auth.allow $H0
+$CLI system getspec $V0 | sed -e /password/d > fubar.vol
+
+TEST $GFS -f fubar.vol $M0
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT 1 check_mounted $M0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Set auth.reject to include us. This mount should therefore fail.
+TEST $CLI volume stop $V0
+
+TEST $CLI volume set $V0 auth.allow "\*"
+TEST $CLI volume set $V0 auth.reject $H0
+TEST $CLI volume start $V0
+
+# Do this, so login module is not in picture
+$CLI system getspec $V0 | sed -e /password/d > fubar.vol
+
+TEST $GFS -f fubar.vol $M0
+EXPECT_WITHIN $CHECK_MOUNT_TIMEOUT 0 check_mounted $M0
+
+cleanup
diff --git a/tests/bugs/protocol/bug-1390914.t b/tests/bugs/protocol/bug-1390914.t
new file mode 100644
index 00000000000..e3dab92de5a
--- /dev/null
+++ b/tests/bugs/protocol/bug-1390914.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+cleanup;
+
+#test that fops are not wound on anon-fd when fd is not open on that brick
+TEST glusterd;
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3};
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 diagnostics.client-log-level DEBUG
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST $GFS -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0;
+
+TEST touch $M0/1
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST fd_open 200 'w' "$M0/1"
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#lk should only happen on 2 bricks, if there is a bug, it will plant a lock
+#with anon-fd on first-brick which will never be released because flush won't
+#be wound below server xlator for anon-fd
+TEST flock -x -n 200
+TEST fd_close 200
+
+TEST fd_open 200 'w' "$M0/1"
+#this lock will fail if there is a stale lock
+TEST flock -x -n 200
+TEST fd_close 200
+cleanup;
diff --git a/tests/bugs/protocol/bug-1433815-auth-allow.t b/tests/bugs/protocol/bug-1433815-auth-allow.t
new file mode 100644
index 00000000000..a78c0eb7111
--- /dev/null
+++ b/tests/bugs/protocol/bug-1433815-auth-allow.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+check_mounted () {
+ df | grep $1 | wc -l
+}
+
+get_addresses () {
+ ip addr | sed -n '/.*inet \([0-9.]*\).*/s//\1/p' | tr '\n' ','
+}
+
+TEST glusterd
+TEST $CLI volume create $V0 $H0:$B0/$V0
+
+# Set auth.allow so it *doesn't* include ourselves.
+TEST $CLI volume set $V0 auth.allow 1.2.3.4
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+# "System getspec" will include the username and password if the request comes
+# from a server (which we are). Unfortunately, this will cause authentication
+# to succeed in auth.login regardless of whether auth.addr is working properly
+# or not, which is useless to us. To get a proper test, strip out those lines.
+$CLI system getspec $V0 | sed -e /username/d -e /password/d > fubar.vol
+
+# This mount should fail because auth.allow doesn't include us.
+TEST $GFS -f fubar.vol $M0
+# If we had DONT_EXPECT_WITHIN we could use that, but we don't.
+sleep 10
+EXPECT 0 check_mounted $M0
+
+# Set auth.allow to include us. This mount should therefore succeed.
+TEST $CLI volume set $V0 auth.allow "$(get_addresses)"
+TEST $GFS -f fubar.vol $M0
+sleep 10
+EXPECT 1 check_mounted $M0
+
+cleanup
diff --git a/tests/bugs/protocol/bug-762989.t b/tests/bugs/protocol/bug-762989.t
new file mode 100755
index 00000000000..7d201b78b58
--- /dev/null
+++ b/tests/bugs/protocol/bug-762989.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+# Skip the entire test if ip_local_reserved_ports does not exist
+if [ ! -f /proc/sys/net/ipv4/ip_local_reserved_ports ] ; then
+ echo "Skip test on /proc/sys/net/ipv4/ip_local_reserved_ports, "\
+ "which does not exists on this system" >&2
+ SKIP_TESTS
+ exit 0
+fi
+
+## reserve port 1023
+older_ports=$(cat /proc/sys/net/ipv4/ip_local_reserved_ports);
+echo "1023" > /proc/sys/net/ipv4/ip_local_reserved_ports;
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 \
+$M0;
+
+## Wait for volume to register with rpc.mountd
+sleep 6;
+## check if port 1023 (which has been reserved) is used by the gluster processes
+op=$(netstat -ntp | grep gluster | grep -w 1023);
+EXPECT "" echo $op;
+
+#set the reserved ports to the older values
+echo $older_ports > /proc/sys/net/ipv4/ip_local_reserved_ports
+
+cleanup;
diff --git a/tests/bugs/protocol/bug-808400-dist.t b/tests/bugs/protocol/bug-808400-dist.t
new file mode 100755
index 00000000000..0df972585c0
--- /dev/null
+++ b/tests/bugs/protocol/bug-808400-dist.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+MOUNTDIR=$M0;
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $MOUNTDIR;
+
+build_tester $(dirname $0)/bug-808400-flock.c
+build_tester $(dirname $0)/bug-808400-fcntl.c
+
+TEST $(dirname $0)/bug-808400-flock $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind off\'
+TEST $(dirname $0)/bug-808400-fcntl $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind on\'
+
+TEST rm -rf $MOUNTDIR/*
+TEST rm -rf $(dirname $0)/bug-808400-flock $(dirname $0)/bug-808400-fcntl $(dirname $0)/glusterfs.log
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR
+
+cleanup;
diff --git a/tests/bugs/protocol/bug-808400-fcntl.c b/tests/bugs/protocol/bug-808400-fcntl.c
new file mode 100644
index 00000000000..a703ca5c120
--- /dev/null
+++ b/tests/bugs/protocol/bug-808400-fcntl.c
@@ -0,0 +1,124 @@
+#include <sys/file.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+
+#ifndef linux
+#define fstat64(fd, st) fstat(fd, st)
+#endif
+
+int
+run_child(char *filename)
+{
+ int fd = -1, ret = -1;
+ struct flock lock = {
+ 0,
+ };
+ int ppid = 0;
+
+ fd = open(filename, O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "open failed (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ ppid = getppid();
+
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+
+ ret = fcntl(fd, F_GETLK, &lock);
+ if (ret < 0) {
+ fprintf(stderr, "GETLK failed (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ if ((lock.l_type == F_UNLCK) || (ppid != lock.l_pid)) {
+ fprintf(stderr,
+ "no locks present, though parent has held "
+ "one\n");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int fd = -1, ret = -1, status = 0;
+ char *filename = NULL, *cmd = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ struct flock lock = {
+ 0,
+ };
+
+ if (argc != 3) {
+ fprintf(stderr,
+ "Usage: %s <filename> "
+ "<gluster-cmd-to-trigger-graph-switch>\n",
+ argv[0]);
+ goto out;
+ }
+
+ filename = argv[1];
+ cmd = argv[2];
+
+ fd = open(filename, O_RDWR | O_CREAT, 0);
+ if (fd < 0) {
+ fprintf(stderr, "open (%s) failed (%s)\n", filename, strerror(errno));
+ goto out;
+ }
+
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+
+ ret = fcntl(fd, F_SETLK, &lock);
+ if (ret < 0) {
+ fprintf(stderr, "fcntl failed (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ system(cmd);
+
+ /* wait till graph switch completes */
+ ret = fstat64(fd, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat64 failure (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ sleep(10);
+
+ /* By now old-graph would be disconnected and locks should be cleaned
+ * up if they are not migrated. Check that by trying to acquire a lock
+ * on a new fd opened by another process on same file.
+ */
+ ret = fork();
+ if (ret == 0) {
+ ret = run_child(filename);
+ } else {
+ wait(&status);
+ if (WIFEXITED(status)) {
+ ret = WEXITSTATUS(status);
+ } else {
+ ret = 0;
+ }
+ }
+
+out:
+ return ret;
+}
diff --git a/tests/bugs/protocol/bug-808400-flock.c b/tests/bugs/protocol/bug-808400-flock.c
new file mode 100644
index 00000000000..54a507cc227
--- /dev/null
+++ b/tests/bugs/protocol/bug-808400-flock.c
@@ -0,0 +1,100 @@
+#include <sys/file.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/wait.h>
+
+#ifndef linux
+#define fstat64(fd, st) fstat(fd, st)
+#endif
+
+int
+run_child(char *filename)
+{
+ int fd = -1, ret = -1;
+
+ fd = open(filename, O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "open failed (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ ret = flock(fd, LOCK_EX | LOCK_NB);
+ if ((ret == 0) || (errno != EWOULDBLOCK)) {
+ fprintf(stderr,
+ "no locks present, though parent has held "
+ "one\n");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int fd = -1, ret = -1, status = 0;
+ char *filename = NULL, *cmd = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+
+ if (argc != 3) {
+ fprintf(stderr,
+ "Usage: %s <filename> "
+ "<gluster-cmd-to-trigger-graph-switch>\n",
+ argv[0]);
+ goto out;
+ }
+
+ filename = argv[1];
+ cmd = argv[2];
+
+ fd = open(filename, O_RDWR | O_CREAT, 0);
+ if (fd < 0) {
+ fprintf(stderr, "open (%s) failed (%s)\n", filename, strerror(errno));
+ goto out;
+ }
+
+ ret = flock(fd, LOCK_EX);
+ if (ret < 0) {
+ fprintf(stderr, "flock failed (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ system(cmd);
+
+ /* wait till graph switch completes */
+ ret = fstat64(fd, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "fstat64 failure (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ sleep(10);
+
+ /* By now old-graph would be disconnected and locks should be cleaned
+ * up if they are not migrated. Check that by trying to acquire a lock
+ * on a new fd opened by another process on same file
+ */
+ ret = fork();
+ if (ret == 0) {
+ ret = run_child(filename);
+ } else {
+ wait(&status);
+ if (WIFEXITED(status)) {
+ ret = WEXITSTATUS(status);
+ } else {
+ ret = 0;
+ }
+ }
+
+out:
+ return ret;
+}
diff --git a/tests/bugs/protocol/bug-808400-repl.t b/tests/bugs/protocol/bug-808400-repl.t
new file mode 100755
index 00000000000..611e5ec93b7
--- /dev/null
+++ b/tests/bugs/protocol/bug-808400-repl.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+MOUNTDIR=$M0;
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $MOUNTDIR;
+
+build_tester $(dirname $0)/bug-808400-flock.c
+build_tester $(dirname $0)/bug-808400-fcntl.c
+
+TEST $(dirname $0)/bug-808400-flock $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind off\'
+TEST $(dirname $0)/bug-808400-fcntl $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind on\'
+
+TEST rm -rf $MOUNTDIR/*
+TEST rm -rf $(dirname $0)/bug-808400-flock $(dirname $0)/bug-808400-fcntl $(dirname $0)/glusterfs.log
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR
+
+cleanup;
diff --git a/tests/bugs/protocol/bug-808400.t b/tests/bugs/protocol/bug-808400.t
new file mode 100755
index 00000000000..4ae1722fca2
--- /dev/null
+++ b/tests/bugs/protocol/bug-808400.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+#mount on a random dir
+TEST MOUNTDIR="/tmp/$RANDOM"
+TEST mkdir $MOUNTDIR
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 --volfile-server=$H0 --volfile-id=$V0 $MOUNTDIR;
+
+build_tester $(dirname $0)/bug-808400-flock.c
+build_tester $(dirname $0)/bug-808400-fcntl.c
+
+TEST $(dirname $0)/bug-808400-flock $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind off\'
+TEST $(dirname $0)/bug-808400-fcntl $MOUNTDIR/testfile \'gluster volume set $V0 performance.write-behind on\'
+
+TEST rm -rf $MOUNTDIR/*
+TEST rm -rf $(dirname $0)/bug-808400-flock $(dirname $0)/bug-808400-fcntl $(dirname $0)/glusterfs.log
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR
+TEST rm -rf $MOUNTDIR
+
+cleanup;
diff --git a/tests/bugs/quick-read/bug-846240.t b/tests/bugs/quick-read/bug-846240.t
new file mode 100755
index 00000000000..bb997e10013
--- /dev/null
+++ b/tests/bugs/quick-read/bug-846240.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+TEST $CLI volume create $V0 $H0:$B0/brick1 $H0:$B0/brick2;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+MOUNTDIR=$M0;
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $MOUNTDIR;
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M1;
+
+TEST touch $M0/testfile;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+# open the file with the fd as 4
+TEST fd=`fd_available`;
+TEST fd_open $fd 'w' "$M0/testfile";
+
+# remove the file from the other mount point. If unlink is sent from
+# $M0 itself, then the file will be actually opened by open-behind which
+# we dont want for this testcase
+TEST rm -f $M1/testfile;
+
+# below command opens the file and writes to the file.
+# upon open, open-behind unwinds the open call with success.
+# now when write comes, open-behind actually opens the file
+# and then sends write on the fd. But before sending open itself,
+# the file would have been removed from the mount $M1. open() gets error
+# and the write call which is put into a stub (open had to be sent first)
+# should unwind with the error received in the open call.
+TEST ! fd_write $fd data
+
+TEST fd_close $fd;
+
+TEST rm -rf $MOUNTDIR/*
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $MOUNTDIR
+
+cleanup;
diff --git a/tests/bugs/quick-read/bz1523599/bz1523599.t b/tests/bugs/quick-read/bz1523599/bz1523599.t
new file mode 100755
index 00000000000..5027efe8e9a
--- /dev/null
+++ b/tests/bugs/quick-read/bz1523599/bz1523599.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+. $(dirname $0)/../../../fileio.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/test_bz1523599.c -lgfapi -o $(dirname $0)/test_bz1523599
+TEST ./$(dirname $0)/test_bz1523599 0 $H0 $V0 test_bz1523599 $logdir/bz1523599.log
+TEST ./$(dirname $0)/test_bz1523599 1 $H0 $V0 test_bz1523599 $logdir/bz1523599.log
+TEST ./$(dirname $0)/test_bz1523599 0 $H0 $V0 test_bz1523599 $logdir/bz1523599.log
+TEST ./$(dirname $0)/test_bz1523599 2 $H0 $V0 test_bz1523599 $logdir/bz1523599.log
+
+cleanup_tester $(dirname $0)/test_bz1523599
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
+
diff --git a/tests/bugs/quick-read/bz1523599/test_bz1523599.c b/tests/bugs/quick-read/bz1523599/test_bz1523599.c
new file mode 100644
index 00000000000..5076a9447f3
--- /dev/null
+++ b/tests/bugs/quick-read/bz1523599/test_bz1523599.c
@@ -0,0 +1,198 @@
+/*
+ * ./test_bz1523599 0 vm140-111 gv0 test211 log
+ * ./test_bz1523599 1 vm140-111 gv0 test211 log
+ * Open - Discard - Read - Then check read information to see if the initial
+ * TEST_STR_LEN/2 bytes read zero
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <sys/uio.h>
+
+#define TEST_STR_LEN 2048
+
+enum fallocate_flag {
+ TEST_WRITE,
+ TEST_DISCARD,
+ TEST_ZEROFILL,
+};
+
+void
+print_str(char *str, int len)
+{
+ int i, addr;
+
+ printf("%07x\t", 0);
+ for (i = 0; i < len; i++) {
+ printf("%02x", str[i]);
+ if (i) {
+ if ((i + 1) % 16 == 0)
+ printf("\n%07x\t", i + 1);
+ else if ((i + 1) % 4 == 0)
+ printf(" ");
+ }
+ }
+ printf("\n");
+}
+
+int
+test_read(char *str, int total_length, int len_zero)
+{
+ int i;
+ int ret = 0;
+
+ for (i = 0; i < len_zero; i++) {
+ if (str[i]) {
+ fprintf(stderr, "char at position %d not zeroed out\n", i);
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ for (i = len_zero; i < total_length; i++) {
+ if (str[i] != 0x11) {
+ fprintf(stderr, "char at position %d does not contain pattern\n",
+ i);
+ ret = -EIO;
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int opcode;
+ char *host_name, *volume_name, *file_path, *glfs_log_path;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+ off_t offset = 0;
+ size_t len_zero = TEST_STR_LEN / 2;
+ char writestr[TEST_STR_LEN];
+ char readstr[TEST_STR_LEN];
+ struct iovec iov = {&readstr, TEST_STR_LEN};
+ int i;
+ int ret = 1;
+
+ for (i = 0; i < TEST_STR_LEN; i++)
+ writestr[i] = 0x11;
+ for (i = 0; i < TEST_STR_LEN; i++)
+ readstr[i] = 0x22;
+
+ if (argc != 6) {
+ fprintf(
+ stderr,
+ "Syntax: %s <test type> <host> <volname> <file-path> <log-file>\n",
+ argv[0]);
+ return 1;
+ }
+
+ opcode = atoi(argv[1]);
+ host_name = argv[2];
+ volume_name = argv[3];
+ file_path = argv[4];
+ glfs_log_path = argv[5];
+
+ fs = glfs_new(volume_name);
+ if (!fs) {
+ perror("glfs_new");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", host_name, 24007);
+ if (ret != 0) {
+ perror("glfs_set_volfile_server");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, glfs_log_path, 7);
+ if (ret != 0) {
+ perror("glfs_set_logging");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ perror("glfs_init");
+ goto out;
+ }
+
+ fd = glfs_creat(fs, file_path, O_RDWR, 0777);
+ if (fd == NULL) {
+ perror("glfs_creat");
+ ret = -1;
+ goto out;
+ }
+
+ switch (opcode) {
+ case TEST_WRITE:
+ fprintf(stderr, "Test Write\n");
+ ret = glfs_write(fd, writestr, TEST_STR_LEN, 0);
+ if (ret < 0) {
+ perror("glfs_write");
+ goto out;
+ } else if (ret != TEST_STR_LEN) {
+ fprintf(stderr, "insufficient data written %d \n", ret);
+ ret = -EIO;
+ goto out;
+ }
+ ret = 0;
+ goto out;
+ case TEST_DISCARD:
+ fprintf(stderr, "Test Discard\n");
+ ret = glfs_discard(fd, offset, len_zero);
+ if (ret < 0) {
+ if (errno == EOPNOTSUPP) {
+ fprintf(stderr, "Operation not supported\n");
+ ret = 0;
+ goto out;
+ }
+ perror("glfs_discard");
+ goto out;
+ }
+ goto test_read;
+ case TEST_ZEROFILL:
+ fprintf(stderr, "Test Zerofill\n");
+ ret = glfs_zerofill(fd, offset, len_zero);
+ if (ret < 0) {
+ if (errno == EOPNOTSUPP) {
+ fprintf(stderr, "Operation not supported\n");
+ ret = 0;
+ goto out;
+ }
+ perror("glfs_zerofill");
+ goto out;
+ }
+ goto test_read;
+ default:
+ ret = -1;
+ fprintf(stderr, "Incorrect test code %d\n", opcode);
+ goto out;
+ }
+
+test_read:
+ ret = glfs_readv(fd, &iov, 1, 0);
+ if (ret < 0) {
+ perror("glfs_readv");
+ goto out;
+ }
+
+ /* printf("Read str\n"); print_str(readstr, TEST_STR_LEN); printf("\n"); */
+ ret = test_read(readstr, TEST_STR_LEN, len_zero);
+
+out:
+ if (fd)
+ glfs_close(fd);
+ glfs_fini(fs);
+
+ if (ret)
+ return -1;
+
+ return 0;
+}
diff --git a/tests/bugs/quota/afr-quota-xattr-mdata-heal.t b/tests/bugs/quota/afr-quota-xattr-mdata-heal.t
new file mode 100644
index 00000000000..ebfa5545728
--- /dev/null
+++ b/tests/bugs/quota/afr-quota-xattr-mdata-heal.t
@@ -0,0 +1,140 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 1MB
+TEST mkdir $M0/d
+TEST $CLI volume quota $V0 limit-usage /d 1MB
+TEST touch $M0/d/a
+echo abc > $M0/d/a
+
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "512Bytes" quotausage "/"
+
+#Set the acl xattrs directly on backend, for some reason on mount it gives error
+acl_access_val="0x0200000001000600ffffffff04000400ffffffff10000400ffffffff20000400ffffffff"
+acl_file_val="0x0000000400000001ffffffff0006000000000004ffffffff0004000000000010ffffffff0004000000000020ffffffff00040000"
+TEST setfattr -n system.posix_acl_access -v $acl_access_val $B0/${V0}0/d
+TEST setfattr -n trusted.SGI_ACL_FILE -v $acl_file_val $B0/${V0}0/d
+TEST setfattr -n system.posix_acl_access -v $acl_access_val $B0/${V0}1/d
+TEST setfattr -n trusted.SGI_ACL_FILE -v $acl_file_val $B0/${V0}1/d
+TEST setfattr -n trusted.foo -v "baz" $M0/d
+TEST setfattr -n trusted.foo -v "baz" $M0/d/a
+TEST setfattr -n trusted.foo1 -v "baz1" $M0/d
+TEST setfattr -n trusted.foo1 -v "baz1" $M0/d/a
+TEST setfattr -n trusted.foo3 -v "unchanged" $M0/d
+TEST setfattr -n trusted.foo3 -v "unchanged" $M0/d/a
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+#Induce metadata self-heal
+TEST setfattr -n trusted.foo -v "bar" $M0/d
+TEST setfattr -n trusted.foo -v "bar" $M0/d/a
+TEST setfattr -x trusted.foo1 $M0/d
+TEST setfattr -x trusted.foo1 $M0/d/a
+TEST setfattr -n trusted.foo2 -v "bar2" $M0/d
+TEST setfattr -n trusted.foo2 -v "bar2" $M0/d/a
+d_quota_contri=$(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.*.contri")
+d_quota_dirty=$(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.dirty")
+d_quota_limit=$(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.limit-set")
+d_quota_size=$(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.size")
+
+a_pgfid=$(getfattr -d -m . -e hex $B0/${V0}1/d/a | grep -E "trusted.pgfid.")
+
+#Change internal xattrs in the backend, later check that they are not healed
+TEST setfattr -n trusted.glusterfs.quota.00000000-0000-0000-0000-000000000001.contri -v 0x0000000000000400 $B0/${V0}0/d
+TEST setfattr -n trusted.glusterfs.quota.dirty -v 0x0000000000000400 $B0/${V0}0/d
+TEST setfattr -n trusted.glusterfs.quota.limit-set -v 0x0000000000000400 $B0/${V0}0/d #This will be healed, this is external xattr
+TEST setfattr -n trusted.glusterfs.quota.size -v 0x0000000000000400 $B0/${V0}0/d
+TEST setfattr -n $(echo $a_pgfid | cut -f1 -d'=') -v "orphan" $B0/${V0}0/d/a
+
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+#Check external xattrs match
+EXPECT "bar" echo $(getfattr -d -m. -e text $B0/${V0}0/d | grep trusted.foo)
+EXPECT "bar" echo $(getfattr -d -m. -e text $B0/${V0}0/d/a | grep trusted.foo)
+TEST ! getfattr -n trusted.foo1 $B0/${V0}0/d
+TEST ! getfattr -n trusted.foo1 $B0/${V0}0/d/a
+EXPECT "unchanged" echo $(getfattr -d -m. -e text $B0/${V0}0/d | grep trusted.foo3)
+EXPECT "unchanged" echo $(getfattr -d -m. -e text $B0/${V0}0/d/a | grep trusted.foo3)
+EXPECT "bar2" echo $(getfattr -d -m. -e text $B0/${V0}0/d | grep trusted.foo2)
+EXPECT "bar2" echo $(getfattr -d -m. -e text $B0/${V0}0/d/a | grep trusted.foo2)
+EXPECT "$d_quota_limit" echo $(getfattr -d -m . -e hex $B0/${V0}0/d | grep "trusted.glusterfs.quota.limit-set")
+
+EXPECT "bar" echo $(getfattr -d -m. -e text $B0/${V0}1/d | grep trusted.foo)
+EXPECT "bar" echo $(getfattr -d -m. -e text $B0/${V0}1/d/a | grep trusted.foo)
+TEST ! getfattr -n trusted.foo1 $B0/${V0}1/d
+TEST ! getfattr -n trusted.foo1 $B0/${V0}1/d/a
+EXPECT "unchanged" echo $(getfattr -d -m. -e text $B0/${V0}1/d | grep trusted.foo3)
+EXPECT "unchanged" echo $(getfattr -d -m. -e text $B0/${V0}1/d/a | grep trusted.foo3)
+EXPECT "bar2" echo $(getfattr -d -m. -e text $B0/${V0}1/d | grep trusted.foo2)
+EXPECT "bar2" echo $(getfattr -d -m. -e text $B0/${V0}1/d/a | grep trusted.foo2)
+EXPECT "$d_quota_limit" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep "trusted.glusterfs.quota.limit-set")
+
+#Test that internal xattrs on B0 are not healed
+EXPECT 0x0000000000000400 echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.glusterfs.quota.00000000-0000-0000-0000-000000000001.contri)
+EXPECT 0x0000000000000400 echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.glusterfs.quota.dirty)
+EXPECT "$d_quota_limit" echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.glusterfs.quota.limit-set) #This will be healed, this is external xattr
+EXPECT 0x0000000000000400 echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.glusterfs.quota.size)
+EXPECT "$acl_access_val" echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep system.posix_acl_access)
+EXPECT "$acl_file_val" echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.SGI_ACL_FILE)
+EXPECT "orphan" echo $(getfattr -d -m. -e text $B0/${V0}0/d/a | grep $(echo $a_pgfid | cut -f1 -d'='))
+
+#Test that xattrs didn't go bad in source
+EXPECT "$d_quota_contri" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.*.contri")
+EXPECT "$d_quota_dirty" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.dirty")
+EXPECT "$d_quota_limit" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.limit-set")
+EXPECT "$d_quota_size" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.size")
+EXPECT "$a_pgfid" echo $(getfattr -d -m . -e hex $B0/${V0}1/d/a | grep -E "trusted.pgfid.")
+EXPECT "$acl_access_val" echo $(getfattr -d -m. -e hex $B0/${V0}1/d | grep system.posix_acl_access)
+EXPECT "$acl_file_val" echo $(getfattr -d -m. -e hex $B0/${V0}1/d | grep trusted.SGI_ACL_FILE)
+
+#Do a lookup and it shouldn't trigger metadata self-heal and heal xattrs
+EXPECT "bar" echo $(getfattr -d -m. -e text $B0/${V0}0/d | grep trusted.foo)
+EXPECT "bar" echo $(getfattr -d -m. -e text $B0/${V0}0/d/a | grep trusted.foo)
+TEST ! getfattr -n trusted.foo1 $B0/${V0}0/d
+TEST ! getfattr -n trusted.foo1 $B0/${V0}0/d/a
+EXPECT "unchanged" echo $(getfattr -d -m. -e text $B0/${V0}0/d | grep trusted.foo3)
+EXPECT "unchanged" echo $(getfattr -d -m. -e text $B0/${V0}0/d/a | grep trusted.foo3)
+EXPECT "bar2" echo $(getfattr -d -m. -e text $B0/${V0}0/d | grep trusted.foo2)
+EXPECT "bar2" echo $(getfattr -d -m. -e text $B0/${V0}0/d/a | grep trusted.foo2)
+EXPECT "$d_quota_limit" echo $(getfattr -d -m . -e hex $B0/${V0}0/d | grep "trusted.glusterfs.quota.limit-set")
+
+EXPECT "bar" echo $(getfattr -d -m. -e text $B0/${V0}1/d | grep trusted.foo)
+EXPECT "bar" echo $(getfattr -d -m. -e text $B0/${V0}1/d/a | grep trusted.foo)
+TEST ! getfattr -n trusted.foo1 $B0/${V0}1/d
+TEST ! getfattr -n trusted.foo1 $B0/${V0}1/d/a
+EXPECT "unchanged" echo $(getfattr -d -m. -e text $B0/${V0}1/d | grep trusted.foo3)
+EXPECT "unchanged" echo $(getfattr -d -m. -e text $B0/${V0}1/d/a | grep trusted.foo3)
+EXPECT "bar2" echo $(getfattr -d -m. -e text $B0/${V0}1/d | grep trusted.foo2)
+EXPECT "bar2" echo $(getfattr -d -m. -e text $B0/${V0}1/d/a | grep trusted.foo2)
+EXPECT "$d_quota_limit" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep "trusted.glusterfs.quota.limit-set")
+
+#Test that internal xattrs on B0 are not healed
+EXPECT 0x0000000000000400 echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.glusterfs.quota.00000000-0000-0000-0000-000000000001.contri)
+EXPECT 0x0000000000000400 echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.glusterfs.quota.dirty)
+EXPECT "$d_quota_limit" echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.glusterfs.quota.limit-set) #This will be healed, this is external xattr
+EXPECT 0x0000000000000400 echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.glusterfs.quota.size)
+EXPECT "orphan" echo $(getfattr -d -m. -e text $B0/${V0}0/d/a | grep $(echo $a_pgfid | cut -f1 -d'='))
+
+#Test that xattrs didn't go bad in source
+EXPECT "$d_quota_contri" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.*.contri")
+EXPECT "$d_quota_dirty" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.dirty")
+EXPECT "$d_quota_limit" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.limit-set")
+EXPECT "$d_quota_size" echo $(getfattr -d -m . -e hex $B0/${V0}1/d | grep -E "trusted.glusterfs.quota.size")
+EXPECT "$a_pgfid" echo $(getfattr -d -m . -e hex $B0/${V0}1/d/a | grep -E "trusted.pgfid.")
+
+EXPECT "$acl_access_val" echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep system.posix_acl_access)
+EXPECT "$acl_file_val" echo $(getfattr -d -m. -e hex $B0/${V0}0/d | grep trusted.SGI_ACL_FILE)
+EXPECT "$acl_access_val" echo $(getfattr -d -m. -e hex $B0/${V0}1/d | grep system.posix_acl_access)
+EXPECT "$acl_file_val" echo $(getfattr -d -m. -e hex $B0/${V0}1/d | grep trusted.SGI_ACL_FILE)
+cleanup
diff --git a/tests/bugs/quota/bug-1035576.t b/tests/bugs/quota/bug-1035576.t
new file mode 100644
index 00000000000..cbc1b69ebb3
--- /dev/null
+++ b/tests/bugs/quota/bug-1035576.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This script tests that self-heal of limit-set xattr is happening on a directory
+#but self-heal of quota.size xattr is not happening
+
+cleanup;
+
+TEST glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+#Lets disable perf-xls so that lookup would reach afr
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume quota $V0 enable
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+cd $M0
+TEST mkdir $M0/a
+TEST $CLI volume quota $V0 limit-usage /a 1GB
+echo abc > $M0/a/f
+$CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+quota_limit_val1=$(get_hex_xattr trusted.glusterfs.quota.limit-set $B0/${V0}1/a)
+quota_size_val1=$(get_hex_xattr trusted.glusterfs.quota.size $B0/${V0}1/a)
+
+#Trigger entry,metadata self-heal
+TEST ls $M0/a
+
+quota_limit_val0=$(get_hex_xattr trusted.glusterfs.quota.limit-set $B0/${V0}0/a)
+quota_size_val0=$(get_hex_xattr trusted.glusterfs.quota.size $B0/${V0}0/a)
+
+#Test that limit-set xattr is healed
+TEST [ $quota_limit_val0 == $quota_limit_val1 ]
+
+#Only entry, metadata self-heal is done quota size value should not be same
+TEST [ $quota_size_val0 != $quota_size_val1 ]
+TEST cat $M0/a/f
+
+#Now that data self-heal is done quota size value should be same
+quota_size_val0=$(get_hex_xattr trusted.glusterfs.quota.size $B0/${V0}0/a)
+TEST [ $quota_size_val0 == $quota_size_val1 ]
+cleanup
diff --git a/tests/bugs/quota/bug-1038598.t b/tests/bugs/quota/bug-1038598.t
new file mode 100644
index 00000000000..108e14cb8d8
--- /dev/null
+++ b/tests/bugs/quota/bug-1038598.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../../basic/quota.c -o $QDD
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '2' brick_count $V0
+
+TEST $CLI volume start $V0;
+
+TEST $CLI volume quota $V0 enable
+sleep 5
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir -p $M0/test_dir
+TEST $CLI volume quota $V0 limit-usage /test_dir 10MB 50
+
+EXPECT "10.0MB" quota_hard_limit "/test_dir";
+EXPECT "50%" quota_soft_limit "/test_dir";
+
+TEST $QDD $M0/test_dir/file1.txt 256 16
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "4.0MB" quotausage "/test_dir";
+EXPECT 'No' quota_sl_exceeded "/test_dir";
+EXPECT 'No' quota_hl_exceeded "/test_dir";
+
+TEST $QDD $M0/test_dir/file1.txt 256 24
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "6.0MB" quotausage "/test_dir";
+EXPECT 'Yes' quota_sl_exceeded "/test_dir";
+EXPECT 'No' quota_hl_exceeded "/test_dir";
+
+#set timeout to 0 so that quota gets enforced without any lag
+TEST $CLI volume set $V0 features.hard-timeout 0
+TEST $CLI volume set $V0 features.soft-timeout 0
+
+TEST ! $QDD $M0/test_dir/file1.txt 256 60
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT 'Yes' quota_sl_exceeded "/test_dir";
+EXPECT 'Yes' quota_hl_exceeded "/test_dir";
+
+rm -f $QDD
+
+cleanup;
diff --git a/tests/bugs/quota/bug-1087198.t b/tests/bugs/quota/bug-1087198.t
new file mode 100644
index 00000000000..618a46b957d
--- /dev/null
+++ b/tests/bugs/quota/bug-1087198.t
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+## The script tests the logging of the quota in the bricks after reaching soft
+## limit of the configured limit.
+##
+## Steps:
+## 1. Create and mount the volume
+## 2. Enable quota and set the limit on 2 directories
+## 3. Write some data to cross the limit
+## 4. Grep the string expected in brick logs
+## 5. Wait for 10 seconds (alert timeout is set to 10s)
+## 6. Repeat 3 and 4.
+## 7. Cleanup
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../../basic/quota.c -o $QDD
+
+#1
+## Step 1
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick{1..4};
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0;
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 noac,nolock
+
+QUOTA_LIMIT_DIR="quota_limit_dir"
+BRICK_LOG_DIR="`gluster --print-logdir`/bricks"
+
+#9
+TEST mkdir $N0/$QUOTA_LIMIT_DIR
+
+#10
+## Step 2
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 alert-time 10
+TEST $CLI volume quota $V0 hard-timeout 0
+TEST $CLI volume quota $V0 soft-timeout 0
+
+# Set limit to 200KB (204800B)
+TEST $CLI volume quota $V0 limit-usage / 204800B
+TEST $CLI volume quota $V0 limit-usage /$QUOTA_LIMIT_DIR 100KB
+
+#16
+## Step 3 and 4
+TEST $QDD $N0/$QUOTA_LIMIT_DIR/95KB_file 1 95
+#Uncomment below TEST once the bug# 1202292 is fixed
+#TEST grep -e "\"Usage crossed soft limit:.*used by /$QUOTA_LIMIT_DIR\"" -- $BRICK_LOG_DIR/*
+
+TEST $QDD $N0/100KB_file 1 100
+#Uncomment below TEST once the bug# 1202292 is fixed
+#TEST grep -e "\"Usage crossed soft limit:.*used by /\"" -- $BRICK_LOG_DIR/*
+
+#20
+## Step 5
+TEST sleep 10
+
+## Step 6
+TEST $QDD $N0/$QUOTA_LIMIT_DIR/1KB_file 1 1
+TEST grep -e "\"Usage is above soft limit:.*used by /$QUOTA_LIMIT_DIR\"" -- $BRICK_LOG_DIR/*
+
+#23
+TEST $QDD $N0/1KB_file 1 1
+TEST grep -e "\"Usage is above soft limit:.*used by /\"" -- $BRICK_LOG_DIR/*
+
+#25
+## Step 7
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST $CLI volume stop $V0
+
+rm -f $QDD
+
+cleanup;
diff --git a/tests/bugs/quota/bug-1104692.t b/tests/bugs/quota/bug-1104692.t
new file mode 100755
index 00000000000..9640996135f
--- /dev/null
+++ b/tests/bugs/quota/bug-1104692.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2 $H0:$B0/${V0}3
+TEST $CLI volume start $V0
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+TEST mkdir -p $M0/limit_one/limit_two/limit_three $M0/limit_four \
+ $M0/limit_one/limit_five
+
+TEST $CLI volume set $V0 server.root-squash on
+TEST $CLI volume quota $V0 enable
+
+TEST $CLI volume quota $V0 limit-usage / 1GB
+TEST $CLI volume quota $V0 limit-usage /limit_one 1GB
+TEST $CLI volume quota $V0 limit-usage /limit_one/limit_two 1GB
+TEST $CLI volume quota $V0 limit-usage /limit_one/limit_two/limit_three 1GB
+TEST $CLI volume quota $V0 limit-usage /limit_four 1GB
+TEST $CLI volume quota $V0 limit-usage /limit_one/limit_five 1GB
+
+cleanup;
diff --git a/tests/bugs/quota/bug-1153964.t b/tests/bugs/quota/bug-1153964.t
new file mode 100644
index 00000000000..2e449d3ba00
--- /dev/null
+++ b/tests/bugs/quota/bug-1153964.t
@@ -0,0 +1,81 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+function rename_loop()
+{
+ local i=0
+ local limit=$1
+ while [ $i -lt $limit ]
+ do
+ j=$[$i + 1]
+ mv $N0/test_dir/file$i $N0/test_dir/file$j
+ if [ "$?" != "0" ]
+ then
+ return 1
+ fi
+ i=$[$i + 1]
+ done
+ return 0
+}
+
+function createFile_and_checkLimit()
+{
+ local count_val=$1;
+ dd if=/dev/zero of="$N0/test_dir/file0" bs=1048576 count=$count_val
+ sleep 3
+ if [ -f $N0/test_dir/file0 ]
+ then
+ rename_loop 10
+ if [ "$?" == "0" ]
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+ fi
+}
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}2
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+TEST $CLI volume quota $V0 enable
+EXPECT 'on' volinfo_field $V0 'features.quota'
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+TEST mkdir -p $N0/test_dir/
+
+# Try to rename file under various case and check if
+# quota limit exceeds or not.
+TEST $CLI volume quota $V0 limit-usage /test_dir 100MB
+# Case1 : If used size is less than hard-limit size
+# Create a 600MB file
+EXPECT 'Y' createFile_and_checkLimit 60
+
+TEST rm -rf $N0/test_dir/*
+# Case2 : If used size is equal to hard-limit size
+# Create a 100MB file
+EXPECT 'Y' createFile_and_checkLimit 100
+
+TEST rm -rf $N0/test_dir/*
+# Case3 : If used size is greater than hard-limit size
+# Create a 110MB file
+EXPECT 'Y' createFile_and_checkLimit 110
+
+# remove this directory as it has been created as part
+# of above testcase
+TEST rm -rf $N0/test_dir/
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+cleanup;
diff --git a/tests/bugs/quota/bug-1178130.t b/tests/bugs/quota/bug-1178130.t
new file mode 100644
index 00000000000..ccd6b792cf8
--- /dev/null
+++ b/tests/bugs/quota/bug-1178130.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# This regression test tries to ensure renaming a directory with content, and
+# no limit set, is accounted properly, when moved into a directory with quota
+# limit set.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../../basic/quota.c -o $QDD
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V0;
+
+TEST $CLI volume quota $V0 enable;
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST $CLI volume quota $V0 limit-usage / 500MB
+TEST $CLI volume quota $V0 hard-timeout 0
+TEST $CLI volume quota $V0 soft-timeout 0
+
+TEST $QDD $M0/file 256 40
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "10.0MB" quotausage "/"
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST mv $M0/file $M0/file2
+TEST $CLI volume start $V0 force;
+
+#wait for self heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "0" STAT "$B0/${V0}2/file2"
+
+#usage should remain same after rename and self-heal operation
+EXPECT "10.0MB" quotausage "/"
+
+rm -f $QDD
+
+cleanup;
diff --git a/tests/bugs/quota/bug-1235182.t b/tests/bugs/quota/bug-1235182.t
new file mode 100644
index 00000000000..6091146cb97
--- /dev/null
+++ b/tests/bugs/quota/bug-1235182.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+# This regression test tries to ensure renaming a directory with content, and
+# no limit set, is accounted properly, when moved into a directory with quota
+# limit set.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../../basic/quota.c -o $QDD
+
+TEST glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0};
+TEST $CLI volume start $V0;
+
+TEST $CLI volume quota $V0 enable;
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST $CLI volume quota $V0 limit-usage / 1GB
+TEST $CLI volume quota $V0 hard-timeout 0
+TEST $CLI volume quota $V0 soft-timeout 0
+
+TEST mkdir $M0/1
+$QDD $M0/1/f1 256 400&
+PID=$!
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $M0/1/f1
+TESTS_EXPECTED_IN_LOOP=150
+for i in {1..50}; do
+ ii=`expr $i + 1`;
+ touch $M0/$i/f$ii
+ echo Hello > $M0/$i/f$ii
+
+ #rename within same dir
+ TEST_IN_LOOP mv -f $M0/$i/f$i $M0/$i/f$ii;
+
+ #rename to different dir
+ TEST_IN_LOOP mkdir $M0/$ii
+ TEST_IN_LOOP mv -f $M0/$i/f$ii $M0/$ii/f$ii;
+ stat $M0/$ii/f$ii >/dev/null
+done
+
+echo "Wait for process with pid $PID to complete"
+wait $PID
+echo "Process with pid $PID finished"
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $M0/51/f51
+
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "100.0MB" quotausage "/"
+
+rm -f $QDD
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/bugs/quota/bug-1243798.t b/tests/bugs/quota/bug-1243798.t
new file mode 100644
index 00000000000..fa6abeb08fb
--- /dev/null
+++ b/tests/bugs/quota/bug-1243798.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0;
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 noac,nolock
+
+TEST mkdir -p $N0/dir1/dir2
+TEST touch $N0/dir1/dir2/file
+
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 hard-timeout 0
+TEST $CLI volume quota $V0 soft-timeout 0
+TEST $CLI volume quota $V0 limit-objects /dir1 10
+
+TEST stat $N0/dir1/dir2/file
+
+sleep 2
+
+#Remove size and contri xattr from /dir1
+#Remove contri xattr from /dir1/dir2
+setfattr -x trusted.glusterfs.quota.size.1 $B0/$V0/dir1
+setfattr -x trusted.glusterfs.quota.00000000-0000-0000-0000-000000000001.contri.1 $B0/$V0/dir1
+contri=$(getfattr -d -m . -e hex $B0/$V0/dir1/dir2 | grep contri | awk -F= '{print $1}')
+setfattr -x $contri $B0/$V0/dir1/dir2
+
+#Initiate healing by writing to a file
+echo Hello > $N0/dir1/dir2/file
+
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "2" quota_object_list_field "/dir1" 5
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+cleanup;
diff --git a/tests/bugs/quota/bug-1250582-volume-reset-should-not-remove-quota-quota-deem-statfs.t b/tests/bugs/quota/bug-1250582-volume-reset-should-not-remove-quota-quota-deem-statfs.t
new file mode 100644
index 00000000000..3b55e739bf9
--- /dev/null
+++ b/tests/bugs/quota/bug-1250582-volume-reset-should-not-remove-quota-quota-deem-statfs.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+# This test ensures that 'gluster volume reset' command do not remove
+# features.quota-deem-statfs, features.quota.
+# Also, tests that 'gluster volume set features.quota-deem-statfs' can be
+# turned on/off when quota is enabled.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${v0}{1,2};
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume quota $V0 enable
+EXPECT 'on' volinfo_field $V0 'features.quota'
+EXPECT 'on' volinfo_field $V0 'features.inode-quota'
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume reset $V0
+EXPECT 'on' volinfo_field $V0 'features.quota'
+EXPECT 'on' volinfo_field $V0 'features.inode-quota'
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume reset $V0 force
+EXPECT 'on' volinfo_field $V0 'features.quota'
+EXPECT 'on' volinfo_field $V0 'features.inode-quota'
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume reset $V0 features.quota-deem-statfs
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume set $V0 features.quota-deem-statfs off
+EXPECT 'off' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume set $V0 features.quota-deem-statfs on
+EXPECT 'on' volinfo_field $V0 'features.quota-deem-statfs'
+
+TEST $CLI volume quota $V0 disable
+EXPECT 'off' volinfo_field $V0 'features.quota'
+EXPECT 'off' volinfo_field $V0 'features.inode-quota'
+EXPECT '' volinfo_field $V0 'features.quota-deem-statfs'
+
+cleanup;
+
diff --git a/tests/bugs/quota/bug-1260545.t b/tests/bugs/quota/bug-1260545.t
new file mode 100644
index 00000000000..46808022f01
--- /dev/null
+++ b/tests/bugs/quota/bug-1260545.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+QDD=$(dirname $0)/quota
+# compile the test write program and run it
+build_tester $(dirname $0)/../../basic/quota.c -o $QDD
+
+TEST glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}2;
+TEST $CLI volume start $V0;
+
+TEST $CLI volume quota $V0 enable;
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST $CLI volume quota $V0 limit-usage / 11MB
+TEST $CLI volume quota $V0 hard-timeout 0
+TEST $CLI volume quota $V0 soft-timeout 0
+
+TEST $QDD $M0/f1 256 40
+
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "10.0MB" quotausage "/"
+
+if [ -f "$B0/${V0}1/f1" ]; then
+ HASHED="$B0/${V0}1"
+ OTHER="$B0/${V0}2"
+else
+ HASHED="$B0/${V0}2"
+ OTHER="$B0/${V0}1"
+fi
+
+TEST $CLI volume remove-brick $V0 $H0:${HASHED} start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:${HASHED}";
+
+#check consistency in mount point and also check that file is migrated to OTHER
+TEST [ -f "$OTHER/f1" ];
+TEST [ -f "$M0/f1" ];
+
+#check that remove-brick status should not have any failed or skipped files
+var=`$CLI volume remove-brick $V0 $H0:${HASHED} status | grep completed`
+TEST [ `echo $var | awk '{print $5}'` = "0" ]
+TEST [ `echo $var | awk '{print $6}'` = "0" ]
+
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "10.0MB" quotausage "/"
+
+rm -f $QDD
+cleanup;
diff --git a/tests/bugs/quota/bug-1287996.t b/tests/bugs/quota/bug-1287996.t
new file mode 100644
index 00000000000..2f46ee1ca2d
--- /dev/null
+++ b/tests/bugs/quota/bug-1287996.t
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function check_peers {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup;
+
+TEST launch_cluster 2;
+
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0
+TEST $CLI_1 volume start $V0
+TEST $CLI_1 volume quota $V0 enable
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 check_peers
+
+cleanup;
diff --git a/tests/bugs/quota/bug-1292020.t b/tests/bugs/quota/bug-1292020.t
new file mode 100644
index 00000000000..b70047ae3f9
--- /dev/null
+++ b/tests/bugs/quota/bug-1292020.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function write_sample_data () {
+ dd if=/dev/zero of=$M0/f1 bs=256k count=400 2>&1 |
+ egrep -i 'exceeded|no space' && echo 'passed'
+}
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0;
+TEST $CLI volume quota $V0 enable;
+TEST $CLI volume quota $V0 limit-usage / 1
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+
+EXPECT_WITHIN 30 "passed" write_sample_data
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+cleanup;
diff --git a/tests/bugs/quota/bug-1293601.t b/tests/bugs/quota/bug-1293601.t
new file mode 100644
index 00000000000..741758b73f5
--- /dev/null
+++ b/tests/bugs/quota/bug-1293601.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4}
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "4" online_brick_count
+TEST $CLI volume quota $V0 enable
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+for i in {1..512}; do
+ dd if=/dev/zero of=$M0/f$i bs=1k count=1
+done
+
+mkdir $M0/dir1
+for i in {513..1024}; do
+ dd if=/dev/zero of=$M0/dir1/f$i bs=1k count=1
+done
+
+EXPECT_WITHIN $MARKER_UPDATE_TIMEOUT "1.0MB" quotausage "/"
+
+TEST $CLI volume quota $V0 disable
+TEST $CLI volume quota $V0 enable
+
+EXPECT_WITHIN 60 "1.0MB" quotausage "/"
+
+cleanup;
diff --git a/tests/bugs/read-only/bug-1134822-read-only-default-in-graph.t b/tests/bugs/read-only/bug-1134822-read-only-default-in-graph.t
new file mode 100644
index 00000000000..cc27d04656f
--- /dev/null
+++ b/tests/bugs/read-only/bug-1134822-read-only-default-in-graph.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+#Test case: This test checks when a volume is made read-only though volume set
+# and bricks are not restarted no write operations can be performed on
+# this volume
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create a volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}0;
+TEST $CLI volume start $V0
+
+# Mount FUSE and create file/directory, create should succeed as the read-only
+# is off by default
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST touch $M0/zerobytefile1.txt
+TEST mkdir $M0/test_dir1
+TEST dd if=/dev/zero of=$M0/file1 bs=1024 count=1024
+
+# turn on read-only option through volume set
+TEST gluster volume set $V0 read-only on
+
+# worm feature can't be enabled if read-only is enabled
+TEST ! gluster volume set $V0 worm on
+
+# turn off read-only option through volume set
+TEST gluster volume set $V0 read-only off
+
+# turn on worm option through volume set
+TEST gluster volume set $V0 worm on
+
+# read-only feature can't be enabled if worm is enabled
+TEST ! gluster volume set $V0 read-only on
+
+
+TEST gluster volume set $V0 worm off
+TEST gluster volume set $V0 read-only on
+
+# Check whether read-operations can be performed or not
+TEST cat $M0/file1
+
+# All write operations should fail now
+TEST ! touch $M0/zerobytefile2.txt
+TEST ! mkdir $M0/test_dir2
+TEST ! dd if=/dev/zero of=$M0/file2 bs=1024 count=1024
+
+# turn off read-only option through volume set
+TEST gluster volume set $V0 read-only off
+
+# All write operations should succeed now
+TEST touch $M0/zerobytefile2.txt
+TEST mkdir $M0/test_dir2
+TEST dd if=/dev/zero of=$M0/file2 bs=1024 count=1024
+
+# Turn on worm
+TEST gluster volume set $V0 worm on
+
+# unlink should fail now
+TEST ! rm -rf $M0/zerobytefile2.txt
+
+cleanup;
diff --git a/tests/bugs/readdir-ahead/bug-1390050.c b/tests/bugs/readdir-ahead/bug-1390050.c
new file mode 100644
index 00000000000..9578df2dd90
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1390050.c
@@ -0,0 +1,72 @@
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <string.h>
+#include <errno.h>
+
+int
+main(int argc, char *argv[])
+{
+ const char *glfs_dir = NULL, *filepath = NULL;
+ DIR *dirfd = NULL;
+ int filefd = 0, ret = 0;
+ struct stat stbuf = {
+ 0,
+ };
+ size_t size_before_write = 0;
+
+ glfs_dir = argv[1];
+ filepath = argv[2];
+ dirfd = opendir(glfs_dir);
+ if (dirfd == NULL) {
+ fprintf(stderr, "opening directory failed (%s)\n", strerror(errno));
+ goto err;
+ }
+
+ filefd = open(filepath, O_RDWR);
+ if (filefd < 0) {
+ fprintf(stderr, "open failed on path %s (%s)\n", filepath,
+ strerror(errno));
+ goto err;
+ }
+
+ ret = stat(filepath, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "stat failed on path %s (%s)\n", filepath,
+ strerror(errno));
+ goto err;
+ }
+
+ size_before_write = stbuf.st_size;
+
+ ret = write(filefd, "testdata", strlen("testdata123") + 1);
+ if (ret <= 0) {
+ fprintf(stderr, "write failed (%s)\n", strerror(errno));
+ goto err;
+ }
+
+ while (readdir(dirfd)) {
+ /* do nothing */
+ }
+
+ ret = stat(filepath, &stbuf);
+ if (ret < 0) {
+ fprintf(stderr, "stat failed on path %s (%s)\n", strerror(errno));
+ goto err;
+ }
+
+ if (stbuf.st_size == size_before_write) {
+ fprintf(stderr,
+ "file size (%lu) has not changed even after "
+ "its written to\n",
+ stbuf.st_size);
+ goto err;
+ }
+
+ return 0;
+err:
+ return -1;
+}
diff --git a/tests/bugs/readdir-ahead/bug-1390050.t b/tests/bugs/readdir-ahead/bug-1390050.t
new file mode 100644
index 00000000000..ab1d7d4ead9
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1390050.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B{0..1}/$V0
+TEST $CLI volume set $V0 readdir-ahead on
+
+DIRECTORY="$M0/subdir1/subdir2"
+
+#Make sure md-cache has large timeout to hold stat from readdirp_cbk in its cache
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+rm -rf $M0/*
+TEST mkdir -p $DIRECTORY
+rm -rf $DIRECTORY/*
+TEST touch $DIRECTORY/file{0..10}
+rdd_tester=$(dirname $0)/rdd-tester
+TEST build_tester $(dirname $0)/bug-1390050.c -o $rdd_tester
+TEST $rdd_tester $DIRECTORY $DIRECTORY/file4
+rm -f $rdd_tester
+cleanup;
+
diff --git a/tests/bugs/readdir-ahead/bug-1436090.t b/tests/bugs/readdir-ahead/bug-1436090.t
new file mode 100755
index 00000000000..e0877f15684
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1436090.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+TEST launch_cluster 2;
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+$CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0
+EXPECT 'Created' cluster_volinfo_field 1 $V0 'Status';
+
+$CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+TEST glusterfs -s $H1 --volfile-id $V0 $M0;
+TEST mkdir $M0/dir1
+
+# Create a large file (4 GB), so that rebalance takes time
+# Since we really don't care about the contents of the file, we use fallocate
+# to generate the file much faster. We could also use truncate, which is even
+# faster, but rebalance could take advantage of an sparse file and migrate it
+# in an optimized way, but we don't want a fast migration.
+TEST fallocate -l 4G $M0/dir1/foo
+
+TEST mv $M0/dir1/foo $M0/dir1/bar
+
+TEST $CLI_1 volume rebalance $V0 start force
+TEST ! $CLI_1 volume set $V0 parallel-readdir on
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 2 $V0
+TEST $CLI_1 volume set $V0 parallel-readdir on
+TEST mv $M0/dir1/bar $M0/dir1/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs -s $H1 --volfile-id $V0 $M0;
+TEST $CLI_1 volume rebalance $V0 start force
+TEST ln $M0/dir1/foo $M0/dir1/bar
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 1 $V0
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" cluster_rebalance_status_field 2 $V0
+cleanup;
diff --git a/tests/bugs/readdir-ahead/bug-1439640.t b/tests/bugs/readdir-ahead/bug-1439640.t
new file mode 100755
index 00000000000..dcd54076444
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1439640.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B{0..1}/$V0
+TEST $CLI volume set $V0 readdir-ahead on
+TEST $CLI volume start $V0
+
+TEST ! $CLI volume set $V0 parallel-readdir sdf
+
+TEST $CLI volume set $V0 parallel-readdir off
+TEST $CLI volume set $V0 parallel-readdir on
+
+TEST ! $CLI volume set $V0 rda-cache-limit 0
+TEST ! $CLI volume set $V0 rda-cache-limit -634
+TEST ! $CLI volume set $V0 rda-cache-limit 87adh
+TEST ! $CLI volume set $V0 parallel-readdir sdf
+
+TEST ! $CLI volume set $V0 rda-request-size 0
+TEST ! $CLI volume set $V0 rda-request-size -634
+TEST ! $CLI volume set $V0 rda-request-size 87adh
+
+TEST $CLI volume set $V0 rda-cache-limit 10MB
+TEST $CLI volume set $V0 rda-request-size 128KB
+
+cleanup;
diff --git a/tests/bugs/readdir-ahead/bug-1446516.t b/tests/bugs/readdir-ahead/bug-1446516.t
new file mode 100755
index 00000000000..2bf6520d861
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1446516.t
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1..4}
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 parallel-readdir on
+
+TEST $CLI volume set $V0 rda-cache-limit 4GB
+
+TEST $CLI volume set $V0 parallel-readdir off
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+cleanup;
diff --git a/tests/bugs/readdir-ahead/bug-1512437.t b/tests/bugs/readdir-ahead/bug-1512437.t
new file mode 100755
index 00000000000..50eaa7d6696
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1512437.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 parallel-readdir on
+TEST $CLI volume set $V0 readdir-optimize on
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+TEST mkdir -p $M0/subdir1/subdir2;
+umount $M0
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+count=`ls -1 $M0/subdir1 | wc -l`
+TEST [ $count -eq 1 ]
+
+cleanup;
diff --git a/tests/bugs/readdir-ahead/bug-1670253-consistent-metadata.t b/tests/bugs/readdir-ahead/bug-1670253-consistent-metadata.t
new file mode 100644
index 00000000000..6adfc17c92c
--- /dev/null
+++ b/tests/bugs/readdir-ahead/bug-1670253-consistent-metadata.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 readdir-ahead on #on by default as of writing this .t.
+TEST $CLI volume set $V0 consistent-metadata on
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+TEST touch $M0/FILE
+echo "abc" >> $M0/FILE
+EXPECT "^0$" echo $?
+EXPECT "abc" cat $M0/FILE
+echo "truncate" >$M0/FILE
+EXPECT "^0$" echo $?
+EXPECT "truncate" cat $M0/FILE
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup;
diff --git a/tests/bugs/replicate/886998/strict-readdir.t b/tests/bugs/replicate/886998/strict-readdir.t
new file mode 100644
index 00000000000..63fe313b201
--- /dev/null
+++ b/tests/bugs/replicate/886998/strict-readdir.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../../include.rc
+. $(dirname $0)/../../../volume.rc
+
+function num_files_in_dir {
+ d=$1
+ ls $d | sort | uniq | wc -l
+}
+
+#Basic sanity tests for readdir functionality
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/r2d2_0 $H0:$B0/r2d2_1 $H0:$B0/r2d2_2 $H0:$B0/r2d2_3
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-server=$H0 --volfile-id=/$V0 $M0
+
+TEST touch $M0/{1..100}
+EXPECT "100" num_files_in_dir $M0
+
+TEST kill_brick $V0 $H0 $B0/r2d2_0
+TEST kill_brick $V0 $H0 $B0/r2d2_2
+EXPECT "100" num_files_in_dir $M0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST kill_brick $V0 $H0 $B0/r2d2_1
+TEST kill_brick $V0 $H0 $B0/r2d2_3
+EXPECT "100" num_files_in_dir $M0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 3
+
+TEST $CLI volume set $V0 cluster.strict-readdir on
+EXPECT "on" volinfo_field $V0 cluster.strict-readdir
+TEST kill_brick $V0 $H0 $B0/r2d2_0
+TEST kill_brick $V0 $H0 $B0/r2d2_2
+EXPECT "100" num_files_in_dir $M0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST kill_brick $V0 $H0 $B0/r2d2_1
+TEST kill_brick $V0 $H0 $B0/r2d2_3
+EXPECT "100" num_files_in_dir $M0
+cleanup;
diff --git a/tests/bugs/replicate/bug-1015990-rep.t b/tests/bugs/replicate/bug-1015990-rep.t
new file mode 100755
index 00000000000..ab8166e372a
--- /dev/null
+++ b/tests/bugs/replicate/bug-1015990-rep.t
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
+
+
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+TEST kill_brick $V0 $H0 $B0/$V0"3"
+
+for i in {1..100}; do echo "STRING" > $M0/File$i; done
+
+# Check shd is connected to all up bricks before running statistics command.
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
+
+command_output=$(gluster volume heal $V0 statistics heal-count replica $H0:$B0/$V0"1")
+substring="Number of entries:"
+count=0
+while read -r line;
+do
+ if [[ "$line" == *$substring* ]]
+ then
+ value=$(echo $line | cut -f 2 -d :)
+ count=$(($count + $value))
+ fi
+
+done <<< "$command_output"
+
+brick_2_entries_count=$count
+xattrop_count_brick_2=$(count_sh_entries $B0/$V0"2")
+EXPECT $brick_2_entries_count echo $xattrop_count_brick_2
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1015990.t b/tests/bugs/replicate/bug-1015990.t
new file mode 100755
index 00000000000..a8b12f2c202
--- /dev/null
+++ b/tests/bugs/replicate/bug-1015990.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+
+
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+sleep 5
+TEST kill_brick $V0 $H0 $B0/$V0"3"
+sleep 5
+
+for i in {1..100}; do echo "STRING" > $M0/File$i; done
+
+brick_2_sh_entries=$(count_sh_entries $B0/$V0"2")
+brick_4_sh_entries=$(count_sh_entries $B0/$V0"4")
+
+
+command_output=$(gluster volume heal $V0 statistics heal-count)
+
+
+substring="Number of entries:"
+count=0
+while read -r line;
+do
+ if [[ "$line" == *$substring* ]]
+ then
+ value=$(echo $line | cut -f 2 -d :)
+ count=$(($count + $value))
+ fi
+
+done <<< "$command_output"
+
+brick_2_entries_count=$(($count-$value))
+brick_4_entries_count=$value
+
+xattrop_count_brick_2=$(count_sh_entries $B0/$V0"2")
+xattrop_count_brick_4=$(count_sh_entries $B0/$V0"4")
+EXPECT $brick_2_entries_count echo $xattrop_count_brick_2
+EXPECT $brick_4_entries_count echo $xattrop_count_brick_4
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0
+
+cleanup;
+
diff --git a/tests/bugs/replicate/bug-1032927.t b/tests/bugs/replicate/bug-1032927.t
new file mode 100644
index 00000000000..eb663d03fed
--- /dev/null
+++ b/tests/bugs/replicate/bug-1032927.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This tests if pathinfo getxattr fails when one of the bricks is down
+#Lets hope it doesn't
+
+cleanup;
+function get_pathinfo_in_loop {
+ failed=0
+ for i in {1..1000}
+ do
+ getfattr -n trusted.glusterfs.pathinfo $M0 2>/dev/null
+ if [ $? -ne 0 ]; then failed=1;break; fi
+ done
+ return $failed
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+cd $M0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+#when one of the bricks is down getfattr of pathinfo should not fail
+#Lets just do the test for 1000 times to see if we hit the race
+TEST get_pathinfo_in_loop
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1037501.t b/tests/bugs/replicate/bug-1037501.t
new file mode 100755
index 00000000000..ce079555b50
--- /dev/null
+++ b/tests/bugs/replicate/bug-1037501.t
@@ -0,0 +1,104 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function write_file()
+{
+ path="$1"; shift
+ echo "$*" > "$path"
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+mkdir -p ${B0}/${V0}-2
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}-{0,1,2}
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+TEST `echo "TEST-FILE" > $M0/File`
+TEST `mkdir $M0/Dir`
+TEST `ln $M0/File $M0/Link`
+TEST `mknod $M0/FIFO p`
+
+TEST $CLI volume add-brick $V0 replica 4 $H0:$B0/$V0-3 force
+TEST $CLI volume add-brick $V0 replica 5 $H0:$B0/$V0-4 force
+TEST $CLI volume add-brick $V0 replica 6 $H0:$B0/$V0-5 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 4
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 5
+TEST gluster volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-0/File
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-1/File
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-2/File
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-3/File
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-4/File
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-5/File
+
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-0/Link
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-1/Link
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-2/Link
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-3/Link
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-4/Link
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-5/Link
+
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-0/Dir
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-1/Dir
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-2/Dir
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-3/Dir
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-4/Dir
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-5/Dir
+
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-0/FIFO
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-1/FIFO
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-2/FIFO
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-3/FIFO
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-4/FIFO
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" path_exists $B0/$V0-5/FIFO
+
+EXPECT 10 stat -c '%s' $B0/$V0-0/File
+EXPECT 10 stat -c '%s' $B0/$V0-1/File
+EXPECT 10 stat -c '%s' $B0/$V0-2/File
+EXPECT 10 stat -c '%s' $B0/$V0-3/File
+EXPECT 10 stat -c '%s' $B0/$V0-4/File
+EXPECT 10 stat -c '%s' $B0/$V0-5/File
+
+EXPECT 3 stat -c '%h' $B0/$V0-0/Link
+EXPECT 3 stat -c '%h' $B0/$V0-1/Link
+EXPECT 3 stat -c '%h' $B0/$V0-2/Link
+EXPECT 3 stat -c '%h' $B0/$V0-3/Link
+EXPECT 3 stat -c '%h' $B0/$V0-4/Link
+EXPECT 3 stat -c '%h' $B0/$V0-5/Link
+
+EXPECT 'directory' stat -c '%F' $B0/$V0-0/Dir
+EXPECT 'directory' stat -c '%F' $B0/$V0-1/Dir
+EXPECT 'directory' stat -c '%F' $B0/$V0-2/Dir
+EXPECT 'directory' stat -c '%F' $B0/$V0-3/Dir
+EXPECT 'directory' stat -c '%F' $B0/$V0-4/Dir
+EXPECT 'directory' stat -c '%F' $B0/$V0-5/Dir
+
+EXPECT 'fifo' stat -c '%F' $B0/$V0-0/FIFO
+EXPECT 'fifo' stat -c '%F' $B0/$V0-1/FIFO
+EXPECT 'fifo' stat -c '%F' $B0/$V0-2/FIFO
+EXPECT 'fifo' stat -c '%F' $B0/$V0-3/FIFO
+EXPECT 'fifo' stat -c '%F' $B0/$V0-4/FIFO
+EXPECT 'fifo' stat -c '%F' $B0/$V0-5/FIFO
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1046624.t b/tests/bugs/replicate/bug-1046624.t
new file mode 100755
index 00000000000..e2762ea6764
--- /dev/null
+++ b/tests/bugs/replicate/bug-1046624.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+## Start and create a volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+
+## Make sure automatic self-heal doesn't perturb our results.
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume set $V0 background-self-heal-count 0
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount native
+TEST ${GFS} --volfile-server=$H0 --volfile-id=$V0 --use-readdirp=no $M0
+
+TEST `echo "TEST-FILE" > $M0/File`
+TEST `mkdir $M0/Dir`
+TEST kill_brick $V0 $H0 $B0/${V0}-0
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 0
+
+TEST `ln -s $M0/File $M0/Link1`
+TEST `ln -s $M0/Dir $M0/Link2`
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST `find $M0/ 2>/dev/null 1>/dev/null`
+TEST `find $M0/ | xargs stat 2>/dev/null 1>/dev/null`
+
+TEST stat $B0/${V0}-0/Link1
+TEST stat $B0/${V0}-0/Link2
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1058797.t b/tests/bugs/replicate/bug-1058797.t
new file mode 100644
index 00000000000..598062a0dab
--- /dev/null
+++ b/tests/bugs/replicate/bug-1058797.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+#Test that the setuid bit is healed correctly.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+#Basic checks
+TEST glusterd
+
+#Create a 1x2 replica volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1};
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+# FUSE mount;create a file
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST touch $M0/file
+
+#Kill brick1 and set S_ISUID and S_ISGID bits from mount point
+kill_brick $V0 $H0 $B0/brick1
+TEST chmod +x,+s $M0/file
+
+#Get file permissions from backend brick0 and verify that S_ISUID is indeed set
+file_permissions1=`ls -l $B0/brick0/file | awk '{print $1}'| cut -d. -f1 | cut -d- -f2,3,4,5,6`
+setuid_bit1=`echo $file_permissions1 | cut -b3`
+EXPECT "s" echo $setuid_bit1
+
+#Restart volume and do lookup from mount to trigger heal
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST dd if=$M0/file of=/dev/null
+
+#Get file permissions from healed brick1 and verify that S_ISUID is indeed set
+file_permissions2=`ls -l $B0/brick1/file | awk '{print $1}' | cut -d. -f1 | cut -d- -f2,3,4,5,6`
+setuid_bit2=`echo $file_permissions2 | cut -b3`
+EXPECT "s" echo $setuid_bit2
+
+#Also compare the entire permission string,just to be sure
+EXPECT $file_permissions1 echo $file_permissions2
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1101647.t b/tests/bugs/replicate/bug-1101647.t
new file mode 100644
index 00000000000..708bc1a1e29
--- /dev/null
+++ b/tests/bugs/replicate/bug-1101647.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume start $V0;
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+#Create base entry in indices/xattrop
+echo "Data">$M0/file
+
+TEST $CLI volume heal $V0
+#Entries from indices/xattrop should not be cleared after a heal.
+EXPECT 1 count_index_entries $B0/$V0"1"
+EXPECT 1 count_index_entries $B0/$V0"2"
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "More data">>$M0/file
+
+EXPECT 1 echo `$CLI volume heal $V0 statistics heal-count|grep "Number of entries:"|head -n1|awk '{print $4}'`
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1130892.t b/tests/bugs/replicate/bug-1130892.t
new file mode 100644
index 00000000000..c7509f33cc2
--- /dev/null
+++ b/tests/bugs/replicate/bug-1130892.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+# Create a 1X2 replica
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Disable self-heal daemon
+TEST gluster volume set $V0 self-heal-daemon off
+
+# Enable Client side heal
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+
+# Disable all perf-xlators
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+
+# Volume start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+# FUSE Mount
+TEST ${GFS} -s $H0 --volfile-id $V0 $M0
+
+# Create files and dirs
+TEST mkdir -p $M0/one/two/
+TEST `echo "Carpe diem" > $M0/one/two/three`
+
+# Simulate disk-replacement
+TEST kill_brick $V0 $H0 $B0/${V0}-1
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 1
+TEST rm -rf $B0/${V0}-1/one
+TEST rm -rf $B0/${V0}-1/.glusterfs
+
+#Ideally, disk replacement is done using reset-brick or replace-brick gluster CLI
+#which will create .glusterfs folder.
+mkdir $B0/${V0}-1/.glusterfs && chmod 600 $B0/${V0}-1/.glusterfs
+
+# Start force
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST stat $M0/one
+
+sleep 1
+
+# Check pending xattrs
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 data
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 entry
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1 metadata
+
+TEST gluster volume set $V0 self-heal-daemon on
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_file_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two/three
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1132102.t b/tests/bugs/replicate/bug-1132102.t
new file mode 100644
index 00000000000..c7dbbf818aa
--- /dev/null
+++ b/tests/bugs/replicate/bug-1132102.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+#This tests that mknod and create fops mark necessary pending changelog
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+cd $M0
+TEST mkfifo fifo
+TEST mknod block b 0 0
+TEST touch a
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/fifo trusted.afr.$V0-client-0 data
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/fifo trusted.afr.$V0-client-0 entry
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/fifo trusted.afr.$V0-client-0 metadata
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/block trusted.afr.$V0-client-0 data
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/block trusted.afr.$V0-client-0 entry
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/block trusted.afr.$V0-client-0 metadata
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/a trusted.afr.$V0-client-0 data
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/a trusted.afr.$V0-client-0 entry
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/a trusted.afr.$V0-client-0 metadata
+cleanup
diff --git a/tests/bugs/replicate/bug-1134691-afr-lookup-metadata-heal.t b/tests/bugs/replicate/bug-1134691-afr-lookup-metadata-heal.t
new file mode 100644
index 00000000000..b69a38ae788
--- /dev/null
+++ b/tests/bugs/replicate/bug-1134691-afr-lookup-metadata-heal.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+#### Test iatt and user xattr heal from lookup code path ####
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+cd $M0
+TEST touch file
+TEST setfattr -n user.attribute1 -v "value" $B0/brick0/file
+TEST kill_brick $V0 $H0 $B0/brick2
+TEST chmod +x file
+iatt=$(stat -c "%g:%u:%A" file)
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT 2 get_pending_heal_count $V0
+
+#Trigger metadataheal
+TEST stat file
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#iattrs must be matching
+iatt1=$(stat -c "%g:%u:%A" $B0/brick0/file)
+iatt2=$(stat -c "%g:%u:%A" $B0/brick1/file)
+iatt3=$(stat -c "%g:%u:%A" $B0/brick2/file)
+EXPECT $iatt echo $iatt1
+EXPECT $iatt echo $iatt2
+EXPECT $iatt echo $iatt3
+
+#xattrs must be matching
+xatt1_cnt=$(getfattr -d $B0/brick0/file|wc|awk '{print $1}')
+xatt2_cnt=$(getfattr -d $B0/brick1/file|wc|awk '{print $1}')
+xatt3_cnt=$(getfattr -d $B0/brick2/file|wc|awk '{print $1}')
+EXPECT "$xatt1_cnt" echo $xatt2_cnt
+EXPECT "$xatt1_cnt" echo $xatt3_cnt
+
+#changelogs must be zero
+xattr1=$(get_hex_xattr trusted.afr.$V0-client-2 $B0/brick0/file)
+xattr2=$(get_hex_xattr trusted.afr.$V0-client-2 $B0/brick1/file)
+EXPECT "000000000000000000000000" echo $xattr1
+EXPECT "000000000000000000000000" echo $xattr2
+
+cd -
+cleanup;
diff --git a/tests/bugs/replicate/bug-1139230.t b/tests/bugs/replicate/bug-1139230.t
new file mode 100644
index 00000000000..9ceac6c4f4e
--- /dev/null
+++ b/tests/bugs/replicate/bug-1139230.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+# Create a 1X2 replica
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Volume start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+# FUSE Mount
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+TEST mkdir -p $M0/one
+
+# Kill a brick
+TEST kill_brick $V0 $H0 $B0/${V0}-1
+
+TEST `echo "A long" > $M0/one/two`
+
+# Start force
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_dir_heal_done $B0/${V0}-0 $B0/${V0}-1 one
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" is_file_heal_done $B0/${V0}-0 $B0/${V0}-1 one/two
+
+# Pending xattrs should be set for all the bricks once self-heal is done
+# Check pending xattrs
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-0
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-0/one trusted.afr.$V0-client-1
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-1/one trusted.afr.$V0-client-0
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-1/one trusted.afr.$V0-client-1
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-0/one trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-1/one trusted.afr.dirty
+
+TEST `echo "time ago" > $M0/one/three`
+
+# Pending xattrs should be set for all the bricks once transaction is done
+# Check pending xattrs
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-0/one/three trusted.afr.$V0-client-0
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-0/one/three trusted.afr.$V0-client-1
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-1/one/three trusted.afr.$V0-client-0
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-1/one/three trusted.afr.$V0-client-1
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-0/one/three trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/${V0}-1/one/three trusted.afr.dirty
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1180545.t b/tests/bugs/replicate/bug-1180545.t
new file mode 100644
index 00000000000..5e40edd6c38
--- /dev/null
+++ b/tests/bugs/replicate/bug-1180545.t
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+#Create gfid split-brain of directory and check if conservative merge
+#completes successfully.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+function check_sh_entries() {
+ local expected="$1"
+ local count=
+ local good="0"
+ shift
+
+ for i in $*; do
+ count="$(count_sh_entries $i)"
+ if [[ "x${count}" == "x${expected}" ]]; then
+ good="$((good + 1))"
+ fi
+ done
+ if [[ "x${good}" != "x${last_good}" ]]; then
+ last_good="${good}"
+# This triggers a sweep of the heal index. However if more than one brick
+# tries to heal the same directory at the same time, one of them will take
+# the lock and the other will give up, waiting for the next heal cycle, which
+# is set to 60 seconds (the minimum valid value). So, each time we detect
+# that one brick has completed the heal, we trigger another heal.
+ $CLI volume heal $V0
+ fi
+
+ echo "${good}"
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 cluster.heal-timeout 60
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 stat-prefetch off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#Create files with alternate brick down. One file has gfid mismatch.
+TEST mkdir $M0/DIR
+
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST touch $M0/DIR/FILE
+TEST touch $M0/DIR/file{1..5}
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST touch $M0/DIR/FILE
+TEST touch $M0/DIR/file{6..10}
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#Trigger heal and verify number of entries in backend
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+
+last_good=""
+
+EXPECT_WITHIN $HEAL_TIMEOUT "2" check_sh_entries 2 $B0/brick{0,1}
+
+#Two entries for DIR and two for FILE
+EXPECT_WITHIN $HEAL_TIMEOUT "4" get_pending_heal_count $V0
+TEST diff <(ls $B0/brick0/DIR) <(ls $B0/brick1/DIR)
+cleanup
diff --git a/tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t b/tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t
new file mode 100644
index 00000000000..fe8e992e8f8
--- /dev/null
+++ b/tests/bugs/replicate/bug-1190069-afr-stale-index-entries.t
@@ -0,0 +1,57 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+#Stale entries in xattrop folder for files which do not need heal must be removed during the next index heal crawl.
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1};
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0;
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST `echo hello>$M0/datafile`
+TEST touch $M0/mdatafile
+
+#Create split-brain and reset the afr xattrs, so that we have only the entry inside xattrop folder.
+#This is to simulate the case where the brick crashed just before pre-op happened, but index xlator created the entry inside xattrop folder.
+
+#Create data, metadata SB.
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+TEST stat $M0/datafile
+TEST `echo append>>$M0/datafile`
+TEST chmod +x $M0/mdatafile
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 1
+TEST kill_brick $V0 $H0 $B0/$V0"0"
+TEST stat $M0/datafile
+TEST `echo append>>$M0/datafile`
+TEST chmod -x $M0/mdatafile
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 0
+TEST ! cat $M0/datafile
+
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT '2' count_sh_entries $B0/$V0"0"
+EXPECT_WITHIN $HEAL_TIMEOUT '2' count_sh_entries $B0/$V0"1"
+
+#Reset xattrs and trigger heal.
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}0/datafile
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000000 $B0/${V0}1/datafile
+
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}0/mdatafile
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000000 $B0/${V0}1/mdatafile
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+EXPECT_WITHIN $HEAL_TIMEOUT '0' count_sh_entries $B0/$V0"0"
+EXPECT_WITHIN $HEAL_TIMEOUT '0' count_sh_entries $B0/$V0"1"
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1221481-allow-fops-on-dir-split-brain.t b/tests/bugs/replicate/bug-1221481-allow-fops-on-dir-split-brain.t
new file mode 100644
index 00000000000..6ff471fbf15
--- /dev/null
+++ b/tests/bugs/replicate/bug-1221481-allow-fops-on-dir-split-brain.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+#Allow readdirs to proceed on directories that are in split-brain
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1};
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file{1..5}
+
+#Create entry split-brain
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 1
+TEST touch $M0/dir/FILE
+EXPECT_WITHIN ${UMOUNT_TIMEOUT} "^Y$" force_umount $M0
+TEST $CLI volume start $V0 force
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 1
+TEST kill_brick $V0 $H0 $B0/$V0"0"
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} "^0$" afr_child_up_status $V0 0
+TEST touch $M0/dir/FILE
+EXPECT_WITHIN ${UMOUNT_TIMEOUT} "^Y$" force_umount $M0
+TEST $CLI volume start $V0 force
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT '1' afr_child_up_status_meta $M0 $V0-replicate-0 1
+
+cd $M0/dir
+EXPECT "6" echo $(ls | wc -l)
+TEST ! cat FILE
+TEST `echo hello>hello.txt`
+cd -
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup
diff --git a/tests/bugs/replicate/bug-1238398-split-brain-resolution.t b/tests/bugs/replicate/bug-1238398-split-brain-resolution.t
new file mode 100644
index 00000000000..8ef3aae979f
--- /dev/null
+++ b/tests/bugs/replicate/bug-1238398-split-brain-resolution.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+function get_split_brain_status {
+ local path=$1
+ echo `getfattr -n replica.split-brain-status $path` | cut -f2 -d"=" | sed -e 's/^"//' -e 's/"$//'
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+
+#Disable self-heal-daemon
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+TEST `echo "some-data" > $M0/metadata-split-brain.txt`
+
+#Create metadata split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST chmod 666 $M0/metadata-split-brain.txt
+
+TEST $CLI volume start $V0 force
+TEST kill_brick $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST chmod 757 $M0/metadata-split-brain.txt
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+EXPECT 2 get_pending_heal_count $V0
+
+#Inspect the file in metadata-split-brain
+EXPECT "data-split-brain:no metadata-split-brain:yes Choices:patchy-client-0,patchy-client-1" get_split_brain_status $M0/metadata-split-brain.txt
+TEST setfattr -n replica.split-brain-choice -v $V0-client-0 $M0/metadata-split-brain.txt
+
+EXPECT "757" stat -c %a $M0/metadata-split-brain.txt
+
+TEST setfattr -n replica.split-brain-choice -v $V0-client-1 $M0/metadata-split-brain.txt
+EXPECT "666" stat -c %a $M0/metadata-split-brain.txt
+
+cleanup;
+
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/bugs/replicate/bug-1238508-self-heal.t b/tests/bugs/replicate/bug-1238508-self-heal.t
new file mode 100644
index 00000000000..24fb07d31f0
--- /dev/null
+++ b/tests/bugs/replicate/bug-1238508-self-heal.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+
+# Disable self-heal-daemon
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST mkdir $M0/olddir;
+TEST `echo "some-data" > $M0/olddir/oldfile`
+
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST mv $M0/olddir/oldfile $M0/olddir/newfile;
+TEST mv $M0/olddir $M0/newdir;
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+# Test if the files are present on both the bricks
+EXPECT "newdir" ls $B0/${V0}0/
+EXPECT "newdir" ls $B0/${V0}1/
+EXPECT "newfile" ls $B0/${V0}0/newdir/
+EXPECT "newfile" ls $B0/${V0}1/newdir/
+
+# Test if gfid-link files in .glusterfs also provide correct info
+brick0gfid=$(gf_get_gfid_backend_file_path $B0/${V0}0 newdir)
+brick1gfid=$(gf_get_gfid_backend_file_path $B0/${V0}1 newdir)
+EXPECT "newfile" ls $brick0gfid
+EXPECT "newfile" ls $brick1gfid
+
+# Test if the files are accessible from the mount
+EXPECT "newdir" ls $M0/
+EXPECT "newfile" ls $M0/newdir
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1250170-fsync.c b/tests/bugs/replicate/bug-1250170-fsync.c
new file mode 100644
index 00000000000..21fd96594aa
--- /dev/null
+++ b/tests/bugs/replicate/bug-1250170-fsync.c
@@ -0,0 +1,56 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+
+int
+main(int argc, char **argv)
+{
+ char *file = NULL;
+ int fd = -1;
+ char *buffer = NULL;
+ size_t buf_size = 0;
+ size_t written = 0;
+ int ret = 0;
+ off_t offset = 0;
+ int i = 0;
+ int loop_count = 5;
+
+ if (argc < 2) {
+ printf("Usage:%s <filename>\n", argv[0]);
+ return -1;
+ }
+
+ file = argv[1];
+ buf_size = 1024;
+ buffer = malloc(buf_size);
+ if (!buffer) {
+ perror("malloc");
+ return -1;
+ }
+ memset(buffer, 'R', buf_size);
+
+ fd = open(file, O_WRONLY);
+ if (fd == -1) {
+ perror("open");
+ return -1;
+ }
+
+ for (i = 0; i < loop_count; i++) {
+ ret = write(fd, buffer, buf_size);
+ if (ret == -1) {
+ perror("write");
+ return ret;
+ } else {
+ written += ret;
+ }
+ offset = lseek(fd, 0, SEEK_SET);
+ }
+
+ free(buffer);
+ return 0;
+}
diff --git a/tests/bugs/replicate/bug-1250170-fsync.t b/tests/bugs/replicate/bug-1250170-fsync.t
new file mode 100644
index 00000000000..7a3fdbf8bb5
--- /dev/null
+++ b/tests/bugs/replicate/bug-1250170-fsync.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST gcc $(dirname $0)/bug-1250170-fsync.c -o $(dirname $0)/bug-1250170-fsync
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST touch $M0/file
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST gluster volume profile $V0 start
+#Perform 5 non-sequential writes.
+TEST $(dirname $0)/bug-1250170-fsync $M0/file
+
+#Run profile info initially to filter out the interval statistics in the
+#subsequent runs.
+TEST $CLI volume profile $V0 info
+#We get only cumulative statistics.
+write_count=$($CLI volume profile $V0 info | grep WRITE |awk '{count += $8} END {print count}')
+fsync_count=$($CLI volume profile $V0 info | grep FSYNC |awk '{count += $8} END {print count}')
+
+EXPECT "5" echo $write_count
+TEST [ -z $fsync_count ]
+
+TEST $CLI volume profile $V0 stop
+TEST umount $M0
+rm -f $(dirname $0)/bug-1250170-fsync
+cleanup
diff --git a/tests/bugs/replicate/bug-1266876-allow-reset-brick-for-same-path.t b/tests/bugs/replicate/bug-1266876-allow-reset-brick-for-same-path.t
new file mode 100644
index 00000000000..884b7892954
--- /dev/null
+++ b/tests/bugs/replicate/bug-1266876-allow-reset-brick-for-same-path.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+# Create files
+for i in {1..5}
+do
+ echo $i > $M0/file$i.txt
+done
+
+# Negative case with brick not killed && volume-id xattrs present
+TEST ! $CLI volume reset-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1 commit force
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+# Negative case with brick killed but volume-id xattr present
+TEST ! $CLI volume reset-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1 commit
+
+TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}1 start
+# Simulated reset disk
+for i in {1..5}
+do
+ rm -rf $B0/${V0}{1}/file$i.txt
+done
+for i in {6..10}
+do
+ echo $i > $M0/file$i.txt
+done
+
+# Now reset the brick
+TEST $CLI volume reset-brick $V0 $H0:$B0/${V0}1 $H0:$B0/${V0}1 commit force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+# Check if entry-heal has happened
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort)
+EXPECT "10" echo $(ls $B0/${V0}1 | wc -l)
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1292379.t b/tests/bugs/replicate/bug-1292379.t
new file mode 100644
index 00000000000..be1bf699173
--- /dev/null
+++ b/tests/bugs/replicate/bug-1292379.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.eager-lock off
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST wfd=`fd_available`
+TEST fd_open $wfd "w" $M0/a
+
+TEST fd_write $wfd "abcd"
+
+# Kill brick-0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+# While brick-0 is down, rename 'a' to 'b'
+TEST mv $M0/a $M0/b
+
+TEST fd_write $wfd "lmn"
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST fd_write $wfd "pqrs"
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST fd_write $wfd "xyz"
+TEST fd_close $wfd
+
+md5sum_b0=$(md5sum $B0/${V0}0/b | awk '{print $1}')
+
+EXPECT "$md5sum_b0" echo `md5sum $B0/${V0}1/b | awk '{print $1}'`
+
+TEST umount $M0
+cleanup
diff --git a/tests/bugs/replicate/bug-1297695.t b/tests/bugs/replicate/bug-1297695.t
new file mode 100644
index 00000000000..d5c1a214fe2
--- /dev/null
+++ b/tests/bugs/replicate/bug-1297695.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+function write_to_file {
+ dd of=$M0/dir/file if=/dev/urandom bs=1024k count=128 2>&1 >/dev/null
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+
+TEST $CLI volume set $V0 cluster.eager-lock on
+TEST $CLI volume set $V0 post-op-delay-secs 3
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST $CLI volume set $V0 ensure-durability off
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST `echo 'abc' > $M0/dir/file`
+
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+write_to_file &
+#Test if the MAX [F]INODELK fop latency is of the order of seconds.
+EXPECT "^1$" get_pending_heal_count $V0
+inodelk_max_latency=$($CLI volume profile $V0 info | grep INODELK | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+TEST [ -z $inodelk_max_latency ]
+cleanup
diff --git a/tests/bugs/replicate/bug-1305031-block-reads-on-metadata-sbrain.t b/tests/bugs/replicate/bug-1305031-block-reads-on-metadata-sbrain.t
new file mode 100644
index 00000000000..780ddb9250c
--- /dev/null
+++ b/tests/bugs/replicate/bug-1305031-block-reads-on-metadata-sbrain.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Test that for files in metadata-split-brain, we do not wind even a single read.
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST chmod 700 $M0/file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST chmod 777 $M0/file
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST umount $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+lines=`cat $M0/file|wc|awk '{print $1}'`
+EXPECT 0 echo $lines
+TEST umount $M0
+cleanup
diff --git a/tests/bugs/replicate/bug-1325792.t b/tests/bugs/replicate/bug-1325792.t
new file mode 100644
index 00000000000..73190e5f341
--- /dev/null
+++ b/tests/bugs/replicate/bug-1325792.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 3
+
+
+EXPECT 1 echo `$CLI volume heal $V0 statistics heal-count replica $H0:$B0/${V0}0 | grep -A 1 ${V0}0 | grep "entries" | wc -l`
+EXPECT 1 echo `$CLI volume heal $V0 statistics heal-count replica $H0:$B0/${V0}1 | grep -A 1 ${V0}1 | grep "entries" | wc -l`
+EXPECT 1 echo `$CLI volume heal $V0 statistics heal-count replica $H0:$B0/${V0}2 | grep -A 1 ${V0}2 | grep "entries" | wc -l`
+EXPECT 1 echo `$CLI volume heal $V0 statistics heal-count replica $H0:$B0/${V0}3 | grep -A 1 ${V0}3 | grep "entries" | wc -l`
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1335652.t b/tests/bugs/replicate/bug-1335652.t
new file mode 100644
index 00000000000..653a1b05ce2
--- /dev/null
+++ b/tests/bugs/replicate/bug-1335652.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 shard on
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+#Kill the zero'th brick so that 1st and 2nd get marked dirty
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST dd if=/dev/urandom of=$M0/file bs=10MB count=20
+
+#At any point value of dirty should not be greater than 0 on source bricks
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}1/.shard
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}2/.shard
+
+rm -rf $M0/file;
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1340623-mkdir-fails-remove-brick-started.t b/tests/bugs/replicate/bug-1340623-mkdir-fails-remove-brick-started.t
new file mode 100644
index 00000000000..6d177a7d3f8
--- /dev/null
+++ b/tests/bugs/replicate/bug-1340623-mkdir-fails-remove-brick-started.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a 2x2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/r11 $H0:$B0/r12 $H0:$B0/r21 $H0:$B0/r22;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## for release > 3.7 , gluster nfs is off by default
+TEST $CLI vol set $V0 nfs.disable off;
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+TEST mount_nfs $H0:/$V0 $N0;
+
+## create some directories and files inside mount
+mkdir $N0/io;
+for j in {1..10}; do mkdir $N0/io/b$j; for k in {1..10}; do touch $N0/io/b$j/c$k; done done
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/r11 $H0:$B0/r12 start;
+
+TEST mkdir $N0/dir1;
+
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$B0/r11 $H0:$B0/r12"
+
+TEST $CLI volume remove-brick $V0 $H0:$B0/r11 $H0:$B0/r12 commit;
+
+TEST mkdir $N0/dir2;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1341650.t b/tests/bugs/replicate/bug-1341650.t
new file mode 100644
index 00000000000..610342ca5bd
--- /dev/null
+++ b/tests/bugs/replicate/bug-1341650.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume set $V0 quota-deem-statfs on
+TEST $CLI volume set $V0 soft-timeout 0
+TEST $CLI volume set $V0 hard-timeout 0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST mkdir $M0/dir
+TEST $CLI volume quota $V0 limit-objects /dir 10
+
+TEST touch $M0/dir/file1
+TEST touch $M0/dir/file2
+TEST touch $M0/dir/file3
+TEST touch $M0/dir/file4
+TEST touch $M0/dir/file5
+
+# Kill 3rd brick and create entries
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST touch $M0/dir/file6
+TEST touch $M0/dir/file7
+TEST touch $M0/dir/file8
+TEST touch $M0/dir/file9
+
+# Quota object limit is reached. Remove object for create to succeed.
+TEST ! touch $M0/dir/file10
+
+TEST rm $M0/dir/file1
+TEST touch $M0/dir/file10
+
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}0/dir
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.dirty $B0/${V0}1/dir
+
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+#Check that no convervative merge happened.file1 must not be present on any brick.
+TEST ! stat $B0/${V0}0/dir/file1
+TEST ! stat $B0/${V0}1/dir/file1
+TEST ! stat $B0/${V0}2/dir/file1
+
+TEST umount $M0
+cleanup
diff --git a/tests/bugs/replicate/bug-1363721.t b/tests/bugs/replicate/bug-1363721.t
new file mode 100644
index 00000000000..0ed34d8a4f4
--- /dev/null
+++ b/tests/bugs/replicate/bug-1363721.t
@@ -0,0 +1,118 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+FILE_UPDATE_TIMEOUT=20
+cleanup
+
+function size_increased {
+ local file=$1
+ local size=$2
+ local new_size=$(stat -c%s $file)
+ if [ $new_size -gt $size ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function has_write_failed {
+ local pid=$1
+ if [ -d /proc/$pid ]; then echo "N"; else echo "Y"; fi
+}
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --direct-io-mode=enable $M0
+
+cd $M0
+
+# Start writing to a file.
+(dd if=/dev/urandom of=$M0/file1 bs=1k 2>/dev/null 1>/dev/null)&
+dd_pid=$!
+
+# Let IO happen
+EXPECT_WITHIN $FILE_UPDATE_TIMEOUT "Y" size_increased file1 0
+
+# Now kill the zeroth brick
+kill_brick $V0 $H0 $B0/${V0}0
+
+# Let IO continue
+EXPECT_WITHIN $FILE_UPDATE_TIMEOUT "Y" size_increased file1 $(stat -c%s file1)
+
+# Now bring the brick back up
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+# Let IO continue
+EXPECT_WITHIN $FILE_UPDATE_TIMEOUT "Y" size_increased file1 $(stat -c%s file1)
+
+# Now kill the first brick
+kill_brick $V0 $H0 $B0/${V0}1
+
+# Let IO continue
+EXPECT_WITHIN $FILE_UPDATE_TIMEOUT "Y" size_increased file1 $(stat -c%s file1)
+
+# Now bring the brick back up
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+# Let IO continue for 3 seconds
+sleep 3
+
+# Now kill the second brick
+kill_brick $V0 $H0 $B0/${V0}2
+
+# At this point the write should have been failed.
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "Y" has_write_failed $dd_pid
+
+# Also make sure that the second brick is never an accused.
+
+md5sum_2=$(md5sum $B0/${V0}2/file1 | awk '{print $1}')
+
+EXPECT_NOT "$md5sum_2" echo `md5sum $B0/${V0}0/file1 | awk '{print $1}'`
+EXPECT_NOT "$md5sum_2" echo `md5sum $B0/${V0}1/file1 | awk '{print $1}'`
+
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}0/file1 trusted.afr.dirty data
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/file1 trusted.afr.dirty data
+
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}0/file1 trusted.afr.$V0-client-2 data
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/file1 trusted.afr.$V0-client-2 data
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file1 trusted.afr.$V0-client-2 data
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}0/file1 trusted.afr.$V0-client-2 metadata
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/file1 trusted.afr.$V0-client-2 metadata
+EXPECT "00000000" afr_get_specific_changelog_xattr $B0/${V0}2/file1 trusted.afr.$V0-client-2 metadata
+
+# Now bring the brick back up
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+# Enable shd
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT "$md5sum_2" echo `md5sum $B0/${V0}0/file1 | awk '{print $1}'`
+EXPECT "$md5sum_2" echo `md5sum $B0/${V0}1/file1 | awk '{print $1}'`
+EXPECT "$md5sum_2" echo `md5sum $B0/${V0}2/file1 | awk '{print $1}'`
+
+cd ~
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1365455.t b/tests/bugs/replicate/bug-1365455.t
new file mode 100644
index 00000000000..1953e2a9327
--- /dev/null
+++ b/tests/bugs/replicate/bug-1365455.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function check_size
+{
+ for i in {1..10}; do
+ size1=`stat -c %s $B0/${V0}0/tmp$i`
+ size2=`stat -c %s $B0/${V0}1/tmp$i`
+ if [[ $size1 -eq 0 ]] || [[ $size2 -eq 0 ]] || [[ $size1 -ne $size2 ]]; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0;
+
+TEST $CLI volume start $V0;
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+
+for i in {1..10}
+do
+ echo abc > $M0/tmp$i
+done
+
+
+# Add Another brick
+TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}1
+
+#Check if self heal daemon has come up
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+
+#Check if self heal daemon is able to see all bricks
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Check if entry-heal has happened
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort)
+
+#Check size of files on bricks
+TEST check_size
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t b/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t
new file mode 100644
index 00000000000..d049d95ef9a
--- /dev/null
+++ b/tests/bugs/replicate/bug-1386188-sbrain-fav-child.t
@@ -0,0 +1,82 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/data.txt
+TEST touch $M0/mdata.txt
+
+#Create data and metadata split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/data.txt bs=1024 count=1024
+TEST setfattr -n user.value -v value1 $M0/mdata.txt
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/data.txt bs=1024 count=1024
+TEST setfattr -n user.value -v value2 $M0/mdata.txt
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+## Check that the file still in split-brain,
+ ## I/O fails
+ cat $M0/data.txt > /dev/null
+ EXPECT "1" echo $?
+ ## pending xattrs blame each other.
+ brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/data.txt)
+ brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/data.txt)
+ TEST [ $brick0_pending -ne "000000000000000000000000" ]
+ TEST [ $brick1_pending -ne "000000000000000000000000" ]
+
+ ## I/O fails
+ getfattr -n user.value $M0/mdata.txt
+ EXPECT "1" echo $?
+ brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/mdata.txt)
+ brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/mdata.txt)
+ TEST [ $brick0_pending -ne "000000000000000000000000" ]
+ TEST [ $brick1_pending -ne "000000000000000000000000" ]
+
+## Let us use mtime as fav-child policy. So brick0 will be source.
+ # Set dirty (data part) on the sink brick to check if it is reset later along with the pending xattr.
+ TEST setfattr -n trusted.afr.dirty -v 0x000000010000000000000000 $B0/${V0}1/data.txt
+ # Set dirty (metadata part) on the sink brick to check if it is reset later along with the pending xattr.
+ TEST setfattr -n trusted.afr.dirty -v 0x000000000000000100000000 $B0/${V0}1/mdata.txt
+
+ TEST $CLI volume set $V0 favorite-child-policy mtime
+
+ # Reading the file should be allowed and sink brick xattrs must be reset.
+ cat $M0/data.txt > /dev/null
+ EXPECT "0" echo $?
+ TEST brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/data.txt)
+ TEST brick1_dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}1/data.txt)
+ TEST [ $brick1_dirty -eq "000000000000000000000000" ]
+ TEST [ $brick1_pending -eq "000000000000000000000000" ]
+
+ # Accessing the file should be allowed and sink brick xattrs must be reset.
+ EXPECT "value2" echo $(getfattr --only-values -n user.value $M0/mdata.txt)
+ TEST brick1_pending=$(get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1/data.txt)
+ TEST brick1_dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}1/data.txt)
+ TEST [ $brick1_dirty -eq "000000000000000000000000" ]
+ TEST [ $brick1_pending -eq "000000000000000000000000" ]
+
+#Enable shd and heal the file.
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT 0 get_pending_heal_count $V0
+cleanup;
diff --git a/tests/bugs/replicate/bug-1402730.t b/tests/bugs/replicate/bug-1402730.t
new file mode 100644
index 00000000000..c7866df463b
--- /dev/null
+++ b/tests/bugs/replicate/bug-1402730.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 granular-entry-heal on
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0
+
+TEST mkdir -p $M0/a/b/c -p
+cd $M0/a/b/c
+
+TEST kill_brick $V0 $H0 $B0/${V0}2
+rm -rf $B0/${V0}2/*
+rm -rf $B0/${V0}2/.glusterfs
+
+#Ideally, disk replacement is done using reset-brick or replace-brick gluster CLI
+#which will create .glusterfs folder.
+mkdir $B0/${V0}2/.glusterfs && chmod 600 $B0/${V0}2/.glusterfs
+
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST touch file
+
+GFID_C=$(get_gfid_string $M0/a/b/c)
+TEST stat $B0/${V0}0/.glusterfs/indices/entry-changes/$GFID_C/file
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$GFID_C/file
+
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}0/a/b/c trusted.afr.$V0-client-2 entry
+EXPECT_NOT "00000000" afr_get_specific_changelog_xattr $B0/${V0}1/a/b/c trusted.afr.$V0-client-2 entry
+
+cd ~
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1408712.t b/tests/bugs/replicate/bug-1408712.t
new file mode 100644
index 00000000000..9499a598ef1
--- /dev/null
+++ b/tests/bugs/replicate/bug-1408712.t
@@ -0,0 +1,101 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+TESTS_EXPECTED_IN_LOOP=12
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume heal $V0 granular-entry-heal enable
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 performance.flush-behind off
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M1 $V0-replicate-0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M1 $V0-replicate-0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M1 $V0-replicate-0 2
+
+TEST cd $M0
+TEST dd if=/dev/zero of=file bs=1M count=8
+
+# Kill brick-0.
+TEST kill_brick $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "0" brick_up_status $V0 $H0 $B0/${V0}0
+
+TEST "dd if=/dev/zero bs=1M count=8 >> file"
+
+FILE_GFID=$(get_gfid_string $M0/file)
+
+# Test that the index associated with '/.shard' is created on B1 and B2.
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+TEST stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+# Check for successful creation of granular entry indices
+for i in {2..3}
+do
+ TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+ TEST_IN_LOOP stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+done
+
+TEST cd ~
+TEST md5sum $M1/file
+
+# Test that the index associated with '/.shard' and the created shards do not disappear on B1 and B2.
+TEST stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+TEST stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+for i in {2..3}
+do
+ TEST_IN_LOOP stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+ TEST_IN_LOOP stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+done
+
+# Start the brick that was down
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+# Enable shd
+TEST gluster volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Now verify that there are no name indices left after self-heal
+TEST ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+TEST ! stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID
+
+for i in {2..3}
+do
+ TEST_IN_LOOP ! stat $B0/${V0}1/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+ TEST_IN_LOOP ! stat $B0/${V0}2/.glusterfs/indices/entry-changes/$DOT_SHARD_GFID/$FILE_GFID.$i
+done
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1417522-block-split-brain-resolution.t b/tests/bugs/replicate/bug-1417522-block-split-brain-resolution.t
new file mode 100644
index 00000000000..d0e2fee8bcd
--- /dev/null
+++ b/tests/bugs/replicate/bug-1417522-block-split-brain-resolution.t
@@ -0,0 +1,69 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/file
+
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=10
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=20
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST ! dd if=$M0/file of=/dev/null
+SOURCE_BRICK_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1)
+
+# Various fav-child policies must not heal the file when some bricks are down.
+TEST $CLI volume set $V0 favorite-child-policy size
+TEST ! dd if=$M0/file of=/dev/null
+TEST $CLI volume set $V0 favorite-child-policy ctime
+TEST ! dd if=$M0/file of=/dev/null
+TEST $CLI volume set $V0 favorite-child-policy mtime
+TEST ! dd if=$M0/file of=/dev/null
+TEST $CLI volume set $V0 favorite-child-policy majority
+TEST ! dd if=$M0/file of=/dev/null
+
+# CLI/mount based split-brain resolution must also not work.
+TEST ! $CLI volume heal $V0 split-brain bigger-file /file
+TEST ! $CLI volume heal $V0 split-brain mtime /file
+TEST ! $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}2 /file1
+
+TEST ! getfattr -n replica.split-brain-status $M0/file
+TEST ! setfattr -n replica.split-brain-choice -v $V0-client-1 $M0/file
+
+# Bring all bricks back up and launch heal.
+TEST $CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT 0 get_pending_heal_count $V0
+B1_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1)
+B2_MD5=$(md5sum $B0/${V0}2/file | cut -d\ -f1)
+TEST [ "$SOURCE_BRICK_MD5" == "$B1_MD5" ]
+TEST [ "$SOURCE_BRICK_MD5" == "$B2_MD5" ]
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup;
+
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
new file mode 100644
index 00000000000..10ce0131f4f
--- /dev/null
+++ b/tests/bugs/replicate/bug-1433571-undo-pending-only-on-up-bricks.t
@@ -0,0 +1,79 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+# Disable self-heal-daemon, client-side-heal and set quorum-type to none
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.quorum-type none
+
+#Kill bricks 0 & 1 and create a file to have pending entry for 0 & 1 on brick 2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+echo "file 1" >> $M0/f1
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#Kill bricks 1 & 2 and create a file to have pending entry for 1 & 2 on brick 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "file 2" >> $M0/f2
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+#Kill bricks 2 & 0 and create a file to have pending entry for 2 & 0 on brick 1
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST kill_brick $V0 $H0 $B0/${V0}0
+echo "file 3" >> $M0/f3
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1
+EXPECT "000000000000000000000001" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+# We were killing one brick and checking that entry heal does not reset the
+# pending xattrs for the down brick. Now that we need all bricks to be up for
+# entry heal, I'm removing that test from the .t
+
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+TEST ls $M0
+TEST cat $M0/f1
+TEST cat $M0/f2
+TEST cat $M0/f3
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}1
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-0 $B0/${V0}2
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}2
+
+#Check whether all the bricks contains all the 3 files.
+EXPECT "3" echo $(ls $B0/${V0}0 | wc -l)
+EXPECT "3" echo $(ls $B0/${V0}1 | wc -l)
+EXPECT "3" echo $(ls $B0/${V0}2 | wc -l)
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t b/tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t
new file mode 100644
index 00000000000..cdcaf62c925
--- /dev/null
+++ b/tests/bugs/replicate/bug-1438255-do-not-mark-self-accusing-xattrs.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+NEW_USER=bug1438255
+NEW_UID=1438255
+NEW_GID=1438255
+
+TEST groupadd -o -g ${NEW_GID} ${NEW_USER}-${NEW_GID}
+TEST useradd -o -M -u ${NEW_UID} -g ${NEW_GID} -K MAIL_DIR=/dev/null ${NEW_USER}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+TEST touch $M0/FILE
+TEST kill_brick $V0 $H0 $B0/${V0}2
+chown $NEW_UID:$NEW_GID $M0/FILE
+EXPECT "000000000000000100000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0/FILE
+EXPECT "000000000000000100000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1/FILE
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+# setfattr done as NEW_USER fails on 3rd brick with EPERM but suceeds on
+# the first 2 and hence on the mount.
+su -m bug1438255 -c "setfattr -n user.myattr -v myvalue $M0/FILE"
+TEST [ $? -eq 0 ]
+EXPECT "000000000000000200000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0/FILE
+EXPECT "000000000000000200000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}1/FILE
+# Brick 3 does not have any self-blaming pending xattr.
+TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}2/FILE
+
+TEST userdel --force ${NEW_USER}
+TEST groupdel ${NEW_USER}-${NEW_GID}
+cleanup
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
diff --git a/tests/bugs/replicate/bug-1448804-check-quorum-type-values.t b/tests/bugs/replicate/bug-1448804-check-quorum-type-values.t
new file mode 100644
index 00000000000..5bacf3edcfe
--- /dev/null
+++ b/tests/bugs/replicate/bug-1448804-check-quorum-type-values.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..1}
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+# Default quorum-type for replica 2 is none. quorum-count is zero but it is not displayed.
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "none" get_quorum_type $M0 $V0 0
+cat $M0/.meta/graphs/active/$V0-replicate-0/private|grep quorum-count
+TEST [ $? -ne 0 ]
+
+# Convert to replica-3.
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+# Default quorum-type for replica 3 is auto. quorum-count is INT_MAX but it is not displayed.
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "auto" get_quorum_type $M0 $V0 0
+cat $M0/.meta/graphs/active/$V0-replicate-0/private|grep quorum-count
+TEST [ $? -ne 0 ]
+
+# Change the type to fixed.
+TEST $CLI volume set $V0 cluster.quorum-type fixed
+# We haven't set quorum-count yet, so it takes the default value of zero in reconfigure() and hence the quorum-type is displayed as none.
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "none" get_quorum_type $M0 $V0 0
+cat $M0/.meta/graphs/active/$V0-replicate-0/private|grep quorum-count
+TEST [ $? -ne 0 ]
+
+# set quorum-count and check.
+TEST $CLI volume set $V0 cluster.quorum-count 1
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "fixed" get_quorum_type $M0 $V0 0
+EXPECT "1" echo `cat $M0/.meta/graphs/active/$V0-replicate-0/private|grep quorum-count|awk '{print $3}'`
+
+# reset to default values.
+TEST $CLI volume reset $V0 cluster.quorum-type
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "auto" get_quorum_type $M0 $V0 0
+cat $M0/.meta/graphs/active/$V0-replicate-0/private|grep quorum-count
+TEST [ $? -ne 0 ]
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1473026.t b/tests/bugs/replicate/bug-1473026.t
new file mode 100644
index 00000000000..efb3ffa0d39
--- /dev/null
+++ b/tests/bugs/replicate/bug-1473026.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=1473026
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1473026
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0;
+
+#kill one brick (this has some issue)
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+#kill the brick to be replaced
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+# We know this command would fail because file system is read only now
+TEST ! $CLI volume replace-brick $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}4 commit force
+
+TEST pkill glusterd
+
+# Glusterd should start but the volume info and brick volfiles don't match
+TEST glusterd
+TEST pidof glusterd
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1477169-entry-selfheal-rename.t b/tests/bugs/replicate/bug-1477169-entry-selfheal-rename.t
new file mode 100644
index 00000000000..bb858a8a63d
--- /dev/null
+++ b/tests/bugs/replicate/bug-1477169-entry-selfheal-rename.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST mkdir -p $M0/d1/dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 $M0/d2
+gfid_d1=$(gf_get_gfid_xattr $B0/${V0}0/d1)
+gfid_d2=$(gf_get_gfid_xattr $B0/${V0}0/d2)
+gfid_dir=$(gf_get_gfid_xattr $B0/${V0}0/d1/dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789)
+
+gfid_str_d1=$(gf_gfid_xattr_to_str $gfid_d1)
+gfid_str_d2=$(gf_gfid_xattr_to_str $gfid_d2)
+gfid_str_d3=$(gf_gfid_xattr_to_str $gfid_dir)
+
+# Kill 3rd brick and rename the dir from mount.
+TEST kill_brick $V0 $H0 $B0/${V0}2
+TEST mv $M0/d1/dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789 $M0/d2
+
+# Bring it back and trigger heal.
+TEST $CLI volume start $V0 force
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Check that .glusterfs symlink for dir exists and points to d2/dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+TEST linkname=$(readlink $B0/${V0}2/.glusterfs/${gfid_str_d3:0:2}/${gfid_str_d3:2:2}/$gfid_str_d3)
+EXPECT "dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" basename $linkname
+TEST parent_dir_gfid_str=$(echo $linkname|cut -d / -f5)
+EXPECT $gfid_str_d2 echo $parent_dir_gfid_str
+
+TEST rmdir $M0/d2/dir012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
+
+TEST ! stat $B0/${V0}0/.glusterfs/${gfid_str_d3:0:2}/${gfid_str_d3:2:2}/$gfid_str_d3
+TEST ! stat $B0/${V0}1/.glusterfs/${gfid_str_d3:0:2}/${gfid_str_d3:2:2}/$gfid_str_d3
+TEST ! stat $B0/${V0}2/.glusterfs/${gfid_str_d3:0:2}/${gfid_str_d3:2:2}/$gfid_str_d3
+cleanup;
diff --git a/tests/bugs/replicate/bug-1480525.t b/tests/bugs/replicate/bug-1480525.t
new file mode 100644
index 00000000000..7c63bb2e4ea
--- /dev/null
+++ b/tests/bugs/replicate/bug-1480525.t
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+EXPECT_NOT "-1" echo `cat $M0/.meta/graphs/active/$V0-replicate-0/private|grep read_child |awk '{print $3}'`
+TEST $CLI volume set $V0 choose-local off
+EXPECT_WITHIN $CONFIG_UPDATE_TIMEOUT "-1" echo `cat $M0/.meta/graphs/active/$V0-replicate-0/private|grep read_child |awk '{print $3}'`
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1493415-gfid-heal.t b/tests/bugs/replicate/bug-1493415-gfid-heal.t
new file mode 100644
index 00000000000..8a79febf4b4
--- /dev/null
+++ b/tests/bugs/replicate/bug-1493415-gfid-heal.t
@@ -0,0 +1,78 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST $CLI volume set $V0 self-heal-daemon off
+
+# Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+
+#------------------------------------------------------------------------------#
+TEST touch $M0/f1
+gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/f1)
+gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1)
+
+# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a
+# brick crash at the point where file got created but no xattrs were set.
+TEST setfattr -x trusted.gfid $B0/${V0}1/f1
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 2
+
+# Assume there were no pending xattrs on parent dir due to 1st brick crashing
+# too. Then name heal from client must heal the gfid.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST stat $M0/f1
+EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/f1
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+#------------------------------------------------------------------------------#
+TEST mkdir $M0/dir
+TEST touch $M0/dir/f2
+gfid_f2=$(gf_get_gfid_xattr $B0/${V0}0/dir/f2)
+gfid_str_f2=$(gf_gfid_xattr_to_str $gfid_f2)
+
+# Remove gfid xattr and .glusterfs hard link from 2nd brick. This simulates a
+# brick crash at the point where file got created but no xattrs were set.
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir/f2
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
+
+#Now simulate setting of pending entry xattr on parent dir of 1st brick.
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir
+create_brick_xattrop_entry $B0/${V0}0 dir
+
+# storage/posix considers that a file without gfid changed less than a second
+# before doesn't exist, so we need to wait for a second to force posix to
+# consider that this is a valid file but without gfid.
+sleep 2
+
+#Trigger entry-heal via shd
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT "$gfid_f2" gf_get_gfid_xattr $B0/${V0}1/dir/f2
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
+
+#------------------------------------------------------------------------------#
+cleanup;
diff --git a/tests/bugs/replicate/bug-1498570-client-iot-graph-check.t b/tests/bugs/replicate/bug-1498570-client-iot-graph-check.t
new file mode 100644
index 00000000000..2b3b3040228
--- /dev/null
+++ b/tests/bugs/replicate/bug-1498570-client-iot-graph-check.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+TESTS_EXPECTED_IN_LOOP=21
+function reset_cluster
+{
+ cleanup
+ TEST glusterd
+ TEST pidof glusterd
+
+}
+function check_iot_option
+{
+ local enabled=$1
+ local is_loaded_in_graph=$2
+
+ EXPECT "$enabled" volume_get_field $V0 client-io-threads
+ IOT_STRING="volume\ $V0-io-threads"
+ grep "$IOT_STRING" $GLUSTERD_WORKDIR/vols/$V0/trusted-$V0.tcp-fuse.vol
+ TEST ret=$?
+ EXPECT_NOT "$is_loaded_in_graph" echo $ret
+}
+
+reset_cluster
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+check_iot_option on 1
+
+reset_cluster
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+check_iot_option off 0
+
+reset_cluster
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}1
+check_iot_option off 0
+TEST $CLI volume remove-brick $V0 replica 1 $H0:$B0/${V0}1 force
+check_iot_option on 1
+
+reset_cluster
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..5}
+TEST $CLI volume set $V0 client-io-threads on
+check_iot_option on 1
+TEST $CLI volume remove-brick $V0 replica 2 $H0:$B0/${V0}2 $H0:$B0/${V0}5 force
+check_iot_option on 1
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1539358-split-brain-detection.t b/tests/bugs/replicate/bug-1539358-split-brain-detection.t
new file mode 100755
index 00000000000..7b71a7a9e7d
--- /dev/null
+++ b/tests/bugs/replicate/bug-1539358-split-brain-detection.t
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+
+###############################################################################yy
+# Case of 2 bricks blaming the third and the third blaming the other two.
+
+TEST `echo "hello" >> $M0/file`
+
+# B0 and B2 must blame B1
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+TEST `echo "append" >> $M0/file`
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/file trusted.afr.$V0-client-1 data
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/file trusted.afr.$V0-client-1 data
+CLIENT_MD5=$(md5sum $M0/file | cut -d\ -f1)
+
+# B1 must blame B0 and B2
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/$V0"1"/file
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/$V0"1"/file
+
+# Launch heal
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+B0_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1)
+B1_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1)
+B2_MD5=$(md5sum $B0/${V0}2/file | cut -d\ -f1)
+TEST [ "$CLIENT_MD5" == "$B0_MD5" ]
+TEST [ "$CLIENT_MD5" == "$B1_MD5" ]
+TEST [ "$CLIENT_MD5" == "$B2_MD5" ]
+
+TEST rm $M0/file
+
+###############################################################################yy
+# Case of each brick blaming the next one in a cyclic manner
+
+TEST `echo "hello" >> $M0/file`
+# Mark cyclic xattrs and modify file content directly on the bricks.
+TEST $CLI volume set $V0 self-heal-daemon off
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/$V0"0"/file
+setfattr -n trusted.afr.dirty -v 0x000000010000000000000000 $B0/$V0"0"/file
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/$V0"1"/file
+setfattr -n trusted.afr.dirty -v 0x000000010000000000000000 $B0/$V0"1"/file
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/$V0"2"/file
+setfattr -n trusted.afr.dirty -v 0x000000010000000000000000 $B0/$V0"2"/file
+
+TEST `echo "ab" >> $B0/$V0"0"/file`
+TEST `echo "cdef" >> $B0/$V0"1"/file`
+TEST `echo "ghi" >> $B0/$V0"2"/file`
+
+# Add entry to xattrop dir to trigger index heal.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/file))
+ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0
+
+# Launch heal
+TEST $CLI volume set $V0 self-heal-daemon on
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+B0_MD5=$(md5sum $B0/${V0}0/file | cut -d\ -f1)
+B1_MD5=$(md5sum $B0/${V0}1/file | cut -d\ -f1)
+B2_MD5=$(md5sum $B0/${V0}2/file | cut -d\ -f1)
+TEST [ "$B0_MD5" == "$B1_MD5" ]
+TEST [ "$B0_MD5" == "$B2_MD5" ]
+###############################################################################yy
+cleanup
diff --git a/tests/bugs/replicate/bug-1561129-enospc.t b/tests/bugs/replicate/bug-1561129-enospc.t
new file mode 100644
index 00000000000..1b402fcc781
--- /dev/null
+++ b/tests/bugs/replicate/bug-1561129-enospc.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+#Tests that sequential write workload doesn't lead to FSYNCs
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST truncate -s 128M $B0/xfs_image
+TEST mkfs.xfs -f $B0/xfs_image
+TEST mkdir $B0/bricks
+TEST mount -t xfs -o loop $B0/xfs_image $B0/bricks
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/bricks/brick{0,1,3}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+# Write 50MB of data, which will try to consume 50x3=150MB on $B0/bricks.
+# Before that, we hit ENOSPC in pre-op cbk, which should not crash the mount.
+TEST ! dd if=/dev/zero of=$M0/a bs=1M count=50
+TEST stat $M0/a
+cleanup;
diff --git a/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
new file mode 100644
index 00000000000..49c4dea4e9c
--- /dev/null
+++ b/tests/bugs/replicate/bug-1586020-mark-dirty-for-entry-txn-on-quorum-failure.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+function create_files {
+ local i=1
+ while (true)
+ do
+ dd if=/dev/zero of=$M0/file$i bs=1M count=10
+ if [ -e $B0/${V0}0/file$i ] || [ -e $B0/${V0}1/file$i ]; then
+ ((i++))
+ else
+ break
+ fi
+ done
+ echo $i
+}
+
+TEST glusterd
+
+#Create brick partitions
+TEST truncate -s 100M $B0/brick0
+TEST truncate -s 100M $B0/brick1
+#Have the 3rd brick of a higher size to test the scenario of entry transaction
+#passing on only one brick and not on other bricks.
+TEST truncate -s 110M $B0/brick2
+LO1=`SETUP_LOOP $B0/brick0`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO1
+LO2=`SETUP_LOOP $B0/brick1`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO2
+LO3=`SETUP_LOOP $B0/brick2`
+TEST [ $? -eq 0 ]
+TEST MKFS_LOOP $LO3
+TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2
+TEST MOUNT_LOOP $LO1 $B0/${V0}0
+TEST MOUNT_LOOP $LO2 $B0/${V0}1
+TEST MOUNT_LOOP $LO3 $B0/${V0}2
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+i=$(create_files)
+TEST ! ls $B0/${V0}0/file$i
+TEST ! ls $B0/${V0}1/file$i
+TEST ls $B0/${V0}2/file$i
+dirty=$(get_hex_xattr trusted.afr.dirty $B0/${V0}2)
+TEST [ "$dirty" != "000000000000000000000000" ]
+
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST rm -f $M0/file1
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume delete $V0;
+UMOUNT_LOOP ${B0}/${V0}{0,1,2}
+rm -f ${B0}/brick{0,1,2}
+cleanup;
diff --git a/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t b/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t
new file mode 100644
index 00000000000..c6e5459e9a8
--- /dev/null
+++ b/tests/bugs/replicate/bug-1591193-assign-gfid-and-heal.t
@@ -0,0 +1,128 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+function check_gfid_and_link_count
+{
+ local file=$1
+
+ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
+ TEST [ ! -z $file_gfid_b0 ]
+ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
+ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
+ EXPECT $file_gfid_b0 echo $file_gfid_b1
+ EXPECT $file_gfid_b0 echo $file_gfid_b2
+
+ EXPECT "2" stat -c %h $B0/${V0}0/$file
+ EXPECT "2" stat -c %h $B0/${V0}1/$file
+ EXPECT "2" stat -c %h $B0/${V0}2/$file
+}
+TESTS_EXPECTED_IN_LOOP=30
+
+##############################################################################
+# Test on 1x3 volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+
+# Create files directly in the backend on different bricks
+echo $RANDOM >> $B0/${V0}0/file1
+echo $RANDOM >> $B0/${V0}1/file2
+echo $RANDOM >> $B0/${V0}2/file3
+
+# To prevent is_fresh_file code path
+sleep 2
+
+# Access them from mount to trigger name + gfid heal.
+TEST stat $M0/file1
+TEST stat $M0/file2
+TEST stat $M0/file3
+
+# Launch index heal to complete any pending data/metadata heals.
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Check each file has a gfid and the .glusterfs hardlink
+check_gfid_and_link_count file1
+check_gfid_and_link_count file2
+check_gfid_and_link_count file3
+
+TEST rm $M0/file1
+TEST rm $M0/file2
+TEST rm $M0/file3
+cleanup;
+
+##############################################################################
+# Test on 1x (2+1) volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+
+# Create files directly in the backend on different bricks
+echo $RANDOM >> $B0/${V0}0/file1
+echo $RANDOM >> $B0/${V0}1/file2
+touch $B0/${V0}2/file3
+
+# To prevent is_fresh_file code path
+sleep 2
+
+# Access them from mount to trigger name + gfid heal.
+TEST stat $M0/file1
+TEST stat $M0/file2
+
+# Though file is created on all 3 bricks, lookup will fail as arbiter blames the
+# other 2 bricks and ariter is not 'readable'.
+TEST ! stat $M0/file3
+
+# Launch index heal to complete any pending data/metadata heals.
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Now file3 should be accesible from mount.
+TEST stat $M0/file3
+
+# Check each file has a gfid and the .glusterfs hardlink
+check_gfid_and_link_count file1
+check_gfid_and_link_count file2
+check_gfid_and_link_count file3
+
+TEST rm $M0/file1
+TEST rm $M0/file2
+TEST rm $M0/file3
+cleanup;
diff --git a/tests/bugs/replicate/bug-1626994-info-split-brain.t b/tests/bugs/replicate/bug-1626994-info-split-brain.t
new file mode 100644
index 00000000000..86bfecb1a9e
--- /dev/null
+++ b/tests/bugs/replicate/bug-1626994-info-split-brain.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+# Test to check dirs having dirty xattr do not show up in info split-brain.
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+# Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+rm -f $M0/FILE
+EXPECT "1" count_index_entries $B0/${V0}0
+EXPECT "1" count_index_entries $B0/${V0}1
+EXPECT "1" count_index_entries $B0/${V0}2
+
+TEST mkdir $M0/dirty_dir
+TEST mkdir $M0/pending_dir
+
+# Set dirty xattrs on all bricks to simulate the case where entry transaction
+# succeeded only the pre-op phase.
+TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}0/dirty_dir
+TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/dirty_dir
+TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2/dirty_dir
+create_brick_xattrop_entry $B0/${V0}0 dirty_dir
+# Should not show up as split-brain.
+EXPECT "0" afr_get_split_brain_count $V0
+
+# replace/reset brick case where the new brick has dirty and the other 2 bricks
+# blame it should not be reported as split-brain.
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}0
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1
+TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}2
+create_brick_xattrop_entry $B0/${V0}0 "/"
+# Should not show up as split-brain.
+EXPECT "0" afr_get_split_brain_count $V0
+
+# Set pending xattrs on all bricks blaming each other to simulate the case of
+# entry split-brain.
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/pending_dir
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/${V0}1/pending_dir
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}2/pending_dir
+create_brick_xattrop_entry $B0/${V0}0 pending_dir
+# Should show up as split-brain.
+EXPECT "1" afr_get_split_brain_count $V0
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1637249-gfid-heal.t b/tests/bugs/replicate/bug-1637249-gfid-heal.t
new file mode 100644
index 00000000000..e824f14531e
--- /dev/null
+++ b/tests/bugs/replicate/bug-1637249-gfid-heal.t
@@ -0,0 +1,149 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1};
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume start $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+###############################################################################
+
+# Test for gfid + name heal when there is no 'source' brick, i.e. parent dir
+# xattrs are in split-brain or have dirty xattrs.
+
+TEST mkdir $M0/dir_pending
+TEST dd if=/dev/urandom of=$M0/dir_pending/file1 bs=1024 count=1024
+TEST mkdir $M0/dir_pending/dir11
+TEST mkdir $M0/dir_dirty
+TEST touch $M0/dir_dirty/file2
+
+# Set pending entry xattrs on dir_pending and remove gfid of entries under it on one brick.
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/dir_pending
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/dir_pending
+
+gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/dir_pending/file1)
+gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_pending/file1
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+gfid_d11=$(gf_get_gfid_xattr $B0/${V0}0/dir_pending/dir11)
+gfid_str_d11=$(gf_gfid_xattr_to_str $gfid_d11)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_pending/dir11
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11
+
+
+# Set dirty entry xattrs on dir_dirty and remove gfid of entries under it on one brick.
+TEST setfattr -n trusted.afr.dirty -v 0x000000000000000000000001 $B0/${V0}1/dir_dirty
+gfid_f2=$(gf_get_gfid_xattr $B0/${V0}0/dir_dirty/file2)
+gfid_str_f2=$(gf_gfid_xattr_to_str $gfid_f2)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_dirty/file2
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
+
+# Create a file under dir_pending directly on the backend only on 1 brick
+TEST touch $B0/${V0}1/dir_pending/file3
+
+# Create a file under dir_pending directly on the backend on all bricks
+TEST touch $B0/${V0}0/dir_pending/file4
+TEST touch $B0/${V0}1/dir_pending/file4
+
+# Stop & start the volume and mount client again.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST stat $M0/dir_pending/file1
+EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/dir_pending/file1
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+TEST stat $M0/dir_pending/dir11
+EXPECT "$gfid_d11" gf_get_gfid_xattr $B0/${V0}1/dir_pending/dir11
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11
+
+
+TEST stat $M0/dir_dirty/file2
+EXPECT "$gfid_f2" gf_get_gfid_xattr $B0/${V0}1/dir_dirty/file2
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f2:0:2}/${gfid_str_f2:2:2}/$gfid_str_f2
+
+TEST stat $M0/dir_pending/file3 # This assigns gfid on 2nd brick and heals the entry on to the 1st brick.
+gfid_f3=$(gf_get_gfid_xattr $B0/${V0}1/dir_pending/file3)
+TEST [ ! -z "$gfid_f3" ]
+EXPECT "$gfid_f3" gf_get_gfid_xattr $B0/${V0}0/dir_pending/file3
+
+TEST stat $M0/dir_pending/file4
+gfid_f4=$(gf_get_gfid_xattr $B0/${V0}0/dir_pending/file4)
+TEST [ ! -z "$gfid_f4" ]
+EXPECT "$gfid_f4" gf_get_gfid_xattr $B0/${V0}1/dir_pending/file4
+###############################################################################
+
+# Test for gfid + name heal when all bricks are 'source', i.e. parent dir
+# does not have any pending or dirty xattrs.
+
+TEST mkdir $M0/dir_clean
+TEST dd if=/dev/urandom of=$M0/dir_clean/file1 bs=1024 count=1024
+TEST mkdir $M0/dir_clean/dir11
+
+gfid_f1=$(gf_get_gfid_xattr $B0/${V0}0/dir_clean/file1)
+gfid_str_f1=$(gf_gfid_xattr_to_str $gfid_f1)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_clean/file1
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+gfid_d11=$(gf_get_gfid_xattr $B0/${V0}0/dir_clean/dir11)
+gfid_str_d11=$(gf_gfid_xattr_to_str $gfid_d11)
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir_clean/dir11
+TEST rm $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11
+
+# Create a file under dir_clean directly on the backend only on 1 brick
+TEST touch $B0/${V0}1/dir_clean/file3
+
+# Create a file under dir_clean directly on the backend on all bricks
+TEST touch $B0/${V0}0/dir_clean/file4
+TEST touch $B0/${V0}1/dir_clean/file4
+
+# Stop & start the volume and mount client again.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST stat $M0/dir_clean/file1
+EXPECT "$gfid_f1" gf_get_gfid_xattr $B0/${V0}1/dir_clean/file1
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_f1:0:2}/${gfid_str_f1:2:2}/$gfid_str_f1
+
+TEST stat $M0/dir_clean/dir11
+EXPECT "$gfid_d11" gf_get_gfid_xattr $B0/${V0}1/dir_clean/dir11
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_d11:0:2}/${gfid_str_d11:2:2}/$gfid_str_d11
+
+TEST stat $M0/dir_clean/file3 # This assigns gfid on 2nd brick and heals the entry on to the 1st brick.
+gfid_f3=$(gf_get_gfid_xattr $B0/${V0}1/dir_clean/file3)
+TEST [ ! -z "$gfid_f3" ]
+EXPECT "$gfid_f3" gf_get_gfid_xattr $B0/${V0}0/dir_clean/file3
+
+TEST stat $M0/dir_clean/file4
+gfid_f4=$(gf_get_gfid_xattr $B0/${V0}0/dir_clean/file4)
+TEST [ ! -z "$gfid_f4" ]
+EXPECT "$gfid_f4" gf_get_gfid_xattr $B0/${V0}1/dir_clean/file4
+###############################################################################
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t b/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t
new file mode 100644
index 00000000000..d7d1f285e01
--- /dev/null
+++ b/tests/bugs/replicate/bug-1637802-arbiter-stale-data-heal-lock.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+# Test to check that data self-heal does not leave any stale lock.
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+# Create base entry in indices/xattrop
+echo "Data" > $M0/FILE
+
+# Kill arbiter brick and write to FILE.
+TEST kill_brick $V0 $H0 $B0/${V0}2
+echo "arbiter down" >> $M0/FILE
+EXPECT 2 get_pending_heal_count $V0
+
+# Bring it back up and let heal complete.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# write to the FILE must succeed.
+echo "this must succeed" >> $M0/FILE
+TEST [ $? -eq 0 ]
+cleanup;
diff --git a/tests/bugs/replicate/bug-1655050-dir-sbrain-size-policy.t b/tests/bugs/replicate/bug-1655050-dir-sbrain-size-policy.t
new file mode 100644
index 00000000000..63f72e86bf6
--- /dev/null
+++ b/tests/bugs/replicate/bug-1655050-dir-sbrain-size-policy.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create replica 2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 data-self-heal off
+TEST $CLI volume set $V0 entry-self-heal off
+TEST $CLI volume set $V0 metadata-self-heal off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+cd $M0
+TEST mkdir dir
+
+#Create metadata split-brain
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST chmod 757 dir
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST chmod 747 dir
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+#Use size as fav-child policy.
+TEST $CLI volume set $V0 cluster.favorite-child-policy size
+
+#Enable shd and heal the file.
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+EXPECT_WITHIN $HEAL_TIMEOUT "2" get_pending_heal_count $V0
+
+b1c1dir=$(afr_get_specific_changelog_xattr $B0/${V0}0/dir \
+ trusted.afr.$V0-client-1 "metadata")
+b2c0dir=$(afr_get_specific_changelog_xattr $B0/${V0}1/dir \
+ trusted.afr.$V0-client-0 "metadata")
+
+EXPECT "00000001" echo $b1c1dir
+EXPECT "00000001" echo $b2c0dir
+
+#Finish up
+TEST force_umount $M0
+cleanup;
diff --git a/tests/bugs/replicate/bug-1655052-sbrain-policy-same-size.t b/tests/bugs/replicate/bug-1655052-sbrain-policy-same-size.t
new file mode 100755
index 00000000000..319736e1157
--- /dev/null
+++ b/tests/bugs/replicate/bug-1655052-sbrain-policy-same-size.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+#Test the split-brain resolution CLI commands.
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+#Create replica 2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST touch $M0/file
+
+############ Healing using favorite-child-policy = size and size of bricks is same #################
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd if=/dev/urandom of=$M0/file bs=1024 count=1024
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+#file still in split-brain
+EXPECT_WITHIN $HEAL_TIMEOUT "2" get_pending_heal_count $V0
+cat $M0/file > /dev/null
+EXPECT_NOT "^0$" echo $?
+
+#We know that both bricks have same size file
+TEST $CLI volume set $V0 cluster.favorite-child-policy size
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "2" get_pending_heal_count $V0
+cat $M0/file > /dev/null
+EXPECT_NOT "^0$" echo $?
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup
+
diff --git a/tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t b/tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t
new file mode 100644
index 00000000000..783016dc3c0
--- /dev/null
+++ b/tests/bugs/replicate/bug-1655854-support-dist-to-rep3-arb-conversion.t
@@ -0,0 +1,95 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+# Conversion from 2x1 to 2x3
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST mkdir $M0/dir
+TEST dd if=/dev/urandom of=$M0/dir/file bs=100K count=5
+file_md5sum=$(md5sum $M0/dir/file | awk '{print $1}')
+
+TEST $CLI volume add-brick $V0 replica 3 $H0:$B0/${V0}{2..5}
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}4
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}5
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 3
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 4
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 5
+
+# Trigger heal and wait for for it to complete
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Check whether the directory & file are healed to the newly added bricks
+TEST ls $B0/${V0}2/dir
+TEST ls $B0/${V0}3/dir
+TEST ls $B0/${V0}4/dir
+TEST ls $B0/${V0}5/dir
+
+TEST [ $file_md5sum == $(md5sum $B0/${V0}4/dir/file | awk '{print $1}') ]
+TEST [ $file_md5sum == $(md5sum $B0/${V0}5/dir/file | awk '{print $1}') ]
+
+
+# Conversion from 2x1 to 2x(2+1)
+
+TEST $CLI volume create $V1 $H0:$B0/${V1}{0,1}
+EXPECT 'Created' volinfo_field $V1 'Status';
+TEST $CLI volume start $V1
+EXPECT 'Started' volinfo_field $V1 'Status';
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}1
+
+TEST $GFS --volfile-id=$V1 --volfile-server=$H0 $M1;
+TEST mkdir $M1/dir
+TEST dd if=/dev/urandom of=$M1/dir/file bs=100K count=5
+file_md5sum=$(md5sum $M1/dir/file | awk '{print $1}')
+
+TEST $CLI volume add-brick $V1 replica 3 arbiter 1 $H0:$B0/${V1}{2..5}
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}3
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}4
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V1 $H0 $B0/${V1}5
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 2
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 3
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 4
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V1 5
+
+# Trigger heal and wait for for it to complete
+TEST $CLI volume heal $V1
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V1
+
+# Check whether the directory & file are healed to the newly added bricks
+TEST ls $B0/${V1}2/dir
+TEST ls $B0/${V1}3/dir
+TEST ls $B0/${V1}4/dir
+TEST ls $B0/${V1}5/dir
+
+EXPECT "0" stat -c %s $B0/${V1}5/dir/file
+TEST [ $file_md5sum == $(md5sum $B0/${V1}4/dir/file | awk '{print $1}') ]
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1657783-do-not-update-read-subvol-on-rename-link.t b/tests/bugs/replicate/bug-1657783-do-not-update-read-subvol-on-rename-link.t
new file mode 100644
index 00000000000..b180f0e1239
--- /dev/null
+++ b/tests/bugs/replicate/bug-1657783-do-not-update-read-subvol-on-rename-link.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0..2}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+TEST $CLI volume set $V0 performance.write-behind off
+
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST mkdir $M0/dir
+TEST "echo abc > $M0/file1"
+TEST "echo uvw > $M0/file2"
+
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST "echo def > $M0/file1"
+TEST "echo xyz > $M0/file2"
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+# Rename file1 and read it. Read should be served from the 3rd brick
+TEST mv $M0/file1 $M0/file3
+EXPECT "def" cat $M0/file3
+
+# Create a link to file2 and read it. Read should be served from the 3rd brick
+TEST ln $M0/file2 $M0/dir/file4
+EXPECT "xyz" cat $M0/dir/file4
+EXPECT "xyz" cat $M0/file2
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t b/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t
new file mode 100644
index 00000000000..78581e99614
--- /dev/null
+++ b/tests/bugs/replicate/bug-1686568-send-truncate-on-arbiter-from-shd.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+CHANGELOG_PATH_0="$B0/${V0}2/.glusterfs/changelogs"
+ROLLOVER_TIME=100
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 changelog.changelog on
+TEST $CLI volume set $V0 changelog.rollover-time $ROLLOVER_TIME
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST dd if=/dev/zero of=$M0/file1 bs=128K count=5
+
+TEST $CLI volume profile $V0 start
+TEST $CLI volume add-brick $V0 replica 3 arbiter 1 $H0:$B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+TEST $CLI volume profile $V0 info
+truncate_count=$($CLI volume profile $V0 info | grep TRUNCATE | awk '{count += $8} END {print count}')
+
+EXPECT "1" echo $truncate_count
+EXPECT "1" check_changelog_op ${CHANGELOG_PATH_0} "^ D "
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1696599-io-hang.t b/tests/bugs/replicate/bug-1696599-io-hang.t
new file mode 100755
index 00000000000..869cdb94bda
--- /dev/null
+++ b/tests/bugs/replicate/bug-1696599-io-hang.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+#Tests that local structures in afr are removed from granted/blocked list of
+#locks when inodelk fails on all bricks
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1..3}
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.client-io-threads off
+TEST $CLI volume set $V0 delay-gen locks
+TEST $CLI volume set $V0 delay-gen.delay-duration 5000000
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.enable finodelk
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+TEST touch $M0/file
+#Trigger write and stop bricks so inodelks fail on all bricks leading to
+#lock failure condition
+echo abc >> $M0/file &
+
+TEST $CLI volume stop $V0
+TEST $CLI volume reset $V0 delay-gen
+wait
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_meta $M0 $V0-replicate-0 2
+#Test that only one write succeeded, this tests that delay-gen worked as
+#expected
+echo abc >> $M0/file
+EXPECT "abc" cat $M0/file
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t
new file mode 100644
index 00000000000..76d1f2170f2
--- /dev/null
+++ b/tests/bugs/replicate/bug-1717819-metadata-split-brain-detection.t
@@ -0,0 +1,136 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $CLI volume heal $V0 disable
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+###############################################################################
+# Case of 2 bricks blaming the third and the third blaming the other two.
+
+TEST mkdir $M0/dir
+
+# B0 and B2 must blame B1
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+TEST setfattr -n user.metadata -v 1 $M0/dir
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/dir trusted.afr.$V0-client-1 metadata
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/dir trusted.afr.$V0-client-1 metadata
+CLIENT_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $M0/dir)
+
+# B1 must blame B0 and B2
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"1"/dir
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir
+
+# Launch heal
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir)
+B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir)
+B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir)
+
+TEST [ "$CLIENT_XATTR" == "$B0_XATTR" ]
+TEST [ "$CLIENT_XATTR" == "$B1_XATTR" ]
+TEST [ "$CLIENT_XATTR" == "$B2_XATTR" ]
+TEST setfattr -x user.metadata $M0/dir
+
+###############################################################################
+# Case of each brick blaming the next one in a cyclic manner
+
+TEST $CLI volume heal $V0 disable
+TEST `echo "hello" >> $M0/dir/file`
+# Mark cyclic xattrs and modify metadata directly on the bricks.
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000100000000 $B0/$V0"0"/dir/file
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"2"/dir/file
+
+setfattr -n user.metadata -v 1 $B0/$V0"0"/dir/file
+setfattr -n user.metadata -v 2 $B0/$V0"1"/dir/file
+setfattr -n user.metadata -v 3 $B0/$V0"2"/dir/file
+
+# Add entry to xattrop dir to trigger index heal.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/file))
+ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0
+
+# Launch heal
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file)
+B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file)
+B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir/file)
+
+TEST [ "$B0_XATTR" == "$B1_XATTR" ]
+TEST [ "$B0_XATTR" == "$B2_XATTR" ]
+TEST rm -f $M0/dir/file
+
+###############################################################################
+# Case of 2 bricks having quorum blaming and the other having only one blaming.
+
+TEST $CLI volume heal $V0 disable
+TEST `echo "hello" >> $M0/dir/file`
+# B0 and B2 must blame B1
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+TEST setfattr -n user.metadata -v 1 $M0/dir/file
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}0/dir/file trusted.afr.$V0-client-1 metadata
+EXPECT "00000001" afr_get_specific_changelog_xattr $B0/${V0}2/dir/file trusted.afr.$V0-client-1 metadata
+
+# B1 must blame B0 and B2
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"1"/dir/file
+
+# B0 must blame B2
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000100000000 $B0/$V0"0"/dir/file
+
+# Modify the metadata directly on the bricks B1 & B2.
+setfattr -n user.metadata -v 2 $B0/$V0"1"/dir/file
+setfattr -n user.metadata -v 3 $B0/$V0"2"/dir/file
+
+# Launch heal
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" brick_up_status $V0 $H0 $B0/${V0}1
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+B0_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}0/dir/file)
+B1_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}1/dir/file)
+B2_XATTR=$(getfattr -n 'user.metadata' --absolute-names --only-values $B0/${V0}2/dir/file)
+
+TEST [ "$B0_XATTR" == "$B1_XATTR" ]
+TEST [ "$B0_XATTR" == "$B2_XATTR" ]
+
+###############################################################################
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
new file mode 100644
index 00000000000..0aeaaafc84c
--- /dev/null
+++ b/tests/bugs/replicate/bug-1722507-type-mismatch-error-handling.t
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume heal $V0 disable
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+
+##########################################################################################
+# GFID link file and the GFID is missing on one brick and all the bricks are being blamed.
+
+TEST touch $M0/dir/file
+#TEST kill_brick $V0 $H0 $B0/$V0"1"
+
+#B0 and B2 must blame B1
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+
+# Add entry to xattrop dir to trigger index heal.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
+ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+EXPECT "^1$" get_pending_heal_count $V0
+
+# Remove the gfid xattr and the link file on one brick.
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+
+# Launch heal
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+
+# Wait for 2 second to force posix to consider that this is a valid file but
+# without gfid.
+sleep 2
+TEST $CLI volume heal $V0
+
+# Heal should not fail as the file is missing gfid xattr and the link file,
+# which is not actually the gfid or type mismatch.
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+rm -f $M0/dir/file
+
+
+###########################################################################################
+# GFID link file and the GFID is missing on two bricks and all the bricks are being blamed.
+
+TEST $CLI volume heal $V0 disable
+TEST touch $M0/dir/file
+#TEST kill_brick $V0 $H0 $B0/$V0"1"
+
+#B0 and B2 must blame B1
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
+setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+
+# Add entry to xattrop dir to trigger index heal.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
+ln -s $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+EXPECT "^1$" get_pending_heal_count $V0
+
+# Remove the gfid xattr and the link file on two bricks.
+gfid_file=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file)
+gfid_str_file=$(gf_gfid_xattr_to_str $gfid_file)
+TEST setfattr -x trusted.gfid $B0/${V0}0/dir/file
+TEST rm -f $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+TEST setfattr -x trusted.gfid $B0/${V0}1/dir/file
+TEST rm -f $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+
+# Launch heal
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+
+# Wait for 2 second to force posix to consider that this is a valid file but
+# without gfid.
+sleep 2
+TEST $CLI volume heal $V0
+
+# Heal should not fail as the file is missing gfid xattr and the link file,
+# which is not actually the gfid or type mismatch.
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}0/dir/file
+TEST stat $B0/${V0}0/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+EXPECT "$gfid_file" gf_get_gfid_xattr $B0/${V0}1/dir/file
+TEST stat $B0/${V0}1/.glusterfs/${gfid_str_file:0:2}/${gfid_str_file:2:2}/$gfid_str_file
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1728770-pass-xattrs.t b/tests/bugs/replicate/bug-1728770-pass-xattrs.t
new file mode 100644
index 00000000000..159c4fcc6a1
--- /dev/null
+++ b/tests/bugs/replicate/bug-1728770-pass-xattrs.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+function fop_on_bad_disk {
+ local path=$1
+ mkdir $path/dir{1..1000} 2>/dev/null
+ mv $path/dir1 $path/newdir
+ touch $path/foo.txt
+ echo $?
+}
+
+function ls_fop_on_bad_disk {
+ local path=$1
+ ls $path
+ echo $?
+}
+
+TEST init_n_bricks 6;
+TEST setup_lvm 6;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 replica 3 $H0:$L1 $H0:$L2 $H0:$L3 $H0:$L4 $H0:$L5 $H0:$L6;
+TEST $CLI volume set $V0 health-check-interval 1000;
+
+TEST $CLI volume start $V0;
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+#corrupt last disk
+dd if=/dev/urandom of=/dev/mapper/patchy_snap_vg_6-brick_lvm bs=512K count=200 status=progress && sync
+
+
+# Test the disk is now returning EIO for touch and ls
+EXPECT_WITHIN $DISK_FAIL_TIMEOUT "^1$" fop_on_bad_disk "$L6"
+EXPECT_WITHIN $DISK_FAIL_TIMEOUT "^2$" ls_fop_on_bad_disk "$L6"
+
+TEST touch $M0/foo{1..100}
+TEST $CLI volume remove-brick $V0 replica 3 $H0:$L4 $H0:$L5 $H0:$L6 start
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" remove_brick_status_completed_field "$V0" "$H0:$L4 $H0:$L5 $H0:$L6";
+
+#check that remove-brick status should not have any failed or skipped files
+var=`$CLI volume remove-brick $V0 $H0:$L4 $H0:$L5 $H0:$L6 status | grep completed`
+TEST [ `echo $var | awk '{print $5}'` = "0" ]
+TEST [ `echo $var | awk '{print $6}'` = "0" ]
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t b/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t
new file mode 100644
index 00000000000..14dfae89135
--- /dev/null
+++ b/tests/bugs/replicate/bug-1734370-entry-heal-restore-time.t
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+function time_stamps_match {
+ path=$1
+ mtime_source_b0=$(get_mtime $B0/${V0}0/$path)
+ atime_source_b0=$(get_atime $B0/${V0}0/$path)
+ mtime_source_b2=$(get_mtime $B0/${V0}2/$path)
+ atime_source_b2=$(get_atime $B0/${V0}2/$path)
+ mtime_sink_b1=$(get_mtime $B0/${V0}1/$path)
+ atime_sink_b1=$(get_atime $B0/${V0}1/$path)
+
+ #The same brick must be the source of heal for both atime and mtime.
+ if [[ ( $mtime_source_b0 -eq $mtime_sink_b1 && $atime_source_b0 -eq $atime_sink_b1 ) || \
+ ( $mtime_source_b2 -eq $mtime_sink_b1 && $atime_source_b2 -eq $atime_sink_b1 ) ]]
+ then
+ echo "Y"
+ else
+ echo "Mtimes: $mtime_source_b0:$mtime_sink_b1:$mtime_source_b2 Atimes: $atime_source_b0:$atime_sink_b1:$atime_source_b2"
+ fi
+
+}
+
+function mtimes_match {
+ path=$1
+ mtime_source_b0=$(get_mtime $B0/${V0}0/$path)
+ mtime_source_b2=$(get_mtime $B0/${V0}2/$path)
+ mtime_sink_b1=$(get_mtime $B0/${V0}1/$path)
+
+ if [[ ( $mtime_source_b0 -eq $mtime_sink_b1) || \
+ ( $mtime_source_b2 -eq $mtime_sink_b1) ]]
+ then
+ echo "Y"
+ else
+ echo "Mtimes: $mtime_source_b0:$mtime_sink_b1:$mtime_source_b2"
+ fi
+
+}
+
+# Test that the parent dir's timestamps are restored during entry-heal.
+GET_MDATA_PATH=$(dirname $0)/../../utils
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --attribute-timeout=0 --entry-timeout=0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+###############################################################################
+TEST mkdir $M0/DIR
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST touch $M0/DIR/FILE
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+EXPECT "Y" time_stamps_match DIR
+ctime_source1=$(get_ctime $B0/${V0}0/$path)
+ctime_source2=$(get_ctime $B0/${V0}2/$path)
+ctime_sink=$(get_ctime $B0/${V0}1/$path)
+TEST [ $ctime_source1 -eq $ctime_sink ]
+TEST [ $ctime_source2 -eq $ctime_sink ]
+
+
+###############################################################################
+# Repeat the test with ctime feature disabled.
+TEST $CLI volume set $V0 features.ctime off
+TEST mkdir $M0/DIR2
+TEST kill_brick $V0 $H0 $B0/${V0}1
+TEST touch $M0/DIR2/FILE
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+#Executing parallel heal may lead to changing atime after heal. So better
+#to test just the mtime
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+EXPECT "Y" mtimes_match DIR2
+
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
+cleanup;
diff --git a/tests/bugs/replicate/bug-1744548-heal-timeout.t b/tests/bugs/replicate/bug-1744548-heal-timeout.t
new file mode 100644
index 00000000000..011535066f9
--- /dev/null
+++ b/tests/bugs/replicate/bug-1744548-heal-timeout.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+function get_cumulative_opendir_count {
+#sed command prints content between Cumulative and Interval, this keeps content from Cumulative stats
+ $CLI volume profile $V0 info |sed -n '/^Cumulative/,/^Interval/p'|grep OPENDIR| awk '{print $8}'|tr -d '\n'
+}
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume heal $V0 disable
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST ! $CLI volume heal $V0
+
+# Enable shd and verify that index crawl is triggered immediately.
+TEST $CLI volume profile $V0 start
+TEST $CLI volume profile $V0 info clear
+TEST $CLI volume heal $V0 enable
+# Each brick does 4 opendirs, corresponding to dirty, xattrop and entry-changes, anonymous-inode
+EXPECT_WITHIN 4 "^444$" get_cumulative_opendir_count
+
+# Check that a change in heal-timeout is honoured immediately.
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+sleep 10
+# Two crawls must have happened.
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^121212$" get_cumulative_opendir_count
+
+# shd must not heal if it is disabled and heal-timeout is changed.
+TEST $CLI volume heal $V0 disable
+#Wait for configuration update and any opendir fops to complete
+sleep 10
+TEST $CLI volume profile $V0 info clear
+TEST $CLI volume set $V0 cluster.heal-timeout 6
+#Better to wait for more than 6 seconds to account for configuration updates
+sleep 10
+COUNT=`$CLI volume profile $V0 info incremental |grep OPENDIR|awk '{print $8}'|tr -d '\n'`
+TEST [ -z $COUNT ]
+cleanup;
diff --git a/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
new file mode 100644
index 00000000000..96279084065
--- /dev/null
+++ b/tests/bugs/replicate/bug-1749322-entry-heal-not-happening.t
@@ -0,0 +1,89 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup
+
+function check_gfid_and_link_count
+{
+ local file=$1
+
+ file_gfid_b0=$(gf_get_gfid_xattr $B0/${V0}0/$file)
+ TEST [ ! -z $file_gfid_b0 ]
+ file_gfid_b1=$(gf_get_gfid_xattr $B0/${V0}1/$file)
+ file_gfid_b2=$(gf_get_gfid_xattr $B0/${V0}2/$file)
+ EXPECT $file_gfid_b0 echo $file_gfid_b1
+ EXPECT $file_gfid_b0 echo $file_gfid_b2
+
+ EXPECT "2" stat -c %h $B0/${V0}0/$file
+ EXPECT "2" stat -c %h $B0/${V0}1/$file
+ EXPECT "2" stat -c %h $B0/${V0}2/$file
+}
+TESTS_EXPECTED_IN_LOOP=18
+
+################################################################################
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume start $V0;
+TEST $CLI volume set $V0 cluster.heal-timeout 5
+TEST $CLI volume heal $V0 disable
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+TEST `echo "File 1 " > $M0/dir/file1`
+TEST touch $M0/dir/file{2..4}
+
+# Remove file2 from 1st & 3rd bricks
+TEST rm -f $B0/$V0"0"/dir/file2
+TEST rm -f $B0/$V0"2"/dir/file2
+
+# Remove file3 and the .glusterfs hardlink from 1st & 2nd bricks
+gfid_file3=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file3)
+gfid_str_file3=$(gf_gfid_xattr_to_str $gfid_file3)
+TEST rm $B0/$V0"0"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
+TEST rm $B0/$V0"1"/.glusterfs/${gfid_str_file3:0:2}/${gfid_str_file3:2:2}/$gfid_str_file3
+TEST rm -f $B0/$V0"0"/dir/file3
+TEST rm -f $B0/$V0"1"/dir/file3
+
+# Remove the .glusterfs hardlink and the gfid xattr of file4 on 3rd brick
+gfid_file4=$(gf_get_gfid_xattr $B0/$V0"0"/dir/file4)
+gfid_str_file4=$(gf_gfid_xattr_to_str $gfid_file4)
+TEST rm $B0/$V0"2"/.glusterfs/${gfid_str_file4:0:2}/${gfid_str_file4:2:2}/$gfid_str_file4
+TEST setfattr -x trusted.gfid $B0/$V0"2"/dir/file4
+
+# B0 and B2 blame each other
+setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/$V0"2"/dir
+setfattr -n trusted.afr.$V0-client-2 -v 0x000000000000000000000001 $B0/$V0"0"/dir
+
+# Add entry to xattrop dir on first brick.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/dir/))
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_str
+
+EXPECT "^1$" get_pending_heal_count $V0
+
+# Launch heal
+TEST $CLI volume heal $V0 enable
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^Y$" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "^1$" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# All the files must be present on all the bricks after conservative merge and
+# should have the gfid xattr and the .glusterfs hardlink.
+check_gfid_and_link_count dir/file1
+check_gfid_and_link_count dir/file2
+check_gfid_and_link_count dir/file3
+check_gfid_and_link_count dir/file4
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t b/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t
new file mode 100644
index 00000000000..c1bdf34ee6d
--- /dev/null
+++ b/tests/bugs/replicate/bug-1756938-replica-3-sbrain-cli.t
@@ -0,0 +1,111 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard enable
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+TEST glusterfs --volfile-server=$H0 --volfile-id=/$V0 $M0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+#Create split-brain by setting afr xattrs/gfids manually.
+#file1 is non-sharded and will be in data split-brain.
+#file2 will have one shard which will be in data split-brain.
+#file3 will have one shard which will be in gfid split-brain.
+#file4 will have one shard which will be in data & metadata split-brain.
+TEST dd if=/dev/zero of=$M0/file1 bs=1024 count=1024 oflag=direct
+TEST dd if=/dev/zero of=$M0/file2 bs=1M count=6 oflag=direct
+TEST dd if=/dev/zero of=$M0/file3 bs=1M count=6 oflag=direct
+TEST dd if=/dev/zero of=$M0/file4 bs=1M count=6 oflag=direct
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+#-------------------------------------------------------------------------------
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}0/file1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}0/file1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}1/file1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}1/file1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}2/file1
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}2/file1
+
+#-------------------------------------------------------------------------------
+gfid_f2=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file2))
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}0/.shard/$gfid_f2.1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}0/.shard/$gfid_f2.1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}1/.shard/$gfid_f2.1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000000000000 $B0/${V0}1/.shard/$gfid_f2.1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000000000000 $B0/${V0}2/.shard/$gfid_f2.1
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000000000000 $B0/${V0}2/.shard/$gfid_f2.1
+
+#-------------------------------------------------------------------------------
+TESTS_EXPECTED_IN_LOOP=5
+function assign_new_gfid {
+ brickpath=$1
+ filename=$2
+ gfid=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brickpath/$filename))
+ gfid_shard=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brickpath/.shard/$gfid.1))
+
+ TEST rm $brickpath/.glusterfs/${gfid_shard:0:2}/${gfid_shard:2:2}/$gfid_shard
+ TEST setfattr -x trusted.gfid $brickpath/.shard/$gfid.1
+ new_gfid=$(get_random_gfid)
+ new_gfid_str=$(gf_gfid_xattr_to_str $new_gfid)
+ TEST setfattr -n trusted.gfid -v $new_gfid $brickpath/.shard/$gfid.1
+ TEST mkdir -p $brickpath/.glusterfs/${new_gfid_str:0:2}/${new_gfid_str:2:2}
+ TEST ln $brickpath/.shard/$gfid.1 $brickpath/.glusterfs/${new_gfid_str:0:2}/${new_gfid_str:2:2}/$new_gfid_str
+}
+assign_new_gfid $B0/$V0"1" file3
+assign_new_gfid $B0/$V0"2" file3
+
+#-------------------------------------------------------------------------------
+gfid_f4=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file4))
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000100000000 $B0/${V0}0/.shard/$gfid_f4.1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000100000000 $B0/${V0}0/.shard/$gfid_f4.1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000100000000 $B0/${V0}1/.shard/$gfid_f4.1
+TEST setfattr -n trusted.afr.$V0-client-2 -v 0x000000010000000100000000 $B0/${V0}1/.shard/$gfid_f4.1
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000010000000100000000 $B0/${V0}2/.shard/$gfid_f4.1
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000010000000100000000 $B0/${V0}2/.shard/$gfid_f4.1
+
+#-------------------------------------------------------------------------------
+#Add entry to xattrop dir on first brick and check for split-brain.
+xattrop_dir0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_dir0`
+
+gfid_f1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/file1))
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f1
+
+gfid_f2_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f2.1))
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f2_shard1
+
+gfid_f3=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/${V0}0/file3))
+gfid_f3_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f3.1))
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f3_shard1
+
+gfid_f4_shard1=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/.shard/$gfid_f4.1))
+TEST ln $xattrop_dir0/$base_entry_b0 $xattrop_dir0/$gfid_f4_shard1
+
+#-------------------------------------------------------------------------------
+#gfid split-brain won't show up in split-brain count.
+EXPECT "3" afr_get_split_brain_count $V0
+EXPECT_NOT "^0$" get_pending_heal_count $V0
+
+#Resolve split-brains
+TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /file1
+GFIDSTR="gfid:$gfid_f2_shard1"
+TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 $GFIDSTR
+TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /.shard/$gfid_f3.1
+TEST $CLI volume heal $V0 split-brain source-brick $H0:$B0/${V0}1 /.shard/$gfid_f4.1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+cleanup;
diff --git a/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t b/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t
new file mode 100644
index 00000000000..7e24eaec03d
--- /dev/null
+++ b/tests/bugs/replicate/bug-1761531-metadata-heal-restore-time.t
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup
+
+GET_MDATA_PATH=$(dirname $0)/../../utils
+build_tester $GET_MDATA_PATH/get-mdata-xattr.c
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0..2}
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+
+TEST touch $M0/a
+sleep 1
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST touch $M0/a
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+mtime0=$(get_mtime $B0/brick0/a)
+mtime1=$(get_mtime $B0/brick1/a)
+TEST [ $mtime0 -eq $mtime1 ]
+
+ctime0=$(get_ctime $B0/brick0/a)
+ctime1=$(get_ctime $B0/brick1/a)
+TEST [ $ctime0 -eq $ctime1 ]
+
+###############################################################################
+# Repeat the test with ctime feature disabled.
+TEST $CLI volume set $V0 features.ctime off
+
+TEST touch $M0/b
+sleep 1
+TEST kill_brick $V0 $H0 $B0/brick0
+TEST touch $M0/b
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^2$" get_pending_heal_count $V0
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+mtime2=$(get_mtime $B0/brick0/b)
+mtime3=$(get_mtime $B0/brick1/b)
+TEST [ $mtime2 -eq $mtime3 ]
+
+TEST rm $GET_MDATA_PATH/get-mdata-xattr
+
+TEST force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/replicate/bug-1801624-entry-heal.t b/tests/bugs/replicate/bug-1801624-entry-heal.t
new file mode 100644
index 00000000000..94b465181fa
--- /dev/null
+++ b/tests/bugs/replicate/bug-1801624-entry-heal.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/brick{0,1,2}
+TEST $CLI volume set $V0 heal-timeout 5
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0 granular-entry-heal enable
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+echo "Data">$M0/FILE
+ret=$?
+TEST [ $ret -eq 0 ]
+
+# Re-create the file when a brick is down.
+TEST kill_brick $V0 $H0 $B0/brick1
+TEST rm $M0/FILE
+echo "New Data">$M0/FILE
+ret=$?
+TEST [ $ret -eq 0 ]
+EXPECT_WITHIN $HEAL_TIMEOUT "4" get_pending_heal_count $V0
+
+# Launching index heal must not reset parent dir afr xattrs or remove granular entry indices.
+$CLI volume heal $V0 # CLI will fail but heal is launched anyway.
+TEST sleep 5 # give index heal a chance to do one run.
+brick0_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick0/)
+brick2_pending=$(get_hex_xattr trusted.afr.$V0-client-1 $B0/brick2/)
+TEST [ $brick0_pending -eq "000000000000000000000002" ]
+TEST [ $brick2_pending -eq "000000000000000000000002" ]
+EXPECT "FILE" ls $B0/brick0/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/
+EXPECT "FILE" ls $B0/brick2/.glusterfs/indices/entry-changes/00000000-0000-0000-0000-000000000001/
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+$CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+# No gfid-split-brain (i.e. EIO) must be seen. Try on fresh mount to avoid cached values.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST cat $M0/FILE
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup;
diff --git a/tests/bugs/replicate/bug-765564.t b/tests/bugs/replicate/bug-765564.t
new file mode 100644
index 00000000000..098d225018f
--- /dev/null
+++ b/tests/bugs/replicate/bug-765564.t
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Start and create a volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+
+TEST $CLI volume set $V0 performance.io-cache off;
+TEST $CLI volume set $V0 performance.write-behind off;
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+TEST $CLI volume start $V0;
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+#returns success if 'olddir' is absent
+#'olddir' must be absent in both replicas
+function rm_succeeded () {
+ local dir1=$1
+ [[ -d $H0:$B0/${V0}-0/$dir1 || -d $H0:$B0/${V0}-1/$dir1 ]] && return 0
+ return 1
+}
+
+# returns successes if 'newdir' is present
+#'newdir' must be present in both replicas
+function mv_succeeded () {
+ local dir1=$1
+ [[ -d $H0:$B0/${V0}-0/$dir1 && -d $H0:$B0/${V0}-1/$dir1 ]] && return 1
+ return 0
+}
+
+# returns zero on success
+# Only one of rm and mv can succeed. This is captured by the XOR below
+
+function chk_backend_consistency(){
+ local dir1=$1
+ local dir2=$2
+ local rm_status=rm_succeeded $dir1
+ local mv_status=mv_succeeded $dir2
+ [[ ( $rm_status && ! $mv_status ) || ( ! $rm_status && $mv_status ) ]] && return 0
+ return 1
+}
+
+#concurrent removal/rename of dirs
+function rm_mv_correctness () {
+ ret=0
+ for i in {1..100}; do
+ mkdir $M0/"dir"$i
+ rmdir $M0/"dir"$i &
+ mv $M0/"dir"$i $M0/"adir"$i &
+ wait
+ tmp_ret=$(chk_backend_consistency "dir"$i "adir"$i)
+ (( ret += tmp_ret ))
+ rm -rf $M0/"dir"$i
+ rm -rf $M0/"adir"$i
+ done
+ return $ret
+}
+
+TEST touch $M0/a;
+TEST mv $M0/a $M0/b;
+
+#test rename fop when one of the bricks is down
+kill_brick ${V0} ${H0} ${B0}/${V0}-1;
+TEST touch $M0/h;
+TEST mv $M0/h $M0/1;
+
+TEST $CLI volume start $V0 force;
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1;
+find $M0 2>/dev/null 1>/dev/null;
+find $M0 | xargs stat 2>/dev/null 1>/dev/null;
+
+TEST rm_mv_correctness;
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup;
+
diff --git a/tests/bugs/replicate/bug-767585-gfid.t b/tests/bugs/replicate/bug-767585-gfid.t
new file mode 100755
index 00000000000..4176aabb544
--- /dev/null
+++ b/tests/bugs/replicate/bug-767585-gfid.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#Test cases to perform gfid-self-heal
+#file 'a' should be assigned a fresh gfid
+#file 'b' should be healed with gfid1 from brick1
+#file 'c' should be healed with gfid2 from brick2
+
+gfid1="0x8428b7193a764bf8be8046fb860b8993"
+gfid2="0x85ad91afa2f74694bf52c3326d048209"
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --direct-io-mode=enable
+touch $B0/${V0}0/a $B0/${V0}1/a
+touch $B0/${V0}0/b $B0/${V0}1/b
+touch $B0/${V0}0/c $B0/${V0}1/c
+
+TEST setfattr -n trusted.gfid -v $gfid1 $B0/${V0}0/b
+TEST setfattr -n trusted.gfid -v $gfid2 $B0/${V0}1/c
+
+sleep 2
+
+TEST stat $M0/a
+TEST stat $M0/b
+TEST stat $M0/c
+
+TEST gf_get_gfid_xattr $B0/${V0}0/a
+TEST gf_get_gfid_xattr $B0/${V0}1/a
+
+EXPECT "$gfid1" gf_get_gfid_xattr $B0/${V0}0/b
+EXPECT "$gfid1" gf_get_gfid_xattr $B0/${V0}1/b
+
+EXPECT "$gfid2" gf_get_gfid_xattr $B0/${V0}0/c
+EXPECT "$gfid2" gf_get_gfid_xattr $B0/${V0}1/c
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-802417.t b/tests/bugs/replicate/bug-802417.t
new file mode 100755
index 00000000000..f213439401e
--- /dev/null
+++ b/tests/bugs/replicate/bug-802417.t
@@ -0,0 +1,120 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function write_file()
+{
+ path="$1"; shift
+ echo "$*" > "$path"
+}
+
+cleanup;
+
+#####################################################
+# We are currently not triggering data heal unless all bricks of the replica are
+# up. We will need to modify this .t once the fix for preventing stale reads
+# being served to clients for files in spurious split-brains is done. Spurious
+# split-brains here means afr xattrs indicates sbrain but it is actually not.
+# Self-heal will heal such files automatically but before the heal completes,
+# reads can be served which needs fixing.
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
+######################################################
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+mkdir -p ${B0}/${V0}-2
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}-{0,1,2}
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Make sure io-cache and write-behind don't interfere.
+TEST $CLI volume set $V0 performance.io-cache off;
+TEST $CLI volume set $V0 performance.write-behind off;
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+## Make sure automatic self-heal doesn't perturb our results.
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 cluster.quorum-type none
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+## Create a file with some recognizably stale data.
+TEST write_file $M0/a_file "old_data"
+
+## Kill two of the bricks and write some newer data.
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}-1
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}-2
+TEST write_file $M0/a_file "new_data"
+
+## Bring all the bricks up and kill one so we do a partial self-heal.
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 2
+TEST kill_brick ${V0} ${H0} ${B0}/${V0}-2
+TEST dd if=${M0}/a_file of=/dev/null
+
+
+obs_path_0=${B0}/${V0}-0/a_file
+obs_path_1=${B0}/${V0}-1/a_file
+obs_path_2=${B0}/${V0}-2/a_file
+
+tgt_xattr_0="trusted.afr.${V0}-client-0"
+tgt_xattr_1="trusted.afr.${V0}-client-1"
+tgt_xattr_2="trusted.afr.${V0}-client-2"
+
+actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_0)
+EXPECT "0x000000000000000000000000|^\$" echo $actual
+
+EXPECT_WITHIN $HEAL_TIMEOUT "0x000000000000000000000000" \
+afr_get_changelog_xattr $obs_path_0 $tgt_xattr_1
+
+actual=$(afr_get_changelog_xattr $obs_path_0 $tgt_xattr_2)
+EXPECT "0x000000030000000000000000" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_1 $tgt_xattr_0)
+EXPECT "0x000000000000000000000000|^\$" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_1 $tgt_xattr_1)
+EXPECT "0x000000000000000000000000|^\$" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_1 $tgt_xattr_2)
+EXPECT "0x000000010000000000000000" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_2 $tgt_xattr_0)
+EXPECT "0x000000000000000000000000|^\$" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_2 $tgt_xattr_1)
+EXPECT "0x000000000000000000000000|^\$" echo $actual
+
+actual=$(afr_get_changelog_xattr $obs_path_2 $tgt_xattr_2)
+EXPECT "0x000000000000000000000000|^\$" echo $actual
+
+if [ "$EXIT_EARLY" = "1" ]; then
+ exit 0;
+fi
+
+## Finish up
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-821056.t b/tests/bugs/replicate/bug-821056.t
new file mode 100644
index 00000000000..81186d86309
--- /dev/null
+++ b/tests/bugs/replicate/bug-821056.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind on
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+touch $M0/a
+
+#Open file with fd as 5
+exec 5>$M0/a
+realpath=$(gf_get_gfid_backend_file_path $B0/${V0}0 "a")
+
+kill_brick $V0 $H0 $B0/${V0}0
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+EXPECT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpath"
+
+kill_brick $V0 $H0 $B0/${V0}0
+TEST gf_rm_file_and_gfid_link $B0/${V0}0 "a"
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+ls -l $M0/a > /dev/null 2>&1 #Make sure the file is re-created
+EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpath"
+EXPECT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 $B0/${V0}0/a
+
+for i in {1..1024}; do
+ echo "open sesame" >&5
+done
+
+EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpath"
+#close the fd
+exec 5>&-
+
+#Check that anon-fd based file is not leaking.
+EXPECT_WITHIN $REOPEN_TIMEOUT "N" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpath"
+cleanup;
diff --git a/tests/bugs/replicate/bug-830665.t b/tests/bugs/replicate/bug-830665.t
new file mode 100755
index 00000000000..68180424803
--- /dev/null
+++ b/tests/bugs/replicate/bug-830665.t
@@ -0,0 +1,127 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+function recreate {
+ rm -rf $1 && mkdir -p $1
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+recreate ${B0}/${V0}-0
+recreate ${B0}/${V0}-1
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1}
+TEST $CLI volume set $V0 nfs.disable false
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+#EXPECT_WITHIN fails the test if the command it executes fails. This function
+#returns "" when the file doesn't exist
+function friendly_cat {
+ if [ ! -f $1 ];
+ then
+ echo "";
+ else
+ cat $1;
+ fi
+}
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Make sure stat-prefetch doesn't prevent self-heal checks.
+TEST $CLI volume set $V0 performance.stat-prefetch off;
+
+## Make sure automatic self-heal doesn't perturb our results.
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+## Mount NFS
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+## Create some files and directories
+echo "test_data" > $N0/a_file;
+mkdir $N0/a_dir;
+echo "more_test_data" > $N0/a_dir/another_file;
+
+## Unmount and stop the volume.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+TEST $CLI volume stop $V0;
+
+# Recreate the brick. Note that because of http://review.gluster.org/#change,4202
+# we need to preserve and restore the volume ID or else the brick (and thus the
+# entire not-very-HA-any-more volume) won't start. When that bug is fixed, we can
+# remove the [gs]etxattr calls.
+volid=$(getfattr -e hex -n trusted.glusterfs.volume-id $B0/${V0}-0 2> /dev/null \
+ | grep = | cut -d= -f2)
+rm -rf $B0/${V0}-0;
+mkdir $B0/${V0}-0;
+#Ideally, disk replacement is done using reset-brick or replace-brick gluster CLI
+#which will create .glusterfs folder.
+mkdir $B0/${V0}-0/.glusterfs && chmod 600 $B0/${V0}-0/.glusterfs
+
+setfattr -n trusted.glusterfs.volume-id -v $volid $B0/${V0}-0
+
+## Restart and remount. Note that we use actimeo=0 so that the stat calls
+## we need for self-heal don't get blocked by the NFS client.
+TEST $CLI volume start $V0;
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock,actimeo=0;
+
+## The Linux NFS client has a really charming habit of caching stuff right
+## after mount, even though we set actimeo=0 above. Life would be much easier
+## if NFS developers cared as much about correctness as they do about shaving
+## a few seconds off of benchmarks.
+ls -l $N0 &> /dev/null;
+sleep 5;
+
+## Force entry self-heal.
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST gluster volume heal $V0 full
+#ls -lR $N0 > /dev/null;
+
+## Do NOT check through the NFS mount here. That will force a new self-heal
+## check, but we want to test whether self-heal already happened.
+
+## Make sure everything's in order on the recreated brick.
+EXPECT_WITHIN $HEAL_TIMEOUT 'test_data' friendly_cat $B0/${V0}-0/a_file;
+EXPECT_WITHIN $HEAL_TIMEOUT 'more_test_data' friendly_cat $B0/${V0}-0/a_dir/another_file;
+
+if [ "$EXIT_EARLY" = "1" ]; then
+ exit 0;
+fi
+
+## Finish up
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-859581.t b/tests/bugs/replicate/bug-859581.t
new file mode 100755
index 00000000000..d8b45a257a1
--- /dev/null
+++ b/tests/bugs/replicate/bug-859581.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+EXPECT 'Created' volinfo_field $V0 'Status';
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs --direct-io-mode=yes --use-readdirp=no --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+mkdir -p $M0/dir1/dir2
+
+TEST rm -f $(gf_get_gfid_backend_file_path $B0/${V0}1 "dir1")
+TEST rmdir $B0/${V0}1/dir1/dir2
+
+TEST stat $M0/dir1/dir2
+
+TEST [ -d $B0/${V0}1/dir1/dir2 ]
+TEST [ ! -d $(gf_get_gfid_backend_file_path $B0/${V0}1 "dir1") ]
+
+# Stop the volume to flush caches and force symlink recreation
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+# Till now, protocol/server was not doing inode linking as part of readdirp.
+# But pas part of user servicable snapshots patcth, changes to do inode linking
+# in protocol/server in readdirp, were introduced. So now to make sure
+# the gfid handle of dir1 is healed, explicit lookup has to be sent on it.
+# Otherwise, whenever ls -l is done just on the mount point $M0, lookup on the
+# entries received as part of readdirp, is not sent, because the inodes for
+# those entries were linked as part of readdirp itself. i.e instead of doing
+# "ls -l $M0", it has to be the below command.
+ls -l $M0/dir1;
+
+TEST [ -h $(gf_get_gfid_backend_file_path $B0/${V0}1 "dir1") ]
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
+
diff --git a/tests/bugs/replicate/bug-865825.t b/tests/bugs/replicate/bug-865825.t
new file mode 100755
index 00000000000..ffb2e0f6437
--- /dev/null
+++ b/tests/bugs/replicate/bug-865825.t
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+## Start and create a volume
+mkdir -p ${B0}/${V0}-0
+mkdir -p ${B0}/${V0}-1
+mkdir -p ${B0}/${V0}-2
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}-{0,1,2}
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+## Verify volume is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Make sure io-cache and write-behind don't interfere.
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 performance.io-cache off;
+TEST $CLI volume set $V0 performance.quick-read off;
+TEST $CLI volume set $V0 performance.write-behind off;
+TEST $CLI volume set $V0 performance.stat-prefetch off
+
+## Make sure automatic self-heal doesn't perturb our results.
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount native
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+
+## Create a file with some recognizable contents.
+echo "test_data" > $M0/a_file;
+
+## Unmount.
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+## Mess with the flags as though brick-0 accuses brick-2 while brick-1 is
+## missing its brick-2 changelog altogether.
+value=0x000000010000000000000000
+setfattr -n trusted.afr.${V0}-client-2 -v $value $B0/${V0}-0/a_file
+setfattr -x trusted.afr.${V0}-client-2 $B0/${V0}-1/a_file
+echo "wrong_data" > $B0/${V0}-2/a_file
+
+gluster volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+gluster volume heal $V0 full
+
+## Make sure brick 2 now has the correct contents.
+EXPECT_WITHIN $HEAL_TIMEOUT "test_data" cat $B0/${V0}-2/a_file
+
+if [ "$EXIT_EARLY" = "1" ]; then
+ exit 0;
+fi
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-880898.t b/tests/bugs/replicate/bug-880898.t
new file mode 100644
index 00000000000..660d34ca25f
--- /dev/null
+++ b/tests/bugs/replicate/bug-880898.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/brick2
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+pkill glusterfs
+uuid=""
+for line in $(cat $GLUSTERD_WORKDIR/glusterd.info)
+do
+ if [[ $line == UUID* ]]
+ then
+ uuid=`echo $line | sed -r 's/^.{5}//'`
+ fi
+done
+
+#Command execution should fail reporting that the bricks are not running.
+TEST ! $CLI volume heal $V0 info
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-884328.t b/tests/bugs/replicate/bug-884328.t
new file mode 100644
index 00000000000..acc8e542240
--- /dev/null
+++ b/tests/bugs/replicate/bug-884328.t
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST check_option_help_presence "cluster.quorum-type"
+TEST check_option_help_presence "cluster.quorum-count"
+cleanup;
diff --git a/tests/bugs/replicate/bug-886998.t b/tests/bugs/replicate/bug-886998.t
new file mode 100644
index 00000000000..bcac235ff09
--- /dev/null
+++ b/tests/bugs/replicate/bug-886998.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+# This tests that the replicate trash directory(.landfill) has following
+# properties.
+# Note: This is to have backward compatibility with 3.3 glusterfs
+# In the latest releases this dir is present inside .glusterfs of brick.
+# 1) lookup of trash dir fails
+# 2) readdir does not show this directory
+# 3) Self-heal does not do any self-heal of these directories.
+gfid1="0xc2e75dde97f346e7842d1076a8e699f8"
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0 --direct-io-mode=enable
+
+TEST mkdir $B0/${V0}1/.landfill
+TEST setfattr -n trusted.gfid -v $gfid1 $B0/${V0}1/.landfill
+TEST mkdir $B0/${V0}0/.landfill
+TEST setfattr -n trusted.gfid -v $gfid1 $B0/${V0}0/.landfill
+
+TEST ! stat $M0/.landfill
+EXPECT "" echo $(ls -a $M0 | grep ".landfill")
+
+TEST rmdir $B0/${V0}0/.landfill
+#Force a conservative merge and it should not create .landfill
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000000 $B0/${V0}0/
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/
+
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000001 $B0/${V0}1/
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/
+
+EXPECT "" echo $(ls -a $M0 | grep ".landfill")
+TEST ! stat $B0/${V0}0/.landfill
+TEST stat $B0/${V0}1/.landfill
+
+#TEST that the dir is not deleted even when xattrs suggest to delete
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000000 $B0/${V0}0/
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000001 $B0/${V0}0/
+
+TEST setfattr -n trusted.afr.$V0-client-0 -v 0x000000000000000000000000 $B0/${V0}1/
+TEST setfattr -n trusted.afr.$V0-client-1 -v 0x000000000000000000000000 $B0/${V0}1/
+
+EXPECT "" echo $(ls -a $M0 | grep ".landfill")
+TEST ! stat $B0/${V0}0/.landfill
+TEST stat $B0/${V0}1/.landfill
+cleanup;
diff --git a/tests/bugs/replicate/bug-888174.t b/tests/bugs/replicate/bug-888174.t
new file mode 100644
index 00000000000..8c70265513d
--- /dev/null
+++ b/tests/bugs/replicate/bug-888174.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This tests if flush, fsync wakes up the delayed post-op or not.
+#If it is not woken up, INODELK from the next command waits
+#for post-op-delay secs. There would be pending changelog even after the command
+#completes.
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/r2_0 $H0:$B0/r2_1
+
+TEST $CLI volume set $V0 cluster.eager-lock on
+
+TEST $CLI volume set $V0 performance.flush-behind off
+EXPECT "off" volume_option $V0 performance.flush-behind
+
+TEST $CLI volume set $V0 cluster.post-op-delay-secs 3
+EXPECT "3" volume_option $V0 cluster.post-op-delay-secs
+
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0
+
+#Check that INODELK MAX latency is not in the order of seconds
+TEST gluster volume profile $V0 start
+for i in {1..5}
+do
+ echo hi > $M0/a
+done
+#Test if the MAX INODELK fop latency is of the order of seconds.
+inodelk_max_latency=$($CLI volume profile $V0 info | grep INODELK | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+
+TEST [ -z $inodelk_max_latency ]
+
+TEST dd of=$M0/a if=/dev/urandom bs=1024k count=10 conv=fsync
+#Check for no trace of pending changelog. Flush should make sure of it.
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_0/a trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_1/a trusted.afr.dirty
+
+
+dd of=$M0/a if=/dev/urandom bs=1024k count=1024 2>/dev/null &
+p=$!
+#trigger graph switches, tests for fsync not leaving any pending flags
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+
+kill -TERM $p
+#wait for dd to exit
+wait > /dev/null 2>&1
+
+#Goal is to check if there is permanent FOOL changelog
+sleep 5
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_0/a trusted.afr.dirty
+EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/r2_1/a trusted.afr.dirty
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-913051.t b/tests/bugs/replicate/bug-913051.t
new file mode 100644
index 00000000000..6794995e6fe
--- /dev/null
+++ b/tests/bugs/replicate/bug-913051.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup;
+
+#Test that afr opens the file on the bricks that were offline at the time of
+# open after the brick comes online. This tests for writev, readv triggering
+# open-fd-fix in afr.
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume start $V0
+TEST $GFS --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 --direct-io-mode=enable $M0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+
+TEST mkdir $M0/dir
+TEST touch $M0/dir/a
+TEST touch $M0/dir/b
+echo abc > $M0/dir/b
+
+TEST wfd=`fd_available`
+TEST fd_open $wfd "w" $M0/dir/a
+TEST rfd=`fd_available`
+TEST fd_open $rfd "r" $M0/dir/b
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+
+#attempt self-heal so that the files are created on brick-0
+
+TEST dd if=$M0/dir/a of=/dev/null bs=1024k
+TEST dd if=$M0/dir/b of=/dev/null bs=1024k
+
+#trigger writev for attempting open-fd-fix in afr
+TEST fd_write $wfd "open sesame"
+
+#trigger readv for attempting open-fd-fix in afr
+TEST fd_cat $rfd
+
+EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpatha"
+EXPECT_WITHIN $REOPEN_TIMEOUT "Y" gf_check_file_opened_in_brick $V0 $H0 $B0/${V0}0 "$realpathb"
+
+TEST fd_close $wfd
+TEST fd_close $rfd
+cleanup;
diff --git a/tests/bugs/replicate/bug-916226.t b/tests/bugs/replicate/bug-916226.t
new file mode 100644
index 00000000000..893905f9a47
--- /dev/null
+++ b/tests/bugs/replicate/bug-916226.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1 $H0:$B0/${V0}2 $H0:$B0/${V0}3
+TEST $CLI volume set $V0 cluster.eager-lock on
+TEST $CLI volume start $V0
+
+## Mount FUSE
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST mkdir $M0/dir{1..10};
+TEST touch $M0/dir{1..10}/files{1..10};
+
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}4 $H0:/$B0/${V0}5
+
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-918437-sh-mtime.t b/tests/bugs/replicate/bug-918437-sh-mtime.t
new file mode 100644
index 00000000000..6a194b14a9b
--- /dev/null
+++ b/tests/bugs/replicate/bug-918437-sh-mtime.t
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+function get_mtime {
+ local f=$1
+ stat $f | grep Modify | awk '{print $2 $3}' | cut -f1 -d'.'
+}
+
+function file_exists {
+ if [ -f $1 ]; then echo "Y"; else echo "N"; fi
+}
+cleanup;
+
+## Tests if mtime is correct after self-heal.
+TEST glusterd
+TEST pidof glusterd
+TEST mkdir -p $B0/gfs0/brick0{1,2}
+TEST $CLI volume create $V0 replica 2 transport tcp $H0:$B0/gfs0/brick01 $H0:$B0/gfs0/brick02
+TEST $CLI volume set $V0 nfs.disable on
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 cluster.background-self-heal-count 0
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --direct-io-mode=enable
+# file 'a' is healed from brick02 to brick01 where as file 'b' is healed from
+# brick01 to brick02
+
+TEST cp -p /etc/passwd $M0/a
+TEST cp -p /etc/passwd $M0/b
+
+#Store mtimes before self-heals
+TEST modify_atstamp=$(get_mtime $B0/gfs0/brick02/a)
+TEST modify_btstamp=$(get_mtime $B0/gfs0/brick02/b)
+
+TEST $CLI volume stop $V0
+TEST gf_rm_file_and_gfid_link $B0/gfs0/brick01 a
+TEST gf_rm_file_and_gfid_link $B0/gfs0/brick02 b
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
+
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+#TODO remove these 2 lines once heal-full is fixed in v2.
+TEST stat $M0/a
+TEST stat $M0/b
+
+TEST gluster volume heal $V0 full
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" file_exists $B0/gfs0/brick01/a
+EXPECT_WITHIN $HEAL_TIMEOUT "Y" file_exists $B0/gfs0/brick02/b
+EXPECT_WITHIN $HEAL_TIMEOUT 0 get_pending_heal_count $V0
+
+size=`stat -c '%s' /etc/passwd`
+EXPECT $size stat -c '%s' $B0/gfs0/brick01/a
+
+TEST modify_atstamp1=$(get_mtime $B0/gfs0/brick01/a)
+TEST modify_atstamp2=$(get_mtime $B0/gfs0/brick02/a)
+EXPECT $modify_atstamp echo $modify_atstamp1
+EXPECT $modify_atstamp echo $modify_atstamp2
+
+TEST modify_btstamp1=$(get_mtime $B0/gfs0/brick01/b)
+TEST modify_btstamp2=$(get_mtime $B0/gfs0/brick02/b)
+EXPECT $modify_btstamp echo $modify_btstamp1
+EXPECT $modify_btstamp echo $modify_btstamp2
+cleanup;
diff --git a/tests/bugs/replicate/bug-921231.t b/tests/bugs/replicate/bug-921231.t
new file mode 100755
index 00000000000..81504612f63
--- /dev/null
+++ b/tests/bugs/replicate/bug-921231.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test writes to same file with 2 fds and tests that cluster.eager-lock is not
+# causing extra delay because of post-op-delay-secs
+cleanup;
+
+function write_to_file {
+ dd of=$M0/1 if=/dev/zero bs=1024k count=128 oflag=append 2>&1 >/dev/null
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+TEST $CLI volume set $V0 cluster.eager-lock on
+TEST $CLI volume set $V0 post-op-delay-secs 3
+TEST $CLI volume set $V0 client-log-level DEBUG
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST $CLI volume set $V0 ensure-durability off
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+write_to_file &
+write_to_file &
+wait
+#Test if the MAX [F]INODELK fop latency is of the order of seconds.
+inodelk_max_latency=$($CLI volume profile $V0 info | grep INODELK | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+TEST [ -z $inodelk_max_latency ]
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-957877.t b/tests/bugs/replicate/bug-957877.t
new file mode 100644
index 00000000000..bcce7e3c9e7
--- /dev/null
+++ b/tests/bugs/replicate/bug-957877.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/f1
+TEST setfattr -n "user.foo" -v "test" $M0/f1
+
+BRICK=$B0"/${V0}1"
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+# Wait for self-heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT '0' count_sh_entries $BRICK;
+
+TEST getfattr -n "user.foo" $B0/${V0}0/f1;
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-976800.t b/tests/bugs/replicate/bug-976800.t
new file mode 100644
index 00000000000..27f8b27619e
--- /dev/null
+++ b/tests/bugs/replicate/bug-976800.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test checks if there are any open fds on the brick
+# even after the file is closed on the mount. This particular
+# test tests dd with "fsync" to check afr's fsync codepath
+cleanup;
+
+function is_fd_open {
+ local v=$1
+ local h=$2
+ local b=$3
+ local bpid=$(get_brick_pid $v $h $b)
+ ls -l /proc/$bpid/fd | grep -w "\-> $b/1"
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 ensure-durability off
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume set $V0 flush-behind off
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST dd of=$M0/1 if=/dev/zero bs=1k count=1 conv=fsync
+TEST ! is_fd_open $V0 $H0 $B0/${V0}0
+cleanup;
diff --git a/tests/bugs/replicate/bug-977797.t b/tests/bugs/replicate/bug-977797.t
new file mode 100755
index 00000000000..9a8f36c956c
--- /dev/null
+++ b/tests/bugs/replicate/bug-977797.t
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 open-behind off
+TEST $CLI volume set $V0 quick-read off
+TEST $CLI volume set $V0 read-ahead off
+TEST $CLI volume set $V0 write-behind off
+TEST $CLI volume set $V0 io-cache off
+TEST $CLI volume set $V0 cluster.data-self-heal on
+TEST $CLI volume set $V0 cluster.metadata-self-heal on
+TEST $CLI volume set $V0 cluster.entry-self-heal on
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+
+TEST mkdir -p $M0/a
+TEST `echo "GLUSTERFS" > $M0/a/file`
+
+TEST kill_brick $V0 $H0 $B0/$V0"1"
+
+TEST chown root $M0/a
+TEST chown root $M0/a/file
+TEST `echo "GLUSTER-FILE-SYSTEM" > $M0/a/file`
+TEST mkdir $M0/a/b
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0;
+
+
+
+TEST kill_brick $V0 $H0 $B0/$V0"2"
+
+TEST chmod 757 $M0/a
+TEST chmod 757 $M0/a/file
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1;
+
+#Trigger entry heal of $M0/a
+getfattr -n user.nosuchattr $M0/a
+dd if=$M0/a/file of=/dev/null bs=1024k
+#read fails, but heal is triggered.
+TEST [ $? -ne 0 ]
+
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
+afr_get_specific_changelog_xattr $B0/$V0"1"/a/file trusted.afr.$V0-client-0 "data"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
+afr_get_specific_changelog_xattr $B0/$V0"1"/a/file trusted.afr.$V0-client-1 "data"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
+afr_get_specific_changelog_xattr $B0/$V0"2"/a/file trusted.afr.$V0-client-0 "data"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
+afr_get_specific_changelog_xattr $B0/$V0"2"/a/file trusted.afr.$V0-client-1 "data"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
+afr_get_specific_changelog_xattr $B0/$V0"1"/a trusted.afr.$V0-client-0 "entry"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
+afr_get_specific_changelog_xattr $B0/$V0"1"/a trusted.afr.$V0-client-1 "entry"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
+afr_get_specific_changelog_xattr $B0/$V0"2"/a trusted.afr.$V0-client-0 "entry"
+
+EXPECT_WITHIN $HEAL_TIMEOUT "00000000" \
+afr_get_specific_changelog_xattr $B0/$V0"2"/a trusted.afr.$V0-client-1 "entry"
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
diff --git a/tests/bugs/replicate/bug-978794.t b/tests/bugs/replicate/bug-978794.t
new file mode 100644
index 00000000000..8e43e74bf79
--- /dev/null
+++ b/tests/bugs/replicate/bug-978794.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+
+# This test opens 100 fds and triggers graph switches to check if fsync
+# as part of graph-switch causes crash or not.
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST touch $M0/{1..100}
+for i in {1..100}; do fd[$i]=`fd_available`; fd_open ${fd[$i]} 'w' $M0/$i; done
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{2,3}
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+TEST cat $M0/{1..100}
+for i in {1..100}; do fd_write ${fd[$i]} 'abc'; done
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}{4,5}
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "completed" rebalance_status_field $V0
+for i in {1..100}; do fd_write ${fd[$i]} 'abc'; done
+TEST cat $M0/{1..100}
+cleanup
diff --git a/tests/bugs/replicate/bug-979365.t b/tests/bugs/replicate/bug-979365.t
new file mode 100755
index 00000000000..c09c7d51772
--- /dev/null
+++ b/tests/bugs/replicate/bug-979365.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This script checks that ensure-durability option enables/disables afr
+#sending fsyncs
+cleanup;
+
+function num_fsyncs {
+ $CLI volume profile $V0 info | grep -w FSYNC | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 ensure-durability on
+TEST $CLI volume set $V0 cluster.eager-lock off
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST dd of=$M0/a if=/dev/zero bs=1024k count=10
+#fsyncs take a while to complete.
+sleep 5
+
+# There can be zero or more fsyncs, depending on the order
+# in which the writes reached the server, in turn deciding
+# whether they were treated as "appending" writes or not.
+
+TEST [[ $(num_fsyncs) -ge 0 ]]
+#Stop the volume to erase the profile info of old operations
+TEST $CLI volume profile $V0 stop
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+#Disable ensure-durability now to disable fsyncs in afr.
+TEST $CLI volume set $V0 ensure-durability off
+TEST $CLI volume start $V0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST $CLI volume profile $V0 start
+TEST dd of=$M0/a if=/dev/zero bs=1024k count=10
+#fsyncs take a while to complete.
+sleep 5
+TEST [[ $(num_fsyncs) -eq 0 ]]
+
+cleanup
diff --git a/tests/bugs/replicate/bug-986905.t b/tests/bugs/replicate/bug-986905.t
new file mode 100755
index 00000000000..f4f7386ebc4
--- /dev/null
+++ b/tests/bugs/replicate/bug-986905.t
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#This script checks if hardlinks that are created while a brick is down are
+#healed properly.
+
+cleanup;
+function get_inum {
+ ls -i $1 | awk '{print $1}'
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST touch $M0/a
+TEST ln $M0/a $M0/link_a
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
+TEST ls -l $M0
+inum=$(get_inum $B0/${V0}0/a)
+EXPECT "$inum" get_inum $B0/${V0}0/link_a
+cleanup
diff --git a/tests/bugs/replicate/issue-1254-prioritize-enospc.t b/tests/bugs/replicate/issue-1254-prioritize-enospc.t
new file mode 100644
index 00000000000..fab94b71b27
--- /dev/null
+++ b/tests/bugs/replicate/issue-1254-prioritize-enospc.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+function create_bricks {
+ TEST truncate -s 100M $B0/brick0
+ TEST truncate -s 100M $B0/brick1
+ TEST truncate -s 20M $B0/brick2
+ LO1=`SETUP_LOOP $B0/brick0`
+ TEST [ $? -eq 0 ]
+ TEST MKFS_LOOP $LO1
+ LO2=`SETUP_LOOP $B0/brick1`
+ TEST [ $? -eq 0 ]
+ TEST MKFS_LOOP $LO2
+ LO3=`SETUP_LOOP $B0/brick2`
+ TEST [ $? -eq 0 ]
+ TEST MKFS_LOOP $LO3
+ TEST mkdir -p $B0/${V0}0 $B0/${V0}1 $B0/${V0}2
+ TEST MOUNT_LOOP $LO1 $B0/${V0}0
+ TEST MOUNT_LOOP $LO2 $B0/${V0}1
+ TEST MOUNT_LOOP $LO3 $B0/${V0}2
+}
+
+function create_files {
+ local i=1
+ while (true)
+ do
+ touch $M0/file$i
+ if [ -e $B0/${V0}2/file$i ];
+ then
+ ((i++))
+ else
+ break
+ fi
+ done
+}
+
+TESTS_EXPECTED_IN_LOOP=13
+
+#Arbiter volume: Check for ENOSPC when arbiter brick becomes full#
+TEST glusterd
+create_bricks
+TEST $CLI volume create $V0 replica 3 arbiter 1 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+create_files
+TEST kill_brick $V0 $H0 $B0/${V0}1
+error1=$(touch $M0/file-1 2>&1)
+EXPECT "No space left on device" echo $error1
+error2=$(mkdir $M0/dir-1 2>&1)
+EXPECT "No space left on device" echo $error2
+error3=$((echo "Test" > $M0/file-3) 2>&1)
+EXPECT "No space left on device" echo $error3
+
+cleanup
+
+#Replica-3 volume: Check for ENOSPC when one of the brick becomes full#
+#Keeping the third brick of lower size to simulate disk full scenario#
+TEST glusterd
+create_bricks
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+create_files
+TEST kill_brick $V0 $H0 $B0/${V0}1
+error1=$(touch $M0/file-1 2>&1)
+EXPECT "No space left on device" echo $error1
+error2=$(mkdir $M0/dir-1 2>&1)
+EXPECT "No space left on device" echo $error2
+error3=$((cat /dev/zero > $M0/file1) 2>&1)
+EXPECT "No space left on device" echo $error3
+
+cleanup
diff --git a/tests/bugs/replicate/mdata-heal-no-xattrs.t b/tests/bugs/replicate/mdata-heal-no-xattrs.t
new file mode 100644
index 00000000000..d3b0c504c80
--- /dev/null
+++ b/tests/bugs/replicate/mdata-heal-no-xattrs.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2};
+TEST $CLI volume set $V0 cluster.self-heal-daemon off
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" afr_child_up_status $V0 2
+echo "Data">$M0/FILE
+ret=$?
+TEST [ $ret -eq 0 ]
+
+# Change permission on brick-0: simulates the case where there is metadata
+# mismatch but no pending xattrs. This brick will become the source for heal.
+TEST chmod +x $B0/$V0"0"/FILE
+
+# Add gfid to xattrop
+xattrop_b0=$(afr_get_index_path $B0/$V0"0")
+base_entry_b0=`ls $xattrop_b0`
+gfid_str_FILE=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $B0/$V0"0"/FILE))
+TEST ln $xattrop_b0/$base_entry_b0 $xattrop_b0/$gfid_str_FILE
+EXPECT_WITHIN $HEAL_TIMEOUT "^1$" get_pending_heal_count $V0
+
+TEST $CLI volume set $V0 cluster.self-heal-daemon on
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 2
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "^0$" get_pending_heal_count $V0
+
+# Brick-0 should contain xattrs blaming other 2 bricks.
+# The values will be zero because heal is over.
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-1 $B0/${V0}0/FILE
+EXPECT "000000000000000000000000" get_hex_xattr trusted.afr.$V0-client-2 $B0/${V0}0/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}0/FILE
+
+# Brick-1 and Brick-2 must not contain any afr xattrs.
+TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}1/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-1 $B0/${V0}1/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}1/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-0 $B0/${V0}2/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-1 $B0/${V0}2/FILE
+TEST ! getfattr -n trusted.afr.$V0-client-2 $B0/${V0}2/FILE
+
+# check permission bits.
+EXPECT '755' stat -c %a $B0/${V0}0/FILE
+EXPECT '755' stat -c %a $B0/${V0}1/FILE
+EXPECT '755' stat -c %a $B0/${V0}2/FILE
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+cleanup;
diff --git a/tests/bugs/replicate/ta-inode-refresh-read.t b/tests/bugs/replicate/ta-inode-refresh-read.t
new file mode 100644
index 00000000000..6dd6ff7f163
--- /dev/null
+++ b/tests/bugs/replicate/ta-inode-refresh-read.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Test read transaction inode refresh logic for thin-arbiter.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../thin-arbiter.rc
+cleanup;
+TEST ta_create_brick_and_volfile brick0
+TEST ta_create_brick_and_volfile brick1
+TEST ta_create_ta_and_volfile ta
+TEST ta_start_brick_process brick0
+TEST ta_start_brick_process brick1
+TEST ta_start_ta_process ta
+
+TEST ta_create_mount_volfile brick0 brick1 ta
+# Set afr xlator options to choose brick0 as read-subvol.
+sed -i '/iam-self-heal-daemon/a \ option read-subvolume-index 0' $B0/mount.vol
+TEST [ $? -eq 0 ]
+sed -i '/iam-self-heal-daemon/a \ option choose-local false' $B0/mount.vol
+TEST [ $? -eq 0 ]
+
+TEST ta_start_mount_process $M0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" ta_up_status $V0 $M0 0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "trusted.afr.patchy-ta-2" ls $B0/ta
+
+TEST touch $M0/FILE
+TEST ls $B0/brick0/FILE
+TEST ls $B0/brick1/FILE
+TEST ! ls $B0/ta/FILE
+TEST setfattr -n user.name -v ravi $M0/FILE
+
+# Remove gfid hardlink from brick0 which is the read-subvol for FILE.
+# This triggers inode refresh up on a getfattr and eventually calls
+# afr_ta_read_txn(). Without this patch, afr_ta_read_txn() will again query
+# brick0 causing getfattr to fail.
+TEST rm -f $(gf_get_gfid_backend_file_path $B0/brick0 FILE)
+TEST getfattr -n user.name $M0/FILE
+
+cleanup;
diff --git a/tests/bugs/rpc/bug-1043886.t b/tests/bugs/rpc/bug-1043886.t
new file mode 100755
index 00000000000..c1ea7a71e8b
--- /dev/null
+++ b/tests/bugs/rpc/bug-1043886.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+## Mount FUSE with caching disabled
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0;
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+## Mount volume as NFS export
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+# just a random uid/gid
+uid=22162
+gid=5845
+
+mkdir $N0/other;
+chown $uid:$gid $N0/other;
+
+TEST $CLI volume set $V0 server.root-squash on;
+TEST $CLI volume set $V0 server.anonuid $uid;
+TEST $CLI volume set $V0 server.anongid $gid;
+
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+# create files and directories in the root of the glusterfs and nfs mount
+# which is owned by root and hence the right behavior is getting EACCESS
+# as the fops are executed as nfsnobody.
+touch $M0/file 2>/dev/null;
+TEST [ $? -ne 0 ]
+mkdir $M0/dir 2>/dev/null;
+TEST [ $? -ne 0 ]
+
+# Here files and directories should be getting created as other directory is owned
+# by tmp_user as server.anonuid and server.anongid have the value of tmp_user uid and gid
+TEST touch $M0/other/file 2>/dev/null;
+TEST [ "$(stat -c %u:%g $N0/other/file)" = "$uid:$gid" ];
+TEST mkdir $M0/other/dir 2>/dev/null;
+TEST [ "$(stat -c %u:%g $N0/other/dir)" = "$uid:$gid" ];
+
+## Before killing daemon to avoid deadlocks
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/rpc/bug-847624.t b/tests/bugs/rpc/bug-847624.t
new file mode 100755
index 00000000000..fe8fc982887
--- /dev/null
+++ b/tests/bugs/rpc/bug-847624.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup
+
+#1
+TEST glusterd
+TEST pidof glusterd
+#3
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 nfs.drc on
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+cd $N0
+#7
+TEST dbench -t 10 10
+TEST rm -rf $N0/*
+cd
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+#10
+TEST $CLI volume set $V0 nfs.drc-size 10000
+cleanup
diff --git a/tests/bugs/rpc/bug-884452.t b/tests/bugs/rpc/bug-884452.t
new file mode 100644
index 00000000000..c161a68190d
--- /dev/null
+++ b/tests/bugs/rpc/bug-884452.t
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST touch $M0/{1..10000}
+
+RUN_LS_LOOP_FILE="$M0/run-ls-loop"
+function ls-loop
+{
+ while [ -f $RUN_LS_LOOP_FILE ]; do
+ ls -lR $M0 1>/dev/null 2>&1
+ done;
+}
+
+touch $RUN_LS_LOOP_FILE
+ls-loop &
+
+function vol-status-loop
+{
+ for i in {1..1000}; do
+ $CLI volume status $V0 clients >/dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ return 1
+ fi
+ done;
+
+ return 0
+}
+
+TEST vol-status-loop
+
+rm -f $RUN_LS_LOOP_FILE
+wait
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+cleanup;
diff --git a/tests/bugs/rpc/bug-921072.t b/tests/bugs/rpc/bug-921072.t
new file mode 100755
index 00000000000..ae7eb0101bc
--- /dev/null
+++ b/tests/bugs/rpc/bug-921072.t
@@ -0,0 +1,132 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+#1
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+TEST mount_nfs $H0:/$V0 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# based on ip addresses (1-4)
+# case 1: allow only localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-allow 127.0.0.1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST mount_nfs localhost:/$V0 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# case 2: allow only non-localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-allow 192.168.1.1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+#11
+TEST ! mount_nfs localhost:/$V0 $N0 nolock
+TEST $CLI volume reset $V0 force
+TEST $CLI volume set $V0 nfs.disable off
+# case 3: reject only localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 127.0.0.1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST ! mount_nfs localhost:/$V0 $N0 nolock
+
+# case 4: reject only non-localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 192.168.1.1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST mount_nfs localhost:/$V0 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+
+
+# NEED TO CHECK BOTH IP AND NAME BASED AUTH.
+# CASES WITH NFS.ADDR-NAMELOOKUP ON (5-12)
+TEST $CLI volume reset $V0 force
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 nfs.addr-namelookup on
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+#20
+TEST mount_nfs localhost:/$V0 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# case 5: allow only localhost
+TEST $CLI volume set $V0 nfs.rpc-auth-allow localhost
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST mount_nfs localhost:/$V0 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# case 6: allow only somehost
+TEST $CLI volume set $V0 nfs.rpc-auth-allow somehost
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST ! mount_nfs localhost:/$V0 $N0 nolock
+
+# case 7: reject only localhost
+TEST $CLI volume reset $V0 force
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 nfs.addr-namelookup on
+TEST $CLI volume set $V0 nfs.rpc-auth-reject localhost
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+#30
+TEST ! mount_nfs localhost:/$V0 $N0 nolock
+
+# case 8: reject only somehost
+TEST $CLI volume set $V0 nfs.rpc-auth-reject somehost
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST mount_nfs localhost:/$V0 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# based on ip addresses: repeat of cases 1-4
+# case 9: allow only localhost ip
+TEST $CLI volume reset $V0 force
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 nfs.addr-namelookup on
+TEST $CLI volume set $V0 nfs.rpc-auth-allow 127.0.0.1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST mount_nfs localhost:/$V0 $N0 nolock
+TEST mkdir -p $N0/subdir
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+# case 10: allow a non-localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-allow 192.168.1.1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+#41
+TEST ! mount_nfs localhost:/$V0 $N0 nolock
+
+# case 11: reject only localhost ip
+TEST $CLI volume reset $V0 force
+TEST $CLI volume set $V0 nfs.disable off
+TEST $CLI volume set $V0 nfs.addr-namelookup on
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 127.0.0.1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST ! mount_nfs localhost:/$V0 $N0 nolock
+TEST ! mount_nfs localhost:/$V0/subdir $N0 nolock
+
+# case 12: reject only non-localhost ip
+TEST $CLI volume set $V0 nfs.rpc-auth-reject 192.168.1.1
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT 1 is_nfs_export_available
+
+TEST mount_nfs localhost:/$V0 $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST mount_nfs localhost:/$V0/subdir $N0 nolock
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $N0
+
+TEST $CLI volume stop --mode=script $V0
+#52
+TEST $CLI volume delete --mode=script $V0
+cleanup
diff --git a/tests/bugs/rpc/bug-954057.t b/tests/bugs/rpc/bug-954057.t
new file mode 100755
index 00000000000..40acdc2fdc7
--- /dev/null
+++ b/tests/bugs/rpc/bug-954057.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This script checks if use-readdirp option works as accepted in mount options
+
+# Note on re-reading $M0/new after enabling root-squash:
+# Since we have readen it once, the file is present in various caches.
+# In order to actually fail on second attempt we must:
+# 1) drop kernel cache
+# 2) make sure FUSE does not cache the entry. This is also
+# in the kernel, but not flushed by a failed umount.
+# Using $GFS enforces this because it sets --entry-timeout=0
+# 3) make sure reading new permissins does not produce stale
+# information from glusterfs metadata cache. Setting volume
+# option performance.stat-prefetch off enforces that.
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+TEST mkdir $M0/nobody
+grep nfsnobody /etc/passwd > /dev/null
+if [ $? -eq 1 ]; then
+usr=nobody
+grp=nobody
+else
+usr=nfsnobody
+grp=nfsnobody
+fi
+TEST chown $usr:$grp $M0/nobody
+TEST `echo "file" >> $M0/file`
+TEST cp $M0/file $M0/new
+TEST chmod 700 $M0/new
+TEST cat $M0/new
+
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 server.root-squash enable
+drop_cache $M0
+TEST ! mkdir $M0/other
+TEST mkdir $M0/nobody/other
+TEST cat $M0/file
+TEST ! cat $M0/new
+TEST `echo "nobody" >> $M0/nobody/file`
+
+#mount the client without root-squashing
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 --no-root-squash=yes $M1
+TEST mkdir $M1/m1_dir
+TEST `echo "file" >> $M1/m1_file`
+TEST cp $M0/file $M1/new
+TEST chmod 700 $M1/new
+TEST cat $M1/new
+
+TEST $CLI volume set $V0 server.root-squash disable
+TEST mkdir $M0/other
+TEST cat $M0/new
+
+cleanup
diff --git a/tests/bugs/shard/bug-1245547.t b/tests/bugs/shard/bug-1245547.t
new file mode 100644
index 00000000000..3c46785d10f
--- /dev/null
+++ b/tests/bugs/shard/bug-1245547.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+#Create a file.
+TEST touch $M0/foo
+#Write some data into it.
+TEST `echo "abc" > $M0/foo`
+
+#This should ensure /.shard is created on the bricks.
+TEST stat $B0/${V0}0/.shard
+TEST stat $B0/${V0}1/.shard
+
+#Create a file 'bar' with holes.
+TEST touch $M0/bar
+TEST truncate -s 10G $M0/bar
+#Unlink on such a file should succeed.
+TEST unlink $M0/bar
+
+#Create a file 'baz' with holes.
+TEST touch $M0/baz
+TEST truncate -s 10G $M0/baz
+#Rename with a sharded existing dest that has holes must succeed.
+TEST mv -f $M0/foo $M0/baz
+
+cleanup
diff --git a/tests/bugs/shard/bug-1248887.t b/tests/bugs/shard/bug-1248887.t
new file mode 100644
index 00000000000..2c51f7ce0e8
--- /dev/null
+++ b/tests/bugs/shard/bug-1248887.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+#Create a file.
+TEST touch $M0/foo
+#Write some data into it.
+TEST `echo "abc" > $M0/foo`
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/foo
+EXPECT "0000000000000004000000000000000000000000000000010000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/foo
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/foo
+EXPECT "0000000000000004000000000000000000000000000000010000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/foo
+TEST kill_brick $V0 $H0 $B0/${V0}0
+TEST `echo "abc" >> $M0/foo`
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/foo
+EXPECT "0000000000000008000000000000000000000000000000010000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/foo
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}0/foo
+EXPECT "0000000000000008000000000000000000000000000000010000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}0/foo
+EXPECT "0000000000400000" get_hex_xattr trusted.glusterfs.shard.block-size $B0/${V0}1/foo
+EXPECT "0000000000000008000000000000000000000000000000010000000000000000" get_hex_xattr trusted.glusterfs.shard.file-size $B0/${V0}1/foo
+
+cleanup;
diff --git a/tests/bugs/shard/bug-1250855.t b/tests/bugs/shard/bug-1250855.t
new file mode 100644
index 00000000000..b8bc3b42513
--- /dev/null
+++ b/tests/bugs/shard/bug-1250855.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=40
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST mkdir $M0/dir
+
+for i in {1..20}; do
+ TEST_IN_LOOP touch $M0/dir/$i;
+done
+
+TEST $CLI volume set $V0 features.shard on
+
+TEST ls $M0
+TEST ls $M0/dir
+
+for i in {1..10}; do
+ TEST_IN_LOOP mv $M0/dir/$i $M0/dir/$i-sharded;
+done
+
+for i in {11..20}; do
+ TEST_IN_LOOP unlink $M0/dir/$i;
+done
+
+cleanup;
diff --git a/tests/bugs/shard/bug-1251824.t b/tests/bugs/shard/bug-1251824.t
new file mode 100644
index 00000000000..d81685d01de
--- /dev/null
+++ b/tests/bugs/shard/bug-1251824.t
@@ -0,0 +1,109 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../common-utils.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST useradd -M test_user 2>/dev/null
+
+# Create 3 files as root.
+TEST touch $M0/foo
+TEST touch $M0/bar
+TEST touch $M0/baz
+TEST touch $M0/qux
+TEST mkdir $M0/dir
+
+# Change ownership to non-root on foo and bar.
+TEST chown test_user:test_user $M0/foo
+TEST chown test_user:test_user $M0/bar
+
+# Write 6M of data on foo as non-root, 2M overflowing into block-1.
+TEST run_cmd_as_user test_user "dd if=/dev/zero of=$M0/foo bs=1M count=6"
+
+# Ensure owner and group are root on the block-1 shard.
+gfid_foo=$(get_gfid_string $M0/foo)
+
+EXPECT "root" echo `find $B0 -name $gfid_foo.1 | xargs stat -c %U`
+EXPECT "root" echo `find $B0 -name $gfid_foo.1 | xargs stat -c %G`
+
+#Ensure /.shard is owned by root.
+EXPECT "root" echo `find $B0/${V0}0 -name .shard | xargs stat -c %U`
+EXPECT "root" echo `find $B0/${V0}0 -name .shard | xargs stat -c %G`
+EXPECT "root" echo `find $B0/${V0}1 -name .shard | xargs stat -c %U`
+EXPECT "root" echo `find $B0/${V0}1 -name .shard | xargs stat -c %G`
+EXPECT "root" echo `find $B0/${V0}2 -name .shard | xargs stat -c %U`
+EXPECT "root" echo `find $B0/${V0}2 -name .shard | xargs stat -c %G`
+EXPECT "root" echo `find $B0/${V0}3 -name .shard | xargs stat -c %U`
+EXPECT "root" echo `find $B0/${V0}3 -name .shard | xargs stat -c %G`
+
+# Write 6M of data on bar as root.
+TEST dd if=/dev/zero of=$M0/bar bs=1M count=6
+
+# Ensure owner and group are root on the block-1 shard.
+gfid_bar=$(get_gfid_string $M0/bar)
+
+EXPECT "root" echo `find $B0 -name $gfid_bar.1 | xargs stat -c %U`
+EXPECT "root" echo `find $B0 -name $gfid_bar.1 | xargs stat -c %G`
+
+# Write 6M of data on baz as root.
+TEST dd if=/dev/zero of=$M0/baz bs=1M count=6
+
+gfid_baz=$(get_gfid_string $M0/baz)
+
+# Ensure owner and group are root on the block-1 shard.
+EXPECT "root" echo `find $B0 -name $gfid_baz.1 | xargs stat -c %U`
+EXPECT "root" echo `find $B0 -name $gfid_baz.1 | xargs stat -c %G`
+
+# Test to ensure unlink from an unauthorized user does not lead to only
+# the shards under /.shard getting unlinked while that on the base file fails
+# with EPERM/ACCES.
+
+TEST ! run_cmd_as_user test_user "unlink $M0/baz"
+TEST find $B0/*/.shard/$gfid_baz.1
+
+# Test to ensure rename of a file where the dest file exists and is sharded,
+# from an unauthorized user does not lead to only the shards under /.shard
+# getting unlinked while that on the base file fails with EPERM/ACCES.
+
+TEST ! run_cmd_as_user test_user "mv -f $M0/qux $M0/baz"
+TEST find $B0/*/.shard/$gfid_baz.1
+TEST stat $M0/qux
+
+# Shard translator executes steps in the following order while doing a truncate
+# to a lower size:
+# 1) unlinking shards under /.shard first with frame->root->{uid,gid} being 0,
+# 2) truncate the original file by the right amount.
+# The following two tests are towards ensuring that truncate attempt from an
+# unauthorised user doesn't result in only the shards under /.shard getting
+# removed (since they're being performed as root) while step 2) above fails,
+# leaving the file in an inconsistent state.
+
+TEST ! run_cmd_as_user test_user "truncate -s 1M $M0/baz"
+TEST find $B0/*/.shard/$gfid_baz.1
+
+# Perform a cp as non-root user. This should trigger readv() which will trigger
+# reads on first shard of "foo" under /.shard, and this must not fail if shard
+# translator correctly sets frame->root->uid,gid to 0 before reading off the
+# first shard, since it's owned by root.
+TEST chown test_user:test_user $M0/dir
+TEST run_cmd_as_user test_user "cp $M0/foo $M0/dir/quux"
+
+md5sum_foo=$(md5sum $M0/foo | awk '{print $1}')
+EXPECT "$md5sum_foo" echo `md5sum $M0/dir/quux | awk '{print $1}'`
+
+userdel test_user
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/shard/bug-1256580.t b/tests/bugs/shard/bug-1256580.t
new file mode 100644
index 00000000000..279fcc54e48
--- /dev/null
+++ b/tests/bugs/shard/bug-1256580.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file
+
+# Create "file" with holes.
+TEST truncate -s 6M $M0/dir/file
+EXPECT '6291456' stat -c %s $M0/dir/file
+
+# Perform writes that do not cross the 6M boundary
+TEST dd if=/dev/zero of=$M0/dir/file bs=1024 seek=3072 count=2048 conv=notrunc
+
+# Ensure that the file size is 6M (as opposed to 8M that would appear in the
+# presence of this bug).
+EXPECT '6291456' stat -c %s $M0/dir/file
+
+#Extend the write beyond EOF such that it again creates a hole of 1M size
+TEST dd if=/dev/zero of=$M0/dir/file bs=1024 seek=7168 count=2048 conv=notrunc
+
+# Ensure that the file size is not greater than 9M.
+EXPECT '9437184' stat -c %s $M0/dir/file
+cleanup
diff --git a/tests/bugs/shard/bug-1258334.t b/tests/bugs/shard/bug-1258334.t
new file mode 100644
index 00000000000..94ed822aae8
--- /dev/null
+++ b/tests/bugs/shard/bug-1258334.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+TEST touch $M0/dir/foo
+TEST touch $M0/dir/bar
+TEST touch $M0/dir/new
+
+TEST truncate -s 14M $M0/dir/foo
+TEST truncate -s 14M $M0/dir/bar
+
+# Perform writes that fall on the 2nd block of "foo" (counting from 0)
+TEST dd if=/dev/zero of=$M0/dir/foo bs=1024 seek=10240 count=2048 conv=notrunc
+
+# Perform writes that fall on the 2nd block of "bar" (counting from 0)
+TEST dd if=/dev/zero of=$M0/dir/bar bs=1024 seek=10240 count=2048 conv=notrunc
+
+# Now unlink "foo". If the bug exists, it should fail with EINVAL.
+TEST unlink $M0/dir/foo
+
+# Now rename "new" to "bar". If the bug exists, it should fail with EINVAL.
+TEST mv -f $M0/dir/new $M0/dir/bar
+
+TEST dd if=/dev/zero of=$M0/dir/new bs=1024 count=5120
+
+# Now test that this fix does not break unlink of files without holes
+TEST unlink $M0/dir/new
+
+cleanup
diff --git a/tests/bugs/shard/bug-1259651.t b/tests/bugs/shard/bug-1259651.t
new file mode 100644
index 00000000000..72856fdbaad
--- /dev/null
+++ b/tests/bugs/shard/bug-1259651.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file_plain
+
+# Create "file_plain" with holes.
+TEST truncate -s 12M $M0/dir/file_plain
+
+# Perform writes on it that would create holes.
+TEST dd if=/dev/zero of=$M0/dir/file_plain bs=1024 seek=10240 count=1024 conv=notrunc
+
+md5sum_file_plain=$(md5sum $M0/dir/file_plain | awk '{print $1}')
+
+# Now enable sharding on the volume.
+TEST $CLI volume set $V0 features.shard on
+
+# Create a sharded file called "file_sharded"
+TEST touch $M0/dir/file_sharded
+
+# Truncate it to make it sparse
+TEST truncate -s 12M $M0/dir/file_sharded
+
+# Perform writes on it that would create holes in block-0 and block-1.
+TEST dd if=/dev/zero of=$M0/dir/file_sharded bs=1024 seek=10240 count=1024 conv=notrunc
+
+# If this bug is fixed, md5sum of file_sharded and file_plain should be same.
+EXPECT "$md5sum_file_plain" echo `md5sum $M0/dir/file_sharded | awk '{print $1}'`
+
+cleanup
diff --git a/tests/bugs/shard/bug-1260637.t b/tests/bugs/shard/bug-1260637.t
new file mode 100644
index 00000000000..21008ee19dd
--- /dev/null
+++ b/tests/bugs/shard/bug-1260637.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Create a file.
+TEST touch $M0/foo
+
+# Check that the shard xattrs are set in the backend.
+TEST getfattr -n trusted.glusterfs.shard.block-size $B0/${V0}0/foo
+TEST getfattr -n trusted.glusterfs.shard.file-size $B0/${V0}0/foo
+
+# Verify that shard xattrs are not exposed on the mount.
+TEST ! getfattr -n trusted.glusterfs.shard.block-size $M0/foo
+TEST ! getfattr -n trusted.glusterfs.shard.file-size $M0/foo
+
+# Verify that shard xattrs cannot be set from the mount.
+TEST ! setfattr -n trusted.glusterfs.shard.block-size -v "123" $M0/foo
+TEST ! setfattr -n trusted.glusterfs.shard.file-size -v "123" $M0/foo
+
+# Verify that shard xattrs cannot be removed from the mount.
+TEST ! setfattr -x trusted.glusterfs.shard.block-size $M0/foo
+TEST ! setfattr -x trusted.glusterfs.shard.file-size $M0/foo
+
+# Verify that shard xattrs are not listed when listxattr is triggered.
+TEST ! "getfattr -d -m . $M0/foo | grep shard"
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-1261773.t b/tests/bugs/shard/bug-1261773.t
new file mode 100644
index 00000000000..46d5a8b91c9
--- /dev/null
+++ b/tests/bugs/shard/bug-1261773.t
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST check_option_help_presence "features.shard"
+TEST check_option_help_presence "features.shard-block-size"
+
+cleanup
diff --git a/tests/bugs/shard/bug-1272986.t b/tests/bugs/shard/bug-1272986.t
new file mode 100644
index 00000000000..66e896ad0c4
--- /dev/null
+++ b/tests/bugs/shard/bug-1272986.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume start $V0
+
+# $M0 is where the reads will be done and $M1 is where files will be created,
+# written to, etc.
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M1
+
+# Write some data into a file, such that its size crosses the shard block size.
+TEST dd if=/dev/urandom of=$M1/file bs=1M count=5 conv=notrunc oflag=direct
+
+md5sum1_reader=$(md5sum $M0/file | awk '{print $1}')
+
+EXPECT "$md5sum1_reader" echo `md5sum $M1/file | awk '{print $1}'`
+
+# Append some more data into the file.
+TEST dd if=/dev/urandom of=$M1/file bs=256k count=1 conv=notrunc oflag=direct
+
+md5sum2_reader=$(dd if=$M0/file iflag=direct bs=256k| md5sum | awk '{print $1}')
+
+# Test to see if the reader refreshes its cache correctly as part of the reads
+# triggered through md5sum. If it does, then the md5sum on the reader and writer
+# must match.
+EXPECT "$md5sum2_reader" echo `md5sum $M1/file | awk '{print $1}'`
+
+cleanup
diff --git a/tests/bugs/shard/bug-1342298.t b/tests/bugs/shard/bug-1342298.t
new file mode 100644
index 00000000000..ecd7720e8db
--- /dev/null
+++ b/tests/bugs/shard/bug-1342298.t
@@ -0,0 +1,23 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+echo a > $M0/a
+TEST dd if=$M0/a of=/dev/null bs=4096 count=1 iflag=direct
+
+cleanup;
diff --git a/tests/bugs/shard/bug-1468483.t b/tests/bugs/shard/bug-1468483.t
new file mode 100644
index 00000000000..e462b8d54d5
--- /dev/null
+++ b/tests/bugs/shard/bug-1468483.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../common-utils.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 16MB
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST dd if=/dev/zero conv=fsync of=$M0/foo bs=1M count=100
+
+#This should ensure /.shard is created on the bricks.
+TEST stat $B0/${V0}0/.shard
+
+gfid_foo=$(get_gfid_string $M0/foo)
+
+TEST stat $B0/${V0}0/.shard/$gfid_foo.1
+TEST stat $B0/${V0}0/.shard/$gfid_foo.2
+TEST stat $B0/${V0}0/.shard/$gfid_foo.3
+TEST stat $B0/${V0}0/.shard/$gfid_foo.4
+TEST stat $B0/${V0}0/.shard/$gfid_foo.5
+TEST stat $B0/${V0}0/.shard/$gfid_foo.6
+
+# For a file with 7 shards, there should be 7 fsyncs on the brick. Without this
+# fix, I was seeing only 1 fsync (on the base shard alone).
+
+EXPECT "7" echo `$CLI volume profile $V0 info incremental | grep -w FSYNC | awk '{print $8}'`
+
+useradd -M test_user 2>/dev/null
+
+TEST touch $M0/bar
+
+# Change ownership to non-root on bar.
+TEST chown test_user:test_user $M0/bar
+
+TEST $CLI volume profile $V0 stop
+TEST $CLI volume profile $V0 start
+
+# Write 100M of data on bar as non-root.
+TEST run_cmd_as_user test_user "dd if=/dev/zero conv=fsync of=$M0/bar bs=1M count=100"
+
+EXPECT "7" echo `$CLI volume profile $V0 info incremental | grep -w FSYNC | awk '{print $8}'`
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+userdel test_user
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-1488546.t b/tests/bugs/shard/bug-1488546.t
new file mode 100644
index 00000000000..60480dc55e5
--- /dev/null
+++ b/tests/bugs/shard/bug-1488546.t
@@ -0,0 +1,25 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 md-cache-timeout 60
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST dd if=/dev/zero of=$M0/file bs=1M count=20
+TEST ln $M0/file $M0/linkey
+
+EXPECT "20971520" stat -c %s $M0/linkey
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/shard/bug-1568521-EEXIST.t b/tests/bugs/shard/bug-1568521-EEXIST.t
new file mode 100644
index 00000000000..2f9f165aa63
--- /dev/null
+++ b/tests/bugs/shard/bug-1568521-EEXIST.t
@@ -0,0 +1,91 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+function get_file_count {
+ ls $1* | wc -l
+}
+
+FILE_COUNT_TIME=5
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+# Unlink a temporary file to trigger creation of .remove_me
+TEST touch $M0/tmp
+TEST unlink $M0/tmp
+
+TEST stat $B0/${V0}0/.shard/.remove_me
+TEST stat $B0/${V0}1/.shard/.remove_me
+
+TEST dd if=/dev/zero of=$M0/dir/file bs=1024 count=9216
+gfid_file=$(get_gfid_string $M0/dir/file)
+
+# Create marker file from the backend to simulate ENODATA.
+touch $B0/${V0}0/.shard/.remove_me/$gfid_file
+touch $B0/${V0}1/.shard/.remove_me/$gfid_file
+
+# Set block and file size to incorrect values of 64MB and 5MB to simulate "stale xattrs" case
+# and confirm that the correct values are set when the actual unlink takes place
+
+TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}0/.shard/.remove_me/$gfid_file
+TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}1/.shard/.remove_me/$gfid_file
+
+TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}0/.shard/.remove_me/$gfid_file
+TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}1/.shard/.remove_me/$gfid_file
+
+# Sleep for 2 seconds to prevent posix_gfid_heal() from believing marker file is "fresh" and failing lookup with ENOENT
+sleep 2
+
+TEST unlink $M0/dir/file
+TEST ! stat $B0/${V0}0/dir/file
+TEST ! stat $B0/${V0}1/dir/file
+
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_file
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_file
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_file
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_file
+
+##############################
+### Repeat test for rename ###
+##############################
+
+TEST touch $M0/src
+TEST dd if=/dev/zero of=$M0/dir/dst bs=1024 count=9216
+gfid_dst=$(get_gfid_string $M0/dir/dst)
+
+# Create marker file from the backend to simulate ENODATA.
+touch $B0/${V0}0/.shard/.remove_me/$gfid_dst
+touch $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+# Set block and file size to incorrect values of 64MB and 5MB to simulate "stale xattrs" case
+# and confirm that the correct values are set when the actual unlink takes place
+
+TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}0/.shard/.remove_me/$gfid_dst
+TEST setfattr -n trusted.glusterfs.shard.block-size -v 0x0000000004000000 $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}0/.shard/.remove_me/$gfid_dst
+TEST setfattr -n trusted.glusterfs.shard.file-size -v 0x0000000000500000000000000000000000000000000000000000000000000000 $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+# Sleep for 2 seconds to prevent posix_gfid_heal() from believing marker file is "fresh" and failing lookup with ENOENT
+sleep 2
+
+TEST mv -f $M0/src $M0/dir/dst
+TEST ! stat $B0/${V0}0/src
+TEST ! stat $B0/${V0}1/src
+
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_dst
+
+cleanup
diff --git a/tests/bugs/shard/bug-1568521.t b/tests/bugs/shard/bug-1568521.t
new file mode 100644
index 00000000000..167fb635ac8
--- /dev/null
+++ b/tests/bugs/shard/bug-1568521.t
@@ -0,0 +1,53 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+
+function delete_files {
+ local mountpoint=$1;
+ local success=0;
+ local value=$2
+ for i in {1..500}; do
+ unlink $mountpoint/file-$i 2>/dev/null 1>/dev/null
+ if [ $? -eq 0 ]; then
+ echo $2 >> $B0/output.txt
+ fi
+ done
+ echo $success
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 shard-block-size 4MB
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M1
+
+for i in {1..500}; do
+ dd if=/dev/urandom of=$M0/file-$i bs=1M count=2
+done
+
+for i in {1..500}; do
+ stat $M1/file-$i > /dev/null
+done
+
+delete_files $M0 0 &
+delete_files $M1 1 &
+wait
+
+success1=$(grep 0 $B0/output.txt | wc -l);
+success2=$(grep 1 $B0/output.txt | wc -l);
+
+echo "Success1 is $success1";
+echo "Success2 is $success2";
+
+success_total=$((success1 + success2));
+
+EXPECT 500 echo $success_total
+
+cleanup
diff --git a/tests/bugs/shard/bug-1605056-2.t b/tests/bugs/shard/bug-1605056-2.t
new file mode 100644
index 00000000000..a9c10fec3ea
--- /dev/null
+++ b/tests/bugs/shard/bug-1605056-2.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+TEST $CLI volume set $V0 performance.write-behind off
+
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Perform a write that would cause 25 shards to be created under .shard
+TEST dd if=/dev/zero of=$M0/foo bs=1M count=104
+
+# Write into another file bar to ensure all of foo's shards are evicted from lru list of $M0
+TEST dd if=/dev/zero of=$M0/bar bs=1M count=104
+
+# Delete foo from $M0. If there's a bug, the mount will crash.
+TEST unlink $M0/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-1605056.t b/tests/bugs/shard/bug-1605056.t
new file mode 100644
index 00000000000..c2329ea79f8
--- /dev/null
+++ b/tests/bugs/shard/bug-1605056.t
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+SHARD_COUNT_TIME=5
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+TEST $CLI volume set $V0 performance.write-behind off
+
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M1
+
+# Perform a write that would cause 25 shards to be created under .shard
+TEST dd if=/dev/zero of=$M0/foo bs=1M count=104
+
+# Read the file from $M1, indirectly filling up the lru list.
+TEST `cat $M1/foo > /dev/null`
+statedump=$(generate_mount_statedump $V0 $M1)
+sleep 1
+EXPECT "25" echo $(grep "inode-count" $statedump | cut -f2 -d'=' | tail -1)
+rm -f $statedump
+
+# Delete foo from $M0.
+TEST unlink $M0/foo
+
+# Send stat on foo from $M1 to force $M1 to "forget" inode associated with foo.
+# Now the ghost shards associated with "foo" are still in lru list of $M1.
+TEST ! stat $M1/foo
+
+# Let's force the ghost shards of "foo" out of lru list by looking up more shards
+# through I/O on a file named "bar" from $M1. This should crash if the base inode
+# had been destroyed by now.
+
+TEST dd if=/dev/zero of=$M1/bar bs=1M count=104
+
+###############################################
+#### Now for some inode ref-leak tests ... ####
+###############################################
+
+# Expect there to be 29 active inodes - 26 belonging to "bar", 1 for .shard,
+# 1 for .shard/remove_me and 1 for '/'
+EXPECT_WITHIN $SHARD_COUNT_TIME `expr 26 + 3` get_mount_active_size_value $V0 $M1
+
+TEST rm -f $M1/bar
+EXPECT_WITHIN $SHARD_COUNT_TIME 3 get_mount_active_size_value $V0 $M1
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M1
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-1669077.t b/tests/bugs/shard/bug-1669077.t
new file mode 100644
index 00000000000..8d3a67a36be
--- /dev/null
+++ b/tests/bugs/shard/bug-1669077.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+SHARD_COUNT_TIME=5
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# If the bug still exists, client should crash during fallocate below
+TEST fallocate -l 200M $M0/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t b/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t
new file mode 100644
index 00000000000..3e4a65af19a
--- /dev/null
+++ b/tests/bugs/shard/bug-1696136-lru-limit-equals-deletion-rate.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 120
+TEST $CLI volume set $V0 features.shard-deletion-rate 120
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST build_tester $(dirname $0)/bug-1696136.c -lgfapi -Wall -O2
+
+# Create a file
+TEST touch $M0/file1
+
+# Fallocate a 500M file. This will make sure number of participant shards are > lru-limit
+TEST $(dirname $0)/bug-1696136 $H0 $V0 "0" "0" "536870912" /file1 `gluster --print-logdir`/glfs-$V0.log
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+rm -f $(dirname $0)/bug-1696136
+
+cleanup
diff --git a/tests/bugs/shard/bug-1696136.c b/tests/bugs/shard/bug-1696136.c
new file mode 100644
index 00000000000..cb650535b09
--- /dev/null
+++ b/tests/bugs/shard/bug-1696136.c
@@ -0,0 +1,122 @@
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+enum fallocate_flag {
+ TEST_FALLOCATE_NONE,
+ TEST_FALLOCATE_KEEP_SIZE,
+ TEST_FALLOCATE_ZERO_RANGE,
+ TEST_FALLOCATE_PUNCH_HOLE,
+ TEST_FALLOCATE_MAX,
+};
+
+int
+get_fallocate_flag(int opcode)
+{
+ int ret = 0;
+
+ switch (opcode) {
+ case TEST_FALLOCATE_NONE:
+ ret = 0;
+ break;
+ case TEST_FALLOCATE_KEEP_SIZE:
+ ret = FALLOC_FL_KEEP_SIZE;
+ break;
+ case TEST_FALLOCATE_ZERO_RANGE:
+ ret = FALLOC_FL_ZERO_RANGE;
+ break;
+ case TEST_FALLOCATE_PUNCH_HOLE:
+ ret = FALLOC_FL_PUNCH_HOLE;
+ break;
+ default:
+ ret = -1;
+ break;
+ }
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 1;
+ int opcode = -1;
+ off_t offset = 0;
+ size_t len = 0;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+
+ if (argc != 8) {
+ fprintf(stderr,
+ "Syntax: %s <host> <volname> <opcode> <offset> <len> "
+ "<file-path> <log-file>\n",
+ argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, argv[7], 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ opcode = atoi(argv[3]);
+ opcode = get_fallocate_flag(opcode);
+ if (opcode < 0) {
+ fprintf(stderr, "get_fallocate_flag: invalid flag \n");
+ goto out;
+ }
+
+ /* Note that off_t is signed but size_t isn't. */
+ offset = strtol(argv[4], NULL, 10);
+ len = strtoul(argv[5], NULL, 10);
+
+ fd = glfs_open(fs, argv[6], O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+
+ ret = glfs_fallocate(fd, opcode, offset, len);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fallocate: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = glfs_unlink(fs, argv[6]);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_unlink: returned %d\n", ret);
+ goto out;
+ }
+ /* Sleep for 3s to give enough time for background deletion to complete
+ * during which if the bug exists, the process will crash.
+ */
+ sleep(3);
+ ret = 0;
+
+out:
+ if (fd)
+ glfs_close(fd);
+ glfs_fini(fs);
+ return ret;
+}
diff --git a/tests/bugs/shard/bug-1696136.t b/tests/bugs/shard/bug-1696136.t
new file mode 100644
index 00000000000..b6dc858f083
--- /dev/null
+++ b/tests/bugs/shard/bug-1696136.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 120
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST build_tester $(dirname $0)/bug-1696136.c -lgfapi -Wall -O2
+
+# Create a file
+TEST touch $M0/file1
+
+# Fallocate a 500M file. This will make sure number of participant shards are > lru-limit
+TEST $(dirname $0)/bug-1696136 $H0 $V0 "0" "0" "536870912" /file1 `gluster --print-logdir`/glfs-$V0.log
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+rm -f $(dirname $0)/bug-1696136
+
+cleanup
diff --git a/tests/bugs/shard/bug-1705884.t b/tests/bugs/shard/bug-1705884.t
new file mode 100644
index 00000000000..f6e50376a58
--- /dev/null
+++ b/tests/bugs/shard/bug-1705884.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+require_fallocate -l 1m $M0/file
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST fallocate -l 200M $M0/foo
+EXPECT `echo "$(( ( 200 * 1024 * 1024 ) / 512 ))"` stat -c %b $M0/foo
+TEST truncate -s 0 $M0/foo
+EXPECT "0" stat -c %b $M0/foo
+TEST fallocate -l 100M $M0/foo
+EXPECT `echo "$(( ( 100 * 1024 * 1024 ) / 512 ))"` stat -c %b $M0/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-1738419.t b/tests/bugs/shard/bug-1738419.t
new file mode 100644
index 00000000000..8d0a31d9754
--- /dev/null
+++ b/tests/bugs/shard/bug-1738419.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 network.remote-dio off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.strict-o-direct on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST dd if=/dev/zero of=$M0/metadata bs=501 count=1
+
+EXPECT "501" echo $("dd" if=$M0/metadata bs=4096 count=1 of=/dev/null iflag=direct 2>&1 | awk '/bytes/ {print $1}')
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/bug-shard-discard.c b/tests/bugs/shard/bug-shard-discard.c
new file mode 100644
index 00000000000..6fa93fb89d1
--- /dev/null
+++ b/tests/bugs/shard/bug-shard-discard.c
@@ -0,0 +1,70 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ off_t off = 0;
+ size_t len = 0;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+
+ if (argc != 7) {
+ fprintf(
+ stderr,
+ "Syntax: %s <host> <volname> <file-path> <off> <len> <log-file>\n",
+ argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, argv[6], 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ fd = glfs_open(fs, argv[3], O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+
+ /* Note that off_t is signed but size_t isn't. */
+ off = strtol(argv[4], NULL, 10);
+ len = strtoul(argv[5], NULL, 10);
+
+ ret = glfs_discard(fd, off, len);
+ if (ret <= 0) {
+ fprintf(stderr, "glfs_discard: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (fd)
+ glfs_close(fd);
+ glfs_fini(fs);
+ return ret;
+}
diff --git a/tests/bugs/shard/bug-shard-discard.t b/tests/bugs/shard/bug-shard-discard.t
new file mode 100644
index 00000000000..910ade14801
--- /dev/null
+++ b/tests/bugs/shard/bug-shard-discard.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+FILE_COUNT_TIME=5
+
+function get_shard_count {
+ ls $1/$2.* | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Create a file.
+TEST touch $M0/foo
+TEST dd if=/dev/urandom of=$M0/foo bs=1M count=10
+
+# This should ensure /.shard is created on the bricks.
+TEST stat $B0/${V0}0/.shard
+TEST stat $B0/${V0}1/.shard
+TEST stat $B0/${V0}2/.shard
+TEST stat $B0/${V0}3/.shard
+
+#Note the size of the file, it should be 10M
+EXPECT '10485760' stat -c %s $M0/foo
+
+gfid_foo=$(get_gfid_string $M0/foo)
+
+TEST build_tester $(dirname $0)/bug-shard-discard.c -lgfapi -Wall -O2
+#Call discard on the file at off=7M and len=3M
+TEST $(dirname $0)/bug-shard-discard $H0 $V0 /foo 7340032 3145728 `gluster --print-logdir`/glfs-$V0.log
+
+#Ensure that discard doesn't change the original size of the file.
+EXPECT '10485760' stat -c %s $M0/foo
+
+# Ensure that the last shard is all zero'd out
+EXPECT "1" file_all_zeroes `find $B0 -name $gfid_foo.2`
+EXPECT_NOT "1" file_all_zeroes `find $B0 -name $gfid_foo.1`
+
+# Now unlink the file. And ensure that all shards associated with the file are cleaned up
+TEST unlink $M0/foo
+
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}0/.shard $gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}1/.shard $gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}2/.shard $gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_shard_count $B0/${V0}3/.shard $gfid_foo
+TEST ! stat $M0/foo
+
+#clean up everything
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+TEST rm -f $(dirname $0)/bug-shard-discard
+
+cleanup
diff --git a/tests/bugs/shard/bug-shard-zerofill.c b/tests/bugs/shard/bug-shard-zerofill.c
new file mode 100644
index 00000000000..ed4c8c54dc2
--- /dev/null
+++ b/tests/bugs/shard/bug-shard-zerofill.c
@@ -0,0 +1,60 @@
+#include <stdio.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+int
+main(int argc, char *argv[])
+{
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+ int ret = 1;
+
+ if (argc != 5) {
+ fprintf(stderr, "Syntax: %s <host> <volname> <file-path> <log-file>\n",
+ argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_set_logging(fs, argv[4], 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ fd = glfs_open(fs, argv[3], O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+
+ /* Zero-fill "foo" with 10MB of data */
+ ret = glfs_zerofill(fd, 0, 10485760);
+ if (ret <= 0) {
+ fprintf(stderr, "glfs_zerofill: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (fd)
+ glfs_close(fd);
+ glfs_fini(fs);
+ return ret;
+}
diff --git a/tests/bugs/shard/bug-shard-zerofill.t b/tests/bugs/shard/bug-shard-zerofill.t
new file mode 100644
index 00000000000..4a919a24b99
--- /dev/null
+++ b/tests/bugs/shard/bug-shard-zerofill.t
@@ -0,0 +1,46 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0..3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Create a file.
+TEST touch $M0/foo
+
+gfid_foo=$(get_gfid_string $M0/foo)
+
+TEST build_tester $(dirname $0)/bug-shard-zerofill.c -lgfapi -Wall -O2
+TEST $(dirname $0)/bug-shard-zerofill $H0 $V0 /foo `gluster --print-logdir`/glfs-$V0.log
+
+# This should ensure /.shard is created on the bricks.
+TEST stat $B0/${V0}0/.shard
+TEST stat $B0/${V0}1/.shard
+TEST stat $B0/${V0}2/.shard
+TEST stat $B0/${V0}3/.shard
+
+EXPECT "4194304" echo `find $B0 -name $gfid_foo.1 | xargs stat -c %s`
+EXPECT "2097152" echo `find $B0 -name $gfid_foo.2 | xargs stat -c %s`
+
+EXPECT "1" file_all_zeroes $M0/foo
+
+TEST `echo "abc" >> $M0/foo`
+
+EXPECT_NOT "1" file_all_zeroes $M0/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+TEST rm -f $(dirname $0)/bug-shard-zerofill
+
+cleanup
diff --git a/tests/bugs/shard/configure-lru-limit.t b/tests/bugs/shard/configure-lru-limit.t
new file mode 100644
index 00000000000..923a4d8d747
--- /dev/null
+++ b/tests/bugs/shard/configure-lru-limit.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 features.shard-lru-limit 25
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $B0/${V0}2
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Perform a write that would cause 25 shards to be created, 24 of them under .shard
+TEST dd if=/dev/zero of=$M0/foo bs=1M count=100
+
+statedump=$(generate_mount_statedump $V0)
+sleep 1
+EXPECT "25" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
+
+# Base shard is never added to this list. So all other shards should make up for 24 inodes in lru list
+EXPECT "24" echo $(grep "inode-count" $statedump | cut -f2 -d'=' | tail -1)
+
+rm -f $statedump
+
+# Test to ensure there's no "reconfiguration" of the value once set.
+TEST $CLI volume set $V0 features.shard-lru-limit 30
+statedump=$(generate_mount_statedump $V0)
+sleep 1
+EXPECT "25" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
+rm -f $statedump
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+statedump=$(generate_mount_statedump $V0)
+sleep 1
+EXPECT "30" echo $(grep "lru-max-limit" $statedump | cut -f2 -d'=' | tail -1)
+rm -f $statedump
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/issue-1243.t b/tests/bugs/shard/issue-1243.t
new file mode 100644
index 00000000000..ba22d2b74fe
--- /dev/null
+++ b/tests/bugs/shard/issue-1243.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.strict-o-direct on
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST $CLI volume set $V0 md-cache-timeout 10
+
+# Write data into a file such that its size crosses shard-block-size
+TEST dd if=/dev/zero of=$M0/foo bs=1048576 count=8 oflag=direct
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Execute a setxattr on the file.
+TEST setfattr -n trusted.libvirt -v some-value $M0/foo
+
+# Size of the file should be the aggregated size, not the shard-block-size
+EXPECT '8388608' stat -c %s $M0/foo
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+# Execute a removexattr on the file.
+TEST setfattr -x trusted.libvirt $M0/foo
+
+# Size of the file should be the aggregated size, not the shard-block-size
+EXPECT '8388608' stat -c %s $M0/foo
+cleanup
diff --git a/tests/bugs/shard/issue-1281.t b/tests/bugs/shard/issue-1281.t
new file mode 100644
index 00000000000..9704caa8944
--- /dev/null
+++ b/tests/bugs/shard/issue-1281.t
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+#Open a file and store descriptor in fd = 5
+exec 5>$M0/foo
+
+#Unlink the same file which is opened in prev step
+TEST unlink $M0/foo
+
+#Write something on the file using the open fd = 5
+echo "issue-1281" >&5
+
+#Write on the descriptor should be succesful
+EXPECT 0 echo $?
+
+#Close the fd = 5
+exec 5>&-
+
+cleanup
diff --git a/tests/bugs/shard/issue-1425.t b/tests/bugs/shard/issue-1425.t
new file mode 100644
index 00000000000..bbe82c0e5b2
--- /dev/null
+++ b/tests/bugs/shard/issue-1425.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+FILE_COUNT_TIME=5
+
+function get_file_count {
+ ls $1* | wc -l
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+TEST $CLI volume profile $V0 start
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST fallocate -l 20M $M0/foo
+gfid_new=$(get_gfid_string $M0/foo)
+
+# Check for the base shard
+TEST stat $M0/foo
+TEST stat $B0/${V0}0/foo
+
+# There should be 4 associated shards
+EXPECT_WITHIN $FILE_COUNT_TIME 4 get_file_count $B0/${V0}0/.shard/$gfid_new
+
+# There should be 1+4 shards and we expect 4 lookups less than on the build without this patch
+EXPECT "21" echo `$CLI volume profile $V0 info incremental | grep -w LOOKUP | awk '{print $8}'`
+
+# Delete the base shard and check shards get cleaned up
+TEST unlink $M0/foo
+
+TEST ! stat $M0/foo
+TEST ! stat $B0/${V0}0/foo
+
+# There should be no shards now
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new
+cleanup
diff --git a/tests/bugs/shard/parallel-truncate-read.t b/tests/bugs/shard/parallel-truncate-read.t
new file mode 100644
index 00000000000..4de876f58f6
--- /dev/null
+++ b/tests/bugs/shard/parallel-truncate-read.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+#This test will crash if shard's LRU contains a shard's inode even after the
+#inode is forgotten. Minimum time for crash to happen I saw was 180 seconds
+
+. $(dirname $0)/../../include.rc
+
+function keep_writing {
+ cd $M0;
+ while [ -f /tmp/parallel-truncate-read ]
+ do
+ dd if=/dev/zero of=file1 bs=1M count=16
+ done
+ cd
+}
+
+function keep_reading {
+ cd $M0;
+ while [ -f /tmp/parallel-truncate-read ]
+ do
+ cat file1 > /dev/null
+ done
+ cd
+}
+
+cleanup;
+
+TEST touch /tmp/parallel-truncate-read
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+keep_writing &
+keep_reading &
+sleep 180
+TEST rm -f /tmp/parallel-truncate-read
+wait
+#test that the mount is operational
+TEST stat $M0
+
+cleanup;
diff --git a/tests/bugs/shard/shard-append-test.c b/tests/bugs/shard/shard-append-test.c
new file mode 100644
index 00000000000..c7debb2b182
--- /dev/null
+++ b/tests/bugs/shard/shard-append-test.c
@@ -0,0 +1,183 @@
+#include <fcntl.h>
+#include <unistd.h>
+#include <time.h>
+#include <limits.h>
+#include <string.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+#define LOG_ERR(msg) \
+ do { \
+ fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
+ } while (0)
+
+/*This test tests that shard xlator handles offset in appending writes
+ * correctly. This test performs writes of 1025 bytes 1025 times, in 5 threads
+ * with different threads. The buffer to be written is same character repeated
+ * 1025 times in the buffer for a thread. At the end it reads the buffer till
+ * end of file and tests that the read of 1025 bytes is always same character
+ * and the content read is 5*1025*1025 size. 1025 bytes is chosen because it
+ * will lead to write on more than one shard at some point when the size is
+ * going over the initial shard*/
+pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
+int thread_data = '1';
+
+glfs_t *
+init_glfs(const char *hostname, const char *volname, const char *logfile)
+{
+ int ret = -1;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ LOG_ERR("glfs_new failed");
+ return NULL;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", hostname, 24007);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_volfile_server failed");
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, logfile, 7);
+ if (ret < 0) {
+ LOG_ERR("glfs_set_logging failed");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ LOG_ERR("glfs_init failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ return fs;
+}
+
+void *
+write_data(void *data)
+{
+ char buf[1025] = {0};
+ glfs_fd_t *glfd = NULL;
+ glfs_t *fs = data;
+ int i = 0;
+
+ pthread_mutex_lock(&lock);
+ {
+ memset(buf, thread_data, sizeof(buf));
+ thread_data++;
+ }
+ pthread_mutex_unlock(&lock);
+
+ for (i = 0; i < 1025; i++) {
+ glfd = glfs_creat(fs, "parallel-write.txt", O_WRONLY | O_APPEND,
+ S_IRUSR | S_IWUSR | O_SYNC);
+ if (!glfd) {
+ LOG_ERR("Failed to create file");
+ exit(1);
+ }
+
+ if (glfs_write(glfd, buf, sizeof(buf), 0) < 0) {
+ LOG_ERR("Failed to write to file");
+ exit(1);
+ }
+ if (glfs_close(glfd) != 0) {
+ LOG_ERR("Failed to close file");
+ exit(1);
+ }
+ }
+ return NULL;
+}
+
+int
+main(int argc, char *argv[])
+{
+ pthread_t tid[5] = {0};
+ char buf[1025] = {0};
+ char cmp_buf[1025] = {0};
+ int ret = 0;
+ char *hostname = NULL;
+ char *volname = NULL;
+ char *logfile = NULL;
+ glfs_t *fs = NULL;
+ glfs_fd_t *glfd = NULL;
+ ssize_t bytes_read = 0;
+ ssize_t total_bytes_read = 0;
+ int i = 0;
+
+ if (argc != 4) {
+ fprintf(stderr, "Invalid argument\n");
+ exit(1);
+ }
+
+ hostname = argv[1];
+ volname = argv[2];
+ logfile = argv[3];
+
+ fs = init_glfs(hostname, volname, logfile);
+ if (fs == NULL) {
+ LOG_ERR("init_glfs failed");
+ return -1;
+ }
+
+ for (i = 0; i < 5; i++) {
+ pthread_create(&tid[i], NULL, write_data, fs);
+ }
+
+ for (i = 0; i < 5; i++) {
+ pthread_join(tid[i], NULL);
+ }
+ glfd = glfs_open(fs, "parallel-write.txt", O_RDONLY);
+ if (!glfd) {
+ LOG_ERR("Failed to open file for reading");
+ exit(1);
+ }
+
+ while ((bytes_read = glfs_read(glfd, buf, sizeof(buf), 0)) > 0) {
+ if (bytes_read != sizeof(buf)) {
+ fprintf(stderr,
+ "Didn't read complete data read: %zd "
+ "expected: %lu",
+ bytes_read, sizeof(buf));
+ exit(1);
+ }
+
+ total_bytes_read += bytes_read;
+ if (buf[0] < '1' || buf[0] >= thread_data) {
+ fprintf(stderr, "Invalid character found: %c", buf[0]);
+ exit(1);
+ }
+ memset(cmp_buf, buf[0], sizeof(cmp_buf));
+ if (memcmp(cmp_buf, buf, sizeof(cmp_buf))) {
+ LOG_ERR("Data corrupted");
+ exit(1);
+ }
+ memset(cmp_buf, 0, sizeof(cmp_buf));
+ }
+
+ if (total_bytes_read != 5 * 1025 * 1025) {
+ fprintf(stderr,
+ "Failed to read what is written, read; %zd, "
+ "expected %zu",
+ total_bytes_read, 5 * 1025 * 1025);
+ exit(1);
+ }
+
+ if (glfs_close(glfd) != 0) {
+ LOG_ERR("Failed to close");
+ exit(1);
+ }
+ return 0;
+}
diff --git a/tests/bugs/shard/shard-append-test.t b/tests/bugs/shard/shard-append-test.t
new file mode 100644
index 00000000000..f8719f2a2c1
--- /dev/null
+++ b/tests/bugs/shard/shard-append-test.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{1,2,3};
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+
+#Uncomment the following line after shard-queuing is implemented
+#TEST $CLI volume set $V0 performance.write-behind off
+
+TEST $CLI volume set $V0 performance.strict-o-direct on
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 performance.read-ahead off
+TEST $CLI volume start $V0;
+
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/shard-append-test.c -lgfapi -lpthread
+
+TEST ./$(dirname $0)/shard-append-test ${H0} $V0 $logdir/shard-append-test.log
+
+cleanup_tester $(dirname $0)/shard-append-test
+
+cleanup;
diff --git a/tests/bugs/shard/shard-fallocate.c b/tests/bugs/shard/shard-fallocate.c
new file mode 100644
index 00000000000..cb0714e8564
--- /dev/null
+++ b/tests/bugs/shard/shard-fallocate.c
@@ -0,0 +1,113 @@
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+
+enum fallocate_flag {
+ TEST_FALLOCATE_NONE,
+ TEST_FALLOCATE_KEEP_SIZE,
+ TEST_FALLOCATE_ZERO_RANGE,
+ TEST_FALLOCATE_PUNCH_HOLE,
+ TEST_FALLOCATE_MAX,
+};
+
+int
+get_fallocate_flag(int opcode)
+{
+ int ret = 0;
+
+ switch (opcode) {
+ case TEST_FALLOCATE_NONE:
+ ret = 0;
+ break;
+ case TEST_FALLOCATE_KEEP_SIZE:
+ ret = FALLOC_FL_KEEP_SIZE;
+ break;
+ case TEST_FALLOCATE_ZERO_RANGE:
+ ret = FALLOC_FL_ZERO_RANGE;
+ break;
+ case TEST_FALLOCATE_PUNCH_HOLE:
+ ret = FALLOC_FL_PUNCH_HOLE;
+ break;
+ default:
+ ret = -1;
+ break;
+ }
+ return ret;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 1;
+ int opcode = -1;
+ off_t offset = 0;
+ size_t len = 0;
+ glfs_t *fs = NULL;
+ glfs_fd_t *fd = NULL;
+
+ if (argc != 8) {
+ fprintf(stderr,
+ "Syntax: %s <host> <volname> <opcode> <offset> <len> "
+ "<file-path> <log-file>\n",
+ argv[0]);
+ return 1;
+ }
+
+ fs = glfs_new(argv[2]);
+ if (!fs) {
+ fprintf(stderr, "glfs_new: returned NULL\n");
+ return 1;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", argv[1], 24007);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_volfile_server: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = glfs_set_logging(fs, argv[7], 7);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_set_logging: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret != 0) {
+ fprintf(stderr, "glfs_init: returned %d\n", ret);
+ goto out;
+ }
+
+ opcode = atoi(argv[3]);
+ opcode = get_fallocate_flag(opcode);
+ if (opcode < 0) {
+ fprintf(stderr, "get_fallocate_flag: invalid flag \n");
+ goto out;
+ }
+
+ /* Note that off_t is signed but size_t isn't. */
+ offset = strtol(argv[4], NULL, 10);
+ len = strtoul(argv[5], NULL, 10);
+
+ fd = glfs_open(fs, argv[6], O_RDWR);
+ if (fd == NULL) {
+ fprintf(stderr, "glfs_open: returned NULL\n");
+ goto out;
+ }
+
+ ret = glfs_fallocate(fd, opcode, offset, len);
+ if (ret < 0) {
+ fprintf(stderr, "glfs_fallocate: returned %d\n", ret);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (fd)
+ glfs_close(fd);
+ glfs_fini(fs);
+ return ret;
+}
diff --git a/tests/bugs/shard/shard-inode-refcount-test.t b/tests/bugs/shard/shard-inode-refcount-test.t
new file mode 100644
index 00000000000..3fd181be690
--- /dev/null
+++ b/tests/bugs/shard/shard-inode-refcount-test.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+SHARD_COUNT_TIME=5
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}0
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST dd if=/dev/zero conv=fsync of=$M0/one-plus-five-shards bs=1M count=23
+
+ACTIVE_INODES_BEFORE=$(get_mount_active_size_value $V0)
+TEST rm -f $M0/one-plus-five-shards
+# Expect 5 inodes less. But one inode more than before because .remove_me would be created.
+EXPECT_WITHIN $SHARD_COUNT_TIME `expr $ACTIVE_INODES_BEFORE - 5 + 1` get_mount_active_size_value $V0 $M0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/bugs/shard/unlinks-and-renames.t b/tests/bugs/shard/unlinks-and-renames.t
new file mode 100644
index 00000000000..990ca69a8b1
--- /dev/null
+++ b/tests/bugs/shard/unlinks-and-renames.t
@@ -0,0 +1,333 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+# The aim of this test script is to exercise the various codepaths of unlink
+# and rename fops in sharding and make sure they work fine.
+#
+
+FILE_COUNT_TIME=5
+
+function get_file_count {
+ ls $1* | wc -l
+}
+
+#################################################
+################### UNLINK ######################
+#################################################
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+TEST touch $M0/dir/foo
+TEST touch $M0/dir/new
+
+##########################################
+##### 01. Unlink with /.shard absent #####
+##########################################
+
+TEST truncate -s 5M $M0/dir/foo
+TEST ! stat $B0/${V0}0/.shard
+TEST ! stat $B0/${V0}1/.shard
+# Test to ensure that unlink doesn't fail due to absence of /.shard
+gfid_foo=$(get_gfid_string $M0/dir/foo)
+TEST unlink $M0/dir/foo
+TEST stat $B0/${V0}0/.shard/.remove_me
+TEST stat $B0/${V0}1/.shard/.remove_me
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo
+
+######################################################
+##### 02. Unlink of a sharded file without holes #####
+######################################################
+
+# Create a 9M sharded file
+TEST dd if=/dev/zero of=$M0/dir/new bs=1024 count=9216
+gfid_new=$(get_gfid_string $M0/dir/new)
+# Ensure its shards are created.
+TEST stat $B0/${V0}0/.shard/$gfid_new.1
+TEST stat $B0/${V0}1/.shard/$gfid_new.1
+TEST stat $B0/${V0}0/.shard/$gfid_new.2
+TEST stat $B0/${V0}1/.shard/$gfid_new.2
+TEST unlink $M0/dir/new
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_new
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_new
+TEST ! stat $M0/dir/new
+TEST ! stat $B0/${V0}0/dir/new
+TEST ! stat $B0/${V0}1/dir/new
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_new
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_new
+
+###########################################
+##### 03. Unlink with /.shard present #####
+###########################################
+
+TEST truncate -s 5M $M0/dir/foo
+gfid_foo=$(get_gfid_string $M0/dir/foo)
+# Ensure its shards are absent.
+TEST ! stat $B0/${V0}0/.shard/$gfid_foo.1
+TEST ! stat $B0/${V0}1/.shard/$gfid_foo.1
+# Test to ensure that unlink of a sparse file works fine.
+TEST unlink $M0/dir/foo
+TEST ! stat $B0/${V0}0/dir/foo
+TEST ! stat $B0/${V0}1/dir/foo
+TEST ! stat $M0/dir/foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo
+
+#################################################################
+##### 04. Unlink of a file with only one block (the zeroth) #####
+#################################################################
+
+TEST touch $M0/dir/foo
+gfid_foo=$(get_gfid_string $M0/dir/foo)
+TEST dd if=/dev/zero of=$M0/dir/foo bs=1024 count=1024
+# Test to ensure that unlink of a file with only base shard works fine.
+TEST unlink $M0/dir/foo
+TEST ! stat $B0/${V0}0/dir/foo
+TEST ! stat $B0/${V0}1/dir/foo
+TEST ! stat $M0/dir/foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_foo
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_foo
+
+########################################################
+##### 05. Unlink of a sharded file with hard-links #####
+########################################################
+
+# Create a 9M sharded file
+TEST dd if=/dev/zero of=$M0/dir/original bs=1024 count=9216
+gfid_original=$(get_gfid_string $M0/dir/original)
+# Ensure its shards are created.
+TEST stat $B0/${V0}0/.shard/$gfid_original.1
+TEST stat $B0/${V0}1/.shard/$gfid_original.1
+TEST stat $B0/${V0}0/.shard/$gfid_original.2
+TEST stat $B0/${V0}1/.shard/$gfid_original.2
+# Create a hard link.
+TEST ln $M0/dir/original $M0/link
+# Now delete the original file.
+TEST unlink $M0/dir/original
+TEST ! stat $B0/${V0}0/.shard/.remove_me/$gfid_original
+TEST ! stat $B0/${V0}1/.shard/.remove_me/$gfid_original
+# Ensure the shards are still intact.
+TEST stat $B0/${V0}0/.shard/$gfid_original.1
+TEST stat $B0/${V0}1/.shard/$gfid_original.1
+TEST stat $B0/${V0}0/.shard/$gfid_original.2
+TEST stat $B0/${V0}1/.shard/$gfid_original.2
+TEST ! stat $M0/dir/original
+TEST stat $M0/link
+TEST stat $B0/${V0}0/link
+TEST stat $B0/${V0}1/link
+# Now delete the last link.
+TEST unlink $M0/link
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_original
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_original
+# Ensure that the shards are all cleaned up.
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_original
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_original
+TEST ! stat $M0/link
+TEST ! stat $B0/${V0}0/link
+TEST ! stat $B0/${V0}1/link
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
+
+#################################################
+################### RENAME ######################
+#################################################
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/dir
+TEST touch $M0/dir/src
+TEST touch $M0/dir/dst
+
+##########################################
+##### 06. Rename with /.shard absent #####
+##########################################
+
+TEST truncate -s 5M $M0/dir/dst
+gfid_dst=$(get_gfid_string $M0/dir/dst)
+TEST ! stat $B0/${V0}0/.shard
+TEST ! stat $B0/${V0}1/.shard
+# Test to ensure that rename doesn't fail due to absence of /.shard
+TEST mv -f $M0/dir/src $M0/dir/dst
+TEST ! stat $M0/dir/src
+TEST stat $M0/dir/dst
+TEST ! stat $B0/${V0}0/dir/src
+TEST ! stat $B0/${V0}1/dir/src
+TEST stat $B0/${V0}0/dir/dst
+TEST stat $B0/${V0}1/dir/dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+######################################################
+##### 07. Rename to a sharded file without holes #####
+######################################################
+
+TEST unlink $M0/dir/dst
+TEST touch $M0/dir/src
+# Create a 9M sharded file
+TEST dd if=/dev/zero of=$M0/dir/dst bs=1024 count=9216
+gfid_dst=$(get_gfid_string $M0/dir/dst)
+# Ensure its shards are created.
+TEST stat $B0/${V0}0/.shard/$gfid_dst.1
+TEST stat $B0/${V0}1/.shard/$gfid_dst.1
+TEST stat $B0/${V0}0/.shard/$gfid_dst.2
+TEST stat $B0/${V0}1/.shard/$gfid_dst.2
+TEST mv -f $M0/dir/src $M0/dir/dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_dst
+TEST ! stat $M0/dir/src
+TEST stat $M0/dir/dst
+TEST ! stat $B0/${V0}0/dir/src
+TEST ! stat $B0/${V0}1/dir/src
+TEST stat $B0/${V0}0/dir/dst
+TEST stat $B0/${V0}1/dir/dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+#######################################################
+##### 08. Rename of dst file with /.shard present #####
+#######################################################
+
+TEST unlink $M0/dir/dst
+TEST touch $M0/dir/src
+TEST truncate -s 5M $M0/dir/dst
+gfid_dst=$(get_gfid_string $M0/dir/dst)
+# Test to ensure that rename into a sparse file works fine.
+TEST mv -f $M0/dir/src $M0/dir/dst
+TEST ! stat $M0/dir/src
+TEST stat $M0/dir/dst
+TEST ! stat $B0/${V0}0/dir/src
+TEST ! stat $B0/${V0}1/dir/src
+TEST stat $B0/${V0}0/dir/dst
+TEST stat $B0/${V0}1/dir/dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+###################################################################
+##### 09. Rename of dst file with only one block (the zeroth) #####
+###################################################################
+
+TEST unlink $M0/dir/dst
+TEST touch $M0/dir/src
+TEST dd if=/dev/zero of=$M0/dir/dst bs=1024 count=1024
+gfid_dst=$(get_gfid_string $M0/dir/dst)
+# Test to ensure that rename into a file with only base shard works fine.
+TEST mv -f $M0/dir/src $M0/dir/dst
+TEST ! stat $M0/dir/src
+TEST stat $M0/dir/dst
+TEST ! stat $B0/${V0}0/dir/src
+TEST ! stat $B0/${V0}1/dir/src
+TEST stat $B0/${V0}0/dir/dst
+TEST stat $B0/${V0}1/dir/dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+############################################################
+##### 10. Rename to a dst sharded file with hard-links #####
+############################################################
+
+TEST unlink $M0/dir/dst
+TEST touch $M0/dir/src
+# Create a 9M sharded file
+TEST dd if=/dev/zero of=$M0/dir/dst bs=1024 count=9216
+gfid_dst=$(get_gfid_string $M0/dir/dst)
+# Ensure its shards are created.
+TEST stat $B0/${V0}0/.shard/$gfid_dst.1
+TEST stat $B0/${V0}1/.shard/$gfid_dst.1
+TEST stat $B0/${V0}0/.shard/$gfid_dst.2
+TEST stat $B0/${V0}1/.shard/$gfid_dst.2
+# Create a hard link.
+TEST ln $M0/dir/dst $M0/link
+# Now rename src to the dst.
+TEST mv -f $M0/dir/src $M0/dir/dst
+# Ensure the shards are still intact.
+TEST stat $B0/${V0}0/.shard/$gfid_dst.1
+TEST stat $B0/${V0}1/.shard/$gfid_dst.1
+TEST stat $B0/${V0}0/.shard/$gfid_dst.2
+TEST stat $B0/${V0}1/.shard/$gfid_dst.2
+TEST ! stat $M0/dir/src
+TEST ! stat $B0/${V0}0/dir/src
+TEST ! stat $B0/${V0}1/dir/src
+TEST ! stat $B0/${V0}0/.shard/.remove_me/$gfid_dst
+TEST ! stat $B0/${V0}1/.shard/.remove_me/$gfid_dst
+# Now rename another file to the last link.
+TEST touch $M0/dir/src2
+TEST mv -f $M0/dir/src2 $M0/link
+# Ensure that the shards are all cleaned up.
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/$gfid_dst
+TEST ! stat $B0/${V0}0/.shard/$gfid_dst.1
+TEST ! stat $B0/${V0}1/.shard/$gfid_dst.1
+TEST ! stat $B0/${V0}0/.shard/$gfid_dst.2
+TEST ! stat $B0/${V0}1/.shard/$gfid_dst.2
+TEST ! stat $M0/dir/src2
+TEST ! stat $B0/${V0}0/dir/src2
+TEST ! stat $B0/${V0}1/dir/src2
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}0/.shard/.remove_me/$gfid_dst
+EXPECT_WITHIN $FILE_COUNT_TIME 0 get_file_count $B0/${V0}1/.shard/.remove_me/$gfid_dst
+
+##############################################################
+##### 11. Rename with non-existent dst and a sharded src #####
+##############################################################l
+
+TEST touch $M0/dir/src
+TEST dd if=/dev/zero of=$M0/dir/src bs=1024 count=9216
+gfid_src=$(get_gfid_string $M0/dir/src)
+# Ensure its shards are created.
+TEST stat $B0/${V0}0/.shard/$gfid_src.1
+TEST stat $B0/${V0}1/.shard/$gfid_src.1
+TEST stat $B0/${V0}0/.shard/$gfid_src.2
+TEST stat $B0/${V0}1/.shard/$gfid_src.2
+# Now rename src to the dst.
+TEST mv $M0/dir/src $M0/dir/dst2
+
+TEST stat $B0/${V0}0/.shard/$gfid_src.1
+TEST stat $B0/${V0}1/.shard/$gfid_src.1
+TEST stat $B0/${V0}0/.shard/$gfid_src.2
+TEST stat $B0/${V0}1/.shard/$gfid_src.2
+TEST ! stat $M0/dir/src
+TEST ! stat $B0/${V0}0/dir/src
+TEST ! stat $B0/${V0}1/dir/src
+TEST stat $M0/dir/dst2
+TEST stat $B0/${V0}0/dir/dst2
+TEST stat $B0/${V0}1/dir/dst2
+
+#############################################################################
+##### 12. Rename with non-existent dst and a sharded src with no shards #####
+#############################################################################
+
+TEST touch $M0/dir/src
+TEST dd if=/dev/zero of=$M0/dir/src bs=1024 count=1024
+gfid_src=$(get_gfid_string $M0/dir/src)
+TEST ! stat $B0/${V0}0/.shard/$gfid_src.1
+TEST ! stat $B0/${V0}1/.shard/$gfid_src.1
+# Now rename src to the dst.
+TEST mv $M0/dir/src $M0/dir/dst1
+TEST ! stat $M0/dir/src
+TEST ! stat $B0/${V0}0/dir/src
+TEST ! stat $B0/${V0}1/dir/src
+TEST stat $M0/dir/dst1
+TEST stat $B0/${V0}0/dir/dst1
+TEST stat $B0/${V0}1/dir/dst1
+
+cleanup
diff --git a/tests/bugs/shard/zero-flag.t b/tests/bugs/shard/zero-flag.t
new file mode 100644
index 00000000000..1f39787ab9f
--- /dev/null
+++ b/tests/bugs/shard/zero-flag.t
@@ -0,0 +1,76 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fallocate.rc
+
+cleanup
+
+require_fallocate -l 1m $M0/file
+require_fallocate -p -l 512k $M0/file && rm -f $M0/file
+require_fallocate -z -l 512k $M0/file && rm -f $M0/file
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1,2,3}
+TEST $CLI volume set $V0 features.shard on
+TEST $CLI volume set $V0 features.shard-block-size 4MB
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+TEST build_tester $(dirname $0)/shard-fallocate.c -lgfapi -Wall -O2
+
+# On file1 confirm that when fallocate's offset + len > cur file size,
+# the new file size will increase.
+TEST touch $M0/tmp
+TEST `echo 'abcdefghijklmnopqrstuvwxyz' > $M0/tmp`
+TEST touch $M0/file1
+
+gfid_file1=$(get_gfid_string $M0/file1)
+
+TEST $(dirname $0)/shard-fallocate $H0 $V0 "0" "0" "6291456" /file1 `gluster --print-logdir`/glfs-$V0.log
+
+EXPECT '6291456' stat -c %s $M0/file1
+
+# This should ensure /.shard is created on the bricks.
+TEST stat $B0/${V0}0/.shard
+TEST stat $B0/${V0}1/.shard
+TEST stat $B0/${V0}2/.shard
+TEST stat $B0/${V0}3/.shard
+
+EXPECT "2097152" echo `find $B0 -name $gfid_file1.1 | xargs stat -c %s`
+EXPECT "1" file_all_zeroes $M0/file1
+
+
+# On file2 confirm that fallocate to already allocated region of the
+# file does not change the content of the file.
+TEST truncate -s 6M $M0/file2
+TEST dd if=$M0/tmp of=$M0/file2 bs=1 seek=3145728 count=26 conv=notrunc
+md5sum_file2=$(md5sum $M0/file2 | awk '{print $1}')
+
+TEST $(dirname $0)/shard-fallocate $H0 $V0 "0" "3145728" "26" /file2 `gluster --print-logdir`/glfs-$V0.log
+
+EXPECT '6291456' stat -c %s $M0/file2
+EXPECT "$md5sum_file2" echo `md5sum $M0/file2 | awk '{print $1}'`
+
+# On file3 confirm that fallocate to a region of the file that consists
+#of holes creates a new shard in its place, fallocates it and there is no
+#change in the file content seen by the application.
+TEST touch $M0/file3
+
+gfid_file3=$(get_gfid_string $M0/file3)
+
+TEST dd if=$M0/tmp of=$M0/file3 bs=1 seek=9437184 count=26 conv=notrunc
+TEST ! stat $B0/$V0*/.shard/$gfid_file3.1
+TEST stat $B0/$V0*/.shard/$gfid_file3.2
+md5sum_file3=$(md5sum $M0/file3 | awk '{print $1}')
+EXPECT "1048602" echo `find $B0 -name $gfid_file3.2 | xargs stat -c %s`
+
+TEST $(dirname $0)/shard-fallocate $H0 $V0 "0" "5242880" "1048576" /file3 `gluster --print-logdir`/glfs-$V0.log
+EXPECT "$md5sum_file3" echo `md5sum $M0/file3 | awk '{print $1}'`
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+rm -f $(dirname $0)/shard-fallocate
+cleanup
diff --git a/tests/bugs/snapshot/bug-1045333.t b/tests/bugs/snapshot/bug-1045333.t
new file mode 100755
index 00000000000..6c0b995b5b0
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1045333.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+
+
+S1="${V0}-snap1" #Create snapshot with name contains hyphen(-)
+S2="-${V0}-snap2" #Create snapshot with name starts with hyphen(-)
+#Create snapshot with a long name
+S3="${V0}_single_gluster_volume_is_accessible_by_multiple_clients_offline_snapshot_is_a_long_name"
+
+TEST $CLI snapshot create $S1 $V0 no-timestamp
+TEST snapshot_exists 0 $S1
+
+TEST $CLI snapshot create $S2 $V0 no-timestamp
+TEST snapshot_exists 0 $S2
+
+TEST $CLI snapshot create $S3 $V0 no-timestamp
+TEST snapshot_exists 0 $S3
+
+
+TEST glusterfs -s $H0 --volfile-id=/snaps/$S1/$V0 $M0
+TEST glusterfs -s $H0 --volfile-id=/snaps/$S2/$V0 $M1
+TEST glusterfs -s $H0 --volfile-id=/snaps/$S3/$V0 $M2
+
+#Clean up
+#TEST $CLI snapshot delete $S1
+#TEST $CLI snapshot delete $S2
+#TEST $CLI snapshot delete $S3
+
+TEST $CLI volume stop $V0 force
+#TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1049834.t b/tests/bugs/snapshot/bug-1049834.t
new file mode 100755
index 00000000000..29c75cc7f96
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1049834.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version
+TEST launch_cluster 2
+TEST setup_lvm 2
+
+TEST $CLI_1 peer probe $H2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$L1 $H2:$L2
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+#Setting the snap-max-hard-limit to 4
+TEST $CLI_1 snapshot config $V0 snap-max-hard-limit 4
+PID_1=$!
+wait $PID_1
+
+#Creating 3 snapshots on the volume (which is the soft-limit)
+TEST create_n_snapshots $V0 3 $V0_snap
+TEST snapshot_n_exists $V0 3 $V0_snap
+
+#Creating the 4th snapshot on the volume and expecting it to be created
+# but with the deletion of the oldest snapshot i.e 1st snapshot
+TEST $CLI_1 snapshot create ${V0}_snap4 ${V0} no-timestamp
+TEST snapshot_exists 1 ${V0}_snap4
+TEST ! snapshot_exists 1 ${V0}_snap1
+TEST $CLI_1 snapshot delete ${V0}_snap4
+TEST $CLI_1 snapshot create ${V0}_snap1 ${V0} no-timestamp
+TEST snapshot_exists 1 ${V0}_snap1
+
+#Deleting the 4 snaps
+#TEST delete_n_snapshots $V0 4 $V0_snap
+#TEST ! snapshot_n_exists $V0 4 $V0_snap
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1064768.t b/tests/bugs/snapshot/bug-1064768.t
new file mode 100644
index 00000000000..53140a0e13e
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1064768.t
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick0 $H0:$B0/brick1
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status';
+
+TEST $CLI volume profile $V0 start
+TEST $CLI volume profile $V0 info
+TEST $CLI volume profile $V0 stop
+
+TEST $CLI volume status
+TEST $CLI volume stop $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Stopped' volinfo_field $V0 'Status';
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1087203.t b/tests/bugs/snapshot/bug-1087203.t
new file mode 100644
index 00000000000..035be098576
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1087203.t
@@ -0,0 +1,103 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../cluster.rc
+
+function get_volume_info ()
+{
+ local var=$1
+ $CLI_1 volume info $V0 | grep "^$var" | sed 's/.*: //'
+}
+
+cleanup;
+
+TEST verify_lvm_version
+TEST launch_cluster 2
+TEST setup_lvm 2
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count;
+
+TEST $CLI_1 volume create $V0 $H1:$L1 $H2:$L2
+EXPECT "$V0" get_volume_info 'Volume Name';
+EXPECT 'Created' get_volume_info 'Status';
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' get_volume_info 'Status';
+
+
+# Setting system limit
+TEST $CLI_1 snapshot config snap-max-hard-limit 100
+
+# Volume limit cannot exceed system limit, as limit is set to 100,
+# this should fail.
+TEST ! $CLI_1 snapshot config $V0 snap-max-hard-limit 101
+
+# Following are the invalid cases
+TEST ! $CLI_1 snapshot config $V0 snap-max-hard-limit a10
+TEST ! $CLI_1 snapshot config snap-max-hard-limit 10a
+TEST ! $CLI_1 snapshot config snap-max-hard-limit 10%
+TEST ! $CLI_1 snapshot config snap-max-soft-limit 50%1
+TEST ! $CLI_1 snapshot config snap-max-soft-limit 0111
+TEST ! $CLI_1 snapshot config snap-max-hard-limit OXA
+TEST ! $CLI_1 snapshot config snap-max-hard-limit 11.11
+TEST ! $CLI_1 snapshot config snap-max-soft-limit 50%
+TEST ! $CLI_1 snapshot config snap-max-hard-limit -100
+TEST ! $CLI_1 snapshot config snap-max-soft-limit -90
+
+# Soft limit cannot be assigned to volume
+TEST ! $CLI_1 snapshot config $V0 snap-max-soft-limit 10
+
+# Valid case
+TEST $CLI_1 snapshot config snap-max-soft-limit 50
+TEST $CLI_1 snapshot config $V0 snap-max-hard-limit 10
+
+# Validating auto-delete feature
+# Make sure auto-delete is disabled by default
+EXPECT 'disable' snap_config CLI_1 'auto-delete'
+
+# Test for invalid value for auto-delete
+TEST ! $CLI_1 snapshot config auto-delete test
+
+TEST $CLI_1 snapshot config snap-max-hard-limit 6
+TEST $CLI_1 snapshot config snap-max-soft-limit 50
+
+# Create 4 snapshots
+snap_index=1
+snap_count=4
+TEST snap_create CLI_1 $V0 $snap_index $snap_count
+
+# If auto-delete is disabled then oldest snapshot
+# should not be deleted automatically.
+EXPECT '4' get_snap_count CLI_1;
+
+TEST snap_delete CLI_1 $snap_index $snap_count;
+
+# After all those 4 snaps are deleted, There will not be any snaps present
+EXPECT '0' get_snap_count CLI_1;
+
+TEST $CLI_1 snapshot config auto-delete enable
+
+# auto-delete is already enabled, Hence expect a failure.
+TEST ! $CLI_1 snapshot config auto-delete on
+
+# Testing other boolean values with auto-delete
+TEST $CLI_1 snapshot config auto-delete off
+EXPECT 'off' snap_config CLI_1 'auto-delete'
+
+TEST $CLI_1 snapshot config auto-delete true
+EXPECT 'true' snap_config CLI_1 'auto-delete'
+
+# Try to create 4 snaps again, As auto-delete is enabled
+# oldest snap should be deleted and snapcount should be 3
+
+TEST snap_create CLI_1 $V0 $snap_index $snap_count;
+EXPECT '3' get_snap_count CLI_1;
+
+TEST $CLI_1 snapshot config auto-delete disable
+EXPECT 'disable' snap_config CLI_1 'auto-delete'
+
+cleanup;
+
diff --git a/tests/bugs/snapshot/bug-1090042.t b/tests/bugs/snapshot/bug-1090042.t
new file mode 100755
index 00000000000..98531a9751e
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1090042.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+TEST glusterd;
+
+TEST $CLI volume create $V0 replica 3 $H0:$L1 $H0:$L2 $H0:$L3;
+TEST $CLI volume start $V0;
+
+TEST kill_brick $V0 $H0 $L1;
+
+#Normal snap create should fail
+TEST ! $CLI snapshot create ${V0}_snap1 $V0 no-timestamp;
+TEST ! snapshot_exists 0 ${V0}_snap1;
+
+#With changes introduced in BZ #1184344 force snap create should fail too
+TEST ! $CLI snapshot create ${V0}_snap1 $V0 no-timestamp force;
+TEST ! snapshot_exists 0 ${V0}_snap1;
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1109770.t b/tests/bugs/snapshot/bug-1109770.t
new file mode 100644
index 00000000000..22511995937
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1109770.t
@@ -0,0 +1,65 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../nfs.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+for i in {1..10} ; do echo "file" > $M0/file$i ; done
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "file" > $M0/file$i ; done
+
+TEST $CLI snapshot create snap2 $V0 no-timestamp;
+
+mkdir $M0/dir1;
+mkdir $M0/dir2;
+
+for i in {1..10} ; do echo "foo" > $M0/dir1/foo$i ; done
+for i in {1..10} ; do echo "foo" > $M0/dir2/foo$i ; done
+
+TEST $CLI snapshot create snap3 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "foo" > $M0/dir1/foo$i ; done
+for i in {11..20} ; do echo "foo" > $M0/dir2/foo$i ; done
+
+TEST $CLI snapshot create snap4 $V0 no-timestamp;
+
+TEST $CLI volume set $V0 features.uss enable;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
+
+TEST $CLI volume set $V0 features.uss disable;
+
+SNAPD_PID=$(ps auxww | grep snapd | grep -v grep | awk '{print $2}');
+
+TEST ! [ $SNAPD_PID -gt 0 ];
+
+TEST $CLI volume set $V0 features.uss enable;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
+
+TEST $CLI volume stop $V0;
+
+SNAPD_PID=$(ps auxww | grep snapd | grep -v grep | awk '{print $2}');
+
+TEST ! [ $SNAPD_PID -gt 0 ];
+
+cleanup ;
diff --git a/tests/bugs/snapshot/bug-1109889.t b/tests/bugs/snapshot/bug-1109889.t
new file mode 100644
index 00000000000..5fdc7dc9506
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1109889.t
@@ -0,0 +1,74 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../nfs.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+
+TEST $CLI volume start $V0;
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+MOUNT_PID=$(get_mount_process_pid $V0 $M0)
+
+for i in {1..10} ; do echo "file" > $M0/file$i ; done
+
+TEST $CLI snapshot config activate-on-create enable
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "file" > $M0/file$i ; done
+
+TEST $CLI snapshot create snap2 $V0 no-timestamp;
+
+mkdir $M0/dir1;
+mkdir $M0/dir2;
+
+for i in {1..10} ; do echo "foo" > $M0/dir1/foo$i ; done
+for i in {1..10} ; do echo "foo" > $M0/dir2/foo$i ; done
+
+TEST $CLI snapshot create snap3 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "foo" > $M0/dir1/foo$i ; done
+for i in {11..20} ; do echo "foo" > $M0/dir2/foo$i ; done
+
+TEST $CLI snapshot create snap4 $V0 no-timestamp;
+
+TEST $CLI volume set $V0 features.uss enable;
+
+#let snapd get started properly and client connect to snapd
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" snap_client_connected_status $V0
+
+SNAPD_PID=$(ps auxww | grep snapd | grep -v grep | awk '{print $2}');
+
+TEST [ $SNAPD_PID -gt 0 ];
+
+TEST stat $M0/.snaps;
+
+kill -KILL $SNAPD_PID;
+
+# let snapd die properly
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "0" snap_client_connected_status $V0
+
+TEST ! stat $M0/.snaps;
+
+TEST $CLI volume start $V0 force;
+
+# let client get the snapd port from glusterd and connect
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" snap_client_connected_status $V0
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $M0/.snaps
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1111041.t b/tests/bugs/snapshot/bug-1111041.t
new file mode 100755
index 00000000000..efda9688d8b
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1111041.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../nfs.rc
+
+cleanup;
+
+function is_snapd_running {
+ $CLI volume status $1 | grep "Snapshot Daemon" | wc -l;
+}
+
+function snapd_pid {
+ $CLI volume status $V0 | grep "Snapshot Daemon" | awk {'print $8'}
+}
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0 $H0:$B0/${V0}1
+
+TEST $CLI volume start $V0;
+
+EXPECT "0" is_snapd_running $v0
+
+TEST $CLI volume set $V0 features.uss enable;
+
+EXPECT "1" is_snapd_running $V0
+
+SNAPD_PID=$(snapd_pid);
+
+TEST [ $SNAPD_PID -gt 0 ]
+
+kill -9 $SNAPD_PID
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT "^N/A$" snapd_pid
+
+cleanup ;
diff --git a/tests/bugs/snapshot/bug-1112613.t b/tests/bugs/snapshot/bug-1112613.t
new file mode 100644
index 00000000000..e566de056bc
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1112613.t
@@ -0,0 +1,49 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../cluster.rc
+
+cleanup;
+
+V1="patchy2"
+
+TEST verify_lvm_version;
+TEST launch_cluster 2
+TEST setup_lvm 2
+
+TEST $CLI_1 peer probe $H2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$L1
+TEST $CLI_1 volume start $V0
+TEST $CLI_1 volume create $V1 $H2:$L2
+TEST $CLI_1 volume start $V1
+
+# Create 3 snapshots for volume $V0
+snap_count=3
+snap_index=1
+TEST snap_create CLI_1 $V0 $snap_index $snap_count;
+
+# Create 3 snapshots for volume $V1
+snap_count=4
+snap_index=11
+TEST snap_create CLI_1 $V1 $snap_index $snap_count;
+
+EXPECT '3' get_snap_count CLI_1 $V0;
+EXPECT '4' get_snap_count CLI_1 $V1;
+EXPECT '7' get_snap_count CLI_1
+
+TEST $CLI_1 snapshot delete volume $V0
+EXPECT '0' get_snap_count CLI_1 $V0;
+EXPECT '4' get_snap_count CLI_1 $V1;
+EXPECT '4' get_snap_count CLI_1
+
+TEST $CLI_1 snapshot delete all
+EXPECT '0' get_snap_count CLI_1 $V0;
+EXPECT '0' get_snap_count CLI_1 $V1;
+EXPECT '0' get_snap_count CLI_1
+
+cleanup;
+
diff --git a/tests/bugs/snapshot/bug-1113975.t b/tests/bugs/snapshot/bug-1113975.t
new file mode 100644
index 00000000000..86c1739fb46
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1113975.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+for i in {1..10} ; do echo "file" > $M0/file$i ; done
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+
+for i in {11..20} ; do echo "file" > $M0/file$i ; done
+
+TEST $CLI snapshot create snap2 $V0 no-timestamp;
+
+TEST $CLI volume stop $V0
+
+TEST $CLI snapshot restore snap1;
+
+TEST $CLI snapshot restore snap2;
+
+TEST $CLI volume start $V0
+
+cleanup ;
diff --git a/tests/bugs/snapshot/bug-1155042-dont-display-deactivated-snapshots.t b/tests/bugs/snapshot/bug-1155042-dont-display-deactivated-snapshots.t
new file mode 100644
index 00000000000..c5a285eb775
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1155042-dont-display-deactivated-snapshots.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST init_n_bricks 2
+TEST setup_lvm 2
+TEST glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2
+TEST $CLI volume start $V0
+
+# enable uss and mount the volume
+TEST $CLI volume set $V0 features.uss enable
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+# create 10 snapshots and check if all are being reflected
+# in the USS world
+gluster snapshot config activate-on-create enable
+for i in {1..10}; do $CLI snapshot create snap$i $V0 no-timestamp; done
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 10 uss_count_snap_displayed $M0
+
+# snapshots should not be displayed after deactivation
+for i in {1..10}; do $CLI snapshot deactivate snap$i --mode=script; done
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 0 uss_count_snap_displayed $M0
+
+# activate all the snapshots and check if all the activated snapshots
+# are displayed again
+for i in {1..10}; do $CLI snapshot activate snap$i --mode=script; done
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 10 uss_count_snap_displayed $M0
+
+cleanup;
+
diff --git a/tests/bugs/snapshot/bug-1157991.t b/tests/bugs/snapshot/bug-1157991.t
new file mode 100755
index 00000000000..f626ef2b705
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1157991.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp
+EXPECT 'Stopped' snapshot_status snap1;
+
+TEST $CLI snapshot config activate-on-create enable
+TEST $CLI snapshot create snap2 $V0 no-timestamp
+EXPECT 'Started' snapshot_status snap2;
+
+#Clean up
+TEST $CLI snapshot delete snap1
+TEST $CLI snapshot delete snap2
+
+TEST $CLI volume stop $V0 force
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1162462.t b/tests/bugs/snapshot/bug-1162462.t
new file mode 100755
index 00000000000..5c2e4fe37f0
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1162462.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+TEST $CLI volume start $V0;
+TEST $CLI volume set $V0 features.uss enable;
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+mkdir $M0/test
+echo "file1" > $M0/file1
+ln -s $M0/file1 $M0/test/file_symlink
+ls -l $M0/ > /dev/null
+ls -l $M0/test/ > /dev/null
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+$CLI snapshot activate snap1;
+EXPECT 'Started' snapshot_status snap1;
+
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" snap_client_connected_status $V0
+ls $M0/.snaps/snap1/test/ > /dev/null
+ls -l $M0/.snaps/snap1/test/ > /dev/null
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" snap_client_connected_status $V0
+
+TEST $CLI snapshot delete snap1;
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1162498.t b/tests/bugs/snapshot/bug-1162498.t
new file mode 100755
index 00000000000..a97e4429ee7
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1162498.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+
+TEST $CLI snapshot config activate-on-create enable
+TEST $CLI volume set $V0 features.uss enable
+
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+TEST mkdir $M0/xyz
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp
+TEST $CLI snapshot create snap2 $V0 no-timestamp
+
+TEST rmdir $M0/xyz
+
+TEST $CLI snapshot create snap3 $V0 no-timestamp
+TEST $CLI snapshot create snap4 $V0 no-timestamp
+
+TEST mkdir $M0/xyz
+TEST ls $M0/xyz/.snaps/
+
+TEST $CLI volume stop $V0
+TEST $CLI snapshot restore snap2
+TEST $CLI volume start $V0
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST glusterfs -s $H0 --volfile-id=$V0 $M0
+
+#Dir xyz exists in snap1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $M0/xyz
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" count_snaps $M0/xyz
+TEST mkdir $M0/abc
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" count_snaps $M0/abc
+
+#Clean up
+TEST $CLI snapshot delete snap1
+TEST $CLI snapshot delete snap3
+TEST $CLI snapshot delete snap4
+TEST $CLI volume stop $V0 force
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1166197.t b/tests/bugs/snapshot/bug-1166197.t
new file mode 100755
index 00000000000..b070ae271ba
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1166197.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+CURDIR=`pwd`
+
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+TEST $CLI snapshot config activate-on-create enable
+TEST $CLI volume set $V0 features.uss enable
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status';
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+TEST mount_nfs $H0:/$V0 $N0 nolock
+TEST mkdir $N0/testdir
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp
+TEST $CLI snapshot create snap2 $V0 no-timestamp
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/testdir/.snaps
+
+TEST cd $N0/testdir
+TEST cd .snaps
+TEST ls
+
+TEST $CLI snapshot deactivate snap2
+TEST ls
+
+TEST cd $CURDIR
+
+#Clean up
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" umount_nfs $N0
+TEST $CLI snapshot delete snap1
+TEST $CLI snapshot delete snap2
+TEST $CLI volume stop $V0 force
+TEST $CLI volume delete $V0
+
+cleanup;
+
diff --git a/tests/bugs/snapshot/bug-1167580-set-proper-uid-and-gid-during-nfs-access.t b/tests/bugs/snapshot/bug-1167580-set-proper-uid-and-gid-during-nfs-access.t
new file mode 100644
index 00000000000..52a7a790b97
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1167580-set-proper-uid-and-gid-during-nfs-access.t
@@ -0,0 +1,205 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+# This function returns a value "Y" if user can execute
+# the given command. Else it will return "N"
+# @arg-1 : Name of the user
+# @arg-2 : Path of the file
+# @arg-3 : command to be executed
+function check_if_permitted () {
+ local usr=$1
+ local path=$2
+ local cmd=$3
+ local var
+ local ret
+ var=$(su - $usr -c "$cmd $path")
+ ret=$?
+
+ if [ "$cmd" == "cat" ]
+ then
+ if [ "$var" == "Test" ]
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+ else
+ if [ "$ret" == "0" ]
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+ fi
+}
+
+# Create a directory in /tmp to specify which directory to make
+# as home directory for user
+home_dir=$(mktemp -d)
+chmod 777 $home_dir
+
+function get_new_user() {
+ local temp=$(uuidgen | tr -dc 'a-zA-Z' | head -c 8)
+ id $temp
+ if [ "$?" == "0" ]
+ then
+ get_new_user
+ else
+ echo $temp
+ fi
+}
+
+function create_user() {
+ local user=$1
+ local group=$2
+
+ if [ "$group" == "" ]
+ then
+ /usr/sbin/useradd -d $home_dir/$user $user
+ else
+ /usr/sbin/useradd -d $home_dir/$user -G $group $user
+ fi
+
+ return $?
+}
+
+cleanup;
+
+TEST setup_lvm 1
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume start $V0
+
+# Mount the volume as both fuse and nfs mount
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+TEST mount_nfs $H0:/$V0 $N0 nolock
+
+# Create 2 user
+user1=$(get_new_user)
+create_user $user1
+user2=$(get_new_user)
+create_user $user2
+
+# create a file for which only user1 has access
+echo "Test" > $M0/README
+chown $user1 $M0/README
+chmod 700 $M0/README
+
+# enable uss and take a snapshot
+TEST $CLI volume set $V0 uss enable
+TEST $CLI snapshot config activate-on-create on
+TEST $CLI snapshot create snap1 $V0 no-timestamp
+
+# try to access the file using user1 account.
+# It should succeed with both normal mount and snapshot world.
+# There is time delay in which snapd might not have got the notification
+# from glusterd about snapshot create hence using "EXPECT_WITHIN"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" check_if_permitted $user1 $M0/README cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" check_if_permitted $user1 $N0/README cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" check_if_permitted $user1 $M0/.snaps/snap1/README cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" check_if_permitted $user1 $N0/.snaps/snap1/README cat
+
+
+# try to access the file using user2 account
+# It should fail from both normal mount and snapshot world
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user2 $M0/README cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user2 $N0/README cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user2 $M0/.snaps/snap1/README cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user2 $N0/.snaps/snap1/README cat
+
+# We need to test another scenario where user belonging to one group
+# tries to access files from user belonging to another group
+# instead of using the already created users and making the test case look complex
+# I thought of using two different users.
+
+# The test case written below does the following things
+# 1) Create 2 users (user{3,4}), belonging to 2 different groups (group{3,4})
+# 2) Take a snapshot "snap2"
+# 3) Create a file for which only users belonging to group3 have
+# permission to read
+# 4) Test various combinations of Read-Write, Fuse-NFS mount, User{3,4,5}
+# from both normal mount, and USS world.
+
+echo "Test" > $M0/file3
+
+chmod 740 $M0/file3
+
+group3=$(get_new_user)
+groupadd $group3
+
+group4=$(get_new_user)
+groupadd $group4
+
+user3=$(get_new_user)
+create_user $user3 $group3
+
+user4=$(get_new_user)
+create_user $user4 $group4
+
+user5=$(get_new_user)
+create_user $user5
+
+chgrp $group3 $M0/file3
+
+TEST $CLI snapshot create snap2 $V0 no-timestamp
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" check_if_permitted $user3 $M0/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" check_if_permitted $user3 $M0/.snaps/snap2/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user3 $M0/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user3 $M0/.snaps/snap2/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" check_if_permitted $user3 $N0/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" check_if_permitted $user3 $N0/.snaps/snap2/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user3 $N0/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user3 $N0/.snaps/snap2/file3 "echo Hello >"
+
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user4 $M0/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user4 $M0/.snaps/snap2/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user4 $M0/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user4 $M0/.snaps/snap2/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user4 $N0/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user4 $N0/.snaps/snap2/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user4 $N0/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user4 $N0/.snaps/snap2/file3 "echo Hello >"
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user5 $M0/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user5 $M0/.snaps/snap2/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user5 $M0/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user5 $M0/.snaps/snap2/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user5 $N0/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user5 $N0/.snaps/snap2/file3 cat
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user5 $N0/file3 "echo Hello >"
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "N" check_if_permitted $user5 $N0/.snaps/snap2/file3 "echo Hello >"
+
+# cleanup
+/usr/sbin/userdel -f -r $user1
+/usr/sbin/userdel -f -r $user2
+/usr/sbin/userdel -f -r $user3
+/usr/sbin/userdel -f -r $user4
+/usr/sbin/userdel -f -r $user5
+
+#cleanup all the home directory which is created as part of this test case
+if [ -d "$home_dir" ]
+then
+ rm -rf $home_dir
+fi
+
+
+groupdel $group3
+groupdel $group4
+
+TEST $CLI snapshot delete all
+
+cleanup;
+
+
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/bugs/snapshot/bug-1168875.t b/tests/bugs/snapshot/bug-1168875.t
new file mode 100644
index 00000000000..9737784fd84
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1168875.t
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../fileio.rc
+. $(dirname $0)/../../nfs.rc
+
+cleanup;
+
+function check_entry_point_exists ()
+{
+ local entry_point=$1;
+ local _path=$2;
+
+ ls -a $_path | grep $entry_point;
+
+ if [ $? -eq 0 ]; then
+ echo 'Y';
+ else
+ echo 'N';
+ fi
+}
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 --xlator-option *-snapview-client.snapdir-entry-path=/dir $M0;
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $N0;
+for i in {1..10} ; do echo "file" > $M0/file$i ; done
+
+
+for i in {11..20} ; do echo "file" > $M0/file$i ; done
+
+mkdir $M0/dir;
+
+for i in {1..10} ; do echo "file" > $M0/dir/file$i ; done
+
+mkdir $M0/dir1;
+mkdir $M0/dir2;
+
+for i in {1..10} ; do echo "foo" > $M0/dir1/foo$i ; done
+for i in {1..10} ; do echo "foo" > $M0/dir2/foo$i ; done
+
+for i in {11..20} ; do echo "foo" > $M0/dir1/foo$i ; done
+for i in {11..20} ; do echo "foo" > $M0/dir2/foo$i ; done
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+TEST $CLI snapshot activate snap1;
+
+TEST $CLI volume set $V0 features.uss enable;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists .snaps $M0/dir
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists .snaps $N0/dir
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists .snaps $M0/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists .snaps $N0/dir1
+
+TEST $CLI volume set $V0 features.show-snapshot-directory enable;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $M0/dir
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/dir
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $M0/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/dir1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_entry_point_exists ".snaps" $M0/dir
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists ".snaps" $N0/dir
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists ".snaps" $M0/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists ".snaps" $N0/dir1
+
+TEST $CLI volume set $V0 features.show-snapshot-directory disable;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $M0/dir
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/dir
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $M0/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" STAT $N0/dir1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists ".snaps" $M0/dir
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists ".snaps" $N0/dir
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists ".snaps" $M0/dir1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'N' check_entry_point_exists ".snaps" $N0/dir1
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1178079.t b/tests/bugs/snapshot/bug-1178079.t
new file mode 100644
index 00000000000..a1a6b0b9d49
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1178079.t
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+#Create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1..2};
+TEST $CLI volume start $V0;
+
+TEST $CLI volume set $V0 features.uss on;
+
+TEST glusterfs -s $H0 --volfile-id $V0 $M0;
+
+TEST touch $M0/file;
+
+TEST getfattr -d -m . -e hex $M0/file;
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1202436-calculate-quota-cksum-during-snap-restore.t b/tests/bugs/snapshot/bug-1202436-calculate-quota-cksum-during-snap-restore.t
new file mode 100644
index 00000000000..addc05917d8
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1202436-calculate-quota-cksum-during-snap-restore.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST verify_lvm_version
+TEST launch_cluster 2
+TEST setup_lvm 1
+
+TEST $CLI_1 volume create $V0 $H1:$L1
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# Quota is not working with cluster test framework
+# Need to check why, Until then commenting out this
+#TEST $CLI_1 volume quota $V0 enable
+#EXPECT 'on' volinfo_field $V0 'features.quota'
+
+TEST $CLI_1 snapshot create ${V0}_snap $V0
+EXPECT '1' get_snap_count CLI_1 $V0
+
+TEST $CLI_1 volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status'
+
+TEST $CLI_1 snapshot restore $($CLI_1 snapshot list)
+EXPECT '0' get_snap_count CLI_1 $V0
+
+TEST $CLI_1 peer probe $H2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1205592.t b/tests/bugs/snapshot/bug-1205592.t
new file mode 100644
index 00000000000..f7d99345c8d
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1205592.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version
+TEST launch_cluster 3
+TEST setup_lvm 3
+
+TEST $CLI_1 peer probe $H2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 peer probe $H3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$L1 $H2:$L2 $H3:$L3
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+
+kill_glusterd 3
+# If glusterd-quorum is not met then snapshot-create should fail
+TEST ! $CLI_1 snapshot create ${V0}_snap1 ${V0} no-timestamp
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1227646.t b/tests/bugs/snapshot/bug-1227646.t
new file mode 100644
index 00000000000..9b73dfdb32f
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1227646.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+
+TEST glusterd;
+TEST pidof glusterd;
+
+#TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+TEST $CLI volume create $V0 $H0:$L2 $H0:$L3;
+TEST $CLI volume start $V0;
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+TEST $CLI volume stop $V0
+TEST $CLI snapshot restore snap1;
+TEST $CLI volume start $V0
+
+TEST pkill gluster
+TEST glusterd
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Started' volinfo_field $V0 'Status'
+
+TEST $CLI volume stop $V0
+
+cleanup ;
+
diff --git a/tests/bugs/snapshot/bug-1232430.t b/tests/bugs/snapshot/bug-1232430.t
new file mode 100755
index 00000000000..50411b1dbfc
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1232430.t
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version;
+TEST glusterd -LDEBUG;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1/brick_dir
+TEST $CLI volume start $V0
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp
+
+TEST $CLI snapshot delete snap1
+
+TEST $CLI volume stop $V0 force
+TEST $CLI volume delete $V0
+cleanup
diff --git a/tests/bugs/snapshot/bug-1250387.t b/tests/bugs/snapshot/bug-1250387.t
new file mode 100755
index 00000000000..c9039e37f73
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1250387.t
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST init_n_bricks 1;
+TEST setup_lvm 1;
+
+TEST glusterd;
+
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1;
+
+TEST $CLI volume start $V0;
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+
+EXPECT "description" get-cmd-field-xml "snapshot info snap1" "description"
+
+cleanup ;
diff --git a/tests/bugs/snapshot/bug-1260848.t b/tests/bugs/snapshot/bug-1260848.t
new file mode 100644
index 00000000000..6455d8297b2
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1260848.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../nfs.rc
+. $(dirname $0)/../../volume.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 disperse 6 redundancy 2 $H0:$B0/${V0}{0..5}
+TEST $CLI volume set $V0 nfs.disable false
+TEST $CLI volume set $V0 uss on
+TEST $CLI volume start $V0
+
+## Wait for volume to register with rpc.mountd
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available;
+
+## Mount NFS
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+TEST df -h $N0
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1275616.t b/tests/bugs/snapshot/bug-1275616.t
new file mode 100755
index 00000000000..dcaeae30f90
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1275616.t
@@ -0,0 +1,50 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST setup_lvm 1
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+TEST $CLI snapshot config activate-on-create enable
+
+TEST $CLI snapshot config $V0 snap-max-hard-limit 100
+TEST $CLI snapshot create snap1 $V0 no-timestamp
+
+TEST $CLI snapshot config $V0 snap-max-hard-limit 150
+TEST $CLI snapshot create snap2 $V0 no-timestamp
+
+TEST $CLI snapshot config $V0 snap-max-hard-limit 200
+TEST $CLI snapshot create snap3 $V0 no-timestamp
+EXPECT '197' snap_info_volume CLI "Snaps Available" $V0;
+
+TEST $CLI volume stop $V0
+
+# Restore the snapshots and verify the snap-max-hard-limit
+# and the Snaps Available
+TEST $CLI snapshot restore snap1
+EXPECT '98' snap_info_volume CLI "Snaps Available" $V0;
+EXPECT '100' snap_config_volume CLI 'snap-max-hard-limit' $V0
+
+TEST $CLI snapshot restore snap2
+EXPECT '149' snap_info_volume CLI "Snaps Available" $V0;
+EXPECT '150' snap_config_volume CLI 'snap-max-hard-limit' $V0
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Yes" get_snap_brick_status snap3
+
+#Take a clone and verify it inherits snapshot's snap-max-hard-limit
+TEST $CLI snapshot clone clone1 snap3
+
+EXPECT '149' snap_info_volume CLI "Snaps Available" $V0;
+EXPECT '150' snap_config_volume CLI 'snap-max-hard-limit' $V0
+
+EXPECT '200' snap_info_volume CLI "Snaps Available" clone1
+EXPECT '200' snap_config_volume CLI 'snap-max-hard-limit' clone1
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1279327.t b/tests/bugs/snapshot/bug-1279327.t
new file mode 100644
index 00000000000..4e4be6eeea6
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1279327.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST verify_lvm_version;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST init_n_bricks 3
+TEST setup_lvm 3
+
+TEST $CLI volume create $V0 $H0:$L1
+TEST $CLI volume start $V0
+TEST $CLI volume quota $V0 enable
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp
+TEST $CLI snapshot activate snap1
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Yes" get_snap_brick_status snap1
+
+#Take a clone and verify it inherits snapshot's snap-max-hard-limit
+TEST $CLI snapshot clone clone1 snap1
+TEST $CLI volume start clone1
+EXPECT 'Started' volinfo_field clone1 'Status';
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1316437.t b/tests/bugs/snapshot/bug-1316437.t
new file mode 100644
index 00000000000..300c03c97f5
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1316437.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST glusterd
+
+# Intentionally not carving lvms for this as we will not be taking
+# snapshots in this testcase
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+TEST $CLI volume start $V0;
+
+TEST $CLI volume set $V0 features.uss enable;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
+
+killall glusterd glusterfsd glusterfs
+
+EXPECT_WITHIN $PROCESS_DOWN_TIMEOUT 'N' check_if_snapd_exist
+
+glusterd
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1322772-real-path-fix-for-snapshot.t b/tests/bugs/snapshot/bug-1322772-real-path-fix-for-snapshot.t
new file mode 100644
index 00000000000..488bd462a01
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1322772-real-path-fix-for-snapshot.t
@@ -0,0 +1,56 @@
+#!/bin/bash
+
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../include.rc
+cleanup;
+
+TESTS_EXPECTED_IN_LOOP=2
+
+TEST verify_lvm_version
+TEST init_n_bricks 2
+TEST setup_lvm 2
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$L1
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume create $V1 $H0:$L2
+EXPECT 'Created' volinfo_field $V1 'Status'
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V1
+EXPECT 'Started' volinfo_field $V1 'Status'
+
+TEST $CLI snapshot config activate-on-create enable
+TEST $CLI snapshot create ${V0}_snap $V0 no-timestamp
+TEST $CLI snapshot create ${V1}_snap $V1 no-timestamp
+
+# Simulate a node reboot by unmounting the brick, snap_brick and followed by
+# deleting the brick. Now once glusterd restarts, it should be able to construct
+# and remount the snap brick
+snap_bricks=`gluster snap status | grep "Brick Path" | awk -F ":" '{print $3}'`
+
+TEST $CLI volume stop $V1
+TEST $CLI snapshot restore ${V1}_snap;
+
+pkill gluster
+for snap_brick in $snap_bricks
+do
+ echo "Unmounting snap brick" $snap_brick
+ EXPECT_WITHIN_TEST_IN_LOOP $UMOUNT_TIMEOUT "Y" force_umount $snap_brick
+done
+
+rm -rf $snap_brick
+
+TEST glusterd
+TEST pidof glusterd
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" brick_up_status $V0 $H0 $L1
+
+cleanup
+
diff --git a/tests/bugs/snapshot/bug-1399598-uss-with-ssl.t b/tests/bugs/snapshot/bug-1399598-uss-with-ssl.t
new file mode 100755
index 00000000000..f4e4e6ec4d2
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1399598-uss-with-ssl.t
@@ -0,0 +1,108 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../traps.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../ssl.rc
+
+function file_exists
+{
+ if [ -f $1 ]; then echo "Y"; else echo "N"; fi
+}
+
+function volume_online_brick_count
+{
+ $CLI volume status $V0 | awk '$1 == "Brick" && $6 != "N/A" { print $6}' | wc -l;
+}
+
+function total_online_bricks
+{
+ # This will count snapd, which isn't really a brick, but callers can
+ # account for that so it's OK.
+ find $GLUSTERD_PIDFILEDIR -name '*.pid' | wc -l
+}
+
+cleanup;
+
+# Initialize the test setup
+TEST setup_lvm 1;
+
+TEST create_self_signed_certs
+
+# Start glusterd
+TEST glusterd
+TEST pidof glusterd;
+#EST $CLI volume set all cluster.brick-multiplex on
+
+# Create and start the volume
+TEST $CLI volume create $V0 $H0:$L1/b1;
+
+TEST $CLI volume start $V0;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" volume_online_brick_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" total_online_bricks
+
+# Mount the volume and create some files
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+TEST touch $M0/file;
+
+# Enable activate-on-create
+TEST $CLI snapshot config activate-on-create enable;
+
+# Create a snapshot
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" total_online_bricks
+
+TEST $CLI volume set $V0 features.uss enable;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" total_online_bricks
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/file
+# Volume set can trigger graph switch therefore chances are we send this
+# req to old graph. Old graph will not have .snaps. Therefore we should
+# wait for some time.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/.snaps/snap1/file
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Enable management encryption
+touch $GLUSTERD_WORKDIR/secure-access
+killall_gluster
+
+TEST glusterd
+TEST pidof glusterd;
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" volume_online_brick_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" total_online_bricks
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
+
+# Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/file
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/.snaps/snap1/file
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Enable I/O encryption
+TEST $CLI volume set $V0 client.ssl on
+TEST $CLI volume set $V0 server.ssl on
+
+killall_gluster
+
+TEST glusterd
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" volume_online_brick_count
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "3" total_online_bricks
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT 'Y' check_if_snapd_exist
+
+# Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/file
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" file_exists $M0/.snaps/snap1/file
+
+TEST $CLI snapshot delete all
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
new file mode 100644
index 00000000000..04a85db0c1a
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1482023-snpashot-issue-with-other-processes-accessing-mounted-path.t
@@ -0,0 +1,133 @@
+#!/bin/bash
+
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+
+function create_snapshots() {
+ $CLI_1 snapshot create ${V0}_snap ${V0} no-timestamp &
+ PID_1=$!
+
+ $CLI_1 snapshot create ${V1}_snap ${V1} no-timestamp &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function activate_snapshots() {
+ $CLI_1 snapshot activate ${V0}_snap &
+ PID_1=$!
+
+ $CLI_1 snapshot activate ${V1}_snap &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+
+function deactivate_snapshots() {
+ $CLI_1 snapshot deactivate ${V0}_snap &
+ PID_1=$!
+
+ $CLI_1 snapshot deactivate ${V1}_snap &
+ PID_2=$!
+
+ wait $PID_1 $PID_2
+}
+cleanup;
+
+TEST verify_lvm_version;
+# Create cluster with 3 nodes
+TEST launch_cluster 3;
+TEST setup_lvm 3
+
+TEST $CLI_1 peer probe $H2;
+TEST $CLI_1 peer probe $H3;
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
+
+# Create volumes
+TEST $CLI_1 volume create $V0 $H1:$L1
+TEST $CLI_2 volume create $V1 $H2:$L2 $H3:$L3
+
+# Start volumes
+TEST $CLI_1 volume start $V0
+TEST $CLI_2 volume start $V1
+
+TEST $CLI_1 snapshot config activate-on-create enable
+
+# Snapshot Operations
+create_snapshots
+
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+
+deactivate_snapshots
+
+EXPECT 'Stopped' snapshot_status ${V0}_snap;
+EXPECT 'Stopped' snapshot_status ${V1}_snap;
+
+activate_snapshots
+
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+
+# This Function will get snap id form snap info command and will
+# check for mount point in system against snap id.
+function mounted_snaps
+{
+ snap_id=`$CLI_1 snap info $1_snap | grep "Snap Volume Name" |
+ awk -F ":" '{print $2}'`
+ echo `mount | grep $snap_id | wc -l`
+}
+
+EXPECT "1" mounted_snaps ${V0}
+EXPECT "2" mounted_snaps ${V1}
+
+deactivate_snapshots
+
+EXPECT "0" mounted_snaps ${V0}
+EXPECT "0" mounted_snaps ${V1}
+
+# This part of test is designed to validate that updates are properly being
+# handled during handshake.
+
+activate_snapshots
+
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+
+kill_glusterd 2
+
+deactivate_snapshots
+EXPECT 'Stopped' snapshot_status ${V0}_snap;
+EXPECT 'Stopped' snapshot_status ${V1}_snap;
+
+TEST start_glusterd 2
+
+# Updates form friend should reflect as snap was deactivated while glusterd
+# process was inactive and mount point should also not exist.
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" mounted_snaps ${V0}
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "0" mounted_snaps ${V1}
+
+# It might be possible that the import snap synctask is still updating the data,
+# we need to allow a buffer time to be on the safer side
+sleep 2
+
+kill_glusterd 2
+activate_snapshots
+EXPECT 'Started' snapshot_status ${V0}_snap;
+EXPECT 'Started' snapshot_status ${V1}_snap;
+TEST start_glusterd 2
+
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count;
+
+# Updates form friend should reflect as snap was activated while glusterd
+# process was inactive and mount point should exist.
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" mounted_snaps ${V0}
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "2" mounted_snaps ${V1}
+
+cleanup;
+# run first!
+#G_TESTDEF_TEST_STATUS_CENTOS6=BRICK_MUX_BAD_TEST,BUG=1743069
diff --git a/tests/bugs/snapshot/bug-1512451-snapshot-creation-failed-after-brick-reset.t b/tests/bugs/snapshot/bug-1512451-snapshot-creation-failed-after-brick-reset.t
new file mode 100644
index 00000000000..53b274e8819
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1512451-snapshot-creation-failed-after-brick-reset.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../cluster.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+TEST verify_lvm_version
+TEST launch_cluster 2
+TEST setup_lvm 2
+
+TEST $CLI_1 peer probe $H2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count
+
+TEST $CLI_1 volume create $V0 $H1:$L1/B1 $H2:$L2/B1
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $CLI_1 snapshot create ${V0}_snap1 ${V0} no-timestamp
+TEST snapshot_exists 1 ${V0}_snap1
+
+TEST $CLI_1 snapshot delete ${V0}_snap1
+TEST ! snapshot_exists 1 ${V0}_snap1
+
+TEST $CLI_1 volume reset-brick $V0 $H1:$L1/B1 start
+TEST $CLI_1 volume reset-brick $V0 $H1:$L1/B1 $H1:$L1/B1 commit force
+
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "1" cluster_brick_up_status 1 $V0 $H1 $L1/B1
+
+TEST $CLI_1 snapshot create ${V0}_snap1 ${V0} no-timestamp
+TEST snapshot_exists 1 ${V0}_snap1
+
+TEST $CLI_1 snapshot delete ${V0}_snap1
+TEST ! snapshot_exists 1 ${V0}_snap1
+
+cleanup;
diff --git a/tests/bugs/snapshot/bug-1597662.t b/tests/bugs/snapshot/bug-1597662.t
new file mode 100644
index 00000000000..f582930476a
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1597662.t
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+
+cleanup;
+
+TEST init_n_bricks 3;
+TEST setup_lvm 3;
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$L1 $H0:$L2 $H0:$L3;
+TEST $CLI volume start $V0;
+
+snap_path=/var/run/gluster/snaps
+
+TEST $CLI snapshot create snap1 $V0 no-timestamp;
+
+$CLI snapshot activate snap1;
+
+EXPECT 'Started' snapshot_status snap1;
+
+# This Function will check for entry /var/run/gluster/snaps/<snap-name>
+# against snap-name
+
+function is_snap_path
+{
+ echo `ls $snap_path | grep snap1 | wc -l`
+}
+
+# snap is active so snap_path should exist
+EXPECT "1" is_snap_path
+
+$CLI snapshot deactivate snap1;
+EXPECT_WITHIN ${PROCESS_DOWN_TIMEOUT} 'Stopped' snapshot_status snap1
+# snap is deactivated so snap_path should not exist
+EXPECT "0" is_snap_path
+
+# activate snap again
+$CLI snapshot activate snap1;
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} 'Started' snapshot_status snap1
+
+# snap is active so snap_path should exist
+EXPECT "1" is_snap_path
+
+# delete snap now
+TEST $CLI snapshot delete snap1;
+
+# snap is deleted so snap_path should not exist
+EXPECT "0" is_snap_path
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
+
diff --git a/tests/bugs/snapshot/bug-1618004-fix-memory-corruption-in-snap-import.t b/tests/bugs/snapshot/bug-1618004-fix-memory-corruption-in-snap-import.t
new file mode 100644
index 00000000000..a2c004e435e
--- /dev/null
+++ b/tests/bugs/snapshot/bug-1618004-fix-memory-corruption-in-snap-import.t
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../snapshot.rc
+. $(dirname $0)/../../cluster.rc
+
+function get_volume_info ()
+{
+ local var=$1
+ $CLI_1 volume info $V0 | grep "^$var" | sed 's/.*: //'
+}
+
+cleanup;
+
+TEST verify_lvm_version
+TEST launch_cluster 2
+TEST setup_lvm 2
+
+TEST $CLI_1 peer probe $H2;
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count;
+
+TEST $CLI_1 volume create $V0 $H1:$L1 $H2:$L2
+EXPECT "$V0" get_volume_info 'Volume Name';
+EXPECT 'Created' get_volume_info 'Status';
+
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' get_volume_info 'Status';
+
+
+# Setting system limit
+TEST $CLI_1 snapshot config activate-on-create enable
+
+TEST $CLI_1 snapshot create snap1 $V0 no-timestamp description "test"
+TEST kill_glusterd 1
+#deactivate snapshot for changing snap version, so that handshake will
+#happen when glusterd is restarted
+TEST $CLI_2 snapshot deactivate snap1
+TEST start_glusterd 1
+
+#Wait till handshake complete
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} 'Stopped' snapshot_status snap1
+
+#Delete the snapshot, without this fix, delete will lead to assertion failure
+$CLI_1 snapshot delete all
+EXPECT '0' get_snap_count CLI_1;
+cleanup;
+
diff --git a/tests/bugs/stripe/bug-1002207.t b/tests/bugs/stripe/bug-1002207.t
new file mode 100644
index 00000000000..c58a6e20ab8
--- /dev/null
+++ b/tests/bugs/stripe/bug-1002207.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume create $V0 stripe 2 $H0:$B0/${V0}{1,2,3,4,5,6,7,8};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs --attribute-timeout=0 --entry-timeout=0 -s $H0 --volfile-id=$V0 $M0;
+TEST dd if=/dev/zero of=$M0/file$i.data bs=1024 count=1024;
+
+function xattr_query_check()
+{
+ local path=$1
+ local xa_name=$2
+
+ local ret=$(getfattr -n $xa_name $path 2>&1 | grep -o "$xa_name: No such attribute" | wc -l)
+ echo $ret
+}
+
+function set_xattr()
+{
+ local path=$1
+ local xa_name=$2
+ local xa_val=$3
+
+ setfattr -n $xa_name -v $xa_val $path
+ echo $?
+}
+
+EXPECT 0 set_xattr $M0/file$i.data "trusted.name" "testofafairlylongxattrstringthatbutnotlongenoughtofailmemoryallocation"
+EXPECT 0 xattr_query_check $M0/file$i.data "trusted.name"
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/bugs/stripe/bug-1111454.t b/tests/bugs/stripe/bug-1111454.t
new file mode 100644
index 00000000000..1509dd7b1a2
--- /dev/null
+++ b/tests/bugs/stripe/bug-1111454.t
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+#symlink resolution should succeed
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 stripe 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0 --attribute-timeout=0 --entry-timeout=0
+TEST mkdir $M0/dir
+TEST touch $M0/dir/file
+TEST ln -s file $M0/dir/symlinkfile
+TEST ls -lR $M0
+cleanup
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=000000
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=000000
diff --git a/tests/bugs/trace/bug-797171.t b/tests/bugs/trace/bug-797171.t
new file mode 100755
index 00000000000..b823e477229
--- /dev/null
+++ b/tests/bugs/trace/bug-797171.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/brick1;
+TEST $CLI volume set $V0 debug.trace marker;
+TEST $CLI volume set $V0 debug.log-history on
+
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 \
+$M0;
+
+touch $M0/{1..22};
+rm -f $M0/*;
+
+pid_file=$(ls $GLUSTERD_PIDFILEDIR/vols/$V0/);
+brick_pid=$(cat $GLUSTERD_PIDFILEDIR/vols/$V0/$pid_file);
+
+mkdir $statedumpdir/statedump_tmp/;
+echo "path=$statedumpdir/statedump_tmp" > $statedumpdir/glusterdump.options;
+echo "all=yes" >> $statedumpdir/glusterdump.options;
+
+TEST $CLI volume statedump $V0 history;
+
+file_name=$(ls $statedumpdir/statedump_tmp);
+TEST grep "xlator.debug.trace.history" $statedumpdir/statedump_tmp/$file_name;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+rm -rf $statedumpdir/statedump_tmp;
+rm -f $statedumpdir/glusterdump.options;
+
+cleanup;
diff --git a/tests/bugs/transport/bug-873367.t b/tests/bugs/transport/bug-873367.t
new file mode 100755
index 00000000000..8070bc1b83c
--- /dev/null
+++ b/tests/bugs/transport/bug-873367.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+SSL_BASE=/etc/ssl
+SSL_KEY=$SSL_BASE/glusterfs.key
+SSL_CERT=$SSL_BASE/glusterfs.pem
+SSL_CA=$SSL_BASE/glusterfs.ca
+
+cleanup;
+rm -f $SSL_BASE/glusterfs.*
+mkdir -p $B0/1
+mkdir -p $M0
+
+TEST openssl genrsa -out $SSL_KEY 2048
+TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ln $SSL_CERT $SSL_CA
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/1
+TEST $CLI volume set $V0 server.ssl on
+TEST $CLI volume set $V0 client.ssl on
+TEST $CLI volume set $V0 ssl.certificate-depth 6
+TEST $CLI volume set $V0 ssl.cipher-list HIGH
+TEST $CLI volume set $V0 auth.ssl-allow Anyone
+TEST $CLI volume start $V0
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+echo some_data > $M0/data_file
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# If the bug is not fixed, the next mount will fail.
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+EXPECT some_data cat $M0/data_file
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/bugs/unclassified/bug-1034085.t b/tests/bugs/unclassified/bug-1034085.t
new file mode 100644
index 00000000000..aacaa24d642
--- /dev/null
+++ b/tests/bugs/unclassified/bug-1034085.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+#Test case: Check the creation of indices/xattrop dir as soon as brick comes up.
+
+. $(dirname $0)/../../include.rc
+
+cleanup;
+
+#Create a volume
+TEST glusterd;
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}-{0,1};
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+TEST mkdir -p $B0/${V0}-0/.glusterfs/indices/
+TEST touch $B0/${V0}-0/.glusterfs/indices/xattrop
+
+#Volume start should not work when xattrop dir not created
+TEST ! $CLI volume start $V0;
+
+TEST rm $B0/${V0}-0/.glusterfs/indices/xattrop
+
+#Volume start should work now
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+#Check for the existence of indices/xattrop dir
+TEST [ -d $B0/${V0}-0/.glusterfs/indices/xattrop/ ];
+
+cleanup;
diff --git a/tests/bugs/unclassified/bug-1357397.t b/tests/bugs/unclassified/bug-1357397.t
new file mode 100644
index 00000000000..e2ec6f4d253
--- /dev/null
+++ b/tests/bugs/unclassified/bug-1357397.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2}
+
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 features.trash on
+TEST $CLI volume set $V0 features.trash-internal-op on
+
+TEST [ -e $B0/${V0}1/.trashcan ]
+
+TEST [ -e $B0/${V0}1/.trashcan/internal_op ]
+
+TEST $CLI volume stop $V0
+
+rm -rf $B0/${V0}1/.trashcan/internal_op
+
+TEST [ ! -e $B0/${V0}1/.trashcan/internal_op ]
+
+TEST $CLI volume start $V0 force
+
+TEST [ -e $B0/${V0}1/.trashcan/internal_op ]
+
+cleanup
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1385758
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1385758
diff --git a/tests/bugs/unclassified/bug-874498.t b/tests/bugs/unclassified/bug-874498.t
new file mode 100644
index 00000000000..2aa9b168a8a
--- /dev/null
+++ b/tests/bugs/unclassified/bug-874498.t
@@ -0,0 +1,64 @@
+#!/bin/bash
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick1 $H0:$B0/brick2;
+TEST $CLI volume start $V0;
+
+
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0;
+B0_hiphenated=`echo $B0 | tr '/' '-'`
+kill_brick $V0 $H0 $B0/brick1
+
+echo "GLUSTER FILE SYSTEM" > $M0/FILE1
+echo "GLUSTER FILE SYSTEM" > $M0/FILE2
+
+FILEN=$B0"/brick2"
+XATTROP=$FILEN/.glusterfs/indices/xattrop
+
+function get_gfid()
+{
+path_of_file=$1
+
+gfid_value=`getfattr -d -m . $path_of_file -e hex 2>/dev/null | grep trusted.gfid | grep -v gfid2path | cut --complement -c -15 | sed 's/\([a-f0-9]\{8\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)/\1-\2-\3-\4-/'`
+
+echo $gfid_value
+}
+
+GFID_ROOT=`get_gfid $B0/brick2`
+GFID_FILE1=`get_gfid $B0/brick2/FILE1`
+GFID_FILE2=`get_gfid $B0/brick2/FILE2`
+
+
+count=0
+for i in `ls $XATTROP`
+do
+ if [ "$i" == "$GFID_ROOT" ] || [ "$i" == "$GFID_FILE1" ] || [ "$i" == "$GFID_FILE2" ]
+ then
+ count=$(( count + 1 ))
+ fi
+done
+
+EXPECT "3" echo $count
+
+
+TEST $CLI volume start $V0 force
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+
+##Expected number of entries are 0 in the .glusterfs/indices/xattrop directory
+EXPECT_WITHIN $HEAL_TIMEOUT '0' count_sh_entries $FILEN;
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/unclassified/bug-991622.t b/tests/bugs/unclassified/bug-991622.t
new file mode 100644
index 00000000000..17b37a7767d
--- /dev/null
+++ b/tests/bugs/unclassified/bug-991622.t
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+#This tests that no fd leaks are observed in unlink/rename in open-behind
+function leaked_fds {
+ ls -l /proc/$(get_brick_pid $V0 $H0 $B0/$V0)/fd | grep deleted
+}
+
+cleanup;
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume set $V0 open-behind on
+TEST $CLI volume start $V0
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id=$V0 $M0 --direct-io-mode=enable
+
+TEST fd1=`fd_available`
+TEST fd_open $fd1 'w' "$M0/testfile1"
+TEST fd_write $fd1 "content"
+
+TEST fd2=`fd_available`
+TEST fd_open $fd2 'w' "$M0/testfile2"
+TEST fd_write $fd2 "content"
+
+TEST touch $M0/a
+TEST rm $M0/testfile1
+TEST mv $M0/a $M0/testfile2
+TEST fd_close $fd1
+TEST fd_close $fd2
+TEST ! leaked_fds
+cleanup;
diff --git a/tests/bugs/upcall/bug-1227204.t b/tests/bugs/upcall/bug-1227204.t
new file mode 100755
index 00000000000..fc393b1837f
--- /dev/null
+++ b/tests/bugs/upcall/bug-1227204.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+# This regression test tries to ensure that quota limit-usage set work with
+# features.cache-invalidation on.
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4,5,6};
+TEST $CLI volume start $V0;
+
+TEST $CLI volume set $V0 features.cache-invalidation on;
+TEST $CLI volume quota $V0 enable;
+
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+TEST mkdir -p $M0/1/2;
+TEST $CLI volume quota $V0 limit-usage /1/2 100MB 70%;
+
+TEST $CLI volume status $V0
+TEST $CLI volume stop $V0
+
+cleanup;
diff --git a/tests/bugs/upcall/bug-1369430.t b/tests/bugs/upcall/bug-1369430.t
new file mode 100755
index 00000000000..f53c17a1495
--- /dev/null
+++ b/tests/bugs/upcall/bug-1369430.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## 1. Start glusterd
+TEST glusterd;
+
+## 2. Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+## 3. Start the volume
+TEST $CLI volume start $V0
+
+## 4. Enable the upcall xlator, and increase the md-cache timeout to max
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+TEST $CLI volume set $V0 performance.cache-invalidation on
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.cache-samba-metadata on
+
+## 8. Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+## 10. Create directory and files from the M0
+TEST mkdir $M0/dir1
+TEST touch $M0/dir1/file{1..5}
+
+## 12. Access the files via readdirp, from M1
+TEST ls -l $M1/dir1/
+
+# Change the stat of one of the files from M0 and wait for it to
+# invalidate the md-cache of another mount M0
+echo "hello" > $M0/dir1/file2
+sleep 2;
+
+## 13. Expct non zero size when stat from M1
+EXPECT_NOT "0" stat -c %s $M1/dir1/file2
+
+cleanup;
diff --git a/tests/bugs/upcall/bug-1394131.t b/tests/bugs/upcall/bug-1394131.t
new file mode 100755
index 00000000000..b371ce4e682
--- /dev/null
+++ b/tests/bugs/upcall/bug-1394131.t
@@ -0,0 +1,29 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## 1. Start glusterd
+TEST glusterd;
+
+## 2. Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+## 3. Enable the upcall xlator, and increase the md-cache timeout to max
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+TEST $CLI volume set $V0 indexing on
+
+## 6. Start the volume
+TEST $CLI volume start $V0
+
+## 7. Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+## 8. Create directory and files from the M0
+TEST touch $M0/file1
+TEST mv $M0/file1 $M0/file2
+
+cleanup;
diff --git a/tests/bugs/upcall/bug-1422776.t b/tests/bugs/upcall/bug-1422776.t
new file mode 100755
index 00000000000..cb249ce1cd2
--- /dev/null
+++ b/tests/bugs/upcall/bug-1422776.t
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start glusterd
+TEST glusterd;
+
+## Lets create volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+## Enable the upcall xlator, and increase the md-cache timeout to max
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+TEST $CLI volume set $V0 indexing on
+
+## Start the volume
+TEST $CLI volume start $V0
+TEST $CLI volume quota $V0 enable
+
+## Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+## Create directory and files from the M0
+TEST touch $M0/file1
+TEST mv $M0/file1 $M0/file2
+
+cleanup;
diff --git a/tests/bugs/upcall/bug-1458127.t b/tests/bugs/upcall/bug-1458127.t
new file mode 100755
index 00000000000..e844f37f1d3
--- /dev/null
+++ b/tests/bugs/upcall/bug-1458127.t
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0}
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume set $V0 performance.nl-cache on
+TEST $CLI volume set $V0 nl-cache-positive-entry on
+TEST $CLI volume set $V0 nl-cache-timeout 2
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 2
+TEST $CLI volume set $V0 md-cache-timeout 20
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+TEST mkdir $M0/dir
+TEST touch $M0/dir/xyz
+#Wait until upcall clears the fact that M0 had accessed dir
+sleep 4
+TEST mv $M0/dir/xyz $M0/dir/xyz1
+TEST ! ls $M0/dir/file1
+TEST touch $M1/dir/file1
+TEST ls $M0/dir/file1
+TEST ls $M0/dir/file1
+
+cleanup;
diff --git a/tests/bugs/upcall/bug-upcall-stat.t b/tests/bugs/upcall/bug-upcall-stat.t
new file mode 100755
index 00000000000..0ba944ec441
--- /dev/null
+++ b/tests/bugs/upcall/bug-upcall-stat.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+TEST glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1};
+
+TEST $CLI volume set $V0 features.cache-invalidation on
+TEST $CLI volume set $V0 features.cache-invalidation-timeout 600
+TEST $CLI volume set $V0 performance.cache-invalidation on
+TEST $CLI volume set $V0 performance.md-cache-timeout 600
+TEST $CLI volume set $V0 performance.cache-samba-metadata on
+
+#TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+
+## 5. Create two gluster mounts
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M0
+TEST glusterfs --volfile-id=/$V0 --volfile-server=$H0 $M1
+
+TEST $CLI volume profile $V0 start
+
+## 8. Create a file
+TEST touch $M0/file1
+
+TEST "setfattr -n user.DOSATTRIB -v "abc" $M0/file1"
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q abc"
+TEST "setfattr -n user.DOSATTRIB -v "xyz" $M0/file1"
+sleep 2; #There can be a very very small window where the next getxattr
+ #reaches md-cache, before the cache-invalidation caused by previous
+ #setxattr, reaches md-cache. Hence sleeping for 2 sec.
+ #Also it should not be > 600.
+TEST "getfattr -n user.DOSATTRIB $M1/file1 | grep -q xyz"
+
+$CLI volume profile $V0 info | grep -q CI_XATTR
+EXPECT '0' echo $?
diff --git a/tests/bugs/write-behind/bug-1058663.c b/tests/bugs/write-behind/bug-1058663.c
new file mode 100644
index 00000000000..aedf97d7487
--- /dev/null
+++ b/tests/bugs/write-behind/bug-1058663.c
@@ -0,0 +1,123 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <signal.h>
+
+#define FILE_SIZE 1048576
+
+/* number of tests to run */
+#define RUN_LOOP 1000
+
+/* number of SIGBUS before exiting */
+#define MAX_SIGBUS 1
+static int expect_sigbus;
+static int sigbus_received;
+
+/* test for truncate()/seek()/write()/mmap()
+ * There should ne no SIGBUS triggered.
+ */
+void
+seek_write(char *filename)
+{
+ int fd;
+ uint8_t *map;
+ int i;
+
+ fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0600);
+ lseek(fd, FILE_SIZE - 1, SEEK_SET);
+ write(fd, "\xff", 1);
+
+ map = mmap(NULL, FILE_SIZE, PROT_READ, MAP_PRIVATE, fd, 0);
+ for (i = 0; i < (FILE_SIZE - 1); i++) {
+ if (map[i] != 0) /* should never be true */
+ abort();
+ }
+ munmap(map, FILE_SIZE);
+
+ close(fd);
+}
+
+int
+read_after_eof(char *filename)
+{
+ int ret = 0;
+ int fd;
+ char *data;
+ uint8_t *map;
+
+ fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 0600);
+ lseek(fd, FILE_SIZE - 1, SEEK_SET);
+ write(fd, "\xff", 1);
+
+ /* trigger verify that reading after EOF fails */
+ ret = read(fd, data, FILE_SIZE / 2);
+ if (ret != 0)
+ return 1;
+
+ /* map an area of 1 byte after FILE_SIZE */
+ map = mmap(NULL, 1, PROT_READ, MAP_PRIVATE, fd, FILE_SIZE);
+ /* map[0] is an access after EOF, it should trigger SIGBUS */
+ if (map[0] != 0)
+ /* it is expected that we exit before we get here */
+ if (!sigbus_received)
+ return 1;
+ munmap(map, FILE_SIZE);
+
+ close(fd);
+
+ return ret;
+}
+
+/* signal handler for SIGBUS */
+void
+catch_sigbus(int signum)
+{
+ switch (signum) {
+#ifdef __NetBSD__
+ /* Depending on architecture, we can get SIGSEGV */
+ case SIGSEGV: /* FALLTHROUGH */
+#endif
+ case SIGBUS:
+ sigbus_received++;
+ if (!expect_sigbus)
+ exit(EXIT_FAILURE);
+ if (sigbus_received >= MAX_SIGBUS)
+ exit(EXIT_SUCCESS);
+ break;
+ default:
+ printf("Unexpected signal received: %d\n", signum);
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ int i = 0;
+
+ if (argc == 1) {
+ printf("Usage: %s <filename>\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+#ifdef __NetBSD__
+ /* Depending on architecture, we can get SIGSEGV */
+ signal(SIGSEGV, catch_sigbus);
+#endif
+ signal(SIGBUS, catch_sigbus);
+
+ /* the next test should not trigger SIGBUS */
+ expect_sigbus = 0;
+ for (i = 0; i < RUN_LOOP; i++) {
+ seek_write(argv[1]);
+ }
+
+ /* the next test should trigger SIGBUS */
+ expect_sigbus = 1;
+ if (read_after_eof(argv[1]))
+ return EXIT_FAILURE;
+
+ return EXIT_SUCCESS;
+}
diff --git a/tests/bugs/write-behind/bug-1058663.t b/tests/bugs/write-behind/bug-1058663.t
new file mode 100644
index 00000000000..a900a6d7afa
--- /dev/null
+++ b/tests/bugs/write-behind/bug-1058663.t
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/$V0;
+TEST $CLI volume start $V0;
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+# compile the test program and run it
+TEST $CC $(dirname $0)/bug-1058663.c -o $(dirname $0)/bug-1058663;
+TEST $(dirname $0)/bug-1058663 $M0/bug-1058663.bin;
+TEST rm -f $(dirname $0)/M0/bug-1058663.bin;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/bugs/write-behind/bug-1279730.c b/tests/bugs/write-behind/bug-1279730.c
new file mode 100644
index 00000000000..706ae67b102
--- /dev/null
+++ b/tests/bugs/write-behind/bug-1279730.c
@@ -0,0 +1,149 @@
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <assert.h>
+
+int
+main(int argc, char *argv[])
+{
+ int fd = -1, ret = -1, len = 0;
+ char *path = NULL,
+ buf[128] =
+ {
+ 0,
+ },
+ *cmd = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ int write_to_child[2] =
+ {
+ 0,
+ },
+ write_to_parent[2] = {
+ 0,
+ };
+
+ path = argv[1];
+ cmd = argv[2];
+
+ assert(argc == 3);
+
+ ret = pipe(write_to_child);
+ if (ret < 0) {
+ fprintf(stderr,
+ "creation of write-to-child pipe failed "
+ "(%s)\n",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = pipe(write_to_parent);
+ if (ret < 0) {
+ fprintf(stderr,
+ "creation of write-to-parent pipe failed "
+ "(%s)\n",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = fork();
+ switch (ret) {
+ case 0:
+ close(write_to_child[1]);
+ close(write_to_parent[0]);
+
+ /* child, wait for instructions to execute command */
+ ret = read(write_to_child[0], buf, 128);
+ if (ret < 0) {
+ fprintf(stderr, "child: read on pipe failed (%s)\n",
+ strerror(errno));
+ goto out;
+ }
+
+ system(cmd);
+
+ ret = write(write_to_parent[1], "1", 2);
+ if (ret < 0) {
+ fprintf(stderr, "child: write to pipe failed (%s)\n",
+ strerror(errno));
+ goto out;
+ }
+ break;
+
+ case -1:
+ fprintf(stderr, "fork failed (%s)\n", strerror(errno));
+ goto out;
+
+ default:
+ close(write_to_parent[1]);
+ close(write_to_child[0]);
+
+ fd = open(path, O_CREAT | O_RDWR | O_APPEND, S_IRWXU);
+ if (fd < 0) {
+ fprintf(stderr, "open failed (%s)\n", strerror(errno));
+ goto out;
+ }
+
+ len = strlen("test-content") + 1;
+ ret = write(fd, "test-content", len);
+
+ if (ret < len) {
+ fprintf(stderr, "write failed %d (%s)\n", ret, strerror(errno));
+ }
+
+ ret = pread(fd, buf, 128, 0);
+ if ((ret == len) && (strcmp(buf, "test-content") == 0)) {
+ fprintf(stderr,
+ "read should've failed as previous "
+ "write would've failed with EDQUOT, but its "
+ "successful");
+ ret = -1;
+ goto out;
+ }
+
+ ret = write(write_to_child[1], "1", 2);
+ if (ret < 0) {
+ fprintf(stderr, "parent: write to pipe failed (%s)\n",
+ strerror(errno));
+ goto out;
+ }
+
+ ret = read(write_to_parent[0], buf, 128);
+ if (ret < 0) {
+ fprintf(stderr, "parent: read from pipe failed (%s)\n",
+ strerror(errno));
+ goto out;
+ }
+
+ /* this will force a sync on cached-write and now that quota
+ limit is increased, sync will be successful. ignore return
+ value as fstat would fail with EDQUOT (picked up from
+ cached-write because of previous sync failure.
+ */
+ fstat(fd, &stbuf);
+
+ ret = pread(fd, buf, 128, 0);
+ if (ret != len) {
+ fprintf(stderr,
+ "post cmd read failed %d (data:%s) "
+ "(error:%s)\n",
+ ret, buf, strerror(errno));
+ goto out;
+ }
+
+ if (strcmp(buf, "test-content")) {
+ fprintf(stderr, "wrong data (%s)\n", buf);
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
diff --git a/tests/bugs/write-behind/bug-1279730.t b/tests/bugs/write-behind/bug-1279730.t
new file mode 100755
index 00000000000..20447c349d5
--- /dev/null
+++ b/tests/bugs/write-behind/bug-1279730.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../fileio.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/$V0;
+TEST $CLI volume start $V0;
+TEST $CLI volume quota $V0 enable
+TEST $CLI volume quota $V0 limit-usage / 4
+TEST $CLI volume quota $V0 hard-timeout 0
+TEST $CLI volume quota $V0 soft-timeout 0
+
+TEST glusterfs --entry-timeout=0 --attribute-timeout=0 -s $H0 --volfile-id $V0 $M0
+
+# compile the test program and run it
+TEST $CC -O0 -g3 $(dirname $0)/bug-1279730.c -o $(dirname $0)/bug-1279730
+
+TEST $(dirname $0)/bug-1279730 $M0/file "\"$CLI volume quota $V0 limit-usage / 1024\""
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+
+cleanup;
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1279730
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1279730
+
diff --git a/tests/bugs/write-behind/issue-884.c b/tests/bugs/write-behind/issue-884.c
new file mode 100644
index 00000000000..e9c33b351ad
--- /dev/null
+++ b/tests/bugs/write-behind/issue-884.c
@@ -0,0 +1,267 @@
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <time.h>
+#include <assert.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <pthread.h>
+
+#include <glusterfs/api/glfs.h>
+
+/* Based on a reproducer by Stefan Ring. It seems to be quite sensible to any
+ * timing modification, so the code has been maintained as is, only with minor
+ * changes. */
+
+struct glfs *glfs;
+
+pthread_mutex_t the_mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t the_cond = PTHREAD_COND_INITIALIZER;
+
+typedef struct _my_aiocb {
+ int64_t size;
+ volatile int64_t seq;
+ int which;
+} my_aiocb;
+
+typedef struct _worker_data {
+ my_aiocb cb;
+ struct iovec iov;
+ int64_t offset;
+} worker_data;
+
+typedef struct {
+ worker_data wdata[2];
+
+ volatile unsigned busy;
+} all_data_t;
+
+all_data_t all_data;
+
+static void
+completion_fnc(struct glfs_fd *fd, ssize_t ret, struct glfs_stat *pre,
+ struct glfs_stat *post, void *arg)
+{
+ void *the_thread;
+ my_aiocb *cb = (my_aiocb *)arg;
+ long seq = cb->seq;
+
+ assert(ret == cb->size);
+
+ pthread_mutex_lock(&the_mutex);
+ pthread_cond_broadcast(&the_cond);
+
+ all_data.busy &= ~(1 << cb->which);
+ cb->seq = -1;
+
+ the_thread = (void *)pthread_self();
+ printf("worker %d is done from thread %p, seq %ld!\n", cb->which,
+ the_thread, seq);
+
+ pthread_mutex_unlock(&the_mutex);
+}
+
+static void
+init_wdata(worker_data *data, int which)
+{
+ data->cb.which = which;
+ data->cb.seq = -1;
+
+ data->iov.iov_base = malloc(1024 * 1024);
+ memset(data->iov.iov_base, 6,
+ 1024 * 1024); /* tail part never overwritten */
+}
+
+static void
+init()
+{
+ all_data.busy = 0;
+
+ init_wdata(&all_data.wdata[0], 0);
+ init_wdata(&all_data.wdata[1], 1);
+}
+
+static void
+do_write(struct glfs_fd *fd, int content, int size, int64_t seq,
+ worker_data *wdata, const char *name)
+{
+ int ret;
+
+ wdata->cb.size = size;
+ wdata->cb.seq = seq;
+
+ if (content >= 0)
+ memset(wdata->iov.iov_base, content, size);
+ wdata->iov.iov_len = size;
+
+ pthread_mutex_lock(&the_mutex);
+ printf("(%d) dispatching write \"%s\", offset %lx, len %x, seq %ld\n",
+ wdata->cb.which, name, (long)wdata->offset, size, (long)seq);
+ pthread_mutex_unlock(&the_mutex);
+ ret = glfs_pwritev_async(fd, &wdata->iov, 1, wdata->offset, 0,
+ completion_fnc, &wdata->cb);
+ assert(ret >= 0);
+}
+
+#define IDLE 0 // both workers must be idle
+#define ANY 1 // use any worker, other one may be busy
+
+int
+get_worker(int waitfor, int64_t excl_seq)
+{
+ int which;
+
+ pthread_mutex_lock(&the_mutex);
+
+ while (waitfor == IDLE && (all_data.busy & 3) != 0 ||
+ waitfor == ANY &&
+ ((all_data.busy & 3) == 3 ||
+ excl_seq >= 0 && (all_data.wdata[0].cb.seq == excl_seq ||
+ all_data.wdata[1].cb.seq == excl_seq)))
+ pthread_cond_wait(&the_cond, &the_mutex);
+
+ if (!(all_data.busy & 1))
+ which = 0;
+ else
+ which = 1;
+
+ all_data.busy |= (1 << which);
+
+ pthread_mutex_unlock(&the_mutex);
+
+ return which;
+}
+
+static int
+doit(struct glfs_fd *fd)
+{
+ int ret;
+ int64_t seq = 0;
+ int64_t offset = 0; // position in file, in blocks
+ int64_t base = 0x1000; // where to place the data, in blocks
+
+ int async_mode = ANY;
+
+ init();
+
+ for (;;) {
+ int which;
+ worker_data *wdata;
+
+ // for growing to the first offset
+ for (;;) {
+ int gap = base + 0x42 - offset;
+ if (!gap)
+ break;
+ if (gap > 80)
+ gap = 80;
+
+ which = get_worker(IDLE, -1);
+ wdata = &all_data.wdata[which];
+
+ wdata->offset = offset << 9;
+ do_write(fd, 0, gap << 9, seq++, wdata, "gap-filling");
+
+ offset += gap;
+ }
+
+ // 8700
+ which = get_worker(IDLE, -1);
+ wdata = &all_data.wdata[which];
+
+ wdata->offset = (base + 0x42) << 9;
+ do_write(fd, 1, 62 << 9, seq++, wdata, "!8700");
+
+ // 8701
+ which = get_worker(IDLE, -1);
+ wdata = &all_data.wdata[which];
+
+ wdata->offset = (base + 0x42) << 9;
+ do_write(fd, 2, 55 << 9, seq++, wdata, "!8701");
+
+ // 8702
+ which = get_worker(async_mode, -1);
+ wdata = &all_data.wdata[which];
+
+ wdata->offset = (base + 0x79) << 9;
+ do_write(fd, 3, 54 << 9, seq++, wdata, "!8702");
+
+ // 8703
+ which = get_worker(async_mode, -1);
+ wdata = &all_data.wdata[which];
+
+ wdata->offset = (base + 0xaf) << 9;
+ do_write(fd, 4, 81 << 9, seq++, wdata, "!8703");
+
+ // 8704
+ // this writes both 5s and 6s
+ // the range of 5s is the one that overwrites 8703
+
+ which = get_worker(async_mode, seq - 1);
+ wdata = &all_data.wdata[which];
+
+ memset(wdata->iov.iov_base, 5, 81 << 9);
+ wdata->offset = (base + 0xaf) << 9;
+ do_write(fd, -1, 1623 << 9, seq++, wdata, "!8704");
+
+ offset = base + 0x706;
+ base += 0x1000;
+ if (base >= 0x100000)
+ break;
+ }
+
+ printf("done!\n");
+ fflush(stdout);
+
+ pthread_mutex_lock(&the_mutex);
+
+ while ((all_data.busy & 3) != 0)
+ pthread_cond_wait(&the_cond, &the_mutex);
+
+ pthread_mutex_unlock(&the_mutex);
+
+ ret = glfs_close(fd);
+ assert(ret >= 0);
+ /*
+ ret = glfs_fini(glfs);
+ assert(ret >= 0);
+ */
+ return 0;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret;
+ int open_flags = O_RDWR | O_DIRECT | O_TRUNC;
+ struct glfs_fd *fd;
+
+ glfs = glfs_new(argv[1]);
+ if (!glfs) {
+ printf("glfs_new!\n");
+ goto out;
+ }
+ ret = glfs_set_volfile_server(glfs, "tcp", "localhost", 24007);
+ if (ret < 0) {
+ printf("set_volfile!\n");
+ goto out;
+ }
+ ret = glfs_init(glfs);
+ if (ret) {
+ printf("init!\n");
+ goto out;
+ }
+ fd = glfs_open(glfs, argv[2], open_flags);
+ if (!fd) {
+ printf("open!\n");
+ goto out;
+ }
+ srand(time(NULL));
+ return doit(fd);
+out:
+ return 1;
+}
diff --git a/tests/bugs/write-behind/issue-884.t b/tests/bugs/write-behind/issue-884.t
new file mode 100755
index 00000000000..2bcf7d15265
--- /dev/null
+++ b/tests/bugs/write-behind/issue-884.t
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+
+# This test tries to detect a race condition in write-behind. It's based on a
+# reproducer written by Stefan Ring that is able to hit it sometimes. On my
+# system, it happened around 10% of the runs. This means that if this bug
+# appears again, this test will fail once every 10 runs. Most probably this
+# failure will be hidden by the automatic test retry of the testing framework.
+#
+# Please, if this test fails, it needs to be analyzed in detail.
+
+function run() {
+ "${@}" >/dev/null
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+# This makes it easier to hit the issue
+TEST $CLI volume set $V0 client-log-level TRACE
+TEST $CLI volume start $V0
+
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+build_tester $(dirname $0)/issue-884.c -lgfapi
+
+TEST touch $M0/testfile
+
+# This program generates a file of 535694336 bytes with a fixed pattern
+TEST run $(dirname $0)/issue-884 $V0 testfile
+
+# This is the md5sum of the expected pattern without corruption
+EXPECT "ad105f9349345a70fc697632cbb5eec8" echo "$(md5sum $B0/$V0/testfile | awk '{ print $1; }')"
+
+cleanup
diff --git a/tests/changelog.rc b/tests/changelog.rc
new file mode 100644
index 00000000000..ffad2e80632
--- /dev/null
+++ b/tests/changelog.rc
@@ -0,0 +1,9 @@
+
+function count_htime_files {
+ ls -l $B0/$V0"1"/.glusterfs/changelogs/htime/ | grep HTIME | wc -l
+}
+
+function count_changelog_files {
+ # Where $1 is the brick name passed
+ ls -l $1/.glusterfs/changelogs/ | grep CHANGELOG | wc -l
+}
diff --git a/tests/cleanup.sh b/tests/cleanup.sh
new file mode 100644
index 00000000000..29fd6929fac
--- /dev/null
+++ b/tests/cleanup.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+
+. $(dirname $0)/include.rc
+cleanup
diff --git a/tests/cluster.rc b/tests/cluster.rc
new file mode 100644
index 00000000000..34f5b02398f
--- /dev/null
+++ b/tests/cluster.rc
@@ -0,0 +1,218 @@
+#!/bin/bash
+
+CLUSTER_PFX="127.1.1"; # ".x" for each glusterd
+CLUSTER_COUNT=1; # Just initial definition
+
+function launch_cluster() {
+ local count=$1;
+
+ CLUSTER_COUNT=$count;
+
+ define_backends $count;
+ define_hosts $count;
+ define_glusterds $count $2;
+ define_clis $count $3;
+
+ start_glusterds;
+}
+
+
+function define_backends() {
+ local b;
+
+ for i in `seq 1 $count`; do
+ eval "B$i=$B0/$i";
+ done
+
+ for i in `seq 1 $count`; do
+ b="B$i";
+ mkdir -p ${!b}/glusterd;
+ mkdir -p ${!b}/run;
+ done
+}
+
+
+function define_glusterds() {
+ local count=$1;
+ local h;
+ local b;
+ local wopt;
+ local bopt;
+ local popt;
+
+ for i in `seq 1 $count`; do
+ b="B$i";
+ h="H$i";
+ wopt="management.working-directory=${!b}/glusterd";
+ ropt="management.run-directory=${!b}/run/gluster";
+ bopt="management.transport.socket.bind-address=${!h}";
+ popt="--pid-file=${!b}/glusterd.pid";
+ sopt="management.glusterd-sockfile=${!b}/glusterd/gd.sock"
+ #Get the logdir
+ logdir=`gluster --print-logdir`
+ clopt="management.cluster-test-mode=${logdir}/$i";
+ #Fetch the testcases name and prefix the glusterd log with it
+ logfile=`echo ${0##*/}`_glusterd$i.log
+ lopt="--log-file=$logdir/$i/$logfile"
+ if [ "$2" == "-LDEBUG" ]; then
+ eval "glusterd_$i='glusterd -LDEBUG --xlator-option $wopt --xlator-option $bopt --xlator-option $ropt --xlator-option $sopt --xlator-option $clopt $lopt $popt'";
+ eval "glusterd$i='glusterd -LDEBUG --xlator-option $wopt --xlator-option $bopt --xlator-option $ropt --xlator-option $sopt --xlator-option $clopt $lopt $popt'";
+ else
+ eval "glusterd_$i='glusterd --xlator-option $wopt --xlator-option $bopt --xlator-option $ropt --xlator-option $sopt --xlator-option $clopt $lopt $popt'";
+ eval "glusterd$i='glusterd --xlator-option $wopt --xlator-option $bopt --xlator-option $ropt --xlator-option $sopt --xlator-option $clopt $lopt $popt'";
+ fi
+ done
+}
+
+function start_glusterd() {
+ local g
+ local index=$1
+
+ g="glusterd_${index}"
+ ${!g}
+}
+
+function start_glusterds() {
+ for i in `seq 1 $CLUSTER_COUNT`; do
+ start_glusterd $i
+ done
+}
+
+
+function kill_glusterd() {
+ local index=$1;
+ local b;
+ local pidfile;
+
+ b="B$index";
+ pidfile="${!b}/glusterd.pid";
+
+ kill `cat $pidfile`;
+}
+
+function restart_glusterd() {
+ local index=$1
+ local b
+ local pidfile
+ local g
+
+ b="B$index"
+ pidfile="${!b}/glusterd.pid"
+
+ kill `cat $pidfile`
+
+ g="glusterd_${index}"
+ ${!g}
+}
+
+function kill_node() {
+ local index=$1;
+ local h;
+
+ h="H$index";
+
+ terminate_pids $(ps -ef | grep gluster | grep ${!h} | awk '{print $2}')
+ find $B0/$index/glusterd/vols -name '*.pid' | xargs rm -f
+}
+
+
+function define_hosts() {
+ local count=$1;
+
+ for i in `seq 1 $count`; do
+ eval "H_$i=${CLUSTER_PFX}.$i"
+ eval "H$i=${CLUSTER_PFX}.$i";
+ case $OSTYPE in
+ NetBSD)
+ ifconfig lo0 alias ${CLUSTER_PFX}.$i 2>/dev/null
+ ;;
+ *)
+ ;;
+ esac
+ done
+}
+
+
+function define_clis() {
+ local count=$1;
+ local h;
+
+ for i in `seq 1 $count`; do
+ b="B$i";
+ #get the logdir
+ logdir=`gluster --print-logdir`
+ #Fetch the testcases name and prefix the cli log with it
+ logfile=`echo ${0##*/}`_cli$i.log
+ lopt="--log-file=$logdir/$logfile"
+ logfile1=`echo ${0##*/}`_cli_$i.log
+ lopt1="--log-file=$logdir/$logfile1"
+
+
+ if [ "$2" == "-NO_FORCE" ]; then
+ eval "CLI_$i='$CLI_NO_FORCE --glusterd-sock=${!b}/glusterd/gd.sock $lopt'";
+ eval "CLI$i='$CLI_NO_FORCE --glusterd-sock=${!b}/glusterd/gd.sock $lopt1'";
+ else
+ eval "CLI_$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt'";
+ eval "CLI$i='$CLI --glusterd-sock=${!b}/glusterd/gd.sock $lopt1'";
+ fi
+ done
+}
+
+function peer_count() {
+ $CLI_1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+function attempt_replace_brick {
+ local cli_no=$1
+ local vol=$2;
+ local src_brick=$3;
+ local dst_brick=$4;
+
+ eval \$CLI_$cli_no volume replace-brick $vol $src_brick $dst_brick commit force;
+ echo $?
+}
+
+function cluster_rebalance_status_field {
+ #The rebalance status can be up to 3 words, (e.g.:'fix-layout in progress'), hence the awk-print $7 thru $9.
+ #But if the status is less than 3 words, it also prints the next field i.e the run_time_in_secs.(e.g.:'completed 3.00').
+ #So we trim the numbers out with `tr`. Finally remove the trailing white spaces with sed. What we get is one of the
+ #strings in the 'cli_vol_task_status_str' char array of cli-rpc-ops.c
+
+ eval \$CLI_$1 volume rebalance $2 status | awk '{print $7,$8,$9}' |sed -n 3p |tr -d '[^0-9+\.]'|sed 's/ *$//g'
+}
+
+function cluster_volinfo_field()
+{
+ local vol=$2;
+ local field=$3;
+ eval \$CLI_$1 volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+function volinfo_field_1()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI_1 volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+function volinfo_field_2()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI_2 volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+function cluster_brick_up_status {
+ local vol=$2
+ local host=$3
+ local brick=$4
+ eval \$CLI_$1 volume status $vol $host:$brick --xml | sed -ne 's/.*<status>\([01]\)<\/status>/\1/p'
+}
+
+function cluster_remove_brick_status_completed_field {
+ local vol=$1
+ local brick_list=$2
+ $CLI_1 volume remove-brick $vol $brick_list status | awk '{print $7}' | sed -n 3p
+}
diff --git a/tests/common-utils.rc b/tests/common-utils.rc
new file mode 100644
index 00000000000..2be4076e8b6
--- /dev/null
+++ b/tests/common-utils.rc
@@ -0,0 +1,7 @@
+
+function run_cmd_as_user {
+ local user=$1
+ shift
+ su -m $user -c "$*" || return 1
+ return 0
+}
diff --git a/tests/configfiles/bad_exports b/tests/configfiles/bad_exports
new file mode 100644
index 00000000000..6fd18d9415a
--- /dev/null
+++ b/tests/configfiles/bad_exports
@@ -0,0 +1,9 @@
+#$Id$
+#/0838586658093758013308385866580937580133083858665809375801330838586658093758013308385866580937580133083858665809375801330838586658093758013308385866580937580133 @test(sec=sys,rw,anonuid=0) 10.35.11.32(sec=sys,rw,anonuid=0)
+
+/test @test(sec=sys,rw,anonuid=0) shreyas.facebook.com(sec=sys,rw,anonuid=0) shreyas.s(sec=sys,rw,anonuid=595)
+çççßåß僃
+/asdf @ObVyg571RJaorkGbgVerI9esrck8yiVD7NVqqJvj2H9DuPH3SzHnYLIXjd4zZjuuh2N0O0bYYQf7VYNrYHoxc1llgRU1iEsQRy2XaWnUlhTHKVEL8tt1TrbZCi8qXyg0l058rTnW4msvU5hW83ESwyOE4bBSz4VsW0sJaVd8Gv4waZqojemLN8AIlAoChVOfP1yhuAP1298ejkaf2fjhdfa4t4effhgadff342fdddgasdg42gahgdmnui24290hfjdjadgdkjhg2nvncms(sec=sys,rw,anonuid=1)
+#/vol/root -sec=sys,rw=@storage.prn1:@storage.ash4:@storage.frc1,anon=0
+#/vol/home107 -sec=sys,rw,nosuid,root=@storage.prn1:@storage.ash4:@storage.frc1:@hr.ash3:@hr.prn1:ldap001.prn1.facebook.com:ldap001.frc1.facebook.com
+#/vol/home109 -sec=sys,rw,nosuid,root=@storage.prn1:@storage.ash4:@storage.frc1:@hr.ash3:@hr.prn1:ldap001.prn1.facebook.com:ldap001.frc1.facebook.com
diff --git a/tests/configfiles/bad_netgroups b/tests/configfiles/bad_netgroups
new file mode 100644
index 00000000000..ea27edfef10
--- /dev/null
+++ b/tests/configfiles/bad_netgroups
@@ -0,0 +1,5 @@
+asdf ng1
+ng1 ng2
+ng2 (dev1763.prn2.facebook.com, ,)
+
+emptyng
diff --git a/tests/configfiles/big_exports b/tests/configfiles/big_exports
new file mode 100644
index 00000000000..9ca5d655664
--- /dev/null
+++ b/tests/configfiles/big_exports
@@ -0,0 +1,10 @@
+/75213U8JV58PBY7F0VFGJ080MH3K71 @ZXV3UE7WJSCZSPMPAYUBACCZUOD0XY(sec=sys,rw,anonuid=0) 9PAC2KCTKRIH62CPGAMAUAJGLVQNS3(sec=sys,rw,anonuid=0)
+/O4DYT8D6QVS9EKEHTYOPTYL6IWU4DN @KLBH3LB3UN5LWDWPPQEQWEHYVL3K0A(sec=sys,rw,anonuid=0) B37PXMCQMY5IQPDGV08XC7ITYT650V(sec=sys,rw,anonuid=0)
+/OFHJLTKZMDAN28Q9IQQQIPFUEZ2YAN @UY3K3B8C05OQ4OTX42VXQKJ2CGJ8QX(sec=sys,rw,anonuid=0) AM0ET70HT6YND7D8RKG446LEOW40EC(sec=sys,rw,anonuid=0)
+/3VDZ2JHFQ2JGF2GQGYQH38UPAW6A6T @DEPUVDYZOJFCSQ7KD07NVPAFGEG7YJ(sec=sys,rw,anonuid=0) 5HI538NCEYF7KY7HC1F69UBWFVTIGA(sec=sys,rw,anonuid=0)
+/4ZI3ZRJUNQM21ZM8VB891X4ZCUHK7E @7U8TNSZ55AWJAOPAIV67OGPWLGM4JV(sec=sys,rw,anonuid=0) 8698JR9V4KKENE7UGYHV3T4XG9K0NH(sec=sys,rw,anonuid=0)
+/A4CSZ2FQ3VYPT9R0HYN3QVQ7TK9IHI @G2Z45H649YZ9WNC3OSU7STCLT3VWT9(sec=sys,rw,anonuid=0) 65CA94Z7JXZ0F0JB5EP95I6FBJT673(sec=sys,rw,anonuid=0)
+/G91PI0EX5TUYSX91IAH49M1GEMNKSP @O5IFIYJUENNNK16U0FK0QCDE0DK9G2(sec=sys,rw,anonuid=0) A8AZTTWC7BMTV8YW8XE4R57WUOSUMZ(sec=sys,rw,anonuid=0)
+/YCZFA0ALYC284R60E7QXQN7AVSILFO @7OGJV2J1NOII7UOGN12SUNRW3XBWWG(sec=sys,rw,anonuid=0) HDCDTY43SXOAH1TNUKB23MO9DE574W(sec=sys,rw,anonuid=0)
+/VBGB57O8R87B9N4E8QPGU6D55DVZE5 @F95KY58VAUOUX30QKIN16U987UU9BE(sec=sys,rw,anonuid=0) WGSH35L15FT2IC0IT9PTCU8SCYW9W4(sec=sys,rw,anonuid=0)
+/NTHST2FDSP35BKEEIOQIQX38722AN0 @T9BXSDXF2N5HVOM8P1BN0Q5IQ6RC34(sec=sys,rw,anonuid=0) OLJR1KXJRY14UEZNV1LP7RV68KPIW7(sec=sys,rw,anonuid=0)
diff --git a/tests/configfiles/exports b/tests/configfiles/exports
new file mode 100644
index 00000000000..82ba450403e
--- /dev/null
+++ b/tests/configfiles/exports
@@ -0,0 +1 @@
+/test @test(sec=sys,rw,anonuid=0) 10.35.11.31(sec=sys,rw,anonuid=0)
diff --git a/tests/configfiles/exports-v6 b/tests/configfiles/exports-v6
new file mode 100644
index 00000000000..426b1ef5705
--- /dev/null
+++ b/tests/configfiles/exports-v6
@@ -0,0 +1 @@
+/test @test(rw,anonuid=0,sec=sys,) 2401:db00:11:1:face:0:3d:0(rw,anonuid=0,sec=sys,)
diff --git a/tests/configfiles/exports_bad_opt b/tests/configfiles/exports_bad_opt
new file mode 100644
index 00000000000..70a56890e4c
--- /dev/null
+++ b/tests/configfiles/exports_bad_opt
@@ -0,0 +1 @@
+/groot asdf(r) @ngtop(r)
diff --git a/tests/configfiles/netgroups b/tests/configfiles/netgroups
new file mode 100644
index 00000000000..f1f5fcdc145
--- /dev/null
+++ b/tests/configfiles/netgroups
@@ -0,0 +1,4 @@
+asdf ng1
+ng1 ng2
+ng2 (dev1763.prn2.example.com,,)
+ng3 (dev-1763.prn-2.example.com,,)
diff --git a/tests/dht.rc b/tests/dht.rc
new file mode 100644
index 00000000000..6918ebde04b
--- /dev/null
+++ b/tests/dht.rc
@@ -0,0 +1,174 @@
+#!/bin/bash
+
+dhthashdebugxattr="dht.file.hashed-subvol."
+
+function get_layout()
+{
+ getfattr -n trusted.glusterfs.dht -e hex $1 2>&1 | grep dht | cut -d = -f2
+}
+
+## populates $BRICK1 and $BRICK2 with hashed/cached subvolume. These will be
+## used by get_cached_brick and get_hashed_brick
+
+function file_has_linkfile()
+{
+ k=0
+ l=0
+ while [ $k -lt $BRICK_COUNT ]
+ do
+ stat=$(stat $B0/${V0}$k/$1 2>/dev/null)
+ if [ $? -eq 0 ]
+ then
+ let l++
+ let "BRICK${l}=$k"
+
+ fi
+ let k++
+ done
+ return $l
+}
+
+function get_cached_brick()
+{
+ i=1
+ brick=$BRICK1
+ while [ $i -lt 3 ]
+ do
+ test=$(getfattr -n trusted.glusterfs.dht.linkto -e text $B0/${V0}$brick/$1 2>&1)
+ if [ $? -eq 1 ]
+ then
+ cached=$brick
+ i=$(( $i+3 ))
+ fi
+ brick=$BRICK1
+ let i++
+ done
+
+ return $cached
+}
+
+function get_hashed_brick()
+{
+ j=1
+ brick=$BRICK1
+ while [ $j -lt 3 ]
+ do
+ test=$(getfattr -n trusted.glusterfs.dht.linkto -e text $B0/${V0}$brick/$1 2>&1)
+ if [ $? -eq 0 ]
+ then
+ hashed=$brick
+ j=$(( $j+3 ))
+ fi
+ brick=$BRICK2
+ let j++
+ done
+
+ return $hashed
+}
+
+
+function cluster_rebalance_completed()
+{
+ val=1
+
+ # Rebalance status will be either "failed" or "completed"
+
+ test=$($CLI_1 volume rebalance $V0 status | grep "in progress" 2>&1)
+ if [ $? -ne 0 ]
+ then
+ val=0
+ fi
+
+ echo $val
+ # Do not *return* the value here. If it's non-zero, that will cause
+ # EXPECT_WITHIN (e.g. in bug-884455.t) to return prematurely, leading to
+ # a spurious test failure. Nothing else checks the return value anyway
+ # (they all check the output) so there's no need for it to be non-zero
+ # just because grep didn't find what we want.
+}
+
+function rebalance_completed()
+{
+ val=1
+ test=$($CLI volume rebalance $V0 status | grep localhost | grep "completed" 2>&1)
+ if [ $? -eq 0 ]
+ then
+ val=0
+ fi
+
+ echo $val
+ # Do not *return* the value here. If it's non-zero, that will cause
+ # EXPECT_WITHIN (e.g. in bug-884455.t) to return prematurely, leading to
+ # a spurious test failure. Nothing else checks the return value anyway
+ # (they all check the output) so there's no need for it to be non-zero
+ # just because grep didn't find what we want.
+}
+
+function remove_brick_completed()
+{
+ val=1
+ test=$(gluster volume remove-brick $V0 $H0:$B0/${V0}2 status | grep localhost | grep "completed" 2>&1)
+ if [ $? -eq 0 ]
+ then
+ val=0
+ fi
+
+ echo $val
+}
+
+function dht_get_linkto_target()
+{
+ local path=$1;
+ echo $(getfattr -e text --only-values --absolute-names -n trusted.glusterfs.dht.linkto $path)
+}
+
+function is_dht_linkfile()
+{
+ local path=$1
+ retval=0
+ local output=$(stat -c %a $path)
+ if [ $output -eq 1000 ]; then
+ retval=1
+ fi
+
+ echo $retval
+ return $retval
+}
+
+
+# Given an existing directory on the volume, get the hashed subvol for a file
+# in that directory
+# Input: filename dirpath_on_mount
+
+function dht_get_hash_subvol()
+{
+ local hashed_subvol
+ hashed_subvol=$(getfattr --only-values -n "$dhthashdebugxattr$1" $2 2>/dev/null)
+ echo $hashed_subvol
+}
+
+
+# Find the first filename that hashes to the same subvol
+# as $1
+# Input: subvol_name dirpath_on_mount file_pattern
+
+function dht_first_filename_with_hashsubvol()
+{
+ local in_subvol=$1
+ local in_path=$2
+ local in_hash_subvol
+ local file_pattern=$3
+ local filename
+
+ for i in {1..50}
+ do
+ filename="$file_pattern$i"
+ in_hash_subvol=$(dht_get_hash_subvol "$filename" "$in_path")
+ # echo $in_hash_subvol
+ if [ "$in_subvol" == "$in_hash_subvol" ]; then
+ fn_return_val=$filename
+ return 0
+ fi
+ done
+ return 1
+}
diff --git a/tests/ec.rc b/tests/ec.rc
new file mode 100644
index 00000000000..f18752fc99a
--- /dev/null
+++ b/tests/ec.rc
@@ -0,0 +1,18 @@
+#!/bin/bash
+
+function ec_up_status()
+{
+ local v=$1
+ local m=$2
+ local ec_id=$3
+ grep -E "^up =" $m/.meta/graphs/active/${v}-disperse-${ec_id}/private | cut -f2 -d'='
+}
+
+function ec_option_value()
+{
+ local v=$1
+ local m=$2
+ local ec_id=$3
+ local opt=$4
+ grep -E "^$opt =" $m/.meta/graphs/active/${v}-disperse-${ec_id}/private | cut -f2 -d'='| awk '{print $1}'
+}
diff --git a/tests/env.rc.in b/tests/env.rc.in
new file mode 100644
index 00000000000..0478d66aec6
--- /dev/null
+++ b/tests/env.rc.in
@@ -0,0 +1,42 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+
+PATH=@bindir@:@sbindir@:$PATH
+export PATH
+
+GLUSTERD_PIDFILEDIR=@localstatedir@/run/gluster
+export GLUSTERD_PIDFILEDIR
+
+LD_LIBRARY_PATH=@libdir@:$LD_LIBRARY_PATH
+export LD_LIBRARY_PATH
+
+LIBRARY_PATH=@libdir@:$LIBRARY_PATH
+export LIBRARY_PATH
+
+CPATH=@includedir@:$CPATH
+export CPATH
+
+GLUSTERD_WORKDIR=@GLUSTERD_WORKDIR@
+export GLUSTERD_WORKDIR
+
+PKG_CONFIG_PATH=@pkgconfigdir@:$PKG_CONFIG_PATH
+export PKG_CONFIG_PATH
+
+PYTHONPATH=@BUILD_PYTHON_SITE_PACKAGES@:$PYTHON_PATH
+export PYTHONPATH
+
+PYTHON=@PYTHON@
+export PYTHON
+
+PYTHONPATH=@BUILD_PYTHON_SITE_PACKAGES@:$PYTHON_PATH
+export PYTHONPATH
+
+GLUSTER_CMD_DIR=@sbindir@
+export GLUSTER_CMD_DIR
+
+GLUSTER_LIBEXECDIR=@GLUSTERFS_LIBEXECDIR@
+export GLUSTER_LIBEXECDIR
+
+RUN_NFS_TESTS=@BUILD_GNFS@
+export RUN_NFS_TESTS
diff --git a/tests/experimental/.gitignore b/tests/experimental/.gitignore
new file mode 100644
index 00000000000..803908720af
--- /dev/null
+++ b/tests/experimental/.gitignore
@@ -0,0 +1,7 @@
+# Initial commit, git needs a file to add a directory
+# First tests to appear here, should remove this file
+# Directives:
+# Ignore everything in this directory
+*
+# Except this file
+!.gitignore
diff --git a/tests/fallocate.rc b/tests/fallocate.rc
new file mode 100644
index 00000000000..d117ba9c051
--- /dev/null
+++ b/tests/fallocate.rc
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+# Helper to verify a given fallocate command is supported and skip a test
+# otherwise. Older versions of the fallocate utility might not support all modes
+# (i.e., discard) and older versions of fuse might not support the associated
+# fallocate requests.
+
+function require_fallocate()
+{
+ output=`fallocate $* 2>&1`
+ ret=$?
+ if [ ! $ret -eq 0 ] && ([[ $output == *unsupported* ]] ||
+ [[ $output == *invalid* ]] ||
+ [[ $output == *"not supported"* ]])
+ then
+ SKIP_TESTS
+ exit
+ fi
+}
diff --git a/tests/fdl.rc b/tests/fdl.rc
new file mode 100644
index 00000000000..df58305b923
--- /dev/null
+++ b/tests/fdl.rc
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+log_base=$($CLI --print-logdir)
+log_id=${B0}/${V0}-0
+log_id=${log_id:1} # Remove initial slash
+log_id=${log_id//\//-} # Replace remaining slashes with dashes
+FDL_META_FILE=${log_base}/${log_id}-meta-1.jnl
+FDL_DATA_FILE=${log_base}/${log_id}-data-1.jnl
+
+check_logfile() {
+ [ $(gf_logdump $FDL_META_FILE $FDL_DATA_FILE | grep $1 | wc -l) -ge $2 ]
+}
diff --git a/tests/features/delay-gen.t b/tests/features/delay-gen.t
new file mode 100755
index 00000000000..72e6dbb7697
--- /dev/null
+++ b/tests/features/delay-gen.t
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume set $V0 delay-gen posix
+TEST $CLI volume set $V0 delay-gen.delay-duration 1000000
+TEST $CLI volume set $V0 delay-gen.delay-percentage 100
+TEST $CLI volume set $V0 delay-gen.enable read,write
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+TEST $CLI volume profile $V0 start
+## Mount FUSE with caching disabled (read-write)
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+
+TEST dd if=/dev/zero of=$M0/1 count=1 bs=128k oflag=sync
+
+#Write should take at least a second
+write_max_latency=$($CLI volume profile $V0 info | grep WRITE | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+
+#Create should not take a second
+create_max_latency=$($CLI volume profile $V0 info | grep CREATE | awk 'BEGIN {max = 0} {if ($6 > max) max=$6;} END {print max}' | cut -d. -f 1 | egrep "[0-9]{7,}")
+
+TEST [ ! -z $write_max_latency ];
+TEST [ -z $create_max_latency ];
+
+# Not providing a particular fop will make it test everything
+TEST $CLI volume reset $V0 delay-gen.enable
+TEST $CLI volume set $V0 delay-gen.delay-duration 100
+
+cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+build_tester ./glfsxmp.c -lgfapi
+./glfsxmp $V0 $H0 >/dev/null
+cleanup_tester ./glfsxmp
+rm ./glfsxmp.c
+
+$(dirname $0)/../basic/rpc-coverage.sh $M0 >/dev/null
+
+cleanup;
+#G_TESTDEF_TEST_STATUS_NETBSD7=1501397
diff --git a/tests/features/dh1024.pem b/tests/features/dh1024.pem
new file mode 100644
index 00000000000..fe514bd4ee5
--- /dev/null
+++ b/tests/features/dh1024.pem
@@ -0,0 +1,5 @@
+-----BEGIN DH PARAMETERS-----
+MIGHAoGBAL2k+efZ6g50PpL41G96IaRw2OTH921yhHMNSXBE/K+R6oTkJFcNJs1N
+q+a1Ko2xCBDa5MgvudqWep6PvE06rzEaJPW8ITdu8j3Eo9T1rorJ3CctpE/CaRl2
+7v4DNe+Mho6q1MPlG5PfXEZWgbT7tjn/Y6lwD/B2CoMzAx+4DXgbAgEC
+-----END DH PARAMETERS-----
diff --git a/tests/features/fdl-overflow.t b/tests/features/fdl-overflow.t
new file mode 100644
index 00000000000..34b941d2f2a
--- /dev/null
+++ b/tests/features/fdl-overflow.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fdl.rc
+
+_check_sizes () {
+ local n=0
+ local sz
+ local total_sz=0
+
+ # We don't care about the sizes of the meta files. That would be
+ # embedding too much of the implementation into the test.
+ n=$(ls ${log_base}/${log_id}-meta-*.jnl | wc -l)
+ [ $n = 2 ] || return 1
+
+ # We *do* care about the sizes of the data files, which should exactly
+ # reflect the amount of data written via dd.
+ n=0
+ while read sz name; do
+ G_LOG "found journal ${name} size ${sz}MB"
+ n=$((n+1))
+ total_sz=$((total_sz+sz))
+ done < <(du -sm ${log_base}/${log_id}-data-*.jnl)
+ [ $n = 2 ] || return 1
+ # On our CentOS and NetBSD regression-test systems, but not on my Fedora
+ # development system, each file ends up being slightly larger than its
+ # data size because of metadata, and 'du' rounds that up to a full extra
+ # megabyte. We'll allow either result, because what we're really
+ # looking for is a complete failure to roll over from one file to
+ # another at the appropriate size.
+ [ $total_sz = 20 -o $total_sz = $((n+20)) ] || return 1
+
+ return 0
+}
+
+check_sizes () {
+ set -x
+ _check_sizes
+ ret=$?
+ set +x
+ return ret
+}
+
+if [ x"$OSTYPE" = x"NetBSD" ]; then
+ CREAT_OFLAG="creat,"
+else
+ CREAT_OFLAG=""
+fi
+
+TEST rm -f ${log_base}/${log_id}-*.log
+TEST glusterd
+TEST pidof glusterd
+
+# Get a simple volume set up and mounted with FDL active.
+TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0
+TEST $CLI volume set $V0 changelog.changelog off
+TEST $CLI volume set $V0 features.fdl on
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+
+# Generate some I/O and unmount/stop so we can see log sizes.
+TEST dd if=/dev/zero of=$M0/twentyMB bs=1048576 count=20 \
+ oflag=${CREAT_OFLAG}sync
+TEST umount $M0
+TEST $CLI volume stop $V0
+
+TEST _check_sizes
+
+cleanup
+#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=1385758
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1385758
diff --git a/tests/features/fdl.t b/tests/features/fdl.t
new file mode 100644
index 00000000000..5a3c13fc850
--- /dev/null
+++ b/tests/features/fdl.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fdl.rc
+
+if [ x"$OSTYPE" = x"NetBSD" ]; then
+ CREAT_OFLAG="creat,"
+else
+ CREAT_OFLAG=""
+fi
+
+TEST rm -f $FDL_META_FILE $FDL_DATA_FILE
+TEST glusterd
+TEST pidof glusterd
+
+# Get a simple volume set up and mounted with FDL active.
+TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0
+TEST $CLI volume set $V0 changelog.changelog off
+TEST $CLI volume set $V0 features.fdl on
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+
+# Generate some I/O and unmount.
+TEST mkdir -p $M0/abc/def
+TEST dd if=/dev/zero of=$M0/abc/def/ghi bs=128 count=2 \
+ oflag=${CREAT_OFLAG}sync
+TEST chmod 314 $M0/abc/def/ghi
+TEST rm -rf $M0/abc
+TEST umount $M0
+
+# Check that gf_logdump works, and shows the ops we just issued. There will be
+# more SETATTR ops than the one corresponding to our chmod, because some are
+# issued internally. We have to guess a bit about where the log will be.
+TEST check_logfile GF_FOP_MKDIR 2
+TEST check_logfile GF_FOP_CREATE 1
+TEST check_logfile GF_FOP_WRITE 2
+TEST check_logfile GF_FOP_SETATTR 1
+TEST check_logfile GF_FOP_UNLINK 1
+TEST check_logfile GF_FOP_RMDIR 2
+
+cleanup
+#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=1385758
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1385758
diff --git a/tests/features/flock_interrupt.t b/tests/features/flock_interrupt.t
new file mode 100644
index 00000000000..b8717e30dfb
--- /dev/null
+++ b/tests/features/flock_interrupt.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}0;
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/testfile;
+
+echo > got_lock
+flock $M0/testfile sleep 6 & { sleep 0.3; flock -w 2 $M0/testfile true; echo ok > got_lock; } &
+
+EXPECT_WITHIN 4 ok cat got_lock;
+
+## Finish up
+rm -f got_lock;
+cleanup;
diff --git a/tests/features/fuse-lru-limit.t b/tests/features/fuse-lru-limit.t
new file mode 100644
index 00000000000..dd6be2d5397
--- /dev/null
+++ b/tests/features/fuse-lru-limit.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST glusterfs -s $H0 --volfile-id $V0 $M0
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "2" online_brick_count
+
+EXPECT "1" get_mount_active_size_value $V0 $M0
+EXPECT "0" get_mount_lru_size_value $V0 $M0
+
+mkdir ${M0}/dir-{1..9}
+for i in {1..9}; do
+ for j in {1..1000}; do
+ echo "Test file" > ${M0}/dir-$i/file-$j;
+ done;
+done
+lc=$(get_mount_lru_size_value $V0 ${M0})
+# ideally it should be 9000+
+TEST [ $lc -ge 9000 ]
+
+TEST umount $M0
+
+TEST glusterfs -s $H0 --volfile-id $V0 --lru-limit 1000 $M0
+
+TEST find $M0
+lc=$(get_mount_lru_size_value $V0 ${M0})
+# ideally it should be <1000
+# Not sure if there are any possibilities of buffer need.
+TEST [ $lc -le 1000 ]
+
+TEST rm -rf $M0/*
+
+EXPECT "1" get_mount_active_size_value $V0 $M0
+EXPECT "0" get_mount_lru_size_value $V0 $M0
+
+cleanup
diff --git a/tests/features/glfs-lease-recall.c b/tests/features/glfs-lease-recall.c
new file mode 100644
index 00000000000..9a60f9beec1
--- /dev/null
+++ b/tests/features/glfs-lease-recall.c
@@ -0,0 +1,372 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+/* Few rules:
+ * 1. A client may have multiple lease keys, but a lease key cannot be shared by
+ * multiple clients.
+ * 2. Lease key can be set before open, or in glfs_lease request. A lease key
+ * set like this is valid for the lifetime of the fd, i.e. a fd cannot have
+ * multiple lease key. But a lease key can be shared across multiple fds.
+ */
+glfs_t *client1 = NULL, *client2 = NULL;
+glfs_fd_t *fd1 = NULL;
+FILE *log_file = NULL;
+char lid1[GLFS_LEASE_ID_SIZE] = "lid1-clnt1",
+ lid2[GLFS_LEASE_ID_SIZE] = "lid2-clnt2";
+char lid3[GLFS_LEASE_ID_SIZE] = "lid3-clnt2", lid4[GLFS_LEASE_ID_SIZE] = {
+ 0,
+};
+char *volname = NULL, *glfs_log_file = NULL;
+int upcall_recv = 0;
+
+#define MAX_CLIENTS 4
+#define MAX_FDS 4
+#define TEST_FILE "/test/lease"
+#define SHUD_PASS 0
+#define SHUD_FAIL -1
+#define NONE 0
+
+static void
+recall_cbk(struct glfs_lease lease, void *data);
+
+static int
+set_read_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_SET_LEASE;
+ lease.lease_type = GLFS_RD_LEASE;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+ ret = glfs_lease(fd, &lease, &recall_cbk, fd);
+ if (ret < 0) {
+ fprintf(log_file, "\n RD_LEASE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ fprintf(log_file, "\n Took RD_LEASE");
+ return ret;
+}
+
+static int
+set_write_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_SET_LEASE;
+ lease.lease_type = GLFS_RW_LEASE;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+ ret = glfs_lease(fd, &lease, &recall_cbk, NULL);
+ if (ret < 0) {
+ fprintf(log_file, "\n RW_LEASE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ fprintf(log_file, "\n Took RW_LEASE");
+ return ret;
+}
+
+static int
+get_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_GET_LEASE;
+ lease.lease_type = -1;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+ ret = glfs_lease(fd, &lease, &recall_cbk, NULL);
+ if (ret < 0) {
+ fprintf(log_file, "\n GET_LEASE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ if (lease.lease_type == GLFS_RD_LEASE)
+ fprintf(log_file, "\n Esisting Lease: RD_LEASE");
+ else if (lease.lease_type == GLFS_RW_LEASE)
+ fprintf(log_file, "\n Esisting Lease: RW_LEASE");
+ else if (lease.lease_type == 3)
+ fprintf(log_file, "\n Esisting Lease: RD_LEASE|RW_LEASE");
+ else if (lease.lease_type == 0)
+ fprintf(log_file, "\n Esisting Lease: NONE");
+ else
+ fprintf(log_file, "\n Existing lease type:%d", lease.lease_type);
+ return lease.lease_type;
+}
+
+static int
+unlk_write_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_UNLK_LEASE;
+ lease.lease_type = GLFS_RW_LEASE;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+ ret = glfs_lease(fd, &lease, &recall_cbk, NULL);
+ if (ret < 0) {
+ fprintf(log_file, "\n Unlock RW_LESAE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ fprintf(log_file, "\n Unlocked RW_LEASE");
+ return ret;
+}
+
+static int
+unlk_read_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_UNLK_LEASE;
+ lease.lease_type = GLFS_RD_LEASE;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+
+ ret = glfs_lease(fd, &lease, &recall_cbk, NULL);
+ if (ret < 0) {
+ fprintf(log_file, "\n Unlock RD_LEASE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ fprintf(log_file, "\n Unlocked RD_LEASE");
+ return ret;
+}
+
+void
+up_async_lease_recall(struct glfs_upcall *up_arg, void *data)
+{
+ struct glfs_upcall_lease *in_arg = NULL;
+ enum glfs_upcall_reason reason = 0;
+ struct glfs_object *object = NULL;
+ uint64_t flags = 0;
+ uint64_t expire = 0;
+
+ if (!up_arg)
+ return;
+
+ reason = glfs_upcall_get_reason(up_arg);
+
+ /* Expect 'GLFS_UPCALL_RECALL_LEASE' upcall event. */
+
+ if (reason == GLFS_UPCALL_RECALL_LEASE) {
+ in_arg = glfs_upcall_get_event(up_arg);
+
+ object = glfs_upcall_lease_get_object(in_arg);
+
+ fprintf(log_file,
+ " upcall event type - %d,"
+ " object(%p)\n",
+ reason, object);
+ upcall_recv = 1;
+ }
+
+ glfs_free(up_arg);
+ return;
+}
+
+glfs_t *
+setup_new_client(char *volname, char *log_fileile)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+ int up_events = GLFS_EVENT_ANY;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(log_file, "\nglfs_new: returned NULL (%s)\n", strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007);
+ if (ret < 0) {
+ fprintf(log_file, "\nglfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_fileile, 7);
+ if (ret < 0) {
+ fprintf(log_file, "\nglfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(log_file, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ /* Register Upcalls */
+ ret = glfs_upcall_register(fs, up_events, up_async_lease_recall, NULL);
+
+ /* Check if the return mask contains the event */
+ if ((ret < 0) || !(ret & GLFS_EVENT_RECALL_LEASE)) {
+ fprintf(stderr,
+ "glfs_upcall_register return doesn't contain"
+ " upcall event - GLFS_EVENT_RECALL_LEASE\n");
+ goto error;
+ }
+
+ return fs;
+error:
+ if (fs)
+ glfs_fini(fs);
+ return NULL;
+}
+
+#define OPEN(client, flags, fd, lease_id) \
+ do { \
+ int ret_val = 0; \
+ ret_val = glfs_setfsleaseid(lease_id); \
+ if (ret_val) { \
+ fprintf(log_file, \
+ "\nglfs_setfsleaseid failed with ret: %d (%s)\n", ret, \
+ strerror(errno)); \
+ return -1; \
+ } \
+ fd = glfs_open(client, TEST_FILE, flags); \
+ if (fd == NULL) { \
+ fprintf(log_file, "\nglfs_open failed with ret: %d (%s)\n", ret, \
+ strerror(errno)); \
+ return -1; \
+ } \
+ } while (0)
+
+#define VERIFY_RESULT(test_case, ret, value) \
+ do { \
+ if (ret != value) { \
+ fprintf(log_file, \
+ "\n Testcase %d failed, ret = %d, value=%d\n", \
+ test_case, ret, value); \
+ goto error; /*test unsuccessful*/ \
+ } \
+ fprintf(log_file, "\n Testcase %d Succeeded\n", test_case); \
+ } while (0)
+
+static void
+recall_cbk(struct glfs_lease lease, void *data)
+{
+ int ret = -1;
+ char ld[GLFS_LEASE_ID_SIZE] = "";
+
+ fprintf(log_file, "\nRECALL received on lease_id:(%s)", lease.lease_id);
+ memcpy(ld, lease.lease_id, GLFS_LEASE_ID_SIZE);
+ ret = unlk_write_lease((glfs_fd_t *)data, ld);
+ VERIFY_RESULT(500, ret, SHUD_PASS);
+error:
+ return;
+}
+
+static int
+testcase_recall_conflict_lease()
+{
+ struct glfs_object *obj = NULL;
+ glfs_fd_t *fd1 = NULL;
+ int ret = 0;
+ struct glfs_lease lease = {
+ 0,
+ };
+
+ fprintf(log_file,
+ "\n Basic test case for conflicting lease causing recall");
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_SET_LEASE;
+ lease.lease_type = GLFS_RD_LEASE;
+ memcpy(&lease.lease_id, lid2, GLFS_LEASE_ID_SIZE);
+ /* Open fd on client 1 in RD mode */
+ OPEN(client1, O_RDWR, fd1, lid1);
+ ret = set_write_lease(fd1, lid1);
+ VERIFY_RESULT(1, ret, SHUD_PASS);
+
+ /* reset counter */
+ upcall_recv = 0;
+
+ obj = glfs_h_lookupat(client2, NULL, TEST_FILE, NULL, 0);
+ ret = glfs_h_lease(client2, obj, &lease);
+ VERIFY_RESULT(2, ret, SHUD_FAIL);
+
+ sleep(3);
+ /* should recv upcall */
+ VERIFY_RESULT(6, !upcall_recv, SHUD_PASS);
+
+ ret = unlk_write_lease(fd1, lid1);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = glfs_h_close(obj);
+ VERIFY_RESULT(3, ret, SHUD_PASS);
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ int i = 0;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd1 = NULL;
+ char *topdir = "topdir", *filename = "file1";
+ char *buf = NULL;
+ int x = 0;
+ ssize_t xattr_size = -1;
+
+ if (argc != 4) {
+ fprintf(stderr,
+ "Expect following args %s <Vol> <glfs client log file> "
+ "<testcase log file>\n",
+ argv[0]);
+ return -1;
+ }
+
+ log_file = fopen(argv[3], "w");
+ if (!log_file)
+ goto error;
+
+ volname = argv[1];
+ glfs_log_file = argv[2];
+
+ /* Setup 2 clients */
+ client1 = setup_new_client(volname, glfs_log_file);
+ client2 = setup_new_client(volname, glfs_log_file);
+
+ ret = testcase_recall_conflict_lease();
+ VERIFY_RESULT(101, ret, SHUD_PASS);
+
+ glfs_fini(client1);
+ glfs_fini(client2);
+
+ fclose(log_file);
+ return 0;
+error:
+ return -1;
+}
diff --git a/tests/features/glfs-lease.c b/tests/features/glfs-lease.c
new file mode 100644
index 00000000000..e82cd875b38
--- /dev/null
+++ b/tests/features/glfs-lease.c
@@ -0,0 +1,717 @@
+#include <glusterfs/api/glfs.h>
+#include <glusterfs/api/glfs-handles.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+
+/* Few rules:
+ * 1. A client may have multiple lease keys, but a lease key cannot be shared by
+ * multiple clients.
+ * 2. Lease key can be set before open, or in glfs_lease request. A lease key
+ * set like this is valid for the lifetime of the fd, i.e. a fd cannot have
+ * multiple lease key. But a lease key can be shared across multiple fds.
+ */
+glfs_t *client1 = NULL, *client2 = NULL, *client3 = NULL, *client4 = NULL;
+glfs_fd_t *fd1 = NULL, *fd2 = NULL, *fd3 = NULL, *fd4 = NULL;
+FILE *log_file = NULL;
+char lid1[GLFS_LEASE_ID_SIZE] = "lid1-clnt1",
+ lid2[GLFS_LEASE_ID_SIZE] = "lid2-clnt2";
+char lid3[GLFS_LEASE_ID_SIZE] = "lid3-clnt2", lid4[GLFS_LEASE_ID_SIZE] = {
+ 0,
+};
+char *volname = NULL, *glfs_log_file = NULL;
+
+#define MAX_CLIENTS 4
+#define MAX_FDS 4
+#define TEST_FILE "/test/lease"
+#define SHUD_PASS 0
+#define SHUD_FAIL -1
+#define NONE 0
+
+static void
+recall_cbk(struct glfs_lease lease, void *data);
+
+static int
+set_read_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_SET_LEASE;
+ lease.lease_type = GLFS_RD_LEASE;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+ ret = glfs_lease(fd, &lease, &recall_cbk, fd);
+ if (ret < 0) {
+ fprintf(log_file, "\n RD_LEASE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ fprintf(log_file, "\n Took RD_LEASE");
+ return ret;
+}
+
+static int
+set_write_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_SET_LEASE;
+ lease.lease_type = GLFS_RW_LEASE;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+ ret = glfs_lease(fd, &lease, &recall_cbk, NULL);
+ if (ret < 0) {
+ fprintf(log_file, "\n RW_LEASE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ fprintf(log_file, "\n Took RW_LEASE");
+ return ret;
+}
+
+static int
+get_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_GET_LEASE;
+ lease.lease_type = -1;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+ ret = glfs_lease(fd, &lease, &recall_cbk, NULL);
+ if (ret < 0) {
+ fprintf(log_file, "\n GET_LEASE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ if (lease.lease_type == GLFS_RD_LEASE)
+ fprintf(log_file, "\n Esisting Lease: RD_LEASE");
+ else if (lease.lease_type == GLFS_RW_LEASE)
+ fprintf(log_file, "\n Esisting Lease: RW_LEASE");
+ else if (lease.lease_type == 3)
+ fprintf(log_file, "\n Esisting Lease: RD_LEASE|RW_LEASE");
+ else if (lease.lease_type == 0)
+ fprintf(log_file, "\n Esisting Lease: NONE");
+ else
+ fprintf(log_file, "\n Existing lease type:%d", lease.lease_type);
+ return lease.lease_type;
+}
+
+static int
+unlk_write_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_UNLK_LEASE;
+ lease.lease_type = GLFS_RW_LEASE;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+ ret = glfs_lease(fd, &lease, &recall_cbk, NULL);
+ if (ret < 0) {
+ fprintf(log_file, "\n Unlock RW_LESAE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ fprintf(log_file, "\n Unlocked RW_LEASE");
+ return ret;
+}
+
+static int
+unlk_read_lease(glfs_fd_t *fd, char ld[])
+{
+ struct glfs_lease lease = {
+ 0,
+ };
+ int ret = 0;
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_UNLK_LEASE;
+ lease.lease_type = GLFS_RD_LEASE;
+ memcpy(&lease.lease_id, ld, GLFS_LEASE_ID_SIZE);
+
+ ret = glfs_lease(fd, &lease, &recall_cbk, NULL);
+ if (ret < 0) {
+ fprintf(log_file, "\n Unlock RD_LEASE failed with ret: %d (%s)", ret,
+ strerror(errno));
+ return -1;
+ }
+ fprintf(log_file, "\n Unlocked RD_LEASE");
+ return ret;
+}
+
+glfs_t *
+setup_new_client(char *volname, char *log_fileile)
+{
+ int ret = 0;
+ glfs_t *fs = NULL;
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ fprintf(log_file, "\nglfs_new: returned NULL (%s)\n", strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", "localhost", 24007);
+ if (ret < 0) {
+ fprintf(log_file, "\nglfs_set_volfile_server failed ret:%d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_set_logging(fs, log_fileile, 7);
+ if (ret < 0) {
+ fprintf(log_file, "\nglfs_set_logging failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+
+ ret = glfs_init(fs);
+ if (ret < 0) {
+ fprintf(log_file, "\nglfs_init failed with ret: %d (%s)\n", ret,
+ strerror(errno));
+ goto error;
+ }
+ return fs;
+error:
+ return NULL;
+}
+
+#define OPEN(client, flags, fd, lease_id) \
+ do { \
+ int ret_val = 0; \
+ ret_val = glfs_setfsleaseid(lease_id); \
+ if (ret_val) { \
+ fprintf(log_file, \
+ "\nglfs_setfsleaseid failed with ret: %d (%s)\n", ret, \
+ strerror(errno)); \
+ return -1; \
+ } \
+ fd = glfs_open(client, TEST_FILE, flags); \
+ if (fd == NULL) { \
+ fprintf(log_file, "\nglfs_open failed with ret: %d (%s)\n", ret, \
+ strerror(errno)); \
+ return -1; \
+ } \
+ } while (0)
+
+#define VERIFY_RESULT(test_case, ret, value) \
+ do { \
+ if (ret != value) { \
+ fprintf(log_file, \
+ "\n Testcase %d failed, ret = %d, value=%d\n", \
+ test_case, ret, value); \
+ goto error; /*test unsuccessful*/ \
+ } \
+ fprintf(log_file, "\n Testcase %d Succeeded\n", test_case); \
+ } while (0)
+
+static void
+recall_cbk(struct glfs_lease lease, void *data)
+{
+ int ret = -1;
+ char ld[GLFS_LEASE_ID_SIZE] = "";
+
+ fprintf(log_file, "\nRECALL received on lease_id:(%s)", lease.lease_id);
+ memcpy(ld, lease.lease_id, GLFS_LEASE_ID_SIZE);
+ ret = unlk_write_lease((glfs_fd_t *)data, ld);
+ VERIFY_RESULT(500, ret, SHUD_PASS);
+error:
+ return;
+}
+
+static int
+testcase1_rd_lease()
+{
+ glfs_fd_t *fd1 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for Read lease:");
+ /* Open fd on client 1 in RD mode */
+ OPEN(client1, O_RDONLY, fd1, lid1);
+ ret = set_write_lease(fd1, lid1);
+ VERIFY_RESULT(1, ret, SHUD_FAIL);
+
+ ret = set_read_lease(fd1, lid1);
+ VERIFY_RESULT(2, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(3, ret, GLFS_RD_LEASE);
+
+ ret = unlk_write_lease(fd1, lid1);
+ VERIFY_RESULT(4, ret, SHUD_FAIL);
+
+ ret = unlk_read_lease(fd1, lid1);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(6, ret, NONE);
+
+ ret = unlk_read_lease(fd1, lid1);
+ VERIFY_RESULT(7, ret, SHUD_PASS);
+
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(8, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase2_wr_lease()
+{
+ glfs_fd_t *fd1 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for Write lease:");
+ /* Open fd on client 1 in WRonly mode */
+ OPEN(client1, O_WRONLY, fd1, lid1);
+ ret = set_read_lease(fd1, lid1);
+ VERIFY_RESULT(1, ret, SHUD_FAIL);
+
+ ret = unlk_write_lease(fd1, lid1);
+ VERIFY_RESULT(2, ret, SHUD_PASS);
+
+ ret = set_write_lease(fd1, lid1);
+ VERIFY_RESULT(3, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(4, ret, GLFS_RW_LEASE);
+
+ ret = unlk_write_lease(fd1, lid1);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(6, ret, NONE);
+
+ ret = unlk_read_lease(fd1, lid1);
+ VERIFY_RESULT(7, ret, SHUD_FAIL);
+
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(8, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase3_rd_wr_lease()
+{
+ glfs_fd_t *fd1 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for Read Write lease:");
+ /* Open fd on client 1 in WRonly mode */
+ OPEN(client1, O_RDWR, fd1, lid1);
+ ret = set_read_lease(fd1, lid1);
+ VERIFY_RESULT(1, ret, SHUD_PASS);
+
+ ret = set_write_lease(fd1, lid1);
+ VERIFY_RESULT(2, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(3, ret, (GLFS_RW_LEASE | GLFS_RD_LEASE));
+
+ ret = unlk_write_lease(fd1, lid1);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(5, ret, GLFS_RD_LEASE);
+
+ ret = unlk_read_lease(fd1, lid1);
+ VERIFY_RESULT(6, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(7, ret, NONE);
+
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(8, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase4_rd_lease_multi_clnt()
+{
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for multi client Read lease:");
+
+ /* Open fd on client 1 in RD mode */
+ OPEN(client1, O_RDONLY, fd1, lid1);
+
+ /* Open fd on client 2 in RW mode */
+ OPEN(client2, O_RDONLY, fd2, lid2);
+
+ ret = set_read_lease(fd1, lid1);
+ VERIFY_RESULT(1, ret, SHUD_PASS);
+
+ ret = set_read_lease(fd2, lid2);
+ VERIFY_RESULT(2, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(3, ret, GLFS_RD_LEASE);
+
+ ret = unlk_read_lease(fd1, lid1);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ ret = unlk_read_lease(fd2, lid2);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(6, ret, NONE);
+
+ ret = get_lease(fd2, lid2);
+ VERIFY_RESULT(7, ret, NONE);
+
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(8, ret, SHUD_PASS);
+
+ ret = glfs_close(fd2);
+ VERIFY_RESULT(9, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase5_openfd_multi_lid()
+{
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ glfs_fd_t *fd3 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for multi lid openfd check:");
+
+ /* Open fd on client 1 in RD mode */
+ OPEN(client1, O_RDONLY, fd1, lid1);
+
+ /* Open fd on client 2 in RW mode */
+ OPEN(client2, O_RDWR, fd2, lid2);
+ OPEN(client2, O_RDWR, fd3, lid2);
+
+ ret = set_read_lease(fd1, lid1);
+ VERIFY_RESULT(
+ 1, ret,
+ SHUD_FAIL); /*As there are other openfds in WR mode from diff lid*/
+
+ ret = set_write_lease(fd2, lid2);
+ VERIFY_RESULT(
+ 2, ret, SHUD_FAIL); /*As thers is another fd in RD mode from diff lid */
+
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(3, ret, SHUD_PASS);
+
+ ret = set_write_lease(fd2, lid2);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ ret = unlk_write_lease(fd2, lid2);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = glfs_close(fd2);
+ VERIFY_RESULT(6, ret, SHUD_PASS);
+
+ ret = glfs_close(fd3);
+ VERIFY_RESULT(7, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase6_openfd_same_lid()
+{
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ glfs_fd_t *fd3 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for same lid openfd check:");
+
+ /* Open fd on client 2 in RW mode */
+ OPEN(client1, O_RDWR, fd1, lid2);
+ OPEN(client1, O_RDWR, fd2, lid2);
+
+ ret = set_write_lease(fd1, lid2);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ ret = set_write_lease(fd2, lid2);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ ret = set_read_lease(fd2, lid2);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ ret = unlk_write_lease(fd1, lid2);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = unlk_read_lease(fd2, lid2);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = unlk_write_lease(fd2, lid2);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(6, ret, SHUD_PASS);
+
+ ret = glfs_close(fd2);
+ VERIFY_RESULT(7, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase7_rd_multi_lid()
+{
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for multi lease id Read lease:");
+
+ /* Open fd on client 1 in RD mode */
+ OPEN(client2, O_RDONLY, fd1, lid2);
+
+ /* Open fd on client 2 in RD mode */
+ OPEN(client2, O_RDONLY, fd2, lid3);
+
+ ret = set_read_lease(fd1, lid2);
+ VERIFY_RESULT(1, ret, SHUD_PASS);
+
+ ret = set_read_lease(fd2, lid3);
+ VERIFY_RESULT(2, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid2);
+ VERIFY_RESULT(3, ret, GLFS_RD_LEASE);
+
+ ret = unlk_read_lease(fd1, lid2);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ ret = unlk_read_lease(fd2, lid3);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid2);
+ VERIFY_RESULT(6, ret, NONE);
+
+ ret = get_lease(fd2, lid3);
+ VERIFY_RESULT(7, ret, NONE);
+
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(8, ret, SHUD_PASS);
+
+ ret = glfs_close(fd2);
+ VERIFY_RESULT(9, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase8_client_disconnect()
+{
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for client disconnect cleanup");
+
+ /* Open fd on client 1 in RD mode */
+ OPEN(client1, O_RDWR, fd1, lid1);
+
+ ret = set_read_lease(fd1, lid1);
+ VERIFY_RESULT(1, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(2, ret, GLFS_RD_LEASE);
+
+ ret = set_write_lease(fd1, lid1);
+ VERIFY_RESULT(3, ret, SHUD_PASS);
+
+ ret = get_lease(fd1, lid1);
+ VERIFY_RESULT(4, ret, (GLFS_RD_LEASE | GLFS_RW_LEASE));
+
+ ret = glfs_fini(client1);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ /* Open fd on client 2 in RD mode */
+ OPEN(client2, O_RDONLY, fd2, lid3);
+
+ ret = get_lease(fd2, lid3);
+ VERIFY_RESULT(6, ret, NONE);
+
+ ret = glfs_close(fd2);
+ VERIFY_RESULT(7, ret, SHUD_PASS);
+
+ client1 = setup_new_client(volname, glfs_log_file);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase9_recall_conflict_lease()
+{
+ struct glfs_object *obj = NULL;
+ glfs_fd_t *fd1 = NULL;
+ int ret = 0;
+ struct glfs_lease lease = {
+ 0,
+ };
+
+ fprintf(log_file,
+ "\n Basic test case for conflicting lease causing recall");
+
+ memset(&lease, 0, sizeof(lease));
+ lease.cmd = GLFS_SET_LEASE;
+ lease.lease_type = GLFS_RD_LEASE;
+ memcpy(&lease.lease_id, lid2, GLFS_LEASE_ID_SIZE);
+ /* Open fd on client 1 in RD mode */
+ OPEN(client1, O_RDWR, fd1, lid1);
+ ret = set_write_lease(fd1, lid1);
+ VERIFY_RESULT(1, ret, SHUD_PASS);
+
+ obj = glfs_h_lookupat(client2, NULL, TEST_FILE, NULL, 0);
+ ret = glfs_h_lease(client2, obj, &lease);
+ VERIFY_RESULT(2, ret, SHUD_FAIL);
+
+ ret = unlk_write_lease(fd1, lid1);
+ VERIFY_RESULT(5, ret, SHUD_PASS);
+
+ sleep(3);
+ ret = glfs_h_close(obj);
+ VERIFY_RESULT(3, ret, SHUD_PASS);
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(4, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+static int
+testcase10_recall_open_conflict()
+{
+ glfs_fd_t *fd1 = NULL;
+ glfs_fd_t *fd2 = NULL;
+ int ret = 0;
+
+ fprintf(log_file, "\n Basic test case for conflicting open causing recall");
+
+ /* Open fd on client 1 in RW mode */
+ OPEN(client1, O_RDWR, fd1, lid1);
+
+ ret = set_write_lease(fd1, lid1);
+ VERIFY_RESULT(1, ret, SHUD_PASS);
+
+ /* Open fd on client 1 in RW mode */
+ OPEN(client2, O_RDWR, fd2, lid2);
+
+ /* TODO: Check for recall cbk functionality */
+ ret = glfs_close(fd1);
+ VERIFY_RESULT(2, ret, SHUD_PASS);
+
+ ret = glfs_close(fd2);
+ VERIFY_RESULT(3, ret, SHUD_PASS);
+
+ return 0;
+error:
+ return -1;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ int i = 0;
+ glfs_fd_t *fd = NULL;
+ glfs_fd_t *fd1 = NULL;
+ char *topdir = "topdir", *filename = "file1";
+ char *buf = NULL;
+ int x = 0;
+ ssize_t xattr_size = -1;
+
+ if (argc != 4) {
+ fprintf(stderr,
+ "Expect following args %s <Vol> <glfs client log file> "
+ "<testcase log file>\n",
+ argv[0]);
+ return -1;
+ }
+
+ log_file = fopen(argv[3], "w");
+ if (!log_file)
+ goto error;
+
+ volname = argv[1];
+ glfs_log_file = argv[2];
+
+ /* Setup 3 clients */
+ client1 = setup_new_client(volname, glfs_log_file);
+ client2 = setup_new_client(volname, glfs_log_file);
+ client3 = setup_new_client(volname, glfs_log_file);
+
+ ret = testcase1_rd_lease();
+ VERIFY_RESULT(101, ret, SHUD_PASS);
+
+ ret = testcase2_wr_lease();
+ VERIFY_RESULT(102, ret, SHUD_PASS);
+
+ ret = testcase3_rd_wr_lease();
+ VERIFY_RESULT(103, ret, SHUD_PASS);
+
+ ret = testcase4_rd_lease_multi_clnt();
+ VERIFY_RESULT(104, ret, SHUD_PASS);
+
+ ret = testcase5_openfd_multi_lid();
+ VERIFY_RESULT(105, ret, SHUD_PASS);
+
+ ret = testcase6_openfd_same_lid();
+ VERIFY_RESULT(106, ret, SHUD_PASS);
+
+ ret = testcase7_rd_multi_lid();
+ VERIFY_RESULT(107, ret, SHUD_PASS);
+
+ ret = testcase8_client_disconnect();
+ VERIFY_RESULT(108, ret, SHUD_PASS);
+
+ ret = testcase9_recall_conflict_lease();
+ VERIFY_RESULT(109, ret, SHUD_PASS);
+
+ ret = testcase10_recall_open_conflict();
+ VERIFY_RESULT(110, ret, SHUD_PASS);
+
+ glfs_fini(client1);
+ glfs_fini(client2);
+ glfs_fini(client3);
+
+ fclose(log_file);
+ return 0;
+error:
+ return -1;
+}
diff --git a/tests/features/glfs-lease.t b/tests/features/glfs-lease.t
new file mode 100755
index 00000000000..6ef6da05043
--- /dev/null
+++ b/tests/features/glfs-lease.t
@@ -0,0 +1,31 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0};
+TEST $CLI volume set $V0 leases on
+TEST $CLI volume set $V0 open-behind off
+TEST $CLI volume set $V0 write-behind on
+TEST $CLI volume start $V0
+
+logdir=`gluster --print-logdir`
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST mkdir $M0/test
+TEST touch $M0/test/lease
+
+build_tester $(dirname $0)/glfs-lease.c -lgfapi
+build_tester $(dirname $0)/glfs-lease-recall.c -lgfapi
+TEST $(dirname $0)/glfs-lease $V0 $logdir/glfs-lease.log $logdir/lease-test.log
+TEST $(dirname $0)/glfs-lease-recall $V0 $logdir/glfs-lease-recall.log $logdir/lease-test-recall.log
+
+TEST $CLI volume set $V0 leases off
+
+cleanup_tester $(dirname $0)/glfs-lease
+cleanup;
diff --git a/tests/features/interrupt.t b/tests/features/interrupt.t
new file mode 100644
index 00000000000..067eb1b7486
--- /dev/null
+++ b/tests/features/interrupt.t
@@ -0,0 +1,71 @@
+#!/bin/bash
+
+##Copy this file to tests/bugs before running run.sh (cp extras/test/bug-920583.t tests/bugs/)
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+TESTS_EXPECTED_IN_LOOP=4
+
+cleanup;
+logdir=`gluster --print-logdir`
+
+TEST build_tester $(dirname $0)/open_and_sleep.c
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6,7,8,9};
+
+## Verify volume is is created
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+function log-file-name()
+{
+ logfilename=$M0".log"
+ echo ${logfilename:1} | tr / -
+}
+
+log_file=$logdir"/"`log-file-name`
+
+function test_interrupt {
+ local handlebool="$1"
+ local logpattern="$2"
+
+ TEST $GFS --volfile-id=$V0 --volfile-server=$H0 --fuse-flush-handle-interrupt=$handlebool --log-level=DEBUG $M0
+
+ # If the test helper fails (which is considered a setup error, not failure of the test
+ # case itself), kill will be invoked without argument, and that will be the actual
+ # error which is caught.
+ TEST "./$(dirname $0)/open_and_sleep $M0/testfile-$handlebool | { sleep 0.1; xargs -n1 kill -INT; }"
+
+ TEST "grep -E '$logpattern' $log_file"
+ # Basic sanity check, making sure filesystem has not crashed.
+ TEST test -f $M0/testfile-$handlebool
+}
+
+# Theoretically FLUSH might finish before INTERRUPT is handled,
+# in which case we'd get the "no handler found" message instead of
+# "interrupt handler triggered" (but it's unlikely).
+# If that's observed, the pattern can be changed to
+# 'FLUSH.*interrupt handler triggered|[I]NTERRUPT.*no handler found'
+# to fix the test.
+test_interrupt yes '[F]LUSH.*interrupt handler triggered'
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+test_interrupt no '[I]NTERRUPT.*no handler found'
+
+## Finish up
+TEST $CLI volume stop $V0;
+EXPECT 'Stopped' volinfo_field $V0 'Status';
+
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+cleanup_tester $(dirname $0)/open_and_sleep;
+cleanup;
diff --git a/tests/features/ipc.t b/tests/features/ipc.t
new file mode 100755
index 00000000000..5c92287eaa1
--- /dev/null
+++ b/tests/features/ipc.t
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=GH269
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=GH269
+
+cleanup;
+mkdir -p $B0/1
+mkdir -p $M0
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/1
+TEST $CLI volume start $V0
+
+# Find OS-dependent EOPNOTSUPP value from system headers
+EOPNOTSUPP=$( echo '#include <errno.h>\\EOPNOTSUPP\\' | tr '\\' '\n' | \
+ cc -E -c - | tail -1 )
+
+# liglusterfs embbeds its own UUID implementation. The function name
+# may be the same as in built(in implementation from libc, but with
+# different prototype. In that case, we must make sure python will
+# use libglusterfs's version, and dlopen() does not make any guarantee
+# on this. By preloading libglusterfs.so before launching python, we
+# ensure libglusterfs's UUID functions will be used.
+LD_PRELOAD=${prefix}/lib/libglusterfs.so
+export LD_PRELOAD
+
+# This is a pretty lame test. Basically we just want to make sure that we
+# get all the way through the translator stacks on client and server to get a
+# simple error (EOPNOTSUPP) instead of a crash, RPC error, etc.
+EXPECT ${EOPNOTSUPP} $PYTHON $(dirname $0)/ipctest.py $H0 $V0
+
+unset LD_PRELOAD
+cleanup;
diff --git a/tests/features/ipctest.py b/tests/features/ipctest.py
new file mode 100755
index 00000000000..f6f699cf5c4
--- /dev/null
+++ b/tests/features/ipctest.py
@@ -0,0 +1,28 @@
+
+from __future__ import print_function
+import ctypes
+
+api = ctypes.CDLL("libgfapi.so", mode=ctypes.RTLD_GLOBAL)
+
+api.glfs_ipc.argtypes = [ ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p, ctypes.c_void_p ]
+api.glfs_ipc.restype = ctypes.c_int
+
+def do_ipc (host, volume):
+ fs = api.glfs_new(volume)
+ #api.glfs_set_logging(fs, "/dev/stderr", 7)
+ api.glfs_set_volfile_server(fs, "tcp", host, 24007)
+
+ api.glfs_init(fs)
+ ret = api.glfs_ipc(fs, 1470369258, 0, 0)
+ api.glfs_fini(fs)
+
+ return ret
+
+if __name__ == "__main__":
+ import sys
+
+ try:
+ res = do_ipc(*sys.argv[1:3])
+ print(res)
+ except:
+ print("IPC failed (volume not started?)")
diff --git a/tests/features/lock_revocation.t b/tests/features/lock_revocation.t
new file mode 100644
index 00000000000..67bc13159f9
--- /dev/null
+++ b/tests/features/lock_revocation.t
@@ -0,0 +1,54 @@
+#!/bin/bash
+logdir=$(gluster --print-logdir)
+BRICK_LOGFILES="$logdir/bricks/d-backends-brick?.log"
+rm -f $BRICK_LOGFILES &> /dev/null
+
+# Test that lock revocation works
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+cleanup;
+
+function deadlock_fop() {
+ local MNT=$1
+ for i in {1..1000}; do
+ dd if=/dev/zero of=$MNT/testfile bs=1k count=10 &> /dev/null
+ if grep "MONKEY LOCKING" $BRICK_LOGFILES &> /dev/null; then
+ break
+ fi
+ done
+}
+
+function monkey_unlock() {
+ grep "MONKEY LOCKING" $BRICK_LOGFILES &> /dev/null && echo SUCCESS
+ return 0
+}
+
+function append_to_file() {
+ local FILE_PATH=$1
+ echo "hello" >> $FILE_PATH
+ return 0
+}
+
+#Init
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $CLI volume set $V0 features.locks-monkey-unlocking on
+TEST $CLI volume set $V0 features.locks-revocation-secs 2
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 -s $H0 $M0;
+TEST $GFS --volfile-id=$V0 -s $H0 $M1;
+
+# Deadlock writes to a file using monkey unlocking
+deadlock_fop $M0 &
+EXPECT_WITHIN 60 "SUCCESS" monkey_unlock
+
+# Sleep > unlock timeout and attempt to write to the file
+sleep 3
+TEST append_to_file $M1/testfile
+
+cleanup
+#G_TESTDEF_TEST_STATUS_NETBSD7=BAD_TEST,BUG=1369401
+#G_TESTDEF_TEST_STATUS_CENTOS6=BAD_TEST,BUG=1448364
diff --git a/tests/features/mandatory-lock-forced.c b/tests/features/mandatory-lock-forced.c
new file mode 100644
index 00000000000..4028d6c6eaf
--- /dev/null
+++ b/tests/features/mandatory-lock-forced.c
@@ -0,0 +1,143 @@
+#include <stdio.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+
+#define LOG_ERR(func, err) \
+ do { \
+ fprintf(stderr, "%s : returned error (%s)\n", func, strerror(err)); \
+ exit(err); \
+ } while (0)
+
+int fd;
+struct flock lock;
+char *buf = "ten bytes!";
+char *fname = "/mnt/glusterfs/0/mand.lock";
+int open_flags, child, err, status, blocked = 0;
+
+int
+do_child(char *argv[])
+{
+ /* Initialize file open flags */
+ if (strcmp(argv[2], "BLOCK") == 0)
+ open_flags = O_RDWR;
+ else if (strcmp(argv[2], "TRUNC") == 0)
+ open_flags = O_RDWR | O_TRUNC | O_NONBLOCK;
+ else if (strcmp(argv[2], "NONE") == 0)
+ open_flags = O_RDWR | O_NONBLOCK;
+ else
+ LOG_ERR("Invalid option:", EINVAL);
+
+ /* Open the file */
+ fd = open(fname, open_flags);
+ if (fd == -1)
+ LOG_ERR("Child open", errno);
+
+ /* Perform the file operation*/
+ if (strcmp(argv[3], "READ") == 0) {
+ buf = NULL;
+ err = read(fd, buf, 10);
+ if (err == -1)
+ LOG_ERR("Child read", errno);
+ } else if (strcmp(argv[3], "WRITE") == 0) {
+ err = write(fd, buf, 10);
+ if (err == -1)
+ LOG_ERR("Child write", errno);
+ } else if (strcmp(argv[3], "FTRUNCATE") == 0) {
+ err = ftruncate(fd, 5);
+ if (err)
+ LOG_ERR("Child ftruncate", errno);
+ } else
+ LOG_ERR("Invalid operation:", EINVAL);
+
+ /* Close child fd */
+ err = close(fd);
+ if (err)
+ LOG_ERR("Child close", errno);
+
+ /* Exit success */
+ exit(0);
+}
+
+int
+main(int argc, char *argv[])
+{
+ if (argc < 4) {
+ fprintf(stderr,
+ "Wrong usage: Use as ./mandatory-lock "
+ "<RD_LCK/WR_LCK> <BLOCK/TRUNC/NONE> "
+ "<READ/WRITE/FTRUNCATE\n");
+ exit(EINVAL);
+ }
+ /* Create an empty lock file */
+ fd = open(fname, O_CREAT | O_RDWR, 0755);
+ if (fd == -1)
+ LOG_ERR("Parent create", errno);
+
+ /* Determine the type of lock */
+ if (strcmp(argv[1], "RD_LCK") == 0)
+ lock.l_type = F_RDLCK;
+ else if (strcmp(argv[1], "WR_LCK") == 0)
+ lock.l_type = F_WRLCK;
+ else
+ LOG_ERR("Parent lock type", EINVAL);
+
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0L;
+ lock.l_len = 0L;
+
+ /* Let parent acquire the initial lock */
+ err = fcntl(fd, F_SETLK, &lock);
+ if (err)
+ LOG_ERR("Parent lock", errno);
+
+ /* Now fork a child */
+ child = fork();
+ if (child == 0)
+ /* Perform the child operations */
+ do_child(argv);
+ else {
+ /* If blocking mode, then sleep for 2 seconds
+ * and wait for the child */
+ if (strcmp(argv[2], "NONE") != 0) {
+ sleep(2);
+ if (waitpid(child, &status, WNOHANG) == 0)
+ blocked = 1;
+ /* Release the parent lock so that the
+ * child can terminate */
+ lock.l_type = F_UNLCK;
+ err = fcntl(fd, F_SETLK, &lock);
+ if (err)
+ LOG_ERR("Parent unlock", errno);
+ }
+
+ /* Wait for child to finish */
+ waitpid(child, &status, 0);
+
+ /* Close the parent fd */
+ err = close(fd);
+ if (err)
+ LOG_ERR("Parent close", errno);
+
+ /* Remove the lock file*/
+ err = unlink(fname);
+ if (err)
+ LOG_ERR("Parent unlink", errno);
+
+ /* If not blocked, exit with child exit status*/
+ errno = WEXITSTATUS(status);
+
+ /* If blocked, exit with corresponding
+ * error code */
+ if (blocked)
+ errno = EWOULDBLOCK;
+
+ if (errno != 0)
+ printf("%s\n", strerror(errno));
+
+ exit(errno);
+ }
+}
diff --git a/tests/features/mandatory-lock-forced.t b/tests/features/mandatory-lock-forced.t
new file mode 100644
index 00000000000..563669c6774
--- /dev/null
+++ b/tests/features/mandatory-lock-forced.t
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+# Start glusterd [1]
+TEST glusterd
+
+# Create and verify the volume information [2-4]
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Turn off the performance translators [5-9]
+TEST $CLI volume set $V0 performance.open-behind off
+TEST $CLI volume set $V0 performance.write-behind off
+TEST $CLI volume set $V0 performance.quick-read off
+TEST $CLI volume set $V0 performance.io-cache off
+TEST $CLI volume set $V0 performance.read-ahead off
+
+# Start and mount the volume [10-11]
+TEST $CLI volume start $V0;
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+build_tester $(dirname $0)/mandatory-lock-forced.c -o $(dirname $0)/mandatory-lock
+
+# Various read/write tests without enabling mandatory-locking [12-18]
+$(dirname $0)/mandatory-lock RD_LCK NONE READ
+TEST [ $? -eq 0 ]
+
+$(dirname $0)/mandatory-lock RD_LCK NONE WRITE
+TEST [ $? -eq 0 ]
+
+$(dirname $0)/mandatory-lock WR_LCK NONE READ
+TEST [ $? -eq 0 ]
+
+$(dirname $0)/mandatory-lock WR_LCK NONE WRITE
+TEST [ $? -eq 0 ]
+
+# Specifies O_TRUNC during open
+$(dirname $0)/mandatory-lock RD_LCK TRUNC READ
+TEST [ $? -eq 0 ]
+
+$(dirname $0)/mandatory-lock RD_LCK NONE FTRUNCATE
+TEST [ $? -eq 0 ]
+
+$(dirname $0)/mandatory-lock RD_LCK BLOCK WRITE
+TEST [ $? -eq 0 ]
+
+# Enable mandatory-locking [19]
+TEST $CLI volume set $V0 mandatory-locking forced
+
+# Restart the volume to take the change into effect [20-23]
+TEST umount $M0
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=/$V0 --volfile-server=$H0 $M0
+
+# Repeat the above tests with mandatory-locking [24-30]
+$(dirname $0)/mandatory-lock RD_LCK NONE READ
+TEST [ $? -eq 0 ]
+
+EXPECT "Resource temporarily unavailable" $(dirname $0)/mandatory-lock RD_LCK NONE WRITE
+
+EXPECT "Resource temporarily unavailable" $(dirname $0)/mandatory-lock WR_LCK NONE READ
+
+EXPECT "Resource temporarily unavailable" $(dirname $0)/mandatory-lock WR_LCK NONE WRITE
+
+EXPECT "Resource temporarily unavailable" $(dirname $0)/mandatory-lock RD_LCK TRUNC READ
+
+EXPECT "Resource temporarily unavailable" $(dirname $0)/mandatory-lock RD_LCK NONE FTRUNCATE
+
+EXPECT "Resource temporarily unavailable" $(dirname $0)/mandatory-lock RD_LCK BLOCK WRITE
+
+rm -rf $(dirname $0)/mandatory-lock
+
+cleanup
+
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1326464
diff --git a/tests/features/nuke.t b/tests/features/nuke.t
new file mode 100755
index 00000000000..f1f5f9f90ab
--- /dev/null
+++ b/tests/features/nuke.t
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+create_files () {
+ mkdir $1
+ for i in $(seq 0 99); do
+ mkdir $1/dir$i
+ for j in $(seq 0 99); do
+ touch $1/dir$i/file$j
+ done
+ done
+}
+
+count_files () {
+ ls $1 | wc -l
+}
+
+LANDFILL=$B0/${V0}1/.glusterfs/landfill
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+
+TEST create_files $M0/foo
+TEST [ $(count_files $LANDFILL) = "0" ]
+
+# This should immediately send the whole directory to the landfill.
+TEST setfattr -n glusterfs.dht.nuke -v trinity $M0/foo
+
+# Make sure the directory's not visible on the mountpoint, and is visible in
+# the brick's landfill.
+TEST ! ls $M0/foo
+TEST [ $(count_files $LANDFILL) = "1" ]
+
+# Make sure the janitor thread cleans it up in a timely fashion.
+EXPECT_WITHIN 60 "0" count_files $LANDFILL
+
+cleanup
diff --git a/tests/features/open_and_sleep.c b/tests/features/open_and_sleep.c
new file mode 100644
index 00000000000..7d0e22a2503
--- /dev/null
+++ b/tests/features/open_and_sleep.c
@@ -0,0 +1,27 @@
+#include <unistd.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+int
+main(int argc, char **argv)
+{
+ pid_t pid;
+ int fd;
+
+ if (argc >= 2) {
+ fd = open(argv[1], O_RDWR | O_CREAT, 0644);
+ if (fd == -1) {
+ fprintf(stderr, "cannot open/create %s\n", argv[1]);
+ return 1;
+ }
+ }
+
+ pid = getpid();
+ printf("%d\n", pid);
+ fflush(stdout);
+
+ for (;;)
+ sleep(1);
+
+ return 0;
+}
diff --git a/tests/features/openssl.cnf.in b/tests/features/openssl.cnf.in
new file mode 100644
index 00000000000..1fce34b11b9
--- /dev/null
+++ b/tests/features/openssl.cnf.in
@@ -0,0 +1,41 @@
+[ req ]
+distinguished_name = req_distinguished_name
+x509_extensions = v3_ca
+[ req_distinguished_name ]
+commonName = Common Name
+commonName_max = 64
+[ v3_ca ]
+subjectKeyIdentifier = hash
+authorityKeyIdentifier = keyid:always,issuer:always
+basicConstraints = CA:true
+[ ca ]
+default_ca = CA_default
+[ CA_default ]
+dir = @TMPDIR@
+certs = $dir/certs
+crl_dir = $dir/crl
+database = $dir/index.txt
+unique_subjecta = no
+new_certs_dir = $dir/newcerts
+certificate = $dir/ca.crt
+serial = $dir/serial
+crl = $dir/crl.pem
+private_key = $dir/self.key
+x509_extensions = usr_cert
+name_opt = ca_default
+cert_opt = ca_default
+default_days = 365
+default_crl_days = 30
+crl_extensions = crl_ext
+default_md = sha256
+preserve = no
+policy = policy_test
+[ policy_test ]
+commonName = supplied
+[ usr_cert ]
+basicConstraints = CA:FALSE
+subjectKeyIdentifier = hash
+authorityKeyIdentifier = keyid,issuer:always
+crlDistributionPoints = URI:file://@TMPDIR@/crl.pem
+[ crl_ext ]
+authorityKeyIdentifier = keyid:always,issuer:always
diff --git a/tests/features/readdir-ahead.t b/tests/features/readdir-ahead.t
new file mode 100755
index 00000000000..75223df61bf
--- /dev/null
+++ b/tests/features/readdir-ahead.t
@@ -0,0 +1,45 @@
+#!/bin/bash
+#
+# Test basic readdir-ahead functionality. Verify that readdir-ahead can be
+# enabled, create a set of files and run some ls tests.
+#
+###
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/$V0
+TEST $CLI volume start $V0
+
+TEST $CLI volume set $V0 readdir-ahead on
+
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+
+TEST mkdir $M0/test
+for i in $(seq 0 99)
+do
+ touch $M0/test/$i
+done
+
+count=`ls -1 $M0/test | wc -l`
+TEST [ $count -eq 100 ]
+
+count=`ls -1 $M0/test | wc -l`
+TEST [ $count -eq 100 ]
+
+TEST rm -rf $M0/test/*
+
+count=`ls -1 $M0/test | wc -l`
+TEST [ $count -eq 0 ]
+
+TEST rmdir $M0/test
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup;
diff --git a/tests/features/recon.t b/tests/features/recon.t
new file mode 100644
index 00000000000..82ef6fd755d
--- /dev/null
+++ b/tests/features/recon.t
@@ -0,0 +1,59 @@
+#!/bin/bash
+
+. $(dirname $0)/../traps.rc
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../fdl.rc
+
+tmpdir=$(mktemp -d -t ${0##*/}.XXXXXX)
+push_trapfunc "rm -rf $tmpdir"
+
+write_file () {
+ echo "peekaboo" > $1
+}
+
+TEST rm -f $FDL_META_FILE $FDL_DATA_FILE
+TEST glusterd
+TEST pidof glusterd
+
+# Get a simple volume set up and mounted with FDL active.
+TEST $CLI volume create $V0 ${H0}:${B0}/${V0}-0
+TEST $CLI volume set $V0 features.fdl on
+TEST $CLI volume start $V0
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+
+# Generate some I/O and then copy off the journal files for later.
+TEST mkdir -p $M0/abc/def
+TEST write_file $M0/abc/def/ghi
+#EST chmod 314 $M0/abc/def/ghi
+cp ${FDL_META_FILE} ${FDL_DATA_FILE} ${tmpdir}
+
+# Get back to an empty state and unmount.
+TEST rm -rf $M0/abc
+TEST umount $M0
+
+# Make sure we really are in an empty state. Otherwise the tests below could
+# pass just because we never cleaned up in the first place.
+TEST [ ! -d ${B0}/${V0}-0/abc ]
+
+# Create a stub volfile.
+vol_file=${GLUSTERD_WORKDIR}/vols/${V0}/${V0}.${H0}.${log_id}.vol
+vol_id_line=$(grep volume-id ${vol_file})
+cat > ${tmpdir}/recon.vol << EOF
+volume recon-posix
+ type storage/posix
+ option directory ${B0}/${V0}-0
+${vol_id_line}
+end-volume
+EOF
+
+TEST gf_recon ${tmpdir}/recon.vol ${tmpdir}/$(basename ${FDL_META_FILE}) \
+ ${tmpdir}/$(basename ${FDL_DATA_FILE})
+
+TEST [ -d ${B0}/${V0}-0/abc/def ]
+EXPECT "peekaboo" cat ${B0}/${V0}-0/abc/def/ghi
+# TBD: test permissions, xattrs
+
+cleanup
+#G_TESTDEF_TEST_STATUS_CENTOS6=KNOWN_ISSUE,BUG=1385758
+#G_TESTDEF_TEST_STATUS_NETBSD7=KNOWN_ISSUE,BUG=1385758
diff --git a/tests/features/ssl-authz.t b/tests/features/ssl-authz.t
new file mode 100755
index 00000000000..497083e5a3a
--- /dev/null
+++ b/tests/features/ssl-authz.t
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+ping_file () {
+ echo hello > $1 2> /dev/null
+}
+for d in /etc/ssl /etc/openssl /usr/local/etc/openssl ; do
+ if test -d $d ; then
+ SSL_BASE=$d
+ break
+ fi
+done
+SSL_KEY=$SSL_BASE/glusterfs.key
+SSL_CERT=$SSL_BASE/glusterfs.pem
+SSL_CA=$SSL_BASE/glusterfs.ca
+
+cleanup;
+rm -f $SSL_BASE/glusterfs.*
+mkdir -p $B0/1
+mkdir -p $M0
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI v set all cluster.brick-multiplex on
+# Construct a cipher list that excludes CBC because of POODLE.
+# http://web.nvd.nist.gov/view/vuln/detail?vulnId=CVE-2014-3566
+#
+# Since this is a bit opaque, here's what it does:
+# (1) Get the ciphers matching a normal cipher-list spec
+# (2) Delete any colon-separated entries containing "CBC"
+# (3) Collapse adjacent colons from deleted entries
+# (4) Remove colons at the beginning or end
+function valid_ciphers {
+ openssl ciphers 'HIGH:!SSLv2' | sed \
+ -e '/[^:]*CBC[^:]*/s///g' \
+ -e '/::*/s//:/g' \
+ -e '/^:/s///' \
+ -e '/:$/s///'
+}
+
+TEST openssl genrsa -out $SSL_KEY 2048
+TEST openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ln $SSL_CERT $SSL_CA
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/{1,2,3} force
+TEST $CLI volume set $V0 server.ssl on
+TEST $CLI volume set $V0 client.ssl on
+TEST $CLI volume set $V0 ssl.cipher-list $(valid_ciphers)
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count
+
+# This mount should SUCCEED because ssl-allow=* by default. This effectively
+# disables SSL authorization, though authentication and encryption might still
+# be enabled.
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+TEST ping_file $M0/before
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+glusterfsd_pid=`pgrep glusterfsd`
+TEST [ $glusterfsd_pid != 0 ]
+start=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'`
+echo "Memory consumption for glusterfsd process"
+for i in $(seq 1 100); do
+ gluster v heal $V0 info >/dev/null
+done
+#Wait to cleanup memory
+sleep 10
+end=`pmap -x $glusterfsd_pid | grep total | awk -F " " '{print $4}'`
+diff=$((end-start))
+
+# If memory consumption is more than 15M some leak in SSL code path
+
+TEST [ $diff -lt 15000 ]
+
+
+# Set ssl-allow to a wildcard that includes our identity.
+TEST $CLI volume stop $V0
+TEST $CLI volume set $V0 auth.ssl-allow Any*
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "3" online_brick_count
+
+# This mount should SUCCEED because we match the wildcard.
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+TEST ping_file $M0/before
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Set ssl-allow to include the identity we've created.
+TEST $CLI volume stop $V0
+TEST $CLI volume set $V0 auth.ssl-allow Anyone
+TEST $CLI volume start $V0
+
+# This mount should SUCCEED because this specific identity is allowed.
+TEST glusterfs --volfile-server=$H0 --volfile-id=$V0 $M0
+TEST ping_file $M0/before
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Change the authorized user name. Note that servers don't pick up changes
+# automagically like clients do, so we have to stop/start ourselves.
+TEST $CLI volume stop $V0
+TEST $CLI volume set $V0 auth.ssl-allow NotYou
+TEST $CLI volume start $V0
+
+# This mount should FAIL because the identity given by our certificate does not
+# match the allowed user. In other words, authentication works (they know who
+# we are) but authorization doesn't (we're not the right person).
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0
+
+# Looks like /*/bin/glusterfs isn't returning error status correctly (again).
+# We may get an unusable mount where ping will fail, or no mount at all,
+# where ping will write to the mount point instead of the mounted filesystem.
+# In order to avoid spurious failures, create a file by ping and check it
+# is absent from the brick.
+ping_file $M0/after
+TEST test -f $B0/1/before
+TEST ! test -f $B0/1/after
+
+cleanup;
diff --git a/tests/features/ssl-ciphers.t b/tests/features/ssl-ciphers.t
new file mode 100644
index 00000000000..b70fe360e02
--- /dev/null
+++ b/tests/features/ssl-ciphers.t
@@ -0,0 +1,244 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+brick_port() {
+ $CLI --xml volume status $1 | sed -n '/.*<port>\([0-9]*\).*/s//\1/p'
+}
+
+wait_mount() {
+ i=1
+ while [ $i -lt $CONFIG_UPDATE_TIMEOUT ] ; do
+ sleep 1
+ i=$(( $i + 1 ))
+ mounted=`mount|awk -v m=$1 '
+ BEGIN {r = "N";}
+ ($3 == m) {r = "Y"; exit;}
+ END {print r;}
+ '`
+ if [ "x${mounted}" = "xY" ] ; then
+ ls $M0 2>/dev/null || continue
+ break;
+ fi
+ done
+
+ if [ "x${mounted}" = "xY" ] ; then
+ ls $M0 2>/dev/null || mounted="N"
+ fi
+
+ echo $mounted
+}
+
+openssl_connect() {
+ ssl_opt="-verify 3 -verify_return_error -CAfile $SSL_CA"
+ ssl_opt="$ssl_opt -crl_check_all -CApath $TMPDIR"
+ cmd="echo "" | openssl s_client $ssl_opt $@ 2>/dev/null"
+ CIPHER=$(eval $cmd | awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}')
+ if [ "x${CIPHER}" = "x" -o "x${CIPHER}" = "x0000" -o "x${CIPHER}" = "x(NONE)" ] ; then
+ echo "N"
+ else
+ echo "Y"
+ fi
+}
+
+#Validate the cipher to pass EXPECT test case before call openssl_connect
+check_cipher() {
+ cmd="echo "" | openssl s_client $@ 2> /dev/null"
+ cipher=$(eval $cmd |awk -F "Cipher is" '{print $2}' | tr -d '[:space:]' | awk -F " " '{print $1}')
+ if [ "x${cipher}" = "x" -o "x${cipher}" = "x0000" -o "x${cipher}" = "x(NONE)" ] ; then
+ echo "N"
+ else
+ echo "Y"
+ fi
+}
+
+cleanup;
+mkdir -p $B0
+mkdir -p $M0
+
+TMPDIR=`mktemp -d /tmp/${0##*/}.XXXXXX`
+TEST test -d $TMPDIR
+
+SSL_KEY=$TMPDIR/self.key
+SSL_CSR=$TMPDIR/self.csr
+SSL_CERT=$TMPDIR/self.crt
+SSL_CA=$TMPDIR/ca.crt
+SSL_CFG=$TMPDIR/openssl.cnf
+SSL_CRL=$TMPDIR/crl.pem
+
+sed "s|@TMPDIR@|${TMPDIR}|" `pwd`/`dirname $0`/openssl.cnf.in > $SSL_CFG
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST openssl genrsa -out $SSL_KEY 2048 2>/dev/null
+TEST openssl req -config $SSL_CFG -new -key $SSL_KEY -x509 \
+ -subj /CN=CA -out $SSL_CA
+TEST openssl req -config $SSL_CFG -new -key $SSL_KEY \
+ -subj /CN=$H0 -out $SSL_CSR
+
+echo "01" > $TMPDIR/serial
+TEST touch $TMPDIR/index.txt $TMPDIR/index.txx.attr
+TEST mkdir -p $TMPDIR/certs $TMPDIR/newcerts $TMPDIR/crl
+TEST openssl ca -batch -config $SSL_CFG -in $SSL_CSR -out $SSL_CERT 2>&1
+
+touch $SSL_CRL
+CRLHASH=`openssl x509 -hash -fingerprint -noout -in $SSL_CA|sed -n '1s/$/.r0/p'`
+ln -sf $SSL_CRL $TMPDIR/$CRLHASH
+TEST openssl ca -config $SSL_CFG -gencrl -out $SSL_CRL 2>&1
+
+
+TEST $CLI volume create $V0 $H0:$B0/1
+TEST $CLI volume set $V0 server.ssl on
+TEST $CLI volume set $V0 client.ssl on
+TEST $CLI volume set $V0 ssl.private-key $SSL_KEY
+TEST $CLI volume set $V0 ssl.own-cert $SSL_CERT
+TEST $CLI volume set $V0 ssl.ca-list $SSL_CA
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+
+BRICK_PORT=`brick_port $V0`
+
+# Test we can connect
+EXPECT "Y" openssl_connect -connect $H0:$BRICK_PORT
+
+# Test SSLv2 protocol fails
+EXPECT "N" openssl_connect -ssl2 -connect $H0:$BRICK_PORT
+
+# Test SSLv3 protocol fails
+EXPECT "N" openssl_connect -ssl3 -connect $H0:$BRICK_PORT
+
+# Test TLSv1 protocol based on openssl version
+cmd="openssl version"
+ver=$(eval $cmd | awk -F " " '{print $2}' | grep "^1.1")
+if [ "x${ver}" = "x" ]; then
+ supp="N"
+else
+ supp="Y"
+fi
+EXPECT "${supp}" openssl_connect -tls1 -connect $H0:$BRICK_PORT
+
+# Test a HIGH CBC cipher
+cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
+
+# Test EECDH
+cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+
+# test MD5 fails
+cph=`check_cipher -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher DES-CBC3-MD5 -connect $H0:$BRICK_PORT
+
+# test RC4 fails
+cph=`check_cipher -cipher RC4-SHA -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher RC4-SHA -connect $H0:$BRICK_PORT
+
+# test eNULL fails
+cph=`check_cipher -cipher NULL-SHA256 -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher NULL-SHA256 -connect $H0:$BRICK_PORT
+
+# test SHA2
+cph=`check_cipher -cipher AES256-SHA256 -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES256-SHA256 -connect $H0:$BRICK_PORT
+
+# test GCM
+cph=`check_cipher -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES256-GCM-SHA384 -connect $H0:$BRICK_PORT
+
+# Test DH fails without DH params
+cph=`check_cipher -cipher EDH -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT
+
+# Test DH with DH params
+TEST $CLI volume set $V0 ssl.dh-param `pwd`/`dirname $0`/dh1024.pem
+EXPECT "`pwd`/`dirname $0`/dh1024.pem" volume_option $V0 ssl.dh-param
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+BRICK_PORT=`brick_port $V0`
+EXPECT "Y" openssl_connect -cipher EDH -connect $H0:$BRICK_PORT
+
+# Test the cipher-list option
+TEST $CLI volume set $V0 ssl.cipher-list AES256-SHA
+EXPECT AES256-SHA volume_option $V0 ssl.cipher-list
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+BRICK_PORT=`brick_port $V0`
+cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
+cph=`check_cipher -cipher AES128-SHA -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES128-SHA -connect $H0:$BRICK_PORT
+
+# Test the ec-curve option
+TEST $CLI volume set $V0 ssl.cipher-list EECDH:EDH:!TLSv1
+EXPECT EECDH:EDH:!TLSv1 volume_option $V0 ssl.cipher-list
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+BRICK_PORT=`brick_port $V0`
+cph=`check_cipher -cipher AES256-SHA -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher AES256-SHA -connect $H0:$BRICK_PORT
+cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+
+TEST $CLI volume set $V0 ssl.ec-curve invalid
+EXPECT invalid volume_option $V0 ssl.ec-curve
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+BRICK_PORT=`brick_port $V0`
+cph=`check_cipher -cipher EECDH -connect $H0:$BRICK_PORT`
+EXPECT "$cph" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+
+TEST $CLI volume set $V0 ssl.ec-curve secp521r1
+EXPECT secp521r1 volume_option $V0 ssl.ec-curve
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" online_brick_count
+BRICK_PORT=`brick_port $V0`
+EXPECT "Y" openssl_connect -cipher EECDH -connect $H0:$BRICK_PORT
+
+# test revocation
+TEST $CLI volume set $V0 ssl.crl-path $TMPDIR
+EXPECT $TMPDIR volume_option $V0 ssl.crl-path
+$GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+EXPECT "Y" wait_mount $M0
+TEST_FILE=`mktemp $M0/${0##*/}.XXXXXX`
+TEST test -f $TEST_FILE
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+TEST openssl ca -batch -config $SSL_CFG -revoke $SSL_CERT 2>&1
+TEST openssl ca -config $SSL_CFG -gencrl -out $SSL_CRL 2>&1
+
+# Failed once revoked
+# Although client fails to mount without restarting the server after crl-path
+# is set when no actual crl file is found on the client, it would also fail
+# when server is restarted for the same reason. Since the socket initialization
+# code is the same for client and server, the crl verification flags need to
+# be turned off for the client to avoid SSL searching for CRLs in the
+# ssl.crl-path. If no CRL files are found in the ssl.crl-path, SSL fails the
+# connect() attempt on the client.
+TEST $CLI volume stop $V0
+TEST $CLI volume start $V0
+$GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+EXPECT "N" wait_mount $M0
+TEST ! test -f $TEST_FILE
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Succeed with CRL disabled
+TEST $CLI volume stop $V0
+TEST $CLI volume set $V0 ssl.crl-path NULL
+EXPECT NULL volume_option $V0 ssl.crl-path
+TEST $CLI volume start $V0
+$GFS --volfile-id=$V0 --volfile-server=$H0 $M0
+EXPECT "Y" wait_mount $M0
+TEST test -f $TEST_FILE
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+rm -rf $TMPDIR
+cleanup;
diff --git a/tests/features/subdir-mount.t b/tests/features/subdir-mount.t
new file mode 100644
index 00000000000..a02bd6befc4
--- /dev/null
+++ b/tests/features/subdir-mount.t
@@ -0,0 +1,121 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../nfs.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{1,2,3,4};
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+
+## Mount FUSE with caching disabled (read-write)
+TEST $GFS -s $H0 --volfile-id $V0 --volume-name ${V0}-dht $M0;
+
+TEST ! stat $M0/subdir1;
+TEST mkdir $M0/subdir1;
+TEST ! stat $M0/subdir2;
+TEST mkdir $M0/subdir2;
+TEST ! stat $M0/subdir1/subdir1.1;
+TEST mkdir $M0/subdir1/subdir1.1;
+TEST ! stat $M0/subdir1/subdir1.1/subdir1.2;
+TEST mkdir $M0/subdir1/subdir1.1/subdir1.2;
+
+# mount volume/subdir1
+TEST $GFS --subdir-mount /subdir1 -s $H0 --volfile-id $V0 --volume-name ${V0}-dht $M1;
+
+TEST touch $M0/topfile;
+TEST ! stat $M1/topfile;
+
+TEST touch $M1/subdir1_file;
+TEST ! stat $M0/subdir1_file;
+TEST stat $M0/subdir1/subdir1_file;
+
+# mount volume/subdir2
+TEST $GFS --subdir-mount /subdir2 -s $H0 --volfile-id $V0 $M2;
+
+TEST ! stat $M2/topfile;
+
+TEST touch $M2/subdir2_file;
+TEST ! stat $M0/subdir2_file;
+TEST ! stat $M1/subdir2_file;
+TEST stat $M0/subdir2/subdir2_file;
+
+# umount $M1 / $M2
+TEST umount $M1
+TEST umount $M2
+
+# mount non-existing subdir ; this works with mount.glusterfs,
+# but with glusterfs, the script doesn't returns error.
+#TEST ! $GFS --subdir-mount subdir_not_there -s $H0 --volfile-id $V0 $M1;
+
+# mount subdir with depth
+TEST $GFS --subdir-mount /subdir1/subdir1.1/subdir1.2 -s $H0 --volfile-id $V0 $M2;
+TEST ! stat $M2/topfile;
+TEST touch $M2/subdir1.2_file;
+TEST ! stat $M0/subdir1.2_file;
+TEST stat $M0/subdir1/subdir1.1/subdir1.2/subdir1.2_file;
+
+TEST umount $M2
+
+# Lets validate the options # Not having '*' in here as there was some
+# problem with option validation with this
+TEST $CLI volume set $V0 auth.allow 192.168.1.1
+
+TEST $CLI volume set $V0 auth.allow "192.168.1.1,10.10.\*.\*,::1"
+
+TEST $CLI volume set $V0 auth.allow "/subdir1\(1.2.3.4\),/\(192.168.10.2\|192.168.11.1\),/subdir2\(1.2.3.4\)"
+
+# directories should be absolute
+TEST ! $CLI volume set $V0 auth.allow "subdir2\(1.2.3.4\)"
+
+# support subdir inside subdir
+TEST $CLI volume set $V0 auth.allow '/subdir1/subdir1.1/subdir1.2/\(1.2.3.4\|::1\),/\(192.168.10.1\|192.168.11.1\),/subdir2\(1.2.3.4\)'
+
+TEST $CLI volume stop $V0
+
+TEST $CLI volume start $V0
+
+TEST $GFS --subdir-mount /subdir1/subdir1.1/subdir1.2 -s $H0 --volfile-id $V0 $M2
+TEST stat $M2
+
+initcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l`
+# mount shouldn't fail even after add-brick
+TEST $CLI volume add-brick $V0 replica 2 $H0:$B0/${V0}{5,6};
+
+# Wait to execute create-subdir-mounts.sh script by glusterd
+newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l`
+while [ $newcnt -eq $initcnt ]
+do
+ newcnt=`grep -i create-subdir-mounts /var/log/glusterfs/glusterd.log | wc -l`
+ sleep 1
+done
+
+# Existing mount should still be active
+mount_inode=$(stat --format "%i" "$M2")
+TEST test "$mount_inode" == "1"
+
+TEST umount $M2
+
+# Now the exported subdirs should be automatically healed due to
+# hook scripts. Check if the mount is successful.
+TEST $GFS --subdir-mount /subdir2 -s $H0 --volfile-id $V0 $M2
+mount_inode=$(stat --format "%i" "$M2")
+TEST test "$mount_inode" == "1"
+
+TEST umount $M0
+TEST umount $M2
+
+
+TEST $CLI volume stop $V0;
+TEST $CLI volume delete $V0;
+TEST ! $CLI volume info $V0;
+
+## This should clean the mountpoints
+cleanup;
diff --git a/tests/features/trash.t b/tests/features/trash.t
new file mode 100755
index 00000000000..da5b50bc85a
--- /dev/null
+++ b/tests/features/trash.t
@@ -0,0 +1,247 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+
+cleanup
+
+test_mount() {
+ volume=$1
+ mount=$2
+ test_dir=$3
+ RETVAL=0
+ glusterfs -s $H0 --volfile-id $volume $mount --attribute-timeout=0
+
+ if [ "x$test_dir" = "x" ] ; then return $RETVAL; fi
+
+ timeout=0
+ while [ $timeout -lt $PROCESS_UP_TIMEOUT ] ; do
+ timeout=$(( $timeout + 1 ))
+ test -d $test_dir
+ RETVAL=$?
+ if [ $RETVAL -eq 0 ] ; then break ; fi
+ sleep 1
+ done
+
+ return $RETVAL
+}
+
+start_vol() {
+ volume=$1
+ mount=$2
+ test_dir=$3
+ $CLI volume start $volume
+ test_mount $volume $mount $test_dir
+ RETVAL=$?
+ return $RETVAL
+}
+
+create_files() {
+ echo 'Hi' > $1
+ echo 'Hai' > $2
+}
+
+file_exists () {
+ vol=$1
+ shift
+ for file in `ls $B0/${vol}1/$@ 2> /dev/null` ; do
+ test -e ${file} && return 0
+ done
+ for file in `ls $B0/${vol}2/$@ 2> /dev/null` ; do
+ test -e ${file} && return 0
+ done
+
+ return 1
+}
+
+unlink_op() {
+
+ rm -f $M0/$1
+ ls $M0/.trashcan/1/2/3 &> /dev/null
+ sleep 2
+
+ test ! -e $M0/$1
+ wildcard_exists $M0/.trashcan/$1*
+
+ # remove from trashcan
+ rm -f $M0/.trashcan/$1*
+ wildcard_not_exists $M0/.trashcan/$1*
+}
+
+truncate_op() {
+ truncate -s 2 $M0/$1
+ ls $M0/.trashcan/1/2/3 &> /dev/null
+ sleep 2
+
+ test -e $M0/$1
+ test $(ls -l $M0/$1 | awk '{print $5}') -eq 2 &> /dev/null
+ wildcard_exists $M0/.trashcan/$1*
+ test $(ls -l $M0/.trashcan/$1*|awk '{print $5}') -eq $2 &> /dev/null
+
+ # truncate from trashcan
+ truncate -s 1 $M0/.trashcan/$1*
+ test $(ls $M0/.trashcan/$1* | wc -l) -eq 1
+}
+
+wildcard_exists() {
+ test -e $1
+ if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
+}
+
+wildcard_not_exists() {
+ test ! -e $1
+ if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
+}
+
+# testing glusterd
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+# creating distributed volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+
+# checking volume status
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Created' volinfo_field $V0 'Status'
+EXPECT '2' brick_count $V0
+
+# test without enabling trash translator
+TEST start_vol $V0 $M0
+
+# test on enabling trash translator
+TEST $CLI volume set $V0 features.trash on
+EXPECT 'on' volinfo_field $V0 'features.trash'
+
+# files directly under mount point
+create_files $M0/file1 $M0/file2
+TEST file_exists $V0 file1 file2
+
+# perform unlink
+TEST unlink_op file1
+
+# perform truncate
+TEST truncate_op file2 4
+
+# create files directory hierarchy and check
+mkdir -p $M0/1/2/3
+create_files $M0/1/2/3/foo1 $M0/1/2/3/foo2
+TEST file_exists $V0 1/2/3/foo1 1/2/3/foo2
+
+# perform unlink
+TEST unlink_op 1/2/3/foo1
+
+# perform truncate
+TEST truncate_op 1/2/3/foo2 4
+
+# create a directory for eliminate pattern
+mkdir $M0/a
+
+# set the eliminate pattern
+TEST $CLI volume set $V0 features.trash-eliminate-path /a
+EXPECT '/a' volinfo_field $V0 'features.trash-eliminate-path'
+
+# create two files and check
+create_files $M0/a/test1 $M0/a/test2
+TEST file_exists $V0 a/test1 a/test2
+
+# remove from eliminate pattern
+rm -f $M0/a/test1
+EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test1*
+
+# truncate from eliminate path
+truncate -s 2 $M0/a/test2
+TEST [ -e $M0/a/test2 ]
+TEST [ `ls -l $M0/a/test2 | awk '{print $5}'` -eq 2 ]
+EXPECT "Y" wildcard_not_exists $M0/.trashcan/a/test2*
+
+# set internal op on
+TEST $CLI volume set $V0 features.trash-internal-op on
+EXPECT 'on' volinfo_field $V0 'features.trash-internal-op'
+
+# again create two files and check
+create_files $M0/inop1 $M0/inop2
+TEST file_exists $V0 inop1 inop2
+
+# perform unlink
+TEST unlink_op inop1
+
+# perform truncate
+TEST truncate_op inop2 4
+
+# remove one brick and restart the volume
+TEST $CLI volume remove-brick $V0 $H0:$B0/${V0}2 force
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+TEST $CLI volume stop $V0
+TEST start_vol $V0 $M0 $M0/.trashcan
+
+# again create two files and check
+create_files $M0/rebal1 $M0/rebal2
+TEST file_exists $V0 rebal1 rebal2
+
+# add one brick
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
+TEST [ -d $B0/${V0}3 ]
+
+
+# perform rebalance
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
+
+#Find out which file was migrated to the new brick
+file_name=$(ls $B0/${V0}3/rebal*| xargs basename)
+
+# check whether rebalance was succesful
+EXPECT "Y" wildcard_exists $B0/${V0}3/$file_name*
+EXPECT "Y" wildcard_exists $B0/${V0}1/.trashcan/internal_op/$file_name*
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# force required in case rebalance is not over
+TEST $CLI volume stop $V0 force
+
+# create a replicated volume
+TEST $CLI volume create $V1 replica 2 $H0:$B0/${V1}{1,2}
+
+# checking volume status
+EXPECT "$V1" volinfo_field $V1 'Volume Name'
+EXPECT 'Replicate' volinfo_field $V1 'Type'
+EXPECT 'Created' volinfo_field $V1 'Status'
+EXPECT '2' brick_count $V1
+
+# enable trash with options and start the replicate volume by disabling automatic self-heal
+TEST $CLI volume set $V1 features.trash on
+TEST $CLI volume set $V1 features.trash-internal-op on
+EXPECT 'on' volinfo_field $V1 'features.trash'
+EXPECT 'on' volinfo_field $V1 'features.trash-internal-op'
+TEST start_vol $V1 $M1 $M1/.trashcan
+
+# mount and check for trash directory
+TEST [ -d $M1/.trashcan/internal_op ]
+
+# create a file and check
+touch $M1/self
+TEST [ -e $B0/${V1}1/self -a -e $B0/${V1}2/self ]
+
+# kill one brick and delete the file from mount point
+kill_brick $V1 $H0 $B0/${V1}1
+EXPECT_WITHIN ${PROCESS_UP_TIMEOUT} "1" online_brick_count
+rm -f $M1/self
+EXPECT "Y" wildcard_exists $B0/${V1}2/.trashcan/self*
+
+# check renaming of trash directory through cli
+TEST $CLI volume set $V0 trash-dir abc
+TEST start_vol $V0 $M0 $M0/abc
+TEST [ -e $M0/abc -a ! -e $M0/.trashcan ]
+EXPECT "Y" wildcard_exists $B0/${V0}1/abc/internal_op/rebal*
+
+# ensure that rename and delete operation on trash directory fails
+rm -rf $M0/abc/internal_op
+TEST [ -e $M0/abc/internal_op ]
+rm -rf $M0/abc/
+TEST [ -e $M0/abc ]
+mv $M0/abc $M0/trash
+TEST [ -e $M0/abc ]
+
+cleanup
diff --git a/tests/features/unhashed-auto.t b/tests/features/unhashed-auto.t
new file mode 100755
index 00000000000..0a6bbfbb07d
--- /dev/null
+++ b/tests/features/unhashed-auto.t
@@ -0,0 +1,125 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+
+NFILES=100
+
+touch_files () {
+ for i in $(seq 1 $NFILES); do
+ touch $(printf $M0/dir/file%02d $i)
+ done
+}
+
+count_files () {
+ found=0
+ for i in $(seq 1 $NFILES); do
+ if [ -f $(printf $M0/dir/file%02d $i) ]; then
+ found=$((found+1))
+ fi
+ done
+ echo "found $found files" > /dev/tty
+ echo $found
+}
+
+wait_for_rebalance () {
+ while true; do
+ tmp=$(rebalance_completed)
+ if [ $tmp -eq 1 ]; then
+ sleep 1
+ else
+ break
+ fi
+ done
+}
+
+get_xattr () {
+ cmd="getfattr --absolute-names --only-values -n trusted.glusterfs.dht"
+ $cmd $1 | od -tx1 -An | tr -d ' '
+}
+
+get_xattr_hash () {
+ cmd="getfattr --absolute-names --only-values -n trusted.glusterfs.dht"
+ $cmd $1 | od -tx1 -An | awk '{printf("%s%s%s%s\n", $1, $2, $3, $4);}'
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume set $V0 cluster.lookup-optimize ON
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# Create some files for later tests.
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+TEST mkdir $M0/dir
+TEST touch_files
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Add a brick and do the fix-layout part of rebalance to update directory layouts
+# (including their directory commit hashes).
+TEST $CLI volume add-brick $V0 $H0:$B0/${V0}3
+EXPECT '3' brick_count $V0
+TEST $CLI volume rebalance $V0 fix-layout start
+TEST wait_for_rebalance
+
+# Now for the sneaky part. *Undo* the part of rebalance that updated the volume
+# commit hash, forcing a false match between that and the directory commit hashes.
+TEST setfattr -x trusted.glusterfs.dht.commithash $B0/${V0}1
+TEST setfattr -x trusted.glusterfs.dht.commithash $B0/${V0}2
+TEST setfattr -x trusted.glusterfs.dht.commithash $B0/${V0}3
+
+# Mount and check that we do *not* see all of the files. This indicates that we
+# correctly skipped the broadcast lookup that would have found them.
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+TEST [ $(count_files) -ne 100 ]
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Do the fix-layout again to generate a new volume commit hash.
+TEST $CLI volume rebalance $V0 fix-layout start
+TEST wait_for_rebalance
+
+# Mount and check that we *do* see all of the files. This indicates that we saw
+# the mismatch and did the broadcast lookup this time.
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+TEST [ $(count_files) -eq 100 ]
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Do a *full* rebalance and verify that the directory commit hash changed.
+old_val=$(get_xattr $B0/${V0}1/dir)
+TEST $CLI volume rebalance $V0 start
+TEST wait_for_rebalance
+new_val=$(get_xattr $B0/${V0}1/dir)
+TEST [ ! x"$old_val" = x"$new_val" ]
+
+# Force an anomoly on an existing layout and heal it
+## The healed layout should not carry a commit-hash (or should carry 1 in the
+## commit-hash)
+TEST setfattr -x trusted.glusterfs.dht $B0/${V0}1/dir
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+TEST [ -d $M0/dir ]
+new_hash=$(get_xattr_hash $B0/${V0}1/dir)
+TEST [ x"$new_hash" = x"00000001" ]
+new_hash=$(get_xattr_hash $B0/${V0}2/dir)
+TEST [ x"$new_hash" = x"00000001" ]
+
+# Unset the option and check that newly created directories get 1 in the
+# disk layout
+TEST $CLI volume set $V0 cluster.lookup-optimize off
+TEST mkdir $M0/dir1
+new_hash=$(get_xattr_hash $B0/${V0}1/dir1)
+TEST [ x"$new_hash" = x"00000001" ]
+new_hash=$(get_xattr_hash $B0/${V0}2/dir1)
+TEST [ x"$new_hash" = x"00000001" ]
+
+
+cleanup
diff --git a/tests/features/weighted-rebalance.t b/tests/features/weighted-rebalance.t
new file mode 100755
index 00000000000..0d730406fd0
--- /dev/null
+++ b/tests/features/weighted-rebalance.t
@@ -0,0 +1,78 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../dht.rc
+
+NFILES=1000
+
+touch_files () {
+ for i in $(seq 1 $NFILES); do
+ touch $(printf $M0/dir/file%02d $i) 2> /dev/null
+ done
+}
+
+count_files () {
+ found=0
+ for i in $(seq 1 $NFILES); do
+ if [ -f $(printf $1/dir/file%02d $i) ]; then
+ found=$((found+1))
+ fi
+ done
+ echo $found
+}
+
+get_xattr () {
+ cmd="getfattr --absolute-names --only-values -n trusted.glusterfs.dht"
+ $cmd $1 | od -tx1 -An | tr -d ' '
+}
+
+cleanup
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info
+
+TEST mkdir ${B0}/${V0}{1,2}
+
+TEST truncate -s $((40*1024*1024)) ${B0}/disk1
+TEST MKFS_LOOP -i 512 ${B0}/disk1
+TEST MOUNT_LOOP ${B0}/disk1 ${B0}/${V0}1
+
+TEST truncate -s $((80*1024*1024)) ${B0}/disk2
+TEST MKFS_LOOP -i 512 ${B0}/disk2
+TEST MOUNT_LOOP ${B0}/disk2 ${B0}/${V0}2
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2}
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# Create some files for later tests.
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+TEST mkdir $M0/dir
+TEST touch_files
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0
+
+# Check that the larger brick got more of the files.
+nfiles=$(count_files ${B0}/${V0}2)
+#echo $nfiles $(get_xattr ${B0}/${V0}1) $(get_xattr ${B0}/${V0}2) 3>&2 2>&1 1>&3 3>&-
+TEST [ $nfiles -ge 580 ]
+
+# Turn off the size-weighted rebalance.
+TEST $CLI volume set $V0 cluster.weighted-rebalance off
+
+# Rebalance again and check that the distribution is even again.
+TEST $CLI volume rebalance $V0 start force
+EXPECT_WITHIN $REBALANCE_TIMEOUT "0" rebalance_completed
+nfiles=$(count_files ${B0}/${V0}2)
+#echo $nfiles $(get_xattr ${B0}/${V0}1) $(get_xattr ${B0}/${V0}2) 3>&2 2>&1 1>&3 3>&-
+TEST [ $nfiles -le 580 ]
+
+$CLI volume stop $V0
+UMOUNT_LOOP ${B0}/${V0}{1,2}
+rm -f ${B0}/disk{1,2}
+
+cleanup
diff --git a/tests/features/worm.t b/tests/features/worm.t
new file mode 100755
index 00000000000..40b08cdee02
--- /dev/null
+++ b/tests/features/worm.t
@@ -0,0 +1,117 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}1
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name'
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+## Mount FUSE with caching disabled (read-write)
+TEST $GFS -s $H0 --volfile-id $V0 $M0
+
+## Tests for the volume level WORM
+TEST `echo "File 1" > $M0/file1`
+TEST touch $M0/file2
+
+## Enable the volume level WORM
+TEST $CLI volume set $V0 features.worm 1
+TEST ! mv $M0/file1 $M0/file11
+TEST `echo "block" > $M0/file2`
+
+## Disable the volume level WORM and delete the legacy files
+TEST $CLI volume set $V0 features.worm 0
+TEST rm -f $M0/*
+
+## Enable file level WORM
+TEST $CLI volume set $V0 features.worm-file-level 1
+TEST $CLI volume set $V0 features.default-retention-period 10
+TEST $CLI volume set $V0 features.auto-commit-period 5
+
+## Tests for manual transition to WORM/Retained state
+TEST `echo "worm 1" > $M0/file1`
+TEST chmod 0444 $M0/file1
+sleep 5
+TEST `echo "line 1" > $M0/file1`
+TEST ! mv $M0/file1 $M0/file2
+sleep 10
+TEST ! link $M0/file1 $M0/file2
+sleep 5
+TEST rm -f $M0/file1
+
+## Test for mv over WORM/Reatined state
+TEST `echo "worm 1" > $M0/file1`
+TEST chmod 0444 $M0/file1
+TEST `echo "worm 2" > $M0/file2`
+TEST ! mv $M0/file2 $M0/file1
+TEST rm -f $M0/file2
+sleep 10
+TEST rm -f $M0/file1
+
+## Test for state transition over write.
+TEST `echo "worm 1" > $M0/file3`
+sleep 5
+TEST `echo "worm 2" >> $M0/file3`
+EXPECT 'worm 1' cat $M0/file3
+TEST ! rm -f $M0/file3
+
+## Test for checking if Worm files are undeletable after setting worm-files-deletable as 0.
+TEST $CLI volume set $V0 features.worm-files-deletable 0
+TEST `echo "worm 1" > $M0/file4`
+TEST chmod 0444 $M0/file4
+sleep 10
+TEST `echo "worm 1" >> $M0/file4`
+TEST ! rm -f $M0/file4
+
+## Test for state transition if auto-commit-period is 0
+TEST $CLI volume set $V0 features.auto-commit-period 0
+TEST `echo "worm 1" > $M0/file5`
+EXPECT '3/10/0' echo $(getfattr -e text --absolute-names --only-value -n "trusted.reten_state" $B0/${V0}1/file5)
+EXPECT 'worm 1' cat $M0/file5
+TEST ! rm -f $M0/file5
+TEST $CLI volume set $V0 features.auto-commit-period 5
+
+## Test for checking if retention-period is updated on increasing the access time of a WORM-RETAINED file.
+TEST $CLI volume set $V0 features.worm-files-deletable 1
+TEST `echo "worm 1" >> $M0/file1`
+initial_timestamp=$(date +%s)
+current_time_seconds=$(date +%S | sed 's/^0*//' );
+TEST chmod 0444 $M0/file1
+EXPECT '3/10/5' echo $(getfattr -e text --absolute-names --only-value -n "trusted.reten_state" $B0/${V0}1/file1)
+changed_timestamp=$(date +%Y%m%d%H%M --date '60 seconds');
+seconds_diff=`expr 60 - $((current_time_seconds))`
+TEST `touch -a -t "${changed_timestamp}" $M0/file1`
+EXPECT "3/$seconds_diff/5" echo $(getfattr -e text --absolute-names --only-value -n "trusted.reten_state" $B0/${V0}1/file1)
+sleep $seconds_diff
+TEST `echo "worm 2" >> $M0/file1`
+EXPECT "$initial_timestamp" echo $(stat --printf %X $M0/file1)
+
+
+## Test for checking if retention-period is updated on decreasing the access time of a WORM-RETAINED file
+TEST $CLI volume set $V0 features.default-retention-period 120
+initial_timestamp=$(date +%s)
+current_time_seconds=$(date +%S | sed 's/^0*//' );
+TEST chmod 0444 $M0/file1
+EXPECT '3/120/5' echo $(getfattr -e text --absolute-names --only-value -n "trusted.reten_state" $B0/${V0}1/file1)
+changed_timestamp=$(date +%Y%m%d%H%M --date '60 seconds');
+seconds_diff=`expr 60 - $((current_time_seconds))`
+TEST `touch -a -t "${changed_timestamp}" $M0/file1`
+EXPECT "3/$seconds_diff/5" echo $(getfattr -e text --absolute-names --only-value -n "trusted.reten_state" $B0/${V0}1/file1)
+sleep $seconds_diff
+TEST `echo "worm 4" >> $M0/file1`
+EXPECT "$initial_timestamp" echo $(stat --printf %X $M0/file1)
+TEST rm -f $M0/file1
+
+TEST $CLI volume stop $V0
+EXPECT 'Stopped' volinfo_field $V0 'Status'
+
+cleanup;
diff --git a/tests/features/worm_sh.t b/tests/features/worm_sh.t
new file mode 100644
index 00000000000..da7afc084b2
--- /dev/null
+++ b/tests/features/worm_sh.t
@@ -0,0 +1,75 @@
+#!/bin/bash
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 cluster.data-self-heal off
+TEST $CLI volume set $V0 cluster.metadata-self-heal off
+TEST $CLI volume set $V0 cluster.entry-self-heal off
+
+TEST $CLI volume set $V0 self-heal-daemon off
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+## Enable file level WORM
+TEST $CLI volume set $V0 features.worm-file-level 1
+TEST $CLI volume set $V0 features.default-retention-period 100
+TEST $CLI volume set $V0 features.auto-commit-period 5
+
+## Tests for manual transition to WORM/Retained state
+TEST `echo "worm1" > $M0/file1`
+TEST chmod 0444 $M0/file1
+sleep 5
+TEST `echo "worm2" > $M0/file2`
+TEST chmod 0444 $M0/file2
+sleep 5
+TEST `echo "worm3" > $M0/file3`
+TEST chmod 0444 $M0/file3
+sleep 5
+
+## Stopp one of the bricks
+TEST kill_brick $V0 $H0 $B0/${V0}1
+
+## Manipulate the WORMed-Files
+TEST $CLI volume set $V0 features.worm-file-level 0
+sleep 5
+
+TEST chmod 0777 $M0/file1
+TEST `echo "test" >> $M0/file1`
+TEST `echo "test" >> $M0/file3`
+TEST `rm -rf $M0/file2`
+
+## Metadata changes
+TEST setfattr -n user.test -v qwerty $M0/file3
+sleep 5
+
+## Enable file level WORM again
+TEST $CLI volume set $V0 features.worm-file-level 1
+
+## Restart volume and trigger self-heal
+TEST $CLI volume stop $V0 force
+TEST $CLI volume start $V0 force
+TEST $CLI volume set $V0 self-heal-daemon on
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $CLI volume heal $V0
+
+# Wait for heal to complete
+EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
+
+# Check if entry-heal has happened
+TEST diff <(ls $B0/${V0}0 | sort) <(ls $B0/${V0}1 | sort)
+
+# Test if data was healed
+TEST diff $B0/${V0}0/file1 $B0/${V0}1/file1
+TEST diff $B0/${V0}0/file3 $B0/${V0}1/file3
+
+# Test if metadata was healed and exists on both the bricks
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}1/file3
+EXPECT "qwerty" get_text_xattr user.test $B0/${V0}0/file3
+
+cleanup;
diff --git a/tests/fileio.rc b/tests/fileio.rc
new file mode 100644
index 00000000000..ec0767832e8
--- /dev/null
+++ b/tests/fileio.rc
@@ -0,0 +1,61 @@
+#!/bin/bash
+
+function fd_available() {
+ for i in {1..65536}; do
+ if [ ! -e /proc/$$/fd/$i ]; then
+ echo $i;
+ return 0;
+ fi
+ done
+
+ return 1;
+}
+
+function fd_open() {
+ local fd=$1;
+ local mode=$2
+ local path=$3;
+
+ case $mode in
+ r)
+ eval "exec $fd<$path";;
+ w)
+ eval "exec $fd>$path";;
+ rw)
+ eval "exec $fd<>$path";;
+ *)
+ false;;
+ esac
+}
+
+
+function fd_cat() {
+ local fd=$1;
+
+ eval "cat <&$fd";
+}
+
+
+function fd_write() {
+ local fd=$1;
+ shift;
+ local msg="$@";
+
+ eval "echo $@ >&$fd";
+}
+
+
+function fd_close() {
+ local fd=$1;
+
+ eval "exec $fd>&-";
+}
+
+
+function fd_based_example() {
+ TEST fd=`fd_available`;
+ TEST fd_open $fd "rw" $M0/filename;
+ TEST fd_cat $fd; # print existing stuff
+ TEST fd_write $fd "new stuff"; # append
+ TEST fd_close $fd;
+}
diff --git a/tests/geo-rep.rc b/tests/geo-rep.rc
new file mode 100644
index 00000000000..9ba4262730e
--- /dev/null
+++ b/tests/geo-rep.rc
@@ -0,0 +1,495 @@
+GEO_REP_TIMEOUT=120
+CHECK_MOUNT_TIMEOUT=50
+#check for mount point
+function check_mounted () {
+ df | grep $1 | wc -l
+}
+
+function check_status_num_rows()
+{
+ local search_key=$1
+ $GEOREP_CLI $master $slave status | grep -F "$search_key" | wc -l
+}
+
+function check_fanout_status_num_rows()
+{
+ local search_key=$1
+ $GEOREP_CLI $master status | grep -F "$search_key" | wc -l
+}
+
+function check_fanout_status_detail_num_rows()
+{
+ local search_key=$1
+ $GEOREP_CLI $master status detail | grep -F "$search_key" | wc -l
+}
+
+function check_all_status_num_rows()
+{
+ local search_key=$1
+ $GEOREP_CLI status | grep -F "$search_key" | wc -l
+}
+
+function check_all_status_detail_num_rows()
+{
+ local search_key=$1
+ $GEOREP_CLI status detail | grep -F "$search_key" | wc -l
+}
+
+function verify_checkpoint_met()
+{
+ local master=$1
+ local slave=$2
+ $GEOREP_CLI $master $slave status detail| grep -F "Yes" | wc -l
+}
+
+function check_keys_distributed()
+{
+ local search_key=$(cat /var/lib/glusterd/geo-replication/master_slave_common_secret.pem.pub)
+ grep -F "$search_key" ~/.ssh/authorized_keys > /dev/null
+ echo $?
+}
+
+function check_common_secret_file()
+{
+ stat /var/lib/glusterd/geo-replication/master_slave_common_secret.pem.pub
+ echo $?
+}
+
+function create_rename_symlink_case()
+{
+ mkdir ${mastermnt}/MUL_REN_SYMLINK
+ cd ${mastermnt}/MUL_REN_SYMLINK
+ mkdir sym_dir1
+ ln -s "sym_dir1" sym1
+ mv sym1 sym2
+ mv sym2 sym3
+ mv sym3 sym4
+ cd -
+}
+
+function create_data()
+{
+ prefix=$1
+
+ # GF_FOP_MKNOD
+ # GF_FOP_MKDIR
+ # GF_FOP_UNLINK
+ # GF_FOP_RMDIR
+ # GF_FOP_SYMLINK
+ # GF_FOP_RENAME
+ # GF_FOP_LINK
+ # GF_FOP_SETXATTR
+ # GF_FOP_REMOVEXATTR
+ # GF_FOP_CREATE
+ # GF_FOP_SETATTR
+
+ # Regular file + data
+ echo "HelloWorld!" > ${master_mnt}/${prefix}_f1
+ touch ${master_mnt}/${prefix}_f2
+ touch ${master_mnt}/${prefix}_f3
+
+ # non-ascii filename test
+ echo "Hello non-ascii" > ${master_mnt}/${prefix}_f1_ಸಂತಸ
+ touch ${master_mnt}/${prefix}_f2_ಸಂತಸ
+ touch ${master_mnt}/${prefix}_f3_ಸಂತಸ
+
+ # dir
+ mkdir ${master_mnt}/${prefix}_d1
+ mkdir ${master_mnt}/${prefix}_d2
+ mkdir ${master_mnt}/${prefix}_d3
+
+ # non-ascii dir and filename test
+ mkdir ${master_mnt}/${prefix}_d1_ನನà³à²¨
+ mkdir ${master_mnt}/${prefix}_d2_ಸಂತಸ
+ mkdir ${master_mnt}/${prefix}_d3_ಸಂತಸ
+ echo "Hello non-ascii" > ${master_mnt}/${prefix}_d1_ನನà³à²¨/ಸಂತಸ
+
+ # Hardlink + non-ascii name
+ ln ${master_mnt}/${prefix}_f1 ${master_mnt}/${prefix}_hl1
+ ln ${master_mnt}/${prefix}_f1 ${master_mnt}/${prefix}_hl1_ಸಂತಸ
+
+ # Symlink
+ cd ${master_mnt}
+ ln -s ${prefix}_f1 ${prefix}_sl1
+ ln -s ${prefix}_f1 ${prefix}_sl1_ಸಂತಸ
+ cd -
+
+ # UNLINK
+ rm ${master_mnt}/${prefix}_f2
+ rm ${master_mnt}/${prefix}_f2_ಸಂತಸ
+
+ # RMDIR
+ rmdir ${master_mnt}/${prefix}_d2
+ rmdir ${master_mnt}/${prefix}_d2_ಸಂತಸ
+
+ # Rename - File
+ mv ${master_mnt}/${prefix}_f3 ${master_mnt}/${prefix}_f4
+ mv ${master_mnt}/${prefix}_f3_ಸಂತಸ ${master_mnt}/${prefix}_f4_ಸಂತಸ
+
+ # Rename - Dir
+ mv ${master_mnt}/${prefix}_d3 ${master_mnt}/${prefix}_d4
+ mv ${master_mnt}/${prefix}_d3_ಸಂತಸ ${master_mnt}/${prefix}_d4_ಸಂತಸ
+
+ # chown
+ touch ${master_mnt}/${prefix}_chown_f1
+ chown 1000:1000 ${master_mnt}/${prefix}_chown_f1
+ touch ${master_mnt}/${prefix}_chown_f1_ಸಂತಸ
+ chown 1000:1000 ${master_mnt}/${prefix}_chown_f1_ಸಂತಸ
+}
+
+function create_data_hang()
+{
+ prefix=$1
+ mkdir ${master_mnt}/${prefix}
+ cd ${master_mnt}/${prefix}
+ # ~1k files is required with 1 sync-job and hang happens if
+ # stderr buffer of tar/ssh executed with Popen is full (i.e., 64k).
+ # 64k is hit when ~800 files were not found while syncing data
+ # from master. So around 1k files is required to hit the condition.
+ for i in {1..1000}
+ do
+ echo "test data" > file$i
+ mv -f file$i file
+ done
+ cd -
+}
+
+function chown_file_ok()
+{
+ local file_owner=$(stat --format "%u:%g" "$1")
+ if test "X$file_owner" != "X1000:1000"; then echo 1; else echo 0; fi
+}
+
+function regular_file_ok()
+{
+ local file_type=$(stat --format "%F" "$1")
+ if test "X$file_type" != "Xregular file"; then echo 1; else echo 0; fi
+}
+
+function directory_ok()
+{
+ file_type=$(stat --format "%F" "$1")
+ if test "X$file_type" != "Xdirectory"; then echo 1; else echo 0; fi
+}
+
+function unlink_ok()
+{
+ stat "$1" > /dev/null 2>&1
+ rc=$?
+ echo $rc
+}
+
+function hardlink_file_ok()
+{
+ orig_file=$1
+ link_file=$2
+
+ orig_inode=$(stat --format "%i" "$orig_file")
+ rc=$?
+ if test $rc != 0; then
+ echo $rc
+ else
+ link_inode=$(stat --format "%i" "$link_file")
+ rc=$?
+ if test $rc != 0; then
+ echo $rc
+ else
+ if test $orig_inode != $link_inode; then
+ echo 1
+ else
+ echo 0
+ fi
+ fi
+ fi
+}
+
+function data_ok()
+{
+ path=$1
+ data1="$2"
+ data2=$(cat $path)
+ echo "data1:$data1"
+ echo "data2:$data2"
+ if test "X$data1" != "X$data2"; then
+ echo 1
+ else
+ echo 0
+ fi
+}
+
+function arequal_checksum()
+{
+ master=$1
+ slave=$2
+ ret=$(diff <(arequal-checksum -p $master) <(arequal-checksum -p $slave) | wc -l)
+ echo x$ret
+}
+
+function symlink_ok()
+{
+ local orig_file_name=$1
+ local symlink_file=$2
+
+ local file_type=$(stat --format "%F" "$symlink_file")
+ if test "X$file_type" != "Xsymbolic link"; then
+ echo 1
+ else
+ local fname=$(readlink $symlink_file)
+ if test "X$fname" != "X$orig_file_name"; then
+ echo 2
+ else
+ echo 0
+ fi
+ fi
+
+}
+
+function rename_file_ok()
+{
+ old_name=$1
+ new_name=$2
+
+ if [ -f $old_name ]; then
+ echo 1
+ elif [ ! -f $new_name ]; then
+ echo 2
+ else
+ echo 0
+ fi
+}
+
+function rename_dir_ok()
+{
+ old_name=$1
+ new_name=$2
+
+ if [ -d $old_name ]; then
+ echo 1
+ elif [ ! -d $new_name ]; then
+ echo 2
+ else
+ echo 0
+ fi
+}
+
+function create_georep_session()
+{
+ $CLI system:: execute gsec_create
+ rc=$?
+ if test $rc != 0; then
+ echo $rc
+ else
+ $CLI volume geo-rep $master $slave create push-pem
+ rc=$?
+ if test $rc != 0; then
+ echo $rc
+ else
+ echo 0
+ fi
+ fi
+}
+
+# logrotate_simulate should be called (rotate_count + 1) times to cause
+# an unlink and a gfid re-allocation.
+# remember to keep the file name and rotate_count the same across the
+# calls
+function logrotate_simulate()
+{
+ file_name=$1
+ declare -i rotate_count=$2
+
+ while [ $rotate_count -ge 0 ]; do
+ source_file="$file_name.$((rotate_count))"
+ if [ $rotate_count -eq 0 ]; then
+ source_file="$file_name"
+ fi
+ if [ -f "${source_file}" ]; then
+ mv "${source_file}" "$file_name.$((rotate_count+1))"
+ fi
+ ((rotate_count--))
+ done
+
+ # logrotate causes gfid to be rellocated to a new file created
+ # after an unlink and a blind rename later causes georep session
+ # to go Faulty
+ # this should not happen if source basename on slave is tested
+ # to be linked with its own gfid as on master, before invoking
+ # the rename syscall
+ touch $file_name
+ rotate_count=$2
+ unlink_file_name="$file_name.$((rotate_count+1))"
+ unlink $unlink_file_name 2>/dev/null
+}
+
+function create_rename()
+{
+ file_name=$1
+ echo $file_name > $file_name
+ mv $file_name $file_name.bak
+}
+
+function create_rename_ok()
+{
+ file_name=$1
+ # after a log replay, we don't expect the original file
+ # to be recreated i.e. a dangling entry without a corresponding
+ # back-end gfid link should not exist on the slave
+ if [ -f "$file_name" ]; then
+ echo 1
+ else
+ echo 0
+ fi
+}
+
+function hardlink_rename()
+{
+ file_name=$1
+ echo $file_name > $file_name
+ ln $file_name $file_name.hl
+ mv $file_name.hl $file_name.hl1
+}
+
+function hardlink_rename_ok()
+{
+ file_name=$1
+ # the hardlink file should not exist on the slave after renaming
+ # to one of its links on changelog reprocessing
+ if [ ! -f "$file_name" ]; then
+ echo 1
+ elif [ ! -f "$file_name.hl1" ]; then
+ echo 2
+ elif [ -f "$file_name.hl" ]; then
+ echo 3
+ else
+ echo 0
+ fi
+}
+
+function create_symlink_rename_mkdir_data()
+{
+ mkdir ${master_mnt}/symlink_test1
+ touch ${master_mnt}/symlink_test1/file1
+ ln -s "./file1" ${master_mnt}/symlink_test1/sym_link
+ mv ${master_mnt}/symlink_test1/sym_link ${master_mnt}/symlink_test1/rn_sym_link
+ mkdir ${master_mnt}/symlink_test1/sym_link
+}
+function verify_symlink_rename_mkdir_data()
+{
+ sym_dir=$1
+ if [ ! -f $sym_dir/file1 ]; then
+ echo 1
+ elif [ ! -h $sym_dir/rn_sym_link ]; then
+ echo 2
+ elif [ ! -d $sym_dir/sym_link ]; then
+ echo 3
+ else
+ echo 0
+ fi
+}
+
+function create_rsnapshot_data()
+{
+ rm -rf /tmp/rsnapshot_symlinkbug
+ mkdir /tmp/rsnapshot_symlinkbug
+ ln -f -s /does/not/exist /tmp/rsnapshot_symlinkbug/a_symlink
+ rsync -a /tmp/rsnapshot_symlinkbug ${master_mnt}/
+ cp -al ${master_mnt}/rsnapshot_symlinkbug ${master_mnt}/rsnapshot_symlinkbug.0
+ ln -f -s /does/not/exist2 /tmp/rsnapshot_symlinkbug/a_symlink
+ rsync -a /tmp/rsnapshot_symlinkbug ${master_mnt}/
+ cp -al ${master_mnt}/rsnapshot_symlinkbug ${master_mnt}/rsnapshot_symlinkbug.1
+}
+
+function verify_rsnapshot_data()
+{
+ dir="$1/rsnapshot_symlinkbug"
+ dir0="$1/rsnapshot_symlinkbug.0"
+ dir1="$1/rsnapshot_symlinkbug.1"
+ if [ ! -d "$dir" ]; then
+ echo 1
+ elif [ ! -h $dir/a_symlink ]; then
+ echo 2
+ elif test "X$(readlink $dir/a_symlink)" != "X/does/not/exist2"; then
+ echo 3
+ elif [ ! -h $dir0/a_symlink ]; then
+ echo 4
+ elif test "X$(readlink $dir0/a_symlink)" != "X/does/not/exist"; then
+ echo 5
+ elif [ ! -h $dir1/a_symlink ]; then
+ echo 6
+ elif test "X$(readlink $dir1/a_symlink)" != "X/does/not/exist2"; then
+ echo 7
+ else
+ echo 0
+ fi
+}
+
+function create_hardlink_rename_data()
+{
+ dir=${master_mnt}/hardlink_rename_issue
+ mkdir $dir
+ echo "test_data" > $dir/f1
+ ln $dir/f1 $dir/f2
+ mv $dir/f2 $dir/f3
+ unlink $dir/f1
+}
+
+function verify_hardlink_rename_data()
+{
+ dir=$1/hardlink_rename_issue
+ if [ ! -d $dir ]; then
+ echo 1
+ elif [ -f $dir/f1 ]; then
+ echo 2
+ elif [ -f $dir/f2 ]; then
+ echo 3
+ elif [ ! -f $dir/f3 ]; then
+ echo 4
+ elif test "Xtest_data" != "X$(cat $dir/f3)"; then
+ echo 5
+ else
+ echo 0
+ fi
+}
+
+function check_slave_read_only()
+{
+ volum=$1
+ gluster volume info $1 | grep 'features.read-only: on'
+ echo $?
+}
+
+function create_rename_with_existing_destination()
+{
+ dir=$1/rename_with_existing_destination
+ mkdir $dir
+ for i in {1..5}
+ do
+ echo "Data_set$i" > $dir/data_set$i
+ mv $dir/data_set$i $dir/data_set -f
+ done
+}
+
+function verify_rename_with_existing_destination()
+{
+ dir=$1/rename_with_existing_destination
+
+ if [ ! -d $dir ]; then
+ echo 1
+ elif [ ! -f $dir/data_set ]; then
+ echo 2
+ elif [ -f $dir/data_set1 ]; then
+ echo 3
+ elif [ -f $dir/data_set2 ]; then
+ echo 4
+ elif [ -f $dir/data_set3 ]; then
+ echo 5
+ elif [ -f $dir/data_set4 ]; then
+ echo 6
+ elif [ -f $dir/data_set5 ]; then
+ echo 7
+ elif test "XData_set5" != "X$(cat $dir/data_set)"; then
+ echo 8
+ else
+ echo 0
+ fi
+}
diff --git a/tests/gfid2path/block-mount-access.t b/tests/gfid2path/block-mount-access.t
new file mode 100644
index 00000000000..b1726ad9604
--- /dev/null
+++ b/tests/gfid2path/block-mount-access.t
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a 2*2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start the volume
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+#CREATE
+fname=$M0/file1
+touch $fname;
+backpath=$B0/${V0}1/file1
+
+pgfid="00000000-0000-0000-0000-000000000001"
+
+#Check for the presence of xattr
+key="trusted.gfid2path"
+gfid2path_xattr=$(getfattr -h -d -m. $backpath 2>/dev/null | grep -a $key | cut -f1 -d'=')
+
+#Check getxattr
+TEST ! getfattr -h -n $gfid2path_xattr $M0/file1
+
+#Check listgetxattr
+EXPECT_NOT $gfid2path_xattr get_xattr_key $key $M0/file1
+
+#Check removexattr
+TEST ! setfattr -h -x $gfid2path_xattr $M0/file1
+
+#Check setxattr
+TEST ! setfattr -h -n "trusted.gfid2path.d16e15bafe6e4257" -v "$pgfid/file2" $M0/file1
+
+#Cleanup
+cleanup;
diff --git a/tests/gfid2path/get-gfid-to-path.t b/tests/gfid2path/get-gfid-to-path.t
new file mode 100644
index 00000000000..dea95f4c9f8
--- /dev/null
+++ b/tests/gfid2path/get-gfid-to-path.t
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../afr.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a 1*2 volume
+TEST $CLI volume create $V0 replica 2 $H0:$B0/${V0}{0,1}
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+## Start the volume
+TEST $CLI volume start $V0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "Y" glustershd_up_status
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 0
+EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status_in_shd $V0 1
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --aux-gfid-mount --volfile-id=$V0 $M0;
+
+root_gfid="00000000-0000-0000-0000-000000000001"
+
+#Check for ROOT
+EXPECT "/" get_gfid2path $M0/.gfid/$root_gfid
+
+#CREATE
+fname=$M0/file1
+touch $fname;
+
+#Get gfid of file1
+gfid=$(getfattr -h --only-values -n glusterfs.gfid.string $M0/file1)
+
+#Get path from virt xattr
+EXPECT "/file1" get_gfid2path $M0/.gfid/$gfid
+
+#Create hardlink and get path
+ln $fname $M0/hl_file1
+EXPECT "/file1" get_gfid2path $M0/.gfid/$gfid
+EXPECT "/hl_file1" get_gfid2path $M0/.gfid/$gfid
+
+#Rename and get path
+mv $fname $M0/rn_file1
+EXPECT "/hl_file1" get_gfid2path $M0/.gfid/$gfid
+EXPECT "/rn_file1" get_gfid2path $M0/.gfid/$gfid
+
+#Create symlink and get path
+ln -s $fname $M0/sym_file1
+gfid=$(getfattr -h --only-values -n glusterfs.gfid.string $M0/sym_file1)
+EXPECT "/sym_file1" get_gfid2path $M0/.gfid/$gfid
+
+#Create dir and get path
+mkdir -p $M0/dir1/dir2
+gfid=$(getfattr -h --only-values -n glusterfs.gfid.string $M0/dir1/dir2)
+EXPECT "/dir1/dir2" get_gfid2path $M0/.gfid/$gfid
+
+#Create file under dir2 and get path
+touch $M0/dir1/dir2/file1
+gfid=$(getfattr -h --only-values -n glusterfs.gfid.string $M0/dir1/dir2/file1)
+EXPECT "/dir1/dir2/file1" get_gfid2path $M0/.gfid/$gfid
+
+#Create hardlink under dir2 and get path
+ln $M0/dir1/dir2/file1 $M0/dir1/hl_file1
+gfid=$(getfattr -h --only-values -n glusterfs.gfid.string $M0/dir1/dir2/file1)
+EXPECT "/dir1/dir2/file1" get_gfid2path $M0/.gfid/$gfid
+EXPECT "/dir1/hl_file1" get_gfid2path $M0/.gfid/$gfid
+
+cleanup;
diff --git a/tests/gfid2path/gfid2path_fuse.t b/tests/gfid2path/gfid2path_fuse.t
new file mode 100644
index 00000000000..d0fe1fc16ae
--- /dev/null
+++ b/tests/gfid2path/gfid2path_fuse.t
@@ -0,0 +1,166 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+CLI_SETGFID2PATH="gluster-setgfid2path";
+
+cleanup;
+
+XXHSUM_SOURCE="$(dirname $0)/../../contrib/xxhash/xxhsum.c $(dirname $0)/../../contrib/xxhash/xxhash.c"
+XXHSUM_EXEC=$(dirname $0)/xxhsum
+
+## Build xxhsum C source
+build_tester $XXHSUM_SOURCE -o $XXHSUM_EXEC -I$(dirname $0)/../../contrib/xxhash
+TEST [ -e $XXHSUM_EXEC ]
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a single brick volume (B=1)
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '1' brick_count $V0
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Mount the volume
+TEST $GFS --volfile-server=$H0 --volfile-id=$V0 $M0;
+
+## disable gfid2path
+TEST $CLI volume set $V0 gfid2path disable
+
+pgfid="00000000-0000-0000-0000-000000000001"
+xxh64_file=$B0/${V0}1/xxh64_file
+
+# Create a file before enabling gfid2path
+fname=$M0/before_file1
+touch $fname;
+backpath=$B0/${V0}1/before_file1
+
+# Set gfid2path xattr
+TEST $CLI_SETGFID2PATH $backpath
+
+#Check for the presence of xattr
+pgfid_bname=$pgfid/before_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+## enable gfid2path
+TEST $CLI volume set $V0 gfid2path enable
+
+#CREATE
+fname=$M0/file1
+touch $fname;
+backpath=$B0/${V0}1/file1
+
+#Check for the presence of xattr
+pgfid_bname=$pgfid/file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#MKNOD
+fname=$M0/mknod_file1
+mknod $fname p;
+backpath=$B0/${V0}1/mknod_file1
+
+#Check for the presence of xattr
+pgfid_bname=$pgfid/mknod_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#LINK
+fname1=$M0/file1
+fname2=$M0/hl_file1
+ln $fname1 $fname2
+backpath1=$B0/${V0}1/file1
+backpath2=$B0/${V0}1/hl_file1
+
+#Check for the presence of two xattrs
+pgfid_bname=$pgfid/file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath1
+
+pgfid_bname=$pgfid/hl_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath2
+
+#RENAME
+fname1=$M0/file1
+fname2=$M0/rn_file1
+mv $fname1 $fname2
+backpath=$B0/${V0}1/rn_file1
+
+#Check for the presence of new xattr
+pgfid_bname=$pgfid/file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT_NOT $pgfid_bname get_text_xattr $key $backpath
+
+pgfid_bname=$pgfid/rn_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#UNLINK
+fname1=$M0/hl_file1
+rm -f $fname1
+fname2=$M0/rn_file1
+backpath=$B0/${V0}1/rn_file1
+
+#Check removal of xattr
+pgfid_bname=$pgfid/hl_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT_NOT $pgfid_bname get_text_xattr $key $backpath
+
+pgfid_bname=$pgfid/rn_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#SYMLINK
+fname=rn_file1
+sym_fname=$M0/sym_file1
+ln -s $fname $sym_fname
+backpath=$B0/${V0}1/sym_file1
+
+#Check for the presence of xattr
+pgfid_bname=$pgfid/sym_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#FINAL UNLINK
+fname=$M0/rn_file1
+sym_fname=$M0/sym_file1
+mknod_fname=$M0/mknod_file1
+
+rm -f $fname
+rm -f $sym_fname
+rm -f $mknod_fname
+TEST ! stat $fname
+TEST ! stat $sym_fname
+TEST ! stat $mknod_fname
+
+#Cleanups
+rm -f $STUB_EXEC
+cleanup;
diff --git a/tests/gfid2path/gfid2path_nfs.t b/tests/gfid2path/gfid2path_nfs.t
new file mode 100644
index 00000000000..d1ea7df2f4d
--- /dev/null
+++ b/tests/gfid2path/gfid2path_nfs.t
@@ -0,0 +1,152 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../nfs.rc
+
+#G_TESTDEF_TEST_STATUS_CENTOS6=NFS_TEST
+
+cleanup;
+
+XXHSUM_SOURCE="$(dirname $0)/../../contrib/xxhash/xxhsum.c $(dirname $0)/../../contrib/xxhash/xxhash.c"
+XXHSUM_EXEC=$(dirname $0)/xxhsum
+
+## Build xxhsum C source
+build_tester $XXHSUM_SOURCE -o $XXHSUM_EXEC -I$(dirname $0)/../../contrib/xxhash
+TEST [ -e $XXHSUM_EXEC ]
+
+TEST glusterd
+TEST pidof glusterd
+
+## Create a single brick volume (B=1)
+TEST $CLI volume create $V0 $H0:$B0/${V0}1;
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '1' brick_count $V0
+
+TEST $CLI volume set $V0 nfs.disable false
+
+## Start the volume
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+## Wait for volume to register with rpc.mountd
+EXPECT_WITHIN $NFS_EXPORT_TIMEOUT "1" is_nfs_export_available
+
+## Mount the volume
+TEST mount_nfs $H0:/$V0 $N0 nolock;
+
+
+pgfid="00000000-0000-0000-0000-000000000001"
+xxh64_file=$B0/${V0}1/xxh64_file
+
+#CREATE
+fname=$N0/file1
+touch $fname;
+backpath=$B0/${V0}1/file1
+
+#Check for the presence of xattr
+pgfid_bname=$pgfid/file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#MKNOD
+fname=$N0/mknod_file1
+mknod $fname p;
+backpath=$B0/${V0}1/mknod_file1
+
+#Check for the presence of xattr
+pgfid_bname=$pgfid/mknod_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#LINK
+fname1=$N0/file1
+fname2=$N0/hl_file1
+ln $fname1 $fname2
+backpath1=$B0/${V0}1/file1
+backpath2=$B0/${V0}1/hl_file1
+
+#Check for the presence of two xattrs
+pgfid_bname=$pgfid/file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath1
+
+pgfid_bname=$pgfid/hl_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath2
+
+#RENAME
+fname1=$N0/file1
+fname2=$N0/rn_file1
+mv $fname1 $fname2
+backpath=$B0/${V0}1/rn_file1
+
+#Check for the presence of new xattr
+pgfid_bname=$pgfid/file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT_NOT $pgfid_bname get_text_xattr $key $backpath
+
+pgfid_bname=$pgfid/rn_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#UNLINK
+fname1=$N0/hl_file1
+rm -f $fname1
+fname2=$N0/rn_file1
+backpath=$B0/${V0}1/rn_file1
+
+#Check removal of xattr
+pgfid_bname=$pgfid/hl_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT_NOT $pgfid_bname get_text_xattr $key $backpath
+
+pgfid_bname=$pgfid/rn_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#SYMLINK
+fname=rn_file1
+sym_fname=$N0/sym_file1
+ln -s $fname $sym_fname
+backpath=$B0/${V0}1/sym_file1
+
+#Check for the presence of xattr
+pgfid_bname=$pgfid/sym_file1
+echo -n $pgfid_bname > $xxh64_file
+xxh64sum=$(($XXHSUM_EXEC $xxh64_file) 2>/dev/null | awk '{print $1}')
+key="trusted.gfid2path.$xxh64sum"
+EXPECT $pgfid_bname get_text_xattr $key $backpath
+
+#FINAL UNLINK
+fname=$N0/rn_file1
+sym_fname=$N0/sym_file1
+mknod_fname=$N0/mknod_file1
+
+rm -f $fname
+rm -f $sym_fname
+rm -f $mknod_fname
+TEST ! stat $fname
+TEST ! stat $sym_fname
+TEST ! stat $mknod_fname
+
+#Cleanups
+rm -f $STUB_EXEC
+cleanup;
diff --git a/tests/glusterfind/glusterfind-basic.t b/tests/glusterfind/glusterfind-basic.t
new file mode 100644
index 00000000000..ccb33fb1fc8
--- /dev/null
+++ b/tests/glusterfind/glusterfind-basic.t
@@ -0,0 +1,84 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+. $(dirname $0)/../env.rc
+
+SCRIPT_TIMEOUT=300
+
+##Cleanup and start glusterd
+cleanup;
+TEST glusterd;
+TEST pidof glusterd
+
+##create .keys
+mkdir -p /var/lib/glusterd/glusterfind/.keys
+
+#create_and_start test_volume
+TEST $CLI volume create test-vol $H0:$B0/b1 $H0:$B0/b2 $H0:$B0/b3
+TEST gluster volume start test-vol
+
+##Mount test-vol
+TEST glusterfs -s $H0 --volfile-id test-vol $M0
+
+TEST timestamp1=$(date +'%s')
+
+##Create files and dirs inside the mount point
+TEST mkdir -p $M0/dir1
+TEST touch $M0/file1
+
+##Glusterfind Create
+TEST glusterfind create sess_vol1 test-vol --force
+
+##################################################################################
+#Incremental crawl
+##################################################################################
+##Glusterfind Pre
+TEST glusterfind pre sess_vol1 test-vol output_file.txt
+
+#Glusterfind Post
+TEST glusterfind post sess_vol1 test-vol
+
+##Glusterfind List
+EXPECT '1' echo $(glusterfind list | grep sess_vol1 | wc -l)
+
+TEST timestamp2=$(date +'%s')
+
+##Glusterfind Query
+TEST glusterfind query test-vol --since-time $timestamp1 --end-time $timestamp2 output_file.txt
+
+#################################################################################
+#Full Crawl
+#################################################################################
+##Glusterfind Pre
+TEST glusterfind pre sess_vol1 test-vol output_file.txt --full --regenerate-outfile
+EXPECT '1' echo $(grep 'NEW dir1' output_file.txt | wc -l)
+EXPECT '1' echo $(grep 'NEW file1' output_file.txt | wc -l)
+
+##Glusterfind Query commands
+TEST glusterfind query test-vol --full output_file.txt
+EXPECT '1' echo $(grep 'NEW dir1' output_file.txt | wc -l)
+EXPECT '1' echo $(grep 'NEW file1' output_file.txt | wc -l)
+
+##using tag, full crawl
+TEST glusterfind query test-vol --full --tag-for-full-find NEW output_file.txt
+EXPECT '1' echo $(grep 'NEW dir1' output_file.txt | wc -l)
+EXPECT '1' echo $(grep 'NEW file1' output_file.txt | wc -l)
+
+##using -field-separator option, full crawl
+glusterfind query test-vol --full output_file.txt --field-separator "=="
+EXPECT '1' echo $(grep 'NEW==dir1' output_file.txt | wc -l)
+EXPECT '1' echo $(grep 'NEW==file1' output_file.txt | wc -l)
+
+##Adding or Replacing a Brick from an Existing Glusterfind Session
+TEST gluster volume add-brick test-vol $H0:$B0/b4 force
+
+##To make existing session work after brick add
+TEST glusterfind create sess_vol test-vol --force
+EXPECT '1' echo $(glusterfind list | grep sess_vol1 | wc -l)
+
+##glusterfind delete
+TEST glusterfind delete sess_vol test-vol
+
+rm -rf output_file.txt
+cleanup;
diff --git a/tests/include.rc b/tests/include.rc
new file mode 100644
index 00000000000..0dc7d830449
--- /dev/null
+++ b/tests/include.rc
@@ -0,0 +1,1350 @@
+
+checkpoint_time="$(date +%s%N)"
+
+M0=${M0:=/mnt/glusterfs/0}; # 0th mount point for FUSE
+M1=${M1:=/mnt/glusterfs/1}; # 1st mount point for FUSE
+M2=${M2:=/mnt/glusterfs/2}; # 2nd mount point for FUSE
+M3=${M3:=/mnt/glusterfs/3}; # 3rd mount point for FUSE
+N0=${N0:=/mnt/nfs/0}; # 0th mount point for NFS
+N1=${N1:=/mnt/nfs/1}; # 1st mount point for NFS
+V0=${V0:=patchy}; # volume name to use in tests
+V1=${V1:=patchy1}; # volume name to use in tests
+GMV0=${GMV0:=master}; # master volume name to use in geo-rep tests
+GSV0=${GSV0:=slave}; # slave volume name to use in geo-rep tests
+GSV1=${GSV1:=slave1}; # slave volume name to use in geo-rep tests
+B0=${B0:=/d/backends}; # top level of brick directories
+WORKDIRS="$B0 $M0 $M1 $M2 $M3 $N0 $N1"
+
+ROOT_GFID="00000000-0000-0000-0000-000000000001"
+DOT_SHARD_GFID="be318638-e8a0-4c6d-977d-7a937aa84806"
+
+META_VOL=${META_VOL:=gluster_shared_storage}; # shared gluster storage volume used by snapshot scheduler, nfs ganesha and geo-rep.
+META_MNT=${META_MNT:=/var/run/gluster/shared_storage}; # Mount point of shared gluster volume.
+
+CC=cc
+OSTYPE=$(uname -s)
+
+env_dir=$(dirname $0)
+while true; do
+ ENV_RC=${env_dir}/env.rc
+ if [ -f ${ENV_RC} ]; then
+ break
+ fi
+ new_dir=$(dirname $env_dir)
+ if [ x"$new_dir" = x"$old_dir" ]; then
+ ENV_RC="/not/found"
+ break
+ fi
+ old_dir=$env_dir
+ env_dir=$new_dir
+done
+
+if [ ! -f $ENV_RC ]; then
+ echo "Aborting." | tee /dev/stderr
+ echo | tee /dev/stderr
+ echo "env.rc not found" | tee /dev/stderr
+ echo | tee /dev/stderr
+ echo "Please correct the problem and try again." | tee /dev/stderr
+ echo | tee /dev/stderr
+ exit 1
+fi
+. $ENV_RC
+
+H0=${H0:=`hostname`}; # hostname
+MOUNT_TYPE_FUSE="fuse.glusterfs"
+GREP_MOUNT_OPT_RO="grep (ro"
+GREP_MOUNT_OPT_RW="grep (rw"
+UMOUNT_F="umount -f"
+
+PATH=$PATH:${PWD}/tests/utils
+
+case $OSTYPE in
+Linux)
+ H0=${H0:=`hostname --fqdn`}; # hostname
+ ;;
+NetBSD)
+ MOUNT_TYPE_FUSE="puffs|perfuse|fuse.glusterfs"
+ GREP_MOUNT_OPT_RO="grep (read-only"
+ GREP_MOUNT_OPT_RW="grep -v (read-only"
+ UMOUNT_F="umount -f -R"
+ ;;
+*)
+ ;;
+esac
+
+DEBUG=${DEBUG:=0} # turn on debugging?
+
+PROCESS_DOWN_TIMEOUT=5
+PROCESS_UP_TIMEOUT=45
+NFS_EXPORT_TIMEOUT=20
+CHILD_UP_TIMEOUT=20
+PROBE_TIMEOUT=60
+PEER_SYNC_TIMEOUT=20
+REBALANCE_TIMEOUT=600
+REOPEN_TIMEOUT=20
+HEAL_TIMEOUT=80
+IO_HEAL_TIMEOUT=120
+MARKER_UPDATE_TIMEOUT=20
+JANITOR_TIMEOUT=60
+UMOUNT_TIMEOUT=5
+CONFIG_UPDATE_TIMEOUT=5
+AUTH_REFRESH_INTERVAL=10
+GRAPH_SWITCH_TIMEOUT=10
+UNLINK_TIMEOUT=5
+MDC_TIMEOUT=5
+IO_WAIT_TIMEOUT=5
+DISK_FAIL_TIMEOUT=80
+
+LOGDIR=$(gluster --print-logdir)
+
+statedumpdir=`gluster --print-statedumpdir`; # Default directory for statedump
+
+CLI="gluster --mode=script --wignore";
+CLI_NO_FORCE="gluster --mode=script";
+
+# CLI_IGNORE_PARTITION makes sure that the warning related to bricks being on
+# root partition is ignored while running the command in a "no force" mode
+CLI_IGNORE_PARTITION="gluster --mode=script --wignore-partition"
+
+function wait_delay() {
+ local delay="$1"
+ local interval="$2"
+ shift 2
+ local deadline="$(($(date +%s%N) + ${delay}000000000))"
+
+ $*
+ while [[ $? -ne 0 ]]; do
+ if [[ $(date +%s%N) -ge ${deadline} ]]; then
+ return 1
+ fi
+ sleep ${interval}
+ $*
+ done
+
+ return 0
+}
+
+_GFS () {
+ glusterfs "$@"
+ local mount_ret=$?
+ if [ $mount_ret != 0 ]; then
+ return $mount_ret
+ fi
+ local mount_point=${!#}
+ local i=0
+ while true; do
+ touch $mount_point/xy_zzy 2> /dev/null && break
+ i=$((i+1))
+ [ $i -lt 100 ] || break
+ sleep 0.1
+ done
+ rm -f $mount_point/xy_zzy
+ return $mount_ret
+}
+GFS="_GFS --attribute-timeout=0 --entry-timeout=0";
+
+mkdir -p $WORKDIRS
+
+case $OSTYPE in
+FreeBSD | Darwin)
+wc () {
+ if test "x$1" = "x-l"; then
+ awk '{ lines++ } END {print lines}'
+ fi
+ if test "x$1" = "x-w"; then
+ awk '{ words += NF } END {print words}' }
+ fi
+ if test "x$1" = "x-c"; then
+ awk '{ chars += length($0) + 1 } END {print chars}'
+ fi
+ if test "x$1" = "x-m"; then
+ awk '{ chars += length($0) + 1 } END {print chars}'
+ fi
+}
+;;
+NetBSD)
+wc() {
+ /usr/bin/wc $@ | sed 's/^ *\([0-9]*\).*$/\1/g'
+}
+;;
+esac
+
+testcnt=`egrep '^[[:space:]]*(EXPECT|EXPECT_NOT|TEST|EXPECT_WITHIN|EXPECT_KEYWORD)[[:space:]]' $0 | wc -l`
+expect_tests=`egrep '^[[:space:]]*TESTS_EXPECTED_IN_LOOP[[:space:]]*' $0`
+
+x_ifs=$IFS
+IFS=$'\n'
+for line in $expect_tests; do
+ expect_tests=`echo $line | cut -f 2 -d =`
+ testcnt=`expr $testcnt + $expect_tests`
+done
+IFS=$x_ifs
+
+echo "1..`echo $testcnt`"
+
+t=1
+
+function dbg()
+{
+ [ "x$DEBUG" = "x0" ] || echo "$*" >&2;
+}
+
+function G_LOG()
+{
+ local g_log_logdir;
+ g_log_logdir=`$CLI --print-logdir`
+ test -d $g_log_logdir
+ if [ $? != 0 ]; then
+ return
+ fi
+ local g_log_string;
+ g_log_string="++++++++++ G_LOG:$0: TEST: $@ ++++++++++"
+ g_log_string="`date -u +["%F %T.%6N"]`:$g_log_string"
+ local g_log_filename
+ for g_log_filename in `find $g_log_logdir/ -type f -name \*.log`;
+ do
+ echo "$g_log_string" >> "$g_log_filename"
+ done
+}
+
+function test_header()
+{
+ dbg "=========================";
+ dbg "TEST $t (line $TESTLINE): $*";
+ saved_cmd="$*"
+ start_time="$(date +%s%N)"
+}
+
+
+function test_footer()
+{
+ RET=$?
+ local lineno=$1
+ local err=$2
+ local end_time
+ local elapsed1
+ local elapsed2
+
+ end_time="$(date +%s%N)"
+ elapsed1="$(((start_time - checkpoint_time) / 1000000))"
+ elapsed2="$(((end_time - start_time) / 1000000))"
+ checkpoint_time="$end_time"
+ if [ $RET -eq 0 ]; then
+ printf "ok %3d [%7d/%7d] <%4d> '%s'\n" "$t" "$elapsed1" "$elapsed2" "$lineno" "$saved_cmd";
+ else
+ printf "not ok %3d [%7d/%7d] <%4d> '%s' -> '%s'\n" "$t" "$elapsed1" "$elapsed2" "$lineno" "$saved_cmd" "$err"
+ if [ "$EXIT_EARLY" = "1" ]; then
+ cleanup
+ exit $RET
+ fi
+ fi
+
+ dbg "RESULT $t: $RET";
+
+ t=`expr $t + 1`;
+}
+
+function test_expect_footer()
+{
+ local lineno=$1
+ local e=$2
+ local a=$3
+ local err=""
+
+ if ! [[ "$a" =~ $e ]]; then
+ err="Got \"$a\" instead of \"$e\""
+ fi
+ [[ "$a" =~ $e ]];
+
+ test_footer "$lineno" "$err";
+}
+
+function _EXPECT()
+{
+ TESTLINE=$1;
+ shift;
+ local a=""
+
+ G_LOG $TESTLINE "$@";
+ test_header "$@";
+
+ e="$1";
+ shift;
+ a=$("$@" | tail -1)
+
+ if [ "x$e" = "x" ] ; then
+ test_expect_footer "$TESTLINE" "x$e" "x$a";
+ else
+ test_expect_footer "$TESTLINE" "$e" "$a";
+ fi
+}
+
+function test_expect_not_footer()
+{
+ local lineno=$1
+ local e=$2
+ local a=$3
+ local err=""
+
+ if [[ "$a" =~ $e ]]; then
+ err="Got \"$a\" when not expecting it"
+ fi
+
+ ! [[ "$a" =~ $e ]];
+ test_footer "$lineno" "$err";
+}
+
+function _EXPECT_NOT()
+{
+ TESTLINE=$1;
+ shift;
+ local a=""
+
+ G_LOG $TESTLINE "$@";
+ test_header "$@";
+
+ e="$1";
+ shift;
+ a=$("$@" | tail -1)
+
+ if [ "x$e" = "x" ] ; then
+ test_expect_not_footer "$TESTLINE" "x$e" "x$a";
+ else
+ test_expect_not_footer "$TESTLINE" "$e" "$a";
+ fi
+}
+
+function _EXPECT_KEYWORD()
+{
+ TESTLINE=$1;
+ shift;
+ G_LOG $TESTLINE "$@";
+ test_header "$@";
+
+ e="$1";
+ shift;
+ "$@" | tail -1 | grep -q "$e"
+
+ test_footer "$TESTLINE";
+}
+
+function _TEST()
+{
+ TESTLINE=$1;
+ shift;
+ local redirect=""
+
+ G_LOG $TESTLINE "$@";
+ test_header "$@";
+
+ if [ "$1" = "!" ]; then
+ redirect="2>&1"
+ fi
+
+ eval "$@" >/dev/null $redirect
+
+ test_footer "$TESTLINE";
+}
+
+#This function should be used carefully.
+#The expected regex, given to this function, should be
+#used within ^ and $ to match exactly with the output of
+#command.
+function _EXPECT_WITHIN()
+{
+ TESTLINE=$1
+ shift;
+
+ local timeout=$1
+ shift;
+
+ G_LOG $TESTLINE "$@";
+ test_header "$@"
+
+ e=$1;
+ a="";
+ shift;
+
+ local endtime="$(( ${timeout}000000000 + $(date +%s%N) ))"
+
+ # We *want* this to be globally visible.
+ EW_RETRIES=0
+
+ while [[ "$(date +%s%N)" < "$endtime" ]]; do
+ a=$("$@" | tail -1 ; exit ${PIPESTATUS[0]})
+ ## Check command success
+ if [ $? -ne 0 ]; then
+ break;
+ fi
+ ## Check match success
+ if [[ "$a" =~ $e ]]; then
+ break;
+ fi
+ sleep 0.25;
+ EW_RETRIES=$((EW_RETRIES+1))
+ done
+
+ if [ "x$e" = "x" ] ; then
+ test_expect_footer "$TESTLINE" "x$e" "x$a";
+ else
+ test_expect_footer "$TESTLINE" "$e" "$a";
+ fi
+}
+
+
+function SKIP_TESTS()
+{
+ dbg "Skipping tests $t-$testcnt";
+ while [ $t -le $testcnt ]; do
+ true ; test_footer;
+ done
+}
+
+
+function _TEST_IN_LOOP()
+{
+ testcnt=`expr $testcnt + 1`;
+ _TEST $@
+}
+
+function _EXPECT_WITHIN_TEST_IN_LOOP()
+{
+ testcnt=`expr $testcnt + 1`;
+ _EXPECT_WITHIN $@
+}
+
+which killall > /dev/null || {
+ killall() {
+ pkill $@
+ }
+}
+
+which pidof > /dev/null || {
+ pidof() {
+ $PYTHON pidof.py $@
+ }
+}
+
+stat -c %s /dev/null > /dev/null 2>&1 || {
+ stat() {
+ local format=""
+ local f=""
+
+ if [ "x$1" = "x-c" ] ; then
+ oformat=$2
+ shift
+ shift
+ files=$@
+ else
+ files=$@
+ fi
+
+ for f in $files ; do
+ format=$oformat
+
+ # %t/%T should return 0 for non devices.
+ case "${format}" in
+ *%t*|*%T*)
+ `which stat` -f '%HT' $f | grep -q 'Device$' || \
+ format=`echo "${format}" | sed 's/%t/0/g; s/%T/0/g;'`
+ ;;
+ *)
+ ;;
+ esac
+
+ if [ "x${format}" = "x" ] ; then
+ `which stat` $f
+ else
+ cmd=""
+ case $format in
+ *%u*) cmd="${cmd} s/%u/`$( which stat ) -f %u $f`/g;" ;&
+ *%g*) cmd="${cmd} s/%g/`$( which stat ) -f %g $f`/g;" ;&
+ *%a*) cmd="${cmd} s/%a/`$( which stat ) -f %p $f |
+ sed 's/^..//; s/^0//'`/g;" ;&
+ *%A*) cmd="${cmd} s/%A/`ls -ld $f|awk '{print $1}'`/g;" ;&
+ *%s*) cmd="${cmd} s/%s/`$( which stat ) -f %z $f`/g;" ;&
+ *%h*) cmd="${cmd} s/%h/`$( which stat ) -f %l $f`/g;" ;&
+ *%F*) cmd="${cmd} s/%F/`$( which stat ) -f %HT $f | sed '
+ s/Directory/directory/;
+ s/Fifo File/fifo/;
+ s/Symbolic Link/symbolic link/;
+ s/Regular File/regular file/;
+ s/Block Device/block special file/;
+ s/Character Device/character special file/;
+ ' | sed \"$(
+ test -s $f || echo 's/regular file/regular empty file/g'
+ )\"`/g;" ;&
+ *%n*) cmd="${cmd} s|%n|`$( which stat ) -f %N $f`|g;" ;&
+ *%Y*) cmd="${cmd} s/%Y/`$( which stat ) -f %m $f`/g;" ;&
+ *%X*) cmd="${cmd} s/%X/`$( which stat ) -f %a $f`/g;" ;&
+ *%Z*) cmd="${cmd} s/%Z/`$( which stat ) -f %c $f`/g;" ;&
+ *%.Z*) cmd="${cmd} s/%.Z/`$( which stat ) -f %.9Fc $f`/g;" ;&
+ *%b*) cmd="${cmd} s/%b/`$( which stat ) -f %b $f`/g;" ;&
+ *%B*) cmd="${cmd} s/%B/512/g;" ;&
+ *%t*) cmd="${cmd} s/%t/`$( which stat ) -f %XHr $f`/g;" ;&
+ *%T*) cmd="${cmd} s/%T/`$( which stat ) -f %XLr $f`/g;" ;&
+ esac
+
+ `which stat` -f "`echo $format|sed \"$cmd\"`" $f
+ fi
+ done
+ }
+}
+
+function signal_pids() {
+ local sig="$1"
+ shift
+ local pids=($*)
+
+ if [[ ${#pids[@]} -gt 0 ]]; then
+ kill -${sig} ${pids[@]} 2>/dev/null || true
+ fi
+}
+
+function check_pids() {
+ local pids=($*)
+ local tmp=()
+ local pid
+
+ for pid in "${pids[@]}"; do
+ kill -0 "${pid}" 2>/dev/null && tmp+=(${pid})
+ done
+
+ echo "${tmp[@]}"
+}
+
+function pids_alive() {
+ local pids=($*)
+
+ if [[ "$(check_pids ${pids[@]})" != "" ]]; then
+ return 1;
+ fi
+
+ return 0
+}
+
+function terminate_pids() {
+ local pids=($*)
+
+ signal_pids TERM ${pids[@]}
+ wait_delay ${PROCESS_DOWN_TIMEOUT} 0.1 pids_alive ${pids[@]}
+ if [[ $? -ne 0 ]]; then
+ pids=($(check_pids ${pids[@]}))
+ signal_pids KILL ${pids[@]}
+ wait_delay 1 0.1 pids_alive ${pids[@]}
+ if [[ $? -ne 0 ]]; then
+ return 2
+ fi
+
+ return 1
+ fi
+
+ return 0
+}
+
+function process_pids() {
+ local proc
+ local pids=()
+
+ for proc in $*; do
+ pids+=($(pgrep ${proc}))
+ done
+
+ echo "${pids[@]}"
+}
+
+## Lock files should get automatically removed once "usradd" or "groupadd"
+## command finishes. But sometimes we encounter situations (bugs) where
+## some of these files may not get properly unlocked after the execution of
+## the command. In that case, when we execute useradd next time, it may show
+## the error “cannot lock /etc/password†or “unable to lock group fileâ€.
+## So, to avoid any such errors, check for any lock files under /etc.
+## and remove those.
+
+function remove_lock_files()
+{
+ if [ ! -f /etc/passwd.lock ];
+ then
+ rm -rf /etc/passwd.lock;
+ fi
+
+ if [ ! -f /etc/group.lock ];
+ then
+ rm -rf /etc/group.lock;
+ fi
+
+ if [ ! -f /etc/shadow.lock ];
+ then
+ rm -rf /etc/shadow.lock;
+ fi
+
+ if [ ! -f /etc/gshadow.lock ];
+ then
+ rm -rf /etc/gshadow.lock;
+ fi
+}
+
+
+function cleanup()
+{
+ local end_time
+
+ # Prepare flags for umount
+ case `uname -s` in
+ Linux)
+ flag="-l"
+ ;;
+ NetBSD)
+ flag="-f -R"
+ ;;
+ FreeBSD|Darwin)
+ flag="-f"
+ ;;
+ *)
+ flag=""
+ ;;
+ esac
+
+ # Clean up lock files.
+ remove_lock_files
+
+ # Clean up all client mounts
+ for m in `mount | grep fuse.glusterfs | awk '{print $3}'`; do
+ umount $flag $m
+ done
+
+ # Unmount all well known mount points
+ umount $flag $M0 2>/dev/null || umount -f $M0 2>/dev/null || true;
+ umount $flag $M1 2>/dev/null || umount -f $M1 2>/dev/null || true;
+ umount $flag $M2 2>/dev/null || umount -f $M2 2>/dev/null || true;
+ umount $flag $N0 2>/dev/null || umount -f $N0 2>/dev/null || true;
+ umount $flag $N1 2>/dev/null || umount -f $N1 2>/dev/null || true;
+
+
+ # unmount all stale mounts from /tmp, This is a temporary work around
+ # till the stale mount in /tmp is found.
+ umount $flag /tmp/mnt* 2>/dev/null
+
+
+ # Send SIGTERM to all gluster processes and rpc.statd that are still running
+ terminate_pids $(process_pids glusterfs glusterfsd glusterd rpc.statd)
+
+ test x"$OSTYPE" = x"NetBSD" && pkill -9 perfused || true
+
+ # unregister nfs and related services from portmapper/rpcbind
+ ## nfs
+ rpcinfo -d 100003 3 2>/dev/null || true;
+ ## mountd
+ rpcinfo -d 100005 1 2>/dev/null || true;
+ rpcinfo -d 100005 3 2>/dev/null || true;
+ ## nlockmgr
+ rpcinfo -d 100021 1 2>/dev/null || true;
+ rpcinfo -d 100021 4 2>/dev/null || true;
+ ## nfs_acl
+ rpcinfo -d 100227 3 2>/dev/null || true;
+
+ # unmount brick filesystems after killing daemons
+ MOUNTPOINTS=`mount | grep "$B0/" | awk '{print $3}'`
+ for m in $MOUNTPOINTS;
+ do
+ umount $flag $m
+ done
+
+ # Cleanup lvm
+ type cleanup_lvm &>/dev/null && cleanup_lvm || true;
+
+ # Destroy loop devices
+ # TODO: This should be a function DESTROY_LOOP
+ case `uname -s` in
+ Linux)
+ LOOPDEVICES=`losetup -a | grep "$B0/" | \
+ awk '{print $1}' | tr -d :`
+ for l in $LOOPDEVICES;
+ do
+ losetup -d $l
+ done
+ ;;
+ NetBSD)
+ # cleanup loopback device with unmounted backing store
+ for vnd in /dev/vnd* ; do
+ vnconfig -l ${vnd} 2>&1 | \
+ grep -q 'Bad file descriptor' && vnconfig -u ${vnd}
+ done
+
+ vnd=`vnconfig -l | \
+ awk '!/not in use/{printf("%s%s:%d ", $1, $2, $5);}'`
+ for l in ${vnd} ; do
+ dev=${l%%:*}
+ tmp=${l#*:}
+ fs=${tmp%%:*}
+ inode=${tmp#*:}
+ file=`find -x ${fs} -inum ${inode} -print -exit`
+ echo ${file} | grep "$B0/" && \
+ LOOPDEVICES="${LOOPDEVICES} $dev"
+ done
+ for l in $LOOPDEVICES;
+ do
+ vnconfig -u $l
+ done
+ ;;
+ *)
+ echo "`uname -s` loopback device supportmissing"
+ ;;
+ esac
+
+ # remove contents of "GLUSTERD_WORKDIR" except hooks and groups
+ # directories.
+ if [ -n $GLUSTERD_WORKDIR ]
+ then
+ find $GLUSTERD_WORKDIR/* -maxdepth 0 -name 'hooks' -prune \
+ -o -name 'groups' -prune -o -exec rm -rf '{}' ';'
+ else
+ echo "GLUSTERD_WORKDIR is not set"
+ fi
+
+ # Complete cleanup time
+ rm -rf "$B0/*" "/etc/glusterd/*";
+ rm -rf $WORKDIRS
+ find $GLUSTERD_PIDFILEDIR -name "*.pid" | xargs rm -rf
+ leftover=""
+ for d in $WORKDIRS ; do
+ if test -d $d ; then
+ leftover="$leftover $d"
+ fi
+ done
+ if [ "x$leftover" != "x" ] ; then
+ echo "Aborting."
+ echo
+ echo "$d could not be deleted, here are the left over items"
+ for d in $leftover; do
+ find $d -exec ls -ld {} \;
+ done
+ echo
+ echo "Please correct the problem and try again."
+ echo
+ return 1;
+ fi >&2
+
+ mkdir -p $WORKDIRS
+ # This is usually the last thing a test script calls, so our return
+ # value becomes their exit value. While it's not great for the mkdir
+ # above to fail, promoting that into a failure of the whole test (and
+ # thus of an entire regression-test run) seems a bit excessive. Make
+ # sure we return good status anyway.
+
+ return 0
+}
+
+function force_terminate () {
+ local ret=$?;
+ >&2 echo -e "\nreceived external"\
+ "signal --`kill -l $ret`--, calling 'cleanup' ...\n";
+ cleanup;
+ exit $ret;
+}
+
+trap force_terminate INT TERM HUP
+
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+function cleanup_tester ()
+{
+ local exe=$1
+ rm -f $exe
+}
+
+function build_tester ()
+{
+ local cfile=$1
+ local fname=$(basename "$cfile")
+ local ext="${fname##*.}"
+ local execname="${fname%.*}"
+ shift
+ local cflags=$*
+ if [ `echo $cflags | grep -c "lgfapi" ` -gt 0 ]
+ then
+ cflags="$cflags $(pkg-config glusterfs-api --cflags-only-I --libs-only-L)"
+ fi
+ $CC -g -o $(dirname $cfile)/$execname $cfile $cflags
+}
+
+function process_leak_count ()
+{
+ local pid=$1;
+ return $(ls -lh /proc/$pid/fd | grep "(deleted)"| wc -l)
+}
+
+which truncate > /dev/null || {
+ truncate() {
+ local nocreate=0
+ local ioblocks=0
+ local fileref=""
+ local newsize=""
+
+ args=`getopt xor:s: $*`
+ if [ $? -ne 0 ]; then
+ echo 'Usage: truncate [-co](-r file | -s size) file ...'
+ exit 2
+ fi
+ set -- $args
+ while [ $# -gt 0 ]; do
+ case "$1" in
+ -c)
+ nocreate=1;
+ ;;
+ -o)
+ ioblocks=1;
+ echo "Unimplemented -o option"
+ exit 2
+ ;;
+ -r)
+ fileref=$2;
+ shift;
+ ;;
+ -s)
+ newsize=$2;
+ shift;
+ ;;
+ --)
+ shift;
+ break;
+ ;;
+ *)
+ echo 'Usage: truncate [-co](-r file | -s size) file ...'
+ exit 2;
+ ;;
+ esac
+ shift
+ done
+
+ if [ "x$newsize" = "x" -a "x$fileref" = "x" ] ; then
+ echo 'Usage: truncate [-co](-r file | -s size) file ...'
+ exit 2;
+ fi
+
+ if [ "x$newsize" != "x" -a "x$fileref" != "x" ] ; then
+ echo 'Usage: truncate [-co](-r file | -s size) file ...'
+ exit 2;
+ fi
+
+ if [ "x$newsize" != "x" ] ; then
+ echo $newsize | grep -q '^[-_<>%/]' && {
+ echo "Unimplemented prefix in ${newsize}"
+ exit 2;
+ }
+
+ echo $newsize | egrep -q '[TPEZY]B?$' && {
+ echo "Unit not implemented for ${newsize}"
+ exit 2;
+ }
+
+ case $newsize in
+ *KB)
+ newsize=$(( ${newsize/KB/} * 1000 ))
+ ;;
+ *K)
+ newsize=$(( ${newsize/K/} * 1024 ))
+ ;;
+ *MB)
+ newsize=$(( ${newsize/MB/} * 1000 * 1000 ))
+ ;;
+ *M)
+ newsize=$(( ${newsize/M/} * 1024 * 1024 ))
+ ;;
+ *GB)
+ newsize=$(( ${newsize/GB/} * 1000 * 1000 * 1000 ))
+ ;;
+ *G)
+ newsize=$(( ${newsize/G/} * 1024 * 1024 * 1024 ))
+ ;;
+ esac
+
+ fi
+
+ if [ "x$fileref" != "x" ] ; then
+ if [ ! -f $fileref ] ; then
+ echo "File does not exists: ${fileref}"
+ exit 2;
+ fi
+ newsize=`ls -l ${fileref}|awk '{print $5}'`
+ fi
+
+ if [ $# -eq 0 ]; then
+ echo 'Usage: truncate [-co](-r file | -s size) file ...'
+ exit 2;
+ fi
+
+ for f in $* ; do
+ if [ "x$nocreate" = "x1" -a ! -f $f ] ; then
+ continue;
+ fi
+
+ dd bs=1 seek=$newsize if=/dev/null of=$f msgfmt=quiet
+ done
+ }
+}
+
+which md5sum > /dev/null || {
+ md5sum() {
+ for f in $* ; do
+ md5 $f | awk -F'[() ]' '{printf("%s %s\n", $6, $3)}'
+ done
+ }
+}
+
+which setfattr > /dev/null || {
+ setfattr() {
+ $PYTHON setfattr.py $@
+ }
+}
+
+which getfattr > /dev/null || {
+ getfattr() {
+ $PYTHON getfattr.py $@
+ }
+}
+
+which sha1sum > /dev/null || {
+ sha1sum() {
+ case $OSTYPE in
+ Darwin)
+ for f in $* ; do
+ openssl sha1 $f | awk -F'[() ]' '{printf("%s %s\n", $4, $2)}'
+ done
+ ;;
+ NetBSD | FreeBSD)
+ for f in $* ; do
+ sha1 $f | awk -F'[() ]' '{printf("%s %s\n", $6, $3)}'
+ done
+ ;;
+ esac
+ }
+}
+
+userdel --help 2>/dev/null | grep -q -- '--force' || {
+ userdel() {
+ if [ "x$1" = "x--force" ]; then
+ user=$2
+ else
+ user=$1
+ fi
+ eval "$( which userdel ) $user"
+ }
+}
+
+useradd --help 2>/dev/null | grep -q -- '--no-create-home' || {
+ useradd() {
+ # Just remove -M (do not create home) which is the default
+ # other options are identical
+ args=`echo $*|sed 's/-M//'`
+ eval "$( which useradd ) $args"
+ }
+}
+
+userdel --help 2>/dev/null | grep -q -- '--force' || {
+ userdel() {
+ if [ "x$1" = "x--force" ]; then
+ user=$2
+ else
+ user=$1
+ fi
+ eval "$( which userdel ) $user"
+ }
+}
+
+useradd --help 2>/dev/null | grep -q -- '--no-create-home' || {
+ useradd() {
+ # Just remove -M (do not create home) which is the default
+ # other options are identical
+ args=`echo $*|sed 's/-M//'`
+ eval "$( which useradd ) $args"
+ }
+}
+
+DBG_TEST () {
+ read -p "execute \"$*\"? " x;
+ case $x in
+ 'y')
+ _TEST "$@"
+ ;;
+ 'q')
+ exit 0
+ ;;
+ *)
+ echo "skipping"
+ ;;
+ esac
+}
+
+alias EXPECT='_EXPECT $LINENO'
+alias EXPECT_NOT='_EXPECT_NOT $LINENO'
+if [ -n "$GF_INTERACTIVE" ]; then
+ alias TEST='DBG_TEST $LINENO'
+else
+ alias TEST='_TEST $LINENO'
+fi
+alias EXPECT_WITHIN='_EXPECT_WITHIN $LINENO'
+alias EXPECT_KEYWORD='_EXPECT_KEYWORD $LINENO'
+alias TEST_IN_LOOP='_TEST_IN_LOOP $LINENO'
+alias EXPECT_WITHIN_TEST_IN_LOOP='_EXPECT_WITHIN_TEST_IN_LOOP $LINENO'
+shopt -s expand_aliases
+
+if [ x"$OSTYPE" = x"Linux" ]; then
+ alias dd="dd status=none"
+elif [ x"$OSTYPE" = x"NetBSD" ]; then
+ alias dd="dd msgfmt=quiet"
+fi
+# MacOS doesn't seem to support either option. Doing nothing at all is
+# probably the safest option there and on anything we don't recognize, but
+# if you want to reduce the noise level and know the correct option for
+# your favorite platform please feel free to add it here.
+
+function SETUP_LOOP ()
+{
+ if [ $# != 1 ] ; then
+ echo "SETUP_LOOP usage" >&2
+ return 1;
+ fi
+
+ backend=$1
+
+ case ${OSTYPE} in
+ Linux)
+ losetup --find --show ${backend}
+ ;;
+ NetBSD)
+ vnd=`vnconfig -l|awk -F: '/not in use/{print $1; exit}'`
+ if [ "x${vnd}" = "x" ] ; then
+ echo "no more vnd" >&2
+ return 1;
+ fi
+ vnconfig ${vnd} ${backend}
+ echo ${vnd}
+ ;;
+ *)
+ echo "Please define SETUP_LOOP for ${OSTYPE} in include.rc" >&2
+ return 1;
+ ;;
+ esac
+}
+
+function MKFS_LOOP ()
+{
+ args=`getopt i: $*`
+ if [ $? -ne 0 ] ; then
+ echo "MKFS_LOOP usage" >&2
+ return 1;
+ fi
+ set -- ${args}
+
+ isize=""
+ while test $# -gt 0; do
+ case "$1" in
+ -i) isize=$2; shift ;;
+ --) shift; break ;;
+ esac
+ shift
+ done
+
+ dev=$1
+
+ case ${OSTYPE} in
+ Linux)
+ test "x${isize}" != "x" && isize="-i size=${isize}"
+ mkfs.xfs -f ${isize} ${dev}
+ ;;
+ NetBSD)
+ test "x${isize}" != "x" && isize="-i ${isize}"
+
+ echo ${dev} | grep -q '^vnd'
+ if [ $? -ne 0 ] ; then
+ vnd=`vnconfig -l|awk -F: '/not in use/{print $1; exit}'`
+ if [ "x${vnd}" = "x" ] ; then
+ echo "no more vnd" >&2
+ return 1;
+ fi
+ vnconfig ${vnd} ${dev}
+ else
+ vnd=${dev}
+ fi
+ newfs ${isize} /dev/r${vnd}a
+ ;;
+ *)
+ echo "Please define MKFS_LOOP for ${OSTYPE} in include.rc" >&2
+ return 1;
+ ;;
+ esac
+}
+
+# usage: log_newer timestamp "string"
+# search in glusterfs logs for "string" logged after timestamp seconds
+# since the Epoch (usually obtained by date +%s)
+log_newer()
+{
+ ts=$1
+ msg=$2
+ logdir=`$CLI --print-logdir`
+
+ local x_ifs=$IFS
+ IFS="["
+ for date in `grep -hr "$msg" $logdir | grep -v "G_LOG" | awk -F '[\]]' '{print $1}'` ; do
+ if [ `date -d "$date" +%s` -gt $ts ] ; then
+ IFS=$x_ifs
+ return 0;
+ fi
+ done 2>/dev/null
+ IFS=$x_ifs
+ return 1
+}
+
+function MOUNT_LOOP ()
+{
+ if [ $# != 2 ] ; then
+ echo "MOUNT_LOOP usage" >&2
+ return 1;
+ fi
+
+ dev=$1
+ target=$2
+
+ case ${OSTYPE} in
+ Linux)
+ echo ${dev} | grep -q '^/dev/loop'
+ if [ $? -eq 0 ] ; then
+ mount -t xfs ${dev} ${target}
+ else
+ mount -o loop ${dev} ${target}
+ fi
+ ;;
+ NetBSD)
+ echo ${dev} | grep -q '^vnd'
+ if [ $? -ne 0 ] ; then
+ ino=`/usr/bin/stat -f %i ${dev}`
+ dev=`vnconfig -l | awk -v ino=${ino} -F'[: ]*' '($5 == ino) {print $1}'`
+ fi
+
+ mount /dev/${dev}a ${target} >&2
+ if [ $? -ne 0 ] ; then
+ echo "failed to mount /dev/${dev}a on ${target}" >&2
+ return 1;
+ fi
+
+ mkdir -p ${target}/.attribute/system ${target}/.attribute/user
+ mount -u -o extattr ${target} >&2
+
+ ;;
+ *)
+ echo "Please define MOUNT_LOOP for ${OSTYPE} in include.rc" >&2
+ return 1;
+ ;;
+ esac
+}
+
+function UMOUNT_LOOP ()
+{
+ case ${OSTYPE} in
+ Linux)
+ force_umount $*
+ ;;
+ NetBSD)
+ for target in $* ; do
+ dev=`mount | awk -v target=${target} '($3 == target) {print $1}'`
+ force_umount ${target}
+ echo ${dev} | grep -q '^/dev/vnd'
+ if [ $? -eq 0 ] ; then
+ dev=`echo ${dev} | sed 's|^/dev/||; s|a$||'`
+ vnconfig -u ${dev}
+ else
+ ino=`/usr/bin/stat -f %i ${dev}`
+ dev=`vnconfig -l | awk -v ino=${ino} -F'[: ]*' '($5 == ino) {print $1}'`
+ if [ "x${dev}" != "x" ] ; then
+ vnconfig -u ${dev}
+ fi
+ fi
+ done
+ ;;
+ *)
+ echo "Please define UMOUNT_LOOP for ${OSTYPE} in include.rc" >&2
+ return 1;
+ ;;
+ esac
+}
+
+function SETUP_LOOP ()
+{
+ if [ $# != 1 ] ; then
+ echo "SETUP_LOOP usage" >&2
+ return 1;
+ fi
+
+ backend=$1
+
+ case ${OSTYPE} in
+ Linux)
+ losetup --find --show ${backend}
+ ;;
+ NetBSD)
+ vnd=`vnconfig -l|awk -F: '/not in use/{print $1; exit}'`
+ if [ "x${vnd}" = "x" ] ; then
+ echo "no more vnd" >&2
+ return 1;
+ fi
+ vnconfig ${vnd} ${backend}
+ echo ${vnd}
+ ;;
+ *)
+ echo "Please define SETUP_LOOP for ${OSTYPE} in include.rc" >&2
+ return 1;
+ ;;
+ esac
+}
+
+function MKFS_LOOP ()
+{
+ args=`getopt i: $*`
+ if [ $? -ne 0 ] ; then
+ echo "MKFS_LOOP usage" >&2
+ return 1;
+ fi
+ set -- ${args}
+
+ isize=""
+ while test $# -gt 0; do
+ case "$1" in
+ -i) isize=$2; shift ;;
+ --) shift; break ;;
+ esac
+ shift
+ done
+
+ dev=$1
+
+ case ${OSTYPE} in
+ Linux)
+ test "x${isize}" != "x" && isize="-i size=${isize}"
+ mkfs.xfs -f ${isize} ${dev}
+ ;;
+ NetBSD)
+ test "x${isize}" != "x" && isize="-i ${isize}"
+
+ echo ${dev} | grep -q '^vnd'
+ if [ $? -ne 0 ] ; then
+ vnd=`vnconfig -l|awk -F: '/not in use/{print $1; exit}'`
+ if [ "x${vnd}" = "x" ] ; then
+ echo "no more vnd" >&2
+ return 1;
+ fi
+ vnconfig ${vnd} ${dev}
+ else
+ vnd=${dev}
+ fi
+ newfs ${isize} /dev/r${vnd}a
+ ;;
+ *)
+ echo "Please define MKFS_LOOP for ${OSTYPE} in include.rc" >&2
+ return 1;
+ ;;
+ esac
+}
+
+function MOUNT_LOOP ()
+{
+ if [ $# != 2 ] ; then
+ echo "MOUNT_LOOP usage" >&2
+ return 1;
+ fi
+
+ dev=$1
+ target=$2
+
+ case ${OSTYPE} in
+ Linux)
+ echo ${dev} | grep -q '^/dev/loop'
+ if [ $? -eq 0 ] ; then
+ mount -t xfs ${dev} ${target}
+ else
+ mount -o loop ${dev} ${target}
+ fi
+ ;;
+ NetBSD)
+ echo ${dev} | grep -q '^vnd'
+ if [ $? -ne 0 ] ; then
+ ino=`/usr/bin/stat -f %i ${dev}`
+ dev=`vnconfig -l | awk -v ino=${ino} -F'[: ]*' '($5 == ino) {print $1}'`
+ fi
+
+ mount /dev/${dev}a ${target} >&2
+ if [ $? -ne 0 ] ; then
+ echo "failed to mount /dev/${dev}a on ${target}" >&2
+ return 1;
+ fi
+
+ mkdir -p ${target}/.attribute/system ${target}/.attribute/user
+ mount -u -o extattr ${target} >&2
+
+ ;;
+ *)
+ echo "Please define MOUNT_LOOP for ${OSTYPE} in include.rc" >&2
+ return 1;
+ ;;
+ esac
+}
+
+function UMOUNT_LOOP ()
+{
+ case ${OSTYPE} in
+ Linux)
+ force_umount $*
+ ;;
+ NetBSD)
+ for target in $* ; do
+ dev=`mount | awk -v target=${target} '($3 == target) {print $1}'`
+ force_umount ${target}
+ echo ${dev} | grep -q '^/dev/vnd'
+ if [ $? -eq 0 ] ; then
+ dev=`echo ${dev} | sed 's|^/dev/||; s|a$||'`
+ vnconfig -u ${dev}
+ else
+ ino=`/usr/bin/stat -f %i ${dev}`
+ dev=`vnconfig -l | awk -v ino=${ino} -F'[: ]*' '($5 == ino) {print $1}'`
+ if [ "x${dev}" != "x" ] ; then
+ vnconfig -u ${dev}
+ fi
+ fi
+ done
+ ;;
+ *)
+ echo "Please define UMOUNT_LOOP for ${OSTYPE} in include.rc" >&2
+ return 1;
+ ;;
+ esac
+}
+
+function STAT()
+{
+ stat $1
+ echo $?
+}
+
+function STAT_INO()
+{
+ local ino=$(stat -c '%i' $1)
+ if [ $? -eq 0 ]; then
+ echo $ino
+ else
+ echo 0
+ fi
+}
+
+function get_md5_sum()
+{
+ local file=$1;
+ md5_sum=$(md5sum $file | awk '{print $1}');
+ echo $md5_sum
+}
diff --git a/tests/line-coverage/afr-heal-info.t b/tests/line-coverage/afr-heal-info.t
new file mode 100644
index 00000000000..182665917c4
--- /dev/null
+++ b/tests/line-coverage/afr-heal-info.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+#Test that parallel heal-info command execution doesn't result in spurious
+#entries with locking-scheme granular
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+
+function write_and_del_file {
+ dd of=$M0/a.txt if=/dev/zero bs=1024k count=100
+ rm -f $M0/b.txt
+}
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume create $V0 replica 2 $H0:$B0/brick{0,1}
+TEST $CLI volume set $V0 locking-scheme granular
+TEST $CLI volume start $V0
+TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
+TEST touch $M0/a.txt $M0/b.txt
+write_and_del_file &
+touch $B0/f1 $B0/f2
+
+# All above is similar to basic/afr/heal-info.t
+
+TEST $CLI volume heal $V0 enable
+TEST $CLI volume heal $V0 info --xml
+TEST $CLI volume heal $V0 info summary
+TEST $CLI volume heal $V0 info summary --xml
+TEST $CLI volume heal $V0 info split-brain
+TEST $CLI volume heal $V0 info split-brain --xml
+
+TEST $CLI volume heal $V0 statistics heal-count
+
+# It may fail as the file is not in splitbrain
+$CLI volume heal $V0 split-brain latest-mtime /a.txt
+
+TEST $CLI volume heal $V0 disable
+
+TEST $CLI volume stop $V0
+cleanup;
diff --git a/tests/line-coverage/arbiter-coverage.t b/tests/line-coverage/arbiter-coverage.t
new file mode 100755
index 00000000000..82b470141b5
--- /dev/null
+++ b/tests/line-coverage/arbiter-coverage.t
@@ -0,0 +1,32 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 2 arbiter 1 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+build_tester ./glfsxmp.c -lgfapi
+$(dirname $0)/../basic/rpc-coverage.sh $M1 >/dev/null
+./glfsxmp $V0 $H0 >/dev/null
+
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+## Finish up
+TEST $CLI volume stop $V0;
+
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/line-coverage/cli-peer-and-volume-operations.t b/tests/line-coverage/cli-peer-and-volume-operations.t
new file mode 100644
index 00000000000..0cf8dbe81f9
--- /dev/null
+++ b/tests/line-coverage/cli-peer-and-volume-operations.t
@@ -0,0 +1,135 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+. $(dirname $0)/../volume.rc
+
+function peer_count {
+eval \$CLI_$1 peer status | grep 'Peer in Cluster (Connected)' | wc -l
+}
+
+cleanup
+
+TEST launch_cluster 3
+
+TEST $CLI_1 system uuid reset
+
+## basic peer commands
+TEST $CLI_1 peer probe $H2
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 1
+EXPECT_WITHIN $PROBE_TIMEOUT 1 peer_count 2
+
+#probe a unreachable node
+TEST kill_glusterd 3
+TEST ! $CLI_1 peer probe $H3
+
+#detach a node which is not a part of cluster
+TEST ! $CLI_1 peer detach $H3
+TEST ! $CLI_1 peer detach $H3 force
+
+TEST start_glusterd 3
+TEST $CLI_1 peer probe $H3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 1
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 2
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 3
+
+# probe a node which is already part of cluster
+TEST $CLI_1 peer probe $H3
+
+#probe an invalid address
+TEST ! $CLI_1 peer probe 1024.1024.1024.1024
+
+TEST $CLI_1 pool list
+
+TEST $CLI_1 --help
+TEST $CLI_1 --version
+TEST $CLI_1 --print-logdir
+TEST $CLI_1 --print-statedumpdir
+
+# try unrecognised command
+TEST ! $CLI_1 volume
+TEST pidof glusterd
+
+## all help commands
+TEST $CLI_1 global help
+TEST $CLI_1 help
+
+TEST $CLI_1 peer help
+TEST $CLI_1 volume help
+TEST $CLI_1 volume bitrot help
+TEST $CLI_1 volume quota help
+TEST $CLI_1 snapshot help
+
+## volume operations
+TEST $CLI_1 volume create $V0 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0
+# create a volume with already existing volume name
+TEST ! $CLI_1 volume create $V0 $H1:$B1/$V1 $H2:$B2/$V1
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+# Mount the volume and create files
+TEST glusterfs -s $H1 --volfile-id $V0 $M1
+TEST touch $M1/file{1..100}
+
+#fails because $V0 is not shd compatible
+TEST ! $CLI_1 volume status $V0 shd
+
+#test explicitly provided options
+TEST $CLI_1 --timeout=120 --log-level=INFO volume status
+
+#changing timezone to a different one, to check localtime logging feature
+TEST export TZ='Asia/Kolkata'
+TEST restart_glusterd 1
+
+#localtime logging enable
+TEST $CLI_1 volume set all cluster.localtime-logging enable
+EXPECT '1' logging_time_check $LOGDIR
+
+#localtime logging disable
+TEST $CLI_1 volume set all cluster.localtime-logging disable
+EXPECT '0' logging_time_check $LOGDIR
+
+#changing timezone back to original timezone
+TEST export TZ='UTC'
+
+#negative tests for volume options
+#'set' option to enable quota/inode-quota is now depreciated
+TEST ! $CLI_1 volume set $V0 quota enable
+TEST ! $CLI_1 volume set $V0 inode-quota enable
+
+#invalid transport type 'rcp'
+TEST ! $CLI_1 volume set $V0 config.transport rcp
+
+#'op-version' option is not valid for a single volume
+TEST ! $CLI_1 volume set $V0 cluster.op-version 72000
+
+#'op-version' option can't be used with any other option
+TEST ! $CLI_1 volume set all cluster.localtime-logging disable cluster.op-version 72000
+
+#invalid format of 'op-version'
+TEST ! $CLI_1 volume set all cluster.op-version 72-000
+
+#provided 'op-version' value is greater than max allowed op-version
+op_version=$($CLI_1 volume get all cluster.max-op-version | awk 'NR==3 {print$2}')
+op_version=$((op_version+1000)) #this can be any number greater than 0
+TEST ! $CLI_1 volume set all cluster.op-version $op_version
+
+#provided 'op-verison' value cannot be less than the current cluster op-version value
+TEST ! $CLI_1 volume set all cluster.op-version 00000
+
+# system commnds
+TEST $CLI_1 system help
+TEST $CLI_1 system uuid get
+TEST $CLI_1 system getspec $V0
+TEST $CLI_1 system getwd
+TEST $CLI_1 system fsm log
+
+# Both these may fail, but it covers xdr functions and some
+# more code in cli/glusterd
+$CLI_1 system:: mount test local:/$V0
+$CLI_1 system:: umount $M0 lazy
+$CLI_1 system:: copy file options
+$CLI_1 system:: portmap brick2port $H0:$B0/brick
+$CLI_1 system:: uuid reset
+
+cleanup
diff --git a/tests/line-coverage/cli-volume-top-profile-coverage.t b/tests/line-coverage/cli-volume-top-profile-coverage.t
new file mode 100644
index 00000000000..35713c26faa
--- /dev/null
+++ b/tests/line-coverage/cli-volume-top-profile-coverage.t
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../cluster.rc
+. $(dirname $0)/../volume.rc
+
+cleanup
+
+# Creating cluster
+TEST launch_cluster 3
+
+# Probing peers
+TEST $CLI_1 peer probe $H2
+TEST $CLI_1 peer probe $H3
+EXPECT_WITHIN $PROBE_TIMEOUT 2 peer_count 3
+
+# Creating a volume and starting it.
+TEST $CLI_1 volume create $V0 replica 3 $H1:$B1/$V0 $H2:$B2/$V0 $H3:$B3/$V0
+TEST $CLI_1 volume start $V0
+EXPECT 'Started' cluster_volinfo_field 1 $V0 'Status';
+
+TEST glusterfs -s $H1 --volfile-id $V0 $M1
+TEST touch $M1/file{1..100}
+
+# Testing volume top command with and without xml output
+function test_volume_top_cmds () {
+ local ret=0
+ declare -a top_cmds=("read" "open" "write" "opendir" "readdir")
+ for cmd in ${top_cmds[@]}; do
+ $CLI_1 volume top $V0 $cmd
+ (( ret += $? ))
+ $CLI_1 volume top $V0 clear
+ (( ret += $? ))
+ $CLI_1 volume top $V0 $cmd --xml
+ (( ret += $? ))
+ $CLI_1 volume top $V0 $cmd brick $H1:$B1/$V0
+ (( ret += $? ))
+ $CLI_1 volume top $V0 clear brick $H1:$B1/$V0
+ (( ret += $? ))
+ $CLI_1 volume top $V0 $cmd brick $H1:$B1/$V0 --xml
+ (( ret += $? ))
+ done
+ return $ret
+}
+
+# Testing volume profile command with and without xml
+function test_volume_profile_cmds () {
+ local ret=0
+ declare -a profile_cmds=("start" "info" "info peek" "info cumulative" "info clear" "info incremental peek" "stop")
+ for cmd in "${profile_cmds[@]}"; do
+ $CLI_1 volume profile $V0 $cmd
+ (( ret += $? ))
+ $CLI_1 volume profile $V0 $cmd --xml
+ (( ret += $? ))
+ done
+ return $ret
+}
+
+TEST test_volume_top_cmds;
+TEST test_volume_profile_cmds;
+
+cleanup
diff --git a/tests/line-coverage/errorgen-coverage.t b/tests/line-coverage/errorgen-coverage.t
new file mode 100755
index 00000000000..f4622428d79
--- /dev/null
+++ b/tests/line-coverage/errorgen-coverage.t
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+# Because I have added 10 iterations of rpc-coverage and glfsxmp for errorgen
+SCRIPT_TIMEOUT=600
+
+cleanup;
+
+## Start and create a volume
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+TEST $CLI volume set $V0 error-gen posix;
+TEST $CLI volume set $V0 debug.error-failure 3%;
+
+## Start volume and verify
+TEST $CLI volume start $V0;
+
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+build_tester ./glfsxmp.c -lgfapi
+for i in $(seq 1 10); do
+ # as there is error-gen, there can be errors, so no
+ # need to test for success of below two commands
+ $(dirname $0)/../basic/rpc-coverage.sh $M1 >/dev/null
+ ./glfsxmp $V0 $H0 >/dev/null
+done
+
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+## Finish up
+TEST $CLI volume stop $V0;
+
+TEST $CLI volume delete $V0;
+
+cleanup;
diff --git a/tests/line-coverage/log-and-brick-ops-negative-case.t b/tests/line-coverage/log-and-brick-ops-negative-case.t
new file mode 100644
index 00000000000..d86cb452282
--- /dev/null
+++ b/tests/line-coverage/log-and-brick-ops-negative-case.t
@@ -0,0 +1,82 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup
+TEST glusterd
+TEST pidof glusterd
+
+#create volumes
+TEST $CLI volume create ${V0}_1 $H0:$B0/v{1..2}
+
+TEST $CLI volume create ${V0}_2 replica 3 arbiter 1 $H0:$B0/v{3..5}
+
+TEST $CLI volume create ${V0}_3 disperse 3 redundancy 1 $H0:$B0/v{6..8}
+TEST $CLI volume start ${V0}_3
+EXPECT 'Started' volinfo_field ${V0}_3 'Status'
+
+TEST $CLI volume create ${V0}_4 replica 3 $H0:$B0/v{9..14}
+TEST $CLI volume start ${V0}_4
+EXPECT 'Started' volinfo_field ${V0}_4 'Status'
+
+#log rotate option
+#provided volume does not exist
+TEST ! $CLI volume log ${V0}_5 rotate
+
+#volume must be started before using log rotate option
+TEST ! $CLI volume log ${V0}_1 rotate
+TEST $CLI volume start ${V0}_1
+EXPECT 'Started' volinfo_field ${V0}_1 'Status'
+
+#incorrect brick provided for the volume
+TEST ! $CLI volume log ${V0}_1 rotate $H0:$B0/v15
+
+#add-brick operations
+#volume must be in started to state to increase replica count
+TEST ! $CLI volume add-brick ${V0}_2 replica 4 $H0:$B0/v15
+TEST $CLI volume start ${V0}_2
+EXPECT 'Started' volinfo_field ${V0}_2 'Status'
+
+#incorrect number of bricks for a replica 4 volume
+TEST ! $CLI volume add-brick ${V0}_1 replica 4 $H0:$B0/v15
+
+#replica count provided is less than the current replica count
+TEST ! $CLI volume add-brick ${V0}_2 replica 2 $H0:$B0/v15
+
+#dispersed to replicated dispersed not possible
+TEST ! $CLI volume add-brick ${V0}_3 replica 2 $H0:$B0/v15
+
+#remove-brick operations
+#replica count option provided for dispersed vol
+TEST ! $CLI volume remove-brick ${V0}_3 replica 2 $H0:$B0/v8 start
+
+#given replica count is greater than the current replica count
+TEST ! $CLI volume remove-brick ${V0}_2 replica 4 $H0:$B0/v5 start
+
+#number of bricks to be removed, must be a multiple of replica count
+TEST ! $CLI volume remove-brick ${V0}_2 replica 3 $H0:$B0/v{3..4} start
+
+#less number of bricks given to reduce the replica count
+TEST ! $CLI volume remove-brick ${V0}_2 replica 1 $H0:$B0/v3 start
+
+#bricks should be from different subvol
+TEST ! $CLI volume remove-brick ${V0}_4 replica 2 $H0:$B0/v{13..14} start
+
+#arbiter must be removed to reduce replica count
+TEST ! $CLI volume remove-brick ${V0}_2 replica 1 $H0:$B0/v{3..4} start
+
+#removal of bricks is not allowed without reducing the replica count explicitly
+TEST ! $CLI volume remove-brick ${V0}_2 replica 3 $H0:$B0/v{3..5} start
+
+#incorrect brick for given vol
+TEST ! $CLI volume remove-brick ${V0}_1 $H0:$B0/v15 start
+
+#removing all the bricks are not allowed
+TEST ! $CLI volume remove-brick ${V0}_1 $H0:$B0/v{1..2} start
+
+#volume must not be stopped state while removing bricks
+TEST $CLI volume stop ${V0}_1
+TEST ! $CLI volume remove-brick ${V0}_1 $H0:$B0/v1 start
+
+cleanup \ No newline at end of file
diff --git a/tests/line-coverage/meta-max-coverage.t b/tests/line-coverage/meta-max-coverage.t
new file mode 100755
index 00000000000..1cc07610aa7
--- /dev/null
+++ b/tests/line-coverage/meta-max-coverage.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume start $V0;
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1
+
+TEST stat $M1/.meta/
+
+# expect failures in rpc-coverage.sh execution.
+res=$($(dirname $0)/../basic/rpc-coverage.sh $M1/.meta)
+
+
+# Expect errors here, hence no need to 'check for success'
+for file in $(find $M1/.meta type f -print); do
+ cat $file >/dev/null
+ echo 1>$file
+ echo hello>$file
+done
+
+TEST umount $M1
+
+cleanup;
diff --git a/tests/line-coverage/namespace-linecoverage.t b/tests/line-coverage/namespace-linecoverage.t
new file mode 100644
index 00000000000..8de6a0f279b
--- /dev/null
+++ b/tests/line-coverage/namespace-linecoverage.t
@@ -0,0 +1,39 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+cleanup;
+
+TEST glusterd
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{0,1,2,3,4,5,6,7,8}
+TEST $CLI volume set $V0 performance.stat-prefetch off
+TEST $CLI volume set $V0 cluster.read-subvolume-index 0
+TEST $CLI volume set $V0 features.tag-namespaces on
+TEST $CLI volume start $V0
+TEST $CLI volume set $V0 storage.build-pgfid on
+
+sleep 2
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+
+mkdir -p $M1/namespace
+
+# subvol_1 = bar, subvol_2 = foo, subvol_3 = hey
+# Test create, write (tagged by loc, fd respectively).
+touch $M1/namespace/{bar,foo,hey}
+
+open $M1/namespace/hey
+
+## TODO: best way to increase coverage is to have a gfapi program
+## which covers maximum fops
+TEST $(dirname $0)/../basic/rpc-coverage.sh $M1
+
+TEST cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+cleanup;
diff --git a/tests/line-coverage/old-protocol.t b/tests/line-coverage/old-protocol.t
new file mode 100755
index 00000000000..5676e5636db
--- /dev/null
+++ b/tests/line-coverage/old-protocol.t
@@ -0,0 +1,37 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+file="/var/lib/glusterd/vols/$V0/trusted-$V0.tcp-fuse.vol"
+sed -i -e 's$send-gids true$send-gids true\n option testing.old-protocol true$g' $file
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+## TODO: best way to increase coverage is to have a gfapi program
+## which covers maximum fops
+TEST $(dirname $0)/../basic/rpc-coverage.sh $M1
+
+TEST cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+cleanup;
diff --git a/tests/line-coverage/quiesce-coverage.t b/tests/line-coverage/quiesce-coverage.t
new file mode 100755
index 00000000000..ca29343451e
--- /dev/null
+++ b/tests/line-coverage/quiesce-coverage.t
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 replica 3 $H0:$B0/${V0}{1,2,3,4,5,6};
+
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+EXPECT '6' brick_count $V0
+
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+
+file="/var/lib/glusterd/vols/$V0/trusted-$V0.tcp-fuse.vol"
+
+cat >> ${file} <<EOF
+
+volume quiesce
+ type features/quiesce
+ subvolumes ${V0}
+end-volume
+EOF
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+## TODO: best way to increase coverage is to have a gfapi program
+## which covers maximum fops
+TEST $(dirname $0)/../basic/rpc-coverage.sh $M1
+
+TEST cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+cleanup;
diff --git a/tests/line-coverage/shard-coverage.t b/tests/line-coverage/shard-coverage.t
new file mode 100644
index 00000000000..1797999c146
--- /dev/null
+++ b/tests/line-coverage/shard-coverage.t
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup
+
+TEST glusterd
+
+TEST $CLI volume create $V0 $H0:$B0/brick
+TEST $CLI volume set $V0 features.shard on
+EXPECT 'Created' volinfo_field $V0 'Status'
+
+TEST $CLI volume start $V0
+EXPECT 'Started' volinfo_field $V0 'Status'
+
+# It is good to copy the file locally and build it, so the scope remains
+# inside tests directory.
+TEST cp $(dirname ${0})/../basic/gfapi/glfsxmp-coverage.c glfsxmp.c
+TEST build_tester ./glfsxmp.c -lgfapi
+TEST ./glfsxmp $V0 $H0
+TEST cleanup_tester ./glfsxmp
+TEST rm ./glfsxmp.c
+
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+TEST $(dirname $0)/../basic/rpc-coverage.sh $M1
+
+
+TEST $CLI volume stop $V0
+TEST $CLI volume delete $V0
+
+cleanup
diff --git a/tests/line-coverage/some-features-in-libglusterfs.t b/tests/line-coverage/some-features-in-libglusterfs.t
new file mode 100644
index 00000000000..5719c4e039c
--- /dev/null
+++ b/tests/line-coverage/some-features-in-libglusterfs.t
@@ -0,0 +1,67 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+function grep_string {
+ local f=$1
+ local string=$2
+ # The output of test script also shows up in log. Ignore them.
+ echo $(grep ${string} ${f} | grep -v "++++++" | wc -l)
+}
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}
+TEST $CLI volume set $V0 client-log-level TRACE
+TEST $CLI volume start $V0;
+
+log_file="$(gluster --print-logdir)/gluster.log"
+## Mount FUSE
+TEST $GFS -s $H0 --log-file $log_file --volfile-id $V0 $M1
+
+## Cover 'monitoring.c' here
+pgrep 'glusterfs' | xargs kill -USR2
+
+EXPECT_WITHIN 2 1 grep_string $log_file 'sig:USR2'
+
+## Also cover statedump
+pgrep 'glusterfs' | xargs kill -USR1
+
+EXPECT_WITHIN 2 1 grep_string $log_file 'sig:USR1'
+
+## Also cover SIGHUP
+pgrep 'glusterfs' | xargs kill -HUP
+
+EXPECT_WITHIN 2 1 grep_string $log_file 'sig:HUP'
+
+## Also cover SIGTERM
+pgrep 'glusterfs' | xargs kill -TERM
+
+EXPECT_WITHIN 2 1 grep_string $log_file 'cleanup_and_exit'
+
+# Previous call should make umount of the process.
+# force_umount $M1
+
+# TODO: below section is commented out, mainly as our regression treats the test
+# as failure because sending ABRT signal will cause the process to dump core.
+# Our regression treats the test as failure, if there is a core.
+# FIXME: figure out a way to run this test, because this part of the code gets
+# executed only when there is coredump, and it is critical for debugging, to
+# keep it working always.
+
+# # Restart client
+# TEST $GFS -s $H0 --log-file $log_file --volfile-id $V0 $M1
+#
+# ## Also cover SIGABRT
+# pgrep 'glusterfs ' | xargs kill -ABRT
+#
+# TEST [ 1 -eq $(grep 'pending frames' $log_file | wc -l) ]
+
+TEST rm $log_file
+
+cleanup;
diff --git a/tests/line-coverage/volfile-with-all-graph-syntax.t b/tests/line-coverage/volfile-with-all-graph-syntax.t
new file mode 100644
index 00000000000..b137432cceb
--- /dev/null
+++ b/tests/line-coverage/volfile-with-all-graph-syntax.t
@@ -0,0 +1,73 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST mkdir -p $B0/test
+cat > $B0/test.vol <<EOF
+volume test
+ type storage/posix
+ option directory $B0/test
+ option multiple-line-string "I am
+ testing a feature of volfile graph.l"
+ option single-line-string "this is running on $H0"
+ option option-with-back-tick `date +%Y%M%d`
+end-volume
+EOF
+
+# This should succeed, but it will have some unknown options, which is OK.
+TEST glusterfs -f $B0/test.vol $M0;
+
+EXPECT_WITHIN $UMOUNT_TIMEOUT "Y" force_umount $M0;
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+volume test
+ type storage/posix
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+ type storage/posix
+end-volume
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+volume test
+end-volume
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+volume test
+ option test and test
+end-volume
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+volume test
+ subvolumes
+end-volume
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+# This should not succeed
+cat > $B0/test.vol <<EOF
+volume test
+ type storage/posix
+ new-option key value
+ option directory $B0/test
+end-volume
+EOF
+TEST ! glusterfs -f $B0/test.vol $M0;
+
+cleanup;
diff --git a/tests/nfs.rc b/tests/nfs.rc
new file mode 100644
index 00000000000..2140f311c33
--- /dev/null
+++ b/tests/nfs.rc
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Due to portmap registration NFS takes some time to
+# export all volumes. Therefore tests should start only
+# after exports are visible by showmount command. This
+# routine will check if showmount shows the exports or not
+#
+function is_nfs_export_available ()
+{
+ local vol=$1
+
+ if [ "$vol" == "" ]; then
+ vol=$V0
+ fi
+
+ exp=$(showmount -e localhost 2> /dev/null | grep $vol | wc -l)
+ echo "$exp"
+}
+
+function mount_nfs ()
+{
+ local e=$1
+ local m=$2
+ local opt=$3
+ if [ ! -z "$opt" ]; then opt=",$opt"; fi
+ opt="soft,intr,vers=3$opt"
+
+ nopt=""
+ for o in ${opt//,/ }; do
+ case $OSTYPE in
+ NetBSD)
+ test "x${nopt}" = "x" && nopt="tcp,-R=2,"
+
+ case $o in
+ nolock|noac|actimeo=*|mountproto=udp)
+ continue
+ ;;
+ proto=tcp)
+ o="tcp"
+ ;;
+ vers=3)
+ o="nfsv3"
+ ;;
+ retry=*)
+ o=${o/retry=/-R}
+ ;;
+ timeo=*)
+ o=${o/timeo=/-t}
+ ;;
+ retrans=*)
+ o=${o/retrans=/-x}
+ ;;
+ *)
+ ;;
+ esac
+ ;;
+ *)
+ ;;
+ esac
+ if [ ! -z "$nopt" ]; then nopt="${nopt},"; fi
+ nopt="${nopt}$o"
+ done
+
+ mount -t nfs -o $nopt $e $m
+}
+
+function umount_nfs {
+ ${UMOUNT_F} $1
+ if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
+}
diff --git a/tests/performance/open-behind.t b/tests/performance/open-behind.t
new file mode 100755
index 00000000000..002a98a8352
--- /dev/null
+++ b/tests/performance/open-behind.t
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+. $(dirname $0)/../volume.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info 2>/dev/null;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+TEST $CLI volume set $V0 performance.flush-behind off;
+
+TEST $CLI volume start $V0;
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+D0="hello-this-is-a-test-message0";
+F0="test-file0";
+
+function write_to()
+{
+ local file="$1";
+ local data="$2";
+
+ echo "$data" > "$file";
+}
+
+
+TEST write_to "$M0/$F0" "$D0";
+EXPECT "$D0" cat $M1/$F0;
+
+# open-behind delays open and uses anonymous fds for fops like
+# fstat and readv. So after creating the file, if volume is restarted
+# then later when the file is read, because of the use of anonymous fds
+# volume top open will show number of files opened as 0.
+TEST $CLI volume stop $V0;
+sleep 1;
+TEST $CLI volume start $V0;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" client_connected_status_meta $M0 ${V0}-client-0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" client_connected_status_meta $M0 ${V0}-client-1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" client_connected_status_meta $M1 ${V0}-client-0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" client_connected_status_meta $M1 ${V0}-client-1
+
+cat $M1/$F0 >/dev/null;
+
+string=$(gluster volume top $V0 open | grep -w "$F0");
+
+EXPECT "" echo $string;
+
+TEST $CLI volume set $V0 performance.open-behind off;
+
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "2" num_graphs $M0;
+EXPECT_WITHIN $GRAPH_SWITCH_TIMEOUT "2" num_graphs $M1;
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" client_connected_status_meta $M0 ${V0}-client-0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" client_connected_status_meta $M0 ${V0}-client-1
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" client_connected_status_meta $M1 ${V0}-client-0
+EXPECT_WITHIN $PROCESS_UP_TIMEOUT "^1$" client_connected_status_meta $M1 ${V0}-client-1
+
+D1="hello-this-is-a-test-message1";
+F1="test-file1";
+
+TEST write_to "$M0/$F1" "$D1";
+EXPECT "$D1" cat $M1/$F1;
+
+EXPECT "$D0" cat $M1/$F0;
+
+$CLI volume top $V0 open | grep -w "$F0" >/dev/null 2>&1
+TEST [ $? -eq 0 ];
+
+cleanup;
diff --git a/tests/performance/quick-read.t b/tests/performance/quick-read.t
new file mode 100644
index 00000000000..26ec175df0d
--- /dev/null
+++ b/tests/performance/quick-read.t
@@ -0,0 +1,55 @@
+#!/bin/bash
+
+. $(dirname $0)/../include.rc
+
+cleanup;
+
+TEST glusterd
+TEST pidof glusterd
+TEST $CLI volume info;
+
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2};
+
+TEST $CLI volume start $V0;
+
+## Mount FUSE
+TEST $GFS -s $H0 --volfile-id $V0 $M0;
+
+TEST $GFS -s $H0 --volfile-id $V0 $M1;
+
+D0="hello-this-is-a-test-message0";
+F0="test-file0";
+
+function write_to()
+{
+ local file="$1";
+ local data="$2";
+
+ echo "$data" > "$file";
+}
+
+
+TEST write_to "$M0/$F0" "$D0";
+EXPECT "$D0" cat $M1/$F0;
+EXPECT "$D0" cat $M0/$F0;
+EXPECT "$D0" cat $M1/$F0;
+EXPECT "$D0" cat $M0/$F0;
+
+sleep 1;
+
+EXPECT "$D0" cat $M1/$F0;
+EXPECT "$D0" cat $M0/$F0;
+EXPECT "$D0" cat $M1/$F0;
+EXPECT "$D0" cat $M0/$F0;
+
+TEST $CLI volume set $V0 performance.quick-read off;
+
+D1="hello-this-is-a-test-message1";
+F1="test-file1";
+
+TEST write_to "$M0/$F1" "$D1";
+EXPECT "$D1" cat $M0/$F1;
+
+EXPECT "$D0" cat $M1/$F0;
+
+cleanup;
diff --git a/tests/snapshot.rc b/tests/snapshot.rc
new file mode 100644
index 00000000000..f6da514f826
--- /dev/null
+++ b/tests/snapshot.rc
@@ -0,0 +1,484 @@
+#!/bin/bash
+
+LVM_DEFINED=0
+LVM_PREFIX="patchy_snap"
+LVM_COUNT=0
+VHD_SIZE="300M"
+
+#This function will init B# bricks
+#This is used when launch_cluster is
+#not called to init B#. Call it before
+#setup_lvm
+function init_n_bricks() {
+ local count=$1
+ for i in `seq 1 $count`; do
+ eval "B$i=/d/backends/$i"
+ done
+}
+
+count_snaps () {
+ ls $1/.snaps | wc -l
+}
+
+function init_lvm() {
+ if [ "$1" == "" ]; then
+ echo "Error: Invalid argument supplied"
+ return 1
+ fi
+ LVM_COUNT=$1
+
+ if [ "$2" != "" ]; then
+ VHD_SIZE=$2
+ fi
+
+ local b
+ local i
+
+ if [ "$B1" = "" ]; then
+ B1=$B0
+ fi
+
+ for i in `seq 1 $LVM_COUNT`; do
+ b="B$i"
+ if [ "${!b}" = "" ]; then
+ echo "Error: $b not defined."
+ echo "Please run launch_cluster with atleast $LVM_COUNT nodes"
+ return 1
+ fi
+
+ eval "L$i=${!b}/${LVM_PREFIX}_mnt"
+ l="L$i"
+ mkdir -p ${!l}
+ if [ $? -ne 0 ]; then
+ echo "Error: failed to create dir ${!l}"
+ return 1
+ fi
+
+ eval "VG$i=${LVM_PREFIX}_vg_${i}"
+ done
+
+ LVM_DEFINED=1
+ return 0
+}
+
+function verify_lvm_version() {
+ if `/sbin/lvcreate --help | grep -q thin`; then
+ return 0;
+ fi
+ return 1;
+}
+
+function setup_lvm() {
+ init_lvm $@ || return 1
+ _setup_lvm
+ return 0
+}
+
+function cleanup_lvm() {
+ pkill gluster
+ sleep 2
+
+ if [ "$LVM_DEFINED" = "1" ]; then
+ _cleanup_lvm >/dev/null 2>&1
+ fi
+
+ _cleanup_lvm_again >/dev/null 2>&1
+ # TODO Delete cleanup has open bug
+ # once fixed delete this
+ mount | grep "run/gluster/snaps" | awk '{print $3}' | xargs umount 2> /dev/null
+ mount | grep "patchy_snap" | awk '{print $3}' | xargs umount 2> /dev/null
+ \rm -rf /var/run/gluster/snaps/*
+ lvscan | grep "/dev/patchy_snap" | awk '{print $2}'| xargs lvremove -f 2> /dev/null
+ vgs | grep patchy_snap | awk '{print $1}' | xargs vgremove -f 2>/dev/null
+ \rm -rf /dev/patchy*
+ return 0
+}
+
+# Find out how this file was sourced, source traps.rc the same way, and use
+# push_trapfunc to make sure cleanup_lvm gets called before we exit.
+. $(dirname ${BASH_SOURCE[0]})/traps.rc
+push_trapfunc cleanup_lvm
+
+########################################################
+# Private Functions
+########################################################
+function _setup_lvm() {
+ local count=$LVM_COUNT
+ local b
+ local i
+
+ for i in `seq 1 $count`; do
+ b="B$i"
+
+ _create_vhd ${!b} $i
+ _create_lv ${!b} $i
+ _mount_lv $i
+ done
+}
+
+function _cleanup_lvm() {
+ local count=$LVM_COUNT
+ local b
+ local i
+
+ for i in `seq 1 $count`; do
+ b="B$i"
+ _umount_lv $i
+ _remove_lv $i
+ _remove_vhd ${!b}
+ done
+}
+
+function _cleanup_lvm_again() {
+ local file
+
+ mount | grep $LVM_PREFIX | awk '{print $3}' | xargs -r ${UMOUNT_F}
+
+ /sbin/vgs | grep $LVM_PREFIX | awk '{print $1}' | xargs -r vgremove -f
+
+ find $B0 -name "${LVM_PREFIX}_loop" | xargs -r losetup -d
+
+ find $B0 -name "${LVM_PREFIX}*" | xargs -r rm -rf
+
+ find /run/gluster/snaps -name "*${LVM_PREFIX}*" | xargs -r rm -rf
+
+ for file in `ls /run/gluster/snaps`; do
+ find /run/gluster/snaps/$file -mmin -2 | xargs -r rm -rf
+ done
+}
+
+########################################################
+########################################################
+function _create_vhd() {
+ local dir=$1
+ local num=$2
+ local loop_num=`expr $2 + 8`
+
+ fallocate -l${VHD_SIZE} $dir/${LVM_PREFIX}_vhd
+ mknod -m660 $dir/${LVM_PREFIX}_loop b 7 $loop_num
+ /sbin/losetup $dir/${LVM_PREFIX}_loop $dir/${LVM_PREFIX}_vhd
+}
+
+function _create_lv() {
+ local dir=$1
+ local num=$2
+ local vg="VG$num"
+ local thinpoolsize="200M"
+ local virtualsize="150M"
+
+ /sbin/pvcreate $dir/${LVM_PREFIX}_loop
+ /sbin/vgcreate ${!vg} $dir/${LVM_PREFIX}_loop
+
+ /sbin/lvcreate -L ${thinpoolsize} -T /dev/${!vg}/thinpool
+ /sbin/lvcreate -V ${virtualsize} -T /dev/${!vg}/thinpool -n brick_lvm
+
+ mkfs.xfs -f /dev/${!vg}/brick_lvm
+}
+
+function _mount_lv() {
+ local num=$1
+ local vg="VG$num"
+ local l="L$num"
+
+ mount -t xfs -o nouuid /dev/${!vg}/brick_lvm ${!l}
+}
+
+function _umount_lv() {
+ local num=$1
+ local l="L$num"
+
+ ${UMOUNT_F} ${!l} 2>/dev/null || true
+ rmdir ${!l} 2>/dev/null || true
+}
+
+function _remove_lv() {
+ local num=$1
+ local vg="VG$num"
+
+ vgremove -f ${!vg}
+}
+
+function _remove_vhd() {
+ local dir=$1
+
+ losetup -d $dir/${LVM_PREFIX}_loop
+ rm -f $dir/${LVM_PREFIX}_loop
+ rm -f $dir/${LVM_PREFIX}_vhd
+}
+
+########################################################
+# Utility Functions
+########################################################
+function snapshot_exists() {
+ local clitype=$1
+ local snapname=$2
+ local cli=$CLI
+ if [ "$clitype" == "1" ]; then
+ cli=$CLI_1;
+ fi
+ if [ "$clitype" == "2" ]; then
+ cli=$CLI_2;
+ fi
+ $cli snapshot list | egrep -q "^$snapname\$"
+ return $?
+}
+
+#Create N number of snaps in a given volume
+#Arg1 : <Volume Name>
+#Arg2 : <Count of snaps to be created>
+#Arg3 : <Snap Name Pattern>
+#Return: Returns 0 if all snaps are created ,
+# if not will return exit code of last failed
+# snap create command.
+function create_n_snapshots() {
+ local cli=$1
+ local vol=$1
+ local snap_count=$2
+ local snap_name=$3
+ local ret=0
+ for i in `seq 1 $snap_count`; do
+ $CLI_1 snapshot create $snap_name$i ${vol} no-timestamp &
+ PID_1=$!
+ wait $PID_1
+ ret=$?
+ if [ "$ret" != "0" ]; then
+ break
+ fi
+ done
+ return $ret
+}
+
+
+#Delete N number of snaps in a given volume
+#Arg1 : <Volume Name>
+#Arg2 : <Count of snaps to be deleted>
+#Arg3 : <Snap Name Pattern>
+#Return: Returns 0 if all snaps are Delete,
+# if not will return exit code of last failed
+# snap delete command.
+function delete_n_snapshots() {
+ local vol=$1
+ local snap_count=$2
+ local snap_name=$3
+ local ret=0
+ for i in `seq 1 $snap_count`; do
+ $CLI_1 snapshot delete $snap_name$i &
+ PID_1=$!
+ wait $PID_1
+ temp=$?
+ if [ "$temp" != "0" ]; then
+ ret=$temp
+ fi
+ done
+ return $ret
+}
+
+#Check for the existance of N number of snaps in a given volume
+#Arg1 : <Volume Name>
+#Arg2 : <Count of snaps to be checked>
+#Arg3 : <Snap Name Pattern>
+#Return: Returns 0 if all snaps exists,
+# if not will return exit code of last failed
+# snapshot_exists().
+function snapshot_n_exists() {
+ local vol=$1
+ local snap_count=$2
+ local snap_name=$3
+ local ret=0
+ for i in `seq 1 $snap_count`; do
+ snapshot_exists 1 $snap_name$i
+ ret=$?
+ if [ "$ret" != "0" ]; then
+ break
+ fi
+ done
+ return $ret
+}
+
+#Check for the status of snapshot for a volume
+#Arg1 : <Snap Name>
+function snapshot_status()
+{
+ local snap=$1;
+ local cli=$CLI_1;
+ if [ "$cli" = "" ]; then
+ cli=$CLI
+ fi
+
+ #TODO: Right now just fetches the status of the single snap volume.
+ #When snapshot will have multiple snap volumes, should have a
+ #cummulative logic for status
+ $cli snapshot info $snap | grep "Status" | sed 's/.*: //';
+}
+
+
+#Check the different status of a particular snapshot
+#Arg1 : <Snap name>
+#Arg2 : <Filed in status>
+#Arg3 : <Expected value>
+function snapshot_snap_status()
+{
+
+ local snap=$1;
+ local cli=$CLI_1;
+ local field=$2;
+ local expected=$3;
+ if [ "$cli" = "" ]; then
+ cli=$CLI
+ fi
+ for i in $($cli snapshot status $snap | grep "$field" | \
+ cut -d ':' -f2 | awk '{print $1}') ;
+ do
+ if [ "$i" != "$expected" ]; then
+ echo "Failed"
+ return 1;
+ fi;
+ done;
+echo "Success"
+return 0;
+}
+
+
+# TODO: Cleanup code duplication
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI_1 volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+
+function volume_exists() {
+ $CLI_1 volume info $1 > /dev/null 2>&1
+ if [ $? -eq 0 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+# arg-1 : From which node the command should be trigerred
+# Ex : $CLI_1, $CLI_2, etc.
+# arg-2 : Volume name
+# arg-3 : Starting index for the snapname "snap$i"
+# arg-4 : Number of snapshots to be taken
+function snap_create()
+{
+ eval local cli_index=\$$1
+ local volname=$2
+ local i=$3
+ local limit=$[$i + $4]
+
+ while [ $i -lt $limit ]
+ do
+ $cli_index snapshot create snap$i $volname no-timestamp
+ i=$[$i+1]
+ done
+}
+
+# arg-1 : From which node the command should be trigerred
+# Ex : $CLI_1. $CLI_2, etc.
+# arg-2 : Volume name.
+function get_snap_count()
+{
+ eval local cli_index=\$$1
+ local volname=$2
+
+
+ if [ -z "$2" ]
+ then
+ $cli_index snapshot list | grep -v "No snapshots present"\
+ | wc -l
+ else
+ $cli_index snapshot list $volname\
+ | grep -v "No snapshots present"\
+ | wc -l
+ fi
+}
+
+# arg-1 : From which node the command should be trigerred
+# Ex : $CLI_1, $CLI_2, etc.
+# arg-2 : Starting index for the snapname "snap$i"
+# arg-3 : Number of snapshots to be deleted.
+function snap_delete()
+{
+ eval local cli_index=\$$1
+ local i=$2
+ local limit=$[$i + $3]
+
+ while [ $i -lt $limit ]
+ do
+ $cli_index snapshot delete snap$i
+ i=$[$i+1]
+ done
+}
+
+# arg-1 : From which node the command should be triggered
+# Ex : $CLI_1, $CLI_2, etc.
+# arg-2 : key value
+function snap_config()
+{
+ eval local cli_index=\$$1
+ local var=$2
+ $cli_index snapshot config | grep "^$var" | sed 's/.*: //'
+}
+
+function check_if_snapd_exist() {
+ local pid
+ pid=$(ps aux | grep "snapd" | grep -v grep | awk '{print $2}')
+ if [ -n "$pid" ]; then echo "Y"; else echo "N"; fi
+}
+
+# returns number of snapshot being displayed in ".snaps" directory
+function uss_count_snap_displayed() {
+ local path=$1
+ ls $path/.snaps | wc -l
+}
+
+function snap_info_volume()
+{
+ eval local cli_index=\$$1
+ local var=$2
+ local vol=$3
+ $cli_index snapshot info volume $vol | grep "^$var" | sed 's/.*: //'
+}
+
+function snap_config_volume()
+{
+ eval local cli_index=\$$1
+ local var=$2
+ local vol=$3
+ $cli_index snapshot config $vol| grep "^$var" | sed 's/.*: //'
+}
+
+#return specific fields of xml output
+function get-cmd-field-xml()
+{
+ local cli=$CLI_1;
+ if [ "$cli" = "" ]; then
+ cli=$CLI
+ fi
+
+ COMMAND=$1
+ PATTERN=$2
+
+ $cli $COMMAND --xml | xmllint --format - | grep $PATTERN
+}
+
+function get_snap_brick_status()
+{
+ local snap=$1;
+
+ $CLI snapshot status $snap | grep "Brick Running" | sed 's/.*: //';
+}
+
+case $OSTYPE in
+NetBSD)
+ echo "Skip test on LVM which is not available on NetBSD" >&2
+ SKIP_TESTS
+ exit 0
+ ;;
+*)
+ ;;
+esac
diff --git a/tests/ssl.rc b/tests/ssl.rc
new file mode 100644
index 00000000000..b1ccc4c8d38
--- /dev/null
+++ b/tests/ssl.rc
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+for d in /etc/ssl /etc/openssl /usr/local/etc/openssl ; do
+ if test -d $d ; then
+ SSL_BASE=$d
+ break
+ fi
+done
+
+if [ ! -d "$SSL_BASE" ]; then
+ echo "Skip test! SSL certificate path missing in the system" >&2
+ SKIP_TESTS
+ exit 0
+fi
+
+SSL_KEY=$SSL_BASE/glusterfs.key
+SSL_CERT=$SSL_BASE/glusterfs.pem
+SSL_CA=$SSL_BASE/glusterfs.ca
+
+
+# Create self-signed certificates
+function create_self_signed_certs (){
+ openssl genrsa -out $SSL_KEY 2048
+ openssl req -new -x509 -key $SSL_KEY -subj /CN=Anyone -out $SSL_CERT
+ ln $SSL_CERT $SSL_CA
+ return $?
+}
+
+function cleanup_certs () {
+ rm -f $SSL_BASE/glusterfs.*
+}
+
+push_trapfunc cleanup_certs
+
+cleanup_certs
diff --git a/tests/thin-arbiter.rc b/tests/thin-arbiter.rc
new file mode 100644
index 00000000000..e26d91b1907
--- /dev/null
+++ b/tests/thin-arbiter.rc
@@ -0,0 +1,613 @@
+declare -A PORTMAP
+PORTCURR=49152
+function ta_create_ta_and_volfile()
+{
+ local b=$B0/$1
+ mkdir -p $b/.glusterfs/indices
+cat > $B0/ta.vol <<EOF
+volume ta-posix
+ type storage/posix
+ option directory $b
+end-volume
+
+volume ta-thin-arbiter
+ type features/thin-arbiter
+ subvolumes ta-posix
+end-volume
+
+volume ta-locks
+ type features/locks
+ option notify-contention yes
+ subvolumes ta-thin-arbiter
+end-volume
+
+volume ta-upcall
+ type features/upcall
+ option cache-invalidation off
+ subvolumes ta-locks
+end-volume
+
+volume ta-io-threads
+ type performance/io-threads
+ subvolumes ta-upcall
+end-volume
+
+volume ta-index
+ type features/index
+ option xattrop-pending-watchlist trusted.afr.ta-
+ option xattrop-dirty-watchlist trusted.afr.dirty
+ option index-base $b/.glusterfs/indices
+ subvolumes ta-io-threads
+end-volume
+
+volume ta-io-stats
+ type debug/io-stats
+ option count-fop-hits off
+ option latency-measurement off
+ option log-level WARNING
+ option unique-id $b
+ subvolumes ta-index
+end-volume
+
+volume ta-server
+ type protocol/server
+ option transport.listen-backlog 10
+ option transport.socket.keepalive-count 9
+ option transport.socket.keepalive-interval 2
+ option transport.socket.keepalive-time 20
+ option transport.tcp-user-timeout 0
+ option transport.socket.keepalive 1
+ option auth.addr.$b.allow *
+ option auth-path $b
+ option transport.address-family inet
+ option transport-type tcp
+ subvolumes ta-io-stats
+end-volume
+EOF
+}
+
+function ta_create_brick_and_volfile()
+{
+ local b=$B0/$1
+ mkdir -p $b/.glusterfs/indices
+cat > $B0/${1}.vol <<EOF
+volume ${V0}-posix
+ type storage/posix
+ option directory $b
+end-volume
+
+volume ${V0}-locks
+ type features/locks
+ subvolumes ${V0}-posix
+end-volume
+
+volume ${V0}-leases
+ type features/leases
+ option leases off
+ subvolumes ${V0}-locks
+end-volume
+
+volume ${V0}-upcall
+ type features/upcall
+ option cache-invalidation off
+ subvolumes ${V0}-leases
+end-volume
+
+volume ${V0}-io-threads
+ type performance/io-threads
+ subvolumes ${V0}-upcall
+end-volume
+
+volume ${V0}-index
+ type features/index
+ option xattrop-pending-watchlist trusted.afr.${V0}-
+ option xattrop-dirty-watchlist trusted.afr.dirty
+ option index-base $b/.glusterfs/indices
+ subvolumes ${V0}-io-threads
+end-volume
+
+volume $b
+ type debug/io-stats
+ option count-fop-hits off
+ option latency-measurement off
+ option log-level INFO
+ option unique-id $b
+ subvolumes ${V0}-index
+end-volume
+
+volume ${V0}-server
+ type protocol/server
+ option transport.listen-backlog 1024
+ option transport.socket.keepalive-count 9
+ option transport.socket.keepalive-interval 2
+ option transport.socket.keepalive-time 20
+ option transport.tcp-user-timeout 0
+ option transport.socket.keepalive 1
+ option auth.addr.$b.allow *
+ option auth-path $b
+ option auth.login.459d48e8-2a92-4f11-89f2-077b29f6f86d.password a0ad63dd-8314-4f97-9160-1b93e3cb1f0b
+ option auth.login.$b.allow 459d48e8-2a92-4f11-89f2-077b29f6f86d
+ option transport.address-family inet
+ option transport-type tcp
+ subvolumes $b
+end-volume
+EOF
+}
+
+function ta_set_port_by_name()
+{
+ if [ -z ${PORTMAP[$1]} ]
+ then
+ PORTMAP[$1]=$PORTCURR
+ PORTCURR=$((PORTCURR+1))
+ fi
+}
+
+function ta_start_brick_process()
+{
+ ta_set_port_by_name $1
+ local p=${PORTMAP[$1]}
+ if glusterfs -p $B0/${1}.pid --volfile=$B0/${1}.vol -l $(gluster --print-logdir)/${1}.log --xlator-option ${V0}-server.listen-port=$p
+ then
+ cat $B0/${1}.pid
+ else
+ echo ""
+ return 1
+ fi
+}
+
+function ta_start_ta_process()
+{
+ ta_set_port_by_name $1
+ local p=${PORTMAP[$1]}
+ if glusterfs -p $B0/${1}.pid --volfile=$B0/${1}.vol -l $(gluster --print-logdir)/${1}.log --xlator-option ta-server.listen-port=$p
+ then
+ cat $B0/${1}.pid
+ else
+ echo ""
+ return 1
+ fi
+}
+
+function ta_start_mount_process()
+{
+ mkdir -p $1
+ identifier=$(echo $1 | tr / .)
+ if glusterfs --entry-timeout=0 --attribute-timeout=0 -p $B0/${identifier}.pid --volfile=$B0/mount.vol $1
+ then
+ cat $B0/$identifier.pid
+ else
+ echo ""
+ return 1
+ fi
+}
+
+function ta_get_mount_pid()
+{
+ local mount_path=$1
+ identifier=$(echo $mount_path | tr / .)
+ cat $B0/${identifier}.pid
+}
+
+function ta_create_mount_volfile()
+{
+ local b0=$B0/$1
+ local b1=$B0/$2
+ local ta=$B0/$3
+ local b0_port=${PORTMAP[$1]}
+ local b1_port=${PORTMAP[$2]}
+ local ta_port=${PORTMAP[$3]}
+cat > $B0/mount.vol <<EOF
+volume ${V0}-client-0
+ type protocol/client
+ option remote-host $H0
+ option client-bind-insecure off
+ option transport.socket.keepalive-interval 2
+ option transport.socket.keepalive-time 20
+ option transport.socket.ssl-enabled off
+ option remote-subvolume $b0
+ option transport.tcp-user-timeout 0
+ option transport.socket.keepalive-count 9
+ option transport-type tcp
+ option ping-timeout 42
+ option send-gids on
+ option remote-port $b0_port
+ option password a0ad63dd-8314-4f97-9160-1b93e3cb1f0b
+ option username 459d48e8-2a92-4f11-89f2-077b29f6f86d
+end-volume
+
+volume ${V0}-client-1
+ type protocol/client
+ option remote-host $H0
+ option client-bind-insecure off
+ option transport.socket.keepalive-interval 2
+ option transport.socket.keepalive-time 20
+ option transport.socket.ssl-enabled off
+ option remote-subvolume $b1
+ option transport.tcp-user-timeout 0
+ option transport.socket.keepalive-count 9
+ option transport-type tcp
+ option ping-timeout 42
+ option send-gids on
+ option remote-port $b1_port
+ option password a0ad63dd-8314-4f97-9160-1b93e3cb1f0b
+ option username 459d48e8-2a92-4f11-89f2-077b29f6f86d
+end-volume
+
+volume ${V0}-thin-arbiter-client
+ type protocol/client
+ option client-bind-insecure off
+ option transport.socket.ssl-enabled off
+ option remote-subvolume $ta
+ option ping-timeout 42
+ option remote-host $H0
+ option send-gids on
+ option transport.socket.keepalive-interval 2
+ option remote-port $ta_port
+ option transport-type tcp
+ option transport.tcp-user-timeout 0
+ option transport.socket.keepalive-time 20
+ option transport.socket.keepalive-count 9
+end-volume
+
+volume ${V0}-replicate-0
+ type cluster/replicate
+ option afr-dirty-xattr trusted.afr.dirty
+ option iam-self-heal-daemon off
+ option afr-pending-xattr ${V0}-client-0,${V0}-client-1,${V0}-ta-2
+ option thin-arbiter $H0:$ta
+ subvolumes ${V0}-client-0 ${V0}-client-1 ${V0}-thin-arbiter-client
+end-volume
+
+volume ${V0}-distribute
+ type cluster/distribute
+ option rebal-throttle normal
+ option force-migration off
+ option lookup-optimize on
+ option weighted-rebalance on
+ option write-freq-threshold 0
+ option assert-no-child-down off
+ option lock-migration off
+ option lookup-unhashed on
+ option randomize-hash-range-by-gfid off
+ option unhashed-sticky-bit off
+ option use-readdirp on
+ option readdir-optimize off
+ option xattr-name trusted.glusterfs.dht
+ option read-freq-threshold 0
+ option min-free-disk 10%
+ option min-free-inodes 5%
+ option rebalance-stats off
+ subvolumes ${V0}-replicate-0
+end-volume
+
+volume ${V0}-write-behind
+ type performance/write-behind
+ option strict-O_DIRECT off
+ option strict-write-ordering off
+ option resync-failed-syncs-after-fsync off
+ option aggregate-size 128KB
+ option flush-behind on
+ option cache-size 1MB
+ option trickling-writes on
+ subvolumes ${V0}-distribute
+end-volume
+
+volume ${V0}-read-ahead
+ type performance/read-ahead
+ option force-atime-update false
+ option page-count 4
+ option page-size 131072
+ option pass-through false
+ subvolumes ${V0}-write-behind
+end-volume
+
+volume ${V0}-readdir-ahead
+ type performance/readdir-ahead
+ option rda-low-wmark 4096
+ option rda-high-wmark 128KB
+ option rda-cache-limit 10MB
+ option parallel-readdir off
+ option pass-through false
+ option rda-request-size 131072
+ subvolumes ${V0}-read-ahead
+end-volume
+
+volume ${V0}-io-cache
+ type performance/io-cache
+ option cache-timeout 1
+ option cache-size 32MB
+ option min-file-size 0
+ option max-file-size 0
+ option pass-through false
+ subvolumes ${V0}-readdir-ahead
+end-volume
+
+volume ${V0}-quick-read
+ type performance/quick-read
+ option cache-invalidation false
+ option ctime-invalidation false
+ option cache-size 128MB
+ option cache-timeout 1
+ option max-file-size 64KB
+ subvolumes ${V0}-io-cache
+end-volume
+
+volume ${V0}-open-behind
+ type performance/open-behind
+ option use-anonymous-fd yes
+ option lazy-open yes
+ option read-after-open no
+ option pass-through false
+ subvolumes ${V0}-quick-read
+end-volume
+
+volume ${V0}-md-cache
+ type performance/md-cache
+ option pass-through false
+ option cache-capability-xattrs true
+ option cache-posix-acl false
+ option cache-swift-metadata true
+ option cache-samba-metadata false
+ option md-cache-timeout 1
+ option force-readdirp true
+ option cache-invalidation false
+ option md-cache-statfs off
+ option cache-selinux false
+ option cache-ima-xattrs true
+ subvolumes ${V0}-open-behind
+end-volume
+
+volume ${V0}-io-threads
+ type performance/io-threads
+ option normal-prio-threads 16
+ option enable-least-priority on
+ option idle-time 120
+ option cleanup-disconnected-reqs off
+ option pass-through false
+ option thread-count 16
+ option high-prio-threads 16
+ option low-prio-threads 16
+ option least-prio-threads 1
+ subvolumes ${V0}-md-cache
+end-volume
+
+volume ${V0}
+ type debug/io-stats
+ option client-logger gluster-log
+ option client-log-buf-size 5
+ option latency-measurement off
+ option client-log-level INFO
+ option brick-log-level INFO
+ option count-fop-hits off
+ option sys-log-level CRITICAL
+ option brick-log-format with-msg-id
+ option brick-log-buf-size 5
+ option dump-fd-stats off
+ option ios-dump-interval 0
+ option ios-dump-format json
+ option client-log-format with-msg-id
+ option log-buf-size 5
+ option log-flush-timeout 120
+ option client-log-flush-timeout 120
+ option ios-sample-interval 0
+ option ios-sample-buf-size 65535
+ option brick-logger gluster-log
+ option ios-dnscache-ttl-sec 86400
+ option brick-log-flush-timeout 120
+ option unique-id /no/such/path
+ subvolumes ${V0}-io-threads
+end-volume
+EOF
+}
+
+function ta_kill_brick()
+{
+ local p=$(cat $B0/${1}.pid)
+ echo > $B0/${1}.pid
+ kill -9 $p
+}
+
+function ta_get_pid_by_brick_name()
+{
+ cat $B0/${1}.pid
+}
+
+function ta_up_status()
+{
+ local v=$1
+ local m=$2
+ local replica_id=$3
+ grep -E "^up = " $m/.meta/graphs/active/${v}-replicate-${replica_id}/private | cut -f2 -d'='
+}
+
+function ta_create_shd_volfile()
+{
+ local b0=$B0/$1
+ local b1=$B0/$2
+ local ta=$B0/$3
+ local b0_port=${PORTMAP[$1]}
+ local b1_port=${PORTMAP[$2]}
+ local ta_port=${PORTMAP[$3]}
+cat > $B0/glustershd.vol <<EOF
+volume ${V0}-replicate-0-client-0
+ type protocol/client
+ option send-gids on
+ option transport.socket.keepalive-interval 2
+ option remote-host $H0
+ option remote-subvolume $b0
+ option ping-timeout 42
+ option client-bind-insecure off
+ option transport.socket.own-thread off
+ option frame-timeout 1800
+ option non-blocking-io off
+ option transport.socket.keepalive 1
+ option transport.socket.keepalive-count 9
+ option transport.tcp-user-timeout 0
+ option transport.socket.nodelay 1
+ option transport.socket.keepalive-time 20
+ option transport.socket.read-fail-log off
+ option transport-type tcp
+ option filter-O_DIRECT disable
+ option event-threads 2
+ option transport.listen-backlog 1024
+ option transport.socket.ssl-enabled off
+ option password a0ad63dd-8314-4f97-9160-1b93e3cb1f0b
+ option username 459d48e8-2a92-4f11-89f2-077b29f6f86d
+ option remote-port $b0_port
+end-volume
+
+volume ${V0}-replicate-0-client-1
+ type protocol/client
+ option remote-host $H0
+ option transport.socket.keepalive-time 20
+ option transport.socket.keepalive-count 9
+ option transport.socket.own-thread off
+ option transport.socket.ssl-enabled off
+ option transport-type tcp
+ option remote-subvolume $b1
+ option event-threads 2
+ option transport.tcp-user-timeout 0
+ option transport.socket.keepalive 1
+ option transport.socket.nodelay 1
+ option transport.socket.read-fail-log off
+ option frame-timeout 1800
+ option ping-timeout 42
+ option client-bind-insecure off
+ option filter-O_DIRECT disable
+ option send-gids on
+ option non-blocking-io off
+ option transport.listen-backlog 1024
+ option transport.socket.keepalive-interval 2
+ option password a0ad63dd-8314-4f97-9160-1b93e3cb1f0b
+ option username 459d48e8-2a92-4f11-89f2-077b29f6f86d
+ option remote-port $b1_port
+end-volume
+
+volume ${V0}-replicate-0-thin-arbiter-client
+ type protocol/client
+ option frame-timeout 1800
+ option event-threads 2
+ option transport.listen-backlog 1024
+ option transport.socket.nodelay 1
+ option transport.socket.keepalive-count 9
+ option transport.socket.ssl-enabled off
+ option transport-type tcp
+ option remote-subvolume $ta
+ option filter-O_DIRECT disable
+ option non-blocking-io off
+ option transport.socket.keepalive-interval 2
+ option transport.socket.read-fail-log off
+ option remote-host $H0
+ option send-gids on
+ option transport.tcp-user-timeout 0
+ option transport.socket.keepalive-time 20
+ option ping-timeout 42
+ option client-bind-insecure off
+ option transport.socket.keepalive 1
+ option transport.socket.own-thread off
+ option remote-port $ta_port
+end-volume
+
+volume ${V0}-replicate-0
+ type cluster/replicate
+ option background-self-heal-count 8
+ option metadata-self-heal on
+ option data-change-log on
+ option entrylk-trace off
+ option iam-self-heal-daemon yes
+ option afr-dirty-xattr trusted.afr.dirty
+ option heal-timeout 10
+ option read-hash-mode 1
+ option metadata-splitbrain-forced-heal off
+ option thin-arbiter $H0:$ta
+ option shd-max-threads 1
+ option afr-pending-xattr ${V0}-client-0,${V0}-client-1,${V0}-ta-2
+ option halo-max-latency 5
+ option halo-max-replicas 99999
+ option entry-change-log on
+ option halo-nfsd-max-latency 5
+ option inodelk-trace off
+ option pre-op-compat on
+ option eager-lock on
+ option self-heal-readdir-size 1KB
+ option ensure-durability on
+ option locking-scheme full
+ option halo-enabled False
+ option heal-wait-queue-length 128
+ option entry-self-heal on
+ option self-heal-daemon on
+ option quorum-reads no
+ option shd-wait-qlength 1024
+ option choose-local true
+ option halo-min-replicas 2
+ option data-self-heal on
+ option metadata-change-log on
+ option consistent-metadata no
+ option full-lock yes
+ option use-compound-fops no
+ option halo-shd-max-latency 99999
+ option quorum-type none
+ option favorite-child-policy none
+ option read-subvolume-index -1
+ option optimistic-change-log on
+ option iam-nfs-daemon off
+ option post-op-delay-secs 1
+ option granular-entry-heal no
+ option consistent-io no
+ option data-self-heal-window-size 1
+ subvolumes ${V0}-replicate-0-client-0 ${V0}-replicate-0-client-1 ${V0}-replicate-0-thin-arbiter-client
+end-volume
+
+volume glustershd
+ type debug/io-stats
+ option log-buf-size 5
+ option ios-dump-format json
+ option latency-measurement off
+ option sys-log-level CRITICAL
+ option brick-log-level INFO
+ option client-logger gluster-log
+ option client-log-format with-msg-id
+ option brick-log-format with-msg-id
+ option client-log-buf-size 5
+ option log-flush-timeout 120
+ option ios-dump-interval 0
+ option ios-sample-interval 0
+ option ios-dnscache-ttl-sec 86400
+ option count-fop-hits off
+ option client-log-level INFO
+ option brick-logger gluster-log
+ option brick-log-buf-size 5
+ option ios-sample-buf-size 65535
+ option client-log-flush-timeout 120
+ option brick-log-flush-timeout 120
+ option unique-id /no/such/path
+ option dump-fd-stats off
+ subvolumes ${V0}-replicate-0
+end-volume
+EOF
+}
+
+function ta_start_shd_process()
+{
+ if glusterfs -p $B0/${1}.pid --volfile=$B0/${1}.vol -l $(gluster --print-logdir)/${1}.log --process-name=glustershd
+ then
+ cat $B0/${1}.pid
+ else
+ echo ""
+ return 1
+ fi
+}
+
+function ta_mount_child_up_status()
+{
+ local mount_path=$1
+ #brick_id is (brick-num in volume info - 1)
+ local vol=$2
+ local brick_id=$3
+ local pid=$(ta_get_mount_pid $mount_path)
+ local fpath=$(generate_statedump $pid)
+ up=$(grep -a -B1 trusted.afr.$vol-client-$brick_id $fpath | head -1 | cut -f2 -d'=')
+ rm -f $fpath
+ echo "$up"
+}
diff --git a/tests/traps.rc b/tests/traps.rc
new file mode 100644
index 00000000000..f011960c97e
--- /dev/null
+++ b/tests/traps.rc
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+# Make sure this only gets included/executed once. Unfortunately, bash doesn't
+# usually distinguish between values that are unset and values that are null.
+# To work around that, we declare TRAPFUNCS to be a one-element array right at
+# the start, but that one element is : which is defined to do nothing.
+
+if [ ${#TRAPFUNCS[@]} = 0 ]; then
+ TRAPFUNCS=(:)
+
+ push_trapfunc () {
+ TRAPFUNCS[${#TRAPFUNCS[@]}]="$@"
+ }
+
+ execute_trapfuncs () {
+ for i in "${TRAPFUNCS[@]}"; do
+ $i
+ done
+ }
+
+ trap execute_trapfuncs EXIT
+fi
diff --git a/tests/utils/arequal-checksum.c b/tests/utils/arequal-checksum.c
new file mode 100644
index 00000000000..b51a054162b
--- /dev/null
+++ b/tests/utils/arequal-checksum.c
@@ -0,0 +1,633 @@
+/*
+ Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+#define _XOPEN_SOURCE 500
+
+#include <ftw.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+#include <dirent.h>
+#include <stdlib.h>
+#include <libgen.h>
+#include <stdint.h>
+#include <dirent.h>
+#include <argp.h>
+
+/*
+ * FTW_ACTIONRETVAL is a GNU libc extension. It is used here to skip
+ * hierarchies. On other systems we will still walk the tree, ignoring
+ * entries.
+ */
+#ifndef FTW_ACTIONRETVAL
+#define FTW_ACTIONRETVAL 0
+#endif
+
+int debug = 0;
+
+typedef struct {
+ char test_directory[4096];
+ char **ignored_directory;
+ unsigned int directories_ignored;
+} arequal_config_t;
+
+static arequal_config_t arequal_config;
+
+static error_t
+arequal_parse_opts(int key, char *arg, struct argp_state *_state);
+
+static struct argp_option arequal_options[] = {
+ {"ignore", 'i', "IGNORED", 0, "entry in the given path to be ignored"},
+ {"path", 'p', "PATH", 0, "path where arequal has to be run"},
+ {0, 0, 0, 0, 0}};
+
+#define DBG(fmt...) \
+ do { \
+ if (debug) { \
+ fprintf(stderr, "D "); \
+ fprintf(stderr, fmt); \
+ } \
+ } while (0)
+
+void
+add_to_list(char *arg);
+void
+get_absolute_path(char directory[], char *arg);
+
+static int
+roof(int a, int b)
+{
+ return ((((a) + (b)-1) / ((b) ? (b) : 1)) * (b));
+}
+
+void
+add_to_list(char *arg)
+{
+ char *string = NULL;
+ int index = 0;
+
+ index = arequal_config.directories_ignored - 1;
+ string = strdup(arg);
+
+ if (!arequal_config.ignored_directory) {
+ arequal_config.ignored_directory = calloc(1, sizeof(char *));
+ } else
+ arequal_config.ignored_directory = realloc(
+ arequal_config.ignored_directory, sizeof(char *) * (index + 1));
+
+ arequal_config.ignored_directory[index] = string;
+}
+
+static error_t
+arequal_parse_opts(int key, char *arg, struct argp_state *_state)
+{
+ switch (key) {
+ case 'i': {
+ arequal_config.directories_ignored++;
+ add_to_list(arg);
+ } break;
+ case 'p': {
+ if (arg[0] == '/')
+ strcpy(arequal_config.test_directory, arg);
+ else
+ get_absolute_path(arequal_config.test_directory, arg);
+
+ if (arequal_config
+ .test_directory[strlen(arequal_config.test_directory) -
+ 1] == '/')
+ arequal_config
+ .test_directory[strlen(arequal_config.test_directory) - 1] =
+ '\0';
+ } break;
+
+ case ARGP_KEY_NO_ARGS:
+ break;
+ case ARGP_KEY_ARG:
+ break;
+ case ARGP_KEY_END:
+ if (_state->argc == 1) {
+ argp_usage(_state);
+ }
+ }
+
+ return 0;
+}
+
+void
+get_absolute_path(char directory[], char *arg)
+{
+ char cwd[4096] = {
+ 0,
+ };
+
+ if (getcwd(cwd, sizeof(cwd)) == NULL)
+ printf("some error in getting cwd\n");
+
+ if (strcmp(arg, ".") != 0) {
+ if (cwd[strlen(cwd)] != '/')
+ cwd[strlen(cwd)] = '/';
+ strcat(cwd, arg);
+ }
+ strcpy(directory, cwd);
+}
+
+static struct argp argp = {
+ arequal_options, arequal_parse_opts, "",
+ "arequal - Tool which calculates the checksum of all the entries"
+ "present in a given directory"};
+
+/* All this runs in single thread, hence using 'global' variables */
+
+unsigned long long avg_uid_file = 0;
+unsigned long long avg_uid_dir = 0;
+unsigned long long avg_uid_symlink = 0;
+unsigned long long avg_uid_other = 0;
+
+unsigned long long avg_gid_file = 0;
+unsigned long long avg_gid_dir = 0;
+unsigned long long avg_gid_symlink = 0;
+unsigned long long avg_gid_other = 0;
+
+unsigned long long avg_mode_file = 0;
+unsigned long long avg_mode_dir = 0;
+unsigned long long avg_mode_symlink = 0;
+unsigned long long avg_mode_other = 0;
+
+unsigned long long global_ctime_checksum = 0;
+
+unsigned long long count_dir = 0;
+unsigned long long count_file = 0;
+unsigned long long count_symlink = 0;
+unsigned long long count_other = 0;
+
+unsigned long long checksum_file1 = 0;
+unsigned long long checksum_file2 = 0;
+unsigned long long checksum_dir = 0;
+unsigned long long checksum_symlink = 0;
+unsigned long long checksum_other = 0;
+
+unsigned long long
+checksum_path(const char *path)
+{
+ unsigned long long csum = 0;
+ unsigned long long *nums = 0;
+ int len = 0;
+ int cnt = 0;
+
+ len = roof(strlen(path), sizeof(csum));
+ cnt = len / sizeof(csum);
+
+ nums = __builtin_alloca(len);
+ memset(nums, 0, len);
+ strcpy((char *)nums, path);
+
+ while (cnt) {
+ csum ^= *nums;
+ nums++;
+ cnt--;
+ }
+
+ return csum;
+}
+
+int
+checksum_md5(const char *path, const struct stat *sb)
+{
+ uint64_t this_data_checksum = 0;
+ FILE *filep = NULL;
+ char *cmd = NULL;
+ char strvalue[17] = {
+ 0,
+ };
+ int ret = -1;
+ int len = 0;
+ const char *pos = NULL;
+ char *cpos = NULL;
+
+ /* Have to escape single-quotes in filename.
+ * First, calculate the size of the buffer I'll need.
+ */
+ for (pos = path; *pos; pos++) {
+ if (*pos == '\'')
+ len += 4;
+ else
+ len += 1;
+ }
+
+ cmd = malloc(sizeof(char) * (len + 20));
+ cmd[0] = '\0';
+
+ /* Now, build the command with single quotes escaped. */
+
+ cpos = cmd;
+#if defined(linux)
+ strcpy(cpos, "md5sum '");
+ cpos += 8;
+#elif defined(__NetBSD__)
+ strcpy(cpos, "md5 -n '");
+ cpos += 8;
+#elif defined(__FreeBSD__) || defined(__APPLE__)
+ strcpy(cpos, "md5 -q '");
+ cpos += 8;
+#else
+#error "Please add system-specific md5 command"
+#endif
+
+ /* Add the file path, with every single quotes replaced with this sequence:
+ * '\''
+ */
+
+ for (pos = path; *pos; pos++) {
+ if (*pos == '\'') {
+ strcpy(cpos, "'\\''");
+ cpos += 4;
+ } else {
+ *cpos = *pos;
+ cpos++;
+ }
+ }
+
+ /* Add on the trailing single-quote and null-terminate. */
+ strcpy(cpos, "'");
+
+ filep = popen(cmd, "r");
+ if (!filep) {
+ perror(path);
+ goto out;
+ }
+
+ if (fread(strvalue, sizeof(char), 16, filep) != 16) {
+ fprintf(stderr, "%s: short read\n", path);
+ goto out;
+ }
+
+ this_data_checksum = strtoull(strvalue, NULL, 16);
+ if (-1 == this_data_checksum) {
+ fprintf(stderr, "%s: %s\n", strvalue, strerror(errno));
+ goto out;
+ }
+ checksum_file1 ^= this_data_checksum;
+
+ if (fread(strvalue, sizeof(char), 16, filep) != 16) {
+ fprintf(stderr, "%s: short read\n", path);
+ goto out;
+ }
+
+ this_data_checksum = strtoull(strvalue, NULL, 16);
+ if (-1 == this_data_checksum) {
+ fprintf(stderr, "%s: %s\n", strvalue, strerror(errno));
+ goto out;
+ }
+ checksum_file2 ^= this_data_checksum;
+
+ ret = 0;
+out:
+ if (filep)
+ pclose(filep);
+
+ if (cmd)
+ free(cmd);
+
+ return ret;
+}
+
+int
+checksum_filenames(const char *path, const struct stat *sb)
+{
+ DIR *dirp = NULL;
+ struct dirent *entry = NULL;
+ unsigned long long csum = 0;
+ int i = 0;
+ int found = 0;
+
+ dirp = opendir(path);
+ if (!dirp) {
+ perror(path);
+ goto out;
+ }
+
+ errno = 0;
+ while ((entry = readdir(dirp))) {
+ /* do not calculate the checksum of the entries which user has
+ told to ignore and proceed to other siblings.*/
+ if (arequal_config.ignored_directory) {
+ for (i = 0; i < arequal_config.directories_ignored; i++) {
+ if ((strcmp(entry->d_name,
+ arequal_config.ignored_directory[i]) == 0)) {
+ found = 1;
+ DBG("ignoring the entry %s\n", entry->d_name);
+ break;
+ }
+ }
+ if (found == 1) {
+ found = 0;
+ continue;
+ }
+ }
+ csum = checksum_path(entry->d_name);
+ checksum_dir ^= csum;
+ }
+
+ if (errno) {
+ perror(path);
+ goto out;
+ }
+
+out:
+ if (dirp)
+ closedir(dirp);
+
+ return 0;
+}
+
+int
+process_file(const char *path, const struct stat *sb)
+{
+ int ret = 0;
+
+ count_file++;
+
+ avg_uid_file ^= sb->st_uid;
+ avg_gid_file ^= sb->st_gid;
+ avg_mode_file ^= sb->st_mode;
+
+ ret = checksum_md5(path, sb);
+
+ return ret;
+}
+
+int
+process_dir(const char *path, const struct stat *sb)
+{
+ unsigned long long csum = 0;
+
+ count_dir++;
+
+ avg_uid_dir ^= sb->st_uid;
+ avg_gid_dir ^= sb->st_gid;
+ avg_mode_dir ^= sb->st_mode;
+
+ csum = checksum_filenames(path, sb);
+
+ checksum_dir ^= csum;
+
+ return 0;
+}
+
+int
+process_symlink(const char *path, const struct stat *sb)
+{
+ int ret = 0;
+ char buf[4096] = {
+ 0,
+ };
+ unsigned long long csum = 0;
+
+ count_symlink++;
+
+ avg_uid_symlink ^= sb->st_uid;
+ avg_gid_symlink ^= sb->st_gid;
+ avg_mode_symlink ^= sb->st_mode;
+
+ ret = readlink(path, buf, 4096);
+ if (ret < 0) {
+ perror(path);
+ goto out;
+ }
+
+ DBG("readlink (%s) => %s\n", path, buf);
+
+ csum = checksum_path(buf);
+
+ DBG("checksum_path (%s) => %llx\n", buf, csum);
+
+ checksum_symlink ^= csum;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+process_other(const char *path, const struct stat *sb)
+{
+ count_other++;
+
+ avg_uid_other ^= sb->st_uid;
+ avg_gid_other ^= sb->st_gid;
+ avg_mode_other ^= sb->st_mode;
+
+ checksum_other ^= sb->st_rdev;
+
+ return 0;
+}
+
+static int
+ignore_entry(const char *bname, const char *dname)
+{
+ int i;
+
+ for (i = 0; i < arequal_config.directories_ignored; i++) {
+ if (strcmp(bname, arequal_config.ignored_directory[i]) == 0 &&
+ strncmp(arequal_config.test_directory, dname,
+ strlen(arequal_config.test_directory)) == 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+int
+process_entry(const char *path, const struct stat *sb, int typeflag,
+ struct FTW *ftwbuf)
+{
+ int ret = 0;
+ char *name = NULL;
+ char *bname = NULL;
+ char *dname = NULL;
+ int i = 0;
+
+ /* The if condition below helps in ignoring some directories in
+ the given path. If the name of the entry is one of the directory
+ names that the user told to ignore, then that directory will not
+ be processed and will return FTW_SKIP_SUBTREE to nftw which will
+ not crawl this directory and move on to other siblings.
+ Note that for nftw to recognize FTW_SKIP_TREE, FTW_ACTIONRETVAL
+ should be passed as an argument to nftw.
+
+ This mainly helps in calculating the checksum of network filesystems
+ (client-server), where the server might have some hidden directories
+ for managing the filesystem. So to calculate the sanity of filesystem
+ one has to get the checksum of the client and then the export directory
+ of server by telling arequal to ignore some of the directories which
+ are not part of the namespace.
+ */
+
+ if (arequal_config.ignored_directory) {
+#ifndef FTW_SKIP_SUBTREE
+ char *cp;
+
+ name = strdup(path);
+ dname = dirname(name);
+
+ for (cp = strtok(name, "/"); cp; cp = strtok(NULL, "/")) {
+ if (ignore_entry(cp, dname)) {
+ DBG("ignoring %s\n", path);
+ if (name)
+ free(name);
+ return 0;
+ }
+ }
+#else /* FTW_SKIP_SUBTREE */
+ name = strdup(path);
+
+ name[strlen(name)] = '\0';
+
+ bname = strrchr(name, '/');
+ if (bname)
+ bname++;
+
+ dname = dirname(name);
+ if (ignore_entry(bname, dname)) {
+ DBG("ignoring %s\n", bname);
+ ret = FTW_SKIP_SUBTREE;
+ if (name)
+ free(name);
+ return ret;
+ }
+#endif /* FTW_SKIP_SUBTREE */
+ }
+
+ DBG("processing entry %s\n", path);
+
+ switch ((S_IFMT & sb->st_mode)) {
+ case S_IFDIR:
+ ret = process_dir(path, sb);
+ break;
+ case S_IFREG:
+ ret = process_file(path, sb);
+ break;
+ case S_IFLNK:
+ ret = process_symlink(path, sb);
+ break;
+ default:
+ ret = process_other(path, sb);
+ break;
+ }
+
+ if (name)
+ free(name);
+ return ret;
+}
+
+int
+display_counts(FILE *fp)
+{
+ fprintf(fp, "\n");
+ fprintf(fp, "Entry counts\n");
+ fprintf(fp, "Regular files : %lld\n", count_file);
+ fprintf(fp, "Directories : %lld\n", count_dir);
+ fprintf(fp, "Symbolic links : %lld\n", count_symlink);
+ fprintf(fp, "Other : %lld\n", count_other);
+ fprintf(fp, "Total : %lld\n",
+ (count_file + count_dir + count_symlink + count_other));
+
+ return 0;
+}
+
+int
+display_checksums(FILE *fp)
+{
+ fprintf(fp, "\n");
+ fprintf(fp, "Checksums\n");
+ fprintf(fp, "Regular files : %llx%llx\n", checksum_file1, checksum_file2);
+ fprintf(fp, "Directories : %llx\n", checksum_dir);
+ fprintf(fp, "Symbolic links : %llx\n", checksum_symlink);
+ fprintf(fp, "Other : %llx\n", checksum_other);
+ fprintf(fp, "Total : %llx\n",
+ (checksum_file1 ^ checksum_file2 ^ checksum_dir ^ checksum_symlink ^
+ checksum_other));
+
+ return 0;
+}
+
+int
+display_metadata(FILE *fp)
+{
+ fprintf(fp, "\n");
+ fprintf(fp, "Metadata checksums\n");
+ fprintf(fp, "Regular files : %llx\n",
+ (avg_uid_file + 13) * (avg_gid_file + 11) * (avg_mode_file + 7));
+ fprintf(fp, "Directories : %llx\n",
+ (avg_uid_dir + 13) * (avg_gid_dir + 11) * (avg_mode_dir + 7));
+ fprintf(fp, "Symbolic links : %llx\n",
+ (avg_uid_symlink + 13) * (avg_gid_symlink + 11) *
+ (avg_mode_symlink + 7));
+ fprintf(fp, "Other : %llx\n",
+ (avg_uid_other + 13) * (avg_gid_other + 11) * (avg_mode_other + 7));
+
+ return 0;
+}
+
+int
+display_stats(FILE *fp)
+{
+ display_counts(fp);
+
+ display_metadata(fp);
+
+ display_checksums(fp);
+
+ return 0;
+}
+
+int
+main(int argc, char *argv[])
+{
+ int ret = 0;
+ int i = 0;
+
+ ret = argp_parse(&argp, argc, argv, 0, 0, NULL);
+ if (ret != 0) {
+ fprintf(stderr, "parsing arguments failed\n");
+ return -2;
+ }
+
+ /* Use FTW_ACTIONRETVAL to take decision on what to do depending upon */
+ /* the return value of the callback function */
+ /* (process_entry in this case) */
+ ret = nftw(arequal_config.test_directory, process_entry, 30,
+ FTW_ACTIONRETVAL | FTW_PHYS | FTW_MOUNT);
+ if (ret != 0) {
+ fprintf(stderr, "ftw (%s) returned %d (%s), terminating\n", argv[1],
+ ret, strerror(errno));
+ return 1;
+ }
+
+ display_stats(stdout);
+
+ if (arequal_config.ignored_directory) {
+ for (i = 0; i < arequal_config.directories_ignored; i++) {
+ if (arequal_config.ignored_directory[i])
+ free(arequal_config.ignored_directory[i]);
+ }
+ free(arequal_config.ignored_directory);
+ }
+
+ return 0;
+}
diff --git a/tests/utils/changelog/changelog.h b/tests/utils/changelog/changelog.h
new file mode 100644
index 00000000000..1502b689eb4
--- /dev/null
+++ b/tests/utils/changelog/changelog.h
@@ -0,0 +1,125 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_CHANGELOG_H
+#define _GF_CHANGELOG_H
+
+struct gf_brick_spec;
+
+/**
+ * Max bit shiter for event selection
+ */
+#define CHANGELOG_EV_SELECTION_RANGE 5
+
+#define CHANGELOG_OP_TYPE_JOURNAL (1 << 0)
+#define CHANGELOG_OP_TYPE_OPEN (1 << 1)
+#define CHANGELOG_OP_TYPE_CREATE (1 << 2)
+#define CHANGELOG_OP_TYPE_RELEASE (1 << 3)
+#define CHANGELOG_OP_TYPE_BR_RELEASE \
+ (1 << 4) /* logical release (last close()), \
+ sent by bitrot stub */
+#define CHANGELOG_OP_TYPE_MAX (1 << CHANGELOG_EV_SELECTION_RANGE)
+
+struct ev_open {
+ unsigned char gfid[16];
+ int32_t flags;
+};
+
+struct ev_creat {
+ unsigned char gfid[16];
+ int32_t flags;
+};
+
+struct ev_release {
+ unsigned char gfid[16];
+};
+
+struct ev_release_br {
+ unsigned long version;
+ unsigned char gfid[16];
+ int32_t sign_info;
+};
+
+struct ev_changelog {
+ char path[PATH_MAX];
+};
+
+typedef struct changelog_event {
+ unsigned int ev_type;
+
+ union {
+ struct ev_open open;
+ struct ev_creat create;
+ struct ev_release release;
+ struct ev_changelog journal;
+ struct ev_release_br releasebr;
+ } u;
+} changelog_event_t;
+
+#define CHANGELOG_EV_SIZE (sizeof(changelog_event_t))
+
+/**
+ * event callback, connected & disconnection defs
+ */
+typedef void(CALLBACK)(void *, char *, void *, changelog_event_t *);
+typedef void *(INIT)(void *, struct gf_brick_spec *);
+typedef void(FINI)(void *, char *, void *);
+typedef void(CONNECT)(void *, char *, void *);
+typedef void(DISCONNECT)(void *, char *, void *);
+
+struct gf_brick_spec {
+ char *brick_path;
+ unsigned int filter;
+
+ INIT *init;
+ FINI *fini;
+ CALLBACK *callback;
+ CONNECT *connected;
+ DISCONNECT *disconnected;
+
+ void *ptr;
+};
+
+/* API set */
+
+int
+gf_changelog_register(char *brick_path, char *scratch_dir, char *log_file,
+ int log_levl, int max_reconnects);
+ssize_t
+gf_changelog_scan();
+
+int
+gf_changelog_start_fresh();
+
+ssize_t
+gf_changelog_next_change(char *bufptr, size_t maxlen);
+
+int
+gf_changelog_done(char *file);
+
+/* newer flexible API */
+int
+gf_changelog_init(void *xl);
+
+int
+gf_changelog_register_generic(struct gf_brick_spec *bricks, int count,
+ int ordered, char *logfile, int lvl, void *xl);
+
+int
+gf_history_changelog(char *changelog_dir, unsigned long start,
+ unsigned long end, int n_parallel,
+ unsigned long *actual_end);
+int
+gf_history_changelog_scan();
+ssize_t
+gf_history_changelog_next_change(char *bufptr, size_t maxlen);
+int
+gf_history_changelog_done(char *file);
+#endif
diff --git a/tests/utils/changelog/get-history.c b/tests/utils/changelog/get-history.c
new file mode 100644
index 00000000000..9963ab76958
--- /dev/null
+++ b/tests/utils/changelog/get-history.c
@@ -0,0 +1,71 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * get set of new changes every 10 seconds (just print the file names)
+ *
+ * Compile it using:
+ * gcc -o gethistory `pkg-config --cflags libgfchangelog` get-history.c \
+ * `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "changelog.h"
+
+int
+main(int argc, char **argv)
+{
+ int ret = 0;
+ unsigned long end_ts = 0;
+ int start = 0;
+ int end = 0;
+
+ ret = gf_changelog_init(NULL);
+ if (ret) {
+ printf("-1");
+ fflush(stdout);
+ return -1;
+ }
+
+ ret = gf_changelog_register("/d/backends/patchy0", "/tmp/scratch_v1",
+ "/var/log/glusterfs/changes.log", 9, 5);
+ if (ret) {
+ printf("-2");
+ fflush(stdout);
+ return -1;
+ }
+
+ start = atoi(argv[1]);
+ end = atoi(argv[2]);
+
+ ret = gf_history_changelog("/d/backends/patchy0/.glusterfs/changelogs",
+ start, end, 3, &end_ts);
+ if (ret < 0) {
+ printf("-3");
+ fflush(stdout);
+ return -1;
+ } else if (ret == 1) {
+ printf("1");
+ fflush(stdout);
+ return 0;
+ }
+
+out:
+ printf("0");
+ fflush(stdout);
+ return 0;
+}
diff --git a/tests/utils/changelog/test-changelog-api.c b/tests/utils/changelog/test-changelog-api.c
new file mode 100644
index 00000000000..f4eb066b630
--- /dev/null
+++ b/tests/utils/changelog/test-changelog-api.c
@@ -0,0 +1,98 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * get set of new changes every 5 seconds (just print the file names)
+ *
+ * Compile it using:
+ * gcc -o getchanges `pkg-config --cflags libgfchangelog` get-changes.c \
+ * `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <errno.h>
+
+#include "changelog.h"
+
+int
+main(int argc, char **argv)
+{
+ int i = 0;
+ int ret = 0;
+ ssize_t nr_changes = 0;
+ ssize_t changes = 0;
+ char fbuf[PATH_MAX] = {
+ 0,
+ };
+
+ ret = gf_changelog_init(NULL);
+ if (ret) {
+ printf("-1");
+ fflush(stdout);
+ return -1;
+ }
+
+ /* get changes for brick "/d/backends/patchy0" */
+ ret = gf_changelog_register("/d/backends/patchy0", "/tmp/scratch_v1",
+ "/var/log/glusterfs/changes.log", 9, 5);
+ if (ret) {
+ printf("-2");
+ fflush(stdout);
+ return -1;
+ }
+
+ while (1) {
+ i = 0;
+ nr_changes = gf_changelog_scan();
+ if (nr_changes < 0) {
+ printf("-4");
+ fflush(stdout);
+ return -1;
+ }
+
+ if (nr_changes == 0)
+ goto next;
+
+ while ((changes = gf_changelog_next_change(fbuf, PATH_MAX)) > 0) {
+ /* process changelog */
+ /* ... */
+ /* ... */
+ /* ... */
+ /* done processing */
+
+ ret = gf_changelog_done(fbuf);
+ if (ret) {
+ printf("-5");
+ fflush(stdout);
+ return -1;
+ }
+ }
+
+ if (changes == -1) {
+ printf("-6");
+ fflush(stdout);
+ return -1;
+ }
+
+ next:
+ sleep(2);
+ }
+
+out:
+ printf("0");
+ fflush(stdout);
+ return ret;
+}
diff --git a/tests/utils/changelog/test-history-api.c b/tests/utils/changelog/test-history-api.c
new file mode 100644
index 00000000000..d78e387df10
--- /dev/null
+++ b/tests/utils/changelog/test-history-api.c
@@ -0,0 +1,111 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * get set of new changes every 10 seconds (just print the file names)
+ *
+ * Compile it using:
+ * gcc -o gethistory `pkg-config --cflags libgfchangelog` get-history.c \
+ * `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "changelog.h"
+
+int
+main(int argc, char **argv)
+{
+ int ret = 0;
+ int i = 0;
+ unsigned long end_ts = 0;
+ ssize_t nr_changes = 0;
+ ssize_t changes = 0;
+ int start = 0;
+ int end = 0;
+ char fbuf[PATH_MAX] = {
+ 0,
+ };
+
+ ret = gf_changelog_init(NULL);
+ if (ret) {
+ printf("-1");
+ fflush(stdout);
+ return -1;
+ }
+
+ ret = gf_changelog_register("/d/backends/patchy0", "/tmp/scratch_v1",
+ "/var/log/glusterfs/changes.log", 9, 5);
+ if (ret) {
+ printf("-2");
+ fflush(stdout);
+ return -1;
+ }
+
+ start = atoi(argv[1]);
+ end = atoi(argv[2]);
+
+ ret = gf_history_changelog("/d/backends/patchy0/.glusterfs/changelogs",
+ start, end, 3, &end_ts);
+ if (ret < 0) {
+ printf("-3");
+ fflush(stdout);
+ return -1;
+ } else if (ret == 1) {
+ printf("1");
+ fflush(stdout);
+ return 0;
+ }
+
+ while (1) {
+ nr_changes = gf_history_changelog_scan();
+ if (nr_changes < 0) {
+ printf("-4");
+ fflush(stdout);
+ return -1;
+ }
+
+ if (nr_changes == 0) {
+ goto out;
+ }
+
+ while ((changes = gf_history_changelog_next_change(fbuf, PATH_MAX)) >
+ 0) {
+ /* process changelog */
+ /* ... */
+ /* ... */
+ /* ... */
+ /* done processing */
+
+ ret = gf_history_changelog_done(fbuf);
+ if (ret) {
+ printf("-5");
+ fflush(stdout);
+ return -1;
+ }
+ }
+ if (changes == -1) {
+ printf("-6");
+ fflush(stdout);
+ return -1;
+ }
+ }
+
+out:
+ printf("0");
+ fflush(stdout);
+ return 0;
+}
diff --git a/tests/utils/changelogparser.py b/tests/utils/changelogparser.py
new file mode 100644
index 00000000000..3b8f81d1bad
--- /dev/null
+++ b/tests/utils/changelogparser.py
@@ -0,0 +1,236 @@
+# -*- coding: utf-8 -*-
+"""
+Why?
+
+Converts this
+
+GlusterFS Changelog | version: v1.1 | encoding : 2
+E0b99ef11-4b79-4cd0-9730-b5a0e8c4a8c0^@4^@16877^@0^@0^@00000000-0000-0000-0000-
+000000000001/dir1^@Ec5250af6-720e-4bfe-b938-827614304f39^@23^@33188^@0^@0^@0b99
+ef11-4b79-4cd0-9730-b5a0e8c4a8c0/hello.txt^@Dc5250af6-720e-4bfe-b938-827614304f
+39^@Dc5250af6-720e-4bfe-b938-827614304f39^@
+
+
+to human readable :)
+
+E 0b99ef11-4b79-4cd0-9730-b5a0e8c4a8c0 MKDIR 16877 0 000000000-0000-0000-0000
+ -000000000001/dir1
+E c5250af6-720e-4bfe-b938-827614304f39 CREATE 33188 0 0 0b99ef11-4b79-4cd0-9730
+ -b5a0e8c4a8c0/hello.txt
+D c5250af6-720e-4bfe-b938-827614304f39
+D c5250af6-720e-4bfe-b938-827614304f39
+
+
+"""
+import sys
+import codecs
+
+ENTRY = 'E'
+META = 'M'
+DATA = 'D'
+SEP = "\x00"
+
+GF_FOP = [
+ "NULL", "STAT", "READLINK", "MKNOD", "MKDIR", "UNLINK",
+ "RMDIR", "SYMLINK", "RENAME", "LINK", "TRUNCATE", "OPEN",
+ "READ", "WRITE", "STATFS", "FLUSH", "FSYNC", "SETXATTR",
+ "GETXATTR", "REMOVEXATTR", "OPENDIR", "FSYNCDIR", "ACCESS",
+ "CREATE", "FTRUNCATE", "FSTAT", "LK", "LOOKUP", "READDIR",
+ "INODELK", "FINODELK", "ENTRYLK", "FENTRYLK", "XATTROP",
+ "FXATTROP", "FSETXATTR", "FGETXATTR", "RCHECKSUM", "SETATTR",
+ "FSETATTR", "READDIRP", "GETSPEC", "FORGET", "RELEASE",
+ "RELEASEDIR", "FREMOVEXATTR", "FALLOCATE", "DISCARD", "ZEROFILL"]
+
+
+class NumTokens_V11(object):
+ E = 7
+ M = 3
+ D = 2
+ NULL = 3
+ MKNOD = 7
+ MKDIR = 7
+ UNLINK = 4
+ RMDIR = 4
+ SYMLINK = 4
+ RENAME = 5
+ LINK = 4
+ SETXATTR = 3
+ REMOVEXATTR = 3
+ CREATE = 7
+ SETATTR = 3
+ FTRUNCATE = 3
+ FXATTROP = 3
+
+
+class NumTokens_V12(NumTokens_V11):
+ UNLINK = 5
+ RMDIR = 5
+
+
+class Version:
+ V11 = "v1.1"
+ V12 = "v1.2"
+
+
+class Record(object):
+ def __init__(self, **kwargs):
+ self.ts = kwargs.get("ts", None)
+ self.fop_type = kwargs.get("fop_type", None)
+ self.gfid = kwargs.get("gfid", None)
+ self.path = kwargs.get("path", None)
+ self.fop = kwargs.get("fop", None)
+ self.path1 = kwargs.get("path1", None)
+ self.path2 = kwargs.get("path2", None)
+ self.mode = kwargs.get("mode", None)
+ self.uid = kwargs.get("uid", None)
+ self.gid = kwargs.get("gid", None)
+
+ def create_mknod_mkdir(self, **kwargs):
+ self.path = kwargs.get("path", None)
+ self.fop = kwargs.get("fop", None)
+ self.mode = kwargs.get("mode", None)
+ self.uid = kwargs.get("uid", None)
+ self.gid = kwargs.get("gid", None)
+
+ def metadata(self, **kwargs):
+ self.fop = kwargs.get("fop", None)
+
+ def rename(self, **kwargs):
+ self.fop = kwargs.get("fop", None)
+ self.path1 = kwargs.get("path1", None)
+ self.path2 = kwargs.get("path2", None)
+
+ def link_symlink_unlink_rmdir(self, **kwargs):
+ self.path = kwargs.get("path", None)
+ self.fop = kwargs.get("fop", None)
+
+ def __unicode__(self):
+ if self.fop_type == "D":
+ return u"{ts} {fop_type} {gfid}".format(**self.__dict__)
+ elif self.fop_type == "M":
+ return u"{ts} {fop_type} {gfid} {fop}".format(**self.__dict__)
+ elif self.fop_type == "E":
+ if self.fop in ["CREATE", "MKNOD", "MKDIR"]:
+ return (u"{ts} {fop_type} {gfid} {fop} "
+ u"{path} {mode} {uid} {gid}".format(**self.__dict__))
+ elif self.fop == "RENAME":
+ return (u"{ts} {fop_type} {gfid} {fop} "
+ u"{path1} {path2}".format(**self.__dict__))
+ elif self.fop in ["LINK", "SYMLINK", "UNLINK", "RMDIR"]:
+ return (u"{ts} {fop_type} {gfid} {fop} "
+ u"{path}".format(**self.__dict__))
+ else:
+ return repr(self.__dict__)
+ else:
+ return repr(self.__dict__)
+
+ def __str__(self):
+ if sys.version_info >= (3,):
+ return self.__unicode__()
+ else:
+ return unicode(self).encode('utf-8')
+
+
+def get_num_tokens(data, tokens, version=Version.V11):
+ if version == Version.V11:
+ cls_numtokens = NumTokens_V11
+ elif version == Version.V12:
+ cls_numtokens = NumTokens_V12
+ else:
+ sys.stderr.write("Unknown Changelog Version\n")
+ sys.exit(1)
+
+ if data[tokens[0]] in [ENTRY, META]:
+ if len(tokens) >= 3:
+ return getattr(cls_numtokens, GF_FOP[int(data[tokens[2]])])
+ else:
+ return None
+ else:
+ return getattr(cls_numtokens, data[tokens[0]])
+
+
+def process_record(data, tokens, changelog_ts, callback):
+ if data[tokens[0]] in [ENTRY, META]:
+ try:
+ tokens[2] = GF_FOP[int(data[tokens[2]])]
+ except ValueError:
+ tokens[2] = "NULL"
+
+ if not changelog_ts:
+ ts1 = int(changelog_ts)
+ else:
+ ts1=""
+ record = Record(ts=ts1, fop_type=data[tokens[0]],
+ gfid=data[tokens[1]])
+ if data[tokens[0]] == META:
+ record.metadata(fop=tokens[2])
+ elif data[tokens[0]] == ENTRY:
+ if tokens[2] in ["CREATE", "MKNOD", "MKDIR"]:
+ record.create_mknod_mkdir(fop=tokens[2],
+ path=data[tokens[6]],
+ mode=int(data[tokens[3]]),
+ uid=int(data[tokens[4]]),
+ gid=int(data[tokens[5]]))
+ elif tokens[2] == "RENAME":
+ record.rename(fop=tokens[2],
+ path1=data[tokens[3]],
+ path2=data[tokens[4]])
+ if tokens[2] in ["LINK", "SYMLINK", "UNLINK", "RMDIR"]:
+ record.link_symlink_unlink_rmdir(fop=tokens[2],
+ path=data[tokens[3]])
+ callback(record)
+
+
+def default_callback(record):
+ sys.stdout.write(u"{0}\n".format(record))
+
+
+def parse(filename, callback=default_callback):
+ data = None
+ tokens = []
+ changelog_ts = filename.rsplit(".")[-1]
+ with codecs.open(filename, mode="rb", encoding="utf-8") as f:
+ # GlusterFS Changelog | version: v1.1 | encoding : 2
+ header = f.readline()
+ version = header.split()[4]
+
+ data = f.readline()
+
+ slice_start = 0
+ in_record = False
+
+ prev_char = ""
+ next_char = ""
+ for i, c in enumerate(data):
+ next_char = ""
+ if len(data) >= (i + 2):
+ next_char = data[i+1]
+
+ if not in_record and c in [ENTRY, META, DATA]:
+ tokens.append(slice(slice_start, i+1))
+ slice_start = i+1
+ in_record = True
+ continue
+
+ if c == SEP and ((prev_char != SEP and next_char == SEP) or
+ (prev_char == SEP and next_char != SEP) or
+ (prev_char != SEP and next_char != SEP)):
+ tokens.append(slice(slice_start, i))
+ slice_start = i+1
+
+ num_tokens = get_num_tokens(data, tokens, version)
+
+ if num_tokens == len(tokens):
+ process_record(data, tokens, changelog_ts, callback)
+ in_record = False
+ tokens = []
+
+ prev_char = c
+
+ # process last record
+ if slice_start < (len(data) - 1):
+ tokens.append(slice(slice_start, len(data)))
+ process_record(data, tokens, changelog_ts, callback)
+ tokens = []
+
+parse(sys.argv[1])
diff --git a/tests/utils/create-files.py b/tests/utils/create-files.py
new file mode 100755
index 00000000000..04736e9c73b
--- /dev/null
+++ b/tests/utils/create-files.py
@@ -0,0 +1,593 @@
+
+# This script was developed by Vijaykumar Koppad (vkoppad@redhat.com)
+# The latest version of this script can found at
+# http://github.com/vijaykumar-koppad/crefi
+
+from __future__ import with_statement
+import os
+import re
+import sys
+import time
+import errno
+import xattr
+import string
+import random
+import logging
+import tarfile
+import argparse
+
+datsiz = 0
+timr = 0
+
+def get_ascii_upper_alpha_digits():
+ if sys.version_info > (3,0):
+ return string.ascii_uppercase+string.digits
+ else:
+ return string.uppercase+string.digits
+
+def setLogger(filename):
+ global logger
+ logger = logging.getLogger(filename)
+ logger.setLevel(logging.DEBUG)
+ return
+
+
+def setupLogger(filename):
+ logger = logging.getLogger(filename)
+ logger.setLevel(logging.DEBUG)
+ formatter = logging.Formatter('%(asctime)s - %(message)s')
+ ch = logging.StreamHandler()
+ ch.setLevel(logging.INFO)
+ ch.setFormatter(formatter)
+ logger.addHandler(ch)
+ return logger
+
+
+def os_rd(src, size):
+ global datsiz
+ fd = os.open(src, os.O_RDONLY)
+ data = os.read(fd, size)
+ os.close(fd)
+ datsiz = datsiz + size
+ return data
+
+
+def os_wr(dest, data):
+ global timr
+ st = time.time()
+ fd = os.open(dest, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o644)
+ os.write(fd, data)
+ os.close(fd)
+ ed = time.time()
+ timr = timr+(ed-st)
+ return
+
+
+def create_sparse_file(fil, size, mins, maxs, rand):
+ if rand:
+ size = random.randint(mins, maxs)
+ else:
+ size = size
+ data = os_rd("/dev/zero", size)
+ os_wr(fil, data)
+ return
+
+
+def create_binary_file(fil, size, mins, maxs, rand):
+ if rand:
+ size = random.randint(mins, maxs)
+ else:
+ size = size
+ data = os_rd("/dev/urandom", size)
+ os_wr(fil, data)
+ return
+
+
+def create_txt_file(fil, size, mins, maxs, rand):
+ if rand:
+ size = random.randint(mins, maxs)
+ if size < 500*1024:
+ data = os_rd("/etc/services", size)
+ os_wr(fil, data)
+ else:
+ data = os_rd("/etc/services", 512*1024)
+ file_size = 0
+ fd = os.open(fil, os.O_WRONLY | os.O_CREAT | os.O_EXCL, 0o644)
+ while file_size < size:
+ os.write(fd, data)
+ file_size += 500*1024
+ os.close(fd)
+ return
+
+
+def create_tar_file(fil, size, mins, maxs, rand):
+ if rand:
+ size = random.randint(mins, maxs)
+ else:
+ size = size
+ data = os_rd("/dev/urandom", size)
+ os_wr(fil, data)
+ tar = tarfile.open(fil+".tar.gz", "w:gz")
+ tar.add(fil)
+ tar.close()
+ os.unlink(fil)
+ return
+
+
+def get_filename(flen):
+ size = flen
+ char = get_ascii_upper_alpha_digits()
+ st = ''.join(random.choice(char) for i in range(size))
+ ti = str((hex(int(str(time.time()).split('.')[0])))[2:])
+ return ti+"%%"+st
+
+
+def text_files(files, file_count, inter, size, mins, maxs, rand,
+ flen, randname, dir_path):
+ global datsiz, timr
+ for k in range(files):
+ if not file_count % inter:
+ logger.info("Total files created -- "+str(file_count))
+ if not randname:
+ fil = dir_path+"/"+"file"+str(k)
+ else:
+ fil = dir_path+"/"+get_filename(flen)
+ create_txt_file(fil, size, mins, maxs, rand)
+ file_count += 1
+ return file_count
+
+
+def sparse_files(files, file_count, inter, size, mins, maxs,
+ rand, flen, randname, dir_path):
+ for k in range(files):
+ if not file_count % inter:
+ logger.info("Total files created -- "+str(file_count))
+ if not randname:
+ fil = dir_path+"/"+"file"+str(k)
+ else:
+ fil = dir_path+"/"+get_filename(flen)
+ create_sparse_file(fil, size, mins, maxs, rand)
+ file_count += 1
+ return file_count
+
+
+def binary_files(files, file_count, inter, size, mins, maxs,
+ rand, flen, randname, dir_path):
+ for k in range(files):
+ if not file_count % inter:
+ logger.info("Total files created -- "+str(file_count))
+ if not randname:
+ fil = dir_path+"/"+"file"+str(k)
+ else:
+ fil = dir_path+"/"+get_filename(flen)
+ create_binary_file(fil, size, mins, maxs, rand)
+ file_count += 1
+ return file_count
+
+
+def tar_files(files, file_count, inter, size, mins, maxs,
+ rand, flen, randname, dir_path):
+ for k in range(files):
+ if not file_count % inter:
+ logger.info("Total files created -- "+str(file_count))
+ if not randname:
+ fil = dir_path+"/"+"file"+str(k)
+ else:
+ fil = dir_path+"/"+get_filename(flen)
+ create_tar_file(fil, size, mins, maxs, rand)
+ file_count += 1
+ return file_count
+
+
+def setxattr_files(files, randname, dir_path):
+ char = get_ascii_upper_alpha_digits()
+ if not randname:
+ for k in range(files):
+ v = ''.join(random.choice(char) for i in range(10))
+ n = "user."+v
+ xattr.setxattr(dir_path+"/"+"file"+str(k), n, v)
+ else:
+ dirs = os.listdir(dir_path+"/")
+ for fil in dirs:
+ v = ''.join(random.choice(char) for i in range(10))
+ n = "user."+v
+ xattr.setxattr(dir_path+"/"+fil, n, v)
+ return
+
+
+def rename_files(files, flen, randname, dir_path):
+ if not randname:
+ for k in range(files):
+ os.rename(dir_path + "/" + "file" + str(k),
+ dir_path + "/" + "file" + str(files+k))
+ else:
+ dirs = os.listdir(dir_path)
+ for fil in dirs:
+ if not os.path.isdir(fil):
+ newfil = get_filename(flen)
+ os.rename(dir_path + "/" + fil,
+ dir_path + "/" + newfil)
+ return
+
+
+def truncate_files(files, mins, maxs, randname, dir_path):
+ if not randname:
+ for k in range(files):
+ byts = random.randint(mins, maxs)
+ fd = os.open(dir_path + "/" + "file" + str(k), os.O_WRONLY)
+ os.ftruncate(fd, byts)
+ os.close(fd)
+ else:
+ dirs = os.listdir(dir_path)
+ for fil in dirs:
+ if not os.path.isdir(dir_path+"/"+fil):
+ byts = random.randint(mins, maxs)
+ fd = os.open(dir_path+"/"+fil, os.O_WRONLY)
+ os.ftruncate(fd, byts)
+ os.close(fd)
+ return
+
+
+def chmod_files(files, flen, randname, dir_path):
+ if not randname:
+ for k in range(files):
+ mod = random.randint(0, 511)
+ os.chmod(dir_path+"/"+"file"+str(k), mod)
+ else:
+ dirs = os.listdir(dir_path)
+ for fil in dirs:
+ mod = random.randint(0, 511)
+ os.chmod(dir_path+"/"+fil, mod)
+ return
+
+def random_og(path):
+ u = random.randint(1025, 65536)
+ g = -1
+ os.chown(path, u, g)
+
+def chown_files(files, flen, randname, dir_path):
+ if not randname:
+ for k in range(files):
+ random_og(dir_path+"/"+"file"+str(k))
+ else:
+ dirs = os.listdir(dir_path)
+ for fil in dirs:
+ random_og(dir_path+"/"+fil)
+ return
+
+
+def chgrp_files(files, flen, randname, dir_path):
+ if not randname:
+ for k in range(files):
+ random_og(dir_path+"/"+"file"+str(k))
+ else:
+ dirs = os.listdir(dir_path)
+ for fil in dirs:
+ random_og(dir_path+"/"+fil)
+ return
+
+
+def symlink_files(files, flen, randname, dir_path):
+ try:
+ os.makedirs(dir_path+"/"+"symlink_to_files")
+ except OSError as ex:
+ if ex.errno is not errno.EEXIST:
+ raise
+ if not randname:
+ for k in range(files):
+ src_file = "file"+str(k)
+ os.symlink(dir_path+"/"+src_file,
+ dir_path+"/"+"symlink_to_files/file"+str(k)+"_sym")
+ else:
+ dirs = os.listdir(dir_path)
+ for fil in dirs:
+ newfil = get_filename(flen)
+ os.symlink(dir_path+"/"+fil,
+ dir_path+"/"+"symlink_to_files/"+newfil)
+ return
+
+
+def hardlink_files(files, flen, randname, dir_path):
+ try:
+ os.makedirs(dir_path+"/"+"hardlink_to_files")
+ except OSError as ex:
+ if ex.errno is not errno.EEXIST:
+ raise
+ if not randname:
+ for k in range(files):
+ src_file = "file"+str(k)
+ os.link(dir_path+"/"+src_file,
+ dir_path+"/"+"hardlink_to_files/file"+str(k)+"_hard")
+ else:
+ dirs = os.listdir(dir_path)
+ for fil in dirs:
+ if not os.path.isdir(dir_path+"/"+fil):
+ newfil = get_filename(flen)
+ os.link(dir_path+"/"+fil,
+ dir_path+"/"+"hardlink_to_files/"+newfil)
+ return
+
+
+def human2bytes(size):
+ size_short = {
+ 1024: ['K', 'KB', 'KiB', 'k', 'kB', 'kiB'],
+ 1024*1024: ['M', 'MB', 'MiB'],
+ 1024*1024*1024: ['G', 'GB', 'GiB']
+ }
+ num = re.search('(\d+)', size).group()
+ ext = size[len(num):]
+ num = int(num)
+ if ext == '':
+ return num
+ for value, keys in size_short.items():
+ if ext in keys:
+ size = num*value
+ return size
+
+
+def bytes2human(byts):
+ abbr = {
+ 1 << 30: "GB",
+ 1 << 20: "MB",
+ 1 << 10: "KB",
+ 1: "bytes"
+ }
+ if byts == 1:
+ return '1 bytes'
+ for factor, suffix in abbr.items():
+ if byts >= factor:
+ break
+ return "%.3f %s" % (byts / factor, suffix)
+
+
+def multipledir(mnt_pnt, brdth, depth, files, fop, file_type="text",
+ inter="1000", size="100K", mins="10K", maxs="500K",
+ rand=False, l=10, randname=False):
+ files_count = 1
+ size = human2bytes(size)
+ maxs = human2bytes(maxs)
+ mins = human2bytes(mins)
+ for i in range(brdth):
+ dir_path = mnt_pnt
+ for j in range(depth):
+ dir_path = dir_path+"/"+"level"+str(j)+str(i)
+ try:
+ os.makedirs(dir_path)
+ except OSError as ex:
+ if ex.errno is not errno.EEXIST:
+ raise
+
+ if fop == "create":
+ logger.info("Entering the directory level"+str(j)+str(i))
+ if file_type == "text":
+ files_count = text_files(files, files_count, inter, size,
+ mins, maxs, rand, l, randname,
+ dir_path)
+ elif file_type == "sparse":
+ files_count = sparse_files(files, files_count, inter, size,
+ mins, maxs, rand, l, randname,
+ dir_path)
+ elif file_type == "binary":
+ files_count = binary_files(files, files_count, inter, size,
+ mins, maxs, rand, l, randname,
+ dir_path)
+ elif file_type == "tar":
+ files_count = tar_files(files, files_count, inter, size,
+ mins, maxs, rand, l, randname,
+ dir_path)
+ else:
+ logger.error("Not a valid file type")
+ sys.exit(1)
+
+ elif fop == "rename":
+ logger.info("Started renaming files for the files 0 to " +
+ str(files)+" in the directory level"+str(j) +
+ str(i)+" ...")
+ rename_files(files, l, randname, dir_path)
+ logger.info("Finished renaming files for the files 0 to " +
+ str(files)+" in the directory level"+str(j)+str(i))
+
+ elif fop == "chmod":
+ logger.info("Started changing permission of files for the " +
+ "files 0 to "+str(files)+" in the directory level"
+ + str(j)+str(i)+" ...")
+ chmod_files(files, l, randname, dir_path)
+ logger.info("Finished changing permission of files for " +
+ "the files 0 to "+str(files) +
+ " in the directory level"+str(j)+str(i))
+
+ elif fop == "chown":
+ logger.info("Started changing ownership of files for the " +
+ "files 0 to " + str(files) +
+ " in the directory level"+str(j)+str(i)+" ...")
+ chown_files(files, l, randname, dir_path)
+ logger.info("Finished changing ownership of files for " +
+ "the files 0 to "+str(files) +
+ " in the directory level"+str(j)+str(i))
+
+ elif fop == "chgrp":
+ logger.info("Started changing group ownership of files for " +
+ "the files 0 to " + str(files) +
+ " in the directory level"+str(j)+str(i)+" ...")
+ chgrp_files(files, l, randname, dir_path)
+ logger.info("Finished changing group ownership of files for " +
+ "the files 0 to "+str(files) +
+ " in the directory level"+str(j)+str(i))
+
+ elif fop == "symlink":
+ logger.info("Started creating symlink to the files 0 to " +
+ str(files)+" in the directory level" +
+ str(j)+str(i)+"...")
+ symlink_files(files, l, randname, dir_path)
+ logger.info("Finished creating symlink to the files 0 to " +
+ str(files) + " in the directory level" +
+ str(j)+str(i))
+
+ elif fop == "hardlink":
+ logger.info("Started creating hardlink to the files 0 to " +
+ str(files)+" in the directory level" +
+ str(j)+str(i)+"...")
+ hardlink_files(files, l, randname, dir_path)
+ logger.info("Finished creating hardlink to the files 0 to " +
+ str(files) + " in the directory level" +
+ str(j)+str(i))
+
+ elif fop == "truncate":
+ logger.info("Started truncating the files 0 to " +
+ str(files)+" in the directory level" +
+ str(j)+str(i)+"...")
+ truncate_files(files, mins, maxs, randname, dir_path)
+ logger.info("Finished truncating the files 0 to " +
+ str(files)+" in the directory level" +
+ str(j)+str(i))
+
+ elif fop == "setxattr":
+ logger.info("Started setxattr to the files 0 to " +
+ str(files)+" in the directory level" +
+ str(j)+str(i)+"...")
+ setxattr_files(files, randname, dir_path)
+ logger.info("Finished setxattr to the files 0 to " +
+ str(files)+" in the directory level" +
+ str(j)+str(i))
+
+ if fop == "create":
+ thrpt = datsiz / timr
+ logger.info("finished creating files with throughput ---- " +
+ bytes2human(thrpt)+"ps")
+
+
+def singledir(mnt_pnt, files, fop, file_type="text", inter="1000", size="100K",
+ mins="10K", maxs="500K", rand=False, l=10, randname=False):
+
+ files_count = 1
+ size = human2bytes(size)
+ maxs = human2bytes(maxs)
+ mins = human2bytes(mins)
+ if fop == "create":
+ if file_type == "text":
+ files_count = text_files(files, files_count, inter, size, mins,
+ maxs, rand, l, randname, mnt_pnt)
+ elif file_type == "sparse":
+ files_count = sparse_files(files, files_count, inter, size, mins,
+ maxs, rand, l, randname, mnt_pnt)
+ elif file_type == "binary":
+ files_count = binary_files(files, files_count, inter, size, mins,
+ maxs, rand, l, randname, mnt_pnt)
+ elif file_type == "tar":
+ files_count = tar_files(files, files_count, inter, size, mins,
+ maxs, rand, l, randname, mnt_pnt)
+ else:
+ logger.info("Not a valid file type")
+ sys.exit(1)
+ thrpt = datsiz / timr
+ logger.info("finished creating files with avg throughput ---- " +
+ bytes2human(thrpt)+"ps")
+
+ elif fop == "rename":
+ logger.info("Started renaming files for the files 0 to " +
+ str(files) + "...")
+ rename_files(files, l, randname, mnt_pnt)
+ logger.info("Finished renaming files for the files 0 to "+str(files))
+
+ elif fop == "chmod":
+ logger.info("Started changing permission for the files 0 to " +
+ str(files)+" ...")
+ chmod_files(files, l, randname, mnt_pnt)
+ logger.info("Finished changing permission files for the files 0 to " +
+ str(files))
+
+ elif fop == "chown":
+ logger.info("Started changing ownership for the files 0 to " +
+ str(files)+"...")
+ chown_files(files, l, randname, mnt_pnt)
+ logger.info("Finished changing ownership for the files 0 to " +
+ str(files))
+
+ elif fop == "chgrp":
+ logger.info("Started changing group ownership for the files 0 to " +
+ str(files)+"...")
+ chgrp_files(files, l, randname, mnt_pnt)
+ logger.info("Finished changing group ownership for the files 0 to " +
+ str(files))
+
+ elif fop == "symlink":
+ logger.info("Started creating symlink to the files 0 to " +
+ str(files)+"...")
+ symlink_files(files, l, randname, mnt_pnt)
+ logger.info("Finished creating symlink to the files 0 to " +
+ str(files))
+
+ elif fop == "hardlink":
+ logger.info("Started creating hardlink to the files 0 to " +
+ str(files)+"...")
+ hardlink_files(files, l, randname, mnt_pnt)
+ logger.info("Finished creating hardlink to the files 0 to " +
+ str(files))
+
+ elif fop == "truncate":
+ logger.info("Started truncating the files 0 to " + str(files)+"...")
+ truncate_files(files, mins, maxs, randname, mnt_pnt)
+ logger.info("Finished truncating the files 0 to " + str(files))
+
+ elif fop == "setxattr":
+ logger.info("Started setxattr to the files 0 to " + str(files)+"...")
+ setxattr_files(files, randname, mnt_pnt)
+ logger.info("Finished setxattr to the files 0 to " + str(files))
+
+
+if __name__ == '__main__':
+ usage = "usage: %prog [option] <MNT_PT>"
+ parser = argparse.ArgumentParser(formatter_class=argparse.
+ ArgumentDefaultsHelpFormatter)
+ parser.add_argument("-n", dest="files", type=int, default=100,
+ help="number of files in each level ")
+ parser.add_argument("--size", action="store", default="100k",
+ help="size of the files to be used ")
+ parser.add_argument("--random", action="store_true", default=False,
+ help="random size of the file between --min and --max")
+ parser.add_argument("--max", action="store", default="500K",
+ help="maximum size of the files, if random is True")
+ parser.add_argument("--min", action="store", default="10K",
+ help="minimum size of the files, if random is True")
+ parser.add_argument("--single", action="store_true", dest="dir",
+ default=True, help="create files in single directory")
+ parser.add_argument("--multi", action="store_false", dest="dir",
+ help="create files in multiple directories")
+ parser.add_argument("-b", dest="brdth", type=int, default=5,
+ help="number of directories in one level(works " +
+ "with --multi) ")
+ parser.add_argument("-d", dest="depth", type=int, default=5,
+ help="number of levels of directories (works " +
+ "with --multi) ")
+ parser.add_argument("-l", dest="flen", type=int, default=10,
+ help="number of bytes for filename ( Used only when " +
+ "randname is enabled) ")
+ parser.add_argument("-t", action="store", dest="file_type",
+ default="text", choices=["text", "sparse", "binary",
+ "tar"],
+ help="type of the file to be created ()")
+ parser.add_argument("-I", dest="inter", type=int, default=100,
+ help="print number files created of interval")
+ parser.add_argument("--fop", action="store", dest="fop", default="create",
+ choices=["create", "rename", "chmod", "chown", "chgrp",
+ "symlink", "hardlink", "truncate",
+ "setxattr"],
+ help="fop to be performed on the files")
+ parser.add_argument("-R", dest="randname", action="store_false",
+ default=True, help="To disable random file name " +
+ "(default: Enabled)")
+ parser.add_argument("mntpnt", help="Mount point")
+
+ args = parser.parse_args()
+ logger = setupLogger("testlost")
+ args.mntpnt = os.path.abspath(args.mntpnt)
+
+ if args.dir:
+ singledir(args.mntpnt, args.files, args.fop, args.file_type,
+ args.inter, args.size, args.min, args.max,
+ args.random, args.flen, args.randname)
+ else:
+ multipledir(args.mntpnt, args.brdth, args.depth, args.files,
+ args.fop, args.file_type, args.inter, args.size,
+ args.min, args.max, args.random, args.flen,
+ args.randname)
diff --git a/tests/utils/get-mdata-xattr.c b/tests/utils/get-mdata-xattr.c
new file mode 100644
index 00000000000..e9f54717263
--- /dev/null
+++ b/tests/utils/get-mdata-xattr.c
@@ -0,0 +1,152 @@
+/*
+ Copyright (c) 2019 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdlib.h>
+#include <endian.h>
+#include <stdio.h>
+#include <time.h>
+#include <string.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <errno.h>
+
+typedef struct gf_timespec_disk {
+ uint64_t tv_sec;
+ uint64_t tv_nsec;
+} gf_timespec_disk_t;
+
+/* posix_mdata_t on disk structure */
+typedef struct __attribute__((__packed__)) posix_mdata_disk {
+ /* version of structure, bumped up if any new member is added */
+ uint8_t version;
+ /* flags indicates valid fields in the structure */
+ uint64_t flags;
+ gf_timespec_disk_t ctime;
+ gf_timespec_disk_t mtime;
+ gf_timespec_disk_t atime;
+} posix_mdata_disk_t;
+
+/* In memory representation posix metadata xattr */
+typedef struct {
+ /* version of structure, bumped up if any new member is added */
+ uint8_t version;
+ /* flags indicates valid fields in the structure */
+ uint64_t flags;
+ struct timespec ctime;
+ struct timespec mtime;
+ struct timespec atime;
+} posix_mdata_t;
+
+#define GF_XATTR_MDATA_KEY "trusted.glusterfs.mdata"
+
+/* posix_mdata_from_disk converts posix_mdata_disk_t into host byte order
+ */
+static inline void
+posix_mdata_from_disk(posix_mdata_t *out, posix_mdata_disk_t *in)
+{
+ out->version = in->version;
+ out->flags = be64toh(in->flags);
+
+ out->ctime.tv_sec = be64toh(in->ctime.tv_sec);
+ out->ctime.tv_nsec = be64toh(in->ctime.tv_nsec);
+
+ out->mtime.tv_sec = be64toh(in->mtime.tv_sec);
+ out->mtime.tv_nsec = be64toh(in->mtime.tv_nsec);
+
+ out->atime.tv_sec = be64toh(in->atime.tv_sec);
+ out->atime.tv_nsec = be64toh(in->atime.tv_nsec);
+}
+
+/* posix_fetch_mdata_xattr fetches the posix_mdata_t from disk */
+static int
+posix_fetch_mdata_xattr(const char *real_path, posix_mdata_t *metadata)
+{
+ size_t size = -1;
+ char *value = NULL;
+ char gfid_str[64] = {0};
+
+ char *key = GF_XATTR_MDATA_KEY;
+
+ if (!metadata || !real_path) {
+ goto err;
+ }
+
+ /* Get size */
+ size = lgetxattr(real_path, key, NULL, 0);
+ if (size == -1) {
+ goto err;
+ }
+
+ value = calloc(size + 1, sizeof(char));
+ if (!value) {
+ goto err;
+ }
+
+ /* Get xattr value */
+ size = lgetxattr(real_path, key, value, size);
+ if (size == -1) {
+ goto err;
+ }
+ posix_mdata_from_disk(metadata, (posix_mdata_disk_t *)value);
+
+out:
+ if (value)
+ free(value);
+ return 0;
+err:
+ if (value)
+ free(value);
+ return -1;
+}
+
+int
+main(int argc, char *argv[])
+{
+ posix_mdata_t metadata;
+ uint64_t result;
+
+ if (argc != 3) {
+ /*
+ Usage: get_mdata_xattr -c|-m|-a <file-name>
+ where -c --> ctime
+ -m --> mtime
+ -a --> atime
+ */
+ printf("-1");
+ goto err;
+ }
+
+ if (posix_fetch_mdata_xattr(argv[2], &metadata)) {
+ printf("-1");
+ goto err;
+ }
+
+ switch (argv[1][1]) {
+ case 'c':
+ result = metadata.ctime.tv_sec;
+ break;
+ case 'm':
+ result = metadata.mtime.tv_sec;
+ break;
+ case 'a':
+ result = metadata.atime.tv_sec;
+ break;
+ default:
+ printf("-1");
+ goto err;
+ }
+ printf("%" PRIu64, result);
+ fflush(stdout);
+ return 0;
+err:
+ fflush(stdout);
+ return -1;
+}
diff --git a/tests/utils/getfattr.py b/tests/utils/getfattr.py
new file mode 100755
index 00000000000..3eb40e1c887
--- /dev/null
+++ b/tests/utils/getfattr.py
@@ -0,0 +1,133 @@
+
+from __future__ import print_function
+import os
+import sys
+from optparse import OptionParser
+
+import xattr
+
+def handle_textencoding(attr):
+ ### required for Python's handling of NULL strings.
+ attr_null_replace = (attr.encode('hex').decode('hex')).replace('\x00',
+ '\\000')
+ return attr_null_replace
+
+def getfattr(path, option):
+ attr = xattr.getxattr(path, option.name)
+ encoded_attr = attr
+
+ if option.encoding == "text":
+ ## special case handle it.
+ encoded_attr = handle_textencoding(attr)
+ else:
+ encoded_attr = attr.encode(option.encoding)
+
+ if option.onlyvalues:
+ print (encoded_attr)
+ return
+
+ print_getfattr (path, option, encoded_attr)
+ return
+
+def print_getfattr (path, option, encoded_attr=None):
+ if encoded_attr:
+ if option.encoding == "hex":
+ print(("%s=0x%s" % (option.name, encoded_attr)))
+ elif option.encoding == "base64":
+ print(("%s=0s%s" % (option.name, encoded_attr)))
+ else:
+ print(("%s=\"%s\"" % (option.name, encoded_attr)))
+ else:
+ print(option.name)
+
+ return
+
+def print_header (path, absnames):
+ if absnames:
+ print(("# file: %s" % path))
+ else:
+ print ("getfattr: Removing leading '/' from absolute path names")
+ print(("# file: %s" % path[1:]))
+
+if __name__ == '__main__':
+ usage = "usage: %prog [-n name|-d] [-e en] [-m pattern] path...."
+ parser = OptionParser(usage=usage)
+ parser.add_option("-n", action="store", dest="name", type="string",
+ help="Dump the value of the named extended attribute"
+ " extended attribute.")
+ parser.add_option("-d", action="store_true", dest="dump",
+ help="Dump the values of all extended attributes"
+ " associated with pathname.")
+ parser.add_option("-e", action="store", dest="encoding", type="string",
+ default="base64",
+ help="Encode values after retrieving"
+ " them. Valid values of [en] are `text`, `hex`,"
+ " and `base64`. Values encoded as text strings are"
+ " enclosed in double quotes (\"), while strings"
+ " encoded as hexadecimal and base64 are prefixed with"
+ " 0x and 0s, respectively.")
+ parser.add_option("-m", action="store", dest="pattern", type="string",
+ help="Only include attributes with names matching the"
+ " regular expression pattern. The default value for"
+ " pattern is \"^user\\.\", which includes all the"
+ " attributes in the user namespace. Specify \"-\" for"
+ " including all attributes. Refer to attr(5) for a more"
+ " detailed discussion of namespaces.")
+ parser.add_option("--absolute-names", action="store_true", dest="absnames",
+ help="Do not strip leading slash characters ('/')."
+ " The default behaviour is to strip leading slash characters.")
+ parser.add_option("--only-values", action="store_true", dest="onlyvalues",
+ help="Dump out the raw extended attribute value(s)"
+ " without encoding them.")
+
+ (option, args) = parser.parse_args()
+ if not args:
+ print ("Usage: getfattr [-hRLP] [-n name|-d] [-e en] [-m pattern]"
+ " path...")
+ print ("Try `getfattr --help' for more information.")
+ sys.exit(1)
+
+ if option.dump and option.name:
+ print ("-d and -n are mutually exclusive...")
+ sys.exit(1)
+
+ if option.pattern and option.name:
+ print ("-m and -n are mutually exclusive...")
+ sys.exit(1)
+
+ if option.encoding:
+ if (not (option.encoding.strip() == "hex" or
+ option.encoding.strip() == "base64" or
+ option.encoding.strip() == "text")):
+ print(("unrecognized encoding parameter... %s, please use"
+ " `text`, `base64` or `hex`" % option.encoding))
+ sys.exit(1)
+
+ args[0] = os.path.abspath(args[0])
+
+ if option.name:
+ print_header(args[0], option.absnames)
+ try:
+ getfattr(args[0], option)
+ except KeyError as err:
+ print(("Invalid key %s" % err))
+ sys.exit(1)
+ except IOError as err:
+ print (err)
+ sys.exit(1)
+
+ if option.pattern:
+ print_header(args[0], option.absnames)
+ try:
+ xattrs = xattr.listxattr(args[0])
+ for attr in xattrs:
+ if option.dump:
+ option.name = attr.encode('utf-8')
+ getfattr(args[0], option)
+ else:
+ option.name = attr.encode('utf-8')
+ print_getfattr(args[0], option, None)
+
+ except IOError as err:
+ print (err)
+ sys.exit(1)
diff --git a/tests/utils/gfid-access.py b/tests/utils/gfid-access.py
new file mode 100755
index 00000000000..c35c1223df6
--- /dev/null
+++ b/tests/utils/gfid-access.py
@@ -0,0 +1,124 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+from __future__ import print_function
+import os
+import sys
+import stat
+import time
+import struct
+import random
+import libcxattr
+
+from errno import EEXIST
+
+Xattr = libcxattr.Xattr()
+
+def umask():
+ return os.umask(0)
+
+def _fmt_mknod(l):
+ return "!II%dsI%dsIII" % (37, l+1)
+
+def _fmt_mkdir(l):
+ return "!II%dsI%dsII" % (37, l+1)
+
+def _fmt_symlink(l1, l2):
+ return "!II%dsI%ds%ds" % (37, l1+1, l2+1)
+
+
+if sys.version_info > (3,):
+ def entry_pack_reg(gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(_fmt_mknod(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), 0, umask())
+
+ # mkdir
+ def entry_pack_dir(gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(_fmt_mkdir(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), umask())
+ # symlink
+ def entry_pack_symlink(gf, bn, lnk, st):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ lnk_encoded = lnk.encode()
+ llen = len(lnk_encoded)
+ return struct.pack(_fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf.encode(), st['mode'], bn_encoded,
+ lnk_encoded)
+
+else:
+ def entry_pack_reg(gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(_fmt_mknod(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+
+ def entry_pack_dir(gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(_fmt_mkdir(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), umask())
+
+ def entry_pack_symlink(gf, bn, lnk, mo, uid, gid):
+ blen = len(bn)
+ llen = len(lnk)
+ return struct.pack(_fmt_symlink(blen, llen),
+ uid, gid, gf, mo, bn, lnk)
+
+if __name__ == '__main__':
+ if len(sys.argv) < 9:
+ print(("USAGE: %s <mount> <pargfid|ROOT> <filename> <GFID> <file type>"
+ " <uid> <gid> <file permission(octal str)>" % (sys.argv[0])))
+ sys.exit(-1) # nothing to do
+ mtpt = sys.argv[1]
+ pargfid = sys.argv[2]
+ fname = sys.argv[3]
+ randomgfid = sys.argv[4]
+ ftype = sys.argv[5]
+ uid = int(sys.argv[6])
+ gid = int(sys.argv[7])
+ perm = int(sys.argv[8], 8)
+
+ os.chdir(mtpt)
+ if pargfid == 'ROOT':
+ pargfid = '.gfid/00000000-0000-0000-0000-000000000001'
+ else:
+ pargfid = '.gfid/' + pargfid
+
+ blob = None
+
+ # entry op: use non-zero uid/gid (to catch gfid-access xlator bugs)
+ if ftype == 'file':
+ mode = stat.S_IFREG | perm
+ blob = entry_pack_reg(randomgfid, fname, mode, uid, gid)
+ elif ftype =='dir':
+ mode = stat.S_IFDIR | perm
+ blob = entry_pack_dir(randomgfid, fname, mode, uid, gid)
+ else: # not yet...
+ sys.exit(-1)
+
+ if blob == None:
+ sys.exit(-1)
+ try:
+ Xattr.lsetxattr(pargfid, 'glusterfs.gfid.newfile', blob)
+ except OSError:
+ ex = sys.exc_info()[1]
+ if not ex.errno in [EEXIST]:
+ raise
+ sys.exit(-1)
+ print("File creation OK")
+ sys.exit(0)
diff --git a/tests/utils/libcxattr.py b/tests/utils/libcxattr.py
new file mode 100644
index 00000000000..3f3ed1fffbb
--- /dev/null
+++ b/tests/utils/libcxattr.py
@@ -0,0 +1,108 @@
+#
+# Copyright (c) 2011-2014 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+import os
+import sys
+from ctypes import CDLL, c_int
+from py2py3 import bytearray_to_str, gr_create_string_buffer
+from py2py3 import gr_query_xattr, gr_lsetxattr, gr_lremovexattr
+
+
+class Xattr(object):
+
+ """singleton that wraps the extended attributes system
+ interface for python using ctypes
+
+ Just implement it to the degree we need it, in particular
+ - we need just the l*xattr variants, ie. we never want symlinks to be
+ followed
+ - don't need size discovery for getxattr, as we always know the exact
+ sizes we expect
+ """
+
+ if sys.hexversion >= 0x02060000:
+ from ctypes import DEFAULT_MODE
+ libc = CDLL("libc.so.6", DEFAULT_MODE, None, True)
+ else:
+ libc = CDLL("libc.so.6")
+
+ @classmethod
+ def geterrno(cls):
+ if sys.hexversion >= 0x02060000:
+ from ctypes import get_errno
+ return get_errno()
+ # breaks on NetBSD
+ return c_int.in_dll(cls.libc, 'errno').value
+
+ @classmethod
+ def raise_oserr(cls):
+ errn = cls.geterrno()
+ raise OSError(errn, os.strerror(errn))
+
+ @classmethod
+ def _query_xattr(cls, path, siz, syscall, *a):
+ if siz:
+ buf = gr_create_string_buffer(siz)
+ else:
+ buf = None
+ ret = getattr(cls.libc, syscall)(*((path,) + a + (buf, siz)))
+ if ret == -1:
+ cls.raise_oserr()
+ if siz:
+ # py2 and py3 compatibility. Convert bytes array
+ # to string
+ result = bytearray_to_str(buf.raw)
+ return result[:ret]
+ else:
+ return ret
+
+ @classmethod
+ def lgetxattr(cls, path, attr, siz=0):
+ return gr_query_xattr(cls, path, siz, 'lgetxattr', attr)
+
+ @classmethod
+ def lgetxattr_buf(cls, path, attr):
+ """lgetxattr variant with size discovery"""
+ size = cls.lgetxattr(path, attr)
+ if size == -1:
+ cls.raise_oserr()
+ if size == 0:
+ return ''
+ return cls.lgetxattr(path, attr, size)
+
+ @classmethod
+ def llistxattr(cls, path, siz=0):
+ ret = gr_query_xattr(cls, path, siz, 'llistxattr')
+ if isinstance(ret, str):
+ ret = ret.strip('\0')
+ ret = ret.split('\0') if ret else []
+ return ret
+
+ @classmethod
+ def lsetxattr(cls, path, attr, val):
+ ret = gr_lsetxattr(cls, path, attr, val)
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def lremovexattr(cls, path, attr):
+ ret = gr_lremovexattr(cls, path, attr)
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def llistxattr_buf(cls, path):
+ """listxattr variant with size discovery"""
+ size = cls.llistxattr(path)
+ if size == -1:
+ cls.raise_oserr()
+ if size == 0:
+ return []
+ return cls.llistxattr(path, size)
diff --git a/tests/utils/pidof.py b/tests/utils/pidof.py
new file mode 100755
index 00000000000..4b7071c0a48
--- /dev/null
+++ b/tests/utils/pidof.py
@@ -0,0 +1,45 @@
+
+from __future__ import print_function
+import sys
+
+try:
+ import psutil
+except ImportError:
+ print("Please install psutil --> pip install psutil")
+ sys.exit(1)
+
+def pmap_find(p, name):
+ for m in p.memory_maps(grouped=True):
+ if m.path.endswith("%s.so" % name):
+ return True
+ continue
+ return False
+
+def pidof(processname):
+ for p in psutil.process_iter():
+ if p.pid == 0:
+ continue
+ if "gluster" in processname:
+ if processname == "glusterd" and pmap_find(p, "glusterd"):
+ print((p.pid))
+ if processname == "glusterfs" and pmap_find(p, "client"):
+ print((p.pid))
+ if processname == "glusterfsd" and pmap_find(p, "posix-acl"):
+ print((p.pid))
+ continue
+ if processname.strip() == p.name():
+ print((p.pid))
+
+def main(argv):
+ if len(argv) < 2:
+ sys.stderr.write("Usage: %s <processname>\n" % (argv[0],))
+ return 1
+ try:
+ pidof(argv[1])
+ except Exception as err:
+ print(err)
+ sys.stderr.write("Please be root - %s\n" % err);
+ sys.exit(1)
+
+if __name__ == "__main__":
+ main(sys.argv)
diff --git a/tests/utils/py2py3.py b/tests/utils/py2py3.py
new file mode 100644
index 00000000000..63aca10fd26
--- /dev/null
+++ b/tests/utils/py2py3.py
@@ -0,0 +1,186 @@
+#
+# Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+# All python2/python3 compatibility routines
+
+import sys
+import os
+import stat
+import struct
+from ctypes import create_string_buffer
+
+def umask():
+ return os.umask(0)
+
+if sys.version_info >= (3,):
+ def pipe():
+ (r, w) = os.pipe()
+ os.set_inheritable(r, True)
+ os.set_inheritable(w, True)
+ return (r, w)
+
+ # Raw conversion of bytearray to string. Used in the cases where
+ # buffer is created by create_string_buffer which is a 8-bit char
+ # array and passed to syscalls to fetch results. Using encode/decode
+ # doesn't work as it converts to string altering the size.
+ def bytearray_to_str(byte_arr):
+ return ''.join([chr(b) for b in byte_arr])
+
+ # Raw conversion of string to bytes. This is required to convert
+ # back the string into bytearray(c char array) to use in struc
+ # pack/unpacking. Again encode/decode can't be used as it
+ # converts it alters size.
+ def str_to_bytearray(string):
+ return bytes([ord(c) for c in string])
+
+ def gr_create_string_buffer(size):
+ return create_string_buffer(b'\0', size)
+
+ def gr_query_xattr(cls, path, size, syscall, attr=None):
+ if attr:
+ return cls._query_xattr(path.encode(), size, syscall,
+ attr.encode())
+ else:
+ return cls._query_xattr(path.encode(), size, syscall)
+
+ def gr_lsetxattr(cls, path, attr, val):
+ return cls.libc.lsetxattr(path.encode(), attr.encode(), val,
+ len(val), 0)
+
+ def gr_lremovexattr(cls, path, attr):
+ return cls.libc.lremovexattr(path.encode(), attr.encode())
+
+ def gr_cl_register(cls, brick, path, log_file, log_level, retries):
+ return cls._get_api('gf_changelog_register')(brick.encode(),
+ path.encode(),
+ log_file.encode(),
+ log_level, retries)
+
+ def gr_cl_done(cls, clfile):
+ return cls._get_api('gf_changelog_done')(clfile.encode())
+
+ def gr_cl_history_changelog(cls, changelog_path, start, end, num_parallel,
+ actual_end):
+ return cls._get_api('gf_history_changelog')(changelog_path.encode(),
+ start, end, num_parallel,
+ actual_end)
+
+ def gr_cl_history_done(cls, clfile):
+ return cls._get_api('gf_history_changelog_done')(clfile.encode())
+
+ # regular file
+
+ def entry_pack_reg(cls, gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(cls._fmt_mknod(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), 0, umask())
+
+ def entry_pack_reg_stat(cls, gf, bn, st):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mknod(blen),
+ st['uid'], st['gid'],
+ gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), 0, umask())
+ # mkdir
+
+ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ return struct.pack(cls._fmt_mkdir(blen),
+ uid, gid, gf.encode(), mo, bn_encoded,
+ stat.S_IMODE(mo), umask())
+ # symlink
+
+ def entry_pack_symlink(cls, gf, bn, lnk, st):
+ bn_encoded = bn.encode()
+ blen = len(bn_encoded)
+ lnk_encoded = lnk.encode()
+ llen = len(lnk_encoded)
+ return struct.pack(cls._fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf.encode(), st['mode'], bn_encoded,
+ lnk_encoded)
+else:
+ def pipe():
+ (r, w) = os.pipe()
+ return (r, w)
+
+ # Raw conversion of bytearray to string
+ def bytearray_to_str(byte_arr):
+ return byte_arr
+
+ # Raw conversion of string to bytearray
+ def str_to_bytearray(string):
+ return string
+
+ def gr_create_string_buffer(size):
+ return create_string_buffer('\0', size)
+
+ def gr_query_xattr(cls, path, size, syscall, attr=None):
+ if attr:
+ return cls._query_xattr(path, size, syscall, attr)
+ else:
+ return cls._query_xattr(path, size, syscall)
+
+ def gr_lsetxattr(cls, path, attr, val):
+ return cls.libc.lsetxattr(path, attr, val, len(val), 0)
+
+ def gr_lremovexattr(cls, path, attr):
+ return cls.libc.lremovexattr(path, attr)
+
+ def gr_cl_register(cls, brick, path, log_file, log_level, retries):
+ return cls._get_api('gf_changelog_register')(brick, path, log_file,
+ log_level, retries)
+
+ def gr_cl_done(cls, clfile):
+ return cls._get_api('gf_changelog_done')(clfile)
+
+ def gr_cl_history_changelog(cls, changelog_path, start, end, num_parallel,
+ actual_end):
+ return cls._get_api('gf_history_changelog')(changelog_path, start, end,
+ num_parallel, actual_end)
+
+ def gr_cl_history_done(cls, clfile):
+ return cls._get_api('gf_history_changelog_done')(clfile)
+
+ # regular file
+
+ def entry_pack_reg(cls, gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(cls._fmt_mknod(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+
+ def entry_pack_reg_stat(cls, gf, bn, st):
+ blen = len(bn)
+ mo = st['mode']
+ return struct.pack(cls._fmt_mknod(blen),
+ st['uid'], st['gid'],
+ gf, mo, bn,
+ stat.S_IMODE(mo), 0, umask())
+ # mkdir
+
+ def entry_pack_mkdir(cls, gf, bn, mo, uid, gid):
+ blen = len(bn)
+ return struct.pack(cls._fmt_mkdir(blen),
+ uid, gid, gf, mo, bn,
+ stat.S_IMODE(mo), umask())
+ # symlink
+
+ def entry_pack_symlink(cls, gf, bn, lnk, st):
+ blen = len(bn)
+ llen = len(lnk)
+ return struct.pack(cls._fmt_symlink(blen, llen),
+ st['uid'], st['gid'],
+ gf, st['mode'], bn, lnk)
diff --git a/tests/utils/setfattr.py b/tests/utils/setfattr.py
new file mode 100755
index 00000000000..8b7b6abacc0
--- /dev/null
+++ b/tests/utils/setfattr.py
@@ -0,0 +1,77 @@
+
+import os
+import sys
+from optparse import OptionParser
+
+import xattr
+
+def convert(string):
+ tmp_string = string
+ if (string[0] == '0' and
+ (string[1] == 's' or
+ string[1] == 'S')):
+ tmp_string = string.strip('%s%s' %
+ (string[0],
+ string[1]))
+ return tmp_string.decode('base64')
+
+ if (string[0] == '0' and
+ (string[1] == 'x' or
+ string[1] == 'X')):
+ tmp_string = string.split('%s%s' %
+ (string[0],
+ string[1]))
+ return tmp_string[1].decode('hex')
+
+ return tmp_string
+
+if __name__ == '__main__':
+ usage = "usage: %prog [-n name] [-v value] [-x name]"
+ parser = OptionParser(usage=usage)
+ parser.add_option("-n", action="store", dest="name", type="string",
+ help="Specifies the name of the extended attribute to set.")
+ parser.add_option("-v", action="store", dest="value", type="string",
+ help="Specifies the new value of the extended attribute."
+ " There are three methods available for encoding the value."
+ " If the given string is enclosed in double quotes, the"
+ " inner string is treated as text. In that case,"
+ " backslashes and double quotes have special meanings"
+ " and need to be escaped by a preceding backslash. Any"
+ " control characters can be encoded as a backslash"
+ " followed by three digits as its ASCII code in octal."
+ " If the given string begins with 0x or 0X, it expresses"
+ " a hexadecimal number. If the given string begins with"
+ " 0s or 0S, base64 encoding is expected.")
+ parser.add_option("-x", action="store", dest="xname", type="string",
+ help="Remove the named extended attribute entirely.")
+
+ (option, args) = parser.parse_args()
+ if not args:
+ print ("Usage: setfattr {-n name} [-v value] file...")
+ print (" setfattr {-x name} file...")
+ print ("Try `setfattr --help' for more information.")
+ sys.exit(1)
+
+ if option.name and option.xname:
+ print ("-n and -x are mutually exclusive...")
+ sys.exit(1)
+
+ if option.name:
+ if option.value is None:
+ print ("-n option requires -v value...")
+
+ args[0] = os.path.abspath(args[0])
+
+ if option.name and option.value:
+ try:
+ xattr.setxattr(args[0], option.name, convert(option.value))
+ except Exception as err:
+ print (err)
+ sys.exit(1)
+
+ if option.xname:
+ try:
+ xattr.removexattr(args[0], option.xname)
+ except Exception as err:
+ print (err)
+ sys.exit(1)
diff --git a/tests/utils/testn.sh b/tests/utils/testn.sh
new file mode 100755
index 00000000000..079351d8529
--- /dev/null
+++ b/tests/utils/testn.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+#
+# Use this script to identify the command and line-number of test-cases.
+#
+
+if [ -z "${1}" -a -z "${2}" ]
+then
+ echo "Usage: ${0} path/to/test/case.t testnumber"
+ exit 1
+elif [ -z "${2}" ]
+then
+ echo "ERROR: The second parameter to ${0} should be a number."
+ exit 2
+fi
+
+awk '{print FNR " " $0}' ${1} | egrep '^[[:digit:]]+[[:space:]]*(EXPECT|TEST|EXPECT_WITHIN|EXPECT_KEYWORD)' | sed -n ${2}p
diff --git a/tests/vagrant/vagrant-template-centos6/Vagrantfile b/tests/vagrant/vagrant-template-centos6/Vagrantfile
new file mode 100644
index 00000000000..b276f90768d
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/Vagrantfile
@@ -0,0 +1,55 @@
+# -*- mode: ruby -*-
+# vi: set ft=ruby :
+
+Vagrant.configure("2") do |config|
+ config.vm.define "vagrant-testVM" do |testvm|
+ testvm.vm.box = "raghavendra-talur/gluster-dev-centos6"
+ testvm.vm.hostname = "vagrant-testVM"
+ #testvm.ssh.insert_key = false
+ testvm.vm.synced_folder ".", "/vagrant", disabled: true
+
+ host = RbConfig::CONFIG['host_os']
+ # Give VM 1/4 system memory & access to all cpu cores on the host
+ if host =~ /darwin/
+ cpus = `sysctl -n hw.ncpu`.to_i
+ # sysctl returns Bytes and we need to convert to MB
+ mem = `sysctl -n hw.memsize`.to_i / 1024 / 1024 / 4
+ elsif host =~ /linux/
+ cpus = `nproc`.to_i
+ # meminfo shows KB and we need to convert to MB
+ mem = `grep 'MemTotal' /proc/meminfo | sed -e 's/MemTotal://' -e 's/ kB//'`.to_i / 1024 / 4
+ else # sorry Windows folks, I can't help you
+ cpus = 2
+ mem = 1024
+ end
+
+ # Define basic config for VM, memory, cpu, storage pool
+ testvm.vm.provider "libvirt" do |lv|
+ lv.storage_pool_name = "default"
+ lv.memory = mem
+ lv.cpus = cpus
+
+
+ # We need a brick partition, lets have a 5G disk for that.
+ # If you need more bricks, just add more letters to the
+ # string below.
+ "b".split("").each do |i|
+ lv.storage :file,
+ #:path => "",
+ #:allow_existing => "",
+ :device => "vd#{i}",
+ :size => "5G",
+ :type => "qcow2",
+ :bus => "virtio",
+ :cache => "default"
+ end
+ end
+
+ # Let's provision
+ testvm.vm.provision "ansible", run: "always" do |setup|
+ setup.verbose = "v"
+ setup.playbook = "setup.yml"
+ end
+
+ end
+end
diff --git a/tests/vagrant/vagrant-template-centos6/roles/daemon-services/tasks/main.yml b/tests/vagrant/vagrant-template-centos6/roles/daemon-services/tasks/main.yml
new file mode 100644
index 00000000000..0e4c83244cc
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/roles/daemon-services/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: stop and disable kernel nfs
+ service: name=nfs state=stopped enabled=no
diff --git a/tests/vagrant/vagrant-template-centos6/roles/fix-localhost/tasks/main.yml b/tests/vagrant/vagrant-template-centos6/roles/fix-localhost/tasks/main.yml
new file mode 100644
index 00000000000..84dd252b65b
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/roles/fix-localhost/tasks/main.yml
@@ -0,0 +1,6 @@
+---
+- name: remove IPv6 address for localhost
+ shell: sed -i '/::1/s/localhost //' /etc/hosts
+
+- name: add IPv4 address for localhost
+ shell: sed -i '/127.0.0.1/s/$/ localhost/' /etc/hosts
diff --git a/tests/vagrant/vagrant-template-centos6/roles/install-pkgs/tasks/main.yml b/tests/vagrant/vagrant-template-centos6/roles/install-pkgs/tasks/main.yml
new file mode 100644
index 00000000000..bf3eff077b4
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/roles/install-pkgs/tasks/main.yml
@@ -0,0 +1,92 @@
+---
+- name: install deltarpm
+ yum: name=deltarpm state=present
+
+- name: remove samba3
+ shell: yum -y remove samba*
+
+- name: update system
+ shell: yum -y update
+
+- name: install epel repo
+ yum: name=http://download.fedoraproject.org/pub/epel/6/x86_64/epel-release-6-8.noarch.rpm state=present
+
+- name: install other packages
+ yum: name={{ item }} state=present
+ with_items:
+ - attr
+ - autoconf
+ - automake
+ - bc
+ - bison
+ #- libcmocka-devel
+ - dbench
+ - dos2unix
+ - e2fsprogs
+ - findutils
+ - flex
+ - fuse-devel
+ - fuse-libs
+ - gcc
+ - gdb
+ - git
+ - glib2-devel
+ #- hostname
+ - libacl-devel
+ - libaio-devel
+ - libattr-devel
+ - libibverbs-devel
+ - librdmacm-devel
+ - libselinux-python
+ - libsemanage-python
+ - libtool
+ - libxml2-devel
+ - lvm2-devel
+ - make
+ #- man-db
+ - mock
+ - net-tools
+ #- nfs-ganesha-gluster
+ - nfs-utils
+ - openssh-server
+ - openssl-devel
+ - perl-Test-Harness
+ - pkgconfig
+ #- procps-ng
+ - psmisc
+ - python-devel
+ - python-eventlet
+ - python-netifaces
+ - python-paste-deploy
+ - python-setuptools
+ - python-simplejson
+ - python-sphinx
+ - python-webob
+ - pyxattr
+ - readline-devel
+ - rpm-build
+ - screen
+ - strace
+ - supervisor
+ - systemtap-sdt-devel
+ - sqlite-devel
+ - samba4*
+ - userspace-rcu-devel
+ - vim
+ - wget
+ - which
+ - xfsprogs
+ - yajl-devel
+
+- name: install dev help packages, not required by Gluster
+ yum: name={{ item }} state=present
+ with_items:
+ - cgdb
+ - clang
+ - lsof
+ - perf
+ - sysstat
+ - systemtap
+ - systemtap-runtime
+ - tcpdump
+ - valgrind
diff --git a/tests/vagrant/vagrant-template-centos6/roles/iptables/tasks/main.yml b/tests/vagrant/vagrant-template-centos6/roles/iptables/tasks/main.yml
new file mode 100644
index 00000000000..768cb0e8668
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/roles/iptables/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: disable iptables, need to add specific rules later
+ shell: iptables -F
diff --git a/tests/vagrant/vagrant-template-centos6/roles/mock-user/tasks/main.yml b/tests/vagrant/vagrant-template-centos6/roles/mock-user/tasks/main.yml
new file mode 100644
index 00000000000..c8e1209937e
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/roles/mock-user/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: Add mock user; required for rpm.t
+ user: name=mock group=mock
diff --git a/tests/vagrant/vagrant-template-centos6/roles/prepare-brick/tasks/main.yml b/tests/vagrant/vagrant-template-centos6/roles/prepare-brick/tasks/main.yml
new file mode 100644
index 00000000000..6b3f6b8d3ea
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/roles/prepare-brick/tasks/main.yml
@@ -0,0 +1,6 @@
+---
+- name: Format backend
+ filesystem: fstype=xfs dev=/dev/vdb
+
+- name: Add entry to fstab and mount
+ mount: name=/d src=/dev/vdb fstype=xfs state=mounted
diff --git a/tests/vagrant/vagrant-template-centos6/roles/remove-gluster-pkgs/tasks/main.yml b/tests/vagrant/vagrant-template-centos6/roles/remove-gluster-pkgs/tasks/main.yml
new file mode 100644
index 00000000000..c91efa9ba7c
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/roles/remove-gluster-pkgs/tasks/main.yml
@@ -0,0 +1,4 @@
+---
+- name: Erase gluster packages, keep dependencies; we will source install
+ shell: rpm -ev --nodeps `rpm -qa | grep ^gluster`
+ ignore_errors: True
diff --git a/tests/vagrant/vagrant-template-centos6/setup.yml b/tests/vagrant/vagrant-template-centos6/setup.yml
new file mode 100644
index 00000000000..520f1cdb019
--- /dev/null
+++ b/tests/vagrant/vagrant-template-centos6/setup.yml
@@ -0,0 +1,15 @@
+---
+- hosts: all
+ sudo: true
+ roles:
+ #Installing packages invoke dnf and metadata download takes a long time.
+ #The box used in Vagrantfile has all the packages installed.
+ #Refer to main.yml file in install-pkgs role to get list of packages.
+ #install-pkgs role is hence disabled by default.
+ #- install-pkgs
+ - remove-gluster-pkgs
+ - prepare-brick
+ - mock-user
+ - iptables
+ - fix-localhost
+ - daemon-services
diff --git a/tests/vagrant/vagrant-template-fedora/Vagrantfile b/tests/vagrant/vagrant-template-fedora/Vagrantfile
new file mode 100644
index 00000000000..df806c7aaee
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/Vagrantfile
@@ -0,0 +1,56 @@
+# -*- mode: ruby -*-
+# vi: set ft=ruby :
+
+Vagrant.configure("2") do |config|
+ config.vm.define "vagrant-testVM" do |testvm|
+ testvm.vm.box = "gluster-dev-fedora"
+ testvm.vm.box_url = "http://download.gluster.org/pub/gluster/glusterfs/vagrant/gluster-dev-fedora/boxes/gluster-dev-fedora.json"
+ testvm.vm.hostname = "vagrant-testVM"
+ #testvm.ssh.insert_key = false
+ testvm.vm.synced_folder ".", "/vagrant", disabled: true
+
+ host = RbConfig::CONFIG['host_os']
+ # Give VM 1/4 system memory & access to all cpu cores on the host
+ if host =~ /darwin/
+ cpus = `sysctl -n hw.ncpu`.to_i
+ # sysctl returns Bytes and we need to convert to MB
+ mem = `sysctl -n hw.memsize`.to_i / 1024 / 1024 / 4
+ elsif host =~ /linux/
+ cpus = `nproc`.to_i
+ # meminfo shows KB and we need to convert to MB
+ mem = `grep 'MemTotal' /proc/meminfo | sed -e 's/MemTotal://' -e 's/ kB//'`.to_i / 1024 / 4
+ else # sorry Windows folks, I can't help you
+ cpus = 2
+ mem = 1024
+ end
+
+ # Define basic config for VM, memory, cpu, storage pool
+ testvm.vm.provider "libvirt" do |lv|
+ lv.storage_pool_name = "default"
+ lv.memory = mem
+ lv.cpus = cpus
+
+
+ # We need a brick partition, lets have a 5G disk for that.
+ # If you need more bricks, just add more letters to the
+ # string below.
+ "b".split("").each do |i|
+ lv.storage :file,
+ #:path => "",
+ #:allow_existing => "",
+ :device => "vd#{i}",
+ :size => "5G",
+ :type => "qcow2",
+ :bus => "virtio",
+ :cache => "default"
+ end
+ end
+
+ # Let's provision
+ testvm.vm.provision "ansible", run: "always" do |setup|
+ setup.verbose = "v"
+ setup.playbook = "setup.yml"
+ end
+
+ end
+end
diff --git a/tests/vagrant/vagrant-template-fedora/roles/daemon-services/tasks/main.yml b/tests/vagrant/vagrant-template-fedora/roles/daemon-services/tasks/main.yml
new file mode 100644
index 00000000000..98d077b1f2e
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/roles/daemon-services/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: stop and disable kernel nfs
+ service: name=nfs-server state=stopped enabled=no
diff --git a/tests/vagrant/vagrant-template-fedora/roles/fix-localhost/tasks/main.yml b/tests/vagrant/vagrant-template-fedora/roles/fix-localhost/tasks/main.yml
new file mode 100644
index 00000000000..84dd252b65b
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/roles/fix-localhost/tasks/main.yml
@@ -0,0 +1,6 @@
+---
+- name: remove IPv6 address for localhost
+ shell: sed -i '/::1/s/localhost //' /etc/hosts
+
+- name: add IPv4 address for localhost
+ shell: sed -i '/127.0.0.1/s/$/ localhost/' /etc/hosts
diff --git a/tests/vagrant/vagrant-template-fedora/roles/install-pkgs/tasks/main.yml b/tests/vagrant/vagrant-template-fedora/roles/install-pkgs/tasks/main.yml
new file mode 100644
index 00000000000..2512034cdd7
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/roles/install-pkgs/tasks/main.yml
@@ -0,0 +1,85 @@
+---
+- name: install deltarpm
+ dnf: name=deltarpm state=present
+
+- name: update system
+ shell: dnf update -y
+
+- name: install other packages
+ dnf: name={{ item }} state=present
+ with_items:
+ - attr
+ - autoconf
+ - automake
+ - bc
+ - bison
+ - libcmocka-devel
+ - cifs-utils
+ - dbench
+ - dos2unix
+ - e2fsprogs
+ - findutils
+ - flex
+ - fuse-devel
+ - fuse-libs
+ - gcc
+ - gdb
+ - git
+ - glib2-devel
+ - hostname
+ - libacl-devel
+ - libaio-devel
+ - libattr-devel
+ - libibverbs-devel
+ - librdmacm-devel
+ - libtool
+ - libxml2-devel
+ - lvm2-devel
+ - make
+ - man-db
+ - mock
+ - net-tools
+ - nfs-ganesha-gluster
+ - nfs-utils
+ - openssh-server
+ - openssl-devel
+ - perl-Test-Harness
+ - pkgconfig
+ - procps-ng
+ - psmisc
+ - python-devel
+ - python-eventlet
+ - python-netifaces
+ - python-paste-deploy
+ - python-setuptools
+ - python-simplejson
+ - python-sphinx
+ - python-webob
+ - pyxattr
+ - readline-devel
+ - rpm-build
+ - screen
+ - strace
+ - supervisor
+ - systemtap-sdt-devel
+ - sqlite-devel
+ - samba*
+ - userspace-rcu-devel
+ - vim
+ - wget
+ - which
+ - xfsprogs
+ - yajl-devel
+
+- name: install dev help packages, not required by Gluster
+ dnf: name={{ item }} state=present
+ with_items:
+ - cgdb
+ - clang
+ - lsof
+ - perf
+ - sysstat
+ - systemtap
+ - systemtap-runtime
+ - tcpdump
+ - valgrind
diff --git a/tests/vagrant/vagrant-template-fedora/roles/iptables/tasks/main.yml b/tests/vagrant/vagrant-template-fedora/roles/iptables/tasks/main.yml
new file mode 100644
index 00000000000..768cb0e8668
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/roles/iptables/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: disable iptables, need to add specific rules later
+ shell: iptables -F
diff --git a/tests/vagrant/vagrant-template-fedora/roles/mock-user/tasks/main.yml b/tests/vagrant/vagrant-template-fedora/roles/mock-user/tasks/main.yml
new file mode 100644
index 00000000000..c8e1209937e
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/roles/mock-user/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: Add mock user; required for rpm.t
+ user: name=mock group=mock
diff --git a/tests/vagrant/vagrant-template-fedora/roles/prepare-brick/tasks/main.yml b/tests/vagrant/vagrant-template-fedora/roles/prepare-brick/tasks/main.yml
new file mode 100644
index 00000000000..6b3f6b8d3ea
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/roles/prepare-brick/tasks/main.yml
@@ -0,0 +1,6 @@
+---
+- name: Format backend
+ filesystem: fstype=xfs dev=/dev/vdb
+
+- name: Add entry to fstab and mount
+ mount: name=/d src=/dev/vdb fstype=xfs state=mounted
diff --git a/tests/vagrant/vagrant-template-fedora/roles/remove-gluster-pkgs/tasks/main.yml b/tests/vagrant/vagrant-template-fedora/roles/remove-gluster-pkgs/tasks/main.yml
new file mode 100644
index 00000000000..c91efa9ba7c
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/roles/remove-gluster-pkgs/tasks/main.yml
@@ -0,0 +1,4 @@
+---
+- name: Erase gluster packages, keep dependencies; we will source install
+ shell: rpm -ev --nodeps `rpm -qa | grep ^gluster`
+ ignore_errors: True
diff --git a/tests/vagrant/vagrant-template-fedora/roles/selinux/tasks/main.yml b/tests/vagrant/vagrant-template-fedora/roles/selinux/tasks/main.yml
new file mode 100644
index 00000000000..c9ba9618428
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/roles/selinux/tasks/main.yml
@@ -0,0 +1,3 @@
+---
+- name: Allow gfapi in Samba to bind to other ports than well known smb ports
+ seboolean: name=samba_load_libgfapi state=yes persistent=yes
diff --git a/tests/vagrant/vagrant-template-fedora/setup.yml b/tests/vagrant/vagrant-template-fedora/setup.yml
new file mode 100644
index 00000000000..fc42a8157f3
--- /dev/null
+++ b/tests/vagrant/vagrant-template-fedora/setup.yml
@@ -0,0 +1,16 @@
+---
+- hosts: all
+ sudo: true
+ roles:
+ #Installing packages invoke dnf and metadata download takes a long time.
+ #The box used in Vagrantfile has all the packages installed.
+ #Refer to main.yml file in install-pkgs role to get list of packages.
+ #install-pkgs role is hence disabled by default.
+ #- install-pkgs
+ - remove-gluster-pkgs
+ - prepare-brick
+ - mock-user
+ - selinux
+ - iptables
+ - fix-localhost
+ - daemon-services
diff --git a/tests/volume.rc b/tests/volume.rc
new file mode 100644
index 00000000000..b38848c0e52
--- /dev/null
+++ b/tests/volume.rc
@@ -0,0 +1,1002 @@
+function volinfo_field()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume info $vol | grep "^$field: " | sed 's/.*: //';
+}
+
+function volume_get_field()
+{
+ local vol=$1
+ local field=$2
+ $CLI volume get $vol $field | tail -1 | awk '{print $2}'
+}
+
+
+function brick_count()
+{
+ local vol=$1;
+
+ $CLI volume info $vol | egrep "^Brick[0-9]+: " | wc -l;
+}
+
+function check_brick_status() {
+ cmd="gluster --xml volume status"
+ local daemon=$1
+
+ if [[ -z $daemon ]]
+ then
+ echo `$cmd | grep '<status>1' | wc -l`
+ else
+ echo `$cmd | grep -A 5 ${daemon} | grep '<status>1' | wc -l`
+ fi
+}
+
+function online_brick_count ()
+{
+ local v1=0
+ local v2=0
+ local v3=0
+ local v4=0
+ local v5=0
+ local tot=0
+
+ #First count total Number of bricks and then subtract daemon status
+ v1=`check_brick_status`
+ v2=`check_brick_status "Self-heal"`
+ v3=`check_brick_status "Quota"`
+ v4=`check_brick_status "Snapshot"`
+ v5=`check_brick_status "Tier"`
+ v6=`check_brick_status "Scrubber"`
+ v7=`check_brick_status "Bitrot"`
+
+ tot=$((v1-v2-v3-v4-v5-v6-v7))
+ echo $tot
+
+}
+
+
+function brick_up_status {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ $CLI volume status $vol $host:$brick --xml | sed -ne 's/.*<status>\([01]\)<\/status>/\1/p'
+}
+
+function volume_option()
+{
+ local vol=$1
+ local key=$2
+ $CLI volume info $vol | egrep "^$key: " | cut -f2 -d' ';
+}
+
+function rebalance_status_field {
+ $CLI volume rebalance $1 status | awk '{print $7}' | sed -n 3p
+}
+
+function fix-layout_status_field {
+ #The fix-layout status can be up to 3 words, (ex:'fix-layout in progress'), hence the awk-print $2 thru $4.
+ #But if the status is less than 3 words, it also prints the next field i.e the run_time_in_secs.(ex:'completed 3.00').
+ #So we trim the numbers out with `tr`. Finally remove the trailing white spaces with sed. What we get is one of the
+ #strings in the 'cli_vol_task_status_str' char array of cli-rpc-ops.c
+
+ $CLI volume rebalance $1 status | awk '{print $2,$3,$4}' |sed -n 3p |tr -d '[^0-9+\.]'|sed 's/ *$//g'
+}
+
+function remove_brick_status_completed_field {
+ local vol=$1
+ local brick_list=$2
+ $CLI volume remove-brick $vol $brick_list status | awk '{print $7}' | sed -n 3p
+}
+
+function get_mount_process_pid {
+ local vol=$1
+ local mnt=$2
+ ps auxww | grep glusterfs | grep -E "volfile-id[ =]/?$vol .*$mnt" | awk '{print $2}' | head -1
+}
+
+function get_nfs_pid ()
+{
+ ps auxww | grep "volfile-id\ gluster\/nfs" | awk '{print $2}' | head -1
+}
+
+function read_nfs_pidfile ()
+{
+ echo `cat $GLUSTERD_PIDFILEDIR/nfs/nfs.pid`
+}
+
+function cleanup_statedump {
+ pid=$1
+ rm -f $statedumpdir/*$pid.dump.*
+ #.vimrc friendly comment */
+}
+
+function wait_statedump_ready {
+ local maxtime="${1}000000000"
+ local pid="$2"
+ local deadline="$(($(date +%s%N) + maxtime))"
+ local fname
+
+ while [[ "$(date +%s%N)" < "$deadline" ]]; do
+ fname="$statedumpdir/$(ls $statedumpdir | grep -E "\.$pid\.dump\.")"
+ if [[ -f "$fname" ]]; then
+ grep "^DUMP-END-TIME" "$fname" >/dev/null
+ if [[ $? -eq 0 ]]; then
+ echo $fname
+ return
+ fi
+ fi
+ sleep 0.1
+ done
+
+ echo "nostatedump"
+}
+
+function generate_statedump {
+ pid=$1
+ #remove old stale statedumps
+ cleanup_statedump $pid
+ kill -USR1 $pid
+ wait_statedump_ready 3 $pid
+}
+
+function generate_mount_statedump {
+ local vol=$1
+ local mnt=$2
+ generate_statedump $(get_mount_process_pid $vol $mnt)
+}
+
+function cleanup_mount_statedump {
+ local vol=$1
+ cleanup_statedump $(get_mount_process_pid $vol)
+}
+
+function snap_client_connected_status {
+ local vol=$1
+ local fpath=$(generate_mount_statedump $vol)
+ up=$(grep -a -A1 xlator.protocol.client.$vol-snapd-client.priv $fpath | tail -1 | cut -f 2 -d'=')
+ rm -f $fpath
+ echo "$up"
+}
+
+function _jbrc_child_up_status {
+ local vol=$1
+ #brick_id is (brick-num in volume info - 1)
+ local brick_id=$2
+ local gen_state_dump=$3
+ local fpath=$($gen_state_dump $vol)
+ up=$(grep -a -B1 child_$brick_id=$vol-client-$brick_id $fpath | head -1 | cut -f2 -d'=')
+ rm -f $fpath
+ echo "$up"
+}
+
+function jbrc_child_up_status {
+ local vol=$1
+ #brick_id is (brick-num in volume info - 1)
+ local brick_id=$2
+ _jbrc_child_up_status $vol $brick_id generate_mount_statedump
+}
+
+function _afr_child_up_status {
+ local vol=$1
+ #brick_id is (brick-num in volume info - 1)
+ local brick_id=$2
+ local gen_state_dump=$3
+ local fpath=$($gen_state_dump $vol)
+ up=$(grep -a -B1 trusted.afr.$vol-client-$brick_id $fpath | head -1 | cut -f2 -d'=')
+ rm -f $fpath
+ echo "$up"
+}
+
+function afr_child_up_status_meta {
+ local mnt=$1
+ local repl=$2
+ local child=$3
+ grep -E "^child_up\[$child\]" $mnt/.meta/graphs/active/$repl/private | awk '{print $3}'
+}
+
+function client_connected_status_meta {
+ local mnt=$1
+ local client=$2
+ grep "connected" $mnt/.meta/graphs/active/$client/private | awk '{print $3}'
+}
+
+function afr_child_up_status {
+ local vol=$1
+ #brick_id is (brick-num in volume info - 1)
+ local brick_id=$2
+ _afr_child_up_status $vol $brick_id generate_mount_statedump
+}
+
+function ec_get_info {
+ local vol=$1
+ local dist_id=$2
+ local key=$3
+ local fpath=$4
+ local value=$(sed -n "/^\[cluster\/disperse\.$vol-disperse-$dist_id\]/,/^\[/{s/^$key=\(.*\)/\1/p;}" $fpath | head -1)
+ rm -f $fpath
+ echo "$value"
+}
+
+function ec_child_up_status {
+ local vol=$1
+ local dist_id=$2
+ local brick_id=$(($3 + 1))
+ local mnt=$4
+ local mask=$(ec_get_info $vol $dist_id "childs_up_mask" $(generate_mount_statedump $vol $mnt))
+ echo "${mask: -$brick_id:1}"
+}
+
+function ec_child_up_count {
+ local vol=$1
+ local dist_id=$2
+ local mnt=$3
+ ec_get_info $vol $dist_id "childs_up" $(generate_mount_statedump $vol $mnt)
+}
+
+function ec_child_up_status_shd {
+ local vol=$1
+ local dist_id=$2
+ local brick_id=$(($3 + 1))
+ local mask=$(ec_get_info $vol $dist_id "childs_up_mask" $(generate_shd_statedump $vol))
+ echo "${mask: -$brick_id:1}"
+}
+
+function ec_child_up_count_shd {
+ local vol=$1
+ local dist_id=$2
+ ec_get_info $vol $dist_id "childs_up" $(generate_shd_statedump $vol)
+}
+
+function get_shd_process_pid {
+ local vol=$1
+ ps auxww | grep "process-name\ glustershd" | awk '{print $2}' | head -1
+}
+
+function generate_shd_statedump {
+ local vol=$1
+ generate_statedump $(get_shd_process_pid $vol)
+}
+
+function generate_nfs_statedump {
+ generate_statedump $(get_nfs_pid)
+}
+
+function generate_brick_statedump {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ generate_statedump $(get_brick_pid $vol $host $brick)
+}
+
+function afr_child_up_status_in_shd {
+ local vol=$1
+ #brick_id is (brick-num in volume info - 1)
+ local brick_id=$2
+ _afr_child_up_status $vol $brick_id generate_shd_statedump
+}
+
+function afr_child_up_status_in_nfs {
+ local vol=$1
+ #brick_id is (brick-num in volume info - 1)
+ local brick_id=$2
+ _afr_child_up_status $vol $brick_id generate_nfs_statedump
+}
+
+function nfs_up_status {
+ gluster volume status | grep "NFS Server" | awk '{print $7}'
+}
+
+function glustershd_up_status {
+ gluster volume status | grep "Self-heal Daemon" | awk '{print $7}'
+}
+
+function quotad_up_status {
+ gluster volume status | grep "Quota Daemon" | awk '{print $7}'
+}
+
+function get_glusterd_pid {
+ pgrep '^glusterd$' | head -1
+}
+
+function get_brick_pidfile {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ local brick_hiphenated=$(echo $brick | tr '/' '-')
+ echo $GLUSTERD_PIDFILEDIR/vols/$vol/${host}${brick_hiphenated}.pid
+}
+
+function get_brick_pid {
+ cat $(get_brick_pidfile $*)
+}
+
+function kill_brick {
+ local vol=$1
+ local host=$2
+ local brick=$3
+
+ local pidfile=$(get_brick_pidfile $vol $host $brick)
+ local cmdline="/proc/$(cat $pidfile)/cmdline"
+ local socket=$(cat $cmdline | tr '\0' '\n' | grep '\.socket$')
+
+ gf_attach -d $socket $brick
+
+ local deadline="$(($(date +%s%N) + ${PROCESS_UP_TIMEOUT}000000000))"
+ while [[ "$(date +%s%N)" < "$deadline" ]]; do
+ if [[ "$(brick_up_status $vol $host $brick)" == "0" ]]; then
+ break
+ fi
+ done
+}
+
+function check_option_help_presence {
+ local option=$1
+ $CLI volume set help | grep "^Option:" | grep -w $option
+}
+
+function afr_get_changelog_xattr {
+ local file=$1
+ local xkey=$2
+ local xval=$(getfattr -n $xkey -e hex $file 2>/dev/null | grep "$xkey" | cut -f2 -d'=')
+ if [ -z $xval ]; then
+ xval="0x000000000000000000000000"
+ fi
+ echo $xval
+}
+
+function get_pending_heal_count {
+ local vol=$1
+ gluster volume heal $vol info | grep "Number of entries" | awk '{ sum+=$4} END {print sum}'
+}
+
+function afr_get_split_brain_count {
+ local vol=$1
+ gluster volume heal $vol info split-brain | grep "Number of entries in split-brain" | awk '{ sum+=$6} END {print sum}'
+}
+
+function afr_get_index_path {
+ local brick_path=$1
+ echo "$brick_path/.glusterfs/indices/xattrop"
+}
+
+function afr_get_num_indices_in_brick {
+ local brick_path=$1
+ echo $(ls $(afr_get_index_path $brick_path) | grep -v xattrop | wc -l)
+}
+
+function gf_get_gfid_xattr {
+ file=$1
+ getfattr -n trusted.gfid -e hex $file 2>/dev/null | grep "trusted.gfid" | cut -f2 -d'='
+}
+
+function gf_gfid_xattr_to_str {
+ xval=$1
+ echo "${xval:2:8}-${xval:10:4}-${xval:14:4}-${xval:18:4}-${xval:22:12}"
+}
+
+function get_text_xattr {
+ local key=$1
+ local path=$2
+ getfattr -h -d -m. -e text $path 2>/dev/null | grep -a $key | cut -f2 -d'='
+}
+
+function get_gfid2path {
+ local path=$1
+ getfattr -h --only-values -n glusterfs.gfidtopath $path 2>/dev/null
+}
+
+function get_mdata {
+ local path=$1
+ getfattr -h -e hex -n trusted.glusterfs.mdata $path 2>/dev/null | grep "trusted.glusterfs.mdata" | cut -f2 -d'='
+}
+
+function get_mdata_count {
+ getfattr -d -m . -e hex $@ 2>/dev/null | grep mdata | wc -l
+}
+
+function get_mdata_uniq_count {
+ getfattr -d -m . -e hex $@ 2>/dev/null | grep mdata | uniq | wc -l
+}
+
+function get_xattr_key {
+ local key=$1
+ local path=$2
+ getfattr -h -d -m. -e text $path 2>/dev/null | grep -a $key | cut -f1 -d'='
+}
+
+function gf_check_file_opened_in_brick {
+ vol=$1
+ host=$2
+ brick=$3
+ realpath=$4
+ ls -l /proc/$(get_brick_pid $vol $host $brick)/fd | grep "${realpath}$" 2>&1 > /dev/null
+ if [ $? -eq 0 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function gf_get_gfid_backend_file_path {
+ brickpath=$1
+ filepath_in_brick=$2
+ gfid=$(gf_get_gfid_xattr "$brickpath/$filepath_in_brick")
+ gfidstr=$(gf_gfid_xattr_to_str $gfid)
+ echo "$brickpath/.glusterfs/${gfidstr:0:2}/${gfidstr:2:2}/$gfidstr"
+}
+
+function gf_rm_file_and_gfid_link {
+ brickpath=$1
+ filepath_in_brick=$2
+ rm -f $(gf_get_gfid_backend_file_path $brickpath $filepath_in_brick)
+ rm -f "$brickpath/$filepath_in_brick"
+}
+
+
+function gd_is_replace_brick_completed {
+ local host=$1
+ local vol=$2
+ local src_brick=$3
+ local dst_brick=$4
+ $CLI volume replace-brick $vol $src_brick $dst_brick status | grep -i "Migration complete"
+ if [ $? -eq 0 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function dht_get_layout {
+ local my_xa=trusted.glusterfs.dht
+ getfattr -d -e hex -n $my_xa $1 2> /dev/null | grep "$my_xa=" | cut -d= -f2
+}
+
+function afr_get_specific_changelog_xattr ()
+{
+ local path=$1
+ local key=$2
+ local type=$3
+ local specific_changelog=""
+
+ changelog_xattr=$(afr_get_changelog_xattr "$path" "$key")
+ if [ "$type" == "data" ]; then
+ specific_changelog=${changelog_xattr:2:8}
+ elif [ "$type" == "metadata" ]; then
+ specific_changelog=${changelog_xattr:10:8}
+ elif [ "$type" == "entry" ]; then
+ specific_changelog=${changelog_xattr:18:8}
+ else
+ specific_changlog="error"
+ fi
+
+ echo $specific_changelog
+}
+##
+ # query pathinfo xattr and extract POSIX pathname(s)
+ ##
+function get_backend_paths {
+ local path=$1
+
+ getfattr -m . -n trusted.glusterfs.pathinfo $path | tr ' ' '\n' | sed -n 's/<POSIX.*:.*:\(.*\)>.*/\1/p'
+}
+
+#Gets the xattr value in hex, also removed 0x in front of the value
+function get_hex_xattr {
+ local key=$1
+ local path=$2
+ getfattr -d -m. -e hex $2 2>/dev/null | grep $1 | cut -f2 -d'=' | cut -f2 -d'x'
+}
+
+function cumulative_stat_count {
+ echo "$1" | grep "Cumulative Stats:" | wc -l
+}
+
+function incremental_stat_count {
+ echo "$1" | grep "Interval$2Stats:" | wc -l
+}
+
+function cleared_stat_count {
+ echo "$1" | grep "Cleared stats." | wc -l
+}
+
+function data_read_count {
+ echo "$1" | grep "Data Read:$2bytes" | wc -l
+}
+
+function data_written_count {
+ echo "$1" | grep "Data Written:$2bytes" | wc -l
+}
+
+function has_holes {
+ if [ $((`stat -c '%b*%B-%s' $1`)) -lt 0 ];
+ then
+ echo "1"
+ else
+ echo "0"
+ fi
+}
+
+function do_volume_operations() {
+ local operation=$1
+ local count=$2
+ local force=$3
+
+ local pids=()
+ local cli
+ local v
+
+ for i in `seq 1 $count`; do
+ cli="CLI_$i"
+ v="V`expr $i - 1`"
+ ${!cli} volume $operation ${!v} $force &
+ pids[$i]=$!
+ done
+
+ for i in `seq 1 $count`; do
+ wait ${pids[$i]}
+ done
+}
+
+function start_volumes() {
+ do_volume_operations start $1
+}
+
+function stop_volumes() {
+ do_volume_operations stop $1
+}
+
+function start_force_volumes() {
+ do_volume_operations start $1 force
+}
+
+function stop_force_volumes() {
+ do_volume_operations stop $1 force
+}
+
+function delete_volumes() {
+ do_volume_operations delete $1
+}
+
+function volume_exists() {
+ $CLI volume info $1 > /dev/null 2>&1
+ if [ $? -eq 0 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function killall_gluster() {
+ terminate_pids $(process_pids gluster)
+ find $GLUSTERD_PIDFILEDIR -name '*.pid' | xargs rm -f
+}
+
+function afr_get_index_count {
+ local brick=$1
+ ls $1/.glusterfs/indices/xattrop | grep -v xattrop | wc -l
+}
+
+function landfill_entry_count {
+ local brick=$1
+ ls $brick/.glusterfs/landfill | wc -l
+}
+
+function path_exists {
+ stat $1
+ if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
+}
+
+function path_size {
+ local size=$(stat -c %s $1)
+ if [ $? -eq 0 ]; then echo $size; else echo ""; fi
+}
+
+function force_umount {
+ ${UMOUNT_F} $*
+ if [ $? -eq 0 ]; then echo "Y"; else echo "N"; fi
+}
+
+function assign_gfid {
+ local gfid=$1
+ local file=$2
+ setfattr -n trusted.gfid -v $1 $2
+}
+
+function get_random_gfid {
+ echo "0x"$(uuidgen | awk -F '-' 'BEGIN {OFS=""} {print $1,$2,$3,$4,$5}')
+}
+
+function volgen_volume_exists {
+ local volfile="$1"
+ local xl_vol="$2"
+ local xl_type="$3"
+ local xl_feature="$4"
+ xl=$(sed -e "/./{H;\$!d;}" -e "x;/volume $xl_vol/!d;/type $xl_type\/$xl_feature/!d" $volfile)
+ if [ -z "$xl" ];
+ then
+ echo "N"
+ else
+ echo "Y"
+ fi
+}
+
+function volgen_volume_option {
+ local volfile="$1"
+ local xl_vol="$2"
+ local xl_type="$3"
+ local xl_feature="$4"
+ local xl_option="$5"
+ sed -e "/./{H;\$!d;}" -e "x;/volume $xl_vol/!d;/type $xl_type\/$xl_feature/!d;/option $xl_option/!d" $volfile | grep " $xl_option " | awk '{print $3}'
+}
+
+function mount_get_option_value {
+ local m=$1
+ local subvol=$2
+ local key=$3
+
+ grep -w "$3" $m/.meta/graphs/active/$subvol/private | awk '{print $3}'
+}
+
+function get_volume_mark {
+ getfattr -n trusted.glusterfs.volume-mark -ehex $1 | sed -n 's/^trusted.glusterfs.volume-mark=0x//p' | cut -b5-36 | sed 's/\([a-f0-9]\{8\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)\([a-f0-9]\{4\}\)/\1-\2-\3-\4-/'
+}
+
+# setup geo-rep in a single a node.
+
+function setup_georep {
+
+ $CLI volume create $GMV0 replica 2 $H0:$B0/${GMV0}{1,2,3,4};
+
+ $CLI volume start $GMV0
+
+ $CLI volume create $GSV0 replica 2 $H0:$B0/${GSV0}{1,2,3,4};
+
+ $CLI volume start $GSV0
+
+ $CLI system:: execute gsec_create
+
+ $CLI volume geo-rep $GMV0 $H0::$GSV0 create push-pem
+
+ $CLI volume geo-rep $GMV0 $H0::$GSV0 start
+
+ sleep 80 # after start geo-rep takes a minute to get stable
+
+}
+
+
+# stop and delete geo-rep session
+
+function cleanup_georep {
+
+ $CLI volume geo-rep $GMV0 $H0::$GSV0 stop
+
+ $CLI volume geo-rep $GMV0 $H0::$GSV0 delete
+}
+
+function num_graphs
+{
+ local mountpoint=$1
+ echo `ls $mountpoint/.meta/graphs/ | grep -v active | wc -l`
+}
+
+function get_aux()
+{
+##Check if a auxiliary mount is there
+local aux_suffix=$1
+local rundir=$(gluster --print-statedumpdir)
+local pidfile="${rundir}/${V0}$aux_suffix.pid"
+if [ -f $pidfile ];
+then
+ local pid=$(cat ${rundir}/${V0}.pid)
+ pidof glusterfs 2>&1 | grep -w $pid > /dev/null
+
+ if [ $? -eq 0 ]
+ then
+ echo "0"
+ else
+ echo "1"
+ fi
+else
+ echo "1"
+fi
+}
+
+function get_list_aux()
+{
+# check for quota list aux mount
+ get_aux "_quota_list"
+}
+
+function get_limit_aux()
+{
+# check for quota list aux mount
+ get_aux "_quota_limit"
+}
+
+function check_for_xattr {
+ local xattr=$1
+ local filepath=$2
+ getfattr -n $xattr $filepath 2>/dev/null | grep "$xattr" | cut -f1 -d'='
+}
+
+function get_bitd_count {
+ ps auxww | grep glusterfs | grep bitd.pid | grep -v grep | wc -l
+}
+
+function get_scrubd_count {
+ ps auxww | grep glusterfs | grep scrub.pid | grep -v grep | wc -l
+}
+
+function get_quarantine_count {
+ ls -l "$1/.glusterfs/quarantine" | wc -l
+}
+
+function get_quotad_count {
+ ps auxww | grep glusterfs | grep quotad.pid | grep -v grep | wc -l
+}
+
+function get_nfs_count {
+ ps auxww | grep glusterfs | grep nfs.pid | grep -v grep | wc -l
+}
+
+function get_snapd_count {
+ ps auxww | grep glusterfs | grep snapd.pid | grep -v grep | wc -l
+}
+
+function drop_cache() {
+ case $OSTYPE in
+ Linux)
+ echo 3 > /proc/sys/vm/drop_caches
+ ;;
+ *)
+ # fail but flush caches
+ ( cd $1 && umount $1 2>/dev/null )
+ ;;
+ esac
+}
+
+function quota_list_field () {
+ local QUOTA_PATH=$1
+ local FIELD=$2
+ local awk_arg="{print \$$FIELD}"
+
+ $CLI volume quota $V0 list $QUOTA_PATH | grep $QUOTA_PATH | awk "$awk_arg"
+}
+
+function quota_object_list_field () {
+ local QUOTA_PATH=$1
+ local FIELD=$2
+ local awk_arg="{print \$$FIELD}"
+
+ $CLI volume quota $V0 list-objects $QUOTA_PATH | grep $QUOTA_PATH | awk "$awk_arg"
+}
+
+function quotausage()
+{
+ quota_list_field $1 4
+}
+
+function quota_hard_limit()
+{
+ quota_list_field $1 2
+}
+
+function quota_soft_limit()
+{
+ quota_list_field $1 3
+}
+
+function quota_sl_exceeded()
+{
+ quota_list_field $1 6
+}
+
+function quota_hl_exceeded()
+{
+ quota_list_field $1 7
+
+}
+
+function quota_object_hard_limit()
+{
+ quota_object_list_field $1 2
+}
+
+function scrub_status()
+{
+ local vol=$1;
+ local field=$2;
+
+ $CLI volume bitrot $vol scrub status | grep "^$field: " | sed 's/.*: //';
+}
+
+function get_gfid_string {
+ local path=$1;
+ getfattr -n glusterfs.gfid.string $1 2>/dev/null \
+ | grep glusterfs.gfid.string | cut -d '"' -f 2
+}
+
+function file_all_zeroes {
+ < $1 tr -d '\0' | read -n 1 || echo 1
+}
+
+function get_hard_link_count {
+ local path=$1;
+ stat -c %h $path
+}
+
+function count_sh_entries()
+{
+ ls $1/.glusterfs/indices/xattrop | grep -v "xattrop-" | wc -l
+}
+
+function check_brick_multiplex() {
+ cnt="$(ls /var/log/glusterfs/bricks|wc -l)"
+ local ret=$($CLI volume info|grep "cluster.brick-multiplex"|cut -d" " -f2)
+ local bcnt="$(brick_count)"
+
+ if [ $bcnt -ne 1 ]; then
+ if [ "$ret" = "on" ] || [ $cnt -eq 1 ]; then
+ echo "Y"
+ else
+ echo "N"
+ fi
+ else
+ echo "N"
+ fi
+}
+
+function get_fd_count {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ local fname=$4
+ local val="$(check_brick_multiplex)"
+ local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
+ local statedump=$(generate_brick_statedump $vol $host $brick)
+ if [ $val == "N" ]; then
+ count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+ else
+ count=$(grep "${brick}.active.1" -A3 $statedump | grep "gfid=$gfid_str" -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+ fi
+# If no information is found for a given gfid, it means it has not been
+# accessed, so it doesn't have any open fd. In this case we return 0.
+ count="${count:-0}"
+ rm -f $statedump
+ echo $count
+}
+
+
+function get_active_fd_count {
+ local vol=$1
+ local host=$2
+ local brick=$3
+ local fname=$4
+ local val="$(check_brick_multiplex)"
+ local gfid_str=$(gf_gfid_xattr_to_str $(gf_get_gfid_xattr $brick/$fname))
+ local statedump=$(generate_brick_statedump $vol $host $brick)
+ if [ $val == "N" ]; then
+ count=$(grep "gfid=$gfid_str" $statedump -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+ else
+ count=$(grep "${brick}.active.1" -A3 $statedump | grep "gfid=$gfid_str" -A2 | grep fd-count | cut -f2 -d'=' | tail -1)
+ fi
+ rm -f $statedump
+ echo $count
+}
+
+function get_mount_active_size_value {
+ local vol=$1
+ local mount=$2
+ local statedump=$(generate_mount_statedump $vol $mount)
+ local val=$(grep "active_size" $statedump | cut -f2 -d'=' | tail -1)
+ rm -f $statedump
+ echo $val
+}
+
+function get_mount_lru_size_value {
+ local vol=$1
+ local mount=$2
+ local statedump=$(generate_mount_statedump $vol $mount)
+ local val=$(grep "lru_size" $statedump | cut -f2 -d'=' | tail -1)
+ rm -f $statedump
+ echo $val
+}
+
+function check_changelog_op {
+ local clog_path=$1
+ local op=$2
+
+ $PYTHON $(dirname $0)/../../utils/changelogparser.py ${clog_path}/CHANGELOG | grep "$op" | wc -l
+}
+
+function processed_changelogs {
+ local processed_dir=$1
+ count=$(ls -l $processed_dir | grep CHANGELOG | wc -l)
+ if [ $count -gt 0 ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function volgen_check_ancestry {
+ #Returns Y if ancestor_xl is an ancestor of $child_xl according to the volfile
+ local volfile="$1"
+
+ local child_xl_type="$2"
+ local child_xl="$3"
+
+ local ancestor_xl_type="$4"
+ local ancestor_xl="$5"
+
+ child_linenum=$(awk '/type $child_xl_type\/$child_xl/ {print FNR}' $volfile)
+ ancestor_linenum=$(awk '/type $ancestor_xl_type\/$ancestor_xl/ {print FNR}' $volfile)
+
+ if [ $child_linenum -lt $ancestor_linenum ];
+ then
+ echo "Y"
+ else
+ echo "N"
+ fi
+}
+
+function get_shd_mux_pid {
+ local volume=$1
+ pid=`$CLI volume status $volume shd | awk '/Self-heal/{print $8}'`
+ echo $pid
+}
+
+function shd_count {
+ ps aux | grep "glustershd" | grep -v grep | wc -l
+}
+
+function number_healer_threads_shd {
+ local pid=$(get_shd_mux_pid $1)
+ pstack $pid | grep $2 | wc -l
+}
+
+function get_mtime {
+ local time=$(get-mdata-xattr -m $1)
+ if [ $time == "-1" ];
+ then
+ echo $(stat -c %Y $1)
+ else
+ echo $time
+ fi
+}
+
+function get_ctime {
+ local time=$(get-mdata-xattr -c $1)
+ if [ $time == "-1" ];
+ then
+ echo $(stat -c %Z $1)
+ else
+ echo $time
+ fi
+}
+
+function get_atime {
+ local time=$(get-mdata-xattr -a $1)
+ if [ $time == "-1" ];
+ then
+ echo $(stat -c %X $1)
+ else
+ echo $time
+ fi
+}
+
+function get-xml()
+{
+ $CLI $1 --xml | xmllint --format - | grep $2 | sed 's/\(<"$2">\|<\/"$2">\)//g'
+}
+
+function logging_time_check()
+{
+ local logdir=$1
+ local logfile=`echo ${0##*/}`_glusterd1.log
+
+ cat $logdir/1/$logfile | tail -n 2 | head -n 1 | grep $(date +%H:%M) | wc -l
+}
diff --git a/tools/Makefile.am b/tools/Makefile.am
new file mode 100644
index 00000000000..5808a3728cd
--- /dev/null
+++ b/tools/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = gfind_missing_files glusterfind setgfid2path
+
+CLEANFILES =
diff --git a/tools/gfind_missing_files/Makefile.am b/tools/gfind_missing_files/Makefile.am
new file mode 100644
index 00000000000..181fe7091f3
--- /dev/null
+++ b/tools/gfind_missing_files/Makefile.am
@@ -0,0 +1,32 @@
+gfindmissingfilesdir = $(GLUSTERFS_LIBEXECDIR)/gfind_missing_files
+
+if WITH_SERVER
+gfindmissingfiles_SCRIPTS = gfind_missing_files.sh gfid_to_path.sh \
+ gfid_to_path.py
+endif
+
+EXTRA_DIST = gfind_missing_files.sh gfid_to_path.sh \
+ gfid_to_path.py
+
+if WITH_SERVER
+gfindmissingfiles_PROGRAMS = gcrawler
+endif
+
+gcrawler_SOURCES = gcrawler.c
+gcrawler_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+gcrawler_LDFLAGS = $(GF_LDFLAGS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+if WITH_SERVER
+uninstall-local:
+ rm -f $(DESTDIR)$(sbindir)/gfind_missing_files
+
+install-data-local:
+ rm -f $(DESTDIR)$(sbindir)/gfind_missing_files
+ ln -s $(GLUSTERFS_LIBEXECDIR)/gfind_missing_files/gfind_missing_files.sh $(DESTDIR)$(sbindir)/gfind_missing_files
+endif
+
+CLEANFILES =
diff --git a/tools/gfind_missing_files/gcrawler.c b/tools/gfind_missing_files/gcrawler.c
new file mode 100644
index 00000000000..4acbe92bc8f
--- /dev/null
+++ b/tools/gfind_missing_files/gcrawler.c
@@ -0,0 +1,581 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <dirent.h>
+#include <assert.h>
+#include <glusterfs/locking.h>
+
+#include <glusterfs/compat.h>
+#include <glusterfs/list.h>
+#include <glusterfs/syscall.h>
+
+#define THREAD_MAX 32
+#define BUMP(name) INC(name, 1)
+#define DEFAULT_WORKERS 4
+
+#define NEW(x) \
+ { \
+ x = calloc(1, sizeof(typeof(*x))); \
+ }
+
+#define err(x...) fprintf(stderr, x)
+#define out(x...) fprintf(stdout, x)
+#define dbg(x...) \
+ do { \
+ if (debug) \
+ fprintf(stdout, x); \
+ } while (0)
+#define tout(x...) \
+ do { \
+ out("[%ld] ", pthread_self()); \
+ out(x); \
+ } while (0)
+#define terr(x...) \
+ do { \
+ err("[%ld] ", pthread_self()); \
+ err(x); \
+ } while (0)
+#define tdbg(x...) \
+ do { \
+ dbg("[%ld] ", pthread_self()); \
+ dbg(x); \
+ } while (0)
+
+int debug = 0;
+const char *slavemnt = NULL;
+int workers = 0;
+
+struct stats {
+ unsigned long long int cnt_skipped_gfids;
+};
+
+pthread_spinlock_t stats_lock;
+
+struct stats stats_total;
+int stats = 0;
+
+#define INC(name, val) \
+ do { \
+ if (!stats) \
+ break; \
+ pthread_spin_lock(&stats_lock); \
+ { \
+ stats_total.cnt_##name += val; \
+ } \
+ pthread_spin_unlock(&stats_lock); \
+ } while (0)
+
+void
+stats_dump()
+{
+ if (!stats)
+ return;
+
+ out("-------------------------------------------\n");
+ out("Skipped_Files : %10lld\n", stats_total.cnt_skipped_gfids);
+ out("-------------------------------------------\n");
+}
+
+struct dirjob {
+ struct list_head list;
+
+ char *dirname;
+
+ struct dirjob *parent;
+ int ret; /* final status of this subtree */
+ int refcnt; /* how many dirjobs have this as parent */
+
+ pthread_spinlock_t lock;
+};
+
+struct xwork {
+ pthread_t cthreads[THREAD_MAX]; /* crawler threads */
+ int count;
+ int idle;
+ int stop;
+
+ struct dirjob crawl;
+
+ struct dirjob *rootjob; /* to verify completion in xwork_fini() */
+
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+};
+
+struct dirjob *
+dirjob_ref(struct dirjob *job)
+{
+ pthread_spin_lock(&job->lock);
+ {
+ job->refcnt++;
+ }
+ pthread_spin_unlock(&job->lock);
+
+ return job;
+}
+
+void
+dirjob_free(struct dirjob *job)
+{
+ assert(list_empty(&job->list));
+
+ pthread_spin_destroy(&job->lock);
+ free(job->dirname);
+ free(job);
+}
+
+void
+dirjob_ret(struct dirjob *job, int err)
+{
+ int ret = 0;
+ int refcnt = 0;
+ struct dirjob *parent = NULL;
+
+ pthread_spin_lock(&job->lock);
+ {
+ refcnt = --job->refcnt;
+ job->ret = (job->ret || err);
+ }
+ pthread_spin_unlock(&job->lock);
+
+ if (refcnt == 0) {
+ ret = job->ret;
+
+ if (ret)
+ terr("Failed: %s (%d)\n", job->dirname, ret);
+ else
+ tdbg("Finished: %s\n", job->dirname);
+
+ parent = job->parent;
+ if (parent)
+ dirjob_ret(parent, ret);
+
+ dirjob_free(job);
+ job = NULL;
+ }
+}
+
+struct dirjob *
+dirjob_new(const char *dir, struct dirjob *parent)
+{
+ struct dirjob *job = NULL;
+
+ NEW(job);
+ if (!job)
+ return NULL;
+
+ job->dirname = strdup(dir);
+ if (!job->dirname) {
+ free(job);
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&job->list);
+ pthread_spin_init(&job->lock, PTHREAD_PROCESS_PRIVATE);
+ job->ret = 0;
+
+ if (parent)
+ job->parent = dirjob_ref(parent);
+
+ job->refcnt = 1;
+
+ return job;
+}
+
+void
+xwork_addcrawl(struct xwork *xwork, struct dirjob *job)
+{
+ pthread_mutex_lock(&xwork->mutex);
+ {
+ list_add_tail(&job->list, &xwork->crawl.list);
+ pthread_cond_broadcast(&xwork->cond);
+ }
+ pthread_mutex_unlock(&xwork->mutex);
+}
+
+int
+xwork_add(struct xwork *xwork, const char *dir, struct dirjob *parent)
+{
+ struct dirjob *job = NULL;
+
+ job = dirjob_new(dir, parent);
+ if (!job)
+ return -1;
+
+ xwork_addcrawl(xwork, job);
+
+ return 0;
+}
+
+struct dirjob *
+xwork_pick(struct xwork *xwork, int block)
+{
+ struct dirjob *job = NULL;
+ struct list_head *head = NULL;
+
+ head = &xwork->crawl.list;
+
+ pthread_mutex_lock(&xwork->mutex);
+ {
+ for (;;) {
+ if (xwork->stop)
+ break;
+
+ if (!list_empty(head)) {
+ job = list_entry(head->next, typeof(*job), list);
+ list_del_init(&job->list);
+ break;
+ }
+
+ if (((xwork->count * 2) == xwork->idle) &&
+ list_empty(&xwork->crawl.list)) {
+ /* no outstanding jobs, and no
+ active workers
+ */
+ tdbg("Jobless. Terminating\n");
+ xwork->stop = 1;
+ pthread_cond_broadcast(&xwork->cond);
+ break;
+ }
+
+ if (!block)
+ break;
+
+ xwork->idle++;
+ pthread_cond_wait(&xwork->cond, &xwork->mutex);
+ xwork->idle--;
+ }
+ }
+ pthread_mutex_unlock(&xwork->mutex);
+
+ return job;
+}
+
+int
+skip_name(const char *dirname, const char *name)
+{
+ if (strcmp(name, ".") == 0)
+ return 1;
+
+ if (strcmp(name, "..") == 0)
+ return 1;
+
+ if (strcmp(name, "changelogs") == 0)
+ return 1;
+
+ if (strcmp(name, "health_check") == 0)
+ return 1;
+
+ if (strcmp(name, "indices") == 0)
+ return 1;
+
+ if (strcmp(name, "landfill") == 0)
+ return 1;
+
+ return 0;
+}
+
+int
+skip_stat(struct dirjob *job, const char *name)
+{
+ if (job == NULL)
+ return 0;
+
+ if (strcmp(job->dirname, ".glusterfs") == 0) {
+ tdbg(
+ "Directly adding directories under .glusterfs "
+ "to global list: %s\n",
+ name);
+ return 1;
+ }
+
+ if (job->parent != NULL) {
+ if (strcmp(job->parent->dirname, ".glusterfs") == 0) {
+ tdbg(
+ "Directly adding directories under .glusterfs/XX "
+ "to global list: %s\n",
+ name);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+int
+xworker_do_crawl(struct xwork *xwork, struct dirjob *job)
+{
+ DIR *dirp = NULL;
+ int ret = -1;
+ int boff;
+ int plen;
+ char *path = NULL;
+ struct dirjob *cjob = NULL;
+ struct stat statbuf = {
+ 0,
+ };
+ struct dirent *entry;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char gfid_path[PATH_MAX] = {
+ 0,
+ };
+
+ plen = strlen(job->dirname) + 256 + 2;
+ path = alloca(plen);
+
+ tdbg("Entering: %s\n", job->dirname);
+
+ dirp = sys_opendir(job->dirname);
+ if (!dirp) {
+ terr("opendir failed on %s (%s)\n", job->dirname, strerror(errno));
+ goto out;
+ }
+
+ boff = sprintf(path, "%s/", job->dirname);
+
+ for (;;) {
+ errno = 0;
+ entry = sys_readdir(dirp, scratch);
+ if (!entry || errno != 0) {
+ if (errno != 0) {
+ err("readdir(%s): %s\n", job->dirname, strerror(errno));
+ ret = errno;
+ goto out;
+ }
+ break;
+ }
+
+ if (entry->d_ino == 0)
+ continue;
+
+ if (skip_name(job->dirname, entry->d_name))
+ continue;
+
+ /* It is sure that, children and grandchildren of .glusterfs
+ * are directories, just add them to global queue.
+ */
+ if (skip_stat(job, entry->d_name)) {
+ strncpy(path + boff, entry->d_name, (plen - boff));
+ cjob = dirjob_new(path, job);
+ if (!cjob) {
+ err("dirjob_new(%s): %s\n", path, strerror(errno));
+ ret = -1;
+ goto out;
+ }
+ xwork_addcrawl(xwork, cjob);
+ continue;
+ }
+
+ (void)snprintf(gfid_path, sizeof(gfid_path), "%s/.gfid/%s", slavemnt,
+ entry->d_name);
+ ret = sys_lstat(gfid_path, &statbuf);
+
+ if (ret && errno == ENOENT) {
+ out("%s\n", entry->d_name);
+ BUMP(skipped_gfids);
+ }
+
+ if (ret && errno != ENOENT) {
+ err("stat on slave failed(%s): %s\n", gfid_path, strerror(errno));
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ if (dirp)
+ (void)sys_closedir(dirp);
+
+ return ret;
+}
+
+void *
+xworker_crawl(void *data)
+{
+ struct xwork *xwork = data;
+ struct dirjob *job = NULL;
+ int ret = -1;
+
+ while ((job = xwork_pick(xwork, 0))) {
+ ret = xworker_do_crawl(xwork, job);
+ dirjob_ret(job, ret);
+ }
+
+ return NULL;
+}
+
+int
+xwork_fini(struct xwork *xwork, int stop)
+{
+ int i = 0;
+ int ret = 0;
+ void *tret = 0;
+
+ pthread_mutex_lock(&xwork->mutex);
+ {
+ xwork->stop = (xwork->stop || stop);
+ pthread_cond_broadcast(&xwork->cond);
+ }
+ pthread_mutex_unlock(&xwork->mutex);
+
+ for (i = 0; i < xwork->count; i++) {
+ pthread_join(xwork->cthreads[i], &tret);
+ tdbg("CThread id %ld returned %p\n", xwork->cthreads[i], tret);
+ }
+
+ if (debug) {
+ assert(xwork->rootjob->refcnt == 1);
+ dirjob_ret(xwork->rootjob, 0);
+ }
+
+ if (stats)
+ pthread_spin_destroy(&stats_lock);
+
+ return ret;
+}
+
+int
+xwork_init(struct xwork *xwork, int count)
+{
+ int i = 0;
+ int ret = 0;
+ struct dirjob *rootjob = NULL;
+
+ if (stats)
+ pthread_spin_init(&stats_lock, PTHREAD_PROCESS_PRIVATE);
+
+ pthread_mutex_init(&xwork->mutex, NULL);
+ pthread_cond_init(&xwork->cond, NULL);
+
+ INIT_LIST_HEAD(&xwork->crawl.list);
+
+ rootjob = dirjob_new(".glusterfs", NULL);
+ if (debug)
+ xwork->rootjob = dirjob_ref(rootjob);
+
+ xwork_addcrawl(xwork, rootjob);
+
+ xwork->count = count;
+ for (i = 0; i < count; i++) {
+ ret = pthread_create(&xwork->cthreads[i], NULL, xworker_crawl, xwork);
+ if (ret)
+ break;
+ tdbg("Spawned crawler %d thread %ld\n", i, xwork->cthreads[i]);
+ }
+
+ return ret;
+}
+
+int
+xfind(const char *basedir)
+{
+ struct xwork xwork;
+ int ret = 0;
+ char *cwd = NULL;
+
+ ret = chdir(basedir);
+ if (ret) {
+ err("%s: %s\n", basedir, strerror(errno));
+ return ret;
+ }
+
+ cwd = getcwd(0, 0);
+ if (!cwd) {
+ err("getcwd(): %s\n", strerror(errno));
+ return -1;
+ }
+
+ tdbg("Working directory: %s\n", cwd);
+ free(cwd);
+
+ memset(&xwork, 0, sizeof(xwork));
+
+ ret = xwork_init(&xwork, workers);
+ if (ret == 0)
+ xworker_crawl(&xwork);
+
+ ret = xwork_fini(&xwork, ret);
+ stats_dump();
+
+ return ret;
+}
+
+static char *
+parse_and_validate_args(int argc, char *argv[])
+{
+ char *basedir = NULL;
+ struct stat d = {
+ 0,
+ };
+ int ret = -1;
+#ifndef __FreeBSD__
+ unsigned char volume_id[16];
+#endif /* __FreeBSD__ */
+ char *slv_mnt = NULL;
+
+ if (argc != 4) {
+ err("Usage: %s <DIR> <SLAVE-VOL-MOUNT> <CRAWL-THREAD-COUNT>\n",
+ argv[0]);
+ return NULL;
+ }
+
+ basedir = argv[1];
+ ret = sys_lstat(basedir, &d);
+ if (ret) {
+ err("%s: %s\n", basedir, strerror(errno));
+ return NULL;
+ }
+
+#ifndef __FreeBSD__
+ ret = sys_lgetxattr(basedir, "trusted.glusterfs.volume-id", volume_id, 16);
+ if (ret != 16) {
+ err("%s:Not a valid brick path.\n", basedir);
+ return NULL;
+ }
+#endif /* __FreeBSD__ */
+
+ slv_mnt = argv[2];
+ ret = sys_lstat(slv_mnt, &d);
+ if (ret) {
+ err("%s: %s\n", slv_mnt, strerror(errno));
+ return NULL;
+ }
+ slavemnt = argv[2];
+
+ workers = atoi(argv[3]);
+ if (workers <= 0)
+ workers = DEFAULT_WORKERS;
+
+ return basedir;
+}
+
+int
+main(int argc, char *argv[])
+{
+ char *basedir = NULL;
+
+ basedir = parse_and_validate_args(argc, argv);
+ if (!basedir)
+ return 1;
+
+ xfind(basedir);
+
+ return 0;
+}
diff --git a/tools/gfind_missing_files/gfid_to_path.py b/tools/gfind_missing_files/gfid_to_path.py
new file mode 100644
index 00000000000..01e08a9494a
--- /dev/null
+++ b/tools/gfind_missing_files/gfid_to_path.py
@@ -0,0 +1,162 @@
+#!/usr/bin/python3
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import sys
+import os
+import xattr
+import uuid
+import re
+import errno
+
+CHANGELOG_SEARCH_MAX_TRY = 31
+DEC_CTIME_START = 5
+ROOT_GFID = "00000000-0000-0000-0000-000000000001"
+MAX_NUM_CHANGELOGS_TRY = 2
+
+
+def output_not_found(gfid):
+ # Write GFID to stderr
+ sys.stderr.write("%s\n" % gfid)
+
+
+def output_success(path):
+ # Write converted Path to Stdout
+ sys.stdout.write("%s\n" % path)
+
+
+def full_dir_path(gfid):
+ out_path = ""
+ while True:
+ path = os.path.join(".glusterfs", gfid[0:2], gfid[2:4], gfid)
+ path_readlink = os.readlink(path)
+ pgfid = os.path.dirname(path_readlink)
+ out_path = os.path.join(os.path.basename(path_readlink), out_path)
+ if pgfid == "../../00/00/%s" % ROOT_GFID:
+ out_path = os.path.join("./", out_path)
+ break
+ gfid = os.path.basename(pgfid)
+ return out_path
+
+
+def find_path_from_changelog(fd, gfid):
+ """
+ In given Changelog File, finds using following pattern
+ <T><GFID>\x00<TYPE>\x00<MODE>\x00<UID>\x00<GID>\x00<PARGFID>/<BASENAME>
+ Pattern search finds PARGFID and BASENAME, Convert PARGFID to Path
+ Using readlink and add basename to form Full path.
+ """
+ content = fd.read()
+
+ pattern = "E%s" % gfid
+ pattern += "\x00(3|23)\x00\d+\x00\d+\x00\d+\x00([^\x00]+)/([^\x00]+)"
+ pat = re.compile(pattern)
+ match = pat.search(content)
+
+ if match:
+ pgfid = match.group(2)
+ basename = match.group(3)
+ if pgfid == ROOT_GFID:
+ return os.path.join("./", basename)
+ else:
+ full_path_parent = full_dir_path(pgfid)
+ if full_path_parent:
+ return os.path.join(full_path_parent, basename)
+
+ return None
+
+
+def gfid_to_path(gfid):
+ """
+ Try readlink, if it is directory it succeeds.
+ Get ctime of the GFID file, Decrement by 5 sec
+ Search for Changelog filename, Since Changelog file generated
+ every 15 sec, Search and get immediate next Changelog after the file
+ Creation. Get the Path by searching in Changelog file.
+ Get the resultant file's GFID and Compare with the input, If these
+ GFIDs are different then Some thing is changed(May be Rename)
+ """
+ gfid = gfid.strip()
+ gpath = os.path.join(".glusterfs", gfid[0:2], gfid[2:4], gfid)
+ try:
+ output_success(full_dir_path(gfid))
+ return
+ except OSError:
+ # Not an SymLink
+ pass
+
+ try:
+ ctime = int(os.stat(gpath).st_ctime)
+ ctime -= DEC_CTIME_START
+ except (OSError, IOError):
+ output_not_found(gfid)
+ return
+
+ path = None
+ found_changelog = False
+ changelog_parse_try = 0
+ for i in range(CHANGELOG_SEARCH_MAX_TRY):
+ cl = os.path.join(".glusterfs/changelogs", "CHANGELOG.%s" % ctime)
+
+ try:
+ with open(cl, "rb") as f:
+ changelog_parse_try += 1
+ found_changelog = True
+ path = find_path_from_changelog(f, gfid)
+ if not path and changelog_parse_try < MAX_NUM_CHANGELOGS_TRY:
+ ctime += 1
+ continue
+ break
+ except (IOError, OSError) as e:
+ if e.errno == errno.ENOENT:
+ ctime += 1
+ else:
+ break
+
+ if not found_changelog:
+ output_not_found(gfid)
+ return
+
+ if not path:
+ output_not_found(gfid)
+ return
+ gfid1 = str(uuid.UUID(bytes=xattr.get(path, "trusted.gfid")))
+ if gfid != gfid1:
+ output_not_found(gfid)
+ return
+
+ output_success(path)
+
+
+def main():
+ num_arguments = 3
+ if not sys.stdin.isatty():
+ num_arguments = 2
+
+ if len(sys.argv) != num_arguments:
+ sys.stderr.write("Invalid arguments\nUsage: "
+ "%s <BRICK_PATH> <GFID_FILE>\n" % sys.argv[0])
+ sys.exit(1)
+
+ path = sys.argv[1]
+
+ if sys.stdin.isatty():
+ gfid_list = os.path.abspath(sys.argv[2])
+ os.chdir(path)
+ with open(gfid_list) as f:
+ for gfid in f:
+ gfid_to_path(gfid)
+ else:
+ os.chdir(path)
+ for gfid in sys.stdin:
+ gfid_to_path(gfid)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/gfind_missing_files/gfid_to_path.sh b/tools/gfind_missing_files/gfid_to_path.sh
new file mode 100644
index 00000000000..ebe817ac2f3
--- /dev/null
+++ b/tools/gfind_missing_files/gfid_to_path.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+
+## Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+## This file is part of GlusterFS.
+##
+## This file is licensed to you under your choice of the GNU Lesser
+## General Public License, version 3 or any later version (LGPLv3 or
+## later), or the GNU General Public License, version 2 (GPLv2), in all
+## cases as published by the Free Software Foundation.
+
+E_BADARGS=65
+
+
+gfid_to_path ()
+{
+ brick_dir=$1;
+ gfid_file=$(readlink -e $2);
+
+ current_dir=$(pwd);
+ cd $brick_dir;
+
+ while read gfid
+ do
+ to_search=`echo .glusterfs/${gfid:0:2}"/"${gfid:2:2}"/"$gfid`;
+ find . -samefile $to_search | grep -v $to_search;
+ done < $gfid_file;
+
+ cd $current_dir;
+}
+
+
+main ()
+{
+ if [ $# -ne 2 ]
+ then
+ echo "Usage: `basename $0` BRICK_DIR GFID_FILE";
+ exit $E_BADARGS;
+ fi
+
+ gfid_to_path $1 $2;
+}
+
+main "$@";
diff --git a/tools/gfind_missing_files/gfind_missing_files.sh b/tools/gfind_missing_files/gfind_missing_files.sh
new file mode 100644
index 00000000000..e7aaa0b5dd4
--- /dev/null
+++ b/tools/gfind_missing_files/gfind_missing_files.sh
@@ -0,0 +1,119 @@
+#!/bin/sh
+
+## Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+## This file is part of GlusterFS.
+##
+## This file is licensed to you under your choice of the GNU Lesser
+## General Public License, version 3 or any later version (LGPLv3 or
+## later), or the GNU General Public License, version 2 (GPLv2), in all
+## cases as published by the Free Software Foundation.
+
+BRICKPATH= #Brick path of gluster volume
+SLAVEHOST= #Slave hostname
+SLAVEVOL= #Slave volume
+SLAVEMNT= #Slave gluster volume mount point
+WORKERS=4 #Default number of worker threads
+
+out()
+{
+ echo "$@";
+}
+
+fatal()
+{
+ out FATAL "$@";
+ exit 1
+}
+
+ping_host ()
+{
+ ### Use bash internal socket support
+ {
+ exec 400<>/dev/tcp/$1/$2
+ if [ $? -ne '0' ]; then
+ return 1;
+ else
+ exec 400>&-
+ return 0;
+ fi
+ } 1>&2 2>/dev/null
+}
+
+mount_slave()
+{
+ local i; # inode number
+ SSH_PORT=22
+
+ SLAVEMNT=`mktemp -d`
+ [ "x$SLAVEMNT" = "x" ] && fatal "Could not mktemp directory";
+ [ -d "$SLAVEMNT" ] || fatal "$SLAVEMNT not a directory";
+
+ ping_host ${SLAVEHOST} $SSH_PORT
+ if [ $? -ne 0 ]; then
+ echo "$SLAVEHOST not reachable.";
+ exit 1;
+ fi;
+
+ glusterfs --volfile-id=$SLAVEVOL --aux-gfid-mount --volfile-server=$SLAVEHOST $SLAVEMNT;
+ i=$(stat -c '%i' $SLAVEMNT);
+ [ "x$i" = "x1" ] || fatal "Could not mount volume $2 on $SLAVEMNT Please check host and volume exists";
+}
+
+parse_cli()
+{
+ if [ "$#" -ne 4 ]; then
+ echo "Usage: gfind_missing_files <brick-path> <slave-host> <slave-vol> <OUTFILE>"
+ exit 1
+ else
+ BRICKPATH=$1;
+ SLAVEHOST=$2;
+ SLAVEVOL=$3;
+ OUTFILE=$4;
+
+ mount_slave;
+ echo "Slave volume is mounted at ${SLAVEMNT}"
+ echo
+ fi
+}
+
+main()
+{
+ parse_cli "$@";
+
+ echo "Calling crawler...";
+ path=$(readlink -e $0)
+ $(dirname $path)/gcrawler ${BRICKPATH} ${SLAVEMNT} ${WORKERS} > ${OUTFILE}
+
+ #Clean up the mount
+ umount $SLAVEMNT;
+ rmdir $SLAVEMNT;
+
+ echo "Crawl Complete."
+ num_files_missing=$(wc -l ${OUTFILE} | awk '{print $1}')
+ if [ $num_files_missing -eq 0 ]
+ then
+ echo "Total Missing File Count : 0"
+ exit 0;
+ fi
+
+ echo "gfids of skipped files are available in the file ${OUTFILE}"
+ echo
+ echo "Starting gfid to path conversion"
+
+ #Call python script to convert gfids to full pathname
+ INFILE=$(readlink -e ${OUTFILE})
+ python $(dirname $path)/gfid_to_path.py ${BRICKPATH} ${INFILE} 1> ${OUTFILE}_pathnames 2> ${OUTFILE}_gfids
+ echo "Path names of skipped files are available in the file ${OUTFILE}_pathnames"
+
+ gfid_to_path_failures=$(wc -l ${OUTFILE}_gfids | awk '{print $1}')
+ if [ $gfid_to_path_failures -gt 0 ]
+ then
+ echo "WARNING: Unable to convert some GFIDs to Paths, GFIDs logged to ${OUTFILE}_gfids"
+ echo "Use $(dirname $path)/gfid_to_path.sh <brick-path> ${OUTFILE}_gfids to convert those GFIDs to Path"
+ fi
+
+ #Output
+ echo "Total Missing File Count : $(wc -l ${OUTFILE} | awk '{print $1}')"
+}
+
+main "$@";
diff --git a/tools/glusterfind/Makefile.am b/tools/glusterfind/Makefile.am
new file mode 100644
index 00000000000..f17dbdb228e
--- /dev/null
+++ b/tools/glusterfind/Makefile.am
@@ -0,0 +1,24 @@
+SUBDIRS = src
+
+EXTRA_DIST = S57glusterfind-delete-post.py glusterfind
+
+if WITH_SERVER
+bin_SCRIPTS = glusterfind
+endif
+
+CLEANFILES = $(bin_SCRIPTS)
+
+if WITH_SERVER
+deletehookscriptsdir = $(GLUSTERFS_LIBEXECDIR)/glusterfind/
+deletehookscripts_SCRIPTS = S57glusterfind-delete-post.py
+
+uninstall-local:
+ rm -f $(DESTDIR)$(GLUSTERD_WORKDIR)/hooks/1/delete/post/S57glusterfind-delete-post
+
+install-data-local:
+ $(mkdir_p) $(DESTDIR)$(GLUSTERD_WORKDIR)/glusterfind/.keys
+ $(mkdir_p) $(DESTDIR)$(GLUSTERD_WORKDIR)/hooks/1/delete/post/
+ rm -f $(DESTDIR)$(GLUSTERD_WORKDIR)/hooks/1/delete/post/S57glusterfind-delete-post
+ ln -s $(GLUSTERFS_LIBEXECDIR)/glusterfind/S57glusterfind-delete-post.py \
+ $(DESTDIR)$(GLUSTERD_WORKDIR)/hooks/1/delete/post/S57glusterfind-delete-post
+endif
diff --git a/tools/glusterfind/S57glusterfind-delete-post.py b/tools/glusterfind/S57glusterfind-delete-post.py
new file mode 100755
index 00000000000..5beece220f0
--- /dev/null
+++ b/tools/glusterfind/S57glusterfind-delete-post.py
@@ -0,0 +1,69 @@
+#!/usr/bin/python3
+import os
+import shutil
+from errno import ENOENT
+from subprocess import Popen, PIPE
+from argparse import ArgumentParser
+
+
+DEFAULT_GLUSTERD_WORKDIR = "/var/lib/glusterd"
+
+
+def handle_rm_error(func, path, exc_info):
+ if exc_info[1].errno == ENOENT:
+ return
+
+ raise exc_info[1]
+
+
+def get_glusterd_workdir():
+ p = Popen(["gluster", "system::", "getwd"],
+ stdout=PIPE, stderr=PIPE, universal_newlines=True)
+
+ out, _ = p.communicate()
+
+ if p.returncode == 0:
+ return out.strip()
+ else:
+ return DEFAULT_GLUSTERD_WORKDIR
+
+
+def get_args():
+ parser = ArgumentParser(description="Volume delete post hook script")
+ parser.add_argument("--volname")
+ return parser.parse_args()
+
+
+def main():
+ args = get_args()
+ glusterfind_dir = os.path.join(get_glusterd_workdir(), "glusterfind")
+
+ # Check all session directories, if any directory found for
+ # the deleted volume, cleanup all the session directories
+ try:
+ ls_glusterfind_dir = os.listdir(glusterfind_dir)
+ except OSError:
+ ls_glusterfind_dir = []
+
+ for session in ls_glusterfind_dir:
+ # don't blow away the keys directory
+ if session == ".keys":
+ continue
+
+ # Possible session directory
+ volume_session_path = os.path.join(glusterfind_dir,
+ session,
+ args.volname)
+ if os.path.exists(volume_session_path):
+ shutil.rmtree(volume_session_path, onerror=handle_rm_error)
+
+ # Try to Remove directory, if any other dir exists for different
+ # volume, then rmdir will fail with ENOTEMPTY which is fine
+ try:
+ os.rmdir(os.path.join(glusterfind_dir, session))
+ except (OSError, IOError):
+ pass
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/glusterfind/glusterfind.in b/tools/glusterfind/glusterfind.in
new file mode 100644
index 00000000000..ca154b625dd
--- /dev/null
+++ b/tools/glusterfind/glusterfind.in
@@ -0,0 +1,18 @@
+#!/usr/bin/python3
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import sys
+sys.path.insert(1, '@GLUSTERFS_LIBEXECDIR@/')
+sys.path.insert(1, '@GLUSTERFS_LIBEXECDIR@/glusterfind')
+
+from glusterfind.main import main
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/glusterfind/src/Makefile.am b/tools/glusterfind/src/Makefile.am
new file mode 100644
index 00000000000..43b6141b01c
--- /dev/null
+++ b/tools/glusterfind/src/Makefile.am
@@ -0,0 +1,16 @@
+glusterfinddir = $(GLUSTERFS_LIBEXECDIR)/glusterfind
+
+if WITH_SERVER
+glusterfind_PYTHON = conf.py utils.py __init__.py \
+ main.py libgfchangelog.py changelogdata.py gfind_py2py3.py
+
+glusterfind_SCRIPTS = changelog.py nodeagent.py \
+ brickfind.py
+
+glusterfind_DATA = tool.conf
+endif
+
+EXTRA_DIST = changelog.py nodeagent.py brickfind.py \
+ tool.conf changelogdata.py
+
+CLEANFILES =
diff --git a/tools/glusterfind/src/__init__.py b/tools/glusterfind/src/__init__.py
new file mode 100644
index 00000000000..1753698b5fa
--- /dev/null
+++ b/tools/glusterfind/src/__init__.py
@@ -0,0 +1,9 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
diff --git a/tools/glusterfind/src/brickfind.py b/tools/glusterfind/src/brickfind.py
new file mode 100644
index 00000000000..73b6350188d
--- /dev/null
+++ b/tools/glusterfind/src/brickfind.py
@@ -0,0 +1,118 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import os
+import sys
+import logging
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+try:
+ import urllib.parse as urllib
+except ImportError:
+ import urllib
+import time
+
+from utils import mkdirp, setup_logger, create_file, output_write, find
+import conf
+
+
+PROG_DESCRIPTION = """
+Changelog Crawler
+"""
+
+logger = logging.getLogger()
+
+
+def brickfind_crawl(brick, args):
+ if brick.endswith("/"):
+ brick = brick[0:len(brick)-1]
+
+ working_dir = os.path.dirname(args.outfile)
+ mkdirp(working_dir, exit_on_err=True, logger=logger)
+ create_file(args.outfile, exit_on_err=True, logger=logger)
+
+ with open(args.outfile, "a+") as fout:
+ brick_path_len = len(brick)
+
+ def output_callback(path, filter_result, is_dir):
+ path = path.strip()
+ path = path[brick_path_len+1:]
+
+ if args.type == "both":
+ output_write(fout, path, args.output_prefix,
+ encode=(not args.no_encode), tag=args.tag,
+ field_separator=args.field_separator)
+ else:
+ if (is_dir and args.type == "d") or (
+ (not is_dir) and args.type == "f"):
+ output_write(fout, path, args.output_prefix,
+ encode=(not args.no_encode), tag=args.tag,
+ field_separator=args.field_separator)
+
+ ignore_dirs = [os.path.join(brick, dirname)
+ for dirname in
+ conf.get_opt("brick_ignore_dirs").split(",")]
+
+ find(brick, callback_func=output_callback,
+ ignore_dirs=ignore_dirs)
+
+ fout.flush()
+ os.fsync(fout.fileno())
+
+
+def _get_args():
+ parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+ description=PROG_DESCRIPTION)
+
+ parser.add_argument("session", help="Session Name")
+ parser.add_argument("volume", help="Volume Name")
+ parser.add_argument("node", help="Node Name")
+ parser.add_argument("brick", help="Brick Name")
+ parser.add_argument("outfile", help="Output File")
+ parser.add_argument("tag", help="Tag to prefix file name with")
+ parser.add_argument("--only-query", help="Only query, No session update",
+ action="store_true")
+ parser.add_argument("--debug", help="Debug", action="store_true")
+ parser.add_argument("--no-encode",
+ help="Do not encode path in outfile",
+ action="store_true")
+ parser.add_argument("--output-prefix", help="File prefix in output",
+ default=".")
+ parser.add_argument('--type', help="type: f, f-files only"
+ " d, d-directories only, by default = both",
+ default='both')
+ parser.add_argument("--field-separator", help="Field separator",
+ default=" ")
+
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = _get_args()
+ session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
+ status_file = os.path.join(session_dir, args.volume,
+ "%s.status" % urllib.quote_plus(args.brick))
+ status_file_pre = status_file + ".pre"
+ mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
+ logger=logger)
+ mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
+ exit_on_err=True)
+ log_file = os.path.join(conf.get_opt("log_dir"),
+ args.session,
+ args.volume,
+ "brickfind.log")
+ setup_logger(logger, log_file, args.debug)
+
+ time_to_update = int(time.time())
+ brickfind_crawl(args.brick, args)
+ if not args.only_query:
+ with open(status_file_pre, "w") as f:
+ f.write(str(time_to_update))
+ sys.exit(0)
diff --git a/tools/glusterfind/src/changelog.py b/tools/glusterfind/src/changelog.py
new file mode 100644
index 00000000000..a5e9ea4288f
--- /dev/null
+++ b/tools/glusterfind/src/changelog.py
@@ -0,0 +1,469 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import os
+import sys
+import time
+import xattr
+import logging
+from gfind_py2py3 import bytearray_to_str
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+import hashlib
+try:
+ import urllib.parse as urllib
+except ImportError:
+ import urllib
+import codecs
+
+import libgfchangelog
+from utils import mkdirp, symlink_gfid_to_path
+from utils import fail, setup_logger, find
+from utils import get_changelog_rollover_time
+from utils import output_path_prepare
+from changelogdata import ChangelogData
+import conf
+
+
+CHANGELOG_LOG_LEVEL = 9
+CHANGELOG_CONN_RETRIES = 5
+CHANGELOGAPI_NUM_WORKERS = 3
+PROG_DESCRIPTION = """
+Changelog Crawler
+"""
+history_turns = 0
+history_turn_time = 0
+
+logger = logging.getLogger()
+
+
+def pgfid_to_path(brick, changelog_data):
+ """
+ For all the pgfids in table, converts into path using recursive
+ readlink.
+ """
+ # pgfid1 to path1 in case of CREATE/MKNOD/MKDIR/LINK/SYMLINK
+ for row in changelog_data.gfidpath_get_distinct("pgfid1", {"path1": ""}):
+ # In case of Data/Metadata only, pgfid1 will not be there
+ if row[0] == "":
+ continue
+
+ try:
+ path = symlink_gfid_to_path(brick, row[0])
+ path = output_path_prepare(path, args)
+ changelog_data.gfidpath_set_path1(path, row[0])
+ except (IOError, OSError) as e:
+ logger.warn("Error converting to path: %s" % e)
+ continue
+
+ # pgfid2 to path2 in case of RENAME
+ for row in changelog_data.gfidpath_get_distinct("pgfid2",
+ {"type": "RENAME",
+ "path2": ""}):
+ # Only in case of Rename pgfid2 exists
+ if row[0] == "":
+ continue
+
+ try:
+ path = symlink_gfid_to_path(brick, row[0])
+ path = output_path_prepare(path, args)
+ changelog_data.gfidpath_set_path2(path, row[0])
+ except (IOError, OSError) as e:
+ logger.warn("Error converting to path: %s" % e)
+ continue
+
+
+def populate_pgfid_and_inodegfid(brick, changelog_data):
+ """
+ For all the DATA/METADATA modifications GFID,
+ If symlink, directly convert to Path using Readlink.
+ If not symlink, try to get PGFIDs via xattr query and populate it
+ to pgfid table, collect inodes in inodegfid table
+ """
+ for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}):
+ gfid = row[3].strip()
+ p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
+ if os.path.islink(p):
+ # It is a Directory if GFID backend path is symlink
+ try:
+ path = symlink_gfid_to_path(brick, gfid)
+ path = output_path_prepare(path, args)
+ changelog_data.gfidpath_update({"path1": path},
+ {"gfid": gfid})
+ except (IOError, OSError) as e:
+ logger.warn("Error converting to path: %s" % e)
+ continue
+ else:
+ try:
+ # INODE and GFID to inodegfid table
+ changelog_data.inodegfid_add(os.stat(p).st_ino, gfid)
+ file_xattrs = xattr.list(p)
+ for x in file_xattrs:
+ x_str = bytearray_to_str(x)
+ if x_str.startswith("trusted.pgfid."):
+ # PGFID in pgfid table
+ changelog_data.pgfid_add(x_str.split(".")[-1])
+ except (IOError, OSError):
+ # All OS Errors ignored, since failures will be logged
+ # in End. All GFIDs present in gfidpath table
+ continue
+
+
+def enum_hard_links_using_gfid2path(brick, gfid, args):
+ hardlinks = []
+ p = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
+ if not os.path.isdir(p):
+ # we have a symlink or a normal file
+ try:
+ file_xattrs = xattr.list(p)
+ for x in file_xattrs:
+ x_str = bytearray_to_str(x)
+ if x_str.startswith("trusted.gfid2path."):
+ # get the value for the xattr i.e. <PGFID>/<BN>
+ v = xattr.getxattr(p, x_str)
+ v_str = bytearray_to_str(v)
+ pgfid, bn = v_str.split(os.sep)
+ try:
+ path = symlink_gfid_to_path(brick, pgfid)
+ fullpath = os.path.join(path, bn)
+ fullpath = output_path_prepare(fullpath, args)
+ hardlinks.append(fullpath)
+ except (IOError, OSError) as e:
+ logger.warn("Error converting to path: %s" % e)
+ continue
+ except (IOError, OSError):
+ pass
+ return hardlinks
+
+
+def gfid_to_all_paths_using_gfid2path(brick, changelog_data, args):
+ path = ""
+ for row in changelog_data.gfidpath_get({"path1": "", "type": "MODIFY"}):
+ gfid = row[3].strip()
+ logger.debug("Processing gfid %s" % gfid)
+ hardlinks = enum_hard_links_using_gfid2path(brick, gfid, args)
+
+ path = ",".join(hardlinks)
+
+ changelog_data.gfidpath_update({"path1": path}, {"gfid": gfid})
+
+
+def gfid_to_path_using_pgfid(brick, changelog_data, args):
+ """
+ For all the pgfids collected, Converts to Path and
+ does readdir on those directories and looks up inodegfid
+ table for matching inode number.
+ """
+ populate_pgfid_and_inodegfid(brick, changelog_data)
+
+ # If no GFIDs needs conversion to Path
+ if not changelog_data.inodegfid_exists({"converted": 0}):
+ return
+
+ def inode_filter(path):
+ # Looks in inodegfid table, if exists returns
+ # inode number else None
+ try:
+ st = os.lstat(path)
+ except (OSError, IOError):
+ st = None
+
+ if st and changelog_data.inodegfid_exists({"inode": st.st_ino}):
+ return st.st_ino
+
+ return None
+
+ # Length of brick path, to remove from output path
+ brick_path_len = len(brick)
+
+ def output_callback(path, inode):
+ # For each path found, encodes it and updates path1
+ # Also updates converted flag in inodegfid table as 1
+ path = path.strip()
+ path = path[brick_path_len+1:]
+
+ path = output_path_prepare(path, args)
+
+ changelog_data.append_path1(path, inode)
+ changelog_data.inodegfid_update({"converted": 1}, {"inode": inode})
+
+ ignore_dirs = [os.path.join(brick, dirname)
+ for dirname in
+ conf.get_opt("brick_ignore_dirs").split(",")]
+
+ for row in changelog_data.pgfid_get():
+ try:
+ path = symlink_gfid_to_path(brick, row[0])
+ find(os.path.join(brick, path),
+ callback_func=output_callback,
+ filter_func=inode_filter,
+ ignore_dirs=ignore_dirs,
+ subdirs_crawl=False)
+ except (IOError, OSError) as e:
+ logger.warn("Error converting to path: %s" % e)
+ continue
+
+
+def gfid_to_path_using_batchfind(brick, changelog_data):
+ # If all the GFIDs converted using gfid_to_path_using_pgfid
+ if not changelog_data.inodegfid_exists({"converted": 0}):
+ return
+
+ def inode_filter(path):
+ # Looks in inodegfid table, if exists returns
+ # inode number else None
+ try:
+ st = os.lstat(path)
+ except (OSError, IOError):
+ st = None
+
+ if st and changelog_data.inodegfid_exists({"inode": st.st_ino}):
+ return st.st_ino
+
+ return None
+
+ # Length of brick path, to remove from output path
+ brick_path_len = len(brick)
+
+ def output_callback(path, inode):
+ # For each path found, encodes it and updates path1
+ # Also updates converted flag in inodegfid table as 1
+ path = path.strip()
+ path = path[brick_path_len+1:]
+ path = output_path_prepare(path, args)
+
+ changelog_data.append_path1(path, inode)
+
+ ignore_dirs = [os.path.join(brick, dirname)
+ for dirname in
+ conf.get_opt("brick_ignore_dirs").split(",")]
+
+ # Full Namespace Crawl
+ find(brick, callback_func=output_callback,
+ filter_func=inode_filter,
+ ignore_dirs=ignore_dirs)
+
+
+def parse_changelog_to_db(changelog_data, filename, args):
+ """
+ Parses a Changelog file and populates data in gfidpath table
+ """
+ with codecs.open(filename, encoding="utf-8") as f:
+ changelogfile = os.path.basename(filename)
+ for line in f:
+ data = line.strip().split(" ")
+ if data[0] == "E" and data[2] in ["CREATE", "MKNOD", "MKDIR"]:
+ # CREATE/MKDIR/MKNOD
+ changelog_data.when_create_mknod_mkdir(changelogfile, data)
+ elif data[0] in ["D", "M"]:
+ # DATA/META
+ if not args.only_namespace_changes:
+ changelog_data.when_data_meta(changelogfile, data)
+ elif data[0] == "E" and data[2] in ["LINK", "SYMLINK"]:
+ # LINK/SYMLINK
+ changelog_data.when_link_symlink(changelogfile, data)
+ elif data[0] == "E" and data[2] == "RENAME":
+ # RENAME
+ changelog_data.when_rename(changelogfile, data)
+ elif data[0] == "E" and data[2] in ["UNLINK", "RMDIR"]:
+ # UNLINK/RMDIR
+ changelog_data.when_unlink_rmdir(changelogfile, data)
+
+
+def get_changes(brick, hash_dir, log_file, start, end, args):
+ """
+ Makes use of libgfchangelog's history API to get changelogs
+ containing changes from start and end time. Further collects
+ the modified gfids from the changelogs and writes the list
+ of gfid to 'gfid_list' file.
+ """
+ session_dir = os.path.join(conf.get_opt("session_dir"),
+ args.session)
+ status_file = os.path.join(session_dir, args.volume,
+ "%s.status" % urllib.quote_plus(args.brick))
+
+ # Get previous session
+ try:
+ with open(status_file) as f:
+ start = int(f.read().strip())
+ except (ValueError, OSError, IOError):
+ start = args.start
+
+ try:
+ libgfchangelog.cl_init()
+ libgfchangelog.cl_register(brick, hash_dir, log_file,
+ CHANGELOG_LOG_LEVEL, CHANGELOG_CONN_RETRIES)
+ except libgfchangelog.ChangelogException as e:
+ fail("%s Changelog register failed: %s" % (brick, e), logger=logger)
+
+ # Output files to record GFIDs and GFID to Path failure GFIDs
+ changelog_data = ChangelogData(args.outfile, args)
+
+ # Changelogs path(Hard coded to BRICK/.glusterfs/changelogs
+ cl_path = os.path.join(brick, ".glusterfs/changelogs")
+
+ # Fail if History fails for requested Start and End
+ try:
+ actual_end = libgfchangelog.cl_history_changelog(
+ cl_path, start, end, CHANGELOGAPI_NUM_WORKERS)
+ except libgfchangelog.ChangelogException as e:
+ fail("%s: %s Historical Changelogs not available: %s" %
+ (args.node, brick, e), logger=logger)
+
+ logger.info("[1/4] Starting changelog parsing ...")
+ try:
+ # scan followed by getchanges till scan returns zero.
+ # history_scan() is blocking call, till it gets the number
+ # of changelogs to process. Returns zero when no changelogs
+ # to be processed. returns positive value as number of changelogs
+ # to be processed, which will be fetched using
+ # history_getchanges()
+ changes = []
+ while libgfchangelog.cl_history_scan() > 0:
+ changes = libgfchangelog.cl_history_getchanges()
+
+ for change in changes:
+ # Ignore if last processed changelog comes
+ # again in list
+ if change.endswith(".%s" % start):
+ continue
+ try:
+ parse_changelog_to_db(changelog_data, change, args)
+ libgfchangelog.cl_history_done(change)
+ except IOError as e:
+ logger.warn("Error parsing changelog file %s: %s" %
+ (change, e))
+
+ changelog_data.commit()
+ except libgfchangelog.ChangelogException as e:
+ fail("%s Error during Changelog Crawl: %s" % (brick, e),
+ logger=logger)
+
+ logger.info("[1/4] Finished changelog parsing.")
+
+ # Convert all pgfid available from Changelogs
+ logger.info("[2/4] Starting 'pgfid to path' conversions ...")
+ pgfid_to_path(brick, changelog_data)
+ changelog_data.commit()
+ logger.info("[2/4] Finished 'pgfid to path' conversions.")
+
+ # Convert all gfids recorded for data and metadata to all hardlink paths
+ logger.info("[3/4] Starting 'gfid2path' conversions ...")
+ gfid_to_all_paths_using_gfid2path(brick, changelog_data, args)
+ changelog_data.commit()
+ logger.info("[3/4] Finished 'gfid2path' conversions.")
+
+ # If some GFIDs fail to get converted from previous step,
+ # convert using find
+ logger.info("[4/4] Starting 'gfid to path using batchfind' "
+ "conversions ...")
+ gfid_to_path_using_batchfind(brick, changelog_data)
+ changelog_data.commit()
+ logger.info("[4/4] Finished 'gfid to path using batchfind' conversions.")
+
+ return actual_end
+
+
+def changelog_crawl(brick, start, end, args):
+ """
+ Init function, prepares working dir and calls Changelog query
+ """
+ if brick.endswith("/"):
+ brick = brick[0:len(brick)-1]
+
+ # WORKING_DIR/BRICKHASH/OUTFILE
+ working_dir = os.path.dirname(args.outfile)
+ brickhash = hashlib.sha1(brick.encode())
+ brickhash = str(brickhash.hexdigest())
+ working_dir = os.path.join(working_dir, brickhash)
+
+ mkdirp(working_dir, exit_on_err=True, logger=logger)
+
+ log_file = os.path.join(conf.get_opt("log_dir"),
+ args.session,
+ args.volume,
+ "changelog.%s.log" % brickhash)
+
+ logger.info("%s Started Changelog Crawl. Start: %s, End: %s"
+ % (brick, start, end))
+ return get_changes(brick, working_dir, log_file, start, end, args)
+
+
+def _get_args():
+ parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+ description=PROG_DESCRIPTION)
+
+ parser.add_argument("session", help="Session Name")
+ parser.add_argument("volume", help="Volume Name")
+ parser.add_argument("node", help="Node Name")
+ parser.add_argument("brick", help="Brick Name")
+ parser.add_argument("outfile", help="Output File")
+ parser.add_argument("start", help="Start Time", type=int)
+ parser.add_argument("end", help="End Time", type=int)
+ parser.add_argument("--only-query", help="Query mode only (no session)",
+ action="store_true")
+ parser.add_argument("--debug", help="Debug", action="store_true")
+ parser.add_argument("--no-encode",
+ help="Do not encode path in outfile",
+ action="store_true")
+ parser.add_argument("--output-prefix", help="File prefix in output",
+ default=".")
+ parser.add_argument("--type",default="both")
+ parser.add_argument("-N", "--only-namespace-changes",
+ help="List only namespace changes",
+ action="store_true")
+
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = _get_args()
+ mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
+ exit_on_err=True)
+ log_file = os.path.join(conf.get_opt("log_dir"),
+ args.session,
+ args.volume,
+ "changelog.log")
+ setup_logger(logger, log_file, args.debug)
+
+ session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
+ status_file = os.path.join(session_dir, args.volume,
+ "%s.status" % urllib.quote_plus(args.brick))
+ status_file_pre = status_file + ".pre"
+ mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
+ logger=logger)
+
+ end = -1
+ if args.only_query:
+ start = args.start
+ end = args.end
+ else:
+ try:
+ with open(status_file) as f:
+ start = int(f.read().strip())
+ except (ValueError, OSError, IOError):
+ start = args.start
+
+ # end time is optional; so a -1 may be sent to use the default method of
+ # identifying the end time
+ if end == -1:
+ end = int(time.time()) - get_changelog_rollover_time(args.volume)
+
+ logger.info("%s Started Changelog Crawl - Start: %s End: %s" % (args.brick,
+ start,
+ end))
+ actual_end = changelog_crawl(args.brick, start, end, args)
+ if not args.only_query:
+ with open(status_file_pre, "w") as f:
+ f.write(str(actual_end))
+
+ logger.info("%s Finished Changelog Crawl - End: %s" % (args.brick,
+ actual_end))
+ sys.exit(0)
diff --git a/tools/glusterfind/src/changelogdata.py b/tools/glusterfind/src/changelogdata.py
new file mode 100644
index 00000000000..641593cf4b1
--- /dev/null
+++ b/tools/glusterfind/src/changelogdata.py
@@ -0,0 +1,440 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import sqlite3
+import os
+
+from utils import RecordType, unquote_plus_space_newline
+from utils import output_path_prepare
+
+
+class OutputMerger(object):
+ """
+ Class to merge the output files collected from
+ different nodes
+ """
+ def __init__(self, db_path, all_dbs):
+ self.conn = sqlite3.connect(db_path)
+ self.cursor = self.conn.cursor()
+ self.cursor_reader = self.conn.cursor()
+ query = "DROP TABLE IF EXISTS finallist"
+ self.cursor.execute(query)
+
+ query = """
+ CREATE TABLE finallist(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ ts VARCHAR,
+ type VARCHAR,
+ gfid VARCHAR,
+ path1 VARCHAR,
+ path2 VARCHAR,
+ UNIQUE (type, path1, path2) ON CONFLICT IGNORE
+ )
+ """
+ self.cursor.execute(query)
+
+ # If node database exists, read each db and insert into
+ # final table. Ignore if combination of TYPE PATH1 PATH2
+ # already exists
+ for node_db in all_dbs:
+ if os.path.exists(node_db):
+ conn = sqlite3.connect(node_db)
+ cursor = conn.cursor()
+ query = """
+ SELECT ts, type, gfid, path1, path2
+ FROM gfidpath
+ WHERE path1 != ''
+ ORDER BY id ASC
+ """
+ for row in cursor.execute(query):
+ self.add_if_not_exists(row[0], row[1], row[2],
+ row[3], row[4])
+
+ self.conn.commit()
+
+ def add_if_not_exists(self, ts, ty, gfid, path1, path2=""):
+ # Adds record to finallist only if not exists
+ query = """
+ INSERT INTO finallist(ts, type, gfid, path1, path2)
+ VALUES(?, ?, ?, ?, ?)
+ """
+ self.cursor.execute(query, (ts, ty, gfid, path1, path2))
+
+ def get(self):
+ query = """SELECT type, path1, path2 FROM finallist
+ ORDER BY ts ASC, id ASC"""
+ return self.cursor_reader.execute(query)
+
+ def get_failures(self):
+ query = """
+ SELECT gfid
+ FROM finallist
+ WHERE path1 = '' OR (path2 = '' AND type = 'RENAME')
+ """
+ return self.cursor_reader.execute(query)
+
+
+class ChangelogData(object):
+ def __init__(self, dbpath, args):
+ self.conn = sqlite3.connect(dbpath)
+ self.cursor = self.conn.cursor()
+ self.cursor_reader = self.conn.cursor()
+ self._create_table_gfidpath()
+ self._create_table_pgfid()
+ self._create_table_inodegfid()
+ self.args = args
+ self.path_sep = "/"
+
+ def _create_table_gfidpath(self):
+ drop_table = "DROP TABLE IF EXISTS gfidpath"
+ self.cursor.execute(drop_table)
+
+ create_table = """
+ CREATE TABLE gfidpath(
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ ts VARCHAR,
+ type VARCHAR,
+ gfid VARCHAR(40),
+ pgfid1 VARCHAR(40) DEFAULT '',
+ bn1 VARCHAR(500) DEFAULT '',
+ pgfid2 VARCHAR(40) DEFAULT '',
+ bn2 VARCHAR(500) DEFAULT '',
+ path1 VARCHAR DEFAULT '',
+ path2 VARCHAR DEFAULT ''
+ )
+ """
+ self.cursor.execute(create_table)
+
+ create_index = """
+ CREATE INDEX gfid_index ON gfidpath(gfid);
+ """
+ self.cursor.execute(create_index)
+
+ def _create_table_inodegfid(self):
+ drop_table = "DROP TABLE IF EXISTS inodegfid"
+ self.cursor.execute(drop_table)
+
+ create_table = """
+ CREATE TABLE inodegfid(
+ inode INTEGER PRIMARY KEY,
+ gfid VARCHAR(40),
+ converted INTEGER DEFAULT 0,
+ UNIQUE (inode, gfid) ON CONFLICT IGNORE
+ )
+ """
+ self.cursor.execute(create_table)
+
+ def _create_table_pgfid(self):
+ drop_table = "DROP TABLE IF EXISTS pgfid"
+ self.cursor.execute(drop_table)
+
+ create_table = """
+ CREATE TABLE pgfid(
+ pgfid VARCHAR(40) PRIMARY KEY,
+ UNIQUE (pgfid) ON CONFLICT IGNORE
+ )
+ """
+ self.cursor.execute(create_table)
+
+ def _get(self, tablename, filters):
+ # SELECT * FROM <TABLENAME> WHERE <CONDITION>
+ params = []
+ query = "SELECT * FROM %s WHERE 1=1" % tablename
+
+ for key, value in filters.items():
+ query += " AND %s = ?" % key
+ params.append(value)
+
+ return self.cursor_reader.execute(query, params)
+
+ def _get_distinct(self, tablename, distinct_field, filters):
+ # SELECT DISTINCT <COL> FROM <TABLENAME> WHERE <CONDITION>
+ params = []
+ query = "SELECT DISTINCT %s FROM %s WHERE 1=1" % (distinct_field,
+ tablename)
+
+ for key, value in filters.items():
+ query += " AND %s = ?" % key
+ params.append(value)
+
+ return self.cursor_reader.execute(query, params)
+
+ def _delete(self, tablename, filters):
+ # DELETE FROM <TABLENAME> WHERE <CONDITIONS>
+ query = "DELETE FROM %s WHERE 1=1" % tablename
+ params = []
+
+ for key, value in filters.items():
+ query += " AND %s = ?" % key
+ params.append(value)
+
+ self.cursor.execute(query, params)
+
+ def _add(self, tablename, data):
+ # INSERT INTO <TABLENAME>(<col1>, <col2>..) VALUES(?,?..)
+ query = "INSERT INTO %s(" % tablename
+ fields = []
+ params = []
+ for key, value in data.items():
+ fields.append(key)
+ params.append(value)
+
+ values_substitute = len(fields)*["?"]
+ query += "%s) VALUES(%s)" % (",".join(fields),
+ ",".join(values_substitute))
+ self.cursor.execute(query, params)
+
+ def _update(self, tablename, data, filters):
+ # UPDATE <TABLENAME> SET col1 = ?,.. WHERE col1=? AND ..
+ params = []
+ update_fields = []
+ for key, value in data.items():
+ update_fields.append("%s = ?" % key)
+ params.append(value)
+
+ query = "UPDATE %s SET %s WHERE 1 = 1" % (tablename,
+ ", ".join(update_fields))
+
+ for key, value in filters.items():
+ query += " AND %s = ?" % key
+ params.append(value)
+
+ self.cursor.execute(query, params)
+
+ def _exists(self, tablename, filters):
+ if not filters:
+ return False
+
+ query = "SELECT COUNT(1) FROM %s WHERE 1=1" % tablename
+ params = []
+
+ for key, value in filters.items():
+ query += " AND %s = ?" % key
+ params.append(value)
+
+ self.cursor.execute(query, params)
+ row = self.cursor.fetchone()
+ return True if row[0] > 0 else False
+
+ def gfidpath_add(self, changelogfile, ty, gfid, pgfid1="", bn1="",
+ pgfid2="", bn2="", path1="", path2=""):
+ self._add("gfidpath", {
+ "ts": changelogfile.split(".")[-1],
+ "type": ty,
+ "gfid": gfid,
+ "pgfid1": pgfid1,
+ "bn1": bn1,
+ "pgfid2": pgfid2,
+ "bn2": bn2,
+ "path1": path1,
+ "path2": path2
+ })
+
+ def gfidpath_update(self, data, filters):
+ self._update("gfidpath", data, filters)
+
+ def gfidpath_delete(self, filters):
+ self._delete("gfidpath", filters)
+
+ def gfidpath_exists(self, filters):
+ return self._exists("gfidpath", filters)
+
+ def gfidpath_get(self, filters={}):
+ return self._get("gfidpath", filters)
+
+ def gfidpath_get_distinct(self, distinct_field, filters={}):
+ return self._get_distinct("gfidpath", distinct_field, filters)
+
+ def pgfid_add(self, pgfid):
+ self._add("pgfid", {
+ "pgfid": pgfid
+ })
+
+ def pgfid_update(self, data, filters):
+ self._update("pgfid", data, filters)
+
+ def pgfid_get(self, filters={}):
+ return self._get("pgfid", filters)
+
+ def pgfid_get_distinct(self, distinct_field, filters={}):
+ return self._get_distinct("pgfid", distinct_field, filters)
+
+ def pgfid_exists(self, filters):
+ return self._exists("pgfid", filters)
+
+ def inodegfid_add(self, inode, gfid, converted=0):
+ self._add("inodegfid", {
+ "inode": inode,
+ "gfid": gfid,
+ "converted": converted
+ })
+
+ def inodegfid_update(self, data, filters):
+ self._update("inodegfid", data, filters)
+
+ def inodegfid_get(self, filters={}):
+ return self._get("inodegfid", filters)
+
+ def inodegfid_get_distinct(self, distinct_field, filters={}):
+ return self._get_distinct("inodegfid", distinct_field, filters)
+
+ def inodegfid_exists(self, filters):
+ return self._exists("inodegfid", filters)
+
+ def append_path1(self, path, inode):
+ # || is for concatenate in SQL
+ query = """UPDATE gfidpath SET path1 = path1 || ',' || ?
+ WHERE gfid IN (SELECT gfid FROM inodegfid WHERE inode = ?)"""
+ self.cursor.execute(query, (path, inode))
+
+ def gfidpath_set_path1(self, path1, pgfid1):
+ # || is for concatenate in SQL
+ if path1 == "":
+ update_str1 = "? || bn1"
+ update_str2 = "? || bn2"
+ else:
+ update_str1 = "? || '{0}' || bn1".format(self.path_sep)
+ update_str2 = "? || '{0}' || bn2".format(self.path_sep)
+
+ query = """UPDATE gfidpath SET path1 = %s
+ WHERE pgfid1 = ?""" % update_str1
+ self.cursor.execute(query, (path1, pgfid1))
+
+ # Set Path2 if pgfid1 and pgfid2 are same
+ query = """UPDATE gfidpath SET path2 = %s
+ WHERE pgfid2 = ?""" % update_str2
+ self.cursor.execute(query, (path1, pgfid1))
+
+ def gfidpath_set_path2(self, path2, pgfid2):
+ # || is for concatenate in SQL
+ if path2 == "":
+ update_str = "? || bn2"
+ else:
+ update_str = "? || '{0}' || bn2".format(self.path_sep)
+
+ query = """UPDATE gfidpath SET path2 = %s
+ WHERE pgfid2 = ?""" % update_str
+ self.cursor.execute(query, (path2, pgfid2))
+
+ def when_create_mknod_mkdir(self, changelogfile, data):
+ # E <GFID> <MKNOD|CREATE|MKDIR> <MODE> <USER> <GRP> <PGFID>/<BNAME>
+ # Add the Entry to DB
+ pgfid1, bn1 = data[6].split("/", 1)
+
+ if self.args.no_encode:
+ bn1 = unquote_plus_space_newline(bn1).strip()
+
+ self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1)
+
+ def when_rename(self, changelogfile, data):
+ # E <GFID> RENAME <OLD_PGFID>/<BNAME> <PGFID>/<BNAME>
+ pgfid1, bn1 = data[3].split("/", 1)
+ pgfid2, bn2 = data[4].split("/", 1)
+
+ if self.args.no_encode:
+ bn1 = unquote_plus_space_newline(bn1).strip()
+ bn2 = unquote_plus_space_newline(bn2).strip()
+
+ if self.gfidpath_exists({"gfid": data[1], "type": "NEW",
+ "pgfid1": pgfid1, "bn1": bn1}):
+ # If <OLD_PGFID>/<BNAME> is same as CREATE, Update
+ # <NEW_PGFID>/<BNAME> in NEW.
+ self.gfidpath_update({"pgfid1": pgfid2, "bn1": bn2},
+ {"gfid": data[1], "type": "NEW",
+ "pgfid1": pgfid1, "bn1": bn1})
+ elif self.gfidpath_exists({"gfid": data[1], "type": "RENAME",
+ "pgfid2": pgfid1, "bn2": bn1}):
+ # If we are renaming file back to original name then just
+ # delete the entry since it will effectively be a no-op
+ if self.gfidpath_exists({"gfid": data[1], "type": "RENAME",
+ "pgfid2": pgfid1, "bn2": bn1,
+ "pgfid1": pgfid2, "bn1": bn2}):
+ self.gfidpath_delete({"gfid": data[1], "type": "RENAME",
+ "pgfid2": pgfid1, "bn2": bn1})
+ else:
+ # If <OLD_PGFID>/<BNAME> is same as <PGFID2>/<BN2>
+ # (may be previous RENAME)
+ # then UPDATE <NEW_PGFID>/<BNAME> as <PGFID2>/<BN2>
+ self.gfidpath_update({"pgfid2": pgfid2, "bn2": bn2},
+ {"gfid": data[1], "type": "RENAME",
+ "pgfid2": pgfid1, "bn2": bn1})
+ else:
+ # Else insert as RENAME
+ self.gfidpath_add(changelogfile, RecordType.RENAME, data[1],
+ pgfid1, bn1, pgfid2, bn2)
+
+ if self.gfidpath_exists({"gfid": data[1], "type": "MODIFY"}):
+ # If MODIFY exists already for that GFID, remove it and insert
+ # again so that MODIFY entry comes after RENAME entry
+ # Output will have MODIFY <NEWNAME>
+ self.gfidpath_delete({"gfid": data[1], "type": "MODIFY"})
+ self.gfidpath_add(changelogfile, RecordType.MODIFY, data[1])
+
+ def when_link_symlink(self, changelogfile, data):
+ # E <GFID> <LINK|SYMLINK> <PGFID>/<BASENAME>
+ # Add as New record in Db as Type NEW
+ pgfid1, bn1 = data[3].split("/", 1)
+ if self.args.no_encode:
+ bn1 = unquote_plus_space_newline(bn1).strip()
+
+ self.gfidpath_add(changelogfile, RecordType.NEW, data[1], pgfid1, bn1)
+
+ def when_data_meta(self, changelogfile, data):
+ # If GFID row exists, Ignore else Add to Db
+ if not self.gfidpath_exists({"gfid": data[1], "type": "NEW"}) and \
+ not self.gfidpath_exists({"gfid": data[1], "type": "MODIFY"}):
+ self.gfidpath_add(changelogfile, RecordType.MODIFY, data[1])
+
+ def when_unlink_rmdir(self, changelogfile, data):
+ # E <GFID> <UNLINK|RMDIR> <PGFID>/<BASENAME>
+ pgfid1, bn1 = data[3].split("/", 1)
+
+ if self.args.no_encode:
+ bn1 = unquote_plus_space_newline(bn1).strip()
+
+ deleted_path = data[4] if len(data) == 5 else ""
+ if deleted_path != "":
+ deleted_path = unquote_plus_space_newline(deleted_path)
+ deleted_path = output_path_prepare(deleted_path, self.args)
+
+ if self.gfidpath_exists({"gfid": data[1], "type": "NEW",
+ "pgfid1": pgfid1, "bn1": bn1}):
+ # If path exists in table as NEW with same GFID
+ # Delete that row
+ self.gfidpath_delete({"gfid": data[1], "type": "NEW",
+ "pgfid1": pgfid1, "bn1": bn1})
+ else:
+ # Else Record as DELETE
+ self.gfidpath_add(changelogfile, RecordType.DELETE, data[1],
+ pgfid1, bn1, path1=deleted_path)
+
+ # Update path1 as deleted_path if pgfid1 and bn1 is same as deleted
+ self.gfidpath_update({"path1": deleted_path}, {"gfid": data[1],
+ "pgfid1": pgfid1,
+ "bn1": bn1})
+
+ # Update path2 as deleted_path if pgfid2 and bn2 is same as deleted
+ self.gfidpath_update({"path2": deleted_path}, {
+ "type": RecordType.RENAME,
+ "gfid": data[1],
+ "pgfid2": pgfid1,
+ "bn2": bn1})
+
+ # If deleted directory is parent for somebody
+ query1 = """UPDATE gfidpath SET path1 = ? || '{0}' || bn1
+ WHERE pgfid1 = ? AND path1 != ''""".format(self.path_sep)
+ self.cursor.execute(query1, (deleted_path, data[1]))
+
+ query1 = """UPDATE gfidpath SET path2 = ? || '{0}' || bn1
+ WHERE pgfid2 = ? AND path2 != ''""".format(self.path_sep)
+ self.cursor.execute(query1, (deleted_path, data[1]))
+
+ def commit(self):
+ self.conn.commit()
diff --git a/tools/glusterfind/src/conf.py b/tools/glusterfind/src/conf.py
new file mode 100644
index 00000000000..3849ba5dd1f
--- /dev/null
+++ b/tools/glusterfind/src/conf.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import os
+try:
+ from ConfigParser import ConfigParser
+except ImportError:
+ from configparser import ConfigParser
+
+config = ConfigParser()
+config.read(os.path.join(os.path.dirname(os.path.abspath(__file__)),
+ "tool.conf"))
+
+
+def list_change_detectors():
+ return dict(config.items("change_detectors")).keys()
+
+
+def get_opt(opt):
+ return config.get("vars", opt)
+
+
+def get_change_detector(opt):
+ return config.get("change_detectors", opt)
diff --git a/tools/glusterfind/src/gfind_py2py3.py b/tools/glusterfind/src/gfind_py2py3.py
new file mode 100644
index 00000000000..87324fbf350
--- /dev/null
+++ b/tools/glusterfind/src/gfind_py2py3.py
@@ -0,0 +1,88 @@
+#
+# Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+# This file is part of GlusterFS.
+
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+#
+
+# All python2/python3 compatibility routines
+
+import os
+import sys
+from ctypes import create_string_buffer
+
+if sys.version_info >= (3,):
+
+ # Raw conversion of bytearray to string. Used in the cases where
+ # buffer is created by create_string_buffer which is a 8-bit char
+ # array and passed to syscalls to fetch results. Using encode/decode
+ # doesn't work as it converts to string altering the size.
+ # def bytearray_to_str(byte_arr):
+ def bytearray_to_str(byte_arr):
+ return ''.join([chr(b) for b in byte_arr])
+
+ def gf_create_string_buffer(size):
+ return create_string_buffer(b'\0', size)
+
+ def gfind_history_changelog(libgfc, changelog_path, start, end, num_parallel,
+ actual_end):
+ return libgfc.gf_history_changelog(changelog_path.encode(), start, end, num_parallel,
+ actual_end)
+
+ def gfind_changelog_register(libgfc, brick, path, log_file, log_level,
+ retries):
+ return libgfc.gf_changelog_register(brick.encode(), path.encode(), log_file.encode(),
+ log_level, retries)
+
+ def gfind_history_changelog_done(libgfc, clfile):
+ return libgfc.gf_history_changelog_done(clfile.encode())
+
+ def gfind_write_row(f, row, field_separator, p_rep, row_2_rep):
+ f.write(u"{0}{1}{2}{3}{4}\n".format(row,
+ field_separator,
+ p_rep,
+ field_separator,
+ row_2_rep))
+
+ def gfind_write(f, row, field_separator, p_rep):
+ f.write(u"{0}{1}{2}\n".format(row,
+ field_separator,
+ p_rep))
+
+
+else:
+
+ # Raw conversion of bytearray to string
+ def bytearray_to_str(byte_arr):
+ return byte_arr
+
+ def gf_create_string_buffer(size):
+ return create_string_buffer('\0', size)
+
+ def gfind_history_changelog(libgfc, changelog_path, start, end, num_parallel,
+ actual_end):
+ return libgfc.gf_history_changelog(changelog_path, start, end,
+ num_parallel, actual_end)
+
+ def gfind_changelog_register(libgfc, brick, path, log_file, log_level,
+ retries):
+ return libgfc.gf_changelog_register(brick, path, log_file,
+ log_level, retries)
+
+ def gfind_history_changelog_done(libgfc, clfile):
+ return libgfc.gf_history_changelog_done(clfile)
+
+ def gfind_write_row(f, row, field_separator, p_rep, row_2_rep):
+ f.write(u"{0}{1}{2}{3}{4}\n".format(row,
+ field_separator,
+ p_rep,
+ field_separator,
+ row_2_rep).encode())
+
+ def gfind_write(f, row, field_separator, p_rep):
+ f.write(u"{0}{1}{2}\n".format(row,
+ field_separator,
+ p_rep).encode())
diff --git a/tools/glusterfind/src/libgfchangelog.py b/tools/glusterfind/src/libgfchangelog.py
new file mode 100644
index 00000000000..513bb101e93
--- /dev/null
+++ b/tools/glusterfind/src/libgfchangelog.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import os
+from ctypes import CDLL, RTLD_GLOBAL, get_errno, create_string_buffer, c_ulong, byref
+from ctypes.util import find_library
+from gfind_py2py3 import bytearray_to_str, gf_create_string_buffer
+from gfind_py2py3 import gfind_history_changelog, gfind_changelog_register
+from gfind_py2py3 import gfind_history_changelog_done
+
+
+class ChangelogException(OSError):
+ pass
+
+libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL, use_errno=True)
+
+
+def raise_oserr(prefix=None):
+ errn = get_errno()
+ prefix_or_empty = prefix + ": " if prefix else ""
+ raise ChangelogException(errn, prefix_or_empty + os.strerror(errn))
+
+
+def cl_init():
+ ret = libgfc.gf_changelog_init(None)
+ if ret == -1:
+ raise_oserr(prefix="gf_changelog_init")
+
+
+def cl_register(brick, path, log_file, log_level, retries=0):
+ ret = gfind_changelog_register(libgfc, brick, path, log_file,log_level, retries)
+ if ret == -1:
+ raise_oserr(prefix="gf_changelog_register")
+
+
+def cl_history_scan():
+ ret = libgfc.gf_history_changelog_scan()
+ if ret == -1:
+ raise_oserr(prefix="gf_history_changelog_scan")
+
+ return ret
+
+
+def cl_history_changelog(changelog_path, start, end, num_parallel):
+ actual_end = c_ulong()
+ ret = gfind_history_changelog(libgfc,changelog_path, start, end,
+ num_parallel,
+ byref(actual_end))
+ if ret == -1:
+ raise_oserr(prefix="gf_history_changelog")
+
+ return actual_end.value
+
+
+def cl_history_startfresh():
+ ret = libgfc.gf_history_changelog_start_fresh()
+ if ret == -1:
+ raise_oserr(prefix="gf_history_changelog_start_fresh")
+
+
+def cl_history_getchanges():
+ """ remove hardcoding for path name length """
+ def clsort(f):
+ return f.split('.')[-1]
+
+ changes = []
+ buf = gf_create_string_buffer(4096)
+
+ while True:
+ ret = libgfc.gf_history_changelog_next_change(buf, 4096)
+ if ret in (0, -1):
+ break
+ # py2 and py3 compatibility
+ result = bytearray_to_str(buf.raw[:ret - 1])
+ changes.append(result)
+ if ret == -1:
+ raise_oserr(prefix="gf_history_changelog_next_change")
+
+ return sorted(changes, key=clsort)
+
+
+def cl_history_done(clfile):
+ ret = gfind_history_changelog_done(libgfc, clfile)
+ if ret == -1:
+ raise_oserr(prefix="gf_history_changelog_done")
diff --git a/tools/glusterfind/src/main.py b/tools/glusterfind/src/main.py
new file mode 100644
index 00000000000..4b5466d0114
--- /dev/null
+++ b/tools/glusterfind/src/main.py
@@ -0,0 +1,921 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import sys
+from errno import ENOENT, ENOTEMPTY
+import time
+from multiprocessing import Process
+import os
+import xml.etree.cElementTree as etree
+from argparse import ArgumentParser, RawDescriptionHelpFormatter, Action
+from gfind_py2py3 import gfind_write_row, gfind_write
+import logging
+import shutil
+import tempfile
+import signal
+from datetime import datetime
+import codecs
+import re
+
+from utils import execute, is_host_local, mkdirp, fail
+from utils import setup_logger, human_time, handle_rm_error
+from utils import get_changelog_rollover_time, cache_output, create_file
+import conf
+from changelogdata import OutputMerger
+
+PROG_DESCRIPTION = """
+GlusterFS Incremental API
+"""
+ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError
+
+logger = logging.getLogger()
+vol_statusStr = ""
+gtmpfilename = None
+g_pid_nodefile_map = {}
+
+
+class StoreAbsPath(Action):
+ def __init__(self, option_strings, dest, nargs=None, **kwargs):
+ super(StoreAbsPath, self).__init__(option_strings, dest, **kwargs)
+
+ def __call__(self, parser, namespace, values, option_string=None):
+ setattr(namespace, self.dest, os.path.abspath(values))
+
+
+def get_pem_key_path(session, volume):
+ return os.path.join(conf.get_opt("session_dir"),
+ session,
+ volume,
+ "%s_%s_secret.pem" % (session, volume))
+
+
+def node_cmd(host, host_uuid, task, cmd, args, opts):
+ """
+ Runs command via ssh if host is not local
+ """
+ try:
+ localdir = is_host_local(host_uuid)
+
+ # this is so to avoid deleting the ssh keys on local node which
+ # otherwise cause ssh password prompts on the console (race conditions)
+ # mode_delete() should be cleaning up the session tree
+ if localdir and task == "delete":
+ return
+
+ pem_key_path = get_pem_key_path(args.session, args.volume)
+
+ if not localdir:
+ # prefix with ssh command if not local node
+ cmd = ["ssh",
+ "-oNumberOfPasswordPrompts=0",
+ "-oStrictHostKeyChecking=no",
+ # We force TTY allocation (-t -t) so that Ctrl+C is handed
+ # through; see:
+ # https://bugzilla.redhat.com/show_bug.cgi?id=1382236
+ # Note that this turns stderr of the remote `cmd`
+ # into stdout locally.
+ "-t",
+ "-t",
+ "-i", pem_key_path,
+ "root@%s" % host] + cmd
+
+ (returncode, err, out) = execute(cmd, logger=logger)
+ if returncode != 0:
+ # Because the `-t -t` above turns the remote stderr into
+ # local stdout, we need to log both stderr and stdout
+ # here to print all error messages.
+ fail("%s - %s failed; stdout (including remote stderr):\n"
+ "%s\n"
+ "stderr:\n"
+ "%s" % (host, task, out, err),
+ returncode,
+ logger=logger)
+
+ if opts.get("copy_outfile", False) and not localdir:
+ cmd_copy = ["scp",
+ "-oNumberOfPasswordPrompts=0",
+ "-oStrictHostKeyChecking=no",
+ "-i", pem_key_path,
+ "root@%s:/%s" % (host, opts.get("node_outfile")),
+ os.path.dirname(opts.get("node_outfile"))]
+ execute(cmd_copy, exit_msg="%s - Copy command failed" % host,
+ logger=logger)
+ except KeyboardInterrupt:
+ sys.exit(2)
+
+
+def run_cmd_nodes(task, args, **kwargs):
+ global g_pid_nodefile_map
+ nodes = get_nodes(args.volume)
+ pool = []
+ for num, node in enumerate(nodes):
+ host, brick = node[1].split(":")
+ host_uuid = node[0]
+ cmd = []
+ opts = {}
+
+ # tmpfilename is valid only for tasks: pre, query and cleanup
+ tmpfilename = kwargs.get("tmpfilename", "BADNAME")
+
+ node_outfile = os.path.join(conf.get_opt("working_dir"),
+ args.session, args.volume,
+ tmpfilename,
+ "tmp_output_%s" % num)
+
+ if task == "pre":
+ if vol_statusStr != "Started":
+ fail("Volume %s is not online" % args.volume,
+ logger=logger)
+
+ # If Full backup is requested or start time is zero, use brickfind
+ change_detector = conf.get_change_detector("changelog")
+ tag = None
+ if args.full:
+ change_detector = conf.get_change_detector("brickfind")
+ tag = args.tag_for_full_find.strip()
+ if tag == "":
+ tag = '""' if not is_host_local(host_uuid) else ""
+
+ # remote file will be copied into this directory
+ mkdirp(os.path.dirname(node_outfile),
+ exit_on_err=True, logger=logger)
+
+ FS = args.field_separator
+ if not is_host_local(host_uuid):
+ FS = "'" + FS + "'"
+
+ cmd = [change_detector,
+ args.session,
+ args.volume,
+ host,
+ brick,
+ node_outfile] + \
+ ([str(kwargs.get("start")), str(kwargs.get("end"))]
+ if not args.full else []) + \
+ ([tag] if tag is not None else []) + \
+ ["--output-prefix", args.output_prefix] + \
+ (["--debug"] if args.debug else []) + \
+ (["--no-encode"] if args.no_encode else []) + \
+ (["--only-namespace-changes"] if args.only_namespace_changes
+ else []) + \
+ (["--type", args.type]) + \
+ (["--field-separator", FS] if args.full else [])
+
+ opts["node_outfile"] = node_outfile
+ opts["copy_outfile"] = True
+ elif task == "query":
+ # If Full backup is requested or start time is zero, use brickfind
+ tag = None
+ change_detector = conf.get_change_detector("changelog")
+ if args.full:
+ change_detector = conf.get_change_detector("brickfind")
+ tag = args.tag_for_full_find.strip()
+ if tag == "":
+ tag = '""' if not is_host_local(host_uuid) else ""
+
+ # remote file will be copied into this directory
+ mkdirp(os.path.dirname(node_outfile),
+ exit_on_err=True, logger=logger)
+
+ FS = args.field_separator
+ if not is_host_local(host_uuid):
+ FS = "'" + FS + "'"
+
+ cmd = [change_detector,
+ args.session,
+ args.volume,
+ host,
+ brick,
+ node_outfile] + \
+ ([str(kwargs.get("start")), str(kwargs.get("end"))]
+ if not args.full else []) + \
+ ([tag] if tag is not None else []) + \
+ ["--only-query"] + \
+ ["--output-prefix", args.output_prefix] + \
+ (["--debug"] if args.debug else []) + \
+ (["--no-encode"] if args.no_encode else []) + \
+ (["--only-namespace-changes"]
+ if args.only_namespace_changes else []) + \
+ (["--type", args.type]) + \
+ (["--field-separator", FS] if args.full else [])
+
+ opts["node_outfile"] = node_outfile
+ opts["copy_outfile"] = True
+ elif task == "cleanup":
+ # After pre/query run, cleanup the working directory and other
+ # temp files. Remove the directory to which node_outfile has
+ # been copied in main node
+ try:
+ os.remove(node_outfile)
+ except (OSError, IOError):
+ logger.warn("Failed to cleanup temporary file %s" %
+ node_outfile)
+ pass
+
+ cmd = [conf.get_opt("nodeagent"),
+ "cleanup",
+ args.session,
+ args.volume,
+ os.path.dirname(node_outfile)] + \
+ (["--debug"] if args.debug else [])
+ elif task == "create":
+ if vol_statusStr != "Started":
+ fail("Volume %s is not online" % args.volume,
+ logger=logger)
+
+ # When glusterfind create, create session directory in
+ # each brick nodes
+ cmd = [conf.get_opt("nodeagent"),
+ "create",
+ args.session,
+ args.volume,
+ brick,
+ kwargs.get("time_to_update")] + \
+ (["--debug"] if args.debug else []) + \
+ (["--reset-session-time"] if args.reset_session_time
+ else [])
+ elif task == "post":
+ # Rename pre status file to actual status file in each node
+ cmd = [conf.get_opt("nodeagent"),
+ "post",
+ args.session,
+ args.volume,
+ brick] + \
+ (["--debug"] if args.debug else [])
+ elif task == "delete":
+ # When glusterfind delete, cleanup all the session files/dirs
+ # from each node.
+ cmd = [conf.get_opt("nodeagent"),
+ "delete",
+ args.session,
+ args.volume] + \
+ (["--debug"] if args.debug else [])
+
+ if cmd:
+ p = Process(target=node_cmd,
+ args=(host, host_uuid, task, cmd, args, opts))
+ p.start()
+ pool.append(p)
+ g_pid_nodefile_map[p.pid] = node_outfile
+
+ for num, p in enumerate(pool):
+ p.join()
+ if p.exitcode != 0:
+ logger.warn("Command %s failed in %s" % (task, nodes[num][1]))
+ if task in ["create", "delete"]:
+ fail("Command %s failed in %s" % (task, nodes[num][1]))
+ elif task == "pre" or task == "query":
+ if args.disable_partial:
+ sys.exit(1)
+ else:
+ del g_pid_nodefile_map[p.pid]
+
+
+@cache_output
+def get_nodes(volume):
+ """
+ Get the gluster volume info xml output and parse to get
+ the brick details.
+ """
+ global vol_statusStr
+
+ cmd = ["gluster", 'volume', 'info', volume, "--xml"]
+ _, data, _ = execute(cmd,
+ exit_msg="Failed to Run Gluster Volume Info",
+ logger=logger)
+ tree = etree.fromstring(data)
+
+ # Test to check if volume has been deleted after session creation
+ count_el = tree.find('volInfo/volumes/count')
+ if int(count_el.text) == 0:
+ fail("Unable to get volume details", logger=logger)
+
+ # this status is used in caller: run_cmd_nodes
+ vol_statusStr = tree.find('volInfo/volumes/volume/statusStr').text
+ vol_typeStr = tree.find('volInfo/volumes/volume/typeStr').text
+
+ nodes = []
+ volume_el = tree.find('volInfo/volumes/volume')
+ try:
+ brick_elems = []
+ if vol_typeStr == "Tier":
+ brick_elems.append('bricks/hotBricks/brick')
+ brick_elems.append('bricks/coldBricks/brick')
+ else:
+ brick_elems.append('bricks/brick')
+
+ for elem in brick_elems:
+ for b in volume_el.findall(elem):
+ nodes.append((b.find('hostUuid').text,
+ b.find('name').text))
+ except (ParseError, AttributeError, ValueError) as e:
+ fail("Failed to parse Volume Info: %s" % e, logger=logger)
+
+ return nodes
+
+
+def _get_args():
+ parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+ description=PROG_DESCRIPTION)
+ subparsers = parser.add_subparsers(dest="mode")
+ subparsers.required = True
+
+ # create <SESSION> <VOLUME> [--debug] [--force]
+ parser_create = subparsers.add_parser('create')
+ parser_create.add_argument("session", help="Session Name")
+ parser_create.add_argument("volume", help="Volume Name")
+ parser_create.add_argument("--debug", help="Debug", action="store_true")
+ parser_create.add_argument("--force", help="Force option to recreate "
+ "the session", action="store_true")
+ parser_create.add_argument("--reset-session-time",
+ help="Reset Session Time to Current Time",
+ action="store_true")
+
+ # delete <SESSION> <VOLUME> [--debug]
+ parser_delete = subparsers.add_parser('delete')
+ parser_delete.add_argument("session", help="Session Name")
+ parser_delete.add_argument("volume", help="Volume Name")
+ parser_delete.add_argument("--debug", help="Debug", action="store_true")
+
+ # list [--session <SESSION>] [--volume <VOLUME>]
+ parser_list = subparsers.add_parser('list')
+ parser_list.add_argument("--session", help="Session Name", default="")
+ parser_list.add_argument("--volume", help="Volume Name", default="")
+ parser_list.add_argument("--debug", help="Debug", action="store_true")
+
+ # pre <SESSION> <VOLUME> <OUTFILE>
+ # [--output-prefix <OUTPUT_PREFIX>] [--full]
+ parser_pre = subparsers.add_parser('pre')
+ parser_pre.add_argument("session", help="Session Name")
+ parser_pre.add_argument("volume", help="Volume Name")
+ parser_pre.add_argument("outfile", help="Output File", action=StoreAbsPath)
+ parser_pre.add_argument("--debug", help="Debug", action="store_true")
+ parser_pre.add_argument("--no-encode",
+ help="Do not encode path in output file",
+ action="store_true")
+ parser_pre.add_argument("--full", help="Full find", action="store_true")
+ parser_pre.add_argument("--disable-partial", help="Disable Partial find, "
+ "Fail when one node fails", action="store_true")
+ parser_pre.add_argument("--output-prefix", help="File prefix in output",
+ default=".")
+ parser_pre.add_argument("--regenerate-outfile",
+ help="Regenerate outfile, discard the outfile "
+ "generated from last pre command",
+ action="store_true")
+ parser_pre.add_argument("-N", "--only-namespace-changes",
+ help="List only namespace changes",
+ action="store_true")
+ parser_pre.add_argument("--tag-for-full-find",
+ help="Tag prefix for file names emitted during"
+ " a full find operation; default: \"NEW\"",
+ default="NEW")
+ parser_pre.add_argument('--type', help="type: f, f-files only"
+ " d, d-directories only, by default = both",
+ default='both', choices=["f", "d", "both"])
+ parser_pre.add_argument("--field-separator", help="Field separator string",
+ default=" ")
+
+ # query <VOLUME> <OUTFILE> --since-time <SINCE_TIME>
+ # [--output-prefix <OUTPUT_PREFIX>] [--full]
+ parser_query = subparsers.add_parser('query')
+ parser_query.add_argument("volume", help="Volume Name")
+ parser_query.add_argument("outfile", help="Output File",
+ action=StoreAbsPath)
+ parser_query.add_argument("--since-time", help="UNIX epoch time since "
+ "which listing is required", type=int)
+ parser_query.add_argument("--end-time", help="UNIX epoch time up to "
+ "which listing is required", type=int)
+ parser_query.add_argument("--no-encode",
+ help="Do not encode path in output file",
+ action="store_true")
+ parser_query.add_argument("--full", help="Full find", action="store_true")
+ parser_query.add_argument("--debug", help="Debug", action="store_true")
+ parser_query.add_argument("--disable-partial", help="Disable Partial find,"
+ " Fail when one node fails", action="store_true")
+ parser_query.add_argument("--output-prefix", help="File prefix in output",
+ default=".")
+ parser_query.add_argument("-N", "--only-namespace-changes",
+ help="List only namespace changes",
+ action="store_true")
+ parser_query.add_argument("--tag-for-full-find",
+ help="Tag prefix for file names emitted during"
+ " a full find operation; default: \"NEW\"",
+ default="NEW")
+ parser_query.add_argument('--type', help="type: f, f-files only"
+ " d, d-directories only, by default = both",
+ default='both', choices=["f", "d", "both"])
+ parser_query.add_argument("--field-separator",
+ help="Field separator string",
+ default=" ")
+
+ # post <SESSION> <VOLUME>
+ parser_post = subparsers.add_parser('post')
+ parser_post.add_argument("session", help="Session Name")
+ parser_post.add_argument("volume", help="Volume Name")
+ parser_post.add_argument("--debug", help="Debug", action="store_true")
+
+ return parser.parse_args()
+
+
+def ssh_setup(args):
+ pem_key_path = get_pem_key_path(args.session, args.volume)
+
+ if not os.path.exists(pem_key_path):
+ # Generate ssh-key
+ cmd = ["ssh-keygen",
+ "-N",
+ "",
+ "-f",
+ pem_key_path]
+ execute(cmd,
+ exit_msg="Unable to generate ssh key %s"
+ % pem_key_path,
+ logger=logger)
+
+ logger.info("Ssh key generated %s" % pem_key_path)
+
+ try:
+ shutil.copyfile(pem_key_path + ".pub",
+ os.path.join(conf.get_opt("session_dir"),
+ ".keys",
+ "%s_%s_secret.pem.pub" % (args.session,
+ args.volume)))
+ except (IOError, OSError) as e:
+ fail("Failed to copy public key to %s: %s"
+ % (os.path.join(conf.get_opt("session_dir"), ".keys"), e),
+ logger=logger)
+
+ # Copy pub file to all nodes
+ cmd = ["gluster",
+ "system::",
+ "copy",
+ "file",
+ "/glusterfind/.keys/%s.pub" % os.path.basename(pem_key_path)]
+
+ execute(cmd, exit_msg="Failed to distribute ssh keys", logger=logger)
+
+ logger.info("Distributed ssh key to all nodes of Volume")
+
+ # Add to authorized_keys file in each node
+ cmd = ["gluster",
+ "system::",
+ "execute",
+ "add_secret_pub",
+ "root",
+ "/glusterfind/.keys/%s.pub" % os.path.basename(pem_key_path)]
+ execute(cmd,
+ exit_msg="Failed to add ssh keys to authorized_keys file",
+ logger=logger)
+
+ logger.info("Ssh key added to authorized_keys of Volume nodes")
+
+
+def enable_volume_options(args):
+ execute(["gluster", "volume", "set",
+ args.volume, "build-pgfid", "on"],
+ exit_msg="Failed to set volume option build-pgfid on",
+ logger=logger)
+ logger.info("Volume option set %s, build-pgfid on" % args.volume)
+
+ execute(["gluster", "volume", "set",
+ args.volume, "changelog.changelog", "on"],
+ exit_msg="Failed to set volume option "
+ "changelog.changelog on", logger=logger)
+ logger.info("Volume option set %s, changelog.changelog on"
+ % args.volume)
+
+ execute(["gluster", "volume", "set",
+ args.volume, "changelog.capture-del-path", "on"],
+ exit_msg="Failed to set volume option "
+ "changelog.capture-del-path on", logger=logger)
+ logger.info("Volume option set %s, changelog.capture-del-path on"
+ % args.volume)
+
+
+def write_output(outfile, outfilemerger, field_separator):
+ with codecs.open(outfile, "a", encoding="utf-8") as f:
+ for row in outfilemerger.get():
+ # Multiple paths in case of Hardlinks
+ paths = row[1].split(",")
+ row_2_rep = None
+ for p in paths:
+ if p == "":
+ continue
+ p_rep = p.replace("//", "/")
+ if not row_2_rep:
+ row_2_rep = row[2].replace("//", "/")
+ if p_rep == row_2_rep:
+ continue
+
+ if row_2_rep and row_2_rep != "":
+ gfind_write_row(f, row[0], field_separator, p_rep, row_2_rep)
+
+ else:
+ gfind_write(f, row[0], field_separator, p_rep)
+
+def validate_volume(volume):
+ cmd = ["gluster", 'volume', 'info', volume, "--xml"]
+ _, data, _ = execute(cmd,
+ exit_msg="Failed to Run Gluster Volume Info",
+ logger=logger)
+ try:
+ tree = etree.fromstring(data)
+ statusStr = tree.find('volInfo/volumes/volume/statusStr').text
+ except (ParseError, AttributeError) as e:
+ fail("Invalid Volume: Check the Volume name! %s" % e)
+ if statusStr != "Started":
+ fail("Volume %s is not online" % volume)
+
+# The rules for a valid session name.
+SESSION_NAME_RULES = {
+ 'min_length': 2,
+ 'max_length': 256, # same as maximum volume length
+ # Specifies all alphanumeric characters, underscore, hyphen.
+ 'valid_chars': r'0-9a-zA-Z_-',
+}
+
+
+# checks valid session name, fail otherwise
+def validate_session_name(session):
+ # Check for minimum length
+ if len(session) < SESSION_NAME_RULES['min_length']:
+ fail('session_name must be at least ' +
+ str(SESSION_NAME_RULES['min_length']) + ' characters long.')
+ # Check for maximum length
+ if len(session) > SESSION_NAME_RULES['max_length']:
+ fail('session_name must not exceed ' +
+ str(SESSION_NAME_RULES['max_length']) + ' characters length.')
+
+ # Matches strings composed entirely of characters specified within
+ if not re.match(r'^[' + SESSION_NAME_RULES['valid_chars'] +
+ ']+$', session):
+ fail('Session name can only contain these characters: ' +
+ SESSION_NAME_RULES['valid_chars'])
+
+
+def mode_create(session_dir, args):
+ validate_session_name(args.session)
+
+ logger.debug("Init is called - Session: %s, Volume: %s"
+ % (args.session, args.volume))
+ mkdirp(session_dir, exit_on_err=True, logger=logger)
+ mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
+ logger=logger)
+ status_file = os.path.join(session_dir, args.volume, "status")
+
+ if os.path.exists(status_file) and not args.force:
+ fail("Session %s already created" % args.session, logger=logger)
+
+ if not os.path.exists(status_file) or args.force:
+ ssh_setup(args)
+ enable_volume_options(args)
+
+ # Add Rollover time to current time to make sure changelogs
+ # will be available if we use this time as start time
+ time_to_update = int(time.time()) + get_changelog_rollover_time(
+ args.volume)
+
+ run_cmd_nodes("create", args, time_to_update=str(time_to_update))
+
+ if not os.path.exists(status_file) or args.reset_session_time:
+ with open(status_file, "w") as f:
+ f.write(str(time_to_update))
+
+ sys.stdout.write("Session %s created with volume %s\n" %
+ (args.session, args.volume))
+
+ sys.exit(0)
+
+
+def mode_query(session_dir, args):
+ global gtmpfilename
+ global g_pid_nodefile_map
+
+ # Verify volume status
+ cmd = ["gluster", 'volume', 'info', args.volume, "--xml"]
+ _, data, _ = execute(cmd,
+ exit_msg="Failed to Run Gluster Volume Info",
+ logger=logger)
+ try:
+ tree = etree.fromstring(data)
+ statusStr = tree.find('volInfo/volumes/volume/statusStr').text
+ except (ParseError, AttributeError) as e:
+ fail("Invalid Volume: %s" % e, logger=logger)
+
+ if statusStr != "Started":
+ fail("Volume %s is not online" % args.volume, logger=logger)
+
+ mkdirp(session_dir, exit_on_err=True, logger=logger)
+ mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
+ logger=logger)
+ mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)
+
+ # Configure cluster for pasword-less SSH
+ ssh_setup(args)
+
+ # Enable volume options for changelog capture
+ enable_volume_options(args)
+
+ # Test options
+ if not args.full and args.type in ["f", "d"]:
+ fail("--type can only be used with --full")
+ if not args.since_time and not args.end_time and not args.full:
+ fail("Please specify either {--since-time and optionally --end-time} "
+ "or --full", logger=logger)
+
+ if args.since_time and args.end_time and args.full:
+ fail("Please specify either {--since-time and optionally --end-time} "
+ "or --full, but not both",
+ logger=logger)
+
+ if args.end_time and not args.since_time:
+ fail("Please specify --since-time as well", logger=logger)
+
+ # Start query command processing
+ start = -1
+ end = -1
+ if args.since_time:
+ start = args.since_time
+ if args.end_time:
+ end = args.end_time
+ else:
+ start = 0 # --full option is handled separately
+
+ logger.debug("Query is called - Session: %s, Volume: %s, "
+ "Start time: %s, End time: %s"
+ % ("default", args.volume, start, end))
+
+ prefix = datetime.now().strftime("%Y%m%d-%H%M%S-%f-")
+ gtmpfilename = prefix + next(tempfile._get_candidate_names())
+
+ run_cmd_nodes("query", args, start=start, end=end,
+ tmpfilename=gtmpfilename)
+
+ # Merger
+ if args.full:
+ if len(g_pid_nodefile_map) > 0:
+ cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \
+ ["-o", args.outfile]
+ execute(cmd,
+ exit_msg="Failed to merge output files "
+ "collected from nodes", logger=logger)
+ else:
+ fail("Failed to collect any output files from peers. "
+ "Looks like all bricks are offline.", logger=logger)
+ else:
+ # Read each Changelogs db and generate finaldb
+ create_file(args.outfile, exit_on_err=True, logger=logger)
+ outfilemerger = OutputMerger(args.outfile + ".db",
+ list(g_pid_nodefile_map.values()))
+ write_output(args.outfile, outfilemerger, args.field_separator)
+
+ try:
+ os.remove(args.outfile + ".db")
+ except (IOError, OSError):
+ pass
+
+ run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename)
+
+ sys.stdout.write("Generated output file %s\n" % args.outfile)
+
+
+def mode_pre(session_dir, args):
+ global gtmpfilename
+ global g_pid_nodefile_map
+
+ """
+ Read from Session file and write to session.pre file
+ """
+ endtime_to_update = int(time.time()) - get_changelog_rollover_time(
+ args.volume)
+ status_file = os.path.join(session_dir, args.volume, "status")
+ status_file_pre = status_file + ".pre"
+
+ mkdirp(os.path.dirname(args.outfile), exit_on_err=True, logger=logger)
+
+ if not args.full and args.type in ["f", "d"]:
+ fail("--type can only be used with --full")
+
+ # If Pre status file exists and running pre command again
+ if os.path.exists(status_file_pre) and not args.regenerate_outfile:
+ fail("Post command is not run after last pre, "
+ "use --regenerate-outfile")
+
+ start = 0
+ try:
+ with open(status_file) as f:
+ start = int(f.read().strip())
+ except ValueError:
+ pass
+ except (OSError, IOError) as e:
+ fail("Error Opening Session file %s: %s"
+ % (status_file, e), logger=logger)
+
+ logger.debug("Pre is called - Session: %s, Volume: %s, "
+ "Start time: %s, End time: %s"
+ % (args.session, args.volume, start, endtime_to_update))
+
+ prefix = datetime.now().strftime("%Y%m%d-%H%M%S-%f-")
+ gtmpfilename = prefix + next(tempfile._get_candidate_names())
+
+ run_cmd_nodes("pre", args, start=start, end=-1, tmpfilename=gtmpfilename)
+
+ # Merger
+ if args.full:
+ if len(g_pid_nodefile_map) > 0:
+ cmd = ["sort", "-u"] + list(g_pid_nodefile_map.values()) + \
+ ["-o", args.outfile]
+ execute(cmd,
+ exit_msg="Failed to merge output files "
+ "collected from nodes", logger=logger)
+ else:
+ fail("Failed to collect any output files from peers. "
+ "Looks like all bricks are offline.", logger=logger)
+ else:
+ # Read each Changelogs db and generate finaldb
+ create_file(args.outfile, exit_on_err=True, logger=logger)
+ outfilemerger = OutputMerger(args.outfile + ".db",
+ list(g_pid_nodefile_map.values()))
+ write_output(args.outfile, outfilemerger, args.field_separator)
+
+ try:
+ os.remove(args.outfile + ".db")
+ except (IOError, OSError):
+ pass
+
+ run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename)
+
+ with open(status_file_pre, "w") as f:
+ f.write(str(endtime_to_update))
+
+ sys.stdout.write("Generated output file %s\n" % args.outfile)
+
+
+def mode_post(session_dir, args):
+ """
+ If pre session file exists, overwrite session file
+ If pre session file does not exists, return ERROR
+ """
+ status_file = os.path.join(session_dir, args.volume, "status")
+ logger.debug("Post is called - Session: %s, Volume: %s"
+ % (args.session, args.volume))
+ status_file_pre = status_file + ".pre"
+
+ if os.path.exists(status_file_pre):
+ run_cmd_nodes("post", args)
+ os.rename(status_file_pre, status_file)
+ sys.stdout.write("Session %s with volume %s updated\n" %
+ (args.session, args.volume))
+ sys.exit(0)
+ else:
+ fail("Pre script is not run", logger=logger)
+
+
+def mode_delete(session_dir, args):
+ run_cmd_nodes("delete", args)
+ shutil.rmtree(os.path.join(session_dir, args.volume),
+ onerror=handle_rm_error)
+ sys.stdout.write("Session %s with volume %s deleted\n" %
+ (args.session, args.volume))
+
+ # If the session contains only this volume, then cleanup the
+ # session directory. If a session contains multiple volumes
+ # then os.rmdir will fail with ENOTEMPTY
+ try:
+ os.rmdir(session_dir)
+ except OSError as e:
+ if not e.errno == ENOTEMPTY:
+ logger.warn("Failed to delete session directory: %s" % e)
+
+
+def mode_list(session_dir, args):
+ """
+ List available sessions to stdout, if session name is set
+ only list that session.
+ """
+ if args.session:
+ if not os.path.exists(os.path.join(session_dir, args.session)):
+ fail("Invalid Session", logger=logger)
+ sessions = [args.session]
+ else:
+ sessions = []
+ for d in os.listdir(session_dir):
+ if d != ".keys":
+ sessions.append(d)
+
+ output = []
+ for session in sessions:
+ # Session Volume Last Processed
+ volnames = os.listdir(os.path.join(session_dir, session))
+
+ for volname in volnames:
+ if args.volume and args.volume != volname:
+ continue
+
+ status_file = os.path.join(session_dir, session, volname, "status")
+ last_processed = None
+ try:
+ with open(status_file) as f:
+ last_processed = f.read().strip()
+ except (OSError, IOError) as e:
+ if e.errno == ENOENT:
+ continue
+ else:
+ raise
+ output.append((session, volname, last_processed))
+
+ if output:
+ sys.stdout.write("%s %s %s\n" % ("SESSION".ljust(25),
+ "VOLUME".ljust(25),
+ "SESSION TIME".ljust(25)))
+ sys.stdout.write("-"*75)
+ sys.stdout.write("\n")
+ for session, volname, last_processed in output:
+ sess_time = 'Session Corrupted'
+ if last_processed:
+ try:
+ sess_time = human_time(last_processed)
+ except TypeError:
+ sess_time = 'Session Corrupted'
+ sys.stdout.write("%s %s %s\n" % (session.ljust(25),
+ volname.ljust(25),
+ sess_time.ljust(25)))
+
+ if not output:
+ if args.session or args.volume:
+ fail("Invalid Session", logger=logger)
+ else:
+ sys.stdout.write("No sessions found.\n")
+
+
+def main():
+ global gtmpfilename
+
+ args = None
+
+ try:
+ args = _get_args()
+ mkdirp(conf.get_opt("session_dir"), exit_on_err=True)
+
+ # force the default session name if mode is "query"
+ if args.mode == "query":
+ args.session = "default"
+
+ if args.mode == "list":
+ session_dir = conf.get_opt("session_dir")
+ else:
+ session_dir = os.path.join(conf.get_opt("session_dir"),
+ args.session)
+
+ if not os.path.exists(session_dir) and \
+ args.mode not in ["create", "list", "query"]:
+ fail("Invalid session %s" % args.session)
+
+ # volume involved, validate the volume first
+ if args.mode not in ["list"]:
+ validate_volume(args.volume)
+
+
+ # "default" is a system defined session name
+ if args.mode in ["create", "post", "pre", "delete"] and \
+ args.session == "default":
+ fail("Invalid session %s" % args.session)
+
+ vol_dir = os.path.join(session_dir, args.volume)
+ if not os.path.exists(vol_dir) and args.mode not in \
+ ["create", "list", "query"]:
+ fail("Session %s not created with volume %s" %
+ (args.session, args.volume))
+
+ mkdirp(os.path.join(conf.get_opt("log_dir"),
+ args.session,
+ args.volume),
+ exit_on_err=True)
+ log_file = os.path.join(conf.get_opt("log_dir"),
+ args.session,
+ args.volume,
+ "cli.log")
+ setup_logger(logger, log_file, args.debug)
+
+ # globals() will have all the functions already defined.
+ # mode_<args.mode> will be the function name to be called
+ globals()["mode_" + args.mode](session_dir, args)
+ except KeyboardInterrupt:
+ if args is not None:
+ if args.mode == "pre" or args.mode == "query":
+ # cleanup session
+ if gtmpfilename is not None:
+ # no more interrupts until we clean up
+ signal.signal(signal.SIGINT, signal.SIG_IGN)
+ run_cmd_nodes("cleanup", args, tmpfilename=gtmpfilename)
+
+ # Interrupted, exit with non zero error code
+ sys.exit(2)
diff --git a/tools/glusterfind/src/nodeagent.py b/tools/glusterfind/src/nodeagent.py
new file mode 100644
index 00000000000..679daa6fa76
--- /dev/null
+++ b/tools/glusterfind/src/nodeagent.py
@@ -0,0 +1,139 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import shutil
+import sys
+import os
+import logging
+from argparse import ArgumentParser, RawDescriptionHelpFormatter
+try:
+ import urllib.parse as urllib
+except ImportError:
+ import urllib
+from errno import ENOTEMPTY
+
+from utils import setup_logger, mkdirp, handle_rm_error
+import conf
+
+logger = logging.getLogger()
+
+
+def mode_cleanup(args):
+ working_dir = os.path.join(conf.get_opt("working_dir"),
+ args.session,
+ args.volume,
+ args.tmpfilename)
+
+ mkdirp(os.path.join(conf.get_opt("log_dir"), args.session, args.volume),
+ exit_on_err=True)
+ log_file = os.path.join(conf.get_opt("log_dir"),
+ args.session,
+ args.volume,
+ "changelog.log")
+
+ setup_logger(logger, log_file)
+
+ try:
+ shutil.rmtree(working_dir, onerror=handle_rm_error)
+ except (OSError, IOError) as e:
+ logger.error("Failed to delete working directory: %s" % e)
+ sys.exit(1)
+
+
+def mode_create(args):
+ session_dir = os.path.join(conf.get_opt("session_dir"),
+ args.session)
+ status_file = os.path.join(session_dir, args.volume,
+ "%s.status" % urllib.quote_plus(args.brick))
+
+ mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
+ logger=logger)
+
+ if not os.path.exists(status_file) or args.reset_session_time:
+ with open(status_file, "w") as f:
+ f.write(args.time_to_update)
+
+ sys.exit(0)
+
+
+def mode_post(args):
+ session_dir = os.path.join(conf.get_opt("session_dir"), args.session)
+ status_file = os.path.join(session_dir, args.volume,
+ "%s.status" % urllib.quote_plus(args.brick))
+
+ mkdirp(os.path.join(session_dir, args.volume), exit_on_err=True,
+ logger=logger)
+ status_file_pre = status_file + ".pre"
+
+ if os.path.exists(status_file_pre):
+ os.rename(status_file_pre, status_file)
+ sys.exit(0)
+
+
+def mode_delete(args):
+ session_dir = os.path.join(conf.get_opt("session_dir"),
+ args.session)
+ shutil.rmtree(os.path.join(session_dir, args.volume),
+ onerror=handle_rm_error)
+
+ # If the session contains only this volume, then cleanup the
+ # session directory. If a session contains multiple volumes
+ # then os.rmdir will fail with ENOTEMPTY
+ try:
+ os.rmdir(session_dir)
+ except OSError as e:
+ if not e.errno == ENOTEMPTY:
+ logger.warn("Failed to delete session directory: %s" % e)
+
+
+def _get_args():
+ parser = ArgumentParser(formatter_class=RawDescriptionHelpFormatter,
+ description="Node Agent")
+ subparsers = parser.add_subparsers(dest="mode")
+
+ parser_cleanup = subparsers.add_parser('cleanup')
+ parser_cleanup.add_argument("session", help="Session Name")
+ parser_cleanup.add_argument("volume", help="Volume Name")
+ parser_cleanup.add_argument("tmpfilename", help="Temporary File Name")
+ parser_cleanup.add_argument("--debug", help="Debug", action="store_true")
+
+ parser_session_create = subparsers.add_parser('create')
+ parser_session_create.add_argument("session", help="Session Name")
+ parser_session_create.add_argument("volume", help="Volume Name")
+ parser_session_create.add_argument("brick", help="Brick Path")
+ parser_session_create.add_argument("time_to_update", help="Time to Update")
+ parser_session_create.add_argument("--reset-session-time",
+ help="Reset Session Time",
+ action="store_true")
+ parser_session_create.add_argument("--debug", help="Debug",
+ action="store_true")
+
+ parser_post = subparsers.add_parser('post')
+ parser_post.add_argument("session", help="Session Name")
+ parser_post.add_argument("volume", help="Volume Name")
+ parser_post.add_argument("brick", help="Brick Path")
+ parser_post.add_argument("--debug", help="Debug",
+ action="store_true")
+
+ parser_delete = subparsers.add_parser('delete')
+ parser_delete.add_argument("session", help="Session Name")
+ parser_delete.add_argument("volume", help="Volume Name")
+ parser_delete.add_argument("--debug", help="Debug",
+ action="store_true")
+ return parser.parse_args()
+
+
+if __name__ == "__main__":
+ args = _get_args()
+
+ # globals() will have all the functions already defined.
+ # mode_<args.mode> will be the function name to be called
+ globals()["mode_" + args.mode](args)
diff --git a/tools/glusterfind/src/tool.conf.in b/tools/glusterfind/src/tool.conf.in
new file mode 100644
index 00000000000..a80f4a784c0
--- /dev/null
+++ b/tools/glusterfind/src/tool.conf.in
@@ -0,0 +1,10 @@
+[vars]
+session_dir=@GLUSTERD_WORKDIR@/glusterfind/
+working_dir=@GLUSTERFSD_MISCDIR@/glusterfind/
+log_dir=/var/log/glusterfs/glusterfind/
+nodeagent=@GLUSTERFS_LIBEXECDIR@/glusterfind/nodeagent.py
+brick_ignore_dirs=.glusterfs,.trashcan
+
+[change_detectors]
+changelog=@GLUSTERFS_LIBEXECDIR@/glusterfind/changelog.py
+brickfind=@GLUSTERFS_LIBEXECDIR@/glusterfind/brickfind.py \ No newline at end of file
diff --git a/tools/glusterfind/src/utils.py b/tools/glusterfind/src/utils.py
new file mode 100644
index 00000000000..906ebd8f252
--- /dev/null
+++ b/tools/glusterfind/src/utils.py
@@ -0,0 +1,267 @@
+# -*- coding: utf-8 -*-
+
+# Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com/>
+# This file is part of GlusterFS.
+#
+# This file is licensed to you under your choice of the GNU Lesser
+# General Public License, version 3 or any later version (LGPLv3 or
+# later), or the GNU General Public License, version 2 (GPLv2), in all
+# cases as published by the Free Software Foundation.
+
+import sys
+from subprocess import PIPE, Popen
+from errno import EEXIST, ENOENT
+import xml.etree.cElementTree as etree
+import logging
+import os
+from datetime import datetime
+
+ROOT_GFID = "00000000-0000-0000-0000-000000000001"
+DEFAULT_CHANGELOG_INTERVAL = 15
+SPACE_ESCAPE_CHAR = "%20"
+NEWLINE_ESCAPE_CHAR = "%0A"
+PERCENTAGE_ESCAPE_CHAR = "%25"
+
+ParseError = etree.ParseError if hasattr(etree, 'ParseError') else SyntaxError
+cache_data = {}
+
+
+class RecordType(object):
+ NEW = "NEW"
+ MODIFY = "MODIFY"
+ RENAME = "RENAME"
+ DELETE = "DELETE"
+
+
+def cache_output(func):
+ def wrapper(*args, **kwargs):
+ global cache_data
+ if cache_data.get(func.__name__, None) is None:
+ cache_data[func.__name__] = func(*args, **kwargs)
+
+ return cache_data[func.__name__]
+ return wrapper
+
+
+def handle_rm_error(func, path, exc_info):
+ if exc_info[1].errno == ENOENT:
+ return
+
+ raise exc_info[1]
+
+
+def find(path, callback_func=lambda x: True, filter_func=lambda x: True,
+ ignore_dirs=[], subdirs_crawl=True):
+ if path in ignore_dirs:
+ return
+
+ # Capture filter_func output and pass it to callback function
+ filter_result = filter_func(path)
+ if filter_result is not None:
+ callback_func(path, filter_result, os.path.isdir(path))
+
+ for p in os.listdir(path):
+ full_path = os.path.join(path, p)
+
+ is_dir = os.path.isdir(full_path)
+ if is_dir:
+ if subdirs_crawl:
+ find(full_path, callback_func, filter_func, ignore_dirs)
+ else:
+ filter_result = filter_func(full_path)
+ if filter_result is not None:
+ callback_func(full_path, filter_result)
+ else:
+ filter_result = filter_func(full_path)
+ if filter_result is not None:
+ callback_func(full_path, filter_result, is_dir)
+
+
+def output_write(f, path, prefix=".", encode=False, tag="",
+ field_separator=" "):
+ if path == "":
+ return
+
+ if prefix != ".":
+ path = os.path.join(prefix, path)
+
+ if encode:
+ path = quote_plus_space_newline(path)
+
+ # set the field separator
+ FS = "" if tag == "" else field_separator
+
+ f.write("%s%s%s\n" % (tag.strip(), FS, path))
+
+
+def human_time(ts):
+ return datetime.fromtimestamp(float(ts)).strftime("%Y-%m-%d %H:%M:%S")
+
+
+def setup_logger(logger, path, debug=False):
+ if debug:
+ logger.setLevel(logging.DEBUG)
+ else:
+ logger.setLevel(logging.INFO)
+
+ # create the logging file handler
+ fh = logging.FileHandler(path)
+
+ formatter = logging.Formatter("[%(asctime)s] %(levelname)s "
+ "[%(module)s - %(lineno)s:%(funcName)s] "
+ "- %(message)s")
+
+ fh.setFormatter(formatter)
+
+ # add handler to logger object
+ logger.addHandler(fh)
+
+
+def create_file(path, exit_on_err=False, logger=None):
+ """
+ If file exists overwrite. Print error to stderr and exit
+ if exit_on_err is set, else raise the exception. Consumer
+ should handle the exception.
+ """
+ try:
+ open(path, 'w').close()
+ except (OSError, IOError) as e:
+ if exit_on_err:
+ fail("Failed to create file %s: %s" % (path, e), logger=logger)
+ else:
+ raise
+
+
+def mkdirp(path, exit_on_err=False, logger=None):
+ """
+ Try creating required directory structure
+ ignore EEXIST and raise exception for rest of the errors.
+ Print error in stderr and exit if exit_on_err is set, else
+ raise exception.
+ """
+ try:
+ os.makedirs(path)
+ except (OSError, IOError) as e:
+ if e.errno == EEXIST and os.path.isdir(path):
+ pass
+ else:
+ if exit_on_err:
+ fail("Fail to create dir %s: %s" % (path, e), logger=logger)
+ else:
+ raise
+
+
+def fail(msg, code=1, logger=None):
+ """
+ Write error to stderr and exit
+ """
+ if logger:
+ logger.error(msg)
+ sys.stderr.write("%s\n" % msg)
+ sys.exit(code)
+
+
+def execute(cmd, exit_msg=None, logger=None):
+ """
+ If failure_msg is not None then return returncode, out and error.
+ If failure msg is set, write to stderr and exit.
+ """
+ p = Popen(cmd, stdin=PIPE, stdout=PIPE, stderr=PIPE, close_fds=True)
+
+ (out, err) = p.communicate()
+ if p.returncode != 0 and exit_msg is not None:
+ fail("%s: %s" % (exit_msg, err), p.returncode, logger=logger)
+
+ return (p.returncode, out, err)
+
+
+def symlink_gfid_to_path(brick, gfid):
+ """
+ Each directories are symlinked to file named GFID
+ in .glusterfs directory of brick backend. Using readlink
+ we get PARGFID/basename of dir. readlink recursively till
+ we get PARGFID as ROOT_GFID.
+ """
+ if gfid == ROOT_GFID:
+ return ""
+
+ out_path = ""
+ while True:
+ path = os.path.join(brick, ".glusterfs", gfid[0:2], gfid[2:4], gfid)
+ path_readlink = os.readlink(path)
+ pgfid = os.path.dirname(path_readlink)
+ out_path = os.path.join(os.path.basename(path_readlink), out_path)
+ if pgfid == "../../00/00/%s" % ROOT_GFID:
+ break
+ gfid = os.path.basename(pgfid)
+ return out_path
+
+
+@cache_output
+def get_my_uuid():
+ cmd = ["gluster", "system::", "uuid", "get", "--xml"]
+ rc, out, err = execute(cmd)
+
+ if rc != 0:
+ return None
+
+ tree = etree.fromstring(out)
+ uuid_el = tree.find("uuidGenerate/uuid")
+ return uuid_el.text
+
+
+def is_host_local(host_uuid):
+ # Get UUID only if it is not done previously
+ # else Cache the UUID value
+ my_uuid = get_my_uuid()
+ if my_uuid == host_uuid:
+ return True
+
+ return False
+
+
+def get_changelog_rollover_time(volumename):
+ cmd = ["gluster", "volume", "get", volumename,
+ "changelog.rollover-time", "--xml"]
+ rc, out, err = execute(cmd)
+
+ if rc != 0:
+ return DEFAULT_CHANGELOG_INTERVAL
+
+ try:
+ tree = etree.fromstring(out)
+ val = tree.find('volGetopts/Opt/Value').text
+ if val is not None:
+ # Filter the value by split, as it may be 'X (DEFAULT)'
+ # and we only need 'X'
+ return int(val.split(' ', 1)[0])
+ except ParseError:
+ return DEFAULT_CHANGELOG_INTERVAL
+
+
+def output_path_prepare(path, args):
+ """
+ If Prefix is set, joins to Path, removes ending slash
+ and encodes it.
+ """
+ if args.output_prefix != ".":
+ path = os.path.join(args.output_prefix, path)
+ if path.endswith("/"):
+ path = path[0:len(path)-1]
+
+ if args.no_encode:
+ return path
+ else:
+ return quote_plus_space_newline(path)
+
+
+def unquote_plus_space_newline(s):
+ return s.replace(SPACE_ESCAPE_CHAR, " ")\
+ .replace(NEWLINE_ESCAPE_CHAR, "\n")\
+ .replace(PERCENTAGE_ESCAPE_CHAR, "%")
+
+
+def quote_plus_space_newline(s):
+ return s.replace("%", PERCENTAGE_ESCAPE_CHAR)\
+ .replace(" ", SPACE_ESCAPE_CHAR)\
+ .replace("\n", NEWLINE_ESCAPE_CHAR)
diff --git a/tools/setgfid2path/Makefile.am b/tools/setgfid2path/Makefile.am
new file mode 100644
index 00000000000..c14787a80ce
--- /dev/null
+++ b/tools/setgfid2path/Makefile.am
@@ -0,0 +1,5 @@
+SUBDIRS = src
+
+EXTRA_DIST = gluster-setgfid2path.8
+
+man8_MANS = gluster-setgfid2path.8
diff --git a/tools/setgfid2path/gluster-setgfid2path.8 b/tools/setgfid2path/gluster-setgfid2path.8
new file mode 100644
index 00000000000..2e228ca8514
--- /dev/null
+++ b/tools/setgfid2path/gluster-setgfid2path.8
@@ -0,0 +1,54 @@
+
+.\" Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+.\" This file is part of GlusterFS.
+.\"
+.\" This file is licensed to you under your choice of the GNU Lesser
+.\" General Public License, version 3 or any later version (LGPLv3 or
+.\" later), or the GNU General Public License, version 2 (GPLv2), in all
+.\" cases as published by the Free Software Foundation.
+.\"
+.\"
+.TH gluster-setgfid2path 8 "Command line utility to set GFID to Path Xattrs"
+.SH NAME
+gluster-setgfid2path - Gluster tool to set GFID to Path xattrs
+.SH SYNOPSIS
+.B gluster-setgfid2path
+.IR file
+.SH DESCRIPTION
+New feature introduced with Gluster release 3.12, to find full path from GFID.
+This feature can be enabled using Volume set command \fBgluster volume set
+<VOLUME> storage.gfid2path enable\fR
+.PP
+Once \fBgfid2path\fR feature is enabled, it starts recording the necessary
+xattrs required for the feature. But it will not add xattrs for the already
+existing files. This tool provides facility to update the gfid2path xattrs for
+the given file path.
+
+.SH EXAMPLES
+To add xattrs of a single file,
+.PP
+.nf
+.RS
+gluster-setgfid2path /bricks/b1/hello.txt
+.RE
+.fi
+.PP
+To set xattr for all the existing files, run the below script on each bricks.
+.PP
+.nf
+.RS
+BRICK=/bricks/b1
+find $BRICK -type d \\( -path "${BRICK}/.trashcan" -o -path \\
+ "${BRICK}/.glusterfs" \\) -prune -o -type f \\
+ -exec gluster-setgfid2path {} \\;
+.RE
+.fi
+.PP
+.SH SEE ALSO
+.nf
+\fBgluster\fR(8)
+\fR
+.fi
+.SH COPYRIGHT
+.nf
+Copyright(c) 2017 Red Hat, Inc. <http://www.redhat.com>
diff --git a/tools/setgfid2path/src/Makefile.am b/tools/setgfid2path/src/Makefile.am
new file mode 100644
index 00000000000..7316d117070
--- /dev/null
+++ b/tools/setgfid2path/src/Makefile.am
@@ -0,0 +1,16 @@
+gluster_setgfid2pathdir = $(sbindir)
+
+if WITH_SERVER
+gluster_setgfid2path_PROGRAMS = gluster-setgfid2path
+endif
+
+gluster_setgfid2path_SOURCES = main.c
+
+gluster_setgfid2path_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+gluster_setgfid2path_LDFLAGS = $(GF_LDFLAGS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
diff --git a/tools/setgfid2path/src/main.c b/tools/setgfid2path/src/main.c
new file mode 100644
index 00000000000..4320a7b2481
--- /dev/null
+++ b/tools/setgfid2path/src/main.c
@@ -0,0 +1,130 @@
+/*
+ Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+#include <stdio.h>
+#include <libgen.h>
+
+#include <glusterfs/common-utils.h>
+#include <glusterfs/syscall.h>
+
+#define MAX_GFID2PATH_LINK_SUP 500
+#define GFID_SIZE 16
+#define GFID_XATTR_KEY "trusted.gfid"
+
+int
+main(int argc, char **argv)
+{
+ int ret = 0;
+ struct stat st;
+ char *dname = NULL;
+ char *bname = NULL;
+ ssize_t ret_size = 0;
+ uuid_t pgfid_raw = {
+ 0,
+ };
+ char pgfid[36 + 1] = "";
+ char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = {
+ 0,
+ };
+ char pgfid_bname[1024] = {
+ 0,
+ };
+ char *key = NULL;
+ char *val = NULL;
+ size_t key_size = 0;
+ size_t val_size = 0;
+ const char *file_path = NULL;
+ char *file_path1 = NULL;
+ char *file_path2 = NULL;
+
+ if (argc != 2) {
+ fprintf(stderr, "Usage: setgfid2path <file-path>\n");
+ return -1;
+ }
+
+ ret = sys_lstat(argv[1], &st);
+ if (ret != 0) {
+ fprintf(stderr, "Invalid File Path\n");
+ return -1;
+ }
+
+ if (st.st_nlink >= MAX_GFID2PATH_LINK_SUP) {
+ fprintf(stderr,
+ "Number of Hardlink support exceeded. "
+ "max=%d\n",
+ MAX_GFID2PATH_LINK_SUP);
+ return -1;
+ }
+
+ file_path = argv[1];
+ file_path1 = strdup(file_path);
+ file_path2 = strdup(file_path);
+
+ dname = dirname(file_path1);
+ bname = basename(file_path2);
+
+ /* Get GFID of Parent directory */
+ ret_size = sys_lgetxattr(dname, GFID_XATTR_KEY, pgfid_raw, GFID_SIZE);
+ if (ret_size != GFID_SIZE) {
+ fprintf(stderr, "Failed to get GFID of parent directory. dir=%s\n",
+ dname);
+ ret = -1;
+ goto out;
+ }
+
+ /* Convert to UUID format */
+ if (uuid_utoa_r(pgfid_raw, pgfid) == NULL) {
+ fprintf(stderr,
+ "Failed to format GFID of parent directory. "
+ "dir=%s GFID=%s\n",
+ dname, pgfid_raw);
+ ret = -1;
+ goto out;
+ }
+
+ /* Find xxhash for PGFID/BaseName */
+ snprintf(pgfid_bname, sizeof(pgfid_bname), "%s/%s", pgfid, bname);
+ gf_xxh64_wrapper((unsigned char *)pgfid_bname, strlen(pgfid_bname),
+ GF_XXHSUM64_DEFAULT_SEED, xxh64);
+
+ key_size = SLEN(GFID2PATH_XATTR_KEY_PREFIX) + GF_XXH64_DIGEST_LENGTH * 2 +
+ 1;
+ key = alloca(key_size);
+ snprintf(key, key_size, GFID2PATH_XATTR_KEY_PREFIX "%s", xxh64);
+
+ val_size = UUID_CANONICAL_FORM_LEN + NAME_MAX + 2;
+ val = alloca(val_size);
+ snprintf(val, val_size, "%s/%s", pgfid, bname);
+
+ /* Set the Xattr, ignore if same key xattr already exists */
+ ret = sys_lsetxattr(file_path, key, val, strlen(val), XATTR_CREATE);
+ if (ret == -1) {
+ if (errno == EEXIST) {
+ printf("Xattr already exists, ignoring..\n");
+ ret = 0;
+ goto out;
+ }
+
+ fprintf(stderr, "Failed to set gfid2path xattr. errno=%d\n error=%s",
+ errno, strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ printf("Success. file=%s key=%s value=%s\n", file_path, key, val);
+
+out:
+ if (file_path1 != NULL)
+ free(file_path1);
+
+ if (file_path2 != NULL)
+ free(file_path2);
+
+ return ret;
+}
diff --git a/xlators/Makefile.am b/xlators/Makefile.am
index b1643d26ca0..ef20cbb64fa 100644
--- a/xlators/Makefile.am
+++ b/xlators/Makefile.am
@@ -1,3 +1,13 @@
-SUBDIRS = cluster storage protocol performance debug features encryption mount nfs mgmt system
+if BUILD_GNFS
+ GNFS_DIR = nfs
+endif
+
+DIST_SUBDIRS = cluster storage protocol performance debug features \
+ mount nfs mgmt system playground meta
+
+SUBDIRS = cluster storage protocol performance debug features \
+ mount ${GNFS_DIR} mgmt system playground meta
+
+EXTRA_DIST = xlator.sym
CLEANFILES =
diff --git a/xlators/bindings/Makefile.am b/xlators/bindings/Makefile.am
deleted file mode 100644
index f7766580257..00000000000
--- a/xlators/bindings/Makefile.am
+++ /dev/null
@@ -1 +0,0 @@
-SUBDIRS = $(BINDINGS_SUBDIRS)
diff --git a/xlators/bindings/python/src/Makefile.am b/xlators/bindings/python/src/Makefile.am
deleted file mode 100644
index c0b9141c667..00000000000
--- a/xlators/bindings/python/src/Makefile.am
+++ /dev/null
@@ -1,19 +0,0 @@
-
-xlator_PROGRAMS = python.so
-
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/bindings
-
-python_PYTHON = gluster.py glustertypes.py glusterstack.py
-
-pythondir = $(xlatordir)/python
-
-python_so_SOURCES = python.c
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles \
- $(PYTHON_CPPLAGS) -DGLUSTER_PYTHON_PATH=\"$(pythondir)\"
-
-AM_LDFLAGS = $(PYTHON_LDFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/bindings/python/src/gluster.py b/xlators/bindings/python/src/gluster.py
deleted file mode 100644
index ee0eb131011..00000000000
--- a/xlators/bindings/python/src/gluster.py
+++ /dev/null
@@ -1,47 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-from glustertypes import *
-from glusterstack import *
-import sys
-import inspect
-
-libglusterfs = CDLL("libglusterfs.so")
-_gf_log = libglusterfs._gf_log
-_gf_log.restype = c_int32
-_gf_log.argtypes = [c_char_p, c_char_p, c_char_p, c_int32, c_int, c_char_p]
-
-gf_log_loglevel = c_int.in_dll(libglusterfs, "gf_log_loglevel")
-
-GF_LOG_NONE = 0
-GF_LOG_CRITICAL = 1
-GF_LOG_ERROR = 2
-GF_LOG_WARNING = 3
-GF_LOG_DEBUG = 4
-
-def gf_log(module, level, fmt, *params):
- if level <= gf_log_loglevel:
- frame = sys._getframe(1)
- _gf_log(module, frame.f_code.co_filename, frame.f_code.co_name,
- frame.f_lineno, level, fmt, *params)
-
-class ComplexTranslator(object):
- def __init__(self, xlator):
- self.xlator = xlator_t.from_address(xlator)
-
- def __getattr__(self, item):
- return getattr(self.xlator, item)
diff --git a/xlators/bindings/python/src/glusterstack.py b/xlators/bindings/python/src/glusterstack.py
deleted file mode 100644
index ba24c81652e..00000000000
--- a/xlators/bindings/python/src/glusterstack.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-from glustertypes import *
-
-libc = CDLL("libc.so.6")
-calloc = libc.calloc
-calloc.argtypes = [c_int, c_int]
-calloc.restype = c_void_p
-
-# TODO: Can these be done in C somehow?
-def stack_wind(frame, rfn, obj, fn, *params):
- """Frame is a frame object"""
- _new = cast(calloc(1, sizeof(call_frame_t)), POINTER(call_frame_t))
- _new[0].root = frame.root
- _new[0].next = frame.root[0].frames.next
- _new[0].prev = pointer(frame.root[0].frames)
- if frame.root[0].frames.next:
- frame.root[0].frames.next[0].prev = _new
- frame.root[0].frames.next = _new
- _new[0].this = obj
- # TODO: Type checking like tmp_cbk?
- _new[0].ret = rfn
- _new[0].parent = pointer(frame)
- _new[0].cookie = cast(_new, c_void_p)
- # TODO: Initialize lock
- #_new.lock.init()
- frame.ref_count += 1
- fn(_new, obj, *params)
-
-def stack_unwind(frame, *params):
- """Frame is a frame object"""
- fn = frame[0].ret
- parent = frame[0].parent[0]
- parent.ref_count -= 1
-
- op_ret = params[0]
- op_err = params[1]
- params = params[2:]
- fn(parent, call_frame_t.from_address(frame[0].cookie), parent.this,
- op_ret, op_err, *params)
diff --git a/xlators/bindings/python/src/glustertypes.py b/xlators/bindings/python/src/glustertypes.py
deleted file mode 100644
index e9069d07c72..00000000000
--- a/xlators/bindings/python/src/glustertypes.py
+++ /dev/null
@@ -1,167 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-from ctypes import *
-import collections
-
-#
-# Forward declaration of some gluster types
-#
-class call_frame_t(Structure):
- pass
-
-class call_ctx_t(Structure):
- pass
-
-class call_pool_t(Structure):
- pass
-
-class xlator_t(Structure):
- def _getFirstChild(self):
- return self.children[0].xlator
- firstChild = property(_getFirstChild)
-
-class xlator_list_t(Structure):
- pass
-
-class xlator_fops(Structure):
- pass
-
-class xlator_mops(Structure):
- pass
-
-class glusterfs_ctx_t(Structure):
- pass
-
-class list_head(Structure):
- pass
-
-class dict_t(Structure):
- pass
-
-class inode_table_t(Structure):
- pass
-
-class fd_t(Structure):
- pass
-
-class iovec(Structure):
- _fields_ = [
- ("iov_base", c_void_p),
- ("iov_len", c_size_t),
- ]
-
- def __init__(self, s):
- self.iov_base = cast(c_char_p(s), c_void_p)
- self.iov_len = len(s)
-
- def getBytes(self):
- return string_at(self.iov_base, self.iov_len)
-
-# This is a pthread_spinlock_t
-# TODO: what happens to volatile-ness?
-gf_lock_t = c_int
-
-uid_t = c_uint32
-gid_t = c_uint32
-pid_t = c_int32
-
-off_t = c_int64
-
-#
-# Function pointer types
-#
-ret_fn_t = CFUNCTYPE(c_int32, POINTER(call_frame_t), POINTER(call_frame_t),
- POINTER(xlator_t), c_int32, c_int32)
-
-fini_fn_t = CFUNCTYPE(None, POINTER(xlator_t))
-init_fn_t = CFUNCTYPE(c_int32, POINTER(xlator_t))
-event_notify_fn_t = CFUNCTYPE(c_int32, POINTER(xlator_t), c_int32, c_void_p)
-
-list_head._fields_ = [
- ("next", POINTER(list_head)),
- ("prev", POINTER(list_head)),
- ]
-
-call_frame_t._fields_ = [
- ("root", POINTER(call_ctx_t)),
- ("parent", POINTER(call_frame_t)),
- ("next", POINTER(call_frame_t)),
- ("prev", POINTER(call_frame_t)),
- ("local", c_void_p),
- ("this", POINTER(xlator_t)),
- ("ret", ret_fn_t),
- ("ref_count", c_int32),
- ("lock", gf_lock_t),
- ("cookie", c_void_p),
- ("op", c_int32),
- ("type", c_int8),
- ]
-
-call_ctx_t._fields_ = [
- ("all_frames", list_head),
- ("trans", c_void_p),
- ("pool", call_pool_t),
- ("unique", c_uint64),
- ("state", c_void_p),
- ("uid", uid_t),
- ("gid", gid_t),
- ("pid", pid_t),
- ("frames", call_frame_t),
- ("req_refs", POINTER(dict_t)),
- ("rsp_refs", POINTER(dict_t)),
- ]
-
-xlator_t._fields_ = [
- ("name", c_char_p),
- ("type", c_char_p),
- ("next", POINTER(xlator_t)),
- ("prev", POINTER(xlator_t)),
- ("parent", POINTER(xlator_t)),
- ("children", POINTER(xlator_list_t)),
- ("fops", POINTER(xlator_fops)),
- ("mops", POINTER(xlator_mops)),
- ("fini", fini_fn_t),
- ("init", init_fn_t),
- ("notify", event_notify_fn_t),
- ("options", POINTER(dict_t)),
- ("ctx", POINTER(glusterfs_ctx_t)),
- ("itable", POINTER(inode_table_t)),
- ("ready", c_char),
- ("private", c_void_p),
- ]
-
-xlator_list_t._fields_ = [
- ("xlator", POINTER(xlator_t)),
- ("next", POINTER(xlator_list_t)),
- ]
-
-fop_functions = collections.defaultdict(lambda: c_void_p)
-fop_function_names = ['lookup', 'forget', 'stat', 'fstat', 'chmod', 'fchmod',
- 'chown', 'fchown', 'truncate', 'ftruncate', 'utimens', 'access',
- 'readlink', 'mknod', 'mkdir', 'unlink', 'rmdir', 'symlink',
- 'rename', 'link', 'create', 'open', 'readv', 'writev', 'flush',
- 'close', 'fsync', 'opendir', 'readdir', 'closedir', 'fsyncdir',
- 'statfs', 'setxattr', 'getxattr', 'removexattr', 'lk', 'writedir',
- # TODO: Call backs?
- ]
-
-fop_writev_t = CFUNCTYPE(c_int32, POINTER(call_frame_t), POINTER(xlator_t),
- POINTER(fd_t), POINTER(iovec), c_int32,
- off_t)
-
-fop_functions['writev'] = fop_writev_t
-xlator_fops._fields_ = [(f, fop_functions[f]) for f in fop_function_names]
diff --git a/xlators/bindings/python/src/python.c b/xlators/bindings/python/src/python.c
deleted file mode 100644
index 3310a211513..00000000000
--- a/xlators/bindings/python/src/python.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- Copyright (c) 2007-2010 Chris AtLee <chris@atlee.ca>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <Python.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-
-typedef struct
-{
- char *scriptname;
- PyObject *pXlator;
- PyObject *pScriptModule;
- PyObject *pGlusterModule;
- PyThreadState *pInterp;
-
- PyObject *pFrameType, *pVectorType, *pFdType;
-} python_private_t;
-
-int32_t
-python_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset)
-{
- python_private_t *priv = (python_private_t *)this->private;
- gf_log("python", GF_LOG_DEBUG, "In writev");
- if (PyObject_HasAttrString(priv->pXlator, "writev"))
- {
-
- PyObject *retval = PyObject_CallMethod(priv->pXlator, "writev",
- "O O O i l",
- PyObject_CallMethod(priv->pFrameType, "from_address", "O&", PyLong_FromVoidPtr, frame),
- PyObject_CallMethod(priv->pFdType, "from_address", "O&", PyLong_FromVoidPtr, fd),
- PyObject_CallMethod(priv->pVectorType, "from_address", "O&", PyLong_FromVoidPtr, vector),
- count,
- offset);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- }
- Py_XDECREF(retval);
- }
- else
- {
- return default_writev(frame, this, fd, vector, count, offset);
- }
- return 0;
-}
-
-struct xlator_fops fops = {
- .writev = python_writev
-};
-
-static PyObject *
-AnonModule_FromFile (const char* fname)
-{
- // Get the builtins
- PyThreadState* pThread = PyThreadState_Get();
- PyObject *pBuiltins = pThread->interp->builtins;
-
- if (PyErr_Occurred())
- {
- PyErr_Print();
- return NULL;
- }
-
- // Create a new dictionary for running code in
- PyObject *pModuleDict = PyDict_New();
- PyDict_SetItemString(pModuleDict, "__builtins__", pBuiltins);
- Py_INCREF(pBuiltins);
-
- // Run the file in the new context
- FILE* fp = fopen(fname, "r");
- PyRun_File(fp, fname, Py_file_input, pModuleDict, pModuleDict);
- fclose(fp);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- return NULL;
- }
-
- // Create an object to hold the new context
- PyRun_String("class ModuleWrapper(object):\n\tpass\n", Py_single_input, pModuleDict, pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- return NULL;
- }
- PyObject *pModule = PyRun_String("ModuleWrapper()", Py_eval_input, pModuleDict, pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- Py_XDECREF(pModule);
- return NULL;
- }
-
- // Set the new context's dictionary to the one we used to run the code
- // inside
- PyObject_SetAttrString(pModule, "__dict__", pModuleDict);
- if (PyErr_Occurred())
- {
- PyErr_Print();
- Py_DECREF(pModuleDict);
- Py_DECREF(pBuiltins);
- Py_DECREF(pModule);
- return NULL;
- }
-
- return pModule;
-}
-
-int32_t
-init (xlator_t *this)
-{
- // This is ok to call more than once per process
- Py_InitializeEx(0);
-
- if (!this->children) {
- gf_log ("python", GF_LOG_ERROR,
- "FATAL: python should have exactly one child");
- return -1;
- }
-
- python_private_t *priv = CALLOC (sizeof (python_private_t), 1);
- ERR_ABORT (priv);
-
- data_t *scriptname = dict_get (this->options, "scriptname");
- if (scriptname) {
- priv->scriptname = data_to_str(scriptname);
- } else {
- gf_log("python", GF_LOG_ERROR,
- "FATAL: python requires the scriptname parameter");
- return -1;
- }
-
- priv->pInterp = Py_NewInterpreter();
-
- // Adjust python's path
- PyObject *syspath = PySys_GetObject("path");
- PyObject *path = PyString_FromString(GLUSTER_PYTHON_PATH);
- PyList_Append(syspath, path);
- Py_DECREF(path);
-
- gf_log("python", GF_LOG_DEBUG,
- "Loading gluster module");
-
- priv->pGlusterModule = PyImport_ImportModule("gluster");
- if (PyErr_Occurred())
- {
- PyErr_Print();
- return -1;
- }
-
- priv->pFrameType = PyObject_GetAttrString(priv->pGlusterModule, "call_frame_t");
- priv->pFdType = PyObject_GetAttrString(priv->pGlusterModule, "fd_t");
- priv->pVectorType = PyObject_GetAttrString(priv->pGlusterModule, "iovec");
-
- gf_log("python", GF_LOG_DEBUG, "Loading script...%s", priv->scriptname);
-
- priv->pScriptModule = AnonModule_FromFile(priv->scriptname);
- if (!priv->pScriptModule || PyErr_Occurred())
- {
- gf_log("python", GF_LOG_ERROR, "Error loading %s", priv->scriptname);
- PyErr_Print();
- return -1;
- }
-
- if (!PyObject_HasAttrString(priv->pScriptModule, "xlator"))
- {
- gf_log("python", GF_LOG_ERROR, "%s does not have a xlator attribute", priv->scriptname);
- return -1;
- }
- gf_log("python", GF_LOG_DEBUG, "Instantiating translator");
- priv->pXlator = PyObject_CallMethod(priv->pScriptModule, "xlator", "O&",
- PyLong_FromVoidPtr, this);
- if (PyErr_Occurred() || !priv->pXlator)
- {
- PyErr_Print();
- return -1;
- }
-
- this->private = priv;
-
- gf_log ("python", GF_LOG_DEBUG, "python xlator loaded");
- return 0;
-}
-
-void
-fini (xlator_t *this)
-{
- python_private_t *priv = (python_private_t*)(this->private);
- Py_DECREF(priv->pXlator);
- Py_DECREF(priv->pScriptModule);
- Py_DECREF(priv->pGlusterModule);
- Py_DECREF(priv->pFrameType);
- Py_DECREF(priv->pFdType);
- Py_DECREF(priv->pVectorType);
- Py_EndInterpreter(priv->pInterp);
- return;
-}
diff --git a/xlators/bindings/python/src/testxlator.py b/xlators/bindings/python/src/testxlator.py
deleted file mode 100644
index 507455c856a..00000000000
--- a/xlators/bindings/python/src/testxlator.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Copyright (c) 2007 Chris AtLee <chris@atlee.ca>
-# This file is part of GlusterFS.
-#
-# GlusterFS is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published
-# by the Free Software Foundation; either version 3 of the License,
-# or (at your option) any later version.
-#
-# GlusterFS is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see
-# <http://www.gnu.org/licenses/>.
-
-"""
-This is a test translator written in python.
-
-Important things to note:
- This file must be import-able from glusterfsd. This probably means
- setting PYTHONPATH to where this file is located.
-
- This file must have a top-level xlator class object that will be
- used to instantiate individual translators.
-"""
-from gluster import *
-
-class MyXlator(ComplexTranslator):
- name = "MyXlator"
- def writev_cbk(self, frame, cookie, op_ret, op_errno, buf):
- stack_unwind(frame, op_ret, op_errno, buf)
- return 0
-
- def writev(self, frame, fd, vector, count, offset):
- gf_log(self.name, GF_LOG_WARNING, "writev %i bytes", vector.iov_len)
- # TODO: Use cookie to pass this to writev_cbk
- old_count = vector.iov_len
-
- data = vector.getBytes().encode("zlib")
-
- vector = iovec(data)
- gf_log(self.name, GF_LOG_WARNING, "writev %i bytes", vector.iov_len)
-
- @ret_fn_t
- def rfn(frame, prev, this, op_ret, op_errno, *params):
- if len(params) == 0:
- params = [0]
- return self.writev_cbk(frame, prev, old_count, op_errno, *params)
-
- stack_wind(frame, rfn, self.firstChild,
- self.firstChild[0].fops[0].writev, fd, vector, count, offset)
- return 0
-
-xlator = MyXlator
diff --git a/xlators/cluster/Makefile.am b/xlators/cluster/Makefile.am
index 0990822a7d3..8e067d5ab58 100644
--- a/xlators/cluster/Makefile.am
+++ b/xlators/cluster/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = stripe afr dht
+SUBDIRS = afr dht ec
CLEANFILES =
diff --git a/xlators/cluster/afr/src/Makefile.am b/xlators/cluster/afr/src/Makefile.am
index 95db5dd9645..610819b28fc 100644
--- a/xlators/cluster/afr/src/Makefile.am
+++ b/xlators/cluster/afr/src/Makefile.am
@@ -1,35 +1,35 @@
-xlator_LTLIBRARIES = afr.la pump.la
+xlator_LTLIBRARIES = afr.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
afr_common_source = afr-dir-read.c afr-dir-write.c afr-inode-read.c \
- afr-inode-write.c afr-open.c afr-transaction.c afr-self-heal-data.c \
- afr-self-heal-common.c afr-self-heal-metadata.c afr-self-heal-entry.c \
- afr-self-heal-algorithm.c afr-lk-common.c afr-self-heald.c \
+ afr-inode-write.c afr-open.c afr-transaction.c afr-lk-common.c \
+ afr-read-txn.c \
$(top_builddir)/xlators/lib/src/libxlator.c
-afr_la_LDFLAGS = -module -avoidversion
-afr_la_SOURCES = $(afr_common_source) afr.c
-afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+AFR_SELFHEAL_SOURCES = afr-self-heal-common.c afr-self-heal-data.c \
+ afr-self-heal-entry.c afr-self-heal-metadata.c afr-self-heald.c \
+ afr-self-heal-name.c
-pump_la_LDFLAGS = -module -avoidversion
-pump_la_SOURCES = $(afr_common_source) pump.c
-pump_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+afr_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+afr_la_SOURCES = $(afr_common_source) $(AFR_SELFHEAL_SOURCES) afr.c
+afr_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = afr.h afr-transaction.h afr-inode-write.h afr-inode-read.h \
- afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-self-heal-common.h \
- afr-self-heal-algorithm.h pump.h afr-mem-types.h afr-common.c \
- afr-self-heald.h $(top_builddir)/xlators/lib/src/libxlator.h \
- $(top_builddir)/glusterfsd/src/glusterfsd.h
+ afr-dir-read.h afr-dir-write.h afr-self-heal.h afr-mem-types.h \
+ afr-common.c afr-self-heald.h \
+ $(top_builddir)/xlators/lib/src/libxlator.h afr-messages.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
-I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \
- -I$(top_srcdir)/rpc/rpc-lib/src -shared -nostartfiles $(GF_CFLAGS)
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/replicate.so
- rm -f $(DESTDIR)$(xlatordir)/pump.so
install-data-hook:
ln -sf afr.so $(DESTDIR)$(xlatordir)/replicate.so
diff --git a/xlators/cluster/afr/src/afr-common.c b/xlators/cluster/afr/src/afr-common.c
index 09b0195cd0b..032ab5c8001 100644
--- a/xlators/cluster/afr/src/afr-common.c
+++ b/xlators/cluster/afr/src/afr-common.c
@@ -15,29 +15,20 @@
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "statedump.h"
-#include "inode.h"
-
-#include "fd.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/hashfn.h>
+#include <glusterfs/list.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/events.h>
+#include <glusterfs/upcall-utils.h>
#include "afr-inode-read.h"
#include "afr-inode-write.h"
@@ -45,4234 +36,7843 @@
#include "afr-dir-write.h"
#include "afr-transaction.h"
#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
#include "afr-self-heald.h"
-#include "pump.h"
+#include "afr-messages.h"
-#define AFR_ICTX_OPENDIR_DONE_MASK 0x0000000200000000ULL
-#define AFR_ICTX_SPLIT_BRAIN_MASK 0x0000000100000000ULL
-#define AFR_ICTX_READ_CHILD_MASK 0x00000000FFFFFFFFULL
+int32_t
+afr_quorum_errno(afr_private_t *priv)
+{
+ return ENOTCONN;
+}
-int
-afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
- gf_boolean_t fail_conflict);
-void
-afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count)
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid)
{
- int i = 0;
+ if (!__is_root_gfid(pargfid)) {
+ return _gf_false;
+ }
- for (i = 0; i < child_count; i++)
- dst[i] = src[i];
+ if (strcmp(name, GF_REPLICATE_TRASH_DIR) == 0) {
+ /*For backward compatibility /.landfill is private*/
+ return _gf_true;
+ }
+
+ if (pid == GF_CLIENT_PID_GSYNCD) {
+ /*geo-rep needs to create/sync private directory on slave because
+ * it appears in changelog*/
+ return _gf_false;
+ }
+
+ if (pid == GF_CLIENT_PID_GLFS_HEAL || pid == GF_CLIENT_PID_SELF_HEALD) {
+ if (strcmp(name, priv->anon_inode_name) == 0) {
+ /* anonymous-inode dir is private*/
+ return _gf_true;
+ }
+ } else {
+ if (strncmp(name, AFR_ANON_DIR_PREFIX, strlen(AFR_ANON_DIR_PREFIX)) ==
+ 0) {
+ /* anonymous-inode dir prefix is private for geo-rep to work*/
+ return _gf_true;
+ }
+ }
+
+ return _gf_false;
}
void
-afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path)
+afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies)
{
- int i = 0;
- afr_private_t *priv = NULL;
- int ret = 0;
-
- priv = this->private;
+ int i = 0;
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_set_uint64 (xattr_req, priv->pending_key[i],
- 3 * sizeof(int32_t));
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- path, priv->pending_key[i]);
- /* 3 = data+metadata+entry */
- }
- ret = dict_set_int32 (xattr_req, GF_GFIDLESS_LOOKUP, 1);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG, "%s: failed to set gfidless "
- "lookup", path);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].op_ret == 0) {
+ replies[i] = 1;
+ } else {
+ replies[i] = 0;
}
+ }
}
int
-afr_lookup_xattr_req_prepare (afr_local_t *local, xlator_t *this,
- dict_t *xattr_req, loc_t *loc, void **gfid_req)
+afr_fav_child_reset_sink_xattrs(void *opaque);
+
+int
+afr_fav_child_reset_sink_xattrs_cbk(int ret, call_frame_t *frame, void *opaque);
+
+static void
+afr_discover_done(call_frame_t *frame, xlator_t *this);
+
+int
+afr_dom_lock_acquire_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- int ret = -ENOMEM;
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int i = (long)cookie;
- GF_ASSERT (gfid_req);
+ local->cont.lk.dom_lock_op_ret[i] = op_ret;
+ local->cont.lk.dom_lock_op_errno[i] = op_errno;
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to acquire %s on %s",
+ uuid_utoa(local->fd->inode->gfid), AFR_LK_HEAL_DOM,
+ priv->children[i]->name);
+ } else {
+ local->cont.lk.dom_locked_nodes[i] = 1;
+ }
- *gfid_req = NULL;
- local->xattr_req = dict_new ();
- if (!local->xattr_req)
- goto out;
- if (xattr_req)
- dict_copy (xattr_req, local->xattr_req);
+ syncbarrier_wake(&local->barrier);
- afr_xattr_req_prepare (this, local->xattr_req, loc->path);
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_INODELK_COUNT);
- }
- ret = dict_set_uint64 (local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_ENTRYLK_COUNT);
- }
+ return 0;
+}
- ret = dict_set_uint32 (local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: Unable to set dict value for %s",
- loc->path, GLUSTERFS_PARENT_ENTRYLK);
+int
+afr_dom_lock_acquire(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+ int i = 0;
+
+ priv = frame->this->private;
+ local = frame->local;
+ local->cont.lk.dom_locked_nodes = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.locked_nodes),
+ gf_afr_mt_char);
+ if (!local->cont.lk.dom_locked_nodes) {
+ return -ENOMEM;
+ }
+ local->cont.lk.dom_lock_op_ret = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.dom_lock_op_ret),
+ gf_afr_mt_int32_t);
+ if (!local->cont.lk.dom_lock_op_ret) {
+ return -ENOMEM; /* CALLOC'd members are freed in afr_local_cleanup. */
+ }
+ local->cont.lk.dom_lock_op_errno = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.dom_lock_op_errno),
+ gf_afr_mt_int32_t);
+ if (!local->cont.lk.dom_lock_op_errno) {
+ return -ENOMEM; /* CALLOC'd members are freed in afr_local_cleanup. */
+ }
+ flock.l_type = F_WRLCK;
+
+ AFR_ONALL(frame, afr_dom_lock_acquire_cbk, finodelk, AFR_LK_HEAL_DOM,
+ local->fd, F_SETLK, &flock, NULL);
+
+ if (!afr_has_quorum(local->cont.lk.dom_locked_nodes, frame->this, NULL))
+ goto blocking_lock;
+
+ /*If any of the bricks returned EAGAIN, we still need blocking locks.*/
+ if (AFR_COUNT(local->cont.lk.dom_locked_nodes, priv->child_count) !=
+ priv->child_count) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lk.dom_lock_op_ret[i] == -1 &&
+ local->cont.lk.dom_lock_op_errno[i] == EAGAIN)
+ goto blocking_lock;
}
+ }
- ret = dict_get_ptr (local->xattr_req, "gfid-req", gfid_req);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: failed to get the gfid from dict", loc->path);
- *gfid_req = NULL;
- } else {
- if (loc->parent != NULL)
- dict_del (local->xattr_req, "gfid-req");
- }
- ret = 0;
-out:
- return ret;
-}
+ return 0;
-void
-afr_lookup_save_gfid (uuid_t dst, void* new, const loc_t *loc)
-{
- inode_t *inode = NULL;
+blocking_lock:
+ afr_dom_lock_release(frame);
+ AFR_ONALL(frame, afr_dom_lock_acquire_cbk, finodelk, AFR_LK_HEAL_DOM,
+ local->fd, F_SETLKW, &flock, NULL);
+ if (!afr_has_quorum(local->cont.lk.dom_locked_nodes, frame->this, NULL)) {
+ afr_dom_lock_release(frame);
+ return -afr_quorum_errno(priv);
+ }
- inode = loc->inode;
- if (inode && !uuid_is_null (inode->gfid))
- uuid_copy (dst, inode->gfid);
- else if (!uuid_is_null (loc->gfid))
- uuid_copy (dst, loc->gfid);
- else if (new && !uuid_is_null (new))
- uuid_copy (dst, new);
+ return 0;
}
int
-afr_errno_count (int32_t *children, int *child_errno,
- unsigned int child_count, int32_t op_errno)
-{
- int i = 0;
- int errno_count = 0;
- int child = 0;
-
- for (i = 0; i < child_count; i++) {
- if (children) {
- child = children[i];
- if (child == -1)
- break;
- } else {
- child = i;
- }
- if (child_errno[child] == op_errno)
- errno_count++;
- }
- return errno_count;
+afr_dom_lock_release_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int i = (long)cookie;
+
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to release %s on %s", local->loc.path,
+ AFR_LK_HEAL_DOM, priv->children[i]->name);
+ }
+ local->cont.lk.dom_locked_nodes[i] = 0;
+
+ syncbarrier_wake(&local->barrier);
+
+ return 0;
}
-int32_t
-afr_set_dict_gfid (dict_t *dict, uuid_t gfid)
-{
- int ret = 0;
- uuid_t *pgfid = NULL;
+void
+afr_dom_lock_release(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+
+ local = frame->local;
+ priv = frame->this->private;
+ locked_on = local->cont.lk.dom_locked_nodes;
+ if (AFR_COUNT(locked_on, priv->child_count) == 0)
+ return;
+ flock.l_type = F_UNLCK;
- GF_ASSERT (gfid);
+ AFR_ONLIST(locked_on, frame, afr_dom_lock_release_cbk, finodelk,
+ AFR_LK_HEAL_DOM, local->fd, F_SETLK, &flock, NULL);
- pgfid = GF_CALLOC (1, sizeof (uuid_t), gf_common_mt_char);
- if (!pgfid) {
- ret = -1;
- goto out;
- }
+ return;
+}
- uuid_copy (*pgfid, gfid);
+static void
+afr_lk_heal_info_cleanup(afr_lk_heal_info_t *info)
+{
+ if (!info)
+ return;
+ if (info->xdata_req)
+ dict_unref(info->xdata_req);
+ if (info->fd)
+ fd_unref(info->fd);
+ GF_FREE(info->locked_nodes);
+ GF_FREE(info->child_up_event_gen);
+ GF_FREE(info->child_down_event_gen);
+ GF_FREE(info);
+}
- ret = dict_set_dynptr (dict, "gfid-req", pgfid, sizeof (uuid_t));
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR, "gfid set failed");
+static int
+afr_add_lock_to_saved_locks(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+ afr_lk_heal_info_t *info = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -ENOMEM;
+
+ info = GF_CALLOC(sizeof(*info), 1, gf_afr_mt_lk_heal_info_t);
+ if (!info) {
+ goto cleanup;
+ }
+ INIT_LIST_HEAD(&info->pos);
+ info->fd = fd_ref(local->fd);
+ info->cmd = local->cont.lk.cmd;
+ info->pid = frame->root->pid;
+ info->flock = local->cont.lk.user_flock;
+ info->xdata_req = dict_copy_with_ref(local->xdata_req, NULL);
+ if (!info->xdata_req) {
+ goto cleanup;
+ }
+ info->lk_owner = frame->root->lk_owner;
+ info->locked_nodes = GF_MALLOC(
+ sizeof(*info->locked_nodes) * priv->child_count, gf_afr_mt_char);
+ if (!info->locked_nodes) {
+ goto cleanup;
+ }
+ memcpy(info->locked_nodes, local->cont.lk.locked_nodes,
+ sizeof(*info->locked_nodes) * priv->child_count);
+ info->child_up_event_gen = GF_CALLOC(sizeof(*info->child_up_event_gen),
+ priv->child_count, gf_afr_mt_int32_t);
+ if (!info->child_up_event_gen) {
+ goto cleanup;
+ }
+ info->child_down_event_gen = GF_CALLOC(sizeof(*info->child_down_event_gen),
+ priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!info->child_down_event_gen) {
+ goto cleanup;
+ }
+
+ LOCK(&local->fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get(local->fd, this);
+ if (fd_ctx)
+ fd_ctx->lk_heal_info = info;
+ }
+ UNLOCK(&local->fd->lock);
+ if (!fd_ctx) {
+ goto cleanup;
+ }
+
+ LOCK(&priv->lock);
+ {
+ list_add_tail(&info->pos, &priv->saved_locks);
+ }
+ UNLOCK(&priv->lock);
+
+ return 0;
+cleanup:
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to add lock to healq",
+ uuid_utoa(local->fd->inode->gfid));
+ if (info) {
+ afr_lk_heal_info_cleanup(info);
+ if (fd_ctx) {
+ LOCK(&local->fd->lock);
+ {
+ fd_ctx->lk_heal_info = NULL;
+ }
+ UNLOCK(&local->fd->lock);
+ }
+ }
+ return ret;
+}
+static int
+afr_remove_lock_from_saved_locks(afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ struct gf_flock flock = local->cont.lk.user_flock;
+ afr_lk_heal_info_t *info = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = -EINVAL;
+
+ fd_ctx = afr_fd_ctx_get(local->fd, this);
+ if (!fd_ctx || !fd_ctx->lk_heal_info) {
+ goto out;
+ }
+
+ info = fd_ctx->lk_heal_info;
+ if ((info->flock.l_start != flock.l_start) ||
+ (info->flock.l_whence != flock.l_whence) ||
+ (info->flock.l_len != flock.l_len)) {
+ /*TODO: Compare lkowners too.*/
+ goto out;
+ }
+
+ LOCK(&priv->lock);
+ {
+ list_del(&fd_ctx->lk_heal_info->pos);
+ }
+ UNLOCK(&priv->lock);
+
+ afr_lk_heal_info_cleanup(info);
+ fd_ctx->lk_heal_info = NULL;
+ ret = 0;
out:
- if (ret && pgfid)
- GF_FREE (pgfid);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_LK_HEAL_DOM,
+ "%s: Failed to remove lock from healq",
+ uuid_utoa(local->fd->inode->gfid));
+ return ret;
+}
- return ret;
+int
+afr_lock_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "Failed to heal lock on child %d for %s", i,
+ uuid_utoa(local->fd->inode->gfid));
+ }
+ syncbarrier_wake(&local->barrier);
+ return 0;
}
-afr_inode_ctx_t*
-afr_inode_ctx_get_from_addr (uint64_t addr, int32_t child_count)
+int
+afr_getlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- int ret = -1;
- afr_inode_ctx_t *ctx = NULL;
- size_t size = 0;
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
- GF_ASSERT (child_count > 0);
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "Failed getlk for %s", uuid_utoa(local->fd->inode->gfid));
+ } else {
+ local->cont.lk.getlk_rsp[i] = *lock;
+ }
- if (!addr) {
- ctx = GF_CALLOC (1, sizeof (*ctx),
- gf_afr_mt_inode_ctx_t);
- if (!ctx)
- goto out;
- size = sizeof (*ctx->fresh_children);
- ctx->fresh_children = GF_CALLOC (child_count, size,
- gf_afr_mt_int32_t);
- if (!ctx->fresh_children)
- goto out;
- } else {
- ctx = (afr_inode_ctx_t*) (long) addr;
- }
- ret = 0;
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+static gf_boolean_t
+afr_does_lk_owner_match(call_frame_t *frame, afr_private_t *priv,
+ afr_lk_heal_info_t *info)
+{
+ int i = 0;
+ afr_local_t *local = frame->local;
+ struct gf_flock flock = {
+ 0,
+ };
+ gf_boolean_t ret = _gf_true;
+ char *wind_on = alloca0(priv->child_count);
+ unsigned char *success_replies = alloca0(priv->child_count);
+ local->cont.lk.getlk_rsp = GF_CALLOC(sizeof(*local->cont.lk.getlk_rsp),
+ priv->child_count, gf_afr_mt_gf_lock);
+
+ flock = info->flock;
+ for (i = 0; i < priv->child_count; i++) {
+ if (info->locked_nodes[i])
+ wind_on[i] = 1;
+ }
+
+ AFR_ONLIST(wind_on, frame, afr_getlk_cbk, lk, info->fd, F_GETLK, &flock,
+ info->xdata_req);
+
+ afr_fill_success_replies(local, priv, success_replies);
+ if (AFR_COUNT(success_replies, priv->child_count) == 0) {
+ ret = _gf_false;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid || local->replies[i].op_ret != 0)
+ continue;
+ if (local->cont.lk.getlk_rsp[i].l_type == F_UNLCK)
+ continue;
+ /*TODO: Do we really need to compare lkowner if F_UNLCK is true?*/
+ if (!is_same_lkowner(&local->cont.lk.getlk_rsp[i].l_owner,
+ &info->lk_owner)) {
+ ret = _gf_false;
+ break;
+ }
+ }
out:
- if (ret && ctx) {
- if (ctx->fresh_children)
- GF_FREE (ctx->fresh_children);
- GF_FREE (ctx);
- ctx = NULL;
- }
- return ctx;
+ afr_local_replies_wipe(local, priv);
+ GF_FREE(local->cont.lk.getlk_rsp);
+ local->cont.lk.getlk_rsp = NULL;
+ return ret;
}
-void
-afr_inode_get_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
+static void
+afr_mark_fd_bad(fd_t *fd, xlator_t *this)
{
- GF_ASSERT (inode);
- GF_ASSERT (params);
+ afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
- afr_inode_ctx_t *ctx = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- uint64_t ctx_addr = 0;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx_addr);
- if (ret < 0)
- goto unlock;
- ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
- if (!ctx)
- goto unlock;
- switch (params->op) {
- case AFR_INODE_GET_READ_CTX:
- fresh_children = params->u.read_ctx.children;
- read_child = (int32_t)(ctx->masks &
- AFR_ICTX_READ_CHILD_MASK);
- params->u.read_ctx.read_child = read_child;
- if (!fresh_children)
- goto unlock;
- for (i = 0; i < priv->child_count; i++)
- fresh_children[i] = ctx->fresh_children[i];
- break;
- case AFR_INODE_GET_OPENDIR_DONE:
- params->u.value = _gf_false;
- if (ctx->masks & AFR_ICTX_OPENDIR_DONE_MASK)
- params->u.value = _gf_true;
- break;
- case AFR_INODE_GET_SPLIT_BRAIN:
- params->u.value = _gf_false;
- if (ctx->masks & AFR_ICTX_SPLIT_BRAIN_MASK)
- params->u.value = _gf_true;
- ;
- break;
- default:
- GF_ASSERT (0);
- break;
- }
+ if (!fd)
+ return;
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get(fd, this);
+ if (fd_ctx) {
+ fd_ctx->is_fd_bad = _gf_true;
+ fd_ctx->lk_heal_info = NULL;
}
-unlock:
- UNLOCK (&inode->lock);
+ }
+ UNLOCK(&fd->lock);
}
-gf_boolean_t
-afr_is_split_brain (xlator_t *this, inode_t *inode)
+static void
+afr_add_lock_to_lkhealq(afr_private_t *priv, afr_lk_heal_info_t *info)
{
- afr_inode_params_t params = {0};
+ LOCK(&priv->lock);
+ {
+ list_del(&info->pos);
+ list_add_tail(&info->pos, &priv->lk_healq);
+ }
+ UNLOCK(&priv->lock);
+}
- params.op = AFR_INODE_GET_SPLIT_BRAIN;
- afr_inode_get_ctx (this, inode, &params);
- return params.u.value;
+static void
+afr_lock_heal_do(call_frame_t *frame, afr_private_t *priv,
+ afr_lk_heal_info_t *info)
+{
+ int i = 0;
+ int op_errno = 0;
+ int32_t *current_event_gen = NULL;
+ afr_local_t *local = frame->local;
+ xlator_t *this = frame->this;
+ char *wind_on = alloca0(priv->child_count);
+ gf_boolean_t retry = _gf_true;
+
+ frame->root->pid = info->pid;
+ lk_owner_copy(&frame->root->lk_owner, &info->lk_owner);
+
+ op_errno = -afr_dom_lock_acquire(frame);
+ if ((op_errno != 0)) {
+ goto release;
+ }
+
+ if (!afr_does_lk_owner_match(frame, priv, info)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_LK_HEAL_DOM,
+ "Ignoring lock heal for %s since lk-onwers mismatch. "
+ "Lock possibly pre-empted by another client.",
+ uuid_utoa(info->fd->inode->gfid));
+ goto release;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (info->locked_nodes[i])
+ continue;
+ wind_on[i] = 1;
+ }
+
+ current_event_gen = alloca(priv->child_count);
+ memcpy(current_event_gen, info->child_up_event_gen,
+ priv->child_count * sizeof *current_event_gen);
+ AFR_ONLIST(wind_on, frame, afr_lock_heal_cbk, lk, info->fd, info->cmd,
+ &info->flock, info->xdata_req);
+
+ LOCK(&priv->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_on[i])
+ continue;
+ if ((!local->replies[i].valid) || (local->replies[i].op_ret != 0)) {
+ continue;
+ }
+
+ if ((current_event_gen[i] == info->child_up_event_gen[i]) &&
+ (current_event_gen[i] > info->child_down_event_gen[i])) {
+ info->locked_nodes[i] = 1;
+ retry = _gf_false;
+ list_del_init(&info->pos);
+ list_add_tail(&info->pos, &priv->saved_locks);
+ } else {
+ /*We received subsequent child up/down events while heal was in
+ * progress; don't mark child as healed. Attempt again on the
+ * new child up*/
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_LK_HEAL_DOM,
+ "Event gen mismatch: skipped healing lock on child %d "
+ "for %s.",
+ i, uuid_utoa(info->fd->inode->gfid));
+ }
+ }
+ }
+ UNLOCK(&priv->lock);
+
+release:
+ afr_dom_lock_release(frame);
+ if (retry)
+ afr_add_lock_to_lkhealq(priv, info);
+ return;
}
-gf_boolean_t
-afr_is_opendir_done (xlator_t *this, inode_t *inode)
+static int
+afr_lock_heal_done(int ret, call_frame_t *frame, void *opaque)
{
- afr_inode_params_t params = {0};
-
- params.op = AFR_INODE_GET_OPENDIR_DONE;
- afr_inode_get_ctx (this, inode, &params);
- return params.u.value;
+ STACK_DESTROY(frame->root);
+ return 0;
}
+static int
+afr_lock_heal(void *opaque)
+{
+ call_frame_t *frame = (call_frame_t *)opaque;
+ call_frame_t *iter_frame = NULL;
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
+ struct list_head healq = {
+ 0,
+ };
+ int ret = 0;
+
+ iter_frame = afr_copy_frame(frame);
+ if (!iter_frame) {
+ return ENOMEM;
+ }
+
+ INIT_LIST_HEAD(&healq);
+ LOCK(&priv->lock);
+ {
+ list_splice_init(&priv->lk_healq, &healq);
+ }
+ UNLOCK(&priv->lock);
+
+ list_for_each_entry_safe(info, tmp, &healq, pos)
+ {
+ GF_ASSERT((AFR_COUNT(info->locked_nodes, priv->child_count) <
+ priv->child_count));
+ ((afr_local_t *)(iter_frame->local))->fd = fd_ref(info->fd);
+ afr_lock_heal_do(iter_frame, priv, info);
+ AFR_STACK_RESET(iter_frame);
+ if (iter_frame->local == NULL) {
+ ret = ENOTCONN;
+ gf_msg(frame->this->name, GF_LOG_ERROR, ENOTCONN,
+ AFR_MSG_LK_HEAL_DOM,
+ "Aborting processing of lk_healq."
+ "Healing will be reattempted on next child up for locks "
+ "that are still in quorum.");
+ LOCK(&priv->lock);
+ {
+ list_add_tail(&healq, &priv->lk_healq);
+ }
+ UNLOCK(&priv->lock);
+ break;
+ }
+ }
+
+ AFR_STACK_DESTROY(iter_frame);
+ return ret;
+}
-int32_t
-afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children)
+static int
+__afr_lock_heal_synctask(xlator_t *this, afr_private_t *priv, int child)
{
- afr_inode_params_t params = {0};
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
- params.op = AFR_INODE_GET_READ_CTX;
- params.u.read_ctx.children = fresh_children;
- afr_inode_get_ctx (this, inode, &params);
- return params.u.read_ctx.read_child;
+ if (priv->shd.iamshd)
+ return 0;
+
+ list_for_each_entry_safe(info, tmp, &priv->saved_locks, pos)
+ {
+ info->child_up_event_gen[child] = priv->event_generation;
+ list_del_init(&info->pos);
+ list_add_tail(&info->pos, &priv->lk_healq);
+ }
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ return -1;
+
+ ret = synctask_new(this->ctx->env, afr_lock_heal, afr_lock_heal_done, frame,
+ frame);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_LK_HEAL_DOM,
+ "Failed to launch lock heal synctask");
+
+ return ret;
}
-void
-afr_inode_ctx_set_read_child (afr_inode_ctx_t *ctx, int32_t read_child)
+static int
+__afr_mark_pending_lk_heal(xlator_t *this, afr_private_t *priv, int child)
{
- uint64_t remaining_mask = 0;
- uint64_t mask = 0;
+ afr_lk_heal_info_t *info = NULL;
+ afr_lk_heal_info_t *tmp = NULL;
- remaining_mask = (~AFR_ICTX_READ_CHILD_MASK & ctx->masks);
- mask = (AFR_ICTX_READ_CHILD_MASK & read_child);
- ctx->masks = remaining_mask | mask;
+ if (priv->shd.iamshd)
+ return 0;
+ list_for_each_entry_safe(info, tmp, &priv->saved_locks, pos)
+ {
+ info->child_down_event_gen[child] = priv->event_generation;
+ if (info->locked_nodes[child] == 1)
+ info->locked_nodes[child] = 0;
+ if (!afr_has_quorum(info->locked_nodes, this, NULL)) {
+ /* Since the lock was lost on quorum no. of nodes, we should
+ * not attempt to heal it anymore. Some other client could have
+ * acquired the lock, modified data and released it and this
+ * client wouldn't know about it if we heal it.*/
+ afr_mark_fd_bad(info->fd, this);
+ list_del(&info->pos);
+ afr_lk_heal_info_cleanup(info);
+ /* We're not winding an unlock on the node where the lock is still
+ * present because when fencing logic switches over to the new
+ * client (since we marked the fd bad), it should preempt any
+ * existing lock. */
+ }
+ }
+ return 0;
}
-void
-afr_inode_ctx_set_read_ctx (afr_inode_ctx_t *ctx, int32_t read_child,
- int32_t *fresh_children, int32_t child_count)
+gf_boolean_t
+afr_is_consistent_io_possible(afr_local_t *local, afr_private_t *priv,
+ int32_t *op_errno)
+{
+ if (priv->consistent_io && local->call_count != priv->child_count) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOLS_DOWN,
+ "All subvolumes are not up");
+ if (op_errno)
+ *op_errno = ENOTCONN;
+ return _gf_false;
+ }
+ return _gf_true;
+}
+
+gf_boolean_t
+afr_is_lock_mode_mandatory(dict_t *xdata)
{
- int i = 0;
+ int ret = 0;
+ uint32_t lk_mode = GF_LK_ADVISORY;
- afr_inode_ctx_set_read_child (ctx, read_child);
- for (i = 0; i < child_count; i++) {
- if (fresh_children)
- ctx->fresh_children[i] = fresh_children[i];
- else
- ctx->fresh_children[i] = -1;
- }
+ ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_mode);
+ if (!ret && lk_mode == GF_LK_MANDATORY)
+ return _gf_true;
+
+ return _gf_false;
}
-void
-afr_inode_ctx_rm_stale_children (afr_inode_ctx_t *ctx, int32_t *stale_children,
- int32_t child_count)
+call_frame_t *
+afr_copy_frame(call_frame_t *base)
{
- int i = 0;
- int32_t read_child = -1;
+ afr_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ int op_errno = 0;
- GF_ASSERT (stale_children);
- for (i = 0; i < child_count; i++) {
- if (stale_children[i] == -1)
- break;
- afr_children_rm_child (ctx->fresh_children,
- stale_children[i], child_count);
- }
- read_child = (int32_t)(ctx->masks & AFR_ICTX_READ_CHILD_MASK);
- if (!afr_is_child_present (ctx->fresh_children, child_count,
- read_child))
- afr_inode_ctx_set_read_child (ctx, ctx->fresh_children[0]);
+ frame = copy_frame(base);
+ if (!frame)
+ return NULL;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local) {
+ AFR_STACK_DESTROY(frame);
+ return NULL;
+ }
+
+ return frame;
}
-void
-afr_inode_ctx_set_opendir_done (afr_inode_ctx_t *ctx)
+/* Check if an entry or inode could be undergoing a transaction. */
+gf_boolean_t
+afr_is_possibly_under_txn(afr_transaction_type type, afr_local_t *local,
+ xlator_t *this)
+{
+ int i = 0;
+ int tmp = 0;
+ afr_private_t *priv = NULL;
+ GF_UNUSED char *key = NULL;
+ int keylen = 0;
+
+ priv = this->private;
+
+ if (type == AFR_ENTRY_TRANSACTION) {
+ key = GLUSTERFS_PARENT_ENTRYLK;
+ keylen = SLEN(GLUSTERFS_PARENT_ENTRYLK);
+ } else if (type == AFR_DATA_TRANSACTION) {
+ /*FIXME: Use GLUSTERFS_INODELK_DOM_COUNT etc. once
+ * pl_inodelk_xattr_fill supports separate keys for different
+ * domains.*/
+ key = GLUSTERFS_INODELK_COUNT;
+ keylen = SLEN(GLUSTERFS_INODELK_COUNT);
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].xdata)
+ continue;
+ if (dict_get_int32n(local->replies[i].xdata, key, keylen, &tmp) == 0)
+ if (tmp)
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+static void
+afr_inode_ctx_destroy(afr_inode_ctx_t *ctx)
{
- uint64_t remaining_mask = 0;
- uint64_t mask = 0;
+ int i = 0;
+
+ if (!ctx)
+ return;
+
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ GF_FREE(ctx->pre_op_done[i]);
+ }
- remaining_mask = (~AFR_ICTX_OPENDIR_DONE_MASK & ctx->masks);
- mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_OPENDIR_DONE_MASK);
- ctx->masks = remaining_mask | mask;
+ GF_FREE(ctx);
}
-void
-afr_inode_ctx_set_splitbrain (afr_inode_ctx_t *ctx, gf_boolean_t set)
+int
+__afr_inode_ctx_get(xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx)
+{
+ uint64_t ctx_int = 0;
+ int ret = -1;
+ int i = -1;
+ int num_locks = -1;
+ afr_inode_ctx_t *ictx = NULL;
+ afr_lock_t *lock = NULL;
+ afr_private_t *priv = this->private;
+
+ ret = __inode_ctx_get(inode, this, &ctx_int);
+ if (ret == 0) {
+ *ctx = (afr_inode_ctx_t *)(uintptr_t)ctx_int;
+ return 0;
+ }
+
+ ictx = GF_CALLOC(1, sizeof(afr_inode_ctx_t), gf_afr_mt_inode_ctx_t);
+ if (!ictx)
+ goto out;
+
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ ictx->pre_op_done[i] = GF_CALLOC(sizeof *ictx->pre_op_done[i],
+ priv->child_count, gf_afr_mt_int32_t);
+ if (!ictx->pre_op_done[i]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ num_locks = sizeof(ictx->lock) / sizeof(afr_lock_t);
+ for (i = 0; i < num_locks; i++) {
+ lock = &ictx->lock[i];
+ INIT_LIST_HEAD(&lock->post_op);
+ INIT_LIST_HEAD(&lock->frozen);
+ INIT_LIST_HEAD(&lock->waiting);
+ INIT_LIST_HEAD(&lock->owners);
+ }
+
+ ctx_int = (uint64_t)(uintptr_t)ictx;
+ ret = __inode_ctx_set(inode, this, &ctx_int);
+ if (ret) {
+ goto out;
+ }
+
+ ictx->spb_choice = -1;
+ ictx->read_subvol = 0;
+ ictx->write_subvol = 0;
+ ictx->lock_count = 0;
+ ret = 0;
+ *ctx = ictx;
+out:
+ if (ret) {
+ afr_inode_ctx_destroy(ictx);
+ }
+ return ret;
+}
+
+/*
+ * INODE CTX 64-bit VALUE FORMAT FOR SMALL (<= 16) SUBVOL COUNTS:
+ *
+ * |<---------- 64bit ------------>|
+ * 63 32 31 16 15 0
+ * | EVENT_GEN | DATA | METADATA |
+ *
+ *
+ * METADATA (bit-0 .. bit-15): bitmap representing subvolumes from which
+ * metadata can be attempted to be read.
+ *
+ * bit-0 => priv->subvolumes[0]
+ * bit-1 => priv->subvolumes[1]
+ * ... etc. till bit-15
+ *
+ * DATA (bit-16 .. bit-31): bitmap representing subvolumes from which data
+ * can be attempted to be read.
+ *
+ * bit-16 => priv->subvolumes[0]
+ * bit-17 => priv->subvolumes[1]
+ * ... etc. till bit-31
+ *
+ * EVENT_GEN (bit-32 .. bit-63): event generation (i.e priv->event_generation)
+ * when DATA and METADATA was last updated.
+ *
+ * If EVENT_GEN is < priv->event_generation,
+ * or is 0, it means afr_inode_refresh() needs
+ * to be called to recalculate the bitmaps.
+ */
+
+int
+__afr_set_in_flight_sb_status(xlator_t *this, afr_local_t *local,
+ inode_t *inode)
+{
+ int i = 0;
+ int txn_type = 0;
+ int count = 0;
+ int index = -1;
+ uint16_t datamap_old = 0;
+ uint16_t metadatamap_old = 0;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint16_t tmp_map = 0;
+ uint16_t mask = 0;
+ uint32_t event = 0;
+ uint64_t val = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ txn_type = local->transaction.type;
+
+ if (txn_type == AFR_DATA_TRANSACTION)
+ val = local->inode_ctx->write_subvol;
+ else
+ val = local->inode_ctx->read_subvol;
+
+ metadatamap_old = metadatamap = (val & 0x000000000000ffff);
+ datamap_old = datamap = (val & 0x00000000ffff0000) >> 16;
+ event = (val & 0xffffffff00000000) >> 32;
+
+ if (txn_type == AFR_DATA_TRANSACTION)
+ tmp_map = datamap;
+ else if (txn_type == AFR_METADATA_TRANSACTION)
+ tmp_map = metadatamap;
+
+ count = gf_bits_count(tmp_map);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.failed_subvols[i])
+ continue;
+
+ mask = 1 << i;
+ if (txn_type == AFR_METADATA_TRANSACTION)
+ metadatamap &= ~mask;
+ else if (txn_type == AFR_DATA_TRANSACTION)
+ datamap &= ~mask;
+ }
+
+ switch (txn_type) {
+ case AFR_METADATA_TRANSACTION:
+ if ((metadatamap_old != 0) && (metadatamap == 0) && (count == 1)) {
+ index = gf_bits_index(tmp_map);
+ local->transaction.in_flight_sb_errno = local->replies[index]
+ .op_errno;
+ local->transaction.in_flight_sb = _gf_true;
+ metadatamap |= (1 << index);
+ }
+ if (metadatamap_old != metadatamap) {
+ __afr_inode_need_refresh_set(inode, this);
+ }
+ break;
+
+ case AFR_DATA_TRANSACTION:
+ if ((datamap_old != 0) && (datamap == 0) && (count == 1)) {
+ index = gf_bits_index(tmp_map);
+ local->transaction.in_flight_sb_errno = local->replies[index]
+ .op_errno;
+ local->transaction.in_flight_sb = _gf_true;
+ datamap |= (1 << index);
+ }
+ if (datamap_old != datamap)
+ __afr_inode_need_refresh_set(inode, this);
+ break;
+
+ default:
+ break;
+ }
+
+ val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
+ (((uint64_t)event) << 32);
+
+ if (txn_type == AFR_DATA_TRANSACTION)
+ local->inode_ctx->write_subvol = val;
+ local->inode_ctx->read_subvol = val;
+
+ return 0;
+}
+
+gf_boolean_t
+afr_is_symmetric_error(call_frame_t *frame, xlator_t *this)
{
- uint64_t remaining_mask = 0;
- uint64_t mask = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int op_errno = 0;
+ int i_errno = 0;
+ gf_boolean_t matching_errors = _gf_true;
+ int i = 0;
- if (set) {
- remaining_mask = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
- mask = (0xFFFFFFFFFFFFFFFFULL & AFR_ICTX_SPLIT_BRAIN_MASK);
- ctx->masks = remaining_mask | mask;
- } else {
- ctx->masks = (~AFR_ICTX_SPLIT_BRAIN_MASK & ctx->masks);
+ priv = this->private;
+ local = frame->local;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret != -1) {
+ /* Operation succeeded on at least one subvol,
+ so it is not a failed-everywhere situation.
+ */
+ matching_errors = _gf_false;
+ break;
}
-}
+ i_errno = local->replies[i].op_errno;
-void
-afr_inode_set_ctx (xlator_t *this, inode_t *inode, afr_inode_params_t *params)
-{
- GF_ASSERT (inode);
- GF_ASSERT (params);
-
- int ret = 0;
- afr_inode_ctx_t *ctx = NULL;
- afr_private_t *priv = NULL;
- uint64_t ctx_addr = 0;
- gf_boolean_t set = _gf_false;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
- int32_t *stale_children = NULL;
-
- priv = this->private;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx_addr);
- if (ret < 0)
- ctx_addr = 0;
- ctx = afr_inode_ctx_get_from_addr (ctx_addr, priv->child_count);
- if (!ctx)
- goto unlock;
- switch (params->op) {
- case AFR_INODE_SET_READ_CTX:
- read_child = params->u.read_ctx.read_child;
- fresh_children = params->u.read_ctx.children;
- afr_inode_ctx_set_read_ctx (ctx, read_child,
- fresh_children,
- priv->child_count);
- break;
- case AFR_INODE_RM_STALE_CHILDREN:
- stale_children = params->u.read_ctx.children;
- afr_inode_ctx_rm_stale_children (ctx,
- stale_children,
- priv->child_count);
- break;
- case AFR_INODE_SET_OPENDIR_DONE:
- afr_inode_ctx_set_opendir_done (ctx);
- break;
- case AFR_INODE_SET_SPLIT_BRAIN:
- set = params->u.value;
- afr_inode_ctx_set_splitbrain (ctx, set);
- break;
- default:
- GF_ASSERT (0);
- break;
- }
- ret = __inode_ctx_put (inode, this, (uint64_t)ctx);
- if (ret) {
- gf_log_callingfn (this->name, GF_LOG_ERROR, "failed to "
- "set the inode ctx (%s)",
- uuid_utoa (inode->gfid));
- }
+ if (i_errno == ENOTCONN) {
+ /* ENOTCONN is not a symmetric error. We do not
+ know if the operation was performed on the
+ backend or not.
+ */
+ matching_errors = _gf_false;
+ break;
}
-unlock:
- UNLOCK (&inode->lock);
+
+ if (!op_errno) {
+ op_errno = i_errno;
+ } else if (op_errno != i_errno) {
+ /* Mismatching op_errno's */
+ matching_errors = _gf_false;
+ break;
+ }
+ }
+
+ return matching_errors;
}
-void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set)
+int
+afr_set_in_flight_sb_status(xlator_t *this, call_frame_t *frame, inode_t *inode)
{
- afr_inode_params_t params = {0};
+ int ret = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ /* If this transaction saw no failures, then exit. */
+ if (AFR_COUNT(local->transaction.failed_subvols, priv->child_count) == 0)
+ return 0;
+
+ if (afr_is_symmetric_error(frame, this))
+ return 0;
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_set_in_flight_sb_status(this, local, inode);
+ }
+ UNLOCK(&inode->lock);
- params.op = AFR_INODE_SET_SPLIT_BRAIN;
- params.u.value = set;
- afr_inode_set_ctx (this, inode, &params);
+ return ret;
}
-void
-afr_set_opendir_done (xlator_t *this, inode_t *inode)
-{
- afr_inode_params_t params = {0};
+int
+__afr_inode_read_subvol_get_small(inode_t *inode, xlator_t *this,
+ unsigned char *data, unsigned char *metadata,
+ int *event_p)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint32_t event = 0;
+ uint64_t val = 0;
+ int i = 0;
+ afr_inode_ctx_t *ctx = NULL;
+
+ priv = this->private;
+
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret < 0)
+ return ret;
- params.op = AFR_INODE_SET_OPENDIR_DONE;
- afr_inode_set_ctx (this, inode, &params);
+ val = ctx->read_subvol;
+
+ metadatamap = (val & 0x000000000000ffff);
+ datamap = (val & 0x00000000ffff0000) >> 16;
+ event = (val & 0xffffffff00000000) >> 32;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (metadata)
+ metadata[i] = (metadatamap >> i) & 1;
+ if (data)
+ data[i] = (datamap >> i) & 1;
+ }
+
+ if (event_p)
+ *event_p = event;
+ return ret;
}
-void
-afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
- int32_t *fresh_children)
+int
+__afr_inode_read_subvol_set_small(inode_t *inode, xlator_t *this,
+ unsigned char *data, unsigned char *metadata,
+ int event)
{
- afr_inode_params_t params = {0};
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint64_t val = 0;
+ int i = 0;
+ int ret = -1;
+ afr_inode_ctx_t *ctx = NULL;
+
+ priv = this->private;
- priv = this->private;
- GF_ASSERT (read_child >= 0);
- GF_ASSERT (fresh_children);
- GF_ASSERT (afr_is_child_present (fresh_children, priv->child_count,
- read_child));
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret)
+ goto out;
- params.op = AFR_INODE_SET_READ_CTX;
- params.u.read_ctx.read_child = read_child;
- params.u.read_ctx.children = fresh_children;
- afr_inode_set_ctx (this, inode, &params);
+ for (i = 0; i < priv->child_count; i++) {
+ if (data[i])
+ datamap |= (1 << i);
+ if (metadata[i])
+ metadatamap |= (1 << i);
+ }
+
+ val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
+ (((uint64_t)event) << 32);
+
+ ctx->read_subvol = val;
+
+ ret = 0;
+out:
+ return ret;
}
-void
-afr_inode_rm_stale_children (xlator_t *this, inode_t *inode,
- int32_t *stale_children)
+int
+__afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int *event_p)
{
- afr_inode_params_t params = {0};
+ afr_private_t *priv = NULL;
+ int ret = -1;
- GF_ASSERT (stale_children);
+ priv = this->private;
- params.op = AFR_INODE_RM_STALE_CHILDREN;
- params.u.read_ctx.children = stale_children;
- afr_inode_set_ctx (this, inode, &params);
+ if (priv->child_count <= 16)
+ ret = __afr_inode_read_subvol_get_small(inode, this, data, metadata,
+ event_p);
+ else
+ /* TBD: allocate structure with array and read from it */
+ ret = -1;
+
+ return ret;
}
-gf_boolean_t
-afr_is_source_child (int32_t *sources, int32_t child_count, int32_t child)
+int
+__afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
+ int *spb_choice)
{
- gf_boolean_t source_xattrs = _gf_false;
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- GF_ASSERT (child < child_count);
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret < 0)
+ return ret;
- if ((child >= 0) && (child < child_count) &&
- sources[child]) {
- source_xattrs = _gf_true;
- }
- return source_xattrs;
+ *spb_choice = ctx->spb_choice;
+ return 0;
}
-gf_boolean_t
-afr_is_child_present (int32_t *success_children, int32_t child_count,
- int32_t child)
+int
+__afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int event)
{
- gf_boolean_t success_child = _gf_false;
- int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = -1;
- GF_ASSERT (child < child_count);
+ priv = this->private;
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- if (child == success_children[i]) {
- success_child = _gf_true;
- break;
- }
- }
- return success_child;
+ if (priv->child_count <= 16)
+ ret = __afr_inode_read_subvol_set_small(inode, this, data, metadata,
+ event);
+ else
+ ret = -1;
+
+ return ret;
}
-gf_boolean_t
-afr_is_read_child (int32_t *success_children, int32_t *sources,
- int32_t child_count, int32_t child)
+int
+__afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this,
+ int spb_choice)
{
- gf_boolean_t success_child = _gf_false;
- gf_boolean_t source = _gf_false;
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- if (child < 0) {
- return _gf_false;
- }
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret)
+ goto out;
- GF_ASSERT (success_children);
- GF_ASSERT (child_count > 0);
+ ctx->spb_choice = spb_choice;
- success_child = afr_is_child_present (success_children, child_count,
- child);
- if (!success_child)
- goto out;
- if (NULL == sources) {
- source = _gf_true;
- goto out;
- }
- source = afr_is_source_child (sources, child_count, child);
+ ret = 0;
out:
- return (success_child && source);
+ return ret;
}
-int32_t
-afr_hash_child (int32_t *success_children, int32_t child_count,
- unsigned int hmode, uuid_t gfid)
+int
+afr_inode_read_subvol_get(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int *event_p)
{
- uuid_t gfid_copy = {0,};
+ int ret = -1;
- if (!hmode) {
- return -1;
- }
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- if (gfid) {
- uuid_copy(gfid_copy,gfid);
- }
- if (hmode > 1) {
- /*
- * Why getpid? Because it's one of the cheapest calls
- * available - faster than gethostname etc. - and returns a
- * constant-length value that's sure to be shorter than a UUID.
- * It's still very unlikely to be the same across clients, so
- * it still provides good mixing. We're not trying for
- * perfection here. All we need is a low probability that
- * multiple clients won't converge on the same subvolume.
- */
- *((pid_t *)gfid_copy) = getpid();
- }
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_get(inode, this, data, metadata, event_p);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
- return SuperFastHash((char *)gfid_copy,
- sizeof(gfid_copy)) % child_count;
+int
+afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p, int type)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+ unsigned char *data = alloca0(priv->child_count);
+ unsigned char *metadata = alloca0(priv->child_count);
+ int data_count = 0;
+ int metadata_count = 0;
+ int event_generation = 0;
+ int ret = 0;
+
+ ret = afr_inode_read_subvol_get(inode, this, data, metadata,
+ &event_generation);
+ if (ret == -1)
+ return -EIO;
+
+ data_count = AFR_COUNT(data, priv->child_count);
+ metadata_count = AFR_COUNT(metadata, priv->child_count);
+
+ if (inode->ia_type == IA_IFDIR) {
+ /* For directories, allow even if it is in data split-brain. */
+ if (type == AFR_METADATA_TRANSACTION || local->op == GF_FOP_STAT ||
+ local->op == GF_FOP_FSTAT) {
+ if (!metadata_count)
+ return -EIO;
+ }
+ } else {
+ /* For files, abort in case of data/metadata split-brain. */
+ if (!data_count || !metadata_count) {
+ return -EIO;
+ }
+ }
+
+ if (type == AFR_METADATA_TRANSACTION && readable)
+ memcpy(readable, metadata, priv->child_count * sizeof *metadata);
+ if (type == AFR_DATA_TRANSACTION && readable) {
+ if (!data_count)
+ memcpy(readable, local->child_up,
+ priv->child_count * sizeof *readable);
+ else
+ memcpy(readable, data, priv->child_count * sizeof *data);
+ }
+ if (event_p)
+ *event_p = event_generation;
+ return 0;
}
-/* If sources is NULL the xattrs are assumed to be of source for all
- * success_children.
+static int
+afr_inode_split_brain_choice_get(inode_t *inode, xlator_t *this,
+ int *spb_choice)
+{
+ int ret = -1;
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_split_brain_choice_get(inode, this, spb_choice);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return ret;
+}
+
+/*
+ * frame is used to get the favourite policy. Since
+ * afr_inode_split_brain_choice_get was called with afr_open, it is possible to
+ * have a frame with out local->replies. So in that case, frame is passed as
+ * null, hence this function will handle the frame NULL case.
*/
int
-afr_select_read_child_from_policy (int32_t *success_children,
- int32_t child_count, int32_t prev_read_child,
- int32_t config_read_child, int32_t *sources,
- unsigned int hmode, uuid_t gfid)
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol)
{
- int32_t read_child = -1;
- int i = 0;
+ int ret = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- GF_ASSERT (success_children);
+ GF_VALIDATE_OR_GOTO("afr", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, spb_subvol, out);
- read_child = prev_read_child;
- if (afr_is_read_child (success_children, sources, child_count,
- read_child))
- goto out;
-
- read_child = config_read_child;
- if (afr_is_read_child (success_children, sources, child_count,
- read_child))
- goto out;
+ priv = this->private;
- read_child = afr_hash_child (success_children, child_count,
- hmode, gfid);
- if (afr_is_read_child (success_children, sources, child_count,
- read_child)) {
- goto out;
+ ret = afr_inode_split_brain_choice_get(inode, this, spb_subvol);
+ if (*spb_subvol < 0 && priv->fav_child_policy && frame && frame->local) {
+ local = frame->local;
+ *spb_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode,
+ NULL);
+ if (*spb_subvol >= 0) {
+ ret = 0;
}
+ }
- for (i = 0; i < child_count; i++) {
- read_child = success_children[i];
- if (read_child < 0)
- break;
- if (afr_is_read_child (success_children, sources, child_count,
- read_child))
- goto out;
- }
- read_child = -1;
+out:
+ return ret;
+}
+int
+afr_inode_read_subvol_set(inode_t *inode, xlator_t *this, unsigned char *data,
+ unsigned char *metadata, int event)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_read_subvol_set(inode, this, data, metadata, event);
+ }
+ UNLOCK(&inode->lock);
out:
- return read_child;
+ return ret;
}
-/* This function should be used when all the success_children are sources
- */
-void
-afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
- int32_t *fresh_children, int32_t prev_read_child,
- int32_t config_read_child, uuid_t gfid)
-{
- int read_child = -1;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- read_child = afr_select_read_child_from_policy (fresh_children,
- priv->child_count,
- prev_read_child,
- config_read_child,
- NULL,
- priv->hash_mode, gfid);
- if (read_child >= 0)
- afr_inode_set_read_ctx (this, inode, read_child,
- fresh_children);
-}
-
-/* afr_next_call_child ()
- * This is a common function used by all the read-type fops
- * This function should not be called with the inode's read_children array.
- * The fop's handler should make a copy of the inode's read_children,
- * preferred read_child into the local vars, because while this function is
- * in execution there is a chance for inode's read_ctx to change.
- */
-int32_t
-afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
- size_t child_count, int32_t *last_index,
- int32_t read_child)
+int
+afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this, int spb_choice)
{
- int next_index = 0;
- int32_t next_call_child = -1;
+ int ret = -1;
- GF_ASSERT (last_index);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- next_index = *last_index;
-retry:
- next_index++;
- if ((next_index >= child_count) ||
- (fresh_children[next_index] == -1))
- goto out;
- if ((fresh_children[next_index] == read_child) ||
- (!child_up[fresh_children[next_index]]))
- goto retry;
- *last_index = next_index;
- next_call_child = fresh_children[next_index];
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_split_brain_choice_set(inode, this, spb_choice);
+ }
+ UNLOCK(&inode->lock);
out:
- return next_call_child;
+ return ret;
}
- /* This function should not be called with the inode's read_children array.
- * The fop's handler should make a copy of the inode's read_children,
- * preferred read_child into the local vars, because while this function is
- * in execution there is a chance for inode's read_ctx to change.
+/* The caller of this should perform afr_inode_refresh, if this function
+ * returns _gf_true
*/
-int32_t
-afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
- int32_t *fresh_children,
- int32_t *call_child, int32_t *last_index)
+gf_boolean_t
+afr_is_inode_refresh_reqd(inode_t *inode, xlator_t *this, int event_gen1,
+ int event_gen2)
{
- int ret = 0;
- afr_private_t *priv = NULL;
- int i = 0;
+ gf_boolean_t need_refresh = _gf_false;
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- GF_ASSERT (child_up);
- GF_ASSERT (call_child);
- GF_ASSERT (last_index);
- GF_ASSERT (fresh_children);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- if (read_child < 0) {
- ret = -EIO;
- goto out;
- }
- priv = this->private;
- *call_child = -1;
- *last_index = -1;
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret)
+ goto unlock;
- if (child_up[read_child]) {
- *call_child = read_child;
- } else {
- for (i = 0; i < priv->child_count; i++) {
- if (fresh_children[i] == -1)
- break;
- if (child_up[fresh_children[i]]) {
- *call_child = fresh_children[i];
- ret = 0;
- break;
- }
- }
+ need_refresh = ctx->need_refresh;
+ /* Hoping that the caller will do inode_refresh followed by
+ * this, hence setting the need_refresh to false */
+ ctx->need_refresh = _gf_false;
+ }
+unlock:
+ UNLOCK(&inode->lock);
- if (*call_child == -1) {
- ret = -ENOTCONN;
- goto out;
- }
+ if (event_gen1 != event_gen2)
+ need_refresh = _gf_true;
+out:
+ return need_refresh;
+}
- *last_index = i;
- }
+int
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+ afr_inode_ctx_t *ctx = NULL;
+
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret == 0) {
+ ctx->need_refresh = _gf_true;
+ }
+
+ return ret;
+}
+
+int
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_need_refresh_set(inode, this);
+ }
+ UNLOCK(&inode->lock);
out:
- gf_log (this->name, GF_LOG_DEBUG, "Returning %d, call_child: %d, "
- "last_index: %d", ret, *call_child, *last_index);
- return ret;
+ return ret;
}
-void
-afr_reset_xattr (dict_t **xattr, unsigned int child_count)
+int
+afr_spb_choice_timeout_cancel(xlator_t *this, inode_t *inode)
{
- unsigned int i = 0;
+ afr_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- if (!xattr)
- goto out;
- for (i = 0; i < child_count; i++) {
- if (xattr[i]) {
- dict_unref (xattr[i]);
- xattr[i] = NULL;
- }
+ if (!inode)
+ return ret;
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret < 0 || !ctx) {
+ UNLOCK(&inode->lock);
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
+ "Failed to cancel split-brain choice timer.");
+ goto out;
+ }
+ ctx->spb_choice = -1;
+ if (ctx->timer) {
+ gf_timer_call_cancel(this->ctx, ctx->timer);
+ ctx->timer = NULL;
}
+ ret = 0;
+ }
+ UNLOCK(&inode->lock);
out:
- return;
+ return ret;
}
void
-afr_local_sh_cleanup (afr_local_t *local, xlator_t *this)
+afr_set_split_brain_choice_cbk(void *data)
{
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+ inode_t *inode = data;
+ xlator_t *this = THIS;
- sh = &local->self_heal;
- priv = this->private;
+ afr_spb_choice_timeout_cancel(this, inode);
+ inode_invalidate(inode);
+ inode_unref(inode);
+ return;
+}
- if (sh->buf)
- GF_FREE (sh->buf);
+int
+afr_set_split_brain_choice(int ret, call_frame_t *frame, void *opaque)
+{
+ int op_errno = ENOMEM;
+ afr_private_t *priv = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+ inode_t *inode = NULL;
+ loc_t *loc = NULL;
+ xlator_t *this = NULL;
+ afr_spbc_timeout_t *data = opaque;
+ struct timespec delta = {
+ 0,
+ };
+ gf_boolean_t timer_set = _gf_false;
+ gf_boolean_t timer_cancelled = _gf_false;
+ gf_boolean_t timer_reset = _gf_false;
+ int old_spb_choice = -1;
+
+ frame = data->frame;
+ loc = data->loc;
+ this = frame->this;
+ priv = this->private;
+
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ delta.tv_sec = priv->spb_choice_timeout;
+ delta.tv_nsec = 0;
+
+ if (!loc->inode) {
+ ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (!(data->d_spb || data->m_spb)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
+ "Cannot set "
+ "replica.split-brain-choice on %s. File is"
+ " not in data/metadata split-brain.",
+ uuid_utoa(loc->gfid));
+ ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /*
+ * we're ref'ing the inode before LOCK like it is done elsewhere in the
+ * code. If we ref after LOCK, coverity complains of possible deadlocks.
+ */
+ inode = inode_ref(loc->inode);
+
+ LOCK(&inode->lock);
+ {
+ ret = __afr_inode_ctx_get(this, inode, &ctx);
+ if (ret) {
+ UNLOCK(&inode->lock);
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR,
+ "Failed to get inode_ctx for %s", loc->name);
+ goto post_unlock;
+ }
- if (sh->parentbufs)
- GF_FREE (sh->parentbufs);
+ old_spb_choice = ctx->spb_choice;
+ ctx->spb_choice = data->spb_child_index;
- if (sh->inode)
- inode_unref (sh->inode);
+ /* Possible changes in spb-choice :
+ * valid to -1 : cancel timer and unref
+ * valid to valid : cancel timer and inject new one
+ * -1 to -1 : unref and do not do anything
+ * -1 to valid : inject timer
+ */
- if (sh->xattr) {
- afr_reset_xattr (sh->xattr, priv->child_count);
- GF_FREE (sh->xattr);
+ /* ctx->timer is NULL iff previous value of
+ * ctx->spb_choice is -1
+ */
+ if (ctx->timer) {
+ if (ctx->spb_choice == -1) {
+ if (!gf_timer_call_cancel(this->ctx, ctx->timer)) {
+ ctx->timer = NULL;
+ timer_cancelled = _gf_true;
+ }
+ /* If timer cancel failed here it means that the
+ * previous cbk will be executed which will set
+ * spb_choice to -1. So we can consider the
+ * 'valid to -1' case to be a success
+ * (i.e. ret = 0) and goto unlock.
+ */
+ goto unlock;
+ }
+ goto reset_timer;
+ } else {
+ if (ctx->spb_choice == -1)
+ goto unlock;
+ goto set_timer;
}
- if (sh->child_errno)
- GF_FREE (sh->child_errno);
+ reset_timer:
+ ret = gf_timer_call_cancel(this->ctx, ctx->timer);
+ if (ret != 0) {
+ /* We need to bail out now instead of launching a new
+ * timer. Otherwise the cbk of the previous timer event
+ * will cancel the new ctx->timer.
+ */
+ ctx->spb_choice = old_spb_choice;
+ ret = -1;
+ op_errno = EAGAIN;
+ goto unlock;
+ }
+ ctx->timer = NULL;
+ timer_reset = _gf_true;
+
+ set_timer:
+ ctx->timer = gf_timer_call_after(this->ctx, delta,
+ afr_set_split_brain_choice_cbk, inode);
+ if (!ctx->timer) {
+ ctx->spb_choice = old_spb_choice;
+ ret = -1;
+ op_errno = ENOMEM;
+ }
+ if (!timer_reset && ctx->timer)
+ timer_set = _gf_true;
+ if (timer_reset && !ctx->timer)
+ timer_cancelled = _gf_true;
+ }
+unlock:
+ UNLOCK(&inode->lock);
+post_unlock:
+ if (!timer_set)
+ inode_unref(inode);
+ if (timer_cancelled)
+ inode_unref(inode);
+ /*
+ * We need to invalidate the inode to prevent the kernel from serving
+ * reads from an older cached value despite a change in spb_choice to
+ * a new value.
+ */
+ inode_invalidate(inode);
+out:
+ GF_FREE(data);
+ AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
+ return 0;
+}
- afr_matrix_cleanup (sh->pending_matrix, priv->child_count);
- afr_matrix_cleanup (sh->delta_matrix, priv->child_count);
+int
+afr_accused_fill(xlator_t *this, dict_t *xdata, unsigned char *accused,
+ afr_transaction_type type)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int idx = afr_index_for_transaction_type(type);
+ void *pending_raw = NULL;
+ int pending[3];
+ int ret = 0;
- if (sh->sources)
- GF_FREE (sh->sources);
+ priv = this->private;
- if (sh->success)
- GF_FREE (sh->success);
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_get_ptr(xdata, priv->pending_key[i], &pending_raw);
+ if (ret) /* no pending flags */
+ continue;
+ memcpy(pending, pending_raw, sizeof(pending));
- if (sh->locked_nodes)
- GF_FREE (sh->locked_nodes);
+ if (ntoh32(pending[idx]))
+ accused[i] = 1;
+ }
- if (sh->healing_fd) {
- fd_unref (sh->healing_fd);
- sh->healing_fd = NULL;
- }
-
- if (sh->linkname)
- GF_FREE ((char *)sh->linkname);
+ return 0;
+}
- if (sh->success_children)
- GF_FREE (sh->success_children);
+int
+afr_accuse_smallfiles(xlator_t *this, struct afr_reply *replies,
+ unsigned char *data_accused)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint64_t maxsize = 0;
- if (sh->fresh_children)
- GF_FREE (sh->fresh_children);
+ priv = this->private;
- if (sh->fresh_parent_dirs)
- GF_FREE (sh->fresh_parent_dirs);
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].xdata &&
+ dict_get_sizen(replies[i].xdata, GLUSTERFS_BAD_INODE))
+ continue;
+ if (data_accused[i])
+ continue;
+ if (replies[i].poststat.ia_size > maxsize)
+ maxsize = replies[i].poststat.ia_size;
+ }
- loc_wipe (&sh->parent_loc);
- loc_wipe (&sh->lookup_loc);
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i])
+ continue;
+ if (AFR_IS_ARBITER_BRICK(priv, i))
+ continue;
+ if (replies[i].poststat.ia_size < maxsize)
+ data_accused[i] = 1;
+ }
- if (sh->checksum)
- GF_FREE (sh->checksum);
+ return 0;
+}
- if (sh->write_needed)
- GF_FREE (sh->write_needed);
- if (sh->healing_fd)
- fd_unref (sh->healing_fd);
+int
+afr_readables_fill(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *data_accused, unsigned char *metadata_accused,
+ unsigned char *data_readable,
+ unsigned char *metadata_readable, struct afr_reply *replies)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ int i = 0;
+ int ret = 0;
+ ia_type_t ia_type = IA_INVAL;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ data_readable[i] = 1;
+ metadata_readable[i] = 1;
+ }
+ if (AFR_IS_ARBITER_BRICK(priv, ARBITER_BRICK_INDEX)) {
+ data_readable[ARBITER_BRICK_INDEX] = 0;
+ metadata_readable[ARBITER_BRICK_INDEX] = 0;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies) { /* Lookup */
+ if (!replies[i].valid || replies[i].op_ret == -1 ||
+ (replies[i].xdata &&
+ dict_get_sizen(replies[i].xdata, GLUSTERFS_BAD_INODE))) {
+ data_readable[i] = 0;
+ metadata_readable[i] = 0;
+ continue;
+ }
+
+ xdata = replies[i].xdata;
+ ia_type = replies[i].poststat.ia_type;
+ } else { /* pre-op xattrop */
+ xdata = local->transaction.changelog_xdata[i];
+ ia_type = inode->ia_type;
+ }
+
+ if (!xdata)
+ continue; /* mkdir_cbk sends NULL xdata_rsp. */
+ afr_accused_fill(this, xdata, data_accused,
+ (ia_type == IA_IFDIR) ? AFR_ENTRY_TRANSACTION
+ : AFR_DATA_TRANSACTION);
+
+ afr_accused_fill(this, xdata, metadata_accused,
+ AFR_METADATA_TRANSACTION);
+ }
+
+ if (replies && ia_type != IA_INVAL && ia_type != IA_IFDIR &&
+ /* We want to accuse small files only when we know for
+ * sure that there is no IO happening. Otherwise, the
+ * ia_sizes obtained in post-refresh replies may
+ * mismatch due to a race between inode-refresh and
+ * ongoing writes, causing spurious heal launches*/
+ !afr_is_possibly_under_txn(AFR_DATA_TRANSACTION, local, this)) {
+ afr_accuse_smallfiles(this, replies, data_accused);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_accused[i]) {
+ data_readable[i] = 0;
+ ret = 1;
+ }
+ if (metadata_accused[i]) {
+ metadata_readable[i] = 0;
+ ret = 1;
+ }
+ }
+ return ret;
}
+int
+afr_replies_interpret(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t *start_heal)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int event_generation = 0;
+ int i = 0;
+ unsigned char *data_accused = NULL;
+ unsigned char *metadata_accused = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ priv = this->private;
+ replies = local->replies;
+ event_generation = local->event_generation;
+
+ data_accused = alloca0(priv->child_count);
+ data_readable = alloca0(priv->child_count);
+ metadata_accused = alloca0(priv->child_count);
+ metadata_readable = alloca0(priv->child_count);
+
+ ret = afr_readables_fill(frame, this, inode, data_accused, metadata_accused,
+ data_readable, metadata_readable, replies);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (start_heal && priv->child_up[i] &&
+ (data_accused[i] || metadata_accused[i])) {
+ *start_heal = _gf_true;
+ break;
+ }
+ }
+ afr_inode_read_subvol_set(inode, this, data_readable, metadata_readable,
+ event_generation);
+ return ret;
+}
-void
-afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this)
+int
+afr_refresh_selfheal_done(int ret, call_frame_t *heal, void *opaque)
{
- afr_private_t * priv = NULL;
-
- priv = this->private;
+ if (heal)
+ AFR_STACK_DESTROY(heal);
+ return 0;
+}
- afr_matrix_cleanup (local->pending, priv->child_count);
- afr_matrix_cleanup (local->transaction.txn_changelog,
- priv->child_count);
+int
+afr_inode_refresh_err(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int err = 0;
- if (local->internal_lock.locked_nodes)
- GF_FREE (local->internal_lock.locked_nodes);
+ local = frame->local;
+ priv = this->private;
- if (local->internal_lock.inode_locked_nodes)
- GF_FREE (local->internal_lock.inode_locked_nodes);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && !local->replies[i].op_ret) {
+ err = 0;
+ goto ret;
+ }
+ }
- if (local->internal_lock.entry_locked_nodes)
- GF_FREE (local->internal_lock.entry_locked_nodes);
+ err = afr_final_errno(local, priv);
+ret:
+ return err;
+}
- if (local->internal_lock.lower_locked_nodes)
- GF_FREE (local->internal_lock.lower_locked_nodes);
+gf_boolean_t
+afr_selfheal_enabled(const xlator_t *this)
+{
+ const afr_private_t *priv = this->private;
+ return priv->data_self_heal || priv->metadata_self_heal ||
+ priv->entry_self_heal;
+}
- GF_FREE (local->transaction.pre_op);
- GF_FREE (local->transaction.eager_lock);
+int
+afr_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
+{
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *heal_local = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ int event_generation = 0;
+ int read_subvol = -1;
+ int ret = 0;
+
+ local = frame->local;
+ inode = local->inode;
+ priv = this->private;
+
+ if (err)
+ goto refresh_done;
+
+ if (local->op == GF_FOP_LOOKUP)
+ goto refresh_done;
+
+ ret = afr_inode_get_readable(frame, inode, this, local->readable,
+ &event_generation, local->transaction.type);
+
+ if (ret == -EIO) {
+ /* No readable subvolume even after refresh ==> splitbrain.*/
+ if (!priv->fav_child_policy) {
+ err = EIO;
+ goto refresh_done;
+ }
+ read_subvol = afr_sh_get_fav_by_policy(this, local->replies, inode,
+ NULL);
+ if (read_subvol == -1) {
+ err = EIO;
+ goto refresh_done;
+ }
+
+ heal_frame = afr_frame_create(this, NULL);
+ if (!heal_frame) {
+ err = EIO;
+ goto refresh_done;
+ }
+ heal_local = heal_frame->local;
+ heal_local->xdata_req = dict_new();
+ if (!heal_local->xdata_req) {
+ err = EIO;
+ AFR_STACK_DESTROY(heal_frame);
+ goto refresh_done;
+ }
+ heal_local->heal_frame = frame;
+ ret = synctask_new(this->ctx->env, afr_fav_child_reset_sink_xattrs,
+ afr_fav_child_reset_sink_xattrs_cbk, heal_frame,
+ heal_frame);
+ return 0;
+ }
- GF_FREE (local->transaction.basename);
- GF_FREE (local->transaction.new_basename);
+refresh_done:
+ afr_local_replies_wipe(local, this->private);
+ local->refreshfn(frame, this, err);
- loc_wipe (&local->transaction.parent_loc);
- loc_wipe (&local->transaction.new_parent_loc);
+ return 0;
}
+int
+afr_inode_refresh_done(call_frame_t *frame, xlator_t *this, int error)
+{
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t start_heal = _gf_false;
+ afr_local_t *heal_local = NULL;
+ unsigned char *success_replies = NULL;
+ int ret = 0;
+
+ if (error != 0) {
+ goto refresh_done;
+ }
+
+ local = frame->local;
+ priv = this->private;
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+
+ if (priv->thin_arbiter_count && local->is_read_txn &&
+ AFR_COUNT(success_replies, priv->child_count) != priv->child_count) {
+ /* We need to query the good bricks and/or thin-arbiter.*/
+ if (success_replies[0]) {
+ local->read_txn_query_child = AFR_CHILD_ZERO;
+ } else if (success_replies[1]) {
+ local->read_txn_query_child = AFR_CHILD_ONE;
+ }
+ error = EINVAL;
+ goto refresh_done;
+ }
+
+ if (!afr_has_quorum(success_replies, this, frame)) {
+ error = afr_final_errno(frame->local, this->private);
+ if (!error)
+ error = afr_quorum_errno(priv);
+ goto refresh_done;
+ }
+
+ ret = afr_replies_interpret(frame, this, local->refreshinode, &start_heal);
+
+ if (ret && afr_selfheal_enabled(this) && start_heal) {
+ heal_frame = afr_frame_create(this, NULL);
+ if (!heal_frame)
+ goto refresh_done;
+ heal_local = heal_frame->local;
+ heal_local->refreshinode = inode_ref(local->refreshinode);
+ heal_local->heal_frame = heal_frame;
+ if (!afr_throttled_selfheal(heal_frame, this)) {
+ AFR_STACK_DESTROY(heal_frame);
+ goto refresh_done;
+ }
+ }
+
+refresh_done:
+ afr_txn_refresh_done(frame, this, error);
+
+ return 0;
+}
void
-afr_local_cleanup (afr_local_t *local, xlator_t *this)
-{
- afr_private_t * priv = NULL;
+afr_inode_refresh_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *buf,
+ dict_t *xdata, struct iatt *par)
+{
+ afr_local_t *local = NULL;
+ int call_child = (long)cookie;
+ int8_t need_heal = 1;
+ int call_count = 0;
+ int ret = 0;
+
+ local = frame->local;
+ local->replies[call_child].valid = 1;
+ local->replies[call_child].op_ret = op_ret;
+ local->replies[call_child].op_errno = op_errno;
+ if (op_ret != -1) {
+ local->replies[call_child].poststat = *buf;
+ if (par)
+ local->replies[call_child].postparent = *par;
+ if (xdata)
+ local->replies[call_child].xdata = dict_ref(xdata);
+ }
+
+ if (xdata) {
+ ret = dict_get_int8(xdata, "link-count", &need_heal);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to get link count");
+ }
+ }
- if (!local)
- return;
+ local->replies[call_child].need_heal = need_heal;
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ afr_set_need_heal(this, local);
+ ret = afr_inode_refresh_err(frame, this);
+ if (ret) {
+ gf_msg_debug(this->name, ret, "afr_inode_refresh_err failed");
+ }
+ afr_inode_refresh_done(frame, this, ret);
+ }
+}
- afr_local_sh_cleanup (local, this);
+int
+afr_inode_refresh_subvol_with_lookup_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *par)
+{
+ afr_inode_refresh_subvol_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ xdata, par);
+ return 0;
+}
- afr_local_transaction_cleanup (local, this);
+int
+afr_inode_refresh_subvol_with_lookup(call_frame_t *frame, xlator_t *this, int i,
+ inode_t *inode, uuid_t gfid, dict_t *xdata)
+{
+ loc_t loc = {
+ 0,
+ };
+ afr_private_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- loc_wipe (&local->loc);
- loc_wipe (&local->newloc);
+ loc.inode = inode;
+ if (gf_uuid_is_null(inode->gfid) && gfid) {
+ /* To handle setattr/setxattr on yet to be linked inode from
+ * dht */
+ gf_uuid_copy(loc.gfid, gfid);
+ } else {
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ }
- if (local->fd)
- fd_unref (local->fd);
+ STACK_WIND_COOKIE(frame, afr_inode_refresh_subvol_with_lookup_cbk,
+ (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->lookup, &loc, xdata);
+ return 0;
+}
- if (local->xattr_req)
- dict_unref (local->xattr_req);
+int
+afr_inode_refresh_subvol_with_fstat_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ afr_inode_refresh_subvol_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ xdata, NULL);
+ return 0;
+}
- if (local->dict)
- dict_unref (local->dict);
+int
+afr_inode_refresh_subvol_with_fstat(call_frame_t *frame, xlator_t *this, int i,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- if (local->child_up)
- GF_FREE (local->child_up);
+ priv = this->private;
+ local = frame->local;
- if (local->child_errno)
- GF_FREE (local->child_errno);
+ STACK_WIND_COOKIE(frame, afr_inode_refresh_subvol_with_fstat_cbk,
+ (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->fstat, local->fd, xdata);
+ return 0;
+}
- if (local->fresh_children)
- GF_FREE (local->fresh_children);
+int
+afr_inode_refresh_do(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
+ int ret = 0;
+ dict_t *xdata = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ unsigned char *wind_subvols = NULL;
- if (local->fd_open_on)
- GF_FREE (local->fd_open_on);
+ priv = this->private;
+ local = frame->local;
+ wind_subvols = alloca0(priv->child_count);
- { /* lookup */
- if (local->cont.lookup.xattrs) {
- afr_reset_xattr (local->cont.lookup.xattrs,
- priv->child_count);
- GF_FREE (local->cont.lookup.xattrs);
- local->cont.lookup.xattrs = NULL;
- }
+ afr_local_replies_wipe(local, priv);
- if (local->cont.lookup.xattr) {
- dict_unref (local->cont.lookup.xattr);
- }
+ if (local->fd) {
+ fd_ctx = afr_fd_ctx_get(local->fd, this);
+ if (!fd_ctx) {
+ afr_inode_refresh_done(frame, this, EINVAL);
+ return 0;
+ }
+ }
- if (local->cont.lookup.inode) {
- inode_unref (local->cont.lookup.inode);
- }
+ xdata = dict_new();
+ if (!xdata) {
+ afr_inode_refresh_done(frame, this, ENOMEM);
+ return 0;
+ }
- if (local->cont.lookup.postparents)
- GF_FREE (local->cont.lookup.postparents);
+ ret = afr_xattr_req_prepare(this, xdata);
+ if (ret != 0) {
+ dict_unref(xdata);
+ afr_inode_refresh_done(frame, this, -ret);
+ return 0;
+ }
- if (local->cont.lookup.bufs)
- GF_FREE (local->cont.lookup.bufs);
+ ret = dict_set_sizen_str_sizen(xdata, "link-count", GF_XATTROP_INDEX_COUNT);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
+ }
- if (local->cont.lookup.success_children)
- GF_FREE (local->cont.lookup.success_children);
+ ret = dict_set_str_sizen(xdata, GLUSTERFS_INODELK_DOM_COUNT, this->name);
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "Unable to set inodelk-dom-count in dict ");
+ }
- if (local->cont.lookup.sources)
- GF_FREE (local->cont.lookup.sources);
+ if (local->fd) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i] && fd_ctx->opened_on[i] == AFR_FD_OPENED)
+ wind_subvols[i] = 1;
}
+ } else {
+ memcpy(wind_subvols, local->child_up,
+ sizeof(*local->child_up) * priv->child_count);
+ }
- { /* getxattr */
- if (local->cont.getxattr.name)
- GF_FREE (local->cont.getxattr.name);
- }
+ local->call_count = AFR_COUNT(wind_subvols, priv->child_count);
- { /* lk */
- if (local->cont.lk.locked_nodes)
- GF_FREE (local->cont.lk.locked_nodes);
- }
+ call_count = local->call_count;
+ if (!call_count) {
+ dict_unref(xdata);
+ if (local->fd && AFR_COUNT(local->child_up, priv->child_count))
+ afr_inode_refresh_done(frame, this, EBADFD);
+ else
+ afr_inode_refresh_done(frame, this, ENOTCONN);
+ return 0;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_subvols[i])
+ continue;
- { /* create */
- if (local->cont.create.fd)
- fd_unref (local->cont.create.fd);
- if (local->cont.create.params)
- dict_unref (local->cont.create.params);
- }
+ if (local->fd)
+ afr_inode_refresh_subvol_with_fstat(frame, this, i, xdata);
+ else
+ afr_inode_refresh_subvol_with_lookup(
+ frame, this, i, local->refreshinode, local->refreshgfid, xdata);
- { /* mknod */
- if (local->cont.mknod.params)
- dict_unref (local->cont.mknod.params);
- }
+ if (!--call_count)
+ break;
+ }
- { /* mkdir */
- if (local->cont.mkdir.params)
- dict_unref (local->cont.mkdir.params);
- }
+ dict_unref(xdata);
- { /* symlink */
- if (local->cont.symlink.params)
- dict_unref (local->cont.symlink.params);
- }
+ return 0;
+}
- { /* writev */
- GF_FREE (local->cont.writev.vector);
- }
+int
+afr_inode_refresh(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ uuid_t gfid, afr_inode_refresh_cbk_t refreshfn)
+{
+ afr_local_t *local = NULL;
- { /* setxattr */
- if (local->cont.setxattr.dict)
- dict_unref (local->cont.setxattr.dict);
- }
+ local = frame->local;
- { /* fsetxattr */
- if (local->cont.fsetxattr.dict)
- dict_unref (local->cont.fsetxattr.dict);
- }
+ local->refreshfn = refreshfn;
- { /* removexattr */
- GF_FREE (local->cont.removexattr.name);
- }
- { /* xattrop */
- if (local->cont.xattrop.xattr)
- dict_unref (local->cont.xattrop.xattr);
- }
- { /* fxattrop */
- if (local->cont.fxattrop.xattr)
- dict_unref (local->cont.fxattrop.xattr);
- }
- { /* symlink */
- GF_FREE (local->cont.symlink.linkpath);
- }
+ if (local->refreshinode) {
+ inode_unref(local->refreshinode);
+ local->refreshinode = NULL;
+ }
- { /* opendir */
- if (local->cont.opendir.checksum)
- GF_FREE (local->cont.opendir.checksum);
- }
+ local->refreshinode = inode_ref(inode);
- { /* readdirp */
- if (local->cont.readdir.dict)
- dict_unref (local->cont.readdir.dict);
- }
+ if (gfid)
+ gf_uuid_copy(local->refreshgfid, gfid);
+ else
+ gf_uuid_clear(local->refreshgfid);
- if (local->xdata_req)
- dict_unref (local->xdata_req);
+ afr_inode_refresh_do(frame, this);
- if (local->xdata_rsp)
- dict_unref (local->xdata_rsp);
+ return 0;
}
-
int
-afr_frame_return (call_frame_t *frame)
+afr_xattr_req_prepare(xlator_t *this, dict_t *xattr_req)
{
- afr_local_t *local = NULL;
- int call_count = 0;
-
- local = frame->local;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
+ priv = this->private;
- return call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_uint64(xattr_req, priv->pending_key[i],
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret < 0)
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Unable to set dict value for %s", priv->pending_key[i]);
+ /* 3 = data+metadata+entry */
+ }
+ ret = dict_set_uint64(xattr_req, AFR_DIRTY,
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "failed to set dirty "
+ "query flag");
+ }
+
+ ret = dict_set_int32_sizen(xattr_req, "list-xattr", 1);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to set list-xattr in dict ");
+ }
+
+ return ret;
}
int
-afr_set_elem_count_get (unsigned char *elems, int child_count)
-{
- int i = 0;
- int ret = 0;
-
- for (i = 0; i < child_count; i++)
- if (elems[i])
- ret++;
- return ret;
+afr_lookup_xattr_req_prepare(afr_local_t *local, xlator_t *this,
+ dict_t *xattr_req, loc_t *loc)
+{
+ int ret = -ENOMEM;
+
+ if (!local->xattr_req)
+ local->xattr_req = dict_new();
+
+ if (!local->xattr_req)
+ goto out;
+
+ if (xattr_req && (xattr_req != local->xattr_req))
+ dict_copy(xattr_req, local->xattr_req);
+
+ ret = afr_xattr_req_prepare(this, local->xattr_req);
+
+ ret = dict_set_uint64(local->xattr_req, GLUSTERFS_INODELK_COUNT, 0);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Unable to set dict value for %s", loc->path,
+ GLUSTERFS_INODELK_COUNT);
+ }
+ ret = dict_set_uint64(local->xattr_req, GLUSTERFS_ENTRYLK_COUNT, 0);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Unable to set dict value for %s", loc->path,
+ GLUSTERFS_ENTRYLK_COUNT);
+ }
+
+ ret = dict_set_uint32(local->xattr_req, GLUSTERFS_PARENT_ENTRYLK, 0);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Unable to set dict value for %s", loc->path,
+ GLUSTERFS_PARENT_ENTRYLK);
+ }
+
+ ret = dict_set_sizen_str_sizen(local->xattr_req, "link-count",
+ GF_XATTROP_INDEX_COUNT);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
+ }
+
+ ret = 0;
+out:
+ return ret;
}
-/**
- * up_children_count - return the number of children that are up
- */
-
-unsigned int
-afr_up_children_count (unsigned char *child_up, unsigned int child_count)
+int
+afr_least_pending_reads_child(afr_private_t *priv, unsigned char *readable)
{
- return afr_set_elem_count_get (child_up, child_count);
-}
+ int i = 0;
+ int child = -1;
+ int64_t read_iter = -1;
+ int64_t pending_read = -1;
-unsigned int
-afr_locked_children_count (unsigned char *children, unsigned int child_count)
-{
- return afr_set_elem_count_get (children, child_count);
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i])
+ continue;
+ read_iter = GF_ATOMIC_GET(priv->pending_reads[i]);
+ if (child == -1 || read_iter < pending_read) {
+ pending_read = read_iter;
+ child = i;
+ }
+ }
-unsigned int
-afr_pre_op_done_children_count (unsigned char *pre_op,
- unsigned int child_count)
-{
- return afr_set_elem_count_get (pre_op, child_count);
+ return child;
}
-gf_boolean_t
-afr_is_fresh_lookup (loc_t *loc, xlator_t *this)
+static int32_t
+afr_least_latency_child(afr_private_t *priv, unsigned char *readable)
{
- uint64_t ctx = 0;
- int32_t ret = 0;
+ int32_t i = 0;
+ int child = -1;
- GF_ASSERT (loc);
- GF_ASSERT (this);
- GF_ASSERT (loc->inode);
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i] ||
+ priv->child_latency[i] < 0)
+ continue;
- ret = inode_ctx_get (loc->inode, this, &ctx);
- if (0 == ret)
- return _gf_false;
- return _gf_true;
+ if (child == -1 ||
+ priv->child_latency[i] < priv->child_latency[child]) {
+ child = i;
+ }
+ }
+ return child;
}
-void
-afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent)
+static int32_t
+afr_least_latency_times_pending_reads_child(afr_private_t *priv,
+ unsigned char *readable)
{
- GF_ASSERT (loc);
- GF_ASSERT (buf);
+ int32_t i = 0;
+ int child = -1;
+ int64_t pending_read = 0;
+ int64_t latency = -1;
+ int64_t least_latency = -1;
- uuid_copy (loc->gfid, buf->ia_gfid);
- if (postparent)
- uuid_copy (loc->pargfid, postparent->ia_gfid);
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (AFR_IS_ARBITER_BRICK(priv, i) || !readable[i] ||
+ priv->child_latency[i] < 0)
+ continue;
-int
-afr_lookup_build_response_params (afr_local_t *local, xlator_t *this)
-{
- struct iatt *buf = NULL;
- struct iatt *postparent = NULL;
- dict_t **xattr = NULL;
- int32_t *success_children = NULL;
- int32_t *sources = NULL;
- afr_private_t *priv = NULL;
- int32_t read_child = -1;
- int ret = 0;
- int i = 0;
-
- GF_ASSERT (local);
-
- buf = &local->cont.lookup.buf;
- postparent = &local->cont.lookup.postparent;
- xattr = &local->cont.lookup.xattr;
- priv = this->private;
-
- read_child = afr_inode_get_read_ctx (this, local->cont.lookup.inode,
- local->fresh_children);
- if (read_child < 0) {
- ret = -1;
- goto out;
- }
- success_children = local->cont.lookup.success_children;
- sources = local->cont.lookup.sources;
- memset (sources, 0, sizeof (*sources) * priv->child_count);
- afr_children_intersection_get (local->fresh_children, success_children,
- sources, priv->child_count);
- if (!sources[read_child]) {
- read_child = -1;
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i]) {
- read_child = i;
- break;
- }
- }
- }
- if (read_child < 0) {
- ret = -1;
- goto out;
- }
- gf_log (this->name, GF_LOG_DEBUG, "Building lookup response from %d",
- read_child);
- if (!*xattr)
- *xattr = dict_ref (local->cont.lookup.xattrs[read_child]);
- *buf = local->cont.lookup.bufs[read_child];
- *postparent = local->cont.lookup.postparents[read_child];
+ pending_read = GF_ATOMIC_GET(priv->pending_reads[i]);
+ latency = (pending_read + 1) * priv->child_latency[i];
- if (IA_INVAL == local->cont.lookup.inode->ia_type) {
- /* fix for RT #602 */
- local->cont.lookup.inode->ia_type = buf->ia_type;
+ if (child == -1 || latency < least_latency) {
+ least_latency = latency;
+ child = i;
}
-out:
- return ret;
+ }
+ return child;
}
-static void
-afr_lookup_update_lk_counts (afr_local_t *local, xlator_t *this,
- int child_index, dict_t *xattr)
-{
- uint32_t inodelk_count = 0;
- uint32_t entrylk_count = 0;
- int ret = -1;
- uint32_t parent_entrylk = 0;
-
- GF_ASSERT (local);
- GF_ASSERT (this);
- GF_ASSERT (xattr);
- GF_ASSERT (child_index >= 0);
-
- ret = dict_get_uint32 (xattr, GLUSTERFS_INODELK_COUNT,
- &inodelk_count);
- if (ret == 0)
- local->inodelk_count += inodelk_count;
-
- ret = dict_get_uint32 (xattr, GLUSTERFS_ENTRYLK_COUNT,
- &entrylk_count);
- if (ret == 0)
- local->entrylk_count += entrylk_count;
- ret = dict_get_uint32 (xattr, GLUSTERFS_PARENT_ENTRYLK,
- &parent_entrylk);
- if (!ret)
- local->cont.lookup.parent_entrylk += parent_entrylk;
+int
+afr_hash_child(afr_read_subvol_args_t *args, afr_private_t *priv,
+ unsigned char *readable)
+{
+ uuid_t gfid_copy = {
+ 0,
+ };
+ pid_t pid;
+ int child = -1;
+
+ switch (priv->hash_mode) {
+ case AFR_READ_POLICY_FIRST_UP:
+ break;
+ case AFR_READ_POLICY_GFID_HASH:
+ gf_uuid_copy(gfid_copy, args->gfid);
+ child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) %
+ priv->child_count;
+ break;
+ case AFR_READ_POLICY_GFID_PID_HASH:
+ if (args->ia_type != IA_IFDIR) {
+ /*
+ * Why getpid? Because it's one of the cheapest calls
+ * available - faster than gethostname etc. - and
+ * returns a constant-length value that's sure to be
+ * shorter than a UUID. It's still very unlikely to be
+ * the same across clients, so it still provides good
+ * mixing. We're not trying for perfection here. All we
+ * need is a low probability that multiple clients
+ * won't converge on the same subvolume.
+ */
+ gf_uuid_copy(gfid_copy, args->gfid);
+ pid = getpid();
+ *(pid_t *)gfid_copy ^= pid;
+ }
+ child = SuperFastHash((char *)gfid_copy, sizeof(gfid_copy)) %
+ priv->child_count;
+ break;
+ case AFR_READ_POLICY_LESS_LOAD:
+ child = afr_least_pending_reads_child(priv, readable);
+ break;
+ case AFR_READ_POLICY_LEAST_LATENCY:
+ child = afr_least_latency_child(priv, readable);
+ break;
+ case AFR_READ_POLICY_LOAD_LATENCY_HYBRID:
+ child = afr_least_latency_times_pending_reads_child(priv, readable);
+ break;
+ }
+
+ return child;
}
-static void
-afr_lookup_set_self_heal_params_by_xattr (afr_local_t *local, xlator_t *this,
- dict_t *xattr)
+int
+afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
+ unsigned char *readable,
+ afr_read_subvol_args_t *args)
{
- GF_ASSERT (local);
- GF_ASSERT (this);
- GF_ASSERT (xattr);
+ int i = 0;
+ int read_subvol = -1;
+ afr_private_t *priv = NULL;
+ afr_read_subvol_args_t local_args = {
+ 0,
+ };
- if (afr_sh_has_metadata_pending (xattr, this)) {
- local->self_heal.do_metadata_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "metadata self-heal is pending for %s.",
- local->loc.path);
- }
+ priv = this->private;
- if (afr_sh_has_entry_pending (xattr, this)) {
- local->self_heal.do_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "entry self-heal is pending for %s.", local->loc.path);
- }
+ /* first preference - explicitly specified or local subvolume */
+ if (priv->read_child >= 0 && readable[priv->read_child])
+ return priv->read_child;
- if (afr_sh_has_data_pending (xattr, this)) {
- local->self_heal.do_data_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_DEBUG,
- "data self-heal is pending for %s.", local->loc.path);
- }
-}
+ if (inode_is_linked(inode)) {
+ gf_uuid_copy(local_args.gfid, inode->gfid);
+ local_args.ia_type = inode->ia_type;
+ } else if (args) {
+ local_args = *args;
+ }
-static void
-afr_detect_self_heal_by_iatt (afr_local_t *local, xlator_t *this,
- struct iatt *buf, struct iatt *lookup_buf)
-{
- if (PERMISSION_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- gf_log (this->name, GF_LOG_INFO,
- "permissions differ for %s ", local->loc.path);
- local->self_heal.do_metadata_self_heal = _gf_true;
- }
+ /* second preference - use hashed mode */
+ read_subvol = afr_hash_child(&local_args, priv, readable);
+ if (read_subvol >= 0 && readable[read_subvol])
+ return read_subvol;
- if (OWNERSHIP_DIFFERS (buf, lookup_buf)) {
- /* mismatching permissions */
- local->self_heal.do_metadata_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_INFO,
- "ownership differs for %s ", local->loc.path);
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (readable[i])
+ return i;
+ }
- if (SIZE_DIFFERS (buf, lookup_buf)
- && IA_ISREG (buf->ia_type)) {
- gf_log (this->name, GF_LOG_INFO,
- "size differs for %s ", local->loc.path);
- local->self_heal.do_data_self_heal = _gf_true;
- }
+ /* no readable subvolumes, either split brain or all subvols down */
- if (uuid_compare (buf->ia_gfid, lookup_buf->ia_gfid)) {
- /* mismatching gfid */
- gf_log (this->name, GF_LOG_WARNING,
- "%s: gfid different on subvolume", local->loc.path);
- }
+ return -1;
}
-static void
-afr_detect_self_heal_by_lookup_status (afr_local_t *local, xlator_t *this,
- gf_boolean_t split_brain)
-{
- GF_ASSERT (local);
- GF_ASSERT (this);
-
- if ((local->success_count > 0) && (local->enoent_count > 0)) {
- local->self_heal.do_metadata_self_heal = _gf_true;
- local->self_heal.do_data_self_heal = _gf_true;
- local->self_heal.do_entry_self_heal = _gf_true;
- local->self_heal.do_gfid_self_heal = _gf_true;
- local->self_heal.do_missing_entry_self_heal = _gf_true;
- gf_log(this->name, GF_LOG_INFO,
- "entries are missing in lookup of %s.",
- local->loc.path);
- //If all self-heals are needed no need to check for other rules
- goto out;
- }
-
- if ((local->success_count > 0) && split_brain &&
- IA_ISREG (local->cont.lookup.inode->ia_type)) {
- local->self_heal.do_data_self_heal = _gf_true;
- local->self_heal.do_gfid_self_heal = _gf_true;
- gf_log (this->name, GF_LOG_WARNING,
- "split brain detected during lookup of %s.",
- local->loc.path);
- }
+int
+afr_inode_read_subvol_type_get(inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p, int type)
+{
+ int ret = -1;
-out:
- return;
+ if (type == AFR_METADATA_TRANSACTION)
+ ret = afr_inode_read_subvol_get(inode, this, 0, readable, event_p);
+ else
+ ret = afr_inode_read_subvol_get(inode, this, readable, 0, event_p);
+ return ret;
}
-gf_boolean_t
-afr_can_self_heal_proceed (afr_self_heal_t *sh, afr_private_t *priv)
+void
+afr_readables_intersect_get(inode_t *inode, xlator_t *this, int *event,
+ unsigned char *intersection)
{
- GF_ASSERT (sh);
- GF_ASSERT (priv);
+ afr_private_t *priv = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ unsigned char *intersect = NULL;
+
+ priv = this->private;
+ data_readable = alloca0(priv->child_count);
+ metadata_readable = alloca0(priv->child_count);
+ intersect = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_get(inode, this, data_readable, metadata_readable,
+ event);
- return (sh->do_gfid_self_heal
- || sh->do_missing_entry_self_heal
- || (afr_data_self_heal_enabled (priv->data_self_heal) &&
- sh->do_data_self_heal)
- || (priv->metadata_self_heal && sh->do_metadata_self_heal)
- || (priv->entry_self_heal && sh->do_entry_self_heal));
+ AFR_INTERSECT(intersect, data_readable, metadata_readable,
+ priv->child_count);
+ if (intersection)
+ memcpy(intersection, intersect,
+ sizeof(*intersection) * priv->child_count);
}
-afr_transaction_type
-afr_transaction_type_get (ia_type_t ia_type)
+int
+afr_read_subvol_get(inode_t *inode, xlator_t *this, int *subvol_p,
+ unsigned char *readables, int *event_p,
+ afr_transaction_type type, afr_read_subvol_args_t *args)
{
- afr_transaction_type type = AFR_METADATA_TRANSACTION;
+ afr_private_t *priv = NULL;
+ unsigned char *readable = NULL;
+ unsigned char *intersection = NULL;
+ int subvol = -1;
+ int event = 0;
- GF_ASSERT (ia_type != IA_INVAL);
+ priv = this->private;
- if (IA_ISDIR (ia_type)) {
- type = AFR_ENTRY_TRANSACTION;
- } else if (IA_ISREG (ia_type)) {
- type = AFR_DATA_TRANSACTION;
- }
- return type;
-}
+ readable = alloca0(priv->child_count);
+ intersection = alloca0(priv->child_count);
-int
-afr_lookup_select_read_child (afr_local_t *local, xlator_t *this,
- int32_t *read_child)
-{
- ia_type_t ia_type = IA_INVAL;
- int32_t source = -1;
- int ret = -1;
- dict_t **xattrs = NULL;
- int32_t *success_children = NULL;
- afr_transaction_type type = AFR_METADATA_TRANSACTION;
- uuid_t *gfid = NULL;
-
- GF_ASSERT (local);
- GF_ASSERT (this);
- GF_ASSERT (local->success_count > 0);
-
- success_children = local->cont.lookup.success_children;
- /*We can take the success_children[0] only because we already
- *handle the conflicting children other wise, we could select the
- *read_child based on wrong file type
- */
- ia_type = local->cont.lookup.bufs[success_children[0]].ia_type;
- type = afr_transaction_type_get (ia_type);
- xattrs = local->cont.lookup.xattrs;
- gfid = &local->cont.lookup.buf.ia_gfid;
- source = afr_lookup_select_read_child_by_txn_type (this, local, xattrs,
- type, *gfid);
- if (source < 0) {
- gf_log (this->name, GF_LOG_DEBUG, "failed to select source "
- "for %s", local->loc.path);
- goto out;
- }
+ afr_inode_read_subvol_type_get(inode, this, readable, &event, type);
- gf_log (this->name, GF_LOG_DEBUG, "Source selected as %d for %s",
- source, local->loc.path);
- *read_child = source;
- ret = 0;
-out:
- return ret;
-}
+ afr_readables_intersect_get(inode, this, &event, intersection);
-static inline gf_boolean_t
-afr_is_transaction_running (afr_local_t *local)
-{
- GF_ASSERT (local->fop == GF_FOP_LOOKUP);
- return ((local->inodelk_count > 0) || (local->entrylk_count > 0));
+ if (AFR_COUNT(intersection, priv->child_count) > 0)
+ subvol = afr_read_subvol_select_by_policy(inode, this, intersection,
+ args);
+ else
+ subvol = afr_read_subvol_select_by_policy(inode, this, readable, args);
+ if (subvol_p)
+ *subvol_p = subvol;
+ if (event_p)
+ *event_p = event;
+ if (readables)
+ memcpy(readables, readable, sizeof(*readables) * priv->child_count);
+ return subvol;
}
void
-afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
- gf_boolean_t background, ia_type_t ia_type, char *reason,
- void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
- xlator_t *this),
- int (*unwind) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- int32_t sh_failed))
-{
- afr_local_t *local = NULL;
- char sh_type_str[256] = {0,};
- char *bg = "";
-
- GF_ASSERT (frame);
- GF_ASSERT (this);
- GF_ASSERT (inode);
- GF_ASSERT (ia_type != IA_INVAL);
+afr_local_transaction_cleanup(afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
- local = frame->local;
- local->self_heal.background = background;
- local->self_heal.type = ia_type;
- local->self_heal.unwind = unwind;
- local->self_heal.gfid_sh_success_cbk = gfid_sh_success_cbk;
-
- afr_self_heal_type_str_get (&local->self_heal,
- sh_type_str,
- sizeof (sh_type_str));
-
- if (background)
- bg = "background";
- gf_log (this->name, GF_LOG_INFO,
- "%s %s self-heal triggered. path: %s, reason: %s", bg,
- sh_type_str, local->loc.path, reason);
-
- afr_self_heal (frame, this, inode);
-}
-
-unsigned int
-afr_gfid_missing_count (const char *xlator_name, int32_t *success_children,
- struct iatt *bufs, unsigned int child_count,
- const char *path)
-{
- unsigned int gfid_miss_count = 0;
- int i = 0;
- struct iatt *child1 = NULL;
-
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- child1 = &bufs[success_children[i]];
- if (uuid_is_null (child1->ia_gfid)) {
- gf_log (xlator_name, GF_LOG_DEBUG, "%s: gfid is null"
- " on subvolume %d", path, success_children[i]);
- gfid_miss_count++;
- }
- }
+ priv = this->private;
- return gfid_miss_count;
-}
+ afr_matrix_cleanup(local->pending, priv->child_count);
-static int
-afr_lookup_gfid_missing_count (afr_local_t *local, xlator_t *this)
-{
- int32_t *success_children = NULL;
- afr_private_t *priv = NULL;
- struct iatt *bufs = NULL;
- int miss_count = 0;
+ GF_FREE(local->internal_lock.lower_locked_nodes);
- priv = this->private;
- bufs = local->cont.lookup.bufs;
- success_children = local->cont.lookup.success_children;
+ afr_lockees_cleanup(&local->internal_lock);
- miss_count = afr_gfid_missing_count (this->name, success_children,
- bufs, priv->child_count,
- local->loc.path);
- return miss_count;
-}
+ GF_FREE(local->transaction.pre_op);
-gf_boolean_t
-afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
- unsigned int child_count, const char *path,
- const char *xlator_name)
-{
- gf_boolean_t conflicting = _gf_false;
- int i = 0;
- struct iatt *child1 = NULL;
- struct iatt *child2 = NULL;
- uuid_t *gfid = NULL;
-
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- child1 = &bufs[success_children[i]];
- if ((!gfid) && (!uuid_is_null (child1->ia_gfid)))
- gfid = &child1->ia_gfid;
-
- if (i == 0)
- continue;
-
- child2 = &bufs[success_children[i-1]];
- if (FILETYPE_DIFFERS (child1, child2)) {
- gf_log (xlator_name, GF_LOG_WARNING, "%s: filetype "
- "differs on subvolumes (%d, %d)", path,
- success_children[i-1], success_children[i]);
- conflicting = _gf_true;
- goto out;
- }
- if (!gfid || uuid_is_null (child1->ia_gfid))
- continue;
- if (uuid_compare (*gfid, child1->ia_gfid)) {
- gf_log (xlator_name, GF_LOG_WARNING, "%s: gfid differs"
- " on subvolume %d", path, success_children[i]);
- conflicting = _gf_true;
- goto out;
- }
+ GF_FREE(local->transaction.pre_op_sources);
+ if (local->transaction.changelog_xdata) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.changelog_xdata[i])
+ continue;
+ dict_unref(local->transaction.changelog_xdata[i]);
}
-out:
- return conflicting;
+ GF_FREE(local->transaction.changelog_xdata);
+ }
+
+ GF_FREE(local->transaction.failed_subvols);
+
+ GF_FREE(local->transaction.basename);
+ GF_FREE(local->transaction.new_basename);
+
+ loc_wipe(&local->transaction.parent_loc);
+ loc_wipe(&local->transaction.new_parent_loc);
}
-/* afr_update_gfid_from_iatts: This function should be called only if the
- * iatts are not conflicting.
- */
void
-afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs,
- int32_t *success_children, unsigned int child_count)
-{
- uuid_t *gfid = NULL;
- int i = 0;
- int child = 0;
-
- for (i = 0; i < child_count; i++) {
- child = success_children[i];
- if (child == -1)
- break;
- if ((!gfid) && (!uuid_is_null (bufs[child].ia_gfid))) {
- gfid = &bufs[child].ia_gfid;
- } else if (gfid && (!uuid_is_null (bufs[child].ia_gfid))) {
- if (uuid_compare (*gfid, bufs[child].ia_gfid)) {
- GF_ASSERT (0);
- goto out;
- }
- }
- }
- if (gfid && (!uuid_is_null (*gfid)))
- uuid_copy (uuid, *gfid);
-out:
- return;
+afr_reply_wipe(struct afr_reply *reply)
+{
+ if (reply->xdata) {
+ dict_unref(reply->xdata);
+ reply->xdata = NULL;
+ }
+
+ if (reply->xattr) {
+ dict_unref(reply->xattr);
+ reply->xattr = NULL;
+ }
}
-static gf_boolean_t
-afr_lookup_conflicting_entries (afr_local_t *local, xlator_t *this)
+void
+afr_replies_wipe(struct afr_reply *replies, int count)
{
- afr_private_t *priv = NULL;
- gf_boolean_t conflict = _gf_false;
+ int i = 0;
- priv = this->private;
- conflict = afr_conflicting_iattrs (local->cont.lookup.bufs,
- local->cont.lookup.success_children,
- priv->child_count, local->loc.path,
- this->name);
- return conflict;
+ for (i = 0; i < count; i++) {
+ afr_reply_wipe(&replies[i]);
+ }
}
-gf_boolean_t
-afr_open_only_data_self_heal (char *data_self_heal)
+void
+afr_local_replies_wipe(afr_local_t *local, afr_private_t *priv)
{
- return !strcmp (data_self_heal, "open");
+ if (!local->replies)
+ return;
+
+ afr_replies_wipe(local->replies, priv->child_count);
+
+ memset(local->replies, 0, sizeof(*local->replies) * priv->child_count);
}
-gf_boolean_t
-afr_data_self_heal_enabled (char *data_self_heal)
+static gf_boolean_t
+afr_fop_lock_is_unlock(call_frame_t *frame)
+{
+ afr_local_t *local = frame->local;
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ if ((F_UNLCK == local->cont.inodelk.in_flock.l_type) &&
+ (local->cont.inodelk.in_cmd == F_SETLKW ||
+ local->cont.inodelk.in_cmd == F_SETLK))
+ return _gf_true;
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ if (ENTRYLK_UNLOCK == local->cont.entrylk.in_cmd)
+ return _gf_true;
+ break;
+ default:
+ return _gf_false;
+ }
+ return _gf_false;
+}
+
+static gf_boolean_t
+afr_lk_is_unlock(int32_t cmd, struct gf_flock *flock)
{
- gf_boolean_t enabled = _gf_false;
+ switch (cmd) {
+ case F_RESLK_UNLCK:
+ return _gf_true;
+ break;
- if (gf_string2boolean (data_self_heal, &enabled) == -1) {
- enabled = !strcmp (data_self_heal, "open");
- GF_ASSERT (enabled);
- }
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
- return enabled;
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ if (F_UNLCK == flock->l_type)
+ return _gf_true;
+ break;
+ default:
+ return _gf_false;
+ }
+ return _gf_false;
}
-static void
-afr_lookup_set_self_heal_params (afr_local_t *local, xlator_t *this)
+void
+afr_handle_inconsistent_fop(call_frame_t *frame, int32_t *op_ret,
+ int32_t *op_errno)
{
- int i = 0;
- struct iatt *bufs = NULL;
- dict_t **xattr = NULL;
- afr_private_t *priv = NULL;
- int32_t child1 = -1;
- int32_t child2 = -1;
- afr_self_heal_t *sh = NULL;
- gf_boolean_t split_brain = _gf_false;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- priv = this->private;
- sh = &local->self_heal;
+ if (!frame || !frame->this || !frame->local || !frame->this->private)
+ return;
- split_brain = afr_is_split_brain (this, local->cont.lookup.inode);
- afr_detect_self_heal_by_lookup_status (local, this, split_brain);
+ if (*op_ret < 0)
+ return;
- if (afr_lookup_gfid_missing_count (local, this))
- local->self_heal.do_gfid_self_heal = _gf_true;
+ /* Failing inodelk/entrylk/lk here is not a good idea because we
+ * need to cleanup the locks on the other bricks if we choose to fail
+ * the fop here. The brick may go down just after unwind happens as well
+ * so anyways the fop will fail when the next fop is sent so leaving
+ * it like this for now.*/
+ local = frame->local;
+ switch (local->op) {
+ case GF_FOP_LOOKUP:
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ case GF_FOP_LK:
+ return;
+ default:
+ break;
+ }
- if (_gf_true == afr_lookup_conflicting_entries (local, this))
- local->self_heal.do_missing_entry_self_heal = _gf_true;
- else
- afr_update_gfid_from_iatts (local->self_heal.sh_gfid_req,
- local->cont.lookup.bufs,
- local->cont.lookup.success_children,
- priv->child_count);
+ priv = frame->this->private;
+ if (!priv->consistent_io)
+ return;
- bufs = local->cont.lookup.bufs;
- for (i = 1; i < local->success_count; i++) {
- child1 = local->cont.lookup.success_children[i-1];
- child2 = local->cont.lookup.success_children[i];
- afr_detect_self_heal_by_iatt (local, this,
- &bufs[child1], &bufs[child2]);
- }
+ if (local->event_generation &&
+ (local->event_generation != priv->event_generation))
+ goto inconsistent;
- xattr = local->cont.lookup.xattrs;
- for (i = 0; i < local->success_count; i++) {
- child1 = local->cont.lookup.success_children[i];
- afr_lookup_set_self_heal_params_by_xattr (local, this,
- xattr[child1]);
- }
- if (afr_open_only_data_self_heal (priv->data_self_heal)
- && !split_brain)
- sh->do_data_self_heal = _gf_false;
+ return;
+inconsistent:
+ *op_ret = -1;
+ *op_errno = ENOTCONN;
}
-int
-afr_self_heal_lookup_unwind (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- int32_t sh_failed)
+void
+afr_local_cleanup(afr_local_t *local, xlator_t *this)
{
- afr_local_t *local = NULL;
- int ret = -1;
- dict_t *xattr = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
+ if (!local)
+ return;
- if (op_ret == -1) {
- local->op_ret = -1;
- if (afr_error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
+ syncbarrier_destroy(&local->barrier);
- goto out;
- } else {
- local->op_ret = 0;
- }
+ afr_local_transaction_cleanup(local, this);
- afr_lookup_done_success_action (frame, this, _gf_true);
- xattr = local->cont.lookup.xattr;
- if (xattr) {
- ret = dict_set_int32 (xattr, "sh-failed", sh_failed);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
- "sh-failed to %d", local->loc.path, sh_failed);
- }
-out:
- AFR_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->cont.lookup.inode, &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
+ priv = this->private;
- return 0;
-}
+ loc_wipe(&local->loc);
+ loc_wipe(&local->newloc);
-//TODO: At the moment only lookup needs this, so not doing any checks, in the
-// future we will have to do fop specific operations
-void
-afr_post_gfid_sh_success (call_frame_t *sh_frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_local_t *sh_local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- struct iatt *lookup_bufs = NULL;
- struct iatt *lookup_parentbufs = NULL;
+ if (local->fd)
+ fd_unref(local->fd);
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
- local = sh->orig_frame->local;
- lookup_bufs = local->cont.lookup.bufs;
- lookup_parentbufs = local->cont.lookup.postparents;
- priv = this->private;
+ if (local->xattr_req)
+ dict_unref(local->xattr_req);
- memcpy (lookup_bufs, sh->buf, priv->child_count * sizeof (*sh->buf));
- memcpy (lookup_parentbufs, sh->parentbufs,
- priv->child_count * sizeof (*sh->parentbufs));
+ if (local->xattr_rsp)
+ dict_unref(local->xattr_rsp);
- afr_reset_xattr (local->cont.lookup.xattrs, priv->child_count);
- if (local->cont.lookup.xattr) {
- dict_unref (local->cont.lookup.xattr);
- local->cont.lookup.xattr = NULL;
- }
+ if (local->dict)
+ dict_unref(local->dict);
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i])
- local->cont.lookup.xattrs[i] = dict_ref (sh->xattr[i]);
- }
+ afr_local_replies_wipe(local, priv);
+ GF_FREE(local->replies);
- afr_reset_children (local->cont.lookup.success_children,
- priv->child_count);
- afr_children_copy (local->cont.lookup.success_children,
- sh->fresh_children, priv->child_count);
-}
+ GF_FREE(local->child_up);
-static void
-afr_lookup_perform_self_heal (call_frame_t *frame, xlator_t *this,
- gf_boolean_t *sh_launched)
-{
- unsigned int up_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- char *reason = NULL;
-
- GF_ASSERT (sh_launched);
- *sh_launched = _gf_false;
- priv = this->private;
- local = frame->local;
-
- up_count = afr_up_children_count (local->child_up, priv->child_count);
- if (up_count == 1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Only 1 child up - do not attempt to detect self heal");
- goto out;
- }
+ GF_FREE(local->read_attempted);
+
+ GF_FREE(local->readable);
+ GF_FREE(local->readable2);
+
+ if (local->inode)
+ inode_unref(local->inode);
- afr_lookup_set_self_heal_params (local, this);
- if (afr_can_self_heal_proceed (&local->self_heal, priv)) {
- if (afr_is_transaction_running (local))
- goto out;
+ if (local->parent)
+ inode_unref(local->parent);
- reason = "lookup detected pending operations";
- afr_launch_self_heal (frame, this, local->cont.lookup.inode,
- _gf_true, local->cont.lookup.buf.ia_type,
- reason, afr_post_gfid_sh_success,
- afr_self_heal_lookup_unwind);
- *sh_launched = _gf_true;
- }
-out:
- return;
-}
+ if (local->parent2)
+ inode_unref(local->parent2);
-void
-afr_get_fresh_children (int32_t *success_children, int32_t *sources,
- int32_t *fresh_children, unsigned int child_count)
-{
- unsigned int i = 0;
- unsigned int j = 0;
-
- GF_ASSERT (success_children);
- GF_ASSERT (sources);
- GF_ASSERT (fresh_children);
-
- afr_reset_children (fresh_children, child_count);
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- if (afr_is_read_child (success_children, sources, child_count,
- success_children[i])) {
- fresh_children[j] = success_children[i];
- j++;
- }
- }
-}
+ if (local->refreshinode)
+ inode_unref(local->refreshinode);
-static int
-afr_lookup_set_read_ctx (afr_local_t *local, xlator_t *this, int32_t read_child)
-{
- afr_private_t *priv = NULL;
+ { /* getxattr */
+ GF_FREE(local->cont.getxattr.name);
+ }
- GF_ASSERT (read_child >= 0);
+ { /* lk */
+ GF_FREE(local->cont.lk.locked_nodes);
+ GF_FREE(local->cont.lk.dom_locked_nodes);
+ GF_FREE(local->cont.lk.dom_lock_op_ret);
+ GF_FREE(local->cont.lk.dom_lock_op_errno);
+ }
- priv = this->private;
- afr_get_fresh_children (local->cont.lookup.success_children,
- local->cont.lookup.sources,
- local->fresh_children, priv->child_count);
- afr_inode_set_read_ctx (this, local->cont.lookup.inode, read_child,
- local->fresh_children);
+ { /* create */
+ if (local->cont.create.fd)
+ fd_unref(local->cont.create.fd);
+ if (local->cont.create.params)
+ dict_unref(local->cont.create.params);
+ }
- return 0;
+ { /* mknod */
+ if (local->cont.mknod.params)
+ dict_unref(local->cont.mknod.params);
+ }
+
+ { /* mkdir */
+ if (local->cont.mkdir.params)
+ dict_unref(local->cont.mkdir.params);
+ }
+
+ { /* symlink */
+ if (local->cont.symlink.params)
+ dict_unref(local->cont.symlink.params);
+ }
+
+ { /* writev */
+ GF_FREE(local->cont.writev.vector);
+ if (local->cont.writev.iobref)
+ iobref_unref(local->cont.writev.iobref);
+ }
+
+ { /* setxattr */
+ if (local->cont.setxattr.dict)
+ dict_unref(local->cont.setxattr.dict);
+ }
+
+ { /* fsetxattr */
+ if (local->cont.fsetxattr.dict)
+ dict_unref(local->cont.fsetxattr.dict);
+ }
+
+ { /* removexattr */
+ GF_FREE(local->cont.removexattr.name);
+ }
+ { /* xattrop */
+ if (local->cont.xattrop.xattr)
+ dict_unref(local->cont.xattrop.xattr);
+ }
+ { /* symlink */
+ GF_FREE(local->cont.symlink.linkpath);
+ }
+
+ { /* opendir */
+ GF_FREE(local->cont.opendir.checksum);
+ }
+
+ { /* open */
+ if (local->cont.open.fd)
+ fd_unref(local->cont.open.fd);
+ }
+
+ { /* readdirp */
+ if (local->cont.readdir.dict)
+ dict_unref(local->cont.readdir.dict);
+ }
+
+ { /* inodelk */
+ GF_FREE(local->cont.inodelk.volume);
+ if (local->cont.inodelk.xdata)
+ dict_unref(local->cont.inodelk.xdata);
+ }
+
+ { /* entrylk */
+ GF_FREE(local->cont.entrylk.volume);
+ GF_FREE(local->cont.entrylk.basename);
+ if (local->cont.entrylk.xdata)
+ dict_unref(local->cont.entrylk.xdata);
+ }
+
+ if (local->xdata_req)
+ dict_unref(local->xdata_req);
+
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
}
int
-afr_lookup_done_success_action (call_frame_t *frame, xlator_t *this,
- gf_boolean_t fail_conflict)
+afr_frame_return(call_frame_t *frame)
{
- int32_t read_child = -1;
- int32_t ret = -1;
- afr_local_t *local = NULL;
- gf_boolean_t fresh_lookup = _gf_false;
+ afr_local_t *local = NULL;
+ int call_count = 0;
- local = frame->local;
- fresh_lookup = local->cont.lookup.fresh_lookup;
+ local = frame->local;
- if (local->loc.parent == NULL)
- fail_conflict = _gf_true;
-
- if (afr_lookup_conflicting_entries (local, this)) {
- if (fail_conflict == _gf_false)
- ret = 0;
- goto out;
- }
+ LOCK(&frame->lock);
+ {
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- ret = afr_lookup_select_read_child (local, this, &read_child);
- if (!afr_is_transaction_running (local) || fresh_lookup) {
- if (read_child < 0)
- goto out;
+ return call_count;
+}
- ret = afr_lookup_set_read_ctx (local, this, read_child);
- if (ret)
- goto out;
- }
+static char *afr_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL};
- ret = afr_lookup_build_response_params (local, this);
- if (ret)
- goto out;
- afr_update_loc_gfids (&local->loc,
- &local->cont.lookup.buf,
- &local->cont.lookup.postparent);
+gf_boolean_t
+afr_is_xattr_ignorable(char *key)
+{
+ int i = 0;
- ret = 0;
-out:
- if (ret) {
- local->op_ret = -1;
- local->op_errno = EIO;
- }
- return ret;
+ if (!strncmp(key, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX)))
+ return _gf_true;
+ for (i = 0; afr_ignore_xattrs[i]; i++) {
+ if (!strcmp(key, afr_ignore_xattrs[i]))
+ return _gf_true;
+ }
+ return _gf_false;
}
-int
-afr_lookup_get_latest_subvol (afr_local_t *local, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- int32_t *success_children = NULL;
- struct iatt *bufs = NULL;
- int i = 0;
- int child = 0;
- int lsubvol = -1;
-
- priv = this->private;
- success_children = local->cont.lookup.success_children;
- bufs = local->cont.lookup.bufs;
- for (i = 0; i < priv->child_count; i++) {
- child = success_children[i];
- if (child == -1)
- break;
- if (uuid_is_null (bufs[child].ia_gfid))
- continue;
- if (lsubvol < 0) {
- lsubvol = child;
- } else if (bufs[lsubvol].ia_ctime < bufs[child].ia_ctime) {
- lsubvol = child;
- } else if ((bufs[lsubvol].ia_ctime == bufs[child].ia_ctime) &&
- (bufs[lsubvol].ia_ctime_nsec < bufs[child].ia_ctime_nsec)) {
- lsubvol = child;
- }
- }
- return lsubvol;
+static gf_boolean_t
+afr_xattr_match_needed(dict_t *this, char *key1, data_t *value1, void *data)
+{
+ /* Ignore all non-disk (i.e. virtual) xattrs right away. */
+ if (!gf_is_valid_xattr_namespace(key1))
+ return _gf_false;
+
+ /* Ignore on-disk xattrs that AFR doesn't need to heal. */
+ if (!afr_is_xattr_ignorable(key1))
+ return _gf_true;
+
+ return _gf_false;
}
-void
-afr_lookup_mark_other_entries_stale (afr_local_t *local, xlator_t *this,
- int subvol)
-{
- afr_private_t *priv = NULL;
- int32_t *success_children = NULL;
- struct iatt *bufs = NULL;
- int i = 0;
- int child = 0;
-
- priv = this->private;
- success_children = local->cont.lookup.success_children;
- bufs = local->cont.lookup.bufs;
- memcpy (local->fresh_children, success_children,
- sizeof (*success_children) * priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- child = local->fresh_children[i];
- if (child == -1)
- break;
- if (child == subvol)
- continue;
- if (uuid_is_null (bufs[child].ia_gfid) &&
- (bufs[child].ia_type == bufs[subvol].ia_type))
- continue;
- afr_children_rm_child (success_children, child,
- priv->child_count);
- local->success_count--;
- }
- afr_reset_children (local->fresh_children, priv->child_count);
+gf_boolean_t
+afr_xattrs_are_equal(dict_t *dict1, dict_t *dict2)
+{
+ return are_dicts_equal(dict1, dict2, afr_xattr_match_needed, NULL);
}
-void
-afr_succeed_lookup_on_latest_iatt (afr_local_t *local, xlator_t *this)
+static int
+afr_get_parent_read_subvol(xlator_t *this, inode_t *parent,
+ struct afr_reply *replies, unsigned char *readable)
{
- int lsubvol = 0;
+ int i = 0;
+ int par_read_subvol = -1;
+ int par_read_subvol_iter = -1;
+ afr_private_t *priv = NULL;
- if (!afr_lookup_conflicting_entries (local, this))
- goto out;
+ priv = this->private;
- lsubvol = afr_lookup_get_latest_subvol (local, this);
- if (lsubvol < 0)
- goto out;
- afr_lookup_mark_other_entries_stale (local, this, lsubvol);
-out:
- return;
-}
+ if (parent)
+ par_read_subvol = afr_data_subvol_get(parent, this, NULL, NULL, NULL,
+ NULL);
-static void
-afr_lookup_done (call_frame_t *frame, xlator_t *this)
-{
- int unwind = 1;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- gf_boolean_t sh_launched = _gf_false;
- gf_boolean_t fail_conflict = _gf_false;
- int gfid_miss_count = 0;
- int enotconn_count = 0;
- int up_children_count = 0;
-
- priv = this->private;
- local = frame->local;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
- if (local->op_ret < 0)
- goto unwind;
-
- if (local->cont.lookup.parent_entrylk && local->success_count > 1)
- afr_succeed_lookup_on_latest_iatt (local, this);
-
- gfid_miss_count = afr_lookup_gfid_missing_count (local, this);
- up_children_count = afr_up_children_count (local->child_up,
- priv->child_count);
- enotconn_count = priv->child_count - up_children_count;
- if ((gfid_miss_count == local->success_count) &&
- (enotconn_count > 0)) {
- local->op_ret = -1;
- local->op_errno = EIO;
- gf_log (this->name, GF_LOG_ERROR, "Failing lookup for %s, "
- "LOOKUP on a file without gfid is not allowed when "
- "some of the children are down", local->loc.path);
- goto unwind;
- }
-
- if ((gfid_miss_count == local->success_count) &&
- uuid_is_null (local->cont.lookup.gfid_req)) {
- local->op_ret = -1;
- local->op_errno = ENODATA;
- gf_log (this->name, GF_LOG_ERROR, "%s: No gfid present",
- local->loc.path);
- goto unwind;
- }
-
- if (gfid_miss_count && uuid_is_null (local->cont.lookup.gfid_req))
- fail_conflict = _gf_true;
- ret = afr_lookup_done_success_action (frame, this, fail_conflict);
- if (ret)
- goto unwind;
- uuid_copy (local->self_heal.sh_gfid_req, local->cont.lookup.gfid_req);
+ if (replies[i].op_ret < 0)
+ continue;
- afr_lookup_perform_self_heal (frame, this, &sh_launched);
- if (sh_launched) {
- unwind = 0;
- goto unwind;
+ if (par_read_subvol_iter == -1) {
+ par_read_subvol_iter = i;
+ continue;
}
- unwind:
- if (unwind) {
- AFR_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno, local->cont.lookup.inode,
- &local->cont.lookup.buf,
- local->cont.lookup.xattr,
- &local->cont.lookup.postparent);
- }
-}
+ if ((par_read_subvol_iter != par_read_subvol) && readable[i])
+ par_read_subvol_iter = i;
-/*
- * During a lookup, some errors are more "important" than
- * others in that they must be given higher priority while
- * returning to the user.
- *
- * The hierarchy is ESTALE > ENOENT > others
- *
- */
+ if (i == par_read_subvol)
+ par_read_subvol_iter = i;
+ }
+ /* At the end of the for-loop, the only reason why @par_read_subvol_iter
+ * could be -1 is when this LOOKUP has failed on all sub-volumes.
+ * So it is okay to send an arbitrary subvolume (0 in this case)
+ * as parent read subvol.
+ */
+ if (par_read_subvol_iter == -1)
+ par_read_subvol_iter = 0;
-gf_boolean_t
-afr_error_more_important (int32_t old_errno, int32_t new_errno)
+ return par_read_subvol_iter;
+}
+
+int
+afr_read_subvol_decide(inode_t *inode, xlator_t *this,
+ afr_read_subvol_args_t *args, unsigned char *readable)
{
- gf_boolean_t ret = _gf_true;
+ int event = 0;
+ afr_private_t *priv = NULL;
+ unsigned char *intersection = NULL;
- /* Nothing should ever overwrite ESTALE */
- if (old_errno == ESTALE)
- ret = _gf_false;
+ priv = this->private;
+ intersection = alloca0(priv->child_count);
- /* Nothing should overwrite ENOENT, except ESTALE/EIO*/
- else if ((old_errno == ENOENT) && (new_errno != ESTALE)
- && (new_errno != EIO))
- ret = _gf_false;
+ afr_readables_intersect_get(inode, this, &event, intersection);
- return ret;
-}
+ if (AFR_COUNT(intersection, priv->child_count) <= 0) {
+ /* TODO: If we have one brick with valid data_readable and
+ * another with metadata_readable, try to send an iatt with
+ * valid bits from both.*/
+ return -1;
+ }
-int32_t
-afr_resultant_errno_get (int32_t *children,
- int *child_errno, unsigned int child_count)
-{
- int i = 0;
- int32_t op_errno = 0;
- int child = 0;
-
- for (i = 0; i < child_count; i++) {
- if (children) {
- child = children[i];
- if (child == -1)
- break;
- } else {
- child = i;
- }
- if (afr_error_more_important (op_errno, child_errno[child]))
- op_errno = child_errno[child];
- }
- return op_errno;
+ memcpy(readable, intersection, sizeof(*readable) * priv->child_count);
+
+ return afr_read_subvol_select_by_policy(inode, this, intersection, args);
}
-static void
-afr_lookup_handle_error (afr_local_t *local, int32_t op_ret, int32_t op_errno)
+static inline int
+afr_first_up_child(call_frame_t *frame, xlator_t *this)
{
- GF_ASSERT (local);
- if (op_errno == ENOENT)
- local->enoent_count++;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
- if (afr_error_more_important (local->op_errno, op_errno))
- local->op_errno = op_errno;
+ local = frame->local;
+ priv = this->private;
- if (local->op_errno == ESTALE) {
- local->op_ret = -1;
- }
+ for (i = 0; i < priv->child_count; i++)
+ if (local->replies[i].valid && local->replies[i].op_ret == 0)
+ return i;
+ return -1;
}
static void
-afr_set_root_inode_on_first_lookup (afr_local_t *local, xlator_t *this,
- inode_t *inode)
+afr_attempt_readsubvol_set(call_frame_t *frame, xlator_t *this,
+ unsigned char *success_replies,
+ unsigned char *data_readable, int *read_subvol)
{
- afr_private_t *priv = NULL;
- GF_ASSERT (inode);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int spb_subvol = -1;
+ int child_count = -1;
- if (!__is_root_gfid (inode->gfid))
- goto out;
- if (!afr_is_fresh_lookup (&local->loc, this))
- goto out;
- priv = this->private;
- if ((priv->first_lookup)) {
- gf_log (this->name, GF_LOG_INFO, "added root inode");
- priv->root_inode = inode_ref (inode);
- priv->first_lookup = 0;
- }
-out:
+ if (*read_subvol != -1)
return;
-}
-static void
-afr_lookup_cache_args (afr_local_t *local, int child_index, dict_t *xattr,
- struct iatt *buf, struct iatt *postparent)
-{
- GF_ASSERT (child_index >= 0);
- local->cont.lookup.xattrs[child_index] = dict_ref (xattr);
- local->cont.lookup.postparents[child_index] = *postparent;
- local->cont.lookup.bufs[child_index] = *buf;
+ priv = this->private;
+ local = frame->local;
+ child_count = priv->child_count;
+
+ afr_split_brain_read_subvol_get(local->inode, this, frame, &spb_subvol);
+ if ((spb_subvol >= 0) &&
+ (AFR_COUNT(success_replies, child_count) == child_count)) {
+ *read_subvol = spb_subvol;
+ } else if (!priv->quorum_count ||
+ frame->root->pid == GF_CLIENT_PID_GLFS_HEAL) {
+ *read_subvol = afr_first_up_child(frame, this);
+ } else if (priv->quorum_count &&
+ afr_has_quorum(data_readable, this, NULL)) {
+ /* read_subvol is guaranteed to be valid if we hit this path. */
+ *read_subvol = afr_first_up_child(frame, this);
+ } else {
+ /* If quorum is enabled and we do not have a
+ readable yet, it means all good copies are down.
+ */
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_READ_SUBVOL_ERROR,
+ "no read "
+ "subvols for %s",
+ local->loc.path);
+ }
+ if (*read_subvol >= 0)
+ dict_del_sizen(local->replies[*read_subvol].xdata, GF_CONTENT_KEY);
}
static void
-afr_lookup_handle_first_success (afr_local_t *local, xlator_t *this,
- inode_t *inode, struct iatt *buf)
-{
- local->cont.lookup.inode = inode_ref (inode);
- local->cont.lookup.buf = *buf;
- afr_set_root_inode_on_first_lookup (local, this, inode);
-}
+afr_lookup_done(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = -1;
+ int op_errno = 0;
+ int read_subvol = 0;
+ int par_read_subvol = 0;
+ int ret = -1;
+ unsigned char *readable = NULL;
+ unsigned char *success_replies = NULL;
+ int event = 0;
+ struct afr_reply *replies = NULL;
+ uuid_t read_gfid = {
+ 0,
+ };
+ gf_boolean_t locked_entry = _gf_false;
+ gf_boolean_t in_flight_create = _gf_false;
+ gf_boolean_t can_interpret = _gf_true;
+ inode_t *parent = NULL;
+ ia_type_t ia_type = IA_INVAL;
+ afr_read_subvol_args_t args = {
+ 0,
+ };
+ char *gfid_heal_msg = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ replies = local->replies;
+ parent = local->loc.parent;
+
+ locked_entry = afr_is_possibly_under_txn(AFR_ENTRY_TRANSACTION, local,
+ this);
+
+ readable = alloca0(priv->child_count);
+ success_replies = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_get(parent, this, readable, NULL, &event);
+ par_read_subvol = afr_get_parent_read_subvol(this, parent, replies,
+ readable);
+
+ /* First, check if we have a gfid-change from somewhere,
+ If so, propagate that so that a fresh lookup can be
+ issued
+ */
+ if (local->cont.lookup.needs_fresh_lookup) {
+ local->op_ret = -1;
+ local->op_errno = ESTALE;
+ goto error;
+ }
-static int32_t
-afr_discovery_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- int ret = 0;
- char *pathinfo = NULL;
- gf_boolean_t is_local = _gf_false;
- afr_private_t *priv = NULL;
- int32_t child_index = -1;
+ op_errno = afr_final_errno(frame->local, this->private);
+ local->op_errno = op_errno;
- if (op_ret != 0) {
- goto out;
+ read_subvol = -1;
+ afr_fill_success_replies(local, priv, success_replies);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret == -1) {
+ if (locked_entry && replies[i].op_errno == ENOENT) {
+ in_flight_create = _gf_true;
+ }
+ continue;
}
- ret = dict_get_str (dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
- if (ret != 0) {
- goto out;
+ if (read_subvol == -1 || !readable[read_subvol]) {
+ read_subvol = i;
+ gf_uuid_copy(read_gfid, replies[i].poststat.ia_gfid);
+ ia_type = replies[i].poststat.ia_type;
+ local->op_ret = 0;
}
+ }
- ret = afr_local_pathinfo (pathinfo, &is_local);
+ if (in_flight_create && !afr_has_quorum(success_replies, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = ENOENT;
+ goto error;
+ }
+
+ if (read_subvol == -1)
+ goto error;
+ /* We now have a read_subvol, which is readable[] (if there
+ were any). Next we look for GFID mismatches. We don't
+ consider a GFID mismatch as an error if read_subvol is
+ readable[] but the mismatching GFID subvol is not.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1) {
+ continue;
+ }
+
+ if (!gf_uuid_compare(replies[i].poststat.ia_gfid, read_gfid))
+ continue;
+
+ can_interpret = _gf_false;
+
+ if (locked_entry)
+ continue;
+
+ /* Now GFIDs mismatch. It's OK as long as this subvol
+ is not readable[] but read_subvol is */
+ if (readable[read_subvol] && !readable[i])
+ continue;
+
+ /* If we were called from glfsheal and there is still a gfid
+ * mismatch, succeed the lookup and let glfsheal print the
+ * response via gfid-heal-msg.*/
+ if (!dict_get_str_sizen(local->xattr_req, "gfid-heal-msg",
+ &gfid_heal_msg))
+ goto cant_interpret;
+
+ /* LOG ERROR */
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto error;
+ }
+
+ /* Forth, for the finalized GFID, pick the best subvolume
+ to return stats from.
+ */
+ read_subvol = -1;
+ memset(readable, 0, sizeof(*readable) * priv->child_count);
+ if (can_interpret) {
+ if (!afr_has_quorum(success_replies, this, NULL))
+ goto cant_interpret;
+ /* It is safe to call afr_replies_interpret() because we have
+ a response from all the UP subvolumes and all of them resolved
+ to the same GFID
+ */
+ gf_uuid_copy(args.gfid, read_gfid);
+ args.ia_type = ia_type;
+ ret = afr_replies_interpret(frame, this, local->inode, NULL);
+ read_subvol = afr_read_subvol_decide(local->inode, this, &args,
+ readable);
+ if (read_subvol == -1)
+ goto cant_interpret;
if (ret) {
- goto out;
+ afr_inode_need_refresh_set(local->inode, this);
+ dict_del_sizen(local->replies[read_subvol].xdata, GF_CONTENT_KEY);
+ }
+ } else {
+ cant_interpret:
+ afr_attempt_readsubvol_set(frame, this, success_replies, readable,
+ &read_subvol);
+ if (read_subvol == -1) {
+ goto error;
}
+ }
- priv = this->private;
- /*
- * Note that one local subvolume will override another here. The only
- * way to avoid that would be to retain extra information about whether
- * the previous read_child is local, and it's just not worth it. Even
- * the slowest local subvolume is far preferable to a remote one.
- */
- if (is_local) {
- child_index = (int32_t)(long)cookie;
- gf_log (this->name, GF_LOG_INFO,
- "selecting local read_child %s",
- priv->children[child_index]->name);
- priv->read_child = child_index;
+ afr_handle_quota_size(frame, this);
+
+ afr_set_need_heal(this, local);
+ if (AFR_IS_ARBITER_BRICK(priv, read_subvol) && local->op_ret == 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ gf_msg_debug(this->name, 0,
+ "Arbiter cannot be a read subvol "
+ "for %s",
+ local->loc.path);
+ goto error;
+ }
+
+ ret = dict_get_str_sizen(local->xattr_req, "gfid-heal-msg", &gfid_heal_msg);
+ if (!ret) {
+ ret = dict_set_str_sizen(local->replies[read_subvol].xdata,
+ "gfid-heal-msg", gfid_heal_msg);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
+ "Error setting gfid-heal-msg dict");
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
}
+ }
-out:
- STACK_DESTROY(frame->root);
- return 0;
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->replies[read_subvol].poststat,
+ local->replies[read_subvol].xdata,
+ &local->replies[par_read_subvol].postparent);
+ return;
+
+error:
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, NULL, NULL,
+ NULL, NULL);
}
-static void
-afr_attempt_local_discovery (xlator_t *this, int32_t child_index)
+/*
+ * During a lookup, some errors are more "important" than
+ * others in that they must be given higher priority while
+ * returning to the user.
+ *
+ * The hierarchy is ENODATA > ENOENT > ESTALE > ENOSPC others
+ */
+
+int
+afr_higher_errno(int32_t old_errno, int32_t new_errno)
{
- call_frame_t *newframe = NULL;
- loc_t tmploc = {0,};
- afr_private_t *priv = this->private;
+ if (old_errno == ENODATA || new_errno == ENODATA)
+ return ENODATA;
+ if (old_errno == ENOENT || new_errno == ENOENT)
+ return ENOENT;
+ if (old_errno == ESTALE || new_errno == ESTALE)
+ return ESTALE;
+ if (old_errno == ENOSPC || new_errno == ENOSPC)
+ return ENOSPC;
- newframe = create_frame(this,this->ctx->pool);
- if (!newframe) {
- return;
- }
+ return new_errno;
+}
+
+int
+afr_final_errno(afr_local_t *local, afr_private_t *priv)
+{
+ int i = 0;
+ int op_errno = 0;
+ int tmp_errno = 0;
- tmploc.gfid[sizeof(tmploc.gfid)-1] = 1;
- STACK_WIND_COOKIE (newframe, afr_discovery_cbk,
- (void *)(long)child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->getxattr,
- &tmploc, GF_XATTR_PATHINFO_KEY, NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret >= 0)
+ continue;
+ tmp_errno = local->replies[i].op_errno;
+ op_errno = afr_higher_errno(op_errno, tmp_errno);
+ }
+
+ return op_errno;
+}
+
+static int32_t
+afr_local_discovery_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ int ret = 0;
+ char *pathinfo = NULL;
+ gf_boolean_t is_local = _gf_false;
+ afr_private_t *priv = NULL;
+ int32_t child_index = -1;
+
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ priv = this->private;
+ child_index = (int32_t)(long)cookie;
+
+ ret = dict_get_str_sizen(dict, GF_XATTR_PATHINFO_KEY, &pathinfo);
+ if (ret != 0) {
+ goto out;
+ }
+
+ ret = glusterfs_is_local_pathinfo(pathinfo, &is_local);
+ if (ret) {
+ goto out;
+ }
+
+ /*
+ * Note that one local subvolume will override another here. The only
+ * way to avoid that would be to retain extra information about whether
+ * the previous read_child is local, and it's just not worth it. Even
+ * the slowest local subvolume is far preferable to a remote one.
+ */
+ if (is_local) {
+ priv->local[child_index] = 1;
+ /* Don't set arbiter as read child. */
+ if (AFR_IS_ARBITER_BRICK(priv, child_index))
+ goto out;
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_LOCAL_CHILD,
+ "selecting local read_child %s",
+ priv->children[child_index]->name);
+
+ priv->read_child = child_index;
+ }
+out:
+ STACK_DESTROY(frame->root);
+ return 0;
}
static void
-afr_lookup_handle_success (afr_local_t *local, xlator_t *this, int32_t child_index,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
+afr_attempt_local_discovery(xlator_t *this, int32_t child_index)
{
- afr_private_t *priv = this->private;
+ call_frame_t *newframe = NULL;
+ loc_t tmploc = {
+ 0,
+ };
+ afr_private_t *priv = this->private;
- if (local->success_count == 0) {
- if (local->op_errno != ESTALE) {
- local->op_ret = op_ret;
- local->op_errno = 0;
- }
- afr_lookup_handle_first_success (local, this, inode, buf);
- }
- afr_lookup_update_lk_counts (local, this,
- child_index, xattr);
+ newframe = create_frame(this, this->ctx->pool);
+ if (!newframe) {
+ return;
+ }
- afr_lookup_cache_args (local, child_index, xattr,
- buf, postparent);
+ tmploc.gfid[sizeof(tmploc.gfid) - 1] = 1;
+ STACK_WIND_COOKIE(newframe, afr_local_discovery_cbk,
+ (void *)(long)child_index, priv->children[child_index],
+ priv->children[child_index]->fops->getxattr, &tmploc,
+ GF_XATTR_PATHINFO_KEY, NULL);
+}
- if (local->do_discovery && (priv->read_child == (-1))) {
- afr_attempt_local_discovery(this,child_index);
- }
+int
+afr_lookup_sh_metadata_wrap(void *opaque)
+{
+ call_frame_t *frame = opaque;
+ afr_local_t *local = NULL;
+ xlator_t *this = NULL;
+ inode_t *inode = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int i = 0, first = -1;
+ int ret = -1;
+ dict_t *dict = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ priv = this->private;
+ replies = local->replies;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ first = i;
+ break;
+ }
+ if (first == -1)
+ goto out;
+
+ if (afr_selfheal_metadata_by_stbuf(this, &replies[first].poststat))
+ goto out;
+
+ afr_local_replies_wipe(local, this->private);
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+ if (local->xattr_req) {
+ dict_copy(local->xattr_req, dict);
+ }
+
+ ret = dict_set_sizen_str_sizen(dict, "link-count", GF_XATTROP_INDEX_COUNT);
+ if (ret) {
+ gf_msg_debug(this->name, -ret, "Unable to set link-count in dict ");
+ }
+
+ if (loc_is_nameless(&local->loc)) {
+ ret = afr_selfheal_unlocked_discover_on(frame, local->inode,
+ local->loc.gfid, local->replies,
+ local->child_up, dict);
+ } else {
+ inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent,
+ local->loc.name, local->replies,
+ local->child_up, dict);
+ }
+ if (inode)
+ inode_unref(inode);
+out:
+ if (loc_is_nameless(&local->loc))
+ afr_discover_done(frame, this);
+ else
+ afr_lookup_done(frame, this);
- local->cont.lookup.success_children[local->success_count] = child_index;
- local->success_count++;
+ if (dict)
+ dict_unref(dict);
+
+ return 0;
}
-int
-afr_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
+gf_boolean_t
+afr_is_pending_set(xlator_t *this, dict_t *xdata, int type)
{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = -1;
+ int idx = -1;
+ afr_private_t *priv = NULL;
+ void *pending_raw = NULL;
+ int *pending_int = NULL;
+ int i = 0;
- child_index = (long) cookie;
+ priv = this->private;
+ idx = afr_index_for_transaction_type(type);
- LOCK (&frame->lock);
- {
- local = frame->local;
+ if (dict_get_ptr(xdata, AFR_DIRTY, &pending_raw) == 0) {
+ if (pending_raw) {
+ pending_int = pending_raw;
- if (op_ret == -1) {
- afr_lookup_handle_error (local, op_ret, op_errno);
- goto unlock;
- }
- afr_lookup_handle_success (local, this, child_index, op_ret,
- op_errno, inode, buf, xattr,
- postparent);
+ if (ntoh32(pending_int[idx]))
+ return _gf_true;
+ }
+ }
- }
-unlock:
- UNLOCK (&frame->lock);
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get_ptr(xdata, priv->pending_key[i], &pending_raw))
+ continue;
+ if (!pending_raw)
+ continue;
+ pending_int = pending_raw;
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- afr_lookup_done (frame, this);
- }
+ if (ntoh32(pending_int[idx]))
+ return _gf_true;
+ }
- return 0;
+ return _gf_false;
}
-int
-afr_lookup_cont_init (afr_local_t *local, unsigned int child_count)
-{
- int ret = -ENOMEM;
- struct iatt *iatts = NULL;
- int32_t *success_children = NULL;
- int32_t *sources = NULL;
-
- GF_ASSERT (local);
- local->cont.lookup.xattrs = GF_CALLOC (child_count,
- sizeof (*local->cont.lookup.xattr),
- gf_afr_mt_dict_t);
- if (NULL == local->cont.lookup.xattrs)
- goto out;
+static gf_boolean_t
+afr_can_start_metadata_self_heal(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int i = 0, first = -1;
+ gf_boolean_t start = _gf_false;
+ struct iatt stbuf = {
+ 0,
+ };
+
+ local = frame->local;
+ replies = local->replies;
+ priv = this->private;
+
+ if (!priv->metadata_self_heal)
+ return _gf_false;
- iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
- if (NULL == iatts)
- goto out;
- local->cont.lookup.postparents = iatts;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (first == -1) {
+ first = i;
+ stbuf = replies[i].poststat;
+ continue;
+ }
- iatts = GF_CALLOC (child_count, sizeof (*iatts), gf_afr_mt_iatt);
- if (NULL == iatts)
- goto out;
- local->cont.lookup.bufs = iatts;
+ if (afr_is_pending_set(this, replies[i].xdata,
+ AFR_METADATA_TRANSACTION)) {
+ /* Let shd do the heal so that lookup is not blocked
+ * on getting metadata lock/doing the heal */
+ start = _gf_false;
+ break;
+ }
- success_children = afr_children_create (child_count);
- if (NULL == success_children)
- goto out;
- local->cont.lookup.success_children = success_children;
+ if (gf_uuid_compare(stbuf.ia_gfid, replies[i].poststat.ia_gfid)) {
+ start = _gf_false;
+ break;
+ }
+ if (!IA_EQUAL(stbuf, replies[i].poststat, type)) {
+ start = _gf_false;
+ break;
+ }
- local->fresh_children = afr_children_create (child_count);
- if (NULL == local->fresh_children)
- goto out;
+ /*Check if iattrs need heal*/
+ if ((!IA_EQUAL(stbuf, replies[i].poststat, uid)) ||
+ (!IA_EQUAL(stbuf, replies[i].poststat, gid)) ||
+ (!IA_EQUAL(stbuf, replies[i].poststat, prot))) {
+ start = _gf_true;
+ continue;
+ }
- sources = GF_CALLOC (sizeof (*sources), child_count, gf_afr_mt_int32_t);
- if (NULL == sources)
- goto out;
- local->cont.lookup.sources = sources;
+ /*Check if xattrs need heal*/
+ if (!afr_xattrs_are_equal(replies[first].xdata, replies[i].xdata))
+ start = _gf_true;
+ }
- ret = 0;
+ return start;
+}
+
+int
+afr_lookup_metadata_heal_check(call_frame_t *frame, xlator_t *this)
+
+{
+ call_frame_t *heal = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ if (!afr_can_start_metadata_self_heal(frame, this))
+ goto out;
+
+ heal = afr_frame_create(this, &ret);
+ if (!heal) {
+ ret = -ret;
+ goto out;
+ }
+
+ ret = synctask_new(this->ctx->env, afr_lookup_sh_metadata_wrap,
+ afr_refresh_selfheal_done, heal, frame);
+ if (ret)
+ goto out;
+ return ret;
out:
- return ret;
+ if (loc_is_nameless(&local->loc))
+ afr_discover_done(frame, this);
+ else
+ afr_lookup_done(frame, this);
+ if (heal)
+ AFR_STACK_DESTROY(heal);
+ return ret;
}
int
-afr_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+afr_lookup_selfheal_wrap(void *opaque)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- void *gfid_req = NULL;
- int ret = -1;
- int i = 0;
- int call_count = 0;
- uint64_t ctx = 0;
- int32_t op_errno = 0;
+ int ret = 0;
+ call_frame_t *frame = opaque;
+ afr_local_t *local = NULL;
+ xlator_t *this = NULL;
+ inode_t *inode = NULL;
+ uuid_t pargfid = {
+ 0,
+ };
- priv = this->private;
+ local = frame->local;
+ this = frame->this;
+ loc_pargfid(&local->loc, pargfid);
- AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+ ret = afr_selfheal_name(frame->this, pargfid, local->loc.name,
+ &local->cont.lookup.gfid_req, local->xattr_req);
+ if (ret == -EIO)
+ goto unwind;
- local->op_ret = -1;
+ afr_local_replies_wipe(local, this->private);
- frame->local = local;
- local->fop = GF_FOP_LOOKUP;
+ inode = afr_selfheal_unlocked_lookup_on(frame, local->loc.parent,
+ local->loc.name, local->replies,
+ local->child_up, local->xattr_req);
+ if (inode)
+ inode_unref(inode);
- loc_copy (&local->loc, loc);
- ret = loc_path (&local->loc, NULL);
- if (ret < 0) {
- op_errno = EINVAL;
- goto out;
- }
+ afr_lookup_metadata_heal_check(frame, this);
+ return 0;
- ret = inode_ctx_get (local->loc.inode, this, &ctx);
- if (ret == 0) {
- /* lookup is a revalidate */
+unwind:
+ AFR_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL);
+ return 0;
+}
- local->read_child_index = afr_inode_get_read_ctx (this,
- local->loc.inode,
- NULL);
+int
+afr_lookup_entry_heal(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ call_frame_t *heal = NULL;
+ int i = 0, first = -1;
+ gf_boolean_t name_state_mismatch = _gf_false;
+ struct afr_reply *replies = NULL;
+ int ret = 0;
+ unsigned char *par_readables = NULL;
+ unsigned char *success = NULL;
+ int32_t op_errno = 0;
+ uuid_t gfid = {0};
+
+ local = frame->local;
+ replies = local->replies;
+ priv = this->private;
+ par_readables = alloca0(priv->child_count);
+ success = alloca0(priv->child_count);
+
+ ret = afr_inode_read_subvol_get(local->loc.parent, this, par_readables,
+ NULL, NULL);
+ if (ret < 0 || AFR_COUNT(par_readables, priv->child_count) == 0) {
+ /* In this case set par_readables to all 1 so that name_heal
+ * need checks at the end of this function will flag missing
+ * entry when name state mismatches*/
+ memset(par_readables, 1, priv->child_count);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret == 0) {
+ if (gf_uuid_is_null(gfid)) {
+ gf_uuid_copy(gfid, replies[i].poststat.ia_gfid);
+ }
+ success[i] = 1;
} else {
- LOCK (&priv->read_child_lock);
- {
- if (priv->hash_mode) {
- local->read_child_index = -1;
- }
- else {
- local->read_child_index =
- (++priv->read_child_rr) %
- (priv->child_count);
- }
- }
- UNLOCK (&priv->read_child_lock);
- local->cont.lookup.fresh_lookup = _gf_true;
+ if ((replies[i].op_errno != ENOTCONN) &&
+ (replies[i].op_errno != ENOENT) &&
+ (replies[i].op_errno != ESTALE)) {
+ op_errno = replies[i].op_errno;
+ }
}
- local->child_up = memdup (priv->child_up,
- sizeof (*local->child_up) * priv->child_count);
- if (NULL == local->child_up) {
- op_errno = ENOMEM;
- goto out;
+ /*gfid is missing, needs heal*/
+ if ((replies[i].op_ret == -1) && (replies[i].op_errno == ENODATA)) {
+ goto name_heal;
}
- ret = afr_lookup_cont_init (local, priv->child_count);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
+ if (first == -1) {
+ first = i;
+ continue;
}
- local->call_count = afr_up_children_count (local->child_up,
- priv->child_count);
- call_count = local->call_count;
- if (local->call_count == 0) {
- ret = -1;
- op_errno = ENOTCONN;
- goto out;
+ if (replies[i].op_ret != replies[first].op_ret) {
+ name_state_mismatch = _gf_true;
}
- /* By default assume ENOTCONN. On success it will be set to 0. */
- local->op_errno = ENOTCONN;
-
- ret = afr_lookup_xattr_req_prepare (local, this, xattr_req, &local->loc,
- &gfid_req);
- if (ret) {
- local->op_errno = -ret;
- goto out;
- }
- afr_lookup_save_gfid (local->cont.lookup.gfid_req, gfid_req,
- &local->loc);
- local->fop = GF_FOP_LOOKUP;
- if (priv->choose_local && !priv->did_discovery) {
- if (gfid_req && __is_root_gfid(gfid_req)) {
- local->do_discovery = _gf_true;
- priv->did_discovery = _gf_true;
- }
+ if (replies[i].op_ret == 0) {
+ /* Rename after this lookup may succeed if we don't do
+ * a name-heal and the destination may not have pending xattrs
+ * to indicate which name is good and which is bad so always do
+ * this heal*/
+ if (gf_uuid_compare(replies[i].poststat.ia_gfid, gfid)) {
+ goto name_heal;
+ }
}
+ }
+
+ if (name_state_mismatch) {
+ if (!priv->quorum_count)
+ goto name_heal;
+ if (!afr_has_quorum(success, this, NULL))
+ goto name_heal;
+ if (op_errno)
+ goto name_heal;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- &local->loc, local->xattr_req);
- if (!--call_count)
- break;
- }
+ if (!replies[i].valid)
+ continue;
+ if (par_readables[i] && replies[i].op_ret < 0 &&
+ replies[i].op_errno != ENOTCONN) {
+ goto name_heal;
+ }
}
+ }
- ret = 0;
-out:
- if (ret)
- AFR_STACK_UNWIND (lookup, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
+ goto metadata_heal;
- return 0;
-}
+name_heal:
+ heal = afr_frame_create(this, NULL);
+ if (!heal)
+ goto metadata_heal;
+ ret = synctask_new(this->ctx->env, afr_lookup_selfheal_wrap,
+ afr_refresh_selfheal_done, heal, frame);
+ if (ret) {
+ AFR_STACK_DESTROY(heal);
+ goto metadata_heal;
+ }
+ return ret;
-/* {{{ open */
+metadata_heal:
+ ret = afr_lookup_metadata_heal_check(frame, this);
-int
-__afr_fd_ctx_set (xlator_t *this, fd_t *fd)
-{
- afr_private_t * priv = NULL;
- int ret = -1;
- uint64_t ctx = 0;
- afr_fd_ctx_t * fd_ctx = NULL;
+ return ret;
+}
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (fd, out);
+int
+afr_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = -1;
+ GF_UNUSED int ret = 0;
+ int8_t need_heal = 1;
+
+ child_index = (long)cookie;
+
+ local = frame->local;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ /*
+ * On revalidate lookup if the gfid-changed, afr should unwind the fop
+ * with ESTALE so that a fresh lookup will be sent by the top xlator.
+ * So remember it.
+ */
+ if (xdata && dict_get_sizen(xdata, "gfid-changed"))
+ local->cont.lookup.needs_fresh_lookup = _gf_true;
+
+ if (xdata) {
+ ret = dict_get_int8(xdata, "link-count", &need_heal);
+ local->replies[child_index].need_heal = need_heal;
+ } else {
+ local->replies[child_index].need_heal = need_heal;
+ }
+ if (op_ret != -1) {
+ local->replies[child_index].poststat = *buf;
+ local->replies[child_index].postparent = *postparent;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+ }
+
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ afr_set_need_heal(this, local);
+ afr_lookup_entry_heal(frame, this);
+ }
+
+ return 0;
+}
- priv = this->private;
+static void
+afr_discover_unwind(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int read_subvol = -1;
+ int ret = 0;
+ unsigned char *data_readable = NULL;
+ unsigned char *success_replies = NULL;
- ret = __fd_ctx_get (fd, this, &ctx);
+ priv = this->private;
+ local = frame->local;
+ data_readable = alloca0(priv->child_count);
+ success_replies = alloca0(priv->child_count);
- if (ret == 0)
- goto out;
+ afr_fill_success_replies(local, priv, success_replies);
+ if (AFR_COUNT(success_replies, priv->child_count) > 0)
+ local->op_ret = 0;
- fd_ctx = GF_CALLOC (1, sizeof (afr_fd_ctx_t),
- gf_afr_mt_afr_fd_ctx_t);
- if (!fd_ctx) {
- ret = -ENOMEM;
- goto out;
- }
+ if (local->op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(frame->local, this->private);
+ goto error;
+ }
- fd_ctx->pre_op_done = GF_CALLOC (sizeof (*fd_ctx->pre_op_done),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_done) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!afr_has_quorum(success_replies, this, frame))
+ goto unwind;
- fd_ctx->pre_op_piggyback = GF_CALLOC (sizeof (*fd_ctx->pre_op_piggyback),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->pre_op_piggyback) {
- ret = -ENOMEM;
- goto out;
- }
+ ret = afr_replies_interpret(frame, this, local->inode, NULL);
+ if (ret) {
+ afr_inode_need_refresh_set(local->inode, this);
+ }
- fd_ctx->opened_on = GF_CALLOC (sizeof (*fd_ctx->opened_on),
- priv->child_count,
- gf_afr_mt_int32_t);
- if (!fd_ctx->opened_on) {
- ret = -ENOMEM;
- goto out;
- }
+ read_subvol = afr_read_subvol_decide(local->inode, this, NULL,
+ data_readable);
- fd_ctx->lock_piggyback = GF_CALLOC (sizeof (*fd_ctx->lock_piggyback),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->lock_piggyback) {
- ret = -ENOMEM;
- goto out;
- }
+unwind:
+ afr_attempt_readsubvol_set(frame, this, success_replies, data_readable,
+ &read_subvol);
+ if (read_subvol == -1)
+ goto error;
- fd_ctx->lock_acquired = GF_CALLOC (sizeof (*fd_ctx->lock_acquired),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->lock_acquired) {
- ret = -ENOMEM;
- goto out;
- }
+ if (AFR_IS_ARBITER_BRICK(priv, read_subvol) && local->op_ret == 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ gf_msg_debug(this->name, 0,
+ "Arbiter cannot be a read subvol "
+ "for %s",
+ local->loc.path);
+ }
- fd_ctx->up_count = priv->up_count;
- fd_ctx->down_count = priv->down_count;
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->replies[read_subvol].poststat,
+ local->replies[read_subvol].xdata,
+ &local->replies[read_subvol].postparent);
+ return;
- fd_ctx->locked_on = GF_CALLOC (sizeof (*fd_ctx->locked_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!fd_ctx->locked_on) {
- ret = -ENOMEM;
- goto out;
- }
+error:
+ AFR_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno, NULL, NULL,
+ NULL, NULL);
+}
- INIT_LIST_HEAD (&fd_ctx->paused_calls);
- INIT_LIST_HEAD (&fd_ctx->entries);
+static int
+afr_ta_id_file_check(void *opaque)
+{
+ afr_private_t *priv = NULL;
+ xlator_t *this = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ uuid_t gfid = {
+ 0,
+ };
+ fd_t *fd = NULL;
+ int ret = 0;
+
+ this = opaque;
+ priv = this->private;
+
+ ret = afr_fill_ta_loc(this, &loc, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.", loc.name);
+ goto out;
+ }
+
+ ret = syncop_lookup(priv->children[THIN_ARBITER_BRICK_INDEX], &loc, &stbuf,
+ 0, 0, 0);
+ if (ret == 0) {
+ goto out;
+ } else if (ret == -ENOENT) {
+ fd = fd_create(loc.inode, getpid());
+ if (!fd)
+ goto out;
+ dict = dict_new();
+ if (!dict)
+ goto out;
+ gf_uuid_generate(gfid);
+ ret = dict_set_gfuuid(dict, "gfid-req", gfid, true);
+ ret = syncop_create(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
+ O_RDWR, 0664, fd, &stbuf, dict, NULL);
+ }
- ret = __fd_ctx_set (fd, this, (uint64_t)(long) fd_ctx);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set fd ctx (%p)", fd);
out:
- return ret;
+ if (ret == 0) {
+ gf_uuid_copy(priv->ta_gfid, stbuf.ia_gfid);
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to lookup/create thin-arbiter id file.");
+ }
+ if (dict)
+ dict_unref(dict);
+ if (fd)
+ fd_unref(fd);
+ loc_wipe(&loc);
+
+ return 0;
}
+static int
+afr_ta_id_file_check_cbk(int ret, call_frame_t *ta_frame, void *opaque)
+{
+ return 0;
+}
-int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd)
+static void
+afr_discover_done(call_frame_t *frame, xlator_t *this)
{
- int ret = -1;
+ int ret = 0;
+ afr_private_t *priv = NULL;
- LOCK (&fd->lock);
- {
- ret = __afr_fd_ctx_set (this, fd);
- }
- UNLOCK (&fd->lock);
+ priv = this->private;
+ if (!priv->thin_arbiter_count)
+ goto unwind;
+ if (!gf_uuid_is_null(priv->ta_gfid))
+ goto unwind;
- return ret;
+ ret = synctask_new(this->ctx->env, afr_ta_id_file_check,
+ afr_ta_id_file_check_cbk, NULL, this);
+ if (ret)
+ goto unwind;
+unwind:
+ afr_discover_unwind(frame, this);
}
-/* {{{ flush */
-
int
-afr_flush_unwind (call_frame_t *frame, xlator_t *this)
+afr_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = -1;
+ GF_UNUSED int ret = 0;
+ int8_t need_heal = 1;
- local = frame->local;
+ child_index = (long)cookie;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
- if (main_frame) {
- AFR_STACK_UNWIND (flush, main_frame,
- local->op_ret, local->op_errno,
- NULL);
- }
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (op_ret != -1) {
+ local->replies[child_index].poststat = *buf;
+ local->replies[child_index].postparent = *postparent;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+ }
- return 0;
+ if (local->do_discovery && (op_ret == 0))
+ afr_attempt_local_discovery(this, child_index);
+
+ if (xdata) {
+ ret = dict_get_int8(xdata, "link-count", &need_heal);
+ local->replies[child_index].need_heal = need_heal;
+ } else {
+ local->replies[child_index].need_heal = need_heal;
+ }
+
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ afr_set_need_heal(this, local);
+ afr_lookup_metadata_heal_check(frame, this);
+ }
+
+ return 0;
}
+int
+afr_discover_do(call_frame_t *frame, xlator_t *this, int err)
+{
+ int ret = 0;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (err) {
+ local->op_errno = err;
+ goto out;
+ }
+
+ call_count = local->call_count = AFR_COUNT(local->child_up,
+ priv->child_count);
+
+ ret = afr_lookup_xattr_req_prepare(local, this, local->xattr_req,
+ &local->loc);
+ if (ret) {
+ local->op_errno = -ret;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(
+ frame, afr_discover_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->lookup, &local->loc, local->xattr_req);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+out:
+ AFR_STACK_UNWIND(lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
+ return 0;
+}
int
-afr_flush_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_discover(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- int need_unwind = 0;
+ int op_errno = ENOMEM;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int event = 0;
- local = frame->local;
- priv = this->private;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- local->op_errno = op_errno;
+ if (!local->call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ if (__is_root_gfid(loc->inode->gfid)) {
+ if (!priv->root_inode)
+ priv->root_inode = inode_ref(loc->inode);
+
+ if (priv->choose_local && !priv->did_discovery) {
+ /* Logic to detect which subvolumes of AFR are
+ local, in order to prefer them for reads
+ */
+ local->do_discovery = _gf_true;
+ priv->did_discovery = _gf_true;
}
- UNLOCK (&frame->lock);
+ }
- if (need_unwind)
- afr_flush_unwind (frame, this);
+ local->op = GF_FOP_LOOKUP;
- call_count = afr_frame_return (frame);
+ loc_copy(&local->loc, loc);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
+ local->inode = inode_ref(loc->inode);
+
+ if (xattr_req) {
+ /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
+ allocate one for us */
+ local->xattr_req = dict_copy_with_ref(xattr_req, NULL);
+ if (!local->xattr_req) {
+ op_errno = ENOMEM;
+ goto out;
}
+ }
+ if (gf_uuid_is_null(loc->inode->gfid)) {
+ afr_discover_do(frame, this, 0);
return 0;
+ }
+
+ afr_read_subvol_get(loc->inode, this, NULL, NULL, &event,
+ AFR_DATA_TRANSACTION, NULL);
+
+ afr_discover_do(frame, this, 0);
+
+ return 0;
+out:
+ AFR_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
+int
+afr_lookup_do(call_frame_t *frame, xlator_t *this, int err)
+{
+ int ret = 0;
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (err < 0) {
+ local->op_errno = err;
+ goto out;
+ }
+
+ call_count = local->call_count = AFR_COUNT(local->child_up,
+ priv->child_count);
+
+ ret = afr_lookup_xattr_req_prepare(local, this, local->xattr_req,
+ &local->loc);
+ if (ret) {
+ local->op_errno = -ret;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(
+ frame, afr_lookup_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->lookup, &local->loc, local->xattr_req);
+ if (!--call_count)
+ break;
+ }
+ }
+ return 0;
+out:
+ AFR_STACK_UNWIND(lookup, frame, -1, local->op_errno, 0, 0, 0, 0);
+ return 0;
+}
+
+/*
+ * afr_lookup()
+ *
+ * The goal here is to figure out what the element getting looked up is.
+ * i.e what is the GFID, inode type and a conservative estimate of the
+ * inode attributes are.
+ *
+ * As we lookup, operations may be underway on the entry name and the
+ * inode. In lookup() we are primarily concerned only with the entry
+ * operations. If the entry is getting unlinked or renamed, we detect
+ * what operation is underway by querying for on-going transactions and
+ * pending self-healing on the entry through xdata.
+ *
+ * If the entry is a file/dir, it may need self-heal and/or in a
+ * split-brain condition. Lookup is not the place to worry about these
+ * conditions. Outcast marking will naturally handle them in the read
+ * paths.
+ *
+ * Here is a brief goal of what we are trying to achieve:
+ *
+ * - LOOKUP on all subvolumes concurrently, querying on-going transaction
+ * and pending self-heal info from the servers.
+ *
+ * - If all servers reply the same inode type and GFID, the overall call
+ * MUST be a success.
+ *
+ * - If inode types or GFIDs mismatch, and there IS either an on-going
+ * transaction or pending self-heal, inspect what the nature of the
+ * transaction or pending heal is, and select the appropriate subvolume's
+ * reply as the winner.
+ *
+ * - If inode types or GFIDs mismatch, and there are no on-going transactions
+ * or pending self-heal on the entry name on any of the servers, fail the
+ * lookup with EIO. Something has gone wrong beyond reasonable action.
+ */
int
-afr_flush_wind (call_frame_t *frame, xlator_t *this)
+afr_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = -1;
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ int event = 0;
+ int ret = 0;
- local = frame->local;
- priv = this->private;
+ if (loc_is_nameless(loc)) {
+ if (xattr_req)
+ dict_del_sizen(xattr_req, "gfid-req");
+ afr_discover(frame, this, loc, xattr_req);
+ return 0;
+ }
- call_count = afr_up_children_count (local->child_up, priv->child_count);
+ if (afr_is_private_directory(this->private, loc->parent->gfid, loc->name,
+ frame->root->pid)) {
+ op_errno = EPERM;
+ goto out;
+ }
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- local->call_count = call_count;
+ if (!local->call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_flush_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->flush,
- local->fd, NULL);
-
- if (!--call_count)
- break;
- }
- }
+ local->op = GF_FOP_LOOKUP;
- return 0;
-}
+ loc_copy(&local->loc, loc);
+ local->inode = inode_ref(loc->inode);
-int
-afr_flush_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
+ if (xattr_req) {
+ /* If xattr_req was null, afr_lookup_xattr_req_prepare() will
+ allocate one for us */
+ local->xattr_req = dict_copy_with_ref(xattr_req, NULL);
+ if (!local->xattr_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ ret = dict_get_gfuuid(local->xattr_req, "gfid-req",
+ &local->cont.lookup.gfid_req);
+ if (ret == 0) {
+ dict_del_sizen(local->xattr_req, "gfid-req");
+ }
+ }
- local = frame->local;
+ afr_read_subvol_get(loc->parent, this, NULL, NULL, &event,
+ AFR_DATA_TRANSACTION, NULL);
- local->transaction.unwind (frame, this);
+ afr_lookup_do(frame, this, 0);
- AFR_STACK_DESTROY (frame);
+ return 0;
+out:
+ AFR_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
+void
+_afr_cleanup_fd_ctx(xlator_t *this, afr_fd_ctx_t *fd_ctx)
+{
+ afr_private_t *priv = this->private;
+
+ if (fd_ctx->lk_heal_info) {
+ LOCK(&priv->lock);
+ {
+ list_del(&fd_ctx->lk_heal_info->pos);
+ }
+ afr_lk_heal_info_cleanup(fd_ctx->lk_heal_info);
+ fd_ctx->lk_heal_info = NULL;
+ }
+ GF_FREE(fd_ctx->opened_on);
+ GF_FREE(fd_ctx);
+ return;
+}
int
-afr_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int ret = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ ret = fd_ctx_get(fd, this, &ctx);
+ if (ret < 0)
+ goto out;
- priv = this->private;
+ fd_ctx = (afr_fd_ctx_t *)(long)ctx;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
+ if (fd_ctx) {
+ _afr_cleanup_fd_ctx(this, fd_ctx);
+ }
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+out:
+ return 0;
+}
- local->op = GF_FOP_FLUSH;
+int
+afr_release(xlator_t *this, fd_t *fd)
+{
+ afr_cleanup_fd_ctx(this, fd);
- local->transaction.fop = afr_flush_wind;
- local->transaction.done = afr_flush_done;
- local->transaction.unwind = afr_flush_unwind;
+ return 0;
+}
- local->fd = fd_ref (fd);
+afr_fd_ctx_t *
+__afr_fd_ctx_get(fd_t *fd, xlator_t *this)
+{
+ uint64_t ctx = 0;
+ int ret = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
- local->transaction.main_frame = frame;
- local->transaction.start = 0;
- local->transaction.len = 0;
+ ret = __fd_ctx_get(fd, this, &ctx);
- ret = afr_open_fd_fix (transaction_frame, this, _gf_false);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ ret = __afr_fd_ctx_set(this, fd);
+ if (ret < 0)
+ goto out;
+ ret = __fd_ctx_get(fd, this, &ctx);
+ if (ret < 0)
+ goto out;
+ }
- ret = 0;
+ fd_ctx = (afr_fd_ctx_t *)(long)ctx;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
-
- AFR_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
- }
-
- return 0;
+ return fd_ctx;
}
-/* }}} */
+afr_fd_ctx_t *
+afr_fd_ctx_get(fd_t *fd, xlator_t *this)
+{
+ afr_fd_ctx_t *fd_ctx = NULL;
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __afr_fd_ctx_get(fd, this);
+ }
+ UNLOCK(&fd->lock);
+
+ return fd_ctx;
+}
int
-afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd)
+__afr_fd_ctx_set(xlator_t *this, fd_t *fd)
{
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
-
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0)
- goto out;
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ uint64_t ctx = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int i = 0;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ VALIDATE_OR_GOTO(this->private, out);
+ VALIDATE_OR_GOTO(fd, out);
- if (fd_ctx) {
- if (fd_ctx->pre_op_done)
- GF_FREE (fd_ctx->pre_op_done);
+ priv = this->private;
- if (fd_ctx->opened_on)
- GF_FREE (fd_ctx->opened_on);
+ ret = __fd_ctx_get(fd, this, &ctx);
- if (fd_ctx->locked_on)
- GF_FREE (fd_ctx->locked_on);
+ if (ret == 0)
+ goto out;
- if (fd_ctx->pre_op_piggyback)
- GF_FREE (fd_ctx->pre_op_piggyback);
- list_for_each_entry_safe (paused_call, tmp, &fd_ctx->paused_calls,
- call_list) {
- list_del_init (&paused_call->call_list);
- GF_FREE (paused_call);
- }
+ fd_ctx = GF_CALLOC(1, sizeof(afr_fd_ctx_t), gf_afr_mt_afr_fd_ctx_t);
+ if (!fd_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
- if (fd_ctx->lock_piggyback)
- GF_FREE (fd_ctx->lock_piggyback);
+ fd_ctx->opened_on = GF_CALLOC(sizeof(*fd_ctx->opened_on), priv->child_count,
+ gf_afr_mt_int32_t);
+ if (!fd_ctx->opened_on) {
+ ret = -ENOMEM;
+ goto out;
+ }
- if (fd_ctx->lock_acquired)
- GF_FREE (fd_ctx->lock_acquired);
+ for (i = 0; i < priv->child_count; i++) {
+ if (fd_is_anonymous(fd))
+ fd_ctx->opened_on[i] = AFR_FD_OPENED;
+ else
+ fd_ctx->opened_on[i] = AFR_FD_NOT_OPENED;
+ }
- GF_FREE (fd_ctx);
- }
+ fd_ctx->readdir_subvol = -1;
+ fd_ctx->lk_heal_info = NULL;
+ ret = __fd_ctx_set(fd, this, (uint64_t)(long)fd_ctx);
+ if (ret)
+ gf_msg_debug(this->name, 0, "failed to set fd ctx (%p)", fd);
out:
- return 0;
+ if (ret && fd_ctx)
+ _afr_cleanup_fd_ctx(this, fd_ctx);
+ return ret;
}
+/* {{{ flush */
int
-afr_release (xlator_t *this, fd_t *fd)
+afr_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- afr_locked_fd_t *locked_fd = NULL;
- afr_locked_fd_t *tmp = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int call_count = -1;
- priv = this->private;
+ local = frame->local;
- afr_cleanup_fd_ctx (this, fd);
+ LOCK(&frame->lock);
+ {
+ if (op_ret != -1) {
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ } else {
+ local->op_errno = op_errno;
+ }
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- list_for_each_entry_safe (locked_fd, tmp, &priv->saved_fds,
- list) {
+ if (call_count == 0)
+ AFR_STACK_UNWIND(flush, frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
- if (locked_fd->fd == fd) {
- list_del_init (&locked_fd->list);
- GF_FREE (locked_fd);
- }
+ return 0;
+}
+static int
+afr_flush_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+
+ priv = this->private;
+ local = frame->local;
+ call_count = local->call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, afr_flush_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->flush,
+ local->fd, xdata);
+ if (!--call_count)
+ break;
}
+ }
- return 0;
+ return 0;
}
+afr_local_t *
+afr_wakeup_same_fd_delayed_op(xlator_t *this, afr_lock_t *lock, fd_t *fd)
+{
+ afr_local_t *local = NULL;
+
+ if (lock->delay_timer) {
+ local = list_entry(lock->post_op.next, afr_local_t,
+ transaction.owner_list);
+ if (fd == local->fd) {
+ if (gf_timer_call_cancel(this->ctx, lock->delay_timer)) {
+ local = NULL;
+ } else {
+ lock->delay_timer = NULL;
+ }
+ } else {
+ local = NULL;
+ }
+ }
+
+ return local;
+}
-/* {{{ fsync */
+void
+afr_delayed_changelog_wake_resume(xlator_t *this, inode_t *inode,
+ call_stub_t *stub)
+{
+ afr_inode_ctx_t *ctx = NULL;
+ afr_lock_t *lock = NULL;
+ afr_local_t *metadata_local = NULL;
+ afr_local_t *data_local = NULL;
+ LOCK(&inode->lock);
+ {
+ (void)__afr_inode_ctx_get(this, inode, &ctx);
+ lock = &ctx->lock[AFR_DATA_TRANSACTION];
+ data_local = afr_wakeup_same_fd_delayed_op(this, lock, stub->args.fd);
+ lock = &ctx->lock[AFR_METADATA_TRANSACTION];
+ metadata_local = afr_wakeup_same_fd_delayed_op(this, lock,
+ stub->args.fd);
+ }
+ UNLOCK(&inode->lock);
+
+ if (data_local) {
+ data_local->transaction.resume_stub = stub;
+ } else if (metadata_local) {
+ metadata_local->transaction.resume_stub = stub;
+ } else {
+ call_resume(stub);
+ }
+ if (data_local) {
+ afr_delayed_changelog_wake_up_cbk(data_local);
+ }
+ if (metadata_local) {
+ afr_delayed_changelog_wake_up_cbk(metadata_local);
+ }
+}
int
-afr_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+afr_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- int read_child = 0;
+ afr_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ int op_errno = ENOMEM;
- local = frame->local;
-
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+ local->op = GF_FOP_FLUSH;
+ if (!afr_is_consistent_io_possible(local, this->private, &op_errno))
+ goto out;
- if (op_ret == 0) {
- local->op_ret = 0;
+ local->fd = fd_ref(fd);
- if (local->success_count == 0) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
- }
+ stub = fop_flush_stub(frame, afr_flush_wrapper, fd, xdata);
+ if (!stub)
+ goto out;
- if (child_index == read_child) {
- local->cont.fsync.prebuf = *prebuf;
- local->cont.fsync.postbuf = *postbuf;
- }
+ afr_delayed_changelog_wake_resume(this, fd->inode, stub);
- local->success_count++;
- }
+ return 0;
+out:
+ AFR_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
+ return 0;
+}
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+int
+afr_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = -1;
- call_count = afr_frame_return (frame);
+ local = frame->local;
- if (call_count == 0) {
- AFR_STACK_UNWIND (fsync, frame, local->op_ret, local->op_errno,
- &local->cont.fsync.prebuf,
- &local->cont.fsync.postbuf,
- NULL);
+ LOCK(&frame->lock);
+ {
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ } else {
+ local->op_errno = op_errno;
}
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- return 0;
-}
+ if (call_count == 0)
+ AFR_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
+}
int
-afr_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync, dict_t *xdata)
+afr_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int32_t call_count = 0;
+ int32_t op_errno = ENOMEM;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ priv = this->private;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- call_count = local->call_count;
+ local->op = GF_FOP_FSYNCDIR;
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
- local->fd = fd_ref (fd);
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_fsync_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fsync,
- fd, datasync, xdata);
- if (!--call_count)
- break;
- }
+ call_count = local->call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND(frame, afr_fsyncdir_cbk, priv->children[i],
+ priv->children[i]->fops->fsyncdir, fd, datasync, xdata);
+ if (!--call_count)
+ break;
}
+ }
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ AFR_STACK_UNWIND(fsyncdir, frame, -1, op_errno, NULL);
+
+ return 0;
}
/* }}} */
-/* {{{ fsync */
+static int
+afr_serialized_lock_wind(call_frame_t *frame, xlator_t *this);
-int32_t
-afr_fsyncdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
+static gf_boolean_t
+afr_is_conflicting_lock_present(int32_t op_ret, int32_t op_errno)
{
- afr_local_t *local = NULL;
- int call_count = -1;
-
- local = frame->local;
+ if (op_ret == -1 && op_errno == EAGAIN)
+ return _gf_true;
+ return _gf_false;
+}
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
+static void
+afr_fop_lock_unwind(call_frame_t *frame, glusterfs_fop_t op, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ switch (op) {
+ case GF_FOP_INODELK:
+ AFR_STACK_UNWIND(inodelk, frame, op_ret, op_errno, xdata);
+ break;
+ case GF_FOP_FINODELK:
+ AFR_STACK_UNWIND(finodelk, frame, op_ret, op_errno, xdata);
+ break;
+ case GF_FOP_ENTRYLK:
+ AFR_STACK_UNWIND(entrylk, frame, op_ret, op_errno, xdata);
+ break;
+ case GF_FOP_FENTRYLK:
+ AFR_STACK_UNWIND(fentrylk, frame, op_ret, op_errno, xdata);
+ break;
+ default:
+ break;
+ }
+}
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+static void
+afr_fop_lock_wind(call_frame_t *frame, xlator_t *this, int child_index,
+ int32_t (*lock_cbk)(call_frame_t *, void *, xlator_t *,
+ int32_t, int32_t, dict_t *))
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int i = child_index;
+
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ STACK_WIND_COOKIE(
+ frame, lock_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->inodelk,
+ (const char *)local->cont.inodelk.volume, &local->loc,
+ local->cont.inodelk.cmd, &local->cont.inodelk.flock,
+ local->cont.inodelk.xdata);
+ break;
+ case GF_FOP_FINODELK:
+ STACK_WIND_COOKIE(
+ frame, lock_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->finodelk,
+ (const char *)local->cont.inodelk.volume, local->fd,
+ local->cont.inodelk.cmd, &local->cont.inodelk.flock,
+ local->cont.inodelk.xdata);
+ break;
+ case GF_FOP_ENTRYLK:
+ STACK_WIND_COOKIE(
+ frame, lock_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->entrylk, local->cont.entrylk.volume,
+ &local->loc, local->cont.entrylk.basename,
+ local->cont.entrylk.cmd, local->cont.entrylk.type,
+ local->cont.entrylk.xdata);
+ break;
+ case GF_FOP_FENTRYLK:
+ STACK_WIND_COOKIE(
+ frame, lock_cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->fentrylk, local->cont.entrylk.volume,
+ local->fd, local->cont.entrylk.basename,
+ local->cont.entrylk.cmd, local->cont.entrylk.type,
+ local->cont.entrylk.xdata);
+ break;
+ default:
+ break;
+ }
+}
- call_count = afr_frame_return (frame);
+void
+afr_fop_lock_proceed(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- if (call_count == 0)
- AFR_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno, xdata);
+ local = frame->local;
+ priv = frame->this->private;
- return 0;
+ if (local->fop_lock_state != AFR_FOP_LOCK_PARALLEL) {
+ afr_fop_lock_unwind(frame, local->op, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return;
+ }
+ /* At least one child is up */
+ /*
+ * Non-blocking locks also need to be serialized. Otherwise there is
+ * a chance that both the mounts which issued same non-blocking inodelk
+ * may endup not acquiring the lock on any-brick.
+ * Ex: Mount1 and Mount2
+ * request for full length lock on file f1. Mount1 afr may acquire the
+ * partial lock on brick-1 and may not acquire the lock on brick-2
+ * because Mount2 already got the lock on brick-2, vice versa. Since
+ * both the mounts only got partial locks, afr treats them as failure in
+ * gaining the locks and unwinds with EAGAIN errno.
+ */
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+ local->fop_lock_state = AFR_FOP_LOCK_SERIAL;
+ afr_local_replies_wipe(local, priv);
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.cmd = local->cont.inodelk.in_cmd;
+ local->cont.inodelk.flock = local->cont.inodelk.in_flock;
+ if (local->cont.inodelk.xdata)
+ dict_unref(local->cont.inodelk.xdata);
+ local->cont.inodelk.xdata = NULL;
+ if (local->xdata_req)
+ local->cont.inodelk.xdata = dict_ref(local->xdata_req);
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.cmd = local->cont.entrylk.in_cmd;
+ if (local->cont.entrylk.xdata)
+ dict_unref(local->cont.entrylk.xdata);
+ local->cont.entrylk.xdata = NULL;
+ if (local->xdata_req)
+ local->cont.entrylk.xdata = dict_ref(local->xdata_req);
+ break;
+ default:
+ break;
+ }
+ afr_serialized_lock_wind(frame, frame->this);
}
+static int32_t
+afr_unlock_partial_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
-int32_t
-afr_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t datasync, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ int child_index = (long)cookie;
+ uuid_t gfid = {0};
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local = frame->local;
+ priv = this->private;
- priv = this->private;
+ if (op_ret < 0 && op_errno != ENOTCONN) {
+ if (local->fd)
+ gf_uuid_copy(gfid, local->fd->inode->gfid);
+ else
+ loc_gfid(&local->loc, gfid);
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL,
+ "%s: Failed to unlock %s on %s "
+ "with lk_owner: %s",
+ uuid_utoa(gfid), gf_fop_list[local->op],
+ priv->children[child_index]->name,
+ lkowner_utoa(&frame->root->lk_owner));
+ }
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ call_count = afr_frame_return(frame);
+ if (call_count == 0)
+ afr_fop_lock_proceed(frame);
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ return 0;
+}
- call_count = local->call_count;
+static int32_t
+afr_unlock_locks_and_proceed(call_frame_t *frame, xlator_t *this,
+ int call_count)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+
+ if (call_count == 0) {
+ afr_fop_lock_proceed(frame);
+ goto out;
+ }
+
+ local = frame->local;
+ priv = this->private;
+ local->call_count = call_count;
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.flock.l_type = F_UNLCK;
+ local->cont.inodelk.cmd = F_SETLK;
+ if (local->cont.inodelk.xdata)
+ dict_unref(local->cont.inodelk.xdata);
+ local->cont.inodelk.xdata = NULL;
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.cmd = ENTRYLK_UNLOCK;
+ if (local->cont.entrylk.xdata)
+ dict_unref(local->cont.entrylk.xdata);
+ local->cont.entrylk.xdata = NULL;
+ break;
+ default:
+ break;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fsyncdir_cbk,
- priv->children[i],
- priv->children[i]->fops->fsyncdir,
- fd, datasync, xdata);
- if (!--call_count)
- break;
- }
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
- ret = 0;
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
- return 0;
-}
+ if (local->replies[i].op_ret == -1)
+ continue;
-/* }}} */
+ afr_fop_lock_wind(frame, this, i, afr_unlock_partial_lock_cbk);
-/* {{{ xattrop */
+ if (!--call_count)
+ break;
+ }
+
+out:
+ return 0;
+}
int32_t
-afr_xattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr, dict_t *xdata)
+afr_fop_lock_done(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- int call_count = -1;
+ int i = 0;
+ int lock_count = 0;
+ unsigned char *success = NULL;
- local = frame->local;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- if (!local->cont.xattrop.xattr)
- local->cont.xattrop.xattr = dict_ref (xattr);
- local->op_ret = 0;
- }
+ local = frame->local;
+ priv = this->private;
+ success = alloca0(priv->child_count);
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
- call_count = afr_frame_return (frame);
+ if (local->replies[i].op_ret == 0) {
+ lock_count++;
+ success[i] = 1;
+ }
- if (call_count == 0)
- AFR_STACK_UNWIND (xattrop, frame, local->op_ret, local->op_errno,
- local->cont.xattrop.xattr, xdata);
+ if (local->op_ret == -1 && local->op_errno == EAGAIN)
+ continue;
- return 0;
-}
+ if ((local->replies[i].op_ret == -1) &&
+ (local->replies[i].op_errno == EAGAIN)) {
+ local->op_ret = -1;
+ local->op_errno = EAGAIN;
+ continue;
+ }
+ if (local->replies[i].op_ret == 0)
+ local->op_ret = 0;
-int32_t
-afr_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
+ local->op_errno = local->replies[i].op_errno;
+ }
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (afr_fop_lock_is_unlock(frame))
+ goto unwind;
- priv = this->private;
+ if (afr_is_conflicting_lock_present(local->op_ret, local->op_errno)) {
+ afr_unlock_locks_and_proceed(frame, this, lock_count);
+ } else if (priv->quorum_count && !afr_has_quorum(success, this, NULL)) {
+ local->fop_lock_state = AFR_FOP_LOCK_QUORUM_FAILED;
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ if (local->op_errno == 0)
+ local->op_errno = afr_quorum_errno(priv);
+ afr_unlock_locks_and_proceed(frame, this, lock_count);
+ } else {
+ goto unwind;
+ }
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ return 0;
+unwind:
+ afr_fop_lock_unwind(frame, local->op, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
+}
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+static int
+afr_common_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = (long)cookie;
- call_count = local->call_count;
+ local = frame->local;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_xattrop_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- loc, optype, xattr, xdata);
- if (!--call_count)
- break;
- }
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (op_ret == 0 && xdata) {
+ local->replies[child_index].xdata = dict_ref(xdata);
+ LOCK(&frame->lock);
+ {
+ if (!local->xdata_rsp)
+ local->xdata_rsp = dict_ref(xdata);
}
-
- ret = 0;
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
- return 0;
+ UNLOCK(&frame->lock);
+ }
+ return 0;
}
-/* }}} */
-
-/* {{{ fxattrop */
+static int32_t
+afr_serialized_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
-int32_t
-afr_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
-
- int call_count = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = (long)cookie;
+ int next_child = 0;
- local = frame->local;
+ local = frame->local;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- if (!local->cont.fxattrop.xattr)
- local->cont.fxattrop.xattr = dict_ref (xattr);
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
- local->op_ret = 0;
- }
+ for (next_child = child_index + 1; next_child < priv->child_count;
+ next_child++) {
+ if (local->child_up[next_child])
+ break;
+ }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ if (afr_is_conflicting_lock_present(op_ret, op_errno) ||
+ (next_child == priv->child_count)) {
+ afr_fop_lock_done(frame, this);
+ } else {
+ afr_fop_lock_wind(frame, this, next_child, afr_serialized_lock_cbk);
+ }
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- AFR_STACK_UNWIND (fxattrop, frame, local->op_ret, local->op_errno,
- local->cont.fxattrop.xattr, xdata);
-
- return 0;
+ return 0;
}
-
-int32_t
-afr_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+static int
+afr_serialized_lock_wind(call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
+ local = frame->local;
- priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ afr_fop_lock_wind(frame, this, i, afr_serialized_lock_cbk);
+ break;
+ }
+ }
+ return 0;
+}
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+static int32_t
+afr_parallel_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+{
+ int call_count = 0;
- call_count = local->call_count;
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fxattrop_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- fd, optype, xattr, xdata);
- if (!--call_count)
- break;
- }
- }
+ call_count = afr_frame_return(frame);
+ if (call_count == 0)
+ afr_fop_lock_done(frame, this);
- ret = 0;
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-/* }}} */
+static int
+afr_parallel_lock_wind(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ int i = 0;
+ priv = this->private;
+ local = frame->local;
+ call_count = local->call_count;
-int32_t
-afr_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+ afr_fop_lock_wind(frame, this, i, afr_parallel_lock_cbk);
+ if (!--call_count)
+ break;
+ }
+ return 0;
+}
+static int
+afr_fop_handle_lock(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- int call_count = -1;
+ afr_local_t *local = frame->local;
+ int op_errno = 0;
- local = frame->local;
+ if (!afr_fop_lock_is_unlock(frame)) {
+ if (!afr_is_consistent_io_possible(local, this->private, &op_errno))
+ goto out;
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.cmd = F_SETLK;
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.cmd = ENTRYLK_LOCK_NB;
+ break;
+ default:
+ break;
+ }
+ }
- local->op_errno = op_errno;
+ if (local->xdata_req) {
+ switch (local->op) {
+ case GF_FOP_INODELK:
+ case GF_FOP_FINODELK:
+ local->cont.inodelk.xdata = dict_ref(local->xdata_req);
+ break;
+ case GF_FOP_ENTRYLK:
+ case GF_FOP_FENTRYLK:
+ local->cont.entrylk.xdata = dict_ref(local->xdata_req);
+ break;
+ default:
+ break;
}
- UNLOCK (&frame->lock);
+ }
- call_count = afr_frame_return (frame);
+ local->fop_lock_state = AFR_FOP_LOCK_PARALLEL;
+ afr_parallel_lock_wind(frame, this);
+out:
+ return -op_errno;
+}
- if (call_count == 0)
- AFR_STACK_UNWIND (inodelk, frame, local->op_ret,
- local->op_errno, xdata);
+static int32_t
+afr_handle_inodelk(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ const char *volume, loc_t *loc, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = ENOMEM;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = fop;
+ if (loc)
+ loc_copy(&local->loc, loc);
+ if (fd && (flock->l_type != F_UNLCK)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local->fd = fd_ref(fd);
+ }
+
+ local->cont.inodelk.volume = gf_strdup(volume);
+ if (!local->cont.inodelk.volume) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->cont.inodelk.in_cmd = cmd;
+ local->cont.inodelk.cmd = cmd;
+ local->cont.inodelk.in_flock = *flock;
+ local->cont.inodelk.flock = *flock;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ op_errno = -afr_fop_handle_lock(frame, frame->this);
+ if (op_errno)
+ goto out;
+ return 0;
+out:
+ afr_fop_lock_unwind(frame, fop, -1, op_errno, NULL);
- return 0;
+ return 0;
}
+int32_t
+afr_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ afr_handle_inodelk(frame, this, GF_FOP_INODELK, volume, loc, NULL, cmd,
+ flock, xdata);
+ return 0;
+}
int32_t
-afr_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata)
+afr_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
+ afr_handle_inodelk(frame, this, GF_FOP_FINODELK, volume, NULL, fd, cmd,
+ flock, xdata);
+ return 0;
+}
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+static int
+afr_handle_entrylk(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ const char *volume, loc_t *loc, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = ENOMEM;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = fop;
+ if (loc)
+ loc_copy(&local->loc, loc);
+ if (fd && (cmd != ENTRYLK_UNLOCK)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local->fd = fd_ref(fd);
+ }
+ local->cont.entrylk.cmd = cmd;
+ local->cont.entrylk.in_cmd = cmd;
+ local->cont.entrylk.type = type;
+ local->cont.entrylk.volume = gf_strdup(volume);
+ local->cont.entrylk.basename = gf_strdup(basename);
+ if (!local->cont.entrylk.volume || !local->cont.entrylk.basename) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+ op_errno = -afr_fop_handle_lock(frame, frame->this);
+ if (op_errno)
+ goto out;
+
+ return 0;
+out:
+ afr_fop_lock_unwind(frame, fop, -1, op_errno, NULL);
+ return 0;
+}
- priv = this->private;
+int
+afr_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ afr_handle_entrylk(frame, this, GF_FOP_ENTRYLK, volume, loc, NULL, basename,
+ cmd, type, xdata);
+ return 0;
+}
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+int
+afr_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ afr_handle_entrylk(frame, this, GF_FOP_FENTRYLK, volume, NULL, fd, basename,
+ cmd, type, xdata);
+ return 0;
+}
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+int
+afr_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct statvfs *statvfs, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ struct statvfs *buf = NULL;
- call_count = local->call_count;
+ local = frame->local;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_inodelk_cbk,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- volume, loc, cmd, flock, xdata);
-
- if (!--call_count)
- break;
+ LOCK(&frame->lock);
+ {
+ if (op_ret != 0) {
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+
+ local->op_ret = op_ret;
+
+ buf = &local->cont.statfs.buf;
+ if (local->cont.statfs.buf_set) {
+ if (statvfs->f_bavail < buf->f_bavail) {
+ *buf = *statvfs;
+ if (xdata) {
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = dict_ref(xdata);
}
+ }
+ } else {
+ *buf = *statvfs;
+ local->cont.statfs.buf_set = 1;
+ if (xdata)
+ local->xdata_rsp = dict_ref(xdata);
}
+ }
+unlock:
+ call_count = --local->call_count;
+ UNLOCK(&frame->lock);
- ret = 0;
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
- return 0;
+ if (call_count == 0)
+ AFR_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
+ &local->cont.statfs.buf, local->xdata_rsp);
+
+ return 0;
}
+int
+afr_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_count = 0;
+ int32_t op_errno = ENOMEM;
+
+ priv = this->private;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_STATFS;
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
+
+ if (priv->arbiter_count == 1 && local->child_up[ARBITER_BRICK_INDEX])
+ local->call_count--;
+ call_count = local->call_count;
+ if (!call_count) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ if (AFR_IS_ARBITER_BRICK(priv, i))
+ continue;
+ STACK_WIND(frame, afr_statfs_cbk, priv->children[i],
+ priv->children[i]->fops->statfs, loc, xdata);
+ if (!--call_count)
+ break;
+ }
+ }
+
+ return 0;
+out:
+ AFR_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
int32_t
-afr_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
+afr_lk_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
dict_t *xdata)
-
{
- afr_local_t *local = NULL;
- int call_count = -1;
-
- local = frame->local;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ int call_count = -1;
+ int child_index = (long)cookie;
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
- call_count = afr_frame_return (frame);
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL,
+ "gfid=%s: unlock failed on subvolume %s "
+ "with lock owner %s",
+ uuid_utoa(local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ lkowner_utoa(&frame->root->lk_owner));
+ }
- if (call_count == 0)
- AFR_STACK_UNWIND (finodelk, frame, local->op_ret,
- local->op_errno, xdata);
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno, NULL,
+ local->xdata_rsp);
+ }
- return 0;
+ return 0;
}
-
int32_t
-afr_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
- dict_t *xdata)
+afr_lk_unlock(call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_count = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local = frame->local;
+ priv = this->private;
- priv = this->private;
+ call_count = afr_locked_nodes_count(local->cont.lk.locked_nodes,
+ priv->child_count);
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ if (call_count == 0) {
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno, NULL,
+ local->xdata_rsp);
+ return 0;
+ }
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->call_count = call_count;
- call_count = local->call_count;
+ local->cont.lk.user_flock.l_type = F_UNLCK;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_finodelk_cbk,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- volume, fd, cmd, flock, xdata);
-
- if (!--call_count)
- break;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lk.locked_nodes[i]) {
+ STACK_WIND_COOKIE(frame, afr_lk_unlock_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->lk,
+ local->fd, F_SETLK, &local->cont.lk.user_flock,
+ NULL);
+
+ if (!--call_count)
+ break;
}
+ }
- ret = 0;
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-
int32_t
-afr_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = -1;
- local = frame->local;
+ local = frame->local;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
+ child_index = (long)cookie;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ if (op_ret < 0 && op_errno == EAGAIN) {
+ local->op_ret = -1;
+ local->op_errno = EAGAIN;
- call_count = afr_frame_return (frame);
+ afr_lk_unlock(frame, this);
+ return 0;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lk.locked_nodes[child_index] = 1;
+ local->cont.lk.ret_flock = *lock;
+ }
+
+ child_index++;
+
+ if (child_index < priv->child_count) {
+ STACK_WIND_COOKIE(frame, afr_lk_cbk, (void *)(long)child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->lk, local->fd,
+ local->cont.lk.cmd, &local->cont.lk.user_flock,
+ local->xdata_req);
+ } else if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.locked_nodes, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
- if (call_count == 0)
- AFR_STACK_UNWIND (entrylk, frame, local->op_ret,
- local->op_errno, xdata);
+ afr_lk_unlock(frame, this);
+ } else {
+ if (local->op_ret < 0)
+ local->op_errno = afr_final_errno(local, priv);
- return 0;
-}
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.ret_flock, local->xdata_rsp);
+ }
+ return 0;
+}
-int32_t
-afr_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc,
- const char *basename, entrylk_cmd cmd, entrylk_type type,
- dict_t *xdata)
+int
+afr_lk_transaction_cbk(int ret, call_frame_t *frame, void *opaque)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ return 0;
+}
- priv = this->private;
+int
+afr_lk_txn_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = -1;
+
+ local = frame->local;
+ child_index = (long)cookie;
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lk.locked_nodes[child_index] = 1;
+ local->cont.lk.ret_flock = *lock;
+ }
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+int
+afr_lk_txn_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ int child_index = (long)cookie;
+
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_UNLOCK_FAIL,
+ "gfid=%s: unlock failed on subvolume %s "
+ "with lock owner %s",
+ uuid_utoa(local->fd->inode->gfid),
+ priv->children[child_index]->name,
+ lkowner_utoa(&frame->root->lk_owner));
+ }
+ return 0;
+}
+int
+afr_lk_transaction(void *opaque)
+{
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ char *wind_on = NULL;
+ int op_errno = 0;
+ int i = 0;
+ int ret = 0;
+
+ frame = (call_frame_t *)opaque;
+ local = frame->local;
+ this = frame->this;
+ priv = this->private;
+ wind_on = alloca0(priv->child_count);
+
+ if (priv->arbiter_count || priv->child_count != 3) {
+ op_errno = ENOTSUP;
+ gf_msg(frame->this->name, GF_LOG_ERROR, op_errno, AFR_MSG_LK_HEAL_DOM,
+ "%s: Lock healing supported only for replica 3 volumes.",
+ uuid_utoa(local->fd->inode->gfid));
+ goto err;
+ }
+
+ op_errno = -afr_dom_lock_acquire(frame); // Released during
+ // AFR_STACK_UNWIND
+ if (op_errno != 0) {
+ goto err;
+ }
+ if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.dom_locked_nodes, this, NULL)) {
+ op_errno = afr_final_errno(local, priv);
+ goto err;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->child_up[i] && local->cont.lk.dom_locked_nodes[i])
+ wind_on[i] = 1;
+ }
+ AFR_ONLIST(wind_on, frame, afr_lk_txn_wind_cbk, lk, local->fd,
+ local->cont.lk.cmd, &local->cont.lk.user_flock,
+ local->xdata_req);
+
+ if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lk.locked_nodes, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ goto unlock;
+ } else {
+ if (local->cont.lk.user_flock.l_type == F_UNLCK)
+ ret = afr_remove_lock_from_saved_locks(local, this);
+ else
+ ret = afr_add_lock_to_saved_locks(frame, this);
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto unlock;
+ }
+ AFR_STACK_UNWIND(lk, frame, local->op_ret, local->op_errno,
+ &local->cont.lk.ret_flock, local->xdata_rsp);
+ }
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ return 0;
- call_count = local->call_count;
+unlock:
+ local->cont.lk.user_flock.l_type = F_UNLCK;
+ AFR_ONLIST(local->cont.lk.locked_nodes, frame, afr_lk_txn_unlock_cbk, lk,
+ local->fd, F_SETLK, &local->cont.lk.user_flock, NULL);
+err:
+ AFR_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
+ return -1;
+}
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_entrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- volume, loc, basename, cmd, type, xdata);
-
- if (!--call_count)
- break;
- }
+int
+afr_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+ int i = 0;
+ int32_t op_errno = ENOMEM;
+
+ priv = this->private;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_LK;
+ if (!afr_lk_is_unlock(cmd, flock)) {
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
+ }
+
+ local->cont.lk.locked_nodes = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lk.locked_nodes),
+ gf_afr_mt_char);
+
+ if (!local->cont.lk.locked_nodes) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->fd = fd_ref(fd);
+ local->cont.lk.cmd = cmd;
+ local->cont.lk.user_flock = *flock;
+ local->cont.lk.ret_flock = *flock;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ if (afr_is_lock_mode_mandatory(xdata)) {
+ ret = synctask_new(this->ctx->env, afr_lk_transaction,
+ afr_lk_transaction_cbk, frame, frame);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto out;
}
-
- ret = 0;
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
return 0;
-}
+ }
+ STACK_WIND_COOKIE(frame, afr_lk_cbk, (void *)(long)0, priv->children[i],
+ priv->children[i]->fops->lk, fd, cmd, flock,
+ local->xdata_req);
+ return 0;
+out:
+ AFR_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
-int32_t
-afr_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+ return 0;
+}
+int32_t
+afr_lease_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_lease *lease,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = -1;
-
- local = frame->local;
+ afr_local_t *local = NULL;
+ int call_count = -1;
- LOCK (&frame->lock);
- {
- if (op_ret == 0)
- local->op_ret = 0;
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
+ local = frame->local;
+ call_count = afr_frame_return(frame);
- if (call_count == 0)
- AFR_STACK_UNWIND (fentrylk, frame, local->op_ret,
- local->op_errno, xdata);
+ if (call_count == 0)
+ AFR_STACK_UNWIND(lease, frame, local->op_ret, local->op_errno, lease,
+ xdata);
- return 0;
+ return 0;
}
-
int32_t
-afr_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd,
- const char *basename, entrylk_cmd cmd,
- entrylk_type type, dict_t *xdata)
+afr_lease_unlock(call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int ret = -1;
- int i = 0;
- int32_t call_count = 0;
- int32_t op_errno = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_count = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local = frame->local;
+ priv = this->private;
- priv = this->private;
+ call_count = afr_locked_nodes_count(local->cont.lease.locked_nodes,
+ priv->child_count);
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ if (call_count == 0) {
+ AFR_STACK_UNWIND(lease, frame, local->op_ret, local->op_errno,
+ &local->cont.lease.ret_lease, NULL);
+ return 0;
+ }
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->call_count = call_count;
- call_count = local->call_count;
+ local->cont.lease.user_lease.cmd = GF_UNLK_LEASE;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_fentrylk_cbk,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
- volume, fd, basename, cmd, type, xdata);
-
- if (!--call_count)
- break;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->cont.lease.locked_nodes[i]) {
+ STACK_WIND(frame, afr_lease_unlock_cbk, priv->children[i],
+ priv->children[i]->fops->lease, &local->loc,
+ &local->cont.lease.user_lease, NULL);
+
+ if (!--call_count)
+ break;
}
+ }
- ret = 0;
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
int32_t
-afr_statfs_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct statvfs *statvfs, dict_t *xdata)
+afr_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_lease *lease, dict_t *xdata)
{
- afr_local_t *local = NULL;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = -1;
- LOCK (&frame->lock);
- {
- local = frame->local;
-
- if (op_ret == 0) {
- local->op_ret = op_ret;
-
- if (local->cont.statfs.buf_set) {
- if (statvfs->f_bavail < local->cont.statfs.buf.f_bavail)
- local->cont.statfs.buf = *statvfs;
- } else {
- local->cont.statfs.buf = *statvfs;
- local->cont.statfs.buf_set = 1;
- }
- }
+ local = frame->local;
+ priv = this->private;
- if (op_ret == -1)
- local->op_errno = op_errno;
+ child_index = (long)cookie;
- }
- UNLOCK (&frame->lock);
+ afr_common_lock_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ if (op_ret < 0 && op_errno == EAGAIN) {
+ local->op_ret = -1;
+ local->op_errno = EAGAIN;
- call_count = afr_frame_return (frame);
+ afr_lease_unlock(frame, this);
+ return 0;
+ }
+
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->op_errno = 0;
+ local->cont.lease.locked_nodes[child_index] = 1;
+ local->cont.lease.ret_lease = *lease;
+ }
+
+ child_index++;
+ if (child_index < priv->child_count) {
+ STACK_WIND_COOKIE(frame, afr_lease_cbk, (void *)(long)child_index,
+ priv->children[child_index],
+ priv->children[child_index]->fops->lease, &local->loc,
+ &local->cont.lease.user_lease, xdata);
+ } else if (priv->quorum_count &&
+ !afr_has_quorum(local->cont.lease.locked_nodes, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
- if (call_count == 0)
- AFR_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->cont.statfs.buf, xdata);
+ afr_lease_unlock(frame, this);
+ } else {
+ if (local->op_ret < 0)
+ local->op_errno = afr_final_errno(local, priv);
+ AFR_STACK_UNWIND(lease, frame, local->op_ret, local->op_errno,
+ &local->cont.lease.ret_lease, NULL);
+ }
- return 0;
+ return 0;
}
-
-int32_t
-afr_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xdata)
+int
+afr_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- int child_count = 0;
- afr_local_t * local = NULL;
- int i = 0;
- int ret = -1;
- int call_count = 0;
- int32_t op_errno = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int32_t op_errno = ENOMEM;
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
+ priv = this->private;
- priv = this->private;
- child_count = priv->child_count;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ local->op = GF_FOP_LEASE;
+ local->cont.lease.locked_nodes = GF_CALLOC(
+ priv->child_count, sizeof(*local->cont.lease.locked_nodes),
+ gf_afr_mt_char);
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ if (!local->cont.lease.locked_nodes) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- call_count = local->call_count;
+ loc_copy(&local->loc, loc);
+ local->cont.lease.user_lease = *lease;
+ local->cont.lease.ret_lease = *lease;
- for (i = 0; i < child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND (frame, afr_statfs_cbk,
- priv->children[i],
- priv->children[i]->fops->statfs,
- loc, xdata);
- if (!--call_count)
- break;
- }
- }
+ STACK_WIND_COOKIE(frame, afr_lease_cbk, (void *)(long)0, priv->children[0],
+ priv->children[0]->fops->lease, loc, lease, xdata);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
+ AFR_STACK_UNWIND(lease, frame, -1, op_errno, NULL, NULL);
-int32_t
-afr_lk_unlock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
- dict_t *xdata)
-{
- afr_local_t * local = NULL;
- int call_count = -1;
+ return 0;
+}
- local = frame->local;
- call_count = afr_frame_return (frame);
+int
+afr_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = (long)cookie;
+ int call_count = 0;
+ gf_boolean_t failed = _gf_false;
+ gf_boolean_t succeeded = _gf_false;
+ int i = 0;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+
+ call_count = afr_frame_return(frame);
+ if (call_count)
+ goto out;
+ /* If any of the subvolumes failed with other than ENOTCONN
+ * return error else return success unless all the subvolumes
+ * failed.
+ * TODO: In case of failure, we need to unregister the xattrs
+ * from the other subvolumes where it succeeded (once upcall
+ * fixes the Bz-1371622)*/
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0 &&
+ local->replies[i].op_errno != ENOTCONN) {
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ if (local->replies[i].xdata) {
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ }
+ failed = _gf_true;
+ break;
+ }
+ if (local->replies[i].op_ret == 0) {
+ succeeded = _gf_true;
+ local->op_ret = 0;
+ local->op_errno = 0;
+ if (!local->xdata_rsp && local->replies[i].xdata) {
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ }
+ }
+ }
+
+ if (!succeeded && !failed) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ }
- if (call_count == 0)
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- lock, xdata);
+ AFR_STACK_UNWIND(ipc, frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
- return 0;
+out:
+ return 0;
}
-
-int32_t
-afr_lk_unlock (call_frame_t *frame, xlator_t *this)
+int
+afr_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int i = 0;
- int call_count = 0;
+ afr_local_t *local = NULL;
+ int32_t op_errno = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int call_cnt = -1;
- local = frame->local;
- priv = this->private;
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
- call_count = afr_locked_nodes_count (local->cont.lk.locked_nodes,
- priv->child_count);
+ if (op != GF_IPC_TARGET_UPCALL)
+ goto wind_default;
- if (call_count == 0) {
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock, NULL);
- return 0;
- }
+ VALIDATE_OR_GOTO(this->private, err);
+ priv = this->private;
- local->call_count = call_count;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto err;
- local->cont.lk.user_flock.l_type = F_UNLCK;
+ call_cnt = local->call_count;
+ if (xdata) {
for (i = 0; i < priv->child_count; i++) {
- if (local->cont.lk.locked_nodes[i]) {
- STACK_WIND (frame, afr_lk_unlock_cbk,
- priv->children[i],
- priv->children[i]->fops->lk,
- local->fd, F_SETLK,
- &local->cont.lk.user_flock, NULL);
-
- if (!--call_count)
- break;
- }
+ if (dict_set_int8(xdata, priv->pending_key[i], 0) < 0)
+ goto err;
}
+ }
- return 0;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+ STACK_WIND_COOKIE(frame, afr_ipc_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->ipc, op,
+ xdata);
+ if (!--call_cnt)
+ break;
+ }
+ return 0;
-int32_t
-afr_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+err:
+ if (op_errno == -1)
+ op_errno = errno;
+ AFR_STACK_UNWIND(ipc, frame, -1, op_errno, NULL);
+
+ return 0;
+
+wind_default:
+ STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ipc, op, xdata);
+ return 0;
+}
+
+int
+afr_forget(xlator_t *this, inode_t *inode)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int child_index = -1;
-/* int ret = 0; */
+ uint64_t ctx_int = 0;
+ afr_inode_ctx_t *ctx = NULL;
+ afr_spb_choice_timeout_cancel(this, inode);
+ inode_ctx_del(inode, this, &ctx_int);
+ if (!ctx_int)
+ return 0;
- local = frame->local;
- priv = this->private;
+ ctx = (afr_inode_ctx_t *)(uintptr_t)ctx_int;
+ afr_inode_ctx_destroy(ctx);
+ return 0;
+}
- child_index = (long) cookie;
+int
+afr_priv_dump(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+
+ GF_ASSERT(this);
+ priv = this->private;
+
+ GF_ASSERT(priv);
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+ gf_proc_dump_write("child_count", "%u", priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ sprintf(key, "child_up[%d]", i);
+ gf_proc_dump_write(key, "%d", priv->child_up[i]);
+ sprintf(key, "pending_key[%d]", i);
+ gf_proc_dump_write(key, "%s", priv->pending_key[i]);
+ sprintf(key, "pending_reads[%d]", i);
+ gf_proc_dump_write(key, "%" PRId64,
+ GF_ATOMIC_GET(priv->pending_reads[i]));
+ sprintf(key, "child_latency[%d]", i);
+ gf_proc_dump_write(key, "%" PRId64, priv->child_latency[i]);
+ sprintf(key, "halo_child_up[%d]", i);
+ gf_proc_dump_write(key, "%d", priv->halo_child_up[i]);
+ }
+ gf_proc_dump_write("data_self_heal", "%d", priv->data_self_heal);
+ gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
+ gf_proc_dump_write("entry_self_heal", "%d", priv->entry_self_heal);
+ gf_proc_dump_write("read_child", "%d", priv->read_child);
+ gf_proc_dump_write("wait_count", "%u", priv->wait_count);
+ gf_proc_dump_write("heal-wait-queue-length", "%d", priv->heal_wait_qlen);
+ gf_proc_dump_write("heal-waiters", "%d", priv->heal_waiters);
+ gf_proc_dump_write("background-self-heal-count", "%d",
+ priv->background_self_heal_count);
+ gf_proc_dump_write("healers", "%d", priv->healers);
+ gf_proc_dump_write("read-hash-mode", "%d", priv->hash_mode);
+ gf_proc_dump_write("use-anonymous-inode", "%d", priv->use_anon_inode);
+ if (priv->quorum_count == AFR_QUORUM_AUTO) {
+ gf_proc_dump_write("quorum-type", "auto");
+ } else if (priv->quorum_count == 0) {
+ gf_proc_dump_write("quorum-type", "none");
+ } else {
+ gf_proc_dump_write("quorum-type", "fixed");
+ gf_proc_dump_write("quorum-count", "%d", priv->quorum_count);
+ }
+ gf_proc_dump_write("up", "%u", afr_has_quorum(priv->child_up, this, NULL));
+ if (priv->thin_arbiter_count) {
+ gf_proc_dump_write("ta_child_up", "%d", priv->ta_child_up);
+ gf_proc_dump_write("ta_bad_child_index", "%d",
+ priv->ta_bad_child_index);
+ gf_proc_dump_write("ta_notify_dom_lock_offset", "%" PRId64,
+ priv->ta_notify_dom_lock_offset);
+ }
+
+ return 0;
+}
- if (!child_went_down (op_ret, op_errno) && (op_ret == -1)) {
- local->op_ret = -1;
- local->op_errno = op_errno;
+/**
+ * find_child_index - find the child's index in the array of subvolumes
+ * @this: AFR
+ * @child: child
+ */
- afr_lk_unlock (frame, this);
- return 0;
- }
+static int
+afr_find_child_index(xlator_t *this, xlator_t *child)
+{
+ afr_private_t *priv = NULL;
+ int child_count = -1;
+ int i = -1;
- if (op_ret == 0) {
- local->op_ret = 0;
- local->op_errno = 0;
- local->cont.lk.locked_nodes[child_index] = 1;
- local->cont.lk.ret_flock = *lock;
- }
+ priv = this->private;
+ child_count = priv->child_count;
+ if (priv->thin_arbiter_count) {
+ child_count++;
+ }
- child_index++;
+ for (i = 0; i < child_count; i++) {
+ if ((xlator_t *)child == priv->children[i])
+ break;
+ }
- if (child_index < priv->child_count) {
- STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->lk,
- local->fd, local->cont.lk.cmd,
- &local->cont.lk.user_flock, xdata);
- } else if (local->op_ret == -1) {
- /* all nodes have gone down */
+ return i;
+}
- AFR_STACK_UNWIND (lk, frame, -1, ENOTCONN,
- &local->cont.lk.ret_flock, NULL);
- } else {
- /* locking has succeeded on all nodes that are up */
+int
+__afr_get_up_children_count(afr_private_t *priv)
+{
+ int up_children = 0;
+ int i = 0;
- /* temporarily
- ret = afr_mark_locked_nodes (this, local->fd,
- local->cont.lk.locked_nodes);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not save locked nodes info in fdctx");
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 1)
+ up_children++;
- ret = afr_save_locked_fd (this, local->fd);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not save locked fd");
+ return up_children;
+}
- */
- AFR_STACK_UNWIND (lk, frame, local->op_ret, local->op_errno,
- &local->cont.lk.ret_flock, NULL);
- }
+static int
+__get_heard_from_all_status(xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ int i;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->last_event[i]) {
+ return 0;
+ }
+ }
+ if (priv->thin_arbiter_count && !priv->ta_child_up) {
return 0;
+ }
+ return 1;
}
-
-int
-afr_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+glusterfs_event_t
+__afr_transform_event_from_state(xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int i = 0;
- int32_t op_errno = 0;
- int ret = -1;
+ int i = 0;
+ int up_children = 0;
+ afr_private_t *priv = this->private;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (__get_heard_from_all_status(this))
+ /* have_heard_from_all. Let afr_notify() do the propagation. */
+ return GF_EVENT_MAXVAL;
- priv = this->private;
+ up_children = __afr_get_up_children_count(priv);
+ /* Treat the children with pending notification, as having sent a
+ * GF_EVENT_CHILD_DOWN. i.e. set the event as GF_EVENT_SOME_DESCENDENT_DOWN,
+ * as done in afr_notify() */
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->last_event[i])
+ continue;
+ priv->last_event[i] = GF_EVENT_SOME_DESCENDENT_DOWN;
+ priv->child_up[i] = 0;
+ }
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ if (up_children)
+ /* We received at least one child up */
+ return GF_EVENT_CHILD_UP;
+ else
+ return GF_EVENT_CHILD_DOWN;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ return GF_EVENT_MAXVAL;
+}
- local->cont.lk.locked_nodes = GF_CALLOC (priv->child_count,
- sizeof (*local->cont.lk.locked_nodes),
- gf_afr_mt_char);
+static void
+afr_notify_cbk(void *data)
+{
+ xlator_t *this = data;
+ afr_private_t *priv = this->private;
+ glusterfs_event_t event = GF_EVENT_MAXVAL;
+ gf_boolean_t propagate = _gf_false;
+
+ LOCK(&priv->lock);
+ {
+ if (!priv->timer) {
+ /*
+ * Either child_up/child_down is already sent to parent.
+ * This is a spurious wake up.
+ */
+ goto unlock;
+ }
+ priv->timer = NULL;
+ event = __afr_transform_event_from_state(this);
+ if (event != GF_EVENT_MAXVAL)
+ propagate = _gf_true;
+ }
+unlock:
+ UNLOCK(&priv->lock);
+ if (propagate)
+ default_notify(this, event, NULL);
+}
- if (!local->cont.lk.locked_nodes) {
- op_errno = ENOMEM;
- goto out;
- }
+static void
+__afr_launch_notify_timer(xlator_t *this, afr_private_t *priv)
+{
+ struct timespec delay = {
+ 0,
+ };
- local->fd = fd_ref (fd);
- local->cont.lk.cmd = cmd;
- local->cont.lk.user_flock = *flock;
- local->cont.lk.ret_flock = *flock;
+ gf_msg_debug(this->name, 0, "Initiating child-down timer");
+ delay.tv_sec = 10;
+ delay.tv_nsec = 0;
+ priv->timer = gf_timer_call_after(this->ctx, delay, afr_notify_cbk, this);
+ if (priv->timer == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_TIMER_CREATE_FAIL,
+ "Cannot create timer for delayed initialization");
+ }
+}
- STACK_WIND_COOKIE (frame, afr_lk_cbk, (void *) (long) 0,
- priv->children[i],
- priv->children[i]->fops->lk,
- fd, cmd, flock, xdata);
+static int
+find_best_down_child(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int32_t best_child = -1;
+ int64_t best_latency = INT64_MAX;
- ret = 0;
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
- return 0;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->child_up[i] && priv->child_latency[i] >= 0 &&
+ priv->child_latency[i] < best_latency) {
+ best_child = i;
+ best_latency = priv->child_latency[i];
+ }
+ }
+ if (best_child >= 0) {
+ gf_msg_debug(this->name, 0,
+ "Found best down child (%d) @ %" PRId64 " ms latency",
+ best_child, best_latency);
+ }
+ return best_child;
}
int
-afr_forget (xlator_t *this, inode_t *inode)
+find_worst_up_child(xlator_t *this)
{
- uint64_t ctx_addr = 0;
- afr_inode_ctx_t *ctx = NULL;
+ afr_private_t *priv = NULL;
+ int i = -1;
+ int32_t worst_child = -1;
+ int64_t worst_latency = INT64_MIN;
- inode_ctx_get (inode, this, &ctx_addr);
+ priv = this->private;
- if (!ctx_addr)
- goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->child_up[i] && priv->child_latency[i] >= 0 &&
+ priv->child_latency[i] > worst_latency) {
+ worst_child = i;
+ worst_latency = priv->child_latency[i];
+ }
+ }
+ if (worst_child >= 0) {
+ gf_msg_debug(this->name, 0,
+ "Found worst up child (%d) @ %" PRId64 " ms latency",
+ worst_child, worst_latency);
+ }
+ return worst_child;
+}
- ctx = (afr_inode_ctx_t *)(long)ctx_addr;
- if (ctx->fresh_children)
- GF_FREE (ctx->fresh_children);
- GF_FREE (ctx);
-out:
- return 0;
+void
+__afr_handle_ping_event(xlator_t *this, xlator_t *child_xlator, const int idx,
+ int64_t halo_max_latency_msec, int32_t *event,
+ int64_t child_latency_msec)
+{
+ afr_private_t *priv = NULL;
+ int up_children = 0;
+
+ priv = this->private;
+
+ priv->child_latency[idx] = child_latency_msec;
+ gf_msg_debug(child_xlator->name, 0, "Client ping @ %" PRId64 " ms",
+ child_latency_msec);
+ if (priv->shd.iamshd)
+ return;
+
+ up_children = __afr_get_up_children_count(priv);
+
+ if (child_latency_msec > halo_max_latency_msec &&
+ priv->child_up[idx] == 1 && up_children > priv->halo_min_replicas) {
+ if ((up_children - 1) < priv->halo_min_replicas) {
+ gf_log(child_xlator->name, GF_LOG_INFO,
+ "Overriding halo threshold, "
+ "min replicas: %d",
+ priv->halo_min_replicas);
+ } else {
+ gf_log(child_xlator->name, GF_LOG_INFO,
+ "Child latency (%" PRId64
+ " ms) "
+ "exceeds halo threshold (%" PRId64
+ "), "
+ "marking child down.",
+ child_latency_msec, halo_max_latency_msec);
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_DOWN;
+ }
+ }
+ } else if (child_latency_msec < halo_max_latency_msec &&
+ priv->child_up[idx] == 0) {
+ if (up_children < priv->halo_max_replicas) {
+ gf_log(child_xlator->name, GF_LOG_INFO,
+ "Child latency (%" PRId64
+ " ms) "
+ "below halo threshold (%" PRId64
+ "), "
+ "marking child up.",
+ child_latency_msec, halo_max_latency_msec);
+ if (priv->halo_child_up[idx]) {
+ *event = GF_EVENT_CHILD_UP;
+ }
+ } else {
+ gf_log(child_xlator->name, GF_LOG_INFO,
+ "Not marking child %d up, "
+ "max replicas (%d) reached.",
+ idx, priv->halo_max_replicas);
+ }
+ }
}
-int
-afr_priv_dump (xlator_t *this)
+static int64_t
+afr_get_halo_latency(xlator_t *this)
{
- afr_private_t *priv = NULL;
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
+ afr_private_t *priv = NULL;
+ int64_t halo_max_latency_msec = 0;
+ priv = this->private;
- GF_ASSERT (this);
- priv = this->private;
+ if (priv->shd.iamshd) {
+ halo_max_latency_msec = priv->shd.halo_max_latency_msec;
+ } else if (priv->nfsd.iamnfsd) {
+ halo_max_latency_msec = priv->nfsd.halo_max_latency_msec;
+ } else {
+ halo_max_latency_msec = priv->halo_max_latency_msec;
+ }
+ gf_msg_debug(this->name, 0, "Using halo latency %" PRId64,
+ halo_max_latency_msec);
+ return halo_max_latency_msec;
+}
- GF_ASSERT (priv);
- snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
- gf_proc_dump_add_section(key_prefix);
- gf_proc_dump_write("child_count", "%u", priv->child_count);
- gf_proc_dump_write("read_child_rr", "%u", priv->read_child_rr);
- for (i = 0; i < priv->child_count; i++) {
- sprintf (key, "child_up[%d]", i);
- gf_proc_dump_write(key, "%d", priv->child_up[i]);
- sprintf (key, "pending_key[%d]", i);
- gf_proc_dump_write(key, "%s", priv->pending_key[i]);
- }
- gf_proc_dump_write("data_self_heal", "%s", priv->data_self_heal);
- gf_proc_dump_write("metadata_self_heal", "%d", priv->metadata_self_heal);
- gf_proc_dump_write("entry_self_heal", "%d", priv->entry_self_heal);
- gf_proc_dump_write("data_change_log", "%d", priv->data_change_log);
- gf_proc_dump_write("metadata_change_log", "%d", priv->metadata_change_log);
- gf_proc_dump_write("entry-change_log", "%d", priv->entry_change_log);
- gf_proc_dump_write("read_child", "%d", priv->read_child);
- gf_proc_dump_write("favorite_child", "%d", priv->favorite_child);
- gf_proc_dump_write("wait_count", "%u", priv->wait_count);
+void
+__afr_handle_child_up_event(xlator_t *this, xlator_t *child_xlator,
+ const int idx, int64_t child_latency_msec,
+ int32_t *event, int32_t *call_psh,
+ int32_t *up_child)
+{
+ afr_private_t *priv = NULL;
+ int up_children = 0;
+ int worst_up_child = -1;
+ int64_t halo_max_latency_msec = afr_get_halo_latency(this);
+
+ priv = this->private;
+
+ /*
+ * This only really counts if the child was never up
+ * (value = -1) or had been down (value = 0). See
+ * comment at GF_EVENT_CHILD_DOWN for a more detailed
+ * explanation.
+ */
+ if (priv->child_up[idx] != 1) {
+ priv->event_generation++;
+ }
+ priv->child_up[idx] = 1;
+
+ *call_psh = 1;
+ *up_child = idx;
+ up_children = __afr_get_up_children_count(priv);
+ /*
+ * If this is an _actual_ CHILD_UP event, we
+ * want to set the child_latency to MAX to indicate
+ * the child needs ping data to be available before doing child-up
+ */
+ if (!priv->halo_enabled)
+ goto out;
+
+ if (child_latency_msec < 0) {
+ /*set to INT64_MAX-1 so that it is found for best_down_child*/
+ priv->halo_child_up[idx] = 1;
+ if (priv->child_latency[idx] < 0) {
+ priv->child_latency[idx] = AFR_HALO_MAX_LATENCY;
+ }
+ }
+
+ /*
+ * Handle the edge case where we exceed
+ * halo_min_replicas and we've got a child which is
+ * marked up as it was helping to satisfy the
+ * halo_min_replicas even though it's latency exceeds
+ * halo_max_latency_msec.
+ */
+ if (up_children > priv->halo_min_replicas) {
+ worst_up_child = find_worst_up_child(this);
+ if (worst_up_child >= 0 &&
+ priv->child_latency[worst_up_child] > halo_max_latency_msec) {
+ gf_msg_debug(this->name, 0,
+ "Marking child %d down, "
+ "doesn't meet halo threshold (%" PRId64
+ "), and > "
+ "halo_min_replicas (%d)",
+ worst_up_child, halo_max_latency_msec,
+ priv->halo_min_replicas);
+ priv->child_up[worst_up_child] = 0;
+ up_children--;
+ }
+ }
+
+ if (up_children > priv->halo_max_replicas && !priv->shd.iamshd) {
+ worst_up_child = find_worst_up_child(this);
+ if (worst_up_child < 0) {
+ worst_up_child = idx;
+ }
+ priv->child_up[worst_up_child] = 0;
+ up_children--;
+ gf_msg_debug(this->name, 0,
+ "Marking child %d down, "
+ "up_children (%d) > halo_max_replicas (%d)",
+ worst_up_child, up_children, priv->halo_max_replicas);
+ }
+out:
+ if (up_children == 1) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOL_UP,
+ "Subvolume '%s' came back up; "
+ "going online.",
+ child_xlator->name);
+ gf_event(EVENT_AFR_SUBVOL_UP, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
+ } else {
+ *event = GF_EVENT_SOME_DESCENDENT_UP;
+ }
- return 0;
+ priv->last_event[idx] = *event;
}
+void
+__afr_handle_child_down_event(xlator_t *this, xlator_t *child_xlator, int idx,
+ int64_t child_latency_msec, int32_t *event,
+ int32_t *call_psh, int32_t *up_child)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int up_children = 0;
+ int down_children = 0;
+ int best_down_child = -1;
+
+ priv = this->private;
+
+ /*
+ * If a brick is down when we start, we'll get a
+ * CHILD_DOWN to indicate its initial state. There
+ * was never a CHILD_UP in this case, so if we
+ * increment "down_count" the difference between than
+ * and "up_count" will no longer be the number of
+ * children that are currently up. This has serious
+ * implications e.g. for quorum enforcement, so we
+ * don't increment these values unless the event
+ * represents an actual state transition between "up"
+ * (value = 1) and anything else.
+ */
+ if (priv->child_up[idx] == 1) {
+ priv->event_generation++;
+ }
+
+ /*
+ * If this is an _actual_ CHILD_DOWN event, we
+ * want to set the child_latency to < 0 to indicate
+ * the child is really disconnected.
+ */
+ if (child_latency_msec < 0) {
+ priv->child_latency[idx] = child_latency_msec;
+ priv->halo_child_up[idx] = 0;
+ }
+ priv->child_up[idx] = 0;
+
+ up_children = __afr_get_up_children_count(priv);
+ /*
+ * Handle the edge case where we need to find the
+ * next best child (to mark up) as marking this child
+ * down would cause us to fall below halo_min_replicas.
+ * We will also force the SHD to heal this child _now_
+ * as we want it to be up to date if we are going to
+ * begin using it synchronously.
+ */
+ if (priv->halo_enabled && up_children < priv->halo_min_replicas) {
+ best_down_child = find_best_down_child(this);
+ if (best_down_child >= 0) {
+ gf_msg_debug(this->name, 0,
+ "Swapping out child %d for "
+ "child %d to satisfy halo_min_replicas (%d).",
+ idx, best_down_child, priv->halo_min_replicas);
+ priv->child_up[best_down_child] = 1;
+ *call_psh = 1;
+ *up_child = best_down_child;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++)
+ if (priv->child_up[i] == 0)
+ down_children++;
+ if (down_children == priv->child_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SUBVOLS_DOWN,
+ "All subvolumes are down. Going "
+ "offline until at least one of them "
+ "comes back up.");
+ gf_event(EVENT_AFR_SUBVOLS_DOWN, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
+ } else {
+ *event = GF_EVENT_SOME_DESCENDENT_DOWN;
+ }
+ priv->last_event[idx] = *event;
+}
-/**
- * find_child_index - find the child's index in the array of subvolumes
- * @this: AFR
- * @child: child
- */
+void
+afr_ta_lock_release_synctask(xlator_t *this)
+{
+ call_frame_t *ta_frame = NULL;
+ int ret = 0;
-static int
-find_child_index (xlator_t *this, xlator_t *child)
+ ta_frame = afr_ta_frame_create(this);
+ if (!ta_frame) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to create ta_frame");
+ return;
+ }
+
+ ret = synctask_new(this->ctx->env, afr_release_notify_lock_for_ta,
+ afr_ta_lock_release_done, ta_frame, this);
+ if (ret) {
+ STACK_DESTROY(ta_frame->root);
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to release "
+ "AFR_TA_DOM_NOTIFY lock.");
+ }
+}
+
+static void
+afr_handle_inodelk_contention(xlator_t *this, struct gf_upcall *upcall)
{
- afr_private_t *priv = NULL;
- int i = -1;
+ struct gf_upcall_inodelk_contention *lc = NULL;
+ unsigned int inmem_count = 0;
+ unsigned int onwire_count = 0;
+ afr_private_t *priv = this->private;
- priv = this->private;
+ lc = upcall->data;
- for (i = 0; i < priv->child_count; i++) {
- if ((xlator_t *) child == priv->children[i])
- break;
- }
+ if (strcmp(lc->domain, AFR_TA_DOM_NOTIFY) != 0)
+ return;
- return i;
+ if (priv->shd.iamshd) {
+ /* shd should ignore AFR_TA_DOM_NOTIFY release requests. */
+ return;
+ }
+ LOCK(&priv->lock);
+ {
+ if (priv->release_ta_notify_dom_lock == _gf_true) {
+ /* Ignore multiple release requests from shds.*/
+ UNLOCK(&priv->lock);
+ return;
+ }
+ priv->release_ta_notify_dom_lock = _gf_true;
+ inmem_count = priv->ta_in_mem_txn_count;
+ onwire_count = priv->ta_on_wire_txn_count;
+ }
+ UNLOCK(&priv->lock);
+ if (inmem_count || onwire_count)
+ /* lock release will happen in txn code path after
+ * in-memory or on-wire txns are over.*/
+ return;
+
+ afr_ta_lock_release_synctask(this);
+}
+
+static void
+afr_handle_upcall_event(xlator_t *this, struct gf_upcall *upcall)
+{
+ struct gf_upcall_cache_invalidation *up_ci = NULL;
+ afr_private_t *priv = this->private;
+ inode_t *inode = NULL;
+ inode_table_t *itable = NULL;
+ int i = 0;
+
+ switch (upcall->event_type) {
+ case GF_UPCALL_INODELK_CONTENTION:
+ afr_handle_inodelk_contention(this, upcall);
+ break;
+ case GF_UPCALL_CACHE_INVALIDATION:
+ up_ci = (struct gf_upcall_cache_invalidation *)upcall->data;
+
+ /* Since md-cache will be aggressively filtering
+ * lookups, the stale read issue will be more
+ * pronounced. Hence when a pending xattr is set notify
+ * all the md-cache clients to invalidate the existing
+ * stat cache and send the lookup next time */
+ if (!up_ci->dict)
+ break;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!dict_get(up_ci->dict, priv->pending_key[i]))
+ continue;
+ up_ci->flags |= UP_INVAL_ATTR;
+ itable = ((xlator_t *)this->graph->top)->itable;
+ /*Internal processes may not have itable for
+ *top xlator*/
+ if (itable)
+ inode = inode_find(itable, upcall->gfid);
+ if (inode)
+ afr_inode_need_refresh_set(inode, this);
+ break;
+ }
+ break;
+ default:
+ break;
+ }
}
int32_t
-afr_notify (xlator_t *this, int32_t event,
- void *data, void *data2)
-{
- afr_private_t *priv = NULL;
- int i = -1;
- int up_children = 0;
- int down_children = 0;
- int propagate = 0;
- int had_heard_from_all = 0;
- int have_heard_from_all = 0;
- int idx = -1;
- int ret = -1;
- int call_psh = 0;
- int up_child = AFR_ALL_CHILDREN;
- dict_t *input = NULL;
- dict_t *output = NULL;
-
- priv = this->private;
-
- if (!priv)
- return 0;
-
- /*
- * We need to reset this in case children come up in "staggered"
- * fashion, so that we discover a late-arriving local subvolume. Note
- * that we could end up issuing N lookups to the first subvolume, and
- * O(N^2) overall, but N is small for AFR so it shouldn't be an issue.
- */
- priv->did_discovery = _gf_false;
+afr_notify(xlator_t *this, int32_t event, void *data, void *data2)
+{
+ afr_private_t *priv = NULL;
+ xlator_t *child_xlator = NULL;
+ int i = -1;
+ int propagate = 0;
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ int idx = -1;
+ int ret = -1;
+ int call_psh = 0;
+ int up_child = -1;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
+ gf_boolean_t had_quorum = _gf_false;
+ gf_boolean_t has_quorum = _gf_false;
+ int64_t halo_max_latency_msec = 0;
+ int64_t child_latency_msec = -1;
+
+ child_xlator = (xlator_t *)data;
+
+ priv = this->private;
+
+ if (!priv)
+ return 0;
- had_heard_from_all = 1;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->last_event[i]) {
- had_heard_from_all = 0;
- }
+ /*
+ * We need to reset this in case children come up in "staggered"
+ * fashion, so that we discover a late-arriving local subvolume. Note
+ * that we could end up issuing N lookups to the first subvolume, and
+ * O(N^2) overall, but N is small for AFR so it shouldn't be an issue.
+ */
+ priv->did_discovery = _gf_false;
+
+ /* parent xlators don't need to know about every child_up, child_down
+ * because of afr ha. If all subvolumes go down, child_down has
+ * to be triggered. In that state when 1 subvolume comes up child_up
+ * needs to be triggered. dht optimizes revalidate lookup by sending
+ * it only to one of its subvolumes. When child up/down happens
+ * for afr's subvolumes dht should be notified by child_modified. The
+ * subsequent revalidate lookup happens on all the dht's subvolumes
+ * which triggers afr self-heals if any.
+ */
+ idx = afr_find_child_index(this, child_xlator);
+ if (idx < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_CHILD_UP,
+ "Received child_up from invalid subvolume");
+ goto out;
+ }
+
+ had_quorum = priv->quorum_count &&
+ afr_has_quorum(priv->child_up, this, NULL);
+ if (event == GF_EVENT_CHILD_PING) {
+ child_latency_msec = (int64_t)(uintptr_t)data2;
+ if (priv->halo_enabled) {
+ halo_max_latency_msec = afr_get_halo_latency(this);
+
+ /* Calculates the child latency and sets event
+ */
+ LOCK(&priv->lock);
+ {
+ __afr_handle_ping_event(this, child_xlator, idx,
+ halo_max_latency_msec, &event,
+ child_latency_msec);
+ }
+ UNLOCK(&priv->lock);
+ } else {
+ LOCK(&priv->lock);
+ {
+ priv->child_latency[idx] = child_latency_msec;
+ }
+ UNLOCK(&priv->lock);
}
+ }
- /* parent xlators dont need to know about every child_up, child_down
- * because of afr ha. If all subvolumes go down, child_down has
- * to be triggered. In that state when 1 subvolume comes up child_up
- * needs to be triggered. dht optimizes revalidate lookup by sending
- * it only to one of its subvolumes. When child up/down happens
- * for afr's subvolumes dht should be notified by child_modified. The
- * subsequent revalidate lookup happens on all the dht's subvolumes
- * which triggers afr self-heals if any.
+ if (event == GF_EVENT_CHILD_PING) {
+ /* This is the only xlator that handles PING, no reason to
+ * propagate.
*/
- idx = find_child_index (this, data);
- if (idx < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Received child_up "
- "from invalid subvolume");
- goto out;
+ goto out;
+ }
+
+ if (event == GF_EVENT_TRANSLATOR_OP) {
+ LOCK(&priv->lock);
+ {
+ had_heard_from_all = __get_heard_from_all_status(this);
+ }
+ UNLOCK(&priv->lock);
+
+ if (!had_heard_from_all) {
+ ret = -1;
+ } else {
+ input = data;
+ output = data2;
+ ret = afr_xl_op(this, input, output);
}
+ goto out;
+ }
+
+ if (event == GF_EVENT_UPCALL) {
+ afr_handle_upcall_event(this, data);
+ }
+ LOCK(&priv->lock);
+ {
+ had_heard_from_all = __get_heard_from_all_status(this);
switch (event) {
- case GF_EVENT_CHILD_UP:
- LOCK (&priv->lock);
- {
- /*
- * This only really counts if the child was never up
- * (value = -1) or had been down (value = 0). See
- * comment at GF_EVENT_CHILD_DOWN for a more detailed
- * explanation.
- */
- if (priv->child_up[idx] != 1) {
- priv->up_count++;
- }
- priv->child_up[idx] = 1;
-
- call_psh = 1;
- up_child = idx;
- for (i = 0; i < priv->child_count; i++)
- if (priv->child_up[i] == 1)
- up_children++;
- if (up_children == 1) {
- gf_log (this->name, GF_LOG_INFO,
- "Subvolume '%s' came back up; "
- "going online.", ((xlator_t *)data)->name);
- } else {
- event = GF_EVENT_CHILD_MODIFIED;
- }
-
- priv->last_event[idx] = event;
+ case GF_EVENT_PARENT_UP:
+ __afr_launch_notify_timer(this, priv);
+ propagate = 1;
+ break;
+ case GF_EVENT_CHILD_UP:
+ if (priv->thin_arbiter_count &&
+ (idx == AFR_CHILD_THIN_ARBITER)) {
+ priv->ta_child_up = 1;
+ priv->ta_event_gen++;
+ break;
}
- UNLOCK (&priv->lock);
-
+ __afr_handle_child_up_event(this, child_xlator, idx,
+ child_latency_msec, &event,
+ &call_psh, &up_child);
+ __afr_lock_heal_synctask(this, priv, idx);
break;
- case GF_EVENT_CHILD_DOWN:
- LOCK (&priv->lock);
- {
- /*
- * If a brick is down when we start, we'll get a
- * CHILD_DOWN to indicate its initial state. There
- * was never a CHILD_UP in this case, so if we
- * increment "down_count" the difference between than
- * and "up_count" will no longer be the number of
- * children that are currently up. This has serious
- * implications e.g. for quorum enforcement, so we
- * don't increment these values unless the event
- * represents an actual state transition between "up"
- * (value = 1) and anything else.
- */
- if (priv->child_up[idx] == 1) {
- priv->down_count++;
- }
- priv->child_up[idx] = 0;
-
- for (i = 0; i < priv->child_count; i++)
- if (priv->child_up[i] == 0)
- down_children++;
- if (down_children == priv->child_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "All subvolumes are down. Going offline "
- "until atleast one of them comes back up.");
- } else {
- event = GF_EVENT_CHILD_MODIFIED;
- }
-
- priv->last_event[idx] = event;
+ case GF_EVENT_CHILD_DOWN:
+ if (priv->thin_arbiter_count &&
+ (idx == AFR_CHILD_THIN_ARBITER)) {
+ priv->ta_child_up = 0;
+ priv->ta_event_gen++;
+ afr_ta_locked_priv_invalidate(priv);
+ break;
}
- UNLOCK (&priv->lock);
-
+ __afr_handle_child_down_event(this, child_xlator, idx,
+ child_latency_msec, &event,
+ &call_psh, &up_child);
+ __afr_mark_pending_lk_heal(this, priv, idx);
break;
- case GF_EVENT_CHILD_CONNECTING:
- LOCK (&priv->lock);
- {
- priv->last_event[idx] = event;
- }
- UNLOCK (&priv->lock);
+ case GF_EVENT_CHILD_CONNECTING:
+ priv->last_event[idx] = event;
break;
- case GF_EVENT_TRANSLATOR_OP:
- input = data;
- output = data2;
- ret = afr_xl_op (this, input, output);
- goto out;
+ case GF_EVENT_SOME_DESCENDENT_DOWN:
+ priv->last_event[idx] = event;
break;
-
- default:
+ default:
propagate = 1;
break;
}
+ have_heard_from_all = __get_heard_from_all_status(this);
+ if (!had_heard_from_all && have_heard_from_all) {
+ if (priv->timer) {
+ gf_timer_call_cancel(this->ctx, priv->timer);
+ priv->timer = NULL;
+ }
+ /* This is the first event which completes aggregation
+ of events from all subvolumes. If at least one subvol
+ had come up, propagate CHILD_UP, but only this time
+ */
+ event = GF_EVENT_CHILD_DOWN;
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
+ event = GF_EVENT_CHILD_UP;
+ break;
+ }
- /* have all subvolumes reported status once by now? */
- have_heard_from_all = 1;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->last_event[i])
- have_heard_from_all = 0;
+ if (priv->last_event[i] == GF_EVENT_CHILD_CONNECTING) {
+ event = GF_EVENT_CHILD_CONNECTING;
+ /* continue to check other events for CHILD_UP */
+ }
+ }
+ }
+ }
+ UNLOCK(&priv->lock);
+
+ if (priv->quorum_count) {
+ has_quorum = afr_has_quorum(priv->child_up, this, NULL);
+ if (!had_quorum && has_quorum) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_QUORUM_MET,
+ "Client-quorum is met");
+ gf_event(EVENT_AFR_QUORUM_MET, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
+ }
+ if (had_quorum && !has_quorum) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
+ "Client-quorum is not met");
+ gf_event(EVENT_AFR_QUORUM_FAIL, "client-pid=%d; subvol=%s",
+ this->ctx->cmd_args.client_pid, this->name);
+ }
+ }
+
+ /* if all subvols have reported status, no need to hide anything
+ or wait for anything else. Just propagate blindly */
+ if (have_heard_from_all)
+ propagate = 1;
+
+ ret = 0;
+ if (propagate)
+ ret = default_notify(this, event, data);
+
+ if ((!had_heard_from_all) || call_psh) {
+ /* Launch self-heal on all local subvolumes if:
+ * a) We have_heard_from_all for the first time
+ * b) Already heard from everyone, but we now got a child-up
+ * event.
+ */
+ if (have_heard_from_all) {
+ afr_selfheal_childup(this, priv);
}
+ }
+out:
+ return ret;
+}
- /* if all subvols have reported status, no need to hide anything
- or wait for anything else. Just propagate blindly */
- if (have_heard_from_all)
- propagate = 1;
+int
+afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
+{
+ int __ret = -1;
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+
+ __ret = syncbarrier_init(&local->barrier);
+ if (__ret) {
+ if (op_errno)
+ *op_errno = __ret;
+ goto out;
+ }
+
+ local->child_up = GF_MALLOC(priv->child_count * sizeof(*local->child_up),
+ gf_afr_mt_char);
+ if (!local->child_up) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ memcpy(local->child_up, priv->child_up,
+ sizeof(*local->child_up) * priv->child_count);
+ local->call_count = AFR_COUNT(local->child_up, priv->child_count);
+ if (local->call_count == 0) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, AFR_MSG_SUBVOLS_DOWN,
+ "no subvolumes up");
+ if (op_errno)
+ *op_errno = ENOTCONN;
+ goto out;
+ }
+
+ local->event_generation = priv->event_generation;
+
+ local->read_attempted = GF_CALLOC(priv->child_count, sizeof(char),
+ gf_afr_mt_char);
+ if (!local->read_attempted) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->readable = GF_CALLOC(priv->child_count, sizeof(char),
+ gf_afr_mt_char);
+ if (!local->readable) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->readable2 = GF_CALLOC(priv->child_count, sizeof(char),
+ gf_afr_mt_char);
+ if (!local->readable2) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->read_subvol = -1;
+
+ local->replies = GF_CALLOC(priv->child_count, sizeof(*local->replies),
+ gf_afr_mt_reply_t);
+ if (!local->replies) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->need_full_crawl = _gf_false;
+ if (priv->thin_arbiter_count) {
+ local->ta_child_up = priv->ta_child_up;
+ local->ta_failed_subvol = AFR_CHILD_UNKNOWN;
+ local->read_txn_query_child = AFR_CHILD_UNKNOWN;
+ local->ta_event_gen = priv->ta_event_gen;
+ local->fop_state = TA_SUCCESS;
+ }
+ local->is_new_entry = _gf_false;
+
+ INIT_LIST_HEAD(&local->healer);
+ return 0;
+out:
+ return -1;
+}
- if (!had_heard_from_all && have_heard_from_all) {
- /* This is the first event which completes aggregation
- of events from all subvolumes. If at least one subvol
- had come up, propagate CHILD_UP, but only this time
- */
- event = GF_EVENT_CHILD_DOWN;
-
- LOCK (&priv->lock);
- {
- up_children = afr_up_children_count (priv->child_up,
- priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- if (priv->last_event[i] == GF_EVENT_CHILD_UP) {
- event = GF_EVENT_CHILD_UP;
- break;
- }
-
- if (priv->last_event[i] ==
- GF_EVENT_CHILD_CONNECTING) {
- event = GF_EVENT_CHILD_CONNECTING;
- /* continue to check other events for CHILD_UP */
- }
- }
- }
- UNLOCK (&priv->lock);
- }
+int
+afr_internal_lock_init(afr_internal_lock_t *lk, size_t child_count)
+{
+ int ret = -ENOMEM;
- ret = 0;
- if (propagate)
- ret = default_notify (this, event, data);
- if (call_psh && priv->shd.iamshd)
- afr_proactive_self_heal ((void*) (long) up_child);
+ lk->lower_locked_nodes = GF_CALLOC(sizeof(*lk->lower_locked_nodes),
+ child_count, gf_afr_mt_char);
+ if (NULL == lk->lower_locked_nodes)
+ goto out;
+ lk->lock_op_ret = -1;
+ lk->lock_op_errno = EUCLEAN;
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-int
-afr_first_up_child (unsigned char *child_up, size_t child_count)
+void
+afr_matrix_cleanup(int32_t **matrix, unsigned int m)
{
- int ret = -1;
- int i = 0;
+ int i = 0;
- GF_ASSERT (child_up);
+ if (!matrix)
+ goto out;
+ for (i = 0; i < m; i++) {
+ GF_FREE(matrix[i]);
+ }
- for (i = 0; i < child_count; i++) {
- if (child_up[i]) {
- ret = i;
- break;
- }
- }
+ GF_FREE(matrix);
+out:
+ return;
+}
- return ret;
+int32_t **
+afr_matrix_create(unsigned int m, unsigned int n)
+{
+ int32_t **matrix = NULL;
+ int i = 0;
+
+ matrix = GF_CALLOC(sizeof(*matrix), m, gf_afr_mt_int32_t);
+ if (!matrix)
+ goto out;
+
+ for (i = 0; i < m; i++) {
+ matrix[i] = GF_CALLOC(sizeof(*matrix[i]), n, gf_afr_mt_int32_t);
+ if (!matrix[i])
+ goto out;
+ }
+ return matrix;
+out:
+ afr_matrix_cleanup(matrix, m);
+ return NULL;
}
int
-afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno)
+afr_transaction_local_init(afr_local_t *local, xlator_t *this)
+{
+ int ret = -ENOMEM;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ INIT_LIST_HEAD(&local->transaction.wait_list);
+ INIT_LIST_HEAD(&local->transaction.owner_list);
+ INIT_LIST_HEAD(&local->ta_waitq);
+ INIT_LIST_HEAD(&local->ta_onwireq);
+ ret = afr_internal_lock_init(&local->internal_lock, priv->child_count);
+ if (ret < 0)
+ goto out;
+
+ ret = -ENOMEM;
+ local->pre_op_compat = priv->pre_op_compat;
+
+ local->transaction.pre_op = GF_CALLOC(sizeof(*local->transaction.pre_op),
+ priv->child_count, gf_afr_mt_char);
+ if (!local->transaction.pre_op)
+ goto out;
+
+ local->transaction.changelog_xdata = GF_CALLOC(
+ sizeof(*local->transaction.changelog_xdata), priv->child_count,
+ gf_afr_mt_dict_t);
+ if (!local->transaction.changelog_xdata)
+ goto out;
+
+ if (priv->arbiter_count == 1) {
+ local->transaction.pre_op_sources = GF_CALLOC(
+ sizeof(*local->transaction.pre_op_sources), priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.pre_op_sources)
+ goto out;
+ }
+
+ local->transaction.failed_subvols = GF_CALLOC(
+ sizeof(*local->transaction.failed_subvols), priv->child_count,
+ gf_afr_mt_char);
+ if (!local->transaction.failed_subvols)
+ goto out;
+
+ local->pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!local->pending)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+afr_set_low_priority(call_frame_t *frame)
{
- int ret = -1;
+ frame->root->pid = LOW_PRIO_PROC_PID;
+}
- local->op_ret = -1;
- local->op_errno = EUCLEAN;
-
- local->child_up = GF_CALLOC (priv->child_count,
- sizeof (*local->child_up),
- gf_afr_mt_char);
- if (!local->child_up) {
- if (op_errno)
- *op_errno = ENOMEM;
- goto out;
- }
+void
+afr_priv_destroy(afr_private_t *priv)
+{
+ int i = 0;
+ int child_count = -1;
- memcpy (local->child_up, priv->child_up,
- sizeof (*local->child_up) * priv->child_count);
- local->call_count = afr_up_children_count (local->child_up,
- priv->child_count);
- if (local->call_count == 0) {
- gf_log (THIS->name, GF_LOG_INFO, "no subvolumes up");
- if (op_errno)
- *op_errno = ENOTCONN;
- goto out;
- }
+ if (!priv)
+ goto out;
- local->child_errno = GF_CALLOC (priv->child_count,
- sizeof (*local->child_errno),
- gf_afr_mt_int32_t);
- if (!local->child_errno) {
- if (op_errno)
- *op_errno = ENOMEM;
- goto out;
- }
+ GF_FREE(priv->sh_domain);
+ GF_FREE(priv->last_event);
- ret = 0;
+ child_count = priv->child_count;
+ if (priv->thin_arbiter_count) {
+ child_count++;
+ }
+ if (priv->pending_key) {
+ for (i = 0; i < child_count; i++)
+ GF_FREE(priv->pending_key[i]);
+ }
+
+ GF_FREE(priv->pending_reads);
+ GF_FREE(priv->local);
+ GF_FREE(priv->pending_key);
+ GF_FREE(priv->children);
+ GF_FREE(priv->anon_inode);
+ GF_FREE(priv->child_up);
+ GF_FREE(priv->halo_child_up);
+ GF_FREE(priv->child_latency);
+ LOCK_DESTROY(&priv->lock);
+
+ GF_FREE(priv);
out:
- return ret;
+ return;
}
-int
-afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
- transaction_lk_type_t lk_type)
+int **
+afr_mark_pending_changelog(afr_private_t *priv, unsigned char *pending,
+ dict_t *xattr, ia_type_t iat)
{
- int ret = -ENOMEM;
+ int i = 0;
+ int **changelog = NULL;
+ int idx = -1;
+ int m_idx = 0;
+ int d_idx = 0;
+ int ret = 0;
- lk->inode_locked_nodes = GF_CALLOC (sizeof (*lk->inode_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->inode_locked_nodes)
- goto out;
+ m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION);
+ d_idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION);
- lk->entry_locked_nodes = GF_CALLOC (sizeof (*lk->entry_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->entry_locked_nodes)
- goto out;
+ idx = afr_index_from_ia_type(iat);
- lk->locked_nodes = GF_CALLOC (sizeof (*lk->locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->locked_nodes)
- goto out;
+ changelog = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog)
+ goto out;
- lk->lower_locked_nodes = GF_CALLOC (sizeof (*lk->lower_locked_nodes),
- child_count, gf_afr_mt_char);
- if (NULL == lk->lower_locked_nodes)
- goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!pending[i])
+ continue;
- lk->lock_op_ret = -1;
- lk->lock_op_errno = EUCLEAN;
- lk->transaction_lk_type = lk_type;
+ changelog[i][m_idx] = hton32(1);
+ if (idx != -1)
+ changelog[i][idx] = hton32(1);
+ /* If the newentry marking is on a newly created directory,
+ * then mark it with the full-heal indicator.
+ */
+ if ((IA_ISDIR(iat)) && (priv->esh_granular))
+ changelog[i][d_idx] = hton32(1);
+ }
+ ret = afr_set_pending_dict(priv, xattr, changelog);
+ if (ret < 0) {
+ afr_matrix_cleanup(changelog, priv->child_count);
+ return NULL;
+ }
+out:
+ return changelog;
+}
- ret = 0;
+static dict_t *
+afr_set_heal_info(char *status)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_dynstr_sizen(dict, "heal-info", status);
+ if (ret)
+ gf_msg("", GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Failed to set heal-info key to "
+ "%s",
+ status);
out:
- return ret;
+ /* Any error other than EINVAL, dict_set_dynstr frees status */
+ if (ret == -ENOMEM || ret == -EINVAL) {
+ GF_FREE(status);
+ }
+
+ if (ret && dict) {
+ dict_unref(dict);
+ dict = NULL;
+ }
+ return dict;
}
-void
-afr_matrix_cleanup (int32_t **matrix, unsigned int m)
+static gf_boolean_t
+afr_is_dirty_count_non_unary_for_txn(xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type)
{
- int i = 0;
+ afr_private_t *priv = this->private;
+ int *dirty = alloca0(priv->child_count * sizeof(int));
+ int i = 0;
- if (!matrix)
- goto out;
- for (i = 0; i < m; i++) {
- GF_FREE (matrix[i]);
+ afr_selfheal_extract_xattr(this, replies, type, dirty, NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (dirty[i] > 1)
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+static gf_boolean_t
+afr_is_dirty_count_non_unary(xlator_t *this, struct afr_reply *replies,
+ ia_type_t ia_type)
+{
+ gf_boolean_t data_chk = _gf_false;
+ gf_boolean_t mdata_chk = _gf_false;
+ gf_boolean_t entry_chk = _gf_false;
+
+ switch (ia_type) {
+ case IA_IFDIR:
+ mdata_chk = _gf_true;
+ entry_chk = _gf_true;
+ break;
+ case IA_IFREG:
+ mdata_chk = _gf_true;
+ data_chk = _gf_true;
+ break;
+ default:
+ /*IA_IFBLK, IA_IFCHR, IA_IFLNK, IA_IFIFO, IA_IFSOCK*/
+ mdata_chk = _gf_true;
+ break;
+ }
+
+ if (data_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_DATA_TRANSACTION)) {
+ return _gf_true;
+ } else if (mdata_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_METADATA_TRANSACTION)) {
+ return _gf_true;
+ } else if (entry_chk && afr_is_dirty_count_non_unary_for_txn(
+ this, replies, AFR_ENTRY_TRANSACTION)) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+static int
+afr_update_heal_status(xlator_t *this, struct afr_reply *replies,
+ ia_type_t ia_type, gf_boolean_t *esh, gf_boolean_t *dsh,
+ gf_boolean_t *msh, unsigned char pending)
+{
+ int ret = -1;
+ GF_UNUSED int ret1 = 0;
+ int i = 0;
+ int io_domain_lk_count = 0;
+ int shd_domain_lk_count = 0;
+ afr_private_t *priv = NULL;
+ char *key1 = NULL;
+ char *key2 = NULL;
+
+ priv = this->private;
+ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(this->name));
+ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(priv->sh_domain));
+ sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name);
+ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if ((replies[i].valid != 1) || (replies[i].op_ret != 0))
+ continue;
+ if (!io_domain_lk_count) {
+ ret1 = dict_get_int32(replies[i].xdata, key1, &io_domain_lk_count);
+ }
+ if (!shd_domain_lk_count) {
+ ret1 = dict_get_int32(replies[i].xdata, key2, &shd_domain_lk_count);
+ }
+ }
+
+ if (!pending) {
+ if ((afr_is_dirty_count_non_unary(this, replies, ia_type)) ||
+ (!io_domain_lk_count)) {
+ /* Needs heal. */
+ ret = 0;
+ } else {
+ /* No heal needed. */
+ *dsh = *esh = *msh = 0;
}
+ } else {
+ if (shd_domain_lk_count) {
+ ret = -EAGAIN; /*For 'possibly-healing'. */
+ } else {
+ ret = 0; /*needs heal. Just set a non -ve value so that it is
+ assumed as the source index.*/
+ }
+ }
+ return ret;
+}
- GF_FREE (matrix);
+/*return EIO, EAGAIN or pending*/
+int
+afr_lockless_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ inode_t **inode, gf_boolean_t *entry_selfheal,
+ gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal, unsigned char *pending)
+{
+ int ret = -1;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ gf_boolean_t dsh = _gf_false;
+ gf_boolean_t msh = _gf_false;
+ gf_boolean_t esh = _gf_false;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *valid_on = NULL;
+ uint64_t *witness = NULL;
+
+ priv = this->private;
+ replies = alloca0(sizeof(*replies) * priv->child_count);
+ sources = alloca0(sizeof(*sources) * priv->child_count);
+ sinks = alloca0(sizeof(*sinks) * priv->child_count);
+ witness = alloca0(sizeof(*witness) * priv->child_count);
+ valid_on = alloca0(sizeof(*valid_on) * priv->child_count);
+
+ ret = afr_selfheal_unlocked_inspect(frame, this, gfid, inode, &dsh, &msh,
+ &esh, replies);
+ if (ret)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ valid_on[i] = 1;
+ }
+ }
+ if (msh) {
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_METADATA_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
+ goto out;
+ }
+ if (dsh) {
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_DATA_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
+ goto out;
+ }
+ if (esh) {
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_ENTRY_TRANSACTION, valid_on,
+ sources, sinks, witness, pending);
+ if (*pending & PFLAG_SBRAIN)
+ ret = -EIO;
+ if (ret)
+ goto out;
+ }
+
+ ret = afr_update_heal_status(this, replies, (*inode)->ia_type, &esh, &dsh,
+ &msh, *pending);
out:
- return;
+ *data_selfheal = dsh;
+ *entry_selfheal = esh;
+ *metadata_selfheal = msh;
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+ return ret;
}
-int32_t**
-afr_matrix_create (unsigned int m, unsigned int n)
-{
- int32_t **matrix = NULL;
- int i = 0;
-
- matrix = GF_CALLOC (sizeof (*matrix), m, gf_afr_mt_int32_t);
- if (!matrix)
+int
+afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ gf_boolean_t data_selfheal = _gf_false;
+ gf_boolean_t metadata_selfheal = _gf_false;
+ gf_boolean_t entry_selfheal = _gf_false;
+ unsigned char pending = 0;
+ dict_t *dict = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+ inode_t *inode = NULL;
+ char *substr = NULL;
+ char *status = NULL;
+ call_frame_t *heal_frame = NULL;
+ afr_local_t *heal_local = NULL;
+
+ /*Use frame with lk-owner set*/
+ heal_frame = afr_frame_create(frame->this, &op_errno);
+ if (!heal_frame) {
+ ret = -1;
+ goto out;
+ }
+ heal_local = heal_frame->local;
+ heal_frame->local = frame->local;
+
+ ret = afr_lockless_inspect(heal_frame, this, loc->gfid, &inode,
+ &entry_selfheal, &data_selfheal,
+ &metadata_selfheal, &pending);
+
+ if (ret == -ENOMEM) {
+ ret = -1;
+ goto out;
+ }
+
+ if (pending & PFLAG_PENDING) {
+ gf_asprintf(&substr, "-pending");
+ if (!substr)
+ goto out;
+ }
+
+ if (ret == -EIO) {
+ ret = gf_asprintf(&status, "split-brain%s", substr ? substr : "");
+ if (ret < 0) {
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ } else if (ret == -EAGAIN) {
+ ret = gf_asprintf(&status, "possibly-healing%s", substr ? substr : "");
+ if (ret < 0) {
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ } else if (ret >= 0) {
+ /* value of ret = source index
+ * so ret >= 0 and at least one of the 3 booleans set to
+ * true means a source is identified; heal is required.
+ */
+ if (!data_selfheal && !entry_selfheal && !metadata_selfheal) {
+ status = gf_strdup("no-heal");
+ if (!status) {
+ ret = -1;
goto out;
-
- for (i = 0; i < m; i++) {
- matrix[i] = GF_CALLOC (sizeof (*matrix[i]), n,
- gf_afr_mt_int32_t);
- if (!matrix[i])
- goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ } else {
+ ret = gf_asprintf(&status, "heal%s", substr ? substr : "");
+ if (ret < 0) {
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+ }
+ } else if (ret < 0) {
+ /* Apart from above checked -ve ret values, there are
+ * other possible ret values like ENOTCONN
+ * (returned when number of valid replies received are
+ * less than 2)
+ * in which case heal is required when one of the
+ * selfheal booleans is set.
+ */
+ if (data_selfheal || entry_selfheal || metadata_selfheal) {
+ ret = gf_asprintf(&status, "heal%s", substr ? substr : "");
+ if (ret < 0) {
+ goto out;
+ }
+ dict = afr_set_heal_info(status);
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
}
- return matrix;
+ }
+
+ ret = 0;
+ op_errno = 0;
+
out:
- afr_matrix_cleanup (matrix, m);
- return NULL;
+ if (heal_frame) {
+ heal_frame->local = heal_local;
+ AFR_STACK_DESTROY(heal_frame);
+ }
+ AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
+ if (dict)
+ dict_unref(dict);
+ if (inode)
+ inode_unref(inode);
+ GF_FREE(substr);
+ return ret;
+}
+
+int
+_afr_is_split_brain(call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies, afr_transaction_type type,
+ gf_boolean_t *spb)
+{
+ afr_private_t *priv = NULL;
+ uint64_t *witness = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ int sources_count = 0;
+ int ret = 0;
+
+ priv = this->private;
+
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ witness = alloca0(priv->child_count * sizeof(*witness));
+
+ ret = afr_selfheal_find_direction(frame, this, replies, type,
+ priv->child_up, sources, sinks, witness,
+ NULL);
+ if (ret)
+ return ret;
+
+ sources_count = AFR_COUNT(sources, priv->child_count);
+ if (!sources_count)
+ *spb = _gf_true;
+
+ return ret;
}
int
-afr_transaction_local_init (afr_local_t *local, xlator_t *this)
+afr_is_split_brain(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ uuid_t gfid, gf_boolean_t *d_spb, gf_boolean_t *m_spb)
{
- int child_up_count = 0;
- int ret = -ENOMEM;
- afr_private_t *priv = NULL;
+ int ret = -1;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
- priv = this->private;
- ret = afr_internal_lock_init (&local->internal_lock, priv->child_count,
- AFR_TRANSACTION_LK);
- if (ret < 0)
- goto out;
+ priv = this->private;
- ret = -ENOMEM;
- child_up_count = afr_up_children_count (local->child_up,
- priv->child_count);
- if (priv->optimistic_change_log && child_up_count == priv->child_count)
- local->optimistic_change_log = 1;
+ replies = alloca0(sizeof(*replies) * priv->child_count);
- local->first_up_child = afr_first_up_child (local->child_up,
- priv->child_count);
+ ret = afr_selfheal_unlocked_discover(frame, inode, gfid, replies);
+ if (ret)
+ goto out;
- local->transaction.eager_lock =
- GF_CALLOC (sizeof (*local->transaction.eager_lock),
- priv->child_count,
- gf_afr_mt_int32_t);
+ if (!afr_can_decide_split_brain_source_sinks(replies, priv->child_count)) {
+ ret = -EAGAIN;
+ goto out;
+ }
- if (!local->transaction.eager_lock)
- goto out;
+ ret = _afr_is_split_brain(frame, this, replies, AFR_DATA_TRANSACTION,
+ d_spb);
+ if (ret)
+ goto out;
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children)
- goto out;
+ ret = _afr_is_split_brain(frame, this, replies, AFR_METADATA_TRANSACTION,
+ m_spb);
+out:
+ if (replies) {
+ afr_replies_wipe(replies, priv->child_count);
+ replies = NULL;
+ }
+ return ret;
+}
- if (local->fd) {
- local->fd_open_on = GF_CALLOC (sizeof (*local->fd_open_on),
- priv->child_count,
- gf_afr_mt_char);
- if (!local->fd_open_on)
- goto out;
+int
+afr_get_split_brain_status_cbk(int ret, call_frame_t *frame, void *opaque)
+{
+ GF_FREE(opaque);
+ return 0;
+}
+
+int
+afr_get_split_brain_status(void *opaque)
+{
+ gf_boolean_t d_spb = _gf_false;
+ gf_boolean_t m_spb = _gf_false;
+ int ret = -1;
+ int op_errno = 0;
+ int i = 0;
+ char *choices = NULL;
+ char *status = NULL;
+ dict_t *dict = NULL;
+ inode_t *inode = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ loc_t *loc = NULL;
+ afr_spb_status_t *data = NULL;
+
+ data = opaque;
+ frame = data->frame;
+ this = frame->this;
+ loc = data->loc;
+ priv = this->private;
+ children = priv->children;
+
+ inode = afr_inode_find(this, loc->gfid);
+ if (!inode)
+ goto out;
+
+ dict = dict_new();
+ if (!dict) {
+ op_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ /* Calculation for string length :
+ * (child_count X length of child-name) + SLEN(" Choices :")
+ * child-name consists of :
+ * a) 251 = max characters for volname according to GD_VOLUME_NAME_MAX
+ * b) strlen("-client-00,") assuming 16 replicas
+ */
+ choices = alloca0(priv->child_count * (256 + SLEN("-client-00,")) +
+ SLEN(" Choices:"));
+
+ ret = afr_is_split_brain(frame, this, inode, loc->gfid, &d_spb, &m_spb);
+ if (ret) {
+ op_errno = -ret;
+ if (ret == -EAGAIN) {
+ ret = dict_set_sizen_str_sizen(dict, GF_AFR_SBRAIN_STATUS,
+ SBRAIN_HEAL_NO_GO_MSG);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ AFR_MSG_DICT_SET_FAILED,
+ "Failed to set GF_AFR_SBRAIN_STATUS in dict");
+ }
+ }
+ ret = -1;
+ goto out;
+ }
+
+ if (d_spb || m_spb) {
+ sprintf(choices, " Choices:");
+ for (i = 0; i < priv->child_count; i++) {
+ strcat(choices, children[i]->name);
+ strcat(choices, ",");
}
+ choices[strlen(choices) - 1] = '\0';
- local->transaction.pre_op = GF_CALLOC (sizeof (*local->transaction.pre_op),
- priv->child_count,
- gf_afr_mt_char);
- if (!local->transaction.pre_op)
- goto out;
+ ret = gf_asprintf(&status,
+ "data-split-brain:%s "
+ "metadata-split-brain:%s%s",
+ (d_spb) ? "yes" : "no", (m_spb) ? "yes" : "no",
+ choices);
- local->pending = afr_matrix_create (priv->child_count,
- AFR_NUM_CHANGE_LOGS);
- if (!local->pending)
- goto out;
+ if (-1 == ret) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ ret = dict_set_dynstr_sizen(dict, GF_AFR_SBRAIN_STATUS, status);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ } else {
+ ret = dict_set_sizen_str_sizen(dict, GF_AFR_SBRAIN_STATUS,
+ SFILE_NOT_UNDER_DATA);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ }
- local->transaction.txn_changelog = afr_matrix_create (priv->child_count,
- AFR_NUM_CHANGE_LOGS);
- if (!local->transaction.txn_changelog)
- goto out;
- ret = 0;
+ ret = 0;
out:
- return ret;
+ AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
+ if (dict)
+ dict_unref(dict);
+ if (inode)
+ inode_unref(inode);
+ return ret;
}
-void
-afr_reset_children (int32_t *fresh_children, int32_t child_count)
-{
- unsigned int i = 0;
- for (i = 0; i < child_count; i++)
- fresh_children[i] = -1;
+int32_t
+afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ int ret = 0;
+ int op_errno = 0;
+ dict_t *dict = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *heal_local = NULL;
+ call_frame_t *heal_frame = NULL;
+
+ local = frame->local;
+ dict = dict_new();
+ if (!dict) {
+ op_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ heal_frame = afr_frame_create(this, &op_errno);
+ if (!heal_frame) {
+ ret = -1;
+ goto out;
+ }
+ heal_local = heal_frame->local;
+ heal_frame->local = frame->local;
+ /*Initiate heal with heal_frame with lk-owner set so that inodelk/entrylk
+ * work correctly*/
+ ret = afr_selfheal_do(heal_frame, this, loc->gfid);
+
+ if (ret == 1 || ret == 2) {
+ ret = dict_set_sizen_str_sizen(dict, "sh-fail-msg",
+ SFILE_NOT_IN_SPLIT_BRAIN);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Failed to set sh-fail-msg in dict");
+ ret = 0;
+ goto out;
+ } else {
+ if (local->xdata_rsp) {
+ /* 'sh-fail-msg' has been set in the dict during self-heal.*/
+ dict_copy(local->xdata_rsp, dict);
+ ret = 0;
+ } else if (ret < 0) {
+ op_errno = -ret;
+ ret = -1;
+ }
+ }
+
+out:
+ if (heal_frame) {
+ heal_frame->local = heal_local;
+ AFR_STACK_DESTROY(heal_frame);
+ }
+ if (local->op == GF_FOP_GETXATTR)
+ AFR_STACK_UNWIND(getxattr, frame, ret, op_errno, dict, NULL);
+ else if (local->op == GF_FOP_SETXATTR)
+ AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
+ if (dict)
+ dict_unref(dict);
+ return ret;
}
-int32_t*
-afr_children_create (int32_t child_count)
+int
+afr_get_child_index_from_name(xlator_t *this, char *name)
{
- int32_t *children = NULL;
- int i = 0;
-
- GF_ASSERT (child_count > 0);
+ afr_private_t *priv = this->private;
+ int index = -1;
- children = GF_CALLOC (child_count, sizeof (*children),
- gf_afr_mt_int32_t);
- if (NULL == children)
- goto out;
- for (i = 0; i < child_count; i++)
- children[i] = -1;
+ for (index = 0; index < priv->child_count; index++) {
+ if (!strcmp(priv->children[index]->name, name))
+ goto out;
+ }
+ index = -1;
out:
- return children;
+ return index;
}
void
-afr_children_add_child (int32_t *children, int32_t child,
- int32_t child_count)
-{
- gf_boolean_t child_found = _gf_false;
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (children[i] == -1)
- break;
- if (children[i] == child) {
- child_found = _gf_true;
- break;
- }
- }
-
- if (!child_found) {
- GF_ASSERT (i < child_count);
- children[i] = child;
- }
+afr_priv_need_heal_set(afr_private_t *priv, gf_boolean_t need_heal)
+{
+ LOCK(&priv->lock);
+ {
+ priv->need_heal = need_heal;
+ }
+ UNLOCK(&priv->lock);
}
void
-afr_children_rm_child (int32_t *children, int32_t child, int32_t child_count)
-{
- int i = 0;
-
- GF_ASSERT ((child >= 0) && (child < child_count));
- for (i = 0; i < child_count; i++) {
- if (children[i] == -1)
- break;
- if (children[i] == child) {
- if (i != (child_count - 1))
- memmove (children + i, children + i + 1,
- sizeof (*children)*(child_count - i - 1));
- children[child_count - 1] = -1;
- break;
- }
+afr_set_need_heal(xlator_t *this, afr_local_t *local)
+{
+ int i = 0;
+ afr_private_t *priv = this->private;
+ gf_boolean_t need_heal = _gf_false;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].need_heal) {
+ need_heal = _gf_true;
+ break;
}
+ }
+ afr_priv_need_heal_set(priv, need_heal);
+ return;
}
-int
-afr_get_children_count (int32_t *children, unsigned int child_count)
+gf_boolean_t
+afr_get_need_heal(xlator_t *this)
{
- int count = 0;
- int i = 0;
+ afr_private_t *priv = this->private;
+ gf_boolean_t need_heal = _gf_true;
- for (i = 0; i < child_count; i++) {
- if (children[i] == -1)
- break;
- count++;
- }
- return count;
+ LOCK(&priv->lock);
+ {
+ need_heal = priv->need_heal;
+ }
+ UNLOCK(&priv->lock);
+ return need_heal;
}
-void
-afr_set_low_priority (call_frame_t *frame)
+int
+afr_get_msg_id(char *op_type)
{
- frame->root->pid = LOW_PRIO_PROC_PID;
+ if (!strcmp(op_type, GF_AFR_REPLACE_BRICK))
+ return AFR_MSG_REPLACE_BRICK_STATUS;
+ else if (!strcmp(op_type, GF_AFR_ADD_BRICK))
+ return AFR_MSG_ADD_BRICK_STATUS;
+ return -1;
}
int
-afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
- int flags)
+afr_fav_child_reset_sink_xattrs_cbk(int ret, call_frame_t *heal_frame,
+ void *opaque)
{
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
+ call_frame_t *txn_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *heal_local = NULL;
+ xlator_t *this = NULL;
- GF_ASSERT (fd && fd->inode);
- ret = afr_fd_ctx_set (this, fd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set fd ctx for fd=%p", fd);
- goto out;
+ heal_local = heal_frame->local;
+ txn_frame = heal_local->heal_frame;
+ local = txn_frame->local;
+ this = txn_frame->this;
+
+ /* Refresh the inode agan and proceed with the transaction.*/
+ afr_inode_refresh(txn_frame, this, local->inode, NULL, local->refreshfn);
+
+ AFR_STACK_DESTROY(heal_frame);
+
+ return 0;
+}
+
+int
+afr_fav_child_reset_sink_xattrs(void *opaque)
+{
+ call_frame_t *heal_frame = NULL;
+ call_frame_t *txn_frame = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t d_spb = _gf_false;
+ gf_boolean_t m_spb = _gf_false;
+ afr_local_t *heal_local = NULL;
+ afr_local_t *txn_local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ unsigned char *locked_on = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
+ struct afr_reply *locked_replies = NULL;
+ int ret = 0;
+
+ heal_frame = (call_frame_t *)opaque;
+ heal_local = heal_frame->local;
+ txn_frame = heal_local->heal_frame;
+ txn_local = txn_frame->local;
+ this = txn_frame->this;
+ inode = txn_local->inode;
+ priv = this->private;
+ locked_on = alloca0(priv->child_count);
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+ undid_pending = alloca0(priv->child_count);
+ locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
+
+ ret = _afr_is_split_brain(txn_frame, this, txn_local->replies,
+ AFR_DATA_TRANSACTION, &d_spb);
+
+ ret = _afr_is_split_brain(txn_frame, this, txn_local->replies,
+ AFR_METADATA_TRANSACTION, &m_spb);
+
+ /* Take appropriate locks and reset sink xattrs. */
+ if (d_spb) {
+ ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name, 0, 0,
+ locked_on);
+ {
+ if (ret < priv->child_count)
+ goto data_unlock;
+ ret = __afr_selfheal_data_prepare(
+ heal_frame, this, inode, locked_on, sources, sinks,
+ healed_sinks, undid_pending, locked_replies, NULL);
+ }
+ data_unlock:
+ afr_selfheal_uninodelk(heal_frame, this, inode, this->name, 0, 0,
+ locked_on);
+ }
+
+ if (m_spb) {
+ memset(locked_on, 0, sizeof(*locked_on) * priv->child_count);
+ memset(undid_pending, 0, sizeof(*undid_pending) * priv->child_count);
+ ret = afr_selfheal_inodelk(heal_frame, this, inode, this->name,
+ LLONG_MAX - 1, 0, locked_on);
+ {
+ if (ret < priv->child_count)
+ goto mdata_unlock;
+ ret = __afr_selfheal_metadata_prepare(
+ heal_frame, this, inode, locked_on, sources, sinks,
+ healed_sinks, undid_pending, locked_replies, NULL);
}
+ mdata_unlock:
+ afr_selfheal_uninodelk(heal_frame, this, inode, this->name,
+ LLONG_MAX - 1, 0, locked_on);
+ }
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not get fd ctx for fd=%p", fd);
+ return ret;
+}
+
+/*
+ * Concatenates the xattrs in local->replies separated by a delimiter.
+ */
+int
+afr_serialize_xattrs_with_delimiter(call_frame_t *frame, xlator_t *this,
+ char *buf, const char *default_str,
+ int32_t *serz_len, char delimiter)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ char *xattr = NULL;
+ int i = 0;
+ int len = 0;
+ int keylen = 0;
+ size_t str_len = 0;
+ int ret = -1;
+
+ priv = this->private;
+ local = frame->local;
+
+ keylen = strlen(local->cont.getxattr.name);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid || local->replies[i].op_ret) {
+ str_len = strlen(default_str);
+ buf = strncat(buf, default_str, str_len);
+ len += str_len;
+ buf[len++] = delimiter;
+ buf[len] = '\0';
+ } else {
+ ret = dict_get_strn(local->replies[i].xattr,
+ local->cont.getxattr.name, keylen, &xattr);
+ if (ret) {
+ gf_msg("TEST", GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "Failed to get the node_uuid of brick "
+ "%d",
+ i);
goto out;
- }
+ }
+ str_len = strlen(xattr);
+ buf = strncat(buf, xattr, str_len);
+ len += str_len;
+ buf[len++] = delimiter;
+ buf[len] = '\0';
+ }
+ }
+ buf[--len] = '\0'; /*remove the last delimiter*/
+ if (serz_len)
+ *serz_len = ++len;
+ ret = 0;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- fd_ctx->opened_on[child] = AFR_FD_OPENED;
- if (!IA_ISDIR (fd->inode->ia_type)) {
- fd_ctx->flags = flags;
- }
- ret = 0;
out:
- return ret;
+ return ret;
+}
+
+uint64_t
+afr_write_subvol_get(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ uint64_t write_subvol = 0;
+
+ local = frame->local;
+ LOCK(&local->inode->lock);
+ write_subvol = local->inode_ctx->write_subvol;
+ UNLOCK(&local->inode->lock);
+
+ return write_subvol;
+}
+
+int
+afr_write_subvol_set(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *data_accused = NULL;
+ unsigned char *metadata_accused = NULL;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ uint16_t datamap = 0;
+ uint16_t metadatamap = 0;
+ uint64_t val = 0;
+ int event = 0;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+ data_accused = alloca0(priv->child_count);
+ metadata_accused = alloca0(priv->child_count);
+ data_readable = alloca0(priv->child_count);
+ metadata_readable = alloca0(priv->child_count);
+ event = local->event_generation;
+
+ afr_readables_fill(frame, this, local->inode, data_accused,
+ metadata_accused, data_readable, metadata_readable,
+ NULL);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (data_readable[i])
+ datamap |= (1 << i);
+ if (metadata_readable[i])
+ metadatamap |= (1 << i);
+ }
+
+ val = ((uint64_t)metadatamap) | (((uint64_t)datamap) << 16) |
+ (((uint64_t)event) << 32);
+
+ LOCK(&local->inode->lock);
+ {
+ if (local->inode_ctx->write_subvol == 0 &&
+ local->transaction.type == AFR_DATA_TRANSACTION) {
+ local->inode_ctx->write_subvol = val;
+ }
+ }
+ UNLOCK(&local->inode->lock);
+
+ return 0;
+}
+
+int
+afr_write_subvol_reset(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ LOCK(&local->inode->lock);
+ {
+ GF_ASSERT(local->inode_ctx->lock_count > 0);
+ local->inode_ctx->lock_count--;
+
+ if (!local->inode_ctx->lock_count)
+ local->inode_ctx->write_subvol = 0;
+ }
+ UNLOCK(&local->inode->lock);
+
+ return 0;
+}
+
+int
+afr_set_inode_local(xlator_t *this, afr_local_t *local, inode_t *inode)
+{
+ int ret = 0;
+
+ local->inode = inode_ref(inode);
+ LOCK(&local->inode->lock);
+ {
+ ret = __afr_inode_ctx_get(this, local->inode, &local->inode_ctx);
+ }
+ UNLOCK(&local->inode->lock);
+ if (ret < 0) {
+ gf_msg_callingfn(
+ this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_INODE_CTX_GET_FAILED,
+ "Error getting inode ctx %s", uuid_utoa(local->inode->gfid));
+ }
+ return ret;
}
gf_boolean_t
-afr_have_quorum (char *logname, afr_private_t *priv)
+afr_ta_is_fop_called_from_synctask(xlator_t *this)
{
- unsigned int quorum = 0;
+ struct synctask *task = NULL;
+ gf_lkowner_t tmp_owner = {
+ 0,
+ };
- GF_VALIDATE_OR_GOTO(logname,priv,out);
+ task = synctask_get();
+ if (!task)
+ return _gf_false;
- quorum = priv->quorum_count;
- if (quorum != AFR_QUORUM_AUTO) {
- return (priv->up_count >= (priv->down_count + quorum));
- }
+ set_lk_owner_from_ptr(&tmp_owner, (void *)this);
- quorum = priv->child_count / 2 + 1;
- if (priv->up_count >= (priv->down_count + quorum)) {
- return _gf_true;
- }
+ if (!is_same_lkowner(&tmp_owner, &task->frame->root->lk_owner))
+ return _gf_false;
- /*
- * Special case for even numbers of nodes: if we have exactly half
- * and that includes the first ("senior-most") node, then that counts
- * as quorum even if it wouldn't otherwise. This supports e.g. N=2
- * while preserving the critical property that there can only be one
- * such group.
- */
- if ((priv->child_count % 2) == 0) {
- quorum = priv->child_count / 2;
- if (priv->up_count >= (priv->down_count + quorum)) {
- if (priv->child_up[0]) {
- return _gf_true;
- }
- }
- }
+ return _gf_true;
+}
+int
+afr_ta_post_op_lock(xlator_t *this, loc_t *loc)
+{
+ int ret = 0;
+ uuid_t gfid = {
+ 0,
+ };
+ afr_private_t *priv = this->private;
+ gf_boolean_t locked = _gf_false;
+ struct gf_flock flock1 = {
+ 0,
+ };
+ struct gf_flock flock2 = {
+ 0,
+ };
+ int32_t cmd = 0;
+
+ /* Clients must take AFR_TA_DOM_NOTIFY lock only when the previous lock
+ * has been released in afr_notify due to upcall notification from shd.
+ */
+ GF_ASSERT(priv->ta_notify_dom_lock_offset == 0);
+
+ if (!priv->shd.iamshd)
+ GF_ASSERT(afr_ta_is_fop_called_from_synctask(this));
+ flock1.l_type = F_WRLCK;
+
+ while (!locked) {
+ if (priv->shd.iamshd) {
+ cmd = F_SETLKW;
+ flock1.l_start = 0;
+ flock1.l_len = 0;
+ } else {
+ cmd = F_SETLK;
+ gf_uuid_generate(gfid);
+ flock1.l_start = gfid_to_ino(gfid);
+ if (flock1.l_start < 0)
+ flock1.l_start = -flock1.l_start;
+ flock1.l_len = 1;
+ }
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_NOTIFY, loc, cmd, &flock1, NULL, NULL);
+ if (!ret) {
+ locked = _gf_true;
+ priv->ta_notify_dom_lock_offset = flock1.l_start;
+ } else if (ret == -EAGAIN) {
+ continue;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to get "
+ "AFR_TA_DOM_NOTIFY lock on %s.",
+ loc->name);
+ goto out;
+ }
+ }
+
+ flock2.l_type = F_WRLCK;
+ flock2.l_start = 0;
+ flock2.l_len = 0;
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_MODIFY, loc, F_SETLKW, &flock2, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to get AFR_TA_DOM_MODIFY lock on %s.", loc->name);
+ flock1.l_type = F_UNLCK;
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_NOTIFY, loc, F_SETLK, &flock1, NULL,
+ NULL);
+ }
+out:
+ return ret;
+}
+
+int
+afr_ta_post_op_unlock(xlator_t *this, loc_t *loc)
+{
+ afr_private_t *priv = this->private;
+ struct gf_flock flock = {
+ 0,
+ };
+ int ret = 0;
+
+ if (!priv->shd.iamshd)
+ GF_ASSERT(afr_ta_is_fop_called_from_synctask(this));
+ flock.l_type = F_UNLCK;
+ flock.l_start = 0;
+ flock.l_len = 0;
+
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_MODIFY, loc, F_SETLK, &flock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to unlock AFR_TA_DOM_MODIFY lock.");
+ goto out;
+ }
+
+ if (!priv->shd.iamshd)
+ /* Mounts (clients) will not release the AFR_TA_DOM_NOTIFY lock
+ * in post-op as they use it as a notification mechanism. When
+ * shd sends a lock request on TA during heal, the clients will
+ * receive a lock-contention upcall notification upon which they
+ * will release the AFR_TA_DOM_NOTIFY lock after completing the
+ * in flight I/O.*/
+ goto out;
+
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_NOTIFY, loc, F_SETLK, &flock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to unlock AFR_TA_DOM_NOTIFY lock.");
+ }
out:
+ return ret;
+}
+
+call_frame_t *
+afr_ta_frame_create(xlator_t *this)
+{
+ call_frame_t *frame = NULL;
+ void *lk_owner = NULL;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ return NULL;
+ lk_owner = (void *)this;
+ afr_set_lk_owner(frame, this, lk_owner);
+ return frame;
+}
+
+gf_boolean_t
+afr_ta_has_quorum(afr_private_t *priv, afr_local_t *local)
+{
+ int data_count = 0;
+
+ data_count = AFR_COUNT(local->child_up, priv->child_count);
+ if (data_count == 2) {
+ return _gf_true;
+ } else if (data_count == 1 && local->ta_child_up) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+static gf_boolean_t
+afr_is_add_replica_mount_lookup_on_root(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+
+ if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT)
return _gf_false;
+
+ local = frame->local;
+
+ if (local->op != GF_FOP_LOOKUP)
+ /* TODO:If the replica count is being increased on a plain distribute
+ * volume that was never mounted, we need to allow setxattr on '/' with
+ * GF_CLIENT_PID_NO_ROOT_SQUASH to accomodate for DHT layout setting */
+ return _gf_false;
+
+ if (local->inode == NULL)
+ return _gf_false;
+
+ if (!__is_root_gfid(local->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
}
-void
-afr_priv_destroy (afr_private_t *priv)
+gf_boolean_t
+afr_lookup_has_quorum(call_frame_t *frame, const unsigned int up_children_count)
{
- int i = 0;
+ if (frame && (up_children_count > 0) &&
+ afr_is_add_replica_mount_lookup_on_root(frame))
+ return _gf_true;
- if (!priv)
- goto out;
- inode_unref (priv->root_inode);
- GF_FREE (priv->shd.pos);
- GF_FREE (priv->shd.pending);
- GF_FREE (priv->shd.inprogress);
-// for (i = 0; i < priv->child_count; i++)
-// if (priv->shd.timer && priv->shd.timer[i])
-// gf_timer_call_cancel (this->ctx, priv->shd.timer[i]);
- GF_FREE (priv->shd.timer);
-
- if (priv->shd.healed)
- eh_destroy (priv->shd.healed);
-
- if (priv->shd.heal_failed)
- eh_destroy (priv->shd.heal_failed);
-
- if (priv->shd.split_brain)
- eh_destroy (priv->shd.split_brain);
-
- GF_FREE (priv->last_event);
- if (priv->pending_key) {
- for (i = 0; i < priv->child_count; i++)
- GF_FREE (priv->pending_key[i]);
- }
- GF_FREE (priv->pending_key);
- GF_FREE (priv->children);
- GF_FREE (priv->child_up);
- LOCK_DESTROY (&priv->lock);
- LOCK_DESTROY (&priv->read_child_lock);
- pthread_mutex_destroy (&priv->mutex);
- GF_FREE (priv);
-out:
- return;
+ return _gf_false;
}
-int
-xlator_subvolume_count (xlator_t *this)
+void
+afr_handle_replies_quorum(call_frame_t *frame, xlator_t *this)
{
- int i = 0;
- xlator_list_t *list = NULL;
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ unsigned char *success_replies = NULL;
+
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+
+ if (priv->quorum_count && !afr_has_quorum(success_replies, this, NULL)) {
+ local->op_errno = afr_final_errno(local, priv);
+ if (!local->op_errno)
+ local->op_errno = afr_quorum_errno(priv);
+ local->op_ret = -1;
+ }
+}
+
+gf_boolean_t
+afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv, int child)
+{
+ int *pending = NULL;
+ int ret = 0;
+ int i = 0;
+
+ ret = dict_get_ptr(dict, priv->pending_key[child], (void *)&pending);
+ if (ret == 0) {
+ for (i = 0; i < AFR_NUM_CHANGE_LOGS; i++) {
+ /* Not doing a ntoh32(pending) as we just want to check
+ * if it is non-zero or not. */
+ if (pending[i]) {
+ return _gf_true;
+ }
+ }
+ }
- for (list = this->children; list; list = list->next)
- i++;
- return i;
+ return _gf_false;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.c b/xlators/cluster/afr/src/afr-dir-read.c
index ce91ffba729..f8bf8340dab 100644
--- a/xlators/cluster/afr/src/afr-dir-read.c
+++ b/xlators/cluster/afr/src/afr-dir-read.c
@@ -8,704 +8,339 @@
cases as published by the Free Software Foundation.
*/
-
#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
#include <string.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "checksum.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/list.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
#include "afr.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-int
-afr_examine_dir_sh_unwind (call_frame_t *frame, xlator_t *this, int32_t op_ret,
- int32_t op_errno, int32_t sh_failed)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- afr_set_opendir_done (this, local->fd->inode);
-
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd, NULL);
-
- return 0;
-}
-
-
-gf_boolean_t
-__checksums_differ (uint32_t *checksum, int child_count,
- unsigned char *child_up)
-{
- int ret = _gf_false;
- int i = 0;
- uint32_t cksum = 0;
- gf_boolean_t activate_check = _gf_false;
-
- for (i = 0; i < child_count; i++) {
- if (!child_up[i])
- continue;
- if (_gf_false == activate_check) {
- cksum = checksum[i];
- activate_check = _gf_true;
- continue;
- }
-
- if (cksum != checksum[i]) {
- ret = _gf_true;
- break;
- }
-
- cksum = checksum[i];
- }
-
- return ret;
-}
-
+#include "afr-transaction.h"
int32_t
-afr_examine_dir_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
+afr_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- gf_dirent_t * entry = NULL;
- gf_dirent_t * tmp = NULL;
- char *reason = NULL;
- int child_index = 0;
- uint32_t entry_cksum = 0;
- int call_count = 0;
- off_t last_offset = 0;
- inode_t *inode = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- inode = local->fd->inode;
-
- child_index = (long) cookie;
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int32_t child_index = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to do opendir on %s",
- local->loc.path, priv->children[child_index]->name);
- local->op_ret = -1;
- local->op_ret = op_errno;
- goto out;
- }
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: no entries found in %s",
- local->loc.path, priv->children[child_index]->name);
- goto out;
- }
+ local = frame->local;
+ fd_ctx = local->fd_ctx;
+ child_index = (long)cookie;
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- entry_cksum = gf_rsync_weak_checksum ((unsigned char *)entry->d_name,
- strlen (entry->d_name));
- local->cont.opendir.checksum[child_index] ^= entry_cksum;
- }
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ } else {
+ local->op_ret = op_ret;
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
}
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- /* read more entries */
-
- STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->readdir,
- local->fd, 131072, last_offset, NULL);
-
- return 0;
-
-out:
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (__checksums_differ (local->cont.opendir.checksum,
- priv->child_count,
- local->child_up)) {
-
- sh->do_entry_self_heal = _gf_true;
- sh->forced_merge = _gf_true;
-
- reason = "checksums of directory differ";
- afr_launch_self_heal (frame, this, inode, _gf_false,
- inode->ia_type, reason, NULL,
- afr_examine_dir_sh_unwind);
- } else {
- afr_set_opendir_done (this, inode);
-
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd, NULL);
- }
- }
+ if (call_count == 0) {
+ afr_handle_replies_quorum(frame, this);
+ AFR_STACK_UNWIND(opendir, frame, local->op_ret, local->op_errno,
+ local->fd, NULL);
+ }
- return 0;
+ return 0;
}
-
int
-afr_examine_dir (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int i = 0;
- int call_count = 0;
-
- local = frame->local;
- priv = this->private;
-
- local->cont.opendir.checksum = GF_CALLOC (priv->child_count,
- sizeof (*local->cont.opendir.checksum),
- gf_afr_mt_int32_t);
-
- call_count = afr_up_children_count (local->child_up, priv->child_count);
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_examine_dir_readdir_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->readdir,
- local->fd, 131072, 0, NULL);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-afr_opendir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd, dict_t *xdata)
+afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int32_t up_children_count = 0;
- int ret = -1;
- int call_count = -1;
- int32_t child_index = 0;
-
- priv = this->private;
- local = frame->local;
- child_index = (long) cookie;
-
- up_children_count = afr_up_children_count (local->child_up,
- priv->child_count);
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- ret = afr_child_fd_ctx_set (this, fd, child_index, 0);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
- }
-
- local->op_errno = op_errno;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (local->op_ret != 0)
- goto out;
-
- if (!afr_is_opendir_done (this, local->fd->inode) &&
- up_children_count > 1) {
-
- /*
- * This is the first opendir on this inode. We need
- * to check if the directory's entries are the same
- * on all subvolumes. This is needed in addition
- * to regular entry self-heal because the readdir
- * call is sent only to the first subvolume, and
- * thus files that exist only there will never be healed
- * otherwise (assuming changelog shows no anomalies).
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "reading contents of directory %s looking for mismatch",
- local->loc.path);
-
- afr_examine_dir (frame, this);
-
- } else {
- /* do the unwind */
- goto out;
- }
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = -1;
+ int32_t op_errno = ENOMEM;
+ afr_fd_ctx_t *fd_ctx = NULL;
-out:
- AFR_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd, NULL);
-
- return 0;
-}
-
-
-int32_t
-afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int child_count = 0;
- int i = 0;
- int ret = -1;
- int call_count = -1;
- int32_t op_errno = 0;
+ priv = this->private;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- priv = this->private;
+ local->op = GF_FOP_OPENDIR;
- child_count = priv->child_count;
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ op_errno = afr_quorum_errno(priv);
+ goto out;
+ }
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!fd_ctx)
+ goto out;
- loc_copy (&local->loc, loc);
+ loc_copy(&local->loc, loc);
- local->fd = fd_ref (fd);
+ local->fd = fd_ref(fd);
+ local->fd_ctx = fd_ctx;
- call_count = local->call_count;
+ call_count = local->call_count;
- for (i = 0; i < child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_opendir_cbk,
- (void*) (long) i,
- priv->children[i],
- priv->children[i]->fops->opendir,
- loc, fd, NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, afr_opendir_cbk, (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->opendir, loc, fd, NULL);
- if (!--call_count)
- break;
- }
+ if (!--call_count)
+ break;
}
+ }
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (opendir, frame, -1, op_errno, fd, NULL);
-
- return 0;
+ AFR_STACK_UNWIND(opendir, frame, -1, op_errno, fd, NULL);
+ return 0;
}
-
-/**
- * Common algorithm for directory read calls:
- *
- * - Try the fop on the first child that is up
- * - if we have failed due to ENOTCONN:
- * try the next child
- *
- * Applicable to: readdir
- */
-
-
-struct entry_name {
- char *name;
- struct list_head list;
-};
-
-
-static gf_boolean_t
-remembered_name (const char *name, struct list_head *entries)
+static int
+afr_validate_read_subvol(inode_t *inode, xlator_t *this, int par_read_subvol)
{
- struct entry_name *e = NULL;
- gf_boolean_t ret = _gf_false;
-
- list_for_each_entry (e, entries, list) {
- if (!strcmp (name, e->name)) {
- ret = _gf_true;
- goto out;
- }
- }
+ int gen = 0;
+ int entry_read_subvol = 0;
+ unsigned char *data_readable = NULL;
+ unsigned char *metadata_readable = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ data_readable = alloca0(priv->child_count);
+ metadata_readable = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_get(inode, this, data_readable, metadata_readable,
+ &gen);
+
+ if (gen != priv->event_generation || !data_readable[par_read_subvol] ||
+ !metadata_readable[par_read_subvol])
+ return -1;
+
+ /* Once the control reaches the following statement, it means that the
+ * parent's read subvol is perfectly readable. So calling
+ * either afr_data_subvol_get() or afr_metadata_subvol_get() would
+ * yield the same result. Hence, choosing afr_data_subvol_get() below.
+ */
+
+ if (!priv->consistent_metadata)
+ return 0;
-out:
- return ret;
+ /* For an inode fetched through readdirp which is yet to be linked,
+ * inode ctx would not be initialised (yet). So this function returns
+ * -1 above due to gen being 0, which is why it is OK to pass NULL for
+ * read_subvol_args here.
+ */
+ entry_read_subvol = afr_data_subvol_get(inode, this, NULL, NULL, NULL,
+ NULL);
+ if (entry_read_subvol != par_read_subvol)
+ return -1;
+
+ return 0;
}
-
static void
-afr_remember_entries (gf_dirent_t *entries, fd_t *fd)
+afr_readdir_transform_entries(call_frame_t *frame, gf_dirent_t *subvol_entries,
+ int subvol, gf_dirent_t *entries, fd_t *fd)
{
- struct entry_name *n = NULL;
- gf_dirent_t *entry = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return;
+ int ret = -1;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t need_heal = _gf_false;
+ gf_boolean_t validate_subvol = _gf_false;
+
+ this = THIS;
+ priv = this->private;
+
+ need_heal = afr_get_need_heal(this);
+ validate_subvol = need_heal | priv->consistent_metadata;
+
+ list_for_each_entry_safe(entry, tmp, &subvol_entries->list, list)
+ {
+ if (afr_is_private_directory(priv, fd->inode->gfid, entry->d_name,
+ frame->root->pid)) {
+ continue;
}
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ list_del_init(&entry->list);
+ list_add_tail(&entry->list, &entries->list);
- list_for_each_entry (entry, &entries->list, list) {
- n = GF_CALLOC (1, sizeof (*n), gf_afr_mt_entry_name);
- n->name = gf_strdup (entry->d_name);
- INIT_LIST_HEAD (&n->list);
+ if (!validate_subvol)
+ continue;
- list_add (&n->list, &fd_ctx->entries);
+ if (entry->inode) {
+ ret = afr_validate_read_subvol(entry->inode, this, subvol);
+ if (ret == -1) {
+ inode_unref(entry->inode);
+ entry->inode = NULL;
+ continue;
+ }
}
+ }
}
-
-static off_t
-afr_filter_entries (gf_dirent_t *entries, fd_t *fd)
+int32_t
+afr_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *subvol_entries,
+ dict_t *xdata)
{
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- off_t offset = 0;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return -1;
- }
+ afr_local_t *local = NULL;
+ gf_dirent_t entries;
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ INIT_LIST_HEAD(&entries.list);
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- offset = entry->d_off;
+ local = frame->local;
- if (remembered_name (entry->d_name, &fd_ctx->entries)) {
- list_del (&entry->list);
- GF_FREE (entry);
- }
- }
+ if (op_ret < 0 && !local->cont.readdir.offset) {
+ /* failover only if this was first readdir, detected
+ by offset == 0 */
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- return offset;
-}
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
+ if (op_ret >= 0)
+ afr_readdir_transform_entries(frame, subvol_entries, (long)cookie,
+ &entries, local->fd);
-static void
-afr_forget_entries (fd_t *fd)
-{
- struct entry_name *entry = NULL;
- struct entry_name *tmp = NULL;
- int ret = 0;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
-
- ret = fd_ctx_get (fd, THIS, &ctx);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- return;
- }
+ AFR_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, xdata);
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
+ gf_dirent_free(&entries);
- list_for_each_entry_safe (entry, tmp, &fd_ctx->entries, list) {
- GF_FREE (entry->name);
- list_del (&entry->list);
- GF_FREE (entry);
- }
+ return 0;
}
-
-int32_t
-afr_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
+int
+afr_readdir_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- AFR_STACK_UNWIND (readdir, frame, op_ret, op_errno, entries, NULL);
-
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ fd_ctx = afr_fd_ctx_get(local->fd, this);
+ if (!fd_ctx) {
+ local->op_errno = EINVAL;
+ local->op_ret = -1;
+ }
+
+ if (subvol == -1 || !fd_ctx) {
+ AFR_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno, 0, 0);
return 0;
+ }
+
+ fd_ctx->readdir_subvol = subvol;
+
+ if (local->op == GF_FOP_READDIR)
+ STACK_WIND_COOKIE(frame, afr_readdir_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readdir, local->fd,
+ local->cont.readdir.size, local->cont.readdir.offset,
+ local->xdata_req);
+ else
+ STACK_WIND_COOKIE(frame, afr_readdir_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readdirp, local->fd,
+ local->cont.readdir.size, local->cont.readdir.offset,
+ local->xdata_req);
+ return 0;
}
-
-int32_t
-afr_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
+int
+afr_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, int whichop, dict_t *dict)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int32_t next_call_child = -1;
- int ret = 0;
- int32_t *last_index = NULL;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- off_t offset = 0;
- int32_t call_child = -1;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
-
- read_child = (long) cookie;
- last_index = &local->cont.readdir.last_index;
- fresh_children = local->fresh_children;
-
- /* the value of the last_index changes if afr_next_call_child is
- * called. So to find the call_child of this callback use last_index
- * before the next_call_child call.
- */
- if (*last_index == -1)
- call_child = read_child;
- else
- call_child = fresh_children[*last_index];
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (local->fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", local->fd);
- op_ret = -1;
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (op_ret == -1) {
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index,
- read_child);
- if (next_call_child < 0)
- goto out;
- gf_log (this->name, GF_LOG_TRACE,
- "starting readdir afresh on child %d, offset %"PRId64,
- next_call_child, (uint64_t) 0);
-
- fd_ctx->failed_over = _gf_true;
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->readdirp,
- local->fd,
- local->cont.readdir.size, 0,
- local->cont.readdir.dict);
- return 0;
- }
- }
-
- if (priv->strict_readdir) {
- if (fd_ctx->failed_over) {
- if (list_empty (&entries->list)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no entries found");
- goto out;
- }
-
- offset = afr_filter_entries (entries, local->fd);
-
- afr_remember_entries (entries, local->fd);
-
- if (list_empty (&entries->list)) {
- /* All the entries we got were duplicate. We
- shouldn't send an empty list now, because
- that will make the application stop reading. So
- try to get more entries */
-
- gf_log (this->name, GF_LOG_TRACE,
- "trying to fetch non-duplicate entries "
- "from offset %"PRId64", child %s",
- offset, children[call_child]->name);
-
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) read_child,
- children[call_child],
- children[call_child]->fops->readdirp,
- local->fd, local->cont.readdir.size, offset,
- local->cont.readdir.dict);
- return 0;
- }
- } else {
- afr_remember_entries (entries, local->fd);
- }
- }
-
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ int subvol = -1;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!fd_ctx) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ local->op = whichop;
+ local->fd = fd_ref(fd);
+ local->cont.readdir.size = size;
+ local->cont.readdir.offset = offset;
+ local->xdata_req = (dict) ? dict_ref(dict) : NULL;
+
+ subvol = fd_ctx->readdir_subvol;
+
+ if (offset == 0 || subvol == -1) {
+ /* First readdir has option of failing over and selecting
+ an appropriate read subvolume */
+ afr_read_txn(frame, this, fd->inode, afr_readdir_wind,
+ AFR_DATA_TRANSACTION);
+ } else {
+ /* But continued readdirs MUST stick to the same subvolume
+ without an option to failover */
+ afr_readdir_wind(frame, this, subvol);
+ }
+
+ return 0;
out:
- AFR_STACK_UNWIND (readdirp, frame, op_ret, op_errno, entries, NULL);
-
- return 0;
+ AFR_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
int32_t
-afr_do_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, int whichop, dict_t *dict)
+afr_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = -1;
- int32_t op_errno = 0;
- uint64_t read_child = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- children = priv->children;
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
-
- read_child = afr_inode_get_read_ctx (this, fd->inode,
- local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readdir.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
-
- local->fd = fd_ref (fd);
- local->cont.readdir.size = size;
- local->cont.readdir.dict = (dict)? dict_ref (dict) : NULL;
-
- if (priv->strict_readdir) {
- ret = fd_ctx_get (fd, this, &ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not get fd ctx for fd=%p", fd);
- op_errno = -ret;
- goto out;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- if (fd_ctx->last_tried != call_child) {
- gf_log (this->name, GF_LOG_TRACE,
- "first up child has changed from %d to %d, "
- "restarting readdir from offset 0",
- fd_ctx->last_tried, call_child);
+ afr_do_readdir(frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
- fd_ctx->failed_over = _gf_true;
- offset = 0;
- }
-
- fd_ctx->last_tried = call_child;
- }
-
- if (whichop == GF_FOP_READDIR)
- STACK_WIND_COOKIE (frame, afr_readdir_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdir, fd,
- size, offset, dict);
- else
- STACK_WIND_COOKIE (frame, afr_readdirp_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readdirp, fd,
- size, offset, dict);
-
- ret = 0;
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
int32_t
-afr_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *xdata)
+afr_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
- return 0;
-}
-
+ afr_do_readdir(frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
-int32_t
-afr_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *dict)
-{
- afr_do_readdir (frame, this, fd, size, offset, GF_FOP_READDIRP, dict);
- return 0;
+ return 0;
}
-
int32_t
-afr_releasedir (xlator_t *this, fd_t *fd)
+afr_releasedir(xlator_t *this, fd_t *fd)
{
- afr_forget_entries (fd);
- afr_cleanup_fd_ctx (this, fd);
+ afr_cleanup_fd_ctx(this, fd);
- return 0;
+ return 0;
}
diff --git a/xlators/cluster/afr/src/afr-dir-read.h b/xlators/cluster/afr/src/afr-dir-read.h
index 09456d15949..773e925ec6c 100644
--- a/xlators/cluster/afr/src/afr-dir-read.h
+++ b/xlators/cluster/afr/src/afr-dir-read.h
@@ -11,26 +11,23 @@
#ifndef __DIR_READ_H__
#define __DIR_READ_H__
-
int32_t
-afr_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd, dict_t *xdata);
+afr_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata);
int32_t
-afr_releasedir (xlator_t *this, fd_t *fd);
+afr_releasedir(xlator_t *this, fd_t *fd);
int32_t
-afr_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, dict_t *xdata);
-
+afr_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata);
int32_t
-afr_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, dict_t *dict);
+afr_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict);
int32_t
-afr_checksum (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, dict_t *xdata);
-
+afr_checksum(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ dict_t *xdata);
#endif /* __DIR_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-dir-write.c b/xlators/cluster/afr/src/afr-dir-write.c
index 0b804bef580..b7cceb79158 100644
--- a/xlators/cluster/afr/src/afr-dir-write.c
+++ b/xlators/cluster/afr/src/afr-dir-write.c
@@ -8,1247 +8,930 @@
cases as published by the Free Software Foundation.
*/
-
#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/list.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
#include "afr.h"
#include "afr-transaction.h"
+void
+afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this);
+
int
-afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno)
+afr_build_parent_loc(loc_t *parent, loc_t *child, int32_t *op_errno)
{
- int ret = -1;
- char *child_path = NULL;
-
- if (!child->parent) {
- if (op_errno)
- *op_errno = EINVAL;
- goto out;
- }
-
- child_path = gf_strdup (child->path);
- if (!child_path) {
- if (op_errno)
- *op_errno = ENOMEM;
- goto out;
- }
- parent->path = gf_strdup( dirname (child_path) );
- if (!parent->path) {
- if (op_errno)
- *op_errno = ENOMEM;
- goto out;
- }
- parent->inode = inode_ref (child->parent);
- uuid_copy (parent->gfid, child->pargfid);
-
- ret = 0;
+ int ret = -1;
+ char *child_path = NULL;
+
+ if (!child->parent) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ child_path = gf_strdup(child->path);
+ if (!child_path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ parent->path = gf_strdup(dirname(child_path));
+ if (!parent->path) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ parent->inode = inode_ref(child->parent);
+ gf_uuid_copy(parent->gfid, child->pargfid);
+
+ ret = 0;
out:
- if (child_path)
- GF_FREE(child_path);
+ GF_FREE(child_path);
- return ret;
+ return ret;
}
-/* {{{ create */
-
-int
-afr_create_unwind (call_frame_t *frame, xlator_t *this)
+static void
+__afr_dir_write_finalize(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.create.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.create.read_child_buf;
- } else {
- unwind_buf = &local->cont.create.buf;
- }
-
- AFR_STACK_UNWIND (create, main_frame,
- local->op_ret, local->op_errno,
- local->cont.create.fd,
- local->cont.create.inode,
- unwind_buf, &local->cont.create.preparent,
- &local->cont.create.postparent,
- NULL);
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int inode_read_subvol = -1;
+ int parent_read_subvol = -1;
+ int parent2_read_subvol = -1;
+ int i = 0;
+ afr_read_subvol_args_t args = {
+ 0,
+ };
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret == -1)
+ continue;
+ gf_uuid_copy(args.gfid, local->replies[i].poststat.ia_gfid);
+ args.ia_type = local->replies[i].poststat.ia_type;
+ break;
+ }
+
+ if (local->inode) {
+ if (local->op != GF_FOP_RENAME && local->op != GF_FOP_LINK)
+ afr_replies_interpret(frame, this, local->inode, NULL);
+
+ inode_read_subvol = afr_data_subvol_get(local->inode, this, NULL, NULL,
+ NULL, &args);
+ }
+
+ if (local->parent)
+ parent_read_subvol = afr_data_subvol_get(local->parent, this, NULL,
+ local->readable, NULL, NULL);
+
+ if (local->parent2)
+ parent2_read_subvol = afr_data_subvol_get(local->parent2, this, NULL,
+ local->readable2, NULL, NULL);
+
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ afr_pick_error_xdata(local, priv, local->parent, local->readable,
+ local->parent2, local->readable2);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0) {
+ if (local->inode)
+ afr_inode_need_refresh_set(local->inode, this);
+ if (local->parent)
+ afr_inode_need_refresh_set(local->parent, this);
+ if (local->parent2)
+ afr_inode_need_refresh_set(local->parent2, this);
+ continue;
+ }
+
+ if (local->op_ret == -1) {
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+
+ local->cont.dir_fop.buf = local->replies[i].poststat;
+ local->cont.dir_fop.preparent = local->replies[i].preparent;
+ local->cont.dir_fop.postparent = local->replies[i].postparent;
+ local->cont.dir_fop.prenewparent = local->replies[i].preparent2;
+ local->cont.dir_fop.postnewparent = local->replies[i].postparent2;
+ if (local->xdata_rsp) {
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ }
+
+ if (local->replies[i].xdata)
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ continue;
+ }
+
+ if (i == inode_read_subvol) {
+ local->cont.dir_fop.buf = local->replies[i].poststat;
+ if (local->replies[i].xdata) {
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ }
+ }
+
+ if (i == parent_read_subvol) {
+ local->cont.dir_fop.preparent = local->replies[i].preparent;
+ local->cont.dir_fop.postparent = local->replies[i].postparent;
+ }
+
+ if (i == parent2_read_subvol) {
+ local->cont.dir_fop.prenewparent = local->replies[i].preparent2;
+ local->cont.dir_fop.postnewparent = local->replies[i].postparent2;
+ }
+ }
}
-
-int
-afr_create_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct iatt *buf,
+static void
+__afr_dir_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
+ int op_ret, int op_errno, struct iatt *poststat,
struct iatt *preparent, struct iatt *postparent,
+ struct iatt *preparent2, struct iatt *postparent2,
dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- uint64_t ctx = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int ret = 0;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- ret = afr_fd_ctx_set (this, fd);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not set ctx on fd=%p", fd);
-
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
-
- ret = fd_ctx_get (fd, this, &ctx);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "could not get fd ctx for fd=%p", fd);
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
-
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-
- fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
- fd_ctx->flags = local->cont.create.flags;
+ afr_local_t *local = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ local = frame->local;
+ fd_ctx = local->fd_ctx;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ if (poststat)
+ local->replies[child_index].poststat = *poststat;
+ if (preparent)
+ local->replies[child_index].preparent = *preparent;
+ if (postparent)
+ local->replies[child_index].postparent = *postparent;
+ if (preparent2)
+ local->replies[child_index].preparent2 = *preparent2;
+ if (postparent2)
+ local->replies[child_index].postparent2 = *postparent2;
+ if (fd_ctx)
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ } else {
+ if (op_errno != ENOTEMPTY)
+ afr_transaction_fop_failed(frame, this, child_index);
+ if (fd_ctx)
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ }
+
+ return;
+}
- if (local->success_count == 0)
- local->cont.create.buf = *buf;
+static int
+__afr_dir_write_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ struct iatt *preparent2, struct iatt *postparent2,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = (long)cookie;
+ int call_count = -1;
+ afr_private_t *priv = NULL;
- if (child_index == local->read_child_index) {
- local->cont.create.read_child_buf = *buf;
- local->cont.create.preparent = *preparent;
- local->cont.create.postparent = *postparent;
- }
+ priv = this->private;
+ local = frame->local;
- local->cont.create.inode = inode;
+ LOCK(&frame->lock);
+ {
+ __afr_dir_write_fill(frame, this, child_index, op_ret, op_errno, buf,
+ preparent, postparent, preparent2, postparent2,
+ xdata);
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
+ if (call_count == 0) {
+ __afr_dir_write_finalize(frame, this);
- local->op_errno = op_errno;
+ if (afr_txn_nothing_failed(frame, this)) {
+ /*if it did pre-op, it will do post-op changing ctime*/
+ if (priv->consistent_metadata && afr_needs_changelog_update(local))
+ afr_zero_fill_stat(local);
+ local->transaction.unwind(frame, this);
}
-unlock:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
+ afr_mark_entry_pending_changelog(frame, this);
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child,
- local->cont.create.buf.ia_gfid);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ afr_transaction_resume(frame, this);
+ }
- return 0;
+ return 0;
}
-
int
-afr_create_wind (call_frame_t *frame, xlator_t *this)
+afr_mark_new_entry_changelog_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ int call_count = 0;
- local = frame->local;
- priv = this->private;
+ call_count = afr_frame_return(frame);
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ if (call_count == 0)
+ AFR_STACK_DESTROY(frame);
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_create_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->create,
- &local->loc,
- local->cont.create.flags,
- local->cont.create.mode,
- local->umask,
- local->cont.create.fd,
- local->xdata_req);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return 0;
}
-
-int
-afr_create_done (call_frame_t *frame, xlator_t *this)
+void
+afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ call_frame_t *new_frame = NULL;
+ afr_local_t *local = NULL;
+ afr_local_t *new_local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int32_t **changelog = NULL;
+ int i = 0;
+ int op_errno = ENOMEM;
+ unsigned char *pending = NULL;
+ int call_count = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ new_frame = copy_frame(frame);
+ if (!new_frame)
+ goto out;
+
+ new_local = AFR_FRAME_INIT(new_frame, op_errno);
+ if (!new_local)
+ goto out;
+
+ xattr = dict_new();
+ if (!xattr)
+ goto out;
+
+ pending = alloca0(priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ !local->transaction.failed_subvols[i]) {
+ call_count++;
+ continue;
+ }
+ pending[i] = 1;
+ }
+
+ changelog = afr_mark_pending_changelog(priv, pending, xattr,
+ local->cont.dir_fop.buf.ia_type);
+ if (!changelog)
+ goto out;
+
+ new_local->pending = changelog;
+ gf_uuid_copy(new_local->loc.gfid, local->cont.dir_fop.buf.ia_gfid);
+ new_local->loc.inode = inode_ref(local->inode);
+
+ new_local->call_count = call_count;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (pending[i])
+ continue;
+
+ STACK_WIND_COOKIE(new_frame, afr_mark_new_entry_changelog_cbk,
+ (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->xattrop, &new_local->loc,
+ GF_XATTROP_ADD_ARRAY, xattr, NULL);
+ if (!--call_count)
+ break;
+ }
+
+ new_frame = NULL;
+out:
+ if (new_frame)
+ AFR_STACK_DESTROY(new_frame);
+ if (xattr)
+ dict_unref(xattr);
+ return;
}
-
-int
-afr_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *params)
+void
+afr_mark_entry_pending_changelog(call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int pre_op_count = 0;
+ int failed_count = 0;
+ unsigned char *success_replies = NULL;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- QUORUM_CHECK(create,out);
+ if (local->op_ret < 0)
+ return;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
+ if (local->op != GF_FOP_CREATE && local->op != GF_FOP_MKNOD &&
+ local->op != GF_FOP_MKDIR)
+ return;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ pre_op_count = AFR_COUNT(local->transaction.pre_op, priv->child_count);
+ failed_count = AFR_COUNT(local->transaction.failed_subvols,
+ priv->child_count);
- loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ /* FOP succeeded on all bricks. */
+ if (pre_op_count == priv->child_count && !failed_count)
+ return;
- local->cont.create.flags = flags;
- local->cont.create.mode = mode;
- local->cont.create.fd = fd_ref (fd);
- local->umask = umask;
- if (params)
- local->xdata_req = dict_ref (params);
+ /* FOP did not suceed on quorum no. of bricks. */
+ success_replies = alloca0(priv->child_count);
+ afr_fill_success_replies(local, priv, success_replies);
+ if (!afr_has_quorum(success_replies, this, NULL))
+ return;
- local->transaction.fop = afr_create_wind;
- local->transaction.done = afr_create_done;
- local->transaction.unwind = afr_create_unwind;
+ if (priv->thin_arbiter_count) {
+ /*Mark new entry using ta file*/
+ local->is_new_entry = _gf_true;
+ return;
+ }
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
+ afr_mark_new_entry_changelog(frame, this);
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- ret = 0;
-out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL, NULL);
- }
-
- return 0;
+ return;
}
-/* }}} */
-
-/* {{{ mknod */
+/* {{{ create */
int
-afr_mknod_unwind (call_frame_t *frame, xlator_t *this)
+afr_create_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.mknod.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mknod.read_child_buf;
- } else {
- unwind_buf = &local->cont.mknod.buf;
- }
-
- AFR_STACK_UNWIND (mknod, main_frame,
- local->op_ret, local->op_errno,
- local->cont.mknod.inode,
- unwind_buf, &local->cont.mknod.preparent,
- &local->cont.mknod.postparent,
- NULL);
- }
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(create, main_frame, local->op_ret, local->op_errno,
+ local->cont.create.fd, local->inode,
+ &local->cont.dir_fop.buf, &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_mknod_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
+afr_create_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.mknod.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.mknod.read_child_buf = *buf;
- local->cont.mknod.preparent = *preparent;
- local->cont.mknod.postparent = *postparent;
- }
-
- local->cont.mknod.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child,
- local->cont.mknod.buf.ia_gfid);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
-
-
-int32_t
-afr_mknod_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_mknod_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mknod,
- &local->loc, local->cont.mknod.mode,
- local->cont.mknod.dev,
- local->umask,
- local->xdata_req);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_mknod_done (call_frame_t *frame, xlator_t *this)
+afr_create_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_create_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->create, &local->loc,
+ local->cont.create.flags, local->cont.create.mode,
+ local->umask, local->cont.create.fd, local->xdata_req);
+ return 0;
}
-
int
-afr_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t dev, mode_t umask, dict_t *params)
+afr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- QUORUM_CHECK(mknod,out);
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
-
- local->cont.mknod.mode = mode;
- local->cont.mknod.dev = dev;
- local->umask = umask;
- if (params)
- local->xdata_req = dict_ref (params);
-
- local->transaction.fop = afr_mknod_wind;
- local->transaction.done = afr_mknod_done;
- local->transaction.unwind = afr_mknod_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+
+ local->fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!local->fd_ctx)
+ goto out;
+
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->op = GF_FOP_CREATE;
+ local->cont.create.flags = flags;
+ local->fd_ctx->flags = flags;
+ local->cont.create.mode = mode;
+ local->cont.create.fd = fd_ref(fd);
+ local->umask = umask;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_create_wind;
+ local->transaction.unwind = afr_create_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
/* }}} */
-/* {{{ mkdir */
-
-
-int
-afr_mkdir_unwind (call_frame_t *frame, xlator_t *this)
-{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.mkdir.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.mkdir.read_child_buf;
- } else {
- unwind_buf = &local->cont.mkdir.buf;
- }
-
- AFR_STACK_UNWIND (mkdir, main_frame,
- local->op_ret, local->op_errno,
- local->cont.mkdir.inode,
- unwind_buf, &local->cont.mkdir.preparent,
- &local->cont.mkdir.postparent,
- NULL);
- }
-
- return 0;
-}
-
+/* {{{ mknod */
int
-afr_mkdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+afr_mknod_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.mkdir.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.mkdir.read_child_buf = *buf;
- local->cont.mkdir.preparent = *preparent;
- local->cont.mkdir.postparent = *postparent;
- }
-
- local->cont.mkdir.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child,
- local->cont.mkdir.buf.ia_gfid);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(mknod, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_mkdir_wind (call_frame_t *frame, xlator_t *this)
+afr_mknod_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_mkdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->mkdir,
- &local->loc, local->cont.mkdir.mode,
- local->umask,
- local->xdata_req);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_mkdir_done (call_frame_t *frame, xlator_t *this)
+afr_mknod_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_mknod_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->mknod, &local->loc,
+ local->cont.mknod.mode, local->cont.mknod.dev,
+ local->umask, local->xdata_req);
+ return 0;
}
-
int
-afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
+afr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- QUORUM_CHECK(mkdir,out);
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
-
- local->cont.mkdir.mode = mode;
- local->umask = umask;
- if (params)
- local->xdata_req = dict_ref (params);
-
- local->transaction.fop = afr_mkdir_wind;
- local->transaction.done = afr_mkdir_done;
- local->transaction.unwind = afr_mkdir_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->op = GF_FOP_MKNOD;
+ local->cont.mknod.mode = mode;
+ local->cont.mknod.dev = dev;
+ local->umask = umask;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_mknod_wind;
+ local->transaction.unwind = afr_mknod_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (mkdir, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
-
- return 0;
+ AFR_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
-/* {{{ link */
-
+/* {{{ mkdir */
int
-afr_link_unwind (call_frame_t *frame, xlator_t *this)
+afr_mkdir_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.link.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.link.read_child_buf;
- } else {
- unwind_buf = &local->cont.link.buf;
- }
-
- AFR_STACK_UNWIND (link, main_frame,
- local->op_ret, local->op_errno,
- local->cont.link.inode,
- unwind_buf, &local->cont.link.preparent,
- &local->cont.link.postparent,
- NULL);
- }
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(mkdir, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_link_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+afr_mkdir_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, inode_t *inode,
struct iatt *buf, struct iatt *preparent,
struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0) {
- local->cont.link.buf = *buf;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.link.read_child_buf = *buf;
- local->cont.link.preparent = *preparent;
- local->cont.link.postparent = *postparent;
- }
-
- local->cont.link.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
+}
- call_count = afr_frame_return (frame);
+int
+afr_mkdir_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child,
- local->cont.link.buf.ia_gfid);
- local->transaction.unwind (frame, this);
+ local = frame->local;
+ priv = this->private;
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ STACK_WIND_COOKIE(frame, afr_mkdir_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->mkdir, &local->loc,
+ local->cont.mkdir.mode, local->umask, local->xdata_req);
+ return 0;
}
-
int
-afr_link_wind (call_frame_t *frame, xlator_t *this)
+afr_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->cont.mkdir.mode = mode;
+ local->umask = umask;
+
+ if (!xdata || !dict_get_sizen(xdata, "gfid-req")) {
+ op_errno = EPERM;
+ gf_msg_callingfn(this->name, GF_LOG_WARNING, op_errno,
+ AFR_MSG_GFID_NULL,
+ "mkdir: %s is received "
+ "without gfid-req %p",
+ loc->path, xdata);
+ goto out;
+ }
+
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ if (!local->xdata_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local->op = GF_FOP_MKDIR;
+ local->transaction.wind = afr_mkdir_wind;
+ local->transaction.unwind = afr_mkdir_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ AFR_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
+}
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+/* }}} */
- local->call_count = call_count;
+/* {{{ link */
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_link_wind_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->link,
- &local->loc,
- &local->newloc, local->xdata_req);
+int
+afr_link_unwind(call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- if (!--call_count)
- break;
- }
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(link, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_link_done (call_frame_t *frame, xlator_t *this)
+afr_link_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t * local = frame->local;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
+}
- local->transaction.unwind (frame, this);
+int
+afr_link_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
- return 0;
+ STACK_WIND_COOKIE(frame, afr_link_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->link, &local->loc,
+ &local->newloc, local->xdata_req);
+ return 0;
}
-
int
-afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+afr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- priv = this->private;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- QUORUM_CHECK(link,out);
+ loc_copy(&local->loc, oldloc);
+ loc_copy(&local->newloc, newloc);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
+ local->inode = inode_ref(oldloc->inode);
+ local->parent = inode_ref(newloc->parent);
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ if (!local->xdata_req)
+ goto out;
- loc_copy (&local->loc, oldloc);
- loc_copy (&local->newloc, newloc);
- if (xdata)
- local->xdata_req = dict_ref (xdata);
+ local->op = GF_FOP_LINK;
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
+ local->transaction.wind = afr_link_wind;
+ local->transaction.unwind = afr_link_unwind;
- local->transaction.fop = afr_link_wind;
- local->transaction.done = afr_link_done;
- local->transaction.unwind = afr_link_unwind;
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
- ret = afr_build_parent_loc (&local->transaction.parent_loc, newloc,
- &op_errno);
- if (ret)
- goto out;
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(newloc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (newloc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (link, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
/* {{{ symlink */
-
int
-afr_symlink_unwind (call_frame_t *frame, xlator_t *this)
+afr_symlink_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.symlink.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.symlink.read_child_buf;
- } else {
- unwind_buf = &local->cont.symlink.buf;
- }
-
- AFR_STACK_UNWIND (symlink, main_frame,
- local->op_ret, local->op_errno,
- local->cont.symlink.inode,
- unwind_buf, &local->cont.symlink.preparent,
- &local->cont.symlink.postparent,
- NULL);
- }
-
- return 0;
-}
-
-
-int
-afr_symlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int child_index = -1;
- int32_t *fresh_children = NULL;
-
- local = frame->local;
- priv = this->private;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- local->op_ret = op_ret;
-
- if (local->success_count == 0)
- local->cont.symlink.buf = *buf;
-
- if (child_index == local->read_child_index) {
- local->cont.symlink.read_child_buf = *buf;
- local->cont.symlink.preparent = *preparent;
- local->cont.symlink.postparent = *postparent;
- }
-
- local->cont.symlink.inode = inode;
-
- fresh_children = local->fresh_children;
- fresh_children[local->success_count] = child_index;
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_set_read_ctx_from_policy (this, inode,
- local->fresh_children,
- local->read_child_index,
- priv->read_child,
- local->cont.symlink.buf.ia_gfid);
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(symlink, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->cont.dir_fop.buf,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
+}
int
-afr_symlink_wind (call_frame_t *frame, xlator_t *this)
+afr_symlink_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_symlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->symlink,
- local->cont.symlink.linkpath,
- &local->loc,
- local->umask,
- local->xdata_req);
-
- if (!--call_count)
- break;
-
- }
- }
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_symlink_done (call_frame_t *frame, xlator_t *this)
+afr_symlink_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_symlink_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->symlink,
+ local->cont.symlink.linkpath, &local->loc, local->umask,
+ local->xdata_req);
+ return 0;
}
-
int
-afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, mode_t umask, dict_t *params)
+afr_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- QUORUM_CHECK(symlink,out);
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- loc_copy (&local->loc, loc);
-
- LOCK (&priv->read_child_lock);
- {
- local->read_child_index = (++priv->read_child_rr)
- % (priv->child_count);
- }
- UNLOCK (&priv->read_child_lock);
-
- local->cont.symlink.linkpath = gf_strdup (linkpath);
- local->umask = umask;
- if (params)
- local->xdata_req = dict_ref (params);
-
- local->transaction.fop = afr_symlink_wind;
- local->transaction.done = afr_symlink_done;
- local->transaction.unwind = afr_symlink_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->cont.symlink.linkpath = gf_strdup(linkpath);
+ local->umask = umask;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_SYMLINK;
+ local->transaction.wind = afr_symlink_wind;
+ local->transaction.unwind = afr_symlink_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (symlink, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(symlink, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
/* }}} */
@@ -1256,217 +939,118 @@ out:
/* {{{ rename */
int
-afr_rename_unwind (call_frame_t *frame, xlator_t *this)
+afr_rename_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
- struct iatt *unwind_buf = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- if (local->cont.rename.read_child_buf.ia_ino) {
- unwind_buf = &local->cont.rename.read_child_buf;
- } else {
- unwind_buf = &local->cont.rename.buf;
- }
-
- AFR_STACK_UNWIND (rename, main_frame,
- local->op_ret, local->op_errno,
- unwind_buf,
- &local->cont.rename.preoldparent,
- &local->cont.rename.postoldparent,
- &local->cont.rename.prenewparent,
- &local->cont.rename.postnewparent,
- NULL);
- }
-
- return 0;
-}
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ local = frame->local;
-int
-afr_rename_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = -1;
-
- local = frame->local;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (afr_fop_failed (op_ret, op_errno) && op_errno != ENOTEMPTY)
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
-
- if (buf) {
- local->cont.rename.buf = *buf;
- }
-
- local->success_count++;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.rename.read_child_buf = *buf;
-
- local->cont.rename.preoldparent = *preoldparent;
- local->cont.rename.postoldparent = *postoldparent;
- local->cont.rename.prenewparent = *prenewparent;
- local->cont.rename.postnewparent = *postnewparent;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
-
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(rename, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.buf, &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent,
+ &local->cont.dir_fop.prenewparent,
+ &local->cont.dir_fop.postnewparent, local->xdata_rsp);
+ return 0;
+}
-int32_t
-afr_rename_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_rename_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_rename_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rename,
- &local->loc,
- &local->newloc, NULL);
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
}
-
int
-afr_rename_done (call_frame_t *frame, xlator_t *this)
+afr_rename_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = frame->local;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local->transaction.unwind (frame, this);
+ local = frame->local;
+ priv = this->private;
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ STACK_WIND_COOKIE(frame, afr_rename_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->rename, &local->loc,
+ &local->newloc, local->xdata_req);
+ return 0;
}
-
int
-afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+afr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- QUORUM_CHECK(rename,out);
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- loc_copy (&local->loc, oldloc);
- loc_copy (&local->newloc, newloc);
-
- local->read_child_index = afr_inode_get_read_ctx (this, oldloc->inode, NULL);
-
- local->transaction.fop = afr_rename_wind;
- local->transaction.done = afr_rename_done;
- local->transaction.unwind = afr_rename_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, oldloc,
- &op_errno);
- if (ret)
- goto out;
- ret = afr_build_parent_loc (&local->transaction.new_parent_loc, newloc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (oldloc->path);
- local->transaction.new_basename = AFR_BASENAME (newloc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_RENAME_TRANSACTION);
-
- ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, oldloc);
+ loc_copy(&local->newloc, newloc);
+
+ local->inode = inode_ref(oldloc->inode);
+ local->parent = inode_ref(oldloc->parent);
+ local->parent2 = inode_ref(newloc->parent);
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_RENAME;
+ local->transaction.wind = afr_rename_wind;
+ local->transaction.unwind = afr_rename_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, oldloc,
+ &op_errno);
+ if (ret)
+ goto out;
+ ret = afr_build_parent_loc(&local->transaction.new_parent_loc, newloc,
+ &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(oldloc->path);
+ local->transaction.new_basename = AFR_BASENAME(newloc->path);
+ ret = afr_transaction(transaction_frame, this,
+ AFR_ENTRY_RENAME_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- AFR_STACK_UNWIND (rename, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL, NULL);
- }
-
- return 0;
+ AFR_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
/* }}} */
@@ -1474,395 +1058,205 @@ out:
/* {{{ unlink */
int
-afr_unlink_unwind (call_frame_t *frame, xlator_t *this)
+afr_unlink_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (unlink, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.unlink.preparent,
- &local->cont.unlink.postparent,
- NULL);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
-
-
-int
-afr_unlink_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (child_index == local->read_child_index) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- if (child_index == local->read_child_index) {
- local->cont.unlink.preparent = *preparent;
- local->cont.unlink.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ AFR_STACK_UNWIND(unlink, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
}
-
-int32_t
-afr_unlink_wind (call_frame_t *frame, xlator_t *this)
+int
+afr_unlink_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_unlink_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->unlink,
- &local->loc, local->xflag,
- local->xdata_req);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
-
-int32_t
-afr_unlink_done (call_frame_t *frame, xlator_t *this)
+int
+afr_unlink_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = frame->local;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local->transaction.unwind (frame, this);
+ local = frame->local;
+ priv = this->private;
- AFR_STACK_DESTROY (frame);
-
- return 0;
+ STACK_WIND_COOKIE(frame, afr_unlink_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->unlink, &local->loc,
+ local->xflag, local->xdata_req);
+ return 0;
}
-
-int32_t
-afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata)
+int
+afr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- QUORUM_CHECK(unlink,out);
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- loc_copy (&local->loc, loc);
- local->xflag = xflag;
- if (xdata)
- local->xdata_req = dict_ref (xdata);
-
- local->transaction.fop = afr_unlink_wind;
- local->transaction.done = afr_unlink_done;
- local->transaction.unwind = afr_unlink_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->xflag = xflag;
+
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_UNLINK;
+ local->transaction.wind = afr_unlink_wind;
+ local->transaction.unwind = afr_unlink_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (unlink, frame, -1, op_errno,
- NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
/* {{{ rmdir */
-
-
int
-afr_rmdir_unwind (call_frame_t *frame, xlator_t *this)
+afr_rmdir_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t *main_frame = NULL;
- afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame) {
- main_frame = local->transaction.main_frame;
- }
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (rmdir, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.rmdir.preparent,
- &local->cont.rmdir.postparent,
- NULL);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
-
-
-int
-afr_rmdir_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- afr_local_t * local = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
- int read_child = 0;
-
- local = frame->local;
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno) && (op_errno != ENOTEMPTY))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
-
- }
-
- if (child_index == read_child) {
- local->cont.rmdir.preparent = *preparent;
- local->cont.rmdir.postparent = *postparent;
- }
-
- local->success_count++;
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ AFR_STACK_UNWIND(rmdir, main_frame, local->op_ret, local->op_errno,
+ &local->cont.dir_fop.preparent,
+ &local->cont.dir_fop.postparent, local->xdata_rsp);
+ return 0;
}
-
int
-afr_rmdir_wind (call_frame_t *frame, xlator_t *this)
+afr_rmdir_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_rmdir_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rmdir,
- &local->loc, local->cont.rmdir.flags,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return __afr_dir_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
}
-
int
-afr_rmdir_done (call_frame_t *frame, xlator_t *this)
+afr_rmdir_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
- return 0;
+ STACK_WIND_COOKIE(frame, afr_rmdir_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->rmdir, &local->loc,
+ local->cont.rmdir.flags, local->xdata_req);
+ return 0;
}
-
int
-afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, dict_t *xdata)
+afr_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t * transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- QUORUM_CHECK(rmdir,out);
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- local->cont.rmdir.flags = flags;
- loc_copy (&local->loc, loc);
-
- local->transaction.fop = afr_rmdir_wind;
- local->transaction.done = afr_rmdir_done;
- local->transaction.unwind = afr_rmdir_unwind;
-
- ret = afr_build_parent_loc (&local->transaction.parent_loc, loc,
- &op_errno);
- if (ret)
- goto out;
-
- local->transaction.main_frame = frame;
- local->transaction.basename = AFR_BASENAME (loc->path);
-
- afr_transaction (transaction_frame, this, AFR_ENTRY_TRANSACTION);
-
- ret = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, loc);
+ local->inode = inode_ref(loc->inode);
+ local->parent = inode_ref(loc->parent);
+
+ local->cont.rmdir.flags = flags;
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->op = GF_FOP_RMDIR;
+ local->transaction.wind = afr_rmdir_wind;
+ local->transaction.unwind = afr_rmdir_unwind;
+
+ ret = afr_build_parent_loc(&local->transaction.parent_loc, loc, &op_errno);
+ if (ret)
+ goto out;
+
+ local->transaction.main_frame = frame;
+ local->transaction.basename = AFR_BASENAME(loc->path);
+ ret = afr_transaction(transaction_frame, this, AFR_ENTRY_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
diff --git a/xlators/cluster/afr/src/afr-dir-write.h b/xlators/cluster/afr/src/afr-dir-write.h
index 02f0a3682d9..1d88c3b9b26 100644
--- a/xlators/cluster/afr/src/afr-dir-write.h
+++ b/xlators/cluster/afr/src/afr-dir-write.h
@@ -12,36 +12,35 @@
#define __DIR_WRITE_H__
int32_t
-afr_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata);
+afr_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
int32_t
-afr_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t dev, mode_t umask, dict_t *xdata);
+afr_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *xdata);
int32_t
-afr_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
+afr_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
int32_t
-afr_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata);
+afr_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
int32_t
-afr_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, dict_t *xdata);
+afr_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata);
int32_t
-afr_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata);
+afr_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
int32_t
-afr_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata);
+afr_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
int
-afr_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *oldloc, mode_t umask, dict_t *params);
+afr_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *oldloc, mode_t umask, dict_t *params);
#endif /* __DIR_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-read.c b/xlators/cluster/afr/src/afr-inode-read.c
index 40e5abd3a03..c5521704de2 100644
--- a/xlators/cluster/afr/src/afr-inode-read.c
+++ b/xlators/cluster/afr/src/afr-inode-read.c
@@ -8,7 +8,6 @@
cases as published by the Free Software Foundation.
*/
-
#include <libgen.h>
#include <unistd.h>
#include <fnmatch.h>
@@ -16,1351 +15,1880 @@
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/list.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/quota-common-utils.h>
+
+#include "afr-transaction.h"
+#include "afr-messages.h"
-/**
- * Common algorithm for inode read calls:
- *
- * - Try the fop on the first child that is up
- * - if we have failed due to ENOTCONN:
- * try the next child
- *
- * Applicable to: access, stat, fstat, readlink, getxattr
- */
+/*
+ * Quota size xattrs are not maintained by afr. There is a
+ * possibility that they differ even when both the directory changelog xattrs
+ * suggest everything is fine. So if there is at least one 'source' check among
+ * the sources which has the maximum quota size. Otherwise check among all the
+ * available ones for maximum quota size. This way if there is a source and
+ * stale copies it always votes for the 'source'.
+ * */
+
+int
+afr_handle_quota_size(call_frame_t *frame, xlator_t *this)
+{
+ unsigned char *readable = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ int i = 0;
+ int ret = 0;
+ quota_meta_t size = {
+ 0,
+ };
+ quota_meta_t max_size = {
+ 0,
+ };
+ int readable_cnt = 0;
+ int read_subvol = -1;
+
+ local = frame->local;
+ priv = this->private;
+ replies = local->replies;
+
+ readable = alloca0(priv->child_count);
+
+ afr_inode_read_subvol_get(local->inode, this, readable, 0, 0);
+
+ readable_cnt = AFR_COUNT(readable, priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (readable_cnt && !readable[i])
+ continue;
+ if (!replies[i].xdata)
+ continue;
+ ret = quota_dict_get_meta(replies[i].xdata, QUOTA_SIZE_KEY,
+ SLEN(QUOTA_SIZE_KEY), &size);
+ if (ret == -1)
+ continue;
+ if (read_subvol == -1)
+ read_subvol = i;
+ if (size.size > max_size.size ||
+ (size.file_count + size.dir_count) >
+ (max_size.file_count + max_size.dir_count))
+ read_subvol = i;
+
+ if (size.size > max_size.size)
+ max_size.size = size.size;
+ if (size.file_count > max_size.file_count)
+ max_size.file_count = size.file_count;
+ if (size.dir_count > max_size.dir_count)
+ max_size.dir_count = size.dir_count;
+ }
+
+ if (max_size.size == 0 && max_size.file_count == 0 &&
+ max_size.dir_count == 0)
+ return read_subvol;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (readable_cnt && !readable[i])
+ continue;
+ if (!replies[i].xdata)
+ continue;
+ quota_dict_set_meta(replies[i].xdata, QUOTA_SIZE_KEY, &max_size,
+ IA_IFDIR);
+ }
+
+ return read_subvol;
+}
/* {{{ access */
-int32_t
-afr_access_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+int
+afr_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
+ afr_local_t *local = NULL;
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.access.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
-
- STACK_WIND_COOKIE (frame, afr_access_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->access,
- &local->loc, local->cont.access.mask,
- NULL);
- }
+ local = frame->local;
-out:
- if (unwind) {
- AFR_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
- }
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ afr_read_txn_continue(frame, this, (long)cookie);
return 0;
-}
-
+ }
-int32_t
-afr_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
- dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- int32_t op_errno = 0;
- int32_t read_child = -1;
- int ret = -1;
+ AFR_STACK_UNWIND(access, frame, op_ret, op_errno, xdata);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ return 0;
+}
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+int
+afr_access_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- children = priv->children;
+ priv = this->private;
+ local = frame->local;
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(access, frame, local->op_ret, local->op_errno, 0);
+ return 0;
+ }
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ STACK_WIND_COOKIE(frame, afr_access_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->access, &local->loc,
+ local->cont.access.mask, local->xdata_req);
+ return 0;
+}
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+int
+afr_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int mask,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int op_errno = 0;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- read_child = afr_inode_get_read_ctx (this, loc->inode,
- local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.access.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local->op = GF_FOP_ACCESS;
+ loc_copy(&local->loc, loc);
+ local->cont.access.mask = mask;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
- loc_copy (&local->loc, loc);
- local->cont.access.mask = mask;
+ afr_read_txn(frame, this, loc->inode, afr_access_wind,
+ AFR_METADATA_TRANSACTION);
- STACK_WIND_COOKIE (frame, afr_access_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->access,
- loc, mask, xdata);
-
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (access, frame, -1, op_errno, NULL);
- return 0;
-}
+ AFR_STACK_UNWIND(access, frame, -1, op_errno, NULL);
+ return 0;
+}
/* }}} */
/* {{{ stat */
-int32_t
-afr_stat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf, dict_t *xdata)
+int
+afr_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
+ afr_local_t *local = NULL;
- priv = this->private;
- children = priv->children;
+ local = frame->local;
- read_child = (long) cookie;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- local = frame->local;
-
- if (op_ret == -1) {
- last_index = &local->cont.stat.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
-
- STACK_WIND_COOKIE (frame, afr_stat_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->stat,
- &local->loc, NULL);
- }
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
-out:
- if (unwind) {
- AFR_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
- }
+ AFR_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ return 0;
}
-
-int32_t
-afr_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+int
+afr_stat_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- int32_t op_errno = 0;
- int32_t read_child = -1;
- int ret = -1;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ priv = this->private;
+ local = frame->local;
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno, 0, 0);
+ return 0;
+ }
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ STACK_WIND_COOKIE(
+ frame, afr_stat_cbk, (void *)(long)subvol, priv->children[subvol],
+ priv->children[subvol]->fops->stat, &local->loc, local->xdata_req);
+ return 0;
+}
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+int
+afr_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int op_errno = 0;
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- read_child = afr_inode_get_read_ctx (this, loc->inode,
- local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.stat.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- loc_copy (&local->loc, loc);
+ local->op = GF_FOP_STAT;
+ loc_copy(&local->loc, loc);
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
- STACK_WIND_COOKIE (frame, afr_stat_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->stat,
- loc, xdata);
+ afr_read_txn(frame, this, loc->inode, afr_stat_wind, AFR_DATA_TRANSACTION);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
/* }}} */
/* {{{ fstat */
-int32_t
-afr_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
+int
+afr_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
+ afr_local_t *local = NULL;
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.fstat.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
-
- STACK_WIND_COOKIE (frame, afr_fstat_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->fstat,
- local->fd, NULL);
- }
+ local = frame->local;
-out:
- if (unwind) {
- AFR_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
- }
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ afr_read_txn_continue(frame, this, (long)cookie);
return 0;
-}
-
-
-int32_t
-afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- int32_t op_errno = 0;
- int32_t read_child = 0;
- int ret = -1;
+ }
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (fd, out);
- VALIDATE_OR_GOTO (this->private, out);
+ AFR_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata);
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
-
- VALIDATE_OR_GOTO (fd->inode, out);
+ return 0;
+}
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+int
+afr_fstat_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ priv = this->private;
+ local = frame->local;
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno, 0, 0);
+ return 0;
+ }
- read_child = afr_inode_get_read_ctx (this, fd->inode,
- local->fresh_children);
+ STACK_WIND_COOKIE(
+ frame, afr_fstat_cbk, (void *)(long)subvol, priv->children[subvol],
+ priv->children[subvol]->fops->fstat, local->fd, local->xdata_req);
+ return 0;
+}
+int32_t
+afr_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int op_errno = 0;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.fstat.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local->op = GF_FOP_FSTAT;
+ local->fd = fd_ref(fd);
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
- local->fd = fd_ref (fd);
+ afr_fix_open(fd, this);
- ret = afr_open_fd_fix (frame, this, _gf_false);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
- STACK_WIND_COOKIE (frame, afr_fstat_cbk, (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->fstat,
- fd, xdata);
+ afr_read_txn(frame, this, fd->inode, afr_fstat_wind, AFR_DATA_TRANSACTION);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
+ AFR_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
/* }}} */
/* {{{ readlink */
-int32_t
-afr_readlink_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- const char *buf, struct iatt *sbuf, dict_t *xdata)
+int
+afr_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *buf,
+ struct iatt *sbuf, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
+ afr_local_t *local = NULL;
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.readlink.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_readlink_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->readlink,
- &local->loc,
- local->cont.readlink.size, NULL);
- }
+ local = frame->local;
-out:
- if (unwind) {
- AFR_STACK_UNWIND (readlink, frame, op_ret, op_errno, buf, sbuf,
- xdata);
- }
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ afr_read_txn_continue(frame, this, (long)cookie);
return 0;
-}
+ }
+ AFR_STACK_UNWIND(readlink, frame, op_ret, op_errno, buf, sbuf, xdata);
+ return 0;
+}
-int32_t
-afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size, dict_t *xdata)
+int
+afr_readlink_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- int32_t op_errno = 0;
- int32_t read_child = -1;
- int ret = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local = frame->local;
+ priv = this->private;
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(readlink, frame, local->op_ret, local->op_errno, 0, 0,
+ 0);
+ return 0;
+ }
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ STACK_WIND_COOKIE(frame, afr_readlink_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->readlink, &local->loc,
+ local->cont.readlink.size, local->xdata_req);
+ return 0;
+}
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
- read_child = afr_inode_get_read_ctx (this, loc->inode,
- local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readlink.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+int
+afr_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
- loc_copy (&local->loc, loc);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- local->cont.readlink.size = size;
+ local->op = GF_FOP_READLINK;
+ loc_copy(&local->loc, loc);
+ local->cont.readlink.size = size;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
- STACK_WIND_COOKIE (frame, afr_readlink_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readlink,
- loc, size, xdata);
+ afr_read_txn(frame, this, loc->inode, afr_readlink_wind,
+ AFR_DATA_TRANSACTION);
- ret = 0;
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
+ AFR_STACK_UNWIND(readlink, frame, -1, op_errno, 0, 0, 0);
+ return 0;
+}
/* }}} */
/* {{{ getxattr */
struct _xattr_key {
- char *key;
- struct list_head list;
+ char *key;
+ struct list_head list;
};
-
-void
-__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
- void *data)
+int
+__gather_xattr_keys(dict_t *dict, char *key, data_t *value, void *data)
{
- struct list_head * list = data;
- struct _xattr_key * xkey = NULL;
+ struct list_head *list = data;
+ struct _xattr_key *xkey = NULL;
- if (!strncmp (key, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
+ if (!strncmp(key, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX))) {
+ xkey = GF_MALLOC(sizeof(*xkey), gf_afr_mt_xattr_key);
+ if (!xkey)
+ return -1;
- xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
- if (!xkey)
- return;
+ xkey->key = key;
+ INIT_LIST_HEAD(&xkey->list);
- xkey->key = key;
- INIT_LIST_HEAD (&xkey->list);
-
- list_add_tail (&xkey->list, list);
- }
+ list_add_tail(&xkey->list, list);
+ }
+ return 0;
}
-
void
-__filter_xattrs (dict_t *dict)
+afr_filter_xattrs(dict_t *dict)
{
- struct list_head keys = {0,};
- struct _xattr_key *key = NULL;
- struct _xattr_key *tmp = NULL;
+ struct list_head keys = {
+ 0,
+ };
+ struct _xattr_key *key = NULL;
+ struct _xattr_key *tmp = NULL;
- INIT_LIST_HEAD (&keys);
+ INIT_LIST_HEAD(&keys);
- dict_foreach (dict, __gather_xattr_keys,
- (void *) &keys);
+ dict_foreach(dict, __gather_xattr_keys, (void *)&keys);
- list_for_each_entry_safe (key, tmp, &keys, list) {
- dict_del (dict, key->key);
+ list_for_each_entry_safe(key, tmp, &keys, list)
+ {
+ dict_del(dict, key->key);
- list_del_init (&key->list);
+ list_del_init(&key->list);
- GF_FREE (key);
- }
+ GF_FREE(key);
+ }
}
+static gf_boolean_t
+afr_getxattr_ignorable_errnos(int32_t op_errno)
+{
+ if (op_errno == ENODATA || op_errno == ENOTSUP || op_errno == ERANGE ||
+ op_errno == ENAMETOOLONG)
+ return _gf_true;
-
-int32_t
-afr_getxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+ return _gf_false;
+}
+int
+afr_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
+ afr_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.getxattr.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->getxattr,
- &local->loc,
- local->cont.getxattr.name,
- NULL);
- }
+ if (op_ret < 0 && !afr_getxattr_ignorable_errnos(op_errno)) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
-out:
- if (unwind) {
- if (op_ret >= 0 && dict)
- __filter_xattrs (dict);
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
- }
+ if (dict)
+ afr_filter_xattrs(dict);
+
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
+}
+int
+afr_getxattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno, NULL,
+ NULL);
return 0;
+ }
+
+ STACK_WIND_COOKIE(frame, afr_getxattr_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->getxattr, &local->loc,
+ local->cont.getxattr.name, local->xdata_req);
+ return 0;
}
int32_t
-afr_getxattr_unwind (call_frame_t *frame, int op_ret, int op_errno,
- dict_t *dict, dict_t *xdata)
+afr_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, dict_t *dict,
+ dict_t *xdata)
{
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
int32_t
-afr_getxattr_clrlk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_fgetxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- dict_t *xattr = NULL;
- char *tmp_report = NULL;
- char lk_summary[1024] = {0,};
- int serz_len = 0;
- int32_t callcnt = 0;
- long int cky = 0;
- int ret = 0;
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
- cky = (long) cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1)
- local->child_errno[cky] = op_errno;
-
- if (!local->dict)
- local->dict = dict_new ();
- if (local->dict) {
- ret = dict_get_str (dict, local->cont.getxattr.name,
- &tmp_report);
- if (ret)
- goto unlock;
- ret = dict_set_dynstr (local->dict,
- children[cky]->name,
- gf_strdup (tmp_report));
- if (ret)
- goto unlock;
- }
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {
+ 0,
+ };
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+ int keylen = 0;
+ int children_keylen = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long)cookie;
+ keylen = strlen(local->cont.getxattr.name);
+ children_keylen = strlen(children[cky]->name);
+
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->replies[cky].op_errno = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new();
+ if (local->dict) {
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstrn(local->dict, children[cky]->name,
+ children_keylen, gf_strdup(tmp_report));
+ if (ret)
+ goto unlock;
}
+ }
unlock:
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- xattr = dict_new ();
- if (!xattr) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
- ret = dict_serialize_value_with_delim (local->dict,
- lk_summary,
- &serz_len, '\n');
- if (ret) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Error serializing dictionary");
- goto unwind;
- }
- if (serz_len == -1)
- snprintf (lk_summary, sizeof (lk_summary),
- "No locks cleared.");
- ret = dict_set_dynstr (xattr, local->cont.getxattr.name,
- gf_strdup (lk_summary));
- if (ret) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR,
- "Error setting dictionary");
- goto unwind;
- }
+ UNLOCK(&frame->lock);
+
+ if (!callcnt) {
+ xattr = dict_new();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim(local->dict, lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf(lk_summary, sizeof(lk_summary), "No locks cleared.");
+ ret = dict_set_dynstrn(xattr, local->cont.getxattr.name, keylen,
+ gf_strdup(lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_DICT_SET_FAILED,
+ "Error setting dictionary");
+ goto unwind;
+ }
+
+ op_errno = afr_final_errno(local, priv);
+
+ unwind:
+ AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, xattr, xdata);
+ if (xattr)
+ dict_unref(xattr);
+ }
- unwind:
- // Updating child_errno with more recent 'events'
- local->child_errno[cky] = op_errno;
- op_errno = afr_resultant_errno_get (NULL, local->child_errno,
- priv->child_count);
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+ return ret;
+}
- if (xattr)
- dict_unref (xattr);
+int32_t
+afr_getxattr_clrlk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t **children = NULL;
+ dict_t *xattr = NULL;
+ char *tmp_report = NULL;
+ char lk_summary[1024] = {
+ 0,
+ };
+ int serz_len = 0;
+ int32_t callcnt = 0;
+ long int cky = 0;
+ int ret = 0;
+ int keylen = 0;
+ int children_keylen = 0;
+
+ priv = this->private;
+ children = priv->children;
+
+ local = frame->local;
+ cky = (long)cookie;
+
+ keylen = strlen(local->cont.getxattr.name);
+ children_keylen = strlen(children[cky]->name);
+
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret == -1)
+ local->replies[cky].op_errno = op_errno;
+
+ if (!local->dict)
+ local->dict = dict_new();
+ if (local->dict) {
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen,
+ &tmp_report);
+ if (ret)
+ goto unlock;
+ ret = dict_set_dynstrn(local->dict, children[cky]->name,
+ children_keylen, gf_strdup(tmp_report));
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ if (!callcnt) {
+ xattr = dict_new();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ ret = dict_serialize_value_with_delim(local->dict, lk_summary,
+ &serz_len, '\n');
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ if (serz_len == -1)
+ snprintf(lk_summary, sizeof(lk_summary), "No locks cleared.");
+ ret = dict_set_dynstrn(xattr, local->cont.getxattr.name, keylen,
+ gf_strdup(lk_summary));
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_DICT_SET_FAILED,
+ "Error setting dictionary");
+ goto unwind;
}
- return ret;
+ op_errno = afr_final_errno(local, priv);
+
+ unwind:
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata);
+
+ if (xattr)
+ dict_unref(xattr);
+ }
+
+ return ret;
}
/**
* node-uuid cbk uses next child querying mechanism
*/
int32_t
-afr_getxattr_node_uuid_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_getxattr_node_uuid_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int unwind = 1;
- int curr_call_child = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int unwind = 1;
+ int curr_call_child = 0;
- priv = this->private;
- children = priv->children;
+ priv = this->private;
+ children = priv->children;
- local = frame->local;
+ local = frame->local;
+
+ if (op_ret == -1) { /** query the _next_ child */
+
+ /**
+ * _current_ becomes _next_
+ * If done with all children and yet no success; give up !
+ */
+ curr_call_child = (int)((long)cookie);
+ if (++curr_call_child == priv->child_count)
+ goto unwind;
+
+ gf_msg_debug(this->name, op_errno,
+ "op_ret (-1): Re-querying afr-child (%d/%d)",
+ curr_call_child, priv->child_count);
+
+ unwind = 0;
+ STACK_WIND_COOKIE(
+ frame, afr_getxattr_node_uuid_cbk, (void *)(long)curr_call_child,
+ children[curr_call_child],
+ children[curr_call_child]->fops->getxattr, &local->loc,
+ local->cont.getxattr.name, local->xdata_req);
+ }
+
+unwind:
+ if (unwind)
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
+}
+
+/**
+ * list-node-uuids cbk returns the list of node_uuids for the subvolume.
+ */
+int32_t
+afr_getxattr_list_node_uuids_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr_serz = NULL;
+ long cky = 0;
+ int32_t tlen = 0;
+
+ local = frame->local;
+ priv = this->private;
+ cky = (long)cookie;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+ local->replies[cky].valid = 1;
+ local->replies[cky].op_ret = op_ret;
+ local->replies[cky].op_errno = op_errno;
+
+ if (op_ret < 0)
+ goto unlock;
+
+ local->op_ret = 0;
+
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ local->replies[cky].xattr = dict_ref(dict);
+ }
+
+unlock:
+ UNLOCK(&frame->lock);
- if (op_ret == -1) { /** query the _next_ child */
-
- /**
- * _current_ becomes _next_
- * If done with all childs and yet no success; give up !
- */
- curr_call_child = (int) ((long)cookie);
- if (++curr_call_child == priv->child_count)
- goto unwind;
-
- gf_log (this->name, GF_LOG_WARNING,
- "op_ret (-1): Re-querying afr-child (%d/%d)",
- curr_call_child, priv->child_count);
-
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
- (void *) (long) curr_call_child,
- children[curr_call_child],
- children[curr_call_child]->fops->getxattr,
- &local->loc,
- local->cont.getxattr.name,
- NULL);
+ if (!callcnt) {
+ if (local->op_ret != 0) {
+ /* All bricks gave an error. */
+ local->op_errno = afr_final_errno(local, priv);
+ goto unwind;
}
- unwind:
- if (unwind)
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
+ /*Since we store the UUID0_STR as node uuid for down bricks and
+ *for non zero op_ret, assigning length to priv->child_count
+ *number of uuids*/
+ local->cont.getxattr.xattr_len = (SLEN(UUID0_STR) + 2) *
+ priv->child_count;
+
+ if (!local->dict)
+ local->dict = dict_new();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
- return 0;
+ xattr_serz = GF_CALLOC(local->cont.getxattr.xattr_len, sizeof(char),
+ gf_common_mt_char);
+
+ if (!xattr_serz) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ ret = afr_serialize_xattrs_with_delimiter(frame, this, xattr_serz,
+ UUID0_STR, &tlen, ' ');
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ GF_FREE(xattr_serz);
+ goto unwind;
+ }
+ ret = dict_set_dynstr_sizen(local->dict, GF_XATTR_LIST_NODE_UUIDS_KEY,
+ xattr_serz);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set node_uuid key in dict");
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ if (ret == -EINVAL)
+ GF_FREE(xattr_serz);
+ } else {
+ local->op_ret = local->cont.getxattr.xattr_len - 1;
+ local->op_errno = 0;
+ }
+
+ unwind:
+ AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno,
+ local->dict, local->xdata_rsp);
+ }
+
+ return ret;
}
int32_t
-afr_getxattr_pathinfo_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_getxattr_quota_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- int32_t callcnt = 0;
- int ret = 0;
- char *xattr = NULL;
- char *xattr_serz = NULL;
- char xattr_cky[1024] = {0,};
- dict_t *nxattr = NULL;
- long cky = 0;
- int32_t padding = 0;
- int32_t tlen = 0;
-
- if (!frame || !frame->local || !this) {
- gf_log ("", GF_LOG_ERROR, "possible NULL deref");
- goto out;
+ int idx = (long)cookie;
+ int call_count = 0;
+ afr_local_t *local = frame->local;
+ int read_subvol = -1;
+
+ local->replies[idx].valid = 1;
+ local->replies[idx].op_ret = op_ret;
+ local->replies[idx].op_errno = op_errno;
+ if (dict)
+ local->replies[idx].xdata = dict_ref(dict);
+ call_count = afr_frame_return(frame);
+ if (call_count == 0) {
+ local->inode = inode_ref(local->loc.inode);
+ read_subvol = afr_handle_quota_size(frame, this);
+ if (read_subvol != -1) {
+ op_ret = local->replies[read_subvol].op_ret;
+ op_errno = local->replies[read_subvol].op_errno;
+ dict = local->replies[read_subvol].xdata;
}
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+ }
+
+ return 0;
+}
+
+int32_t
+afr_getxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
+ LOCK(&frame->lock);
+ {
local = frame->local;
- cky = (long) cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (!dict || (op_ret < 0))
- goto out;
-
- if (!local->dict)
- local->dict = dict_new ();
-
- if (local->dict) {
- ret = dict_get_str (dict,
- local->cont.getxattr.name,
- &xattr);
- if (ret)
- goto out;
-
- xattr = gf_strdup (xattr);
-
- (void)snprintf (xattr_cky, 1024, "%s-%ld",
- local->cont.getxattr.name, cky);
- ret = dict_set_dynstr (local->dict,
- xattr_cky, xattr);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Cannot set xattr cookie key");
- goto out;
- }
-
- local->cont.getxattr.xattr_len += strlen (xattr) + 1;
- }
- }
- out:
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (!local->cont.getxattr.xattr_len)
- goto unwind;
-
- nxattr = dict_new ();
- if (!nxattr)
- goto unwind;
-
- /* extra bytes for decorations (brackets and <>'s) */
- padding += strlen (this->name) + strlen (AFR_PATHINFO_HEADER) + 4;
- local->cont.getxattr.xattr_len += (padding + 2);
-
- xattr_serz = GF_CALLOC (local->cont.getxattr.xattr_len,
- sizeof (char), gf_common_mt_char);
-
- if (!xattr_serz)
- goto unwind;
-
- /* the xlator info */
- (void) sprintf (xattr_serz, "(<"AFR_PATHINFO_HEADER"%s> ",
- this->name);
-
- /* actual series of pathinfo */
- ret = dict_serialize_value_with_delim (local->dict,
- xattr_serz + strlen (xattr_serz),
- &tlen, ' ');
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Error serializing"
- " dictionary");
- goto unwind;
+
+ call_cnt = --local->call_count;
+
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
+ }
+
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
}
+ }
+ }
+
+ if (!dict) {
+ goto unlock;
+ }
- /* closing part */
- *(xattr_serz + padding + tlen) = ')';
- *(xattr_serz + padding + tlen + 1) = '\0';
+ op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
- ret = dict_set_dynstr (nxattr, local->cont.getxattr.name,
- xattr_serz);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Cannot set pathinfo"
- " key in dict");
+ if (!lockinfo_buf) {
+ goto unlock;
+ }
+
+ if (!local->dict) {
+ local->dict = dict_new();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy(lockinfo, local->dict);
+ }
+ }
+ }
+
+ if (xdata && local->xdata_rsp) {
+ dict_copy(xdata, local->xdata_rsp);
+ }
+
+ if (!call_cnt) {
+ newdict = dict_new();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
- unwind:
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, nxattr,
- xdata);
+ op_ret = dict_allocate_and_serialize(
+ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len);
+ if (op_ret != 0) {
+ local->op_ret = -1;
+ goto unwind;
+ }
- if (nxattr)
- dict_unref (nxattr);
+ op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
}
- return ret;
+ unwind:
+ AFR_STACK_UNWIND(getxattr, frame, op_ret, op_errno, newdict,
+ local->xdata_rsp);
+ }
+
+ dict_unref(lockinfo);
+
+ return 0;
}
-static gf_boolean_t
-afr_is_special_xattr (const char *name, fop_getxattr_cbk_t *cbk)
+int32_t
+afr_fgetxattr_lockinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- gf_boolean_t is_spl = _gf_true;
+ int call_cnt = 0, len = 0;
+ char *lockinfo_buf = NULL;
+ dict_t *lockinfo = NULL, *newdict = NULL;
+ afr_local_t *local = NULL;
- GF_ASSERT (cbk);
- if (!cbk) {
- is_spl = _gf_false;
- goto out;
+ LOCK(&frame->lock);
+ {
+ local = frame->local;
+
+ call_cnt = --local->call_count;
+
+ if ((op_ret < 0) || (!dict && !xdata)) {
+ goto unlock;
}
- if (!strcmp (name, GF_XATTR_PATHINFO_KEY))
- *cbk = afr_getxattr_pathinfo_cbk;
+ if (xdata) {
+ if (!local->xdata_rsp) {
+ local->xdata_rsp = dict_new();
+ if (!local->xdata_rsp) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
- else if (!strncmp (name, GF_XATTR_CLRLK_CMD,
- strlen (GF_XATTR_CLRLK_CMD)))
- *cbk = afr_getxattr_clrlk_cbk;
- else
- is_spl = _gf_false;
+ if (!dict) {
+ goto unlock;
+ }
-out:
- return is_spl;
-}
+ op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY,
+ (void **)&lockinfo_buf, &len);
-static void
-afr_getxattr_frm_all_children (xlator_t *this, call_frame_t *frame,
- const char *name, loc_t *loc,
- fop_getxattr_cbk_t cbk)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int i = 0;
+ if (!lockinfo_buf) {
+ goto unlock;
+ }
- priv = this->private;
- children = priv->children;
+ if (!local->dict) {
+ local->dict = dict_new();
+ if (!local->dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ if (lockinfo_buf != NULL) {
+ lockinfo = dict_new();
+ if (lockinfo == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ } else {
+ op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo);
+
+ if (lockinfo && local->dict) {
+ dict_copy(lockinfo, local->dict);
+ }
+ }
+ }
+
+ if (xdata && local->xdata_rsp) {
+ dict_copy(xdata, local->xdata_rsp);
+ }
+
+ if (!call_cnt) {
+ newdict = dict_new();
+ if (!newdict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
- local = frame->local;
- local->call_count = priv->child_count;
+ op_ret = dict_allocate_and_serialize(
+ local->dict, (char **)&lockinfo_buf, (unsigned int *)&len);
+ if (op_ret != 0) {
+ local->op_ret = -1;
+ goto unwind;
+ }
- for (i = 0; i < priv->child_count; i++) {
- STACK_WIND_COOKIE (frame, cbk,
- (void *) (long) i,
- children[i], children[i]->fops->getxattr,
- loc, name, NULL);
+ op_ret = dict_set_dynptr(newdict, GF_XATTR_LOCKINFO_KEY,
+ (void *)lockinfo_buf, len);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto unwind;
}
- return;
+
+ unwind:
+ AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, newdict,
+ local->xdata_rsp);
+ }
+
+ dict_unref(lockinfo);
+
+ return 0;
}
int32_t
-afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
+afr_fgetxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- xlator_t **sub_volumes = NULL;
- int i = 0;
- int32_t op_errno = 0;
- int32_t read_child = -1;
- int ret = -1;
- fop_getxattr_cbk_t cbk = NULL;
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ int keylen = 0;
+ char xattr_cky[1024] = {
+ 0,
+ };
+ int xattr_cky_len = 0;
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_msg("", GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+ cky = (long)cookie;
+ keylen = strlen(local->cont.getxattr.name);
+ xattr_cky_len = snprintf(xattr_cky, sizeof(xattr_cky), "%s-%ld",
+ local->cont.getxattr.name, cky);
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+ if (op_ret < 0) {
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
+ }
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (!dict || (op_ret < 0))
+ goto unlock;
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ if (!local->dict) {
+ local->dict = dict_new();
+ if (!local->dict)
+ goto unlock;
+ }
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen, &xattr);
+ if (ret)
+ goto unlock;
- children = priv->children;
+ xattr = gf_strdup(xattr);
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ ret = dict_set_dynstrn(local->dict, xattr_cky, xattr_cky_len, xattr);
+ if (ret) {
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set xattr cookie key");
+ goto post_unlock;
+ }
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->cont.getxattr.xattr_len += strlen(xattr) + 1;
+ }
+unlock:
+ UNLOCK(&frame->lock);
+post_unlock:
+ if (!callcnt) {
+ if (!local->cont.getxattr.xattr_len)
+ goto unwind;
- loc_copy (&local->loc, loc);
- if (!name)
- goto no_name;
+ nxattr = dict_new();
+ if (!nxattr)
+ goto unwind;
- local->cont.getxattr.name = gf_strdup (name);
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding += strlen(this->name) + SLEN(AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
- if (!strncmp (name, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: no data present for key %s",
- loc->path, name);
- op_errno = ENODATA;
- goto out;
- }
- if ((strcmp (GF_XATTR_MARKER_KEY, name) == 0)
- && (-1 == frame->root->pid)) {
+ xattr_serz = GF_MALLOC(local->cont.getxattr.xattr_len,
+ gf_common_mt_char);
- local->marker.call_count = priv->child_count;
+ if (!xattr_serz)
+ goto unwind;
- sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
- for (i = 0, trav = this->children; trav ;
- trav = trav->next, i++) {
+ /* the xlator info */
+ int xattr_serz_len = sprintf(
+ xattr_serz, "(<" AFR_PATHINFO_HEADER "%s> ", this->name);
- *(sub_volumes + i) = trav->xlator;
- }
+ /* actual series of pathinfo */
+ ret = dict_serialize_value_with_delim(
+ local->dict, xattr_serz + xattr_serz_len, &tlen, ' ');
+ if (ret) {
+ GF_FREE(xattr_serz);
+ goto unwind;
+ }
- if (cluster_getmarkerattr (frame, this, loc, name,
- local, afr_getxattr_unwind,
- sub_volumes,
- priv->child_count,
- MARKER_UUID_TYPE,
- priv->vol_uuid)) {
-
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to get marker attr (%s)",
- loc->path, name);
- op_errno = EINVAL;
- goto out;
- }
+ /* closing part */
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
- return 0;
+ ret = dict_set_dynstrn(nxattr, local->cont.getxattr.name, keylen,
+ xattr_serz);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set pathinfo key in dict");
+ if (ret == -EINVAL)
+ GF_FREE(xattr_serz);
}
- /*
- * if we are doing getxattr with pathinfo as the key then we
- * collect information from all childs
- */
- if (afr_is_special_xattr (name, &cbk)) {
- afr_getxattr_frm_all_children (this, frame, name,
- loc, cbk);
- return 0;
+ unwind:
+ AFR_STACK_UNWIND(fgetxattr, frame, local->op_ret, local->op_errno,
+ nxattr, local->xdata_rsp);
+
+ if (nxattr)
+ dict_unref(nxattr);
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+afr_getxattr_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
+ int ret = 0;
+ char *xattr = NULL;
+ char *xattr_serz = NULL;
+ char xattr_cky[1024] = {
+ 0,
+ };
+ int keylen = 0;
+ int xattr_cky_len = 0;
+ dict_t *nxattr = NULL;
+ long cky = 0;
+ int32_t padding = 0;
+ int32_t tlen = 0;
+
+ if (!frame || !frame->local || !this) {
+ gf_msg("", GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+ cky = (long)cookie;
+ keylen = strlen(local->cont.getxattr.name);
+ xattr_cky_len = snprintf(xattr_cky, sizeof(xattr_cky), "%s-%ld",
+ local->cont.getxattr.name, cky);
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret < 0) {
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = op_ret;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
}
- if (XATTR_IS_NODE_UUID (name)) {
- i = 0;
- STACK_WIND_COOKIE (frame, afr_getxattr_node_uuid_cbk,
- (void *) (long) i,
- children[i],
- children[i]->fops->getxattr,
- loc, name, xdata);
- return 0;
+ if (!dict || (op_ret < 0))
+ goto unlock;
+
+ if (!local->dict) {
+ local->dict = dict_new();
+ if (!local->dict)
+ goto unlock;
}
+ ret = dict_get_strn(dict, local->cont.getxattr.name, keylen, &xattr);
+ if (ret)
+ goto unlock;
- if (*priv->vol_uuid) {
- if ((match_uuid_local (name, priv->vol_uuid) == 0)
- && (-1 == frame->root->pid)) {
- local->marker.call_count = priv->child_count;
-
- sub_volumes = alloca ( priv->child_count * sizeof (xlator_t *));
- for (i = 0, trav = this->children; trav ;
- trav = trav->next, i++) {
-
- *(sub_volumes + i) = trav->xlator;
-
- }
-
- if (cluster_getmarkerattr (frame, this, loc,
- name, local,
- afr_getxattr_unwind,
- sub_volumes,
- priv->child_count,
- MARKER_XTIME_TYPE,
- priv->vol_uuid)) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to get marker attr (%s)",
- loc->path, name);
- op_errno = EINVAL;
- goto out;
- }
-
- return 0;
- }
+ xattr = gf_strdup(xattr);
+
+ ret = dict_set_dynstrn(local->dict, xattr_cky, xattr_cky_len, xattr);
+ if (ret) {
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set xattr cookie key");
+ goto post_unlock;
}
-no_name:
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
+ local->cont.getxattr.xattr_len += strlen(xattr) + 1;
+ }
+unlock:
+ UNLOCK(&frame->lock);
+post_unlock:
+ if (!callcnt) {
+ if (!local->cont.getxattr.xattr_len)
+ goto unwind;
+
+ nxattr = dict_new();
+ if (!nxattr)
+ goto unwind;
+
+ /* extra bytes for decorations (brackets and <>'s) */
+ padding += strlen(this->name) + SLEN(AFR_PATHINFO_HEADER) + 4;
+ local->cont.getxattr.xattr_len += (padding + 2);
+
+ xattr_serz = GF_MALLOC(local->cont.getxattr.xattr_len,
+ gf_common_mt_char);
+
+ if (!xattr_serz)
+ goto unwind;
+
+ /* the xlator info */
+ int xattr_serz_len = sprintf(
+ xattr_serz, "(<" AFR_PATHINFO_HEADER "%s> ", this->name);
+
+ /* actual series of pathinfo */
+ ret = dict_serialize_value_with_delim(
+ local->dict, xattr_serz + xattr_serz_len, &tlen, ' ');
+ if (ret) {
+ GF_FREE(xattr_serz);
+ goto unwind;
}
- read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.getxattr.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
+ /* closing part */
+ *(xattr_serz + padding + tlen) = ')';
+ *(xattr_serz + padding + tlen + 1) = '\0';
+
+ ret = dict_set_dynstrn(nxattr, local->cont.getxattr.name, keylen,
+ xattr_serz);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Cannot set pathinfo key in dict");
+ if (ret == -EINVAL)
+ GF_FREE(xattr_serz);
}
- STACK_WIND_COOKIE (frame, afr_getxattr_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->getxattr,
- loc, name, xdata);
+ unwind:
+ AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno,
+ nxattr, local->xdata_rsp);
+
+ if (nxattr)
+ dict_unref(nxattr);
+ }
- ret = 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return ret;
}
-/* {{{ fgetxattr */
+static int
+afr_aggregate_stime_xattr(dict_t *this, char *key, data_t *value, void *data)
+{
+ int ret = 0;
+
+ if (fnmatch(GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0)
+ ret = gf_get_max_stime(THIS, data, key, value);
+ return ret;
+}
int32_t
-afr_fgetxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+afr_common_getxattr_stime_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
- priv = this->private;
- children = priv->children;
+ afr_local_t *local = NULL;
+ int32_t callcnt = 0;
- local = frame->local;
+ if (!frame || !frame->local || !this) {
+ gf_msg("", GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "possible NULL deref");
+ goto out;
+ }
+
+ local = frame->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (!dict || (op_ret < 0)) {
+ local->op_errno = op_errno;
+ goto cleanup;
+ }
+
+ if (!local->dict)
+ local->dict = dict_copy_with_ref(dict, NULL);
+ else
+ dict_foreach(dict, afr_aggregate_stime_xattr, local->dict);
+ local->op_ret = 0;
+ }
+
+cleanup:
+ UNLOCK(&frame->lock);
+
+ if (!callcnt) {
+ AFR_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno,
+ local->dict, xdata);
+ }
+
+out:
+ return 0;
+}
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.getxattr.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
- STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->fgetxattr,
- local->fd,
- local->cont.getxattr.name,
- NULL);
+static gf_boolean_t
+afr_is_special_xattr(const char *name, fop_getxattr_cbk_t *cbk,
+ gf_boolean_t is_fgetxattr)
+{
+ gf_boolean_t is_spl = _gf_true;
+
+ GF_ASSERT(cbk);
+ if (!cbk || !name) {
+ is_spl = _gf_false;
+ goto out;
+ }
+
+ if (!strcmp(name, GF_XATTR_PATHINFO_KEY) ||
+ !strcmp(name, GF_XATTR_USER_PATHINFO_KEY)) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_pathinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_pathinfo_cbk;
+ }
+ } else if (!strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_clrlk_cbk;
+ } else {
+ *cbk = afr_getxattr_clrlk_cbk;
}
+ } else if (!strncmp(name, GF_XATTR_LOCKINFO_KEY,
+ SLEN(GF_XATTR_LOCKINFO_KEY))) {
+ if (is_fgetxattr) {
+ *cbk = afr_fgetxattr_lockinfo_cbk;
+ } else {
+ *cbk = afr_getxattr_lockinfo_cbk;
+ }
+ } else if (fnmatch(GF_XATTR_STIME_PATTERN, name, FNM_NOESCAPE) == 0) {
+ *cbk = afr_common_getxattr_stime_cbk;
+ } else if (strcmp(name, QUOTA_SIZE_KEY) == 0) {
+ *cbk = afr_getxattr_quota_size_cbk;
+ } else if (!strcmp(name, GF_XATTR_LIST_NODE_UUIDS_KEY)) {
+ *cbk = afr_getxattr_list_node_uuids_cbk;
+ } else {
+ is_spl = _gf_false;
+ }
out:
- if (unwind) {
- if (op_ret >= 0 && dict)
- __filter_xattrs (dict);
+ return is_spl;
+}
- AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict,
- xdata);
+static void
+afr_getxattr_all_subvols(xlator_t *this, call_frame_t *frame, const char *name,
+ loc_t *loc, fop_getxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ priv = this->private;
+
+ local = frame->local;
+ // local->call_count set in afr_local_init
+ call_count = local->call_count;
+
+ if (!strcmp(name, GF_XATTR_LIST_NODE_UUIDS_KEY)) {
+ GF_FREE(local->cont.getxattr.name);
+ local->cont.getxattr.name = gf_strdup(GF_XATTR_NODE_UUID_KEY);
+ }
+
+ // If up-children count is 0, afr_local_init would have failed already
+ // and the call would have unwound so not handling it here.
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->getxattr, loc,
+ local->cont.getxattr.name, NULL);
+ if (!--call_count)
+ break;
}
+ }
+ return;
+}
- return 0;
+int
+afr_marker_populate_args(call_frame_t *frame, int type, int *gauge,
+ xlator_t **subvols)
+{
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
+
+ memcpy(subvols, priv->children, sizeof(*subvols) * priv->child_count);
+
+ if (type == MARKER_XTIME_TYPE) {
+ /*Don't error out on ENOENT/ENOTCONN */
+ gauge[MCNT_NOTFOUND] = 0;
+ gauge[MCNT_ENOTCONN] = 0;
+ }
+ return priv->child_count;
}
-int32_t
-afr_fgetxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+static int
+afr_handle_heal_xattrs(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *heal_op)
+{
+ int ret = -1;
+ afr_spb_status_t *data = NULL;
+
+ if (!strcmp(heal_op, GF_HEAL_INFO)) {
+ afr_get_heal_info(frame, this, loc);
+ ret = 0;
+ goto out;
+ }
+
+ if (!strcmp(heal_op, GF_AFR_HEAL_SBRAIN)) {
+ afr_heal_splitbrain_file(frame, this, loc);
+ ret = 0;
+ goto out;
+ }
+
+ if (!strcmp(heal_op, GF_AFR_SBRAIN_STATUS)) {
+ data = GF_CALLOC(1, sizeof(*data), gf_afr_mt_spb_status_t);
+ if (!data) {
+ ret = 1;
+ goto out;
+ }
+ data->frame = frame;
+ data->loc = loc;
+ ret = synctask_new(this->ctx->env, afr_get_split_brain_status,
+ afr_get_split_brain_status_cbk, NULL, data);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS,
+ "Failed to create"
+ " synctask. Unable to fetch split-brain status"
+ " for %s.",
+ loc->name);
+ ret = 1;
+ goto out;
+ }
+ goto out;
+ }
+
+out:
+ if (ret == 1) {
+ AFR_STACK_UNWIND(getxattr, frame, -1, ENOMEM, NULL, NULL);
+ if (data)
+ GF_FREE(data);
+ ret = 0;
+ }
+ return ret;
+}
+int32_t
+afr_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata)
{
- AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t **children = NULL;
+ int i = 0;
+ int32_t op_errno = 0;
+ int ret = -1;
+ fop_getxattr_cbk_t cbk = NULL;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ priv = this->private;
+
+ children = priv->children;
+
+ loc_copy(&local->loc, loc);
+
+ local->op = GF_FOP_GETXATTR;
+
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ if (!name)
+ goto no_name;
+
+ local->cont.getxattr.name = gf_strdup(name);
+
+ if (!local->cont.getxattr.name) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (!strncmp(name, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX))) {
+ op_errno = ENODATA;
+ goto out;
+ }
+
+ if (cluster_handle_marker_getxattr(frame, loc, name, priv->vol_uuid,
+ afr_getxattr_unwind,
+ afr_marker_populate_args) == 0)
+ return 0;
+
+ ret = afr_handle_heal_xattrs(frame, this, &local->loc, name);
+ if (ret == 0)
+ return 0;
+
+ /*
+ * Heal daemons don't have IO threads ... and as a result they
+ * send this getxattr down and eventually crash :(
+ */
+ op_errno = -1;
+ GF_CHECK_XATTR_KEY_AND_GOTO(name, IO_THREADS_QUEUE_SIZE_KEY, op_errno, out);
+
+ /*
+ * Special xattrs which need responses from all subvols
+ */
+ if (afr_is_special_xattr(name, &cbk, 0)) {
+ afr_getxattr_all_subvols(this, frame, name, loc, cbk);
+ return 0;
+ }
+
+ if (XATTR_IS_NODE_UUID(name)) {
+ i = 0;
+ STACK_WIND_COOKIE(frame, afr_getxattr_node_uuid_cbk, (void *)(long)i,
+ children[i], children[i]->fops->getxattr, loc, name,
+ xdata);
return 0;
+ }
+
+no_name:
+
+ afr_read_txn(frame, this, local->loc.inode, afr_getxattr_wind,
+ AFR_METADATA_TRANSACTION);
+
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND(getxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
+/* {{{ fgetxattr */
+
int32_t
-afr_fgetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata)
+afr_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- xlator_t **children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int32_t read_child = -1;
+ afr_local_t *local = NULL;
+ local = frame->local;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
- children = priv->children;
+ if (dict)
+ afr_filter_xattrs(dict);
- AFR_LOCAL_ALLOC_OR_GOTO (local, out);
- frame->local = local;
+ AFR_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata);
- op_ret = afr_local_init (local, priv, &op_errno);
- if (op_ret < 0) {
- op_errno = -op_ret;
- goto out;
- }
+ return 0;
+}
+
+int
+afr_fgetxattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local->fd = fd_ref (fd);
- if (name)
- local->cont.getxattr.name = gf_strdup (name);
+ local = frame->local;
+ priv = this->private;
- /* pathinfo gets handled only in getxattr() */
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(fgetxattr, frame, local->op_ret, local->op_errno, NULL,
+ NULL);
+ return 0;
+ }
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ STACK_WIND_COOKIE(frame, afr_fgetxattr_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fgetxattr, local->fd,
+ local->cont.getxattr.name, local->xdata_req);
+ return 0;
+}
- read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);
- op_ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.getxattr.last_index);
- if (op_ret < 0) {
- op_errno = -op_ret;
- op_ret = -1;
- goto out;
+static void
+afr_fgetxattr_all_subvols(xlator_t *this, call_frame_t *frame,
+ fop_fgetxattr_cbk_t cbk)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+
+ priv = this->private;
+
+ local = frame->local;
+ // local->call_count set in afr_local_init
+ call_count = local->call_count;
+
+ // If up-children count is 0, afr_local_init would have failed already
+ // and the call would have unwound so not handling it here.
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, cbk, (void *)(long)i, priv->children[i],
+ priv->children[i]->fops->fgetxattr, local->fd,
+ local->cont.getxattr.name, NULL);
+ if (!--call_count)
+ break;
}
+ }
- STACK_WIND_COOKIE (frame, afr_fgetxattr_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->fgetxattr,
- fd, name, xdata);
+ return;
+}
- op_ret = 0;
-out:
- if (op_ret == -1) {
- AFR_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno, NULL, NULL);
+int
+afr_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+ fop_fgetxattr_cbk_t cbk = NULL;
+
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_FGETXATTR;
+ local->fd = fd_ref(fd);
+ if (name) {
+ local->cont.getxattr.name = gf_strdup(name);
+ if (!local->cont.getxattr.name) {
+ op_errno = ENOMEM;
+ goto out;
}
+ }
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ /* pathinfo gets handled only in getxattr(), but we need to handle
+ * lockinfo.
+ * If we are doing fgetxattr with lockinfo as the key then we
+ * collect information from all children.
+ */
+ if (afr_is_special_xattr(name, &cbk, 1)) {
+ afr_fgetxattr_all_subvols(this, frame, cbk);
return 0;
-}
+ }
+
+ afr_fix_open(fd, this);
+ afr_read_txn(frame, this, fd->inode, afr_fgetxattr_wind,
+ AFR_METADATA_TRANSACTION);
+
+ return 0;
+out:
+ AFR_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
/* }}} */
/* {{{ readv */
-/**
- * read algorithm:
- *
- * if the user has specified a read subvolume, use it
- * otherwise -
- * use the inode number to hash it to one of the subvolumes, and
- * read from there (to balance read load)
- *
- * if any of the above read's fail, try the children in sequence
- * beginning at the beginning
- */
+int
+afr_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *buf, struct iobref *iobref, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
-int32_t
-afr_readv_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref, dict_t *xdata)
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
+
+ AFR_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, buf, iobref,
+ xdata);
+ return 0;
+}
+
+int
+afr_readv_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t *fresh_children = NULL;
- int32_t read_child = -1;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ local = frame->local;
+ priv = this->private;
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, 0, 0, 0,
+ 0, 0);
+ return 0;
+ }
- children = priv->children;
+ STACK_WIND_COOKIE(
+ frame, afr_readv_cbk, (void *)(long)subvol, priv->children[subvol],
+ priv->children[subvol]->fops->readv, local->fd, local->cont.readv.size,
+ local->cont.readv.offset, local->cont.readv.flags, local->xdata_req);
+ return 0;
+}
- local = frame->local;
+int
+afr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.readv.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
-
- STACK_WIND_COOKIE (frame, afr_readv_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->readv,
- local->fd, local->cont.readv.size,
- local->cont.readv.offset,
- local->cont.readv.flags,
- NULL);
- }
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_READ;
+ local->fd = fd_ref(fd);
+ local->cont.readv.size = size;
+ local->cont.readv.offset = offset;
+ local->cont.readv.flags = flags;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ afr_fix_open(fd, this);
+ afr_read_txn(frame, this, fd->inode, afr_readv_wind, AFR_DATA_TRANSACTION);
+
+ return 0;
out:
- if (unwind) {
- AFR_STACK_UNWIND (readv, frame, op_ret, op_errno,
- vector, count, buf, iobref, xdata);
- }
+ AFR_STACK_UNWIND(readv, frame, -1, op_errno, 0, 0, 0, 0, 0);
- return 0;
+ return 0;
}
+/* }}} */
-int32_t
-afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+/* {{{ seek */
+
+int
+afr_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, off_t offset, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- int32_t op_errno = 0;
- int32_t read_child = -1;
- int ret = -1;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (fd, out);
-
- priv = this->private;
- children = priv->children;
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ afr_local_t *local = NULL;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = frame->local;
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children) {
- op_errno = ENOMEM;
- goto out;
- }
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- read_child = afr_inode_get_read_ctx (this, fd->inode, local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.readv.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ afr_read_txn_continue(frame, this, (long)cookie);
+ return 0;
+ }
- local->fd = fd_ref (fd);
+ AFR_STACK_UNWIND(seek, frame, op_ret, op_errno, offset, xdata);
+ return 0;
+}
- local->cont.readv.size = size;
- local->cont.readv.offset = offset;
- local->cont.readv.flags = flags;
+int
+afr_seek_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- ret = afr_open_fd_fix (frame, this, _gf_false);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
- STACK_WIND_COOKIE (frame, afr_readv_cbk,
- (void *) (long) call_child,
- children[call_child],
- children[call_child]->fops->readv,
- fd, size, offset, flags, xdata);
+ local = frame->local;
+ priv = this->private;
- ret = 0;
-out:
- if (ret < 0) {
- AFR_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL,
- NULL, NULL);
- }
+ if (subvol == -1) {
+ AFR_STACK_UNWIND(seek, frame, local->op_ret, local->op_errno, 0, NULL);
return 0;
+ }
+
+ STACK_WIND_COOKIE(
+ frame, afr_seek_cbk, (void *)(long)subvol, priv->children[subvol],
+ priv->children[subvol]->fops->seek, local->fd, local->cont.seek.offset,
+ local->cont.seek.what, local->xdata_req);
+ return 0;
}
+int
+afr_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int32_t op_errno = 0;
+
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_SEEK;
+ local->fd = fd_ref(fd);
+ local->cont.seek.offset = offset;
+ local->cont.seek.what = what;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ afr_fix_open(fd, this);
+
+ afr_read_txn(frame, this, fd->inode, afr_seek_wind, AFR_DATA_TRANSACTION);
+
+ return 0;
+out:
+ AFR_STACK_UNWIND(seek, frame, -1, op_errno, 0, NULL);
+
+ return 0;
+}
/* }}} */
diff --git a/xlators/cluster/afr/src/afr-inode-read.h b/xlators/cluster/afr/src/afr-inode-read.h
index e4091a793e0..8c982bc7e6f 100644
--- a/xlators/cluster/afr/src/afr-inode-read.h
+++ b/xlators/cluster/afr/src/afr-inode-read.h
@@ -12,31 +12,34 @@
#define __INODE_READ_H__
int32_t
-afr_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask, dict_t *xdata);
+afr_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata);
int32_t
-afr_stat (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xdata);
+afr_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
int32_t
-afr_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *xdata);
+afr_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
int32_t
-afr_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size, dict_t *xdata);
+afr_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata);
int32_t
-afr_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata);
+afr_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
int32_t
-afr_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata);
+afr_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata);
int32_t
-afr_fgetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata);
-
+afr_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata);
+
+int
+afr_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata);
+int
+afr_handle_quota_size(call_frame_t *frame, xlator_t *this);
#endif /* __INODE_READ_H__ */
diff --git a/xlators/cluster/afr/src/afr-inode-write.c b/xlators/cluster/afr/src/afr-inode-write.c
index 02e36b13aba..1d6e4f3570a 100644
--- a/xlators/cluster/afr/src/afr-inode-write.c
+++ b/xlators/cluster/afr/src/afr-inode-write.c
@@ -8,2101 +8,2558 @@
cases as published by the Free Software Foundation.
*/
-
-#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-
+#include <glusterfs/glusterfs.h>
#include "afr.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include "protocol-common.h"
+#include <glusterfs/byte-order.h>
#include "afr-transaction.h"
-#include "afr-self-heal-common.h"
+#include "afr-self-heal.h"
+#include "afr-messages.h"
-/* {{{ writev */
-
-int
-afr_writev_unwind (call_frame_t *frame, xlator_t *this)
+static void
+__afr_inode_write_finalize(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (writev, main_frame,
- local->op_ret, local->op_errno,
- &local->cont.writev.prebuf,
- &local->cont.writev.postbuf,
- NULL);
- }
- return 0;
+ int i = 0;
+ int ret = 0;
+ int read_subvol = 0;
+ struct iatt *stbuf = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_read_subvol_args_t args = {
+ 0,
+ };
+
+ local = frame->local;
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, local->inode, out);
+
+ /*This code needs to stay till DHT sends fops on linked
+ * inodes*/
+ if (!inode_is_linked(local->inode)) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret == -1)
+ continue;
+ if (!gf_uuid_is_null(local->replies[i].poststat.ia_gfid)) {
+ gf_uuid_copy(args.gfid, local->replies[i].poststat.ia_gfid);
+ args.ia_type = local->replies[i].poststat.ia_type;
+ break;
+ } else {
+ ret = dict_get_bin(local->replies[i].xdata,
+ DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
+ if (ret)
+ continue;
+ gf_uuid_copy(args.gfid, stbuf->ia_gfid);
+ args.ia_type = stbuf->ia_type;
+ break;
+ }
+ }
+ }
+
+ if (local->transaction.type == AFR_METADATA_TRANSACTION) {
+ read_subvol = afr_metadata_subvol_get(local->inode, this, NULL,
+ local->readable, NULL, &args);
+ } else {
+ read_subvol = afr_data_subvol_get(local->inode, this, NULL,
+ local->readable, NULL, &args);
+ }
+
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ afr_pick_error_xdata(local, priv, local->inode, local->readable, NULL,
+ NULL);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+ if (local->replies[i].op_ret < 0)
+ continue;
+
+ /* Order of checks in the compound conditional
+ below is important.
+
+ - Highest precedence: largest op_ret
+ - Next precedence: if all op_rets are equal, read subvol
+ - Least precedence: any succeeded subvol
+ */
+ if ((local->op_ret < local->replies[i].op_ret) ||
+ ((local->op_ret == local->replies[i].op_ret) &&
+ (i == read_subvol))) {
+ local->op_ret = local->replies[i].op_ret;
+ local->op_errno = local->replies[i].op_errno;
+
+ local->cont.inode_wfop.prebuf = local->replies[i].prestat;
+ local->cont.inode_wfop.postbuf = local->replies[i].poststat;
+
+ if (local->replies[i].xdata) {
+ if (local->xdata_rsp)
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ }
+ if (local->replies[i].xattr) {
+ if (local->xattr_rsp)
+ dict_unref(local->xattr_rsp);
+ local->xattr_rsp = dict_ref(local->replies[i].xattr);
+ }
+ }
+ }
+
+ afr_set_in_flight_sb_status(this, frame, local->inode);
+out:
+ return;
}
-
-int
-afr_writev_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+static void
+__afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xattr, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
- int read_child = 0;
-
- local = frame->local;
-
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ local->replies[child_index].valid = 1;
+
+ if (AFR_IS_ARBITER_BRICK(priv, child_index) && op_ret == 1)
+ op_ret = iov_length(local->cont.writev.vector,
+ local->cont.writev.count);
+
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+ if (xdata)
+ local->replies[child_index].xdata = dict_ref(xdata);
+
+ if (op_ret >= 0) {
+ if (prebuf)
+ local->replies[child_index].prestat = *prebuf;
+ if (postbuf)
+ local->replies[child_index].poststat = *postbuf;
+ if (xattr)
+ local->replies[child_index].xattr = dict_ref(xattr);
+ } else {
+ afr_transaction_fop_failed(frame, this, child_index);
+ }
+
+ return;
+}
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+static int
+__afr_inode_write_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ int child_index = (long)cookie;
+ int call_count = -1;
+ afr_private_t *priv = NULL;
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ priv = this->private;
+ local = frame->local;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
- }
+ LOCK(&frame->lock);
+ {
+ __afr_inode_write_fill(frame, this, child_index, op_ret, op_errno,
+ prebuf, postbuf, xattr, xdata);
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
- if (child_index == read_child) {
- local->cont.writev.prebuf = *prebuf;
- local->cont.writev.postbuf = *postbuf;
- }
- }
+ if (call_count == 0) {
+ __afr_inode_write_finalize(frame, this);
- local->op_errno = op_errno;
+ if (afr_txn_nothing_failed(frame, this)) {
+ /*if it did pre-op, it will do post-op changing ctime*/
+ if (priv->consistent_metadata && afr_needs_changelog_update(local))
+ afr_zero_fill_stat(local);
+ local->transaction.unwind(frame, this);
}
- UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ afr_transaction_resume(frame, this);
+ }
- if (call_count == 0) {
- local->transaction.unwind (frame, this);
-
- local->transaction.resume (frame, this);
- }
- return 0;
+ return 0;
}
-int
-afr_writev_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- int call_count = -1;
-
- local = frame->local;
- priv = this->private;
+/* {{{ writev */
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+void
+afr_writev_copy_outvars(call_frame_t *src_frame, call_frame_t *dst_frame)
+{
+ afr_local_t *src_local = NULL;
+ afr_local_t *dst_local = NULL;
+
+ src_local = src_frame->local;
+ dst_local = dst_frame->local;
+
+ dst_local->op_ret = src_local->op_ret;
+ dst_local->op_errno = src_local->op_errno;
+ dst_local->cont.inode_wfop.prebuf = src_local->cont.inode_wfop.prebuf;
+ dst_local->cont.inode_wfop.postbuf = src_local->cont.inode_wfop.postbuf;
+ if (src_local->xdata_rsp)
+ dst_local->xdata_rsp = dict_ref(src_local->xdata_rsp);
+}
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+void
+afr_writev_unwind(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
- local->call_count = call_count;
+ local = frame->local;
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_writev_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- local->fd,
- local->cont.writev.vector,
- local->cont.writev.count,
- local->cont.writev.offset,
- local->cont.writev.flags,
- local->cont.writev.iobref,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
+ if (priv->consistent_metadata)
+ afr_zero_fill_stat(local);
- return 0;
+ AFR_STACK_UNWIND(writev, frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
}
-
int
-afr_writev_done (call_frame_t *frame, xlator_t *this)
+afr_transaction_writev_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ call_frame_t *fop_frame = NULL;
- local = frame->local;
+ fop_frame = afr_transaction_detach_fop_frame(frame);
- iobref_unref (local->cont.writev.iobref);
- local->cont.writev.iobref = NULL;
+ if (fop_frame) {
+ afr_writev_copy_outvars(frame, fop_frame);
+ afr_writev_unwind(fop_frame, this);
+ }
+ return 0;
+}
- local->transaction.unwind (frame, this);
+static void
+afr_writev_handle_short_writes(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+ /*
+ * We already have the best case result of the writev calls staged
+ * as the return value. Any writev that returns some value less
+ * than the best case is now out of sync, so mark the fop as
+ * failed. Note that fops that have returned with errors have
+ * already been marked as failed.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if ((!local->replies[i].valid) || (local->replies[i].op_ret == -1))
+ continue;
+
+ if (local->replies[i].op_ret < local->op_ret)
+ afr_transaction_fop_failed(frame, this, i);
+ }
+}
- AFR_STACK_DESTROY (frame);
+void
+afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int ret = 0;
+ afr_local_t *local = frame->local;
+ uint32_t open_fd_count = 0;
+ uint32_t write_is_append = 0;
+ int32_t num_inodelks = 0;
+
+ LOCK(&frame->lock);
+ {
+ __afr_inode_write_fill(frame, this, child_index, op_ret, op_errno,
+ prebuf, postbuf, NULL, xdata);
+ if (op_ret == -1 || !xdata)
+ goto unlock;
+
+ write_is_append = 0;
+ ret = dict_get_uint32(xdata, GLUSTERFS_WRITE_IS_APPEND,
+ &write_is_append);
+ if (ret || !write_is_append)
+ local->append_write = _gf_false;
+
+ ret = dict_get_uint32(xdata, GLUSTERFS_ACTIVE_FD_COUNT, &open_fd_count);
+ if (ret < 0)
+ goto unlock;
+ if (open_fd_count > local->open_fd_count) {
+ local->open_fd_count = open_fd_count;
+ local->update_open_fd_count = _gf_true;
+ }
- return 0;
+ ret = dict_get_int32_sizen(xdata, GLUSTERFS_INODELK_COUNT,
+ &num_inodelks);
+ if (ret < 0)
+ goto unlock;
+ if (num_inodelks > local->num_inodelks) {
+ local->num_inodelks = num_inodelks;
+ local->update_num_inodelks = _gf_true;
+ }
+ }
+unlock:
+ UNLOCK(&frame->lock);
}
+void
+afr_process_post_writev(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_lock_t *lock = NULL;
+
+ local = frame->local;
+
+ if (!local->stable_write && !local->append_write)
+ /* An appended write removes the necessity to
+ fsync() the file. This is because self-heal
+ has the logic to check for larger file when
+ the xattrs are not reliably pointing at
+ a stale file.
+ */
+ afr_fd_report_unstable_write(this, local);
+
+ __afr_inode_write_finalize(frame, this);
+
+ afr_writev_handle_short_writes(frame, this);
+
+ if (local->update_open_fd_count)
+ local->inode_ctx->open_fd_count = local->open_fd_count;
+ if (local->update_num_inodelks &&
+ local->transaction.type == AFR_DATA_TRANSACTION) {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ lock->num_inodelks = local->num_inodelks;
+ }
+}
int
-afr_do_writev (call_frame_t *frame, xlator_t *this)
+afr_writev_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- call_frame_t *transaction_frame = NULL;
- afr_local_t *local = NULL;
- int op_ret = -1;
- int op_errno = 0;
-
- local = frame->local;
+ call_frame_t *fop_frame = NULL;
+ int child_index = (long)cookie;
+ int call_count = -1;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
+ afr_inode_write_fill(frame, this, child_index, op_ret, op_errno, prebuf,
+ postbuf, xdata);
- transaction_frame->local = local;
- frame->local = NULL;
+ call_count = afr_frame_return(frame);
- local->op = GF_FOP_WRITE;
+ if (call_count == 0) {
+ afr_process_post_writev(frame, this);
- local->success_count = 0;
-
- local->transaction.fop = afr_writev_wind;
- local->transaction.done = afr_writev_done;
- local->transaction.unwind = afr_writev_unwind;
-
- local->transaction.main_frame = frame;
- if (local->fd->flags & O_APPEND) {
- local->transaction.start = 0;
- local->transaction.len = 0;
+ if (!afr_txn_nothing_failed(frame, this)) {
+ // Don't unwind until post-op is complete
+ afr_transaction_resume(frame, this);
} else {
- local->transaction.start = local->cont.writev.offset;
- local->transaction.len = iov_length (local->cont.writev.vector,
- local->cont.writev.count);
- }
-
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
-
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (writev, frame, op_ret, op_errno, NULL, NULL, NULL);
- }
-
- return 0;
+ /*
+ * Generally inode-write fops do transaction.unwind then
+ * transaction.resume, but writev needs to make sure that
+ * delayed post-op frame is placed in fdctx before unwind
+ * happens. This prevents the race of flush doing the
+ * changelog wakeup first in fuse thread and then this
+ * writev placing its delayed post-op frame in fdctx.
+ * This helps flush make sure all the delayed post-ops are
+ * completed.
+ */
+
+ fop_frame = afr_transaction_detach_fop_frame(frame);
+ afr_writev_copy_outvars(frame, fop_frame);
+ afr_transaction_resume(frame, this);
+ afr_writev_unwind(fop_frame, this);
+ }
+ }
+ return 0;
}
static int
-afr_prepare_loc (call_frame_t *frame, fd_t *fd)
-{
- afr_local_t *local = NULL;
- char *name = NULL;
- char *path = NULL;
- int ret = 0;
-
- if ((!fd) || (!fd->inode))
- return -1;
-
- local = frame->local;
- ret = inode_path (fd->inode, NULL, (char **)&path);
- if (ret <= 0) {
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "Unable to get path for gfid: %s",
- uuid_utoa (fd->inode->gfid));
- return -1;
- }
-
- if (local->loc.path) {
- if (strcmp (path, local->loc.path))
- gf_log (frame->this->name, GF_LOG_DEBUG,
- "overwriting old loc->path %s with %s",
- local->loc.path, path);
- GF_FREE ((char *)local->loc.path);
- }
- local->loc.path = path;
-
- name = strrchr (local->loc.path, '/');
- if (name)
- name++;
- local->loc.name = name;
-
- if (local->loc.inode) {
- inode_unref (local->loc.inode);
- }
- local->loc.inode = inode_ref (fd->inode);
-
- if (local->loc.parent) {
- inode_unref (local->loc.parent);
- }
-
- local->loc.parent = inode_parent (local->loc.inode, 0, NULL);
-
- return 0;
+afr_arbiter_writev_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = frame->local;
+ afr_private_t *priv = this->private;
+ static char byte = 0xFF;
+ static struct iovec vector = {&byte, 1};
+ int32_t count = 1;
+
+ STACK_WIND_COOKIE(
+ frame, afr_writev_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol], priv->children[subvol]->fops->writev, local->fd,
+ &vector, count, local->cont.writev.offset, local->cont.writev.flags,
+ local->cont.writev.iobref, local->xdata_req);
+
+ return 0;
}
-afr_fd_paused_call_t*
-afr_paused_call_create (call_frame_t *frame)
+int
+afr_writev_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_fd_paused_call_t *paused_call = NULL;
-
- local = frame->local;
- GF_ASSERT (local->fop_call_continue);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- paused_call = GF_CALLOC (1, sizeof (*paused_call),
- gf_afr_fd_paused_call_t);
- if (paused_call) {
- INIT_LIST_HEAD (&paused_call->call_list);
- paused_call->frame = frame;
- }
+ local = frame->local;
+ priv = this->private;
- return paused_call;
+ if (AFR_IS_ARBITER_BRICK(priv, subvol)) {
+ afr_arbiter_writev_wind(frame, this, subvol);
+ return 0;
+ }
+
+ STACK_WIND_COOKIE(frame, afr_writev_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->writev, local->fd,
+ local->cont.writev.vector, local->cont.writev.count,
+ local->cont.writev.offset, local->cont.writev.flags,
+ local->cont.writev.iobref, local->xdata_req);
+ return 0;
}
-static int
-afr_pause_fd_fop (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx)
+int
+afr_do_writev(call_frame_t *frame, xlator_t *this)
{
- afr_fd_paused_call_t *paused_call = NULL;
- int ret = 0;
-
- paused_call = afr_paused_call_create (frame);
- if (paused_call)
- list_add (&paused_call->call_list, &fd_ctx->paused_calls);
- else
- ret = -ENOMEM;
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = frame->local;
+ transaction_frame->local = local;
+ frame->local = NULL;
+
+ if (!AFR_FRAME_INIT(frame, op_errno))
+ goto out;
+
+ local->op = GF_FOP_WRITE;
+
+ local->transaction.wind = afr_writev_wind;
+ local->transaction.unwind = afr_transaction_writev_unwind;
+
+ local->transaction.main_frame = frame;
+
+ if (local->fd->flags & O_APPEND) {
+ /*
+ * Backend vfs ignores the 'offset' for append mode fd so
+ * locking just the region provided for the writev does not
+ * give consistency guarantee. The actual write may happen at a
+ * completely different range than the one provided by the
+ * offset, len in the fop. So lock the entire file.
+ */
+ local->transaction.start = 0;
+ local->transaction.len = 0;
+ } else {
+ local->transaction.start = local->cont.writev.offset;
+ local->transaction.len = iov_length(local->cont.writev.vector,
+ local->cont.writev.count);
+ }
+
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return ret;
+ AFR_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-static void
-afr_trigger_open_fd_self_heal (call_frame_t *frame, xlator_t *this)
+int
+afr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- inode_t *inode = NULL;
- char *reason = NULL;
+ afr_local_t *local = NULL;
+ int op_errno = ENOMEM;
+ int ret = -1;
+
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->cont.writev.vector = iov_dup(vector, count);
+ if (!local->cont.writev.vector)
+ goto out;
+ local->cont.writev.count = count;
+ local->cont.writev.offset = offset;
+ local->cont.writev.flags = flags;
+ local->cont.writev.iobref = iobref_ref(iobref);
+
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
+
+ if (dict_set_uint32(local->xdata_req, GLUSTERFS_ACTIVE_FD_COUNT, 4)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (dict_set_str_sizen(local->xdata_req, GLUSTERFS_INODELK_DOM_COUNT,
+ this->name)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (dict_set_uint32(local->xdata_req, GLUSTERFS_WRITE_IS_APPEND, 4)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ /* Set append_write to be true speculatively. If on any
+ server it turns not be true, we unset it in the
+ callback.
+ */
+ local->append_write = _gf_true;
+
+ /* detect here, but set it in writev_wind_cbk *after* the unstable
+ write is performed
+ */
+ local->stable_write = !!((fd->flags | flags) & (O_SYNC | O_DSYNC));
+
+ afr_fix_open(fd, this);
+
+ afr_do_writev(frame, this);
+
+ return 0;
+out:
+ AFR_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
- local = frame->local;
- sh = &local->self_heal;
- inode = local->fd->inode;
+ return 0;
+}
- sh->do_missing_entry_self_heal = _gf_true;
- sh->do_gfid_self_heal = _gf_true;
- sh->do_data_self_heal = _gf_true;
+/* }}} */
- reason = "subvolume came online";
- afr_launch_self_heal (frame, this, inode, _gf_true, inode->ia_type,
- reason, NULL, NULL);
-}
+/* {{{ truncate */
int
-afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop)
+afr_truncate_unwind(call_frame_t *frame, xlator_t *this)
{
- int ret = 0;
- int i = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- inode_t *inode = NULL;
- gf_boolean_t need_self_heal = _gf_false;
- int *need_open = NULL;
- int need_open_count = 0;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- gf_boolean_t fop_continue = _gf_true;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
- GF_ASSERT (local->fd);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
+ return 0;
- inode = local->fd->inode;
- //gfid is not set in rebalance, that case needs to be handled.
- if (fd_is_anonymous (local->fd) ||
- !inode || uuid_is_null (inode->gfid)) {
- fop_continue = _gf_true;
- goto out;
- }
+ AFR_STACK_UNWIND(truncate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
- if (pause_fop)
- GF_ASSERT (local->fop_call_continue);
+int
+afr_truncate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
- ret = afr_prepare_loc (frame, local->fd);
- if (ret < 0) {
- //File does not exist we cant open it.
- ret = 0;
- goto out;
- }
+ local = frame->local;
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- if (!fd_ctx) {
- ret = -EINVAL;
- goto out;
- }
+ if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
- LOCK (&local->fd->lock);
- {
- if (fd_ctx->up_count < priv->up_count) {
- need_self_heal = _gf_true;
- fd_ctx->up_count = priv->up_count;
- fd_ctx->down_count = priv->down_count;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if ((fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED) &&
- local->child_up[i]) {
- fd_ctx->opened_on[i] = AFR_FD_OPENING;
- if (!need_open)
- need_open = GF_CALLOC (priv->child_count,
- sizeof (*need_open),
- gf_afr_mt_int32_t);
- need_open[i] = 1;
- need_open_count++;
- } else if (pause_fop && local->child_up[i] &&
- (fd_ctx->opened_on[i] == AFR_FD_OPENING)) {
- local->fop_paused = _gf_true;
- }
- }
-
- if (local->fop_paused) {
- GF_ASSERT (pause_fop);
- gf_log (this->name, GF_LOG_INFO, "Pause fd %p",
- local->fd);
- ret = afr_pause_fd_fop (frame, this, fd_ctx);
- if (ret)
- goto unlock;
- fop_continue = _gf_false;
- }
- }
-unlock:
- UNLOCK (&local->fd->lock);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to fix fd for %s",
- local->loc.path);
- fop_continue = _gf_false;
- goto out;
- }
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
+}
- if (need_self_heal)
- afr_trigger_open_fd_self_heal (frame, this);
+int
+afr_truncate_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- if (!need_open_count)
- goto out;
+ local = frame->local;
+ priv = this->private;
- gf_log (this->name, GF_LOG_INFO, "Opening fd %p", local->fd);
- afr_fix_open (frame, this, fd_ctx, need_open_count, need_open);
- fop_continue = _gf_false;
-out:
- if (need_open)
- GF_FREE (need_open);
- if (fop_continue && local->fop_call_continue)
- local->fop_call_continue (frame, this);
- return ret;
+ STACK_WIND_COOKIE(frame, afr_truncate_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->truncate, &local->loc,
+ local->cont.truncate.offset, local->xdata_req);
+ return 0;
}
int
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- uint32_t flags, struct iobref *iobref, dict_t *xdata)
+afr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- priv = this->private;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- QUORUM_CHECK(writev,out);
+ local->cont.truncate.offset = offset;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ if (!local->xdata_req)
+ goto out;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->transaction.wind = afr_truncate_wind;
+ local->transaction.unwind = afr_truncate_unwind;
- local->cont.writev.vector = iov_dup (vector, count);
- local->cont.writev.count = count;
- local->cont.writev.offset = offset;
- local->cont.writev.flags = flags;
- local->cont.writev.iobref = iobref_ref (iobref);
+ loc_copy(&local->loc, loc);
+ ret = afr_set_inode_local(this, local, loc->inode);
+ if (ret)
+ goto out;
- local->fd = fd_ref (fd);
- local->fop_call_continue = afr_do_writev;
+ local->op = GF_FOP_TRUNCATE;
- ret = afr_open_fd_fix (frame, this, _gf_true);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ local->transaction.main_frame = frame;
+ local->transaction.start = offset;
+ local->transaction.len = 0;
- ret = 0;
+ /* Set it true speculatively, will get reset in afr_truncate_wind_cbk
+ if truncate was not a NOP */
+ local->stable_write = _gf_true;
+
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-
/* }}} */
-/* {{{ truncate */
+/* {{{ ftruncate */
int
-afr_truncate_unwind (call_frame_t *frame, xlator_t *this)
+afr_ftruncate_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (truncate, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.truncate.prebuf,
- &local->cont.truncate.postbuf,
- NULL);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(ftruncate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
int
-afr_truncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+afr_ftruncate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno) && op_errno != EFBIG)
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.truncate.prebuf = *prebuf;
- local->cont.truncate.postbuf = *postbuf;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
-
-
-int32_t
-afr_truncate_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
+ afr_local_t *local = NULL;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ local = frame->local;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_truncate_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->truncate,
- &local->loc,
- local->cont.truncate.offset,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
+ if (op_ret == 0 && prebuf->ia_size != postbuf->ia_size)
+ local->stable_write = _gf_false;
- return 0;
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
}
-
int
-afr_truncate_done (call_frame_t *frame, xlator_t *this)
+afr_ftruncate_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
- return 0;
+ STACK_WIND_COOKIE(frame, afr_ftruncate_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->ftruncate, local->fd,
+ local->cont.ftruncate.offset, local->xdata_req);
+ return 0;
}
-
int
-afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset, dict_t *xdata)
+afr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- priv = this->private;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- QUORUM_CHECK(truncate,out);
+ local->cont.ftruncate.offset = offset;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
+ if (!local->xdata_req)
+ goto out;
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->op = GF_FOP_FTRUNCATE;
- local->cont.truncate.offset = offset;
+ local->transaction.wind = afr_ftruncate_wind;
+ local->transaction.unwind = afr_ftruncate_unwind;
- local->transaction.fop = afr_truncate_wind;
- local->transaction.done = afr_truncate_done;
- local->transaction.unwind = afr_truncate_unwind;
+ local->transaction.main_frame = frame;
- loc_copy (&local->loc, loc);
+ local->transaction.start = local->cont.ftruncate.offset;
+ local->transaction.len = 0;
- local->transaction.main_frame = frame;
- local->transaction.start = offset;
- local->transaction.len = 0;
+ afr_fix_open(fd, this);
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ /* Set it true speculatively, will get reset in afr_ftruncate_wind_cbk
+ if truncate was not a NOP */
+ local->stable_write = _gf_true;
- ret = 0;
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
- }
+ AFR_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
/* }}} */
-/* {{{ ftruncate */
-
+/* {{{ setattr */
int
-afr_ftruncate_unwind (call_frame_t *frame, xlator_t *this)
+afr_setattr_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (ftruncate, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.ftruncate.prebuf,
- &local->cont.ftruncate.postbuf,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(setattr, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
int
-afr_ftruncate_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+afr_setattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preop,
+ struct iatt *postop, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int call_count = -1;
- int need_unwind = 0;
- int read_child = 0;
-
- local = frame->local;
- priv = this->private;
-
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
-
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
-
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
-
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
-
- if (child_index == read_child) {
- local->cont.ftruncate.prebuf = *prebuf;
- local->cont.ftruncate.postbuf = *postbuf;
- }
-
- local->success_count++;
-
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, preop,
+ postop, NULL, xdata);
}
-
int
-afr_ftruncate_wind (call_frame_t *frame, xlator_t *this)
+afr_setattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_ftruncate_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->ftruncate,
- local->fd,
- local->cont.ftruncate.offset,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_setattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->setattr, &local->loc,
+ &local->cont.setattr.in_buf, local->cont.setattr.valid,
+ local->xdata_req);
+ return 0;
}
-
int
-afr_ftruncate_done (call_frame_t *frame, xlator_t *this)
+afr_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
+ int32_t valid, dict_t *xdata)
{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->cont.setattr.in_buf = *buf;
+ local->cont.setattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->transaction.wind = afr_setattr_wind;
+ local->transaction.unwind = afr_setattr_unwind;
+
+ loc_copy(&local->loc, loc);
+ ret = afr_set_inode_local(this, local, loc->inode);
+ if (ret)
+ goto out;
+
+ local->op = GF_FOP_SETATTR;
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
+/* {{{ fsetattr */
int
-afr_do_ftruncate (call_frame_t *frame, xlator_t *this)
+afr_fsetattr_unwind(call_frame_t *frame, xlator_t *this)
{
- call_frame_t * transaction_frame = NULL;
- afr_local_t * local = NULL;
- int op_ret = -1;
- int op_errno = 0;
-
- local = frame->local;
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
-
- transaction_frame->local = local;
- frame->local = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local->op = GF_FOP_FTRUNCATE;
+ local = frame->local;
- local->transaction.fop = afr_ftruncate_wind;
- local->transaction.done = afr_ftruncate_done;
- local->transaction.unwind = afr_ftruncate_unwind;
-
- local->transaction.main_frame = frame;
-
- local->transaction.start = local->cont.ftruncate.offset;
- local->transaction.len = 0;
-
- afr_transaction (transaction_frame, this, AFR_DATA_TRANSACTION);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
+ return 0;
- op_ret = 0;
-out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, NULL,
- NULL, NULL);
- }
+ AFR_STACK_UNWIND(fsetattr, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
- return 0;
+int
+afr_fsetattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preop,
+ struct iatt *postop, dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, preop,
+ postop, NULL, xdata);
}
+int
+afr_fsetattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_fsetattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsetattr, local->fd,
+ &local->cont.fsetattr.in_buf, local->cont.fsetattr.valid,
+ local->xdata_req);
+ return 0;
+}
int
-afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset, dict_t *xdata)
+afr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf,
+ int32_t valid, dict_t *xdata)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- priv = this->private;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- QUORUM_CHECK(ftruncate,out);
+ local->cont.fsetattr.in_buf = *buf;
+ local->cont.fsetattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ if (!local->xdata_req)
+ goto out;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->transaction.wind = afr_fsetattr_wind;
+ local->transaction.unwind = afr_fsetattr_unwind;
- local->cont.ftruncate.offset = offset;
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- local->fd = fd_ref (fd);
- local->fop_call_continue = afr_do_ftruncate;
+ local->op = GF_FOP_FSETATTR;
- ret = afr_open_fd_fix (frame, this, _gf_true);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ afr_fix_open(fd, this);
- ret = 0;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-/* }}} */
-
-/* {{{ setattr */
+/* {{{ setxattr */
int
-afr_setattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_setxattr_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (setattr, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.setattr.preop_buf,
- &local->cont.setattr.postop_buf,
- NULL);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
+
+ AFR_STACK_UNWIND(setxattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
}
+int
+afr_setxattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, NULL, xdata);
+}
int
-afr_setattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
+afr_setxattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_setxattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->setxattr, &local->loc,
+ local->cont.setxattr.dict, local->cont.setxattr.flags,
+ local->xdata_req);
+ return 0;
+}
- local = frame->local;
- priv = this->private;
+int
+afr_emptyb_set_pending_changelog_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
- read_child = afr_inode_get_read_ctx (this, local->loc.inode, NULL);
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i, ret = 0;
+ char *op_type = NULL;
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+ local = frame->local;
+ priv = this->private;
+ i = (long)cookie;
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
- }
+ ret = dict_get_str_sizen(local->xdata_req, "replicate-brick-op", &op_type);
+ if (ret)
+ goto out;
- if (child_index == read_child) {
- local->cont.setattr.preop_buf = *preop;
- local->cont.setattr.postop_buf = *postop;
- }
+ gf_smsg(this->name, op_ret ? GF_LOG_ERROR : GF_LOG_INFO,
+ op_ret ? op_errno : 0, AFR_MSG_SET_PEND_XATTR, "name=%s",
+ priv->children[i]->name, "op_ret=%s",
+ op_ret ? "failed" : "succeeded", NULL);
- local->success_count++;
+out:
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+int
+afr_emptyb_set_pending_changelog(call_frame_t *frame, xlator_t *this,
+ unsigned char *locked_nodes)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int ret = 0, i = 0;
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ local = frame->local;
+ priv = this->private;
- call_count = afr_frame_return (frame);
+ AFR_ONLIST(locked_nodes, frame, afr_emptyb_set_pending_changelog_cbk,
+ xattrop, &local->loc, GF_XATTROP_ADD_ARRAY, local->xattr_req,
+ NULL);
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ /* It is sufficient if xattrop was successful on one child */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
- return 0;
+ if (local->replies[i].op_ret == 0) {
+ ret = 0;
+ goto out;
+ } else {
+ ret = afr_higher_errno(ret, local->replies[i].op_errno);
+ }
+ }
+out:
+ return -ret;
}
-
-int32_t
-afr_setattr_wind (call_frame_t *frame, xlator_t *this)
+static int
+_afr_handle_empty_brick_type(xlator_t *this, call_frame_t *frame, loc_t *loc,
+ int empty_index, afr_transaction_type type,
+ char *op_type, const int op_type_len)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ int count = 0;
+ int ret = -ENOMEM;
+ int idx = -1;
+ int d_idx = -1;
+ unsigned char *locked_nodes = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ locked_nodes = alloca0(priv->child_count);
+
+ idx = afr_index_for_transaction_type(type);
+ d_idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION);
+
+ local->pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!local->pending)
+ goto out;
+
+ local->pending[empty_index][idx] = hton32(1);
+
+ if ((priv->esh_granular) && (type == AFR_ENTRY_TRANSACTION))
+ local->pending[empty_index][d_idx] = hton32(1);
+
+ local->xdata_req = dict_new();
+ if (!local->xdata_req)
+ goto out;
+
+ ret = dict_set_nstrn(local->xdata_req, "replicate-brick-op",
+ SLEN("replicate-brick-op"), op_type, op_type_len);
+ if (ret)
+ goto out;
+
+ local->xattr_req = dict_new();
+ if (!local->xattr_req)
+ goto out;
+
+ ret = afr_set_pending_dict(priv, local->xattr_req, local->pending);
+ if (ret < 0)
+ goto out;
+
+ if (AFR_ENTRY_TRANSACTION == type) {
+ count = afr_selfheal_entrylk(frame, this, loc->inode, this->name, NULL,
+ locked_nodes);
+ } else {
+ count = afr_selfheal_inodelk(frame, this, loc->inode, this->name,
+ LLONG_MAX - 1, 0, locked_nodes);
+ }
+
+ if (!count) {
+ gf_smsg(this->name, GF_LOG_ERROR, EAGAIN, AFR_MSG_REPLACE_BRICK_STATUS,
+ NULL);
+ ret = -EAGAIN;
+ goto unlock;
+ }
+
+ ret = afr_emptyb_set_pending_changelog(frame, this, locked_nodes);
+ if (ret)
+ goto unlock;
+ ret = 0;
+unlock:
+ if (AFR_ENTRY_TRANSACTION == type) {
+ afr_selfheal_unentrylk(frame, this, loc->inode, this->name, NULL,
+ locked_nodes, NULL);
+ } else {
+ afr_selfheal_uninodelk(frame, this, loc->inode, this->name,
+ LLONG_MAX - 1, 0, locked_nodes);
+ }
+out:
+ return ret;
+}
- local->call_count = call_count;
+void
+afr_brick_args_cleanup(void *opaque)
+{
+ afr_empty_brick_args_t *data = NULL;
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_setattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc,
- &local->cont.setattr.in_buf,
- local->cont.setattr.valid,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
+ data = opaque;
+ loc_wipe(&data->loc);
+ GF_FREE(data);
+}
- return 0;
+int
+_afr_handle_empty_brick_cbk(int ret, call_frame_t *frame, void *opaque)
+{
+ afr_brick_args_cleanup(opaque);
+ return 0;
}
+int
+_afr_handle_empty_brick(void *opaque)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int empty_index = -1;
+ int ret = -1;
+ int op_errno = ENOMEM;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ char *op_type = NULL;
+ int op_type_len = 0;
+ afr_empty_brick_args_t *data = NULL;
+ call_frame_t *op_frame = NULL;
+
+ data = opaque;
+ frame = data->frame;
+ empty_index = data->empty_index;
+ if (!data->op_type)
+ goto out;
+
+ op_frame = copy_frame(frame);
+ if (!op_frame) {
+ ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_type = data->op_type;
+ op_type_len = strlen(op_type);
+ this = op_frame->this;
+ priv = this->private;
+
+ afr_set_lk_owner(op_frame, this, op_frame->root);
+ local = AFR_FRAME_INIT(op_frame, op_errno);
+ if (!local)
+ goto out;
+
+ loc_copy(&local->loc, &data->loc);
+
+ gf_smsg(this->name, GF_LOG_INFO, 0, AFR_MSG_NEW_BRICK, "name=%s",
+ priv->children[empty_index]->name, NULL);
+
+ ret = _afr_handle_empty_brick_type(this, op_frame, &local->loc, empty_index,
+ AFR_METADATA_TRANSACTION, op_type,
+ op_type_len);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ dict_unref(local->xdata_req);
+ dict_unref(local->xattr_req);
+ afr_matrix_cleanup(local->pending, priv->child_count);
+ local->pending = NULL;
+ local->xattr_req = NULL;
+ local->xdata_req = NULL;
+
+ ret = _afr_handle_empty_brick_type(this, op_frame, &local->loc, empty_index,
+ AFR_ENTRY_TRANSACTION, op_type,
+ op_type_len);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ if (op_frame) {
+ AFR_STACK_DESTROY(op_frame);
+ }
+ AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
+ return 0;
+}
int
-afr_setattr_done (call_frame_t *frame, xlator_t *this)
+afr_split_brain_resolve_do(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ char *data)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ local->xdata_req = dict_new();
+
+ if (!local->xdata_req) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_int32_sizen(local->xdata_req, "heal-op",
+ GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str_sizen(local->xdata_req, "child-name", data);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ /* set spb choice to -1 whether heal succeeds or not:
+ * If heal succeeds : spb-choice should be set to -1 as
+ * it is no longer valid; file is not
+ * in split-brain anymore.
+ * If heal doesn't succeed:
+ * spb-choice should be set to -1
+ * otherwise reads will be served
+ * from spb-choice which is misleading.
+ */
+ ret = afr_inode_split_brain_choice_set(loc->inode, this, -1);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN_SET_FAILED,
+ NULL);
+ afr_heal_splitbrain_file(frame, this, loc);
+ ret = 0;
+out:
+ if (ret < 0)
+ AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
+ return 0;
+}
- local = frame->local;
+int
+afr_get_split_brain_child_index(xlator_t *this, void *value, size_t len)
+{
+ int spb_child_index = -1;
+ char *spb_child_str = NULL;
- local->transaction.unwind (frame, this);
+ spb_child_str = alloca0(len + 1);
+ memcpy(spb_child_str, value, len);
- AFR_STACK_DESTROY (frame);
+ if (!strcmp(spb_child_str, "none"))
+ return -2;
- return 0;
+ spb_child_index = afr_get_child_index_from_name(this, spb_child_str);
+ if (spb_child_index < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "subvol=%s", spb_child_str, NULL);
+ }
+ return spb_child_index;
}
-
int
-afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata)
+afr_can_set_split_brain_choice(void *opaque)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
-
- QUORUM_CHECK(setattr,out);
+ afr_spbc_timeout_t *data = opaque;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ loc_t *loc = NULL;
+ int ret = -1;
+
+ frame = data->frame;
+ loc = data->loc;
+ this = frame->this;
+
+ ret = afr_is_split_brain(frame, this, loc->inode, loc->gfid, &data->d_spb,
+ &data->m_spb);
+
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED, "gfid=%s",
+ uuid_utoa(loc->gfid), NULL);
+ return ret;
+}
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
+int
+afr_handle_split_brain_commands(xlator_t *this, call_frame_t *frame, loc_t *loc,
+ dict_t *dict)
+{
+ void *choice_value = NULL;
+ void *resolve_value = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_spbc_timeout_t *data = NULL;
+ int len = 0;
+ int spb_child_index = -1;
+ int ret = -1;
+ int op_errno = EINVAL;
+
+ priv = this->private;
+
+ ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_CHOICE, &choice_value, &len);
+ ret = dict_get_ptr_and_len(dict, GF_AFR_SBRAIN_RESOLVE, &resolve_value,
+ &len);
+ if (!choice_value && !resolve_value) {
+ ret = -1;
+ goto out;
+ }
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local) {
+ ret = 1;
+ goto out;
+ }
+
+ local->op = GF_FOP_SETXATTR;
+
+ if (choice_value) {
+ spb_child_index = afr_get_split_brain_child_index(this, choice_value,
+ len);
+ if (spb_child_index < 0) {
+ /* Case where value was "none" */
+ if (spb_child_index == -2)
+ spb_child_index = -1;
+ else {
+ ret = 1;
+ op_errno = EINVAL;
goto out;
+ }
}
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- local->cont.setattr.in_buf = *buf;
- local->cont.setattr.valid = valid;
-
- local->transaction.fop = afr_setattr_wind;
- local->transaction.done = afr_setattr_done;
- local->transaction.unwind = afr_setattr_unwind;
-
- loc_copy (&local->loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ data = GF_CALLOC(1, sizeof(*data), gf_afr_mt_spbc_timeout_t);
+ if (!data) {
+ ret = 1;
+ goto out;
+ }
+ data->spb_child_index = spb_child_index;
+ data->frame = frame;
+ loc_copy(&local->loc, loc);
+ data->loc = &local->loc;
+ ret = synctask_new(this->ctx->env, afr_can_set_split_brain_choice,
+ afr_set_split_brain_choice, NULL, data);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS,
+ "name=%s", loc->name, NULL);
+ ret = 1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ ret = 0;
+ goto out;
+ }
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (resolve_value) {
+ spb_child_index = afr_get_split_brain_child_index(this, resolve_value,
+ len);
+ if (spb_child_index < 0) {
+ ret = 1;
+ goto out;
+ }
+ afr_split_brain_resolve_do(frame, this, loc,
+ priv->children[spb_child_index]->name);
ret = 0;
+ }
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
- }
-
- return 0;
+ /* key was correct but value was invalid when ret == 1 */
+ if (ret == 1) {
+ AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
+ if (data)
+ GF_FREE(data);
+ ret = 0;
+ }
+ return ret;
}
-/* {{{ fsetattr */
-
int
-afr_fsetattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_handle_spb_choice_timeout(xlator_t *this, call_frame_t *frame, dict_t *dict)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
+ int ret = -1;
+ int op_errno = 0;
+ uint64_t timeout = 0;
+ afr_private_t *priv = NULL;
- local = frame->local;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (fsetattr, main_frame, local->op_ret,
- local->op_errno,
- &local->cont.fsetattr.preop_buf,
- &local->cont.fsetattr.postop_buf,
- NULL);
- }
+ ret = dict_get_uint64(dict, GF_AFR_SPB_CHOICE_TIMEOUT, &timeout);
+ if (!ret) {
+ priv->spb_choice_timeout = timeout * 60;
+ AFR_STACK_UNWIND(setxattr, frame, ret, op_errno, NULL);
+ }
- return 0;
+ return ret;
}
-
int
-afr_fsetattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
+afr_handle_empty_brick(xlator_t *this, call_frame_t *frame, loc_t *loc,
+ dict_t *dict)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int child_index = (long) cookie;
- int read_child = 0;
- int call_count = -1;
- int need_unwind = 0;
+ int ret = -1;
+ int ab_ret = -1;
+ int empty_index = -1;
+ int op_errno = EPERM;
+ char *empty_brick = NULL;
+ char *op_type = NULL;
+ afr_empty_brick_args_t *data = NULL;
+
+ ret = dict_get_str_sizen(dict, GF_AFR_REPLACE_BRICK, &empty_brick);
+ if (!ret)
+ op_type = GF_AFR_REPLACE_BRICK;
+
+ ab_ret = dict_get_str_sizen(dict, GF_AFR_ADD_BRICK, &empty_brick);
+ if (!ab_ret)
+ op_type = GF_AFR_ADD_BRICK;
+
+ if (ret && ab_ret)
+ goto out;
+
+ if (frame->root->pid != GF_CLIENT_PID_ADD_REPLICA_MOUNT) {
+ gf_smsg(this->name, GF_LOG_ERROR, EPERM, AFR_MSG_INTERNAL_ATTR,
+ "op_type=%s", op_type, NULL);
+ ret = 1;
+ goto out;
+ }
+ empty_index = afr_get_child_index_from_name(this, empty_brick);
+
+ if (empty_index < 0) {
+ /* Didn't belong to this replica pair
+ * Just do a no-op
+ */
+ AFR_STACK_UNWIND(setxattr, frame, 0, 0, NULL);
+ return 0;
+ } else {
+ data = GF_CALLOC(1, sizeof(*data), gf_afr_mt_empty_brick_t);
+ if (!data) {
+ ret = 1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ data->frame = frame;
+ loc_copy(&data->loc, loc);
+ data->empty_index = empty_index;
+ data->op_type = op_type;
+ ret = synctask_new(this->ctx->env, _afr_handle_empty_brick,
+ _afr_handle_empty_brick_cbk, NULL, data);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN_STATUS,
+ NULL);
+ ret = 1;
+ op_errno = ENOMEM;
+ afr_brick_args_cleanup(data);
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ if (ret == 1) {
+ AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
+ ret = 0;
+ }
+ return ret;
+}
- local = frame->local;
- priv = this->private;
+static int
+afr_handle_special_xattr(xlator_t *this, call_frame_t *frame, loc_t *loc,
+ dict_t *dict)
+{
+ int ret = -1;
- read_child = afr_inode_get_read_ctx (this, local->fd->inode, NULL);
+ ret = afr_handle_split_brain_commands(this, frame, loc, dict);
+ if (ret == 0)
+ goto out;
- LOCK (&frame->lock);
- {
- if (child_index == read_child) {
- local->read_child_returned = _gf_true;
- }
+ ret = afr_handle_spb_choice_timeout(this, frame, dict);
+ if (ret == 0)
+ goto out;
- if (afr_fop_failed (op_ret, op_errno))
- afr_transaction_fop_failed (frame, this, child_index);
+ /* Applicable for replace-brick and add-brick commands */
+ ret = afr_handle_empty_brick(this, frame, loc, dict);
+out:
+ return ret;
+}
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
+int
+afr_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
- if (child_index == read_child) {
- local->cont.fsetattr.preop_buf = *preop;
- local->cont.fsetattr.postop_buf = *postop;
- }
+ GF_IF_INTERNAL_XATTR_GOTO("trusted.afr.*", dict, op_errno, out);
- local->success_count++;
+ GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.afr.*", dict, op_errno, out);
- if ((local->success_count >= priv->wait_count)
- && local->read_child_returned) {
- need_unwind = 1;
- }
- }
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ ret = afr_handle_special_xattr(this, frame, loc, dict);
+ if (ret == 0)
+ return 0;
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- call_count = afr_frame_return (frame);
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ local->cont.setxattr.dict = dict_ref(dict);
+ local->cont.setxattr.flags = flags;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- return 0;
-}
+ if (!local->xdata_req)
+ goto out;
+ local->transaction.wind = afr_setxattr_wind;
+ local->transaction.unwind = afr_setxattr_unwind;
-int32_t
-afr_fsetattr_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ loc_copy(&local->loc, loc);
+ ret = afr_set_inode_local(this, local, loc->inode);
+ if (ret)
+ goto out;
- local = frame->local;
- priv = this->private;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ local->op = GF_FOP_SETXATTR;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- local->call_count = call_count;
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_fsetattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fsetattr,
- local->fd,
- &local->cont.fsetattr.in_buf,
- local->cont.fsetattr.valid,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
+ AFR_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
+/* {{{ fsetxattr */
int
-afr_fsetattr_done (call_frame_t *frame, xlator_t *this)
+afr_fsetxattr_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
+ local = frame->local;
- local->transaction.unwind (frame, this);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
+ return 0;
- AFR_STACK_DESTROY (frame);
+ AFR_STACK_UNWIND(fsetxattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
+}
- return 0;
+int
+afr_fsetxattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, NULL, xdata);
}
int
-afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata)
+afr_fsetxattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_fsetxattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsetxattr, local->fd,
+ local->cont.fsetxattr.dict, local->cont.fsetxattr.flags,
+ local->xdata_req);
+ return 0;
+}
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
+int
+afr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- priv = this->private;
+ GF_IF_INTERNAL_XATTR_GOTO("trusted.afr.*", dict, op_errno, out);
- QUORUM_CHECK(fsetattr,out);
+ GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.afr.*", dict, op_errno, out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->cont.fsetxattr.dict = dict_ref(dict);
+ local->cont.fsetxattr.flags = flags;
- local->cont.fsetattr.in_buf = *buf;
- local->cont.fsetattr.valid = valid;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- local->transaction.fop = afr_fsetattr_wind;
- local->transaction.done = afr_fsetattr_done;
- local->transaction.unwind = afr_fsetattr_unwind;
+ if (!local->xdata_req)
+ goto out;
- local->fd = fd_ref (fd);
+ local->transaction.wind = afr_fsetxattr_wind;
+ local->transaction.unwind = afr_fsetxattr_unwind;
- ret = afr_open_fd_fix (transaction_frame, this, _gf_false);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local->op = GF_FOP_FSETXATTR;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- ret = 0;
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(fsetxattr, frame, -1, op_errno, NULL);
+ return 0;
}
+/* }}} */
-/* {{{ setxattr */
-
+/* {{{ removexattr */
int
-afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_removexattr_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
- if (main_frame) {
- AFR_STACK_UNWIND (setxattr, main_frame,
- local->op_ret, local->op_errno,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
-
-int
-afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->child_count) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
+ AFR_STACK_UNWIND(removexattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
}
-
int
-afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
+afr_removexattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc,
- local->cont.setxattr.dict,
- local->cont.setxattr.flags,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, NULL, xdata);
}
-
int
-afr_setxattr_done (call_frame_t *frame, xlator_t *this)
+afr_removexattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = frame->local;
-
- local->transaction.unwind (frame, this);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ priv = this->private;
- return 0;
+ STACK_WIND_COOKIE(frame, afr_removexattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->removexattr, &local->loc,
+ local->cont.removexattr.name, local->xdata_req);
+ return 0;
}
int
-afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
+afr_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- data_pair_t *trav = NULL;
- int ret = -1;
- int op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (this, out);
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
- trav, op_errno, out);
+ GF_IF_NATIVE_XATTR_GOTO("trusted.afr.*", name, op_errno, out);
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
- trav, op_errno, out);
+ GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.afr.*", name, op_errno, out);
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this->private, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- priv = this->private;
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- QUORUM_CHECK(setxattr,out);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
+ local->cont.removexattr.name = gf_strdup(name);
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ if (!local->xdata_req)
+ goto out;
- local->cont.setxattr.dict = dict_ref (dict);
- local->cont.setxattr.flags = flags;
+ local->transaction.wind = afr_removexattr_wind;
+ local->transaction.unwind = afr_removexattr_unwind;
- local->transaction.fop = afr_setxattr_wind;
- local->transaction.done = afr_setxattr_done;
- local->transaction.unwind = afr_setxattr_unwind;
+ loc_copy(&local->loc, loc);
+ ret = afr_set_inode_local(this, local, loc->inode);
+ if (ret)
+ goto out;
- loc_copy (&local->loc, loc);
+ local->op = GF_FOP_REMOVEXATTR;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
+ return 0;
}
-/* {{{ fsetxattr */
-
-
+/* ffremovexattr */
int
-afr_fsetxattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_fremovexattr_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- call_frame_t *main_frame = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
+ local = frame->local;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (fsetxattr, main_frame,
- local->op_ret, local->op_errno,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
+
+ AFR_STACK_UNWIND(fremovexattr, main_frame, local->op_ret, local->op_errno,
+ local->xdata_rsp);
+ return 0;
}
+int
+afr_fremovexattr_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, NULL, xdata);
+}
int
-afr_fsetxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_fremovexattr_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local = frame->local;
- priv = this->private;
+ local = frame->local;
+ priv = this->private;
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
+ STACK_WIND_COOKIE(frame, afr_fremovexattr_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fremovexattr, local->fd,
+ local->cont.removexattr.name, local->xdata_req);
+ return 0;
+}
- if (local->success_count == priv->child_count) {
- need_unwind = 1;
- }
- }
+int
+afr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ GF_IF_NATIVE_XATTR_GOTO("trusted.afr.*", name, op_errno, out);
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.afr.*", name, op_errno, out);
- call_count = afr_frame_return (frame);
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- return 0;
-}
+ local->cont.removexattr.name = gf_strdup(name);
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
+ if (!local->xdata_req)
+ goto out;
-int
-afr_fsetxattr_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ local->transaction.wind = afr_fremovexattr_wind;
+ local->transaction.unwind = afr_fremovexattr_unwind;
- local = frame->local;
- priv = this->private;
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ local->op = GF_FOP_FREMOVEXATTR;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- local->call_count = call_count;
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_fsetxattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fsetxattr,
- local->fd,
- local->cont.fsetxattr.dict,
- local->cont.fsetxattr.flags,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
-}
+ AFR_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL);
+ return 0;
+}
int
-afr_fsetxattr_done (call_frame_t *frame, xlator_t *this)
+afr_fallocate_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = frame->local;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
+
+ AFR_STACK_UNWIND(fallocate, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
}
int
-afr_fsetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata)
+afr_fallocate_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = EINVAL;
- data_pair_t *trav = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.afr.*", dict,
- trav, op_errno, out);
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
+}
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.afr.*", dict,
- trav, op_errno, out);
+int
+afr_fallocate_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_fallocate_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fallocate, local->fd,
+ local->cont.fallocate.mode, local->cont.fallocate.offset,
+ local->cont.fallocate.len, local->xdata_req);
+ return 0;
+}
- if (ret)
- goto out;
+int
+afr_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ call_frame_t *transaction_frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- priv = this->private;
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- QUORUM_CHECK(fsetxattr,out);
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+ local->cont.fallocate.mode = mode;
+ local->cont.fallocate.offset = offset;
+ local->cont.fallocate.len = len;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- transaction_frame->local = local;
+ if (!local->xdata_req)
+ goto out;
- local->op_ret = -1;
+ local->op = GF_FOP_FALLOCATE;
- local->cont.fsetxattr.dict = dict_ref (dict);
- local->cont.fsetxattr.flags = flags;
+ local->transaction.wind = afr_fallocate_wind;
+ local->transaction.unwind = afr_fallocate_unwind;
- local->transaction.fop = afr_fsetxattr_wind;
- local->transaction.done = afr_fsetxattr_done;
- local->transaction.unwind = afr_fsetxattr_unwind;
+ local->transaction.main_frame = frame;
- local->fd = fd_ref (fd);
+ local->transaction.start = local->cont.fallocate.offset;
+ local->transaction.len = 0;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ afr_fix_open(fd, this);
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
/* }}} */
-
-/* {{{ removexattr */
-
+/* {{{ discard */
int
-afr_removexattr_unwind (call_frame_t *frame, xlator_t *this)
+afr_discard_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
- if (main_frame) {
- AFR_STACK_UNWIND (removexattr, main_frame,
- local->op_ret, local->op_errno,
- NULL);
- }
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(discard, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
int
-afr_removexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_discard_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
+}
- local = frame->local;
- priv = this->private;
+int
+afr_discard_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_discard_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->discard, local->fd,
+ local->cont.discard.offset, local->cont.discard.len,
+ local->xdata_req);
+ return 0;
+}
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
+int
+afr_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ local->cont.discard.offset = offset;
+ local->cont.discard.len = len;
- call_count = afr_frame_return (frame);
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- return 0;
-}
+ if (!local->xdata_req)
+ goto out;
+ local->op = GF_FOP_DISCARD;
-int32_t
-afr_removexattr_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
+ local->transaction.wind = afr_discard_wind;
+ local->transaction.unwind = afr_discard_unwind;
- local = frame->local;
- priv = this->private;
+ local->transaction.main_frame = frame;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ local->transaction.start = local->cont.discard.offset;
+ local->transaction.len = 0;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ afr_fix_open(fd, this);
- local->call_count = call_count;
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_removexattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->removexattr,
- &local->loc,
- local->cont.removexattr.name,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
+/* {{{ zerofill */
int
-afr_removexattr_done (call_frame_t *frame, xlator_t *this)
+afr_zerofill_unwind(call_frame_t *frame, xlator_t *this)
{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- AFR_STACK_DESTROY (frame);
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(discard, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
+ return 0;
+}
int
-afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
+afr_zerofill_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_errno = 0;
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
+}
- VALIDATE_OR_GOTO (this, out);
+int
+afr_zerofill_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_zerofill_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->zerofill, local->fd,
+ local->cont.zerofill.offset, local->cont.zerofill.len,
+ local->xdata_req);
+ return 0;
+}
- GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
- name, op_errno, out);
+int
+afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
- name, op_errno, out);
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- priv = this->private;
+ local->cont.zerofill.offset = offset;
+ local->cont.zerofill.len = len;
- QUORUM_CHECK(removexattr,out);
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- op_errno = ENOMEM;
- goto out;
- }
+ if (xdata)
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ else
+ local->xdata_req = dict_new();
- AFR_LOCAL_ALLOC_OR_GOTO (transaction_frame->local, out);
- local = transaction_frame->local;
+ if (!local->xdata_req)
+ goto out;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local->op = GF_FOP_ZEROFILL;
- local->cont.removexattr.name = gf_strdup (name);
+ local->transaction.wind = afr_zerofill_wind;
+ local->transaction.unwind = afr_zerofill_unwind;
- local->transaction.fop = afr_removexattr_wind;
- local->transaction.done = afr_removexattr_done;
- local->transaction.unwind = afr_removexattr_unwind;
+ local->transaction.main_frame = frame;
- loc_copy (&local->loc, loc);
+ local->transaction.start = local->cont.zerofill.offset;
+ local->transaction.len = len;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ afr_fix_open(fd, this);
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- ret = 0;
+ return 0;
out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-/* ffremovexattr */
-int
-afr_fremovexattr_unwind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
+/* }}} */
- if (main_frame) {
- AFR_STACK_UNWIND (fremovexattr, main_frame,
- local->op_ret, local->op_errno,
- NULL);
- }
- return 0;
+int32_t
+afr_xattrop_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, xattr, xdata);
}
-
int
-afr_fremovexattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_xattrop_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- int call_count = -1;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->wait_count) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_xattrop_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->xattrop, &local->loc,
+ local->cont.xattrop.optype, local->cont.xattrop.xattr,
+ local->xdata_req);
+ return 0;
+}
- call_count = afr_frame_return (frame);
+int
+afr_xattrop_unwind(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
+ local = frame->local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
return 0;
-}
+ AFR_STACK_UNWIND(xattrop, main_frame, local->op_ret, local->op_errno,
+ local->xattr_rsp, local->xdata_rsp);
+ return 0;
+}
int32_t
-afr_fremovexattr_wind (call_frame_t *frame, xlator_t *this)
+afr_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
- call_count = afr_pre_op_done_children_count (local->transaction.pre_op,
- priv->child_count);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
- local->call_count = call_count;
+ local->cont.xattrop.xattr = dict_ref(xattr);
+ local->cont.xattrop.optype = optype;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
- for (i = 0; i < priv->child_count; i++) {
- if (local->transaction.pre_op[i]) {
- STACK_WIND_COOKIE (frame, afr_fremovexattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fremovexattr,
- local->fd,
- local->cont.removexattr.name,
- NULL);
-
- if (!--call_count)
- break;
- }
- }
+ local->transaction.wind = afr_xattrop_wind;
+ local->transaction.unwind = afr_xattrop_unwind;
- return 0;
-}
+ loc_copy(&local->loc, loc);
+ ret = afr_set_inode_local(this, local, loc->inode);
+ if (ret)
+ goto out;
+ local->op = GF_FOP_XATTROP;
-int
-afr_fremovexattr_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
- local->transaction.unwind (frame, this);
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
- AFR_STACK_DESTROY (frame);
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
+int32_t
+afr_fxattrop_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, NULL,
+ NULL, xattr, xdata);
+}
int
-afr_fremovexattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata)
+afr_fxattrop_wind(call_frame_t *frame, xlator_t *this, int subvol)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- int ret = -1;
- int op_ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (this, out);
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ STACK_WIND_COOKIE(frame, afr_fxattrop_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fxattrop, local->fd,
+ local->cont.xattrop.optype, local->cont.xattrop.xattr,
+ local->xdata_req);
+ return 0;
+}
- GF_IF_NATIVE_XATTR_GOTO ("trusted.afr.*",
- name, op_errno, out);
+int
+afr_fxattrop_unwind(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.afr.*",
- name, op_errno, out);
+ local = frame->local;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this->private, out);
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
+ return 0;
- priv = this->private;
+ AFR_STACK_UNWIND(fxattrop, main_frame, local->op_ret, local->op_errno,
+ local->xattr_rsp, local->xdata_rsp);
+ return 0;
+}
- QUORUM_CHECK(fremovexattr, out);
+int32_t
+afr_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int op_errno = ENOMEM;
+
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->cont.xattrop.xattr = dict_ref(xattr);
+ local->cont.xattrop.optype = optype;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ local->transaction.wind = afr_fxattrop_wind;
+ local->transaction.unwind = afr_fxattrop_unwind;
+
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
+
+ local->op = GF_FOP_FXATTROP;
+
+ local->transaction.main_frame = frame;
+ local->transaction.start = LLONG_MAX - 1;
+ local->transaction.len = 0;
+
+ ret = afr_transaction(transaction_frame, this, AFR_METADATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
+out:
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- goto out;
- }
+ AFR_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
- AFR_LOCAL_ALLOC_OR_GOTO (local, out);
+int
+afr_fsync_unwind(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
+ local = frame->local;
- transaction_frame->local = local;
+ main_frame = afr_transaction_detach_fop_frame(frame);
+ if (!main_frame)
+ return 0;
- local->op_ret = -1;
+ AFR_STACK_UNWIND(fsync, main_frame, local->op_ret, local->op_errno,
+ &local->cont.inode_wfop.prebuf,
+ &local->cont.inode_wfop.postbuf, local->xdata_rsp);
- local->cont.removexattr.name = gf_strdup (name);
+ return 0;
+}
- local->transaction.fop = afr_fremovexattr_wind;
- local->transaction.done = afr_fremovexattr_done;
- local->transaction.unwind = afr_fremovexattr_unwind;
+int
+afr_fsync_wind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ return __afr_inode_write_cbk(frame, cookie, this, op_ret, op_errno, prebuf,
+ postbuf, NULL, xdata);
+}
- local->fd = fd_ref (fd);
+int
+afr_fsync_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
+ local = frame->local;
+ priv = this->private;
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
+ STACK_WIND_COOKIE(frame, afr_fsync_wind_cbk, (void *)(long)subvol,
+ priv->children[subvol],
+ priv->children[subvol]->fops->fsync, local->fd,
+ local->cont.fsync.datasync, local->xdata_req);
+ return 0;
+}
- op_ret = 0;
+int
+afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *transaction_frame = NULL;
+ int ret = -1;
+ int32_t op_errno = ENOMEM;
+ int8_t last_fsync = 0;
+
+ AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
+ transaction_frame = copy_frame(frame);
+ if (!transaction_frame)
+ goto out;
+
+ local = AFR_FRAME_INIT(transaction_frame, op_errno);
+ if (!local)
+ goto out;
+
+ if (xdata) {
+ local->xdata_req = dict_copy_with_ref(xdata, NULL);
+ if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) {
+ if (last_fsync) {
+ local->transaction.disable_delayed_post_op = _gf_true;
+ }
+ }
+ } else {
+ local->xdata_req = dict_new();
+ }
+
+ if (!local->xdata_req)
+ goto out;
+
+ local->fd = fd_ref(fd);
+ ret = afr_set_inode_local(this, local, fd->inode);
+ if (ret)
+ goto out;
+
+ local->op = GF_FOP_FSYNC;
+ local->cont.fsync.datasync = datasync;
+
+ if (afr_fd_has_witnessed_unstable_write(this, fd->inode)) {
+ /* don't care. we only wanted to CLEAR the bit */
+ }
+
+ local->transaction.wind = afr_fsync_wind;
+ local->transaction.unwind = afr_fsync_unwind;
+
+ local->transaction.main_frame = frame;
+
+ ret = afr_transaction(transaction_frame, this, AFR_DATA_TRANSACTION);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto out;
+ }
+
+ return 0;
out:
- if (op_ret == -1) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL);
- }
+ if (transaction_frame)
+ AFR_STACK_DESTROY(transaction_frame);
- return 0;
+ AFR_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
}
diff --git a/xlators/cluster/afr/src/afr-inode-write.h b/xlators/cluster/afr/src/afr-inode-write.h
index ed11079fd1b..a787069b7a1 100644
--- a/xlators/cluster/afr/src/afr-inode-write.h
+++ b/xlators/cluster/afr/src/afr-inode-write.h
@@ -12,60 +12,83 @@
#define __INODE_WRITE_H__
int32_t
-afr_chmod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dict_t *xdata);
+afr_chmod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dict_t *xdata);
int32_t
-afr_chown (call_frame_t *frame, xlator_t *this,
- loc_t *loc, uid_t uid, gid_t gid, dict_t *xdata);
+afr_chown(call_frame_t *frame, xlator_t *this, loc_t *loc, uid_t uid, gid_t gid,
+ dict_t *xdata);
int
-afr_fchown (call_frame_t *frame, xlator_t *this,
- fd_t *fd, uid_t uid, gid_t gid, dict_t *xdata);
+afr_fchown(call_frame_t *frame, xlator_t *this, fd_t *fd, uid_t uid, gid_t gid,
+ dict_t *xdata);
int32_t
-afr_fchmod (call_frame_t *frame, xlator_t *this,
- fd_t *fd, mode_t mode, dict_t *xdata);
+afr_fchmod(call_frame_t *frame, xlator_t *this, fd_t *fd, mode_t mode,
+ dict_t *xdata);
int32_t
-afr_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- uint32_t flags, struct iobref *iobref, dict_t *xdata);
+afr_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata);
int32_t
-afr_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset, dict_t *xdata);
+afr_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata);
int32_t
-afr_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset, dict_t *xdata);
+afr_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata);
int32_t
-afr_utimens (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct timespec tv[2], dict_t *xdata);
+afr_utimens(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct timespec tv[2], dict_t *xdata);
int
-afr_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *buf, int32_t valid, dict_t *xdata);
+afr_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *buf,
+ int32_t valid, dict_t *xdata);
int
-afr_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *buf, int32_t valid, dict_t *xdata);
+afr_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *buf,
+ int32_t valid, dict_t *xdata);
int32_t
-afr_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata);
+afr_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata);
int32_t
-afr_fsetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *dict, int32_t flags, dict_t *xdata);
+afr_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata);
int32_t
-afr_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata);
+afr_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
int32_t
-afr_fremovexattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata);
+afr_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata);
+int
+afr_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+
+int
+afr_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata);
+
+int
+afr_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata);
+
+int32_t
+afr_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+int32_t
+afr_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+int
+afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
#endif /* __INODE_WRITE_H__ */
diff --git a/xlators/cluster/afr/src/afr-lk-common.c b/xlators/cluster/afr/src/afr-lk-common.c
index 92356c82e4c..bc8eabe0f43 100644
--- a/xlators/cluster/afr/src/afr-lk-common.c
+++ b/xlators/cluster/afr/src/afr-lk-common.c
@@ -8,2290 +8,784 @@
cases as published by the Free Software Foundation.
*/
-#include "dict.h"
-#include "byte-order.h"
-#include "common-utils.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/common-utils.h>
#include "afr.h"
#include "afr-transaction.h"
+#include "afr-messages.h"
#include <signal.h>
-
-#define LOCKED_NO 0x0 /* no lock held */
-#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */
-#define LOCKED_LOWER 0x2 /* for lower path */
-
-#define AFR_TRACE_INODELK_IN(frame, this, params ...) \
- do { \
- afr_private_t *_priv = this->private; \
- if (!_priv->inodelk_trace) \
- break; \
- afr_trace_inodelk_in (frame, this, params); \
- } while (0);
-
-#define AFR_TRACE_INODELK_OUT(frame, this, params ...) \
- do { \
- afr_private_t *_priv = this->private; \
- if (!_priv->inodelk_trace) \
- break; \
- afr_trace_inodelk_out (frame, this, params); \
- } while (0);
-
-#define AFR_TRACE_ENTRYLK_IN(frame, this, params ...) \
- do { \
- afr_private_t *_priv = this->private; \
- if (!_priv->entrylk_trace) \
- break; \
- afr_trace_entrylk_in (frame, this, params); \
- } while (0);
-
-#define AFR_TRACE_ENTRYLK_OUT(frame, this, params ...) \
- do { \
- afr_private_t *_priv = this->private; \
- if (!_priv->entrylk_trace) \
- break; \
- afr_trace_entrylk_out (frame, this, params); \
- } while (0);
-
-int
-afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index);
-
-static int
-afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this);
-
-static uint64_t afr_lock_number = 1;
-
-static uint64_t
-get_afr_lock_number ()
-{
- return (++afr_lock_number);
-}
-
-int
-afr_set_lock_number (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->lock_number = get_afr_lock_number ();
-
- return 0;
-}
+#define LOCKED_NO 0x0 /* no lock held */
+#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path */
+#define LOCKED_LOWER 0x2 /* for lower path */
void
-afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner)
+afr_lockee_cleanup(afr_lockee_t *lockee)
{
- gf_log (this->name, GF_LOG_TRACE,
- "Setting lk-owner=%llu",
- (unsigned long long) (unsigned long)lk_owner);
+ if (lockee->fd) {
+ fd_unref(lockee->fd);
+ lockee->fd = NULL;
+ } else {
+ loc_wipe(&lockee->loc);
+ }
- set_lk_owner_from_ptr (&frame->root->lk_owner, lk_owner);
-}
+ GF_FREE(lockee->basename);
+ lockee->basename = NULL;
+ GF_FREE(lockee->locked_nodes);
+ lockee->locked_nodes = NULL;
-static int
-is_afr_lock_selfheal (afr_local_t *local)
-{
- afr_internal_lock_t *int_lock = NULL;
- int ret = -1;
-
- int_lock = &local->internal_lock;
-
- switch (int_lock->selfheal_lk_type) {
- case AFR_DATA_SELF_HEAL_LK:
- case AFR_METADATA_SELF_HEAL_LK:
- ret = 1;
- break;
- case AFR_ENTRY_SELF_HEAL_LK:
- ret = 0;
- break;
- }
-
- return ret;
-
-}
-
-int32_t
-internal_lock_count (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int32_t call_count = 0;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- if (local->fd) {
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && local->fd_open_on[i])
- ++call_count;
- }
- } else {
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i])
- ++call_count;
- }
- }
-
- return call_count;
-}
-
-static void
-afr_print_inodelk (char *str, int size, int cmd,
- struct gf_flock *flock, gf_lkowner_t *owner)
-{
- char *cmd_str = NULL;
- char *type_str = NULL;
-
- switch (cmd) {
-#if F_GETLK != F_GETLK64
- case F_GETLK64:
-#endif
- case F_GETLK:
- cmd_str = "GETLK";
- break;
-
-#if F_SETLK != F_SETLK64
- case F_SETLK64:
-#endif
- case F_SETLK:
- cmd_str = "SETLK";
- break;
-
-#if F_SETLKW != F_SETLKW64
- case F_SETLKW64:
-#endif
- case F_SETLKW:
- cmd_str = "SETLKW";
- break;
-
- default:
- cmd_str = "<null>";
- break;
- }
-
- switch (flock->l_type) {
- case F_RDLCK:
- type_str = "READ";
- break;
- case F_WRLCK:
- type_str = "WRITE";
- break;
- case F_UNLCK:
- type_str = "UNLOCK";
- break;
- default:
- type_str = "UNKNOWN";
- break;
- }
-
- snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, "
- "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
- cmd_str, type_str, (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid,
- lkowner_utoa (owner));
-
-}
-
-static void
-afr_print_lockee (char *str, int size, loc_t *loc, fd_t *fd,
- int child_index)
-{
- snprintf (str, size, "path=%s, fd=%p, child=%d",
- loc->path ? loc->path : "<nul>",
- fd ? fd : NULL,
- child_index);
+ return;
}
void
-afr_print_entrylk (char *str, int size, const char *basename,
- gf_lkowner_t *owner)
-{
- snprintf (str, size, "Basename=%s, lk-owner=%s",
- basename ? basename : "<nul>",
- lkowner_utoa (owner));
-}
-
-static void
-afr_print_verdict (int op_ret, int op_errno, char *str)
-{
- if (op_ret < 0) {
- if (op_errno == EAGAIN)
- strcpy (str, "EAGAIN");
- else
- strcpy (str, "FAILED");
- }
- else
- strcpy (str, "GRANTED");
-}
-
-static void
-afr_set_lock_call_type (afr_lock_call_type_t lock_call_type,
- char *lock_call_type_str,
- afr_internal_lock_t *int_lock)
-{
- switch (lock_call_type) {
- case AFR_INODELK_TRANSACTION:
- if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
- strcpy (lock_call_type_str, "AFR_INODELK_TRANSACTION");
- else
- strcpy (lock_call_type_str, "AFR_INODELK_SELFHEAL");
- break;
- case AFR_INODELK_NB_TRANSACTION:
- if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
- strcpy (lock_call_type_str, "AFR_INODELK_NB_TRANSACTION");
- else
- strcpy (lock_call_type_str, "AFR_INODELK_NB_SELFHEAL");
- break;
- case AFR_ENTRYLK_TRANSACTION:
- if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
- strcpy (lock_call_type_str, "AFR_ENTRYLK_TRANSACTION");
- else
- strcpy (lock_call_type_str, "AFR_ENTRYLK_SELFHEAL");
- break;
- case AFR_ENTRYLK_NB_TRANSACTION:
- if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK)
- strcpy (lock_call_type_str, "AFR_ENTRYLK_NB_TRANSACTION");
- else
- strcpy (lock_call_type_str, "AFR_ENTRYLK_NB_SELFHEAL");
- break;
- default:
- strcpy (lock_call_type_str, "UNKNOWN");
- break;
- }
-
-}
-
-static void
-afr_trace_inodelk_out (call_frame_t *frame, xlator_t *this,
- afr_lock_call_type_t lock_call_type,
- afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
- int op_ret, int op_errno, int32_t child_index)
+afr_lockees_cleanup(afr_internal_lock_t *int_lock)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- char lockee[256];
- char lock_call_type_str[256];
- char verdict[16];
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
+ int i = 0;
- afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
-
- afr_print_verdict (op_ret, op_errno, verdict);
-
- gf_log (this->name, GF_LOG_INFO,
- "[%s %s] [%s] lk-owner=%s Lockee={%s} Number={%llu}",
- lock_call_type_str,
- lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
- verdict, lkowner_utoa (&frame->root->lk_owner), lockee,
- (unsigned long long) int_lock->lock_number);
+ for (i = 0; i < int_lock->lockee_count; i++) {
+ afr_lockee_cleanup(&int_lock->lockee[i]);
+ }
+ return;
}
-
-static void
-afr_trace_inodelk_in (call_frame_t *frame, xlator_t *this,
- afr_lock_call_type_t lock_call_type,
- afr_lock_op_type_t lk_op_type, struct gf_flock *flock,
- int32_t cmd, int32_t child_index)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
-
- char lock[256];
- char lockee[256];
- char lock_call_type_str[256];
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- afr_print_inodelk (lock, 256, cmd, flock, &frame->root->lk_owner);
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
-
- afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
-
- gf_log (this->name, GF_LOG_INFO,
- "[%s %s] Lock={%s} Lockee={%s} Number={%llu}",
- lock_call_type_str,
- lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST",
- lock, lockee,
- (unsigned long long) int_lock->lock_number);
-
+int
+afr_entry_lockee_cmp(const void *l1, const void *l2)
+{
+ const afr_lockee_t *r1 = l1;
+ const afr_lockee_t *r2 = l2;
+ int ret = 0;
+ uuid_t gfid1 = {0};
+ uuid_t gfid2 = {0};
+
+ loc_gfid((loc_t *)&r1->loc, gfid1);
+ loc_gfid((loc_t *)&r2->loc, gfid2);
+ ret = gf_uuid_compare(gfid1, gfid2);
+ /*Entrylks with NULL basename are the 'smallest'*/
+ if (ret == 0) {
+ if (!r1->basename)
+ return -1;
+ if (!r2->basename)
+ return 1;
+ ret = strcmp(r1->basename, r2->basename);
+ }
+
+ if (ret <= 0)
+ return -1;
+ else
+ return 1;
}
-static void
-afr_trace_entrylk_in (call_frame_t *frame, xlator_t *this,
- afr_lock_call_type_t lock_call_type,
- afr_lock_op_type_t lk_op_type, const char *basename,
- int32_t child_index)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
-
- char lock[256];
- char lockee[256];
- char lock_call_type_str[256];
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- afr_print_entrylk (lock, 256, basename, &frame->root->lk_owner);
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
-
- afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
-
- gf_log (this->name, GF_LOG_INFO,
- "[%s %s] Lock={%s} Lockee={%s} Number={%llu}",
- lock_call_type_str,
- lk_op_type == AFR_LOCK_OP ? "LOCK REQUEST" : "UNLOCK REQUEST",
- lock, lockee,
- (unsigned long long) int_lock->lock_number);
-}
+int
+afr_lock_blocking(call_frame_t *frame, xlator_t *this, int child_index);
-static void
-afr_trace_entrylk_out (call_frame_t *frame, xlator_t *this,
- afr_lock_call_type_t lock_call_type,
- afr_lock_op_type_t lk_op_type, const char *basename,
- int op_ret, int op_errno, int32_t child_index)
+void
+afr_set_lk_owner(call_frame_t *frame, xlator_t *this, void *lk_owner)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- char lock[256];
- char lockee[256];
- char lock_call_type_str[256];
- char verdict[16];
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- afr_print_lockee (lockee, 256, &local->loc, local->fd, child_index);
-
- afr_set_lock_call_type (lock_call_type, lock_call_type_str, int_lock);
-
- afr_print_verdict (op_ret, op_errno, verdict);
-
- gf_log (this->name, GF_LOG_INFO,
- "[%s %s] [%s] Lock={%s} Lockee={%s} Number={%llu}",
- lock_call_type_str,
- lk_op_type == AFR_LOCK_OP ? "LOCK REPLY" : "UNLOCK REPLY",
- verdict,
- lock, lockee,
- (unsigned long long) int_lock->lock_number);
+ gf_msg_trace(this->name, 0, "Setting lk-owner=%llu",
+ (unsigned long long)(unsigned long)lk_owner);
+ set_lk_owner_from_ptr(&frame->root->lk_owner, lk_owner);
}
-static int
-transaction_lk_op (afr_local_t *local)
+int32_t
+internal_lock_count(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- int ret = -1;
-
- int_lock = &local->internal_lock;
-
- if (int_lock->transaction_lk_type == AFR_TRANSACTION_LK) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "lk op is for a transaction");
- ret = 1;
- }
- else if (int_lock->transaction_lk_type == AFR_SELFHEAL_LK) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "lk op is for a self heal");
-
- ret = 0;
- }
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int32_t call_count = 0;
+ int i = 0;
- if (ret == -1)
- gf_log (THIS->name, GF_LOG_DEBUG,
- "lk op is not set");
+ local = frame->local;
+ priv = this->private;
- return ret;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->child_up[i])
+ ++call_count;
+ }
+ return call_count;
}
-static int
-is_afr_lock_transaction (afr_local_t *local)
+int
+afr_add_entry_lockee(afr_local_t *local, loc_t *loc, char *basename,
+ int child_count)
{
- int ret = 0;
+ int ret = -ENOMEM;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ afr_lockee_t *lockee = &int_lock->lockee[int_lock->lockee_count];
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- ret = 1;
- break;
+ GF_ASSERT(int_lock->lockee_count < AFR_LOCKEE_COUNT_MAX);
+ loc_copy(&lockee->loc, loc);
+ lockee->basename = (basename) ? gf_strdup(basename) : NULL;
+ if (basename && !lockee->basename)
+ goto out;
- case AFR_ENTRY_RENAME_TRANSACTION:
- case AFR_ENTRY_TRANSACTION:
- ret = 0;
- break;
+ lockee->locked_count = 0;
+ lockee->locked_nodes = GF_CALLOC(child_count, sizeof(*lockee->locked_nodes),
+ gf_afr_mt_afr_node_character);
- }
+ if (!lockee->locked_nodes)
+ goto out;
- return ret;
+ ret = 0;
+ int_lock->lockee_count++;
+out:
+ if (ret) {
+ afr_lockee_cleanup(lockee);
+ }
+ return ret;
}
-static int
-initialize_entrylk_variables (call_frame_t *frame, xlator_t *this)
+int
+afr_add_inode_lockee(afr_local_t *local, int child_count)
{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
+ int ret = -ENOMEM;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ afr_lockee_t *lockee = &int_lock->lockee[int_lock->lockee_count];
- int i = 0;
+ if (local->fd) {
+ lockee->fd = fd_ref(local->fd);
+ } else {
+ loc_copy(&lockee->loc, &local->loc);
+ }
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
+ lockee->locked_count = 0;
+ lockee->locked_nodes = GF_CALLOC(child_count, sizeof(*lockee->locked_nodes),
+ gf_afr_mt_afr_node_character);
- int_lock->entrylk_lock_count = 0;
- int_lock->lock_op_ret = -1;
- int_lock->lock_op_errno = 0;
-
- for (i = 0; i < priv->child_count; i++) {
- int_lock->entry_locked_nodes[i] = 0;
- }
+ if (!lockee->locked_nodes)
+ goto out;
- return 0;
+ ret = 0;
+ int_lock->lockee_count++;
+out:
+ if (ret) {
+ afr_lockee_cleanup(lockee);
+ }
+ return ret;
}
static int
-initialize_inodelk_variables (call_frame_t *frame, xlator_t *this)
+initialize_internal_lock_variables(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->inodelk_lock_count = 0;
- int_lock->lock_op_ret = -1;
- int_lock->lock_op_errno = 0;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_private_t *priv = NULL;
- for (i = 0; i < priv->child_count; i++) {
- int_lock->inode_locked_nodes[i] = 0;
- }
+ int i = 0;
- return 0;
-}
-
-loc_t *
-lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2)
-{
- int ret = 0;
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- ret = uuid_compare (l1->inode->gfid, l2->inode->gfid);
+ int_lock->lock_count = 0;
+ int_lock->lock_op_ret = -1;
+ int_lock->lock_op_errno = 0;
+ int_lock->lk_attempted_count = 0;
- if (ret == 0)
- ret = strcmp (b1, b2);
+ for (i = 0; i < AFR_LOCKEE_COUNT_MAX; i++) {
+ if (!int_lock->lockee[i].locked_nodes)
+ break;
+ int_lock->lockee[i].locked_count = 0;
+ memset(int_lock->lockee[i].locked_nodes, 0,
+ sizeof(*int_lock->lockee[i].locked_nodes) * priv->child_count);
+ }
- if (ret <= 0)
- return l1;
- else
- return l2;
+ return 0;
}
int
-afr_locked_nodes_count (unsigned char *locked_nodes, int child_count)
-
+afr_lockee_locked_nodes_count(afr_internal_lock_t *int_lock)
{
- int i = 0;
- int call_count = 0;
+ int call_count = 0;
+ int i = 0;
- for (i = 0; i < child_count; i++) {
- if (locked_nodes[i] & LOCKED_YES)
- call_count++;
- }
+ for (i = 0; i < int_lock->lockee_count; i++)
+ call_count += int_lock->lockee[i].locked_count;
- return call_count;
+ return call_count;
}
-/* FIXME: What if UNLOCK fails */
-static int32_t
-afr_unlock_common_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- int call_count = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK (&frame->lock);
- {
- call_count = --int_lock->lk_call_count;
- }
- UNLOCK (&frame->lock);
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "All internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- }
-
- return 0;
-}
+int
+afr_locked_nodes_count(unsigned char *locked_nodes, int child_count)
-static int32_t
-afr_unlock_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- int32_t child_index = (long)cookie;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
- AFR_UNLOCK_OP, NULL, op_ret,
- op_errno, child_index);
-
- if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
- gf_log (this->name, GF_LOG_INFO, "%s: unlock failed on %d "
- "unlock by %s", local->loc.path, child_index,
- lkowner_utoa (&frame->root->lk_owner));
- }
+ int i = 0;
+ int call_count = 0;
+ for (i = 0; i < child_count; i++) {
+ if (locked_nodes[i] & LOCKED_YES)
+ call_count++;
+ }
- int_lock->inode_locked_nodes[child_index] &= LOCKED_NO;
- if (local->transaction.eager_lock)
- local->transaction.eager_lock[child_index] = 0;
-
- afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, xdata);
-
- return 0;
-
+ return call_count;
}
-static int
-afr_unlock_inodelk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- struct gf_flock flock = {0,};
- struct gf_flock full_flock = {0,};
- struct gf_flock *flock_use = NULL;
- int call_count = 0;
- int i = 0;
- int piggyback = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- gf_boolean_t fd_lock_owner = _gf_false;
-
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = F_UNLCK;
-
- full_flock.l_type = F_UNLCK;
- call_count = afr_locked_nodes_count (int_lock->inode_locked_nodes,
- priv->child_count);
-
- int_lock->lk_call_count = call_count;
-
- if (!call_count) {
- gf_log (this->name, GF_LOG_TRACE,
- "No internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- goto out;
- }
-
- if (local->fd)
- fd_ctx = afr_fd_ctx_get (local->fd, this);
-
- for (i = 0; i < priv->child_count; i++) {
- if ((int_lock->inode_locked_nodes[i] & LOCKED_YES)
- != LOCKED_YES)
- continue;
-
- if (local->fd) {
- flock_use = &flock;
- if (!local->transaction.eager_lock[i]) {
- if (fd_lock_owner) {
- afr_set_lk_owner (frame, this,
- frame->root);
- fd_lock_owner = _gf_false;
- }
- goto wind;
- }
-
- piggyback = 0;
-
- LOCK (&local->fd->lock);
- {
- if (fd_ctx->lock_piggyback[i]) {
- fd_ctx->lock_piggyback[i]--;
- piggyback = 1;
- } else {
- fd_ctx->lock_acquired[i]--;
- }
- }
- UNLOCK (&local->fd->lock);
-
- if (!fd_lock_owner) {
- afr_set_lk_owner (frame, this, local->fd);
- fd_lock_owner = _gf_true;
- }
- if (piggyback) {
- afr_unlock_inodelk_cbk (frame, (void *) (long) i,
- this, 1, 0, NULL);
- if (!--call_count)
- break;
- continue;
- }
-
- flock_use = &full_flock;
- wind:
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_TRANSACTION,
- AFR_UNLOCK_OP, flock_use, F_SETLK,
- i);
-
- STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
- (void *) (long)i,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- this->name, local->fd,
- F_SETLK, flock_use, NULL);
-
- if (!--call_count)
- break;
-
- } else {
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_TRANSACTION,
- AFR_UNLOCK_OP, &flock, F_SETLK, i);
-
- STACK_WIND_COOKIE (frame, afr_unlock_inodelk_cbk,
- (void *) (long)i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name, &local->loc,
- F_SETLK, &flock, NULL);
-
- if (!--call_count)
- break;
- }
- }
-out:
- return 0;
-}
-
-static int32_t
-afr_unlock_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- int32_t child_index = (long)cookie;
-
- local = frame->local;
-
- AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_UNLOCK_OP, NULL, op_ret,
- op_errno, child_index);
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: unlock failed on %d, reason: %s",
- local->loc.path, child_index, strerror (op_errno));
- }
-
- afr_unlock_common_cbk (frame, cookie, this, op_ret, op_errno, NULL);
-
- return 0;
-}
-
-static int
-afr_unlock_entrylk (call_frame_t *frame, xlator_t *this)
+static void
+afr_log_locks_failure(call_frame_t *frame, char *where, char *what,
+ int op_errno)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int call_count = 0;
- int i = -1;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
-
- call_count = afr_locked_nodes_count (int_lock->entry_locked_nodes,
- priv->child_count);
- int_lock->lk_call_count = call_count;
-
- if (!call_count){
- gf_log (this->name, GF_LOG_TRACE,
- "No internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->entry_locked_nodes[i] & LOCKED_YES) {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_NB_TRANSACTION,
- AFR_UNLOCK_OP, basename, i);
-
- STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- loc, basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
-
- if (!--call_count)
- break;
- }
- }
+ xlator_t *this = frame->this;
+ gf_lkowner_t *lk_owner = &frame->root->lk_owner;
+ afr_local_t *local = frame->local;
+ const char *fop = NULL;
+ char *gfid = NULL;
+ const char *name = NULL;
-out:
- return 0;
+ fop = gf_fop_list[local->op];
+ switch (local->transaction.type) {
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ case AFR_ENTRY_TRANSACTION:
+ switch (local->op) {
+ case GF_FOP_LINK:
+ gfid = uuid_utoa(local->newloc.pargfid);
+ name = local->newloc.name;
+ break;
+ default:
+ gfid = uuid_utoa(local->loc.pargfid);
+ name = local->loc.name;
+ break;
+ }
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ AFR_MSG_INTERNAL_LKS_FAILED,
+ "Unable to do entry %s with lk-owner:%s on %s "
+ "while attempting %s on {pgfid:%s, name:%s}.",
+ what, lkowner_utoa(lk_owner), where, fop, gfid, name);
+ break;
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ gfid = uuid_utoa(local->inode->gfid);
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ AFR_MSG_INTERNAL_LKS_FAILED,
+ "Unable to do inode %s with lk-owner:%s on %s "
+ "while attempting %s on gfid:%s.",
+ what, lkowner_utoa(lk_owner), where, fop, gfid);
+ break;
+ }
}
static int32_t
-afr_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+afr_unlock_common_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- int child_index = (long) cookie;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int lockee_num = 0;
+ int call_count = 0;
+ int child_index = 0;
+ int ret = 0;
- local = frame->local;
- int_lock = &local->internal_lock;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+ lockee_num = (int)((long)cookie) / priv->child_count;
+ child_index = (int)((long)cookie) % priv->child_count;
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
- }
-
- local->op_errno = op_errno;
- int_lock->lock_op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if ((op_ret == -1) &&
- (op_errno == ENOSYS)) {
- afr_unlock (frame, this);
- } else {
- if (op_ret == 0) {
- int_lock->locked_nodes[child_index] |= LOCKED_YES;
- int_lock->lock_count++;
- }
- afr_lock_blocking (frame, this, child_index + 1);
- }
+ if (op_ret < 0 && op_errno != ENOTCONN && op_errno != EBADFD) {
+ afr_log_locks_failure(frame, priv->children[child_index]->name,
+ "unlock", op_errno);
+ }
- return 0;
-}
+ int_lock->lockee[lockee_num].locked_nodes[child_index] &= LOCKED_NO;
+ if (local->transaction.type == AFR_DATA_TRANSACTION && op_ret != 1)
+ ret = afr_write_subvol_reset(frame, this);
-static int32_t
-afr_blocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_TRANSACTION,
- AFR_LOCK_OP, NULL, op_ret,
- op_errno, (long) cookie);
+ LOCK(&frame->lock);
+ {
+ call_count = --int_lock->lk_call_count;
+ }
+ UNLOCK(&frame->lock);
- afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
- return 0;
+ if (call_count == 0) {
+ int_lock->lock_cbk(frame, this);
+ }
+ return ret;
}
-static int32_t
-afr_lock_lower_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *higher_name = NULL;
- int child_index = (long) cookie;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
-
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
-
- local->op_ret = op_ret;
- }
-
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (op_ret != 0) {
- afr_copy_locked_nodes (frame, this);
- afr_unlock (frame, this);
- goto out;
- } else {
- int_lock->lower_locked_nodes[child_index] |= LOCKED_LOWER;
- int_lock->lock_count++;
- }
-
- /* The lower path has been locked. Now lock the higher path */
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, higher_name, child_index);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, higher, higher_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
-
-out:
- return 0;
-}
-
-static int32_t
-afr_blocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, NULL, op_ret,
- op_errno, (long)cookie);
-
- afr_lock_cbk (frame, cookie, this, op_ret, op_errno, xdata);
- return 0;
-}
-
-static int
-afr_copy_locked_nodes (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- memcpy (int_lock->inode_locked_nodes,
- int_lock->locked_nodes,
- priv->child_count);
- int_lock->inodelk_lock_count = int_lock->lock_count;
- break;
-
- case AFR_ENTRY_RENAME_TRANSACTION:
+void
+afr_internal_lock_wind(call_frame_t *frame,
+ int32_t (*cbk)(call_frame_t *, void *, xlator_t *,
+ int32_t, int32_t, dict_t *),
+ void *cookie, int child, int lockee_num,
+ gf_boolean_t blocking, gf_boolean_t unlock)
+{
+ afr_local_t *local = frame->local;
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ entrylk_cmd cmd = ENTRYLK_LOCK_NB;
+ int32_t cmd1 = F_SETLK;
+ struct gf_flock flock = {
+ 0,
+ };
+
+ switch (local->transaction.type) {
case AFR_ENTRY_TRANSACTION:
- memcpy (int_lock->entry_locked_nodes,
- int_lock->locked_nodes,
- priv->child_count);
- int_lock->entrylk_lock_count = int_lock->lock_count;
- break;
- }
-
- return 0;
-
-}
-
-int
-afr_lock_blocking (call_frame_t *frame, xlator_t *this, int child_index)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- loc_t *lower = NULL;
- const char *lower_name = NULL;
- struct gf_flock flock = {0,};
- uint64_t ctx = 0;
- int ret = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
-
- if (local->fd) {
- ret = fd_ctx_get (local->fd, this, &ctx);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "unable to get fd ctx for fd=%p",
- local->fd);
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
-
- afr_copy_locked_nodes (frame, this);
-
- afr_unlock (frame, this);
-
- return 0;
- }
-
- /* skip over children that or down
- or don't have the fd open */
-
- while ((child_index < priv->child_count)
- && (!local->child_up[child_index] ||
- !local->fd_open_on[child_index]))
-
- child_index++;
- } else {
- /* skip over children that are down */
- while ((child_index < priv->child_count)
- && !local->child_up[child_index])
- child_index++;
- }
-
- if ((child_index == priv->child_count) &&
- int_lock->lock_count == 0) {
-
- gf_log (this->name, GF_LOG_INFO,
- "unable to lock on even one child");
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
-
- afr_copy_locked_nodes (frame, this);
-
- afr_unlock(frame, this);
-
- return 0;
-
- }
-
- if ((child_index == priv->child_count)
- || (int_lock->lock_count == int_lock->lk_expected_count)) {
-
- /* we're done locking */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "we're done locking");
-
- afr_copy_locked_nodes (frame, this);
-
- int_lock->lock_op_ret = 0;
- int_lock->lock_cbk (frame, this);
- return 0;
- }
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
-
- if (local->fd) {
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_TRANSACTION,
- AFR_LOCK_OP, &flock, F_SETLKW,
- child_index);
-
- STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->finodelk,
- this->name, local->fd,
- F_SETLKW, &flock, NULL);
-
- } else {
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_TRANSACTION,
- AFR_LOCK_OP, &flock, F_SETLKW,
- child_index);
-
- STACK_WIND_COOKIE (frame, afr_blocking_inodelk_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->inodelk,
- this->name, &local->loc,
- F_SETLKW, &flock, NULL);
- }
-
- break;
-
case AFR_ENTRY_RENAME_TRANSACTION:
- {
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
+ if (unlock) {
+ cmd = ENTRYLK_UNLOCK;
+ } else if (blocking) { /*Doesn't make sense to have blocking
+ unlock*/
+ cmd = ENTRYLK_LOCK;
+ }
+
+ if (local->fd) {
+ STACK_WIND_COOKIE(frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->fentrylk,
+ int_lock->domain,
+ int_lock->lockee[lockee_num].fd,
+ int_lock->lockee[lockee_num].basename, cmd,
+ ENTRYLK_WRLCK, NULL);
+ } else {
+ STACK_WIND_COOKIE(frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->entrylk,
+ int_lock->domain,
+ &int_lock->lockee[lockee_num].loc,
+ int_lock->lockee[lockee_num].basename, cmd,
+ ENTRYLK_WRLCK, NULL);
+ }
+ break;
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- AFR_TRACE_ENTRYLK_IN (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, lower_name, child_index);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_lower_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name, lower, lower_name,
- ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
-
- break;
- }
-
- case AFR_ENTRY_TRANSACTION:
- if (local->fd) {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, local->transaction.basename,
- child_index);
-
- STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->fentrylk,
- this->name, local->fd,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
- } else {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, local->transaction.basename,
- child_index);
-
- STACK_WIND_COOKIE (frame, afr_blocking_entrylk_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->entrylk,
- this->name,
- &local->transaction.parent_loc,
- local->transaction.basename,
- ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
- }
-
- break;
- }
-
- return 0;
-}
-
-int32_t
-afr_blocking_lock (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int up_count = 0;
-
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
-
- switch (local->transaction.type) {
case AFR_DATA_TRANSACTION:
case AFR_METADATA_TRANSACTION:
- initialize_inodelk_variables (frame, this);
- break;
+ flock = int_lock->lockee[lockee_num].flock;
+ if (unlock) {
+ flock.l_type = F_UNLCK;
+ } else if (blocking) { /*Doesn't make sense to have blocking
+ unlock*/
+ cmd1 = F_SETLKW;
+ }
+
+ if (local->fd) {
+ STACK_WIND_COOKIE(
+ frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->finodelk, int_lock->domain,
+ int_lock->lockee[lockee_num].fd, cmd1, &flock, NULL);
+ } else {
+ STACK_WIND_COOKIE(
+ frame, cbk, cookie, priv->children[child],
+ priv->children[child]->fops->inodelk, int_lock->domain,
+ &int_lock->lockee[lockee_num].loc, cmd1, &flock, NULL);
+ }
+ break;
+ }
+}
- case AFR_ENTRY_RENAME_TRANSACTION:
- up_count = afr_up_children_count (local->child_up,
- priv->child_count);
- int_lock->lk_expected_count = 2 * up_count;
- //fallthrough
- case AFR_ENTRY_TRANSACTION:
- initialize_entrylk_variables (frame, this);
+static int
+afr_unlock_now(call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int child_index = 0;
+ int lockee_num = 0;
+ int i = -1;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ call_count = afr_lockee_locked_nodes_count(int_lock);
+
+ int_lock->lk_call_count = call_count;
+
+ if (!call_count) {
+ gf_msg_trace(this->name, 0, "No internal locks unlocked");
+ int_lock->lock_cbk(frame, this);
+ goto out;
+ }
+
+ for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
+ lockee_num = i / priv->child_count;
+ child_index = i % priv->child_count;
+ if (int_lock->lockee[lockee_num].locked_nodes[child_index] &
+ LOCKED_YES) {
+ afr_internal_lock_wind(frame, afr_unlock_common_cbk,
+ (void *)(long)i, child_index, lockee_num,
+ _gf_false, _gf_true);
+ if (!--call_count)
break;
}
+ }
- afr_lock_blocking (frame, this, 0);
-
- return 0;
+out:
+ return 0;
}
static int32_t
-afr_nonblocking_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- AFR_TRACE_ENTRYLK_OUT (frame, this, AFR_ENTRYLK_TRANSACTION,
- AFR_LOCK_OP, NULL, op_ret,
- op_errno, (long) cookie);
-
- if (op_ret < 0 ) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
-
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
+afr_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int cky = (long)cookie;
+ int child_index = 0;
+ int lockee_num = 0;
+
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ child_index = ((int)cky) % priv->child_count;
+ lockee_num = ((int)cky) / priv->child_count;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_msg(this->name, GF_LOG_ERROR, ENOSYS,
+ AFR_MSG_LOCK_XLATOR_NOT_LOADED,
+ "subvolume does not support locking. "
+ "please load features/locks xlator on server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+ }
+
+ local->op_errno = op_errno;
+ int_lock->lock_op_errno = op_errno;
+ }
+
+ int_lock->lk_attempted_count++;
+ }
+ UNLOCK(&frame->lock);
+
+ if ((op_ret == -1) && (op_errno == ENOSYS)) {
+ afr_unlock_now(frame, this);
+ } else {
+ if (op_ret == 0) {
+ int_lock->lockee[lockee_num]
+ .locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_num].locked_count++;
+ int_lock->lock_count++;
+ if (local->transaction.type == AFR_DATA_TRANSACTION) {
+ LOCK(&local->inode->lock);
+ {
+ local->inode_ctx->lock_count++;
}
- } else if (op_ret == 0) {
- int_lock->entry_locked_nodes[child_index] |= LOCKED_YES;
- int_lock->entrylk_lock_count++;
+ UNLOCK(&local->inode->lock);
+ }
}
+ afr_lock_blocking(frame, this, cky + 1);
+ }
- LOCK (&frame->lock);
- {
- call_count = --int_lock->lk_call_count;
- }
- UNLOCK (&frame->lock);
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "Last locking reply received");
- /* all locks successful. Proceed to call FOP */
- if (int_lock->entrylk_lock_count ==
- int_lock->lk_expected_count) {
- gf_log (this->name, GF_LOG_TRACE,
- "All servers locked. Calling the cbk");
- int_lock->lock_op_ret = 0;
- int_lock->lock_cbk (frame, this);
- }
- /* Not all locks were successful. Unlock and try locking
- again, this time with serially blocking locks */
- else {
- gf_log (this->name, GF_LOG_TRACE,
- "%d servers locked. Trying again with blocking calls",
- int_lock->lock_count);
-
- afr_unlock(frame, this);
- }
- }
-
- return 0;
+ return 0;
}
-void
-afr_mark_fd_open_on (afr_local_t *local, afr_fd_ctx_t *fd_ctx,
- size_t child_count)
+static gf_boolean_t
+_is_lock_wind_needed(afr_local_t *local, int child_index)
{
- int i = 0;
-
- GF_ASSERT (local->fd_open_on);
-
- memset (local->fd_open_on, 0, sizeof (*local->fd_open_on)*child_count);
- for (i = 0; i < child_count; i++)
- if (fd_ctx->opened_on[i] == AFR_FD_OPENED)
- local->fd_open_on[i] = 1;
-}
-
-int
-afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int32_t call_count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- initialize_entrylk_variables (frame, this);
-
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
-
- if (local->fd) {
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- if (!fd_ctx) {
- gf_log (this->name, GF_LOG_INFO,
- "unable to get fd ctx for fd=%p",
- local->fd);
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
- local->op_errno = EINVAL;
- int_lock->lock_op_errno = EINVAL;
-
- return -1;
- }
-
- afr_mark_fd_open_on (local, fd_ctx, priv->child_count);
- call_count = internal_lock_count (frame, this);
- int_lock->lk_call_count = call_count;
- int_lock->lk_expected_count = call_count;
+ if (!local->child_up[child_index])
+ return _gf_false;
- if (!call_count) {
- gf_log (this->name, GF_LOG_INFO,
- "fd not open on any subvolumes. aborting.");
- afr_unlock (frame, this);
- goto out;
- }
-
- /* Send non-blocking entrylk calls only on up children
- and where the fd has been opened */
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i] && local->fd_open_on[i]) {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP, basename, i);
-
- STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fentrylk,
- this->name, local->fd,
- basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
- NULL);
- }
- }
- } else {
- GF_ASSERT (loc);
-
- call_count = internal_lock_count (frame, this);
- int_lock->lk_call_count = call_count;
- int_lock->lk_expected_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_NB_TRANSACTION,
- AFR_LOCK_OP, basename, i);
-
- STACK_WIND_COOKIE (frame, afr_nonblocking_entrylk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name, loc, basename,
- ENTRYLK_LOCK_NB, ENTRYLK_WRLCK,
- NULL);
-
- if (!--call_count)
- break;
-
- }
- }
- }
-out:
- return 0;
+ return _gf_true;
}
-int32_t
-afr_nonblocking_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+static gf_boolean_t
+is_blocking_locks_count_sufficient(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
- afr_fd_ctx_t *fd_ctx = NULL;
-
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ int child = 0;
+ int nlockee = 0;
+ int lockee_count = 0;
+ gf_boolean_t ret = _gf_true;
- local = frame->local;
- int_lock = &local->internal_lock;
+ local = frame->local;
+ priv = this->private;
+ int_lock = &local->internal_lock;
+ lockee_count = int_lock->lockee_count;
- AFR_TRACE_INODELK_OUT (frame, this, AFR_INODELK_NB_TRANSACTION,
- AFR_LOCK_OP, NULL, op_ret,
- op_errno, (long) cookie);
-
- if (op_ret < 0) {
- if (op_errno == ENOSYS) {
- /* return ENOTSUP */
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume does not support locking. "
- "please load features/locks xlator on server");
- local->op_ret = op_ret;
- int_lock->lock_op_ret = op_ret;
- int_lock->lock_op_errno = op_errno;
- local->op_errno = op_errno;
- }
- if (local->transaction.eager_lock)
- local->transaction.eager_lock[child_index] = 0;
- } else {
- int_lock->inode_locked_nodes[child_index]
- |= LOCKED_YES;
- int_lock->inodelk_lock_count++;
-
- if (local->transaction.eager_lock &&
- local->transaction.eager_lock[child_index] && local->fd) {
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- /* piggybacked */
-
- if (op_ret == 1) {
- /* piggybacked */
- } else if (op_ret == 0) {
- /* lock acquired from server */
- LOCK (&local->fd->lock);
- {
- fd_ctx->lock_acquired[child_index]++;
- }
- UNLOCK (&local->fd->lock);
- }
- }
- }
-
- LOCK (&frame->lock);
- {
- call_count = --int_lock->lk_call_count;
- }
- UNLOCK (&frame->lock);
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "Last inode locking reply received");
- /* all locks successful. Proceed to call FOP */
- if (int_lock->inodelk_lock_count ==
- int_lock->lk_expected_count) {
- gf_log (this->name, GF_LOG_TRACE,
- "All servers locked. Calling the cbk");
- int_lock->lock_op_ret = 0;
- int_lock->lock_cbk (frame, this);
- }
- /* Not all locks were successful. Unlock and try locking
- again, this time with serially blocking locks */
- else {
- gf_log (this->name, GF_LOG_TRACE,
- "%d servers locked. Trying again with blocking calls",
- int_lock->lock_count);
-
- afr_unlock(frame, this);
- }
+ if (int_lock->lock_count == 0) {
+ afr_log_locks_failure(frame, "any subvolume", "lock",
+ int_lock->lock_op_errno);
+ return _gf_false;
+ }
+ /* For FOPS that take multiple sets of locks (mkdir, rename),
+ * there must be at least one brick on which the locks from
+ * all lock sets were successful. */
+ for (child = 0; child < priv->child_count; child++) {
+ ret = _gf_true;
+ for (nlockee = 0; nlockee < lockee_count; nlockee++) {
+ if (!(int_lock->lockee[nlockee].locked_nodes[child] & LOCKED_YES))
+ ret = _gf_false;
}
+ if (ret)
+ return ret;
+ }
+ if (!ret)
+ afr_log_locks_failure(frame, "all", "lock", int_lock->lock_op_errno);
- return 0;
+ return ret;
}
int
-afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- int32_t call_count = 0;
- int i = 0;
- int ret = 0;
- struct gf_flock flock = {0,};
- struct gf_flock full_flock = {0,};
- struct gf_flock *flock_use = NULL;
- int piggyback = 0;
- gf_boolean_t fd_lock_owner = _gf_false;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- flock.l_start = int_lock->lk_flock.l_start;
- flock.l_len = int_lock->lk_flock.l_len;
- flock.l_type = int_lock->lk_flock.l_type;
-
- full_flock.l_type = int_lock->lk_flock.l_type;
-
- initialize_inodelk_variables (frame, this);
-
- if (local->fd) {
- fd_ctx = afr_fd_ctx_get (local->fd, this);
- if (!fd_ctx) {
- gf_log (this->name, GF_LOG_INFO,
- "unable to get fd ctx for fd=%p",
- local->fd);
-
- local->op_ret = -1;
- int_lock->lock_op_ret = -1;
- local->op_errno = EINVAL;
- int_lock->lock_op_errno = EINVAL;
-
- ret = -1;
- goto out;
- }
-
- afr_mark_fd_open_on (local, fd_ctx, priv->child_count);
- call_count = internal_lock_count (frame, this);
- int_lock->lk_call_count = call_count;
- int_lock->lk_expected_count = call_count;
-
- if (!call_count) {
- gf_log (this->name, GF_LOG_INFO,
- "fd not open on any subvolumes. aborting.");
- afr_unlock (frame, this);
- goto out;
- }
-
- /* Send non-blocking inodelk calls only on up children
- and where the fd has been opened */
- for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i] || !local->fd_open_on[i])
- continue;
-
- flock_use = &flock;
- if (!priv->eager_lock) {
- if (fd_lock_owner) {
- afr_set_lk_owner (frame, this,
- frame->root);
- fd_lock_owner = _gf_false;
- }
- goto wind;
- }
-
- piggyback = 0;
- local->transaction.eager_lock[i] = 1;
-
- LOCK (&local->fd->lock);
- {
- if (fd_ctx->lock_acquired[i]) {
- fd_ctx->lock_piggyback[i]++;
- piggyback = 1;
- }
- }
- UNLOCK (&local->fd->lock);
-
- if (!fd_lock_owner) {
- afr_set_lk_owner (frame, this, local->fd);
- fd_lock_owner = _gf_true;
- }
-
- if (piggyback) {
- /* (op_ret == 1) => indicate piggybacked lock */
- afr_nonblocking_inodelk_cbk (frame, (void *) (long) i,
- this, 1, 0, NULL);
- if (!--call_count)
- break;
- continue;
- }
- flock_use = &full_flock;
- wind:
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_NB_TRANSACTION,
- AFR_LOCK_OP, flock_use, F_SETLK, i);
-
- STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->finodelk,
- this->name, local->fd,
- F_SETLK, flock_use, NULL);
-
- if (!--call_count)
- break;
- }
- } else {
- call_count = internal_lock_count (frame, this);
- int_lock->lk_call_count = call_count;
- int_lock->lk_expected_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
- continue;
- AFR_TRACE_INODELK_IN (frame, this,
- AFR_INODELK_NB_TRANSACTION,
- AFR_LOCK_OP, &flock, F_SETLK, i);
-
- STACK_WIND_COOKIE (frame, afr_nonblocking_inodelk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->inodelk,
- this->name, &local->loc,
- F_SETLK, &flock, NULL);
-
- if (!--call_count)
- break;
- }
- }
-out:
- return ret;
-}
-
-static int
-__is_lower_locked (call_frame_t *frame, xlator_t *this)
+afr_lock_blocking(call_frame_t *frame, xlator_t *this, int cookie)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER)
- count++;
- }
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ uint64_t ctx = 0;
+ int ret = 0;
+ int child_index = 0;
+ int lockee_num = 0;
- return count;
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+ child_index = cookie % priv->child_count;
+ lockee_num = cookie / priv->child_count;
-}
+ if (local->fd) {
+ ret = fd_ctx_get(local->fd, this, &ctx);
-static int
-__is_higher_locked (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int count = 0;
- int i = 0;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->locked_nodes[i] & LOCKED_YES)
- count++;
- }
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_FD_CTX_GET_FAILED,
+ "unable to get fd ctx for fd=%p", local->fd);
- return count;
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
-}
+ afr_unlock_now(frame, this);
-static int
-afr_unlock_lower_entrylk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- const char *basename = NULL;
- loc_t *loc = NULL;
- int call_count = 0;
- int i = -1;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
-
- basename = int_lock->lk_basename;
- if (int_lock->lk_loc)
- loc = int_lock->lk_loc;
-
- call_count = __is_lower_locked (frame, this);
- int_lock->lk_call_count = call_count;
-
- if (!call_count){
- gf_log (this->name, GF_LOG_TRACE,
- "No internal locks unlocked");
- int_lock->lock_cbk (frame, this);
- goto out;
+ return 0;
}
+ }
- for (i = 0; i < priv->child_count; i++) {
- if (int_lock->lower_locked_nodes[i] & LOCKED_LOWER) {
- AFR_TRACE_ENTRYLK_IN (frame, this,
- AFR_ENTRYLK_NB_TRANSACTION,
- AFR_UNLOCK_OP, basename, i);
-
- STACK_WIND_COOKIE (frame, afr_unlock_entrylk_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->entrylk,
- this->name,
- loc, basename,
- ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL);
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+ if (!is_blocking_locks_count_sufficient(frame, this)) {
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
- if (!--call_count)
- break;
+ afr_unlock_now(frame, this);
- }
+ return 0;
}
+ }
-out:
- return 0;
-
-}
+ if (int_lock->lk_expected_count == int_lock->lk_attempted_count) {
+ /* we're done locking */
+ gf_msg_debug(this->name, 0, "we're done locking");
-static int
-afr_post_unlock_higher_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- local->transaction.done (frame, this);
+ int_lock->lock_op_ret = 0;
+ int_lock->lock_cbk(frame, this);
return 0;
-}
-
-static int
-afr_post_unlock_lower_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- loc_t *higher = NULL;
- const char *higher_name = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- higher = (lower == &local->transaction.parent_loc ?
- &local->transaction.new_parent_loc :
- &local->transaction.parent_loc);
-
- higher_name = (higher == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- if (__is_higher_locked (frame, this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking higher");
- int_lock->lk_basename = higher_name;
- int_lock->lk_loc = higher;
- int_lock->lock_cbk = afr_post_unlock_higher_cbk;
-
- afr_unlock_entrylk (frame, this);
- } else
- local->transaction.done (frame, this);
+ }
+ if (!_is_lock_wind_needed(local, child_index)) {
+ afr_lock_blocking(frame, this, cookie + 1);
return 0;
-}
+ }
-static int
-afr_rename_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- loc_t *lower = NULL;
- const char *lower_name = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- lower = lower_path (&local->transaction.parent_loc,
- local->transaction.basename,
- &local->transaction.new_parent_loc,
- local->transaction.new_basename);
-
- lower_name = (lower == &local->transaction.parent_loc ?
- local->transaction.basename :
- local->transaction.new_basename);
-
- if (__is_lower_locked (frame, this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking lower");
- int_lock->lk_basename = lower_name;
- int_lock->lk_loc = lower;
- int_lock->lock_cbk = afr_post_unlock_lower_cbk;
-
- afr_unlock_lower_entrylk (frame, this);
- } else
- afr_post_unlock_lower_cbk (frame, this);
-
- return 0;
-}
-
-static int
-afr_rename_transaction (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- return (local->transaction.type ==
- AFR_ENTRY_RENAME_TRANSACTION);
+ afr_internal_lock_wind(frame, afr_lock_cbk, (void *)(long)cookie,
+ child_index, lockee_num, _gf_true, _gf_false);
+ return 0;
}
int32_t
-afr_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- if (transaction_lk_op (local)) {
- afr_set_lk_owner (frame, this, frame->root);
- if (is_afr_lock_transaction (local))
- afr_unlock_inodelk (frame, this);
- else
- if (!afr_rename_transaction (frame, this))
- afr_unlock_entrylk (frame, this);
- else
- afr_rename_unlock (frame, this);
- } else {
- if (is_afr_lock_selfheal (local))
- afr_unlock_inodelk (frame, this);
- else
- afr_unlock_entrylk (frame, this);
- }
-
- return 0;
-}
-
-int
-afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
- unsigned char *locked_nodes)
+afr_blocking_lock(call_frame_t *frame, xlator_t *this)
{
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fdctx = NULL;
- uint64_t tmp = 0;
- int ret = 0;
-
- priv = this->private;
-
- ret = afr_fd_ctx_set (this, fd);
- if (ret)
- goto out;
-
- ret = fd_ctx_get (fd, this, &tmp);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "failed to get the fd ctx");
- goto out;
- }
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- GF_ASSERT (fdctx->locked_on);
-
- memcpy (fdctx->locked_on, locked_nodes,
- priv->child_count);
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int up_count = 0;
-out:
- return ret;
-}
+ priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
-static int
-__is_fd_saved (xlator_t *this, fd_t *fd)
-{
- afr_locked_fd_t *locked_fd = NULL;
- afr_private_t *priv = NULL;
- int found = 0;
+ up_count = AFR_COUNT(local->child_up, priv->child_count);
+ int_lock->lk_call_count = int_lock->lk_expected_count =
+ (int_lock->lockee_count * up_count);
+ initialize_internal_lock_variables(frame, this);
- priv = this->private;
+ afr_lock_blocking(frame, this, 0);
- list_for_each_entry (locked_fd, &priv->saved_fds, list) {
- if (locked_fd->fd == fd) {
- found = 1;
- break;
- }
- }
-
- return found;
+ return 0;
}
-static int
-__afr_save_locked_fd (xlator_t *this, fd_t *fd)
+static int32_t
+afr_nb_internal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- afr_private_t *priv = NULL;
- afr_locked_fd_t *locked_fd = NULL;
- int ret = 0;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ int call_count = 0;
+ int child_index = 0;
+ int lockee_num = 0;
+ afr_private_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- locked_fd = GF_CALLOC (1, sizeof (*locked_fd),
- gf_afr_mt_locked_fd);
- if (!locked_fd) {
- ret = -1;
- goto out;
- }
-
- locked_fd->fd = fd;
- INIT_LIST_HEAD (&locked_fd->list);
-
- list_add_tail (&locked_fd->list, &priv->saved_fds);
-
-out:
- return ret;
-}
-
-int
-afr_save_locked_fd (xlator_t *this, fd_t *fd)
-{
- afr_private_t *priv = NULL;
- int ret = 0;
+ child_index = ((long)cookie) % priv->child_count;
+ lockee_num = ((long)cookie) / priv->child_count;
- priv = this->private;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- pthread_mutex_lock (&priv->mutex);
+ if (op_ret == 0 && local->transaction.type == AFR_DATA_TRANSACTION) {
+ LOCK(&local->inode->lock);
{
- if (__is_fd_saved (this, fd)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "fd=%p already saved", fd);
- goto unlock;
- }
-
- ret = __afr_save_locked_fd (this, fd);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "fd=%p could not be saved", fd);
- goto unlock;
- }
+ local->inode_ctx->lock_count++;
}
-unlock:
- pthread_mutex_unlock (&priv->mutex);
-
- return ret;
-}
-
-static int
-afr_lock_recovery_cleanup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_locked_fd_t *locked_fd = NULL;
-
- local = frame->local;
-
- locked_fd = local->locked_fd;
-
- STACK_DESTROY (frame->root);
- afr_local_cleanup (local, this);
-
- afr_save_locked_fd (this, locked_fd->fd);
-
- return 0;
-
-}
-
-static int
-afr_get_source_lock_recovery (xlator_t *this, fd_t *fd)
-{
- afr_fd_ctx_t *fdctx = NULL;
- afr_private_t *priv = NULL;
- uint64_t tmp = 0;
- int i = 0;
- int source_child = -1;
- int ret = 0;
+ UNLOCK(&local->inode->lock);
+ }
- priv = this->private;
-
- ret = fd_ctx_get (fd, this, &tmp);
- if (ret)
- goto out;
-
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- for (i = 0; i < priv->child_count; i++) {
- if (fdctx->locked_on[i]) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Found lock recovery source=%d", i);
- source_child = i;
- break;
- }
+ LOCK(&frame->lock);
+ {
+ if (op_ret < 0) {
+ if (op_errno == ENOSYS) {
+ /* return ENOTSUP */
+ gf_msg(this->name, GF_LOG_ERROR, ENOSYS,
+ AFR_MSG_LOCK_XLATOR_NOT_LOADED,
+ "subvolume does not support "
+ "locking. please load features/locks"
+ " xlator on server");
+ local->op_ret = op_ret;
+ int_lock->lock_op_ret = op_ret;
+
+ int_lock->lock_op_errno = op_errno;
+ local->op_errno = op_errno;
+ }
+ } else if (op_ret == 0) {
+ int_lock->lockee[lockee_num]
+ .locked_nodes[child_index] |= LOCKED_YES;
+ int_lock->lockee[lockee_num].locked_count++;
+ int_lock->lock_count++;
}
-out:
- return source_child;
-
-}
+ call_count = --int_lock->lk_call_count;
+ }
+ UNLOCK(&frame->lock);
-int32_t
-afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
- dict_t *xdata);
-int32_t
-afr_recover_lock_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
- dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int32_t source_child = 0;
- struct gf_flock flock = {0,};
-
- local = frame->local;
- priv = this->private;
-
- if (op_ret) {
- gf_log (this->name, GF_LOG_INFO,
- "lock recovery failed");
- goto cleanup;
+ if (call_count == 0) {
+ gf_msg_trace(this->name, 0, "Last locking reply received");
+ /* all locks successful. Proceed to call FOP */
+ if (int_lock->lock_count == int_lock->lk_expected_count) {
+ gf_msg_trace(this->name, 0, "All servers locked. Calling the cbk");
+ int_lock->lock_op_ret = 0;
+ int_lock->lock_cbk(frame, this);
}
+ /* Not all locks were successful. Unlock and try locking
+ again, this time with serially blocking locks */
+ else {
+ gf_msg_trace(this->name, 0,
+ "%d servers locked. Trying again "
+ "with blocking calls",
+ int_lock->lock_count);
- source_child = local->source_child;
-
- memcpy (&flock, lock, sizeof (*lock));
-
- STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk,
- (void *) (long) source_child,
- priv->children[source_child],
- priv->children[source_child]->fops->lk,
- local->fd, F_GETLK_FD, &flock, NULL);
-
- return 0;
-
-cleanup:
- afr_lock_recovery_cleanup (frame, this);
- return 0;
-}
-
-int
-afr_recover_lock (call_frame_t *frame, xlator_t *this,
- struct gf_flock *flock)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int32_t lock_recovery_child = 0;
-
- priv = this->private;
- local = frame->local;
-
- lock_recovery_child = local->lock_recovery_child;
-
- frame->root->lk_owner = flock->l_owner;
-
- STACK_WIND_COOKIE (frame, afr_recover_lock_cbk,
- (void *) (long) lock_recovery_child,
- priv->children[lock_recovery_child],
- priv->children[lock_recovery_child]->fops->lk,
- local->fd, F_SETLK, flock, NULL);
-
- return 0;
-}
-
-static int
-is_afr_lock_eol (struct gf_flock *lock)
-{
- int ret = 0;
-
- if ((lock->l_type == GF_LK_EOL))
- ret = 1;
-
- return ret;
-}
-
-int32_t
-afr_get_locks_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
- dict_t *xdata)
-{
- if (op_ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Failed to get locks on fd");
- goto cleanup;
+ afr_unlock_now(frame, this);
}
+ }
- gf_log (this->name, GF_LOG_DEBUG,
- "Got a lock on fd");
-
- if (is_afr_lock_eol (lock)) {
- gf_log (this->name, GF_LOG_INFO,
- "Reached EOL on locks on fd");
- goto cleanup;
- }
-
- afr_recover_lock (frame, this, lock);
-
- return 0;
-
-cleanup:
- afr_lock_recovery_cleanup (frame, this);
-
- return 0;
+ return 0;
}
-static int
-afr_lock_recovery (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- fd_t *fd = NULL;
- int ret = 0;
- int32_t source_child = 0;
- struct gf_flock flock = {0,};
-
- priv = this->private;
- local = frame->local;
-
- fd = local->fd;
-
- source_child = afr_get_source_lock_recovery (this, fd);
- if (source_child < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not recover locks due to lock "
- "split brain");
- ret = -1;
- goto out;
+int
+afr_lock_nonblocking(call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int child = 0;
+ int lockee_num = 0;
+ int32_t call_count = 0;
+ int i = 0;
+ int ret = 0;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ priv = this->private;
+
+ initialize_internal_lock_variables(frame, this);
+
+ if (local->fd) {
+ fd_ctx = afr_fd_ctx_get(local->fd, this);
+ if (!fd_ctx) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_FD_CTX_GET_FAILED,
+ "unable to get fd ctx for fd=%p", local->fd);
+
+ local->op_ret = -1;
+ int_lock->lock_op_ret = -1;
+ local->op_errno = EINVAL;
+ int_lock->lock_op_errno = EINVAL;
+
+ afr_unlock_now(frame, this);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ call_count = int_lock->lockee_count * internal_lock_count(frame, this);
+ int_lock->lk_call_count = call_count;
+ int_lock->lk_expected_count = call_count;
+
+ if (!call_count) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_INFO_COMMON,
+ "fd not open on any subvolumes. aborting.");
+ afr_unlock_now(frame, this);
+ goto out;
+ }
+
+ /* Send non-blocking lock calls only on up children
+ and where the fd has been opened */
+ for (i = 0; i < int_lock->lockee_count * priv->child_count; i++) {
+ child = i % priv->child_count;
+ lockee_num = i / priv->child_count;
+ if (local->child_up[child]) {
+ afr_internal_lock_wind(frame, afr_nb_internal_lock_cbk,
+ (void *)(long)i, child, lockee_num,
+ _gf_false, _gf_false);
+ if (!--call_count)
+ break;
}
-
- local->source_child = source_child;
-
- /* the flock can be zero filled as we're querying incrementally
- the locks held on the fd.
- */
- STACK_WIND_COOKIE (frame, afr_get_locks_fd_cbk,
- (void *) (long) source_child,
- priv->children[source_child],
- priv->children[source_child]->fops->lk,
- local->fd, F_GETLK_FD, &flock, NULL);
-
+ }
out:
- return ret;
-}
-
-
-static int
-afr_mark_fd_opened (xlator_t *this, fd_t *fd, int32_t child_index)
-{
- afr_fd_ctx_t *fdctx = NULL;
- uint64_t tmp = 0;
- int ret = 0;
-
- ret = fd_ctx_get (fd, this, &tmp);
- if (ret)
- goto out;
-
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- fdctx->opened_on[child_index] = AFR_FD_OPENED;
-
-out:
- return ret;
+ return ret;
}
int32_t
-afr_lock_recovery_preopen_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- dict_t *xdata)
-{
- int32_t child_index = (long )cookie;
- int ret = 0;
-
- if (op_ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Reopen during lock-recovery failed");
- goto cleanup;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Open succeeded => proceed to recover locks");
-
- ret = afr_lock_recovery (frame, this);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Lock recovery failed");
- goto cleanup;
- }
-
- ret = afr_mark_fd_opened (this, fd, child_index);
- if (ret) {
- gf_log (this->name, GF_LOG_INFO,
- "Marking fd open failed");
- goto cleanup;
- }
-
- return 0;
-
-cleanup:
- afr_lock_recovery_cleanup (frame, this);
- return 0;
-}
-
-static int
-afr_lock_recovery_preopen (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- uint64_t tmp = 0;
- afr_fd_ctx_t *fdctx = NULL;
- loc_t loc = {0,};
- int32_t child_index = 0;
- int ret = 0;
-
- priv = this->private;
- local = frame->local;
-
- GF_ASSERT (local && local->fd);
-
- ret = fd_ctx_get (local->fd, this, &tmp);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to get the context of fd",
- uuid_utoa (local->fd->inode->gfid));
- fdctx = (afr_fd_ctx_t *) (long) tmp;
- /* TODO: instead we should return from the function */
- GF_ASSERT (fdctx);
-
- child_index = local->lock_recovery_child;
-
- inode_path (local->fd->inode, NULL, (char **)&loc.path);
- loc.name = strrchr (loc.path, '/');
- loc.inode = inode_ref (local->fd->inode);
- loc.parent = inode_parent (local->fd->inode, 0, NULL);
-
-
- STACK_WIND_COOKIE (frame, afr_lock_recovery_preopen_cbk,
- (void *)(long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->open,
- &loc, fdctx->flags, local->fd, NULL);
-
- return 0;
-}
-
-static int
-is_fd_opened (fd_t *fd, int32_t child_index)
-{
- afr_fd_ctx_t *fdctx = NULL;
- uint64_t tmp = 0;
- int ret = 0;
-
- ret = fd_ctx_get (fd, THIS, &tmp);
- if (ret)
- goto out;
-
- fdctx = (afr_fd_ctx_t *) (long) tmp;
-
- if (fdctx->opened_on[child_index] == AFR_FD_OPENED)
- ret = 1;
-
-out:
- return ret;
-}
-
-int
-afr_attempt_lock_recovery (xlator_t *this, int32_t child_index)
-{
- call_frame_t *frame = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_locked_fd_t *locked_fd = NULL;
- afr_locked_fd_t *tmp = NULL;
- int ret = -1;
- struct list_head locks_list = {0,};
- int32_t op_errno = 0;
-
-
- priv = this->private;
-
- if (list_empty (&priv->saved_fds))
- goto out;
-
- frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- ret = -1;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0) {
- ret = -1;
- goto out;
- }
-
- frame->local = local;
-
- INIT_LIST_HEAD (&locks_list);
-
- pthread_mutex_lock (&priv->mutex);
- {
- list_splice_init (&priv->saved_fds, &locks_list);
- }
- pthread_mutex_unlock (&priv->mutex);
-
- list_for_each_entry_safe (locked_fd, tmp,
- &locks_list, list) {
-
- list_del_init (&locked_fd->list);
-
- local->fd = fd_ref (locked_fd->fd);
- local->lock_recovery_child = child_index;
- local->locked_fd = locked_fd;
-
- if (!is_fd_opened (locked_fd->fd, child_index)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "attempting open before lock "
- "recovery");
- afr_lock_recovery_preopen (frame, this);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "attempting lock recovery "
- "without a preopen");
- afr_lock_recovery (frame, this);
- }
- }
+afr_unlock(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_lock_t *lock = NULL;
+
+ local = frame->local;
+
+ if (!local->transaction.eager_lock_on)
+ goto out;
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ LOCK(&local->inode->lock);
+ {
+ list_del_init(&local->transaction.owner_list);
+ if (list_empty(&lock->owners) && list_empty(&lock->post_op)) {
+ local->transaction.do_eager_unlock = _gf_true;
+ /*TODO: Need to get metadata use on_disk and inherit/uninherit
+ *GF_ASSERT (!local->inode_ctx->on_disk[local->transaction.type]);
+ *GF_ASSERT (!local->inode_ctx->inherited[local->transaction.type]);
+ */
+ GF_ASSERT(lock->release);
+ }
+ }
+ UNLOCK(&local->inode->lock);
+ if (!local->transaction.do_eager_unlock) {
+ local->internal_lock.lock_cbk(frame, this);
+ return 0;
+ }
out:
- if ((ret < 0) && frame)
- AFR_STACK_DESTROY (frame);
- return ret;
-}
-
-void
-afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src,
- unsigned int child_count)
-{
- afr_local_t *dst_local = NULL;
- afr_local_t *src_local = NULL;
- afr_internal_lock_t *dst_lock = NULL;
- afr_internal_lock_t *src_lock = NULL;
-
- dst_local = dst->local;
- dst_lock = &dst_local->internal_lock;
- src_local = src->local;
- src_lock = &src_local->internal_lock;
- if (src_lock->inode_locked_nodes) {
- memcpy (dst_lock->inode_locked_nodes,
- src_lock->inode_locked_nodes,
- sizeof (*dst_lock->inode_locked_nodes) * child_count);
- memset (src_lock->inode_locked_nodes, 0,
- sizeof (*src_lock->inode_locked_nodes) * child_count);
- }
-
- dst_lock->transaction_lk_type = src_lock->transaction_lk_type;
- dst_lock->selfheal_lk_type = src_lock->selfheal_lk_type;
- dst_lock->inodelk_lock_count = src_lock->inodelk_lock_count;
- src_lock->inodelk_lock_count = 0;
+ afr_unlock_now(frame, this);
+ return 0;
}
diff --git a/xlators/cluster/afr/src/afr-mem-types.h b/xlators/cluster/afr/src/afr-mem-types.h
index 7e684801fae..816065fb57a 100644
--- a/xlators/cluster/afr/src/afr-mem-types.h
+++ b/xlators/cluster/afr/src/afr-mem-types.h
@@ -8,40 +8,31 @@
cases as published by the Free Software Foundation.
*/
-
#ifndef __AFR_MEM_TYPES_H__
#define __AFR_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_afr_mem_types_ {
- gf_afr_mt_iovec = gf_common_mt_end + 1,
- gf_afr_mt_afr_fd_ctx_t,
- gf_afr_mt_afr_private_t,
- gf_afr_mt_int32_t,
- gf_afr_mt_char,
- gf_afr_mt_xattr_key,
- gf_afr_mt_dict_t,
- gf_afr_mt_xlator_t,
- gf_afr_mt_iatt,
- gf_afr_mt_int,
- gf_afr_mt_afr_node_character,
- gf_afr_mt_sh_diff_loop_state,
- gf_afr_mt_uint8_t,
- gf_afr_mt_loc_t,
- gf_afr_mt_entry_name,
- gf_afr_mt_pump_priv,
- gf_afr_mt_locked_fd,
- gf_afr_mt_inode_ctx_t,
- gf_afr_fd_paused_call_t,
- gf_afr_mt_crawl_data_t,
- gf_afr_mt_brick_pos_t,
- gf_afr_mt_shd_bool_t,
- gf_afr_mt_shd_timer_t,
- gf_afr_mt_shd_event_t,
- gf_afr_mt_time_t,
- gf_afr_mt_pos_data_t,
- gf_afr_mt_end
+ gf_afr_mt_afr_fd_ctx_t = gf_common_mt_end + 1,
+ gf_afr_mt_afr_private_t,
+ gf_afr_mt_int32_t,
+ gf_afr_mt_char,
+ gf_afr_mt_xattr_key,
+ gf_afr_mt_dict_t,
+ gf_afr_mt_xlator_t,
+ gf_afr_mt_afr_node_character,
+ gf_afr_mt_inode_ctx_t,
+ gf_afr_mt_shd_event_t,
+ gf_afr_mt_reply_t,
+ gf_afr_mt_subvol_healer_t,
+ gf_afr_mt_spbc_timeout_t,
+ gf_afr_mt_spb_status_t,
+ gf_afr_mt_empty_brick_t,
+ gf_afr_mt_child_latency_t,
+ gf_afr_mt_atomic_t,
+ gf_afr_mt_lk_heal_info_t,
+ gf_afr_mt_gf_lock,
+ gf_afr_mt_end
};
#endif
-
diff --git a/xlators/cluster/afr/src/afr-messages.h b/xlators/cluster/afr/src/afr-messages.h
new file mode 100644
index 00000000000..e73fd997765
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-messages.h
@@ -0,0 +1,167 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _AFR_MESSAGES_H_
+#define _AFR_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(
+ AFR, AFR_MSG_QUORUM_FAIL, AFR_MSG_QUORUM_MET, AFR_MSG_QUORUM_OVERRIDE,
+ AFR_MSG_INVALID_CHILD_UP, AFR_MSG_SUBVOL_UP, AFR_MSG_SUBVOLS_DOWN,
+ AFR_MSG_ENTRY_UNLOCK_FAIL, AFR_MSG_SPLIT_BRAIN, AFR_MSG_OPEN_FAIL,
+ AFR_MSG_UNLOCK_FAIL, AFR_MSG_REPLACE_BRICK_STATUS, AFR_MSG_GFID_NULL,
+ AFR_MSG_FD_CREATE_FAILED, AFR_MSG_DICT_SET_FAILED,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR, AFR_MSG_MIGRATION_IN_PROGRESS,
+ AFR_MSG_CHILD_MISCONFIGURED, AFR_MSG_VOL_MISCONFIGURED,
+ AFR_MSG_INTERNAL_LKS_FAILED, AFR_MSG_INVALID_FD, AFR_MSG_LOCK_INFO,
+ AFR_MSG_LOCK_XLATOR_NOT_LOADED, AFR_MSG_FD_CTX_GET_FAILED,
+ AFR_MSG_INVALID_SUBVOL, AFR_MSG_PUMP_XLATOR_ERROR, AFR_MSG_SELF_HEAL_INFO,
+ AFR_MSG_READ_SUBVOL_ERROR, AFR_MSG_DICT_GET_FAILED, AFR_MSG_INFO_COMMON,
+ AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR, AFR_MSG_LOCAL_CHILD, AFR_MSG_INVALID_DATA,
+ AFR_MSG_INVALID_ARG, AFR_MSG_INDEX_DIR_GET_FAILED, AFR_MSG_FSYNC_FAILED,
+ AFR_MSG_FAVORITE_CHILD, AFR_MSG_SELF_HEAL_FAILED,
+ AFR_MSG_SPLIT_BRAIN_STATUS, AFR_MSG_ADD_BRICK_STATUS, AFR_MSG_NO_CHANGELOG,
+ AFR_MSG_TIMER_CREATE_FAIL, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ AFR_MSG_INODE_CTX_GET_FAILED, AFR_MSG_THIN_ARB,
+ AFR_MSG_THIN_ARB_XATTROP_FAILED, AFR_MSG_THIN_ARB_LOC_POP_FAILED,
+ AFR_MSG_GET_PEND_VAL, AFR_MSG_THIN_ARB_SKIP_SHD, AFR_MSG_UNKNOWN_SET,
+ AFR_MSG_NO_XL_ID, AFR_MSG_SELF_HEAL_INFO_START,
+ AFR_MSG_SELF_HEAL_INFO_FINISH, AFR_MSG_INCRE_COUNT,
+ AFR_MSG_ADD_TO_OUTPUT_FAILED, AFR_MSG_SET_TIME_FAILED,
+ AFR_MSG_GFID_MISMATCH_DETECTED, AFR_MSG_GFID_HEAL_MSG,
+ AFR_MSG_THIN_ARB_LOOKUP_FAILED, AFR_MSG_DICT_CREATE_FAILED,
+ AFR_MSG_NO_MAJORITY_TO_RESOLVE, AFR_MSG_TYPE_MISMATCH,
+ AFR_MSG_SIZE_POLICY_NOT_APPLICABLE, AFR_MSG_NO_CHILD_SELECTED,
+ AFR_MSG_INVALID_CHILD, AFR_MSG_RESOLVE_CONFLICTING_DATA,
+ SERROR_GETTING_SRC_BRICK, SNO_DIFF_IN_MTIME, SNO_BIGGER_FILE,
+ SALL_BRICKS_UP_TO_RESOLVE, AFR_MSG_UNLOCK_FAILED, AFR_MSG_POST_OP_FAILED,
+ AFR_MSG_TA_FRAME_CREATE_FAILED, AFR_MSG_SET_KEY_XATTROP_FAILED,
+ AFR_MSG_BLOCKING_ENTRYLKS_FAILED, AFR_MSG_FOP_FAILED,
+ AFR_MSG_CLEAN_UP_FAILED, AFR_MSG_UNABLE_TO_FETCH, AFR_MSG_XATTR_SET_FAILED,
+ AFR_MSG_SPLIT_BRAIN_REPLICA, AFR_MSG_INODE_CTX_FAILED,
+ AFR_MSG_LOOKUP_FAILED, AFR_MSG_ALL_SUBVOLS_DOWN,
+ AFR_MSG_RELEASE_LOCK_FAILED, AFR_MSG_CLEAR_TIME_SPLIT_BRAIN,
+ AFR_MSG_READ_FAILED, AFR_MSG_LAUNCH_FAILED, AFR_MSG_READ_SUBVOL_NOT_UP,
+ AFR_MSG_LK_HEAL_DOM, AFR_MSG_NEW_BRICK, AFR_MSG_SPLIT_BRAIN_SET_FAILED,
+ AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED, AFR_MSG_HEALER_SPAWN_FAILED,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, AFR_MSG_NULL_DEREF, AFR_MSG_SET_PEND_XATTR,
+ AFR_MSG_INTERNAL_ATTR);
+
+#define AFR_MSG_DICT_GET_FAILED_STR "Dict get failed"
+#define AFR_MSG_DICT_SET_FAILED_STR "Dict set failed"
+#define AFR_MSG_HEALER_SPAWN_FAILED_STR "Healer spawn failed"
+#define AFR_MSG_ADD_CRAWL_EVENT_FAILED_STR "Adding crawl event failed"
+#define AFR_MSG_INVALID_ARG_STR "Invalid argument"
+#define AFR_MSG_INDEX_DIR_GET_FAILED_STR "unable to get index-dir on "
+#define AFR_MSG_THIN_ARB_LOOKUP_FAILED_STR "Failed lookup on file"
+#define AFR_MSG_DICT_CREATE_FAILED_STR "Failed to create dict."
+#define AFR_MSG_THIN_ARB_XATTROP_FAILED_STR "Xattrop failed."
+#define AFR_MSG_THIN_ARB_LOC_POP_FAILED_STR \
+ "Failed to populate loc for thin-arbiter"
+#define AFR_MSG_GET_PEND_VAL_STR "Error getting value of pending"
+#define AFR_MSG_THIN_ARB_SKIP_SHD_STR "I am not the god shd. skipping."
+#define AFR_MSG_UNKNOWN_SET_STR "Unknown set"
+#define AFR_MSG_NO_XL_ID_STR "xl does not have id"
+#define AFR_MSG_SELF_HEAL_INFO_START_STR "starting full sweep on"
+#define AFR_MSG_SELF_HEAL_INFO_FINISH_STR "finished full sweep on"
+#define AFR_MSG_INCRE_COUNT_STR "Could not increment the counter."
+#define AFR_MSG_ADD_TO_OUTPUT_FAILED_STR "Could not add to output"
+#define AFR_MSG_SET_TIME_FAILED_STR "Could not set time"
+#define AFR_MSG_GFID_HEAL_MSG_STR "Error setting gfid-heal-msg dict"
+#define AFR_MSG_NO_MAJORITY_TO_RESOLVE_STR \
+ "No majority to resolve gfid split brain"
+#define AFR_MSG_GFID_MISMATCH_DETECTED_STR "Gfid mismatch dectected"
+#define AFR_MSG_SELF_HEAL_INFO_STR "performing selfheal"
+#define AFR_MSG_TYPE_MISMATCH_STR "TYPE mismatch"
+#define AFR_MSG_SIZE_POLICY_NOT_APPLICABLE_STR \
+ "Size policy is not applicable to directories."
+#define AFR_MSG_NO_CHILD_SELECTED_STR \
+ "No child selected by favorite-child policy"
+#define AFR_MSG_INVALID_CHILD_STR "Invalid child"
+#define AFR_MSG_RESOLVE_CONFLICTING_DATA_STR \
+ "selected as authentic to resolve conflicting data"
+#define SERROR_GETTING_SRC_BRICK_STR "Error getting the source brick"
+#define SNO_DIFF_IN_MTIME_STR "No difference in mtime"
+#define SNO_BIGGER_FILE_STR "No bigger file"
+#define SALL_BRICKS_UP_TO_RESOLVE_STR \
+ "All the bricks should be up to resolve the gfid split brain"
+#define AFR_MSG_UNLOCK_FAILED_STR "Failed to unlock"
+#define AFR_MSG_POST_OP_FAILED_STR "Post-op on thin-arbiter failed"
+#define AFR_MSG_TA_FRAME_CREATE_FAILED_STR "Failed to create ta_frame"
+#define AFR_MSG_SET_KEY_XATTROP_FAILED_STR "Could not set key during xattrop"
+#define AFR_MSG_BLOCKING_ENTRYLKS_FAILED_STR "Blocking entrylks failed"
+#define AFR_MSG_FSYNC_FAILED_STR "fsync failed"
+#define AFR_MSG_QUORUM_FAIL_STR "quorum is not met"
+#define AFR_MSG_FOP_FAILED_STR "Failing Fop"
+#define AFR_MSG_INVALID_SUBVOL_STR "not a subvolume"
+#define AFR_MSG_VOL_MISCONFIGURED_STR "Volume is dangling"
+#define AFR_MSG_CHILD_MISCONFIGURED_STR \
+ "replicate translator needs more than one subvolume defined"
+#define AFR_MSG_CLEAN_UP_FAILED_STR "Failed to clean up healer threads"
+#define AFR_MSG_QUORUM_OVERRIDE_STR "overriding quorum-count"
+#define AFR_MSG_UNABLE_TO_FETCH_STR \
+ "Unable to fetch afr-pending-xattr option from volfile. Falling back to " \
+ "using client translator names"
+#define AFR_MSG_NULL_DEREF_STR "possible NULL deref"
+#define AFR_MSG_XATTR_SET_FAILED_STR "Cannot set xattr cookie key"
+#define AFR_MSG_SPLIT_BRAIN_STATUS_STR "Failed to create synctask"
+#define AFR_MSG_SUBVOLS_DOWN_STR "All subvolumes are not up"
+#define AFR_MSG_SPLIT_BRAIN_CHOICE_ERROR_STR \
+ "Failed to cancel split-brain choice"
+#define AFR_MSG_SPLIT_BRAIN_REPLICA_STR \
+ "Cannot set replica. File is not in data/metadata split-brain"
+#define AFR_MSG_INODE_CTX_FAILED_STR "Failed to get inode_ctx"
+#define AFR_MSG_READ_SUBVOL_ERROR_STR "no read subvols"
+#define AFR_MSG_LOCAL_CHILD_STR "selecting local read-child"
+#define AFR_MSG_LOOKUP_FAILED_STR "Failed to lookup/create thin-arbiter id file"
+#define AFR_MSG_TIMER_CREATE_FAIL_STR \
+ "Cannot create timer for delayed initialization"
+#define AFR_MSG_SUBVOL_UP_STR "Subvolume came back up; going online"
+#define AFR_MSG_ALL_SUBVOLS_DOWN_STR \
+ "All subvolumes are down. Going offline until atleast one of them is up"
+#define AFR_MSG_RELEASE_LOCK_FAILED_STR "Failed to release lock"
+#define AFR_MSG_INVALID_CHILD_UP_STR "Received child_up from invalid subvolume"
+#define AFR_MSG_QUORUM_MET_STR "Client-quorum is met"
+#define AFR_MSG_EXPUNGING_FILE_OR_DIR_STR "expunging file or dir"
+#define AFR_MSG_SELF_HEAL_FAILED_STR "Invalid"
+#define AFR_MSG_SPLIT_BRAIN_STR "Skipping conservative mergeon the file"
+#define AFR_MSG_CLEAR_TIME_SPLIT_BRAIN_STR "clear time split brain"
+#define AFR_MSG_READ_FAILED_STR "Failing read since good brick is down"
+#define AFR_MSG_LAUNCH_FAILED_STR "Failed to launch synctask"
+#define AFR_MSG_READ_SUBVOL_NOT_UP_STR \
+ "read subvolume in this generation is not up"
+#define AFR_MSG_INTERNAL_LKS_FAILED_STR \
+ "Unable to work with lk-owner while attempting fop"
+#define AFR_MSG_LOCK_XLATOR_NOT_LOADED_STR \
+ "subvolume does not support locking. please load features/locks xlator " \
+ "on server."
+#define AFR_MSG_FD_CTX_GET_FAILED_STR "unable to get fd ctx"
+#define AFR_MSG_INFO_COMMON_STR "fd not open on any subvolumes, aborting."
+#define AFR_MSG_REPLACE_BRICK_STATUS_STR "Couldn't acquire lock on any child."
+#define AFR_MSG_NEW_BRICK_STR "New brick"
+#define AFR_MSG_SPLIT_BRAIN_SET_FAILED_STR \
+ "Failed to set split-brain choice to -1"
+#define AFR_MSG_SPLIT_BRAIN_DETERMINE_FAILED_STR \
+ "Failed to determine split-brain. Aborting split-brain-choice set"
+#define AFR_MSG_OPEN_FAIL_STR "Failed to open subvolume"
+#define AFR_MSG_SET_PEND_XATTR_STR "Set of pending xattr"
+#define AFR_MSG_INTERNAL_ATTR_STR "is an internal extended attribute"
+#endif /* !_AFR_MESSAGES_H_ */
diff --git a/xlators/cluster/afr/src/afr-open.c b/xlators/cluster/afr/src/afr-open.c
index 35090d80e4b..64856042b65 100644
--- a/xlators/cluster/afr/src/afr-open.c
+++ b/xlators/cluster/afr/src/afr-open.c
@@ -8,438 +8,346 @@
cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
#include <unistd.h>
-#include <fnmatch.h>
#include <sys/time.h>
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-#include "statedump.h"
-
-#include "fd.h"
-
-#include "afr-inode-read.h"
-#include "afr-inode-write.h"
-#include "afr-dir-read.h"
-#include "afr-dir-write.h"
-#include "afr-transaction.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-int
-afr_stale_child_up (afr_local_t *local, xlator_t *this)
-{
- int i = 0;
- afr_private_t *priv = NULL;
- int up = -1;
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/statedump.h>
- priv = this->private;
-
- if (!local->fresh_children)
- local->fresh_children = afr_children_create (priv->child_count);
- if (!local->fresh_children)
- goto out;
-
- afr_inode_get_read_ctx (this, local->fd->inode, local->fresh_children);
- if (priv->child_count == afr_get_children_count (local->fresh_children,
- priv->child_count))
- goto out;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!local->child_up[i])
- continue;
- if (afr_is_child_present (local->fresh_children,
- priv->child_count, i))
- continue;
- up = i;
- break;
- }
-out:
- return up;
-}
+#include "afr-transaction.h"
-void
-afr_perform_data_self_heal (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_is_fd_fixable(fd_t *fd)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- inode_t *inode = NULL;
- int st_child = -1;
- char reason[64] = {0};
-
- local = frame->local;
- sh = &local->self_heal;
- inode = local->fd->inode;
-
- if (!IA_ISREG (inode->ia_type))
- goto out;
-
- st_child = afr_stale_child_up (local, this);
- if (st_child < 0)
- goto out;
-
- sh->do_data_self_heal = _gf_true;
- sh->do_metadata_self_heal = _gf_true;
- sh->do_gfid_self_heal = _gf_true;
- sh->do_missing_entry_self_heal = _gf_true;
-
- snprintf (reason, sizeof (reason), "stale subvolume %d detected",
- st_child);
- afr_launch_self_heal (frame, this, inode, _gf_true, inode->ia_type,
- reason, NULL, NULL);
-out:
- return;
+ if (!fd || !fd->inode)
+ return _gf_false;
+ else if (fd_is_anonymous(fd))
+ return _gf_false;
+ else if (gf_uuid_is_null(fd->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
}
int
-afr_open_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+afr_open_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- afr_local_t * local = frame->local;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- if (afr_open_only_data_self_heal (priv->data_self_heal))
- afr_perform_data_self_heal (frame, this);
- AFR_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd, xdata);
- return 0;
-}
+ afr_local_t *local = frame->local;
+ AFR_STACK_UNWIND(open, frame, local->op_ret, local->op_errno,
+ local->cont.open.fd, xdata);
+ return 0;
+}
int
-afr_open_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd, dict_t *xdata)
+afr_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- afr_local_t * local = NULL;
- int ret = 0;
- int call_count = -1;
- int child_index = (long) cookie;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->success_count++;
-
- ret = afr_child_fd_ctx_set (this, fd, child_index,
- local->cont.open.flags);
- if (ret) {
- local->op_ret = -1;
- local->op_errno = -ret;
- goto unlock;
- }
- }
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = (long)cookie;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ local = frame->local;
+ fd_ctx = local->fd_ctx;
+
+ local->replies[child_index].valid = 1;
+ local->replies[child_index].op_ret = op_ret;
+ local->replies[child_index].op_errno = op_errno;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
+ } else {
+ local->op_ret = op_ret;
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ if (!local->xdata_rsp && xdata)
+ local->xdata_rsp = dict_ref(xdata);
}
-unlock:
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if ((local->cont.open.flags & O_TRUNC)
- && (local->op_ret >= 0)) {
- STACK_WIND (frame, afr_open_ftruncate_cbk,
- this, this->fops->ftruncate,
- fd, 0, NULL);
- } else {
- if (afr_open_only_data_self_heal (priv->data_self_heal))
- afr_perform_data_self_heal (frame, this);
- AFR_STACK_UNWIND (open, frame, local->op_ret,
- local->op_errno, local->fd, xdata);
- }
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
+
+ if (call_count == 0) {
+ afr_handle_replies_quorum(frame, this);
+ if (local->op_ret == -1) {
+ AFR_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, NULL,
+ NULL);
+ } else if (fd_ctx->flags & O_TRUNC) {
+ STACK_WIND(frame, afr_open_ftruncate_cbk, this,
+ this->fops->ftruncate, fd, 0, NULL);
+ } else {
+ AFR_STACK_UNWIND(open, frame, local->op_ret, local->op_errno,
+ local->cont.open.fd, local->xdata_rsp);
}
+ }
- return 0;
+ return 0;
}
int
-afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
+afr_open_continue(call_frame_t *frame, xlator_t *this, int err)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int i = 0;
- int ret = -1;
- int32_t call_count = 0;
- int32_t op_errno = 0;
- int32_t wind_flags = flags & (~O_TRUNC);
- //We can't let truncation to happen outside transaction.
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
- VALIDATE_OR_GOTO (loc, out);
-
- priv = this->private;
-
- if (flags & (O_CREAT|O_TRUNC)) {
- QUORUM_CHECK(open,out);
- }
-
- if (afr_is_split_brain (this, loc->inode)) {
- /* self-heal failed */
- gf_log (this->name, GF_LOG_WARNING,
- "failed to open as split brain seen, returning EIO");
- op_errno = EIO;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = 0;
+ int i = 0;
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
+ local = frame->local;
+ priv = this->private;
- call_count = local->call_count;
- loc_copy (&local->loc, loc);
-
- local->cont.open.flags = flags;
-
- local->fd = fd_ref (fd);
+ if (err) {
+ AFR_STACK_UNWIND(open, frame, -1, err, NULL, NULL);
+ } else {
+ local->call_count = AFR_COUNT(local->child_up, priv->child_count);
+ call_count = local->call_count;
for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_open_cbk, (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- loc, wind_flags, fd, xdata);
-
- if (!--call_count)
- break;
- }
+ if (local->child_up[i]) {
+ STACK_WIND_COOKIE(frame, afr_open_cbk, (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->open, &local->loc,
+ (local->cont.open.flags & ~O_TRUNC),
+ local->cont.open.fd, local->xdata_req);
+ if (!--call_count)
+ break;
+ }
}
+ }
+ return 0;
+}
- ret = 0;
+int
+afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int spb_subvol = 0;
+ int event_generation = 0;
+ int ret = 0;
+ int32_t op_errno = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+
+ // We can't let truncation to happen outside transaction.
+
+ priv = this->private;
+
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
+
+ local->op = GF_FOP_OPEN;
+ fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!fd_ctx) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ op_errno = afr_quorum_errno(priv);
+ goto out;
+ }
+
+ if (!afr_is_consistent_io_possible(local, priv, &op_errno))
+ goto out;
+
+ local->inode = inode_ref(loc->inode);
+ loc_copy(&local->loc, loc);
+ local->fd_ctx = fd_ctx;
+ fd_ctx->flags = flags;
+ if (xdata)
+ local->xdata_req = dict_ref(xdata);
+
+ local->cont.open.flags = flags;
+ local->cont.open.fd = fd_ref(fd);
+
+ ret = afr_inode_get_readable(frame, local->inode, this, NULL,
+ &event_generation, AFR_DATA_TRANSACTION);
+ if ((ret < 0) &&
+ (afr_split_brain_read_subvol_get(local->inode, this, NULL,
+ &spb_subvol) == 0) &&
+ spb_subvol < 0) {
+ afr_inode_refresh(frame, this, local->inode, local->inode->gfid,
+ afr_open_continue);
+ } else {
+ afr_open_continue(frame, this, 0);
+ }
+
+ return 0;
out:
- if (ret < 0)
- AFR_STACK_UNWIND (open, frame, -1, op_errno, fd, xdata);
+ AFR_STACK_UNWIND(open, frame, -1, op_errno, fd, NULL);
- return 0;
+ return 0;
}
-//NOTE: this function should be called with holding the lock on
-//fd to which fd_ctx belongs
-void
-afr_get_resumable_calls (xlator_t *this, afr_fd_ctx_t *fd_ctx,
- struct list_head *list)
+int
+afr_openfd_fix_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
- afr_local_t *call_local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- gf_boolean_t call = _gf_false;
-
- priv = this->private;
- list_for_each_entry_safe (paused_call, tmp, &fd_ctx->paused_calls,
- call_list) {
- call = _gf_true;
- call_local = paused_call->frame->local;
- for (i = 0; i < priv->child_count; i++) {
- if (call_local->child_up[i] &&
- (fd_ctx->opened_on[i] == AFR_FD_OPENING))
- call = _gf_false;
- }
-
- if (call) {
- list_del_init (&paused_call->call_list);
- list_add (&paused_call->call_list, list);
- }
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ int call_count = 0;
+ int child_index = (long)cookie;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (op_ret >= 0) {
+ gf_msg_debug(this->name, 0,
+ "fd for %s opened "
+ "successfully on subvolume %s",
+ local->loc.path, priv->children[child_index]->name);
+ } else {
+ gf_smsg(this->name, fop_log_level(GF_FOP_OPEN, op_errno), op_errno,
+ AFR_MSG_OPEN_FAIL, "path=%s", local->loc.path, "subvolume=%s",
+ priv->children[child_index]->name, NULL);
+ }
+
+ fd_ctx = local->fd_ctx;
+
+ LOCK(&local->fd->lock);
+ {
+ if (op_ret >= 0) {
+ fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
+ } else {
+ fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
}
+ }
+ UNLOCK(&local->fd->lock);
+
+ call_count = afr_frame_return(frame);
+ if (call_count == 0)
+ AFR_STACK_DESTROY(frame);
+
+ return 0;
}
-void
-afr_resume_calls (xlator_t *this, struct list_head *list)
+static int
+afr_fd_ctx_need_open(fd_t *fd, xlator_t *this, unsigned char *need_open)
{
- afr_fd_paused_call_t *paused_call = NULL;
- afr_fd_paused_call_t *tmp = NULL;
- afr_local_t *call_local = NULL;
-
- list_for_each_entry_safe (paused_call, tmp, list, call_list) {
- list_del_init (&paused_call->call_list);
- call_local = paused_call->frame->local;
- call_local->fop_call_continue (paused_call->frame, this);
- GF_FREE (paused_call);
+ afr_fd_ctx_t *fd_ctx = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int count = 0;
+
+ priv = this->private;
+
+ fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!fd_ctx)
+ return 0;
+
+ LOCK(&fd->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ if (fd_ctx->opened_on[i] == AFR_FD_NOT_OPENED &&
+ priv->child_up[i]) {
+ fd_ctx->opened_on[i] = AFR_FD_OPENING;
+ need_open[i] = 1;
+ count++;
+ } else {
+ need_open[i] = 0;
+ }
}
+ }
+ UNLOCK(&fd->lock);
+
+ return count;
}
-int
-afr_openfd_fix_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+void
+afr_fix_open(fd_t *fd, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
- struct list_head paused_calls = {0};
- gf_boolean_t fop_paused = _gf_false;
- fd_t *local_fd = NULL;
-
- priv = this->private;
- local = frame->local;
- fop_paused = local->fop_paused;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = -1;
+ int32_t op_errno = 0;
+ afr_fd_ctx_t *fd_ctx = NULL;
+ unsigned char *need_open = NULL;
+ int call_count = 0;
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_INFO, "fd for %s opened "
- "successfully on subvolume %s", local->loc.path,
- priv->children[child_index]->name);
- }
+ priv = this->private;
- local_fd = fd_ref (local->fd);
- call_count = afr_frame_return (frame);
- //Note: Do not access any thing using the frame outside call_count 0
+ if (!afr_is_fd_fixable(fd))
+ goto out;
- //Note: No frame locking needed for this block of code
- fd_ctx = afr_fd_ctx_get (local_fd, this);
- if (!fd_ctx) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get fd context, %p", local_fd);
- goto out;
- }
+ fd_ctx = afr_fd_ctx_get(fd, this);
+ if (!fd_ctx)
+ goto out;
- LOCK (&local_fd->lock);
- {
- if (op_ret >= 0) {
- fd_ctx->opened_on[child_index] = AFR_FD_OPENED;
- } else {
- fd_ctx->opened_on[child_index] = AFR_FD_NOT_OPENED;
- }
- if (call_count == 0) {
- INIT_LIST_HEAD (&paused_calls);
- afr_get_resumable_calls (this, fd_ctx, &paused_calls);
- }
- }
- UNLOCK (&local_fd->lock);
-out:
- if (call_count == 0) {
- afr_resume_calls (this, &paused_calls);
- //If the fop is paused then resume_calls will continue the fop
- if (fop_paused)
- goto done;
-
- if (local->fop_call_continue)
- local->fop_call_continue (frame, this);
- else
- AFR_STACK_DESTROY (frame);
- }
+ need_open = alloca0(priv->child_count);
-done:
- fd_unref (local_fd);
- return 0;
-}
+ call_count = afr_fd_ctx_need_open(fd, this, need_open);
+ if (!call_count)
+ goto out;
-int
-afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx,
- int need_open_count, int *need_open)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
- call_frame_t *open_frame = NULL;
- afr_local_t *open_local = NULL;
- int ret = -1;
- ia_type_t ia_type = IA_INVAL;
- int32_t op_errno = 0;
-
- GF_ASSERT (fd_ctx);
- GF_ASSERT (need_open_count > 0);
- GF_ASSERT (need_open);
-
- local = frame->local;
- priv = this->private;
- if (!local->fop_call_continue) {
- open_frame = copy_frame (frame);
- if (!open_frame) {
- ret = -ENOMEM;
- goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (open_frame->local, out);
- open_local = open_frame->local;
- ret = afr_local_init (open_local, priv, &op_errno);
- if (ret < 0)
- goto out;
- loc_copy (&open_local->loc, &local->loc);
- open_local->fd = fd_ref (local->fd);
- } else {
- ret = 0;
- open_frame = frame;
- open_local = local;
- }
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ goto out;
- open_local->call_count = need_open_count;
+ local = AFR_FRAME_INIT(frame, op_errno);
+ if (!local)
+ goto out;
- gf_log (this->name, GF_LOG_DEBUG, "need open count: %d",
- need_open_count);
+ local->loc.inode = inode_ref(fd->inode);
+ ret = loc_path(&local->loc, NULL);
+ if (ret < 0)
+ goto out;
- ia_type = open_local->fd->inode->ia_type;
- GF_ASSERT (ia_type != IA_INVAL);
- for (i = 0; i < priv->child_count; i++) {
- if (!need_open[i])
- continue;
- if (IA_IFDIR == ia_type) {
- gf_log (this->name, GF_LOG_DEBUG,
- "opening fd for dir %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (open_frame, afr_openfd_fix_open_cbk,
- (void*) (long) i,
- priv->children[i],
- priv->children[i]->fops->opendir,
- &open_local->loc, open_local->fd,
- NULL);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "opening fd for file %s on subvolume %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (open_frame, afr_openfd_fix_open_cbk,
- (void *)(long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- &open_local->loc, fd_ctx->flags,
- open_local->fd, NULL);
- }
+ local->fd = fd_ref(fd);
+ local->fd_ctx = fd_ctx;
+
+ local->call_count = call_count;
+
+ gf_msg_debug(this->name, 0, "need open count: %d", call_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!need_open[i])
+ continue;
+ if (IA_IFDIR == fd->inode->ia_type) {
+ gf_msg_debug(this->name, 0, "opening fd for dir %s on subvolume %s",
+ local->loc.path, priv->children[i]->name);
+
+ STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->opendir, &local->loc,
+ local->fd, NULL);
+ } else {
+ gf_msg_debug(this->name, 0,
+ "opening fd for file %s on subvolume %s",
+ local->loc.path, priv->children[i]->name);
+
+ STACK_WIND_COOKIE(frame, afr_openfd_fix_open_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->open,
+ &local->loc, fd_ctx->flags & (~O_TRUNC),
+ local->fd, NULL);
}
- op_errno = 0;
- ret = 0;
+
+ if (!--call_count)
+ break;
+ }
+
+ return;
out:
- if (op_errno)
- ret = -op_errno;
- if (ret && open_frame)
- AFR_STACK_DESTROY (open_frame);
- return ret;
+ if (frame)
+ AFR_STACK_DESTROY(frame);
}
diff --git a/xlators/cluster/afr/src/afr-read-txn.c b/xlators/cluster/afr/src/afr-read-txn.c
new file mode 100644
index 00000000000..6fc2c75145c
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-read-txn.c
@@ -0,0 +1,494 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "afr.h"
+#include "afr-transaction.h"
+#include "afr-messages.h"
+
+void
+afr_pending_read_increment(afr_private_t *priv, int child_index)
+{
+ if (child_index < 0 || child_index > priv->child_count)
+ return;
+
+ GF_ATOMIC_INC(priv->pending_reads[child_index]);
+}
+
+void
+afr_pending_read_decrement(afr_private_t *priv, int child_index)
+{
+ if (child_index < 0 || child_index > priv->child_count)
+ return;
+
+ GF_ATOMIC_DEC(priv->pending_reads[child_index]);
+}
+
+void
+afr_read_txn_wind(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ afr_pending_read_decrement(priv, local->read_subvol);
+ local->read_subvol = subvol;
+ afr_pending_read_increment(priv, subvol);
+ local->readfn(frame, this, subvol);
+}
+
+int
+afr_read_txn_next_subvol(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int subvol = -1;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->readable[i]) {
+ /* don't even bother trying here.
+ just mark as attempted and move on. */
+ local->read_attempted[i] = 1;
+ continue;
+ }
+
+ if (!local->read_attempted[i]) {
+ subvol = i;
+ break;
+ }
+ }
+
+ /* If no more subvols were available for reading, we leave
+ @subvol as -1, which is an indication we have run out of
+ readable subvols. */
+ if (subvol != -1)
+ local->read_attempted[subvol] = 1;
+ afr_read_txn_wind(frame, this, subvol);
+
+ return 0;
+}
+
+static int
+afr_ta_read_txn_done(int ret, call_frame_t *ta_frame, void *opaque)
+{
+ STACK_DESTROY(ta_frame->root);
+ return 0;
+}
+
+static int
+afr_ta_read_txn(void *opaque)
+{
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ int read_subvol = -1;
+ int query_child = AFR_CHILD_UNKNOWN;
+ int possible_bad_child = AFR_CHILD_UNKNOWN;
+ int ret = 0;
+ int op_errno = ENOMEM;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+ dict_t *xdata_req = NULL;
+ dict_t *xdata_rsp = NULL;
+ int **pending = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ frame = (call_frame_t *)opaque;
+ this = frame->this;
+ local = frame->local;
+ priv = this->private;
+ query_child = local->read_txn_query_child;
+
+ if (query_child == AFR_CHILD_ZERO) {
+ possible_bad_child = AFR_CHILD_ONE;
+ } else if (query_child == AFR_CHILD_ONE) {
+ possible_bad_child = AFR_CHILD_ZERO;
+ } else {
+ /*read_txn_query_child is AFR_CHILD_UNKNOWN*/
+ goto out;
+ }
+
+ /* Ask the query_child to see if it blames the possibly bad one. */
+ xdata_req = dict_new();
+ if (!xdata_req)
+ goto out;
+
+ pending = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!pending)
+ goto out;
+
+ ret = afr_set_pending_dict(priv, xdata_req, pending);
+ if (ret < 0)
+ goto out;
+
+ if (local->fd) {
+ ret = syncop_fxattrop(priv->children[query_child], local->fd,
+ GF_XATTROP_ADD_ARRAY, xdata_req, NULL, &xdata_rsp,
+ NULL);
+ } else {
+ ret = syncop_xattrop(priv->children[query_child], &local->loc,
+ GF_XATTROP_ADD_ARRAY, xdata_req, NULL, &xdata_rsp,
+ NULL);
+ }
+ if (ret || !xdata_rsp) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed xattrop for gfid %s on %s",
+ uuid_utoa(local->inode->gfid),
+ priv->children[query_child]->name);
+ op_errno = -ret;
+ goto out;
+ }
+
+ if (afr_ta_dict_contains_pending_xattr(xdata_rsp, priv,
+ possible_bad_child)) {
+ read_subvol = query_child;
+ goto out;
+ }
+ dict_unref(xdata_rsp);
+ xdata_rsp = NULL;
+
+ /* It doesn't. So query thin-arbiter to see if it blames any data brick. */
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.", loc.name);
+ goto out;
+ }
+ flock.l_type = F_WRLCK; /*start and length are already zero. */
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_MODIFY, &loc, F_SETLKW, &flock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "gfid:%s: Failed to get AFR_TA_DOM_MODIFY lock on %s.",
+ uuid_utoa(local->inode->gfid),
+ priv->pending_key[THIN_ARBITER_BRICK_INDEX]);
+ op_errno = -ret;
+ goto out;
+ }
+
+ ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
+ GF_XATTROP_ADD_ARRAY, xdata_req, NULL, &xdata_rsp,
+ NULL);
+ if (ret || !xdata_rsp) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "gfid:%s: Failed xattrop on %s.", uuid_utoa(local->inode->gfid),
+ priv->pending_key[THIN_ARBITER_BRICK_INDEX]);
+ op_errno = -ret;
+ goto unlock;
+ }
+
+ if (!afr_ta_dict_contains_pending_xattr(xdata_rsp, priv, query_child)) {
+ read_subvol = query_child;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, EIO, AFR_MSG_THIN_ARB,
+ "Failing read for gfid %s since good brick %s is down",
+ uuid_utoa(local->inode->gfid),
+ priv->children[possible_bad_child]->name);
+ op_errno = EIO;
+ }
+
+unlock:
+ flock.l_type = F_UNLCK;
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_MODIFY, &loc, F_SETLK, &flock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "gfid:%s: Failed to unlock AFR_TA_DOM_MODIFY lock on "
+ "%s.",
+ uuid_utoa(local->inode->gfid),
+ priv->pending_key[THIN_ARBITER_BRICK_INDEX]);
+ }
+out:
+ if (xdata_req)
+ dict_unref(xdata_req);
+ if (xdata_rsp)
+ dict_unref(xdata_rsp);
+ if (pending)
+ afr_matrix_cleanup(pending, priv->child_count);
+ loc_wipe(&loc);
+
+ if (read_subvol == -1) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
+ afr_read_txn_wind(frame, this, read_subvol);
+ return ret;
+}
+
+void
+afr_ta_read_txn_synctask(call_frame_t *frame, xlator_t *this)
+{
+ call_frame_t *ta_frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ ta_frame = afr_ta_frame_create(this);
+ if (!ta_frame) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to create ta_frame");
+ goto out;
+ }
+ ret = synctask_new(this->ctx->env, afr_ta_read_txn, afr_ta_read_txn_done,
+ ta_frame, frame);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to launch "
+ "afr_ta_read_txn synctask for gfid %s.",
+ uuid_utoa(local->inode->gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ STACK_DESTROY(ta_frame->root);
+ goto out;
+ }
+ return;
+out:
+ afr_read_txn_wind(frame, this, -1);
+}
+
+int
+afr_read_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int read_subvol = -1;
+ inode_t *inode = NULL;
+ int ret = -1;
+ int spb_subvol = -1;
+
+ local = frame->local;
+ inode = local->inode;
+ priv = this->private;
+
+ if (err) {
+ if (!priv->thin_arbiter_count)
+ goto readfn;
+ if (err != EINVAL)
+ goto readfn;
+ /* We need to query the good bricks and/or thin-arbiter.*/
+ afr_ta_read_txn_synctask(frame, this);
+ return 0;
+ }
+
+ read_subvol = afr_read_subvol_select_by_policy(inode, this, local->readable,
+ NULL);
+ if (read_subvol == -1) {
+ err = EIO;
+ goto readfn;
+ }
+
+ if (local->read_attempted[read_subvol]) {
+ afr_read_txn_next_subvol(frame, this);
+ return 0;
+ }
+
+ local->read_attempted[read_subvol] = 1;
+readfn:
+ if (read_subvol == -1) {
+ ret = afr_split_brain_read_subvol_get(inode, this, frame, &spb_subvol);
+ if ((ret == 0) && spb_subvol >= 0)
+ read_subvol = spb_subvol;
+ }
+
+ if (read_subvol == -1) {
+ AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(-1, err);
+ }
+ afr_read_txn_wind(frame, this, read_subvol);
+
+ return 0;
+}
+
+int
+afr_read_txn_continue(call_frame_t *frame, xlator_t *this, int subvol)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local->refreshed) {
+ local->refreshed = _gf_true;
+ afr_inode_refresh(frame, this, local->inode, NULL,
+ afr_read_txn_refresh_done);
+ } else {
+ afr_read_txn_next_subvol(frame, this);
+ }
+
+ return 0;
+}
+
+/* afr_read_txn_wipe:
+
+ clean internal variables in @local in order to make
+ it possible to call afr_read_txn() multiple times from
+ the same frame
+*/
+
+void
+afr_read_txn_wipe(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ local->readfn = NULL;
+
+ if (local->inode)
+ inode_unref(local->inode);
+
+ for (i = 0; i < priv->child_count; i++) {
+ local->read_attempted[i] = 0;
+ local->readable[i] = 0;
+ }
+}
+
+/*
+ afr_read_txn:
+
+ This is the read transaction function. The way it works:
+
+ - Determine read-subvolume from inode ctx.
+
+ - If read-subvolume's generation was stale, refresh ctx once by
+ calling afr_inode_refresh()
+
+ Else make an attempt to read on read-subvolume.
+
+ - If attempted read on read-subvolume fails, refresh ctx once
+ by calling afr_inode_refresh()
+
+ - After ctx refresh, query read-subvolume freshly and attempt
+ read once.
+
+ - If read fails, try every other readable[] subvolume before
+ finally giving up. readable[] elements are set by afr_inode_refresh()
+ based on dirty and pending flags.
+
+ - If file is in split brain in the backend, generation will be
+ kept 0 by afr_inode_refresh() and readable[] will be set 0 for
+ all elements. Therefore reads always fail.
+*/
+
+int
+afr_read_txn(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ afr_read_txn_wind_t readfn, afr_transaction_type type)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *data = NULL;
+ unsigned char *metadata = NULL;
+ int read_subvol = -1;
+ int event_generation = 0;
+ int ret = -1;
+
+ priv = this->private;
+ local = frame->local;
+ data = alloca0(priv->child_count);
+ metadata = alloca0(priv->child_count);
+
+ afr_read_txn_wipe(frame, this);
+
+ local->readfn = readfn;
+ local->inode = inode_ref(inode);
+ local->is_read_txn = _gf_true;
+ local->transaction.type = type;
+
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ local->op_ret = -1;
+ local->op_errno = afr_quorum_errno(priv);
+ goto read;
+ }
+
+ if (!afr_is_consistent_io_possible(local, priv, &local->op_errno)) {
+ local->op_ret = -1;
+ goto read;
+ }
+
+ if (priv->thin_arbiter_count && !afr_ta_has_quorum(priv, local)) {
+ local->op_ret = -1;
+ local->op_errno = -afr_quorum_errno(priv);
+ goto read;
+ }
+
+ if (priv->thin_arbiter_count &&
+ AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
+ if (local->child_up[0]) {
+ local->read_txn_query_child = AFR_CHILD_ZERO;
+ } else if (local->child_up[1]) {
+ local->read_txn_query_child = AFR_CHILD_ONE;
+ }
+ afr_ta_read_txn_synctask(frame, this);
+ return 0;
+ }
+
+ ret = afr_inode_read_subvol_get(inode, this, data, metadata,
+ &event_generation);
+ if (ret == -1)
+ /* very first transaction on this inode */
+ goto refresh;
+ AFR_INTERSECT(local->readable, data, metadata, priv->child_count);
+
+ gf_msg_debug(this->name, 0,
+ "%s: generation now vs cached: %d, "
+ "%d",
+ uuid_utoa(inode->gfid), local->event_generation,
+ event_generation);
+ if (afr_is_inode_refresh_reqd(inode, this, local->event_generation,
+ event_generation))
+ /* servers have disconnected / reconnected, and possibly
+ rebooted, very likely changing the state of freshness
+ of copies */
+ goto refresh;
+
+ read_subvol = afr_read_subvol_select_by_policy(inode, this, local->readable,
+ NULL);
+
+ if (read_subvol < 0 || read_subvol > priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "Unreadable subvolume %d found "
+ "with event generation %d for gfid %s.",
+ read_subvol, event_generation, uuid_utoa(inode->gfid));
+ goto refresh;
+ }
+
+ if (!local->child_up[read_subvol]) {
+ /* should never happen, just in case */
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_READ_SUBVOL_ERROR,
+ "subvolume %d is the "
+ "read subvolume in this generation, but is not up",
+ read_subvol);
+ goto refresh;
+ }
+
+ local->read_attempted[read_subvol] = 1;
+
+read:
+ afr_read_txn_wind(frame, this, read_subvol);
+
+ return 0;
+
+refresh:
+ afr_inode_refresh(frame, this, inode, NULL, afr_read_txn_refresh_done);
+
+ return 0;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.c b/xlators/cluster/afr/src/afr-self-heal-algorithm.c
deleted file mode 100644
index 18edca8c1eb..00000000000
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.c
+++ /dev/null
@@ -1,810 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-
-#include <openssl/md5.h>
-#include "glusterfs.h"
-#include "afr.h"
-#include "xlator.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-
-#include "afr-transaction.h"
-#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-#include "afr-self-heal-algorithm.h"
-
-/*
- This file contains the various self-heal algorithms
-*/
-
-static int
-sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
- gf_boolean_t is_first_call, call_frame_t *old_loop_frame);
-static int
-sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
- int32_t op_ret, int32_t op_errno);
-static int
-sh_destroy_frame (call_frame_t *frame, xlator_t *this)
-{
- if (!frame)
- goto out;
-
- AFR_STACK_DESTROY (frame);
-out:
- return 0;
-}
-
-static void
-sh_private_cleanup (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh_priv = sh->private;
- if (sh_priv)
- GF_FREE (sh_priv);
-}
-
-static int
-sh_number_of_writes_needed (unsigned char *write_needed, int child_count)
-{
- int writes = 0;
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (write_needed[i])
- writes++;
- }
-
- return writes;
-}
-
-
-static int
-sh_loop_driver_done (call_frame_t *sh_frame, xlator_t *this,
- call_frame_t *last_loop_frame)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
- int32_t total_blocks = 0;
- int32_t diff_blocks = 0;
-
- local = sh_frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
- if (sh_priv) {
- total_blocks = sh_priv->total_blocks;
- diff_blocks = sh_priv->diff_blocks;
- }
-
- sh_private_cleanup (sh_frame, this);
- if (sh->op_failed) {
- GF_ASSERT (!last_loop_frame);
- //loop_finish should have happened and the old_loop should be NULL
- gf_log (this->name, GF_LOG_INFO,
- "self-heal aborting on %s",
- local->loc.path);
-
- local->self_heal.algo_abort_cbk (sh_frame, this);
- } else {
- GF_ASSERT (last_loop_frame);
- if (diff_blocks == total_blocks) {
- gf_log (this->name, GF_LOG_INFO, "full self-heal "
- "completed on %s",local->loc.path);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "diff self-heal on %s: completed. "
- "(%d blocks of %d were different (%.2f%%))",
- local->loc.path, diff_blocks, total_blocks,
- ((diff_blocks * 1.0)/total_blocks) * 100);
- }
-
- sh->old_loop_frame = last_loop_frame;
- local->self_heal.algo_completion_cbk (sh_frame, this);
- }
-
- return 0;
-}
-
-int
-sh_loop_finish (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- if (!loop_frame)
- goto out;
-
- loop_local = loop_frame->local;
- if (loop_local) {
- loop_sh = &loop_local->self_heal;
- }
-
- if (loop_sh && loop_sh->data_lock_held) {
- afr_sh_data_unlock (loop_frame, this,
- sh_destroy_frame);
- } else {
- sh_destroy_frame (loop_frame, this);
- }
-out:
- return 0;
-}
-
-static int
-sh_loop_lock_success (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_loop_finish (loop_sh->old_loop_frame, this);
- loop_sh->old_loop_frame = NULL;
-
- gf_log (this->name, GF_LOG_DEBUG, "Acquired lock for range %"PRIu64
- " %"PRIu64, loop_sh->offset, loop_sh->block_size);
- loop_sh->data_lock_held = _gf_true;
- loop_sh->sh_data_algo_start (loop_frame, this);
- return 0;
-}
-
-static int
-sh_loop_lock_failure (call_frame_t *loop_frame, xlator_t *this)
-{
- call_frame_t *sh_frame = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
- sh_frame = loop_sh->sh_frame;
-
- gf_log (this->name, GF_LOG_ERROR, "failed lock for range %"PRIu64
- " %"PRIu64, loop_sh->offset, loop_sh->block_size);
- sh_loop_finish (loop_sh->old_loop_frame, this);
- loop_sh->old_loop_frame = NULL;
- sh_loop_return (sh_frame, this, loop_frame, -1, ENOTCONN);
- return 0;
-}
-
-static int
-sh_loop_frame_create (call_frame_t *sh_frame, xlator_t *this,
- call_frame_t *old_loop_frame, call_frame_t **loop_frame)
-{
- call_frame_t *new_loop_frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *new_loop_local = NULL;
- afr_self_heal_t *new_loop_sh = NULL;
- afr_private_t *priv = NULL;
-
- GF_ASSERT (sh_frame);
- GF_ASSERT (loop_frame);
-
- *loop_frame = NULL;
- local = sh_frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- new_loop_frame = copy_frame (sh_frame);
- if (!new_loop_frame)
- goto out;
- //We want the frame to have same lk_owner as sh_frame
- //so that locks translator allows conflicting locks
- new_loop_local = afr_local_copy (local, this);
- if (!new_loop_local)
- goto out;
- new_loop_frame->local = new_loop_local;
-
- new_loop_sh = &new_loop_local->self_heal;
- new_loop_sh->sources = memdup (sh->sources,
- priv->child_count * sizeof (*sh->sources));
- if (!new_loop_sh->sources)
- goto out;
- new_loop_sh->write_needed = GF_CALLOC (priv->child_count,
- sizeof (*new_loop_sh->write_needed),
- gf_afr_mt_char);
- if (!new_loop_sh->write_needed)
- goto out;
- new_loop_sh->checksum = GF_CALLOC (priv->child_count, MD5_DIGEST_LENGTH,
- gf_afr_mt_uint8_t);
- if (!new_loop_sh->checksum)
- goto out;
- new_loop_sh->inode = inode_ref (sh->inode);
- new_loop_sh->sh_data_algo_start = sh->sh_data_algo_start;
- new_loop_sh->source = sh->source;
- new_loop_sh->active_sinks = sh->active_sinks;
- new_loop_sh->healing_fd = fd_ref (sh->healing_fd);
- new_loop_sh->file_has_holes = sh->file_has_holes;
- new_loop_sh->old_loop_frame = old_loop_frame;
- new_loop_sh->sh_frame = sh_frame;
- *loop_frame = new_loop_frame;
- return 0;
-out:
- sh_destroy_frame (new_loop_frame, this);
- return -ENOMEM;
-}
-
-static int
-sh_loop_start (call_frame_t *sh_frame, xlator_t *this, off_t offset,
- call_frame_t *old_loop_frame)
-{
- call_frame_t *new_loop_frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *new_loop_local = NULL;
- afr_self_heal_t *new_loop_sh = NULL;
- int ret = 0;
-
- GF_ASSERT (sh_frame);
-
- local = sh_frame->local;
- sh = &local->self_heal;
-
- ret = sh_loop_frame_create (sh_frame, this, old_loop_frame,
- &new_loop_frame);
- if (ret)
- goto out;
- new_loop_local = new_loop_frame->local;
- new_loop_sh = &new_loop_local->self_heal;
- new_loop_sh->offset = offset;
- new_loop_sh->block_size = sh->block_size;
- afr_sh_data_lock (new_loop_frame, this, offset, new_loop_sh->block_size,
- sh_loop_lock_success, sh_loop_lock_failure);
- return 0;
-out:
- sh->op_failed = 1;
- if (old_loop_frame)
- sh_loop_finish (old_loop_frame, this);
- sh_loop_return (sh_frame, this, new_loop_frame, -1, ENOMEM);
- return 0;
-}
-
-static int
-sh_loop_driver (call_frame_t *sh_frame, xlator_t *this,
- gf_boolean_t is_first_call, call_frame_t *old_loop_frame)
-{
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
- gf_boolean_t is_driver_done = _gf_false;
- blksize_t block_size = 0;
- int loop = 0;
- off_t offset = 0;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- local = sh_frame->local;
- sh = &local->self_heal;
- sh_priv = sh->private;
-
- LOCK (&sh_priv->lock);
- {
- if (!is_first_call)
- sh_priv->loops_running--;
- offset = sh_priv->offset;
- block_size = sh->block_size;
- while ((!sh->eof_reached) && (0 == sh->op_failed) &&
- (sh_priv->loops_running < priv->data_self_heal_window_size)
- && (sh_priv->offset < sh->file_size)) {
-
- loop++;
- sh_priv->offset += block_size;
- sh_priv->loops_running++;
-
- if (!is_first_call)
- break;
- }
- if (0 == sh_priv->loops_running) {
- is_driver_done = _gf_true;
- }
- }
- UNLOCK (&sh_priv->lock);
-
- if (0 == loop) {
- //loop finish does unlock, but the erasing of the pending
- //xattrs needs to happen before that so do not finish the loop
- if (is_driver_done && !sh->op_failed)
- goto driver_done;
- if (old_loop_frame) {
- sh_loop_finish (old_loop_frame, this);
- old_loop_frame = NULL;
- }
- }
-
- //If we have more loops to form we should finish previous loop after
- //the next loop lock
- while (loop--) {
- if (sh->op_failed) {
- // op failed in other loop, stop spawning more loops
- if (old_loop_frame) {
- sh_loop_finish (old_loop_frame, this);
- old_loop_frame = NULL;
- }
- sh_loop_driver (sh_frame, this, _gf_false, NULL);
- } else {
- gf_log (this->name, GF_LOG_TRACE, "spawning a loop "
- "for offset %"PRId64, offset);
-
- sh_loop_start (sh_frame, this, offset, old_loop_frame);
- old_loop_frame = NULL;
- offset += block_size;
- }
- }
-
-driver_done:
- if (is_driver_done) {
- sh_loop_driver_done (sh_frame, this, old_loop_frame);
- }
- return 0;
-}
-
-static int
-sh_loop_return (call_frame_t *sh_frame, xlator_t *this, call_frame_t *loop_frame,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t * loop_local = NULL;
- afr_self_heal_t * loop_sh = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
-
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- if (loop_frame) {
- loop_local = loop_frame->local;
- if (loop_local)
- loop_sh = &loop_local->self_heal;
- if (loop_sh)
- gf_log (this->name, GF_LOG_TRACE, "loop for offset "
- "%"PRId64" returned", loop_sh->offset);
- }
-
- if (op_ret == -1) {
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- if (loop_frame) {
- sh_loop_finish (loop_frame, this);
- loop_frame = NULL;
- }
- }
-
- sh_loop_driver (sh_frame, this, _gf_false, loop_frame);
-
- return 0;
-}
-
-static int
-sh_loop_write_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *postbuf, dict_t *xdata)
-{
- afr_private_t * priv = NULL;
- afr_local_t * loop_local = NULL;
- afr_self_heal_t * loop_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = 0;
- int child_index = 0;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_frame = loop_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- child_index = (long) cookie;
-
- gf_log (this->name, GF_LOG_TRACE,
- "wrote %d bytes of data from %s to child %d, offset %"PRId64"",
- op_ret, sh_local->loc.path, child_index, loop_sh->offset);
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "write to %s failed on subvolume %s (%s)",
- sh_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->op_failed = 1;
- afr_sh_set_error (loop_sh, op_errno);
- }
-
- call_count = afr_frame_return (loop_frame);
-
- if (call_count == 0) {
- sh_loop_return (sh_frame, this, loop_frame,
- loop_sh->op_ret, loop_sh->op_errno);
- }
-
- return 0;
-}
-
-static void
-sh_prune_writes_needed (call_frame_t *sh_frame, call_frame_t *loop_frame,
- afr_private_t *priv)
-{
- afr_local_t *sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- int i = 0;
-
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- if (!strcmp (sh->algo->name, "diff"))
- return;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- /* full self-heal guarantees there exists atleast 1 file with size 0
- * That means for other files we can preserve holes that come after
- * its size before 'trim'
- */
- for (i = 0; i < priv->child_count; i++) {
- if (loop_sh->write_needed[i] &&
- ((loop_sh->offset + 1) > sh->buf[i].ia_size))
- loop_sh->write_needed[i] = 0;
- }
-}
-
-static int
-sh_loop_read_cbk (call_frame_t *loop_frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct iatt *buf,
- struct iobref *iobref, dict_t *xdata)
-{
- afr_private_t * priv = NULL;
- afr_local_t * loop_local = NULL;
- afr_self_heal_t * loop_sh = NULL;
- call_frame_t *sh_frame = NULL;
- int i = 0;
- int call_count = 0;
- afr_local_t * sh_local = NULL;
- afr_self_heal_t * sh = NULL;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_frame = loop_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- gf_log (this->name, GF_LOG_TRACE,
- "read %d bytes of data from %s, offset %"PRId64"",
- op_ret, loop_local->loc.path, loop_sh->offset);
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- sh->op_failed = 1;
- gf_log (this->name, GF_LOG_ERROR, "read failed on %d "
- "for %s reason :%s", sh->source,
- sh_local->loc.path, strerror (errno));
- } else {
- sh->eof_reached = _gf_true;
- gf_log (this->name, GF_LOG_DEBUG, "Eof reached for %s",
- sh_local->loc.path);
- }
- sh_loop_return (sh_frame, this, loop_frame, op_ret, op_errno);
- goto out;
- }
-
- if (loop_sh->file_has_holes && iov_0filled (vector, count) == 0)
- sh_prune_writes_needed (sh_frame, loop_frame, priv);
-
- call_count = sh_number_of_writes_needed (loop_sh->write_needed,
- priv->child_count);
- if (call_count == 0) {
- sh_loop_return (sh_frame, this, loop_frame, 0, 0);
- goto out;
- }
- loop_local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (!loop_sh->write_needed[i])
- continue;
- STACK_WIND_COOKIE (loop_frame, sh_loop_write_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->writev,
- loop_sh->healing_fd, vector, count,
- loop_sh->offset, 0, iobref, NULL);
-
- if (!--call_count)
- break;
- }
-
-out:
- return 0;
-}
-
-
-static int
-sh_loop_read (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- STACK_WIND_COOKIE (loop_frame, sh_loop_read_cbk,
- (void *) (long) loop_sh->source,
- priv->children[loop_sh->source],
- priv->children[loop_sh->source]->fops->readv,
- loop_sh->healing_fd, loop_sh->block_size,
- loop_sh->offset, 0, NULL);
-
- return 0;
-}
-
-
-static int
-sh_diff_checksum_cbk (call_frame_t *loop_frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- uint32_t weak_checksum, uint8_t *strong_checksum,
- dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t *sh_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_sh_algo_private_t *sh_priv = NULL;
- int child_index = 0;
- int call_count = 0;
- int i = 0;
- int write_needed = 0;
-
- priv = this->private;
-
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- sh_frame = loop_sh->sh_frame;
- sh_local = sh_frame->local;
- sh = &sh_local->self_heal;
-
- sh_priv = sh->private;
-
- child_index = (long) cookie;
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "checksum on %s failed on subvolume %s (%s)",
- sh_local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- } else {
- memcpy (loop_sh->checksum + child_index * MD5_DIGEST_LENGTH,
- strong_checksum, MD5_DIGEST_LENGTH);
- }
-
- call_count = afr_frame_return (loop_frame);
-
- if (call_count == 0) {
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] || !sh_local->child_up[i])
- continue;
-
- if (memcmp (loop_sh->checksum + (i * MD5_DIGEST_LENGTH),
- loop_sh->checksum + (sh->source * MD5_DIGEST_LENGTH),
- MD5_DIGEST_LENGTH)) {
- /*
- Checksums differ, so this block
- must be written to this sink
- */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "checksum on subvolume %s at offset %"
- PRId64" differs from that on source",
- priv->children[i]->name, loop_sh->offset);
-
- write_needed = loop_sh->write_needed[i] = 1;
- }
- }
-
- LOCK (&sh_priv->lock);
- {
- sh_priv->total_blocks++;
- if (write_needed)
- sh_priv->diff_blocks++;
- }
- UNLOCK (&sh_priv->lock);
-
- if (write_needed && !sh->op_failed) {
- sh_loop_read (loop_frame, this);
- } else {
- sh_loop_return (sh_frame, this, loop_frame,
- op_ret, op_errno);
- }
- }
-
- return 0;
-}
-
-static int
-sh_diff_checksum (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- call_count = loop_sh->active_sinks + 1; /* sinks and source */
-
- loop_local->call_count = call_count;
-
- STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
- (void *) (long) loop_sh->source,
- priv->children[loop_sh->source],
- priv->children[loop_sh->source]->fops->rchecksum,
- loop_sh->healing_fd,
- loop_sh->offset, loop_sh->block_size, NULL);
-
- for (i = 0; i < priv->child_count; i++) {
- if (loop_sh->sources[i] || !loop_local->child_up[i])
- continue;
-
- STACK_WIND_COOKIE (loop_frame, sh_diff_checksum_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->rchecksum,
- loop_sh->healing_fd,
- loop_sh->offset, loop_sh->block_size, NULL);
-
- if (!--call_count)
- break;
- }
-
- return 0;
-}
-
-static int
-sh_full_read_write_to_sinks (call_frame_t *loop_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *loop_local = NULL;
- afr_self_heal_t *loop_sh = NULL;
- int i = 0;
-
- priv = this->private;
- loop_local = loop_frame->local;
- loop_sh = &loop_local->self_heal;
-
- for (i = 0; i < priv->child_count; i++) {
- if (loop_sh->sources[i] || !loop_local->child_up[i])
- continue;
- loop_sh->write_needed[i] = 1;
- }
- sh_loop_read (loop_frame, this);
- return 0;
-}
-
-afr_sh_algo_private_t*
-afr_sh_priv_init ()
-{
- afr_sh_algo_private_t *sh_priv = NULL;
-
- sh_priv = GF_CALLOC (1, sizeof (*sh_priv),
- gf_afr_mt_afr_private_t);
- if (!sh_priv)
- goto out;
-
- LOCK_INIT (&sh_priv->lock);
-out:
- return sh_priv;
-}
-
-void
-afr_sh_transfer_lock (call_frame_t *dst, call_frame_t *src,
- unsigned int child_count)
-{
- afr_local_t *dst_local = NULL;
- afr_self_heal_t *dst_sh = NULL;
- afr_local_t *src_local = NULL;
- afr_self_heal_t *src_sh = NULL;
-
- dst_local = dst->local;
- dst_sh = &dst_local->self_heal;
- src_local = src->local;
- src_sh = &src_local->self_heal;
- GF_ASSERT (src_sh->data_lock_held);
- GF_ASSERT (!dst_sh->data_lock_held);
- afr_lk_transfer_datalock (dst, src, child_count);
- src_sh->data_lock_held = _gf_false;
- dst_sh->data_lock_held = _gf_true;
-}
-
-int
-afr_sh_start_loops (call_frame_t *sh_frame, xlator_t *this,
- afr_sh_algo_fn sh_data_algo_start)
-{
- call_frame_t *first_loop_frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int ret = 0;
- afr_private_t *priv = NULL;
-
- local = sh_frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- sh->sh_data_algo_start = sh_data_algo_start;
- local->call_count = 0;
- ret = sh_loop_frame_create (sh_frame, this, NULL, &first_loop_frame);
- if (ret)
- goto out;
- afr_sh_transfer_lock (first_loop_frame, sh_frame, priv->child_count);
- sh->private = afr_sh_priv_init ();
- if (!sh->private) {
- ret = -1;
- goto out;
- }
- sh_loop_driver (sh_frame, this, _gf_true, first_loop_frame);
- ret = 0;
-out:
- if (ret) {
- sh->op_failed = 1;
- sh_loop_driver_done (sh_frame, this, NULL);
- }
- return 0;
-}
-
-int
-afr_sh_algo_diff (call_frame_t *sh_frame, xlator_t *this)
-{
- afr_sh_start_loops (sh_frame, this, sh_diff_checksum);
- return 0;
-}
-
-int
-afr_sh_algo_full (call_frame_t *sh_frame, xlator_t *this)
-{
- afr_sh_start_loops (sh_frame, this, sh_full_read_write_to_sinks);
- return 0;
-}
-
-struct afr_sh_algorithm afr_self_heal_algorithms[] = {
- {.name = "full", .fn = afr_sh_algo_full},
- {.name = "diff", .fn = afr_sh_algo_diff},
- {0, 0},
-};
diff --git a/xlators/cluster/afr/src/afr-self-heal-algorithm.h b/xlators/cluster/afr/src/afr-self-heal-algorithm.h
deleted file mode 100644
index 6b20789b1bb..00000000000
--- a/xlators/cluster/afr/src/afr-self-heal-algorithm.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __AFR_SELF_HEAL_ALGORITHM_H__
-#define __AFR_SELF_HEAL_ALGORITHM_H__
-
-typedef int (*afr_sh_algo_fn) (call_frame_t *frame,
- xlator_t *this);
-
-struct afr_sh_algorithm {
- const char *name;
- afr_sh_algo_fn fn;
-};
-
-extern struct afr_sh_algorithm afr_self_heal_algorithms[3];
-typedef struct {
- gf_lock_t lock;
- unsigned int loops_running;
- off_t offset;
-
- int32_t total_blocks;
- int32_t diff_blocks;
-} afr_sh_algo_private_t;
-
-#endif /* __AFR_SELF_HEAL_ALGORITHM_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.c b/xlators/cluster/afr/src/afr-self-heal-common.c
index c4384ebc572..a580a1584cc 100644
--- a/xlators/cluster/afr/src/afr-self-heal-common.c
+++ b/xlators/cluster/afr/src/afr-self-heal-common.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,2422 +8,2927 @@
cases as published by the Free Software Foundation.
*/
-#include "glusterfs.h"
-#include "xlator.h"
-#include "byte-order.h"
-
#include "afr.h"
-#include "afr-transaction.h"
-#include "afr-self-heal-common.h"
#include "afr-self-heal.h"
-#include "pump.h"
+#include <glusterfs/byte-order.h>
+#include "protocol-common.h"
+#include "afr-messages.h"
+#include <glusterfs/events.h>
void
-afr_sh_reset (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+afr_heal_synctask(xlator_t *this, afr_local_t *local);
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+int
+afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
+ inode_t *inode, struct afr_reply *replies, int source,
+ unsigned char *sources, void *gfid, int *gfid_idx)
+{
+ afr_private_t *priv = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ unsigned char *wind_on = NULL;
+ ia_type_t ia_type = IA_INVAL;
+ dict_t *xdata = NULL;
+ loc_t loc = {
+ 0,
+ };
+ int ret = 0;
+ int i = 0;
+
+ priv = this->private;
+ wind_on = alloca0(priv->child_count);
+ if (source >= 0 && replies[source].valid && replies[source].op_ret == 0)
+ ia_type = replies[source].poststat.ia_type;
+
+ if (ia_type != IA_INVAL)
+ goto heal;
+
+ /* If ia_type is still invalid, it means either
+ * (a)'source' was -1, i.e. parent dir pending xattrs are in split-brain
+ * (or) (b) The parent dir pending xattrs are all zeroes (i.e. all bricks
+ * are sources) and the 'source' we selected earlier might be the one where
+ * the file is not actually present.
+ *
+ * In both cases, let us pick a brick with a successful reply and use its
+ * ia_type.
+ * */
+ for (i = 0; i < priv->child_count; i++) {
+ if (source == -1) {
+ /* case (a) above. */
+ if (replies[i].valid && replies[i].op_ret == 0 &&
+ replies[i].poststat.ia_type != IA_INVAL) {
+ ia_type = replies[i].poststat.ia_type;
+ break;
+ }
+ } else {
+ /* case (b) above. */
+ if (i == source)
+ continue;
+ if (sources[i] && replies[i].valid && replies[i].op_ret == 0 &&
+ replies[i].poststat.ia_type != IA_INVAL) {
+ ia_type = replies[i].poststat.ia_type;
+ break;
+ }
+ }
+ }
- memset (sh->child_errno, 0,
- sizeof (*sh->child_errno) * priv->child_count);
- memset (sh->buf, 0, sizeof (*sh->buf) * priv->child_count);
- memset (sh->parentbufs, 0,
- sizeof (*sh->parentbufs) * priv->child_count);
- memset (sh->success, 0, sizeof (*sh->success) * priv->child_count);
- memset (sh->locked_nodes, 0,
- sizeof (*sh->locked_nodes) * priv->child_count);
- sh->active_sinks = 0;
+heal:
+ /* gfid heal on those subvolumes that do not have gfid associated
+ * with the inode and update those replies.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
- afr_reset_xattr (sh->xattr, priv->child_count);
-}
+ if (gf_uuid_is_null(gfid) &&
+ !gf_uuid_is_null(replies[i].poststat.ia_gfid) &&
+ replies[i].poststat.ia_type == ia_type)
+ gfid = replies[i].poststat.ia_gfid;
-//Intersection[child]=1 if child is part of intersection
-void
-afr_children_intersection_get (int32_t *set1, int32_t *set2,
- int *intersection, unsigned int child_count)
-{
- int i = 0;
+ if (!gf_uuid_is_null(replies[i].poststat.ia_gfid) ||
+ replies[i].poststat.ia_type != ia_type)
+ continue;
+
+ wind_on[i] = 1;
+ }
- memset (intersection, 0, sizeof (*intersection) * child_count);
- for (i = 0; i < child_count; i++) {
- intersection[i] = afr_is_child_present (set1, child_count, i)
- && afr_is_child_present (set2, child_count,
- i);
+ if (AFR_COUNT(wind_on, priv->child_count) == 0)
+ return 0;
+
+ xdata = dict_new();
+ if (!xdata) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_gfuuid(xdata, "gfid-req", gfid, true);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ local = frame->local;
+ loc.parent = inode_ref(parent);
+ gf_uuid_copy(loc.pargfid, parent->gfid);
+ loc.name = name;
+ loc.inode = inode_ref(inode);
+
+ AFR_ONLIST(wind_on, frame, afr_selfheal_discover_cbk, lookup, &loc, xdata);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_on[i])
+ continue;
+ afr_reply_wipe(&replies[i]);
+ afr_reply_copy(&replies[i], &local->replies[i]);
+ }
+ if (gfid_idx && (*gfid_idx == -1)) {
+ /*Pick a brick where the gifd heal was successful.*/
+ for (i = 0; i < priv->child_count; i++) {
+ if (!wind_on[i])
+ continue;
+ if (replies[i].valid && replies[i].op_ret == 0 &&
+ !gf_uuid_is_null(replies[i].poststat.ia_gfid)) {
+ *gfid_idx = i;
+ break;
+ }
}
-}
+ }
+out:
+ if (gfid_idx && (*gfid_idx == -1) && (ret == 0) && local) {
+ ret = -afr_final_errno(local, priv);
+ }
+ loc_wipe(&loc);
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ if (xdata)
+ dict_unref(xdata);
-/**
- * select_source - select a source and return it
- */
+ return ret;
+}
int
-afr_sh_select_source (int sources[], int child_count)
+afr_gfid_sbrain_source_from_src_brick(xlator_t *this, struct afr_reply *replies,
+ char *src_brick)
{
- int i = 0;
- for (i = 0; i < child_count; i++)
- if (sources[i])
- return i;
+ int i = 0;
+ afr_private_t *priv = NULL;
- return -1;
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (strcmp(priv->children[i]->name, src_brick) == 0)
+ return i;
+ }
+ return -1;
}
-void
-afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this)
+int
+afr_selfheal_gfid_mismatch_by_majority(struct afr_reply *replies,
+ int child_count)
{
- int i = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int active_sinks = 0;
+ int j = 0;
+ int i = 0;
+ int votes;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ for (i = 0; i < child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
- for (i = 0; i < priv->child_count; i++) {
- if (sh->sources[i] == 0 && local->child_up[i] == 1) {
- active_sinks++;
- sh->success[i] = 1;
- } else if (sh->sources[i] == 1 && local->child_up[i] == 1) {
- sh->success[i] = 1;
- }
+ votes = 1;
+ for (j = i + 1; j < child_count; j++) {
+ if ((!gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[j].poststat.ia_gfid)))
+ votes++;
+ if (votes > child_count / 2)
+ return i;
}
- sh->active_sinks = active_sinks;
+ }
+
+ return -1;
}
int
-afr_sh_source_count (int sources[], int child_count)
+afr_gfid_sbrain_source_from_bigger_file(struct afr_reply *replies,
+ int child_count)
{
- int i = 0;
- int nsource = 0;
+ int i = 0;
+ int src = -1;
+ uint64_t size = 0;
- for (i = 0; i < child_count; i++)
- if (sources[i])
- nsource++;
- return nsource;
+ for (i = 0; i < child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret == -1)
+ continue;
+ if (size < replies[i].poststat.ia_size) {
+ src = i;
+ size = replies[i].poststat.ia_size;
+ } else if (replies[i].poststat.ia_size == size) {
+ src = -1;
+ }
+ }
+ return src;
}
-void
-afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno)
-{
- sh->op_ret = -1;
- if (afr_error_more_important (sh->op_errno, op_errno))
- sh->op_errno = op_errno;
+int
+afr_gfid_sbrain_source_from_latest_mtime(struct afr_reply *replies,
+ int child_count)
+{
+ int i = 0;
+ int src = -1;
+ uint32_t mtime = 0;
+ uint32_t mtime_nsec = 0;
+
+ for (i = 0; i < child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
+ if ((mtime < replies[i].poststat.ia_mtime) ||
+ ((mtime == replies[i].poststat.ia_mtime) &&
+ (mtime_nsec < replies[i].poststat.ia_mtime_nsec))) {
+ src = i;
+ mtime = replies[i].poststat.ia_mtime;
+ mtime_nsec = replies[i].poststat.ia_mtime_nsec;
+ } else if ((mtime == replies[i].poststat.ia_mtime) &&
+ (mtime_nsec == replies[i].poststat.ia_mtime_nsec)) {
+ src = -1;
+ }
+ }
+ return src;
}
-void
-afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this)
+int
+afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, uuid_t pargfid, const char *bname,
+ int src_idx, int child_idx,
+ unsigned char *locked_on, int *src, dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ char g1[64] = {
+ 0,
+ };
+ char g2[64] = {
+ 0,
+ };
+ int up_count = 0;
+ int heal_op = -1;
+ int ret = -1;
+ char *src_brick = NULL;
+
+ *src = -1;
+ priv = this->private;
+ up_count = AFR_COUNT(locked_on, priv->child_count);
+ if (up_count != priv->child_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "All the bricks should be up to resolve the gfid split "
+ "barin");
+ if (xdata) {
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SALL_BRICKS_UP_TO_RESOLVE);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
+ "Error setting"
+ " gfid-heal-msg dict");
+ }
+ goto out;
+ }
+
+ if (xdata) {
+ ret = dict_get_int32_sizen(xdata, "heal-op", &heal_op);
+ if (ret)
+ goto fav_child;
+ } else {
+ goto fav_child;
+ }
+
+ switch (heal_op) {
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+ *src = afr_gfid_sbrain_source_from_bigger_file(replies,
+ priv->child_count);
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ SNO_BIGGER_FILE);
+ if (xdata) {
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SNO_BIGGER_FILE);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_SET_FAILED,
+ "Error"
+ " setting gfid-heal-msg dict");
+ }
+ }
+ break;
+
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:
+ *src = afr_gfid_sbrain_source_from_latest_mtime(replies,
+ priv->child_count);
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ SNO_DIFF_IN_MTIME);
+ if (xdata) {
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SNO_DIFF_IN_MTIME);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_SET_FAILED,
+ "Error"
+ "setting gfid-heal-msg dict");
+ }
+ }
+ break;
+
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+ ret = dict_get_str_sizen(xdata, "child-name", &src_brick);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "Error getting the source "
+ "brick");
+ break;
+ }
+ *src = afr_gfid_sbrain_source_from_src_brick(this, replies,
+ src_brick);
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ SERROR_GETTING_SRC_BRICK);
+ if (xdata) {
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ SERROR_GETTING_SRC_BRICK);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_SET_FAILED,
+ "Error"
+ " setting gfid-heal-msg dict");
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+ goto out;
+
+fav_child:
+ switch (priv->fav_child_policy) {
+ case AFR_FAV_CHILD_BY_SIZE:
+ *src = afr_sh_fav_by_size(this, replies, inode);
+ break;
+ case AFR_FAV_CHILD_BY_MTIME:
+ *src = afr_sh_fav_by_mtime(this, replies, inode);
+ break;
+ case AFR_FAV_CHILD_BY_CTIME:
+ *src = afr_sh_fav_by_ctime(this, replies, inode);
+ break;
+ case AFR_FAV_CHILD_BY_MAJORITY:
+ if (priv->child_count != 2)
+ *src = afr_selfheal_gfid_mismatch_by_majority(
+ replies, priv->child_count);
+ else
+ *src = -1;
+
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "No majority to resolve "
+ "gfid split brain");
+ }
+ break;
+ default:
+ break;
+ }
+
+out:
+ if (*src == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "Gfid mismatch detected for <gfid:%s>/%s>, %s on %s and"
+ " %s on %s.",
+ uuid_utoa(pargfid), bname,
+ uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1),
+ priv->children[child_idx]->name,
+ uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2),
+ priv->children[src_idx]->name);
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;type=gfid;file="
+ "<gfid:%s>/%s>;count=2;child-%d=%s;gfid-%d=%s;"
+ "child-%d=%s;gfid-%d=%s",
+ this->ctx->cmd_args.client_pid, this->name, uuid_utoa(pargfid),
+ bname, child_idx, priv->children[child_idx]->name, child_idx,
+ uuid_utoa_r(replies[child_idx].poststat.ia_gfid, g1), src_idx,
+ priv->children[src_idx]->name, src_idx,
+ uuid_utoa_r(replies[src_idx].poststat.ia_gfid, g2));
+ return -1;
+ }
+ return 0;
+}
+
+int
+afr_selfheal_post_op_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
{
- afr_private_t * priv = this->private;
- char *buf = NULL;
- char *ptr = NULL;
- int i = 0;
- int j = 0;
+ afr_local_t *local = NULL;
- /* 10 digits per entry + 1 space + '[' and ']' */
- buf = GF_MALLOC (priv->child_count * 11 + 8, gf_afr_mt_char);
+ local = frame->local;
- for (i = 0; i < priv->child_count; i++) {
- ptr = buf;
- ptr += sprintf (ptr, "[ ");
- for (j = 0; j < priv->child_count; j++) {
- ptr += sprintf (ptr, "%d ", pending_matrix[i][j]);
- }
- sprintf (ptr, "]");
- gf_log (this->name, GF_LOG_DEBUG,
- "pending_matrix: %s", buf);
- }
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ syncbarrier_wake(&local->barrier);
- GF_FREE (buf);
+ return 0;
}
-void
-afr_init_pending_matrix (int32_t **pending_matrix, size_t child_count)
+int
+afr_selfheal_post_op(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int subvol, dict_t *xattr, dict_t *xdata)
{
- int i = 0;
- int j = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ loc_t loc = {
+ 0,
+ };
+ int ret = 0;
- GF_ASSERT (pending_matrix);
+ priv = this->private;
+ local = frame->local;
- for (i = 0; i < child_count; i++) {
- for (j = 0; j < child_count; j++) {
- pending_matrix[i][j] = 0;
- }
- }
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ local->op_ret = 0;
+
+ STACK_WIND(frame, afr_selfheal_post_op_cbk, priv->children[subvol],
+ priv->children[subvol]->fops->xattrop, &loc,
+ GF_XATTROP_ADD_ARRAY, xattr, xdata);
+
+ syncbarrier_wait(&local->barrier, 1);
+ if (local->op_ret < 0)
+ ret = -local->op_errno;
+
+ loc_wipe(&loc);
+ local->op_ret = 0;
+
+ return ret;
}
-void
-afr_mark_ignorant_subvols_as_pending (int32_t **pending_matrix,
- unsigned char *ignorant_subvols,
- size_t child_count)
-{
- int i = 0;
- int j = 0;
-
- GF_ASSERT (pending_matrix);
- GF_ASSERT (ignorant_subvols);
-
- for (i = 0; i < child_count; i++) {
- if (ignorant_subvols[i]) {
- for (j = 0; j < child_count; j++) {
- if (!ignorant_subvols[j])
- pending_matrix[j][i] += 1;
- }
- }
- }
+int
+afr_check_stale_error(struct afr_reply *replies, afr_private_t *priv)
+{
+ int i = 0;
+ int op_errno = 0;
+ int tmp_errno = 0;
+ int stale_count = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ tmp_errno = replies[i].op_errno;
+ if (tmp_errno == ENOENT || tmp_errno == ESTALE) {
+ op_errno = afr_higher_errno(op_errno, tmp_errno);
+ stale_count++;
+ }
+ }
+ if (stale_count != priv->child_count)
+ return -ENOTCONN;
+ else
+ return -op_errno;
}
int
-afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
- unsigned char *ignorant_subvols,
- dict_t *xattr[], afr_transaction_type type,
- size_t child_count)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
- int k = 0;
-
- afr_init_pending_matrix (pending_matrix, child_count);
-
- for (i = 0; i < child_count; i++) {
- pending_raw = NULL;
-
- for (j = 0; j < child_count; j++) {
- ret = dict_get_ptr (xattr[i], pending_key[j],
- &pending_raw);
-
- if (ret != 0) {
- /*
- * There is no xattr present. This means this
- * subvolume should be considered an 'ignorant'
- * subvolume.
- */
-
- if (ignorant_subvols)
- ignorant_subvols[i] = 1;
- continue;
- }
-
- memcpy (pending, pending_raw, sizeof(pending));
- k = afr_index_for_transaction_type (type);
-
- pending_matrix[i][j] = ntoh32 (pending[k]);
- }
+afr_sh_generic_fop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ int i = (long)cookie;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (pre)
+ local->replies[i].prestat = *pre;
+ if (post)
+ local->replies[i].poststat = *post;
+ if (xdata)
+ local->replies[i].xdata = dict_ref(xdata);
+
+ syncbarrier_wake(&local->barrier);
+
+ return 0;
+}
+
+int
+afr_selfheal_restore_time(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, unsigned char *healed_sinks,
+ struct afr_reply *replies)
+{
+ loc_t loc = {
+ 0,
+ };
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, setattr, &loc,
+ &replies[source].poststat,
+ (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME),
+ NULL);
+
+ loc_wipe(&loc);
+
+ return 0;
+}
+
+dict_t *
+afr_selfheal_output_xattr(xlator_t *this, gf_boolean_t is_full_crawl,
+ afr_transaction_type type, int *output_dirty,
+ int **output_matrix, int subvol,
+ int **full_heal_mtx_out)
+{
+ int j = 0;
+ int idx = 0;
+ int d_idx = 0;
+ int ret = 0;
+ int *raw = 0;
+ dict_t *xattr = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ idx = afr_index_for_transaction_type(type);
+ d_idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION);
+
+ xattr = dict_new();
+ if (!xattr)
+ return NULL;
+
+ /* clear dirty */
+ raw = GF_CALLOC(sizeof(int), AFR_NUM_CHANGE_LOGS, gf_afr_mt_int32_t);
+ if (!raw)
+ goto err;
+
+ raw[idx] = hton32(output_dirty[subvol]);
+ ret = dict_set_bin(xattr, AFR_DIRTY, raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ GF_FREE(raw);
+ goto err;
+ }
+
+ /* clear/set pending */
+ for (j = 0; j < priv->child_count; j++) {
+ raw = GF_CALLOC(sizeof(int), AFR_NUM_CHANGE_LOGS, gf_afr_mt_int32_t);
+ if (!raw)
+ goto err;
+
+ raw[idx] = hton32(output_matrix[subvol][j]);
+ if (is_full_crawl)
+ raw[d_idx] = hton32(full_heal_mtx_out[subvol][j]);
+
+ ret = dict_set_bin(xattr, priv->pending_key[j], raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ GF_FREE(raw);
+ goto err;
}
+ }
- return ret;
+ return xattr;
+err:
+ if (xattr)
+ dict_unref(xattr);
+ return NULL;
}
-typedef enum {
- AFR_NODE_INVALID,
- AFR_NODE_INNOCENT,
- AFR_NODE_FOOL,
- AFR_NODE_WISE,
-} afr_node_type;
+int
+afr_selfheal_undo_pending(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type, struct afr_reply *replies,
+ unsigned char *locked_on)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+ int j = 0;
+ unsigned char *pending = NULL;
+ int *input_dirty = NULL;
+ int **input_matrix = NULL;
+ int **full_heal_mtx_in = NULL;
+ int **full_heal_mtx_out = NULL;
+ int *output_dirty = NULL;
+ int **output_matrix = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xdata = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ pending = alloca0(priv->child_count);
+
+ input_dirty = alloca0(priv->child_count * sizeof(int));
+ input_matrix = ALLOC_MATRIX(priv->child_count, int);
+ full_heal_mtx_in = ALLOC_MATRIX(priv->child_count, int);
+ full_heal_mtx_out = ALLOC_MATRIX(priv->child_count, int);
+ output_dirty = alloca0(priv->child_count * sizeof(int));
+ output_matrix = ALLOC_MATRIX(priv->child_count, int);
+
+ xdata = dict_new();
+ if (!xdata)
+ return -1;
-typedef struct {
- afr_node_type type;
- int wisdom;
-} afr_node_character;
+ afr_selfheal_extract_xattr(this, replies, type, input_dirty, input_matrix);
+ if (local->need_full_crawl)
+ afr_selfheal_extract_xattr(this, replies, AFR_DATA_TRANSACTION, NULL,
+ full_heal_mtx_in);
-static int
-afr_sh_is_innocent (int32_t *array, int child_count)
-{
- int i = 0;
- int ret = 1; /* innocent until proven guilty */
+ for (i = 0; i < priv->child_count; i++)
+ if (sinks[i] && !healed_sinks[i])
+ pending[i] = 1;
- for (i = 0; i < child_count; i++) {
- if (array[i]) {
- ret = 0;
- break;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ if (pending[j]) {
+ output_matrix[i][j] = 1;
+ if (type == AFR_ENTRY_TRANSACTION)
+ full_heal_mtx_out[i][j] = 1;
+ } else if (locked_on[j]) {
+ output_matrix[i][j] = -input_matrix[i][j];
+ if (type == AFR_ENTRY_TRANSACTION)
+ full_heal_mtx_out[i][j] = -full_heal_mtx_in[i][j];
+ }
}
+ }
- return ret;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (!pending[i])
+ output_dirty[i] = -input_dirty[i];
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!locked_on[i])
+ /* perform post-op only on subvols we had locked
+ and inspected on.
+ */
+ continue;
+ if (undid_pending[i])
+ /* We already unset the pending xattrs in
+ * _afr_fav_child_reset_sink_xattrs(). */
+ continue;
-static int
-afr_sh_is_fool (int32_t *array, int i, int child_count)
-{
- return array[i]; /* fool if accuses itself */
-}
+ xattr = afr_selfheal_output_xattr(this, local->need_full_crawl, type,
+ output_dirty, output_matrix, i,
+ full_heal_mtx_out);
+ if (!xattr) {
+ continue;
+ }
+ if ((type == AFR_ENTRY_TRANSACTION) && (priv->esh_granular)) {
+ if (xdata && dict_set_int8(xdata, GF_XATTROP_PURGE_INDEX, 1))
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_DICT_SET_FAILED,
+ "Failed to set"
+ " dict value for %s",
+ GF_XATTROP_PURGE_INDEX);
+ }
-static int
-afr_sh_is_wise (int32_t *array, int i, int child_count)
-{
- return !array[i]; /* wise if does not accuse itself */
+ afr_selfheal_post_op(frame, this, inode, i, xattr, xdata);
+ dict_unref(xattr);
+ }
+
+ if (xdata)
+ dict_unref(xdata);
+
+ return 0;
}
+void
+afr_reply_copy(struct afr_reply *dst, struct afr_reply *src)
+{
+ dict_t *xdata = NULL;
+
+ dst->valid = src->valid;
+ dst->op_ret = src->op_ret;
+ dst->op_errno = src->op_errno;
+ dst->prestat = src->prestat;
+ dst->poststat = src->poststat;
+ dst->preparent = src->preparent;
+ dst->postparent = src->postparent;
+ dst->preparent2 = src->preparent2;
+ dst->postparent2 = src->postparent2;
+ if (src->xdata)
+ xdata = dict_ref(src->xdata);
+ else
+ xdata = NULL;
+ if (dst->xdata)
+ dict_unref(dst->xdata);
+ dst->xdata = xdata;
+ if (xdata && dict_get_str_boolean(xdata, "fips-mode-rchecksum",
+ _gf_false) == _gf_true) {
+ memcpy(dst->checksum, src->checksum, SHA256_DIGEST_LENGTH);
+ } else {
+ memcpy(dst->checksum, src->checksum, MD5_DIGEST_LENGTH);
+ }
+ dst->fips_mode_rchecksum = src->fips_mode_rchecksum;
+}
-static int
-afr_sh_all_nodes_innocent (afr_node_character *characters,
- int child_count)
+void
+afr_replies_copy(struct afr_reply *dst, struct afr_reply *src, int count)
{
- int i = 0;
- int ret = 1;
+ int i = 0;
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_INNOCENT) {
- ret = 0;
- break;
- }
- }
+ if (dst == src)
+ return;
- return ret;
+ for (i = 0; i < count; i++) {
+ afr_reply_copy(&dst[i], &src[i]);
+ }
}
-
-static int
-afr_sh_wise_nodes_exist (afr_node_character *characters, int child_count)
+int
+afr_selfheal_fill_dirty(xlator_t *this, int *dirty, int subvol, int idx,
+ dict_t *xdata)
{
- int i = 0;
- int ret = 0;
+ void *pending_raw = NULL;
+ int pending[3] = {
+ 0,
+ };
- for (i = 0; i < child_count; i++) {
- if (characters[i].type == AFR_NODE_WISE) {
- ret = 1;
- break;
- }
- }
+ if (!dirty)
+ return 0;
- return ret;
-}
+ if (dict_get_ptr(xdata, AFR_DIRTY, &pending_raw))
+ return -1;
+ if (!pending_raw)
+ return -1;
-/*
- * The 'wisdom' of a wise node is 0 if any other wise node accuses it.
- * It is 1 if no other wise node accuses it.
- * Only wise nodes with wisdom 1 are sources.
- *
- * If no nodes with wisdom 1 exist, a split-brain has occurred.
- */
+ memcpy(pending, pending_raw, sizeof(pending));
-static void
-afr_sh_compute_wisdom (int32_t *pending_matrix[],
- afr_node_character characters[], int child_count)
+ dirty[subvol] = ntoh32(pending[idx]);
+
+ return 0;
+}
+
+int
+afr_selfheal_fill_matrix(xlator_t *this, int **matrix, int subvol, int idx,
+ dict_t *xdata)
{
- int i = 0;
- int j = 0;
+ int i = 0;
+ void *pending_raw = NULL;
+ int pending[3] = {
+ 0,
+ };
+ afr_private_t *priv = NULL;
- for (i = 0; i < child_count; i++) {
- if (characters[i].type == AFR_NODE_WISE) {
- characters[i].wisdom = 1;
+ priv = this->private;
- for (j = 0; j < child_count; j++) {
- if ((characters[j].type == AFR_NODE_WISE)
- && pending_matrix[j][i]) {
+ if (!matrix)
+ return 0;
- characters[i].wisdom = 0;
- }
- }
- }
- }
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get_ptr(xdata, priv->pending_key[i], &pending_raw))
+ continue;
+ if (!pending_raw)
+ continue;
-static int
-afr_sh_wise_nodes_conflict (afr_node_character *characters,
- int child_count)
+ memcpy(pending, pending_raw, sizeof(pending));
+
+ matrix[subvol][i] = ntoh32(pending[idx]);
+ }
+
+ return 0;
+}
+
+int
+afr_selfheal_extract_xattr(xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type, int *dirty, int **matrix)
{
- int i = 0;
- int ret = 1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ dict_t *xdata = NULL;
+ int idx = -1;
- for (i = 0; i < child_count; i++) {
- if ((characters[i].type == AFR_NODE_WISE)
- && characters[i].wisdom == 1) {
+ idx = afr_index_for_transaction_type(type);
- /* There is atleast one bona-fide wise node */
- ret = 0;
- break;
- }
- }
+ priv = this->private;
- return ret;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
+ if (!replies[i].xdata)
+ continue;
-static int
-afr_sh_mark_wisest_as_sources (int sources[],
- afr_node_character *characters,
- int child_count)
+ xdata = replies[i].xdata;
+
+ afr_selfheal_fill_dirty(this, dirty, i, idx, xdata);
+ afr_selfheal_fill_matrix(this, matrix, i, idx, xdata);
+ }
+
+ return 0;
+}
+
+/*
+ * If by chance there are multiple sources with differing sizes, select
+ * the largest file as the source.
+ *
+ * This can happen if data was directly modified in the backend or for snapshots
+ */
+void
+afr_mark_largest_file_as_source(xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies)
{
- int nsources = 0;
- int i = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint64_t size = 0;
- for (i = 0; i < child_count; i++) {
- if (characters[i].wisdom == 1) {
- sources[i] = 1;
- nsources++;
- }
+ /* Find source with biggest file size */
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (!replies[i].valid || replies[i].op_ret != 0) {
+ sources[i] = 0;
+ continue;
}
+ if (size <= replies[i].poststat.ia_size) {
+ size = replies[i].poststat.ia_size;
+ }
+ }
- return nsources;
+ /* Mark sources with less size as not source */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (size > replies[i].poststat.ia_size)
+ sources[i] = 0;
+ }
}
-static void
-afr_compute_witness_of_fools (int32_t *witnesses, int32_t **pending_matrix,
- afr_node_character *characters,
- int32_t child_count)
-{
- int i = 0;
- int j = 0;
- int witness = 0;
-
- GF_ASSERT (witnesses);
- GF_ASSERT (pending_matrix);
- GF_ASSERT (characters);
- GF_ASSERT (child_count > 0);
-
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_FOOL)
- continue;
-
- witness = 0;
- for (j = 0; j < child_count; j++) {
- if (i == j)
- continue;
- witness += pending_matrix[i][j];
- }
- witnesses[i] = witness;
- }
+void
+afr_mark_latest_mtime_file_as_source(xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint32_t mtime = 0;
+ uint32_t mtime_nsec = 0;
+
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (!replies[i].valid || replies[i].op_ret != 0) {
+ sources[i] = 0;
+ continue;
+ }
+ if ((mtime < replies[i].poststat.ia_mtime) ||
+ ((mtime == replies[i].poststat.ia_mtime) &&
+ (mtime_nsec < replies[i].poststat.ia_mtime_nsec))) {
+ mtime = replies[i].poststat.ia_mtime;
+ mtime_nsec = replies[i].poststat.ia_mtime_nsec;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if ((mtime > replies[i].poststat.ia_mtime) ||
+ ((mtime == replies[i].poststat.ia_mtime) &&
+ (mtime_nsec > replies[i].poststat.ia_mtime_nsec))) {
+ sources[i] = 0;
+ }
+ }
}
-static int32_t
-afr_find_biggest_witness_among_fools (int32_t *witnesses,
- afr_node_character *characters,
- int32_t child_count)
+void
+afr_mark_active_sinks(xlator_t *this, unsigned char *sources,
+ unsigned char *locked_on, unsigned char *sinks)
{
- int i = 0;
- int biggest_witness = -1;
-
- GF_ASSERT (witnesses);
- GF_ASSERT (characters);
- GF_ASSERT (child_count > 0);
+ int i = 0;
+ afr_private_t *priv = NULL;
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_FOOL)
- continue;
+ priv = this->private;
- if (biggest_witness < witnesses[i])
- biggest_witness = witnesses[i];
- }
- return biggest_witness;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i] && locked_on[i])
+ sinks[i] = 1;
+ else
+ sinks[i] = 0;
+ }
}
-int
-afr_mark_fool_as_source_by_witness (int32_t *sources, int32_t *witnesses,
- afr_node_character *characters,
- int32_t child_count, int32_t witness)
+gf_boolean_t
+afr_dict_contains_heal_op(call_frame_t *frame)
{
- int i = 0;
- int nsources = 0;
+ afr_local_t *local = NULL;
+ dict_t *xdata_req = NULL;
+ int ret = 0;
+ int heal_op = -1;
- GF_ASSERT (sources);
- GF_ASSERT (witnesses);
- GF_ASSERT (characters);
- GF_ASSERT (child_count > 0);
-
- for (i = 0; i < child_count; i++) {
- if (characters[i].type != AFR_NODE_FOOL)
- continue;
+ local = frame->local;
+ xdata_req = local->xdata_req;
+ ret = dict_get_int32_sizen(xdata_req, "heal-op", &heal_op);
+ if (ret)
+ return _gf_false;
+ if (local->xdata_rsp == NULL) {
+ local->xdata_rsp = dict_new();
+ if (!local->xdata_rsp)
+ return _gf_true;
+ }
+ ret = dict_set_sizen_str_sizen(local->xdata_rsp, "sh-fail-msg",
+ SFILE_NOT_IN_SPLIT_BRAIN);
- if (witness == witnesses[i]) {
- sources[i] = 1;
- nsources++;
- }
- }
- return nsources;
+ return _gf_true;
}
-static int
-afr_mark_biggest_of_fools_as_source (int32_t *sources, int32_t **pending_matrix,
- afr_node_character *characters,
- int child_count)
+gf_boolean_t
+afr_can_decide_split_brain_source_sinks(struct afr_reply *replies,
+ int child_count)
{
- int32_t biggest_witness = 0;
- int nsources = 0;
- int32_t *witnesses = NULL;
+ int i = 0;
- GF_ASSERT (child_count > 0);
+ for (i = 0; i < child_count; i++)
+ if (replies[i].valid != 1 || replies[i].op_ret != 0)
+ return _gf_false;
- witnesses = GF_CALLOC (child_count, sizeof (*witnesses),
- gf_afr_mt_int32_t);
- if (NULL == witnesses) {
- nsources = -1;
- goto out;
- }
+ return _gf_true;
+}
- afr_compute_witness_of_fools (witnesses, pending_matrix, characters,
- child_count);
- biggest_witness = afr_find_biggest_witness_among_fools (witnesses,
- characters,
- child_count);
- nsources = afr_mark_fool_as_source_by_witness (sources, witnesses,
- characters, child_count,
- biggest_witness);
+int
+afr_mark_split_brain_source_sinks_by_heal_op(
+ call_frame_t *frame, xlator_t *this, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on,
+ struct afr_reply *replies, afr_transaction_type type, int heal_op)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata_req = NULL;
+ dict_t *xdata_rsp = NULL;
+ int ret = 0;
+ int i = 0;
+ char *name = NULL;
+ int source = -1;
+
+ local = frame->local;
+ priv = this->private;
+ xdata_req = local->xdata_req;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_on[i])
+ if (sources[i] || !sinks[i] || !healed_sinks[i]) {
+ ret = -1;
+ goto out;
+ }
+ }
+ if (local->xdata_rsp == NULL) {
+ local->xdata_rsp = dict_new();
+ if (!local->xdata_rsp) {
+ ret = -1;
+ goto out;
+ }
+ }
+ xdata_rsp = local->xdata_rsp;
+
+ if (!afr_can_decide_split_brain_source_sinks(replies, priv->child_count)) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SBRAIN_HEAL_NO_GO_MSG);
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++)
+ if (locked_on[i])
+ sources[i] = 1;
+ switch (heal_op) {
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BIGGER_FILE:
+ if (type == AFR_METADATA_TRANSACTION) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SUSE_SOURCE_BRICK_TO_HEAL);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ afr_mark_largest_file_as_source(this, sources, replies);
+ if (AFR_COUNT(sources, priv->child_count) != 1) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SNO_BIGGER_FILE);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_LATEST_MTIME:
+ if (type == AFR_METADATA_TRANSACTION) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SUSE_SOURCE_BRICK_TO_HEAL);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ afr_mark_latest_mtime_file_as_source(this, sources, replies);
+ if (AFR_COUNT(sources, priv->child_count) != 1) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SNO_DIFF_IN_MTIME);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ break;
+ case GF_SHD_OP_SBRAIN_HEAL_FROM_BRICK:
+ ret = dict_get_str_sizen(xdata_req, "child-name", &name);
+ if (ret)
+ goto out;
+ source = afr_get_child_index_from_name(this, name);
+ if (source < 0) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SINVALID_BRICK_NAME);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ if (locked_on[source] != 1) {
+ ret = dict_set_sizen_str_sizen(xdata_rsp, "sh-fail-msg",
+ SBRICK_IS_NOT_UP);
+ if (!ret)
+ ret = -1;
+ goto out;
+ }
+ memset(sources, 0, sizeof(*sources) * priv->child_count);
+ sources[source] = 1;
+ break;
+ default:
+ ret = -1;
+ goto out;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source = i;
+ break;
+ }
+ }
+ sinks[source] = 0;
+ healed_sinks[source] = 0;
+ ret = source;
out:
- if (witnesses)
- GF_FREE (witnesses);
- return nsources;
+ if (ret < 0)
+ memset(sources, 0, sizeof(*sources) * priv->child_count);
+ return ret;
}
int
-afr_mark_child_as_source_by_uid (int32_t *sources, struct iatt *bufs,
- int32_t *success_children,
- unsigned int child_count, uint32_t uid)
-{
- int i = 0;
- int nsources = 0;
- int child = 0;
-
- for (i = 0; i < child_count; i++) {
- if (-1 == success_children[i])
- break;
-
- child = success_children[i];
- if (uid == bufs[child].ia_uid) {
- sources[child] = 1;
- nsources++;
+afr_sh_fav_by_majority(xlator_t *this, struct afr_reply *replies,
+ inode_t *inode)
+{
+ afr_private_t *priv;
+ int vote_count = -1;
+ int fav_child = -1;
+ int i = 0;
+ int k = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid == 1) {
+ gf_msg_debug(this->name, 0,
+ "Child:%s mtime_sec = %" PRId64 ", size = %" PRIu64
+ " for gfid %s",
+ priv->children[i]->name, replies[i].poststat.ia_mtime,
+ replies[i].poststat.ia_size, uuid_utoa(inode->gfid));
+ vote_count = 0;
+ for (k = 0; k < priv->child_count; k++) {
+ if ((replies[k].poststat.ia_mtime ==
+ replies[i].poststat.ia_mtime) &&
+ (replies[k].poststat.ia_size ==
+ replies[i].poststat.ia_size)) {
+ vote_count++;
}
+ }
+ if (vote_count > priv->child_count / 2) {
+ fav_child = i;
+ break;
+ }
}
- return nsources;
+ }
+ return fav_child;
}
+/*
+ * afr_sh_fav_by_mtime: Choose favorite child by mtime.
+ */
int
-afr_get_child_with_lowest_uid (struct iatt *bufs, int32_t *success_children,
- unsigned int child_count)
-{
- int i = 0;
- int smallest = -1;
- int child = 0;
-
- for (i = 0; i < child_count; i++) {
- if (-1 == success_children[i])
- break;
- child = success_children[i];
- if ((smallest == -1) ||
- (bufs[child].ia_uid < bufs[smallest].ia_uid)) {
- smallest = child;
- }
- }
- return smallest;
+afr_sh_fav_by_mtime(xlator_t *this, struct afr_reply *replies, inode_t *inode)
+{
+ afr_private_t *priv;
+ int fav_child = -1;
+ int i = 0;
+ uint32_t cmp_mtime = 0;
+ uint32_t cmp_mtime_nsec = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid == 1) {
+ gf_msg_debug(this->name, 0,
+ "Child:%s mtime = %" PRId64
+ ", mtime_nsec = %d for "
+ "gfid %s",
+ priv->children[i]->name, replies[i].poststat.ia_mtime,
+ replies[i].poststat.ia_mtime_nsec,
+ uuid_utoa(inode->gfid));
+ if (replies[i].poststat.ia_mtime > cmp_mtime) {
+ cmp_mtime = replies[i].poststat.ia_mtime;
+ cmp_mtime_nsec = replies[i].poststat.ia_mtime_nsec;
+ fav_child = i;
+ } else if ((replies[i].poststat.ia_mtime == cmp_mtime) &&
+ (replies[i].poststat.ia_mtime_nsec > cmp_mtime_nsec)) {
+ cmp_mtime = replies[i].poststat.ia_mtime;
+ cmp_mtime_nsec = replies[i].poststat.ia_mtime_nsec;
+ fav_child = i;
+ }
+ }
+ }
+ return fav_child;
}
-static int
-afr_sh_mark_lowest_uid_as_source (struct iatt *bufs, int32_t *success_children,
- int child_count, int32_t *sources)
-{
- int nsources = 0;
- int smallest = 0;
+/*
+ * afr_sh_fav_by_ctime: Choose favorite child by ctime.
+ */
+int
+afr_sh_fav_by_ctime(xlator_t *this, struct afr_reply *replies, inode_t *inode)
+{
+ afr_private_t *priv;
+ int fav_child = -1;
+ int i = 0;
+ uint32_t cmp_ctime = 0;
+ uint32_t cmp_ctime_nsec = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid == 1) {
+ gf_msg_debug(this->name, 0,
+ "Child:%s ctime = %" PRId64
+ ", ctime_nsec = %d for "
+ "gfid %s",
+ priv->children[i]->name, replies[i].poststat.ia_ctime,
+ replies[i].poststat.ia_ctime_nsec,
+ uuid_utoa(inode->gfid));
+ if (replies[i].poststat.ia_ctime > cmp_ctime) {
+ cmp_ctime = replies[i].poststat.ia_ctime;
+ cmp_ctime_nsec = replies[i].poststat.ia_ctime_nsec;
+ fav_child = i;
+ } else if ((replies[i].poststat.ia_ctime == cmp_ctime) &&
+ (replies[i].poststat.ia_ctime_nsec > cmp_ctime_nsec)) {
+ cmp_ctime = replies[i].poststat.ia_ctime;
+ cmp_ctime_nsec = replies[i].poststat.ia_ctime_nsec;
+ fav_child = i;
+ }
+ }
+ }
+ return fav_child;
+}
- smallest = afr_get_child_with_lowest_uid (bufs, success_children,
- child_count);
- if (smallest < 0) {
- nsources = -1;
- goto out;
- }
- nsources = afr_mark_child_as_source_by_uid (sources, bufs,
- success_children, child_count,
- bufs[smallest].ia_uid);
-out:
- return nsources;
+/*
+ * afr_sh_fav_by_size: Choose favorite child by size
+ * when not all files are of zero size.
+ */
+int
+afr_sh_fav_by_size(xlator_t *this, struct afr_reply *replies, inode_t *inode)
+{
+ afr_private_t *priv;
+ int fav_child = -1;
+ int i = 0;
+ uint64_t cmp_sz = 0;
+
+ priv = this->private;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid) {
+ continue;
+ }
+ gf_msg_debug(this->name, 0,
+ "Child:%s file size = %" PRIu64 " for gfid %s",
+ priv->children[i]->name, replies[i].poststat.ia_size,
+ uuid_utoa(inode->gfid));
+ if (replies[i].poststat.ia_type == IA_IFDIR) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ "Cannot perform selfheal on %s. "
+ "Size policy is not applicable to directories.",
+ uuid_utoa(inode->gfid));
+ break;
+ }
+ if (replies[i].poststat.ia_size > cmp_sz) {
+ cmp_sz = replies[i].poststat.ia_size;
+ fav_child = i;
+ } else if (replies[i].poststat.ia_size == cmp_sz) {
+ fav_child = -1;
+ }
+ }
+ if (fav_child == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "No bigger file");
+ }
+ return fav_child;
}
int
-afr_get_no_xattr_dir_read_child (xlator_t *this, int32_t *success_children,
- struct iatt *bufs)
+afr_sh_get_fav_by_policy(xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, char **policy_str)
{
- afr_private_t *priv = NULL;
- int i = 0;
- int child = -1;
- int read_child = -1;
+ afr_private_t *priv = NULL;
+ int fav_child = -1;
- priv = this->private;
- for (i = 0; i < priv->child_count; i++) {
- child = success_children[i];
- if (child < 0)
- break;
- if (read_child < 0)
- read_child = child;
- else if (bufs[read_child].ia_size < bufs[child].ia_size)
- read_child = child;
- }
- return read_child;
+ priv = this->private;
+ if (!afr_can_decide_split_brain_source_sinks(replies, priv->child_count)) {
+ return -1;
+ }
+
+ switch (priv->fav_child_policy) {
+ case AFR_FAV_CHILD_BY_SIZE:
+ fav_child = afr_sh_fav_by_size(this, replies, inode);
+ if (policy_str && fav_child >= 0) {
+ *policy_str = "SIZE";
+ }
+ break;
+ case AFR_FAV_CHILD_BY_CTIME:
+ fav_child = afr_sh_fav_by_ctime(this, replies, inode);
+ if (policy_str && fav_child >= 0) {
+ *policy_str = "CTIME";
+ }
+ break;
+ case AFR_FAV_CHILD_BY_MTIME:
+ fav_child = afr_sh_fav_by_mtime(this, replies, inode);
+ if (policy_str && fav_child >= 0) {
+ *policy_str = "MTIME";
+ }
+ break;
+ case AFR_FAV_CHILD_BY_MAJORITY:
+ fav_child = afr_sh_fav_by_majority(this, replies, inode);
+ if (policy_str && fav_child >= 0) {
+ *policy_str = "MAJORITY";
+ }
+ break;
+ case AFR_FAV_CHILD_NONE:
+ default:
+ break;
+ }
+
+ return fav_child;
}
int
-afr_sh_mark_zero_size_file_as_sink (struct iatt *bufs, int32_t *success_children,
- int child_count, int32_t *sources)
-{
- int nsources = 0;
- int i = 0;
- int child = 0;
- gf_boolean_t sink_exists = _gf_false;
- gf_boolean_t source_exists = _gf_false;
- int source = -1;
-
- for (i = 0; i < child_count; i++) {
- child = success_children[i];
- if (child < 0)
- break;
- if (!bufs[child].ia_size) {
- sink_exists = _gf_true;
- continue;
- }
- if (!source_exists) {
- source_exists = _gf_true;
- source = child;
- continue;
- }
- if (bufs[source].ia_size != bufs[child].ia_size) {
- nsources = -1;
- goto out;
- }
- }
- if (!source_exists && !sink_exists) {
- nsources = -1;
- goto out;
- }
-
- if (!source_exists || !sink_exists)
- goto out;
-
- for (i = 0; i < child_count; i++) {
- child = success_children[i];
- if (child < 0)
- break;
- if (bufs[child].ia_size) {
- sources[child] = 1;
- nsources++;
- }
- }
-out:
- return nsources;
+afr_mark_split_brain_source_sinks_by_policy(
+ call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on,
+ struct afr_reply *replies, afr_transaction_type type)
+{
+ afr_private_t *priv = NULL;
+ int fav_child = -1;
+ char mtime_str[256];
+ char ctime_str[256];
+ char *policy_str = NULL;
+ struct tm *tm_ptr;
+ time_t time;
+
+ priv = this->private;
+
+ fav_child = afr_sh_get_fav_by_policy(this, replies, inode, &policy_str);
+ if (fav_child == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ "No child selected by favorite-child policy.");
+ } else if (fav_child > priv->child_count - 1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ "Invalid child (%d) "
+ "selected by policy %s.",
+ fav_child, policy_str);
+ } else if (fav_child >= 0) {
+ time = replies[fav_child].poststat.ia_mtime;
+ tm_ptr = localtime(&time);
+ strftime(mtime_str, sizeof(mtime_str), "%Y-%m-%d %H:%M:%S", tm_ptr);
+ time = replies[fav_child].poststat.ia_ctime;
+ tm_ptr = localtime(&time);
+ strftime(ctime_str, sizeof(ctime_str), "%Y-%m-%d %H:%M:%S", tm_ptr);
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SBRAIN_FAV_CHILD_POLICY,
+ "Source %s selected as authentic to resolve conflicting data "
+ "in file (gfid:%s) by %s (%" PRIu64
+ " bytes @ %s mtime, %s "
+ "ctime).",
+ priv->children[fav_child]->name, uuid_utoa(inode->gfid),
+ policy_str, replies[fav_child].poststat.ia_size, mtime_str,
+ ctime_str);
+
+ sources[fav_child] = 1;
+ sinks[fav_child] = 0;
+ healed_sinks[fav_child] = 0;
+ }
+ return fav_child;
}
-char *
-afr_get_character_str (afr_node_type type)
+gf_boolean_t
+afr_is_file_empty_on_all_children(afr_private_t *priv,
+ struct afr_reply *replies)
{
- char *character = NULL;
+ int i = 0;
- switch (type) {
- case AFR_NODE_INNOCENT:
- character = "innocent";
- break;
- case AFR_NODE_FOOL:
- character = "fool";
- break;
- case AFR_NODE_WISE:
- character = "wise";
- break;
- default:
- character = "invalid";
- break;
- }
- return character;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if ((!replies[i].valid) || (replies[i].op_ret != 0) ||
+ (replies[i].poststat.ia_size != 0))
+ return _gf_false;
+ }
-afr_node_type
-afr_find_child_character_type (int32_t *pending_row, int32_t child,
- unsigned int child_count)
-{
- afr_node_type type = AFR_NODE_INVALID;
+ return _gf_true;
+}
- GF_ASSERT ((child >= 0) && (child < child_count));
+int
+afr_mark_source_sinks_if_file_empty(xlator_t *this, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ afr_transaction_type type)
+{
+ int source = -1;
+ int i = 0;
+ afr_private_t *priv = this->private;
+ struct iatt stbuf = {
+ 0,
+ };
+
+ if ((AFR_COUNT(locked_on, priv->child_count) < priv->child_count) ||
+ (afr_success_count(replies, priv->child_count) < priv->child_count))
+ return -1;
- if (afr_sh_is_innocent (pending_row, child_count))
- type = AFR_NODE_INNOCENT;
- else if (afr_sh_is_fool (pending_row, child, child_count))
- type = AFR_NODE_FOOL;
- else if (afr_sh_is_wise (pending_row, child, child_count))
- type = AFR_NODE_WISE;
- return type;
+ if (type == AFR_DATA_TRANSACTION) {
+ if (!afr_is_file_empty_on_all_children(priv, replies))
+ return -1;
+ goto mark;
+ }
+
+ /*For AFR_METADATA_TRANSACTION, metadata must be same on all bricks.*/
+ stbuf = replies[0].poststat;
+ for (i = 1; i < priv->child_count; i++) {
+ if ((!IA_EQUAL(stbuf, replies[i].poststat, type)) ||
+ (!IA_EQUAL(stbuf, replies[i].poststat, uid)) ||
+ (!IA_EQUAL(stbuf, replies[i].poststat, gid)) ||
+ (!IA_EQUAL(stbuf, replies[i].poststat, prot)))
+ return -1;
+ }
+ for (i = 1; i < priv->child_count; i++) {
+ if (!afr_xattrs_are_equal(replies[0].xdata, replies[i].xdata))
+ return -1;
+ }
+
+mark:
+ /* data/metadata is same on all bricks. Pick one of them as source. Rest
+ * are sinks.*/
+ for (i = 0; i < priv->child_count; i++) {
+ if (source == -1) {
+ source = i;
+ sources[i] = 1;
+ sinks[i] = 0;
+ healed_sinks[i] = 0;
+ continue;
+ }
+ sources[i] = 0;
+ sinks[i] = 1;
+ healed_sinks[i] = 1;
+ }
+
+ return source;
+}
+
+/* Return a source depending on the type of heal_op, and set sources[source],
+ * sinks[source] and healed_sinks[source] to 1, 0 and 0 respectively. Do so
+ * only if the following condition is met:
+ * ∀i((i ∈ locked_on[] ∧ i=1)==>(sources[i]=0 ∧ sinks[i]=1 ∧ healed_sinks[i]=1))
+ * i.e. for each locked node, sources[node] is 0; healed_sinks[node] and
+ * sinks[node] are 1. This should be the case if the file is in split-brain.
+ */
+int
+afr_mark_split_brain_source_sinks(
+ call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on,
+ struct afr_reply *replies, afr_transaction_type type)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata_req = NULL;
+ int heal_op = -1;
+ int ret = -1;
+ int source = -1;
+
+ local = frame->local;
+ priv = this->private;
+ xdata_req = local->xdata_req;
+
+ source = afr_mark_source_sinks_if_file_empty(
+ this, sources, sinks, healed_sinks, locked_on, replies, type);
+ if (source >= 0)
+ return source;
+
+ ret = dict_get_int32_sizen(xdata_req, "heal-op", &heal_op);
+ if (ret)
+ goto autoheal;
+
+ source = afr_mark_split_brain_source_sinks_by_heal_op(
+ frame, this, sources, sinks, healed_sinks, locked_on, replies, type,
+ heal_op);
+ return source;
+
+autoheal:
+ /* Automatically heal if fav_child_policy is set. */
+ if (priv->fav_child_policy != AFR_FAV_CHILD_NONE) {
+ source = afr_mark_split_brain_source_sinks_by_policy(
+ frame, this, inode, sources, sinks, healed_sinks, locked_on,
+ replies, type);
+ if (source != -1) {
+ ret = dict_set_int32_sizen(xdata_req, "fav-child-policy", 1);
+ if (ret)
+ return -1;
+ }
+ }
+
+ return source;
}
int
-afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
- int32_t **pending_matrix, int32_t *sources,
- int32_t *success_children, afr_transaction_type type,
- int32_t *subvol_status, gf_boolean_t ignore_ignorant)
-{
- afr_private_t *priv = NULL;
- afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
- int nsources = -1;
- unsigned char *ignorant_subvols = NULL;
- unsigned int child_count = 0;
+_afr_fav_child_reset_sink_xattrs(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int source,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type,
+ unsigned char *locked_on,
+ struct afr_reply *replies)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int *input_dirty = NULL;
+ int **input_matrix = NULL;
+ int *output_dirty = NULL;
+ int **output_matrix = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xdata = NULL;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (!dict_get_sizen(local->xdata_req, "fav-child-policy"))
+ return 0;
- priv = this->private;
- child_count = priv->child_count;
+ xdata = dict_new();
+ if (!xdata)
+ return -1;
- if (afr_get_children_count (success_children, priv->child_count) == 0)
- goto out;
+ input_dirty = alloca0(priv->child_count * sizeof(int));
+ input_matrix = ALLOC_MATRIX(priv->child_count, int);
+ output_dirty = alloca0(priv->child_count * sizeof(int));
+ output_matrix = ALLOC_MATRIX(priv->child_count, int);
- if (!ignore_ignorant) {
- ignorant_subvols = GF_CALLOC (sizeof (*ignorant_subvols),
- child_count, gf_afr_mt_char);
- if (NULL == ignorant_subvols)
- goto out;
- }
+ afr_selfheal_extract_xattr(this, replies, type, input_dirty, input_matrix);
- afr_build_pending_matrix (priv->pending_key, pending_matrix,
- ignorant_subvols, xattr, type,
- priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || !healed_sinks[i])
+ continue;
+ output_dirty[i] = -input_dirty[i];
+ output_matrix[i][source] = -input_matrix[i][source];
+ }
- if (!ignore_ignorant)
- afr_mark_ignorant_subvols_as_pending (pending_matrix,
- ignorant_subvols,
- priv->child_count);
- sh_type = afr_self_heal_type_for_transaction (type);
- if (AFR_SELF_HEAL_INVALID == sh_type)
- goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i] || !locked_on[i])
+ continue;
+ xattr = afr_selfheal_output_xattr(this, _gf_false, type, output_dirty,
+ output_matrix, i, NULL);
- afr_sh_print_pending_matrix (pending_matrix, this);
+ afr_selfheal_post_op(frame, this, inode, i, xattr, xdata);
- nsources = afr_mark_sources (this, sources, pending_matrix, bufs,
- sh_type, success_children, subvol_status);
-out:
- GF_FREE (ignorant_subvols);
- return nsources;
+ undid_pending[i] = 1;
+ dict_unref(xattr);
+ }
+
+ if (xdata)
+ dict_unref(xdata);
+
+ return 0;
}
-void
-afr_find_character_types (afr_node_character *characters,
- int32_t **pending_matrix, int32_t *success_children,
- unsigned int child_count)
-{
- afr_node_type type = AFR_NODE_INVALID;
- int child = 0;
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- child = success_children[i];
- if (child == -1)
- break;
- type = afr_find_child_character_type (pending_matrix[child],
- child, child_count);
- characters[child].type = type;
- }
+gf_boolean_t
+afr_does_witness_exist(xlator_t *this, uint64_t *witness)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (witness[i])
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+unsigned int
+afr_get_quorum_count(afr_private_t *priv)
+{
+ if (priv->quorum_count == AFR_QUORUM_AUTO) {
+ return priv->child_count / 2 + 1;
+ } else {
+ return priv->quorum_count;
+ }
}
void
-afr_mark_success_children_sources (int32_t *sources, int32_t *success_children,
- unsigned int child_count)
-{
- int i = 0;
- for (i = 0; i < child_count; i++) {
- if (success_children[i] == -1)
- break;
- sources[success_children[i]] = 1;
+afr_selfheal_post_op_failure_accounting(afr_private_t *priv, char *accused,
+ unsigned char *sources,
+ unsigned char *locked_on)
+{
+ int i = 0;
+ unsigned int quorum_count = 0;
+
+ if (AFR_COUNT(sources, priv->child_count) != 0)
+ return;
+
+ quorum_count = afr_get_quorum_count(priv);
+ for (i = 0; i < priv->child_count; i++) {
+ if ((accused[i] < quorum_count) && locked_on[i]) {
+ sources[i] = 1;
}
+ }
+ return;
}
-/**
- * mark_sources: Mark all 'source' nodes and return number of source
- * nodes found
+
+/*
+ * This function determines if a self-heal is required for a given inode,
+ * and if needed, in what direction.
*
- * A node (a row in the pending matrix) belongs to one of
- * three categories:
+ * locked_on[] is the array representing servers which have been locked and
+ * from which xattrs have been fetched for analysis.
*
- * M is the pending matrix.
+ * The output of the function is by filling the arrays sources[] and sinks[].
*
- * 'innocent' - M[i] is all zeroes
- * 'fool' - M[i] has i'th element = 1 (self-reference)
- * 'wise' - M[i] has i'th element = 0, others are 1 or 0.
+ * sources[i] is set if i'th server is an eligible source for a selfheal.
*
- * All 'innocent' nodes are sinks. If all nodes are innocent, no self-heal is
- * needed.
+ * sinks[i] is set if i'th server needs to be healed.
*
- * A 'wise' node can be a source. If two 'wise' nodes conflict, it is
- * a split-brain. If one wise node refers to the other but the other doesn't
- * refer back, the referrer is a source.
+ * if sources[0..N] are all set, there is no need for a selfheal.
+ *
+ * if sinks[0..N] are all set, the inode is in split brain.
*
- * All fools are sinks, unless there are no 'wise' nodes. In that case,
- * one of the fools is made a source.
*/
int
-afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
- struct iatt *bufs, afr_self_heal_type type,
- int32_t *success_children, int32_t *subvol_status)
-{
- /* stores the 'characters' (innocent, fool, wise) of the nodes */
- afr_node_character *characters = NULL;
- int nsources = -1;
- unsigned int child_count = 0;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- child_count = priv->child_count;
- characters = GF_CALLOC (sizeof (afr_node_character),
- child_count, gf_afr_mt_afr_node_character);
- if (!characters)
- goto out;
-
- this = THIS;
-
- /* start clean */
- memset (sources, 0, sizeof (*sources) * child_count);
- nsources = 0;
- afr_find_character_types (characters, pending_matrix, success_children,
- child_count);
- if (afr_sh_all_nodes_innocent (characters, child_count)) {
- switch (type) {
- case AFR_SELF_HEAL_METADATA:
- nsources = afr_sh_mark_lowest_uid_as_source (bufs,
- success_children,
- child_count,
- sources);
- break;
- case AFR_SELF_HEAL_DATA:
- nsources = afr_sh_mark_zero_size_file_as_sink (bufs,
- success_children,
- child_count,
- sources);
- if ((nsources < 0) && subvol_status)
- *subvol_status |= SPLIT_BRAIN;
- break;
- default:
- break;
- }
- goto out;
+afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ afr_transaction_type type, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ uint64_t *witness, unsigned char *pflag)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int j = 0;
+ int *dirty = NULL; /* Denotes if dirty xattr is set */
+ int **matrix = NULL; /* Changelog matrix */
+ char *accused = NULL; /* Accused others without any self-accusal */
+ char *pending = NULL; /* Have pending operations on others */
+ char *self_accused = NULL; /* Accused itself */
+
+ priv = this->private;
+
+ dirty = alloca0(priv->child_count * sizeof(int));
+ accused = alloca0(priv->child_count);
+ pending = alloca0(priv->child_count);
+ self_accused = alloca0(priv->child_count);
+ matrix = ALLOC_MATRIX(priv->child_count, int);
+ memset(witness, 0, sizeof(*witness) * priv->child_count);
+
+ /* First construct the pending matrix for further analysis */
+ afr_selfheal_extract_xattr(this, replies, type, dirty, matrix);
+
+ if (pflag) {
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++)
+ if (matrix[i][j])
+ *pflag |= PFLAG_PENDING;
+ if (*pflag)
+ break;
}
-
- if (afr_sh_wise_nodes_exist (characters, child_count)) {
- afr_sh_compute_wisdom (pending_matrix, characters, child_count);
-
- if (afr_sh_wise_nodes_conflict (characters, child_count)) {
- if (subvol_status)
- *subvol_status |= SPLIT_BRAIN;
- nsources = -1;
- } else {
- nsources = afr_sh_mark_wisest_as_sources (sources,
- characters,
- child_count);
- }
- } else {
- if (subvol_status)
- *subvol_status |= ALL_FOOLS;
- nsources = afr_mark_biggest_of_fools_as_source (sources,
- pending_matrix,
- characters,
- child_count);
+ }
+
+ if (afr_success_count(replies, priv->child_count) < priv->child_count) {
+ /* Treat this just like locks not being acquired */
+ return -ENOTCONN;
+ }
+
+ /* short list all self-accused */
+ for (i = 0; i < priv->child_count; i++) {
+ if (matrix[i][i])
+ self_accused[i] = 1;
+ }
+
+ /* Next short list all accused to exclude them from being sources */
+ /* Self-accused can't accuse others as they are FOOLs */
+ for (i = 0; i < priv->child_count; i++) {
+ for (j = 0; j < priv->child_count; j++) {
+ if (matrix[i][j]) {
+ if (!self_accused[i])
+ accused[j] += 1;
+ if (i != j)
+ pending[i] += 1;
+ }
+ }
+ }
+
+ /* Short list all non-accused as sources */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!accused[i] && locked_on[i])
+ sources[i] = 1;
+ else
+ sources[i] = 0;
+ }
+
+ /* Everyone accused by non-self-accused sources are sinks */
+ memset(sinks, 0, priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (self_accused[i])
+ continue;
+ for (j = 0; j < priv->child_count; j++) {
+ if (matrix[i][j])
+ sinks[j] = 1;
+ }
+ }
+
+ /* For breaking ties provide with number of fops they witnessed */
+
+ /*
+ * count the pending fops witnessed from itself to others when it is
+ * self-accused
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!self_accused[i])
+ continue;
+ for (j = 0; j < priv->child_count; j++) {
+ if (i == j)
+ continue;
+ witness[i] += matrix[i][j];
+ }
+ }
+
+ if (type == AFR_DATA_TRANSACTION || type == AFR_METADATA_TRANSACTION)
+ afr_selfheal_post_op_failure_accounting(priv, accused, sources,
+ locked_on);
+
+ /* If no sources, all locked nodes are sinks - split brain */
+ if (AFR_COUNT(sources, priv->child_count) == 0) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_on[i])
+ sinks[i] = 1;
+ }
+ if (pflag)
+ *pflag |= PFLAG_SBRAIN;
+ }
+
+ /* One more class of witness similar to dirty in v2 is where no pending
+ * exists but we have self-accusing markers. This can happen in afr-v1
+ * if the brick crashes just after doing xattrop on self but
+ * before xattrop on the other xattrs on the brick in pre-op. */
+ if (AFR_COUNT(pending, priv->child_count) == 0) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (self_accused[i])
+ witness[i] += matrix[i][i];
+ }
+ } else {
+ /* In afr-v1 if a file is self-accused and has pending
+ * operations on others then it is similar to 'dirty' in afr-v2.
+ * Consider such cases as witness.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (self_accused[i] && pending[i])
+ witness[i] += matrix[i][i];
}
+ }
-out:
- if (nsources == 0)
- afr_mark_success_children_sources (sources, success_children,
- child_count);
- GF_FREE (characters);
+ /* count the number of dirty fops witnessed */
+ for (i = 0; i < priv->child_count; i++)
+ witness[i] += dirty[i];
- gf_log (this->name, GF_LOG_DEBUG, "Number of sources: %d", nsources);
- return nsources;
+ return 0;
}
void
-afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], unsigned char success[],
- int child_count, afr_transaction_type type)
-{
- int i = 0;
- int j = 0;
-
- afr_build_pending_matrix (priv->pending_key, delta_matrix, NULL,
- xattr, type, priv->child_count);
- for (i = 0; i < priv->child_count; i++)
- for (j = 0; j < priv->child_count; j++)
- delta_matrix[i][j] = -delta_matrix[i][j];
+afr_log_selfheal(uuid_t gfid, xlator_t *this, int ret, char *type, int source,
+ unsigned char *sources, unsigned char *healed_sinks)
+{
+ char *status = NULL;
+ char *sinks_str = NULL;
+ char *p = NULL;
+ char *sources_str = NULL;
+ char *q = NULL;
+ afr_private_t *priv = NULL;
+ gf_loglevel_t loglevel = GF_LOG_NONE;
+ int i = 0;
+
+ priv = this->private;
+ sinks_str = alloca0(priv->child_count * 8);
+ p = sinks_str;
+ sources_str = alloca0(priv->child_count * 8);
+ q = sources_str;
+ for (i = 0; i < priv->child_count; i++) {
+ if (healed_sinks[i])
+ p += sprintf(p, "%d ", i);
+ if (sources[i]) {
+ if (source == i) {
+ q += sprintf(q, "[%d] ", i);
+ } else {
+ q += sprintf(q, "%d ", i);
+ }
+ }
+ }
+
+ if (ret < 0) {
+ status = "Failed";
+ loglevel = GF_LOG_DEBUG;
+ } else {
+ status = "Completed";
+ loglevel = GF_LOG_INFO;
+ }
+
+ gf_msg(this->name, loglevel, 0, AFR_MSG_SELF_HEAL_INFO,
+ "%s %s selfheal on %s. "
+ "sources=%s sinks=%s",
+ status, type, uuid_utoa(gfid), sources_str, sinks_str);
}
-
int
-afr_sh_delta_to_xattr (xlator_t *this,
- int32_t *delta_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type)
-{
- int i = 0;
- int j = 0;
- int k = 0;
- int ret = 0;
- int32_t *pending = NULL;
- int32_t *local_pending = NULL;
- afr_private_t *priv = NULL;
-
- priv = this->private;
- for (i = 0; i < child_count; i++) {
- if (!xattr[i])
- continue;
-
- local_pending = NULL;
- for (j = 0; j < child_count; j++) {
- pending = GF_CALLOC (sizeof (int32_t), 3,
- gf_afr_mt_int32_t);
-
- if (!pending)
- continue;
- /* 3 = data+metadata+entry */
-
- k = afr_index_for_transaction_type (type);
-
- pending[k] = hton32 (delta_matrix[i][j]);
-
- if (j == i) {
- local_pending = pending;
- continue;
- }
- ret = dict_set_bin (xattr[i], priv->pending_key[j],
- pending,
- AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value.");
- GF_FREE (pending);
- }
- }
- if (local_pending) {
- ret = dict_set_bin (xattr[i], priv->pending_key[i],
- local_pending,
- AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value.");
- GF_FREE (local_pending);
- }
- }
- }
- return 0;
-}
-
-
-int
-afr_sh_has_metadata_pending (dict_t *xattr, xlator_t *this)
+afr_selfheal_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *parbuf)
{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
+ afr_local_t *local = NULL;
+ int i = -1;
+ GF_UNUSED int ret = -1;
+ int8_t need_heal = 1;
- priv = this->private;
+ local = frame->local;
+ i = (long)cookie;
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (buf)
+ local->replies[i].poststat = *buf;
+ if (parbuf)
+ local->replies[i].postparent = *parbuf;
+ if (xdata) {
+ local->replies[i].xdata = dict_ref(xdata);
+ ret = dict_get_int8(xdata, "link-count", &need_heal);
+ }
- if (ret != 0)
- return 0;
+ local->replies[i].need_heal = need_heal;
+ syncbarrier_wake(&local->barrier);
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
+ return 0;
+}
- if (pending[j])
- return 1;
- }
+inode_t *
+afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent,
+ const char *name, struct afr_reply *replies,
+ unsigned char *lookup_on, dict_t *xattr)
+{
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr_req = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
- return 0;
-}
+ local = frame->local;
+ priv = frame->this->private;
+ xattr_req = dict_new();
+ if (!xattr_req)
+ return NULL;
-int
-afr_sh_has_data_pending (dict_t *xattr, xlator_t *this)
-{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
+ if (xattr)
+ dict_copy(xattr, xattr_req);
- priv = this->private;
+ if (afr_xattr_req_prepare(frame->this, xattr_req) != 0) {
+ dict_unref(xattr_req);
+ return NULL;
+ }
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
+ inode = inode_new(parent->table);
+ if (!inode) {
+ dict_unref(xattr_req);
+ return NULL;
+ }
- if (ret != 0)
- return 0;
+ loc.parent = inode_ref(parent);
+ gf_uuid_copy(loc.pargfid, parent->gfid);
+ loc.name = name;
+ loc.inode = inode_ref(inode);
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
+ AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xattr_req);
- if (pending[j])
- return 1;
- }
+ afr_replies_copy(replies, local->replies, priv->child_count);
- return 0;
-}
+ loc_wipe(&loc);
+ dict_unref(xattr_req);
+ return inode;
+}
-int
-afr_sh_has_entry_pending (dict_t *xattr, xlator_t *this)
+static int
+afr_set_multi_dom_lock_count_request(xlator_t *this, dict_t *dict)
{
- /* Indexable by result of afr_index_for_transaction_type(): 0 -- 2. */
- int32_t pending[3] = {0,};
- void *pending_raw = NULL;
- afr_private_t *priv = NULL;
- int ret = -1;
- int i = 0;
- int j = 0;
-
- priv = this->private;
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ char *key1 = NULL;
+ char *key2 = NULL;
- for (i = 0; i < priv->child_count; i++) {
- ret = dict_get_ptr (xattr, priv->pending_key[i],
- &pending_raw);
+ priv = this->private;
+ key1 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(this->name));
+ key2 = alloca0(strlen(GLUSTERFS_INODELK_DOM_PREFIX) + 2 +
+ strlen(priv->sh_domain));
- if (ret != 0)
- return 0;
+ ret = dict_set_uint32(dict, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS, 1);
+ if (ret)
+ return ret;
- memcpy (pending, pending_raw, sizeof(pending));
- j = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
+ sprintf(key1, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, this->name);
+ ret = dict_set_uint32(dict, key1, 1);
+ if (ret)
+ return ret;
- if (pending[j])
- return 1;
- }
+ sprintf(key2, "%s:%s", GLUSTERFS_INODELK_DOM_PREFIX, priv->sh_domain);
+ ret = dict_set_uint32(dict, key2, 1);
+ if (ret)
+ return ret;
- return 0;
+ return 0;
}
int
-afr_sh_missing_entries_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies,
+ unsigned char *discover_on, dict_t *dict)
+{
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr_req = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = frame->this->private;
+
+ xattr_req = dict_new();
+ if (!xattr_req)
+ return -ENOMEM;
+ if (dict)
+ dict_copy(dict, xattr_req);
+
+ if (afr_xattr_req_prepare(frame->this, xattr_req) != 0) {
+ dict_unref(xattr_req);
+ return -ENOMEM;
+ }
+
+ if (afr_set_multi_dom_lock_count_request(frame->this, xattr_req)) {
+ dict_unref(xattr_req);
+ return -1;
+ }
- local = frame->local;
- sh = &local->self_heal;
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, gfid);
- afr_sh_reset (frame, this);
+ AFR_ONLIST(discover_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xattr_req);
- if (local->govinda_gOvinda) {
- gf_log (this->name, GF_LOG_INFO,
- "split brain found, aborting selfheal of %s",
- local->loc.path);
- sh->op_failed = 1;
- }
+ afr_replies_copy(replies, local->replies, priv->child_count);
- if (sh->op_failed) {
- sh->completion_cbk (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to metadata check on %s",
- local->loc.path);
- afr_self_heal_metadata (frame, this);
- }
+ loc_wipe(&loc);
+ dict_unref(xattr_req);
- return 0;
+ return 0;
}
-
-static int
-afr_sh_missing_entries_finish (call_frame_t *frame, xlator_t *this)
+int
+afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
+ struct afr_reply *replies)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ afr_local_t *local = NULL;
+ dict_t *dict = NULL;
- local = frame->local;
- int_lock = &local->internal_lock;
+ local = frame->local;
- int_lock->lock_cbk = afr_sh_missing_entries_done;
- afr_unlock (frame, this);
+ if (local->xattr_req)
+ dict = local->xattr_req;
- return 0;
+ return afr_selfheal_unlocked_discover_on(frame, inode, gfid, replies,
+ local->child_up, dict);
}
-int
-afr_sh_common_create (afr_self_heal_t *sh, unsigned int child_count)
+unsigned int
+afr_success_count(struct afr_reply *replies, unsigned int count)
{
- int ret = -ENOMEM;
- sh->buf = GF_CALLOC (child_count, sizeof (*sh->buf),
- gf_afr_mt_iatt);
- if (!sh->buf)
- goto out;
- sh->parentbufs = GF_CALLOC (child_count, sizeof (*sh->parentbufs),
- gf_afr_mt_iatt);
- if (!sh->parentbufs)
- goto out;
- sh->child_errno = GF_CALLOC (child_count, sizeof (*sh->child_errno),
- gf_afr_mt_int);
- if (!sh->child_errno)
- goto out;
- sh->success_children = afr_children_create (child_count);
- if (!sh->success_children)
- goto out;
- sh->fresh_children = afr_children_create (child_count);
- if (!sh->fresh_children)
- goto out;
- sh->xattr = GF_CALLOC (child_count, sizeof (*sh->xattr),
- gf_afr_mt_dict_t);
- if (!sh->xattr)
- goto out;
- ret = 0;
-out:
- return ret;
-}
+ int i = 0;
+ unsigned int success = 0;
-void
-afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent,
- loc_t *loc)
-{
- int child_index = 0;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == 0) {
- sh->buf[child_index] = *buf;
- sh->parentbufs[child_index] = *postparent;
- sh->success_children[sh->success_count] = child_index;
- sh->success_count++;
- sh->xattr[child_index] = dict_ref (xattr);
- } else {
- gf_log (this->name, GF_LOG_ERROR, "path %s on subvolume"
- " %s => -1 (%s)", loc->path,
- priv->children[child_index]->name,
- strerror (op_errno));
- local->self_heal.child_errno[child_index] = op_errno;
- }
- }
- UNLOCK (&frame->lock);
- return;
-}
-
-gf_boolean_t
-afr_valid_ia_type (ia_type_t ia_type)
-{
- switch (ia_type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- case IA_IFLNK:
- case IA_IFDIR:
- return _gf_true;
- default:
- return _gf_false;
- }
- return _gf_false;
+ for (i = 0; i < count; i++)
+ if (replies[i].valid && replies[i].op_ret == 0)
+ success++;
+ return success;
}
int
-afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
- int active_source, call_frame_t **impunge_frame)
+afr_selfheal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- afr_local_t *local = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int32_t op_errno = 0;
- afr_private_t *priv = NULL;
- int ret = 0;
- call_frame_t *new_frame = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
- op_errno = ENOMEM;
- priv = this->private;
- new_frame = copy_frame (frame);
- if (!new_frame) {
- goto out;
- }
+ local = frame->local;
+ i = (long)cookie;
- AFR_LOCAL_ALLOC_OR_GOTO (impunge_local, out);
-
- local = frame->local;
- new_frame->local = impunge_local;
- impunge_sh = &impunge_local->self_heal;
- impunge_sh->sh_frame = frame;
- impunge_sh->active_source = active_source;
- impunge_local->child_up = memdup (local->child_up,
- sizeof (*local->child_up) *
- priv->child_count);
- if (!impunge_local->child_up)
- goto out;
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
- impunge_local->pending = afr_matrix_create (priv->child_count,
- AFR_NUM_CHANGE_LOGS);
- if (!impunge_local->pending)
- goto out;
+ syncbarrier_wake(&local->barrier);
- ret = afr_sh_common_create (impunge_sh, priv->child_count);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
- op_errno = 0;
- *impunge_frame = new_frame;
-out:
- if (op_errno && new_frame)
- AFR_STACK_DESTROY (new_frame);
- return -op_errno;
-}
-
-void
-afr_sh_missing_entry_call_impunge_recreate (call_frame_t *frame, xlator_t *this,
- struct iatt *buf,
- struct iatt *postparent,
- afr_impunge_done_cbk_t impunge_done)
-{
- call_frame_t *impunge_frame = NULL;
- afr_local_t *local = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int ret = 0;
- unsigned int enoent_count = 0;
- afr_private_t *priv = NULL;
- int i = 0;
- int32_t op_errno = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- enoent_count = afr_errno_count (NULL, sh->child_errno,
- priv->child_count, ENOENT);
- if (!enoent_count) {
- gf_log (this->name, GF_LOG_INFO,
- "no missing files - %s. proceeding to metadata check",
- local->loc.path);
- goto out;
- }
- sh->impunge_done = impunge_done;
- ret = afr_impunge_frame_create (frame, this, sh->source, &impunge_frame);
- if (ret)
- goto out;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- loc_copy (&impunge_local->loc, &local->loc);
- ret = afr_build_parent_loc (&impunge_sh->parent_loc,
- &impunge_local->loc, &op_errno);
- if (ret) {
- ret = -op_errno;
- goto out;
- }
- impunge_local->call_count = enoent_count;
- impunge_sh->entrybuf = sh->buf[sh->source];
- impunge_sh->parentbuf = sh->parentbufs[sh->source];
- for (i = 0; i < priv->child_count; i++) {
- if (!impunge_local->child_up[i]) {
- impunge_sh->child_errno[i] = ENOTCONN;
- continue;
- }
- if (sh->child_errno[i] != ENOENT) {
- impunge_sh->child_errno[i] = EEXIST;
- continue;
- }
- }
- for (i = 0; i < priv->child_count; i++) {
- if (sh->child_errno[i] != ENOENT)
- continue;
- afr_sh_entry_impunge_create (impunge_frame, this, i);
- enoent_count--;
- }
- GF_ASSERT (!enoent_count);
- return;
-out:
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "impunge of %s failed, "
- "reason: %s", local->loc.path, strerror (-ret));
- sh->op_failed = 1;
- }
- afr_sh_missing_entries_finish (frame, this);
+ return 0;
}
int
-afr_sh_create_entry_cbk (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+afr_locked_fill(call_frame_t *frame, xlator_t *this, unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int count = 0;
- local = frame->local;
- sh = &local->self_heal;
- if (op_ret < 0)
- sh->op_failed = 1;
- afr_sh_missing_entries_finish (frame, this);
- return 0;
-}
+ local = frame->local;
+ priv = this->private;
-static int
-sh_missing_entries_create (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int type = 0;
- struct iatt *buf = NULL;
- struct iatt *postparent = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- buf = &sh->buf[sh->source];
- postparent = &sh->parentbufs[sh->source];
-
- type = buf->ia_type;
- if (!afr_valid_ia_type (type)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: unknown file type: 0%o", local->loc.path, type);
- local->govinda_gOvinda = 1;
- afr_sh_missing_entries_finish (frame, this);
- goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].valid && local->replies[i].op_ret == 0) {
+ locked_on[i] = 1;
+ count++;
+ } else {
+ locked_on[i] = 0;
}
+ }
- afr_sh_missing_entry_call_impunge_recreate (frame, this,
- buf, postparent,
- afr_sh_create_entry_cbk);
-out:
- return 0;
+ return count;
}
-void
-afr_sh_missing_entries_lookup_done (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- ia_type_t ia_type = IA_INVAL;
- int32_t nsources = 0;
- loc_t *loc = NULL;
- int32_t subvol_status = 0;
- afr_transaction_type txn_type = AFR_DATA_TRANSACTION;
- gf_boolean_t split_brain = _gf_false;
- int read_child = -1;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
- loc = &local->loc;
-
- if (op_ret < 0) {
- if (op_errno == EIO)
- local->govinda_gOvinda = 1;
- // EIO can happen if finding the fresh parent dir failed
- goto out;
- }
+int
+afr_selfheal_tryinodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on)
+{
+ loc_t loc = {
+ 0,
+ };
+ struct gf_flock flock = {
+ 0,
+ };
- //now No chance for the ia_type to conflict
- ia_type = sh->buf[sh->success_children[0]].ia_type;
- txn_type = afr_transaction_type_get (ia_type);
- nsources = afr_build_sources (this, sh->xattr, sh->buf,
- sh->pending_matrix, sh->sources,
- sh->success_children, txn_type,
- &subvol_status, _gf_false);
- if (nsources < 0) {
- gf_log (this->name, GF_LOG_INFO, "No sources for dir of %s,"
- " in missing entry self-heal, continuing with the rest"
- " of the self-heals", local->loc.path);
- if (subvol_status & SPLIT_BRAIN) {
- split_brain = _gf_true;
- switch (txn_type) {
- case AFR_DATA_TRANSACTION:
- nsources = 1;
- sh->sources[sh->success_children[0]] = 1;
- break;
- case AFR_ENTRY_TRANSACTION:
- read_child = afr_get_no_xattr_dir_read_child
- (this,
- sh->success_children,
- sh->buf);
- sh->sources[read_child] = 1;
- nsources = 1;
- break;
- default:
- op_errno = EIO;
- goto out;
- }
- } else {
- op_errno = EIO;
- goto out;
- }
- }
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- sh->source = sh->fresh_children[0];
- if (sh->source == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
- op_errno = EIO;
- goto out;
- }
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- if (sh->gfid_sh_success_cbk)
- sh->gfid_sh_success_cbk (frame, this);
- sh->type = sh->buf[sh->source].ia_type;
- if (uuid_is_null (loc->inode->gfid))
- uuid_copy (loc->gfid, sh->buf[sh->source].ia_gfid);
- if (split_brain) {
- afr_sh_missing_entries_finish (frame, this);
- } else {
- sh_missing_entries_create (frame, this);
- }
- return;
-out:
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- afr_sh_missing_entries_finish (frame, this);
- return;
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLK, &flock,
+ NULL);
+
+ loc_wipe(&loc);
+
+ return afr_locked_fill(frame, this, locked_on);
}
-static int
-afr_sh_common_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_common_lookup_resp_handler (frame, cookie, this, op_ret,
- op_errno, inode, buf, xattr,
- postparent, &sh->lookup_loc);
- call_count = afr_frame_return (frame);
-
- if (call_count)
- goto out;
- op_ret = -1;
- if (!sh->success_count) {
- op_errno = afr_resultant_errno_get (NULL, sh->child_errno,
- priv->child_count);
- gf_log (this->name, GF_LOG_ERROR, "Failed to lookup %s, "
- "reason %s", sh->lookup_loc.path,
- strerror (op_errno));
- goto done;
- }
+int
+afr_selfheal_inodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on)
+{
+ loc_t loc = {
+ 0,
+ };
+ struct gf_flock flock = {
+ 0,
+ };
+ afr_local_t *local = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
- if ((sh->lookup_flags & AFR_LOOKUP_FAIL_CONFLICTS) &&
- (afr_conflicting_iattrs (sh->buf, sh->success_children,
- priv->child_count,
- sh->lookup_loc.path, this->name))) {
- op_errno = EIO;
- gf_log (this->name, GF_LOG_ERROR, "Conflicting entries "
- "for %s", sh->lookup_loc.path);
- goto done;
- }
+ priv = this->private;
+ local = frame->local;
- if ((sh->lookup_flags & AFR_LOOKUP_FAIL_MISSING_GFIDS) &&
- (afr_gfid_missing_count (this->name, sh->success_children,
- sh->buf, priv->child_count,
- sh->lookup_loc.path))) {
- op_errno = ENODATA;
- gf_log (this->name, GF_LOG_ERROR, "Missing Gfids "
- "for %s", sh->lookup_loc.path);
- goto done;
- }
- op_ret = 0;
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
-done:
- sh->lookup_done (frame, this, op_ret, op_errno);
-out:
- return 0;
-}
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
-int
-afr_sh_remove_entry_cbk (call_frame_t *frame, xlator_t *this, int child,
- int32_t op_ret, int32_t op_errno)
-{
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- GF_ASSERT (sh->post_remove_call);
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_ERROR,
- "purge entry %s failed, on child %d reason, %s",
- local->loc.path, child, strerror (op_errno));
- LOCK (&frame->lock);
- {
- afr_sh_set_error (sh, EIO);
- sh->op_failed = 1;
- }
- UNLOCK (&frame->lock);
- }
- call_count = afr_frame_return (frame);
- if (call_count == 0)
- sh->post_remove_call (frame, this);
- return 0;
-}
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLK, &flock,
+ NULL);
-void
-afr_sh_call_entry_expunge_remove (call_frame_t *frame, xlator_t *this,
- int child_index, struct iatt *buf,
- struct iatt *parentbuf,
- afr_expunge_done_cbk_t expunge_done)
-{
- call_frame_t *expunge_frame = NULL;
- afr_local_t *local = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int32_t op_errno = 0;
- int ret = 0;
-
- expunge_frame = copy_frame (frame);
- if (!expunge_frame) {
- goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == -1 &&
+ local->replies[i].op_errno == EAGAIN) {
+ afr_locked_fill(frame, this, locked_on);
+ afr_selfheal_uninodelk(frame, this, inode, dom, off, size,
+ locked_on);
+
+ AFR_SEQ(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLKW,
+ &flock, NULL);
+ break;
}
+ }
- AFR_LOCAL_ALLOC_OR_GOTO (expunge_local, out);
+ loc_wipe(&loc);
- local = frame->local;
- sh = &local->self_heal;
- expunge_frame->local = expunge_local;
- expunge_sh = &expunge_local->self_heal;
- expunge_sh->sh_frame = frame;
- loc_copy (&expunge_local->loc, &local->loc);
- ret = afr_build_parent_loc (&expunge_sh->parent_loc,
- &expunge_local->loc, &op_errno);
- if (ret) {
- ret = -op_errno;
- goto out;
- }
- sh->expunge_done = expunge_done;
- afr_sh_entry_expunge_remove (expunge_frame, this, child_index, buf,
- parentbuf);
- return;
-out:
- gf_log (this->name, GF_LOG_ERROR, "Expunge of %s failed, reason: %s",
- local->loc.path, strerror (op_errno));
- expunge_done (frame, this, child_index, -1, op_errno);
+ return afr_locked_fill(frame, this, locked_on);
}
-void
-afr_sh_remove_stale_lookup_info (afr_self_heal_t *sh, int32_t *success_children,
- int32_t *fresh_children,
- unsigned int child_count)
-{
- int i = 0;
-
- for (i = 0; i < child_count; i++) {
- if (afr_is_child_present (success_children, child_count, i) &&
- !afr_is_child_present (fresh_children, child_count, i)) {
- sh->child_errno[i] = ENOENT;
- GF_ASSERT (sh->xattr[i]);
- dict_unref (sh->xattr[i]);
- sh->xattr[i] = NULL;
- }
+static void
+afr_get_lock_and_eagain_counts(afr_private_t *priv, struct afr_reply *replies,
+ int *lock_count, int *eagain_count)
+{
+ int i = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret == 0) {
+ (*lock_count)++;
+ } else if (replies[i].op_ret == -1 && replies[i].op_errno == EAGAIN) {
+ (*eagain_count)++;
}
+ }
}
+/*Do blocking locks if number of locks acquired is majority and there were some
+ * EAGAINs. Useful for odd-way replication*/
int
-afr_sh_purge_stale_entries_done (call_frame_t *frame, xlator_t *this)
+afr_selfheal_tie_breaker_inodelk(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, off_t off,
+ size_t size, unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct gf_flock flock = {
+ 0,
+ };
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int lock_count = 0;
+ int eagain_count = 0;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ priv = this->private;
+ local = frame->local;
- if (sh->op_failed) {
- afr_sh_missing_entries_finish (frame, this);
- } else {
- if (afr_gfid_missing_count (this->name, sh->fresh_children,
- sh->buf, priv->child_count,
- local->loc.path)) {
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_missing_entries_lookup_done,
- sh->sh_gfid_req,
- AFR_LOOKUP_FAIL_CONFLICTS|
- AFR_LOOKUP_FAIL_MISSING_GFIDS,
- NULL);
- } else {
- //No need to set gfid so goto missing entries lookup done
- //Behave as if you have done the lookup
- afr_sh_remove_stale_lookup_info (sh,
- sh->success_children,
- sh->fresh_children,
- priv->child_count);
- afr_children_copy (sh->success_children,
- sh->fresh_children,
- priv->child_count);
- afr_sh_missing_entries_lookup_done (frame, this, 0, 0);
- }
- }
- return 0;
-}
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
-gf_boolean_t
-afr_sh_purge_entry_condition (afr_local_t *local, afr_private_t *priv,
- int child)
-{
- afr_self_heal_t *sh = NULL;
+ flock.l_type = F_WRLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- sh = &local->self_heal;
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLK, &flock,
+ NULL);
- if (local->child_up[child] &&
- (!afr_is_child_present (sh->fresh_parent_dirs, priv->child_count,
- child))
- && (sh->child_errno[child] != ENOENT))
- return _gf_true;
+ afr_get_lock_and_eagain_counts(priv, local->replies, &lock_count,
+ &eagain_count);
- return _gf_false;
-}
-
-gf_boolean_t
-afr_sh_purge_stale_entry_condition (afr_local_t *local, afr_private_t *priv,
- int child)
-{
- afr_self_heal_t *sh = NULL;
+ if (lock_count > priv->child_count / 2 && eagain_count) {
+ afr_locked_fill(frame, this, locked_on);
+ afr_selfheal_uninodelk(frame, this, inode, dom, off, size, locked_on);
- sh = &local->self_heal;
+ AFR_SEQ(frame, afr_selfheal_lock_cbk, inodelk, dom, &loc, F_SETLKW,
+ &flock, NULL);
+ }
- if (local->child_up[child] &&
- (!afr_is_child_present (sh->fresh_children, priv->child_count,
- child))
- && (sh->child_errno[child] != ENOENT))
- return _gf_true;
+ loc_wipe(&loc);
- return _gf_false;
+ return afr_locked_fill(frame, this, locked_on);
}
-void
-afr_sh_purge_entry_common (call_frame_t *frame, xlator_t *this,
- gf_boolean_t purge_condition (afr_local_t *local,
- afr_private_t *priv,
- int child))
+int
+afr_selfheal_uninodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ const unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- int call_count = 0;
+ loc_t loc = {
+ 0,
+ };
+ struct gf_flock flock = {
+ 0,
+ };
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- for (i = 0; i < priv->child_count; i++) {
- if (purge_condition (local, priv, i))
- call_count++;
- }
+ flock.l_type = F_UNLCK;
+ flock.l_start = off;
+ flock.l_len = size;
- if (call_count == 0) {
- sh->post_remove_call (frame, this);
- goto out;
- }
+ AFR_ONLIST(locked_on, frame, afr_selfheal_lock_cbk, inodelk, dom, &loc,
+ F_SETLK, &flock, NULL);
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!purge_condition (local, priv, i))
- continue;
- gf_log (this->name, GF_LOG_INFO, "purging the stale entry %s "
- "on %d", local->loc.path, i);
- afr_sh_call_entry_expunge_remove (frame, this,
- (long) i, &sh->buf[i],
- &sh->parentbufs[i],
- afr_sh_remove_entry_cbk);
- }
-out:
- return;
-}
-
-void
-afr_sh_purge_entry (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ loc_wipe(&loc);
- local = frame->local;
- sh = &local->self_heal;
- sh->post_remove_call = afr_sh_missing_entries_finish;
-
- afr_sh_purge_entry_common (frame, this, afr_sh_purge_entry_condition);
+ return 0;
}
-void
-afr_sh_purge_stale_entry (call_frame_t *frame, xlator_t *this)
+int
+afr_selfheal_tryentrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int i = 0;
+ loc_t loc = {
+ 0,
+ };
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- sh->post_remove_call = afr_sh_purge_stale_entries_done;
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
- for (i = 0; i < priv->child_count; i++) {
- if (afr_is_child_present (sh->fresh_children,
- priv->child_count, i))
- continue;
+ loc_wipe(&loc);
- if ((!local->child_up[i]) || sh->child_errno[i] != 0)
- continue;
-
- GF_ASSERT (!uuid_is_null (sh->entrybuf.ia_gfid) ||
- uuid_is_null (sh->buf[i].ia_gfid));
-
- if ((sh->entrybuf.ia_type != sh->buf[i].ia_type) ||
- (uuid_compare (sh->buf[i].ia_gfid,
- sh->entrybuf.ia_gfid)))
- continue;
-
- afr_children_add_child (sh->fresh_children, i,
- priv->child_count);
-
- }
- afr_sh_purge_entry_common (frame, this,
- afr_sh_purge_stale_entry_condition);
-}
-
-void
-afr_sh_save_child_iatts_from_policy (int32_t *children, struct iatt *bufs,
- struct iatt *save,
- unsigned int child_count)
-{
- int i = 0;
- int child = 0;
- gf_boolean_t saved = _gf_false;
-
- GF_ASSERT (save);
- //if iatt buf with gfid exists sets it
- for (i = 0; i < child_count; i++) {
- child = children[i];
- if (child == -1)
- break;
- *save = bufs[child];
- saved = _gf_true;
- if (!uuid_is_null (save->ia_gfid))
- break;
- }
- GF_ASSERT (saved);
+ return afr_locked_fill(frame, this, locked_on);
}
-void
-afr_get_children_of_fresh_parent_dirs (afr_self_heal_t *sh,
- unsigned int child_count)
+int
+afr_selfheal_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on)
{
- afr_children_intersection_get (sh->success_children,
- sh->fresh_parent_dirs,
- sh->sources, child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, child_count);
- memset (sh->sources, 0, sizeof (*sh->sources) * child_count);
-}
+ loc_t loc = {
+ 0,
+ };
+ afr_local_t *local = NULL;
+ int i = 0;
+ afr_private_t *priv = NULL;
-void
-afr_sh_children_lookup_done (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int32_t fresh_child_enoents = 0;
- int32_t fresh_parent_count = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (op_ret < 0)
- goto fail;
- afr_get_children_of_fresh_parent_dirs (sh, priv->child_count);
- fresh_parent_count = afr_get_children_count (sh->fresh_parent_dirs,
- priv->child_count);
- //we need the enoent count of the subvols present in fresh_parent_dirs
- fresh_child_enoents = afr_errno_count (sh->fresh_parent_dirs,
- sh->child_errno,
- priv->child_count, ENOENT);
- if (fresh_child_enoents == fresh_parent_count) {
- gf_log (this->name, GF_LOG_INFO, "Deleting stale file %s",
- local->loc.path);
- afr_sh_set_error (sh, ENOENT);
- sh->op_failed = 1;
- afr_sh_purge_entry (frame, this);
- } else if (!afr_conflicting_iattrs (sh->buf, sh->fresh_children,
- priv->child_count, local->loc.path,
- this->name)) {
- afr_sh_save_child_iatts_from_policy (sh->fresh_children,
- sh->buf, &sh->entrybuf,
- priv->child_count);
- afr_update_gfid_from_iatts (sh->sh_gfid_req, sh->buf,
- sh->fresh_children,
- priv->child_count);
- afr_sh_purge_stale_entry (frame, this);
- } else {
- op_errno = EIO;
- local->govinda_gOvinda = 1;
- goto fail;
- }
+ priv = this->private;
+ local = frame->local;
- return;
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
-fail:
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- afr_sh_missing_entries_finish (frame, this);
- return;
-}
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
-static void
-afr_sh_find_fresh_parents (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int enoent_count = 0;
- int nsources = 0;
- int source = -1;
- int32_t subvol_status = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (op_ret < 0)
- goto out;
- enoent_count = afr_errno_count (NULL, sh->child_errno,
- priv->child_count, ENOENT);
- if (enoent_count > 0) {
- gf_log (this->name, GF_LOG_INFO, "Parent dir missing for %s,"
- " in missing entry self-heal, aborting missing-entry "
- "self-heal",
- local->loc.path);
- afr_sh_missing_entries_finish (frame, this);
- return;
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == -1 &&
+ local->replies[i].op_errno == EAGAIN) {
+ afr_locked_fill(frame, this, locked_on);
+ afr_selfheal_unentrylk(frame, this, inode, dom, name, locked_on,
+ NULL);
- nsources = afr_build_sources (this, sh->xattr, sh->buf,
- sh->pending_matrix, sh->sources,
- sh->success_children,
- AFR_ENTRY_TRANSACTION, &subvol_status,
- _gf_true);
- if ((subvol_status & ALL_FOOLS) ||
- (subvol_status & SPLIT_BRAIN)) {
- gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative "
- "merge", sh->parent_loc.path);
- afr_mark_success_children_sources (sh->sources,
- sh->success_children,
- priv->child_count);
- } else if (nsources < 0) {
- gf_log (this->name, GF_LOG_ERROR, "No sources for dir "
- "of %s, in missing entry self-heal, aborting "
- "self-heal", local->loc.path);
- op_errno = EIO;
- goto out;
+ AFR_SEQ(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ break;
}
+ }
- source = afr_sh_select_source (sh->sources, priv->child_count);
- if (source == -1) {
- GF_ASSERT (0);
- gf_log (this->name, GF_LOG_DEBUG, "No active sources found.");
- op_errno = EIO;
- goto out;
- }
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_parent_dirs, priv->child_count);
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_children_lookup_done, NULL, 0,
- NULL);
- return;
+ loc_wipe(&loc);
-out:
- afr_sh_set_error (sh, op_errno);
- sh->op_failed = 1;
- afr_sh_missing_entries_finish (frame, this);
- return;
+ return afr_locked_fill(frame, this, locked_on);
}
-void
-afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count)
+int
+afr_selfheal_tie_breaker_entrylk(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, const char *name,
+ unsigned char *locked_on)
{
- int i = 0;
+ loc_t loc = {
+ 0,
+ };
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int lock_count = 0;
+ int eagain_count = 0;
- for (i = 0; i < child_count; i++) {
- memset (&sh->buf[i], 0, sizeof (sh->buf[i]));
- memset (&sh->parentbufs[i], 0, sizeof (sh->parentbufs[i]));
- sh->child_errno[i] = 0;
- }
- memset (&sh->parentbuf, 0, sizeof (sh->parentbuf));
- sh->success_count = 0;
- afr_reset_children (sh->success_children, child_count);
- afr_reset_children (sh->fresh_children, child_count);
- afr_reset_xattr (sh->xattr, child_count);
- loc_wipe (&sh->lookup_loc);
-}
+ priv = this->private;
+ local = frame->local;
-/* afr self-heal state will be lost if this call is made
- * please check the afr_sh_common_reset that is called in this function
- */
-int
-afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- afr_lookup_done_cbk_t lookup_done , uuid_t gfid,
- int32_t flags, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- int i = 0;
- int call_count = 0;
- afr_private_t *priv = NULL;
- dict_t *xattr_req = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
-
- call_count = afr_up_children_count (local->child_up, priv->child_count);
-
- local->call_count = call_count;
-
- xattr_req = dict_new();
-
- if (xattr_req) {
- afr_xattr_req_prepare (this, xattr_req, loc->path);
- if (gfid) {
- gf_log (this->name, GF_LOG_DEBUG,
- "looking up %s with gfid: %s",
- loc->path, uuid_utoa (gfid));
- GF_ASSERT (!uuid_is_null (gfid));
- afr_set_dict_gfid (xattr_req, gfid);
- }
- }
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- afr_sh_common_reset (sh, priv->child_count);
- sh->lookup_done = lookup_done;
- loc_copy (&sh->lookup_loc, loc);
- sh->lookup_flags = flags;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- gf_log (this->name, GF_LOG_DEBUG,
- "looking up %s on subvolume %s",
- loc->path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame,
- afr_sh_common_lookup_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->lookup,
- loc, xattr_req);
-
- if (!--call_count)
- break;
- }
- }
+ AFR_ONALL(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL);
- if (xattr_req)
- dict_unref (xattr_req);
+ afr_get_lock_and_eagain_counts(priv, local->replies, &lock_count,
+ &eagain_count);
- return 0;
-}
+ if (lock_count > priv->child_count / 2 && eagain_count) {
+ afr_locked_fill(frame, this, locked_on);
+ afr_selfheal_unentrylk(frame, this, inode, dom, name, locked_on, NULL);
+
+ AFR_SEQ(frame, afr_selfheal_lock_cbk, entrylk, dom, &loc, name,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ }
+ loc_wipe(&loc);
+ return afr_locked_fill(frame, this, locked_on);
+}
int
-afr_sh_post_nb_entrylk_conflicting_sh_cbk (call_frame_t *frame, xlator_t *this)
+afr_selfheal_unentrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on,
+ dict_t *xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ loc_t loc = {
+ 0,
+ };
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Non blocking entrylks failed.");
- sh->op_failed = -1;
- afr_sh_missing_entries_done (frame, this);
- } else {
+ AFR_ONLIST(locked_on, frame, afr_selfheal_lock_cbk, entrylk, dom, &loc,
+ name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata);
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking entrylks done. Proceeding to FOP");
- afr_sh_common_lookup (frame, this, &sh->parent_loc,
- afr_sh_find_fresh_parents,
- NULL, AFR_LOOKUP_FAIL_CONFLICTS,
- NULL);
- }
+ loc_wipe(&loc);
- return 0;
+ return 0;
}
-int
-afr_sh_post_nb_entrylk_gfid_sh_cbk (call_frame_t *frame, xlator_t *this)
+gf_boolean_t
+afr_is_data_set(xlator_t *this, dict_t *xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- int_lock = &local->internal_lock;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Non blocking entrylks failed.");
- afr_sh_missing_entries_done (frame, this);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking entrylks done. Proceeding to FOP");
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_missing_entries_lookup_done,
- sh->sh_gfid_req, AFR_LOOKUP_FAIL_CONFLICTS|
- AFR_LOOKUP_FAIL_MISSING_GFIDS,
- NULL);
- }
-
- return 0;
+ return afr_is_pending_set(this, xdata, AFR_DATA_TRANSACTION);
}
-int
-afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
- char *base_name, afr_lock_cbk_t lock_cbk)
+gf_boolean_t
+afr_is_metadata_set(xlator_t *this, dict_t *xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
- int_lock->selfheal_lk_type = AFR_ENTRY_SELF_HEAL_LK;
+ return afr_is_pending_set(this, xdata, AFR_METADATA_TRANSACTION);
+}
- afr_set_lock_number (frame, this);
+gf_boolean_t
+afr_is_entry_set(xlator_t *this, dict_t *xdata)
+{
+ return afr_is_pending_set(this, xdata, AFR_ENTRY_TRANSACTION);
+}
- int_lock->lk_basename = base_name;
- int_lock->lk_loc = loc;
- int_lock->lock_cbk = lock_cbk;
+/*
+ * This function inspects the looked up replies (in an unlocked manner)
+ * and decides whether a locked verification and possible healing is
+ * required or not. It updates the three booleans for each type
+ * of healing. If the boolean flag gets set to FALSE, then we are sure
+ * no healing is required. If the boolean flag gets set to TRUE then
+ * we have to proceed with locked reinspection.
+ */
- afr_nonblocking_entrylk (frame, this);
+int
+afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ inode_t **link_inode, gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal,
+ gf_boolean_t *entry_selfheal,
+ struct afr_reply *replies_dst)
+{
+ afr_private_t *priv = NULL;
+ inode_t *inode = NULL;
+ int i = 0;
+ int valid_cnt = 0;
+ struct iatt first = {
+ 0,
+ };
+ int first_idx = 0;
+ struct afr_reply *replies = NULL;
+ int ret = -1;
+
+ priv = this->private;
+
+ inode = afr_inode_find(this, gfid);
+ if (!inode)
+ goto out;
+
+ replies = alloca0(sizeof(*replies) * priv->child_count);
+
+ ret = afr_selfheal_unlocked_discover(frame, inode, gfid, replies);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret == -1)
+ continue;
+
+ /* The data segment of the changelog can be non-zero to indicate
+ * the directory needs a full heal. So the check below ensures
+ * it's not a directory before setting the data_selfheal boolean.
+ */
+ if (data_selfheal && !IA_ISDIR(replies[i].poststat.ia_type) &&
+ afr_is_data_set(this, replies[i].xdata))
+ *data_selfheal = _gf_true;
+
+ if (metadata_selfheal && afr_is_metadata_set(this, replies[i].xdata))
+ *metadata_selfheal = _gf_true;
+
+ if (entry_selfheal && afr_is_entry_set(this, replies[i].xdata))
+ *entry_selfheal = _gf_true;
+
+ valid_cnt++;
+ if (valid_cnt == 1) {
+ first = replies[i].poststat;
+ first_idx = i;
+ continue;
+ }
+
+ if (!IA_EQUAL(first, replies[i].poststat, type)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "TYPE mismatch %d vs %d on %s for gfid:%s",
+ (int)first.ia_type, (int)replies[i].poststat.ia_type,
+ priv->children[i]->name,
+ uuid_utoa(replies[i].poststat.ia_gfid));
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;"
+ "type=file;gfid=%s;"
+ "ia_type-%d=%s;ia_type-%d=%s",
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(replies[i].poststat.ia_gfid), first_idx,
+ gf_inode_type_to_str(first.ia_type), i,
+ gf_inode_type_to_str(replies[i].poststat.ia_type));
+ ret = -EIO;
+ goto out;
+ }
+
+ if (!IA_EQUAL(first, replies[i].poststat, uid)) {
+ gf_msg_debug(this->name, 0,
+ "UID mismatch "
+ "%d vs %d on %s for gfid:%s",
+ (int)first.ia_uid, (int)replies[i].poststat.ia_uid,
+ priv->children[i]->name,
+ uuid_utoa(replies[i].poststat.ia_gfid));
+
+ if (metadata_selfheal)
+ *metadata_selfheal = _gf_true;
+ }
+
+ if (!IA_EQUAL(first, replies[i].poststat, gid)) {
+ gf_msg_debug(this->name, 0,
+ "GID mismatch "
+ "%d vs %d on %s for gfid:%s",
+ (int)first.ia_uid, (int)replies[i].poststat.ia_uid,
+ priv->children[i]->name,
+ uuid_utoa(replies[i].poststat.ia_gfid));
+
+ if (metadata_selfheal)
+ *metadata_selfheal = _gf_true;
+ }
+
+ if (!IA_EQUAL(first, replies[i].poststat, prot)) {
+ gf_msg_debug(this->name, 0,
+ "MODE mismatch "
+ "%d vs %d on %s for gfid:%s",
+ (int)st_mode_from_ia(first.ia_prot, 0),
+ (int)st_mode_from_ia(replies[i].poststat.ia_prot, 0),
+ priv->children[i]->name,
+ uuid_utoa(replies[i].poststat.ia_gfid));
+
+ if (metadata_selfheal)
+ *metadata_selfheal = _gf_true;
+ }
+
+ if (IA_ISREG(first.ia_type) &&
+ !IA_EQUAL(first, replies[i].poststat, size)) {
+ gf_msg_debug(this->name, 0,
+ "SIZE mismatch "
+ "%lld vs %lld on %s for gfid:%s",
+ (long long)first.ia_size,
+ (long long)replies[i].poststat.ia_size,
+ priv->children[i]->name,
+ uuid_utoa(replies[i].poststat.ia_gfid));
+
+ if (data_selfheal)
+ *data_selfheal = _gf_true;
+ }
+ }
+
+ if (valid_cnt > 0 && link_inode) {
+ *link_inode = inode_link(inode, NULL, NULL, &first);
+ if (!*link_inode) {
+ ret = -EINVAL;
+ goto out;
+ }
+ } else if (valid_cnt < 2) {
+ ret = afr_check_stale_error(replies, priv);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (replies && replies_dst)
+ afr_replies_copy(replies_dst, replies, priv->child_count);
+ if (inode)
+ inode_unref(inode);
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
- return 0;
+ return ret;
}
-static int
-afr_self_heal_parent_entrylk (call_frame_t *frame, xlator_t *this,
- afr_lock_cbk_t lock_cbk)
+inode_t *
+afr_inode_find(xlator_t *this, uuid_t gfid)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_internal_lock_t *int_lock = NULL;
- int ret = -1;
- int32_t op_errno = 0;
-
- local = frame->local;
- sh = &local->self_heal;
+ inode_table_t *table = NULL;
+ inode_t *inode = NULL;
- gf_log (this->name, GF_LOG_TRACE,
- "attempting to recreate missing entries for path=%s",
- local->loc.path);
+ table = this->itable;
+ if (!table)
+ return NULL;
- ret = afr_build_parent_loc (&sh->parent_loc, &local->loc, &op_errno);
- if (ret)
- goto out;
+ inode = inode_find(table, gfid);
+ if (inode)
+ return inode;
- afr_sh_entrylk (frame, this, &sh->parent_loc, NULL,
- lock_cbk);
- return 0;
-out:
- int_lock = &local->internal_lock;
- int_lock->lock_op_ret = -1;
- lock_cbk (frame, this);
- return 0;
-}
+ inode = inode_new(table);
+ if (!inode)
+ return NULL;
-static int
-afr_self_heal_conflicting_entries (call_frame_t *frame, xlator_t *this)
-{
- afr_self_heal_parent_entrylk (frame, this,
- afr_sh_post_nb_entrylk_conflicting_sh_cbk);
- return 0;
-}
+ gf_uuid_copy(inode->gfid, gfid);
-static int
-afr_self_heal_gfids (call_frame_t *frame, xlator_t *this)
-{
- afr_self_heal_parent_entrylk (frame, this,
- afr_sh_post_nb_entrylk_gfid_sh_cbk);
- return 0;
+ return inode;
}
-afr_local_t *afr_local_copy (afr_local_t *l, xlator_t *this)
+call_frame_t *
+afr_frame_create(xlator_t *this, int32_t *op_errno)
{
- afr_private_t *priv = NULL;
- afr_local_t *lc = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *shc = NULL;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ pid_t pid = GF_CLIENT_PID_SELF_HEALD;
- priv = this->private;
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ if (op_errno)
+ *op_errno = ENOMEM;
+ return NULL;
+ }
- sh = &l->self_heal;
+ local = AFR_FRAME_INIT(frame, (*op_errno));
+ if (!local) {
+ STACK_DESTROY(frame->root);
+ return NULL;
+ }
- lc = mem_get0 (this->local_pool);
- if (!lc)
- goto out;
+ syncopctx_setfspid(&pid);
- shc = &lc->self_heal;
-
- shc->unwind = sh->unwind;
- shc->gfid_sh_success_cbk = sh->gfid_sh_success_cbk;
- shc->do_missing_entry_self_heal = sh->do_missing_entry_self_heal;
- shc->do_gfid_self_heal = sh->do_gfid_self_heal;
- shc->do_data_self_heal = sh->do_data_self_heal;
- shc->do_metadata_self_heal = sh->do_metadata_self_heal;
- shc->do_entry_self_heal = sh->do_entry_self_heal;
- shc->forced_merge = sh->forced_merge;
- shc->background = sh->background;
- shc->type = sh->type;
-
- uuid_copy (shc->sh_gfid_req, sh->sh_gfid_req);
- if (l->loc.path)
- loc_copy (&lc->loc, &l->loc);
-
- lc->child_up = memdup (l->child_up,
- sizeof (*lc->child_up) * priv->child_count);
- if (l->xattr_req)
- lc->xattr_req = dict_ref (l->xattr_req);
-
- if (l->cont.lookup.inode)
- lc->cont.lookup.inode = inode_ref (l->cont.lookup.inode);
- if (l->cont.lookup.xattr)
- lc->cont.lookup.xattr = dict_ref (l->cont.lookup.xattr);
- if (l->internal_lock.inode_locked_nodes)
- lc->internal_lock.inode_locked_nodes =
- memdup (l->internal_lock.inode_locked_nodes,
- sizeof (*lc->internal_lock.inode_locked_nodes) * priv->child_count);
- else
- lc->internal_lock.inode_locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.inode_locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
- if (l->internal_lock.entry_locked_nodes)
- lc->internal_lock.entry_locked_nodes =
- memdup (l->internal_lock.entry_locked_nodes,
- sizeof (*lc->internal_lock.entry_locked_nodes) * priv->child_count);
- else
- lc->internal_lock.entry_locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.entry_locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
- if (l->internal_lock.locked_nodes)
- lc->internal_lock.locked_nodes =
- memdup (l->internal_lock.locked_nodes,
- sizeof (*lc->internal_lock.locked_nodes) * priv->child_count);
- else
- lc->internal_lock.locked_nodes =
- GF_CALLOC (sizeof (*l->internal_lock.locked_nodes),
- priv->child_count,
- gf_afr_mt_char);
+ frame->root->pid = pid;
- lc->internal_lock.inodelk_lock_count =
- l->internal_lock.inodelk_lock_count;
- lc->internal_lock.entrylk_lock_count =
- l->internal_lock.entrylk_lock_count;
+ afr_set_lk_owner(frame, this, frame->root);
-out:
- return lc;
+ return frame;
}
int
-afr_self_heal_completion_cbk (call_frame_t *bgsh_frame, xlator_t *this)
+afr_selfheal_newentry_mark(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, struct afr_reply *replies,
+ unsigned char *sources, unsigned char *newentry)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
- char sh_type_str[256] = {0,};
- gf_boolean_t split_brain = _gf_false;
+ int ret = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int **changelog = NULL;
- priv = this->private;
- local = bgsh_frame->local;
- sh = &local->self_heal;
+ priv = this->private;
- if (local->govinda_gOvinda)
- split_brain = _gf_true;
+ gf_uuid_copy(inode->gfid, replies[source].poststat.ia_gfid);
- afr_set_split_brain (this, sh->inode, split_brain);
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
- afr_self_heal_type_str_get (sh, sh_type_str,
- sizeof(sh_type_str));
- if (sh->op_failed) {
- gf_log (this->name, GF_LOG_ERROR, "background %s self-heal "
- "failed on %s", sh_type_str, local->loc.path);
- } else {
- gf_log (this->name, GF_LOG_INFO, "background %s self-heal "
- "completed on %s", sh_type_str, local->loc.path);
- }
+ changelog = afr_mark_pending_changelog(priv, newentry, xattr,
+ replies[source].poststat.ia_type);
- FRAME_SU_UNDO (bgsh_frame, afr_local_t);
+ if (!changelog) {
+ ret = -ENOMEM;
+ goto out;
+ }
- if (!sh->unwound && sh->unwind) {
- sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
- sh->op_failed);
- }
-
- if (sh->background) {
- LOCK (&priv->lock);
- {
- priv->background_self_heals_started--;
- }
- UNLOCK (&priv->lock);
- }
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ ret |= afr_selfheal_post_op(frame, this, inode, i, xattr, NULL);
+ }
+out:
+ if (changelog)
+ afr_matrix_cleanup(changelog, priv->child_count);
+ if (xattr)
+ dict_unref(xattr);
+ return ret;
+}
- AFR_STACK_DESTROY (bgsh_frame);
+int
+afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid)
+{
+ int ret = -1;
+ int entry_ret = 1;
+ int metadata_ret = 1;
+ int data_ret = 1;
+ int or_ret = 0;
+ inode_t *inode = NULL;
+ fd_t *fd = NULL;
+ gf_boolean_t data_selfheal = _gf_false;
+ gf_boolean_t metadata_selfheal = _gf_false;
+ gf_boolean_t entry_selfheal = _gf_false;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ ret = afr_selfheal_unlocked_inspect(frame, this, gfid, &inode,
+ &data_selfheal, &metadata_selfheal,
+ &entry_selfheal, NULL);
+ if (ret)
+ goto out;
+
+ if (!(data_selfheal || metadata_selfheal || entry_selfheal)) {
+ ret = 2;
+ goto out;
+ }
+
+ if (inode->ia_type == IA_IFREG) {
+ ret = afr_selfheal_data_open(this, inode, &fd);
+ if (!fd) {
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ if (data_selfheal && priv->data_self_heal)
+ data_ret = afr_selfheal_data(frame, this, fd);
+
+ if (metadata_selfheal && priv->metadata_self_heal)
+ metadata_ret = afr_selfheal_metadata(frame, this, inode);
+
+ if (entry_selfheal && priv->entry_self_heal)
+ entry_ret = afr_selfheal_entry(frame, this, inode);
+
+ or_ret = (data_ret | metadata_ret | entry_ret);
+
+ if (data_ret == -EIO || metadata_ret == -EIO || entry_ret == -EIO)
+ ret = -EIO;
+ else if (data_ret == 1 && metadata_ret == 1 && entry_ret == 1)
+ ret = 1;
+ else if (or_ret < 0)
+ ret = or_ret;
+ else
+ ret = 0;
- return 0;
+out:
+ if (inode)
+ inode_unref(inode);
+ if (fd)
+ fd_unref(fd);
+ return ret;
}
+/*
+ * This is the entry point for healing a given GFID. The return values for this
+ * function are as follows:
+ * '0' if the self-heal is successful
+ * '1' if the afr-xattrs are non-zero (due to on-going IO) and no heal is needed
+ * '2' if the afr-xattrs are all-zero and no heal is needed
+ * $errno if the heal on the gfid failed.
+ */
int
-afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int32_t op_errno = 0;
- int ret = 0;
- afr_self_heal_t *orig_sh = NULL;
- call_frame_t *sh_frame = NULL;
- afr_local_t *sh_local = NULL;
- loc_t *loc = NULL;
-
- local = frame->local;
- orig_sh = &local->self_heal;
- priv = this->private;
-
- GF_ASSERT (local->loc.path);
-
- gf_log (this->name, GF_LOG_TRACE,
- "performing self heal on %s (metadata=%d data=%d entry=%d)",
- local->loc.path,
- local->self_heal.do_metadata_self_heal,
- local->self_heal.do_data_self_heal,
- local->self_heal.do_entry_self_heal);
-
- op_errno = ENOMEM;
- sh_frame = copy_frame (frame);
- if (!sh_frame)
- goto out;
- afr_set_lk_owner (sh_frame, this, sh_frame->root);
- afr_set_low_priority (sh_frame);
+afr_selfheal(xlator_t *this, uuid_t gfid)
+{
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
- sh_local = afr_local_copy (local, this);
- if (!sh_local)
- goto out;
- sh_frame->local = sh_local;
- sh = &sh_local->self_heal;
+ frame = afr_frame_create(this, NULL);
+ if (!frame)
+ return ret;
- sh->inode = inode_ref (inode);
- sh->orig_frame = frame;
+ local = frame->local;
+ local->xdata_req = dict_new();
- sh->completion_cbk = afr_self_heal_completion_cbk;
+ ret = afr_selfheal_do(frame, this, gfid);
- sh->success = GF_CALLOC (priv->child_count, sizeof (*sh->success),
- gf_afr_mt_char);
- if (!sh->success)
- goto out;
- sh->sources = GF_CALLOC (sizeof (*sh->sources), priv->child_count,
- gf_afr_mt_int);
- if (!sh->sources)
- goto out;
- sh->locked_nodes = GF_CALLOC (sizeof (*sh->locked_nodes),
- priv->child_count,
- gf_afr_mt_int);
- if (!sh->locked_nodes)
- goto out;
+ if (frame)
+ AFR_STACK_DESTROY(frame);
- sh->pending_matrix = afr_matrix_create (priv->child_count,
- priv->child_count);
- if (!sh->pending_matrix)
- goto out;
+ return ret;
+}
- sh->delta_matrix = afr_matrix_create (priv->child_count,
- priv->child_count);
- if (!sh->delta_matrix)
- goto out;
+afr_local_t *
+__afr_dequeue_heals(afr_private_t *priv)
+{
+ afr_local_t *local = NULL;
- sh->fresh_parent_dirs = afr_children_create (priv->child_count);
- if (!sh->fresh_parent_dirs)
- goto out;
- ret = afr_sh_common_create (sh, priv->child_count);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ if (list_empty(&priv->heal_waiting))
+ goto none;
+ if ((priv->background_self_heal_count > 0) &&
+ (priv->healers >= priv->background_self_heal_count))
+ goto none;
- if (local->self_heal.background) {
- LOCK (&priv->lock);
- {
- if (priv->background_self_heals_started
- < priv->background_self_heal_count) {
- priv->background_self_heals_started++;
+ local = list_entry(priv->heal_waiting.next, afr_local_t, healer);
+ priv->heal_waiters--;
+ GF_ASSERT(priv->heal_waiters >= 0);
+ list_del_init(&local->healer);
+ list_add(&local->healer, &priv->healing);
+ priv->healers++;
+ return local;
+none:
+ gf_msg_debug(THIS->name, 0,
+ "Nothing dequeued. "
+ "Num healers: %d, Num Waiters: %d",
+ priv->healers, priv->heal_waiters);
+ return NULL;
+}
+int
+afr_refresh_selfheal_wrap(void *opaque)
+{
+ call_frame_t *heal_frame = opaque;
+ afr_local_t *local = heal_frame->local;
+ int ret = 0;
- } else {
- local->self_heal.background = _gf_false;
- sh->background = _gf_false;
- }
- }
- UNLOCK (&priv->lock);
- }
+ ret = afr_selfheal(heal_frame->this, local->refreshinode->gfid);
+ return ret;
+}
- if (!local->loc.parent) {
- sh->do_missing_entry_self_heal = _gf_false;
- sh->do_gfid_self_heal = _gf_false;
- }
+int
+afr_refresh_heal_done(int ret, call_frame_t *frame, void *opaque)
+{
+ call_frame_t *heal_frame = opaque;
+ xlator_t *this = heal_frame->this;
+ afr_private_t *priv = this->private;
+ afr_local_t *local = heal_frame->local;
- FRAME_SU_DO (sh_frame, afr_local_t);
- if (sh->do_missing_entry_self_heal) {
- afr_self_heal_conflicting_entries (sh_frame, this);
- } else if (sh->do_gfid_self_heal) {
- GF_ASSERT (!uuid_is_null (sh->sh_gfid_req));
- afr_self_heal_gfids (sh_frame, this);
- } else {
- loc = &sh_local->loc;
- if (uuid_is_null (loc->inode->gfid) && uuid_is_null (loc->gfid)) {
- if (!uuid_is_null (inode->gfid))
- GF_ASSERT (!uuid_compare (inode->gfid,
- sh->sh_gfid_req));
- uuid_copy (loc->gfid, sh->sh_gfid_req);
- }
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to metadata check on %s",
- local->loc.path);
+ LOCK(&priv->lock);
+ {
+ list_del_init(&local->healer);
+ priv->healers--;
+ GF_ASSERT(priv->healers >= 0);
+ local = __afr_dequeue_heals(priv);
+ }
+ UNLOCK(&priv->lock);
- afr_sh_missing_entries_done (sh_frame, this);
- }
- op_errno = 0;
+ AFR_STACK_DESTROY(heal_frame);
-out:
- if (op_errno) {
- orig_sh->unwind (frame, this, -1, op_errno, 1);
- if (sh_frame)
- AFR_STACK_DESTROY (sh_frame);
- }
- return 0;
+ if (local)
+ afr_heal_synctask(this, local);
+ return 0;
}
void
-afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
- size_t size)
+afr_heal_synctask(xlator_t *this, afr_local_t *local)
{
- GF_ASSERT (str && (size > strlen (" missing-entry gfid "
- "meta-data data entry")));
+ int ret = 0;
+ call_frame_t *heal_frame = NULL;
- if (self_heal_p->do_metadata_self_heal) {
- snprintf (str, size, " meta-data");
- }
-
- if (self_heal_p->do_data_self_heal) {
- snprintf (str + strlen(str), size - strlen(str), " data");
- }
-
- if (self_heal_p->do_entry_self_heal) {
- snprintf (str + strlen(str), size - strlen(str), " entry");
- }
-
- if (self_heal_p->do_missing_entry_self_heal) {
- snprintf (str + strlen(str), size - strlen(str),
- " missing-entry");
- }
+ heal_frame = local->heal_frame;
+ ret = synctask_new(this->ctx->env, afr_refresh_selfheal_wrap,
+ afr_refresh_heal_done, heal_frame, heal_frame);
+ if (ret < 0)
+ /* Heal not launched. Will be queued when the next inode
+ * refresh happens and shd hasn't healed it yet. */
+ afr_refresh_heal_done(ret, heal_frame, heal_frame);
+}
- if (self_heal_p->do_gfid_self_heal) {
- snprintf (str + strlen(str), size - strlen(str), " gfid");
+gf_boolean_t
+afr_throttled_selfheal(call_frame_t *frame, xlator_t *this)
+{
+ gf_boolean_t can_heal = _gf_true;
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+
+ LOCK(&priv->lock);
+ {
+ if ((priv->background_self_heal_count > 0) &&
+ (priv->heal_wait_qlen + priv->background_self_heal_count) >
+ (priv->heal_waiters + priv->healers)) {
+ list_add_tail(&local->healer, &priv->heal_waiting);
+ priv->heal_waiters++;
+ local = __afr_dequeue_heals(priv);
+ } else {
+ can_heal = _gf_false;
}
-}
+ }
+ UNLOCK(&priv->lock);
-afr_self_heal_type
-afr_self_heal_type_for_transaction (afr_transaction_type type)
-{
- afr_self_heal_type sh_type = AFR_SELF_HEAL_INVALID;
+ if (can_heal) {
+ if (local)
+ afr_heal_synctask(this, local);
+ else
+ gf_msg_debug(this->name, 0,
+ "Max number of heals are "
+ "pending, background self-heal rejected.");
+ }
- switch (type) {
- case AFR_DATA_TRANSACTION:
- sh_type = AFR_SELF_HEAL_DATA;
- break;
- case AFR_METADATA_TRANSACTION:
- sh_type = AFR_SELF_HEAL_METADATA;
- break;
- case AFR_ENTRY_TRANSACTION:
- sh_type = AFR_SELF_HEAL_ENTRY;
- break;
- case AFR_ENTRY_RENAME_TRANSACTION:
- GF_ASSERT (0);
- break;
- }
- return sh_type;
+ return can_heal;
}
int
-afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
+afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources,
+ afr_transaction_type type)
{
- int ret = -1;
- uuid_t pargfid = {0};
+ int source = -1;
+ int i = 0;
- if (!child)
- goto out;
+ /* Give preference to local child to save on bandwidth */
+ for (i = 0; i < priv->child_count; i++) {
+ if (priv->local[i] && sources[i]) {
+ if ((type == AFR_DATA_TRANSACTION) && AFR_IS_ARBITER_BRICK(priv, i))
+ continue;
- if (!uuid_is_null (parent->inode->gfid))
- uuid_copy (pargfid, parent->inode->gfid);
- else if (!uuid_is_null (parent->gfid))
- uuid_copy (pargfid, parent->gfid);
+ source = i;
+ goto out;
+ }
+ }
- if (uuid_is_null (pargfid))
- goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source = i;
+ goto out;
+ }
+ }
+out:
+ return source;
+}
- if (strcmp (parent->path, "/") == 0)
- ret = gf_asprintf ((char **)&child->path, "/%s", name);
- else
- ret = gf_asprintf ((char **)&child->path, "%s/%s", parent->path,
- name);
+static int
+afr_anon_inode_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ afr_local_t *local = frame->local;
+ int i = (long)cookie;
+
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
+ if (op_ret == 0) {
+ local->op_ret = 0;
+ local->replies[i].poststat = *buf;
+ local->replies[i].preparent = *preparent;
+ local->replies[i].postparent = *postparent;
+ }
+ if (xdata) {
+ local->replies[i].xdata = dict_ref(xdata);
+ }
+
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed while setting child path");
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode)
+{
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ unsigned char *mkdir_on = alloca0(priv->child_count);
+ unsigned char *lookup_on = alloca0(priv->child_count);
+ loc_t loc = {0};
+ int32_t op_errno = 0;
+ int32_t child_op_errno = 0;
+ struct iatt iatt = {0};
+ dict_t *xdata = NULL;
+ uuid_t anon_inode_gfid = {0};
+ int mkdir_count = 0;
+ int i = 0;
+
+ /*Try to mkdir everywhere and return success if the dir exists on 'child'
+ */
+
+ if (!priv->use_anon_inode) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ frame = afr_frame_create(this, &op_errno);
+ if (op_errno) {
+ goto out;
+ }
+ local = frame->local;
+ if (!local->child_up[child]) {
+ /*Other bricks may need mkdir so don't error out yet*/
+ child_op_errno = ENOTCONN;
+ }
+ gf_uuid_parse(priv->anon_gfid_str, anon_inode_gfid);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->child_up[i])
+ continue;
+
+ if (priv->anon_inode[i]) {
+ mkdir_on[i] = 0;
+ } else {
+ mkdir_on[i] = 1;
+ mkdir_count++;
}
+ }
- child->name = strrchr (child->path, '/');
- if (child->name)
- child->name++;
+ if (mkdir_count == 0) {
+ *linked_inode = inode_find(this->itable, anon_inode_gfid);
+ if (*linked_inode) {
+ op_errno = 0;
+ goto out;
+ }
+ }
- child->parent = inode_ref (parent->inode);
- child->inode = inode_new (parent->inode->table);
- uuid_copy (child->pargfid, pargfid);
+ loc.parent = inode_ref(this->itable->root);
+ loc.name = priv->anon_inode_name;
+ loc.inode = inode_new(this->itable);
+ if (!loc.inode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
- if (!child->inode) {
- ret = -1;
- goto out;
+ xdata = dict_new();
+ if (!xdata) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_errno = -dict_set_gfuuid(xdata, "gfid-req", anon_inode_gfid, _gf_true);
+ if (op_errno) {
+ goto out;
+ }
+
+ if (mkdir_count == 0) {
+ memcpy(lookup_on, local->child_up, priv->child_count);
+ goto lookup;
+ }
+
+ AFR_ONLIST(mkdir_on, frame, afr_anon_inode_mkdir_cbk, mkdir, &loc, 0755, 0,
+ xdata);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!mkdir_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else if (local->replies[i].op_ret < 0 &&
+ local->replies[i].op_errno == EEXIST) {
+ lookup_on[i] = 1;
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+
+ if (AFR_COUNT(lookup_on, priv->child_count) == 0) {
+ goto link;
+ }
+
+lookup:
+ AFR_ONLIST(lookup_on, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ xdata);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!lookup_on[i]) {
+ continue;
+ }
+
+ if (local->replies[i].op_ret == 0) {
+ if (gf_uuid_compare(anon_inode_gfid,
+ local->replies[i].poststat.ia_gfid) == 0) {
+ priv->anon_inode[i] = 1;
+ iatt = local->replies[i].poststat;
+ } else {
+ if (i == child)
+ child_op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_DATA,
+ "%s has gfid: %s", priv->anon_inode_name,
+ uuid_utoa(local->replies[i].poststat.ia_gfid));
+ }
+ } else if (i == child) {
+ child_op_errno = local->replies[i].op_errno;
+ }
+ }
+link:
+ if (!gf_uuid_is_null(iatt.ia_gfid)) {
+ *linked_inode = inode_link(loc.inode, loc.parent, loc.name, &iatt);
+ if (*linked_inode) {
+ op_errno = 0;
+ inode_lookup(*linked_inode);
+ } else {
+ op_errno = ENOMEM;
}
+ goto out;
+ }
- ret = 0;
out:
- if ((ret == -1) && child)
- loc_wipe (child);
-
- return ret;
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+ /*child_op_errno takes precedence*/
+ if (child_op_errno == 0) {
+ child_op_errno = op_errno;
+ }
+
+ if (child_op_errno && *linked_inode) {
+ inode_unref(*linked_inode);
+ *linked_inode = NULL;
+ }
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return -child_op_errno;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-common.h b/xlators/cluster/afr/src/afr-self-heal-common.h
deleted file mode 100644
index cea554670b5..00000000000
--- a/xlators/cluster/afr/src/afr-self-heal-common.h
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __AFR_SELF_HEAL_COMMON_H__
-#define __AFR_SELF_HEAL_COMMON_H__
-
-#define FILE_HAS_HOLES(buf) (((buf)->ia_size) > ((buf)->ia_blocks * 512))
-#define AFR_SH_MIN_PARTICIPANTS 2
-
-typedef enum {
- AFR_SELF_HEAL_ENTRY,
- AFR_SELF_HEAL_METADATA,
- AFR_SELF_HEAL_DATA,
- AFR_SELF_HEAL_INVALID = -1,
-} afr_self_heal_type;
-
-typedef enum {
- AFR_LOOKUP_FAIL_CONFLICTS = 1,
- AFR_LOOKUP_FAIL_MISSING_GFIDS = 2,
-} afr_lookup_flags_t;
-
-int
-afr_sh_select_source (int sources[], int child_count);
-
-int
-afr_sh_source_count (int sources[], int child_count);
-
-void
-afr_sh_print_pending_matrix (int32_t *pending_matrix[], xlator_t *this);
-
-int
-afr_build_pending_matrix (char **pending_key, int32_t **pending_matrix,
- unsigned char *ignorant_subvols,
- dict_t *xattr[], afr_transaction_type type,
- size_t child_count);
-
-void
-afr_sh_pending_to_delta (afr_private_t *priv, dict_t **xattr,
- int32_t *delta_matrix[], unsigned char success[],
- int child_count, afr_transaction_type type);
-
-int
-afr_mark_sources (xlator_t *this, int32_t *sources, int32_t **pending_matrix,
- struct iatt *bufs, afr_self_heal_type type,
- int32_t *success_children, int32_t *subvol_status);
-
-int
-afr_sh_delta_to_xattr (xlator_t *this,
- int32_t *delta_matrix[], dict_t *xattr[],
- int child_count, afr_transaction_type type);
-
-void
-afr_self_heal_type_str_get (afr_self_heal_t *self_heal_p, char *str,
- size_t size);
-
-afr_self_heal_type
-afr_self_heal_type_for_transaction (afr_transaction_type type);
-
-int
-afr_build_sources (xlator_t *this, dict_t **xattr, struct iatt *bufs,
- int32_t **pending_matrix, int32_t *sources,
- int32_t *success_children, afr_transaction_type type,
- int32_t *subvol_status, gf_boolean_t ignore_ignorant);
-void
-afr_sh_common_reset (afr_self_heal_t *sh, unsigned int child_count);
-
-void
-afr_sh_common_lookup_resp_handler (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- dict_t *xattr, struct iatt *postparent,
- loc_t *loc);
-
-int
-afr_sh_common_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- afr_lookup_done_cbk_t lookup_cbk, uuid_t uuid,
- int32_t flags, dict_t *xdata);
-int
-afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
- int active_src, struct iatt *buf,
- struct iatt *parentbuf);
-int
-afr_sh_entrylk (call_frame_t *frame, xlator_t *this, loc_t *loc,
- char *base_name, afr_lock_cbk_t lock_cbk);
-int
-afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
- int child_index);
-int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this,
- afr_lock_cbk_t lock_cbk);
-afr_local_t *
-afr_local_copy (afr_local_t *l, xlator_t *this);
-int
-afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
- off_t start, off_t len,
- afr_lock_cbk_t success_handler,
- afr_lock_cbk_t failure_handler);
-void
-afr_sh_set_error (afr_self_heal_t *sh, int32_t op_errno);
-void
-afr_sh_mark_source_sinks (call_frame_t *frame, xlator_t *this);
-typedef int
-(*afr_fxattrop_cbk_t) (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr, dict_t *xdata);
-int
-afr_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name);
-int
-afr_impunge_frame_create (call_frame_t *frame, xlator_t *this,
- int active_source, call_frame_t **impunge_frame);
-void
-afr_sh_reset (call_frame_t *frame, xlator_t *this);
-
-void
-afr_children_intersection_get (int32_t *set1, int32_t *set2,
- int *intersection, unsigned int child_count);
-int
-afr_get_no_xattr_dir_read_child (xlator_t *this, int32_t *success_children,
- struct iatt *bufs);
-#endif /* __AFR_SELF_HEAL_COMMON_H__ */
diff --git a/xlators/cluster/afr/src/afr-self-heal-data.c b/xlators/cluster/afr/src/afr-self-heal-data.c
index 7186da710b3..37bcc2b3f9e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-data.c
+++ b/xlators/cluster/afr/src/afr-self-heal-data.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,1404 +8,884 @@
cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
-#include <unistd.h>
-#include <fnmatch.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <signal.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-
-#include "afr-transaction.h"
#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-#include "afr-self-heal-algorithm.h"
-
-int
-afr_sh_data_fail (call_frame_t *frame, xlator_t *this);
-
-static inline gf_boolean_t
-afr_sh_data_proceed (unsigned int success_count)
-{
- return (success_count >= AFR_SH_MIN_PARTICIPANTS);
-}
-
-extern int
-sh_loop_finish (call_frame_t *loop_frame, xlator_t *this);
-
-int
-afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this);
-
-int
-afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_data_finish (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_data_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh->completion_cbk (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_data_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "flush failed on %s on subvolume %s: %s",
- local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_sh_data_done (frame, this);
- }
-
- return 0;
-}
-
-int
-afr_sh_data_close (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- int call_count = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- call_count = afr_set_elem_count_get (sh->success,
- priv->child_count);
- local->call_count = call_count;
-
- if (call_count == 0) {
- afr_sh_data_done (frame, this);
- return 0;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!sh->success[i])
- continue;
- gf_log (this->name, GF_LOG_DEBUG,
- "closing fd of %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_flush_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->flush,
- sh->healing_fd, NULL);
-
- if (!--call_count)
- break;
- }
+#include <glusterfs/byte-order.h>
+#include "protocol-common.h"
+#include "afr-messages.h"
+#include <glusterfs/events.h>
- return 0;
+#define HAS_HOLES(i) ((i->ia_blocks * 512) < (i->ia_size))
+static int
+__checksum_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, uint32_t weak, uint8_t *strong, dict_t *xdata)
+{
+ afr_local_t *local = NULL;
+ struct afr_reply *replies = NULL;
+ int i = (long)cookie;
+
+ local = frame->local;
+ replies = local->replies;
+
+ replies[i].valid = 1;
+ replies[i].op_ret = op_ret;
+ replies[i].op_errno = op_errno;
+ if (xdata) {
+ replies[i].buf_has_zeroes = dict_get_str_boolean(
+ xdata, "buf-has-zeroes", _gf_false);
+ replies[i].fips_mode_rchecksum = dict_get_str_boolean(
+ xdata, "fips-mode-rchecksum", _gf_false);
+ }
+ if (strong) {
+ if (replies[i].fips_mode_rchecksum) {
+ memcpy(local->replies[i].checksum, strong, SHA256_DIGEST_LENGTH);
+ } else {
+ memcpy(local->replies[i].checksum, strong, MD5_DIGEST_LENGTH);
+ }
+ }
+
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+static gf_boolean_t
+__afr_can_skip_data_block_heal(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks,
+ off_t offset, size_t size, struct iatt *poststat)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ unsigned char *wind_subvols = NULL;
+ gf_boolean_t checksum_match = _gf_true;
+ struct afr_reply *replies = NULL;
+ dict_t *xdata = NULL;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+ replies = local->replies;
+
+ xdata = dict_new();
+ if (!xdata)
+ goto out;
+ if (dict_set_int32_sizen(xdata, "check-zero-filled", 1)) {
+ dict_unref(xdata);
+ goto out;
+ }
+
+ wind_subvols = alloca0(priv->child_count);
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || healed_sinks[i])
+ wind_subvols[i] = 1;
+ }
+
+ AFR_ONLIST(wind_subvols, frame, __checksum_cbk, rchecksum, fd, offset, size,
+ xdata);
+ if (xdata)
+ dict_unref(xdata);
+
+ if (!replies[source].valid || replies[source].op_ret != 0)
+ return _gf_false;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source)
+ continue;
+ if (replies[i].valid) {
+ if (memcmp(replies[source].checksum, replies[i].checksum,
+ replies[source].fips_mode_rchecksum
+ ? SHA256_DIGEST_LENGTH
+ : MD5_DIGEST_LENGTH)) {
+ checksum_match = _gf_false;
+ break;
+ }
+ }
+ }
+
+ if (checksum_match) {
+ if (HAS_HOLES(poststat))
+ return _gf_true;
+
+ /* For non-sparse files, we might be better off writing the
+ * zeroes to sinks to avoid mismatch of disk-usage in bricks. */
+ if (local->replies[source].buf_has_zeroes)
+ return _gf_false;
+ else
+ return _gf_true;
+ }
+out:
+ return _gf_false;
+}
+
+static gf_boolean_t
+__afr_is_sink_zero_filled(xlator_t *this, fd_t *fd, size_t size, off_t offset,
+ int sink)
+{
+ afr_private_t *priv = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec *iovec = NULL;
+ int count = 0;
+ int ret = 0;
+ gf_boolean_t zero_filled = _gf_false;
+
+ priv = this->private;
+ ret = syncop_readv(priv->children[sink], fd, size, offset, 0, &iovec,
+ &count, &iobref, NULL, NULL, NULL);
+ if (ret < 0)
+ goto out;
+ ret = iov_0filled(iovec, count);
+ if (!ret)
+ zero_filled = _gf_true;
+out:
+ if (iovec)
+ GF_FREE(iovec);
+ if (iobref)
+ iobref_unref(iobref);
+ return zero_filled;
}
-int
-afr_sh_data_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
-{
-
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = (long) cookie;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "setattr failed on %s on subvolume %s: %s",
- local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- afr_sh_data_finish (frame, this);
- }
+static int
+__afr_selfheal_data_read_write(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks,
+ off_t offset, size_t size,
+ struct afr_reply *replies, int type)
+{
+ struct iovec *iovec = NULL;
+ int count = 0;
+ struct iobref *iobref = NULL;
+ int ret = 0;
+ int i = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ ret = syncop_readv(priv->children[source], fd, size, offset, 0, &iovec,
+ &count, &iobref, NULL, NULL, NULL);
+ if (ret <= 0)
+ return ret;
- return 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i])
+ continue;
+
+ /*
+ * TODO: Use fiemap() and discard() to heal holes
+ * in the future.
+ *
+ * For now,
+ *
+ * - if the source had any holes at all,
+ * AND
+ * - if we are writing past the original file size
+ * of the sink
+ * AND
+ * - is NOT the last block of the source file. if
+ * the block contains EOF, it has to be written
+ * in order to set the file size even if the
+ * last block is 0-filled.
+ * AND
+ * - if the read buffer is filled with only 0's
+ *
+ * then, skip writing to this source. We don't depend
+ * on the write to happen to update the size as we
+ * have performed an ftruncate() upfront anyways.
+ */
+#define is_last_block(o, b, s) ((s >= o) && (s <= (o + b)))
+ if (HAS_HOLES((&replies[source].poststat)) &&
+ offset >= replies[i].poststat.ia_size &&
+ !is_last_block(offset, size, replies[source].poststat.ia_size) &&
+ (iov_0filled(iovec, count) == 0))
+ continue;
+
+ /* Avoid filling up sparse regions of the sink with 0-filled
+ * writes.*/
+ if (type == AFR_SELFHEAL_DATA_FULL &&
+ HAS_HOLES((&replies[source].poststat)) &&
+ ((offset + size) <= replies[i].poststat.ia_size) &&
+ (iov_0filled(iovec, count) == 0) &&
+ __afr_is_sink_zero_filled(this, fd, size, offset, i)) {
+ continue;
+ }
+
+ ret = syncop_writev(priv->children[i], fd, iovec, count, offset, iobref,
+ 0, NULL, NULL, NULL, NULL);
+ if (ret != iov_length(iovec, count)) {
+ /* write() failed on this sink. unset the corresponding
+ member in sinks[] (which is healed_sinks[] in the
+ caller) so that this server does NOT get considered
+ as successfully healed.
+ */
+ healed_sinks[i] = 0;
+ }
+ }
+ if (iovec)
+ GF_FREE(iovec);
+ if (iobref)
+ iobref_unref(iobref);
+
+ return ret;
+}
+
+static gf_boolean_t
+afr_source_sinks_locked(xlator_t *this, unsigned char *locked_on, int source,
+ unsigned char *healed_sinks)
+{
+ afr_private_t *priv = this->private;
+ int i = 0;
+
+ if (!locked_on[source])
+ return _gf_false;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (healed_sinks[i] && locked_on[i])
+ return _gf_true;
+ }
+
+ return _gf_false;
}
-int
-afr_sh_data_setattr (call_frame_t *frame, xlator_t *this)
+static int
+afr_selfheal_data_block(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int source, unsigned char *healed_sinks, off_t offset,
+ size_t size, int type, struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- int i = 0;
- int call_count = 0;
- int source = 0;
- int32_t valid = 0;
- struct iatt stbuf = {0,};
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- valid |= (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME);
-
- stbuf.ia_atime = sh->buf[source].ia_atime;
- stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
- stbuf.ia_mtime = sh->buf[source].ia_mtime;
- stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
-
- call_count = afr_set_elem_count_get (sh->success,
- priv->child_count);
- local->call_count = call_count;
-
- if (call_count == 0) {
- GF_ASSERT (0);
- afr_sh_data_finish (frame, this);
- return 0;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!sh->success[i])
- continue;
+ int ret = -1;
+ afr_private_t *priv = NULL;
+ unsigned char *data_lock = NULL;
- STACK_WIND_COOKIE (frame, afr_sh_data_setattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc, &stbuf, valid, NULL);
+ priv = this->private;
+ data_lock = alloca0(priv->child_count);
- if (!--call_count)
- break;
+ ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, offset, size,
+ data_lock);
+ {
+ if (!afr_source_sinks_locked(this, data_lock, source, healed_sinks)) {
+ ret = -ENOTCONN;
+ goto unlock;
}
- return 0;
-}
-
-int
-afr_sh_data_setattr_fstat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int child_index = (long) cookie;
-
- local = frame->local;
- sh = &local->self_heal;
-
- GF_ASSERT (sh->source == child_index);
- if (op_ret != -1) {
- sh->buf[child_index] = *buf;
- afr_sh_data_setattr (frame, this);
- } else {
- gf_log (this->name, GF_LOG_ERROR, "%s: Failed to set "
- "time-stamps after self-heal", local->loc.path);
- afr_sh_data_fail (frame, this);
+ if (type == AFR_SELFHEAL_DATA_DIFF &&
+ __afr_can_skip_data_block_heal(frame, this, fd, source,
+ healed_sinks, offset, size,
+ &replies[source].poststat)) {
+ ret = 0;
+ goto unlock;
}
- return 0;
-}
-
-/*
- * If there are any writes after the self-heal is triggered then the
- * stbuf stored in local->self_heal.buf[] will be invalid so we do one more
- * stat on the source and then set the [am]times
- */
-int
-afr_sh_set_timestamps (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_setattr_fstat_cbk,
- (void *) (long) sh->source,
- priv->children[sh->source],
- priv->children[sh->source]->fops->fstat,
- sh->healing_fd, NULL);
- return 0;
-}
-
-//Fun fact, lock_cbk is being used for both lock & unlock
-int
-afr_sh_data_unlock (call_frame_t *frame, xlator_t *this,
- afr_lock_cbk_t lock_cbk)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
-
- GF_ASSERT (sh->data_lock_held);
-
- sh->data_lock_held = _gf_false;
- int_lock->lock_cbk = lock_cbk;
- afr_unlock (frame, this);
-
- return 0;
+ ret = __afr_selfheal_data_read_write(
+ frame, this, fd, source, healed_sinks, offset, size, replies, type);
+ }
+unlock:
+ afr_selfheal_uninodelk(frame, this, fd->inode, this->name, offset, size,
+ data_lock);
+ return ret;
}
-int
-afr_sh_data_finish (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "finishing data selfheal of %s", local->loc.path);
-
- if (sh->data_lock_held)
- afr_sh_data_unlock (frame, this, afr_sh_data_close);
- else
- afr_sh_data_close (frame, this);
-
- return 0;
-}
-
-int
-afr_sh_data_fail (call_frame_t *frame, xlator_t *this)
+static int
+afr_selfheal_data_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *healed_sinks)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
- gf_log (this->name, GF_LOG_DEBUG,
- "finishing failed data selfheal of %s", local->loc.path);
+ local = frame->local;
+ priv = this->private;
- sh->op_failed = 1;
- if (sh->data_lock_held)
- afr_sh_data_unlock (frame, this, afr_sh_data_close);
- else
- afr_sh_data_close (frame, this);
+ if (!priv->ensure_durability)
return 0;
-}
-int
-afr_sh_data_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr, dict_t *xdata)
-{
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int32_t child_index = (long) cookie;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Erasing of pending change "
- "log failed on %s for subvol %s, reason: %s",
- local->loc.path, priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- }
+ AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, fsync, fd, 0, NULL);
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed) {
- if (sh->old_loop_frame)
- sh_loop_finish (sh->old_loop_frame, this);
- sh->old_loop_frame = NULL;
- afr_sh_data_fail (frame, this);
- goto out;
- }
- if (!IA_ISREG (sh->type)) {
- afr_sh_data_finish (frame, this);
- goto out;
- }
- GF_ASSERT (sh->old_loop_frame);
- afr_sh_data_lock (frame, this, 0, 0,
- afr_post_sh_big_lock_success,
- afr_post_sh_big_lock_failure);
- }
-out:
- return 0;
+ for (i = 0; i < priv->child_count; i++)
+ if (healed_sinks[i] && local->replies[i].op_ret != 0)
+ /* fsync() failed. Do NOT consider this server
+ as successfully healed. Mark it so.
+ */
+ healed_sinks[i] = 0;
+ return 0;
}
-int
-afr_sh_data_erase_pending (call_frame_t *frame, xlator_t *this)
+static int
+afr_data_self_heal_type_get(afr_private_t *priv, unsigned char *healed_sinks,
+ int source, struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_DATA_TRANSACTION);
- gf_log (this->name, GF_LOG_DEBUG, "Delta matrix for: %s",
- lkowner_utoa (&frame->root->lk_owner));
- afr_sh_print_pending_matrix (sh->delta_matrix, this);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- afr_sh_delta_to_xattr (this, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_DATA_TRANSACTION);
+ int type = AFR_SELFHEAL_DATA_FULL;
+ int i = 0;
- GF_ASSERT (call_count);
- local->call_count = call_count;
+ if (priv->data_self_heal_algorithm == AFR_SELFHEAL_DATA_DYNAMIC) {
+ type = AFR_SELFHEAL_DATA_FULL;
for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_data_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- sh->healing_fd,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i],
- NULL);
- if (!--call_count)
- break;
+ if (!healed_sinks[i] && i != source)
+ continue;
+ if (replies[i].poststat.ia_size) {
+ type = AFR_SELFHEAL_DATA_DIFF;
+ break;
+ }
}
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- GF_FREE (erase_xattr);
-
- return 0;
+ } else {
+ type = priv->data_self_heal_algorithm;
+ }
+ return type;
}
-
-static struct afr_sh_algorithm *
-sh_algo_from_name (xlator_t *this, char *name)
-{
- int i = 0;
-
- while (afr_self_heal_algorithms[i].name) {
- if (!strcmp (name, afr_self_heal_algorithms[i].name)) {
- return &afr_self_heal_algorithms[i];
- }
-
- i++;
- }
-
- return NULL;
-}
-
-
static int
-sh_zero_byte_files_exist (afr_local_t *local, int child_count)
+afr_selfheal_data_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source,
+ unsigned char *healed_sinks, struct afr_reply *replies)
{
- int i = 0;
- int ret = 0;
- afr_self_heal_t *sh = NULL;
-
- sh = &local->self_heal;
- for (i = 0; i < child_count; i++) {
- if (!local->child_up[i] || sh->child_errno[i])
- continue;
- if (sh->buf[i].ia_size == 0) {
- ret = 1;
- break;
- }
- }
-
- return ret;
-}
+ afr_private_t *priv = NULL;
+ off_t off = 0;
+ size_t block = 0;
+ int type = AFR_SELFHEAL_DATA_FULL;
+ int ret = -1;
+ call_frame_t *iter_frame = NULL;
+ unsigned char arbiter_sink_status = 0;
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "performing data selfheal on %s", uuid_utoa(fd->inode->gfid));
-struct afr_sh_algorithm *
-afr_sh_data_pick_algo (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t * priv = NULL;
- struct afr_sh_algorithm * algo = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- algo = sh_algo_from_name (this, priv->data_self_heal_algorithm);
-
- if (algo == NULL) {
- /* option not set, so fall back on heuristics */
-
- if (sh_zero_byte_files_exist (local, priv->child_count)
- || (sh->file_size <= (priv->data_self_heal_window_size *
- this->ctx->page_size))) {
-
- /*
- * If the file does not exist on one of the subvolumes,
- * or a zero-byte file exists (created by entry self-heal)
- * the entire content has to be copied anyway, so there
- * is no benefit from using the "diff" algorithm.
- *
- * If the file size is about the same as page size,
- * the entire file can be read and written with a few
- * (pipelined) STACK_WINDs, which will be faster
- * than "diff" which has to read checksums and then
- * read and write.
- */
-
- algo = sh_algo_from_name (this, "full");
-
- } else {
- algo = sh_algo_from_name (this, "diff");
- }
- }
-
- return algo;
-}
-
-
-int
-afr_sh_data_sync_prepare (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- struct afr_sh_algorithm *sh_algo = NULL;
+ priv = this->private;
+ if (priv->arbiter_count) {
+ arbiter_sink_status = healed_sinks[ARBITER_BRICK_INDEX];
+ healed_sinks[ARBITER_BRICK_INDEX] = 0;
+ }
- local = frame->local;
- sh = &local->self_heal;
+ block = 128 * 1024 * priv->data_self_heal_window_size;
- sh->algo_completion_cbk = afr_sh_data_erase_pending;
- sh->algo_abort_cbk = afr_sh_data_fail;
+ type = afr_data_self_heal_type_get(priv, healed_sinks, source, replies);
- sh_algo = afr_sh_data_pick_algo (frame, this);
+ iter_frame = afr_copy_frame(frame);
+ if (!iter_frame) {
+ ret = -ENOMEM;
+ goto out;
+ }
- sh->algo = sh_algo;
- sh_algo->fn (frame, this);
+ for (off = 0; off < replies[source].poststat.ia_size; off += block) {
+ if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
+ ret = -ENOTCONN;
+ goto out;
+ }
- return 0;
-}
+ ret = afr_selfheal_data_block(iter_frame, this, fd, source,
+ healed_sinks, off, block, type, replies);
+ if (ret < 0)
+ goto out;
-int
-afr_sh_data_trim_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int call_count = 0;
- int child_index = 0;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "ftruncate of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "ftruncate of %s on subvolume %s completed",
- local->loc.path,
- priv->children[child_index]->name);
- }
+ AFR_STACK_RESET(iter_frame);
+ if (iter_frame->local == NULL) {
+ ret = -ENOTCONN;
+ goto out;
}
- UNLOCK (&frame->lock);
+ }
- call_count = afr_frame_return (frame);
+ ret = afr_selfheal_data_fsync(frame, this, fd, healed_sinks);
- if (call_count == 0) {
- if (sh->op_failed)
- afr_sh_data_fail (frame, this);
- else
- afr_sh_data_sync_prepare (frame, this);
- }
+out:
+ if (arbiter_sink_status)
+ healed_sinks[ARBITER_BRICK_INDEX] = arbiter_sink_status;
- return 0;
+ if (iter_frame)
+ AFR_STACK_DESTROY(iter_frame);
+ return ret;
}
-
-int
-afr_sh_data_trim_sinks (call_frame_t *frame, xlator_t *this)
+static int
+__afr_selfheal_truncate_sinks(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *healed_sinks, uint64_t size)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- afr_self_heal_t *sh = NULL;
- int *sources = NULL;
- int call_count = 0;
- int i = 0;
-
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ local = frame->local;
+ priv = this->private;
- sources = sh->sources;
- call_count = sh->active_sinks;
+ /* This will send truncate on the arbiter brick as well if it is marked as
+ * sink. If changelog is enabled on the volume it captures truncate as a
+ * data transactions on the arbiter brick. This will help geo-rep to
+ * properly sync the data from master to slave if arbiter is the ACTIVE
+ * brick during syncing and which had got some entries healed for data as
+ * part of self heal.
+ */
+ AFR_ONLIST(healed_sinks, frame, afr_sh_generic_fop_cbk, ftruncate, fd, size,
+ NULL);
- local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++)
+ if (healed_sinks[i] && local->replies[i].op_ret == -1)
+ /* truncate() failed. Do NOT consider this server
+ as successfully healed. Mark it so.
+ */
+ healed_sinks[i] = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] || !local->child_up[i])
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_trim_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->ftruncate,
- sh->healing_fd, sh->file_size,
- NULL);
-
- if (!--call_count)
- break;
- }
-
- return 0;
+ return 0;
}
-int
-afr_sh_inode_set_read_ctx (afr_self_heal_t *sh, xlator_t *this)
+gf_boolean_t
+afr_has_source_witnesses(xlator_t *this, unsigned char *sources,
+ uint64_t *witness)
{
- afr_private_t *priv = NULL;
- int ret = 0;
- int i = 0;
-
- priv = this->private;
- sh->source = afr_sh_select_source (sh->sources, priv->child_count);
- if (sh->source < 0) {
- ret = -1;
- goto out;
- }
+ int i = 0;
+ afr_private_t *priv = NULL;
- /* detect changes not visible through pending flags -- JIC */
- for (i = 0; i < priv->child_count; i++) {
- if (i == sh->source || sh->child_errno[i])
- continue;
-
- if (SIZE_DIFFERS (&sh->buf[i], &sh->buf[sh->source]))
- sh->sources[i] = 0;
- }
+ priv = this->private;
- afr_reset_children (sh->fresh_children, priv->child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
-out:
- return ret;
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i] && witness[i])
+ return _gf_true;
+ }
+ return _gf_false;
}
-void
-afr_sh_data_fix (call_frame_t *frame, xlator_t *this)
+static gf_boolean_t
+afr_does_size_mismatch(xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies)
{
- int source = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
- sh->block_size = this->ctx->page_size;
- sh->file_size = sh->buf[source].ia_size;
-
- if (FILE_HAS_HOLES (&sh->buf[source]))
- sh->file_has_holes = 1;
-
- if (sh->background && sh->unwind && !sh->unwound) {
- sh->unwind (sh->orig_frame, this, sh->op_ret, sh->op_errno,
- sh->op_failed);
- sh->unwound = _gf_true;
- }
+ int i = 0;
+ afr_private_t *priv = NULL;
+ struct iatt *min = NULL;
+ struct iatt *max = NULL;
- afr_sh_mark_source_sinks (frame, this);
- if (sh->active_sinks == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "no active sinks for performing self-heal on file %s",
- local->loc.path);
- afr_sh_data_finish (frame, this);
- return;
- }
+ priv = this->private;
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing file %s from subvolume %s to %d other",
- local->loc.path, priv->children[sh->source]->name,
- sh->active_sinks);
- afr_sh_data_trim_sinks (frame, this);
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
-int
-afr_sh_data_fxattrop_fstat_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int nsources = 0;
- int ret = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- gf_log (this->name, GF_LOG_DEBUG, "Pending matrix for: %s",
- lkowner_utoa (&frame->root->lk_owner));
-
- nsources = afr_build_sources (this, sh->xattr, sh->buf, sh->pending_matrix,
- sh->sources, sh->success_children,
- AFR_DATA_TRANSACTION, NULL, _gf_true);
- if ((nsources == 0) && !sh->sync_done) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No self-heal needed for %s",
- local->loc.path);
-
- local->govinda_gOvinda = 0;
- afr_sh_data_finish (frame, this);
- return 0;
- }
-
- if ((nsources == -1)
- && (priv->favorite_child != -1)
- && (sh->child_errno[priv->favorite_child] == 0)) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Picking favorite child %s as authentic source to "
- "resolve conflicting data of %s",
- priv->children[priv->favorite_child]->name,
- local->loc.path);
+ if (replies[i].op_ret < 0)
+ continue;
- sh->sources[priv->favorite_child] = 1;
+ if (!sources[i])
+ continue;
- nsources = afr_sh_source_count (sh->sources,
- priv->child_count);
- }
+ if (AFR_IS_ARBITER_BRICK(priv, i) && (replies[i].poststat.ia_size == 0))
+ continue;
- if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal contents of '%s' (possible "
- "split-brain). Please delete the file from all but "
- "the preferred subvolume.", local->loc.path);
+ if (!min)
+ min = &replies[i].poststat;
- local->govinda_gOvinda = 1;
- afr_sh_data_fail (frame, this);
- return 0;
- }
+ if (!max)
+ max = &replies[i].poststat;
- local->govinda_gOvinda = 0;
- ret = afr_sh_inode_set_read_ctx (sh, this);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No active sources found.");
+ if (min->ia_size > replies[i].poststat.ia_size)
+ min = &replies[i].poststat;
- afr_sh_data_fail (frame, this);
- return 0;
- }
+ if (max->ia_size < replies[i].poststat.ia_size)
+ max = &replies[i].poststat;
+ }
- if (sh->sync_done) {
- afr_sh_data_setattr (frame, this);
- } else {
- afr_sh_data_fix (frame, this);
- }
- return 0;
-}
+ if (min && max) {
+ if (min->ia_size != max->ia_size)
+ return _gf_true;
+ }
-int
-afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
- dict_t **xattr,
- afr_transaction_type txn_type,
- uuid_t gfid)
-{
- afr_private_t *priv = NULL;
- int read_child = -1;
- int32_t **pending_matrix = NULL;
- int32_t *sources = NULL;
- int32_t *success_children = NULL;
- struct iatt *bufs = NULL;
- int32_t nsources = 0;
- int32_t prev_read_child = -1;
- int32_t config_read_child = -1;
- int32_t subvol_status = 0;
-
- priv = this->private;
- bufs = local->cont.lookup.bufs;
- success_children = local->cont.lookup.success_children;
-
- pending_matrix = afr_matrix_create (priv->child_count,
- priv->child_count);
- if (NULL == pending_matrix)
- goto out;
-
- sources = local->cont.lookup.sources;
- memset (sources, 0, sizeof (*sources) * priv->child_count);
-
- nsources = afr_build_sources (this, xattr, bufs, pending_matrix,
- sources, success_children, txn_type,
- &subvol_status, _gf_false);
- if (subvol_status & SPLIT_BRAIN) {
- gf_log (this->name, GF_LOG_WARNING, "%s: Possible split-brain",
- local->loc.path);
- switch (txn_type) {
- case AFR_DATA_TRANSACTION:
- afr_set_split_brain (this,
- local->cont.lookup.inode,
- _gf_true);
- nsources = 1;
- sources[success_children[0]] = 1;
- break;
- case AFR_ENTRY_TRANSACTION:
- read_child = afr_get_no_xattr_dir_read_child (this,
- success_children,
- bufs);
- sources[read_child] = 1;
- nsources = 1;
- break;
- default:
- break;
- }
- }
- if (nsources < 0)
- goto out;
-
- prev_read_child = local->read_child_index;
- config_read_child = priv->read_child;
- read_child = afr_select_read_child_from_policy (success_children,
- priv->child_count,
- prev_read_child,
- config_read_child,
- sources,
- priv->hash_mode, gfid);
-out:
- afr_matrix_cleanup (pending_matrix, priv->child_count);
- gf_log (this->name, GF_LOG_DEBUG, "returning read_child: %d",
- read_child);
- return read_child;
+ return _gf_false;
}
-int
-afr_sh_data_special_file_fix (call_frame_t *frame, xlator_t *this)
+static void
+afr_mark_biggest_witness_as_source(xlator_t *this, unsigned char *sources,
+ uint64_t *witness)
{
- afr_private_t *priv = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ uint64_t biggest_witness = 0;
- for (i = 0; i < priv->child_count ; i++)
- if (1 == local->child_up[i])
- sh->success[i] = 1;
+ priv = this->private;
+ /* Find source with biggest witness count */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (biggest_witness < witness[i])
+ biggest_witness = witness[i];
+ }
- afr_sh_data_erase_pending (frame, this);
+ /* Mark files with less witness count as not source */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ if (witness[i] < biggest_witness)
+ sources[i] = 0;
+ }
- return 0;
+ return;
}
-int
-afr_sh_data_fstat_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct iatt *buf, dict_t *xdata)
+/* This is a tie breaker function. Only one source be assigned here */
+static void
+afr_mark_newest_file_as_source(xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int call_count = -1;
- int child_index = (long) cookie;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "fstat of %s on %s succeeded",
- local->loc.path,
- priv->children[child_index]->name);
-
- sh->buf[child_index] = *buf;
- sh->success_children[sh->success_count] = child_index;
- sh->success_count++;
- } else {
- gf_log (this->name, GF_LOG_ERROR, "%s: fstat failed "
- "on %s, reason %s", local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->child_errno[child_index] = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- /* Previous versions of glusterfs might have set
- * the pending data xattrs which need to be erased
- */
- if (!afr_sh_data_proceed (sh->success_count)) {
- gf_log (this->name, GF_LOG_ERROR, "inspecting metadata "
- "succeeded on < %d children, aborting "
- "self-heal for %s", AFR_SH_MIN_PARTICIPANTS,
- local->loc.path);
- afr_sh_data_fail (frame, this);
- goto out;
- }
- if (IA_ISREG (sh->type))
- afr_sh_data_fxattrop_fstat_done (frame, this);
- else
- afr_sh_data_special_file_fix (frame, this);
- }
-out:
- return 0;
-}
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ uint32_t max_ctime = 0;
+ priv = this->private;
+ /* Find source with latest ctime */
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
-int
-afr_sh_data_fstat (call_frame_t *frame, xlator_t *this)
-{
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- int child = 0;
- int32_t *fstat_children = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- fstat_children = memdup (sh->success_children,
- sizeof (*fstat_children) * priv->child_count);
- if (!fstat_children) {
- afr_sh_data_fail (frame, this);
- goto out;
+ if (max_ctime <= replies[i].poststat.ia_ctime) {
+ source = i;
+ max_ctime = replies[i].poststat.ia_ctime;
}
- call_count = sh->success_count;
- local->call_count = call_count;
+ }
- memset (sh->buf, 0, sizeof (*sh->buf) * priv->child_count);
- afr_reset_children (sh->success_children, priv->child_count);
- sh->success_count = 0;
- for (i = 0; i < priv->child_count; i++) {
- child = fstat_children[i];
- if (child == -1)
- break;
- STACK_WIND_COOKIE (frame, afr_sh_data_fstat_cbk,
- (void *) (long) child,
- priv->children[child],
- priv->children[child]->fops->fstat,
- sh->healing_fd, NULL);
- --call_count;
- }
- GF_ASSERT (!call_count);
-out:
- if (fstat_children)
- GF_FREE (fstat_children);
- return 0;
+ /* Only mark one of the files as source to break ties */
+ memset(sources, 0, sizeof(*sources) * priv->child_count);
+ sources[source] = 1;
}
-void
-afr_sh_common_fxattrop_resp_handler (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int child_index = (long) cookie;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "fxattrop of %s on %s succeeded",
- local->loc.path,
- priv->children[child_index]->name);
-
- sh->xattr[child_index] = dict_ref (xattr);
- sh->success_children[sh->success_count] = child_index;
- sh->success_count++;
- } else {
- gf_log (this->name, GF_LOG_ERROR, "fxattrop of %s "
- "failed on %s, reason %s", local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->child_errno[child_index] = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-}
-
-int
-afr_sh_data_fxattrop_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xattr, dict_t *xdata)
-{
- int call_count = -1;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- afr_sh_common_fxattrop_resp_handler (frame, cookie, this, op_ret,
- op_errno, xattr);
-
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- if (!afr_sh_data_proceed (sh->success_count)) {
- gf_log (this->name, GF_LOG_ERROR, "%s, inspecting "
- "change log succeeded on < %d children",
- local->loc.path, AFR_SH_MIN_PARTICIPANTS);
- afr_sh_data_fail (frame, this);
- goto out;
- }
- afr_sh_data_fstat (frame, this);
- }
-out:
- return 0;
-}
-
-
-int
-afr_sh_data_fxattrop (call_frame_t *frame, xlator_t *this)
-{
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- dict_t *xattr_req = NULL;
- int32_t *zero_pending = NULL;
- int call_count = 0;
- int i = 0;
- int ret = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- call_count = afr_up_children_count (local->child_up,
- priv->child_count);
-
- local->call_count = call_count;
-
- xattr_req = dict_new();
- if (!xattr_req) {
- ret = -1;
- goto out;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- zero_pending = GF_CALLOC (3, sizeof (*zero_pending),
- gf_afr_mt_int32_t);
- if (!zero_pending) {
- ret = -1;
- goto out;
- }
- ret = dict_set_dynptr (xattr_req, priv->pending_key[i],
- zero_pending,
- 3 * sizeof (*zero_pending));
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to set dict value");
- goto out;
- } else {
- zero_pending = NULL;
- }
- }
-
- afr_reset_xattr (sh->xattr, priv->child_count);
- afr_reset_children (sh->success_children, priv->child_count);
- memset (sh->child_errno, 0,
- sizeof (*sh->child_errno) * priv->child_count);
- sh->success_count = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_sh_data_fxattrop_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- sh->healing_fd, GF_XATTROP_ADD_ARRAY,
- xattr_req, NULL);
-
- if (!--call_count)
- break;
- }
- }
+static int
+__afr_selfheal_data_finalize_source(
+ call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on,
+ unsigned char *undid_pending, struct afr_reply *replies, uint64_t *witness)
+{
+ afr_private_t *priv = NULL;
+ int source = -1;
+ int sources_count = 0;
+ priv = this->private;
+
+ sources_count = AFR_COUNT(sources, priv->child_count);
+
+ if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) ||
+ !sources_count) {
+ /* split brain */
+ source = afr_mark_split_brain_source_sinks(
+ frame, this, inode, sources, sinks, healed_sinks, locked_on,
+ replies, AFR_DATA_TRANSACTION);
+ if (source < 0) {
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;type=data;"
+ "file=%s",
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(inode->gfid));
+ return -EIO;
+ }
+
+ _afr_fav_child_reset_sink_xattrs(
+ frame, this, inode, source, healed_sinks, undid_pending,
+ AFR_DATA_TRANSACTION, locked_on, replies);
+ goto out;
+ }
+
+ /* No split brain at this point. If we were called from
+ * afr_heal_splitbrain_file(), abort.*/
+ if (afr_dict_contains_heal_op(frame))
+ return -EIO;
+
+ /* If there are no witnesses/size-mismatches on sources we are done*/
+ if (!afr_does_size_mismatch(this, sources, replies) &&
+ !afr_has_source_witnesses(this, sources, witness))
+ goto out;
+
+ afr_mark_largest_file_as_source(this, sources, replies);
+ afr_mark_biggest_witness_as_source(this, sources, witness);
+ afr_mark_newest_file_as_source(this, sources, replies);
+ if (priv->arbiter_count)
+ /* Choose non-arbiter brick as source for empty files. */
+ afr_mark_source_sinks_if_file_empty(this, sources, sinks, healed_sinks,
+ locked_on, replies,
+ AFR_DATA_TRANSACTION);
out:
- if (xattr_req)
- dict_unref (xattr_req);
+ afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
+ source = afr_choose_source_by_policy(priv, sources, AFR_DATA_TRANSACTION);
- if (ret) {
- if (zero_pending)
- GF_FREE (zero_pending);
- afr_sh_data_fail (frame, this);
- }
-
- return 0;
+ return source;
}
+/*
+ * __afr_selfheal_data_prepare:
+ *
+ * This function inspects the on-disk xattrs and determines which subvols
+ * are sources and sinks.
+ *
+ * The return value is the index of the subvolume to be used as the source
+ * for self-healing, or -1 if no healing is necessary/split brain.
+ */
int
-afr_sh_data_big_lock_success (call_frame_t *frame, xlator_t *this)
+__afr_selfheal_data_prepare(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ struct afr_reply *replies, unsigned char *pflag)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ uint64_t *witness = NULL;
- local = frame->local;
- sh = &local->self_heal;
+ priv = this->private;
- sh->data_lock_held = _gf_true;
- afr_sh_data_fxattrop (frame, this);
- return 0;
-}
+ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
-int
-afr_sh_data_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ if (ret)
+ return ret;
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
+ witness = alloca0(priv->child_count * sizeof(*witness));
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_DATA_TRANSACTION, locked_on, sources,
+ sinks, witness, pflag);
+ if (ret)
+ return ret;
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Blocking data inodelks "
- "failed for %s. by %s",
- local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count);
- sh->data_lock_failure_handler (frame, this);
- } else {
+ source = __afr_selfheal_data_finalize_source(
+ frame, this, inode, sources, sinks, healed_sinks, locked_on,
+ undid_pending, replies, witness);
+ if (source < 0)
+ return -EIO;
- gf_log (this->name, GF_LOG_DEBUG, "Blocking data inodelks "
- "done for %s by %s. Proceding to self-heal",
- local->loc.path, lkowner_utoa (&frame->root->lk_owner));
-
- sh->data_lock_success_handler (frame, this);
- }
-
- return 0;
+ return source;
}
-int
-afr_sh_data_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
- "failed for %s. by %s",
- local->loc.path, lkowner_utoa (&frame->root->lk_owner));
+static int
+__afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *locked_on)
+{
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
+ struct afr_reply *locked_replies = NULL;
+ int source = -1;
+ gf_boolean_t did_sh = _gf_true;
+ gf_boolean_t is_arbiter_the_only_sink = _gf_false;
+ gf_boolean_t empty_file = _gf_false;
+
+ priv = this->private;
+
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+ data_lock = alloca0(priv->child_count);
+ undid_pending = alloca0(priv->child_count);
+
+ locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
+
+ ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, 0, 0,
+ data_lock);
+ {
+ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "self-heal as only %d number "
+ "of subvolumes "
+ "could be locked",
+ uuid_utoa(fd->inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_data_prepare(frame, this, fd->inode, data_lock,
+ sources, sinks, healed_sinks,
+ undid_pending, locked_replies, NULL);
+ if (ret < 0)
+ goto unlock;
+
+ if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
+ did_sh = _gf_false;
+ goto unlock;
+ }
+
+ source = ret;
+
+ if (AFR_IS_ARBITER_BRICK(priv, source)) {
+ empty_file = afr_is_file_empty_on_all_children(priv,
+ locked_replies);
+ if (empty_file)
+ goto restore_time;
+
+ did_sh = _gf_false;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_truncate_sinks(
+ frame, this, fd, healed_sinks,
+ locked_replies[source].poststat.ia_size);
+ if (ret < 0)
+ goto unlock;
+
+ if (priv->arbiter_count &&
+ AFR_COUNT(healed_sinks, priv->child_count) == 1 &&
+ healed_sinks[ARBITER_BRICK_INDEX]) {
+ is_arbiter_the_only_sink = _gf_true;
+ goto restore_time;
+ }
+ ret = 0;
+ }
+unlock:
+ afr_selfheal_uninodelk(frame, this, fd->inode, this->name, 0, 0, data_lock);
+ if (ret < 0)
+ goto out;
+
+ if (!did_sh)
+ goto out;
+
+ ret = afr_selfheal_data_do(frame, this, fd, source, healed_sinks,
+ locked_replies);
+ if (ret)
+ goto out;
+restore_time:
+ afr_selfheal_restore_time(frame, this, fd->inode, source, healed_sinks,
+ locked_replies);
+
+ if (!is_arbiter_the_only_sink && !empty_file) {
+ ret = afr_selfheal_inodelk(frame, this, fd->inode, this->name, 0, 0,
+ data_lock);
+ if (ret < priv->child_count) {
+ ret = -ENOTCONN;
+ did_sh = _gf_false;
+ goto skip_undo_pending;
+ }
+ }
+ ret = afr_selfheal_undo_pending(
+ frame, this, fd->inode, sources, sinks, healed_sinks, undid_pending,
+ AFR_DATA_TRANSACTION, locked_replies, data_lock);
+skip_undo_pending:
+ afr_selfheal_uninodelk(frame, this, fd->inode, this->name, 0, 0, data_lock);
+out:
- int_lock->lock_cbk = afr_sh_data_post_blocking_inodelk_cbk;
- afr_blocking_lock (frame, this);
- } else {
+ if (did_sh)
+ afr_log_selfheal(fd->inode->gfid, this, ret, "data", source, sources,
+ healed_sinks);
+ else
+ ret = 1;
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking data inodelks "
- "done for %s by %s. Proceeding to self-heal",
- local->loc.path, lkowner_utoa (&frame->root->lk_owner));
- sh->data_lock_success_handler (frame, this);
- }
+ if (locked_replies)
+ afr_replies_wipe(locked_replies, priv->child_count);
- return 0;
+ return ret;
}
int
-afr_sh_data_lock_rec (call_frame_t *frame, xlator_t *this, off_t start, off_t len)
+afr_selfheal_data_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
+ afr_local_t *local = NULL;
+ int i = (long)cookie;
- int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
- int_lock->selfheal_lk_type = AFR_DATA_SELF_HEAL_LK;
+ local = frame->local;
- afr_set_lock_number (frame, this);
+ local->replies[i].valid = 1;
+ local->replies[i].op_ret = op_ret;
+ local->replies[i].op_errno = op_errno;
- int_lock->lk_flock.l_start = start;
- int_lock->lk_flock.l_len = len;
- int_lock->lk_flock.l_type = F_WRLCK;
- int_lock->lock_cbk = afr_sh_data_post_nonblocking_inodelk_cbk;
+ syncbarrier_wake(&local->barrier);
- afr_nonblocking_inodelk (frame, this);
-
- return 0;
+ return 0;
}
int
-afr_post_sh_big_lock_success (call_frame_t *frame, xlator_t *this)
+afr_selfheal_data_open(xlator_t *this, inode_t *inode, fd_t **fd)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- GF_ASSERT (sh->old_loop_frame);
- sh_loop_finish (sh->old_loop_frame, this);
- sh->old_loop_frame = NULL;
- sh->data_lock_held = _gf_true;
- sh->sync_done = _gf_true;
- afr_sh_data_fxattrop (frame, this);
- return 0;
-}
+ int ret = 0;
+ fd_t *fd_tmp = NULL;
+ loc_t loc = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
-int
-afr_post_sh_big_lock_failure (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
+ priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
+ fd_tmp = fd_create(inode, 0);
+ if (!fd_tmp)
+ return -ENOMEM;
- GF_ASSERT (sh->old_loop_frame);
- sh_loop_finish (sh->old_loop_frame, this);
- sh->old_loop_frame = NULL;
- afr_sh_set_timestamps (frame, this);
- return 0;
-}
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ fd_unref(fd_tmp);
+ goto out;
+ }
+ local = frame->local;
-int
-afr_sh_data_lock (call_frame_t *frame, xlator_t *this,
- off_t start, off_t len,
- afr_lock_cbk_t success_handler,
- afr_lock_cbk_t failure_handler)
-{
- afr_local_t * local = NULL;
- afr_self_heal_t * sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_data_open_cbk, open, &loc,
+ O_RDWR | O_LARGEFILE, fd_tmp, NULL);
- sh->data_lock_success_handler = success_handler;
- sh->data_lock_failure_handler = failure_handler;
- return afr_sh_data_lock_rec (frame, this, start, len);
-}
+ ret = -ENOTCONN;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
-int
-afr_sh_data_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- /* TODO: some of the open's might fail.
- In that case, modify cleanup fn to send flush on those
- fd's which are already open */
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "open of %s failed on child %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "open of %s succeeded on child %s",
- local->loc.path,
- priv->children[child_index]->name);
- }
+ if (local->replies[i].op_ret < 0) {
+ ret = -local->replies[i].op_errno;
+ continue;
}
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_data_fail (frame, this);
- return 0;
- }
+ ret = 0;
+ break;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened, commencing sync",
- local->loc.path);
-
- afr_sh_data_lock (frame, this, 0, 0,
- afr_sh_data_big_lock_success,
- afr_sh_data_fail);
- }
+ if (ret < 0) {
+ fd_unref(fd_tmp);
+ goto out;
+ } else {
+ fd_bind(fd_tmp);
+ }
- return 0;
+ *fd = fd_tmp;
+out:
+ loc_wipe(&loc);
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return ret;
}
-
int
-afr_sh_data_open (call_frame_t *frame, xlator_t *this)
+afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd)
{
- int i = 0;
- int call_count = 0;
- fd_t *fd = NULL;
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t *sh = NULL;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ int ret = 0;
+ inode_t *inode = fd->inode;
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
+ priv = this->private;
- call_count = afr_up_children_count (local->child_up, priv->child_count);
- local->call_count = call_count;
+ locked_on = alloca0(priv->child_count);
- fd = fd_create (local->loc.inode, frame->root->pid);
- sh->healing_fd = fd;
-
- /* open sinks */
- for (i = 0; i < priv->child_count; i++) {
- if(!local->child_up[i])
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_data_open_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->open,
- &local->loc,
- O_RDWR|O_LARGEFILE, fd, NULL);
-
- if (!--call_count)
- break;
+ ret = afr_selfheal_tie_breaker_inodelk(frame, this, inode, priv->sh_domain,
+ 0, 0, locked_on);
+ {
+ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "self-heal as only %d number of "
+ "subvolumes could be locked",
+ uuid_utoa(fd->inode->gfid), ret);
+ /* Either less than two subvols available, or another
+ selfheal (from another server) is in progress. Skip
+ for now in any case there isn't anything to do.
+ */
+ ret = -ENOTCONN;
+ goto unlock;
}
- return 0;
-}
+ ret = __afr_selfheal_data(frame, this, fd, locked_on);
+ }
+unlock:
+ afr_selfheal_uninodelk(frame, this, inode, priv->sh_domain, 0, 0,
+ locked_on);
-
-int
-afr_self_heal_data (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = this->private;
-
- local = frame->local;
- sh = &local->self_heal;
-
- local->govinda_gOvinda = afr_is_split_brain (this, sh->inode);
-
- if (sh->do_data_self_heal &&
- afr_data_self_heal_enabled (priv->data_self_heal)) {
- afr_sh_data_open (frame, this);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "not doing data self heal on %s",
- local->loc.path);
- afr_sh_data_done (frame, this);
- }
-
- return 0;
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-entry.c b/xlators/cluster/afr/src/afr-self-heal-entry.c
index a23a74d72cd..64893f441e3 100644
--- a/xlators/cluster/afr/src/afr-self-heal-entry.c
+++ b/xlators/cluster/afr/src/afr-self-heal-entry.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,2404 +8,1269 @@
cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
-#include <unistd.h>
-#include <fnmatch.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <signal.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "inode.h"
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-
-#include "afr-transaction.h"
#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-#define AFR_INIT_SH_FRAME_VALS(_frame, _local, _sh, _sh_frame, _sh_local, _sh_sh)\
- do {\
- _local = _frame->local;\
- _sh = &_local->self_heal;\
- _sh_frame = _sh->sh_frame;\
- _sh_local = _sh_frame->local;\
- _sh_sh = &_sh_local->self_heal;\
- } while (0);
-
-int
-afr_sh_entry_impunge_create_file (call_frame_t *impunge_frame, xlator_t *this,
- int child_index);
-int
-afr_sh_entry_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- sh->completion_cbk (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_unlock (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_internal_lock_t *int_lock = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->lock_cbk = afr_sh_entry_done;
- afr_unlock (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_finish (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- gf_log (this->name, GF_LOG_TRACE,
- "finishing entry selfheal of %s", local->loc.path);
-
- afr_sh_entry_unlock (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr, dict_t *xdata)
-{
- long i = 0;
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_local_t *orig_local = NULL;
- call_frame_t *orig_frame = NULL;
- afr_private_t *priv = NULL;
- int32_t read_child = -1;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
- i = (long)cookie;
-
-
- afr_children_add_child (sh->fresh_children, i, priv->child_count);
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to erase pending xattrs on %s (%s)",
- local->loc.path, priv->children[i]->name,
- strerror (op_errno));
- }
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->source == -1) {
- //this happens if the forced merge option is set
- read_child = sh->fresh_children[0];
- } else {
- read_child = sh->source;
- }
- afr_inode_set_read_ctx (this, sh->inode, read_child,
- sh->fresh_children);
- orig_frame = sh->orig_frame;
- orig_local = orig_frame->local;
-
- if (sh->source != -1) {
- orig_local->cont.lookup.buf.ia_nlink = sh->buf[sh->source].ia_nlink;
- }
-
- afr_sh_entry_finish (frame, this);
- }
-
- return 0;
-}
-
+#include <glusterfs/byte-order.h>
+#include "afr-transaction.h"
+#include "afr-messages.h"
+#include <glusterfs/syncop-utils.h>
+#include <glusterfs/events.h>
int
-afr_sh_entry_erase_pending (call_frame_t *frame, xlator_t *this)
+afr_selfheal_entry_anon_inode(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child,
+ struct afr_reply *replies,
+ gf_boolean_t *anon_inode)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
- int need_unwind = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (sh->entries_skipped) {
- need_unwind = 1;
- sh->op_failed = _gf_true;
- goto out;
- }
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix, sh->success,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
- }
-
- if (call_count == 0)
- need_unwind = 1;
-
- afr_sh_delta_to_xattr (this, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_ENTRY_TRANSACTION);
-
- local->call_count = call_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i],
- NULL);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- GF_FREE (erase_xattr);
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
+ int i = 0;
+ char g[64] = {0};
+ unsigned char *lookup_success = NULL;
+ call_frame_t *frame = NULL;
+ loc_t loc2 = {
+ 0,
+ };
+ loc_t loc = {
+ 0,
+ };
+
+ priv = this->private;
+ subvol = priv->children[child];
+ lookup_success = alloca0(priv->child_count);
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (replies[child].poststat.ia_type == IA_IFDIR) {
+ /* This directory may have sub-directory hierarchy which may need to
+ * be preserved for subsequent heals. So unconditionally move the
+ * directory to anonymous-inode directory*/
+ *anon_inode = _gf_true;
+ goto anon_inode;
+ }
+
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ gf_uuid_copy(loc.gfid, replies[child].poststat.ia_gfid);
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ lookup_success[i] = 1;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ ret = -local->replies[i].op_errno;
+ }
+ }
+
+ if (priv->quorum_count) {
+ if (afr_has_quorum(lookup_success, this, NULL)) {
+ *anon_inode = _gf_true;
+ }
+ } else if (AFR_COUNT(lookup_success, priv->child_count) > 1) {
+ *anon_inode = _gf_true;
+ } else if (ret) {
+ goto out;
+ }
+
+anon_inode:
+ if (!*anon_inode) {
+ ret = 0;
+ goto out;
+ }
+
+ loc.parent = inode_ref(dir);
+ gf_uuid_copy(loc.pargfid, dir->gfid);
+ loc.name = name;
+
+ ret = afr_anon_inode_create(this, child, &loc2.parent);
+ if (ret < 0)
+ goto out;
+
+ loc2.name = g;
+ ret = syncop_rename(subvol, &loc, &loc2, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s failed",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "Rename to %s dir %s/%s (%s) on %s successful",
+ priv->anon_inode_name, uuid_utoa(dir->gfid), name, g,
+ subvol->name);
+ }
out:
- if (need_unwind)
- afr_sh_entry_finish (frame, this);
+ loc_wipe(&loc);
+ loc_wipe(&loc2);
+ if (frame) {
+ AFR_STACK_DESTROY(frame);
+ }
- return 0;
+ return ret;
}
-
-
-static int
-next_active_source (call_frame_t *frame, xlator_t *this,
- int current_active_source)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int source = -1;
- int next_active_source = -1;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- source = sh->source;
-
- if (source != -1) {
- if (current_active_source != source)
- next_active_source = source;
- goto out;
- }
-
- /*
- the next active sink becomes the source for the
- 'conservative decision' of merging all entries
- */
-
- for (i = 0; i < priv->child_count; i++) {
- if ((sh->sources[i] == 0)
- && (local->child_up[i] == 1)
- && (i > current_active_source)) {
-
- next_active_source = i;
- break;
- }
- }
-out:
- return next_active_source;
-}
-
-
-
-static int
-next_active_sink (call_frame_t *frame, xlator_t *this,
- int current_active_sink)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int next_active_sink = -1;
- int i = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- /*
- the next active sink becomes the source for the
- 'conservative decision' of merging all entries
- */
-
- for (i = 0; i < priv->child_count; i++) {
- if ((sh->sources[i] == 0)
- && (local->child_up[i] == 1)
- && (i > current_active_sink)) {
-
- next_active_sink = i;
- break;
- }
- }
-
- return next_active_sink;
-}
-
-int
-afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this);
-
-int
-afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src);
-
-int
-afr_sh_entry_expunge_entry_done (call_frame_t *frame, xlator_t *this,
- int active_src, int32_t op_ret,
- int32_t op_errno)
-{
- int call_count = 0;
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0)
- afr_sh_entry_expunge_subvol (frame, this, active_src);
-
- return 0;
-}
-
-int
-afr_sh_entry_expunge_parent_setattr_cbk (call_frame_t *expunge_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop,
- dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = (long) cookie;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
-
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "setattr on parent directory of %s on subvolume %s failed: %s",
- expunge_local->loc.path,
- priv->children[active_src]->name, strerror (op_errno));
- }
-
- AFR_STACK_DESTROY (expunge_frame);
- sh->expunge_done (frame, this, active_src, op_ret, op_errno);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_remove_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int active_src = 0;
- int32_t valid = 0;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
-
- active_src = (long) cookie;
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "removed %s on %s",
- expunge_local->loc.path,
- priv->children[active_src]->name);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "removing %s on %s failed (%s)",
- expunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- }
-
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_parent_setattr_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->setattr,
- &expunge_sh->parent_loc,
- &expunge_sh->parentbuf,
- valid, NULL);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_unlink (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
-
- gf_log (this->name, GF_LOG_TRACE,
- "expunging file %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->unlink,
- &expunge_local->loc, 0, NULL);
-
- return 0;
-}
-
-
-
int
-afr_sh_entry_expunge_rmdir (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "expunging directory %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_remove_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->rmdir,
- &expunge_local->loc, 1, NULL);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_remove (call_frame_t *expunge_frame, xlator_t *this,
- int active_src, struct iatt *buf,
- struct iatt *parentbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
- int type = 0;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
- loc_t *loc = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- local = frame->local;
- sh = &local->self_heal;
- loc = &expunge_local->loc;
-
- type = buf->ia_type;
- if (loc->parent && uuid_is_null (loc->parent->gfid))
- uuid_copy (loc->pargfid, parentbuf->ia_gfid);
-
- switch (type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- case IA_IFLNK:
- afr_sh_entry_expunge_unlink (expunge_frame, this, active_src);
- break;
+ char g[64] = {0};
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ gf_boolean_t anon_inode = _gf_false;
+
+ priv = this->private;
+ subvol = priv->children[child];
+
+ if ((!replies[child].valid) || (replies[child].op_ret < 0)) {
+ /*Nothing to do*/
+ ret = 0;
+ goto out;
+ }
+
+ if (priv->use_anon_inode) {
+ ret = afr_selfheal_entry_anon_inode(this, dir, name, inode, child,
+ replies, &anon_inode);
+ if (ret < 0 || anon_inode)
+ goto out;
+ }
+
+ loc.parent = inode_ref(dir);
+ loc.inode = inode_new(inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ loc.name = name;
+ switch (replies[child].poststat.ia_type) {
case IA_IFDIR:
- afr_sh_entry_expunge_rmdir (expunge_frame, this, active_src);
- break;
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging dir %s/%s (%s) on %s", uuid_utoa(dir->gfid), name,
+ uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
+ break;
default:
- gf_log (this->name, GF_LOG_ERROR,
- "%s has unknown file type on %s: 0%o",
- expunge_local->loc.path,
- priv->children[active_src]->name, type);
- goto out;
- break;
- }
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging file %s/%s (%s) on %s", uuid_utoa(dir->gfid),
+ name, uuid_utoa_r(replies[child].poststat.ia_gfid, g),
+ subvol->name);
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+ break;
+ }
- return 0;
out:
- AFR_STACK_DESTROY (expunge_frame);
- sh->expunge_done (frame, this, active_src, -1, EINVAL);
-
- return 0;
+ loc_wipe(&loc);
+ return ret;
}
-
int
-afr_sh_entry_expunge_lookup_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *x,
- struct iatt *postparent)
+afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
+ unsigned char *sources, inode_t *dir,
+ const char *name, inode_t *inode,
+ struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- call_frame_t *frame = NULL;
- int active_src = 0;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- active_src = (long) cookie;
- local = frame->local;
- sh = &local->self_heal;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "lookup of %s on %s failed (%s)",
- expunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ loc_t srcloc = {
+ 0,
+ };
+ loc_t anonloc = {
+ 0,
+ };
+ xlator_t *this = frame->this;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ struct iatt *iatt = NULL;
+ char *linkname = NULL;
+ mode_t mode = 0;
+ struct iatt newent = {
+ 0,
+ };
+ unsigned char *newentry = NULL;
+ char iatt_uuid_str[64] = {0};
+ char dir_uuid_str[64] = {0};
+
+ priv = this->private;
+ iatt = &replies[source].poststat;
+ uuid_utoa_r(iatt->ia_gfid, iatt_uuid_str);
+ if (iatt->ia_type == IA_INVAL || gf_uuid_is_null(iatt->ia_gfid)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SELF_HEAL_FAILED,
+ "Invalid ia_type (%d) or gfid(%s). source brick=%d, "
+ "pargfid=%s, name=%s",
+ iatt->ia_type, iatt_uuid_str, source,
+ uuid_utoa_r(dir->gfid, dir_uuid_str), name);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata)
+ return -ENOMEM;
+ newentry = alloca0(priv->child_count);
+ loc.parent = inode_ref(dir);
+ gf_uuid_copy(loc.pargfid, dir->gfid);
+ loc.name = name;
+ loc.inode = inode_ref(inode);
+
+ ret = afr_selfheal_entry_delete(this, dir, name, inode, dst, replies);
+ if (ret)
+ goto out;
+
+ ret = dict_set_gfuuid(xdata, "gfid-req", replies[source].poststat.ia_gfid,
+ true);
+ if (ret)
+ goto out;
+
+ srcloc.inode = inode_ref(inode);
+ gf_uuid_copy(srcloc.gfid, iatt->ia_gfid);
+ ret = syncop_lookup(priv->children[dst], &srcloc, 0, 0, 0, 0);
+ if (ret == -ENOENT || ret == -ESTALE) {
+ newentry[dst] = 1;
+ ret = afr_selfheal_newentry_mark(frame, this, inode, source, replies,
+ sources, newentry);
+ if (ret)
+ goto out;
+ } else if (ret == 0 && iatt->ia_type == IA_IFDIR && priv->use_anon_inode) {
+ // Try rename from hidden directory
+ ret = afr_anon_inode_create(this, dst, &anonloc.parent);
+ if (ret < 0)
+ goto out;
+ anonloc.inode = inode_ref(inode);
+ anonloc.name = iatt_uuid_str;
+ ret = syncop_rename(priv->children[dst], &anonloc, &loc, NULL, NULL);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = -1; /*This sets 'mismatch' to true*/
+ goto out;
+ }
+
+ mode = st_mode_from_ia(iatt->ia_prot, iatt->ia_type);
+
+ switch (iatt->ia_type) {
+ case IA_IFDIR:
+ ret = syncop_mkdir(priv->children[dst], &loc, mode, 0, xdata, NULL);
+ break;
+ case IA_IFLNK:
+ if (!newentry[dst]) {
+ ret = syncop_link(priv->children[dst], &srcloc, &loc, &newent,
+ NULL, NULL);
+ } else {
+ ret = syncop_readlink(priv->children[source], &srcloc,
+ &linkname, 4096, NULL, NULL);
+ if (ret <= 0)
+ goto out;
+ ret = syncop_symlink(priv->children[dst], &loc, linkname, NULL,
+ xdata, NULL);
+ }
+ break;
+ default:
+ ret = dict_set_int32_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+ if (ret)
goto out;
- }
-
- afr_sh_entry_expunge_remove (expunge_frame, this, active_src, buf,
- postparent);
+ ret = syncop_mknod(
+ priv->children[dst], &loc, mode,
+ makedev(ia_major(iatt->ia_rdev), ia_minor(iatt->ia_rdev)),
+ &newent, xdata, NULL);
+ break;
+ }
- return 0;
out:
- AFR_STACK_DESTROY (expunge_frame);
- sh->expunge_done (frame, this, active_src, op_ret, op_errno);
-
- return 0;
+ if (xdata)
+ dict_unref(xdata);
+ GF_FREE(linkname);
+ loc_wipe(&loc);
+ loc_wipe(&srcloc);
+ loc_wipe(&anonloc);
+ return ret;
}
-
-int
-afr_sh_entry_expunge_purge (call_frame_t *expunge_frame, xlator_t *this,
- int active_src)
-{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s",
- expunge_local->loc.path, priv->children[active_src]->name);
-
- STACK_WIND_COOKIE (expunge_frame, afr_sh_entry_expunge_lookup_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->lookup,
- &expunge_local->loc, NULL);
-
- return 0;
-}
-
-int
-afr_sh_entry_expunge_entry_cbk (call_frame_t *expunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *x,
- struct iatt *postparent)
+static int
+__afr_selfheal_heal_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, int source,
+ unsigned char *sources, unsigned char *healed_sinks,
+ unsigned char *locked_on, struct afr_reply *replies)
{
- afr_private_t *priv = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int source = 0;
- call_frame_t *frame = NULL;
- int active_src = 0;
- int need_expunge = 0;
- afr_self_heal_t *sh = NULL;
- afr_local_t *local = NULL;
-
- priv = this->private;
- expunge_local = expunge_frame->local;
- expunge_sh = &expunge_local->self_heal;
- frame = expunge_sh->sh_frame;
- active_src = expunge_sh->active_source;
- source = (long) cookie;
- local = frame->local;
- sh = &local->self_heal;
-
- if (op_ret == -1 && op_errno == ENOENT)
- need_expunge = 1;
- else if (op_ret == -1)
- goto out;
-
- if (!uuid_is_null (expunge_sh->entrybuf.ia_gfid) &&
- !uuid_is_null (buf->ia_gfid) &&
- (uuid_compare (expunge_sh->entrybuf.ia_gfid, buf->ia_gfid) != 0)) {
- char uuidbuf1[64];
- char uuidbuf2[64];
- gf_log (this->name, GF_LOG_DEBUG,
- "entry %s found on %s with mismatching gfid (%s/%s)",
- expunge_local->loc.path,
- priv->children[source]->name,
- uuid_utoa_r (expunge_sh->entrybuf.ia_gfid, uuidbuf1),
- uuid_utoa_r (buf->ia_gfid, uuidbuf2));
- need_expunge = 1;
- }
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
- if (need_expunge) {
- gf_log (this->name, GF_LOG_INFO,
- "missing entry %s on %s",
- expunge_local->loc.path,
- priv->children[source]->name);
+ priv = this->private;
- if (postparent)
- expunge_sh->parentbuf = *postparent;
+ if (!replies[source].valid)
+ return -EIO;
- afr_sh_entry_expunge_purge (expunge_frame, this, active_src);
-
- return 0;
- }
+ /* Skip healing this entry if the last lookup on it failed for reasons
+ * other than ENOENT.
+ */
+ if ((replies[source].op_ret < 0) && (replies[source].op_errno != ENOENT))
+ return -replies[source].op_errno;
-out:
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s exists under %s",
- expunge_local->loc.path,
- priv->children[source]->name);
+ if (replies[source].op_ret == 0) {
+ ret = afr_lookup_and_heal_gfid(this, fd->inode, name, inode, replies,
+ source, sources,
+ &replies[source].poststat.ia_gfid, NULL);
+ if (ret)
+ return ret;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i])
+ continue;
+ if (replies[source].op_ret == -1 &&
+ replies[source].op_errno == ENOENT) {
+ ret = afr_selfheal_entry_delete(this, fd->inode, name, inode, i,
+ replies);
} else {
- gf_log (this->name, GF_LOG_INFO,
- "looking up %s under %s failed (%s)",
- expunge_local->loc.path,
- priv->children[source]->name,
- strerror (op_errno));
- }
+ if (!gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[source].poststat.ia_gfid))
+ continue;
- AFR_STACK_DESTROY (expunge_frame);
- sh->expunge_done (frame, this, active_src, op_ret, op_errno);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_entry (call_frame_t *frame, xlator_t *this,
- gf_dirent_t *entry)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int ret = -1;
- call_frame_t *expunge_frame = NULL;
- afr_local_t *expunge_local = NULL;
- afr_self_heal_t *expunge_sh = NULL;
- int active_src = 0;
- int source = 0;
- int op_errno = 0;
- char *name = NULL;
- int op_ret = -1;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
- source = sh->source;
- sh->expunge_done = afr_sh_entry_expunge_entry_done;
-
- name = entry->d_name;
-
- if ((strcmp (name, ".") == 0)
- || (strcmp (name, "..") == 0)) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- name, local->loc.path);
- op_ret = 0;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "inspecting existence of %s under %s",
- name, local->loc.path);
-
- expunge_frame = copy_frame (frame);
- if (!expunge_frame) {
- op_errno = ENOMEM;
- goto out;
- }
-
- AFR_LOCAL_ALLOC_OR_GOTO (expunge_local, out);
-
- expunge_frame->local = expunge_local;
- expunge_sh = &expunge_local->self_heal;
- expunge_sh->sh_frame = frame;
- expunge_sh->active_source = active_src;
- expunge_sh->entrybuf = entry->d_stat;
- loc_copy (&expunge_sh->parent_loc, &local->loc);
-
- ret = afr_build_child_loc (this, &expunge_local->loc, &local->loc,
- name);
- if (ret != 0) {
- op_errno = EINVAL;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s", expunge_local->loc.path,
- priv->children[source]->name);
-
- STACK_WIND_COOKIE (expunge_frame,
- afr_sh_entry_expunge_entry_cbk,
- (void *) (long) source,
- priv->children[source],
- priv->children[source]->fops->lookup,
- &expunge_local->loc, NULL);
-
- ret = 0;
-out:
- if (ret == -1)
- sh->expunge_done (frame, this, active_src, op_ret, op_errno);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- gf_dirent_t *entry = NULL;
- off_t last_offset = 0;
- int active_src = 0;
- int entry_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "readdir of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir of %s on subvolume %s complete",
- local->loc.path,
- priv->children[active_src]->name);
- }
-
- afr_sh_entry_expunge_all (frame, this);
- return 0;
+ ret = afr_selfheal_recreate_entry(frame, i, source, sources,
+ fd->inode, name, inode, replies);
}
+ if (ret < 0)
+ break;
+ }
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- entry_count++;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "readdir'ed %d entries from %s",
- entry_count, priv->children[active_src]->name);
-
- sh->offset = last_offset;
- local->call_count = entry_count;
-
- list_for_each_entry (entry, &entries->list, list) {
- afr_sh_entry_expunge_entry (frame, this, entry);
- }
-
- return 0;
+ return ret;
}
-int
-afr_sh_entry_expunge_subvol (call_frame_t *frame, xlator_t *this,
- int active_src)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- STACK_WIND (frame, afr_sh_entry_expunge_readdir_cbk,
- priv->children[active_src],
- priv->children[active_src]->fops->readdirp,
- sh->healing_fd, sh->block_size, sh->offset, NULL);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_expunge_all (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int active_src = -1;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh->offset = 0;
-
- if (sh->source == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sources for %s to expunge entries",
- local->loc.path);
- goto out;
- }
-
- active_src = next_active_sink (frame, this, sh->active_source);
- sh->active_source = active_src;
-
- if (sh->op_failed) {
- goto out;
- }
-
- if (active_src == -1) {
- /* completed creating missing files on all subvolumes */
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "expunging entries of %s on %s to other sinks",
- local->loc.path, priv->children[active_src]->name);
-
- afr_sh_entry_expunge_subvol (frame, this, active_src);
-
- return 0;
-out:
- afr_sh_entry_impunge_all (frame, this);
- return 0;
-
-}
-
-
-int
-afr_sh_entry_impunge_entry_done (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- int call_count = 0;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- if (op_ret < 0)
- sh->entries_skipped = _gf_true;
- call_count = afr_frame_return (frame);
- if (call_count == 0)
- afr_sh_entry_impunge_subvol (frame, this);
-
- return 0;
-}
-
-void
-afr_sh_entry_call_impunge_done (call_frame_t *impunge_frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+static int
+afr_selfheal_detect_gfid_and_type_mismatch(xlator_t *this,
+ struct afr_reply *replies,
+ inode_t *inode, uuid_t pargfid,
+ char *bname, int src_idx,
+ unsigned char *locked_on, int *src)
{
- afr_local_t *impunge_local = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
-
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
-
- AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (frame, this, op_ret, op_errno);
+ int i = 0;
+ int ret = -1;
+ afr_private_t *priv = NULL;
+ void *gfid = NULL;
+ ia_type_t ia_type = IA_INVAL;
+
+ priv = this->private;
+ gfid = &replies[src_idx].poststat.ia_gfid;
+ ia_type = replies[src_idx].poststat.ia_type;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == src_idx)
+ continue;
+
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret != 0)
+ continue;
+
+ if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
+ continue;
+
+ if (replies[i].poststat.ia_type == IA_INVAL)
+ continue;
+
+ if (ia_type == IA_INVAL || gf_uuid_is_null(gfid)) {
+ src_idx = i;
+ ia_type = replies[src_idx].poststat.ia_type;
+ gfid = &replies[src_idx].poststat.ia_gfid;
+ continue;
+ }
+
+ if (gf_uuid_compare(gfid, replies[i].poststat.ia_gfid) &&
+ (ia_type == replies[i].poststat.ia_type)) {
+ ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
+ bname, src_idx, i, locked_on, src,
+ NULL);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "Skipping conservative merge on the "
+ "file.");
+ return ret;
+ }
+
+ if (ia_type != replies[i].poststat.ia_type) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_SPLIT_BRAIN,
+ "Type mismatch detected "
+ "for <gfid:%s>/%s>, %s on %s and %s on %s. "
+ "Skipping conservative merge on the file.",
+ uuid_utoa(pargfid), bname,
+ gf_inode_type_to_str(replies[i].poststat.ia_type),
+ priv->children[i]->name,
+ gf_inode_type_to_str(replies[src_idx].poststat.ia_type),
+ priv->children[src_idx]->name);
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;type=file;"
+ "file=<gfid:%s>/%s>;count=2;child-%d=%s;type-"
+ "%d=%s;child-%d=%s;type-%d=%s",
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(pargfid), bname, i, priv->children[i]->name, i,
+ gf_inode_type_to_str(replies[i].poststat.ia_type), src_idx,
+ priv->children[src_idx]->name, src_idx,
+ gf_inode_type_to_str(replies[src_idx].poststat.ia_type));
+ return -1;
+ }
+ }
+
+ return 0;
}
-int
-afr_sh_entry_impunge_setattr_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop,
- dict_t *xdata)
+static int
+__afr_selfheal_merge_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, unsigned char *sources,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on, struct afr_reply *replies)
{
- int call_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- int child_index = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- child_index = (long) cookie;
-
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setattr done for %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "setattr (%s) on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- }
-
- call_count = afr_frame_return (impunge_frame);
- if (call_count == 0) {
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- 0, op_errno);
- }
+ int ret = 0;
+ int i = 0;
+ int source = -1;
+ int src = -1;
+ afr_private_t *priv = NULL;
- return 0;
-}
+ priv = this->private;
-int
-afr_sh_entry_impunge_parent_setattr_cbk (call_frame_t *setattr_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop,
- dict_t *xdata)
-{
- int call_count = 0;
- afr_local_t *setattr_local = NULL;
-
- setattr_local = setattr_frame->local;
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_INFO,
- "setattr on parent directory (%s) failed: %s",
- setattr_local->loc.path, strerror (op_errno));
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ source = i;
+ break;
}
+ }
- call_count = afr_frame_return (setattr_frame);
- if (call_count == 0)
- AFR_STACK_DESTROY (setattr_frame);
+ if (source == -1) {
+ /* entry got deleted in the mean time? */
return 0;
-}
-
-int
-afr_sh_entry_impunge_setattr (call_frame_t *impunge_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_local_t *setattr_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *setattr_frame = NULL;
- int32_t valid = 0;
- int32_t op_errno = 0;
- int child_index = 0;
- int call_count = 0;
- int i = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "setting ownership of %s on %s to %d/%d",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- impunge_sh->entrybuf.ia_uid,
- impunge_sh->entrybuf.ia_gid);
-
- setattr_frame = copy_frame (impunge_frame);
- if (!setattr_frame) {
- op_errno = ENOMEM;
- goto out;
- }
- AFR_LOCAL_ALLOC_OR_GOTO (setattr_frame->local, out);
- setattr_local = setattr_frame->local;
- call_count = afr_errno_count (NULL, impunge_sh->child_errno,
- priv->child_count, 0);
- loc_copy (&setattr_local->loc, &impunge_sh->parent_loc);
- impunge_local->call_count = call_count;
- setattr_local->call_count = call_count;
+ }
+
+ /* Set all the sources as 1, otheriwse newentry_mark won't be set */
+ for (i = 0; i < priv->child_count; i++) {
+ if (replies[i].valid && replies[i].op_ret == 0) {
+ sources[i] = 1;
+ }
+ }
+
+ ret = afr_lookup_and_heal_gfid(this, fd->inode, name, inode, replies,
+ source, sources,
+ &replies[source].poststat.ia_gfid, NULL);
+ if (ret)
+ return ret;
+
+ /* In case of type mismatch / unable to resolve gfid mismatch on the
+ * entry, return -1.*/
+ ret = afr_selfheal_detect_gfid_and_type_mismatch(
+ this, replies, inode, fd->inode->gfid, name, source, locked_on, &src);
+
+ if (ret < 0)
+ return ret;
+ if (src != -1) {
+ source = src;
for (i = 0; i < priv->child_count; i++) {
- if (impunge_sh->child_errno[i])
- continue;
- valid = GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- STACK_WIND_COOKIE (setattr_frame,
- afr_sh_entry_impunge_parent_setattr_cbk,
- (void *) (long) i, priv->children[i],
- priv->children[i]->fops->setattr,
- &setattr_local->loc,
- &impunge_sh->parentbuf, valid, NULL);
-
- valid = GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
- STACK_WIND_COOKIE (impunge_frame,
- afr_sh_entry_impunge_setattr_cbk,
- (void *) (long) i, priv->children[i],
- priv->children[i]->fops->setattr,
- &impunge_local->loc,
- &impunge_sh->entrybuf, valid, NULL);
- call_count--;
- }
- GF_ASSERT (!call_count);
- return 0;
-out:
- if (setattr_frame)
- AFR_STACK_DESTROY (setattr_frame);
- afr_sh_entry_call_impunge_done (impunge_frame, this, 0, op_errno);
- return 0;
-}
-
-int
-afr_sh_entry_impunge_xattrop_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xattr, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- int child_index = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to perform xattrop on %s (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
+ if (i != src && replies[i].valid &&
+ gf_uuid_compare(replies[src].poststat.ia_gfid,
+ replies[i].poststat.ia_gfid)) {
+ sources[i] = 0;
+ }
}
+ }
- afr_sh_entry_impunge_setattr (impunge_frame, this);
- return 0;
-out:
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- -1, op_errno);
- return 0;
-}
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source || !healed_sinks[i])
+ continue;
-void
-afr_sh_prepare_new_entry_pending_matrix (int32_t **pending,
- int *child_errno,
- struct iatt *buf,
- unsigned int child_count)
-{
- int midx = 0;
- int idx = 0;
- int i = 0;
-
- midx = afr_index_for_transaction_type (AFR_METADATA_TRANSACTION);
- if (IA_ISDIR (buf->ia_type))
- idx = afr_index_for_transaction_type (AFR_ENTRY_TRANSACTION);
- else if (IA_ISREG (buf->ia_type))
- idx = afr_index_for_transaction_type (AFR_DATA_TRANSACTION);
- else
- idx = -1;
- for (i = 0; i < child_count; i++) {
- if (child_errno[i])
- continue;
- pending[i][midx] = hton32 (1);
- if (idx == -1)
- continue;
- pending[i][idx] = hton32 (1);
+ if (src != -1) {
+ if (!gf_uuid_compare(replies[src].poststat.ia_gfid,
+ replies[i].poststat.ia_gfid))
+ continue;
+ } else if (replies[i].op_errno != ENOENT) {
+ continue;
}
-}
-
-int
-afr_sh_entry_impunge_perform_xattrop (call_frame_t *impunge_frame,
- xlator_t *this)
-{
- int active_src = 0;
- dict_t *xattr = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int32_t op_errno = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
-
- afr_sh_prepare_new_entry_pending_matrix (impunge_local->pending,
- impunge_sh->child_errno,
- &impunge_sh->entrybuf,
- priv->child_count);
- xattr = dict_new ();
- if (!xattr) {
- op_errno = ENOMEM;
- goto out;
- }
-
- afr_set_pending_dict (priv, xattr, impunge_local->pending, active_src,
- LOCAL_LAST);
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_xattrop_cbk,
- (void *) (long) active_src,
- priv->children[active_src],
- priv->children[active_src]->fops->xattrop,
- &impunge_local->loc, GF_XATTROP_ADD_ARRAY, xattr, NULL);
+ ret |= afr_selfheal_recreate_entry(frame, i, source, sources, fd->inode,
+ name, inode, replies);
+ }
- if (xattr)
- dict_unref (xattr);
- return 0;
-out:
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- -1, op_errno);
- return 0;
+ return ret;
}
-int
-afr_sh_entry_impunge_newfile_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static int
+__afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *inode, int source,
+ unsigned char *sources, unsigned char *healed_sinks,
+ unsigned char *locked_on, struct afr_reply *replies)
{
- int call_count = 0;
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- impunge_sh->child_errno[child_index] = op_errno;
- gf_log (this->name, GF_LOG_ERROR,
- "creation of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- } else {
- impunge_sh->child_errno[child_index] = 0;
- }
-
- call_count = afr_frame_return (impunge_frame);
- if (call_count == 0) {
- if (!afr_errno_count (NULL, impunge_sh->child_errno,
- priv->child_count, 0)) {
- // new_file creation failed every where
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- -1, op_errno);
- goto out;
- }
- afr_sh_entry_impunge_perform_xattrop (impunge_frame, this);
- }
-out:
- return 0;
+ int ret = -1;
+
+ if (source < 0)
+ ret = __afr_selfheal_merge_dirent(frame, this, fd, name, inode, sources,
+ healed_sinks, locked_on, replies);
+ else
+ ret = __afr_selfheal_heal_dirent(frame, this, fd, name, inode, source,
+ sources, healed_sinks, locked_on,
+ replies);
+ return ret;
}
-int
-afr_sh_entry_impunge_hardlink_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static gf_boolean_t
+is_full_heal_marker_present(xlator_t *this, dict_t *xdata, int idx)
{
- int call_count = 0;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
-
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
-
- if (IA_IFLNK == impunge_sh->entrybuf.ia_type) {
- //For symlinks impunge is attempted un-conditionally
- //So the file can already exist.
- if ((op_ret < 0) && (op_errno == EEXIST))
- op_ret = 0;
- }
-
- call_count = afr_frame_return (impunge_frame);
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
-
- return 0;
+ int i = 0;
+ int pending[3] = {
+ 0,
+ };
+ void *pending_raw = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (!xdata)
+ return _gf_false;
+
+ /* Iterate over each of the priv->pending_keys[] elements and then
+ * see if any of them have data segment non-zero. If they do, return
+ * true. Else return false.
+ */
+ for (i = 0; i < priv->child_count; i++) {
+ if (dict_get_ptr(xdata, priv->pending_key[i], &pending_raw))
+ continue;
+
+ if (!pending_raw)
+ continue;
+
+ memcpy(pending, pending_raw, sizeof(pending));
+ if (ntoh32(pending[idx]))
+ return _gf_true;
+ }
+
+ return _gf_false;
}
-int
-afr_sh_entry_impunge_hardlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
+static gf_boolean_t
+afr_need_full_heal(xlator_t *this, struct afr_reply *replies, int source,
+ unsigned char *healed_sinks, afr_transaction_type type)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- loc_t *loc = NULL;
- struct iatt *buf = NULL;
- loc_t oldloc = {0};
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- loc = &impunge_local->loc;
- buf = &impunge_sh->entrybuf;
-
- oldloc.inode = inode_ref (loc->inode);
- uuid_copy (oldloc.gfid, buf->ia_gfid);
- gf_log (this->name, GF_LOG_DEBUG, "linking missing file %s on %s",
- loc->path, priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_hardlink_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->link,
- &oldloc, loc, NULL);
- loc_wipe (&oldloc);
+ int i = 0;
+ int idx = 0;
+ afr_private_t *priv = NULL;
- return 0;
-}
+ priv = this->private;
-int
-afr_sh_nameless_lookup_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- if (op_ret < 0) {
- afr_sh_entry_impunge_create_file (impunge_frame, this,
- (long)cookie);
- } else {
- afr_sh_entry_impunge_hardlink (impunge_frame, this,
- (long)cookie);
- }
- return 0;
-}
+ if (!priv->esh_granular)
+ return _gf_true;
-int
-afr_sh_entry_impunge_check_hardlink (call_frame_t *impunge_frame,
- xlator_t *this,
- int child_index, struct iatt *stbuf)
-{
- afr_private_t *priv = NULL;
- call_frame_t *frame = NULL;
- afr_local_t *impunge_local = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- afr_self_heal_t *sh = NULL;
- loc_t *loc = NULL;
- dict_t *xattr_req = NULL;
- loc_t oldloc = {0};
- int ret = -1;
-
- priv = this->private;
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
- loc = &impunge_local->loc;
-
- xattr_req = dict_new ();
- if (!xattr_req)
- goto out;
- oldloc.inode = inode_ref (loc->inode);
- uuid_copy (oldloc.gfid, stbuf->ia_gfid);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_nameless_lookup_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->lookup,
- &oldloc, xattr_req);
- ret = 0;
-out:
- if (xattr_req)
- dict_unref (xattr_req);
- loc_wipe (&oldloc);
- if (ret)
- sh->impunge_done (frame, this, -1, ENOMEM);
- return 0;
-}
+ if (type != AFR_ENTRY_TRANSACTION)
+ return _gf_true;
-int
-afr_sh_entry_impunge_mknod (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *stbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- dict_t *dict = NULL;
- int ret = 0;
+ priv = this->private;
+ idx = afr_index_for_transaction_type(AFR_DATA_TRANSACTION);
- priv = this->private;
- impunge_local = impunge_frame->local;
+ /* If there is a clear source, check whether the full-heal-indicator
+ * is present in its xdata. Otherwise, we need to examine all the
+ * participating bricks and then figure if *even* one of them has a
+ * full-heal-indicator.
+ */
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing file %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
+ if (source != -1) {
+ if (is_full_heal_marker_present(this, replies[source].xdata, idx))
+ return _gf_true;
+ }
- dict = dict_new ();
- if (!dict)
- gf_log (this->name, GF_LOG_ERROR, "Out of memory");
+ /* else ..*/
- GF_ASSERT (!uuid_is_null (stbuf->ia_gfid));
- ret = afr_set_dict_gfid (dict, stbuf->ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
- impunge_local->loc.path);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->mknod,
- &impunge_local->loc,
- st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
- makedev (ia_major (stbuf->ia_rdev),
- ia_minor (stbuf->ia_rdev)), 0, dict);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!healed_sinks[i])
+ continue;
- if (dict)
- dict_unref (dict);
+ if (is_full_heal_marker_present(this, replies[i].xdata, idx))
+ return _gf_true;
+ }
- return 0;
+ return _gf_false;
}
-
-
-int
-afr_sh_entry_impunge_mkdir (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *stbuf)
+static int
+__afr_selfheal_entry_finalize_source(xlator_t *this, unsigned char *sources,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ uint64_t *witness)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- dict_t *dict = NULL;
-
- int ret = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ int sources_count = 0;
+ int i = 0;
- dict = dict_new ();
- if (!dict) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- return 0;
- }
-
- GF_ASSERT (!uuid_is_null (stbuf->ia_gfid));
- ret = afr_set_dict_gfid (dict, stbuf->ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_INFO, "%s: gfid set failed",
- impunge_local->loc.path);
+ priv = this->private;
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing directory %s on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
+ sources_count = AFR_COUNT(sources, priv->child_count);
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->mkdir,
- &impunge_local->loc,
- st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type),
- 0, dict);
+ if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) ||
+ !sources_count || afr_does_witness_exist(this, witness)) {
+ memset(sources, 0, sizeof(*sources) * priv->child_count);
+ afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
+ return -1;
+ }
- if (dict)
- dict_unref (dict);
+ source = afr_choose_source_by_policy(priv, sources, AFR_ENTRY_TRANSACTION);
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_symlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, const char *linkname)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- dict_t *dict = NULL;
- struct iatt *buf = NULL;
- int ret = 0;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
-
- buf = &impunge_local->cont.symlink.buf;
-
- dict = dict_new ();
- if (!dict) {
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- -1, ENOMEM);
- goto out;
+ /*If the selected source does not blame any other brick, then mark
+ * everything as sink to trigger conservative merge.
+ */
+ if (source != -1 && !AFR_COUNT(healed_sinks, priv->child_count)) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_on[i]) {
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
}
+ return -1;
+ }
- GF_ASSERT (!uuid_is_null (buf->ia_gfid));
- ret = afr_set_dict_gfid (dict, buf->ia_gfid);
- if (ret)
- gf_log (this->name, GF_LOG_INFO,
- "%s: dict set gfid failed",
- impunge_local->loc.path);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "creating missing symlink %s -> %s on %s",
- impunge_local->loc.path, linkname,
- priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_newfile_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->symlink,
- linkname, &impunge_local->loc, 0, dict);
-
- if (dict)
- dict_unref (dict);
-out:
- return 0;
+ return source;
}
-
int
-afr_sh_entry_impunge_symlink_unlink_cbk (call_frame_t *impunge_frame,
- void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+__afr_selfheal_entry_prepare(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ struct afr_reply *replies, int *source_p,
+ unsigned char *pflag)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = -1;
- int call_count = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "unlink of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- goto out;
- }
-
- afr_sh_entry_impunge_symlink (impunge_frame, this, child_index,
- impunge_sh->linkname);
-
- return 0;
-out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
-
- return 0;
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ uint64_t *witness = NULL;
+
+ priv = this->private;
+
+ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
+ if (ret)
+ return ret;
+
+ witness = alloca0(sizeof(*witness) * priv->child_count);
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_ENTRY_TRANSACTION, locked_on, sources,
+ sinks, witness, pflag);
+ if (ret)
+ return ret;
+
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
+
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count);
+
+ source = __afr_selfheal_entry_finalize_source(this, sources, healed_sinks,
+ locked_on, replies, witness);
+
+ if (source < 0) {
+ /* If source is < 0 (typically split-brain), we perform a
+ conservative merge of entries rather than erroring out */
+ }
+ *source_p = source;
+
+ return ret;
}
-
-int
-afr_sh_entry_impunge_symlink_unlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
+static int
+afr_selfheal_entry_dirent(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *name, inode_t *parent_idx_inode,
+ xlator_t *subvol, gf_boolean_t full_crawl)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "unlinking symlink %s with wrong target on %s",
- impunge_local->loc.path,
- priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_symlink_unlink_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->unlink,
- &impunge_local->loc, 0, NULL);
-
+ int ret = 0;
+ int source = -1;
+ unsigned char *locked_on = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ inode_t *inode = NULL;
+ struct afr_reply *replies = NULL;
+ struct afr_reply *par_replies = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+
+ priv = this->private;
+
+ if (afr_is_private_directory(priv, fd->inode->gfid, name,
+ GF_CLIENT_PID_SELF_HEALD)) {
return 0;
+ }
+
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
+ ret = dict_set_int32_sizen(xattr, GF_GFIDLESS_LOOKUP, 1);
+ if (ret) {
+ dict_unref(xattr);
+ return -1;
+ }
+
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+ locked_on = alloca0(priv->child_count);
+
+ replies = alloca0(priv->child_count * sizeof(*replies));
+ par_replies = alloca0(priv->child_count * sizeof(*par_replies));
+
+ ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
+ locked_on);
+ {
+ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "entry self-heal as only %d sub-volumes "
+ " could be locked in %s domain",
+ uuid_utoa(fd->inode->gfid), ret, this->name);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_entry_prepare(frame, this, fd->inode, locked_on,
+ sources, sinks, healed_sinks,
+ par_replies, &source, NULL);
+ if (ret < 0)
+ goto unlock;
+
+ inode = afr_selfheal_unlocked_lookup_on(frame, fd->inode, name, replies,
+ locked_on, xattr);
+ if (!inode) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_entry_dirent(frame, this, fd, name, inode, source,
+ sources, healed_sinks, locked_on,
+ replies);
+
+ if ((ret == 0) && (priv->esh_granular) && parent_idx_inode) {
+ ret = afr_shd_entry_purge(subvol, parent_idx_inode, name,
+ inode->ia_type);
+ /* Why is ret force-set to 0? We do not care about
+ * index purge failing for full heal as it is quite
+ * possible during replace-brick that not all files
+ * and directories have their name indices present in
+ * entry-changes/.
+ */
+ ret = 0;
+ }
+ }
+
+unlock:
+ afr_selfheal_unentrylk(frame, this, fd->inode, this->name, NULL, locked_on,
+ NULL);
+ if (inode)
+ inode_unref(inode);
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+ if (par_replies)
+ afr_replies_wipe(par_replies, priv->child_count);
+ if (xattr)
+ dict_unref(xattr);
+
+ return ret;
}
-
-int
-afr_sh_entry_impunge_readlink_sink_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *linkname, struct iatt *sbuf, dict_t *xdata)
+static inode_t *
+afr_shd_entry_changes_index_inode(xlator_t *this, xlator_t *subvol,
+ uuid_t pargfid)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = -1;
- int call_count = -1;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
-
- child_index = (long) cookie;
-
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_INFO,
- "readlink of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- goto out;
- }
-
- /* symlink doesn't exist on the sink */
-
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- afr_sh_entry_impunge_symlink (impunge_frame, this,
- child_index, impunge_sh->linkname);
- return 0;
- }
-
-
- /* symlink exists on the sink, so check if targets match */
-
- if (strcmp (linkname, impunge_sh->linkname) == 0) {
- /* targets match, nothing to do */
-
- goto out;
- } else {
- /*
- * Hah! Sneaky wolf in sheep's clothing!
- */
- afr_sh_entry_impunge_symlink_unlink (impunge_frame, this,
- child_index);
- return 0;
- }
+ int ret = -1;
+ void *index_gfid = NULL;
+ loc_t rootloc = {
+ 0,
+ };
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ inode_t *inode = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+
+ rootloc.inode = inode_ref(this->itable->root);
+ gf_uuid_copy(rootloc.gfid, rootloc.inode->gfid);
+
+ ret = syncop_getxattr(subvol, &rootloc, &xattr,
+ GF_XATTROP_ENTRY_CHANGES_GFID, NULL, NULL);
+ if (ret || !xattr) {
+ errno = -ret;
+ goto out;
+ }
+
+ ret = dict_get_ptr(xattr, GF_XATTROP_ENTRY_CHANGES_GFID, &index_gfid);
+ if (ret) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ loc.inode = inode_new(this->itable);
+ if (!loc.inode) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_copy(loc.pargfid, index_gfid);
+ loc.name = gf_strdup(uuid_utoa(pargfid));
+
+ ret = syncop_lookup(subvol, &loc, &iatt, NULL, NULL, NULL);
+ if (ret < 0) {
+ errno = -ret;
+ goto out;
+ }
+
+ inode = inode_link(loc.inode, NULL, NULL, &iatt);
out:
- LOCK (&impunge_frame->lock);
- {
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
+ if (xattr)
+ dict_unref(xattr);
+ loc_wipe(&rootloc);
+ GF_FREE((char *)loc.name);
+ loc_wipe(&loc);
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
-
- return 0;
+ return inode;
}
-
-int
-afr_sh_entry_impunge_readlink_sink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "checking symlink target of %s on %s",
- impunge_local->loc.path, priv->children[child_index]->name);
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_sink_cbk,
- (void *) (long) child_index,
- priv->children[child_index],
- priv->children[child_index]->fops->readlink,
- &impunge_local->loc, 4096, NULL);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_readlink_cbk (call_frame_t *impunge_frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *linkname, struct iatt *sbuf, dict_t *xdata)
+static int
+afr_selfheal_entry_do_subvol(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int child)
{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int child_index = -1;
- int call_count = -1;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
-
- child_index = (long) cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "readlink of %s on %s failed (%s)",
- impunge_local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- goto out;
- }
-
- impunge_sh->linkname = gf_strdup (linkname);
- afr_sh_entry_impunge_readlink_sink (impunge_frame, this, child_index);
-
- return 0;
-
-out:
- LOCK (&impunge_frame->lock);
+ int ret = 0;
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ off_t offset = 0;
+ call_frame_t *iter_frame = NULL;
+ xlator_t *subvol = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t mismatch = _gf_false;
+ afr_local_t *local = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ priv = this->private;
+ subvol = priv->children[child];
+
+ INIT_LIST_HEAD(&entries.list);
+
+ local = frame->local;
+
+ iter_frame = afr_copy_frame(frame);
+ if (!iter_frame)
+ return -ENOMEM;
+
+ loc.inode = afr_shd_entry_changes_index_inode(this, subvol,
+ fd->inode->gfid);
+
+ while ((ret = syncop_readdir(subvol, fd, 131072, offset, &entries, NULL,
+ NULL))) {
+ if (ret > 0)
+ ret = 0;
+ list_for_each_entry(entry, &entries.list, list)
{
- call_count = --impunge_local->call_count;
- }
- UNLOCK (&impunge_frame->lock);
-
- if (call_count == 0)
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_readlink (call_frame_t *impunge_frame, xlator_t *this,
- int child_index, struct iatt *stbuf)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int active_src = -1;
-
- priv = this->private;
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- active_src = impunge_sh->active_source;
- impunge_local->cont.symlink.buf = *stbuf;
-
- STACK_WIND_COOKIE (impunge_frame, afr_sh_entry_impunge_readlink_cbk,
- (void *) (long) child_index,
- priv->children[active_src],
- priv->children[active_src]->fops->readlink,
- &impunge_local->loc, 4096, NULL);
-
- return 0;
-}
-
-int
-afr_sh_entry_impunge_create (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
-{
- call_frame_t *frame = NULL;
- afr_local_t *impunge_local = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- ia_type_t type = IA_INVAL;
- int active_src = 0;
- struct iatt *buf = NULL;
-
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
- active_src = impunge_sh->active_source;
- afr_update_loc_gfids (&impunge_local->loc, &impunge_sh->entrybuf,
- &impunge_sh->parentbuf);
-
- buf = &impunge_sh->entrybuf;
- type = buf->ia_type;
-
- switch (type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- case IA_IFLNK:
- afr_sh_entry_impunge_check_hardlink (impunge_frame, this,
- child_index, buf);
- break;
- case IA_IFDIR:
- afr_sh_entry_impunge_mkdir (impunge_frame, this,
- child_index, buf);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "%s has unknown file type on %s: 0%o",
- impunge_local->loc.path,
- priv->children[active_src]->name, type);
- sh->impunge_done (frame, this, -1, EINVAL);
- break;
- }
+ offset = entry->d_off;
- return 0;
-}
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ continue;
-int
-afr_sh_entry_impunge_create_file (call_frame_t *impunge_frame, xlator_t *this,
- int child_index)
-{
- call_frame_t *frame = NULL;
- afr_local_t *impunge_local = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- ia_type_t type = IA_INVAL;
- int active_src = 0;
- struct iatt *buf = NULL;
-
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
- active_src = impunge_sh->active_source;
- buf = &impunge_sh->entrybuf;
- type = buf->ia_type;
-
- switch (type) {
- case IA_IFSOCK:
- case IA_IFREG:
- case IA_IFBLK:
- case IA_IFCHR:
- case IA_IFIFO:
- afr_sh_entry_impunge_mknod (impunge_frame, this,
- child_index, buf);
+ ret = afr_selfheal_entry_dirent(iter_frame, this, fd, entry->d_name,
+ loc.inode, subvol,
+ local->need_full_crawl);
+ AFR_STACK_RESET(iter_frame);
+ if (iter_frame->local == NULL) {
+ ret = -ENOTCONN;
break;
- case IA_IFLNK:
- afr_sh_entry_impunge_readlink (impunge_frame, this,
- child_index, buf);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR,
- "%s has unknown file type on %s: 0%o",
- impunge_local->loc.path,
- priv->children[active_src]->name, type);
- sh->impunge_done (frame, this, -1, EINVAL);
+ }
+
+ if (ret == -1) {
+ /* gfid or type mismatch. */
+ mismatch = _gf_true;
+ ret = 0;
+ }
+ if (ret)
break;
}
- return 0;
-}
-
-gf_boolean_t
-afr_sh_need_recreate (afr_self_heal_t *impunge_sh, unsigned int child,
- unsigned int child_count)
-{
- gf_boolean_t recreate = _gf_false;
-
- GF_ASSERT (impunge_sh->child_errno);
-
- if (child == impunge_sh->active_source)
- goto out;
+ gf_dirent_free(&entries);
+ if (ret)
+ break;
+ }
- if (IA_IFLNK == impunge_sh->entrybuf.ia_type) {
- recreate = _gf_true;
- goto out;
- }
+ loc_wipe(&loc);
- if (impunge_sh->child_errno[child] == ENOENT)
- recreate = _gf_true;
-out:
- return recreate;
+ AFR_STACK_DESTROY(iter_frame);
+ if (mismatch == _gf_true)
+ /* undo pending will be skipped */
+ ret = -1;
+ return ret;
}
-unsigned int
-afr_sh_recreate_count (afr_self_heal_t *impunge_sh, int *sources,
- unsigned int child_count)
+static int
+afr_selfheal_entry_granular_dirent(xlator_t *subvol, gf_dirent_t *entry,
+ loc_t *parent, void *data)
{
- int count = 0;
- int i = 0;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ afr_granular_esh_args_t *args = data;
+
+ /* Look up the actual inode associated with entry. If the lookup returns
+ * ESTALE or ENOENT, then it means we have a stale index. Remove it.
+ * This is analogous to the check in afr_shd_index_heal() except that
+ * here it is achieved through LOOKUP and in afr_shd_index_heal() through
+ * a GETXATTR.
+ */
+
+ loc.inode = inode_new(args->xl->itable);
+ loc.parent = inode_ref(args->heal_fd->inode);
+ gf_uuid_copy(loc.pargfid, loc.parent->gfid);
+ loc.name = entry->d_name;
+
+ ret = syncop_lookup(args->xl, &loc, &iatt, NULL, NULL, NULL);
+ if ((ret == -ENOENT) || (ret == -ESTALE)) {
+ /* The name indices under the pgfid index dir are guaranteed
+ * to be regular files. Hence the hardcoding.
+ */
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, IA_IFREG);
+ ret = 0;
+ goto out;
+ }
+ /* TBD: afr_shd_zero_xattrop? */
- for (i = 0; i < child_count; i++) {
- if (afr_sh_need_recreate (impunge_sh, i, child_count))
- count++;
- }
+ ret = afr_selfheal_entry_dirent(args->frame, args->xl, args->heal_fd,
+ entry->d_name, parent->inode, subvol,
+ _gf_false);
+ AFR_STACK_RESET(args->frame);
+ if (args->frame->local == NULL)
+ ret = -ENOTCONN;
- return count;
-}
+ if (ret == -1)
+ args->mismatch = _gf_true;
-int
-afr_sh_entry_call_impunge_recreate (call_frame_t *impunge_frame,
- xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- unsigned int recreate_count = 0;
- int i = 0;
- int active_src = 0;
-
- priv = this->private;
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
- active_src = impunge_sh->active_source;
- impunge_sh->entrybuf = impunge_sh->buf[active_src];
- impunge_sh->parentbuf = impunge_sh->parentbufs[active_src];
- recreate_count = afr_sh_recreate_count (impunge_sh, sh->sources,
- priv->child_count);
- if (!recreate_count) {
- afr_sh_entry_call_impunge_done (impunge_frame, this, 0, 0);
- goto out;
- }
- impunge_local->call_count = recreate_count;
- for (i = 0; i < priv->child_count; i++) {
- if (!impunge_local->child_up[i]) {
- impunge_sh->child_errno[i] = ENOTCONN;
- continue;
- }
- if (!afr_sh_need_recreate (impunge_sh, i, priv->child_count)) {
- impunge_sh->child_errno[i] = EEXIST;
- continue;
- }
- }
- for (i = 0; i < priv->child_count; i++) {
- if (!afr_sh_need_recreate (impunge_sh, i, priv->child_count))
- continue;
- (void)afr_sh_entry_impunge_create (impunge_frame, this, i);
- recreate_count--;
- }
- GF_ASSERT (!recreate_count);
out:
- return 0;
+ loc_wipe(&loc);
+ return 0;
}
-void
-afr_sh_entry_common_lookup_done (call_frame_t *impunge_frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- afr_private_t *priv = NULL;
- afr_local_t *impunge_local = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- call_frame_t *frame = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- unsigned int gfid_miss_count = 0;
- unsigned int children_up_count = 0;
- uuid_t gfid = {0};
- int active_src = 0;
-
- priv = this->private;
- AFR_INIT_SH_FRAME_VALS (impunge_frame, impunge_local, impunge_sh,
- frame, local, sh);
- active_src = impunge_sh->active_source;
-
- if (op_ret < 0)
- goto done;
- if (impunge_sh->child_errno[active_src]) {
- op_ret = -1;
- op_errno = impunge_sh->child_errno[active_src];
- goto done;
- }
-
- gfid_miss_count = afr_gfid_missing_count (this->name,
- impunge_sh->success_children,
- impunge_sh->buf, priv->child_count,
- impunge_local->loc.path);
- children_up_count = afr_up_children_count (impunge_local->child_up,
- priv->child_count);
- if ((gfid_miss_count == children_up_count) &&
- (children_up_count < priv->child_count)) {
- op_ret = -1;
- op_errno = ENODATA;
- gf_log (this->name, GF_LOG_ERROR, "Not all children are up, "
- "gfid should not be assigned in this state for %s",
- impunge_local->loc.path);
- goto done;
- }
-
- if (gfid_miss_count) {
- afr_update_gfid_from_iatts (gfid, impunge_sh->buf,
- impunge_sh->success_children,
- priv->child_count);
- if (uuid_is_null (gfid)) {
- sh->entries_skipped = _gf_true;
- gf_log (this->name, GF_LOG_INFO, "%s: Skipping entry "
- "self-heal because of gfid absence",
- impunge_local->loc.path);
- goto done;
- }
- afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
- afr_sh_entry_common_lookup_done, gfid,
- AFR_LOOKUP_FAIL_CONFLICTS |
- AFR_LOOKUP_FAIL_MISSING_GFIDS,
- NULL);
- } else {
- afr_sh_entry_call_impunge_recreate (impunge_frame, this);
- }
- return;
-done:
- afr_sh_entry_call_impunge_done (impunge_frame, this,
- op_ret, op_errno);
- return;
-}
-
-int
-afr_sh_entry_impunge_entry (call_frame_t *frame, xlator_t *this,
- gf_dirent_t *entry)
+static int
+afr_selfheal_entry_granular(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int subvol_idx, gf_boolean_t is_src)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_self_heal_t *impunge_sh = NULL;
- int ret = -1;
- call_frame_t *impunge_frame = NULL;
- afr_local_t *impunge_local = NULL;
- int active_src = 0;
- int op_errno = 0;
- int op_ret = -1;
-
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
- sh->impunge_done = afr_sh_entry_impunge_entry_done;
-
- if ((strcmp (entry->d_name, ".") == 0)
- || (strcmp (entry->d_name, "..") == 0)) {
-
- gf_log (this->name, GF_LOG_TRACE,
- "skipping inspection of %s under %s",
- entry->d_name, local->loc.path);
- op_ret = 0;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "inspecting existence of %s under %s",
- entry->d_name, local->loc.path);
-
- ret = afr_impunge_frame_create (frame, this, active_src,
- &impunge_frame);
- if (ret) {
- op_errno = -ret;
- goto out;
- }
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ xlator_t *subvol = NULL;
+ afr_private_t *priv = NULL;
+ afr_granular_esh_args_t args = {
+ 0,
+ };
+
+ priv = this->private;
+ subvol = priv->children[subvol_idx];
+
+ args.frame = afr_copy_frame(frame);
+ if (!args.frame)
+ goto out;
+ args.xl = this;
+ /* args.heal_fd represents the fd associated with the original directory
+ * on which entry heal is being attempted.
+ */
+ args.heal_fd = fd;
+
+ /* @subvol here represents the subvolume of AFR where
+ * indices/entry-changes/<pargfid> will be processed
+ */
+ loc.inode = afr_shd_entry_changes_index_inode(this, subvol,
+ fd->inode->gfid);
+ if (!loc.inode) {
+ /* If granular heal failed on the sink (as it might sometimes
+ * because it is the src that would mostly contain the granular
+ * changelogs and the sink's entry-changes would be empty),
+ * do not treat heal as failure.
+ */
+ if (is_src)
+ ret = -errno;
+ else
+ ret = 0;
+ goto out;
+ }
- impunge_local = impunge_frame->local;
- impunge_sh = &impunge_local->self_heal;
- ret = afr_build_child_loc (this, &impunge_local->loc, &local->loc,
- entry->d_name);
- loc_copy (&impunge_sh->parent_loc, &local->loc);
- if (ret != 0) {
- op_errno = ENOMEM;
- goto out;
- }
+ ret = syncop_dir_scan(subvol, &loc, GF_CLIENT_PID_SELF_HEALD, &args,
+ afr_selfheal_entry_granular_dirent);
- afr_sh_common_lookup (impunge_frame, this, &impunge_local->loc,
- afr_sh_entry_common_lookup_done, NULL,
- AFR_LOOKUP_FAIL_CONFLICTS, NULL);
+ loc_wipe(&loc);
- op_ret = 0;
+ if (args.mismatch == _gf_true)
+ ret = -1;
out:
- if (ret) {
- if (impunge_frame)
- AFR_STACK_DESTROY (impunge_frame);
- sh->impunge_done (frame, this, op_ret, op_errno);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- gf_dirent_t *entry = NULL;
- off_t last_offset = 0;
- int active_src = 0;
- int entry_count = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- active_src = sh->active_source;
-
- if (op_ret <= 0) {
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "readdir of %s on subvolume %s failed (%s)",
- local->loc.path,
- priv->children[active_src]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir of %s on subvolume %s complete",
- local->loc.path,
- priv->children[active_src]->name);
- }
-
- afr_sh_entry_impunge_all (frame, this);
- return 0;
- }
-
- list_for_each_entry (entry, &entries->list, list) {
- last_offset = entry->d_off;
- entry_count++;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir'ed %d entries from %s",
- entry_count, priv->children[active_src]->name);
-
- sh->offset = last_offset;
- local->call_count = entry_count;
-
- list_for_each_entry (entry, &entries->list, list) {
- afr_sh_entry_impunge_entry (frame, this, entry);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_impunge_subvol (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int32_t active_src = 0;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
- active_src = sh->active_source;
- gf_log (this->name, GF_LOG_DEBUG, "%s: readdir from offset %zd",
- local->loc.path, sh->offset);
-
- STACK_WIND (frame, afr_sh_entry_impunge_readdir_cbk,
- priv->children[active_src],
- priv->children[active_src]->fops->readdirp,
- sh->healing_fd, sh->block_size, sh->offset, NULL);
-
- return 0;
+ if (args.frame)
+ AFR_STACK_DESTROY(args.frame);
+ return ret;
}
-
-int
-afr_sh_entry_impunge_all (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- int active_src = -1;
-
- priv = this->private;
- local = frame->local;
- sh = &local->self_heal;
-
- sh->offset = 0;
-
- active_src = next_active_source (frame, this, sh->active_source);
- sh->active_source = active_src;
-
- if (sh->op_failed) {
- afr_sh_entry_finish (frame, this);
- return 0;
- }
-
- if (active_src == -1) {
- /* completed creating missing files on all subvolumes */
- afr_sh_entry_erase_pending (frame, this);
- return 0;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "impunging entries of %s on %s to other sinks",
- local->loc.path, priv->children[active_src]->name);
-
- afr_sh_entry_impunge_subvol (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_entry_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- /* TODO: some of the open's might fail.
- In that case, modify cleanup fn to send flush on those
- fd's which are already open */
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "opendir of %s failed on child %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
- sh->op_failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if (sh->op_failed) {
- afr_sh_entry_finish (frame, this);
- return 0;
- }
- gf_log (this->name, GF_LOG_TRACE,
- "fd for %s opened, commencing sync",
- local->loc.path);
-
- sh->active_source = -1;
- afr_sh_entry_expunge_all (frame, this);
- }
-
- return 0;
-}
-
-
-int
-afr_sh_entry_open (call_frame_t *frame, xlator_t *this)
+static int
+afr_selfheal_entry_do(call_frame_t *frame, xlator_t *this, fd_t *fd, int source,
+ unsigned char *sources, unsigned char *healed_sinks)
{
- int i = 0;
- int call_count = 0;
-
- int source = -1;
- int *sources = NULL;
-
- fd_t *fd = NULL;
-
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = local->self_heal.source;
- sources = local->self_heal.sources;
-
- sh->block_size = 65536; //131072
- sh->offset = 0;
-
- call_count = sh->active_sinks;
- if (source != -1)
- call_count++;
-
- local->call_count = call_count;
-
- fd = fd_create (local->loc.inode, frame->root->pid);
- sh->healing_fd = fd;
-
- if (source != -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "opening directory %s on subvolume %s (source)",
- local->loc.path, priv->children[source]->name);
-
- /* open source */
- STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
- (void *) (long) source,
- priv->children[source],
- priv->children[source]->fops->opendir,
- &local->loc, fd, NULL);
- call_count--;
- }
-
- /* open sinks */
- for (i = 0; i < priv->child_count; i++) {
- if (sources[i] || !local->child_up[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "opening directory %s on subvolume %s (sink)",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_entry_opendir_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->opendir,
- &local->loc, fd, NULL);
-
- if (!--call_count)
- break;
- }
-
- return 0;
-}
-
+ int i = 0;
+ int ret = 0;
+ gf_boolean_t mismatch = _gf_false;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "performing entry selfheal on %s", uuid_utoa(fd->inode->gfid));
+
+ for (i = 0; i < priv->child_count; i++) {
+ /* Expunge */
+ if (!healed_sinks[i])
+ continue;
+
+ if (!local->need_full_crawl)
+ /* Why call afr_selfheal_entry_granular() on a "healed sink",
+ * given that it is the source that contains the granular
+ * indices?
+ * If the index for this directory is non-existent or empty on
+ * this subvol (=> clear sink), then it will return early
+ * without failure status.
+ * If the index is non-empty and it is yet a 'healed sink', then
+ * it is due to a split-brain in which case we anyway need to
+ * crawl the indices/entry-changes/pargfid directory.
+ */
+ ret = afr_selfheal_entry_granular(frame, this, fd, i, _gf_false);
+ else
+ ret = afr_selfheal_entry_do_subvol(frame, this, fd, i);
-int
-afr_sh_entry_sync_prepare (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- afr_sh_mark_source_sinks (frame, this);
- if (source != -1)
- sh->success[source] = 1;
-
- if (sh->active_sinks == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "no active sinks for self-heal on dir %s",
- local->loc.path);
- afr_sh_entry_finish (frame, this);
- return 0;
- }
- if (source == -1 && sh->active_sinks < 2) {
- gf_log (this->name, GF_LOG_TRACE,
- "cannot sync with 0 sources and 1 sink on dir %s",
- local->loc.path);
- afr_sh_entry_finish (frame, this);
- return 0;
+ if (ret == -1) {
+ /* gfid or type mismatch. */
+ mismatch = _gf_true;
+ ret = 0;
}
+ if (ret)
+ break;
+ }
- if (source != -1)
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing directory %s from subvolume %s to "
- "%d other",
- local->loc.path, priv->children[source]->name,
- sh->active_sinks);
+ if (!ret && source != -1) {
+ /* Impunge */
+ if (local->need_full_crawl)
+ ret = afr_selfheal_entry_do_subvol(frame, this, fd, source);
else
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sources for %s found. "
- "merging all entries as a conservative decision",
- local->loc.path);
-
- afr_sh_entry_open (frame, this);
-
- return 0;
+ ret = afr_selfheal_entry_granular(frame, this, fd, source,
+ _gf_true);
+ }
+
+ if (mismatch == _gf_true)
+ /* undo pending will be skipped */
+ ret = -1;
+ return ret;
}
-
-void
-afr_sh_entry_fix (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+static int
+__afr_selfheal_entry(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
- int nsources = 0;
- int32_t subvol_status = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (op_ret < 0) {
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- afr_sh_entry_finish (frame, this);
- goto out;
- }
-
- if (sh->forced_merge) {
- sh->source = -1;
- goto heal;
- }
-
- nsources = afr_build_sources (this, sh->xattr, sh->buf,
- sh->pending_matrix, sh->sources,
- sh->success_children,
- AFR_ENTRY_TRANSACTION, &subvol_status,
- _gf_true);
- if ((subvol_status & ALL_FOOLS) ||
- (subvol_status & SPLIT_BRAIN)) {
- gf_log (this->name, GF_LOG_INFO, "%s: Performing conservative "
- "merge", local->loc.path);
- source = -1;
- memset (sh->sources, 0,
- sizeof (*sh->sources) * priv->child_count);
- } else if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_entry_finish (frame, this);
- return;
- } else {
- source = afr_sh_select_source (sh->sources, priv->child_count);
- }
-
- sh->source = source;
-
- afr_reset_children (sh->fresh_children, priv->child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- if (sh->source >= 0)
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
-
-heal:
- afr_sh_entry_sync_prepare (frame, this);
+ int ret = -1;
+ int source = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *postop_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
+ struct afr_reply *locked_replies = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t did_sh = _gf_true;
+
+ priv = this->private;
+ local = frame->local;
+
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+ undid_pending = alloca0(priv->child_count);
+ data_lock = alloca0(priv->child_count);
+ postop_lock = alloca0(priv->child_count);
+
+ locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
+
+ ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
+ data_lock);
+ {
+ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "entry self-heal as only %d sub-volumes could "
+ "be locked in %s domain",
+ uuid_utoa(fd->inode->gfid), ret, this->name);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_entry_prepare(frame, this, fd->inode, data_lock,
+ sources, sinks, healed_sinks,
+ locked_replies, &source, NULL);
+ if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
+ did_sh = _gf_false;
+ goto unlock;
+ }
+
+ local->need_full_crawl = afr_need_full_heal(
+ this, locked_replies, source, healed_sinks, AFR_ENTRY_TRANSACTION);
+ }
+unlock:
+ afr_selfheal_unentrylk(frame, this, fd->inode, this->name, NULL, data_lock,
+ NULL);
+ if (ret < 0)
+ goto out;
+
+ if (!did_sh)
+ goto out;
+
+ ret = afr_selfheal_entry_do(frame, this, fd, source, sources, healed_sinks);
+ if (ret)
+ goto out;
+
+ /* Take entrylks in xlator domain before doing post-op (undo-pending) in
+ * entry self-heal. This is to prevent a parallel name self-heal on
+ * an entry under @fd->inode from reading pending xattrs while it is
+ * being modified by SHD after entry sh below, given that
+ * name self-heal takes locks ONLY in xlator domain and is free to read
+ * pending changelog in the absence of the following locking.
+ */
+ ret = afr_selfheal_entrylk(frame, this, fd->inode, this->name, NULL,
+ postop_lock);
+ {
+ if (AFR_CMP(data_lock, postop_lock, priv->child_count) != 0) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "post-op after entry self-heal as %d "
+ "sub-volumes, as opposed to %d, "
+ "could be locked in %s domain",
+ uuid_utoa(fd->inode->gfid), ret,
+ AFR_COUNT(data_lock, priv->child_count), this->name);
+ ret = -ENOTCONN;
+ goto postop_unlock;
+ }
+
+ afr_selfheal_restore_time(frame, this, fd->inode, source, healed_sinks,
+ locked_replies);
+ ret = afr_selfheal_undo_pending(
+ frame, this, fd->inode, sources, sinks, healed_sinks, undid_pending,
+ AFR_ENTRY_TRANSACTION, locked_replies, postop_lock);
+ }
+postop_unlock:
+ afr_selfheal_unentrylk(frame, this, fd->inode, this->name, NULL,
+ postop_lock, NULL);
out:
- return;
+ if (did_sh)
+ afr_log_selfheal(fd->inode->gfid, this, ret, "entry", source, sources,
+ healed_sinks);
+ else
+ ret = 1;
+
+ if (locked_replies)
+ afr_replies_wipe(locked_replies, priv->child_count);
+ return ret;
}
-int
-afr_sh_post_nonblocking_entry_cbk (call_frame_t *frame, xlator_t *this)
+static fd_t *
+afr_selfheal_data_opendir(xlator_t *this, inode_t *inode)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
- sh = &local->self_heal;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Non Blocking entrylks "
- "failed for %s.", local->loc.path);
- sh->op_failed = 1;
- afr_sh_entry_done (frame, this);
- } else {
-
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking entrylks done "
- "for %s. Proceeding to FOP", local->loc.path);
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_entry_fix, NULL,
- AFR_LOOKUP_FAIL_CONFLICTS |
- AFR_LOOKUP_FAIL_MISSING_GFIDS,
- NULL);
- }
-
- return 0;
+ loc_t loc = {
+ 0,
+ };
+ int ret = 0;
+ fd_t *fd = NULL;
+
+ fd = fd_create(inode, 0);
+ if (!fd)
+ return NULL;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ ret = syncop_opendir(this, &loc, fd, NULL, NULL);
+ if (ret) {
+ fd_unref(fd);
+ fd = NULL;
+ } else {
+ fd_bind(fd);
+ }
+
+ loc_wipe(&loc);
+ return fd;
}
int
-afr_self_heal_entry (call_frame_t *frame, xlator_t *this)
+afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
-
- priv = this->private;
- local = frame->local;
-
- if (local->self_heal.do_entry_self_heal && priv->entry_self_heal) {
- afr_sh_entrylk (frame, this, &local->loc, NULL,
- afr_sh_post_nonblocking_entry_cbk);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "proceeding to completion on %s",
- local->loc.path);
- afr_sh_entry_done (frame, this);
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ unsigned char *locked_on = NULL;
+ fd_t *fd = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ fd = afr_selfheal_data_opendir(this, inode);
+ if (!fd)
+ return -EIO;
+
+ locked_on = alloca0(priv->child_count);
+
+ ret = afr_selfheal_tie_breaker_entrylk(frame, this, inode, priv->sh_domain,
+ NULL, locked_on);
+ {
+ if (ret < priv->child_count) {
+ gf_msg_debug(this->name, 0,
+ "%s: Skipping "
+ "entry self-heal as only %d sub-volumes could "
+ "be locked in %s domain",
+ uuid_utoa(fd->inode->gfid), ret, priv->sh_domain);
+ /* Either less than two subvols available, or another
+ selfheal (from another server) is in progress. Skip
+ for now in any case there isn't anything to do.
+ */
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_entry(frame, this, fd, locked_on);
+ }
+unlock:
+ afr_selfheal_unentrylk(frame, this, inode, priv->sh_domain, NULL, locked_on,
+ NULL);
+
+ if (fd)
+ fd_unref(fd);
+
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-metadata.c b/xlators/cluster/afr/src/afr-self-heal-metadata.c
index 1bb22a09b79..03f43bad16e 100644
--- a/xlators/cluster/afr/src/afr-self-heal-metadata.c
+++ b/xlators/cluster/afr/src/afr-self-heal-metadata.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,592 +8,539 @@
cases as published by the Free Software Foundation.
*/
-#include <libgen.h>
-#include <unistd.h>
-#include <fnmatch.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <signal.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
#include "afr.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "list.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat-errno.h"
-#include "compat.h"
-#include "byte-order.h"
-
-#include "afr-transaction.h"
#include "afr-self-heal.h"
-#include "afr-self-heal-common.h"
-
-
-int
-afr_sh_metadata_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
-
- local = frame->local;
- sh = &local->self_heal;
-
- afr_sh_reset (frame, this);
- if (local->govinda_gOvinda) {
- gf_log (this->name, GF_LOG_INFO,
- "split-brain detected, aborting selfheal of %s",
- local->loc.path);
- sh->op_failed = 1;
- sh->completion_cbk (frame, this);
- } else {
- if (IA_ISDIR (sh->type)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "proceeding to entry check on %s",
- local->loc.path);
- afr_self_heal_entry (frame, this);
- return 0;
- }
- gf_log (this->name, GF_LOG_DEBUG,
- "proceeding to data check on %s",
- local->loc.path);
- afr_self_heal_data (frame, this);
- }
+#include <glusterfs/byte-order.h>
+#include "protocol-common.h"
+#include <glusterfs/events.h>
- return 0;
-}
+#define AFR_HEAL_ATTR (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE)
-int
-afr_sh_inode_unlock (call_frame_t *frame, xlator_t *this)
+static gf_boolean_t
+_afr_ignorable_key_match(dict_t *d, char *k, data_t *val, void *mdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->lock_cbk = afr_sh_metadata_done;
- afr_unlock (frame, this);
-
- return 0;
+ return afr_is_xattr_ignorable(k);
}
-int
-afr_sh_metadata_finish (call_frame_t *frame, xlator_t *this)
-{
- afr_sh_inode_unlock (frame, this);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_erase_pending_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xattr, dict_t *xdata)
+void
+afr_delete_ignorable_xattrs(dict_t *xattr)
{
- afr_local_t *local = NULL;
- int call_count = 0;
- long i = 0;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
-
- local = frame->local;
- priv = this->private;
- sh = &local->self_heal;
- i = (long)cookie;
-
- if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
- (!IA_ISDIR (sh->buf[sh->source].ia_type))) {
- afr_children_add_child (sh->fresh_children, i,
- priv->child_count);
- }
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- if ((!IA_ISREG (sh->buf[sh->source].ia_type)) &&
- (!IA_ISDIR (sh->buf[sh->source].ia_type))) {
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
- }
- afr_sh_metadata_finish (frame, this);
- }
-
- return 0;
+ dict_foreach_match(xattr, _afr_ignorable_key_match, NULL,
+ dict_remove_foreach_fn, NULL);
}
-
int
-afr_sh_metadata_erase_pending (call_frame_t *frame, xlator_t *this)
+__afr_selfheal_metadata_do(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, unsigned char *healed_sinks,
+ struct afr_reply *locked_replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int i = 0;
- dict_t **erase_xattr = NULL;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- afr_sh_pending_to_delta (priv, sh->xattr, sh->delta_matrix,
- sh->success, priv->child_count,
- AFR_METADATA_TRANSACTION);
-
- erase_xattr = GF_CALLOC (sizeof (*erase_xattr), priv->child_count,
- gf_afr_mt_dict_t);
- if (!erase_xattr)
- return -ENOMEM;
-
- for (i = 0; i < priv->child_count; i++) {
- if (sh->xattr[i]) {
- call_count++;
-
- erase_xattr[i] = get_new_dict();
- dict_ref (erase_xattr[i]);
- }
+ int ret = -1;
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ dict_t *old_xattr = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "performing metadata selfheal on %s", uuid_utoa(inode->gfid));
+
+ ret = syncop_getxattr(priv->children[source], &loc, &xattr, NULL, NULL,
+ NULL);
+ if (ret < 0) {
+ ret = -EIO;
+ goto out;
+ }
+
+ afr_delete_ignorable_xattrs(xattr);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (old_xattr) {
+ dict_unref(old_xattr);
+ old_xattr = NULL;
}
- afr_sh_delta_to_xattr (this, sh->delta_matrix, erase_xattr,
- priv->child_count, AFR_METADATA_TRANSACTION);
-
- local->call_count = call_count;
-
- if (call_count == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "metadata of %s not healed on any subvolume",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (!erase_xattr[i])
- continue;
-
- gf_log (this->name, GF_LOG_TRACE,
- "erasing pending flags from %s on %s",
- local->loc.path, priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_erase_pending_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, erase_xattr[i],
- NULL);
- if (!--call_count)
- break;
- }
-
- for (i = 0; i < priv->child_count; i++) {
- if (erase_xattr[i]) {
- dict_unref (erase_xattr[i]);
- }
- }
- GF_FREE (erase_xattr);
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_sync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int call_count = 0;
- int child_index = 0;
-
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- child_index = (long) cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "setting attributes failed for %s on %s (%s)",
- local->loc.path,
- priv->children[child_index]->name,
- strerror (op_errno));
-
- sh->success[child_index] = 0;
- }
+ if (!healed_sinks[i])
+ continue;
+
+ ret = syncop_setattr(priv->children[i], &loc,
+ &locked_replies[source].poststat, AFR_HEAL_ATTR,
+ NULL, NULL, NULL, NULL);
+ if (ret)
+ healed_sinks[i] = 0;
+
+ ret = syncop_getxattr(priv->children[i], &loc, &old_xattr, 0, NULL,
+ NULL);
+ if (old_xattr) {
+ afr_delete_ignorable_xattrs(old_xattr);
+ ret = syncop_removexattr(priv->children[i], &loc, "", old_xattr,
+ NULL);
+ if (ret)
+ healed_sinks[i] = 0;
}
- UNLOCK (&frame->lock);
- call_count = afr_frame_return (frame);
+ ret = syncop_setxattr(priv->children[i], &loc, xattr, 0, NULL, NULL);
+ if (ret)
+ healed_sinks[i] = 0;
+ }
+ ret = 0;
- if (call_count == 0)
- afr_sh_metadata_erase_pending (frame, this);
+out:
+ loc_wipe(&loc);
+ if (xattr)
+ dict_unref(xattr);
+ if (old_xattr)
+ dict_unref(old_xattr);
- return 0;
+ return ret;
}
-
-int
-afr_sh_metadata_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
+static uint64_t
+mtime_ns(struct iatt *ia)
{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno, xdata);
-
- return 0;
-}
-
+ uint64_t ret;
-int
-afr_sh_metadata_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_sh_metadata_sync_cbk (frame, cookie, this, op_ret, op_errno, xdata);
+ ret = (((uint64_t)(ia->ia_mtime)) * 1000000000) +
+ (uint64_t)(ia->ia_mtime_nsec);
- return 0;
+ return ret;
}
-
-int
-afr_sh_metadata_sync (call_frame_t *frame, xlator_t *this, dict_t *xattr)
+/*
+ * When directory content is modified, [mc]time is updated. On
+ * Linux, the filesystem does it, while at least on NetBSD, the
+ * kernel file-system independent code does it. This means that
+ * when entries are added while bricks are down, the kernel sends
+ * a SETATTR [mc]time which will cause metadata split brain for
+ * the directory. In this case, clear the split brain by finding
+ * the source with the most recent modification date.
+ */
+static int
+afr_dirtime_splitbrain_source(call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ unsigned char *locked_on)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
- int active_sinks = 0;
- int call_count = 0;
- int i = 0;
-
- struct iatt stbuf = {0,};
- int32_t valid = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
- active_sinks = sh->active_sinks;
-
- /*
- * 2 calls per sink - setattr, setxattr
- */
- if (xattr)
- call_count = active_sinks * 2;
- else
- call_count = active_sinks;
-
- local->call_count = call_count;
-
- stbuf.ia_atime = sh->buf[source].ia_atime;
- stbuf.ia_atime_nsec = sh->buf[source].ia_atime_nsec;
- stbuf.ia_mtime = sh->buf[source].ia_mtime;
- stbuf.ia_mtime_nsec = sh->buf[source].ia_mtime_nsec;
-
- stbuf.ia_uid = sh->buf[source].ia_uid;
- stbuf.ia_gid = sh->buf[source].ia_gid;
-
- stbuf.ia_type = sh->buf[source].ia_type;
- stbuf.ia_prot = sh->buf[source].ia_prot;
-
- valid = GF_SET_ATTR_MODE |
- GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME;
-
- for (i = 0; i < priv->child_count; i++) {
- if (call_count == 0) {
- break;
- }
- if (sh->sources[i] || !local->child_up[i])
- continue;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "self-healing metadata of %s from %s to %s",
- local->loc.path, priv->children[source]->name,
- priv->children[i]->name);
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_setattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setattr,
- &local->loc, &stbuf, valid, NULL);
-
- call_count--;
-
- if (!xattr)
- continue;
-
- STACK_WIND_COOKIE (frame, afr_sh_metadata_xattr_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc, xattr, 0, NULL);
- call_count--;
- }
-
- return 0;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ struct iatt source_ia;
+ struct iatt child_ia;
+ uint64_t mtime = 0;
+ int i;
+ int ret = -1;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!locked_on[i])
+ continue;
+
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret != 0)
+ continue;
+
+ if (mtime_ns(&replies[i].poststat) <= mtime)
+ continue;
+
+ mtime = mtime_ns(&replies[i].poststat);
+ source = i;
+ }
+
+ if (source == -1)
+ goto out;
+
+ source_ia = replies[source].poststat;
+ if (source_ia.ia_type != IA_IFDIR)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (i == source)
+ continue;
+
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret != 0)
+ continue;
+
+ child_ia = replies[i].poststat;
+
+ if (!IA_EQUAL(source_ia, child_ia, gfid) ||
+ !IA_EQUAL(source_ia, child_ia, type) ||
+ !IA_EQUAL(source_ia, child_ia, prot) ||
+ !IA_EQUAL(source_ia, child_ia, uid) ||
+ !IA_EQUAL(source_ia, child_ia, gid) ||
+ !afr_xattrs_are_equal(replies[source].xdata, replies[i].xdata))
+ goto out;
+ }
+
+ /*
+ * Metadata split brain is just about [amc]time
+ * We return our source.
+ */
+ ret = source;
+out:
+ return ret;
}
-
-int
-afr_sh_metadata_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr,
- dict_t *xdata)
+static int
+__afr_selfheal_metadata_mark_pending_xattrs(call_frame_t *frame, xlator_t *this,
+ inode_t *inode,
+ struct afr_reply *replies,
+ unsigned char *sources)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
-
- int i;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "getxattr of %s failed on subvolume %s (%s). proceeding without xattr",
- local->loc.path, priv->children[source]->name,
- strerror (op_errno));
-
- afr_sh_metadata_sync (frame, this, NULL);
- } else {
- for (i = 0; i < priv->child_count; i++) {
- dict_del (xattr, priv->pending_key[i]);
- }
-
- afr_sh_metadata_sync (frame, this, xattr);
+ int ret = 0;
+ int i = 0;
+ int m_idx = 0;
+ afr_private_t *priv = NULL;
+ int raw[AFR_NUM_CHANGE_LOGS] = {0};
+ dict_t *xattr = NULL;
+
+ priv = this->private;
+ m_idx = afr_index_for_transaction_type(AFR_METADATA_TRANSACTION);
+ raw[m_idx] = 1;
+
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i])
+ continue;
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ ret = -1;
+ goto out;
}
-
- return 0;
-}
-
-
-int
-afr_sh_metadata_sync_prepare (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int source = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- source = sh->source;
-
- afr_sh_mark_source_sinks (frame, this);
- if (sh->active_sinks == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no active sinks for performing self-heal on file %s",
- local->loc.path);
- afr_sh_metadata_finish (frame, this);
- return 0;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+ ret = afr_selfheal_post_op(frame, this, inode, i, xattr, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, -ret, AFR_MSG_SELF_HEAL_INFO,
+ "Failed to set pending metadata xattr on child %d for %s", i,
+ uuid_utoa(inode->gfid));
+ goto out;
}
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "syncing metadata of %s from subvolume %s to %d active sinks",
- local->loc.path, priv->children[source]->name,
- sh->active_sinks);
+ afr_replies_wipe(replies, priv->child_count);
+ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
- STACK_WIND (frame, afr_sh_metadata_getxattr_cbk,
- priv->children[source],
- priv->children[source]->fops->getxattr,
- &local->loc, NULL, NULL);
-
- return 0;
+out:
+ if (xattr)
+ dict_unref(xattr);
+ return ret;
}
-
-void
-afr_sh_metadata_fix (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
+/*
+ * Look for mismatching uid/gid or mode or user xattrs even if
+ * AFR xattrs don't say so, and pick one arbitrarily as winner. */
+
+static int
+__afr_selfheal_metadata_finalize_source(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ unsigned char *locked_on,
+ struct afr_reply *replies)
{
- afr_local_t *local = NULL;
- afr_self_heal_t *sh = NULL;
- afr_private_t *priv = NULL;
- int nsources = 0;
- int source = 0;
- int i = 0;
-
- local = frame->local;
- sh = &local->self_heal;
- priv = this->private;
-
- if (op_ret < 0) {
- sh->op_failed = 1;
- afr_sh_set_error (sh, op_errno);
- afr_sh_metadata_finish (frame, this);
- goto out;
- }
- nsources = afr_build_sources (this, sh->xattr, sh->buf,
- sh->pending_matrix, sh->sources,
- sh->success_children,
- AFR_METADATA_TRANSACTION, NULL, _gf_false);
- if (nsources == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "No self-heal needed for %s",
- local->loc.path);
-
- afr_sh_metadata_finish (frame, this);
- goto out;
+ int i = 0;
+ afr_private_t *priv = NULL;
+ struct iatt srcstat = {
+ 0,
+ };
+ int source = -1;
+ int sources_count = 0;
+ int ret = 0;
+
+ priv = this->private;
+
+ sources_count = AFR_COUNT(sources, priv->child_count);
+
+ if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) ||
+ !sources_count) {
+ source = afr_mark_split_brain_source_sinks(
+ frame, this, inode, sources, sinks, healed_sinks, locked_on,
+ replies, AFR_METADATA_TRANSACTION);
+ if (source >= 0) {
+ _afr_fav_child_reset_sink_xattrs(
+ frame, this, inode, source, healed_sinks, undid_pending,
+ AFR_METADATA_TRANSACTION, locked_on, replies);
+ goto out;
}
- if ((nsources == -1)
- && (priv->favorite_child != -1)
- && (sh->child_errno[priv->favorite_child] == 0)) {
-
- gf_log (this->name, GF_LOG_WARNING,
- "Picking favorite child %s as authentic source to resolve conflicting metadata of %s",
- priv->children[priv->favorite_child]->name,
- local->loc.path);
-
- sh->sources[priv->favorite_child] = 1;
-
- nsources = afr_sh_source_count (sh->sources,
- priv->child_count);
- }
-
- if (nsources == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to self-heal permissions/ownership of '%s' "
- "(possible split-brain). Please fix the file on "
- "all backend volumes", local->loc.path);
-
- local->govinda_gOvinda = 1;
-
- afr_sh_metadata_finish (frame, this);
- goto out;
+ /* If this is a directory mtime/ctime only split brain
+ use the most recent */
+ source = afr_dirtime_splitbrain_source(frame, this, replies, locked_on);
+ if (source != -1) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SPLIT_BRAIN,
+ "clear time "
+ "split brain on %s",
+ uuid_utoa(replies[source].poststat.ia_gfid));
+ sources[source] = 1;
+ healed_sinks[source] = 0;
+ goto out;
}
- source = afr_sh_select_source (sh->sources, priv->child_count);
-
- if (source == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "No active sources found.");
-
- afr_sh_metadata_finish (frame, this);
- goto out;
+ if (!priv->metadata_splitbrain_forced_heal) {
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;"
+ "type=metadata;file=%s",
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(inode->gfid));
+ return -EIO;
}
- sh->source = source;
-
- /* detect changes not visible through pending flags -- JIC */
+ /* Metadata split brain, select one subvol
+ arbitrarily */
for (i = 0; i < priv->child_count; i++) {
- if (i == source || sh->child_errno[i])
- continue;
-
- if (PERMISSION_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
-
- if (OWNERSHIP_DIFFERS (&sh->buf[i], &sh->buf[source]))
- sh->sources[i] = 0;
+ if (locked_on[i] && healed_sinks[i]) {
+ sources[i] = 1;
+ healed_sinks[i] = 0;
+ break;
+ }
}
-
- if ((!IA_ISREG (sh->buf[source].ia_type)) &&
- (!IA_ISDIR (sh->buf[source].ia_type))) {
- afr_reset_children (sh->fresh_children, priv->child_count);
- afr_get_fresh_children (sh->success_children, sh->sources,
- sh->fresh_children, priv->child_count);
- afr_inode_set_read_ctx (this, sh->inode, sh->source,
- sh->fresh_children);
+ }
+
+ /* No split brain at this point. If we were called from
+ * afr_heal_splitbrain_file(), abort.*/
+ if (afr_dict_contains_heal_op(frame))
+ return -EIO;
+
+ source = afr_choose_source_by_policy(priv, sources,
+ AFR_METADATA_TRANSACTION);
+ srcstat = replies[source].poststat;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i] || i == source)
+ continue;
+ if (!IA_EQUAL(srcstat, replies[i].poststat, type) ||
+ !IA_EQUAL(srcstat, replies[i].poststat, uid) ||
+ !IA_EQUAL(srcstat, replies[i].poststat, gid) ||
+ !IA_EQUAL(srcstat, replies[i].poststat, prot)) {
+ gf_msg_debug(this->name, 0,
+ "%s: iatt mismatch "
+ "for source(%d) vs (%d)",
+ uuid_utoa(replies[source].poststat.ia_gfid), source,
+ i);
+ sources[i] = 0;
+ healed_sinks[i] = 1;
}
-
- afr_sh_metadata_sync_prepare (frame, this);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i] || i == source)
+ continue;
+ if (!afr_xattrs_are_equal(replies[source].xdata, replies[i].xdata)) {
+ gf_msg_debug(this->name, 0,
+ "%s: xattr mismatch "
+ "for source(%d) vs (%d)",
+ uuid_utoa(replies[source].poststat.ia_gfid), source,
+ i);
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
+ }
+ if ((sources_count == priv->child_count) && (source > -1) &&
+ (AFR_COUNT(healed_sinks, priv->child_count) != 0)) {
+ ret = __afr_selfheal_metadata_mark_pending_xattrs(frame, this, inode,
+ replies, sources);
+ if (ret < 0)
+ return ret;
+ }
out:
- return;
+ afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
+ return source;
}
int
-afr_sh_metadata_post_nonblocking_inodelk_cbk (call_frame_t *frame,
- xlator_t *this)
+__afr_selfheal_metadata_prepare(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ struct afr_reply *replies, unsigned char *pflag)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Non Blocking metadata "
- "inodelks failed for %s.", local->loc.path);
- gf_log (this->name, GF_LOG_ERROR, "Metadata self-heal "
- "failed for %s.", local->loc.path);
- afr_sh_metadata_done (frame, this);
- } else {
-
- gf_log (this->name, GF_LOG_DEBUG, "Non Blocking metadata "
- "inodelks done for %s. Proceeding to FOP",
- local->loc.path);
- afr_sh_common_lookup (frame, this, &local->loc,
- afr_sh_metadata_fix, NULL,
- AFR_LOOKUP_FAIL_CONFLICTS |
- AFR_LOOKUP_FAIL_MISSING_GFIDS,
- NULL);
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ uint64_t *witness = NULL;
+
+ priv = this->private;
+
+ ret = afr_selfheal_unlocked_discover(frame, inode, inode->gfid, replies);
+ if (ret)
+ return ret;
+
+ witness = alloca0(sizeof(*witness) * priv->child_count);
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_METADATA_TRANSACTION, locked_on,
+ sources, sinks, witness, pflag);
+ if (ret)
+ return ret;
+
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
+
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count);
+
+ /* If any source has witness, pick first
+ * witness source and make everybody else sinks */
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i] && witness[i]) {
+ source = i;
+ break;
}
+ }
- return 0;
-}
-
-int
-afr_sh_metadata_lock (call_frame_t *frame, xlator_t *this)
-{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
-
- local = frame->local;
- int_lock = &local->internal_lock;
-
- int_lock->transaction_lk_type = AFR_SELFHEAL_LK;
- int_lock->selfheal_lk_type = AFR_METADATA_SELF_HEAL_LK;
-
- afr_set_lock_number (frame, this);
+ if (source != -1) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (i != source && sources[i]) {
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
+ }
+ }
- int_lock->lk_flock.l_start = LLONG_MAX - 1;
- int_lock->lk_flock.l_len = 0;
- int_lock->lk_flock.l_type = F_WRLCK;
- int_lock->lock_cbk = afr_sh_metadata_post_nonblocking_inodelk_cbk;
+ source = __afr_selfheal_metadata_finalize_source(
+ frame, this, inode, sources, sinks, healed_sinks, undid_pending,
+ locked_on, replies);
- afr_nonblocking_inodelk (frame, this);
+ if (source < 0)
+ return -EIO;
- return 0;
+ return source;
}
-
int
-afr_self_heal_metadata (call_frame_t *frame, xlator_t *this)
+afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = this->private;
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *data_lock = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *undid_pending = NULL;
+ struct afr_reply *locked_replies = NULL;
+ gf_boolean_t did_sh = _gf_true;
+ int source = -1;
+
+ priv = this->private;
+
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+ undid_pending = alloca0(priv->child_count);
+ data_lock = alloca0(priv->child_count);
+
+ locked_replies = alloca0(sizeof(*locked_replies) * priv->child_count);
+
+ ret = afr_selfheal_inodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
+ data_lock);
+ {
+ if (ret < priv->child_count) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ ret = __afr_selfheal_metadata_prepare(
+ frame, this, inode, data_lock, sources, sinks, healed_sinks,
+ undid_pending, locked_replies, NULL);
+ if (ret < 0)
+ goto unlock;
- local = frame->local;
+ source = ret;
- if (local->self_heal.do_metadata_self_heal && priv->metadata_self_heal) {
- afr_sh_metadata_lock (frame, this);
- } else {
- afr_sh_metadata_done (frame, this);
+ if (AFR_COUNT(healed_sinks, priv->child_count) == 0) {
+ did_sh = _gf_false;
+ goto unlock;
}
- return 0;
+ ret = __afr_selfheal_metadata_do(frame, this, inode, source,
+ healed_sinks, locked_replies);
+ if (ret)
+ goto unlock;
+
+ afr_selfheal_restore_time(frame, this, inode, source, healed_sinks,
+ locked_replies);
+
+ ret = afr_selfheal_undo_pending(
+ frame, this, inode, sources, sinks, healed_sinks, undid_pending,
+ AFR_METADATA_TRANSACTION, locked_replies, data_lock);
+ }
+unlock:
+ afr_selfheal_uninodelk(frame, this, inode, this->name, LLONG_MAX - 1, 0,
+ data_lock);
+
+ if (did_sh)
+ afr_log_selfheal(inode->gfid, this, ret, "metadata", source, sources,
+ healed_sinks);
+ else
+ ret = 1;
+
+ if (locked_replies)
+ afr_replies_wipe(locked_replies, priv->child_count);
+ return ret;
+}
+
+int
+afr_selfheal_metadata_by_stbuf(xlator_t *this, struct iatt *stbuf)
+{
+ inode_t *inode = NULL;
+ inode_t *link_inode = NULL;
+ call_frame_t *frame = NULL;
+ int ret = 0;
+
+ if (gf_uuid_is_null(stbuf->ia_gfid)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ inode = inode_new(this->itable);
+ if (!inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ link_inode = inode_link(inode, NULL, NULL, stbuf);
+ if (!link_inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ frame = afr_frame_create(this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ ret = afr_selfheal_metadata(frame, this, link_inode);
+out:
+ if (inode)
+ inode_unref(inode);
+ if (link_inode)
+ inode_unref(link_inode);
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-self-heal-name.c b/xlators/cluster/afr/src/afr-self-heal-name.c
new file mode 100644
index 00000000000..834aac86d48
--- /dev/null
+++ b/xlators/cluster/afr/src/afr-self-heal-name.c
@@ -0,0 +1,616 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/events.h>
+#include "afr.h"
+#include "afr-self-heal.h"
+#include "afr-messages.h"
+
+int
+__afr_selfheal_assign_gfid(xlator_t *this, inode_t *parent, uuid_t pargfid,
+ const char *bname, inode_t *inode,
+ struct afr_reply *replies, void *gfid,
+ unsigned char *locked_on, int source,
+ unsigned char *sources, gf_boolean_t is_gfid_absent,
+ int *gfid_idx)
+{
+ int ret = 0;
+ int up_count = 0;
+ int locked_count = 0;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ gf_uuid_copy(parent->gfid, pargfid);
+
+ if (is_gfid_absent) {
+ /* Ensure all children of AFR are up before performing gfid heal, to
+ * guard against the possibility of gfid split brain. */
+
+ up_count = AFR_COUNT(priv->child_up, priv->child_count);
+ if (up_count != priv->child_count) {
+ ret = -EIO;
+ goto out;
+ }
+
+ locked_count = AFR_COUNT(locked_on, priv->child_count);
+ if (locked_count != priv->child_count) {
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ ret = afr_lookup_and_heal_gfid(this, parent, bname, inode, replies, source,
+ sources, gfid, gfid_idx);
+
+out:
+ return ret;
+}
+
+int
+__afr_selfheal_name_impunge(call_frame_t *frame, xlator_t *this,
+ inode_t *parent, uuid_t pargfid, const char *bname,
+ inode_t *inode, struct afr_reply *replies,
+ int gfid_idx)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ unsigned char *sources = NULL;
+
+ priv = this->private;
+
+ sources = alloca0(priv->child_count);
+
+ gf_uuid_copy(parent->gfid, pargfid);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
+
+ if (gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[gfid_idx].poststat.ia_gfid) == 0) {
+ sources[i] = 1;
+ continue;
+ }
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i])
+ continue;
+
+ ret |= afr_selfheal_recreate_entry(frame, i, gfid_idx, sources, parent,
+ bname, inode, replies);
+ }
+
+ return ret;
+}
+
+int
+__afr_selfheal_name_expunge(xlator_t *this, inode_t *parent, uuid_t pargfid,
+ const char *bname, inode_t *inode,
+ struct afr_reply *replies)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret)
+ continue;
+
+ ret |= afr_selfheal_entry_delete(this, parent, bname, inode, i,
+ replies);
+ }
+
+ return ret;
+}
+
+static gf_boolean_t
+afr_selfheal_name_need_heal_check(xlator_t *this, struct afr_reply *replies)
+{
+ int i = 0;
+ int first_idx = -1;
+ gf_boolean_t need_heal = _gf_false;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if ((replies[i].op_ret == -1) && (replies[i].op_errno == ENODATA))
+ need_heal = _gf_true;
+
+ if (first_idx == -1) {
+ first_idx = i;
+ continue;
+ }
+
+ if (replies[i].op_ret != replies[first_idx].op_ret)
+ need_heal = _gf_true;
+
+ if (gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[first_idx].poststat.ia_gfid))
+ need_heal = _gf_true;
+
+ if ((replies[i].op_ret == 0) &&
+ (gf_uuid_is_null(replies[i].poststat.ia_gfid)))
+ need_heal = _gf_true;
+ }
+
+ return need_heal;
+}
+
+static int
+afr_selfheal_name_type_mismatch_check(xlator_t *this, struct afr_reply *replies,
+ int source, unsigned char *sources,
+ uuid_t pargfid, const char *bname)
+{
+ int i = 0;
+ int type_idx = -1;
+ ia_type_t inode_type = IA_INVAL;
+ ia_type_t inode_type1 = IA_INVAL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
+
+ if (replies[i].poststat.ia_type == IA_INVAL)
+ continue;
+
+ if (inode_type == IA_INVAL) {
+ inode_type = replies[i].poststat.ia_type;
+ type_idx = i;
+ continue;
+ }
+ inode_type1 = replies[i].poststat.ia_type;
+ if (sources[i] || source == -1) {
+ if ((sources[type_idx] || source == -1) &&
+ (inode_type != inode_type1)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SPLIT_BRAIN,
+ "Type mismatch for <gfid:%s>/%s: "
+ "%s on %s and %s on %s",
+ uuid_utoa(pargfid), bname,
+ gf_inode_type_to_str(inode_type1),
+ priv->children[i]->name,
+ gf_inode_type_to_str(inode_type),
+ priv->children[type_idx]->name);
+ gf_event(EVENT_AFR_SPLIT_BRAIN,
+ "client-pid=%d;"
+ "subvol=%s;type=file;"
+ "file=<gfid:%s>/%s;count=2;"
+ "child-%d=%s;type-%d=%s;child-%d=%s;"
+ "type-%d=%s",
+ this->ctx->cmd_args.client_pid, this->name,
+ uuid_utoa(pargfid), bname, i, priv->children[i]->name,
+ i, gf_inode_type_to_str(inode_type1), type_idx,
+ priv->children[type_idx]->name, type_idx,
+ gf_inode_type_to_str(inode_type));
+ return -EIO;
+ }
+ inode_type = replies[i].poststat.ia_type;
+ type_idx = i;
+ }
+ }
+ return 0;
+}
+
+static int
+afr_selfheal_name_gfid_mismatch_check(xlator_t *this, struct afr_reply *replies,
+ int source, unsigned char *sources,
+ int *gfid_idx, uuid_t pargfid,
+ const char *bname, inode_t *inode,
+ unsigned char *locked_on, dict_t *xdata)
+{
+ int i = 0;
+ int gfid_idx_iter = -1;
+ int ret = -1;
+ void *gfid = NULL;
+ void *gfid1 = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid || replies[i].op_ret != 0)
+ continue;
+
+ if (gf_uuid_is_null(replies[i].poststat.ia_gfid))
+ continue;
+
+ if (!gfid) {
+ gfid = &replies[i].poststat.ia_gfid;
+ gfid_idx_iter = i;
+ continue;
+ }
+
+ gfid1 = &replies[i].poststat.ia_gfid;
+ if (sources[i] || source == -1) {
+ if ((sources[gfid_idx_iter] || source == -1) &&
+ gf_uuid_compare(gfid, gfid1)) {
+ ret = afr_gfid_split_brain_source(this, replies, inode, pargfid,
+ bname, gfid_idx_iter, i,
+ locked_on, gfid_idx, xdata);
+ if (!ret && *gfid_idx >= 0) {
+ ret = dict_set_sizen_str_sizen(xdata, "gfid-heal-msg",
+ "GFID split-brain resolved");
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ AFR_MSG_DICT_SET_FAILED,
+ "Error setting gfid-"
+ "heal-msg dict");
+ }
+ return ret;
+ }
+ gfid = &replies[i].poststat.ia_gfid;
+ gfid_idx_iter = i;
+ }
+ }
+
+ *gfid_idx = gfid_idx_iter;
+ return 0;
+}
+
+static gf_boolean_t
+afr_selfheal_name_source_empty_check(xlator_t *this, struct afr_reply *replies,
+ unsigned char *sources, int source)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ gf_boolean_t source_is_empty = _gf_true;
+
+ priv = this->private;
+
+ if (source == -1) {
+ source_is_empty = _gf_false;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!sources[i])
+ continue;
+
+ if (replies[i].op_ret == -1 && replies[i].op_errno == ENOENT)
+ continue;
+
+ source_is_empty = _gf_false;
+ break;
+ }
+out:
+ return source_is_empty;
+}
+
+int
+__afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, const char *bname, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks, int source,
+ unsigned char *locked_on, struct afr_reply *replies,
+ void *gfid_req, dict_t *xdata)
+{
+ int gfid_idx = -1;
+ int ret = -1;
+ void *gfid = NULL;
+ gf_boolean_t source_is_empty = _gf_true;
+ gf_boolean_t need_heal = _gf_false;
+ gf_boolean_t is_gfid_absent = _gf_false;
+
+ need_heal = afr_selfheal_name_need_heal_check(this, replies);
+ if (!need_heal)
+ return 0;
+
+ source_is_empty = afr_selfheal_name_source_empty_check(this, replies,
+ sources, source);
+ if (source_is_empty) {
+ ret = __afr_selfheal_name_expunge(this, parent, pargfid, bname, inode,
+ replies);
+ if (ret == -EIO)
+ ret = -1;
+ return ret;
+ }
+
+ ret = afr_selfheal_name_type_mismatch_check(this, replies, source, sources,
+ pargfid, bname);
+ if (ret)
+ return ret;
+
+ ret = afr_selfheal_name_gfid_mismatch_check(this, replies, source, sources,
+ &gfid_idx, pargfid, bname,
+ inode, locked_on, xdata);
+ if (ret)
+ return ret;
+
+ if (gfid_idx == -1) {
+ if (!gfid_req || gf_uuid_is_null(gfid_req))
+ return -1;
+ gfid = gfid_req;
+ } else {
+ gfid = &replies[gfid_idx].poststat.ia_gfid;
+ if (source == -1)
+ /* Either entry split-brain or dirty xattrs are present on parent.*/
+ source = gfid_idx;
+ }
+
+ is_gfid_absent = (gfid_idx == -1) ? _gf_true : _gf_false;
+ ret = __afr_selfheal_assign_gfid(this, parent, pargfid, bname, inode,
+ replies, gfid, locked_on, source, sources,
+ is_gfid_absent, &gfid_idx);
+ if (ret || (gfid_idx < 0))
+ return ret;
+
+ ret = __afr_selfheal_name_impunge(frame, this, parent, pargfid, bname,
+ inode, replies, gfid_idx);
+ if (ret == -EIO)
+ ret = -1;
+
+ return ret;
+}
+
+int
+__afr_selfheal_name_finalize_source(xlator_t *this, unsigned char *sources,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on, uint64_t *witness)
+{
+ int i = 0;
+ afr_private_t *priv = NULL;
+ int source = -1;
+ int sources_count = 0;
+
+ priv = this->private;
+
+ sources_count = AFR_COUNT(sources, priv->child_count);
+
+ if ((AFR_CMP(locked_on, healed_sinks, priv->child_count) == 0) ||
+ !sources_count || afr_does_witness_exist(this, witness)) {
+ memset(sources, 0, sizeof(*sources) * priv->child_count);
+ afr_mark_active_sinks(this, sources, locked_on, healed_sinks);
+ return -1;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (sources[i]) {
+ source = i;
+ break;
+ }
+ }
+
+ return source;
+}
+
+int
+__afr_selfheal_name_prepare(call_frame_t *frame, xlator_t *this,
+ inode_t *parent, uuid_t pargfid,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ int *source_p)
+{
+ int ret = -1;
+ int source = -1;
+ afr_private_t *priv = NULL;
+ struct afr_reply *replies = NULL;
+ uint64_t *witness = NULL;
+
+ priv = this->private;
+
+ replies = alloca0(priv->child_count * sizeof(*replies));
+
+ ret = afr_selfheal_unlocked_discover(frame, parent, pargfid, replies);
+ if (ret)
+ goto out;
+
+ witness = alloca0(sizeof(*witness) * priv->child_count);
+ ret = afr_selfheal_find_direction(frame, this, replies,
+ AFR_ENTRY_TRANSACTION, locked_on, sources,
+ sinks, witness, NULL);
+ if (ret)
+ goto out;
+
+ /* Initialize the healed_sinks[] array optimistically to
+ the intersection of to-be-healed (i.e sinks[]) and
+ the list of servers which are up (i.e locked_on[]).
+
+ As we encounter failures in the healing process, we
+ will unmark the respective servers in the healed_sinks[]
+ array.
+ */
+ AFR_INTERSECT(healed_sinks, sinks, locked_on, priv->child_count);
+
+ source = __afr_selfheal_name_finalize_source(this, sources, healed_sinks,
+ locked_on, witness);
+ if (source < 0) {
+ /* If source is < 0 (typically split-brain), we perform a
+ conservative merge of entries rather than erroring out */
+ }
+ *source_p = source;
+
+out:
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+
+ return ret;
+}
+
+int
+afr_selfheal_name_do(call_frame_t *frame, xlator_t *this, inode_t *parent,
+ uuid_t pargfid, const char *bname, void *gfid_req,
+ dict_t *xdata)
+{
+ afr_private_t *priv = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *sinks = NULL;
+ unsigned char *healed_sinks = NULL;
+ unsigned char *locked_on = NULL;
+ int source = -1;
+ struct afr_reply *replies = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+ dict_t *xattr = NULL;
+
+ xattr = dict_new();
+ if (!xattr)
+ return -ENOMEM;
+
+ ret = dict_set_int32_sizen(xattr, GF_GFIDLESS_LOOKUP, 1);
+ if (ret) {
+ dict_unref(xattr);
+ return -1;
+ }
+
+ priv = this->private;
+
+ locked_on = alloca0(priv->child_count);
+ sources = alloca0(priv->child_count);
+ sinks = alloca0(priv->child_count);
+ healed_sinks = alloca0(priv->child_count);
+
+ replies = alloca0(priv->child_count * sizeof(*replies));
+
+ ret = afr_selfheal_entrylk(frame, this, parent, this->name, bname,
+ locked_on);
+ {
+ if (ret < priv->child_count) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_name_prepare(frame, this, parent, pargfid,
+ locked_on, sources, sinks,
+ healed_sinks, &source);
+ if (ret)
+ goto unlock;
+
+ inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies,
+ locked_on, xattr);
+ if (!inode) {
+ ret = -ENOMEM;
+ goto unlock;
+ }
+
+ ret = __afr_selfheal_name_do(frame, this, parent, pargfid, bname, inode,
+ sources, sinks, healed_sinks, source,
+ locked_on, replies, gfid_req, xdata);
+ }
+unlock:
+ afr_selfheal_unentrylk(frame, this, parent, this->name, bname, locked_on,
+ NULL);
+ if (inode)
+ inode_unref(inode);
+
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+ if (xattr)
+ dict_unref(xattr);
+
+ return ret;
+}
+
+int
+afr_selfheal_name_unlocked_inspect(call_frame_t *frame, xlator_t *this,
+ inode_t *parent, uuid_t pargfid,
+ const char *bname, gf_boolean_t *need_heal)
+{
+ afr_private_t *priv = NULL;
+ int i = 0;
+ struct afr_reply *replies = NULL;
+ inode_t *inode = NULL;
+ int first_idx = -1;
+ afr_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ replies = alloca0(sizeof(*replies) * priv->child_count);
+
+ inode = afr_selfheal_unlocked_lookup_on(frame, parent, bname, replies,
+ local->child_up, NULL);
+ if (!inode)
+ return -ENOMEM;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if ((replies[i].op_ret == -1) && (replies[i].op_errno == ENODATA)) {
+ *need_heal = _gf_true;
+ break;
+ }
+
+ if (first_idx == -1) {
+ first_idx = i;
+ continue;
+ }
+
+ if (replies[i].op_ret != replies[first_idx].op_ret) {
+ *need_heal = _gf_true;
+ break;
+ }
+
+ if (gf_uuid_compare(replies[i].poststat.ia_gfid,
+ replies[first_idx].poststat.ia_gfid)) {
+ *need_heal = _gf_true;
+ break;
+ }
+ }
+
+ if (inode)
+ inode_unref(inode);
+ if (replies)
+ afr_replies_wipe(replies, priv->child_count);
+ return 0;
+}
+
+int
+afr_selfheal_name(xlator_t *this, uuid_t pargfid, const char *bname,
+ void *gfid_req, dict_t *xdata)
+{
+ inode_t *parent = NULL;
+ call_frame_t *frame = NULL;
+ int ret = -1;
+ gf_boolean_t need_heal = _gf_false;
+
+ parent = afr_inode_find(this, pargfid);
+ if (!parent)
+ goto out;
+
+ frame = afr_frame_create(this, NULL);
+ if (!frame)
+ goto out;
+
+ ret = afr_selfheal_name_unlocked_inspect(frame, this, parent, pargfid,
+ bname, &need_heal);
+ if (ret)
+ goto out;
+
+ if (need_heal) {
+ ret = afr_selfheal_name_do(frame, this, parent, pargfid, bname,
+ gfid_req, xdata);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (parent)
+ inode_unref(parent);
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+
+ return ret;
+}
diff --git a/xlators/cluster/afr/src/afr-self-heal.h b/xlators/cluster/afr/src/afr-self-heal.h
index 2efc1116d33..48e6dbcfb18 100644
--- a/xlators/cluster/afr/src/afr-self-heal.h
+++ b/xlators/cluster/afr/src/afr-self-heal.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,43 +8,370 @@
cases as published by the Free Software Foundation.
*/
-#ifndef __AFR_SELF_HEAL_H__
-#define __AFR_SELF_HEAL_H__
+#ifndef _AFR_SELFHEAL_H
+#define _AFR_SELFHEAL_H
-#include <sys/stat.h>
+/* Perform fop on all UP subvolumes and wait for all callbacks to return */
-#define FILETYPE_DIFFERS(buf1,buf2) ((buf1)->ia_type != (buf2)->ia_type)
-#define PERMISSION_DIFFERS(buf1,buf2) (st_mode_from_ia ((buf1)->ia_prot, (buf1)->ia_type) != st_mode_from_ia ((buf2)->ia_prot, (buf2)->ia_type))
-#define OWNERSHIP_DIFFERS(buf1,buf2) (((buf1)->ia_uid != (buf2)->ia_uid) || ((buf1)->ia_gid != (buf2)->ia_gid))
-#define SIZE_DIFFERS(buf1,buf2) ((buf1)->ia_size != (buf2)->ia_size)
+#define AFR_ONALL(frame, rfn, fop, args...) \
+ do { \
+ afr_local_t *__local = frame->local; \
+ afr_private_t *__priv = frame->this->private; \
+ int __i = 0, __count = 0; \
+ unsigned char *__child_up = alloca(__priv->child_count); \
+ \
+ memcpy(__child_up, __priv->child_up, \
+ sizeof(*__child_up) * __priv->child_count); \
+ __count = AFR_COUNT(__child_up, __priv->child_count); \
+ \
+ __local->barrier.waitfor = __count; \
+ afr_local_replies_wipe(__local, __priv); \
+ \
+ for (__i = 0; __i < __priv->child_count; __i++) { \
+ if (!__child_up[__i]) \
+ continue; \
+ STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \
+ __priv->children[__i], \
+ __priv->children[__i]->fops->fop, args); \
+ } \
+ syncbarrier_wait(&__local->barrier, __count); \
+ } while (0)
-#define SIZE_GREATER(buf1,buf2) ((buf1)->ia_size > (buf2)->ia_size)
+/* Perform fop on all subvolumes represented by list[] array and wait
+ for all callbacks to return */
+
+#define AFR_ONLIST(list, frame, rfn, fop, args...) \
+ do { \
+ afr_local_t *__local = frame->local; \
+ afr_private_t *__priv = frame->this->private; \
+ int __i = 0; \
+ int __count = 0; \
+ unsigned char *__list = alloca(__priv->child_count); \
+ \
+ memcpy(__list, list, sizeof(*__list) * __priv->child_count); \
+ __count = AFR_COUNT(__list, __priv->child_count); \
+ __local->barrier.waitfor = __count; \
+ afr_local_replies_wipe(__local, __priv); \
+ \
+ for (__i = 0; __i < __priv->child_count; __i++) { \
+ if (!__list[__i]) \
+ continue; \
+ STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \
+ __priv->children[__i], \
+ __priv->children[__i]->fops->fop, args); \
+ } \
+ syncbarrier_wait(&__local->barrier, __count); \
+ } while (0)
+
+#define AFR_SEQ(frame, rfn, fop, args...) \
+ do { \
+ afr_local_t *__local = frame->local; \
+ afr_private_t *__priv = frame->this->private; \
+ int __i = 0; \
+ \
+ afr_local_replies_wipe(__local, __priv); \
+ \
+ for (__i = 0; __i < __priv->child_count; __i++) { \
+ if (!__priv->child_up[__i]) \
+ continue; \
+ STACK_WIND_COOKIE(frame, rfn, (void *)(long)__i, \
+ __priv->children[__i], \
+ __priv->children[__i]->fops->fop, args); \
+ syncbarrier_wait(&__local->barrier, 1); \
+ } \
+ } while (0)
+
+#define ALLOC_MATRIX(n, type) \
+ ({ \
+ int __i; \
+ type **__ptr = alloca(n * sizeof(type *)); \
+ \
+ for (__i = 0; __i < n; __i++) \
+ __ptr[__i] = alloca0(n * sizeof(type)); \
+ __ptr; \
+ })
+
+#define IA_EQUAL(f, s, field) \
+ (memcmp(&(f.ia_##field), &(s.ia_##field), sizeof(s.ia_##field)) == 0)
+
+#define SBRAIN_HEAL_NO_GO_MSG \
+ "Failed to obtain replies from all bricks of " \
+ "the replica (are they up?). Cannot resolve split-brain."
+#define SFILE_NOT_IN_SPLIT_BRAIN "File not in split-brain"
+#define SNO_BIGGER_FILE "No bigger file"
+#define SNO_DIFF_IN_MTIME "No difference in mtime"
+#define SUSE_SOURCE_BRICK_TO_HEAL \
+ "Use source-brick option to heal metadata" \
+ " split-brain"
+#define SINVALID_BRICK_NAME "Invalid brick name"
+#define SBRICK_IS_NOT_UP "Brick is not up"
+#define SBRICK_NOT_CONNECTED "Brick is not connected"
+#define SLESS_THAN2_BRICKS_in_REP "< 2 bricks in replica are up"
+#define SBRICK_IS_REMOTE "Brick is remote"
+#define SSTARTED_SELF_HEAL "Started self-heal"
+#define SOP_NOT_SUPPORTED "Operation Not Supported"
+#define SFILE_NOT_UNDER_DATA \
+ "The file is not under data or metadata " \
+ "split-brain"
+#define SFILE_NOT_IN_SPLIT_BRAIN "File not in split-brain"
+#define SALL_BRICKS_UP_TO_RESOLVE \
+ "All the bricks should be up to resolve the" \
+ " gfid split brain"
+#define SERROR_GETTING_SRC_BRICK "Error getting the source brick"
+int
+afr_selfheal(xlator_t *this, uuid_t gfid);
+
+gf_boolean_t
+afr_throttled_selfheal(call_frame_t *frame, xlator_t *this);
+
+int
+afr_selfheal_name(xlator_t *this, uuid_t gfid, const char *name, void *gfid_req,
+ dict_t *xdata);
+
+int
+afr_selfheal_data(call_frame_t *frame, xlator_t *this, fd_t *fd);
+
+int
+afr_selfheal_metadata(call_frame_t *frame, xlator_t *this, inode_t *inode);
+
+int
+afr_selfheal_entry(call_frame_t *frame, xlator_t *this, inode_t *inode);
+
+int
+afr_lookup_and_heal_gfid(xlator_t *this, inode_t *parent, const char *name,
+ inode_t *inode, struct afr_reply *replies, int source,
+ unsigned char *sources, void *gfid, int *gfid_idx);
+
+int
+afr_selfheal_inodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on);
+
+int
+afr_selfheal_tryinodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ unsigned char *locked_on);
+
+int
+afr_selfheal_tie_breaker_inodelk(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, off_t off,
+ size_t size, unsigned char *locked_on);
+
+int
+afr_selfheal_uninodelk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, off_t off, size_t size,
+ const unsigned char *locked_on);
+
+int
+afr_selfheal_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on);
+
+int
+afr_selfheal_tryentrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on);
+
+int
+afr_selfheal_tie_breaker_entrylk(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, char *dom, const char *name,
+ unsigned char *locked_on);
+
+int
+afr_selfheal_unentrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ char *dom, const char *name, unsigned char *locked_on,
+ dict_t *xdata);
+
+int
+afr_selfheal_unlocked_discover(call_frame_t *frame, inode_t *inode, uuid_t gfid,
+ struct afr_reply *replies);
+
+int
+afr_selfheal_unlocked_discover_on(call_frame_t *frame, inode_t *inode,
+ uuid_t gfid, struct afr_reply *replies,
+ unsigned char *discover_on, dict_t *dict);
+inode_t *
+afr_selfheal_unlocked_lookup_on(call_frame_t *frame, inode_t *parent,
+ const char *name, struct afr_reply *replies,
+ unsigned char *lookup_on, dict_t *xattr);
+
+int
+afr_selfheal_find_direction(call_frame_t *frame, xlator_t *this,
+ struct afr_reply *replies,
+ afr_transaction_type type, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ uint64_t *witness, unsigned char *flag);
+int
+afr_selfheal_fill_matrix(xlator_t *this, int **matrix, int subvol, int idx,
+ dict_t *xdata);
+
+int
+afr_selfheal_extract_xattr(xlator_t *this, struct afr_reply *replies,
+ afr_transaction_type type, int *dirty, int **matrix);
int
-afr_sh_has_metadata_pending (dict_t *xattr, xlator_t *this);
+afr_sh_generic_fop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata);
+
int
-afr_sh_has_entry_pending (dict_t *xattr, xlator_t *this);
+afr_selfheal_restore_time(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, unsigned char *healed_sinks,
+ struct afr_reply *replies);
int
-afr_sh_has_data_pending (dict_t *xattr, xlator_t *this);
+afr_selfheal_undo_pending(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type, struct afr_reply *replies,
+ unsigned char *locked_on);
int
-afr_self_heal_entry (call_frame_t *frame, xlator_t *this);
+afr_selfheal_recreate_entry(call_frame_t *frame, int dst, int source,
+ unsigned char *sources, inode_t *dir,
+ const char *name, inode_t *inode,
+ struct afr_reply *replies);
int
-afr_self_heal_data (call_frame_t *frame, xlator_t *this);
+afr_selfheal_post_op(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int subvol, dict_t *xattr, dict_t *xdata);
+
+call_frame_t *
+afr_frame_create(xlator_t *this, int32_t *op_errno);
+
+inode_t *
+afr_inode_find(xlator_t *this, uuid_t gfid);
int
-afr_self_heal_metadata (call_frame_t *frame, xlator_t *this);
+afr_selfheal_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *parbuf);
+void
+afr_reply_copy(struct afr_reply *dst, struct afr_reply *src);
+
+void
+afr_replies_copy(struct afr_reply *dst, struct afr_reply *src, int count);
+
+int
+afr_selfheal_newentry_mark(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ int source, struct afr_reply *replies,
+ unsigned char *sources, unsigned char *newentry);
+
+unsigned int
+afr_success_count(struct afr_reply *replies, unsigned int count);
+
+void
+afr_log_selfheal(uuid_t gfid, xlator_t *this, int ret, char *type, int source,
+ unsigned char *sources, unsigned char *healed_sinks);
+
+void
+afr_mark_largest_file_as_source(xlator_t *this, unsigned char *sources,
+ struct afr_reply *replies);
+void
+afr_mark_active_sinks(xlator_t *this, unsigned char *sources,
+ unsigned char *locked_on, unsigned char *sinks);
+
+gf_boolean_t
+afr_dict_contains_heal_op(call_frame_t *frame);
+gf_boolean_t
+afr_can_decide_split_brain_source_sinks(struct afr_reply *replies,
+ int child_count);
int
-afr_self_heal_get_source (xlator_t *this, afr_local_t *local, dict_t **xattr);
+afr_mark_split_brain_source_sinks(
+ call_frame_t *frame, xlator_t *this, inode_t *inode, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks, unsigned char *locked_on,
+ struct afr_reply *replies, afr_transaction_type type);
int
-afr_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode);
+afr_sh_get_fav_by_policy(xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, char **policy_str);
+
+int
+_afr_fav_child_reset_sink_xattrs(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int source,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ afr_transaction_type type,
+ unsigned char *locked_on,
+ struct afr_reply *replies);
+
+int
+afr_get_child_index_from_name(xlator_t *this, char *name);
+
+gf_boolean_t
+afr_does_witness_exist(xlator_t *this, uint64_t *witness);
+
+int
+__afr_selfheal_data_prepare(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *sinks, unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ struct afr_reply *replies, unsigned char *flag);
+
+int
+__afr_selfheal_metadata_prepare(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *undid_pending,
+ struct afr_reply *replies, unsigned char *flag);
+int
+__afr_selfheal_entry_prepare(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, unsigned char *locked_on,
+ unsigned char *sources, unsigned char *sinks,
+ unsigned char *healed_sinks,
+ struct afr_reply *replies, int *source_p,
+ unsigned char *flag);
int
-afr_lookup_select_read_child_by_txn_type (xlator_t *this, afr_local_t *local,
- dict_t **xattr,
- afr_transaction_type txn_type,
- uuid_t gfid);
-#endif /* __AFR_SELF_HEAL_H__ */
+afr_selfheal_unlocked_inspect(call_frame_t *frame, xlator_t *this, uuid_t gfid,
+ inode_t **link_inode, gf_boolean_t *data_selfheal,
+ gf_boolean_t *metadata_selfheal,
+ gf_boolean_t *entry_selfheal,
+ struct afr_reply *replies);
+
+int
+afr_selfheal_do(call_frame_t *frame, xlator_t *this, uuid_t gfid);
+
+int
+afr_selfheal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata);
+
+int
+afr_locked_fill(call_frame_t *frame, xlator_t *this, unsigned char *locked_on);
+int
+afr_choose_source_by_policy(afr_private_t *priv, unsigned char *sources,
+ afr_transaction_type type);
+
+int
+afr_selfheal_metadata_by_stbuf(xlator_t *this, struct iatt *stbuf);
+
+int
+afr_sh_fav_by_size(xlator_t *this, struct afr_reply *replies, inode_t *inode);
+int
+afr_sh_fav_by_mtime(xlator_t *this, struct afr_reply *replies, inode_t *inode);
+int
+afr_sh_fav_by_ctime(xlator_t *this, struct afr_reply *replies, inode_t *inode);
+
+int
+afr_gfid_split_brain_source(xlator_t *this, struct afr_reply *replies,
+ inode_t *inode, uuid_t pargfid, const char *bname,
+ int src_idx, int child_idx,
+ unsigned char *locked_on, int *src, dict_t *xdata);
+int
+afr_mark_source_sinks_if_file_empty(xlator_t *this, unsigned char *sources,
+ unsigned char *sinks,
+ unsigned char *healed_sinks,
+ unsigned char *locked_on,
+ struct afr_reply *replies,
+ afr_transaction_type type);
+
+gf_boolean_t
+afr_is_file_empty_on_all_children(afr_private_t *priv,
+ struct afr_reply *replies);
+
+int
+afr_selfheal_entry_delete(xlator_t *this, inode_t *dir, const char *name,
+ inode_t *inode, int child, struct afr_reply *replies);
+int
+afr_anon_inode_create(xlator_t *this, int child, inode_t **linked_inode);
+#endif /* !_AFR_SELFHEAL_H */
diff --git a/xlators/cluster/afr/src/afr-self-heald.c b/xlators/cluster/afr/src/afr-self-heald.c
index c5deb18b8af..109fd4b7421 100644
--- a/xlators/cluster/afr/src/afr-self-heald.c
+++ b/xlators/cluster/afr/src/afr-self-heald.c
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,1210 +8,1709 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
#include "afr.h"
-#include "syncop.h"
+#include "afr-self-heal.h"
#include "afr-self-heald.h"
-#include "afr-self-heal-common.h"
#include "protocol-common.h"
-#include "event-history.h"
-
-typedef enum {
- STOP_CRAWL_ON_SINGLE_SUBVOL = 1
-} afr_crawl_flags_t;
-
-typedef enum {
- HEAL = 1,
- INFO
-} shd_crawl_op;
-
-typedef struct shd_dump {
- dict_t *dict;
- xlator_t *this;
- int child;
-} shd_dump_t;
+#include <glusterfs/syncop-utils.h>
+#include "afr-messages.h"
+#include <glusterfs/byte-order.h>
+
+#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
+#define AFR_STATISTICS_HISTORY_SIZE 50
+
+#define ASSERT_LOCAL(this, healer) \
+ if (!afr_shd_is_subvol_local(this, healer->subvol)) { \
+ healer->local = _gf_false; \
+ if (safe_break(healer)) { \
+ break; \
+ } else { \
+ continue; \
+ } \
+ } else { \
+ healer->local = _gf_true; \
+ }
+
+#define NTH_INDEX_HEALER(this, n) \
+ &((((afr_private_t *)this->private))->shd.index_healers[n])
+#define NTH_FULL_HEALER(this, n) \
+ &((((afr_private_t *)this->private))->shd.full_healers[n])
+
+char *
+afr_subvol_name(xlator_t *this, int subvol)
+{
+ afr_private_t *priv = NULL;
-typedef struct shd_event_ {
- int child;
- char *path;
-} shd_event_t;
+ priv = this->private;
+ if (subvol < 0 || subvol > priv->child_count)
+ return NULL;
-typedef struct shd_pos_ {
- int child;
- xlator_t *this;
- afr_child_pos_t pos;
-} shd_pos_t;
+ return priv->children[subvol]->name;
+}
-typedef int
-(*afr_crawl_done_cbk_t) (int ret, call_frame_t *sync_frame, void *crawl_data);
+void
+afr_destroy_crawl_event_data(void *data)
+{
+ return;
+}
void
-afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
- process_entry_cbk_t process_entry, void *op_data,
- gf_boolean_t exclusive, int crawl_flags,
- afr_crawl_done_cbk_t crawl_done);
+afr_destroy_shd_event_data(void *data)
+{
+ shd_event_t *shd_event = data;
-static int
-_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data);
+ if (!shd_event)
+ return;
+ GF_FREE(shd_event->path);
-int
-afr_syncop_find_child_position (void *data);
+ return;
+}
-static int
-_loc_assign_gfid_path (loc_t *loc)
+gf_boolean_t
+afr_shd_is_subvol_local(xlator_t *this, int subvol)
{
- int ret = -1;
- char gfid_path[64] = {0};
-
- if (loc->inode && !uuid_is_null (loc->inode->gfid)) {
- ret = inode_path (loc->inode, NULL, (char**)&loc->path);
- } else if (!uuid_is_null (loc->gfid)) {
- snprintf (gfid_path, sizeof (gfid_path), "<gfid:%s>",
- uuid_utoa (loc->gfid));
- loc->path = gf_strdup (gfid_path);
- if (loc->path)
- ret = 0;
- }
- return ret;
+ afr_private_t *priv = NULL;
+ gf_boolean_t is_local = _gf_false;
+ loc_t loc = {
+ 0,
+ };
+
+ loc.inode = this->itable->root;
+ gf_uuid_copy(loc.gfid, loc.inode->gfid);
+ priv = this->private;
+ syncop_is_subvol_local(priv->children[subvol], &loc, &is_local);
+ return is_local;
}
-void
-shd_cleanup_event (void *event)
+int
+__afr_shd_healer_wait(struct subvol_healer *healer)
{
- shd_event_t *shd_event = event;
+ afr_private_t *priv = NULL;
+ struct timespec wait_till = {
+ 0,
+ };
+ int ret = 0;
- if (!shd_event)
- goto out;
- if (shd_event->path)
- GF_FREE (shd_event->path);
- GF_FREE (shd_event);
-out:
- return;
+ priv = healer->this->private;
+
+disabled_loop:
+ wait_till.tv_sec = gf_time() + priv->shd.timeout;
+
+ while (!healer->rerun) {
+ ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
+ if (ret == ETIMEDOUT)
+ break;
+ }
+
+ ret = healer->rerun;
+ healer->rerun = 0;
+
+ if (!priv->shd.enabled)
+ goto disabled_loop;
+
+ return ret;
}
int
-afr_get_local_child (afr_self_heald_t *shd, unsigned int child_count)
+afr_shd_healer_wait(struct subvol_healer *healer)
{
- int i = 0;
- int ret = -1;
- for (i = 0; i < child_count; i++) {
- if (shd->pos[i] == AFR_POS_LOCAL) {
- ret = i;
- break;
- }
- }
- return ret;
+ int ret = 0;
+
+ pthread_mutex_lock(&healer->mutex);
+ {
+ ret = __afr_shd_healer_wait(healer);
+ }
+ pthread_mutex_unlock(&healer->mutex);
+
+ return ret;
}
-static int
-_build_index_loc (xlator_t *this, loc_t *loc, char *name, loc_t *parent)
+gf_boolean_t
+safe_break(struct subvol_healer *healer)
{
- int ret = 0;
-
- uuid_copy (loc->pargfid, parent->inode->gfid);
- loc->path = "";
- loc->name = name;
- loc->parent = inode_ref (parent->inode);
- if (!loc->parent) {
- loc->path = NULL;
- loc_wipe (loc);
- ret = -1;
- }
- return ret;
+ gf_boolean_t ret = _gf_false;
+
+ pthread_mutex_lock(&healer->mutex);
+ {
+ if (healer->rerun)
+ goto unlock;
+
+ healer->running = _gf_false;
+ ret = _gf_true;
+ }
+unlock:
+ pthread_mutex_unlock(&healer->mutex);
+
+ return ret;
}
-int
-_add_path_to_dict (xlator_t *this, dict_t *output, int child, char *path,
- struct timeval *tv, gf_boolean_t dyn)
+inode_t *
+afr_shd_inode_find(xlator_t *this, xlator_t *subvol, uuid_t gfid)
{
- //subkey not used for now
- int ret = -1;
- uint64_t count = 0;
- char key[256] = {0};
- int xl_id = 0;
+ int ret = 0;
+ uint64_t val = IA_INVAL;
+ dict_t *xdata = NULL;
+ dict_t *rsp_dict = NULL;
+ inode_t *inode = NULL;
+
+ xdata = dict_new();
+ if (!xdata)
+ goto out;
+
+ ret = dict_set_int8(xdata, GF_INDEX_IA_TYPE_GET_REQ, 1);
+ if (ret)
+ goto out;
+
+ ret = syncop_inode_find(this, subvol, gfid, &inode, xdata, &rsp_dict);
+ if (ret < 0)
+ goto out;
+
+ if (rsp_dict) {
+ ret = dict_get_uint64(rsp_dict, GF_INDEX_IA_TYPE_GET_RSP, &val);
+ if (ret)
+ goto out;
+ }
+ ret = inode_ctx_set2(inode, subvol, 0, &val);
+out:
+ if (ret && inode) {
+ inode_unref(inode);
+ inode = NULL;
+ }
+ if (xdata)
+ dict_unref(xdata);
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+ return inode;
+}
- ret = dict_get_int32 (output, this->name, &xl_id);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "xl does not have id");
- goto out;
- }
+inode_t *
+afr_shd_index_inode(xlator_t *this, xlator_t *subvol, char *vgfid)
+{
+ loc_t rootloc = {
+ 0,
+ };
+ inode_t *inode = NULL;
+ int ret = 0;
+ dict_t *xattr = NULL;
+ void *index_gfid = NULL;
- snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
- ret = dict_get_uint64 (output, key, &count);
+ rootloc.inode = inode_ref(this->itable->root);
+ gf_uuid_copy(rootloc.gfid, rootloc.inode->gfid);
- snprintf (key, sizeof (key), "%d-%d-%"PRIu64, xl_id, child, count);
- if (dyn)
- ret = dict_set_dynstr (output, key, path);
- else
- ret = dict_set_str (output, key, path);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s: Could not add to output",
- path);
- goto out;
- }
+ ret = syncop_getxattr(subvol, &rootloc, &xattr, vgfid, NULL, NULL);
+ if (ret || !xattr) {
+ errno = -ret;
+ goto out;
+ }
- if (!tv)
- goto inc_count;
- snprintf (key, sizeof (key), "%d-%d-%"PRIu64"-time", xl_id,
- child, count);
- ret = dict_set_uint32 (output, key, tv->tv_sec);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s: Could not set time",
- path);
- goto out;
- }
+ ret = dict_get_ptr(xattr, vgfid, &index_gfid);
+ if (ret)
+ goto out;
+
+ gf_msg_debug(this->name, 0, "%s dir gfid for %s: %s", vgfid, subvol->name,
+ uuid_utoa(index_gfid));
+
+ inode = afr_shd_inode_find(this, subvol, index_gfid);
-inc_count:
- snprintf (key, sizeof (key), "%d-%d-count", xl_id, child);
- ret = dict_set_uint64 (output, key, count + 1);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Could not increment count");
- goto out;
- }
- ret = 0;
out:
- return ret;
+ loc_wipe(&rootloc);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ return inode;
}
int
-_get_path_from_gfid_loc (xlator_t *this, xlator_t *readdir_xl, loc_t *child,
- char **fpath)
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
+ ia_type_t type)
{
- dict_t *xattr = NULL;
- char *path = NULL;
- int ret = -1;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
- ret = syncop_getxattr (readdir_xl, child, &xattr,
- GFID_TO_PATH_KEY);
- if (ret)
- goto out;
- ret = dict_get_str (xattr, GFID_TO_PATH_KEY, &path);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to get path for "
- "gfid %s", uuid_utoa (child->gfid));
- goto out;
- }
- path = gf_strdup (path);
- if (!path) {
- ret = -1;
- goto out;
- }
- ret = 0;
-out:
- if (!ret)
- *fpath = path;
- if (xattr)
- dict_unref (xattr);
- return ret;
+ loc.parent = inode_ref(inode);
+ loc.name = name;
+
+ if (IA_ISDIR(type))
+ ret = syncop_rmdir(subvol, &loc, 1, NULL, NULL);
+ else
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+
+ loc_wipe(&loc);
+ return ret;
}
-int
-_add_event_to_dict (circular_buffer_t *cb, void *data)
+void
+afr_shd_zero_xattrop(xlator_t *this, uuid_t gfid)
{
- int ret = 0;
- shd_dump_t *dump_data = NULL;
- shd_event_t *shd_event = NULL;
+ call_frame_t *frame = NULL;
+ inode_t *inode = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int ret = 0;
+ int i = 0;
+ int raw[AFR_NUM_CHANGE_LOGS] = {0};
+
+ priv = this->private;
+ frame = afr_frame_create(this, NULL);
+ if (!frame)
+ goto out;
+ inode = afr_inode_find(this, gfid);
+ if (!inode)
+ goto out;
+ xattr = dict_new();
+ if (!xattr)
+ goto out;
+ ret = dict_set_static_bin(xattr, AFR_DIRTY, raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], raw,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret)
+ goto out;
+ }
+
+ /*Send xattrop to all bricks. Doing a lookup to see if bricks are up or
+ * has valid repies for this gfid seems a bit of an overkill.*/
+ for (i = 0; i < priv->child_count; i++)
+ afr_selfheal_post_op(frame, this, inode, i, xattr, NULL);
- dump_data = data;
- shd_event = cb->data;
- if (shd_event->child != dump_data->child)
- goto out;
- ret = _add_path_to_dict (dump_data->this, dump_data->dict,
- dump_data->child, shd_event->path, &cb->tv,
- _gf_false);
out:
- return ret;
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ if (inode)
+ inode_unref(inode);
+ if (xattr)
+ dict_unref(xattr);
+ return;
}
int
-_add_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict, int child)
+afr_shd_selfheal_name(struct subvol_healer *healer, int child, uuid_t parent,
+ const char *bname)
{
- shd_dump_t dump_data = {0};
+ int ret = -1;
- dump_data.this = this;
- dump_data.dict = dict;
- dump_data.child = child;
- eh_dump (eh, &dump_data, _add_event_to_dict);
- return 0;
+ ret = afr_selfheal_name(THIS, parent, bname, NULL, NULL);
+
+ return ret;
}
int
-_add_summary_to_dict (xlator_t *this, afr_crawl_data_t *crawl_data,
- gf_dirent_t *entry,
- loc_t *childloc, loc_t *parentloc, struct iatt *iattr)
+afr_shd_selfheal(struct subvol_healer *healer, int child, uuid_t gfid)
{
- dict_t *output = NULL;
- xlator_t *readdir_xl = NULL;
- int ret = -1;
- char *path = NULL;
+ int ret = 0;
+ eh_t *eh = NULL;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ shd_event_t *shd_event = NULL;
+ char *path = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ crawl_event_t *crawl_event = NULL;
+
+ this = healer->this;
+ priv = this->private;
+ shd = &priv->shd;
+ crawl_event = &healer->crawl_event;
+
+ subvol = priv->children[child];
+
+ // If this fails with ENOENT/ESTALE index is stale
+ ret = syncop_gfid_to_path(this->itable, subvol, gfid, &path);
+ if (ret < 0)
+ return ret;
- if (uuid_is_null (childloc->gfid))
- goto out;
+ ret = afr_selfheal(this, gfid);
+
+ LOCK(&priv->lock);
+ {
+ if (ret == -EIO) {
+ eh = shd->split_brain;
+ crawl_event->split_brain_count++;
+ } else if (ret < 0) {
+ crawl_event->heal_failed_count++;
+ } else if (ret == 0) {
+ crawl_event->healed_count++;
+ }
+ }
+ UNLOCK(&priv->lock);
- output = crawl_data->op_data;
- readdir_xl = crawl_data->readdir_xl;
+ if (eh) {
+ shd_event = GF_CALLOC(1, sizeof(*shd_event), gf_afr_mt_shd_event_t);
+ if (!shd_event)
+ goto out;
- ret = _get_path_from_gfid_loc (this, readdir_xl, childloc, &path);
- if (ret)
- goto out;
+ shd_event->child = child;
+ shd_event->path = path;
- ret = _add_path_to_dict (this, output, crawl_data->child, path, NULL,
- _gf_true);
+ if (eh_save_history(eh, shd_event) < 0)
+ goto out;
+
+ shd_event = NULL;
+ path = NULL;
+ }
out:
- if (ret && path)
- GF_FREE (path);
- return ret;
+ GF_FREE(shd_event);
+ GF_FREE(path);
+ return ret;
}
void
-_remove_stale_index (xlator_t *this, xlator_t *readdir_xl,
- loc_t *parent, char *fname)
+afr_shd_sweep_prepare(struct subvol_healer *healer)
{
- int ret = 0;
- loc_t index_loc = {0};
+ crawl_event_t *event = NULL;
- ret = _build_index_loc (this, &index_loc, fname, parent);
- if (ret)
- goto out;
- gf_log (this->name, GF_LOG_INFO, "Removing stale index "
- "for %s on %s", index_loc.name, readdir_xl->name);
- ret = syncop_unlink (readdir_xl, &index_loc);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s: Failed to remove"
- " index on %s - %s", index_loc.name,
- readdir_xl->name, strerror (errno));
- }
- index_loc.path = NULL;
- loc_wipe (&index_loc);
-out:
- return;
+ event = &healer->crawl_event;
+
+ event->healed_count = 0;
+ event->split_brain_count = 0;
+ event->heal_failed_count = 0;
+
+ event->start_time = gf_time();
+ event->end_time = 0;
+ _mask_cancellation();
}
void
-_crawl_post_sh_action (xlator_t *this, loc_t *parent, loc_t *child,
- int32_t op_ret, int32_t op_errno, dict_t *xattr_rsp,
- afr_crawl_data_t *crawl_data)
+afr_shd_sweep_done(struct subvol_healer *healer)
{
- int ret = 0;
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- eh_t *eh = NULL;
- char *path = NULL;
- shd_event_t *event = NULL;
- int32_t sh_failed = 0;
- gf_boolean_t split_brain = 0;
-
- priv = this->private;
- shd = &priv->shd;
- if (crawl_data->crawl == INDEX) {
- if ((op_ret < 0) && (op_errno == ENOENT)) {
- _remove_stale_index (this, crawl_data->readdir_xl,
- parent, uuid_utoa (child->gfid));
- goto out;
- }
- ret = _get_path_from_gfid_loc (this, crawl_data->readdir_xl,
- child, &path);
- if (ret)
- goto out;
- } else {
- path = gf_strdup (child->path);
- if (!path) {
- ret = -1;
- goto out;
- }
- }
+ crawl_event_t *event = NULL;
+ crawl_event_t *history = NULL;
+ afr_self_heald_t *shd = NULL;
- if (xattr_rsp)
- ret = dict_get_int32 (xattr_rsp, "sh-failed", &sh_failed);
- split_brain = afr_is_split_brain (this, child->inode);
- if ((op_ret < 0 && op_errno == EIO) || split_brain)
- eh = shd->split_brain;
- else if ((op_ret < 0) || sh_failed)
- eh = shd->heal_failed;
- else
- eh = shd->healed;
- ret = -1;
- event = GF_CALLOC (1, sizeof (*event), gf_afr_mt_shd_event_t);
- if (!event)
- goto out;
- event->child = crawl_data->child;
- event->path = path;
- ret = eh_save_history (eh, event);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "%s:Failed to save to "
- "event history, (%d, %s)", path, op_ret, strerror (op_errno));
- goto out;
- }
- ret = 0;
-out:
- if (ret && path)
- GF_FREE (path);
+ event = &healer->crawl_event;
+ shd = &(((afr_private_t *)healer->this->private)->shd);
+
+ event->end_time = gf_time();
+ history = gf_memdup(event, sizeof(*event));
+ event->start_time = 0;
+
+ if (!history)
return;
+
+ if (eh_save_history(shd->statistics[healer->subvol], history) < 0)
+ GF_FREE(history);
+ _unmask_cancellation();
}
int
-_self_heal_entry (xlator_t *this, afr_crawl_data_t *crawl_data, gf_dirent_t *entry,
- loc_t *child, loc_t *parent, struct iatt *iattr)
+afr_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
{
- struct iatt parentbuf = {0};
- int ret = 0;
- dict_t *xattr_rsp = NULL;
-
- gf_log (this->name, GF_LOG_DEBUG, "lookup %s", child->path);
-
- ret = syncop_lookup (this, child, NULL,
- iattr, &xattr_rsp, &parentbuf);
- _crawl_post_sh_action (this, parent, child, ret, errno, xattr_rsp,
- crawl_data);
- if (xattr_rsp)
- dict_unref (xattr_rsp);
- return ret;
-}
+ struct subvol_healer *healer = data;
+ afr_private_t *priv = NULL;
+ uuid_t gfid = {0};
+ int ret = 0;
+ uint64_t val = IA_INVAL;
-static int
-afr_crawl_done (int ret, call_frame_t *sync_frame, void *data)
-{
- GF_FREE (data);
- STACK_DESTROY (sync_frame->root);
+ priv = healer->this->private;
+ if (!priv->shd.enabled)
+ return -EBUSY;
+
+ gf_msg_debug(healer->this->name, 0, "got entry: %s from %s", entry->d_name,
+ priv->children[healer->subvol]->name);
+
+ ret = gf_uuid_parse(entry->d_name, gfid);
+ if (ret)
return 0;
-}
-void
-_do_self_heal_on_subvol (xlator_t *this, int child, afr_crawl_type_t crawl)
-{
- afr_start_crawl (this, child, crawl, _self_heal_entry,
- NULL, _gf_true, STOP_CRAWL_ON_SINGLE_SUBVOL,
- afr_crawl_done);
+ inode_ctx_get2(parent->inode, subvol, NULL, &val);
+
+ ret = afr_shd_selfheal(healer, healer->subvol, gfid);
+
+ if (ret == -ENOENT || ret == -ESTALE)
+ afr_shd_entry_purge(subvol, parent->inode, entry->d_name, val);
+
+ if (ret == 2)
+ /* If bricks crashed in pre-op after creating indices/xattrop
+ * link but before setting afr changelogs, we end up with stale
+ * xattrop links but zero changelogs. Remove such entries by
+ * sending a post-op with zero changelogs.
+ */
+ afr_shd_zero_xattrop(healer->this, gfid);
+
+ return 0;
}
-gf_boolean_t
-_crawl_proceed (xlator_t *this, int child, int crawl_flags, char **reason)
+int
+afr_shd_index_sweep(struct subvol_healer *healer, char *vgfid)
{
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- gf_boolean_t proceed = _gf_false;
- char *msg = NULL;
-
- priv = this->private;
- shd = &priv->shd;
- if (!shd->enabled) {
- msg = "Self-heal daemon is not enabled";
- gf_log (this->name, GF_LOG_ERROR, "%s", msg);
- goto out;
- }
- if (!priv->child_up[child]) {
- gf_log (this->name, GF_LOG_ERROR, "Stopping crawl for %s , "
- "subvol went down", priv->children[child]->name);
- msg = "Brick is Not connected";
- goto out;
- }
+ loc_t loc = {0};
+ afr_private_t *priv = NULL;
+ int ret = 0;
+ xlator_t *subvol = NULL;
+ dict_t *xdata = NULL;
+ call_frame_t *frame = NULL;
+
+ priv = healer->this->private;
+ subvol = priv->children[healer->subvol];
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ loc.inode = afr_shd_index_inode(healer->this, subvol, vgfid);
+ if (!loc.inode) {
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_INDEX_DIR_GET_FAILED, "unable to get index-dir on %s",
+ subvol->name);
+ ret = -errno;
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata || dict_set_int32_sizen(xdata, "get-gfid-type", 1)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_mt_dir_scan(frame, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
+ healer, afr_shd_index_heal, xdata,
+ priv->shd.max_threads, priv->shd.wait_qlength);
+
+ if (ret == 0)
+ ret = healer->crawl_event.healed_count;
- if (crawl_flags & STOP_CRAWL_ON_SINGLE_SUBVOL) {
- if (afr_up_children_count (priv->child_up,
- priv->child_count) < 2) {
- gf_log (this->name, GF_LOG_ERROR, "Stopping crawl as "
- "< 2 children are up");
- msg = "< 2 bricks in replica are running";
- goto out;
- }
- }
- proceed = _gf_true;
out:
- if (reason)
- *reason = msg;
- return proceed;
+ loc_wipe(&loc);
+
+ if (xdata)
+ dict_unref(xdata);
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ return ret;
}
int
-_do_crawl_op_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl,
- shd_crawl_op op, dict_t *output)
+afr_shd_index_sweep_all(struct subvol_healer *healer)
{
- afr_private_t *priv = NULL;
- char *status = NULL;
- char *subkey = NULL;
- char key[256] = {0};
- shd_pos_t pos_data = {0};
- int op_ret = -1;
- int xl_id = -1;
- int i = 0;
- int ret = 0;
- int crawl_flags = 0;
-
- priv = this->private;
- if (op == HEAL)
- crawl_flags |= STOP_CRAWL_ON_SINGLE_SUBVOL;
-
- ret = dict_get_int32 (output, this->name, &xl_id);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Invalid input, "
- "translator-id is not available");
- goto out;
- }
- pos_data.this = this;
- subkey = "status";
- for (i = 0; i < priv->child_count; i++) {
- if (_crawl_proceed (this, i, crawl_flags, &status)) {
- pos_data.child = i;
- ret = synctask_new (this->ctx->env,
- afr_syncop_find_child_position,
- NULL, NULL, &pos_data);
- if (ret) {
- status = "Not able to find brick location";
- } else if (pos_data.pos == AFR_POS_REMOTE) {
- status = "brick is remote";
- } else {
- op_ret = 0;
- if (op == HEAL) {
- status = "Started self-heal";
- _do_self_heal_on_subvol (this, i,
- crawl);
- } else {
- status = "";
- afr_start_crawl (this, i, INDEX,
- _add_summary_to_dict,
- output, _gf_false, 0,
- NULL);
- }
- }
- snprintf (key, sizeof (key), "%d-%d-%s", xl_id,
- i, subkey);
- ret = dict_set_str (output, key, status);
- if (!op_ret && (crawl == FULL))
- break;
- }
- snprintf (key, sizeof (key), "%d-%d-%s", xl_id, i, subkey);
- ret = dict_set_str (output, key, status);
- }
+ int ret = 0;
+ int count = 0;
+
+ ret = afr_shd_index_sweep(healer, GF_XATTROP_INDEX_GFID);
+ if (ret < 0)
+ goto out;
+ count = ret;
+
+ ret = afr_shd_index_sweep(healer, GF_XATTROP_DIRTY_GFID);
+ if (ret < 0)
+ goto out;
+ count += ret;
+
+ ret = afr_shd_index_sweep(healer, GF_XATTROP_ENTRY_CHANGES_GFID);
+ if (ret < 0)
+ goto out;
+ count += ret;
out:
- return op_ret;
+ if (ret < 0)
+ return ret;
+ else
+ return count;
}
int
-_do_self_heal_on_local_subvols (xlator_t *this, afr_crawl_type_t crawl,
- dict_t *output)
+afr_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
{
- return _do_crawl_op_on_local_subvols (this, crawl, HEAL, output);
+ struct subvol_healer *healer = data;
+ xlator_t *this = healer->this;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (this->cleanup_starting) {
+ return -ENOTCONN;
+ }
+
+ if (!priv->shd.enabled)
+ return -EBUSY;
+
+ afr_shd_selfheal_name(healer, healer->subvol, parent->inode->gfid,
+ entry->d_name);
+
+ afr_shd_selfheal(healer, healer->subvol, entry->d_stat.ia_gfid);
+
+ return 0;
}
int
-_get_index_summary_on_local_subvols (xlator_t *this, dict_t *output)
+afr_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
{
- return _do_crawl_op_on_local_subvols (this, INDEX, INFO, output);
+ afr_private_t *priv = NULL;
+ loc_t loc = {0};
+
+ priv = healer->this->private;
+ loc.inode = inode;
+ return syncop_ftw(priv->children[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer, afr_shd_full_heal);
}
int
-_add_all_subvols_eh_to_dict (xlator_t *this, eh_t *eh, dict_t *dict)
+afr_shd_fill_ta_loc(xlator_t *this, loc_t *loc)
{
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- int i = 0;
+ afr_private_t *priv = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ int ret = -1;
+
+ priv = this->private;
+ loc->parent = inode_ref(this->itable->root);
+ gf_uuid_copy(loc->pargfid, loc->parent->gfid);
+ loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX];
+ loc->inode = inode_new(loc->parent->table);
+ GF_CHECK_ALLOC(loc->inode, ret, out);
+
+ if (!gf_uuid_is_null(priv->ta_gfid))
+ goto assign_gfid;
+
+ ret = syncop_lookup(priv->children[THIN_ARBITER_BRICK_INDEX], loc, &stbuf,
+ 0, 0, 0);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed lookup on file %s.", loc->name);
+ goto out;
+ }
+
+ gf_uuid_copy(priv->ta_gfid, stbuf.ia_gfid);
+
+assign_gfid:
+ gf_uuid_copy(loc->gfid, priv->ta_gfid);
+ ret = 0;
- priv = this->private;
- shd = &priv->shd;
+out:
+ if (ret)
+ loc_wipe(loc);
- for (i = 0; i < priv->child_count; i++) {
- if (shd->pos[i] != AFR_POS_LOCAL)
- continue;
- _add_eh_to_dict (this, eh, dict, i);
- }
- return 0;
+ return ret;
}
int
-afr_xl_op (xlator_t *this, dict_t *input, dict_t *output)
+_afr_shd_ta_get_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata)
{
- gf_xl_afr_op_t op = GF_AFR_OP_INVALID;
- int ret = 0;
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- int xl_id = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int raw[AFR_NUM_CHANGE_LOGS] = {
+ 0,
+ };
+ int ret = -1;
+ int i = 0;
+
+ priv = this->private;
+
+ xattr = dict_new();
+ if (!xattr) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_DICT_GET_FAILED,
+ "Failed to create dict.");
+ goto out;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], &raw,
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret)
+ goto out;
+ }
- priv = this->private;
- shd = &priv->shd;
+ ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], loc,
+ GF_XATTROP_ADD_ARRAY, xattr, NULL, xdata, NULL);
+ if (ret || !(*xdata)) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Xattrop failed on %s.", loc->name);
+ }
- ret = dict_get_int32 (input, "xl-op", (int32_t*)&op);
- if (ret)
- goto out;
- ret = dict_get_int32 (input, this->name, &xl_id);
- if (ret)
- goto out;
- ret = dict_set_int32 (output, this->name, xl_id);
- if (ret)
- goto out;
- switch (op) {
- case GF_AFR_OP_HEAL_INDEX:
- ret = _do_self_heal_on_local_subvols (this, INDEX, output);
- break;
- case GF_AFR_OP_HEAL_FULL:
- ret = _do_self_heal_on_local_subvols (this, FULL, output);
- break;
- case GF_AFR_OP_INDEX_SUMMARY:
- (void)_get_index_summary_on_local_subvols (this, output);
- ret = 0;
- break;
- case GF_AFR_OP_HEALED_FILES:
- ret = _add_all_subvols_eh_to_dict (this, shd->healed, output);
- break;
- case GF_AFR_OP_HEAL_FAILED_FILES:
- ret = _add_all_subvols_eh_to_dict (this, shd->heal_failed,
- output);
- break;
- case GF_AFR_OP_SPLIT_BRAIN_FILES:
- ret = _add_all_subvols_eh_to_dict (this, shd->split_brain,
- output);
- break;
- default:
- gf_log (this->name, GF_LOG_ERROR, "Unknown set op %d", op);
- break;
- }
out:
- dict_del (output, this->name);
- return ret;
+ if (xattr)
+ dict_unref(xattr);
+
+ return ret;
}
void
-afr_poll_self_heal (void *data)
+afr_shd_ta_get_xattrs(xlator_t *this, loc_t *loc, struct subvol_healer *healer,
+ dict_t **xdata)
{
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- struct timeval timeout = {0};
- xlator_t *this = NULL;
- long child = (long)data;
- gf_timer_t *old_timer = NULL;
- gf_timer_t *new_timer = NULL;
- shd_pos_t pos_data = {0};
- int ret = 0;
-
- this = THIS;
- priv = this->private;
- shd = &priv->shd;
-
- if (shd->pos[child] == AFR_POS_UNKNOWN) {
- pos_data.this = this;
- pos_data.child = child;
- ret = synctask_new (this->ctx->env,
- afr_syncop_find_child_position,
- NULL, NULL, &pos_data);
- if (!ret)
- shd->pos[child] = pos_data.pos;
- }
- if (shd->enabled && (shd->pos[child] == AFR_POS_LOCAL))
- _do_self_heal_on_subvol (this, child, INDEX);
- timeout.tv_sec = shd->timeout;
- timeout.tv_usec = 0;
- //notify and previous timer should be synchronized.
- LOCK (&priv->lock);
- {
- old_timer = shd->timer[child];
- if (shd->pos[child] == AFR_POS_REMOTE)
- goto unlock;
- shd->timer[child] = gf_timer_call_after (this->ctx, timeout,
- afr_poll_self_heal,
- data);
- new_timer = shd->timer[child];
- }
-unlock:
- UNLOCK (&priv->lock);
-
- if (old_timer)
- gf_timer_call_cancel (this->ctx, old_timer);
- if (!new_timer && (shd->pos[child] != AFR_POS_REMOTE)) {
- gf_log (this->name, GF_LOG_WARNING,
- "Could not create self-heal polling timer for %s",
- priv->children[child]->name);
+ int ret = 0;
+
+ loc_wipe(loc);
+ if (afr_shd_fill_ta_loc(this, loc)) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.", loc->name);
+ ret = -1;
+ goto out;
+ }
+
+ ret = afr_ta_post_op_lock(this, loc);
+ if (ret)
+ goto out;
+
+ ret = _afr_shd_ta_get_xattrs(this, loc, xdata);
+ if (ret) {
+ if (*xdata) {
+ dict_unref(*xdata);
+ *xdata = NULL;
}
- return;
+ }
+
+ afr_ta_post_op_unlock(this, loc);
+
+out:
+ if (ret)
+ healer->rerun = 1;
}
-static int
-afr_local_child_poll_self_heal (int ret, call_frame_t *sync_frame, void *data)
+int
+afr_shd_ta_unset_xattrs(xlator_t *this, loc_t *loc, dict_t **xdata, int healer)
{
- afr_self_heald_t *shd = NULL;
- shd_pos_t *pos_data = data;
- afr_private_t *priv = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xattr = NULL;
+ gf_boolean_t need_xattrop = _gf_false;
+ void *pending_raw = NULL;
+ int *raw = NULL;
+ int pending[AFR_NUM_CHANGE_LOGS] = {
+ 0,
+ };
+ int i = 0;
+ int j = 0;
+ int val = 0;
+ int ret = -1;
+
+ priv = this->private;
+
+ xattr = dict_new();
+ if (!xattr) {
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ raw = GF_CALLOC(AFR_NUM_CHANGE_LOGS, sizeof(int), gf_afr_mt_int32_t);
+ if (!raw) {
+ goto out;
+ }
- if (ret)
- goto out;
+ ret = dict_get_ptr(*xdata, priv->pending_key[i], &pending_raw);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "Error getting value "
+ "of pending key %s",
+ priv->pending_key[i]);
+ GF_FREE(raw);
+ goto out;
+ }
+
+ memcpy(pending, pending_raw, sizeof(pending));
+ for (j = 0; j < AFR_NUM_CHANGE_LOGS; j++) {
+ val = ntoh32(pending[j]);
+ if (val) {
+ if (i == healer) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_THIN_ARB,
+ "I am "
+ "not the good shd. Skipping. "
+ "SHD = %d.",
+ healer);
+ ret = 0;
+ GF_FREE(raw);
+ goto out;
+ }
+ need_xattrop = _gf_true;
+ raw[j] = hton32(-val);
+ }
+ }
+
+ ret = dict_set_bin(xattr, priv->pending_key[i], raw,
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ if (ret) {
+ GF_FREE(raw);
+ goto out;
+ }
+
+ if (need_xattrop)
+ break;
+ }
+
+ if (!need_xattrop) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], loc,
+ GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Xattrop failed.");
- priv = pos_data->this->private;
- shd = &priv->shd;
- shd->pos[pos_data->child] = pos_data->pos;
- if (pos_data->pos != AFR_POS_REMOTE)
- afr_poll_self_heal ((void*)(long)pos_data->child);
out:
- GF_FREE (data);
- return 0;
+ if (xattr)
+ dict_unref(xattr);
+
+ return ret;
}
void
-afr_proactive_self_heal (void *data)
+afr_shd_ta_check_and_unset_xattrs(xlator_t *this, loc_t *loc,
+ struct subvol_healer *healer,
+ dict_t *pre_crawl_xdata)
{
- xlator_t *this = NULL;
- long child = (long)data;
- shd_pos_t *pos_data = NULL;
- int ret = 0;
+ int ret_lock = 0;
+ int ret = 0;
+ dict_t *post_crawl_xdata = NULL;
- this = THIS;
+ ret_lock = afr_ta_post_op_lock(this, loc);
+ if (ret_lock)
+ goto unref;
- //Position of brick could have changed and it could be local now.
- //Compute the position again
- pos_data = GF_CALLOC (1, sizeof (*pos_data), gf_afr_mt_pos_data_t);
- if (!pos_data)
- goto out;
- pos_data->this = this;
- pos_data->child = child;
- ret = synctask_new (this->ctx->env, afr_syncop_find_child_position,
- afr_local_child_poll_self_heal, NULL, pos_data);
- if (ret)
- goto out;
-out:
- return;
-}
+ ret = _afr_shd_ta_get_xattrs(this, loc, &post_crawl_xdata);
+ if (ret)
+ goto unref;
-static int
-get_pathinfo_host (char *pathinfo, char *hostname, size_t size)
-{
- char *start = NULL;
- char *end = NULL;
- int ret = -1;
- int i = 0;
+ if (!are_dicts_equal(pre_crawl_xdata, post_crawl_xdata, NULL, NULL)) {
+ ret = -1;
+ goto unref;
+ }
- if (!pathinfo)
- goto out;
+ ret = afr_shd_ta_unset_xattrs(this, loc, &post_crawl_xdata, healer->subvol);
- start = strchr (pathinfo, ':');
- if (!start)
- goto out;
- end = strrchr (pathinfo, ':');
- if (start == end)
- goto out;
+unref:
+ if (post_crawl_xdata) {
+ dict_unref(post_crawl_xdata);
+ post_crawl_xdata = NULL;
+ }
- memset (hostname, 0, size);
- i = 0;
- while (++start != end)
- hostname[i++] = *start;
- ret = 0;
-out:
- return ret;
+ if (ret || ret_lock)
+ healer->rerun = 1;
+
+ if (!ret_lock)
+ afr_ta_post_op_unlock(this, loc);
}
-int
-afr_local_pathinfo (char *pathinfo, gf_boolean_t *local)
+gf_boolean_t
+afr_bricks_available_for_heal(afr_private_t *priv)
{
- int ret = 0;
- char pathinfohost[1024] = {0};
- char localhost[1024] = {0};
- xlator_t *this = THIS;
+ int up_children = 0;
- *local = _gf_false;
- ret = get_pathinfo_host (pathinfo, pathinfohost, sizeof (pathinfohost));
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Invalid pathinfo: %s",
- pathinfo);
- goto out;
- }
+ up_children = __afr_get_up_children_count(priv);
+ if (up_children < 2) {
+ return _gf_false;
+ }
+ return _gf_true;
+}
- ret = gethostname (localhost, sizeof (localhost));
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "gethostname() failed, "
- "reason: %s", strerror (errno));
- goto out;
+static gf_boolean_t
+afr_shd_ta_needs_heal(xlator_t *this, struct subvol_healer *healer)
+{
+ dict_t *xdata = NULL;
+ afr_private_t *priv = NULL;
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ int i = 0;
+ gf_boolean_t need_heal = _gf_false;
+
+ priv = this->private;
+
+ ret = afr_shd_fill_ta_loc(this, &loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate thin-arbiter loc for: %s.", loc.name);
+ healer->rerun = 1;
+ goto out;
+ }
+
+ if (_afr_shd_ta_get_xattrs(this, &loc, &xdata)) {
+ healer->rerun = 1;
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (afr_ta_dict_contains_pending_xattr(xdata, priv, i)) {
+ need_heal = _gf_true;
+ break;
}
+ }
- if (!strcmp (localhost, pathinfohost))
- *local = _gf_true;
out:
- return ret;
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+
+ return need_heal;
}
-int
-afr_crawl_build_start_loc (xlator_t *this, afr_crawl_data_t *crawl_data,
- loc_t *dirloc)
+static int
+afr_shd_anon_inode_cleaner(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
{
- afr_private_t *priv = NULL;
- dict_t *xattr = NULL;
- void *index_gfid = NULL;
- loc_t rootloc = {0};
- struct iatt iattr = {0};
- struct iatt parent = {0};
- int ret = 0;
- xlator_t *readdir_xl = crawl_data->readdir_xl;
- inode_t *link_inode = NULL;
-
- priv = this->private;
- if (crawl_data->crawl == FULL) {
- afr_build_root_loc (this, dirloc);
- } else {
- afr_build_root_loc (this, &rootloc);
- ret = syncop_getxattr (readdir_xl, &rootloc, &xattr,
- GF_XATTROP_INDEX_GFID);
- if (ret < 0)
- goto out;
- ret = dict_get_ptr (xattr, GF_XATTROP_INDEX_GFID, &index_gfid);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "failed to get index "
- "dir gfid on %s", readdir_xl->name);
- goto out;
- }
- if (!index_gfid) {
- gf_log (this->name, GF_LOG_ERROR, "index gfid empty "
- "on %s", readdir_xl->name);
- ret = -1;
- goto out;
- }
- uuid_copy (dirloc->gfid, index_gfid);
- dirloc->path = "";
- dirloc->inode = inode_new (priv->root_inode->table);
- ret = syncop_lookup (readdir_xl, dirloc, NULL,
- &iattr, NULL, &parent);
- if (ret < 0) {
- if (errno != ENOENT) {
- gf_log (this->name, GF_LOG_ERROR, "lookup "
- "failed on index dir on %s - (%s)",
- readdir_xl->name, strerror (errno));
- }
- goto out;
+ struct subvol_healer *healer = data;
+ afr_private_t *priv = healer->this->private;
+ call_frame_t *frame = NULL;
+ afr_local_t *local = NULL;
+ int ret = 0;
+ loc_t loc = {0};
+ int count = 0;
+ int i = 0;
+ int op_errno = 0;
+ struct iatt *iatt = NULL;
+ gf_boolean_t multiple_links = _gf_false;
+ unsigned char *gfid_present = alloca0(priv->child_count);
+ unsigned char *entry_present = alloca0(priv->child_count);
+ char *type = "file";
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+ local = frame->local;
+ if (AFR_COUNT(local->child_up, priv->child_count) != priv->child_count) {
+ gf_msg_debug(healer->this->name, 0,
+ "Not all bricks are up. Skipping "
+ "cleanup of %s on %s",
+ entry->d_name, subvol->name);
+ ret = 0;
+ goto out;
+ }
+
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = gf_uuid_parse(entry->d_name, loc.gfid);
+ if (ret) {
+ ret = 0;
+ goto out;
+ }
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup, &loc,
+ NULL);
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ gfid_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ if (iatt->ia_type == IA_IFDIR) {
+ type = "dir";
+ }
+
+ if (i == healer->subvol) {
+ if (local->replies[i].poststat.ia_nlink > 1) {
+ multiple_links = _gf_true;
}
- link_inode = inode_link (dirloc->inode, NULL, NULL, &iattr);
- inode_unref (link_inode);
+ }
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ /*Inode is deleted from subvol*/
+ if (count == 1 || (iatt->ia_type != IA_IFDIR && multiple_links)) {
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR, "expunging %s %s/%s on %s", type,
+ priv->anon_inode_name, entry->d_name, subvol->name);
+ ret = afr_shd_entry_purge(subvol, parent->inode, entry->d_name,
+ iatt->ia_type);
+ if (ret == -ENOENT || ret == -ESTALE)
+ ret = 0;
+ } else if (count > 1) {
+ loc_wipe(&loc);
+ loc.parent = inode_ref(parent->inode);
+ loc.name = entry->d_name;
+ loc.inode = inode_new(parent->inode->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
}
+ AFR_ONLIST(local->child_up, frame, afr_selfheal_discover_cbk, lookup,
+ &loc, NULL);
+ count = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->replies[i].op_ret == 0) {
+ count++;
+ entry_present[i] = 1;
+ iatt = &local->replies[i].poststat;
+ } else if (local->replies[i].op_errno != ENOENT &&
+ local->replies[i].op_errno != ESTALE) {
+ /*We don't have complete view. Skip the entry*/
+ gf_msg_debug(healer->this->name, local->replies[i].op_errno,
+ "Skipping cleanup of %s on %s", entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (gfid_present[i] && !entry_present[i]) {
+ /*Entry is not anonymous on at least one subvol*/
+ gf_msg_debug(healer->this->name, 0,
+ "Valid entry present on %s "
+ "Skipping cleanup of %s on %s",
+ priv->children[i]->name, entry->d_name,
+ subvol->name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ gf_msg(healer->this->name, GF_LOG_WARNING, 0,
+ AFR_MSG_EXPUNGING_FILE_OR_DIR,
+ "expunging %s %s/%s on all subvols", type, priv->anon_inode_name,
+ entry->d_name);
ret = 0;
+ for (i = 0; i < priv->child_count; i++) {
+ op_errno = -afr_shd_entry_purge(priv->children[i], loc.parent,
+ entry->d_name, iatt->ia_type);
+ if (op_errno != ENOENT && op_errno != ESTALE) {
+ ret |= -op_errno;
+ }
+ }
+ }
+
out:
- if (xattr)
- dict_unref (xattr);
- loc_wipe (&rootloc);
- return ret;
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return ret;
}
-int
-afr_crawl_opendir (xlator_t *this, afr_crawl_data_t *crawl_data, fd_t **dirfd,
- loc_t *dirloc)
+static void
+afr_cleanup_anon_inode_dir(struct subvol_healer *healer)
{
- fd_t *fd = NULL;
- int ret = 0;
-
- if (crawl_data->crawl == FULL) {
- fd = fd_create (dirloc->inode, crawl_data->pid);
- if (!fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to create fd for %s", dirloc->path);
- ret = -1;
- goto out;
- }
-
- ret = syncop_opendir (crawl_data->readdir_xl, dirloc, fd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "opendir failed on %s", dirloc->path);
- goto out;
- }
- } else {
- fd = fd_anonymous (dirloc->inode);
- }
- ret = 0;
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ afr_private_t *priv = healer->this->private;
+ loc_t loc = {0};
+
+ ret = afr_anon_inode_create(healer->this, healer->subvol, &loc.inode);
+ if (ret)
+ goto out;
+
+ frame = afr_frame_create(healer->this, &ret);
+ if (!frame) {
+ ret = -ret;
+ goto out;
+ }
+
+ ret = syncop_mt_dir_scan(frame, priv->children[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer,
+ afr_shd_anon_inode_cleaner, NULL,
+ priv->shd.max_threads, priv->shd.wait_qlength);
out:
- if (!ret)
- *dirfd = fd;
- return ret;
+ if (frame)
+ AFR_STACK_DESTROY(frame);
+ loc_wipe(&loc);
+ return;
}
-xlator_t*
-afr_crawl_readdir_xl_get (xlator_t *this, afr_crawl_data_t *crawl_data)
+void *
+afr_shd_index_healer(void *data)
{
- afr_private_t *priv = this->private;
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+ int ret = 0;
+ afr_private_t *priv = NULL;
+ dict_t *pre_crawl_xdata = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ healer = data;
+ THIS = this = healer->this;
+ priv = this->private;
+
+ for (;;) {
+ afr_shd_healer_wait(healer);
+
+ if (!afr_bricks_available_for_heal(priv))
+ continue;
+
+ ASSERT_LOCAL(this, healer);
+ priv->local[healer->subvol] = healer->local;
+
+ if (priv->thin_arbiter_count) {
+ if (afr_shd_ta_needs_heal(this, healer))
+ afr_shd_ta_get_xattrs(this, &loc, healer, &pre_crawl_xdata);
+ }
- if (crawl_data->crawl == FULL) {
- return this;
- } else {
- return priv->children[crawl_data->child];
+ do {
+ gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
+ afr_subvol_name(this, healer->subvol));
+
+ afr_shd_sweep_prepare(healer);
+
+ ret = afr_shd_index_sweep_all(healer);
+
+ afr_shd_sweep_done(healer);
+ /*
+ As long as at least one gfid was
+ healed, keep retrying. We may have
+ just healed a directory and thereby
+ created entries for other gfids which
+ could not be healed thus far.
+ */
+
+ gf_msg_debug(this->name, 0, "finished index sweep on subvol %s",
+ afr_subvol_name(this, healer->subvol));
+ /*
+ Give a pause before retrying to avoid a busy loop
+ in case the only entry in index is because of
+ an ongoing I/O.
+ */
+ sleep(1);
+ } while (ret > 0);
+
+ if (ret == 0) {
+ afr_cleanup_anon_inode_dir(healer);
}
- return NULL;
-}
-int
-afr_crawl_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent,
- gf_dirent_t *entry, afr_crawl_data_t *crawl_data)
-{
- int ret = -1;
- afr_private_t *priv = NULL;
+ if (ret == 0 && pre_crawl_xdata &&
+ !healer->crawl_event.heal_failed_count) {
+ afr_shd_ta_check_and_unset_xattrs(this, &loc, healer,
+ pre_crawl_xdata);
+ }
- priv = this->private;
- if (crawl_data->crawl == FULL) {
- ret = afr_build_child_loc (this, child, parent, entry->d_name);
- } else {
- child->inode = inode_new (priv->root_inode->table);
- if (!child->inode)
- goto out;
- uuid_parse (entry->d_name, child->gfid);
- ret = _loc_assign_gfid_path (child);
+ if (pre_crawl_xdata) {
+ dict_unref(pre_crawl_xdata);
+ pre_crawl_xdata = NULL;
}
-out:
- return ret;
+ }
+
+ return NULL;
}
-static int
-_process_entries (xlator_t *this, loc_t *parentloc, gf_dirent_t *entries,
- off_t *offset, afr_crawl_data_t *crawl_data)
+void *
+afr_shd_full_healer(void *data)
{
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- int ret = 0;
- loc_t entry_loc = {0};
- fd_t *fd = NULL;
- struct iatt iattr = {0};
- inode_t *link_inode = NULL;
-
- list_for_each_entry_safe (entry, tmp, &entries->list, list) {
- if (!_crawl_proceed (this, crawl_data->child,
- crawl_data->crawl_flags, NULL)) {
- ret = -1;
- goto out;
- }
- *offset = entry->d_off;
- if (IS_ENTRY_CWD (entry->d_name) ||
- IS_ENTRY_PARENT (entry->d_name))
- continue;
- if ((crawl_data->crawl == FULL) &&
- uuid_is_null (entry->d_stat.ia_gfid)) {
- gf_log (this->name, GF_LOG_WARNING, "%s/%s: No "
- "gfid present skipping",
- parentloc->path, entry->d_name);
- continue;
- }
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+ int run = 0;
- loc_wipe (&entry_loc);
- ret = afr_crawl_build_child_loc (this, &entry_loc, parentloc,
- entry, crawl_data);
- if (ret)
- goto out;
+ healer = data;
+ THIS = this = healer->this;
- ret = crawl_data->process_entry (this, crawl_data, entry,
- &entry_loc, parentloc, &iattr);
+ for (;;) {
+ pthread_mutex_lock(&healer->mutex);
+ {
+ run = __afr_shd_healer_wait(healer);
+ if (!run)
+ healer->running = _gf_false;
+ }
+ pthread_mutex_unlock(&healer->mutex);
- if (ret)
- continue;
+ if (!run)
+ break;
- link_inode = inode_link (entry_loc.inode, NULL, NULL, &iattr);
- if (link_inode == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "inode link failed "
- "on the inode (%s)",entry_loc.path);
- ret = -1;
- goto out;
- }
- inode_unref (link_inode);
-
- if (crawl_data->crawl == INDEX)
- continue;
-
- if (!IA_ISDIR (iattr.ia_type))
- continue;
- fd = NULL;
- ret = afr_crawl_opendir (this, crawl_data, &fd, &entry_loc);
- if (ret)
- continue;
- ret = _crawl_directory (fd, &entry_loc, crawl_data);
- if (fd)
- fd_unref (fd);
- }
- ret = 0;
-out:
- loc_wipe (&entry_loc);
- return ret;
+ ASSERT_LOCAL(this, healer);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "starting full sweep on subvol %s",
+ afr_subvol_name(this, healer->subvol));
+
+ afr_shd_sweep_prepare(healer);
+
+ afr_shd_full_sweep(healer, this->itable->root);
+
+ afr_shd_sweep_done(healer);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_SELF_HEAL_INFO,
+ "finished full sweep on subvol %s",
+ afr_subvol_name(this, healer->subvol));
+ }
+
+ return NULL;
}
-static int
-_crawl_directory (fd_t *fd, loc_t *loc, afr_crawl_data_t *crawl_data)
+int
+afr_shd_healer_init(xlator_t *this, struct subvol_healer *healer)
{
- xlator_t *this = NULL;
- off_t offset = 0;
- gf_dirent_t entries;
- int ret = 0;
- gf_boolean_t free_entries = _gf_false;
- xlator_t *readdir_xl = crawl_data->readdir_xl;
-
- INIT_LIST_HEAD (&entries.list);
- this = THIS;
-
- GF_ASSERT (loc->inode);
-
- if (crawl_data->crawl == FULL)
- gf_log (this->name, GF_LOG_DEBUG, "crawling %s", loc->path);
- else
- gf_log (this->name, GF_LOG_DEBUG, "crawling INDEX %s",
- uuid_utoa (loc->gfid));
-
- while (1) {
- if (crawl_data->crawl == FULL)
- ret = syncop_readdirp (readdir_xl, fd, 131072, offset,
- NULL, &entries);
- else
- ret = syncop_readdir (readdir_xl, fd, 131072, offset,
- &entries);
- if (ret <= 0)
- break;
- ret = 0;
- free_entries = _gf_true;
+ int ret = 0;
- if (!_crawl_proceed (this, crawl_data->child,
- crawl_data->crawl_flags, NULL)) {
- ret = -1;
- goto out;
- }
- if (list_empty (&entries.list))
- goto out;
+ ret = pthread_mutex_init(&healer->mutex, NULL);
+ if (ret)
+ goto out;
- ret = _process_entries (this, loc, &entries, &offset,
- crawl_data);
- gf_dirent_free (&entries);
- free_entries = _gf_false;
- }
- ret = 0;
+ ret = pthread_cond_init(&healer->cond, NULL);
+ if (ret)
+ goto out;
+
+ healer->this = this;
+ healer->running = _gf_false;
+ healer->rerun = _gf_false;
+ healer->local = _gf_false;
out:
- if (free_entries)
- gf_dirent_free (&entries);
- return ret;
+ return ret;
}
-static char*
-position_str_get (afr_child_pos_t pos)
+int
+afr_shd_healer_spawn(xlator_t *this, struct subvol_healer *healer,
+ void *(threadfn)(void *))
{
- switch (pos) {
- case AFR_POS_UNKNOWN:
- return "unknown";
- case AFR_POS_LOCAL:
- return "local";
- case AFR_POS_REMOTE:
- return "remote";
+ int ret = 0;
+
+ pthread_mutex_lock(&healer->mutex);
+ {
+ if (healer->running) {
+ pthread_cond_signal(&healer->cond);
+ } else {
+ ret = gf_thread_create(&healer->thread, NULL, threadfn, healer,
+ "shdheal");
+ if (ret)
+ goto unlock;
+ healer->running = 1;
}
- return NULL;
+
+ healer->rerun = 1;
+ }
+unlock:
+ pthread_mutex_unlock(&healer->mutex);
+
+ return ret;
}
int
-afr_find_child_position (xlator_t *this, int child, afr_child_pos_t *pos)
+afr_shd_full_healer_spawn(xlator_t *this, int subvol)
{
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- dict_t *xattr_rsp = NULL;
- loc_t loc = {0};
- int ret = 0;
- char *node_uuid = NULL;
+ return afr_shd_healer_spawn(this, NTH_FULL_HEALER(this, subvol),
+ afr_shd_full_healer);
+}
- priv = this->private;
- shd = &priv->shd;
+int
+afr_shd_index_healer_spawn(xlator_t *this, int subvol)
+{
+ return afr_shd_healer_spawn(this, NTH_INDEX_HEALER(this, subvol),
+ afr_shd_index_healer);
+}
- afr_build_root_loc (this, &loc);
+int
+afr_shd_dict_add_crawl_event(xlator_t *this, dict_t *output,
+ crawl_event_t *crawl_event)
+{
+ int ret = 0;
+ uint64_t count = 0;
+ char key[128] = {0};
+ int keylen = 0;
+ char suffix[64] = {0};
+ int xl_id = 0;
+ uint64_t healed_count = 0;
+ uint64_t split_brain_count = 0;
+ uint64_t heal_failed_count = 0;
+ char *start_time_str = 0;
+ char *end_time_str = NULL;
+ char *crawl_type = NULL;
+ int progress = -1;
+ int child = -1;
+
+ child = crawl_event->child;
+ healed_count = crawl_event->healed_count;
+ split_brain_count = crawl_event->split_brain_count;
+ heal_failed_count = crawl_event->heal_failed_count;
+ crawl_type = crawl_event->crawl_type;
+
+ if (!crawl_event->start_time)
+ goto out;
+
+ start_time_str = gf_strdup(ctime(&crawl_event->start_time));
+
+ if (crawl_event->end_time)
+ end_time_str = gf_strdup(ctime(&crawl_event->end_time));
+
+ ret = dict_get_int32(output, this->name, &xl_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "xl does not have id");
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "statistics-%d-%d-count", xl_id, child);
+ ret = dict_get_uint64(output, key, &count);
+
+ snprintf(suffix, sizeof(suffix), "%d-%d-%" PRIu64, xl_id, child, count);
+ snprintf(key, sizeof(key), "statistics_healed_cnt-%s", suffix);
+ ret = dict_set_uint64(output, key, healed_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_healed_count to output");
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "statistics_sb_cnt-%s", suffix);
+ ret = dict_set_uint64(output, key, split_brain_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_split_brain_count to output");
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "statistics_crawl_type-%s", suffix);
+ ret = dict_set_strn(output, key, keylen, crawl_type);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_crawl_type to output");
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "statistics_heal_failed_cnt-%s", suffix);
+ ret = dict_set_uint64(output, key, heal_failed_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_healed_failed_count to output");
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "statistics_strt_time-%s", suffix);
+ ret = dict_set_dynstrn(output, key, keylen, start_time_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_crawl_start_time to output");
+ goto out;
+ } else {
+ start_time_str = NULL;
+ }
+
+ if (!end_time_str)
+ progress = 1;
+ else
+ progress = 0;
+
+ keylen = snprintf(key, sizeof(key), "statistics_end_time-%s", suffix);
+ if (!end_time_str)
+ end_time_str = gf_strdup("Could not determine the end time");
+ ret = dict_set_dynstrn(output, key, keylen, end_time_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_crawl_end_time to output");
+ goto out;
+ } else {
+ end_time_str = NULL;
+ }
+
+ keylen = snprintf(key, sizeof(key), "statistics_inprogress-%s", suffix);
+
+ ret = dict_set_int32n(output, key, keylen, progress);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not add statistics_inprogress to output");
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "statistics-%d-%d-count", xl_id, child);
+ ret = dict_set_uint64(output, key, count + 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not increment the counter.");
+ goto out;
+ }
+out:
+ GF_FREE(start_time_str);
+ GF_FREE(end_time_str);
+ return ret;
+}
- ret = syncop_getxattr (priv->children[child], &loc, &xattr_rsp,
- GF_XATTR_NODE_UUID_KEY);
+int
+afr_shd_dict_add_path(xlator_t *this, dict_t *output, int child, char *path,
+ struct timeval *tv)
+{
+ int ret = -1;
+ uint64_t count = 0;
+ char key[64] = {0};
+ int keylen = 0;
+ char xl_id_child_str[32] = {0};
+ int xl_id = 0;
+
+ ret = dict_get_int32(output, this->name, &xl_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "xl does not have id");
+ goto out;
+ }
+
+ snprintf(xl_id_child_str, sizeof(xl_id_child_str), "%d-%d", xl_id, child);
+ snprintf(key, sizeof(key), "%s-count", xl_id_child_str);
+ ret = dict_get_uint64(output, key, &count);
+
+ keylen = snprintf(key, sizeof(key), "%s-%" PRIu64, xl_id_child_str, count);
+ ret = dict_set_dynstrn(output, key, keylen, path);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Could not add to output", path);
+ goto out;
+ }
+
+ if (tv) {
+ snprintf(key, sizeof(key), "%s-%" PRIu64 "-time", xl_id_child_str,
+ count);
+ ret = dict_set_uint32(output, key, tv->tv_sec);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "getxattr failed on %s - "
- "(%s)", priv->children[child]->name, strerror (errno));
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "%s: Could not set time", path);
+ goto out;
}
+ }
- ret = dict_get_str (xattr_rsp, GF_XATTR_NODE_UUID_KEY, &node_uuid);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "node-uuid key not found on "
- "child %s", priv->children[child]->name);
- goto out;
- }
+ snprintf(key, sizeof(key), "%s-count", xl_id_child_str);
- if (!strcmp (node_uuid, shd->node_uuid))
- *pos = AFR_POS_LOCAL;
- else
- *pos = AFR_POS_REMOTE;
+ ret = dict_set_uint64(output, key, count + 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "Could not increment count");
+ goto out;
+ }
- gf_log (this->name, GF_LOG_DEBUG, "child %s is %s",
- priv->children[child]->name, position_str_get (*pos));
+ ret = 0;
out:
- if (ret)
- *pos = AFR_POS_UNKNOWN;
- loc_wipe (&loc);
- return ret;
+ return ret;
}
int
-afr_syncop_find_child_position (void *data)
+afr_add_shd_event(circular_buffer_t *cb, void *data)
{
- shd_pos_t *pos_data = data;
- int ret = 0;
+ dict_t *output = NULL;
+ xlator_t *this = THIS;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ shd_event_t *shd_event = NULL;
+ char *path = NULL;
+
+ output = data;
+ priv = this->private;
+ shd = &priv->shd;
+ shd_event = cb->data;
+
+ if (!shd->index_healers[shd_event->child].local)
+ return 0;
- ret = afr_find_child_position (pos_data->this, pos_data->child,
- &pos_data->pos);
- return ret;
+ path = gf_strdup(shd_event->path);
+ if (!path)
+ return -ENOMEM;
+
+ afr_shd_dict_add_path(this, output, shd_event->child, path, &cb->tv);
+ return 0;
}
-static int
-afr_dir_crawl (void *data)
+int
+afr_add_crawl_event(circular_buffer_t *cb, void *data)
{
- xlator_t *this = NULL;
- int ret = -1;
- xlator_t *readdir_xl = NULL;
- fd_t *fd = NULL;
- loc_t dirloc = {0};
- afr_crawl_data_t *crawl_data = data;
-
- this = THIS;
-
- if (!_crawl_proceed (this, crawl_data->child, crawl_data->crawl_flags,
- NULL))
- goto out;
+ dict_t *output = NULL;
+ xlator_t *this = THIS;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ crawl_event_t *crawl_event = NULL;
+
+ output = data;
+ priv = this->private;
+ shd = &priv->shd;
+ crawl_event = cb->data;
+
+ if (!shd->index_healers[crawl_event->child].local)
+ return 0;
- readdir_xl = afr_crawl_readdir_xl_get (this, crawl_data);
- if (!readdir_xl)
- goto out;
- crawl_data->readdir_xl = readdir_xl;
+ afr_shd_dict_add_crawl_event(this, output, crawl_event);
- ret = afr_crawl_build_start_loc (this, crawl_data, &dirloc);
- if (ret)
- goto out;
+ return 0;
+}
- ret = afr_crawl_opendir (this, crawl_data, &fd, &dirloc);
+int
+afr_selfheal_daemon_init(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ int ret = -1;
+ int i = 0;
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ shd->index_healers = GF_CALLOC(sizeof(*shd->index_healers),
+ priv->child_count,
+ gf_afr_mt_subvol_healer_t);
+ if (!shd->index_healers)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ shd->index_healers[i].subvol = i;
+ ret = afr_shd_healer_init(this, &shd->index_healers[i]);
if (ret)
- goto out;
-
- ret = _crawl_directory (fd, &dirloc, crawl_data);
+ goto out;
+ }
+
+ shd->full_healers = GF_CALLOC(sizeof(*shd->full_healers), priv->child_count,
+ gf_afr_mt_subvol_healer_t);
+ if (!shd->full_healers)
+ goto out;
+ for (i = 0; i < priv->child_count; i++) {
+ shd->full_healers[i].subvol = i;
+ ret = afr_shd_healer_init(this, &shd->full_healers[i]);
if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Crawl failed on %s",
- readdir_xl->name);
- else
- gf_log (this->name, GF_LOG_DEBUG, "Crawl completed "
- "on %s", readdir_xl->name);
- if (crawl_data->crawl == INDEX)
- dirloc.path = NULL;
+ goto out;
+ }
+
+ shd->split_brain = eh_new(AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false,
+ afr_destroy_shd_event_data);
+ if (!shd->split_brain)
+ goto out;
+
+ shd->statistics = GF_CALLOC(sizeof(eh_t *), priv->child_count,
+ gf_common_mt_eh_t);
+ if (!shd->statistics)
+ goto out;
+
+ for (i = 0; i < priv->child_count; i++) {
+ shd->statistics[i] = eh_new(AFR_STATISTICS_HISTORY_SIZE, _gf_false,
+ afr_destroy_crawl_event_data);
+ if (!shd->statistics[i])
+ goto out;
+ shd->full_healers[i].crawl_event.child = i;
+ shd->full_healers[i].crawl_event.crawl_type = "FULL";
+ shd->index_healers[i].crawl_event.child = i;
+ shd->index_healers[i].crawl_event.crawl_type = "INDEX";
+ }
+
+ ret = 0;
out:
- if (fd)
- fd_unref (fd);
- if (crawl_data->crawl == INDEX)
- dirloc.path = NULL;
- loc_wipe (&dirloc);
- return ret;
+ return ret;
}
-static int
-afr_dir_exclusive_crawl (void *data)
+void
+afr_selfheal_childup(xlator_t *this, afr_private_t *priv)
{
- afr_private_t *priv = NULL;
- afr_self_heald_t *shd = NULL;
- gf_boolean_t crawl = _gf_false;
- int ret = 0;
- int child = -1;
- xlator_t *this = NULL;
- afr_crawl_data_t *crawl_data = data;
-
- this = THIS;
- priv = this->private;
- shd = &priv->shd;
- child = crawl_data->child;
-
- LOCK (&priv->lock);
- {
- if (shd->inprogress[child]) {
- if (shd->pending[child] != FULL)
- shd->pending[child] = crawl_data->crawl;
- } else {
- shd->inprogress[child] = _gf_true;
- crawl = _gf_true;
- }
- }
- UNLOCK (&priv->lock);
+ int subvol = 0;
- if (!crawl) {
- gf_log (this->name, GF_LOG_INFO, "Another crawl is in progress "
- "for %s", priv->children[child]->name);
- goto out;
- }
+ if (!priv->shd.iamshd)
+ return;
+ for (subvol = 0; subvol < priv->child_count; subvol++)
+ if (priv->child_up[subvol])
+ afr_shd_index_healer_spawn(this, subvol);
- do {
- afr_dir_crawl (data);
- LOCK (&priv->lock);
- {
- if (shd->pending[child] != NONE) {
- crawl_data->crawl = shd->pending[child];
- shd->pending[child] = NONE;
- } else {
- shd->inprogress[child] = _gf_false;
- crawl = _gf_false;
- }
- }
- UNLOCK (&priv->lock);
- } while (crawl);
-out:
- return ret;
+ return;
}
-void
-afr_start_crawl (xlator_t *this, int idx, afr_crawl_type_t crawl,
- process_entry_cbk_t process_entry, void *op_data,
- gf_boolean_t exclusive, int crawl_flags,
- afr_crawl_done_cbk_t crawl_done)
+int
+afr_shd_get_index_count(xlator_t *this, int i, uint64_t *count)
{
- afr_private_t *priv = NULL;
- call_frame_t *frame = NULL;
- afr_crawl_data_t *crawl_data = NULL;
- int ret = 0;
- int (*crawler) (void*) = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t *subvol = NULL;
+ loc_t rootloc = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ int ret = -1;
- priv = this->private;
+ priv = this->private;
+ subvol = priv->children[i];
- frame = create_frame (this, this->ctx->pool);
- if (!frame)
- goto out;
+ rootloc.inode = inode_ref(this->itable->root);
+ gf_uuid_copy(rootloc.gfid, rootloc.inode->gfid);
- afr_set_lk_owner (frame, this, frame->root);
- afr_set_low_priority (frame);
- crawl_data = GF_CALLOC (1, sizeof (*crawl_data),
- gf_afr_mt_crawl_data_t);
- if (!crawl_data)
- goto out;
- crawl_data->process_entry = process_entry;
- crawl_data->child = idx;
- crawl_data->pid = frame->root->pid;
- crawl_data->crawl = crawl;
- crawl_data->op_data = op_data;
- crawl_data->crawl_flags = crawl_flags;
- gf_log (this->name, GF_LOG_DEBUG, "starting crawl %d for %s",
- crawl_data->crawl, priv->children[idx]->name);
-
- if (exclusive)
- crawler = afr_dir_exclusive_crawl;
- else
- crawler = afr_dir_crawl;
- ret = synctask_new (this->ctx->env, crawler,
- crawl_done, frame, crawl_data);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Could not create the "
- "task for %d ret %d", idx, ret);
-out:
- return;
-}
+ ret = syncop_getxattr(subvol, &rootloc, &xattr, GF_XATTROP_INDEX_COUNT,
+ NULL, NULL);
+ if (ret < 0)
+ goto out;
-void
-afr_build_root_loc (xlator_t *this, loc_t *loc)
-{
- afr_private_t *priv = NULL;
+ ret = dict_get_uint64(xattr, GF_XATTROP_INDEX_COUNT, count);
+ if (ret)
+ goto out;
+
+ ret = 0;
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+ loc_wipe(&rootloc);
- priv = this->private;
- loc->path = gf_strdup ("/");
- loc->name = "";
- loc->inode = inode_ref (priv->root_inode);
- uuid_copy (loc->gfid, loc->inode->gfid);
+ return ret;
}
int
-afr_set_root_gfid (dict_t *dict)
+afr_xl_op(xlator_t *this, dict_t *input, dict_t *output)
{
- uuid_t gfid;
- int ret = 0;
+ gf_xl_afr_op_t op = GF_SHD_OP_INVALID;
+ int ret = 0;
+ int xl_id = 0;
+ afr_private_t *priv = NULL;
+ afr_self_heald_t *shd = NULL;
+ struct subvol_healer *healer = NULL;
+ int i = 0;
+ char key[64];
+ int keylen = 0;
+ int this_name_len = 0;
+ int op_ret = 0;
+ uint64_t cnt = 0;
+
+#define AFR_SET_DICT_AND_LOG(name, output, key, keylen, dict_str, \
+ dict_str_len) \
+ { \
+ int ret; \
+ \
+ ret = dict_set_nstrn(output, key, keylen, dict_str, dict_str_len); \
+ if (ret) { \
+ gf_smsg(name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED, \
+ "key=%s", key, "value=%s", dict_str, NULL); \
+ } \
+ }
+
+ priv = this->private;
+ shd = &priv->shd;
+
+ ret = dict_get_int32_sizen(input, "xl-op", (int32_t *)&op);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "key=xl-op", NULL);
+ goto out;
+ }
+ this_name_len = strlen(this->name);
+ ret = dict_get_int32n(input, this->name, this_name_len, &xl_id);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_GET_FAILED,
+ "key=%s", this->name, NULL);
+ goto out;
+ }
+ ret = dict_set_int32n(output, this->name, this_name_len, xl_id);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_DICT_SET_FAILED,
+ "key=%s", this->name, NULL);
+ goto out;
+ }
+ switch (op) {
+ case GF_SHD_OP_HEAL_INDEX:
+ op_ret = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->index_healers[i];
+ keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
+
+ if (!priv->child_up[i]) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_NOT_CONNECTED,
+ SLEN(SBRICK_NOT_CONNECTED));
+ op_ret = -1;
+ } else if (AFR_COUNT(priv->child_up, priv->child_count) < 2) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SLESS_THAN2_BRICKS_in_REP,
+ SLEN(SLESS_THAN2_BRICKS_in_REP));
+ op_ret = -1;
+ } else if (!afr_shd_is_subvol_local(this, healer->subvol)) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_IS_REMOTE,
+ SLEN(SBRICK_IS_REMOTE));
+ } else {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SSTARTED_SELF_HEAL,
+ SLEN(SSTARTED_SELF_HEAL));
- memset (gfid, 0, 16);
- gfid[15] = 1;
+ ret = afr_shd_index_healer_spawn(this, i);
- ret = afr_set_dict_gfid (dict, gfid);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_HEALER_SPAWN_FAILED, NULL);
+ }
+ }
+ }
+ break;
+ case GF_SHD_OP_HEAL_FULL:
+ op_ret = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->full_healers[i];
+ keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
+
+ if (!priv->child_up[i]) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_NOT_CONNECTED,
+ SLEN(SBRICK_NOT_CONNECTED));
+ } else if (AFR_COUNT(priv->child_up, priv->child_count) < 2) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SLESS_THAN2_BRICKS_in_REP,
+ SLEN(SLESS_THAN2_BRICKS_in_REP));
+ } else if (!afr_shd_is_subvol_local(this, healer->subvol)) {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_IS_REMOTE,
+ SLEN(SBRICK_IS_REMOTE));
+ } else {
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SSTARTED_SELF_HEAL,
+ SLEN(SSTARTED_SELF_HEAL));
- return ret;
-}
+ ret = afr_shd_full_healer_spawn(this, i);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_HEALER_SPAWN_FAILED, NULL);
+ }
+ op_ret = 0;
+ }
+ }
+ break;
+ case GF_SHD_OP_INDEX_SUMMARY:
+ /* this case has been handled in glfs-heal.c */
+ break;
+ case GF_SHD_OP_SPLIT_BRAIN_FILES:
+ eh_dump(shd->split_brain, output, afr_add_shd_event);
+ break;
+ case GF_SHD_OP_STATISTICS:
+ for (i = 0; i < priv->child_count; i++) {
+ eh_dump(shd->statistics[i], output, afr_add_crawl_event);
+ ret = afr_shd_dict_add_crawl_event(
+ this, output, &shd->index_healers[i].crawl_event);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, NULL);
+ }
+
+ ret = afr_shd_dict_add_crawl_event(
+ this, output, &shd->full_healers[i].crawl_event);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_ADD_CRAWL_EVENT_FAILED, NULL);
+ }
+ }
+ break;
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT:
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ op_ret = -1;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!priv->child_up[i]) {
+ keylen = snprintf(key, sizeof(key), "%d-%d-status", xl_id,
+ i);
+ AFR_SET_DICT_AND_LOG(this->name, output, key, keylen,
+ SBRICK_NOT_CONNECTED,
+ SLEN(SBRICK_NOT_CONNECTED));
+ } else {
+ snprintf(key, sizeof(key), "%d-%d-hardlinks", xl_id, i);
+ ret = afr_shd_get_index_count(this, i, &cnt);
+ if (ret == 0) {
+ ret = dict_set_uint64(output, key, cnt);
+ }
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ AFR_MSG_DICT_SET_FAILED, NULL);
+ }
+ op_ret = 0;
+ }
+ }
+
+ break;
+
+ default:
+ gf_smsg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_ARG, "op=%d",
+ op, NULL);
+ break;
+ }
+out:
+ dict_deln(output, this->name, this_name_len);
+ return op_ret;
+
+#undef AFR_SET_DICT_AND_LOG
+}
diff --git a/xlators/cluster/afr/src/afr-self-heald.h b/xlators/cluster/afr/src/afr-self-heald.h
index 32a8aaca50c..18db728ea7b 100644
--- a/xlators/cluster/afr/src/afr-self-heald.h
+++ b/xlators/cluster/afr/src/afr-self-heald.h
@@ -1,5 +1,5 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
This file is licensed to you under your choice of the GNU Lesser
@@ -8,45 +8,68 @@
cases as published by the Free Software Foundation.
*/
-#ifndef __AFR_SELF_HEALD_H__
-#define __AFR_SELF_HEALD_H__
-#include "xlator.h"
+#ifndef _AFR_SELF_HEALD_H
+#define _AFR_SELF_HEALD_H
-#define IS_ROOT_PATH(path) (!strcmp (path, "/"))
-#define IS_ENTRY_CWD(entry) (!strcmp (entry, "."))
-#define IS_ENTRY_PARENT(entry) (!strcmp (entry, ".."))
-#define AFR_ALL_CHILDREN -1
+#include <pthread.h>
-typedef struct afr_crawl_data_ {
- int child;
- pid_t pid;
- afr_crawl_type_t crawl;
- xlator_t *readdir_xl;
- void *op_data;
- int crawl_flags;
- int (*process_entry) (xlator_t *this, struct afr_crawl_data_ *crawl_data,
- gf_dirent_t *entry, loc_t *child, loc_t *parent,
- struct iatt *iattr);
-} afr_crawl_data_t;
+typedef struct {
+ char *path;
+ int child;
+} shd_event_t;
-typedef int (*process_entry_cbk_t) (xlator_t *this, afr_crawl_data_t *crawl_data,
- gf_dirent_t *entry, loc_t *child, loc_t *parent,
- struct iatt *iattr);
+typedef struct {
+ uint64_t healed_count;
+ uint64_t split_brain_count;
+ uint64_t heal_failed_count;
-void afr_build_root_loc (xlator_t *this, loc_t *loc);
+ /* If start_time is 0, it means crawler is not in progress
+ and stats are not valid */
+ time_t start_time;
+ /* If start_time is NOT 0 and end_time is 0, it means
+ cralwer is in progress */
+ time_t end_time;
+ char *crawl_type;
+ int child;
+} crawl_event_t;
-int afr_set_root_gfid (dict_t *dict);
+struct subvol_healer {
+ xlator_t *this;
+ crawl_event_t crawl_event;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ pthread_t thread;
+ int subvol;
+ gf_boolean_t local;
+ gf_boolean_t running;
+ gf_boolean_t rerun;
+};
-void
-afr_proactive_self_heal (void *data);
+typedef struct {
+ struct subvol_healer *index_healers;
+ struct subvol_healer *full_healers;
+
+ eh_t *split_brain;
+ eh_t **statistics;
+ int timeout;
+ uint32_t max_threads;
+ uint32_t wait_qlength;
+ uint32_t halo_max_latency_msec;
+ gf_boolean_t iamshd;
+ gf_boolean_t enabled;
+} afr_self_heald_t;
int
-afr_xl_op (xlator_t *this, dict_t *input, dict_t *output);
+afr_selfheal_daemon_init(xlator_t *this);
+
+int
+afr_xl_op(xlator_t *this, dict_t *input, dict_t *output);
+
+int
+afr_shd_gfid_to_path(xlator_t *this, xlator_t *subvol, uuid_t gfid,
+ char **path_p);
-/*
- * In addition to its self-heal use, this is used to find a local default
- * read_child.
- */
int
-afr_local_pathinfo (char *pathinfo, gf_boolean_t *local);
-#endif /* __AFR_SELF_HEALD_H__ */
+afr_shd_entry_purge(xlator_t *subvol, inode_t *inode, char *name,
+ ia_type_t type);
+#endif /* !_AFR_SELF_HEALD_H */
diff --git a/xlators/cluster/afr/src/afr-transaction.c b/xlators/cluster/afr/src/afr-transaction.c
index 6634b3102a1..a51f79b1f43 100644
--- a/xlators/cluster/afr/src/afr-transaction.c
+++ b/xlators/cluster/afr/src/afr-transaction.c
@@ -8,1302 +8,2920 @@
cases as published by the Free Software Foundation.
*/
-#include "dict.h"
-#include "byte-order.h"
-#include "common-utils.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/timer.h>
#include "afr.h"
#include "afr-transaction.h"
+#include "afr-self-heal.h"
+#include "afr-messages.h"
#include <signal.h>
+typedef enum {
+ AFR_TRANSACTION_PRE_OP,
+ AFR_TRANSACTION_POST_OP,
+} afr_xattrop_type_t;
-#define LOCKED_NO 0x0 /* no lock held */
-#define LOCKED_YES 0x1 /* for DATA, METADATA, ENTRY and higher_path
- of RENAME */
-#define LOCKED_LOWER 0x2 /* for lower_path of RENAME */
+static void
+afr_lock_resume_shared(struct list_head *list);
-afr_fd_ctx_t *
-__afr_fd_ctx_get (fd_t *fd, xlator_t *this)
-{
- uint64_t ctx = 0;
- int ret = 0;
- afr_fd_ctx_t *fd_ctx = NULL;
- int i = 0;
- afr_private_t *priv = NULL;
+static void
+afr_post_op_handle_success(call_frame_t *frame, xlator_t *this);
- priv = this->private;
+static void
+afr_post_op_handle_failure(call_frame_t *frame, xlator_t *this, int op_errno);
- ret = __fd_ctx_get (fd, this, &ctx);
+void
+__afr_transaction_wake_shared(afr_local_t *local, struct list_head *shared);
- if (ret < 0 && fd_is_anonymous (fd)) {
- ret = __afr_fd_ctx_set (this, fd);
- if (ret < 0)
- goto out;
+void
+afr_changelog_post_op_do(call_frame_t *frame, xlator_t *this);
- ret = __fd_ctx_get (fd, this, &ctx);
- if (ret < 0)
- goto out;
+int
+afr_changelog_post_op_safe(call_frame_t *frame, xlator_t *this);
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
- for (i = 0; i < priv->child_count; i++)
- fd_ctx->opened_on[i] = AFR_FD_OPENED;
- }
+gf_boolean_t
+afr_changelog_pre_op_uninherit(call_frame_t *frame, xlator_t *this);
- fd_ctx = (afr_fd_ctx_t *)(long) ctx;
-out:
- return fd_ctx;
-}
+gf_boolean_t
+afr_changelog_pre_op_update(call_frame_t *frame, xlator_t *this);
+int
+afr_changelog_call_count(afr_transaction_type type,
+ unsigned char *pre_op_subvols,
+ unsigned char *failed_subvols,
+ unsigned int child_count);
+int
+afr_changelog_do(call_frame_t *frame, xlator_t *this, dict_t *xattr,
+ afr_changelog_resume_t changelog_resume,
+ afr_xattrop_type_t op);
-afr_fd_ctx_t *
-afr_fd_ctx_get (fd_t *fd, xlator_t *this)
-{
- afr_fd_ctx_t *fd_ctx = NULL;
+static void
+afr_ta_decide_post_op_state(call_frame_t *frame, xlator_t *this);
- LOCK(&fd->lock);
- {
- fd_ctx = __afr_fd_ctx_get (fd, this);
- }
- UNLOCK(&fd->lock);
+static int
+afr_ta_post_op_do(void *opaque);
- return fd_ctx;
-}
+static int
+afr_ta_post_op_synctask(xlator_t *this, afr_local_t *local);
+static int
+afr_changelog_post_op_done(call_frame_t *frame, xlator_t *this);
static void
-afr_pid_save (call_frame_t *frame)
-{
- afr_local_t * local = NULL;
-
- local = frame->local;
+afr_changelog_post_op_fail(call_frame_t *frame, xlator_t *this, int op_errno);
- local->saved_pid = frame->root->pid;
+void
+afr_ta_locked_priv_invalidate(afr_private_t *priv)
+{
+ priv->ta_bad_child_index = AFR_CHILD_UNKNOWN;
+ priv->release_ta_notify_dom_lock = _gf_false;
+ priv->ta_notify_dom_lock_offset = 0;
}
-
static void
-afr_pid_restore (call_frame_t *frame)
+afr_ta_process_waitq(xlator_t *this)
{
- afr_local_t * local = NULL;
+ afr_local_t *entry = NULL;
+ afr_private_t *priv = this->private;
+ struct list_head waitq = {
+ 0,
+ };
+
+ INIT_LIST_HEAD(&waitq);
+ LOCK(&priv->lock);
+ list_splice_init(&priv->ta_waitq, &waitq);
+ UNLOCK(&priv->lock);
+ list_for_each_entry(entry, &waitq, ta_waitq)
+ {
+ afr_ta_decide_post_op_state(entry->transaction.frame, this);
+ }
+}
- local = frame->local;
+int
+afr_ta_lock_release_done(int ret, call_frame_t *ta_frame, void *opaque)
+{
+ afr_ta_process_waitq(ta_frame->this);
+ STACK_DESTROY(ta_frame->root);
+ return 0;
+}
- frame->root->pid = local->saved_pid;
+int
+afr_release_notify_lock_for_ta(void *opaque)
+{
+ xlator_t *this = NULL;
+ afr_private_t *priv = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct gf_flock flock = {
+ 0,
+ };
+ int ret = -1;
+
+ this = (xlator_t *)opaque;
+ priv = this->private;
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate loc for thin-arbiter.");
+ goto out;
+ }
+ flock.l_type = F_UNLCK;
+ flock.l_start = priv->ta_notify_dom_lock_offset;
+ flock.l_len = 1;
+ ret = syncop_inodelk(priv->children[THIN_ARBITER_BRICK_INDEX],
+ AFR_TA_DOM_NOTIFY, &loc, F_SETLK, &flock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to unlock AFR_TA_DOM_NOTIFY lock.");
+ }
+
+ LOCK(&priv->lock);
+ {
+ afr_ta_locked_priv_invalidate(priv);
+ }
+ UNLOCK(&priv->lock);
+out:
+ loc_wipe(&loc);
+ return ret;
}
+void
+afr_zero_fill_stat(afr_local_t *local)
+{
+ if (!local)
+ return;
+ if (local->transaction.type == AFR_DATA_TRANSACTION ||
+ local->transaction.type == AFR_METADATA_TRANSACTION) {
+ gf_zero_fill_stat(&local->cont.inode_wfop.prebuf);
+ gf_zero_fill_stat(&local->cont.inode_wfop.postbuf);
+ } else if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
+ local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ gf_zero_fill_stat(&local->cont.dir_fop.buf);
+ gf_zero_fill_stat(&local->cont.dir_fop.preparent);
+ gf_zero_fill_stat(&local->cont.dir_fop.postparent);
+ if (local->transaction.type == AFR_ENTRY_TRANSACTION)
+ return;
+ gf_zero_fill_stat(&local->cont.dir_fop.prenewparent);
+ gf_zero_fill_stat(&local->cont.dir_fop.postnewparent);
+ }
+}
-static void
-__mark_all_pending (int32_t *pending[], int child_count,
- afr_transaction_type type)
+/* In case of errors afr needs to choose which xdata from lower xlators it needs
+ * to unwind with. The way it is done is by checking if there are
+ * any good subvols which failed. Give preference to errnos other than
+ * ENOTCONN even if the child is source */
+void
+afr_pick_error_xdata(afr_local_t *local, afr_private_t *priv, inode_t *inode1,
+ unsigned char *readable1, inode_t *inode2,
+ unsigned char *readable2)
{
- int i = 0;
- int j = 0;
+ int s = -1; /*selection*/
+ int i = 0;
+ unsigned char *readable = NULL;
+
+ if (local->xdata_rsp) {
+ dict_unref(local->xdata_rsp);
+ local->xdata_rsp = NULL;
+ }
+
+ readable = alloca0(priv->child_count * sizeof(*readable));
+ if (inode2 && readable2) { /*rename fop*/
+ AFR_INTERSECT(readable, readable1, readable2, priv->child_count);
+ } else {
+ memcpy(readable, readable1, sizeof(*readable) * priv->child_count);
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
+
+ if (local->replies[i].op_ret >= 0)
+ continue;
+
+ if (local->replies[i].op_errno == ENOTCONN)
+ continue;
+
+ /*Order is important in the following condition*/
+ if ((s < 0) || (!readable[s] && readable[i]))
+ s = i;
+ }
+
+ if (s != -1 && local->replies[s].xdata) {
+ local->xdata_rsp = dict_ref(local->replies[s].xdata);
+ } else if (s == -1) {
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->replies[i].valid)
+ continue;
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
- pending[i][j] = hton32 (1);
+ if (local->replies[i].op_ret >= 0)
+ continue;
+
+ if (!local->replies[i].xdata)
+ continue;
+ local->xdata_rsp = dict_ref(local->replies[i].xdata);
+ break;
}
+ }
}
+gf_boolean_t
+afr_needs_changelog_update(afr_local_t *local)
+{
+ if (local->transaction.type == AFR_DATA_TRANSACTION)
+ return _gf_true;
+ if (!local->optimistic_change_log)
+ return _gf_true;
+ return _gf_false;
+}
-static void
-__mark_child_dead (int32_t *pending[], int child_count, int child,
- afr_transaction_type type)
+gf_boolean_t
+afr_changelog_has_quorum(afr_local_t *local, xlator_t *this)
{
- int j = 0;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ unsigned char *success_children = NULL;
- j = afr_index_for_transaction_type (type);
+ priv = this->private;
+ success_children = alloca0(priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.failed_subvols[i]) {
+ success_children[i] = 1;
+ }
+ }
- pending[child][j] = 0;
+ if (afr_has_quorum(success_children, this, NULL)) {
+ return _gf_true;
+ }
+
+ return _gf_false;
}
+gf_boolean_t
+afr_is_write_subvol_valid(call_frame_t *frame, xlator_t *this)
+{
+ int i = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ uint64_t write_subvol = 0;
+ unsigned char *writable = NULL;
+ uint16_t datamap = 0;
+
+ local = frame->local;
+ priv = this->private;
+ writable = alloca0(priv->child_count);
+
+ write_subvol = afr_write_subvol_get(frame, this);
+ datamap = (write_subvol & 0x00000000ffff0000) >> 16;
+ for (i = 0; i < priv->child_count; i++) {
+ if (datamap & (1 << i))
+ writable[i] = 1;
+
+ if (writable[i] && !local->transaction.failed_subvols[i])
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
-static void
-__mark_pre_op_done_on_fd (call_frame_t *frame, xlator_t *this, int child_index)
+int
+afr_transaction_fop(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int call_count = -1;
+ unsigned char *failed_subvols = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ failed_subvols = local->transaction.failed_subvols;
+ call_count = priv->child_count -
+ AFR_COUNT(failed_subvols, priv->child_count);
+ /* Fail if pre-op did not succeed on quorum no. of bricks. */
+ if (!afr_changelog_has_quorum(local, this) || !call_count) {
+ local->op_ret = -1;
+ /* local->op_errno is already captured in changelog cbk. */
+ afr_transaction_resume(frame, this);
+ return 0;
+ }
+
+ /* Fail if at least one writeable brick isn't up.*/
+ if (local->transaction.type == AFR_DATA_TRANSACTION &&
+ !afr_is_write_subvol_valid(frame, this)) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ afr_transaction_resume(frame, this);
+ return 0;
+ }
- local = frame->local;
+ local->call_count = call_count;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] && !failed_subvols[i]) {
+ local->transaction.wind(frame, this, i);
- if (!local->fd)
- return;
+ if (!--call_count)
+ break;
+ }
+ }
- fd_ctx = afr_fd_ctx_get (local->fd, this);
+ return 0;
+}
- if (!fd_ctx)
- goto out;
+int
+afr_transaction_done(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t unwind = _gf_false;
+ afr_lock_t *lock = NULL;
+ afr_local_t *lock_local = NULL;
+
+ priv = this->private;
+ local = frame->local;
- LOCK (&local->fd->lock);
+ if (priv->consistent_metadata) {
+ LOCK(&frame->lock);
{
- if (local->transaction.type == AFR_DATA_TRANSACTION)
- fd_ctx->pre_op_done[child_index]++;
+ unwind = (local->transaction.main_frame != NULL);
}
- UNLOCK (&local->fd->lock);
-out:
- return;
+ UNLOCK(&frame->lock);
+ if (unwind) /*It definitely did post-op*/
+ afr_zero_fill_stat(local);
+ }
+
+ if (local->transaction.do_eager_unlock) {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ LOCK(&local->inode->lock);
+ {
+ lock->acquired = _gf_false;
+ lock->release = _gf_false;
+ list_splice_init(&lock->frozen, &lock->waiting);
+ if (list_empty(&lock->waiting))
+ goto unlock;
+ lock_local = list_entry(lock->waiting.next, afr_local_t,
+ transaction.wait_list);
+ list_del_init(&lock_local->transaction.wait_list);
+ list_add(&lock_local->transaction.owner_list, &lock->owners);
+ }
+ unlock:
+ UNLOCK(&local->inode->lock);
+ }
+ if (lock_local) {
+ afr_lock(lock_local->transaction.frame,
+ lock_local->transaction.frame->this);
+ }
+ local->transaction.unwind(frame, this);
+
+ GF_ASSERT(list_empty(&local->transaction.owner_list));
+ GF_ASSERT(list_empty(&local->transaction.wait_list));
+ AFR_STACK_DESTROY(frame);
+
+ return 0;
}
-
static void
-__mark_pre_op_undone_on_fd (call_frame_t *frame, xlator_t *this, int child_index)
+afr_lock_fail_shared(afr_local_t *local, struct list_head *list)
{
- afr_local_t *local = NULL;
- afr_fd_ctx_t *fd_ctx = NULL;
+ afr_local_t *each = NULL;
+
+ while (!list_empty(list)) {
+ each = list_entry(list->next, afr_local_t, transaction.wait_list);
+ list_del_init(&each->transaction.wait_list);
+ each->op_ret = -1;
+ each->op_errno = local->op_errno;
+ afr_transaction_done(each->transaction.frame,
+ each->transaction.frame->this);
+ }
+}
- local = frame->local;
+static void
+afr_handle_lock_acquire_failure(afr_local_t *local)
+{
+ struct list_head shared;
+ afr_lock_t *lock = NULL;
- if (!local->fd)
- return;
+ if (!local->transaction.eager_lock_on)
+ goto out;
- fd_ctx = afr_fd_ctx_get (local->fd, this);
+ lock = &local->inode_ctx->lock[local->transaction.type];
- if (!fd_ctx)
- goto out;
+ INIT_LIST_HEAD(&shared);
+ LOCK(&local->inode->lock);
+ {
+ lock->release = _gf_true;
+ list_splice_init(&lock->waiting, &shared);
+ }
+ UNLOCK(&local->inode->lock);
- LOCK (&local->fd->lock);
- {
- if (local->transaction.type == AFR_DATA_TRANSACTION)
- fd_ctx->pre_op_done[child_index]--;
- }
- UNLOCK (&local->fd->lock);
+ afr_lock_fail_shared(local, &shared);
+ local->transaction.do_eager_unlock = _gf_true;
out:
- return;
+ local->internal_lock.lock_cbk = afr_transaction_done;
+ afr_unlock(local->transaction.frame, local->transaction.frame->this);
}
+call_frame_t *
+afr_transaction_detach_fop_frame(call_frame_t *frame)
+{
+ afr_local_t *local = NULL;
+ call_frame_t *fop_frame = NULL;
+
+ local = frame->local;
+
+ afr_handle_inconsistent_fop(frame, &local->op_ret, &local->op_errno);
+ LOCK(&frame->lock);
+ {
+ fop_frame = local->transaction.main_frame;
+ local->transaction.main_frame = NULL;
+ }
+ UNLOCK(&frame->lock);
+
+ return fop_frame;
+}
static void
-__mark_down_children (int32_t *pending[], int child_count,
- unsigned char *child_up, afr_transaction_type type)
+afr_save_lk_owner(call_frame_t *frame)
{
- int i = 0;
- int j = 0;
+ afr_local_t *local = NULL;
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
+ local = frame->local;
- if (!child_up[i])
- pending[i][j] = 0;
- }
+ local->saved_lk_owner = frame->root->lk_owner;
}
-
static void
-__mark_all_success (int32_t *pending[], int child_count,
- afr_transaction_type type)
+afr_restore_lk_owner(call_frame_t *frame)
{
- int i;
- int j;
+ afr_local_t *local = NULL;
- for (i = 0; i < child_count; i++) {
- j = afr_index_for_transaction_type (type);
- pending[i][j] = hton32 (-1);
- }
+ local = frame->local;
+
+ frame->root->lk_owner = local->saved_lk_owner;
}
+void
+__mark_all_success(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i;
-static int
-__changelog_enabled (afr_private_t *priv, afr_transaction_type type)
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ local->transaction.failed_subvols[i] = 0;
+ }
+}
+
+void
+afr_compute_pre_op_sources(call_frame_t *frame, xlator_t *this)
{
- int ret = 0;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_transaction_type type = -1;
+ dict_t *xdata = NULL;
+ int **matrix = NULL;
+ int idx = -1;
+ int i = 0;
+ int j = 0;
+
+ priv = this->private;
+ local = frame->local;
+ type = local->transaction.type;
+ idx = afr_index_for_transaction_type(type);
+ matrix = ALLOC_MATRIX(priv->child_count, int);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.changelog_xdata[i])
+ continue;
+ xdata = local->transaction.changelog_xdata[i];
+ afr_selfheal_fill_matrix(this, matrix, i, idx, xdata);
+ }
+
+ memset(local->transaction.pre_op_sources, 1, priv->child_count);
+
+ /*If lock or pre-op failed on a brick, it is not a source. */
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i])
+ local->transaction.pre_op_sources[i] = 0;
+ }
+
+ /* If brick is blamed by others, it is not a source. */
+ for (i = 0; i < priv->child_count; i++)
+ for (j = 0; j < priv->child_count; j++)
+ if (matrix[i][j] != 0)
+ local->transaction.pre_op_sources[j] = 0;
+}
- switch (type) {
- case AFR_DATA_TRANSACTION:
- if (priv->data_change_log)
- ret = 1;
+void
+afr_txn_arbitrate_fop(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int pre_op_sources_count = 0;
+ int i = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ afr_compute_pre_op_sources(frame, this);
+ pre_op_sources_count = AFR_COUNT(local->transaction.pre_op_sources,
+ priv->child_count);
+
+ /* If arbiter is the only source, do not proceed. */
+ if (pre_op_sources_count < 2 &&
+ local->transaction.pre_op_sources[ARBITER_BRICK_INDEX]) {
+ local->op_ret = -1;
+ local->op_errno = ENOTCONN;
+ for (i = 0; i < priv->child_count; i++)
+ local->transaction.failed_subvols[i] = 1;
+ }
+
+ afr_transaction_fop(frame, this);
+
+ return;
+}
- break;
+int
+afr_transaction_perform_fop(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ int ret = 0;
+ int failure_count = 0;
+ struct list_head shared;
+ afr_lock_t *lock = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ INIT_LIST_HEAD(&shared);
+ if (local->transaction.type == AFR_DATA_TRANSACTION &&
+ !local->transaction.inherited) {
+ ret = afr_write_subvol_set(frame, this);
+ if (ret) {
+ /*act as if operation failed on all subvols*/
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ for (i = 0; i < priv->child_count; i++)
+ local->transaction.failed_subvols[i] = 1;
+ }
+ }
+
+ if (local->pre_op_compat)
+ /* old mode, pre-op was done as afr_changelog_do()
+ just now, before OP */
+ afr_changelog_pre_op_update(frame, this);
+
+ if (!local->transaction.eager_lock_on || local->transaction.inherited)
+ goto fop;
+ failure_count = AFR_COUNT(local->transaction.failed_subvols,
+ priv->child_count);
+ if (failure_count == priv->child_count) {
+ afr_handle_lock_acquire_failure(local);
+ return 0;
+ } else {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ LOCK(&local->inode->lock);
+ {
+ lock->acquired = _gf_true;
+ __afr_transaction_wake_shared(local, &shared);
+ }
+ UNLOCK(&local->inode->lock);
+ }
+
+fop:
+ /* Perform fops with the lk-owner from top xlator.
+ * Eg: lk-owner of posix-lk and flush should be same,
+ * flush cant clear the posix-lks without that lk-owner.
+ */
+ afr_save_lk_owner(frame);
+ frame->root->lk_owner = local->transaction.main_frame->root->lk_owner;
+
+ if (priv->arbiter_count == 1) {
+ afr_txn_arbitrate_fop(frame, this);
+ } else {
+ afr_transaction_fop(frame, this);
+ }
+
+ afr_lock_resume_shared(&shared);
+ return 0;
+}
- case AFR_METADATA_TRANSACTION:
- if (priv->metadata_change_log)
- ret = 1;
+int
+afr_set_pending_dict(afr_private_t *priv, dict_t *xattr, int **pending)
+{
+ int i = 0;
+ int ret = 0;
- break;
+ for (i = 0; i < priv->child_count; i++) {
+ ret = dict_set_static_bin(xattr, priv->pending_key[i], pending[i],
+ AFR_NUM_CHANGE_LOGS * sizeof(int));
+ /* 3 = data+metadata+entry */
- case AFR_ENTRY_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- if (priv->entry_change_log)
- ret = 1;
+ if (ret)
+ break;
+ }
+ return ret;
+}
+
+static void
+afr_ta_dom_lock_check_and_release(afr_ta_fop_state_t fop_state, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ unsigned int inmem_count = 0;
+ unsigned int onwire_count = 0;
+ gf_boolean_t release = _gf_false;
+
+ LOCK(&priv->lock);
+ {
+ /*Once we get notify lock release upcall notification,
+ if any of the fop state counters are non-zero, we will
+ not release the lock.
+ */
+ onwire_count = priv->ta_on_wire_txn_count;
+ inmem_count = priv->ta_in_mem_txn_count;
+ switch (fop_state) {
+ case TA_GET_INFO_FROM_TA_FILE:
+ onwire_count = --priv->ta_on_wire_txn_count;
+ break;
+ case TA_INFO_IN_MEMORY_SUCCESS:
+ case TA_INFO_IN_MEMORY_FAILED:
+ inmem_count = --priv->ta_in_mem_txn_count;
+ break;
+ case TA_WAIT_FOR_NOTIFY_LOCK_REL:
+ GF_ASSERT(0);
+ break;
+ case TA_SUCCESS:
break;
}
+ release = priv->release_ta_notify_dom_lock;
+ }
+ UNLOCK(&priv->lock);
- return ret;
-}
+ if (inmem_count != 0 || release == _gf_false || onwire_count != 0)
+ return;
+ afr_ta_lock_release_synctask(this);
+}
-static int
-__fop_changelog_needed (call_frame_t *frame, xlator_t *this)
+static void
+afr_ta_process_onwireq(afr_ta_fop_state_t fop_state, xlator_t *this)
{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- int op_ret = 0;
- afr_transaction_type type = -1;
+ afr_private_t *priv = this->private;
+ afr_local_t *entry = NULL;
+ int bad_child = AFR_CHILD_UNKNOWN;
+
+ struct list_head onwireq = {
+ 0,
+ };
+ INIT_LIST_HEAD(&onwireq);
+
+ LOCK(&priv->lock);
+ {
+ bad_child = priv->ta_bad_child_index;
+ if (bad_child == AFR_CHILD_UNKNOWN) {
+ /*The previous on-wire ta_post_op was a failure. Just dequeue
+ *one element to wind on-wire again. */
+ entry = list_entry(priv->ta_onwireq.next, afr_local_t, ta_onwireq);
+ list_del_init(&entry->ta_onwireq);
+ } else {
+ /* Prepare to process all fops based on bad_child_index. */
+ list_splice_init(&priv->ta_onwireq, &onwireq);
+ }
+ }
+ UNLOCK(&priv->lock);
- priv = this->private;
- local = frame->local;
- type = local->transaction.type;
+ if (entry) {
+ afr_ta_post_op_synctask(this, entry);
+ return;
+ } else {
+ while (!list_empty(&onwireq)) {
+ entry = list_entry(onwireq.next, afr_local_t, ta_onwireq);
+ list_del_init(&entry->ta_onwireq);
+ if (entry->ta_failed_subvol == bad_child) {
+ afr_post_op_handle_success(entry->transaction.frame, this);
+ } else {
+ afr_post_op_handle_failure(entry->transaction.frame, this, EIO);
+ }
+ }
+ }
+}
- if (__changelog_enabled (priv, type)) {
- switch (local->op) {
+int
+afr_changelog_post_op_done(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_private_t *priv = NULL;
+
+ local = frame->local;
+ priv = this->private;
+ int_lock = &local->internal_lock;
+
+ if (priv->thin_arbiter_count) {
+ /*fop should not come here with TA_WAIT_FOR_NOTIFY_LOCK_REL state */
+ afr_ta_dom_lock_check_and_release(local->fop_state, this);
+ }
+
+ /* Fail the FOP if post-op did not succeed on quorum no. of bricks. */
+ if (!afr_changelog_has_quorum(local, this)) {
+ local->op_ret = -1;
+ /*local->op_errno is already captured in changelog cbk*/
+ }
+
+ if (local->transaction.resume_stub) {
+ call_resume(local->transaction.resume_stub);
+ local->transaction.resume_stub = NULL;
+ }
+
+ int_lock->lock_cbk = afr_transaction_done;
+ afr_unlock(frame, this);
+
+ return 0;
+}
- case GF_FOP_WRITE:
- case GF_FOP_FTRUNCATE:
- op_ret = 1;
- break;
+static void
+afr_changelog_post_op_fail(call_frame_t *frame, xlator_t *this, int op_errno)
+{
+ afr_local_t *local = frame->local;
+ local->op_ret = -1;
+ local->op_errno = op_errno;
- case GF_FOP_FLUSH:
- op_ret = 0;
- break;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, AFR_MSG_THIN_ARB,
+ "Failing %s for gfid %s. Fop state is:%d", gf_fop_list[local->op],
+ uuid_utoa(local->inode->gfid), local->fop_state);
- default:
- op_ret = 1;
- }
- }
+ afr_changelog_post_op_done(frame, this);
+}
- return op_ret;
+unsigned char *
+afr_locked_nodes_get(afr_transaction_type type, afr_internal_lock_t *int_lock)
+{
+ /*Because same set of subvols participate in all lockee
+ * entities*/
+ return int_lock->lockee[0].locked_nodes;
}
int
-afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
- int child, afr_xattrop_type_t op)
+afr_changelog_call_count(afr_transaction_type type,
+ unsigned char *pre_op_subvols,
+ unsigned char *failed_subvols,
+ unsigned int child_count)
{
- int i = 0;
- int ret = 0;
+ int i = 0;
+ int call_count = 0;
- if (op == LOCAL_FIRST) {
- ret = dict_set_static_bin (xattr, priv->pending_key[child],
- pending[child],
- AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
- if (ret)
- goto out;
+ for (i = 0; i < child_count; i++) {
+ if (pre_op_subvols[i] && !failed_subvols[i]) {
+ call_count++;
}
- for (i = 0; i < priv->child_count; i++) {
- if (i == child)
- continue;
- ret = dict_set_static_bin (xattr, priv->pending_key[i],
- pending[i],
- AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
- /* 3 = data+metadata+entry */
+ }
- if (ret < 0)
- goto out;
- }
- if (op == LOCAL_LAST) {
- ret = dict_set_static_bin (xattr, priv->pending_key[child],
- pending[child],
- AFR_NUM_CHANGE_LOGS * sizeof (int32_t));
- if (ret)
- goto out;
- }
-out:
- return ret;
+ if (type == AFR_ENTRY_RENAME_TRANSACTION)
+ call_count *= 2;
+
+ return call_count;
}
-int
-afr_lock_server_count (afr_private_t *priv, afr_transaction_type type)
+gf_boolean_t
+afr_txn_nothing_failed(call_frame_t *frame, xlator_t *this)
{
- int ret = 0;
-
- switch (type) {
- case AFR_DATA_TRANSACTION:
- ret = priv->child_count;
- break;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (priv->thin_arbiter_count) {
+ /* We need to perform post-op even if 1 data brick was down
+ * before the txn started.*/
+ if (AFR_COUNT(local->transaction.failed_subvols, priv->child_count))
+ return _gf_false;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i] &&
+ local->transaction.failed_subvols[i])
+ return _gf_false;
+ }
+
+ return _gf_true;
+}
- case AFR_METADATA_TRANSACTION:
- ret = priv->child_count;
- break;
+void
+afr_handle_symmetric_errors(call_frame_t *frame, xlator_t *this)
+{
+ if (afr_is_symmetric_error(frame, this))
+ __mark_all_success(frame, this);
+}
- case AFR_ENTRY_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- ret = priv->child_count;
- break;
+gf_boolean_t
+afr_has_quorum(unsigned char *subvols, xlator_t *this, call_frame_t *frame)
+{
+ unsigned int quorum_count = 0;
+ afr_private_t *priv = NULL;
+ unsigned int up_children_count = 0;
+
+ priv = this->private;
+ up_children_count = AFR_COUNT(subvols, priv->child_count);
+
+ if (afr_lookup_has_quorum(frame, up_children_count))
+ return _gf_true;
+
+ if (priv->quorum_count == AFR_QUORUM_AUTO) {
+ /*
+ * Special case for auto-quorum with an even number of nodes.
+ *
+ * A replica set with even count N can only handle the same
+ * number of failures as odd N-1 before losing "vanilla"
+ * quorum, and the probability of more simultaneous failures is
+ * actually higher. For example, with a 1% chance of failure
+ * we'd have a 0.03% chance of two simultaneous failures with
+ * N=3 but a 0.06% chance with N=4. However, the special case
+ * is necessary for N=2 because there's no real quorum in that
+ * case (i.e. can't normally survive *any* failures). In that
+ * case, we treat the first node as a tie-breaker, allowing
+ * quorum to be retained in some cases while still honoring the
+ * all-important constraint that there can not simultaneously
+ * be two partitioned sets of nodes each believing they have
+ * quorum. Of two equally sized sets, the one without that
+ * first node will lose.
+ *
+ * It turns out that the special case is beneficial for higher
+ * values of N as well. Continuing the example above, the
+ * probability of losing quorum with N=4 and this type of
+ * quorum is (very) slightly lower than with N=3 and vanilla
+ * quorum. The difference becomes even more pronounced with
+ * higher N. Therefore, even though such replica counts are
+ * unlikely to be seen in practice, we might as well use the
+ * "special" quorum then as well.
+ */
+ if ((up_children_count * 2) == priv->child_count) {
+ return subvols[0];
}
+ }
+
+ if (priv->quorum_count == AFR_QUORUM_AUTO) {
+ quorum_count = priv->child_count / 2 + 1;
+ } else {
+ quorum_count = priv->quorum_count;
+ }
- return ret;
+ if (up_children_count >= quorum_count)
+ return _gf_true;
+
+ return _gf_false;
}
-/* {{{ pending */
+static gf_boolean_t
+afr_has_fop_quorum(call_frame_t *frame)
+{
+ xlator_t *this = frame->this;
+ afr_local_t *local = frame->local;
+ unsigned char *locked_nodes = NULL;
-int32_t
-afr_changelog_post_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr,
- dict_t *xdata)
+ locked_nodes = afr_locked_nodes_get(local->transaction.type,
+ &local->internal_lock);
+ return afr_has_quorum(locked_nodes, this, NULL);
+}
+
+static gf_boolean_t
+afr_has_fop_cbk_quorum(call_frame_t *frame)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int call_count = -1;
+ afr_local_t *local = frame->local;
+ xlator_t *this = frame->this;
+ afr_private_t *priv = this->private;
+ unsigned char *success = alloca0(priv->child_count);
+ int i = 0;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i])
+ if (!local->transaction.failed_subvols[i])
+ success[i] = 1;
+ }
+
+ return afr_has_quorum(success, this, NULL);
+}
- priv = this->private;
- local = frame->local;
- int_lock = &local->internal_lock;
+gf_boolean_t
+afr_need_dirty_marking(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *local = NULL;
+ gf_boolean_t need_dirty = _gf_false;
- LOCK (&frame->lock);
- {
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
+ local = frame->local;
- if (call_count == 0) {
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- local->transaction.done (frame, this);
- } else {
- int_lock->lock_cbk = local->transaction.done;
- afr_unlock (frame, this);
- }
- }
+ if (!priv->quorum_count || !local->optimistic_change_log)
+ return _gf_false;
- return 0;
-}
+ if (local->transaction.type == AFR_DATA_TRANSACTION ||
+ local->transaction.type == AFR_METADATA_TRANSACTION)
+ return _gf_false;
+ if (AFR_COUNT(local->transaction.failed_subvols, priv->child_count) ==
+ priv->child_count)
+ return _gf_false;
-void
-afr_transaction_rm_stale_children (call_frame_t *frame, xlator_t *this,
- inode_t *inode, afr_transaction_type type)
-{
- int i = -1;
- int count = 0;
- int read_child = -1;
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- int **pending = NULL;
- int idx = 0;
- int32_t *stale_children = NULL;
- int32_t *fresh_children = NULL;
- gf_boolean_t rm_stale_children = _gf_false;
-
- idx = afr_index_for_transaction_type (type);
-
- priv = this->private;
- local = frame->local;
- pending = local->pending;
-
- if (local->op_ret < 0)
- goto out;
- fresh_children = local->fresh_children;
- read_child = afr_inode_get_read_ctx (this, inode, fresh_children);
- if (read_child < 0) {
- gf_log (this->name, GF_LOG_DEBUG, "Possible split-brain "
- "for %s", uuid_utoa (inode->gfid));
- goto out;
- }
+ if (!afr_has_fop_cbk_quorum(frame))
+ need_dirty = _gf_true;
- for (i = 0; i < priv->child_count; i++) {
- if (!afr_is_child_present (fresh_children,
- priv->child_count, i))
- continue;
- if (pending[i][idx])
- continue;
- /* child is down or op failed on it */
- if (!stale_children)
- stale_children = afr_children_create (priv->child_count);
- if (!stale_children)
- goto out;
+ return need_dirty;
+}
- rm_stale_children = _gf_true;
- stale_children[count++] = i;
- gf_log (this->name, GF_LOG_DEBUG, "Removing stale child "
- "%d for %s", i, uuid_utoa (inode->gfid));
- }
+void
+afr_handle_quorum(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ const char *file = NULL;
+ uuid_t gfid = {0};
- if (!rm_stale_children)
- goto out;
+ local = frame->local;
+ priv = frame->this->private;
- afr_inode_rm_stale_children (this, inode, stale_children);
-out:
- if (stale_children)
- GF_FREE (stale_children);
+ if (priv->quorum_count == 0)
return;
-}
-unsigned char*
-afr_locked_nodes_get (afr_transaction_type type, afr_internal_lock_t *int_lock)
-{
- unsigned char *locked_nodes = NULL;
- switch (type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- locked_nodes = int_lock->inode_locked_nodes;
- break;
+ /* If the fop already failed return right away to preserve errno */
+ if (local->op_ret == -1)
+ return;
+ /*
+ * Network split may happen just after the fops are unwound, so check
+ * if the fop succeeded in a way it still follows quorum. If it doesn't,
+ * mark the fop as failure, mark the changelogs so it reflects that
+ * failure.
+ *
+ * Scenario:
+ * There are 3 mounts on 3 machines(node1, node2, node3) all writing to
+ * single file. Network split happened in a way that node1 can't see
+ * node2, node3. Node2, node3 both of them can't see node1. Now at the
+ * time of sending write all the bricks are up. Just after write fop is
+ * wound on node1, network split happens. Node1 thinks write fop failed
+ * on node2, node3 so marks pending changelog for those 2 extended
+ * attributes on node1. Node2, node3 thinks writes failed on node1 so
+ * they mark pending changelog for node1. When the network is stable
+ * again the file already is in split-brain. These checks prevent
+ * marking pending changelog on other subvolumes if the fop doesn't
+ * succeed in a way it is still following quorum. So with this fix what
+ * is happening is, node1 will have all pending changelog(FOOL) because
+ * the write succeeded only on node1 but failed on node2, node3 so
+ * instead of marking pending changelogs on node2, node3 it just treats
+ * the fop as failure and goes into DIRTY state. Where as node2, node3
+ * say they are sources and have pending changelog to node1 so there is
+ * no split-brain with the fix. The problem is eliminated completely.
+ */
+
+ if (afr_has_fop_cbk_quorum(frame))
+ return;
+
+ if (afr_need_dirty_marking(frame, this))
+ goto set_response;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.pre_op[i])
+ afr_transaction_fop_failed(frame, frame->this, i);
+ }
+
+set_response:
+ local->op_ret = -1;
+ local->op_errno = afr_final_errno(local, priv);
+ if (local->op_errno == 0)
+ local->op_errno = afr_quorum_errno(priv);
+
+ if (local->fd) {
+ gf_uuid_copy(gfid, local->fd->inode->gfid);
+ file = uuid_utoa(gfid);
+ } else {
+ loc_path(&local->loc, local->loc.name);
+ file = local->loc.path;
+ }
+
+ gf_msg(frame->this->name, GF_LOG_WARNING, local->op_errno,
+ AFR_MSG_QUORUM_FAIL, "%s: Failing %s as quorum is not met", file,
+ gf_fop_list[local->op]);
+
+ switch (local->transaction.type) {
case AFR_ENTRY_TRANSACTION:
case AFR_ENTRY_RENAME_TRANSACTION:
- locked_nodes = int_lock->entry_locked_nodes;
- break;
- }
- return locked_nodes;
+ afr_pick_error_xdata(local, priv, local->parent, local->readable,
+ local->parent2, local->readable2);
+ break;
+ default:
+ afr_pick_error_xdata(local, priv, local->inode, local->readable,
+ NULL, NULL);
+ break;
+ }
}
int
-afr_changelog_pre_op_call_count (afr_transaction_type type,
- afr_internal_lock_t *int_lock,
- unsigned int child_count)
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop)
{
- int call_count = 0;
- unsigned char *locked_nodes = NULL;
+ afr_private_t *priv = NULL;
+
+ priv = this->private;
+ loc->parent = inode_ref(priv->root_inode);
+ gf_uuid_copy(loc->pargfid, loc->parent->gfid);
+ loc->name = priv->pending_key[THIN_ARBITER_BRICK_INDEX];
+ if (is_gfid_based_fop && gf_uuid_is_null(priv->ta_gfid)) {
+ /* Except afr_ta_id_file_check() which is path based, all other gluster
+ * FOPS need gfid.*/
+ return -EINVAL;
+ }
+ gf_uuid_copy(loc->gfid, priv->ta_gfid);
+ loc->inode = inode_new(loc->parent->table);
+ if (!loc->inode) {
+ loc_wipe(loc);
+ return -ENOMEM;
+ }
+ return 0;
+}
- locked_nodes = afr_locked_nodes_get (type, int_lock);
- GF_ASSERT (locked_nodes);
+static int
+afr_ta_post_op_done(int ret, call_frame_t *frame, void *opaque)
+{
+ xlator_t *this = NULL;
+ afr_local_t *local = NULL;
+ call_frame_t *txn_frame = NULL;
+ afr_ta_fop_state_t fop_state;
+
+ local = (afr_local_t *)opaque;
+ fop_state = local->fop_state;
+ txn_frame = local->transaction.frame;
+ this = frame->this;
+
+ if (ret == 0) {
+ /*Mark pending xattrs on the up data brick.*/
+ afr_post_op_handle_success(txn_frame, this);
+ } else {
+ afr_post_op_handle_failure(txn_frame, this, -ret);
+ }
+
+ STACK_DESTROY(frame->root);
+ afr_ta_process_onwireq(fop_state, this);
+
+ return 0;
+}
- call_count = afr_locked_children_count (locked_nodes, child_count);
- if (type == AFR_ENTRY_RENAME_TRANSACTION)
- call_count *= 2;
+int **
+afr_set_changelog_xattr(afr_private_t *priv, unsigned char *pending,
+ dict_t *xattr, afr_local_t *local)
+{
+ int **changelog = NULL;
+ int idx = 0;
+ int ret = 0;
+ int i;
+
+ if (local->is_new_entry == _gf_true) {
+ changelog = afr_mark_pending_changelog(priv, pending, xattr,
+ local->cont.dir_fop.buf.ia_type);
+ } else {
+ idx = afr_index_for_transaction_type(local->transaction.type);
+ changelog = afr_matrix_create(priv->child_count, AFR_NUM_CHANGE_LOGS);
+ if (!changelog) {
+ goto out;
+ }
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i])
+ changelog[i][idx] = hton32(1);
+ }
+ ret = afr_set_pending_dict(priv, xattr, changelog);
+ if (ret < 0) {
+ afr_matrix_cleanup(changelog, priv->child_count);
+ return NULL;
+ }
+ }
- return call_count;
+out:
+ return changelog;
}
-int
-afr_changelog_post_op_call_count (afr_transaction_type type,
- unsigned char *pre_op,
- unsigned int child_count)
+static void
+afr_ta_locked_xattrop_validate(afr_private_t *priv, afr_local_t *local,
+ gf_boolean_t *valid)
{
- int call_count = 0;
+ if (priv->ta_event_gen > local->ta_event_gen) {
+ /* We can't trust the ta's response anymore.*/
+ afr_ta_locked_priv_invalidate(priv);
+ *valid = _gf_false;
+ return;
+ }
+ return;
+}
- call_count = afr_pre_op_done_children_count (pre_op, child_count);
- if (type == AFR_ENTRY_RENAME_TRANSACTION)
- call_count *= 2;
+static int
+afr_ta_post_op_do(void *opaque)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ xlator_t *this = NULL;
+ dict_t *xattr = NULL;
+ unsigned char *pending = NULL;
+ int **changelog = NULL;
+ int failed_subvol = -1;
+ int success_subvol = -1;
+ loc_t loc = {
+ 0,
+ };
+ int i = 0;
+ int ret = 0;
+ gf_boolean_t valid = _gf_true;
+
+ local = (afr_local_t *)opaque;
+ this = local->transaction.frame->this;
+ priv = this->private;
+
+ ret = afr_fill_ta_loc(this, &loc, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Failed to populate loc for thin-arbiter.");
+ goto out;
+ }
+
+ xattr = dict_new();
+ if (!xattr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ pending = alloca0(priv->child_count);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i]) {
+ pending[i] = 1;
+ failed_subvol = i;
+ } else {
+ success_subvol = i;
+ }
+ }
+
+ changelog = afr_set_changelog_xattr(priv, pending, xattr, local);
+
+ if (!changelog) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = afr_ta_post_op_lock(this, &loc);
+ if (ret)
+ goto out;
+
+ ret = syncop_xattrop(priv->children[THIN_ARBITER_BRICK_INDEX], &loc,
+ GF_XATTROP_ADD_ARRAY, xattr, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, AFR_MSG_THIN_ARB,
+ "Post-op on thin-arbiter id file %s failed for gfid %s.",
+ priv->pending_key[THIN_ARBITER_BRICK_INDEX],
+ uuid_utoa(local->inode->gfid));
+ }
+ LOCK(&priv->lock);
+ {
+ if (ret == 0) {
+ priv->ta_bad_child_index = failed_subvol;
+ } else if (ret == -EINVAL) {
+ priv->ta_bad_child_index = success_subvol;
+ ret = -EIO; /* TA failed the fop. Return EIO to application. */
+ }
- return call_count;
+ afr_ta_locked_xattrop_validate(priv, local, &valid);
+ }
+ UNLOCK(&priv->lock);
+ if (valid == _gf_false) {
+ gf_msg(this->name, GF_LOG_ERROR, EIO, AFR_MSG_THIN_ARB,
+ "Post-op on thin-arbiter id file %s for gfid %s invalidated due "
+ "to event-gen mismatch.",
+ priv->pending_key[THIN_ARBITER_BRICK_INDEX],
+ uuid_utoa(local->inode->gfid));
+ ret = -EIO;
+ }
+
+ afr_ta_post_op_unlock(this, &loc);
+out:
+ if (xattr)
+ dict_unref(xattr);
+
+ if (changelog)
+ afr_matrix_cleanup(changelog, priv->child_count);
+
+ loc_wipe(&loc);
+
+ return ret;
}
-void
-afr_compute_txn_changelog (afr_local_t *local, afr_private_t *priv)
+static int
+afr_ta_post_op_synctask(xlator_t *this, afr_local_t *local)
{
- int i = 0;
- int index = 0;
- int32_t postop = 0;
- int32_t preop = 1;
- int32_t **txn_changelog = NULL;
+ call_frame_t *ta_frame = NULL;
+ int ret = 0;
+
+ ta_frame = afr_ta_frame_create(this);
+ if (!ta_frame) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to create ta_frame");
+ goto err;
+ }
+ ret = synctask_new(this->ctx->env, afr_ta_post_op_do, afr_ta_post_op_done,
+ ta_frame, local);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, AFR_MSG_THIN_ARB,
+ "Failed to launch post-op on thin arbiter for gfid %s",
+ uuid_utoa(local->inode->gfid));
+ STACK_DESTROY(ta_frame->root);
+ goto err;
+ }
+
+ return ret;
+err:
+ afr_changelog_post_op_fail(local->transaction.frame, this, ENOMEM);
+ return ret;
+}
- txn_changelog = local->transaction.txn_changelog;
- index = afr_index_for_transaction_type (local->transaction.type);
- for (i = 0; i < priv->child_count; i++) {
- postop = ntoh32 (local->pending[i][index]);
- txn_changelog[i][index] = hton32 (postop + preop);
+static void
+afr_ta_set_fop_state(afr_private_t *priv, afr_local_t *local,
+ int *on_wire_count)
+{
+ LOCK(&priv->lock);
+ {
+ if (priv->release_ta_notify_dom_lock == _gf_true) {
+ /* Put the fop in waitq until notify dom lock is released.*/
+ local->fop_state = TA_WAIT_FOR_NOTIFY_LOCK_REL;
+ list_add_tail(&local->ta_waitq, &priv->ta_waitq);
+ } else if (priv->ta_bad_child_index == AFR_CHILD_UNKNOWN) {
+ /* Post-op on thin-arbiter to decide success/failure. */
+ local->fop_state = TA_GET_INFO_FROM_TA_FILE;
+ *on_wire_count = ++priv->ta_on_wire_txn_count;
+ if (*on_wire_count > 1) {
+ /*Avoid sending multiple on-wire post-ops on TA*/
+ list_add_tail(&local->ta_onwireq, &priv->ta_onwireq);
+ }
+ } else if (local->ta_failed_subvol == priv->ta_bad_child_index) {
+ /* Post-op on TA not needed as the fop failed on the in-memory bad
+ * brick. Just mark pending xattrs on the good data brick.*/
+ local->fop_state = TA_INFO_IN_MEMORY_SUCCESS;
+ priv->ta_in_mem_txn_count++;
+ } else {
+ /* Post-op on TA not needed as the fop succeeded only on the
+ * in-memory bad data brick and not the good one. Fail the fop.*/
+ local->fop_state = TA_INFO_IN_MEMORY_FAILED;
+ priv->ta_in_mem_txn_count++;
}
+ }
+ UNLOCK(&priv->lock);
}
-afr_xattrop_type_t
-afr_get_postop_xattrop_type (int32_t **pending, int optimized, int child,
- afr_transaction_type type)
+static void
+afr_ta_fill_failed_subvol(afr_private_t *priv, afr_local_t *local)
{
- int index = 0;
- afr_xattrop_type_t op = LOCAL_LAST;
+ int i = 0;
- index = afr_index_for_transaction_type (type);
- if (optimized && !pending[child][index])
- op = LOCAL_FIRST;
- return op;
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i]) {
+ local->ta_failed_subvol = i;
+ break;
+ }
+ }
}
-void
-afr_set_postop_dict (afr_local_t *local, xlator_t *this, dict_t *xattr,
- int optimized, int child)
-{
- int32_t **txn_changelog = NULL;
- int32_t **changelog = NULL;
- afr_private_t *priv = NULL;
- int ret = 0;
- afr_xattrop_type_t op = LOCAL_LAST;
-
- priv = this->private;
- txn_changelog = local->transaction.txn_changelog;
- op = afr_get_postop_xattrop_type (local->pending, optimized, child,
- local->transaction.type);
- if (optimized)
- changelog = txn_changelog;
- else
- changelog = local->pending;
- ret = afr_set_pending_dict (priv, xattr, changelog, child, op);
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
+static void
+afr_post_op_handle_success(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ if (local->is_new_entry == _gf_true) {
+ afr_mark_new_entry_changelog(frame, this);
+ }
+ afr_changelog_post_op_do(frame, this);
+
+ return;
}
-int
-afr_changelog_post_op (call_frame_t *frame, xlator_t *this)
+static void
+afr_post_op_handle_failure(call_frame_t *frame, xlator_t *this, int op_errno)
+{
+ afr_changelog_post_op_fail(frame, this, op_errno);
+
+ return;
+}
+
+static void
+afr_ta_decide_post_op_state(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int on_wire_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ afr_ta_set_fop_state(priv, local, &on_wire_count);
+
+ switch (local->fop_state) {
+ case TA_GET_INFO_FROM_TA_FILE:
+ if (on_wire_count == 1)
+ afr_ta_post_op_synctask(this, local);
+ /*else, fop is queued in ta_onwireq.*/
+ break;
+ case TA_WAIT_FOR_NOTIFY_LOCK_REL:
+ /*Post releasing the notify lock, we will act on this queue*/
+ break;
+ case TA_INFO_IN_MEMORY_SUCCESS:
+ afr_post_op_handle_success(frame, this);
+ break;
+ case TA_INFO_IN_MEMORY_FAILED:
+ afr_post_op_handle_failure(frame, this, EIO);
+ break;
+ default:
+ break;
+ }
+ return;
+}
+
+static void
+afr_handle_failure_using_thin_arbiter(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = this->private;
+ afr_local_t *local = frame->local;
+
+ afr_ta_fill_failed_subvol(priv, local);
+ gf_msg_debug(this->name, 0,
+ "Fop failed on data brick (%s) for gfid=%s. "
+ "ta info needed to decide fop result.",
+ priv->children[local->ta_failed_subvol]->name,
+ uuid_utoa(local->inode->gfid));
+ afr_ta_decide_post_op_state(frame, this);
+}
+
+void
+afr_changelog_post_op_do(call_frame_t *frame, xlator_t *this)
{
- afr_private_t * priv = this->private;
- afr_internal_lock_t *int_lock = NULL;
- int i = 0;
- int call_count = 0;
+ afr_private_t *priv = this->private;
+ afr_local_t *local = NULL;
+ dict_t *xattr = NULL;
+ int i = 0;
+ int ret = 0;
+ int idx = 0;
+ int nothing_failed = 1;
+ gf_boolean_t need_undirty = _gf_false;
+
+ afr_handle_quorum(frame, this);
+ local = frame->local;
+ idx = afr_index_for_transaction_type(local->transaction.type);
+
+ xattr = dict_new();
+ if (!xattr) {
+ afr_changelog_post_op_fail(frame, this, ENOMEM);
+ goto out;
+ }
+
+ nothing_failed = afr_txn_nothing_failed(frame, this);
+
+ if (afr_changelog_pre_op_uninherit(frame, this))
+ need_undirty = _gf_false;
+ else
+ need_undirty = _gf_true;
+
+ if (local->op_ret < 0 && !nothing_failed) {
+ if (afr_need_dirty_marking(frame, this)) {
+ local->dirty[idx] = hton32(1);
+ goto set_dirty;
+ }
- afr_local_t * local = NULL;
- afr_fd_ctx_t *fdctx = NULL;
- dict_t **xattr = NULL;
- int piggyback = 0;
- int index = 0;
- int nothing_failed = 1;
+ afr_changelog_post_op_done(frame, this);
+ goto out;
+ }
+
+ if (nothing_failed && !need_undirty) {
+ afr_changelog_post_op_done(frame, this);
+ goto out;
+ }
+
+ if (local->transaction.in_flight_sb) {
+ afr_changelog_post_op_fail(frame, this,
+ local->transaction.in_flight_sb_errno);
+ goto out;
+ }
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.failed_subvols[i])
+ local->pending[i][idx] = hton32(1);
+ }
+
+ ret = afr_set_pending_dict(priv, xattr, local->pending);
+ if (ret < 0) {
+ afr_changelog_post_op_fail(frame, this, ENOMEM);
+ goto out;
+ }
+
+ if (need_undirty)
+ local->dirty[idx] = hton32(-1);
+ else
+ local->dirty[idx] = hton32(0);
+
+set_dirty:
+ ret = dict_set_static_bin(xattr, AFR_DIRTY, local->dirty,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ afr_changelog_post_op_fail(frame, this, ENOMEM);
+ goto out;
+ }
+
+ afr_changelog_do(frame, this, xattr, afr_changelog_post_op_done,
+ AFR_TRANSACTION_POST_OP);
+out:
+ if (xattr)
+ dict_unref(xattr);
- local = frame->local;
- int_lock = &local->internal_lock;
+ return;
+}
- __mark_down_children (local->pending, priv->child_count,
- local->child_up, local->transaction.type);
+static int
+afr_changelog_post_op_now(call_frame_t *frame, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ int failed_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (priv->thin_arbiter_count) {
+ failed_count = AFR_COUNT(local->transaction.failed_subvols,
+ priv->child_count);
+ if (failed_count == 1) {
+ afr_handle_failure_using_thin_arbiter(frame, this);
+ return 0;
+ } else {
+ /* Txn either succeeded or failed on both data bricks. Let
+ * post_op_do handle it as the case might be. */
+ }
+ }
- if (local->fd)
- afr_transaction_rm_stale_children (frame, this,
- local->fd->inode,
- local->transaction.type);
+ afr_changelog_post_op_do(frame, this);
+ return 0;
+}
- xattr = alloca (priv->child_count * sizeof (*xattr));
- memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
+gf_boolean_t
+afr_changelog_pre_op_uninherit(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ afr_inode_ctx_t *ctx = NULL;
+ int i = 0;
+ gf_boolean_t ret = _gf_false;
+ int type = 0;
+
+ local = frame->local;
+ priv = this->private;
+ ctx = local->inode_ctx;
+
+ type = afr_index_for_transaction_type(local->transaction.type);
+ if (type != AFR_DATA_TRANSACTION)
+ return !local->transaction.dirtied;
+
+ if (local->transaction.no_uninherit)
+ return _gf_false;
+
+ /* This function must be idempotent. So check if we
+ were called before and return the same answer again.
+
+ It is important to keep this function idempotent for
+ the call in afr_changelog_post_op_safe() to not have
+ side effects on the call from afr_changelog_post_op_now()
+ */
+ if (local->transaction.uninherit_done)
+ return local->transaction.uninherit_value;
+
+ LOCK(&local->inode->lock);
+ {
for (i = 0; i < priv->child_count; i++) {
- xattr[i] = dict_new ();
+ if (local->transaction.pre_op[i] != ctx->pre_op_done[type][i]) {
+ ret = !local->transaction.dirtied;
+ goto unlock;
+ }
+ }
+
+ if (ctx->inherited[type]) {
+ ret = _gf_true;
+ ctx->inherited[type]--;
+ } else if (ctx->on_disk[type]) {
+ ret = _gf_false;
+ ctx->on_disk[type]--;
+ } else {
+ /* ASSERT */
+ ret = _gf_false;
}
- call_count = afr_changelog_post_op_call_count (local->transaction.type,
- local->transaction.pre_op,
- priv->child_count);
- local->call_count = call_count;
+ if (!ctx->inherited[type] && !ctx->on_disk[type]) {
+ for (i = 0; i < priv->child_count; i++)
+ ctx->pre_op_done[type][i] = 0;
+ }
+ }
+unlock:
+ UNLOCK(&local->inode->lock);
- if (local->fd)
- fdctx = afr_fd_ctx_get (local->fd, this);
+ local->transaction.uninherit_done = _gf_true;
+ local->transaction.uninherit_value = ret;
- if (call_count == 0) {
- /* no child is up */
- int_lock->lock_cbk = local->transaction.done;
- afr_unlock (frame, this);
- goto out;
+ return ret;
+}
+
+gf_boolean_t
+afr_changelog_pre_op_inherit(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ gf_boolean_t ret = _gf_false;
+ int type = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (local->transaction.type != AFR_DATA_TRANSACTION)
+ return _gf_false;
+
+ type = afr_index_for_transaction_type(local->transaction.type);
+
+ LOCK(&local->inode->lock);
+ {
+ if (!local->inode_ctx->on_disk[type]) {
+ /* nothing to inherit yet */
+ ret = _gf_false;
+ goto unlock;
}
- /* check if something has failed, to handle piggybacking */
- nothing_failed = 1;
- index = afr_index_for_transaction_type (local->transaction.type);
for (i = 0; i < priv->child_count; i++) {
- if (local->pending[i][index] == 0) {
- nothing_failed = 0;
- break;
- }
+ if (local->transaction.pre_op[i] !=
+ local->inode_ctx->pre_op_done[type][i]) {
+ /* either inherit exactly, or don't */
+ ret = _gf_false;
+ goto unlock;
+ }
}
- afr_compute_txn_changelog (local , priv);
+ local->inode_ctx->inherited[type]++;
- for (i = 0; i < priv->child_count; i++) {
- if (!local->transaction.pre_op[i])
- continue;
+ ret = _gf_true;
- if (local->transaction.type != AFR_DATA_TRANSACTION)
- afr_set_postop_dict (local, this, xattr[i],
- local->optimistic_change_log, i);
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- {
- if (!fdctx) {
- afr_set_postop_dict (local, this, xattr[i],
- 0, i);
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- break;
- }
-
- LOCK (&local->fd->lock);
- {
- piggyback = 0;
- if (fdctx->pre_op_piggyback[i]) {
- fdctx->pre_op_piggyback[i]--;
- piggyback = 1;
- }
- }
- UNLOCK (&local->fd->lock);
-
- afr_set_postop_dict (local, this, xattr[i],
- piggyback, i);
-
- if (nothing_failed && piggyback) {
- afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i], NULL);
- } else {
- __mark_pre_op_undone_on_fd (frame, this, i);
- STACK_WIND_COOKIE (frame,
- afr_changelog_post_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- }
- }
- break;
- case AFR_METADATA_TRANSACTION:
- {
- if (nothing_failed && local->optimistic_change_log) {
- afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i],
- NULL);
- break;
- }
-
- if (local->fd)
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- else
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- }
- break;
+ local->transaction.inherited = _gf_true;
+ }
+unlock:
+ UNLOCK(&local->inode->lock);
- case AFR_ENTRY_RENAME_TRANSACTION:
- {
- if (nothing_failed && local->optimistic_change_log) {
- afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i],
- NULL);
- } else {
- STACK_WIND_COOKIE (frame, afr_changelog_post_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- }
- call_count--;
- }
+ return ret;
+}
- /*
- set it again because previous stack_wind
- might have already returned (think of case
- where subvolume is posix) and would have
- used the dict as placeholder for return
- value
- */
+gf_boolean_t
+afr_changelog_pre_op_update(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+ gf_boolean_t ret = _gf_false;
+ int type = 0;
- afr_set_postop_dict (local, this, xattr[i],
- local->optimistic_change_log, i);
+ local = frame->local;
+ priv = this->private;
- /* fall through */
+ if (local->transaction.type == AFR_ENTRY_TRANSACTION ||
+ local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION)
+ return _gf_false;
- case AFR_ENTRY_TRANSACTION:
- {
- if (nothing_failed && local->optimistic_change_log) {
- afr_changelog_post_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i],
- NULL);
- break;
- }
-
- if (local->fd)
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- else
- STACK_WIND (frame, afr_changelog_post_op_cbk,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- }
- break;
- }
+ if (local->transaction.inherited)
+ /* was already inherited in afr_changelog_pre_op */
+ return _gf_false;
- if (!--call_count)
- break;
- }
+ if (!local->transaction.dirtied)
+ return _gf_false;
-out:
- for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
+ if (!afr_txn_nothing_failed(frame, this))
+ return _gf_false;
+
+ type = afr_index_for_transaction_type(local->transaction.type);
+
+ ret = _gf_false;
+
+ LOCK(&local->inode->lock);
+ {
+ if (!local->inode_ctx->on_disk[type]) {
+ for (i = 0; i < priv->child_count; i++)
+ local->inode_ctx->pre_op_done[type][i] =
+ (!local->transaction.failed_subvols[i]);
+ } else {
+ for (i = 0; i < priv->child_count; i++)
+ if (local->inode_ctx->pre_op_done[type][i] !=
+ (!local->transaction.failed_subvols[i])) {
+ local->transaction.no_uninherit = 1;
+ goto unlock;
+ }
}
+ local->inode_ctx->on_disk[type]++;
- return 0;
-}
+ ret = _gf_true;
+ }
+unlock:
+ UNLOCK(&local->inode->lock);
+ return ret;
+}
-int32_t
-afr_changelog_pre_op_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr,
- dict_t *xdata)
+int
+afr_changelog_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xattr, dict_t *xdata)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = this->private;
- int call_count = -1;
- int child_index = (long) cookie;
+ afr_local_t *local = NULL;
+ int call_count = -1;
+ int child_index = -1;
- local = frame->local;
+ local = frame->local;
+ child_index = (long)cookie;
- LOCK (&frame->lock);
- {
- switch (op_ret) {
- case 0:
- __mark_pre_op_done_on_fd (frame, this, child_index);
- //fallthrough we need to mark the pre_op
- case 1:
- local->transaction.pre_op[child_index] = 1;
- /* special op_ret for piggyback */
- break;
- case -1:
- if (op_errno == ENOTSUP) {
- gf_log (this->name, GF_LOG_ERROR,
- "xattrop not supported by %s",
- priv->children[child_index]->name);
- local->op_ret = -1;
-
- } else if (!child_went_down (op_ret, op_errno)) {
- gf_log (this->name, GF_LOG_ERROR,
- "xattrop failed on child %s: %s",
- priv->children[child_index]->name,
- strerror (op_errno));
- }
- local->op_errno = op_errno;
- break;
- }
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ afr_transaction_fop_failed(frame, this, child_index);
+ }
- call_count = --local->call_count;
- }
- UNLOCK (&frame->lock);
+ if (xattr)
+ local->transaction.changelog_xdata[child_index] = dict_ref(xattr);
- if (call_count == 0) {
- if ((local->op_ret == -1) &&
- (local->op_errno == ENOTSUP)) {
- local->transaction.resume (frame, this);
- } else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
+ call_count = afr_frame_return(frame);
- afr_pid_restore (frame);
+ if (call_count == 0) {
+ local->transaction.changelog_resume(frame, this);
+ }
+
+ return 0;
+}
- local->transaction.fop (frame, this);
+void
+afr_changelog_populate_xdata(call_frame_t *frame, afr_xattrop_type_t op,
+ dict_t **xdata, dict_t **newloc_xdata)
+{
+ int i = 0;
+ int ret = 0;
+ char *key = NULL;
+ int keylen = 0;
+ const char *name = NULL;
+ dict_t *xdata1 = NULL;
+ dict_t *xdata2 = NULL;
+ xlator_t *this = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ gf_boolean_t need_entry_key_set = _gf_true;
+
+ local = frame->local;
+ this = THIS;
+ priv = this->private;
+
+ if (local->transaction.type == AFR_DATA_TRANSACTION ||
+ local->transaction.type == AFR_METADATA_TRANSACTION)
+ goto out;
+
+ if (!priv->esh_granular)
+ goto out;
+
+ xdata1 = dict_new();
+ if (!xdata1)
+ goto out;
+
+ name = local->loc.name;
+ if (local->op == GF_FOP_LINK)
+ name = local->newloc.name;
+
+ switch (op) {
+ case AFR_TRANSACTION_PRE_OP:
+ key = GF_XATTROP_ENTRY_IN_KEY;
+ break;
+ case AFR_TRANSACTION_POST_OP:
+ if (afr_txn_nothing_failed(frame, this)) {
+ key = GF_XATTROP_ENTRY_OUT_KEY;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.failed_subvols[i])
+ continue;
+ need_entry_key_set = _gf_false;
+ break;
}
+ /* If the transaction itself did not fail and there
+ * are no failed subvolumes, check whether the fop
+ * failed due to a symmetric error. If it did, do
+ * not set the ENTRY_OUT xattr which would end up
+ * deleting a name index which was created possibly by
+ * an earlier entry txn that may have failed on some
+ * of the sub-volumes.
+ */
+ if (local->op_ret)
+ need_entry_key_set = _gf_false;
+ } else {
+ key = GF_XATTROP_ENTRY_IN_KEY;
+ }
+ break;
+ }
+
+ if (need_entry_key_set) {
+ keylen = strlen(key);
+ ret = dict_set_strn(xdata1, key, keylen, (char *)name);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
+ "%s/%s: Could not set %s key during xattrop",
+ uuid_utoa(local->loc.pargfid), local->loc.name, key);
+ if (local->transaction.type == AFR_ENTRY_RENAME_TRANSACTION) {
+ xdata2 = dict_new();
+ if (!xdata2)
+ goto out;
+
+ ret = dict_set_strn(xdata2, key, keylen,
+ (char *)local->newloc.name);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, AFR_MSG_DICT_SET_FAILED,
+ "%s/%s: Could not set %s key during "
+ "xattrop",
+ uuid_utoa(local->newloc.pargfid), local->newloc.name,
+ key);
}
+ }
- return 0;
+ *xdata = xdata1;
+ *newloc_xdata = xdata2;
+ xdata1 = xdata2 = NULL;
+out:
+ if (xdata1)
+ dict_unref(xdata1);
+ return;
}
int
-afr_changelog_pre_op (call_frame_t *frame, xlator_t *this)
+afr_changelog_prepare(xlator_t *this, call_frame_t *frame, int *call_count,
+ afr_changelog_resume_t changelog_resume,
+ afr_xattrop_type_t op, dict_t **xdata,
+ dict_t **newloc_xdata)
{
- afr_private_t * priv = this->private;
- int i = 0;
- int ret = 0;
- int call_count = 0;
- dict_t **xattr = NULL;
- afr_fd_ctx_t *fdctx = NULL;
- afr_local_t *local = NULL;
- int piggyback = 0;
- afr_internal_lock_t *int_lock = NULL;
- unsigned char *locked_nodes = NULL;
+ afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
- int_lock = &local->internal_lock;
+ local = frame->local;
+ priv = this->private;
- xattr = alloca (priv->child_count * sizeof (*xattr));
- memset (xattr, 0, (priv->child_count * sizeof (*xattr)));
+ *call_count = afr_changelog_call_count(
+ local->transaction.type, local->transaction.pre_op,
+ local->transaction.failed_subvols, priv->child_count);
- for (i = 0; i < priv->child_count; i++) {
- xattr[i] = dict_new ();
- }
+ if (*call_count == 0) {
+ changelog_resume(frame, this);
+ return -1;
+ }
- call_count = afr_changelog_pre_op_call_count (local->transaction.type,
- int_lock,
- priv->child_count);
- if (call_count == 0) {
- local->internal_lock.lock_cbk =
- local->transaction.done;
- afr_unlock (frame, this);
- goto out;
+ afr_changelog_populate_xdata(frame, op, xdata, newloc_xdata);
+ local->call_count = *call_count;
+
+ local->transaction.changelog_resume = changelog_resume;
+ return 0;
+}
+
+int
+afr_changelog_do(call_frame_t *frame, xlator_t *this, dict_t *xattr,
+ afr_changelog_resume_t changelog_resume, afr_xattrop_type_t op)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ dict_t *newloc_xdata = NULL;
+ int i = 0;
+ int call_count = 0;
+ int ret = 0;
+
+ local = frame->local;
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (local->transaction.changelog_xdata[i]) {
+ dict_unref(local->transaction.changelog_xdata[i]);
+ local->transaction.changelog_xdata[i] = NULL;
}
+ }
- local->call_count = call_count;
+ ret = afr_changelog_prepare(this, frame, &call_count, changelog_resume, op,
+ &xdata, &newloc_xdata);
- __mark_all_pending (local->pending, priv->child_count,
- local->transaction.type);
+ if (ret)
+ return 0;
- if (local->fd)
- fdctx = afr_fd_ctx_get (local->fd, this);
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i] ||
+ local->transaction.failed_subvols[i])
+ continue;
- locked_nodes = afr_locked_nodes_get (local->transaction.type, int_lock);
- for (i = 0; i < priv->child_count; i++) {
- if (!locked_nodes[i])
- continue;
- ret = afr_set_pending_dict (priv, xattr[i], local->pending,
- i, LOCAL_FIRST);
-
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
-
-
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- {
- if (!fdctx) {
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &(local->loc),
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- break;
- }
-
- LOCK (&local->fd->lock);
- {
- piggyback = 0;
- if (fdctx->pre_op_done[i]) {
- fdctx->pre_op_piggyback[i]++;
- piggyback = 1;
- fdctx->hit++;
- } else {
- fdctx->miss++;
- }
- }
- UNLOCK (&local->fd->lock);
-
- if (piggyback)
- afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i],
- NULL);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- }
- break;
- case AFR_METADATA_TRANSACTION:
- {
- if (local->optimistic_change_log) {
- afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i],
- NULL);
- break;
- }
-
- if (local->fd)
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &(local->loc),
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ if (!local->fd) {
+ STACK_WIND_COOKIE(
+ frame, afr_changelog_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->xattrop,
+ &local->loc, GF_XATTROP_ADD_ARRAY, xattr, xdata);
+ } else {
+ STACK_WIND_COOKIE(
+ frame, afr_changelog_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->fxattrop,
+ local->fd, GF_XATTROP_ADD_ARRAY, xattr, xdata);
}
break;
+ case AFR_ENTRY_RENAME_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- {
- if (local->optimistic_change_log) {
- afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i],
- NULL);
- } else {
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.new_parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- }
-
- call_count--;
- }
+ STACK_WIND_COOKIE(frame, afr_changelog_cbk, (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.new_parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr, newloc_xdata);
+ call_count--;
+ /* fall through */
- /*
- set it again because previous stack_wind
- might have already returned (think of case
- where subvolume is posix) and would have
- used the dict as placeholder for return
- value
- */
+ case AFR_ENTRY_TRANSACTION:
+ if (local->fd)
+ STACK_WIND_COOKIE(
+ frame, afr_changelog_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->fxattrop,
+ local->fd, GF_XATTROP_ADD_ARRAY, xattr, xdata);
+ else
+ STACK_WIND_COOKIE(frame, afr_changelog_cbk, (void *)(long)i,
+ priv->children[i],
+ priv->children[i]->fops->xattrop,
+ &local->transaction.parent_loc,
+ GF_XATTROP_ADD_ARRAY, xattr, xdata);
+ break;
+ }
- ret = afr_set_pending_dict (priv, xattr[i], local->pending,
- i, LOCAL_FIRST);
+ if (!--call_count)
+ break;
+ }
- if (ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "failed to set pending entry");
+ if (xdata)
+ dict_unref(xdata);
+ if (newloc_xdata)
+ dict_unref(newloc_xdata);
+ return 0;
+}
- /* fall through */
+static void
+afr_init_optimistic_changelog_for_txn(xlator_t *this, afr_local_t *local)
+{
+ int locked_count = 0;
+ afr_private_t *priv = NULL;
- case AFR_ENTRY_TRANSACTION:
- {
- if (local->optimistic_change_log) {
- afr_changelog_pre_op_cbk (frame, (void *)(long)i,
- this, 1, 0, xattr[i],
- NULL);
- break;
- }
-
- if (local->fd)
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->fxattrop,
- local->fd,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- else
- STACK_WIND_COOKIE (frame,
- afr_changelog_pre_op_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->xattrop,
- &local->transaction.parent_loc,
- GF_XATTROP_ADD_ARRAY, xattr[i],
- NULL);
- }
- break;
- }
+ priv = this->private;
- if (!--call_count)
- break;
- }
-out:
- for (i = 0; i < priv->child_count; i++) {
- dict_unref (xattr[i]);
- }
+ locked_count = AFR_COUNT(local->transaction.pre_op, priv->child_count);
+ if (priv->optimistic_change_log && locked_count == priv->child_count)
+ local->optimistic_change_log = 1;
- return 0;
+ return;
}
-
int
-afr_post_blocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
+afr_changelog_pre_op(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ afr_private_t *priv = this->private;
+ int i = 0;
+ int ret = 0;
+ int call_count = 0;
+ int op_errno = 0;
+ afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ unsigned char *locked_nodes = NULL;
+ int idx = -1;
+ gf_boolean_t pre_nop = _gf_true;
+ dict_t *xdata_req = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+ idx = afr_index_for_transaction_type(local->transaction.type);
+
+ locked_nodes = afr_locked_nodes_get(local->transaction.type, int_lock);
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (locked_nodes[i]) {
+ local->transaction.pre_op[i] = 1;
+ call_count++;
+ } else {
+ local->transaction.failed_subvols[i] = 1;
+ }
+ }
+
+ afr_init_optimistic_changelog_for_txn(this, local);
+
+ if (afr_changelog_pre_op_inherit(frame, this))
+ goto next;
+
+ /* This condition should not be met with present code, as
+ * transaction.done will be called if locks are not acquired on even a
+ * single node.
+ */
+ if (call_count == 0) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+
+ /* Check if the fop can be performed on at least
+ * quorum number of nodes.
+ */
+ if (priv->quorum_count && !afr_has_fop_quorum(frame)) {
+ op_errno = int_lock->lock_op_errno;
+ if (op_errno == 0)
+ op_errno = afr_quorum_errno(priv);
+ goto err;
+ }
+
+ xdata_req = dict_new();
+ if (!xdata_req) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (call_count < priv->child_count)
+ pre_nop = _gf_false;
+
+ /* Set an all-zero pending changelog so that in the cbk, we can get the
+ * current on-disk values. In a replica 3 volume with arbiter enabled,
+ * these values are needed to arrive at a go/ no-go of the fop phase to
+ * avoid ending up in split-brain.*/
+
+ ret = afr_set_pending_dict(priv, xdata_req, local->pending);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (afr_needs_changelog_update(local)) {
+ local->dirty[idx] = hton32(1);
+
+ ret = dict_set_static_bin(xdata_req, AFR_DIRTY, local->dirty,
+ sizeof(int) * AFR_NUM_CHANGE_LOGS);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- local = frame->local;
- int_lock = &local->internal_lock;
+ pre_nop = _gf_false;
+ local->transaction.dirtied = 1;
+ }
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Blocking inodelks failed.");
- local->transaction.done (frame, this);
- } else {
+ if (pre_nop)
+ goto next;
- gf_log (this->name, GF_LOG_DEBUG,
- "Blocking inodelks done. Proceeding to FOP");
- afr_internal_lock_finish (frame, this);
- }
+ if (!local->pre_op_compat) {
+ dict_copy(xdata_req, local->xdata_req);
+ goto next;
+ }
- return 0;
+ afr_changelog_do(frame, this, xdata_req, afr_transaction_perform_fop,
+ AFR_TRANSACTION_PRE_OP);
+
+ if (xdata_req)
+ dict_unref(xdata_req);
+
+ return 0;
+next:
+ afr_transaction_perform_fop(frame, this);
+
+ if (xdata_req)
+ dict_unref(xdata_req);
+
+ return 0;
+err:
+ local->internal_lock.lock_cbk = afr_transaction_done;
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ afr_handle_lock_acquire_failure(local);
+
+ if (xdata_req)
+ dict_unref(xdata_req);
+
+ return 0;
}
+int
+afr_post_nonblocking_lock_cbk(call_frame_t *frame, xlator_t *this)
+{
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
+ int_lock = &local->internal_lock;
+
+ /* Initiate blocking locks if non-blocking has failed */
+ if (int_lock->lock_op_ret < 0) {
+ gf_msg_debug(this->name, 0,
+ "Non blocking locks failed. Proceeding to blocking");
+ int_lock->lock_cbk = afr_internal_lock_finish;
+ afr_blocking_lock(frame, this);
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Non blocking locks done. Proceeding to FOP");
+
+ afr_internal_lock_finish(frame, this);
+ }
+
+ return 0;
+}
int
-afr_post_nonblocking_inodelk_cbk (call_frame_t *frame, xlator_t *this)
+afr_post_blocking_rename_cbk(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
- int_lock = &local->internal_lock;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- /* Initiate blocking locks if non-blocking has failed */
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking inodelks failed. Proceeding to blocking");
- int_lock->lock_cbk = afr_post_blocking_inodelk_cbk;
- afr_set_lk_owner (frame, this, frame->root);
- afr_blocking_lock (frame, this);
- } else {
+ if (int_lock->lock_op_ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, AFR_MSG_INTERNAL_LKS_FAILED,
+ "Blocking entrylks failed.");
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking inodelks done. Proceeding to FOP");
- afr_internal_lock_finish (frame, this);
- }
+ afr_transaction_done(frame, this);
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Blocking entrylks done. Proceeding to FOP");
- return 0;
+ afr_internal_lock_finish(frame, this);
+ }
+ return 0;
}
-
int
-afr_post_blocking_entrylk_cbk (call_frame_t *frame, xlator_t *this)
+afr_post_lower_unlock_cbk(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
- int_lock = &local->internal_lock;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Blocking entrylks failed.");
- local->transaction.done (frame, this);
- } else {
+ GF_ASSERT(!int_lock->higher_locked);
- gf_log (this->name, GF_LOG_DEBUG,
- "Blocking entrylks done. Proceeding to FOP");
- afr_internal_lock_finish (frame, this);
- }
+ int_lock->lock_cbk = afr_post_blocking_rename_cbk;
+ afr_blocking_lock(frame, this);
- return 0;
+ return 0;
}
+int
+afr_set_transaction_flock(xlator_t *this, afr_local_t *local,
+ afr_lockee_t *lockee)
+{
+ afr_private_t *priv = NULL;
+ struct gf_flock *flock = NULL;
+
+ priv = this->private;
+ flock = &lockee->flock;
+
+ if ((priv->arbiter_count || local->transaction.eager_lock_on ||
+ priv->full_lock) &&
+ local->transaction.type == AFR_DATA_TRANSACTION) {
+ /*Lock entire file to avoid network split brains.*/
+ flock->l_len = 0;
+ flock->l_start = 0;
+ } else {
+ flock->l_len = local->transaction.len;
+ flock->l_start = local->transaction.start;
+ }
+ flock->l_type = F_WRLCK;
+
+ return 0;
+}
int
-afr_post_nonblocking_entrylk_cbk (call_frame_t *frame, xlator_t *this)
+afr_lock(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ afr_internal_lock_t *int_lock = NULL;
+ afr_local_t *local = NULL;
+ int i = 0;
- local = frame->local;
- int_lock = &local->internal_lock;
+ local = frame->local;
+ int_lock = &local->internal_lock;
- /* Initiate blocking locks if non-blocking has failed */
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking entrylks failed. Proceeding to blocking");
- int_lock->lock_cbk = afr_post_blocking_entrylk_cbk;
- afr_blocking_lock (frame, this);
- } else {
+ int_lock->lock_cbk = afr_post_nonblocking_lock_cbk;
+ int_lock->domain = this->name;
- gf_log (this->name, GF_LOG_DEBUG,
- "Non blocking entrylks done. Proceeding to FOP");
- afr_internal_lock_finish (frame, this);
- }
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ for (i = 0; i < int_lock->lockee_count; i++) {
+ afr_set_transaction_flock(this, local, &int_lock->lockee[i]);
+ }
- return 0;
-}
+ break;
+ case AFR_ENTRY_TRANSACTION:
+ int_lock->lk_basename = local->transaction.basename;
+ if (local->transaction.parent_loc.path)
+ int_lock->lk_loc = &local->transaction.parent_loc;
+ else
+ GF_ASSERT(local->fd);
+ break;
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ break;
+ }
+ afr_lock_nonblocking(frame, this);
-int
-afr_post_blocking_rename_cbk (call_frame_t *frame, xlator_t *this)
+ return 0;
+}
+
+static gf_boolean_t
+afr_locals_overlap(afr_local_t *local1, afr_local_t *local2)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ uint64_t start1 = local1->transaction.start;
+ uint64_t start2 = local2->transaction.start;
+ uint64_t end1 = 0;
+ uint64_t end2 = 0;
+
+ if (local1->transaction.len)
+ end1 = start1 + local1->transaction.len - 1;
+ else
+ end1 = ULLONG_MAX;
+
+ if (local2->transaction.len)
+ end2 = start2 + local2->transaction.len - 1;
+ else
+ end2 = ULLONG_MAX;
+
+ return ((end1 >= start2) && (end2 >= start1));
+}
- local = frame->local;
- int_lock = &local->internal_lock;
+gf_boolean_t
+afr_has_lock_conflict(afr_local_t *local, gf_boolean_t waitlist_check)
+{
+ afr_local_t *each = NULL;
+ afr_lock_t *lock = NULL;
+
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ /*
+ * Once full file lock is acquired in eager-lock phase, overlapping
+ * writes do not compete for inode-locks, instead are transferred to the
+ * next writes. Because of this overlapping writes are not ordered.
+ * This can cause inconsistencies in replication.
+ * Example:
+ * Two overlapping writes w1, w2 are sent in parallel on same fd
+ * in two threads t1, t2.
+ * Both threads can execute afr_writev_wind in the following manner.
+ * t1 winds w1 on brick-0
+ * t2 winds w2 on brick-0
+ * t2 winds w2 on brick-1
+ * t1 winds w1 on brick-1
+ *
+ * This check makes sure the locks are not transferred for
+ * overlapping writes.
+ */
+ list_for_each_entry(each, &lock->owners, transaction.owner_list)
+ {
+ if (afr_locals_overlap(each, local)) {
+ return _gf_true;
+ }
+ }
+
+ if (!waitlist_check)
+ return _gf_false;
+ list_for_each_entry(each, &lock->waiting, transaction.wait_list)
+ {
+ if (afr_locals_overlap(each, local)) {
+ return _gf_true;
+ }
+ }
+ return _gf_false;
+}
- if (int_lock->lock_op_ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "Blocking entrylks failed.");
- local->transaction.done (frame, this);
- } else {
+/* }}} */
+static void
+afr_copy_inodelk_vars(afr_internal_lock_t *dst, afr_internal_lock_t *src,
+ xlator_t *this, int lockee_num)
+{
+ afr_private_t *priv = this->private;
+ afr_lockee_t *sl = &src->lockee[lockee_num];
+ afr_lockee_t *dl = &dst->lockee[lockee_num];
+
+ dst->domain = src->domain;
+ dl->flock.l_len = sl->flock.l_len;
+ dl->flock.l_start = sl->flock.l_start;
+ dl->flock.l_type = sl->flock.l_type;
+ dl->locked_count = sl->locked_count;
+ memcpy(dl->locked_nodes, sl->locked_nodes,
+ priv->child_count * sizeof(*dl->locked_nodes));
+}
- gf_log (this->name, GF_LOG_DEBUG,
- "Blocking entrylks done. Proceeding to FOP");
- afr_internal_lock_finish (frame, this);
+void
+__afr_transaction_wake_shared(afr_local_t *local, struct list_head *shared)
+{
+ gf_boolean_t conflict = _gf_false;
+ afr_local_t *each = NULL;
+ afr_lock_t *lock = &local->inode_ctx->lock[local->transaction.type];
+
+ while (!conflict) {
+ if (list_empty(&lock->waiting))
+ return;
+ each = list_entry(lock->waiting.next, afr_local_t,
+ transaction.wait_list);
+ if (afr_has_lock_conflict(each, _gf_false)) {
+ conflict = _gf_true;
}
- return 0;
+ if (conflict && !list_empty(&lock->owners))
+ return;
+ afr_copy_inodelk_vars(&each->internal_lock, &local->internal_lock,
+ each->transaction.frame->this, 0);
+ list_move_tail(&each->transaction.wait_list, shared);
+ list_add_tail(&each->transaction.owner_list, &lock->owners);
+ }
}
+static void
+afr_lock_resume_shared(struct list_head *list)
+{
+ afr_local_t *each = NULL;
+
+ while (!list_empty(list)) {
+ each = list_entry(list->next, afr_local_t, transaction.wait_list);
+ list_del_init(&each->transaction.wait_list);
+ afr_changelog_pre_op(each->transaction.frame,
+ each->transaction.frame->this);
+ }
+}
int
-afr_post_lower_unlock_cbk (call_frame_t *frame, xlator_t *this)
+afr_internal_lock_finish(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ afr_local_t *local = frame->local;
+ afr_lock_t *lock = NULL;
+
+ local->internal_lock.lock_cbk = NULL;
+ if (!local->transaction.eager_lock_on) {
+ if (local->internal_lock.lock_op_ret < 0) {
+ afr_transaction_done(frame, this);
+ return 0;
+ }
+ afr_changelog_pre_op(frame, this);
+ } else {
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ if (local->internal_lock.lock_op_ret < 0) {
+ afr_handle_lock_acquire_failure(local);
+ } else {
+ lock->event_generation = local->event_generation;
+ afr_changelog_pre_op(frame, this);
+ }
+ }
- local = frame->local;
- int_lock = &local->internal_lock;
+ return 0;
+}
- GF_ASSERT (!int_lock->higher_locked);
+gf_boolean_t
+afr_are_conflicting_ops_waiting(afr_local_t *local, xlator_t *this)
+{
+ afr_lock_t *lock = NULL;
+ lock = &local->inode_ctx->lock[local->transaction.type];
+
+ /* Lets say mount1 has eager-lock(full-lock) and after the eager-lock
+ * is taken mount2 opened the same file, it won't be able to
+ * perform any {meta,}data operations until mount1 releases eager-lock.
+ * To avoid such scenario do not enable eager-lock for this transaction
+ * if open-fd-count is > 1 for metadata transactions and if num-inodelks > 1
+ * for data transactions
+ */
+
+ if (local->transaction.type == AFR_METADATA_TRANSACTION) {
+ if (local->inode_ctx->open_fd_count > 1) {
+ return _gf_true;
+ }
+ } else if (local->transaction.type == AFR_DATA_TRANSACTION) {
+ if (lock->num_inodelks > 1) {
+ return _gf_true;
+ }
+ }
- int_lock->lock_cbk = afr_post_blocking_rename_cbk;
- afr_blocking_lock (frame, this);
+ return _gf_false;
+}
- return 0;
+gf_boolean_t
+afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this,
+ int delay)
+{
+ afr_local_t *local = NULL;
+ afr_lock_t *lock = NULL;
+ gf_boolean_t res = _gf_false;
+
+ local = frame->local;
+ lock = &local->inode_ctx->lock[local->transaction.type];
+
+ if (!afr_txn_nothing_failed(frame, this)) {
+ lock->release = _gf_true;
+ goto out;
+ }
+
+ if (afr_are_conflicting_ops_waiting(local, this)) {
+ lock->release = _gf_true;
+ goto out;
+ }
+
+ if (!list_empty(&lock->owners))
+ goto out;
+ else
+ GF_ASSERT(list_empty(&lock->waiting));
+
+ if (lock->release) {
+ goto out;
+ }
+
+ if (!delay) {
+ goto out;
+ }
+
+ if (local->transaction.disable_delayed_post_op) {
+ goto out;
+ }
+
+ if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) &&
+ (local->op != GF_FOP_FSYNC)) {
+ /*Only allow writes/fsyncs but shard does [f]xattrops on writes, so
+ * they are fine too*/
+ goto out;
+ }
+
+ res = _gf_true;
+out:
+ return res;
}
+void
+afr_delayed_changelog_wake_up_cbk(void *data)
+{
+ afr_lock_t *lock = NULL;
+ afr_local_t *local = data;
+ afr_local_t *timer_local = NULL;
+ struct list_head shared;
+
+ INIT_LIST_HEAD(&shared);
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ LOCK(&local->inode->lock);
+ {
+ timer_local = list_entry(lock->post_op.next, afr_local_t,
+ transaction.owner_list);
+ if (list_empty(&lock->owners) && (local == timer_local)) {
+ GF_ASSERT(list_empty(&lock->waiting));
+ /*Last owner*/
+ lock->release = _gf_true;
+ lock->delay_timer = NULL;
+ }
+ }
+ UNLOCK(&local->inode->lock);
+ afr_changelog_post_op_now(local->transaction.frame,
+ local->transaction.frame->this);
+}
+/* SET operation */
int
-afr_set_transaction_flock (afr_local_t *local)
+afr_fd_report_unstable_write(xlator_t *this, afr_local_t *local)
{
- afr_internal_lock_t *int_lock = NULL;
+ LOCK(&local->inode->lock);
+ {
+ local->inode_ctx->witnessed_unstable_write = _gf_true;
+ }
+ UNLOCK(&local->inode->lock);
- int_lock = &local->internal_lock;
+ return 0;
+}
- int_lock->lk_flock.l_len = local->transaction.len;
- int_lock->lk_flock.l_start = local->transaction.start;
- int_lock->lk_flock.l_type = F_WRLCK;
+/* TEST and CLEAR operation */
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write(xlator_t *this, inode_t *inode)
+{
+ afr_inode_ctx_t *ctx = NULL;
+ gf_boolean_t witness = _gf_false;
- return 0;
+ LOCK(&inode->lock);
+ {
+ (void)__afr_inode_ctx_get(this, inode, &ctx);
+
+ if (ctx->witnessed_unstable_write) {
+ witness = _gf_true;
+ ctx->witnessed_unstable_write = _gf_false;
+ }
+ }
+ UNLOCK(&inode->lock);
+
+ return witness;
}
int
-afr_lock_rec (call_frame_t *frame, xlator_t *this)
+afr_changelog_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int child_index = (long)cookie;
+ int call_count = -1;
+ afr_local_t *local = NULL;
- local = frame->local;
- int_lock = &local->internal_lock;
+ priv = this->private;
+ local = frame->local;
- int_lock->transaction_lk_type = AFR_TRANSACTION_LK;
+ if (op_ret != 0) {
+ /* Failure of fsync() is as good as failure of previous
+ write(). So treat it like one.
+ */
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, AFR_MSG_FSYNC_FAILED,
+ "fsync(%s) failed on subvolume %s. Transaction was %s",
+ uuid_utoa(local->fd->inode->gfid),
+ priv->children[child_index]->name, gf_fop_list[local->op]);
- switch (local->transaction.type) {
- case AFR_DATA_TRANSACTION:
- case AFR_METADATA_TRANSACTION:
- afr_set_transaction_flock (local);
+ afr_transaction_fop_failed(frame, this, child_index);
+ }
- int_lock->lock_cbk = afr_post_nonblocking_inodelk_cbk;
+ call_count = afr_frame_return(frame);
- afr_nonblocking_inodelk (frame, this);
- break;
+ if (call_count == 0)
+ afr_changelog_post_op_now(frame, this);
- case AFR_ENTRY_RENAME_TRANSACTION:
+ return 0;
+}
- int_lock->lock_cbk = afr_post_blocking_rename_cbk;
- afr_blocking_lock (frame, this);
- break;
+int
+afr_changelog_fsync(call_frame_t *frame, xlator_t *this)
+{
+ afr_local_t *local = NULL;
+ int i = 0;
+ int call_count = 0;
+ afr_private_t *priv = NULL;
+ dict_t *xdata = NULL;
+ GF_UNUSED int ret = -1;
- case AFR_ENTRY_TRANSACTION:
- int_lock->lk_basename = local->transaction.basename;
- if (&local->transaction.parent_loc)
- int_lock->lk_loc = &local->transaction.parent_loc;
- else
- GF_ASSERT (local->fd);
+ local = frame->local;
+ priv = this->private;
- int_lock->lock_cbk = afr_post_nonblocking_entrylk_cbk;
- afr_nonblocking_entrylk (frame, this);
- break;
- }
+ call_count = AFR_COUNT(local->transaction.pre_op, priv->child_count);
+ if (!call_count) {
+ /* will go straight to unlock */
+ afr_changelog_post_op_now(frame, this);
return 0;
-}
+ }
+ local->call_count = call_count;
-int
-afr_lock (call_frame_t *frame, xlator_t *this)
-{
- afr_pid_save (frame);
+ xdata = dict_new();
+ if (xdata) {
+ ret = dict_set_int32_sizen(xdata, "batch-fsync", 1);
+ ret = dict_set_str(xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ }
- frame->root->pid = (long) frame->root;
+ for (i = 0; i < priv->child_count; i++) {
+ if (!local->transaction.pre_op[i])
+ continue;
- afr_set_lk_owner (frame, this, frame->root);
+ STACK_WIND_COOKIE(frame, afr_changelog_fsync_cbk, (void *)(long)i,
+ priv->children[i], priv->children[i]->fops->fsync,
+ local->fd, 1, xdata);
+ if (!--call_count)
+ break;
+ }
- afr_set_lock_number (frame, this);
+ if (xdata)
+ dict_unref(xdata);
- return afr_lock_rec (frame, this);
+ return 0;
}
-
-/* }}} */
-
int
-afr_internal_lock_finish (call_frame_t *frame, xlator_t *this)
+afr_changelog_post_op_safe(call_frame_t *frame, xlator_t *this)
{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
- priv = this->private;
- local = frame->local;
+ local = frame->local;
+ priv = this->private;
- /* Perform fops with the lk-owner from top xlator.
- * Eg: lk-owner of posix-lk and flush should be same,
- * flush cant clear the posix-lks without that lk-owner.
- */
- frame->root->lk_owner = local->transaction.main_frame->root->lk_owner;
- if (__fop_changelog_needed (frame, this)) {
- afr_changelog_pre_op (frame, this);
- } else {
- __mark_all_success (local->pending, priv->child_count,
- local->transaction.type);
+ if (!local->fd || local->transaction.type != AFR_DATA_TRANSACTION) {
+ afr_changelog_post_op_now(frame, this);
+ return 0;
+ }
+
+ if (afr_changelog_pre_op_uninherit(frame, this) &&
+ afr_txn_nothing_failed(frame, this)) {
+ /* just detected that this post-op is about to
+ be optimized away as a new write() has
+ already piggybacked on this frame's changelog.
+ */
+ afr_changelog_post_op_now(frame, this);
+ return 0;
+ }
+
+ /* Calling afr_changelog_post_op_now() now will result in
+ issuing ->[f]xattrop().
+
+ Performing a hard POST-OP (->[f]xattrop() FOP) is a more
+ responsible operation that what it might appear on the surface.
+
+ The changelog of a file (in the xattr of the file on the server)
+ stores information (pending count) about the state of the file
+ on the OTHER server. This changelog is blindly trusted, and must
+ therefore be updated in such a way it remains trustworthy. This
+ implies that decrementing the pending count (essentially "clearing
+ the dirty flag") must be done STRICTLY after we are sure that the
+ operation on the other server has reached stable storage.
+
+ While the backend filesystem on that server will eventually flush
+ it to stable storage, we (being in userspace) have no mechanism
+ to get notified when the write became "stable".
+
+ This means we need take matter into our own hands and issue an
+ fsync() EVEN IF THE APPLICATION WAS PERFORMING UNSTABLE WRITES,
+ and get an acknowledgement for it. And we need to wait for the
+ fsync() acknowledgement before initiating the hard POST-OP.
+
+ However if the FD itself was opened in O_SYNC or O_DSYNC then
+ we are already guaranteed that the writes were made stable as
+ part of the FOP itself. The same holds true for NFS stable
+ writes which happen on an anonymous FD with O_DSYNC or O_SYNC
+ flag set in the writev() @flags param. For all other write types,
+ mark a flag in the fdctx whenever an unstable write is witnessed.
+ */
+
+ if (!afr_fd_has_witnessed_unstable_write(this, local->inode)) {
+ afr_changelog_post_op_now(frame, this);
+ return 0;
+ }
+
+ /* Check whether users want durability and perform fsync/post-op
+ * accordingly.
+ */
+ if (priv->ensure_durability) {
+ /* Time to fsync() */
+ afr_changelog_fsync(frame, this);
+ } else {
+ afr_changelog_post_op_now(frame, this);
+ }
+
+ return 0;
+}
- afr_pid_restore (frame);
+void
+afr_changelog_post_op(call_frame_t *frame, xlator_t *this)
+{
+ struct timespec delta = {
+ 0,
+ };
+ afr_private_t *priv = NULL;
+ afr_local_t *local = frame->local;
+ afr_lock_t *lock = NULL;
+ gf_boolean_t post_op = _gf_true;
+ struct list_head shared;
+
+ priv = this->private;
+ delta.tv_sec = priv->post_op_delay_secs;
+ delta.tv_nsec = 0;
+
+ INIT_LIST_HEAD(&shared);
+ if (!local->transaction.eager_lock_on)
+ goto out;
+
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ LOCK(&local->inode->lock);
+ {
+ list_del_init(&local->transaction.owner_list);
+ list_add(&local->transaction.owner_list, &lock->post_op);
+ __afr_transaction_wake_shared(local, &shared);
+
+ if (!afr_is_delayed_changelog_post_op_needed(frame, this,
+ delta.tv_sec)) {
+ if (list_empty(&lock->owners))
+ lock->release = _gf_true;
+ goto unlock;
+ }
- local->transaction.fop (frame, this);
+ GF_ASSERT(lock->delay_timer == NULL);
+ lock->delay_timer = gf_timer_call_after(
+ this->ctx, delta, afr_delayed_changelog_wake_up_cbk, local);
+ if (!lock->delay_timer) {
+ lock->release = _gf_true;
+ } else {
+ post_op = _gf_false;
}
+ }
+unlock:
+ UNLOCK(&local->inode->lock);
- return 0;
-}
+ if (!list_empty(&shared)) {
+ afr_lock_resume_shared(&shared);
+ }
+out:
+ if (post_op) {
+ if (!local->transaction.eager_lock_on || lock->release) {
+ afr_changelog_post_op_safe(frame, this);
+ } else {
+ afr_changelog_post_op_now(frame, this);
+ }
+ }
+}
int
-afr_transaction_resume (call_frame_t *frame, xlator_t *this)
+afr_transaction_resume(call_frame_t *frame, xlator_t *this)
{
- afr_internal_lock_t *int_lock = NULL;
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
+ afr_local_t *local = NULL;
- local = frame->local;
- int_lock = &local->internal_lock;
- priv = this->private;
+ local = frame->local;
- if (__fop_changelog_needed (frame, this)) {
- afr_changelog_post_op (frame, this);
- } else {
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- local->transaction.done (frame, this);
- } else {
- int_lock->lock_cbk = local->transaction.done;
- afr_unlock (frame, this);
- }
- }
+ afr_restore_lk_owner(frame);
- return 0;
-}
+ afr_handle_symmetric_errors(frame, this);
+ if (!local->pre_op_compat)
+ /* new mode, pre-op was done along
+ with OP */
+ afr_changelog_pre_op_update(frame, this);
+
+ afr_changelog_post_op(frame, this);
+
+ return 0;
+}
/**
* afr_transaction_fop_failed - inform that an fop failed
*/
void
-afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this, int child_index)
+afr_transaction_fop_failed(call_frame_t *frame, xlator_t *this, int child_index)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = NULL;
+
+ local = frame->local;
- local = frame->local;
- priv = this->private;
+ local->transaction.failed_subvols[child_index] = 1;
+}
- __mark_child_dead (local->pending, priv->child_count,
- child_index, local->transaction.type);
+static gf_boolean_t
+__need_previous_lock_unlocked(afr_local_t *local)
+{
+ afr_lock_t *lock = NULL;
+
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ if (!lock->acquired)
+ return _gf_false;
+ if (lock->acquired && lock->event_generation != local->event_generation)
+ return _gf_true;
+ return _gf_false;
+}
+
+void
+__afr_eager_lock_handle(afr_local_t *local, gf_boolean_t *take_lock,
+ gf_boolean_t *do_pre_op, afr_local_t **timer_local)
+{
+ afr_lock_t *lock = NULL;
+ afr_local_t *owner_local = NULL;
+ xlator_t *this = local->transaction.frame->this;
+
+ local->transaction.eager_lock_on = _gf_true;
+ afr_set_lk_owner(local->transaction.frame, this, local->inode);
+
+ lock = &local->inode_ctx->lock[local->transaction.type];
+ if (__need_previous_lock_unlocked(local)) {
+ if (!list_empty(&lock->owners)) {
+ lock->release = _gf_true;
+ } else if (lock->delay_timer) {
+ lock->release = _gf_true;
+ if (gf_timer_call_cancel(this->ctx, lock->delay_timer)) {
+ /* It will be put in frozen list
+ * in the code flow below*/
+ } else {
+ *timer_local = list_entry(lock->post_op.next, afr_local_t,
+ transaction.owner_list);
+ lock->delay_timer = NULL;
+ }
+ }
+ }
+
+ if (lock->release) {
+ list_add_tail(&local->transaction.wait_list, &lock->frozen);
+ *take_lock = _gf_false;
+ goto out;
+ }
+
+ if (lock->delay_timer) {
+ *take_lock = _gf_false;
+ if (gf_timer_call_cancel(this->ctx, lock->delay_timer)) {
+ list_add_tail(&local->transaction.wait_list, &lock->frozen);
+ } else {
+ *timer_local = list_entry(lock->post_op.next, afr_local_t,
+ transaction.owner_list);
+ afr_copy_inodelk_vars(&local->internal_lock,
+ &(*timer_local)->internal_lock, this, 0);
+ lock->delay_timer = NULL;
+ *do_pre_op = _gf_true;
+ list_add_tail(&local->transaction.owner_list, &lock->owners);
+ }
+ goto out;
+ }
+
+ if (!list_empty(&lock->owners)) {
+ if (!lock->acquired || afr_has_lock_conflict(local, _gf_true)) {
+ list_add_tail(&local->transaction.wait_list, &lock->waiting);
+ *take_lock = _gf_false;
+ goto out;
+ }
+ owner_local = list_entry(lock->owners.next, afr_local_t,
+ transaction.owner_list);
+ afr_copy_inodelk_vars(&local->internal_lock,
+ &owner_local->internal_lock, this, 0);
+ *take_lock = _gf_false;
+ *do_pre_op = _gf_true;
+ }
+
+ if (lock->acquired)
+ GF_ASSERT(!(*take_lock));
+ list_add_tail(&local->transaction.owner_list, &lock->owners);
+out:
+ return;
+}
+
+void
+afr_transaction_start(afr_local_t *local, xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ gf_boolean_t take_lock = _gf_true;
+ gf_boolean_t do_pre_op = _gf_false;
+ afr_local_t *timer_local = NULL;
+
+ priv = this->private;
+
+ if (local->transaction.type != AFR_DATA_TRANSACTION &&
+ local->transaction.type != AFR_METADATA_TRANSACTION)
+ goto lock_phase;
+
+ if (!priv->eager_lock)
+ goto lock_phase;
+
+ LOCK(&local->inode->lock);
+ {
+ __afr_eager_lock_handle(local, &take_lock, &do_pre_op, &timer_local);
+ }
+ UNLOCK(&local->inode->lock);
+lock_phase:
+ if (!local->transaction.eager_lock_on) {
+ afr_set_lk_owner(local->transaction.frame, this,
+ local->transaction.frame->root);
+ }
+
+ if (take_lock) {
+ afr_lock(local->transaction.frame, this);
+ } else if (do_pre_op) {
+ afr_changelog_pre_op(local->transaction.frame, this);
+ }
+ /*Always call delayed_changelog_wake_up_cbk after calling pre-op above
+ * so that any inheriting can happen*/
+ if (timer_local)
+ afr_delayed_changelog_wake_up_cbk(timer_local);
}
int
-afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type)
+afr_write_txn_refresh_done(call_frame_t *frame, xlator_t *this, int err)
{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
+ afr_local_t *local = frame->local;
+
+ if (err) {
+ AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(-1, err);
+ goto fail;
+ }
+
+ afr_transaction_start(local, this);
+ return 0;
+fail:
+ local->transaction.unwind(frame, this);
+ AFR_STACK_DESTROY(frame);
+ return 0;
+}
- local = frame->local;
- priv = this->private;
+int
+afr_transaction_lockee_init(call_frame_t *frame)
+{
+ afr_local_t *local = frame->local;
+ afr_internal_lock_t *int_lock = &local->internal_lock;
+ afr_private_t *priv = frame->this->private;
+ int ret = 0;
- afr_transaction_local_init (local, this);
+ switch (local->transaction.type) {
+ case AFR_DATA_TRANSACTION:
+ case AFR_METADATA_TRANSACTION:
+ ret = afr_add_inode_lockee(local, priv->child_count);
+ break;
- local->transaction.resume = afr_transaction_resume;
- local->transaction.type = type;
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ ret = afr_add_entry_lockee(local, &local->transaction.parent_loc,
+ local->transaction.basename,
+ priv->child_count);
+ if (ret) {
+ goto out;
+ }
+ if (local->op == GF_FOP_RENAME) {
+ ret = afr_add_entry_lockee(
+ local, &local->transaction.new_parent_loc,
+ local->transaction.new_basename, priv->child_count);
+ if (ret) {
+ goto out;
+ }
- if (afr_lock_server_count (priv, local->transaction.type) == 0) {
- afr_internal_lock_finish (frame, this);
- } else {
- afr_lock (frame, this);
- }
+ if (local->newloc.inode &&
+ IA_ISDIR(local->newloc.inode->ia_type)) {
+ ret = afr_add_entry_lockee(local, &local->newloc, NULL,
+ priv->child_count);
+ if (ret) {
+ goto out;
+ }
+ }
+ } else if (local->op == GF_FOP_RMDIR) {
+ ret = afr_add_entry_lockee(local, &local->loc, NULL,
+ priv->child_count);
+ if (ret) {
+ goto out;
+ }
+ }
+
+ if (int_lock->lockee_count > 1) {
+ qsort(int_lock->lockee, int_lock->lockee_count,
+ sizeof(*int_lock->lockee), afr_entry_lockee_cmp);
+ }
+ break;
+ }
+out:
+ return ret;
+}
- return 0;
+int
+afr_transaction(call_frame_t *frame, xlator_t *this, afr_transaction_type type)
+{
+ afr_local_t *local = NULL;
+ afr_private_t *priv = NULL;
+ int ret = -1;
+ int event_generation = 0;
+
+ local = frame->local;
+ priv = this->private;
+ local->transaction.frame = frame;
+
+ local->transaction.type = type;
+
+ if (priv->quorum_count && !afr_has_quorum(local->child_up, this, NULL)) {
+ ret = -afr_quorum_errno(priv);
+ goto out;
+ }
+
+ if (!afr_is_consistent_io_possible(local, priv, &ret)) {
+ ret = -ret; /*op_errno to ret conversion*/
+ goto out;
+ }
+
+ if (priv->thin_arbiter_count && !afr_ta_has_quorum(priv, local)) {
+ ret = -afr_quorum_errno(priv);
+ goto out;
+ }
+
+ ret = afr_transaction_local_init(local, this);
+ if (ret < 0)
+ goto out;
+
+ ret = afr_transaction_lockee_init(frame);
+ if (ret)
+ goto out;
+
+ if (type != AFR_METADATA_TRANSACTION) {
+ goto txn_start;
+ }
+
+ ret = afr_inode_get_readable(frame, local->inode, this, local->readable,
+ &event_generation, type);
+ if (ret < 0 ||
+ afr_is_inode_refresh_reqd(local->inode, this, priv->event_generation,
+ event_generation)) {
+ afr_inode_refresh(frame, this, local->inode, local->loc.gfid,
+ afr_write_txn_refresh_done);
+ ret = 0;
+ goto out;
+ }
+
+txn_start:
+ ret = 0;
+ afr_transaction_start(local, this);
+out:
+ return ret;
}
diff --git a/xlators/cluster/afr/src/afr-transaction.h b/xlators/cluster/afr/src/afr-transaction.h
index d19d16fb39e..beefa26f4a6 100644
--- a/xlators/cluster/afr/src/afr-transaction.h
+++ b/xlators/cluster/afr/src/afr-transaction.h
@@ -11,24 +11,65 @@
#ifndef __TRANSACTION_H__
#define __TRANSACTION_H__
-typedef enum {
- LOCAL_FIRST = 1,
- LOCAL_LAST = 2
-} afr_xattrop_type_t;
+#include "afr.h"
void
-afr_transaction_fop_failed (call_frame_t *frame, xlator_t *this,
- int child_index);
+afr_transaction_fop_failed(call_frame_t *frame, xlator_t *this,
+ int child_index);
+
+int32_t
+afr_transaction(call_frame_t *frame, xlator_t *this, afr_transaction_type type);
int
-afr_lock_server_count (afr_private_t *priv, afr_transaction_type type);
+afr_set_pending_dict(afr_private_t *priv, dict_t *xattr, int32_t **pending);
-int32_t
-afr_transaction (call_frame_t *frame, xlator_t *this, afr_transaction_type type);
+void
+afr_delayed_changelog_wake_up(xlator_t *this, fd_t *fd);
+
+void
+__mark_all_success(call_frame_t *frame, xlator_t *this);
+
+gf_boolean_t
+afr_txn_nothing_failed(call_frame_t *frame, xlator_t *this);
+
+int
+afr_read_txn(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ afr_read_txn_wind_t readfn, afr_transaction_type type);
+
+int
+afr_read_txn_continue(call_frame_t *frame, xlator_t *this, int subvol);
+
+void
+afr_pending_read_increment(afr_private_t *priv, int child_index);
+
+void
+afr_pending_read_decrement(afr_private_t *priv, int child_index);
+
+call_frame_t *
+afr_transaction_detach_fop_frame(call_frame_t *frame);
+gf_boolean_t
+afr_has_quorum(unsigned char *subvols, xlator_t *this, call_frame_t *frame);
+gf_boolean_t
+afr_needs_changelog_update(afr_local_t *local);
+void
+afr_zero_fill_stat(afr_local_t *local);
+
+void
+afr_pick_error_xdata(afr_local_t *local, afr_private_t *priv, inode_t *inode1,
+ unsigned char *readable1, inode_t *inode2,
+ unsigned char *readable2);
+int
+afr_transaction_resume(call_frame_t *frame, xlator_t *this);
+
+int
+afr_lock(call_frame_t *frame, xlator_t *this);
+
+void
+afr_delayed_changelog_wake_up_cbk(void *data);
+
+int
+afr_release_notify_lock_for_ta(void *opaque);
-afr_fd_ctx_t *
-afr_fd_ctx_get (fd_t *fd, xlator_t *this);
int
-afr_set_pending_dict (afr_private_t *priv, dict_t *xattr, int32_t **pending,
- int child, afr_xattrop_type_t op);
+afr_ta_lock_release_done(int ret, call_frame_t *ta_frame, void *opaque);
#endif /* __TRANSACTION_H__ */
diff --git a/xlators/cluster/afr/src/afr.c b/xlators/cluster/afr/src/afr.c
index ebb9f1ee904..df7366f0a65 100644
--- a/xlators/cluster/afr/src/afr.c
+++ b/xlators/cluster/afr/src/afr.c
@@ -15,620 +15,1330 @@
#include <stdlib.h>
#include <signal.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
#include "afr-common.c"
-
-#define SHD_INODE_LRU_LIMIT 2048
-#define AFR_EH_HEALED_LIMIT 1024
-#define AFR_EH_HEAL_FAIL_LIMIT 1024
-#define AFR_EH_SPLIT_BRAIN_LIMIT 1024
+#include "afr-messages.h"
struct volume_options options[];
+static char *afr_favorite_child_policies[AFR_FAV_CHILD_POLICY_MAX + 1] = {
+ [AFR_FAV_CHILD_NONE] = "none",
+ [AFR_FAV_CHILD_BY_SIZE] = "size",
+ [AFR_FAV_CHILD_BY_CTIME] = "ctime",
+ [AFR_FAV_CHILD_BY_MTIME] = "mtime",
+ [AFR_FAV_CHILD_BY_MAJORITY] = "majority",
+ [AFR_FAV_CHILD_POLICY_MAX] = NULL,
+};
+
int32_t
-notify (xlator_t *this, int32_t event,
- void *data, ...)
+notify(xlator_t *this, int32_t event, void *data, ...)
{
- int ret = -1;
- va_list ap;
- void *data2 = NULL;
+ int ret = -1;
+ va_list ap;
+ void *data2 = NULL;
- va_start (ap, data);
- data2 = va_arg (ap, dict_t*);
- va_end (ap);
- ret = afr_notify (this, event, data, data2);
+ va_start(ap, data);
+ data2 = va_arg(ap, dict_t *);
+ va_end(ap);
+ ret = afr_notify(this, event, data, data2);
- return ret;
+ return ret;
}
int32_t
-mem_acct_init (xlator_t *this)
+mem_acct_init(xlator_t *this)
{
- int ret = -1;
+ int ret = -1;
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1);
+ if (!this)
+ return ret;
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
+ ret = xlator_mem_acct_init(this, gf_afr_mt_end + 1);
+ if (ret != 0) {
return ret;
-}
+ }
+ return ret;
+}
int
-xlator_subvolume_index (xlator_t *this, xlator_t *subvol)
+xlator_subvolume_index(xlator_t *this, xlator_t *subvol)
{
- int index = -1;
- int i = 0;
- xlator_list_t *list = NULL;
-
- list = this->children;
-
- while (list) {
- if (subvol == list->xlator ||
- strcmp (subvol->name, list->xlator->name) == 0) {
- index = i;
- break;
- }
- list = list->next;
- i++;
+ int index = -1;
+ int i = 0;
+ xlator_list_t *list = NULL;
+
+ list = this->children;
+
+ while (list) {
+ if (subvol == list->xlator ||
+ strcmp(subvol->name, list->xlator->name) == 0) {
+ index = i;
+ break;
}
+ list = list->next;
+ i++;
+ }
- return index;
+ return index;
}
-void
-fix_quorum_options (xlator_t *this, afr_private_t *priv, char *qtype)
+static void
+fix_quorum_options(xlator_t *this, afr_private_t *priv, char *qtype,
+ dict_t *options)
{
- if (priv->quorum_count && strcmp(qtype,"fixed")) {
- gf_log(this->name,GF_LOG_WARNING,
- "quorum-type %s overriding quorum-count %u",
- qtype, priv->quorum_count);
- }
- if (!strcmp(qtype,"none")) {
- priv->quorum_count = 0;
- }
- else if (!strcmp(qtype,"auto")) {
- priv->quorum_count = AFR_QUORUM_AUTO;
- }
+ if (dict_get_sizen(options, "quorum-type") == NULL) {
+ /* If user doesn't configure anything enable auto-quorum if the
+ * replica has more than two subvolumes */
+ if (priv->child_count > 2)
+ qtype = "auto";
+ }
+
+ if (priv->quorum_count && strcmp(qtype, "fixed")) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_OVERRIDE,
+ "quorum-type %s overriding quorum-count %u", qtype,
+ priv->quorum_count);
+ }
+
+ if (!strcmp(qtype, "none")) {
+ priv->quorum_count = 0;
+ } else if (!strcmp(qtype, "auto")) {
+ priv->quorum_count = AFR_QUORUM_AUTO;
+ }
}
int
-reconfigure (xlator_t *this, dict_t *options)
+afr_set_favorite_child_policy(afr_private_t *priv, char *policy)
{
- afr_private_t *priv = NULL;
- xlator_t *read_subvol = NULL;
- int ret = -1;
- int index = -1;
- char *qtype = NULL;
+ int index = -1;
- priv = this->private;
+ index = gf_get_index_by_elem(afr_favorite_child_policies, policy);
+ if (index < 0 || index >= AFR_FAV_CHILD_POLICY_MAX)
+ return -1;
- GF_OPTION_RECONF ("background-self-heal-count",
- priv->background_self_heal_count, options, uint32,
- out);
+ priv->fav_child_policy = index;
- GF_OPTION_RECONF ("metadata-self-heal",
- priv->metadata_self_heal, options, bool, out);
+ return 0;
+}
- GF_OPTION_RECONF ("data-self-heal", priv->data_self_heal, options, str,
- out);
+static void
+set_data_self_heal_algorithm(afr_private_t *priv, char *algo)
+{
+ if (!algo) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DYNAMIC;
+ } else if (strcmp(algo, "full") == 0) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_FULL;
+ } else if (strcmp(algo, "diff") == 0) {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DIFF;
+ } else {
+ priv->data_self_heal_algorithm = AFR_SELFHEAL_DATA_DYNAMIC;
+ }
+}
- GF_OPTION_RECONF ("entry-self-heal", priv->entry_self_heal, options,
- bool, out);
+void
+afr_handle_anon_inode_options(afr_private_t *priv, dict_t *options)
+{
+ char *volfile_id_str = NULL;
+ uuid_t anon_inode_gfid = {0};
+
+ /*If volume id is not present don't enable anything*/
+ if (dict_get_str(options, "volume-id", &volfile_id_str))
+ return;
+ GF_ASSERT(strlen(AFR_ANON_DIR_PREFIX) + strlen(volfile_id_str) <= NAME_MAX);
+ /*anon_inode_name is not supposed to change once assigned*/
+ if (!priv->anon_inode_name[0]) {
+ snprintf(priv->anon_inode_name, sizeof(priv->anon_inode_name), "%s-%s",
+ AFR_ANON_DIR_PREFIX, volfile_id_str);
+ gf_uuid_parse(volfile_id_str, anon_inode_gfid);
+ /*Flip a bit to make sure volfile-id and anon-gfid are not same*/
+ anon_inode_gfid[0] ^= 1;
+ uuid_utoa_r(anon_inode_gfid, priv->anon_gfid_str);
+ }
+}
- GF_OPTION_RECONF ("strict-readdir", priv->strict_readdir, options, bool,
- out);
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ afr_private_t *priv = NULL;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ int timeout_old = 0;
+ int ret = -1;
+ int index = -1;
+ char *qtype = NULL;
+ char *fav_child_policy = NULL;
+ char *data_self_heal = NULL;
+ char *data_self_heal_algorithm = NULL;
+ char *locking_scheme = NULL;
+ gf_boolean_t consistent_io = _gf_false;
+ gf_boolean_t choose_local_old = _gf_false;
+ gf_boolean_t enabled_old = _gf_false;
- GF_OPTION_RECONF ("data-self-heal-window-size",
- priv->data_self_heal_window_size, options,
- uint32, out);
+ priv = this->private;
- GF_OPTION_RECONF ("data-change-log", priv->data_change_log, options,
- bool, out);
+ GF_OPTION_RECONF("metadata-splitbrain-forced-heal",
+ priv->metadata_splitbrain_forced_heal, options, bool, out);
- GF_OPTION_RECONF ("metadata-change-log",
- priv->metadata_change_log, options, bool, out);
+ GF_OPTION_RECONF("background-self-heal-count",
+ priv->background_self_heal_count, options, uint32, out);
- GF_OPTION_RECONF ("entry-change-log", priv->entry_change_log, options,
- bool, out);
+ GF_OPTION_RECONF("heal-wait-queue-length", priv->heal_wait_qlen, options,
+ uint32, out);
- GF_OPTION_RECONF ("data-self-heal-algorithm",
- priv->data_self_heal_algorithm, options, str, out);
+ GF_OPTION_RECONF("metadata-self-heal", priv->metadata_self_heal, options,
+ bool, out);
- GF_OPTION_RECONF ("self-heal-daemon", priv->shd.enabled, options, bool, out);
+ GF_OPTION_RECONF("data-self-heal", data_self_heal, options, str, out);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
- GF_OPTION_RECONF ("read-subvolume", read_subvol, options, xlator, out);
+ GF_OPTION_RECONF("entry-self-heal", priv->entry_self_heal, options, bool,
+ out);
- GF_OPTION_RECONF ("read-hash-mode", priv->hash_mode,
- options, uint32, out);
+ GF_OPTION_RECONF("data-self-heal-window-size",
+ priv->data_self_heal_window_size, options, uint32, out);
- if (read_subvol) {
- index = xlator_subvolume_index (this, read_subvol);
- if (index == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
- read_subvol->name);
- goto out;
- }
- priv->read_child = index;
- }
+ GF_OPTION_RECONF("data-self-heal-algorithm", data_self_heal_algorithm,
+ options, str, out);
+ set_data_self_heal_algorithm(priv, data_self_heal_algorithm);
- GF_OPTION_RECONF ("eager-lock", priv->eager_lock, options, bool, out);
- GF_OPTION_RECONF ("quorum-type", qtype, options, str, out);
- GF_OPTION_RECONF ("quorum-count", priv->quorum_count, options,
- uint32, out);
- fix_quorum_options(this,priv,qtype);
- GF_OPTION_RECONF ("heal-timeout", priv->shd.timeout, options,
- int32, out);
+ GF_OPTION_RECONF("halo-enabled", priv->halo_enabled, options, bool, out);
- /* Reset this so we re-discover in case the topology changed. */
- priv->did_discovery = _gf_false;
+ GF_OPTION_RECONF("halo-shd-max-latency", priv->shd.halo_max_latency_msec,
+ options, uint32, out);
- ret = 0;
-out:
- return ret;
+ GF_OPTION_RECONF("halo-nfsd-max-latency", priv->nfsd.halo_max_latency_msec,
+ options, uint32, out);
-}
+ GF_OPTION_RECONF("halo-max-latency", priv->halo_max_latency_msec, options,
+ uint32, out);
+ GF_OPTION_RECONF("halo-max-replicas", priv->halo_max_replicas, options,
+ uint32, out);
-static const char *favorite_child_warning_str = "You have specified subvolume '%s' "
- "as the 'favorite child'. This means that if a discrepancy in the content "
- "or attributes (ownership, permission, etc.) of a file is detected among "
- "the subvolumes, the file on '%s' will be considered the definitive "
- "version and its contents will OVERWRITE the contents of the file on other "
- "subvolumes. All versions of the file except that on '%s' "
- "WILL BE LOST.";
+ GF_OPTION_RECONF("halo-min-replicas", priv->halo_min_replicas, options,
+ uint32, out);
+ GF_OPTION_RECONF("read-subvolume", read_subvol, options, xlator, out);
-int32_t
-init (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- int child_count = 0;
- xlator_list_t *trav = NULL;
- int i = 0;
- int ret = -1;
- GF_UNUSED int op_errno = 0;
- xlator_t *read_subvol = NULL;
- xlator_t *fav_child = NULL;
- char *qtype = NULL;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "replicate translator needs more than one "
- "subvolume defined.");
- return -1;
- }
+ choose_local_old = priv->choose_local;
+ GF_OPTION_RECONF("choose-local", priv->choose_local, options, bool, out);
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling.");
+ if (choose_local_old != priv->choose_local) {
+ priv->read_child = -1;
+ if (choose_local_old == _gf_false)
+ priv->did_discovery = _gf_false;
+ }
+
+ GF_OPTION_RECONF("read-hash-mode", priv->hash_mode, options, uint32, out);
+
+ if (read_subvol) {
+ index = xlator_subvolume_index(this, read_subvol);
+ if (index == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "%s not a subvolume", read_subvol->name);
+ goto out;
}
+ priv->read_child = index;
+ }
+
+ GF_OPTION_RECONF("read-subvolume-index", read_subvol_index, options, int32,
+ out);
+
+ if (read_subvol_index > -1) {
+ index = read_subvol_index;
+ if (index >= priv->child_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "%d not a subvolume-index", index);
+ goto out;
+ }
+ priv->read_child = index;
+ }
+
+ GF_OPTION_RECONF("pre-op-compat", priv->pre_op_compat, options, bool, out);
+ GF_OPTION_RECONF("locking-scheme", locking_scheme, options, str, out);
+ priv->granular_locks = (strcmp(locking_scheme, "granular") == 0);
+ GF_OPTION_RECONF("full-lock", priv->full_lock, options, bool, out);
+ GF_OPTION_RECONF("granular-entry-heal", priv->esh_granular, options, bool,
+ out);
+
+ GF_OPTION_RECONF("eager-lock", priv->eager_lock, options, bool, out);
+ GF_OPTION_RECONF("optimistic-change-log", priv->optimistic_change_log,
+ options, bool, out);
+ GF_OPTION_RECONF("quorum-type", qtype, options, str, out);
+ GF_OPTION_RECONF("quorum-count", priv->quorum_count, options, uint32, out);
+ fix_quorum_options(this, priv, qtype, options);
+ if (priv->quorum_count && !afr_has_quorum(priv->child_up, this, NULL))
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_QUORUM_FAIL,
+ "Client-quorum is not met");
+
+ GF_OPTION_RECONF("post-op-delay-secs", priv->post_op_delay_secs, options,
+ uint32, out);
+
+ GF_OPTION_RECONF(AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, options,
+ size_uint64, out);
+ /* Reset this so we re-discover in case the topology changed. */
+ GF_OPTION_RECONF("ensure-durability", priv->ensure_durability, options,
+ bool, out);
+
+ enabled_old = priv->shd.enabled;
+ GF_OPTION_RECONF("self-heal-daemon", priv->shd.enabled, options, bool, out);
+
+ GF_OPTION_RECONF("iam-self-heal-daemon", priv->shd.iamshd, options, bool,
+ out);
+
+ timeout_old = priv->shd.timeout;
+ GF_OPTION_RECONF("heal-timeout", priv->shd.timeout, options, int32, out);
+
+ GF_OPTION_RECONF("consistent-metadata", priv->consistent_metadata, options,
+ bool, out);
+
+ GF_OPTION_RECONF("shd-max-threads", priv->shd.max_threads, options, uint32,
+ out);
+
+ GF_OPTION_RECONF("shd-wait-qlength", priv->shd.wait_qlength, options,
+ uint32, out);
+
+ GF_OPTION_RECONF("favorite-child-policy", fav_child_policy, options, str,
+ out);
+ if (afr_set_favorite_child_policy(priv, fav_child_policy) == -1)
+ goto out;
+
+ priv->did_discovery = _gf_false;
+
+ GF_OPTION_RECONF("consistent-io", consistent_io, options, bool, out);
+ if (priv->quorum_count != 0)
+ consistent_io = _gf_false;
+ priv->consistent_io = consistent_io;
+
+ afr_handle_anon_inode_options(priv, options);
+
+ GF_OPTION_RECONF("use-anonymous-inode", priv->use_anon_inode, options, bool,
+ out);
+ if (priv->shd.enabled) {
+ if ((priv->shd.enabled != enabled_old) ||
+ (timeout_old != priv->shd.timeout))
+ afr_selfheal_childup(this, priv);
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
- this->private = GF_CALLOC (1, sizeof (afr_private_t),
- gf_afr_mt_afr_private_t);
- if (!this->private)
- goto out;
+static int
+afr_pending_xattrs_init(afr_private_t *priv, xlator_t *this)
+{
+ int ret = -1;
+ int i = 0;
+ char *ptr = NULL;
+ char *ptr1 = NULL;
+ char *xattrs_list = NULL;
+ xlator_list_t *trav = NULL;
+ int child_count = -1;
+
+ trav = this->children;
+ child_count = priv->child_count;
+ if (priv->thin_arbiter_count) {
+ /* priv->pending_key[THIN_ARBITER_BRICK_INDEX] is used as the
+ * name of the thin arbiter file for persistence across add/
+ * removal of DHT subvols.*/
+ child_count++;
+ }
+
+ GF_OPTION_INIT("afr-pending-xattr", xattrs_list, str, out);
+ priv->pending_key = GF_CALLOC(sizeof(*priv->pending_key), child_count,
+ gf_afr_mt_char);
+ if (!priv->pending_key) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (!xattrs_list) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_NO_CHANGELOG,
+ "Unable to fetch afr-pending-xattr option from volfile."
+ " Falling back to using client translator names. ");
- priv = this->private;
- LOCK_INIT (&priv->lock);
- LOCK_INIT (&priv->read_child_lock);
- //lock recovery is not done in afr
- pthread_mutex_init (&priv->mutex, NULL);
- INIT_LIST_HEAD (&priv->saved_fds);
+ while (i < child_count) {
+ ret = gf_asprintf(&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
+ trav->xlator->name);
+ if (ret == -1) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ trav = trav->next;
+ i++;
+ }
+ ret = 0;
+ goto out;
+ }
- child_count = xlator_subvolume_count (this);
+ ptr = ptr1 = gf_strdup(xattrs_list);
+ if (!ptr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0, ptr = strtok(ptr, ","); ptr; ptr = strtok(NULL, ",")) {
+ ret = gf_asprintf(&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
+ ptr);
+ if (ret == -1) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ i++;
+ }
+ ret = 0;
- priv->child_count = child_count;
+out:
+ GF_FREE(ptr1);
+ return ret;
+}
- priv->read_child = -1;
+void
+afr_ta_init(afr_private_t *priv)
+{
+ priv->thin_arbiter_count = 1;
+ priv->child_count--;
+ priv->ta_child_up = 0;
+ priv->ta_bad_child_index = AFR_CHILD_UNKNOWN;
+ priv->ta_notify_dom_lock_offset = 0;
+ priv->ta_in_mem_txn_count = 0;
+ priv->ta_on_wire_txn_count = 0;
+ priv->release_ta_notify_dom_lock = _gf_false;
+ INIT_LIST_HEAD(&priv->ta_waitq);
+ INIT_LIST_HEAD(&priv->ta_onwireq);
+ gf_uuid_clear(priv->ta_gfid);
+}
- GF_OPTION_INIT ("read-subvolume", read_subvol, xlator, out);
- if (read_subvol) {
- priv->read_child = xlator_subvolume_index (this, read_subvol);
- if (priv->read_child == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
- read_subvol->name);
- goto out;
- }
+int32_t
+init(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
+ int child_count = 0;
+ xlator_list_t *trav = NULL;
+ int i = 0;
+ int ret = -1;
+ GF_UNUSED int op_errno = 0;
+ xlator_t *read_subvol = NULL;
+ int read_subvol_index = -1;
+ char *qtype = NULL;
+ char *fav_child_policy = NULL;
+ char *thin_arbiter = NULL;
+ char *data_self_heal = NULL;
+ char *locking_scheme = NULL;
+ char *data_self_heal_algorithm = NULL;
+
+ if (!this->children) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_CHILD_MISCONFIGURED,
+ "replicate translator needs more than one "
+ "subvolume defined.");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_VOL_MISCONFIGURED,
+ "Volume is dangling.");
+ }
+
+ this->private = GF_CALLOC(1, sizeof(afr_private_t),
+ gf_afr_mt_afr_private_t);
+ if (!this->private)
+ goto out;
+
+ priv = this->private;
+ INIT_LIST_HEAD(&priv->saved_locks);
+ INIT_LIST_HEAD(&priv->lk_healq);
+ LOCK_INIT(&priv->lock);
+
+ child_count = xlator_subvolume_count(this);
+
+ priv->child_count = child_count;
+
+ priv->read_child = -1;
+
+ GF_OPTION_INIT("arbiter-count", priv->arbiter_count, uint32, out);
+ GF_OPTION_INIT("thin-arbiter", thin_arbiter, str, out);
+ if (thin_arbiter && strlen(thin_arbiter) > 0) {
+ afr_ta_init(priv);
+ }
+ INIT_LIST_HEAD(&priv->healing);
+ INIT_LIST_HEAD(&priv->heal_waiting);
+
+ priv->spb_choice_timeout = AFR_DEFAULT_SPB_CHOICE_TIMEOUT;
+
+ GF_OPTION_INIT("afr-dirty-xattr", priv->afr_dirty, str, out);
+
+ GF_OPTION_INIT("metadata-splitbrain-forced-heal",
+ priv->metadata_splitbrain_forced_heal, bool, out);
+
+ GF_OPTION_INIT("read-subvolume", read_subvol, xlator, out);
+ if (read_subvol) {
+ priv->read_child = xlator_subvolume_index(this, read_subvol);
+ if (priv->read_child == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "%s not a subvolume", read_subvol->name);
+ goto out;
}
- GF_OPTION_INIT ("choose-local", priv->choose_local, bool, out);
-
- GF_OPTION_INIT ("read-hash-mode", priv->hash_mode, uint32, out);
-
- priv->favorite_child = -1;
- GF_OPTION_INIT ("favorite-child", fav_child, xlator, out);
- if (fav_child) {
- priv->favorite_child = xlator_subvolume_index (this, fav_child);
- if (priv->favorite_child == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s not a subvolume",
- fav_child->name);
- goto out;
- }
- gf_log (this->name, GF_LOG_WARNING,
- favorite_child_warning_str, fav_child->name,
- fav_child->name, fav_child->name);
+ }
+ GF_OPTION_INIT("read-subvolume-index", read_subvol_index, int32, out);
+ if (read_subvol_index > -1) {
+ if (read_subvol_index >= priv->child_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, AFR_MSG_INVALID_SUBVOL,
+ "%d not a subvolume-index", read_subvol_index);
+ goto out;
}
+ priv->read_child = read_subvol_index;
+ }
+ GF_OPTION_INIT("choose-local", priv->choose_local, bool, out);
+ priv->pending_reads = GF_CALLOC(sizeof(*priv->pending_reads),
+ priv->child_count, gf_afr_mt_atomic_t);
- GF_OPTION_INIT ("background-self-heal-count",
- priv->background_self_heal_count, uint32, out);
+ GF_OPTION_INIT("read-hash-mode", priv->hash_mode, uint32, out);
- GF_OPTION_INIT ("data-self-heal", priv->data_self_heal, str, out);
+ priv->favorite_child = -1;
- GF_OPTION_INIT ("data-self-heal-algorithm",
- priv->data_self_heal_algorithm, str, out);
+ GF_OPTION_INIT("favorite-child-policy", fav_child_policy, str, out);
+ if (afr_set_favorite_child_policy(priv, fav_child_policy) == -1)
+ goto out;
- GF_OPTION_INIT ("data-self-heal-window-size",
- priv->data_self_heal_window_size, uint32, out);
+ GF_OPTION_INIT("shd-max-threads", priv->shd.max_threads, uint32, out);
- GF_OPTION_INIT ("metadata-self-heal", priv->metadata_self_heal, bool,
- out);
+ GF_OPTION_INIT("shd-wait-qlength", priv->shd.wait_qlength, uint32, out);
- GF_OPTION_INIT ("entry-self-heal", priv->entry_self_heal, bool, out);
+ GF_OPTION_INIT("background-self-heal-count",
+ priv->background_self_heal_count, uint32, out);
- GF_OPTION_INIT ("self-heal-daemon", priv->shd.enabled, bool, out);
+ GF_OPTION_INIT("heal-wait-queue-length", priv->heal_wait_qlen, uint32, out);
- GF_OPTION_INIT ("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
+ GF_OPTION_INIT("data-self-heal", data_self_heal, str, out);
+ if (gf_string2boolean(data_self_heal, &priv->data_self_heal) == -1)
+ goto out;
- GF_OPTION_INIT ("data-change-log", priv->data_change_log, bool, out);
+ GF_OPTION_INIT("data-self-heal-algorithm", data_self_heal_algorithm, str,
+ out);
+ set_data_self_heal_algorithm(priv, data_self_heal_algorithm);
- GF_OPTION_INIT ("metadata-change-log", priv->metadata_change_log, bool,
- out);
+ GF_OPTION_INIT("data-self-heal-window-size",
+ priv->data_self_heal_window_size, uint32, out);
- GF_OPTION_INIT ("entry-change-log", priv->entry_change_log, bool, out);
+ GF_OPTION_INIT("metadata-self-heal", priv->metadata_self_heal, bool, out);
- GF_OPTION_INIT ("optimistic-change-log", priv->optimistic_change_log,
- bool, out);
+ GF_OPTION_INIT("entry-self-heal", priv->entry_self_heal, bool, out);
- GF_OPTION_INIT ("inodelk-trace", priv->inodelk_trace, bool, out);
+ GF_OPTION_INIT("halo-shd-max-latency", priv->shd.halo_max_latency_msec,
+ uint32, out);
- GF_OPTION_INIT ("entrylk-trace", priv->entrylk_trace, bool, out);
+ GF_OPTION_INIT("halo-max-latency", priv->halo_max_latency_msec, uint32,
+ out);
+ GF_OPTION_INIT("halo-max-replicas", priv->halo_max_replicas, uint32, out);
+ GF_OPTION_INIT("halo-min-replicas", priv->halo_min_replicas, uint32, out);
- GF_OPTION_INIT ("strict-readdir", priv->strict_readdir, bool, out);
+ GF_OPTION_INIT("halo-enabled", priv->halo_enabled, bool, out);
- GF_OPTION_INIT ("eager-lock", priv->eager_lock, bool, out);
- GF_OPTION_INIT ("quorum-type", qtype, str, out);
- GF_OPTION_INIT ("quorum-count", priv->quorum_count, uint32, out);
- fix_quorum_options(this,priv,qtype);
+ GF_OPTION_INIT("halo-nfsd-max-latency", priv->nfsd.halo_max_latency_msec,
+ uint32, out);
- priv->wait_count = 1;
+ GF_OPTION_INIT("iam-nfs-daemon", priv->nfsd.iamnfsd, bool, out);
- priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
- gf_afr_mt_char);
- if (!priv->child_up) {
- ret = -ENOMEM;
- goto out;
- }
+ GF_OPTION_INIT("optimistic-change-log", priv->optimistic_change_log, bool,
+ out);
- for (i = 0; i < child_count; i++)
- priv->child_up[i] = -1; /* start with unknown state.
- this initialization needed
- for afr_notify() to work
- reliably
- */
+ GF_OPTION_INIT("pre-op-compat", priv->pre_op_compat, bool, out);
+ GF_OPTION_INIT("locking-scheme", locking_scheme, str, out);
+ priv->granular_locks = (strcmp(locking_scheme, "granular") == 0);
+ GF_OPTION_INIT("full-lock", priv->full_lock, bool, out);
+ GF_OPTION_INIT("granular-entry-heal", priv->esh_granular, bool, out);
- priv->children = GF_CALLOC (sizeof (xlator_t *), child_count,
- gf_afr_mt_xlator_t);
- if (!priv->children) {
- ret = -ENOMEM;
- goto out;
- }
+ GF_OPTION_INIT("eager-lock", priv->eager_lock, bool, out);
+ GF_OPTION_INIT("quorum-type", qtype, str, out);
+ GF_OPTION_INIT("quorum-count", priv->quorum_count, uint32, out);
+ GF_OPTION_INIT(AFR_SH_READDIR_SIZE_KEY, priv->sh_readdir_size, size_uint64,
+ out);
+ fix_quorum_options(this, priv, qtype, this->options);
- priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
- child_count,
- gf_afr_mt_char);
- if (!priv->pending_key) {
- ret = -ENOMEM;
- goto out;
- }
+ GF_OPTION_INIT("post-op-delay-secs", priv->post_op_delay_secs, uint32, out);
+ GF_OPTION_INIT("ensure-durability", priv->ensure_durability, bool, out);
- trav = this->children;
- i = 0;
- while (i < child_count) {
- priv->children[i] = trav->xlator;
-
- ret = gf_asprintf (&priv->pending_key[i], "%s.%s",
- AFR_XATTR_PREFIX,
- trav->xlator->name);
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed to set pending key");
- ret = -ENOMEM;
- goto out;
- }
-
- trav = trav->next;
- i++;
- }
+ GF_OPTION_INIT("self-heal-daemon", priv->shd.enabled, bool, out);
- priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
- gf_afr_mt_int32_t);
- if (!priv->last_event) {
- ret = -ENOMEM;
- goto out;
- }
+ GF_OPTION_INIT("iam-self-heal-daemon", priv->shd.iamshd, bool, out);
+ GF_OPTION_INIT("heal-timeout", priv->shd.timeout, int32, out);
- /* keep more local here as we may need them for self-heal etc */
- this->local_pool = mem_pool_new (afr_local_t, 512);
- if (!this->local_pool) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto out;
- }
+ GF_OPTION_INIT("consistent-metadata", priv->consistent_metadata, bool, out);
+ GF_OPTION_INIT("consistent-io", priv->consistent_io, bool, out);
+ afr_handle_anon_inode_options(priv, this->options);
- priv->first_lookup = 1;
- priv->root_inode = NULL;
+ GF_OPTION_INIT("use-anonymous-inode", priv->use_anon_inode, bool, out);
+ if (priv->quorum_count != 0)
+ priv->consistent_io = _gf_false;
- if (!priv->shd.iamshd) {
- ret = 0;
- goto out;
- }
+ priv->wait_count = 1;
+ priv->local = GF_CALLOC(sizeof(unsigned char), child_count, gf_afr_mt_char);
+ if (!priv->local) {
ret = -ENOMEM;
- priv->shd.pos = GF_CALLOC (sizeof (*priv->shd.pos), child_count,
- gf_afr_mt_brick_pos_t);
- if (!priv->shd.pos)
- goto out;
+ goto out;
+ }
- priv->shd.pending = GF_CALLOC (sizeof (*priv->shd.pending), child_count,
- gf_afr_mt_int32_t);
- if (!priv->shd.pending)
- goto out;
+ priv->anon_inode = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
- priv->shd.inprogress = GF_CALLOC (sizeof (*priv->shd.inprogress),
- child_count, gf_afr_mt_shd_bool_t);
- if (!priv->shd.inprogress)
- goto out;
- priv->shd.timer = GF_CALLOC (sizeof (*priv->shd.timer), child_count,
- gf_afr_mt_shd_timer_t);
- if (!priv->shd.timer)
- goto out;
+ priv->child_up = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
- priv->shd.healed = eh_new (AFR_EH_HEALED_LIMIT, _gf_false);
- if (!priv->shd.healed)
- goto out;
+ priv->child_latency = GF_MALLOC(sizeof(*priv->child_latency) * child_count,
+ gf_afr_mt_child_latency_t);
+ priv->halo_child_up = GF_CALLOC(sizeof(unsigned char), child_count,
+ gf_afr_mt_char);
- priv->shd.heal_failed = eh_new (AFR_EH_HEAL_FAIL_LIMIT, _gf_false);
- if (!priv->shd.heal_failed)
- goto out;
+ if (!priv->child_up || !priv->child_latency || !priv->halo_child_up ||
+ !priv->anon_inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ /*Initialize to -ve ping timeout so that they are not considered
+ * in child-up events until ping-event comes*/
+ for (i = 0; i < child_count; i++)
+ priv->child_latency[i] = -1;
+
+ priv->children = GF_CALLOC(sizeof(xlator_t *), child_count,
+ gf_afr_mt_xlator_t);
+ if (!priv->children) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = afr_pending_xattrs_init(priv, this);
+ if (ret)
+ goto out;
+
+ trav = this->children;
+ i = 0;
+ while (i < child_count) {
+ priv->children[i] = trav->xlator;
+ trav = trav->next;
+ i++;
+ }
+
+ ret = gf_asprintf(&priv->sh_domain, AFR_SH_DATA_DOMAIN_FMT, this->name);
+ if (-1 == ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
- priv->shd.split_brain = eh_new (AFR_EH_SPLIT_BRAIN_LIMIT, _gf_false);
- if (!priv->shd.split_brain)
- goto out;
+ priv->last_event = GF_CALLOC(child_count, sizeof(*priv->last_event),
+ gf_afr_mt_int32_t);
+ if (!priv->last_event) {
+ ret = -ENOMEM;
+ goto out;
+ }
- this->itable = inode_table_new (SHD_INODE_LRU_LIMIT, this);
- if (!this->itable)
- goto out;
- priv->root_inode = inode_ref (this->itable->root);
- GF_OPTION_INIT ("node-uuid", priv->shd.node_uuid, str, out);
- GF_OPTION_INIT ("heal-timeout", priv->shd.timeout, int32, out);
+ this->itable = inode_table_new(SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (priv->shd.iamshd) {
+ ret = afr_selfheal_daemon_init(this);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
- ret = 0;
+ /* keep more local here as we may need them for self-heal etc */
+ this->local_pool = mem_pool_new(afr_local_t, 512);
+ if (!this->local_pool) {
+ ret = -1;
+ goto out;
+ }
+
+ priv->root_inode = NULL;
+
+ ret = 0;
out:
- return ret;
+ return ret;
+}
+void
+afr_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+ int ret = -1;
+
+ if (!healer)
+ return;
+
+ if (healer->running) {
+ /*
+ * If there are any resources to cleanup, We need
+ * to do that gracefully using pthread_cleanup_push
+ */
+ ret = gf_thread_cleanup_xint(healer->thread);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, AFR_MSG_SELF_HEAL_FAILED,
+ "Failed to clean up healer threads.");
+ healer->thread = 0;
+ }
+ pthread_cond_destroy(&healer->cond);
+ pthread_mutex_destroy(&healer->mutex);
}
-
-int
-fini (xlator_t *this)
+void
+afr_selfheal_daemon_fini(xlator_t *this)
{
- afr_private_t *priv = NULL;
+ struct subvol_healer *healer = NULL;
+ afr_self_heald_t *shd = NULL;
+ afr_private_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ shd = &priv->shd;
+ if (!shd->iamshd)
+ return;
+
+ for (i = 0; i < priv->child_count; i++) {
+ healer = &shd->index_healers[i];
+ afr_destroy_healer_object(this, healer);
+
+ healer = &shd->full_healers[i];
+ afr_destroy_healer_object(this, healer);
+
+ if (shd->statistics[i])
+ eh_destroy(shd->statistics[i]);
+ }
+ GF_FREE(shd->index_healers);
+ GF_FREE(shd->full_healers);
+ GF_FREE(shd->statistics);
+ if (shd->split_brain)
+ eh_destroy(shd->split_brain);
+}
+void
+fini(xlator_t *this)
+{
+ afr_private_t *priv = NULL;
- priv = this->private;
- this->private = NULL;
- afr_priv_destroy (priv);
- if (this->itable);//I dont see any destroy func
+ priv = this->private;
- return 0;
-}
+ afr_selfheal_daemon_fini(this);
+ GF_ASSERT(list_empty(&priv->saved_locks));
+ LOCK(&priv->lock);
+ if (priv->timer != NULL) {
+ gf_timer_call_cancel(this->ctx, priv->timer);
+ priv->timer = NULL;
+ }
+ UNLOCK(&priv->lock);
+
+ if (this->local_pool != NULL) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
+
+ this->private = NULL;
+ afr_priv_destroy(priv);
+ if (this->itable) {
+ inode_table_destroy(this->itable);
+ this->itable = NULL;
+ }
+
+ return;
+}
struct xlator_fops fops = {
- .lookup = afr_lookup,
- .open = afr_open,
- .lk = afr_lk,
- .flush = afr_flush,
- .statfs = afr_statfs,
- .fsync = afr_fsync,
- .fsyncdir = afr_fsyncdir,
- .xattrop = afr_xattrop,
- .fxattrop = afr_fxattrop,
- .inodelk = afr_inodelk,
- .finodelk = afr_finodelk,
- .entrylk = afr_entrylk,
- .fentrylk = afr_fentrylk,
-
- /* inode read */
- .access = afr_access,
- .stat = afr_stat,
- .fstat = afr_fstat,
- .readlink = afr_readlink,
- .getxattr = afr_getxattr,
- .fgetxattr = afr_fgetxattr,
- .readv = afr_readv,
-
- /* inode write */
- .writev = afr_writev,
- .truncate = afr_truncate,
- .ftruncate = afr_ftruncate,
- .setxattr = afr_setxattr,
- .fsetxattr = afr_fsetxattr,
- .setattr = afr_setattr,
- .fsetattr = afr_fsetattr,
- .removexattr = afr_removexattr,
- .fremovexattr = afr_fremovexattr,
-
- /* dir read */
- .opendir = afr_opendir,
- .readdir = afr_readdir,
- .readdirp = afr_readdirp,
-
- /* dir write */
- .create = afr_create,
- .mknod = afr_mknod,
- .mkdir = afr_mkdir,
- .unlink = afr_unlink,
- .rmdir = afr_rmdir,
- .link = afr_link,
- .symlink = afr_symlink,
- .rename = afr_rename,
+ .lookup = afr_lookup,
+ .lk = afr_lk,
+ .flush = afr_flush,
+ .statfs = afr_statfs,
+ .fsyncdir = afr_fsyncdir,
+ .inodelk = afr_inodelk,
+ .finodelk = afr_finodelk,
+ .entrylk = afr_entrylk,
+ .fentrylk = afr_fentrylk,
+ .ipc = afr_ipc,
+ .lease = afr_lease,
+
+ /* inode read */
+ .access = afr_access,
+ .stat = afr_stat,
+ .fstat = afr_fstat,
+ .readlink = afr_readlink,
+ .getxattr = afr_getxattr,
+ .fgetxattr = afr_fgetxattr,
+ .readv = afr_readv,
+ .seek = afr_seek,
+
+ /* inode write */
+ .writev = afr_writev,
+ .truncate = afr_truncate,
+ .ftruncate = afr_ftruncate,
+ .setxattr = afr_setxattr,
+ .fsetxattr = afr_fsetxattr,
+ .setattr = afr_setattr,
+ .fsetattr = afr_fsetattr,
+ .removexattr = afr_removexattr,
+ .fremovexattr = afr_fremovexattr,
+ .fallocate = afr_fallocate,
+ .discard = afr_discard,
+ .zerofill = afr_zerofill,
+ .xattrop = afr_xattrop,
+ .fxattrop = afr_fxattrop,
+ .fsync = afr_fsync,
+
+ /*inode open*/
+ .opendir = afr_opendir,
+ .open = afr_open,
+
+ /* dir read */
+ .readdir = afr_readdir,
+ .readdirp = afr_readdirp,
+
+ /* dir write */
+ .create = afr_create,
+ .mknod = afr_mknod,
+ .mkdir = afr_mkdir,
+ .unlink = afr_unlink,
+ .rmdir = afr_rmdir,
+ .link = afr_link,
+ .symlink = afr_symlink,
+ .rename = afr_rename,
};
-
struct xlator_dumpops dumpops = {
- .priv = afr_priv_dump,
+ .priv = afr_priv_dump,
};
-
struct xlator_cbks cbks = {
- .release = afr_release,
- .releasedir = afr_releasedir,
- .forget = afr_forget,
+ .release = afr_release,
+ .releasedir = afr_releasedir,
+ .forget = afr_forget,
};
-
struct volume_options options[] = {
- { .key = {"read-subvolume" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"read-hash-mode" },
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .max = 2,
- .default_value = "0",
- .description = "0 = first responder, "
- "1 = hash by GFID (all clients use same subvolume), "
- "2 = hash by GFID and client PID",
- },
- { .key = {"choose-local" },
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "true",
- .description = "Choose a local subvolume to read from if "
- "read-subvolume is not explicitly set.",
- },
- { .key = {"favorite-child"},
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"background-self-heal-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .default_value = "16",
- .validate = GF_OPT_VALIDATE_MIN,
- },
- { .key = {"data-self-heal"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "",
- .value = {"1", "on", "yes", "true", "enable",
- "0", "off", "no", "false", "disable",
- "open"},
- .default_value = "on",
- },
- { .key = {"data-self-heal-algorithm"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "",
- .description = "Select between \"full\", \"diff\". The "
- "\"full\" algorithm copies the entire file from "
- "source to sink. The \"diff\" algorithm copies to "
- "sink only those blocks whose checksums don't match "
- "with those of source.",
- .value = { "diff", "full", "" }
- },
- { .key = {"data-self-heal-window-size"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 1024,
- .default_value = "1",
- .description = "Maximum number blocks per file for which self-heal "
- "process would be applied simultaneously."
- },
- { .key = {"metadata-self-heal"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"entry-self-heal"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"data-change-log"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"metadata-change-log"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"entry-change-log"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"optimistic-change-log"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"strict-readdir"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"inodelk-trace"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"entrylk-trace"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"eager-lock"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"self-heal-daemon"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"iam-self-heal-daemon"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"quorum-type"},
- .type = GF_OPTION_TYPE_STR,
- .value = { "none", "auto", "fixed", "" },
- .default_value = "none",
- .description = "If value is \"fixed\" only allow writes if "
- "quorum-count bricks are present. If value is "
- "\"auto\" only allow writes if more than half of "
- "bricks, or exactly half including the first, are "
- "present.",
- },
- { .key = {"quorum-count"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = INT_MAX,
- .default_value = 0,
- .description = "If quorum-type is \"fixed\" only allow writes if "
- "this many bricks or present. Other quorum types "
- "will OVERWRITE this value.",
- },
- { .key = {"node-uuid"},
- .type = GF_OPTION_TYPE_STR,
- .description = "Local glusterd uuid string",
- },
- { .key = {"heal-timeout"},
- .type = GF_OPTION_TYPE_INT,
- .min = 60,
- .max = INT_MAX,
- .default_value = "600",
- .description = "Poll timeout for checking the need to self-heal"
- },
- { .key = {NULL} },
+ {.key = {"read-subvolume"},
+ .type = GF_OPTION_TYPE_XLATOR,
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. Afr will prefer the one specified using "
+ "this option if it is not stale. Option value must be "
+ "one of the xlator names of the children. "
+ "Ex: <volname>-client-0 till "
+ "<volname>-client-<number-of-bricks - 1>"},
+ {.key = {"read-subvolume-index"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "-1",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one specified using "
+ "this option if it is not stale. allowed options"
+ " include -1 till replica-count - 1"},
+ {.key = {"read-hash-mode"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 5,
+ .default_value = "1",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description =
+ "inode-read fops happen only on one of the bricks in "
+ "replicate. AFR will prefer the one computed using "
+ "the method specified using this option.\n"
+ "0 = first readable child of AFR, starting from 1st child.\n"
+ "1 = hash by GFID of file (all clients use "
+ "same subvolume).\n"
+ "2 = hash by GFID of file and client PID.\n"
+ "3 = brick having the least outstanding read requests.\n"
+ "4 = brick having the least network ping latency.\n"
+ "5 = Hybrid mode between 3 and 4, ie least value among "
+ "network-latency multiplied by outstanding-read-requests."},
+ {
+ .key = {"choose-local"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Choose a local subvolume (i.e. Brick) to read from"
+ " if read-subvolume is not explicitly set.",
+ },
+ {.key = {"background-self-heal-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 256,
+ .default_value = "8",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This specifies the number of per client self-heal "
+ "jobs that can perform parallel heals in the "
+ "background."},
+ {.key = {"halo-shd-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "99999",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "Maximum latency for shd halo replication in msec."},
+ {.key = {"halo-enabled"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "False",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "Enable Halo (geo) replication mode."},
+ {.key = {"halo-nfsd-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "5",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "Maximum latency for nfsd halo replication in msec."},
+ {.key = {"halo-max-latency"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = AFR_HALO_MAX_LATENCY,
+ .default_value = "5",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "Maximum latency for halo replication in msec."},
+ {.key = {"halo-max-replicas"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "99999",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "The maximum number of halo replicas; replicas"
+ " beyond this value will be written asynchronously"
+ "via the SHD."},
+ {.key = {"halo-min-replicas"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 99999,
+ .default_value = "2",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate", "halo"},
+ .description = "The minimmum number of halo replicas, before adding "
+ "out of region replicas."},
+ {.key = {"heal-wait-queue-length"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = 10000, /*Around 100MB with sizeof(afr_local_t)= 10496 bytes*/
+ .default_value = "128",
+ .validate = GF_OPT_VALIDATE_MIN,
+ .op_version = {GD_OP_VERSION_3_7_10},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This specifies the number of heals that can be queued"
+ " for the parallel background self heal jobs."},
+ {.key = {"data-self-heal"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"1", "on", "yes", "true", "enable", "0", "off", "no", "false",
+ "disable", "open"},
+ .default_value = "off",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Using this option we can enable/disable data "
+ "self-heal on the file. \"open\" means data "
+ "self-heal action will only be triggered by file "
+ "open operations."},
+ {.key = {"data-self-heal-algorithm"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Select between \"full\", \"diff\". The "
+ "\"full\" algorithm copies the entire file from "
+ "source to sink. The \"diff\" algorithm copies to "
+ "sink only those blocks whose checksums don't match "
+ "with those of source. If no option is configured "
+ "the option is chosen dynamically as follows: "
+ "If the file does not exist on one of the sinks "
+ "or empty file exists or if the source file size is "
+ "about the same as page size the entire file will "
+ "be read and written i.e \"full\" algo, "
+ "otherwise \"diff\" algo is chosen.",
+ .value = {"diff", "full"}},
+ {.key = {"data-self-heal-window-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 1024,
+ .default_value = "1",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Maximum number blocks per file for which self-heal "
+ "process would be applied simultaneously."},
+ {.key = {"metadata-self-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ /*.validate_fn = validate_replica*/
+ .description = "Using this option we can enable/disable metadata "
+ "i.e. Permissions, ownerships, xattrs self-heal on "
+ "the file/directory."},
+ {.key = {"entry-self-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ /*.validate_fn = validate_replica*/
+ .description = "Using this option we can enable/disable entry "
+ "self-heal on the directory."},
+ {.key = {"data-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"},
+ {.key = {"metadata-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"},
+ {.key = {"entry-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"},
+ {.key = {"optimistic-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Entry/Metadata fops will not perform "
+ "pre fop changelog operations in afr transaction "
+ "if this option is enabled."},
+ {.key = {"inodelk-trace"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enabling this option logs inode lock/unlocks"},
+ {.key = {"entrylk-trace"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enabling this option logs entry lock/unlocks"},
+ {.key = {"pre-op-compat"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Use separate pre-op xattrop() FOP rather than "
+ "overloading xdata of the OP"},
+ {.key = {"eager-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description =
+ "Enable/Disable eager lock for replica volume. "
+ "Lock phase of a transaction has two sub-phases. "
+ "First is an attempt to acquire locks in parallel by "
+ "broadcasting non-blocking lock requests. If lock "
+ "acquisition fails on any server, then the held locks "
+ "are unlocked and we revert to a blocking locks mode "
+ "sequentially on one server after another. If this "
+ "option is enabled the initial broadcasting lock "
+ "request attempts to acquire a full lock on the entire file. "
+ "If this fails, we revert back to the sequential "
+ "\"regional\" blocking locks as before. In the case "
+ "where such an \"eager\" lock is granted in the "
+ "non-blocking phase, it gives rise to an opportunity "
+ "for optimization. i.e, if the next write transaction "
+ "on the same FD arrives before the unlock phase of "
+ "the first transaction, it \"takes over\" the full "
+ "file lock. Similarly if yet another data transaction "
+ "arrives before the unlock phase of the \"optimized\" "
+ "transaction, that in turn \"takes over\" the lock as "
+ "well. The actual unlock now happens at the end of "
+ "the last \"optimized\" transaction."
+
+ },
+ {.key = {"self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ /*.validate_fn = validate_replica_heal_enable_disable*/
+ .description = "This option applies to only self-heal-daemon. "
+ "Index directory crawl and automatic healing of files "
+ "will not be performed if this option is turned off."},
+ {.key = {"iam-self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option differentiates if the replicate "
+ "translator is running as part of self-heal-daemon "
+ "or not."},
+ {.key = {"iam-nfs-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option differentiates if the replicate "
+ "translator is running as part of an NFS daemon "
+ "or not."},
+ {
+ .key = {"quorum-type"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"none", "auto", "fixed"},
+ .default_value = "none",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ /*.option = quorum-type*/
+ .description = "If value is \"fixed\" only allow writes if "
+ "quorum-count bricks are present. If value is "
+ "\"auto\" only allow writes if more than half of "
+ "bricks, or exactly half including the first, are "
+ "present.",
+ },
+ {
+ .key = {"quorum-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = INT_MAX,
+ .default_value = 0,
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ /*.option = quorum-count*/
+ /*.validate_fn = validate_quorum_count*/
+ .description = "If quorum-type is \"fixed\" only allow writes if "
+ "this many bricks are present. Other quorum types "
+ "will OVERWRITE this value.",
+ },
+ {
+ .key = {"quorum-reads"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option has been removed. Reads are not allowed "
+ "if quorum is not met.",
+ },
+ {
+ .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Local glusterd uuid string, used in starting "
+ "self-heal-daemon so that it can crawl only on "
+ "local index directories.",
+ },
+ {
+ .key = {"post-op-delay-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "1",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Time interval induced artificially before "
+ "post-operation phase of the transaction to "
+ "enhance overlap of adjacent write operations.",
+ },
+ {
+ .key = {AFR_SH_READDIR_SIZE_KEY},
+ .type = GF_OPTION_TYPE_SIZET,
+ .description = "readdirp size for performing entry self-heal",
+ .min = 1024,
+ .max = 131072,
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .default_value = "1KB",
+ },
+ {
+ .key = {"ensure-durability"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .op_version = {3},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Afr performs fsyncs for transactions if this "
+ "option is on to make sure the changelogs/data is "
+ "written to the disk",
+ .default_value = "on",
+ },
+ {
+ .key = {"afr-dirty-xattr"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = AFR_DIRTY_DEFAULT,
+ },
+ {.key = {"afr-pending-xattr"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Comma separated list of xattrs that are used to "
+ "capture information on pending heals."},
+ {
+ .key = {"metadata-splitbrain-forced-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+ {.key = {"heal-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 5,
+ .max = INT_MAX,
+ .default_value = "600",
+ .op_version = {2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "time interval for checking the need to self-heal "
+ "in self-heal-daemon"},
+ {
+ .key = {"consistent-metadata"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "If this option is enabled, readdirp will force "
+ "lookups on those entries read whose read child is "
+ "not the same as that of the parent. This will "
+ "guarantee that all read operations on a file serve "
+ "attributes from the same subvol as long as it holds "
+ " a good copy of the file/dir.",
+ },
+ {.key = {"arbiter-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .description = "subset of child_count. Has to be 0 or 1."},
+ {
+ .key = {"thin-arbiter"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "contains host:path of thin abriter brick",
+ },
+ {.key = {"shd-max-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 64,
+ .default_value = "1",
+ .op_version = {GD_OP_VERSION_3_7_12},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "Maximum number of parallel heals SHD can do per "
+ "local brick. This can substantially lower heal times"
+ ", but can also crush your bricks if you don't have "
+ "the storage hardware to support this."},
+ {
+ .key = {"shd-wait-qlength"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 655536,
+ .default_value = "1024",
+ .op_version = {GD_OP_VERSION_3_7_12},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option can be used to control number of heals"
+ " that can wait in SHD per subvolume",
+ },
+ {
+ .key = {"locking-scheme"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"full", "granular"},
+ .default_value = "full",
+ .op_version = {GD_OP_VERSION_3_7_12},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "If this option is set to granular, self-heal will "
+ "stop being compatible with afr-v1, which helps afr "
+ "be more granular while self-healing",
+ },
+ {.key = {"full-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "yes",
+ .op_version = {GD_OP_VERSION_3_13_2},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "If this option is disabled, then the IOs will take "
+ "range locks same as versions till 3.13.1."},
+ {
+ .key = {"granular-entry-heal"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_3_8_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "If this option is enabled, self-heal will resort to "
+ "granular way of recording changelogs and doing entry "
+ "self-heal.",
+ },
+ {
+ .key = {"favorite-child-policy"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"none", "size", "ctime", "mtime", "majority"},
+ .default_value = "none",
+ .op_version = {GD_OP_VERSION_3_7_12},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option can be used to automatically resolve "
+ "split-brains using various policies without user "
+ "intervention. \"size\" picks the file with the "
+ "biggest size as the source. \"ctime\" and \"mtime\" "
+ "pick the file with the latest ctime and mtime "
+ "respectively as the source. \"majority\" picks a file"
+ " with identical mtime and size in more than half the "
+ "number of bricks in the replica.",
+ },
+ {
+ .key = {"consistent-io"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .description = "If this option is enabled, i/o will fail even if "
+ "one of the bricks is down in the replicas",
+ },
+ {.key = {"use-compound-fops"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_3_8_4},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"replicate"},
+ .description = "This option exists only for backward compatibility "
+ "and configuring it doesn't have any effect"},
+ {.key = {"use-anonymous-inode"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ .op_version = {GD_OP_VERSION_8_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .tags = {"replicate"},
+ .description = "Setting this option heals directory renames efficiently"},
+
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "replicate",
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/cluster/afr/src/afr.h b/xlators/cluster/afr/src/afr.h
index c8e01fcb841..d62f9a9caf2 100644
--- a/xlators/cluster/afr/src/afr.h
+++ b/xlators/cluster/afr/src/afr.h
@@ -8,1058 +8,1416 @@
cases as published by the Free Software Foundation.
*/
-
#ifndef __AFR_H__
#define __AFR_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "call-stub.h"
-#include "compat-errno.h"
+#include <glusterfs/call-stub.h>
+#include <glusterfs/compat-errno.h>
#include "afr-mem-types.h"
-#include "afr-self-heal-algorithm.h"
#include "libxlator.h"
-#include "timer.h"
+#include <glusterfs/timer.h>
+#include <glusterfs/syncop.h>
+
+#include "afr-self-heald.h"
+#include "afr-messages.h"
-#define AFR_XATTR_PREFIX "trusted.afr"
+#define SHD_INODE_LRU_LIMIT 1
#define AFR_PATHINFO_HEADER "REPLICATE:"
+#define AFR_SH_READDIR_SIZE_KEY "self-heal-readdir-size"
+#define AFR_SH_DATA_DOMAIN_FMT "%s:self-heal"
+#define AFR_DIRTY_DEFAULT AFR_XATTR_PREFIX ".dirty"
+#define AFR_DIRTY (((afr_private_t *)(THIS->private))->afr_dirty)
+
+#define AFR_LOCKEE_COUNT_MAX 3
+#define AFR_DOM_COUNT_MAX 3
+#define AFR_NUM_CHANGE_LOGS 3 /*data + metadata + entry*/
+#define AFR_DEFAULT_SPB_CHOICE_TIMEOUT 300 /*in seconds*/
+
+#define ARBITER_BRICK_INDEX 2
+#define THIN_ARBITER_BRICK_INDEX 2
+#define AFR_TA_DOM_NOTIFY "afr.ta.dom-notify"
+#define AFR_TA_DOM_MODIFY "afr.ta.dom-modify"
+
+#define AFR_LK_HEAL_DOM "afr.lock-heal.domain"
+
+#define AFR_HALO_MAX_LATENCY 99999
+#define AFR_ANON_DIR_PREFIX ".glusterfs-anonymous-inode"
+
+#define PFLAG_PENDING (1 << 0)
+#define PFLAG_SBRAIN (1 << 1)
+
+typedef int (*afr_lock_cbk_t)(call_frame_t *frame, xlator_t *this);
+
+typedef int (*afr_read_txn_wind_t)(call_frame_t *frame, xlator_t *this,
+ int subvol);
+
+typedef int (*afr_inode_refresh_cbk_t)(call_frame_t *frame, xlator_t *this,
+ int err);
+
+typedef int (*afr_changelog_resume_t)(call_frame_t *frame, xlator_t *this);
+
+#define AFR_COUNT(array, max) \
+ ({ \
+ int __i; \
+ int __res = 0; \
+ for (__i = 0; __i < max; __i++) \
+ if (array[__i]) \
+ __res++; \
+ __res; \
+ })
+#define AFR_INTERSECT(dst, src1, src2, max) \
+ ({ \
+ int __i; \
+ for (__i = 0; __i < max; __i++) \
+ dst[__i] = src1[__i] && src2[__i]; \
+ })
+#define AFR_CMP(a1, a2, len) \
+ ({ \
+ int __cmp = 0; \
+ int __i; \
+ for (__i = 0; __i < len; __i++) \
+ if (a1[__i] != a2[__i]) { \
+ __cmp = 1; \
+ break; \
+ } \
+ __cmp; \
+ })
+#define AFR_IS_ARBITER_BRICK(priv, index) \
+ ((priv->arbiter_count == 1) && (index == ARBITER_BRICK_INDEX))
+
+#define AFR_SET_ERROR_AND_CHECK_SPLIT_BRAIN(ret, errnum) \
+ do { \
+ local->op_ret = ret; \
+ local->op_errno = errnum; \
+ if (local->op_errno == EIO) \
+ gf_msg(this->name, GF_LOG_ERROR, local->op_errno, \
+ AFR_MSG_SPLIT_BRAIN, \
+ "Failing %s on gfid %s: " \
+ "split-brain observed.", \
+ gf_fop_list[local->op], uuid_utoa(local->inode->gfid)); \
+ } while (0)
+
+#define AFR_ERROR_OUT_IF_FDCTX_INVALID(__fd, __this, __error, __label) \
+ do { \
+ afr_fd_ctx_t *__fd_ctx = NULL; \
+ __fd_ctx = afr_fd_ctx_get(__fd, __this); \
+ if (__fd_ctx && __fd_ctx->is_fd_bad) { \
+ __error = EBADF; \
+ goto __label; \
+ } \
+ } while (0)
-struct _pump_private;
+typedef enum {
+ AFR_READ_POLICY_FIRST_UP,
+ AFR_READ_POLICY_GFID_HASH,
+ AFR_READ_POLICY_GFID_PID_HASH,
+ AFR_READ_POLICY_LESS_LOAD,
+ AFR_READ_POLICY_LEAST_LATENCY,
+ AFR_READ_POLICY_LOAD_LATENCY_HYBRID,
+} afr_read_hash_mode_t;
-typedef int (*afr_expunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
- int child, int32_t op_error,
- int32_t op_errno);
+typedef enum {
+ AFR_FAV_CHILD_NONE,
+ AFR_FAV_CHILD_BY_SIZE,
+ AFR_FAV_CHILD_BY_CTIME,
+ AFR_FAV_CHILD_BY_MTIME,
+ AFR_FAV_CHILD_BY_MAJORITY,
+ AFR_FAV_CHILD_POLICY_MAX,
+} afr_favorite_child_policy;
-typedef int (*afr_impunge_done_cbk_t) (call_frame_t *frame, xlator_t *this,
- int32_t op_error, int32_t op_errno);
-typedef int (*afr_post_remove_call_t) (call_frame_t *frame, xlator_t *this);
+typedef enum {
+ AFR_SELFHEAL_DATA_FULL = 0,
+ AFR_SELFHEAL_DATA_DIFF,
+ AFR_SELFHEAL_DATA_DYNAMIC,
+} afr_data_self_heal_type_t;
-typedef int (*afr_lock_cbk_t) (call_frame_t *frame, xlator_t *this);
-typedef void (*afr_lookup_done_cbk_t) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno);
+typedef enum {
+ AFR_CHILD_UNKNOWN = -1,
+ AFR_CHILD_ZERO,
+ AFR_CHILD_ONE,
+ AFR_CHILD_THIN_ARBITER,
+} afr_child_index;
typedef enum {
- AFR_POS_UNKNOWN,
- AFR_POS_LOCAL,
- AFR_POS_REMOTE
-} afr_child_pos_t;
+ TA_WAIT_FOR_NOTIFY_LOCK_REL, /*FOP came after notify domain lock upcall
+ notification and waiting for its release.*/
+ TA_GET_INFO_FROM_TA_FILE, /*FOP needs post-op on ta file to get
+ *info about which brick is bad.*/
+ TA_INFO_IN_MEMORY_SUCCESS, /*Bad brick info is in memory and fop failed
+ *on BAD brick - Success*/
+ TA_INFO_IN_MEMORY_FAILED, /*Bad brick info is in memory and fop failed
+ *on GOOD brick - Failed*/
+ TA_SUCCESS, /*FOP succeeded on both data bricks.*/
+} afr_ta_fop_state_t;
+
+struct afr_nfsd {
+ uint32_t halo_max_latency_msec;
+ gf_boolean_t iamnfsd;
+};
+
+typedef struct _afr_lk_heal_info {
+ fd_t *fd;
+ int32_t cmd;
+ struct gf_flock flock;
+ dict_t *xdata_req;
+ unsigned char *locked_nodes;
+ struct list_head pos;
+ gf_lkowner_t lk_owner;
+ pid_t pid;
+ int32_t *child_up_event_gen;
+ int32_t *child_down_event_gen;
+} afr_lk_heal_info_t;
+
+typedef struct _afr_private {
+ gf_lock_t lock; /* to guard access to child_count, etc */
+ unsigned int child_count; /* total number of children */
+ unsigned int arbiter_count; /*subset of child_count.
+ Has to be 0 or 1.*/
+
+ xlator_t **children;
+
+ inode_t *root_inode;
+
+ int favorite_child; /* subvolume to be preferred in resolving
+ split-brain cases */
+ /* For thin-arbiter. */
+ uuid_t ta_gfid;
+ unsigned int thin_arbiter_count; /* 0 or 1 at the moment.*/
+ int ta_bad_child_index;
+ int ta_event_gen;
+ unsigned int ta_in_mem_txn_count;
+ unsigned int ta_on_wire_txn_count;
+ struct list_head ta_waitq;
+ struct list_head ta_onwireq;
+
+ unsigned char *anon_inode;
+ unsigned char *child_up;
+ unsigned char *halo_child_up;
+ int64_t *child_latency;
+ unsigned char *local;
+
+ char **pending_key;
+
+ afr_data_self_heal_type_t data_self_heal_algorithm;
+ unsigned int data_self_heal_window_size; /* max number of pipelined
+ read/writes */
+
+ struct list_head heal_waiting; /*queue for files that need heal*/
+ uint32_t heal_wait_qlen; /*configurable queue length for heal_waiting*/
+ int32_t heal_waiters; /* No. of elements currently in wait queue.*/
+
+ struct list_head healing; /* queue for files that are undergoing
+ background heal*/
+ uint32_t background_self_heal_count; /*configurable queue length for
+ healing queue*/
+ int32_t healers; /* No. of elements currently undergoing background
+ heal*/
+
+ gf_boolean_t release_ta_notify_dom_lock;
+
+ gf_boolean_t metadata_self_heal; /* on/off */
+ gf_boolean_t entry_self_heal; /* on/off */
+
+ gf_boolean_t metadata_splitbrain_forced_heal; /* on/off */
+ int read_child; /* read-subvolume */
+ gf_atomic_t *pending_reads; /*No. of pending read cbks per child.*/
+
+ gf_timer_t *timer; /* launched when parent up is received */
+
+ unsigned int wait_count; /* # of servers to wait for success */
+
+ unsigned char ta_child_up;
+ gf_boolean_t optimistic_change_log;
+ gf_boolean_t eager_lock;
+ gf_boolean_t pre_op_compat; /* on/off */
+ uint32_t post_op_delay_secs;
+ unsigned int quorum_count;
+
+ off_t ta_notify_dom_lock_offset;
+ afr_favorite_child_policy fav_child_policy; /*Policy to use for automatic
+ resolution of split-brains.*/
+ afr_read_hash_mode_t hash_mode; /* for when read_child is not set */
+
+ int32_t *last_event;
+
+ /* @event_generation: Keeps count of number of events received which can
+ potentially impact consistency decisions. The events are CHILD_UP
+ and CHILD_DOWN, when we have to recalculate the freshness/staleness
+ of copies to detect if changes had happened while the other server
+ was down. CHILD_DOWN and CHILD_UP can also be received on network
+ disconnect/reconnects and not necessarily server going down/up.
+ Recalculating freshness/staleness on network events is equally
+ important as we might have had a network split brain.
+ */
+ uint32_t event_generation;
+ char vol_uuid[UUID_SIZE + 1];
+
+ gf_boolean_t choose_local;
+ gf_boolean_t did_discovery;
+ gf_boolean_t ensure_durability;
+ gf_boolean_t halo_enabled;
+ gf_boolean_t consistent_metadata;
+ gf_boolean_t need_heal;
+ gf_boolean_t granular_locks;
+ uint64_t sh_readdir_size;
+ char *sh_domain;
+ char *afr_dirty;
+
+ uint64_t spb_choice_timeout;
+
+ afr_self_heald_t shd;
+ struct afr_nfsd nfsd;
+
+ uint32_t halo_max_latency_msec;
+ uint32_t halo_max_replicas;
+ uint32_t halo_min_replicas;
+
+ gf_boolean_t full_lock;
+ gf_boolean_t esh_granular;
+ gf_boolean_t consistent_io;
+ gf_boolean_t data_self_heal; /* on/off */
+ gf_boolean_t use_anon_inode;
+
+ /*For lock healing.*/
+ struct list_head saved_locks;
+ struct list_head lk_healq;
+
+ /*For anon-inode handling */
+ char anon_inode_name[NAME_MAX + 1];
+ char anon_gfid_str[UUID_SIZE + 1];
+} afr_private_t;
typedef enum {
- SPLIT_BRAIN = 1,
- ALL_FOOLS = 2
-} afr_subvol_status_t;
+ AFR_DATA_TRANSACTION, /* truncate, write, ... */
+ AFR_METADATA_TRANSACTION, /* chmod, chown, ... */
+ AFR_ENTRY_TRANSACTION, /* create, rmdir, ... */
+ AFR_ENTRY_RENAME_TRANSACTION, /* rename */
+} afr_transaction_type;
+
+/*
+ xattr format: trusted.afr.volume = [x y z]
+ x - data pending
+ y - metadata pending
+ z - entry pending
+*/
+
+static inline int
+afr_index_for_transaction_type(afr_transaction_type type)
+{
+ switch (type) {
+ case AFR_DATA_TRANSACTION:
+ return 0;
+
+ case AFR_METADATA_TRANSACTION:
+ return 1;
+
+ case AFR_ENTRY_TRANSACTION:
+ case AFR_ENTRY_RENAME_TRANSACTION:
+ return 2;
+ }
+
+ return -1; /* make gcc happy */
+}
+
+static inline int
+afr_index_from_ia_type(ia_type_t type)
+{
+ switch (type) {
+ case IA_IFDIR:
+ return afr_index_for_transaction_type(AFR_ENTRY_TRANSACTION);
+ case IA_IFREG:
+ return afr_index_for_transaction_type(AFR_DATA_TRANSACTION);
+ default:
+ return -1;
+ }
+}
+
+typedef struct {
+ struct gf_flock flock;
+ loc_t loc;
+ fd_t *fd;
+ char *basename;
+ unsigned char *locked_nodes;
+ int locked_count;
+
+} afr_lockee_t;
+
+int
+afr_entry_lockee_cmp(const void *l1, const void *l2);
+
+typedef struct {
+ loc_t *lk_loc;
+
+ afr_lockee_t lockee[AFR_LOCKEE_COUNT_MAX];
+
+ const char *lk_basename;
+ const char *lower_basename;
+ const char *higher_basename;
+
+ unsigned char *lower_locked_nodes;
+
+ afr_lock_cbk_t lock_cbk;
+
+ int lockee_count;
+
+ int32_t lk_call_count;
+ int32_t lk_expected_count;
+ int32_t lk_attempted_count;
+
+ int32_t lock_op_ret;
+ int32_t lock_op_errno;
+ char *domain; /* Domain on which inode/entry lock/unlock in progress.*/
+ int32_t lock_count;
+ char lower_locked;
+ char higher_locked;
+} afr_internal_lock_t;
+
+struct afr_reply {
+ int valid;
+ int32_t op_ret;
+ dict_t *xattr; /*For xattrop*/
+ dict_t *xdata;
+ struct iatt poststat;
+ struct iatt postparent;
+ struct iatt prestat;
+ struct iatt preparent;
+ struct iatt preparent2;
+ struct iatt postparent2;
+ int32_t op_errno;
+ /* For rchecksum */
+ uint8_t checksum[SHA256_DIGEST_LENGTH];
+ gf_boolean_t buf_has_zeroes;
+ gf_boolean_t fips_mode_rchecksum;
+ /* For lookup */
+ int8_t need_heal;
+};
typedef enum {
- AFR_INODE_SET_READ_CTX = 1,
- AFR_INODE_RM_STALE_CHILDREN,
- AFR_INODE_SET_OPENDIR_DONE,
- AFR_INODE_SET_SPLIT_BRAIN,
- AFR_INODE_GET_READ_CTX,
- AFR_INODE_GET_OPENDIR_DONE,
- AFR_INODE_GET_SPLIT_BRAIN,
-} afr_inode_op_t;
-
-typedef struct afr_inode_params_ {
- afr_inode_op_t op;
- union {
- gf_boolean_t value;
- struct {
- int32_t read_child;
- int32_t *children;
- } read_ctx;
- } u;
-} afr_inode_params_t;
-
-typedef struct afr_inode_ctx_ {
- uint64_t masks;
- int32_t *fresh_children;//increasing order of latency
-} afr_inode_ctx_t;
+ AFR_FD_NOT_OPENED,
+ AFR_FD_OPENED,
+ AFR_FD_OPENING
+} afr_fd_open_status_t;
+
+typedef struct {
+ afr_fd_open_status_t *opened_on; /* which subvolumes the fd is open on */
+ int flags;
+
+ /* the subvolume on which the latest sequence of readdirs (starting
+ at offset 0) has begun. Till the next readdir request with 0 offset
+ arrives, we continue to read off this subvol.
+ */
+ int readdir_subvol;
+ /* lock-healing related members. */
+ gf_boolean_t is_fd_bad;
+ afr_lk_heal_info_t *lk_heal_info;
+
+} afr_fd_ctx_t;
typedef enum {
- NONE,
- INDEX,
- FULL,
-} afr_crawl_type_t;
-
-typedef struct afr_self_heald_ {
- gf_boolean_t enabled;
- gf_boolean_t iamshd;
- afr_crawl_type_t *pending;
- gf_boolean_t *inprogress;
- afr_child_pos_t *pos;
- gf_timer_t **timer;
- eh_t *healed;
- eh_t *heal_failed;
- eh_t *split_brain;
- char *node_uuid;
- int timeout;
-} afr_self_heald_t;
+ AFR_FOP_LOCK_PARALLEL,
+ AFR_FOP_LOCK_SERIAL,
+ AFR_FOP_LOCK_QUORUM_FAILED,
+} afr_fop_lock_state_t;
+
+typedef struct _afr_inode_lock_t {
+ /* @num_inodelks:
+ Number of inodelks queried from the server, as queried through
+ xdata in FOPs. Currently, used to decide if eager-locking must be
+ temporarily disabled.
+ */
+ int32_t num_inodelks;
+ unsigned int event_generation;
+ gf_timer_t *delay_timer;
+ struct list_head owners; /*Transactions that are performing fop*/
+ struct list_head post_op; /*Transactions that are done with the fop
+ *So can not conflict with the fops*/
+ struct list_head waiting; /*Transaction that are waiting for
+ *conflicting transactions to complete*/
+ struct list_head frozen; /*Transactions that need to go as part of
+ * next batch of eager-lock*/
+ gf_boolean_t release;
+ gf_boolean_t acquired;
+} afr_lock_t;
+
+typedef struct _afr_inode_ctx {
+ uint64_t read_subvol;
+ uint64_t write_subvol;
+ int lock_count;
+ int spb_choice;
+ gf_timer_t *timer;
+ unsigned int *pre_op_done[AFR_NUM_CHANGE_LOGS];
+ int inherited[AFR_NUM_CHANGE_LOGS];
+ int on_disk[AFR_NUM_CHANGE_LOGS];
+ /*Only 2 types of transactions support eager-locks now. DATA/METADATA*/
+ afr_lock_t lock[2];
+
+ /* @open_fd_count:
+ Number of open FDs queried from the server, as queried through
+ xdata in FOPs. Currently, used to decide if eager-locking must be
+ temporarily disabled.
+ */
+ uint32_t open_fd_count;
+ gf_boolean_t need_refresh;
+
+ /* set if any write on this fd was a non stable write
+ (i.e, without O_SYNC or O_DSYNC)
+ */
+ gf_boolean_t witnessed_unstable_write;
+} afr_inode_ctx_t;
-typedef struct _afr_private {
- gf_lock_t lock; /* to guard access to child_count, etc */
- unsigned int child_count; /* total number of children */
+typedef struct _afr_local {
+ glusterfs_fop_t op;
+ unsigned int call_count;
- unsigned int read_child_rr; /* round-robin index of the read_child */
- gf_lock_t read_child_lock; /* lock to protect above */
+ /* @event_generation: copy of priv->event_generation taken at the
+ time of starting the transaction. The copy is made so that we
+ have a stable value through the various phases of the transaction.
+ */
+ unsigned int event_generation;
- xlator_t **children;
+ uint32_t open_fd_count;
+ int32_t num_inodelks;
- int first_lookup;
- inode_t *root_inode;
+ int32_t op_ret;
+ int32_t op_errno;
- unsigned char *child_up;
+ int dirty[AFR_NUM_CHANGE_LOGS];
- char **pending_key;
+ int32_t **pending;
- char *data_self_heal; /* on/off/open */
- char * data_self_heal_algorithm; /* name of algorithm */
- unsigned int data_self_heal_window_size; /* max number of pipelined
- read/writes */
+ loc_t loc;
+ loc_t newloc;
- unsigned int background_self_heal_count;
- unsigned int background_self_heals_started;
- gf_boolean_t metadata_self_heal; /* on/off */
- gf_boolean_t entry_self_heal; /* on/off */
+ fd_t *fd;
+ afr_fd_ctx_t *fd_ctx;
- gf_boolean_t data_change_log; /* on/off */
- gf_boolean_t metadata_change_log; /* on/off */
- gf_boolean_t entry_change_log; /* on/off */
+ /* @child_up: copy of priv->child_up taken at the time of transaction
+ start. The copy is taken so that we have a stable child_up array
+ through the phases of the transaction as priv->child_up[i] can keep
+ changing through time.
+ */
+ unsigned char *child_up;
- int read_child; /* read-subvolume */
- unsigned int hash_mode; /* for when read_child is not set */
- int favorite_child; /* subvolume to be preferred in resolving
- split-brain cases */
+ /* @read_attempted:
+ array of flags representing subvolumes where read operations of
+ the read transaction have already been attempted. The array is
+ first pre-filled with down subvolumes, and as reads are performed
+ on other subvolumes, those are set as well. This way if the read
+ operation fails we do not retry on that subvolume again.
+ */
+ unsigned char *read_attempted;
- gf_boolean_t inodelk_trace;
- gf_boolean_t entrylk_trace;
+ /* @readfn:
- gf_boolean_t strict_readdir;
+ pointer to function which will perform the read operation on a given
+ subvolume. Used in read transactions.
+ */
- unsigned int wait_count; /* # of servers to wait for success */
+ afr_read_txn_wind_t readfn;
- uint64_t up_count; /* number of CHILD_UPs we have seen */
- uint64_t down_count; /* number of CHILD_DOWNs we have seen */
+ /* @inode:
- struct _pump_private *pump_private; /* Set if we are loaded as pump */
- int use_afr_in_pump;
+ the inode on which the read txn is performed on. ref'ed and copied
+ from either fd->inode or loc.inode
+ */
- pthread_mutex_t mutex;
- struct list_head saved_fds; /* list of fds on which locks have succeeded */
- gf_boolean_t optimistic_change_log;
- gf_boolean_t eager_lock;
- unsigned int quorum_count;
+ inode_t *inode;
- char vol_uuid[UUID_SIZE + 1];
- int32_t *last_event;
- afr_self_heald_t shd;
- gf_boolean_t choose_local;
- gf_boolean_t did_discovery;
-} afr_private_t;
+ /* @parent[2]:
-typedef struct {
- /* External interface: These are variables (some optional) that
- are set by whoever has triggered self-heal */
-
- gf_boolean_t do_data_self_heal;
- gf_boolean_t do_metadata_self_heal;
- gf_boolean_t do_entry_self_heal;
- gf_boolean_t do_gfid_self_heal;
- gf_boolean_t do_missing_entry_self_heal;
-
- gf_boolean_t forced_merge; /* Is this a self-heal triggered to
- forcibly merge the directories? */
-
- gf_boolean_t background; /* do self-heal in background
- if possible */
- ia_type_t type; /* st_mode of the entry we're doing
- self-heal on */
- inode_t *inode; /* inode on which the self-heal is
- performed on */
- uuid_t sh_gfid_req; /* gfid self-heal needs to be done
- with this gfid if it is not null */
-
- /* Function to call to unwind. If self-heal is being done in the
- background, this function will be called as soon as possible. */
-
- int (*unwind) (call_frame_t *frame, xlator_t *this, int32_t op_ret,
- int32_t op_errno, int32_t sh_failed);
-
- /* End of external interface members */
-
-
- /* array of stat's, one for each child */
- struct iatt *buf;
- struct iatt *parentbufs;
- struct iatt parentbuf;
- struct iatt entrybuf;
-
- afr_expunge_done_cbk_t expunge_done;
- afr_impunge_done_cbk_t impunge_done;
-
- /* array of xattr's, one for each child */
- dict_t **xattr;
-
- /* array containing if the lookups succeeded in the order of response
- */
- int32_t *success_children;
- int success_count;
- /* array containing the fresh children found in the self-heal process */
- int32_t *fresh_children;
- /* array containing the fresh children found in the parent lookup */
- int32_t *fresh_parent_dirs;
- /* array of errno's, one for each child */
- int *child_errno;
- /*loc used for lookup*/
- loc_t lookup_loc;
- int32_t lookup_flags;
- afr_lookup_done_cbk_t lookup_done;
-
- int32_t **pending_matrix;
- int32_t **delta_matrix;
-
- int32_t op_ret;
- int32_t op_errno;
-
- int *sources;
- int source;
- int active_source;
- int active_sinks;
- unsigned char *success;
- unsigned char *locked_nodes;
- int lock_count;
-
- const char *linkname;
- gf_boolean_t entries_skipped;
-
- int op_failed;
-
- gf_boolean_t sync_done;
- gf_boolean_t data_lock_held;
- gf_boolean_t eof_reached;
- fd_t *healing_fd;
- int file_has_holes;
- blksize_t block_size;
- off_t file_size;
- off_t offset;
- unsigned char *write_needed;
- uint8_t *checksum;
- afr_post_remove_call_t post_remove_call;
+ parent inode[s] on which directory transactions are performed.
+ */
- loc_t parent_loc;
+ inode_t *parent;
+ inode_t *parent2;
- call_frame_t *orig_frame;
- call_frame_t *old_loop_frame;
- gf_boolean_t unwound;
+ /* @readable:
- afr_sh_algo_private_t *private;
+ array of flags representing servers from which a read can be
+ performed. This is the output of afr_inode_refresh()
+ */
+ unsigned char *readable;
+ unsigned char *readable2; /*For rename transaction*/
- struct afr_sh_algorithm *algo;
- afr_lock_cbk_t data_lock_success_handler;
- afr_lock_cbk_t data_lock_failure_handler;
- int (*completion_cbk) (call_frame_t *frame, xlator_t *this);
- int (*sh_data_algo_start) (call_frame_t *frame, xlator_t *this);
- int (*algo_completion_cbk) (call_frame_t *frame, xlator_t *this);
- int (*algo_abort_cbk) (call_frame_t *frame, xlator_t *this);
- void (*gfid_sh_success_cbk) (call_frame_t *sh_frame, xlator_t *this);
+ afr_inode_refresh_cbk_t refreshfn;
- call_frame_t *sh_frame;
-} afr_self_heal_t;
+ /* @refreshinode:
-typedef enum {
- AFR_DATA_TRANSACTION, /* truncate, write, ... */
- AFR_METADATA_TRANSACTION, /* chmod, chown, ... */
- AFR_ENTRY_TRANSACTION, /* create, rmdir, ... */
- AFR_ENTRY_RENAME_TRANSACTION, /* rename */
-} afr_transaction_type;
+ Inode currently getting refreshed.
+ */
+ inode_t *refreshinode;
-typedef enum {
- AFR_TRANSACTION_LK,
- AFR_SELFHEAL_LK,
-} transaction_lk_type_t;
+ dict_t *xattr_req;
-typedef enum {
- AFR_LOCK_OP,
- AFR_UNLOCK_OP,
-} afr_lock_op_type_t;
+ dict_t *dict;
-typedef enum {
- AFR_DATA_SELF_HEAL_LK,
- AFR_METADATA_SELF_HEAL_LK,
- AFR_ENTRY_SELF_HEAL_LK,
-}selfheal_lk_type_t;
+ int read_subvol; /* Current read subvolume */
-typedef enum {
- AFR_INODELK_TRANSACTION,
- AFR_INODELK_NB_TRANSACTION,
- AFR_ENTRYLK_TRANSACTION,
- AFR_ENTRYLK_NB_TRANSACTION,
- AFR_INODELK_SELFHEAL,
- AFR_INODELK_NB_SELFHEAL,
- AFR_ENTRYLK_SELFHEAL,
- AFR_ENTRYLK_NB_SELFHEAL,
-} afr_lock_call_type_t;
+ int optimistic_change_log;
-/*
- xattr format: trusted.afr.volume = [x y z]
- x - data pending
- y - metadata pending
- z - entry pending
-*/
+ afr_internal_lock_t internal_lock;
-static inline int
-afr_index_for_transaction_type (afr_transaction_type type)
-{
- switch (type) {
+ /*To handle setattr/setxattr on yet to be linked inode from dht*/
+ uuid_t refreshgfid;
- case AFR_DATA_TRANSACTION:
- return 0;
+ /* @refreshed:
- case AFR_METADATA_TRANSACTION:
- return 1;
+ the inode was "refreshed" (i.e, pending xattrs from all subvols
+ freshly inspected and inode ctx updated accordingly) as part of
+ this transaction already.
+ */
+ gf_boolean_t refreshed;
- case AFR_ENTRY_TRANSACTION:
- case AFR_ENTRY_RENAME_TRANSACTION:
- return 2;
- }
+ gf_boolean_t update_num_inodelks;
+ gf_boolean_t update_open_fd_count;
- return -1; /* make gcc happy */
-}
+ /*
+ @pre_op_compat:
+ compatibility mode of pre-op. send a separate pre-op and
+ op operations as part of transaction, rather than combining
+ */
-typedef struct {
- loc_t *lk_loc;
- struct gf_flock lk_flock;
-
- const char *lk_basename;
- const char *lower_basename;
- const char *higher_basename;
- char lower_locked;
- char higher_locked;
-
- unsigned char *locked_nodes;
- unsigned char *lower_locked_nodes;
- unsigned char *inode_locked_nodes;
- unsigned char *entry_locked_nodes;
-
- selfheal_lk_type_t selfheal_lk_type;
- transaction_lk_type_t transaction_lk_type;
-
- int32_t lock_count;
- int32_t inodelk_lock_count;
- int32_t entrylk_lock_count;
-
- uint64_t lock_number;
- int32_t lk_call_count;
- int32_t lk_expected_count;
-
- int32_t lock_op_ret;
- int32_t lock_op_errno;
- afr_lock_cbk_t lock_cbk;
-} afr_internal_lock_t;
+ gf_boolean_t pre_op_compat;
-typedef struct _afr_locked_fd {
- fd_t *fd;
- struct list_head list;
-} afr_locked_fd_t;
+ /* Is the current writev() going to perform a stable write?
+ i.e, is fd->flags or @flags writev param have O_SYNC or
+ O_DSYNC?
+ */
+ gf_boolean_t stable_write;
-typedef struct _afr_local {
- int uid;
- int gid;
- unsigned int call_count;
- unsigned int success_count;
- unsigned int enoent_count;
+ /* This write appended to the file. Nnot necessarily O_APPEND,
+ just means the offset of write was at the end of file.
+ */
+ gf_boolean_t append_write;
+ /*
+ This struct contains the arguments for the "continuation"
+ (scheme-like) of fops
+ */
- unsigned int govinda_gOvinda;
+ struct {
+ struct {
+ struct statvfs buf;
+ unsigned char buf_set;
+ } statfs;
- unsigned int read_child_index;
- unsigned char read_child_returned;
- unsigned int first_up_child;
+ struct {
+ fd_t *fd;
+ int32_t flags;
+ } open;
- pid_t saved_pid;
+ struct {
+ struct gf_flock user_flock;
+ struct gf_flock ret_flock;
+ unsigned char *locked_nodes;
+ int32_t cmd;
+ /*For lock healing only.*/
+ unsigned char *dom_locked_nodes;
+ int32_t *dom_lock_op_ret;
+ int32_t *dom_lock_op_errno;
+ struct gf_flock *getlk_rsp;
+ } lk;
+
+ /* inode read */
- int32_t op_ret;
- int32_t op_errno;
+ struct {
+ int32_t mask;
+ int last_index; /* index of the child we tried previously */
+ } access;
- int32_t **pending;
+ struct {
+ int last_index;
+ } stat;
- loc_t loc;
- loc_t newloc;
+ struct {
+ int last_index;
+ } fstat;
- fd_t *fd;
- unsigned char *fd_open_on;
+ struct {
+ size_t size;
+ int last_index;
+ } readlink;
- glusterfs_fop_t fop;
+ struct {
+ char *name;
+ long xattr_len;
+ int last_index;
+ } getxattr;
- unsigned char *child_up;
- int32_t *fresh_children; //in the order of response
+ struct {
+ size_t size;
+ off_t offset;
+ int last_index;
+ uint32_t flags;
+ } readv;
- int32_t *child_errno;
+ /* dir read */
- dict_t *xattr_req;
+ struct {
+ uint32_t *checksum;
+ int success_count;
+ int32_t op_ret;
+ int32_t op_errno;
+ } opendir;
- int32_t inodelk_count;
- int32_t entrylk_count;
+ struct {
+ int32_t op_ret;
+ int32_t op_errno;
+ size_t size;
+ off_t offset;
+ dict_t *dict;
+ int last_index;
+ gf_boolean_t failed;
+ } readdir;
+ /* inode write */
- afr_internal_lock_t internal_lock;
+ struct {
+ struct iatt prebuf;
+ struct iatt postbuf;
+ } inode_wfop; // common structure for all inode-write-fops
- afr_locked_fd_t *locked_fd;
- int32_t source_child;
- int32_t lock_recovery_child;
+ struct {
+ struct iovec *vector;
+ struct iobref *iobref;
+ off_t offset;
+ int32_t op_ret;
+ int32_t count;
+ uint32_t flags;
+ } writev;
- dict_t *dict;
- int optimistic_change_log;
+ struct {
+ off_t offset;
+ } truncate;
- gf_boolean_t fop_paused;
- int (*fop_call_continue) (call_frame_t *frame, xlator_t *this);
+ struct {
+ off_t offset;
+ } ftruncate;
- /*
- This struct contains the arguments for the "continuation"
- (scheme-like) of fops
- */
+ struct {
+ struct iatt in_buf;
+ int32_t valid;
+ } setattr;
+
+ struct {
+ struct iatt in_buf;
+ int32_t valid;
+ } fsetattr;
+
+ struct {
+ dict_t *dict;
+ int32_t flags;
+ } setxattr;
- int op;
struct {
- struct {
- unsigned char buf_set;
- struct statvfs buf;
- } statfs;
-
- struct {
- uint32_t parent_entrylk;
- uuid_t gfid_req;
- inode_t *inode;
- struct iatt buf;
- struct iatt postparent;
- dict_t **xattrs;
- dict_t *xattr;
- struct iatt *postparents;
- struct iatt *bufs;
- int32_t read_child;
- int32_t *sources;
- int32_t *success_children;
- gf_boolean_t fresh_lookup;
- } lookup;
-
- struct {
- int32_t flags;
- } open;
-
- struct {
- int32_t cmd;
- struct gf_flock user_flock;
- struct gf_flock ret_flock;
- unsigned char *locked_nodes;
- } lk;
-
- /* inode read */
-
- struct {
- int32_t mask;
- int last_index; /* index of the child we tried previously */
- } access;
-
- struct {
- int last_index;
- } stat;
-
- struct {
- int last_index;
- } fstat;
-
- struct {
- size_t size;
- int last_index;
- } readlink;
-
- struct {
- char *name;
- int last_index;
- long xattr_len;
- } getxattr;
-
- struct {
- size_t size;
- off_t offset;
- int last_index;
- uint32_t flags;
- } readv;
-
- /* dir read */
-
- struct {
- int success_count;
- int32_t op_ret;
- int32_t op_errno;
-
- uint32_t *checksum;
- } opendir;
-
- struct {
- int32_t op_ret;
- int32_t op_errno;
- size_t size;
- off_t offset;
- dict_t *dict;
- gf_boolean_t failed;
- int last_index;
- } readdir;
- /* inode write */
-
- struct {
- struct iatt prebuf;
- struct iatt postbuf;
-
- int32_t op_ret;
-
- struct iovec *vector;
- struct iobref *iobref;
- int32_t count;
- off_t offset;
- uint32_t flags;
- } writev;
-
- struct {
- struct iatt prebuf;
- struct iatt postbuf;
- } fsync;
-
- struct {
- off_t offset;
- struct iatt prebuf;
- struct iatt postbuf;
- } truncate;
-
- struct {
- off_t offset;
- struct iatt prebuf;
- struct iatt postbuf;
- } ftruncate;
-
- struct {
- struct iatt in_buf;
- int32_t valid;
- struct iatt preop_buf;
- struct iatt postop_buf;
- } setattr;
-
- struct {
- struct iatt in_buf;
- int32_t valid;
- struct iatt preop_buf;
- struct iatt postop_buf;
- } fsetattr;
-
- struct {
- dict_t *dict;
- int32_t flags;
- } setxattr;
-
- struct {
- dict_t *dict;
- int32_t flags;
- } fsetxattr;
-
- struct {
- char *name;
- } removexattr;
-
- struct {
- dict_t *xattr;
- } xattrop;
-
- struct {
- dict_t *xattr;
- } fxattrop;
-
- /* dir write */
-
- struct {
- fd_t *fd;
- dict_t *params;
- int32_t flags;
- mode_t mode;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- struct iatt read_child_buf;
- } create;
-
- struct {
- dev_t dev;
- mode_t mode;
- dict_t *params;
- inode_t *inode;
- struct iatt buf;
- struct iatt preparent;
- struct iatt postparent;
- struct iatt read_child_buf;
- } mknod;
-
- struct {
- int32_t mode;
- dict_t *params;
- inode_t *inode;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preparent;
- struct iatt postparent;
- } mkdir;
-
- struct {
- int32_t op_ret;
- int32_t op_errno;
- struct iatt preparent;
- struct iatt postparent;
- } unlink;
-
- struct {
- int flags;
- int32_t op_ret;
- int32_t op_errno;
- struct iatt preparent;
- struct iatt postparent;
- } rmdir;
-
- struct {
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preoldparent;
- struct iatt prenewparent;
- struct iatt postoldparent;
- struct iatt postnewparent;
- } rename;
-
- struct {
- inode_t *inode;
- struct iatt buf;
- struct iatt read_child_buf;
- struct iatt preparent;
- struct iatt postparent;
- } link;
-
- struct {
- inode_t *inode;
- dict_t *params;
- struct iatt buf;
- struct iatt read_child_buf;
- char *linkpath;
- struct iatt preparent;
- struct iatt postparent;
- } symlink;
-
- struct {
- int32_t flags;
- dir_entry_t *entries;
- int32_t count;
- } setdents;
- } cont;
+ dict_t *dict;
+ int32_t flags;
+ } fsetxattr;
struct {
- off_t start, len;
+ char *name;
+ } removexattr;
- int *eager_lock;
+ struct {
+ dict_t *xattr;
+ gf_xattrop_flags_t optype;
+ } xattrop;
- char *basename;
- char *new_basename;
+ /* dir write */
- loc_t parent_loc;
- loc_t new_parent_loc;
+ struct {
+ inode_t *inode;
+ struct iatt buf;
+ struct iatt preparent;
+ struct iatt postparent;
+ struct iatt prenewparent;
+ struct iatt postnewparent;
+ } dir_fop; // common structure for all dir fops
- afr_transaction_type type;
+ struct {
+ fd_t *fd;
+ dict_t *params;
+ int32_t flags;
+ mode_t mode;
+ } create;
- int32_t **txn_changelog;//changelog after pre+post ops
- unsigned char *pre_op;
+ struct {
+ dict_t *params;
+ dev_t dev;
+ mode_t mode;
+ } mknod;
- call_frame_t *main_frame;
+ struct {
+ dict_t *params;
+ int32_t mode;
+ } mkdir;
- int (*fop) (call_frame_t *frame, xlator_t *this);
+ struct {
+ dict_t *params;
+ char *linkpath;
+ } symlink;
- int (*done) (call_frame_t *frame, xlator_t *this);
+ struct {
+ off_t offset;
+ size_t len;
+ int32_t mode;
+ } fallocate;
- int (*resume) (call_frame_t *frame, xlator_t *this);
+ struct {
+ off_t offset;
+ size_t len;
+ } discard;
- int (*unwind) (call_frame_t *frame, xlator_t *this);
+ struct {
+ off_t offset;
+ off_t len;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ } zerofill;
- /* post-op hook */
- } transaction;
+ struct {
+ char *volume;
+ int32_t cmd;
+ int32_t in_cmd;
+ struct gf_flock in_flock;
+ struct gf_flock flock;
+ void *xdata;
+ } inodelk;
- afr_self_heal_t self_heal;
+ struct {
+ char *volume;
+ char *basename;
+ void *xdata;
+ entrylk_cmd in_cmd;
+ entrylk_cmd cmd;
+ entrylk_type type;
+ } entrylk;
- struct marker_str marker;
+ struct {
+ off_t offset;
+ gf_seek_what_t what;
+ } seek;
- /* extra data for fops */
- dict_t *xdata_req;
- dict_t *xdata_rsp;
+ struct {
+ struct gf_lease user_lease;
+ struct gf_lease ret_lease;
+ unsigned char *locked_nodes;
+ } lease;
- mode_t umask;
- int xflag;
- gf_boolean_t do_discovery;
-} afr_local_t;
+ struct {
+ int flags;
+ } rmdir;
-typedef enum {
- AFR_FD_NOT_OPENED,
- AFR_FD_OPENED,
- AFR_FD_OPENING
-} afr_fd_open_status_t;
+ struct {
+ int32_t datasync;
+ } fsync;
-typedef struct {
- struct list_head call_list;
- call_frame_t *frame;
-} afr_fd_paused_call_t;
+ struct {
+ uuid_t gfid_req;
+ gf_boolean_t needs_fresh_lookup;
+ } lookup;
-typedef struct {
- unsigned int *pre_op_done;
- afr_fd_open_status_t *opened_on; /* which subvolumes the fd is open on */
- unsigned int *pre_op_piggyback;
+ } cont;
- unsigned int *lock_piggyback;
- unsigned int *lock_acquired;
+ struct {
+ char *basename;
+ char *new_basename;
- int flags;
- uint64_t up_count; /* number of CHILD_UPs this fd has seen */
- uint64_t down_count; /* number of CHILD_DOWNs this fd has seen */
+ loc_t parent_loc;
+ loc_t new_parent_loc;
- int32_t last_tried;
+ /* stub to resume on destruction
+ of the transaction frame */
+ call_stub_t *resume_stub;
- int hit, miss;
- gf_boolean_t failed_over;
- struct list_head entries; /* needed for readdir failover */
+ struct list_head owner_list;
+ struct list_head wait_list;
- unsigned char *locked_on; /* which subvolumes locks have been successful */
- struct list_head paused_calls; /* queued calls while fix_open happens */
-} afr_fd_ctx_t;
+ unsigned char *pre_op;
+ /* Changelog xattr dict for [f]xattrop*/
+ dict_t **changelog_xdata;
+ unsigned char *pre_op_sources;
-/* try alloc and if it fails, goto label */
-#define AFR_LOCAL_ALLOC_OR_GOTO(var, label) do { \
- var = mem_get0 (THIS->local_pool); \
- if (!var) { \
- gf_log (this->name, GF_LOG_ERROR, \
- "out of memory :("); \
- op_errno = ENOMEM; \
- goto label; \
- } \
- } while (0);
+ /* @failed_subvols: subvolumes on which a pre-op or a
+ FOP failed. */
+ unsigned char *failed_subvols;
+ call_frame_t *main_frame; /*Fop frame*/
+ call_frame_t *frame; /*Transaction frame*/
-/* did a call fail due to a child failing? */
-#define child_went_down(op_ret, op_errno) (((op_ret) < 0) && \
- ((op_errno == ENOTCONN) || \
- (op_errno == EBADFD)))
+ int (*wind)(call_frame_t *frame, xlator_t *this, int subvol);
-#define afr_fop_failed(op_ret, op_errno) ((op_ret) == -1)
+ int (*unwind)(call_frame_t *frame, xlator_t *this);
-/* have we tried all children? */
-#define all_tried(i, count) ((i) == (count) - 1)
+ off_t start, len;
-int32_t
-afr_set_dict_gfid (dict_t *dict, uuid_t gfid);
+ afr_transaction_type type;
-int
-pump_command_reply (call_frame_t *frame, xlator_t *this);
+ int32_t in_flight_sb_errno; /* This is where the cause of the
+ failure on the last good copy of
+ the file is stored.
+ */
-int32_t
-afr_notify (xlator_t *this, int32_t event, void *data, void *data2);
+ /* @changelog_resume: function to be called after changlogging
+ (either pre-op or post-op) is done
+ */
+ afr_changelog_resume_t changelog_resume;
+
+ gf_boolean_t eager_lock_on;
+ gf_boolean_t do_eager_unlock;
+
+ /* @dirtied: flag which indicates whether we set dirty flag
+ in the OP. Typically true when we are performing operation
+ on more than one subvol and optimistic changelog is disabled
+
+ A 'true' value set in @dirtied flag means an 'undirtying'
+ has to be done in POST-OP phase.
+ */
+ gf_boolean_t dirtied;
+
+ /* @inherited: flag which indicates that the dirty flags
+ of the previous transaction were inherited
+ */
+ gf_boolean_t inherited;
+
+ /*
+ @no_uninherit: flag which indicates that a pre_op_uninherit()
+ must _not_ be attempted (and returned as failure) always. This
+ flag is set when a hard pre-op is performed, but not accounted
+ for it in fd_ctx->on_disk[]. Such transactions are "isolated"
+ from the pre-op piggybacking entirely and therefore uninherit
+ must not be attempted.
+ */
+ gf_boolean_t no_uninherit;
+
+ gf_boolean_t in_flight_sb; /* Indicator for occurrence of
+ split-brain while in the middle of
+ a txn. */
+
+ /* @uninherit_done:
+ @uninherit_value:
+ The above pair variables make pre_op_uninherit() idempotent.
+ Both are FALSE initially. The first call to pre_op_uninherit
+ sets @uninherit_done to TRUE and the return value to
+ @uninherit_value. Further calls will check for @uninherit_done
+ to be TRUE and if so will simply return @uninherit_value.
+ */
+ gf_boolean_t uninherit_done;
+ gf_boolean_t uninherit_value;
+
+ gf_boolean_t disable_delayed_post_op;
+ } transaction;
+
+ syncbarrier_t barrier;
+
+ /* extra data for fops */
+ dict_t *xdata_req;
+ dict_t *xdata_rsp;
+
+ dict_t *xattr_rsp; /*for [f]xattrop*/
+
+ mode_t umask;
+ int xflag;
+ struct afr_reply *replies;
+
+ /* For client side background heals. */
+ struct list_head healer;
+ call_frame_t *heal_frame;
+
+ afr_inode_ctx_t *inode_ctx;
+
+ /*For thin-arbiter transactions.*/
+ int ta_failed_subvol;
+ int ta_event_gen;
+ struct list_head ta_waitq;
+ struct list_head ta_onwireq;
+ afr_ta_fop_state_t fop_state;
+ afr_fop_lock_state_t fop_lock_state;
+ gf_lkowner_t saved_lk_owner;
+ unsigned char read_txn_query_child;
+ unsigned char ta_child_up;
+ gf_boolean_t do_discovery;
+ gf_boolean_t need_full_crawl;
+ gf_boolean_t is_read_txn;
+ gf_boolean_t is_new_entry;
+} afr_local_t;
+
+typedef struct afr_spbc_timeout {
+ call_frame_t *frame;
+ loc_t *loc;
+ int spb_child_index;
+ gf_boolean_t d_spb;
+ gf_boolean_t m_spb;
+} afr_spbc_timeout_t;
+
+typedef struct afr_spb_status {
+ call_frame_t *frame;
+ loc_t *loc;
+} afr_spb_status_t;
+
+typedef struct afr_empty_brick_args {
+ call_frame_t *frame;
+ char *op_type;
+ loc_t loc;
+ int empty_index;
+} afr_empty_brick_args_t;
+
+typedef struct afr_read_subvol_args {
+ ia_type_t ia_type;
+ uuid_t gfid;
+} afr_read_subvol_args_t;
+
+typedef struct afr_granular_esh_args {
+ fd_t *heal_fd;
+ xlator_t *xl;
+ call_frame_t *frame;
+ gf_boolean_t mismatch; /* flag to represent occurrence of type/gfid
+ mismatch */
+} afr_granular_esh_args_t;
+
+int
+afr_inode_get_readable(call_frame_t *frame, inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p, int type);
+int
+afr_inode_read_subvol_get(inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int *event_generation);
int
-afr_attempt_lock_recovery (xlator_t *this, int32_t child_index);
+__afr_inode_read_subvol_get(inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int *event_generation);
int
-afr_save_locked_fd (xlator_t *this, fd_t *fd);
+__afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvol,
+ int event_generation);
+int
+afr_inode_read_subvol_set(inode_t *inode, xlator_t *this,
+ unsigned char *data_subvols,
+ unsigned char *metadata_subvols,
+ int event_generation);
int
-afr_mark_locked_nodes (xlator_t *this, fd_t *fd,
- unsigned char *locked_nodes);
+__afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
-void
-afr_set_lk_owner (call_frame_t *frame, xlator_t *this, void *lk_owner);
+int
+afr_inode_need_refresh_set(inode_t *inode, xlator_t *this);
int
-afr_set_lock_number (call_frame_t *frame, xlator_t *this);
+afr_read_subvol_select_by_policy(inode_t *inode, xlator_t *this,
+ unsigned char *readable,
+ afr_read_subvol_args_t *args);
+int
+afr_inode_read_subvol_type_get(inode_t *inode, xlator_t *this,
+ unsigned char *readable, int *event_p, int type);
+int
+afr_read_subvol_get(inode_t *inode, xlator_t *this, int *subvol_p,
+ unsigned char *readables, int *event_p,
+ afr_transaction_type type, afr_read_subvol_args_t *args);
-loc_t *
-lower_path (loc_t *l1, const char *b1, loc_t *l2, const char *b2);
+#define afr_data_subvol_get(i, t, s, r, e, a) \
+ afr_read_subvol_get(i, t, s, r, e, AFR_DATA_TRANSACTION, a)
-int32_t
-afr_unlock (call_frame_t *frame, xlator_t *this);
+#define afr_metadata_subvol_get(i, t, s, r, e, a) \
+ afr_read_subvol_get(i, t, s, r, e, AFR_METADATA_TRANSACTION, a)
int
-afr_nonblocking_entrylk (call_frame_t *frame, xlator_t *this);
+afr_inode_refresh(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ uuid_t gfid, afr_inode_refresh_cbk_t cbk);
+
+int32_t
+afr_notify(xlator_t *this, int32_t event, void *data, void *data2);
int
-afr_nonblocking_inodelk (call_frame_t *frame, xlator_t *this);
+xattr_is_equal(dict_t *this, char *key1, data_t *value1, void *data);
int
-afr_blocking_lock (call_frame_t *frame, xlator_t *this);
+afr_add_entry_lockee(afr_local_t *local, loc_t *loc, char *basename,
+ int child_count);
int
-afr_internal_lock_finish (call_frame_t *frame, xlator_t *this);
+afr_add_inode_lockee(afr_local_t *local, int child_count);
void
-afr_lk_transfer_datalock (call_frame_t *dst, call_frame_t *src,
- unsigned int child_count);
-
-int pump_start (call_frame_t *frame, xlator_t *this);
+afr_lockees_cleanup(afr_internal_lock_t *int_lock);
int
-__afr_fd_ctx_set (xlator_t *this, fd_t *fd);
+afr_attempt_lock_recovery(xlator_t *this, int32_t child_index);
int
-afr_fd_ctx_set (xlator_t *this, fd_t *fd);
-
-int32_t
-afr_inode_get_read_ctx (xlator_t *this, inode_t *inode, int32_t *fresh_children);
+afr_mark_locked_nodes(xlator_t *this, fd_t *fd, unsigned char *locked_nodes);
void
-afr_inode_set_read_ctx (xlator_t *this, inode_t *inode, int32_t read_child,
- int32_t *fresh_children);
+afr_set_lk_owner(call_frame_t *frame, xlator_t *this, void *lk_owner);
int
-afr_build_parent_loc (loc_t *parent, loc_t *child, int32_t *op_errno);
+afr_set_lock_number(call_frame_t *frame, xlator_t *this);
-unsigned int
-afr_up_children_count (unsigned char *child_up, unsigned int child_count);
+int32_t
+afr_unlock(call_frame_t *frame, xlator_t *this);
-unsigned int
-afr_locked_children_count (unsigned char *children, unsigned int child_count);
+int
+afr_lock_nonblocking(call_frame_t *frame, xlator_t *this);
-unsigned int
-afr_pre_op_done_children_count (unsigned char *pre_op,
- unsigned int child_count);
+int
+afr_blocking_lock(call_frame_t *frame, xlator_t *this);
-gf_boolean_t
-afr_is_fresh_lookup (loc_t *loc, xlator_t *this);
+int
+afr_internal_lock_finish(call_frame_t *frame, xlator_t *this);
-void
-afr_update_loc_gfids (loc_t *loc, struct iatt *buf, struct iatt *postparent);
+int
+__afr_fd_ctx_set(xlator_t *this, fd_t *fd);
+
+afr_fd_ctx_t *
+afr_fd_ctx_get(fd_t *fd, xlator_t *this);
int
-afr_locked_nodes_count (unsigned char *locked_nodes, int child_count);
+afr_build_parent_loc(loc_t *parent, loc_t *child, int32_t *op_errno);
-void
-afr_local_cleanup (afr_local_t *local, xlator_t *this);
+int
+afr_locked_nodes_count(unsigned char *locked_nodes, int child_count);
int
-afr_frame_return (call_frame_t *frame);
+afr_replies_interpret(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ gf_boolean_t *start_heal);
-gf_boolean_t
-afr_is_split_brain (xlator_t *this, inode_t *inode);
+void
+afr_local_replies_wipe(afr_local_t *local, afr_private_t *priv);
void
-afr_set_split_brain (xlator_t *this, inode_t *inode, gf_boolean_t set);
+afr_local_cleanup(afr_local_t *local, xlator_t *this);
int
-afr_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata);
-
-void
-afr_set_opendir_done (xlator_t *this, inode_t *inode);
+afr_frame_return(call_frame_t *frame);
-gf_boolean_t
-afr_is_opendir_done (xlator_t *this, inode_t *inode);
+int
+afr_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
void
-afr_local_transaction_cleanup (afr_local_t *local, xlator_t *this);
+afr_local_transaction_cleanup(afr_local_t *local, xlator_t *this);
int
-afr_cleanup_fd_ctx (xlator_t *this, fd_t *fd);
+afr_cleanup_fd_ctx(xlator_t *this, fd_t *fd);
+
+#define AFR_STACK_UNWIND(fop, frame, op_ret, op_errno, params...) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ int32_t __op_ret = 0; \
+ int32_t __op_errno = 0; \
+ \
+ __op_ret = op_ret; \
+ __op_errno = op_errno; \
+ if (frame) { \
+ __local = frame->local; \
+ __this = frame->this; \
+ afr_handle_inconsistent_fop(frame, &__op_ret, &__op_errno); \
+ if (__local && __local->is_read_txn) \
+ afr_pending_read_decrement(__this->private, \
+ __local->read_subvol); \
+ if (__local && __local->xdata_req && \
+ afr_is_lock_mode_mandatory(__local->xdata_req)) \
+ afr_dom_lock_release(frame); \
+ frame->local = NULL; \
+ } \
+ \
+ STACK_UNWIND_STRICT(fop, frame, __op_ret, __op_errno, params); \
+ if (__local) { \
+ afr_local_cleanup(__local, __this); \
+ mem_put(__local); \
+ } \
+ } while (0)
+
+#define AFR_STACK_DESTROY(frame) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ STACK_DESTROY(frame->root); \
+ if (__local) { \
+ afr_local_cleanup(__local, __this); \
+ mem_put(__local); \
+ } \
+ } while (0);
+
+#define AFR_FRAME_INIT(frame, op_errno) \
+ ({ \
+ frame->local = mem_get0(THIS->local_pool); \
+ if (afr_local_init(frame->local, frame->this->private, &op_errno)) { \
+ afr_local_cleanup(frame->local, frame->this); \
+ mem_put(frame->local); \
+ frame->local = NULL; \
+ }; \
+ frame->local; \
+ })
+
+#define AFR_STACK_RESET(frame) \
+ do { \
+ afr_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ int __opr; \
+ STACK_RESET(frame->root); \
+ if (__local) { \
+ afr_local_cleanup(__local, __this); \
+ mem_put(__local); \
+ } \
+ AFR_FRAME_INIT(frame, __opr); \
+ } while (0)
-int
-afr_launch_openfd_self_heal (call_frame_t *frame, xlator_t *this, fd_t *fd);
-
-#define AFR_STACK_UNWIND(fop, frame, params ...) \
- do { \
- afr_local_t *__local = NULL; \
- xlator_t *__this = NULL; \
- if (frame) { \
- __local = frame->local; \
- __this = frame->this; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT (fop, frame, params); \
- if (__local) { \
- afr_local_cleanup (__local, __this); \
- mem_put (__local); \
- } \
- } while (0)
-
-#define AFR_STACK_DESTROY(frame) \
- do { \
- afr_local_t *__local = NULL; \
- xlator_t *__this = NULL; \
- __local = frame->local; \
- __this = frame->this; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- if (__local) { \
- afr_local_cleanup (__local, __this); \
- mem_put (__local); \
- } \
- } while (0);
-
-#define AFR_NUM_CHANGE_LOGS 3 /*data + metadata + entry*/
/* allocate and return a string that is the basename of argument */
static inline char *
-AFR_BASENAME (const char *str)
+AFR_BASENAME(const char *str)
{
- char *__tmp_str = NULL;
- char *__basename_str = NULL;
- __tmp_str = gf_strdup (str);
- __basename_str = gf_strdup (basename (__tmp_str));
- GF_FREE (__tmp_str);
- return __basename_str;
+ char *__tmp_str = NULL;
+ char *__basename_str = NULL;
+ __tmp_str = gf_strdup(str);
+ __basename_str = gf_strdup(basename(__tmp_str));
+ GF_FREE(__tmp_str);
+ return __basename_str;
}
+call_frame_t *
+afr_copy_frame(call_frame_t *base);
+
int
-afr_transaction_local_init (afr_local_t *local, xlator_t *this);
+afr_transaction_local_init(afr_local_t *local, xlator_t *this);
int32_t
-afr_marker_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name,afr_local_t *local, afr_private_t *priv );
+afr_marker_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, afr_local_t *local, afr_private_t *priv);
-int32_t *
-afr_children_create (int32_t child_count);
+int
+afr_local_init(afr_local_t *local, afr_private_t *priv, int32_t *op_errno);
int
-afr_local_init (afr_local_t *local, afr_private_t *priv, int32_t *op_errno);
+afr_internal_lock_init(afr_internal_lock_t *lk, size_t child_count);
int
-afr_internal_lock_init (afr_internal_lock_t *lk, size_t child_count,
- transaction_lk_type_t lk_type);
+afr_higher_errno(int32_t old_errno, int32_t new_errno);
int
-afr_first_up_child (unsigned char *child_up, size_t child_count);
+afr_final_errno(afr_local_t *local, afr_private_t *priv);
int
-afr_select_read_child_from_policy (int32_t *fresh_children, int32_t child_count,
- int32_t prev_read_child,
- int32_t config_read_child, int32_t *sources,
- unsigned int hmode, uuid_t gfid);
+afr_xattr_req_prepare(xlator_t *this, dict_t *xattr_req);
void
-afr_set_read_ctx_from_policy (xlator_t *this, inode_t *inode,
- int32_t *fresh_children, int32_t prev_read_child,
- int32_t config_read_child, uuid_t gfid);
+afr_fix_open(fd_t *fd, xlator_t *this);
-int32_t
-afr_get_call_child (xlator_t *this, unsigned char *child_up, int32_t read_child,
- int32_t *fresh_children,
- int32_t *call_child, int32_t *last_index);
+afr_fd_ctx_t *
+afr_fd_ctx_get(fd_t *fd, xlator_t *this);
-int32_t
-afr_next_call_child (int32_t *fresh_children, unsigned char *child_up,
- size_t child_count, int32_t *last_index,
- int32_t read_child);
void
-afr_get_fresh_children (int32_t *success_children, int32_t *sources,
- int32_t *children, unsigned int child_count);
+afr_set_low_priority(call_frame_t *frame);
+int
+afr_child_fd_ctx_set(xlator_t *this, fd_t *fd, int32_t child, int flags);
+
void
-afr_children_add_child (int32_t *children, int32_t child,
- int32_t child_count);
+afr_matrix_cleanup(int32_t **pending, unsigned int m);
+
+int32_t **
+afr_matrix_create(unsigned int m, unsigned int n);
+
+int **
+afr_mark_pending_changelog(afr_private_t *priv, unsigned char *pending,
+ dict_t *xattr, ia_type_t iat);
+
void
-afr_children_rm_child (int32_t *children, int32_t child,
- int32_t child_count);
+afr_filter_xattrs(dict_t *xattr);
+
+/*
+ * Special value indicating we should use the "auto" quorum method instead of
+ * a fixed value (including zero to turn off quorum enforcement).
+ */
+#define AFR_QUORUM_AUTO INT_MAX
+
+int
+afr_fd_report_unstable_write(xlator_t *this, afr_local_t *local);
+
+gf_boolean_t
+afr_fd_has_witnessed_unstable_write(xlator_t *this, inode_t *inode);
+
+void
+afr_reply_wipe(struct afr_reply *reply);
+
void
-afr_reset_children (int32_t *children, int32_t child_count);
+afr_replies_wipe(struct afr_reply *replies, int count);
+
+gf_boolean_t
+afr_xattrs_are_equal(dict_t *dict1, dict_t *dict2);
+
+gf_boolean_t
+afr_is_xattr_ignorable(char *key);
+
+int
+afr_get_heal_info(call_frame_t *frame, xlator_t *this, loc_t *loc);
+
+int
+afr_heal_splitbrain_file(call_frame_t *frame, xlator_t *this, loc_t *loc);
+
+int
+afr_get_split_brain_status(void *opaque);
+
+int
+afr_get_split_brain_status_cbk(int ret, call_frame_t *frame, void *opaque);
+
+int
+afr_inode_split_brain_choice_set(inode_t *inode, xlator_t *this,
+ int spb_choice);
+int
+afr_split_brain_read_subvol_get(inode_t *inode, xlator_t *this,
+ call_frame_t *frame, int *spb_subvol);
+int
+afr_get_child_index_from_name(xlator_t *this, char *name);
+
+int
+afr_is_split_brain(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ uuid_t gfid, gf_boolean_t *d_spb, gf_boolean_t *m_spb);
+int
+afr_spb_choice_timeout_cancel(xlator_t *this, inode_t *inode);
+
+int
+afr_set_split_brain_choice(int ret, call_frame_t *frame, void *opaque);
+
gf_boolean_t
-afr_error_more_important (int32_t old_errno, int32_t new_errno);
+afr_get_need_heal(xlator_t *this);
+
+void
+afr_set_need_heal(xlator_t *this, afr_local_t *local);
+
int
-afr_errno_count (int32_t *children, int *child_errno,
- unsigned int child_count, int32_t op_errno);
+afr_selfheal_data_open(xlator_t *this, inode_t *inode, fd_t **fd);
+
int
-afr_get_children_count (int32_t *children, unsigned int child_count);
+afr_get_msg_id(char *op_type);
+
+int
+afr_set_in_flight_sb_status(xlator_t *this, call_frame_t *frame,
+ inode_t *inode);
+
+int32_t
+afr_quorum_errno(afr_private_t *priv);
+
gf_boolean_t
-afr_is_child_present (int32_t *success_children, int32_t child_count,
- int32_t child);
+afr_is_consistent_io_possible(afr_local_t *local, afr_private_t *priv,
+ int32_t *op_errno);
void
-afr_update_gfid_from_iatts (uuid_t uuid, struct iatt *bufs,
- int32_t *success_children,
- unsigned int child_count);
+afr_handle_inconsistent_fop(call_frame_t *frame, int32_t *op_ret,
+ int32_t *op_errno);
+
void
-afr_reset_xattr (dict_t **xattr, unsigned int child_count);
-gf_boolean_t
-afr_conflicting_iattrs (struct iatt *bufs, int32_t *success_children,
- unsigned int child_count, const char *path,
- const char *xlator_name);
-unsigned int
-afr_gfid_missing_count (const char *xlator_name, int32_t *children,
- struct iatt *bufs, unsigned int child_count,
- const char *path);
+afr_inode_write_fill(call_frame_t *frame, xlator_t *this, int child_index,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
void
-afr_xattr_req_prepare (xlator_t *this, dict_t *xattr_req, const char *path);
+afr_process_post_writev(call_frame_t *frame, xlator_t *this);
+
void
-afr_children_copy (int32_t *dst, int32_t *src, unsigned int child_count);
-afr_transaction_type
-afr_transaction_type_get (ia_type_t ia_type);
-int32_t
-afr_resultant_errno_get (int32_t *children,
- int *child_errno, unsigned int child_count);
+afr_writev_unwind(call_frame_t *frame, xlator_t *this);
+
void
-afr_inode_rm_stale_children (xlator_t *this, inode_t *inode,
- int32_t *stale_children);
+afr_writev_copy_outvars(call_frame_t *src_frame, call_frame_t *dst_frame);
+
void
-afr_launch_self_heal (call_frame_t *frame, xlator_t *this, inode_t *inode,
- gf_boolean_t background, ia_type_t ia_type, char *reason,
- void (*gfid_sh_success_cbk) (call_frame_t *sh_frame,
- xlator_t *this),
- int (*unwind) (call_frame_t *frame, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- int32_t sh_failed));
+afr_update_uninodelk(afr_local_t *local, afr_internal_lock_t *int_lock,
+ int32_t child_index);
+afr_fd_ctx_t *
+__afr_fd_ctx_get(fd_t *fd, xlator_t *this);
+
+gf_boolean_t
+afr_is_inode_refresh_reqd(inode_t *inode, xlator_t *this, int event_gen1,
+ int event_gen2);
+
+int
+afr_serialize_xattrs_with_delimiter(call_frame_t *frame, xlator_t *this,
+ char *buf, const char *default_str,
+ int32_t *serz_len, char delimiter);
+gf_boolean_t
+afr_is_symmetric_error(call_frame_t *frame, xlator_t *this);
+
int
-afr_fix_open (call_frame_t *frame, xlator_t *this, afr_fd_ctx_t *fd_ctx,
- int need_open_count, int *need_open);
+__afr_inode_ctx_get(xlator_t *this, inode_t *inode, afr_inode_ctx_t **ctx);
+
+uint64_t
+afr_write_subvol_get(call_frame_t *frame, xlator_t *this);
+
int
-afr_open_fd_fix (call_frame_t *frame, xlator_t *this, gf_boolean_t pause_fop);
+afr_write_subvol_set(call_frame_t *frame, xlator_t *this);
+
int
-afr_set_elem_count_get (unsigned char *elems, int child_count);
+afr_write_subvol_reset(call_frame_t *frame, xlator_t *this);
-afr_fd_ctx_t *
-afr_fd_ctx_get (fd_t *fd, xlator_t *this);
+int
+afr_set_inode_local(xlator_t *this, afr_local_t *local, inode_t *inode);
+
+int
+afr_fill_ta_loc(xlator_t *this, loc_t *loc, gf_boolean_t is_gfid_based_fop);
+
+int
+afr_ta_post_op_lock(xlator_t *this, loc_t *loc);
+
+int
+afr_ta_post_op_unlock(xlator_t *this, loc_t *loc);
gf_boolean_t
-afr_open_only_data_self_heal (char *data_self_heal);
+afr_is_pending_set(xlator_t *this, dict_t *xdata, int type);
+
+int
+__afr_get_up_children_count(afr_private_t *priv);
+
+call_frame_t *
+afr_ta_frame_create(xlator_t *this);
gf_boolean_t
-afr_data_self_heal_enabled (char *data_self_heal);
+afr_ta_has_quorum(afr_private_t *priv, afr_local_t *local);
void
-afr_set_low_priority (call_frame_t *frame);
-int
-afr_child_fd_ctx_set (xlator_t *this, fd_t *fd, int32_t child,
- int flags);
+afr_ta_lock_release_synctask(xlator_t *this);
+
+void
+afr_ta_locked_priv_invalidate(afr_private_t *priv);
gf_boolean_t
-afr_have_quorum (char *logname, afr_private_t *priv);
+afr_lookup_has_quorum(call_frame_t *frame,
+ const unsigned int up_children_count);
void
-afr_matrix_cleanup (int32_t **pending, unsigned int m);
+afr_mark_new_entry_changelog(call_frame_t *frame, xlator_t *this);
-int32_t**
-afr_matrix_create (unsigned int m, unsigned int n);
-/*
- * Special value indicating we should use the "auto" quorum method instead of
- * a fixed value (including zero to turn off quorum enforcement).
- */
-#define AFR_QUORUM_AUTO INT_MAX
+void
+afr_handle_replies_quorum(call_frame_t *frame, xlator_t *this);
-/*
- * Having this as a macro will make debugging a bit weirder, but does reduce
- * the probability of functions handling this check inconsistently.
- */
-#define QUORUM_CHECK(_func,_label) do { \
- if (priv->quorum_count && !afr_have_quorum(this->name,priv)) { \
- gf_log(this->name,GF_LOG_WARNING, \
- "failing "#_func" due to lack of quorum"); \
- op_errno = EROFS; \
- goto _label; \
- } \
-} while (0);
+gf_boolean_t
+afr_ta_dict_contains_pending_xattr(dict_t *dict, afr_private_t *priv,
+ int child);
+void
+afr_selfheal_childup(xlator_t *this, afr_private_t *priv);
+
+gf_boolean_t
+afr_is_lock_mode_mandatory(dict_t *xdata);
+
+void
+afr_dom_lock_release(call_frame_t *frame);
+
+void
+afr_fill_success_replies(afr_local_t *local, afr_private_t *priv,
+ unsigned char *replies);
+
+gf_boolean_t
+afr_is_private_directory(afr_private_t *priv, uuid_t pargfid, const char *name,
+ pid_t pid);
#endif /* __AFR_H__ */
diff --git a/xlators/cluster/afr/src/pump.c b/xlators/cluster/afr/src/pump.c
deleted file mode 100644
index eb3f8478928..00000000000
--- a/xlators/cluster/afr/src/pump.c
+++ /dev/null
@@ -1,2636 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <unistd.h>
-#include <sys/time.h>
-#include <stdlib.h>
-#include <fnmatch.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "afr-common.c"
-#include "defaults.c"
-#include "glusterfs.h"
-
-static uint64_t pump_pid = 0;
-static inline void
-pump_fill_loc_info (loc_t *loc, struct iatt *iatt, struct iatt *parent)
-{
- afr_update_loc_gfids (loc, iatt, parent);
- uuid_copy (loc->inode->gfid, iatt->ia_gfid);
-}
-
-static int
-pump_mark_start_pending (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- pump_priv->pump_start_pending = 1;
-
- return 0;
-}
-
-static int
-is_pump_start_pending (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- return (pump_priv->pump_start_pending);
-}
-
-static int
-pump_remove_start_pending (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- pump_priv->pump_start_pending = 0;
-
- return 0;
-}
-
-static pump_state_t
-pump_get_state ()
-{
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- pump_state_t ret;
-
- this = THIS;
- priv = this->private;
- pump_priv = priv->pump_private;
-
- LOCK (&pump_priv->pump_state_lock);
- {
- ret = pump_priv->pump_state;
- }
- UNLOCK (&pump_priv->pump_state_lock);
-
- return ret;
-}
-
-int
-pump_change_state (xlator_t *this, pump_state_t state)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- pump_state_t state_old;
- pump_state_t state_new;
-
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- GF_ASSERT (pump_priv);
-
- LOCK (&pump_priv->pump_state_lock);
- {
- state_old = pump_priv->pump_state;
- state_new = state;
-
- pump_priv->pump_state = state;
-
- }
- UNLOCK (&pump_priv->pump_state_lock);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Pump changing state from %d to %d",
- state_old,
- state_new);
-
- return 0;
-}
-
-static int
-pump_set_resume_path (xlator_t *this, const char *path)
-{
- int ret = 0;
-
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- GF_ASSERT (pump_priv);
-
- LOCK (&pump_priv->resume_path_lock);
- {
- strncpy (pump_priv->resume_path, path, strlen (path) + 1);
- }
- UNLOCK (&pump_priv->resume_path_lock);
-
- return ret;
-}
-
-static int
-pump_save_path (xlator_t *this, const char *path)
-{
- afr_private_t *priv = NULL;
- pump_state_t state;
- dict_t *dict = NULL;
- loc_t loc = {0};
- int dict_ret = 0;
- int ret = -1;
-
- state = pump_get_state ();
- if (state == PUMP_STATE_RESUME)
- return 0;
-
- priv = this->private;
-
- GF_ASSERT (priv->root_inode);
-
- afr_build_root_loc (this, &loc);
-
- dict = dict_new ();
- dict_ret = dict_set_str (dict, PUMP_PATH, (char *)path);
- if (dict_ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set the key %s", path, PUMP_PATH);
-
- ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "setxattr failed - could not save path=%s", path);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "setxattr succeeded - saved path=%s", path);
- }
-
- dict_unref (dict);
-
- loc_wipe (&loc);
- return 0;
-}
-
-static int
-pump_check_and_update_status (xlator_t *this)
-{
- pump_state_t state;
- int ret = -1;
-
- state = pump_get_state ();
-
- switch (state) {
-
- case PUMP_STATE_RESUME:
- case PUMP_STATE_RUNNING:
- {
- ret = 0;
- break;
- }
- case PUMP_STATE_PAUSE:
- {
- ret = -1;
- break;
- }
- case PUMP_STATE_ABORT:
- {
- pump_save_path (this, "/");
- ret = -1;
- break;
- }
- default:
- {
- gf_log (this->name, GF_LOG_DEBUG,
- "Unknown pump state");
- ret = -1;
- break;
- }
-
- }
-
- return ret;
-}
-
-static const char *
-pump_get_resume_path (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- const char *resume_path = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- resume_path = pump_priv->resume_path;
-
- return resume_path;
-}
-
-static int
-pump_update_resume_state (xlator_t *this, const char *path)
-{
- pump_state_t state;
- const char *resume_path = NULL;
-
- state = pump_get_state ();
-
- if (state == PUMP_STATE_RESUME) {
- resume_path = pump_get_resume_path (this);
- if (strcmp (resume_path, "/") == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Reached the resume path (/). Proceeding to change state"
- " to running");
- pump_change_state (this, PUMP_STATE_RUNNING);
- } else if (strcmp (resume_path, path) == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Reached the resume path. Proceeding to change state"
- " to running");
- pump_change_state (this, PUMP_STATE_RUNNING);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not yet hit the resume path:res-path=%s,path=%s",
- resume_path, path);
- }
- }
-
- return 0;
-}
-
-static gf_boolean_t
-is_pump_traversal_allowed (xlator_t *this, const char *path)
-{
- pump_state_t state;
- const char *resume_path = NULL;
- gf_boolean_t ret = _gf_true;
-
- state = pump_get_state ();
-
- if (state == PUMP_STATE_RESUME) {
- resume_path = pump_get_resume_path (this);
- if (strstr (resume_path, path)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "On the right path to resumption path");
- ret = _gf_true;
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not the right path to resuming=> ignoring traverse");
- ret = _gf_false;
- }
- }
-
- return ret;
-}
-
-static int
-pump_save_file_stats (xlator_t *this, const char *path)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- LOCK (&pump_priv->resume_path_lock);
- {
- pump_priv->number_files_pumped++;
-
- strncpy (pump_priv->current_file, path,
- PATH_MAX);
- }
- UNLOCK (&pump_priv->resume_path_lock);
-
- return 0;
-}
-
-static int
-gf_pump_traverse_directory (loc_t *loc)
-{
- xlator_t *this = NULL;
- fd_t *fd = NULL;
- off_t offset = 0;
- loc_t entry_loc = {0};
- gf_dirent_t *entry = NULL;
- gf_dirent_t *tmp = NULL;
- gf_dirent_t entries;
- struct iatt iatt = {0};
- struct iatt parent = {0};
- dict_t *xattr_rsp = NULL;
- int ret = 0;
- gf_boolean_t is_directory_empty = _gf_true;
- gf_boolean_t free_entries = _gf_false;
-
- INIT_LIST_HEAD (&entries.list);
- this = THIS;
-
- GF_ASSERT (loc->inode);
-
- fd = fd_create (loc->inode, pump_pid);
- if (!fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to create fd for %s", loc->path);
- goto out;
- }
-
- ret = syncop_opendir (this, loc, fd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "opendir failed on %s", loc->path);
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "pump opendir on %s returned=%d",
- loc->path, ret);
-
- while (syncop_readdirp (this, fd, 131072, offset, NULL, &entries)) {
- free_entries = _gf_true;
-
- if (list_empty (&entries.list)) {
- gf_log (this->name, GF_LOG_TRACE,
- "no more entries in directory");
- goto out;
- }
-
- list_for_each_entry_safe (entry, tmp, &entries.list, list) {
- gf_log (this->name, GF_LOG_DEBUG,
- "found readdir entry=%s", entry->d_name);
-
- offset = entry->d_off;
- if (uuid_is_null (entry->d_stat.ia_gfid)) {
- gf_log (this->name, GF_LOG_WARNING, "%s/%s: No "
- "gfid present skipping",
- loc->path, entry->d_name);
- continue;
- }
- loc_wipe (&entry_loc);
- ret = afr_build_child_loc (this, &entry_loc, loc,
- entry->d_name);
- if (ret)
- goto out;
-
- if (!IS_ENTRY_CWD (entry->d_name) &&
- !IS_ENTRY_PARENT (entry->d_name)) {
-
- is_directory_empty = _gf_false;
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup %s => %"PRId64,
- entry_loc.path,
- iatt.ia_ino);
-
- ret = syncop_lookup (this, &entry_loc, NULL,
- &iatt, &xattr_rsp, &parent);
-
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: lookup failed",
- entry_loc.path);
- continue;
- }
- pump_fill_loc_info (&entry_loc, &iatt,
- &parent);
-
- pump_update_resume_state (this, entry_loc.path);
-
- pump_save_path (this, entry_loc.path);
- pump_save_file_stats (this, entry_loc.path);
-
- ret = pump_check_and_update_status (this);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Pump beginning to exit out");
- goto out;
- }
-
- if (IA_ISDIR (iatt.ia_type)) {
- if (is_pump_traversal_allowed (this, entry_loc.path)) {
- gf_log (this->name, GF_LOG_TRACE,
- "entering dir=%s",
- entry->d_name);
- gf_pump_traverse_directory (&entry_loc);
- }
- }
- }
- }
-
- gf_dirent_free (&entries);
- free_entries = _gf_false;
- gf_log (this->name, GF_LOG_TRACE,
- "offset incremented to %d",
- (int32_t ) offset);
-
- }
-
- ret = syncop_close (fd);
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG, "closing the fd failed");
-
- if (is_directory_empty && IS_ROOT_PATH (loc->path)) {
- pump_change_state (this, PUMP_STATE_RUNNING);
- gf_log (this->name, GF_LOG_INFO, "Empty source brick. "
- "Nothing to be done.");
- }
-
-out:
- if (entry_loc.path)
- loc_wipe (&entry_loc);
- if (free_entries)
- gf_dirent_free (&entries);
- return 0;
-}
-
-static int
-pump_update_resume_path (xlator_t *this)
-{
- const char *resume_path = NULL;
-
- resume_path = pump_get_resume_path (this);
-
- if (resume_path) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Found a path to resume from: %s",
- resume_path);
-
- }else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Did not find a path=> setting to '/'");
- pump_set_resume_path (this, "/");
- }
-
- pump_change_state (this, PUMP_STATE_RESUME);
-
- return 0;
-}
-
-static int32_t
-pump_xattr_cleaner (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- loc_t loc = {0};
- int i = 0;
- int ret = 0;
- int source = 0;
- int sink = 1;
-
- priv = this->private;
-
- afr_build_root_loc (this, &loc);
-
- ret = syncop_removexattr (priv->children[source], &loc,
- PUMP_PATH);
-
- ret = syncop_removexattr (priv->children[sink], &loc,
- PUMP_SINK_COMPLETE);
-
- for (i = 0; i < priv->child_count; i++) {
- ret = syncop_removexattr (priv->children[i], &loc,
- PUMP_SOURCE_COMPLETE);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG, "removexattr "
- "failed with %s", strerror (errno));
- }
-
- loc_wipe (&loc);
- return pump_command_reply (frame, this);
-}
-
-static int
-pump_complete_migration (xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
- dict_t *dict = NULL;
- pump_state_t state;
- loc_t loc = {0};
- int dict_ret = 0;
- int ret = -1;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- GF_ASSERT (priv->root_inode);
-
- afr_build_root_loc (this, &loc);
-
- dict = dict_new ();
-
- state = pump_get_state ();
- if (state == PUMP_STATE_RUNNING) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Pump finished pumping");
-
- pump_priv->pump_finished = _gf_true;
-
- dict_ret = dict_set_str (dict, PUMP_SOURCE_COMPLETE, "jargon");
- if (dict_ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set the key %s",
- loc.path, PUMP_SOURCE_COMPLETE);
-
- ret = syncop_setxattr (PUMP_SOURCE_CHILD (this), &loc, dict, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setxattr failed - while notifying source complete");
- }
- dict_ret = dict_set_str (dict, PUMP_SINK_COMPLETE, "jargon");
- if (dict_ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set the key %s",
- loc.path, PUMP_SINK_COMPLETE);
-
- ret = syncop_setxattr (PUMP_SINK_CHILD (this), &loc, dict, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "setxattr failed - while notifying sink complete");
- }
-
- pump_save_path (this, "/");
-
- } else if (state == PUMP_STATE_ABORT) {
- gf_log (this->name, GF_LOG_DEBUG, "Starting cleanup "
- "of pump internal xattrs");
- call_resume (pump_priv->cleaner);
- }
-
- loc_wipe (&loc);
- return 0;
-}
-
-static int
-pump_lookup_sink (loc_t *loc)
-{
- xlator_t *this = NULL;
- struct iatt iatt, parent;
- dict_t *xattr_rsp;
- dict_t *xattr_req = NULL;
- int ret = 0;
-
- this = THIS;
-
- xattr_req = dict_new ();
-
- ret = afr_set_root_gfid (xattr_req);
- if (ret)
- goto out;
-
- ret = syncop_lookup (PUMP_SINK_CHILD (this), loc,
- xattr_req, &iatt, &xattr_rsp, &parent);
-
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Lookup on sink child failed");
- goto out;
- }
-
-out:
- if (xattr_req)
- dict_unref (xattr_req);
-
- return ret;
-}
-
-static int
-pump_task (void *data)
-{
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
-
-
- loc_t loc = {0};
- struct iatt iatt, parent;
- dict_t *xattr_rsp = NULL;
- dict_t *xattr_req = NULL;
-
- int ret = -1;
-
- this = THIS;
- priv = this->private;
-
- GF_ASSERT (priv->root_inode);
-
- afr_build_root_loc (this, &loc);
- xattr_req = dict_new ();
- if (!xattr_req) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Out of memory");
- ret = -1;
- goto out;
- }
-
- afr_set_root_gfid (xattr_req);
- ret = syncop_lookup (this, &loc, xattr_req,
- &iatt, &xattr_rsp, &parent);
-
- gf_log (this->name, GF_LOG_TRACE,
- "lookup: path=%s gfid=%s",
- loc.path, uuid_utoa (loc.inode->gfid));
-
- ret = pump_check_and_update_status (this);
- if (ret < 0) {
- goto out;
- }
-
- pump_update_resume_path (this);
-
- afr_set_root_gfid (xattr_req);
- ret = pump_lookup_sink (&loc);
- if (ret) {
- pump_update_resume_path (this);
- goto out;
- }
-
- gf_pump_traverse_directory (&loc);
-
- pump_complete_migration (this);
-out:
- if (xattr_req)
- dict_unref (xattr_req);
-
- loc_wipe (&loc);
- return 0;
-}
-
-
-static int
-pump_task_completion (int ret, call_frame_t *sync_frame, void *data)
-{
- xlator_t *this = NULL;
- afr_private_t *priv = NULL;
-
- this = THIS;
-
- priv = this->private;
-
- inode_unref (priv->root_inode);
- STACK_DESTROY (sync_frame->root);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Pump xlator exiting");
- return 0;
-}
-
-int
-pump_start (call_frame_t *pump_frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- int ret = -1;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- afr_set_lk_owner (pump_frame, this, pump_frame->root);
- pump_pid = (uint64_t) (unsigned long)pump_frame->root;
-
- ret = synctask_new (pump_priv->env, pump_task,
- pump_task_completion,
- pump_frame, NULL);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "starting pump failed");
- pump_change_state (this, PUMP_STATE_ABORT);
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "setting pump as started lk_owner: %s %"PRIu64,
- lkowner_utoa (&pump_frame->root->lk_owner), pump_pid);
-
- priv->use_afr_in_pump = 1;
-out:
- return ret;
-}
-
-static int
-pump_start_synctask (xlator_t *this)
-{
- call_frame_t *frame = NULL;
- int ret = 0;
-
- frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- ret = -1;
- goto out;
- }
-
- pump_change_state (this, PUMP_STATE_RUNNING);
-
- ret = pump_start (frame, this);
-
-out:
- return ret;
-}
-
-int32_t
-pump_cmd_start_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-
-{
- call_frame_t *prev = NULL;
- afr_local_t *local = NULL;
- int ret = 0;
-
- local = frame->local;
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not initiate destination "
- "brick connect");
- ret = op_ret;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Successfully initiated destination "
- "brick connect");
-
- pump_mark_start_pending (this);
-
- /* send the PARENT_UP as pump is ready now */
- prev = cookie;
- if (prev && prev->this)
- prev->this->notify (prev->this, GF_EVENT_PARENT_UP, this);
-
-out:
- local->op_ret = ret;
- pump_command_reply (frame, this);
-
- return 0;
-}
-
-static int
-pump_initiate_sink_connect (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
- dict_t *dict = NULL;
- data_t *data = NULL;
- char *clnt_cmd = NULL;
- loc_t loc = {0};
-
- int ret = 0;
-
- priv = this->private;
- local = frame->local;
-
- GF_ASSERT (priv->root_inode);
-
- afr_build_root_loc (this, &loc);
-
- data = data_ref (dict_get (local->dict, RB_PUMP_CMD_START));
- if (!data) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "Could not get destination brick value");
- goto out;
- }
-
- dict = dict_new ();
- if (!dict) {
- ret = -1;
- goto out;
- }
-
- clnt_cmd = GF_CALLOC (1, data->len+1, gf_common_mt_char);
- if (!clnt_cmd) {
- ret = -1;
- goto out;
- }
-
- memcpy (clnt_cmd, data->data, data->len);
- clnt_cmd[data->len] = '\0';
- gf_log (this->name, GF_LOG_DEBUG, "Got destination brick %s\n",
- clnt_cmd);
-
- ret = dict_set_dynstr (dict, CLIENT_CMD_CONNECT, clnt_cmd);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not inititiate destination brick "
- "connect");
- goto out;
- }
-
- STACK_WIND (frame,
- pump_cmd_start_setxattr_cbk,
- PUMP_SINK_CHILD(this),
- PUMP_SINK_CHILD(this)->fops->setxattr,
- &loc,
- dict,
- 0, NULL);
-
- ret = 0;
-
-out:
- if (dict)
- dict_unref (dict);
-
- if (data)
- data_unref (data);
-
- if (ret && clnt_cmd)
- GF_FREE (clnt_cmd);
-
- loc_wipe (&loc);
- return ret;
-}
-
-static int
-is_pump_aborted (xlator_t *this)
-{
- pump_state_t state;
-
- state = pump_get_state ();
-
- return ((state == PUMP_STATE_ABORT));
-}
-
-int32_t
-pump_cmd_start_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict, dict_t *xdata)
-{
- afr_local_t *local = NULL;
- char *path = NULL;
-
- pump_state_t state;
- int ret = 0;
- int need_unwind = 0;
- int dict_ret = -1;
-
- local = frame->local;
-
- if (op_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "getxattr failed - changing pump "
- "state to RUNNING with '/'");
- path = "/";
- ret = op_ret;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "getxattr succeeded");
-
- dict_ret = dict_get_str (dict, PUMP_PATH, &path);
- if (dict_ret < 0)
- path = "/";
- }
-
- state = pump_get_state ();
- if ((state == PUMP_STATE_RUNNING) ||
- (state == PUMP_STATE_RESUME)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Pump is already started");
- ret = -1;
- goto out;
- }
-
- pump_set_resume_path (this, path);
-
- if (is_pump_aborted (this))
- /* We're re-starting pump afresh */
- ret = pump_initiate_sink_connect (frame, this);
- else {
- /* We're re-starting pump from a previous
- pause */
- gf_log (this->name, GF_LOG_DEBUG,
- "about to start synctask");
- ret = pump_start_synctask (this);
- need_unwind = 1;
- }
-
-out:
- if ((ret < 0) || (need_unwind == 1)) {
- local->op_ret = ret;
- pump_command_reply (frame, this);
- }
- return 0;
-}
-
-int
-pump_execute_status (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- uint64_t number_files = 0;
-
- char filename[PATH_MAX];
- char *dict_str = NULL;
-
- int32_t op_ret = 0;
- int32_t op_errno = 0;
-
- dict_t *dict = NULL;
- int ret = -1;
-
- priv = this->private;
- pump_priv = priv->pump_private;
-
- LOCK (&pump_priv->resume_path_lock);
- {
- number_files = pump_priv->number_files_pumped;
- strncpy (filename, pump_priv->current_file, PATH_MAX);
- }
- UNLOCK (&pump_priv->resume_path_lock);
-
- dict_str = GF_CALLOC (1, PATH_MAX + 256, gf_afr_mt_char);
- if (!dict_str) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- if (pump_priv->pump_finished) {
- snprintf (dict_str, PATH_MAX + 256, "Number of files migrated = %"PRIu64" Migration complete ",
- number_files);
- } else {
- snprintf (dict_str, PATH_MAX + 256, "Number of files migrated = %"PRIu64" Current file= %s ",
- number_files, filename);
- }
-
- dict = dict_new ();
-
- ret = dict_set_dynstr (dict, RB_PUMP_CMD_STATUS, dict_str);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_set_dynstr returned negative value");
- } else {
- dict_str = NULL;
- }
-
- op_ret = 0;
-
-out:
-
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
-
- if (dict)
- dict_unref (dict);
-
- if (dict_str)
- GF_FREE (dict_str);
-
- return 0;
-}
-
-int
-pump_execute_pause (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- pump_change_state (this, PUMP_STATE_PAUSE);
-
- local->op_ret = 0;
- pump_command_reply (frame, this);
-
- return 0;
-}
-
-int
-pump_execute_start (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
-
- int ret = 0;
- loc_t loc = {0};
-
- priv = this->private;
- local = frame->local;
-
- if (!priv->root_inode) {
- gf_log (this->name, GF_LOG_ERROR,
- "Pump xlator cannot be started without an initial "
- "lookup");
- ret = -1;
- goto out;
- }
-
- GF_ASSERT (priv->root_inode);
-
- afr_build_root_loc (this, &loc);
-
- STACK_WIND (frame,
- pump_cmd_start_getxattr_cbk,
- PUMP_SOURCE_CHILD(this),
- PUMP_SOURCE_CHILD(this)->fops->getxattr,
- &loc,
- PUMP_PATH, NULL);
-
- ret = 0;
-
-out:
- if (ret < 0) {
- local->op_ret = ret;
- pump_command_reply (frame, this);
- }
-
- loc_wipe (&loc);
- return 0;
-}
-
-static int
-pump_cleanup_helper (void *data) {
- call_frame_t *frame = data;
-
- pump_xattr_cleaner (frame, 0, frame->this, 0, 0, NULL);
-
- return 0;
-}
-
-static int
-pump_cleanup_done (int ret, call_frame_t *sync_frame, void *data)
-{
- STACK_DESTROY (sync_frame->root);
-
- return 0;
-}
-
-int
-pump_execute_commit (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *sync_frame = NULL;
- int ret = 0;
-
- priv = this->private;
- pump_priv = priv->pump_private;
- local = frame->local;
-
- local->op_ret = 0;
- if (pump_priv->pump_finished) {
- pump_change_state (this, PUMP_STATE_COMMIT);
- sync_frame = create_frame (this, this->ctx->pool);
- ret = synctask_new (pump_priv->env, pump_cleanup_helper,
- pump_cleanup_done, sync_frame, frame);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG, "Couldn't create "
- "synctask for cleaning up xattrs.");
- }
-
- } else {
- gf_log (this->name, GF_LOG_ERROR, "Commit can't proceed. "
- "Migration in progress");
- local->op_ret = -1;
- local->op_errno = EINPROGRESS;
- pump_command_reply (frame, this);
- }
-
- return 0;
-}
-int
-pump_execute_abort (call_frame_t *frame, xlator_t *this)
-{
- afr_private_t *priv = NULL;
- pump_private_t *pump_priv = NULL;
- afr_local_t *local = NULL;
- call_frame_t *sync_frame = NULL;
- int ret = 0;
-
- priv = this->private;
- pump_priv = priv->pump_private;
- local = frame->local;
-
- pump_change_state (this, PUMP_STATE_ABORT);
-
- LOCK (&pump_priv->resume_path_lock);
- {
- pump_priv->number_files_pumped = 0;
- pump_priv->current_file[0] = '\0';
- }
- UNLOCK (&pump_priv->resume_path_lock);
-
- local->op_ret = 0;
- if (pump_priv->pump_finished) {
- sync_frame = create_frame (this, this->ctx->pool);
- ret = synctask_new (pump_priv->env, pump_cleanup_helper,
- pump_cleanup_done, sync_frame, frame);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG, "Couldn't create "
- "synctask for cleaning up xattrs.");
- }
-
- } else {
- pump_priv->cleaner = fop_setxattr_cbk_stub (frame,
- pump_xattr_cleaner,
- 0, 0, NULL);
- }
-
- return 0;
-}
-
-gf_boolean_t
-pump_command_status (xlator_t *this, dict_t *dict)
-{
- char *cmd = NULL;
- int dict_ret = -1;
- int ret = _gf_true;
-
- dict_ret = dict_get_str (dict, RB_PUMP_CMD_STATUS, &cmd);
- if (dict_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not a pump status command");
- ret = _gf_false;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit a pump command - status");
- ret = _gf_true;
-
-out:
- return ret;
-
-}
-
-gf_boolean_t
-pump_command_pause (xlator_t *this, dict_t *dict)
-{
- char *cmd = NULL;
- int dict_ret = -1;
- int ret = _gf_true;
-
- dict_ret = dict_get_str (dict, RB_PUMP_CMD_PAUSE, &cmd);
- if (dict_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not a pump pause command");
- ret = _gf_false;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit a pump command - pause");
- ret = _gf_true;
-
-out:
- return ret;
-
-}
-
-gf_boolean_t
-pump_command_commit (xlator_t *this, dict_t *dict)
-{
- char *cmd = NULL;
- int dict_ret = -1;
- int ret = _gf_true;
-
- dict_ret = dict_get_str (dict, RB_PUMP_CMD_COMMIT, &cmd);
- if (dict_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not a pump commit command");
- ret = _gf_false;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit a pump command - commit");
- ret = _gf_true;
-
-out:
- return ret;
-
-}
-
-gf_boolean_t
-pump_command_abort (xlator_t *this, dict_t *dict)
-{
- char *cmd = NULL;
- int dict_ret = -1;
- int ret = _gf_true;
-
- dict_ret = dict_get_str (dict, RB_PUMP_CMD_ABORT, &cmd);
- if (dict_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not a pump abort command");
- ret = _gf_false;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit a pump command - abort");
- ret = _gf_true;
-
-out:
- return ret;
-
-}
-
-gf_boolean_t
-pump_command_start (xlator_t *this, dict_t *dict)
-{
- char *cmd = NULL;
- int dict_ret = -1;
- int ret = _gf_true;
-
- dict_ret = dict_get_str (dict, RB_PUMP_CMD_START, &cmd);
- if (dict_ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Not a pump start command");
- ret = _gf_false;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit a pump command - start");
- ret = _gf_true;
-
-out:
- return ret;
-
-}
-
-struct _xattr_key {
- char *key;
- struct list_head list;
-};
-
-static void
-__gather_xattr_keys (dict_t *dict, char *key, data_t *value,
- void *data)
-{
- struct list_head * list = data;
- struct _xattr_key * xkey = NULL;
-
- if (!strncmp (key, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
-
- xkey = GF_CALLOC (1, sizeof (*xkey), gf_afr_mt_xattr_key);
- if (!xkey)
- return;
-
- xkey->key = key;
- INIT_LIST_HEAD (&xkey->list);
-
- list_add_tail (&xkey->list, list);
- }
-}
-
-static void
-__filter_xattrs (dict_t *dict)
-{
- struct list_head keys;
-
- struct _xattr_key *key;
- struct _xattr_key *tmp;
-
- INIT_LIST_HEAD (&keys);
-
- dict_foreach (dict, __gather_xattr_keys,
- (void *) &keys);
-
- list_for_each_entry_safe (key, tmp, &keys, list) {
- dict_del (dict, key->key);
-
- list_del_init (&key->list);
-
- GF_FREE (key);
- }
-}
-
-int32_t
-pump_getxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- afr_local_t *local = NULL;
- xlator_t **children = NULL;
- int unwind = 1;
- int32_t *last_index = NULL;
- int32_t next_call_child = -1;
- int32_t read_child = -1;
- int32_t *fresh_children = NULL;
-
-
- priv = this->private;
- children = priv->children;
-
- local = frame->local;
-
- read_child = (long) cookie;
-
- if (op_ret == -1) {
- last_index = &local->cont.getxattr.last_index;
- fresh_children = local->fresh_children;
- next_call_child = afr_next_call_child (fresh_children,
- local->child_up,
- priv->child_count,
- last_index, read_child);
- if (next_call_child < 0)
- goto out;
-
- unwind = 0;
- STACK_WIND_COOKIE (frame, pump_getxattr_cbk,
- (void *) (long) read_child,
- children[next_call_child],
- children[next_call_child]->fops->getxattr,
- &local->loc,
- local->cont.getxattr.name, NULL);
- }
-
-out:
- if (unwind) {
- if (op_ret >= 0 && dict)
- __filter_xattrs (dict);
-
- AFR_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, NULL);
- }
-
- return 0;
-}
-
-int32_t
-pump_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
-{
- afr_private_t * priv = NULL;
- xlator_t ** children = NULL;
- int call_child = 0;
- afr_local_t *local = NULL;
- int32_t ret = -1;
- int32_t op_errno = 0;
- uint64_t read_child = 0;
-
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- priv = this->private;
- VALIDATE_OR_GOTO (priv->children, out);
-
- children = priv->children;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_getxattr_cbk,
- FIRST_CHILD (this),
- (FIRST_CHILD (this))->fops->getxattr,
- loc, name, xdata);
- return 0;
- }
-
-
- AFR_LOCAL_ALLOC_OR_GOTO (frame->local, out);
- local = frame->local;
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0)
- goto out;
-
- if (name) {
- if (!strncmp (name, AFR_XATTR_PREFIX,
- strlen (AFR_XATTR_PREFIX))) {
-
- op_errno = ENODATA;
- goto out;
- }
-
- if (!strcmp (name, RB_PUMP_CMD_STATUS)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Hit pump command - status");
- pump_execute_status (frame, this);
- ret = 0;
- goto out;
- }
- }
-
- local->fresh_children = GF_CALLOC (priv->child_count,
- sizeof (*local->fresh_children),
- gf_afr_mt_int32_t);
- if (!local->fresh_children) {
- ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
-
- read_child = afr_inode_get_read_ctx (this, loc->inode, local->fresh_children);
- ret = afr_get_call_child (this, local->child_up, read_child,
- local->fresh_children,
- &call_child,
- &local->cont.getxattr.last_index);
- if (ret < 0) {
- op_errno = -ret;
- goto out;
- }
- loc_copy (&local->loc, loc);
- if (name)
- local->cont.getxattr.name = gf_strdup (name);
-
- STACK_WIND_COOKIE (frame, pump_getxattr_cbk,
- (void *) (long) call_child,
- children[call_child], children[call_child]->fops->getxattr,
- loc, name, xdata);
-
- ret = 0;
-out:
- if (ret < 0)
- AFR_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-static int
-afr_setxattr_unwind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = NULL;
- call_frame_t *main_frame = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (local->transaction.main_frame)
- main_frame = local->transaction.main_frame;
- local->transaction.main_frame = NULL;
- }
- UNLOCK (&frame->lock);
-
- if (main_frame) {
- AFR_STACK_UNWIND (setxattr, main_frame,
- local->op_ret, local->op_errno, NULL);
- }
- return 0;
-}
-
-static int
-afr_setxattr_wind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- afr_local_t * local = NULL;
- afr_private_t * priv = NULL;
-
- int call_count = -1;
- int need_unwind = 0;
-
- local = frame->local;
- priv = this->private;
-
- LOCK (&frame->lock);
- {
- if (op_ret != -1) {
- if (local->success_count == 0) {
- local->op_ret = op_ret;
- }
- local->success_count++;
-
- if (local->success_count == priv->child_count) {
- need_unwind = 1;
- }
- }
-
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (need_unwind)
- local->transaction.unwind (frame, this);
-
- call_count = afr_frame_return (frame);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- }
-
- return 0;
-}
-
-static int
-afr_setxattr_wind (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
- afr_private_t *priv = NULL;
-
- int call_count = -1;
- int i = 0;
-
- local = frame->local;
- priv = this->private;
-
- call_count = afr_up_children_count (local->child_up, priv->child_count);
-
- if (call_count == 0) {
- local->transaction.resume (frame, this);
- return 0;
- }
-
- local->call_count = call_count;
-
- for (i = 0; i < priv->child_count; i++) {
- if (local->child_up[i]) {
- STACK_WIND_COOKIE (frame, afr_setxattr_wind_cbk,
- (void *) (long) i,
- priv->children[i],
- priv->children[i]->fops->setxattr,
- &local->loc,
- local->cont.setxattr.dict,
- local->cont.setxattr.flags, NULL);
-
- if (!--call_count)
- break;
- }
- }
-
- return 0;
-}
-
-
-static int
-afr_setxattr_done (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t * local = frame->local;
-
- local->transaction.unwind (frame, this);
-
- AFR_STACK_DESTROY (frame);
-
- return 0;
-}
-
-int32_t
-pump_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- AFR_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int
-pump_command_reply (call_frame_t *frame, xlator_t *this)
-{
- afr_local_t *local = NULL;
-
- local = frame->local;
-
- if (local->op_ret < 0)
- gf_log (this->name, GF_LOG_INFO,
- "Command failed");
- else
- gf_log (this->name, GF_LOG_INFO,
- "Command succeeded");
-
- AFR_STACK_UNWIND (setxattr,
- frame,
- local->op_ret,
- local->op_errno, NULL);
-
- return 0;
-}
-
-int
-pump_parse_command (call_frame_t *frame, xlator_t *this,
- afr_local_t *local, dict_t *dict)
-{
-
- int ret = -1;
-
- if (pump_command_start (this, dict)) {
- frame->local = local;
- local->dict = dict_ref (dict);
- ret = pump_execute_start (frame, this);
-
- } else if (pump_command_pause (this, dict)) {
- frame->local = local;
- local->dict = dict_ref (dict);
- ret = pump_execute_pause (frame, this);
-
- } else if (pump_command_abort (this, dict)) {
- frame->local = local;
- local->dict = dict_ref (dict);
- ret = pump_execute_abort (frame, this);
-
- } else if (pump_command_commit (this, dict)) {
- frame->local = local;
- local->dict = dict_ref (dict);
- ret = pump_execute_commit (frame, this);
- }
- return ret;
-}
-
-int
-pump_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
-{
- afr_private_t * priv = NULL;
- afr_local_t * local = NULL;
- call_frame_t *transaction_frame = NULL;
- data_pair_t * trav = NULL;
- int ret = -1;
- int op_errno = 0;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (this->private, out);
-
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.pump*", dict,
- trav, op_errno, out);
-
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_setxattr_cbk,
- FIRST_CHILD (this),
- (FIRST_CHILD (this))->fops->setxattr,
- loc, dict, flags, xdata);
- return 0;
- }
-
-
- AFR_LOCAL_ALLOC_OR_GOTO (local, out);
-
- ret = afr_local_init (local, priv, &op_errno);
- if (ret < 0) {
- afr_local_cleanup (local, this);
- goto out;
- }
-
- ret = pump_parse_command (frame, this,
- local, dict);
- if (ret >= 0) {
- ret = 0;
- goto out;
- }
-
- transaction_frame = copy_frame (frame);
- if (!transaction_frame) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- ret = -1;
- afr_local_cleanup (local, this);
- goto out;
- }
-
- transaction_frame->local = local;
-
- local->op_ret = -1;
-
- local->cont.setxattr.dict = dict_ref (dict);
- local->cont.setxattr.flags = flags;
-
- local->transaction.fop = afr_setxattr_wind;
- local->transaction.done = afr_setxattr_done;
- local->transaction.unwind = afr_setxattr_unwind;
-
- loc_copy (&local->loc, loc);
-
- local->transaction.main_frame = frame;
- local->transaction.start = LLONG_MAX - 1;
- local->transaction.len = 0;
-
- afr_transaction (transaction_frame, this, AFR_METADATA_TRANSACTION);
-
- ret = 0;
-out:
- if (ret < 0) {
- if (transaction_frame)
- AFR_STACK_DESTROY (transaction_frame);
- AFR_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
- }
-
- return 0;
-}
-
-/* Defaults */
-static int32_t
-pump_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc,
- xattr_req);
- return 0;
- }
-
- afr_lookup (frame, this, loc, xattr_req);
- return 0;
-}
-
-
-static int32_t
-pump_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc,
- offset, xdata);
- return 0;
- }
-
- afr_truncate (frame, this, loc, offset, xdata);
- return 0;
-}
-
-
-static int32_t
-pump_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset, xdata);
- return 0;
- }
-
- afr_ftruncate (frame, this, fd, offset, xdata);
- return 0;
-}
-
-
-
-
-int
-pump_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_mknod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, umask, xdata);
- return 0;
- }
- afr_mknod (frame, this, loc, mode, rdev, umask, xdata);
- return 0;
-
-}
-
-
-
-int
-pump_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_mkdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- loc, mode, umask, xdata);
- return 0;
- }
- afr_mkdir (frame, this, loc, mode, umask, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc, xflag, xdata);
- return 0;
- }
- afr_unlink (frame, this, loc, xflag, xdata);
- return 0;
-
-}
-
-
-static int
-pump_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
-
- priv = this->private;
-
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_rmdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- loc, flags, xdata);
- return 0;
- }
-
- afr_rmdir (frame, this, loc, flags, xdata);
- return 0;
-
-}
-
-
-
-int
-pump_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, mode_t umask, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, umask, xdata);
- return 0;
- }
- afr_symlink (frame, this, linkpath, loc, umask, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_rename_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename,
- oldloc, newloc, xdata);
- return 0;
- }
- afr_rename (frame, this, oldloc, newloc, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_link_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link,
- oldloc, newloc, xdata);
- return 0;
- }
- afr_link (frame, this, oldloc, newloc, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame, default_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, umask, fd, xdata);
- return 0;
- }
- afr_create (frame, this, loc, flags, mode, umask, fd, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_open_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc, flags, fd, xdata);
- return 0;
- }
- afr_open (frame, this, loc, flags, fd, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd,
- vector,
- count,
- off, flags,
- iobref, xdata);
- return 0;
- }
-
- afr_writev (frame, this, fd, vector, count, off, flags, iobref, xdata);
- return 0;
-}
-
-
-static int32_t
-pump_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_flush_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd, xdata);
- return 0;
- }
- afr_flush (frame, this, fd, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd,
- flags, xdata);
- return 0;
- }
- afr_fsync (frame, this, fd, flags, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_opendir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir,
- loc, fd, xdata);
- return 0;
- }
- afr_opendir (frame, this, loc, fd, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_fsyncdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir,
- fd,
- flags, xdata);
- return 0;
- }
- afr_fsyncdir (frame, this, fd, flags, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_xattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop,
- loc,
- flags,
- dict, xdata);
- return 0;
- }
- afr_xattrop (frame, this, loc, flags, dict, xdata);
- return 0;
-
-}
-
-static int32_t
-pump_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_fxattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop,
- fd,
- flags,
- dict, xdata);
- return 0;
- }
- afr_fxattrop (frame, this, fd, flags, dict, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (this, out);
-
- GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.pump*",
- name, op_errno, out);
-
- op_errno = 0;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_removexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- loc,
- name, xdata);
- return 0;
- }
- afr_removexattr (frame, this, loc, name, xdata);
-
- out:
- if (op_errno)
- AFR_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
- return 0;
-
-}
-
-
-
-static int32_t
-pump_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t off, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_readdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir,
- fd, size, off, xdata);
- return 0;
- }
- afr_readdir (frame, this, fd, size, off, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off, dict_t *dict)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_readdirp_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp,
- fd, size, off, dict);
- return 0;
- }
- afr_readdirp (frame, this, fd, size, off, dict);
- return 0;
-
-}
-
-
-
-static int32_t
-pump_releasedir (xlator_t *this,
- fd_t *fd)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (priv->use_afr_in_pump)
- afr_releasedir (this, fd);
- return 0;
-
-}
-
-static int32_t
-pump_release (xlator_t *this,
- fd_t *fd)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (priv->use_afr_in_pump)
- afr_release (this, fd);
- return 0;
-
-}
-
-static int32_t
-pump_forget (xlator_t *this, inode_t *inode)
-{
- afr_private_t *priv = NULL;
-
- priv = this->private;
- if (priv->use_afr_in_pump)
- afr_forget (this, inode);
-
- return 0;
-}
-
-static int32_t
-pump_setattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_setattr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr,
- loc, stbuf, valid, xdata);
- return 0;
- }
- afr_setattr (frame, this, loc, stbuf, valid, xdata);
- return 0;
-
-}
-
-
-static int32_t
-pump_fsetattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
-{
- afr_private_t *priv = NULL;
- priv = this->private;
- if (!priv->use_afr_in_pump) {
- STACK_WIND (frame,
- default_fsetattr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr,
- fd, stbuf, valid, xdata);
- return 0;
- }
- afr_fsetattr (frame, this, fd, stbuf, valid, xdata);
- return 0;
-
-}
-
-
-/* End of defaults */
-
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_afr_mt_end + 1);
-
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-static int
-is_xlator_pump_sink (xlator_t *child)
-{
- return (child == PUMP_SINK_CHILD(THIS));
-}
-
-static int
-is_xlator_pump_source (xlator_t *child)
-{
- return (child == PUMP_SOURCE_CHILD(THIS));
-}
-
-int32_t
-notify (xlator_t *this, int32_t event,
- void *data, ...)
-{
- int ret = -1;
- xlator_t *child_xl = NULL;
-
- child_xl = (xlator_t *) data;
-
- ret = afr_notify (this, event, data, NULL);
-
- switch (event) {
- case GF_EVENT_CHILD_DOWN:
- if (is_xlator_pump_source (child_xl))
- pump_change_state (this, PUMP_STATE_ABORT);
- break;
-
- case GF_EVENT_CHILD_UP:
- if (is_xlator_pump_sink (child_xl))
- if (is_pump_start_pending (this)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "about to start synctask");
- ret = pump_start_synctask (this);
- if (ret < 0)
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not start pump "
- "synctask");
- else
- pump_remove_start_pending (this);
- }
- }
-
- return ret;
-}
-
-int32_t
-init (xlator_t *this)
-{
- afr_private_t * priv = NULL;
- pump_private_t *pump_priv = NULL;
- int child_count = 0;
- xlator_list_t * trav = NULL;
- int i = 0;
- int ret = -1;
- GF_UNUSED int op_errno = 0;
-
- int source_child = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "pump translator needs a source and sink"
- "subvolumes defined.");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling.");
- }
-
- this->private = GF_CALLOC (1, sizeof (afr_private_t),
- gf_afr_mt_afr_private_t);
- if (!this->private)
- goto out;
-
- priv = this->private;
- LOCK_INIT (&priv->lock);
- LOCK_INIT (&priv->read_child_lock);
- //lock recovery is not done in afr
- pthread_mutex_init (&priv->mutex, NULL);
- INIT_LIST_HEAD (&priv->saved_fds);
-
- child_count = xlator_subvolume_count (this);
- if (child_count != 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "There should be exactly 2 children - one source "
- "and one sink");
- return -1;
- }
- priv->child_count = child_count;
-
- priv->read_child = source_child;
- priv->favorite_child = source_child;
- priv->background_self_heal_count = 0;
-
- priv->data_self_heal = "on";
- priv->metadata_self_heal = 1;
- priv->entry_self_heal = 1;
-
- priv->data_self_heal_algorithm = "";
-
- priv->data_self_heal_window_size = 16;
-
- priv->data_change_log = 1;
- priv->metadata_change_log = 1;
- priv->entry_change_log = 1;
- priv->use_afr_in_pump = 1;
-
- /* Locking options */
-
- /* Lock server count infact does not matter. Locks are held
- on all subvolumes, in this case being the source
- and the sink.
- */
-
- priv->strict_readdir = _gf_false;
-
- priv->wait_count = 1;
- priv->child_up = GF_CALLOC (sizeof (unsigned char), child_count,
- gf_afr_mt_char);
- if (!priv->child_up) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto out;
- }
-
- priv->children = GF_CALLOC (sizeof (xlator_t *), child_count,
- gf_afr_mt_xlator_t);
- if (!priv->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto out;
- }
-
- priv->pending_key = GF_CALLOC (sizeof (*priv->pending_key),
- child_count,
- gf_afr_mt_char);
- if (!priv->pending_key) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_errno = ENOMEM;
- goto out;
- }
-
- trav = this->children;
- i = 0;
- while (i < child_count) {
- priv->children[i] = trav->xlator;
-
- ret = gf_asprintf (&priv->pending_key[i], "%s.%s", AFR_XATTR_PREFIX,
- trav->xlator->name);
- if (-1 == ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "asprintf failed to set pending key");
- op_errno = ENOMEM;
- goto out;
- }
-
- trav = trav->next;
- i++;
- }
-
- priv->first_lookup = 1;
- priv->root_inode = NULL;
-
- priv->last_event = GF_CALLOC (child_count, sizeof (*priv->last_event),
- gf_afr_mt_int32_t);
- if (!priv->last_event) {
- ret = -ENOMEM;
- goto out;
- }
-
- pump_priv = GF_CALLOC (1, sizeof (*pump_priv),
- gf_afr_mt_pump_priv);
- if (!pump_priv) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- op_errno = ENOMEM;
- goto out;
- }
-
- LOCK_INIT (&pump_priv->resume_path_lock);
- LOCK_INIT (&pump_priv->pump_state_lock);
-
- pump_priv->resume_path = GF_CALLOC (1, PATH_MAX,
- gf_afr_mt_char);
- if (!pump_priv->resume_path) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory");
- ret = -1;
- goto out;
- }
-
- pump_priv->env = syncenv_new (0);
- if (!pump_priv->env) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not create new sync-environment");
- ret = -1;
- goto out;
- }
-
- /* keep more local here as we may need them for self-heal etc */
- this->local_pool = mem_pool_new (afr_local_t, 128);
- if (!this->local_pool) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto out;
- }
-
- priv->pump_private = pump_priv;
-
- pump_change_state (this, PUMP_STATE_ABORT);
-
- ret = 0;
-out:
- return ret;
-}
-
-int
-fini (xlator_t *this)
-{
- afr_private_t * priv = NULL;
- pump_private_t *pump_priv = NULL;
-
- priv = this->private;
- this->private = NULL;
- if (!priv)
- goto out;
-
- pump_priv = priv->pump_private;
- if (!pump_priv)
- goto afr_priv;
-
- if (pump_priv->env)
- syncenv_destroy (pump_priv->env);
-
- GF_FREE (pump_priv->resume_path);
- LOCK_DESTROY (&pump_priv->resume_path_lock);
- LOCK_DESTROY (&pump_priv->pump_state_lock);
- GF_FREE (pump_priv);
-afr_priv:
- afr_priv_destroy (priv);
-out:
- return 0;
-}
-
-
-struct xlator_fops fops = {
- .lookup = pump_lookup,
- .open = pump_open,
- .flush = pump_flush,
- .fsync = pump_fsync,
- .fsyncdir = pump_fsyncdir,
- .xattrop = pump_xattrop,
- .fxattrop = pump_fxattrop,
- .getxattr = pump_getxattr,
-
- /* inode write */
- .writev = pump_writev,
- .truncate = pump_truncate,
- .ftruncate = pump_ftruncate,
- .setxattr = pump_setxattr,
- .setattr = pump_setattr,
- .fsetattr = pump_fsetattr,
- .removexattr = pump_removexattr,
-
- /* dir read */
- .opendir = pump_opendir,
- .readdir = pump_readdir,
- .readdirp = pump_readdirp,
-
- /* dir write */
- .create = pump_create,
- .mknod = pump_mknod,
- .mkdir = pump_mkdir,
- .unlink = pump_unlink,
- .rmdir = pump_rmdir,
- .link = pump_link,
- .symlink = pump_symlink,
- .rename = pump_rename,
-};
-
-struct xlator_dumpops dumpops = {
- .priv = afr_priv_dump,
-};
-
-
-struct xlator_cbks cbks = {
- .release = pump_release,
- .releasedir = pump_releasedir,
- .forget = pump_forget,
-};
-
-struct volume_options options[] = {
- { .key = {NULL} },
-};
diff --git a/xlators/cluster/afr/src/pump.h b/xlators/cluster/afr/src/pump.h
deleted file mode 100644
index bc4c31a78a5..00000000000
--- a/xlators/cluster/afr/src/pump.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#ifndef __PUMP_H__
-#define __PUMP_H__
-
-#include "syncop.h"
-
-/* FIXME: Needs to be defined in a common file */
-#define CLIENT_CMD_CONNECT "trusted.glusterfs.client-connect"
-#define CLIENT_CMD_DISCONNECT "trusted.glusterfs.client-disconnect"
-
-#define PUMP_SOURCE_COMPLETE "trusted.glusterfs.pump-source-complete"
-#define PUMP_SINK_COMPLETE "trusted.glusterfs.pump-sink-complete"
-
-#define PUMP_PATH "trusted.glusterfs.pump-path"
-
-#define PUMP_SOURCE_CHILD(xl) (xl->children->xlator)
-#define PUMP_SINK_CHILD(xl) (xl->children->next->xlator)
-
-typedef enum {
- PUMP_STATE_RUNNING, /* Pump is running and migrating files */
- PUMP_STATE_RESUME, /* Pump is resuming from a previous pause */
- PUMP_STATE_PAUSE, /* Pump is paused */
- PUMP_STATE_ABORT, /* Pump is aborted */
- PUMP_STATE_COMMIT, /* Pump is commited */
-} pump_state_t;
-
-typedef struct _pump_private {
- struct syncenv *env; /* The env pointer to the pump synctask */
- char *resume_path; /* path to resume from the last pause */
- gf_lock_t resume_path_lock; /* Synchronize resume_path changes */
- gf_lock_t pump_state_lock; /* Synchronize pump_state changes */
- pump_state_t pump_state; /* State of pump */
- char current_file[PATH_MAX]; /* Current file being pumped */
- uint64_t number_files_pumped; /* Number of files pumped */
- gf_boolean_t pump_finished; /* Boolean to indicate pump termination */
- char pump_start_pending; /* Boolean to mark start pending until
- CHILD_UP */
- call_stub_t *cleaner;
-} pump_private_t;
-
-void
-build_root_loc (inode_t *inode, loc_t *loc);
-int pump_start (call_frame_t *frame, xlator_t *this);
-
-gf_boolean_t
-pump_command_start (xlator_t *this, dict_t *dict);
-
-int
-pump_execute_start (call_frame_t *frame, xlator_t *this);
-
-gf_boolean_t
-pump_command_pause (xlator_t *this, dict_t *dict);
-
-int
-pump_execute_pause (call_frame_t *frame, xlator_t *this);
-
-gf_boolean_t
-pump_command_abort (xlator_t *this, dict_t *dict);
-
-int
-pump_execute_abort (call_frame_t *frame, xlator_t *this);
-
-gf_boolean_t
-pump_command_status (xlator_t *this, dict_t *dict);
-
-int
-pump_execute_status (call_frame_t *frame, xlator_t *this);
-
-#endif /* __PUMP_H__ */
diff --git a/xlators/cluster/dht/src/Makefile.am b/xlators/cluster/dht/src/Makefile.am
index e35058d65d4..56f1f2ad7c8 100644
--- a/xlators/cluster/dht/src/Makefile.am
+++ b/xlators/cluster/dht/src/Makefile.am
@@ -1,32 +1,37 @@
-
xlator_LTLIBRARIES = dht.la nufa.la switch.la
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
dht_common_source = dht-layout.c dht-helper.c dht-linkfile.c dht-rebalance.c \
dht-selfheal.c dht-rename.c dht-hashfn.c dht-diskusage.c \
- dht-common.c dht-inode-write.c dht-inode-read.c \
- $(top_builddir)/xlators/lib/src/libxlator.c
+ dht-common.c dht-inode-write.c dht-inode-read.c dht-shared.c \
+ dht-lock.c $(top_builddir)/xlators/lib/src/libxlator.c
dht_la_SOURCES = $(dht_common_source) dht.c
nufa_la_SOURCES = $(dht_common_source) nufa.c
switch_la_SOURCES = $(dht_common_source) switch.c
-dht_la_LDFLAGS = -module -avoidversion
+dht_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
dht_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-nufa_la_LDFLAGS = -module -avoidversion
+nufa_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
nufa_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-switch_la_LDFLAGS = -module -avoidversion
+switch_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
switch_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = dht-common.h dht-mem-types.h \
- $(top_builddir)/xlators/lib/src/libxlator.h
+noinst_HEADERS = dht-common.h dht-mem-types.h dht-messages.h \
+ dht-lock.h $(top_builddir)/xlators/lib/src/libxlator.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/lib/src
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/xlators/lib/src \
+ -DDATADIR=\"$(localstatedir)\" \
+ -DLIBDIR=\"$(libdir)\"
CLEANFILES =
@@ -35,3 +40,9 @@ uninstall-local:
install-data-hook:
ln -sf dht.so $(DESTDIR)$(xlatordir)/distribute.so
+
+if UNITTEST
+CLEANFILES += *.gcda *.gcno *_xunit.xml
+noinst_PROGRAMS =
+TESTS =
+endif
diff --git a/xlators/cluster/dht/src/dht-common.c b/xlators/cluster/dht/src/dht-common.c
index eaafc2bfb3e..8ba0cc4c732 100644
--- a/xlators/cluster/dht/src/dht-common.c
+++ b/xlators/cluster/dht/src/dht-common.c
@@ -8,90 +8,439 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
/* TODO: add NS locking */
-#include "glusterfs.h"
-#include "xlator.h"
#include "libxlator.h"
#include "dht-common.h"
-#include "defaults.h"
-#include "byte-order.h"
+#include "dht-lock.h"
+#include <glusterfs/byte-order.h>
+#include <glusterfs/quota-common-utils.h>
+#include <glusterfs/upcall-utils.h>
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
+#include <glusterfs/common-utils.h>
#include <sys/time.h>
#include <libgen.h>
+#include <signal.h>
-void
-dht_aggregate (dict_t *this, char *key, data_t *value, void *data)
+static int
+dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata);
+
+static int
+dht_link2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+
+static int
+dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req);
+
+static int
+dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this);
+
+static int
+dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata);
+
+static int
+dht_rmdir_unlock(call_frame_t *frame, xlator_t *this);
+
+static const char *dht_dbg_vxattrs[] = {DHT_DBG_HASHED_SUBVOL_PATTERN, NULL};
+
+/* Check the xdata to make sure EBADF has been set by client xlator */
+int32_t
+dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno)
{
- dict_t *dst = NULL;
- int64_t *ptr = 0, *size = NULL;
- int32_t ret = -1;
- data_pair_t *data_pair = NULL;
+ if (op_ret == -1 && (op_errno == EBADF || op_errno == EBADFD) &&
+ !(local->fd_checked)) {
+ return 1;
+ }
+ return 0;
+}
- dst = data;
+/* Sets the blocks and size values to fixed values. This is to be called
+ * only for dirs. The caller is responsible for checking the type
+ */
+int32_t
+dht_set_fixed_dir_stat(struct iatt *stat)
+{
+ if (stat) {
+ stat->ia_blocks = DHT_DIR_STAT_BLOCKS;
+ stat->ia_size = DHT_DIR_STAT_SIZE;
+ return 0;
+ }
+ return -1;
+}
- if (strcmp (key, GF_XATTR_QUOTA_SIZE_KEY) == 0) {
- ret = dict_get_bin (dst, key, (void **)&size);
- if (ret < 0) {
- size = GF_CALLOC (1, sizeof (int64_t),
- gf_common_mt_char);
- if (size == NULL) {
- gf_log ("dht", GF_LOG_WARNING,
- "memory allocation failed");
- return;
- }
- ret = dict_set_bin (dst, key, size, sizeof (int64_t));
- if (ret < 0) {
- gf_log ("dht", GF_LOG_WARNING,
- "dht aggregate dict set failed");
- GF_FREE (size);
- return;
- }
- }
+/* Return true if key exists in array
+ */
+static gf_boolean_t
+dht_match_xattr(const char *key)
+{
+ char **xattrs_to_heal = get_xattrs_to_heal();
- ptr = data_to_bin (value);
- if (ptr == NULL) {
- gf_log ("dht", GF_LOG_WARNING, "data to bin failed");
- return;
- }
+ return gf_get_index_by_elem(xattrs_to_heal, (char *)key) >= 0;
+}
- *size = hton64 (ntoh64 (*size) + ntoh64 (*ptr));
+static int
+dht_aggregate_quota_xattr(dict_t *dst, char *key, data_t *value)
+{
+ int ret = -1;
+ quota_meta_t *meta_dst = NULL;
+ quota_meta_t *meta_src = NULL;
+ int64_t *size = NULL;
+ int64_t dst_dir_count = 0;
+ int64_t src_dir_count = 0;
+
+ if (value == NULL) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_DATA_NULL,
+ "data value is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_bin(dst, key, (void **)&meta_dst);
+ if (ret < 0) {
+ meta_dst = GF_CALLOC(1, sizeof(quota_meta_t), gf_common_quota_meta_t);
+ if (meta_dst == NULL) {
+ gf_msg("dht", GF_LOG_WARNING, ENOMEM, DHT_MSG_NO_MEMORY,
+ "Memory allocation failed");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_bin(dst, key, meta_dst, sizeof(quota_meta_t));
+ if (ret < 0) {
+ gf_msg("dht", GF_LOG_WARNING, EINVAL, DHT_MSG_DICT_SET_FAILED,
+ "dht aggregate dict set failed");
+ GF_FREE(meta_dst);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (value->len > sizeof(int64_t)) {
+ meta_src = data_to_bin(value);
+
+ meta_dst->size = hton64(ntoh64(meta_dst->size) +
+ ntoh64(meta_src->size));
+ meta_dst->file_count = hton64(ntoh64(meta_dst->file_count) +
+ ntoh64(meta_src->file_count));
+
+ if (value->len > (2 * sizeof(int64_t))) {
+ dst_dir_count = ntoh64(meta_dst->dir_count);
+ src_dir_count = ntoh64(meta_src->dir_count);
+
+ if (src_dir_count > dst_dir_count)
+ meta_dst->dir_count = meta_src->dir_count;
} else {
- /* compare user xattrs only */
- if (!strncmp (key, "user.", strlen ("user."))) {
- ret = dict_lookup (dst, key, &data_pair);
- if (!ret && data_pair && value) {
- ret = is_data_equal (data_pair->value, value);
- if (!ret)
- gf_log ("dht", GF_LOG_WARNING,
- "xattr mismatch for %s", key);
- }
- }
- ret = dict_set (dst, key, value);
- if (ret)
- gf_log ("dht", GF_LOG_WARNING, "xattr dict set failed");
+ meta_dst->dir_count = 0;
}
+ } else {
+ size = data_to_bin(value);
+ meta_dst->size = hton64(ntoh64(meta_dst->size) + ntoh64(*size));
+ }
- return;
+ ret = 0;
+out:
+ return ret;
}
+static int
+add_opt(char **optsp, const char *opt)
+{
+ char *newopts = NULL;
+ unsigned oldsize = 0;
+ unsigned newsize = 0;
+
+ if (*optsp == NULL)
+ newopts = gf_strdup(opt);
+ else {
+ oldsize = strlen(*optsp);
+ newsize = oldsize + 1 + strlen(opt) + 1;
+ newopts = GF_REALLOC(*optsp, newsize);
+ if (newopts)
+ sprintf(newopts + oldsize, ",%s", opt);
+ }
+ if (newopts == NULL) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to add choices in buffer in add_opt");
+ return -1;
+ }
+ *optsp = newopts;
+ return 0;
+}
-void
-dht_aggregate_xattr (dict_t *dst, dict_t *src)
+/* Return Choice list from Split brain status */
+static char *
+getChoices(const char *value)
+{
+ int i = 0;
+ char *ptr = NULL;
+ char *tok = NULL;
+ char *result = NULL;
+ char *newval = NULL;
+
+ ptr = strstr(value, "Choices:");
+ if (!ptr) {
+ result = ptr;
+ goto out;
+ }
+
+ newval = gf_strdup(ptr);
+ if (!newval) {
+ result = newval;
+ goto out;
+ }
+
+ tok = strtok(newval, ":");
+ if (!tok) {
+ result = tok;
+ goto out;
+ }
+
+ while (tok) {
+ i++;
+ if (i == 2)
+ break;
+ tok = strtok(NULL, ":");
+ }
+
+ result = gf_strdup(tok);
+
+out:
+ if (newval)
+ GF_FREE(newval);
+
+ return result;
+}
+
+/* This function prepare a list of choices for key
+ (replica.split-brain-status) in case of metadata split brain
+ only on the basis of key-value passed to this function.
+ After prepare the list of choices it update the same key in dict
+ with this value to reflect the same in
+ replica.split-brain-status attr for file.
+
+*/
+
+static int
+dht_aggregate_split_brain_xattr(dict_t *dst, char *key, data_t *value)
{
- if ((dst == NULL) || (src == NULL)) {
+ int ret = 0;
+ char *oldvalue = NULL;
+ char *old_choice = NULL;
+ char *new_choice = NULL;
+ char *full_choice = NULL;
+ char *status = NULL;
+
+ if (value == NULL) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_DATA_NULL,
+ "GF_AFR_SBRAIN_STATUS value is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str(dst, key, &oldvalue);
+ if (ret)
+ goto out;
+
+ /* skip code that is irrelevant if !oldvalue */
+ if (!oldvalue)
+ goto out;
+
+ if (strstr(oldvalue, "not")) {
+ gf_msg_debug("dht", 0, "Need to update split-brain status in dict");
+ ret = -1;
+ goto out;
+ }
+ if (strstr(oldvalue, "metadata-split-brain:yes") &&
+ (strstr(oldvalue, "data-split-brain:no"))) {
+ if (strstr(value->data, "not")) {
+ gf_msg_debug("dht", 0, "No need to update split-brain status");
+ ret = 0;
+ goto out;
+ }
+ if (strstr(value->data, "yes") &&
+ (strncmp(oldvalue, value->data, strlen(oldvalue)))) {
+ old_choice = getChoices(oldvalue);
+ if (!old_choice) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to get choices");
+ ret = -1;
+ goto out;
+ }
+
+ ret = add_opt(&full_choice, old_choice);
+ if (ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to add choices");
+ ret = -1;
goto out;
+ }
+
+ new_choice = getChoices(value->data);
+ if (!new_choice) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to get choices");
+ ret = -1;
+ goto out;
+ }
+
+ ret = add_opt(&full_choice, new_choice);
+ if (ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to add choices ");
+ ret = -1;
+ goto out;
+ }
+ ret = gf_asprintf(&status,
+ "data-split-brain:%s "
+ "metadata-split-brain:%s Choices:%s",
+ "no", "yes", full_choice);
+
+ if (-1 == ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_NO_MEMORY,
+ "Error to prepare status ");
+ goto out;
+ }
+ ret = dict_set_dynstr(dst, key, status);
+ if (ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set full choice");
+ }
}
+ }
- dict_foreach (src, dht_aggregate, dst);
out:
- return;
+ if (old_choice)
+ GF_FREE(old_choice);
+ if (new_choice)
+ GF_FREE(new_choice);
+ if (full_choice)
+ GF_FREE(full_choice);
+
+ return ret;
+}
+
+static int
+dht_aggregate(dict_t *this, char *key, data_t *value, void *data)
+{
+ dict_t *dst = NULL;
+ int32_t ret = -1;
+ data_t *dict_data = NULL;
+
+ dst = data;
+
+ /* compare split brain xattr only */
+ if (strcmp(key, GF_AFR_SBRAIN_STATUS) == 0) {
+ ret = dht_aggregate_split_brain_xattr(dst, key, value);
+ if (!ret)
+ goto out;
+ } else if (strcmp(key, QUOTA_SIZE_KEY) == 0) {
+ ret = dht_aggregate_quota_xattr(dst, key, value);
+ if (ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0,
+ DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED,
+ "Failed to aggregate quota xattr");
+ }
+ goto out;
+ } else if (fnmatch(GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) {
+ ret = gf_get_min_stime(THIS, dst, key, value);
+ goto out;
+ } else {
+ /* compare user xattrs only */
+ if (!strncmp(key, "user.", SLEN("user."))) {
+ ret = dict_lookup(dst, key, &dict_data);
+ if (!ret && dict_data && value) {
+ ret = is_data_equal(dict_data, value);
+ if (!ret)
+ gf_msg_debug("dht", 0, "xattr mismatch for %s", key);
+ }
+ }
+ }
+
+ ret = dict_set(dst, key, value);
+ if (ret) {
+ gf_msg("dht", GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s", key);
+ }
+
+out:
+ return ret;
+}
+
+static void
+dht_aggregate_xattr(dict_t *dst, dict_t *src)
+{
+ if ((dst == NULL) || (src == NULL)) {
+ goto out;
+ }
+
+ dict_foreach(src, dht_aggregate, dst);
+out:
+ return;
+}
+
+/* Code to save hashed subvol on inode ctx as a mds subvol
+ */
+int
+dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+ uint64_t ctx_int = 0;
+ gf_boolean_t ctx_free = _gf_false;
+
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get(inode, this, &ctx_int);
+ if (ctx_int) {
+ ctx = (dht_inode_ctx_t *)(uintptr_t)ctx_int;
+ ctx->mds_subvol = mds_subvol;
+ } else {
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ goto unlock;
+ ctx->mds_subvol = mds_subvol;
+ ctx_free = _gf_true;
+ ctx_int = (long)ctx;
+ ret = __inode_ctx_set(inode, this, &ctx_int);
+ }
+ }
+unlock:
+ UNLOCK(&inode->lock);
+ if (ret && ctx_free)
+ GF_FREE(ctx);
+ return ret;
+}
+
+/*Code to get mds subvol from inode ctx */
+
+int
+dht_inode_ctx_mdsvol_get(inode_t *inode, xlator_t *this, xlator_t **mdsvol)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ if (!mdsvol)
+ return ret;
+
+ if (__is_root_gfid(inode->gfid)) {
+ (*mdsvol) = FIRST_CHILD(this);
+ return 0;
+ }
+
+ ret = dht_inode_ctx_get(inode, this, &ctx);
+
+ if (!ret && ctx) {
+ if (ctx->mds_subvol) {
+ *mdsvol = ctx->mds_subvol;
+ ret = 0;
+ } else {
+ ret = -1;
+ }
+ }
+
+ return ret;
}
/* TODO:
@@ -101,4728 +450,10942 @@ out:
- complete linkfile selfheal
*/
-
-int
-dht_lookup_selfheal_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+static int
+dht_lookup_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int ret = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
- local = frame->local;
- ret = op_ret;
+ local = frame->local;
+ conf = this->private;
+ ret = op_ret;
- FRAME_SU_UNDO (frame, dht_local_t);
+ FRAME_SU_UNDO(frame, dht_local_t);
- if (ret == 0) {
- layout = local->selfheal.layout;
- ret = dht_layout_set (this, local->inode, layout);
- }
+ if (ret == 0) {
+ layout = local->selfheal.layout;
+ ret = dht_layout_set(this, local->inode, layout);
+ }
- WIPE (&local->postparent);
+ dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, &local->postparent,
+ 1);
+ }
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+ /* Delete mds xattr at the time of STACK UNWIND */
+ GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr);
- DHT_STACK_UNWIND (lookup, frame, ret, local->op_errno, local->inode,
- &local->stbuf, local->xattr, &local->postparent);
+ DHT_STACK_UNWIND(lookup, frame, ret, local->op_errno, local->inode,
+ &local->stbuf, local->xattr, &local->postparent);
out:
- return ret;
+ return ret;
}
-
-int
-dht_discover_complete (xlator_t *this, call_frame_t *discover_frame)
+static int
+dht_discover_complete(xlator_t *this, call_frame_t *discover_frame)
{
- dht_local_t *local = NULL;
- call_frame_t *main_frame = NULL;
- int op_errno = 0;
- int ret = -1;
- dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *heal_local = NULL;
+ call_frame_t *main_frame = NULL;
+ call_frame_t *heal_frame = NULL;
+ int op_errno = 0;
+ int ret = -1;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ uint32_t vol_commit_hash = 0;
+ xlator_t *source = NULL;
+ int heal_path = 0;
+ int error_while_marking_mds = 0;
+ int i = 0;
+ loc_t loc = {0};
+ int8_t is_read_only = 0, layout_anomalies = 0;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+
+ local = discover_frame->local;
+ layout = local->layout;
+ conf = this->private;
+ gf_uuid_unparse(local->gfid, gfid_local);
+
+ LOCK(&discover_frame->lock);
+ {
+ main_frame = local->main_frame;
+ local->main_frame = NULL;
+ }
+ UNLOCK(&discover_frame->lock);
- local = discover_frame->local;
- layout = local->layout;
+ if (!main_frame)
+ return 0;
- LOCK(&discover_frame->lock);
- {
- main_frame = local->main_frame;
- local->main_frame = NULL;
+ /* Code to update all extended attributed from
+ subvol to local->xattr on that internal xattr has found
+ */
+ if (conf->subvolume_cnt == 1)
+ local->need_xattr_heal = 0;
+ if (local->need_xattr_heal && (local->mds_xattr)) {
+ dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr,
+ NULL, NULL);
+ dict_unref(local->mds_xattr);
+ local->mds_xattr = NULL;
+ }
+
+ ret = dict_get_int8(local->xattr_req, QUOTA_READ_ONLY_KEY, &is_read_only);
+ if (ret < 0)
+ gf_msg_debug(this->name, 0, "key = %s not present in dict",
+ QUOTA_READ_ONLY_KEY);
+
+ if (local->file_count && local->dir_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_FILE_TYPE_MISMATCH,
+ "path %s exists as a file on one subvolume "
+ "and directory on another. "
+ "Please fix it manually",
+ local->loc.path);
+ op_errno = EIO;
+ goto out;
+ }
+
+ if (local->cached_subvol) {
+ ret = dht_layout_preset(this, local->cached_subvol, local->inode);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SET_FAILED,
+ "failed to set layout for subvolume %s",
+ local->cached_subvol ? local->cached_subvol->name : "<nil>");
+ op_errno = EINVAL;
+ goto out;
+ }
+ } else {
+ ret = dht_layout_normalize(this, &local->loc, layout);
+ if ((ret < 0) || ((ret > 0) && (local->op_ret != 0))) {
+ /* either the layout is incorrect or the directory is
+ * not found even in one subvolume.
+ */
+ gf_msg_debug(this->name, 0,
+ "normalizing failed on %s "
+ "(overlaps/holes present: %s, "
+ "ENOENT errors: %d)",
+ local->loc.path, (ret < 0) ? "yes" : "no",
+ (ret > 0) ? ret : 0);
+ layout_anomalies = 1;
+ } else if (local->inode) {
+ dht_layout_set(this, local->inode, layout);
+ }
+ }
+
+ if (!conf->vch_forced) {
+ ret = dict_get_uint32(local->xattr, conf->commithash_xattr_name,
+ &vol_commit_hash);
+ if (ret == 0) {
+ conf->vol_commit_hash = vol_commit_hash;
}
- UNLOCK(&discover_frame->lock);
+ }
- if (!main_frame)
+ if (IA_ISDIR(local->stbuf.ia_type) && !is_read_only) {
+ for (i = 0; i < layout->cnt; i++) {
+ if (!source && !layout->list[i].err)
+ source = layout->list[i].xlator;
+ if (layout->list[i].err == ENOENT ||
+ layout->list[i].err == ESTALE) {
+ heal_path = 1;
+ }
+
+ if (source && heal_path)
+ break;
+ }
+ }
+
+ if (IA_ISDIR(local->stbuf.ia_type)) {
+ /* Call function to save hashed subvol on inode ctx if
+ internal mds xattr is not present and all subvols are up
+ */
+ if (!local->op_ret && !__is_root_gfid(local->stbuf.ia_gfid))
+ (void)dht_common_mark_mdsxattr(discover_frame,
+ &error_while_marking_mds, 1);
+
+ if (local->need_xattr_heal && !heal_path) {
+ local->need_xattr_heal = 0;
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "xattr heal failed for "
+ "directory gfid is %s ",
+ gfid_local);
+ }
+ }
+ }
+
+ if (source && (heal_path || layout_anomalies || error_while_marking_mds)) {
+ gf_uuid_copy(loc.gfid, local->gfid);
+ if (gf_uuid_is_null(loc.gfid)) {
+ goto done;
+ }
+
+ if (local->inode)
+ loc.inode = inode_ref(local->inode);
+ else
+ goto done;
+
+ heal_frame = create_frame(this, this->ctx->pool);
+ if (heal_frame) {
+ heal_local = dht_local_init(heal_frame, &loc, NULL, 0);
+ if (!heal_local)
+ goto cleanup;
+
+ gf_uuid_copy(heal_local->gfid, local->gfid);
+ heal_frame->cookie = source;
+ heal_local->xattr = dict_ref(local->xattr);
+ heal_local->stbuf = local->stbuf;
+ heal_local->postparent = local->postparent;
+ heal_local->inode = inode_ref(loc.inode);
+ heal_local->main_frame = main_frame;
+ FRAME_SU_DO(heal_frame, dht_local_t);
+ ret = synctask_new(this->ctx->env, dht_heal_full_path,
+ dht_heal_full_path_done, heal_frame, heal_frame);
+ if (!ret) {
+ loc_wipe(&loc);
return 0;
+ }
+ /*
+ * Failed to spawn the synctask. Returning
+ * with out doing heal.
+ */
+ cleanup:
+ loc_wipe(&loc);
+ DHT_STACK_DESTROY(heal_frame);
+ }
+ }
+done:
+ dht_set_fixed_dir_stat(&local->postparent);
+ /* Delete mds xattr at the time of STACK UNWIND */
+ if (local->xattr)
+ GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr);
- if (local->file_count && local->dir_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "path %s exists as a file on one subvolume "
- "and directory on another. "
- "Please fix it manually",
- local->loc.path);
- op_errno = EIO;
- goto out;
- }
+ DHT_STACK_UNWIND(lookup, main_frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
- if (local->cached_subvol) {
- ret = dht_layout_preset (this, local->cached_subvol,
- local->inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set layout for subvolume %s",
- local->cached_subvol ? local->cached_subvol->name : "<nil>");
- op_errno = EINVAL;
- goto out;
- }
- } else {
- ret = dht_layout_normalize (this, &local->loc, layout);
- if ((ret < 0) || ((ret > 0) && (local->op_ret != 0))) {
- /* either the layout is incorrect or the directory is
- * not found even in one subvolume.
- */
- gf_log (this->name, GF_LOG_DEBUG,
- "normalizing failed on %s "
- "(overlaps/holes present: %s, "
- "ENOENT errors: %d)", local->loc.path,
- (ret < 0) ? "yes" : "no", (ret > 0) ? ret : 0);
- op_errno = EINVAL;
- goto out;
- }
+out:
+ DHT_STACK_UNWIND(lookup, main_frame, -1, op_errno, NULL, NULL, NULL, NULL);
- dht_layout_set (this, local->inode, layout);
- }
+ return ret;
+}
- DHT_STACK_UNWIND (lookup, main_frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr,
- &local->postparent);
- return 0;
+static int
+dht_common_mark_mdsxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = cookie;
+ int ret = -1;
+ dht_conf_t *conf = 0;
+ dht_layout_t *layout = NULL;
+ int32_t mds_heal_fresh_lookup = 0;
+
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+
+ local = frame->local;
+ conf = this->private;
+ layout = local->selfheal.layout;
+ mds_heal_fresh_lookup = local->mds_heal_fresh_lookup;
+
+ if (op_ret) {
+ gf_msg_debug(this->name, op_ret,
+ "Failed to set %s on the MDS %s for path %s. ",
+ conf->mds_xattr_key, prev->name, local->loc.path);
+ } else {
+ /* Save mds subvol on inode ctx */
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set mds subvol on inode ctx"
+ " %s for %s ",
+ prev->name, local->loc.path);
+ }
+ }
+ if (!local->mds_heal_fresh_lookup && layout) {
+ dht_selfheal_dir_setattr(frame, &local->loc, &local->stbuf, 0xffffffff,
+ layout);
+ }
out:
- DHT_STACK_UNWIND (lookup, main_frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
-
- return ret;
+ if (mds_heal_fresh_lookup)
+ DHT_STACK_DESTROY(frame);
+ return 0;
}
+static xlator_t *
+dht_inode_get_hashed_subvol(inode_t *inode, xlator_t *this, loc_t *loc)
+{
+ char *path = NULL;
+ loc_t populate_loc = {
+ 0,
+ };
+ char *name = NULL;
+ xlator_t *hash_subvol = NULL;
+
+ if (!inode)
+ return hash_subvol;
+
+ if (loc && loc->parent && loc->path) {
+ if (!loc->name) {
+ name = strrchr(loc->path, '/');
+ if (name) {
+ loc->name = name + 1;
+ } else {
+ goto out;
+ }
+ }
+ hash_subvol = dht_subvol_get_hashed(this, loc);
+ goto out;
+ }
-int
-dht_discover_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- int ret = -1;
- int is_dir = 0;
- int is_linkfile = 0;
- int attempt_unwind = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+ if (!gf_uuid_is_null(inode->gfid)) {
+ populate_loc.inode = inode_ref(inode);
+ populate_loc.parent = inode_parent(populate_loc.inode, NULL, NULL);
+ inode_path(populate_loc.inode, NULL, &path);
- local = frame->local;
- prev = cookie;
+ if (!path)
+ goto out;
- layout = local->layout;
+ populate_loc.path = path;
+ if (!populate_loc.name && populate_loc.path) {
+ name = strrchr(populate_loc.path, '/');
+ if (name) {
+ populate_loc.name = name + 1;
- /* Check if the gfid is different for file from other node */
- if (!op_ret && uuid_compare (local->gfid, stbuf->ia_gfid)) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: gfid different on %s",
- local->loc.path, prev->this->name);
+ } else {
+ goto out;
+ }
}
+ hash_subvol = dht_subvol_get_hashed(this, &populate_loc);
+ }
+out:
+ if (populate_loc.inode)
+ loc_wipe(&populate_loc);
+ return hash_subvol;
+}
+/* Common function call by revalidate/selfheal code path to populate
+ internal xattr if it is not present, mark_during_fresh_lookup value
+ determines either function is call by revalidate_cbk(discover_complete)
+ or call by selfheal code path while fresh lookup.
+ Here we do wind a call serially in case of fresh lookup and
+ for other lookup code path we do wind a call parallel.The reason
+ to wind a call serially is at the time of fresh lookup directory is not
+ discovered and at the time of revalidate_lookup directory is
+ already discovered. So, revalidate codepath can race with setxattr
+ codepath and can get into spurious heals because of an ongoing setxattr.
+ This can slow down revalidates, if healing happens in foreground.
+ However, if healing happens in background, there is no direct performance
+ penalty.
+*/
+int
+dht_common_mark_mdsxattr(call_frame_t *frame, int *errst,
+ int mark_during_fresh_lookup)
+{
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ xlator_t *hashed_subvol = NULL;
+ int ret = 0;
+ int i = 0;
+ dict_t *xattrs = NULL;
+ char gfid_local[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+ int32_t zero[1] = {0};
+ dht_conf_t *conf = 0;
+ dht_layout_t *layout = NULL;
+ dht_local_t *copy_local = NULL;
+ call_frame_t *xattr_frame = NULL;
+ gf_boolean_t vol_down = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ local = frame->local;
+ conf = this->private;
+ layout = local->selfheal.layout;
+ local->mds_heal_fresh_lookup = mark_during_fresh_lookup;
+
+ gf_uuid_unparse(local->gfid, gfid_local);
+
+ /* Code to update hashed subvol consider as a mds subvol
+ and wind a setxattr call on hashed subvol to update
+ internal xattr
+ */
+ if (!local->xattr || !dict_get(local->xattr, conf->mds_xattr_key)) {
+ /* It means no internal MDS xattr has been set yet
+ */
+ /* Check the status of all subvol are up while call
+ this function call by lookup code path
+ */
+ if (mark_during_fresh_lookup) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ vol_down = _gf_true;
+ break;
+ }
+ }
+ if (vol_down) {
+ gf_msg_debug(this->name, 0,
+ "subvol %s is down. Unable to "
+ " save mds subvol on inode for "
+ " path %s gfid is %s ",
+ conf->subvolumes[i]->name, local->loc.path,
+ gfid_local);
+ goto out;
+ }
+ }
- LOCK (&frame->lock);
- {
- /* TODO: assert equal mode on stbuf->st_mode and
- local->stbuf->st_mode
+ /* Calculate hashed subvol based on inode and parent node
+ */
+ hashed_subvol = dht_inode_get_hashed_subvol(local->inode, this,
+ &local->loc);
+ if (!hashed_subvol) {
+ gf_msg(this->name, GF_LOG_DEBUG, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for path %s"
+ "gfid is %s ",
+ local->loc.path, gfid_local);
+ if (errst)
+ (*errst) = 1;
+ ret = -1;
+ goto out;
+ }
+ xattrs = dict_new();
+ if (!xattrs) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "dict_new failed");
+ ret = -1;
+ goto out;
+ }
+ /* Add internal MDS xattr on disk for hashed subvol
+ */
+ ret = dht_dict_set_array(xattrs, conf->mds_xattr_key, zero, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary"
+ " value:key = %s for "
+ "path %s",
+ conf->mds_xattr_key, local->loc.path);
+ ret = -1;
+ goto out;
+ }
+ /* Create a new frame to wind a call only while
+ this function call by revalidate_cbk code path
+ To wind a call parallel need to create a new frame
+ */
+ if (mark_during_fresh_lookup) {
+ xattr_frame = create_frame(this, this->ctx->pool);
+ if (!xattr_frame) {
+ ret = -1;
+ goto out;
+ }
+ copy_local = dht_local_init(xattr_frame, &(local->loc), NULL, 0);
+ if (!copy_local) {
+ ret = -1;
+ DHT_STACK_DESTROY(xattr_frame);
+ goto out;
+ }
+ copy_local->stbuf = local->stbuf;
+ copy_local->mds_heal_fresh_lookup = mark_during_fresh_lookup;
+ if (!copy_local->inode)
+ copy_local->inode = inode_ref(local->inode);
+ gf_uuid_copy(copy_local->loc.gfid, local->gfid);
+ FRAME_SU_DO(xattr_frame, dht_local_t);
+ STACK_WIND_COOKIE(xattr_frame, dht_common_mark_mdsxattr_cbk,
+ hashed_subvol, hashed_subvol,
+ hashed_subvol->fops->setxattr, &local->loc,
+ xattrs, 0, NULL);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_common_mark_mdsxattr_cbk,
+ (void *)hashed_subvol, hashed_subvol,
+ hashed_subvol->fops->setxattr, &local->loc,
+ xattrs, 0, NULL);
+ }
+ } else {
+ gf_msg_debug(this->name, 0,
+ "internal xattr %s is present on subvol"
+ "on path %s gfid is %s ",
+ conf->mds_xattr_key, local->loc.path, gfid_local);
+ if (!mark_during_fresh_lookup)
+ dht_selfheal_dir_setattr(frame, &local->loc, &local->stbuf,
+ 0xffffffff, layout);
+ }
- else mkdir/chmod/chown and fix
- */
- ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, xattr);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to merge layouts", local->loc.path);
+out:
+ if (xattrs)
+ dict_unref(xattrs);
+ return ret;
+}
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup of %s on %s returned error (%s)",
- local->loc.path, prev->this->name,
- strerror (op_errno));
+/* Get the value of key from dict in the bytewise and save in array after
+ convert from network byte order to host byte order
+*/
+static int32_t
+dht_dict_get_array(dict_t *dict, char *key, int32_t value[], int32_t size,
+ int *errst)
+{
+ void *ptr = NULL;
+ int32_t len = -1;
+ int32_t vindex = -1;
+ int32_t err = -1;
+ int ret = 0;
+
+ if (dict == NULL) {
+ (*errst) = -1;
+ return -EINVAL;
+ }
+ err = dict_get_ptr_and_len(dict, key, &ptr, &len);
+ if (err != 0) {
+ (*errst) = -1;
+ return err;
+ }
+
+ if (len != (size * sizeof(int32_t))) {
+ (*errst) = -1;
+ return -EINVAL;
+ }
+
+ for (vindex = 0; vindex < size; vindex++) {
+ value[vindex] = ntoh32(*((int32_t *)ptr + vindex));
+ if (value[vindex] < 0)
+ ret = -1;
+ }
+
+ return ret;
+}
- goto unlock;
- }
+static int
+dht_discover_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int32_t check_mds = 0;
+ int is_linkfile = 0;
+ int attempt_unwind = 0;
+ dht_conf_t *conf = 0;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char gfid_node[GF_UUID_BUF_SIZE] = {0};
+ int32_t mds_xattr_val[1] = {0};
+ int errst = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ layout = local->layout;
+
+ /* Check if the gfid is different for file from other node */
+ if (!op_ret && gf_uuid_compare(local->gfid, stbuf->ia_gfid)) {
+ gf_uuid_unparse(stbuf->ia_gfid, gfid_node);
+ gf_uuid_unparse(local->gfid, gfid_local);
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH,
+ "%s: gfid different on %s, gfid local = %s"
+ "gfid other = %s",
+ local->loc.path, prev->name, gfid_local, gfid_node);
+ }
+
+ LOCK(&frame->lock);
+ {
+ /* TODO: assert equal mode on stbuf->st_mode and
+ local->stbuf->st_mode
+
+ else mkdir/chmod/chown and fix
+ */
+
+ ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, xattr);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ "%s: failed to merge layouts for subvol %s", local->loc.path,
+ prev->name);
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
- is_dir = check_is_dir (inode, stbuf, xattr);
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ gf_msg_debug(this->name, op_errno,
+ "lookup of %s on %s returned error", local->loc.path,
+ prev->name);
- if (is_dir) {
- local->dir_count ++;
- } else {
- local->file_count ++;
-
- if (!is_linkfile) {
- /* real file */
- local->cached_subvol = prev->this;
- attempt_unwind = 1;
- } else {
- goto unlock;
- }
- }
+ goto unlock;
+ }
- local->op_ret = 0;
+ is_linkfile = check_is_linkfile(inode, stbuf, xattr,
+ conf->link_xattr_name);
+ is_dir = check_is_dir(inode, stbuf, xattr);
- if (local->xattr == NULL) {
- local->xattr = dict_ref (xattr);
- } else {
- dht_aggregate_xattr (local->xattr, xattr);
- }
+ if (is_dir) {
+ local->dir_count++;
+ } else {
+ local->file_count++;
+
+ if (!is_linkfile && !local->cached_subvol) {
+ /* real file */
+ /* Ok, we somehow managed to find a file on
+ * more than one subvol. ignore this or we
+ * will end up overwriting information while a
+ * a thread is potentially unwinding from
+ * dht_discover_complete
+ */
+ local->cached_subvol = prev;
+ attempt_unwind = 1;
+ } else {
+ goto unlock;
+ }
+ }
- if (local->inode == NULL)
- local->inode = inode_ref (inode);
+ local->op_ret = 0;
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
+ if (local->xattr == NULL) {
+ local->xattr = dict_ref(xattr);
+ } else {
+ /* Don't aggregate for files. See BZ#1484709 */
+ if (is_dir)
+ dht_aggregate_xattr(local->xattr, xattr);
}
+
+ if (local->inode == NULL)
+ local->inode = inode_ref(inode);
+
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->postparent, postparent);
+
+ if (!dict_get(xattr, conf->mds_xattr_key)) {
+ goto unlock;
+ } else {
+ gf_msg_debug(this->name, 0,
+ "internal xattr %s is present on subvol"
+ "on path %s gfid is %s ",
+ conf->mds_xattr_key, local->loc.path, gfid_local);
+ }
+ check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
+ mds_xattr_val, 1, &errst);
+ /* save mds subvol on inode ctx */
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s vol is %s",
+ local->loc.path, prev->name);
+ }
+
+ if ((check_mds < 0) && !errst) {
+ local->mds_xattr = dict_ref(xattr);
+ gf_msg_debug(this->name, 0,
+ "Value of %s is not zero on mds subvol"
+ "so xattr needs to be healed on non mds"
+ " path is %s and vol name is %s "
+ " gfid is %s",
+ conf->mds_xattr_key, local->loc.path, prev->name,
+ gfid_local);
+ local->need_xattr_heal = 1;
+ local->mds_subvol = prev;
+ }
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK(&frame->lock);
out:
- this_call_cnt = dht_frame_return (frame);
+ /* Make sure, the thread executing dht_discover_complete is the one
+ * which calls STACK_DESTROY (frame). In the case of "attempt_unwind",
+ * this makes sure that the thread don't call dht_frame_return, till
+ * call to dht_discover_complete is done.
+ */
+ if (attempt_unwind) {
+ dht_discover_complete(this, frame);
+ }
- if (is_last_call (this_call_cnt) || attempt_unwind) {
- dht_discover_complete (this, frame);
- }
+ this_call_cnt = dht_frame_return(frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_DESTROY (frame);
+ if (is_last_call(this_call_cnt) && !attempt_unwind) {
+ dht_discover_complete(this, frame);
+ }
- return 0;
-}
+ if (is_last_call(this_call_cnt))
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
-int
-dht_discover (call_frame_t *frame, xlator_t *this, loc_t *loc)
+static int
+dht_set_file_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
- int ret;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int call_cnt = 0;
- int op_errno = EINVAL;
- int i = 0;
- call_frame_t *discover_frame = NULL;
+ int ret = -EINVAL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf) {
+ goto err;
+ }
+
+ if (!xattr_req) {
+ goto err;
+ }
+
+ /* Used to check whether this is a linkto file.
+ */
+ ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ conf->link_xattr_name, loc->path);
+ goto err;
+ }
+
+ /* This is used to make sure we don't unlink linkto files
+ * which are the target of an ongoing file migration.
+ */
+ ret = dict_set_uint32(xattr_req, GLUSTERFS_OPEN_FD_COUNT, 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ GLUSTERFS_OPEN_FD_COUNT, loc->path);
+ goto err;
+ }
+
+ ret = 0;
+err:
+ return ret;
+}
+/* This is a gfid based nameless lookup. Without a name, the hashed subvol
+ * cannot be calculated so a lookup is sent to all subvols.
+ */
+static int
+dht_do_discover(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ int ret;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int op_errno = EINVAL;
+ int i = 0;
+ call_frame_t *discover_frame = NULL;
+
+ conf = this->private;
+ local = frame->local;
+
+ /* As we do not know if this is a file or directory, request
+ * both file and directory xattrs
+ */
+ ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ goto err;
+ }
+
+ ret = dht_set_dir_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ goto err;
+ }
+
+ if (loc_is_root(loc)) {
+ /* Request the DHT commit hash xattr (trusted.glusterfs.dht.commithash)
+ * set on the brick root.
+ */
+ ret = dict_set_uint32(local->xattr_req, conf->commithash_xattr_name,
+ sizeof(uint32_t));
+ }
- conf = this->private;
- local = frame->local;
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set 'trusted.glusterfs.dht' key",
- loc->path);
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set 'trusted.glusterfs.dht.linkto' key",
- loc->path);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ gf_uuid_copy(local->gfid, loc->gfid);
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
+ discover_frame = copy_frame(frame);
+ if (!discover_frame) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- if (!local->layout) {
- op_errno = ENOMEM;
- goto err;
- }
+ discover_frame->local = local;
+ frame->local = NULL;
+ local->main_frame = frame;
- uuid_copy (local->gfid, loc->gfid);
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(discover_frame, dht_discover_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
+ }
- discover_frame = copy_frame (frame);
- if (!discover_frame) {
- op_errno = ENOMEM;
- goto err;
- }
+ return 0;
- discover_frame->local = local;
- frame->local = NULL;
- local->main_frame = frame;
+err:
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (discover_frame, dht_discover_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
+ return 0;
+}
- return 0;
+/* Code to call syntask to heal custom xattr from hashed subvol
+ to non hashed subvol
+*/
+int
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno)
+{
+ dht_local_t *copy_local = NULL;
+ call_frame_t *copy = NULL;
+ int ret = -1;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+
+ if (gf_uuid_is_null(local->gfid)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "No gfid exists for path %s "
+ "so healing xattr is not possible",
+ local->loc.path);
+ *op_errno = EIO;
+ goto out;
+ }
+
+ gf_uuid_unparse(local->gfid, gfid_local);
+ copy = create_frame(this, this->ctx->pool);
+ if (copy) {
+ copy_local = dht_local_init(copy, &(local->loc), NULL, 0);
+ if (!copy_local) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "Memory allocation failed "
+ "for path %s gfid %s ",
+ local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
+ DHT_STACK_DESTROY(copy);
+ } else {
+ copy_local->stbuf = local->stbuf;
+ gf_uuid_copy(copy_local->loc.gfid, local->gfid);
+ copy_local->mds_subvol = local->mds_subvol;
+ FRAME_SU_DO(copy, dht_local_t);
+ ret = synctask_new(this->ctx->env, dht_dir_heal_xattrs,
+ dht_dir_heal_xattrs_done, copy, copy);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "Synctask creation failed to heal xattr "
+ "for path %s gfid %s ",
+ local->loc.path, gfid_local);
+ *op_errno = ENOMEM;
+ DHT_STACK_DESTROY(copy);
+ }
+ }
+ }
+out:
+ return ret;
+}
-err:
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
+static int
+dht_needs_selfheal(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int needs_selfheal = 0;
+ int ret = 0;
+
+ local = frame->local;
+ layout = local->layout;
+
+ if (local->need_attrheal || local->need_xattr_heal ||
+ local->need_selfheal) {
+ needs_selfheal = 1;
+ }
+
+ ret = dht_layout_normalize(this, &local->loc, layout);
+
+ if (ret != 0) {
+ gf_msg_debug(this->name, 0, "fixing assignment on %s", local->loc.path);
+ needs_selfheal = 1;
+ }
+ return needs_selfheal;
+}
+static int
+is_permission_different(ia_prot_t *prot1, ia_prot_t *prot2)
+{
+ if ((prot1->owner.read != prot2->owner.read) ||
+ (prot1->owner.write != prot2->owner.write) ||
+ (prot1->owner.exec != prot2->owner.exec) ||
+ (prot1->group.read != prot2->group.read) ||
+ (prot1->group.write != prot2->group.write) ||
+ (prot1->group.exec != prot2->group.exec) ||
+ (prot1->other.read != prot2->other.read) ||
+ (prot1->other.write != prot2->other.write) ||
+ (prot1->other.exec != prot2->other.exec) ||
+ (prot1->suid != prot2->suid) || (prot1->sgid != prot2->sgid) ||
+ (prot1->sticky != prot2->sticky)) {
+ return 1;
+ } else {
return 0;
+ }
}
-
int
-dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- int ret = -1;
- int is_dir = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int32_t check_mds = 0;
+ int errst = 0;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char gfid_node[GF_UUID_BUF_SIZE] = {0};
+ int32_t mds_xattr_val[1] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ layout = local->layout;
+ gf_msg_debug(this->name, op_errno,
+ "%s: lookup on %s returned with op_ret = %d, op_errno = %d",
+ local->loc.path, prev->name, op_ret, op_errno);
+
+ /* The first successful lookup*/
+ if (!op_ret && gf_uuid_is_null(local->gfid)) {
+ memcpy(local->gfid, stbuf->ia_gfid, 16);
+ }
+ if (!gf_uuid_is_null(local->gfid)) {
+ gf_uuid_unparse(local->gfid, gfid_local);
+ }
+
+ /* Check if the gfid is different for file from other node */
+ if (!op_ret && gf_uuid_compare(local->gfid, stbuf->ia_gfid)) {
+ gf_uuid_unparse(stbuf->ia_gfid, gfid_node);
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH,
+ "%s: gfid different on %s."
+ " gfid local = %s, gfid subvol = %s",
+ local->loc.path, prev->name, gfid_local, gfid_node);
+ }
+
+ LOCK(&frame->lock);
+ {
+ /* TODO: assert equal mode on stbuf->st_mode and
+ local->stbuf->st_mode
+ else mkdir/chmod/chown and fix
+ */
+ ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, xattr);
- local = frame->local;
- prev = cookie;
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
- layout = local->layout;
+ /* The GFID is missing on this subvol. Force a heal. */
+ if (op_errno == ENODATA) {
+ local->need_lookup_everywhere = 1;
+ }
+ goto unlock;
+ }
+
+ is_dir = check_is_dir(inode, stbuf, xattr);
+ if (!is_dir) {
+ gf_msg_debug(this->name, 0,
+ "%s: lookup on %s returned non dir 0%o"
+ "calling lookup_everywhere",
+ local->loc.path, prev->name, stbuf->ia_type);
- if (!op_ret && uuid_is_null (local->gfid))
- memcpy (local->gfid, stbuf->ia_gfid, 16);
+ local->need_lookup_everywhere = 1;
+ goto unlock;
+ }
- /* Check if the gfid is different for file from other node */
- if (!op_ret && uuid_compare (local->gfid, stbuf->ia_gfid)) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: gfid different on %s",
- local->loc.path, prev->this->name);
+ local->op_ret = 0;
+ if (local->xattr == NULL) {
+ local->xattr = dict_ref(xattr);
+ } else {
+ dht_aggregate_xattr(local->xattr, xattr);
+ }
+
+ if (__is_root_gfid(stbuf->ia_gfid)) {
+ ret = dht_dir_has_layout(xattr, conf->xattr_name);
+ if (ret >= 0) {
+ if (is_greater_time(local->prebuf.ia_ctime,
+ local->prebuf.ia_ctime_nsec,
+ stbuf->ia_ctime, stbuf->ia_ctime_nsec)) {
+ /* Choose source */
+ local->prebuf.ia_gid = stbuf->ia_gid;
+ local->prebuf.ia_uid = stbuf->ia_uid;
+
+ local->prebuf.ia_ctime = stbuf->ia_ctime;
+ local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec;
+ local->prebuf.ia_prot = stbuf->ia_prot;
+ }
+ }
}
- LOCK (&frame->lock);
- {
- /* TODO: assert equal mode on stbuf->st_mode and
- local->stbuf->st_mode
+ if (local->stbuf.ia_type != IA_INVAL) {
+ /* This is not the first subvol to respond
+ * Compare values to see if attrs need to be healed
+ */
+ if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+ (local->stbuf.ia_uid != stbuf->ia_uid) ||
+ (is_permission_different(&local->stbuf.ia_prot,
+ &stbuf->ia_prot))) {
+ local->need_attrheal = 1;
+ }
+ }
- else mkdir/chmod/chown and fix
- */
- ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, xattr);
+ if (local->inode == NULL)
+ local->inode = inode_ref(inode);
- if (op_ret == -1) {
- local->op_errno = ENOENT;
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup of %s on %s returned error (%s)",
- local->loc.path, prev->this->name,
- strerror (op_errno));
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->postparent, postparent);
- goto unlock;
- }
+ if (!dict_get(xattr, conf->mds_xattr_key)) {
+ gf_msg_debug(this->name, 0,
+ "%s: mds xattr %s is not present "
+ "on %s(gfid = %s)",
+ local->loc.path, conf->mds_xattr_key, prev->name,
+ gfid_local);
+ goto unlock;
+ }
- is_dir = check_is_dir (inode, stbuf, xattr);
- if (!is_dir) {
- gf_log (this->name, GF_LOG_DEBUG,
- "lookup of %s on %s returned non dir 0%o",
- local->loc.path, prev->this->name,
- stbuf->ia_type);
- local->need_selfheal = 1;
- goto unlock;
- }
+ /* Save the mds subvol info and stbuf. This is the value that will
+ * be used for healing
+ */
+ local->mds_subvol = prev;
+ local->mds_stbuf = *stbuf;
- local->op_ret = 0;
- if (local->xattr == NULL) {
- local->xattr = dict_ref (xattr);
- } else {
- dht_aggregate_xattr (local->xattr, xattr);
- }
+ /* Save mds subvol on inode ctx */
- if (local->inode == NULL)
- local->inode = inode_ref (inode);
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "%s: Failed to set mds (%s)", local->loc.path, prev->name);
+ }
+ check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
+ mds_xattr_val, 1, &errst);
+ if ((check_mds < 0) && !errst) {
+ /* Check if xattrs need to be healed on the directories */
+ local->mds_xattr = dict_ref(xattr);
+ gf_msg_debug(this->name, 0,
+ "%s: %s is not zero on %s. Xattrs need to be healed."
+ "(gfid = %s)",
+ local->loc.path, conf->mds_xattr_key, prev->name,
+ gfid_local);
+ local->need_xattr_heal = 1;
+ }
+ }
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
- }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK(&frame->lock);
+ this_call_cnt = dht_frame_return(frame);
- this_call_cnt = dht_frame_return (frame);
+ if (is_last_call(this_call_cnt)) {
+ /* If the mds subvol is not set correctly*/
+ if (!__is_root_gfid(local->gfid) &&
+ (!dict_get(local->xattr, conf->mds_xattr_key))) {
+ local->need_selfheal = 1;
+ }
- if (is_last_call (this_call_cnt)) {
- if (local->need_selfheal) {
- local->need_selfheal = 0;
- dht_lookup_everywhere (frame, this, &local->loc);
- return 0;
- }
+ /* No need to call xattr heal code if volume count is 1
+ */
+ if (conf->subvolume_cnt == 1) {
+ local->need_xattr_heal = 0;
+ }
- if (local->op_ret == 0) {
- ret = dht_layout_normalize (this, &local->loc, layout);
+ if (local->need_selfheal || local->need_lookup_everywhere) {
+ /* Set the gfid-req so posix will set the GFID*/
+ if (!gf_uuid_is_null(local->gfid)) {
+ /* Ok, this should _never_ happen */
+ ret = dict_set_static_bin(local->xattr_req, "gfid-req",
+ local->gfid, 16);
+ } else {
+ if (!gf_uuid_is_null(local->gfid_req))
+ ret = dict_set_static_bin(local->xattr_req, "gfid-req",
+ local->gfid_req, 16);
+ }
+ }
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "fixing assignment on %s",
- local->loc.path);
- goto selfheal;
- }
+ if (local->need_lookup_everywhere) {
+ local->need_lookup_everywhere = 0;
+ dht_lookup_everywhere(frame, this, &local->loc);
+ return 0;
+ }
- dht_layout_set (this, local->inode, layout);
- }
+ if (local->op_ret == 0) {
+ if (dht_needs_selfheal(frame, this)) {
+ goto selfheal;
+ }
+
+ dht_layout_set(this, local->inode, layout);
+ if (local->inode) {
+ dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1);
+ }
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr,
- &local->postparent);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
+ }
}
- return 0;
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+ /* Delete mds xattr at the time of STACK UNWIND */
+ if (local->xattr)
+ GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr);
+
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ }
+
+ return 0;
selfheal:
- FRAME_SU_DO (frame, dht_local_t);
- uuid_copy (local->loc.gfid, local->gfid);
- ret = dht_selfheal_directory (frame, dht_lookup_selfheal_cbk,
- &local->loc, layout);
+ FRAME_SU_DO(frame, dht_local_t);
+ ret = dht_selfheal_directory(frame, dht_lookup_selfheal_cbk, &local->loc,
+ layout);
out:
- return ret;
+ return ret;
+}
+
+static int
+dht_lookup_directory(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ int call_cnt = 0;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, unwind);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, unwind);
+ GF_VALIDATE_OR_GOTO("dht", this->private, unwind);
+ GF_VALIDATE_OR_GOTO("dht", loc, unwind);
+
+ conf = this->private;
+ local = frame->local;
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ goto unwind;
+ }
+
+ if (local->xattr != NULL) {
+ dict_unref(local->xattr);
+ local->xattr = NULL;
+ }
+
+ if (!gf_uuid_is_null(local->gfid)) {
+ /* use this gfid in order to heal any missing ones */
+ ret = dict_set_gfuuid(local->xattr_req, "gfid-req", local->gfid, true);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: Failed to set dictionary value:"
+ " key = gfid-req",
+ local->loc.path);
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(
+ frame, dht_lookup_dir_cbk, conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc, local->xattr_req);
+ }
+ return 0;
+unwind:
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+out:
+ return 0;
}
int
-dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int is_dir = 0;
- int is_linkfile = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, err);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, err);
- GF_VALIDATE_OR_GOTO ("dht", cookie, err);
+dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int is_dir = 0;
+ int is_linkfile = 0;
+ int follow_link = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ uint32_t vol_commit_hash = 0;
+ xlator_t *subvol = NULL;
+ int32_t check_mds = 0;
+ int errst = 0, i = 0;
+ int32_t mds_xattr_val[1] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, err);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, err);
+ GF_VALIDATE_OR_GOTO("dht", cookie, err);
+ GF_VALIDATE_OR_GOTO("dht", this->private, err);
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (!conf->vch_forced) {
+ /* Update the commithash value if available
+ */
+ ret = dict_get_uint32(xattr, conf->commithash_xattr_name,
+ &vol_commit_hash);
+ if (ret == 0) {
+ conf->vol_commit_hash = vol_commit_hash;
+ }
+ }
- local = frame->local;
- prev = cookie;
- conf = this->private;
- if (!conf)
- goto out;
+ gf_uuid_unparse(local->loc.gfid, gfid);
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
-
- if ((op_errno != ENOTCONN)
- && (op_errno != ENOENT)
- && (op_errno != ESTALE)) {
- gf_log (this->name, GF_LOG_INFO,
- "subvolume %s for %s returned -1 (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- }
- if (op_errno == ESTALE) {
- /* propagate the ESTALE to parent.
- * setting local->return_estale would send
- * ESTALE to parent. */
- local->return_estale = 1;
- }
+ gf_msg_debug(this->name, op_errno,
+ "%s: revalidate lookup on %s returned op_ret %d",
+ local->loc.path, prev->name, op_ret);
+
+ LOCK(&frame->lock);
+ {
+ if (gf_uuid_is_null(local->gfid)) {
+ memcpy(local->gfid, local->loc.gfid, 16);
+ }
- /* if it is ENOENT, we may have to do a
- * 'lookup_everywhere()' to make sure
- * the file is not migrated */
- if (op_errno == ENOENT) {
- if (IA_ISREG (local->loc.inode->ia_type)) {
- local->need_lookup_everywhere = 1;
- }
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+
+ if ((op_errno != ENOTCONN) && (op_errno != ENOENT) &&
+ (op_errno != ESTALE)) {
+ gf_msg(this->name, GF_LOG_INFO, op_errno,
+ DHT_MSG_REVALIDATE_CBK_INFO,
+ "Revalidate: subvolume %s for %s "
+ "(gfid = %s) returned -1",
+ prev->name, local->loc.path, gfid);
+ }
+ if (op_errno == ESTALE) {
+ /* propagate the ESTALE to parent.
+ * setting local->return_estale would send
+ * ESTALE to parent. */
+ local->return_estale = 1;
+ }
+
+ /* if it is ENOENT, we may have to do a
+ * 'lookup_everywhere()' to make sure
+ * the file is not migrated */
+ if (op_errno == ENOENT) {
+ if (IA_ISREG(local->loc.inode->ia_type)) {
+ gf_msg_debug(this->name, 0,
+ "found ENOENT for %s. "
+ "Setting "
+ "need_lookup_everywhere"
+ " flag to 1",
+ local->loc.path);
+
+ local->need_lookup_everywhere = 1;
+ } else if (IA_ISDIR(local->loc.inode->ia_type)) {
+ layout = local->layout;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == prev) {
+ layout->list[i].err = op_errno;
+ break;
}
- goto unlock;
+ }
+
+ local->need_selfheal = 1;
}
+ }
- if (stbuf->ia_type != local->inode->ia_type) {
- gf_log (this->name, GF_LOG_INFO,
- "mismatching filetypes 0%o v/s 0%o for %s",
- (stbuf->ia_type), (local->inode->ia_type),
- local->loc.path);
+ /* The GFID is missing on this subvol. Lookup everywhere to force a
+ * gfid heal
+ */
+ if ((op_errno == ENODATA) &&
+ (IA_ISDIR(local->loc.inode->ia_type))) {
+ local->need_lookup_everywhere = 1;
+ }
- local->op_ret = -1;
- local->op_errno = EINVAL;
+ goto unlock;
+ }
- goto unlock;
- }
+ if ((!IA_ISINVAL(local->inode->ia_type)) &&
+ stbuf->ia_type != local->inode->ia_type) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FILE_TYPE_MISMATCH,
+ "mismatching filetypes 0%o v/s 0%o for %s,"
+ " gfid = %s",
+ (stbuf->ia_type), (local->inode->ia_type), local->loc.path,
+ gfid);
- layout = local->layout;
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
- is_dir = check_is_dir (inode, stbuf, xattr);
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ goto unlock;
+ }
- if (is_linkfile) {
- gf_log (this->name, GF_LOG_INFO,
- "linkfile found in revalidate for %s",
- local->loc.path);
- local->return_estale = 1;
+ layout = local->layout;
- goto unlock;
+ is_dir = check_is_dir(inode, stbuf, xattr);
+ is_linkfile = check_is_linkfile(inode, stbuf, xattr,
+ conf->link_xattr_name);
+ if (is_linkfile) {
+ follow_link = 1;
+ goto unlock;
+ }
+ if (is_dir) {
+ ret = dht_dir_has_layout(xattr, conf->xattr_name);
+ if (ret >= 0) {
+ if (is_greater_time(local->prebuf.ia_ctime,
+ local->prebuf.ia_ctime_nsec,
+ stbuf->ia_ctime, stbuf->ia_ctime_nsec)) {
+ /* Choose source */
+ local->prebuf.ia_gid = stbuf->ia_gid;
+ local->prebuf.ia_uid = stbuf->ia_uid;
+
+ local->prebuf.ia_ctime = stbuf->ia_ctime;
+ local->prebuf.ia_ctime_nsec = stbuf->ia_ctime_nsec;
+
+ if (__is_root_gfid(stbuf->ia_gfid))
+ local->prebuf.ia_prot = stbuf->ia_prot;
}
+ }
+
+ if (local->stbuf.ia_type != IA_INVAL) {
+ if ((local->stbuf.ia_gid != stbuf->ia_gid) ||
+ (local->stbuf.ia_uid != stbuf->ia_uid) ||
+ is_permission_different(&local->stbuf.ia_prot,
+ &stbuf->ia_prot)) {
+ local->need_attrheal = 1;
+ }
+ }
+
+ if (!dict_get(xattr, conf->mds_xattr_key)) {
+ gf_msg_debug(this->name, 0,
+ "%s: internal xattr %s is not present"
+ " on subvol %s(gfid is %s)",
+ local->loc.path, conf->mds_xattr_key, prev->name,
+ gfid);
+ } else {
+ check_mds = dht_dict_get_array(xattr, conf->mds_xattr_key,
+ mds_xattr_val, 1, &errst);
+ local->mds_subvol = prev;
+ local->mds_stbuf.ia_gid = stbuf->ia_gid;
+ local->mds_stbuf.ia_uid = stbuf->ia_uid;
+ local->mds_stbuf.ia_prot = stbuf->ia_prot;
+
+ /* save mds subvol on inode ctx */
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this, prev);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set MDS subvol for %s vol is %s",
+ local->loc.path, prev->name);
+ }
+ if ((check_mds < 0) && !errst) {
+ /* Check if xattrs need to be healed on the directory
+ */
+ local->mds_xattr = dict_ref(xattr);
+ gf_msg_debug(this->name, 0,
+ "Value of %s is not zero on "
+ "hashed subvol so xattr needs to"
+ " be healed on non hashed"
+ " path is %s and vol name is %s "
+ " gfid is %s",
+ conf->mds_xattr_key, local->loc.path,
+ prev->name, gfid);
+ local->need_xattr_heal = 1;
+ }
+ }
+ ret = dht_layout_dir_mismatch(this, layout, prev, &local->loc,
+ xattr);
+ if (ret != 0) {
+ /* In memory layout does not match on-disk layout.
+ */
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_MISMATCH,
+ "Mismatching layouts for %s, gfid = %s", local->loc.path,
+ gfid);
- if (is_dir) {
- ret = dht_layout_dir_mismatch (this, layout,
- prev->this, &local->loc,
- xattr);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_INFO,
- "mismatching layouts for %s",
- local->loc.path);
-
- local->layout_mismatch = 1;
+ local->layout_mismatch = 1;
- goto unlock;
- }
- }
+ goto unlock;
+ }
+ }
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
+ gf_uuid_copy(local->stbuf.ia_gfid, stbuf->ia_gfid);
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->postparent, postparent);
- local->op_ret = 0;
+ local->op_ret = 0;
- if (!local->xattr) {
- local->xattr = dict_ref (xattr);
- } else if (is_dir) {
- dht_aggregate_xattr (local->xattr, xattr);
- }
+ if (!local->xattr) {
+ local->xattr = dict_ref(xattr);
+ } else if (is_dir) {
+ dht_aggregate_xattr(local->xattr, xattr);
}
+ }
unlock:
- UNLOCK (&frame->lock);
-out:
- this_call_cnt = dht_frame_return (frame);
-
- if (is_last_call (this_call_cnt)) {
- if (!IA_ISDIR (local->stbuf.ia_type)
- && (local->hashed_subvol != local->cached_subvol)
- && (local->stbuf.ia_nlink == 1)
- && (conf && conf->unhashed_sticky_bit)) {
- local->stbuf.ia_prot.sticky = 1;
- }
- if (local->layout_mismatch) {
- /* Found layout mismatch in the directory, need to
- fix this in the inode context */
- dht_layout_unref (this, local->layout);
- local->layout = NULL;
- dht_lookup_directory (frame, this, &local->loc);
- return 0;
- }
+ UNLOCK(&frame->lock);
- if (local->need_lookup_everywhere) {
- /* As the current layout gave ENOENT error, we would
- need a new layout */
- dht_layout_unref (this, local->layout);
- local->layout = NULL;
-
- /* We know that current cached subvol is no more
- valid, get the new one */
- local->cached_subvol = NULL;
- dht_lookup_everywhere (frame, this, &local->loc);
- return 0;
- }
- if (local->return_estale) {
- local->op_ret = -1;
- local->op_errno = ESTALE;
- }
+ if (follow_link) {
+ /* Found a linkto file. Follow it to see if the target file exists
+ */
+ gf_uuid_copy(local->gfid, stbuf->ia_gfid);
- WIPE (&local->postparent);
+ subvol = dht_linkfile_subvol(this, inode, stbuf, xattr);
+ if (!subvol) {
+ op_errno = ESTALE;
+ local->op_ret = -1;
+ } else {
+ STACK_WIND_COOKIE(frame, dht_lookup_linkfile_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->loc,
+ local->xattr_req);
+ return 0;
+ }
+ }
+
+ this_call_cnt = dht_frame_return(frame);
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr,
- &local->postparent);
+ if (is_last_call(this_call_cnt)) {
+ if (!IA_ISDIR(local->stbuf.ia_type) &&
+ (local->hashed_subvol != local->cached_subvol) &&
+ (local->stbuf.ia_nlink == 1) &&
+ (conf && conf->unhashed_sticky_bit)) {
+ local->stbuf.ia_prot.sticky = 1;
}
+ /* No need to call heal code if volume count is 1
+ */
+ if (conf->subvolume_cnt == 1)
+ local->need_xattr_heal = 0;
+
+ if (IA_ISDIR(local->stbuf.ia_type)) {
+ /* No mds xattr found. Trigger a heal to set it */
+ if (!__is_root_gfid(local->loc.inode->gfid) &&
+ (!dict_get(local->xattr, conf->mds_xattr_key)))
+ local->need_selfheal = 1;
+
+ if (dht_needs_selfheal(frame, this)) {
+ if (!__is_root_gfid(local->loc.inode->gfid)) {
+ if (local->mds_subvol) {
+ local->stbuf.ia_gid = local->mds_stbuf.ia_gid;
+ local->stbuf.ia_uid = local->mds_stbuf.ia_uid;
+ local->stbuf.ia_prot = local->mds_stbuf.ia_prot;
+ }
+ } else {
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+ local->stbuf.ia_prot = local->prebuf.ia_prot;
+ }
-err:
- return ret;
-}
+ layout = local->layout;
+ dht_selfheal_directory(frame, dht_lookup_selfheal_cbk,
+ &local->loc, layout);
+ return 0;
+ }
+ }
+ if (local->layout_mismatch) {
+ /* Found layout mismatch in the directory, need to
+ fix this in the inode context */
+ dht_layout_unref(this, local->layout);
+ local->layout = NULL;
+ dht_lookup_directory(frame, this, &local->loc);
+ return 0;
+ }
-int
-dht_lookup_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+ if (local->need_lookup_everywhere) {
+ /* As the current layout gave ENOENT error, we would
+ need a new layout */
+ dht_layout_unref(this, local->layout);
+ local->layout = NULL;
+
+ /* We know that current cached subvol is no longer
+ valid, get the new one */
+ local->cached_subvol = NULL;
+ if (local->xattr_req) {
+ if (!gf_uuid_is_null(local->gfid)) {
+ ret = dict_set_static_bin(local->xattr_req, "gfid-req",
+ local->gfid, 16);
+ }
+ }
- local = frame->local;
- cached_subvol = local->cached_subvol;
- conf = this->private;
+ dht_lookup_everywhere(frame, this, &local->loc);
+ return 0;
+ }
+ if (local->return_estale) {
+ local->op_ret = -1;
+ local->op_errno = ESTALE;
+ }
- ret = dht_layout_preset (this, local->cached_subvol, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set layout for subvolume %s",
- cached_subvol ? cached_subvol->name : "<nil>");
- local->op_ret = -1;
- local->op_errno = EINVAL;
- goto unwind;
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
}
- local->op_ret = 0;
- if ((local->stbuf.ia_nlink == 1)
- && (conf && conf->unhashed_sticky_bit)) {
- local->stbuf.ia_prot.sticky = 1;
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+
+ /* local->stbuf is updated only from subvols which have a layout
+ * The reason is to avoid choosing attr heal source from newly
+ * added bricks. In case e.g we have only one subvol and for
+ * some reason layout is not present on it, then local->stbuf
+ * will be EINVAL. This is an indication that the subvols
+ * active in the cluster do not have layouts on disk.
+ * Unwind with ESTALE to trigger a fresh lookup */
+ if (is_dir && local->stbuf.ia_type == IA_INVAL) {
+ local->op_ret = -1;
+ local->op_errno = ESTALE;
}
+ /* Delete mds xattr at the time of STACK UNWIND */
+ if (local->xattr)
+ GF_REMOVE_INTERNAL_XATTR(conf->mds_xattr_key, local->xattr);
+
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ }
+
+err:
+ return ret;
+}
+
+static int
+dht_lookup_linkfile_create_cbk(call_frame_t *frame, void *cooie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+
+ local = frame->local;
+ cached_subvol = local->cached_subvol;
+ conf = this->private;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ if (local->locked)
+ dht_unlock_namespace(frame, &local->lock[0]);
+
+ ret = dht_layout_preset(this, local->cached_subvol, local->loc.inode);
+ if (ret < 0) {
+ gf_msg_debug(this->name, EINVAL,
+ "Failed to set layout for subvolume %s, "
+ "(gfid = %s)",
+ cached_subvol ? cached_subvol->name : "<nil>", gfid);
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto unwind;
+ }
+
+ local->op_ret = 0;
+ if ((local->stbuf.ia_nlink == 1) && (conf && conf->unhashed_sticky_bit)) {
+ local->stbuf.ia_prot.sticky = 1;
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
unwind:
- WIPE (&local->postparent);
+ gf_msg_debug(this->name, 0,
+ "creation of linkto on hashed subvol:%s, "
+ "returned with op_ret %d and op_errno %d: %s",
+ local->hashed_subvol->name, op_ret, op_errno,
+ uuid_utoa(local->loc.gfid));
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (lookup, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf, local->xattr,
- &local->postparent);
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal(frame, this);
+
+ dht_set_fixed_dir_stat(&local->postparent);
+
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
out:
- return ret;
+ return ret;
}
+static int
+dht_lookup_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ const char *path = NULL;
-int
-dht_lookup_everywhere_done (call_frame_t *frame, xlator_t *this)
+ local = (dht_local_t *)frame->local;
+ path = local->loc.path;
+ FRAME_SU_UNDO(frame, dht_local_t);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_UNLINK_LOOKUP_INFO,
+ "lookup_unlink returned with "
+ "op_ret -> %d and op-errno -> %d for %s",
+ op_ret, op_errno, ((path == NULL) ? "null" : path));
+
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ dht_lookup_everywhere_done(frame, this);
+ }
+
+ return 0;
+}
+
+static int
+dht_lookup_unlink_of_false_linkto_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int ret = 0;
- dht_local_t *local = NULL;
- xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
- dht_layout_t *layout = NULL;
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ const char *path = NULL;
- local = frame->local;
- hashed_subvol = local->hashed_subvol;
- cached_subvol = local->cached_subvol;
+ local = (dht_local_t *)frame->local;
+ path = local->loc.path;
- if (local->file_count && local->dir_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "path %s exists as a file on one subvolume "
- "and directory on another. "
- "Please fix it manually",
- local->loc.path);
- DHT_STACK_UNWIND (lookup, frame, -1, EIO, NULL, NULL, NULL,
- NULL);
- return 0;
- }
+ FRAME_SU_UNDO(frame, dht_local_t);
- if (local->dir_count) {
- dht_lookup_directory (frame, this, &local->loc);
- return 0;
- }
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_UNLINK_LOOKUP_INFO,
+ "lookup_unlink returned with "
+ "op_ret -> %d and op-errno -> %d for %s",
+ op_ret, op_errno, ((path == NULL) ? "null" : path));
- if (!cached_subvol) {
- DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL, NULL, NULL,
- NULL);
- return 0;
- }
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ if ((op_ret == 0) || ((op_errno != EBUSY) && (op_errno != ENOTCONN))) {
+ dht_lookup_everywhere_done(frame, this);
+ } else {
+ /*When dht_lookup_everywhere is performed, one cached
+ *and one hashed file was found and hashed file does
+ *not point to the above mentioned cached node. So it
+ *was considered as stale and an unlink was performed.
+ *But unlink fails. So may be rebalance is in progress.
+ *now ideally we have two data-files. One obtained during
+ *lookup_everywhere and one where unlink-failed. So
+ *at this point in time we cannot decide which one to
+ *choose because there are chances of first cached
+ *file is truncated after rebalance and if it is chosen
+ *as cached node, application will fail. So return EIO.*/
+
+ if (op_errno == EBUSY) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_UNLINK_FAILED,
+ "Could not unlink the linkto file as "
+ "either fd is open and/or linkto xattr "
+ "is set for %s",
+ ((path == NULL) ? "null" : path));
+ }
+ DHT_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL);
+ }
+ }
+
+ return 0;
+}
- if (local->need_lookup_everywhere) {
- if (uuid_compare (local->gfid, local->inode->gfid)) {
- /* GFID different, return error */
- DHT_STACK_UNWIND (lookup, frame, -1, ENOENT, NULL,
- NULL, NULL, NULL);
- return 0;
- }
- local->op_ret = 0;
- local->op_errno = 0;
- layout = dht_layout_for_subvol (this, cached_subvol);
- if (!layout) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: no pre-set layout for subvolume %s",
- local->loc.path, (cached_subvol ?
- cached_subvol->name :
- "<nil>"));
- }
+static int
+dht_lookup_unlink_stale_linkto_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ const char *path = NULL;
- ret = dht_layout_set (this, local->inode, layout);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: failed to set layout for subvol %s",
- local->loc.path, (cached_subvol ?
- cached_subvol->name :
- "<nil>"));
- }
+ /* NOTE:
+ * If stale file unlink fails either there is an open-fd or is not an
+ * dht-linkto-file then posix_unlink returns EBUSY, which is overwritten
+ * to ENOENT
+ */
- WIPE (&local->postparent);
+ local = frame->local;
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno, local->inode,
- &local->stbuf, local->xattr,
- &local->postparent);
- return 0;
- }
+ if (local) {
+ FRAME_SU_UNDO(frame, dht_local_t);
+ if (local->loc.path)
+ path = local->loc.path;
+ }
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_INFO,
- "cannot create linkfile file for %s on %s: "
- "hashed subvolume cannot be found.",
- local->loc.path, cached_subvol->name);
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_UNLINK_LOOKUP_INFO,
+ "Returned with op_ret %d and "
+ "op_errno %d for %s",
+ op_ret, op_errno, ((path == NULL) ? "null" : path));
- local->op_ret = 0;
- local->op_errno = 0;
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL, NULL);
- ret = dht_layout_preset (frame->this, cached_subvol,
- local->inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "failed to set layout for subvol %s",
- cached_subvol ? cached_subvol->name :
- "<nil>");
- local->op_ret = -1;
- local->op_errno = EINVAL;
- }
+ return 0;
+}
- WIPE (&local->postparent);
+static int
+dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_t *dict)
+{
+ int ret = 0;
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno, local->inode,
- &local->stbuf, local->xattr,
- &local->postparent);
- return 0;
- }
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_NON_LINKTO_UNLINK, 1);
- gf_log (this->name, GF_LOG_DEBUG,
- "linking file %s existing on %s to %s (hash)",
- local->loc.path, cached_subvol->name,
- hashed_subvol->name);
+ if (ret)
+ return -1;
- ret = dht_linkfile_create (frame,
- dht_lookup_linkfile_create_cbk,
- cached_subvol, hashed_subvol, &local->loc);
+ ret = dict_set_int32_sizen(dict, DHT_SKIP_OPEN_FD_UNLINK, 1);
- return ret;
+ if (ret)
+ return -1;
+
+ return 0;
}
+static int32_t
+dht_linkfile_create_lookup_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int call_cnt = 0, ret = 0;
+ xlator_t *subvol = NULL;
+ uuid_t gfid = {
+ 0,
+ };
+ char gfid_str[GF_UUID_BUF_SIZE] = {0};
+
+ subvol = cookie;
+ local = frame->local;
+
+ if (subvol == local->hashed_subvol) {
+ if ((op_ret == 0) || (op_errno != ENOENT))
+ local->dont_create_linkto = _gf_true;
+ } else {
+ if (gf_uuid_is_null(local->gfid))
+ gf_uuid_copy(gfid, local->loc.gfid);
+ else
+ gf_uuid_copy(gfid, local->gfid);
+
+ if ((op_ret == 0) && gf_uuid_compare(gfid, buf->ia_gfid)) {
+ gf_uuid_unparse(gfid, gfid_str);
+ gf_msg_debug(this->name, 0,
+ "gfid (%s) different on cached subvol "
+ "(%s) and looked up inode (%s), not "
+ "creating linkto",
+ uuid_utoa(buf->ia_gfid), subvol->name, gfid_str);
+ local->dont_create_linkto = _gf_true;
+ } else if (op_ret == -1) {
+ local->dont_create_linkto = _gf_true;
+ }
+ }
+
+ call_cnt = dht_frame_return(frame);
+ if (is_last_call(call_cnt)) {
+ if (local->dont_create_linkto)
+ goto no_linkto;
+ else {
+ gf_msg_debug(this->name, 0,
+ "Creating linkto file on %s(hash) to "
+ "%s on %s (gfid = %s)",
+ local->hashed_subvol->name, local->loc.path,
+ local->cached_subvol->name, gfid_str);
+
+ ret = dht_linkfile_create(frame, dht_lookup_linkfile_create_cbk,
+ this, local->cached_subvol,
+ local->hashed_subvol, &local->loc);
+
+ if (ret < 0)
+ goto no_linkto;
+ }
+ }
+
+ return 0;
+
+no_linkto:
+ gf_msg_debug(this->name, 0,
+ "skipped linkto creation (path:%s) (gfid:%s) "
+ "(hashed-subvol:%s) (cached-subvol:%s)",
+ local->loc.path, gfid_str, local->hashed_subvol->name,
+ local->cached_subvol->name);
+
+ dht_lookup_linkfile_create_cbk(frame, NULL, this, 0, 0, local->loc.inode,
+ &local->stbuf, &local->preparent,
+ &local->postparent, local->xattr);
+ return 0;
+}
-int
-dht_lookup_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+static int32_t
+dht_call_lookup_linkfile_create(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ int i = 0;
+ xlator_t *subvol = NULL;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_lookup_everywhere_done (frame, this);
- }
+ local = frame->local;
+ if (gf_uuid_is_null(local->gfid))
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ else
+ gf_uuid_unparse(local->gfid, gfid);
- return 0;
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "protecting namespace failed, skipping linkto "
+ "creation (path:%s)(gfid:%s)(hashed-subvol:%s)"
+ "(cached-subvol:%s)",
+ local->loc.path, gfid, local->hashed_subvol->name,
+ local->cached_subvol->name);
+ goto err;
+ }
+
+ local->locked = _gf_true;
+
+ local->call_cnt = 2;
+
+ for (i = 0; i < 2; i++) {
+ subvol = (subvol == NULL) ? local->hashed_subvol : local->cached_subvol;
+
+ STACK_WIND_COOKIE(frame, dht_linkfile_create_lookup_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->loc, NULL);
+ }
+
+ return 0;
+
+err:
+ dht_lookup_linkfile_create_cbk(frame, NULL, this, 0, 0, local->loc.inode,
+ &local->stbuf, &local->preparent,
+ &local->postparent, local->xattr);
+ return 0;
}
+/* Rebalance is performed from cached_node to hashed_node. Initial cached_node
+ * contains a non-linkto file. After migration it is converted to linkto and
+ * then unlinked. And at hashed_subvolume, first a linkto file is present,
+ * then after migration it is converted to a non-linkto file.
+ *
+ * Lets assume a file is present on cached subvolume and a new brick is added
+ * and new brick is the new_hashed subvolume. So fresh lookup on newly added
+ * hashed subvolume will fail and dht_lookup_everywhere gets called. If just
+ * before sending the dht_lookup_everywhere request rebalance is in progress,
+ *
+ * from cached subvolume it may see: Nonlinkto or linkto or No file
+ * from hashed subvolume it may see: No file or linkto file or non-linkto file
+ *
+ * So this boils down to 9 cases:
+ * at cached_subvol at hashed_subvol
+ * ---------------- -----------------
+ *
+ *a) No file No file
+ * [request reached after [Request reached before
+ * migration] Migration]
+ *
+ *b) No file Linkto File
+ *
+ *c) No file Non-Linkto File
+ *
+ *d) Linkto No-File
+ *
+ *e) Linkto Linkto
+ *
+ *f) Linkto Non-Linkto
+ *
+ *g) NonLinkto No-File
+ *
+ *h) NonLinkto Linkto
+ *
+ *i) NonLinkto NonLinkto
+ *
+ * dht_lookup_everywhere_done takes decision based on any of the above case
+ */
-int
-dht_lookup_everywhere_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- int is_linkfile = 0;
- int is_dir = 0;
- xlator_t *subvol = NULL;
- loc_t *loc = NULL;
- xlator_t *link_subvol = NULL;
- int ret = -1;
- int32_t fd_count = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
-
- local = frame->local;
- loc = &local->loc;
-
- prev = cookie;
- subvol = prev->this;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- if (op_errno != ENOENT)
- local->op_errno = op_errno;
- goto unlock;
- }
+static int
+dht_lookup_everywhere_done(call_frame_t *frame, xlator_t *this)
+{
+ int ret = 0;
+ dht_local_t *local = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_layout_t *layout = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ gf_boolean_t found_non_linkto_on_hashed = _gf_false;
+
+ local = frame->local;
+ hashed_subvol = local->hashed_subvol;
+ cached_subvol = local->cached_subvol;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ if (local->file_count && local->dir_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_FILE_TYPE_MISMATCH,
+ "path %s (gfid = %s)exists as a file on one "
+ "subvolume and directory on another. "
+ "Please fix it manually",
+ local->loc.path, gfid);
+ DHT_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL, NULL, NULL);
+ return 0;
+ }
+ if (local->op_ret && local->gfid_missing) {
+ if (gf_uuid_is_null(local->gfid_req)) {
+ DHT_STACK_UNWIND(lookup, frame, -1, ENODATA, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+ /* A hack */
+ dht_lookup_directory(frame, this, &local->loc);
+ return 0;
+ }
+
+ if (local->dir_count) {
+ dht_lookup_directory(frame, this, &local->loc);
+ return 0;
+ }
+
+ gf_msg_debug(this->name, 0,
+ "STATUS: hashed_subvol %s "
+ "cached_subvol %s",
+ (hashed_subvol == NULL) ? "null" : hashed_subvol->name,
+ (cached_subvol == NULL) ? "null" : cached_subvol->name);
+
+ if (!cached_subvol) {
+ if (local->skip_unlink.handle_valid_link && hashed_subvol) {
+ /*Purpose of "DHT_SKIP_NON_LINKTO_UNLINK":
+ * If this lookup is performed by rebalance and this
+ * rebalance process detected hashed file and by
+ * the time it sends the lookup request to cached node,
+ * file got migrated and now at initial hashed_node,
+ * final migrated file is present. With current logic,
+ * because this process fails to find the cached_node,
+ * it will unlink the file at initial hashed_node.
+ *
+ * So we avoid this by setting key, and checking at the
+ * posix_unlink that unlink the file only if file is a
+ * linkto file and not a migrated_file.
+ */
+
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(
+ local->xattr_req);
+
+ if (ret) {
+ /* If for some reason, setting key in the dict
+ * fails, return with ENOENT, as with respect to
+ * this process, it detected only a stale link
+ * file.
+ *
+ * Next lookup will delete it.
+ *
+ * Performing deletion of stale link file when
+ * setting key in dict fails, may cause the data
+ * loss because of the above mentioned race.
+ */
+
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL,
+ NULL);
+ } else {
+ local->skip_unlink.handle_valid_link = _gf_false;
+
+ gf_msg_debug(this->name, 0,
+ "No Cached was found and "
+ "unlink on hashed was skipped"
+ " so performing now: %s",
+ local->loc.path);
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND(frame, dht_lookup_unlink_stale_linkto_cbk,
+ hashed_subvol, hashed_subvol->fops->unlink,
+ &local->loc, 0, local->xattr_req);
+ }
+
+ } else {
+ gf_msg_debug(this->name, 0,
+ "There was no cached file and "
+ "unlink on hashed is not skipped %s",
+ local->loc.path);
+
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL, NULL);
+ }
+ return 0;
+ }
+
+ /* At the time of dht_lookup, no file was found on hashed and that is
+ * why dht_lookup_everywhere is called, but by the time
+ * dht_lookup_everywhere
+ * reached to server, file might have already migrated. In that case we
+ * will find a migrated file at the hashed_node. In this case store the
+ * layout in context and return successfully.
+ */
+
+ if (hashed_subvol || local->need_lookup_everywhere) {
+ if (local->need_lookup_everywhere) {
+ found_non_linkto_on_hashed = _gf_true;
+
+ } else if ((local->file_count == 1) &&
+ (hashed_subvol == cached_subvol)) {
+ gf_msg_debug(this->name, 0,
+ "found cached file on hashed subvolume "
+ "so store in context and return for %s",
+ local->loc.path);
- if (uuid_is_null (local->gfid))
- uuid_copy (local->gfid, buf->ia_gfid);
+ found_non_linkto_on_hashed = _gf_true;
+ }
+
+ if (found_non_linkto_on_hashed)
+ goto preset_layout;
+ }
+
+ if (hashed_subvol) {
+ if (local->skip_unlink.handle_valid_link == _gf_true) {
+ if (cached_subvol == local->skip_unlink.hash_links_to) {
+ if (gf_uuid_compare(local->skip_unlink.cached_gfid,
+ local->skip_unlink.hashed_gfid)) {
+ /*GFID different, return error*/
+ DHT_STACK_UNWIND(lookup, frame, -1, ESTALE, NULL, NULL,
+ NULL, NULL);
- if (uuid_compare (local->gfid, buf->ia_gfid)) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: gfid differs on subvolume %s",
- loc->path, prev->this->name);
+ return 0;
}
- is_linkfile = check_is_linkfile (inode, buf, xattr);
- is_dir = check_is_dir (inode, buf, xattr);
-
- if (is_linkfile) {
- link_subvol = dht_linkfile_subvol (this, inode, buf,
- xattr);
- gf_log (this->name, GF_LOG_DEBUG,
- "found on %s linkfile %s (-> %s)",
- subvol->name, loc->path,
- link_subvol ? link_subvol->name : "''");
- goto unlock;
+ ret = dht_layout_preset(this, cached_subvol, local->loc.inode);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ DHT_MSG_LAYOUT_PRESET_FAILED,
+ "Could not set pre-set layout "
+ "for subvolume %s",
+ cached_subvol->name);
}
- /* non linkfile GFID takes precedence */
- uuid_copy (local->gfid, buf->ia_gfid);
+ local->op_ret = (ret == 0) ? ret : -1;
+ local->op_errno = (ret == 0) ? ret : EINVAL;
- if (is_dir) {
- local->dir_count++;
+ /* Presence of local->cached_subvol validates
+ * that lookup from cached node is successful
+ */
- gf_log (this->name, GF_LOG_DEBUG,
- "found on %s directory %s",
- subvol->name, loc->path);
- } else {
- local->file_count++;
-
- if (!local->cached_subvol) {
- /* found one file */
- dht_iatt_merge (this, &local->stbuf, buf,
- subvol);
- local->xattr = dict_ref (xattr);
- local->cached_subvol = subvol;
- gf_log (this->name, GF_LOG_DEBUG,
- "found on %s file %s",
- subvol->name, loc->path);
-
- dht_iatt_merge (this, &local->postparent,
- postparent, subvol);
- } else {
- /* This is where we need 'rename' both entries logic */
- gf_log (this->name, GF_LOG_WARNING,
- "multiple subvolumes (%s and %s) have "
- "file %s (preferably rename the file "
- "in the backend, and do a fresh lookup)",
- local->cached_subvol->name,
- subvol->name, local->loc.path);
- }
+ if (!local->op_ret && local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
}
- }
-unlock:
- UNLOCK (&frame->lock);
- if (is_linkfile) {
- ret = dict_get_int32 (xattr, GLUSTERFS_OPEN_FD_COUNT, &fd_count);
- /* Delete the linkfile only if there are no open fds on it.
- if there is a open-fd, it may be in migration */
- if (!ret && (fd_count == 0)) {
- gf_log (this->name, GF_LOG_INFO,
- "deleting stale linkfile %s on %s",
- loc->path, subvol->name);
- STACK_WIND (frame, dht_lookup_unlink_cbk,
- subvol, subvol->fops->unlink, loc, 0, NULL);
- return 0;
+ gf_msg_debug(this->name, 0,
+ "Skipped unlinking linkto file "
+ "on the hashed subvolume. "
+ "Returning success as it is a "
+ "valid linkto file. Path:%s",
+ local->loc.path);
+
+ goto unwind_hashed_and_cached;
+ } else {
+ local->skip_unlink.handle_valid_link = _gf_false;
+
+ gf_msg_debug(this->name, 0,
+ "Linkto file found on hashed "
+ "subvol "
+ "and data file found on cached "
+ "subvolume. But linkto points to "
+ "different cached subvolume (%s) "
+ "path %s",
+ (local->skip_unlink.hash_links_to
+ ? local->skip_unlink.hash_links_to->name
+ : " <nil>"),
+ local->loc.path);
+
+ if (local->skip_unlink.opend_fd_count == 0) {
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(
+ local->xattr_req);
+
+ if (ret) {
+ DHT_STACK_UNWIND(lookup, frame, -1, EIO, NULL, NULL,
+ NULL, NULL);
+ } else {
+ local->call_cnt = 1;
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND(frame, dht_lookup_unlink_of_false_linkto_cbk,
+ hashed_subvol, hashed_subvol->fops->unlink,
+ &local->loc, 0, local->xattr_req);
+ }
+
+ return 0;
}
+ }
}
+ }
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_lookup_everywhere_done (frame, this);
+preset_layout:
+
+ if (found_non_linkto_on_hashed) {
+ if (local->need_lookup_everywhere) {
+ if (gf_uuid_compare(local->gfid, local->inode->gfid)) {
+ /* GFID different, return error */
+ DHT_STACK_UNWIND(lookup, frame, -1, ENOENT, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
}
-out:
- return ret;
-}
+ local->op_ret = 0;
+ local->op_errno = 0;
+ layout = dht_layout_for_subvol(this, cached_subvol);
+ if (!layout) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "%s: no pre-set layout for subvolume %s,"
+ " gfid = %s",
+ local->loc.path,
+ (cached_subvol ? cached_subvol->name : "<nil>"), gfid);
+ }
+ ret = dht_layout_set(this, local->inode, layout);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "%s: failed to set layout for subvol %s, "
+ "gfid = %s",
+ local->loc.path,
+ (cached_subvol ? cached_subvol->name : "<nil>"), gfid);
+ }
-int
-dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc)
-{
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int i = 0;
- int call_cnt = 0;
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
+ }
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
- GF_VALIDATE_OR_GOTO ("dht", loc, out);
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
+ }
- conf = this->private;
- local = frame->local;
+ if (!hashed_subvol) {
+ gf_msg_debug(this->name, 0,
+ "Cannot create linkfile for %s on %s: "
+ "hashed subvolume cannot be found, gfid = %s.",
+ local->loc.path, cached_subvol->name, gfid);
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ local->op_ret = 0;
+ local->op_errno = 0;
- if (!local->inode)
- local->inode = inode_ref (loc->inode);
+ ret = dht_layout_preset(frame->this, cached_subvol, local->inode);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_PRESET_FAILED,
+ "Failed to set layout for subvol %s"
+ ", gfid = %s",
+ cached_subvol ? cached_subvol->name : "<nil>", gfid);
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ }
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_everywhere_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- loc, local->xattr_req);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
}
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
return 0;
-out:
- DHT_STACK_UNWIND (lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
-err:
- return -1;
+ }
+
+ if (frame->root->op != GF_FOP_RENAME) {
+ local->current = &local->lock[0];
+ ret = dht_protect_namespace(frame, &local->loc, hashed_subvol,
+ &local->current->ns,
+ dht_call_lookup_linkfile_create);
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Creating linkto file on %s(hash) to %s on %s "
+ "(gfid = %s)",
+ hashed_subvol->name, local->loc.path, cached_subvol->name,
+ gfid);
+
+ ret = dht_linkfile_create(frame, dht_lookup_linkfile_create_cbk, this,
+ cached_subvol, hashed_subvol, &local->loc);
+ }
+
+ return ret;
+
+unwind_hashed_and_cached:
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(lookup, frame, local->op_ret, local->op_errno,
+ local->inode, &local->stbuf, local->xattr,
+ &local->postparent);
+ return 0;
}
-
-int
-dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
-{
- call_frame_t *prev = NULL;
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- loc_t *loc = NULL;
- dht_conf_t *conf = NULL;
- int ret = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, unwind);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, unwind);
- GF_VALIDATE_OR_GOTO ("dht", this->private, unwind);
- GF_VALIDATE_OR_GOTO ("dht", cookie, unwind);
-
- prev = cookie;
- subvol = prev->this;
- conf = this->private;
- local = frame->local;
- loc = &local->loc;
-
+static int
+dht_lookup_everywhere_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ int is_linkfile = 0;
+ int is_dir = 0;
+ loc_t *loc = NULL;
+ xlator_t *link_subvol = NULL;
+ int ret = -1;
+ int32_t fd_count = 0;
+ dht_conf_t *conf = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ dict_t *dict_req = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+
+ local = frame->local;
+ loc = &local->loc;
+ conf = this->private;
+
+ prev = cookie;
+
+ gf_msg_debug(this->name, 0,
+ "returned with op_ret %d and op_errno %d (%s) "
+ "from subvol %s",
+ op_ret, op_errno, loc->path, prev->name);
+
+ LOCK(&frame->lock);
+ {
if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "lookup of %s on %s (following linkfile) failed (%s)",
- local->loc.path, subvol->name, strerror (op_errno));
- goto err;
+ if (op_errno != ENOENT)
+ local->op_errno = op_errno;
+ if (op_errno == ENODATA)
+ local->gfid_missing = _gf_true;
+ goto unlock;
}
- if (check_is_dir (inode, stbuf, xattr)) {
- gf_log (this->name, GF_LOG_INFO,
- "lookup of %s on %s (following linkfile) reached dir",
- local->loc.path, subvol->name);
- goto err;
- }
+ if (gf_uuid_is_null(local->gfid))
+ gf_uuid_copy(local->gfid, buf->ia_gfid);
- if (check_is_linkfile (inode, stbuf, xattr)) {
- gf_log (this->name, GF_LOG_INFO,
- "lookup of %s on %s (following linkfile) reached link",
- local->loc.path, subvol->name);
- goto err;
- }
+ gf_uuid_unparse(local->gfid, gfid);
- if (uuid_compare (local->gfid, stbuf->ia_gfid)) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: gfid different on data file on %s",
- local->loc.path, subvol->name);
- goto err;
+ if (gf_uuid_compare(local->gfid, buf->ia_gfid)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH,
+ "%s: gfid differs on subvolume %s,"
+ " gfid local = %s, gfid node = %s",
+ loc->path, prev->name, gfid, uuid_utoa(buf->ia_gfid));
}
- if ((stbuf->ia_nlink == 1)
- && (conf && conf->unhashed_sticky_bit)) {
- stbuf->ia_prot.sticky = 1;
+ is_linkfile = check_is_linkfile(inode, buf, xattr,
+ conf->link_xattr_name);
+
+ if (is_linkfile) {
+ link_subvol = dht_linkfile_subvol(this, inode, buf, xattr);
+ gf_msg_debug(this->name, 0, "found on %s linkfile %s (-> %s)",
+ prev->name, loc->path,
+ link_subvol ? link_subvol->name : "''");
+ goto unlock;
}
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "failed to set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
+ is_dir = check_is_dir(inode, buf, xattr);
+
+ /* non linkfile GFID takes precedence but don't overwrite
+ gfid if we have already found a cached file*/
+ if (!local->cached_subvol)
+ gf_uuid_copy(local->gfid, buf->ia_gfid);
+
+ if (is_dir) {
+ local->dir_count++;
+
+ gf_msg_debug(this->name, 0, "found on %s directory %s", prev->name,
+ loc->path);
+ } else {
+ local->file_count++;
+
+ gf_msg_debug(this->name, 0, "found cached file on %s for %s",
+ prev->name, loc->path);
+
+ if (!local->cached_subvol) {
+ /* found one file */
+ dht_iatt_merge(this, &local->stbuf, buf);
+
+ local->xattr = dict_ref(xattr);
+ local->cached_subvol = prev;
+
+ gf_msg_debug(this->name, 0,
+ "storing cached on %s file"
+ " %s",
+ prev->name, loc->path);
+
+ dht_iatt_merge(this, &local->postparent, postparent);
+
+ gf_uuid_copy(local->skip_unlink.cached_gfid, buf->ia_gfid);
+ } else {
+ /* This is where we need 'rename' both entries logic */
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_FILE_ON_MULT_SUBVOL,
+ "multiple subvolumes (%s and %s) have "
+ "file %s (preferably rename the file "
+ "in the backend,and do a fresh lookup)",
+ local->cached_subvol->name, prev->name, local->loc.path);
+ }
}
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ if (is_linkfile) {
+ ret = dict_get_int32(xattr, GLUSTERFS_OPEN_FD_COUNT, &fd_count);
+
+ /* Any linkto file found on the non-hashed subvolume should
+ * be unlinked (performed in the "else if" block below)
+ *
+ * But if a linkto file is found on hashed subvolume, it may be
+ * pointing to valid cached node. So unlinking of linkto
+ * file on hashed subvolume is skipped and inside
+ * dht_lookup_everywhere_done, checks are performed. If this
+ * linkto file is found as stale linkto file, it is deleted
+ * otherwise unlink is skipped.
+ */
-unwind:
- WIPE (postparent);
+ if (local->hashed_subvol && local->hashed_subvol == prev) {
+ local->skip_unlink.handle_valid_link = _gf_true;
+ local->skip_unlink.opend_fd_count = fd_count;
+ local->skip_unlink.hash_links_to = link_subvol;
+ gf_uuid_copy(local->skip_unlink.hashed_gfid, buf->ia_gfid);
+
+ gf_msg_debug(this->name, 0,
+ "Found"
+ " one linkto file on hashed subvol %s "
+ "for %s: Skipping unlinking till "
+ "everywhere_done",
+ prev->name, loc->path);
+
+ } else if (!ret && (fd_count == 0)) {
+ dict_req = dict_new();
+
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(dict_req);
+
+ if (ret) {
+ /* Skip unlinking for dict_failure
+ *File is found as a linkto file on non-hashed,
+ *subvolume. In the current implementation,
+ *finding a linkto-file on non-hashed does not
+ *always implies that it is stale. So deletion
+ *of file should be done only when both fd is
+ *closed and linkto-xattr is set. In case of
+ *dict_set failure, avoid skipping of file.
+ *NOTE: dht_frame_return should get called for
+ * this block.
+ */
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
- postparent);
+ dict_unref(dict_req);
+
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "attempting deletion of stale linkfile "
+ "%s on %s (hashed subvol is %s)",
+ loc->path, prev->name,
+ (local->hashed_subvol ? local->hashed_subvol->name
+ : "<null>"));
+ /* *
+ * These stale files may be created using root
+ * user. Hence deletion will work only with
+ * root.
+ */
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND(frame, dht_lookup_unlink_cbk, prev,
+ prev->fops->unlink, loc, 0, dict_req);
- return 0;
+ dict_unref(dict_req);
+
+ return 0;
+ }
+ }
+ }
+
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ dht_lookup_everywhere_done(frame, this);
+ }
-err:
- dht_lookup_everywhere (frame, this, loc);
out:
- return 0;
+ return ret;
}
-
int
-dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_lookup_everywhere(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int call_cnt = 0;
- int i = 0;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int ret = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ int call_cnt = 0;
- GF_VALIDATE_OR_GOTO ("dht", frame, out);
- GF_VALIDATE_OR_GOTO ("dht", this, unwind);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, unwind);
- GF_VALIDATE_OR_GOTO ("dht", this->private, unwind);
- GF_VALIDATE_OR_GOTO ("dht", loc, unwind);
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+ GF_VALIDATE_OR_GOTO("dht", loc, out);
- conf = this->private;
- local = frame->local;
+ conf = this->private;
+ local = frame->local;
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- goto unwind;
- }
+ if (!local->inode)
+ local->inode = inode_ref(loc->inode);
- if (local->xattr != NULL) {
- dict_unref (local->xattr);
- local->xattr = NULL;
- }
+ gf_msg_debug(this->name, 0, "winding lookup call to %d subvols", call_cnt);
- if (!uuid_is_null (local->gfid)) {
- ret = dict_set_static_bin (local->xattr_req, "gfid-req",
- local->gfid, 16);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set gfid", local->loc.path);
- }
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_lookup_everywhere_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, loc,
+ local->xattr_req);
+ }
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
- return 0;
-unwind:
- DHT_STACK_UNWIND (lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+ return 0;
out:
- return 0;
-
+ DHT_STACK_UNWIND(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+err:
+ return -1;
}
-
int
-dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
-{
- char is_linkfile = 0;
- char is_dir = 0;
- xlator_t *subvol = NULL;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- loc_t *loc = NULL;
- call_frame_t *prev = NULL;
- int ret = 0;
- uint64_t tmp_layout = 0;
- dht_layout_t *parent_layout = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
- GF_VALIDATE_OR_GOTO ("dht", this->private, out);
-
- conf = this->private;
-
- prev = cookie;
- local = frame->local;
- loc = &local->loc;
+dht_lookup_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ xlator_t *prev = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ loc_t *loc = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, unwind);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, unwind);
+ GF_VALIDATE_OR_GOTO("dht", this->private, unwind);
+ GF_VALIDATE_OR_GOTO("dht", cookie, unwind);
+
+ prev = cookie;
+ subvol = prev;
+ conf = this->private;
+ local = frame->local;
+ loc = &local->loc;
+
+ gf_uuid_unparse(loc->gfid, gfid);
+
+ if (op_ret == -1) {
+ gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_LINK_FILE_LOOKUP_INFO,
+ "Lookup of %s on %s (following linkfile) failed "
+ ",gfid = %s",
+ local->loc.path, subvol->name, gfid);
+
+ /* If cached subvol returned ENOTCONN, do not do
+ lookup_everywhere. We need to make sure linkfile does not get
+ removed, which can take away the namespace, and subvol is
+ anyways down. */
+
+ local->cached_subvol = NULL;
+ if (op_errno != ENOTCONN)
+ goto err;
+ else
+ goto unwind;
+ }
+
+ if (check_is_dir(inode, stbuf, xattr)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LINK_FILE_LOOKUP_INFO,
+ "Lookup of %s on %s (following linkfile) reached dir,"
+ " gfid = %s",
+ local->loc.path, subvol->name, gfid);
+ goto err;
+ }
+
+ if (check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LINK_FILE_LOOKUP_INFO,
+ "lookup of %s on %s (following linkfile) reached link,"
+ "gfid = %s",
+ local->loc.path, subvol->name, gfid);
+ goto err;
+ }
+
+ if (gf_uuid_compare(local->gfid, stbuf->ia_gfid)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH,
+ "%s: gfid different on data file on %s,"
+ " gfid local = %s, gfid node = %s ",
+ local->loc.path, subvol->name, gfid, uuid_utoa(stbuf->ia_gfid));
+ goto err;
+ }
+
+ if ((stbuf->ia_nlink == 1) && (conf && conf->unhashed_sticky_bit)) {
+ stbuf->ia_prot.sticky = 1;
+ }
+
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_PRESET_FAILED,
+ "Failed to set layout for subvolume %s,"
+ "gfid = %s",
+ prev->name, gfid);
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
- /* This is required for handling stale linkfile deletion,
- * or any more call which happens from this 'loc'.
- */
- if (!op_ret && uuid_is_null (local->gfid))
- memcpy (local->gfid, stbuf->ia_gfid, 16);
-
- if (ENTRY_MISSING (op_ret, op_errno)) {
- gf_log (this->name, GF_LOG_TRACE, "Entry %s missing on subvol"
- " %s", loc->path, prev->this->name);
- if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_ON) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
- if ((conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) &&
- (loc->parent)) {
- ret = inode_ctx_get (loc->parent, this, &tmp_layout);
- parent_layout = (dht_layout_t *)(long)tmp_layout;
- if (parent_layout->search_unhashed) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
- }
- }
+unwind:
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ dht_set_fixed_dir_stat(postparent);
+ DHT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+ postparent);
- if (op_ret == 0) {
- is_dir = check_is_dir (inode, stbuf, xattr);
- if (is_dir) {
- local->inode = inode_ref (inode);
- local->xattr = dict_ref (xattr);
+ return 0;
+
+err:
+ dht_lookup_everywhere(frame, this, loc);
+out:
+ return 0;
+}
+
+/* Code to get hashed subvol based on inode and loc
+ First it check if loc->parent and loc->path exist then it get
+ hashed subvol based on loc.
+*/
+
+static gf_boolean_t
+dht_should_lookup_everywhere(xlator_t *this, dht_conf_t *conf, loc_t *loc)
+{
+ dht_layout_t *parent_layout = NULL;
+ int ret = 0;
+ gf_boolean_t lookup_everywhere = _gf_true;
+
+ /* lookup-optimize supersedes lookup-unhashed settings.
+ * If it is set, do not process search_unhashed
+ * If lookup-optimize if enabled, lookup everywhere if:
+ * - this is the rebalance daemon.
+ * - loc->parent is unavailable.
+ * - parent_layout is unavailable
+ * - parent_layout->commit_hash != conf->vol_commit_hash
+ */
+
+ if (conf->lookup_optimize) {
+ if (!conf->defrag && loc->parent) {
+ ret = dht_inode_ctx_layout_get(loc->parent, this, &parent_layout);
+ if (!ret && parent_layout &&
+ (parent_layout->commit_hash == conf->vol_commit_hash)) {
+ lookup_everywhere = _gf_false;
+ }
+ }
+ goto out;
+ } else {
+ if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) {
+ if (loc->parent) {
+ ret = dht_inode_ctx_layout_get(loc->parent, this,
+ &parent_layout);
+ if (ret || !parent_layout ||
+ (!parent_layout->search_unhashed)) {
+ lookup_everywhere = _gf_false;
}
- }
+ } else {
+ lookup_everywhere = _gf_false;
+ }
- if (is_dir || (op_ret == -1 && op_errno == ENOTCONN)) {
- dht_lookup_directory (frame, this, &local->loc);
- return 0;
+ goto out;
}
+ }
+out:
+ return lookup_everywhere;
+}
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "Lookup of %s for subvolume"
- " %s failed with error %s", loc->path, prev->this->name,
- strerror (op_errno));
+int
+dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ char is_linkfile = 0;
+ char is_dir = 0;
+ xlator_t *subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ xlator_t *prev = NULL;
+ int ret = 0;
+ uint32_t vol_commit_hash = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+ GF_VALIDATE_OR_GOTO("dht", this->private, out);
+
+ conf = this->private;
+
+ prev = cookie;
+ local = frame->local;
+ loc = &local->loc;
+
+ gf_msg_debug(this->name, op_errno,
+ "%s: fresh_lookup on %s returned with op_ret %d", loc->path,
+ prev->name, op_ret);
+
+ if (op_ret == -1) {
+ if (ENTRY_MISSING(op_ret, op_errno)) {
+ if (1 == conf->subvolume_cnt) {
+ /* No need to lookup again */
goto out;
- }
-
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
+ }
- if (!is_linkfile) {
- /* non-directory and not a linkfile */
+ gf_msg_debug(this->name, 0, "Entry %s missing on subvol %s",
+ loc->path, prev->name);
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_INFO,
- "could not set pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
- goto out;
- }
+ if (dht_should_lookup_everywhere(this, conf, loc)) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
+ }
- subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "linkfile not having link subvolume. path=%s",
- loc->path);
- dht_lookup_everywhere (frame, this, loc);
+ } else {
+ /* posix returns ENODATA if the gfid is not set but the client and
+ * server protocol layers do not send the stbuf. We need to
+ * heal this so check if this is a directory on the other subvols.
+ */
+ if ((op_errno == ENOTCONN) || (op_errno == ENODATA)) {
+ dht_lookup_directory(frame, this, &local->loc);
return 0;
+ }
+ }
+ gf_msg_debug(this->name, op_errno, "%s: Lookup on subvolume %s failed",
+ loc->path, prev->name);
+ goto out;
+ }
+
+ /* Lookup succeeded - op_ret = 0 */
+
+ /* This is required for handling stale linkfile deletion,
+ * or any more call which happens from this 'loc'.
+ */
+ if (gf_uuid_is_null(local->gfid)) {
+ /*This is set from the first successful response*/
+ memcpy(local->gfid, stbuf->ia_gfid, 16);
+ }
+
+ if (!conf->vch_forced) {
+ /* Update the commit hash in conf if it is found */
+ ret = dict_get_uint32(xattr, conf->commithash_xattr_name,
+ &vol_commit_hash);
+ if (ret == 0) {
+ conf->vol_commit_hash = vol_commit_hash;
}
+ }
- STACK_WIND (frame, dht_lookup_linkfile_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
-
+ is_dir = check_is_dir(inode, stbuf, xattr);
+ if (is_dir) {
+ /* A directory is present on all subvols, send the lookup to
+ * all subvols now */
+ local->inode = inode_ref(inode);
+ local->xattr = dict_ref(xattr);
+ dht_lookup_directory(frame, this, &local->loc);
return 0;
+ }
-out:
- /*
- * FIXME: postparent->ia_size and postparent->st_blocks do not have
- * correct values. since, postparent corresponds to a directory these
- * two members should have values equal to sum of corresponding values
- * from each of the subvolume. See dht_iatt_merge for reference.
+ is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
+
+ if (!is_linkfile) {
+ /* non-directory and not a linkto file. This is a data file
+ * Update the layout to point to the cached subvol
*/
- WIPE (postparent);
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_PRESET_FAILED,
+ "%s: could not set pre-set layout for subvolume %s",
+ loc->path, prev->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ goto out;
+ }
+
+ /* This is a linkto file. Get the value of the target subvol from the
+ * linkto xattr and lookup there to see if the file exists
+ */
+ subvol = dht_linkfile_subvol(this, inode, stbuf, xattr);
+ if (!subvol) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "%s: No link subvol for linkto", loc->path);
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
+ }
+
+ gf_msg_debug(this->name, 0, "%s: Calling lookup on linkto target %s",
+ loc->path, subvol->name);
+
+ STACK_WIND_COOKIE(frame, dht_lookup_linkfile_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->loc, local->xattr_req);
+
+ return 0;
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
- postparent);
+out:
+ /*
+ * FIXME: postparent->ia_size and postparent->st_blocks do not have
+ * correct values. since, postparent corresponds to a directory these
+ * two members should have values equal to sum of corresponding values
+ * from each of the subvolume. See dht_iatt_merge for reference.
+ */
+
+ if (!op_ret && local && local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
+
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ dht_set_fixed_dir_stat(postparent);
+ DHT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+ postparent);
err:
- return 0;
+ return 0;
}
-/* For directories, check if acl xattrs have been requested (by the acl xlator),
- * if not, request for them. These xattrs are needed for dht dir self-heal to
- * perform proper self-healing of dirs
+/* For directories, check if acl xattrs have been requested (by the acl
+ * xlator), if not, request for them. These xattrs are needed for dht dir
+ * self-heal to perform proper self-healing of dirs
*/
-void
-dht_check_and_set_acl_xattr_req (inode_t *inode, dict_t *xattr_req)
+static void
+dht_check_and_set_acl_xattr_req(xlator_t *this, dict_t *xattr_req)
{
- int ret = 0;
+ int ret = 0;
- GF_ASSERT (inode);
- GF_ASSERT (xattr_req);
+ GF_ASSERT(xattr_req);
- if (inode->ia_type != IA_IFDIR)
- return;
+ if (!dict_get(xattr_req, POSIX_ACL_ACCESS_XATTR)) {
+ ret = dict_set_int8(xattr_req, POSIX_ACL_ACCESS_XATTR, 0);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s",
+ POSIX_ACL_ACCESS_XATTR);
+ }
- if (!dict_get (xattr_req, POSIX_ACL_ACCESS_XATTR)) {
- ret = dict_set_int8 (xattr_req, POSIX_ACL_ACCESS_XATTR, 0);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set key %s",
- POSIX_ACL_ACCESS_XATTR);
- }
+ if (!dict_get(xattr_req, POSIX_ACL_DEFAULT_XATTR)) {
+ ret = dict_set_int8(xattr_req, POSIX_ACL_DEFAULT_XATTR, 0);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s",
+ POSIX_ACL_DEFAULT_XATTR);
+ }
- if (!dict_get (xattr_req, POSIX_ACL_DEFAULT_XATTR)) {
- ret = dict_set_int8 (xattr_req, POSIX_ACL_DEFAULT_XATTR, 0);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set key %s",
- POSIX_ACL_DEFAULT_XATTR);
- }
+ return;
+}
- return;
+/* for directories, we need the following info:
+ * the layout : trusted.glusterfs.dht
+ * the mds information : trusted.glusterfs.dht.mds
+ * the acl info: See above
+ */
+static int
+dht_set_dir_xattr_req(xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ int ret = -EINVAL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf) {
+ goto err;
+ }
+
+ if (!xattr_req) {
+ goto err;
+ }
+
+ /* Xattr to get the layout for a directory
+ */
+ ret = dict_set_uint32(xattr_req, conf->xattr_name, 4 * 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ conf->xattr_name, loc->path);
+ goto err;
+ }
+
+ /*Non-fatal failure */
+ ret = dict_set_uint32(xattr_req, conf->mds_xattr_key, 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ conf->mds_xattr_key, loc->path);
+ }
+
+ dht_check_and_set_acl_xattr_req(this, xattr_req);
+ ret = 0;
+err:
+ return ret;
}
-int
-dht_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
-{
- xlator_t *subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int op_errno = -1;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
- loc_t new_loc = {0,};
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- conf = this->private;
- if (!conf)
- goto err;
+/* If the hashed subvol is present, send the lookup to only that subvol first.
+ * If no hashed subvol, send a lookup to all subvols and proceed based on the
+ * responses.
+ */
+static int
+dht_do_fresh_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int call_cnt = 0;
+ int i = 0;
+
+ conf = this->private;
+ if (!conf) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = frame->local;
+ if (!local) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* Since we don't know whether this is a file or a directory,
+ * request all xattrs*/
+ ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ op_errno = -ret;
+ goto err;
+ }
+
+ ret = dht_set_dir_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ op_errno = -ret;
+ goto err;
+ }
+
+ /* Fuse sets a random value in gfid-req. If the gfid is missing
+ * on one or more subvols, posix will set the gfid to this value,
+ * causing GFID mismatches for directories. Remove the value fuse
+ * has sent before sending the lookup.
+ */
+ ret = dict_get_gfuuid(local->xattr_req, "gfid-req", &local->gfid_req);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "%s: No gfid-req available", loc->path);
+ } else {
+ dict_del(local->xattr_req, "gfid-req");
+ }
+ /* This should have been set in dht_lookup */
+ hashed_subvol = local->hashed_subvol;
+
+ if (!hashed_subvol) {
+ gf_msg_debug(this->name, 0,
+ "%s: no subvolume in layout for path, "
+ "checking on all the subvols to see if "
+ "it is a directory",
+ loc->path);
- local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
- ret = dht_filter_loc_subvol_key (this, loc, &new_loc,
- &hashed_subvol);
- if (ret) {
- loc_wipe (&local->loc);
- ret = loc_dup (&new_loc, &local->loc);
-
- /* we no more need 'new_loc' entries */
- loc_wipe (&new_loc);
-
- /* check if loc_dup() is successful */
- if (ret == -1) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "copying location failed for path=%s",
- loc->path);
- goto err;
- }
+ /* Allocate a layout. This will be populated and saved in
+ * the dht inode_ctx on successful lookup
+ */
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
}
- if (xattr_req) {
- local->xattr_req = dict_ref (xattr_req);
- } else {
- local->xattr_req = dict_new ();
- }
+ gf_msg_debug(this->name, 0,
+ "%s: Found null hashed subvol. Calling lookup"
+ " on all nodes.",
+ loc->path);
- if (uuid_is_null (loc->pargfid) && !uuid_is_null (loc->gfid) &&
- !__is_root_gfid (loc->inode->gfid)) {
- local->cached_subvol = NULL;
- dht_discover (frame, this, loc);
- return 0;
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
}
+ return 0;
+ }
- if (!hashed_subvol) {
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to get hashed subvol for %s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
- }
- local->hashed_subvol = hashed_subvol;
+ /* if the hashed_subvol is non-null, send the lookup there first so
+ * as to see whether we have a file or a directory */
+ gf_msg_debug(this->name, 0, "%s: Calling fresh lookup on %s", loc->path,
+ hashed_subvol->name);
- if (is_revalidate (loc)) {
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "revalidate without cache. path=%s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ STACK_WIND_COOKIE(frame, dht_lookup_cbk, hashed_subvol, hashed_subvol,
+ hashed_subvol->fops->lookup, loc, local->xattr_req);
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
+}
- if (layout->gen && (layout->gen < conf->gen)) {
- gf_log (this->name, GF_LOG_TRACE,
- "incomplete layout failure for path=%s",
- loc->path);
+static int
+dht_do_revalidate(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ xlator_t *subvol = NULL;
+ xlator_t *mds_subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+ int gen = 0;
+
+ conf = this->private;
+ if (!conf) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = frame->local;
+ if (!local) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0,
+ "path = %s. No layout found in the inode ctx.", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* Generation number has changed. This layout may be stale. */
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gen = layout->gen;
+ dht_layout_unref(this, local->layout);
+ local->layout = NULL;
+ local->cached_subvol = NULL;
+
+ gf_msg_debug(this->name, 0,
+ "path = %s. In memory layout may be stale."
+ "(layout->gen (%d) is less than "
+ "conf->gen (%d)). Calling fresh lookup.",
+ loc->path, gen, conf->gen);
+
+ dht_do_fresh_lookup(frame, this, loc);
+ return 0;
+ }
+
+ local->inode = inode_ref(loc->inode);
+
+ /* Since we don't know whether this has changed,
+ * request all xattrs*/
+ ret = dht_set_file_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ op_errno = -ret;
+ goto err;
+ }
+
+ ret = dht_set_dir_xattr_req(this, loc, local->xattr_req);
+ if (ret) {
+ op_errno = -ret;
+ goto err;
+ }
+
+ if (IA_ISDIR(local->inode->ia_type)) {
+ ret = dht_inode_ctx_mdsvol_get(local->inode, this, &mds_subvol);
+ if (ret || !mds_subvol) {
+ gf_msg_debug(this->name, 0, "path = %s. No mds subvol in inode ctx",
+ local->loc.path);
+ }
+ local->mds_subvol = mds_subvol;
+ local->call_cnt = conf->subvolume_cnt;
- dht_layout_unref (this, local->layout);
- local->layout = NULL;
- local->cached_subvol = NULL;
- goto do_fresh_lookup;
- }
+ /* local->call_cnt will change as responses are processed. Always use a
+ * local copy to loop through the STACK_WIND calls
+ */
- local->inode = inode_ref (loc->inode);
+ call_cnt = local->call_cnt;
- /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
- * revalidates directly go to the cached-subvolume.
- */
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
-
- if (IA_ISDIR (local->inode->ia_type)) {
- local->call_cnt = call_cnt = conf->subvolume_cnt;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_revalidate_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- loc, local->xattr_req);
- }
- return 0;
- }
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_revalidate_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, loc,
+ local->xattr_req);
+ }
+ return 0;
+ }
- call_cnt = local->call_cnt = layout->cnt;
+ /* If not a dir, this should be 1 */
+ local->call_cnt = layout->cnt;
+ call_cnt = local->call_cnt;
- /* need it for self-healing linkfiles which is
- 'in-migration' state */
- ret = dict_set_uint32 (local->xattr_req,
- GLUSTERFS_OPEN_FD_COUNT, 4);
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
- /* need it for dir self-heal */
- dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
+ gf_msg_debug(this->name, 0,
+ "path = %s. Calling "
+ "revalidate lookup on %s",
+ loc->path, subvol->name);
- for (i = 0; i < call_cnt; i++) {
- subvol = layout->list[i].xlator;
+ STACK_WIND_COOKIE(frame, dht_revalidate_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->loc, local->xattr_req);
+ }
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
+}
- STACK_WIND (frame, dht_revalidate_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
+/* Depending on the input, decide if this is a:
+ * fresh-lookup: loc->name is provided but no dht inode ctx
+ * revalidation: loc->name is provided, dht inode ctx is present
+ * discover: gfid based nameless lookup.
+ */
- }
- } else {
- do_fresh_lookup:
- /* TODO: remove the hard-coding */
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
-
- ret = dict_set_uint32 (local->xattr_req,
- DHT_LINKFILE_KEY, 256);
-
- /* need it for self-healing linkfiles which is
- 'in-migration' state */
- ret = dict_set_uint32 (local->xattr_req,
- GLUSTERFS_OPEN_FD_COUNT, 4);
-
- /* need it for dir self-heal */
- dht_check_and_set_acl_xattr_req (loc->inode, local->xattr_req);
-
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s, "
- "checking on all the subvols to see if "
- "it is a directory", loc->path);
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
-
- local->layout = dht_layout_new (this,
- conf->subvolume_cnt);
- if (!local->layout) {
- op_errno = ENOMEM;
- goto err;
- }
+int
+dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ loc_t new_loc = {
+ 0,
+ };
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ conf = this->private;
+ if (!conf)
+ goto err;
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_LOOKUP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = dht_filter_loc_subvol_key(this, loc, &new_loc, &hashed_subvol);
+ if (ret) {
+ loc_wipe(&local->loc);
+ ret = loc_dup(&new_loc, &local->loc);
+
+ /* we no longer need 'new_loc' entries */
+ loc_wipe(&new_loc);
+
+ /* check if loc_dup() is successful */
+ if (ret == -1) {
+ op_errno = errno;
+ gf_msg_debug(this->name, errno,
+ "copying location failed for path=%s", loc->path);
+ goto err;
+ }
+ }
+
+ if (xattr_req) {
+ local->xattr_req = dict_ref(xattr_req);
+ } else {
+ local->xattr_req = dict_new();
+ }
+
+ /* Nameless lookup */
+
+ /* This is usually sent by NFS. Lookups are done based on the gfid and
+ * no name information is available. Without the name, dht cannot calculate
+ * the hash and has to send a lookup to all subvols.
+ */
+ if (gf_uuid_is_null(loc->pargfid) && !gf_uuid_is_null(loc->gfid) &&
+ !__is_root_gfid(loc->inode->gfid)) {
+ local->cached_subvol = NULL;
+ dht_do_discover(frame, this, loc);
+ return 0;
+ }
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
- return 0;
- }
+ if (loc_is_root(loc)) {
+ /* Request the DHT commit hash xattr (trusted.glusterfs.dht.commithash)
+ * set on the brick root.
+ */
+ ret = dict_set_uint32(local->xattr_req, conf->commithash_xattr_name,
+ sizeof(uint32_t));
+ }
- STACK_WIND (frame, dht_lookup_cbk,
- hashed_subvol, hashed_subvol->fops->lookup,
- loc, local->xattr_req);
- }
+ if (!hashed_subvol)
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ local->hashed_subvol = hashed_subvol;
+ if (is_revalidate(loc)) {
+ /* The entry has been looked up before and has a dht inode_ctx
+ */
+ dht_do_revalidate(frame, this, loc);
return 0;
+ } else {
+ /* Entry has not been looked up before
+ */
+ dht_do_fresh_lookup(frame, this, loc);
+ return 0;
+ }
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- return 0;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
-
-int
-dht_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static int
+dht_unlink_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ local = frame->local;
+ prev = cookie;
- local->op_ret = 0;
+ LOCK(&frame->lock);
+ {
+ if ((op_ret == -1) &&
+ !((op_errno == ENOENT) || (op_errno == ENOTCONN))) {
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno,
+ "Unlink link: subvolume %s returned -1", prev->name);
+ goto post_unlock;
+ }
- local->postparent = *postparent;
- local->preparent = *preparent;
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, xdata);
+
+ return 0;
+}
- WIPE (&local->postparent);
- WIPE (&local->preparent);
+static int
+dht_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *hashed_subvol = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ if (op_errno != ENOENT) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno,
+ "Unlink: subvolume %s returned -1", prev->name);
+ goto post_unlock;
}
-unlock:
- UNLOCK (&frame->lock);
- DHT_STACK_UNWIND (unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, NULL);
+ local->op_ret = 0;
- return 0;
-}
+ local->postparent = *postparent;
+ local->preparent = *preparent;
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->preparent, 0);
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
+ }
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ if (!local->op_ret) {
+ hashed_subvol = dht_subvol_get_hashed(this, &local->loc);
+ if (hashed_subvol && hashed_subvol != local->cached_subvol) {
+ /*
+ * If hashed and cached are different, then we need
+ * to unlink linkfile from hashed subvol if data
+ * file is deleted successfully
+ */
+ STACK_WIND_COOKIE(frame, dht_unlink_linkfile_cbk, hashed_subvol,
+ hashed_subvol, hashed_subvol->fops->unlink,
+ &local->loc, local->flags, xdata);
+ return 0;
+ }
+ }
+
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, xdata);
+
+ return 0;
+}
-int
-dht_unlink_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static int
+dht_common_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ DHT_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
- xlator_t *cached_subvol = NULL;
+static int
+dht_fix_layout_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ if (op_ret == 0) {
+ /* update the layout in the inode ctx */
local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ layout = local->selfheal.layout;
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
+ dht_layout_set(this, local->loc.inode, layout);
+ }
- if (op_ret == -1)
- goto err;
+ DHT_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
- cached_subvol = dht_subvol_get_cached (this, local->loc.inode);
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s",
- local->loc.path);
- local->op_errno = EINVAL;
- goto err;
+static int
+dht_err_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto post_unlock;
}
- STACK_WIND (frame, dht_unlink_cbk,
- cached_subvol, cached_subvol->fops->unlink,
- &local->loc, local->flags, NULL);
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ if ((local->fop == GF_FOP_SETXATTR) ||
+ (local->fop == GF_FOP_FSETXATTR)) {
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ /* 'local' itself may not be valid after this */
+ goto out;
+ }
+ if ((local->fop == GF_FOP_REMOVEXATTR) ||
+ (local->fop == GF_FOP_FREMOVEXATTR)) {
+ DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ }
+ }
- return 0;
+out:
+ return 0;
+}
-err:
- DHT_STACK_UNWIND (unlink, frame, -1, local->op_errno,
- NULL, NULL, NULL);
- return 0;
+/* Set the value[] of key into dict after convert from
+ host byte order to network byte order
+*/
+int32_t
+dht_dict_set_array(dict_t *dict, char *key, int32_t value[], int32_t size)
+{
+ int ret = -1;
+ int32_t *ptr = NULL;
+ int32_t vindex;
+
+ if (value == NULL) {
+ return -EINVAL;
+ }
+
+ ptr = GF_MALLOC(sizeof(int32_t) * size, gf_common_mt_char);
+ if (ptr == NULL) {
+ return -ENOMEM;
+ }
+ for (vindex = 0; vindex < size; vindex++) {
+ ptr[vindex] = hton32(value[vindex]);
+ }
+ ret = dict_set_bin(dict, key, ptr, sizeof(int32_t) * size);
+ if (ret)
+ GF_FREE(ptr);
+ return ret;
}
static int
-dht_ufo_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+dht_common_mds_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *prev = cookie;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
- }
-unlock:
- UNLOCK (&frame->lock);
+ local = frame->local;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (setxattr, frame, local->op_ret,
- local->op_errno, NULL);
- }
+ if (op_ret)
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->this->name);
- return 0;
-}
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, 0, op_errno, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto out;
+ }
+ if (local->fop == GF_FOP_FSETXATTR) {
+ DHT_STACK_UNWIND(fsetxattr, frame, 0, op_errno, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto out;
+ }
-int
-dht_err_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, 0, op_errno, NULL);
+ /* 'local' itself may not be valid after this */
+ goto out;
+ }
+
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ DHT_STACK_UNWIND(fremovexattr, frame, 0, op_errno, NULL);
+ }
+
+out:
+ return 0;
+}
+
+/* Code to wind a xattrop call to add 1 on current mds internal xattr
+ value
+*/
+static int
+dht_setxattr_non_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int ret = 0;
+ dict_t *xattrop = NULL;
+ int32_t addone[1] = {1};
+ call_frame_t *prev = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret && !local->op_ret) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->this->name);
+ goto post_unlock;
+ }
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+
+ if (is_last_call(this_call_cnt)) {
+ if (!local->op_ret) {
+ xattrop = dict_new();
+ if (!xattrop) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
+ "dictionary creation failed");
+ ret = -1;
+ goto out;
+ }
+ ret = dht_dict_set_array(xattrop, conf->mds_xattr_key, addone, 1);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "dictionary set array failed ");
+ ret = -1;
+ goto out;
+ }
+ if ((local->fop == GF_FOP_SETXATTR) ||
+ (local->fop == GF_FOP_REMOVEXATTR)) {
+ STACK_WIND(frame, dht_common_mds_xattrop_cbk, local->mds_subvol,
+ local->mds_subvol->fops->xattrop, &local->loc,
+ GF_XATTROP_ADD_ARRAY, xattrop, NULL);
+ } else {
+ STACK_WIND(frame, dht_common_mds_xattrop_cbk, local->mds_subvol,
+ local->mds_subvol->fops->fxattrop, local->fd,
+ GF_XATTROP_ADD_ARRAY, xattrop, NULL);
+ }
+ } else {
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FSETXATTR) {
+ DHT_STACK_UNWIND(fsetxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, 0, 0, NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ DHT_STACK_UNWIND(fremovexattr, frame, 0, 0, NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+ }
+ }
+out:
+ if (ret) {
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ if (local->fop == GF_FOP_FSETXATTR) {
+ DHT_STACK_UNWIND(fsetxattr, frame, 0, 0, local->xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
- local->op_ret = 0;
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, 0, 0, NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
}
-unlock:
- UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (setxattr, frame, local->op_ret,
- local->op_errno, NULL);
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ DHT_STACK_UNWIND(fremovexattr, frame, 0, 0, NULL);
}
+ }
+just_return:
+ if (xattrop)
+ dict_unref(xattrop);
+ return 0;
+}
- return 0;
+static int
+dht_setxattr_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ call_frame_t *prev = NULL;
+ xlator_t *mds_subvol = NULL;
+ int i = 0;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+ mds_subvol = local->mds_subvol;
+
+ if (op_ret == -1) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->this->name);
+ goto out;
+ }
+
+ local->op_ret = 0;
+ local->call_cnt = conf->subvolume_cnt - 1;
+ local->xdata = dict_ref(xdata);
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (mds_subvol && (mds_subvol == conf->subvolumes[i]))
+ continue;
+ if (local->fop == GF_FOP_SETXATTR) {
+ STACK_WIND(frame, dht_setxattr_non_mds_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->setxattr, &local->loc,
+ local->xattr, local->flags, local->xattr_req);
+ }
+
+ if (local->fop == GF_FOP_FSETXATTR) {
+ STACK_WIND(frame, dht_setxattr_non_mds_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->fsetxattr, local->fd,
+ local->xattr, local->flags, local->xattr_req);
+ }
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ STACK_WIND(frame, dht_setxattr_non_mds_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->removexattr, &local->loc,
+ local->key, local->xattr_req);
+ }
+
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ STACK_WIND(frame, dht_setxattr_non_mds_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->fremovexattr, local->fd,
+ local->key, local->xattr_req);
+ }
+ }
+
+ return 0;
+out:
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FSETXATTR) {
+ DHT_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ DHT_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ }
+
+just_return:
+ return 0;
+}
+
+static int
+dht_xattrop_mds_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ call_frame_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = op_ret;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->this->name);
+ goto out;
+ }
+
+ if (local->fop == GF_FOP_SETXATTR) {
+ STACK_WIND(frame, dht_setxattr_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->setxattr, &local->loc, local->xattr,
+ local->flags, local->xattr_req);
+ }
+
+ if (local->fop == GF_FOP_FSETXATTR) {
+ STACK_WIND(frame, dht_setxattr_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->fsetxattr, local->fd, local->xattr,
+ local->flags, local->xattr_req);
+ }
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ STACK_WIND(frame, dht_setxattr_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->removexattr, &local->loc,
+ local->key, local->xattr_req);
+ }
+
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ STACK_WIND(frame, dht_setxattr_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->fremovexattr, local->fd, local->key,
+ local->xattr_req);
+ }
+
+ return 0;
+out:
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FSETXATTR) {
+ DHT_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ /* 'local' itself may not be valid after this */
+ goto just_return;
+ }
+
+ if (local->fop == GF_FOP_FREMOVEXATTR) {
+ DHT_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ }
+
+just_return:
+ return 0;
}
static void
-fill_layout_info (dht_layout_t *layout, char *buf)
+fill_layout_info(dht_layout_t *layout, char *buf)
{
- int i = 0;
- char tmp_buf[128] = {0,};
+ int i = 0;
+ char tmp_buf[128] = {
+ 0,
+ };
+
+ for (i = 0; i < layout->cnt; i++) {
+ snprintf(tmp_buf, sizeof(tmp_buf), "(%s %u %u)",
+ layout->list[i].xlator->name, layout->list[i].start,
+ layout->list[i].stop);
+ if (i)
+ strcat(buf, " ");
+ strcat(buf, tmp_buf);
+ }
+}
- for (i = 0; i < layout->cnt; i++) {
- snprintf (tmp_buf, 128, "(%s %u %u)",
- layout->list[i].xlator->name,
- layout->list[i].start,
- layout->list[i].stop);
- if (i)
- strcat (buf, " ");
- strcat (buf, tmp_buf);
- }
+static void
+dht_fill_pathinfo_xattr(xlator_t *this, dht_local_t *local, char *xattr_buf,
+ int32_t alloc_len, int flag, char *layout_buf)
+{
+ if (flag) {
+ if (local->xattr_val) {
+ snprintf(xattr_buf, alloc_len,
+ "((<" DHT_PATHINFO_HEADER "%s> %s) (%s-layout %s))",
+ this->name, local->xattr_val, this->name, layout_buf);
+ } else {
+ snprintf(xattr_buf, alloc_len, "(%s-layout %s)", this->name,
+ layout_buf);
+ }
+ } else if (local->xattr_val) {
+ snprintf(xattr_buf, alloc_len, "(<" DHT_PATHINFO_HEADER "%s> %s)",
+ this->name, local->xattr_val);
+ } else {
+ xattr_buf[0] = '\0';
+ }
}
-void
-dht_fill_pathinfo_xattr (xlator_t *this, dht_local_t *local,
- char *xattr_buf, int32_t alloc_len,
- int flag, char *layout_buf)
-{
- if (flag && local->xattr_val)
- snprintf (xattr_buf, alloc_len,
- "((<"DHT_PATHINFO_HEADER"%s> %s) (%s-layout %s))",
- this->name, local->xattr_val, this->name,
- layout_buf);
- else if (local->xattr_val)
- snprintf (xattr_buf, alloc_len,
- "(<"DHT_PATHINFO_HEADER"%s> %s)",
- this->name, local->xattr_val);
- else if (flag)
- snprintf (xattr_buf, alloc_len, "(%s-layout %s)",
- this->name, layout_buf);
+static int
+dht_vgetxattr_alloc_and_fill(dht_local_t *local, dict_t *xattr, xlator_t *this,
+ int op_errno)
+{
+ int ret = -1;
+ char *value = NULL;
+
+ ret = dict_get_str(xattr, local->xsel, &value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED,
+ "Subvolume %s returned -1", this->name);
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto out;
+ }
+
+ local->alloc_len += strlen(value);
+
+ if (!local->xattr_val) {
+ local->alloc_len += (SLEN(DHT_PATHINFO_HEADER) + 10);
+ local->xattr_val = GF_MALLOC(local->alloc_len, gf_common_mt_char);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
+ }
+ local->xattr_val[0] = '\0';
+ }
+
+ int plen = strlen(local->xattr_val);
+ if (plen) {
+ /* extra byte(s) for \0 to be safe */
+ local->alloc_len += (plen + 2);
+ local->xattr_val = GF_REALLOC(local->xattr_val, local->alloc_len);
+ if (!local->xattr_val) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ (void)strcat(local->xattr_val, value);
+ (void)strcat(local->xattr_val, " ");
+ local->op_ret = 0;
+
+ ret = 0;
+
+out:
+ return ret;
}
-int
-dht_vgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int ret = 0;
- int flag = 0;
- int this_call_cnt = 0;
- char *value_got = NULL;
- char layout_buf[8192] = {0,};
- char *xattr_buf = NULL;
- dict_t *dict = NULL;
- int32_t alloc_len = 0;
- int32_t plen = 0;
- call_frame_t *prev = NULL;
+static int
+dht_vgetxattr_fill_and_set(dht_local_t *local, dict_t **dict, xlator_t *this,
+ gf_boolean_t flag)
+{
+ int ret = -1;
+ char *xattr_buf = NULL;
+ char layout_buf[8192] = {
+ 0,
+ };
+
+ if (flag)
+ fill_layout_info(local->layout, layout_buf);
+
+ *dict = dict_new();
+ if (!*dict)
+ goto out;
+
+ local->xattr_val[strlen(local->xattr_val) - 1] = '\0';
+
+ /* we would need max this many bytes to create xattr string
+ * extra 40 bytes is just an estimated amount of additional
+ * space required as we include translator name and some
+ * spaces, brackets etc. when forming the pathinfo string.
+ *
+ * For node-uuid we just don't have all the pretty formatting,
+ * but since this is a generic routine for pathinfo & node-uuid
+ * we don't have conditional space allocation and try to be
+ * generic
+ */
+ local->alloc_len += (2 * strlen(this->name)) + strlen(layout_buf) + 40;
+ xattr_buf = GF_MALLOC(local->alloc_len, gf_common_mt_char);
+ if (!xattr_buf)
+ goto out;
+
+ if (XATTR_IS_PATHINFO(local->xsel)) {
+ (void)dht_fill_pathinfo_xattr(this, local, xattr_buf, local->alloc_len,
+ flag, layout_buf);
+ } else if ((XATTR_IS_NODE_UUID(local->xsel)) ||
+ (XATTR_IS_NODE_UUID_LIST(local->xsel))) {
+ (void)snprintf(xattr_buf, local->alloc_len, "%s", local->xattr_val);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GET_XATTR_FAILED,
+ "Unknown local->xsel (%s)", local->xsel);
+ GF_FREE(xattr_buf);
+ goto out;
+ }
+
+ ret = dict_set_dynstr(*dict, local->xsel, xattr_buf);
+ if (ret)
+ GF_FREE(xattr_buf);
+ GF_FREE(local->xattr_val);
- local = frame->local;
- prev = cookie;
-
- if (op_ret >= 0) {
- ret = dict_get_str (xattr, local->xsel, &value_got);
- if (!ret) {
- alloc_len = strlen (value_got);
-
- /**
- * allocate the buffer:- we allocate 10 bytes extra in
- * case we need to append ' Link: ' in the buffer for
- * another STACK_WIND
- */
- if (!local->xattr_val) {
- alloc_len += (strlen (DHT_PATHINFO_HEADER) + 10);
- local->xattr_val =
- GF_CALLOC (alloc_len,
- sizeof (char),
- gf_common_mt_char);
- }
+out:
+ return ret;
+}
- if (local->xattr_val) {
- plen = strlen (local->xattr_val);
- if (plen) {
- /* extra byte(s) for \0 to be safe */
- alloc_len += (plen + 2);
- local->xattr_val =
- GF_REALLOC (local->xattr_val,
- alloc_len);
- if (!local->xattr_val)
- goto out;
- }
-
- strcat (local->xattr_val, value_got);
- }
- local->op_ret = 0;
- }
- } else {
+static int
+dht_find_local_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *prev = NULL;
+ int this_call_cnt = 0;
+ int ret = 0;
+ char *uuid_str = NULL;
+ char *uuid_list = NULL;
+ char *next_uuid_str = NULL;
+ char *saveptr = NULL;
+ uuid_t node_uuid = {
+ 0,
+ };
+ char *uuid_list_copy = NULL;
+ int count = 0;
+ int i = 0;
+ int index = 0;
+ int found = 0;
+ nodeuuid_info_t *tmp_ptr = NULL;
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(frame->local, out);
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ VALIDATE_OR_GOTO(conf->defrag, out);
+
+ gf_msg_debug(this->name, 0, "subvol %s returned", prev->name);
+
+ LOCK(&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ if (op_errno == ENODATA)
+ gf_msg_debug(this->name, 0, "failed to get node-uuid");
+ else
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_GET_XATTR_FAILED, "failed to get node-uuid");
+ goto post_unlock;
+ }
+
+ ret = dict_get_str(xattr, local->xsel, &uuid_list);
+
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_GET_FAILED,
+ "Failed to get %s", local->xsel);
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto unlock;
+ }
+
+ /* As DHT will not know details of its child xlators
+ * we need to parse this twice to get the count first
+ * and allocate memory later.
+ */
+ count = 0;
+ index = conf->local_subvols_cnt;
+
+ uuid_list_copy = gf_strdup(uuid_list);
+ if (!uuid_list_copy)
+ goto unlock;
+
+ for (uuid_str = strtok_r(uuid_list, " ", &saveptr); uuid_str;
+ uuid_str = next_uuid_str) {
+ next_uuid_str = strtok_r(NULL, " ", &saveptr);
+ if (gf_uuid_parse(uuid_str, node_uuid)) {
local->op_ret = -1;
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR, "Subvolume %s returned -1 "
- "(%s)", prev->this->name, strerror (op_errno));
+ local->op_errno = EINVAL;
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UUID_PARSE_ERROR,
+ "Failed to parse uuid for %s", prev->name);
+ goto post_unlock;
+ }
+
+ count++;
+ if (gf_uuid_compare(node_uuid, conf->defrag->node_uuid)) {
+ gf_msg_debug(this->name, 0,
+ "subvol %s does not"
+ "belong to this node",
+ prev->name);
+ } else {
+ /* handle multiple bricks of the same replica
+ * on the same node */
+ if (found)
+ continue;
+ conf->local_subvols[(conf->local_subvols_cnt)++] = prev;
+ found = 1;
+ gf_msg_debug(this->name, 0,
+ "subvol %s belongs to"
+ " this node",
+ prev->name);
+ }
+ }
+
+ if (!found) {
+ local->op_ret = 0;
+ goto unlock;
+ }
+
+ conf->local_nodeuuids[index].count = count;
+ conf->local_nodeuuids[index].elements = GF_CALLOC(
+ count, sizeof(nodeuuid_info_t), 1);
+
+ /* The node-uuids are guaranteed to be returned in the same
+ * order as the bricks
+ * A null node-uuid is returned for a brick that is down.
+ */
+
+ saveptr = NULL;
+ i = 0;
+
+ for (uuid_str = strtok_r(uuid_list_copy, " ", &saveptr); uuid_str;
+ uuid_str = next_uuid_str) {
+ next_uuid_str = strtok_r(NULL, " ", &saveptr);
+ tmp_ptr = &(conf->local_nodeuuids[index].elements[i]);
+ gf_uuid_parse(uuid_str, tmp_ptr->uuid);
+
+ if (!gf_uuid_compare(tmp_ptr->uuid, conf->defrag->node_uuid)) {
+ tmp_ptr->info = REBAL_NODEUUID_MINE;
+ }
+ i++;
+ tmp_ptr = NULL;
}
+ }
- out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
+ local->op_ret = 0;
+unlock:
+ UNLOCK(&frame->lock);
+post_unlock:
+ if (!is_last_call(this_call_cnt))
+ goto out;
- if (local->op_ret == -1) {
- goto unwind;
- }
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
- if (local->layout->cnt > 1) {
- /* Set it for directory */
- fill_layout_info (local->layout, layout_buf);
- flag = 1;
- }
+ DHT_STACK_UNWIND(getxattr, frame, 0, 0, xattr, xdata);
+ goto out;
+
+unwind:
- dict = dict_new ();
+ GF_FREE(conf->local_nodeuuids[index].elements);
+ conf->local_nodeuuids[index].elements = NULL;
- /* we would need max this many bytes to create xattr string */
- alloc_len += (2 * strlen (this->name))
- + strlen (layout_buf)
- + 40;
- xattr_buf = GF_CALLOC (alloc_len,
- sizeof (char), gf_common_mt_char);
+ DHT_STACK_UNWIND(getxattr, frame, -1, local->op_errno, NULL, xdata);
+out:
+ GF_FREE(uuid_list_copy);
+ return 0;
+}
- if (XATTR_IS_PATHINFO (local->xsel)) {
- (void) dht_fill_pathinfo_xattr (this, local, xattr_buf,
- alloc_len, flag,
- layout_buf);
- } else if (XATTR_IS_NODE_UUID (local->xsel)) {
- (void) snprintf (xattr_buf, alloc_len, "%s",
- local->xattr_val);
- } else
- gf_log (this->name, GF_LOG_WARNING,
- "Unknown local->xsel (%s)", local->xsel);
+static int
+dht_vgetxattr_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ int ret = 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ dict_t *dict = NULL;
- ret = dict_set_dynstr (dict, local->xsel, xattr_buf);
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(frame->local, out);
- if (local->xattr_val)
- GF_FREE (local->xattr_val);
+ local = frame->local;
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict,
- xdata);
+ LOCK(&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ if (op_ret < 0) {
+ if (op_errno != ENOTCONN) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_GET_XATTR_FAILED, "getxattr err for dir");
+ goto post_unlock;
+ }
- if (dict)
- dict_unref (dict);
+ goto unlock;
+ }
- return 0;
+ ret = dht_vgetxattr_alloc_and_fill(local, xattr, this, op_errno);
+ if (ret) {
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_DICT_SET_FAILED,
+ "alloc or fill failure");
+ goto post_unlock;
}
+ }
+unlock:
+ UNLOCK(&frame->lock);
+post_unlock:
+ if (!is_last_call(this_call_cnt))
+ goto out;
+
+ /* -- last call: do patch ups -- */
+
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
+
+ ret = dht_vgetxattr_fill_and_set(local, &dict, this, _gf_true);
+ if (ret)
+ goto unwind;
+
+ DHT_STACK_UNWIND(getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
unwind:
- DHT_STACK_UNWIND (getxattr, frame, -1, local->op_errno, NULL, NULL);
- return 0;
+ DHT_STACK_UNWIND(getxattr, frame, -1, local->op_errno, NULL, NULL);
+cleanup:
+ if (dict)
+ dict_unref(dict);
+out:
+ return 0;
}
-int
-dht_linkinfo_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr,
- dict_t *xdata)
+static int
+dht_vgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xattr, dict_t *xdata)
{
- int ret = 0;
- char *value = NULL;
+ dht_local_t *local = NULL;
+ int ret = 0;
+ dict_t *dict = NULL;
+ xlator_t *prev = NULL;
+ gf_boolean_t flag = _gf_true;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_GET_XATTR_FAILED,
+ "vgetxattr: Subvolume %s returned -1", prev->name);
+ goto unwind;
+ }
+
+ ret = dht_vgetxattr_alloc_and_fill(local, xattr, this, op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_NO_MEMORY,
+ "Allocation or fill failure");
+ goto unwind;
+ }
+
+ flag = (local->layout->cnt > 1) ? _gf_true : _gf_false;
+
+ ret = dht_vgetxattr_fill_and_set(local, &dict, this, flag);
+ if (ret)
+ goto unwind;
+
+ DHT_STACK_UNWIND(getxattr, frame, 0, 0, dict, xdata);
+ goto cleanup;
- if (op_ret != -1) {
- ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value);
- if (!ret) {
- ret = dict_set_str (xattr, GF_XATTR_LINKINFO_KEY, value);
- if (!ret)
- gf_log (this->name, GF_LOG_TRACE,
- "failed to set linkinfo");
- }
+unwind:
+ DHT_STACK_UNWIND(getxattr, frame, -1, local->op_errno, NULL, NULL);
+cleanup:
+ if (dict)
+ dict_unref(dict);
+
+ return 0;
+}
+
+static int
+dht_linkinfo_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ int ret = 0;
+ char *value = NULL;
+
+ if (op_ret != -1) {
+ ret = dict_get_str(xattr, GF_XATTR_PATHINFO_KEY, &value);
+ if (!ret) {
+ ret = dict_set_str(xattr, GF_XATTR_LINKINFO_KEY, value);
+ if (!ret)
+ gf_msg_trace(this->name, 0, "failed to set linkinfo");
}
+ }
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
+ DHT_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata);
- return 0;
+ return 0;
+}
+
+static int
+dht_mds_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(frame->local, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+ local = frame->local;
+
+ if (!xattr || (op_ret == -1)) {
+ local->op_ret = op_ret;
+ goto out;
+ }
+ dict_del(xattr, conf->xattr_name);
+ local->op_ret = 0;
+
+ if (!local->xattr) {
+ local->xattr = dict_copy_with_ref(xattr, NULL);
+ }
+
+out:
+ DHT_STACK_UNWIND(getxattr, frame, local->op_ret, op_errno, local->xattr,
+ xdata);
+ return 0;
+err:
+ DHT_STACK_UNWIND(getxattr, frame, -1, EINVAL, NULL, NULL);
+ return 0;
}
int
-dht_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xattr, dict_t *xdata)
{
- int this_call_cnt = 0;
- dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (frame->local, out);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(frame->local, err);
+ VALIDATE_OR_GOTO(this->private, err);
- local = frame->local;
+ conf = this->private;
+ local = frame->local;
- this_call_cnt = dht_frame_return (frame);
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto err;
+ return 0;
+ }
- if (!xattr || (op_ret == -1))
- goto out;
+ LOCK(&frame->lock);
+ {
+ if (!xattr || (op_ret == -1)) {
+ local->op_ret = op_ret;
+ goto unlock;
+ }
+
+ dict_del(xattr, conf->xattr_name);
+ dict_del(xattr, conf->mds_xattr_key);
- if (dict_get (xattr, "trusted.glusterfs.dht")) {
- dict_del (xattr, "trusted.glusterfs.dht");
+ dict_del(xattr, conf->commithash_xattr_name);
+
+ if (frame->root->pid >= 0) {
+ GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
+ GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr);
}
+
local->op_ret = 0;
if (!local->xattr) {
- local->xattr = dict_copy_with_ref (xattr, NULL);
+ local->xattr = dict_copy_with_ref(xattr, NULL);
} else {
- /* first aggregate everything into xattr and then copy into
- * local->xattr. This is required as we want to have
- * 'local->xattr' as the proper final dictionary passed above
- * distribute xlator.
- */
- dht_aggregate_xattr (xattr, local->xattr);
- local->xattr = dict_copy (xattr, local->xattr);
+ dht_aggregate_xattr(local->xattr, xattr);
}
-out:
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
- local->xattr, NULL);
+
+ if (!local->xdata) {
+ local->xdata = dict_ref(xdata);
+ } else if ((local->inode && IA_ISDIR(local->inode->ia_type)) ||
+ (local->fd && IA_ISDIR(local->fd->inode->ia_type))) {
+ dht_aggregate_xattr(local->xdata, xdata);
}
- return 0;
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ /* If we have a valid xattr received from any one of the
+ * subvolume, let's return it */
+ if (local->xattr) {
+ local->op_ret = 0;
+ }
+
+ DHT_STACK_UNWIND(getxattr, frame, local->op_ret, op_errno, local->xattr,
+ local->xdata);
+ }
+ return 0;
+err:
+ DHT_STACK_UNWIND(getxattr, frame, -1, EINVAL, NULL, NULL);
+ return 0;
}
-int32_t
-dht_getxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+static int32_t
+dht_getxattr_unwind(call_frame_t *frame, int op_ret, int op_errno, dict_t *dict,
+ dict_t *xdata)
{
- DHT_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ DHT_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
+static int
+dht_getxattr_get_real_filename_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xattr, dict_t *xdata)
+{
+ int this_call_cnt = 0;
+ dht_local_t *local = NULL;
-int
-dht_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key, dict_t *xdata)
-{
- xlator_t *subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- xlator_t **sub_volumes = NULL;
- int op_errno = -1;
- int i = 0;
- int cnt = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (this->private, err);
-
- conf = this->private;
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_GETXATTR);
- if (!local) {
- op_errno = ENOMEM;
+ local = frame->local;
- goto err;
+ LOCK(&frame->lock);
+ {
+ if (local->op_errno == EOPNOTSUPP) {
+ /* Nothing to do here, we have already found
+ * a subvol which does not have the get_real_filename
+ * optimization. If condition is for simple logic.
+ */
+ goto unlock;
}
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "layout is NULL");
- op_errno = ENOENT;
- goto err;
- }
+ if (op_ret == -1) {
+ if (op_errno == EOPNOTSUPP) {
+ /* This subvol does not have the optimization.
+ * Better let the user know we don't support it.
+ * Remove previous results if any.
+ */
- if (key) {
- local->key = gf_strdup (key);
- if (!local->key) {
- op_errno = ENOMEM;
- goto err;
+ if (local->xattr) {
+ dict_unref(local->xattr);
+ local->xattr = NULL;
}
- }
- if (key && ((strcmp (key, GF_XATTR_PATHINFO_KEY) == 0)
- || strcmp (key, GF_XATTR_NODE_UUID_KEY) == 0)) {
- cached_subvol = local->cached_subvol;
- (void) strncpy (local->xsel, key, 256);
+ if (local->xattr_req) {
+ dict_unref(local->xattr_req);
+ local->xattr_req = NULL;
+ }
- local->call_cnt = 1;
- STACK_WIND (frame, dht_vgetxattr_cbk, cached_subvol,
- cached_subvol->fops->getxattr, loc, key, NULL);
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UPGRADE_BRICKS,
+ "At least "
+ "one of the bricks does not support "
+ "this operation. Please upgrade all "
+ "bricks.");
+ goto post_unlock;
+ }
+
+ if (op_errno == ENOATTR) {
+ /* Do nothing, our defaults are set to this.
+ */
+ goto unlock;
+ }
+
+ /* This is a place holder for every other error
+ * case. I am not sure of how to interpret
+ * ENOTCONN etc. As of now, choosing to ignore
+ * down subvol and return a good result(if any)
+ * from other subvol.
+ */
+ UNLOCK(&frame->lock);
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_GET_XATTR_FAILED, "Failed to get real filename.");
+ goto post_unlock;
+ }
+
+ /* This subvol has the required file.
+ * There could be other subvols which have returned
+ * success already, choosing to return the latest good
+ * result.
+ */
+ if (local->xattr)
+ dict_unref(local->xattr);
+ local->xattr = dict_ref(xattr);
+
+ if (local->xattr_req) {
+ dict_unref(local->xattr_req);
+ local->xattr_req = NULL;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ local->op_ret = op_ret;
+ local->op_errno = 0;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, 0, "Found a matching file.");
+ goto post_unlock;
+ }
+unlock:
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ DHT_STACK_UNWIND(getxattr, frame, local->op_ret, local->op_errno,
+ local->xattr, local->xattr_req);
+ }
+
+ return 0;
+}
- return 0;
- }
- if (key && (strcmp (key, GF_XATTR_LINKINFO_KEY) == 0)) {
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to get"
- "hashed subvol for %s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+static int
+dht_getxattr_get_real_filename(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_layout_t *layout = NULL;
+ int cnt = 0;
+ xlator_t *subvol = NULL;
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to get"
- "cached subvol for %s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local = frame->local;
+ layout = local->layout;
- if (hashed_subvol == cached_subvol) {
- op_errno = ENODATA;
- goto err;
- }
- if (hashed_subvol) {
- STACK_WIND (frame, dht_linkinfo_getxattr_cbk, hashed_subvol,
- hashed_subvol->fops->getxattr, loc,
- GF_XATTR_PATHINFO_KEY, NULL);
- return 0;
- }
- op_errno = ENODATA;
- goto err;
- }
+ cnt = local->call_cnt = layout->cnt;
- if (key && (!strcmp (GF_XATTR_MARKER_KEY, key))
- && (-1 == frame->root->pid)) {
+ local->op_ret = -1;
+ local->op_errno = ENOATTR;
- if (loc->inode-> ia_type == IA_IFDIR) {
- cnt = layout->cnt;
- } else {
- cnt = 1;
- }
- sub_volumes = alloca ( cnt * sizeof (xlator_t *));
- for (i = 0; i < cnt; i++)
- *(sub_volumes + i) = layout->list[i].xlator;
-
- if (cluster_getmarkerattr (frame, this, loc, key,
- local, dht_getxattr_unwind,
- sub_volumes, cnt,
- MARKER_UUID_TYPE, conf->vol_uuid)) {
- op_errno = EINVAL;
- goto err;
- }
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND(frame, dht_getxattr_get_real_filename_cbk, subvol,
+ subvol->fops->getxattr, loc, key, xdata);
+ }
- return 0;
+ return 0;
+}
+
+static int
+dht_marker_populate_args(call_frame_t *frame, int type, int *gauge,
+ xlator_t **subvols)
+{
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_layout_t *layout = NULL;
+
+ local = frame->local;
+ layout = local->layout;
+
+ for (i = 0; i < layout->cnt; i++)
+ subvols[i] = layout->list[i].xlator;
+
+ return layout->cnt;
+}
+
+static int
+dht_is_debug_xattr_key(const char **array, char *key)
+{
+ int i = 0;
+
+ for (i = 0; array[i]; i++) {
+ if (fnmatch(array[i], key, FNM_NOESCAPE) == 0)
+ return i;
+ }
+
+ return -1;
+}
+
+/* Note we already have frame->local initialised here*/
+
+static int
+dht_handle_debug_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENODATA;
+ char *value = NULL;
+ loc_t file_loc = {0};
+ const char *name = NULL;
+
+ local = frame->local;
+
+ if (dht_is_debug_xattr_key(dht_dbg_vxattrs, (char *)key) == -1) {
+ goto out;
+ }
+
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (strncmp(key, DHT_DBG_HASHED_SUBVOL_KEY,
+ SLEN(DHT_DBG_HASHED_SUBVOL_KEY)) == 0) {
+ name = key + strlen(DHT_DBG_HASHED_SUBVOL_KEY);
+ if (strlen(name) == 0) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = dht_build_child_loc(this, &file_loc, loc, (char *)name);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto out;
}
- if (key && *conf->vol_uuid) {
- if ((match_uuid_local (key, conf->vol_uuid) == 0) &&
- (-1 == frame->root->pid)) {
- if (loc->inode-> ia_type == IA_IFDIR) {
- cnt = layout->cnt;
- } else {
- cnt = 1;
- }
- sub_volumes = alloca ( cnt * sizeof (xlator_t *));
- for (i = 0; i < cnt; i++)
- sub_volumes[i] = layout->list[i].xlator;
-
- if (cluster_getmarkerattr (frame, this, loc, key,
- local, dht_getxattr_unwind,
- sub_volumes, cnt,
- MARKER_XTIME_TYPE,
- conf->vol_uuid)) {
- op_errno = EINVAL;
- goto err;
- }
+ local->hashed_subvol = dht_subvol_get_hashed(this, &file_loc);
+ if (local->hashed_subvol == NULL) {
+ op_errno = ENODATA;
+ goto out;
+ }
- return 0;
- }
+ value = gf_strdup(local->hashed_subvol->name);
+ if (!value) {
+ op_errno = ENOMEM;
+ goto out;
}
- if (loc->inode-> ia_type == IA_IFDIR) {
- cnt = local->call_cnt = layout->cnt;
- } else {
- cnt = local->call_cnt = 1;
+ ret = dict_set_dynstr(local->xattr, (char *)key, value);
+ if (ret < 0) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
}
+ ret = 0;
+ goto out;
+ }
+
+out:
+ loc_wipe(&file_loc);
+ DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL);
+ return 0;
+}
+
+/* Virtual Xattr which returns 1 if all subvols are up,
+ else returns 0. Geo-rep then uses this virtual xattr
+ after a fresh mount and starts the I/O.
+*/
+
+enum dht_vxattr_subvol {
+ DHT_VXATTR_SUBVOLS_UP = 1,
+ DHT_VXATTR_SUBVOLS_DOWN = 0,
+};
+
+int
+dht_vgetxattr_subvol_status(call_frame_t *frame, xlator_t *this,
+ const char *key)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ int op_errno = ENODATA;
+ int value = DHT_VXATTR_SUBVOLS_UP;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ local = frame->local;
+
+ if (!key) {
+ op_errno = EINVAL;
+ goto out;
+ }
+ local->xattr = dict_new();
+ if (!local->xattr) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ value = DHT_VXATTR_SUBVOLS_DOWN;
+ gf_msg_debug(this->name, 0, "subvol %s is down ",
+ conf->subvolumes[i]->name);
+ break;
+ }
+ }
+ ret = dict_set_int8(local->xattr, (char *)key, value);
+ if (ret < 0) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ DHT_STACK_UNWIND(getxattr, frame, ret, op_errno, local->xattr, NULL);
+ return 0;
+}
+int
+dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *key,
+ dict_t *xdata)
+#define DHT_IS_DIR(layout) (layout->cnt > 1)
+{
+ xlator_t *subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *mds_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int cnt = 0;
+ char *node_uuid_key = NULL;
+ int ret = -1;
+
+ GF_CHECK_XATTR_KEY_AND_GOTO(key, IO_THREADS_QUEUE_SIZE_KEY, op_errno, err);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_GETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_NULL,
+ "Layout is NULL");
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ /* skip over code which is irrelevant without a valid key */
+ if (!key)
+ goto no_key;
+
+ local->key = gf_strdup(key);
+ if (!local->key) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (strncmp(key, conf->mds_xattr_key, strlen(key)) == 0) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ if (strncmp(key, DHT_SUBVOL_STATUS_KEY, SLEN(DHT_SUBVOL_STATUS_KEY)) == 0) {
+ dht_vgetxattr_subvol_status(frame, this, key);
+ return 0;
+ }
+
+ /* skip over code which is irrelevant if !DHT_IS_DIR(layout) */
+ if (!DHT_IS_DIR(layout))
+ goto no_dht_is_dir;
+
+ if ((strncmp(key, GF_XATTR_GET_REAL_FILENAME_KEY,
+ SLEN(GF_XATTR_GET_REAL_FILENAME_KEY)) == 0) &&
+ DHT_IS_DIR(layout)) {
+ dht_getxattr_get_real_filename(frame, this, loc, key, xdata);
+ return 0;
+ }
+
+ if (!strcmp(key, GF_REBAL_FIND_LOCAL_SUBVOL)) {
+ ret = gf_asprintf(&node_uuid_key, "%s", GF_XATTR_LIST_NODE_UUIDS_KEY);
+ if (ret == -1 || !node_uuid_key) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_NO_MEMORY,
+ "Failed to copy node uuid key");
+ op_errno = ENOMEM;
+ goto err;
+ }
+ (void)snprintf(local->xsel, sizeof(local->xsel), "%s", node_uuid_key);
+ cnt = local->call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_find_local_subvol_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->getxattr, loc,
+ node_uuid_key, xdata);
+ }
+ if (node_uuid_key)
+ GF_FREE(node_uuid_key);
+ return 0;
+ }
+
+ if (!strcmp(key, GF_REBAL_OLD_FIND_LOCAL_SUBVOL)) {
+ ret = gf_asprintf(&node_uuid_key, "%s", GF_XATTR_NODE_UUID_KEY);
+ if (ret == -1 || !node_uuid_key) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_NO_MEMORY,
+ "Failed to copy node uuid key");
+ op_errno = ENOMEM;
+ goto err;
+ }
+ (void)snprintf(local->xsel, sizeof(local->xsel), "%s", node_uuid_key);
+ cnt = local->call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_find_local_subvol_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->getxattr, loc,
+ node_uuid_key, xdata);
+ }
+ if (node_uuid_key)
+ GF_FREE(node_uuid_key);
+ return 0;
+ }
+
+ /* for file use cached subvolume (obviously!): see if {}
+ * below
+ * for directory:
+ * wind to all subvolumes and exclude subvolumes which
+ * return ENOTCONN (in callback)
+ *
+ * NOTE: Don't trust inode here, as that may not be valid
+ * (until inode_link() happens)
+ */
+
+ if (XATTR_IS_PATHINFO(key) || (strcmp(key, GF_XATTR_NODE_UUID_KEY) == 0) ||
+ (strcmp(key, GF_XATTR_LIST_NODE_UUIDS_KEY) == 0)) {
+ (void)snprintf(local->xsel, sizeof(local->xsel), "%s", key);
+ cnt = local->call_cnt = layout->cnt;
for (i = 0; i < cnt; i++) {
- subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_getxattr_cbk,
- subvol, subvol->fops->getxattr,
- loc, key, NULL);
+ subvol = layout->list[i].xlator;
+ STACK_WIND(frame, dht_vgetxattr_dir_cbk, subvol,
+ subvol->fops->getxattr, loc, key, xdata);
}
return 0;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
+no_dht_is_dir:
+ /* node-uuid or pathinfo for files */
+ if (XATTR_IS_PATHINFO(key) || (strcmp(key, GF_XATTR_NODE_UUID_KEY) == 0)) {
+ cached_subvol = local->cached_subvol;
+ (void)snprintf(local->xsel, sizeof(local->xsel), "%s", key);
+ local->call_cnt = 1;
+ STACK_WIND_COOKIE(frame, dht_vgetxattr_cbk, cached_subvol,
+ cached_subvol, cached_subvol->fops->getxattr, loc,
+ key, xdata);
return 0;
-}
+ }
-int
-dht_fgetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *key, dict_t *xdata)
-{
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int op_errno = -1;
- int i = 0;
- int cnt = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
- VALIDATE_OR_GOTO (this->private, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FGETXATTR);
- if (!local) {
- op_errno = ENOMEM;
-
- goto err;
+ if (strcmp(key, GF_XATTR_LINKINFO_KEY) == 0) {
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (!hashed_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
}
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_ERROR,
- "layout is NULL");
- op_errno = ENOENT;
- goto err;
+ cached_subvol = dht_subvol_get_cached(this, loc->inode);
+ if (!cached_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_CACHED_SUBVOL_GET_FAILED,
+ "Failed to get cached subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
}
- if (key) {
- local->key = gf_strdup (key);
- if (!local->key) {
- op_errno = ENOMEM;
- goto err;
- }
+ if (hashed_subvol == cached_subvol) {
+ op_errno = ENODATA;
+ goto err;
}
- if (fd->inode->ia_type == IA_IFDIR) {
- cnt = local->call_cnt = layout->cnt;
+ STACK_WIND(frame, dht_linkinfo_getxattr_cbk, hashed_subvol,
+ hashed_subvol->fops->getxattr, loc, GF_XATTR_PATHINFO_KEY,
+ xdata);
+ return 0;
+ }
+
+ if (dht_is_debug_xattr_key(dht_dbg_vxattrs, (char *)key) >= 0) {
+ dht_handle_debug_getxattr(frame, this, loc, key);
+ return 0;
+ }
+
+no_key:
+ if (cluster_handle_marker_getxattr(frame, loc, key, conf->vol_uuid,
+ dht_getxattr_unwind,
+ dht_marker_populate_args) == 0)
+ return 0;
+
+ if (DHT_IS_DIR(layout)) {
+ local->call_cnt = conf->subvolume_cnt;
+ cnt = conf->subvolume_cnt;
+ ret = dht_inode_ctx_mdsvol_get(loc->inode, this, &mds_subvol);
+ if (!mds_subvol) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Cannot determine MDS, fetching xattr %s randomly"
+ " from a subvol for path %s ",
+ key, loc->path);
} else {
- cnt = local->call_cnt = 1;
+ /* TODO need to handle it, As of now we are
+ choosing availability instead of chossing
+ consistencty, in case of mds_subvol is
+ down winding a getxattr call on other subvol
+ and return xattr
+ */
+ local->mds_subvol = mds_subvol;
+ for (i = 0; i < cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ DHT_MSG_HASHED_SUBVOL_DOWN,
+ "MDS %s is down for path"
+ " path %s so fetching xattr "
+ "%s randomly from a subvol ",
+ local->mds_subvol->name, loc->path, key);
+ ret = 1;
+ }
+ }
+ }
}
- for (i = 0; i < cnt; i++) {
- subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_getxattr_cbk,
- subvol, subvol->fops->fgetxattr,
- fd, key, NULL);
+ if (!ret && key && local->mds_subvol && dht_match_xattr(key)) {
+ STACK_WIND(frame, dht_mds_getxattr_cbk, local->mds_subvol,
+ local->mds_subvol->fops->getxattr, loc, key, xdata);
+
+ return 0;
}
- return 0;
+ } else {
+ cnt = local->call_cnt = 1;
+ }
+
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->getxattr, loc,
+ key, xdata);
+ }
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fgetxattr, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(getxattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
+#undef DHT_IS_DIR
int
-dht_fsetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *xattr, int flags, dict_t *xdata)
+dht_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- int op_errno = EINVAL;
- data_pair_t *trav = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.dht*", xattr,
- trav, op_errno, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FSETXATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int cnt = 0;
+ xlator_t *mds_subvol = NULL;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(fd->inode, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FGETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_NULL,
+ "Layout is NULL");
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ if (key) {
+ local->key = gf_strdup(key);
+ if (!local->key) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ }
+
+ gf_uuid_unparse(fd->inode->gfid, gfid);
+
+ if ((fd->inode->ia_type == IA_IFDIR) && key &&
+ (strncmp(key, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) !=
+ 0)) {
+ local->call_cnt = conf->subvolume_cnt;
+ cnt = conf->subvolume_cnt;
+ ret = dht_inode_ctx_mdsvol_get(fd->inode, this, &mds_subvol);
+
+ if (!mds_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "cannot determine MDS, fetching xattr %s "
+ " randomly from a subvol for gfid %s ",
+ key, gfid);
+ } else {
+ /* TODO need to handle it, As of now we are
+ choosing availability instead of chossing
+ consistencty, in case of hashed_subvol is
+ down winding a getxattr call on other subvol
+ and return xattr
+ */
+ local->mds_subvol = mds_subvol;
+ for (i = 0; i < cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_HASHED_SUBVOL_DOWN,
+ "MDS subvolume %s is down"
+ " for gfid %s so fetching xattr "
+ " %s randomly from a subvol ",
+ local->mds_subvol->name, gfid, key);
+ ret = 1;
+ }
+ }
+ }
}
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
+ if (!ret && key && local->mds_subvol && dht_match_xattr(key)) {
+ STACK_WIND(frame, dht_mds_getxattr_cbk, local->mds_subvol,
+ local->mds_subvol->fops->fgetxattr, fd, key, NULL);
+
+ return 0;
}
- local->call_cnt = 1;
+ } else {
+ cnt = local->call_cnt = 1;
+ }
- STACK_WIND (frame, dht_err_cbk, subvol, subvol->fops->fsetxattr,
- fd, xattr, flags, NULL);
-
- return 0;
+ for (i = 0; i < cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->fgetxattr, fd,
+ key, NULL);
+ }
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsetxattr, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
static int
-dht_common_setxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *xdata)
+dht_setxattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- DHT_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto err;
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ local->rebalance.xdata);
return 0;
+ }
+
+ if (subvol == NULL)
+ goto err;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (local->fop == GF_FOP_SETXATTR) {
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->setxattr, &local->loc,
+ local->rebalance.xattr, local->rebalance.flags,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->fsetxattr, local->fd,
+ local->rebalance.xattr, local->rebalance.flags,
+ local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND(setxattr, frame, (local ? local->op_ret : -1), op_errno,
+ NULL);
+ return 0;
}
int
-dht_checking_pathinfo_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr,
- dict_t *xdata)
+dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- int i = -1;
- int ret = -1;
- char *value = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- call_frame_t *prev = NULL;
- int this_call_cnt = 0;
-
- local = frame->local;
- prev = cookie;
- conf = this->private;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ struct iatt *stbuf = NULL;
+ inode_t *inode = NULL;
+ xlator_t *subvol1 = NULL, *subvol2 = NULL;
- if (op_ret == -1)
- goto out;
+ local = frame->local;
+ prev = cookie;
+ local->op_errno = op_errno;
- ret = dict_get_str (xattr, GF_XATTR_PATHINFO_KEY, &value);
+ if ((local->fop == GF_FOP_FSETXATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
if (ret)
- goto out;
+ goto out;
+ return 0;
+ }
- if (!strcmp (value, local->key)) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == prev->this)
- conf->decommissioned_bricks[i] = prev->this;
- }
- }
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1.",
+ prev->name);
+ goto out;
+ }
-out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (setxattr, frame, local->op_ret, ENOTSUP, NULL);
- }
- return 0;
+ if (local->call_cnt != 1)
+ goto out;
-}
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
-int
-dht_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr, int flags, dict_t *xdata)
-{
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
- int op_errno = EINVAL;
- int ret = -1;
- data_t *tmp = NULL;
- uint32_t dir_spread = 0;
- char value[4096] = {0,};
- gf_dht_migrate_data_type_t forced_rebalance = GF_DHT_MIGRATE_DATA;
- int call_cnt = 0;
- data_pair_t *trav = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.dht*", xattr,
- trav, op_errno, err);
-
- conf = this->private;
- local = dht_local_init (frame, loc, NULL, GF_FOP_SETXATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ if ((!op_ret) && !stbuf) {
+ goto out;
+ }
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local->op_ret = op_ret;
+ local->rebalance.target_op_fn = dht_setxattr2;
+ if (xdata)
+ local->rebalance.xdata = dict_ref(xdata);
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(stbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Phase 1 of migration */
+ if (IS_DHT_MIGRATION_PHASE1(stbuf)) {
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+
+ ret = dht_inode_ctx_get_mig_info(this, inode, &subvol1, &subvol2);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) {
+ dht_setxattr2(this, subvol2, frame, 0);
+ return 0;
}
- local->call_cnt = call_cnt = layout->cnt;
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+
+ if (local->fop == GF_FOP_SETXATTR) {
+ DHT_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+ } else {
+ DHT_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata);
+ }
+
+ return 0;
+}
+
+/* Function is call by dict_foreach_fnmatch if key is match with
+ user.* and set boolean flag to true
+*/
+static int
+dht_is_user_xattr(dict_t *this, char *key, data_t *value, void *data)
+{
+ gf_boolean_t *user_xattr_found = data;
+ *user_xattr_found = _gf_true;
+ return 0;
+}
+
+/* Common code to wind a (f)(set|remove)xattr call to set xattr on directory
+ */
+static int
+dht_dir_common_set_remove_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, dict_t *xattr, int flags,
+ dict_t *xdata, int *op_errno)
- /* This key is sent by Unified File and Object storage
- * to test xattr support in backend.
+{
+ dict_t *xattrop = NULL;
+ int32_t subone[1] = {-1};
+ gf_boolean_t uxattr_key_found = _gf_false;
+ xlator_t *mds_subvol = NULL;
+ xlator_t *travvol = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int i = 0;
+ int call_cnt = 0;
+ dht_local_t *local = NULL;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char **xattrs_to_heal;
+
+ conf = this->private;
+ local = frame->local;
+ call_cnt = conf->subvolume_cnt;
+ local->flags = flags;
+ xattrs_to_heal = get_xattrs_to_heal();
+
+ if (!gf_uuid_is_null(local->gfid)) {
+ gf_uuid_unparse(local->gfid, gfid_local);
+ }
+
+ if ((local->fop == GF_FOP_SETXATTR) || (local->fop == GF_FOP_FSETXATTR)) {
+ /* Check if any user xattr present in xattr
+ */
+ dict_foreach_fnmatch(xattr, "user*", dht_is_user_xattr,
+ &uxattr_key_found);
+
+ /* Check if any custom key xattr present in dict xattr
+ and start index from 1 because user xattr already
+ checked in previous line
+ */
+ for (i = 1; xattrs_to_heal[i]; i++)
+ if (dict_get(xattr, xattrs_to_heal[i]))
+ uxattr_key_found = _gf_true;
+ }
+
+ if ((local->fop == GF_FOP_REMOVEXATTR) ||
+ (local->fop == GF_FOP_FREMOVEXATTR)) {
+ /* Check if any custom key xattr present in local->key
*/
- tmp = dict_get (xattr, "user.ufo-test");
- if (tmp) {
- if (IA_ISREG (loc->inode->ia_type)) {
- op_errno = ENOTSUP;
- goto err;
+ for (i = 0; xattrs_to_heal[i]; i++)
+ if (strstr(local->key, xattrs_to_heal[i]))
+ uxattr_key_found = _gf_true;
+ }
+
+ /* If there is no custom key xattr present or gfid is root
+ or call_cnt is 1 then wind a (f)setxattr call on all subvols
+ */
+ if (!uxattr_key_found || __is_root_gfid(local->gfid) || call_cnt == 1) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ travvol = conf->subvolumes[i];
+ if ((local->fop == GF_FOP_SETXATTR) ||
+ (local->fop == GF_FOP_FSETXATTR)) {
+ if (fd) {
+ STACK_WIND_COOKIE(frame, dht_err_cbk, travvol, travvol,
+ travvol->fops->fsetxattr, fd, xattr,
+ flags, xdata);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_err_cbk, travvol, travvol,
+ travvol->fops->setxattr, loc, xattr,
+ flags, xdata);
}
- local->op_ret = 0;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_ufo_xattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->setxattr,
- loc, xattr, flags, NULL);
+ }
+
+ if ((local->fop == GF_FOP_REMOVEXATTR) ||
+ (local->fop == GF_FOP_FREMOVEXATTR)) {
+ if (fd) {
+ STACK_WIND_COOKIE(frame, dht_err_cbk, travvol, travvol,
+ travvol->fops->fremovexattr, fd,
+ local->key, local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_err_cbk, travvol, travvol,
+ travvol->fops->removexattr, loc,
+ local->key, local->xattr_req);
}
- return 0;
+ }
}
- tmp = dict_get (xattr, "distribute.migrate-data");
- if (tmp) {
- if (IA_ISDIR (loc->inode->ia_type)) {
- op_errno = ENOTSUP;
- goto err;
- }
+ return 0;
+ }
+
+ /* Calculate hash subvol based on inode and parent inode
+ */
+ if (fd) {
+ ret = dht_inode_ctx_mdsvol_get(fd->inode, this, &mds_subvol);
+ } else {
+ ret = dht_inode_ctx_mdsvol_get(loc->inode, this, &mds_subvol);
+ }
+ if (ret || !mds_subvol) {
+ if (fd) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get mds subvol for fd %p"
+ "gfid is %s ",
+ fd, gfid_local);
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "%s: Failed to get mds subvol. (gfid is %s)", loc->path,
+ gfid_local);
+ }
+ (*op_errno) = ENOENT;
+ goto err;
+ }
+
+ local->mds_subvol = mds_subvol;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_HASHED_SUBVOL_DOWN,
+ "MDS subvol is down for path "
+ " %s gfid is %s Unable to set xattr ",
+ local->loc.path, gfid_local);
+ (*op_errno) = ENOTCONN;
+ goto err;
+ }
+ }
+ }
+
+ if (uxattr_key_found) {
+ xattrop = dict_new();
+ if (!xattrop) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, 0,
+ "dictionary creation failed for path %s "
+ "for gfid is %s ",
+ local->loc.path, gfid_local);
+ (*op_errno) = ENOMEM;
+ goto err;
+ }
+ local->xattr = dict_ref(xattr);
+ /* Subtract current MDS xattr value to -1 , value of MDS
+ xattr represents no. of times xattr modification failed
+ on non MDS subvols.
+ */
+ ret = dht_dict_set_array(xattrop, conf->mds_xattr_key, subone, 1);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "dictionary set array failed for path %s "
+ "for gfid is %s ",
+ local->loc.path, gfid_local);
+ if (xattrop)
+ dict_unref(xattrop);
+ (*op_errno) = ret;
+ goto err;
+ }
+ /* Wind a xattrop call to use ref counting approach
+ update mds xattr to -1 before update xattr on
+ hashed subvol and update mds xattr to +1 after update
+ xattr on all non hashed subvol
+ */
+ if (fd) {
+ STACK_WIND(frame, dht_xattrop_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->fxattrop, fd,
+ GF_XATTROP_ADD_ARRAY, xattrop, NULL);
+ } else {
+ STACK_WIND(frame, dht_xattrop_mds_cbk, local->mds_subvol,
+ local->mds_subvol->fops->xattrop, loc,
+ GF_XATTROP_ADD_ARRAY, xattrop, NULL);
+ }
+ if (xattrop)
+ dict_unref(xattrop);
+ }
- /* TODO: need to interpret the 'value' for more meaning
- (ie, 'target' subvolume given there, etc) */
- memcpy (value, tmp->data, tmp->len);
- if (strcmp (value, "force") == 0)
- forced_rebalance =
- GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS;
-
- if (conf->decommission_in_progress)
- forced_rebalance = GF_DHT_MIGRATE_HARDLINK;
-
- local->rebalance.target_node = dht_subvol_get_hashed (this, loc);
- if (!local->rebalance.target_node) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to get "
- "hashed subvol for %s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ return 0;
+err:
+ return -1;
+}
- local->rebalance.from_subvol = local->cached_subvol;
+int
+dht_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr,
+ int flags, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(fd->inode, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ if (!conf->defrag)
+ GF_IF_INTERNAL_XATTR_GOTO(conf->wild_xattr_name, xattr, op_errno, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FSETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->xattr_req = xdata ? dict_ref(xdata) : dict_new();
+ local->call_cnt = call_cnt = layout->cnt;
+
+ if (IA_ISDIR(fd->inode->ia_type)) {
+ local->hashed_subvol = NULL;
+ ret = dht_dir_common_set_remove_xattr(frame, this, NULL, fd, xattr,
+ flags, xdata, &op_errno);
+ if (ret)
+ goto err;
+ } else {
+ local->call_cnt = 1;
+ local->rebalance.xattr = dict_ref(xattr);
+ local->rebalance.flags = flags;
- if (local->rebalance.target_node == local->rebalance.from_subvol) {
- op_errno = EEXIST;
- goto err;
- }
- if (local->rebalance.target_node) {
- local->flags = forced_rebalance;
+ ret = dict_set_int8(local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Failed to set dictionary key %s for fd=%p",
+ DHT_IATT_IN_XDATA_KEY, fd);
+ }
- ret = dht_start_rebalance_task (this, frame);
- if (!ret)
- return 0;
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->fsetxattr, fd, xattr, flags,
+ local->xattr_req);
+ }
+ return 0;
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to create a new synctask",
- loc->path);
- }
- op_errno = EINVAL;
- goto err;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fsetxattr, frame, -1, op_errno, NULL);
+ return 0;
+}
+
+static int
+dht_checking_pathinfo_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ int i = -1;
+ int ret = -1;
+ char *value = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *prev = NULL;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (op_ret == -1)
+ goto out;
+
+ ret = dict_get_str(xattr, GF_XATTR_PATHINFO_KEY, &value);
+ if (ret)
+ goto out;
+
+ if (!strcmp(value, local->key)) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev)
+ conf->decommissioned_bricks[i] = prev;
}
+ }
- tmp = dict_get (xattr, "decommission-brick");
- if (tmp) {
- /* This operation should happen only on '/' */
- if (!__is_root_gfid (loc->inode->gfid)) {
- op_errno = ENOTSUP;
- goto err;
- }
+out:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, ENOTSUP, NULL);
+ }
+ return 0;
+}
- memcpy (value, tmp->data, ((tmp->len < 4095) ? tmp->len : 4095));
- local->key = gf_strdup (value);
- local->call_cnt = conf->subvolume_cnt;
+static int
+dht_nuke_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
- for (i = 0 ; i < conf->subvolume_cnt; i++) {
- /* Get the pathinfo, and then compare */
- STACK_WIND (frame, dht_checking_pathinfo_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->getxattr,
- loc, GF_XATTR_PATHINFO_KEY, NULL);
- }
- return 0;
+static int
+dht_nuke_dir(call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *tmp)
+{
+ if (!IA_ISDIR(loc->inode->ia_type)) {
+ DHT_STACK_UNWIND(setxattr, frame, -1, ENOTSUP, NULL);
+ return 0;
+ }
+
+ /* Setxattr didn't need the parent, but rmdir does. */
+ loc->parent = inode_parent(loc->inode, NULL, NULL);
+ if (!loc->parent) {
+ DHT_STACK_UNWIND(setxattr, frame, -1, ENOENT, NULL);
+ return 0;
+ }
+ gf_uuid_copy(loc->pargfid, loc->parent->gfid);
+
+ if (!loc->name && loc->path) {
+ loc->name = strrchr(loc->path, '/');
+ if (loc->name) {
+ ++(loc->name);
+ }
+ }
+
+ /*
+ * We do this instead of calling dht_rmdir_do directly for two reasons.
+ * The first is that we want to reuse all of the initialization that
+ * dht_rmdir does, so if it ever changes we'll just follow along. The
+ * second (i.e. why we don't use STACK_WIND_TAIL) is so that we don't
+ * obscure the fact that we came in via this path instead of a genuine
+ * rmdir. That makes debugging just a tiny bit easier.
+ */
+ STACK_WIND(frame, dht_nuke_dir_cbk, this, this->fops->rmdir, loc, 1, NULL);
+
+ return 0;
+}
+
+int
+dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr,
+ int flags, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int op_errno = EINVAL;
+ int ret = -1;
+ data_t *tmp = NULL;
+ uint32_t dir_spread = 0;
+ char value[4096] = {
+ 0,
+ };
+ gf_dht_migrate_data_type_t forced_rebalance = GF_DHT_MIGRATE_DATA;
+ int call_cnt = 0;
+ uint32_t new_hash = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, err);
+
+ methods = &(conf->methods);
+
+ /* Rebalance daemon is allowed to set internal keys */
+ if (!conf->defrag)
+ GF_IF_INTERNAL_XATTR_GOTO(conf->wild_xattr_name, xattr, op_errno, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_SETXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+ tmp = dict_get(xattr, conf->mds_xattr_key);
+ if (tmp) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ tmp = dict_get(xattr, GF_XATTR_FILE_MIGRATE_KEY);
+ if (tmp) {
+ if (IA_ISDIR(loc->inode->ia_type)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ /* TODO: need to interpret the 'value' for more meaning
+ (ie, 'target' subvolume given there, etc) */
+ memcpy(value, tmp->data, tmp->len);
+ if (strcmp(value, "force") == 0)
+ forced_rebalance = GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS;
+
+ if (conf->decommission_in_progress)
+ forced_rebalance = GF_DHT_MIGRATE_HARDLINK;
+
+ if (!loc->path) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!local->loc.name)
+ local->loc.name = strrchr(local->loc.path, '/') + 1;
+
+ if (!local->loc.parent)
+ local->loc.parent = inode_parent(local->loc.inode, NULL, NULL);
+
+ if ((!local->loc.name) || (!local->loc.parent)) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (gf_uuid_is_null(local->loc.pargfid))
+ gf_uuid_copy(local->loc.pargfid, local->loc.parent->gfid);
+
+ methods->migration_get_dst_subvol(this, local);
+
+ if (!local->rebalance.target_node) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for %s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->rebalance.from_subvol = local->cached_subvol;
+
+ if (local->rebalance.target_node == local->rebalance.from_subvol) {
+ op_errno = EEXIST;
+ goto err;
}
+ if (local->rebalance.target_node) {
+ local->flags = forced_rebalance;
- tmp = dict_get (xattr, GF_XATTR_FIX_LAYOUT_KEY);
- if (tmp) {
- gf_log (this->name, GF_LOG_INFO,
- "fixing the layout of %s", loc->path);
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
- dht_fix_directory_layout (frame, dht_common_setxattr_cbk,
- layout);
+ ret = dht_start_rebalance_task(this, frame);
+ if (!ret)
return 0;
+
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_START_FAILED,
+ "%s: failed to create a new rebalance synctask", loc->path);
}
+ op_errno = EINVAL;
+ goto err;
+ }
- tmp = dict_get (xattr, "distribute.directory-spread-count");
- if (tmp) {
- /* Setxattr value is packed as 'binary', not string */
- memcpy (value, tmp->data, ((tmp->len < 4095)?tmp->len:4095));
- ret = gf_string2uint32 (value, &dir_spread);
- if (!ret && ((dir_spread <= conf->subvolume_cnt) &&
- (dir_spread > 0))) {
- layout->spread_cnt = dir_spread;
+ tmp = dict_get(xattr, "decommission-brick");
+ if (tmp) {
+ /* This operation should happen only on '/' */
+ if (!__is_root_gfid(loc->inode->gfid)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
- dht_fix_directory_layout (frame,
- dht_common_setxattr_cbk,
- layout);
- return 0;
- }
- gf_log (this->name, GF_LOG_ERROR,
- "wrong 'directory-spread-count' value (%s)", value);
- op_errno = ENOTSUP;
+ memcpy(value, tmp->data, min(tmp->len, 4095));
+ local->key = gf_strdup(value);
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ /* Get the pathinfo, and then compare */
+ STACK_WIND_COOKIE(frame, dht_checking_pathinfo_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->getxattr, loc,
+ GF_XATTR_PATHINFO_KEY, NULL);
+ }
+ return 0;
+ }
+
+ tmp = dict_get(xattr, GF_XATTR_FIX_LAYOUT_KEY);
+ if (tmp) {
+ ret = dict_get_uint32(xattr, "new-commit-hash", &new_hash);
+ if (ret == 0) {
+ gf_msg_debug(this->name, 0,
+ "updating commit hash for %s from %u to %u",
+ uuid_utoa(loc->gfid), layout->commit_hash, new_hash);
+ layout->commit_hash = new_hash;
+
+ ret = dht_update_commit_hash_for_layout(frame);
+ if (ret) {
+ op_errno = ENOTCONN;
goto err;
+ }
+ return ret;
}
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_err_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->setxattr,
- loc, xattr, flags, xdata);
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_FIX_LAYOUT_INFO,
+ "fixing the layout of %s", loc->path);
+
+ ret = dht_fix_directory_layout(frame, dht_fix_layout_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
}
+ return ret;
+ }
+
+ tmp = dict_get(xattr, "distribute.directory-spread-count");
+ if (tmp) {
+ /* Setxattr value is packed as 'binary', not string */
+ memcpy(value, tmp->data, min(tmp->len, 4095));
+ ret = gf_string2uint32(value, &dir_spread);
+ if (!ret && ((dir_spread <= conf->subvolume_cnt) && (dir_spread > 0))) {
+ layout->spread_cnt = dir_spread;
+
+ ret = dht_fix_directory_layout(frame, dht_common_setxattr_cbk,
+ layout);
+ if (ret) {
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ return ret;
+ }
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_OPERATION_NOT_SUP,
+ "wrong 'directory-spread-count' value (%s)", value);
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ tmp = dict_get(xattr, "glusterfs.dht.nuke");
+ if (tmp) {
+ return dht_nuke_dir(frame, this, loc, tmp);
+ }
+ local->xattr_req = xdata ? dict_ref(xdata) : dict_new();
+
+ if (IA_ISDIR(loc->inode->ia_type)) {
+ local->hashed_subvol = NULL;
+ ret = dht_dir_common_set_remove_xattr(frame, this, loc, NULL, xattr,
+ flags, xdata, &op_errno);
+ if (ret)
+ goto err;
+ } else {
+ local->rebalance.xattr = dict_ref(xattr);
+ local->rebalance.flags = flags;
+ local->call_cnt = 1;
- return 0;
+ ret = dict_set_int8(local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->setxattr, loc, xattr, flags,
+ local->xattr_req);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-
-int
-dht_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+static int
+dht_removexattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ if (!frame || !frame->local)
+ goto err;
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
+ local = frame->local;
+ op_errno = local->op_errno;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (removexattr, frame, local->op_ret,
- local->op_errno, NULL);
- }
+ local->call_cnt = 2; /* This is the second attempt */
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ local->rebalance.xdata);
return 0;
-}
+ }
+
+ if (subvol == NULL)
+ goto err;
+
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->removexattr, &local->loc, local->key,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->fremovexattr, local->fd, local->key,
+ local->xattr_req);
+ }
+ return 0;
+
+err:
+ DHT_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
+ return 0;
+}
int
-dht_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *key, dict_t *xdata)
+dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int call_cnt = 0;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ struct iatt *stbuf = NULL;
+ inode_t *inode = NULL;
+ xlator_t *subvol1 = NULL, *subvol2 = NULL;
- int i;
+ local = frame->local;
+ prev = cookie;
- VALIDATE_OR_GOTO (this, err);
+ local->op_errno = op_errno;
- GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.dht*",
- key, op_errno, err);
+ if ((local->fop == GF_FOP_FREMOVEXATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
- local = dht_local_init (frame, loc, NULL, GF_FOP_REMOVEXATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ if (local->call_cnt != 1)
+ goto out;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
- layout = local->layout;
- if (!local->layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ if ((!op_ret) && !stbuf) {
+ goto out;
+ }
- local->call_cnt = call_cnt = layout->cnt;
- local->key = gf_strdup (key);
+ local->op_ret = 0;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_removexattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->removexattr,
- loc, key, NULL);
- }
+ local->rebalance.target_op_fn = dht_removexattr2;
+ if (xdata)
+ local->rebalance.xdata = dict_ref(xdata);
- return 0;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(stbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
+ /* Phase 1 of migration */
+ if (IS_DHT_MIGRATION_PHASE1(stbuf)) {
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
- return 0;
+ ret = dht_inode_ctx_get_mig_info(this, inode, &subvol1, &subvol2);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) {
+ dht_removexattr2(this, subvol2, frame, 0);
+ return 0;
+ }
+
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ if (local->fop == GF_FOP_REMOVEXATTR) {
+ DHT_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata);
+ } else {
+ DHT_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
+ }
+ return 0;
}
int
-dht_fremovexattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *key, dict_t *xdata)
+dht_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int call_cnt = 0;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
+
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ GF_IF_NATIVE_XATTR_GOTO(conf->wild_xattr_name, key, op_errno, err);
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_REMOVEXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!local->layout) {
+ gf_msg_debug(this->name, 0, "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+
+ local->call_cnt = call_cnt = layout->cnt;
+ local->key = gf_strdup(key);
+
+ if (key && (strncmp(key, conf->mds_xattr_key, strlen(key)) == 0)) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ if (IA_ISDIR(loc->inode->ia_type)) {
+ local->hashed_subvol = NULL;
+ ret = dht_dir_common_set_remove_xattr(frame, this, loc, NULL, NULL, 0,
+ local->xattr_req, &op_errno);
+ if (ret)
+ goto err;
- int i;
+ } else {
+ local->call_cnt = 1;
+ ret = dict_set_int8(local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to "
+ "set dictionary key %s for %s",
+ DHT_IATT_IN_XDATA_KEY, loc->path);
+ }
- VALIDATE_OR_GOTO (this, err);
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->removexattr, loc, key,
+ local->xattr_req);
+ }
- GF_IF_NATIVE_XATTR_GOTO ("trusted.glusterfs.dht*",
- key, op_errno, err);
+ return 0;
- VALIDATE_OR_GOTO (frame, err);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
- local = dht_local_init (frame, NULL, fd, GF_FOP_FREMOVEXATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ return 0;
+}
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for inode=%s",
- uuid_utoa (fd->inode->gfid));
- op_errno = EINVAL;
- goto err;
- }
+int
+dht_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = 0;
+ int ret = 0;
+
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ GF_IF_NATIVE_XATTR_GOTO(conf->wild_xattr_name, key, op_errno, err);
+
+ VALIDATE_OR_GOTO(frame, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FREMOVEXATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for inode=%s",
+ uuid_utoa(fd->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!local->layout) {
+ gf_msg_debug(this->name, 0, "no layout for inode=%s",
+ uuid_utoa(fd->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+ local->xattr_req = xdata ? dict_ref(xdata) : dict_new();
+
+ local->call_cnt = call_cnt = layout->cnt;
+ local->key = gf_strdup(key);
+
+ if (IA_ISDIR(fd->inode->ia_type)) {
+ local->hashed_subvol = NULL;
+ ret = dht_dir_common_set_remove_xattr(frame, this, NULL, fd, NULL, 0,
+ local->xattr_req, &op_errno);
+ if (ret)
+ goto err;
- layout = local->layout;
- if (!local->layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for inode=%s", uuid_utoa (fd->inode->gfid));
- op_errno = EINVAL;
- goto err;
+ } else {
+ local->call_cnt = 1;
+ ret = dict_set_int8(local->xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to "
+ "set dictionary key %s for fd=%p",
+ DHT_IATT_IN_XDATA_KEY, fd);
}
- local->call_cnt = call_cnt = layout->cnt;
- local->key = gf_strdup (key);
-
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_removexattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->fremovexattr,
- fd, key, NULL);
- }
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->fremovexattr, fd, key,
+ local->xattr_req);
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fremovexattr, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-
int
-dht_fd_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+dht_fd_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, fd_t *fd, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ local = frame->local;
+ prev = cookie;
- local->op_ret = 0;
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto post_unlock;
}
-unlock:
- UNLOCK (&frame->lock);
-
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (open, frame, local->op_ret, local->op_errno,
- local->fd, NULL);
- return 0;
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt))
+ DHT_STACK_UNWIND(open, frame, local->op_ret, local->op_errno, local->fd,
+ NULL);
+
+ return 0;
}
/*
* dht_normalize_stats -
*/
static void
-dht_normalize_stats (struct statvfs *buf, unsigned long bsize,
- unsigned long frsize)
+dht_normalize_stats(struct statvfs *buf, unsigned long bsize,
+ unsigned long frsize)
{
- double factor = 0;
+ double factor = 0;
+
+ if (buf->f_bsize != bsize) {
+ buf->f_bsize = bsize;
+ }
+
+ if (buf->f_frsize != frsize) {
+ factor = ((double)buf->f_frsize) / frsize;
+ buf->f_frsize = frsize;
+ buf->f_blocks = (fsblkcnt_t)(factor * buf->f_blocks);
+ buf->f_bfree = (fsblkcnt_t)(factor * buf->f_bfree);
+ buf->f_bavail = (fsblkcnt_t)(factor * buf->f_bavail);
+ }
+}
+
+static int
+dht_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct statvfs *statvfs, dict_t *xdata)
+{
+ gf_boolean_t event = _gf_false;
+ qdstatfs_action_t action = qdstatfs_action_OFF;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int bsize = 0;
+ int frsize = 0;
+ GF_UNUSED int ret = 0;
+ unsigned long new_usage = 0;
+ unsigned long cur_usage = 0;
+
+ local = frame->local;
+ GF_ASSERT(local);
+
+ if (xdata)
+ ret = dict_get_int8(xdata, "quota-deem-statfs", (int8_t *)&event);
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+ if (!statvfs) {
+ op_errno = EINVAL;
+ local->op_ret = -1;
+ goto unlock;
+ }
+ local->op_ret = 0;
- if (buf->f_bsize != bsize) {
- buf->f_bsize = bsize;
+ if (local->quota_deem_statfs) {
+ if (event == _gf_true) {
+ action = qdstatfs_action_COMPARE;
+ } else {
+ action = qdstatfs_action_NEGLECT;
+ }
+ } else {
+ if (event == _gf_true) {
+ action = qdstatfs_action_REPLACE;
+ local->quota_deem_statfs = _gf_true;
+ }
}
- if (buf->f_frsize != frsize) {
- factor = ((double) buf->f_frsize) / frsize;
- buf->f_frsize = frsize;
- buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks);
- buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree);
- buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail);
+ if (local->quota_deem_statfs) {
+ switch (action) {
+ case qdstatfs_action_NEGLECT:
+ goto unlock;
+
+ case qdstatfs_action_REPLACE:
+ local->statvfs = *statvfs;
+ goto unlock;
+
+ case qdstatfs_action_COMPARE:
+ new_usage = statvfs->f_blocks - statvfs->f_bfree;
+ cur_usage = local->statvfs.f_blocks -
+ local->statvfs.f_bfree;
+ /* Take the max of the usage from subvols */
+ if (new_usage >= cur_usage)
+ local->statvfs = *statvfs;
+ goto unlock;
+
+ default:
+ break;
+ }
}
+
+ if (local->statvfs.f_bsize != 0) {
+ bsize = max(local->statvfs.f_bsize, statvfs->f_bsize);
+ frsize = max(local->statvfs.f_frsize, statvfs->f_frsize);
+ dht_normalize_stats(&local->statvfs, bsize, frsize);
+ dht_normalize_stats(statvfs, bsize, frsize);
+ } else {
+ local->statvfs.f_bsize = statvfs->f_bsize;
+ local->statvfs.f_frsize = statvfs->f_frsize;
+ }
+
+ local->statvfs.f_blocks += statvfs->f_blocks;
+ local->statvfs.f_bfree += statvfs->f_bfree;
+ local->statvfs.f_bavail += statvfs->f_bavail;
+ local->statvfs.f_files += statvfs->f_files;
+ local->statvfs.f_ffree += statvfs->f_ffree;
+ local->statvfs.f_favail += statvfs->f_favail;
+ local->statvfs.f_fsid = statvfs->f_fsid;
+ local->statvfs.f_flag = statvfs->f_flag;
+ local->statvfs.f_namemax = statvfs->f_namemax;
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt))
+ DHT_STACK_UNWIND(statfs, frame, local->op_ret, local->op_errno,
+ &local->statvfs, xdata);
+
+ return 0;
}
int
-dht_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs, dict_t *xdata)
+dht_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- int bsize = 0;
- int frsize = 0;
-
-
- local = frame->local;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ int i = -1;
+ inode_t *inode = NULL;
+ inode_table_t *itable = NULL;
+ static uuid_t root_gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ loc_t newloc = {
+ 0,
+ };
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
- }
- local->op_ret = 0;
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(this->private, err);
- if (local->statvfs.f_bsize != 0) {
- bsize = max(local->statvfs.f_bsize, statvfs->f_bsize);
- frsize = max(local->statvfs.f_frsize, statvfs->f_frsize);
- dht_normalize_stats(&local->statvfs, bsize, frsize);
- dht_normalize_stats(statvfs, bsize, frsize);
- } else {
- local->statvfs.f_bsize = statvfs->f_bsize;
- local->statvfs.f_frsize = statvfs->f_frsize;
- }
+ conf = this->private;
- local->statvfs.f_blocks += statvfs->f_blocks;
- local->statvfs.f_bfree += statvfs->f_bfree;
- local->statvfs.f_bavail += statvfs->f_bavail;
- local->statvfs.f_files += statvfs->f_files;
- local->statvfs.f_ffree += statvfs->f_ffree;
- local->statvfs.f_favail += statvfs->f_favail;
- local->statvfs.f_fsid = statvfs->f_fsid;
- local->statvfs.f_flag = statvfs->f_flag;
- local->statvfs.f_namemax = statvfs->f_namemax;
+ local = dht_local_init(frame, NULL, NULL, GF_FOP_STATFS);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ if (loc->inode && !IA_ISDIR(loc->inode->ia_type)) {
+ itable = loc->inode->table;
+ if (!itable) {
+ op_errno = EINVAL;
+ goto err;
}
-unlock:
- UNLOCK (&frame->lock);
+ loc = &local->loc2;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (statfs, frame, local->op_ret, local->op_errno,
- &local->statvfs, xdata);
+ inode = inode_find(itable, root_gfid);
+ if (!inode) {
+ op_errno = EINVAL;
+ goto err;
+ }
- return 0;
-}
+ dht_build_root_loc(inode, &newloc);
+ loc = &newloc;
+ }
+ local->call_cnt = conf->subvolume_cnt;
-int
-dht_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (this->private, err);
-
- conf = this->private;
-
- local = dht_local_init (frame, NULL, NULL, GF_FOP_STATFS);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND(frame, dht_statfs_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->statfs, loc, xdata);
+ }
+ return 0;
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_cnt = conf->subvolume_cnt;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_statfs_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs, loc,
- xdata);
- }
- return 0;
- }
+ return 0;
+}
- subvol = dht_subvol_get_cached (this, loc->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
+int
+dht_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ int i = -1;
+ int ret = 0;
+ gf_boolean_t new_xdata = _gf_false;
+ xlator_t **subvolumes = NULL;
+ int call_count = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, loc, fd, GF_FOP_OPENDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ local->first_up_subvol = dht_first_up_subvol(this);
+
+ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ new_xdata = _gf_true;
+ }
+
+ ret = dict_set_uint32(xdata, conf->link_xattr_name, 256);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value : key = %s",
+ conf->link_xattr_name);
+
+ /* dht_readdirp will wind to all subvols so open has to be sent to
+ * all subvols whether or not conf->local_subvols is set */
+
+ call_count = local->call_cnt = conf->subvolume_cnt;
+ subvolumes = conf->subvolumes;
+
+ /* In case of parallel-readdir, the readdir-ahead will be loaded
+ * below dht, in this case, if we want to enable or disable SKIP_DIRs
+ * it has to be done in opendir, so that prefetching logic in
+ * readdir-ahead, honors it */
+ for (i = 0; i < call_count; i++) {
+ if (conf->readdir_optimize == _gf_true) {
+ if (subvolumes[i] != local->first_up_subvol) {
+ ret = dict_set_int32(xdata, GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary"
+ " value :key = %s, ret:%d",
+ GF_READDIR_SKIP_DIRS, ret);
+ }
}
- local->call_cnt = 1;
+ STACK_WIND_COOKIE(frame, dht_fd_cbk, subvolumes[i], subvolumes[i],
+ subvolumes[i]->fops->opendir, loc, fd, xdata);
+ dict_del(xdata, GF_READDIR_SKIP_DIRS);
+ }
- STACK_WIND (frame, dht_statfs_cbk,
- subvol, subvol->fops->statfs, loc, xdata);
+ if (new_xdata)
+ dict_unref(xdata);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(opendir, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
+/* dht_readdirp_cbk creates a new dentry and dentry->inode is not assigned.
+ This functions assigns an inode if all of the following conditions are
+ true:
-int
-dht_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
- dict_t *xdata)
+ * DHT has only one child. In this case the entire layout is present on
+ this single child and hence we can set complete layout in inode.
+ * backend has complete layout and there are no anomalies in it and from
+ this information layout can be constructed and set in inode.
+*/
+
+static void
+dht_populate_inode_for_dentry(xlator_t *this, xlator_t *subvol,
+ gf_dirent_t *entry, gf_dirent_t *orig_entry)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
+ dht_layout_t *layout = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+
+ if (gf_uuid_is_null(orig_entry->d_stat.ia_gfid)) {
+ /* this skips the '..' entry for the root of the volume */
+ return;
+ }
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (this->private, err);
+ gf_uuid_copy(loc.gfid, orig_entry->d_stat.ia_gfid);
+ loc.inode = inode_ref(orig_entry->inode);
- conf = this->private;
+ if (is_revalidate(&loc)) {
+ goto out;
+ }
- local = dht_local_init (frame, loc, fd, GF_FOP_OPENDIR);
- if (!local) {
- op_errno = ENOMEM;
+ layout = dht_layout_new(this, 1);
+ if (!layout)
+ goto out;
- goto err;
+ ret = dht_layout_merge(this, layout, subvol, 0, 0, orig_entry->dict);
+ if (!ret) {
+ ret = dht_layout_normalize(this, &loc, layout);
+ if (ret == 0) {
+ dht_layout_set(this, orig_entry->inode, layout);
+ entry->inode = inode_ref(orig_entry->inode);
+ layout = NULL;
}
+ }
- local->call_cnt = conf->subvolume_cnt;
+ if (layout)
+ dht_layout_unref(this, layout);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_fd_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- loc, fd, xdata);
- }
+out:
+ loc_wipe(&loc);
+ return;
+}
- return 0;
+/* Posix returns op_errno = ENOENT to indicate that there are no more
+ * entries
+ */
+static int
+dht_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *orig_entry = NULL;
+ gf_dirent_t *entry = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *next_subvol = NULL;
+ off_t next_offset = 0;
+ int count = 0;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+ xlator_t *subvol = 0;
+ xlator_t *hashed_subvol = 0;
+ int ret = 0;
+ int readdir_optimize = 0;
+ inode_table_t *itable = NULL;
+ inode_t *inode = NULL;
+ gf_boolean_t skip_hashed_check = _gf_false;
+
+ INIT_LIST_HEAD(&entries.list);
+
+ prev = cookie;
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO(this->name, local->fd, unwind);
+
+ itable = local->fd->inode->table;
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, unwind);
+
+ methods = &(conf->methods);
+
+ if (op_ret <= 0) {
+ goto done;
+ }
+
+ /* Why aren't we skipping DHT entirely in case of a single subvol?
+ * Because if this was a larger volume earlier and all but one subvol
+ * was removed, there might be stale linkto files on the subvol.
+ */
+ if (conf->subvolume_cnt == 1) {
+ /* return all directory and file entries except
+ * linkto files for a single child DHT
+ */
+ skip_hashed_check = _gf_true;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
+ if (!local->layout)
+ local->layout = dht_layout_get(this, local->fd->inode);
- return 0;
-}
+ layout = local->layout;
+ /* This will skip the entries on the subvol without a layout,
+ * hence preventing the crash but rmdir might fail with
+ * "directory not empty" errors*/
-int
-dht_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- call_frame_t *prev = NULL;
- xlator_t *next_subvol = NULL;
- off_t next_offset = 0;
- int count = 0;
- dht_layout_t *layout = 0;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = 0;
- int ret = 0;
-
- INIT_LIST_HEAD (&entries.list);
- prev = cookie;
- local = frame->local;
- conf = this->private;
+ if (layout == NULL)
+ goto done;
- if (op_ret < 0)
- goto done;
+ if (conf->readdir_optimize == _gf_true)
+ readdir_optimize = 1;
- if (!local->layout)
- local->layout = dht_layout_get (this, local->fd->inode);
+ gf_msg_debug(this->name, 0, "Processing entries from %s", prev->name);
- layout = local->layout;
+ list_for_each_entry(orig_entry, (&orig_entries->list), list)
+ {
+ next_offset = orig_entry->d_off;
- list_for_each_entry (orig_entry, (&orig_entries->list), list) {
- next_offset = orig_entry->d_off;
- if ((check_is_dir (NULL, (&orig_entry->d_stat), NULL) &&
- (prev->this != dht_first_up_subvol (this))) ||
- check_is_linkfile (NULL, (&orig_entry->d_stat),
- orig_entry->dict)) {
- continue;
- }
+ gf_msg_debug(this->name, 0, "%s: entry = %s, type = %d", prev->name,
+ orig_entry->d_name, orig_entry->d_type);
- entry = gf_dirent_for_name (orig_entry->d_name);
- if (!entry) {
+ if (IA_ISINVAL(orig_entry->d_stat.ia_type)) {
+ /*stat failed somewhere- display this entry but the data may
+ * be inaccurate.
+ */
+ gf_msg_debug(this->name, EINVAL, "Invalid stat for %s (gfid %s)",
+ orig_entry->d_name,
+ uuid_utoa(orig_entry->d_stat.ia_gfid));
+ }
- goto unwind;
- }
+ if (check_is_linkfile(NULL, (&orig_entry->d_stat), orig_entry->dict,
+ conf->link_xattr_name)) {
+ gf_msg_debug(this->name, 0, "%s: %s is a linkto file", prev->name,
+ orig_entry->d_name);
+ continue;
+ }
- /* Do this if conf->search_unhashed is set to "auto" */
- if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) {
- subvol = dht_layout_search (this, layout,
- orig_entry->d_name);
- if (!subvol || (subvol != prev->this)) {
- /* TODO: Count the number of entries which need
- linkfile to prove its existence in fs */
- layout->search_unhashed++;
- }
- }
+ if (skip_hashed_check) {
+ goto list;
+ }
- dht_itransform (this, prev->this, orig_entry->d_off,
- &entry->d_off);
-
- entry->d_stat = orig_entry->d_stat;
- entry->d_ino = orig_entry->d_ino;
- entry->d_type = orig_entry->d_type;
- entry->d_len = orig_entry->d_len;
-
- if (orig_entry->dict)
- entry->dict = dict_ref (orig_entry->dict);
-
- /* making sure we set the inode ctx right with layout,
- currently possible only for non-directories, so for
- directories don't set entry inodes */
- if (!IA_ISDIR(entry->d_stat.ia_type)) {
- ret = dht_layout_preset (this, prev->this,
- orig_entry->inode);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to link the layout in inode");
- entry->inode = inode_ref (orig_entry->inode);
+ if (check_is_dir(NULL, (&orig_entry->d_stat), NULL)) {
+ /*Directory entries filtering :
+ * a) If rebalance is running, pick from first_up_subvol
+ * b) (rebalance not running)hashed subvolume is NULL or
+ * down then filter in first_up_subvolume. Other wise the
+ * corresponding hashed subvolume will take care of the
+ * directory entry.
+ */
+ if (readdir_optimize) {
+ if (prev == local->first_up_subvol)
+ goto list;
+ else
+ continue;
+ }
+
+ hashed_subvol = methods->layout_search(this, layout,
+ orig_entry->d_name);
+
+ if (prev == hashed_subvol)
+ goto list;
+ if ((hashed_subvol && dht_subvol_status(conf, hashed_subvol)) ||
+ (prev != local->first_up_subvol))
+ continue;
+
+ goto list;
+ }
+
+ list:
+ entry = gf_dirent_for_name(orig_entry->d_name);
+ if (!entry) {
+ goto unwind;
+ }
+
+ /* Do this if conf->search_unhashed is set to "auto" */
+ if (conf->search_unhashed == GF_DHT_LOOKUP_UNHASHED_AUTO) {
+ subvol = methods->layout_search(this, layout, orig_entry->d_name);
+ if (!subvol || (subvol != prev)) {
+ /* TODO: Count the number of entries which need
+ linkfile to prove its existence in fs */
+ layout->search_unhashed++;
+ }
+ }
+
+ entry->d_off = orig_entry->d_off;
+ entry->d_stat = orig_entry->d_stat;
+ entry->d_ino = orig_entry->d_ino;
+ entry->d_type = orig_entry->d_type;
+ entry->d_len = orig_entry->d_len;
+
+ if (orig_entry->dict)
+ entry->dict = dict_ref(orig_entry->dict);
+
+ /* making sure we set the inode ctx right with layout,
+ currently possible only for non-directories, so for
+ directories don't set entry inodes */
+ if (IA_ISDIR(entry->d_stat.ia_type)) {
+ entry->d_stat.ia_blocks = DHT_DIR_STAT_BLOCKS;
+ entry->d_stat.ia_size = DHT_DIR_STAT_SIZE;
+ if (orig_entry->inode) {
+ dht_inode_ctx_time_update(orig_entry->inode, this,
+ &entry->d_stat, 1);
+
+ if (conf->subvolume_cnt == 1) {
+ dht_populate_inode_for_dentry(this, prev, entry,
+ orig_entry);
}
-
- list_add_tail (&entry->list, &entries.list);
- count++;
+ }
+ } else {
+ if (orig_entry->dict &&
+ dict_get(orig_entry->dict, conf->link_xattr_name)) {
+ /* Strip out the S and T flags set by rebalance*/
+ DHT_STRIP_PHASE1_FLAGS(&entry->d_stat);
+ }
+
+ if (orig_entry->inode) {
+ ret = dht_layout_preset(this, prev, orig_entry->inode);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_SET_FAILED,
+ "failed to link the layout "
+ "in inode for %s",
+ orig_entry->d_name);
+
+ entry->inode = inode_ref(orig_entry->inode);
+ } else if (itable) {
+ /*
+ * orig_entry->inode might be null if any upper
+ * layer xlators below client set to null, to
+ * force a lookup on the inode even if the inode
+ * is present in the inode table. In that case
+ * we just update the ctx to make sure we didn't
+ * missed anything.
+ */
+ inode = inode_find(itable, orig_entry->d_stat.ia_gfid);
+ if (inode) {
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_SET_FAILED,
+ "failed to link the layout"
+ " in inode for %s",
+ orig_entry->d_name);
+ inode_unref(inode);
+ inode = NULL;
+ }
+ }
}
- op_ret = count;
- /* We need to ensure that only the last subvolume's end-of-directory
- * notification is respected so that directory reading does not stop
- * before all subvolumes have been read. That could happen because the
- * posix for each subvolume sends a ENOENT on end-of-directory but in
- * distribute we're not concerned only with a posix's view of the
- * directory but the aggregated namespace' view of the directory.
- */
- if (prev->this != dht_last_up_subvol (this))
- op_errno = 0;
+
+ gf_msg_debug(this->name, 0, "%s: Adding entry = %s", prev->name,
+ entry->d_name);
+
+ list_add_tail(&entry->list, &entries.list);
+ count++;
+ }
done:
- if (count == 0) {
- /* non-zero next_offset means that
- EOF is not yet hit on the current subvol
- */
- if (next_offset == 0) {
- next_subvol = dht_subvol_next (this, prev->this);
- } else {
- next_subvol = prev->this;
- }
- if (!next_subvol) {
- goto unwind;
- }
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ * Possible values:
+ * op_ret == 0 and op_errno != 0
+ * if op_errno != ENOENT : Error.Unwind.
+ * if op_errno == ENOENT : There are no more entries on this subvol.
+ * Move to the next one.
+ * op_ret > 0 and count == 0 :
+ * The subvol returned entries to dht but all were stripped out.
+ * For example, if they were linkto files or dirs where
+ * hashed_subvol != prev. Try to get some entries by winding
+ * to the next subvol. This can be dangerous if parallel readdir
+ * is enabled as it grows the stack.
+ *
+ * op_ret > 0 and count > 0:
+ * We found some entries. Unwind even if the buffer is not full.
+ *
+ */
+
+ op_ret = count;
+ if (count == 0) {
+ /* non-zero next_offset means that
+ * EOF is not yet hit on the current subvol
+ */
+ if ((next_offset == 0) || (op_errno == ENOENT)) {
+ next_offset = 0;
+ next_subvol = dht_subvol_next(this, prev);
+ } else {
+ next_subvol = prev;
+ }
- STACK_WIND (frame, dht_readdirp_cbk,
- next_subvol, next_subvol->fops->readdirp,
- local->fd, local->size, next_offset,
- local->xattr);
- return 0;
+ if (!next_subvol) {
+ goto unwind;
}
+ if (conf->readdir_optimize == _gf_true) {
+ if (next_subvol != local->first_up_subvol) {
+ ret = dict_set_int32(local->xattr, GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value"
+ ":key = %s",
+ GF_READDIR_SKIP_DIRS);
+ } else {
+ dict_del(local->xattr, GF_READDIR_SKIP_DIRS);
+ }
+ }
+
+ STACK_WIND_COOKIE(frame, dht_readdirp_cbk, next_subvol, next_subvol,
+ next_subvol->fops->readdirp, local->fd, local->size,
+ next_offset, local->xattr);
+ return 0;
+ }
+
unwind:
- if (op_ret < 0)
- op_ret = 0;
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ */
+ if (op_ret < 0)
+ op_ret = 0;
+
+ if (prev != dht_last_up_subvol(this))
+ op_errno = 0;
+
+ DHT_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL);
+
+ gf_dirent_free(&entries);
+ return 0;
+}
- DHT_STACK_UNWIND (readdirp, frame, op_ret, op_errno, &entries, NULL);
+static int
+dht_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, gf_dirent_t *orig_entries, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *orig_entry = NULL;
+ gf_dirent_t *entry = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *next_subvol = NULL;
+ off_t next_offset = 0;
+ int count = 0;
+ dht_layout_t *layout = 0;
+ xlator_t *subvol = 0;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+ gf_boolean_t skip_hashed_check = _gf_false;
- gf_dirent_free (&entries);
+ INIT_LIST_HEAD(&entries.list);
- return 0;
-}
+ prev = cookie;
+ local = frame->local;
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, done);
+ methods = &(conf->methods);
-int
-dht_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *orig_entries,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- call_frame_t *prev = NULL;
- xlator_t *next_subvol = NULL;
- off_t next_offset = 0;
- int count = 0;
- dht_layout_t *layout = 0;
- xlator_t *subvol = 0;
-
- INIT_LIST_HEAD (&entries.list);
- prev = cookie;
- local = frame->local;
+ if (op_ret <= 0)
+ goto done;
- if (op_ret < 0)
- goto done;
+ if (!local->layout)
+ local->layout = dht_layout_get(this, local->fd->inode);
- if (!local->layout)
- local->layout = dht_layout_get (this, local->fd->inode);
+ layout = local->layout;
- layout = local->layout;
+ gf_msg_debug(this->name, 0, "Processing entries from %s", prev->name);
- list_for_each_entry (orig_entry, (&orig_entries->list), list) {
- next_offset = orig_entry->d_off;
+ if (conf->subvolume_cnt == 1) {
+ /*return everything*/
+ skip_hashed_check = _gf_true;
+ count = op_ret;
+ goto done;
+ }
- subvol = dht_layout_search (this, layout, orig_entry->d_name);
+ list_for_each_entry(orig_entry, (&orig_entries->list), list)
+ {
+ next_offset = orig_entry->d_off;
- if (!subvol || (subvol == prev->this)) {
- entry = gf_dirent_for_name (orig_entry->d_name);
- if (!entry) {
- gf_log (this->name, GF_LOG_ERROR,
- "memory allocation failed :(");
- goto unwind;
- }
+ gf_msg_debug(this->name, 0, "%s: entry = %s, type = %d", prev->name,
+ orig_entry->d_name, orig_entry->d_type);
- dht_itransform (this, prev->this, orig_entry->d_off,
- &entry->d_off);
+ subvol = methods->layout_search(this, layout, orig_entry->d_name);
- entry->d_ino = orig_entry->d_ino;
- entry->d_type = orig_entry->d_type;
- entry->d_len = orig_entry->d_len;
+ if (!subvol || (subvol == prev)) {
+ entry = gf_dirent_for_name(orig_entry->d_name);
+ if (!entry) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "Memory allocation failed ");
+ goto unwind;
+ }
- list_add_tail (&entry->list, &entries.list);
- count++;
- }
- }
- op_ret = count;
- /* We need to ensure that only the last subvolume's end-of-directory
- * notification is respected so that directory reading does not stop
- * before all subvolumes have been read. That could happen because the
- * posix for each subvolume sends a ENOENT on end-of-directory but in
- * distribute we're not concerned only with a posix's view of the
- * directory but the aggregated namespace' view of the directory.
- */
- if (prev->this != dht_last_up_subvol (this))
- op_errno = 0;
+ entry->d_off = orig_entry->d_off;
+ entry->d_ino = orig_entry->d_ino;
+ entry->d_type = orig_entry->d_type;
+ entry->d_len = orig_entry->d_len;
+ gf_msg_debug(this->name, 0, "%s: Adding = entry %s", prev->name,
+ entry->d_name);
+
+ list_add_tail(&entry->list, &entries.list);
+ count++;
+ }
+ }
done:
- if (count == 0) {
- /* non-zero next_offset means that
- EOF is not yet hit on the current subvol
- */
- if (next_offset == 0) {
- next_subvol = dht_subvol_next (this, prev->this);
+ op_ret = count;
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ */
+ if (count == 0) {
+ if ((next_offset == 0) || (op_errno == ENOENT)) {
+ next_offset = 0;
+ next_subvol = dht_subvol_next(this, prev);
+ } else {
+ next_subvol = prev;
+ }
+
+ if (!next_subvol) {
+ goto unwind;
+ }
+
+ STACK_WIND_COOKIE(frame, dht_readdir_cbk, next_subvol, next_subvol,
+ next_subvol->fops->readdir, local->fd, local->size,
+ next_offset, NULL);
+ return 0;
+ }
+
+unwind:
+ /* We need to ensure that only the last subvolume's end-of-directory
+ * notification is respected so that directory reading does not stop
+ * before all subvolumes have been read. That could happen because the
+ * posix for each subvolume sends a ENOENT on end-of-directory but in
+ * distribute we're not concerned only with a posix's view of the
+ * directory but the aggregated namespace' view of the directory.
+ */
+
+ if (prev != dht_last_up_subvol(this))
+ op_errno = 0;
+
+ if (!skip_hashed_check) {
+ DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL);
+ gf_dirent_free(&entries);
+
+ } else {
+ DHT_STACK_UNWIND(readdir, frame, op_ret, op_errno, orig_entries, NULL);
+ }
+ return 0;
+}
+
+static int
+dht_do_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, int whichop, dict_t *dict)
+{
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ xlator_t *xvol = NULL;
+ int ret = 0;
+ dht_conf_t *conf = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, NULL, NULL, whichop);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->fd = fd_ref(fd);
+ local->size = size;
+ local->xattr_req = (dict) ? dict_ref(dict) : NULL;
+ local->first_up_subvol = dht_first_up_subvol(this);
+ local->op_ret = -1;
+
+ dht_deitransform(this, yoff, &xvol);
+
+ /* TODO: do proper readdir */
+ if (whichop == GF_FOP_READDIRP) {
+ if (dict)
+ local->xattr = dict_ref(dict);
+ else
+ local->xattr = dict_new();
+
+ if (local->xattr) {
+ ret = dict_set_uint32(local->xattr, conf->link_xattr_name, 256);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value"
+ " : key = %s",
+ conf->link_xattr_name);
+
+ if (conf->readdir_optimize == _gf_true) {
+ if (xvol != local->first_up_subvol) {
+ ret = dict_set_int32(local->xattr, GF_READDIR_SKIP_DIRS, 1);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "dictionary value: "
+ "key = %s",
+ GF_READDIR_SKIP_DIRS);
} else {
- next_subvol = prev->this;
+ dict_del(local->xattr, GF_READDIR_SKIP_DIRS);
}
-
- if (!next_subvol) {
- goto unwind;
+ }
+
+ if (conf->subvolume_cnt == 1) {
+ ret = dict_set_uint32(local->xattr, conf->xattr_name, 4 * 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary "
+ "value:key = %s ",
+ conf->xattr_name);
}
-
- STACK_WIND (frame, dht_readdir_cbk,
- next_subvol, next_subvol->fops->readdir,
- local->fd, local->size, next_offset, NULL);
- return 0;
+ }
}
-unwind:
- if (op_ret < 0)
- op_ret = 0;
+ STACK_WIND_COOKIE(frame, dht_readdirp_cbk, xvol, xvol,
+ xvol->fops->readdirp, fd, size, yoff, local->xattr);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_readdir_cbk, xvol, xvol,
+ xvol->fops->readdir, fd, size, yoff, local->xattr);
+ }
- DHT_STACK_UNWIND (readdir, frame, op_ret, op_errno, &entries, NULL);
+ return 0;
- gf_dirent_free (&entries);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
int
-dht_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, int whichop, dict_t *dict)
-{
- dht_local_t *local = NULL;
- int op_errno = -1;
- xlator_t *xvol = NULL;
- off_t xoff = 0;
- int ret = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, NULL, whichop);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+dht_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, dict_t *xdata)
+{
+ int op = GF_FOP_READDIR;
+ dht_conf_t *conf = NULL;
+ int i = 0;
- local->fd = fd_ref (fd);
- local->size = size;
- local->xattr_req = (dict)? dict_ref (dict) : NULL;
+ conf = this->private;
+ if (!conf)
+ goto out;
- dht_deitransform (this, yoff, &xvol, (uint64_t *)&xoff);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ op = GF_FOP_READDIRP;
+ break;
+ }
+ }
- /* TODO: do proper readdir */
- if (whichop == GF_FOP_READDIRP) {
- if (dict)
- local->xattr = dict_ref (dict);
- else
- local->xattr = dict_new ();
+ if (conf->use_readdirp)
+ op = GF_FOP_READDIRP;
- if (local->xattr) {
- ret = dict_set_uint32 (local->xattr,
- "trusted.glusterfs.dht.linkto",
- 256);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set 'glusterfs.dht.linkto'"
- " key");
- }
+out:
+ dht_do_readdir(frame, this, fd, size, yoff, op, 0);
+ return 0;
+}
- STACK_WIND (frame, dht_readdirp_cbk, xvol, xvol->fops->readdirp,
- fd, size, xoff, local->xattr);
- } else {
- STACK_WIND (frame, dht_readdir_cbk, xvol, xvol->fops->readdir,
- fd, size, xoff, local->xattr);
- }
+int
+dht_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t yoff, dict_t *dict)
+{
+ dht_do_readdir(frame, this, fd, size, yoff, GF_FOP_READDIRP, dict);
+ return 0;
+}
- return 0;
+static int
+dht_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
+ local = frame->local;
- return 0;
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1)
+ local->op_errno = op_errno;
+ else if (op_ret == 0)
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt))
+ DHT_STACK_UNWIND(fsyncdir, frame, local->op_ret, local->op_errno,
+ xdata);
+
+ return 0;
}
-
int
-dht_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *xdata)
+dht_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
{
- int op = GF_FOP_READDIR;
- dht_conf_t *conf = NULL;
- int i = 0;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ int i = -1;
- conf = this->private;
- if (!conf)
- goto out;
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(this->private, err);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->subvolume_status[i]) {
- op = GF_FOP_READDIRP;
- break;
- }
- }
+ conf = this->private;
- if (conf->use_readdirp)
- op = GF_FOP_READDIRP;
+ local = dht_local_init(frame, NULL, NULL, GF_FOP_FSYNCDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
-out:
- dht_do_readdir (frame, this, fd, size, yoff, op, 0);
- return 0;
+ local->fd = fd_ref(fd);
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND(frame, dht_fsyncdir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->fsyncdir, fd, datasync, xdata);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fsyncdir, frame, -1, op_errno, NULL);
+
+ return 0;
}
int
-dht_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, dict_t *dict)
+dht_newfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- dht_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP, dict);
- return 0;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+ if (op_ret == -1)
+ goto out;
+
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ prev = cookie;
+
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0);
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
+
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret < 0) {
+ gf_msg_debug(this->name, EINVAL,
+ "could not set pre-set layout for subvolume %s",
+ prev ? prev->name : NULL);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ if (local->linked == _gf_true)
+ dht_linkfile_attr_heal(frame, this);
+out:
+ /*
+ * FIXME: ia_size and st_blocks of preparent and postparent do not have
+ * correct values. since, preparent and postparent buffers correspond
+ * to a directory these two members should have values equal to sum of
+ * corresponding values from each of the subvolume.
+ * See dht_iatt_merge for reference.
+ */
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ dht_set_fixed_dir_stat(postparent);
+ dht_set_fixed_dir_stat(preparent);
+
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ /* store op_errno for failure case*/
+ local->op_errno = op_errno;
+ local->refresh_layout_unlock(frame, this, op_ret, 1);
+
+ if (op_ret == 0) {
+ DHT_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, stbuf,
+ preparent, postparent, xdata);
+ }
+ } else {
+ DHT_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, stbuf,
+ preparent, postparent, xdata);
+ }
+
+ return 0;
}
+static int
+dht_mknod_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ local = frame->local;
-int
-dht_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ conf = this->private;
+ if (!conf) {
+ local->op_errno = EINVAL;
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ cached_subvol = local->cached_subvol;
+
+ if (local->params) {
+ dict_del(local->params, conf->link_xattr_name);
+ dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
+ }
+
+ STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)cached_subvol,
+ cached_subvol, cached_subvol->fops->mknod, &local->loc,
+ local->mode, local->rdev, local->umask, local->params);
+
+ return 0;
+err:
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ local->refresh_layout_unlock(frame, this, -1, 1);
+ } else {
+ DHT_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ }
+ return 0;
+}
+
+static int
+dht_mknod_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc, dev_t rdev,
+ mode_t mode, mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ xlator_t *avail_subvol = NULL;
+ local = frame->local;
- local = frame->local;
+ if (!dht_is_subvol_filled(this, subvol)) {
+ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
+ subvol->name);
- LOCK (&frame->lock);
- {
- if (op_ret == -1)
- local->op_errno = op_errno;
+ STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask, params);
+ } else {
+ avail_subvol = dht_free_disk_available_subvol(this, subvol, local);
- if (op_ret == 0)
- local->op_ret = 0;
+ if (avail_subvol != subvol) {
+ local->params = dict_ref(params);
+ local->rdev = rdev;
+ local->mode = mode;
+ local->umask = umask;
+ local->cached_subvol = avail_subvol;
+ local->hashed_subvol = subvol;
+
+ gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)",
+ loc->path, avail_subvol->name, subvol->name);
+
+ dht_linkfile_create(frame, dht_mknod_linkfile_create_cbk, this,
+ avail_subvol, subvol, loc);
+
+ goto out;
}
- UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno, xdata);
+ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
+ subvol->name);
+
+ STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask, params);
+ }
+out:
+ return 0;
+}
+
+static int32_t
+dht_mknod_do(call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *refreshed = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+
+ local = frame->local;
+
+ this = THIS;
+
+ conf = this->private;
+
+ GF_VALIDATE_OR_GOTO(this->name, conf, err);
+
+ methods = &(conf->methods);
+
+ /* We don't need parent_loc anymore */
+ loc_wipe(&local->loc);
+
+ loc_copy(&local->loc, &local->loc2);
+
+ loc_wipe(&local->loc2);
+ refreshed = local->selfheal.refreshed_layout;
+
+ subvol = methods->layout_search(this, refreshed, local->loc.name);
+
+ if (!subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "no subvolume in "
+ "layout for path=%s",
+ local->loc.path);
+ local->op_errno = ENOENT;
+ goto err;
+ }
+
+ dht_mknod_wind_to_avail_subvol(frame, this, subvol, &local->loc,
+ local->rdev, local->mode, local->umask,
+ local->params);
+ return 0;
+err:
+ local->refresh_layout_unlock(frame, this, -1, 1);
+
+ return 0;
+}
+
+static int32_t
+dht_mknod_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
+
+static int32_t
+dht_mknod_finish(call_frame_t *frame, xlator_t *this, int op_ret,
+ int invoke_cbk)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
+
+ local = frame->local;
+ lock_count = dht_lock_count(local->lock[0].layout.parent_layout.locks,
+ local->lock[0].layout.parent_layout.lk_count);
+ if (lock_count == 0)
+ goto done;
+
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL) {
+ goto done;
+ }
+
+ lock_local = dht_local_init(lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL) {
+ goto done;
+ }
+
+ lock_local->lock[0]
+ .layout.parent_layout.locks = local->lock[0].layout.parent_layout.locks;
+ lock_local->lock[0].layout.parent_layout.lk_count =
+ local->lock[0].layout.parent_layout.lk_count;
+
+ local->lock[0].layout.parent_layout.locks = NULL;
+ local->lock[0].layout.parent_layout.lk_count = 0;
+
+ dht_unlock_inodelk(lock_frame,
+ lock_local->lock[0].layout.parent_layout.locks,
+ lock_local->lock[0].layout.parent_layout.lk_count,
+ dht_mknod_unlock_cbk);
+ lock_frame = NULL;
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
+
+ if (op_ret == 0)
return 0;
+
+ DHT_STACK_UNWIND(mknod, frame, op_ret, local->op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
}
+static int32_t
+dht_mknod_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
-int
-dht_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int datasync, dict_t *xdata)
+ local = frame->local;
+
+ if (!local) {
+ goto err;
+ }
+
+ if (op_ret < 0) {
+ gf_msg("DHT", GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "mknod lock failed for file: %s", local->loc2.name);
+
+ local->op_errno = op_errno;
+
+ goto err;
+ }
+
+ local->refresh_layout_unlock = dht_mknod_finish;
+
+ local->refresh_layout_done = dht_mknod_do;
+
+ dht_refresh_layout(frame);
+
+ return 0;
+err:
+ if (local)
+ dht_mknod_finish(frame, this, -1, 0);
+ else
+ DHT_STACK_UNWIND(mknod, frame, -1, EINVAL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t
+dht_mknod_lock(call_frame_t *frame, xlator_t *subvol)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1;
+ dht_lock_t **lk_array = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (this->private, err);
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO(frame->this->name, frame->local, err);
- conf = this->private;
+ local = frame->local;
- local = dht_local_init (frame, NULL, NULL, GF_FOP_FSYNCDIR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
- local->fd = fd_ref (fd);
- local->call_cnt = conf->subvolume_cnt;
+ if (lk_array == NULL)
+ goto err;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_fsyncdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->fsyncdir,
- fd, datasync, xdata);
- }
+ lk_array[0] = dht_lock_new(frame->this, subvol, &local->loc, F_RDLCK,
+ DHT_LAYOUT_HEAL_DOMAIN, NULL,
+ IGNORE_ENOENT_ESTALE);
- return 0;
+ if (lk_array[0] == NULL)
+ goto err;
+
+ local->lock[0].layout.parent_layout.locks = lk_array;
+ local->lock[0].layout.parent_layout.lk_count = count;
+ ret = dht_blocking_inodelk(frame, lk_array, count, dht_mknod_lock_cbk);
+
+ if (ret < 0) {
+ local->lock[0].layout.parent_layout.locks = NULL;
+ local->lock[0].layout.parent_layout.lk_count = 0;
+ goto err;
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
+ if (lk_array != NULL) {
+ dht_lock_array_free(lk_array, count);
+ GF_FREE(lk_array);
+ }
- return 0;
+ return -1;
}
+static int
+dht_refresh_parent_layout_resume(call_frame_t *frame, xlator_t *this, int ret,
+ int invoke_cbk)
+{
+ dht_local_t *local = NULL, *parent_local = NULL;
+ call_stub_t *stub = NULL;
+ call_frame_t *parent_frame = NULL;
-int
-dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+ local = frame->local;
+
+ stub = local->stub;
+ local->stub = NULL;
+
+ parent_frame = stub->frame;
+ parent_local = parent_frame->local;
+
+ if (ret < 0) {
+ parent_local->op_ret = -1;
+ parent_local->op_errno = local->op_errno ? local->op_errno : EIO;
+ } else {
+ parent_local->op_ret = 0;
+ }
+
+ call_resume(stub);
+
+ DHT_STACK_DESTROY(frame);
+
+ return 0;
+}
+
+static int
+dht_refresh_parent_layout_done(call_frame_t *frame)
{
- call_frame_t *prev = NULL;
- int ret = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ int ret = 0;
+ local = frame->local;
- if (op_ret == -1)
- goto out;
+ if (local->op_ret < 0) {
+ ret = -1;
+ goto resume;
+ }
- local = frame->local;
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ dht_layout_set(frame->this, local->loc.inode,
+ local->selfheal.refreshed_layout);
- prev = cookie;
+resume:
+ dht_refresh_parent_layout_resume(frame, frame->this, ret, 1);
+ return 0;
+}
- if (local->loc.parent) {
- WIPE (preparent);
- WIPE (postparent);
- }
+static int
+dht_handle_parent_layout_change(xlator_t *this, call_stub_t *stub)
+{
+ call_frame_t *refresh_frame = NULL, *frame = NULL;
+ dht_local_t *refresh_local = NULL, *local = NULL;
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
-out:
- /*
- * FIXME: ia_size and st_blocks of preparent and postparent do not have
- * correct values. since, preparent and postparent buffers correspond
- * to a directory these two members should have values equal to sum of
- * corresponding values from each of the subvolume.
- * See dht_iatt_merge for reference.
- */
+ frame = stub->frame;
+ local = frame->local;
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, stbuf,
- preparent, postparent, xdata);
- return 0;
+ refresh_frame = copy_frame(frame);
+ if (!refresh_frame) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "mem allocation failed for refresh_frame");
+ return -1;
+ }
+
+ refresh_local = dht_local_init(refresh_frame, NULL, NULL, stub->fop);
+ if (!refresh_local) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "mem allocation failed for refresh_local");
+ return -1;
+ }
+
+ refresh_local->loc.inode = inode_ref(local->loc.parent);
+ gf_uuid_copy(refresh_local->loc.gfid, local->loc.parent->gfid);
+
+ refresh_local->stub = stub;
+
+ refresh_local->refresh_layout_unlock = dht_refresh_parent_layout_resume;
+ refresh_local->refresh_layout_done = dht_refresh_parent_layout_done;
+
+ dht_refresh_layout(refresh_frame);
+ return 0;
}
-int
-dht_mknod_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+static int32_t
+dht_call_mkdir_stub(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
+ dht_local_t *local = NULL;
+ call_stub_t *stub = NULL;
- if (op_ret == -1)
- goto err;
+ local = frame->local;
+ stub = local->stub;
+ local->stub = NULL;
- local = frame->local;
- cached_subvol = local->cached_subvol;
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ } else {
+ local->op_ret = 0;
+ }
- STACK_WIND (frame, dht_newfile_cbk,
- cached_subvol, cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev, local->umask,
- local->params);
+ call_resume(stub);
- return 0;
+ return 0;
+}
+
+static int32_t
+dht_guard_parent_layout_and_namespace(xlator_t *subvol, call_stub_t *stub)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ loc_t *loc = NULL;
+ xlator_t *hashed_subvol = NULL, *this = NULL;
+ ;
+ call_frame_t *frame = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int32_t *parent_disk_layout = NULL;
+ dht_layout_t *parent_layout = NULL;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", stub, err);
+
+ frame = stub->frame;
+ this = frame->this;
+
+ conf = this->private;
+
+ local = frame->local;
+
+ local->stub = stub;
+
+ /* TODO: recheck whether we should lock on src or dst if we do similar
+ * stale layout checks for rename.
+ */
+ loc = &stub->args.loc;
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ if (local->params == NULL) {
+ local->params = dict_new();
+ if (local->params == NULL) {
+ local->op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "dict allocation failed",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "hashed subvolume not found",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ parent_layout = dht_layout_get(this, loc->parent);
+
+ ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol,
+ &parent_disk_layout);
+ if (ret == -1) {
+ local->op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "extracting in-memory layout of parent failed. ",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ memcpy((void *)local->parent_disk_layout, (void *)parent_disk_layout,
+ sizeof(local->parent_disk_layout));
+
+ dht_layout_unref(this, parent_layout);
+ parent_layout = NULL;
+
+ ret = dict_set_str(local->params, GF_PREOP_PARENT_KEY, conf->xattr_name);
+ if (ret < 0) {
+ local->op_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting %s key in params dictionary failed. ",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path,
+ GF_PREOP_PARENT_KEY);
+ goto err;
+ }
+
+ ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout,
+ 4 * 4);
+ if (ret < 0) {
+ local->op_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting parent-layout in params dictionary failed. ",
+ gf_fop_list[stub->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ parent_disk_layout = NULL;
+ local->hashed_subvol = hashed_subvol;
+
+ local->current = &local->lock[0];
+ ret = dht_protect_namespace(frame, loc, hashed_subvol, &local->current->ns,
+ dht_call_mkdir_stub);
+ if (ret < 0)
+ goto err;
+
+ return 0;
err:
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL);
- return 0;
+
+ if (parent_disk_layout != NULL)
+ GF_FREE(parent_disk_layout);
+
+ if (parent_layout != NULL)
+ dht_layout_unref(this, parent_layout);
+
+ return -1;
}
int
-dht_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
+dht_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *params)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- xlator_t *avail_subvol = NULL;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ int i = 0;
+ int ret = 0;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_MKNOD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = EIO;
+ goto err;
+ }
+
+ /* Post remove-brick, the client layout may not be in sync with
+ * disk layout because of lack of lookup. Hence,a mknod call
+ * may fall on the decommissioned brick. Hence, if the
+ * hashed_subvol is part of decommissioned bricks list, do a
+ * lookup on parent dir. If a fix-layout is already done by the
+ * remove-brick process, the parent directory layout will be in
+ * sync with that of the disk. If fix-layout is still ending
+ * on the parent directory, we can let the file get created on
+ * the decommissioned brick which will be eventually migrated to
+ * non-decommissioned brick based on the new layout.
+ */
+
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] &&
+ conf->decommissioned_bricks[i] == subvol) {
+ gf_msg_debug(this->name, 0,
+ "hashed subvol:%s is "
+ "part of decommission brick list for "
+ "file: %s",
+ subvol->name, loc->path);
+
+ /* dht_refresh_layout needs directory info in
+ * local->loc. Hence, storing the parent_loc in
+ * local->loc and storing the create context in
+ * local->loc2. We will restore this information
+ * in dht_creation do */
+
+ ret = loc_copy(&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", loc->path);
+
+ goto err;
+ }
- dht_get_du_info (frame, this, loc);
+ local->params = dict_ref(params);
+ local->rdev = rdev;
+ local->mode = mode;
+ local->umask = umask;
- local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ loc_wipe(&local->loc);
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno);
- if (!dht_is_subvol_filled (this, subvol)) {
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED,
+ "parent loc build failed");
+ goto err;
+ }
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, umask, params);
- } else {
- avail_subvol = dht_free_disk_available_subvol (this, subvol);
- if (avail_subvol != subvol) {
- /* Choose the minimum filled volume, and create the
- files there */
-
- local->params = dict_ref (params);
- local->cached_subvol = avail_subvol;
- local->mode = mode;
- local->rdev = rdev;
- local->umask = umask;
- dht_linkfile_create (frame,
- dht_mknod_linkfile_create_cbk,
- avail_subvol, subvol, loc);
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ ret = dht_mknod_lock(frame, subvol);
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, umask, params);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto err;
}
+
+ goto done;
+ }
}
+ }
- return 0;
+ dht_mknod_wind_to_avail_subvol(frame, this, subvol, loc, rdev, mode, umask,
+ params);
+
+done:
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
+int
+dht_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *params)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_SYMLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = EIO;
+ goto err;
+ }
+
+ gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name);
+
+ STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->symlink, linkname, loc, umask, params);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
int
-dht_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkname, loc_t *loc, mode_t umask, dict_t *params)
+dht_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_UNLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+
+ cached_subvol = local->cached_subvol;
+ if (!cached_subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->flags = xflag;
+ STACK_WIND_COOKIE(frame, dht_unlink_cbk, cached_subvol, cached_subvol,
+ cached_subvol->fops->unlink, loc, xflag, xdata);
+
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ return 0;
+}
- local = dht_local_init (frame, loc, NULL, GF_FOP_SYMLINK);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+static int
+dht_remove_stale_linkto_cbk(int ret, call_frame_t *sync_frame, void *data)
+{
+ DHT_STACK_DESTROY(sync_frame);
+ return 0;
+}
+
+static int
+dht_remove_stale_linkto(void *data)
+{
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ dict_t *xdata_in = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", data, out);
+
+ frame = data;
+ local = frame->local;
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", local, out);
+ GF_VALIDATE_OR_GOTO("dht", local->link_subvol, out);
+
+ xdata_in = dict_new();
+ if (!xdata_in)
+ goto out;
+
+ ret = dht_fill_dict_to_avoid_unlink_of_migrating_file(xdata_in);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
+ "Failed to set keys for stale linkto"
+ "deletion on path %s",
+ local->loc.path);
+ goto out;
+ }
+
+ ret = syncop_unlink(local->link_subvol, &local->loc, xdata_in, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, 0,
+ "Removal of linkto failed"
+ " on path %s at subvol %s",
+ local->loc.path, local->link_subvol->name);
+ }
+out:
+ if (xdata_in)
+ dict_unref(xdata_in);
+ return ret;
+}
- subvol = dht_subvol_get_hashed (this, loc);
+static int
+dht_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ gf_boolean_t stbuf_merged = _gf_false;
+ xlator_t *subvol = NULL;
+ call_frame_t *cleanup_frame = NULL;
+ dht_local_t *cleanup_local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ /* Remove the linkto if exists */
+ if (local->linked) {
+ cleanup_frame = create_frame(this, this->ctx->pool);
+ if (cleanup_frame) {
+ cleanup_local = dht_local_init(cleanup_frame, &local->loc2,
+ NULL, 0);
+ if (!cleanup_local || !local->link_subvol) {
+ DHT_STACK_DESTROY(cleanup_frame);
+ goto out;
+ }
+ cleanup_local->link_subvol = local->link_subvol;
+ FRAME_SU_DO(cleanup_frame, dht_local_t);
+ ret = synctask_new(this->ctx->env, dht_remove_stale_linkto,
+ dht_remove_stale_linkto_cbk, cleanup_frame,
+ cleanup_frame);
+ }
+ }
+ /* No continuation on DHT inode missing errors, as we should
+ * then have a good stbuf that states P2 happened. We would
+ * get inode missing if, the file completed migrated between
+ * the lookup and the link call */
+ goto out;
+ }
+
+ /* Update parent on success, even if P1/2 checks are positive.
+ * The second call on success will further update the parent */
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0);
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
+
+ /* Update linkto attrs, if this is the first call and non-P2,
+ * if we detect P2 then we need to trust the attrs from the
+ * second call, not the first */
+ if (local->linked == _gf_true &&
+ ((local->call_cnt == 1 && !IS_DHT_MIGRATION_PHASE2(stbuf)) ||
+ (local->call_cnt != 1 && IS_DHT_MIGRATION_PHASE2(&local->stbuf)))) {
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ stbuf_merged = _gf_true;
+ dht_linkfile_attr_heal(frame, this);
+ }
+
+ /* No further P1/2 checks if we are in the second iteration of
+ * the call */
+ if (local->call_cnt != 1) {
+ goto out;
+ } else {
+ /* Preserve the return values, in case the migration decides
+ * to recreate the link on the same subvol that the current
+ * hased for the link was created on. */
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+ if (!stbuf_merged) {
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ stbuf_merged = _gf_true;
+ }
+
+ local->inode = inode_ref(inode);
+ }
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_link2;
+ dht_set_local_rebalance(this, local, stbuf, preparent, postparent, xdata);
+
+ /* Check if the rebalance phase2 is true */
+ if (IS_DHT_MIGRATION_PHASE2(stbuf)) {
+ ret = dht_inode_ctx_get_mig_info(this, local->loc.inode, NULL, &subvol);
if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ /* Phase 2 of migration */
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ dht_link2(this, subvol, frame, 0);
+ return 0;
+ }
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(stbuf)) {
+ ret = dht_inode_ctx_get_mig_info(this, local->loc.inode, NULL, &subvol);
+ if (subvol) {
+ dht_link2(this, subvol, frame, 0);
+ return 0;
+ }
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+out:
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ dht_set_fixed_dir_stat(preparent);
+ dht_set_fixed_dir_stat(postparent);
+ DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, NULL);
+
+ return 0;
+}
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->symlink,
- linkname, loc, umask, params);
+static int
+dht_link2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto err;
+
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
+
+ DHT_STACK_UNWIND(link, frame, local->op_ret, op_errno, local->inode,
+ &local->stbuf, &local->preparent, &local->postparent,
+ NULL);
+ return 0;
+ }
+
+ if (subvol == NULL) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* Second call to create link file could result in EEXIST as the
+ * first call created the linkto in the currently
+ * migrating subvol, which could be the new hashed subvol */
+ if (local->link_subvol == subvol) {
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
+ DHT_STACK_UNWIND(link, frame, 0, 0, local->inode, &local->stbuf,
+ &local->preparent, &local->postparent, NULL);
return 0;
+ }
+ local->call_cnt = 2;
+
+ STACK_WIND(frame, dht_link_cbk, subvol, subvol->fops->link, &local->loc,
+ &local->loc2, local->xattr_req);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (link, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
-int
-dht_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
+static int
+dht_link_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
-
- if (dht_filter_loc_subvol_key (this, loc, &local->loc,
- &cached_subvol)) {
- gf_log (this->name, GF_LOG_INFO,
- "unlinking %s on %s (given path %s)",
- local->loc.path, cached_subvol->name, loc->path);
- STACK_WIND (frame, dht_unlink_cbk,
- cached_subvol, cached_subvol->fops->unlink,
- &local->loc, xflag, xdata);
- goto done;
- }
+ dht_local_t *local = NULL;
+ xlator_t *srcvol = NULL;
- local = dht_local_init (frame, loc, NULL, GF_FOP_UNLINK);
- if (!local) {
- op_errno = ENOMEM;
+ if (op_ret == -1)
+ goto err;
- goto err;
- }
+ local = frame->local;
+ srcvol = local->linkfile.srcvol;
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ STACK_WIND(frame, dht_link_cbk, srcvol, srcvol->fops->link, &local->loc,
+ &local->loc2, local->xattr_req);
- cached_subvol = local->cached_subvol;
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ return 0;
- local->flags = xflag;
- if (hashed_subvol != cached_subvol) {
- STACK_WIND (frame, dht_unlink_linkfile_cbk,
- hashed_subvol, hashed_subvol->fops->unlink, loc,
- xflag, xdata);
- } else {
- STACK_WIND (frame, dht_unlink_cbk,
- cached_subvol, cached_subvol->fops->unlink, loc,
- xflag, xdata);
- }
-done:
- return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ dht_set_fixed_dir_stat(preparent);
+ dht_set_fixed_dir_stat(postparent);
+ DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, xdata);
- return 0;
+ return 0;
}
+int
+dht_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ int op_errno = -1;
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(oldloc, err);
+ VALIDATE_OR_GOTO(newloc, err);
+
+ local = dht_local_init(frame, oldloc, NULL, GF_FOP_LINK);
+ if (!local) {
+ op_errno = ENOMEM;
+
+ goto err;
+ }
+ local->call_cnt = 1;
+
+ cached_subvol = local->cached_subvol;
+ if (!cached_subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ oldloc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, newloc);
+ if (!hashed_subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ newloc->path);
+ op_errno = EIO;
+ goto err;
+ }
+
+ ret = loc_copy(&local->loc2, newloc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (hashed_subvol != cached_subvol) {
+ gf_uuid_copy(local->gfid, oldloc->inode->gfid);
+ dht_linkfile_create(frame, dht_link_linkfile_cbk, this, cached_subvol,
+ hashed_subvol, newloc);
+ } else {
+ STACK_WIND(frame, dht_link_cbk, cached_subvol,
+ cached_subvol->fops->link, oldloc, newloc, xdata);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
int
-dht_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
{
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ gf_boolean_t parent_layout_changed = _gf_false;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ parent_layout_changed = (xdata &&
+ dict_get(xdata, GF_PREOP_CHECK_FAILED))
+ ? _gf_true
+ : _gf_false;
+
+ if (parent_layout_changed) {
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ /* Returning failure as the layout could not be fixed even under
+ * the lock */
+ goto out;
+ }
- prev = cookie;
+ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "create (%s/%s) (path: %s): parent layout "
+ "changed. Attempting a layout refresh and then a "
+ "retry",
+ pgfid, local->loc.name, local->loc.path);
+
+ /*
+ dht_refresh_layout needs directory info in local->loc.Hence,
+ storing the parent_loc in local->loc and storing the create
+ context in local->loc2. We will restore this information in
+ dht_creation_do.
+ */
+
+ loc_wipe(&local->loc2);
+
+ ret = loc_copy(&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", local->loc.path);
- if (op_ret == -1)
goto out;
+ }
- layout = dht_layout_for_subvol (this, prev->this);
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no pre-set layout for subvolume %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
+ loc_wipe(&local->loc);
+
+ ret = dht_build_parent_loc(this, &local->loc, &local->loc2,
+ &op_errno);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED,
+ "parent loc build failed");
goto out;
+ }
+
+ subvol = dht_subvol_get_hashed(this, &local->loc2);
+
+ ret = dht_create_lock(frame, subvol);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto out;
+ }
+
+ return 0;
}
- WIPE (preparent);
- WIPE (postparent);
+ goto out;
+ }
-out:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent, NULL);
+ prev = cookie;
- return 0;
-}
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this, preparent, 0);
+ dht_inode_ctx_time_update(local->loc.parent, this, postparent, 1);
+ }
-int
-dht_link_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- xlator_t *srcvol = NULL;
+ ret = dht_fd_ctx_set(this, fd, prev);
+ if (ret != 0) {
+ gf_msg_debug(this->name, 0,
+ "Possible fd leak. "
+ "Could not set fd ctx for subvol %s",
+ prev->name);
+ }
- if (op_ret == -1)
- goto err;
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret != 0) {
+ gf_msg_debug(this->name, 0, "could not set preset layout for subvol %s",
+ prev->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
- local = frame->local;
- srcvol = local->linkfile.srcvol;
+ local->op_errno = op_errno;
- STACK_WIND (frame, dht_link_cbk, srcvol, srcvol->fops->link,
- &local->loc, &local->loc2, xdata);
+ if (local->linked == _gf_true) {
+ local->stbuf = *stbuf;
+ dht_linkfile_attr_heal(frame, this);
+ }
+out:
- return 0;
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ dht_set_fixed_dir_stat(preparent);
+ dht_set_fixed_dir_stat(postparent);
-err:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (link, frame, op_ret, op_errno, inode, stbuf, preparent,
- postparent, NULL);
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ /* store op_errno for failure case*/
+ local->op_errno = op_errno;
+ local->refresh_layout_unlock(frame, this, op_ret, 1);
- return 0;
+ if (op_ret == 0) {
+ DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
+ preparent, postparent, xdata);
+ }
+ } else {
+ DHT_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
+ preparent, postparent, xdata);
+ }
+ return 0;
}
+static int
+dht_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *cached_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = frame->local;
+ if (!local) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ conf = this->private;
+ if (!conf) {
+ local->op_errno = EINVAL;
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ cached_subvol = local->cached_subvol;
+
+ if (local->params) {
+ dict_del(local->params, conf->link_xattr_name);
+ dict_del(local->params, GLUSTERFS_INTERNAL_FOP_KEY);
+ }
+
+ STACK_WIND_COOKIE(frame, dht_create_cbk, cached_subvol, cached_subvol,
+ cached_subvol->fops->create, &local->loc, local->flags,
+ local->mode, local->umask, local->fd, local->params);
+
+ return 0;
+err:
+ if (local && local->lock[0].layout.parent_layout.locks) {
+ local->refresh_layout_unlock(frame, this, -1, 1);
+ } else {
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ }
+ return 0;
+}
-int
-dht_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+static int
+dht_create_wind_to_avail_subvol(call_frame_t *frame, xlator_t *this,
+ xlator_t *subvol, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *params)
{
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- int op_errno = -1;
- int ret = -1;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *avail_subvol = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (oldloc, err);
- VALIDATE_OR_GOTO (newloc, err);
+ local = frame->local;
- local = dht_local_init (frame, oldloc, NULL, GF_FOP_LINK);
- if (!local) {
- op_errno = ENOMEM;
+ if (!dht_is_subvol_filled(this, subvol)) {
+ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
+ subvol->name);
- goto err;
- }
+ dht_set_parent_layout_in_dict(loc, this, local);
- cached_subvol = local->cached_subvol;
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", oldloc->path);
- op_errno = EINVAL;
- goto err;
- }
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, loc, flags, mode, umask, fd,
+ params);
- hashed_subvol = dht_subvol_get_hashed (this, newloc);
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- newloc->path);
- op_errno = EINVAL;
- goto err;
- }
+ } else {
+ avail_subvol = dht_free_disk_available_subvol(this, subvol, local);
- ret = loc_copy (&local->loc2, newloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
+ if (avail_subvol != subvol) {
+ local->cached_subvol = avail_subvol;
+ local->hashed_subvol = subvol;
- if (hashed_subvol != cached_subvol) {
- uuid_copy (local->gfid, oldloc->inode->gfid);
- dht_linkfile_create (frame, dht_link_linkfile_cbk,
- cached_subvol, hashed_subvol, newloc);
- } else {
- STACK_WIND (frame, dht_link_cbk,
- cached_subvol, cached_subvol->fops->link,
- oldloc, newloc, xdata);
+ gf_msg_debug(this->name, 0, "creating %s on %s (link at %s)",
+ loc->path, avail_subvol->name, subvol->name);
+
+ dht_linkfile_create(frame, dht_create_linkfile_create_cbk, this,
+ avail_subvol, subvol, loc);
+
+ goto out;
}
- return 0;
+ gf_msg_debug(this->name, 0, "creating %s on %s", loc->path,
+ subvol->name);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ dht_set_parent_layout_in_dict(loc, this, local);
- return 0;
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, loc, flags, mode, umask, fd,
+ params);
+ }
+out:
+ return 0;
}
-
int
-dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+dht_build_parent_loc(xlator_t *this, loc_t *parent, loc_t *child,
+ int32_t *op_errno)
{
- call_frame_t *prev = NULL;
- int ret = -1;
- dht_local_t *local = NULL;
+ inode_table_t *table = NULL;
+ int ret = -1;
- if (op_ret == -1)
- goto out;
+ if (!parent || !child) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
+ }
- local = frame->local;
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
+ if (child->parent) {
+ parent->inode = inode_ref(child->parent);
+ if (!parent->inode) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
}
- prev = cookie;
+ gf_uuid_copy(parent->gfid, child->pargfid);
- if (local->loc.parent) {
- WIPE (preparent);
- WIPE (postparent);
+ ret = 0;
+
+ goto out;
+ } else {
+ if (gf_uuid_is_null(child->pargfid)) {
+ if (op_errno)
+ *op_errno = EINVAL;
+ goto out;
}
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set preset layout for subvol %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
+ table = this->itable;
+
+ if (!table) {
+ if (op_errno) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+ }
+
+ parent->inode = inode_find(table, child->pargfid);
+
+ if (!parent->inode) {
+ if (op_errno) {
+ *op_errno = EINVAL;
goto out;
+ }
}
+ gf_uuid_copy(parent->gfid, child->pargfid);
+
+ ret = 0;
+ }
+
out:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, stbuf, preparent,
- postparent, NULL);
- return 0;
+ return ret;
}
-
-int
-dht_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+static int32_t
+dht_create_do(call_frame_t *frame)
{
- dht_local_t *local = NULL;
- xlator_t *cached_subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *refreshed = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
- if (op_ret == -1)
- goto err;
+ local = frame->local;
- local = frame->local;
- cached_subvol = local->cached_subvol;
+ this = THIS;
- STACK_WIND (frame, dht_create_cbk,
- cached_subvol, cached_subvol->fops->create,
- &local->loc, local->flags, local->mode,
- local->umask, local->fd, local->params);
+ conf = this->private;
- return 0;
+ GF_VALIDATE_OR_GOTO(this->name, conf, err);
+
+ methods = &(conf->methods);
+
+ /* We don't need parent_loc anymore */
+ loc_wipe(&local->loc);
+
+ loc_copy(&local->loc, &local->loc2);
+
+ loc_wipe(&local->loc2);
+
+ refreshed = local->selfheal.refreshed_layout;
+
+ subvol = methods->layout_search(this, refreshed, local->loc.name);
+
+ if (!subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "no subvolume in "
+ "layout for path=%s",
+ local->loc.path);
+ local->op_errno = ENOENT;
+ goto err;
+ }
+
+ dht_create_wind_to_avail_subvol(frame, this, subvol, &local->loc,
+ local->flags, local->mode, local->umask,
+ local->fd, local->params);
+ return 0;
err:
- DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL, NULL);
+ local->refresh_layout_unlock(frame, this, -1, 1);
+
+ return 0;
+}
+
+static int32_t
+dht_create_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
+
+static int32_t
+dht_create_finish(call_frame_t *frame, xlator_t *this, int op_ret,
+ int invoke_cbk)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
+
+ local = frame->local;
+ lock_count = dht_lock_count(local->lock[0].layout.parent_layout.locks,
+ local->lock[0].layout.parent_layout.lk_count);
+ if (lock_count == 0)
+ goto done;
+
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL) {
+ goto done;
+ }
+
+ lock_local = dht_local_init(lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL) {
+ goto done;
+ }
+
+ lock_local->lock[0]
+ .layout.parent_layout.locks = local->lock[0].layout.parent_layout.locks;
+ lock_local->lock[0].layout.parent_layout.lk_count =
+ local->lock[0].layout.parent_layout.lk_count;
+
+ local->lock[0].layout.parent_layout.locks = NULL;
+ local->lock[0].layout.parent_layout.lk_count = 0;
+
+ dht_unlock_inodelk(lock_frame,
+ lock_local->lock[0].layout.parent_layout.locks,
+ lock_local->lock[0].layout.parent_layout.lk_count,
+ dht_create_unlock_cbk);
+ lock_frame = NULL;
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
+
+ if (op_ret == 0)
return 0;
+
+ DHT_STACK_UNWIND(create, frame, op_ret, local->op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
}
-int
-dht_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *params)
+static int32_t
+dht_create_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- int op_errno = -1;
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- xlator_t *avail_subvol = NULL;
+ dht_local_t *local = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ local = frame->local;
- dht_get_du_info (frame, this, loc);
+ if (!local) {
+ goto err;
+ }
- local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ if (op_ret < 0) {
+ gf_msg("DHT", GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "Create lock failed for file: %s", local->loc2.name);
- if (dht_filter_loc_subvol_key (this, loc, &local->loc,
- &subvol)) {
- gf_log (this->name, GF_LOG_INFO,
- "creating %s on %s (got create on %s)",
- local->loc.path, subvol->name, loc->path);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- &local->loc, flags, mode, umask, fd, params);
- goto done;
- }
+ local->op_errno = op_errno;
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+ goto err;
+ }
- if (!dht_is_subvol_filled (this, subvol)) {
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, umask, fd, params);
- goto done;
- }
- /* Choose the minimum filled volume, and create the
- files there */
- avail_subvol = dht_free_disk_available_subvol (this, subvol);
- if (avail_subvol != subvol) {
- local->params = dict_ref (params);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
- local->cached_subvol = avail_subvol;
- local->hashed_subvol = subvol;
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s (link at %s)", loc->path,
- avail_subvol->name, subvol->name);
- dht_linkfile_create (frame,
- dht_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
- goto done;
- }
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, umask, fd, params);
-done:
- return 0;
+ local->refresh_layout_unlock = dht_create_finish;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL, NULL);
+ local->refresh_layout_done = dht_create_do;
- return 0;
-}
+ dht_refresh_layout(frame);
+ return 0;
+err:
+ if (local)
+ dht_create_finish(frame, this, -1, 0);
+ else
+ DHT_STACK_UNWIND(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
+}
-int
-dht_mkdir_selfheal_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+int32_t
+dht_create_lock(call_frame_t *frame, xlator_t *subvol)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1;
+ dht_lock_t **lk_array = NULL;
- local = frame->local;
- layout = local->selfheal.layout;
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO(frame->this->name, frame->local, err);
- if (op_ret == 0) {
- dht_layout_set (this, local->inode, layout);
- if (local->loc.parent) {
- WIPE (&local->preparent);
- WIPE (&local->postparent);
- }
- }
+ local = frame->local;
- DHT_STACK_UNWIND (mkdir, frame, op_ret, op_errno,
- local->inode, &local->stbuf, &local->preparent,
- &local->postparent, NULL);
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
- return 0;
+ if (lk_array == NULL)
+ goto err;
+
+ lk_array[0] = dht_lock_new(frame->this, subvol, &local->loc, F_RDLCK,
+ DHT_LAYOUT_HEAL_DOMAIN, NULL,
+ IGNORE_ENOENT_ESTALE);
+
+ if (lk_array[0] == NULL)
+ goto err;
+
+ local->lock[0].layout.parent_layout.locks = lk_array;
+ local->lock[0].layout.parent_layout.lk_count = count;
+
+ ret = dht_blocking_inodelk(frame, lk_array, count, dht_create_lock_cbk);
+
+ if (ret < 0) {
+ local->lock[0].layout.parent_layout.locks = NULL;
+ local->lock[0].layout.parent_layout.lk_count = 0;
+ goto err;
+ }
+
+ return 0;
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free(lk_array, count);
+ GF_FREE(lk_array);
+ }
+
+ return -1;
}
int
-dht_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- int ret = -1;
- gf_boolean_t subvol_filled = _gf_false;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
+ dht_conf_t *conf = this->private;
+ dht_layout_t *parent_layout = NULL;
+ int *parent_disk_layout = NULL;
+ xlator_t *hashed_subvol = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ parent_layout = dht_layout_get(this, loc->parent);
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+
+ ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol,
+ &parent_disk_layout);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "extracting in-memory layout of parent failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ ret = dict_set_str_sizen(local->params, GF_PREOP_PARENT_KEY,
+ conf->xattr_name);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting %s key in params dictionary failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path,
+ GF_PREOP_PARENT_KEY);
+ goto err;
+ }
+
+ ret = dict_set_bin(local->params, conf->xattr_name, parent_disk_layout,
+ 4 * 4);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "%s (%s/%s) (path: %s): "
+ "setting parent-layout in params dictionary failed. ",
+ gf_fop_list[local->fop], pgfid, loc->name, loc->path);
+ goto err;
+ }
- local = frame->local;
- prev = cookie;
- layout = local->layout;
+err:
+ dht_layout_unref(this, parent_layout);
+ return ret;
+}
- subvol_filled = dht_is_subvol_filled (this, prev->this);
+int
+dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
+{
+ int op_errno = -1;
+ xlator_t *subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ int ret = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, fd, GF_FOP_CREATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->params = dict_ref(params);
+ local->flags = flags;
+ local->mode = mode;
+ local->umask = umask;
+
+ if (dht_filter_loc_subvol_key(this, loc, &local->loc, &subvol)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "creating %s on %s (got create on %s)", local->loc.path,
+ subvol->name, loc->path);
+
+ /* Since lookup-optimize is enabled by default, we need
+ * to create the linkto file if required.
+ * Note this does not check for decommisioned bricks
+ * and min-free-disk limits as this is a debugging tool
+ * and not expected to be used in production.
+ */
+ hashed_subvol = dht_subvol_get_hashed(this, &local->loc);
- LOCK (&frame->lock);
- {
- if (subvol_filled && (op_ret != -1)) {
- ret = dht_layout_merge (this, layout, prev->this,
- -1, ENOSPC, NULL);
- } else {
- ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, NULL);
+ if (hashed_subvol && (hashed_subvol != subvol)) {
+ /* Create the linkto file and then the data file */
+ local->cached_subvol = subvol;
+ local->hashed_subvol = hashed_subvol;
+
+ dht_linkfile_create(frame, dht_create_linkfile_create_cbk, this,
+ subvol, hashed_subvol, &local->loc);
+ goto done;
+ }
+ /* We either don't have a hashed subvol or the hashed subvol is
+ * the same as the one specified. No need to create the linkto
+ * file as we expect a lookup everywhere if there are problems
+ * with the parent layout
+ */
+
+ dht_set_parent_layout_in_dict(loc, this, local);
+
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, &local->loc, flags, mode, umask,
+ fd, params);
+ goto done;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "no subvolume in layout for path=%s", loc->path);
+
+ op_errno = EIO;
+ goto err;
+ }
+
+ /* Post remove-brick, the client layout may not be in sync with
+ * disk layout because of lack of lookup. Hence,a create call
+ * may fall on the decommissioned brick. Hence, if the
+ * hashed_subvol is part of decommissioned bricks list, do a
+ * lookup on parent dir. If a fix-layout is already done by the
+ * remove-brick process, the parent directory layout will be in
+ * sync with that of the disk. If fix-layout is still ending
+ * on the parent directory, we can let the file get created on
+ * the decommissioned brick which will be eventually migrated to
+ * non-decommissioned brick based on the new layout.
+ */
+
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] &&
+ conf->decommissioned_bricks[i] == subvol) {
+ gf_msg_debug(this->name, 0,
+ "hashed subvol:%s is "
+ "part of decommission brick list for "
+ "file: %s",
+ subvol->name, loc->path);
+
+ /* dht_refresh_layout needs directory info in
+ * local->loc. Hence, storing the parent_loc in
+ * local->loc and storing the create context in
+ * local->loc2. We will restore this information
+ * in dht_creation do */
+
+ ret = loc_copy(&local->loc2, &local->loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "loc_copy failed %s", loc->path);
+
+ goto err;
}
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to merge layouts", local->loc.path);
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
+ loc_wipe(&local->loc);
+
+ ret = dht_build_parent_loc(this, &local->loc, loc, &op_errno);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_LOC_FAILED,
+ "parent loc build failed");
+ goto err;
+ }
+
+ ret = dht_create_lock(frame, subvol);
+
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INODE_LK_ERROR,
+ "locking parent failed");
+ goto err;
}
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preparent, preparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
- }
-unlock:
- UNLOCK (&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_new_directory (frame, dht_mkdir_selfheal_cbk,
- layout);
+ goto done;
+ }
}
+ }
- return 0;
+ dht_create_wind_to_avail_subvol(frame, this, subvol, loc, flags, mode,
+ umask, fd, params);
+done:
+ return 0;
+
+err:
+
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
}
-int
-dht_mkdir_hashed_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int ret = -1;
- call_frame_t *prev = NULL;
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
- int i = 0;
- xlator_t *hashed_subvol = NULL;
-
- VALIDATE_OR_GOTO (this->private, err);
+static int
+dht_mkdir_selfheal_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
- local = frame->local;
- prev = cookie;
- layout = local->layout;
- conf = this->private;
- hashed_subvol = local->hashed_subvol;
+ local = frame->local;
+ layout = local->selfheal.layout;
- if (uuid_is_null (local->loc.gfid) && !op_ret)
- uuid_copy (local->loc.gfid, stbuf->ia_gfid);
+ FRAME_SU_UNDO(frame, dht_local_t);
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
- if (dht_is_subvol_filled (this, hashed_subvol))
- ret = dht_layout_merge (this, layout, prev->this,
- -1, ENOSPC, NULL);
- else
- ret = dht_layout_merge (this, layout, prev->this,
- op_ret, op_errno, NULL);
+ if (op_ret == 0) {
+ dht_layout_set(this, local->inode, layout);
- /* TODO: we may have to return from the function
- if layout merge fails. For now, lets just log an error */
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to merge layouts", local->loc.path);
+ dht_inode_ctx_time_update(local->inode, this, &local->stbuf, 1);
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->preparent, 0);
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto err;
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
}
- local->op_ret = 0;
+ }
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preparent, preparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent, prev->this);
+ DHT_STACK_UNWIND(mkdir, frame, op_ret, op_errno, local->inode,
+ &local->stbuf, &local->preparent, &local->postparent,
+ NULL);
- local->call_cnt = conf->subvolume_cnt - 1;
+ return 0;
+}
- if (uuid_is_null (local->loc.gfid))
- uuid_copy (local->loc.gfid, stbuf->ia_gfid);
- if (local->call_cnt == 0) {
- dht_selfheal_directory (frame, dht_mkdir_selfheal_cbk,
- &local->loc, layout);
+static int
+dht_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int ret = -1;
+ gf_boolean_t subvol_filled = _gf_false;
+ gf_boolean_t dir_exists = _gf_false;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ layout = local->layout;
+
+ subvol_filled = dht_is_subvol_filled(this, prev);
+
+ LOCK(&frame->lock);
+ {
+ if (subvol_filled && (op_ret != -1)) {
+ ret = dht_layout_merge(this, layout, prev, -1, ENOSPC, NULL);
+ } else {
+ if (op_ret == -1 && op_errno == EEXIST) {
+ /* Very likely just a race between mkdir and
+ self-heal (from lookup of a concurrent mkdir
+ attempt).
+ Ignore error for now. layout setting will
+ anyways fail if this was a different (old)
+ pre-existing different directory.
+ */
+ op_ret = 0;
+ dir_exists = _gf_true;
+ }
+ ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, NULL);
}
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == hashed_subvol)
- continue;
- STACK_WIND (frame, dht_mkdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->mkdir, &local->loc,
- local->mode, local->umask, local->params);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ "%s: failed to merge layouts for subvol %s", local->loc.path,
+ prev->name);
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ goto unlock;
}
- return 0;
-err:
- DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
+
+ if (dir_exists)
+ goto unlock;
+
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ /*Unlock entrylk and inodelk once mkdir is done on all subvols*/
+ dht_unlock_namespace(frame, &local->lock[0]);
+ FRAME_SU_DO(frame, dht_local_t);
+ dht_selfheal_new_directory(frame, dht_mkdir_selfheal_cbk, layout);
+ }
+
+ return 0;
}
+static int
+dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
- int
-dht_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *params)
+static int
+dht_mkdir_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- xlator_t *hashed_subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1, ret = -1;
+ xlator_t *hashed_subvol = NULL;
+ int32_t *parent_disk_layout = NULL;
+ dht_layout_t *parent_layout = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ conf = this->private;
+ local = frame->local;
+
+ if (local->op_ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): refreshing parent layout "
+ "failed.",
+ pgfid, loc->name, loc->path);
+
+ op_errno = local->op_errno;
+ goto err;
+ }
+
+ local->op_ret = -1;
+
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (hashed_subvol == NULL) {
+ gf_msg_debug(this->name, 0,
+ "mkdir (%s/%s) (path: %s): hashed subvol not "
+ "found",
+ pgfid, loc->name, loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ local->hashed_subvol = hashed_subvol;
+
+ parent_layout = dht_layout_get(this, loc->parent);
+
+ ret = dht_disk_layout_extract_for_subvol(this, parent_layout, hashed_subvol,
+ &parent_disk_layout);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, EIO, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "extracting in-memory layout of parent failed. ",
+ pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ if (memcmp(local->parent_disk_layout, parent_disk_layout,
+ sizeof(local->parent_disk_layout)) == 0) {
+ gf_msg(this->name, GF_LOG_WARNING, EIO, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): loop detected. "
+ "parent layout didn't change even though "
+ "previous attempt of mkdir failed because of "
+ "in-memory layout not matching with that on disk.",
+ pgfid, loc->name, loc->path);
+ op_errno = EIO;
+ goto err;
+ }
+
+ memcpy((void *)local->parent_disk_layout, (void *)parent_disk_layout,
+ sizeof(local->parent_disk_layout));
+
+ dht_layout_unref(this, parent_layout);
+ parent_layout = NULL;
+
+ ret = dict_set_str(params, GF_PREOP_PARENT_KEY, conf->xattr_name);
+ if (ret < 0) {
+ local->op_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "setting %s key in params dictionary failed. ",
+ pgfid, loc->name, loc->path, GF_PREOP_PARENT_KEY);
+ goto err;
+ }
+
+ ret = dict_set_bin(params, conf->xattr_name, parent_disk_layout, 4 * 4);
+ if (ret < 0) {
+ local->op_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "setting parent-layout in params dictionary failed. "
+ "mkdir (%s/%s) (path: %s)",
+ pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ parent_disk_layout = NULL;
+
+ STACK_WIND_COOKIE(frame, dht_mkdir_hashed_cbk, hashed_subvol, hashed_subvol,
+ hashed_subvol->fops->mkdir, loc, mode, umask, params);
+
+ return 0;
+err:
+ dht_unlock_namespace(frame, &local->lock[0]);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (this->private, err);
+ op_errno = local ? local->op_errno : op_errno;
+ DHT_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- conf = this->private;
+ if (parent_disk_layout != NULL)
+ GF_FREE(parent_disk_layout);
- dht_get_du_info (frame, this, loc);
+ if (parent_layout != NULL)
+ dht_layout_unref(this, parent_layout);
- local = dht_local_init (frame, loc, NULL, GF_FOP_MKDIR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ return 0;
+}
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- if (hashed_subvol == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "hashed subvol not found for %s",
- loc->path);
- op_errno = EINVAL;
+static int
+dht_mkdir_hashed_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *hashed_subvol = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ gf_boolean_t parent_layout_changed = _gf_false;
+ call_stub_t *stub = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ layout = local->layout;
+ conf = this->private;
+ hashed_subvol = local->hashed_subvol;
+
+ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
+
+ if (gf_uuid_is_null(local->loc.gfid) && !op_ret)
+ gf_uuid_copy(local->loc.gfid, stbuf->ia_gfid);
+
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+
+ parent_layout_changed = (xdata &&
+ dict_get(xdata, GF_PREOP_CHECK_FAILED))
+ ? 1
+ : 0;
+ if (parent_layout_changed) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): parent layout "
+ "changed. Attempting a refresh and then a "
+ "retry",
+ pgfid, local->loc.name, local->loc.path);
+
+ stub = fop_mkdir_stub(frame, dht_mkdir_helper, &local->loc,
+ local->mode, local->umask, local->params);
+ if (stub == NULL) {
goto err;
- }
+ }
- local->hashed_subvol = hashed_subvol;
- local->mode = mode;
- local->umask = umask;
- local->params = dict_ref (params);
- local->inode = inode_ref (loc->inode);
-
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- op_errno = ENOMEM;
+ ret = dht_handle_parent_layout_change(this, stub);
+ if (ret) {
goto err;
- }
+ }
+
+ stub = NULL;
+
+ return 0;
+ }
+
+ goto err;
+ }
+
+ dict_del(local->params, GF_PREOP_PARENT_KEY);
+ dict_del(local->params, conf->xattr_name);
+
+ if (dht_is_subvol_filled(this, hashed_subvol))
+ ret = dht_layout_merge(this, layout, prev, -1, ENOSPC, NULL);
+ else
+ ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, NULL);
+
+ /* TODO: we may have to return from the function
+ if layout merge fails. For now, lets just log an error */
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ "%s: failed to merge layouts for subvol %s", local->loc.path,
+ prev->name);
+
+ local->op_ret = 0;
+
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+
+ local->call_cnt = conf->subvolume_cnt - 1;
+ /* Delete internal mds xattr from params dict to avoid store
+ internal mds xattr on other subvols
+ */
+ dict_del(local->params, conf->mds_xattr_key);
+
+ if (gf_uuid_is_null(local->loc.gfid))
+ gf_uuid_copy(local->loc.gfid, stbuf->ia_gfid);
+
+ /* Set hashed subvol as a mds subvol on inode ctx */
+ /*if (!local->inode)
+ local->inode = inode_ref (inode);
+ */
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this, hashed_subvol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s on inode vol is %s",
+ local->loc.path, hashed_subvol->name);
+ }
+
+ if (local->call_cnt == 0) {
+ /*Unlock namespace lock once mkdir is done on all subvols*/
+ dht_unlock_namespace(frame, &local->lock[0]);
+ FRAME_SU_DO(frame, dht_local_t);
+ dht_selfheal_directory(frame, dht_mkdir_selfheal_cbk, &local->loc,
+ layout);
+ return 0;
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == hashed_subvol)
+ continue;
+ STACK_WIND_COOKIE(frame, dht_mkdir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i], conf->subvolumes[i]->fops->mkdir,
+ &local->loc, local->mode, local->umask,
+ local->params);
+ }
+
+ return 0;
+err:
+ if (local->op_ret != 0) {
+ dht_unlock_namespace(frame, &local->lock[0]);
+ }
- STACK_WIND (frame, dht_mkdir_hashed_cbk,
- hashed_subvol,
- hashed_subvol->fops->mkdir,
- loc, mode, umask, params);
+ DHT_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
+}
+static int
+dht_mkdir_guard_parent_layout_cbk(call_frame_t *frame, xlator_t *this,
+ loc_t *loc, mode_t mode, mode_t umask,
+ dict_t *params)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = 0;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = -1;
+ int32_t zero[1] = {0};
+
+ local = frame->local;
+ conf = this->private;
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ if (local->op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "Acquiring lock on parent to guard against "
+ "layout-change failed.",
+ pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ local->op_ret = -1;
+ /* Add internal MDS xattr on disk for hashed subvol
+ */
+ ret = dht_dict_set_array(params, conf->mds_xattr_key, zero, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value:key = %s for "
+ "path %s",
+ conf->mds_xattr_key, loc->path);
+ }
+
+ STACK_WIND_COOKIE(frame, dht_mkdir_hashed_cbk, local->hashed_subvol,
+ local->hashed_subvol, local->hashed_subvol->fops->mkdir,
+ loc, mode, umask, params);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
+ DHT_STACK_UNWIND(mkdir, frame, -1, local->op_errno, NULL, NULL, NULL, NULL,
+ NULL);
- return 0;
+ return 0;
}
-
int
-dht_rmdir_selfheal_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+dht_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = EINVAL, ret = -1;
+ xlator_t *hashed_subvol = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ call_stub_t *stub = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ conf = this->private;
+
+ if (!params || !dict_get(params, "gfid-req")) {
+ op_errno = EPERM;
+ gf_msg_callingfn(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_GFID_NULL,
+ "mkdir: %s is received "
+ "without gfid-req %p",
+ loc->path, params);
+ goto err;
+ }
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_MKDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (hashed_subvol == NULL) {
+ gf_msg_debug(this->name, 0, "hashed subvol not found for %s",
+ loc->path);
+ local->op_errno = EIO;
+ goto err;
+ }
+
+ local->hashed_subvol = hashed_subvol;
+ local->mode = mode;
+ local->umask = umask;
+ if (params)
+ local->params = dict_ref(params);
+
+ local->inode = inode_ref(loc->inode);
+
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /* set the newly created directory hash to the commit hash
+ * if the configuration option is set. If configuration option
+ * is not set, the older clients may still be connecting to the
+ * volume and hence we need to preserve the 1 in disk[0] part of the
+ * layout xattr */
+ if (conf->lookup_optimize)
+ local->layout->commit_hash = conf->vol_commit_hash;
+ else
+ local->layout->commit_hash = DHT_LAYOUT_HASH_INVALID;
+
+ stub = fop_mkdir_stub(frame, dht_mkdir_guard_parent_layout_cbk, loc, mode,
+ umask, params);
+ if (stub == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s): "
+ "creating stub failed.",
+ pgfid, loc->name, loc->path);
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = dht_guard_parent_layout_and_namespace(this, stub);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ "mkdir (%s/%s) (path: %s) cannot wind lock request to "
+ "guard parent layout",
+ pgfid, loc->name, loc->path);
+ goto err;
+ }
+
+ return 0;
- local = frame->local;
-
- DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, NULL);
+err:
+ op_errno = local ? local->op_errno : op_errno;
+ DHT_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
-int
-dht_rmdir_hashed_subvol_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static int
+dht_rmdir_selfheal_cbk(call_frame_t *heal_frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *heal_local = NULL;
+ call_frame_t *main_frame = NULL;
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->op_ret = -1;
- if (op_errno != ENOENT && op_errno != EACCES) {
- local->need_selfheal = 1;
- }
+ heal_local = heal_frame->local;
+ main_frame = heal_local->main_frame;
+ local = main_frame->local;
+ DHT_STACK_DESTROY(heal_frame);
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
- gf_log (this->name, GF_LOG_DEBUG,
- "rmdir on %s for %s failed (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- goto unlock;
- }
+ DHT_STACK_UNWIND(rmdir, main_frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
- dht_iatt_merge (this, &local->preparent, preparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
+ return 0;
+}
+static int
+dht_rmdir_hashed_subvol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_local_t *heal_local = NULL;
+ call_frame_t *heal_frame = NULL;
+ dht_conf_t *conf = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ if (conf->subvolume_cnt != 1) {
+ if (op_errno != ENOENT && op_errno != EACCES &&
+ op_errno != ESTALE) {
+ local->need_selfheal = 1;
+ }
+ }
+
+ gf_msg_debug(this->name, op_errno,
+ "rmdir on %s for %s failed "
+ "(gfid = %s)",
+ prev->name, local->loc.path, gfid);
+ goto unlock;
}
+
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+ }
unlock:
- UNLOCK (&frame->lock);
+ UNLOCK(&frame->lock);
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- if (local->need_selfheal) {
- local->layout =
- dht_layout_get (this, local->loc.inode);
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ if (local->need_selfheal) {
+ dht_rmdir_unlock(frame, this);
+ local->layout = dht_layout_get(this, local->loc.inode);
- /* TODO: neater interface needed below */
- local->stbuf.ia_type = local->loc.inode->ia_type;
+ /* TODO: neater interface needed below */
+ local->stbuf.ia_type = local->loc.inode->ia_type;
- uuid_copy (local->gfid, local->loc.inode->gfid);
- dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
- &local->loc, local->layout);
- } else {
+ gf_uuid_copy(local->gfid, local->loc.inode->gfid);
- if (local->loc.parent) {
- WIPE (&local->preparent);
- WIPE (&local->postparent);
- }
+ /* Use a different frame or else the rmdir op_ret is
+ * overwritten by that of the selfheal */
+
+ heal_frame = copy_frame(frame);
+
+ if (heal_frame == NULL) {
+ goto err;
+ }
- DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
- local->op_errno, &local->preparent,
- &local->postparent, NULL);
- }
+ heal_local = dht_local_init(heal_frame, &local->loc, NULL, 0);
+ if (!heal_local) {
+ DHT_STACK_DESTROY(heal_frame);
+ goto err;
+ }
+
+ heal_local->inode = inode_ref(local->loc.inode);
+ heal_local->main_frame = frame;
+ gf_uuid_copy(heal_local->gfid, local->loc.inode->gfid);
+
+ dht_selfheal_restore(heal_frame, dht_rmdir_selfheal_cbk,
+ &heal_local->loc, heal_local->layout);
+ return 0;
+ } else {
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->preparent, 0);
+
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
+ }
+
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
+
+ dht_rmdir_unlock(frame, this);
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
}
+ }
- return 0;
-}
+ return 0;
+err:
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno, NULL, NULL,
+ NULL);
+ return 0;
+}
-int
-dht_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+static int
+dht_rmdir_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
- int done = 0;
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
- local = frame->local;
- prev = cookie;
+static int
+dht_rmdir_unlock(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->op_ret = -1;
+ local = frame->local;
- if (op_errno != ENOENT && op_errno != EACCES) {
- local->need_selfheal = 1;
- }
+ /* Unlock entrylk */
+ dht_unlock_entrylk_wrapper(frame, &local->lock[0].ns.directory_ns);
- gf_log (this->name, GF_LOG_DEBUG,
- "rmdir on %s for %s failed (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- goto unlock;
- }
+ /* Unlock inodelk */
+ lock_count = dht_lock_count(local->lock[0].ns.parent_layout.locks,
+ local->lock[0].ns.parent_layout.lk_count);
- /* Track if rmdir succeeded on atleast one subvol*/
- local->fop_succeeded = 1;
- dht_iatt_merge (this, &local->preparent, preparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent,
- prev->this);
- }
-unlock:
- UNLOCK (&frame->lock);
+ if (lock_count == 0)
+ goto done;
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL)
+ goto done;
- this_call_cnt = dht_frame_return (frame);
+ lock_local = dht_local_init(lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL)
+ goto done;
- /* if local->hashed_subvol, we are yet to wind to hashed_subvol. */
- if (local->hashed_subvol && (this_call_cnt == 1)) {
- done = 1;
- } else if (!local->hashed_subvol && !this_call_cnt) {
- done = 1;
- }
+ lock_local->lock[0].ns.parent_layout.locks = local->lock[0]
+ .ns.parent_layout.locks;
+ lock_local->lock[0]
+ .ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count;
+ local->lock[0].ns.parent_layout.locks = NULL;
+ local->lock[0].ns.parent_layout.lk_count = 0;
+ dht_unlock_inodelk(lock_frame, lock_local->lock[0].ns.parent_layout.locks,
+ lock_local->lock[0].ns.parent_layout.lk_count,
+ dht_rmdir_unlock_cbk);
+ lock_frame = NULL;
- if (done) {
- if (local->need_selfheal && local->fop_succeeded) {
- local->layout =
- dht_layout_get (this, local->loc.inode);
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
- /* TODO: neater interface needed below */
- local->stbuf.ia_type = local->loc.inode->ia_type;
+ return 0;
+}
- uuid_copy (local->gfid, local->loc.inode->gfid);
- dht_selfheal_restore (frame, dht_rmdir_selfheal_cbk,
- &local->loc, local->layout);
- } else if (this_call_cnt) {
- /* If non-hashed subvol's have responded, proceed */
+static int
+dht_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ int done = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_local_t *heal_local = NULL;
+ call_frame_t *heal_frame = NULL;
+ int ret = -1;
+
+ local = frame->local;
+ prev = cookie;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ if ((op_errno != ENOENT) && (op_errno != ESTALE)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
- local->need_selfheal = 0;
- STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
- local->hashed_subvol,
- local->hashed_subvol->fops->rmdir,
- &local->loc, local->flags, NULL);
- } else if (!this_call_cnt) {
- /* All subvol's have responded, proceed */
+ if (op_errno != EACCES)
+ local->need_selfheal = 1;
+ }
- if (local->loc.parent) {
- WIPE (&local->preparent);
- WIPE (&local->postparent);
- }
+ gf_uuid_unparse(local->loc.gfid, gfid);
- DHT_STACK_UNWIND (rmdir, frame, local->op_ret,
- local->op_errno, &local->preparent,
- &local->postparent, NULL);
- }
+ gf_msg_debug(this->name, op_errno,
+ "rmdir on %s for %s failed."
+ "(gfid = %s)",
+ prev->name, local->loc.path, gfid);
+ goto unlock;
}
- return 0;
-}
+ /* Track if rmdir succeeded on at least one subvol*/
+ local->fop_succeeded = 1;
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+ }
+unlock:
+ UNLOCK(&frame->lock);
+ this_call_cnt = dht_frame_return(frame);
-int
-dht_rmdir_do (call_frame_t *frame, xlator_t *this)
-{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int i = 0;
- xlator_t *hashed_subvol = NULL;
+ /* if local->hashed_subvol, we are yet to wind to hashed_subvol. */
+ if (local->hashed_subvol && (this_call_cnt == 1)) {
+ done = 1;
+ } else if (!local->hashed_subvol && !this_call_cnt) {
+ done = 1;
+ }
- VALIDATE_OR_GOTO (this->private, err);
+ if (done) {
+ if (local->need_selfheal && local->fop_succeeded) {
+ dht_rmdir_unlock(frame, this);
+ local->layout = dht_layout_get(this, local->loc.inode);
- conf = this->private;
- local = frame->local;
+ /* TODO: neater interface needed below */
+ local->stbuf.ia_type = local->loc.inode->ia_type;
- if (local->op_ret == -1)
+ gf_uuid_copy(local->gfid, local->loc.inode->gfid);
+ heal_frame = copy_frame(frame);
+ if (heal_frame == NULL) {
goto err;
+ }
- local->call_cnt = conf->subvolume_cnt;
+ heal_local = dht_local_init(heal_frame, &local->loc, NULL, 0);
+ if (!heal_local) {
+ DHT_STACK_DESTROY(heal_frame);
+ goto err;
+ }
+
+ heal_local->inode = inode_ref(local->loc.inode);
+ heal_local->main_frame = frame;
+ gf_uuid_copy(heal_local->gfid, local->loc.inode->gfid);
+ ret = dht_selfheal_restore(heal_frame, dht_rmdir_selfheal_cbk,
+ &heal_local->loc, heal_local->layout);
+ if (ret) {
+ DHT_STACK_DESTROY(heal_frame);
+ goto err;
+ }
+
+ } else if (this_call_cnt) {
+ /* If non-hashed subvol's have responded, proceed */
+ if (local->op_ret == 0) {
+ /* Delete the dir from the hashed subvol if:
+ * The fop succeeded on at least one subvol
+ * and did not fail on any
+ * or
+ * The fop failed with ENOENT/ESTALE on
+ * all subvols */
+
+ STACK_WIND_COOKIE(frame, dht_rmdir_hashed_subvol_cbk,
+ local->hashed_subvol, local->hashed_subvol,
+ local->hashed_subvol->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ } else {
+ /* hashed-subvol was non-NULL and rmdir failed on
+ * all non hashed-subvols. Unwind rmdir with
+ * local->op_ret and local->op_errno. */
+ dht_rmdir_unlock(frame, this);
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
- /* first remove from non-hashed_subvol */
- hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
+ return 0;
+ }
+ } else if (!this_call_cnt) {
+ /* All subvol's have responded, proceed */
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_WARNING, "failed to get hashed "
- "subvol for %s",local->loc.path);
- } else {
- local->hashed_subvol = hashed_subvol;
- }
+ if (local->loc.parent) {
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->preparent, 0);
- /* When DHT has only 1 child */
- if (conf->subvolume_cnt == 1) {
- STACK_WIND (frame, dht_rmdir_hashed_subvol_cbk,
- conf->subvolumes[0],
- conf->subvolumes[0]->fops->rmdir,
- &local->loc, local->flags, NULL);
- return 0;
- }
+ dht_inode_ctx_time_update(local->loc.parent, this,
+ &local->postparent, 1);
+ }
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (hashed_subvol &&
- (hashed_subvol == conf->subvolumes[i]))
- continue;
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
- STACK_WIND (frame, dht_rmdir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->rmdir,
- &local->loc, local->flags, NULL);
+ dht_rmdir_unlock(frame, this);
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
}
+ }
- return 0;
+ return 0;
err:
- DHT_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, NULL);
- return 0;
+ DHT_STACK_UNWIND(rmdir, frame, -1, local->op_errno, NULL, NULL, NULL);
+ return 0;
}
+static int
+dht_rmdir_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *hashed_subvol;
+
+ conf = this->private;
+ local = frame->local;
-int
-dht_rmdir_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "acquiring entrylk after inodelk failed rmdir for %s)",
+ local->loc.path);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ hashed_subvol = local->hashed_subvol;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (hashed_subvol && (hashed_subvol == conf->subvolumes[i]))
+ continue;
+
+ STACK_WIND_COOKIE(frame, dht_rmdir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i], conf->subvolumes[i]->fops->rmdir,
+ &local->loc, local->flags, NULL);
+ }
+
+ return 0;
+
+err:
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
+
+ return 0;
+}
+
+static int
+dht_rmdir_do(call_frame_t *frame, xlator_t *this)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *src = NULL;
- call_frame_t *main_frame = NULL;
- dht_local_t *main_local = NULL;
- int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ xlator_t *hashed_subvol = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
- local = frame->local;
- prev = cookie;
- src = prev->this;
+ VALIDATE_OR_GOTO(frame->local, err);
+ local = frame->local;
+ VALIDATE_OR_GOTO(this->private, out);
+ conf = this->private;
- main_frame = local->main_frame;
- main_local = main_frame->local;
+ if (local->op_ret == -1)
+ goto out;
- if (op_ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlinked linkfile %s on %s",
- local->loc.path, src->name);
- } else {
- main_local->op_ret = -1;
- main_local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "unlink of %s on %s failed (%s)",
- local->loc.path, src->name, strerror (op_errno));
- }
+ local->call_cnt = conf->subvolume_cnt;
+
+ /* first remove from non-hashed_subvol */
+ hashed_subvol = dht_subvol_get_hashed(this, &local->loc);
- this_call_cnt = dht_frame_return (main_frame);
- if (is_last_call (this_call_cnt))
- dht_rmdir_do (main_frame, this);
+ if (!hashed_subvol) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
- DHT_STACK_DESTROY (frame);
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for %s (gfid = %s)",
+ local->loc.path, gfid);
+ } else {
+ local->hashed_subvol = hashed_subvol;
+ }
+
+ /* When DHT has only 1 child */
+ if (conf->subvolume_cnt == 1) {
+ STACK_WIND_COOKIE(frame, dht_rmdir_hashed_subvol_cbk,
+ conf->subvolumes[0], conf->subvolumes[0],
+ conf->subvolumes[0]->fops->rmdir, &local->loc,
+ local->flags, NULL);
return 0;
+ }
+
+ local->current = &local->lock[0];
+ ret = dht_protect_namespace(frame, &local->loc, local->hashed_subvol,
+ &local->current->ns, dht_rmdir_lock_cbk);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = errno ? errno : EINVAL;
+ goto out;
+ }
+
+ return 0;
+
+out:
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
+
+ DHT_STACK_UNWIND(rmdir, frame, local->op_ret, local->op_errno,
+ &local->preparent, &local->postparent, NULL);
+ return 0;
+err:
+ DHT_STACK_UNWIND(rmdir, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
}
+static void
+dht_rmdir_readdirp_done(call_frame_t *readdirp_frame, xlator_t *this)
+{
+ call_frame_t *main_frame = NULL;
+ dht_local_t *main_local = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+
+ local = readdirp_frame->local;
+ main_frame = local->main_frame;
+ main_local = main_frame->local;
+
+ /* At least one readdirp failed.
+ * This is a bit hit or miss - if readdirp failed on more than
+ * one subvol, we don't know which error is returned.
+ */
+ if (local->op_ret == -1) {
+ main_local->op_ret = local->op_ret;
+ main_local->op_errno = local->op_errno;
+ }
+
+ this_call_cnt = dht_frame_return(main_frame);
+
+ if (is_last_call(this_call_cnt))
+ dht_rmdir_do(main_frame, this);
+
+ DHT_STACK_DESTROY(readdirp_frame);
+}
-int
-dht_rmdir_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, dict_t *xattr, struct iatt *parent)
+/* Keep sending readdirp on the subvol until it returns no more entries
+ * It is possible that not all entries will fit in a single readdirp in
+ * which case the rmdir will keep failing with ENOTEMPTY
+ */
+
+static int
+dht_rmdir_readdirp_do(call_frame_t *readdirp_frame, xlator_t *this)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *src = NULL;
- call_frame_t *main_frame = NULL;
- dht_local_t *main_local = NULL;
- int this_call_cnt = 0;
+ dht_local_t *local = NULL;
- local = frame->local;
- prev = cookie;
- src = prev->this;
+ local = readdirp_frame->local;
- main_frame = local->main_frame;
- main_local = main_frame->local;
+ if (local->op_ret == -1) {
+ /* there is no point doing another readdirp on this
+ * subvol . */
+ dht_rmdir_readdirp_done(readdirp_frame, this);
+ return 0;
+ }
- if (op_ret != 0)
- goto err;
+ STACK_WIND_COOKIE(readdirp_frame, dht_rmdir_readdirp_cbk,
+ local->hashed_subvol, local->hashed_subvol,
+ local->hashed_subvol->fops->readdirp, local->fd, 4096, 0,
+ local->xattr);
- if (check_is_linkfile (inode, stbuf, xattr) == 0) {
- main_local->op_ret = -1;
- main_local->op_errno = ENOTEMPTY;
+ return 0;
+}
- gf_log (this->name, GF_LOG_WARNING,
- "%s on %s found to be not a linkfile (type=0%o)",
- local->loc.path, src->name, stbuf->ia_type);
- goto err;
- }
+static int
+dht_rmdir_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src = NULL;
+ call_frame_t *readdirp_frame = NULL;
+ dht_local_t *readdirp_local = NULL;
+ int this_call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ prev = cookie;
+ src = prev;
+
+ readdirp_frame = local->main_frame;
+ readdirp_local = readdirp_frame->local;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ if (op_ret == 0) {
+ gf_msg_trace(this->name, 0, "Unlinked linkfile %s on %s, gfid = %s",
+ local->loc.path, src->name, gfid);
+ } else {
+ if (op_errno != ENOENT) {
+ readdirp_local->op_ret = -1;
+ readdirp_local->op_errno = op_errno;
+ }
+ gf_msg_debug(this->name, op_errno,
+ "Unlink of %s on %s failed. (gfid = %s)", local->loc.path,
+ src->name, gfid);
+ }
+
+ this_call_cnt = dht_frame_return(readdirp_frame);
+
+ if (is_last_call(this_call_cnt))
+ dht_rmdir_readdirp_do(readdirp_frame, this);
+
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
- STACK_WIND (frame, dht_rmdir_linkfile_unlink_cbk,
- src, src->fops->unlink, &local->loc, 0, NULL);
- return 0;
+static int
+dht_rmdir_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr, struct iatt *parent)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src = NULL;
+ call_frame_t *readdirp_frame = NULL;
+ dht_local_t *readdirp_local = NULL;
+ int this_call_cnt = 0;
+ dht_conf_t *conf = this->private;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ prev = cookie;
+ src = prev;
+
+ gf_msg_debug(this->name, 0, "dht_rmdir_lookup_cbk %s", local->loc.path);
+
+ readdirp_frame = local->main_frame;
+ readdirp_local = readdirp_frame->local;
+
+ if (op_ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_FILE_LOOKUP_FAILED,
+ "lookup failed for %s on %s", local->loc.path, src->name);
+ goto err;
+ }
+
+ if (!check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name)) {
+ readdirp_local->op_ret = -1;
+ readdirp_local->op_errno = ENOTEMPTY;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR,
+ "%s on %s is not a linkfile (type=0%o, gfid = %s)",
+ local->loc.path, src->name, stbuf->ia_type, gfid);
+ goto err;
+ }
+
+ STACK_WIND_COOKIE(frame, dht_rmdir_linkfile_unlink_cbk, src, src,
+ src->fops->unlink, &local->loc, 0, NULL);
+ return 0;
err:
- this_call_cnt = dht_frame_return (main_frame);
- if (is_last_call (this_call_cnt))
- dht_rmdir_do (main_frame, this);
+ this_call_cnt = dht_frame_return(readdirp_frame);
+ if (is_last_call(this_call_cnt)) {
+ dht_rmdir_readdirp_do(readdirp_frame, this);
+ }
- DHT_STACK_DESTROY (frame);
- return 0;
+ DHT_STACK_DESTROY(frame);
+ return 0;
}
+static int
+dht_rmdir_cached_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *parent)
+{
+ dht_local_t *local = NULL;
+ xlator_t *src = NULL;
+ call_frame_t *readdirp_frame = NULL;
+ dht_local_t *readdirp_local = NULL;
+ int this_call_cnt = 0;
+ dht_conf_t *conf = this->private;
+ dict_t *xattrs = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ src = local->hashed_subvol;
+
+ /* main_frame here is the readdirp_frame */
+
+ readdirp_frame = local->main_frame;
+ readdirp_local = readdirp_frame->local;
+
+ gf_msg_debug(this->name, 0, "returning for %s ", local->loc.path);
+
+ if (op_ret == 0) {
+ readdirp_local->op_ret = -1;
+ readdirp_local->op_errno = ENOTEMPTY;
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SUBVOL_ERROR,
+ "%s found on cached subvol %s", local->loc.path, src->name);
+ goto err;
+ } else if (op_errno != ENOENT) {
+ readdirp_local->op_ret = -1;
+ readdirp_local->op_errno = op_errno;
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_SUBVOL_ERROR,
+ "%s not found on cached subvol %s", local->loc.path, src->name);
+ goto err;
+ }
+
+ xattrs = dict_new();
+ if (!xattrs) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "dict_new failed");
+ goto err;
+ }
+
+ ret = dict_set_uint32(xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s",
+ conf->link_xattr_name);
+ if (xattrs)
+ dict_unref(xattrs);
+ goto err;
+ }
+ STACK_WIND_COOKIE(frame, dht_rmdir_lookup_cbk, src, src, src->fops->lookup,
+ &local->loc, xattrs);
+ if (xattrs)
+ dict_unref(xattrs);
+
+ return 0;
+err:
+
+ this_call_cnt = dht_frame_return(readdirp_frame);
-int
-dht_rmdir_is_subvol_empty (call_frame_t *frame, xlator_t *this,
- gf_dirent_t *entries, xlator_t *src)
+ /* Once all the lookups/unlinks etc have returned, proceed to wind
+ * readdirp on the subvol again until no entries are returned.
+ * This is required if there are more entries than can be returned
+ * in a single readdirp call.
+ */
+
+ if (is_last_call(this_call_cnt))
+ dht_rmdir_readdirp_do(readdirp_frame, this);
+
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
+
+static int
+dht_rmdir_is_subvol_empty(call_frame_t *frame, xlator_t *this,
+ gf_dirent_t *entries, xlator_t *src)
{
- int ret = 0;
- int build_ret = 0;
- gf_dirent_t *trav = NULL;
- call_frame_t *lookup_frame = NULL;
- dht_local_t *lookup_local = NULL;
- dht_local_t *local = NULL;
- dict_t *xattrs = NULL;
+ int ret = 0;
+ int build_ret = 0;
+ gf_dirent_t *trav = NULL;
+ call_frame_t *lookup_frame = NULL;
+ dht_local_t *lookup_local = NULL;
+ dht_local_t *local = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = this->private;
+ xlator_t *subvol = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ int count = 0;
+ gf_boolean_t unwind = _gf_false;
+
+ local = frame->local;
+
+ list_for_each_entry(trav, &entries->list, list)
+ {
+ if (strcmp(trav->d_name, ".") == 0)
+ continue;
+ if (strcmp(trav->d_name, "..") == 0)
+ continue;
+ if (check_is_linkfile(NULL, (&trav->d_stat), trav->dict,
+ conf->link_xattr_name)) {
+ count++;
+ continue;
+ }
+
+ /* this entry is either a directory which is neither "." nor "..",
+ or a non directory which is not a linkfile. the directory is to
+ be treated as non-empty
+ */
+ return 0;
+ }
- local = frame->local;
+ xattrs = dict_new();
+ if (!xattrs) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "dict_new failed");
+ return -1;
+ }
- list_for_each_entry (trav, &entries->list, list) {
- if (strcmp (trav->d_name, ".") == 0)
- continue;
- if (strcmp (trav->d_name, "..") == 0)
- continue;
- if (check_is_linkfile (NULL, (&trav->d_stat), trav->dict)) {
- ret++;
- continue;
- }
+ ret = dict_set_uint32(xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s",
+ conf->link_xattr_name);
- /* this entry is either a directory which is neither "." nor "..",
- or a non directory which is not a linkfile. the directory is to
- be treated as non-empty
- */
- return 0;
+ if (xattrs)
+ dict_unref(xattrs);
+ return -1;
+ }
+
+ local->call_cnt = count;
+ ret = 0;
+
+ list_for_each_entry(trav, &entries->list, list)
+ {
+ if (strcmp(trav->d_name, ".") == 0)
+ continue;
+ if (strcmp(trav->d_name, "..") == 0)
+ continue;
+
+ lookup_frame = copy_frame(frame);
+
+ if (!lookup_frame) {
+ /* out of memory, let the rmdir fail
+ (as non-empty, unfortunately) */
+ goto err;
}
- xattrs = dict_new ();
- if (!xattrs) {
- gf_log (this->name, GF_LOG_ERROR, "dict_new failed");
- return -1;
+ lookup_local = dht_local_init(lookup_frame, NULL, NULL, GF_FOP_LOOKUP);
+ if (!lookup_local) {
+ goto err;
}
- ret = dict_set_uint32 (xattrs, DHT_LINKFILE_KEY, 256);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "failed to set linkto key"
- " in dict");
- if (xattrs)
- dict_unref (xattrs);
- return -1;
- }
-
- list_for_each_entry (trav, &entries->list, list) {
- if (strcmp (trav->d_name, ".") == 0)
- continue;
- if (strcmp (trav->d_name, "..") == 0)
- continue;
-
- lookup_frame = NULL;
- lookup_local = NULL;
-
- lookup_frame = copy_frame (frame);
- if (!lookup_frame) {
- /* out of memory, let the rmdir fail
- (as non-empty, unfortunately) */
- goto err;
- }
+ lookup_frame->local = lookup_local;
+ lookup_local->main_frame = frame;
+ lookup_local->hashed_subvol = src;
- lookup_local = mem_get0 (this->local_pool);
- if (!lookup_local) {
- goto err;
- }
+ build_ret = dht_build_child_loc(this, &lookup_local->loc, &local->loc,
+ trav->d_name);
+ if (build_ret != 0)
+ goto err;
- lookup_frame->local = lookup_local;
- lookup_local->main_frame = frame;
+ gf_uuid_copy(lookup_local->loc.gfid, trav->d_stat.ia_gfid);
- build_ret = dht_build_child_loc (this, &lookup_local->loc,
- &local->loc, trav->d_name);
- if (build_ret != 0)
- goto err;
+ gf_uuid_unparse(lookup_local->loc.gfid, gfid);
- uuid_copy (lookup_local->loc.gfid, trav->d_stat.ia_gfid);
+ gf_msg_trace(this->name, 0, "looking up %s on subvolume %s, gfid = %s",
+ lookup_local->loc.path, src->name, gfid);
- gf_log (this->name, GF_LOG_TRACE,
- "looking up %s on %s",
- lookup_local->loc.path, src->name);
+ subvol = dht_linkfile_subvol(this, NULL, &trav->d_stat, trav->dict);
+ if (!subvol || (subvol == src)) {
+ /* we need to delete the linkto file if it does not have a
+ * valid subvol or it points to itself.
+ */
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_INVALID_LINKFILE,
+ "Linkfile does not have link subvolume. "
+ "path = %s, gfid = %s",
+ lookup_local->loc.path, gfid);
- LOCK (&frame->lock);
- {
- local->call_cnt++;
- }
- UNLOCK (&frame->lock);
+ gf_msg_debug(this->name, 0, "looking up %s on subvol %s, gfid = %s",
+ lookup_local->loc.path, src->name, gfid);
- STACK_WIND (lookup_frame, dht_rmdir_lookup_cbk,
- src, src->fops->lookup,
- &lookup_local->loc, xattrs);
- ret++;
+ STACK_WIND_COOKIE(lookup_frame, dht_rmdir_lookup_cbk, src, src,
+ src->fops->lookup, &lookup_local->loc, xattrs);
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Looking up linkfile target %s on "
+ " subvol %s, gfid = %s",
+ lookup_local->loc.path, subvol->name, gfid);
+
+ STACK_WIND(lookup_frame, dht_rmdir_cached_lookup_cbk, subvol,
+ subvol->fops->lookup, &lookup_local->loc, xattrs);
}
+ ret++;
- if (xattrs)
- dict_unref (xattrs);
+ lookup_frame = NULL;
+ lookup_local = NULL;
+ }
- return ret;
+ if (xattrs)
+ dict_unref(xattrs);
+
+ return ret;
err:
- if (xattrs)
- dict_unref (xattrs);
+ if (xattrs)
+ dict_unref(xattrs);
- DHT_STACK_DESTROY (lookup_frame);
- return 0;
+ if (lookup_frame)
+ DHT_STACK_DESTROY(lookup_frame);
+
+ /* Handle the case where the wound calls have unwound before the
+ * loop processing is done
+ */
+
+ LOCK(&frame->lock);
+ {
+ local->op_ret = -1;
+ local->op_errno = ENOTEMPTY;
+
+ local->call_cnt -= (count - ret);
+ if (!local->call_cnt)
+ unwind = _gf_true;
+ }
+ UNLOCK(&frame->lock);
+
+ if (!unwind) {
+ return ret;
+ }
+ return 0;
}
+/*
+ * No more entries on this subvol. Proceed to the actual rmdir operation.
+ */
-int
-dht_rmdir_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries,
- dict_t *xdata)
+static int
+dht_rmdir_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
- xlator_t *src = NULL;
- int ret = 0;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src = NULL;
+ int ret = 0;
+ char *path = NULL;
+
+ local = frame->local;
+ prev = cookie;
+ src = prev;
+
+ if (op_ret > 2) {
+ /* dht_rmdir_is_subvol_empty() may free the frame,
+ * copy path for logging.
+ */
+ path = gf_strdup(local->loc.path);
- local = frame->local;
- prev = cookie;
- src = prev->this;
-
- if (op_ret > 2) {
- ret = dht_rmdir_is_subvol_empty (frame, this, entries, src);
-
- switch (ret) {
- case 0: /* non linkfiles exist */
- gf_log (this->name, GF_LOG_TRACE,
- "readdir on %s for %s returned %d entries",
- prev->this->name, local->loc.path, op_ret);
- local->op_ret = -1;
- local->op_errno = ENOTEMPTY;
- break;
- default:
- /* @ret number of linkfiles are getting unlinked */
- gf_log (this->name, GF_LOG_TRACE,
- "readdir on %s for %s found %d linkfiles",
- prev->this->name, local->loc.path, ret);
- break;
- }
+ ret = dht_rmdir_is_subvol_empty(frame, this, entries, src);
+
+ switch (ret) {
+ case 0: /* non linkfiles exist */
+ gf_msg_trace(this->name, 0,
+ "readdir on %s for %s returned %d "
+ "entries",
+ prev->name, local->loc.path, op_ret);
+ local->op_ret = -1;
+ local->op_errno = ENOTEMPTY;
+ break;
+ default:
+ /* @ret number of linkfiles are getting unlinked */
+ gf_msg_trace(this->name, 0,
+ "readdir on %s for %s found %d "
+ "linkfiles",
+ prev->name, path, ret);
+ break;
}
+ }
- this_call_cnt = dht_frame_return (frame);
+ /* readdirp failed or no linkto files were found on this subvol */
+ if (!ret)
+ dht_rmdir_readdirp_done(frame, this);
- if (is_last_call (this_call_cnt)) {
- dht_rmdir_do (frame, this);
- }
+ GF_FREE(path);
+ return 0;
+}
+static int
+dht_rmdir_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ xlator_t *prev = NULL;
+ int ret = 0;
+ dht_conf_t *conf = this->private;
+ dict_t *dict = NULL;
+ int i = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_local_t *readdirp_local = NULL;
+ call_frame_t *readdirp_frame = NULL;
+ int cnt = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ this_call_cnt = dht_frame_return(frame);
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ gf_msg_debug(this->name, op_errno,
+ "opendir on %s for %s failed, "
+ "gfid = %s,",
+ prev->name, local->loc.path, gfid);
+ if ((op_errno != ENOENT) && (op_errno != ESTALE)) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
+ goto err;
+ }
+
+ if (!is_last_call(this_call_cnt))
return 0;
-}
+ if (local->op_ret == -1)
+ goto err;
-int
-dht_rmdir_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
- dict_t *dict = NULL;
- int ret = 0;
+ fd_bind(fd);
- local = frame->local;
- prev = cookie;
+ dict = dict_new();
+ if (!dict) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "opendir on %s for %s failed (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- if (op_errno != ENOENT) {
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
- goto err;
+ ret = dict_set_uint32(dict, conf->link_xattr_name, 256);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: Failed to set dictionary value:key = %s", local->loc.path,
+ conf->link_xattr_name);
+
+ cnt = local->call_cnt = conf->subvolume_cnt;
+
+ /* Create a separate frame per subvol as we might need
+ * to resend readdirp multiple times to get all the
+ * entries.
+ */
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ readdirp_frame = copy_frame(frame);
+
+ if (!readdirp_frame) {
+ cnt--;
+ /* Reduce the local->call_cnt as well */
+ (void)dht_frame_return(frame);
+ continue;
}
- dict = dict_new ();
- if (!dict) {
- local->op_ret = -1;
- local->op_errno = ENOMEM;
- goto err;
+ readdirp_local = dht_local_init(readdirp_frame, &local->loc, local->fd,
+ 0);
+
+ if (!readdirp_local) {
+ DHT_STACK_DESTROY(readdirp_frame);
+ cnt--;
+ /* Reduce the local->call_cnt as well */
+ dht_frame_return(frame);
+ continue;
}
+ readdirp_local->main_frame = frame;
+ readdirp_local->op_ret = 0;
+ readdirp_local->xattr = dict_ref(dict);
+ /* overload this field to save the subvol info */
+ readdirp_local->hashed_subvol = conf->subvolumes[i];
- ret = dict_set_uint32 (dict,
- "trusted.glusterfs.dht.linkto", 256);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set 'trusted.glusterfs.dht.linkto' key",
- local->loc.path);
+ STACK_WIND_COOKIE(readdirp_frame, dht_rmdir_readdirp_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->readdirp,
+ readdirp_local->fd, 4096, 0, readdirp_local->xattr);
+ }
- STACK_WIND (frame, dht_rmdir_readdirp_cbk,
- prev->this, prev->this->fops->readdirp,
- local->fd, 4096, 0, dict);
+ if (dict)
+ dict_unref(dict);
- if (dict)
- dict_unref (dict);
+ /* Could not wind readdirp to any subvol */
- return 0;
+ if (!cnt)
+ goto err;
-err:
- this_call_cnt = dht_frame_return (frame);
+ return 0;
- if (is_last_call (this_call_cnt)) {
- dht_rmdir_do (frame, this);
- }
+err:
+ if (is_last_call(this_call_cnt)) {
+ dht_rmdir_do(frame, this);
+ }
- return 0;
+ return 0;
}
-
int
-dht_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- dict_t *xdata)
+dht_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int op_errno = -1;
- int i = -1;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int op_errno = -1;
+ int i = -1;
+ int ret = -1;
+ dict_t *xattr_req = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_RMDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->call_cnt = conf->subvolume_cnt;
+ local->op_ret = 0;
+ local->fop_succeeded = 0;
+
+ local->flags = flags;
+
+ local->fd = fd_create(local->loc.inode, frame->root->pid);
+ if (!local->fd) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (flags) {
+ return dht_rmdir_do(frame, this);
+ }
+ if (xdata) {
+ xattr_req = dict_ref(xdata);
+ } else {
+ xattr_req = dict_new();
+ }
+ if (xattr_req) {
+ ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
+ /* If parallel-readdir is enabled, this is required
+ * to handle stale linkto files in the directory
+ * being deleted. If this fails, log an error but
+ * do not prevent the operation.
+ */
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "%s: failed to set key %s",
+ loc->path, conf->link_xattr_name);
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "%s: failed to set key %s",
+ loc->path, conf->link_xattr_name);
+ }
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_rmdir_opendir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir, loc, local->fd,
+ xattr_req);
+ }
+
+ if (xattr_req) {
+ dict_unref(xattr_req);
+ }
+ return 0;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (this->private, err);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL);
- conf = this->private;
+ return 0;
+}
- local = dht_local_init (frame, loc, NULL, GF_FOP_RMDIR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+static int
+dht_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
- local->call_cnt = conf->subvolume_cnt;
- local->op_ret = 0;
- local->fop_succeeded = 0;
+{
+ DHT_STACK_UNWIND(entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
- local->flags = flags;
+/* TODO
+ * Sending entrylk to cached subvol can result in stale lock
+ * as described in the bug 1311002.
+ */
+int
+dht_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
- local->fd = fd_create (local->loc.inode, frame->root->pid);
- if (!local->fd) {
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
- op_errno = ENOMEM;
- goto err;
- }
+ local = dht_local_init(frame, loc, NULL, GF_FOP_ENTRYLK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rmdir_opendir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- loc, local->fd, NULL);
- }
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_uuid_unparse(loc->gfid, gfid);
- return 0;
+ gf_msg_debug(this->name, 0,
+ "no cached subvolume for path=%s, "
+ "gfid = %s",
+ loc->path, gfid);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+
+ STACK_WIND(frame, dht_entrylk_cbk, subvol, subvol->fops->entrylk, volume,
+ loc, basename, cmd, type, xdata);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rmdir, frame, -1, op_errno,
- NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(entrylk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-int
-dht_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+static int
+dht_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (entrylk, frame, op_ret, op_errno, xdata);
- return 0;
+ DHT_STACK_UNWIND(fentrylk, frame, op_ret, op_errno, NULL);
+ return 0;
}
-
int
-dht_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_ENTRYLK);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+dht_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+ VALIDATE_OR_GOTO(fd->inode, err);
- local->call_cnt = 1;
+ gf_uuid_unparse(fd->inode->gfid, gfid);
- STACK_WIND (frame, dht_entrylk_cbk,
- subvol, subvol->fops->entrylk,
- volume, loc, basename, cmd, type, xdata);
+ subvol = dht_subvol_get_cached(this, fd->inode);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0,
+ "No cached subvolume for fd=%p,"
+ " gfid = %s",
+ fd, gfid);
+ op_errno = EINVAL;
+ goto err;
+ }
- return 0;
+ STACK_WIND(frame, dht_fentrylk_cbk, subvol, subvol->fops->fentrylk, volume,
+ fd, basename, cmd, type, xdata);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (entrylk, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fentrylk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
+static int32_t
+dht_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
-int
-dht_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
-{
- DHT_STACK_UNWIND (fentrylk, frame, op_ret, op_errno, NULL);
- return 0;
-}
+ local = frame->local;
+ LOCK(&frame->lock);
+ {
+ if (op_ret < 0 && op_errno != ENOTCONN) {
+ local->op_errno = op_errno;
+ goto unlock;
+ }
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK(&frame->lock);
-int
-dht_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ DHT_STACK_UNWIND(ipc, frame, local->op_ret, local->op_errno, NULL);
+ }
+
+out:
+ return 0;
+}
+
+int32_t
+dht_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int i = 0;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ if (op != GF_IPC_TARGET_UPCALL)
+ goto wind_default;
- STACK_WIND (frame, dht_fentrylk_cbk,
- subvol, subvol->fops->fentrylk,
- volume, fd, basename, cmd, type, xdata);
+ VALIDATE_OR_GOTO(this->private, err);
+ conf = this->private;
- return 0;
+ local = dht_local_init(frame, NULL, NULL, GF_FOP_IPC);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ if (xdata) {
+ if (dict_set_int8(xdata, conf->xattr_name, 0) < 0)
+ goto err;
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND(frame, dht_ipc_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->ipc, op, xdata);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fentrylk, frame, -1, op_errno, NULL);
+ DHT_STACK_UNWIND(ipc, frame, -1, op_errno, NULL);
- return 0;
-}
+ return 0;
+wind_default:
+ STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ipc, op, xdata);
+ return 0;
+}
int
-dht_forget (xlator_t *this, inode_t *inode)
+dht_forget(xlator_t *this, inode_t *inode)
{
- uint64_t tmp_layout = 0;
- dht_layout_t *layout = NULL;
+ uint64_t ctx_int = 0;
+ dht_inode_ctx_t *ctx = NULL;
+ dht_layout_t *layout = NULL;
- inode_ctx_del (inode, this, &tmp_layout);
-
- if (!tmp_layout)
- return 0;
-
- layout = (dht_layout_t *)(long)tmp_layout;
- dht_layout_unref (this, layout);
+ inode_ctx_del(inode, this, &ctx_int);
+ if (!ctx_int)
return 0;
-}
+ ctx = (dht_inode_ctx_t *)(long)ctx_int;
-int
-dht_notify (xlator_t *this, int event, void *data, ...)
-{
- xlator_t *subvol = NULL;
- int cnt = -1;
- int i = -1;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int propagate = 0;
-
- int had_heard_from_all = 0;
- int have_heard_from_all = 0;
- struct timeval time = {0,};
- gf_defrag_info_t *defrag = NULL;
- dict_t *dict = NULL;
- gf_defrag_type cmd = 0;
- dict_t *output = NULL;
- va_list ap;
-
-
- conf = this->private;
- if (!conf)
- return ret;
+ layout = ctx->layout;
+ ctx->layout = NULL;
+ dht_layout_unref(this, layout);
+ GF_FREE(ctx);
- /* had all subvolumes reported status once till now? */
- had_heard_from_all = 1;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->last_event[i]) {
- had_heard_from_all = 0;
- }
- }
+ return 0;
+}
- switch (event) {
+int
+dht_notify(xlator_t *this, int event, void *data, ...)
+{
+ xlator_t *subvol = NULL;
+ int cnt = -1;
+ int i = -1;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int propagate = 0;
+
+ int had_heard_from_all = 0;
+ int have_heard_from_all = 0;
+ gf_defrag_info_t *defrag = NULL;
+ dict_t *dict = NULL;
+ gf_defrag_type cmd = 0;
+ dict_t *output = NULL;
+ va_list ap;
+ struct gf_upcall *up_data = NULL;
+ struct gf_upcall_cache_invalidation *up_ci = NULL;
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ /* had all subvolumes reported status once till now? */
+ had_heard_from_all = 1;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->last_event[i]) {
+ had_heard_from_all = 0;
+ }
+ }
+
+ switch (event) {
case GF_EVENT_CHILD_UP:
- subvol = data;
+ subvol = data;
- conf->gen++;
+ conf->gen++;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- cnt = i;
- break;
- }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
}
+ }
- if (cnt == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "got GF_EVENT_CHILD_UP bad subvolume %s",
- subvol->name);
- break;
- }
+ if (cnt == -1) {
+ gf_msg_debug(this->name, 0,
+ "got GF_EVENT_CHILD_UP bad "
+ "subvolume %s",
+ subvol->name);
+ break;
+ }
- gettimeofday (&time, NULL);
- LOCK (&conf->subvolume_lock);
- {
- conf->subvolume_status[cnt] = 1;
- conf->last_event[cnt] = event;
- conf->subvol_up_time[cnt] = time.tv_sec;
- }
- UNLOCK (&conf->subvolume_lock);
+ LOCK(&conf->subvolume_lock);
+ {
+ conf->subvolume_status[cnt] = 1;
+ conf->last_event[cnt] = event;
+ conf->subvol_up_time[cnt] = gf_time();
+ }
+ UNLOCK(&conf->subvolume_lock);
- /* one of the node came back up, do a stat update */
- dht_get_du_info_for_subvol (this, cnt);
+ /* one of the node came back up, do a stat update */
+ dht_get_du_info_for_subvol(this, cnt);
- break;
+ break;
- case GF_EVENT_CHILD_MODIFIED:
- subvol = data;
+ case GF_EVENT_SOME_DESCENDENT_UP:
+ subvol = data;
+ conf->gen++;
+ propagate = 1;
- conf->gen++;
- propagate = 1;
+ break;
- break;
+ case GF_EVENT_SOME_DESCENDENT_DOWN:
+ subvol = data;
+ propagate = 1;
- case GF_EVENT_CHILD_DOWN:
- subvol = data;
-
- if (conf->assert_no_child_down) {
- gf_log (this->name, GF_LOG_WARNING,
- "Received CHILD_DOWN. Exiting");
- if (conf->defrag) {
- gf_defrag_stop (conf->defrag, NULL);
- } else {
- kill (getpid(), SIGTERM);
- }
- }
+ break;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- cnt = i;
- break;
- }
- }
+ case GF_EVENT_CHILD_DOWN:
+ subvol = data;
- if (cnt == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "got GF_EVENT_CHILD_DOWN bad subvolume %s",
- subvol->name);
- break;
+ if (conf->assert_no_child_down) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_CHILD_DOWN,
+ "Received CHILD_DOWN. Exiting");
+ if (conf->defrag) {
+ gf_defrag_stop(conf, GF_DEFRAG_STATUS_FAILED, NULL);
+ } else {
+ kill(getpid(), SIGTERM);
}
+ }
- LOCK (&conf->subvolume_lock);
- {
- conf->subvolume_status[cnt] = 0;
- conf->last_event[cnt] = event;
- conf->subvol_up_time[cnt] = 0;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
}
- UNLOCK (&conf->subvolume_lock);
+ }
+ if (cnt == -1) {
+ gf_msg_debug(this->name, 0,
+ "got GF_EVENT_CHILD_DOWN bad "
+ "subvolume %s",
+ subvol->name);
break;
+ }
- case GF_EVENT_CHILD_CONNECTING:
- subvol = data;
+ LOCK(&conf->subvolume_lock);
+ {
+ conf->subvolume_status[cnt] = 0;
+ conf->last_event[cnt] = event;
+ conf->subvol_up_time[cnt] = 0;
+ }
+ UNLOCK(&conf->subvolume_lock);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- cnt = i;
- break;
- }
- }
+ for (i = 0; i < conf->subvolume_cnt; i++)
+ if (conf->last_event[i] != event)
+ event = GF_EVENT_SOME_DESCENDENT_DOWN;
+ break;
- if (cnt == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "got GF_EVENT_CHILD_CONNECTING bad subvolume %s",
- subvol->name);
- break;
- }
+ case GF_EVENT_CHILD_CONNECTING:
+ subvol = data;
- LOCK (&conf->subvolume_lock);
- {
- conf->last_event[cnt] = event;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ cnt = i;
+ break;
}
- UNLOCK (&conf->subvolume_lock);
+ }
+ if (cnt == -1) {
+ gf_msg_debug(this->name, 0,
+ "got GF_EVENT_CHILD_CONNECTING"
+ " bad subvolume %s",
+ subvol->name);
break;
- case GF_EVENT_VOLUME_DEFRAG:
- {
- if (!conf->defrag) {
- return ret;
- }
- defrag = conf->defrag;
+ }
- dict = data;
- va_start (ap, data);
- output = va_arg (ap, dict_t*);
+ LOCK(&conf->subvolume_lock);
+ {
+ conf->last_event[cnt] = event;
+ }
+ UNLOCK(&conf->subvolume_lock);
- ret = dict_get_int32 (dict, "rebalance-command",
- (int32_t*)&cmd);
- if (ret)
- return ret;
- LOCK (&defrag->lock);
- {
- if (defrag->is_exiting)
- goto unlock;
- if (cmd == GF_DEFRAG_CMD_STATUS)
- gf_defrag_status_get (defrag, output);
- else if (cmd == GF_DEFRAG_CMD_STOP)
- gf_defrag_stop (defrag, output);
- }
-unlock:
- UNLOCK (&defrag->lock);
- return 0;
- break;
- }
+ break;
+ case GF_EVENT_VOLUME_DEFRAG: {
+ if (!conf->defrag) {
+ return ret;
+ }
+ defrag = conf->defrag;
+
+ dict = data;
+ va_start(ap, data);
+ output = va_arg(ap, dict_t *);
+ ret = dict_get_int32(dict, "rebalance-command", (int32_t *)&cmd);
+ if (ret) {
+ va_end(ap);
+ return ret;
+ }
+ LOCK(&defrag->lock);
+ {
+ if (defrag->is_exiting)
+ goto unlock;
+ if ((cmd == GF_DEFRAG_CMD_STATUS) ||
+ (cmd == GF_DEFRAG_CMD_DETACH_STATUS))
+ gf_defrag_status_get(conf, output);
+ else if (cmd == GF_DEFRAG_CMD_DETACH_START)
+ defrag->cmd = GF_DEFRAG_CMD_DETACH_START;
+ else if (cmd == GF_DEFRAG_CMD_STOP ||
+ cmd == GF_DEFRAG_CMD_DETACH_STOP)
+ gf_defrag_stop(conf, GF_DEFRAG_STATUS_STOPPED, output);
+ }
+ unlock:
+ UNLOCK(&defrag->lock);
+ va_end(ap);
+ return ret;
+ break;
+ }
+ case GF_EVENT_UPCALL:
+ up_data = (struct gf_upcall *)data;
+ if (up_data->event_type != GF_UPCALL_CACHE_INVALIDATION)
+ break;
+ up_ci = (struct gf_upcall_cache_invalidation *)up_data->data;
+
+ /* Since md-cache will be aggressively filtering lookups,
+ * the stale layout issue will be more pronounced. Hence
+ * when a layout xattr is changed by the rebalance process
+ * notify all the md-cache clients to invalidate the existing
+ * stat cache and send the lookup next time*/
+ if (up_ci->dict && dict_get(up_ci->dict, conf->xattr_name))
+ up_ci->flags |= UP_EXPLICIT_LOOKUP;
+
+ /* TODO: Instead of invalidating iatt, update the new
+ * hashed/cached subvolume in dht inode_ctx */
+ if (IS_DHT_LINKFILE_MODE(&up_ci->stat))
+ up_ci->flags |= UP_EXPLICIT_LOOKUP;
+
+ propagate = 1;
+ break;
default:
- propagate = 1;
+ propagate = 1;
+ break;
+ }
+
+ /* have all subvolumes reported status once by now? */
+ have_heard_from_all = 1;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->last_event[i])
+ have_heard_from_all = 0;
+ }
+
+ /* if all subvols have reported status, no need to hide anything
+ or wait for anything else. Just propagate blindly */
+ if (have_heard_from_all) {
+ propagate = 1;
+ }
+
+ if (!had_heard_from_all && have_heard_from_all) {
+ static int run_defrag = 0;
+ /* This is the first event which completes aggregation
+ of events from all subvolumes. If at least one subvol
+ had come up, propagate CHILD_UP, but only this time
+ */
+ event = GF_EVENT_CHILD_DOWN;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->last_event[i] == GF_EVENT_CHILD_UP) {
+ event = GF_EVENT_CHILD_UP;
break;
+ }
+
+ if (conf->last_event[i] == GF_EVENT_CHILD_CONNECTING) {
+ event = GF_EVENT_CHILD_CONNECTING;
+ /* continue to check other events for CHILD_UP */
+ }
}
+ /* Rebalance is started with assert_no_child_down. So we do
+ * not need to handle CHILD_DOWN event here.
+ *
+ * If there is a graph switch, we should not restart the
+ * rebalance daemon. Use 'run_defrag' to indicate if the
+ * thread has already started.
+ */
+ if (conf->defrag && !run_defrag) {
+ run_defrag = 1;
+ ret = gf_thread_create(&conf->defrag->th, NULL, gf_defrag_start,
+ this, "dhtdg");
+ if (ret) {
+ GF_FREE(conf->defrag);
+ conf->defrag = NULL;
+ kill(getpid(), SIGTERM);
+ }
+ }
+ }
+
+ ret = 0;
+ if (propagate)
+ ret = default_notify(this, event, data);
+out:
+ return ret;
+}
+
+int
+dht_inode_ctx_layout_get(inode_t *inode, xlator_t *this, dht_layout_t **layout)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- /* have all subvolumes reported status once by now? */
- have_heard_from_all = 1;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->last_event[i])
- have_heard_from_all = 0;
- }
+ ret = dht_inode_ctx_get(inode, this, &ctx);
- /* if all subvols have reported status, no need to hide anything
- or wait for anything else. Just propagate blindly */
- if (have_heard_from_all) {
- propagate = 1;
- if (conf->defrag) {
- ret = pthread_create (&conf->defrag->th, NULL,
- gf_defrag_start, this);
- if (ret) {
- conf->defrag = NULL;
- GF_FREE (conf->defrag);
- kill (getpid(), SIGTERM);
- }
- }
+ if (!ret && ctx) {
+ if (ctx->layout) {
+ if (layout)
+ *layout = ctx->layout;
+ ret = 0;
+ } else {
+ ret = -1;
}
+ }
+ return ret;
+}
- if (!had_heard_from_all && have_heard_from_all) {
- /* This is the first event which completes aggregation
- of events from all subvolumes. If at least one subvol
- had come up, propagate CHILD_UP, but only this time
- */
- event = GF_EVENT_CHILD_DOWN;
+void
+dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc,
+ dht_layout_t *layout)
+{
+ char string[2048] = {0};
+ char *output_string = NULL;
+ int len = 0;
+ int off = 0;
+ int i = 0;
+ gf_loglevel_t log_level = gf_log_get_loglevel();
+ int ret = 0;
+
+ if (log_level < GF_LOG_INFO)
+ return;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->last_event[i] == GF_EVENT_CHILD_UP) {
- event = GF_EVENT_CHILD_UP;
- break;
- }
+ if (!layout)
+ return;
- if (conf->last_event[i] == GF_EVENT_CHILD_CONNECTING) {
- event = GF_EVENT_CHILD_CONNECTING;
- /* continue to check other events for CHILD_UP */
- }
- }
- }
+ if (!layout->cnt)
+ return;
+
+ if (!loc)
+ return;
+
+ if (!loc->path)
+ return;
+
+ ret = snprintf(string, sizeof(string), "Setting layout of %s with ",
+ loc->path);
+
+ if (ret < 0)
+ return;
+
+ len += ret;
+
+ /* Calculation of total length of the string required to calloc
+ * output_string. Log includes subvolume-name, start-range, end-range
+ * and err value.
+ *
+ * This log will help to debug cases where:
+ * a) Different processes set different layout of a directory.
+ * b) Error captured in lookup, which will be filled in layout->err
+ * (like ENOENT, ESTALE etc)
+ */
+
+ for (i = 0; i < layout->cnt; i++) {
+ ret = snprintf(string, sizeof(string),
+ "[Subvol_name: %s, Err: %d , Start: "
+ "0x%x, Stop: 0x%x, Hash: 0x%x], ",
+ layout->list[i].xlator->name, layout->list[i].err,
+ layout->list[i].start, layout->list[i].stop,
+ layout->list[i].commit_hash);
+
+ if (ret < 0)
+ return;
+
+ len += ret;
+ }
+
+ len++;
+
+ output_string = GF_MALLOC(len + 1, gf_common_mt_char);
+
+ if (!output_string)
+ return;
+
+ ret = snprintf(output_string, len + 1, "Setting layout of %s with ",
+ loc->path);
+
+ if (ret < 0)
+ goto err;
+
+ off += ret;
+
+ for (i = 0; i < layout->cnt; i++) {
+ ret = snprintf(output_string + off, len - off,
+ "[Subvol_name: %s, Err: %d , Start: "
+ "0x%x, Stop: 0x%x, Hash: 0x%x], ",
+ layout->list[i].xlator->name, layout->list[i].err,
+ layout->list[i].start, layout->list[i].stop,
+ layout->list[i].commit_hash);
+
+ if (ret < 0)
+ goto err;
+
+ off += ret;
+ }
+
+ gf_msg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_LOG_FIXED_LAYOUT, "%s",
+ output_string);
+
+err:
+ GF_FREE(output_string);
+}
+
+int32_t
+dht_migration_get_dst_subvol(xlator_t *this, dht_local_t *local)
+{
+ int ret = -1;
+ if (!local)
+ goto out;
+
+ local->rebalance.target_node = dht_subvol_get_hashed(this, &local->loc);
+
+ if (local->rebalance.target_node)
ret = 0;
- if (propagate)
- ret = default_notify (this, event, data);
- return ret;
+out:
+ return ret;
+}
+
+/*
+This function should not be called more then once during a FOP
+handling path. It is valid only for for ops on files
+*/
+int32_t
+dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ if (!local)
+ return -1;
+
+ if (local->rebalance.set) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_REBAL_STRUCT_SET,
+ "local->rebalance already set");
+ }
+
+ if (stbuf)
+ memcpy(&local->rebalance.stbuf, stbuf, sizeof(struct iatt));
+
+ if (prebuf)
+ memcpy(&local->rebalance.prebuf, prebuf, sizeof(struct iatt));
+
+ if (postbuf)
+ memcpy(&local->rebalance.postbuf, postbuf, sizeof(struct iatt));
+
+ if (xdata)
+ local->rebalance.xdata = dict_ref(xdata);
+
+ local->rebalance.set = 1;
+
+ return 0;
+}
+
+int32_t
+dht_release(xlator_t *this, fd_t *fd)
+{
+ return dht_fd_ctx_destroy(this, fd);
+}
+
+static int
+dht_pt_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!op_ret) {
+ dht_layout_set(this, inode, local->layout);
+ }
+
+ DHT_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, NULL);
+
+ return 0;
+}
+
+int32_t
+dht_pt_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ bool free_xdata = false;
+ int ret = 0;
+ int op_errno = 0;
+ int32_t *disk_layout_p = NULL;
+
+ conf = this->private;
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_MKDIR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!layout)
+ goto wind;
+
+ local->layout = layout;
+
+ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata)
+ goto wind;
+ free_xdata = true;
+ }
+
+ /*Set the xlator or the following will crash*/
+ layout->list[0].xlator = conf->subvolumes[0];
+
+ dht_selfheal_layout_new_directory(frame, loc, layout);
+
+ dht_disk_layout_extract(this, layout, 0, &disk_layout_p);
+
+ ret = dict_set_bin(xdata, conf->xattr_name, disk_layout_p, 4 * 4);
+ if (ret) {
+ gf_msg("dht", GF_LOG_DEBUG, EINVAL, DHT_MSG_DICT_SET_FAILED,
+ "dht layout dict set failed");
+ }
+wind:
+ STACK_WIND(frame, dht_pt_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ if (free_xdata)
+ dict_unref(xdata);
+ return 0;
+
+err:
+ op_errno = local ? local->op_errno : op_errno;
+ DHT_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+static int
+dht_pt_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ dict_del(xattr, conf->xattr_name);
+ dict_del(xattr, conf->mds_xattr_key);
+ dict_del(xattr, conf->commithash_xattr_name);
+
+ if (frame->root->pid >= 0) {
+ GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
+ GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr);
+ }
+
+ DHT_STACK_UNWIND(getxattr, frame, op_ret, op_errno, xattr, xdata);
+ return 0;
+}
+
+int
+dht_pt_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key, dict_t *xdata)
+{
+ STACK_WIND(frame, dht_pt_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, key, xdata);
+ return 0;
+}
+
+static int
+dht_pt_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ dict_del(xattr, conf->xattr_name);
+
+ if (frame->root->pid >= 0) {
+ GF_REMOVE_INTERNAL_XATTR("trusted.glusterfs.quota*", xattr);
+ GF_REMOVE_INTERNAL_XATTR("trusted.pgfid*", xattr);
+ }
+
+ DHT_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, xattr, xdata);
+ return 0;
+}
+
+int
+dht_pt_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+ dict_t *xdata)
+{
+ STACK_WIND(frame, dht_pt_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, key, xdata);
+ return 0;
+}
+
+/* The job of this function is to check if all the xlators have updated
+ * error in the layout. */
+int
+dht_dir_layout_error_check(xlator_t *this, inode_t *inode)
+{
+ dht_layout_t *layout = NULL;
+ int i = 0;
+
+ layout = dht_layout_get(this, inode);
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == 0) {
+ return 0;
+ }
+ }
+
+ /* Returning the first xlator error as all xlators have errors */
+ return layout->list[0].err;
}
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index d3ccacdb8c6..fe0dc3db34a 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -8,659 +8,1377 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include <regex.h>
#include "dht-mem-types.h"
+#include "dht-messages.h"
+#include <glusterfs/call-stub.h>
#include "libxlator.h"
-#include "syncop.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/refcount.h>
+#include <glusterfs/timer.h>
+#include "protocol-common.h"
+#include <glusterfs/glusterfs-acl.h>
#ifndef _DHT_H
#define _DHT_H
-#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
-#define GF_DHT_LOOKUP_UNHASHED_ON 1
+#define GF_XATTR_FIX_LAYOUT_KEY "distribute.fix.layout"
+#define GF_XATTR_FILE_MIGRATE_KEY "trusted.distribute.migrate-data"
+#define DHT_MDS_STR "mds"
+#define GF_DHT_LOOKUP_UNHASHED_OFF 0
+#define GF_DHT_LOOKUP_UNHASHED_ON 1
#define GF_DHT_LOOKUP_UNHASHED_AUTO 2
-#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
+#define DHT_PATHINFO_HEADER "DISTRIBUTE:"
+#define DHT_FILE_MIGRATE_DOMAIN "dht.file.migrate"
+/* Layout synchronization */
+#define DHT_LAYOUT_HEAL_DOMAIN "dht.layout.heal"
+/* Namespace synchronization */
+#define DHT_ENTRY_SYNC_DOMAIN "dht.entry.sync"
+#define DHT_LAYOUT_HASH_INVALID 1
+#define MAX_REBAL_THREADS sysconf(_SC_NPROCESSORS_ONLN)
-#include <fnmatch.h>
+#define DHT_DIR_STAT_BLOCKS 8
+#define DHT_DIR_STAT_SIZE 4096
-typedef int (*dht_selfheal_dir_cbk_t) (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *xdata);
-typedef int (*dht_defrag_cbk_fn_t) (xlator_t *this, call_frame_t *frame,
- int ret);
+/* Virtual xattr for subvols status */
+#define DHT_SUBVOL_STATUS_KEY "dht.subvol.status"
-struct dht_layout {
- int spread_cnt; /* layout spread count per directory,
- is controlled by 'setxattr()' with
- special key */
- int cnt;
- int preset;
- int gen;
- int type;
- int ref; /* use with dht_conf_t->layout_lock */
- int search_unhashed;
- struct {
- int err; /* 0 = normal
- -1 = dir exists and no xattr
- >0 = dir lookup failed with errno
- */
- uint32_t start;
- uint32_t stop;
- xlator_t *xlator;
- } list[0];
-};
-typedef struct dht_layout dht_layout_t;
+/* Virtual xattrs for debugging */
+#define DHT_DBG_HASHED_SUBVOL_PATTERN "dht.file.hashed-subvol.*"
+#define DHT_DBG_HASHED_SUBVOL_KEY "dht.file.hashed-subvol."
-typedef enum {
- DHT_HASH_TYPE_DM,
- DHT_HASH_TYPE_DM_USER,
-} dht_hashfn_type_t;
+/* Rebalance nodeuuid flags */
+#define REBAL_NODEUUID_MINE 0x01
-/* rebalance related */
-struct dht_rebalance_ {
- xlator_t *from_subvol;
- xlator_t *target_node;
- off_t offset;
- size_t size;
- int32_t flags;
- int count;
- struct iobref *iobref;
- struct iovec *vector;
- struct iatt stbuf;
- dht_defrag_cbk_fn_t target_op_fn;
- dict_t *xdata;
-};
+typedef int (*dht_selfheal_dir_cbk_t)(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+typedef int (*dht_defrag_cbk_fn_t)(xlator_t *this, xlator_t *dst_node,
+ call_frame_t *frame, int ret);
-struct dht_local {
- int call_cnt;
- loc_t loc;
- loc_t loc2;
- int op_ret;
- int op_errno;
- int layout_mismatch;
- /* Use stbuf as the postbuf, when we require both
- * pre and post attrs */
- struct iatt stbuf;
- struct iatt prebuf;
- struct iatt preoldparent;
- struct iatt postoldparent;
- struct iatt preparent;
- struct iatt postparent;
- struct statvfs statvfs;
- fd_t *fd;
- inode_t *inode;
- dict_t *params;
- dict_t *xattr;
- dict_t *xattr_req;
- dht_layout_t *layout;
- size_t size;
- ino_t ia_ino;
- xlator_t *src_hashed, *src_cached;
- xlator_t *dst_hashed, *dst_cached;
- xlator_t *cached_subvol;
- xlator_t *hashed_subvol;
- char need_selfheal;
- int file_count;
- int dir_count;
- call_frame_t *main_frame;
- int fop_succeeded;
- struct {
- fop_mknod_cbk_t linkfile_cbk;
- struct iatt stbuf;
- loc_t loc;
- inode_t *inode;
- dict_t *xattr;
- xlator_t *srcvol;
- } linkfile;
- struct {
- uint32_t hole_cnt;
- uint32_t overlaps_cnt;
- uint32_t missing;
- uint32_t down;
- uint32_t misc;
- dht_selfheal_dir_cbk_t dir_cbk;
- dht_layout_t *layout;
- } selfheal;
- uint32_t uid;
- uint32_t gid;
+typedef int (*dht_refresh_layout_unlock)(call_frame_t *frame, xlator_t *this,
+ int op_ret, int invoke_cbk);
- /* needed by nufa */
- int32_t flags;
- mode_t mode;
- dev_t rdev;
- mode_t umask;
+typedef int (*dht_refresh_layout_done_handle)(call_frame_t *frame);
- /* need for file-info */
- char *xattr_val;
- char *key;
+struct dht_layout {
+ int spread_cnt; /* layout spread count per directory,
+ is controlled by 'setxattr()' with
+ special key */
+ int cnt;
+ int preset;
+ /*
+ * The last *configuration* state for which this directory was known
+ * to be in balance. The corresponding vol_commit_hash changes
+ * whenever bricks are added or removed. This value changes when a
+ * (full) rebalance is complete. If they match, it's safe to assume
+ * that every file is where it should be and there's no need to do
+ * lookups for files elsewhere. If they don't, then we have to do a
+ * global lookup to be sure.
+ */
+ uint32_t commit_hash;
+ /*
+ * The *runtime* state of the volume, changes when connections to
+ * bricks are made or lost.
+ */
+ int gen;
+ int type;
+ gf_atomic_t ref; /* use with dht_conf_t->layout_lock */
+ uint32_t search_unhashed;
+ struct {
+ int err; /* 0 = normal
+ -1 = dir exists and no xattr
+ >0 = dir lookup failed with errno
+ */
+ uint32_t start;
+ uint32_t stop;
+ uint32_t commit_hash;
+ xlator_t *xlator;
+ } list[];
+};
+typedef struct dht_layout dht_layout_t;
+
+struct dht_stat_time {
+ uint32_t atime;
+ uint32_t atime_nsec;
+ uint32_t ctime;
+ uint32_t ctime_nsec;
+ uint32_t mtime;
+ uint32_t mtime_nsec;
+};
+
+typedef struct dht_stat_time dht_stat_time_t;
+
+struct dht_inode_ctx {
+ dht_layout_t *layout;
+ dht_stat_time_t time;
+ xlator_t *lock_subvol;
+ xlator_t *mds_subvol; /* This is only used for directories */
+};
- /* which xattr request? */
- char xsel[256];
+typedef struct dht_inode_ctx dht_inode_ctx_t;
- char *newpath;
+typedef enum {
+ DHT_HASH_TYPE_DM,
+ DHT_HASH_TYPE_DM_USER,
+} dht_hashfn_type_t;
- /* gfid related */
- uuid_t gfid;
+typedef enum {
+ DHT_INODELK,
+ DHT_ENTRYLK,
+} dht_lock_type_t;
- /*Marker Related*/
- struct marker_str marker;
+/* rebalance related */
+struct dht_rebalance_ {
+ xlator_t *from_subvol;
+ xlator_t *target_node;
+ off_t offset;
+ size_t size;
+ int32_t flags;
+ int count;
+ struct iobref *iobref;
+ struct iovec *vector;
+ struct iatt stbuf;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ dht_defrag_cbk_fn_t target_op_fn;
+ dict_t *xdata;
+ dict_t *xattr;
+ dict_t *dict;
+ struct gf_flock flock;
+ int32_t set;
+ int lock_cmd;
+};
- /* flag used to make sure we need to return estale in
- {lookup,revalidate}_cbk */
- char return_estale;
- char need_lookup_everywhere;
+/**
+ * Enum to store decided action based on the qdstatfs (quota-deem-statfs)
+ * events
+ **/
+typedef enum {
+ qdstatfs_action_OFF = 0,
+ qdstatfs_action_REPLACE,
+ qdstatfs_action_NEGLECT,
+ qdstatfs_action_COMPARE,
+} qdstatfs_action_t;
- glusterfs_fop_t fop;
+typedef enum {
+ REACTION_INVALID,
+ FAIL_ON_ANY_ERROR,
+ IGNORE_ENOENT_ESTALE,
+ IGNORE_ENOENT_ESTALE_EIO,
+} dht_reaction_type_t;
+
+struct dht_skip_linkto_unlink {
+ xlator_t *hash_links_to;
+ uuid_t cached_gfid;
+ uuid_t hashed_gfid;
+ int opend_fd_count;
+ gf_boolean_t handle_valid_link;
+};
- struct dht_rebalance_ rebalance;
+typedef struct {
+ xlator_t *xl;
+ loc_t loc; /* contains/points to inode to lock on. */
+ char *domain; /* Only locks within a single domain
+ * contend with each other
+ */
+ char *basename; /* Required for entrylk */
+ gf_boolean_t locked;
+ dht_reaction_type_t do_on_failure;
+ short type; /* read/write lock. */
+ gf_lkowner_t lk_owner;
+} dht_lock_t;
+
+/* The lock structure represents inodelk. */
+typedef struct {
+ fop_inodelk_cbk_t inodelk_cbk;
+ dht_lock_t **locks;
+ int lk_count;
+ dht_reaction_type_t reaction;
+
+ /* whether locking failed on _any_ of the "locks" above */
+ int op_ret;
+ int op_errno;
+} dht_ilock_wrap_t;
+
+/* The lock structure represents entrylk. */
+typedef struct {
+ fop_entrylk_cbk_t entrylk_cbk;
+ dht_lock_t **locks;
+ int lk_count;
+ dht_reaction_type_t reaction;
+
+ /* whether locking failed on _any_ of the "locks" above */
+ int op_ret;
+ int op_errno;
+} dht_elock_wrap_t;
+
+/* The first member of dht_dir_transaction_t should be of type dht_ilock_wrap_t.
+ * Otherwise it can result in subtle memory corruption issues as in most of the
+ * places we use lock[0].layout.my_layout or lock[0].layout.parent_layout and
+ * lock[0].ns.parent_layout (like in dht_local_wipe).
+ */
+typedef union {
+ union {
+ dht_ilock_wrap_t my_layout;
+ dht_ilock_wrap_t parent_layout;
+ } layout;
+ struct dht_namespace {
+ dht_ilock_wrap_t parent_layout;
+ dht_elock_wrap_t directory_ns;
+ fop_entrylk_cbk_t ns_cbk;
+ } ns;
+} dht_dir_transaction_t;
+
+typedef int (*dht_selfheal_layout_t)(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout);
+
+typedef gf_boolean_t (*dht_need_heal_t)(call_frame_t *frame,
+ dht_layout_t **inmem,
+ dht_layout_t **ondisk);
+struct dht_local {
+ loc_t loc;
+ loc_t loc2;
+ int call_cnt;
+ int op_ret;
+ int op_errno;
+ int layout_mismatch;
+ /* Use stbuf as the postbuf, when we require both
+ * pre and post attrs */
+ struct iatt stbuf;
+ struct iatt mds_stbuf;
+ struct iatt prebuf;
+ struct iatt preoldparent;
+ struct iatt postoldparent;
+ struct iatt preparent;
+ struct iatt postparent;
+ struct statvfs statvfs;
+ fd_t *fd;
+ inode_t *inode;
+ dict_t *params;
+ dict_t *xattr;
+ dict_t *mds_xattr;
+ dict_t *xdata; /* dict used to save xdata response by xattr fop */
+ dict_t *xattr_req;
+ dht_layout_t *layout;
+ size_t size;
+ ino_t ia_ino;
+ xlator_t *src_hashed, *src_cached;
+ xlator_t *dst_hashed, *dst_cached;
+ xlator_t *cached_subvol;
+ xlator_t *hashed_subvol;
+ xlator_t *mds_subvol; /* This is use for dir only */
+ int file_count;
+ int dir_count;
+ call_frame_t *main_frame;
+ int fop_succeeded;
+ struct {
+ fop_mknod_cbk_t linkfile_cbk;
+ struct iatt stbuf;
+ loc_t loc;
+ inode_t *inode;
+ dict_t *xattr;
+ xlator_t *srcvol;
+ } linkfile;
+ struct {
+ uint32_t hole_cnt;
+ uint32_t overlaps_cnt;
+ uint32_t down;
+ uint32_t misc;
+ dht_selfheal_dir_cbk_t dir_cbk;
+ dht_selfheal_layout_t healer;
+ dht_need_heal_t should_heal;
+ dht_layout_t *layout, *refreshed_layout;
+ uint32_t missing_cnt;
+ gf_boolean_t force_mkdir;
+ } selfheal;
+
+ dht_refresh_layout_unlock refresh_layout_unlock;
+ dht_refresh_layout_done_handle refresh_layout_done;
+
+ uint32_t uid;
+ uint32_t gid;
+ pid_t pid;
+
+ glusterfs_fop_t fop;
+
+ /* need for file-info */
+ char *xattr_val;
+ char *key;
+
+ /* needed by nufa */
+ int32_t flags;
+ mode_t mode;
+ dev_t rdev;
+ mode_t umask;
+
+ /* which xattr request? */
+ char xsel[256];
+ int32_t alloc_len;
+
+ /* gfid related */
+ uuid_t gfid;
+ uuid_t gfid_req;
+
+ xlator_t *link_subvol;
+
+ struct dht_rebalance_ rebalance;
+ xlator_t *first_up_subvol;
+
+ struct dht_skip_linkto_unlink skip_unlink;
+
+ dht_dir_transaction_t lock[2], *current;
+
+ /* inodelks during filerename for backward compatibility */
+ dht_lock_t **rename_inodelk_backward_compatible;
+
+ call_stub_t *stub;
+ int32_t parent_disk_layout[4];
+
+ /* rename rollback */
+ int *ret_cache;
+
+ loc_t loc2_copy;
+
+ int rename_inodelk_bc_count;
+ /* This is use only for directory operation */
+ int32_t valid;
+ int32_t mds_heal_fresh_lookup;
+ short lock_type;
+ char need_selfheal;
+ char need_xattr_heal;
+ char need_attrheal;
+ /* flag used to make sure we need to return estale in
+ {lookup,revalidate}_cbk */
+ char return_estale;
+ char need_lookup_everywhere;
+ /* fd open check */
+ gf_boolean_t fd_checked;
+ gf_boolean_t linked;
+ gf_boolean_t added_link;
+ gf_boolean_t is_linkfile;
+ gf_boolean_t quota_deem_statfs;
+ gf_boolean_t heal_layout;
+ gf_boolean_t locked;
+ gf_boolean_t dont_create_linkto;
+ gf_boolean_t gfid_missing;
};
typedef struct dht_local dht_local_t;
/* du - disk-usage */
struct dht_du {
- double avail_percent;
- double avail_inodes;
- uint64_t avail_space;
- uint32_t log;
+ double avail_percent;
+ double avail_inodes;
+ uint64_t avail_space;
+ uint32_t log;
+ uint32_t chunks;
+ uint32_t total_blocks;
+ uint32_t avail_blocks;
+ uint32_t frsize; /*fragment size*/
};
typedef struct dht_du dht_du_t;
enum gf_defrag_type {
- GF_DEFRAG_CMD_START = 1,
- GF_DEFRAG_CMD_STOP = 1 + 1,
- GF_DEFRAG_CMD_STATUS = 1 + 2,
- GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
- GF_DEFRAG_CMD_START_FORCE = 1 + 4,
+ GF_DEFRAG_CMD_NONE = 0,
+ GF_DEFRAG_CMD_START = 1,
+ GF_DEFRAG_CMD_STOP = 1 + 1,
+ GF_DEFRAG_CMD_STATUS = 1 + 2,
+ GF_DEFRAG_CMD_START_LAYOUT_FIX = 1 + 3,
+ GF_DEFRAG_CMD_START_FORCE = 1 + 4,
+ GF_DEFRAG_CMD_DETACH_STATUS = 1 + 11,
+ GF_DEFRAG_CMD_DETACH_START = 1 + 13,
+ GF_DEFRAG_CMD_DETACH_COMMIT = 1 + 14,
+ GF_DEFRAG_CMD_DETACH_COMMIT_FORCE = 1 + 15,
+ GF_DEFRAG_CMD_DETACH_STOP = 1 + 16,
+ /* new labels are used so it will help
+ * while removing old labels by easily differentiating.
+ * A few labels are added so that the count remains same
+ * between this enum and the ones on the xdr file.
+ * different values for the same enum cause errors and
+ * confusion.
+ */
};
typedef enum gf_defrag_type gf_defrag_type;
enum gf_defrag_status_t {
- GF_DEFRAG_STATUS_NOT_STARTED,
- GF_DEFRAG_STATUS_STARTED,
- GF_DEFRAG_STATUS_STOPPED,
- GF_DEFRAG_STATUS_COMPLETE,
- GF_DEFRAG_STATUS_FAILED,
+ GF_DEFRAG_STATUS_NOT_STARTED,
+ GF_DEFRAG_STATUS_STARTED,
+ GF_DEFRAG_STATUS_STOPPED,
+ GF_DEFRAG_STATUS_COMPLETE,
+ GF_DEFRAG_STATUS_FAILED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE,
+ GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED,
};
typedef enum gf_defrag_status_t gf_defrag_status_t;
+typedef struct gf_defrag_pattern_list gf_defrag_pattern_list_t;
-struct gf_defrag_info_ {
- uint64_t total_files;
- uint64_t total_data;
- uint64_t num_files_lookedup;
- uint64_t total_failures;
- gf_lock_t lock;
- int cmd;
- pthread_t th;
- gf_defrag_status_t defrag_status;
- struct rpc_clnt *rpc;
- uint32_t connected;
- uint32_t is_exiting;
- pid_t pid;
- inode_t *root_inode;
- uuid_t node_uuid;
- struct timeval start_time;
- gf_boolean_t stats;
+struct gf_defrag_pattern_list {
+ char path_pattern[256];
+ uint64_t size;
+ gf_defrag_pattern_list_t *next;
+};
+struct dht_container {
+ union {
+ struct list_head list;
+ struct {
+ struct _gf_dirent_t *next;
+ struct _gf_dirent_t *prev;
+ };
+ };
+ gf_dirent_t *df_entry;
+ xlator_t *this;
+ loc_t *parent_loc;
+ dict_t *migrate_data;
+ int local_subvol_index;
+};
+
+typedef struct nodeuuid_info {
+ char info; /* Set to 1 is this is my node's uuid*/
+ uuid_t uuid; /* Store the nodeuuid as well for debugging*/
+} nodeuuid_info_t;
+
+typedef struct subvol_nodeuuids_info {
+ nodeuuid_info_t *elements;
+ int count;
+} subvol_nodeuuids_info_t;
+
+struct gf_defrag_info_ {
+ uint64_t total_files;
+ uint64_t total_data;
+ uint64_t num_files_lookedup;
+ uint64_t total_failures;
+ uint64_t skipped;
+ uint64_t num_dirs_processed;
+ uint64_t size_processed;
+ gf_lock_t lock;
+ pthread_t th;
+ struct rpc_clnt *rpc;
+ uint32_t connected;
+ uint32_t is_exiting;
+ pid_t pid;
+ int cmd;
+ inode_t *root_inode;
+ uuid_t node_uuid;
+ time_t start_time;
+ uint32_t new_commit_hash;
+ gf_defrag_status_t defrag_status;
+ gf_defrag_pattern_list_t *defrag_pattern;
+
+ pthread_cond_t parallel_migration_cond;
+ pthread_mutex_t dfq_mutex;
+ pthread_cond_t rebalance_crawler_alarm;
+ int32_t q_entry_count;
+ int32_t global_error;
+ struct dht_container *queue;
+ int32_t crawl_done;
+ int32_t abort;
+ int32_t wakeup_crawler;
+
+ /*Throttle params*/
+ /*stands for reconfigured thread count*/
+ int32_t recon_thread_count;
+ pthread_cond_t df_wakeup_thread;
+
+ /* backpointer to make it easier to write functions for rebalance */
+ xlator_t *this;
+
+ pthread_cond_t fc_wakeup_cond;
+ pthread_mutex_t fc_mutex;
+
+ /*stands for current running thread count*/
+ int32_t current_thread_count;
+
+ gf_boolean_t stats;
+ /* lock migration flag */
+ gf_boolean_t lock_migration_enabled;
};
typedef struct gf_defrag_info_ gf_defrag_info_t;
+struct dht_methods_s {
+ int32_t (*migration_get_dst_subvol)(xlator_t *this, dht_local_t *local);
+ int32_t (*migration_other)(xlator_t *this, gf_defrag_info_t *defrag);
+ xlator_t *(*layout_search)(xlator_t *this, dht_layout_t *layout,
+ const char *name);
+};
+
+typedef struct dht_methods_s dht_methods_t;
+
struct dht_conf {
- gf_lock_t subvolume_lock;
- int subvolume_cnt;
- xlator_t **subvolumes;
- char *subvolume_status;
- int *last_event;
- dht_layout_t **file_layouts;
- dht_layout_t **dir_layouts;
- gf_boolean_t search_unhashed;
- int gen;
- dht_du_t *du_stats;
- uint64_t min_free_disk;
- uint32_t min_free_inodes;
- char disk_unit;
- int32_t refresh_interval;
- gf_boolean_t unhashed_sticky_bit;
- struct timeval last_stat_fetch;
- gf_lock_t layout_lock;
- void *private; /* Can be used by wrapper xlators over
- dht */
- gf_boolean_t use_readdirp;
- char vol_uuid[UUID_SIZE + 1];
- gf_boolean_t assert_no_child_down;
- time_t *subvol_up_time;
-
- /* This is the count used as the distribute layout for a directory */
- /* Will be a global flag to control the layout spread count */
- uint32_t dir_spread_cnt;
-
- /* to keep track of nodes which are decomissioned */
- xlator_t **decommissioned_bricks;
- int decommission_in_progress;
-
- /* defrag related */
- gf_defrag_info_t *defrag;
+ xlator_t **subvolumes;
+ char *subvolume_status;
+ int *last_event;
+ dht_layout_t **file_layouts;
+ dht_layout_t **dir_layouts;
+ unsigned int search_unhashed;
+ int gen;
+ dht_du_t *du_stats;
+ double min_free_disk;
+ double min_free_inodes;
+ int subvolume_cnt;
+ int32_t refresh_interval;
+ gf_lock_t subvolume_lock;
+ time_t last_stat_fetch;
+ gf_lock_t layout_lock;
+ dict_t *leaf_to_subvol;
+ void *private; /* Can be used by wrapper xlators over
+ dht */
+ time_t *subvol_up_time;
+
+ /* to keep track of nodes which are decommissioned */
+ xlator_t **decommissioned_bricks;
+ int decommission_in_progress;
+ int decommission_subvols_cnt;
+
+ /* defrag related */
+ gf_defrag_info_t *defrag;
+
+ /* Support regex-based name reinterpretation. */
+ regex_t rsync_regex;
+ regex_t extra_regex;
+
+ /* Support variable xattr names. */
+ char *xattr_name;
+ char *mds_xattr_key;
+ char *link_xattr_name;
+ char *commithash_xattr_name;
+ char *wild_xattr_name;
+
+ dht_methods_t methods;
+
+ struct mem_pool *lock_pool;
+
+ /*local subvol storage for rebalance*/
+ xlator_t **local_subvols;
+ subvol_nodeuuids_info_t *local_nodeuuids;
+ int32_t local_subvols_cnt;
+
+ int dthrottle;
+
+ /* Hard link handle requirement for migration triggered from client*/
+ synclock_t link_lock;
+
+ /* lock migration */
+ gf_lock_t lock;
+
+ /* This is the count used as the distribute layout for a directory */
+ /* Will be a global flag to control the layout spread count */
+ uint32_t dir_spread_cnt;
+
+ /*
+ * "Commit hash" for this volume topology. Changed whenever bricks
+ * are added or removed.
+ */
+ uint32_t vol_commit_hash;
+
+ char vol_uuid[UUID_SIZE + 1];
+
+ char disk_unit;
+
+ gf_boolean_t lock_migration_enabled;
+
+ gf_boolean_t vch_forced;
+
+ gf_boolean_t use_fallocate;
+
+ gf_boolean_t force_migration;
+
+ gf_boolean_t lookup_optimize;
+
+ gf_boolean_t unhashed_sticky_bit;
+
+ gf_boolean_t assert_no_child_down;
+
+ gf_boolean_t use_readdirp;
+
+ /* Request to filter directory entries in readdir request */
+ gf_boolean_t readdir_optimize;
+
+ gf_boolean_t rsync_regex_valid;
+
+ gf_boolean_t extra_regex_valid;
+
+ /* Support size-weighted rebalancing (heterogeneous bricks). */
+ gf_boolean_t do_weighting;
+
+ gf_boolean_t randomize_by_gfid;
};
typedef struct dht_conf dht_conf_t;
+struct dht_dfoffset_ctx {
+ xlator_t *this;
+ off_t offset;
+ int32_t readdir_done;
+};
+typedef struct dht_dfoffset_ctx dht_dfoffset_ctx_t;
struct dht_disk_layout {
- uint32_t cnt;
- uint32_t type;
- struct {
- uint32_t start;
- uint32_t stop;
- } list[1];
+ uint32_t cnt;
+ uint32_t type;
+ struct {
+ uint32_t start;
+ uint32_t stop;
+ } list[1];
};
typedef struct dht_disk_layout dht_disk_layout_t;
typedef enum {
- GF_DHT_MIGRATE_DATA,
- GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS,
- GF_DHT_MIGRATE_HARDLINK,
- GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS
+ GF_DHT_MIGRATE_DATA,
+ GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS,
+ GF_DHT_MIGRATE_HARDLINK,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS
} gf_dht_migrate_data_type_t;
+typedef enum {
+ GF_DHT_EQUAL_DISTRIBUTION,
+ GF_DHT_WEIGHTED_DISTRIBUTION
+} dht_distribution_type_t;
+
+struct dir_dfmeta {
+ gf_dirent_t *equeue;
+ dht_dfoffset_ctx_t *offset_var;
+ struct list_head **head;
+ struct list_head **iterator;
+ int *fetch_entries;
+ /* fds corresponding to local subvols only */
+ fd_t **lfd;
+};
+
+typedef struct dht_migrate_info {
+ xlator_t *src_subvol;
+ xlator_t *dst_subvol;
+ GF_REF_DECL;
+} dht_migrate_info_t;
+
+typedef struct dht_fd_ctx {
+ uint64_t opened_on_dst;
+ GF_REF_DECL;
+} dht_fd_ctx_t;
+
#define ENTRY_MISSING(op_ret, op_errno) (op_ret == -1 && op_errno == ENOENT)
-#define is_revalidate(loc) (inode_ctx_get (loc->inode, this, NULL) == 0)
+#define is_revalidate(loc) \
+ (dht_inode_ctx_layout_get((loc)->inode, this, NULL) == 0)
#define is_last_call(cnt) (cnt == 0)
#define DHT_MIGRATION_IN_PROGRESS 1
-#define DHT_MIGRATION_COMPLETED 2
+#define DHT_MIGRATION_COMPLETED 2
-#define DHT_LINKFILE_KEY "trusted.glusterfs.dht.linkto"
-#define DHT_LINKFILE_MODE (S_ISVTX)
+#define check_is_linkfile(i, s, x, n) \
+ (IS_DHT_LINKFILE_MODE(s) && dict_get(x, n))
-#define check_is_linkfile(i,s,x) ( \
- ((st_mode_from_ia ((s)->ia_prot, (s)->ia_type) & ~S_IFMT) \
- == DHT_LINKFILE_MODE) && \
- dict_get (x, DHT_LINKFILE_KEY))
+#define IS_DHT_MIGRATION_PHASE2(buf) \
+ (IA_ISREG((buf)->ia_type) && \
+ ((st_mode_from_ia((buf)->ia_prot, (buf)->ia_type) & ~S_IFMT) == \
+ DHT_LINKFILE_MODE))
-#define IS_DHT_MIGRATION_PHASE2(buf) ( \
- IA_ISREG ((buf)->ia_type) && \
- ((st_mode_from_ia ((buf)->ia_prot, (buf)->ia_type) & \
- ~S_IFMT) == DHT_LINKFILE_MODE))
+#define IS_DHT_MIGRATION_PHASE1(buf) \
+ (IA_ISREG((buf)->ia_type) && ((buf)->ia_prot.sticky == 1) && \
+ ((buf)->ia_prot.sgid == 1))
-#define IS_DHT_MIGRATION_PHASE1(buf) ( \
- IA_ISREG ((buf)->ia_type) && \
- ((buf)->ia_prot.sticky == 1) && \
- ((buf)->ia_prot.sgid == 1))
+#define DHT_STRIP_PHASE1_FLAGS(buf) \
+ do { \
+ if ((buf) && IS_DHT_MIGRATION_PHASE1(buf)) { \
+ (buf)->ia_prot.sticky = 0; \
+ (buf)->ia_prot.sgid = 0; \
+ } \
+ } while (0)
-#define DHT_STRIP_PHASE1_FLAGS(buf) do { \
- if ((buf) && IS_DHT_MIGRATION_PHASE1(buf)) { \
- (buf)->ia_prot.sticky = 0; \
- (buf)->ia_prot.sgid = 0; \
- } \
- } while (0)
+#define dht_inode_missing(op_errno) (op_errno == ENOENT || op_errno == ESTALE)
-#define check_is_dir(i,s,x) (IA_ISDIR(s->ia_type))
+#define check_is_dir(i, s, x) (IA_ISDIR(s->ia_type))
#define layout_is_sane(layout) ((layout) && (layout->cnt > 0))
-#define DHT_STACK_UNWIND(fop, frame, params ...) do { \
- dht_local_t *__local = NULL; \
- xlator_t *__xl = NULL; \
- if (frame) { \
- __xl = frame->this; \
- __local = frame->local; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT (fop, frame, params); \
- dht_local_wipe (__xl, __local); \
- } while (0)
-
-#define DHT_STACK_DESTROY(frame) do { \
- dht_local_t *__local = NULL; \
- xlator_t *__xl = NULL; \
- __xl = frame->this; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- dht_local_wipe (__xl, __local); \
- } while (0)
-
-dht_layout_t *dht_layout_new (xlator_t *this, int cnt);
-dht_layout_t *dht_layout_get (xlator_t *this, inode_t *inode);
-dht_layout_t *dht_layout_for_subvol (xlator_t *this, xlator_t *subvol);
-xlator_t *dht_layout_search (xlator_t *this, dht_layout_t *layout,
- const char *name);
-int dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout);
-int dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
- uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p,
- uint32_t *misc_p);
-int dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout,
- xlator_t *subvol, loc_t *loc, dict_t *xattr);
-
-xlator_t *dht_linkfile_subvol (xlator_t *this, inode_t *inode,
- struct iatt *buf, dict_t *xattr);
-int dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
- xlator_t *subvol, loc_t *loc);
-
-int dht_layouts_init (xlator_t *this, dht_conf_t *conf);
-int dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- int op_ret, int op_errno, dict_t *xattr);
-
-int dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t **disk_layout_p);
-int dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
- int pos, void *disk_layout_raw, int disk_layout_len);
-
-
-int dht_frame_return (call_frame_t *frame);
-
-int dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y);
-int dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol,
- uint64_t *x);
-
-void dht_local_wipe (xlator_t *this, dht_local_t *local);
-dht_local_t *dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd,
- glusterfs_fop_t fop);
-int dht_iatt_merge (xlator_t *this, struct iatt *to, struct iatt *from,
- xlator_t *subvol);
-
-xlator_t *dht_subvol_get_hashed (xlator_t *this, loc_t *loc);
-xlator_t *dht_subvol_get_cached (xlator_t *this, inode_t *inode);
-xlator_t *dht_subvol_next (xlator_t *this, xlator_t *prev);
-int dht_subvol_cnt (xlator_t *this, xlator_t *subvol);
-
-int dht_hash_compute (int type, const char *name, uint32_t *hash_p);
-
-int dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
- xlator_t *tovol, xlator_t *fromvol, loc_t *loc);
-int dht_lookup_directory (call_frame_t *frame, xlator_t *this, loc_t *loc);
-int dht_lookup_everywhere (call_frame_t *frame, xlator_t *this, loc_t *loc);
-int
-dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
-int
-dht_selfheal_new_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- dht_layout_t *layout);
-int
-dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
- loc_t *loc, dht_layout_t *layout);
-int
-dht_layout_sort_volname (dht_layout_t *layout);
-
-int dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc);
-
-gf_boolean_t dht_is_subvol_filled (xlator_t *this, xlator_t *subvol);
-xlator_t *dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol);
-int dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx);
-
-int dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode);
-int dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout);;
-void dht_layout_unref (xlator_t *this, dht_layout_t *layout);
-dht_layout_t *dht_layout_ref (xlator_t *this, dht_layout_t *layout);
-xlator_t *dht_first_up_subvol (xlator_t *this);
-xlator_t *dht_last_up_subvol (xlator_t *this);
-
-int dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name);
-
-int dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
- xlator_t **subvol);
-
-int dht_rename_cleanup (call_frame_t *frame);
-int dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata);
-
-int dht_fix_directory_layout (call_frame_t *frame,
- dht_selfheal_dir_cbk_t dir_cbk,
- dht_layout_t *layout);
-
-int dht_init_subvolumes (xlator_t *this, dht_conf_t *conf);
+#define we_are_not_migrating(x) ((x) == 1)
+
+#define DHT_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ dht_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ dht_local_wipe(__xl, __local); \
+ } while (0)
+
+#define DHT_STACK_DESTROY(frame) \
+ do { \
+ dht_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY(frame->root); \
+ dht_local_wipe(__xl, __local); \
+ } while (0)
+
+#define DHT_UPDATE_TIME(ctx_sec, ctx_nsec, new_sec, new_nsec, post) \
+ do { \
+ if (ctx_sec == new_sec) \
+ new_nsec = max(new_nsec, ctx_nsec); \
+ else if (ctx_sec > new_sec) { \
+ new_sec = ctx_sec; \
+ new_nsec = ctx_nsec; \
+ } \
+ if (post) { \
+ ctx_sec = new_sec; \
+ ctx_nsec = new_nsec; \
+ } \
+ } while (0)
+
+#define is_greater_time(a, an, b, bn) \
+ (((a) < (b)) || (((a) == (b)) && ((an) < (bn))))
+
+#define DHT_MARK_FOP_INTERNAL(xattr) \
+ do { \
+ int tmp = -1; \
+ if (!xattr) { \
+ xattr = dict_new(); \
+ if (!xattr) \
+ break; \
+ } \
+ tmp = dict_set_str(xattr, GLUSTERFS_INTERNAL_FOP_KEY, "yes"); \
+ if (tmp) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \
+ "Failed to set dictionary value: key = %s," \
+ " path = %s", \
+ GLUSTERFS_INTERNAL_FOP_KEY, local->loc.path); \
+ } \
+ } while (0)
+
+dht_layout_t *
+dht_layout_new(xlator_t *this, int cnt);
+dht_layout_t *
+dht_layout_get(xlator_t *this, inode_t *inode);
+dht_layout_t *
+dht_layout_for_subvol(xlator_t *this, xlator_t *subvol);
+xlator_t *
+dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name);
+int32_t
+dht_migration_get_dst_subvol(xlator_t *this, dht_local_t *local);
+int32_t
+dht_migration_needed(xlator_t *this);
+int
+dht_layout_normalize(xlator_t *this, loc_t *loc, dht_layout_t *layout);
+void
+dht_layout_anomalies(xlator_t *this, loc_t *loc, dht_layout_t *layout,
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p,
+ uint32_t *no_space_p);
+int
+dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ loc_t *loc, dict_t *xattr);
+xlator_t *
+dht_linkfile_subvol(xlator_t *this, inode_t *inode, struct iatt *buf,
+ dict_t *xattr);
+int
+dht_linkfile_unlink(call_frame_t *frame, xlator_t *this, xlator_t *subvol,
+ loc_t *loc);
+
+int
+dht_layouts_init(xlator_t *this, dht_conf_t *conf);
+int
+dht_layout_merge(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ int op_ret, int op_errno, dict_t *xattr);
+
+int
+dht_disk_layout_extract(xlator_t *this, dht_layout_t *layout, int pos,
+ int32_t **disk_layout_p);
+int
+dht_disk_layout_extract_for_subvol(xlator_t *this, dht_layout_t *layout,
+ xlator_t *subvol, int32_t **disk_layout_p);
+
+int
+dht_frame_return(call_frame_t *frame);
+
+int
+dht_deitransform(xlator_t *this, uint64_t y, xlator_t **subvol);
+
+void
+dht_local_wipe(xlator_t *this, dht_local_t *local);
+dht_local_t *
+dht_local_init(call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop);
+int
+dht_iatt_merge(xlator_t *this, struct iatt *to, struct iatt *from);
+
+xlator_t *
+dht_subvol_get_hashed(xlator_t *this, loc_t *loc);
+xlator_t *
+dht_subvol_get_cached(xlator_t *this, inode_t *inode);
+xlator_t *
+dht_subvol_next(xlator_t *this, xlator_t *prev);
+xlator_t *
+dht_subvol_next_available(xlator_t *this, xlator_t *prev);
+int
+dht_subvol_cnt(xlator_t *this, xlator_t *subvol);
+
+int
+dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p);
+
+int
+dht_linkfile_create(call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+ xlator_t *this, xlator_t *tovol, xlator_t *fromvol,
+ loc_t *loc);
+int
+dht_lookup_everywhere(call_frame_t *frame, xlator_t *this, loc_t *loc);
+int
+dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
+int
+dht_selfheal_new_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ dht_layout_t *layout);
+int
+dht_selfheal_restore(call_frame_t *frame, dht_selfheal_dir_cbk_t cbk,
+ loc_t *loc, dht_layout_t *layout);
+void
+dht_layout_sort_volname(dht_layout_t *layout);
+
+int
+dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc);
+
+gf_boolean_t
+dht_is_subvol_filled(xlator_t *this, xlator_t *subvol);
+xlator_t *
+dht_free_disk_available_subvol(xlator_t *this, xlator_t *subvol,
+ dht_local_t *layout);
+int
+dht_get_du_info_for_subvol(xlator_t *this, int subvol_idx);
+
+int
+dht_layout_preset(xlator_t *this, xlator_t *subvol, inode_t *inode);
+int
+dht_layout_set(xlator_t *this, inode_t *inode, dht_layout_t *layout);
+;
+void
+dht_layout_unref(xlator_t *this, dht_layout_t *layout);
+dht_layout_t *
+dht_layout_ref(xlator_t *this, dht_layout_t *layout);
+int
+dht_layout_index_for_subvol(dht_layout_t *layout, xlator_t *subvol);
+xlator_t *
+dht_first_up_subvol(xlator_t *this);
+xlator_t *
+dht_last_up_subvol(xlator_t *this);
+
+int
+dht_build_child_loc(xlator_t *this, loc_t *child, loc_t *parent, char *name);
+
+int
+dht_filter_loc_subvol_key(xlator_t *this, loc_t *loc, loc_t *new_loc,
+ xlator_t **subvol);
+
+int
+dht_rename_cleanup(call_frame_t *frame);
+int
+dht_rename_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
+
+int
+dht_update_commit_hash_for_layout(call_frame_t *frame);
+int
+dht_fix_directory_layout(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout);
+
+int
+dht_init_subvolumes(xlator_t *this, dht_conf_t *conf);
/* migration/rebalance */
-int dht_start_rebalance_task (xlator_t *this, call_frame_t *frame);
+int
+dht_start_rebalance_task(xlator_t *this, call_frame_t *frame);
-int dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame);
-int dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame);
+int
+dht_rebalance_in_progress_check(xlator_t *this, call_frame_t *frame);
+int
+dht_rebalance_complete_check(xlator_t *this, call_frame_t *frame);
+int
+dht_init_local_subvolumes(xlator_t *this, dht_conf_t *conf);
/* FOPS */
-int32_t dht_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req);
-
-int32_t dht_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-int32_t dht_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-int32_t dht_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset, dict_t *xdata);
-
-int32_t dht_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset, dict_t *xdata);
-
-int32_t dht_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask, dict_t *xdata);
-
-int32_t dht_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size, dict_t *xdata);
-
-int32_t dht_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
-
-int32_t dht_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata);
-
-int32_t dht_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata);
-
-int32_t dht_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, dict_t *xdata);
-
-int32_t dht_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, mode_t umask,
- dict_t *xdata);
-
-int32_t dht_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-int32_t dht_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata);
-
-int32_t dht_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *params);
-
-int32_t dht_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd, dict_t *xdata);
-
-int32_t dht_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset, uint32_t flags, dict_t *xdata);
-
-int32_t dht_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset,
- uint32_t flags,
- struct iobref *iobref, dict_t *xdata);
-
-int32_t dht_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata);
-
-int32_t dht_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-int32_t dht_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd, dict_t *xdata);
-
-int32_t dht_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t datasync, dict_t *xdata);
-
-int32_t dht_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata);
-
-int32_t dht_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-int32_t dht_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-
-int32_t dht_fsetxattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- dict_t *dict,
- int32_t flags, dict_t *xdata);
-
-int32_t dht_fgetxattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-int32_t dht_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata);
-int32_t dht_fremovexattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata);
-
-int32_t dht_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t dht_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t dht_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *flock, dict_t *xdata);
-
-int32_t dht_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-int32_t dht_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
-
-int32_t dht_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size, off_t off, dict_t *xdata);
-
-int32_t dht_readdirp (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size, off_t off, dict_t *dict);
-
-int32_t dht_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata);
-
-int32_t dht_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata);
-
-int32_t dht_forget (xlator_t *this, inode_t *inode);
-int32_t dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata);
-int32_t dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid, dict_t *xdata);
-
-int32_t dht_notify (xlator_t *this, int32_t event, void *data, ...);
+int32_t
+dht_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req);
+
+int32_t
+dht_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int32_t
+dht_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
+
+int32_t
+dht_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata);
+
+int32_t
+dht_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata);
+
+int32_t
+dht_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata);
+
+int32_t
+dht_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata);
+
+int32_t
+dht_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata);
+
+int32_t
+dht_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
+
+int32_t
+dht_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int32_t
+dht_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata);
+
+int32_t
+dht_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata);
+
+int32_t
+dht_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int32_t
+dht_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int32_t
+dht_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *params);
+
+int32_t
+dht_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+int32_t
+dht_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int32_t
+dht_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata);
+
+int32_t
+dht_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata);
+
+int32_t
+dht_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
+
+int32_t
+dht_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata);
+
+int32_t
+dht_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata);
+
+int32_t
+dht_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int32_t
+dht_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t
+dht_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata);
+
+int32_t
+dht_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata);
+
+int32_t
+dht_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata);
+
+int32_t
+dht_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
+int32_t
+dht_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+int32_t
+dht_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+dht_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata);
+
+int32_t
+dht_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+dht_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata);
+
+int32_t
+dht_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata);
+
+int32_t
+dht_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata);
+
+int32_t
+dht_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+int32_t
+dht_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict);
+
+int32_t
+dht_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+dht_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+
+int32_t
+dht_forget(xlator_t *this, inode_t *inode);
+int32_t
+dht_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
+int32_t
+dht_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
+int32_t
+dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata);
+int32_t
+dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata);
+int32_t
+dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata);
+int32_t
+dht_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata);
+
+int
+dht_set_subvol_range(xlator_t *this);
+int32_t
+dht_init(xlator_t *this);
+void
+dht_fini(xlator_t *this);
+int
+dht_reconfigure(xlator_t *this, dict_t *options);
+int32_t
+dht_notify(xlator_t *this, int32_t event, void *data, ...);
/* definitions for nufa/switch */
-int dht_revalidate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent);
-int dht_lookup_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+int
+dht_revalidate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent);
+int
+dht_lookup_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent);
+int
+dht_lookup_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, inode_t *inode,
struct iatt *stbuf, dict_t *xattr,
struct iatt *postparent);
-int dht_lookup_linkfile_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent);
-int dht_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent);
-int dht_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- fd_t *fd, inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata);
-int dht_newfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata);
+int
+dht_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent);
+int
+dht_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
+int
+dht_newfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata);
+
+int
+dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int
+dht_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xattr, dict_t *xdata);
+
+int
+dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+int
+gf_defrag_status_get(dht_conf_t *conf, dict_t *dict);
+
+int
+gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output);
+
+void *
+gf_defrag_start(void *this);
+
+int32_t
+gf_defrag_handle_hardlink(xlator_t *this, loc_t *loc, int *fop_errno);
+int
+dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ int flag, int *fop_errno);
+int
+dht_inode_ctx_layout_get(inode_t *inode, xlator_t *this,
+ dht_layout_t **layout_int);
+int
+dht_inode_ctx_layout_set(inode_t *inode, xlator_t *this,
+ dht_layout_t *layout_int);
+int
+dht_inode_ctx_time_update(inode_t *inode, xlator_t *this, struct iatt *stat,
+ int32_t update_ctx);
+void
+dht_inode_ctx_time_set(inode_t *inode, xlator_t *this, struct iatt *stat);
+
+int
+dht_inode_ctx_get(inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx);
+int
+dht_inode_ctx_set(inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx);
+int
+dht_dir_attr_heal(void *data);
+int
+dht_dir_attr_heal_done(int ret, call_frame_t *sync_frame, void *data);
+xlator_t *
+dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
+ xlator_t *ignore, dht_layout_t *layout,
+ uint64_t filesize);
+xlator_t *
+dht_subvol_maxspace_nonzeroinode(xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout);
+int
+dht_dir_has_layout(dict_t *xattr, char *name);
+int
+dht_linkfile_attr_heal(call_frame_t *frame, xlator_t *this);
+
+int32_t
+dht_priv_dump(xlator_t *this);
+int32_t
+dht_inodectx_dump(xlator_t *this, inode_t *inode);
+
+gf_boolean_t
+dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator);
+
+int
+dht_inode_ctx_get_mig_info(xlator_t *this, inode_t *inode,
+ xlator_t **src_subvol, xlator_t **dst_subvol);
+gf_boolean_t
+dht_mig_info_is_invalid(xlator_t *current, xlator_t *src_subvol,
+ xlator_t *dst_subvol);
+
+int
+dht_subvol_status(dht_conf_t *conf, xlator_t *subvol);
+
+void
+dht_log_new_layout_for_dir_selfheal(xlator_t *this, loc_t *loc,
+ dht_layout_t *layout);
int
-gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict);
+dht_layout_sort(dht_layout_t *layout);
int
-gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output);
+dht_heal_full_path(void *data);
-void*
-gf_defrag_start (void *this);
+int
+dht_heal_full_path_done(int op_ret, call_frame_t *frame, void *data);
+
+int
+dht_layout_missing_dirs(dht_layout_t *layout);
+
+int
+dht_refresh_layout(call_frame_t *frame);
+
+int
+dht_build_parent_loc(xlator_t *this, loc_t *parent, loc_t *child,
+ int32_t *op_errno);
+
+int32_t
+dht_set_local_rebalance(xlator_t *this, dht_local_t *local, struct iatt *stbuf,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+void
+dht_build_root_loc(inode_t *inode, loc_t *loc);
+
+gf_boolean_t
+dht_fd_open_on_dst(xlator_t *this, fd_t *fd, xlator_t *dst);
+
+int32_t
+dht_fd_ctx_destroy(xlator_t *this, fd_t *fd);
+
+int32_t
+dht_release(xlator_t *this, fd_t *fd);
+
+int32_t
+dht_set_fixed_dir_stat(struct iatt *stat);
+
+xlator_t *
+dht_get_lock_subvolume(xlator_t *this, struct gf_flock *lock,
+ dht_local_t *local);
+
+int
+dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret);
+
+int
+dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *subvol);
+
+int
+dht_check_and_open_fd_on_subvol(xlator_t *this, call_frame_t *frame);
+
+/* FD fop callbacks */
+
+int
+dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int
+dht_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata);
+
+int
+dht_file_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+
+int
+dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int
+dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int
+dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int
+dht_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int
+dht_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+int
+dht_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iovec *vector, int count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata);
+
+int
+dht_file_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *stbuf, dict_t *xdata);
+
+int
+dht_file_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata);
+
+int
+dht_file_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata);
+
+/* All custom xattr heal functions */
+int
+dht_dir_heal_xattrs(void *data);
+
+int
+dht_dir_heal_xattrs_done(int ret, call_frame_t *sync_frame, void *data);
+
+int32_t
+dht_dict_set_array(dict_t *dict, char *key, int32_t value[], int32_t size);
+
+int
+dht_set_user_xattr(dict_t *dict, char *k, data_t *v, void *data);
+
+void
+dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
+ dict_t *src, int *uret, int *uflag);
+
+int
+dht_dir_xattr_heal(xlator_t *this, dht_local_t *local, int *op_errno);
+
+int
+dht_common_mark_mdsxattr(call_frame_t *frame, int *errst, int flag);
+
+int
+dht_inode_ctx_mdsvol_get(inode_t *inode, xlator_t *this, xlator_t **mdsvol);
+
+int
+dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dht_layout_t *layout);
+
+/* Abstract out the DHT-IATT-IN-DICT */
+
+void
+dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new_layout);
+
+int
+dht_pt_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *key,
+ dict_t *xdata);
+
+int
+dht_pt_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *key, dict_t *xdata);
int32_t
-gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
- struct iatt *stbuf);
+dht_pt_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
+
+int
+dht_pt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
+
+int32_t
+dht_check_remote_fd_failed_error(dht_local_t *local, int op_ret, int op_errno);
+
+int
+dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata);
+
+int32_t
+dht_create_lock(call_frame_t *frame, xlator_t *subvol);
+
+int
+dht_set_parent_layout_in_dict(loc_t *loc, xlator_t *this, dht_local_t *local);
+
+int
+dht_dir_layout_error_check(xlator_t *this, inode_t *inode);
+
int
-dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
- int flag);
-#endif/* _DHT_H */
+dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol);
+#endif /* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-diskusage.c b/xlators/cluster/dht/src/dht-diskusage.c
index 52ea3a32aca..c0588828fdb 100644
--- a/xlators/cluster/dht/src/dht-diskusage.c
+++ b/xlators/cluster/dht/src/dht-diskusage.c
@@ -8,290 +8,480 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
/* TODO: add NS locking */
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "defaults.h"
#include <sys/time.h>
-
+#include <glusterfs/events.h>
int
-dht_du_info_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct statvfs *statvfs,
- dict_t *xdata)
+dht_du_info_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct statvfs *statvfs, dict_t *xdata)
{
- dht_conf_t *conf = NULL;
- call_frame_t *prev = NULL;
- int this_call_cnt = 0;
- int i = 0;
- double percent = 0;
- double percent_inodes = 0;
- uint64_t bytes = 0;
-
- conf = this->private;
- prev = cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get disk info from %s", prev->this->name);
- goto out;
- }
-
- if (statvfs && statvfs->f_blocks) {
- percent = (statvfs->f_bavail * 100) / statvfs->f_blocks;
- bytes = (statvfs->f_bavail * statvfs->f_frsize);
- }
-
- if (statvfs && statvfs->f_files) {
- percent_inodes = (statvfs->f_ffree * 100) / statvfs->f_files;
- } else {
- /* set percent inodes to 100 for dynamically allocated inode filesystems
- this logic holds good so that, distribute has nothing to worry about
- total inodes rather let the 'create()' to be scheduled on the hashed
- subvol regardless of the total inodes. since we have no awareness on
- loosing inodes this logic fits well
- */
- percent_inodes = 100;
- }
-
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++)
- if (prev->this == conf->subvolumes[i]) {
- conf->du_stats[i].avail_percent = percent;
- conf->du_stats[i].avail_space = bytes;
- conf->du_stats[i].avail_inodes = percent_inodes;
- gf_log (this->name, GF_LOG_DEBUG,
- "on subvolume '%s': avail_percent is: "
- "%.2f and avail_space is: %"PRIu64" "
- "and avail_inodes is: %.2f",
- prev->this->name,
- conf->du_stats[i].avail_percent,
- conf->du_stats[i].avail_space,
- conf->du_stats[i].avail_inodes);
- }
- }
- UNLOCK (&conf->subvolume_lock);
+ dht_conf_t *conf = NULL;
+ xlator_t *prev = NULL;
+ int this_call_cnt = 0;
+ int i = 0;
+ double percent = 0;
+ double percent_inodes = 0;
+ uint64_t bytes = 0;
+ uint32_t bpc; /* blocks per chunk */
+ uint32_t chunks = 0;
+
+ conf = this->private;
+ prev = cookie;
+
+ if (op_ret == -1 || !statvfs) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_GET_DISK_INFO_ERROR, "failed to get disk info from %s",
+ prev->name);
+ goto out;
+ }
+
+ if (statvfs->f_blocks) {
+ percent = (statvfs->f_bavail * 100) / statvfs->f_blocks;
+ bytes = (statvfs->f_bavail * statvfs->f_frsize);
+ /*
+ * A 32-bit count of 1MB chunks allows a maximum brick size of
+ * ~4PB. It's possible that we could see a single local FS
+ * bigger than that some day, but this code is likely to be
+ * irrelevant by then. Meanwhile, it's more important to keep
+ * the chunk size small so the layout-calculation code that
+ * uses this value can be tested on normal machines.
+ */
+ bpc = (1 << 20) / statvfs->f_bsize;
+ chunks = (statvfs->f_blocks + bpc - 1) / bpc;
+ }
+
+ if (statvfs->f_files) {
+ percent_inodes = (statvfs->f_ffree * 100) / statvfs->f_files;
+ } else {
+ /*
+ * Set percent inodes to 100 for dynamically allocated inode
+ * filesystems. The rationale is that distribute need not
+ * worry about total inodes; rather, let the 'create()' be
+ * scheduled on the hashed subvol regardless of the total
+ * inodes.
+ */
+ percent_inodes = 100;
+ }
+
+ LOCK(&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++)
+ if (prev == conf->subvolumes[i]) {
+ conf->du_stats[i].avail_percent = percent;
+ conf->du_stats[i].avail_space = bytes;
+ conf->du_stats[i].avail_inodes = percent_inodes;
+ conf->du_stats[i].chunks = chunks;
+ conf->du_stats[i].total_blocks = statvfs->f_blocks;
+ conf->du_stats[i].avail_blocks = statvfs->f_bavail;
+ conf->du_stats[i].frsize = statvfs->f_frsize;
+
+ gf_msg_debug(this->name, 0,
+ "subvolume '%s': avail_percent "
+ "is: %.2f and avail_space "
+ "is: %" PRIu64
+ " and avail_inodes"
+ " is: %.2f",
+ prev->name, conf->du_stats[i].avail_percent,
+ conf->du_stats[i].avail_space,
+ conf->du_stats[i].avail_inodes);
+ break; /* no point in looping further */
+ }
+ }
+ UNLOCK(&conf->subvolume_lock);
out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_DESTROY (frame);
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt))
+ DHT_STACK_DESTROY(frame);
- return 0;
+ return 0;
}
int
-dht_get_du_info_for_subvol (xlator_t *this, int subvol_idx)
+dht_get_du_info_for_subvol(xlator_t *this, int subvol_idx)
{
- dht_conf_t *conf = NULL;
- call_frame_t *statfs_frame = NULL;
- dht_local_t *statfs_local = NULL;
- call_pool_t *pool = NULL;
- loc_t tmp_loc = {0,};
-
- conf = this->private;
- pool = this->ctx->pool;
-
- statfs_frame = create_frame (this, pool);
- if (!statfs_frame) {
- goto err;
- }
-
- /* local->fop value is not used in this case */
- statfs_local = dht_local_init (statfs_frame, NULL, NULL,
- GF_FOP_MAXVALUE);
- if (!statfs_local) {
- goto err;
- }
-
- /* make it root gfid, should be enough to get the proper info back */
- tmp_loc.gfid[15] = 1;
-
- statfs_local->call_cnt = 1;
- STACK_WIND (statfs_frame, dht_du_info_cbk,
- conf->subvolumes[subvol_idx],
- conf->subvolumes[subvol_idx]->fops->statfs,
- &tmp_loc, NULL);
-
- return 0;
+ dht_conf_t *conf = NULL;
+ call_frame_t *statfs_frame = NULL;
+ dht_local_t *statfs_local = NULL;
+ call_pool_t *pool = NULL;
+ loc_t tmp_loc = {
+ 0,
+ };
+
+ conf = this->private;
+ pool = this->ctx->pool;
+
+ statfs_frame = create_frame(this, pool);
+ if (!statfs_frame) {
+ goto err;
+ }
+
+ /* local->fop value is not used in this case */
+ statfs_local = dht_local_init(statfs_frame, NULL, NULL, GF_FOP_MAXVALUE);
+ if (!statfs_local) {
+ goto err;
+ }
+
+ /* make it root gfid, should be enough to get the proper info back */
+ tmp_loc.gfid[15] = 1;
+
+ statfs_local->call_cnt = 1;
+ STACK_WIND_COOKIE(
+ statfs_frame, dht_du_info_cbk, conf->subvolumes[subvol_idx],
+ conf->subvolumes[subvol_idx],
+ conf->subvolumes[subvol_idx]->fops->statfs, &tmp_loc, NULL);
+
+ return 0;
err:
- if (statfs_frame)
- DHT_STACK_DESTROY (statfs_frame);
+ if (statfs_frame)
+ DHT_STACK_DESTROY(statfs_frame);
- return -1;
+ return -1;
}
int
-dht_get_du_info (call_frame_t *frame, xlator_t *this, loc_t *loc)
+dht_get_du_info(call_frame_t *frame, xlator_t *this, loc_t *loc)
{
- int i = 0;
- dht_conf_t *conf = NULL;
- call_frame_t *statfs_frame = NULL;
- dht_local_t *statfs_local = NULL;
- struct timeval tv = {0,};
- loc_t tmp_loc = {0,};
-
- conf = this->private;
-
- gettimeofday (&tv, NULL);
-
- /* make it root gfid, should be enough to get the proper
- info back */
- tmp_loc.gfid[15] = 1;
-
- if (tv.tv_sec > (conf->refresh_interval
- + conf->last_stat_fetch.tv_sec)) {
-
- statfs_frame = copy_frame (frame);
- if (!statfs_frame) {
- goto err;
- }
-
- /* In this case, 'local->fop' is not used */
- statfs_local = dht_local_init (statfs_frame, loc, NULL,
- GF_FOP_MAXVALUE);
- if (!statfs_local) {
- goto err;
- }
-
- statfs_local->call_cnt = conf->subvolume_cnt;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (statfs_frame, dht_du_info_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->statfs,
- &tmp_loc, NULL);
- }
-
- conf->last_stat_fetch.tv_sec = tv.tv_sec;
- }
- return 0;
+ int i = 0;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ call_frame_t *statfs_frame = NULL;
+ dht_local_t *statfs_local = NULL;
+ loc_t tmp_loc = {
+ 0,
+ };
+ time_t now;
+
+ conf = this->private;
+ now = gf_time();
+ /* make it root gfid, should be enough to get the proper
+ info back */
+ tmp_loc.gfid[15] = 1;
+
+ if (now > (conf->refresh_interval + conf->last_stat_fetch)) {
+ statfs_frame = copy_frame(frame);
+ if (!statfs_frame) {
+ goto err;
+ }
+
+ /* In this case, 'local->fop' is not used */
+ statfs_local = dht_local_init(statfs_frame, loc, NULL, GF_FOP_MAXVALUE);
+ if (!statfs_local) {
+ goto err;
+ }
+
+ statfs_local->params = dict_new();
+ if (!statfs_local->params)
+ goto err;
+
+ ret = dict_set_int8(statfs_local->params,
+ GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict");
+ goto err;
+ }
+
+ statfs_local->call_cnt = conf->subvolume_cnt;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND_COOKIE(statfs_frame, dht_du_info_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->statfs, &tmp_loc,
+ statfs_local->params);
+ }
+
+ conf->last_stat_fetch = now;
+ }
+ return 0;
err:
- if (statfs_frame)
- DHT_STACK_DESTROY (statfs_frame);
+ if (statfs_frame)
+ DHT_STACK_DESTROY(statfs_frame);
- return -1;
+ return -1;
}
-
gf_boolean_t
-dht_is_subvol_filled (xlator_t *this, xlator_t *subvol)
+dht_is_subvol_filled(xlator_t *this, xlator_t *subvol)
+{
+ int i = 0;
+ char vol_name[256];
+ dht_conf_t *conf = NULL;
+ gf_boolean_t subvol_filled_inodes = _gf_false;
+ gf_boolean_t subvol_filled_space = _gf_false;
+ gf_boolean_t is_subvol_filled = _gf_false;
+ double usage = 0;
+
+ conf = this->private;
+
+ /* Check for values above specified percent or free disk */
+ LOCK(&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ if (conf->disk_unit == 'p') {
+ if (conf->du_stats[i].avail_percent < conf->min_free_disk) {
+ subvol_filled_space = _gf_true;
+ break;
+ }
+
+ } else {
+ if (conf->du_stats[i].avail_space < conf->min_free_disk) {
+ subvol_filled_space = _gf_true;
+ break;
+ }
+ }
+ if (conf->du_stats[i].avail_inodes < conf->min_free_inodes) {
+ subvol_filled_inodes = _gf_true;
+ break;
+ }
+ }
+ }
+ }
+ UNLOCK(&conf->subvolume_lock);
+
+ if (subvol_filled_space && conf->subvolume_status[i]) {
+ if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
+ usage = 100 - conf->du_stats[i].avail_percent;
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SUBVOL_INSUFF_SPACE,
+ "disk space on subvolume '%s' is getting "
+ "full (%.2f %%), consider adding more bricks",
+ subvol->name, usage);
+
+ (void)snprintf(vol_name, sizeof(vol_name), "%s", this->name);
+ vol_name[(strlen(this->name) - 4)] = '\0';
+
+ gf_event(EVENT_DHT_DISK_USAGE, "volume=%s;subvol=%s;usage=%.2f %%",
+ vol_name, subvol->name, usage);
+ }
+ }
+
+ if (subvol_filled_inodes && conf->subvolume_status[i]) {
+ if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
+ usage = 100 - conf->du_stats[i].avail_inodes;
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_SUBVOL_INSUFF_INODES,
+ "inodes on subvolume '%s' are at "
+ "(%.2f %%), consider adding more bricks",
+ subvol->name, usage);
+
+ (void)snprintf(vol_name, sizeof(vol_name), "%s", this->name);
+ vol_name[(strlen(this->name) - 4)] = '\0';
+
+ gf_event(EVENT_DHT_INODES_USAGE,
+ "volume=%s;subvol=%s;usage=%.2f %%", vol_name,
+ subvol->name, usage);
+ }
+ }
+
+ is_subvol_filled = (subvol_filled_space || subvol_filled_inodes);
+
+ return is_subvol_filled;
+}
+
+/*Get the best subvolume to create the file in*/
+xlator_t *
+dht_free_disk_available_subvol(xlator_t *this, xlator_t *subvol,
+ dht_local_t *local)
+{
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ loc_t *loc = NULL;
+
+ conf = this->private;
+ if (!local)
+ goto out;
+ loc = &local->loc;
+ if (!local->layout) {
+ layout = dht_layout_get(this, loc->parent);
+
+ if (!layout) {
+ gf_msg_debug(this->name, 0,
+ "Missing layout. path=%s,"
+ " parent gfid = %s",
+ loc->path, uuid_utoa(loc->parent->gfid));
+ goto out;
+ }
+ } else {
+ layout = dht_layout_ref(this, local->layout);
+ }
+
+ LOCK(&conf->subvolume_lock);
+ {
+ avail_subvol = dht_subvol_with_free_space_inodes(this, subvol, NULL,
+ layout, 0);
+ if (!avail_subvol) {
+ avail_subvol = dht_subvol_maxspace_nonzeroinode(this, subvol,
+ layout);
+ }
+ }
+ UNLOCK(&conf->subvolume_lock);
+out:
+ if (!avail_subvol) {
+ gf_msg_debug(this->name, 0,
+ "No subvolume has enough free space \
+ and/or inodes to create");
+ avail_subvol = subvol;
+ }
+
+ if (layout)
+ dht_layout_unref(this, layout);
+ return avail_subvol;
+}
+
+static inline int32_t
+dht_subvol_has_err(dht_conf_t *conf, xlator_t *this, xlator_t *ignore,
+ dht_layout_t *layout)
+{
+ int ret = -1;
+ int i = 0;
+
+ if (!this || !layout)
+ goto out;
+
+ /* this check is meant for rebalance process. The source of the file
+ * should be ignored for space check */
+ if (this == ignore) {
+ goto out;
+ }
+
+ /* check if subvol has layout errors, before selecting it */
+ for (i = 0; i < layout->cnt; i++) {
+ if (!strcmp(layout->list[i].xlator->name, this->name) &&
+ (layout->list[i].err != 0)) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* discard decommissioned subvol */
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] &&
+ conf->decommissioned_bricks[i] == this) {
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/*Get subvolume which has both space and inodes more than the min criteria*/
+xlator_t *
+dht_subvol_with_free_space_inodes(xlator_t *this, xlator_t *subvol,
+ xlator_t *ignore, dht_layout_t *layout,
+ uint64_t filesize)
{
- int i = 0;
- dht_conf_t *conf = NULL;
- gf_boolean_t subvol_filled_inodes = _gf_false;
- gf_boolean_t subvol_filled_space = _gf_false;
- gf_boolean_t is_subvol_filled = _gf_false;
-
- conf = this->private;
-
- /* Check for values above specified percent or free disk */
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- if (conf->disk_unit == 'p') {
- if (conf->du_stats[i].avail_percent <
- conf->min_free_disk) {
- subvol_filled_space = _gf_true;
- break;
- }
-
- } else {
- if (conf->du_stats[i].avail_space <
- conf->min_free_disk) {
- subvol_filled_space = _gf_true;
- break;
- }
- }
- if (conf->du_stats[i].avail_inodes <
- conf->min_free_inodes) {
- subvol_filled_inodes = _gf_true;
- break;
- }
- }
- }
- }
- UNLOCK (&conf->subvolume_lock);
-
- if (subvol_filled_space && conf->subvolume_status[i]) {
- if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
- gf_log (this->name, GF_LOG_WARNING,
- "disk space on subvolume '%s' is getting "
- "full (%.2f %%), consider adding more nodes",
- subvol->name,
- (100 - conf->du_stats[i].avail_percent));
- }
- }
-
- if (subvol_filled_inodes && conf->subvolume_status[i]) {
- if (!(conf->du_stats[i].log++ % (GF_UNIVERSAL_ANSWER * 10))) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "inodes on subvolume '%s' are at "
- "(%.2f %%), consider adding more nodes",
- subvol->name,
- (100 - conf->du_stats[i].avail_inodes));
- }
- }
-
- is_subvol_filled = (subvol_filled_space || subvol_filled_inodes);
-
- return is_subvol_filled;
+ int i = 0;
+ double max = 0;
+ double max_inodes = 0;
+ int ignore_subvol = 0;
+ uint64_t total_blocks = 0;
+ uint64_t avail_blocks = 0;
+ uint64_t frsize = 0;
+ double post_availspace = 0;
+ double post_percent = 0;
+
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors and also it is not a
+ * decommissioned brick, before selecting it */
+ ignore_subvol = dht_subvol_has_err(conf, conf->subvolumes[i], ignore,
+ layout);
+ if (ignore_subvol)
+ continue;
+
+ if ((conf->disk_unit == 'p') &&
+ (conf->du_stats[i].avail_percent > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_percent > max)) {
+ max = conf->du_stats[i].avail_percent;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ total_blocks = conf->du_stats[i].total_blocks;
+ avail_blocks = conf->du_stats[i].avail_blocks;
+ frsize = conf->du_stats[i].frsize;
+ }
+ }
+
+ if ((conf->disk_unit != 'p') &&
+ (conf->du_stats[i].avail_space > conf->min_free_disk) &&
+ (conf->du_stats[i].avail_inodes > conf->min_free_inodes)) {
+ if ((conf->du_stats[i].avail_inodes > max_inodes) ||
+ (conf->du_stats[i].avail_space > max)) {
+ max = conf->du_stats[i].avail_space;
+ max_inodes = conf->du_stats[i].avail_inodes;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
+ }
+
+ if (avail_subvol) {
+ if (conf->disk_unit == 'p') {
+ post_availspace = (avail_blocks * frsize) - filesize;
+ post_percent = (post_availspace * 100) / (total_blocks * frsize);
+ if (post_percent < conf->min_free_disk)
+ avail_subvol = NULL;
+ }
+ if (conf->disk_unit != 'p') {
+ if ((max - filesize) < conf->min_free_disk)
+ avail_subvol = NULL;
+ }
+ }
+
+ return avail_subvol;
}
+/* Get subvol which has at least one inode and maximum space */
xlator_t *
-dht_free_disk_available_subvol (xlator_t *this, xlator_t *subvol)
+dht_subvol_maxspace_nonzeroinode(xlator_t *this, xlator_t *subvol,
+ dht_layout_t *layout)
{
- int i = 0;
- double max = 0;
- double max_inodes = 0;
- xlator_t *avail_subvol = NULL;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
-
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->disk_unit == 'p') {
- if ((conf->du_stats[i].avail_percent > max)
- && (conf->du_stats[i].avail_inodes > max_inodes)) {
- max = conf->du_stats[i].avail_percent;
- max_inodes = conf->du_stats[i].avail_inodes;
- avail_subvol = conf->subvolumes[i];
- }
- } else {
- if ((conf->du_stats[i].avail_space > max)
- && (conf->du_stats[i].avail_inodes > max_inodes)) {
- max = conf->du_stats[i].avail_space;
- max_inodes = conf->du_stats[i].avail_inodes;
- avail_subvol = conf->subvolumes[i];
- }
-
- }
- }
- }
- UNLOCK (&conf->subvolume_lock);
-
- if (!avail_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume has enough free space and inodes to create");
- }
-
- if ((max < conf->min_free_disk) && (max_inodes < conf->min_free_inodes))
- avail_subvol = subvol;
-
- if (!avail_subvol)
- avail_subvol = subvol;
-
- return avail_subvol;
+ int i = 0;
+ double max = 0;
+ int ignore_subvol = 0;
+
+ xlator_t *avail_subvol = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ /* check if subvol has layout errors and also it is not a
+ * decommissioned brick, before selecting it*/
+
+ ignore_subvol = dht_subvol_has_err(conf, conf->subvolumes[i], NULL,
+ layout);
+ if (ignore_subvol)
+ continue;
+
+ if (conf->disk_unit == 'p') {
+ if ((conf->du_stats[i].avail_percent > max) &&
+ (conf->du_stats[i].avail_inodes > 0)) {
+ max = conf->du_stats[i].avail_percent;
+ avail_subvol = conf->subvolumes[i];
+ }
+ } else {
+ if ((conf->du_stats[i].avail_space > max) &&
+ (conf->du_stats[i].avail_inodes > 0)) {
+ max = conf->du_stats[i].avail_space;
+ avail_subvol = conf->subvolumes[i];
+ }
+ }
+ }
+
+ return avail_subvol;
}
diff --git a/xlators/cluster/dht/src/dht-hashfn.c b/xlators/cluster/dht/src/dht-hashfn.c
index 97eb1f05fa8..acda67c312a 100644
--- a/xlators/cluster/dht/src/dht-hashfn.c
+++ b/xlators/cluster/dht/src/dht-hashfn.c
@@ -8,66 +8,103 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "hashfn.h"
+#include <glusterfs/hashfn.h>
-
-int
-dht_hash_compute_internal (int type, const char *name, uint32_t *hash_p)
+static int
+dht_hash_compute_internal(int type, const char *name, const int len,
+ uint32_t *hash_p)
{
- int ret = 0;
- uint32_t hash = 0;
+ int ret = 0;
+ uint32_t hash = 0;
- switch (type) {
+ switch (type) {
case DHT_HASH_TYPE_DM:
case DHT_HASH_TYPE_DM_USER:
- hash = gf_dm_hashfn (name, strlen (name));
- break;
+ hash = gf_dm_hashfn(name, len);
+ break;
default:
- ret = -1;
- break;
- }
+ ret = -1;
+ break;
+ }
- if (ret == 0) {
- *hash_p = hash;
- }
+ if (ret == 0) {
+ *hash_p = hash;
+ }
- return ret;
+ return ret;
}
+/* The function returns:
+ * 0 : in case no munge took place
+ * >0 : the length (inc. terminating NULL!) of the newly modified string,
+ * if it was munged.
+ */
+static int
+dht_munge_name(const char *original, char *modified, size_t len, regex_t *re)
+{
+ regmatch_t matches[2] = {
+ {0},
+ };
+ size_t new_len = 0;
+ int ret = 0;
+
+ ret = regexec(re, original, 2, matches, 0);
-#define MAKE_RSYNC_FRIENDLY_NAME(rsync_frndly_name, name) do { \
- rsync_frndly_name = (char *) name; \
- if (name[0] == '.') { \
- char *dot = 0; \
- int namelen = 0; \
- \
- dot = strrchr (name, '.'); \
- if (dot && dot > (name + 1) && *(dot + 1)) { \
- namelen = (dot - name); \
- rsync_frndly_name = alloca (namelen); \
- strncpy (rsync_frndly_name, name + 1, \
- namelen); \
- rsync_frndly_name[namelen - 1] = 0; \
- } \
- } \
- } while (0);
+ if (ret != REG_NOMATCH) {
+ if (matches[1].rm_so != -1) {
+ new_len = matches[1].rm_eo - matches[1].rm_so;
+ /* Equal would fail due to the NUL at the end. */
+ if (new_len < len) {
+ memcpy(modified, original + matches[1].rm_so, new_len);
+ modified[new_len] = '\0';
+ return new_len + 1; /* +1 for the terminating NULL */
+ }
+ }
+ }
+ /* This is guaranteed safe because of how the dest was allocated. */
+ strcpy(modified, original);
+ return 0;
+}
int
-dht_hash_compute (int type, const char *name, uint32_t *hash_p)
+dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p)
{
- char *rsync_friendly_name = NULL;
+ char *rsync_friendly_name = NULL;
+ dht_conf_t *priv = NULL;
+ size_t len = 0;
+ int munged = 0;
+
+ priv = this->private;
+
+ if (name == NULL)
+ return -1;
- MAKE_RSYNC_FRIENDLY_NAME (rsync_friendly_name, name);
+ len = strlen(name) + 1;
+ rsync_friendly_name = alloca(len);
+
+ LOCK(&priv->lock);
+ {
+ if (priv->extra_regex_valid) {
+ munged = dht_munge_name(name, rsync_friendly_name, len,
+ &priv->extra_regex);
+ }
+
+ if (!munged && priv->rsync_regex_valid) {
+ gf_msg_trace(this->name, 0, "trying regex for %s", name);
+ munged = dht_munge_name(name, rsync_friendly_name, len,
+ &priv->rsync_regex);
+ }
+ }
+ UNLOCK(&priv->lock);
+ if (munged) {
+ gf_msg_debug(this->name, 0, "munged down to %s", rsync_friendly_name);
+ len = munged;
+ } else {
+ rsync_friendly_name = (char *)name;
+ }
- return dht_hash_compute_internal (type, rsync_friendly_name, hash_p);
+ return dht_hash_compute_internal(type, rsync_friendly_name, len - 1,
+ hash_p);
}
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index 611de19e48e..3f2fe43d5f3 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -8,904 +8,2297 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include "dht-common.h"
+#include "dht-lock.h"
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
+static void
+dht_free_fd_ctx(dht_fd_ctx_t *fd_ctx)
+{
+ GF_FREE(fd_ctx);
+}
-#include "glusterfs.h"
-#include "xlator.h"
-#include "dht-common.h"
+int32_t
+dht_fd_ctx_destroy(xlator_t *this, fd_t *fd)
+{
+ dht_fd_ctx_t *fd_ctx = NULL;
+ uint64_t value = 0;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ ret = fd_ctx_del(fd, this, &value);
+ if (ret) {
+ goto out;
+ }
+
+ fd_ctx = (dht_fd_ctx_t *)(uintptr_t)value;
+ if (fd_ctx) {
+ GF_REF_PUT(fd_ctx);
+ }
+out:
+ return ret;
+}
+
+static int
+__dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst)
+{
+ dht_fd_ctx_t *fd_ctx = NULL;
+ uint64_t value = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ fd_ctx = GF_CALLOC(1, sizeof(*fd_ctx), gf_dht_mt_fd_ctx_t);
+ if (!fd_ctx) {
+ goto out;
+ }
+
+ fd_ctx->opened_on_dst = (uint64_t)(uintptr_t)dst;
+ GF_REF_INIT(fd_ctx, dht_free_fd_ctx);
+
+ value = (uint64_t)(uintptr_t)fd_ctx;
+
+ ret = __fd_ctx_set(fd, this, value);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FD_CTX_SET_FAILED,
+ "fd=0x%p", fd, NULL);
+ GF_REF_PUT(fd_ctx);
+ }
+out:
+ return ret;
+}
int
-dht_frame_return (call_frame_t *frame)
+dht_fd_ctx_set(xlator_t *this, fd_t *fd, xlator_t *dst)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
+ dht_fd_ctx_t *fd_ctx = NULL;
+ uint64_t value = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ LOCK(&fd->lock);
+ {
+ ret = __fd_ctx_get(fd, this, &value);
+ if (ret && value) {
+ fd_ctx = (dht_fd_ctx_t *)(uintptr_t)value;
+ if (fd_ctx->opened_on_dst == (uint64_t)(uintptr_t)dst) {
+ /* This could happen due to racing
+ * check_progress tasks*/
+ goto unlock;
+ } else {
+ /* This would be a big problem*/
+ /* Overwrite and hope for the best*/
+ fd_ctx->opened_on_dst = (uint64_t)(uintptr_t)dst;
+ UNLOCK(&fd->lock);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_VALUE,
+ NULL);
- if (!frame)
- return -1;
+ goto out;
+ }
+ }
+ ret = __dht_fd_ctx_set(this, fd, dst);
+ }
+unlock:
+ UNLOCK(&fd->lock);
+out:
+ return ret;
+}
- local = frame->local;
+static dht_fd_ctx_t *
+dht_fd_ctx_get(xlator_t *this, fd_t *fd)
+{
+ dht_fd_ctx_t *fd_ctx = NULL;
+ int ret = -1;
+ uint64_t tmp_val = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ LOCK(&fd->lock);
+ {
+ ret = __fd_ctx_get(fd, this, &tmp_val);
+ if ((ret < 0) || (tmp_val == 0)) {
+ goto unlock;
+ }
- LOCK (&frame->lock);
- {
- this_call_cnt = --local->call_cnt;
+ fd_ctx = (dht_fd_ctx_t *)(uintptr_t)tmp_val;
+ GF_REF_GET(fd_ctx);
+ }
+unlock:
+ UNLOCK(&fd->lock);
+
+out:
+ return fd_ctx;
+}
+
+gf_boolean_t
+dht_fd_open_on_dst(xlator_t *this, fd_t *fd, xlator_t *dst)
+{
+ dht_fd_ctx_t *fd_ctx = NULL;
+ gf_boolean_t opened = _gf_false;
+
+ fd_ctx = dht_fd_ctx_get(this, fd);
+
+ if (fd_ctx) {
+ if (fd_ctx->opened_on_dst == (uint64_t)(uintptr_t)dst) {
+ opened = _gf_true;
}
- UNLOCK (&frame->lock);
+ GF_REF_PUT(fd_ctx);
+ }
- return this_call_cnt;
+ return opened;
}
+void
+dht_free_mig_info(void *data)
+{
+ dht_migrate_info_t *miginfo = NULL;
-int
-dht_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
+ miginfo = data;
+ GF_FREE(miginfo);
+
+ return;
+}
+
+static int
+dht_inode_ctx_set_mig_info(xlator_t *this, inode_t *inode, xlator_t *src_subvol,
+ xlator_t *dst_subvol)
{
- dht_conf_t *conf = NULL;
- int cnt = 0;
- int max = 0;
- uint64_t y = 0;
+ dht_migrate_info_t *miginfo = NULL;
+ uint64_t value = 0;
+ int ret = -1;
- if (x == ((uint64_t) -1)) {
- y = (uint64_t) -1;
- goto out;
+ miginfo = GF_CALLOC(1, sizeof(*miginfo), gf_dht_mt_miginfo_t);
+ if (miginfo == NULL)
+ goto out;
+
+ miginfo->src_subvol = src_subvol;
+ miginfo->dst_subvol = dst_subvol;
+ GF_REF_INIT(miginfo, dht_free_mig_info);
+
+ value = (uint64_t)(uintptr_t)miginfo;
+
+ ret = inode_ctx_set1(inode, this, &value);
+ if (ret < 0) {
+ GF_REF_PUT(miginfo);
+ }
+
+out:
+ return ret;
+}
+
+int
+dht_inode_ctx_get_mig_info(xlator_t *this, inode_t *inode,
+ xlator_t **src_subvol, xlator_t **dst_subvol)
+{
+ int ret = -1;
+ uint64_t tmp_miginfo = 0;
+ dht_migrate_info_t *miginfo = NULL;
+
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get1(inode, this, &tmp_miginfo);
+ if ((ret < 0) || (tmp_miginfo == 0)) {
+ UNLOCK(&inode->lock);
+ goto out;
}
- conf = this->private;
- if (!conf)
- goto out;
+ miginfo = (dht_migrate_info_t *)(uintptr_t)tmp_miginfo;
+ GF_REF_GET(miginfo);
+ }
+ UNLOCK(&inode->lock);
- max = conf->subvolume_cnt;
- cnt = dht_subvol_cnt (this, subvol);
+ if (src_subvol)
+ *src_subvol = miginfo->src_subvol;
- y = ((x * max) + cnt);
+ if (dst_subvol)
+ *dst_subvol = miginfo->dst_subvol;
+
+ GF_REF_PUT(miginfo);
out:
- if (y_p)
- *y_p = y;
+ return ret;
+}
- return 0;
+gf_boolean_t
+dht_mig_info_is_invalid(xlator_t *current, xlator_t *src_subvol,
+ xlator_t *dst_subvol)
+{
+ /* Not set
+ */
+ if (!src_subvol || !dst_subvol)
+ return _gf_true;
+
+ /* Invalid scenarios:
+ * The src_subvol does not match the subvol on which the current op was sent
+ * so the cached subvol has changed between the last mig_info_set and now.
+ * src_subvol == dst_subvol. The file was migrated without any FOP detecting
+ * a P2 so the old dst is now the current subvol.
+ *
+ * There is still one scenario where the info could be outdated - if
+ * file has undergone multiple migrations and ends up on the same src_subvol
+ * on which the mig_info was first set.
+ */
+ if ((current == dst_subvol) || (current != src_subvol))
+ return _gf_true;
+
+ return _gf_false;
}
+/* Used to check if fd fops have the fd opened on the cached subvol
+ * This is required when:
+ * 1. an fd is opened on FILE1 on subvol1
+ * 2. the file is migrated to subvol2
+ * 3. a lookup updates the cached subvol in the inode_ctx to subvol2
+ * 4. a write comes on the fd
+ * The write is sent to subvol2 on an fd which has been opened only on fd1
+ * Since the migration phase checks don't kick in, the fop fails with EBADF
+ *
+ */
+
int
-dht_filter_loc_subvol_key (xlator_t *this, loc_t *loc, loc_t *new_loc,
- xlator_t **subvol)
+dht_check_and_open_fd_on_subvol_complete(int ret, call_frame_t *frame,
+ void *data)
{
- char *new_name = NULL;
- char *new_path = NULL;
- xlator_list_t *trav = NULL;
- char key[1024] = {0,};
- int ret = 0; /* not found */
+ glusterfs_fop_t fop = 0;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *this = NULL;
+ fd_t *fd = NULL;
+ int op_errno = -1;
+
+ local = frame->local;
+ this = frame->this;
+ fop = local->fop;
+ subvol = local->cached_subvol;
+ fd = local->fd;
+
+ if (ret) {
+ op_errno = local->op_errno;
+ goto handle_err;
+ }
+
+ switch (fop) {
+ case GF_FOP_WRITE:
+ STACK_WIND_COOKIE(frame, dht_writev_cbk, subvol, subvol,
+ subvol->fops->writev, fd, local->rebalance.vector,
+ local->rebalance.count, local->rebalance.offset,
+ local->rebalance.flags, local->rebalance.iobref,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FLUSH:
+ STACK_WIND(frame, dht_flush_cbk, subvol, subvol->fops->flush, fd,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FSETATTR:
+ STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol,
+ subvol->fops->fsetattr, fd,
+ &local->rebalance.stbuf, local->rebalance.flags,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_ZEROFILL:
+ STACK_WIND_COOKIE(frame, dht_zerofill_cbk, subvol, subvol,
+ subvol->fops->zerofill, fd,
+ local->rebalance.offset, local->rebalance.size,
+ local->xattr_req);
+
+ break;
+
+ case GF_FOP_DISCARD:
+ STACK_WIND_COOKIE(frame, dht_discard_cbk, subvol, subvol,
+ subvol->fops->discard, local->fd,
+ local->rebalance.offset, local->rebalance.size,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FALLOCATE:
+ STACK_WIND_COOKIE(frame, dht_fallocate_cbk, subvol, subvol,
+ subvol->fops->fallocate, fd,
+ local->rebalance.flags, local->rebalance.offset,
+ local->rebalance.size, local->xattr_req);
+ break;
+
+ case GF_FOP_FTRUNCATE:
+ STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol,
+ subvol->fops->ftruncate, fd,
+ local->rebalance.offset, local->xattr_req);
+ break;
+
+ case GF_FOP_FSYNC:
+ STACK_WIND_COOKIE(frame, dht_fsync_cbk, subvol, subvol,
+ subvol->fops->fsync, local->fd,
+ local->rebalance.flags, local->xattr_req);
+ break;
+
+ case GF_FOP_READ:
+ STACK_WIND(frame, dht_readv_cbk, subvol, subvol->fops->readv,
+ local->fd, local->rebalance.size,
+ local->rebalance.offset, local->rebalance.flags,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FSTAT:
+ STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol,
+ subvol->fops->fstat, fd, local->xattr_req);
+ break;
+
+ case GF_FOP_FSETXATTR:
+ STACK_WIND_COOKIE(frame, dht_file_setxattr_cbk, subvol, subvol,
+ subvol->fops->fsetxattr, local->fd,
+ local->rebalance.xattr, local->rebalance.flags,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FREMOVEXATTR:
+ STACK_WIND_COOKIE(frame, dht_file_removexattr_cbk, subvol, subvol,
+ subvol->fops->fremovexattr, local->fd, local->key,
+ local->xattr_req);
+
+ break;
+
+ case GF_FOP_FXATTROP:
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol,
+ subvol->fops->fxattrop, local->fd,
+ local->rebalance.flags, local->rebalance.xattr,
+ local->xattr_req);
+ break;
+
+ case GF_FOP_FGETXATTR:
+ STACK_WIND(frame, dht_getxattr_cbk, subvol, subvol->fops->fgetxattr,
+ local->fd, local->key, NULL);
+ break;
+
+ case GF_FOP_FINODELK:
+ STACK_WIND(frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
+ local->key, local->fd, local->rebalance.lock_cmd,
+ &local->rebalance.flock, local->xattr_req);
+ break;
+ default:
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, "fd=%p",
+ fd, "gfid=%s", uuid_utoa(fd->inode->gfid), "name=%s",
+ subvol->name, NULL);
+ break;
+ }
+
+ goto out;
+
+ /* Could not open the fd on the dst. Unwind */
+
+handle_err:
+
+ switch (fop) {
+ case GF_FOP_WRITE:
+ DHT_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_FLUSH:
+ DHT_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
+ break;
+
+ case GF_FOP_FSETATTR:
+ DHT_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_ZEROFILL:
+ DHT_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_DISCARD:
+ DHT_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_FALLOCATE:
+ DHT_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_FTRUNCATE:
+ DHT_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_FSYNC:
+ DHT_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_READ:
+ DHT_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL,
+ NULL);
+ break;
+
+ case GF_FOP_FSTAT:
+ DHT_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL);
+ break;
+
+ case GF_FOP_FSETXATTR:
+ DHT_STACK_UNWIND(fsetxattr, frame, -1, op_errno, NULL);
+ break;
+
+ case GF_FOP_FREMOVEXATTR:
+ DHT_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL);
+ break;
+
+ case GF_FOP_FXATTROP:
+ DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
+ break;
+
+ case GF_FOP_FGETXATTR:
+ DHT_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL);
+ break;
+
+ case GF_FOP_FINODELK:
+ DHT_STACK_UNWIND(finodelk, frame, -1, op_errno, NULL);
+ break;
+
+ default:
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, "fd=%p",
+ fd, "gfid=%s", uuid_utoa(fd->inode->gfid), "name=%s",
+ subvol->name, NULL);
+ break;
+ }
- /* Why do other tasks if first required 'char' itself is not there */
- if (!new_loc || !loc || !loc->name || !strchr (loc->name, '@'))
- goto out;
+out:
- trav = this->children;
- while (trav) {
- snprintf (key, 1024, "*@%s:%s", this->name, trav->xlator->name);
- if (fnmatch (key, loc->name, FNM_NOESCAPE) == 0) {
- new_name = GF_CALLOC(strlen (loc->name),
- sizeof (char),
- gf_common_mt_char);
- if (!new_name)
- goto out;
- if (fnmatch (key, loc->path, FNM_NOESCAPE) == 0) {
- new_path = GF_CALLOC(strlen (loc->path),
- sizeof (char),
- gf_common_mt_char);
- if (!new_path)
- goto out;
- strncpy (new_path, loc->path, (strlen (loc->path) -
- strlen (key) + 1));
- }
- strncpy (new_name, loc->name, (strlen (loc->name) -
- strlen (key) + 1));
-
- if (new_loc) {
- new_loc->path = ((new_path) ? new_path:
- gf_strdup (loc->path));
- new_loc->name = new_name;
- new_loc->inode = inode_ref (loc->inode);
- new_loc->parent = inode_ref (loc->parent);
- }
- *subvol = trav->xlator;
- ret = 1; /* success */
- goto out;
- }
- trav = trav->next;
+ return 0;
+}
+
+/* Check once again if the fd has been opened on the cached subvol.
+ * If not, open and update the fd_ctx.
+ */
+
+int
+dht_check_and_open_fd_on_subvol_task(void *data)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ fd_t *fd = NULL;
+ xlator_t *this = NULL;
+ xlator_t *subvol = NULL;
+
+ frame = data;
+ local = frame->local;
+ this = THIS;
+ fd = local->fd;
+ subvol = local->cached_subvol;
+
+ local->fd_checked = _gf_true;
+
+ if (fd_is_anonymous(fd) || dht_fd_open_on_dst(this, fd, subvol)) {
+ ret = 0;
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0, "Opening fd (%p, flags=0%o) on file %s @ %s",
+ fd, fd->flags, uuid_utoa(fd->inode->gfid), subvol->name);
+
+ loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(loc.gfid, fd->inode->gfid);
+
+ /* Open this on the dst subvol */
+
+ SYNCTASK_SETID(0, 0);
+
+ ret = syncop_open(subvol, &loc, (fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)),
+ fd, NULL, NULL);
+
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_OPEN_FD_ON_DST_FAILED,
+ "fd=%p", fd, "flags=0%o", fd->flags, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), "name=%s", subvol->name, NULL);
+ /* This can happen if the cached subvol was updated in the
+ * inode_ctx and the fd was opened on the new cached suvol
+ * after this fop was wound on the old cached subvol.
+ * As we do not close the fd on the old subvol (a leak)
+ * don't treat ENOENT as an error and allow the phase1/phase2
+ * checks to handle it.
+ */
+
+ if ((-ret != ENOENT) && (-ret != ESTALE)) {
+ local->op_errno = -ret;
+ ret = -1;
+ } else {
+ ret = 0;
}
+
+ local->op_errno = -ret;
+ ret = -1;
+
+ } else {
+ dht_fd_ctx_set(this, fd, subvol);
+ }
+
+ SYNCTASK_SETID(frame->root->uid, frame->root->gid);
out:
- if (!ret) {
- /* !success */
- if (new_path)
- GF_FREE (new_path);
- if (new_name)
- GF_FREE (new_name);
- }
- return ret;
+ loc_wipe(&loc);
+
+ return ret;
+}
+
+int
+dht_check_and_open_fd_on_subvol(xlator_t *this, call_frame_t *frame)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+ /*
+ if (dht_fd_open_on_dst (this, fd, subvol))
+ goto out;
+ */
+ local = frame->local;
+
+ ret = synctask_new(this->ctx->env, dht_check_and_open_fd_on_subvol_task,
+ dht_check_and_open_fd_on_subvol_complete, frame, frame);
+
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SYNCTASK_CREATE_FAILED,
+ "to-check-and-open fd=%p", local->fd, NULL);
+ }
+
+ return ret;
}
int
-dht_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
- uint64_t *x_p)
+dht_frame_return(call_frame_t *frame)
{
- dht_conf_t *conf = NULL;
- int cnt = 0;
- int max = 0;
- uint64_t x = 0;
- xlator_t *subvol = 0;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
- if (!this->private)
+ if (!frame)
+ return -1;
+
+ local = frame->local;
+
+ LOCK(&frame->lock);
+ {
+ this_call_cnt = --local->call_cnt;
+ }
+ UNLOCK(&frame->lock);
+
+ return this_call_cnt;
+}
+
+/*
+ * Use this function to specify which subvol you want the file created
+ * on - this need not be the hashed subvol.
+ * Format: <filename>@<this->name>:<subvol-name>
+ * Eg: file-1@vol1-dht:vol1-client-0
+ * where vol1 is a pure distribute volume
+ * will create file-1 on vol1-client-0
+ */
+
+int
+dht_filter_loc_subvol_key(xlator_t *this, loc_t *loc, loc_t *new_loc,
+ xlator_t **subvol)
+{
+ char *new_name = NULL;
+ char *new_path = NULL;
+ xlator_list_t *trav = NULL;
+ char key[1024] = {
+ 0,
+ };
+ int ret = 0; /* not found */
+ int keylen = 0;
+ int name_len = 0;
+ int path_len = 0;
+
+ /* Why do other tasks if first required 'char' itself is not there */
+ if (!new_loc || !loc || !loc->name || !strchr(loc->name, '@')) {
+ /* Skip the GF_FREE checks here */
+ return ret;
+ }
+
+ trav = this->children;
+ while (trav) {
+ keylen = snprintf(key, sizeof(key), "*@%s:%s", this->name,
+ trav->xlator->name);
+ /* Ignore '*' */
+ keylen = keylen - 1;
+ if (fnmatch(key, loc->name, FNM_NOESCAPE) == 0) {
+ name_len = strlen(loc->name) - keylen;
+ new_name = GF_MALLOC(name_len + 1, gf_common_mt_char);
+ if (!new_name)
goto out;
+ if (fnmatch(key, loc->path, FNM_NOESCAPE) == 0) {
+ path_len = strlen(loc->path) - keylen;
+ new_path = GF_MALLOC(path_len + 1, gf_common_mt_char);
+ if (!new_path)
+ goto out;
+ snprintf(new_path, path_len + 1, "%s", loc->path);
+ }
+ snprintf(new_name, name_len + 1, "%s", loc->name);
+
+ if (new_loc) {
+ new_loc->path = ((new_path) ? new_path : gf_strdup(loc->path));
+ new_loc->name = new_name;
+ new_loc->inode = inode_ref(loc->inode);
+ new_loc->parent = inode_ref(loc->parent);
+ }
+ *subvol = trav->xlator;
+ ret = 1; /* success */
+ goto out;
+ }
+ trav = trav->next;
+ }
+out:
+ if (!ret) {
+ /* !success */
+ GF_FREE(new_path);
+ GF_FREE(new_name);
+ }
+ return ret;
+}
- conf = this->private;
- max = conf->subvolume_cnt;
+static xlator_t *
+dht_get_subvol_from_id(xlator_t *this, int client_id)
+{
+ xlator_t *xl = NULL;
+ dht_conf_t *conf = NULL;
+ char *sid = NULL;
+ int32_t ret = -1;
- cnt = y % max;
- x = y / max;
+ conf = this->private;
- subvol = conf->subvolumes[cnt];
+ ret = gf_asprintf(&sid, "%d", client_id);
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_ASPRINTF_FAILED, NULL);
+ goto out;
+ }
- if (subvol_p)
- *subvol_p = subvol;
+ if (dict_get_ptr(conf->leaf_to_subvol, sid, (void **)&xl))
+ xl = NULL;
- if (x_p)
- *x_p = x;
+ GF_FREE(sid);
out:
- return 0;
+ return xl;
}
+int
+dht_deitransform(xlator_t *this, uint64_t y, xlator_t **subvol_p)
+{
+ int client_id = 0;
+ xlator_t *subvol = 0;
+ dht_conf_t *conf = NULL;
+
+ if (!this->private)
+ return -1;
+
+ conf = this->private;
+
+ client_id = gf_deitransform(this, y);
+
+ subvol = dht_get_subvol_from_id(this, client_id);
+
+ if (!subvol)
+ subvol = conf->subvolumes[0];
+
+ if (subvol_p)
+ *subvol_p = subvol;
+
+ return 0;
+}
void
-dht_local_wipe (xlator_t *this, dht_local_t *local)
+dht_local_wipe(xlator_t *this, dht_local_t *local)
{
- if (!local)
- return;
+ int i = 0;
- loc_wipe (&local->loc);
- loc_wipe (&local->loc2);
+ if (!local)
+ return;
- if (local->xattr)
- dict_unref (local->xattr);
+ loc_wipe(&local->loc);
+ loc_wipe(&local->loc2);
+ loc_wipe(&local->loc2_copy);
- if (local->inode)
- inode_unref (local->inode);
+ if (local->xattr)
+ dict_unref(local->xattr);
- if (local->layout) {
- dht_layout_unref (this, local->layout);
- local->layout = NULL;
- }
+ if (local->inode)
+ inode_unref(local->inode);
- loc_wipe (&local->linkfile.loc);
+ if (local->layout) {
+ dht_layout_unref(this, local->layout);
+ local->layout = NULL;
+ }
- if (local->linkfile.xattr)
- dict_unref (local->linkfile.xattr);
+ loc_wipe(&local->linkfile.loc);
- if (local->linkfile.inode)
- inode_unref (local->linkfile.inode);
+ if (local->linkfile.xattr)
+ dict_unref(local->linkfile.xattr);
- if (local->fd) {
- fd_unref (local->fd);
- local->fd = NULL;
- }
+ if (local->linkfile.inode)
+ inode_unref(local->linkfile.inode);
- if (local->params) {
- dict_unref (local->params);
- local->params = NULL;
- }
+ if (local->fd) {
+ fd_unref(local->fd);
+ local->fd = NULL;
+ }
- if (local->xattr_req)
- dict_unref (local->xattr_req);
+ if (local->params) {
+ dict_unref(local->params);
+ local->params = NULL;
+ }
- if (local->selfheal.layout) {
- dht_layout_unref (this, local->selfheal.layout);
- local->selfheal.layout = NULL;
- }
+ if (local->xattr_req)
+ dict_unref(local->xattr_req);
+ if (local->mds_xattr)
+ dict_unref(local->mds_xattr);
+ if (local->xdata)
+ dict_unref(local->xdata);
- if (local->newpath) {
- GF_FREE (local->newpath);
- }
+ if (local->selfheal.layout) {
+ dht_layout_unref(this, local->selfheal.layout);
+ local->selfheal.layout = NULL;
+ }
- if (local->key) {
- GF_FREE (local->key);
- }
+ if (local->selfheal.refreshed_layout) {
+ dht_layout_unref(this, local->selfheal.refreshed_layout);
+ local->selfheal.refreshed_layout = NULL;
+ }
- if (local->rebalance.vector)
- GF_FREE (local->rebalance.vector);
+ for (i = 0; i < 2; i++) {
+ dht_lock_array_free(local->lock[i].ns.parent_layout.locks,
+ local->lock[i].ns.parent_layout.lk_count);
- if (local->rebalance.iobref)
- iobref_unref (local->rebalance.iobref);
+ GF_FREE(local->lock[i].ns.parent_layout.locks);
- mem_put (local);
-}
+ dht_lock_array_free(local->lock[i].ns.directory_ns.locks,
+ local->lock[i].ns.directory_ns.lk_count);
+ GF_FREE(local->lock[i].ns.directory_ns.locks);
+ }
+ GF_FREE(local->key);
-dht_local_t *
-dht_local_init (call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)
-{
- dht_local_t *local = NULL;
- inode_t *inode = NULL;
- int ret = 0;
-
- local = mem_get0 (THIS->local_pool);
- if (!local)
- goto out;
+ if (local->rebalance.xdata)
+ dict_unref(local->rebalance.xdata);
- if (loc) {
- ret = loc_copy (&local->loc, loc);
- if (ret)
- goto out;
+ if (local->rebalance.xattr)
+ dict_unref(local->rebalance.xattr);
- inode = loc->inode;
- }
+ if (local->rebalance.dict)
+ dict_unref(local->rebalance.dict);
- if (fd) {
- local->fd = fd_ref (fd);
- if (!inode)
- inode = fd->inode;
- }
+ GF_FREE(local->rebalance.vector);
- local->op_ret = -1;
- local->op_errno = EUCLEAN;
- local->fop = fop;
+ if (local->rebalance.iobref)
+ iobref_unref(local->rebalance.iobref);
- if (inode) {
- local->layout = dht_layout_get (frame->this, inode);
- local->cached_subvol = dht_subvol_get_cached (frame->this,
- inode);
- }
+ if (local->stub) {
+ call_stub_destroy(local->stub);
+ local->stub = NULL;
+ }
- frame->local = local;
+ if (local->ret_cache)
+ GF_FREE(local->ret_cache);
-out:
- if (ret) {
- if (local)
- mem_put (local);
- local = NULL;
- }
- return local;
+ mem_put(local);
}
-
-char *
-basestr (const char *str)
+dht_local_t *
+dht_local_init(call_frame_t *frame, loc_t *loc, fd_t *fd, glusterfs_fop_t fop)
{
- char *basestr = NULL;
+ dht_local_t *local = NULL;
+ inode_t *inode = NULL;
+ int ret = 0;
- basestr = strrchr (str, '/');
- if (basestr)
- basestr ++;
+ local = mem_get0(THIS->local_pool);
+ if (!local)
+ goto out;
- return basestr;
-}
+ if (loc) {
+ ret = loc_copy(&local->loc, loc);
+ if (ret)
+ goto out;
+
+ inode = loc->inode;
+ }
+
+ if (fd) {
+ local->fd = fd_ref(fd);
+ if (!inode)
+ inode = fd->inode;
+ }
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+ local->fop = fop;
+
+ if (inode) {
+ local->layout = dht_layout_get(frame->this, inode);
+ local->cached_subvol = dht_subvol_get_cached(frame->this, inode);
+ }
+
+ frame->local = local;
+
+out:
+ if (ret) {
+ if (local)
+ mem_put(local);
+ local = NULL;
+ }
+ return local;
+}
xlator_t *
-dht_first_up_subvol (xlator_t *this)
+dht_first_up_subvol(xlator_t *this)
{
- dht_conf_t *conf = NULL;
- xlator_t *child = NULL;
- int i = 0;
- time_t time = 0;
+ dht_conf_t *conf = NULL;
+ xlator_t *child = NULL;
+ int i = 0;
+ time_t time = 0;
- conf = this->private;
- if (!conf)
- goto out;
+ conf = this->private;
+ if (!conf)
+ goto out;
- LOCK (&conf->subvolume_lock);
- {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvol_up_time[i]) {
- if (!time) {
- time = conf->subvol_up_time[i];
- child = conf->subvolumes[i];
- } else if (time > conf->subvol_up_time[i]) {
- time = conf->subvol_up_time[i];
- child = conf->subvolumes[i];
- }
- }
+ LOCK(&conf->subvolume_lock);
+ {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvol_up_time[i]) {
+ if (!time) {
+ time = conf->subvol_up_time[i];
+ child = conf->subvolumes[i];
+ } else if (time > conf->subvol_up_time[i]) {
+ time = conf->subvol_up_time[i];
+ child = conf->subvolumes[i];
}
+ }
}
- UNLOCK (&conf->subvolume_lock);
+ }
+ UNLOCK(&conf->subvolume_lock);
out:
- return child;
+ return child;
}
xlator_t *
-dht_last_up_subvol (xlator_t *this)
+dht_last_up_subvol(xlator_t *this)
{
- dht_conf_t *conf = NULL;
- xlator_t *child = NULL;
- int i = 0;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- LOCK (&conf->subvolume_lock);
- {
- for (i = conf->subvolume_cnt-1; i >= 0; i--) {
- if (conf->subvolume_status[i]) {
- child = conf->subvolumes[i];
- break;
- }
- }
+ dht_conf_t *conf = NULL;
+ xlator_t *child = NULL;
+ int i = 0;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ LOCK(&conf->subvolume_lock);
+ {
+ for (i = conf->subvolume_cnt - 1; i >= 0; i--) {
+ if (conf->subvolume_status[i]) {
+ child = conf->subvolumes[i];
+ break;
+ }
}
- UNLOCK (&conf->subvolume_lock);
+ }
+ UNLOCK(&conf->subvolume_lock);
out:
- return child;
+ return child;
}
xlator_t *
-dht_subvol_get_hashed (xlator_t *this, loc_t *loc)
+dht_subvol_get_hashed(xlator_t *this, loc_t *loc)
{
- dht_layout_t *layout = NULL;
- xlator_t *subvol = NULL;
+ dht_layout_t *layout = NULL;
+ xlator_t *subvol = NULL;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO (this->name, loc, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
- if (__is_root_gfid (loc->gfid)) {
- subvol = dht_first_up_subvol (this);
- goto out;
- }
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
- GF_VALIDATE_OR_GOTO (this->name, loc->parent, out);
- GF_VALIDATE_OR_GOTO (this->name, loc->name, out);
+ methods = &(conf->methods);
- layout = dht_layout_get (this, loc->parent);
+ if (__is_root_gfid(loc->gfid)) {
+ subvol = dht_first_up_subvol(this);
+ goto out;
+ }
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout missing path=%s parent=%s",
- loc->path, uuid_utoa (loc->parent->gfid));
- goto out;
- }
+ GF_VALIDATE_OR_GOTO(this->name, loc->parent, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->name, out);
- subvol = dht_layout_search (this, layout, loc->name);
+ layout = dht_layout_get(this, loc->parent);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not find subvolume for path=%s",
- loc->path);
- goto out;
- }
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "Missing layout. path=%s, parent gfid =%s",
+ loc->path, uuid_utoa(loc->parent->gfid));
+ goto out;
+ }
+
+ subvol = methods->layout_search(this, layout, loc->name);
+
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "No hashed subvolume for path=%s",
+ loc->path);
+ goto out;
+ }
out:
- if (layout) {
- dht_layout_unref (this, layout);
- }
+ if (layout) {
+ dht_layout_unref(this, layout);
+ }
- return subvol;
+ return subvol;
}
-
xlator_t *
-dht_subvol_get_cached (xlator_t *this, inode_t *inode)
+dht_subvol_get_cached(xlator_t *this, inode_t *inode)
{
- dht_layout_t *layout = NULL;
- xlator_t *subvol = NULL;
+ dht_layout_t *layout = NULL;
+ xlator_t *subvol = NULL;
- GF_VALIDATE_OR_GOTO (this->name, this, out);
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- layout = dht_layout_get (this, inode);
+ layout = dht_layout_get(this, inode);
- if (!layout) {
- goto out;
- }
+ if (!layout) {
+ goto out;
+ }
- subvol = layout->list[0].xlator;
+ subvol = layout->list[0].xlator;
out:
- if (layout) {
- dht_layout_unref (this, layout);
- }
+ if (layout) {
+ dht_layout_unref(this, layout);
+ }
- return subvol;
+ return subvol;
}
-
xlator_t *
-dht_subvol_next (xlator_t *this, xlator_t *prev)
+dht_subvol_next(xlator_t *this, xlator_t *prev)
{
- dht_conf_t *conf = NULL;
- int i = 0;
- xlator_t *next = NULL;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == prev) {
- if ((i + 1) < conf->subvolume_cnt)
- next = conf->subvolumes[i + 1];
- break;
- }
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *next = NULL;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev) {
+ if ((i + 1) < conf->subvolume_cnt)
+ next = conf->subvolumes[i + 1];
+ break;
}
+ }
out:
- return next;
+ return next;
}
+/* This func wraps around, if prev is actually the last subvol.
+ */
+xlator_t *
+dht_subvol_next_available(xlator_t *this, xlator_t *prev)
+{
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *next = NULL;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == prev) {
+ /* if prev is last in conf->subvolumes, then wrap
+ * around.
+ */
+ if ((i + 1) < conf->subvolume_cnt) {
+ next = conf->subvolumes[i + 1];
+ } else {
+ next = conf->subvolumes[0];
+ }
+ break;
+ }
+ }
+out:
+ return next;
+}
int
-dht_subvol_cnt (xlator_t *this, xlator_t *subvol)
+dht_subvol_cnt(xlator_t *this, xlator_t *subvol)
{
- int i = 0;
- int ret = -1;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (subvol == conf->subvolumes[i]) {
- ret = i;
- break;
- }
+ int i = 0;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (subvol == conf->subvolumes[i]) {
+ ret = i;
+ break;
}
+ }
out:
- return ret;
+ return ret;
}
+#define set_if_greater(a, b) \
+ do { \
+ if ((a) < (b)) \
+ (a) = (b); \
+ } while (0)
-#define set_if_greater(a, b) do { \
- if ((a) < (b)) \
- (a) = (b); \
- } while (0)
-
+#define set_if_greater_time(a, an, b, bn) \
+ do { \
+ if (((a) < (b)) || (((a) == (b)) && ((an) < (bn)))) { \
+ (a) = (b); \
+ (an) = (bn); \
+ } \
+ } while (0)
-#define set_if_greater_time(a, an, b, bn) do { \
- if (((a) < (b)) || (((a) == (b)) && ((an) < (bn)))){ \
- (a) = (b); \
- (an) = (bn); \
- } \
- } while (0) \
+int
+dht_iatt_merge(xlator_t *this, struct iatt *to, struct iatt *from)
+{
+ if (!from || !to)
+ return 0;
+ to->ia_dev = from->ia_dev;
+
+ gf_uuid_copy(to->ia_gfid, from->ia_gfid);
+
+ to->ia_ino = from->ia_ino;
+ to->ia_prot = from->ia_prot;
+ to->ia_type = from->ia_type;
+ to->ia_nlink = from->ia_nlink;
+ to->ia_rdev = from->ia_rdev;
+ to->ia_size += from->ia_size;
+ to->ia_blksize = from->ia_blksize;
+ to->ia_blocks += from->ia_blocks;
+
+ if (IA_ISDIR(from->ia_type)) {
+ to->ia_blocks = DHT_DIR_STAT_BLOCKS;
+ to->ia_size = DHT_DIR_STAT_SIZE;
+ }
+ set_if_greater(to->ia_uid, from->ia_uid);
+ set_if_greater(to->ia_gid, from->ia_gid);
+
+ set_if_greater_time(to->ia_atime, to->ia_atime_nsec, from->ia_atime,
+ from->ia_atime_nsec);
+ set_if_greater_time(to->ia_mtime, to->ia_mtime_nsec, from->ia_mtime,
+ from->ia_mtime_nsec);
+ set_if_greater_time(to->ia_ctime, to->ia_ctime_nsec, from->ia_ctime,
+ from->ia_ctime_nsec);
+
+ return 0;
+}
int
-dht_iatt_merge (xlator_t *this, struct iatt *to,
- struct iatt *from, xlator_t *subvol)
+dht_build_child_loc(xlator_t *this, loc_t *child, loc_t *parent, char *name)
{
- if (!from || !to)
- return 0;
+ if (!child) {
+ goto err;
+ }
- to->ia_dev = from->ia_dev;
+ if (strcmp(parent->path, "/") == 0)
+ gf_asprintf((char **)&child->path, "/%s", name);
+ else
+ gf_asprintf((char **)&child->path, "%s/%s", parent->path, name);
- uuid_copy (to->ia_gfid, from->ia_gfid);
+ if (!child->path) {
+ goto err;
+ }
- to->ia_ino = from->ia_ino;
- to->ia_prot = from->ia_prot;
- to->ia_type = from->ia_type;
- to->ia_nlink = from->ia_nlink;
- to->ia_rdev = from->ia_rdev;
- to->ia_size += from->ia_size;
- to->ia_blksize = from->ia_blksize;
- to->ia_blocks += from->ia_blocks;
+ child->name = strrchr(child->path, '/');
+ if (child->name)
+ child->name++;
- set_if_greater (to->ia_uid, from->ia_uid);
- set_if_greater (to->ia_gid, from->ia_gid);
+ child->parent = inode_ref(parent->inode);
+ child->inode = inode_new(parent->inode->table);
- set_if_greater_time(to->ia_atime, to->ia_atime_nsec,
- from->ia_atime, from->ia_atime_nsec);
- set_if_greater_time (to->ia_mtime, to->ia_mtime_nsec,
- from->ia_mtime, from->ia_mtime_nsec);
- set_if_greater_time (to->ia_ctime, to->ia_ctime_nsec,
- from->ia_ctime, from->ia_ctime_nsec);
+ if (!child->inode) {
+ goto err;
+ }
- return 0;
+ return 0;
+err:
+ if (child) {
+ loc_wipe(child);
+ }
+ return -1;
}
int
-dht_build_child_loc (xlator_t *this, loc_t *child, loc_t *parent, char *name)
+dht_init_local_subvolumes(xlator_t *this, dht_conf_t *conf)
{
- if (!child) {
- goto err;
- }
-
- if (strcmp (parent->path, "/") == 0)
- gf_asprintf ((char **)&child->path, "/%s", name);
- else
- gf_asprintf ((char **)&child->path, "%s/%s", parent->path, name);
+ xlator_list_t *subvols = NULL;
+ int cnt = 0;
- if (!child->path) {
- goto err;
- }
+ if (!conf)
+ return -1;
- child->name = strrchr (child->path, '/');
- if (child->name)
- child->name++;
+ for (subvols = this->children; subvols; subvols = subvols->next)
+ cnt++;
- child->parent = inode_ref (parent->inode);
- child->inode = inode_new (parent->inode->table);
+ conf->local_subvols = GF_CALLOC(cnt, sizeof(xlator_t *),
+ gf_dht_mt_xlator_t);
- if (!child->inode) {
- goto err;
- }
+ /* FIX FIX : do this dynamically*/
+ conf->local_nodeuuids = GF_CALLOC(cnt, sizeof(subvol_nodeuuids_info_t),
+ gf_dht_nodeuuids_t);
- return 0;
-err:
- loc_wipe (child);
+ if (!conf->local_subvols || !conf->local_nodeuuids) {
return -1;
-}
+ }
+ conf->local_subvols_cnt = 0;
+ return 0;
+}
int
-dht_init_subvolumes (xlator_t *this, dht_conf_t *conf)
+dht_init_subvolumes(xlator_t *this, dht_conf_t *conf)
{
- xlator_list_t *subvols = NULL;
- int cnt = 0;
+ xlator_list_t *subvols = NULL;
+ int cnt = 0;
- if (!conf)
- return -1;
+ if (!conf)
+ return -1;
- for (subvols = this->children; subvols; subvols = subvols->next)
- cnt++;
+ for (subvols = this->children; subvols; subvols = subvols->next)
+ cnt++;
- conf->subvolumes = GF_CALLOC (cnt, sizeof (xlator_t *),
- gf_dht_mt_xlator_t);
- if (!conf->subvolumes) {
- return -1;
- }
- conf->subvolume_cnt = cnt;
+ conf->subvolumes = GF_CALLOC(cnt, sizeof(xlator_t *), gf_dht_mt_xlator_t);
+ if (!conf->subvolumes) {
+ return -1;
+ }
+ conf->subvolume_cnt = cnt;
+ /* Doesn't make sense to do any dht layer tasks
+ if the subvol count is 1. Set it as pass_through */
+ if (cnt == 1)
+ this->pass_through = _gf_true;
- cnt = 0;
- for (subvols = this->children; subvols; subvols = subvols->next)
- conf->subvolumes[cnt++] = subvols->xlator;
+ conf->local_subvols_cnt = 0;
- conf->subvolume_status = GF_CALLOC (cnt, sizeof (char),
- gf_dht_mt_char);
- if (!conf->subvolume_status) {
- return -1;
- }
+ dht_set_subvol_range(this);
- conf->last_event = GF_CALLOC (cnt, sizeof (int),
- gf_dht_mt_char);
- if (!conf->last_event) {
- return -1;
- }
+ cnt = 0;
+ for (subvols = this->children; subvols; subvols = subvols->next)
+ conf->subvolumes[cnt++] = subvols->xlator;
- conf->subvol_up_time = GF_CALLOC (cnt, sizeof (time_t),
- gf_dht_mt_subvol_time);
- if (!conf->subvol_up_time) {
- return -1;
- }
+ conf->subvolume_status = GF_CALLOC(cnt, sizeof(char), gf_dht_mt_char);
+ if (!conf->subvolume_status) {
+ return -1;
+ }
- conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
- gf_dht_mt_dht_du_t);
- if (!conf->du_stats) {
- return -1;
- }
+ conf->last_event = GF_CALLOC(cnt, sizeof(int), gf_dht_mt_char);
+ if (!conf->last_event) {
+ return -1;
+ }
- conf->decommissioned_bricks = GF_CALLOC (cnt, sizeof (xlator_t *),
- gf_dht_mt_xlator_t);
- if (!conf->decommissioned_bricks) {
- return -1;
- }
+ conf->subvol_up_time = GF_CALLOC(cnt, sizeof(time_t),
+ gf_dht_mt_subvol_time);
+ if (!conf->subvol_up_time) {
+ return -1;
+ }
- return 0;
-}
+ conf->du_stats = GF_CALLOC(conf->subvolume_cnt, sizeof(dht_du_t),
+ gf_dht_mt_dht_du_t);
+ if (!conf->du_stats) {
+ return -1;
+ }
+ conf->decommissioned_bricks = GF_CALLOC(cnt, sizeof(xlator_t *),
+ gf_dht_mt_xlator_t);
+ if (!conf->decommissioned_bricks) {
+ return -1;
+ }
+ return 0;
+}
+/*
+ op_ret values :
+ 0 : Success.
+ -1 : Failure.
+ 1 : File is being migrated but not by this DHT layer.
+*/
static int
-dht_migration_complete_check_done (int op_ret, call_frame_t *frame, void *data)
+dht_migration_complete_check_done(int op_ret, call_frame_t *frame, void *data)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
- local = frame->local;
+ local = frame->local;
- local->rebalance.target_op_fn (THIS, frame, op_ret);
+ if (op_ret != 0)
+ goto out;
- return 0;
-}
+ if (local->cached_subvol == NULL) {
+ local->op_errno = EINVAL;
+ goto out;
+ }
+ subvol = local->cached_subvol;
-int
-dht_migration_complete_check_task (void *data)
-{
- int ret = -1;
- xlator_t *src_node = NULL;
- xlator_t *dst_node = NULL;
- dht_local_t *local = NULL;
- dict_t *dict = NULL;
- dht_layout_t *layout = NULL;
- struct iatt stbuf = {0,};
- xlator_t *this = NULL;
- call_frame_t *frame = NULL;
- loc_t tmp_loc = {0,};
- char *path = NULL;
-
- this = THIS;
- frame = data;
- local = frame->local;
-
- src_node = local->cached_subvol;
-
- /* getxattr on cached_subvol for 'linkto' value */
- if (!local->loc.inode)
- ret = syncop_fgetxattr (src_node, local->fd, &dict,
- DHT_LINKFILE_KEY);
- else
- ret = syncop_getxattr (src_node, &local->loc, &dict,
- DHT_LINKFILE_KEY);
-
- if (!ret)
- dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
+out:
+ local->rebalance.target_op_fn(THIS, subvol, frame, op_ret);
- if (ret) {
- if ((errno != ENOENT) || (!local->loc.inode)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to get the 'linkto' xattr %s",
- local->loc.path, strerror (errno));
- goto out;
- }
- /* Need to do lookup on hashed subvol, then get the file */
- ret = syncop_lookup (this, &local->loc, NULL, &stbuf, NULL,
- NULL);
- if (ret)
- goto out;
- dst_node = dht_subvol_get_cached (this, local->loc.inode);
- }
+ return 0;
+}
- if (!dst_node) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to get the destination node",
- local->loc.path);
- ret = -1;
- goto out;
+int
+dht_migration_complete_check_task(void *data)
+{
+ int ret = -1;
+ xlator_t *src_node = NULL;
+ xlator_t *dst_node = NULL, *linkto_target = NULL;
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ call_frame_t *frame = NULL;
+ loc_t tmp_loc = {
+ 0,
+ };
+ char *path = NULL;
+ dht_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
+ fd_t *tmp = NULL;
+ uint64_t tmp_miginfo = 0;
+ dht_migrate_info_t *miginfo = NULL;
+ gf_boolean_t skip_open = _gf_false;
+ int open_failed = 0;
+
+ this = THIS;
+ frame = data;
+ local = frame->local;
+ conf = this->private;
+
+ src_node = local->cached_subvol;
+
+ if (!local->loc.inode && !local->fd) {
+ local->op_errno = EINVAL;
+ goto out;
+ }
+
+ inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+ /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+ * as root:root. If a fd is already open, access check won't be done*/
+
+ if (!local->loc.inode) {
+ ret = syncop_fgetxattr(src_node, local->fd, &dict,
+ conf->link_xattr_name, NULL, NULL);
+ } else {
+ SYNCTASK_SETID(0, 0);
+ ret = syncop_getxattr(src_node, &local->loc, &dict,
+ conf->link_xattr_name, NULL, NULL);
+ SYNCTASK_SETID(frame->root->uid, frame->root->gid);
+ }
+
+ /*
+ * Each DHT xlator layer has its own name for the linkto xattr.
+ * If the file mode bits indicate the the file is being migrated but
+ * this layer's linkto xattr is not set, it means that another
+ * DHT layer is migrating the file. In this case, return 1 so
+ * the mode bits can be passed on to the higher layer for appropriate
+ * action.
+ */
+ if (-ret == ENODATA) {
+ /* This DHT translator is not migrating this file */
+
+ ret = inode_ctx_reset1(inode, this, &tmp_miginfo);
+ if (tmp_miginfo) {
+ /* This can be a problem if the file was
+ * migrated by two different layers. Raise
+ * a warning here.
+ */
+ gf_smsg(
+ this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), NULL);
+
+ miginfo = (void *)(uintptr_t)tmp_miginfo;
+ GF_REF_PUT(miginfo);
+ }
+ ret = 1;
+ goto out;
+ }
+
+ if (!ret)
+ linkto_target = dht_linkfile_subvol(this, NULL, NULL, dict);
+
+ if (local->loc.inode) {
+ loc_copy(&tmp_loc, &local->loc);
+ } else {
+ tmp_loc.inode = inode_ref(inode);
+ gf_uuid_copy(tmp_loc.gfid, inode->gfid);
+ }
+
+ ret = syncop_lookup(this, &tmp_loc, &stbuf, 0, 0, 0);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
+ "tmp=%s", tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", this->name, NULL);
+ local->op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ dst_node = dht_subvol_get_cached(this, tmp_loc.inode);
+ if (linkto_target && dst_node != linkto_target) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_LINKFILE,
+ "linkto_target_name=%s", linkto_target->name, "dst_name=%s",
+ dst_node->name, NULL);
+ }
+
+ if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "dst_name=%s", dst_node->name, NULL);
+ ret = -1;
+ local->op_errno = EIO;
+ goto out;
+ }
+
+ /* update local. A layout is set in inode-ctx in lookup already */
+
+ dht_layout_unref(this, local->layout);
+
+ local->layout = dht_layout_get(frame->this, inode);
+ local->cached_subvol = dst_node;
+
+ ret = 0;
+
+ /* once we detect the migration complete, the inode-ctx2 is no more
+ required.. delete the ctx and also, it means, open() already
+ done on all the fd of inode */
+ ret = inode_ctx_reset1(inode, this, &tmp_miginfo);
+ if (tmp_miginfo) {
+ miginfo = (void *)(uintptr_t)tmp_miginfo;
+ GF_REF_PUT(miginfo);
+ goto out;
+ }
+
+ /* perform 'open()' on all the fd's present on the inode */
+ if (tmp_loc.path == NULL) {
+ inode_path(inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+ }
+
+ LOCK(&inode->lock);
+
+ if (list_empty(&inode->fd_list))
+ goto unlock;
+
+ /* perform open as root:root. There is window between linkfile
+ * creation(root:root) and setattr with the correct uid/gid
+ */
+ SYNCTASK_SETID(0, 0);
+
+ /* It's possible that we are the last user of iter_fd after each
+ * iteration. In this case the fd_unref() of iter_fd at the end of
+ * the loop will cause the destruction of the fd. So we need to
+ * iterate the list safely because iter_fd cannot be trusted.
+ */
+ iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
+ while (&iter_fd->inode_list != (&inode->fd_list)) {
+ if (fd_is_anonymous(iter_fd) ||
+ (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
+ if (!tmp) {
+ iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
+ inode_list);
+ continue;
+ }
+ skip_open = _gf_true;
+ }
+ /* We need to release the inode->lock before calling
+ * syncop_open() to avoid possible deadlocks. However this
+ * can cause the iter_fd to be released by other threads.
+ * To avoid this, we take a reference before releasing the
+ * lock.
+ */
+ fd_ref(iter_fd);
+
+ UNLOCK(&inode->lock);
+
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
+ if (skip_open)
+ goto next;
+
+ /* flags for open are stripped down to allow following the
+ * new location of the file, otherwise we can get EEXIST or
+ * truncate the file again as rebalance is moving the data */
+ ret = syncop_open(dst_node, &tmp_loc,
+ (iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)),
+ iter_fd, NULL, NULL);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_OPEN_FD_ON_DST_FAILED, "id=%p", iter_fd,
+ "flags=0%o", iter_fd->flags, "path=%s", path, "name=%s",
+ dst_node->name, NULL);
+
+ open_failed = 1;
+ local->op_errno = -ret;
+ ret = -1;
+ } else {
+ dht_fd_ctx_set(this, iter_fd, dst_node);
}
- /* lookup on dst */
- if (local->loc.inode) {
- ret = syncop_lookup (dst_node, &local->loc, NULL, &stbuf, NULL, NULL);
+ next:
+ LOCK(&inode->lock);
+ skip_open = _gf_false;
+ tmp = iter_fd;
+ iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
+ }
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to lookup the file on %s",
- local->loc.path, dst_node->name);
- goto out;
- }
+ SYNCTASK_SETID(frame->root->uid, frame->root->gid);
- if (uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: gfid different on the target file on %s",
- local->loc.path, dst_node->name);
- ret = -1;
- goto out;
- }
- }
+ if (open_failed) {
+ ret = -1;
+ goto unlock;
+ }
+ ret = 0;
- /* update inode ctx (the layout) */
- dht_layout_unref (this, local->layout);
-
- if (!local->loc.inode)
- ret = dht_layout_preset (this, dst_node, local->fd->inode);
- else
- ret = dht_layout_preset (this, dst_node, local->loc.inode);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: could not set preset layout for subvol %s",
- local->loc.path, dst_node->name);
- ret = -1;
- goto out;
- }
+unlock:
+ UNLOCK(&inode->lock);
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
- layout = dht_layout_for_subvol (this, dst_node);
- if (!layout) {
- gf_log (this->name, GF_LOG_INFO,
- "%s: no pre-set layout for subvolume %s",
- local->loc.path, dst_node ? dst_node->name : "<nil>");
- ret = -1;
- goto out;
- }
+out:
+ if (dict) {
+ dict_unref(dict);
+ }
- if (!local->loc.inode)
- ret = dht_layout_set (this, local->fd->inode, layout);
- else
- ret = dht_layout_set (this, local->loc.inode, layout);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set the new layout",
- local->loc.path);
- goto out;
+ loc_wipe(&tmp_loc);
+
+ return ret;
+}
+
+int
+dht_rebalance_complete_check(xlator_t *this, call_frame_t *frame)
+{
+ int ret = -1;
+
+ ret = synctask_new(this->ctx->env, dht_migration_complete_check_task,
+ dht_migration_complete_check_done, frame, frame);
+ return ret;
+}
+
+/* During 'in-progress' state, both nodes should have the file */
+/*
+ op_ret values :
+ 0 : Success
+ -1 : Failure.
+ 1 : File is being migrated but not by this DHT layer.
+*/
+static int
+dht_inprogress_check_done(int op_ret, call_frame_t *frame, void *data)
+{
+ dht_local_t *local = NULL;
+ xlator_t *dst_subvol = NULL, *src_subvol = NULL;
+ inode_t *inode = NULL;
+
+ local = frame->local;
+
+ if (op_ret != 0)
+ goto out;
+
+ inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+
+ dht_inode_ctx_get_mig_info(THIS, inode, &src_subvol, &dst_subvol);
+ if (dht_mig_info_is_invalid(local->cached_subvol, src_subvol, dst_subvol)) {
+ dst_subvol = dht_subvol_get_cached(THIS, inode);
+ if (!dst_subvol) {
+ local->op_errno = EINVAL;
+ goto out;
}
+ }
- local->cached_subvol = dst_node;
- ret = 0;
+out:
+ local->rebalance.target_op_fn(THIS, dst_subvol, frame, op_ret);
- if (!local->fd)
- goto out;
+ return 0;
+}
- /* once we detect the migration complete, the fd-ctx is no more
- required.. delete the ctx, and do one extra 'fd_unref' for open fd */
- ret = fd_ctx_del (local->fd, this, NULL);
- if (!ret) {
- fd_unref (local->fd);
- ret = 0;
- goto out;
+static int
+dht_rebalance_inprogress_task(void *data)
+{
+ int ret = -1;
+ xlator_t *src_node = NULL;
+ xlator_t *dst_node = NULL;
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ char *path = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ loc_t tmp_loc = {
+ 0,
+ };
+ dht_conf_t *conf = NULL;
+ inode_t *inode = NULL;
+ fd_t *iter_fd = NULL;
+ fd_t *tmp = NULL;
+ int open_failed = 0;
+ uint64_t tmp_miginfo = 0;
+ dht_migrate_info_t *miginfo = NULL;
+ gf_boolean_t skip_open = _gf_false;
+
+ this = THIS;
+ frame = data;
+ local = frame->local;
+ conf = this->private;
+
+ src_node = local->cached_subvol;
+
+ if (!local->loc.inode && !local->fd)
+ goto out;
+
+ inode = (!local->fd) ? local->loc.inode : local->fd->inode;
+
+ /* getxattr on cached_subvol for 'linkto' value. Do path based getxattr
+ * as root:root. If a fd is already open, access check won't be done*/
+ if (local->loc.inode) {
+ SYNCTASK_SETID(0, 0);
+ ret = syncop_getxattr(src_node, &local->loc, &dict,
+ conf->link_xattr_name, NULL, NULL);
+ SYNCTASK_SETID(frame->root->uid, frame->root->gid);
+ } else {
+ ret = syncop_fgetxattr(src_node, local->fd, &dict,
+ conf->link_xattr_name, NULL, NULL);
+ }
+
+ /*
+ * Each DHT xlator layer has its own name for the linkto xattr.
+ * If the file mode bits indicate the the file is being migrated but
+ * this layer's linkto xattr is not present, it means that another
+ * DHT layer is migrating the file. In this case, return 1 so
+ * the mode bits can be passed on to the higher layer for appropriate
+ * action.
+ */
+
+ if (-ret == ENODATA) {
+ /* This DHT layer is not migrating this file */
+ ret = inode_ctx_reset1(inode, this, &tmp_miginfo);
+ if (tmp_miginfo) {
+ /* This can be a problem if the file was
+ * migrated by two different layers. Raise
+ * a warning here.
+ */
+ gf_smsg(
+ this->name, GF_LOG_WARNING, 0, DHT_MSG_HAS_MIGINFO, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid), NULL);
+ miginfo = (void *)(uintptr_t)tmp_miginfo;
+ GF_REF_PUT(miginfo);
+ }
+ ret = 1;
+ goto out;
+ }
+
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_GET_XATTR_FAILED,
+ "path=%s", local->loc.path, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ dst_node = dht_linkfile_subvol(this, NULL, NULL, dict);
+ if (!dst_node) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GET_XATTR_FAILED,
+ "path=%s", local->loc.path, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ local->rebalance.target_node = dst_node;
+
+ if (local->loc.inode) {
+ loc_copy(&tmp_loc, &local->loc);
+ } else {
+ tmp_loc.inode = inode_ref(inode);
+ gf_uuid_copy(tmp_loc.gfid, inode->gfid);
+ }
+
+ /* lookup on dst */
+ ret = syncop_lookup(dst_node, &tmp_loc, &stbuf, NULL, NULL, NULL);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_FILE_LOOKUP_FAILED,
+ "tmp=%s", tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", dst_node->name, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ if (gf_uuid_compare(stbuf.ia_gfid, tmp_loc.inode->gfid)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH, "tmp=%s",
+ tmp_loc.path ? tmp_loc.path : uuid_utoa(tmp_loc.gfid),
+ "name=%s", dst_node->name, NULL);
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+
+ if (tmp_loc.path == NULL) {
+ inode_path(inode, NULL, &path);
+ if (path)
+ tmp_loc.path = path;
+ }
+
+ LOCK(&inode->lock);
+
+ if (list_empty(&inode->fd_list))
+ goto unlock;
+
+ /* perform open as root:root. There is window between linkfile
+ * creation(root:root) and setattr with the correct uid/gid
+ */
+ SYNCTASK_SETID(0, 0);
+
+ /* It's possible that we are the last user of iter_fd after each
+ * iteration. In this case the fd_unref() of iter_fd at the end of
+ * the loop will cause the destruction of the fd. So we need to
+ * iterate the list safely because iter_fd cannot be trusted.
+ */
+ iter_fd = list_entry((&inode->fd_list)->next, typeof(*iter_fd), inode_list);
+ while (&iter_fd->inode_list != (&inode->fd_list)) {
+ /* We need to release the inode->lock before calling
+ * syncop_open() to avoid possible deadlocks. However this
+ * can cause the iter_fd to be released by other threads.
+ * To avoid this, we take a reference before releasing the
+ * lock.
+ */
+
+ if (fd_is_anonymous(iter_fd) ||
+ (dht_fd_open_on_dst(this, iter_fd, dst_node))) {
+ if (!tmp) {
+ iter_fd = list_entry(iter_fd->inode_list.next, typeof(*iter_fd),
+ inode_list);
+ continue;
+ }
+ skip_open = _gf_true;
}
- /* if 'local->fd' (ie, fd based operation), send a 'open()' on
- destination if not already done */
- if (local->loc.inode) {
- ret = syncop_open (dst_node, &local->loc,
- local->fd->flags, local->fd);
- } else {
- tmp_loc.inode = local->fd->inode;
- inode_path (local->fd->inode, NULL, &path);
- if (path)
- tmp_loc.path = path;
- ret = syncop_open (dst_node, &tmp_loc,
- local->fd->flags, local->fd);
- if (path)
- GF_FREE (path);
+ /* Yes, this is ugly but there isn't a cleaner way to do this
+ * the fd_ref is an atomic increment so not too bad. We want to
+ * reduce the number of inode locks and unlocks.
+ */
+
+ fd_ref(iter_fd);
+ UNLOCK(&inode->lock);
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
}
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to send open() on target file at %s",
- local->loc.path, dst_node->name);
- goto out;
+ if (skip_open)
+ goto next;
+
+ /* flags for open are stripped down to allow following the
+ * new location of the file, otherwise we can get EEXIST or
+ * truncate the file again as rebalance is moving the data */
+ ret = syncop_open(dst_node, &tmp_loc,
+ (iter_fd->flags & ~(O_CREAT | O_EXCL | O_TRUNC)),
+ iter_fd, NULL, NULL);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_OPEN_FD_ON_DST_FAILED, "fd=%p", iter_fd,
+ "flags=0%o", iter_fd->flags, "path=%s", path, "name=%s",
+ dst_node->name, NULL);
+ ret = -1;
+ open_failed = 1;
+ } else {
+ /* Potential fd leak if this fails here as it will be
+ reopened at the next Phase1/2 check */
+ dht_fd_ctx_set(this, iter_fd, dst_node);
}
- /* need this unref for the fd on src_node */
- fd_unref (local->fd);
- ret = 0;
+ next:
+ LOCK(&inode->lock);
+ skip_open = _gf_false;
+ tmp = iter_fd;
+ iter_fd = list_entry(tmp->inode_list.next, typeof(*tmp), inode_list);
+ }
+
+ SYNCTASK_SETID(frame->root->uid, frame->root->gid);
+
+unlock:
+ UNLOCK(&inode->lock);
+
+ if (tmp) {
+ fd_unref(tmp);
+ tmp = NULL;
+ }
+ if (open_failed) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dht_inode_ctx_set_mig_info(this, inode, src_node, dst_node);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "path=%s", local->loc.path, "name=%s", dst_node->name, NULL);
+ goto out;
+ }
+
+ ret = 0;
out:
+ if (dict) {
+ dict_unref(dict);
+ }
- return ret;
+ loc_wipe(&tmp_loc);
+ return ret;
}
int
-dht_rebalance_complete_check (xlator_t *this, call_frame_t *frame)
+dht_rebalance_in_progress_check(xlator_t *this, call_frame_t *frame)
{
- int ret = -1;
+ int ret = -1;
- ret = synctask_new (this->ctx->env, dht_migration_complete_check_task,
- dht_migration_complete_check_done,
- frame, frame);
- return ret;
+ ret = synctask_new(this->ctx->env, dht_rebalance_inprogress_task,
+ dht_inprogress_check_done, frame, frame);
+ return ret;
}
-/* During 'in-progress' state, both nodes should have the file */
-static int
-dht_inprogress_check_done (int op_ret, call_frame_t *sync_frame, void *data)
+int
+dht_inode_ctx_layout_set(inode_t *inode, xlator_t *this,
+ dht_layout_t *layout_int)
{
- dht_local_t *local = NULL;
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+
+ ret = dht_inode_ctx_get(inode, this, &ctx);
+ if (!ret && ctx) {
+ ctx->layout = layout_int;
+ } else {
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ return ret;
+ ctx->layout = layout_int;
+ }
+
+ ret = dht_inode_ctx_set(inode, this, ctx);
+
+ return ret;
+}
- local = sync_frame->local;
+void
+dht_inode_ctx_time_set(inode_t *inode, xlator_t *this, struct iatt *stat)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ dht_stat_time_t *time = 0;
+ int ret = -1;
- local->rebalance.target_op_fn (THIS, sync_frame, op_ret);
+ ret = dht_inode_ctx_get(inode, this, &ctx);
- return 0;
+ if (ret)
+ return;
+
+ time = &ctx->time;
+
+ time->mtime = stat->ia_mtime;
+ time->mtime_nsec = stat->ia_mtime_nsec;
+
+ time->ctime = stat->ia_ctime;
+ time->ctime_nsec = stat->ia_ctime_nsec;
+
+ time->atime = stat->ia_atime;
+ time->atime_nsec = stat->ia_atime_nsec;
+
+ return;
}
-static int
-dht_rebalance_inprogress_task (void *data)
-{
- int ret = -1;
- xlator_t *src_node = NULL;
- xlator_t *dst_node = NULL;
- dht_local_t *local = NULL;
- dict_t *dict = NULL;
- call_frame_t *frame = NULL;
- xlator_t *this = NULL;
- char *path = NULL;
- struct iatt stbuf = {0,};
- loc_t tmp_loc = {0,};
-
- this = THIS;
- frame = data;
- local = frame->local;
-
- src_node = local->cached_subvol;
-
- /* getxattr on cached_subvol for 'linkto' value */
- if (local->loc.inode)
- ret = syncop_getxattr (src_node, &local->loc, &dict,
- DHT_LINKFILE_KEY);
- else
- ret = syncop_fgetxattr (src_node, local->fd, &dict,
- DHT_LINKFILE_KEY);
+int
+dht_inode_ctx_time_update(inode_t *inode, xlator_t *this, struct iatt *stat,
+ int32_t post)
+{
+ dht_inode_ctx_t *ctx = NULL;
+ dht_stat_time_t *time = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(this->name, stat, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ ret = dht_inode_ctx_get(inode, this, &ctx);
+
+ if (ret) {
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_dht_mt_inode_ctx_t);
+ if (!ctx)
+ return -1;
+ }
+
+ time = &ctx->time;
+
+ LOCK(&inode->lock);
+ {
+ DHT_UPDATE_TIME(time->mtime, time->mtime_nsec, stat->ia_mtime,
+ stat->ia_mtime_nsec, post);
+ DHT_UPDATE_TIME(time->ctime, time->ctime_nsec, stat->ia_ctime,
+ stat->ia_ctime_nsec, post);
+ DHT_UPDATE_TIME(time->atime, time->atime_nsec, stat->ia_atime,
+ stat->ia_atime_nsec, post);
+ }
+ UNLOCK(&inode->lock);
+
+ ret = dht_inode_ctx_set(inode, this, ctx);
+out:
+ return 0;
+}
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to get the 'linkto' xattr %s",
- local->loc.path, strerror (errno));
- goto out;
+int
+dht_inode_ctx_get(inode_t *inode, xlator_t *this, dht_inode_ctx_t **ctx)
+{
+ int ret = -1;
+ uint64_t ctx_int = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ ret = inode_ctx_get(inode, this, &ctx_int);
+
+ if (ret)
+ return ret;
+
+ if (ctx)
+ *ctx = (dht_inode_ctx_t *)(uintptr_t)ctx_int;
+out:
+ return ret;
+}
+
+int
+dht_inode_ctx_set(inode_t *inode, xlator_t *this, dht_inode_ctx_t *ctx)
+{
+ int ret = -1;
+ uint64_t ctx_int = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, ctx, out);
+
+ ctx_int = (long)ctx;
+ ret = inode_ctx_set(inode, this, &ctx_int);
+out:
+ return ret;
+}
+
+int
+dht_subvol_status(dht_conf_t *conf, xlator_t *subvol)
+{
+ int i;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == subvol) {
+ return conf->subvolume_status[i];
}
+ }
+ return 0;
+}
- dst_node = dht_linkfile_subvol (this, NULL, NULL, dict);
- if (!dst_node) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to get the 'linkto' xattr from dict",
- local->loc.path);
- ret = -1;
+inode_t *
+dht_heal_path(xlator_t *this, char *path, inode_table_t *itable)
+{
+ int ret = -1;
+ struct iatt iatt = {
+ 0,
+ };
+ inode_t *linked_inode = NULL;
+ loc_t loc = {
+ 0,
+ };
+ char *bname = NULL;
+ char *save_ptr = NULL;
+ static uuid_t gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ char *tmp_path = NULL;
+
+ tmp_path = gf_strdup(path);
+ if (!tmp_path) {
+ goto out;
+ }
+
+ gf_uuid_copy(loc.pargfid, gfid);
+ loc.parent = inode_ref(itable->root);
+
+ bname = strtok_r(tmp_path, "/", &save_ptr);
+
+ /* sending a lookup on parent directory,
+ * Eg: if path is like /a/b/c/d/e/f/g/
+ * then we will send a lookup on a first and then b,c,d,etc
+ */
+
+ while (bname) {
+ linked_inode = NULL;
+ loc.inode = inode_grep(itable, loc.parent, bname);
+ if (loc.inode == NULL) {
+ loc.inode = inode_new(itable);
+ if (loc.inode == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ } else {
+ /*
+ * Inode is already populated in the inode table.
+ * Which means we already looked up the inode and
+ * linked with a dentry. So that we will skip
+ * lookup on this entry, and proceed to next.
+ */
+ linked_inode = loc.inode;
+ bname = strtok_r(NULL, "/", &save_ptr);
+ if (!bname) {
goto out;
+ }
+ inode_unref(loc.parent);
+ loc.parent = loc.inode;
+ gf_uuid_copy(loc.pargfid, loc.inode->gfid);
+ loc.inode = NULL;
+ continue;
}
- local->rebalance.target_node = dst_node;
-
- if (local->loc.inode) {
- /* lookup on dst */
- ret = syncop_lookup (dst_node, &local->loc, NULL,
- &stbuf, NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to lookup the file on %s",
- local->loc.path, dst_node->name);
- goto out;
- }
+ loc.name = bname;
+ ret = loc_path(&loc, bname);
- if (uuid_compare (stbuf.ia_gfid, local->loc.inode->gfid)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: gfid different on the target file on %s",
- local->loc.path, dst_node->name);
- ret = -1;
- goto out;
- }
+ ret = syncop_lookup(this, &loc, &iatt, NULL, NULL, NULL);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_SELFHEAL_FAILED,
+ "path=%s", path, "subvolume=%s", this->name, "bname=%s",
+ bname, NULL);
+ goto out;
}
- ret = 0;
+ linked_inode = inode_link(loc.inode, loc.parent, bname, &iatt);
+ if (!linked_inode)
+ goto out;
- if (!local->fd)
- goto out;
+ loc_wipe(&loc);
+ gf_uuid_copy(loc.pargfid, linked_inode->gfid);
+ loc.inode = NULL;
- if (local->loc.inode) {
- ret = syncop_open (dst_node, &local->loc,
- local->fd->flags, local->fd);
+ bname = strtok_r(NULL, "/", &save_ptr);
+ if (bname)
+ loc.parent = linked_inode;
+ }
+out:
+ inode_ref(linked_inode);
+ loc_wipe(&loc);
+ GF_FREE(tmp_path);
+
+ return linked_inode;
+}
+
+int
+dht_heal_full_path(void *data)
+{
+ call_frame_t *heal_frame = data;
+ dht_local_t *local = NULL;
+ loc_t loc = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ char *path = NULL;
+ int ret = -1;
+ xlator_t *source = NULL;
+ xlator_t *this = NULL;
+ inode_table_t *itable = NULL;
+ inode_t *inode = NULL;
+ inode_t *tmp_inode = NULL;
+
+ GF_VALIDATE_OR_GOTO("DHT", heal_frame, out);
+
+ local = heal_frame->local;
+ this = heal_frame->this;
+ source = heal_frame->cookie;
+ heal_frame->cookie = NULL;
+ gf_uuid_copy(loc.gfid, local->gfid);
+
+ if (local->loc.inode)
+ loc.inode = inode_ref(local->loc.inode);
+ else
+ goto out;
+
+ itable = loc.inode->table;
+ ret = syncop_getxattr(source, &loc, &dict, GET_ANCESTRY_PATH_KEY, NULL,
+ NULL);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_HEAL_ABORT,
+ "subvol=%s", source->name, NULL);
+ goto out;
+ }
+
+ ret = dict_get_str(dict, GET_ANCESTRY_PATH_KEY, &path);
+ if (path) {
+ inode = dht_heal_path(this, path, itable);
+ if (inode && inode != local->inode) {
+ /*
+ * if inode returned by heal function is different
+ * from what we passed, which means a racing thread
+ * already linked a different inode for dentry.
+ * So we will update our local->inode, so that we can
+ * retrurn proper inode.
+ */
+ tmp_inode = local->inode;
+ local->inode = inode;
+ inode_unref(tmp_inode);
+ tmp_inode = NULL;
} else {
- tmp_loc.inode = local->fd->inode;
- inode_path (local->fd->inode, NULL, &path);
- if (path)
- tmp_loc.path = path;
- ret = syncop_open (dst_node, &tmp_loc,
- local->fd->flags, local->fd);
- if (path)
- GF_FREE (path);
+ inode_unref(inode);
}
+ }
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to send open() on target file at %s",
- local->loc.path, dst_node->name);
- goto out;
- }
+out:
+ loc_wipe(&loc);
+ if (dict)
+ dict_unref(dict);
+ return 0;
+}
- ret = fd_ctx_set (local->fd, this, (uint64_t)(long)dst_node);
+int
+dht_heal_full_path_done(int op_ret, call_frame_t *heal_frame, void *data)
+{
+ call_frame_t *main_frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ int op_errno = 0;
+
+ local = heal_frame->local;
+ main_frame = local->main_frame;
+ local->main_frame = NULL;
+ this = heal_frame->this;
+
+ dht_set_fixed_dir_stat(&local->postparent);
+ if (local->need_xattr_heal) {
+ local->need_xattr_heal = 0;
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set fd-ctx target file at %s",
- local->loc.path, dst_node->name);
- goto out;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ NULL);
}
+ }
- ret = 0;
-out:
- return ret;
+ DHT_STACK_UNWIND(lookup, main_frame, 0, 0, local->inode, &local->stbuf,
+ local->xattr, &local->postparent);
+
+ DHT_STACK_DESTROY(heal_frame);
+ return 0;
}
+/* This function must be called inside an inode lock */
int
-dht_rebalance_in_progress_check (xlator_t *this, call_frame_t *frame)
+__dht_lock_subvol_set(inode_t *inode, xlator_t *this, xlator_t *lock_subvol)
{
+ dht_inode_ctx_t *ctx = NULL;
+ int ret = -1;
+ uint64_t value = 0;
- int ret = -1;
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
- ret = synctask_new (this->ctx->env, dht_rebalance_inprogress_task,
- dht_inprogress_check_done,
- frame, frame);
- return ret;
+ ret = __inode_ctx_get0(inode, this, &value);
+ if (ret || !value) {
+ return -1;
+ }
+
+ ctx = (dht_inode_ctx_t *)(uintptr_t)value;
+ ctx->lock_subvol = lock_subvol;
+out:
+ return ret;
+}
+
+xlator_t *
+dht_get_lock_subvolume(xlator_t *this, struct gf_flock *lock,
+ dht_local_t *local)
+{
+ xlator_t *subvol = NULL;
+ inode_t *inode = NULL;
+ int32_t ret = -1;
+ uint64_t value = 0;
+ xlator_t *cached_subvol = NULL;
+ dht_inode_ctx_t *ctx = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ GF_VALIDATE_OR_GOTO(this->name, lock, out);
+ GF_VALIDATE_OR_GOTO(this->name, local, out);
+
+ cached_subvol = local->cached_subvol;
+
+ if (local->loc.inode || local->fd) {
+ inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+ }
+
+ if (!inode)
+ goto out;
+
+ if (!(IA_ISDIR(inode->ia_type) || IA_ISINVAL(inode->ia_type))) {
+ /*
+ * We may get non-linked inode for directories as part
+ * of the selfheal code path. So checking for IA_INVAL
+ * type also. This will only happen for directory.
+ */
+ subvol = local->cached_subvol;
+ goto out;
+ }
+
+ if (lock->l_type != F_UNLCK) {
+ /*
+ * inode purging might happen on NFS between a lk
+ * and unlk. Due to this lk and unlk might be sent
+ * to different subvols.
+ * So during a lock request, taking a ref on inode
+ * to prevent inode purging. inode unref will happen
+ * in unlock cbk code path.
+ */
+ inode_ref(inode);
+ }
+
+ LOCK(&inode->lock);
+ ret = __inode_ctx_get0(inode, this, &value);
+ if (!ret && value) {
+ ctx = (dht_inode_ctx_t *)(uintptr_t)value;
+ subvol = ctx->lock_subvol;
+ }
+ if (!subvol && lock->l_type != F_UNLCK && cached_subvol) {
+ ret = __dht_lock_subvol_set(inode, this, cached_subvol);
+ if (ret) {
+ gf_uuid_unparse(inode->gfid, gfid);
+ UNLOCK(&inode->lock);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "lock_subvol gfid=%s", gfid, NULL);
+ goto post_unlock;
+ }
+ subvol = cached_subvol;
+ }
+ UNLOCK(&inode->lock);
+post_unlock:
+ if (!subvol && inode && lock->l_type != F_UNLCK) {
+ inode_unref(inode);
+ }
+out:
+ return subvol;
+}
+
+int
+dht_lk_inode_unref(call_frame_t *frame, int32_t op_ret)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ this = frame->this;
+
+ if (local->loc.inode || local->fd) {
+ inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+ }
+ if (!inode) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LOCK_INODE_UNREF_FAILED,
+ NULL);
+ goto out;
+ }
+
+ if (!(IA_ISDIR(inode->ia_type) || IA_ISINVAL(inode->ia_type))) {
+ ret = 0;
+ goto out;
+ }
+
+ switch (local->lock_type) {
+ case F_RDLCK:
+ case F_WRLCK:
+ if (op_ret) {
+ gf_uuid_unparse(inode->gfid, gfid);
+ gf_msg_debug(this->name, 0, "lock request failed for gfid %s",
+ gfid);
+ inode_unref(inode);
+ goto out;
+ }
+ break;
+
+ case F_UNLCK:
+ if (!op_ret) {
+ inode_unref(inode);
+ } else {
+ gf_uuid_unparse(inode->gfid, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCK_INODE_UNREF_FAILED, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+ default:
+ break;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+/* Code to update custom extended attributes from src dict to dst dict
+ */
+void
+dht_dir_set_heal_xattr(xlator_t *this, dht_local_t *local, dict_t *dst,
+ dict_t *src, int *uret, int *uflag)
+{
+ int ret = -1;
+ data_t *keyval = NULL;
+ int luret = -1;
+ int luflag = -1;
+ int i = 0;
+ char **xattrs_to_heal;
+
+ if (!src || !dst) {
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DST_NULL_SET_FAILED,
+ "path=%s", local->loc.path, NULL);
+ return;
+ }
+ /* Check if any user xattr present in src dict and set
+ it to dst dict
+ */
+ luret = dict_foreach_fnmatch(src, "user.*", dht_set_user_xattr, dst);
+ /* Check if any other custom xattr present in src dict
+ and set it to dst dict, here index start from 1 because
+ user xattr already checked in previous statement
+ */
+
+ xattrs_to_heal = get_xattrs_to_heal();
+
+ for (i = 1; xattrs_to_heal[i]; i++) {
+ keyval = dict_get(src, xattrs_to_heal[i]);
+ if (keyval) {
+ luflag = 1;
+ ret = dict_set(dst, xattrs_to_heal[i], keyval);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_DICT_SET_FAILED, "key=%s", xattrs_to_heal[i],
+ "path=%s", local->loc.path, NULL);
+ keyval = NULL;
+ }
+ }
+ if (uret)
+ (*uret) = luret;
+ if (uflag)
+ (*uflag) = luflag;
}
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index 7e29a7a8ce6..dbb8070b0da 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -8,1108 +8,1651 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "dht-common.h"
-int dht_access2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_readv2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_attr2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_open2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_flush2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_lk2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_fsync2 (xlator_t *this, call_frame_t *frame, int ret);
-
-int
-dht_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+static int
+dht_access2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_readv2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_attr2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_open2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_flush2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_lk2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_fsync2(xlator_t *this, xlator_t *dst_node, call_frame_t *frame, int ret);
+static int
+dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame,
+ int ret);
+
+static int
+dht_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, fd_t *fd, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = 0;
-
- local = frame->local;
- prev = cookie;
-
- local->op_errno = op_errno;
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto out;
- }
-
- if (!op_ret || (local->call_cnt != 1))
- goto out;
-
- /* rebalance would have happened */
- local->rebalance.target_op_fn = dht_open2;
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
+
+ /* Update ctx if the fd has been opened on the target*/
+ if (!op_ret && (local->call_cnt == 1)) {
+ dht_fd_ctx_set(this, fd, prev);
+ goto out;
+ }
+
+ if (!op_ret || (local->call_cnt != 1))
+ goto out;
+
+ /* rebalance would have happened */
+ local->rebalance.target_op_fn = dht_open2;
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
out:
- DHT_STACK_UNWIND (open, frame, op_ret, op_errno, local->fd, xdata);
+ DHT_STACK_UNWIND(open, frame, op_ret, op_errno, local->fd, xdata);
- return 0;
+ return 0;
}
-int
-dht_open2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_open2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- int op_errno = EINVAL;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
- local = frame->local;
- if (!local)
- goto out;
+ if (!frame || !frame->local)
+ goto out;
- op_errno = ENOENT;
- if (op_ret)
- goto out;
-
- local->call_cnt = 2;
- subvol = local->cached_subvol;
+ local = frame->local;
+ op_errno = local->op_errno;
- STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
- &local->loc, local->rebalance.flags, local->fd,
- NULL);
+ if (we_are_not_migrating(ret)) {
+ /* This DHT layer is not migrating the file */
+ DHT_STACK_UNWIND(open, frame, -1, local->op_errno, NULL,
+ local->rebalance.xdata);
return 0;
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2;
+
+ STACK_WIND_COOKIE(frame, dht_open_cbk, subvol, subvol, subvol->fops->open,
+ &local->loc, local->rebalance.flags, local->fd,
+ local->xattr_req);
+ return 0;
out:
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
- return 0;
+ DHT_STACK_UNWIND(open, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
int
-dht_open (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, fd_t *fd, dict_t *xdata)
+dht_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, loc, fd, GF_FOP_OPEN);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- local->rebalance.flags = flags;
- local->call_cnt = 1;
+ local = dht_local_init(frame, loc, fd, GF_FOP_OPEN);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- STACK_WIND (frame, dht_open_cbk, subvol, subvol->fops->open,
- loc, flags, fd, xdata);
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
- return 0;
+ local->rebalance.flags = flags;
+ local->call_cnt = 1;
+
+ STACK_WIND_COOKIE(frame, dht_open_cbk, subvol, subvol, subvol->fops->open,
+ loc, flags, fd, xdata);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(open, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
int
-dht_file_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
+dht_file_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *stbuf, dict_t *xdata)
{
- uint64_t tmp_subvol = 0;
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
-
- local = frame->local;
- prev = cookie;
-
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto out;
- }
+ xlator_t *subvol1 = 0;
+ xlator_t *subvol2 = 0;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((local->fop == GF_FOP_FSTAT) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- if (local->call_cnt != 1)
- goto out;
-
- /* Check if the rebalance phase2 is true */
- if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
- if (local->fd)
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (ret) {
- /* Phase 2 of migration */
- local->rebalance.target_op_fn = dht_attr2;
- ret = dht_rebalance_complete_check (this, frame);
- } else {
- /* value is already set in fd_ctx, that means no need
- to check for whether its complete or not. */
- dht_attr2 (this, frame, 0);
- }
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ local->op_errno = op_errno;
+ local->op_ret = op_ret;
+
+ /* Check if the rebalance phase2 is true */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(stbuf)) {
+ local->rebalance.target_op_fn = dht_attr2;
+ dht_set_local_rebalance(this, local, NULL, NULL, stbuf, xdata);
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+
+ dht_inode_ctx_get_mig_info(this, inode, &subvol1, &subvol2);
+ if (dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) {
+ /* Phase 2 of migration */
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ /* it is a non-fd op or it is an fd based Fop and
+ opened on the dst.*/
+ if (local->fd && !dht_fd_open_on_dst(this, local->fd, subvol2)) {
+ ret = dht_rebalance_complete_check(this, frame);
if (!ret)
- return 0;
+ return 0;
+ } else {
+ dht_attr2(this, subvol2, frame, 0);
+ return 0;
+ }
}
+ }
out:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (stat, frame, op_ret, op_errno, stbuf, xdata);
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ DHT_STACK_UNWIND(stat, frame, op_ret, op_errno, stbuf, xdata);
err:
- return 0;
+ return 0;
}
-int
-dht_attr2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_attr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- int op_errno = EINVAL;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(stat, frame, local->op_ret, op_errno,
+ &local->rebalance.postbuf, local->rebalance.xdata);
+ return 0;
+ }
- local = frame->local;
- if (!local)
- goto out;
+ if (subvol == NULL)
+ goto out;
- op_errno = local->op_errno;
- if (op_ret == -1)
- goto out;
+ local->call_cnt = 2;
- subvol = local->cached_subvol;
- local->call_cnt = 2;
+ if (local->fop == GF_FOP_FSTAT) {
+ STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol,
+ subvol->fops->fstat, local->fd, local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol,
+ subvol->fops->stat, &local->loc, local->xattr_req);
+ }
- if (local->fop == GF_FOP_FSTAT) {
- STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->fstat, local->fd, NULL);
- } else {
- STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->stat, &local->loc, NULL);
- }
- return 0;
+ return 0;
out:
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
- return 0;
+ DHT_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+static int
+dht_attr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *stbuf, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ local = frame->local;
+ prev = cookie;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+
+ goto post_unlock;
+ }
+
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+
+ local->op_ret = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ DHT_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno,
+ &local->stbuf, xdata);
+ }
+
+ return 0;
}
int
-dht_attr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *stbuf, dict_t *xdata)
+dht_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_STAT);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (IA_ISREG(loc->inode->ia_type)) {
+ local->call_cnt = 1;
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+ subvol = local->cached_subvol;
- local = frame->local;
- prev = cookie;
+ STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol,
+ subvol->fops->stat, loc, xdata);
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
+ return 0;
+ }
- goto unlock;
- }
+ local->call_cnt = call_cnt = layout->cnt;
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND_COOKIE(frame, dht_attr_cbk, subvol, subvol,
+ subvol->fops->stat, loc, xdata);
+ }
+
+ return 0;
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
-out:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- DHT_STACK_UNWIND (stat, frame, local->op_ret, local->op_errno,
- &local->stbuf, xdata);
- }
err:
- return 0;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
}
int
-dht_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+dht_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_STAT);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FSTAT);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (IA_ISREG(fd->inode->ia_type)) {
+ local->call_cnt = 1;
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ subvol = local->cached_subvol;
- if (IA_ISREG (loc->inode->ia_type)) {
- local->call_cnt = 1;
+ STACK_WIND_COOKIE(frame, dht_file_attr_cbk, subvol, subvol,
+ subvol->fops->fstat, fd, xdata);
+ return 0;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = layout->list[i].xlator;
+ STACK_WIND_COOKIE(frame, dht_attr_cbk, subvol, subvol,
+ subvol->fops->fstat, fd, xdata);
+ }
- subvol = local->cached_subvol;
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL);
- STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->stat, loc, xdata);
+ return 0;
+}
+int
+dht_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iovec *vector, int count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
+ xlator_t *src_subvol = 0;
+ xlator_t *dst_subvol = 0;
+
+ local = frame->local;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /* This is already second try, no need for re-check */
+ if (local->call_cnt != 1)
+ goto out;
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
+ if ((op_ret == -1) && !dht_inode_missing(op_errno))
+ goto out;
+
+ local->op_errno = op_errno;
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(stbuf)) {
+ local->op_ret = op_ret;
+ local->rebalance.target_op_fn = dht_readv2;
+ dht_set_local_rebalance(this, local, NULL, NULL, stbuf, xdata);
+ /* File would be migrated to other node */
+ ret = dht_inode_ctx_get_mig_info(this, local->fd->inode, &src_subvol,
+ &dst_subvol);
+
+ if (dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol) ||
+ !dht_fd_open_on_dst(this, local->fd, dst_subvol)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
return 0;
+ } else {
+ /* value is already set in fd_ctx, that means no need
+ to check for whether its complete or not. */
+ dht_readv2(this, dst_subvol, frame, 0);
+ return 0;
}
+ }
- local->call_cnt = call_cnt = layout->cnt;
+out:
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
- for (i = 0; i < call_cnt; i++) {
- subvol = layout->list[i].xlator;
+ DHT_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->stat,
- loc, xdata);
- }
+ return 0;
+}
+static int
+dht_readv2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(readv, frame, local->op_ret, op_errno, NULL, 0,
+ &local->rebalance.postbuf, NULL,
+ local->rebalance.xdata);
return 0;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
+ if (subvol == NULL)
+ goto out;
- return 0;
-}
+ local->call_cnt = 2;
+
+ STACK_WIND(frame, dht_readv_cbk, subvol, subvol->fops->readv, local->fd,
+ local->rebalance.size, local->rebalance.offset,
+ local->rebalance.flags, local->xattr_req);
+ return 0;
+
+out:
+ DHT_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+ return 0;
+}
int
-dht_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+dht_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size, off_t off,
+ uint32_t flags, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FSTAT);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- if (IA_ISREG (fd->inode->ia_type)) {
- local->call_cnt = 1;
+ local = dht_local_init(frame, NULL, fd, GF_FOP_READ);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- subvol = local->cached_subvol;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
- STACK_WIND (frame, dht_file_attr_cbk, subvol,
- subvol->fops->fstat, fd, xdata);
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
- return 0;
- }
+ local->rebalance.offset = off;
+ local->rebalance.size = size;
+ local->rebalance.flags = flags;
+ local->call_cnt = 1;
- local->call_cnt = call_cnt = layout->cnt;
+ STACK_WIND(frame, dht_readv_cbk, subvol, subvol->fops->readv, local->fd,
+ local->rebalance.size, local->rebalance.offset,
+ local->rebalance.flags, local->xattr_req);
- for (i = 0; i < call_cnt; i++) {
- subvol = layout->list[i].xlator;
- STACK_WIND (frame, dht_attr_cbk,
- subvol, subvol->fops->fstat,
- fd, xdata);
- }
-
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-int
-dht_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- struct iovec *vector, int count, struct iatt *stbuf,
- struct iobref *iobref, dict_t *xdata)
+static int
+dht_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int ret = 0;
-
- local = frame->local;
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (!prev)
+ goto out;
+ if (local->call_cnt != 1)
+ goto out;
+ if ((op_ret == -1) &&
+ ((op_errno == ENOTCONN) || dht_inode_missing(op_errno)) &&
+ IA_ISDIR(local->loc.inode->ia_type)) {
+ subvol = dht_subvol_next_available(this, prev);
+ if (!subvol)
+ goto out;
- /* This is already second try, no need for re-check */
- if (local->call_cnt != 1)
- goto out;
-
- if ((op_ret == -1) && (op_errno != ENOENT))
- goto out;
-
- if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (stbuf)) {
- /* File would be migrated to other node */
- ret = fd_ctx_get (local->fd, this, NULL);
- if (ret) {
- local->rebalance.target_op_fn = dht_readv2;
- ret = dht_rebalance_complete_check (this, frame);
- } else {
- /* value is already set in fd_ctx, that means no need
- to check for whether its complete or not. */
- dht_readv2 (this, frame, 0);
- }
- if (!ret)
- return 0;
+ /* check if we are done with visiting every node */
+ if (subvol == local->cached_subvol) {
+ goto out;
}
-out:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count, stbuf,
- iobref, xdata);
-
+ STACK_WIND_COOKIE(frame, dht_access_cbk, subvol, subvol,
+ subvol->fops->access, &local->loc,
+ local->rebalance.flags, NULL);
return 0;
+ }
+ if ((op_ret == -1) && dht_inode_missing(op_errno) &&
+ !(IA_ISDIR(local->loc.inode->ia_type))) {
+ /* File would be migrated to other node */
+ local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_access2;
+ ret = dht_rebalance_complete_check(frame->this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STACK_UNWIND(access, frame, op_ret, op_errno, xdata);
+ return 0;
}
-int
-dht_readv2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_access2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- int op_errno = EINVAL;
-
- local = frame->local;
- if (!local)
- goto out;
+ dht_local_t *local = NULL;
+ int op_errno = EINVAL;
- op_errno = local->op_errno;
- if (op_ret == -1)
- goto out;
+ local = frame->local;
+ if (!local)
+ goto out;
- local->call_cnt = 2;
- subvol = local->cached_subvol;
+ op_errno = local->op_errno;
- STACK_WIND (frame, dht_readv_cbk, subvol, subvol->fops->readv,
- local->fd, local->rebalance.size, local->rebalance.offset,
- local->rebalance.flags, NULL);
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(access, frame, -1, op_errno, NULL);
return 0;
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2;
+
+ STACK_WIND_COOKIE(frame, dht_access_cbk, subvol, subvol,
+ subvol->fops->access, &local->loc, local->rebalance.flags,
+ local->xattr_req);
+
+ return 0;
out:
- DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
- return 0;
+ DHT_STACK_UNWIND(access, frame, -1, op_errno, NULL);
+ return 0;
}
int
-dht_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off, uint32_t flags, dict_t *xdata)
+dht_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_READ);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_ACCESS);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.flags = mask;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND_COOKIE(frame, dht_access_cbk, subvol, subvol,
+ subvol->fops->access, loc, mask, xdata);
+
+ return 0;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(access, frame, -1, op_errno, NULL);
- local->rebalance.offset = off;
- local->rebalance.size = size;
- local->rebalance.flags = flags;
- local->call_cnt = 1;
+ return 0;
+}
- STACK_WIND (frame, dht_readv_cbk,
- subvol, subvol->fops->readv,
- fd, size, off, flags, xdata);
+int
+dht_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = 0;
+ int ret = 0;
- return 0;
+ local = frame->local;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+ local->op_errno = op_errno;
+
+ if (local->call_cnt != 1)
+ goto out;
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
return 0;
-}
+ }
-int
-dht_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- int ret = -1;
- dht_local_t *local = NULL;
+ local->rebalance.target_op_fn = dht_flush2;
- local = frame->local;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- if (local->call_cnt != 1)
- goto out;
+ /* If context is set, then send flush() it to the destination */
+ dht_inode_ctx_get_mig_info(this, local->fd->inode, NULL, &subvol);
+ if (subvol && dht_fd_open_on_dst(this, local->fd, subvol)) {
+ dht_flush2(this, subvol, frame, 0);
+ return 0;
+ }
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- /* File would be migrated to other node */
- local->rebalance.target_op_fn = dht_access2;
- ret = dht_rebalance_complete_check (frame->this, frame);
- if (!ret)
- return 0;
+ if (op_errno == EREMOTE) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret) {
+ return 0;
}
+ }
out:
- DHT_STACK_UNWIND (access, frame, op_ret, op_errno, xdata);
- return 0;
+ DHT_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata);
+
+ return 0;
}
-int
-dht_access2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_flush2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- int op_errno = EINVAL;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- local = frame->local;
- if (!local)
- goto out;
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
- op_errno = local->op_errno;
- if (op_ret == -1)
- goto out;
+ local = frame->local;
- local->call_cnt = 2;
- subvol = local->cached_subvol;
+ op_errno = local->op_errno;
- STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
- &local->loc, local->rebalance.flags, NULL);
+ if (subvol == NULL)
+ goto out;
- return 0;
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND(frame, dht_flush_cbk, subvol, subvol->fops->flush, local->fd,
+ local->xattr_req);
+
+ return 0;
out:
- DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
- return 0;
+ DHT_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
+ return 0;
}
-
int
-dht_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
- dict_t *xdata)
+dht_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_ACCESS);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- local->rebalance.flags = mask;
- local->call_cnt = 1;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- STACK_WIND (frame, dht_access_cbk, subvol, subvol->fops->access,
- loc, mask, xdata);
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FLUSH);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- return 0;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ local->call_cnt = 1;
+
+ STACK_WIND(frame, dht_flush_cbk, subvol, subvol->fops->flush, fd,
+ local->xattr_req);
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (access, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(flush, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-
int
-dht_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
+dht_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+ xlator_t *src_subvol = 0;
+ xlator_t *dst_subvol = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ local->op_errno = op_errno;
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
+ if (op_ret == -1 && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
+ }
+ goto out;
+ }
- local = frame->local;
+ local->op_ret = op_ret;
+ inode = local->fd->inode;
- local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_fsync2;
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
- if (local->call_cnt != 1)
- goto out;
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
- /* If context is set, then send flush() it to the destination */
- ret = fd_ctx_get (local->fd, this, NULL);
- if (!ret) {
- dht_flush2 (this, frame, 0);
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
+
+ dht_inode_ctx_get_mig_info(this, inode, &src_subvol, &dst_subvol);
+
+ if (dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol) ||
+ !dht_fd_open_on_dst(this, local->fd, dst_subvol)) {
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
return 0;
+ } else {
+ dht_fsync2(this, dst_subvol, frame, 0);
+ return 0;
}
+ }
out:
- DHT_STACK_UNWIND (flush, frame, op_ret, op_errno, xdata);
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
- return 0;
+ DHT_STACK_UNWIND(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+ return 0;
}
-int
-dht_flush2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_fsync2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
-
- local = frame->local;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(fsync, frame, local->op_ret, op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
+ return 0;
+ }
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
+ if (subvol == NULL)
+ goto out;
- if (!subvol)
- subvol = local->cached_subvol;
+ local->call_cnt = 2; /* This is the second attempt */
- local->call_cnt = 2; /* This is the second attempt */
+ STACK_WIND_COOKIE(frame, dht_fsync_cbk, subvol, subvol, subvol->fops->fsync,
+ local->fd, local->rebalance.flags, local->xattr_req);
- STACK_WIND (frame, dht_flush_cbk,
- subvol, subvol->fops->flush, local->fd, NULL);
+ return 0;
- return 0;
+out:
+ DHT_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-
int
-dht_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+dht_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FLUSH);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- local->call_cnt = 1;
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FSYNC);
+ if (!local) {
+ op_errno = ENOMEM;
- STACK_WIND (frame, dht_flush_cbk,
- subvol, subvol->fops->flush, fd, xdata);
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
- return 0;
+ local->call_cnt = 1;
+ local->rebalance.flags = datasync;
+
+ subvol = local->cached_subvol;
+
+ STACK_WIND_COOKIE(frame, dht_fsync_cbk, subvol, subvol, subvol->fops->fsync,
+ local->fd, local->rebalance.flags, local->xattr_req);
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fsync, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
-int
-dht_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
- int op_errno, struct iatt *prebuf, struct iatt *postbuf,
- dict_t *xdata)
+/* TODO: for 'lk()' call, we need some other special error, may be ESTALE to
+ indicate that lock migration happened on the fd, so we can consider it as
+ phase 2 of migration */
+static int
+dht_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct gf_flock *flock, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = -1;
+ dht_local_t *local = NULL;
+ int ret = -1;
+ xlator_t *subvol = NULL;
- local = frame->local;
- prev = cookie;
+ local = frame->local;
- local->op_errno = op_errno;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto out;
- }
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
- if (local->call_cnt != 1) {
- if (local->stbuf.ia_blocks) {
- dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
- dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
- }
- goto out;
- }
+ if (local->call_cnt != 1)
+ goto out;
- ret = fd_ctx_get (local->fd, this, NULL);
- if (ret) {
- local->rebalance.target_op_fn = dht_fsync2;
+ local->rebalance.target_op_fn = dht_lk2;
- /* Check if the rebalance phase1 is true */
- if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
- dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
- dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- ret = dht_rebalance_in_progress_check (this, frame);
- }
+ if (xdata)
+ local->rebalance.xdata = dict_ref(xdata);
- /* Check if the rebalance phase2 is true */
- if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
- ret = dht_rebalance_complete_check (this, frame);
- }
+ if (op_errno == EREMOTE) {
+ dht_inode_ctx_get_mig_info(this, local->fd->inode, NULL, &subvol);
+ if (subvol && dht_fd_open_on_dst(this, local->fd, subvol)) {
+ dht_lk2(this, subvol, frame, 0);
+ return 0;
} else {
- dht_fsync2 (this, frame, 0);
- }
- if (!ret)
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret) {
return 0;
+ }
+ }
+ }
out:
- DHT_STRIP_PHASE1_FLAGS (postbuf);
- DHT_STRIP_PHASE1_FLAGS (prebuf);
- DHT_STACK_UNWIND (fsync, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ dht_lk_inode_unref(frame, op_ret);
+ DHT_STACK_UNWIND(lk, frame, op_ret, op_errno, flock, xdata);
- return 0;
+ return 0;
}
-int
-dht_fsync2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_lk2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- local = frame->local;
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
+ local = frame->local;
- if (!subvol)
- subvol = local->cached_subvol;
+ op_errno = local->op_errno;
- local->call_cnt = 2; /* This is the second attempt */
+ if (subvol == NULL)
+ goto out;
- STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
- local->fd, local->rebalance.flags, NULL);
+ local->call_cnt = 2; /* This is the second attempt */
- return 0;
+ STACK_WIND(frame, dht_lk_cbk, subvol, subvol->fops->lk, local->fd,
+ local->rebalance.lock_cmd, &local->rebalance.flock,
+ local->xattr_req);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
+ return 0;
}
int
-dht_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
- dict_t *xdata)
+dht_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ xlator_t *lock_subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_LK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->lock_type = flock->l_type;
+ lock_subvol = dht_get_lock_subvolume(this, flock, local);
+ if (!lock_subvol) {
+ gf_msg_debug(this->name, 0, "no lock subvolume for path=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /*
+ local->cached_subvol = lock_subvol;
+ ret = dht_check_and_open_fd_on_subvol (this, frame);
+ if (ret)
+ goto err;
+ */
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ local->rebalance.flock = *flock;
+ local->rebalance.lock_cmd = cmd;
+
+ local->call_cnt = 1;
+
+ STACK_WIND(frame, dht_lk_cbk, lock_subvol, lock_subvol->fops->lk, fd, cmd,
+ flock, xdata);
+
+ return 0;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
- local = dht_local_init (frame, NULL, fd, GF_FOP_FSYNC);
- if (!local) {
- op_errno = ENOMEM;
+ return 0;
+}
- goto err;
- }
+static int
+dht_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct gf_lease *lease, dict_t *xdata)
+{
+ DHT_STACK_UNWIND(lease, frame, op_ret, op_errno, lease, xdata);
- local->call_cnt = 1;
- local->rebalance.flags = datasync;
+ return 0;
+}
- subvol = local->cached_subvol;
+int
+dht_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
- STACK_WIND (frame, dht_fsync_cbk, subvol, subvol->fops->fsync,
- fd, datasync, xdata);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
- return 0;
+ subvol = dht_subvol_get_cached(this, loc->inode);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* TODO: for rebalance, we need to preserve the fop arguments */
+ STACK_WIND(frame, dht_lease_cbk, subvol, subvol->fops->lease, loc, lease,
+ xdata);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lease, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
-/* TODO: for 'lk()' call, we need some other special error, may be ESTALE to
- indicate that lock migration happened on the fd, so we can consider it as
- phase 2 of migration */
-int
-dht_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct gf_flock *flock, dict_t *xdata)
+/* Symlinks are currently not migrated, so no need for any check here */
+static int
+dht_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, const char *path, struct iatt *stbuf,
+ dict_t *xdata)
{
- DHT_STACK_UNWIND (lk, frame, op_ret, op_errno, flock, xdata);
+ dht_local_t *local = NULL;
- return 0;
-}
+ local = frame->local;
+ if (op_ret == -1)
+ goto err;
+
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+err:
+ DHT_STRIP_PHASE1_FLAGS(stbuf);
+ DHT_STACK_UNWIND(readlink, frame, op_ret, op_errno, path, stbuf, xdata);
+ return 0;
+}
int
-dht_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int cmd, struct gf_flock *flock, dict_t *xdata)
+dht_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
-
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_READLINK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ STACK_WIND(frame, dht_readlink_cbk, subvol, subvol->fops->readlink, loc,
+ size, xdata);
+
+ return 0;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(readlink, frame, -1, op_errno, NULL, NULL, NULL);
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ return 0;
+}
- /* TODO: for rebalance, we need to preserve the fop arguments */
- STACK_WIND (frame, dht_lk_cbk, subvol, subvol->fops->lk, fd,
- cmd, flock, xdata);
+/* Get both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
+ * Use DHT_MODE_IN_XDATA_KEY if available, else fall back to
+ * DHT_IATT_IN_XDATA_KEY
+ * This will return a dummy iatt with only the mode and type set
+ */
+static int
+dht_read_iatt_from_xdata(dict_t *xdata, struct iatt *stbuf)
+{
+ int ret = -1;
+ int32_t mode = 0;
- return 0;
+ ret = dict_get_int32(xdata, DHT_MODE_IN_XDATA_KEY, &mode);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
+ if (ret) {
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
+ } else {
+ stbuf->ia_prot = ia_prot_from_st_mode(mode);
+ stbuf->ia_type = ia_type_from_st_mode(mode);
+ }
- return 0;
+ return ret;
}
-/* Symlinks are currently not migrated, so no need for any check here */
int
-dht_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, const char *path,
- struct iatt *stbuf, dict_t *xdata)
+dht_common_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *call_frame = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src_subvol = NULL;
+ xlator_t *dst_subvol = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ int ret = -1;
+ inode_t *inode = NULL;
+
+ local = frame->local;
+ call_frame = cookie;
+ prev = call_frame->this;
+
+ local->op_errno = op_errno;
+
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1.",
+ prev->name);
+ goto out;
+ }
+
+ if (local->call_cnt != 1)
+ goto out;
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+
+ ret = dht_read_iatt_from_xdata(xdata, &stbuf);
+
+ if ((!op_ret) && (ret)) {
+ /* This is a potential problem and can cause corruption
+ * with sharding.
+ * Oh well. We tried.
+ */
+ goto out;
+ }
+
+ local->op_ret = op_ret;
+ local->rebalance.target_op_fn = dht_common_xattrop2;
+ if (xdata)
+ local->rebalance.xdata = dict_ref(xdata);
- local = frame->local;
- if (op_ret == -1)
- goto err;
+ if (dict)
+ local->rebalance.dict = dict_ref(dict);
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(&stbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(&stbuf)) {
+ inode = local->loc.inode ? local->loc.inode : local->fd->inode;
+ dht_inode_ctx_get_mig_info(this, inode, &src_subvol, &dst_subvol);
+
+ if (dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol) ||
+ !dht_fd_open_on_dst(this, local->fd, dst_subvol)) {
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ } else {
+ dht_common_xattrop2(this, dst_subvol, frame, 0);
+ return 0;
}
+ }
-err:
- DHT_STRIP_PHASE1_FLAGS (stbuf);
- DHT_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, stbuf, xdata);
+out:
+ if (local->fop == GF_FOP_XATTROP) {
+ DHT_STACK_UNWIND(xattrop, frame, op_ret, op_errno, dict, xdata);
+ } else {
+ DHT_STACK_UNWIND(fxattrop, frame, op_ret, op_errno, dict, xdata);
+ }
- return 0;
+ return 0;
}
-
-int
-dht_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
- dict_t *xdata)
+static int
+dht_common_xattrop2(xlator_t *this, xlator_t *subvol, call_frame_t *frame,
+ int ret)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_READLINK);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ if (local->fop == GF_FOP_XATTROP) {
+ DHT_STACK_UNWIND(xattrop, frame, local->op_ret, op_errno,
+ local->rebalance.dict, local->rebalance.xdata);
+ } else {
+ DHT_STACK_UNWIND(fxattrop, frame, local->op_ret, op_errno,
+ local->rebalance.dict, local->rebalance.xdata);
}
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ return 0;
+ }
- STACK_WIND (frame, dht_readlink_cbk,
- subvol, subvol->fops->readlink,
- loc, size, xdata);
+ if (subvol == NULL)
+ goto out;
- return 0;
+ local->call_cnt = 2; /* This is the second attempt */
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (readlink, frame, -1, op_errno, NULL, NULL, NULL);
+ if (local->fop == GF_FOP_XATTROP) {
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol, subvol->fops->xattrop,
+ &local->loc, local->rebalance.flags, local->rebalance.xattr,
+ local->xattr_req);
+ } else {
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol,
+ subvol->fops->fxattrop, local->fd, local->rebalance.flags,
+ local->rebalance.xattr, local->xattr_req);
+ }
- return 0;
-}
+ return 0;
-/* Currently no translators on top of 'distribute' will be using
- * below fops, hence not implementing 'migration' related checks
- */
+out:
-int
-dht_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+ /* If local is unavailable we could be unwinding the wrong
+ * function here */
+
+ if (local && (local->fop == GF_FOP_XATTROP)) {
+ DHT_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL);
+ } else {
+ DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
+ }
+ return 0;
+}
+
+static int
+dht_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- DHT_STACK_UNWIND (xattrop, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ DHT_STACK_UNWIND(xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
+/* Set both DHT_IATT_IN_XDATA_KEY and DHT_MODE_IN_XDATA_KEY
+ * Use DHT_MODE_IN_XDATA_KEY if available. Else fall back to
+ * DHT_IATT_IN_XDATA_KEY
+ */
+static int
+dht_request_iatt_in_xdata(dict_t *xattr_req)
+{
+ int ret = -1;
+
+ ret = dict_set_int8(xattr_req, DHT_MODE_IN_XDATA_KEY, 1);
+ ret = dict_set_int8(xattr_req, DHT_IATT_IN_XDATA_KEY, 1);
+
+ /* At least one call succeeded */
+ return ret;
+}
int
-dht_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+dht_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_XATTROP);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_XATTROP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for gfid=%s",
+ uuid_utoa(loc->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /* Todo : Handle dirs as well. At the moment the only xlator above dht
+ * that uses xattrop is sharding and that is only for files */
+
+ if (IA_ISDIR(loc->inode->ia_type)) {
+ STACK_WIND(frame, dht_xattrop_cbk, subvol, subvol->fops->xattrop, loc,
+ flags, dict, xdata);
+
+ } else {
+ local->xattr_req = xdata ? dict_ref(xdata) : dict_new();
+ local->call_cnt = 1;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local->rebalance.xattr = dict_ref(dict);
+ local->rebalance.flags = flags;
- local->call_cnt = 1;
+ ret = dht_request_iatt_in_xdata(local->xattr_req);
+
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Failed to set dictionary key %s file=%s",
+ DHT_IATT_IN_XDATA_KEY, loc->path);
+ }
- STACK_WIND (frame,
- dht_xattrop_cbk,
- subvol, subvol->fops->xattrop,
- loc, flags, dict, xdata);
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol, subvol->fops->xattrop,
+ loc, local->rebalance.flags, local->rebalance.xattr,
+ local->xattr_req);
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (xattrop, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-
-int
-dht_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+static int
+dht_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- DHT_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ DHT_STACK_UNWIND(fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
-
int
-dht_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+dht_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ subvol = dht_subvol_get_cached(this, fd->inode);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FXATTROP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /* Todo : Handle dirs as well. At the moment the only xlator above dht
+ * that uses xattrop is sharding and that is only for files */
+
+ if (IA_ISDIR(fd->inode->ia_type)) {
+ STACK_WIND(frame, dht_fxattrop_cbk, subvol, subvol->fops->fxattrop, fd,
+ flags, dict, xdata);
+
+ } else {
+ local->xattr_req = xdata ? dict_ref(xdata) : dict_new();
+ local->call_cnt = 1;
+
+ local->rebalance.xattr = dict_ref(dict);
+ local->rebalance.flags = flags;
+
+ ret = dht_request_iatt_in_xdata(local->xattr_req);
+
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to set dictionary key %s fd=%p",
+ DHT_IATT_IN_XDATA_KEY, fd);
}
- STACK_WIND (frame,
- dht_fxattrop_cbk,
- subvol, subvol->fops->fxattrop,
- fd, flags, dict, xdata);
+ STACK_WIND(frame, dht_common_xattrop_cbk, subvol,
+ subvol->fops->fxattrop, fd, local->rebalance.flags,
+ local->rebalance.xattr, local->xattr_req);
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fxattrop, frame, -1, op_errno, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fxattrop, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
+/* Currently no translators on top of 'distribute' will be using
+ * below fops, hence not implementing 'migration' related checks
+ */
-int
-dht_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+static int
+dht_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (inodelk, frame, op_ret, op_errno, xdata);
- return 0;
+ dht_lk_inode_unref(frame, op_ret);
+ DHT_STACK_UNWIND(inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-
int32_t
-dht_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+dht_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
+ xlator_t *lock_subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
- local = dht_local_init (frame, loc, NULL, GF_FOP_INODELK);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ local = dht_local_init(frame, loc, NULL, GF_FOP_INODELK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local->lock_type = lock->l_type;
+ lock_subvol = dht_get_lock_subvolume(this, lock, local);
+ if (!lock_subvol) {
+ gf_msg_debug(this->name, 0, "no lock subvolume for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
- local->call_cnt = 1;
+ local->call_cnt = 1;
- STACK_WIND (frame,
- dht_inodelk_cbk,
- subvol, subvol->fops->inodelk,
- volume, loc, cmd, lock, xdata);
+ STACK_WIND(frame, dht_inodelk_cbk, lock_subvol, lock_subvol->fops->inodelk,
+ volume, loc, cmd, lock, xdata);
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (inodelk, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(inodelk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-
int
-dht_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+dht_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- DHT_STACK_UNWIND (finodelk, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ dht_local_t *local = NULL;
+ int ret = 0;
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
-int
-dht_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
-{
- xlator_t *subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- subvol = dht_subvol_get_cached (this, fd->inode);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ local = frame->local;
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- STACK_WIND (frame, dht_finodelk_cbk, subvol, subvol->fops->finodelk,
- volume, fd, cmd, lock, xdata);
+out:
+ dht_lk_inode_unref(frame, op_ret);
+ DHT_STACK_UNWIND(finodelk, frame, op_ret, op_errno, xdata);
- return 0;
+ return 0;
+}
+
+int
+dht_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ xlator_t *lock_subvol = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_INODELK);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->call_cnt = 1;
+ local->lock_type = lock->l_type;
+
+ lock_subvol = dht_get_lock_subvolume(this, lock, local);
+ if (!lock_subvol) {
+ gf_msg_debug(this->name, 0, "no lock subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ /*
+ local->cached_subvol = lock_subvol;
+ ret = dht_check_and_open_fd_on_subvol (this, frame);
+ if (ret)
+ goto err;
+ */
+ local->rebalance.flock = *lock;
+ local->rebalance.lock_cmd = cmd;
+ local->key = gf_strdup(volume);
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND(frame, dht_finodelk_cbk, lock_subvol,
+ lock_subvol->fops->finodelk, volume, fd, cmd, lock, xdata);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (finodelk, frame, -1, op_errno, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(finodelk, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
diff --git a/xlators/cluster/dht/src/dht-inode-write.c b/xlators/cluster/dht/src/dht-inode-write.c
index d4a3ecc391c..2f23ce90fbd 100644
--- a/xlators/cluster/dht/src/dht-inode-write.c
+++ b/xlators/cluster/dht/src/dht-inode-write.c
@@ -8,605 +8,1397 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "dht-common.h"
-int dht_writev2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_truncate2 (xlator_t *this, call_frame_t *frame, int ret);
-int dht_setattr2 (xlator_t *this, call_frame_t *frame, int ret);
+static int
+dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
+static int
+dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret);
int
-dht_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+dht_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol1 = NULL;
+ xlator_t *subvol2 = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ /* writev fails with EBADF if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could be a valid bad fd error.
+ */
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- if (op_ret == -1) {
- goto out;
+ if (op_ret == -1 && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug(this->name, 0, "subvolume %s returned -1 (%s)", prev->name,
+ strerror(op_errno));
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ /* preserve the modes of source */
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
}
+ goto out;
+ }
- local = frame->local;
- if (!local) {
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ local->rebalance.target_op_fn = dht_writev2;
- if (local->call_cnt != 1) {
- /* preserve the modes of source */
- if (local->stbuf.ia_blocks) {
- dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
- dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
- }
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ /* We might need to pass the stbuf information to the higher DHT
+ * layer for appropriate handling.
+ */
+
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ if (!local->xattr_req) {
+ local->xattr_req = dict_new();
+ if (!local->xattr_req) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
+ "insufficient memory");
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
goto out;
+ }
}
- local->rebalance.target_op_fn = dht_writev2;
-
- /* Phase 2 of migration */
- if (IS_DHT_MIGRATION_PHASE2 (postbuf)) {
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
+ ret = dict_set_uint32(local->xattr_req, GF_PROTECT_FROM_EXTERNAL_WRITES,
+ 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_DICT_SET_FAILED, 0,
+ "Failed to set key %s in dictionary",
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
+ local->op_errno = ENOMEM;
+ local->op_ret = -1;
+ goto out;
}
- /* Check if the rebalance phase1 is true */
- if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
- dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
- dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
- ret = fd_ctx_get (local->fd, this, NULL);
- if (!ret) {
- dht_writev2 (this, frame, 0);
- return 0;
- }
- ret = dht_rebalance_in_progress_check (this, frame);
- if (!ret)
- return 0;
+ ret = dht_inode_ctx_get_mig_info(this, local->fd->inode, &subvol1,
+ &subvol2);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) {
+ if (dht_fd_open_on_dst(this, local->fd, subvol2)) {
+ dht_writev2(this, subvol2, frame, 0);
+ return 0;
+ }
}
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
out:
- DHT_STRIP_PHASE1_FLAGS (postbuf);
- DHT_STRIP_PHASE1_FLAGS (prebuf);
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
- DHT_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ DHT_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
- return 0;
+ return 0;
}
-int
-dht_writev2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_writev2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if ((frame == NULL) || (frame->local == NULL))
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(writev, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
+ return 0;
+ }
- local = frame->local;
+ if (subvol == NULL)
+ goto out;
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
+ local->call_cnt = 2; /* This is the second attempt */
- if (!subvol)
- subvol = local->cached_subvol;
+ STACK_WIND_COOKIE(frame, dht_writev_cbk, subvol, subvol,
+ subvol->fops->writev, local->fd, local->rebalance.vector,
+ local->rebalance.count, local->rebalance.offset,
+ local->rebalance.flags, local->rebalance.iobref,
+ local->xattr_req);
- local->call_cnt = 2; /* This is the second attempt */
+ return 0;
- STACK_WIND (frame, dht_writev_cbk,
- subvol, subvol->fops->writev,
- local->fd, local->rebalance.vector, local->rebalance.count,
- local->rebalance.offset, local->rebalance.flags,
- local->rebalance.iobref, NULL);
+out:
+ DHT_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
int
-dht_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int count, off_t off, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
+dht_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int count, off_t off, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_WRITE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ local->rebalance.vector = iov_dup(vector, count);
+ local->rebalance.offset = off;
+ local->rebalance.count = count;
+ local->rebalance.flags = flags;
+ local->rebalance.iobref = iobref_ref(iobref);
+ local->call_cnt = 1;
+
+ STACK_WIND_COOKIE(frame, dht_writev_cbk, subvol, subvol,
+ subvol->fops->writev, fd, local->rebalance.vector,
+ local->rebalance.count, local->rebalance.offset,
+ local->rebalance.flags, local->rebalance.iobref,
+ local->xattr_req);
+
+ return 0;
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
- local = dht_local_init (frame, NULL, fd, GF_FOP_WRITE);
- if (!local) {
+ return 0;
+}
- op_errno = ENOMEM;
- goto err;
- }
+int
+dht_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ xlator_t *src_subvol = NULL;
+ xlator_t *dst_subvol = NULL;
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ /* Needs to be checked only for ftruncate.
+ * ftruncate fails with EBADF/EINVAL if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+
+ if ((local->fop == GF_FOP_FTRUNCATE) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
}
+ goto out;
+ }
+ local->rebalance.target_op_fn = dht_truncate2;
- local->rebalance.vector = iov_dup (vector, count);
- local->rebalance.offset = off;
- local->rebalance.count = count;
- local->rebalance.flags = flags;
- local->rebalance.iobref = iobref_ref (iobref);
- local->call_cnt = 1;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- STACK_WIND (frame, dht_writev_cbk,
- subvol, subvol->fops->writev,
- fd, vector, count, off, flags, iobref, xdata);
+ /* We might need to pass the stbuf information to the higher DHT
+ * layer for appropriate handling.
+ */
- return 0;
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
+
+ inode = (local->fd) ? local->fd->inode : local->loc.inode;
+
+ dht_inode_ctx_get_mig_info(this, inode, &src_subvol, &dst_subvol);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol)) {
+ if ((!local->fd) ||
+ ((local->fd) &&
+ dht_fd_open_on_dst(this, local->fd, dst_subvol))) {
+ dht_truncate2(this, dst_subvol, frame, 0);
+ return 0;
+ }
+ }
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
+ DHT_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, xdata);
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+static int
+dht_truncate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto out;
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ /* This dht xlator is not migrating the file */
+ if (we_are_not_migrating(ret)) {
+ DHT_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
return 0;
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (local->fop == GF_FOP_TRUNCATE) {
+ STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol,
+ subvol->fops->truncate, &local->loc,
+ local->rebalance.offset, local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol,
+ subvol->fops->ftruncate, local->fd,
+ local->rebalance.offset, local->xattr_req);
+ }
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
+int
+dht_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_TRUNCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for gfid=%s",
+ uuid_utoa(loc->inode->gfid));
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol,
+ subvol->fops->truncate, loc, offset, xdata);
+
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
int
-dht_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+dht_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = -1;
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FTRUNCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.offset = offset;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND_COOKIE(frame, dht_truncate_cbk, subvol, subvol,
+ subvol->fops->ftruncate, fd, local->rebalance.offset,
+ local->xattr_req);
+ return 0;
- GF_VALIDATE_OR_GOTO ("dht", frame, err);
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", frame->local, out);
- GF_VALIDATE_OR_GOTO ("dht", cookie, out);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
- local = frame->local;
- prev = cookie;
+ return 0;
+}
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- local->op_errno = op_errno;
- local->op_ret = -1;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
+int
+dht_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ xlator_t *src_subvol = NULL;
+ xlator_t *dst_subvol = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ /* fallocate fails with EBADF if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- goto out;
- }
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
- if (local->call_cnt != 1) {
- if (local->stbuf.ia_blocks) {
- dht_iatt_merge (this, postbuf, &local->stbuf, NULL);
- dht_iatt_merge (this, prebuf, &local->prebuf, NULL);
- }
- goto out;
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
}
+ goto out;
+ }
- local->rebalance.target_op_fn = dht_truncate2;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ local->rebalance.target_op_fn = dht_fallocate2;
- /* Phase 2 of migration */
- if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
- }
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
- /* Check if the rebalance phase1 is true */
- if (IS_DHT_MIGRATION_PHASE1 (postbuf)) {
- dht_iatt_merge (this, &local->stbuf, postbuf, NULL);
- dht_iatt_merge (this, &local->prebuf, prebuf, NULL);
- ret = fd_ctx_get (local->fd, this, NULL);
- if (!ret) {
- dht_truncate2 (this, frame, 0);
- return 0;
- }
- ret = dht_rebalance_in_progress_check (this, frame);
- if (!ret)
- return 0;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
+
+ dht_inode_ctx_get_mig_info(this, local->fd->inode, &src_subvol,
+ &dst_subvol);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol)) {
+ if (dht_fd_open_on_dst(this, local->fd, dst_subvol)) {
+ dht_fallocate2(this, dst_subvol, frame, 0);
+ return 0;
+ }
}
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
out:
- DHT_STRIP_PHASE1_FLAGS (postbuf);
- DHT_STRIP_PHASE1_FLAGS (prebuf);
- DHT_STACK_UNWIND (truncate, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
+
+ DHT_STACK_UNWIND(fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
err:
- return 0;
+ return 0;
}
-
-int
-dht_truncate2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_fallocate2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(fallocate, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
+ return 0;
+ }
- local = frame->local;
+ if (subvol == NULL)
+ goto out;
- if (local->fd)
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
+ local->call_cnt = 2; /* This is the second attempt */
- if (!subvol)
- subvol = local->cached_subvol;
+ STACK_WIND_COOKIE(frame, dht_fallocate_cbk, subvol, subvol,
+ subvol->fops->fallocate, local->fd,
+ local->rebalance.flags, local->rebalance.offset,
+ local->rebalance.size, local->xattr_req);
- local->call_cnt = 2; /* This is the second attempt */
+ return 0;
- if (local->fop == GF_FOP_TRUNCATE) {
- STACK_WIND (frame, dht_truncate_cbk, subvol,
- subvol->fops->truncate, &local->loc,
- local->rebalance.offset, NULL);
- } else {
- STACK_WIND (frame, dht_truncate_cbk, subvol,
- subvol->fops->ftruncate, local->fd,
- local->rebalance.offset, NULL);
- }
+out:
+ DHT_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
- return 0;
+int
+dht_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FALLOCATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->rebalance.flags = mode;
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
+
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND_COOKIE(frame, dht_fallocate_cbk, subvol, subvol,
+ subvol->fops->fallocate, fd, local->rebalance.flags,
+ local->rebalance.offset, local->rebalance.size,
+ local->xattr_req);
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
}
int
-dht_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
+dht_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_TRUNCATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ xlator_t *src_subvol = NULL;
+ xlator_t *dst_subvol = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ /* discard fails with EBADF if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- local->rebalance.offset = offset;
- local->call_cnt = 1;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ local->op_errno = op_errno;
+ local->op_ret = -1;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
}
+ goto out;
+ }
- STACK_WIND (frame, dht_truncate_cbk,
- subvol, subvol->fops->truncate,
- loc, offset, xdata);
+ local->rebalance.target_op_fn = dht_discard2;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- return 0;
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
+
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
+
+ dht_inode_ctx_get_mig_info(this, local->fd->inode, &src_subvol,
+ &dst_subvol);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, src_subvol,
+ dst_subvol)) {
+ if (dht_fd_open_on_dst(this, local->fd, dst_subvol)) {
+ dht_discard2(this, dst_subvol, frame, 0);
+ return 0;
+ }
+ }
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+out:
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
+ DHT_STACK_UNWIND(discard, frame, op_ret, op_errno, prebuf, postbuf, xdata);
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+static int
+dht_discard2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(discard, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
return 0;
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND_COOKIE(frame, dht_discard_cbk, subvol, subvol,
+ subvol->fops->discard, local->fd, local->rebalance.offset,
+ local->rebalance.size, local->xattr_req);
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
int
-dht_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
+dht_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- int op_errno = -1;
- dht_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FTRUNCATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- local->rebalance.offset = offset;
- local->call_cnt = 1;
- subvol = local->cached_subvol;
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no cached subvolume for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- STACK_WIND (frame, dht_truncate_cbk,
- subvol, subvol->fops->ftruncate,
- fd, offset, xdata);
+ local = dht_local_init(frame, NULL, fd, GF_FOP_DISCARD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- return 0;
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
+
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND_COOKIE(frame, dht_discard_cbk, subvol, subvol,
+ subvol->fops->discard, fd, local->rebalance.offset,
+ local->rebalance.size, local->xattr_req);
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-/* handle cases of migration here for 'setattr()' calls */
int
-dht_file_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+dht_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = -1;
-
- local = frame->local;
- prev = cookie;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
+ xlator_t *subvol1 = NULL, *subvol2 = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+ GF_VALIDATE_OR_GOTO("dht", cookie, out);
+
+ local = frame->local;
+ prev = cookie;
+
+ /* zerofill fails with EBADF if dht has not yet opened the fd
+ * on the cached subvol. This could happen if the file was migrated
+ * and a lookup updated the cached subvol in the inode ctx.
+ * We only check once as this could actually be a valid error.
+ */
+ if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
local->op_errno = op_errno;
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto out;
+ local->op_ret = -1;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
+
+ if (local->call_cnt != 1) {
+ if (local->stbuf.ia_blocks) {
+ dht_iatt_merge(this, postbuf, &local->stbuf);
+ dht_iatt_merge(this, prebuf, &local->prebuf);
}
+ goto out;
+ }
- if (local->call_cnt != 1)
- goto out;
+ local->rebalance.target_op_fn = dht_zerofill2;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- local->rebalance.target_op_fn = dht_setattr2;
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
- /* Phase 2 of migration */
- if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2 (postbuf)) {
- ret = dht_rebalance_complete_check (this, frame);
- if (!ret)
- return 0;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
+ /* Check if the rebalance phase1 is true */
+ if (IS_DHT_MIGRATION_PHASE1(postbuf)) {
+ dht_iatt_merge(this, &local->stbuf, postbuf);
+ dht_iatt_merge(this, &local->prebuf, prebuf);
+
+ ret = dht_inode_ctx_get_mig_info(this, local->fd->inode, &subvol1,
+ &subvol2);
+ if (!dht_mig_info_is_invalid(local->cached_subvol, subvol1, subvol2)) {
+ if (dht_fd_open_on_dst(this, local->fd, subvol2)) {
+ dht_zerofill2(this, subvol2, frame, 0);
+ return 0;
+ }
}
- /* At the end of the migration process, whatever 'attr' we
- have on source file will be migrated to destination file
- in one shot, hence we don't need to check for in progress
- state here (ie, PHASE1) */
+ ret = dht_rebalance_in_progress_check(this, frame);
+ if (!ret)
+ return 0;
+ }
+
out:
- DHT_STRIP_PHASE1_FLAGS (postbuf);
- DHT_STRIP_PHASE1_FLAGS (prebuf);
- DHT_STACK_UNWIND (setattr, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
- return 0;
+ DHT_STACK_UNWIND(zerofill, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+err:
+ return 0;
}
-int
-dht_setattr2 (xlator_t *this, call_frame_t *frame, int op_ret)
+static int
+dht_zerofill2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
{
- dht_local_t *local = NULL;
- xlator_t *subvol = NULL;
- uint64_t tmp_subvol = 0;
- int ret = -1;
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- local = frame->local;
+ if (!frame || !frame->local)
+ goto out;
- if (local->fd)
- ret = fd_ctx_get (local->fd, this, &tmp_subvol);
- if (!ret)
- subvol = (xlator_t *)(long)tmp_subvol;
-
- if (!subvol)
- subvol = local->cached_subvol;
-
- local->call_cnt = 2; /* This is the second attempt */
-
- if (local->fop == GF_FOP_SETATTR) {
- STACK_WIND (frame, dht_file_setattr_cbk, subvol,
- subvol->fops->setattr, &local->loc,
- &local->rebalance.stbuf, local->rebalance.flags,
- NULL);
- } else {
- STACK_WIND (frame, dht_file_setattr_cbk, subvol,
- subvol->fops->fsetattr, local->fd,
- &local->rebalance.stbuf, local->rebalance.flags,
- NULL);
- }
+ local = frame->local;
+
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(zerofill, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
return 0;
-}
+ }
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ STACK_WIND_COOKIE(frame, dht_zerofill_cbk, subvol, subvol,
+ subvol->fops->zerofill, local->fd,
+ local->rebalance.offset, local->rebalance.size,
+ local->xattr_req);
+
+ return 0;
+
+out:
+
+ DHT_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
-/* Keep the existing code same for all the cases other than regular file */
int
-dht_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+dht_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
-
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_DEBUG,
- "subvolume %s returned -1 (%s)",
- prev->this->name, strerror (op_errno));
- goto unlock;
- }
+ xlator_t *subvol = NULL;
+ int op_errno = -1;
+ dht_local_t *local = NULL;
- dht_iatt_merge (this, &local->prebuf, statpre, prev->this);
- dht_iatt_merge (this, &local->stbuf, statpost, prev->this);
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
- local->op_ret = 0;
- }
-unlock:
- UNLOCK (&frame->lock);
+ local = dht_local_init(frame, NULL, fd, GF_FOP_ZEROFILL);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt))
- DHT_STACK_UNWIND (setattr, frame, local->op_ret, local->op_errno,
- &local->prebuf, &local->stbuf, xdata);
+ local->rebalance.offset = offset;
+ local->rebalance.size = len;
- return 0;
-}
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ STACK_WIND_COOKIE(frame, dht_zerofill_cbk, subvol, subvol,
+ subvol->fops->zerofill, fd, local->rebalance.offset,
+ local->rebalance.size, local->xattr_req);
+ return 0;
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+/* handle cases of migration here for 'setattr()' calls */
int
-dht_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+dht_file_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- xlator_t *subvol = NULL;
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
- int call_cnt = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_SETATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int ret = -1;
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local = frame->local;
+ prev = cookie;
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for path=%s", loc->path);
- op_errno = EINVAL;
- goto err;
- }
+ local->op_errno = op_errno;
- if (IA_ISREG (loc->inode->ia_type)) {
- /* in the regular file _cbk(), we need to check for
- migration possibilities */
- local->rebalance.stbuf = *stbuf;
- local->rebalance.flags = valid;
- local->call_cnt = 1;
- subvol = local->cached_subvol;
+ if ((local->fop == GF_FOP_FSETATTR) &&
+ dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+ ret = dht_check_and_open_fd_on_subvol(this, frame);
+ if (ret)
+ goto out;
+ return 0;
+ }
- STACK_WIND (frame, dht_file_setattr_cbk, subvol,
- subvol->fops->setattr,
- loc, stbuf, valid, xdata);
+ if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
- return 0;
- }
+ if (local->call_cnt != 1)
+ goto out;
- local->call_cnt = call_cnt = layout->cnt;
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_setattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->setattr,
- loc, stbuf, valid, xdata);
- }
+ local->rebalance.target_op_fn = dht_setattr2;
- return 0;
+ /* Phase 2 of migration */
+ if ((op_ret == -1) || IS_DHT_MIGRATION_PHASE2(postbuf)) {
+ dht_set_local_rebalance(this, local, NULL, prebuf, postbuf, xdata);
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
+ ret = dht_rebalance_complete_check(this, frame);
+ if (!ret)
+ return 0;
+ }
- return 0;
+ /* At the end of the migration process, whatever 'attr' we
+ have on source file will be migrated to destination file
+ in one shot, hence we don't need to check for in progress
+ state here (ie, PHASE1) */
+out:
+ DHT_STRIP_PHASE1_FLAGS(postbuf);
+ DHT_STRIP_PHASE1_FLAGS(prebuf);
+
+ DHT_STACK_UNWIND(setattr, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+ return 0;
}
+static int
+dht_setattr2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+ dht_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ if (!frame || !frame->local)
+ goto out;
+
+ local = frame->local;
+ op_errno = local->op_errno;
+
+ if (we_are_not_migrating(ret)) {
+ /* This dht xlator is not migrating the file. Unwind and
+ * pass on the original mode bits so the higher DHT layer
+ * can handle this.
+ */
+ DHT_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
+ &local->rebalance.prebuf, &local->rebalance.postbuf,
+ local->rebalance.xdata);
+ return 0;
+ }
+
+ if (subvol == NULL)
+ goto out;
+
+ local->call_cnt = 2; /* This is the second attempt */
+
+ if (local->fop == GF_FOP_SETATTR) {
+ STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol,
+ subvol->fops->setattr, &local->loc,
+ &local->rebalance.stbuf, local->rebalance.flags,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol,
+ subvol->fops->fsetattr, local->fd,
+ &local->rebalance.stbuf, local->rebalance.flags,
+ local->xattr_req);
+ }
+
+ return 0;
+
+out:
+ DHT_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+/* Keep the existing code same for all the cases other than regular file */
int
-dht_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
+dht_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
{
- xlator_t *subvol = NULL;
- dht_layout_t *layout = NULL;
- dht_local_t *local = NULL;
- int op_errno = -1;
- int i = -1;
- int call_cnt = 0;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- local = dht_local_init (frame, NULL, fd, GF_FOP_FSETATTR);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no layout for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
- }
+ local = frame->local;
+ prev = cookie;
- if (!layout_is_sane (layout)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "layout is not sane for fd=%p", fd);
- op_errno = EINVAL;
- goto err;
+ LOCK(&frame->lock);
+ {
+ if (op_ret == -1) {
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto post_unlock;
}
- if (IA_ISREG (fd->inode->ia_type)) {
- /* in the regular file _cbk(), we need to check for
- migration possibilities */
- local->rebalance.stbuf = *stbuf;
- local->rebalance.flags = valid;
- local->call_cnt = 1;
- subvol = local->cached_subvol;
+ dht_iatt_merge(this, &local->prebuf, statpre);
+ dht_iatt_merge(this, &local->stbuf, statpost);
+
+ local->op_ret = 0;
+ local->op_errno = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ if (local->op_ret == 0)
+ dht_inode_ctx_time_set(local->loc.inode, this, &local->stbuf);
+ DHT_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
+ &local->prebuf, &local->stbuf, xdata);
+ }
+
+ return 0;
+}
+
+/* Keep the existing code same for all the cases other than regular file */
+int
+dht_non_mds_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ gf_msg(this->name, op_errno, 0, 0, "subvolume %s returned -1",
+ prev->name);
+ goto post_unlock;
+ }
+
+ LOCK(&frame->lock);
+ {
+ dht_iatt_merge(this, &local->prebuf, statpre);
+ dht_iatt_merge(this, &local->stbuf, statpost);
+
+ local->op_ret = 0;
+ local->op_errno = 0;
+ }
+ UNLOCK(&frame->lock);
+post_unlock:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ dht_inode_ctx_time_set(local->loc.inode, this, &local->stbuf);
+ DHT_STACK_UNWIND(setattr, frame, 0, 0, &local->prebuf, &local->stbuf,
+ xdata);
+ }
+
+ return 0;
+}
- STACK_WIND (frame, dht_file_setattr_cbk, subvol,
- subvol->fops->fsetattr,
- fd, stbuf, valid, xdata);
+int
+dht_mds_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
- return 0;
+{
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *mds_subvol = NULL;
+ struct iatt loc_stbuf = {
+ 0,
+ };
+ int i = 0;
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+ mds_subvol = local->mds_subvol;
+
+ if (op_ret == -1) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+ prev->name);
+ goto out;
+ }
+
+ local->op_ret = 0;
+ loc_stbuf = local->stbuf;
+ dht_iatt_merge(this, &local->prebuf, statpre);
+ dht_iatt_merge(this, &local->stbuf, statpost);
+
+ local->call_cnt = conf->subvolume_cnt - 1;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (mds_subvol == conf->subvolumes[i])
+ continue;
+ STACK_WIND_COOKIE(frame, dht_non_mds_setattr_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->setattr, &local->loc,
+ &loc_stbuf, local->valid, local->xattr_req);
+ }
+
+ return 0;
+out:
+ DHT_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
+ &local->prebuf, &local->stbuf, xdata);
+
+ return 0;
+}
+
+int
+dht_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ xlator_t *mds_subvol = NULL;
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int i = -1;
+ int ret = -1;
+ int call_cnt = 0;
+ dht_conf_t *conf = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+
+ conf = this->private;
+ local = dht_local_init(frame, loc, NULL, GF_FOP_SETATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "no layout for path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!layout_is_sane(layout)) {
+ gf_msg_debug(this->name, 0, "layout is not sane for path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (IA_ISREG(loc->inode->ia_type)) {
+ /* in the regular file _cbk(), we need to check for
+ migration possibilities */
+ local->rebalance.stbuf = *stbuf;
+ local->rebalance.flags = valid;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+
+ STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol,
+ subvol->fops->setattr, loc, stbuf, valid, xdata);
+
+ return 0;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+
+ if (IA_ISDIR(loc->inode->ia_type) && !__is_root_gfid(loc->inode->gfid) &&
+ call_cnt != 1) {
+ ret = dht_inode_ctx_mdsvol_get(loc->inode, this, &mds_subvol);
+ if (ret || !mds_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get mds subvol for path %s", local->loc.path);
+ op_errno = EINVAL;
+ goto err;
}
- local->call_cnt = call_cnt = layout->cnt;
+ local->mds_subvol = mds_subvol;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg(this->name, GF_LOG_WARNING, layout->list[i].err,
+ DHT_MSG_HASHED_SUBVOL_DOWN,
+ "MDS subvol is down for path "
+ " %s Unable to set attr ",
+ local->loc.path);
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ }
+ }
+ local->valid = valid;
+ local->stbuf = *stbuf;
+ STACK_WIND_COOKIE(frame, dht_mds_setattr_cbk, local->mds_subvol,
+ local->mds_subvol, local->mds_subvol->fops->setattr,
+ loc, stbuf, valid, xdata);
+ return 0;
+ } else {
for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_setattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->fsetattr,
- fd, stbuf, valid, xdata);
+ STACK_WIND_COOKIE(frame, dht_setattr_cbk, layout->list[i].xlator,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->setattr, loc, stbuf,
+ valid, xdata);
}
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int
+dht_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ xlator_t *subvol = NULL;
+ dht_layout_t *layout = NULL;
+ dht_local_t *local = NULL;
+ int op_errno = -1;
+ int i = -1;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ local = dht_local_init(frame, NULL, fd, GF_FOP_FSETATTR);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0, "no layout for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (!layout_is_sane(layout)) {
+ gf_msg_debug(this->name, 0, "layout is not sane for fd=%p", fd);
+ op_errno = EINVAL;
+ goto err;
+ }
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (IA_ISREG(fd->inode->ia_type)) {
+ /* in the regular file _cbk(), we need to check for
+ migration possibilities */
+ local->rebalance.stbuf = *stbuf;
+ local->rebalance.flags = valid;
+ local->call_cnt = 1;
+ subvol = local->cached_subvol;
+ STACK_WIND_COOKIE(frame, dht_file_setattr_cbk, subvol, subvol,
+ subvol->fops->fsetattr, fd, &local->rebalance.stbuf,
+ local->rebalance.flags, local->xattr_req);
return 0;
+ }
+
+ local->call_cnt = call_cnt = layout->cnt;
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_setattr_cbk, layout->list[i].xlator,
+ layout->list[i].xlator,
+ layout->list[i].xlator->fops->fsetattr, fd, stbuf,
+ valid, xdata);
+ }
+
+ return 0;
+
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
}
diff --git a/xlators/cluster/dht/src/dht-layout.c b/xlators/cluster/dht/src/dht-layout.c
index 97bbe987fdc..fda904c92c9 100644
--- a/xlators/cluster/dht/src/dht-layout.c
+++ b/xlators/cluster/dht/src/dht-layout.c
@@ -8,734 +8,801 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
+#include "unittest/unittest.h"
-#define layout_base_size (sizeof (dht_layout_t))
+#define layout_base_size (sizeof(dht_layout_t))
-#define layout_entry_size (sizeof ((dht_layout_t *)NULL)->list[0])
+#define layout_entry_size (sizeof((dht_layout_t *)NULL)->list[0])
#define layout_size(cnt) (layout_base_size + (cnt * layout_entry_size))
-
dht_layout_t *
-dht_layout_new (xlator_t *this, int cnt)
+dht_layout_new(xlator_t *this, int cnt)
{
- dht_layout_t *layout = NULL;
- dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ dht_conf_t *conf = NULL;
+ REQUIRE(NULL != this);
+ REQUIRE(cnt >= 0);
- conf = this->private;
+ conf = this->private;
- layout = GF_CALLOC (1, layout_size (cnt),
- gf_dht_mt_dht_layout_t);
- if (!layout) {
- goto out;
- }
+ layout = GF_CALLOC(1, layout_size(cnt), gf_dht_mt_dht_layout_t);
+ if (!layout) {
+ goto out;
+ }
- layout->type = DHT_HASH_TYPE_DM;
- layout->cnt = cnt;
+ layout->type = DHT_HASH_TYPE_DM;
+ layout->cnt = cnt;
- if (conf) {
- layout->spread_cnt = conf->dir_spread_cnt;
- layout->gen = conf->gen;
- }
+ if (conf) {
+ layout->spread_cnt = conf->dir_spread_cnt;
+ layout->gen = conf->gen;
+ }
+
+ GF_ATOMIC_INIT(layout->ref, 1);
- layout->ref = 1;
+ ENSURE(NULL != layout);
+ ENSURE(layout->type == DHT_HASH_TYPE_DM);
+ ENSURE(layout->cnt == cnt);
+ ENSURE(GF_ATOMIC_GET(layout->ref) == 1);
out:
- return layout;
+ return layout;
}
-
dht_layout_t *
-dht_layout_get (xlator_t *this, inode_t *inode)
+dht_layout_get(xlator_t *this, inode_t *inode)
{
- dht_conf_t *conf = NULL;
- uint64_t layout_int = 0;
- dht_layout_t *layout = NULL;
- int ret = -1;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- LOCK (&conf->layout_lock);
- {
- ret = inode_ctx_get (inode, this, &layout_int);
- if (ret == 0) {
- layout = (dht_layout_t *) (unsigned long) layout_int;
- layout->ref++;
- }
- }
- UNLOCK (&conf->layout_lock);
-
-out:
- return layout;
+ dht_layout_t *layout = NULL;
+ int ret = 0;
+
+ ret = dht_inode_ctx_layout_get(inode, this, &layout);
+ if ((!ret) && layout) {
+ GF_ATOMIC_INC(layout->ref);
+ }
+ return layout;
}
-
int
-dht_layout_set (xlator_t *this, inode_t *inode, dht_layout_t *layout)
+dht_layout_set(xlator_t *this, inode_t *inode, dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
- int oldret = -1;
- int ret = 0;
- dht_layout_t *old_layout;
- uint64_t old_layout_int;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- LOCK (&conf->layout_lock);
- {
- oldret = inode_ctx_get (inode, this, &old_layout_int);
-
- layout->ref++;
- ret = inode_ctx_put (inode, this, (uint64_t) (unsigned long)
- layout);
- }
- UNLOCK (&conf->layout_lock);
-
- if (oldret == 0) {
- old_layout = (dht_layout_t *) (unsigned long) old_layout_int;
- dht_layout_unref (this, old_layout);
- }
+ dht_conf_t *conf = NULL;
+ int oldret = -1;
+ int ret = -1;
+ dht_layout_t *old_layout;
+
+ conf = this->private;
+ if (!conf || !layout)
+ goto out;
+
+ LOCK(&conf->layout_lock);
+ {
+ oldret = dht_inode_ctx_layout_get(inode, this, &old_layout);
+ if (layout)
+ GF_ATOMIC_INC(layout->ref);
+ ret = dht_inode_ctx_layout_set(inode, this, layout);
+ }
+ UNLOCK(&conf->layout_lock);
+
+ if (!oldret) {
+ dht_layout_unref(this, old_layout);
+ }
+ if (ret)
+ GF_ATOMIC_DEC(layout->ref);
out:
- return ret;
+ return ret;
}
-
void
-dht_layout_unref (xlator_t *this, dht_layout_t *layout)
+dht_layout_unref(xlator_t *this, dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
- int ref = 0;
+ int ref = 0;
- if (layout->preset || !this->private)
- return;
+ if (!layout || layout->preset || !this->private)
+ return;
- conf = this->private;
+ ref = GF_ATOMIC_DEC(layout->ref);
- LOCK (&conf->layout_lock);
- {
- ref = --layout->ref;
- }
- UNLOCK (&conf->layout_lock);
-
- if (!ref)
- GF_FREE (layout);
+ if (!ref)
+ GF_FREE(layout);
}
-
dht_layout_t *
-dht_layout_ref (xlator_t *this, dht_layout_t *layout)
+dht_layout_ref(xlator_t *this, dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
-
- if (layout->preset || !this->private)
- return layout;
+ if (layout->preset || !this->private)
+ return layout;
- conf = this->private;
- LOCK (&conf->layout_lock);
- {
- layout->ref++;
- }
- UNLOCK (&conf->layout_lock);
+ GF_ATOMIC_INC(layout->ref);
- return layout;
+ return layout;
}
-
xlator_t *
-dht_layout_search (xlator_t *this, dht_layout_t *layout, const char *name)
+dht_layout_search(xlator_t *this, dht_layout_t *layout, const char *name)
{
- uint32_t hash = 0;
- xlator_t *subvol = NULL;
- int i = 0;
- int ret = 0;
-
-
- ret = dht_hash_compute (layout->type, name, &hash);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "hash computation failed for type=%d name=%s",
- layout->type, name);
- goto out;
- }
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].start <= hash
- && layout->list[i].stop >= hash) {
- subvol = layout->list[i].xlator;
- break;
- }
- }
-
- if (!subvol) {
- gf_log (this->name, GF_LOG_WARNING,
- "no subvolume for hash (value) = %u", hash);
- }
+ uint32_t hash = 0;
+ xlator_t *subvol = NULL;
+ int i = 0;
+ int ret = 0;
+
+ ret = dht_hash_compute(this, layout->type, name, &hash);
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMPUTE_HASH_FAILED,
+ "type=%d", layout->type, "name=%s", name, NULL);
+ goto out;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].start <= hash && layout->list[i].stop >= hash) {
+ subvol = layout->list[i].xlator;
+ break;
+ }
+ }
+
+ if (!subvol) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "hash-value=0x%x", hash, NULL);
+ }
out:
- return subvol;
+ return subvol;
}
-
dht_layout_t *
-dht_layout_for_subvol (xlator_t *this, xlator_t *subvol)
+dht_layout_for_subvol(xlator_t *this, xlator_t *subvol)
{
- dht_conf_t *conf = NULL;
- dht_layout_t *layout = NULL;
- int i = 0;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == subvol) {
- layout = conf->file_layouts[i];
- break;
- }
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == subvol) {
+ layout = conf->file_layouts[i];
+ break;
}
+ }
out:
- return layout;
+ return layout;
}
-
int
-dht_layouts_init (xlator_t *this, dht_conf_t *conf)
+dht_layouts_init(xlator_t *this, dht_conf_t *conf)
{
- dht_layout_t *layout = NULL;
- int i = 0;
- int ret = -1;
-
- if (!conf)
- goto out;
-
- conf->file_layouts = GF_CALLOC (conf->subvolume_cnt,
- sizeof (dht_layout_t *),
- gf_dht_mt_dht_layout_t);
- if (!conf->file_layouts) {
- goto out;
- }
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- layout = dht_layout_new (this, 1);
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int ret = -1;
- if (!layout) {
- goto out;
- }
+ if (!conf)
+ goto out;
- layout->preset = 1;
+ conf->file_layouts = GF_CALLOC(conf->subvolume_cnt, sizeof(dht_layout_t *),
+ gf_dht_mt_dht_layout_t);
+ if (!conf->file_layouts) {
+ goto out;
+ }
- layout->list[0].xlator = conf->subvolumes[i];
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ layout = dht_layout_new(this, 1);
- conf->file_layouts[i] = layout;
+ if (!layout) {
+ goto out;
}
- ret = 0;
+ layout->preset = 1;
+
+ layout->list[0].xlator = conf->subvolumes[i];
+
+ conf->file_layouts[i] = layout;
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-dht_disk_layout_extract (xlator_t *this, dht_layout_t *layout,
- int pos, int32_t **disk_layout_p)
+dht_disk_layout_extract(xlator_t *this, dht_layout_t *layout, int pos,
+ int32_t **disk_layout_p)
{
- int ret = -1;
- int32_t *disk_layout = NULL;
+ int ret = -1;
+ int32_t *disk_layout = NULL;
- disk_layout = GF_CALLOC (5, sizeof (int),
- gf_dht_mt_int32_t);
- if (!disk_layout) {
- goto out;
- }
+ disk_layout = GF_CALLOC(5, sizeof(int), gf_dht_mt_int32_t);
+ if (!disk_layout) {
+ goto out;
+ }
- disk_layout[0] = hton32 (1);
- disk_layout[1] = hton32 (layout->type);
- disk_layout[2] = hton32 (layout->list[pos].start);
- disk_layout[3] = hton32 (layout->list[pos].stop);
+ disk_layout[0] = hton32(layout->list[pos].commit_hash);
+ disk_layout[1] = hton32(layout->type);
+ disk_layout[2] = hton32(layout->list[pos].start);
+ disk_layout[3] = hton32(layout->list[pos].stop);
- if (disk_layout_p)
- *disk_layout_p = disk_layout;
- else
- GF_FREE (disk_layout);
+ if (disk_layout_p)
+ *disk_layout_p = disk_layout;
+ else
+ GF_FREE(disk_layout);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-dht_disk_layout_merge (xlator_t *this, dht_layout_t *layout,
- int pos, void *disk_layout_raw, int disk_layout_len)
+dht_disk_layout_extract_for_subvol(xlator_t *this, dht_layout_t *layout,
+ xlator_t *subvol, int32_t **disk_layout_p)
{
- int cnt = 0;
- int type = 0;
- int start_off = 0;
- int stop_off = 0;
- int disk_layout[4];
-
- if (!disk_layout_raw) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "error no layout on disk for merge");
- return -1;
- }
-
- GF_ASSERT (disk_layout_len == sizeof (disk_layout));
-
- memcpy (disk_layout, disk_layout_raw, disk_layout_len);
-
- cnt = ntoh32 (disk_layout[0]);
- if (cnt != 1) {
- gf_log (this->name, GF_LOG_ERROR,
- "disk layout has invalid count %d", cnt);
- return -1;
- }
+ int i = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol)
+ break;
+ }
+
+ if (i == layout->cnt)
+ return -1;
+
+ return dht_disk_layout_extract(this, layout, i, disk_layout_p);
+}
+
+static int
+dht_disk_layout_merge(xlator_t *this, dht_layout_t *layout, int pos,
+ void *disk_layout_raw, int disk_layout_len)
+{
+ int type = 0;
+ int start_off = 0;
+ int stop_off = 0;
+ int commit_hash = 0;
+ int disk_layout[4];
+
+ if (!disk_layout_raw) {
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ NULL);
+ return -1;
+ }
- type = ntoh32 (disk_layout[1]);
- switch (type) {
+ GF_ASSERT(disk_layout_len == sizeof(disk_layout));
+
+ memcpy(disk_layout, disk_layout_raw, disk_layout_len);
+
+ type = ntoh32(disk_layout[1]);
+ switch (type) {
case DHT_HASH_TYPE_DM_USER:
- gf_log (this->name, GF_LOG_DEBUG, "found user-set layout");
- layout->type = type;
- /* Fall through. */
- case DHT_HASH_TYPE_DM:
- break;
+ gf_msg_debug(this->name, 0, "found user-set layout");
+ layout->type = type;
+ /* Fall through. */
+ case DHT_HASH_TYPE_DM:
+ break;
default:
- gf_log (this->name, GF_LOG_CRITICAL,
- "Catastrophic error layout with unknown type found %d",
- disk_layout[1]);
- return -1;
- }
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_DISK_LAYOUT,
+ "layout=%d", disk_layout[1], NULL);
+ return -1;
+ }
- start_off = ntoh32 (disk_layout[2]);
- stop_off = ntoh32 (disk_layout[3]);
+ commit_hash = ntoh32(disk_layout[0]);
+ start_off = ntoh32(disk_layout[2]);
+ stop_off = ntoh32(disk_layout[3]);
- layout->list[pos].start = start_off;
- layout->list[pos].stop = stop_off;
+ layout->list[pos].commit_hash = commit_hash;
+ layout->list[pos].start = start_off;
+ layout->list[pos].stop = stop_off;
- gf_log (this->name, GF_LOG_TRACE,
- "merged to layout: %u - %u (type %d) from %s",
- start_off, stop_off, type,
- layout->list[pos].xlator->name);
+ gf_msg_trace(this->name, 0,
+ "merged to layout: 0x%x - 0x%x (hash 0x%x, type %d) from %s",
+ start_off, stop_off, commit_hash, type,
+ layout->list[pos].xlator->name);
- return 0;
+ return 0;
}
-
int
-dht_layout_merge (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- int op_ret, int op_errno, dict_t *xattr)
+dht_layout_merge(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ int op_ret, int op_errno, dict_t *xattr)
{
- int i = 0;
- int ret = -1;
- int err = -1;
- void *disk_layout_raw = NULL;
- int disk_layout_len = 0;
-
- if (op_ret != 0) {
- err = op_errno;
- }
+ int i = 0;
+ int ret = -1;
+ int err = -1;
+ void *disk_layout_raw = NULL;
+ int disk_layout_len = 0;
+ dht_conf_t *conf = this->private;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == NULL) {
- layout->list[i].err = err;
- layout->list[i].xlator = subvol;
- break;
- }
- }
+ if (op_ret != 0) {
+ err = op_errno;
+ }
- if (op_ret != 0) {
- ret = 0;
- goto out;
- }
+ if (!layout)
+ goto out;
- if (xattr) {
- /* during lookup and not mkdir */
- ret = dict_get_ptr_and_len (xattr, "trusted.glusterfs.dht",
- &disk_layout_raw, &disk_layout_len);
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == NULL) {
+ layout->list[i].err = err;
+ layout->list[i].xlator = subvol;
+ break;
}
+ }
- if (ret != 0) {
- layout->list[i].err = -1;
- gf_log (this->name, GF_LOG_TRACE,
- "missing disk layout on %s. err = %d",
- subvol->name, err);
- ret = 0;
- goto out;
- }
+ if (op_ret != 0) {
+ ret = 0;
+ goto out;
+ }
- ret = dht_disk_layout_merge (this, layout, i, disk_layout_raw,
- disk_layout_len);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "layout merge from subvolume %s failed",
- subvol->name);
- goto out;
- }
+ if (xattr) {
+ /* during lookup and not mkdir */
+ ret = dict_get_ptr_and_len(xattr, conf->xattr_name, &disk_layout_raw,
+ &disk_layout_len);
+ }
+
+ if (ret != 0) {
layout->list[i].err = 0;
+ gf_msg_trace(this->name, 0, "Missing disk layout on %s. err = %d",
+ subvol->name, err);
+ ret = 0;
+ goto out;
+ }
+
+ ret = dht_disk_layout_merge(this, layout, i, disk_layout_raw,
+ disk_layout_len);
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_MERGE_FAILED,
+ "subvolume=%s", subvol->name, NULL);
+ goto out;
+ }
+
+ if (layout->commit_hash == 0) {
+ layout->commit_hash = layout->list[i].commit_hash;
+ } else if (layout->commit_hash != layout->list[i].commit_hash) {
+ layout->commit_hash = DHT_LAYOUT_HASH_INVALID;
+ }
+
+ layout->list[i].err = 0;
out:
- return ret;
+ return ret;
}
+void
+dht_layout_entry_swap(dht_layout_t *layout, int i, int j)
+{
+ uint32_t start_swap = 0;
+ uint32_t stop_swap = 0;
+ uint32_t commit_hash_swap = 0;
+ xlator_t *xlator_swap = 0;
+ int err_swap = 0;
+
+ start_swap = layout->list[i].start;
+ stop_swap = layout->list[i].stop;
+ xlator_swap = layout->list[i].xlator;
+ err_swap = layout->list[i].err;
+ commit_hash_swap = layout->list[i].commit_hash;
+
+ layout->list[i].start = layout->list[j].start;
+ layout->list[i].stop = layout->list[j].stop;
+ layout->list[i].xlator = layout->list[j].xlator;
+ layout->list[i].err = layout->list[j].err;
+ layout->list[i].commit_hash = layout->list[j].commit_hash;
+
+ layout->list[j].start = start_swap;
+ layout->list[j].stop = stop_swap;
+ layout->list[j].xlator = xlator_swap;
+ layout->list[j].err = err_swap;
+ layout->list[j].commit_hash = commit_hash_swap;
+}
void
-dht_layout_entry_swap (dht_layout_t *layout, int i, int j)
+dht_layout_range_swap(dht_layout_t *layout, int i, int j)
+{
+ uint32_t start_swap = 0;
+ uint32_t stop_swap = 0;
+
+ start_swap = layout->list[i].start;
+ stop_swap = layout->list[i].stop;
+
+ layout->list[i].start = layout->list[j].start;
+ layout->list[i].stop = layout->list[j].stop;
+
+ layout->list[j].start = start_swap;
+ layout->list[j].stop = stop_swap;
+}
+static int64_t
+dht_layout_entry_cmp_volname(dht_layout_t *layout, int i, int j)
{
- uint32_t start_swap = 0;
- uint32_t stop_swap = 0;
- xlator_t *xlator_swap = 0;
- int err_swap = 0;
-
- start_swap = layout->list[i].start;
- stop_swap = layout->list[i].stop;
- xlator_swap = layout->list[i].xlator;
- err_swap = layout->list[i].err;
-
- layout->list[i].start = layout->list[j].start;
- layout->list[i].stop = layout->list[j].stop;
- layout->list[i].xlator = layout->list[j].xlator;
- layout->list[i].err = layout->list[j].err;
-
- layout->list[j].start = start_swap;
- layout->list[j].stop = stop_swap;
- layout->list[j].xlator = xlator_swap;
- layout->list[j].err = err_swap;
+ return (strcmp(layout->list[i].xlator->name, layout->list[j].xlator->name));
}
-int64_t
-dht_layout_entry_cmp_volname (dht_layout_t *layout, int i, int j)
+gf_boolean_t
+dht_is_subvol_in_layout(dht_layout_t *layout, xlator_t *xlator)
{
- return (strcmp (layout->list[i].xlator->name,
- layout->list[j].xlator->name));
+ int i = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ /* Check if xlator is already part of layout, and layout is
+ * non-zero. */
+ if (!strcmp(layout->list[i].xlator->name, xlator->name)) {
+ if (layout->list[i].start != layout->list[i].stop)
+ return _gf_true;
+ break;
+ }
+ }
+ return _gf_false;
}
-int64_t
-dht_layout_entry_cmp (dht_layout_t *layout, int i, int j)
+static int64_t
+dht_layout_entry_cmp(dht_layout_t *layout, int i, int j)
{
- int64_t diff = 0;
+ int64_t diff = 0;
- if (layout->list[i].err || layout->list[j].err)
- diff = layout->list[i].err - layout->list[j].err;
- else
- diff = (int64_t) layout->list[i].start
- - (int64_t) layout->list[j].start;
+ /* swap zero'ed out layouts to front, if needed */
+ if (!layout->list[j].start && !layout->list[j].stop) {
+ diff = (int64_t)layout->list[i].stop - (int64_t)layout->list[j].stop;
+ goto out;
+ }
+ diff = (int64_t)layout->list[i].start - (int64_t)layout->list[j].start;
- return diff;
+out:
+ return diff;
}
-
int
-dht_layout_sort (dht_layout_t *layout)
+dht_layout_sort(dht_layout_t *layout)
{
- int i = 0;
- int j = 0;
- int64_t ret = 0;
+ int i = 0;
+ int j = 0;
+ int64_t ret = 0;
- /* TODO: O(n^2) -- bad bad */
+ /* TODO: O(n^2) -- bad bad */
- for (i = 0; i < layout->cnt - 1; i++) {
- for (j = i + 1; j < layout->cnt; j++) {
- ret = dht_layout_entry_cmp (layout, i, j);
- if (ret > 0)
- dht_layout_entry_swap (layout, i, j);
- }
+ for (i = 0; i < layout->cnt - 1; i++) {
+ for (j = i + 1; j < layout->cnt; j++) {
+ ret = dht_layout_entry_cmp(layout, i, j);
+ if (ret > 0)
+ dht_layout_entry_swap(layout, i, j);
}
+ }
- return 0;
+ return 0;
}
-int
-dht_layout_sort_volname (dht_layout_t *layout)
+void
+dht_layout_sort_volname(dht_layout_t *layout)
{
- int i = 0;
- int j = 0;
- int64_t ret = 0;
+ int i = 0;
+ int j = 0;
+ int64_t ret = 0;
- /* TODO: O(n^2) -- bad bad */
+ /* TODO: O(n^2) -- bad bad */
- for (i = 0; i < layout->cnt - 1; i++) {
- for (j = i + 1; j < layout->cnt; j++) {
- ret = dht_layout_entry_cmp_volname (layout, i, j);
- if (ret > 0)
- dht_layout_entry_swap (layout, i, j);
- }
+ for (i = 0; i < layout->cnt - 1; i++) {
+ for (j = i + 1; j < layout->cnt; j++) {
+ ret = dht_layout_entry_cmp_volname(layout, i, j);
+ if (ret > 0)
+ dht_layout_entry_swap(layout, i, j);
}
-
- return 0;
+ }
}
-
-int
-dht_layout_anomalies (xlator_t *this, loc_t *loc, dht_layout_t *layout,
- uint32_t *holes_p, uint32_t *overlaps_p,
- uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p)
+void
+dht_layout_anomalies(xlator_t *this, loc_t *loc, dht_layout_t *layout,
+ uint32_t *holes_p, uint32_t *overlaps_p,
+ uint32_t *missing_p, uint32_t *down_p, uint32_t *misc_p,
+ uint32_t *no_space_p)
{
- uint32_t overlaps = 0;
- uint32_t missing = 0;
- uint32_t down = 0;
- uint32_t misc = 0;
- uint32_t hole_cnt = 0;
- uint32_t overlap_cnt = 0;
- int i = 0;
- int ret = 0;
- uint32_t prev_stop = 0;
- uint32_t last_stop = 0;
- char is_virgin = 1;
-
- /* TODO: explain what is happening */
-
- last_stop = layout->list[0].start - 1;
- prev_stop = last_stop;
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err) {
- switch (layout->list[i].err) {
- case -1:
- case ENOENT:
- missing++;
- break;
- case ENOTCONN:
- down++;
- break;
- case ENOSPC:
- down++;
- break;
- default:
- misc++;
- }
- continue;
+ uint32_t overlaps = 0;
+ uint32_t missing = 0;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+ uint32_t hole_cnt = 0;
+ uint32_t overlap_cnt = 0;
+ int i = 0;
+ uint32_t prev_stop = 0;
+ uint32_t last_stop = 0;
+ char is_virgin = 1;
+ uint32_t no_space = 0;
+
+ /* This function scans through the layout spread of a directory to
+ check if there are any anomalies. Prior to calling this function
+ the layout entries should be sorted in the ascending order.
+
+ If the layout entry has err != 0
+ then increment the corresponding anomaly.
+ else
+ if (start of the current layout entry > stop + 1 of previous
+ non erroneous layout entry)
+ then it indicates a hole in the layout
+ if (start of the current layout entry < stop + 1 of previous
+ non erroneous layout entry)
+ then it indicates an overlap in the layout
+ */
+ last_stop = layout->list[0].start - 1;
+ prev_stop = last_stop;
+
+ for (i = 0; i < layout->cnt; i++) {
+ switch (layout->list[i].err) {
+ case -1:
+ case ENOENT:
+ case ESTALE:
+ missing++;
+ continue;
+ case ENOTCONN:
+ down++;
+ continue;
+ case ENOSPC:
+ no_space++;
+ continue;
+ case 0:
+ /* if err == 0 and start == stop, then it is a non misc++;
+ * participating subvolume(spread-cnt). Then, do not
+ * check for anomalies. If start != stop, then treat it
+ * as misc err */
+ if (layout->list[i].start == layout->list[i].stop) {
+ continue;
}
+ break;
+ default:
+ misc++;
+ continue;
+ }
- is_virgin = 0;
+ is_virgin = 0;
- if ((prev_stop + 1) < layout->list[i].start) {
- hole_cnt++;
- }
+ if ((prev_stop + 1) < layout->list[i].start) {
+ hole_cnt++;
+ }
- if ((prev_stop + 1) > layout->list[i].start) {
- overlap_cnt++;
- overlaps += ((prev_stop + 1) - layout->list[i].start);
- }
- prev_stop = layout->list[i].stop;
+ if ((prev_stop + 1) > layout->list[i].start) {
+ overlap_cnt++;
+ overlaps += ((prev_stop + 1) - layout->list[i].start);
}
+ prev_stop = layout->list[i].stop;
+ }
- if ((last_stop - prev_stop) || is_virgin)
- hole_cnt++;
+ if ((last_stop - prev_stop) || is_virgin)
+ hole_cnt++;
- if (holes_p)
- *holes_p = hole_cnt;
+ if (holes_p)
+ *holes_p = hole_cnt;
- if (overlaps_p)
- *overlaps_p = overlap_cnt;
+ if (overlaps_p)
+ *overlaps_p = overlap_cnt;
- if (missing_p)
- *missing_p = missing;
+ if (missing_p)
+ *missing_p = missing;
- if (down_p)
- *down_p = down;
+ if (down_p)
+ *down_p = down;
- if (misc_p)
- *misc_p = misc;
+ if (misc_p)
+ *misc_p = misc;
- return ret;
+ if (no_space_p)
+ *no_space_p = no_space;
}
-
int
-dht_layout_normalize (xlator_t *this, loc_t *loc, dht_layout_t *layout)
+dht_layout_missing_dirs(dht_layout_t *layout)
{
- int ret = 0;
- int i = 0;
- uint32_t holes = 0;
- uint32_t overlaps = 0;
- uint32_t missing = 0;
- uint32_t down = 0;
- uint32_t misc = 0;
-
- ret = dht_layout_sort (layout);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "sort failed?! how the ....");
- goto out;
- }
+ int i = 0, missing = 0;
- ret = dht_layout_anomalies (this, loc, layout,
- &holes, &overlaps,
- &missing, &down, &misc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "error while finding anomalies in %s -- not good news",
- loc->path);
- goto out;
- }
+ if (layout == NULL)
+ goto out;
- if (holes || overlaps) {
- if (missing == layout->cnt) {
- gf_log (this->name, GF_LOG_DEBUG,
- "directory %s looked up first time",
- loc->path);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "found anomalies in %s. holes=%d overlaps=%d",
- loc->path, holes, overlaps);
- }
- ret = -1;
+ for (i = 0; i < layout->cnt; i++) {
+ if ((layout->list[i].err == ENOENT) ||
+ ((layout->list[i].err == -1) && (layout->list[i].start == 0) &&
+ (layout->list[i].stop == 0))) {
+ missing++;
}
+ }
- for (i = 0; i < layout->cnt; i++) {
- /* TODO During DHT selfheal rewrite (almost) find a better place
- * to detect this - probably in dht_layout_anomalies()
- */
- if (layout->list[i].err > 0) {
- gf_log_callingfn (this->name, GF_LOG_DEBUG,
- "path=%s err=%s on subvol=%s",
- loc->path,
- strerror (layout->list[i].err),
- (layout->list[i].xlator ?
- layout->list[i].xlator->name
- : "<>"));
- if ((layout->list[i].err == ENOENT) && (ret >= 0)) {
- ret++;
- }
- }
- }
+out:
+ return missing;
+}
+int
+dht_layout_normalize(xlator_t *this, loc_t *loc, dht_layout_t *layout)
+{
+ int ret = 0;
+ uint32_t holes = 0;
+ uint32_t overlaps = 0;
+ uint32_t missing = 0;
+ uint32_t down = 0;
+ uint32_t misc = 0, missing_dirs = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ ret = dht_layout_sort(layout);
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_SORT_FAILED,
+ NULL);
+ goto out;
+ }
+
+ gf_uuid_unparse(loc->gfid, gfid);
+
+ dht_layout_anomalies(this, loc, layout, &holes, &overlaps, &missing, &down,
+ &misc, NULL);
+
+ if (holes || overlaps) {
+ if (missing == layout->cnt) {
+ gf_msg_debug(this->name, 0,
+ "Directory %s looked up first time"
+ " gfid = %s",
+ loc->path, gfid);
+ } else {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_ANOMALIES_INFO,
+ "path=%s", loc->path, "gfid=%s", gfid, "holes=%d", holes,
+ "overlaps=%d", overlaps, NULL);
+ }
+ ret = -1;
+ }
+
+ if (ret >= 0) {
+ missing_dirs = dht_layout_missing_dirs(layout);
+ /* TODO During DHT selfheal rewrite (almost) find a better place
+ * to detect this - probably in dht_layout_anomalies()
+ */
+ if (missing_dirs > 0)
+ ret += missing_dirs;
+ }
out:
- return ret;
+ return ret;
}
-
int
-dht_layout_dir_mismatch (xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
- loc_t *loc, dict_t *xattr)
+dht_dir_has_layout(dict_t *xattr, char *name)
{
- int idx = 0;
- int pos = -1;
- int ret = 0;
- int err = 0;
- int dict_ret = 0;
- int32_t disk_layout[4];
- void *disk_layout_raw = NULL;
- int32_t count = -1;
- uint32_t start_off = -1;
- uint32_t stop_off = -1;
-
-
- for (idx = 0; idx < layout->cnt; idx++) {
- if (layout->list[idx].xlator == subvol) {
- pos = idx;
- break;
- }
- }
+ void *disk_layout_raw = NULL;
- if (pos == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s - no layout info for subvolume %s",
- loc->path, subvol->name);
- ret = 1;
- goto out;
- }
+ return dict_get_ptr(xattr, name, &disk_layout_raw);
+}
- err = layout->list[pos].err;
+int
+dht_layout_dir_mismatch(xlator_t *this, dht_layout_t *layout, xlator_t *subvol,
+ loc_t *loc, dict_t *xattr)
+{
+ int idx = 0;
+ int pos = -1;
+ int ret = 0;
+ int err = 0;
+ int dict_ret = 0;
+ int32_t disk_layout[4];
+ void *disk_layout_raw = NULL;
+ uint32_t start_off = -1;
+ uint32_t stop_off = -1;
+ uint32_t commit_hash = -1;
+ dht_conf_t *conf = this->private;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ if (loc && loc->inode)
+ gf_uuid_unparse(loc->inode->gfid, gfid);
+
+ for (idx = 0; idx < layout->cnt; idx++) {
+ if (layout->list[idx].xlator == subvol) {
+ pos = idx;
+ break;
+ }
+ }
+
+ if (pos == -1) {
+ if (loc) {
+ gf_msg_debug(this->name, 0, "%s - no layout info for subvolume %s",
+ loc ? loc->path : "path not found", subvol->name);
+ }
+ ret = 1;
+ goto out;
+ }
+
+ err = layout->list[pos].err;
+
+ if (!xattr) {
+ if (err == 0) {
+ if (loc) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_XATTR_DICT_NULL,
+ "path=%s", loc->path, NULL);
+ } else {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_XATTR_DICT_NULL,
+ "path not found", NULL);
+ }
+ ret = -1;
+ }
+ goto out;
+ }
+
+ dict_ret = dict_get_ptr(xattr, conf->xattr_name, &disk_layout_raw);
+
+ if (dict_ret < 0) {
+ if (err == 0 && layout->list[pos].stop) {
+ if (loc) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
+ } else {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_DISK_LAYOUT_MISSING,
+ "path not found"
+ "gfid=%s",
+ gfid, NULL);
+ }
+ ret = -1;
+ }
+ goto out;
+ }
+
+ memcpy(disk_layout, disk_layout_raw, sizeof(disk_layout));
+
+ start_off = ntoh32(disk_layout[2]);
+ stop_off = ntoh32(disk_layout[3]);
+ commit_hash = ntoh32(disk_layout[0]);
+
+ if ((layout->list[pos].start != start_off) ||
+ (layout->list[pos].stop != stop_off) ||
+ (layout->list[pos].commit_hash != commit_hash)) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_LAYOUT_INFO, "subvol=%s",
+ layout->list[pos].xlator->name, "inode-layout:start=0x%x",
+ layout->list[pos].start, "inode-layout:stop=0x%x",
+ layout->list[pos].stop, "layout-commit-hash=0x%x; ",
+ layout->list[pos].commit_hash, "disk-layout:start-off=0x%x",
+ start_off, "disk-layout:top-off=0x%x", stop_off,
+ "commit-hash=0x%x", commit_hash, NULL);
+ ret = 1;
+ } else {
+ ret = 0;
+ }
+out:
+ return ret;
+}
- if (!xattr) {
- if (err == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "%s - xattr dictionary is NULL",
- loc->path);
- ret = -1;
- }
- goto out;
- }
+int
+dht_layout_preset(xlator_t *this, xlator_t *subvol, inode_t *inode)
+{
+ dht_layout_t *layout = NULL;
+ int ret = -1;
+ dht_conf_t *conf = NULL;
- dict_ret = dict_get_ptr (xattr, "trusted.glusterfs.dht",
- &disk_layout_raw);
+ conf = this->private;
+ if (!conf)
+ goto out;
- if (dict_ret < 0) {
- if (err == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "%s - disk layout missing", loc->path);
- ret = -1;
- }
- goto out;
- }
+ layout = dht_layout_for_subvol(this, subvol);
+ if (!layout) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_NO_LAYOUT_INFO,
+ "subvolume=%s", subvol ? subvol->name : "<nil>", NULL);
+ ret = -1;
+ goto out;
+ }
- memcpy (disk_layout, disk_layout_raw, sizeof (disk_layout));
+ gf_msg_debug(this->name, 0, "file = %s, subvol = %s",
+ uuid_utoa(inode->gfid), subvol ? subvol->name : "<nil>");
- count = ntoh32 (disk_layout[0]);
- if (count != 1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s - disk layout has invalid count %d",
- loc->path, count);
- ret = -1;
- goto out;
- }
+ LOCK(&conf->layout_lock);
+ {
+ dht_inode_ctx_layout_set(inode, this, layout);
+ }
- start_off = ntoh32 (disk_layout[2]);
- stop_off = ntoh32 (disk_layout[3]);
-
- if ((layout->list[pos].start != start_off)
- || (layout->list[pos].stop != stop_off)) {
- gf_log (this->name, GF_LOG_INFO,
- "subvol: %s; inode layout - %"PRIu32" - %"PRIu32"; "
- "disk layout - %"PRIu32" - %"PRIu32,
- layout->list[pos].xlator->name,
- layout->list[pos].start, layout->list[pos].stop,
- start_off, stop_off);
- ret = 1;
- } else {
- ret = 0;
- }
+ UNLOCK(&conf->layout_lock);
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-dht_layout_preset (xlator_t *this, xlator_t *subvol, inode_t *inode)
+dht_layout_index_for_subvol(dht_layout_t *layout, xlator_t *subvol)
{
- dht_layout_t *layout = NULL;
- int ret = -1;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
- if (!conf)
- goto out;
-
- layout = dht_layout_for_subvol (this, subvol);
- if (!layout) {
- gf_log (this->name, GF_LOG_INFO,
- "no pre-set layout for subvolume %s",
- subvol ? subvol->name : "<nil>");
- ret = -1;
- goto out;
- }
+ int i = 0, ret = -1;
- LOCK (&conf->layout_lock);
- {
- inode_ctx_put (inode, this, (uint64_t)(long)layout);
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol) {
+ ret = i;
+ break;
}
- UNLOCK (&conf->layout_lock);
+ }
- ret = 0;
-out:
- return ret;
+ return ret;
}
diff --git a/xlators/cluster/dht/src/dht-linkfile.c b/xlators/cluster/dht/src/dht-linkfile.c
index 803f344aff1..89ec6cca56e 100644
--- a/xlators/cluster/dht/src/dht-linkfile.c
+++ b/xlators/cluster/dht/src/dht-linkfile.c
@@ -8,183 +8,321 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include <glusterfs/compat.h>
+#include "dht-common.h"
+static int
+dht_linkfile_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ char is_linkfile = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ prev = cookie;
+ conf = this->private;
+
+ if (op_ret)
+ goto out;
+
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
+ if (!is_linkfile)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NOT_LINK_FILE_ERROR,
+ "name=%s", prev->name, "path=%s", local->loc.path, "gfid=%s",
+ gfid, NULL);
+out:
+ local->linkfile.linkfile_cbk(frame, cookie, this, op_ret, op_errno, inode,
+ stbuf, postparent, postparent, xattr);
+ return 0;
+}
-#include "glusterfs.h"
-#include "xlator.h"
-#include "compat.h"
-#include "dht-common.h"
+static int
+dht_linkfile_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ dict_t *xattrs = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ local = frame->local;
+ if (!op_ret)
+ local->linked = _gf_true;
-int
-dht_linkfile_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- dht_local_t *local = NULL;
+ FRAME_SU_UNDO(frame, dht_local_t);
- local = frame->local;
+ if (op_ret && (op_errno == EEXIST)) {
+ conf = this->private;
+ subvol = cookie;
+ if (!subvol)
+ goto out;
+ xattrs = dict_new();
+ if (!xattrs)
+ goto out;
+ ret = dict_set_uint32(xattrs, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "mame=%s", conf->link_xattr_name, NULL);
+ goto out;
+ }
- local->linkfile.linkfile_cbk (frame, cookie, this, op_ret, op_errno,
- inode, stbuf, preparent, postparent,
- xdata);
+ STACK_WIND_COOKIE(frame, dht_linkfile_lookup_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->linkfile.loc, xattrs);
+ if (xattrs)
+ dict_unref(xattrs);
return 0;
+ }
+out:
+ local->linkfile.linkfile_cbk(frame, cookie, this, op_ret, op_errno, inode,
+ stbuf, preparent, postparent, xdata);
+ if (xattrs)
+ dict_unref(xattrs);
+ return 0;
}
-
int
-dht_linkfile_create (call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
- xlator_t *tovol, xlator_t *fromvol, loc_t *loc)
+dht_linkfile_create(call_frame_t *frame, fop_mknod_cbk_t linkfile_cbk,
+ xlator_t *this, xlator_t *tovol, xlator_t *fromvol,
+ loc_t *loc)
{
- dht_local_t *local = NULL;
- dict_t *dict = NULL;
- int need_unref = 0;
- int ret = 0;
-
- local = frame->local;
- local->linkfile.linkfile_cbk = linkfile_cbk;
- local->linkfile.srcvol = tovol;
-
- dict = local->params;
- if (!dict) {
- dict = dict_new ();
- if (!dict)
- goto out;
- need_unref = 1;
- }
-
- if (!uuid_is_null (local->gfid)) {
- ret = dict_set_static_bin (dict, "gfid-req", local->gfid, 16);
- if (ret)
- gf_log ("dht-linkfile", GF_LOG_INFO,
- "%s: gfid set failed", loc->path);
- }
-
- ret = dict_set_str (dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ dht_local_t *local = NULL;
+ dict_t *dict = NULL;
+ int need_unref = 0;
+ int ret = 0;
+ dht_conf_t *conf = this->private;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ local->linkfile.linkfile_cbk = linkfile_cbk;
+ local->linkfile.srcvol = tovol;
+ loc_copy(&local->linkfile.loc, loc);
+
+ local->linked = _gf_false;
+
+ dict = local->params;
+ if (!dict) {
+ dict = dict_new();
+ if (!dict)
+ goto out;
+ need_unref = 1;
+ }
+
+ if (!gf_uuid_is_null(local->gfid)) {
+ gf_uuid_unparse(local->gfid, gfid);
+
+ ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true);
if (ret)
- gf_log ("dht-linkfile", GF_LOG_INFO,
- "%s: internal-fop set failed", loc->path);
+ gf_smsg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
+ } else {
+ gf_uuid_unparse(loc->gfid, gfid);
+ }
+
+ ret = dict_set_str(dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret)
+ gf_smsg("dht-linkfile", GF_LOG_INFO, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", GLUSTERFS_INTERNAL_FOP_KEY,
+ "gfid=%s", gfid, NULL);
+
+ ret = dict_set_str(dict, conf->link_xattr_name, tovol->name);
+
+ if (ret < 0) {
+ gf_smsg(frame->this->name, GF_LOG_INFO, 0, DHT_MSG_CREATE_LINK_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+
+ local->link_subvol = fromvol;
+ /* Always create as root:root. dht_linkfile_attr_heal fixes the
+ * ownsership */
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND_COOKIE(frame, dht_linkfile_create_cbk, fromvol, fromvol,
+ fromvol->fops->mknod, loc, S_IFREG | DHT_LINKFILE_MODE, 0,
+ 0, dict);
+
+ if (need_unref && dict)
+ dict_unref(dict);
+
+ return 0;
+out:
+ local->linkfile.linkfile_cbk(frame, frame->this, frame->this, -1, ENOMEM,
+ loc->inode, NULL, NULL, NULL, NULL);
- ret = dict_set_str (dict, "trusted.glusterfs.dht.linkto",
- tovol->name);
+ if (need_unref && dict)
+ dict_unref(dict);
- if (ret < 0) {
- gf_log (frame->this->name, GF_LOG_INFO,
- "%s: failed to initialize linkfile data",
- loc->path);
- goto out;
- }
+ return 0;
+}
- STACK_WIND (frame, dht_linkfile_create_cbk,
- fromvol, fromvol->fops->mknod, loc,
- S_IFREG | DHT_LINKFILE_MODE, 0, 0, dict);
+int
+dht_linkfile_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
- if (need_unref && dict)
- dict_unref (dict);
+ local = frame->local;
+ subvol = cookie;
- return 0;
-out:
- local->linkfile.linkfile_cbk (frame, NULL, frame->this, -1, ENOMEM,
- loc->inode, NULL, NULL, NULL, NULL);
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_smsg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_UNLINK_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", gfid, "subvolume=%s",
+ subvol->name, NULL);
+ }
- if (need_unref && dict)
- dict_unref (dict);
+ DHT_STACK_DESTROY(frame);
- return 0;
+ return 0;
}
-
int
-dht_linkfile_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+dht_linkfile_unlink(call_frame_t *frame, xlator_t *this, xlator_t *subvol,
+ loc_t *loc)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
-
- local = frame->local;
- prev = cookie;
- subvol = prev->this;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "unlinking linkfile %s on %s failed (%s)",
- local->loc.path, subvol->name, strerror (op_errno));
- }
+ call_frame_t *unlink_frame = NULL;
+ dht_local_t *unlink_local = NULL;
- DHT_STACK_DESTROY (frame);
+ unlink_frame = copy_frame(frame);
+ if (!unlink_frame) {
+ goto err;
+ }
- return 0;
-}
+ /* Using non-fop value here, as anyways, 'local->fop' is not used in
+ this particular case */
+ unlink_local = dht_local_init(unlink_frame, loc, NULL, GF_FOP_MAXVALUE);
+ if (!unlink_local) {
+ goto err;
+ }
+ STACK_WIND_COOKIE(unlink_frame, dht_linkfile_unlink_cbk, subvol, subvol,
+ subvol->fops->unlink, &unlink_local->loc, 0, NULL);
-int
-dht_linkfile_unlink (call_frame_t *frame, xlator_t *this,
- xlator_t *subvol, loc_t *loc)
+ return 0;
+err:
+ if (unlink_frame)
+ DHT_STACK_DESTROY(unlink_frame);
+
+ return -1;
+}
+
+xlator_t *
+dht_linkfile_subvol(xlator_t *this, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr)
{
- call_frame_t *unlink_frame = NULL;
- dht_local_t *unlink_local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ void *volname = NULL;
+ int i = 0, ret = 0;
- unlink_frame = copy_frame (frame);
- if (!unlink_frame) {
- goto err;
- }
+ conf = this->private;
- /* Using non-fop value here, as anyways, 'local->fop' is not used in
- this particular case */
- unlink_local = dht_local_init (unlink_frame, loc, NULL,
- GF_FOP_MAXVALUE);
- if (!unlink_local) {
- goto err;
- }
+ if (!xattr)
+ goto out;
- STACK_WIND (unlink_frame, dht_linkfile_unlink_cbk,
- subvol, subvol->fops->unlink,
- &unlink_local->loc, 0, NULL);
+ ret = dict_get_ptr(xattr, conf->link_xattr_name, &volname);
- return 0;
-err:
- if (unlink_frame)
- DHT_STACK_DESTROY (unlink_frame);
+ if ((-1 == ret) || !volname)
+ goto out;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (strcmp(conf->subvolumes[i]->name, (char *)volname) == 0) {
+ subvol = conf->subvolumes[i];
+ break;
+ }
+ }
- return -1;
+out:
+ return subvol;
}
+static int
+dht_linkfile_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
-xlator_t *
-dht_linkfile_subvol (xlator_t *this, inode_t *inode, struct iatt *stbuf,
- dict_t *xattr)
+ local = frame->local;
+ loc = &local->loc;
+
+ if (op_ret)
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_SETATTR_FAILED,
+ "path=%s", (loc->path ? loc->path : "NULL"), "gfid=%s",
+ uuid_utoa(local->gfid), NULL);
+
+ DHT_STACK_DESTROY(frame);
+
+ return 0;
+}
+
+int
+dht_linkfile_attr_heal(call_frame_t *frame, xlator_t *this)
{
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
- void *volname = NULL;
- int i = 0, ret = 0;
+ int ret = -1;
+ call_frame_t *copy = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *copy_local = NULL;
+ xlator_t *subvol = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO("dht", local, out);
+ GF_VALIDATE_OR_GOTO("dht", local->link_subvol, out);
+
+ if (local->stbuf.ia_type == IA_INVAL)
+ return 0;
- conf = this->private;
+ DHT_MARK_FOP_INTERNAL(xattr);
- if (!xattr)
- goto out;
+ gf_uuid_copy(local->loc.gfid, local->stbuf.ia_gfid);
- ret = dict_get_ptr (xattr, "trusted.glusterfs.dht.linkto", &volname);
+ copy = copy_frame(frame);
- if ((-1 == ret) || !volname)
- goto out;
+ if (!copy)
+ goto out;
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (strcmp (conf->subvolumes[i]->name, (char *)volname) == 0) {
- subvol = conf->subvolumes[i];
- break;
- }
- }
+ copy_local = dht_local_init(copy, &local->loc, NULL, 0);
+
+ if (!copy_local)
+ goto out;
+
+ stbuf = local->stbuf;
+ subvol = local->link_subvol;
+
+ copy->local = copy_local;
+ FRAME_SU_DO(copy, dht_local_t);
+
+ STACK_WIND(copy, dht_linkfile_setattr_cbk, subvol, subvol->fops->setattr,
+ &copy_local->loc, &stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+ xattr);
+ ret = 0;
out:
- return subvol;
+ if ((ret < 0) && (copy))
+ DHT_STACK_DESTROY(copy);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ return ret;
}
diff --git a/xlators/cluster/dht/src/dht-lock.c b/xlators/cluster/dht/src/dht-lock.c
new file mode 100644
index 00000000000..638821ccee5
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-lock.c
@@ -0,0 +1,1392 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "dht-lock.h"
+
+static char *
+dht_lock_asprintf(dht_lock_t *lock)
+{
+ char *lk_buf = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+
+ if (lock == NULL)
+ goto out;
+
+ uuid_utoa_r(lock->loc.gfid, gfid);
+
+ gf_asprintf(&lk_buf, "%s:%s", lock->xl->name, gfid);
+
+out:
+ return lk_buf;
+}
+
+static void
+dht_log_lk_array(char *name, gf_loglevel_t log_level, dht_lock_t **lk_array,
+ int count)
+{
+ int i = 0;
+ char *lk_buf = NULL;
+
+ if ((lk_array == NULL) || (count == 0))
+ goto out;
+
+ for (i = 0; i < count; i++) {
+ lk_buf = dht_lock_asprintf(lk_array[i]);
+ if (!lk_buf)
+ goto out;
+
+ gf_smsg(name, log_level, 0, DHT_MSG_LK_ARRAY_INFO, "index=%d", i,
+ "lk_buf=%s", lk_buf, NULL);
+ GF_FREE(lk_buf);
+ }
+
+out:
+ return;
+}
+
+static void
+dht_lock_stack_destroy(call_frame_t *lock_frame, dht_lock_type_t lk)
+{
+ dht_local_t *local = NULL;
+
+ local = lock_frame->local;
+
+ if (lk == DHT_INODELK) {
+ local->lock[0].layout.my_layout.locks = NULL;
+ local->lock[0].layout.my_layout.lk_count = 0;
+ } else {
+ local->lock[0].ns.directory_ns.locks = NULL;
+ local->lock[0].ns.directory_ns.lk_count = 0;
+ }
+
+ DHT_STACK_DESTROY(lock_frame);
+ return;
+}
+
+static void
+dht_lock_free(dht_lock_t *lock)
+{
+ if (lock == NULL)
+ goto out;
+
+ loc_wipe(&lock->loc);
+ GF_FREE(lock->domain);
+ GF_FREE(lock->basename);
+ mem_put(lock);
+
+out:
+ return;
+}
+
+static void
+dht_set_lkowner(dht_lock_t **lk_array, int count, gf_lkowner_t *lkowner)
+{
+ int i = 0;
+
+ if (!lk_array || !lkowner)
+ goto out;
+
+ for (i = 0; i < count; i++) {
+ lk_array[i]->lk_owner = *lkowner;
+ }
+
+out:
+ return;
+}
+
+static int
+dht_lock_request_cmp(const void *val1, const void *val2)
+{
+ dht_lock_t *lock1 = NULL;
+ dht_lock_t *lock2 = NULL;
+ int ret = -1;
+
+ lock1 = *(dht_lock_t **)val1;
+ lock2 = *(dht_lock_t **)val2;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", lock1, out);
+ GF_VALIDATE_OR_GOTO("dht-locks", lock2, out);
+
+ ret = strcmp(lock1->xl->name, lock2->xl->name);
+
+ if (ret == 0) {
+ ret = gf_uuid_compare(lock1->loc.gfid, lock2->loc.gfid);
+ }
+
+out:
+ return ret;
+}
+
+static int
+dht_lock_order_requests(dht_lock_t **locks, int count)
+{
+ int ret = -1;
+
+ if (!locks || !count)
+ goto out;
+
+ qsort(locks, count, sizeof(*locks), dht_lock_request_cmp);
+ ret = 0;
+
+out:
+ return ret;
+}
+
+void
+dht_lock_array_free(dht_lock_t **lk_array, int count)
+{
+ int i = 0;
+ dht_lock_t *lock = NULL;
+
+ if (lk_array == NULL)
+ goto out;
+
+ for (i = 0; i < count; i++) {
+ lock = lk_array[i];
+ lk_array[i] = NULL;
+ dht_lock_free(lock);
+ }
+
+out:
+ return;
+}
+
+int32_t
+dht_lock_count(dht_lock_t **lk_array, int lk_count)
+{
+ int i = 0, locked = 0;
+
+ if ((lk_array == NULL) || (lk_count == 0))
+ goto out;
+
+ for (i = 0; i < lk_count; i++) {
+ if (lk_array[i]->locked)
+ locked++;
+ }
+out:
+ return locked;
+}
+
+static call_frame_t *
+dht_lock_frame(call_frame_t *parent_frame)
+{
+ call_frame_t *lock_frame = NULL;
+
+ lock_frame = copy_frame(parent_frame);
+ if (lock_frame == NULL)
+ goto out;
+
+ set_lk_owner_from_ptr(&lock_frame->root->lk_owner, parent_frame->root);
+
+out:
+ return lock_frame;
+}
+
+dht_lock_t *
+dht_lock_new(xlator_t *this, xlator_t *xl, loc_t *loc, short type,
+ const char *domain, const char *basename,
+ dht_reaction_type_t do_on_failure)
+{
+ dht_conf_t *conf = NULL;
+ dht_lock_t *lock = NULL;
+
+ conf = this->private;
+
+ lock = mem_get0(conf->lock_pool);
+ if (lock == NULL)
+ goto out;
+
+ lock->xl = xl;
+ lock->type = type;
+ lock->do_on_failure = do_on_failure;
+
+ lock->domain = gf_strdup(domain);
+ if (lock->domain == NULL) {
+ dht_lock_free(lock);
+ lock = NULL;
+ goto out;
+ }
+
+ if (basename) {
+ lock->basename = gf_strdup(basename);
+ if (lock->basename == NULL) {
+ dht_lock_free(lock);
+ lock = NULL;
+ goto out;
+ }
+ }
+
+ /* Fill only inode and gfid.
+ posix and protocol/server give preference to pargfid/basename over
+ gfid/inode for resolution if all the three parameters of loc_t are
+ present. I want to avoid the following hypothetical situation:
+
+ 1. rebalance did a lookup on a dentry and got a gfid.
+ 2. rebalance acquires lock on loc_t which was filled with gfid and
+ path (pargfid/bname) from step 1.
+ 3. somebody deleted and recreated the same file
+ 4. rename on the same path acquires lock on loc_t which now points
+ to a different inode (and hence gets the lock).
+ 5. rebalance continues to migrate file (note that not all fops done
+ by rebalance during migration are inode/gfid based Eg., unlink)
+ 6. rename continues.
+ */
+ lock->loc.inode = inode_ref(loc->inode);
+ loc_gfid(loc, lock->loc.gfid);
+
+out:
+ return lock;
+}
+
+static int
+dht_local_entrylk_init(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_entrylk_cbk_t entrylk_cbk)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local == NULL) {
+ local = dht_local_init(frame, NULL, NULL, 0);
+ }
+
+ if (local == NULL) {
+ goto out;
+ }
+
+ local->lock[0].ns.directory_ns.entrylk_cbk = entrylk_cbk;
+ local->lock[0].ns.directory_ns.locks = lk_array;
+ local->lock[0].ns.directory_ns.lk_count = lk_count;
+
+ ret = dht_lock_order_requests(local->lock[0].ns.directory_ns.locks,
+ local->lock[0].ns.directory_ns.lk_count);
+ if (ret < 0)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static void
+dht_entrylk_done(call_frame_t *lock_frame)
+{
+ fop_entrylk_cbk_t entrylk_cbk = NULL;
+ call_frame_t *main_frame = NULL;
+ dht_local_t *local = NULL;
+
+ local = lock_frame->local;
+ main_frame = local->main_frame;
+
+ local->lock[0].ns.directory_ns.locks = NULL;
+ local->lock[0].ns.directory_ns.lk_count = 0;
+
+ entrylk_cbk = local->lock[0].ns.directory_ns.entrylk_cbk;
+ local->lock[0].ns.directory_ns.entrylk_cbk = NULL;
+
+ entrylk_cbk(main_frame, NULL, main_frame->this,
+ local->lock[0].ns.directory_ns.op_ret,
+ local->lock[0].ns.directory_ns.op_errno, NULL);
+
+ dht_lock_stack_destroy(lock_frame, DHT_ENTRYLK);
+ return;
+}
+
+static int32_t
+dht_unlock_entrylk_done(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ gf_uuid_unparse(local->lock[0].ns.directory_ns.locks[0]->loc.inode->gfid,
+ gfid);
+
+ if (op_ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UNLOCK_GFID_FAILED, "gfid=%s", gfid,
+ "DHT_LAYOUT_HEAL_DOMAIN", NULL);
+ }
+
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
+
+static int32_t
+dht_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int lk_index = 0, call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ lk_index = (long)cookie;
+
+ local = frame->local;
+
+ uuid_utoa_r(local->lock[0].ns.directory_ns.locks[lk_index]->loc.gfid, gfid);
+
+ if (op_ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
+ "name=%s",
+ local->lock[0].ns.directory_ns.locks[lk_index]->xl->name,
+ "gfid=%s", gfid, NULL);
+ } else {
+ local->lock[0].ns.directory_ns.locks[lk_index]->locked = 0;
+ }
+
+ call_cnt = dht_frame_return(frame);
+ if (is_last_call(call_cnt)) {
+ dht_entrylk_done(frame);
+ }
+
+ return 0;
+}
+
+static int32_t
+dht_unlock_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_entrylk_cbk_t entrylk_cbk)
+{
+ dht_local_t *local = NULL;
+ int ret = -1, i = 0;
+ call_frame_t *lock_frame = NULL;
+ int call_cnt = 0;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, done);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, done);
+ GF_VALIDATE_OR_GOTO(frame->this->name, entrylk_cbk, done);
+
+ call_cnt = dht_lock_count(lk_array, lk_count);
+ if (call_cnt == 0) {
+ ret = 0;
+ goto done;
+ }
+
+ lock_frame = dht_lock_frame(frame);
+ if (lock_frame == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ NULL);
+
+ dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
+ goto done;
+ }
+
+ ret = dht_local_entrylk_init(lock_frame, lk_array, lk_count, entrylk_cbk);
+ if (ret < 0) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ NULL);
+
+ dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
+
+ goto done;
+ }
+
+ local = lock_frame->local;
+ local->main_frame = frame;
+ local->call_cnt = call_cnt;
+
+ for (i = 0; i < local->lock[0].ns.directory_ns.lk_count; i++) {
+ if (!local->lock[0].ns.directory_ns.locks[i]->locked)
+ continue;
+
+ lock_frame->root
+ ->lk_owner = local->lock[0].ns.directory_ns.locks[i]->lk_owner;
+ STACK_WIND_COOKIE(
+ lock_frame, dht_unlock_entrylk_cbk, (void *)(long)i,
+ local->lock[0].ns.directory_ns.locks[i]->xl,
+ local->lock[0].ns.directory_ns.locks[i]->xl->fops->entrylk,
+ local->lock[0].ns.directory_ns.locks[i]->domain,
+ &local->lock[0].ns.directory_ns.locks[i]->loc,
+ local->lock[0].ns.directory_ns.locks[i]->basename, ENTRYLK_UNLOCK,
+ ENTRYLK_WRLCK, NULL);
+ if (!--call_cnt)
+ break;
+ }
+
+ return 0;
+
+done:
+ if (lock_frame)
+ dht_lock_stack_destroy(lock_frame, DHT_ENTRYLK);
+
+ /* no locks acquired, invoke entrylk_cbk */
+ if (ret == 0)
+ entrylk_cbk(frame, NULL, frame->this, 0, 0, NULL);
+
+ return ret;
+}
+
+int32_t
+dht_unlock_entrylk_wrapper(call_frame_t *frame, dht_elock_wrap_t *entrylk)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+
+ local = frame->local;
+
+ if (!entrylk || !entrylk->locks)
+ goto out;
+
+ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
+
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_COPY_FRAME_FAILED, "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
+ goto done;
+ }
+
+ lock_local = dht_local_init(lock_frame, NULL, NULL, 0);
+ if (lock_local == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_CREATE_FAILED, "local", "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
+ goto done;
+ }
+
+ lock_frame->local = lock_local;
+
+ lock_local->lock[0].ns.directory_ns.locks = entrylk->locks;
+ lock_local->lock[0].ns.directory_ns.lk_count = entrylk->lk_count;
+ entrylk->locks = NULL;
+ entrylk->lk_count = 0;
+
+ ret = dht_unlock_entrylk(
+ lock_frame, lock_local->lock[0].ns.directory_ns.locks,
+ lock_local->lock[0].ns.directory_ns.lk_count, dht_unlock_entrylk_done);
+ if (ret)
+ goto done;
+
+ lock_frame = NULL;
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
+
+out:
+ return 0;
+}
+
+static int
+dht_entrylk_cleanup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_entrylk_done(frame);
+ return 0;
+}
+
+static void
+dht_entrylk_cleanup(call_frame_t *lock_frame)
+{
+ dht_lock_t **lk_array = NULL;
+ int lk_count = 0, lk_acquired = 0;
+ dht_local_t *local = NULL;
+
+ local = lock_frame->local;
+
+ lk_array = local->lock[0].ns.directory_ns.locks;
+ lk_count = local->lock[0].ns.directory_ns.lk_count;
+
+ lk_acquired = dht_lock_count(lk_array, lk_count);
+ if (lk_acquired != 0) {
+ dht_unlock_entrylk(lock_frame, lk_array, lk_count,
+ dht_entrylk_cleanup_cbk);
+ } else {
+ dht_entrylk_done(lock_frame);
+ }
+
+ return;
+}
+
+static int32_t
+dht_blocking_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int lk_index = 0;
+ int i = 0;
+ dht_local_t *local = NULL;
+
+ lk_index = (long)cookie;
+
+ local = frame->local;
+ if (op_ret == 0) {
+ local->lock[0].ns.directory_ns.locks[lk_index]->locked = _gf_true;
+ } else {
+ switch (op_errno) {
+ case ESTALE:
+ case ENOENT:
+ if (local->lock[0]
+ .ns.directory_ns.locks[lk_index]
+ ->do_on_failure != IGNORE_ENOENT_ESTALE) {
+ local->lock[0].ns.directory_ns.op_ret = -1;
+ local->lock[0].ns.directory_ns.op_errno = op_errno;
+ goto cleanup;
+ }
+ break;
+ default:
+ local->lock[0].ns.directory_ns.op_ret = -1;
+ local->lock[0].ns.directory_ns.op_errno = op_errno;
+ goto cleanup;
+ }
+ }
+
+ if (lk_index == (local->lock[0].ns.directory_ns.lk_count - 1)) {
+ for (i = 0; (i < local->lock[0].ns.directory_ns.lk_count) &&
+ (!local->lock[0].ns.directory_ns.locks[i]->locked);
+ i++)
+ ;
+
+ if (i == local->lock[0].ns.directory_ns.lk_count) {
+ local->lock[0].ns.directory_ns.op_ret = -1;
+ local->lock[0].ns.directory_ns.op_errno = op_errno;
+ }
+
+ dht_entrylk_done(frame);
+ } else {
+ dht_blocking_entrylk_rec(frame, ++lk_index);
+ }
+
+ return 0;
+
+cleanup:
+ dht_entrylk_cleanup(frame);
+
+ return 0;
+}
+
+void
+dht_blocking_entrylk_rec(call_frame_t *frame, int i)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ STACK_WIND_COOKIE(
+ frame, dht_blocking_entrylk_cbk, (void *)(long)i,
+ local->lock[0].ns.directory_ns.locks[i]->xl,
+ local->lock[0].ns.directory_ns.locks[i]->xl->fops->entrylk,
+ local->lock[0].ns.directory_ns.locks[i]->domain,
+ &local->lock[0].ns.directory_ns.locks[i]->loc,
+ local->lock[0].ns.directory_ns.locks[i]->basename, ENTRYLK_LOCK,
+ ENTRYLK_WRLCK, NULL);
+
+ return;
+}
+
+int
+dht_blocking_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_entrylk_cbk_t entrylk_cbk)
+{
+ int ret = -1;
+ call_frame_t *lock_frame = NULL;
+ dht_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, entrylk_cbk, out);
+
+ lock_frame = dht_lock_frame(frame);
+ if (lock_frame == NULL)
+ goto out;
+
+ ret = dht_local_entrylk_init(lock_frame, lk_array, lk_count, entrylk_cbk);
+ if (ret < 0) {
+ goto out;
+ }
+
+ dht_set_lkowner(lk_array, lk_count, &lock_frame->root->lk_owner);
+
+ local = lock_frame->local;
+ local->main_frame = frame;
+
+ dht_blocking_entrylk_rec(lock_frame, 0);
+
+ return 0;
+out:
+ if (lock_frame)
+ dht_lock_stack_destroy(lock_frame, DHT_ENTRYLK);
+
+ return -1;
+}
+
+static int
+dht_local_inodelk_init(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t inodelk_cbk)
+{
+ int ret = -1;
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local == NULL) {
+ local = dht_local_init(frame, NULL, NULL, 0);
+ }
+
+ if (local == NULL) {
+ goto out;
+ }
+
+ local->lock[0].layout.my_layout.inodelk_cbk = inodelk_cbk;
+ local->lock[0].layout.my_layout.locks = lk_array;
+ local->lock[0].layout.my_layout.lk_count = lk_count;
+
+ ret = dht_lock_order_requests(local->lock[0].layout.my_layout.locks,
+ local->lock[0].layout.my_layout.lk_count);
+ if (ret < 0)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static void
+dht_inodelk_done(call_frame_t *lock_frame)
+{
+ fop_inodelk_cbk_t inodelk_cbk = NULL;
+ call_frame_t *main_frame = NULL;
+ dht_local_t *local = NULL;
+
+ local = lock_frame->local;
+ main_frame = local->main_frame;
+
+ local->lock[0].layout.my_layout.locks = NULL;
+ local->lock[0].layout.my_layout.lk_count = 0;
+
+ inodelk_cbk = local->lock[0].layout.my_layout.inodelk_cbk;
+ local->lock[0].layout.my_layout.inodelk_cbk = NULL;
+
+ inodelk_cbk(main_frame, NULL, main_frame->this,
+ local->lock[0].layout.my_layout.op_ret,
+ local->lock[0].layout.my_layout.op_errno, NULL);
+
+ dht_lock_stack_destroy(lock_frame, DHT_INODELK);
+ return;
+}
+
+static int32_t
+dht_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int lk_index = 0, call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ lk_index = (long)cookie;
+
+ local = frame->local;
+ if (op_ret < 0) {
+ uuid_utoa_r(local->lock[0].layout.my_layout.locks[lk_index]->loc.gfid,
+ gfid);
+
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLOCKING_FAILED,
+ "name=%s",
+ local->lock[0].layout.my_layout.locks[lk_index]->xl->name,
+ "gfid=%s", gfid, NULL);
+ } else {
+ local->lock[0].layout.my_layout.locks[lk_index]->locked = 0;
+ }
+
+ call_cnt = dht_frame_return(frame);
+ if (is_last_call(call_cnt)) {
+ dht_inodelk_done(frame);
+ }
+
+ return 0;
+}
+
+static int32_t
+dht_unlock_inodelk_done(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ gf_uuid_unparse(local->lock[0].layout.my_layout.locks[0]->loc.inode->gfid,
+ gfid);
+
+ if (op_ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ DHT_MSG_UNLOCK_GFID_FAILED, "DHT_LAYOUT_HEAL_DOMAIN gfid=%s",
+ gfid, NULL);
+ }
+
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
+
+int32_t
+dht_unlock_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t inodelk_cbk)
+{
+ dht_local_t *local = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+ int ret = -1, i = 0;
+ call_frame_t *lock_frame = NULL;
+ int call_cnt = 0;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, done);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, done);
+ GF_VALIDATE_OR_GOTO(frame->this->name, inodelk_cbk, done);
+
+ call_cnt = dht_lock_count(lk_array, lk_count);
+ if (call_cnt == 0) {
+ ret = 0;
+ goto done;
+ }
+
+ lock_frame = dht_lock_frame(frame);
+ if (lock_frame == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ NULL);
+
+ dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
+ goto done;
+ }
+
+ ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk);
+ if (ret < 0) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ NULL);
+
+ dht_log_lk_array(frame->this->name, GF_LOG_WARNING, lk_array, lk_count);
+
+ goto done;
+ }
+
+ local = lock_frame->local;
+ local->main_frame = frame;
+ local->call_cnt = call_cnt;
+
+ flock.l_type = F_UNLCK;
+
+ for (i = 0; i < local->lock[0].layout.my_layout.lk_count; i++) {
+ if (!local->lock[0].layout.my_layout.locks[i]->locked)
+ continue;
+
+ lock_frame->root
+ ->lk_owner = local->lock[0].layout.my_layout.locks[i]->lk_owner;
+ STACK_WIND_COOKIE(
+ lock_frame, dht_unlock_inodelk_cbk, (void *)(long)i,
+ local->lock[0].layout.my_layout.locks[i]->xl,
+ local->lock[0].layout.my_layout.locks[i]->xl->fops->inodelk,
+ local->lock[0].layout.my_layout.locks[i]->domain,
+ &local->lock[0].layout.my_layout.locks[i]->loc, F_SETLK, &flock,
+ NULL);
+ if (!--call_cnt)
+ break;
+ }
+
+ return 0;
+
+done:
+ if (lock_frame)
+ dht_lock_stack_destroy(lock_frame, DHT_INODELK);
+
+ /* no locks acquired, invoke inodelk_cbk */
+ if (ret == 0)
+ inodelk_cbk(frame, NULL, frame->this, 0, 0, NULL);
+
+ return ret;
+}
+
+int32_t
+dht_unlock_inodelk_wrapper(call_frame_t *frame, dht_ilock_wrap_t *inodelk)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+
+ local = frame->local;
+
+ if (!inodelk || !inodelk->locks)
+ goto out;
+
+ gf_uuid_unparse(local->loc.parent->gfid, pgfid);
+
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_COPY_FRAME_FAILED, "pgfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
+ goto done;
+ }
+
+ lock_local = dht_local_init(lock_frame, NULL, NULL, 0);
+ if (lock_local == NULL) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, ENOMEM,
+ DHT_MSG_CREATE_FAILED, "local", "gfid=%s", pgfid, "name=%s",
+ local->loc.name, "path=%s", local->loc.path, NULL);
+ goto done;
+ }
+
+ lock_frame->local = lock_local;
+
+ lock_local->lock[0].layout.my_layout.locks = inodelk->locks;
+ lock_local->lock[0].layout.my_layout.lk_count = inodelk->lk_count;
+ inodelk->locks = NULL;
+ inodelk->lk_count = 0;
+
+ ret = dht_unlock_inodelk(
+ lock_frame, lock_local->lock[0].layout.my_layout.locks,
+ lock_local->lock[0].layout.my_layout.lk_count, dht_unlock_inodelk_done);
+
+ if (ret)
+ goto done;
+
+ lock_frame = NULL;
+
+done:
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
+out:
+ return 0;
+}
+
+static int
+dht_inodelk_cleanup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_inodelk_done(frame);
+ return 0;
+}
+
+static void
+dht_inodelk_cleanup(call_frame_t *lock_frame)
+{
+ dht_lock_t **lk_array = NULL;
+ int lk_count = 0, lk_acquired = 0;
+ dht_local_t *local = NULL;
+
+ local = lock_frame->local;
+
+ lk_array = local->lock[0].layout.my_layout.locks;
+ lk_count = local->lock[0].layout.my_layout.lk_count;
+
+ lk_acquired = dht_lock_count(lk_array, lk_count);
+ if (lk_acquired != 0) {
+ dht_unlock_inodelk(lock_frame, lk_array, lk_count,
+ dht_inodelk_cleanup_cbk);
+ } else {
+ dht_inodelk_done(lock_frame);
+ }
+
+ return;
+}
+
+static int32_t
+dht_nonblocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int lk_index = 0, call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ lk_index = (long)cookie;
+
+ if (op_ret == -1) {
+ local->lock[0].layout.my_layout.op_ret = -1;
+ local->lock[0].layout.my_layout.op_errno = op_errno;
+
+ if (local && local->lock[0].layout.my_layout.locks[lk_index]) {
+ uuid_utoa_r(local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->loc.inode->gfid,
+ gfid);
+
+ gf_msg_debug(
+ this->name, op_errno,
+ "inodelk failed on gfid: %s "
+ "subvolume: %s",
+ gfid,
+ local->lock[0].layout.my_layout.locks[lk_index]->xl->name);
+ }
+
+ goto out;
+ }
+
+ local->lock[0].layout.my_layout.locks[lk_index]->locked = _gf_true;
+
+out:
+ call_cnt = dht_frame_return(frame);
+ if (is_last_call(call_cnt)) {
+ if (local->lock[0].layout.my_layout.op_ret < 0) {
+ dht_inodelk_cleanup(frame);
+ return 0;
+ }
+
+ dht_inodelk_done(frame);
+ }
+
+ return 0;
+}
+
+int
+dht_nonblocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array,
+ int lk_count, fop_inodelk_cbk_t inodelk_cbk)
+{
+ struct gf_flock flock = {
+ 0,
+ };
+ int i = 0, ret = 0;
+ dht_local_t *local = NULL;
+ call_frame_t *lock_frame = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, inodelk_cbk, out);
+
+ lock_frame = dht_lock_frame(frame);
+ if (lock_frame == NULL)
+ goto out;
+
+ ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk);
+ if (ret < 0) {
+ goto out;
+ }
+
+ dht_set_lkowner(lk_array, lk_count, &lock_frame->root->lk_owner);
+
+ local = lock_frame->local;
+ local->main_frame = frame;
+
+ local->call_cnt = lk_count;
+
+ for (i = 0; i < lk_count; i++) {
+ flock.l_type = local->lock[0].layout.my_layout.locks[i]->type;
+
+ STACK_WIND_COOKIE(
+ lock_frame, dht_nonblocking_inodelk_cbk, (void *)(long)i,
+ local->lock[0].layout.my_layout.locks[i]->xl,
+ local->lock[0].layout.my_layout.locks[i]->xl->fops->inodelk,
+ local->lock[0].layout.my_layout.locks[i]->domain,
+ &local->lock[0].layout.my_layout.locks[i]->loc, F_SETLK, &flock,
+ NULL);
+ }
+
+ return 0;
+
+out:
+ if (lock_frame)
+ dht_lock_stack_destroy(lock_frame, DHT_INODELK);
+
+ return -1;
+}
+
+static int32_t
+dht_blocking_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int lk_index = 0;
+ int i = 0;
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+ dht_reaction_type_t reaction = 0;
+
+ lk_index = (long)cookie;
+
+ local = frame->local;
+ if (op_ret == 0) {
+ local->lock[0].layout.my_layout.locks[lk_index]->locked = _gf_true;
+ } else {
+ switch (op_errno) {
+ case ESTALE:
+ case ENOENT:
+ reaction = local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->do_on_failure;
+ if ((reaction != IGNORE_ENOENT_ESTALE) &&
+ (reaction != IGNORE_ENOENT_ESTALE_EIO)) {
+ gf_uuid_unparse(local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->loc.gfid,
+ gfid);
+ local->lock[0].layout.my_layout.op_ret = -1;
+ local->lock[0].layout.my_layout.op_errno = op_errno;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_INODELK_FAILED, "subvol=%s",
+ local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->xl->name,
+ "gfid=%s", gfid, NULL);
+ goto cleanup;
+ }
+ break;
+ case EIO:
+ reaction = local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->do_on_failure;
+ if (reaction != IGNORE_ENOENT_ESTALE_EIO) {
+ gf_uuid_unparse(local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->loc.gfid,
+ gfid);
+ local->lock[0].layout.my_layout.op_ret = -1;
+ local->lock[0].layout.my_layout.op_errno = op_errno;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_INODELK_FAILED, "subvol=%s",
+ local->lock[0]
+ .layout.my_layout.locks[lk_index]
+ ->xl->name,
+ "gfid=%s", gfid, NULL);
+ goto cleanup;
+ }
+ break;
+
+ default:
+ gf_uuid_unparse(
+ local->lock[0].layout.my_layout.locks[lk_index]->loc.gfid,
+ gfid);
+ local->lock[0].layout.my_layout.op_ret = -1;
+ local->lock[0].layout.my_layout.op_errno = op_errno;
+ gf_smsg(
+ this->name, GF_LOG_ERROR, op_errno, DHT_MSG_INODELK_FAILED,
+ "subvol=%s",
+ local->lock[0].layout.my_layout.locks[lk_index]->xl->name,
+ "gfid=%s", gfid, NULL);
+ goto cleanup;
+ }
+ }
+
+ if (lk_index == (local->lock[0].layout.my_layout.lk_count - 1)) {
+ for (i = 0; (i < local->lock[0].layout.my_layout.lk_count) &&
+ (!local->lock[0].layout.my_layout.locks[i]->locked);
+ i++)
+ ;
+
+ if (i == local->lock[0].layout.my_layout.lk_count) {
+ local->lock[0].layout.my_layout.op_ret = -1;
+ local->lock[0].layout.my_layout.op_errno = op_errno;
+ }
+
+ dht_inodelk_done(frame);
+ } else {
+ dht_blocking_inodelk_rec(frame, ++lk_index);
+ }
+
+ return 0;
+
+cleanup:
+ dht_inodelk_cleanup(frame);
+
+ return 0;
+}
+
+void
+dht_blocking_inodelk_rec(call_frame_t *frame, int i)
+{
+ dht_local_t *local = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+
+ local = frame->local;
+
+ flock.l_type = local->lock[0].layout.my_layout.locks[i]->type;
+
+ STACK_WIND_COOKIE(
+ frame, dht_blocking_inodelk_cbk, (void *)(long)i,
+ local->lock[0].layout.my_layout.locks[i]->xl,
+ local->lock[0].layout.my_layout.locks[i]->xl->fops->inodelk,
+ local->lock[0].layout.my_layout.locks[i]->domain,
+ &local->lock[0].layout.my_layout.locks[i]->loc, F_SETLKW, &flock, NULL);
+
+ return;
+}
+
+int
+dht_blocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t inodelk_cbk)
+{
+ int ret = -1;
+ call_frame_t *lock_frame = NULL;
+ dht_local_t *local = NULL;
+ dht_local_t *tmp_local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lk_array, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, inodelk_cbk, out);
+
+ tmp_local = frame->local;
+
+ lock_frame = dht_lock_frame(frame);
+ if (lock_frame == NULL) {
+ gf_uuid_unparse(tmp_local->loc.gfid, gfid);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCK_FRAME_FAILED,
+ "gfid=%s", gfid, "path=%s", tmp_local->loc.path, NULL);
+ goto out;
+ }
+
+ ret = dht_local_inodelk_init(lock_frame, lk_array, lk_count, inodelk_cbk);
+ if (ret < 0) {
+ gf_uuid_unparse(tmp_local->loc.gfid, gfid);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_LOCAL_LOCK_INIT_FAILED,
+ "gfid=%s", gfid, "path=%s", tmp_local->loc.path, NULL);
+ goto out;
+ }
+
+ dht_set_lkowner(lk_array, lk_count, &lock_frame->root->lk_owner);
+
+ local = lock_frame->local;
+ local->main_frame = frame;
+
+ dht_blocking_inodelk_rec(lock_frame, 0);
+
+ return 0;
+out:
+ if (lock_frame)
+ dht_lock_stack_destroy(lock_frame, DHT_INODELK);
+
+ return -1;
+}
+
+void
+dht_unlock_namespace(call_frame_t *frame, dht_dir_transaction_t *lock)
+{
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, lock, out);
+
+ dht_unlock_entrylk_wrapper(frame, &lock->ns.directory_ns);
+ dht_unlock_inodelk_wrapper(frame, &lock->ns.parent_layout);
+
+out:
+ return;
+}
+
+static int32_t
+dht_protect_namespace_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+ if (op_ret != 0)
+ dht_unlock_inodelk_wrapper(frame, &local->current->ns.parent_layout);
+
+ local->current->ns.ns_cbk(frame, cookie, this, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+dht_blocking_entrylk_after_inodelk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int ret = -1;
+ loc_t *loc = NULL;
+ dht_lock_t **lk_array = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int count = 0;
+ dht_elock_wrap_t *entrylk = NULL;
+
+ local = frame->local;
+ entrylk = &local->current->ns.directory_ns;
+
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ loc = &entrylk->locks[0]->loc;
+ gf_uuid_unparse(loc->gfid, pgfid);
+
+ local->op_ret = 0;
+ lk_array = entrylk->locks;
+ count = entrylk->lk_count;
+
+ ret = dht_blocking_entrylk(frame, lk_array, count,
+ dht_protect_namespace_cbk);
+
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_ENTRYLK_FAILED_AFT_INODELK, "fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "basename=%s",
+ entrylk->locks[0]->basename, NULL);
+ goto err;
+ }
+
+ return 0;
+
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free(lk_array, count);
+ GF_FREE(lk_array);
+ entrylk->locks = NULL;
+ entrylk->lk_count = 0;
+ }
+
+ /* Unlock inodelk. No harm calling unlock twice */
+ dht_unlock_inodelk_wrapper(frame, &local->current->ns.parent_layout);
+ /* Call ns_cbk. It will take care of unwinding */
+ local->current->ns.ns_cbk(frame, NULL, this, local->op_ret, local->op_errno,
+ NULL);
+ return 0;
+}
+
+/* Given the loc and the subvol, this routine takes the inodelk on
+ * the parent inode and entrylk on (parent, loc->name). This routine
+ * is specific as it supports only one subvol on which it takes inodelk
+ * and then entrylk serially.
+ */
+int
+dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
+ struct dht_namespace *ns, fop_entrylk_cbk_t ns_cbk)
+{
+ dht_ilock_wrap_t *inodelk = NULL;
+ dht_elock_wrap_t *entrylk = NULL;
+ dht_lock_t **lk_array = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ loc_t parent = {
+ 0,
+ };
+ int ret = -1;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ int32_t op_errno = 0;
+ int count = 1;
+
+ GF_VALIDATE_OR_GOTO("dht-locks", frame, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, loc->parent, out);
+ GF_VALIDATE_OR_GOTO(frame->this->name, subvol, out);
+
+ local = frame->local;
+ this = frame->this;
+
+ inodelk = &ns->parent_layout;
+ entrylk = &ns->directory_ns;
+
+ /* Initialize entrylk_cbk and parent loc */
+ ns->ns_cbk = ns_cbk;
+
+ ret = dht_build_parent_loc(this, &parent, loc, &op_errno);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, DHT_MSG_LOC_FAILED,
+ "gfid=%s", loc->gfid, "name=%s", loc->name, "path=%s",
+ loc->path, NULL);
+ goto out;
+ }
+ gf_uuid_unparse(parent.gfid, pgfid);
+
+ /* Alloc inodelk */
+ inodelk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
+ if (inodelk->locks == NULL) {
+ local->op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_CALLOC_FAILED, "fop=%s", gf_fop_list[local->fop],
+ "pgfid=%s", pgfid, "name=%s", loc->name, "path=%s", loc->path,
+ NULL);
+ goto out;
+ }
+
+ inodelk->locks[0] = dht_lock_new(this, subvol, &parent, F_RDLCK,
+ DHT_LAYOUT_HEAL_DOMAIN, NULL,
+ FAIL_ON_ANY_ERROR);
+ if (inodelk->locks[0] == NULL) {
+ local->op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_LOCK_ALLOC_FAILED, "inodelk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
+ goto err;
+ }
+ inodelk->lk_count = count;
+
+ /* Allock entrylk */
+ entrylk->locks = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
+ if (entrylk->locks == NULL) {
+ local->op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_CALLOC_FAILED, "entrylk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
+
+ goto err;
+ }
+
+ entrylk->locks[0] = dht_lock_new(this, subvol, &parent, F_WRLCK,
+ DHT_ENTRY_SYNC_DOMAIN, loc->name,
+ FAIL_ON_ANY_ERROR);
+ if (entrylk->locks[0] == NULL) {
+ local->op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_LOCK_ALLOC_FAILED, "entrylk-fop=%s",
+ gf_fop_list[local->fop], "pgfid=%s", pgfid, "name=%s",
+ loc->name, "path=%s", loc->path, NULL);
+
+ goto err;
+ }
+ entrylk->lk_count = count;
+
+ /* Take read inodelk on parent. If it is successful, take write entrylk
+ * on name in cbk.
+ */
+ lk_array = inodelk->locks;
+ ret = dht_blocking_inodelk(frame, lk_array, count,
+ dht_blocking_entrylk_after_inodelk);
+ if (ret < 0) {
+ local->op_errno = EIO;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_BLOCK_INODELK_FAILED, "fop=%s", gf_fop_list[local->fop],
+ "pgfid=%s", pgfid, "name=%s", loc->name, "path=%s", loc->path,
+ NULL);
+
+ goto err;
+ }
+
+ loc_wipe(&parent);
+
+ return 0;
+err:
+ if (entrylk->locks != NULL) {
+ dht_lock_array_free(entrylk->locks, count);
+ GF_FREE(entrylk->locks);
+ entrylk->locks = NULL;
+ entrylk->lk_count = 0;
+ }
+
+ if (inodelk->locks != NULL) {
+ dht_lock_array_free(inodelk->locks, count);
+ GF_FREE(inodelk->locks);
+ inodelk->locks = NULL;
+ inodelk->lk_count = 0;
+ }
+
+ loc_wipe(&parent);
+out:
+ return -1;
+}
diff --git a/xlators/cluster/dht/src/dht-lock.h b/xlators/cluster/dht/src/dht-lock.h
new file mode 100644
index 00000000000..6485c03fb6e
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-lock.h
@@ -0,0 +1,91 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _DHT_LOCK_H
+#define _DHT_LOCK_H
+
+#include "dht-common.h"
+
+void
+dht_lock_array_free(dht_lock_t **lk_array, int count);
+
+int32_t
+dht_lock_count(dht_lock_t **lk_array, int lk_count);
+
+dht_lock_t *
+dht_lock_new(xlator_t *this, xlator_t *xl, loc_t *loc, short type,
+ const char *domain, const char *basename,
+ dht_reaction_type_t do_on_failure);
+
+int32_t
+dht_unlock_entrylk_wrapper(call_frame_t *, dht_elock_wrap_t *);
+
+void
+dht_blocking_entrylk_rec(call_frame_t *frame, int i);
+
+int
+dht_blocking_entrylk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t entrylk_cbk);
+
+int32_t
+dht_unlock_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t inodelk_cbk);
+
+int32_t
+dht_unlock_inodelk_wrapper(call_frame_t *, dht_ilock_wrap_t *);
+
+/* Acquire non-blocking inodelk on a list of xlators.
+ *
+ * @lk_array: array of lock requests lock on.
+ *
+ * @lk_count: number of locks in @lk_array
+ *
+ * @inodelk_cbk: will be called after inodelk replies are received
+ *
+ * @retval: -1 if stack_winding inodelk fails. 0 otherwise.
+ * inodelk_cbk is called with appropriate error on errors.
+ * On failure to acquire lock on all members of list, successful
+ * locks are unlocked before invoking cbk.
+ */
+
+int
+dht_nonblocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array,
+ int lk_count, fop_inodelk_cbk_t inodelk_cbk);
+
+void
+dht_blocking_inodelk_rec(call_frame_t *frame, int i);
+
+/* same as dht_nonblocking_inodelk, but issues sequential blocking locks on
+ * @lk_array directly. locks are issued on some order which remains same
+ * for a list of xlators (irrespective of order of xlators within list).
+ */
+
+int
+dht_blocking_inodelk(call_frame_t *frame, dht_lock_t **lk_array, int lk_count,
+ fop_inodelk_cbk_t inodelk_cbk);
+
+int32_t
+dht_blocking_entrylk_after_inodelk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+int32_t
+dht_blocking_entrylk_after_inodelk_rename(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata);
+
+void
+dht_unlock_namespace(call_frame_t *, dht_dir_transaction_t *);
+
+int
+dht_protect_namespace(call_frame_t *frame, loc_t *loc, xlator_t *subvol,
+ struct dht_namespace *ns, fop_entrylk_cbk_t ns_cbk);
+
+#endif /* _DHT_LOCK_H */
diff --git a/xlators/cluster/dht/src/dht-mem-types.h b/xlators/cluster/dht/src/dht-mem-types.h
index 4f3a90e914a..e3c4471334a 100644
--- a/xlators/cluster/dht/src/dht-mem-types.h
+++ b/xlators/cluster/dht/src/dht-mem-types.h
@@ -8,26 +8,31 @@
cases as published by the Free Software Foundation.
*/
-
#ifndef __DHT_MEM_TYPES_H__
#define __DHT_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_dht_mem_types_ {
- gf_dht_mt_dht_du_t = gf_common_mt_end + 1,
- gf_dht_mt_dht_conf_t,
- gf_dht_mt_char,
- gf_dht_mt_int32_t,
- gf_dht_mt_xlator_t,
- gf_dht_mt_dht_layout_t,
- gf_switch_mt_dht_conf_t,
- gf_switch_mt_dht_du_t,
- gf_switch_mt_switch_sched_array,
- gf_switch_mt_switch_struct,
- gf_dht_mt_subvol_time,
- gf_dht_mt_loc_t,
- gf_defrag_info_mt,
- gf_dht_mt_end
+ gf_dht_mt_dht_du_t = gf_common_mt_end + 1,
+ gf_dht_mt_dht_conf_t,
+ gf_dht_mt_char,
+ gf_dht_mt_int32_t,
+ gf_dht_mt_xlator_t,
+ gf_dht_mt_dht_layout_t,
+ gf_switch_mt_switch_sched_array,
+ gf_switch_mt_switch_struct,
+ gf_dht_mt_subvol_time,
+ gf_dht_mt_loc_t,
+ gf_defrag_info_mt,
+ gf_dht_mt_inode_ctx_t,
+ gf_dht_mt_dirent_t,
+ gf_dht_mt_container_t,
+ gf_dht_mt_octx_t,
+ gf_dht_mt_miginfo_t,
+ gf_dht_mt_fd_ctx_t,
+ gf_dht_ret_cache_t,
+ gf_dht_nodeuuids_t,
+ gf_dht_mt_end
};
#endif
diff --git a/xlators/cluster/dht/src/dht-messages.h b/xlators/cluster/dht/src/dht-messages.h
new file mode 100644
index 00000000000..601f8dad78b
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-messages.h
@@ -0,0 +1,386 @@
+/*Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _DHT_MESSAGES_H_
+#define _DHT_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(
+ DHT, DHT_MSG_CACHED_SUBVOL_GET_FAILED, DHT_MSG_CREATE_LINK_FAILED,
+ DHT_MSG_DICT_SET_FAILED, DHT_MSG_DIR_ATTR_HEAL_FAILED,
+ DHT_MSG_DIR_SELFHEAL_FAILED, DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+ DHT_MSG_FILE_ON_MULT_SUBVOL, DHT_MSG_FILE_TYPE_MISMATCH,
+ DHT_MSG_GFID_MISMATCH, DHT_MSG_GFID_NULL, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ DHT_MSG_INIT_FAILED, DHT_MSG_INVALID_CONFIGURATION,
+ DHT_MSG_INVALID_DISK_LAYOUT, DHT_MSG_INVALID_OPTION,
+ DHT_MSG_LAYOUT_FIX_FAILED, DHT_MSG_LAYOUT_MERGE_FAILED,
+ DHT_MSG_LAYOUT_MISMATCH, DHT_MSG_LAYOUT_NULL, DHT_MSG_MIGRATE_DATA_COMPLETE,
+ DHT_MSG_MIGRATE_DATA_FAILED, DHT_MSG_MIGRATE_FILE_COMPLETE,
+ DHT_MSG_MIGRATE_FILE_FAILED, DHT_MSG_NO_MEMORY, DHT_MSG_OPENDIR_FAILED,
+ DHT_MSG_REBALANCE_FAILED, DHT_MSG_REBALANCE_START_FAILED,
+ DHT_MSG_REBALANCE_STATUS, DHT_MSG_REBALANCE_STOPPED, DHT_MSG_RENAME_FAILED,
+ DHT_MSG_SETATTR_FAILED, DHT_MSG_SUBVOL_INSUFF_INODES,
+ DHT_MSG_SUBVOL_INSUFF_SPACE, DHT_MSG_UNLINK_FAILED,
+ DHT_MSG_LAYOUT_SET_FAILED, DHT_MSG_LOG_FIXED_LAYOUT,
+ DHT_MSG_GET_XATTR_FAILED, DHT_MSG_FILE_LOOKUP_FAILED,
+ DHT_MSG_OPEN_FD_FAILED, DHT_MSG_SET_INODE_CTX_FAILED,
+ DHT_MSG_UNLOCKING_FAILED, DHT_MSG_DISK_LAYOUT_NULL, DHT_MSG_SUBVOL_INFO,
+ DHT_MSG_CHUNK_SIZE_INFO, DHT_MSG_LAYOUT_FORM_FAILED, DHT_MSG_SUBVOL_ERROR,
+ DHT_MSG_LAYOUT_SORT_FAILED, DHT_MSG_REGEX_INFO, DHT_MSG_FOPEN_FAILED,
+ DHT_MSG_SET_HOSTNAME_FAILED, DHT_MSG_BRICK_ERROR, DHT_MSG_SYNCOP_FAILED,
+ DHT_MSG_MIGRATE_INFO, DHT_MSG_SOCKET_ERROR, DHT_MSG_CREATE_FD_FAILED,
+ DHT_MSG_READDIR_ERROR, DHT_MSG_CHILD_LOC_BUILD_FAILED,
+ DHT_MSG_SET_SWITCH_PATTERN_ERROR, DHT_MSG_COMPUTE_HASH_FAILED,
+ DHT_MSG_FIND_LAYOUT_ANOMALIES_ERROR, DHT_MSG_ANOMALIES_INFO,
+ DHT_MSG_LAYOUT_INFO, DHT_MSG_INODE_LK_ERROR, DHT_MSG_RENAME_INFO,
+ DHT_MSG_DATA_NULL, DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED,
+ DHT_MSG_UNLINK_LOOKUP_INFO, DHT_MSG_LINK_FILE_LOOKUP_INFO,
+ DHT_MSG_OPERATION_NOT_SUP, DHT_MSG_NOT_LINK_FILE_ERROR, DHT_MSG_CHILD_DOWN,
+ DHT_MSG_UUID_PARSE_ERROR, DHT_MSG_GET_DISK_INFO_ERROR,
+ DHT_MSG_INVALID_VALUE, DHT_MSG_SWITCH_PATTERN_INFO,
+ DHT_MSG_SUBVOL_OP_FAILED, DHT_MSG_LAYOUT_PRESET_FAILED,
+ DHT_MSG_INVALID_LINKFILE, DHT_MSG_FIX_LAYOUT_INFO,
+ DHT_MSG_GET_HOSTNAME_FAILED, DHT_MSG_WRITE_FAILED,
+ DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED, DHT_MSG_FSYNC_FAILED,
+ DHT_MSG_SUBVOL_DECOMMISSION_INFO, DHT_MSG_BRICK_QUERY_FAILED,
+ DHT_MSG_SUBVOL_NO_LAYOUT_INFO, DHT_MSG_OPEN_FD_ON_DST_FAILED,
+ DHT_MSG_SUBVOL_NOT_FOUND, DHT_MSG_FILE_LOOKUP_ON_DST_FAILED,
+ DHT_MSG_DISK_LAYOUT_MISSING, DHT_MSG_DICT_GET_FAILED,
+ DHT_MSG_REVALIDATE_CBK_INFO, DHT_MSG_UPGRADE_BRICKS, DHT_MSG_LK_ARRAY_INFO,
+ DHT_MSG_RENAME_NOT_LOCAL, DHT_MSG_RECONFIGURE_INFO,
+ DHT_MSG_INIT_LOCAL_SUBVOL_FAILED, DHT_MSG_SYS_CALL_GET_TIME_FAILED,
+ DHT_MSG_NO_DISK_USAGE_STATUS, DHT_MSG_SUBVOL_DOWN_ERROR,
+ DHT_MSG_REBAL_THROTTLE_INFO, DHT_MSG_COMMIT_HASH_INFO,
+ DHT_MSG_REBAL_STRUCT_SET, DHT_MSG_HAS_MIGINFO, DHT_MSG_SETTLE_HASH_FAILED,
+ DHT_MSG_DEFRAG_PROCESS_DIR_FAILED, DHT_MSG_FD_CTX_SET_FAILED,
+ DHT_MSG_STALE_LOOKUP, DHT_MSG_PARENT_LAYOUT_CHANGED,
+ DHT_MSG_LOCK_MIGRATION_FAILED, DHT_MSG_LOCK_INODE_UNREF_FAILED,
+ DHT_MSG_ASPRINTF_FAILED, DHT_MSG_DIR_LOOKUP_FAILED, DHT_MSG_INODELK_FAILED,
+ DHT_MSG_LOCK_FRAME_FAILED, DHT_MSG_LOCAL_LOCK_INIT_FAILED,
+ DHT_MSG_ENTRYLK_ERROR, DHT_MSG_INODELK_ERROR, DHT_MSG_LOC_FAILED,
+ DHT_MSG_UNKNOWN_FOP, DHT_MSG_MIGRATE_FILE_SKIPPED,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, DHT_MSG_HASHED_SUBVOL_DOWN,
+ DHT_MSG_NON_HASHED_SUBVOL_DOWN, DHT_MSG_SYNCTASK_CREATE_FAILED,
+ DHT_MSG_DIR_HEAL_ABORT, DHT_MSG_MIGRATE_SKIP, DHT_MSG_FD_CREATE_FAILED,
+ DHT_MSG_DICT_NEW_FAILED, DHT_MSG_FAILED_TO_OPEN, DHT_MSG_CREATE_FAILED,
+ DHT_MSG_FILE_NOT_EXIST, DHT_MSG_CHOWN_FAILED, DHT_MSG_FALLOCATE_FAILED,
+ DHT_MSG_FTRUNCATE_FAILED, DHT_MSG_STATFS_FAILED, DHT_MSG_WRITE_CROSS,
+ DHT_MSG_NEW_TARGET_FOUND, DHT_MSG_INSUFF_MEMORY, DHT_MSG_SET_XATTR_FAILED,
+ DHT_MSG_SET_MODE_FAILED, DHT_MSG_FILE_EXISTS_IN_DEST,
+ DHT_MSG_SYMLINK_FAILED, DHT_MSG_LINKFILE_DEL_FAILED, DHT_MSG_MKNOD_FAILED,
+ DHT_MSG_MIGRATE_CLEANUP_FAILED, DHT_MSG_LOCK_MIGRATE,
+ DHT_MSG_PARENT_BUILD_FAILED, DHT_MSG_HASHED_SUBVOL_NOT_FOUND,
+ DHT_MSG_ACQUIRE_ENTRYLK_FAILED, DHT_MSG_CREATE_DST_FAILED,
+ DHT_MSG_MIGRATION_EXIT, DHT_MSG_CHANGED_DST, DHT_MSG_TRACE_FAILED,
+ DHT_MSG_WRITE_LOCK_FAILED, DHT_MSG_GETACTIVELK_FAILED, DHT_MSG_STAT_FAILED,
+ DHT_MSG_UNLINK_PERFORM_FAILED, DHT_MSG_CLANUP_SOURCE_FILE_FAILED,
+ DHT_MSG_UNLOCK_FILE_FAILED, DHT_MSG_REMOVE_XATTR_FAILED,
+ DHT_MSG_DATA_MIGRATE_ABORT, DHT_MSG_DEFRAG_NULL, DHT_MSG_PARENT_NULL,
+ DHT_MSG_GFID_NOT_PRESENT, DHT_MSG_CHILD_LOC_FAILED,
+ DHT_MSG_SET_LOOKUP_FAILED, DHT_MSG_DIR_REMOVED, DHT_MSG_FIX_NOT_COMP,
+ DHT_MSG_SUBVOL_DETER_FAILED, DHT_MSG_LOCAL_SUBVOL, DHT_MSG_NODE_UUID,
+ DHT_MSG_SIZE_FILE, DHT_MSG_GET_DATA_SIZE_FAILED,
+ DHT_MSG_PTHREAD_JOIN_FAILED, DHT_MSG_COUNTER_THREAD_CREATE_FAILED,
+ DHT_MSG_MIGRATION_INIT_QUEUE_FAILED, DHT_MSG_PAUSED_TIMEOUT, DHT_MSG_WOKE,
+ DHT_MSG_ABORT_REBALANCE, DHT_MSG_CREATE_TASK_REBAL_FAILED,
+ DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL, DHT_MSG_ADD_CHOICES_ERROR,
+ DHT_MSG_GET_CHOICES_ERROR, DHT_MSG_PREPARE_STATUS_ERROR,
+ DHT_MSG_SET_CHOICE_FAILED, DHT_MSG_SET_HASHED_SUBVOL_FAILED,
+ DHT_MSG_XATTR_HEAL_NOT_POSS, DHT_MSG_LINKTO_FILE_FAILED,
+ DHT_MSG_STALE_LINKFILE_DELETE, DHT_MSG_NO_SUBVOL_FOR_LINKTO,
+ DHT_MSG_SUBVOL_RETURNED, DHT_MSG_UNKNOWN_LOCAL_XSEL, DHT_MSG_GET_XATTR_ERR,
+ DHT_MSG_ALLOC_OR_FILL_FAILED, DHT_MSG_GET_REAL_NAME_FAILED,
+ DHT_MSG_COPY_UUID_FAILED, DHT_MSG_MDS_DETER_FAILED,
+ DHT_MSG_CREATE_REBAL_FAILED, DHT_MSG_LINK_LAYOUT_FAILED,
+ DHT_MSG_NO_SUBVOL_IN_LAYOUT, DHT_MSG_MEM_ALLOC_FAILED,
+ DHT_MSG_SET_IN_PARAMS_DICT_FAILED, DHT_MSG_LOC_COPY_FAILED,
+ DHT_MSG_PARENT_LOC_FAILED, DHT_MSG_CREATE_LOCK_FAILED,
+ DHT_MSG_PREV_ATTEMPT_FAILED, DHT_MSG_REFRESH_ATTEMPT,
+ DHT_MSG_ACQUIRE_LOCK_FAILED, DHT_MSG_CREATE_STUB_FAILED,
+ DHT_MSG_WIND_LOCK_REQ_FAILED, DHT_MSG_REFRESH_FAILED,
+ DHT_MSG_CACHED_SUBVOL_ERROR, DHT_MSG_NO_LINK_SUBVOL, DHT_MSG_SET_KEY_FAILED,
+ DHT_MSG_REMOVE_LINKTO_FAILED, DHT_MSG_LAYOUT_DICT_SET_FAILED,
+ DHT_MSG_XATTR_DICT_NULL, DHT_MSG_DUMMY_ALLOC_FAILED, DHT_MSG_DICT_IS_NULL,
+ DHT_MSG_LINK_INODE_FAILED, DHT_MSG_SELFHEAL_FAILED, DHT_MSG_NO_MDS_SUBVOL,
+ DHT_MSG_LIST_XATTRS_FAILED, DHT_MSG_RESET_INTER_XATTR_FAILED,
+ DHT_MSG_MDS_DOWN_UNABLE_TO_SET, DHT_MSG_WIND_UNLOCK_FAILED,
+ DHT_MSG_COMMIT_HASH_FAILED, DHT_MSG_UNLOCK_GFID_FAILED,
+ DHT_MSG_UNLOCK_FOLLOW_ENTRYLK, DHT_MSG_COPY_FRAME_FAILED,
+ DHT_MSG_UNLOCK_FOLLOW_LOCKS, DHT_MSG_ENTRYLK_FAILED_AFT_INODELK,
+ DHT_MSG_CALLOC_FAILED, DHT_MSG_LOCK_ALLOC_FAILED,
+ DHT_MSG_BLOCK_INODELK_FAILED,
+ DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK,
+ DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS,
+ DHT_MSG_DST_NULL_SET_FAILED);
+
+#define DHT_MSG_FD_CTX_SET_FAILED_STR "Failed to set fd ctx"
+#define DHT_MSG_INVALID_VALUE_STR "Different dst found in the fd ctx"
+#define DHT_MSG_UNKNOWN_FOP_STR "Unknown FOP on file"
+#define DHT_MSG_OPEN_FD_ON_DST_FAILED_STR "Failed to open the fd on file"
+#define DHT_MSG_SYNCTASK_CREATE_FAILED_STR "Failed to create synctask"
+#define DHT_MSG_ASPRINTF_FAILED_STR \
+ "asprintf failed while fetching subvol from the id"
+#define DHT_MSG_HAS_MIGINFO_STR "Found miginfo in the inode ctx"
+#define DHT_MSG_FILE_LOOKUP_FAILED_STR "failed to lookup the file"
+#define DHT_MSG_INVALID_LINKFILE_STR \
+ "linkto target is different from cached-subvol. treating as destination " \
+ "subvol"
+#define DHT_MSG_GFID_MISMATCH_STR "gfid different on the target file"
+#define DHT_MSG_GET_XATTR_FAILED_STR "failed to get 'linkto' xattr"
+#define DHT_MSG_SET_INODE_CTX_FAILED_STR "failed to set inode-ctx target file"
+#define DHT_MSG_DIR_SELFHEAL_FAILED_STR "Healing of path failed"
+#define DHT_MSG_DIR_HEAL_ABORT_STR \
+ "Failed to get path from subvol. Aborting directory healing"
+#define DHT_MSG_DIR_XATTR_HEAL_FAILED_STR "xattr heal failed for directory"
+#define DHT_MSG_LOCK_INODE_UNREF_FAILED_STR \
+ "Found a NULL inode. Failed to unref the inode"
+#define DHT_MSG_DICT_SET_FAILED_STR "Failed to set dictionary value"
+#define DHT_MSG_NOT_LINK_FILE_ERROR_STR "got non-linkfile"
+#define DHT_MSG_CREATE_LINK_FAILED_STR "failed to initialize linkfile data"
+#define DHT_MSG_UNLINK_FAILED_STR "Unlinking linkfile on subvolume failed"
+#define DHT_MSG_MIGRATE_FILE_FAILED_STR "Migrate file failed"
+#define DHT_MSG_NO_MEMORY_STR "could not allocate memory for dict"
+#define DHT_MSG_SUBVOL_ERROR_STR "Failed to get linkto subvol"
+#define DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED_STR "link failed on subvol"
+#define DHT_MSG_MIGRATE_FILE_SKIPPED_STR "Migration skipped"
+#define DHT_MSG_FD_CREATE_FAILED_STR "fd create failed"
+#define DHT_MSG_DICT_NEW_FAILED_STR "dict_new failed"
+#define DHT_MSG_FAILED_TO_OPEN_STR "failed to open"
+#define DHT_MSG_CREATE_FAILED_STR "failed to create"
+#define DHT_MSG_FILE_NOT_EXIST_STR "file does not exist"
+#define DHT_MSG_CHOWN_FAILED_STR "chown failed"
+#define DHT_MSG_FALLOCATE_FAILED_STR "fallocate failed"
+#define DHT_MSG_FTRUNCATE_FAILED_STR "ftruncate failed"
+#define DHT_MSG_STATFS_FAILED_STR "failed to get statfs"
+#define DHT_MSG_WRITE_CROSS_STR \
+ "write will cross min-fre-disk for file on subvol. looking for new subvol"
+#define DHT_MSG_SUBVOL_INSUFF_SPACE_STR \
+ "Could not find any subvol with space accommodating the file. Cosider " \
+ "adding bricks"
+#define DHT_MSG_NEW_TARGET_FOUND_STR "New target found for file"
+#define DHT_MSG_INSUFF_MEMORY_STR "insufficient memory"
+#define DHT_MSG_SET_XATTR_FAILED_STR "failed to set xattr"
+#define DHT_MSG_SET_MODE_FAILED_STR "failed to set mode"
+#define DHT_MSG_FILE_EXISTS_IN_DEST_STR "file exists in destination"
+#define DHT_MSG_LINKFILE_DEL_FAILED_STR "failed to delete the linkfile"
+#define DHT_MSG_SYMLINK_FAILED_STR "symlink failed"
+#define DHT_MSG_MKNOD_FAILED_STR "mknod failed"
+#define DHT_MSG_SETATTR_FAILED_STR "failed to perform setattr"
+#define DHT_MSG_MIGRATE_CLEANUP_FAILED_STR \
+ "Migrate file cleanup failed: failed to fstat file"
+#define DHT_MSG_LOCK_MIGRATE_STR "locks will be migrated for file"
+#define DHT_MSG_PARENT_BUILD_FAILED_STR \
+ "failed to build parent loc, which is needed to acquire entrylk to " \
+ "synchronize with renames on this path. Skipping migration"
+#define DHT_MSG_HASHED_SUBVOL_NOT_FOUND_STR \
+ "cannot find hashed subvol which is needed to synchronize with renames " \
+ "on this path. Skipping migration"
+#define DHT_MSG_ACQUIRE_ENTRYLK_FAILED_STR "failed to acquire entrylk on subvol"
+#define DHT_MSG_CREATE_DST_FAILED_STR "create dst failed for file"
+#define DHT_MSG_MIGRATION_EXIT_STR "Exiting migration"
+#define DHT_MSG_CHANGED_DST_STR "destination changed fo file"
+#define DHT_MSG_TRACE_FAILED_STR "Trace failed"
+#define DHT_MSG_WRITE_LOCK_FAILED_STR "write lock failed"
+#define DHT_MSG_GETACTIVELK_FAILED_STR "getactivelk failed for file"
+#define DHT_MSG_STAT_FAILED_STR "failed to do a stat"
+#define DHT_MSG_UNLINK_PERFORM_FAILED_STR "failed to perform unlink"
+#define DHT_MSG_MIGRATE_FILE_COMPLETE_STR "completed migration"
+#define DHT_MSG_CLANUP_SOURCE_FILE_FAILED_STR "failed to cleanup source file"
+#define DHT_MSG_UNLOCK_FILE_FAILED_STR "failed to unlock file"
+#define DHT_MSG_REMOVE_XATTR_FAILED_STR "remove xattr failed"
+#define DHT_MSG_SOCKET_ERROR_STR "Failed to unlink listener socket"
+#define DHT_MSG_HASHED_SUBVOL_GET_FAILED_STR "Failed to get hashed subvolume"
+#define DHT_MSG_CACHED_SUBVOL_GET_FAILED_STR "Failed to get cached subvolume"
+#define DHT_MSG_MIGRATE_DATA_FAILED_STR "migrate-data failed"
+#define DHT_MSG_DEFRAG_NULL_STR "defrag is NULL"
+#define DHT_MSG_DATA_MIGRATE_ABORT_STR \
+ "Readdirp failed. Aborting data migration for dict"
+#define DHT_MSG_LAYOUT_FIX_FAILED_STR "fix layout failed"
+#define DHT_MSG_PARENT_NULL_STR "parent is NULL"
+#define DHT_MSG_GFID_NOT_PRESENT_STR "gfid not present"
+#define DHT_MSG_CHILD_LOC_FAILED_STR "Child loc build failed"
+#define DHT_MSG_SET_LOOKUP_FAILED_STR "Failed to set lookup"
+#define DHT_MSG_DIR_LOOKUP_FAILED_STR "lookup failed"
+#define DHT_MSG_DIR_REMOVED_STR "Dir renamed or removed. Skipping"
+#define DHT_MSG_READDIR_ERROR_STR "readdir failed, Aborting fix-layout"
+#define DHT_MSG_SETTLE_HASH_FAILED_STR "Settle hash failed"
+#define DHT_MSG_DEFRAG_PROCESS_DIR_FAILED_STR "gf_defrag_process_dir failed"
+#define DHT_MSG_FIX_NOT_COMP_STR \
+ "Unable to retrieve fixlayout xattr. Assume background fix layout not " \
+ "complete"
+#define DHT_MSG_SUBVOL_DETER_FAILED_STR \
+ "local subvolume determination failed with error"
+#define DHT_MSG_LOCAL_SUBVOL_STR "local subvol"
+#define DHT_MSG_NODE_UUID_STR "node uuid"
+#define DHT_MSG_SIZE_FILE_STR "Total size files"
+#define DHT_MSG_GET_DATA_SIZE_FAILED_STR \
+ "Failed to get the total data size. Unable to estimate time to complete " \
+ "rebalance"
+#define DHT_MSG_PTHREAD_JOIN_FAILED_STR \
+ "file_counter_thread: pthread_join failed"
+#define DHT_MSG_COUNTER_THREAD_CREATE_FAILED_STR \
+ "Failed to create the file counter thread"
+#define DHT_MSG_MIGRATION_INIT_QUEUE_FAILED_STR \
+ "Failed to initialise migration queue"
+#define DHT_MSG_REBALANCE_STOPPED_STR "Received stop command on rebalance"
+#define DHT_MSG_PAUSED_TIMEOUT_STR "Request pause timer timeout"
+#define DHT_MSG_WOKE_STR "woken"
+#define DHT_MSG_ABORT_REBALANCE_STR "Aborting rebalance"
+#define DHT_MSG_REBALANCE_START_FAILED_STR \
+ "Failed to start rebalance: look up on / failed"
+#define DHT_MSG_CREATE_TASK_REBAL_FAILED_STR \
+ "Could not create task for rebalance"
+#define DHT_MSG_REBAL_ESTIMATE_NOT_AVAIL_STR \
+ "Rebalance estimates will not be available"
+#define DHT_MSG_REBALANCE_STATUS_STR "Rebalance status"
+#define DHT_MSG_DATA_NULL_STR "data value is NULL"
+#define DHT_MSG_ADD_CHOICES_ERROR_STR "Error to add choices in buffer"
+#define DHT_MSG_GET_CHOICES_ERROR_STR "Error to get choices"
+#define DHT_MSG_PREPARE_STATUS_ERROR_STR "Error to prepare status"
+#define DHT_MSG_SET_CHOICE_FAILED_STR "Failed to set full choice"
+#define DHT_MSG_AGGREGATE_QUOTA_XATTR_FAILED_STR \
+ "Failed to aggregate quota xattr"
+#define DHT_MSG_FILE_TYPE_MISMATCH_STR \
+ "path exists as a file on one subvolume and directory on another. Please " \
+ "fix it manually"
+#define DHT_MSG_LAYOUT_SET_FAILED_STR "failed to set layout for subvolume"
+#define DHT_MSG_LAYOUT_MERGE_FAILED_STR "failed to merge layouts for subvolume"
+#define DHT_MSG_SET_HASHED_SUBVOL_FAILED_STR "Failed to set hashed subvolume"
+#define DHT_MSG_XATTR_HEAL_NOT_POSS_STR \
+ "No gfid exists for path. so healing xattr is not possible"
+#define DHT_MSG_REVALIDATE_CBK_INFO_STR "Revalidate: subvolume returned -1"
+#define DHT_MSG_LAYOUT_MISMATCH_STR "Mismatching layouts"
+#define DHT_MSG_UNLINK_LOOKUP_INFO_STR "lookup_unlink retuened"
+#define DHT_MSG_LINKTO_FILE_FAILED_STR \
+ "Could not unlink the linkto file as either fd is open and/or linkto " \
+ "xattr is set"
+#define DHT_MSG_LAYOUT_PRESET_FAILED_STR \
+ "Could not set pre-set layout for subvolume"
+#define DHT_MSG_FILE_ON_MULT_SUBVOL_STR \
+ "multiple subvolumes have file (preferably rename the file in the " \
+ "backend, and do a fresh lookup"
+#define DHT_MSG_STALE_LINKFILE_DELETE_STR \
+ "attempting deletion of stale linkfile"
+#define DHT_MSG_LINK_FILE_LOOKUP_INFO_STR "Lookup on following linkfile"
+#define DHT_MSG_NO_SUBVOL_FOR_LINKTO_STR "No link subvolume for linkto"
+#define DHT_MSG_SUBVOL_RETURNED_STR "Subvolume returned -1"
+#define DHT_MSG_UNKNOWN_LOCAL_XSEL_STR "Unknown local->xsel"
+#define DHT_MSG_DICT_GET_FAILED_STR "Failed to get"
+#define DHT_MSG_UUID_PARSE_ERROR_STR "Failed to parse uuid"
+#define DHT_MSG_GET_XATTR_ERR_STR "getxattr err for dir"
+#define DHT_MSG_ALLOC_OR_FILL_FAILED_STR "alloc or fill failed"
+#define DHT_MSG_UPGRADE_BRICKS_STR \
+ "At least one of the bricks does not support this operation. Please " \
+ "upgrade all bricks"
+#define DHT_MSG_GET_REAL_NAME_FAILED_STR "Failed to get real filename"
+#define DHT_MSG_LAYOUT_NULL_STR "Layout is NULL"
+#define DHT_MSG_COPY_UUID_FAILED_STR "Failed to copy node uuid key"
+#define DHT_MSG_MDS_DETER_FAILED_STR \
+ "Cannot determine MDS, fetching xattr randomly from a subvol"
+#define DHT_MSG_HASHED_SUBVOL_DOWN_STR \
+ "MDS is down for path, so fetching xattr randomly from subvol"
+#define DHT_MSG_CREATE_REBAL_FAILED_STR \
+ "failed to create a new rebalance synctask"
+#define DHT_MSG_FIX_LAYOUT_INFO_STR "fixing the layout"
+#define DHT_MSG_OPERATION_NOT_SUP_STR "wrong directory-spread-count value"
+#define DHT_MSG_LINK_LAYOUT_FAILED_STR "failed to link the layout in inode"
+#define DHT_MSG_NO_SUBVOL_IN_LAYOUT_STR "no subvolume in layout for path"
+#define DHT_MSG_INODE_LK_ERROR_STR "mknod lock failed for file"
+#define DHT_MSG_MEM_ALLOC_FAILED_STR "mem allocation failed"
+#define DHT_MSG_PARENT_LAYOUT_CHANGED_STR \
+ "extracting in-memory layout of parent failed"
+#define DHT_MSG_SET_IN_PARAMS_DICT_FAILED_STR \
+ "setting in params dictionary failed"
+#define DHT_MSG_LOC_COPY_FAILED_STR "loc_copy failed"
+#define DHT_MSG_LOC_FAILED_STR "parent loc build failed"
+#define DHT_MSG_PARENT_LOC_FAILED_STR "locking parent failed"
+#define DHT_MSG_CREATE_LOCK_FAILED_STR "Create lock failed"
+#define DHT_MSG_PREV_ATTEMPT_FAILED_STR \
+ "mkdir loop detected. parent layout didn't change even though previous " \
+ "attempt of mkdir failed because of in-memory layout not matching with " \
+ "that on disk."
+#define DHT_MSG_REFRESH_ATTEMPT_STR \
+ "mkdir parent layout changed. Attempting a refresh and then a retry"
+#define DHT_MSG_ACQUIRE_LOCK_FAILED_STR \
+ "Acquiring lock on parent to guard against layout-change failed"
+#define DHT_MSG_CREATE_STUB_FAILED_STR "creating stub failed"
+#define DHT_MSG_WIND_LOCK_REQ_FAILED_STR \
+ "cannot wind lock request to guard parent layout"
+#define DHT_MSG_REFRESH_FAILED_STR "refreshing parent layout failed."
+#define DHT_MSG_CACHED_SUBVOL_ERROR_STR "On cached subvol"
+#define DHT_MSG_NO_LINK_SUBVOL_STR "Linkfile does not have link subvolume"
+#define DHT_MSG_SET_KEY_FAILED_STR "failed to set key"
+#define DHT_MSG_CHILD_DOWN_STR "Received CHILD_DOWN. Exiting"
+#define DHT_MSG_LOG_FIXED_LAYOUT_STR "log layout fixed"
+#define DHT_MSG_REBAL_STRUCT_SET_STR "local->rebalance already set"
+#define DHT_MSG_REMOVE_LINKTO_FAILED_STR "Removal of linkto failed at subvol"
+#define DHT_MSG_LAYOUT_DICT_SET_FAILED_STR "dht layout dict set failed"
+#define DHT_MSG_SUBVOL_INFO_STR "creating subvolume"
+#define DHT_MSG_COMPUTE_HASH_FAILED_STR "hash computation failed"
+#define DHT_MSG_INVALID_DISK_LAYOUT_STR \
+ "Invalid disk layout: Catastrophic error layout with unknown type found"
+#define DHT_MSG_LAYOUT_SORT_FAILED_STR "layout sort failed"
+#define DHT_MSG_ANOMALIES_INFO_STR "Found anomalies"
+#define DHT_MSG_XATTR_DICT_NULL_STR "xattr dictionary is NULL"
+#define DHT_MSG_DISK_LAYOUT_MISSING_STR "Disk layout missing"
+#define DHT_MSG_LAYOUT_INFO_STR "layout info"
+#define DHT_MSG_SUBVOL_NO_LAYOUT_INFO_STR "no pre-set layout for subvol"
+#define DHT_MSG_SELFHEAL_XATTR_FAILED_STR "layout setxattr failed"
+#define DHT_MSG_DIR_SELFHEAL_XATTR_FAILED_STR "Directory self heal xattr failed"
+#define DHT_MSG_DUMMY_ALLOC_FAILED_STR "failed to allocate dummy layout"
+#define DHT_MSG_DICT_IS_NULL_STR \
+ "dict is NULL, need to make sure gfids are same"
+#define DHT_MSG_ENTRYLK_ERROR_STR "acquiring entrylk after inodelk failed"
+#define DHT_MSG_NO_DISK_USAGE_STATUS_STR "no du stats"
+#define DHT_MSG_LINK_INODE_FAILED_STR "linking inode failed"
+#define DHT_MSG_SELFHEAL_FAILED_STR "Directory selfheal failed"
+#define DHT_MSG_NO_MDS_SUBVOL_STR "No mds subvol"
+#define DHT_MSG_LIST_XATTRS_FAILED_STR "failed to list xattrs"
+#define DHT_MSG_RESET_INTER_XATTR_FAILED_STR "Failed to reset internal xattr"
+#define DHT_MSG_MDS_DOWN_UNABLE_TO_SET_STR \
+ "mds subvol is down, unable to set xattr"
+#define DHT_MSG_DIR_ATTR_HEAL_FAILED_STR \
+ "Directory attr heal failed. Failed to set uid/gid"
+#define DHT_MSG_WIND_UNLOCK_FAILED_STR \
+ "Winding unlock failed: stale locks left on brick"
+#define DHT_MSG_COMMIT_HASH_FAILED_STR "Directory commit hash updaten failed"
+#define DHT_MSG_LK_ARRAY_INFO_STR "lk info"
+#define DHT_MSG_UNLOCK_GFID_FAILED_STR \
+ "unlock failed on gfid: stale lock might be left"
+#define DHT_MSG_UNLOCKING_FAILED_STR "unlocking failed"
+#define DHT_MSG_UNLOCK_FOLLOW_ENTRYLK_STR "not unlocking following entrylks"
+#define DHT_MSG_COPY_FRAME_FAILED_STR "copy frame failed"
+#define DHT_MSG_UNLOCK_FOLLOW_LOCKS_STR "not unlocking following locks"
+#define DHT_MSG_INODELK_FAILED_STR "inodelk failed on subvol"
+#define DHT_MSG_LOCK_FRAME_FAILED_STR "memory allocation failed for lock_frame"
+#define DHT_MSG_LOCAL_LOCK_INIT_FAILED_STR "dht_local_lock_init failed"
+#define DHT_MSG_ENTRYLK_FAILED_AFT_INODELK_STR \
+ "dht_blocking_entrylk failed after taking inodelk"
+#define DHT_MSG_BLOCK_INODELK_FAILED_STR "dht_blocking_inodelk failed"
+#define DHT_MSG_CALLOC_FAILED_STR "calloc failed"
+#define DHT_MSG_LOCK_ALLOC_FAILED_STR "lock allocation failed"
+#define DHT_MSG_ALLOC_FRAME_FAILED_NOT_UNLOCKING_FOLLOWING_ENTRYLKS_STR \
+ "cannot allocate a frame, not unlocking following entrylks"
+#define DHT_MSG_LOCAL_LOCKS_STORE_FAILED_UNLOCKING_FOLLOWING_ENTRYLK_STR \
+ "storing locks in local failed, not unlocking following entrylks"
+#define DHT_MSG_DST_NULL_SET_FAILED_STR \
+ "src or dst is NULL, Failed to set dictionary value"
+
+#endif /* _DHT_MESSAGES_H_ */
diff --git a/xlators/cluster/dht/src/dht-rebalance.c b/xlators/cluster/dht/src/dht-rebalance.c
index 88c84dd37ec..8ba8082bd86 100644
--- a/xlators/cluster/dht/src/dht-rebalance.c
+++ b/xlators/cluster/dht/src/dht-rebalance.c
@@ -8,608 +8,1457 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
+#include "dht-common.h"
+#include <glusterfs/syscall.h>
+#include <fnmatch.h>
+#include <signal.h>
+#include <glusterfs/events.h>
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
+
+#define GF_DISK_SECTOR_SIZE 512
+#define DHT_REBALANCE_PID 4242 /* Change it if required */
+#define DHT_REBALANCE_BLKSIZE 1048576 /* 1 MB */
+#define MAX_MIGRATE_QUEUE_COUNT 500
+#define MIN_MIGRATE_QUEUE_COUNT 200
+#define MAX_REBAL_TYPE_SIZE 16
+#define FILE_CNT_INTERVAL 600 /* 10 mins */
+#define ESTIMATE_START_INTERVAL 600 /* 10 mins */
+#define HARDLINK_MIG_INPROGRESS -2
+#define SKIP_MIGRATION_FD_POSITIVE -3
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#endif
-#include "dht-common.h"
-#include "xlator.h"
+#define GF_CRAWL_INDEX_MOVE(idx, sv_cnt) \
+ { \
+ idx++; \
+ idx %= sv_cnt; \
+ }
-#define GF_DISK_SECTOR_SIZE 512
-#define DHT_REBALANCE_PID 4242 /* Change it if required */
-#define DHT_REBALANCE_BLKSIZE (128 * 1024)
+uint64_t g_totalfiles = 0;
+uint64_t g_totalsize = 0;
-static int
-dht_write_with_holes (xlator_t *to, fd_t *fd, struct iovec *vec, int count,
- int32_t size, off_t offset, struct iobref *iobref)
+void
+gf_defrag_free_dir_dfmeta(struct dir_dfmeta *meta, int local_subvols_cnt)
{
- int i = 0;
- int ret = -1;
- int start_idx = 0;
- int tmp_offset = 0;
- int write_needed = 0;
- int buf_len = 0;
- int size_pending = 0;
- char *buf = NULL;
-
- /* loop through each vector */
- for (i = 0; i < count; i++) {
- buf = vec[i].iov_base;
- buf_len = vec[i].iov_len;
-
- for (start_idx = 0; (start_idx + GF_DISK_SECTOR_SIZE) <= buf_len;
- start_idx += GF_DISK_SECTOR_SIZE) {
-
- if (mem_0filled (buf + start_idx, GF_DISK_SECTOR_SIZE) != 0) {
- write_needed = 1;
- continue;
- }
+ int i = 0;
+
+ if (meta) {
+ for (i = 0; i < local_subvols_cnt; i++) {
+ if (meta->equeue)
+ gf_dirent_free(&meta->equeue[i]);
+ if (meta->lfd && meta->lfd[i])
+ fd_unref(meta->lfd[i]);
+ }
- if (write_needed) {
- ret = syncop_write (to, fd, (buf + tmp_offset),
- (start_idx - tmp_offset),
- (offset + tmp_offset),
- iobref, 0);
- /* 'path' will be logged in calling function */
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to write (%s)",
- strerror (errno));
- goto out;
- }
-
- write_needed = 0;
- }
- tmp_offset = start_idx + GF_DISK_SECTOR_SIZE;
- }
+ GF_FREE(meta->equeue);
+ GF_FREE(meta->head);
+ GF_FREE(meta->iterator);
+ GF_FREE(meta->offset_var);
+ GF_FREE(meta->fetch_entries);
+ GF_FREE(meta->lfd);
+ GF_FREE(meta);
+ }
+}
- if ((start_idx < buf_len) || write_needed) {
- /* This means, last chunk is not yet written.. write it */
- ret = syncop_write (to, fd, (buf + tmp_offset),
- (buf_len - tmp_offset),
- (offset + tmp_offset), iobref, 0);
- if (ret < 0) {
- /* 'path' will be logged in calling function */
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to write (%s)",
- strerror (errno));
- goto out;
- }
- }
+void
+gf_defrag_free_container(struct dht_container *container)
+{
+ if (container) {
+ gf_dirent_entry_free(container->df_entry);
- size_pending = (size - buf_len);
- if (!size_pending)
- break;
+ if (container->parent_loc) {
+ loc_wipe(container->parent_loc);
}
- ret = size;
-out:
- return ret;
+ GF_FREE(container->parent_loc);
+ GF_FREE(container);
+ }
}
-int32_t
-gf_defrag_handle_hardlink (xlator_t *this, loc_t *loc, dict_t *xattrs,
- struct iatt *stbuf)
+void
+dht_set_global_defrag_error(gf_defrag_info_t *defrag, int ret)
{
- int32_t ret = -1;
- xlator_t *cached_subvol = NULL;
- xlator_t *hashed_subvol = NULL;
- xlator_t *linkto_subvol = NULL;
- data_t *data = NULL;
- struct iatt iatt = {0,};
- int32_t op_errno = 0;
-
- GF_VALIDATE_OR_GOTO ("defrag", loc, out);
- GF_VALIDATE_OR_GOTO ("defrag", loc->name, out);
- GF_VALIDATE_OR_GOTO ("defrag", stbuf, out);
- GF_VALIDATE_OR_GOTO ("defrag", this, out);
- GF_VALIDATE_OR_GOTO ("defrag", xattrs, out);
-
- if (uuid_is_null (loc->pargfid)) {
- gf_log ("", GF_LOG_ERROR, "loc->pargfid is NULL for "
- "%s", loc->path);
- goto out;
- }
+ LOCK(&defrag->lock);
+ {
+ defrag->global_error = ret;
+ }
+ UNLOCK(&defrag->lock);
+ return;
+}
- if (uuid_is_null (loc->gfid)) {
- gf_log ("", GF_LOG_ERROR, "loc->gfid is NULL for "
- "%s", loc->path);
- goto out;
- }
+static int
+dht_send_rebalance_event(xlator_t *this, int cmd, gf_defrag_status_t status)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char *tmpstr = NULL;
+ char *ptr = NULL;
+ char *suffix = "-dht";
+ int len = 0;
- cached_subvol = dht_subvol_get_cached (this, loc->inode);
- if (!cached_subvol) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to get cached subvol"
- " for %s on %s", loc->name, this->name);
- goto out;
- }
+ eventtypes_t event = EVENT_LAST;
- hashed_subvol = dht_subvol_get_hashed (this, loc);
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to get hashed subvol"
- " for %s on %s", loc->name, this->name);
- goto out;
+ switch (status) {
+ case GF_DEFRAG_STATUS_COMPLETE:
+ event = EVENT_VOLUME_REBALANCE_COMPLETE;
+ break;
+ case GF_DEFRAG_STATUS_FAILED:
+ event = EVENT_VOLUME_REBALANCE_FAILED;
+ break;
+ case GF_DEFRAG_STATUS_STOPPED:
+ event = EVENT_VOLUME_REBALANCE_STOP;
+ break;
+ default:
+ break;
+ }
+
+ /* DHT volume */
+ len = strlen(this->name) - strlen(suffix);
+ tmpstr = gf_strdup(this->name);
+ if (tmpstr) {
+ ptr = tmpstr + len;
+ if (!strcmp(ptr, suffix)) {
+ tmpstr[len] = '\0';
+ volname = tmpstr;
}
+ }
- gf_log (this->name, GF_LOG_INFO, "Attempting to migrate hardlink %s "
- "with gfid %s from %s -> %s", loc->name, uuid_utoa (loc->gfid),
- cached_subvol->name, hashed_subvol->name);
- data = dict_get (xattrs, DHT_LINKFILE_KEY);
- /* set linkto on cached -> hashed if not present, else link it */
- if (!data) {
- ret = dict_set_str (xattrs, DHT_LINKFILE_KEY,
- hashed_subvol->name);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to set "
- "linkto xattr in dict for %s", loc->name);
- goto out;
- }
+ if (!volname) {
+ /* Better than nothing */
+ volname = this->name;
+ }
- ret = syncop_setxattr (cached_subvol, loc, xattrs, 0);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Linkto setxattr "
- "failed %s -> %s (%s)", cached_subvol->name,
- loc->name, strerror (errno));
- goto out;
- }
- goto out;
- } else {
- linkto_subvol = dht_linkfile_subvol (this, NULL, NULL, xattrs);
- if (!linkto_subvol) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to get "
- "linkto subvol for %s", loc->name);
- } else {
- hashed_subvol = linkto_subvol;
- }
+ if (event != EVENT_LAST) {
+ gf_event(event, "volume=%s", volname);
+ }
- ret = syncop_link (hashed_subvol, loc, loc);
- if (ret) {
- op_errno = errno;
- gf_log (this->name, GF_LOG_ERROR, "link of %s -> %s"
- " failed on subvol %s (%s)", loc->name,
- uuid_utoa(loc->gfid),
- hashed_subvol->name, strerror (op_errno));
- if (op_errno != EEXIST)
- goto out;
- }
- }
- ret = syncop_lookup (hashed_subvol, loc, NULL, &iatt, NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed lookup %s on %s (%s)"
- , loc->name, hashed_subvol->name, strerror (errno));
- goto out;
- }
+ GF_FREE(tmpstr);
+ return ret;
+}
- if (iatt.ia_nlink == stbuf->ia_nlink) {
- ret = dht_migrate_file (this, loc, cached_subvol, hashed_subvol,
- GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS);
- if (ret)
- goto out;
- }
- ret = 0;
-out:
- return ret;
+static void
+dht_strip_out_acls(dict_t *dict)
+{
+ if (dict) {
+ dict_del(dict, "trusted.SGI_ACL_FILE");
+ dict_del(dict, POSIX_ACL_ACCESS_XATTR);
+ }
}
+/*
+ return values:
+ -1 : failure
+ -2 : success
+
+Hard link migration is carried out in three stages.
+
+(Say there are n hardlinks)
+Stage 1: Setting the new hashed subvol information on the 1st hardlink
+ encountered (linkto setxattr)
+
+Stage 2: Creating hardlinks on new hashed subvol for the 2nd to (n-1)th
+ hardlink
+
+Stage 3: Physical migration of the data file for nth hardlink
+
+Why to deem "-2" as success and not "0":
+
+ dht_migrate_file expects return value "0" from _is_file_migratable if
+the file has to be migrated.
+
+ _is_file_migratable returns zero only when it is called with the
+flag "GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS".
-static inline int
-__is_file_migratable (xlator_t *this, loc_t *loc,
- struct iatt *stbuf, dict_t *xattrs, int flags)
+ gf_defrag_handle_hardlink calls dht_migrate_file for physical migration
+of the data file with the flag "GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS"
+
+Hence, gf_defrag_handle_hardlink returning "0" for success will force
+"dht_migrate_file" to migrate each of the hardlink which is not intended.
+
+For each of the three stage mentioned above "-2" will be returned and will
+be converted to "0" in dht_migrate_file.
+
+*/
+
+int32_t
+gf_defrag_handle_hardlink(xlator_t *this, loc_t *loc, int *fop_errno)
{
- int ret = -1;
+ int32_t ret = -1;
+ xlator_t *cached_subvol = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *linkto_subvol = NULL;
+ data_t *data = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ int32_t op_errno = 0;
+ dht_conf_t *conf = NULL;
+ gf_loglevel_t loglevel = 0;
+ dict_t *link_xattr = NULL;
+ dict_t *dict = NULL;
+ dict_t *xattr_rsp = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+
+ *fop_errno = EINVAL;
+
+ GF_VALIDATE_OR_GOTO("defrag", loc, out);
+ GF_VALIDATE_OR_GOTO("defrag", loc->name, out);
+ GF_VALIDATE_OR_GOTO("defrag", this, out);
+ GF_VALIDATE_OR_GOTO("defrag", this->private, out);
+
+ conf = this->private;
+
+ if (gf_uuid_is_null(loc->pargfid)) {
+ gf_msg("", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "loc->pargfid is NULL for %s",
+ loc->path);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ if (gf_uuid_is_null(loc->gfid)) {
+ gf_msg("", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "loc->gfid is NULL for %s",
+ loc->path);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
- if (IA_ISDIR (stbuf->ia_type)) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: migrate-file called on directory", loc->path);
- ret = -1;
- goto out;
+ link_xattr = dict_new();
+ if (!link_xattr) {
+ ret = -1;
+ *fop_errno = ENOMEM;
+ goto out;
+ }
+
+ /*
+ Parallel migration can lead to migration of the hard link multiple
+ times which can lead to data loss. Hence, adding a fresh lookup to
+ decide whether migration is required or not.
+
+ Elaborating the scenario for let say 10 hardlinks [link{1..10}]:
+ Let say the first hard link "link1" does the setxattr of the
+ new hashed subvolume info on the cached file. As there are multiple
+ threads working, we might have already all the links created on the
+ new hashed by the time we reach hardlink let say link5. Now the
+ number of links on hashed is equal to that of cached. Hence, file
+ migration will happen for link6.
+
+ Cached Hashed
+ --------T link6 rwxrwxrwx link6
+
+ Now post above state all the link file on the cached will be zero
+ byte linkto files. Hence, if we still do migration for the following
+ files link{7..10}, we will end up migrating 0 data leading to data
+ loss.
+ Hence, a lookup can make sure whether we need to migrate the
+ file or not.
+ */
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ *fop_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "could not allocate memory for dict");
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, conf->link_xattr_name, 256);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to set 'linkto' key in dict",
+ loc->path);
+ goto out;
+ }
+
+ ret = syncop_lookup(this, loc, &stbuf, NULL, dict, &xattr_rsp);
+ if (ret) {
+ /*Ignore ENOENT and ESTALE as file might have been
+ migrated already*/
+ if (-ret == ENOENT || -ret == ESTALE) {
+ ret = -2;
+ goto out;
+ }
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:%s lookup failed with ret = %d", loc->path,
+ ret);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ cached_subvol = dht_subvol_get_cached(this, loc->inode);
+ if (!cached_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "Failed to get cached subvol"
+ " for %s on %s",
+ loc->name, this->name);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (!hashed_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "Failed to get hashed subvol"
+ " for %s on %s",
+ loc->name, this->name);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ /* Hardlink migration happens only with remove-brick. So this condition will
+ * be true only when the migration has happened. In case hardlinks are
+ * migrated for rebalance case, remove this check. Having this check here
+ * avoid redundant calls below*/
+ if (hashed_subvol == cached_subvol) {
+ ret = -2;
+ goto out;
+ }
+
+ gf_log(this->name, GF_LOG_INFO,
+ "Attempting to migrate hardlink %s "
+ "with gfid %s from %s -> %s",
+ loc->name, uuid_utoa(loc->gfid), cached_subvol->name,
+ hashed_subvol->name);
+
+ data = dict_get(xattr_rsp, conf->link_xattr_name);
+ /* set linkto on cached -> hashed if not present, else link it */
+ if (!data) {
+ ret = dict_set_str(link_xattr, conf->link_xattr_name,
+ hashed_subvol->name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "Failed to set dictionary value:"
+ " key = %s for %s",
+ conf->link_xattr_name, loc->name);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) {
- ret = 0;
- goto out;
+ ret = syncop_setxattr(cached_subvol, loc, link_xattr, 0, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :"
+ "Linkto setxattr failed %s -> %s",
+ cached_subvol->name, loc->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
- if (stbuf->ia_nlink > 1) {
- /* support for decomission */
- if (flags == GF_DHT_MIGRATE_HARDLINK) {
- ret = gf_defrag_handle_hardlink (this, loc,
- xattrs, stbuf);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to migrate file with link",
- loc->path);
- }
- } else {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: file has hardlinks", loc->path);
- }
- ret = ENOTSUP;
+
+ gf_msg_debug(this->name, 0,
+ "hardlink target subvol created on %s "
+ ",cached %s, file %s",
+ hashed_subvol->name, cached_subvol->name, loc->path);
+
+ ret = -2;
+ goto out;
+ } else {
+ linkto_subvol = dht_linkfile_subvol(this, NULL, NULL, xattr_rsp);
+ if (!linkto_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_ERROR,
+ "Failed to get "
+ "linkto subvol for %s",
+ loc->name);
+ } else {
+ hashed_subvol = linkto_subvol;
+ }
+
+ ret = syncop_link(hashed_subvol, loc, loc, &iatt, NULL, NULL);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+
+ loglevel = (op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_ERROR;
+ gf_msg(this->name, loglevel, op_errno,
+ DHT_MSG_MIGRATE_HARDLINK_FILE_FAILED,
+ "link of %s -> %s"
+ " failed on subvol %s",
+ loc->name, uuid_utoa(loc->gfid), hashed_subvol->name);
+ if (op_errno != EEXIST) {
+ *fop_errno = op_errno;
goto out;
+ }
+ } else {
+ gf_msg_debug(this->name, 0,
+ "syncop_link successful for"
+ " hardlink %s on subvol %s, cached %s",
+ loc->path, hashed_subvol->name, cached_subvol->name);
}
+ }
- ret = 0;
+ ret = syncop_lookup(hashed_subvol, loc, &iatt, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed :Failed lookup %s on %s ", loc->name,
+ hashed_subvol->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* There is a race where on the target subvol for the hardlink
+ * (note: hash subvol for the hardlink might differ from this), some
+ * other client(non-rebalance) would have created a linkto file for that
+ * hardlink as part of lookup. So let say there are 10 hardlinks, on the
+ * 5th hardlink it self the hardlinks might have migrated. Now for
+ * (6..10th) hardlinks the cached and target would be same as the file
+ * has already migrated. Hence this check is needed */
+ if (cached_subvol == hashed_subvol) {
+ gf_msg_debug(this->name, 0,
+ "source %s and destination %s "
+ "for hardlink %s are same",
+ cached_subvol->name, hashed_subvol->name, loc->path);
+ ret = -2;
+ goto out;
+ }
+
+ if (iatt.ia_nlink == stbuf.ia_nlink) {
+ ret = dht_migrate_file(this, loc, cached_subvol, hashed_subvol,
+ GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS, fop_errno);
+ if (ret) {
+ goto out;
+ }
+ }
+ ret = -2;
out:
- return ret;
+ if (link_xattr)
+ dict_unref(link_xattr);
+
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+
+ if (dict)
+ dict_unref(dict);
+
+ return ret;
}
-static inline int
-__dht_rebalance_create_dst_file (xlator_t *to, xlator_t *from, loc_t *loc, struct iatt *stbuf,
- dict_t *dict, fd_t **dst_fd)
+static int
+__check_file_has_hardlink(xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ dict_t *xattrs, int flags, gf_defrag_info_t *defrag,
+ dht_conf_t *conf, int *fop_errno)
{
- xlator_t *this = NULL;
- int ret = -1;
- fd_t *fd = NULL;
- struct iatt new_stbuf = {0,};
+ int ret = 0;
- this = THIS;
-
- ret = dict_set_static_bin (dict, "gfid-req", stbuf->ia_gfid, 16);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set gfid in dict for create", loc->path);
- goto out;
+ if (flags == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS) {
+ ret = 0;
+ return ret;
+ }
+ if (stbuf->ia_nlink > 1) {
+ /* support for decomission */
+ if (flags == GF_DHT_MIGRATE_HARDLINK) {
+ synclock_lock(&conf->link_lock);
+ ret = gf_defrag_handle_hardlink(this, loc, fop_errno);
+ synclock_unlock(&conf->link_lock);
+ /*
+ Returning zero will force the file to be remigrated.
+ Checkout gf_defrag_handle_hardlink for more information.
+ */
+ if (ret && ret != -2) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to migrate file with link",
+ loc->path);
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migration skipped for:"
+ "%s: file has hardlinks",
+ loc->path);
+ *fop_errno = ENOTSUP;
+ ret = 1;
}
+ }
+
+ return ret;
+}
- ret = dict_set_str (dict, DHT_LINKFILE_KEY, from->name);
+/*
+ return values
+ 0 : File will be migrated
+ -2 : File will not be migrated
+ (This is the return value from gf_defrag_handle_hardlink. Checkout
+ gf_defrag_handle_hardlink for description of "returning -2")
+ -1 : failure
+*/
+static int
+__is_file_migratable(xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ dict_t *xattrs, int flags, gf_defrag_info_t *defrag,
+ dht_conf_t *conf, int *fop_errno)
+{
+ int ret = -1;
+ int lock_count = 0;
+
+ if (IA_ISDIR(stbuf->ia_type)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: migrate-file called on directory",
+ loc->path);
+ *fop_errno = EISDIR;
+ ret = -1;
+ goto out;
+ }
+
+ if (!conf->lock_migration_enabled) {
+ ret = dict_get_int32(xattrs, GLUSTERFS_POSIXLK_COUNT, &lock_count);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set gfid in dict for create", loc->path);
- goto out;
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: Unable to get lock count for file",
+ loc->path);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
}
- fd = fd_create (loc->inode, DHT_REBALANCE_PID);
- if (!fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: fd create failed (destination) (%s)",
- loc->path, strerror (errno));
- ret = -1;
- goto out;
+ if (lock_count) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s: File has locks."
+ " Skipping file migration",
+ loc->path);
+ *fop_errno = ENOTSUP;
+ ret = 1;
+ goto out;
}
+ }
- ret = syncop_lookup (to, loc, NULL, &new_stbuf, NULL, NULL);
- if (!ret) {
- /* File exits in the destination, check if gfid matches */
- if (uuid_compare (stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "file %s exits in %s with different gfid",
- loc->path, to->name);
- fd_unref (fd);
- goto out;
- }
+ /* Check if file has hardlink*/
+ ret = __check_file_has_hardlink(this, loc, stbuf, xattrs, flags, defrag,
+ conf, fop_errno);
+out:
+ return ret;
+}
+
+static int
+__dht_rebalance_create_dst_file(xlator_t *this, xlator_t *to, xlator_t *from,
+ loc_t *loc, struct iatt *stbuf, fd_t **dst_fd,
+ int *fop_errno, int file_has_holes)
+{
+ int ret = -1;
+ int ret2 = -1;
+ fd_t *fd = NULL;
+ struct iatt new_stbuf = {
+ 0,
+ };
+ struct iatt check_stbuf = {
+ 0,
+ };
+ dht_conf_t *conf = NULL;
+ dict_t *dict = NULL;
+ dict_t *xdata = NULL;
+
+ conf = this->private;
+
+ dict = dict_new();
+ if (!dict) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "dictionary allocation failed for"
+ "path:%s",
+ loc->path);
+ goto out;
+ }
+ ret = dict_set_gfuuid(dict, "gfid-req", stbuf->ia_gfid, true);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: failed to set dictionary value: key = gfid-req", loc->path);
+ goto out;
+ }
+
+ ret = dict_set_str(dict, conf->link_xattr_name, from->name);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: failed to set dictionary value: key = %s ", loc->path,
+ conf->link_xattr_name);
+ goto out;
+ }
+
+ fd = fd_create(loc->inode, DHT_REBALANCE_PID);
+ if (!fd) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: fd create failed (destination)", loc->path);
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: dict_new failed)", loc->path);
+ goto out;
+ }
+
+ ret = dict_set_int32_sizen(xdata, GF_CLEAN_WRITE_PROTECTION, 1);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "%s: failed to set dictionary value: key = %s ", loc->path,
+ GF_CLEAN_WRITE_PROTECTION);
+ goto out;
+ }
+
+ ret = syncop_lookup(to, loc, &new_stbuf, NULL, xdata, NULL);
+ if (!ret) {
+ /* File exits in the destination, check if gfid matches */
+ if (gf_uuid_compare(stbuf->ia_gfid, new_stbuf.ia_gfid) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH,
+ "file %s exists in %s with different gfid", loc->path,
+ to->name);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
}
- if ((ret == -1) && (errno != ENOENT)) {
- /* File exists in destination, but not accessible */
- gf_log (THIS->name, GF_LOG_WARNING,
- "%s: failed to lookup file (%s)",
- loc->path, strerror (errno));
- goto out;
+ }
+ if ((ret < 0) && (-ret != ENOENT)) {
+ /* File exists in destination, but not accessible */
+ gf_msg(THIS->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to lookup file", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* Create the destination with LINKFILE mode, and linkto xattr,
+ if the linkfile already exists, just open the file */
+ if (!ret) {
+ /*
+ * File already present, just open the file.
+ */
+ ret = syncop_open(to, loc, O_RDWR, fd, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to open %s on %s", loc->path, to->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
-
- /* Create the destination with LINKFILE mode, and linkto xattr,
- if the linkfile already exists, it will just open the file */
- ret = syncop_create (to, loc, O_RDWR, DHT_LINKFILE_MODE, fd,
- dict);
+ } else {
+ ret = syncop_create(to, loc, O_RDWR, DHT_LINKFILE_MODE, fd, &new_stbuf,
+ dict, NULL);
if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create %s on %s (%s)",
- loc->path, to->name, strerror (errno));
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to create %s on %s", loc->path, to->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+ }
+
+ fd_bind(fd);
+
+ /*Reason of doing lookup after create again:
+ *In the create, there is some time-gap between opening fd at the
+ *server (posix_layer) and binding it in server (incrementing fd count),
+ *so if in that time-gap, if other process sends unlink considering it
+ *as a linkto file, because inode->fd count will be 0, so file will be
+ *unlinked at the backend. And because further operations are performed
+ *on fd, so though migration will be done but will end with no file
+ *at the backend.
+ */
+
+ ret = syncop_lookup(to, loc, &check_stbuf, NULL, NULL, NULL);
+ if (!ret) {
+ if (gf_uuid_compare(stbuf->ia_gfid, check_stbuf.ia_gfid) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_GFID_MISMATCH,
+ "file %s exists in %s with different gfid,"
+ "found in lookup after create",
+ loc->path, to->name);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (-ret == ENOENT) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: file does not exist"
+ "on %s",
+ loc->path, to->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_fsetattr(to, fd, stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID),
+ NULL, NULL, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "chown failed for %s on %s", loc->path, to->name);
+ }
+
+ /* No need to bother about 0 byte size files */
+ if (stbuf->ia_size > 0) {
+ if (conf->use_fallocate && !file_has_holes) {
+ ret = syncop_fallocate(to, fd, 0, 0, stbuf->ia_size, NULL, NULL);
+ if (ret < 0) {
+ if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -ENOSYS) {
+ conf->use_fallocate = _gf_false;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "fallocate failed for %s on %s", loc->path,
+ to->name);
+
+ *fop_errno = -ret;
+
+ /* fallocate does not release the space
+ * in some cases
+ */
+ ret2 = syncop_ftruncate(to, fd, 0, NULL, NULL, NULL, NULL);
+ if (ret2 < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret2,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "ftruncate failed for "
+ "%s on %s",
+ loc->path, to->name);
+ }
+ goto out;
+ }
+ }
+ } else {
+ ret = syncop_ftruncate(to, fd, stbuf->ia_size, NULL, NULL, NULL,
+ NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "ftruncate failed for %s on %s", loc->path, to->name);
+ }
}
+ }
- if (dst_fd)
- *dst_fd = fd;
+ /* success */
+ ret = 0;
- /* success */
- ret = 0;
+ if (dst_fd)
+ *dst_fd = fd;
out:
- return ret;
-}
+ if (ret) {
+ if (fd) {
+ fd_unref(fd);
+ }
+ }
+ if (dict)
+ dict_unref(dict);
-static inline int
-__dht_check_free_space (xlator_t *to, xlator_t *from, loc_t *loc,
- struct iatt *stbuf, int flag)
-{
- struct statvfs src_statfs = {0,};
- struct statvfs dst_statfs = {0,};
- int ret = -1;
- xlator_t *this = NULL;
+ if (xdata)
+ dict_unref(xdata);
- this = THIS;
+ return ret;
+}
- ret = syncop_statfs (from, loc, &src_statfs);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to get statfs of %s on %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
+static int
+__dht_check_free_space(xlator_t *this, xlator_t *to, xlator_t *from, loc_t *loc,
+ struct iatt *stbuf, int flag, dht_conf_t *conf,
+ gf_boolean_t *target_changed, xlator_t **new_subvol,
+ int *fop_errno)
+{
+ struct statvfs src_statfs = {
+ 0,
+ };
+ struct statvfs dst_statfs = {
+ 0,
+ };
+ int ret = -1;
+ dict_t *xdata = NULL;
+ dht_layout_t *layout = NULL;
+ uint64_t src_statfs_blocks = 1;
+ uint64_t dst_statfs_blocks = 1;
+ double dst_post_availspacepercent = 0;
+ double src_post_availspacepercent = 0;
+ uint64_t file_blocks = 0;
+ uint64_t src_total_blocks = 0;
+ uint64_t dst_total_blocks = 0;
+
+ xdata = dict_new();
+ if (!xdata) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "failed to allocate dictionary");
+ goto out;
+ }
+
+ ret = dict_set_int8(xdata, GF_INTERNAL_IGNORE_DEEM_STATFS, 1);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to set " GF_INTERNAL_IGNORE_DEEM_STATFS " in dict");
+ ret = -1;
+ *fop_errno = ENOMEM;
+ goto out;
+ }
+
+ ret = syncop_statfs(from, loc, &src_statfs, xdata, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to get statfs of %s on %s", loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_statfs(to, loc, &dst_statfs, xdata, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to get statfs of %s on %s", loc->path, to->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0,
+ "min_free_disk - %f , block available - %" PRId64
+ ", block size - %lu",
+ conf->min_free_disk, dst_statfs.f_bavail, dst_statfs.f_bsize);
+
+ dst_statfs_blocks = dst_statfs.f_bavail *
+ (dst_statfs.f_frsize / GF_DISK_SECTOR_SIZE);
+
+ src_statfs_blocks = src_statfs.f_bavail *
+ (src_statfs.f_frsize / GF_DISK_SECTOR_SIZE);
+
+ dst_total_blocks = dst_statfs.f_blocks *
+ (dst_statfs.f_frsize / GF_DISK_SECTOR_SIZE);
+
+ src_total_blocks = src_statfs.f_blocks *
+ (src_statfs.f_frsize / GF_DISK_SECTOR_SIZE);
+
+ /* if force option is given, do not check for space @ dst.
+ * Check only if space is avail for the file */
+ if (flag != GF_DHT_MIGRATE_DATA)
+ goto check_avail_space;
+
+ /* Check:
+ During rebalance `migrate-data` - Destination subvol experiences
+ a `reduction` in 'blocks' of free space, at the same time source
+ subvol gains certain 'blocks' of free space. A valid check is
+ necessary here to avoid erroneous move to destination where
+ the space could be scantily available.
+ With heterogeneous brick support, an actual space comparison could
+ prevent any files being migrated to newly added bricks if they are
+ smaller then the free space available on the existing bricks.
+ */
+ if (!conf->use_fallocate) {
+ file_blocks = stbuf->ia_size + GF_DISK_SECTOR_SIZE - 1;
+ file_blocks /= GF_DISK_SECTOR_SIZE;
+
+ if (file_blocks >= dst_statfs_blocks) {
+ dst_statfs_blocks = 0;
+ } else {
+ dst_statfs_blocks -= file_blocks;
}
+ }
+
+ src_post_availspacepercent = ((src_statfs_blocks + file_blocks) * 100) /
+ src_total_blocks;
+
+ dst_post_availspacepercent = (dst_statfs_blocks * 100) / dst_total_blocks;
+
+ if (dst_post_availspacepercent < src_post_availspacepercent) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "data movement of file "
+ "{blocks:%" PRIu64
+ " name:(%s)} would result in "
+ "dst node (%s:%" PRIu64
+ ") having lower disk "
+ "space than the source node (%s:%" PRIu64
+ ")"
+ ".Skipping file.",
+ stbuf->ia_blocks, loc->path, to->name, dst_statfs_blocks,
+ from->name, src_statfs_blocks);
+
+ /* this is not a 'failure', but we don't want to
+ consider this as 'success' too :-/ */
+ *fop_errno = ENOSPC;
+ ret = 1;
+ goto out;
+ }
- ret = syncop_statfs (to, loc, &dst_statfs);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to get statfs of %s on %s (%s)",
- loc->path, to->name, strerror (errno));
- goto out;
+check_avail_space:
+ if (conf->disk_unit == 'p' && dst_statfs.f_blocks) {
+ dst_post_availspacepercent = (dst_statfs_blocks * 100) /
+ dst_total_blocks;
+
+ gf_msg_debug(this->name, 0,
+ "file : %s, post_availspacepercent"
+ " : %lf f_bavail : %" PRIu64 " min-free-disk: %lf",
+ loc->path, dst_post_availspacepercent, dst_statfs.f_bavail,
+ conf->min_free_disk);
+
+ if (dst_post_availspacepercent < conf->min_free_disk) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, 0,
+ "Write will cross min-free-disk for "
+ "file - %s on subvol - %s. Looking "
+ "for new subvol",
+ loc->path, to->name);
+
+ goto find_new_subvol;
+ } else {
+ ret = 0;
+ goto out;
}
+ }
- /* if force option is given, do not check for space @ dst.
- * Check only if space is avail for the file */
- if (flag != GF_DHT_MIGRATE_DATA)
- goto check_avail_space;
+ if (conf->disk_unit != 'p') {
+ if ((dst_statfs_blocks * GF_DISK_SECTOR_SIZE) < conf->min_free_disk) {
+ gf_msg_debug(this->name, 0,
+ "file : %s, destination frsize: %lu "
+ "f_bavail : %" PRIu64 " min-free-disk: %lf",
+ loc->path, dst_statfs.f_frsize, dst_statfs.f_bavail,
+ conf->min_free_disk);
- if (((dst_statfs.f_bavail *
- dst_statfs.f_bsize) / GF_DISK_SECTOR_SIZE) <
- (((src_statfs.f_bavail * src_statfs.f_bsize) /
- GF_DISK_SECTOR_SIZE) - stbuf->ia_blocks)) {
- gf_log (this->name, GF_LOG_WARNING,
- "data movement attempted from node (%s) with"
- " higher disk space to a node (%s) with "
- "lesser disk space (%s)", from->name,
- to->name, loc->path);
+ gf_msg(this->name, GF_LOG_WARNING, 0, 0,
+ "write will"
+ " cross min-free-disk for file - %s on subvol -"
+ " %s. looking for new subvol",
+ loc->path, to->name);
- /* this is not a 'failure', but we don't want to
- consider this as 'success' too :-/ */
- ret = 1;
- goto out;
- }
+ goto find_new_subvol;
-check_avail_space:
- if (((dst_statfs.f_bavail * dst_statfs.f_bsize) /
- GF_DISK_SECTOR_SIZE) < stbuf->ia_blocks) {
- gf_log (this->name, GF_LOG_ERROR,
- "data movement attempted from node (%s) with "
- "to node (%s) which does not have required free space"
- " for %s", from->name, to->name, loc->path);
- ret = 1;
- goto out;
+ } else {
+ ret = 0;
+ goto out;
}
+ }
+find_new_subvol:
+ layout = dht_layout_get(this, loc->parent);
+ if (!layout) {
+ gf_log(this->name, GF_LOG_ERROR, "Layout is NULL");
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ *new_subvol = dht_subvol_with_free_space_inodes(this, to, from, layout,
+ stbuf->ia_size);
+ if ((!(*new_subvol)) || (*new_subvol == from)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SUBVOL_INSUFF_SPACE,
+ "Could not find any subvol"
+ " with space accommodating the file - %s. Consider "
+ "adding bricks",
+ loc->path);
+
+ *target_changed = _gf_false;
+ *fop_errno = ENOSPC;
+ ret = -1;
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "new target found - %s"
+ " for file - %s",
+ (*new_subvol)->name, loc->path);
+ *target_changed = _gf_true;
ret = 0;
+ }
+
out:
- return ret;
+ if (xdata)
+ dict_unref(xdata);
+ return ret;
}
-static inline int
-__dht_rebalance_migrate_data (xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
- uint64_t ia_size, int hole_exists)
+static int
+__dht_rebalance_migrate_data(xlator_t *this, gf_defrag_info_t *defrag,
+ xlator_t *from, xlator_t *to, fd_t *src, fd_t *dst,
+ uint64_t ia_size, int hole_exists, int *fop_errno)
{
- int ret = 0;
- int count = 0;
- off_t offset = 0;
- struct iovec *vector = NULL;
- struct iobref *iobref = NULL;
- uint64_t total = 0;
- size_t read_size = 0;
-
- /* if file size is '0', no need to enter this loop */
- while (total < ia_size) {
- read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE) ?
- DHT_REBALANCE_BLKSIZE : (ia_size - total));
- ret = syncop_readv (from, src, read_size,
- offset, 0, &vector, &count, &iobref);
- if (!ret || (ret < 0)) {
- break;
+ int ret = 0;
+ int count = 0;
+ off_t offset = 0;
+ off_t data_offset = 0;
+ off_t hole_offset = 0;
+ struct iovec *vector = NULL;
+ struct iobref *iobref = NULL;
+ uint64_t total = 0;
+ size_t read_size = 0;
+ size_t data_block_size = 0;
+ dict_t *xdata = NULL;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ /* if file size is '0', no need to enter this loop */
+ while (total < ia_size) {
+ /* This is a regular file - read it sequentially */
+ if (!hole_exists) {
+ read_size = (((ia_size - total) > DHT_REBALANCE_BLKSIZE)
+ ? DHT_REBALANCE_BLKSIZE
+ : (ia_size - total));
+ } else {
+ /* This is a sparse file - read only the data segments in the file
+ */
+
+ /* If the previous data block is fully copied, find the next data
+ * segment
+ * starting at the offset of the last read and written byte, */
+ if (data_block_size <= 0) {
+ ret = syncop_seek(from, src, offset, GF_SEEK_DATA, NULL,
+ &data_offset);
+ if (ret) {
+ if (ret == -ENXIO)
+ ret = 0; /* No more data segments */
+ else
+ *fop_errno = -ret; /* Error occurred */
+
+ break;
}
- if (hole_exists)
- ret = dht_write_with_holes (to, dst, vector, count,
- ret, offset, iobref);
- else
- ret = syncop_writev (to, dst, vector, count,
- offset, iobref, 0);
- if (ret < 0) {
+ /* If the position of the current data segment is greater than
+ * the position of the next hole, find the next hole in order to
+ * calculate the length of the new data segment */
+ if (data_offset > hole_offset) {
+ /* Starting at the offset of the last data segment, find the
+ * next hole */
+ ret = syncop_seek(from, src, data_offset, GF_SEEK_HOLE,
+ NULL, &hole_offset);
+ if (ret) {
+ /* If an error occurred here it's a real error because
+ * if the seek for a data segment was successful then
+ * necessarily another hole must exist (EOF is a hole)
+ */
+ *fop_errno = -ret;
break;
- }
- offset += ret;
- total += ret;
+ }
- if (vector)
- GF_FREE (vector);
- if (iobref)
- iobref_unref (iobref);
- iobref = NULL;
- vector = NULL;
+ /* Calculate the total size of the current data block */
+ data_block_size = hole_offset - data_offset;
+ }
+ } else {
+ /* There is still data in the current segment, move the
+ * data_offset to the position of the last written byte */
+ data_offset = offset;
+ }
+
+ /* Calculate how much data needs to be read and written. If the data
+ * segment's length is bigger than DHT_REBALANCE_BLKSIZE, read and
+ * write DHT_REBALANCE_BLKSIZE data length and the rest in the
+ * next iteration(s) */
+ read_size = ((data_block_size > DHT_REBALANCE_BLKSIZE)
+ ? DHT_REBALANCE_BLKSIZE
+ : data_block_size);
+
+ /* Calculate the remaining size of the data block - maybe there's no
+ * need to seek for data in the next iteration */
+ data_block_size -= read_size;
+
+ /* Set offset to the offset of the data segment so read and write
+ * will have the correct position */
+ offset = data_offset;
}
- if (iobref)
- iobref_unref (iobref);
- if (vector)
- GF_FREE (vector);
-
- if (ret >= 0)
- ret = 0;
-
- return ret;
-}
+ ret = syncop_readv(from, src, read_size, offset, 0, &vector, &count,
+ &iobref, NULL, NULL, NULL);
-static inline int
-__dht_rebalance_open_src_file (xlator_t *from, xlator_t *to, loc_t *loc,
- struct iatt *stbuf, fd_t **src_fd)
-{
- int ret = 0;
- fd_t *fd = NULL;
- dict_t *dict = NULL;
- xlator_t *this = NULL;
- struct iatt iatt = {0,};
+ if (!ret || (ret < 0)) {
+ if (!ret) {
+ /* File was probably truncated*/
+ ret = -1;
+ *fop_errno = ENOSPC;
+ } else {
+ *fop_errno = -ret;
+ }
+ break;
+ }
- this = THIS;
+ if (!conf->force_migration) {
+ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata) {
+ gf_msg("dht", GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "insufficient memory");
+ ret = -1;
+ *fop_errno = ENOMEM;
+ break;
+ }
- fd = fd_create (loc->inode, DHT_REBALANCE_PID);
- if (!fd) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: fd create failed (source)", loc->path);
- ret = -1;
- goto out;
+ /* Fail this write and abort rebalance if we
+ * detect a write from client since migration of
+ * this file started. This is done to avoid
+ * potential data corruption due to out of order
+ * writes from rebalance and client to the same
+ * region (as compared between src and dst
+ * files). See
+ * https://github.com/gluster/glusterfs/issues/308
+ * for more details.
+ */
+ ret = dict_set_int32_sizen(xdata, GF_AVOID_OVERWRITE, 1);
+ if (ret) {
+ gf_msg("dht", GF_LOG_ERROR, 0, ENOMEM,
+ "failed to set dict");
+ ret = -1;
+ *fop_errno = ENOMEM;
+ break;
+ }
+ }
}
- ret = syncop_open (from, loc, O_RDWR, fd);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to open file %s on %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
+ ret = syncop_writev(to, dst, vector, count, offset, iobref, 0, NULL,
+ NULL, xdata, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ break;
}
+ offset += ret;
+ total += ret;
+
+ GF_FREE(vector);
+ if (iobref)
+ iobref_unref(iobref);
+ iobref = NULL;
+ vector = NULL;
+ }
+ if (iobref)
+ iobref_unref(iobref);
+ GF_FREE(vector);
+
+ if (ret >= 0)
+ ret = 0;
+ else
ret = -1;
- dict = dict_new ();
- if (!dict)
- goto out;
- ret = dict_set_str (dict, DHT_LINKFILE_KEY, to->name);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to set xattr in dict for %s (linkto:%s)",
- loc->path, to->name);
- goto out;
- }
+ if (xdata) {
+ dict_unref(xdata);
+ }
- /* Once the migration starts, the source should have 'linkto' key set
- to show which is the target, so other clients can work around it */
- ret = syncop_setxattr (from, loc, dict, 0);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to set xattr on %s in %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
- }
+ return ret;
+}
- /* mode should be (+S+T) to indicate migration is in progress */
- iatt.ia_prot = stbuf->ia_prot;
- iatt.ia_type = stbuf->ia_type;
- iatt.ia_prot.sticky = 1;
- iatt.ia_prot.sgid = 1;
+static int
+__dht_rebalance_open_src_file(xlator_t *this, xlator_t *from, xlator_t *to,
+ loc_t *loc, struct iatt *stbuf, fd_t **src_fd,
+ gf_boolean_t *clean_src, int *fop_errno)
+{
+ int ret = 0;
+ fd_t *fd = NULL;
+ dict_t *dict = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ *clean_src = _gf_false;
+
+ fd = fd_create(loc->inode, DHT_REBALANCE_PID);
+ if (!fd) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: fd create failed (source)", loc->path);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_open(from, loc, O_RDWR, fd, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to open file %s on %s", loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
- ret = syncop_setattr (from, loc, &iatt, GF_SET_ATTR_MODE, NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to set mode on %s in %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
- }
+ fd_bind(fd);
- if (src_fd)
- *src_fd = fd;
+ if (src_fd)
+ *src_fd = fd;
- /* success */
- ret = 0;
+ ret = -1;
+ dict = dict_new();
+ if (!dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: Could not allocate memory for dict", loc->path);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str(dict, conf->link_xattr_name, to->name);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set xattr in dict for %s (linkto:%s)", loc->path,
+ to->name);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ /* Once the migration starts, the source should have 'linkto' key set
+ to show which is the target, so other clients can work around it */
+ ret = syncop_setxattr(from, loc, dict, 0, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to set xattr on %s in %s", loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* Reset source mode/xattr if migration fails*/
+ *clean_src = _gf_true;
+
+ /* mode should be (+S+T) to indicate migration is in progress */
+ iatt.ia_prot = stbuf->ia_prot;
+ iatt.ia_type = stbuf->ia_type;
+ iatt.ia_prot.sticky = 1;
+ iatt.ia_prot.sgid = 1;
+
+ ret = syncop_setattr(from, loc, &iatt, GF_SET_ATTR_MODE, NULL, NULL, NULL,
+ NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to set mode on %s in %s", loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* success */
+ ret = 0;
out:
- if (dict)
- dict_unref (dict);
+ if (dict)
+ dict_unref(dict);
- return ret;
+ return ret;
}
int
-migrate_special_files (xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
- struct iatt *buf)
+migrate_special_files(xlator_t *this, xlator_t *from, xlator_t *to, loc_t *loc,
+ struct iatt *buf, int *fop_errno)
{
- int ret = -1;
- dict_t *rsp_dict = NULL;
- dict_t *dict = NULL;
- char *link = NULL;
- struct iatt stbuf = {0,};
-
- dict = dict_new ();
- if (!dict)
- goto out;
+ int ret = -1;
+ dict_t *rsp_dict = NULL;
+ dict_t *dict = NULL;
+ char *link = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ dht_conf_t *conf = this->private;
+
+ dict = dict_new();
+ if (!dict) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32(dict, conf->link_xattr_name, 256);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s: failed to set 'linkto' key in dict", loc->path);
+ goto out;
+ }
+
+ /* check in the destination if the file is link file */
+ ret = syncop_lookup(to, loc, &stbuf, NULL, dict, &rsp_dict);
+ if ((ret < 0) && (-ret != ENOENT)) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: lookup failed", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* we no more require this key */
+ dict_del(dict, conf->link_xattr_name);
+
+ /* file exists in target node, only if it is 'linkfile' its valid,
+ otherwise, error out */
+ if (!ret) {
+ if (!check_is_linkfile(loc->inode, &stbuf, rsp_dict,
+ conf->link_xattr_name)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: file exists in destination", loc->path);
+ *fop_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
- ret = dict_set_int32 (dict, DHT_LINKFILE_KEY, 256);
+ /* as file is linkfile, delete it */
+ ret = syncop_unlink(to, loc, NULL, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set 'linkto' key in dict", loc->path);
- goto out;
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to delete the linkfile", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
+ }
- /* check in the destination if the file is link file */
- ret = syncop_lookup (to, loc, dict, &stbuf, &rsp_dict, NULL);
- if ((ret == -1) && (errno != ENOENT)) {
- gf_log (this->name, GF_LOG_WARNING, "%s: lookup failed (%s)",
- loc->path, strerror (errno));
- goto out;
+ /* Set the gfid of the source file in dict */
+ ret = dict_set_gfuuid(dict, "gfid-req", buf->ia_gfid, true);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s: failed to set gfid in dict for create", loc->path);
+ goto out;
+ }
+
+ /* Create the file in target */
+ if (IA_ISLNK(buf->ia_type)) {
+ /* Handle symlinks separately */
+ ret = syncop_readlink(from, loc, &link, buf->ia_size, NULL, NULL);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: readlink on symlink failed", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
- /* we no more require this key */
- dict_del (dict, DHT_LINKFILE_KEY);
+ ret = syncop_symlink(to, loc, link, 0, dict, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED, "%s: creating symlink failed",
+ loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
- /* file exists in target node, only if it is 'linkfile' its valid,
- otherwise, error out */
- if (!ret) {
- if (!check_is_linkfile (loc->inode, &stbuf, rsp_dict)) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: file exists in destination", loc->path);
- ret = -1;
- goto out;
- }
+ goto done;
+ }
- /* as file is linkfile, delete it */
- ret = syncop_unlink (to, loc);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to delete the linkfile (%s)",
- loc->path, strerror (errno));
- goto out;
- }
- }
+ ret = syncop_mknod(to, loc, st_mode_from_ia(buf->ia_prot, buf->ia_type),
+ makedev(ia_major(buf->ia_rdev), ia_minor(buf->ia_rdev)),
+ 0, dict, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: mknod failed", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
- /* Set the gfid of the source file in dict */
- ret = dict_set_static_bin (dict, "gfid-req", buf->ia_gfid, 16);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set gfid in dict for create", loc->path);
- goto out;
- }
+done:
+ ret = syncop_setattr(to, loc, buf,
+ (GF_SET_ATTR_MTIME | GF_SET_ATTR_UID |
+ GF_SET_ATTR_GID | GF_SET_ATTR_MODE),
+ NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to perform setattr on %s", loc->path, to->name);
+ *fop_errno = -ret;
+ }
+
+ ret = syncop_unlink(from, loc, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: unlink failed", loc->path);
+ *fop_errno = -ret;
+ ret = -1;
+ }
- /* Create the file in target */
- if (IA_ISLNK (buf->ia_type)) {
- /* Handle symlinks separately */
- ret = syncop_readlink (from, loc, &link, buf->ia_size);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: readlink on symlink failed (%s)",
- loc->path, strerror (errno));
- goto out;
- }
+out:
+ GF_FREE(link);
+ if (dict)
+ dict_unref(dict);
- ret = syncop_symlink (to, loc, link, dict);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: creating symlink failed (%s)",
- loc->path, strerror (errno));
- goto out;
- }
+ if (rsp_dict)
+ dict_unref(rsp_dict);
- goto done;
- }
+ return ret;
+}
- ret = syncop_mknod (to, loc, st_mode_from_ia (buf->ia_prot,
- buf->ia_type),
- makedev (ia_major (buf->ia_rdev),
- ia_minor (buf->ia_rdev)), dict);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING, "%s: mknod failed (%s)",
- loc->path, strerror (errno));
- goto out;
- }
+static int
+__dht_migration_cleanup_src_file(xlator_t *this, loc_t *loc, fd_t *fd,
+ xlator_t *from, ia_prot_t *src_ia_prot)
+{
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ struct iatt new_stbuf = {
+ 0,
+ };
+
+ if (!this || !fd || !from || !src_ia_prot) {
+ goto out;
+ }
+
+ conf = this->private;
+
+ /*Revert source mode and xattr changes*/
+ ret = syncop_fstat(from, fd, &new_stbuf, NULL, NULL);
+ if (ret < 0) {
+ /* Failed to get the stat info */
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file cleanup failed: failed to fstat "
+ "file %s on %s ",
+ loc->path, from->name);
+ ret = -1;
+ goto out;
+ }
-done:
- ret = syncop_unlink (from, loc);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING, "%s: unlink failed (%s)",
- loc->path, strerror (errno));
+ /* Remove the sticky bit and sgid bit set, reset it to 0*/
+ if (!src_ia_prot->sticky)
+ new_stbuf.ia_prot.sticky = 0;
-out:
- if (dict)
- dict_unref (dict);
+ if (!src_ia_prot->sgid)
+ new_stbuf.ia_prot.sgid = 0;
- if (rsp_dict)
- dict_unref (rsp_dict);
+ ret = syncop_fsetattr(from, fd, &new_stbuf,
+ (GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL, NULL,
+ NULL, NULL);
- return ret;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file cleanup failed:"
+ "%s: failed to perform fsetattr on %s ",
+ loc->path, from->name);
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_fremovexattr(from, fd, conf->link_xattr_name, 0, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s: failed to remove linkto xattr on %s (%s)", loc->path,
+ from->name, strerror(-ret));
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
}
/*
@@ -620,1063 +1469,3234 @@ out:
1 : not a failure, but we can't migrate data as of now
*/
int
-dht_migrate_file (xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
- int flag)
+dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
+ int flag, int *fop_errno)
{
- int ret = -1;
- struct iatt new_stbuf = {0,};
- struct iatt stbuf = {0,};
- struct iatt empty_iatt = {0,};
- ia_prot_t src_ia_prot = {0,};
- fd_t *src_fd = NULL;
- fd_t *dst_fd = NULL;
- dict_t *dict = NULL;
- dict_t *xattr = NULL;
- dict_t *xattr_rsp = NULL;
- int file_has_holes = 0;
-
- gf_log (this->name, GF_LOG_INFO, "%s: attempting to move from %s to %s",
- loc->path, from->name, to->name);
-
- dict = dict_new ();
- if (!dict)
- goto out;
+ int ret = -1;
+ struct iatt new_stbuf = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+ struct iatt empty_iatt = {
+ 0,
+ };
+ ia_prot_t src_ia_prot = {
+ 0,
+ };
+ fd_t *src_fd = NULL;
+ fd_t *dst_fd = NULL;
+ dict_t *dict = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xattr_rsp = NULL;
+ int file_has_holes = 0;
+ dht_conf_t *conf = this->private;
+ int rcvd_enoent_from_src = 0;
+ struct gf_flock flock = {
+ 0,
+ };
+ struct gf_flock plock = {
+ 0,
+ };
+ loc_t tmp_loc = {
+ 0,
+ };
+ loc_t parent_loc = {
+ 0,
+ };
+ gf_boolean_t inodelk_locked = _gf_false;
+ gf_boolean_t entrylk_locked = _gf_false;
+ gf_boolean_t p_locked = _gf_false;
+ int lk_ret = -1;
+ gf_defrag_info_t *defrag = NULL;
+ gf_boolean_t clean_src = _gf_false;
+ gf_boolean_t clean_dst = _gf_false;
+ int log_level = GF_LOG_INFO;
+ gf_boolean_t delete_src_linkto = _gf_true;
+ lock_migration_info_t locklist;
+ dict_t *meta_dict = NULL;
+ gf_boolean_t meta_locked = _gf_false;
+ gf_boolean_t target_changed = _gf_false;
+ xlator_t *new_target = NULL;
+ xlator_t *old_target = NULL;
+ xlator_t *hashed_subvol = NULL;
+ fd_t *linkto_fd = NULL;
+ dict_t *xdata = NULL;
+
+ if (from == to) {
+ gf_msg_debug(this->name, 0,
+ "destination and source are same. file %s"
+ " might have migrated already",
+ loc->path);
+ ret = 0;
+ goto out;
+ }
- ret = dict_set_int32 (dict, DHT_LINKFILE_KEY, 256);
+ gf_log(this->name, log_level, "%s: attempting to move from %s to %s",
+ loc->path, from->name, to->name);
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ *fop_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "Could not allocate memory for dict");
+ goto out;
+ }
+ ret = dict_set_int32(dict, conf->link_xattr_name, 256);
+ if (ret) {
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to set 'linkto' key in dict",
+ loc->path);
+ goto out;
+ }
+
+ /* Do not migrate file in case lock migration is not enabled on the
+ * volume*/
+ if (!conf->lock_migration_enabled) {
+ ret = dict_set_int32(dict, GLUSTERFS_POSIXLK_COUNT, sizeof(int32_t));
if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set 'linkto' key in dict", loc->path);
- goto out;
+ *fop_errno = ENOMEM;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s: failed to "
+ "set " GLUSTERFS_POSIXLK_COUNT " key in dict",
+ loc->path);
+ goto out;
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "locks will be migrated"
+ " for file: %s",
+ loc->path);
+ }
+
+ /* The file is locked to prevent a rename during a migration. Renames
+ * and migrations on the file at the same time can lead to data loss.
+ */
+
+ ret = dht_build_parent_loc(this, &parent_loc, loc, fop_errno);
+ if (ret < 0) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, *fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to build parent loc, which is needed to "
+ "acquire entrylk to synchronize with renames on this "
+ "path. Skipping migration",
+ loc->path);
+ goto out;
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (hashed_subvol == NULL) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: cannot find hashed subvol which is needed to "
+ "synchronize with renames on this path. "
+ "Skipping migration",
+ loc->path);
+ goto out;
+ }
+
+ flock.l_type = F_WRLCK;
+
+ tmp_loc.inode = inode_ref(loc->inode);
+ gf_uuid_copy(tmp_loc.gfid, loc->gfid);
+ tmp_loc.path = gf_strdup(loc->path);
+
+ /* this inodelk happens with flock.owner being zero. But to synchronize
+ * hardlink migration we need to have different lkowner for each migration
+ * Filed a bug here: https://bugzilla.redhat.com/show_bug.cgi?id=1468202 to
+ * track the fix for this. Currently synclock takes care of synchronizing
+ * hardlink migration. Once this bug is fixed we can avoid taking synclock
+ */
+ ret = syncop_inodelk(from, DHT_FILE_MIGRATE_DOMAIN, &tmp_loc, F_SETLKW,
+ &flock, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, *fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "migrate file failed: "
+ "%s: failed to lock file on %s",
+ loc->path, from->name);
+ goto out;
+ }
+
+ inodelk_locked = _gf_true;
+
+ /* dht_rename has changed to use entrylk on hashed subvol for
+ * synchronization. So, rebalance too has to acquire an entrylk on
+ * hashed subvol.
+ */
+ ret = syncop_entrylk(hashed_subvol, DHT_ENTRY_SYNC_DOMAIN, &parent_loc,
+ loc->name, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, *fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to acquire entrylk on subvol %s", loc->path,
+ hashed_subvol->name);
+ goto out;
+ }
+
+ entrylk_locked = _gf_true;
+
+ /* Phase 1 - Data migration is in progress from now on */
+ ret = syncop_lookup(from, loc, &stbuf, NULL, dict, &xattr_rsp);
+ if (ret) {
+ *fop_errno = -ret;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, *fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: lookup failed on %s",
+ loc->path, from->name);
+ goto out;
+ }
+
+ /* preserve source mode, so set the same to the destination */
+ src_ia_prot = stbuf.ia_prot;
+
+ /* Check if file can be migrated */
+ ret = __is_file_migratable(this, loc, &stbuf, xattr_rsp, flag, defrag, conf,
+ fop_errno);
+ if (ret) {
+ if (ret == HARDLINK_MIG_INPROGRESS)
+ ret = 0;
+ goto out;
+ }
+
+ /* Take care of the special files */
+ if (!IA_ISREG(stbuf.ia_type)) {
+ /* Special files */
+ ret = migrate_special_files(this, from, to, loc, &stbuf, fop_errno);
+ goto out;
+ }
+
+ /* Try to preserve 'holes' while migrating data */
+ if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
+ file_has_holes = 1;
+
+ /* create the destination, with required modes/xattr */
+ ret = __dht_rebalance_create_dst_file(this, to, from, loc, &stbuf, &dst_fd,
+ fop_errno, file_has_holes);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "Create dst failed"
+ " on - %s for file - %s",
+ to->name, loc->path);
+ goto out;
+ }
+
+ clean_dst = _gf_true;
+
+ ret = __dht_check_free_space(this, to, from, loc, &stbuf, flag, conf,
+ &target_changed, &new_target, fop_errno);
+ if (target_changed) {
+ /* Can't handle for hardlinks. Marking this as failure */
+ if (flag == GF_DHT_MIGRATE_HARDLINK_IN_PROGRESS || stbuf.ia_nlink > 1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_INSUFF_SPACE,
+ "Exiting migration for"
+ " file - %s. flag - %d, stbuf.ia_nlink - %d",
+ loc->path, flag, stbuf.ia_nlink);
+ ret = -1;
+ goto out;
}
- /* Phase 1 - Data migration is in progress from now on */
- ret = syncop_lookup (from, loc, dict, &stbuf, &xattr_rsp, NULL);
+ ret = syncop_ftruncate(to, dst_fd, 0, NULL, NULL, NULL, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s: lookup failed on %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s: failed to perform truncate on %s (%s)", loc->path,
+ to->name, strerror(-ret));
}
- /* we no more require this key */
- dict_del (dict, DHT_LINKFILE_KEY);
+ syncop_close(dst_fd);
+ dst_fd = NULL;
- /* preserve source mode, so set the same to the destination */
- src_ia_prot = stbuf.ia_prot;
+ old_target = to;
+ to = new_target;
- /* Check if file can be migrated */
- ret = __is_file_migratable (this, loc, &stbuf, xattr_rsp, flag);
- if (ret)
- goto out;
+ clean_dst = _gf_false;
- /* Take care of the special files */
- if (!IA_ISREG (stbuf.ia_type)) {
- /* Special files */
- ret = migrate_special_files (this, from, to, loc, &stbuf);
- goto out;
+ /* if the file migration is successful to this new target, then
+ * update the xattr on the old destination to point the new
+ * destination. We need to do update this only post migration
+ * as in case of failure the linkto needs to point to the source
+ * subvol */
+ ret = __dht_rebalance_create_dst_file(
+ this, to, from, loc, &stbuf, &dst_fd, fop_errno, file_has_holes);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Create dst failed"
+ " on - %s for file - %s",
+ to->name, loc->path);
+ goto out;
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "destination for file "
+ "- %s is changed to - %s",
+ loc->path, to->name);
+ clean_dst = _gf_true;
}
+ }
+
+ if (ret) {
+ goto out;
+ }
+
+ /* Open the source, and also update mode/xattr */
+ ret = __dht_rebalance_open_src_file(this, from, to, loc, &stbuf, &src_fd,
+ &clean_src, fop_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: failed to open %s on %s", loc->path,
+ from->name);
+ goto out;
+ }
+
+ /* TODO: move all xattr related operations to fd based operations */
+ ret = syncop_listxattr(from, loc, &xattr, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, *fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to get xattr from %s",
+ loc->path, from->name);
+ ret = -1;
+ goto out;
+ }
+
+ /* Copying posix acls to the linkto file messes up the permissions*/
+ dht_strip_out_acls(xattr);
+
+ /* Remove the linkto xattr as we don't want to overwrite the value
+ * set on the dst.
+ */
+ dict_del(xattr, conf->link_xattr_name);
+
+ /* We need to error out if this fails as having the wrong shard xattrs
+ * set on the dst could cause data corruption
+ */
+ ret = syncop_fsetxattr(to, dst_fd, xattr, 0, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to set xattr on %s", loc->path, to->name);
+ ret = -1;
+ goto out;
+ }
- /* create the destination, with required modes/xattr */
- ret = __dht_rebalance_create_dst_file (to, from, loc, &stbuf,
- dict, &dst_fd);
- if (ret)
- goto out;
+ if (xattr_rsp) {
+ /* we no more require this key */
+ dict_del(dict, conf->link_xattr_name);
+ dict_unref(xattr_rsp);
+ }
+
+ ret = syncop_fstat(from, src_fd, &stbuf, dict, &xattr_rsp);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:failed to lookup %s on %s ", loc->path,
+ from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* Check again if file has hardlink */
+ ret = __check_file_has_hardlink(this, loc, &stbuf, xattr_rsp, flag, defrag,
+ conf, fop_errno);
+ if (ret) {
+ if (ret == HARDLINK_MIG_INPROGRESS)
+ ret = 0;
+ goto out;
+ }
+
+ ret = __dht_rebalance_migrate_data(this, defrag, from, to, src_fd, dst_fd,
+ stbuf.ia_size, file_has_holes,
+ fop_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s: failed to migrate data", loc->path);
- ret = __dht_check_free_space (to, from, loc, &stbuf, flag);
- if (ret) {
- goto out;
+ ret = -1;
+ goto out;
+ }
+
+ /* TODO: Sync the locks */
+
+ xdata = dict_new();
+ if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s: failed to set last-fsync flag on "
+ "%s (%s)",
+ loc->path, to->name, strerror(ENOMEM));
+ }
+
+ ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, xdata, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)",
+ loc->path, to->name, strerror(-ret));
+ *fop_errno = -ret;
+ }
+
+ /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */
+
+ ret = syncop_fstat(from, src_fd, &new_stbuf, NULL, NULL);
+ if (ret < 0) {
+ /* Failed to get the stat info */
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: failed to fstat file %s on %s ", loc->path,
+ from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ /* Lock the entire source file to prevent clients from taking a
+ lock on it as dht_lk does not handle file migration.
+
+ This still leaves a small window where conflicting locks can
+ be granted to different clients. If client1 requests a blocking
+ lock on the src file, it will be granted after the migrating
+ process releases its lock. If client2 requests a lock on the dst
+ data file, it will also be granted, but all FOPs will be redirected
+ to the dst data file.
+ */
+
+ /* Take meta lock */
+
+ if (conf->lock_migration_enabled) {
+ meta_dict = dict_new();
+ if (!meta_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "dict_new failed");
+
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- /* Open the source, and also update mode/xattr */
- ret = __dht_rebalance_open_src_file (from, to, loc, &stbuf, &src_fd);
+ ret = dict_set_str(meta_dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "failed to open %s on %s",
- loc->path, from->name);
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s,"
+ " path = %s",
+ GLUSTERFS_INTERNAL_FOP_KEY, loc->path);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- ret = syncop_fstat (from, src_fd, &stbuf);
+ ret = dict_set_int32(meta_dict, GF_META_LOCK_KEY, 1);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "failed to lookup %s on %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace dict_set failed");
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- /* Try to preserve 'holes' while migrating data */
- if (stbuf.ia_size > (stbuf.ia_blocks * GF_DISK_SECTOR_SIZE))
- file_has_holes = 1;
-
- /* All I/O happens in this function */
- ret = __dht_rebalance_migrate_data (from, to, src_fd, dst_fd,
- stbuf.ia_size, file_has_holes);
+ ret = syncop_setxattr(from, loc, meta_dict, 0, NULL, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s: failed to migrate data",
- loc->path);
- /* reset the destination back to 0 */
- ret = syncop_ftruncate (to, dst_fd, 0);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to reset target size back to 0 (%s)",
- loc->path, strerror (errno));
- }
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace syncop_setxattr metalock failed");
- ret = -1;
- goto out;
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ } else {
+ meta_locked = _gf_true;
}
+ }
- /* TODO: move all xattr related operations to fd based operations */
- ret = syncop_listxattr (from, loc, &xattr);
- if (ret == -1)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to get xattr from %s (%s)",
- loc->path, from->name, strerror (errno));
+ if (!conf->lock_migration_enabled) {
+ plock.l_type = F_WRLCK;
+ plock.l_start = 0;
+ plock.l_len = 0;
+ plock.l_whence = SEEK_SET;
- ret = syncop_setxattr (to, loc, xattr, 0);
- if (ret == -1)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set xattr on %s (%s)",
- loc->path, to->name, strerror (errno));
+ ret = syncop_lk(from, src_fd, F_SETLK, &plock, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: Failed to lock on %s",
+ loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ }
- /* TODO: Sync the locks */
+ p_locked = _gf_true;
- ret = syncop_fsync (to, dst_fd);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to fsync on %s (%s)",
- loc->path, to->name, strerror (errno));
+ } else {
+ INIT_LIST_HEAD(&locklist.list);
+ ret = syncop_getactivelk(from, loc, &locklist, NULL, NULL);
+ if (ret == 0) {
+ gf_log(this->name, GF_LOG_INFO, "No active locks on:%s", loc->path);
- /* Phase 2 - Data-Migration Complete, Housekeeping updates pending */
+ } else if (ret > 0) {
+ ret = syncop_setactivelk(to, loc, &locklist, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_LOCK_MIGRATION_FAILED, "write lock failed on:%s",
+ loc->path);
- ret = syncop_fstat (from, src_fd, &new_stbuf);
- if (ret < 0) {
- /* Failed to get the stat info */
- gf_log (this->name, GF_LOG_ERROR,
- "failed to fstat file %s on %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
+ *fop_errno = -ret;
+ ret = -1;
+ goto metaunlock;
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_LOCK_MIGRATION_FAILED,
+ "getactivelk failed for file: %s", loc->path);
+ *fop_errno = -ret;
}
-
- /* source would have both sticky bit and sgid bit set, reset it to 0,
- and set the source permission on destination, if it was not set
- prior to setting rebalance-modes in source */
- if (!src_ia_prot.sticky)
- new_stbuf.ia_prot.sticky = 0;
-
- if (!src_ia_prot.sgid)
- new_stbuf.ia_prot.sgid = 0;
-
- /* TODO: if the source actually had sticky bit, or sgid bit set,
- we are not handling it */
-
- ret = syncop_fsetattr (to, dst_fd, &new_stbuf,
- (GF_SET_ATTR_UID | GF_SET_ATTR_GID |
- GF_SET_ATTR_MODE), NULL, NULL);
+ }
+
+ /* source would have both sticky bit and sgid bit set, reset it to 0,
+ and set the source permission on destination, if it was not set
+ prior to setting rebalance-modes in source */
+ if (!src_ia_prot.sticky)
+ new_stbuf.ia_prot.sticky = 0;
+
+ if (!src_ia_prot.sgid)
+ new_stbuf.ia_prot.sgid = 0;
+
+ /* TODO: if the source actually had sticky bit, or sgid bit set,
+ we are not handling it */
+
+ ret = syncop_fsetattr(
+ to, dst_fd, &new_stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL, NULL,
+ NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to perform setattr on %s ",
+ loc->path, to->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto metaunlock;
+ }
+
+ /* Because 'futimes' is not portable */
+ ret = syncop_setattr(to, loc, &new_stbuf,
+ (GF_SET_ATTR_MTIME | GF_SET_ATTR_ATIME), NULL, NULL,
+ NULL, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s: failed to perform setattr on %s ", loc->path, to->name);
+ *fop_errno = -ret;
+ }
+
+ if (target_changed) {
+ dict_del(dict, GLUSTERFS_POSIXLK_COUNT);
+ ret = dict_set_str(dict, conf->link_xattr_name, to->name);
if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform setattr on %s (%s)",
- loc->path, to->name, strerror (errno));
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set xattr in dict for %s (linkto:%s)", loc->path,
+ to->name);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_setxattr(old_target, loc, dict, 0, NULL, NULL);
+ if (ret && -ret != ESTALE && -ret != ENOENT) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to set xattr on %s in %s", loc->path,
+ old_target->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
+ } else if (-ret == ESTALE || -ret == ENOENT) {
+ /* The failure ESTALE indicates that the linkto
+ * file on the hashed subvol might have been deleted.
+ * In this case will create a linkto file with new target
+ * as linkto xattr value*/
+ linkto_fd = fd_create(loc->inode, DHT_REBALANCE_PID);
+ if (!linkto_fd) {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ DHT_MSG_MIGRATE_FILE_FAILED, "%s: fd create failed",
+ loc->path);
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+ ret = syncop_create(old_target, loc, O_RDWR, DHT_LINKFILE_MODE,
+ linkto_fd, NULL, dict, NULL);
+ if (ret != 0 && -ret != EEXIST && -ret != ESTALE) {
+ *fop_errno = -ret;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "failed to create linkto file on %s in %s", loc->path,
+ old_target->name);
goto out;
+ } else if (ret == 0) {
+ ret = syncop_fsetattr(old_target, linkto_fd, &stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL,
+ NULL, NULL, NULL);
+ if (ret < 0) {
+ *fop_errno = -ret;
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "chown failed for %s on %s", loc->path,
+ old_target->name);
+ }
+ }
+ }
+ }
+
+ clean_dst = _gf_false;
+
+ /* Posix acls are not set on DHT linkto files as part of the initial
+ * initial xattrs set on the dst file, so these need
+ * to be set on the dst file after the linkto attrs are removed.
+ * TODO: Optimize this.
+ */
+ if (xattr) {
+ dict_unref(xattr);
+ xattr = NULL;
+ }
+
+ /* Set only the Posix ACLs this time */
+ ret = syncop_getxattr(from, loc, &xattr, POSIX_ACL_ACCESS_XATTR, NULL,
+ NULL);
+ if (ret < 0) {
+ if ((-ret != ENODATA) && (-ret != ENOATTR)) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to get xattr from %s",
+ loc->path, from->name);
+ *fop_errno = -ret;
+ }
+ } else {
+ ret = syncop_setxattr(to, loc, xattr, 0, NULL, NULL);
+ if (ret < 0) {
+ /* Potential problem here where Posix ACLs will
+ * not be set on the target file */
+
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to set xattr on %s",
+ loc->path, to->name);
+ *fop_errno = -ret;
+ }
+ }
+
+ /* The src file is being unlinked after this so we don't need
+ to clean it up */
+ clean_src = _gf_false;
+
+ /* Make the source as a linkfile first before deleting it */
+ empty_iatt.ia_prot.sticky = 1;
+ ret = syncop_fsetattr(from, src_fd, &empty_iatt, GF_SET_ATTR_MODE, NULL,
+ NULL, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed:"
+ "%s: failed to perform setattr on %s ",
+ loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto metaunlock;
+ }
+
+ /* Free up the data blocks on the source node, as the whole
+ file is migrated */
+ ret = syncop_ftruncate(from, src_fd, 0, NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s: failed to perform truncate on %s (%s)", loc->path,
+ from->name, strerror(-ret));
+ *fop_errno = -ret;
+ }
+
+ /* remove the 'linkto' xattr from the destination */
+ ret = syncop_fremovexattr(to, dst_fd, conf->link_xattr_name, 0, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s: failed to perform removexattr on %s (%s)", loc->path,
+ to->name, strerror(-ret));
+ *fop_errno = -ret;
+ }
+
+ /* Do a stat and check the gfid before unlink */
+
+ /*
+ * Cached file changes its state from non-linkto to linkto file after
+ * migrating data. If lookup from any other mount-point is performed,
+ * converted-linkto-cached file will be treated as a stale and will be
+ * unlinked. But by this time, file is already migrated. So further
+ * failure because of ENOENT should not be treated as error
+ */
+
+ ret = syncop_stat(from, loc, &empty_iatt, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to do a stat on %s", loc->path, from->name);
+
+ if (-ret != ENOENT) {
+ *fop_errno = -ret;
+ ret = -1;
+ goto metaunlock;
}
- /* Because 'futimes' is not portable */
- ret = syncop_setattr (to, loc, &new_stbuf,
- (GF_SET_ATTR_MTIME | GF_SET_ATTR_ATIME),
- NULL, NULL);
+ rcvd_enoent_from_src = 1;
+ }
+
+ if ((gf_uuid_compare(empty_iatt.ia_gfid, loc->gfid) == 0) &&
+ (!rcvd_enoent_from_src) && delete_src_linkto) {
+ /* take out the source from namespace */
+ ret = syncop_unlink(from, loc, NULL, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform setattr on %s (%s)",
- loc->path, to->name, strerror (errno));
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to perform unlink on %s", loc->path, from->name);
+ *fop_errno = -ret;
+ ret = -1;
+ goto metaunlock;
}
+ }
+
+ ret = syncop_lookup(this, loc, NULL, NULL, NULL, NULL);
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "%s: failed to lookup the file on subvolumes", loc->path);
+ *fop_errno = -ret;
+ }
+
+ gf_msg(this->name, log_level, 0, DHT_MSG_MIGRATE_FILE_COMPLETE,
+ "completed migration of %s from subvolume %s to %s", loc->path,
+ from->name, to->name);
- /* Make the source as a linkfile first before deleting it */
- empty_iatt.ia_prot.sticky = 1;
- ret = syncop_fsetattr (from, src_fd, &empty_iatt,
- GF_SET_ATTR_MODE, NULL, NULL);
+ ret = 0;
+
+metaunlock:
+
+ if (conf->lock_migration_enabled && meta_locked) {
+ dict_del(meta_dict, GF_META_LOCK_KEY);
+
+ ret = dict_set_int32(meta_dict, GF_META_UNLOCK_KEY, 1);
if (ret) {
- gf_log (this->name, GF_LOG_WARNING, \
- "%s: failed to perform setattr on %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace dict_set failed");
+
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- /* Do a stat and check the gfid before unlink */
- ret = syncop_stat (from, loc, &empty_iatt);
+ if (clean_dst == _gf_false)
+ ret = dict_set_int32(meta_dict, "status", 1);
+ else
+ ret = dict_set_int32(meta_dict, "status", 0);
+
if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to do a stat on %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
- }
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace dict_set failed");
- if (uuid_compare (empty_iatt.ia_gfid, loc->gfid) == 0) {
- /* take out the source from namespace */
- ret = syncop_unlink (from, loc);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform unlink on %s (%s)",
- loc->path, from->name, strerror (errno));
- goto out;
- }
+ *fop_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- /* Free up the data blocks on the source node, as the whole
- file is migrated */
- ret = syncop_ftruncate (from, src_fd, 0);
+ ret = syncop_setxattr(from, loc, meta_dict, 0, NULL, NULL);
if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform truncate on %s (%s)",
- loc->path, from->name, strerror (errno));
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Trace syncop_setxattr meta unlock failed");
+
+ *fop_errno = -ret;
+ ret = -1;
+ goto out;
}
+ }
- /* remove the 'linkto' xattr from the destination */
- ret = syncop_fremovexattr (to, dst_fd, DHT_LINKFILE_KEY);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to perform removexattr on %s (%s)",
- loc->path, to->name, strerror (errno));
+out:
+ if (clean_src) {
+ /* Revert source mode and xattr changes*/
+ lk_ret = __dht_migration_cleanup_src_file(this, loc, src_fd, from,
+ &src_ia_prot);
+ if (lk_ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to cleanup source file on %s", loc->path,
+ from->name);
}
+ }
+
+ /* reset the destination back to 0 */
+ if (clean_dst) {
+ lk_ret = syncop_ftruncate(to, dst_fd, 0, NULL, NULL, NULL, NULL);
+ if (lk_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -lk_ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: "
+ "%s: failed to reset target size back to 0",
+ loc->path);
+ }
+ }
- ret = syncop_lookup (this, loc, NULL, NULL, NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: failed to lookup the file on subvolumes (%s)",
- loc->path, strerror (errno));
+ if (inodelk_locked) {
+ flock.l_type = F_UNLCK;
+
+ lk_ret = syncop_inodelk(from, DHT_FILE_MIGRATE_DOMAIN, &tmp_loc,
+ F_SETLK, &flock, NULL, NULL);
+ if (lk_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to unlock file on %s", loc->path, from->name);
+ }
+ }
+
+ if (entrylk_locked) {
+ lk_ret = syncop_entrylk(hashed_subvol, DHT_ENTRY_SYNC_DOMAIN,
+ &parent_loc, loc->name, ENTRYLK_UNLOCK,
+ ENTRYLK_UNLOCK, NULL, NULL);
+ if (lk_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to unlock entrylk on %s", loc->path,
+ hashed_subvol->name);
}
+ }
- gf_log (this->name, GF_LOG_INFO,
- "completed migration of %s from subvolume %s to %s",
- loc->path, from->name, to->name);
+ if (p_locked) {
+ plock.l_type = F_UNLCK;
+ lk_ret = syncop_lk(from, src_fd, F_SETLK, &plock, NULL, NULL);
- ret = 0;
-out:
- if (dict)
- dict_unref (dict);
+ if (lk_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "%s: failed to unlock file on %s", loc->path, from->name);
+ }
+ }
- if (xattr)
- dict_unref (xattr);
- if (xattr_rsp)
- dict_unref (xattr_rsp);
+ lk_ret = syncop_removexattr(to, loc, GF_PROTECT_FROM_EXTERNAL_WRITES, NULL,
+ NULL);
+ if (lk_ret && (lk_ret != -ENODATA) && (lk_ret != -ENOATTR)) {
+ gf_msg(this->name, GF_LOG_WARNING, -lk_ret, 0,
+ "%s: removexattr failed key %s", loc->path,
+ GF_PROTECT_FROM_EXTERNAL_WRITES);
+ }
- if (dst_fd)
- syncop_close (dst_fd);
- if (src_fd)
- syncop_close (src_fd);
+ if (dict)
+ dict_unref(dict);
- return ret;
-}
+ if (xattr)
+ dict_unref(xattr);
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
-static int
-rebalance_task (void *data)
-{
- int ret = -1;
- dht_local_t *local = NULL;
- call_frame_t *frame = NULL;
+ if (dst_fd)
+ syncop_close(dst_fd);
- frame = data;
+ if (src_fd)
+ syncop_close(src_fd);
+ if (linkto_fd)
+ syncop_close(linkto_fd);
- local = frame->local;
+ if (xdata)
+ dict_unref(xdata);
- /* This function is 'synchrounous', hence if it returns,
- we are done with the task */
- ret = dht_migrate_file (THIS, &local->loc, local->rebalance.from_subvol,
- local->rebalance.target_node, local->flags);
+ loc_wipe(&tmp_loc);
+ loc_wipe(&parent_loc);
- return ret;
+ return ret;
}
static int
-rebalance_task_completion (int op_ret, call_frame_t *sync_frame, void *data)
+rebalance_task(void *data)
{
- int ret = -1;
- uint64_t layout_int = 0;
- dht_layout_t *layout = 0;
- xlator_t *this = NULL;
- dht_local_t *local = NULL;
- int32_t op_errno = EINVAL;
-
- this = THIS;
- local = sync_frame->local;
-
- if (!op_ret) {
- /* Make sure we have valid 'layout' in inode ctx
- after the operation */
- ret = inode_ctx_del (local->loc.inode, this, &layout_int);
- if (!ret && layout_int) {
- layout = (dht_layout_t *)(long)layout_int;
- dht_layout_unref (this, layout);
- }
+ int ret = -1;
+ dht_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ int fop_errno = 0;
- ret = dht_layout_preset (this, local->rebalance.target_node,
- local->loc.inode);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set inode ctx", local->loc.path);
- }
+ frame = data;
- if (op_ret == -1) {
- /* Failure of migration process, mostly due to write process.
- as we can't preserve the exact errno, lets say there was
- no space to migrate-data
- */
- op_errno = ENOSPC;
- }
+ local = frame->local;
- if (op_ret == 1) {
- /* migration didn't happen, but is not a failure, let the user
- understand that he doesn't have permission to migrate the
- file.
- */
- op_ret = -1;
- op_errno = EPERM;
- }
+ /* This function is 'synchrounous', hence if it returns,
+ we are done with the task */
+ ret = dht_migrate_file(THIS, &local->loc, local->rebalance.from_subvol,
+ local->rebalance.target_node, local->flags,
+ &fop_errno);
- DHT_STACK_UNWIND (setxattr, sync_frame, op_ret, op_errno, NULL);
- return 0;
+ return ret;
+}
+
+static int
+rebalance_task_completion(int op_ret, call_frame_t *sync_frame, void *data)
+{
+ int32_t op_errno = EINVAL;
+
+ if (op_ret == -1) {
+ /* Failure of migration process, mostly due to write process.
+ as we can't preserve the exact errno, lets say there was
+ no space to migrate-data
+ */
+ op_errno = ENOSPC;
+ } else if (op_ret == 1) {
+ /* migration didn't happen, but is not a failure, let the user
+ understand that he doesn't have permission to migrate the
+ file.
+ */
+ op_ret = -1;
+ op_errno = EPERM;
+ } else if (op_ret != 0) {
+ op_errno = -op_ret;
+ op_ret = -1;
+ }
+
+ DHT_STACK_UNWIND(setxattr, sync_frame, op_ret, op_errno, NULL);
+ return 0;
}
int
-dht_start_rebalance_task (xlator_t *this, call_frame_t *frame)
+dht_start_rebalance_task(xlator_t *this, call_frame_t *frame)
{
- int ret = -1;
+ int ret = -1;
- ret = synctask_new (this->ctx->env, rebalance_task,
- rebalance_task_completion,
- frame, frame);
- return ret;
+ ret = synctask_new(this->ctx->env, rebalance_task,
+ rebalance_task_completion, frame, frame);
+ return ret;
}
int
-gf_listener_stop (void)
+gf_listener_stop(xlator_t *this)
{
- glusterfs_ctx_t *ctx = NULL;
- cmd_args_t *cmd_args = NULL;
- int ret = 0;
- xlator_t *this = NULL;
-
- ctx = glusterfs_ctx_get ();
- GF_ASSERT (ctx);
- cmd_args = &ctx->cmd_args;
- if (cmd_args->sock_file) {
- ret = unlink (cmd_args->sock_file);
- if (ret && (ENOENT == errno)) {
- ret = 0;
- }
+ glusterfs_ctx_t *ctx = NULL;
+ cmd_args_t *cmd_args = NULL;
+ int ret = 0;
+
+ ctx = this->ctx;
+ GF_ASSERT(ctx);
+ cmd_args = &ctx->cmd_args;
+ if (cmd_args->sock_file) {
+ ret = sys_unlink(cmd_args->sock_file);
+ if (ret && (ENOENT == errno)) {
+ ret = 0;
}
-
- if (ret) {
- this = THIS;
- gf_log (this->name, GF_LOG_ERROR, "Failed to unlink listener "
- "socket %s, error: %s", cmd_args->sock_file,
- strerror (errno));
- }
- return ret;
+ }
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, DHT_MSG_SOCKET_ERROR,
+ "Failed to unlink listener "
+ "socket %s",
+ cmd_args->sock_file);
+ }
+ return ret;
}
void
-dht_build_root_inode (xlator_t *this, inode_t **inode)
+dht_build_root_inode(xlator_t *this, inode_t **inode)
{
- inode_table_t *itable = NULL;
- uuid_t root_gfid = {0, };
+ inode_table_t *itable = NULL;
+ static uuid_t root_gfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
- itable = inode_table_new (0, this);
- if (!itable)
- return;
+ itable = inode_table_new(0, this);
+ if (!itable)
+ return;
- root_gfid[15] = 1;
- *inode = inode_find (itable, root_gfid);
+ *inode = inode_find(itable, root_gfid);
}
void
-dht_build_root_loc (inode_t *inode, loc_t *loc)
+dht_build_root_loc(inode_t *inode, loc_t *loc)
{
- loc->path = "/";
- loc->inode = inode;
- loc->inode->ia_type = IA_IFDIR;
- memset (loc->gfid, 0, 16);
- loc->gfid[15] = 1;
+ loc->path = "/";
+ loc->inode = inode;
+ loc->inode->ia_type = IA_IFDIR;
+ memset(loc->gfid, 0, 16);
+ loc->gfid[15] = 1;
}
-
/* return values: 1 -> error, bug ignore and continue
0 -> proceed
-1 -> error, handle it */
int32_t
-gf_defrag_handle_migrate_error (int32_t op_errno, gf_defrag_info_t *defrag)
+gf_defrag_handle_migrate_error(int32_t op_errno, gf_defrag_info_t *defrag)
{
- /* if errno is not ENOSPC or ENOTCONN, we can still continue
- with rebalance process */
- if ((errno != ENOSPC) || (errno != ENOTCONN))
- return 1;
-
- if (errno == ENOTCONN) {
- /* Most probably mount point went missing (mostly due
- to a brick down), say rebalance failure to user,
- let him restart it if everything is fine */
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- return -1;
+ int ret = 0;
+ /* if errno is not ENOTCONN, we can still continue
+ with rebalance process */
+ if (op_errno != ENOTCONN) {
+ ret = 1;
+ goto out;
+ }
+
+ if (op_errno == ENOTCONN) {
+ /* Most probably mount point went missing (mostly due
+ to a brick down), say rebalance failure to user,
+ let him restart it if everything is fine */
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ ret = -1;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+static gf_boolean_t
+gf_defrag_pattern_match(gf_defrag_info_t *defrag, char *name, uint64_t size)
+{
+ gf_defrag_pattern_list_t *trav = NULL;
+ gf_boolean_t match = _gf_false;
+ gf_boolean_t ret = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("dht", defrag, out);
+
+ trav = defrag->defrag_pattern;
+ while (trav) {
+ if (!fnmatch(trav->path_pattern, name, FNM_NOESCAPE)) {
+ match = _gf_true;
+ break;
}
+ trav = trav->next;
+ }
- if (errno == ENOSPC) {
- /* rebalance process itself failed, may be
- remote brick went down, or write failed due to
- disk full etc etc.. */
- defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
- return -1;
+ if ((match == _gf_true) && (size >= trav->size))
+ ret = _gf_true;
+
+out:
+ return ret;
+}
+
+int
+dht_dfreaddirp_done(dht_dfoffset_ctx_t *offset_var, int cnt)
+{
+ int i;
+ int result = 1;
+
+ for (i = 0; i < cnt; i++) {
+ if (offset_var[i].readdir_done == 0) {
+ result = 0;
+ break;
}
+ }
+ return result;
+}
- return 0;
+int static gf_defrag_ctx_subvols_init(dht_dfoffset_ctx_t *offset_var,
+ xlator_t *this)
+{
+ int i;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf)
+ return -1;
+
+ for (i = 0; i < conf->local_subvols_cnt; i++) {
+ offset_var[i].this = conf->local_subvols[i];
+ offset_var[i].offset = (off_t)0;
+ offset_var[i].readdir_done = 0;
+ }
+
+ return 0;
+}
+
+static int
+dht_get_first_non_null_index(subvol_nodeuuids_info_t *entry)
+{
+ int i = 0;
+ int index = 0;
+
+ for (i = 0; i < entry->count; i++) {
+ if (!gf_uuid_is_null(entry->elements[i].uuid)) {
+ index = i;
+ goto out;
+ }
+ }
+
+ if (i == entry->count) {
+ index = -1;
+ }
+out:
+ return index;
}
-/* We do a depth first traversal of directories. But before we move into
- * subdirs, we complete the data migration of those directories whose layouts
- * have been fixed
+/* Return value
+ * 0 : this node does not migrate the file
+ * 1 : this node migrates the file
+ *
+ * Use the hash value of the gfid to determine which node will migrate files.
+ * Using the gfid instead of the name also ensures that the same node handles
+ * all hardlinks.
*/
-int
-gf_defrag_migrate_data (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
- dict_t *migrate_data)
+gf_boolean_t
+gf_defrag_should_i_migrate(xlator_t *this, int local_subvol_index, uuid_t gfid)
{
- int ret = -1;
- loc_t entry_loc = {0,};
- fd_t *fd = NULL;
- gf_dirent_t entries;
- gf_dirent_t *tmp = NULL;
- gf_dirent_t *entry = NULL;
- gf_boolean_t free_entries = _gf_false;
- off_t offset = 0;
- dict_t *dict = NULL;
- struct iatt iatt = {0,};
- int32_t op_errno = 0;
- char *uuid_str = NULL;
- uuid_t node_uuid = {0,};
- int readdir_operrno = 0;
- struct timeval dir_start = {0,};
- struct timeval end = {0,};
- double elapsed = {0,};
- struct timeval start = {0,};
-
- gf_log (this->name, GF_LOG_INFO, "migrate data called on %s",
- loc->path);
- gettimeofday (&dir_start, NULL);
-
- fd = fd_create (loc->inode, defrag->pid);
- if (!fd) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
- goto out;
+ gf_boolean_t ret = _gf_false;
+ int i = local_subvol_index;
+ char *str = NULL;
+ uint32_t hashval = 0;
+ int32_t index = 0;
+ dht_conf_t *conf = NULL;
+ char buf[UUID_CANONICAL_FORM_LEN + 1] = {
+ 0,
+ };
+ subvol_nodeuuids_info_t *entry = NULL;
+
+ conf = this->private;
+
+ /* Pure distribute. A subvol in this case
+ will be handled by only one node */
+
+ entry = &(conf->local_nodeuuids[i]);
+ if (entry->count == 1) {
+ return 1;
+ }
+
+ str = uuid_utoa_r(gfid, buf);
+ if (dht_hash_compute(this, 0, str, &hashval) == 0) {
+ index = (hashval % entry->count);
+ if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
+ /* Index matches this node's nodeuuid.*/
+ ret = _gf_true;
+ goto out;
}
- ret = syncop_opendir (this, loc, fd);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
- loc->path);
+ /* Brick down - some other node has to migrate these files*/
+ if (gf_uuid_is_null(entry->elements[index].uuid)) {
+ /* Fall back to the first non-null index */
+ index = dht_get_first_non_null_index(entry);
+
+ if (index == -1) {
+ /* None of the bricks in the subvol are up.
+ * CHILD_DOWN will kill the process soon */
+
+ return _gf_false;
+ }
+
+ if (entry->elements[index].info == REBAL_NODEUUID_MINE) {
+ /* Index matches this node's nodeuuid.*/
+ ret = _gf_true;
goto out;
+ }
}
+ }
+out:
+ return ret;
+}
- INIT_LIST_HEAD (&entries.list);
-
- while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
- &entries)) != 0) {
- if (ret < 0)
- break;
+int
+gf_defrag_migrate_single_file(void *opaque)
+{
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ int ret = 0;
+ gf_dirent_t *entry = NULL;
+ struct timeval start = {
+ 0,
+ };
+ loc_t entry_loc = {
+ 0,
+ };
+ loc_t *loc = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ dict_t *migrate_data = NULL;
+ struct timeval end = {
+ 0,
+ };
+ double elapsed = {
+ 0,
+ };
+ struct dht_container *rebal_entry = NULL;
+ inode_t *inode = NULL;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ call_frame_t *statfs_frame = NULL;
+ xlator_t *old_THIS = NULL;
+ data_t *tmp = NULL;
+ int fop_errno = 0;
+ gf_dht_migrate_data_type_t rebal_type = GF_DHT_MIGRATE_DATA;
+ char value[MAX_REBAL_TYPE_SIZE] = {
+ 0,
+ };
+ struct iatt *iatt_ptr = NULL;
+ gf_boolean_t update_skippedcount = _gf_true;
+ int i = 0;
+ gf_boolean_t should_i_migrate = 0;
+
+ rebal_entry = (struct dht_container *)opaque;
+ if (!rebal_entry) {
+ gf_log("DHT", GF_LOG_ERROR, "rebal_entry is NULL");
+ ret = -1;
+ goto out;
+ }
- /* Need to keep track of ENOENT errno, that means, there is no
- need to send more readdirp() */
- readdir_operrno = errno;
+ this = rebal_entry->this;
- free_entries = _gf_true;
+ conf = this->private;
- if (list_empty (&entries.list))
- break;
- list_for_each_entry_safe (entry, tmp, &entries.list, list) {
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
- ret = 1;
- goto out;
- }
+ defrag = conf->defrag;
- offset = entry->d_off;
+ loc = rebal_entry->parent_loc;
- if (!strcmp (entry->d_name, ".") ||
- !strcmp (entry->d_name, ".."))
- continue;
+ migrate_data = rebal_entry->migrate_data;
- if (IA_ISDIR (entry->d_stat.ia_type))
- continue;
+ entry = rebal_entry->df_entry;
+ iatt_ptr = &entry->d_stat;
- defrag->num_files_lookedup++;
- if (defrag->stats == _gf_true) {
- gettimeofday (&start, NULL);
- }
- loc_wipe (&entry_loc);
- ret =dht_build_child_loc (this, &entry_loc, loc,
- entry->d_name);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Child loc"
- " build failed");
- goto out;
- }
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = -1;
+ goto out;
+ }
- if (uuid_is_null (entry->d_stat.ia_gfid)) {
- gf_log (this->name, GF_LOG_ERROR, "%s/%s"
- " gfid not present", loc->path,
- entry->d_name);
- continue;
- }
+ if (defrag->stats == _gf_true) {
+ gettimeofday(&start, NULL);
+ }
- uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
+ if (defrag->defrag_pattern &&
+ (gf_defrag_pattern_match(defrag, entry->d_name,
+ entry->d_stat.ia_size) == _gf_false)) {
+ gf_log(this->name, GF_LOG_ERROR, "pattern_match failed");
+ goto out;
+ }
- if (uuid_is_null (loc->gfid)) {
- gf_log (this->name, GF_LOG_ERROR, "%s/%s"
- " gfid not present", loc->path,
- entry->d_name);
- continue;
- }
+ memset(&entry_loc, 0, sizeof(entry_loc));
- uuid_copy (entry_loc.pargfid, loc->gfid);
+ ret = dht_build_child_loc(this, &entry_loc, loc, entry->d_name);
+ if (ret) {
+ LOCK(&defrag->lock);
+ {
+ defrag->total_failures += 1;
+ }
+ UNLOCK(&defrag->lock);
- entry_loc.inode->ia_type = entry->d_stat.ia_type;
+ ret = 0;
- ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
- NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s"
- " lookup failed", entry_loc.path);
- continue;
- }
+ gf_log(this->name, GF_LOG_ERROR, "Child loc build failed");
- ret = syncop_getxattr (this, &entry_loc, &dict,
- GF_XATTR_NODE_UUID_KEY);
- if(ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to "
- "get node-uuid for %s", entry_loc.path);
- continue;
- }
+ goto out;
+ }
- ret = dict_get_str (dict, GF_XATTR_NODE_UUID_KEY,
- &uuid_str);
- if(ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to "
- "get node-uuid from dict for %s",
- entry_loc.path);
- continue;
- }
+ should_i_migrate = gf_defrag_should_i_migrate(
+ this, rebal_entry->local_subvol_index, entry->d_stat.ia_gfid);
- if (uuid_parse (uuid_str, node_uuid)) {
- gf_log (this->name, GF_LOG_ERROR, "uuid_parse "
- "failed for %s", entry_loc.path);
- continue;
- }
+ gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
- /* if file belongs to different node, skip migration
- * the other node will take responsibility of migration
- */
- if (uuid_compare (node_uuid, defrag->node_uuid)) {
- gf_log (this->name, GF_LOG_TRACE, "%s does not"
- "belong to this node", entry_loc.path);
- continue;
- }
+ gf_uuid_copy(entry_loc.pargfid, loc->gfid);
- uuid_str = NULL;
+ ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
- dict_del (dict, GF_XATTR_NODE_UUID_KEY);
+ if (!should_i_migrate) {
+ /* this node isn't supposed to migrate the file. suppressing any
+ * potential error from lookup as this file is under migration by
+ * another node */
+ if (ret) {
+ gf_msg_debug(this->name, -ret,
+ "Ignoring lookup failure: node isn't migrating %s",
+ entry_loc.path);
+ ret = 0;
+ }
+ gf_msg_debug(this->name, 0, "Don't migrate %s ", entry_loc.path);
+ goto out;
+ }
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_MIGRATE_FILE_FAILED,
+ "Migrate file failed: %s lookup failed", entry_loc.path);
+
+ /* Increase failure count only for remove-brick op, so that
+ * user is warned to check the removed-brick for any files left
+ * unmigrated
+ */
+ if (conf->decommission_subvols_cnt) {
+ LOCK(&defrag->lock);
+ {
+ defrag->total_failures += 1;
+ }
+ UNLOCK(&defrag->lock);
+ }
+ ret = 0;
+ goto out;
+ }
- /* if distribute is present, it will honor this key.
- * -1 is returned if distribute is not present or file
- * doesn't have a link-file. If file has link-file, the
- * path of link-file will be the value, and also that
- * guarantees that file has to be mostly migrated */
+ iatt_ptr = &iatt;
- ret = syncop_getxattr (this, &entry_loc, &dict,
- GF_XATTR_LINKINFO_KEY);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_TRACE, "failed to "
- "get link-to key for %s",
- entry_loc.path);
- continue;
- }
+ hashed_subvol = dht_subvol_get_hashed(this, &entry_loc);
+ if (!hashed_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_HASHED_SUBVOL_GET_FAILED,
+ "Failed to get hashed subvol for %s", entry_loc.path);
+ ret = 0;
+ goto out;
+ }
- ret = syncop_setxattr (this, &entry_loc, migrate_data,
- 0);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "migrate-data"
- " failed for %s", entry_loc.path);
- defrag->total_failures +=1;
- }
+ cached_subvol = dht_subvol_get_cached(this, entry_loc.inode);
+ if (!cached_subvol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CACHED_SUBVOL_GET_FAILED,
+ "Failed to get cached subvol for %s", entry_loc.path);
- if (ret == -1) {
- op_errno = errno;
- ret = gf_defrag_handle_migrate_error (op_errno,
- defrag);
-
- if (!ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "migrate-data on %s failed: %s",
- entry_loc.path,
- strerror (op_errno));
- else if (ret == 1)
- continue;
- else if (ret == -1)
- goto out;
- }
+ ret = 0;
+ goto out;
+ }
- LOCK (&defrag->lock);
+ if (hashed_subvol == cached_subvol) {
+ ret = 0;
+ goto out;
+ }
+
+ inode = inode_link(entry_loc.inode, entry_loc.parent, entry->d_name, &iatt);
+ inode_unref(entry_loc.inode);
+ /* use the inode returned by inode_link */
+ entry_loc.inode = inode;
+
+ old_THIS = THIS;
+ THIS = this;
+ statfs_frame = create_frame(this, this->ctx->pool);
+ if (!statfs_frame) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
+ "Insufficient memory. Frame creation failed");
+ ret = -1;
+ goto out;
+ }
+
+ /* async statfs information for honoring min-free-disk */
+ dht_get_du_info(statfs_frame, this, loc);
+ THIS = old_THIS;
+
+ tmp = dict_get(migrate_data, GF_XATTR_FILE_MIGRATE_KEY);
+ if (tmp) {
+ memcpy(value, tmp->data, tmp->len);
+ if (strcmp(value, "force") == 0)
+ rebal_type = GF_DHT_MIGRATE_DATA_EVEN_IF_LINK_EXISTS;
+
+ if (conf->decommission_in_progress)
+ rebal_type = GF_DHT_MIGRATE_HARDLINK;
+ }
+
+ ret = dht_migrate_file(this, &entry_loc, cached_subvol, hashed_subvol,
+ rebal_type, &fop_errno);
+ if (ret == 1) {
+ if (fop_errno == ENOSPC) {
+ gf_msg_debug(this->name, 0,
+ "migrate-data skipped for"
+ " %s due to space constraints",
+ entry_loc.path);
+
+ /* For remove-brick case if the source is not one of the
+ * removed-brick, do not mark the error as failure */
+ if (conf->decommission_subvols_cnt) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i] == cached_subvol) {
+ LOCK(&defrag->lock);
{
- defrag->total_files += 1;
- defrag->total_data += iatt.ia_size;
+ defrag->total_failures += 1;
+ update_skippedcount = _gf_false;
}
- UNLOCK (&defrag->lock);
- if (defrag->stats == _gf_true) {
- gettimeofday (&end, NULL);
- elapsed = (end.tv_sec - start.tv_sec) * 1e6 +
- (end.tv_usec - start.tv_usec);
- gf_log (this->name, GF_LOG_INFO, "Migration of "
- "file:%s size:%"PRIu64" bytes took %.2f"
- "secs", entry_loc.path, iatt.ia_size,
- elapsed/1e6);
- }
- }
-
- gf_dirent_free (&entries);
- free_entries = _gf_false;
- INIT_LIST_HEAD (&entries.list);
+ UNLOCK(&defrag->lock);
- if (readdir_operrno == ENOENT)
break;
+ }
+ }
+ }
+
+ if (update_skippedcount) {
+ LOCK(&defrag->lock);
+ {
+ defrag->skipped += 1;
+ }
+ UNLOCK(&defrag->lock);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_MIGRATE_FILE_SKIPPED,
+ "File migration skipped for %s.", entry_loc.path);
+ }
+
+ } else if (fop_errno == ENOTSUP) {
+ gf_msg_debug(this->name, 0,
+ "migrate-data skipped for"
+ " hardlink %s ",
+ entry_loc.path);
+ LOCK(&defrag->lock);
+ {
+ defrag->skipped += 1;
+ }
+ UNLOCK(&defrag->lock);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_MIGRATE_FILE_SKIPPED,
+ "File migration skipped for %s.", entry_loc.path);
}
- gettimeofday (&end, NULL);
- elapsed = (end.tv_sec - dir_start.tv_sec) * 1e6 +
- (end.tv_usec - dir_start.tv_usec);
- gf_log (this->name, GF_LOG_INFO, "Migration operation on dir %s took "
- "%.2f secs", loc->path, elapsed/1e6);
ret = 0;
-out:
- if (free_entries)
- gf_dirent_free (&entries);
+ goto out;
+ } else if (ret < 0) {
+ if (fop_errno != EEXIST) {
+ gf_msg(this->name, GF_LOG_ERROR, fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED, "migrate-data failed for %s",
+ entry_loc.path);
+
+ LOCK(&defrag->lock);
+ {
+ defrag->total_failures += 1;
+ }
+ UNLOCK(&defrag->lock);
+ }
- loc_wipe (&entry_loc);
+ ret = gf_defrag_handle_migrate_error(fop_errno, defrag);
- if (dict)
- dict_unref(dict);
+ if (!ret) {
+ gf_msg(this->name, GF_LOG_ERROR, fop_errno,
+ DHT_MSG_MIGRATE_FILE_FAILED,
+ "migrate-data on %s failed:", entry_loc.path);
+ } else if (ret == 1) {
+ ret = 0;
+ }
- if (fd)
- fd_unref (fd);
- return ret;
+ goto out;
+ }
+
+ LOCK(&defrag->lock);
+ {
+ defrag->total_files += 1;
+ defrag->total_data += iatt.ia_size;
+ }
+ UNLOCK(&defrag->lock);
+
+ if (defrag->stats == _gf_true) {
+ gettimeofday(&end, NULL);
+ elapsed = gf_tvdiff(&start, &end);
+ gf_log(this->name, GF_LOG_INFO,
+ "Migration of "
+ "file:%s size:%" PRIu64
+ " bytes took %.2f"
+ "secs and ret: %d",
+ entry_loc.name, iatt.ia_size, elapsed / 1e6, ret);
+ }
-}
+out:
+ if (statfs_frame) {
+ STACK_DESTROY(statfs_frame->root);
+ }
+
+ if (iatt_ptr) {
+ LOCK(&defrag->lock);
+ {
+ defrag->size_processed += iatt_ptr->ia_size;
+ }
+ UNLOCK(&defrag->lock);
+ }
+ loc_wipe(&entry_loc);
+ return ret;
+}
-int
-gf_defrag_fix_layout (xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
- dict_t *fix_layout, dict_t *migrate_data)
+void *
+gf_defrag_task(void *opaque)
{
- int ret = -1;
- loc_t entry_loc = {0,};
- fd_t *fd = NULL;
- gf_dirent_t entries;
- gf_dirent_t *tmp = NULL;
- gf_dirent_t *entry = NULL;
- gf_boolean_t free_entries = _gf_false;
- dict_t *dict = NULL;
- off_t offset = 0;
- struct iatt iatt = {0,};
-
- ret = syncop_lookup (this, loc, NULL, &iatt, NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Lookup failed on %s",
- loc->path);
- goto out;
+ struct list_head *q_head = NULL;
+ struct dht_container *iterator = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ int ret = 0;
+ pid_t pid = GF_CLIENT_PID_DEFRAG;
+
+ defrag = (gf_defrag_info_t *)opaque;
+ if (!defrag) {
+ gf_msg("dht", GF_LOG_ERROR, 0, 0, "defrag is NULL");
+ goto out;
+ }
+
+ syncopctx_setfspid(&pid);
+
+ q_head = &(defrag->queue[0].list);
+
+ /* The following while loop will dequeue one entry from the defrag->queue
+ under lock. We will update the defrag->global_error only when there
+ is an error which is critical to stop the rebalance process. The stop
+ message will be intimated to other migrator threads by setting the
+ defrag->defrag_status to GF_DEFRAG_STATUS_FAILED.
+
+ In defrag->queue, a low watermark (MIN_MIGRATE_QUEUE_COUNT) is
+ maintained so that crawler does not starve the file migration
+ workers and a high watermark (MAX_MIGRATE_QUEUE_COUNT) so that
+ crawler does not go far ahead in filling up the queue.
+ */
+
+ while (_gf_true) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ pthread_cond_broadcast(&defrag->rebalance_crawler_alarm);
+ pthread_cond_broadcast(&defrag->parallel_migration_cond);
+ goto out;
}
- if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
- ret = gf_defrag_migrate_data (this, defrag, loc, migrate_data);
- if (ret)
- goto out;
+ pthread_mutex_lock(&defrag->dfq_mutex);
+ {
+ /*Throttle down:
+ If the reconfigured count is less than current thread
+ count, then the current thread will sleep */
+
+ /*TODO: Need to refactor the following block to work
+ *under defrag->lock. For now access
+ * defrag->current_thread_count and rthcount under
+ * dfq_mutex lock */
+ while (!defrag->crawl_done && (defrag->recon_thread_count <
+ defrag->current_thread_count)) {
+ defrag->current_thread_count--;
+ gf_msg_debug("DHT", 0,
+ "Thread sleeping. "
+ "current thread count: %d",
+ defrag->current_thread_count);
+
+ pthread_cond_wait(&defrag->df_wakeup_thread,
+ &defrag->dfq_mutex);
+
+ defrag->current_thread_count++;
+ gf_msg_debug("DHT", 0,
+ "Thread wokeup. "
+ "current thread count: %d",
+ defrag->current_thread_count);
+ }
+
+ if (defrag->q_entry_count) {
+ iterator = list_entry(q_head->next, typeof(*iterator), list);
+
+ gf_msg_debug("DHT", 0,
+ "picking entry "
+ "%s",
+ iterator->df_entry->d_name);
+
+ list_del_init(&(iterator->list));
+
+ defrag->q_entry_count--;
+
+ if ((defrag->q_entry_count < MIN_MIGRATE_QUEUE_COUNT) &&
+ defrag->wakeup_crawler) {
+ pthread_cond_broadcast(&defrag->rebalance_crawler_alarm);
+ }
+ pthread_mutex_unlock(&defrag->dfq_mutex);
+ ret = gf_defrag_migrate_single_file((void *)iterator);
+
+ /*Critical errors: ENOTCONN and ENOSPACE*/
+ if (ret) {
+ dht_set_global_defrag_error(defrag, ret);
+
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+
+ pthread_cond_broadcast(&defrag->rebalance_crawler_alarm);
+
+ pthread_cond_broadcast(&defrag->parallel_migration_cond);
+
+ goto out;
+ }
+
+ gf_defrag_free_container(iterator);
+
+ continue;
+ } else {
+ /* defrag->crawl_done flag is set means crawling
+ file system is done and hence a list_empty when
+ the above flag is set indicates there are no more
+ entries to be added to the queue and rebalance is
+ finished */
+
+ if (!defrag->crawl_done) {
+ defrag->current_thread_count--;
+ gf_msg_debug("DHT", 0,
+ "Thread "
+ "sleeping while waiting "
+ "for migration entries. "
+ "current thread count:%d",
+ defrag->current_thread_count);
+
+ pthread_cond_wait(&defrag->parallel_migration_cond,
+ &defrag->dfq_mutex);
+ }
+
+ if (defrag->crawl_done && !defrag->q_entry_count) {
+ defrag->current_thread_count++;
+ gf_msg_debug("DHT", 0, "Exiting thread");
+
+ pthread_cond_broadcast(&defrag->parallel_migration_cond);
+ goto unlock;
+ } else {
+ defrag->current_thread_count++;
+ gf_msg_debug("DHT", 0,
+ "Thread woke up"
+ " as found migrating entries. "
+ "current thread count:%d",
+ defrag->current_thread_count);
+
+ pthread_mutex_unlock(&defrag->dfq_mutex);
+ continue;
+ }
+ }
}
+ unlock:
+ pthread_mutex_unlock(&defrag->dfq_mutex);
+ break;
+ }
+out:
+ return NULL;
+}
- gf_log (this->name, GF_LOG_TRACE, "fix layout called on %s", loc->path);
+int static gf_defrag_get_entry(xlator_t *this, int i,
+ struct dht_container **container, loc_t *loc,
+ dht_conf_t *conf, gf_defrag_info_t *defrag,
+ fd_t *fd, dict_t *migrate_data,
+ struct dir_dfmeta *dir_dfmeta, dict_t *xattr_req,
+ int *perrno)
+{
+ int ret = 0;
+ char is_linkfile = 0;
+ gf_dirent_t *df_entry = NULL;
+ struct dht_container *tmp_container = NULL;
+
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = -1;
+ goto out;
+ }
- fd = fd_create (loc->inode, defrag->pid);
+ if (dir_dfmeta->offset_var[i].readdir_done == 1) {
+ ret = 0;
+ goto out;
+ }
+
+ if (dir_dfmeta->fetch_entries[i] == 1) {
if (!fd) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to create fd");
- ret = -1;
- goto out;
+ dir_dfmeta->fetch_entries[i] = 0;
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+ ret = 0;
+ goto out;
}
- ret = syncop_opendir (this, loc, fd);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s",
- loc->path);
- ret = -1;
- goto out;
+ ret = syncop_readdirp(conf->local_subvols[i], fd, 131072,
+ dir_dfmeta->offset_var[i].offset,
+ &(dir_dfmeta->equeue[i]), xattr_req, NULL);
+ if (ret == 0) {
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+ ret = 0;
+ goto out;
}
- INIT_LIST_HEAD (&entries.list);
- while ((ret = syncop_readdirp (this, fd, 131072, offset, NULL,
- &entries)) != 0)
- {
- if ((ret < 0) || (ret && (errno == ENOENT)))
- break;
- free_entries = _gf_true;
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ DHT_MSG_MIGRATE_DATA_FAILED,
+ "Readdirp failed. Aborting data migration for "
+ "directory: %s",
+ loc->path);
+ *perrno = -ret;
+ ret = -1;
+ goto out;
+ }
- if (list_empty (&entries.list))
- break;
- list_for_each_entry_safe (entry, tmp, &entries.list, list) {
- if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
- ret = 1;
- goto out;
- }
+ if (list_empty(&(dir_dfmeta->equeue[i].list))) {
+ dir_dfmeta->offset_var[i].readdir_done = 1;
+ ret = 0;
+ goto out;
+ }
- offset = entry->d_off;
+ dir_dfmeta->fetch_entries[i] = 0;
+ }
- if (!strcmp (entry->d_name, ".") ||
- !strcmp (entry->d_name, ".."))
- continue;
+ while (1) {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = -1;
+ goto out;
+ }
- if (!IA_ISDIR (entry->d_stat.ia_type))
- continue;
+ df_entry = list_entry(dir_dfmeta->iterator[i]->next, typeof(*df_entry),
+ list);
- loc_wipe (&entry_loc);
- ret =dht_build_child_loc (this, &entry_loc, loc,
- entry->d_name);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Child loc"
- " build failed");
- goto out;
- }
+ if (&df_entry->list == dir_dfmeta->head[i]) {
+ gf_dirent_free(&(dir_dfmeta->equeue[i]));
+ INIT_LIST_HEAD(&(dir_dfmeta->equeue[i].list));
+ dir_dfmeta->fetch_entries[i] = 1;
+ dir_dfmeta->iterator[i] = dir_dfmeta->head[i];
+ ret = 0;
+ goto out;
+ }
- if (uuid_is_null (entry->d_stat.ia_gfid)) {
- gf_log (this->name, GF_LOG_ERROR, "%s/%s"
- "gfid not present", loc->path,
- entry->d_name);
- continue;
- }
+ dir_dfmeta->iterator[i] = dir_dfmeta->iterator[i]->next;
- entry_loc.inode->ia_type = entry->d_stat.ia_type;
+ dir_dfmeta->offset_var[i].offset = df_entry->d_off;
+ if (!strcmp(df_entry->d_name, ".") || !strcmp(df_entry->d_name, ".."))
+ continue;
- uuid_copy (entry_loc.gfid, entry->d_stat.ia_gfid);
- if (uuid_is_null (loc->gfid)) {
- gf_log (this->name, GF_LOG_ERROR, "%s/%s"
- "gfid not present", loc->path,
- entry->d_name);
- continue;
- }
+ if (IA_ISDIR(df_entry->d_stat.ia_type)) {
+ defrag->size_processed += df_entry->d_stat.ia_size;
+ continue;
+ }
- uuid_copy (entry_loc.pargfid, loc->gfid);
+ defrag->num_files_lookedup++;
- ret = syncop_lookup (this, &entry_loc, NULL, &iatt,
- NULL, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "%s"
- " lookup failed", entry_loc.path);
- continue;
- }
+ if (defrag->defrag_pattern &&
+ (gf_defrag_pattern_match(defrag, df_entry->d_name,
+ df_entry->d_stat.ia_size) == _gf_false)) {
+ defrag->size_processed += df_entry->d_stat.ia_size;
+ continue;
+ }
- ret = syncop_setxattr (this, &entry_loc, fix_layout,
- 0);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Setxattr "
- "failed for %s", entry_loc.path);
- defrag->defrag_status =
- GF_DEFRAG_STATUS_FAILED;
- goto out;
- }
- ret = gf_defrag_fix_layout (this, defrag, &entry_loc,
- fix_layout, migrate_data);
+ is_linkfile = check_is_linkfile(NULL, &df_entry->d_stat, df_entry->dict,
+ conf->link_xattr_name);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Fix layout "
- "failed for %s", entry_loc.path);
- goto out;
- }
+ if (is_linkfile) {
+ /* No need to add linkto file to the queue for
+ migration. Only the actual data file need to
+ be checked for migration criteria.
+ */
- }
- gf_dirent_free (&entries);
- free_entries = _gf_false;
- INIT_LIST_HEAD (&entries.list);
+ gf_msg_debug(this->name, 0,
+ "Skipping linkfile"
+ " %s on subvol: %s",
+ df_entry->d_name, conf->local_subvols[i]->name);
+ continue;
+ }
+
+ /*Build Container Structure */
+
+ tmp_container = GF_CALLOC(1, sizeof(struct dht_container),
+ gf_dht_mt_container_t);
+ if (!tmp_container) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to allocate "
+ "memory for container");
+ ret = -1;
+ goto out;
}
+ tmp_container->df_entry = gf_dirent_for_name(df_entry->d_name);
+ if (!tmp_container->df_entry) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to allocate "
+ "memory for df_entry");
+ ret = -1;
+ goto out;
+ }
+
+ tmp_container->local_subvol_index = i;
+
+ tmp_container->df_entry->d_stat = df_entry->d_stat;
+
+ tmp_container->df_entry->d_ino = df_entry->d_ino;
+
+ tmp_container->df_entry->d_type = df_entry->d_type;
+
+ tmp_container->df_entry->d_len = df_entry->d_len;
+
+ tmp_container->parent_loc = GF_CALLOC(1, sizeof(*loc), gf_dht_mt_loc_t);
+ if (!tmp_container->parent_loc) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to allocate "
+ "memory for loc");
+ ret = -1;
+ goto out;
+ }
+
+ ret = loc_copy(tmp_container->parent_loc, loc);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "loc_copy failed");
+ ret = -1;
+ goto out;
+ }
+
+ tmp_container->migrate_data = migrate_data;
+
+ tmp_container->this = this;
+
+ if (df_entry->dict)
+ tmp_container->df_entry->dict = dict_ref(df_entry->dict);
+
+ /*Build Container Structure >> END*/
ret = 0;
+ goto out;
+ }
+
out:
- if (free_entries)
- gf_dirent_free (&entries);
+ if (ret == 0) {
+ *container = tmp_container;
+ } else {
+ if (tmp_container) {
+ gf_defrag_free_container(tmp_container);
+ }
+ }
- loc_wipe (&entry_loc);
+ return ret;
+}
- if (dict)
- dict_unref(dict);
+int
+gf_defrag_process_dir(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *migrate_data, int *perrno)
+{
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ gf_dirent_t entries;
+ dict_t *xattr_req = NULL;
+ struct timeval dir_start = {
+ 0,
+ };
+ struct timeval end = {
+ 0,
+ };
+ double elapsed = {
+ 0,
+ };
+ int local_subvols_cnt = 0;
+ int i = 0;
+ int j = 0;
+ struct dht_container *container = NULL;
+ int ldfq_count = 0;
+ int dfc_index = 0;
+ int throttle_up = 0;
+ struct dir_dfmeta *dir_dfmeta = NULL;
+ xlator_t *old_THIS = NULL;
+
+ gf_log(this->name, GF_LOG_INFO, "migrate data called on %s", loc->path);
+ gettimeofday(&dir_start, NULL);
+
+ conf = this->private;
+ local_subvols_cnt = conf->local_subvols_cnt;
+
+ if (!local_subvols_cnt) {
+ ret = 0;
+ goto out;
+ }
- if (fd)
- fd_unref (fd);
+ old_THIS = THIS;
+ THIS = this;
- return ret;
+ dir_dfmeta = GF_CALLOC(1, sizeof(*dir_dfmeta), gf_common_mt_pointer);
+ if (!dir_dfmeta) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ dir_dfmeta->lfd = GF_CALLOC(local_subvols_cnt, sizeof(fd_t *),
+ gf_common_mt_pointer);
+ if (!dir_dfmeta->lfd) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY,
+ "for dir_dfmeta", NULL);
+ ret = -1;
+ *perrno = ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < local_subvols_cnt; i++) {
+ dir_dfmeta->lfd[i] = fd_create(loc->inode, defrag->pid);
+ if (!dir_dfmeta->lfd[i]) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_FD_CREATE_FAILED,
+ NULL);
+ *perrno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+ ret = syncop_opendir(conf->local_subvols[i], loc, dir_dfmeta->lfd[i],
+ NULL, NULL);
+ if (ret) {
+ fd_unref(dir_dfmeta->lfd[i]);
+ dir_dfmeta->lfd[i] = NULL;
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_FAILED_TO_OPEN,
+ "dir: %s", loc->path, "subvol: %s",
+ conf->local_subvols[i]->name, NULL);
+
+ if (conf->decommission_in_progress) {
+ *perrno = -ret;
+ ret = -1;
+ goto out;
+ }
+ } else {
+ fd_bind(dir_dfmeta->lfd[i]);
+ }
+ }
+
+ dir_dfmeta->head = GF_CALLOC(local_subvols_cnt, sizeof(*(dir_dfmeta->head)),
+ gf_common_mt_pointer);
+ if (!dir_dfmeta->head) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->head is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ dir_dfmeta->iterator = GF_CALLOC(local_subvols_cnt,
+ sizeof(*(dir_dfmeta->iterator)),
+ gf_common_mt_pointer);
+ if (!dir_dfmeta->iterator) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->iterator is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ dir_dfmeta->equeue = GF_CALLOC(local_subvols_cnt, sizeof(entries),
+ gf_dht_mt_dirent_t);
+ if (!dir_dfmeta->equeue) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->equeue is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ dir_dfmeta->offset_var = GF_CALLOC(
+ local_subvols_cnt, sizeof(dht_dfoffset_ctx_t), gf_dht_mt_octx_t);
+ if (!dir_dfmeta->offset_var) {
+ gf_log(this->name, GF_LOG_ERROR, "dir_dfmeta->offset_var is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_defrag_ctx_subvols_init(dir_dfmeta->offset_var, this);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "dht_dfoffset_ctx_t"
+ "initialization failed");
+ ret = -1;
+ goto out;
+ }
+
+ dir_dfmeta->fetch_entries = GF_CALLOC(local_subvols_cnt, sizeof(int),
+ gf_common_mt_int);
+ if (!dir_dfmeta->fetch_entries) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_INSUFF_MEMORY,
+ "for dir_dfmeta->fetch_entries", NULL);
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; i < local_subvols_cnt; i++) {
+ INIT_LIST_HEAD(&(dir_dfmeta->equeue[i].list));
+ dir_dfmeta->head[i] = &(dir_dfmeta->equeue[i].list);
+ dir_dfmeta->iterator[i] = dir_dfmeta->head[i];
+ dir_dfmeta->fetch_entries[i] = 1;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ gf_log(this->name, GF_LOG_ERROR, "dict_new failed");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set dict for "
+ "key: %s",
+ conf->link_xattr_name);
+ ret = -1;
+ goto out;
+ }
+
+ /*
+ Job: Read entries from each local subvol and store the entries
+ in equeue array of linked list. Now pick one entry from the
+ equeue array in a round robin basis and add them to defrag Queue.
+ */
+
+ while (!dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) {
+ pthread_mutex_lock(&defrag->dfq_mutex);
+ {
+ /*Throttle up: If reconfigured count is higher than
+ current thread count, wake up the sleeping threads
+ TODO: Need to refactor this. Instead of making the
+ thread sleep and wake, we should terminate and spawn
+ threads on-demand*/
+
+ if (defrag->recon_thread_count > defrag->current_thread_count) {
+ throttle_up = (defrag->recon_thread_count -
+ defrag->current_thread_count);
+ for (j = 0; j < throttle_up; j++) {
+ pthread_cond_signal(&defrag->df_wakeup_thread);
+ }
+ }
+
+ while (defrag->q_entry_count > MAX_MIGRATE_QUEUE_COUNT) {
+ defrag->wakeup_crawler = 1;
+ pthread_cond_wait(&defrag->rebalance_crawler_alarm,
+ &defrag->dfq_mutex);
+ }
+
+ ldfq_count = defrag->q_entry_count;
+
+ if (defrag->wakeup_crawler) {
+ defrag->wakeup_crawler = 0;
+ }
+ }
+ pthread_mutex_unlock(&defrag->dfq_mutex);
+
+ while (
+ ldfq_count <= MAX_MIGRATE_QUEUE_COUNT &&
+ !dht_dfreaddirp_done(dir_dfmeta->offset_var, local_subvols_cnt)) {
+ ret = gf_defrag_get_entry(this, dfc_index, &container, loc, conf,
+ defrag, dir_dfmeta->lfd[dfc_index],
+ migrate_data, dir_dfmeta, xattr_req,
+ perrno);
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED) {
+ goto out;
+ }
+
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Found "
+ "error from gf_defrag_get_entry");
+
+ ret = -1;
+ goto out;
+ }
+
+ /* Check if we got an entry, else we need to move the
+ index to the next subvol */
+ if (!container) {
+ GF_CRAWL_INDEX_MOVE(dfc_index, local_subvols_cnt);
+ continue;
+ }
+
+ /* Q this entry in the dfq */
+ pthread_mutex_lock(&defrag->dfq_mutex);
+ {
+ list_add_tail(&container->list, &(defrag->queue[0].list));
+ defrag->q_entry_count++;
+ ldfq_count = defrag->q_entry_count;
+
+ gf_msg_debug(this->name, 0,
+ "added "
+ "file:%s parent:%s to the queue ",
+ container->df_entry->d_name,
+ container->parent_loc->path);
+
+ pthread_cond_signal(&defrag->parallel_migration_cond);
+ }
+ pthread_mutex_unlock(&defrag->dfq_mutex);
+
+ GF_CRAWL_INDEX_MOVE(dfc_index, local_subvols_cnt);
+ }
+ }
+
+ gettimeofday(&end, NULL);
+ elapsed = gf_tvdiff(&dir_start, &end);
+ gf_log(this->name, GF_LOG_INFO,
+ "Migration operation on dir %s took "
+ "%.2f secs",
+ loc->path, elapsed / 1e6);
+ ret = 0;
+out:
+ THIS = old_THIS;
+ gf_defrag_free_dir_dfmeta(dir_dfmeta, local_subvols_cnt);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ /* It does not matter if it errored out - this number is
+ * used to calculate rebalance estimated time to complete.
+ * No locking required as dirs are processed by a single thread.
+ */
+ defrag->num_dirs_processed++;
+ return ret;
}
+int
+gf_defrag_settle_hash(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *fix_layout)
+{
+ int ret;
+ dht_conf_t *conf = NULL;
+ /*
+ * Now we're ready to update the directory commit hash for the volume
+ * root, so that hash miscompares and broadcast lookups can stop.
+ * However, we want to skip that if fix-layout is all we did. In
+ * that case, we want the miscompares etc. to continue until a real
+ * rebalance is complete.
+ */
+ if (defrag->cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX ||
+ defrag->cmd == GF_DEFRAG_CMD_DETACH_START) {
+ return 0;
+ }
+
+ conf = this->private;
+ if (!conf) {
+ /*Uh oh
+ */
+ return -1;
+ }
+
+ if (conf->local_subvols_cnt == 0 || !conf->lookup_optimize) {
+ /* Commit hash updates are only done on local subvolumes and
+ * only when lookup optimization is needed (for older client
+ * support)
+ */
+ return 0;
+ }
+
+ ret = dict_set_uint32(fix_layout, "new-commit-hash",
+ defrag->new_commit_hash);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Failed to set new-commit-hash");
+ return -1;
+ }
+
+ ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
+ "fix layout on %s failed", loc->path);
+
+ if (-ret == ENOENT || -ret == ESTALE) {
+ /* Dir most likely is deleted */
+ return 0;
+ }
+
+ return -1;
+ }
+
+ /* TBD: find more efficient solution than adding/deleting every time */
+ dict_del(fix_layout, "new-commit-hash");
+
+ return 0;
+}
int
-gf_defrag_start_crawl (void *data)
+gf_defrag_fix_layout(xlator_t *this, gf_defrag_info_t *defrag, loc_t *loc,
+ dict_t *fix_layout, dict_t *migrate_data)
{
- xlator_t *this = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- int ret = -1;
- loc_t loc = {0,};
- struct iatt iatt = {0,};
- struct iatt parent = {0,};
- dict_t *fix_layout = NULL;
- dict_t *migrate_data = NULL;
- dict_t *status = NULL;
-
- this = data;
- if (!this)
- goto out;
+ int ret = -1;
+ loc_t entry_loc = {
+ 0,
+ };
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *tmp = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t free_entries = _gf_false;
+ off_t offset = 0;
+ struct iatt iatt = {
+ 0,
+ };
+ inode_t *linked_inode = NULL, *inode = NULL;
+ dht_conf_t *conf = NULL;
+ int perrno = 0;
+
+ conf = this->private;
+ if (!conf) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_lookup(this, loc, &iatt, NULL, NULL, NULL);
+ if (ret) {
+ if (strcmp(loc->path, "/") == 0) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_LOOKUP_FAILED,
+ "lookup failed for:%s", loc->path);
+
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
- conf = this->private;
- if (!conf)
- goto out;
+ if (-ret == ENOENT || -ret == ESTALE) {
+ gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_DIR_LOOKUP_FAILED,
+ "Dir:%s renamed or removed. Skipping", loc->path);
+ if (conf->decommission_subvols_cnt) {
+ defrag->total_failures++;
+ }
+ ret = 0;
+ goto out;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_DIR_LOOKUP_FAILED,
+ "lookup failed for:%s", loc->path);
+
+ defrag->total_failures++;
+ goto out;
+ }
+ }
- defrag = conf->defrag;
- if (!defrag)
+ fd = fd_create(loc->inode, defrag->pid);
+ if (!fd) {
+ gf_log(this->name, GF_LOG_ERROR, "Failed to create fd");
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir(this, loc, fd, NULL, NULL);
+ if (ret) {
+ if (-ret == ENOENT || -ret == ESTALE) {
+ if (conf->decommission_subvols_cnt) {
+ defrag->total_failures++;
+ }
+ ret = 0;
+ goto out;
+ }
+
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to open dir %s, "
+ "err:%d",
+ loc->path, -ret);
+
+ ret = -1;
+ goto out;
+ }
+
+ fd_bind(fd);
+ INIT_LIST_HEAD(&entries.list);
+
+ while ((ret = syncop_readdirp(this, fd, 131072, offset, &entries, NULL,
+ NULL)) != 0) {
+ if (ret < 0) {
+ if (-ret == ENOENT || -ret == ESTALE) {
+ if (conf->decommission_subvols_cnt) {
+ defrag->total_failures++;
+ }
+ ret = 0;
goto out;
+ }
+
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_READDIR_ERROR,
+ "readdirp failed for "
+ "path %s. Aborting fix-layout",
+ loc->path);
+
+ ret = -1;
+ goto out;
+ }
- gettimeofday (&defrag->start_time, NULL);
- dht_build_root_inode (this, &defrag->root_inode);
- if (!defrag->root_inode)
+ if (list_empty(&entries.list))
+ break;
+
+ free_entries = _gf_true;
+
+ list_for_each_entry_safe(entry, tmp, &entries.list, list)
+ {
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED) {
+ ret = 1;
goto out;
+ }
- dht_build_root_loc (defrag->root_inode, &loc);
+ offset = entry->d_off;
- /* fix-layout on '/' first */
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ continue;
+ if (!IA_ISDIR(entry->d_stat.ia_type)) {
+ continue;
+ }
+ loc_wipe(&entry_loc);
- ret = syncop_lookup (this, &loc, NULL, &iatt, NULL, &parent);
+ ret = dht_build_child_loc(this, &entry_loc, loc, entry->d_name);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Child loc"
+ " build failed for entry: %s",
+ entry->d_name);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "look up on / failed");
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+
+ goto out;
+ } else {
+ continue;
+ }
+ }
+
+ if (gf_uuid_is_null(entry->d_stat.ia_gfid)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s/%s"
+ " gfid not present",
+ loc->path, entry->d_name);
+ continue;
+ }
+
+ gf_uuid_copy(entry_loc.gfid, entry->d_stat.ia_gfid);
+
+ /*In case the gfid stored in the inode by inode_link
+ * and the gfid obtained in the lookup differs, then
+ * client3_3_lookup_cbk will return ESTALE and proper
+ * error will be captured
+ */
+
+ linked_inode = inode_link(entry_loc.inode, loc->inode,
+ entry->d_name, &entry->d_stat);
+
+ inode = entry_loc.inode;
+ entry_loc.inode = linked_inode;
+ inode_unref(inode);
+
+ if (gf_uuid_is_null(loc->gfid)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s/%s"
+ " gfid not present",
+ loc->path, entry->d_name);
+ continue;
+ }
+
+ gf_uuid_copy(entry_loc.pargfid, loc->gfid);
+
+ ret = syncop_lookup(this, &entry_loc, &iatt, NULL, NULL, NULL);
+ if (ret) {
+ if (-ret == ENOENT || -ret == ESTALE) {
+ gf_msg(this->name, GF_LOG_INFO, -ret,
+ DHT_MSG_DIR_LOOKUP_FAILED,
+ "Dir:%s renamed or removed. "
+ "Skipping",
+ loc->path);
+ ret = 0;
+ if (conf->decommission_subvols_cnt) {
+ defrag->total_failures++;
+ }
+ continue;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_LOOKUP_FAILED, "lookup failed for:%s",
+ entry_loc.path);
+
+ defrag->total_failures++;
+
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ ret = -1;
+ goto out;
+ } else {
+ continue;
+ }
+ }
+ }
+
+ /* A return value of 2 means, either process_dir or
+ * lookup of a dir failed. Hence, don't commit hash
+ * for the current directory*/
+
+ ret = gf_defrag_fix_layout(this, defrag, &entry_loc, fix_layout,
+ migrate_data);
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STOPPED ||
+ defrag->defrag_status == GF_DEFRAG_STATUS_FAILED) {
goto out;
+ }
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_LAYOUT_FIX_FAILED,
+ "Fix layout failed for %s", entry_loc.path);
+
+ defrag->total_failures++;
+
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+
+ goto out;
+ } else {
+ /* Let's not commit-hash if
+ * gf_defrag_fix_layout failed*/
+ continue;
+ }
+ }
}
- fix_layout = dict_new ();
- if (!fix_layout) {
+ gf_dirent_free(&entries);
+ free_entries = _gf_false;
+ INIT_LIST_HEAD(&entries.list);
+ }
+
+ /* A directory layout is fixed only after its subdirs are healed to
+ * any newly added bricks. If the layout is fixed before subdirs are
+ * healed, the newly added brick will get a non-null layout.
+ * Any subdirs which hash to that layout will no longer show up
+ * in a directory listing until they are healed.
+ */
+
+ ret = syncop_setxattr(this, loc, fix_layout, 0, NULL, NULL);
+
+ /* In case of a race where the directory is deleted just before
+ * layout setxattr, the errors are updated in the layout structure.
+ * We can use this information to make a decision whether the directory
+ * is deleted entirely.
+ */
+ if (ret == 0) {
+ ret = dht_dir_layout_error_check(this, loc->inode);
+ ret = -ret;
+ }
+
+ if (ret) {
+ if (-ret == ENOENT || -ret == ESTALE) {
+ gf_msg(this->name, GF_LOG_INFO, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
+ "Setxattr failed. Dir %s "
+ "renamed or removed",
+ loc->path);
+ if (conf->decommission_subvols_cnt) {
+ defrag->total_failures++;
+ }
+ ret = 0;
+ goto out;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LAYOUT_FIX_FAILED,
+ "Setxattr failed for %s", loc->path);
+
+ defrag->total_failures++;
+
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
ret = -1;
goto out;
+ }
}
+ }
- ret = dict_set_str (fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes");
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to set dict str");
- goto out;
- }
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ ret = gf_defrag_process_dir(this, defrag, loc, migrate_data, &perrno);
- ret = syncop_setxattr (this, &loc, fix_layout, 0);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "fix layout on %s failed",
- loc.path);
+ if (perrno == ENOENT || perrno == ESTALE) {
+ ret = 0;
goto out;
- }
+ } else {
+ defrag->total_failures++;
- if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
- migrate_data = dict_new ();
- if (!migrate_data) {
- ret = -1;
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_DEFRAG_PROCESS_DIR_FAILED,
+ "gf_defrag_process_dir failed for "
+ "directory: %s",
+ loc->path);
+
+ if (conf->decommission_in_progress) {
+ goto out;
}
- if (defrag->cmd == GF_DEFRAG_CMD_START_FORCE)
- ret = dict_set_str (migrate_data,
- "distribute.migrate-data", "force");
- else
- ret = dict_set_str (migrate_data,
- "distribute.migrate-data",
- "non-force");
- if (ret)
- goto out;
+ }
}
- ret = gf_defrag_fix_layout (this, defrag, &loc, fix_layout,
- migrate_data);
- if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) &&
- (defrag->defrag_status != GF_DEFRAG_STATUS_FAILED)) {
- defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+ }
+
+ gf_msg_trace(this->name, 0, "fix layout called on %s", loc->path);
+
+ if (gf_defrag_settle_hash(this, defrag, loc, fix_layout) != 0) {
+ defrag->total_failures++;
+
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SETTLE_HASH_FAILED,
+ "Settle hash failed for %s", loc->path);
+
+ ret = -1;
+
+ if (conf->decommission_in_progress) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ goto out;
}
+ }
+
+ ret = 0;
+out:
+ if (free_entries)
+ gf_dirent_free(&entries);
+ loc_wipe(&entry_loc);
+ if (fd)
+ fd_unref(fd);
+
+ return ret;
+}
+
+int
+dht_init_local_subvols_and_nodeuuids(xlator_t *this, dht_conf_t *conf,
+ loc_t *loc)
+{
+ dict_t *dict = NULL;
+ uuid_t *uuid_ptr = NULL;
+ int ret = -1;
+ int i = 0;
+ int j = 0;
+
+ /* Find local subvolumes */
+ ret = syncop_getxattr(this, loc, &dict, GF_REBAL_FIND_LOCAL_SUBVOL, NULL,
+ NULL);
+ if (ret && (ret != -ENODATA)) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
+ "local "
+ "subvolume determination failed with error: %d",
+ -ret);
+ ret = -1;
+ goto out;
+ }
+
+ if (!ret)
+ goto out;
+
+ ret = syncop_getxattr(this, loc, &dict, GF_REBAL_OLD_FIND_LOCAL_SUBVOL,
+ NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
+ "local "
+ "subvolume determination failed with error: %d",
+ -ret);
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
out:
- LOCK (&defrag->lock);
- {
- status = dict_new ();
- gf_defrag_status_get (defrag, status);
- glusterfs_rebalance_event_notify (status);
- if (status)
- dict_unref (status);
- defrag->is_exiting = 1;
+ if (ret) {
+ return ret;
+ }
+
+ for (i = 0; i < conf->local_subvols_cnt; i++) {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "local subvol: "
+ "%s",
+ conf->local_subvols[i]->name);
+
+ for (j = 0; j < conf->local_nodeuuids[i].count; j++) {
+ uuid_ptr = &(conf->local_nodeuuids[i].elements[j].uuid);
+ gf_msg(this->name, GF_LOG_INFO, 0, 0, "node uuid : %s",
+ uuid_utoa(*uuid_ptr));
}
- UNLOCK (&defrag->lock);
+ }
+
+ return ret;
+}
+
+/* Functions for the rebalance estimates feature */
+
+uint64_t
+gf_defrag_subvol_file_size(xlator_t *this, loc_t *root_loc)
+{
+ int ret = -1;
+ struct statvfs buf = {
+ 0,
+ };
+
+ ret = syncop_statfs(this, root_loc, &buf, NULL, NULL);
+ if (ret) {
+ /* Aargh! */
+ return 0;
+ }
+ return ((buf.f_blocks - buf.f_bfree) * buf.f_frsize);
+}
+
+uint64_t
+gf_defrag_total_file_size(xlator_t *this, loc_t *root_loc)
+{
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ uint64_t size_files = 0;
+ uint64_t total_size = 0;
- if (defrag) {
- GF_FREE (defrag);
- conf->defrag = NULL;
+ conf = this->private;
+ if (!conf) {
+ return 0;
+ }
+
+ for (i = 0; i < conf->local_subvols_cnt; i++) {
+ size_files = gf_defrag_subvol_file_size(conf->local_subvols[i],
+ root_loc);
+ total_size += size_files;
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "local subvol: %s,"
+ "cnt = %" PRIu64,
+ conf->local_subvols[i]->name, size_files);
+ }
+
+ gf_msg(this->name, GF_LOG_INFO, 0, 0, "Total size files = %" PRIu64,
+ total_size);
+
+ return total_size;
+}
+
+static void *
+dht_file_counter_thread(void *args)
+{
+ gf_defrag_info_t *defrag = NULL;
+ loc_t root_loc = {
+ 0,
+ };
+ struct timespec time_to_wait = {
+ 0,
+ };
+ uint64_t tmp_size = 0;
+
+ if (!args)
+ return NULL;
+
+ defrag = (gf_defrag_info_t *)args;
+ dht_build_root_loc(defrag->root_inode, &root_loc);
+
+ while (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
+ timespec_now(&time_to_wait);
+ time_to_wait.tv_sec += 600;
+
+ pthread_mutex_lock(&defrag->fc_mutex);
+ pthread_cond_timedwait(&defrag->fc_wakeup_cond, &defrag->fc_mutex,
+ &time_to_wait);
+
+ pthread_mutex_unlock(&defrag->fc_mutex);
+
+ if (defrag->defrag_status != GF_DEFRAG_STATUS_STARTED)
+ break;
+
+ tmp_size = gf_defrag_total_file_size(defrag->this, &root_loc);
+
+ gf_log("dht", GF_LOG_INFO, "tmp data size =%" PRIu64, tmp_size);
+
+ if (!tmp_size) {
+ gf_msg("dht", GF_LOG_ERROR, 0, 0,
+ "Failed to get "
+ "the total data size. Unable to estimate "
+ "time to complete rebalance.");
+ } else {
+ g_totalsize = tmp_size;
+ gf_msg_debug("dht", 0, "total data size =%" PRIu64, g_totalsize);
}
+ }
- return ret;
+ return NULL;
}
+int
+gf_defrag_estimates_cleanup(xlator_t *this, gf_defrag_info_t *defrag,
+ pthread_t filecnt_thread)
+{
+ int ret = -1;
+
+ /* Wake up the filecounter thread.
+ * By now the defrag status will no longer be
+ * GF_DEFRAG_STATUS_STARTED so the thread will exit the loop.
+ */
+ pthread_mutex_lock(&defrag->fc_mutex);
+ {
+ pthread_cond_broadcast(&defrag->fc_wakeup_cond);
+ }
+ pthread_mutex_unlock(&defrag->fc_mutex);
+
+ ret = pthread_join(filecnt_thread, NULL);
+ if (ret) {
+ gf_msg("dht", GF_LOG_ERROR, ret, 0,
+ "file_counter_thread: pthread_join failed.");
+ ret = -1;
+ }
+ return ret;
+}
-static int
-gf_defrag_done (int ret, call_frame_t *sync_frame, void *data)
+int
+gf_defrag_estimates_init(xlator_t *this, loc_t *loc, pthread_t *filecnt_thread)
+{
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+
+ conf = this->private;
+ defrag = conf->defrag;
+
+ g_totalsize = gf_defrag_total_file_size(this, loc);
+ if (!g_totalsize) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "Failed to get "
+ "the total data size. Unable to estimate "
+ "time to complete rebalance.");
+ goto out;
+ }
+
+ ret = gf_thread_create(filecnt_thread, NULL, dht_file_counter_thread,
+ (void *)defrag, "dhtfcnt");
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ret, 0,
+ "Failed to "
+ "create the file counter thread ");
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+/* Init and cleanup functions for parallel file migration*/
+int
+gf_defrag_parallel_migration_init(xlator_t *this, gf_defrag_info_t *defrag,
+ pthread_t **tid_array, int *thread_index)
{
- gf_listener_stop();
+ int ret = -1;
+ int thread_spawn_count = 0;
+ int index = 0;
+ pthread_t *tid = NULL;
- STACK_DESTROY (sync_frame->root);
- kill (getpid(), SIGTERM);
- return 0;
+ if (!defrag)
+ goto out;
+
+ /* Initialize global entry queue */
+ defrag->queue = GF_CALLOC(1, sizeof(struct dht_container),
+ gf_dht_mt_container_t);
+
+ if (!defrag->queue) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "Failed to initialise migration queue");
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&(defrag->queue[0].list));
+
+ thread_spawn_count = MAX(MAX_REBAL_THREADS, 4);
+
+ gf_msg_debug(this->name, 0, "thread_spawn_count: %d", thread_spawn_count);
+
+ tid = GF_CALLOC(thread_spawn_count, sizeof(pthread_t),
+ gf_common_mt_pthread_t);
+ if (!tid) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "Failed to create migration threads");
+ ret = -1;
+ goto out;
+ }
+ defrag->current_thread_count = thread_spawn_count;
+
+ /*Spawn Threads Here*/
+ while (index < thread_spawn_count) {
+ ret = gf_thread_create(&(tid[index]), NULL, gf_defrag_task,
+ (void *)defrag, "dhtmig%d", (index + 1) & 0x3ff);
+ if (ret != 0) {
+ gf_msg("DHT", GF_LOG_ERROR, ret, 0, "Thread[%d] creation failed. ",
+ index);
+ ret = -1;
+ goto out;
+ } else {
+ gf_log("DHT", GF_LOG_INFO,
+ "Thread[%d] "
+ "creation successful",
+ index);
+ }
+ index++;
+ }
+
+ ret = 0;
+out:
+ *thread_index = index;
+ *tid_array = tid;
+
+ return ret;
}
-void *
-gf_defrag_start (void *data)
+int
+gf_defrag_parallel_migration_cleanup(gf_defrag_info_t *defrag,
+ pthread_t *tid_array, int thread_index)
{
- int ret = -1;
- call_frame_t *frame = NULL;
- dht_conf_t *conf = NULL;
- gf_defrag_info_t *defrag = NULL;
- xlator_t *this = NULL;
-
- this = data;
- conf = this->private;
- if (!conf)
- goto out;
+ int ret = -1;
+ int i = 0;
- defrag = conf->defrag;
- if (!defrag)
- goto out;
+ if (!defrag)
+ goto out;
- frame = create_frame (this, this->ctx->pool);
- if (!frame)
- goto out;
+ /* Wake up all migration threads */
+ pthread_mutex_lock(&defrag->dfq_mutex);
+ {
+ defrag->crawl_done = 1;
- frame->root->pid = GF_CLIENT_PID_DEFRAG;
+ pthread_cond_broadcast(&defrag->parallel_migration_cond);
+ pthread_cond_broadcast(&defrag->df_wakeup_thread);
+ }
+ pthread_mutex_unlock(&defrag->dfq_mutex);
- defrag->pid = frame->root->pid;
+ /*Wait for all the threads to complete their task*/
+ for (i = 0; i < thread_index; i++) {
+ pthread_join(tid_array[i], NULL);
+ }
- defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
+ GF_FREE(tid_array);
- ret = synctask_new (this->ctx->env, gf_defrag_start_crawl,
- gf_defrag_done, frame, this);
+ /* Cleanup the migration queue */
+ if (defrag->queue) {
+ gf_dirent_free(defrag->queue[0].df_entry);
+ INIT_LIST_HEAD(&(defrag->queue[0].list));
+ }
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Could not create"
- " task for rebalance");
+ GF_FREE(defrag->queue);
+
+ ret = 0;
out:
- return NULL;
+ return ret;
}
int
-gf_defrag_status_get (gf_defrag_info_t *defrag, dict_t *dict)
+gf_defrag_start_crawl(void *data)
{
- int ret = 0;
- uint64_t files = 0;
- uint64_t size = 0;
- uint64_t lookup = 0;
- uint64_t failures = 0;
- char *status = "";
- double elapsed = 0;
- struct timeval end = {0,};
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ dict_t *fix_layout = NULL;
+ dict_t *migrate_data = NULL;
+ dict_t *status = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ call_frame_t *statfs_frame = NULL;
+ xlator_t *old_THIS = NULL;
+ int ret = -1;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ struct iatt parent = {
+ 0,
+ };
+ int thread_index = 0;
+ pthread_t *tid = NULL;
+ pthread_t filecnt_thread;
+ gf_boolean_t fc_thread_started = _gf_false;
+
+ this = data;
+ if (!this)
+ goto exit;
+
+ ctx = this->ctx;
+ if (!ctx)
+ goto exit;
+
+ conf = this->private;
+ if (!conf)
+ goto exit;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto exit;
+
+ defrag->start_time = gf_time();
+
+ dht_build_root_inode(this, &defrag->root_inode);
+ if (!defrag->root_inode)
+ goto out;
+
+ dht_build_root_loc(defrag->root_inode, &loc);
+
+ /* fix-layout on '/' first */
+
+ ret = syncop_lookup(this, &loc, &iatt, &parent, NULL, NULL);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_START_FAILED,
+ "Failed to start rebalance: look up on / failed");
+ ret = -1;
+ goto out;
+ }
+ old_THIS = THIS;
+ THIS = this;
- if (!defrag)
- goto out;
+ statfs_frame = create_frame(this, this->ctx->pool);
+ if (!statfs_frame) {
+ gf_msg(this->name, GF_LOG_ERROR, DHT_MSG_NO_MEMORY, ENOMEM,
+ "Insufficient memory. Frame creation failed");
+ ret = -1;
+ goto out;
+ }
- ret = 0;
- if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED)
- goto out;
+ /* async statfs update for honoring min-free-disk */
+ dht_get_du_info(statfs_frame, this, &loc);
+ THIS = old_THIS;
- files = defrag->total_files;
- size = defrag->total_data;
- lookup = defrag->num_files_lookedup;
- failures = defrag->total_failures;
+ fix_layout = dict_new();
+ if (!fix_layout) {
+ ret = -1;
+ goto out;
+ }
+
+ /*
+ * Unfortunately, we can't do special xattrs (like fix.layout) and
+ * real ones in the same call currently, and changing it seems
+ * riskier than just doing two calls.
+ */
+
+ gf_log(this->name, GF_LOG_INFO, "%s using commit hash %u", __func__,
+ conf->vol_commit_hash);
+
+ ret = dict_set_uint32(fix_layout, conf->commithash_xattr_name,
+ conf->vol_commit_hash);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Failed to set %s",
+ conf->commithash_xattr_name);
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_setxattr(this, &loc, fix_layout, 0, NULL, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to set commit hash on %s. "
+ "Rebalance cannot proceed.",
+ loc.path);
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+
+ /* We now return to our regularly scheduled program. */
+
+ ret = dict_set_str(fix_layout, GF_XATTR_FIX_LAYOUT_KEY, "yes");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_START_FAILED,
+ "Failed to start rebalance:"
+ "Failed to set dictionary value: key = %s",
+ GF_XATTR_FIX_LAYOUT_KEY);
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
- gettimeofday (&end, NULL);
+ defrag->new_commit_hash = conf->vol_commit_hash;
- elapsed = end.tv_sec - defrag->start_time.tv_sec;
+ ret = syncop_setxattr(this, &loc, fix_layout, 0, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_REBALANCE_FAILED,
+ "fix layout on %s failed", loc.path);
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
- if (!dict)
- goto log;
+ if (defrag->cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ /* We need to migrate files */
- ret = dict_set_uint64 (dict, "files", files);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set file count");
+ migrate_data = dict_new();
+ if (!migrate_data) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_str(
+ migrate_data, GF_XATTR_FILE_MIGRATE_KEY,
+ (defrag->cmd == GF_DEFRAG_CMD_START_FORCE) ? "force" : "non-force");
+ if (ret) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
- ret = dict_set_uint64 (dict, "size", size);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set size of xfer");
+ ret = dht_init_local_subvols_and_nodeuuids(this, conf, &loc);
+ if (ret) {
+ ret = -1;
+ goto out;
+ }
- ret = dict_set_uint64 (dict, "lookups", lookup);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set lookedup file count");
+ /* Initialise the structures required for parallel migration */
+ ret = gf_defrag_parallel_migration_init(this, defrag, &tid,
+ &thread_index);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "Aborting rebalance.");
+ goto out;
+ }
- ret = dict_set_int32 (dict, "status", defrag->defrag_status);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set status");
- if (elapsed) {
- ret = dict_set_double (dict, "run-time", elapsed);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set run-time");
+ ret = gf_defrag_estimates_init(this, &loc, &filecnt_thread);
+ if (ret) {
+ /* Not a fatal error. Allow the rebalance to proceed*/
+ ret = 0;
+ } else {
+ fc_thread_started = _gf_true;
}
+ }
+
+ ret = gf_defrag_fix_layout(this, defrag, &loc, fix_layout, migrate_data);
+ if (ret) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+
+ if (gf_defrag_settle_hash(this, defrag, &loc, fix_layout) != 0) {
+ defrag->total_failures++;
+ ret = -1;
+ goto out;
+ }
+
+ gf_log("DHT", GF_LOG_INFO, "crawling file-system completed");
+out:
+
+ /* We are here means crawling the entire file system is done
+ or something failed. Set defrag->crawl_done flag to intimate
+ the migrator threads to exhaust the defrag->queue and terminate*/
+
+ if (ret) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_FAILED;
+ }
+
+ gf_defrag_parallel_migration_cleanup(defrag, tid, thread_index);
+
+ if ((defrag->defrag_status != GF_DEFRAG_STATUS_STOPPED) &&
+ (defrag->defrag_status != GF_DEFRAG_STATUS_FAILED)) {
+ defrag->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+ }
+
+ if (fc_thread_started) {
+ gf_defrag_estimates_cleanup(this, defrag, filecnt_thread);
+ }
+
+ dht_send_rebalance_event(this, defrag->cmd, defrag->defrag_status);
+
+ status = dict_new();
+ LOCK(&defrag->lock);
+ {
+ gf_defrag_status_get(conf, status);
+ if (ctx && ctx->notify)
+ ctx->notify(GF_EN_DEFRAG_STATUS, status);
+ if (status)
+ dict_unref(status);
+ defrag->is_exiting = 1;
+ }
+ UNLOCK(&defrag->lock);
+
+ GF_FREE(defrag);
+ conf->defrag = NULL;
+
+ if (migrate_data)
+ dict_unref(migrate_data);
+
+ if (statfs_frame) {
+ STACK_DESTROY(statfs_frame->root);
+ }
+exit:
+ return ret;
+}
+
+static int
+gf_defrag_done(int ret, call_frame_t *sync_frame, void *data)
+{
+ gf_listener_stop(sync_frame->this);
+
+ STACK_DESTROY(sync_frame->root);
+ kill(getpid(), SIGTERM);
+ return 0;
+}
+
+void *
+gf_defrag_start(void *data)
+{
+ int ret = -1;
+ call_frame_t *frame = NULL;
+ dht_conf_t *conf = NULL;
+ gf_defrag_info_t *defrag = NULL;
+ xlator_t *this = NULL;
+ xlator_t *old_THIS = NULL;
+
+ this = data;
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ defrag = conf->defrag;
+ if (!defrag)
+ goto out;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ frame->root->pid = GF_CLIENT_PID_DEFRAG;
+
+ defrag->pid = frame->root->pid;
+
+ defrag->defrag_status = GF_DEFRAG_STATUS_STARTED;
+
+ old_THIS = THIS;
+ THIS = this;
+ ret = synctask_new(this->ctx->env, gf_defrag_start_crawl, gf_defrag_done,
+ frame, this);
+
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_REBALANCE_START_FAILED,
+ "Could not create task for rebalance");
+ THIS = old_THIS;
+out:
+ return NULL;
+}
+
+uint64_t
+gf_defrag_get_estimates_based_on_size(dht_conf_t *conf)
+{
+ gf_defrag_info_t *defrag = NULL;
+ double rate_processed = 0;
+ uint64_t total_processed = 0;
+ uint64_t tmp_count = 0;
+ uint64_t time_to_complete = 0;
+ double elapsed = 0;
+
+ defrag = conf->defrag;
+
+ if (!g_totalsize)
+ goto out;
+
+ elapsed = gf_time() - defrag->start_time;
+
+ /* Don't calculate the estimates for the first 10 minutes.
+ * It is unlikely to be accurate and estimates are not required
+ * if the process finishes in less than 10 mins.
+ */
+
+ if (elapsed < ESTIMATE_START_INTERVAL) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, 0,
+ "Rebalance estimates will not be available for the "
+ "first %d seconds.",
+ ESTIMATE_START_INTERVAL);
+
+ goto out;
+ }
+
+ total_processed = defrag->size_processed;
+
+ /* rate at which files processed */
+ rate_processed = (total_processed) / elapsed;
+
+ tmp_count = g_totalsize;
+
+ if (rate_processed) {
+ time_to_complete = (tmp_count) / rate_processed;
+
+ } else {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, 0,
+ "Unable to calculate estimated time for rebalance");
+ }
+
+ gf_log(THIS->name, GF_LOG_INFO,
+ "TIME: (size) total_processed=%" PRIu64 " tmp_cnt = %" PRIu64
+ ","
+ "rate_processed=%f, elapsed = %f",
+ total_processed, tmp_count, rate_processed, elapsed);
+
+out:
+ return time_to_complete;
+}
+
+int
+gf_defrag_status_get(dht_conf_t *conf, dict_t *dict)
+{
+ int ret = 0;
+ uint64_t files = 0;
+ uint64_t size = 0;
+ uint64_t lookup = 0;
+ uint64_t failures = 0;
+ uint64_t skipped = 0;
+ char *status = "";
+ double elapsed = 0;
+ uint64_t time_to_complete = 0;
+ uint64_t time_left = 0;
+ gf_defrag_info_t *defrag = conf->defrag;
+
+ if (!defrag)
+ goto out;
+
+ ret = 0;
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED)
+ goto out;
+
+ files = defrag->total_files;
+ size = defrag->total_data;
+ lookup = defrag->num_files_lookedup;
+ failures = defrag->total_failures;
+ skipped = defrag->skipped;
+
+ elapsed = gf_time() - defrag->start_time;
+
+ /* The rebalance is still in progress */
+
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_STARTED) {
+ time_to_complete = gf_defrag_get_estimates_based_on_size(conf);
+
+ if (time_to_complete && (time_to_complete > elapsed))
+ time_left = time_to_complete - elapsed;
+
+ gf_log(THIS->name, GF_LOG_INFO,
+ "TIME: Estimated total time to complete (size)= %" PRIu64
+ " seconds, seconds left = %" PRIu64 "",
+ time_to_complete, time_left);
+ }
+
+ if (!dict)
+ goto log;
+
+ ret = dict_set_uint64(dict, "files", files);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set file count");
+
+ ret = dict_set_uint64(dict, "size", size);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set size of xfer");
+
+ ret = dict_set_uint64(dict, "lookups", lookup);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set lookedup file count");
+
+ ret = dict_set_int32(dict, "status", defrag->defrag_status);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set status");
+
+ ret = dict_set_double(dict, "run-time", elapsed);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set run-time");
+
+ ret = dict_set_uint64(dict, "failures", failures);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set failure count");
+
+ ret = dict_set_uint64(dict, "skipped", skipped);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set skipped file count");
+
+ ret = dict_set_uint64(dict, "time-left", time_left);
+ if (ret)
+ gf_log(THIS->name, GF_LOG_WARNING, "failed to set time-left");
- ret = dict_set_uint64 (dict, "failures", failures);
log:
- switch (defrag->defrag_status) {
+ switch (defrag->defrag_status) {
case GF_DEFRAG_STATUS_NOT_STARTED:
- status = "not started";
- break;
+ status = "not started";
+ break;
case GF_DEFRAG_STATUS_STARTED:
- status = "in progress";
- break;
+ status = "in progress";
+ break;
case GF_DEFRAG_STATUS_STOPPED:
- status = "stopped";
- break;
+ status = "stopped";
+ break;
case GF_DEFRAG_STATUS_COMPLETE:
- status = "completed";
- break;
+ status = "completed";
+ break;
case GF_DEFRAG_STATUS_FAILED:
- status = "failed";
- break;
- }
-
- gf_log (THIS->name, GF_LOG_INFO, "Rebalance is %s. Time taken is %.2f "
- "secs", status, elapsed);
- gf_log (THIS->name, GF_LOG_INFO, "Files migrated: %"PRIu64", size: %"
- PRIu64", lookups: %"PRIu64", failures: %"PRIu64, files, size,
- lookup, failures);
-
-
+ status = "failed";
+ break;
+ default:
+ break;
+ }
+
+ gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
+ "Rebalance is %s. Time taken is %.2f secs", status, elapsed);
+ gf_msg(THIS->name, GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STATUS,
+ "Files migrated: %" PRIu64 ", size: %" PRIu64 ", lookups: %" PRIu64
+ ", failures: %" PRIu64
+ ", skipped: "
+ "%" PRIu64,
+ files, size, lookup, failures, skipped);
out:
- return 0;
+ return 0;
}
int
-gf_defrag_stop (gf_defrag_info_t *defrag, dict_t *output)
+gf_defrag_stop(dht_conf_t *conf, gf_defrag_status_t status, dict_t *output)
{
- /* TODO: set a variable 'stop_defrag' here, it should be checked
- in defrag loop */
- int ret = -1;
- GF_ASSERT (defrag);
+ /* TODO: set a variable 'stop_defrag' here, it should be checked
+ in defrag loop */
+ int ret = -1;
+ gf_defrag_info_t *defrag = conf->defrag;
- if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) {
- goto out;
- }
+ GF_ASSERT(defrag);
- gf_log ("", GF_LOG_INFO, "Recieved stop command on rebalance");
- defrag->defrag_status = GF_DEFRAG_STATUS_STOPPED;
+ if (defrag->defrag_status == GF_DEFRAG_STATUS_NOT_STARTED) {
+ goto out;
+ }
- if (output)
- gf_defrag_status_get (defrag, output);
- ret = 0;
+ gf_msg("", GF_LOG_INFO, 0, DHT_MSG_REBALANCE_STOPPED,
+ "Received stop command on rebalance");
+ defrag->defrag_status = status;
+
+ if (output)
+ gf_defrag_status_get(conf, output);
+ ret = 0;
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug("", 0, "Returning %d", ret);
+ return ret;
}
diff --git a/xlators/cluster/dht/src/dht-rename.c b/xlators/cluster/dht/src/dht-rename.c
index f1a4364df20..d9dbf50492f 100644
--- a/xlators/cluster/dht/src/dht-rename.c
+++ b/xlators/cluster/dht/src/dht-rename.c
@@ -11,852 +11,1987 @@
/* TODO: link(oldpath, newpath) fails if newpath already exists. DHT should
* delete the newpath if it gets EEXISTS from link() call.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "xlator.h"
#include "dht-common.h"
-#include "defaults.h"
+#include "dht-lock.h"
+#include <glusterfs/defaults.h>
+int
+dht_rename_unlock(call_frame_t *frame, xlator_t *this);
+int32_t
+dht_rename_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
int
-dht_rename_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = 0;
- call_frame_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- if (op_ret == -1) {
- /* TODO: undo the damage */
-
- gf_log (this->name, GF_LOG_INFO,
- "rename %s -> %s on %s failed (%s)",
- local->loc.path, local->loc2.path,
- prev->this->name, strerror (op_errno));
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- goto unwind;
- }
- /* TODO: construct proper stbuf for dir */
- /*
- * FIXME: is this the correct way to build stbuf and
- * parent bufs?
- */
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preoldparent, preoldparent,
- prev->this);
- dht_iatt_merge (this, &local->postoldparent, postoldparent,
- prev->this);
- dht_iatt_merge (this, &local->preparent, prenewparent,
- prev->this);
- dht_iatt_merge (this, &local->postparent, postnewparent,
- prev->this);
+dht_rename_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ local = frame->local;
-unwind:
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- WIPE (&local->preoldparent);
- WIPE (&local->postoldparent);
- WIPE (&local->preparent);
- WIPE (&local->postparent);
-
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent,
- &local->preparent, &local->postparent, xdata);
- }
+ dht_set_fixed_dir_stat(&local->preoldparent);
+ dht_set_fixed_dir_stat(&local->postoldparent);
+ dht_set_fixed_dir_stat(&local->preparent);
+ dht_set_fixed_dir_stat(&local->postparent);
- return 0;
+ if (IA_ISREG(local->stbuf.ia_type))
+ DHT_STRIP_PHASE1_FLAGS(&local->stbuf);
+
+ DHT_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno,
+ &local->stbuf, &local->preoldparent, &local->postoldparent,
+ &local->preparent, &local->postparent, local->xattr);
+ return 0;
}
+static void
+dht_rename_dir_unlock_src(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+
+ local = frame->local;
+ dht_unlock_namespace(frame, &local->lock[0]);
+ return;
+}
+
+static void
+dht_rename_dir_unlock_dst(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ int op_ret = -1;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+
+ /* Unlock entrylk */
+ dht_unlock_entrylk_wrapper(frame, &local->lock[1].ns.directory_ns);
+
+ /* Unlock inodelk */
+ op_ret = dht_unlock_inodelk(frame, local->lock[1].ns.parent_layout.locks,
+ local->lock[1].ns.parent_layout.lk_count,
+ dht_rename_unlock_cbk);
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ if (IA_ISREG(local->stbuf.ia_type))
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
+ "winding unlock inodelk failed "
+ "rename (%s:%s:%s %s:%s:%s), "
+ "stale locks left on bricks",
+ local->loc.path, src_gfid, local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ? local->dst_cached->name : NULL);
+ else
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
+ "winding unlock inodelk failed "
+ "rename (%s:%s %s:%s), "
+ "stale locks left on bricks",
+ local->loc.path, src_gfid, local->loc2.path, dst_gfid);
+
+ dht_rename_unlock_cbk(frame, NULL, this, 0, 0, NULL);
+ }
+
+ return;
+}
+static int
+dht_rename_dir_unlock(call_frame_t *frame, xlator_t *this)
+{
+ dht_rename_dir_unlock_src(frame, this);
+ dht_rename_dir_unlock_dst(frame, this);
+ return 0;
+}
int
-dht_rename_hashed_dir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent, dict_t *xdata)
-{
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int call_cnt = 0;
- call_frame_t *prev = NULL;
- int i = 0;
-
- conf = this->private;
- local = frame->local;
- prev = cookie;
-
- if (op_ret == -1) {
- /* TODO: undo the damage */
-
- gf_log (this->name, GF_LOG_INFO,
- "rename %s -> %s on %s failed (%s)",
- local->loc.path, local->loc2.path,
- prev->this->name, strerror (op_errno));
-
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- goto unwind;
- }
- /* TODO: construct proper stbuf for dir */
- /*
- * FIXME: is this the correct way to build stbuf and
- * parent bufs?
+dht_rename_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ int i = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ int subvol_cnt = -1;
+
+ conf = this->private;
+ local = frame->local;
+ prev = cookie;
+ subvol_cnt = dht_subvol_cnt(this, prev);
+ local->ret_cache[subvol_cnt] = op_ret;
+
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.inode->gfid, gfid);
+
+ gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_RENAME_FAILED,
+ "Rename %s -> %s on %s failed, (gfid = %s)", local->loc.path,
+ local->loc2.path, prev->name, gfid);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ }
+ /* TODO: construct proper stbuf for dir */
+ /*
+ * FIXME: is this the correct way to build stbuf and
+ * parent bufs?
+ */
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->preoldparent, preoldparent);
+ dht_iatt_merge(this, &local->postoldparent, postoldparent);
+ dht_iatt_merge(this, &local->preparent, prenewparent);
+ dht_iatt_merge(this, &local->postparent, postnewparent);
+
+unwind:
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ /* We get here with local->call_cnt == 0. Which means
+ * we are the only one executing this code, there is
+ * no contention. Therefore it's safe to manipulate or
+ * deref local->call_cnt directly (without locking).
*/
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preoldparent, preoldparent,
- prev->this);
- dht_iatt_merge (this, &local->postoldparent, postoldparent,
- prev->this);
- dht_iatt_merge (this, &local->preparent, prenewparent,
- prev->this);
- dht_iatt_merge (this, &local->postparent, postnewparent,
- prev->this);
-
- call_cnt = local->call_cnt = conf->subvolume_cnt - 1;
-
- if (!local->call_cnt)
- goto unwind;
-
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (conf->subvolumes[i] == local->dst_hashed)
+ if (local->ret_cache[conf->subvolume_cnt] == 0) {
+ /* count errant subvols in last field of ret_cache */
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (local->ret_cache[i] != 0)
+ ++local->ret_cache[conf->subvolume_cnt];
+ }
+ if (local->ret_cache[conf->subvolume_cnt]) {
+ /* undoing the damage:
+ * for all subvolumes, where rename
+ * succeeded, we perform the reverse operation
+ */
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (local->ret_cache[i] == 0)
+ ++local->call_cnt;
+ }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (local->ret_cache[i])
continue;
- STACK_WIND (frame, dht_rename_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->rename,
- &local->loc, &local->loc2, NULL);
- if (!--call_cnt)
- break;
- }
+ STACK_WIND(frame, dht_rename_dir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i]->fops->rename, &local->loc2,
+ &local->loc, NULL);
+ }
- return 0;
-unwind:
- WIPE (&local->preoldparent);
- WIPE (&local->postoldparent);
- WIPE (&local->preparent);
- WIPE (&local->postparent);
+ return 0;
+ }
+ }
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent,
- &local->preparent, &local->postparent, NULL);
+ WIPE(&local->preoldparent);
+ WIPE(&local->postoldparent);
+ WIPE(&local->preparent);
+ WIPE(&local->postparent);
- return 0;
+ dht_rename_dir_unlock(frame, this);
+ }
+
+ return 0;
}
+int
+dht_rename_hashed_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ int call_cnt = 0;
+ xlator_t *prev = NULL;
+ int i = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ conf = this->private;
+ local = frame->local;
+ prev = cookie;
+
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.inode->gfid, gfid);
+
+ gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_RENAME_FAILED,
+ "rename %s -> %s on %s failed, (gfid = %s) ", local->loc.path,
+ local->loc2.path, prev->name, gfid);
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ }
+ /* TODO: construct proper stbuf for dir */
+ /*
+ * FIXME: is this the correct way to build stbuf and
+ * parent bufs?
+ */
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->preoldparent, preoldparent);
+ dht_iatt_merge(this, &local->postoldparent, postoldparent);
+ dht_iatt_merge(this, &local->preparent, prenewparent);
+ dht_iatt_merge(this, &local->postparent, postnewparent);
+
+ call_cnt = local->call_cnt = conf->subvolume_cnt - 1;
+
+ if (!local->call_cnt)
+ goto unwind;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == local->dst_hashed)
+ continue;
+ STACK_WIND_COOKIE(
+ frame, dht_rename_dir_cbk, conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->rename, &local->loc, &local->loc2, NULL);
+ if (!--call_cnt)
+ break;
+ }
+
+ return 0;
+unwind:
+ WIPE(&local->preoldparent);
+ WIPE(&local->postoldparent);
+ WIPE(&local->preparent);
+ WIPE(&local->postparent);
+
+ dht_rename_dir_unlock(frame, this);
+ return 0;
+}
int
-dht_rename_dir_do (call_frame_t *frame, xlator_t *this)
+dht_rename_dir_do(call_frame_t *frame, xlator_t *this)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (local->op_ret == -1)
- goto err;
+ if (local->op_ret == -1)
+ goto err;
- local->op_ret = 0;
+ local->op_ret = 0;
- STACK_WIND (frame, dht_rename_hashed_dir_cbk,
- local->dst_hashed,
- local->dst_hashed->fops->rename,
- &local->loc, &local->loc2, NULL);
- return 0;
+ STACK_WIND_COOKIE(frame, dht_rename_hashed_dir_cbk, local->dst_hashed,
+ local->dst_hashed, local->dst_hashed->fops->rename,
+ &local->loc, &local->loc2, NULL);
+ return 0;
err:
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno, NULL, NULL,
- NULL, NULL, NULL, NULL);
- return 0;
+ dht_rename_dir_unlock(frame, this);
+ return 0;
}
-
int
-dht_rename_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
-
- local = frame->local;
- prev = cookie;
-
- if (op_ret > 2) {
- gf_log (this->name, GF_LOG_TRACE,
- "readdir on %s for %s returned %d entries",
- prev->this->name, local->loc.path, op_ret);
- local->op_ret = -1;
- local->op_errno = ENOTEMPTY;
- }
+dht_rename_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ xlator_t *prev = NULL;
- this_call_cnt = dht_frame_return (frame);
+ local = frame->local;
+ prev = cookie;
- if (is_last_call (this_call_cnt)) {
- dht_rename_dir_do (frame, this);
- }
+ if (op_ret > 2) {
+ gf_msg_trace(this->name, 0, "readdir on %s for %s returned %d entries",
+ prev->name, local->loc.path, op_ret);
+ local->op_ret = -1;
+ local->op_errno = ENOTEMPTY;
+ }
- return 0;
-}
+ this_call_cnt = dht_frame_return(frame);
+ if (is_last_call(this_call_cnt)) {
+ dht_rename_dir_do(frame, this);
+ }
+
+ return 0;
+}
int
-dht_rename_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+dht_rename_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
{
- dht_local_t *local = NULL;
- int this_call_cnt = -1;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ int this_call_cnt = -1;
+ xlator_t *prev = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ local = frame->local;
+ prev = cookie;
- local = frame->local;
- prev = cookie;
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.inode->gfid, gfid);
+ gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_OPENDIR_FAILED,
+ "opendir on %s for %s failed,(gfid = %s) ", prev->name,
+ local->loc.path, gfid);
+ goto err;
+ }
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_INFO,
- "opendir on %s for %s failed (%s)",
- prev->this->name, local->loc.path,
- strerror (op_errno));
- goto err;
- }
-
- STACK_WIND (frame, dht_rename_readdir_cbk,
- prev->this, prev->this->fops->readdir,
- local->fd, 4096, 0, NULL);
+ fd_bind(fd);
+ STACK_WIND_COOKIE(frame, dht_rename_readdir_cbk, prev, prev,
+ prev->fops->readdir, local->fd, 4096, 0, NULL);
- return 0;
+ return 0;
err:
- this_call_cnt = dht_frame_return (frame);
+ this_call_cnt = dht_frame_return(frame);
- if (is_last_call (this_call_cnt)) {
- dht_rename_dir_do (frame, this);
- }
+ if (is_last_call(this_call_cnt)) {
+ dht_rename_dir_do(frame, this);
+ }
- return 0;
+ return 0;
}
-
int
-dht_rename_dir (call_frame_t *frame, xlator_t *this)
+dht_rename_dir_lock2_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- int i = 0;
- int op_errno = -1;
+ dht_local_t *local = NULL;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_conf_t *conf = NULL;
+ int i = 0;
+
+ local = frame->local;
+ conf = this->private;
+
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "acquiring entrylk after inodelk failed"
+ "rename (%s:%s:%s %s:%s:%s)",
+ local->loc.path, src_gfid, local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ? local->dst_cached->name : NULL);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ local->fd = fd_create(local->loc.inode, frame->root->pid);
+ if (!local->fd) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->op_ret = 0;
+
+ if (!local->dst_cached) {
+ dht_rename_dir_do(frame, this);
+ return 0;
+ }
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_rename_opendir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->opendir, &local->loc2,
+ local->fd, NULL);
+ }
- conf = frame->this->private;
- local = frame->local;
+ return 0;
- local->call_cnt = conf->subvolume_cnt;
+err:
+ /* No harm in calling an extra unlock */
+ dht_rename_dir_unlock(frame, this);
+ return 0;
+}
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!conf->subvolume_status[i]) {
- gf_log (this->name, GF_LOG_INFO,
- "one of the subvolumes down (%s)",
- conf->subvolumes[i]->name);
- op_errno = ENOTCONN;
- goto err;
- }
- }
+int
+dht_rename_dir_lock1_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+ loc_t *loc = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "acquiring entrylk after inodelk failed"
+ "rename (%s:%s:%s %s:%s:%s)",
+ local->loc.path, src_gfid, local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ? local->dst_cached->name : NULL);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ if (local->current == &local->lock[0]) {
+ loc = &local->loc2;
+ subvol = local->dst_hashed;
+ local->current = &local->lock[1];
+ } else {
+ loc = &local->loc;
+ subvol = local->src_hashed;
+ local->current = &local->lock[0];
+ }
+ ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns,
+ dht_rename_dir_lock2_cbk);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ return 0;
+err:
+ /* No harm in calling an extra unlock */
+ dht_rename_dir_unlock(frame, this);
+ return 0;
+}
- local->fd = fd_create (local->loc.inode, frame->root->pid);
- if (!local->fd) {
- op_errno = ENOMEM;
- goto err;
+/*
+ * If the hashed subvolumes of both source and dst are the different,
+ * lock in dictionary order of hashed subvol->name. This is important
+ * in case the parent directory is the same for both src and dst to
+ * prevent inodelk deadlocks when racing with a fix-layout op on the parent.
+ *
+ * If the hashed subvols are the same, use the gfid/name to determine
+ * the order of taking locks to prevent entrylk deadlocks when the parent
+ * dirs are the same.
+ *
+ */
+static int
+dht_order_rename_lock(call_frame_t *frame, loc_t **loc, xlator_t **subvol)
+{
+ int ret = 0;
+ int op_ret = 0;
+ dht_local_t *local = NULL;
+ char *src = NULL;
+ char *dst = NULL;
+
+ local = frame->local;
+
+ if (local->src_hashed->name == local->dst_hashed->name) {
+ ret = 0;
+ } else {
+ ret = strcmp(local->src_hashed->name, local->dst_hashed->name);
+ }
+
+ if (ret == 0) {
+ /* hashed subvols are the same for src and dst */
+ /* Entrylks need to be ordered*/
+
+ src = alloca(GF_UUID_BNAME_BUF_SIZE + strlen(local->loc.name) + 1);
+ if (!src) {
+ gf_msg(frame->this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "Insufficient memory for src");
+ op_ret = -1;
+ goto out;
}
- local->op_ret = 0;
+ if (!gf_uuid_is_null(local->loc.pargfid))
+ uuid_utoa_r(local->loc.pargfid, src);
+ else if (local->loc.parent)
+ uuid_utoa_r(local->loc.parent->gfid, src);
+ else
+ src[0] = '\0';
- if (!local->dst_cached) {
- dht_rename_dir_do (frame, this);
- return 0;
- }
+ strcat(src, local->loc.name);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- STACK_WIND (frame, dht_rename_opendir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->opendir,
- &local->loc2, local->fd, NULL);
+ dst = alloca(GF_UUID_BNAME_BUF_SIZE + strlen(local->loc2.name) + 1);
+ if (!dst) {
+ gf_msg(frame->this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "Insufficient memory for dst");
+ op_ret = -1;
+ goto out;
}
- return 0;
+ if (!gf_uuid_is_null(local->loc2.pargfid))
+ uuid_utoa_r(local->loc2.pargfid, dst);
+ else if (local->loc2.parent)
+ uuid_utoa_r(local->loc2.parent->gfid, dst);
+ else
+ dst[0] = '\0';
+
+ strcat(dst, local->loc2.name);
+ ret = strcmp(src, dst);
+ }
+
+ if (ret <= 0) {
+ /*inodelk in dictionary order of hashed subvol names*/
+ /*entrylk in dictionary order of gfid/basename */
+ local->current = &local->lock[0];
+ *loc = &local->loc;
+ *subvol = local->src_hashed;
+
+ } else {
+ local->current = &local->lock[1];
+ *loc = &local->loc2;
+ *subvol = local->dst_hashed;
+ }
+
+ op_ret = 0;
+
+out:
+ return op_ret;
+}
+
+int
+dht_rename_dir(call_frame_t *frame, xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ xlator_t *subvol = NULL;
+ int i = 0;
+ int ret = 0;
+ int op_errno = -1;
+
+ conf = frame->this->private;
+ local = frame->local;
+
+ local->ret_cache = GF_CALLOC(conf->subvolume_cnt + 1, sizeof(int),
+ gf_dht_ret_cache_t);
+
+ if (local->ret_cache == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->call_cnt = conf->subvolume_cnt;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i]) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED,
+ "Rename dir failed: subvolume down (%s)",
+ conf->subvolumes[i]->name);
+ op_errno = ENOTCONN;
+ goto err;
+ }
+ }
+
+ /* Locks on src and dst needs to ordered which otherwise might cause
+ * deadlocks when rename (src, dst) and rename (dst, src) is done from
+ * two different clients
+ */
+ ret = dht_order_rename_lock(frame, &loc, &subvol);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /* Rename must take locks on src to avoid lookup selfheal from
+ * recreating src on those subvols where the rename was successful.
+ * The locks can't be issued parallel as two different clients might
+ * attempt same rename command and be in dead lock.
+ */
+ ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns,
+ dht_rename_dir_lock1_cbk);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
+ DHT_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
+static int
+dht_rename_track_for_changelog(xlator_t *this, dict_t *xattr, loc_t *oldloc,
+ loc_t *newloc)
+{
+ int ret = -1;
+ dht_changelog_rename_info_t *info = NULL;
+ char *name = NULL;
+ int len1 = 0;
+ int len2 = 0;
+ int size = 0;
+
+ if (!xattr || !oldloc || !newloc || !this)
+ return ret;
+
+ len1 = strlen(oldloc->name) + 1;
+ len2 = strlen(newloc->name) + 1;
+ size = sizeof(dht_changelog_rename_info_t) + len1 + len2;
+
+ info = GF_CALLOC(size, sizeof(char), gf_common_mt_char);
+ if (!info) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to calloc memory");
+ return ret;
+ }
+
+ gf_uuid_copy(info->old_pargfid, oldloc->pargfid);
+ gf_uuid_copy(info->new_pargfid, newloc->pargfid);
+
+ info->oldname_len = len1;
+ info->newname_len = len2;
+ strncpy(info->buffer, oldloc->name, len1);
+ name = info->buffer + len1;
+ strncpy(name, newloc->name, len2);
+
+ ret = dict_set_bin(xattr, DHT_CHANGELOG_RENAME_OP_KEY, info, size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dictionary value: key = %s,"
+ " path = %s",
+ DHT_CHANGELOG_RENAME_OP_KEY, oldloc->name);
+ GF_FREE(info);
+ }
+
+ return ret;
+}
+
+#define DHT_MARKER_DONT_ACCOUNT(xattr) \
+ do { \
+ int tmp = -1; \
+ if (!xattr) { \
+ xattr = dict_new(); \
+ if (!xattr) \
+ break; \
+ } \
+ tmp = dict_set_str(xattr, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY, "yes"); \
+ if (tmp) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \
+ "Failed to set dictionary value: key = %s," \
+ " path = %s", \
+ GLUSTERFS_MARKER_DONT_ACCOUNT_KEY, local->loc.path); \
+ } \
+ } while (0)
+
+#define DHT_CHANGELOG_TRACK_AS_RENAME(xattr, oldloc, newloc) \
+ do { \
+ int tmp = -1; \
+ if (!xattr) { \
+ xattr = dict_new(); \
+ if (!xattr) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \
+ "Failed to create dictionary to " \
+ "track rename"); \
+ break; \
+ } \
+ } \
+ \
+ tmp = dht_rename_track_for_changelog(this, xattr, oldloc, newloc); \
+ \
+ if (tmp) { \
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, \
+ "Failed to set dictionary value: key = %s," \
+ " path = %s", \
+ DHT_CHANGELOG_RENAME_OP_KEY, (oldloc)->path); \
+ } \
+ } while (0)
int
-dht_rename_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+dht_rename_unlock(call_frame_t *frame, xlator_t *this)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int this_call_cnt = 0;
+ dht_local_t *local = NULL;
+ int op_ret = -1;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ dht_ilock_wrap_t inodelk_wrapper = {
+ 0,
+ };
+
+ local = frame->local;
+ inodelk_wrapper.locks = local->rename_inodelk_backward_compatible;
+ inodelk_wrapper.lk_count = local->rename_inodelk_bc_count;
+
+ op_ret = dht_unlock_inodelk_wrapper(frame, &inodelk_wrapper);
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ if (IA_ISREG(local->stbuf.ia_type))
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
+ "winding unlock inodelk failed "
+ "rename (%s:%s:%s %s:%s:%s), "
+ "stale locks left on bricks",
+ local->loc.path, src_gfid, local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ? local->dst_cached->name : NULL);
+ else
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_UNLOCKING_FAILED,
+ "winding unlock inodelk failed "
+ "rename (%s:%s %s:%s), "
+ "stale locks left on bricks",
+ local->loc.path, src_gfid, local->loc2.path, dst_gfid);
+ }
+
+ dht_unlock_namespace(frame, &local->lock[0]);
+ dht_unlock_namespace(frame, &local->lock[1]);
+
+ dht_rename_unlock_cbk(frame, NULL, this, local->op_ret, local->op_errno,
+ NULL);
+ return 0;
+}
- local = frame->local;
- prev = cookie;
+int
+dht_rename_done(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "!local, should not happen");
- goto out;
- }
+ local = frame->local;
- this_call_cnt = dht_frame_return (frame);
+ if (local->linked == _gf_true) {
+ local->linked = _gf_false;
+ dht_linkfile_attr_heal(frame, this);
+ }
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: unlink on %s failed (%s)",
- local->loc.path, prev->this->name, strerror (op_errno));
- }
+ dht_rename_unlock(frame, this);
+ return 0;
+}
- WIPE (&local->preoldparent);
- WIPE (&local->postoldparent);
- WIPE (&local->preparent);
- WIPE (&local->postparent);
-
- if (is_last_call (this_call_cnt)) {
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent, &local->preparent,
- &local->postparent, NULL);
- }
+int
+dht_rename_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+ prev = cookie;
+
+ FRAME_SU_UNDO(frame, dht_local_t);
+ if (!local) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_VALUE,
+ "!local, should not happen");
+ goto out;
+ }
+
+ this_call_cnt = dht_frame_return(frame);
+
+ if (op_ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_UNLINK_FAILED,
+ "%s: Rename: unlink on %s failed ", local->loc.path, prev->name);
+ }
+
+ WIPE(&local->preoldparent);
+ WIPE(&local->postoldparent);
+ WIPE(&local->preparent);
+ WIPE(&local->postparent);
+
+ if (is_last_call(this_call_cnt)) {
+ dht_rename_done(frame, this);
+ }
out:
- return 0;
+ return 0;
}
-
int
-dht_rename_cleanup (call_frame_t *frame)
+dht_rename_cleanup(call_frame_t *frame)
{
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *dst_cached = NULL;
- int call_cnt = 0;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ int call_cnt = 0;
+ dict_t *xattr = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ local = frame->local;
+ this = frame->this;
- local = frame->local;
- this = frame->this;
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
- src_hashed = local->src_hashed;
- src_cached = local->src_cached;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
+ if (src_cached == dst_cached)
+ goto nolinks;
- if (src_cached == dst_cached)
- goto nolinks;
+ if (local->linked && (dst_hashed != src_hashed) &&
+ (dst_hashed != src_cached)) {
+ call_cnt++;
+ }
- if (dst_hashed != src_hashed && dst_hashed != src_cached)
- call_cnt++;
+ if (local->added_link && (src_cached != dst_hashed)) {
+ call_cnt++;
+ }
- if (src_cached != dst_hashed)
- call_cnt++;
+ local->call_cnt = call_cnt;
- local->call_cnt = call_cnt;
+ if (!call_cnt)
+ goto nolinks;
- if (!call_cnt)
- goto nolinks;
+ DHT_MARK_FOP_INTERNAL(xattr);
- if (dst_hashed != src_hashed && dst_hashed != src_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlinking linkfile %s @ %s => %s",
- local->loc.path, dst_hashed->name, src_cached->name);
- STACK_WIND (frame, dht_rename_unlink_cbk,
- dst_hashed, dst_hashed->fops->unlink,
- &local->loc, 0, NULL);
- }
+ gf_uuid_unparse(local->loc.inode->gfid, gfid);
- if (src_cached != dst_hashed) {
- gf_log (this->name, GF_LOG_TRACE,
- "unlinking link %s => %s (%s)", local->loc.path,
- local->loc2.path, src_cached->name);
- STACK_WIND (frame, dht_rename_unlink_cbk,
- src_cached, src_cached->fops->unlink,
- &local->loc2, 0, NULL);
- }
+ if (local->linked && (dst_hashed != src_hashed) &&
+ (dst_hashed != src_cached)) {
+ dict_t *xattr_new = NULL;
- return 0;
+ gf_msg_trace(this->name, 0,
+ "unlinking linkfile %s @ %s => %s, (gfid = %s)",
+ local->loc.path, dst_hashed->name, src_cached->name, gfid);
-nolinks:
- WIPE (&local->preoldparent);
- WIPE (&local->postoldparent);
- WIPE (&local->preparent);
- WIPE (&local->postparent);
+ xattr_new = dict_copy_with_ref(xattr, NULL);
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent, &local->preparent,
- &local->postparent, NULL);
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
- return 0;
-}
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, dst_hashed, dst_hashed,
+ dst_hashed->fops->unlink, &local->loc, 0, xattr_new);
+ dict_unref(xattr_new);
+ xattr_new = NULL;
+ }
-int
-dht_rename_links_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- call_frame_t *prev = NULL;
- dht_local_t *local = NULL;
-
- prev = cookie;
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "link/file %s on %s failed (%s)",
- local->loc.path, prev->this->name, strerror (op_errno));
+ if (local->added_link && (src_cached != dst_hashed)) {
+ dict_t *xattr_new = NULL;
+
+ gf_msg_trace(this->name, 0, "unlinking link %s => %s (%s), (gfid = %s)",
+ local->loc.path, local->loc2.path, src_cached->name, gfid);
+
+ xattr_new = dict_copy_with_ref(xattr, NULL);
+
+ if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
}
+ /* *
+ * The link to file is created using root permission.
+ * Hence deletion should happen using root. Otherwise
+ * it will fail.
+ */
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, src_cached, src_cached,
+ src_cached->fops->unlink, &local->loc2, 0, xattr_new);
- DHT_STACK_DESTROY (frame);
+ dict_unref(xattr_new);
+ xattr_new = NULL;
+ }
- return 0;
-}
+ if (xattr)
+ dict_unref(xattr);
+
+ return 0;
+
+nolinks:
+ WIPE(&local->preoldparent);
+ WIPE(&local->postoldparent);
+ WIPE(&local->preparent);
+ WIPE(&local->postparent);
+ dht_rename_unlock(frame, this);
+ return 0;
+}
int
-dht_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *rename_subvol = NULL;
- call_frame_t *link_frame = NULL;
- dht_local_t *link_local = NULL;
-
- local = frame->local;
- prev = cookie;
-
- src_hashed = local->src_hashed;
- src_cached = local->src_cached;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: rename on %s failed (%s)", local->loc.path,
- prev->this->name, strerror (op_errno));
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- goto cleanup;
- }
+dht_rename_unlink(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *rename_subvol = NULL;
+ dict_t *xattr = NULL;
- if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) {
- link_frame = copy_frame (frame);
- if (!link_frame) {
- goto err;
- }
+ local = frame->local;
- /* fop value sent as maxvalue because it is not used
- anywhere in this case */
- link_local = dht_local_init (link_frame, &local->loc2, NULL,
- GF_FOP_MAXVALUE);
- if (!link_local) {
- goto err;
- }
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
- if (link_local->loc.inode)
- inode_unref (link_local->loc.inode);
- link_local->loc.inode = inode_ref (local->loc.inode);
- uuid_copy (link_local->gfid, local->loc.inode->gfid);
+ local->call_cnt = 0;
- dht_linkfile_create (link_frame, dht_rename_links_create_cbk,
- src_cached, dst_hashed, &link_local->loc);
- }
+ /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk
+ * is called. since rename has already happened on rename_subvol,
+ * unlink shouldn't be sent for oldpath (either linkfile or cached-file)
+ * on rename_subvol. */
+ if (src_cached == dst_cached)
+ rename_subvol = src_cached;
+ else
+ rename_subvol = dst_hashed;
-err:
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preoldparent, preoldparent, prev->this);
- dht_iatt_merge (this, &local->postoldparent, postoldparent, prev->this);
- dht_iatt_merge (this, &local->preparent, prenewparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postnewparent, prev->this);
-
- /* NOTE: rename_subvol is the same subvolume from which dht_rename_cbk
- * is called. since rename has already happened on rename_subvol,
- * unlink should not be sent for oldpath (either linkfile or cached-file)
- * on rename_subvol. */
- if (src_cached == dst_cached)
- rename_subvol = src_cached;
- else
- rename_subvol = dst_hashed;
+ /* TODO: delete files in background */
- /* TODO: delete files in background */
+ if (src_cached != dst_hashed && src_cached != dst_cached)
+ local->call_cnt++;
- if (src_cached != dst_hashed && src_cached != dst_cached)
- local->call_cnt++;
+ if (src_hashed != rename_subvol && src_hashed != src_cached)
+ local->call_cnt++;
- if (src_hashed != rename_subvol && src_hashed != src_cached)
- local->call_cnt++;
+ if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached)
+ local->call_cnt++;
- if (dst_cached && dst_cached != dst_hashed && dst_cached != src_cached)
- local->call_cnt++;
+ if (local->call_cnt == 0)
+ goto unwind;
- if (local->call_cnt == 0)
- goto unwind;
+ DHT_MARK_FOP_INTERNAL(xattr);
- if (src_cached != dst_hashed && src_cached != dst_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "deleting old src datafile %s @ %s",
- local->loc.path, src_cached->name);
+ if (src_cached != dst_hashed && src_cached != dst_cached) {
+ dict_t *xattr_new = NULL;
- STACK_WIND (frame, dht_rename_unlink_cbk,
- src_cached, src_cached->fops->unlink,
- &local->loc, 0, NULL);
- }
+ xattr_new = dict_copy_with_ref(xattr, NULL);
- if (src_hashed != rename_subvol && src_hashed != src_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "deleting old src linkfile %s @ %s",
- local->loc.path, src_hashed->name);
+ gf_msg_trace(this->name, 0, "deleting old src datafile %s @ %s",
+ local->loc.path, src_cached->name);
- STACK_WIND (frame, dht_rename_unlink_cbk,
- src_hashed, src_hashed->fops->unlink,
- &local->loc, 0, NULL);
+ if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
}
- if (dst_cached
- && (dst_cached != dst_hashed)
- && (dst_cached != src_cached)) {
- gf_log (this->name, GF_LOG_TRACE,
- "deleting old dst datafile %s @ %s",
- local->loc2.path, dst_cached->name);
+ DHT_CHANGELOG_TRACK_AS_RENAME(xattr_new, &local->loc, &local->loc2);
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, src_cached, src_cached,
+ src_cached->fops->unlink, &local->loc, 0, xattr_new);
- STACK_WIND (frame, dht_rename_unlink_cbk,
- dst_cached, dst_cached->fops->unlink,
- &local->loc2, 0, NULL);
- }
- return 0;
+ dict_unref(xattr_new);
+ xattr_new = NULL;
+ }
-unwind:
- WIPE (&local->preoldparent);
- WIPE (&local->postoldparent);
- WIPE (&local->preparent);
- WIPE (&local->postparent);
+ if (src_hashed != rename_subvol && src_hashed != src_cached) {
+ dict_t *xattr_new = NULL;
- DHT_STRIP_PHASE1_FLAGS (&local->stbuf);
- DHT_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preoldparent,
- &local->postoldparent, &local->preparent,
- &local->postparent, NULL);
+ xattr_new = dict_copy_with_ref(xattr, NULL);
- return 0;
+ gf_msg_trace(this->name, 0, "deleting old src linkfile %s @ %s",
+ local->loc.path, src_hashed->name);
-cleanup:
- dht_rename_cleanup (frame);
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
- return 0;
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, src_hashed, src_hashed,
+ src_hashed->fops->unlink, &local->loc, 0, xattr_new);
+
+ dict_unref(xattr_new);
+ xattr_new = NULL;
+ }
+
+ if (dst_cached && (dst_cached != dst_hashed) &&
+ (dst_cached != src_cached)) {
+ gf_msg_trace(this->name, 0, "deleting old dst datafile %s @ %s",
+ local->loc2.path, dst_cached->name);
+
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_cbk, dst_cached, dst_cached,
+ dst_cached->fops->unlink, &local->loc2, 0, xattr);
+ }
+ if (xattr)
+ dict_unref(xattr);
+ return 0;
+
+unwind:
+ WIPE(&local->preoldparent);
+ WIPE(&local->postoldparent);
+ WIPE(&local->preparent);
+ WIPE(&local->postparent);
+
+ dht_rename_done(frame, this);
+
+ return 0;
}
+int
+dht_rename_links_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ xlator_t *prev = NULL;
+ dht_local_t *local = NULL;
+ call_frame_t *main_frame = NULL;
+
+ prev = cookie;
+ local = frame->local;
+ main_frame = local->main_frame;
+
+ /* TODO: Handle this case in lookup-optimize */
+ if (op_ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_CREATE_LINK_FAILED,
+ "link/file %s on %s failed", local->loc.path, prev->name);
+ }
+
+ if (local->linked == _gf_true) {
+ local->linked = _gf_false;
+ dht_linkfile_attr_heal(frame, this);
+ }
+
+ dht_rename_unlink(main_frame, this);
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
int
-dht_do_rename (call_frame_t *frame)
+dht_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *this = NULL;
- xlator_t *rename_subvol = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ call_frame_t *link_frame = NULL;
+ dht_local_t *link_local = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+
+ if (local->linked == _gf_true)
+ FRAME_SU_UNDO(frame, dht_local_t);
+
+ /* It is a critical failure iff we fail to rename the cached file
+ * if the rename of the linkto failed, it is not a critical failure,
+ * and we do not want to lose the created hard link for the new
+ * name as that could have been read by other clients.
+ *
+ * NOTE: If another client is attempting the same oldname -> newname
+ * rename, and finds both file names as existing, and are hard links
+ * to each other, then FUSE would send in an unlink for oldname. In
+ * this time duration if we treat the linkto as a critical error and
+ * unlink the newname we created, we would have effectively lost the
+ * file to rename operations.
+ *
+ * Repercussions of treating this as a non-critical error is that
+ * we could leave behind a stale linkto file and/or not create the new
+ * linkto file, the second case would be rectified by a subsequent
+ * lookup, the first case by a rebalance, like for all stale linkto
+ * files */
+
+ if (op_ret == -1) {
+ /* Critical failure: unable to rename the cached file */
+ if (prev == src_cached) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_RENAME_FAILED,
+ "%s: Rename on %s failed, (gfid = %s) ", local->loc.path,
+ prev->name,
+ local->loc.inode ? uuid_utoa(local->loc.inode->gfid) : "");
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto cleanup;
+ } else {
+ /* Non-critical failure, unable to rename the linkto
+ * file
+ */
+ gf_msg(this->name, GF_LOG_INFO, op_errno, DHT_MSG_RENAME_FAILED,
+ "%s: Rename (linkto file) on %s failed, "
+ "(gfid = %s) ",
+ local->loc.path, prev->name,
+ local->loc.inode ? uuid_utoa(local->loc.inode->gfid) : "");
+ }
+ }
+ if (xdata) {
+ if (!local->xattr)
+ local->xattr = dict_ref(xdata);
+ else
+ local->xattr = dict_copy_with_ref(xdata, local->xattr);
+ }
+
+ /* Merge attrs only from src_cached. In case there of src_cached !=
+ * dst_hashed, this ignores linkfile attrs. */
+ if (prev == src_cached) {
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ dht_iatt_merge(this, &local->preoldparent, preoldparent);
+ dht_iatt_merge(this, &local->postoldparent, postoldparent);
+ dht_iatt_merge(this, &local->preparent, prenewparent);
+ dht_iatt_merge(this, &local->postparent, postnewparent);
+ }
+
+ /* Create the linkto file for the dst file */
+ if ((src_cached == dst_cached) && (dst_hashed != dst_cached)) {
+ link_frame = copy_frame(frame);
+ if (!link_frame) {
+ goto unlink;
+ }
+ /* fop value sent as maxvalue because it is not used
+ * anywhere in this case */
+ link_local = dht_local_init(link_frame, &local->loc2, NULL,
+ GF_FOP_MAXVALUE);
+ if (!link_local) {
+ goto unlink;
+ }
- local = frame->local;
- this = frame->this;
+ if (link_local->loc.inode)
+ inode_unref(link_local->loc.inode);
+ link_local->loc.inode = inode_ref(local->loc.inode);
+ link_local->main_frame = frame;
+ link_local->stbuf = local->stbuf;
+ gf_uuid_copy(link_local->gfid, local->loc.inode->gfid);
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
- src_cached = local->src_cached;
+ dht_linkfile_create(link_frame, dht_rename_links_create_cbk, this,
+ src_cached, dst_hashed, &link_local->loc);
+ return 0;
+ }
- if (src_cached == dst_cached)
- rename_subvol = src_cached;
- else
- rename_subvol = dst_hashed;
+unlink:
- gf_log (this->name, GF_LOG_TRACE,
- "renaming %s => %s (%s)",
- local->loc.path, local->loc2.path, rename_subvol->name);
+ if (link_frame) {
+ DHT_STACK_DESTROY(link_frame);
+ }
+ dht_rename_unlink(frame, this);
+ return 0;
- STACK_WIND (frame, dht_rename_cbk,
- rename_subvol, rename_subvol->fops->rename,
- &local->loc, &local->loc2, NULL);
+cleanup:
+ dht_rename_cleanup(frame);
- return 0;
+ return 0;
}
+int
+dht_do_rename(call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *this = NULL;
+ xlator_t *rename_subvol = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
+ src_cached = local->src_cached;
+
+ if (src_cached == dst_cached)
+ rename_subvol = src_cached;
+ else
+ rename_subvol = dst_hashed;
+
+ if ((src_cached != dst_hashed) && (rename_subvol == dst_hashed)) {
+ DHT_MARKER_DONT_ACCOUNT(local->xattr_req);
+ }
+
+ if (rename_subvol == src_cached) {
+ DHT_CHANGELOG_TRACK_AS_RENAME(local->xattr_req, &local->loc,
+ &local->loc2);
+ }
+
+ gf_msg_trace(this->name, 0, "renaming %s => %s (%s)", local->loc.path,
+ local->loc2.path, rename_subvol->name);
+
+ if (local->linked == _gf_true)
+ FRAME_SU_DO(frame, dht_local_t);
+ STACK_WIND_COOKIE(frame, dht_rename_cbk, rename_subvol, rename_subvol,
+ rename_subvol->fops->rename, &local->loc, &local->loc2,
+ local->xattr_req);
+ return 0;
+}
int
-dht_rename_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int this_call_cnt = 0;
-
-
- local = frame->local;
- prev = cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "link/file on %s failed (%s)",
- prev->this->name, strerror (op_errno));
- local->op_ret = -1;
- if (op_errno != ENOENT)
- local->op_errno = op_errno;
- }
+dht_rename_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
- this_call_cnt = dht_frame_return (frame);
- if (is_last_call (this_call_cnt)) {
- if (local->op_ret == -1)
- goto cleanup;
+ local = frame->local;
+ prev = cookie;
- dht_do_rename (frame);
- }
+ if (op_ret == -1) {
+ gf_msg_debug(this->name, 0, "link/file on %s failed (%s)", prev->name,
+ strerror(op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ local->added_link = _gf_false;
+ } else
+ dht_iatt_merge(this, &local->stbuf, stbuf);
- return 0;
+ if (local->op_ret == -1)
+ goto cleanup;
+
+ dht_do_rename(frame);
+
+ return 0;
cleanup:
- dht_rename_cleanup (frame);
+ dht_rename_cleanup(frame);
- return 0;
+ return 0;
}
-
int
-dht_rename_unlink_links_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+dht_rename_linkto_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *src_cached = NULL;
+ dict_t *xattr = NULL;
+ local = frame->local;
+ DHT_MARK_FOP_INTERNAL(xattr);
+ prev = cookie;
+ src_cached = local->src_cached;
- local = frame->local;
- prev = cookie;
+ if (op_ret == -1) {
+ gf_msg_debug(this->name, 0, "link/file on %s failed (%s)", prev->name,
+ strerror(op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlink of %s on %s failed (%s)",
- local->loc2.path, prev->this->name,
- strerror (op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
+ /* If linkto creation failed move to failure cleanup code,
+ * instead of continuing with creating the link file */
+ if (local->op_ret != 0) {
+ goto cleanup;
+ }
- if (local->op_ret == -1)
- goto cleanup;
+ gf_msg_trace(this->name, 0, "link %s => %s (%s)", local->loc.path,
+ local->loc2.path, src_cached->name);
+ if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr);
+ }
- dht_do_rename (frame);
+ local->added_link = _gf_true;
- return 0;
+ STACK_WIND_COOKIE(frame, dht_rename_link_cbk, src_cached, src_cached,
+ src_cached->fops->link, &local->loc, &local->loc2, xattr);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ return 0;
cleanup:
- dht_rename_cleanup (frame);
+ dht_rename_cleanup(frame);
+
+ if (xattr)
+ dict_unref(xattr);
- return 0;
+ return 0;
}
+int
+dht_rename_unlink_links_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *prev = NULL;
+
+ local = frame->local;
+ prev = cookie;
+
+ if ((op_ret == -1) && (op_errno != ENOENT)) {
+ gf_msg_debug(this->name, 0, "unlink of %s on %s failed (%s)",
+ local->loc2.path, prev->name, strerror(op_errno));
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ }
+
+ if (local->op_ret == -1)
+ goto cleanup;
+
+ dht_do_rename(frame);
+
+ return 0;
+
+cleanup:
+ dht_rename_cleanup(frame);
+
+ return 0;
+}
int
-dht_rename_create_links (call_frame_t *frame)
+dht_rename_create_links(call_frame_t *frame)
{
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *src_cached = NULL;
- xlator_t *dst_hashed = NULL;
- xlator_t *dst_cached = NULL;
- int call_cnt = 0;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *src_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ int call_cnt = 0;
+ dict_t *xattr = NULL;
+ local = frame->local;
+ this = frame->this;
- local = frame->local;
- this = frame->this;
+ src_hashed = local->src_hashed;
+ src_cached = local->src_cached;
+ dst_hashed = local->dst_hashed;
+ dst_cached = local->dst_cached;
- src_hashed = local->src_hashed;
- src_cached = local->src_cached;
- dst_hashed = local->dst_hashed;
- dst_cached = local->dst_cached;
+ DHT_MARK_FOP_INTERNAL(xattr);
+ if (src_cached == dst_cached) {
+ dict_t *xattr_new = NULL;
- if (src_cached == dst_cached) {
- if (dst_hashed == dst_cached)
- goto nolinks;
+ if (dst_hashed == dst_cached)
+ goto nolinks;
- gf_log (this->name, GF_LOG_TRACE,
- "unlinking dst linkfile %s @ %s",
- local->loc2.path, dst_hashed->name);
+ xattr_new = dict_copy_with_ref(xattr, NULL);
- STACK_WIND (frame, dht_rename_unlink_links_cbk,
- dst_hashed, dst_hashed->fops->unlink,
- &local->loc2, 0, NULL);
- return 0;
- }
+ gf_msg_trace(this->name, 0, "unlinking dst linkfile %s @ %s",
+ local->loc2.path, dst_hashed->name);
- if (dst_hashed != src_hashed && dst_hashed != src_cached)
- call_cnt++;
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
- if (src_cached != dst_hashed)
- call_cnt++;
+ STACK_WIND_COOKIE(frame, dht_rename_unlink_links_cbk, dst_hashed,
+ dst_hashed, dst_hashed->fops->unlink, &local->loc2, 0,
+ xattr_new);
- local->call_cnt = call_cnt;
+ dict_unref(xattr_new);
+ if (xattr)
+ dict_unref(xattr);
- if (dst_hashed != src_hashed && dst_hashed != src_cached) {
- gf_log (this->name, GF_LOG_TRACE,
- "linkfile %s @ %s => %s",
- local->loc.path, dst_hashed->name, src_cached->name);
- memcpy (local->gfid, local->loc.inode->gfid, 16);
- dht_linkfile_create (frame, dht_rename_links_cbk,
- src_cached, dst_hashed, &local->loc);
- }
+ return 0;
+ }
+
+ if (src_cached != dst_hashed) {
+ /* needed to create the link file */
+ call_cnt++;
+ if (dst_hashed != src_hashed)
+ /* needed to create the linkto file */
+ call_cnt++;
+ }
+
+ /* We should not have any failures post the link creation, as this
+ * introduces the newname into the namespace. Clients could have cached
+ * the existence of the newname and may start taking actions based on
+ * the same. Hence create the linkto first, and then attempt the link.
+ *
+ * NOTE: If another client is attempting the same oldname -> newname
+ * rename, and finds both file names as existing, and are hard links
+ * to each other, then FUSE would send in an unlink for oldname. In
+ * this time duration if we treat the linkto as a critical error and
+ * unlink the newname we created, we would have effectively lost the
+ * file to rename operations. */
+ if (dst_hashed != src_hashed && src_cached != dst_hashed) {
+ gf_msg_trace(this->name, 0, "linkfile %s @ %s => %s", local->loc.path,
+ dst_hashed->name, src_cached->name);
+
+ memcpy(local->gfid, local->loc.inode->gfid, 16);
+ dht_linkfile_create(frame, dht_rename_linkto_cbk, this, src_cached,
+ dst_hashed, &local->loc);
+ } else if (src_cached != dst_hashed) {
+ dict_t *xattr_new = NULL;
+
+ xattr_new = dict_copy_with_ref(xattr, NULL);
+
+ gf_msg_trace(this->name, 0, "link %s => %s (%s)", local->loc.path,
+ local->loc2.path, src_cached->name);
+ if (gf_uuid_compare(local->loc.pargfid, local->loc2.pargfid) == 0) {
+ DHT_MARKER_DONT_ACCOUNT(xattr_new);
+ }
- if (src_cached != dst_hashed) {
- gf_log (this->name, GF_LOG_TRACE,
- "link %s => %s (%s)", local->loc.path,
- local->loc2.path, src_cached->name);
- STACK_WIND (frame, dht_rename_links_cbk,
- src_cached, src_cached->fops->link,
- &local->loc, &local->loc2, NULL);
- }
+ local->added_link = _gf_true;
+
+ STACK_WIND_COOKIE(frame, dht_rename_link_cbk, src_cached, src_cached,
+ src_cached->fops->link, &local->loc, &local->loc2,
+ xattr_new);
+
+ dict_unref(xattr_new);
+ }
nolinks:
- if (!call_cnt) {
- /* skip to next step */
- dht_do_rename (frame);
+ if (!call_cnt) {
+ /* skip to next step */
+ dht_do_rename(frame);
+ }
+ if (xattr)
+ dict_unref(xattr);
+
+ return 0;
+}
+
+int
+dht_rename_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int call_cnt = 0;
+ dht_conf_t *conf = NULL;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ char gfid_server[GF_UUID_BUF_SIZE] = {0};
+ int child_index = -1;
+ gf_boolean_t is_src = _gf_false;
+ loc_t *loc = NULL;
+
+ child_index = (long)cookie;
+
+ local = frame->local;
+ conf = this->private;
+
+ is_src = (child_index == 0);
+ if (is_src)
+ loc = &local->loc;
+ else
+ loc = &local->loc2;
+
+ if (op_ret >= 0) {
+ if (is_src)
+ local->src_cached = dht_subvol_get_cached(this, local->loc.inode);
+ else {
+ if (loc->inode)
+ gf_uuid_unparse(loc->inode->gfid, gfid_local);
+
+ gf_msg_debug(this->name, 0,
+ "dst_cached before lookup: %s, "
+ "(path:%s)(gfid:%s),",
+ local->loc2.path,
+ local->dst_cached ? local->dst_cached->name : NULL,
+ local->dst_cached ? gfid_local : NULL);
+
+ local->dst_cached = dht_subvol_get_cached(this,
+ local->loc2_copy.inode);
+
+ gf_uuid_unparse(stbuf->ia_gfid, gfid_local);
+
+ gf_msg_debug(this->name, GF_LOG_WARNING,
+ "dst_cached after lookup: %s, "
+ "(path:%s)(gfid:%s)",
+ local->loc2.path,
+ local->dst_cached ? local->dst_cached->name : NULL,
+ local->dst_cached ? gfid_local : NULL);
+
+ if ((local->loc2.inode == NULL) ||
+ gf_uuid_compare(stbuf->ia_gfid, local->loc2.inode->gfid)) {
+ if (local->loc2.inode != NULL) {
+ inode_unlink(local->loc2.inode, local->loc2.parent,
+ local->loc2.name);
+ inode_unref(local->loc2.inode);
+ }
+
+ local->loc2.inode = inode_link(local->loc2_copy.inode,
+ local->loc2_copy.parent,
+ local->loc2_copy.name, stbuf);
+ gf_uuid_copy(local->loc2.gfid, stbuf->ia_gfid);
+ }
+ }
+ }
+
+ if (op_ret < 0) {
+ if (is_src) {
+ /* The meaning of is_linkfile is overloaded here. For locking
+ * to work properly both rebalance and rename should acquire
+ * lock on datafile. The reason for sending this lookup is to
+ * find out whether we've acquired a lock on data file.
+ * Between the lookup before rename and this rename, the
+ * file could be migrated by a rebalance process and now this
+ * file this might be a linkto file. We verify that by sending
+ * this lookup. However, if this lookup fails we cannot really
+ * say whether we've acquired lock on a datafile or linkto file.
+ * So, we act conservatively and _assume_
+ * that this is a linkfile and fail the rename operation.
+ */
+ local->is_linkfile = _gf_true;
+ local->op_errno = op_errno;
+ } else {
+ if (local->dst_cached)
+ gf_msg_debug(this->name, op_errno,
+ "file %s (gfid:%s) was present "
+ "(hashed-subvol=%s, "
+ "cached-subvol=%s) before rename,"
+ " but lookup failed",
+ local->loc2.path,
+ uuid_utoa(local->loc2.inode->gfid),
+ local->dst_hashed->name, local->dst_cached->name);
+ if (dht_inode_missing(op_errno))
+ local->dst_cached = NULL;
+ }
+ } else if (is_src && xattr &&
+ check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name)) {
+ local->is_linkfile = _gf_true;
+ /* Found linkto file instead of data file, passdown ENOENT
+ * based on the above comment */
+ local->op_errno = ENOENT;
+ }
+
+ if (!local->is_linkfile && (op_ret >= 0) &&
+ gf_uuid_compare(loc->gfid, stbuf->ia_gfid)) {
+ gf_uuid_unparse(loc->gfid, gfid_local);
+ gf_uuid_unparse(stbuf->ia_gfid, gfid_server);
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_MISMATCH,
+ "path:%s, received a different gfid, local_gfid= %s"
+ " server_gfid: %s",
+ local->loc.path, gfid_local, gfid_server);
+
+ /* Will passdown ENOENT anyway since the file we sent on
+ * rename is replaced with a different file */
+ local->op_errno = ENOENT;
+ /* Since local->is_linkfile is used here to detect failure,
+ * marking this to true */
+ local->is_linkfile = _gf_true;
+ }
+
+ call_cnt = dht_frame_return(frame);
+ if (is_last_call(call_cnt)) {
+ if (local->is_linkfile) {
+ local->op_ret = -1;
+ goto fail;
}
- return 0;
-}
+ dht_rename_create_links(frame);
+ }
+ return 0;
+fail:
+ dht_rename_unlock(frame, this);
+ return 0;
+}
int
-dht_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
-{
- xlator_t *src_cached = NULL;
- xlator_t *src_hashed = NULL;
- xlator_t *dst_cached = NULL;
- xlator_t *dst_hashed = NULL;
- int op_errno = -1;
- int ret = -1;
- dht_local_t *local = NULL;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (oldloc, err);
- VALIDATE_OR_GOTO (newloc, err);
-
- src_hashed = dht_subvol_get_hashed (this, oldloc);
- if (!src_hashed) {
- gf_log (this->name, GF_LOG_INFO,
- "no subvolume in layout for path=%s",
- oldloc->path);
- op_errno = EINVAL;
- goto err;
- }
+dht_rename_file_lock1_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+ loc_t *loc = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "protecting namespace of %s failed"
+ "rename (%s:%s:%s %s:%s:%s)",
+ local->current == &local->lock[0] ? local->loc.path
+ : local->loc2.path,
+ local->loc.path, src_gfid, local->src_hashed->name,
+ local->loc2.path, dst_gfid,
+ local->dst_hashed ? local->dst_hashed->name : NULL);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ if (local->current == &local->lock[0]) {
+ loc = &local->loc2;
+ subvol = local->dst_hashed;
+ local->current = &local->lock[1];
+ } else {
+ loc = &local->loc;
+ subvol = local->src_hashed;
+ local->current = &local->lock[0];
+ }
+
+ ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns,
+ dht_rename_lock_cbk);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ return 0;
+err:
+ /* No harm in calling an extra unlock */
+ dht_rename_unlock(frame, this);
+ return 0;
+}
- src_cached = dht_subvol_get_cached (this, oldloc->inode);
- if (!src_cached) {
- gf_log (this->name, GF_LOG_INFO,
- "no cached subvolume for path=%s", oldloc->path);
- op_errno = EINVAL;
- goto err;
- }
+int32_t
+dht_rename_file_protect_namespace(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ int ret = 0;
+ loc_t *loc = NULL;
+ xlator_t *subvol = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "acquiring inodelk failed "
+ "rename (%s:%s:%s %s:%s:%s)",
+ local->loc.path, src_gfid, local->src_cached->name,
+ local->loc2.path, dst_gfid,
+ local->dst_cached ? local->dst_cached->name : NULL);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ goto err;
+ }
+
+ /* Locks on src and dst needs to ordered which otherwise might cause
+ * deadlocks when rename (src, dst) and rename (dst, src) is done from
+ * two different clients
+ */
+ ret = dht_order_rename_lock(frame, &loc, &subvol);
+ if (ret) {
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = dht_protect_namespace(frame, loc, subvol, &local->current->ns,
+ dht_rename_file_lock1_cbk);
+ if (ret < 0) {
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ return 0;
- dst_hashed = dht_subvol_get_hashed (this, newloc);
- if (!dst_hashed) {
- gf_log (this->name, GF_LOG_INFO,
- "no subvolume in layout for path=%s",
- newloc->path);
- op_errno = EINVAL;
- goto err;
- }
+err:
+ /* Its fine to call unlock even when no locks are acquired, as we check
+ * for lock->locked before winding a unlock call.
+ */
+ dht_rename_unlock(frame, this);
- if (newloc->inode)
- dst_cached = dht_subvol_get_cached (this, newloc->inode);
+ return 0;
+}
- local = dht_local_init (frame, oldloc, NULL, GF_FOP_RENAME);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- /* cached_subvol will be set from dht_local_init, reset it to NULL,
- as the logic of handling rename is different */
- local->cached_subvol = NULL;
-
- ret = loc_copy (&local->loc2, newloc);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
+int32_t
+dht_rename_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ char src_gfid[GF_UUID_BUF_SIZE] = {0};
+ char dst_gfid[GF_UUID_BUF_SIZE] = {0};
+ dict_t *xattr_req = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ xlator_t *subvol = NULL;
+ dht_lock_t *lock = NULL;
+
+ local = frame->local;
+ conf = this->private;
+
+ if (op_ret < 0) {
+ uuid_utoa_r(local->loc.inode->gfid, src_gfid);
+
+ if (local->loc2.inode)
+ uuid_utoa_r(local->loc2.inode->gfid, dst_gfid);
+
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_INODE_LK_ERROR,
+ "protecting namespace of %s failed. "
+ "rename (%s:%s:%s %s:%s:%s)",
+ local->current == &local->lock[0] ? local->loc.path
+ : local->loc2.path,
+ local->loc.path, src_gfid, local->src_hashed->name,
+ local->loc2.path, dst_gfid,
+ local->dst_hashed ? local->dst_hashed->name : NULL);
+
+ local->op_ret = -1;
+ local->op_errno = op_errno;
+
+ goto done;
+ }
+
+ xattr_req = dict_new();
+ if (xattr_req == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto done;
+ }
+
+ op_ret = dict_set_uint32(xattr_req, conf->link_xattr_name, 256);
+ if (op_ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -op_ret;
+ goto done;
+ }
+
+ /* dst_cached might've changed. This normally happens for two reasons:
+ * 1. rebalance migrated dst
+ * 2. Another parallel rename was done overwriting dst
+ *
+ * Doing a lookup on local->loc2 when dst exists, but is associated
+ * with a different gfid will result in an ESTALE error. So, do a fresh
+ * lookup with a new inode on dst-path and handle change of dst-cached
+ * in the cbk. Also, to identify dst-cached changes we do a lookup on
+ * "this" rather than the subvol.
+ */
+ loc_copy(&local->loc2_copy, &local->loc2);
+ inode_unref(local->loc2_copy.inode);
+ local->loc2_copy.inode = inode_new(local->loc.inode->table);
+
+ /* Why not use local->lock.locks[?].loc for lookup post lock phase
+ * ---------------------------------------------------------------
+ * "layout.parent_layout.locks[?].loc" does not have the name and pargfid
+ * populated.
+ * Reason: If we had populated the name and pargfid, server might
+ * resolve to a successful lookup even if there is a file with same name
+ * with a different gfid(unlink & create) as server does name based
+ * resolution on first priority. And this can result in operating on a
+ * different inode entirely.
+ *
+ * Now consider a scenario where source file was renamed by some other
+ * client to a new name just before this lock was granted. So if a
+ * lookup would be done on local->lock[0].layout.parent_layout.locks[?].loc,
+ * server will send success even if the entry was renamed (since server will
+ * do a gfid based resolution). So once a lock is granted, make sure the
+ * file exists with the name that the client requested with.
+ * */
+
+ local->call_cnt = 2;
+ for (i = 0; i < 2; i++) {
+ if (i == 0) {
+ lock = local->rename_inodelk_backward_compatible[0];
+ if (gf_uuid_compare(local->loc.gfid, lock->loc.gfid) == 0)
+ subvol = lock->xl;
+ else {
+ lock = local->rename_inodelk_backward_compatible[1];
+ subvol = lock->xl;
+ }
+ } else {
+ subvol = this;
}
- local->src_hashed = src_hashed;
- local->src_cached = src_cached;
- local->dst_hashed = dst_hashed;
- local->dst_cached = dst_cached;
+ STACK_WIND_COOKIE(frame, dht_rename_lookup_cbk, (void *)(long)i, subvol,
+ subvol->fops->lookup,
+ (i == 0) ? &local->loc : &local->loc2_copy,
+ xattr_req);
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "renaming %s (hash=%s/cache=%s) => %s (hash=%s/cache=%s)",
- oldloc->path, src_hashed->name, src_cached->name,
- newloc->path, dst_hashed->name,
- dst_cached ? dst_cached->name : "<nul>");
+ dict_unref(xattr_req);
+ return 0;
- if (IA_ISDIR (oldloc->inode->ia_type)) {
- dht_rename_dir (frame, this);
- } else {
- local->op_ret = 0;
- dht_rename_create_links (frame);
+done:
+ /* Its fine to call unlock even when no locks are acquired, as we check
+ * for lock->locked before winding a unlock call.
+ */
+ dht_rename_unlock(frame, this);
+
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ return 0;
+}
+
+int
+dht_rename_lock(call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1;
+ dht_lock_t **lk_array = NULL;
+
+ local = frame->local;
+
+ if (local->dst_cached)
+ count++;
+
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_pointer);
+ if (lk_array == NULL)
+ goto err;
+
+ lk_array[0] = dht_lock_new(frame->this, local->src_cached, &local->loc,
+ F_WRLCK, DHT_FILE_MIGRATE_DOMAIN, NULL,
+ FAIL_ON_ANY_ERROR);
+ if (lk_array[0] == NULL)
+ goto err;
+
+ if (local->dst_cached) {
+ /* dst might be removed by the time inodelk reaches bricks,
+ * which can result in ESTALE errors. POSIX imposes no
+ * restriction for dst to be present for renames to be
+ * successful. So, we'll ignore ESTALE errors. As far as
+ * synchronization on dst goes, we'll achieve the same by
+ * holding entrylk on parent directory of dst in the namespace
+ * of basename(dst). Also, there might not be quorum in cluster
+ * xlators like EC/disperse on errno, in which case they return
+ * EIO. For eg., in a disperse (4 + 2), 3 might return success
+ * and three might return ESTALE. Disperse, having no Quorum
+ * unwinds inodelk with EIO. So, ignore EIO too.
+ */
+ lk_array[1] = dht_lock_new(frame->this, local->dst_cached, &local->loc2,
+ F_WRLCK, DHT_FILE_MIGRATE_DOMAIN, NULL,
+ IGNORE_ENOENT_ESTALE_EIO);
+ if (lk_array[1] == NULL)
+ goto err;
+ }
+
+ local->rename_inodelk_backward_compatible = lk_array;
+ local->rename_inodelk_bc_count = count;
+
+ /* retaining inodelks for the sake of backward compatibility. Please
+ * make sure to remove this inodelk once all of 3.10, 3.12 and 3.13
+ * reach EOL. Better way of getting synchronization would be to acquire
+ * entrylks on src and dst parent directories in the namespace of
+ * basenames of src and dst
+ */
+ ret = dht_blocking_inodelk(frame, lk_array, count,
+ dht_rename_file_protect_namespace);
+ if (ret < 0) {
+ local->rename_inodelk_backward_compatible = NULL;
+ local->rename_inodelk_bc_count = 0;
+ goto err;
+ }
+
+ return 0;
+err:
+ if (lk_array != NULL) {
+ int tmp_count = 0, i = 0;
+
+ for (i = 0; (i < count) && (lk_array[i]); i++, tmp_count++)
+ ;
+
+ dht_lock_array_free(lk_array, tmp_count);
+ GF_FREE(lk_array);
+ }
+
+ return -1;
+}
+
+int
+dht_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ xlator_t *src_cached = NULL;
+ xlator_t *src_hashed = NULL;
+ xlator_t *dst_cached = NULL;
+ xlator_t *dst_hashed = NULL;
+ int op_errno = -1;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ char newgfid[GF_UUID_BUF_SIZE] = {0};
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(oldloc, err);
+ VALIDATE_OR_GOTO(newloc, err);
+
+ gf_uuid_unparse(oldloc->inode->gfid, gfid);
+
+ src_hashed = dht_subvol_get_hashed(this, oldloc);
+ if (!src_hashed) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED,
+ "No hashed subvolume in layout for path=%s,"
+ "(gfid = %s)",
+ oldloc->path, gfid);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ src_cached = dht_subvol_get_cached(this, oldloc->inode);
+ if (!src_cached) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED,
+ "No cached subvolume for path = %s,"
+ "(gfid = %s)",
+ oldloc->path, gfid);
+
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ dst_hashed = dht_subvol_get_hashed(this, newloc);
+ if (!dst_hashed) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_FAILED,
+ "No hashed subvolume in layout for path=%s", newloc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (newloc->inode)
+ dst_cached = dht_subvol_get_cached(this, newloc->inode);
+
+ local = dht_local_init(frame, oldloc, NULL, GF_FOP_RENAME);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ /* cached_subvol will be set from dht_local_init, reset it to NULL,
+ as the logic of handling rename is different */
+ local->cached_subvol = NULL;
+
+ ret = loc_copy(&local->loc2, newloc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->src_hashed = src_hashed;
+ local->src_cached = src_cached;
+ local->dst_hashed = dst_hashed;
+ local->dst_cached = dst_cached;
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ if (newloc->inode)
+ gf_uuid_unparse(newloc->inode->gfid, newgfid);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_RENAME_INFO,
+ "renaming %s (%s) (hash=%s/cache=%s) => %s (%s) "
+ "(hash=%s/cache=%s) ",
+ oldloc->path, gfid, src_hashed->name, src_cached->name, newloc->path,
+ newloc->inode ? newgfid : NULL, dst_hashed->name,
+ dst_cached ? dst_cached->name : "<nul>");
+
+ if (IA_ISDIR(oldloc->inode->ia_type)) {
+ dht_rename_dir(frame, this);
+ } else {
+ local->op_ret = 0;
+ ret = dht_rename_lock(frame);
+ if (ret < 0) {
+ op_errno = ENOMEM;
+ goto err;
}
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
- NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
- return 0;
+ return 0;
+}
+
+int
+dht_pt_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ gf_boolean_t free_xdata = _gf_false;
+
+ /* Just a pass through */
+ if (!IA_ISDIR(oldloc->inode->ia_type)) {
+ if (!xdata) {
+ free_xdata = _gf_true;
+ }
+ DHT_CHANGELOG_TRACK_AS_RENAME(xdata, oldloc, newloc);
+ }
+ default_rename(frame, this, oldloc, newloc, xdata);
+ if (free_xdata && xdata) {
+ dict_unref(xdata);
+ xdata = NULL;
+ }
+ return 0;
}
diff --git a/xlators/cluster/dht/src/dht-selfheal.c b/xlators/cluster/dht/src/dht-selfheal.c
index 7ceb80157d5..3e24065227c 100644
--- a/xlators/cluster/dht/src/dht-selfheal.c
+++ b/xlators/cluster/dht/src/dht-selfheal.c
@@ -8,922 +8,2593 @@
cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "dht-common.h"
-
-#define DHT_SET_LAYOUT_RANGE(layout,i,srt,chunk,cnt,path) do { \
- layout->list[i].start = srt; \
- layout->list[i].stop = srt + chunk - 1; \
- \
- gf_log (this->name, GF_LOG_TRACE, \
- "gave fix: %u - %u on %s for %s", \
- layout->list[i].start, layout->list[i].stop, \
- layout->list[i].xlator->name, path); \
- } while (0)
-
-static inline uint32_t
-dht_find_overlap (int idx, int cnk_idx, uint32_t start, uint32_t stop,
- uint32_t chunk_size)
-{
- uint32_t overlap = 0;
- uint32_t chunk_begin = 0;
-
- chunk_begin = cnk_idx * chunk_size;
-
- /* There is no chance of overlap */
- if ((chunk_begin > stop) ||
- ((chunk_begin + chunk_size) < start))
- goto out;
+#include "dht-lock.h"
+
+#define DHT_SET_LAYOUT_RANGE(layout, i, srt, chunk, path) \
+ do { \
+ layout->list[i].start = srt; \
+ layout->list[i].stop = srt + chunk - 1; \
+ layout->list[i].commit_hash = layout->commit_hash; \
+ \
+ gf_msg_trace(this->name, 0, \
+ "gave fix: 0x%x - 0x%x, with commit-hash 0x%x" \
+ " on %s for %s", \
+ layout->list[i].start, layout->list[i].stop, \
+ layout->list[i].commit_hash, \
+ layout->list[i].xlator->name, path); \
+ } while (0)
+
+#define DHT_RESET_LAYOUT_RANGE(layout) \
+ do { \
+ int cnt = 0; \
+ for (cnt = 0; cnt < layout->cnt; cnt++) { \
+ layout->list[cnt].start = 0; \
+ layout->list[cnt].stop = 0; \
+ } \
+ } while (0)
+
+static int
+dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
+ gf_boolean_t newdir, dht_selfheal_layout_t healer,
+ dht_need_heal_t should_heal);
+
+static uint32_t
+dht_overlap_calc(dht_layout_t *old, int o, dht_layout_t *new, int n)
+{
+ if (o >= old->cnt || n >= new->cnt)
+ return 0;
- if ((chunk_begin <= start) &&
- ((chunk_begin + chunk_size) <= stop)) {
- overlap = ((chunk_begin + chunk_size) - start);
- goto out;
- }
+ if (old->list[o].err > 0 || new->list[n].err > 0)
+ return 0;
- if ((chunk_begin <= start) &&
- ((chunk_begin + chunk_size) >= stop)) {
- overlap = (stop - start);
- goto out;
- }
+ if (old->list[o].start == old->list[o].stop) {
+ return 0;
+ }
- if ((chunk_begin < stop) &&
- ((chunk_begin + chunk_size) >= stop)) {
- overlap = (stop - chunk_begin);
- goto out;
- }
+ if (new->list[n].start == new->list[n].stop) {
+ return 0;
+ }
-out:
- return overlap;
+ if ((old->list[o].start > new->list[n].stop) ||
+ (old->list[o].stop < new->list[n].start))
+ return 0;
+
+ return min(old->list[o].stop, new->list[n].stop) -
+ max(old->list[o].start, new->list[n].start) + 1;
}
int
-dht_selfheal_dir_finish (call_frame_t *frame, xlator_t *this, int ret)
+dht_selfheal_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- dht_local_t *local = NULL;
+ DHT_STACK_DESTROY(frame);
+ return 0;
+}
- local = frame->local;
- local->selfheal.dir_cbk (frame, NULL, frame->this, ret,
- local->op_errno, NULL);
+int
+dht_selfheal_dir_finish(call_frame_t *frame, xlator_t *this, int ret,
+ int invoke_cbk)
+{
+ dht_local_t *local = NULL, *lock_local = NULL;
+ call_frame_t *lock_frame = NULL;
+ int lock_count = 0;
- return 0;
-}
+ local = frame->local;
+ /* Unlock entrylk */
+ dht_unlock_entrylk_wrapper(frame, &local->lock[0].ns.directory_ns);
-int
-dht_selfheal_dir_xattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xdata)
-{
- dht_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
- int i = 0;
- dht_layout_t *layout = NULL;
- int err = 0;
- int this_call_cnt = 0;
-
- local = frame->local;
- layout = local->selfheal.layout;
- prev = cookie;
- subvol = prev->this;
-
- if (op_ret == 0)
- err = 0;
- else
- err = op_errno;
+ /* Unlock inodelk */
+ lock_count = dht_lock_count(local->lock[0].ns.parent_layout.locks,
+ local->lock[0].ns.parent_layout.lk_count);
+ if (lock_count == 0)
+ goto done;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == subvol) {
- layout->list[i].err = err;
- break;
- }
- }
+ lock_frame = copy_frame(frame);
+ if (lock_frame == NULL) {
+ goto done;
+ }
- this_call_cnt = dht_frame_return (frame);
+ lock_local = dht_local_init(lock_frame, &local->loc, NULL,
+ lock_frame->root->op);
+ if (lock_local == NULL) {
+ goto done;
+ }
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_dir_finish (frame, this, 0);
- }
+ lock_local->lock[0].ns.parent_layout.locks = local->lock[0]
+ .ns.parent_layout.locks;
+ lock_local->lock[0]
+ .ns.parent_layout.lk_count = local->lock[0].ns.parent_layout.lk_count;
- return 0;
-}
+ local->lock[0].ns.parent_layout.locks = NULL;
+ local->lock[0].ns.parent_layout.lk_count = 0;
+ dht_unlock_inodelk(lock_frame, lock_local->lock[0].ns.parent_layout.locks,
+ lock_local->lock[0].ns.parent_layout.lk_count,
+ dht_selfheal_unlock_cbk);
+ lock_frame = NULL;
+
+done:
+ if (invoke_cbk)
+ local->selfheal.dir_cbk(frame, NULL, frame->this, ret, local->op_errno,
+ NULL);
+ if (lock_frame != NULL) {
+ DHT_STACK_DESTROY(lock_frame);
+ }
+
+ return 0;
+}
int
-dht_selfheal_dir_xattr_persubvol (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout, int i)
+dht_refresh_layout_done(call_frame_t *frame)
{
- xlator_t *subvol = NULL;
- dict_t *xattr = NULL;
- int ret = 0;
- xlator_t *this = NULL;
- int32_t *disk_layout = NULL;
- dht_local_t *local = NULL;
+ int ret = -1;
+ dht_layout_t *refreshed = NULL, *heal = NULL;
+ dht_local_t *local = NULL;
+ dht_need_heal_t should_heal = NULL;
+ dht_selfheal_layout_t healer = NULL;
+ local = frame->local;
- local = frame->local;
- subvol = layout->list[i].xlator;
- this = frame->this;
+ refreshed = local->selfheal.refreshed_layout;
+ heal = local->selfheal.layout;
- GF_VALIDATE_OR_GOTO ("", this, err);
- GF_VALIDATE_OR_GOTO (this->name, layout, err);
- GF_VALIDATE_OR_GOTO (this->name, local, err);
- GF_VALIDATE_OR_GOTO (this->name, subvol, err);
+ healer = local->selfheal.healer;
+ should_heal = local->selfheal.should_heal;
- xattr = get_new_dict ();
- if (!xattr) {
- goto err;
+ ret = dht_layout_sort(refreshed);
+ if (ret == -1) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_LAYOUT_SORT_FAILED, NULL);
+ goto err;
+ }
+
+ if (should_heal(frame, &heal, &refreshed)) {
+ healer(frame, &local->loc, heal);
+ } else {
+ local->selfheal.layout = NULL;
+ local->selfheal.refreshed_layout = NULL;
+ local->selfheal.layout = refreshed;
+
+ dht_layout_unref(frame->this, heal);
+
+ dht_selfheal_dir_finish(frame, frame->this, 0, 1);
+ }
+
+ return 0;
+
+err:
+ dht_selfheal_dir_finish(frame, frame->this, -1, 1);
+ return 0;
+}
+
+int
+dht_refresh_layout_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+ xlator_t *prev = NULL;
+ dht_layout_t *layout = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO("dht", this, err);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, err);
+ GF_VALIDATE_OR_GOTO("dht", this->private, err);
+
+ local = frame->local;
+ prev = cookie;
+
+ layout = local->selfheal.refreshed_layout;
+
+ LOCK(&frame->lock);
+ {
+ op_ret = dht_layout_merge(this, layout, prev, op_ret, op_errno, xattr);
+
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+
+ if (op_ret == -1) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ local->op_errno = op_errno;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_FILE_LOOKUP_FAILED, "path=%s", local->loc.path,
+ "name=%s", prev->name, "gfid=%s", gfid, NULL);
+
+ goto unlock;
}
- ret = dht_disk_layout_extract (this, layout, i, &disk_layout);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: (subvol %s) failed to extract disk layout",
- loc->path, subvol->name);
- goto err;
+ local->op_ret = 0;
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ this_call_cnt = dht_frame_return(frame);
+
+ if (is_last_call(this_call_cnt)) {
+ if (local->op_ret == 0) {
+ local->refresh_layout_done(frame);
+ } else {
+ goto err;
}
+ }
- ret = dict_set_bin (xattr, "trusted.glusterfs.dht",
- disk_layout, 4 * 4);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: (subvol %s) failed to set xattr dictionary",
- loc->path, subvol->name);
- goto err;
+ return 0;
+
+err:
+ if (local) {
+ local->refresh_layout_unlock(frame, this, -1, 1);
+ }
+ return 0;
+}
+
+int
+dht_refresh_layout(call_frame_t *frame)
+{
+ int call_cnt = 0;
+ int i = 0, ret = -1;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("dht", frame, out);
+ GF_VALIDATE_OR_GOTO("dht", frame->local, out);
+
+ this = frame->this;
+ conf = this->private;
+ local = frame->local;
+
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+ local->op_ret = -1;
+
+ if (local->selfheal.refreshed_layout) {
+ dht_layout_unref(this, local->selfheal.refreshed_layout);
+ local->selfheal.refreshed_layout = NULL;
+ }
+
+ local->selfheal.refreshed_layout = dht_layout_new(this,
+ conf->subvolume_cnt);
+ if (!local->selfheal.refreshed_layout) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+
+ if (local->xattr != NULL) {
+ dict_del(local->xattr, conf->xattr_name);
+ }
+
+ if (local->xattr_req == NULL) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ local->xattr_req = dict_new();
+ if (local->xattr_req == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
}
- disk_layout = NULL;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "setting hash range %u - %u (type %d) on subvolume %s for %s",
- layout->list[i].start, layout->list[i].stop,
- layout->type, subvol->name, loc->path);
+ if (dict_get(local->xattr_req, conf->xattr_name) == 0) {
+ ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", local->loc.path, "key=%s", conf->xattr_name,
+ NULL);
+ }
- dict_ref (xattr);
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_refresh_layout_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
+ }
- if (!uuid_is_null (local->gfid))
- uuid_copy (loc->gfid, local->gfid);
+ return 0;
- STACK_WIND (frame, dht_selfheal_dir_xattr_cbk,
- subvol, subvol->fops->setxattr,
- loc, xattr, 0, NULL);
+out:
+ if (local) {
+ local->refresh_layout_unlock(frame, this, -1, 1);
+ }
+ return 0;
+}
- dict_unref (xattr);
+int32_t
+dht_selfheal_layout_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
- return 0;
+ local = frame->local;
-err:
- if (xattr)
- dict_destroy (xattr);
+ if (!local) {
+ goto err;
+ }
- if (disk_layout)
- GF_FREE (disk_layout);
+ if (op_ret < 0) {
+ local->op_errno = op_errno;
+ goto err;
+ }
- dht_selfheal_dir_xattr_cbk (frame, subvol, frame->this,
- -1, ENOMEM, NULL);
- return 0;
+ local->refresh_layout_unlock = dht_selfheal_dir_finish;
+ local->refresh_layout_done = dht_refresh_layout_done;
+
+ dht_refresh_layout(frame);
+ return 0;
+
+err:
+ dht_selfheal_dir_finish(frame, this, -1, 1);
+ return 0;
}
-int
-dht_fix_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+gf_boolean_t
+dht_should_heal_layout(call_frame_t *frame, dht_layout_t **heal,
+ dht_layout_t **ondisk)
{
- dht_local_t *local = NULL;
- int i = 0;
- int count = 0;
- xlator_t *this = NULL;
+ gf_boolean_t fixit = _gf_true;
+ dht_local_t *local = NULL;
+ int heal_missing_dirs = 0;
+
+ local = frame->local;
+
+ if ((heal == NULL) || (*heal == NULL) || (ondisk == NULL) ||
+ (*ondisk == NULL))
+ goto out;
+
+ dht_layout_anomalies(
+ frame->this, &local->loc, *ondisk, &local->selfheal.hole_cnt,
+ &local->selfheal.overlaps_cnt, &local->selfheal.missing_cnt,
+ &local->selfheal.down, &local->selfheal.misc, NULL);
+
+ /* Directories might've been created as part of this self-heal. We've to
+ * sync non-layout xattrs and set range 0-0 on new directories
+ */
+ heal_missing_dirs = local->selfheal.force_mkdir
+ ? local->selfheal.force_mkdir
+ : dht_layout_missing_dirs(*heal);
+
+ if ((local->selfheal.hole_cnt == 0) &&
+ (local->selfheal.overlaps_cnt == 0) && heal_missing_dirs) {
+ dht_layout_t *tmp = NULL;
+
+ /* Just added a brick and need to set 0-0 range on this brick.
+ * But ondisk layout is well-formed. So, swap layouts "heal" and
+ * "ondisk". Now "ondisk" layout will be used for healing
+ * xattrs. If there are any non-participating subvols in
+ * "ondisk" layout, dht_selfheal_dir_xattr_persubvol will set
+ * 0-0 and non-layout xattrs. This way we won't end up in
+ * "corrupting" already set and well-formed "ondisk" layout.
+ */
+ tmp = *heal;
+ *heal = *ondisk;
+ *ondisk = tmp;
+
+ /* Current selfheal code, heals non-layout xattrs only after
+ * an add-brick. In fact non-layout xattrs are considered as
+ * secondary citizens which are healed only if layout xattrs
+ * need to be healed. This is wrong, since for eg., quota can be
+ * set when layout is well-formed, but a node is down. Also,
+ * just for healing non-layout xattrs, we don't need locking.
+ * This issue is _NOT FIXED_ by this patch.
+ */
+ }
+
+ fixit = (local->selfheal.hole_cnt || local->selfheal.overlaps_cnt ||
+ heal_missing_dirs);
- local = frame->local;
- this = frame->this;
+out:
+ return fixit;
+}
- gf_log (this->name, GF_LOG_DEBUG,
- "writing the new range for all subvolumes");
+int
+dht_layout_span(dht_layout_t *layout)
+{
+ int i = 0, count = 0;
- local->call_cnt = count = layout->cnt;
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err)
+ continue;
- for (i = 0; i < layout->cnt; i++) {
- dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i);
+ if (layout->list[i].start != layout->list[i].stop)
+ count++;
+ }
- if (--count == 0)
- break;
- }
- return 0;
+ return count;
}
int
-dht_selfheal_dir_xattr (call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+dht_decommissioned_bricks_in_layout(xlator_t *this, dht_layout_t *layout)
{
- dht_local_t *local = NULL;
- int missing_xattr = 0;
- int i = 0;
- xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ int count = 0, i = 0, j = 0;
- local = frame->local;
- this = frame->this;
+ if ((this == NULL) || (layout == NULL))
+ goto out;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err != -1 || !layout->list[i].stop) {
- /* err != -1 would mean xattr present on the directory
- * or the directory is non existent.
- * !layout->list[i].stop would mean layout absent
- */
+ conf = this->private;
- continue;
- }
- missing_xattr++;
+ for (i = 0; i < layout->cnt; i++) {
+ for (j = 0; j < conf->subvolume_cnt; j++) {
+ if (conf->decommissioned_bricks[j] &&
+ conf->decommissioned_bricks[j] == layout->list[i].xlator) {
+ count++;
+ }
}
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "%d subvolumes missing xattr for %s",
- missing_xattr, loc->path);
+out:
+ return count;
+}
- if (missing_xattr == 0) {
- dht_selfheal_dir_finish (frame, this, 0);
- return 0;
+dht_distribution_type_t
+dht_distribution_type(xlator_t *this, dht_layout_t *layout)
+{
+ dht_distribution_type_t type = GF_DHT_EQUAL_DISTRIBUTION;
+ int i = 0;
+ uint32_t start_range = 0, range = 0, diff = 0;
+
+ if ((this == NULL) || (layout == NULL) || (layout->cnt < 1)) {
+ goto out;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (start_range == 0) {
+ start_range = layout->list[i].stop - layout->list[i].start;
+ continue;
}
- local->call_cnt = missing_xattr;
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err != -1 || !layout->list[i].stop)
- continue;
-
- dht_selfheal_dir_xattr_persubvol (frame, loc, layout, i);
+ range = layout->list[i].stop - layout->list[i].start;
+ diff = (range >= start_range) ? range - start_range
+ : start_range - range;
- if (--missing_xattr == 0)
- break;
+ if ((range != 0) && (diff > layout->cnt)) {
+ type = GF_DHT_WEIGHTED_DISTRIBUTION;
+ break;
}
- return 0;
+ }
+
+out:
+ return type;
}
-int
-dht_selfheal_dir_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+gf_boolean_t
+dht_should_fix_layout(call_frame_t *frame, dht_layout_t **inmem,
+ dht_layout_t **ondisk)
{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- int this_call_cnt = 0;
+ gf_boolean_t fixit = _gf_true;
- local = frame->local;
- layout = local->selfheal.layout;
+ dht_local_t *local = NULL;
+ int layout_span = 0;
+ int decommissioned_bricks = 0;
+ dht_conf_t *conf = NULL;
+ dht_distribution_type_t inmem_dist_type = 0;
+ dht_distribution_type_t ondisk_dist_type = 0;
- this_call_cnt = dht_frame_return (frame);
+ conf = frame->this->private;
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_dir_xattr (frame, &local->loc, layout);
- }
+ local = frame->local;
- return 0;
-}
+ if ((inmem == NULL) || (*inmem == NULL) || (ondisk == NULL) ||
+ (*ondisk == NULL))
+ goto out;
+ dht_layout_anomalies(frame->this, &local->loc, *ondisk,
+ &local->selfheal.hole_cnt,
+ &local->selfheal.overlaps_cnt, NULL,
+ &local->selfheal.down, &local->selfheal.misc, NULL);
-int
-dht_selfheal_dir_setattr (call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
- int32_t valid, dht_layout_t *layout)
+ if (local->selfheal.down || local->selfheal.misc) {
+ fixit = _gf_false;
+ goto out;
+ }
+
+ if (local->selfheal.hole_cnt || local->selfheal.overlaps_cnt)
+ goto out;
+
+ /* If commit hashes are being updated, let it through */
+ if ((*inmem)->commit_hash != (*ondisk)->commit_hash)
+ goto out;
+
+ layout_span = dht_layout_span(*ondisk);
+
+ decommissioned_bricks = dht_decommissioned_bricks_in_layout(frame->this,
+ *ondisk);
+ inmem_dist_type = dht_distribution_type(frame->this, *inmem);
+ ondisk_dist_type = dht_distribution_type(frame->this, *ondisk);
+
+ if ((decommissioned_bricks == 0) &&
+ (layout_span ==
+ (conf->subvolume_cnt - conf->decommission_subvols_cnt)) &&
+ (inmem_dist_type == ondisk_dist_type))
+ fixit = _gf_false;
+
+out:
+
+ return fixit;
+}
+
+static int
+dht_selfheal_layout_lock(call_frame_t *frame, dht_layout_t *layout,
+ gf_boolean_t newdir, dht_selfheal_layout_t healer,
+ dht_need_heal_t should_heal)
{
- int missing_attr = 0;
- int i = 0;
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1, i = 0;
+ dht_lock_t **lk_array = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *tmp = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
- local = frame->local;
- this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO(frame->this->name, frame->local, err);
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == -1)
- missing_attr++;
+ local = frame->local;
+
+ conf = frame->this->private;
+
+ local->selfheal.healer = healer;
+ local->selfheal.should_heal = should_heal;
+
+ tmp = local->selfheal.layout;
+ local->selfheal.layout = dht_layout_ref(frame->this, layout);
+ dht_layout_unref(frame->this, tmp);
+
+ if (!newdir) {
+ count = conf->subvolume_cnt;
+
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char);
+ if (lk_array == NULL) {
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_smsg("dht", GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
+ goto err;
}
- if (missing_attr == 0) {
- dht_selfheal_dir_xattr (frame, loc, layout);
- return 0;
+ for (i = 0; i < count; i++) {
+ lk_array[i] = dht_lock_new(
+ frame->this, conf->subvolumes[i], &local->loc, F_WRLCK,
+ DHT_LAYOUT_HEAL_DOMAIN, NULL, FAIL_ON_ANY_ERROR);
+ if (lk_array[i] == NULL) {
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM,
+ DHT_MSG_MEM_ALLOC_FAILED, "lk_array-gfid=%s", gfid,
+ "path=%s", local->loc.path, NULL);
+ goto err;
+ }
+ }
+ } else {
+ count = 1;
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char);
+ if (lk_array == NULL) {
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
+ goto err;
+ }
+
+ lk_array[0] = dht_lock_new(frame->this, local->hashed_subvol,
+ &local->loc, F_WRLCK, DHT_LAYOUT_HEAL_DOMAIN,
+ NULL, FAIL_ON_ANY_ERROR);
+ if (lk_array[0] == NULL) {
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_smsg(THIS->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "lk_array-gfid=%s", gfid, "path=%s", local->loc.path, NULL);
+ goto err;
}
+ }
- if (!uuid_is_null (local->gfid))
- uuid_copy (loc->gfid, local->gfid);
+ local->lock[0].layout.my_layout.locks = lk_array;
+ local->lock[0].layout.my_layout.lk_count = count;
- local->call_cnt = missing_attr;
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "setattr for %s on subvol %s",
- loc->path, layout->list[i].xlator->name);
-
- STACK_WIND (frame, dht_selfheal_dir_setattr_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->setattr,
- loc, stbuf, valid, NULL);
- }
+ ret = dht_blocking_inodelk(frame, lk_array, count,
+ dht_selfheal_layout_lock_cbk);
+ if (ret < 0) {
+ local->lock[0].layout.my_layout.locks = NULL;
+ local->lock[0].layout.my_layout.lk_count = 0;
+ goto err;
+ }
+
+ return 0;
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free(lk_array, count);
+ GF_FREE(lk_array);
+ }
+
+ return -1;
+}
+
+static int
+dht_selfheal_dir_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ struct iatt *stbuf = NULL;
+ int i = 0;
+ int ret = 0;
+ dht_layout_t *layout = NULL;
+ int err = 0;
+ int this_call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ layout = local->selfheal.layout;
+ subvol = cookie;
+
+ if (op_ret == 0) {
+ err = 0;
+ } else {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "name=%s", subvol->name,
+ "path=%s", local->loc.path, "gfid=%s", gfid, NULL);
+ err = op_errno;
+ }
+
+ ret = dict_get_bin(xdata, DHT_IATT_IN_XDATA_KEY, (void **)&stbuf);
+ if (ret < 0) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_msg_debug(this->name, 0,
+ "key = %s not present in dict"
+ ", path:%s gfid:%s",
+ DHT_IATT_IN_XDATA_KEY, local->loc.path, gfid);
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].xlator == subvol) {
+ layout->list[i].err = err;
+ break;
}
+ }
- return 0;
+ LOCK(&frame->lock);
+ {
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ }
+ UNLOCK(&frame->lock);
+
+ this_call_cnt = dht_frame_return(frame);
+
+ if (is_last_call(this_call_cnt)) {
+ dht_selfheal_dir_finish(frame, this, 0, 1);
+ }
+
+ return 0;
}
+/* Code is required to set user xattr to local->xattr
+ */
int
-dht_selfheal_dir_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- dht_local_t *local = NULL;
- dht_layout_t *layout = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
- int i = 0;
- int this_call_cnt = 0;
-
-
- local = frame->local;
- layout = local->selfheal.layout;
- prev = cookie;
- subvol = prev->this;
-
- if ((op_ret == 0) || ((op_ret == -1) && (op_errno == EEXIST))) {
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].xlator == subvol) {
- layout->list[i].err = -1;
- break;
- }
- }
- }
+dht_set_user_xattr(dict_t *dict, char *k, data_t *v, void *data)
+{
+ dict_t *set_xattr = data;
+ int ret = -1;
- if (op_ret) {
- gf_log (this->name, ((op_errno == EEXIST) ? GF_LOG_DEBUG :
- GF_LOG_WARNING),
- "selfhealing directory %s failed: %s",
- local->loc.path, strerror (op_errno));
- goto out;
+ ret = dict_set(set_xattr, k, v);
+ return ret;
+}
+
+static int
+dht_selfheal_dir_xattr_persubvol(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout, int i,
+ xlator_t *req_subvol)
+{
+ xlator_t *subvol = NULL;
+ dict_t *xattr = NULL;
+ dict_t *xdata = NULL;
+ int ret = 0;
+ xlator_t *this = NULL;
+ int32_t *disk_layout = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ data_t *data = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ if (req_subvol)
+ subvol = req_subvol;
+ else
+ subvol = layout->list[i].xlator;
+ this = frame->this;
+
+ GF_VALIDATE_OR_GOTO("", this, err);
+ GF_VALIDATE_OR_GOTO(this->name, layout, err);
+ GF_VALIDATE_OR_GOTO(this->name, local, err);
+ GF_VALIDATE_OR_GOTO(this->name, subvol, err);
+ VALIDATE_OR_GOTO(this->private, err);
+
+ conf = this->private;
+
+ xattr = dict_new();
+ if (!xattr) {
+ goto err;
+ }
+
+ xdata = dict_new();
+ if (!xdata)
+ goto err;
+
+ ret = dict_set_str(xdata, GLUSTERFS_INTERNAL_FOP_KEY, "yes");
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", GLUSTERFS_INTERNAL_FOP_KEY,
+ "gfid=%s", gfid, NULL);
+ goto err;
+ }
+
+ ret = dict_set_int8(xdata, DHT_IATT_IN_XDATA_KEY, 1);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", DHT_IATT_IN_XDATA_KEY,
+ "gfid=%s", gfid, NULL);
+ goto err;
+ }
+
+ gf_uuid_unparse(loc->inode->gfid, gfid);
+
+ ret = dht_disk_layout_extract(this, layout, i, &disk_layout);
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED,
+ "extract-disk-layout-failed, path=%s", loc->path, "subvol=%s",
+ subvol->name, "gfid=%s", gfid, NULL);
+ goto err;
+ }
+
+ ret = dict_set_bin(xattr, conf->xattr_name, disk_layout, 4 * 4);
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "path=%s", loc->path,
+ "subvol=%s", subvol->name,
+ "set-xattr-dictionary-failed"
+ "gfid=%s",
+ gfid, NULL);
+ goto err;
+ }
+ disk_layout = NULL;
+
+ gf_msg_trace(this->name, 0,
+ "setting hash range 0x%x - 0x%x (type %d) on subvolume %s"
+ " for %s",
+ layout->list[i].start, layout->list[i].stop, layout->type,
+ subvol->name, loc->path);
+
+ if (local->xattr) {
+ data = dict_get(local->xattr, QUOTA_LIMIT_KEY);
+ if (data) {
+ ret = dict_add(xattr, QUOTA_LIMIT_KEY, data);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", QUOTA_LIMIT_KEY, NULL);
+ }
}
+ data = dict_get(local->xattr, QUOTA_LIMIT_OBJECTS_KEY);
+ if (data) {
+ ret = dict_add(xattr, QUOTA_LIMIT_OBJECTS_KEY, data);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=%s", QUOTA_LIMIT_OBJECTS_KEY,
+ NULL);
+ }
+ }
+ }
+
+ if (!gf_uuid_is_null(local->gfid))
+ gf_uuid_copy(loc->gfid, local->gfid);
+
+ STACK_WIND_COOKIE(frame, dht_selfheal_dir_xattr_cbk, (void *)subvol, subvol,
+ subvol->fops->setxattr, loc, xattr, 0, xdata);
+
+ dict_unref(xattr);
+ dict_unref(xdata);
+
+ return 0;
+
+err:
+ if (xattr)
+ dict_unref(xattr);
+ if (xdata)
+ dict_unref(xdata);
+
+ GF_FREE(disk_layout);
- dht_iatt_merge (this, &local->stbuf, stbuf, prev->this);
- dht_iatt_merge (this, &local->preparent, preparent, prev->this);
- dht_iatt_merge (this, &local->postparent, postparent, prev->this);
+ dht_selfheal_dir_xattr_cbk(frame, (void *)subvol, frame->this, -1, ENOMEM,
+ NULL);
+ return 0;
+}
+
+static int
+dht_fix_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ int i = 0;
+ int count = 0;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ gf_msg_debug(this->name, 0, "%s: Writing the new range for all subvolumes",
+ loc->path);
+
+ local->call_cnt = count = conf->subvolume_cnt;
+
+ if (gf_log_get_loglevel() >= GF_LOG_DEBUG)
+ dht_log_new_layout_for_dir_selfheal(this, loc, layout);
+
+ for (i = 0; i < layout->cnt; i++) {
+ dht_selfheal_dir_xattr_persubvol(frame, loc, layout, i, NULL);
+
+ if (--count == 0)
+ goto out;
+ }
+ /* if we are here, subvolcount > layout_count. subvols-per-directory
+ * option might be set here. We need to clear out layout from the
+ * non-participating subvolumes, else it will result in overlaps */
+ dummy = dht_layout_new(this, 1);
+ if (!dummy)
+ goto out;
+ dummy->commit_hash = layout->commit_hash;
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false == dht_is_subvol_in_layout(layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol(frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ if (--count == 0)
+ break;
+ }
+ }
+ dht_layout_unref(this, dummy);
out:
- this_call_cnt = dht_frame_return (frame);
+ return 0;
+}
- if (is_last_call (this_call_cnt)) {
- dht_selfheal_dir_setattr (frame, &local->loc, &local->stbuf, 0xffffff, layout);
+static int
+dht_selfheal_dir_xattr(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ int missing_xattr = 0;
+ int i = 0;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *dummy = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err != -1 || !layout->list[i].stop) {
+ /* err != -1 would mean xattr present on the directory
+ * or the directory is non existent.
+ * !layout->list[i].stop would mean layout absent
+ */
+
+ continue;
}
+ missing_xattr++;
+ }
+ /* Also account for subvolumes with no-layout. Used for zero'ing out
+ * the layouts and for setting quota key's if present */
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (_gf_false == dht_is_subvol_in_layout(layout, conf->subvolumes[i])) {
+ missing_xattr++;
+ }
+ }
+ gf_msg_trace(this->name, 0, "%d subvolumes missing xattr for %s",
+ missing_xattr, loc->path);
+ if (missing_xattr == 0) {
+ dht_selfheal_dir_finish(frame, this, 0, 1);
return 0;
+ }
+
+ local->call_cnt = missing_xattr;
+
+ if (gf_log_get_loglevel() >= GF_LOG_DEBUG)
+ dht_log_new_layout_for_dir_selfheal(this, loc, layout);
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err != -1 || !layout->list[i].stop)
+ continue;
+
+ dht_selfheal_dir_xattr_persubvol(frame, loc, layout, i, NULL);
+
+ if (--missing_xattr == 0)
+ break;
+ }
+ dummy = dht_layout_new(this, 1);
+ if (!dummy) {
+ gf_uuid_unparse(loc->gfid, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_DUMMY_ALLOC_FAILED,
+ "path=%s", loc->path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+ for (i = 0; i < conf->subvolume_cnt && missing_xattr; i++) {
+ if (_gf_false == dht_is_subvol_in_layout(layout, conf->subvolumes[i])) {
+ dht_selfheal_dir_xattr_persubvol(frame, loc, dummy, 0,
+ conf->subvolumes[i]);
+ missing_xattr--;
+ }
+ }
+
+ dht_layout_unref(this, dummy);
+out:
+ return 0;
}
-void
-dht_selfheal_dir_mkdir_setacl (dict_t *xattr, dict_t *dict)
+int
+dht_selfheal_dir_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
{
- data_t *acl_default = NULL;
- data_t *acl_access = NULL;
- xlator_t *this = NULL;
- int ret = -1;
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ int this_call_cnt = 0, ret = -1;
- GF_ASSERT (xattr);
- GF_ASSERT (dict);
+ local = frame->local;
+ layout = local->selfheal.layout;
- this = THIS;
- GF_ASSERT (this);
+ this_call_cnt = dht_frame_return(frame);
- acl_default = dict_get (xattr, POSIX_ACL_DEFAULT_XATTR);
+ if (is_last_call(this_call_cnt)) {
+ if (!local->heal_layout) {
+ gf_msg_trace(this->name, 0, "Skip heal layout for %s gfid = %s ",
+ local->loc.path, uuid_utoa(local->gfid));
- if (!acl_default) {
- gf_log (this->name, GF_LOG_DEBUG,
- "ACL_DEFAULT xattr not present");
- goto cont;
+ dht_selfheal_dir_finish(frame, this, 0, 1);
+ return 0;
}
- ret = dict_set (dict, POSIX_ACL_DEFAULT_XATTR, acl_default);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "Could not set ACL_DEFAULT xattr");
-cont:
- acl_access = dict_get (xattr, POSIX_ACL_ACCESS_XATTR);
- if (!acl_access) {
- gf_log (this->name, GF_LOG_DEBUG,
- "ACL_ACCESS xattr not present");
- goto out;
+ ret = dht_selfheal_layout_lock(frame, layout, _gf_false,
+ dht_selfheal_dir_xattr,
+ dht_should_heal_layout);
+
+ if (ret < 0) {
+ dht_selfheal_dir_finish(frame, this, -1, 1);
}
- ret = dict_set (dict, POSIX_ACL_ACCESS_XATTR, acl_access);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "Could not set ACL_ACCESS xattr");
+ }
-out:
- return;
+ return 0;
}
int
-dht_selfheal_dir_mkdir (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout, int force)
+dht_selfheal_dir_setattr(call_frame_t *frame, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dht_layout_t *layout)
{
- int missing_dirs = 0;
- int i = 0;
- int ret = -1;
- dht_local_t *local = NULL;
- xlator_t *this = NULL;
- dict_t *dict = NULL;
-
- local = frame->local;
- this = frame->this;
-
- for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == ENOENT || force)
- missing_dirs++;
+ int missing_attr = 0;
+ int i = 0, ret = -1;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ int cnt = 0;
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ /* We need to heal the attrs if:
+ * 1. Any directories were missing - the newly created dirs will need
+ * to have the correct attrs set
+ * 2. An existing dir does not have the correct permissions -they may
+ * have been changed when a brick was down.
+ */
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == -1)
+ missing_attr++;
+ }
+
+ if ((missing_attr == 0) && (local->need_attrheal == 0)) {
+ if (!local->heal_layout) {
+ gf_msg_trace(this->name, 0, "Skip heal layout for %s gfid = %s ",
+ loc->path, uuid_utoa(loc->gfid));
+ dht_selfheal_dir_finish(frame, this, 0, 1);
+ return 0;
}
+ ret = dht_selfheal_layout_lock(frame, layout, _gf_false,
+ dht_selfheal_dir_xattr,
+ dht_should_heal_layout);
- if (missing_dirs == 0) {
- dht_selfheal_dir_setattr (frame, loc, &local->stbuf, 0xffffffff, layout);
- return 0;
+ if (ret < 0) {
+ dht_selfheal_dir_finish(frame, this, -1, 1);
}
- local->call_cnt = missing_dirs;
- if (!uuid_is_null (local->gfid)) {
- dict = dict_new ();
- if (!dict)
- return -1;
+ return 0;
+ }
- ret = dict_set_static_bin (dict, "gfid-req", local->gfid, 16);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set gfid in dict", loc->path);
- } else if (local->params) {
- /* Send the dictionary from higher layers directly */
- dict = dict_ref (local->params);
- }
- /* Set acls */
- if (local->xattr && dict)
- dht_selfheal_dir_mkdir_setacl (local->xattr, dict);
+ cnt = local->call_cnt = conf->subvolume_cnt;
- if (!dict)
- gf_log (this->name, GF_LOG_WARNING,
- "dict is NULL, need to make sure gfids are same");
+ for (i = 0; i < cnt; i++) {
+ STACK_WIND(frame, dht_selfheal_dir_setattr_cbk, layout->list[i].xlator,
+ layout->list[i].xlator->fops->setattr, loc, stbuf, valid,
+ NULL);
+ }
+
+ return 0;
+}
+static int
+dht_selfheal_dir_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ dht_layout_t *layout = NULL;
+ xlator_t *prev = NULL;
+ xlator_t *subvol = NULL;
+ int i = 0, ret = -1;
+ int this_call_cnt = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+ layout = local->selfheal.layout;
+ prev = cookie;
+ subvol = prev;
+
+ if ((op_ret == 0) || ((op_ret == -1) && (op_errno == EEXIST))) {
for (i = 0; i < layout->cnt; i++) {
- if (layout->list[i].err == ENOENT || force) {
- gf_log (this->name, GF_LOG_DEBUG,
- "creating directory %s on subvol %s",
- loc->path, layout->list[i].xlator->name);
-
- STACK_WIND (frame, dht_selfheal_dir_mkdir_cbk,
- layout->list[i].xlator,
- layout->list[i].xlator->fops->mkdir,
- loc,
- st_mode_from_ia (local->stbuf.ia_prot,
- local->stbuf.ia_type),
- 0, dict);
- }
+ if (layout->list[i].xlator == subvol) {
+ layout->list[i].err = -1;
+ break;
+ }
}
+ }
+
+ if (op_ret) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
+ gf_smsg(this->name,
+ ((op_errno == EEXIST) ? GF_LOG_DEBUG : GF_LOG_WARNING),
+ op_errno, DHT_MSG_DIR_SELFHEAL_FAILED, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+ dht_iatt_merge(this, &local->preparent, preparent);
+ dht_iatt_merge(this, &local->postparent, postparent);
+ ret = 0;
- if (dict)
- dict_unref (dict);
+out:
+ this_call_cnt = dht_frame_return(frame);
- return 0;
-}
+ if (is_last_call(this_call_cnt)) {
+ dht_selfheal_dir_finish(frame, this, ret, 0);
+ dht_selfheal_dir_setattr(frame, &local->loc, &local->stbuf, 0xffffff,
+ layout);
+ }
+ return 0;
+}
-int
-dht_selfheal_layout_alloc_start (xlator_t *this, loc_t *loc,
- dht_layout_t *layout)
+static int
+dht_selfheal_dir_mkdir_lookup_done(call_frame_t *frame, xlator_t *this)
{
- int start = 0;
- uint32_t hashval = 0;
- int ret = 0;
+ dht_local_t *local = NULL;
+ int i = 0;
+ dict_t *dict = NULL;
+ dht_layout_t *layout = NULL;
+ loc_t *loc = NULL;
+ int cnt = 0;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO(this->private, err);
+
+ local = frame->local;
+ layout = local->layout;
+ loc = &local->loc;
+
+ if (!gf_uuid_is_null(local->gfid)) {
+ dict = dict_new();
+ if (!dict)
+ return -1;
- ret = dht_hash_compute (layout->type, loc->path, &hashval);
- if (ret == 0) {
- start = (hashval % layout->cnt);
+ ret = dict_set_gfuuid(dict, "gfid-req", local->gfid, true);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", loc->path, "key=gfid-req", NULL);
+ } else if (local->params) {
+ /* Send the dictionary from higher layers directly */
+
+ dict = dict_ref(local->params);
+ }
+ /* Code to update all extended attributed from local->xattr
+ to dict
+ */
+ dht_dir_set_heal_xattr(this, local, dict, local->xattr, NULL, NULL);
+
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_IS_NULL, NULL);
+ dict = dict_new();
+ if (!dict)
+ return -1;
+ }
+ ret = dict_set_flag(dict, GF_INTERNAL_CTX_KEY, GF_DHT_HEAL_DIR);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, "key=%s",
+ GF_INTERNAL_CTX_KEY, "path=%s", loc->path, NULL);
+ /* We can still continue. As heal can still happen
+ * unless quota limits have reached for the dir.
+ */
+ }
+
+ cnt = layout->cnt;
+ for (i = 0; i < cnt; i++) {
+ if (layout->list[i].err == ESTALE || layout->list[i].err == ENOENT ||
+ local->selfheal.force_mkdir) {
+ gf_msg_debug(this->name, 0, "Creating directory %s on subvol %s",
+ loc->path, layout->list[i].xlator->name);
+
+ STACK_WIND_COOKIE(
+ frame, dht_selfheal_dir_mkdir_cbk, layout->list[i].xlator,
+ layout->list[i].xlator, layout->list[i].xlator->fops->mkdir,
+ loc,
+ st_mode_from_ia(local->stbuf.ia_prot, local->stbuf.ia_type), 0,
+ dict);
}
+ }
+
+ if (dict)
+ dict_unref(dict);
- return start;
+ return 0;
+
+err:
+ dht_selfheal_dir_finish(frame, this, -1, 1);
+ return 0;
}
-static inline int
-dht_get_layout_count (xlator_t *this, dht_layout_t *layout, int new_layout)
+static int
+dht_selfheal_dir_mkdir_lookup_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent)
{
- int i = 0;
- int j = 0;
- int err = 0;
- int count = 0;
- dht_conf_t *conf = NULL;
-
- /* Gets in use only for replace-brick, remove-brick */
- conf = this->private;
- for (i = 0; i < layout->cnt; i++) {
- for (j = 0; j < conf->subvolume_cnt; j++) {
- if (conf->decommissioned_bricks[j] &&
- conf->decommissioned_bricks[j] == layout->list[i].xlator) {
- layout->list[i].err = -EINVAL;
- break;
- }
- }
+ dht_local_t *local = NULL;
+ int i = 0;
+ int this_call_cnt = 0;
+ int missing_dirs = 0;
+ dht_layout_t *layout = NULL;
+ xlator_t *prev = 0;
+ loc_t *loc = NULL;
+ char gfid_local[GF_UUID_BUF_SIZE] = {0};
+ int index = -1;
+
+ VALIDATE_OR_GOTO(this->private, err);
+
+ local = frame->local;
+ layout = local->layout;
+ loc = &local->loc;
+ prev = cookie;
+
+ if (!gf_uuid_is_null(local->gfid))
+ gf_uuid_unparse(local->gfid, gfid_local);
+
+ LOCK(&frame->lock);
+ {
+ index = dht_layout_index_for_subvol(layout, prev);
+ if ((op_ret < 0) && (op_errno == ENOENT || op_errno == ESTALE)) {
+ local->selfheal.hole_cnt = !local->selfheal.hole_cnt
+ ? 1
+ : local->selfheal.hole_cnt + 1;
+ /* the status might have changed. Update the layout with the
+ * new status
+ */
+ if (index >= 0) {
+ layout->list[index].err = op_errno;
+ }
}
- for (i = 0; i < layout->cnt; i++) {
- err = layout->list[i].err;
- if (err == -1 || err == 0) {
- layout->list[i].err = -1;
- count++;
- }
+ if (!op_ret) {
+ dht_iatt_merge(this, &local->stbuf, stbuf);
+ if (prev == local->mds_subvol) {
+ dict_unref(local->xattr);
+ local->xattr = dict_ref(xattr);
+ }
+ /* the status might have changed. Update the layout with the
+ * new status
+ */
+ if (index >= 0) {
+ layout->list[index].err = -1;
+ }
}
+ }
+ UNLOCK(&frame->lock);
+
+ this_call_cnt = dht_frame_return(frame);
+
+ if (is_last_call(this_call_cnt)) {
+ if (local->selfheal.hole_cnt == layout->cnt) {
+ gf_msg_debug(this->name, op_errno,
+ "Lookup failed, an rmdir could have "
+ "deleted this entry %s",
+ loc->name);
+ local->op_errno = op_errno;
+ goto err;
+ } else {
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == ENOENT ||
+ layout->list[i].err == ESTALE ||
+ local->selfheal.force_mkdir)
+ missing_dirs++;
+ }
+
+ if (missing_dirs == 0) {
+ dht_selfheal_dir_finish(frame, this, 0, 0);
+ dht_selfheal_dir_setattr(frame, loc, &local->stbuf, 0xffffffff,
+ layout);
+ return 0;
+ }
- /* no subvolume has enough space, but can't stop directory creation */
- if (!count || !new_layout) {
- for (i = 0; i < layout->cnt; i++) {
- err = layout->list[i].err;
- if (err == ENOSPC) {
- layout->list[i].err = -1;
- count++;
- }
- }
+ local->call_cnt = missing_dirs;
+ dht_selfheal_dir_mkdir_lookup_done(frame, this);
}
+ }
- count = ((layout->spread_cnt) ? layout->spread_cnt :
- ((count) ? count : 1));
+ return 0;
- return count;
+err:
+ dht_selfheal_dir_finish(frame, this, -1, 1);
+ return 0;
}
-
-dht_layout_t *
-dht_fix_layout_of_directory (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout)
+static int
+dht_selfheal_dir_mkdir_lock_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- uint32_t chunk = 0;
- uint32_t start = 0;
- uint32_t stop = 0;
- uint32_t overlap = 0;
- uint32_t max_overlap = 0;
- uint32_t chunk_begin = 0;
- int count = 0;
- int cnt = 0;
- int i = 0;
- int j = 0;
- int k = 0;
- int loop_cnt = 0;
- int start_subvol = 0;
- int *fix_array = NULL;
- xlator_t *this = NULL;
- dht_layout_t *new_layout = NULL;
- dht_conf_t *priv = NULL;
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int i = 0;
+ int ret = -1;
+ xlator_t *mds_subvol = NULL;
- this = frame->this;
- priv = this->private;
- local = frame->local;
+ VALIDATE_OR_GOTO(this->private, err);
- if (layout->type == DHT_HASH_TYPE_DM_USER) {
- gf_log (THIS->name, GF_LOG_DEBUG, "leaving %s alone",
- loc->path);
- goto done;
+ conf = this->private;
+ local = frame->local;
+ mds_subvol = local->mds_subvol;
+
+ local->call_cnt = conf->subvolume_cnt;
+
+ if (op_ret < 0) {
+ if (op_errno == EINVAL) {
+ local->call_cnt = 1;
+ dht_selfheal_dir_mkdir_lookup_done(frame, this);
+ return 0;
}
- count = cnt = dht_get_layout_count (this, layout, 0);
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, DHT_MSG_ENTRYLK_ERROR,
+ "path=%s", local->loc.path, NULL);
+
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ /* After getting locks, perform lookup again to ensure that the
+ directory was not deleted by a racing rmdir
+ */
+ if (!local->xattr_req)
+ local->xattr_req = dict_new();
+
+ ret = dict_set_int32(local->xattr_req, "list-xattr", 1);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED, "path=%s",
+ local->loc.path, NULL);
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (mds_subvol && conf->subvolumes[i] == mds_subvol) {
+ STACK_WIND_COOKIE(frame, dht_selfheal_dir_mkdir_lookup_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
+ } else {
+ STACK_WIND_COOKIE(frame, dht_selfheal_dir_mkdir_lookup_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ NULL);
+ }
+ }
- chunk = ((unsigned long) 0xffffffff) / ((cnt) ? cnt : 1);
+ return 0;
- start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout);
+err:
+ dht_selfheal_dir_finish(frame, this, -1, 1);
+ return 0;
+}
- fix_array = GF_CALLOC (sizeof (int), layout->cnt, gf_common_mt_char);
- if (!fix_array) {
- /* No fix, use the existing layout itself */
- goto done;
+static int
+dht_selfheal_dir_mkdir(call_frame_t *frame, loc_t *loc, dht_layout_t *layout,
+ int force)
+{
+ int missing_dirs = 0;
+ int i = 0;
+ int op_errno = 0;
+ int ret = -1;
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ conf = this->private;
+
+ local->selfheal.force_mkdir = force;
+ local->selfheal.hole_cnt = 0;
+
+ for (i = 0; i < layout->cnt; i++) {
+ if (layout->list[i].err == ENOENT || force)
+ missing_dirs++;
+ }
+
+ if (missing_dirs == 0) {
+ /* We don't need to create any directories. Proceed to heal the
+ * attrs and xattrs
+ */
+ if (!__is_root_gfid(local->stbuf.ia_gfid)) {
+ if (local->need_xattr_heal) {
+ local->need_xattr_heal = 0;
+ ret = dht_dir_xattr_heal(this, local, &op_errno);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s",
+ local->loc.path, "gfid=%s", local->gfid, NULL);
+ }
+ } else {
+ if (!gf_uuid_is_null(local->gfid))
+ gf_uuid_copy(loc->gfid, local->gfid);
+
+ ret = dht_common_mark_mdsxattr(frame, NULL, 0);
+ if (!ret)
+ return 0;
+
+ gf_smsg(this->name, GF_LOG_INFO, 0, DHT_MSG_SET_XATTR_FAILED,
+ "path=%s", local->loc.path, "gfid=%s", local->gfid,
+ NULL);
+ }
+ }
+ dht_selfheal_dir_setattr(frame, loc, &local->stbuf, 0xffffffff, layout);
+ return 0;
+ }
+
+ /* MDS xattr is populated only while DHT is having more than one
+ subvol.In case of graph switch while adding more dht subvols need to
+ consider hash subvol as a MDS to avoid MDS check failure at the time
+ of running fop on directory
+ */
+ if (!dict_get(local->xattr, conf->mds_xattr_key) &&
+ (conf->subvolume_cnt > 1)) {
+ if (local->hashed_subvol == NULL) {
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s",
+ loc->pargfid, "name=%s", loc->name, "path=%s",
+ loc->path, NULL);
+ goto err;
+ }
+ }
+ ret = dht_inode_ctx_mdsvol_set(local->inode, this,
+ local->hashed_subvol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SET_INODE_CTX_FAILED,
+ "Failed to set hashed subvol for %s on inode vol is %s",
+ local->loc.path,
+ local->hashed_subvol ? local->hashed_subvol->name : "NULL");
+ goto err;
}
+ }
+
+ if (local->hashed_subvol == NULL) {
+ local->hashed_subvol = dht_subvol_get_hashed(this, loc);
+ if (local->hashed_subvol == NULL) {
+ local->op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_WARNING, local->op_errno,
+ DHT_MSG_HASHED_SUBVOL_GET_FAILED, "gfid=%s", loc->pargfid,
+ "name=%s", loc->name, "path=%s", loc->path, NULL);
+ goto err;
+ }
+ }
+
+ local->current = &local->lock[0];
+ ret = dht_protect_namespace(frame, loc, local->hashed_subvol,
+ &local->current->ns,
+ dht_selfheal_dir_mkdir_lock_cbk);
+
+ if (ret < 0)
+ goto err;
- new_layout = dht_layout_new (this, priv->subvolume_cnt);
- if (!new_layout)
- goto done;
+ return 0;
+err:
+ return -1;
+}
- for (i = 0; i < new_layout->cnt; i++) {
- /* TODO: fix this in layout_alloc() itself */
- new_layout->list[i].err = -ENOENT;
- if (i < layout->cnt)
- new_layout->list[i].xlator = layout->list[i].xlator;
+static int
+dht_selfheal_layout_alloc_start(xlator_t *this, loc_t *loc,
+ dht_layout_t *layout)
+{
+ int start = 0;
+ uint32_t hashval = 0;
+ int ret = 0;
+ const char *str = NULL;
+ dht_conf_t *conf = NULL;
+ char buf[UUID_CANONICAL_FORM_LEN + 1] = {
+ 0,
+ };
+
+ conf = this->private;
+
+ if (conf->randomize_by_gfid) {
+ str = uuid_utoa_r(loc->gfid, buf);
+ } else {
+ str = loc->path;
+ }
+
+ ret = dht_hash_compute(this, layout->type, str, &hashval);
+ if (ret == 0) {
+ start = (hashval % layout->cnt);
+ }
+
+ return start;
+}
+
+static int
+dht_get_layout_count(xlator_t *this, dht_layout_t *layout, int new_layout)
+{
+ int i = 0;
+ int j = 0;
+ int err = 0;
+ int count = 0;
+ dht_conf_t *conf = NULL;
+
+ /* Gets in use only for replace-brick, remove-brick */
+ conf = this->private;
+ for (i = 0; i < layout->cnt; i++) {
+ for (j = 0; j < conf->subvolume_cnt; j++) {
+ if (conf->decommissioned_bricks[j] &&
+ conf->decommissioned_bricks[j] == layout->list[i].xlator) {
+ layout->list[i].err = EINVAL;
+ break;
+ }
+ }
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ err = layout->list[i].err;
+ if (err == -1 || err == 0 || err == ENOENT) {
+ /* Take this with a pinch of salt. The behaviour seems
+ * to be slightly different when this function is
+ * invoked from mkdir codepath. For eg., err == 0 in
+ * mkdir codepath means directory created but xattr
+ * is not set yet.
+ */
+
+ /* Setting list[i].err = -1 is an indication for
+ dht_selfheal_layout_new_directory() to assign
+ a range. We set it to -1 based on any one of
+ the three criteria:
+
+ - err == -1 already, which means directory
+ existed but layout was not set on it.
+
+ - err == 0, which means directory exists and
+ has an old layout piece which will be
+ overwritten now.
+
+ - err == ENOENT, which means directory does
+ not exist (possibly racing with mkdir or
+ finishing half done mkdir). The missing
+ directory will be attempted to be recreated.
+ */
+ count++;
+ if (!err)
+ layout->list[i].err = -1;
}
+ }
- /* Check if there are any overlap in layout, and give the proper fix */
+ /* no subvolume has enough space, but can't stop directory creation */
+ if (!count || !new_layout) {
for (i = 0; i < layout->cnt; i++) {
- /* No need to fix if 'err' is not '-1' */
- if (layout->list[i].err != -1)
- continue;
-
- /* If already existing layout is having no range, skip it */
- start = layout->list[i].start;
- stop = layout->list[i].stop;
- if ((stop - start) == 0)
- continue;
-
- max_overlap = 0;
-
- /* 'j' is used as starting point of each chunk */
- for (j = 1; j <= count; j++) {
- /* if chunk is already used, don't use it again */
- for (k = 0; k < i; k++)
- if (j == fix_array[k])
- break;
- if (k < i)
- continue;
-
- overlap = dht_find_overlap (i, (j-1), start, stop, chunk);
- if (max_overlap < overlap) {
- max_overlap = overlap;
- fix_array[i] = j;
- }
- }
+ err = layout->list[i].err;
+ if (err == ENOSPC) {
+ layout->list[i].err = -1;
+ count++;
+ }
+ }
+ }
- /* If we have any overlap, then use that itself as new
- layout for the subvolume */
- if (fix_array[i]) {
- chunk_begin = chunk * (fix_array[i] - 1);
- new_layout->list[i].err = -1;
- DHT_SET_LAYOUT_RANGE (new_layout, i, chunk_begin,
- chunk, cnt, loc->path);
- /* make sure to give (max - 1) as 'stop' range,
- if it is last chunk */
- if (fix_array[i] == count)
- new_layout->list[i].stop = 0xffffffff;
- if (--cnt == 0)
- goto done;
+ /* if layout->spread_cnt is set, check if it is <= available
+ * subvolumes (down brick and decommissioned bricks are considered
+ * un-available). Else return count (available up bricks) */
+ count = ((layout->spread_cnt && (layout->spread_cnt <= count))
+ ? layout->spread_cnt
+ : ((count) ? count : 1));
- }
- }
+ return count;
+}
- /* Now, look for layouts which are not having any overlaps
- and give it a fix */
- for (loop_cnt = 0, i = start_subvol; loop_cnt < new_layout->cnt;
- i++, loop_cnt++) {
- if (i == new_layout->cnt)
- i = 0;
+void
+dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new_layout);
- /* If 'fix_array[i]' is set, the layout is already fixed. */
- if (fix_array[i])
- continue;
+void
+dht_layout_range_swap(dht_layout_t *layout, int i, int j);
- if (layout->list[i].err != -1) {
- new_layout->list[i].err = layout->list[i].err;
- continue;
- }
+/*
+ * It's a bit icky using local variables in a macro, but it makes the rest
+ * of the code a lot clearer.
+ */
+#define OV_ENTRY(x, y) table[x * new->cnt + y]
+
+static void
+dht_selfheal_layout_maximize_overlap(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *new, dht_layout_t *old)
+{
+ int i = 0;
+ int j = 0;
+ uint32_t curr_overlap = 0;
+ uint32_t max_overlap = 0;
+ int max_overlap_idx = -1;
+ uint32_t overlap = 0;
+ uint32_t *table = NULL;
+
+ dht_layout_sort_volname(old);
+ /* Now both old_layout->list[] and new_layout->list[]
+ are match the same xlators/subvolumes. i.e,
+ old_layout->[i] and new_layout->[i] are referring
+ to the same subvolumes
+ */
+
+ /* Build a table of overlaps between new[i] and old[j]. */
+ table = alloca(sizeof(overlap) * old->cnt * new->cnt);
+ if (!table) {
+ return;
+ }
+ memset(table, 0, sizeof(overlap) * old->cnt * new->cnt);
+ for (i = 0; i < new->cnt; ++i) {
+ for (j = 0; j < old->cnt; ++j) {
+ OV_ENTRY(i, j) = dht_overlap_calc(old, j, new, i);
+ }
+ }
+
+ for (i = 0; i < new->cnt; i++) {
+ if (new->list[i].err > 0) {
+ /* Subvol might be marked for decommission
+ with EINVAL, or some other serious error
+ marked with positive errno.
+ */
+ continue;
+ }
- for (k = 1; k <= count; k++) {
- for (j = 0; j < new_layout->cnt; j++) {
- if (k == fix_array[j])
- break;
- }
- /* Didn't find any of the list begining with 'k' */
- if (j == new_layout->cnt)
- break;
+ max_overlap = 0;
+ max_overlap_idx = i;
+ for (j = (i + 1); j < new->cnt; ++j) {
+ if (new->list[j].err > 0) {
+ /* Subvol might be marked for decommission
+ with EINVAL, or some other serious error
+ marked with positive errno.
+ */
+ continue;
+ }
+ /* Calculate the overlap now. */
+ curr_overlap = OV_ENTRY(i, i) + OV_ENTRY(j, j);
+ /* Calculate the overlap after the proposed swap. */
+ overlap = OV_ENTRY(i, j) + OV_ENTRY(j, i);
+ /* Are we better than status quo? */
+ if (overlap > curr_overlap) {
+ overlap -= curr_overlap;
+ /* Are we better than the previous choice? */
+ if (overlap > max_overlap) {
+ max_overlap = overlap;
+ max_overlap_idx = j;
}
+ }
+ }
- fix_array[i] = k;
- chunk_begin = (k - 1) * chunk;
- new_layout->list[i].err = -1;
- DHT_SET_LAYOUT_RANGE (new_layout, i, chunk_begin, chunk, cnt,
- loc->path);
- /* make sure to give (max - 1) as 'stop' range,
- if it is last chunk */
- if (k == count)
- new_layout->list[i].stop = 0xffffffff;
- if (--cnt == 0)
- goto done;
+ if (max_overlap_idx != i) {
+ dht_layout_range_swap(new, i, max_overlap_idx);
+ /* Need to swap the table values too. */
+ for (j = 0; j < old->cnt; ++j) {
+ overlap = OV_ENTRY(i, j);
+ OV_ENTRY(i, j) = OV_ENTRY(max_overlap_idx, j);
+ OV_ENTRY(max_overlap_idx, j) = overlap;
+ }
}
+ }
+}
+static dht_layout_t *
+dht_fix_layout_of_directory(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout)
+{
+ int i = 0;
+ xlator_t *this = NULL;
+ dht_layout_t *new_layout = NULL;
+ dht_conf_t *priv = NULL;
+ dht_local_t *local = NULL;
+ uint32_t subvol_down = 0;
+ gf_boolean_t maximize_overlap = _gf_true;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ this = frame->this;
+ priv = this->private;
+ local = frame->local;
+
+ if (layout->type == DHT_HASH_TYPE_DM_USER) {
+ gf_msg_debug(THIS->name, 0, "leaving %s alone", loc->path);
+ goto done;
+ }
+
+ new_layout = dht_layout_new(this, priv->subvolume_cnt);
+ if (!new_layout) {
+ gf_uuid_unparse(loc->gfid, gfid);
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_MEM_ALLOC_FAILED,
+ "new_layout, path=%s", loc->path, "gfid=%s", gfid, NULL);
+ goto done;
+ }
+
+ /* If a subvolume is down, do not re-write the layout. */
+ dht_layout_anomalies(this, loc, layout, NULL, NULL, NULL, &subvol_down,
+ NULL, NULL);
+
+ if (subvol_down) {
+ gf_uuid_unparse(loc->gfid, gfid);
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LAYOUT_FIX_FAILED,
+ "subvol-down=%u", subvol_down, "Skipping-fix-layout", "path=%s",
+ loc->path, "gfid=%s", gfid, NULL);
+ GF_FREE(new_layout);
+ return NULL;
+ }
+
+ for (i = 0; i < new_layout->cnt; i++) {
+ if (layout->list[i].err != ENOSPC)
+ new_layout->list[i].err = layout->list[i].err;
+ else
+ new_layout->list[i].err = -1;
+
+ new_layout->list[i].xlator = layout->list[i].xlator;
+ }
+
+ new_layout->commit_hash = layout->commit_hash;
+
+ if (priv->du_stats) {
+ for (i = 0; i < priv->subvolume_cnt; ++i) {
+ gf_smsg(this->name, GF_LOG_DEBUG, 0, DHT_MSG_SUBVOL_INFO,
+ "index=%d", i, "name=%s", priv->subvolumes[i]->name,
+ "chunks=%u", priv->du_stats[i].chunks, "path=%s", loc->path,
+ NULL);
+
+ /* Maximize overlap if the bricks are all the same
+ * size.
+ * This is probably not going to be very common on
+ * live setups but will benefit our regression tests
+ */
+ if (i && (priv->du_stats[i].chunks != priv->du_stats[0].chunks)) {
+ maximize_overlap = _gf_false;
+ }
+ }
+ } else {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_DISK_USAGE_STATUS,
+ NULL);
+ }
+
+ /* First give it a layout as though it is a new directory. This
+ ensures rotation to kick in */
+ dht_layout_sort_volname(new_layout);
+ dht_selfheal_layout_new_directory(frame, loc, new_layout);
+
+ /* Maximize overlap if weighted-rebalance is disabled */
+ if (!priv->do_weighting)
+ maximize_overlap = _gf_true;
+
+ /* Now selectively re-assign ranges only when it helps */
+ if (maximize_overlap) {
+ dht_selfheal_layout_maximize_overlap(frame, loc, new_layout, layout);
+ }
done:
- if (new_layout) {
- /* Now that the new layout has all the proper layout, change the
- inode context */
- dht_layout_set (this, loc->inode, new_layout);
+ if (new_layout) {
+ /* Make sure the extra 'ref' for existing layout is removed */
+ dht_layout_unref(this, local->layout);
- /* Make sure the extra 'ref' for existing layout is removed */
- dht_layout_unref (this, local->layout);
+ local->layout = new_layout;
+ }
- local->layout = new_layout;
- }
+ return local->layout;
+}
- if (fix_array)
- GF_FREE (fix_array);
+/*
+ * Having to call this 2x for each entry in the layout is pretty horrible, but
+ * that's what all of this layout-sorting nonsense gets us.
+ */
+static uint32_t
+dht_get_chunks_from_xl(xlator_t *parent, xlator_t *child)
+{
+ dht_conf_t *priv = parent->private;
+ xlator_list_t *trav;
+ uint32_t index = 0;
- return local->layout;
-}
+ if (!priv->du_stats) {
+ return 0;
+ }
+ for (trav = parent->children; trav; trav = trav->next) {
+ if (trav->xlator == child) {
+ return priv->du_stats[index].chunks;
+ }
+ ++index;
+ }
+
+ return 0;
+}
void
-dht_selfheal_layout_new_directory (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout)
-{
- xlator_t *this = NULL;
- uint32_t chunk = 0;
- int i = 0;
- uint32_t start = 0;
- int cnt = 0;
- int err = 0;
- int start_subvol = 0;
-
- this = frame->this;
-
- cnt = dht_get_layout_count (this, layout, 1);
-
- chunk = ((unsigned long) 0xffffffff) / ((cnt) ? cnt : 1);
-
- start_subvol = dht_selfheal_layout_alloc_start (this, loc, layout);
-
- for (i = start_subvol; i < layout->cnt; i++) {
- err = layout->list[i].err;
- if (err == -1) {
- DHT_SET_LAYOUT_RANGE(layout, i, start, chunk,
- cnt, loc->path);
- if (--cnt == 0) {
- layout->list[i].stop = 0xffffffff;
- goto done;
- }
- start += chunk;
- }
+dht_selfheal_layout_new_directory(call_frame_t *frame, loc_t *loc,
+ dht_layout_t *layout)
+{
+ xlator_t *this = NULL;
+ double chunk = 0;
+ int i = 0;
+ uint32_t start = 0;
+ int bricks_to_use = 0;
+ int err = 0;
+ int start_subvol = 0;
+ uint32_t curr_size;
+ uint32_t range_size;
+ uint64_t total_size = 0;
+ int real_i;
+ dht_conf_t *priv;
+ gf_boolean_t weight_by_size;
+ int bricks_used = 0;
+
+ this = frame->this;
+ priv = this->private;
+ weight_by_size = priv->do_weighting;
+
+ bricks_to_use = dht_get_layout_count(this, layout, 1);
+ GF_ASSERT(bricks_to_use > 0);
+
+ bricks_used = 0;
+ for (i = 0; i < layout->cnt; ++i) {
+ err = layout->list[i].err;
+ if ((err != -1) && (err != ENOENT)) {
+ continue;
}
-
- for (i = 0; i < start_subvol; i++) {
- err = layout->list[i].err;
- if (err == -1) {
- DHT_SET_LAYOUT_RANGE(layout, i, start, chunk,
- cnt, loc->path);
- if (--cnt == 0) {
- layout->list[i].stop = 0xffffffff;
- goto done;
- }
- start += chunk;
- }
+ curr_size = dht_get_chunks_from_xl(this, layout->list[i].xlator);
+ if (!curr_size) {
+ weight_by_size = _gf_false;
+ break;
+ }
+ total_size += curr_size;
+ if (++bricks_used >= bricks_to_use) {
+ break;
}
+ }
+
+ if (weight_by_size && total_size) {
+ /* We know total_size is not zero. */
+ chunk = ((double)0xffffffff) / ((double)total_size);
+ gf_msg_debug(this->name, 0,
+ "chunk size = 0xffffffff / %" PRIu64 " = %f", total_size,
+ chunk);
+ } else {
+ weight_by_size = _gf_false;
+ chunk = ((unsigned long)0xffffffff) / bricks_to_use;
+ }
+
+ start_subvol = dht_selfheal_layout_alloc_start(this, loc, layout);
+
+ /* clear out the range, as we are re-computing here */
+ DHT_RESET_LAYOUT_RANGE(layout);
+
+ /*
+ * OK, what's this "real_i" stuff about? This used to be two loops -
+ * from start_subvol to layout->cnt-1, then from 0 to start_subvol-1.
+ * That way is practically an open invitation to bugs when only one
+ * of the loops is updated. Using real_i and modulo operators to make
+ * it one loop avoids this problem. Remember, folks: it's everyone's
+ * responsibility to help stamp out copy/paste abuse.
+ */
+ bricks_used = 0;
+ for (real_i = 0; real_i < layout->cnt; real_i++) {
+ i = (real_i + start_subvol) % layout->cnt;
+ err = layout->list[i].err;
+ if ((err != -1) && (err != ENOENT)) {
+ continue;
+ }
+ if (weight_by_size) {
+ curr_size = dht_get_chunks_from_xl(this, layout->list[i].xlator);
+ if (!curr_size) {
+ continue;
+ }
+ } else {
+ curr_size = 1;
+ }
+ range_size = chunk * curr_size;
+ gf_msg_debug(this->name, 0, "assigning range size 0x%x to %s",
+ range_size, layout->list[i].xlator->name);
+ DHT_SET_LAYOUT_RANGE(layout, i, start, range_size, loc->path);
+ if (++bricks_used >= bricks_to_use) {
+ layout->list[i].stop = 0xffffffff;
+ goto done;
+ }
+ start += range_size;
+ }
done:
- return;
+ return;
+}
+
+static int
+dht_selfheal_dir_getafix(call_frame_t *frame, loc_t *loc, dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ uint32_t holes = 0;
+ int ret = -1;
+ int i = -1;
+ uint32_t overlaps = 0;
+
+ local = frame->local;
+
+ holes = local->selfheal.hole_cnt;
+ overlaps = local->selfheal.overlaps_cnt;
+
+ if (holes || overlaps) {
+ /* If the layout has anomalies which would change the hash
+ * ranges, then we need to reset the commit_hash for this
+ * directory, as the layout would change and things may not
+ * be in place as expected */
+ layout->commit_hash = DHT_LAYOUT_HASH_INVALID;
+ dht_selfheal_layout_new_directory(frame, loc, layout);
+ ret = 0;
+ }
+
+ for (i = 0; i < layout->cnt; i++) {
+ /* directory not present */
+ if (layout->list[i].err == ENOENT) {
+ ret = 0;
+ break;
+ }
+ }
+
+ /* TODO: give a fix to these non-virgins */
+
+ return ret;
}
int
-dht_selfheal_dir_getafix (call_frame_t *frame, loc_t *loc,
- dht_layout_t *layout)
+dht_selfheal_new_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout)
{
- dht_conf_t *conf = NULL;
- xlator_t *this = NULL;
- dht_local_t *local = NULL;
- int missing = -1;
- int down = -1;
- int holes = -1;
- int ret = -1;
- int i = -1;
- int overlaps = -1;
+ dht_local_t *local = NULL;
+ int ret = 0;
+ inode_t *linked_inode = NULL, *inode = NULL;
+ loc_t *loc = NULL;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ int32_t op_errno = EIO;
- this = frame->this;
- conf = this->private;
- local = frame->local;
+ local = frame->local;
- missing = local->selfheal.missing;
- down = local->selfheal.down;
- holes = local->selfheal.hole_cnt;
- overlaps = local->selfheal.overlaps_cnt;
+ loc = &local->loc;
- if ((missing + down) == conf->subvolume_cnt) {
- dht_selfheal_layout_new_directory (frame, loc, layout);
- ret = 0;
- }
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
- if (holes <= down) {
- /* the down subvol might fill up the holes */
- ret = 0;
- }
+ linked_inode = inode_link(loc->inode, loc->parent, loc->name,
+ &local->stbuf);
+ if (!linked_inode) {
+ gf_smsg(frame->this->name, GF_LOG_WARNING, 0, DHT_MSG_LINK_INODE_FAILED,
+ "pgfid=%s", pgfid, "name=%s", loc->name, "gfid=%s", gfid, NULL);
+ ret = -1;
+ goto out;
+ }
- if (holes || overlaps) {
- dht_selfheal_layout_new_directory (frame, loc, layout);
- ret = 0;
- }
+ inode = loc->inode;
+ loc->inode = linked_inode;
+ inode_unref(inode);
- for (i = 0; i < layout->cnt; i++) {
- /* directory not present */
- if (layout->list[i].err == ENOENT) {
- ret = 0;
- break;
- }
- }
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref(frame->this, layout);
+
+ dht_layout_sort_volname(layout);
+ dht_selfheal_layout_new_directory(frame, &local->loc, layout);
+
+ op_errno = ENOMEM;
+ ret = dht_selfheal_layout_lock(frame, layout, _gf_true,
+ dht_selfheal_dir_xattr,
+ dht_should_heal_layout);
- /* TODO: give a fix to these non-virgins */
+out:
+ if (ret < 0) {
+ dir_cbk(frame, NULL, frame->this, -1, op_errno, NULL);
+ }
- return ret;
+ return 0;
}
int
-dht_selfheal_new_directory (call_frame_t *frame,
- dht_selfheal_dir_cbk_t dir_cbk,
- dht_layout_t *layout)
+dht_fix_directory_layout(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ dht_layout_t *layout)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ dht_layout_t *tmp_layout = NULL;
+ int ret = 0;
- local = frame->local;
+ local = frame->local;
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = dht_layout_ref (frame->this, layout);
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref(frame->this, layout);
- dht_layout_sort_volname (layout);
- dht_selfheal_layout_new_directory (frame, &local->loc, layout);
- dht_selfheal_dir_xattr (frame, &local->loc, layout);
- return 0;
+ /* No layout sorting required here */
+ tmp_layout = dht_fix_layout_of_directory(frame, &local->loc, layout);
+ if (!tmp_layout) {
+ return -1;
+ }
+
+ ret = dht_selfheal_layout_lock(frame, tmp_layout, _gf_false,
+ dht_fix_dir_xattr, dht_should_fix_layout);
+
+ return ret;
+}
+
+int
+dht_selfheal_directory(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ loc_t *loc, dht_layout_t *layout)
+{
+ dht_local_t *local = NULL;
+ xlator_t *this = NULL;
+ uint32_t down = 0;
+ uint32_t misc = 0;
+ int ret = 0;
+ char pgfid[GF_UUID_BUF_SIZE] = {0};
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ inode_t *linked_inode = NULL, *inode = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref(this, layout);
+
+ if (local->need_attrheal) {
+ if (__is_root_gfid(local->stbuf.ia_gfid)) {
+ local->stbuf.ia_gid = local->prebuf.ia_gid;
+ local->stbuf.ia_uid = local->prebuf.ia_uid;
+
+ local->stbuf.ia_ctime = local->prebuf.ia_ctime;
+ local->stbuf.ia_ctime_nsec = local->prebuf.ia_ctime_nsec;
+ local->stbuf.ia_prot = local->prebuf.ia_prot;
+
+ } else if (!IA_ISINVAL(local->mds_stbuf.ia_type)) {
+ local->stbuf = local->mds_stbuf;
+ }
+ }
+
+ if (!__is_root_gfid(local->stbuf.ia_gfid)) {
+ gf_uuid_unparse(local->stbuf.ia_gfid, gfid);
+ gf_uuid_unparse(loc->parent->gfid, pgfid);
+
+ linked_inode = inode_link(loc->inode, loc->parent, loc->name,
+ &local->stbuf);
+ if (!linked_inode) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_LINK_INODE_FAILED,
+ "pgfid=%s", pgfid, "name=%s", loc->name, "gfid=%s", gfid,
+ NULL);
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ inode = loc->inode;
+ loc->inode = linked_inode;
+ inode_unref(inode);
+ }
+
+ if (local->need_xattr_heal && (local->mds_xattr)) {
+ dht_dir_set_heal_xattr(this, local, local->xattr, local->mds_xattr,
+ NULL, NULL);
+ dict_unref(local->mds_xattr);
+ local->mds_xattr = NULL;
+ }
+
+ dht_layout_anomalies(this, loc, layout, &local->selfheal.hole_cnt,
+ &local->selfheal.overlaps_cnt,
+ &local->selfheal.missing_cnt, &local->selfheal.down,
+ &local->selfheal.misc, NULL);
+
+ down = local->selfheal.down;
+ misc = local->selfheal.misc;
+
+ if (down) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SELFHEAL_FAILED,
+ "path=%s", loc->path, "subvol-down=%d", down, "Not-fixing",
+ "gfid=%s", gfid, NULL);
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ if (misc) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_SELFHEAL_FAILED,
+ "path=%s", loc->path, "misc=%d", misc, "unrecoverable-errors",
+ "gfid=%s", gfid, NULL);
+
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ dht_layout_sort_volname(layout);
+ local->heal_layout = _gf_true;
+
+ /* Ignore return value as it can be inferred from result of
+ * dht_layout_anomalies
+ */
+ dht_selfheal_dir_getafix(frame, loc, layout);
+
+ if (!(local->selfheal.hole_cnt || local->selfheal.overlaps_cnt ||
+ local->selfheal.missing_cnt)) {
+ local->heal_layout = _gf_false;
+ }
+
+ ret = dht_selfheal_dir_mkdir(frame, loc, layout, 0);
+ if (ret < 0) {
+ ret = 0;
+ goto sorry_no_fix;
+ }
+
+ return 0;
+
+sorry_no_fix:
+ /* TODO: need to put appropriate local->op_errno */
+ dht_selfheal_dir_finish(frame, this, ret, 1);
+
+ return 0;
}
int
-dht_fix_directory_layout (call_frame_t *frame,
- dht_selfheal_dir_cbk_t dir_cbk,
- dht_layout_t *layout)
+dht_selfheal_restore(call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
+ loc_t *loc, dht_layout_t *layout)
{
- dht_local_t *local = NULL;
- dht_layout_t *tmp_layout = NULL;
+ int ret = 0;
+ dht_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = dht_layout_ref (frame->this, layout);
+ local->selfheal.dir_cbk = dir_cbk;
+ local->selfheal.layout = dht_layout_ref(frame->this, layout);
- /* No layout sorting required here */
- tmp_layout = dht_fix_layout_of_directory (frame, &local->loc, layout);
- dht_fix_dir_xattr (frame, &local->loc, tmp_layout);
+ ret = dht_selfheal_dir_mkdir(frame, loc, layout, 1);
- return 0;
+ return ret;
}
+int
+dht_dir_heal_xattrs(void *data)
+{
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *mds_subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ dict_t *user_xattr = NULL;
+ dict_t *internal_xattr = NULL;
+ dict_t *mds_xattr = NULL;
+ dict_t *xdata = NULL;
+ int call_cnt = 0;
+ int ret = -1;
+ int uret = 0;
+ int uflag = 0;
+ int i = 0;
+ int xattr_hashed = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ int32_t allzero[1] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", data, out);
+
+ frame = data;
+ local = frame->local;
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, local, out);
+ mds_subvol = local->mds_subvol;
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+ gf_uuid_unparse(local->loc.gfid, gfid);
+
+ if (!mds_subvol) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_MDS_SUBVOL, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+
+ if ((local->loc.inode && gf_uuid_is_null(local->loc.inode->gfid)) ||
+ gf_uuid_is_null(local->loc.gfid)) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_GFID_NOT_PRESENT,
+ "skip-heal path=%s", local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+
+ internal_xattr = dict_new();
+ if (!internal_xattr) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
+ goto out;
+ }
+ xdata = dict_new();
+ if (!xdata) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
+ goto out;
+ }
+
+ call_cnt = conf->subvolume_cnt;
+
+ user_xattr = dict_new();
+ if (!user_xattr) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_CREATE_FAILED,
+ "dictionary", NULL);
+ goto out;
+ }
+
+ ret = syncop_listxattr(local->mds_subvol, &local->loc, &mds_xattr, NULL,
+ NULL);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, DHT_MSG_LIST_XATTRS_FAILED,
+ "path=%s", local->loc.path, "name=%s", local->mds_subvol->name,
+ NULL);
+ }
+
+ if (!mds_xattr)
+ goto out;
+
+ dht_dir_set_heal_xattr(this, local, user_xattr, mds_xattr, &uret, &uflag);
+
+ /* To set quota related xattr need to set GLUSTERFS_INTERNAL_FOP_KEY
+ * key value to 1
+ */
+ if (dict_get(user_xattr, QUOTA_LIMIT_KEY) ||
+ dict_get(user_xattr, QUOTA_LIMIT_OBJECTS_KEY)) {
+ ret = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "key=%s", GLUSTERFS_INTERNAL_FOP_KEY, "path=%s",
+ local->loc.path, NULL);
+ goto out;
+ }
+ }
+ if (uret <= 0 && !uflag)
+ goto out;
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = conf->subvolumes[i];
+ if (subvol == mds_subvol)
+ continue;
+ if (uret || uflag) {
+ /* Custom xattr heal is required - let posix handle it */
+ ret = dict_set_int8(xdata, "sync_backend_xattrs", _gf_true);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "path=%s", local->loc.path, "key=%s",
+ "sync_backend_xattrs", NULL);
+ goto out;
+ }
+
+ ret = syncop_setxattr(subvol, &local->loc, user_xattr, 0, xdata,
+ NULL);
+ if (ret) {
+ xattr_hashed = 1;
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED,
+ "set-user-xattr-failed path=%s", local->loc.path,
+ "subvol=%s", subvol->name, "gfid=%s", gfid, NULL);
+ } else {
+ dict_del(xdata, "sync_backend_xattrs");
+ }
+ }
+ }
+ /* After heal all custom xattr reset internal MDS xattr to 0 */
+ if (!xattr_hashed) {
+ ret = dht_dict_set_array(internal_xattr, conf->mds_xattr_key, allzero,
+ 1);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, DHT_MSG_DICT_SET_FAILED,
+ "key=%s", conf->mds_xattr_key, "path=%s", local->loc.path,
+ NULL);
+ goto out;
+ }
+ ret = syncop_setxattr(mds_subvol, &local->loc, internal_xattr, 0, NULL,
+ NULL);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_XATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", mds_subvol->name, "gfid=%s", gfid, NULL);
+ }
+ }
+
+out:
+ if (user_xattr)
+ dict_unref(user_xattr);
+ if (mds_xattr)
+ dict_unref(mds_xattr);
+ if (internal_xattr)
+ dict_unref(internal_xattr);
+ if (xdata)
+ dict_unref(xdata);
+ return 0;
+}
int
-dht_selfheal_directory (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
- loc_t *loc, dht_layout_t *layout)
+dht_dir_heal_xattrs_done(int ret, call_frame_t *sync_frame, void *data)
{
- dht_local_t *local = NULL;
- uint32_t down = 0;
- uint32_t misc = 0;
- int ret = 0;
- xlator_t *this = NULL;
+ DHT_STACK_DESTROY(sync_frame);
+ return 0;
+}
- local = frame->local;
- this = frame->this;
+int
+dht_dir_attr_heal(void *data)
+{
+ call_frame_t *frame = NULL;
+ dht_local_t *local = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *mds_subvol = NULL;
+ xlator_t *this = NULL;
+ dht_conf_t *conf = NULL;
+ int call_cnt = 0;
+ int ret = -1;
+ int i = 0;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ GF_VALIDATE_OR_GOTO("dht", data, out);
+
+ frame = data;
+ local = frame->local;
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", local, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO("dht", conf, out);
+
+ mds_subvol = local->mds_subvol;
+ call_cnt = conf->subvolume_cnt;
+
+ if (!__is_root_gfid(local->stbuf.ia_gfid) && (!mds_subvol)) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_NO_MDS_SUBVOL, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+
+ if (!__is_root_gfid(local->stbuf.ia_gfid)) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->subvolumes[i] == mds_subvol) {
+ if (!conf->subvolume_status[i]) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_MDS_DOWN_UNABLE_TO_SET, "path=%s",
+ local->loc.path, "gfid=%s", gfid, NULL);
+ goto out;
+ }
+ }
+ }
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ subvol = conf->subvolumes[i];
+ if (!subvol || subvol == mds_subvol)
+ continue;
+ if (__is_root_gfid(local->stbuf.ia_gfid)) {
+ ret = syncop_setattr(
+ subvol, &local->loc, &local->stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL,
+ NULL, NULL, NULL);
+ } else {
+ ret = syncop_setattr(
+ subvol, &local->loc, &local->mds_stbuf,
+ (GF_SET_ATTR_UID | GF_SET_ATTR_GID | GF_SET_ATTR_MODE), NULL,
+ NULL, NULL, NULL);
+ }
+
+ if (ret) {
+ gf_uuid_unparse(local->loc.gfid, gfid);
- dht_layout_anomalies (this, loc, layout,
- &local->selfheal.hole_cnt,
- &local->selfheal.overlaps_cnt,
- &local->selfheal.missing,
- &local->selfheal.down,
- &local->selfheal.misc);
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ DHT_MSG_DIR_ATTR_HEAL_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", subvol->name, "gfid=%s", gfid, NULL);
+ }
+ }
+out:
+ return 0;
+}
- down = local->selfheal.down;
- misc = local->selfheal.misc;
+int
+dht_dir_attr_heal_done(int ret, call_frame_t *sync_frame, void *data)
+{
+ DHT_STACK_DESTROY(sync_frame);
+ return 0;
+}
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = dht_layout_ref (this, layout);
+/* EXIT: dht_update_commit_hash_for_layout */
+static int
+dht_update_commit_hash_for_layout_done(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
- if (down) {
- gf_log (this->name, GF_LOG_WARNING,
- "%d subvolumes down -- not fixing", down);
- ret = 0;
- goto sorry_no_fix;
+ local = frame->local;
+
+ /* preserve oldest error */
+ if (op_ret && !local->op_ret) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+
+ DHT_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno, NULL);
+
+ return 0;
+}
+
+static int
+dht_update_commit_hash_for_layout_unlock(call_frame_t *frame, xlator_t *this)
+{
+ dht_local_t *local = NULL;
+ int ret = 0;
+
+ local = frame->local;
+
+ ret = dht_unlock_inodelk(frame, local->lock[0].layout.my_layout.locks,
+ local->lock[0].layout.my_layout.lk_count,
+ dht_update_commit_hash_for_layout_done);
+ if (ret < 0) {
+ /* preserve oldest error, just ... */
+ if (!local->op_ret) {
+ local->op_errno = errno;
+ local->op_ret = -1;
}
- if (misc) {
- gf_log (this->name, GF_LOG_WARNING,
- "%d subvolumes have unrecoverable errors", misc);
- ret = 0;
- goto sorry_no_fix;
+ gf_smsg(this->name, GF_LOG_WARNING, errno, DHT_MSG_WIND_UNLOCK_FAILED,
+ "path=%s", local->loc.path, NULL);
+
+ dht_update_commit_hash_for_layout_done(frame, NULL, this, 0, 0, NULL);
+ }
+
+ return 0;
+}
+
+static int
+dht_update_commit_hash_for_layout_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int this_call_cnt = 0;
+
+ local = frame->local;
+
+ LOCK(&frame->lock);
+ /* store first failure, just because */
+ if (op_ret && !local->op_ret) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ UNLOCK(&frame->lock);
+
+ this_call_cnt = dht_frame_return(frame);
+
+ if (is_last_call(this_call_cnt)) {
+ dht_update_commit_hash_for_layout_unlock(frame, this);
+ }
+
+ return 0;
+}
+
+static int
+dht_update_commit_hash_for_layout_resume(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1, i = 0, j = 0;
+ dht_conf_t *conf = NULL;
+ dht_layout_t *layout = NULL;
+ int32_t *disk_layout = NULL;
+ dict_t **xattr = NULL;
+
+ local = frame->local;
+ conf = frame->this->private;
+ count = conf->local_subvols_cnt;
+ layout = local->layout;
+
+ if (op_ret < 0) {
+ goto err_done;
+ }
+
+ /* We precreate the xattr list as we cannot change call count post the
+ * first wind as we may never continue from there. So we finish prep
+ * work before winding the setxattrs */
+ xattr = GF_CALLOC(count, sizeof(*xattr), gf_common_mt_char);
+ if (!xattr) {
+ local->op_errno = errno;
+
+ gf_smsg(this->name, GF_LOG_WARNING, errno, DHT_MSG_COMMIT_HASH_FAILED,
+ "allocation-failed path=%s", local->loc.path, NULL);
+
+ goto err;
+ }
+
+ for (i = 0; i < count; i++) {
+ /* find the layout index for the subvolume */
+ ret = dht_layout_index_for_subvol(layout, conf->local_subvols[i]);
+ if (ret < 0) {
+ local->op_errno = ENOENT;
+
+ gf_smsg(this->name, GF_LOG_WARNING, 0, DHT_MSG_COMMIT_HASH_FAILED,
+ "path=%s", local->loc.path, "subvol=%s",
+ conf->local_subvols[i]->name, "find-disk-layout-failed",
+ NULL);
+
+ goto err;
}
+ j = ret;
- dht_layout_sort_volname (layout);
- ret = dht_selfheal_dir_getafix (frame, loc, layout);
+ /* update the commit hash for the layout */
+ layout->list[j].commit_hash = layout->commit_hash;
+ /* extract the current layout */
+ ret = dht_disk_layout_extract(this, layout, j, &disk_layout);
if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "not able to form layout for the directory");
- goto sorry_no_fix;
+ local->op_errno = errno;
+
+ gf_smsg(this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_COMMIT_HASH_FAILED, "path=%s", local->loc.path,
+ "subvol=%s", conf->local_subvols[i]->name,
+ "extract-disk-layout-failed", NULL);
+
+ goto err;
}
- dht_selfheal_dir_mkdir (frame, loc, layout, 0);
+ xattr[i] = dict_new();
+ if (!xattr[i]) {
+ local->op_errno = errno;
- return 0;
+ gf_smsg(this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_COMMIT_HASH_FAILED, "path=%s Allocation-failed",
+ local->loc.path, NULL);
-sorry_no_fix:
- /* TODO: need to put appropriate local->op_errno */
- dht_selfheal_dir_finish (frame, this, ret);
+ goto err;
+ }
- return 0;
-}
+ ret = dict_set_bin(xattr[i], conf->xattr_name, disk_layout, 4 * 4);
+ if (ret != 0) {
+ local->op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ DHT_MSG_DIR_SELFHEAL_XATTR_FAILED, "path=%s",
+ local->loc.path, "subvol=%s", conf->local_subvols[i]->name,
+ "set-xattr-failed", NULL);
-int
-dht_selfheal_restore (call_frame_t *frame, dht_selfheal_dir_cbk_t dir_cbk,
- loc_t *loc, dht_layout_t *layout)
-{
- int ret = 0;
- dht_local_t *local = NULL;
+ goto err;
+ }
+ disk_layout = NULL;
- local = frame->local;
+ gf_msg_trace(this->name, 0,
+ "setting commit hash %u on subvolume %s"
+ " for %s",
+ layout->list[j].commit_hash, conf->local_subvols[i]->name,
+ local->loc.path);
+ }
+
+ /* wind the setting of the commit hash across the local subvols */
+ local->call_cnt = count;
+ local->op_ret = 0;
+ local->op_errno = 0;
+ for (i = 0; i < count; i++) {
+ STACK_WIND(frame, dht_update_commit_hash_for_layout_cbk,
+ conf->local_subvols[i],
+ conf->local_subvols[i]->fops->setxattr, &local->loc,
+ xattr[i], 0, NULL);
+ }
+ for (i = 0; i < count; i++)
+ dict_unref(xattr[i]);
+ GF_FREE(xattr);
+
+ return 0;
+err:
+ if (xattr) {
+ for (i = 0; i < count; i++) {
+ if (xattr[i])
+ dict_unref(xattr[i]);
+ }
- local->selfheal.dir_cbk = dir_cbk;
- local->selfheal.layout = dht_layout_ref (frame->this, layout);
+ GF_FREE(xattr);
+ }
- ret = dht_selfheal_dir_mkdir (frame, loc, layout, 1);
+ GF_FREE(disk_layout);
+
+ local->op_ret = -1;
+
+ dht_update_commit_hash_for_layout_unlock(frame, this);
+
+ return 0;
+err_done:
+ local->op_ret = -1;
+
+ dht_update_commit_hash_for_layout_done(frame, NULL, this, 0, 0, NULL);
+
+ return 0;
+}
+
+/* ENTER: dht_update_commit_hash_for_layout (see EXIT above)
+ * This function is invoked from rebalance only.
+ * As a result, the check here is simple enough to see if defrag is present
+ * in the conf, as other data would be populated appropriately if so.
+ * If ever this was to be used in other code paths, checks would need to
+ * change.
+ *
+ * Functional details:
+ * - Lock the inodes on the subvols that we want the commit hash updated
+ * - Update each layout with the inode layout, modified to take in the new
+ * commit hash.
+ * - Unlock and return.
+ */
+int
+dht_update_commit_hash_for_layout(call_frame_t *frame)
+{
+ dht_local_t *local = NULL;
+ int count = 1, ret = -1, i = 0;
+ dht_lock_t **lk_array = NULL;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", frame, err);
+ GF_VALIDATE_OR_GOTO(frame->this->name, frame->local, err);
+
+ local = frame->local;
+ conf = frame->this->private;
+
+ if (!conf->defrag)
+ goto err;
+
+ count = conf->local_subvols_cnt;
+ lk_array = GF_CALLOC(count, sizeof(*lk_array), gf_common_mt_char);
+ if (lk_array == NULL)
+ goto err;
+
+ for (i = 0; i < count; i++) {
+ lk_array[i] = dht_lock_new(frame->this, conf->local_subvols[i],
+ &local->loc, F_WRLCK, DHT_LAYOUT_HEAL_DOMAIN,
+ NULL, FAIL_ON_ANY_ERROR);
+ if (lk_array[i] == NULL)
+ goto err;
+ }
+
+ local->lock[0].layout.my_layout.locks = lk_array;
+ local->lock[0].layout.my_layout.lk_count = count;
+
+ ret = dht_blocking_inodelk(frame, lk_array, count,
+ dht_update_commit_hash_for_layout_resume);
+ if (ret < 0) {
+ local->lock[0].layout.my_layout.locks = NULL;
+ local->lock[0].layout.my_layout.lk_count = 0;
+ goto err;
+ }
+
+ return 0;
+err:
+ if (lk_array != NULL) {
+ dht_lock_array_free(lk_array, count);
+ GF_FREE(lk_array);
+ }
- return ret;
+ return -1;
}
diff --git a/xlators/cluster/dht/src/dht-shared.c b/xlators/cluster/dht/src/dht-shared.c
new file mode 100644
index 00000000000..bb72b0ffbb5
--- /dev/null
+++ b/xlators/cluster/dht/src/dht-shared.c
@@ -0,0 +1,1104 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* TODO: add NS locking */
+#include <glusterfs/statedump.h>
+#include "dht-common.h"
+#include "dht-messages.h"
+
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+/* TODO:
+ - use volumename in xattr instead of "dht"
+ - use NS locks
+ - handle all cases in self heal layout reconstruction
+ - complete linkfile selfheal
+*/
+
+static void
+dht_layout_dump(dht_layout_t *layout, const char *prefix)
+{
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+
+ if (!layout)
+ goto out;
+
+ gf_proc_dump_build_key(key, prefix, "cnt");
+ gf_proc_dump_write(key, "%d", layout->cnt);
+ gf_proc_dump_build_key(key, prefix, "preset");
+ gf_proc_dump_write(key, "%d", layout->preset);
+ gf_proc_dump_build_key(key, prefix, "gen");
+ gf_proc_dump_write(key, "%d", layout->gen);
+ if (layout->type != IA_INVAL) {
+ gf_proc_dump_build_key(key, prefix, "inode type");
+ gf_proc_dump_write(key, "%d", layout->type);
+ }
+
+ if (!IA_ISDIR(layout->type))
+ goto out;
+
+ for (i = 0; i < layout->cnt; i++) {
+ gf_proc_dump_build_key(key, prefix, "list[%d].err", i);
+ gf_proc_dump_write(key, "%d", layout->list[i].err);
+ gf_proc_dump_build_key(key, prefix, "list[%d].start", i);
+ gf_proc_dump_write(key, "0x%x", layout->list[i].start);
+ gf_proc_dump_build_key(key, prefix, "list[%d].stop", i);
+ gf_proc_dump_write(key, "0x%x", layout->list[i].stop);
+ if (layout->list[i].xlator) {
+ gf_proc_dump_build_key(key, prefix, "list[%d].xlator.type", i);
+ gf_proc_dump_write(key, "%s", layout->list[i].xlator->type);
+ gf_proc_dump_build_key(key, prefix, "list[%d].xlator.name", i);
+ gf_proc_dump_write(key, "%s", layout->list[i].xlator->name);
+ }
+ }
+
+out:
+ return;
+}
+
+int32_t
+dht_priv_dump(xlator_t *this)
+{
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char key[GF_DUMP_MAX_BUF_LEN];
+ int i = 0;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+
+ if (!this)
+ goto out;
+
+ conf = this->private;
+ if (!conf)
+ goto out;
+
+ ret = TRY_LOCK(&conf->subvolume_lock);
+ if (ret != 0) {
+ return ret;
+ }
+
+ gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
+ gf_proc_dump_build_key(key_prefix, "xlator.cluster.dht", "%s.priv",
+ this->name);
+ gf_proc_dump_write("subvol_cnt", "%d", conf->subvolume_cnt);
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ snprintf(key, sizeof(key), "subvolumes[%d]", i);
+ gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
+ conf->subvolumes[i]->name);
+ if (conf->file_layouts && conf->file_layouts[i]) {
+ snprintf(key, sizeof(key), "file_layouts[%d]", i);
+ dht_layout_dump(conf->file_layouts[i], key);
+ }
+ if (conf->dir_layouts && conf->dir_layouts[i]) {
+ snprintf(key, sizeof(key), "dir_layouts[%d]", i);
+ dht_layout_dump(conf->dir_layouts[i], key);
+ }
+ if (conf->subvolume_status) {
+ snprintf(key, sizeof(key), "subvolume_status[%d]", i);
+ gf_proc_dump_write(key, "%d", (int)conf->subvolume_status[i]);
+ }
+ }
+
+ gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);
+ gf_proc_dump_write("gen", "%d", conf->gen);
+ gf_proc_dump_write("min_free_disk", "%lf", conf->min_free_disk);
+ gf_proc_dump_write("min_free_inodes", "%lf", conf->min_free_inodes);
+ gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);
+ gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);
+ gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit);
+ gf_proc_dump_write("use-readdirp", "%d", conf->use_readdirp);
+
+ if (conf->du_stats && conf->subvolume_status) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!conf->subvolume_status[i])
+ continue;
+
+ snprintf(key, sizeof(key), "subvolumes[%d]", i);
+ gf_proc_dump_write(key, "%s", conf->subvolumes[i]->name);
+
+ snprintf(key, sizeof(key), "du_stats[%d].avail_percent", i);
+ gf_proc_dump_write(key, "%lf", conf->du_stats[i].avail_percent);
+
+ snprintf(key, sizeof(key), "du_stats[%d].avail_space", i);
+ gf_proc_dump_write(key, "%" PRIu64, conf->du_stats[i].avail_space);
+
+ snprintf(key, sizeof(key), "du_stats[%d].avail_inodes", i);
+ gf_proc_dump_write(key, "%lf", conf->du_stats[i].avail_inodes);
+
+ snprintf(key, sizeof(key), "du_stats[%d].log", i);
+ gf_proc_dump_write(key, "%" PRIu32, conf->du_stats[i].log);
+ }
+ }
+
+ if (conf->last_stat_fetch)
+ gf_proc_dump_write("last_stat_fetch", "%s",
+ ctime(&conf->last_stat_fetch));
+
+ UNLOCK(&conf->subvolume_lock);
+
+out:
+ return ret;
+}
+
+int32_t
+dht_inodectx_dump(xlator_t *this, inode_t *inode)
+{
+ int ret = -1;
+ dht_layout_t *layout = NULL;
+
+ if (!this)
+ goto out;
+ if (!inode)
+ goto out;
+
+ ret = dht_inode_ctx_layout_get(inode, this, &layout);
+
+ if ((ret != 0) || !layout)
+ return ret;
+
+ gf_proc_dump_add_section("xlator.cluster.dht.%s.inode", this->name);
+ dht_layout_dump(layout, "layout");
+
+out:
+ return ret;
+}
+
+void
+dht_fini(xlator_t *this)
+{
+ int i = 0;
+ dht_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+
+ conf = this->private;
+ this->private = NULL;
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ GF_FREE(conf->file_layouts[i]);
+ }
+ GF_FREE(conf->file_layouts);
+ }
+
+ dict_unref(conf->leaf_to_subvol);
+
+ /* allocated in dht_init_subvolumes() */
+ GF_FREE(conf->subvolumes);
+ GF_FREE(conf->subvolume_status);
+ GF_FREE(conf->last_event);
+ GF_FREE(conf->subvol_up_time);
+ GF_FREE(conf->du_stats);
+ GF_FREE(conf->decommissioned_bricks);
+
+ /* allocated in dht_init() */
+ GF_FREE(conf->mds_xattr_key);
+ GF_FREE(conf->link_xattr_name);
+ GF_FREE(conf->commithash_xattr_name);
+ GF_FREE(conf->wild_xattr_name);
+
+ /* allocated in dht_init_regex() */
+ if (conf->rsync_regex_valid)
+ regfree(&conf->rsync_regex);
+ if (conf->extra_regex_valid)
+ regfree(&conf->extra_regex);
+
+ synclock_destroy(&conf->link_lock);
+
+ if (conf->lock_pool)
+ mem_pool_destroy(conf->lock_pool);
+
+ GF_FREE(conf);
+ }
+out:
+ return;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+
+ ret = xlator_mem_acct_init(this, gf_dht_mt_end + 1);
+
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_NO_MEMORY,
+ "Memory accounting init failed");
+ return ret;
+ }
+out:
+ return ret;
+}
+
+static int
+dht_parse_decommissioned_bricks(xlator_t *this, dht_conf_t *conf,
+ const char *bricks)
+{
+ int i = 0;
+ int ret = -1;
+ char *tmpstr = NULL;
+ char *dup_brick = NULL;
+ char *node = NULL;
+
+ if (!conf || !bricks)
+ goto out;
+
+ dup_brick = gf_strdup(bricks);
+ if (dup_brick == NULL) {
+ goto out;
+ }
+
+ node = strtok_r(dup_brick, ",", &tmpstr);
+ while (node) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (!strcmp(conf->subvolumes[i]->name, node)) {
+ conf->decommissioned_bricks[i] = conf->subvolumes[i];
+ conf->decommission_subvols_cnt++;
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ DHT_MSG_SUBVOL_DECOMMISSION_INFO,
+ "decommissioning subvolume %s",
+ conf->subvolumes[i]->name);
+ break;
+ }
+ }
+ if (i == conf->subvolume_cnt) {
+ /* Wrong node given. */
+ goto out;
+ }
+ node = strtok_r(NULL, ",", &tmpstr);
+ }
+
+ ret = 0;
+ conf->decommission_in_progress = 1;
+out:
+ GF_FREE(dup_brick);
+
+ return ret;
+}
+
+static void
+dht_decommissioned_remove(xlator_t *this, dht_conf_t *conf)
+{
+ int i = 0;
+
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ if (conf->decommissioned_bricks[i]) {
+ conf->decommissioned_bricks[i] = NULL;
+ conf->decommission_subvols_cnt--;
+ }
+ }
+}
+
+static void
+dht_init_regex(xlator_t *this, dict_t *odict, char *name, regex_t *re,
+ gf_boolean_t *re_valid, dht_conf_t *conf)
+{
+ char *temp_str = NULL;
+
+ if (dict_get_str(odict, name, &temp_str) != 0) {
+ if (strcmp(name, "rsync-hash-regex")) {
+ return;
+ }
+ temp_str = "^\\.(.+)\\.[^.]+$";
+ }
+
+ LOCK(&conf->lock);
+ {
+ if (*re_valid) {
+ regfree(re);
+ *re_valid = _gf_false;
+ }
+
+ if (!strcmp(temp_str, "none")) {
+ goto unlock;
+ }
+
+ if (regcomp(re, temp_str, REG_EXTENDED) == 0) {
+ gf_msg_debug(this->name, 0, "using regex %s = %s", name, temp_str);
+ *re_valid = _gf_true;
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_REGEX_INFO,
+ "compiling regex %s failed", temp_str);
+ }
+ }
+unlock:
+ UNLOCK(&conf->lock);
+}
+
+int
+dht_set_subvol_range(xlator_t *this)
+{
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf)
+ goto out;
+
+ conf->leaf_to_subvol = dict_new();
+ if (!conf->leaf_to_subvol)
+ goto out;
+
+ ret = glusterfs_reachable_leaves(this, conf->leaf_to_subvol);
+
+out:
+ return ret;
+}
+
+static int
+dht_configure_throttle(xlator_t *this, dht_conf_t *conf, char *temp_str)
+{
+ int rebal_thread_count = 0;
+ int ret = 0;
+
+ pthread_mutex_lock(&conf->defrag->dfq_mutex);
+ {
+ if (!strcasecmp(temp_str, "lazy")) {
+ conf->defrag->recon_thread_count = 1;
+ } else if (!strcasecmp(temp_str, "normal")) {
+ conf->defrag->recon_thread_count = 2;
+ } else if (!strcasecmp(temp_str, "aggressive")) {
+ conf->defrag->recon_thread_count = MAX(MAX_REBAL_THREADS - 4, 4);
+ } else if ((gf_string2int(temp_str, &rebal_thread_count) == 0)) {
+ if ((rebal_thread_count > 0) &&
+ (rebal_thread_count <= MAX_REBAL_THREADS)) {
+ conf->defrag->recon_thread_count = rebal_thread_count;
+ pthread_mutex_unlock(&conf->defrag->dfq_mutex);
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "rebal thread count configured to %d",
+ rebal_thread_count);
+ goto out;
+ } else {
+ pthread_mutex_unlock(&conf->defrag->dfq_mutex);
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option: Reconfigure: "
+ "rebal-throttle should be "
+ "within range of 0 and maximum number of"
+ " cores available");
+ ret = -1;
+ goto out;
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option: Reconfigure: "
+ "rebal-throttle should be {lazy|normal|aggressive}"
+ " or a number up to the number of cores available,"
+ " not (%s), defaulting to (%d)",
+ temp_str, conf->dthrottle);
+ ret = -1;
+ }
+ }
+ pthread_mutex_unlock(&conf->defrag->dfq_mutex);
+
+out:
+ return ret;
+}
+
+int
+dht_reconfigure(xlator_t *this, dict_t *options)
+{
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ gf_boolean_t search_unhashed;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+ GF_VALIDATE_OR_GOTO("dht", options, out);
+
+ conf = this->private;
+ if (!conf)
+ return 0;
+
+ if (dict_get_str(options, "lookup-unhashed", &temp_str) == 0) {
+ /* If option is not "auto", other options _should_ be boolean*/
+ if (strcasecmp(temp_str, "auto")) {
+ if (!gf_string2boolean(temp_str, &search_unhashed)) {
+ gf_msg_debug(this->name, 0,
+ "Reconfigure: "
+ "lookup-unhashed reconfigured(%s)",
+ temp_str);
+ conf->search_unhashed = search_unhashed;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option: Reconfigure: "
+ "lookup-unhashed should be boolean,"
+ " not (%s), defaulting to (%d)",
+ temp_str, conf->search_unhashed);
+ ret = -1;
+ goto out;
+ }
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Reconfigure:"
+ " lookup-unhashed reconfigured auto ");
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
+ }
+ }
+
+ GF_OPTION_RECONF("lookup-optimize", conf->lookup_optimize, options, bool,
+ out);
+
+ GF_OPTION_RECONF("min-free-disk", conf->min_free_disk, options,
+ percent_or_size, out);
+ /* option can be any one of percent or bytes */
+ conf->disk_unit = 0;
+ if (conf->min_free_disk < 100.0)
+ conf->disk_unit = 'p';
+
+ GF_OPTION_RECONF("min-free-inodes", conf->min_free_inodes, options, percent,
+ out);
+
+ GF_OPTION_RECONF("directory-layout-spread", conf->dir_spread_cnt, options,
+ uint32, out);
+
+ GF_OPTION_RECONF("readdir-optimize", conf->readdir_optimize, options, bool,
+ out);
+ GF_OPTION_RECONF("randomize-hash-range-by-gfid", conf->randomize_by_gfid,
+ options, bool, out);
+
+ GF_OPTION_RECONF("lock-migration", conf->lock_migration_enabled, options,
+ bool, out);
+
+ GF_OPTION_RECONF("force-migration", conf->force_migration, options, bool,
+ out);
+
+ if (conf->defrag) {
+ if (dict_get_str(options, "rebal-throttle", &temp_str) == 0) {
+ ret = dht_configure_throttle(this, conf, temp_str);
+ if (ret == -1)
+ goto out;
+ }
+ }
+
+ if (conf->defrag) {
+ conf->defrag->lock_migration_enabled = conf->lock_migration_enabled;
+ }
+
+ if (conf->defrag) {
+ GF_OPTION_RECONF("rebalance-stats", conf->defrag->stats, options, bool,
+ out);
+ }
+
+ if (dict_get_str(options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks(this, conf, temp_str);
+ if (ret == -1)
+ goto out;
+ } else {
+ dht_decommissioned_remove(this, conf);
+ }
+
+ dht_init_regex(this, options, "rsync-hash-regex", &conf->rsync_regex,
+ &conf->rsync_regex_valid, conf);
+ dht_init_regex(this, options, "extra-hash-regex", &conf->extra_regex,
+ &conf->extra_regex_valid, conf);
+
+ GF_OPTION_RECONF("weighted-rebalance", conf->do_weighting, options, bool,
+ out);
+
+ GF_OPTION_RECONF("use-readdirp", conf->use_readdirp, options, bool, out);
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+gf_defrag_pattern_list_fill(xlator_t *this, gf_defrag_info_t *defrag,
+ char *data)
+{
+ int ret = -1;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *dup_str = NULL;
+ char *num = NULL;
+ char *pattern_str = NULL;
+ char *pattern = NULL;
+ gf_defrag_pattern_list_t *temp_list = NULL;
+ gf_defrag_pattern_list_t *pattern_list = NULL;
+
+ if (!this || !defrag || !data)
+ goto out;
+
+ /* Get the pattern for pattern list. "pattern:<optional-size>"
+ * eg: *avi, *pdf:10MB, *:1TB
+ */
+ pattern_str = strtok_r(data, ",", &tmp_str);
+ while (pattern_str) {
+ dup_str = gf_strdup(pattern_str);
+ if (!dup_str)
+ goto out;
+ pattern_list = GF_CALLOC(1, sizeof(gf_defrag_pattern_list_t), 1);
+ if (!pattern_list) {
+ goto out;
+ }
+ pattern = strtok_r(dup_str, ":", &tmp_str1);
+ num = strtok_r(NULL, ":", &tmp_str1);
+ if (!pattern)
+ goto out;
+ if (!num) {
+ if (gf_string2bytesize_uint64(pattern, &pattern_list->size) == 0) {
+ pattern = "*";
+ }
+ } else if (gf_string2bytesize_uint64(num, &pattern_list->size) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option. Defrag pattern:"
+ " Invalid number format \"%s\"",
+ num);
+ goto out;
+ }
+ memcpy(pattern_list->path_pattern, pattern, strlen(dup_str));
+
+ if (!defrag->defrag_pattern)
+ temp_list = NULL;
+ else
+ temp_list = defrag->defrag_pattern;
+
+ pattern_list->next = temp_list;
+
+ defrag->defrag_pattern = pattern_list;
+ pattern_list = NULL;
+
+ GF_FREE(dup_str);
+ dup_str = NULL;
+
+ pattern_str = strtok_r(NULL, ",", &tmp_str);
+ }
+
+ ret = 0;
+out:
+ if (ret)
+ GF_FREE(pattern_list);
+ GF_FREE(dup_str);
+
+ return ret;
+}
+
+static int
+dht_init_methods(xlator_t *this)
+{
+ int ret = -1;
+ dht_conf_t *conf = NULL;
+ dht_methods_t *methods = NULL;
+
+ GF_VALIDATE_OR_GOTO("dht", this, err);
+
+ conf = this->private;
+ methods = &(conf->methods);
+
+ methods->migration_get_dst_subvol = dht_migration_get_dst_subvol;
+ methods->migration_other = NULL;
+ methods->layout_search = dht_layout_search;
+
+ ret = 0;
+err:
+ return ret;
+}
+
+int
+dht_init(xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ char *temp_str = NULL;
+ int ret = -1;
+ int i = 0;
+ gf_defrag_info_t *defrag = NULL;
+ int cmd = 0;
+ char *node_uuid = NULL;
+ uint32_t commit_hash = 0;
+
+ GF_VALIDATE_OR_GOTO("dht", this, err);
+
+ if (!this->children) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, DHT_MSG_INVALID_CONFIGURATION,
+ "Distribute needs more than one subvolume");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_INVALID_CONFIGURATION,
+ "dangling volume. check volfile");
+ }
+
+ conf = GF_CALLOC(1, sizeof(*conf), gf_dht_mt_dht_conf_t);
+ if (!conf) {
+ goto err;
+ }
+
+ LOCK_INIT(&conf->subvolume_lock);
+ LOCK_INIT(&conf->layout_lock);
+ LOCK_INIT(&conf->lock);
+ synclock_init(&conf->link_lock, SYNC_LOCK_DEFAULT);
+
+ /* We get the commit-hash to set only for rebalance process */
+ if (dict_get_uint32(this->options, "commit-hash", &commit_hash) == 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_COMMIT_HASH_INFO,
+ "%s using commit hash %u", __func__, commit_hash);
+ conf->vol_commit_hash = commit_hash;
+ conf->vch_forced = _gf_true;
+ }
+
+ ret = dict_get_int32(this->options, "rebalance-cmd", &cmd);
+
+ if (cmd) {
+ defrag = GF_CALLOC(1, sizeof(gf_defrag_info_t), gf_defrag_info_mt);
+
+ GF_VALIDATE_OR_GOTO(this->name, defrag, err);
+
+ LOCK_INIT(&defrag->lock);
+
+ defrag->is_exiting = 0;
+
+ conf->defrag = defrag;
+ defrag->this = this;
+
+ ret = dict_get_str(this->options, "node-uuid", &node_uuid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_CONFIGURATION,
+ "Invalid volume configuration: "
+ "node-uuid not specified");
+ goto err;
+ }
+
+ if (gf_uuid_parse(node_uuid, defrag->node_uuid)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option:"
+ " Cannot parse glusterd node uuid");
+ goto err;
+ }
+
+ defrag->cmd = cmd;
+
+ defrag->stats = _gf_false;
+
+ defrag->queue = NULL;
+
+ defrag->crawl_done = 0;
+
+ defrag->global_error = 0;
+
+ defrag->q_entry_count = 0;
+
+ defrag->wakeup_crawler = 0;
+
+ pthread_mutex_init(&defrag->dfq_mutex, 0);
+ pthread_cond_init(&defrag->parallel_migration_cond, 0);
+ pthread_cond_init(&defrag->rebalance_crawler_alarm, 0);
+ pthread_cond_init(&defrag->df_wakeup_thread, 0);
+
+ pthread_mutex_init(&defrag->fc_mutex, 0);
+ pthread_cond_init(&defrag->fc_wakeup_cond, 0);
+
+ defrag->global_error = 0;
+ }
+
+ conf->use_fallocate = 1;
+
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
+ if (dict_get_str(this->options, "lookup-unhashed", &temp_str) == 0) {
+ /* If option is not "auto", other options _should_ be boolean */
+ if (strcasecmp(temp_str, "auto")) {
+ gf_boolean_t search_unhashed_bool;
+ ret = gf_string2boolean(temp_str, &search_unhashed_bool);
+ if (ret == -1) {
+ goto err;
+ }
+ conf->search_unhashed = search_unhashed_bool
+ ? GF_DHT_LOOKUP_UNHASHED_ON
+ : GF_DHT_LOOKUP_UNHASHED_OFF;
+ } else {
+ conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
+ }
+ }
+
+ GF_OPTION_INIT("lookup-optimize", conf->lookup_optimize, bool, err);
+
+ GF_OPTION_INIT("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool, err);
+
+ GF_OPTION_INIT("use-readdirp", conf->use_readdirp, bool, err);
+
+ GF_OPTION_INIT("min-free-disk", conf->min_free_disk, percent_or_size, err);
+
+ GF_OPTION_INIT("min-free-inodes", conf->min_free_inodes, percent, err);
+
+ conf->dir_spread_cnt = conf->subvolume_cnt;
+ GF_OPTION_INIT("directory-layout-spread", conf->dir_spread_cnt, uint32,
+ err);
+
+ GF_OPTION_INIT("assert-no-child-down", conf->assert_no_child_down, bool,
+ err);
+
+ GF_OPTION_INIT("readdir-optimize", conf->readdir_optimize, bool, err);
+
+ GF_OPTION_INIT("lock-migration", conf->lock_migration_enabled, bool, err);
+
+ GF_OPTION_INIT("force-migration", conf->force_migration, bool, err);
+
+ if (defrag) {
+ defrag->lock_migration_enabled = conf->lock_migration_enabled;
+
+ GF_OPTION_INIT("rebalance-stats", defrag->stats, bool, err);
+ if (dict_get_str(this->options, "rebalance-filter", &temp_str) == 0) {
+ if (gf_defrag_pattern_list_fill(this, defrag, temp_str) == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INVALID_OPTION,
+ "Invalid option:"
+ " Cannot parse rebalance-filter (%s)",
+ temp_str);
+
+ goto err;
+ }
+ }
+ }
+
+ /* option can be any one of percent or bytes */
+ conf->disk_unit = 0;
+ if (conf->min_free_disk < 100)
+ conf->disk_unit = 'p';
+
+ ret = dht_init_subvolumes(this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ if (cmd) {
+ ret = dht_init_local_subvolumes(this, conf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_INIT_LOCAL_SUBVOL_FAILED,
+ "dht_init_local_subvolumes failed");
+ goto err;
+ }
+ }
+
+ if (dict_get_str(this->options, "decommissioned-bricks", &temp_str) == 0) {
+ ret = dht_parse_decommissioned_bricks(this, conf, temp_str);
+ if (ret == -1)
+ goto err;
+ }
+
+ dht_init_regex(this, this->options, "rsync-hash-regex", &conf->rsync_regex,
+ &conf->rsync_regex_valid, conf);
+ dht_init_regex(this, this->options, "extra-hash-regex", &conf->extra_regex,
+ &conf->extra_regex_valid, conf);
+
+ ret = dht_layouts_init(this, conf);
+ if (ret == -1) {
+ goto err;
+ }
+
+ conf->gen = 1;
+
+ this->local_pool = mem_pool_new(dht_local_t, 512);
+ if (!this->local_pool) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, DHT_MSG_NO_MEMORY,
+ " DHT initialisation failed. "
+ "failed to create local_t's memory pool");
+ goto err;
+ }
+
+ GF_OPTION_INIT("randomize-hash-range-by-gfid", conf->randomize_by_gfid,
+ bool, err);
+
+ if (defrag) {
+ GF_OPTION_INIT("rebal-throttle", temp_str, str, err);
+ if (temp_str) {
+ ret = dht_configure_throttle(this, conf, temp_str);
+ if (ret == -1)
+ goto err;
+ }
+ }
+
+ GF_OPTION_INIT("xattr-name", conf->xattr_name, str, err);
+ gf_asprintf(&conf->mds_xattr_key, "%s." DHT_MDS_STR, conf->xattr_name);
+ gf_asprintf(&conf->link_xattr_name, "%s." DHT_LINKFILE_STR,
+ conf->xattr_name);
+ gf_asprintf(&conf->commithash_xattr_name, "%s." DHT_COMMITHASH_STR,
+ conf->xattr_name);
+ gf_asprintf(&conf->wild_xattr_name, "%s*", conf->xattr_name);
+ if (!conf->link_xattr_name || !conf->wild_xattr_name) {
+ goto err;
+ }
+
+ GF_OPTION_INIT("weighted-rebalance", conf->do_weighting, bool, err);
+
+ conf->lock_pool = mem_pool_new(dht_lock_t, 512);
+ if (!conf->lock_pool) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_INIT_FAILED,
+ "failed to create lock mem_pool, failing "
+ "initialization");
+ goto err;
+ }
+
+ this->private = conf;
+
+ if (dht_set_subvol_range(this))
+ goto err;
+
+ if (dht_init_methods(this))
+ goto err;
+
+ return 0;
+
+err:
+ if (conf) {
+ if (conf->file_layouts) {
+ for (i = 0; i < conf->subvolume_cnt; i++) {
+ GF_FREE(conf->file_layouts[i]);
+ }
+ GF_FREE(conf->file_layouts);
+ }
+
+ GF_FREE(conf->subvolumes);
+
+ GF_FREE(conf->subvolume_status);
+
+ GF_FREE(conf->du_stats);
+
+ GF_FREE(conf->defrag);
+
+ GF_FREE(conf->xattr_name);
+ GF_FREE(conf->link_xattr_name);
+ GF_FREE(conf->wild_xattr_name);
+ GF_FREE(conf->mds_xattr_key);
+
+ if (conf->lock_pool)
+ mem_pool_destroy(conf->lock_pool);
+
+ GF_FREE(conf);
+ }
+
+ return -1;
+}
+
+struct volume_options dht_options[] = {
+ {
+ .key = {"lookup-unhashed"},
+ .value = {"auto", "yes", "no", "enable", "disable", "1", "0", "on",
+ "off"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "on",
+ .description =
+ "This option if set to ON, does a lookup through "
+ "all the sub-volumes, in case a lookup didn't return any result "
+ "from the hash subvolume. If set to OFF, it does not do a lookup "
+ "on the remaining subvolumes.",
+ .op_version = {1},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE,
+ .level = OPT_STATUS_BASIC,
+ },
+ {.key = {"lookup-optimize"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description =
+ "This option if set to ON enables the optimization "
+ "of -ve lookups, by not doing a lookup on non-hashed subvolumes for "
+ "files, in case the hashed subvolume does not return any result. "
+ "This option disregards the lookup-unhashed setting, when enabled.",
+ .op_version = {GD_OP_VERSION_3_7_2},
+ .level = OPT_STATUS_ADVANCED,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"min-free-disk"},
+ .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
+ .default_value = "10%",
+ .description =
+ "Percentage/Size of disk space, after which the "
+ "process starts balancing out the cluster, and logs will appear "
+ "in log files",
+ .op_version = {1},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"min-free-inodes"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .default_value = "5%",
+ .description = "after system has only N% of inodes, warnings "
+ "starts to appear in log files",
+ .op_version = {1},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {
+ .key = {"unhashed-sticky-bit"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ },
+ {.key = {"use-readdirp"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "This option if set to ON, forces the use of "
+ "readdirp, and hence also displays the stats of the files.",
+ .level = OPT_STATUS_ADVANCED,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"assert-no-child-down"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option if set to ON, in the event of "
+ "CHILD_DOWN, will call exit."},
+ {
+ .key = {"directory-layout-spread"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .validate = GF_OPT_VALIDATE_MIN,
+ .description = "Specifies the directory layout spread. Takes number "
+ "of subvolumes as default value.",
+
+ .op_version = {2},
+ },
+ {
+ .key = {"decommissioned-bricks"},
+ .type = GF_OPTION_TYPE_ANY,
+ .description =
+ "This option if set to ON, decommissions "
+ "the brick, so that no new data is allowed to be created "
+ "on that brick.",
+ .level = OPT_STATUS_ADVANCED,
+ },
+ {
+ .key = {"rebalance-cmd"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ {
+ .key = {"commit-hash"},
+ .type = GF_OPTION_TYPE_INT,
+ },
+ {
+ .key = {"node-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+ {
+ .key = {"rebalance-stats"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description =
+ "This option if set to ON displays and logs the "
+ " time taken for migration of each file, during the rebalance "
+ "process. If set to OFF, the rebalance logs will only display the "
+ "time spent in each directory.",
+ .op_version = {2},
+ .level = OPT_STATUS_BASIC,
+ },
+ {.key = {"readdir-optimize"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description =
+ "This option if set to ON enables the optimization "
+ "that allows DHT to requests non-first subvolumes to filter out "
+ "directory entries.",
+ .op_version = {1},
+ .level = OPT_STATUS_ADVANCED,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"rsync-hash-regex"},
+ .type = GF_OPTION_TYPE_STR,
+ /* Setting a default here doesn't work. See dht_init_regex. */
+ .description =
+ "Regular expression for stripping temporary-file "
+ "suffix and prefix used by rsync, to prevent relocation when the "
+ "file is renamed.",
+ .op_version = {3},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {.key = {"extra-hash-regex"},
+ .type = GF_OPTION_TYPE_STR,
+ /* Setting a default here doesn't work. See dht_init_regex. */
+ .description =
+ "Regular expression for stripping temporary-file "
+ "suffix and prefix used by an application, to prevent relocation when "
+ "the file is renamed.",
+ .op_version = {3},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+ {
+ .key = {"rebalance-filter"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+
+ {
+ .key = {"xattr-name"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "trusted.glusterfs.dht",
+ .description =
+ "Base for extended attributes used by this "
+ "translator instance, to avoid conflicts with others above or "
+ "below it.",
+ .op_version = {3},
+ },
+
+ {.key = {"weighted-rebalance"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description =
+ "When enabled, files will be allocated to bricks "
+ "with a probability proportional to their size. Otherwise, all "
+ "bricks will have the same probability (legacy behavior).",
+ .op_version = {GD_OP_VERSION_3_6_0},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+
+ /* NUFA option */
+ {.key = {"local-volume-name"}, .type = GF_OPTION_TYPE_XLATOR},
+
+ /* switch option */
+ {.key = {"pattern.switch.case"}, .type = GF_OPTION_TYPE_ANY},
+
+ {
+ .key = {"randomize-hash-range-by-gfid"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description =
+ "Use gfid of directory to determine the subvolume "
+ "from which hash ranges are allocated starting with 0. "
+ "Note that we still use a directory/file's name to determine the "
+ "subvolume to which it hashes",
+ .op_version = {GD_OP_VERSION_3_6_0},
+ },
+
+ {.key = {"rebal-throttle"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "normal",
+ .description = " Sets the maximum number of parallel file migrations "
+ "allowed on a node during the rebalance operation. The"
+ " default value is normal and allows a max of "
+ "[($(processing units) - 4) / 2), 2] files to be "
+ "migrated at a time. Lazy will allow only one file to "
+ "be migrated at a time and aggressive will allow "
+ "max of [($(processing units) - 4) / 2), 4]",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .level = OPT_STATUS_BASIC,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC
+
+ },
+
+ {.key = {"lock-migration"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = " If enabled this feature will migrate the posix locks"
+ " associated with a file during rebalance",
+ .op_version = {GD_OP_VERSION_3_8_0},
+ .level = OPT_STATUS_ADVANCED,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+
+ {.key = {"force-migration"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "If disabled, rebalance will not migrate files that "
+ "are being written to by an application",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .level = OPT_STATUS_ADVANCED,
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC},
+
+ {.key = {NULL}},
+};
+
+#define NUM_DHT_OPTIONS (sizeof(dht_options) / sizeof(dht_options[0]))
+
+extern struct volume_options options[NUM_DHT_OPTIONS]
+ __attribute__((alias("dht_options")));
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index c5128544279..53de8292704 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -8,608 +8,116 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-/* TODO: add NS locking */
-
-#include "statedump.h"
#include "dht-common.h"
-/* TODO:
- - use volumename in xattr instead of "dht"
- - use NS locks
- - handle all cases in self heal layout reconstruction
- - complete linkfile selfheal
-*/
-struct volume_options options[];
-
-void
-dht_layout_dump (dht_layout_t *layout, const char *prefix)
-{
-
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", layout, out);
- GF_VALIDATE_OR_GOTO ("dht", prefix, out);
-
- gf_proc_dump_build_key(key, prefix, "cnt");
- gf_proc_dump_write(key, "%d", layout->cnt);
- gf_proc_dump_build_key(key, prefix, "preset");
- gf_proc_dump_write(key, "%d", layout->preset);
- gf_proc_dump_build_key(key, prefix, "gen");
- gf_proc_dump_write(key, "%d", layout->gen);
- gf_proc_dump_build_key(key, prefix, "type");
- gf_proc_dump_write(key, "%d", layout->type);
-
- for (i = 0; i < layout->cnt; i++) {
- gf_proc_dump_build_key(key, prefix,"list[%d].err", i);
- gf_proc_dump_write(key, "%d", layout->list[i].err);
- gf_proc_dump_build_key(key, prefix,"list[%d].start", i);
- gf_proc_dump_write(key, "%u", layout->list[i].start);
- gf_proc_dump_build_key(key, prefix,"list[%d].stop", i);
- gf_proc_dump_write(key, "%u", layout->list[i].stop);
- if (layout->list[i].xlator) {
- gf_proc_dump_build_key(key, prefix,
- "list[%d].xlator.type", i);
- gf_proc_dump_write(key, "%s",
- layout->list[i].xlator->type);
- gf_proc_dump_build_key(key, prefix,
- "list[%d].xlator.name", i);
- gf_proc_dump_write(key, "%s",
- layout->list[i].xlator->name);
- }
- }
-
-out:
- return;
-}
-
-
-int32_t
-dht_priv_dump (xlator_t *this)
-{
- char key_prefix[GF_DUMP_MAX_BUF_LEN];
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
- dht_conf_t *conf = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- conf = this->private;
-
- if (!conf)
- return -1;
-
- ret = TRY_LOCK(&conf->subvolume_lock);
-
- if (ret != 0) {
- gf_log("", GF_LOG_WARNING, "Unable to lock dht subvolume %s",
- this->name);
- return ret;
- }
-
- gf_proc_dump_add_section("xlator.cluster.dht.%s.priv", this->name);
- gf_proc_dump_build_key(key_prefix,"xlator.cluster.dht","%s.priv",
- this->name);
- gf_proc_dump_write("subvol_cnt","%d", conf->subvolume_cnt);
- for (i = 0; i < conf->subvolume_cnt; i++) {
- sprintf (key, "subvolumes[%d]", i);
- gf_proc_dump_write(key, "%s.%s", conf->subvolumes[i]->type,
- conf->subvolumes[i]->name);
- if (conf->file_layouts && conf->file_layouts[i]){
- sprintf (key, "file_layouts[%d]", i);
- dht_layout_dump(conf->file_layouts[i], key);
- }
- if (conf->dir_layouts && conf->dir_layouts[i]) {
- sprintf (key, "dir_layouts[%d]", i);
- dht_layout_dump(conf->dir_layouts[i], key);
- }
- if (conf->subvolume_status) {
-
- sprintf (key, "subvolume_status[%d]", i);
- gf_proc_dump_write(key, "%d",
- (int)conf->subvolume_status[i]);
- }
-
- }
-
- gf_proc_dump_write("search_unhashed", "%d", conf->search_unhashed);
- gf_proc_dump_write("gen", "%d", conf->gen);
- gf_proc_dump_write("min_free_disk", "%lu", conf->min_free_disk);
- gf_proc_dump_write("min_free_inodes", "%lu", conf->min_free_inodes);
- gf_proc_dump_write("disk_unit", "%c", conf->disk_unit);
- gf_proc_dump_write("refresh_interval", "%d", conf->refresh_interval);
- gf_proc_dump_write("unhashed_sticky_bit", "%d", conf->unhashed_sticky_bit);
- if (conf ->du_stats) {
- gf_proc_dump_write("du_stats.avail_percent", "%lf",
- conf->du_stats->avail_percent);
- gf_proc_dump_write("du_stats.avail_space", "%lu",
- conf->du_stats->avail_space);
- gf_proc_dump_write("du_stats.avail_inodes", "%lf",
- conf->du_stats->avail_inodes);
- gf_proc_dump_write("du_stats.log", "%lu", conf->du_stats->log);
- }
-
- if (conf->last_stat_fetch.tv_sec)
- gf_proc_dump_write("last_stat_fetch", "%s",
- ctime(&conf->last_stat_fetch.tv_sec));
-
- UNLOCK(&conf->subvolume_lock);
-
-out:
- return ret;
-}
-
-int32_t
-dht_inodectx_dump (xlator_t *this, inode_t *inode)
-{
- int ret = -1;
- dht_layout_t *layout = NULL;
- uint64_t tmp_layout = 0;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", inode, out);
-
- ret = inode_ctx_get (inode, this, &tmp_layout);
-
- if (ret != 0)
- return ret;
-
- layout = (dht_layout_t *)(long)tmp_layout;
-
- if (!layout)
- return -1;
-
- gf_proc_dump_add_section("xlator.cluster.dht.%s.inode", this->name);
- dht_layout_dump(layout, "layout");
-
-out:
- return ret;
-}
-
-int
-notify (xlator_t *this, int event, void *data, ...)
-{
- int ret = -1;
- va_list ap;
- dict_t *output = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
-
- if (!data)
- goto out;
-
- va_start (ap, data);
- output = va_arg (ap, dict_t*);
-
- ret = dht_notify (this, event, data, output);
-
-out:
- return ret;
-}
-
-void
-fini (xlator_t *this)
-{
- int i = 0;
- dht_conf_t *conf = NULL;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- conf = this->private;
- this->private = NULL;
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- GF_FREE (conf);
- }
-out:
- return;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
-
- ret = xlator_mem_acct_init (this, gf_dht_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-out:
- return ret;
-}
-
-
-int
-dht_parse_decommissioned_bricks (xlator_t *this, dht_conf_t *conf,
- const char *bricks)
-{
- int i = 0;
- int ret = -1;
- char *tmpstr = NULL;
- char *dup_brick = NULL;
- char *node = NULL;
-
- if (!conf || !bricks)
- goto out;
-
- dup_brick = gf_strdup (bricks);
- node = strtok_r (dup_brick, ",", &tmpstr);
- while (node) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- if (!strcmp (conf->subvolumes[i]->name, node)) {
- conf->decommissioned_bricks[i] =
- conf->subvolumes[i];
- gf_log (this->name, GF_LOG_INFO,
- "decommissioning subvolume %s",
- conf->subvolumes[i]->name);
- break;
- }
- }
- if (i == conf->subvolume_cnt) {
- /* Wrong node given. */
- goto out;
- }
- node = strtok_r (NULL, ",", &tmpstr);
- }
-
- ret = 0;
- conf->decommission_in_progress = 1;
-out:
- if (dup_brick)
- GF_FREE (dup_brick);
-
- return ret;
-}
-
-int
-reconfigure (xlator_t *this, dict_t *options)
-{
- dht_conf_t *conf = NULL;
- char *temp_str = NULL;
- gf_boolean_t search_unhashed;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("dht", this, out);
- GF_VALIDATE_OR_GOTO ("dht", options, out);
-
- conf = this->private;
- if (!conf)
- return 0;
-
- if (dict_get_str (options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean*/
- if (strcasecmp (temp_str, "auto")) {
- if (!gf_string2boolean (temp_str, &search_unhashed)) {
- gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
- " lookup-unhashed reconfigured (%s)",
- temp_str);
- conf->search_unhashed = search_unhashed;
- } else {
- gf_log(this->name, GF_LOG_ERROR, "Reconfigure:"
- " lookup-unhashed should be boolean,"
- " not (%s), defaulting to (%d)",
- temp_str, conf->search_unhashed);
- //return -1;
- ret = -1;
- goto out;
- }
- } else {
- gf_log(this->name, GF_LOG_DEBUG, "Reconfigure:"
- " lookup-unhashed reconfigured auto ");
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
- }
-
- GF_OPTION_RECONF ("min-free-disk", conf->min_free_disk, options,
- percent_or_size, out);
- GF_OPTION_RECONF ("min-free-inodes", conf->min_free_inodes, options,
- percent, out);
- GF_OPTION_RECONF ("directory-layout-spread", conf->dir_spread_cnt,
- options, uint32, out);
-
- if (conf->defrag) {
- GF_OPTION_RECONF ("rebalance-stats", conf->defrag->stats,
- options, bool, out);
- }
-
- if (dict_get_str (options, "decommissioned-bricks", &temp_str) == 0) {
- ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
- if (ret == -1)
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-
-int
-init (xlator_t *this)
-{
- dht_conf_t *conf = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
- gf_defrag_info_t *defrag = NULL;
- int cmd = 0;
- char *node_uuid = NULL;
-
-
- GF_VALIDATE_OR_GOTO ("dht", this, err);
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Distribute needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf), gf_dht_mt_dht_conf_t);
- if (!conf) {
- goto err;
- }
-
- ret = dict_get_int32 (this->options, "rebalance-cmd", &cmd);
-
- if (cmd) {
- defrag = GF_CALLOC (1, sizeof (gf_defrag_info_t),
- gf_defrag_info_mt);
-
- GF_VALIDATE_OR_GOTO (this->name, defrag, err);
-
- LOCK_INIT (&defrag->lock);
-
- defrag->is_exiting = 0;
-
- conf->defrag = defrag;
-
- ret = dict_get_str (this->options, "node-uuid", &node_uuid);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "node-uuid not "
- "specified");
- goto err;
- }
-
- if (uuid_parse (node_uuid, defrag->node_uuid)) {
- gf_log (this->name, GF_LOG_ERROR, "Cannot parse "
- "glusterd node uuid");
- goto err;
- }
-
- defrag->cmd = cmd;
-
- defrag->stats = _gf_false;
- }
-
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto"))
- gf_string2boolean (temp_str, &conf->search_unhashed);
- else
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
-
- GF_OPTION_INIT ("unhashed-sticky-bit", conf->unhashed_sticky_bit, bool,
- err);
-
- GF_OPTION_INIT ("use-readdirp", conf->use_readdirp, bool, err);
-
- GF_OPTION_INIT ("min-free-disk", conf->min_free_disk, percent_or_size,
- err);
-
- GF_OPTION_INIT ("min-free-inodes", conf->min_free_inodes, percent,
- err);
-
- conf->dir_spread_cnt = conf->subvolume_cnt;
- GF_OPTION_INIT ("directory-layout-spread", conf->dir_spread_cnt,
- uint32, err);
-
- GF_OPTION_INIT ("assert-no-child-down", conf->assert_no_child_down,
- bool, err);
-
- if (defrag) {
- GF_OPTION_INIT ("rebalance-stats", defrag->stats, bool, err);
- }
-
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- if (dict_get_str (this->options, "decommissioned-bricks", &temp_str) == 0) {
- ret = dht_parse_decommissioned_bricks (this, conf, temp_str);
- if (ret == -1)
- goto err;
- }
-
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
-
- conf->gen = 1;
-
- this->local_pool = mem_pool_new (dht_local_t, 512);
- if (!this->local_pool) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto err;
- }
-
- this->private = conf;
-
- return 0;
-
-err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- GF_FREE (conf->du_stats);
-
- if (conf->defrag)
- GF_FREE (conf->defrag);
-
- GF_FREE (conf);
- }
-
- return -1;
-}
-
+struct xlator_fops dht_pt_fops = {
+ /* we need to keep mkdir to make sure we
+ have layout on new directory */
+ .mkdir = dht_pt_mkdir,
+ .getxattr = dht_pt_getxattr,
+ .fgetxattr = dht_pt_fgetxattr,
+
+ /* required to trace fop properly in changelog */
+ .rename = dht_pt_rename,
+
+ /* FIXME: commenting the '.lookup()' below made some of
+ the failing tests to pass. I would remove the below
+ line, but keeping it here as a reminder for people
+ to check for issues if they find concerns with DHT
+ pass-through logic */
+ /*
+ .lookup = dht_lookup,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ */
+ /* Keeping above as commented, mainly to support the
+ usecase of a gluster volume getting to 1x(anytype),
+ due to remove-brick (shrinking) exercise. In that case,
+ we would need above fops to be available, so we can
+ handle the case of dangling linkto files (if any) */
+};
struct xlator_fops fops = {
- .lookup = dht_lookup,
- .mknod = dht_mknod,
- .create = dht_create,
-
- .open = dht_open,
- .statfs = dht_statfs,
- .opendir = dht_opendir,
- .readdir = dht_readdir,
- .readdirp = dht_readdirp,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = dht_unlink,
- .link = dht_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
-
- /* Inode read operations */
- .stat = dht_stat,
- .fstat = dht_fstat,
- .access = dht_access,
- .readlink = dht_readlink,
- .getxattr = dht_getxattr,
- .fgetxattr = dht_fgetxattr,
- .readv = dht_readv,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .lk = dht_lk,
-
- /* Inode write operations */
- .fremovexattr = dht_fremovexattr,
- .removexattr = dht_removexattr,
- .setxattr = dht_setxattr,
- .fsetxattr = dht_fsetxattr,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .writev = dht_writev,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
- .setattr = dht_setattr,
- .fsetattr = dht_fsetattr,
+ .ipc = dht_ipc,
+ .lookup = dht_lookup,
+ .mknod = dht_mknod,
+ .create = dht_create,
+
+ .open = dht_open,
+ .statfs = dht_statfs,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+
+ /* Inode read operations */
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .getxattr = dht_getxattr,
+ .fgetxattr = dht_fgetxattr,
+ .readv = dht_readv,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .lk = dht_lk,
+ .lease = dht_lease,
+
+ /* Inode write operations */
+ .fremovexattr = dht_fremovexattr,
+ .removexattr = dht_removexattr,
+ .setxattr = dht_setxattr,
+ .fsetxattr = dht_fsetxattr,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .writev = dht_writev,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+ .setattr = dht_setattr,
+ .fsetattr = dht_fsetattr,
+ .fallocate = dht_fallocate,
+ .discard = dht_discard,
+ .zerofill = dht_zerofill,
};
struct xlator_dumpops dumpops = {
- .priv = dht_priv_dump,
- .inodectx = dht_inodectx_dump,
+ .priv = dht_priv_dump,
+ .inodectx = dht_inodectx_dump,
};
-
struct xlator_cbks cbks = {
-// .release = dht_release,
-// .releasedir = dht_releasedir,
- .forget = dht_forget
+ .release = dht_release,
+ // .releasedir = dht_releasedir,
+ .forget = dht_forget,
};
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "on",
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- .default_value = "10%",
- .description = "Percentage/Size of disk space that must be "
- "kept free."
- },
- { .key = {"min-free-inodes"},
- .type = GF_OPTION_TYPE_PERCENT,
- .default_value = "5%",
- .description = "Percentage inodes that must be "
- "kept free."
- },
- { .key = {"unhashed-sticky-bit"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"use-readdirp"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "on",
- },
- { .key = {"assert-no-child-down"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
- { .key = {"directory-layout-spread"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"decommissioned-bricks"},
- .type = GF_OPTION_TYPE_ANY,
- },
- { .key = {"rebalance-cmd"},
- .type = GF_OPTION_TYPE_INT,
- },
- { .key = {"node-uuid"},
- .type = GF_OPTION_TYPE_STR,
- },
- { .key = {"rebalance-stats"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- },
-
- { .key = {NULL} },
+extern int32_t
+mem_acct_init(xlator_t *this);
+
+extern struct volume_options dht_options[];
+
+xlator_api_t xlator_api = {
+ .init = dht_init,
+ .fini = dht_fini,
+ .notify = dht_notify,
+ .reconfigure = dht_reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = dht_options,
+ .identifier = "distribute",
+ .pass_through_fops = &dht_pt_fops,
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/cluster/dht/src/nufa.c b/xlators/cluster/dht/src/nufa.c
index 2179870d9ad..3648a564840 100644
--- a/xlators/cluster/dht/src/nufa.c
+++ b/xlators/cluster/dht/src/nufa.c
@@ -8,700 +8,650 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "dht-common.h"
/* TODO: all 'TODO's in dht.c holds good */
+extern struct volume_options dht_options[];
+
int
-nufa_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
+nufa_local_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- xlator_t *subvol = NULL;
- char is_linkfile = 0;
- char is_dir = 0;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- loc_t *loc = NULL;
- int i = 0;
- call_frame_t *prev = NULL;
- int call_cnt = 0;
- int ret = 0;
-
- conf = this->private;
-
- prev = cookie;
- local = frame->local;
- loc = &local->loc;
-
- if (ENTRY_MISSING (op_ret, op_errno)) {
- if (conf->search_unhashed) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
+ xlator_t *subvol = NULL;
+ char is_linkfile = 0;
+ char is_dir = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ int i = 0;
+ xlator_t *prev = NULL;
+ int call_cnt = 0;
+ int ret = 0;
+
+ conf = this->private;
+
+ prev = cookie;
+ local = frame->local;
+ loc = &local->loc;
+
+ if (ENTRY_MISSING(op_ret, op_errno)) {
+ if (conf->search_unhashed) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
+ }
+ }
+
+ if (op_ret == -1)
+ goto out;
+
+ is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
+ is_dir = check_is_dir(inode, stbuf, xattr);
+
+ if (!is_dir && !is_linkfile) {
+ /* non-directory and not a linkfile */
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0,
+ "could not set pre-set layout for subvol"
+ " %s",
+ prev->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto err;
}
- if (op_ret == -1)
- goto out;
-
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
- is_dir = check_is_dir (inode, stbuf, xattr);
-
- if (!is_dir && !is_linkfile) {
- /* non-directory and not a linkfile */
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set pre-set layout for subvol %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto err;
- }
-
- goto out;
- }
+ goto out;
+ }
- if (is_dir) {
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
-
- local->inode = inode_ref (inode);
- local->xattr = dict_ref (xattr);
-
- local->op_ret = 0;
- local->op_errno = 0;
-
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto err;
- }
-
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
- }
+ if (is_dir) {
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
- if (is_linkfile) {
- subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
+ local->inode = inode_ref(inode);
+ local->xattr = dict_ref(xattr);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "linkfile not having link subvolume. path=%s",
- loc->path);
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
+ local->op_ret = 0;
+ local->op_errno = 0;
- STACK_WIND (frame, dht_lookup_linkfile_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto err;
}
- return 0;
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
+ }
+ }
-out:
- if (!local->hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- local->loc.path);
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
+ if (is_linkfile) {
+ subvol = dht_linkfile_subvol(this, inode, stbuf, xattr);
+
+ if (!subvol) {
+ gf_msg_debug(this->name, 0,
+ "linkfile has no link subvolume. path=%s", loc->path);
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
}
- STACK_WIND (frame, dht_lookup_cbk,
- local->hashed_subvol, local->hashed_subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ STACK_WIND_COOKIE(frame, dht_lookup_linkfile_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->loc, local->xattr_req);
+ }
+
+ return 0;
+out:
+ if (!local->hashed_subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ local->loc.path);
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere(frame, this, loc);
return 0;
+ }
+
+ STACK_WIND_COOKIE(frame, dht_lookup_cbk, local->hashed_subvol,
+ local->hashed_subvol, local->hashed_subvol->fops->lookup,
+ &local->loc, local->xattr_req);
+
+ return 0;
err:
- DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno,
- inode, stbuf, xattr, postparent);
- return 0;
+ DHT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+ postparent);
+ return 0;
}
int
-nufa_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+nufa_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
{
- xlator_t *hashed_subvol = NULL;
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int op_errno = -1;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- conf = this->private;
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_LOOKUP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (xattr_req) {
+ local->xattr_req = dict_ref(xattr_req);
+ } else {
+ local->xattr_req = dict_new();
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, &local->loc);
+
+ local->hashed_subvol = hashed_subvol;
+
+ if (is_revalidate(loc)) {
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0,
+ "revalidate lookup without cache. "
+ "path=%s",
+ loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gf_msg_debug(this->name, 0, "incomplete layout failure for path=%s",
+ loc->path);
+ dht_layout_unref(this, local->layout);
+ goto do_fresh_lookup;
+ }
+
+ local->inode = inode_ref(loc->inode);
+
+ local->call_cnt = layout->cnt;
+ call_cnt = local->call_cnt;
+
+ /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
+ * revalidates directly go to the cached-subvolume.
+ */
+ ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
}
- if (xattr_req) {
- local->xattr_req = dict_ref (xattr_req);
- } else {
- local->xattr_req = dict_new ();
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND_COOKIE(frame, dht_revalidate_cbk, subvol, subvol,
+ subvol->fops->lookup, loc, local->xattr_req);
+
+ if (!--call_cnt)
+ break;
+ }
+ } else {
+ do_fresh_lookup:
+ ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
}
- hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
-
- local->hashed_subvol = hashed_subvol;
-
- if (is_revalidate (loc)) {
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "revalidate without cache. path=%s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- if (layout->gen && (layout->gen < conf->gen)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "incomplete layout failure for path=%s",
- loc->path);
- dht_layout_unref (this, local->layout);
- goto do_fresh_lookup;
- }
-
- local->inode = inode_ref (loc->inode);
-
- local->call_cnt = layout->cnt;
- call_cnt = local->call_cnt;
-
- /* NOTE: we don't require 'trusted.glusterfs.dht.linkto' attribute,
- * revalidates directly go to the cached-subvolume.
- */
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to set dict value.");
- op_errno = -1;
- goto err;
- }
-
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
-
- STACK_WIND (frame, dht_revalidate_cbk,
- subvol, subvol->fops->lookup,
- loc, local->xattr_req);
-
- if (!--call_cnt)
- break;
- }
- } else {
- do_fresh_lookup:
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to set dict value.");
- op_errno = -1;
- goto err;
- }
-
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to set dict value.");
- op_errno = -1;
- goto err;
- }
-
- /* Send it to only local volume */
- STACK_WIND (frame, nufa_local_lookup_cbk,
- (xlator_t *)conf->private,
- ((xlator_t *)conf->private)->fops->lookup,
- loc, local->xattr_req);
+ ret = dict_set_uint32(local->xattr_req, conf->link_xattr_name, 256);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_DICT_SET_FAILED,
+ "Failed to set dict value.");
+ op_errno = -1;
+ goto err;
}
- return 0;
+ /* Send it to only local volume */
+ STACK_WIND_COOKIE(
+ frame, nufa_local_lookup_cbk, ((xlator_t *)conf->private),
+ ((xlator_t *)conf->private),
+ ((xlator_t *)conf->private)->fops->lookup, loc, local->xattr_req);
+ }
+
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
- return 0;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
int
-nufa_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+nufa_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret == -1)
- goto err;
+ if (op_ret == -1)
+ goto err;
- STACK_WIND (frame, dht_create_cbk,
- local->cached_subvol, local->cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->umask,
- local->fd, local->params);
+ STACK_WIND_COOKIE(frame, dht_create_cbk, local->cached_subvol,
+ local->cached_subvol, local->cached_subvol->fops->create,
+ &local->loc, local->flags, local->mode, local->umask,
+ local->fd, local->params);
- return 0;
+ return 0;
err:
- DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
int
-nufa_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *params)
+nufa_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
- xlator_t *avail_subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *avail_subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, fd, GF_FOP_CREATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ avail_subvol = conf->private;
+ if (dht_is_subvol_filled(this, (xlator_t *)conf->private)) {
+ avail_subvol = dht_free_disk_available_subvol(
+ this, (xlator_t *)conf->private, local);
+ }
+
+ if (subvol != avail_subvol) {
+ /* create a link file instead of actual file */
+ local->params = dict_ref(params);
+ local->mode = mode;
+ local->flags = flags;
+ local->umask = umask;
+ local->cached_subvol = avail_subvol;
+ dht_linkfile_create(frame, nufa_create_linkfile_create_cbk, this,
+ avail_subvol, subvol, loc);
+ return 0;
+ }
- conf = this->private;
+ gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name);
- dht_get_du_info (frame, this, loc);
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, loc, flags, mode, umask, fd,
+ params);
- local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
+ return 0;
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
- avail_subvol = conf->private;
- if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
- avail_subvol =
- dht_free_disk_available_subvol (this,
- (xlator_t *)conf->private);
- }
+ return 0;
+}
- if (subvol != avail_subvol) {
- /* create a link file instead of actual file */
- local->params = dict_ref (params);
- local->mode = mode;
- local->flags = flags;
- local->umask = umask;
- local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame,
- nufa_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
- return 0;
- }
+int
+nufa_mknod_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ dht_local_t *local = NULL;
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto err;
+ }
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, umask, fd, params);
+ if (op_ret >= 0) {
+ STACK_WIND_COOKIE(
+ frame, dht_newfile_cbk, (void *)local->cached_subvol,
+ local->cached_subvol, local->cached_subvol->fops->mknod,
+ &local->loc, local->mode, local->rdev, local->umask, local->params);
return 0;
-
+ }
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL, NULL);
+ WIPE(postparent);
+ WIPE(preparent);
- return 0;
+ DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, xdata);
+ return 0;
}
int
-nufa_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+nufa_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *avail_subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_MKNOD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ /* Consider the disksize in consideration */
+ avail_subvol = conf->private;
+ if (dht_is_subvol_filled(this, (xlator_t *)conf->private)) {
+ avail_subvol = dht_free_disk_available_subvol(
+ this, (xlator_t *)conf->private, local);
+ }
+
+ if (avail_subvol != subvol) {
+ /* Create linkfile first */
+
+ local->params = dict_ref(params);
+ local->mode = mode;
+ local->umask = umask;
+ local->rdev = rdev;
+ local->cached_subvol = avail_subvol;
+
+ dht_linkfile_create(frame, nufa_mknod_linkfile_cbk, this, avail_subvol,
+ subvol, loc);
+ return 0;
+ }
- local = frame->local;
+ gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name);
- if (op_ret >= 0) {
- STACK_WIND (frame, dht_newfile_cbk,
- local->cached_subvol,
- local->cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev,
- local->umask, local->params);
+ STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask, params);
- return 0;
- }
+ return 0;
- WIPE (postparent);
- WIPE (preparent);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
- inode, stbuf, preparent, postparent, xdata);
- return 0;
+ return 0;
}
-
-int
-nufa_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *params)
+gf_boolean_t
+same_first_part(char *str1, char term1, char *str2, char term2)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
- xlator_t *avail_subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
+ gf_boolean_t ended1;
+ gf_boolean_t ended2;
- conf = this->private;
-
- dht_get_du_info (frame, this, loc);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
+ for (;;) {
+ ended1 = ((*str1 == '\0') || (*str1 == term1));
+ ended2 = ((*str2 == '\0') || (*str2 == term2));
+ if (ended1 && ended2) {
+ return _gf_true;
}
-
- /* Consider the disksize in consideration */
- avail_subvol = conf->private;
- if (dht_is_subvol_filled (this, (xlator_t *)conf->private)) {
- avail_subvol =
- dht_free_disk_available_subvol (this,
- (xlator_t *)conf->private);
- }
-
- if (avail_subvol != subvol) {
- /* Create linkfile first */
-
- local->params = dict_ref (params);
- local->mode = mode;
- local->umask = umask;
- local->rdev = rdev;
- local->cached_subvol = avail_subvol;
-
- dht_linkfile_create (frame, nufa_mknod_linkfile_cbk,
- avail_subvol, subvol, loc);
- return 0;
+ if (ended1 || ended2 || (*str1 != *str2)) {
+ return _gf_false;
}
+ ++str1;
+ ++str2;
+ }
+}
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+typedef struct nufa_args {
+ xlator_t *this;
+ char *volname;
+ gf_boolean_t addr_match;
+} nufa_args_t;
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, umask, params);
+static void
+nufa_find_local_brick(xlator_t *xl, void *data)
+{
+ nufa_args_t *args = data;
+ xlator_t *this = args->this;
+ char *local_volname = args->volname;
+ gf_boolean_t addr_match = args->addr_match;
+ char *brick_host = NULL;
+ dht_conf_t *conf = this->private;
+ int ret = -1;
+
+ /*This means a local subvol was already found. We pick the first brick
+ * that is local*/
+ if (conf->private)
+ return;
- return 0;
+ if (strcmp(xl->name, local_volname) == 0) {
+ conf->private = xl;
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "Using specified subvol %s", local_volname);
+ return;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
+ if (!addr_match)
+ return;
- return 0;
+ ret = dict_get_str(xl->options, "remote-host", &brick_host);
+ if ((ret == 0) && (gf_is_same_address(local_volname, brick_host) ||
+ gf_is_local_addr(brick_host))) {
+ conf->private = xl;
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "Using the first local "
+ "subvol %s",
+ xl->name);
+ return;
+ }
}
-
-int
-notify (xlator_t *this, int event, void *data, ...)
+static void
+nufa_to_dht(xlator_t *this)
{
- int ret = -1;
+ GF_ASSERT(this);
+ GF_ASSERT(this->fops);
- ret = dht_notify (this, event, data);
-
- return ret;
+ this->fops->lookup = dht_lookup;
+ this->fops->create = dht_create;
+ this->fops->mknod = dht_mknod;
}
-void
-fini (xlator_t *this)
+int
+nufa_find_local_subvol(xlator_t *this, void (*fn)(xlator_t *each, void *data),
+ void *data)
{
- int i = 0;
- dht_conf_t *conf = NULL;
-
- conf = this->private;
-
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- GF_FREE (conf);
- }
-
- return;
+ int ret = -1;
+ dht_conf_t *conf = this->private;
+ xlator_list_t *trav = NULL;
+ xlator_t *parent = NULL;
+ xlator_t *candidate = NULL;
+
+ xlator_foreach_depth_first(this, fn, data);
+ if (!conf->private) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_BRICK_ERROR,
+ "Couldn't find a local "
+ "brick");
+ return -1;
+ }
+
+ candidate = conf->private;
+ trav = candidate->parents;
+ while (trav) {
+ parent = trav->xlator;
+ if (strcmp(parent->type, "cluster/nufa") == 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "Found local subvol, "
+ "%s",
+ candidate->name);
+ ret = 0;
+ conf->private = candidate;
+ break;
+ }
+
+ candidate = parent;
+ trav = parent->parents;
+ }
+
+ return ret;
}
int
-init (xlator_t *this)
+nufa_init(xlator_t *this)
{
- dht_conf_t *conf = NULL;
- xlator_list_t *trav = NULL;
- data_t *data = NULL;
- char *local_volname = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
- char my_hostname[256];
- uint32_t temp_free_disk = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "NUFA needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf),
- gf_dht_mt_dht_conf_t);
- if (!conf) {
- goto err;
- }
-
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto"))
- gf_string2boolean (temp_str, &conf->search_unhashed);
- else
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
-
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
- }
-
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
+ data_t *data = NULL;
+ char *local_volname = NULL;
+ int ret = -1;
+ char my_hostname[256];
+ gf_boolean_t addr_match = _gf_false;
+ nufa_args_t args = {
+ 0,
+ };
+
+ ret = dht_init(this);
+ if (ret) {
+ return ret;
+ }
- conf->gen = 1;
+ if ((data = dict_get(this->options, "local-volume-name"))) {
+ local_volname = data->data;
+ } else {
+ addr_match = _gf_true;
local_volname = "localhost";
- ret = gethostname (my_hostname, 256);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "could not find hostname (%s)",
- strerror (errno));
- }
-
+ ret = gethostname(my_hostname, 256);
if (ret == 0)
- local_volname = my_hostname;
-
- data = dict_get (this->options, "local-volume-name");
- if (data) {
- local_volname = data->data;
- }
-
- trav = this->children;
- while (trav) {
- if (strcmp (trav->xlator->name, local_volname) == 0)
- break;
- trav = trav->next;
- }
-
- if (!trav) {
- gf_log (this->name, GF_LOG_ERROR,
- "Could not find subvolume named '%s'. "
- "Please define volume with the name as the hostname "
- "or override it with 'option local-volume-name'",
- local_volname);
- goto err;
- }
- /* The volume specified exists */
- conf->private = trav->xlator;
-
- conf->min_free_disk = 10;
- conf->disk_unit = 'p';
-
- if (dict_get_str (this->options, "min-free-disk",
- &temp_str) == 0) {
- if (gf_string2percent (temp_str,
- &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- conf->disk_unit = 'p';
- }
- } else {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- }
- }
-
- conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
- gf_dht_mt_dht_du_t);
- if (!conf->du_stats) {
- goto err;
- }
-
- this->local_pool = mem_pool_new (dht_local_t, 128);
- if (!this->local_pool) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto err;
- }
-
- this->private = conf;
-
- return 0;
-
-err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- GF_FREE (conf->du_stats);
-
- GF_FREE (conf);
- }
-
- return -1;
+ local_volname = my_hostname;
+
+ else
+ gf_msg(this->name, GF_LOG_WARNING, errno,
+ DHT_MSG_GET_HOSTNAME_FAILED, "could not find hostname");
+ }
+
+ args.this = this;
+ args.volname = local_volname;
+ args.addr_match = addr_match;
+ ret = nufa_find_local_subvol(this, nufa_find_local_brick, &args);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, DHT_MSG_SUBVOL_INFO,
+ "Unable to find local subvolume, switching "
+ "to dht mode");
+ nufa_to_dht(this);
+ }
+ return 0;
}
-
-struct xlator_fops fops = {
- .lookup = nufa_lookup,
- .create = nufa_create,
- .mknod = nufa_mknod,
-
- .stat = dht_stat,
- .fstat = dht_fstat,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .access = dht_access,
- .readlink = dht_readlink,
- .setxattr = dht_setxattr,
- .getxattr = dht_getxattr,
- .removexattr = dht_removexattr,
- .open = dht_open,
- .readv = dht_readv,
- .writev = dht_writev,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .statfs = dht_statfs,
- .lk = dht_lk,
- .opendir = dht_opendir,
- .readdir = dht_readdir,
- .readdirp = dht_readdirp,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = dht_unlink,
- .link = dht_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
- .setattr = dht_setattr,
+dht_methods_t dht_methods = {
+ .migration_get_dst_subvol = dht_migration_get_dst_subvol,
+ .layout_search = dht_layout_search,
};
-
-struct xlator_cbks cbks = {
- .forget = dht_forget
+struct xlator_fops fops = {
+ .lookup = nufa_lookup,
+ .create = nufa_create,
+ .mknod = nufa_mknod,
+
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .setxattr = dht_setxattr,
+ .getxattr = dht_getxattr,
+ .removexattr = dht_removexattr,
+ .open = dht_open,
+ .readv = dht_readv,
+ .writev = dht_writev,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .statfs = dht_statfs,
+ .lk = dht_lk,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+ .setattr = dht_setattr,
};
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"local-volume-name"},
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- },
- { .key = {NULL} },
+struct xlator_cbks cbks = {.forget = dht_forget};
+extern int32_t
+mem_acct_init(xlator_t *this);
+
+xlator_api_t xlator_api = {
+ .init = nufa_init,
+ .fini = dht_fini,
+ .notify = dht_notify,
+ .reconfigure = dht_reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = dht_options,
+ .identifier = "nufa",
+ .category = GF_TECH_PREVIEW,
};
diff --git a/xlators/cluster/dht/src/switch.c b/xlators/cluster/dht/src/switch.c
index ab261da870f..207d109a025 100644
--- a/xlators/cluster/dht/src/switch.c
+++ b/xlators/cluster/dht/src/switch.c
@@ -8,12 +8,6 @@
cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "dht-common.h"
#include "dht-mem-types.h"
@@ -22,1011 +16,876 @@
#include <fnmatch.h>
#include <string.h>
+extern struct volume_options dht_options[];
+
struct switch_sched_array {
- xlator_t *xl;
- int32_t eligible;
- int32_t considered;
+ xlator_t *xl;
+ int32_t eligible;
+ int32_t considered;
};
/* Select one of this struct based on the path's pattern match */
struct switch_struct {
- struct switch_struct *next;
- struct switch_sched_array *array;
- int32_t node_index; /* Index of the node in
- this pattern. */
- int32_t num_child; /* Total num of child nodes
- with this pattern. */
- char path_pattern[256];
+ struct switch_struct *next;
+ struct switch_sched_array *array;
+ int32_t node_index; /* Index of the node in
+ this pattern. */
+ int32_t num_child; /* Total num of child nodes
+ with this pattern. */
+ char path_pattern[256];
};
/* TODO: all 'TODO's in dht.c holds good */
/* This function should return child node as '*:subvolumes' is inserterd */
static int32_t
-gf_switch_valid_child (xlator_t *this, const char *child)
+gf_switch_valid_child(xlator_t *this, const char *child)
{
- xlator_list_t *children = NULL;
- int32_t ret = 0;
+ xlator_list_t *children = NULL;
+ int32_t ret = 0;
- children = this->children;
- while (children) {
- if (!strcmp (child, children->xlator->name)) {
- ret = 1;
- break;
- }
- children = children->next;
+ children = this->children;
+ while (children) {
+ if (!strcmp(child, children->xlator->name)) {
+ ret = 1;
+ break;
}
+ children = children->next;
+ }
- return ret;
+ return ret;
}
static xlator_t *
-get_switch_matching_subvol (const char *path, dht_conf_t *conf,
- xlator_t *hashed_subvol)
+get_switch_matching_subvol(const char *path, dht_conf_t *conf,
+ xlator_t *hashed_subvol)
{
- struct switch_struct *cond = NULL;
- struct switch_struct *trav = NULL;
- char *pathname = NULL;
- int idx = 0;
- xlator_t *subvol = NULL;
-
- cond = conf->private;
- subvol = hashed_subvol;
- if (!cond)
- goto out;
-
- pathname = gf_strdup (path);
- if (!pathname)
- goto out;
-
- trav = cond;
- while (trav) {
- if (fnmatch (trav->path_pattern,
- pathname, FNM_NOESCAPE) == 0) {
- for (idx = 0; idx < trav->num_child; idx++) {
- if (trav->array[idx].xl == hashed_subvol)
- goto out;
- }
- idx = trav->node_index++;
- trav->node_index %= trav->num_child;
- subvol = trav->array[idx].xl;
- goto out;
- }
- trav = trav->next;
- }
+ struct switch_struct *cond = NULL;
+ struct switch_struct *trav = NULL;
+ char *pathname = NULL;
+ int idx = 0;
+ xlator_t *subvol = NULL;
+
+ cond = conf->private;
+ subvol = hashed_subvol;
+ if (!cond)
+ goto out;
+
+ pathname = gf_strdup(path);
+ if (!pathname)
+ goto out;
+
+ trav = cond;
+ while (trav) {
+ if (fnmatch(trav->path_pattern, pathname, FNM_NOESCAPE) == 0) {
+ for (idx = 0; idx < trav->num_child; idx++) {
+ if (trav->array[idx].xl == hashed_subvol)
+ goto out;
+ }
+ idx = trav->node_index++;
+ trav->node_index %= trav->num_child;
+ subvol = trav->array[idx].xl;
+ goto out;
+ }
+ trav = trav->next;
+ }
out:
- if (pathname)
- GF_FREE (pathname);
+ GF_FREE(pathname);
- return subvol;
+ return subvol;
}
-
int
-switch_local_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf, dict_t *xattr,
- struct iatt *postparent)
+switch_local_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
{
- xlator_t *subvol = NULL;
- char is_linkfile = 0;
- char is_dir = 0;
- dht_conf_t *conf = NULL;
- dht_local_t *local = NULL;
- loc_t *loc = NULL;
- int i = 0;
- call_frame_t *prev = NULL;
- int call_cnt = 0;
- int ret = 0;
-
- conf = this->private;
-
- prev = cookie;
- local = frame->local;
- loc = &local->loc;
-
- if (ENTRY_MISSING (op_ret, op_errno)) {
- if (conf->search_unhashed) {
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
- }
+ xlator_t *subvol = NULL;
+ char is_linkfile = 0;
+ char is_dir = 0;
+ dht_conf_t *conf = NULL;
+ dht_local_t *local = NULL;
+ loc_t *loc = NULL;
+ int i = 0;
+ xlator_t *prev = NULL;
+ int call_cnt = 0;
+ int ret = 0;
- if (op_ret == -1)
- goto out;
+ conf = this->private;
- is_linkfile = check_is_linkfile (inode, stbuf, xattr);
- is_dir = check_is_dir (inode, stbuf, xattr);
+ prev = cookie;
+ local = frame->local;
+ loc = &local->loc;
- if (!is_dir && !is_linkfile) {
- /* non-directory and not a linkfile */
+ if (ENTRY_MISSING(op_ret, op_errno)) {
+ if (conf->search_unhashed) {
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
+ }
+ }
- ret = dht_layout_preset (this, prev->this, inode);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not set pre-set layout for subvol %s",
- prev->this->name);
- op_ret = -1;
- op_errno = EINVAL;
- goto err;
- }
+ if (op_ret == -1)
+ goto out;
- goto out;
+ is_linkfile = check_is_linkfile(inode, stbuf, xattr, conf->link_xattr_name);
+ is_dir = check_is_dir(inode, stbuf, xattr);
+
+ if (!is_dir && !is_linkfile) {
+ /* non-directory and not a linkfile */
+
+ ret = dht_layout_preset(this, prev, inode);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0,
+ "could not set pre-set layout "
+ "for subvol %s",
+ prev->name);
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto err;
}
- if (is_dir) {
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
+ goto out;
+ }
- local->inode = inode_ref (inode);
- local->xattr = dict_ref (xattr);
+ if (is_dir) {
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
- local->op_ret = 0;
- local->op_errno = 0;
+ local->inode = inode_ref(inode);
+ local->xattr = dict_ref(xattr);
- local->layout = dht_layout_new (this, conf->subvolume_cnt);
- if (!local->layout) {
- op_ret = -1;
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_DEBUG,
- "memory allocation failed :(");
- goto err;
- }
+ local->op_ret = 0;
+ local->op_errno = 0;
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg_debug(this->name, 0, "memory allocation failed :(");
+ goto err;
}
- if (is_linkfile) {
- subvol = dht_linkfile_subvol (this, inode, stbuf, xattr);
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk, conf->subvolumes[i],
+ conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup, &local->loc,
+ local->xattr_req);
+ }
+ }
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "linkfile not having link subvolume. path=%s",
- loc->path);
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
+ if (is_linkfile) {
+ subvol = dht_linkfile_subvol(this, inode, stbuf, xattr);
- STACK_WIND (frame, dht_lookup_linkfile_cbk,
- subvol, subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0,
+ "linkfile has no link subvolume.path=%s", loc->path);
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
}
- return 0;
+ STACK_WIND_COOKIE(frame, dht_lookup_linkfile_cbk, subvol, subvol,
+ subvol->fops->lookup, &local->loc, local->xattr_req);
+ }
+
+ return 0;
out:
- if (!local->hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- local->loc.path);
- local->op_errno = ENOENT;
- dht_lookup_everywhere (frame, this, loc);
- return 0;
- }
+ if (!local->hashed_subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ local->loc.path);
+ local->op_errno = ENOENT;
+ dht_lookup_everywhere(frame, this, loc);
+ return 0;
+ }
- STACK_WIND (frame, dht_lookup_cbk,
- local->hashed_subvol, local->hashed_subvol->fops->lookup,
- &local->loc, local->xattr_req);
+ STACK_WIND_COOKIE(frame, dht_lookup_cbk, local->hashed_subvol,
+ local->hashed_subvol, local->hashed_subvol->fops->lookup,
+ &local->loc, local->xattr_req);
- return 0;
+ return 0;
err:
- DHT_STACK_UNWIND (lookup, frame, op_ret, op_errno,
- inode, stbuf, xattr, NULL);
- return 0;
+ DHT_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+ NULL);
+ return 0;
}
int
-switch_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+switch_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xattr_req)
{
- xlator_t *hashed_subvol = NULL;
- xlator_t *cached_subvol = NULL;
- xlator_t *subvol = NULL;
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- int ret = -1;
- int op_errno = -1;
- dht_layout_t *layout = NULL;
- int i = 0;
- int call_cnt = 0;
-
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- conf = this->private;
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_LOOKUP);
- if (!local) {
+ xlator_t *hashed_subvol = NULL;
+ xlator_t *cached_subvol = NULL;
+ xlator_t *subvol = NULL;
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = -1;
+ dht_layout_t *layout = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+ VALIDATE_OR_GOTO(loc->path, err);
+
+ conf = this->private;
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_LOOKUP);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (xattr_req) {
+ local->xattr_req = dict_ref(xattr_req);
+ } else {
+ local->xattr_req = dict_new();
+ }
+
+ hashed_subvol = dht_subvol_get_hashed(this, &local->loc);
+ cached_subvol = local->cached_subvol;
+
+ local->hashed_subvol = hashed_subvol;
+
+ if (is_revalidate(loc)) {
+ layout = local->layout;
+ if (!layout) {
+ gf_msg_debug(this->name, 0,
+ "revalidate lookup without cache. path=%s", loc->path);
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ if (layout->gen && (layout->gen < conf->gen)) {
+ gf_msg_debug(this->name, 0, "incomplete layout failure for path=%s",
+ loc->path);
+ dht_layout_unref(this, local->layout);
+ goto do_fresh_lookup;
+ }
+
+ local->inode = inode_ref(loc->inode);
+
+ local->call_cnt = layout->cnt;
+ call_cnt = local->call_cnt;
+
+ /* NOTE: we don't require 'trusted.glusterfs.dht.linkto'
+ * attribute, revalidates directly go to the cached-subvolume.
+ */
+ ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret < 0)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "failed to set dict value for %s", conf->xattr_name);
+
+ for (i = 0; i < layout->cnt; i++) {
+ subvol = layout->list[i].xlator;
+
+ STACK_WIND_COOKIE(frame, dht_revalidate_cbk, subvol, subvol,
+ subvol->fops->lookup, loc, local->xattr_req);
+
+ if (!--call_cnt)
+ break;
+ }
+ } else {
+ do_fresh_lookup:
+ ret = dict_set_uint32(local->xattr_req, conf->xattr_name, 4 * 4);
+ if (ret < 0)
+ gf_msg(this->name, GF_LOG_WARNING, 0, DHT_MSG_DICT_SET_FAILED,
+ "failed to set dict value for %s", conf->xattr_name);
+
+ ret = dict_set_uint32(local->xattr_req, conf->link_xattr_name, 256);
+ if (ret < 0)
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, DHT_MSG_DICT_SET_FAILED,
+ "failed to set dict value for %s", conf->link_xattr_name);
+
+ if (!hashed_subvol) {
+ gf_msg_debug(this->name, 0,
+ "no subvolume in layout for path=%s, "
+ "checking on all the subvols to see if "
+ "it is a directory",
+ loc->path);
+ call_cnt = conf->subvolume_cnt;
+ local->call_cnt = call_cnt;
+
+ local->layout = dht_layout_new(this, conf->subvolume_cnt);
+ if (!local->layout) {
op_errno = ENOMEM;
goto err;
- }
-
- if (xattr_req) {
- local->xattr_req = dict_ref (xattr_req);
+ }
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, dht_lookup_dir_cbk,
+ conf->subvolumes[i], conf->subvolumes[i],
+ conf->subvolumes[i]->fops->lookup,
+ &local->loc, local->xattr_req);
+ }
+ return 0;
+ }
+
+ /* */
+ cached_subvol = get_switch_matching_subvol(loc->path, conf,
+ hashed_subvol);
+ if (cached_subvol == hashed_subvol) {
+ STACK_WIND_COOKIE(frame, dht_lookup_cbk, hashed_subvol,
+ hashed_subvol, hashed_subvol->fops->lookup, loc,
+ local->xattr_req);
} else {
- local->xattr_req = dict_new ();
- }
-
- hashed_subvol = dht_subvol_get_hashed (this, &local->loc);
- cached_subvol = local->cached_subvol;
-
- local->hashed_subvol = hashed_subvol;
-
- if (is_revalidate (loc)) {
- layout = local->layout;
- if (!layout) {
- gf_log (this->name, GF_LOG_DEBUG,
- "revalidate without cache. path=%s",
- loc->path);
- op_errno = EINVAL;
- goto err;
- }
-
- if (layout->gen && (layout->gen < conf->gen)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "incomplete layout failure for path=%s",
- loc->path);
- dht_layout_unref (this, local->layout);
- goto do_fresh_lookup;
- }
-
- local->inode = inode_ref (loc->inode);
-
- local->call_cnt = layout->cnt;
- call_cnt = local->call_cnt;
-
- /* NOTE: we don't require 'trusted.glusterfs.dht.linkto'
- * attribute, revalidates directly go to the cached-subvolume.
- */
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set dict value for "
- "trusted.glusterfs.dht");
-
- for (i = 0; i < layout->cnt; i++) {
- subvol = layout->list[i].xlator;
-
- STACK_WIND (frame, dht_revalidate_cbk,
- subvol, subvol->fops->lookup,
- loc, local->xattr_req);
-
- if (!--call_cnt)
- break;
- }
- } else {
- do_fresh_lookup:
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht", 4 * 4);
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set dict value for "
- "trusted.glusterfs.dht");
-
- ret = dict_set_uint32 (local->xattr_req,
- "trusted.glusterfs.dht.linkto", 256);
- if (ret < 0)
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set dict value for "
- "trusted.glusterfs.dht.linkto");
-
- if (!hashed_subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s, "
- "checking on all the subvols to see if "
- "it is a directory", loc->path);
- call_cnt = conf->subvolume_cnt;
- local->call_cnt = call_cnt;
-
- local->layout = dht_layout_new (this,
- conf->subvolume_cnt);
- if (!local->layout) {
- op_errno = ENOMEM;
- goto err;
- }
-
- for (i = 0; i < call_cnt; i++) {
- STACK_WIND (frame, dht_lookup_dir_cbk,
- conf->subvolumes[i],
- conf->subvolumes[i]->fops->lookup,
- &local->loc, local->xattr_req);
- }
- return 0;
- }
-
- /* */
- cached_subvol = get_switch_matching_subvol (loc->path, conf,
- hashed_subvol);
- if (cached_subvol == hashed_subvol) {
- STACK_WIND (frame, dht_lookup_cbk,
- hashed_subvol,
- hashed_subvol->fops->lookup,
- loc, local->xattr_req);
- } else {
- STACK_WIND (frame, switch_local_lookup_cbk,
- cached_subvol,
- cached_subvol->fops->lookup,
- loc, local->xattr_req);
- }
+ STACK_WIND_COOKIE(frame, switch_local_lookup_cbk, cached_subvol,
+ cached_subvol, cached_subvol->fops->lookup, loc,
+ local->xattr_req);
}
+ }
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (lookup, frame, -1, op_errno,
- NULL, NULL, NULL, NULL);
- return 0;
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
}
int
-switch_create_linkfile_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf,
- struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+switch_create_linkfile_create_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
+ dht_local_t *local = NULL;
- local = frame->local;
+ local = frame->local;
- if (op_ret == -1)
- goto err;
+ if (op_ret == -1)
+ goto err;
- STACK_WIND (frame, dht_create_cbk,
- local->cached_subvol, local->cached_subvol->fops->create,
- &local->loc, local->flags, local->mode, local->umask,
- local->fd, local->params);
+ STACK_WIND_COOKIE(frame, dht_create_cbk, local->cached_subvol,
+ local->cached_subvol, local->cached_subvol->fops->create,
+ &local->loc, local->flags, local->mode, local->umask,
+ local->fd, local->params);
- return 0;
+ return 0;
err:
- DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
int
-switch_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *params)
+switch_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
- xlator_t *avail_subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
-
- conf = this->private;
-
- dht_get_du_info (frame, this, loc);
-
- local = dht_local_init (frame, loc, fd, GF_FOP_CREATE);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
-
- avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);
- if (dht_is_subvol_filled (this, avail_subvol)) {
- avail_subvol =
- dht_free_disk_available_subvol (this, avail_subvol);
- }
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *avail_subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, fd, GF_FOP_CREATE);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ avail_subvol = get_switch_matching_subvol(loc->path, conf, subvol);
+ if (dht_is_subvol_filled(this, avail_subvol)) {
+ avail_subvol = dht_free_disk_available_subvol(this, avail_subvol,
+ local);
+ }
+
+ if (subvol != avail_subvol) {
+ /* create a link file instead of actual file */
+ local->mode = mode;
+ local->flags = flags;
+ local->umask = umask;
+ local->cached_subvol = avail_subvol;
+ dht_linkfile_create(frame, switch_create_linkfile_create_cbk, this,
+ avail_subvol, subvol, loc);
+ return 0;
+ }
- if (subvol != avail_subvol) {
- /* create a link file instead of actual file */
- local->mode = mode;
- local->flags = flags;
- local->umask = umask;
- local->cached_subvol = avail_subvol;
- dht_linkfile_create (frame,
- switch_create_linkfile_create_cbk,
- avail_subvol, subvol, loc);
- return 0;
- }
+ gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name);
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
+ STACK_WIND_COOKIE(frame, dht_create_cbk, subvol, subvol,
+ subvol->fops->create, loc, flags, mode, umask, fd,
+ params);
- STACK_WIND (frame, dht_create_cbk,
- subvol, subvol->fops->create,
- loc, flags, mode, umask, fd, params);
-
- return 0;
+ return 0;
err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (create, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL, NULL);
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
- return 0;
+ return 0;
}
int
-switch_mknod_linkfile_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+switch_mknod_linkfile_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- dht_local_t *local = NULL;
-
- local = frame->local;
+ dht_local_t *local = NULL;
- if (op_ret >= 0) {
- STACK_WIND (frame, dht_newfile_cbk,
- local->cached_subvol,
- local->cached_subvol->fops->mknod,
- &local->loc, local->mode, local->rdev,
- local->umask, local->params);
+ local = frame->local;
+ if (!local || !local->cached_subvol) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto err;
+ }
- return 0;
- }
+ if (op_ret >= 0) {
+ STACK_WIND_COOKIE(
+ frame, dht_newfile_cbk, (void *)local->cached_subvol,
+ local->cached_subvol, local->cached_subvol->fops->mknod,
+ &local->loc, local->mode, local->rdev, local->umask, local->params);
- DHT_STACK_UNWIND (link, frame, op_ret, op_errno,
- inode, stbuf, preparent, postparent, xdata);
return 0;
+ }
+err:
+ DHT_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, xdata);
+ return 0;
}
-
int
-switch_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *params)
+switch_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *params)
{
- dht_local_t *local = NULL;
- dht_conf_t *conf = NULL;
- xlator_t *subvol = NULL;
- xlator_t *avail_subvol = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
-
- conf = this->private;
-
- dht_get_du_info (frame, this, loc);
-
- local = dht_local_init (frame, loc, NULL, GF_FOP_MKNOD);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- subvol = dht_subvol_get_hashed (this, loc);
- if (!subvol) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no subvolume in layout for path=%s",
- loc->path);
- op_errno = ENOENT;
- goto err;
- }
-
- /* Consider the disksize in consideration */
- avail_subvol = get_switch_matching_subvol (loc->path, conf, subvol);
- if (dht_is_subvol_filled (this, avail_subvol)) {
- avail_subvol =
- dht_free_disk_available_subvol (this, avail_subvol);
- }
-
- if (avail_subvol != subvol) {
- /* Create linkfile first */
-
- local->params = dict_ref (params);
- local->mode = mode;
- local->umask = umask;
- local->rdev = rdev;
- local->cached_subvol = avail_subvol;
-
- dht_linkfile_create (frame, switch_mknod_linkfile_cbk,
- avail_subvol, subvol, loc);
- return 0;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "creating %s on %s", loc->path, subvol->name);
-
- STACK_WIND (frame, dht_newfile_cbk,
- subvol, subvol->fops->mknod,
- loc, mode, rdev, umask, params);
-
+ dht_local_t *local = NULL;
+ dht_conf_t *conf = NULL;
+ xlator_t *subvol = NULL;
+ xlator_t *avail_subvol = NULL;
+ int op_errno = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ conf = this->private;
+
+ dht_get_du_info(frame, this, loc);
+
+ local = dht_local_init(frame, loc, NULL, GF_FOP_MKNOD);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ subvol = dht_subvol_get_hashed(this, loc);
+ if (!subvol) {
+ gf_msg_debug(this->name, 0, "no subvolume in layout for path=%s",
+ loc->path);
+ op_errno = ENOENT;
+ goto err;
+ }
+
+ /* Consider the disksize in consideration */
+ avail_subvol = get_switch_matching_subvol(loc->path, conf, subvol);
+ if (dht_is_subvol_filled(this, avail_subvol)) {
+ avail_subvol = dht_free_disk_available_subvol(this, avail_subvol,
+ local);
+ }
+
+ if (avail_subvol != subvol) {
+ /* Create linkfile first */
+
+ local->params = dict_ref(params);
+ local->mode = mode;
+ local->umask = umask;
+ local->rdev = rdev;
+ local->cached_subvol = avail_subvol;
+
+ dht_linkfile_create(frame, switch_mknod_linkfile_cbk, this,
+ avail_subvol, subvol, loc);
return 0;
+ }
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- DHT_STACK_UNWIND (mknod, frame, -1, op_errno,
- NULL, NULL, NULL, NULL, NULL);
-
- return 0;
-}
+ gf_msg_trace(this->name, 0, "creating %s on %s", loc->path, subvol->name);
+ STACK_WIND_COOKIE(frame, dht_newfile_cbk, (void *)subvol, subvol,
+ subvol->fops->mknod, loc, mode, rdev, umask, params);
-int
-notify (xlator_t *this, int event, void *data, ...)
-{
- int ret = -1;
+ return 0;
- ret = dht_notify (this, event, data);
+err:
+ op_errno = (op_errno == -1) ? errno : op_errno;
+ DHT_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return ret;
+ return 0;
}
void
-fini (xlator_t *this)
+switch_fini(xlator_t *this)
{
- int i = 0;
- dht_conf_t *conf = NULL;
- struct switch_struct *trav = NULL;
- struct switch_struct *prev = NULL;
-
- conf = this->private;
-
- if (conf) {
- trav = (struct switch_struct *)conf->private;
- conf->private = NULL;
- while (trav) {
- if (trav->array)
- GF_FREE (trav->array);
- prev = trav;
- trav = trav->next;
- GF_FREE (prev);
- }
+ dht_conf_t *conf = NULL;
+ struct switch_struct *trav = NULL;
+ struct switch_struct *prev = NULL;
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
+ conf = this->private;
- GF_FREE (conf);
+ if (conf) {
+ trav = (struct switch_struct *)conf->private;
+ conf->private = NULL;
+ while (trav) {
+ GF_FREE(trav->array);
+ prev = trav;
+ trav = trav->next;
+ GF_FREE(prev);
}
+ }
- return;
+ dht_fini(this);
}
int
-set_switch_pattern (xlator_t *this, dht_conf_t *conf,
- const char *pattern_str)
+set_switch_pattern(xlator_t *this, dht_conf_t *conf, const char *pattern_str)
{
- int flag = 0;
- int idx = 0;
- int index = 0;
- int child_count = 0;
- char *tmp = NULL;
- char *tmp1 = NULL;
- char *child = NULL;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *dup_str = NULL;
- char *dup_childs = NULL;
- char *switch_str = NULL;
- char *pattern = NULL;
- char *childs = NULL;
- char *option_string = NULL;
- struct switch_struct *switch_buf = NULL;
- struct switch_struct *switch_opt = NULL;
- struct switch_struct *trav = NULL;
- struct switch_sched_array *switch_buf_array = NULL;
- xlator_list_t *trav_xl = NULL;
-
- trav_xl = this->children;
- while (trav_xl) {
- index++;
- trav_xl = trav_xl->next;
- }
- child_count = index;
- switch_buf_array = GF_CALLOC ((index + 1),
- sizeof (struct switch_sched_array),
- gf_switch_mt_switch_sched_array);
- if (!switch_buf_array)
+ int flag = 0;
+ int idx = 0;
+ int index = 0;
+ int child_count = 0;
+ char *tmp = NULL;
+ char *tmp1 = NULL;
+ char *child = NULL;
+ char *tmp_str = NULL;
+ char *tmp_str1 = NULL;
+ char *dup_str = NULL;
+ char *dup_childs = NULL;
+ char *switch_str = NULL;
+ char *pattern = NULL;
+ char *childs = NULL;
+ char *option_string = NULL;
+ size_t pattern_length;
+ struct switch_struct *switch_buf = NULL;
+ struct switch_struct *switch_opt = NULL;
+ struct switch_struct *trav = NULL;
+ struct switch_sched_array *switch_buf_array = NULL;
+ xlator_list_t *trav_xl = NULL;
+
+ trav_xl = this->children;
+ while (trav_xl) {
+ index++;
+ trav_xl = trav_xl->next;
+ }
+ child_count = index;
+ switch_buf_array = GF_CALLOC((index + 1), sizeof(struct switch_sched_array),
+ gf_switch_mt_switch_sched_array);
+ if (!switch_buf_array)
+ goto err;
+
+ trav_xl = this->children;
+ index = 0;
+
+ while (trav_xl) {
+ switch_buf_array[index].xl = trav_xl->xlator;
+ switch_buf_array[index].eligible = 1;
+ trav_xl = trav_xl->next;
+ index++;
+ }
+
+ /* *jpg:child1,child2;*mpg:child3;*:child4,child5,child6 */
+
+ /* Get the pattern for considering switch case.
+ "option block-size *avi:10MB" etc */
+ option_string = gf_strdup(pattern_str);
+ if (option_string == NULL) {
+ goto err;
+ }
+ switch_str = strtok_r(option_string, ";", &tmp_str);
+ while (switch_str) {
+ dup_str = gf_strdup(switch_str);
+ if (dup_str == NULL) {
+ goto err;
+ }
+ switch_opt = GF_CALLOC(1, sizeof(struct switch_struct),
+ gf_switch_mt_switch_struct);
+ if (!switch_opt) {
+ GF_FREE(dup_str);
+ goto err;
+ }
+
+ pattern = strtok_r(dup_str, ":", &tmp_str1);
+ childs = strtok_r(NULL, ":", &tmp_str1);
+ if (strncmp(pattern, "*", 2) == 0) {
+ gf_msg("switch", GF_LOG_INFO, 0, DHT_MSG_SWITCH_PATTERN_INFO,
+ "'*' pattern will be taken by default "
+ "for all the unconfigured child nodes,"
+ " hence neglecting current option");
+ switch_str = strtok_r(NULL, ";", &tmp_str);
+ GF_FREE(switch_opt);
+ switch_opt = NULL;
+ GF_FREE(dup_str);
+ continue;
+ }
+ GF_FREE(dup_str);
+
+ pattern_length = strlen(pattern);
+ if (pattern_length >= (sizeof(switch_opt->path_pattern))) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ DHT_MSG_SET_SWITCH_PATTERN_ERROR, "Pattern (%s) too long",
+ pattern);
+ goto err;
+ }
+ memcpy(switch_opt->path_pattern, pattern, pattern_length);
+ switch_opt->path_pattern[pattern_length] = '\0';
+
+ if (childs) {
+ dup_childs = gf_strdup(childs);
+ if (dup_childs == NULL) {
goto err;
-
- trav_xl = this->children;
- index = 0;
-
- while (trav_xl) {
- switch_buf_array[index].xl = trav_xl->xlator;
- switch_buf_array[index].eligible = 1;
- trav_xl = trav_xl->next;
- index++;
- }
-
- /* *jpg:child1,child2;*mpg:child3;*:child4,child5,child6 */
-
- /* Get the pattern for considering switch case.
- "option block-size *avi:10MB" etc */
- option_string = gf_strdup (pattern_str);
- switch_str = strtok_r (option_string, ";", &tmp_str);
- while (switch_str) {
- dup_str = gf_strdup (switch_str);
- switch_opt = GF_CALLOC (1, sizeof (struct switch_struct),
- gf_switch_mt_switch_struct);
- if (!switch_opt) {
- GF_FREE (dup_str);
- goto err;
- }
-
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- childs = strtok_r (NULL, ":", &tmp_str1);
- if (strncmp (pattern, "*", 2) == 0) {
- gf_log ("switch", GF_LOG_INFO,
- "'*' pattern will be taken by default "
- "for all the unconfigured child nodes,"
- " hence neglecting current option");
- switch_str = strtok_r (NULL, ";", &tmp_str);
- GF_FREE (switch_opt);
- GF_FREE (dup_str);
- continue;
- }
- memcpy (switch_opt->path_pattern, pattern, strlen (pattern));
- if (childs) {
- dup_childs = gf_strdup (childs);
- child = strtok_r (dup_childs, ",", &tmp);
- while (child) {
- if (gf_switch_valid_child (this, child)) {
- idx++;
- child = strtok_r (NULL, ",", &tmp);
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "%s is not a subvolume of %s. "
- "pattern can only be scheduled "
- "only to a subvolume of %s",
- child, this->name, this->name);
- goto err;
- }
- }
- GF_FREE (dup_childs);
- child = strtok_r (childs, ",", &tmp1);
- switch_opt->num_child = idx;
- switch_opt->array = GF_CALLOC (1, (idx *
- sizeof (struct switch_sched_array)),
- gf_switch_mt_switch_sched_array);
- if (!switch_opt->array)
- goto err;
- idx = 0;
- while (child) {
- for (index = 0; index < child_count; index++) {
- if (strcmp (switch_buf_array[index].xl->name,
- child) == 0) {
- gf_log ("switch", GF_LOG_DEBUG,
- "'%s' pattern will be "
- "scheduled to \"%s\"",
- switch_opt->path_pattern, child);
- /*
- if (switch_buf_array[index-1].considered) {
- gf_log ("switch", GF_LOG_DEBUG,
- "ambiguity found, exiting");
- return -1;
- }
- */
- switch_opt->array[idx].xl = switch_buf_array[index].xl;
- switch_buf_array[index].considered = 1;
- idx++;
- break;
- }
- }
- child = strtok_r (NULL, ",", &tmp1);
- }
- } else {
- /* error */
- gf_log ("switch", GF_LOG_ERROR,
- "Check \"scheduler.switch.case\" "
- "option in unify volume. Exiting");
- goto err;
- }
- GF_FREE (dup_str);
-
- /* Link it to the main structure */
- if (switch_buf) {
- /* there are already few entries */
- trav = switch_buf;
- while (trav->next)
- trav = trav->next;
- trav->next = switch_opt;
+ }
+ child = strtok_r(dup_childs, ",", &tmp);
+ while (child) {
+ if (gf_switch_valid_child(this, child)) {
+ idx++;
+ child = strtok_r(NULL, ",", &tmp);
} else {
- /* First entry */
- switch_buf = switch_opt;
- }
- switch_opt = NULL;
- switch_str = strtok_r (NULL, ";", &tmp_str);
- }
-
- /* Now, all the pattern based considerations done, so for all the
- * remaining pattern, '*' to all the remaining child nodes
- */
- {
- for (index=0; index < child_count; index++) {
- /* check for considered flag */
- if (switch_buf_array[index].considered)
- continue;
- flag++;
- }
- if (!flag) {
- gf_log ("switch", GF_LOG_ERROR,
- "No nodes left for pattern '*'. Exiting");
- goto err;
+ gf_msg(this->name, GF_LOG_ERROR, 0, DHT_MSG_SUBVOL_ERROR,
+ "%s is not a subvolume of %s. "
+ "pattern can only be scheduled "
+ "only to a subvolume of %s",
+ child, this->name, this->name);
+ GF_FREE(dup_childs);
+ goto err;
}
- switch_opt = GF_CALLOC (1, sizeof (struct switch_struct),
- gf_switch_mt_switch_struct);
- if (!switch_opt)
- goto err;
-
- /* Add the '*' pattern to the array */
- memcpy (switch_opt->path_pattern, "*", 2);
- switch_opt->num_child = flag;
- switch_opt->array =
- GF_CALLOC (1,
- flag * sizeof (struct switch_sched_array),
- gf_switch_mt_switch_sched_array);
- if (!switch_opt->array)
- goto err;
- flag = 0;
- for (index=0; index < child_count; index++) {
- /* check for considered flag */
- if (switch_buf_array[index].considered)
- continue;
- gf_log ("switch", GF_LOG_DEBUG,
- "'%s' pattern will be scheduled to \"%s\"",
- switch_opt->path_pattern,
- switch_buf_array[index].xl->name);
- switch_opt->array[flag].xl =
- switch_buf_array[index].xl;
+ }
+ GF_FREE(dup_childs);
+ child = strtok_r(childs, ",", &tmp1);
+ switch_opt->num_child = idx;
+ switch_opt->array = GF_CALLOC(
+ 1, (idx * sizeof(struct switch_sched_array)),
+ gf_switch_mt_switch_sched_array);
+ if (!switch_opt->array)
+ goto err;
+ idx = 0;
+ while (child) {
+ for (index = 0; index < child_count; index++) {
+ if (strcmp(switch_buf_array[index].xl->name, child) == 0) {
+ gf_msg_debug("switch", 0,
+ "'%s' pattern will be "
+ "scheduled to \"%s\"",
+ switch_opt->path_pattern, child);
+ /*
+ if (switch_buf_array[index-1].considered) {
+ gf_msg_debug ("switch", 0,
+ "ambiguity found, exiting");
+ return -1;
+ }
+ */
+ switch_opt->array[idx].xl = switch_buf_array[index].xl;
switch_buf_array[index].considered = 1;
- flag++;
- }
- if (switch_buf) {
- /* there are already few entries */
- trav = switch_buf;
- while (trav->next)
- trav = trav->next;
- trav->next = switch_opt;
- } else {
- /* First entry */
- switch_buf = switch_opt;
+ idx++;
+ break;
+ }
}
- switch_opt = NULL;
+ child = strtok_r(NULL, ",", &tmp1);
+ }
+ } else {
+ /* error */
+ gf_msg("switch", GF_LOG_ERROR, 0, DHT_MSG_SET_SWITCH_PATTERN_ERROR,
+ "Check \"scheduler.switch.case\" "
+ "option in unify volume. Exiting");
+ goto err;
}
- /* */
- conf->private = switch_buf;
-
- return 0;
-err:
- if (switch_buf_array)
- GF_FREE (switch_buf_array);
- if (switch_opt)
- GF_FREE (switch_opt);
+ /* Link it to the main structure */
if (switch_buf) {
- trav = switch_buf;
- while (trav) {
- if (trav->array)
- GF_FREE (trav->array);
- switch_opt = trav;
- trav = trav->next;
- GF_FREE (switch_opt);
- }
- }
- return -1;
-}
-
-
-int
-init (xlator_t *this)
-{
- dht_conf_t *conf = NULL;
- data_t *data = NULL;
- char *temp_str = NULL;
- int ret = -1;
- int i = 0;
- uint32_t temp_free_disk = 0;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "SWITCH needs more than one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
-
- conf = GF_CALLOC (1, sizeof (*conf), gf_switch_mt_dht_conf_t);
- if (!conf) {
- goto err;
- }
-
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_ON;
- if (dict_get_str (this->options, "lookup-unhashed", &temp_str) == 0) {
- /* If option is not "auto", other options _should_ be boolean */
- if (strcasecmp (temp_str, "auto"))
- gf_string2boolean (temp_str, &conf->search_unhashed);
- else
- conf->search_unhashed = GF_DHT_LOOKUP_UNHASHED_AUTO;
- }
-
- conf->unhashed_sticky_bit = 0;
- if (dict_get_str (this->options, "unhashed-sticky-bit",
- &temp_str) == 0) {
- gf_string2boolean (temp_str, &conf->unhashed_sticky_bit);
- }
-
- conf->min_free_disk = 10;
- conf->disk_unit = 'p';
-
- if (dict_get_str (this->options, "min-free-disk",
- &temp_str) == 0) {
- if (gf_string2percent (temp_str,
- &temp_free_disk) == 0) {
- if (temp_free_disk > 100) {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- } else {
- conf->min_free_disk = (uint64_t)temp_free_disk;
- conf->disk_unit = 'p';
- }
- } else {
- gf_string2bytesize (temp_str,
- &conf->min_free_disk);
- conf->disk_unit = 'b';
- }
+ /* there are already few entries */
+ trav = switch_buf;
+ while (trav->next)
+ trav = trav->next;
+ trav->next = switch_opt;
+ } else {
+ /* First entry */
+ switch_buf = switch_opt;
+ }
+ switch_opt = NULL;
+ switch_str = strtok_r(NULL, ";", &tmp_str);
+ }
+
+ /* Now, all the pattern based considerations done, so for all the
+ * remaining pattern, '*' to all the remaining child nodes
+ */
+ {
+ for (index = 0; index < child_count; index++) {
+ /* check for considered flag */
+ if (switch_buf_array[index].considered)
+ continue;
+ flag++;
+ }
+ if (!flag) {
+ gf_msg("switch", GF_LOG_ERROR, 0, DHT_MSG_SET_SWITCH_PATTERN_ERROR,
+ "No nodes left for pattern '*'. Exiting");
+ goto err;
+ }
+ switch_opt = GF_CALLOC(1, sizeof(struct switch_struct),
+ gf_switch_mt_switch_struct);
+ if (!switch_opt)
+ goto err;
+
+ /* Add the '*' pattern to the array */
+ memcpy(switch_opt->path_pattern, "*", 2);
+ switch_opt->num_child = flag;
+ switch_opt->array = GF_CALLOC(1,
+ flag * sizeof(struct switch_sched_array),
+ gf_switch_mt_switch_sched_array);
+ if (!switch_opt->array)
+ goto err;
+ flag = 0;
+ for (index = 0; index < child_count; index++) {
+ /* check for considered flag */
+ if (switch_buf_array[index].considered)
+ continue;
+ gf_msg_debug("switch", 0,
+ "'%s'"
+ " pattern will be scheduled to \"%s\"",
+ switch_opt->path_pattern,
+ switch_buf_array[index].xl->name);
+
+ switch_opt->array[flag].xl = switch_buf_array[index].xl;
+ switch_buf_array[index].considered = 1;
+ flag++;
}
-
- data = dict_get (this->options, "pattern.switch.case");
- if (data) {
- /* TODO: */
- ret = set_switch_pattern (this, conf, data->data);
- if (ret) {
- goto err;
- }
+ if (switch_buf) {
+ /* there are already few entries */
+ trav = switch_buf;
+ while (trav->next)
+ trav = trav->next;
+ trav->next = switch_opt;
+ } else {
+ /* First entry */
+ switch_buf = switch_opt;
}
+ switch_opt = NULL;
+ }
+ /* */
+ conf->private = switch_buf;
- ret = dht_init_subvolumes (this, conf);
- if (ret == -1) {
- goto err;
- }
+ GF_FREE(option_string);
+ return 0;
+err:
+ GF_FREE(switch_buf_array);
+ GF_FREE(switch_opt);
+ GF_FREE(option_string);
- ret = dht_layouts_init (this, conf);
- if (ret == -1) {
- goto err;
+ if (switch_buf) {
+ trav = switch_buf;
+ while (trav) {
+ GF_FREE(trav->array);
+ switch_opt = trav;
+ trav = trav->next;
+ GF_FREE(switch_opt);
}
+ }
+ return -1;
+}
- LOCK_INIT (&conf->subvolume_lock);
- LOCK_INIT (&conf->layout_lock);
-
- conf->gen = 1;
+int32_t
+switch_init(xlator_t *this)
+{
+ dht_conf_t *conf = NULL;
+ data_t *data = NULL;
+ int ret = -1;
- conf->du_stats = GF_CALLOC (conf->subvolume_cnt, sizeof (dht_du_t),
- gf_switch_mt_dht_du_t);
- if (!conf->du_stats) {
- goto err;
- }
+ ret = dht_init(this);
+ if (ret) {
+ return ret;
+ }
+ conf = this->private;
- this->local_pool = mem_pool_new (dht_local_t, 128);
- if (!this->local_pool) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto err;
+ data = dict_get(this->options, "pattern.switch.case");
+ if (data) {
+ /* TODO: */
+ ret = set_switch_pattern(this, conf, data->data);
+ if (ret) {
+ goto err;
}
+ }
- this->private = conf;
-
- return 0;
+ this->private = conf;
+ return 0;
err:
- if (conf) {
- if (conf->file_layouts) {
- for (i = 0; i < conf->subvolume_cnt; i++) {
- GF_FREE (conf->file_layouts[i]);
- }
- GF_FREE (conf->file_layouts);
- }
-
- if (conf->subvolumes)
- GF_FREE (conf->subvolumes);
-
- if (conf->subvolume_status)
- GF_FREE (conf->subvolume_status);
-
- if (conf->du_stats)
- GF_FREE (conf->du_stats);
-
- GF_FREE (conf);
- }
-
- return -1;
+ dht_fini(this);
+ return -1;
}
-
struct xlator_fops fops = {
- .lookup = switch_lookup,
- .create = switch_create,
- .mknod = switch_mknod,
-
- .stat = dht_stat,
- .fstat = dht_fstat,
- .truncate = dht_truncate,
- .ftruncate = dht_ftruncate,
- .access = dht_access,
- .readlink = dht_readlink,
- .setxattr = dht_setxattr,
- .getxattr = dht_getxattr,
- .removexattr = dht_removexattr,
- .open = dht_open,
- .readv = dht_readv,
- .writev = dht_writev,
- .flush = dht_flush,
- .fsync = dht_fsync,
- .statfs = dht_statfs,
- .lk = dht_lk,
- .opendir = dht_opendir,
- .readdir = dht_readdir,
- .readdirp = dht_readdirp,
- .fsyncdir = dht_fsyncdir,
- .symlink = dht_symlink,
- .unlink = dht_unlink,
- .link = dht_link,
- .mkdir = dht_mkdir,
- .rmdir = dht_rmdir,
- .rename = dht_rename,
- .inodelk = dht_inodelk,
- .finodelk = dht_finodelk,
- .entrylk = dht_entrylk,
- .fentrylk = dht_fentrylk,
- .xattrop = dht_xattrop,
- .fxattrop = dht_fxattrop,
- .setattr = dht_setattr,
+ .lookup = switch_lookup,
+ .create = switch_create,
+ .mknod = switch_mknod,
+
+ .stat = dht_stat,
+ .fstat = dht_fstat,
+ .truncate = dht_truncate,
+ .ftruncate = dht_ftruncate,
+ .access = dht_access,
+ .readlink = dht_readlink,
+ .setxattr = dht_setxattr,
+ .getxattr = dht_getxattr,
+ .removexattr = dht_removexattr,
+ .open = dht_open,
+ .readv = dht_readv,
+ .writev = dht_writev,
+ .flush = dht_flush,
+ .fsync = dht_fsync,
+ .statfs = dht_statfs,
+ .lk = dht_lk,
+ .opendir = dht_opendir,
+ .readdir = dht_readdir,
+ .readdirp = dht_readdirp,
+ .fsyncdir = dht_fsyncdir,
+ .symlink = dht_symlink,
+ .unlink = dht_unlink,
+ .link = dht_link,
+ .mkdir = dht_mkdir,
+ .rmdir = dht_rmdir,
+ .rename = dht_rename,
+ .inodelk = dht_inodelk,
+ .finodelk = dht_finodelk,
+ .entrylk = dht_entrylk,
+ .fentrylk = dht_fentrylk,
+ .xattrop = dht_xattrop,
+ .fxattrop = dht_fxattrop,
+ .setattr = dht_setattr,
};
-
-struct xlator_cbks cbks = {
- .forget = dht_forget
-};
-
-
-struct volume_options options[] = {
- { .key = {"lookup-unhashed"},
- .value = {"auto", "yes", "no", "enable", "disable", "1", "0",
- "on", "off"},
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"pattern.switch.case"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"min-free-disk"},
- .type = GF_OPTION_TYPE_PERCENT_OR_SIZET,
- },
- { .key = {NULL} },
+struct xlator_cbks cbks = {.forget = dht_forget};
+extern int32_t
+mem_acct_init(xlator_t *this);
+
+xlator_api_t xlator_api = {
+ .init = switch_init,
+ .fini = switch_fini,
+ .notify = dht_notify,
+ .reconfigure = dht_reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = dht_options,
+ .identifier = "switch",
+ .category = GF_TECH_PREVIEW,
};
diff --git a/xlators/cluster/dht/src/unittest/dht_layout_mock.c b/xlators/cluster/dht/src/unittest/dht_layout_mock.c
new file mode 100644
index 00000000000..771452963d1
--- /dev/null
+++ b/xlators/cluster/dht/src/unittest/dht_layout_mock.c
@@ -0,0 +1,73 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include "dht-common.h"
+#include <glusterfs/byte-order.h>
+
+int
+dht_hash_compute(xlator_t *this, int type, const char *name, uint32_t *hash_p)
+{
+ return 0;
+}
+
+int
+dht_inode_ctx_layout_get(inode_t *inode, xlator_t *this, dht_layout_t **layout)
+{
+ return 0;
+}
+
+int
+dht_inode_ctx_layout_set(inode_t *inode, xlator_t *this,
+ dht_layout_t *layout_int)
+{
+ return 0;
+}
+
+int
+dict_get_ptr(dict_t *this, char *key, void **ptr)
+{
+ return 0;
+}
+
+int
+dict_get_ptr_and_len(dict_t *this, char *key, void **ptr, int *len)
+{
+ return 0;
+}
+
+int
+_gf_log(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, const char *fmt, ...)
+{
+ return 0;
+}
+
+int
+_gf_log_callingfn(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, const char *fmt, ...)
+{
+ return 0;
+}
+
+void
+gf_uuid_unparse(const uuid_t uu, char *out)
+{
+ // could call a will-return function here
+ // to place the correct data in *out
+}
+
+int
+_gf_msg(const char *domain, const char *file, const char *function,
+ int32_t line, gf_loglevel_t level, int errnum, int trace,
+ uint64_t msgid, const char *fmt, ...)
+{
+ return 0;
+}
diff --git a/xlators/cluster/dht/src/unittest/dht_layout_unittest.c b/xlators/cluster/dht/src/unittest/dht_layout_unittest.c
new file mode 100644
index 00000000000..c94a1d0a2e1
--- /dev/null
+++ b/xlators/cluster/dht/src/unittest/dht_layout_unittest.c
@@ -0,0 +1,127 @@
+/*
+ Copyright (c) 2008-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "dht-common.h"
+#include <glusterfs/logging.h>
+#include <glusterfs/xlator.h>
+
+#include <inttypes.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <setjmp.h>
+#include <cmocka_pbc.h>
+#include <cmocka.h>
+
+/*
+ * Helper functions
+ */
+
+static xlator_t *
+helper_xlator_init(uint32_t num_types)
+{
+ xlator_t *xl;
+ int i, ret;
+
+ REQUIRE(num_types > 0);
+
+ xl = test_calloc(1, sizeof(xlator_t));
+ assert_non_null(xl);
+ xl->mem_acct->num_types = num_types;
+ xl->mem_acct = test_calloc(sizeof(struct mem_acct) +
+ sizeof(struct mem_acct_rec) + num_types);
+ assert_non_null(xl->mem_acct);
+
+ xl->ctx = test_calloc(1, sizeof(glusterfs_ctx_t));
+ assert_non_null(xl->ctx);
+
+ for (i = 0; i < num_types; i++) {
+ ret = LOCK_INIT(&(xl->mem_acct.rec[i].lock));
+ assert_false(ret);
+ }
+
+ ENSURE(num_types == xl->mem_acct.num_types);
+ ENSURE(NULL != xl);
+
+ return xl;
+}
+
+static int
+helper_xlator_destroy(xlator_t *xl)
+{
+ int i, ret;
+
+ for (i = 0; i < xl->mem_acct.num_types; i++) {
+ ret = LOCK_DESTROY(&(xl->mem_acct.rec[i].lock));
+ assert_int_equal(ret, 0);
+ }
+
+ free(xl->mem_acct.rec);
+ free(xl->ctx);
+ free(xl);
+ return 0;
+}
+
+/*
+ * Unit tests
+ */
+static void
+test_dht_layout_new(void **state)
+{
+ xlator_t *xl;
+ dht_layout_t *layout;
+ dht_conf_t *conf;
+ int cnt;
+
+ expect_assert_failure(dht_layout_new(NULL, 0));
+ expect_assert_failure(dht_layout_new((xlator_t *)0x12345, -1));
+ xl = helper_xlator_init(10);
+
+ // xl->private is NULL
+ assert_null(xl->private);
+ cnt = 100;
+ layout = dht_layout_new(xl, cnt);
+ assert_non_null(layout);
+ assert_int_equal(layout->type, DHT_HASH_TYPE_DM);
+ assert_int_equal(layout->cnt, cnt);
+ assert_int_equal(GF_ATOMIC_GET(layout->ref), 1);
+ assert_int_equal(layout->gen, 0);
+ assert_int_equal(layout->spread_cnt, 0);
+ free(layout);
+
+ // xl->private is not NULL
+ cnt = 110;
+ conf = (dht_conf_t *)test_calloc(1, sizeof(dht_conf_t));
+ assert_non_null(conf);
+ conf->dir_spread_cnt = 12345;
+ conf->gen = -123;
+ xl->private = conf;
+
+ layout = dht_layout_new(xl, cnt);
+ assert_non_null(layout);
+ assert_int_equal(layout->type, DHT_HASH_TYPE_DM);
+ assert_int_equal(layout->cnt, cnt);
+ assert_int_equal(GF_ATOMIC_GET(layout->ref), 1);
+ assert_int_equal(layout->gen, conf->gen);
+ assert_int_equal(layout->spread_cnt, conf->dir_spread_cnt);
+ free(layout);
+
+ free(conf);
+ helper_xlator_destroy(xl);
+}
+
+int
+main(void)
+{
+ const struct CMUnitTest xlator_dht_layout_tests[] = {
+ unit_test(test_dht_layout_new),
+ };
+
+ return cmocka_run_group_tests(xlator_dht_layout_tests, NULL, NULL);
+}
diff --git a/libglusterfsclient/Makefile.am b/xlators/cluster/ec/Makefile.am
index d471a3f9243..d471a3f9243 100644
--- a/libglusterfsclient/Makefile.am
+++ b/xlators/cluster/ec/Makefile.am
diff --git a/xlators/cluster/ec/src/Makefile.am b/xlators/cluster/ec/src/Makefile.am
new file mode 100644
index 00000000000..406a636bbc2
--- /dev/null
+++ b/xlators/cluster/ec/src/Makefile.am
@@ -0,0 +1,83 @@
+xlator_LTLIBRARIES = ec.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
+
+ec_sources := ec.c
+ec_sources += ec-data.c
+ec_sources += ec-helpers.c
+ec_sources += ec-common.c
+ec_sources += ec-generic.c
+ec_sources += ec-locks.c
+ec_sources += ec-dir-read.c
+ec_sources += ec-dir-write.c
+ec_sources += ec-inode-read.c
+ec_sources += ec-inode-write.c
+ec_sources += ec-combine.c
+ec_sources += ec-method.c
+ec_sources += ec-galois.c
+ec_sources += ec-code.c
+ec_sources += ec-code-c.c
+ec_sources += ec-gf8.c
+ec_sources += ec-heal.c
+ec_sources += ec-heald.c
+
+ec_headers := ec.h
+ec_headers += ec-mem-types.h
+ec_headers += ec-helpers.h
+ec_headers += ec-data.h
+ec_headers += ec-fops.h
+ec_headers += ec-common.h
+ec_headers += ec-combine.h
+ec_headers += ec-method.h
+ec_headers += ec-galois.h
+ec_headers += ec-code.h
+ec_headers += ec-code-c.h
+ec_headers += ec-gf8.h
+ec_headers += ec-heald.h
+ec_headers += ec-messages.h
+ec_headers += ec-types.h
+
+if ENABLE_EC_DYNAMIC_INTEL
+ ec_sources += ec-code-intel.c
+ ec_headers += ec-code-intel.h
+endif
+
+if ENABLE_EC_DYNAMIC_X64
+ ec_sources += ec-code-x64.c
+ ec_headers += ec-code-x64.h
+endif
+
+if ENABLE_EC_DYNAMIC_SSE
+ ec_sources += ec-code-sse.c
+ ec_headers += ec-code-sse.h
+endif
+
+if ENABLE_EC_DYNAMIC_AVX
+ ec_sources += ec-code-avx.c
+ ec_headers += ec-code-avx.h
+endif
+
+ec_ext_sources = $(top_builddir)/xlators/lib/src/libxlator.c
+
+ec_ext_headers = $(top_builddir)/xlators/lib/src/libxlator.h
+
+ec_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+ec_la_SOURCES = $(ec_sources) $(ec_headers) $(ec_ext_sources) $(ec_ext_headers)
+ec_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS)
+AM_CPPFLAGS += -I$(top_srcdir)/libglusterfs/src
+AM_CPPFLAGS += -I$(top_srcdir)/xlators/lib/src
+AM_CPPFLAGS += -I$(top_srcdir)/rpc/rpc-lib/src
+AM_CPPFLAGS += -I$(top_srcdir)/rpc/xdr/src
+AM_CPPFLAGS += -I$(top_builddir)/rpc/xdr/src
+AM_CPPFLAGS += -DGLUSTERFS_LIBEXECDIR=\"$(GLUSTERFS_LIBEXECDIR)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
+install-data-hook:
+ ln -sf ec.so $(DESTDIR)$(xlatordir)/disperse.so
+
+uninstall-local:
+ rm -f $(DESTDIR)$(xlatordir)/disperse.so
diff --git a/xlators/cluster/ec/src/ec-code-avx.c b/xlators/cluster/ec/src/ec-code-avx.c
new file mode 100644
index 00000000000..70afaa00f54
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-avx.c
@@ -0,0 +1,109 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <errno.h>
+
+#include "ec-code-intel.h"
+
+static void
+ec_code_avx_prolog(ec_code_builder_t *builder)
+{
+ builder->loop = builder->address;
+}
+
+static void
+ec_code_avx_epilog(ec_code_builder_t *builder)
+{
+ ec_code_intel_op_add_i2r(builder, 32, REG_DX);
+ ec_code_intel_op_add_i2r(builder, 32, REG_DI);
+ ec_code_intel_op_test_i2r(builder, builder->width - 1, REG_DX);
+ ec_code_intel_op_jne(builder, builder->loop);
+
+ ec_code_intel_op_ret(builder, 0);
+}
+
+static void
+ec_code_avx_load(ec_code_builder_t *builder, uint32_t dst, uint32_t idx,
+ uint32_t bit)
+{
+ if (builder->linear) {
+ ec_code_intel_op_mov_m2avx(
+ builder, REG_SI, REG_DX, 1,
+ idx * builder->width * builder->bits + bit * builder->width, dst);
+ } else {
+ if (builder->base != idx) {
+ ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8,
+ REG_AX);
+ builder->base = idx;
+ }
+ ec_code_intel_op_mov_m2avx(builder, REG_AX, REG_DX, 1,
+ bit * builder->width, dst);
+ }
+}
+
+static void
+ec_code_avx_store(ec_code_builder_t *builder, uint32_t src, uint32_t bit)
+{
+ ec_code_intel_op_mov_avx2m(builder, src, REG_DI, REG_NULL, 0,
+ bit * builder->width);
+}
+
+static void
+ec_code_avx_copy(ec_code_builder_t *builder, uint32_t dst, uint32_t src)
+{
+ ec_code_intel_op_mov_avx2avx(builder, src, dst);
+}
+
+static void
+ec_code_avx_xor2(ec_code_builder_t *builder, uint32_t dst, uint32_t src)
+{
+ ec_code_intel_op_xor_avx2avx(builder, src, dst);
+}
+
+static void
+ec_code_avx_xor3(ec_code_builder_t *builder, uint32_t dst, uint32_t src1,
+ uint32_t src2)
+{
+ ec_code_intel_op_mov_avx2avx(builder, src1, dst);
+ ec_code_intel_op_xor_avx2avx(builder, src2, dst);
+}
+
+static void
+ec_code_avx_xorm(ec_code_builder_t *builder, uint32_t dst, uint32_t idx,
+ uint32_t bit)
+{
+ if (builder->linear) {
+ ec_code_intel_op_xor_m2avx(
+ builder, REG_SI, REG_DX, 1,
+ idx * builder->width * builder->bits + bit * builder->width, dst);
+ } else {
+ if (builder->base != idx) {
+ ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8,
+ REG_AX);
+ builder->base = idx;
+ }
+ ec_code_intel_op_xor_m2avx(builder, REG_AX, REG_DX, 1,
+ bit * builder->width, dst);
+ }
+}
+
+static char *ec_code_avx_needed_flags[] = {"avx2", NULL};
+
+ec_code_gen_t ec_code_gen_avx = {.name = "avx",
+ .flags = ec_code_avx_needed_flags,
+ .width = 32,
+ .prolog = ec_code_avx_prolog,
+ .epilog = ec_code_avx_epilog,
+ .load = ec_code_avx_load,
+ .store = ec_code_avx_store,
+ .copy = ec_code_avx_copy,
+ .xor2 = ec_code_avx_xor2,
+ .xor3 = ec_code_avx_xor3,
+ .xorm = ec_code_avx_xorm};
diff --git a/xlators/cluster/ec/src/ec-code-avx.h b/xlators/cluster/ec/src/ec-code-avx.h
new file mode 100644
index 00000000000..fdca4ad2c8f
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-avx.h
@@ -0,0 +1,18 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_CODE_AVX_H__
+#define __EC_CODE_AVX_H__
+
+#include "ec-code.h"
+
+extern ec_code_gen_t ec_code_gen_avx;
+
+#endif /* __EC_CODE_AVX_H__ */
diff --git a/xlators/cluster/ec/src/ec-code-c.c b/xlators/cluster/ec/src/ec-code-c.c
new file mode 100644
index 00000000000..acdc665c2cf
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-c.c
@@ -0,0 +1,11679 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <inttypes.h>
+#include <string.h>
+
+#include "ec-method.h"
+#include "ec-code-c.h"
+
+#define WIDTH (EC_METHOD_WORD_SIZE / sizeof(uint64_t))
+
+static void
+gf8_muladd_00(void *out, void *in)
+{
+ memcpy(out, in, EC_METHOD_WORD_SIZE * 8);
+}
+
+static void
+gf8_muladd_01(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ out_ptr[0] ^= in_ptr[0];
+ out_ptr[WIDTH] ^= in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] ^= in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] ^= in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] ^= in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] ^= in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] ^= in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] ^= in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_02(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in7;
+ out1 = in0;
+ out7 = in6;
+ out5 = in4;
+ out6 = in5;
+ out3 = in2 ^ in7;
+ out4 = in3 ^ in7;
+ out2 = in1 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_03(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in0 ^ in7;
+ tmp0 = in2 ^ in7;
+ out1 = in0 ^ in1;
+ out7 = in6 ^ in7;
+ out5 = in4 ^ in5;
+ out6 = in5 ^ in6;
+ out4 = in3 ^ in4 ^ in7;
+ out2 = tmp0 ^ in1;
+ out3 = tmp0 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_04(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in6;
+ out1 = in7;
+ out7 = in5;
+ out6 = in4;
+ tmp0 = in6 ^ in7;
+ out2 = in0 ^ in6;
+ out5 = in3 ^ in7;
+ out3 = tmp0 ^ in1;
+ out4 = tmp0 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_05(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in0 ^ in6;
+ out1 = in1 ^ in7;
+ out7 = in5 ^ in7;
+ out6 = in4 ^ in6;
+ out2 = out0 ^ in2;
+ out3 = out1 ^ in3 ^ in6;
+ out5 = out7 ^ in3;
+ out4 = out6 ^ in2 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_06(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in6 ^ in7;
+ tmp0 = in1 ^ in6;
+ out1 = in0 ^ in7;
+ out7 = in5 ^ in6;
+ out6 = in4 ^ in5;
+ out4 = in2 ^ in3 ^ in6;
+ out5 = in3 ^ in4 ^ in7;
+ out3 = tmp0 ^ in2;
+ out2 = tmp0 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_07(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in6;
+ tmp1 = in5 ^ in6;
+ tmp2 = in0 ^ in7;
+ tmp3 = tmp0 ^ in3;
+ out6 = tmp1 ^ in4;
+ out7 = tmp1 ^ in7;
+ out0 = tmp2 ^ in6;
+ out1 = tmp2 ^ in1;
+ out3 = tmp3 ^ in1;
+ out4 = tmp3 ^ in4;
+ out5 = out4 ^ out7 ^ in2;
+ out2 = tmp0 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_08(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in5;
+ out1 = in6;
+ out7 = in4;
+ out6 = in3 ^ in7;
+ out3 = in0 ^ in5 ^ in6;
+ out5 = in2 ^ in6 ^ in7;
+ out2 = in5 ^ in7;
+ out4 = out2 ^ in1 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_09(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in0 ^ in5;
+ tmp0 = in3 ^ in6;
+ out1 = in1 ^ in6;
+ out7 = in4 ^ in7;
+ out2 = in2 ^ in5 ^ in7;
+ out3 = tmp0 ^ out0;
+ out6 = tmp0 ^ in7;
+ out4 = out1 ^ out7 ^ in5;
+ out5 = out2 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_0A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in5 ^ in7;
+ out1 = in0 ^ in6;
+ out7 = in4 ^ in6;
+ out2 = in1 ^ in5;
+ out6 = out0 ^ in3;
+ out3 = out0 ^ out1 ^ in2;
+ out5 = out7 ^ in2 ^ in7;
+ out4 = out2 ^ in3 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_0B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in5;
+ tmp1 = in0 ^ in6;
+ tmp2 = in4 ^ in7;
+ out0 = in0 ^ in5 ^ in7;
+ out2 = tmp0 ^ in1;
+ out1 = tmp1 ^ in1;
+ out6 = tmp1 ^ out0 ^ in3;
+ out7 = tmp2 ^ in6;
+ out4 = tmp2 ^ out6 ^ in1;
+ out3 = out6 ^ in0 ^ in2;
+ out5 = tmp0 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_0C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in5 ^ in6;
+ out1 = in6 ^ in7;
+ out7 = in4 ^ in5;
+ tmp0 = in1 ^ in5;
+ tmp1 = in0 ^ in7;
+ out5 = in2 ^ in3 ^ in6;
+ out6 = in3 ^ in4 ^ in7;
+ out2 = tmp1 ^ out0;
+ out4 = tmp0 ^ in2;
+ out3 = tmp0 ^ tmp1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_0D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in4 ^ in5;
+ tmp1 = in5 ^ in6;
+ out1 = in1 ^ in6 ^ in7;
+ out7 = tmp0 ^ in7;
+ out4 = tmp0 ^ in1 ^ in2;
+ out0 = tmp1 ^ in0;
+ tmp2 = tmp1 ^ in3;
+ out6 = tmp2 ^ out7;
+ out2 = out0 ^ in2 ^ in7;
+ out3 = out0 ^ out1 ^ in3;
+ out5 = tmp2 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_0E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = in2 ^ in5;
+ tmp2 = in5 ^ in6;
+ out1 = in0 ^ in6 ^ in7;
+ out3 = tmp0 ^ tmp1;
+ out2 = tmp0 ^ tmp2;
+ tmp3 = tmp1 ^ in3;
+ out7 = tmp2 ^ in4;
+ out0 = tmp2 ^ in7;
+ out4 = tmp3 ^ in1 ^ in7;
+ out5 = tmp3 ^ out7;
+ out6 = out0 ^ out5 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_0F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in6 ^ in7;
+ tmp1 = tmp0 ^ in1;
+ tmp2 = tmp0 ^ in5;
+ out1 = tmp1 ^ in0;
+ out7 = tmp2 ^ in4;
+ out0 = tmp2 ^ in0;
+ out6 = out7 ^ in3;
+ out5 = out6 ^ in2 ^ in7;
+ tmp3 = tmp1 ^ out0 ^ in2;
+ out4 = tmp1 ^ out5;
+ out2 = tmp3 ^ in6;
+ out3 = tmp3 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_10(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in4;
+ out1 = in5;
+ out7 = in3 ^ in7;
+ tmp0 = in6 ^ in7;
+ out2 = in4 ^ in6;
+ tmp1 = out2 ^ in5;
+ out6 = tmp0 ^ in2;
+ out3 = tmp0 ^ tmp1;
+ out5 = out2 ^ out3 ^ in1;
+ out4 = tmp1 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_11(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out7 = in3;
+ out0 = in0 ^ in4;
+ out1 = in1 ^ in5;
+ out6 = in2 ^ in7;
+ out4 = in0 ^ in5 ^ in6;
+ out5 = in1 ^ in6 ^ in7;
+ out2 = in2 ^ in4 ^ in6;
+ out3 = in3 ^ in4 ^ in5 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_12(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in4 ^ in7;
+ out1 = in0 ^ in5;
+ out3 = in2 ^ in4 ^ in5;
+ tmp0 = out0 ^ in6;
+ out2 = tmp0 ^ in1;
+ tmp1 = tmp0 ^ in3;
+ out6 = tmp0 ^ out3;
+ out5 = out2 ^ in5;
+ out7 = tmp1 ^ in4;
+ out4 = tmp1 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_13(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out7 = in3 ^ in6;
+ tmp0 = in0 ^ in5;
+ tmp1 = in4 ^ in7;
+ out6 = in2 ^ in5 ^ in7;
+ out4 = tmp0 ^ out7 ^ in7;
+ out1 = tmp0 ^ in1;
+ out0 = tmp1 ^ in0;
+ out5 = tmp1 ^ in1 ^ in6;
+ out3 = tmp1 ^ out6 ^ in3;
+ out2 = out5 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_14(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in4 ^ in6;
+ out1 = in5 ^ in7;
+ out2 = in0 ^ in4;
+ tmp0 = out0 ^ in5;
+ out7 = out1 ^ in3;
+ tmp1 = out1 ^ in2;
+ out3 = tmp0 ^ in1;
+ out6 = tmp0 ^ tmp1;
+ out4 = tmp1 ^ out2;
+ out5 = out3 ^ in3 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_15(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out7 = in3 ^ in5;
+ tmp0 = in0 ^ in4;
+ out1 = in1 ^ in5 ^ in7;
+ out5 = in1 ^ in3 ^ in6;
+ out0 = tmp0 ^ in6;
+ out2 = tmp0 ^ in2;
+ out3 = out5 ^ in4 ^ in5;
+ out6 = out2 ^ in0 ^ in7;
+ out4 = tmp0 ^ out6 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_16(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in5;
+ tmp1 = in4 ^ in7;
+ tmp2 = in2 ^ in3 ^ in4;
+ out1 = tmp0 ^ in7;
+ out4 = tmp0 ^ tmp2;
+ out0 = tmp1 ^ in6;
+ tmp3 = tmp1 ^ in1;
+ out6 = out0 ^ in2 ^ in5;
+ out2 = tmp3 ^ in0;
+ out3 = out6 ^ in1;
+ out7 = tmp2 ^ out6;
+ out5 = tmp3 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_17(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in5;
+ tmp1 = in3 ^ in6;
+ tmp2 = tmp0 ^ in4;
+ out4 = tmp0 ^ in0 ^ in3;
+ out7 = tmp1 ^ in5;
+ tmp3 = tmp1 ^ in1;
+ out6 = tmp2 ^ in7;
+ out5 = tmp3 ^ in4;
+ out3 = tmp3 ^ out6;
+ out0 = out3 ^ out4 ^ in1;
+ out2 = out3 ^ out7 ^ in0;
+ out1 = tmp2 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_18(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in4 ^ in5;
+ out1 = in5 ^ in6;
+ tmp0 = in4 ^ in7;
+ out5 = in1 ^ in2 ^ in5;
+ out6 = in2 ^ in3 ^ in6;
+ out2 = tmp0 ^ out1;
+ out7 = tmp0 ^ in3;
+ tmp1 = tmp0 ^ in0;
+ out3 = tmp1 ^ in6;
+ out4 = tmp1 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_19(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out5 = in1 ^ in2;
+ out7 = in3 ^ in4;
+ tmp0 = in0 ^ in7;
+ out6 = in2 ^ in3;
+ out1 = in1 ^ in5 ^ in6;
+ out0 = in0 ^ in4 ^ in5;
+ out4 = tmp0 ^ in1;
+ tmp1 = tmp0 ^ in6;
+ out2 = tmp1 ^ out0 ^ in2;
+ out3 = tmp1 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_1A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in4 ^ in5;
+ tmp1 = in5 ^ in6;
+ tmp2 = tmp0 ^ in1;
+ out0 = tmp0 ^ in7;
+ out1 = tmp1 ^ in0;
+ tmp3 = tmp1 ^ in3;
+ out5 = tmp2 ^ in2;
+ out2 = tmp2 ^ in6;
+ out7 = tmp3 ^ out0;
+ out6 = tmp3 ^ in2;
+ out4 = tmp3 ^ out2 ^ in0;
+ out3 = tmp0 ^ out1 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_1B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in4;
+ tmp1 = in2 ^ in5;
+ tmp2 = in3 ^ in6;
+ out5 = tmp0 ^ in1;
+ tmp3 = tmp0 ^ in0;
+ out6 = tmp1 ^ in3;
+ out0 = tmp1 ^ tmp3 ^ in7;
+ out7 = tmp2 ^ in4;
+ tmp4 = out5 ^ in6;
+ out3 = tmp2 ^ tmp3;
+ out2 = tmp4 ^ in5;
+ out4 = tmp4 ^ out3;
+ out1 = tmp3 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_1C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3;
+ tmp1 = in4 ^ in6;
+ tmp2 = in5 ^ in7;
+ out6 = tmp0 ^ tmp1;
+ out0 = tmp1 ^ in5;
+ out1 = tmp2 ^ in6;
+ tmp3 = tmp2 ^ in1;
+ tmp4 = tmp2 ^ in4;
+ out2 = tmp4 ^ in0;
+ out7 = tmp4 ^ in3;
+ out5 = tmp0 ^ tmp3;
+ out3 = tmp3 ^ out2;
+ out4 = out3 ^ in2 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_1D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in3;
+ tmp1 = in0 ^ in4;
+ tmp2 = in3 ^ in4;
+ tmp3 = in2 ^ in7;
+ out3 = tmp0 ^ tmp1;
+ out5 = tmp0 ^ tmp3;
+ tmp4 = tmp1 ^ in5;
+ out6 = tmp2 ^ in2;
+ out7 = tmp2 ^ in5;
+ out2 = tmp3 ^ tmp4;
+ out4 = out3 ^ out6 ^ in6;
+ out0 = tmp4 ^ in6;
+ out1 = out2 ^ out4 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_1E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in4;
+ tmp1 = in2 ^ in7;
+ tmp2 = tmp0 ^ in1;
+ out3 = tmp1 ^ tmp2;
+ out2 = tmp2 ^ in5;
+ out4 = out3 ^ in3 ^ in6;
+ tmp3 = out4 ^ in7;
+ out6 = tmp3 ^ out2 ^ in4;
+ out7 = tmp1 ^ out6;
+ out0 = out7 ^ in3;
+ out1 = tmp0 ^ out0;
+ out5 = tmp3 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_1F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in4 ^ in6;
+ tmp1 = tmp0 ^ in5;
+ out7 = tmp1 ^ in3;
+ out0 = tmp1 ^ in0 ^ in7;
+ out6 = out7 ^ in2 ^ in6;
+ out1 = out0 ^ in1 ^ in4;
+ out4 = out0 ^ out6 ^ in1;
+ out3 = tmp0 ^ out4;
+ out2 = out4 ^ out7 ^ in7;
+ out5 = out3 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_20(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in4;
+ out0 = in3 ^ in7;
+ tmp0 = in3 ^ in4;
+ tmp1 = in6 ^ in7;
+ out2 = out0 ^ in5;
+ out4 = tmp0 ^ in5;
+ out3 = tmp0 ^ tmp1;
+ out7 = tmp1 ^ in2;
+ out6 = tmp1 ^ in1 ^ in5;
+ out5 = out2 ^ out3 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_21(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in1 ^ in4;
+ tmp0 = in4 ^ in6;
+ out4 = in3 ^ in5;
+ out7 = in2 ^ in6;
+ out0 = in0 ^ in3 ^ in7;
+ out6 = in1 ^ in5 ^ in7;
+ out3 = tmp0 ^ in7;
+ out5 = tmp0 ^ in0;
+ out2 = out4 ^ in2 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_22(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in3;
+ out1 = in0 ^ in4;
+ out7 = in2 ^ in7;
+ out4 = in4 ^ in5 ^ in7;
+ out5 = in0 ^ in5 ^ in6;
+ out6 = in1 ^ in6 ^ in7;
+ out3 = in2 ^ in3 ^ in4 ^ in6;
+ out2 = in1 ^ in3 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_23(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out7 = in2;
+ out0 = in0 ^ in3;
+ out4 = in5 ^ in7;
+ out5 = in0 ^ in6;
+ out6 = in1 ^ in7;
+ out3 = in2 ^ in4 ^ in6;
+ out1 = in0 ^ in1 ^ in4;
+ out2 = out4 ^ out6 ^ in2 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_24(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in4 ^ in7;
+ tmp0 = in3 ^ in4;
+ out0 = in3 ^ in6 ^ in7;
+ out3 = tmp0 ^ in1;
+ tmp1 = out0 ^ in5;
+ out6 = tmp1 ^ out3;
+ out2 = tmp1 ^ in0;
+ out7 = tmp1 ^ in2 ^ in3;
+ out5 = out2 ^ in4;
+ out4 = tmp0 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_25(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in1 ^ in4;
+ tmp0 = in2 ^ in5;
+ out1 = out3 ^ in7;
+ out7 = tmp0 ^ in6;
+ out6 = out1 ^ in5;
+ out4 = out7 ^ in3 ^ in7;
+ out2 = out4 ^ in0;
+ out0 = tmp0 ^ out2;
+ out5 = out0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_26(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in3 ^ in6;
+ tmp0 = in4 ^ in7;
+ out7 = in2 ^ in5 ^ in7;
+ tmp1 = out0 ^ in0 ^ in5;
+ out1 = tmp0 ^ in0;
+ tmp2 = tmp0 ^ in6;
+ out2 = tmp1 ^ in1;
+ out5 = tmp1 ^ in7;
+ out6 = tmp2 ^ in1;
+ out4 = tmp2 ^ out7;
+ out3 = out0 ^ out6 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_27(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out7 = in2 ^ in5;
+ out0 = in0 ^ in3 ^ in6;
+ out6 = in1 ^ in4 ^ in7;
+ out4 = out7 ^ in6;
+ out2 = out0 ^ out7 ^ in1;
+ out5 = out0 ^ in7;
+ out1 = out6 ^ in0;
+ out3 = out6 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_28(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in3;
+ out1 = in4 ^ in6;
+ out0 = in3 ^ in5 ^ in7;
+ tmp0 = out1 ^ in7;
+ tmp1 = out0 ^ in4;
+ out7 = tmp0 ^ in2;
+ tmp2 = tmp0 ^ in1;
+ out3 = tmp1 ^ in0;
+ out6 = tmp1 ^ tmp2;
+ out4 = tmp2 ^ in3;
+ out5 = out3 ^ in2 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_29(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in2 ^ in3;
+ tmp0 = in1 ^ in3;
+ tmp1 = in4 ^ in6;
+ tmp2 = in0 ^ in4 ^ in7;
+ out6 = tmp0 ^ in5;
+ out4 = tmp0 ^ in6 ^ in7;
+ out1 = tmp1 ^ in1;
+ out7 = tmp1 ^ in2;
+ out3 = tmp2 ^ in5;
+ out5 = tmp2 ^ in2;
+ out0 = out3 ^ in3 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_2A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in3 ^ in5;
+ tmp0 = in1 ^ in3;
+ tmp1 = in0 ^ in4;
+ out7 = in2 ^ in4 ^ in7;
+ out3 = tmp1 ^ out0 ^ in2;
+ out2 = tmp0 ^ in7;
+ out6 = tmp0 ^ in6;
+ out1 = tmp1 ^ in6;
+ out5 = tmp1 ^ out7 ^ in5;
+ out4 = out1 ^ in0 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_2B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in1 ^ in6;
+ out7 = in2 ^ in4;
+ tmp0 = in0 ^ in5;
+ tmp1 = in2 ^ in7;
+ out6 = in1 ^ in3;
+ out1 = out4 ^ in0 ^ in4;
+ out3 = tmp0 ^ out7;
+ out0 = tmp0 ^ in3;
+ out5 = tmp1 ^ in0;
+ out2 = tmp1 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_2C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in5;
+ tmp1 = in2 ^ in3 ^ in4;
+ tmp2 = tmp0 ^ in6;
+ out4 = tmp1 ^ in1;
+ out5 = tmp1 ^ in0 ^ in5;
+ tmp3 = tmp2 ^ in4;
+ out6 = tmp2 ^ out4;
+ out7 = tmp3 ^ in7;
+ out2 = tmp3 ^ out5;
+ out3 = out6 ^ in0;
+ out0 = tmp1 ^ out7;
+ out1 = tmp0 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_2D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3;
+ out4 = tmp0 ^ in1;
+ tmp1 = tmp0 ^ in0;
+ out2 = tmp1 ^ in6;
+ out5 = tmp1 ^ in4;
+ tmp2 = out2 ^ in2;
+ tmp3 = tmp2 ^ in5;
+ out0 = tmp3 ^ in7;
+ out7 = tmp3 ^ out5;
+ out6 = out4 ^ out7 ^ in6;
+ out3 = tmp2 ^ out6;
+ out1 = out0 ^ out6 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_2E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in4 ^ in7;
+ out0 = in3 ^ in5 ^ in6;
+ tmp1 = tmp0 ^ in0;
+ tmp2 = tmp0 ^ in2;
+ out1 = tmp1 ^ in6;
+ out4 = tmp2 ^ in1;
+ out7 = tmp2 ^ in5;
+ out3 = out0 ^ out4 ^ in0;
+ out2 = out3 ^ out7 ^ in7;
+ out6 = tmp1 ^ out2;
+ out5 = tmp1 ^ out7 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_2F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ tmp1 = in2 ^ in5;
+ out4 = in1 ^ in2 ^ in7;
+ out6 = in1 ^ in3 ^ in4;
+ out5 = tmp0 ^ in2;
+ tmp2 = tmp0 ^ in6;
+ out7 = tmp1 ^ in4;
+ out0 = tmp2 ^ in5;
+ out2 = tmp2 ^ out4;
+ out1 = tmp2 ^ out6 ^ in7;
+ out3 = tmp1 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_30(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in4 ^ in5;
+ tmp0 = in3 ^ in6;
+ tmp1 = in4 ^ in7;
+ out6 = in1 ^ in2 ^ in5;
+ out3 = tmp0 ^ in5;
+ out4 = tmp0 ^ in0;
+ out7 = tmp0 ^ in2;
+ out0 = tmp1 ^ in3;
+ out2 = tmp1 ^ out3;
+ out5 = tmp1 ^ in0 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_31(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in5 ^ in6;
+ tmp0 = in4 ^ in5;
+ tmp1 = in0 ^ in3 ^ in4;
+ tmp2 = out3 ^ in2;
+ out1 = tmp0 ^ in1;
+ out0 = tmp1 ^ in7;
+ out4 = tmp1 ^ in6;
+ out6 = tmp2 ^ in1;
+ out2 = tmp2 ^ out0 ^ in0;
+ out5 = out1 ^ in0 ^ in7;
+ out7 = tmp0 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_32(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in3 ^ in4;
+ out7 = in2 ^ in3;
+ tmp0 = in5 ^ in6;
+ tmp1 = in0 ^ in7;
+ out6 = in1 ^ in2;
+ out1 = in0 ^ in4 ^ in5;
+ out2 = tmp0 ^ out0 ^ in1;
+ out3 = tmp0 ^ out7 ^ in7;
+ out4 = tmp1 ^ in6;
+ out5 = tmp1 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_33(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3;
+ tmp1 = in0 ^ in4;
+ tmp2 = in1 ^ in5;
+ out6 = in1 ^ in2 ^ in6;
+ out7 = tmp0 ^ in7;
+ out0 = tmp1 ^ in3;
+ out1 = tmp1 ^ tmp2;
+ tmp3 = tmp2 ^ in7;
+ tmp4 = tmp2 ^ in4 ^ in6;
+ out5 = tmp3 ^ in0;
+ out3 = tmp3 ^ out6;
+ out4 = tmp4 ^ out5;
+ out2 = tmp0 ^ tmp4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_34(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in4;
+ tmp1 = in4 ^ in5;
+ tmp2 = tmp0 ^ in1;
+ tmp3 = tmp0 ^ in6;
+ out1 = tmp1 ^ in7;
+ tmp4 = tmp1 ^ in2;
+ out5 = tmp2 ^ in0;
+ out3 = tmp2 ^ out1;
+ out0 = tmp3 ^ in7;
+ out7 = tmp3 ^ tmp4;
+ out6 = tmp4 ^ in1;
+ out2 = out3 ^ out5 ^ in3;
+ out4 = tmp4 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_35(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in6;
+ tmp1 = in5 ^ in7;
+ out7 = tmp0 ^ tmp1 ^ in3;
+ out3 = tmp1 ^ in1;
+ out1 = out3 ^ in4;
+ tmp2 = out1 ^ in7;
+ out5 = tmp2 ^ in0 ^ in3;
+ out6 = tmp0 ^ tmp2;
+ out0 = out3 ^ out5 ^ in6;
+ out4 = tmp0 ^ out0;
+ out2 = out4 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_36(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in0 ^ in2;
+ tmp0 = in1 ^ in3;
+ out0 = in3 ^ in4 ^ in6;
+ out6 = in1 ^ in2 ^ in4;
+ out5 = tmp0 ^ in0;
+ tmp1 = out5 ^ in5;
+ out2 = tmp1 ^ in4;
+ out3 = tmp1 ^ out4;
+ out1 = tmp0 ^ out2 ^ in7;
+ out7 = out3 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_37(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2;
+ tmp1 = in2 ^ in4;
+ tmp2 = tmp0 ^ in6;
+ out3 = tmp0 ^ in5;
+ out4 = tmp1 ^ in0;
+ out6 = tmp2 ^ in4;
+ out1 = out3 ^ out4 ^ in7;
+ tmp3 = out4 ^ in1 ^ in3;
+ out7 = tmp3 ^ out1;
+ out2 = tmp3 ^ in5;
+ out5 = tmp1 ^ out2;
+ out0 = tmp2 ^ tmp3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_38(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in0 ^ in3;
+ tmp0 = in3 ^ in4;
+ tmp1 = in5 ^ in7;
+ tmp2 = out3 ^ in1;
+ out2 = tmp0 ^ in6;
+ out0 = tmp0 ^ tmp1;
+ out4 = tmp1 ^ tmp2;
+ out7 = out2 ^ in2;
+ out1 = out2 ^ in3 ^ in5;
+ out6 = out4 ^ in0 ^ in2;
+ out5 = tmp2 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_39(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in0;
+ tmp0 = in1 ^ in5;
+ tmp1 = tmp0 ^ in4;
+ out1 = tmp1 ^ in6;
+ out5 = out1 ^ in0 ^ in2;
+ tmp2 = tmp0 ^ out5;
+ out2 = tmp2 ^ in0 ^ in3;
+ out7 = out2 ^ in7;
+ out6 = tmp1 ^ out7;
+ out4 = tmp2 ^ out6;
+ out0 = out4 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_3A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = in0 ^ in2;
+ tmp2 = in3 ^ in4;
+ tmp3 = in1 ^ in6;
+ tmp4 = in3 ^ in7;
+ out4 = tmp0 ^ in5;
+ out5 = tmp1 ^ tmp3;
+ out3 = tmp1 ^ tmp4;
+ out0 = tmp2 ^ in5;
+ out7 = tmp2 ^ in2;
+ tmp5 = tmp3 ^ in4;
+ out2 = tmp4 ^ tmp5;
+ out1 = tmp5 ^ out4;
+ out6 = tmp0 ^ out3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_3B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in6;
+ tmp1 = in2 ^ in7;
+ tmp2 = tmp0 ^ in3;
+ out3 = tmp1 ^ in0;
+ out6 = tmp1 ^ tmp2;
+ out2 = out6 ^ in4;
+ out7 = tmp0 ^ out2;
+ out0 = out3 ^ out7 ^ in5;
+ out5 = out0 ^ out2 ^ in7;
+ out1 = tmp2 ^ out0;
+ out4 = out1 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_3C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ tmp1 = in2 ^ in7;
+ tmp2 = in1 ^ in6 ^ in7;
+ out2 = tmp0 ^ in4;
+ out3 = tmp0 ^ tmp2;
+ out4 = tmp1 ^ out3 ^ in5;
+ out5 = tmp2 ^ out2 ^ in2;
+ out1 = out4 ^ out5 ^ in6;
+ out0 = out1 ^ in3;
+ out7 = tmp1 ^ out0;
+ out6 = tmp2 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_3D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in2;
+ tmp1 = tmp0 ^ in3;
+ out2 = tmp1 ^ in4;
+ tmp2 = out2 ^ in5;
+ out4 = tmp2 ^ in1 ^ in6;
+ out5 = out4 ^ in7;
+ out6 = out5 ^ in0;
+ out7 = out6 ^ in1;
+ out0 = tmp0 ^ out7;
+ out1 = tmp1 ^ out5;
+ out3 = tmp2 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_3E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in5;
+ tmp1 = tmp0 ^ in4;
+ out0 = tmp1 ^ in6;
+ out7 = tmp1 ^ in2;
+ out6 = out7 ^ in1 ^ in5 ^ in7;
+ out2 = out6 ^ in0 ^ in2;
+ out4 = out0 ^ out6 ^ in0;
+ out5 = tmp0 ^ out4;
+ out3 = out5 ^ in7;
+ out1 = out3 ^ out6 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_3F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ out3 = tmp0 ^ in2 ^ in6;
+ tmp1 = out3 ^ in5 ^ in7;
+ out4 = tmp1 ^ in4;
+ out5 = tmp1 ^ in3;
+ out1 = out4 ^ in2;
+ out7 = out1 ^ out3 ^ in3;
+ out2 = tmp0 ^ out7 ^ in5;
+ tmp2 = out2 ^ in0;
+ out6 = tmp2 ^ in6;
+ out0 = tmp1 ^ tmp2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_40(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in3 ^ in7;
+ tmp0 = in3 ^ in4;
+ tmp1 = in6 ^ in7;
+ out4 = tmp0 ^ in2;
+ out5 = tmp0 ^ in5;
+ out0 = tmp1 ^ in2;
+ out7 = tmp1 ^ in1 ^ in5;
+ out2 = out0 ^ in4;
+ out3 = out2 ^ out5 ^ in7;
+ out6 = out3 ^ out4 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_41(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in2 ^ in3;
+ tmp0 = in5 ^ in6;
+ tmp1 = in6 ^ in7;
+ out5 = in3 ^ in4;
+ out1 = in1 ^ in3 ^ in7;
+ out6 = in0 ^ in4 ^ in5;
+ out3 = tmp0 ^ in2;
+ out7 = tmp0 ^ in1;
+ out2 = tmp1 ^ in4;
+ out0 = tmp1 ^ in0 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_42(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in2 ^ in6;
+ out5 = in3 ^ in5;
+ out1 = in0 ^ in3 ^ in7;
+ out7 = in1 ^ in5 ^ in7;
+ out4 = in2 ^ in4 ^ in7;
+ out6 = in0 ^ in4 ^ in6;
+ out2 = out0 ^ in1 ^ in4;
+ out3 = out5 ^ in6 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_43(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out5 = in3;
+ out7 = in1 ^ in5;
+ out4 = in2 ^ in7;
+ out6 = in0 ^ in4;
+ out0 = in0 ^ in2 ^ in6;
+ out3 = in5 ^ in6 ^ in7;
+ out2 = in1 ^ in4 ^ in6;
+ out1 = in0 ^ in1 ^ in3 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_44(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in3;
+ out0 = in2 ^ in7;
+ tmp0 = in4 ^ in7;
+ out7 = in1 ^ in6 ^ in7;
+ out6 = in0 ^ in5 ^ in6;
+ out4 = tmp0 ^ in3 ^ in6;
+ out3 = out0 ^ in1 ^ in3 ^ in5;
+ out2 = out0 ^ in0 ^ in4;
+ out5 = tmp0 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_45(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in1 ^ in3;
+ out7 = in1 ^ in6;
+ out5 = in4 ^ in7;
+ out6 = in0 ^ in5;
+ out0 = in0 ^ in2 ^ in7;
+ out4 = in3 ^ in6 ^ in7;
+ out2 = out5 ^ in0;
+ out3 = out0 ^ out6 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_46(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in2;
+ out1 = in0 ^ in3;
+ out7 = in1 ^ in7;
+ out4 = in4 ^ in6;
+ out5 = in5 ^ in7;
+ out6 = in0 ^ in6;
+ out3 = in1 ^ in3 ^ in5;
+ out2 = out4 ^ out6 ^ in1 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_47(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in6;
+ out7 = in1;
+ out5 = in7;
+ out6 = in0;
+ tmp0 = in0 ^ in1;
+ out3 = in1 ^ in5;
+ out0 = in0 ^ in2;
+ out1 = tmp0 ^ in3;
+ out2 = tmp0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_48(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3;
+ out1 = in3 ^ in6 ^ in7;
+ out3 = tmp0 ^ in0;
+ out0 = tmp0 ^ out1 ^ in5;
+ tmp1 = out0 ^ in4;
+ out2 = tmp1 ^ in7;
+ out5 = tmp1 ^ in3;
+ out4 = out5 ^ in1;
+ out7 = tmp0 ^ out4;
+ out6 = tmp1 ^ out3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_49(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in0 ^ in2;
+ tmp0 = in2 ^ in5;
+ out2 = in4 ^ in5 ^ in6;
+ tmp1 = tmp0 ^ out2 ^ in3;
+ out7 = out2 ^ in1;
+ out5 = tmp1 ^ in7;
+ out4 = out5 ^ out7 ^ in6;
+ out1 = tmp0 ^ out4;
+ out6 = out1 ^ out7 ^ in0;
+ out0 = tmp1 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_4A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in6;
+ tmp1 = in3 ^ in7;
+ out0 = tmp0 ^ in5;
+ out3 = tmp1 ^ in0;
+ out5 = tmp1 ^ out0;
+ out4 = out0 ^ in1 ^ in4;
+ out1 = out3 ^ in6;
+ out2 = out4 ^ in7;
+ out6 = out1 ^ in4;
+ out7 = tmp0 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_4B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in0 ^ in7;
+ tmp0 = in1 ^ in5;
+ tmp1 = in2 ^ in6;
+ tmp2 = out3 ^ in3;
+ out7 = tmp0 ^ in4;
+ out4 = tmp0 ^ tmp1;
+ tmp3 = tmp1 ^ in0;
+ out6 = tmp2 ^ in4;
+ out5 = tmp2 ^ tmp3;
+ out1 = tmp2 ^ in1 ^ in6;
+ out2 = out7 ^ in6 ^ in7;
+ out0 = tmp3 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_4C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in3 ^ in6;
+ tmp0 = in2 ^ in5;
+ tmp1 = out1 ^ in5 ^ in7;
+ out0 = tmp0 ^ in7;
+ tmp2 = tmp0 ^ in4;
+ out6 = tmp1 ^ in0;
+ out2 = tmp2 ^ in0;
+ out5 = tmp2 ^ in6;
+ out3 = tmp0 ^ out6 ^ in1;
+ out7 = out0 ^ out5 ^ in1;
+ out4 = tmp1 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_4D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in5;
+ tmp1 = in1 ^ in6;
+ out4 = in1 ^ in3 ^ in5;
+ tmp2 = tmp0 ^ in7;
+ out2 = tmp0 ^ in4;
+ out1 = tmp1 ^ in3;
+ out7 = tmp1 ^ in4;
+ out0 = tmp2 ^ in2;
+ out6 = tmp2 ^ in3;
+ out5 = out7 ^ in1 ^ in2;
+ out3 = tmp1 ^ out0 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_4E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in2 ^ in5;
+ out7 = in1 ^ in4 ^ in7;
+ out1 = in0 ^ in3 ^ in6;
+ out5 = out0 ^ in6;
+ out4 = out7 ^ in5;
+ out3 = out1 ^ in1;
+ out6 = out1 ^ in7;
+ out2 = out4 ^ in0 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_4F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out5 = in2 ^ in6;
+ out7 = in1 ^ in4;
+ out3 = in0 ^ in1 ^ in6;
+ out4 = in1 ^ in5 ^ in7;
+ out0 = in0 ^ in2 ^ in5;
+ out6 = in0 ^ in3 ^ in7;
+ out1 = out3 ^ in3;
+ out2 = out4 ^ in0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_50(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in2 ^ in7;
+ tmp0 = in3 ^ in5;
+ out0 = out2 ^ in4 ^ in6;
+ out1 = tmp0 ^ in7;
+ tmp1 = tmp0 ^ in6;
+ out3 = out0 ^ in3;
+ out7 = tmp1 ^ in1;
+ tmp2 = tmp1 ^ in0;
+ out5 = out3 ^ in1 ^ in2;
+ out4 = tmp2 ^ in2;
+ out6 = tmp2 ^ out3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_51(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in7;
+ out3 = in2 ^ in4 ^ in6 ^ in7;
+ out0 = out3 ^ in0;
+ out6 = out0 ^ in5;
+ out4 = out6 ^ in3 ^ in7;
+ out1 = out0 ^ out4 ^ in1;
+ out7 = out1 ^ in6;
+ out5 = out7 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_52(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in1 ^ in2;
+ tmp0 = in2 ^ in4;
+ tmp1 = in3 ^ in5;
+ tmp2 = in3 ^ in6;
+ tmp3 = in0 ^ in7;
+ out0 = tmp0 ^ in6;
+ out6 = tmp0 ^ tmp3;
+ out7 = tmp1 ^ in1;
+ out1 = tmp1 ^ tmp3;
+ out3 = tmp2 ^ in4;
+ out5 = tmp2 ^ in1 ^ in7;
+ out4 = tmp2 ^ out1 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_53(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in1;
+ out3 = in4 ^ in6;
+ out0 = out3 ^ in0 ^ in2;
+ out6 = out0 ^ in7;
+ out4 = out6 ^ in5;
+ out7 = out0 ^ out4 ^ in1 ^ in3;
+ out1 = out7 ^ in0;
+ out5 = out7 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_54(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in3 ^ in5;
+ tmp0 = in1 ^ in3;
+ tmp1 = in2 ^ in4;
+ tmp2 = in0 ^ in7;
+ out5 = in1 ^ in4 ^ in6;
+ out4 = tmp2 ^ out1;
+ out7 = tmp0 ^ in6;
+ out3 = tmp0 ^ tmp1;
+ out0 = tmp1 ^ in7;
+ tmp3 = tmp2 ^ in2;
+ out2 = tmp3 ^ in6;
+ out6 = tmp3 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_55(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in3;
+ tmp1 = in1 ^ in4;
+ tmp2 = in6 ^ in7;
+ out7 = tmp0 ^ tmp2;
+ out1 = tmp0 ^ in5;
+ out3 = tmp1 ^ in2;
+ out5 = tmp1 ^ in5 ^ in6;
+ out2 = tmp2 ^ in0;
+ out4 = out5 ^ out7 ^ in0;
+ out6 = out2 ^ in2 ^ in5;
+ out0 = out5 ^ out6 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_56(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in2 ^ in4;
+ tmp0 = in0 ^ in2;
+ out4 = in0 ^ in5;
+ out7 = in1 ^ in3;
+ out5 = in1 ^ in6;
+ out6 = tmp0 ^ in7;
+ out2 = tmp0 ^ out5;
+ out1 = out4 ^ in3;
+ out3 = out7 ^ in4 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_57(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in5;
+ tmp1 = in1 ^ in7;
+ out0 = in0 ^ in2 ^ in4;
+ out5 = in1 ^ in5 ^ in6;
+ out4 = tmp0 ^ in4;
+ out1 = tmp0 ^ in1 ^ in3;
+ out2 = tmp0 ^ out5;
+ out3 = tmp1 ^ in4;
+ out7 = tmp1 ^ in3;
+ out6 = tmp1 ^ out2 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_58(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in2 ^ in5;
+ tmp0 = in2 ^ in3 ^ in4;
+ out5 = tmp0 ^ in1;
+ out6 = tmp0 ^ in0 ^ in5;
+ out3 = out6 ^ in7;
+ tmp1 = out2 ^ out5;
+ out7 = tmp1 ^ in6;
+ out4 = tmp1 ^ out3 ^ in3;
+ out0 = out4 ^ out7 ^ in0;
+ out1 = tmp0 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_59(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in5;
+ tmp0 = in0 ^ in5 ^ in7;
+ out3 = tmp0 ^ in2 ^ in4;
+ out0 = out3 ^ in6;
+ tmp1 = out0 ^ in7;
+ out6 = tmp1 ^ in3;
+ out5 = out6 ^ in0 ^ in1 ^ in6;
+ out4 = tmp0 ^ out5;
+ out1 = tmp1 ^ out4;
+ out7 = out1 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_5A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2;
+ tmp1 = in2 ^ in5;
+ out5 = tmp0 ^ in3;
+ out4 = tmp0 ^ in0;
+ tmp2 = tmp1 ^ in4;
+ out2 = tmp1 ^ in1 ^ in7;
+ out7 = tmp2 ^ out5;
+ out6 = out4 ^ out7 ^ in5;
+ out0 = tmp2 ^ in6;
+ out1 = out0 ^ out6 ^ in7;
+ out3 = tmp1 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_5B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3;
+ tmp1 = in0 ^ in4;
+ tmp2 = in1 ^ in5;
+ out5 = tmp0 ^ tmp2;
+ tmp3 = tmp1 ^ in6;
+ out3 = tmp1 ^ in5;
+ out2 = tmp2 ^ in7;
+ tmp4 = out3 ^ in2;
+ out7 = out2 ^ in3 ^ in4;
+ out0 = tmp4 ^ in6;
+ out6 = tmp0 ^ tmp3;
+ out4 = tmp2 ^ tmp4;
+ out1 = tmp3 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_5C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in6;
+ tmp1 = in0 ^ in2 ^ in5;
+ out1 = tmp0 ^ in5;
+ tmp2 = tmp0 ^ in1;
+ out2 = tmp1 ^ in6;
+ out6 = tmp1 ^ in3;
+ out4 = tmp2 ^ in0;
+ out7 = tmp2 ^ in4;
+ out3 = tmp1 ^ out7;
+ out0 = out3 ^ out4 ^ in7;
+ out5 = out0 ^ in1 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_5D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = in0 ^ in6;
+ out2 = tmp1 ^ in5;
+ tmp2 = out2 ^ in3;
+ out6 = tmp2 ^ in2;
+ out1 = tmp0 ^ tmp2;
+ tmp3 = out1 ^ in4 ^ in5;
+ out4 = tmp3 ^ in0;
+ out7 = tmp3 ^ in7;
+ tmp4 = out4 ^ out6;
+ out5 = tmp4 ^ in7;
+ out0 = tmp0 ^ out5;
+ out3 = tmp1 ^ tmp4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_5E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in5;
+ tmp1 = in3 ^ in5;
+ tmp2 = in1 ^ in7;
+ out7 = in1 ^ in3 ^ in4;
+ out0 = tmp0 ^ in4;
+ tmp3 = tmp1 ^ in0;
+ out5 = tmp2 ^ in2;
+ out1 = tmp3 ^ in6;
+ out6 = tmp0 ^ tmp3;
+ tmp4 = tmp2 ^ out1;
+ out3 = tmp4 ^ in4;
+ out4 = tmp1 ^ tmp4;
+ out2 = tmp0 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_5F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in5;
+ tmp1 = in0 ^ in6;
+ tmp2 = tmp0 ^ in7;
+ tmp3 = tmp1 ^ in3;
+ out2 = tmp1 ^ tmp2;
+ out5 = tmp2 ^ in2;
+ out6 = tmp3 ^ in2;
+ out3 = out2 ^ in4;
+ out4 = out3 ^ in5;
+ out1 = tmp0 ^ tmp3;
+ out7 = tmp3 ^ out4;
+ out0 = out4 ^ out5 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_60(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in2 ^ in5;
+ tmp0 = in3 ^ in6;
+ out1 = in3 ^ in4 ^ in7;
+ out7 = out4 ^ in1;
+ tmp1 = out4 ^ in4;
+ out0 = tmp0 ^ in2;
+ out5 = tmp0 ^ in0;
+ out2 = tmp0 ^ tmp1;
+ out3 = tmp1 ^ in7;
+ out6 = out3 ^ out7 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_61(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in5;
+ out4 = tmp0 ^ in4;
+ tmp1 = out4 ^ in3;
+ out3 = tmp1 ^ in7;
+ out2 = tmp1 ^ in2 ^ in6;
+ out1 = tmp0 ^ out3 ^ in1;
+ out0 = out2 ^ out4 ^ in0;
+ out7 = tmp1 ^ out1;
+ out6 = out0 ^ out1 ^ in2;
+ out5 = tmp0 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_62(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in4 ^ in5;
+ tmp0 = in0 ^ in3 ^ in4;
+ out1 = tmp0 ^ in7;
+ out5 = tmp0 ^ in6;
+ tmp1 = out1 ^ in0;
+ tmp2 = tmp1 ^ out3;
+ out4 = tmp2 ^ in2;
+ tmp3 = tmp2 ^ in1;
+ out0 = out4 ^ in5 ^ in6;
+ out7 = tmp3 ^ out0;
+ out6 = tmp0 ^ tmp3;
+ out2 = tmp1 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_63(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in4;
+ tmp1 = in1 ^ in7;
+ out3 = tmp0 ^ in5;
+ tmp2 = out3 ^ in6;
+ out4 = out3 ^ in2 ^ in7;
+ out5 = tmp2 ^ in0;
+ tmp3 = out5 ^ in3;
+ out0 = tmp3 ^ out4;
+ out2 = tmp1 ^ tmp2;
+ out6 = tmp1 ^ tmp3;
+ tmp4 = tmp0 ^ out2;
+ out1 = tmp4 ^ out5;
+ out7 = tmp4 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_64(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in2 ^ in3;
+ out1 = in3 ^ in4;
+ out7 = in1 ^ in2;
+ tmp0 = in4 ^ in5;
+ tmp1 = in0 ^ in7;
+ out4 = in5 ^ in6 ^ in7;
+ out2 = tmp0 ^ out0 ^ in0;
+ out3 = tmp0 ^ out7 ^ in6;
+ out5 = tmp1 ^ in6;
+ out6 = tmp1 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_65(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ tmp1 = in4 ^ in5;
+ tmp2 = in6 ^ in7;
+ out7 = in1 ^ in2 ^ in7;
+ out1 = in1 ^ in3 ^ in4;
+ out0 = tmp0 ^ in2;
+ out2 = tmp0 ^ tmp1;
+ out4 = tmp1 ^ tmp2;
+ tmp3 = tmp2 ^ in0;
+ out3 = out4 ^ out7 ^ in3;
+ out5 = tmp3 ^ in5;
+ out6 = tmp3 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_66(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2;
+ tmp1 = in2 ^ in3;
+ tmp2 = in0 ^ in4;
+ out7 = tmp0 ^ in6;
+ out0 = tmp1 ^ in7;
+ out1 = tmp2 ^ in3;
+ tmp3 = tmp2 ^ in6;
+ tmp4 = out1 ^ in5;
+ out5 = tmp3 ^ in7;
+ out4 = tmp3 ^ tmp4;
+ out2 = tmp0 ^ tmp4 ^ in7;
+ out6 = tmp1 ^ out2 ^ in4;
+ out3 = tmp3 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_67(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ tmp1 = tmp0 ^ in1;
+ tmp2 = tmp0 ^ in7;
+ out1 = tmp1 ^ in4;
+ out0 = tmp2 ^ in2;
+ tmp3 = out1 ^ in7;
+ out2 = tmp3 ^ in5;
+ out3 = out2 ^ in0 ^ in6;
+ out7 = tmp1 ^ out0 ^ in6;
+ out5 = tmp1 ^ out3;
+ out4 = tmp2 ^ out5;
+ out6 = tmp3 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_68(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in4;
+ tmp1 = in2 ^ in3 ^ in5;
+ tmp2 = tmp0 ^ in1;
+ tmp3 = tmp0 ^ in6;
+ out0 = tmp1 ^ in6;
+ out6 = tmp2 ^ in0;
+ out7 = tmp1 ^ tmp2;
+ out1 = tmp3 ^ in7;
+ out2 = out1 ^ in2;
+ out4 = tmp2 ^ out2;
+ out3 = out4 ^ out6 ^ in3;
+ out5 = tmp3 ^ out3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_69(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in6 ^ in7;
+ out2 = tmp0 ^ in3 ^ in4;
+ out1 = out2 ^ in1;
+ out3 = out2 ^ in0 ^ in2;
+ out4 = out1 ^ in2 ^ in3;
+ out6 = out1 ^ in0 ^ in7;
+ out7 = out4 ^ in5 ^ in6;
+ out5 = out4 ^ out6 ^ in5;
+ out0 = tmp0 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_6A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in6;
+ out3 = in0 ^ in4 ^ in6;
+ tmp1 = tmp0 ^ in3;
+ out4 = tmp1 ^ in1;
+ tmp2 = tmp1 ^ in7;
+ out2 = out4 ^ in4;
+ out0 = tmp2 ^ in5;
+ out5 = tmp2 ^ out3;
+ out7 = out2 ^ in3 ^ in5;
+ out1 = tmp0 ^ out5;
+ out6 = tmp1 ^ out7 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_6B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in4 ^ in6;
+ out2 = tmp0 ^ in1 ^ in3;
+ out4 = out2 ^ in2;
+ tmp1 = out2 ^ in0;
+ out7 = out4 ^ in3 ^ in5 ^ in7;
+ out1 = tmp1 ^ in7;
+ out3 = tmp1 ^ in1;
+ out6 = tmp1 ^ in5;
+ out0 = tmp1 ^ out7 ^ in6;
+ out5 = tmp0 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_6C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in1;
+ tmp0 = in2 ^ in3;
+ out5 = in0 ^ in2;
+ out1 = in3 ^ in4 ^ in6;
+ tmp1 = out5 ^ in1;
+ out0 = tmp0 ^ in5;
+ out6 = tmp0 ^ tmp1;
+ out3 = tmp1 ^ in4;
+ out7 = out3 ^ in0;
+ out2 = out6 ^ out7 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_6D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in1 ^ in4;
+ tmp0 = in0 ^ in2;
+ tmp1 = out4 ^ in3;
+ out7 = out4 ^ in2 ^ in7;
+ out5 = tmp0 ^ in5;
+ out3 = tmp0 ^ tmp1;
+ out1 = tmp1 ^ in6;
+ out0 = out5 ^ in3;
+ out2 = out3 ^ out7 ^ in4;
+ out6 = out1 ^ in0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_6E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in3;
+ tmp1 = in0 ^ in4;
+ out4 = tmp0 ^ in7;
+ out6 = tmp0 ^ in0 ^ in5;
+ out5 = tmp1 ^ in2;
+ tmp2 = tmp1 ^ in3;
+ out3 = tmp2 ^ out4;
+ out1 = tmp2 ^ in6;
+ out2 = tmp0 ^ out5;
+ out0 = out2 ^ out3 ^ in5;
+ out7 = out1 ^ out2 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_6F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in7;
+ tmp1 = tmp0 ^ in4;
+ tmp2 = tmp0 ^ in0 ^ in2;
+ out4 = tmp1 ^ in1;
+ out0 = tmp2 ^ in5;
+ out3 = out4 ^ in0;
+ out2 = out3 ^ in7;
+ out1 = out2 ^ in6;
+ out6 = out1 ^ in4 ^ in5;
+ out7 = tmp2 ^ out1;
+ out5 = tmp1 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_70(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in2;
+ tmp0 = in2 ^ in4;
+ out2 = in2 ^ in3 ^ in5;
+ tmp1 = tmp0 ^ in6;
+ tmp2 = out2 ^ in7;
+ out0 = tmp1 ^ in3;
+ out4 = tmp1 ^ in0;
+ out7 = tmp2 ^ in1;
+ out6 = out4 ^ in1;
+ out5 = out7 ^ in0 ^ in2;
+ out1 = tmp0 ^ tmp2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_71(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in3 ^ in5;
+ out3 = in2 ^ in3;
+ tmp0 = in0 ^ in2;
+ tmp1 = out2 ^ in1;
+ out4 = tmp0 ^ in6;
+ tmp2 = tmp0 ^ in1;
+ out7 = tmp1 ^ in2;
+ out1 = tmp1 ^ in4 ^ in7;
+ out0 = out4 ^ in3 ^ in4;
+ out6 = tmp2 ^ in4;
+ out5 = tmp2 ^ out3 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_72(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in7;
+ tmp0 = in0 ^ in4;
+ tmp1 = tmp0 ^ in3 ^ in7;
+ out1 = tmp1 ^ in5;
+ out5 = out1 ^ in1;
+ tmp2 = tmp0 ^ out5;
+ out2 = tmp2 ^ in2;
+ out7 = out2 ^ in6;
+ out6 = tmp1 ^ out7;
+ out4 = tmp2 ^ out6;
+ out0 = out4 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_73(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in3 ^ in7;
+ out2 = out3 ^ in1 ^ in5;
+ out1 = out2 ^ in0 ^ in4;
+ out5 = out1 ^ in5;
+ out6 = out1 ^ out3 ^ in2;
+ out0 = out2 ^ out6 ^ in6;
+ out7 = out0 ^ out1 ^ in3;
+ out4 = out0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_74(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in4;
+ tmp1 = in1 ^ in2 ^ in6;
+ out4 = in0 ^ in4 ^ in7;
+ out5 = in0 ^ in1 ^ in5;
+ out0 = tmp0 ^ in2;
+ out1 = tmp0 ^ in5;
+ out3 = tmp1 ^ in7;
+ out6 = tmp1 ^ in0;
+ out2 = tmp1 ^ out5 ^ in3;
+ out7 = out3 ^ in3 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_75(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in0 ^ in7;
+ tmp0 = in1 ^ in3;
+ out5 = in0 ^ in1;
+ out7 = tmp0 ^ in2;
+ tmp1 = tmp0 ^ in4;
+ out6 = out5 ^ in2;
+ tmp2 = out7 ^ in6;
+ out1 = tmp1 ^ in5;
+ out0 = tmp1 ^ out6;
+ out3 = tmp2 ^ in7;
+ out2 = tmp2 ^ out6 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_76(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in1 ^ in6;
+ tmp0 = in0 ^ in5;
+ tmp1 = in3 ^ in7;
+ tmp2 = tmp0 ^ in4;
+ tmp3 = tmp1 ^ in2;
+ out5 = tmp2 ^ in1;
+ out1 = tmp2 ^ in3;
+ out0 = tmp3 ^ in4;
+ out4 = out1 ^ in5;
+ out7 = tmp3 ^ out3;
+ out2 = tmp0 ^ out7;
+ out6 = tmp1 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_77(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in0 ^ in3;
+ tmp0 = in1 ^ in4;
+ tmp1 = in1 ^ in6;
+ tmp2 = out4 ^ in5;
+ out5 = tmp0 ^ in0;
+ out1 = tmp0 ^ tmp2;
+ out3 = tmp1 ^ in3;
+ out2 = tmp1 ^ tmp2 ^ in7;
+ out7 = out3 ^ in2;
+ tmp3 = out7 ^ in6;
+ out6 = tmp2 ^ tmp3;
+ out0 = tmp3 ^ out5 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_78(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ tmp1 = in2 ^ in7;
+ tmp2 = in0 ^ in5 ^ in6;
+ out2 = tmp1 ^ in3;
+ out3 = tmp2 ^ in2;
+ out5 = out3 ^ in1 ^ in3;
+ out0 = tmp0 ^ out3 ^ in4;
+ out1 = tmp1 ^ out0;
+ out4 = out1 ^ out5 ^ in5;
+ out7 = tmp0 ^ out4;
+ out6 = tmp2 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_79(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in3 ^ in7;
+ tmp0 = in3 ^ in4;
+ tmp1 = in1 ^ in5;
+ tmp2 = tmp1 ^ in2;
+ out4 = tmp2 ^ in0 ^ in7;
+ tmp3 = out4 ^ in5;
+ out5 = tmp3 ^ out2 ^ in6;
+ out7 = tmp0 ^ tmp2;
+ out6 = tmp0 ^ tmp3;
+ out3 = tmp1 ^ out5;
+ out0 = out3 ^ in4;
+ out1 = tmp3 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_7A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2;
+ out2 = tmp0 ^ in3;
+ tmp1 = out2 ^ in4;
+ out4 = tmp1 ^ in0 ^ in5;
+ out5 = out4 ^ in6;
+ out6 = out5 ^ in7;
+ out7 = out6 ^ in0;
+ out0 = out7 ^ in1;
+ out1 = tmp0 ^ out6;
+ out3 = tmp1 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_7B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in1 ^ in3;
+ tmp0 = in0 ^ in5;
+ out4 = tmp0 ^ out2 ^ in2;
+ tmp1 = out4 ^ in4;
+ out6 = tmp1 ^ in7;
+ out5 = tmp1 ^ in5 ^ in6;
+ out0 = out6 ^ in1 ^ in6;
+ tmp2 = out0 ^ in2;
+ out1 = tmp2 ^ in1;
+ out3 = tmp2 ^ in4;
+ out7 = tmp0 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_7C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in5;
+ tmp1 = tmp0 ^ in4;
+ out0 = tmp1 ^ in2;
+ out1 = tmp1 ^ in6;
+ out7 = out0 ^ in1 ^ in5 ^ in7;
+ out5 = out1 ^ out7 ^ in0;
+ out3 = out5 ^ in6;
+ out6 = tmp0 ^ out5;
+ out2 = out6 ^ in1;
+ out4 = out2 ^ out7 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_7D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2;
+ tmp1 = tmp0 ^ in3;
+ tmp2 = tmp0 ^ in6;
+ out7 = tmp1 ^ in4;
+ tmp3 = tmp2 ^ in0;
+ out5 = tmp3 ^ in7;
+ out4 = tmp3 ^ in2 ^ in5;
+ out2 = tmp1 ^ out5;
+ out6 = tmp2 ^ out2;
+ out0 = out4 ^ out7 ^ in6;
+ out1 = tmp3 ^ out0;
+ out3 = out6 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_7E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in4;
+ tmp1 = in0 ^ in5;
+ out1 = tmp0 ^ tmp1 ^ in6;
+ out3 = tmp1 ^ in1;
+ out4 = out1 ^ in1 ^ in7;
+ tmp2 = out4 ^ in3;
+ out5 = tmp2 ^ in2;
+ out6 = tmp0 ^ out5;
+ out7 = tmp1 ^ out4 ^ in2;
+ out2 = out6 ^ in5 ^ in7;
+ out0 = tmp2 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_7F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in7;
+ tmp1 = tmp0 ^ in3 ^ in5;
+ tmp2 = tmp1 ^ in0;
+ out0 = tmp2 ^ in4;
+ out6 = tmp2 ^ in1;
+ out3 = tmp0 ^ out6;
+ tmp3 = out3 ^ in6;
+ out1 = tmp3 ^ in4;
+ out2 = tmp3 ^ in5;
+ out4 = tmp3 ^ in7;
+ out5 = tmp1 ^ out1;
+ out7 = out0 ^ out4 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_80(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3;
+ tmp1 = in4 ^ in5;
+ out1 = in2 ^ in6 ^ in7;
+ out5 = tmp0 ^ in4;
+ tmp2 = tmp0 ^ in1;
+ out6 = tmp1 ^ in3;
+ out7 = tmp1 ^ in0 ^ in6;
+ out4 = tmp2 ^ in7;
+ out3 = tmp2 ^ out6;
+ out2 = out3 ^ out5 ^ in6;
+ out0 = out2 ^ in3 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_81(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in4 ^ in6;
+ tmp1 = tmp0 ^ in3;
+ out6 = tmp1 ^ in5;
+ out5 = out6 ^ in2 ^ in6;
+ out3 = out5 ^ in1;
+ out2 = tmp0 ^ out3;
+ out1 = out3 ^ out6 ^ in7;
+ out4 = tmp1 ^ out1;
+ out7 = out2 ^ out4 ^ in0;
+ out0 = out7 ^ in1 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_82(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in1 ^ in2;
+ tmp0 = in6 ^ in7;
+ out5 = in2 ^ in3;
+ out6 = in3 ^ in4;
+ out7 = in0 ^ in4 ^ in5;
+ out0 = in1 ^ in5 ^ in6;
+ out1 = tmp0 ^ in0 ^ in2;
+ out2 = tmp0 ^ in3 ^ in5;
+ out3 = tmp0 ^ out0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_83(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = in2 ^ in5;
+ tmp2 = in3 ^ in6;
+ out4 = in1 ^ in2 ^ in4;
+ out0 = tmp0 ^ in5 ^ in6;
+ out5 = tmp1 ^ in3;
+ tmp3 = tmp1 ^ in7;
+ out6 = tmp2 ^ in4;
+ out2 = tmp2 ^ tmp3;
+ tmp4 = tmp3 ^ out4;
+ out1 = tmp3 ^ out0;
+ out3 = tmp4 ^ in3;
+ out7 = tmp0 ^ tmp4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_84(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in2 ^ in6;
+ out6 = in3 ^ in5;
+ out0 = in1 ^ in5 ^ in7;
+ out7 = in0 ^ in4 ^ in6;
+ out4 = in1 ^ in3 ^ in6;
+ out5 = in2 ^ in4 ^ in7;
+ out2 = out6 ^ in0 ^ in1;
+ out3 = out5 ^ in5 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_85(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in6;
+ tmp1 = in3 ^ in6;
+ tmp2 = tmp0 ^ in4;
+ out1 = tmp0 ^ in2;
+ out6 = tmp1 ^ in5;
+ out4 = tmp2 ^ in3;
+ tmp3 = out1 ^ out6;
+ out2 = tmp3 ^ in0;
+ out3 = tmp2 ^ tmp3 ^ in7;
+ out7 = out2 ^ out3 ^ in1;
+ out5 = tmp1 ^ out3;
+ out0 = tmp2 ^ out7 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_86(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out6 = in3;
+ out7 = in0 ^ in4;
+ out0 = in1 ^ in5;
+ out5 = in2 ^ in7;
+ out3 = in4 ^ in5 ^ in6;
+ out1 = in0 ^ in2 ^ in6;
+ out4 = in1 ^ in6 ^ in7;
+ out2 = in0 ^ in3 ^ in5 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_87(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out6 = in3 ^ in6;
+ tmp0 = in0 ^ in1;
+ out7 = in0 ^ in4 ^ in7;
+ out5 = in2 ^ in5 ^ in7;
+ out3 = out6 ^ in4 ^ in5;
+ out0 = tmp0 ^ in5;
+ tmp1 = tmp0 ^ in6;
+ out2 = out5 ^ in0 ^ in3;
+ out1 = tmp1 ^ in2;
+ out4 = tmp1 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_88(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in2 ^ in7;
+ tmp0 = in5 ^ in6;
+ out0 = in1 ^ in6 ^ in7;
+ out6 = in4 ^ in5 ^ in7;
+ out3 = out0 ^ out1 ^ in0 ^ in4;
+ out7 = tmp0 ^ in0;
+ tmp1 = tmp0 ^ in3;
+ out2 = out0 ^ in3;
+ out4 = tmp1 ^ in2;
+ out5 = tmp1 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_89(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in7;
+ tmp1 = in2 ^ in7;
+ tmp2 = tmp0 ^ in6;
+ out1 = tmp1 ^ in1;
+ out7 = tmp2 ^ in5;
+ out0 = tmp2 ^ in1;
+ out2 = out1 ^ in3 ^ in6;
+ out6 = out7 ^ in0 ^ in4;
+ out5 = out6 ^ in3;
+ out3 = tmp0 ^ out2 ^ in4;
+ out4 = tmp1 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_8A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in1 ^ in6;
+ out7 = in0 ^ in5;
+ out2 = in3 ^ in6;
+ out6 = in4 ^ in7;
+ out1 = in0 ^ in2 ^ in7;
+ out3 = out0 ^ out6 ^ in0;
+ out4 = out1 ^ out7 ^ in6;
+ out5 = out2 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_8B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = in3 ^ in6;
+ tmp2 = in5 ^ in7;
+ tmp3 = tmp0 ^ in7;
+ out0 = tmp0 ^ in6;
+ out2 = tmp1 ^ in2;
+ out5 = tmp1 ^ tmp2;
+ out7 = tmp2 ^ in0;
+ tmp4 = tmp3 ^ in4;
+ out1 = tmp3 ^ in2;
+ out6 = tmp4 ^ out0;
+ out4 = out6 ^ in2 ^ in5;
+ out3 = tmp1 ^ tmp4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_8C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in2;
+ out0 = in1 ^ in7;
+ out7 = in0 ^ in6;
+ out5 = in4 ^ in6;
+ out6 = in5 ^ in7;
+ out2 = out0 ^ in0 ^ in3;
+ out3 = out5 ^ out7 ^ in2 ^ in7;
+ out4 = out6 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_8D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in1 ^ in2;
+ tmp0 = in6 ^ in7;
+ out0 = in0 ^ in1 ^ in7;
+ out5 = in4 ^ in5 ^ in6;
+ out6 = tmp0 ^ in5;
+ out7 = tmp0 ^ in0;
+ out4 = tmp0 ^ out5 ^ in3;
+ out2 = out0 ^ in2 ^ in3;
+ out3 = out2 ^ in1 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_8E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in1;
+ out4 = in5;
+ out7 = in0;
+ out5 = in6;
+ out6 = in7;
+ out3 = in0 ^ in4;
+ out1 = in0 ^ in2;
+ out2 = in0 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_8F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in0 ^ in1;
+ tmp0 = in0 ^ in3;
+ out4 = in4 ^ in5;
+ out7 = in0 ^ in7;
+ out5 = in5 ^ in6;
+ out6 = in6 ^ in7;
+ out1 = out0 ^ in2;
+ out2 = tmp0 ^ in2;
+ out3 = tmp0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_90(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2;
+ tmp1 = in2 ^ in6 ^ in7;
+ out3 = tmp0 ^ in7;
+ out1 = tmp1 ^ in5;
+ tmp2 = out1 ^ in4;
+ out6 = tmp2 ^ in3;
+ out5 = out6 ^ in1;
+ out4 = out5 ^ in0;
+ out0 = tmp0 ^ tmp2;
+ out7 = tmp0 ^ out4;
+ out2 = tmp1 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_91(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in4;
+ tmp1 = tmp0 ^ in3 ^ in5;
+ out2 = tmp1 ^ in1;
+ out6 = tmp1 ^ in7;
+ tmp2 = out2 ^ in5 ^ in7;
+ out3 = tmp2 ^ in4;
+ out5 = tmp2 ^ in6;
+ out1 = tmp1 ^ out5 ^ in2;
+ tmp3 = out1 ^ in0;
+ out4 = tmp3 ^ in3;
+ out0 = tmp0 ^ tmp3;
+ out7 = tmp2 ^ tmp3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_92(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in1;
+ tmp0 = in4 ^ in5;
+ tmp1 = tmp0 ^ in1;
+ out2 = tmp0 ^ in3 ^ in7;
+ out0 = tmp1 ^ in6;
+ out7 = out2 ^ in0;
+ out4 = out0 ^ in0 ^ in2;
+ out5 = out4 ^ out7 ^ in5;
+ out6 = tmp1 ^ out5;
+ out1 = out6 ^ out7 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_93(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in1 ^ in3;
+ tmp0 = in2 ^ in7;
+ tmp1 = out3 ^ in6;
+ tmp2 = tmp0 ^ in4;
+ out5 = tmp0 ^ tmp1;
+ out6 = tmp2 ^ in3;
+ out2 = out6 ^ in5;
+ out0 = out2 ^ out5 ^ in0;
+ out7 = tmp1 ^ out0;
+ out1 = tmp2 ^ out0;
+ out4 = out1 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_94(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in2 ^ in6;
+ tmp0 = in1 ^ in4 ^ in5;
+ out1 = out3 ^ in5;
+ out5 = tmp0 ^ out3;
+ out0 = tmp0 ^ in7;
+ out4 = tmp0 ^ in0 ^ in3;
+ out6 = out1 ^ in3 ^ in7;
+ out2 = out4 ^ in6;
+ out7 = out0 ^ out2 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_95(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3;
+ out3 = tmp0 ^ in6;
+ tmp1 = tmp0 ^ in7;
+ tmp2 = out3 ^ in0;
+ out6 = tmp1 ^ in5;
+ tmp3 = tmp2 ^ in4;
+ out7 = tmp3 ^ in2;
+ tmp4 = tmp3 ^ in5;
+ out2 = tmp4 ^ in1;
+ tmp5 = out2 ^ in6;
+ out0 = tmp1 ^ tmp5;
+ out1 = tmp5 ^ out7;
+ out4 = tmp2 ^ out1;
+ out5 = tmp4 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_96(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in6 ^ in7;
+ tmp0 = in1 ^ in5;
+ tmp1 = in5 ^ in6;
+ out6 = out3 ^ in2 ^ in3;
+ out0 = tmp0 ^ in4;
+ tmp2 = tmp1 ^ in2;
+ out4 = out0 ^ in0 ^ in7;
+ out1 = tmp2 ^ in0;
+ out5 = tmp2 ^ in1;
+ out7 = tmp0 ^ out4 ^ in3;
+ out2 = tmp1 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_97(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in4;
+ tmp1 = in2 ^ in6;
+ out3 = in3 ^ in6 ^ in7;
+ out7 = tmp0 ^ in3;
+ tmp2 = tmp0 ^ in5;
+ out5 = tmp1 ^ in1;
+ out6 = tmp1 ^ out3;
+ out0 = tmp2 ^ in1;
+ out2 = tmp2 ^ out3 ^ in2;
+ tmp3 = out0 ^ in4;
+ out4 = tmp3 ^ in7;
+ out1 = tmp1 ^ tmp3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_98(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in5 ^ in7;
+ tmp1 = in1 ^ in4 ^ in7;
+ out1 = tmp0 ^ in2;
+ out0 = tmp1 ^ in6;
+ out2 = tmp1 ^ in3;
+ out6 = out0 ^ out1 ^ in1;
+ out5 = tmp0 ^ out2;
+ out3 = tmp1 ^ out6 ^ in0;
+ out7 = out0 ^ out5 ^ in0;
+ out4 = out6 ^ out7 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_99(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ out5 = in1 ^ in3 ^ in4;
+ out6 = in2 ^ in4 ^ in5;
+ out4 = tmp0 ^ in2;
+ tmp1 = tmp0 ^ in6;
+ tmp2 = out5 ^ in7;
+ out7 = tmp1 ^ in5;
+ out0 = tmp1 ^ tmp2;
+ out2 = tmp2 ^ in2;
+ out3 = out0 ^ out6 ^ in3;
+ out1 = tmp1 ^ out3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_9A(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in3 ^ in4;
+ tmp0 = in0 ^ in5;
+ tmp1 = in1 ^ in6;
+ out5 = in1 ^ in3 ^ in5;
+ tmp2 = tmp0 ^ in7;
+ out3 = tmp0 ^ tmp1;
+ out0 = tmp1 ^ in4;
+ out7 = tmp2 ^ in3;
+ out1 = tmp2 ^ in2;
+ out6 = out0 ^ in1 ^ in2;
+ out4 = out1 ^ in4 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_9B(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out5 = in1 ^ in3;
+ tmp0 = in3 ^ in5;
+ out6 = in2 ^ in4;
+ out4 = in0 ^ in2 ^ in7;
+ out7 = tmp0 ^ in0;
+ out2 = out6 ^ in3;
+ out1 = out4 ^ in1 ^ in5;
+ out3 = out7 ^ in1 ^ in6;
+ out0 = tmp0 ^ out3 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_9C(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out1 = in2 ^ in5;
+ tmp0 = in0 ^ in3 ^ in6;
+ out3 = out1 ^ in0;
+ out6 = out1 ^ in6;
+ out7 = tmp0 ^ in7;
+ out4 = out7 ^ in4;
+ out2 = out4 ^ in1;
+ out0 = tmp0 ^ out2;
+ out5 = out0 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_9D(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out6 = in2 ^ in5;
+ tmp0 = in0 ^ in3;
+ out5 = in1 ^ in4 ^ in7;
+ out1 = out6 ^ in1;
+ out3 = tmp0 ^ out6;
+ out7 = tmp0 ^ in6;
+ out0 = out5 ^ in0;
+ out4 = out7 ^ in7;
+ out2 = out5 ^ out7 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_9E(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in1 ^ in4;
+ tmp0 = in0 ^ in5;
+ out6 = in2 ^ in6;
+ out7 = in0 ^ in3 ^ in7;
+ out4 = in0 ^ in4 ^ in6;
+ out5 = in1 ^ in5 ^ in7;
+ out1 = tmp0 ^ in2;
+ out3 = tmp0 ^ in7;
+ out2 = out4 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_9F(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out6 = in2;
+ out7 = in0 ^ in3;
+ tmp0 = in0 ^ in1;
+ out4 = in0 ^ in6;
+ out5 = in1 ^ in7;
+ out1 = tmp0 ^ in2 ^ in5;
+ out2 = out7 ^ in2 ^ in4 ^ in6;
+ out3 = out7 ^ in5 ^ in7;
+ out0 = tmp0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A0(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in6;
+ out2 = tmp0 ^ in7;
+ tmp1 = tmp0 ^ in5;
+ out6 = out2 ^ in3 ^ in4;
+ out0 = tmp1 ^ in3;
+ tmp2 = out0 ^ in2;
+ out3 = tmp2 ^ in7;
+ tmp3 = tmp2 ^ in1;
+ out5 = tmp3 ^ in0;
+ out4 = tmp3 ^ out6;
+ out7 = out5 ^ out6 ^ in1;
+ out1 = tmp1 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A1(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in5;
+ tmp1 = tmp0 ^ in1;
+ tmp2 = tmp0 ^ in4;
+ out4 = tmp1 ^ in7;
+ out7 = tmp2 ^ in0;
+ out6 = tmp2 ^ out4 ^ in3;
+ out3 = out4 ^ in6;
+ out2 = out3 ^ in5;
+ out1 = out2 ^ in4;
+ out5 = out1 ^ out6 ^ in0;
+ out0 = tmp1 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A2(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in6;
+ tmp0 = in1 ^ in3 ^ in5;
+ out3 = tmp0 ^ in6;
+ out4 = tmp0 ^ in2 ^ in4;
+ out0 = out3 ^ in7;
+ out6 = out0 ^ in4;
+ out1 = out0 ^ out4 ^ in0;
+ out7 = out1 ^ in5;
+ out5 = out7 ^ in3 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A3(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in2 ^ in6;
+ out3 = in1 ^ in5 ^ in6;
+ tmp0 = out2 ^ in0;
+ out4 = out2 ^ out3 ^ in3;
+ tmp1 = tmp0 ^ in4;
+ out0 = tmp0 ^ out4 ^ in7;
+ out5 = tmp1 ^ in3;
+ out7 = tmp1 ^ in5;
+ out1 = tmp1 ^ in1 ^ in7;
+ out6 = tmp1 ^ out0 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A4(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in3;
+ tmp1 = in2 ^ in4;
+ tmp2 = in2 ^ in5;
+ tmp3 = in0 ^ in7;
+ out0 = tmp0 ^ in5;
+ out6 = tmp0 ^ in6 ^ in7;
+ out1 = tmp1 ^ in6;
+ out7 = tmp1 ^ tmp3;
+ out3 = tmp2 ^ in3;
+ tmp4 = tmp2 ^ out1;
+ out2 = tmp3 ^ in1;
+ out5 = tmp4 ^ out7;
+ out4 = tmp4 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A5(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in2 ^ in5;
+ tmp0 = in1 ^ in6;
+ tmp1 = in0 ^ in1;
+ tmp2 = in2 ^ in4;
+ out6 = in1 ^ in3 ^ in7;
+ out4 = tmp0 ^ in5;
+ out1 = tmp0 ^ tmp2;
+ out0 = tmp1 ^ in3 ^ in5;
+ out2 = tmp1 ^ in2 ^ in7;
+ out7 = tmp2 ^ in0;
+ out5 = tmp0 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A6(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in0;
+ out3 = in3 ^ in5 ^ in7;
+ out1 = in0 ^ in2 ^ in4 ^ in6;
+ out0 = out3 ^ in1;
+ out7 = out1 ^ in7;
+ out6 = out0 ^ in6;
+ out5 = out7 ^ in5;
+ out4 = out6 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A7(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in0 ^ in2;
+ out3 = in5 ^ in7;
+ out7 = out2 ^ in4 ^ in6;
+ out6 = out3 ^ in1 ^ in3;
+ out1 = out7 ^ in1;
+ out5 = out7 ^ in7;
+ out0 = out6 ^ in0;
+ out4 = out6 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A8(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in4;
+ tmp1 = in1 ^ in6;
+ tmp2 = in0 ^ in2 ^ in7;
+ out1 = tmp0 ^ in7;
+ out4 = tmp0 ^ in6;
+ out0 = tmp1 ^ in3;
+ out2 = tmp1 ^ in5;
+ out6 = tmp1 ^ in4;
+ out7 = tmp2 ^ in5;
+ out3 = tmp2 ^ out0 ^ in6;
+ out5 = out7 ^ in2 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_A9(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in2 ^ in6;
+ out6 = in1 ^ in4;
+ out7 = in0 ^ in2 ^ in5;
+ out5 = in0 ^ in3 ^ in7;
+ out2 = out4 ^ in1 ^ in5;
+ out1 = out6 ^ in2 ^ in7;
+ out0 = out2 ^ out7 ^ in3;
+ out3 = out1 ^ in0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_AA(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in2;
+ tmp1 = in1 ^ in3;
+ tmp2 = in6 ^ in7;
+ out1 = tmp0 ^ in4 ^ in7;
+ out3 = tmp1 ^ in0;
+ out0 = tmp1 ^ tmp2;
+ out2 = tmp2 ^ in5;
+ out7 = tmp0 ^ out2;
+ out6 = out1 ^ out7 ^ in1;
+ out5 = out0 ^ out6 ^ in0;
+ out4 = out5 ^ out7 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_AB(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in0 ^ in1;
+ tmp0 = in1 ^ in4;
+ tmp1 = in0 ^ in7;
+ out6 = tmp0 ^ in5;
+ out1 = tmp0 ^ tmp1 ^ in2;
+ out5 = tmp1 ^ in3 ^ in4;
+ out0 = tmp0 ^ out5 ^ in6;
+ out4 = out0 ^ out3 ^ in2;
+ out2 = out4 ^ in3 ^ in5;
+ out7 = tmp1 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_AC(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in1 ^ in3;
+ out1 = in2 ^ in4;
+ tmp0 = in0 ^ in2;
+ out4 = in4 ^ in7;
+ out5 = in0 ^ in5;
+ out6 = in1 ^ in6;
+ out7 = tmp0 ^ in7;
+ out3 = tmp0 ^ in3 ^ in6;
+ out2 = out5 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_AD(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in7;
+ out5 = in0;
+ out6 = in1;
+ out7 = in0 ^ in2;
+ out0 = in0 ^ in1 ^ in3;
+ out2 = out7 ^ in1 ^ in5;
+ out1 = in1 ^ in2 ^ in4;
+ out3 = out7 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_AE(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in3 ^ in4;
+ tmp0 = in0 ^ in4;
+ tmp1 = in0 ^ in7;
+ out0 = in1 ^ in3 ^ in7;
+ out1 = tmp0 ^ in2;
+ out5 = tmp0 ^ in5;
+ tmp2 = tmp1 ^ in6;
+ out2 = tmp1 ^ in5;
+ out3 = tmp2 ^ in3;
+ out7 = tmp2 ^ in2;
+ out6 = tmp2 ^ out2 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_AF(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in3;
+ tmp0 = in0 ^ in7;
+ out5 = in0 ^ in4;
+ out6 = in1 ^ in5;
+ out7 = in0 ^ in2 ^ in6;
+ out0 = tmp0 ^ in1 ^ in3;
+ out3 = tmp0 ^ in6;
+ out2 = tmp0 ^ in2 ^ in5;
+ out1 = out5 ^ in1 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B0(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in4;
+ tmp1 = in3 ^ in6;
+ out2 = tmp0 ^ in7;
+ tmp2 = tmp0 ^ tmp1;
+ out0 = tmp2 ^ in5;
+ out3 = tmp2 ^ in2;
+ out6 = out3 ^ in6;
+ tmp3 = out6 ^ in0 ^ in1;
+ out7 = tmp3 ^ in5;
+ out5 = tmp3 ^ out2;
+ out1 = out0 ^ out5 ^ in0;
+ out4 = tmp1 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B1(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in4;
+ out2 = tmp0 ^ in2 ^ in7;
+ tmp1 = out2 ^ in6;
+ out1 = tmp1 ^ in5;
+ out3 = tmp1 ^ in7;
+ out4 = tmp1 ^ in0;
+ out6 = out3 ^ in3;
+ out0 = out6 ^ in0 ^ in2 ^ in5;
+ out5 = tmp1 ^ out0 ^ in1;
+ out7 = tmp0 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B2(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in4;
+ tmp0 = in4 ^ in7;
+ tmp1 = in1 ^ in3 ^ in6;
+ out3 = tmp0 ^ tmp1;
+ tmp2 = tmp1 ^ in0;
+ out0 = out3 ^ in5;
+ out4 = tmp2 ^ in2;
+ tmp3 = out4 ^ in6;
+ out5 = tmp0 ^ tmp3;
+ out1 = tmp3 ^ out0;
+ tmp4 = out1 ^ in7;
+ out7 = tmp4 ^ in3;
+ out6 = tmp2 ^ tmp4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B3(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in2 ^ in4;
+ tmp0 = in0 ^ in5;
+ tmp1 = in1 ^ in6;
+ out3 = tmp1 ^ in4 ^ in7;
+ tmp2 = tmp0 ^ out3;
+ out0 = tmp2 ^ in3;
+ out1 = tmp2 ^ in2;
+ out5 = out0 ^ in2 ^ in6;
+ out7 = tmp1 ^ out5;
+ out4 = out7 ^ in1 ^ in5 ^ in7;
+ out6 = tmp0 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B4(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in0 ^ in1;
+ out5 = out4 ^ in2;
+ tmp0 = out4 ^ in4;
+ out6 = out5 ^ in0 ^ in3;
+ out7 = tmp0 ^ out6;
+ out2 = tmp0 ^ in6 ^ in7;
+ out3 = out7 ^ in0 ^ in7;
+ out0 = out5 ^ out7 ^ in5;
+ out1 = out0 ^ out6 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B5(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = in2 ^ in4;
+ out4 = tmp0 ^ in4;
+ out3 = tmp1 ^ in7;
+ tmp2 = out4 ^ in5;
+ out7 = out3 ^ in0 ^ in3;
+ out0 = tmp2 ^ in3;
+ out2 = tmp0 ^ out3 ^ in6;
+ out5 = tmp1 ^ tmp2;
+ out6 = out2 ^ out7 ^ in2;
+ out1 = tmp0 ^ out0 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B6(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in3 ^ in4;
+ tmp0 = in1 ^ in2;
+ tmp1 = in0 ^ in4;
+ tmp2 = in3 ^ in5;
+ tmp3 = out3 ^ in1 ^ in7;
+ out5 = tmp0 ^ tmp1;
+ out6 = tmp0 ^ tmp2;
+ out2 = tmp1 ^ in6;
+ out4 = tmp1 ^ tmp3;
+ out0 = tmp3 ^ in5;
+ out1 = out2 ^ in2 ^ in5;
+ out7 = tmp2 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B7(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in4;
+ tmp0 = in0 ^ in4;
+ out2 = tmp0 ^ in2 ^ in6;
+ tmp1 = out2 ^ in7;
+ out1 = out2 ^ in1 ^ in5;
+ out7 = tmp1 ^ in3;
+ out5 = out1 ^ in6;
+ out6 = tmp0 ^ out1 ^ in3;
+ out0 = tmp1 ^ out6;
+ out4 = out0 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B8(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in4;
+ tmp1 = in2 ^ in5;
+ out2 = tmp0 ^ in5;
+ out4 = tmp1 ^ in0;
+ tmp2 = tmp1 ^ in7;
+ out6 = tmp2 ^ out2;
+ out7 = out4 ^ in3;
+ out1 = tmp2 ^ in4;
+ out3 = tmp0 ^ out7;
+ out0 = out3 ^ out4 ^ in6;
+ out5 = out0 ^ in0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_B9(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in2;
+ tmp1 = in4 ^ in5;
+ out4 = tmp0 ^ tmp1;
+ tmp2 = tmp0 ^ in3 ^ in7;
+ out3 = out4 ^ in1;
+ out7 = tmp2 ^ in5;
+ out2 = out3 ^ in0;
+ out1 = out2 ^ in7;
+ out6 = out1 ^ in5 ^ in6;
+ out0 = tmp2 ^ out6;
+ out5 = tmp1 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_BA(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in5 ^ in7;
+ out2 = tmp0 ^ in4;
+ tmp1 = out2 ^ in2;
+ out1 = tmp1 ^ in0;
+ out6 = tmp1 ^ in1;
+ out4 = out1 ^ in3 ^ in4;
+ tmp2 = out4 ^ out6;
+ out7 = out4 ^ in6 ^ in7;
+ out5 = tmp2 ^ in6;
+ out3 = tmp0 ^ tmp2;
+ out0 = out6 ^ out7 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_BB(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in2 ^ in4 ^ in5 ^ in7;
+ tmp0 = out2 ^ in1;
+ out4 = out2 ^ in0 ^ in3;
+ out1 = tmp0 ^ in0;
+ out6 = tmp0 ^ in6;
+ out3 = out1 ^ in2;
+ tmp1 = out4 ^ out6 ^ in4;
+ out0 = tmp1 ^ in7;
+ out5 = tmp1 ^ in5;
+ out7 = tmp0 ^ tmp1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_BC(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in2;
+ tmp1 = in2 ^ in4;
+ out0 = in1 ^ in3 ^ in4;
+ out6 = in1 ^ in2 ^ in7;
+ out7 = tmp0 ^ in3;
+ out5 = tmp0 ^ out6 ^ in6;
+ out1 = tmp1 ^ in5;
+ tmp2 = out1 ^ out5 ^ in1;
+ out3 = tmp2 ^ in3;
+ out4 = tmp1 ^ tmp2;
+ out2 = tmp2 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_BD(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ tmp1 = in1 ^ in4;
+ out0 = tmp0 ^ tmp1;
+ out7 = tmp0 ^ in2 ^ in7;
+ out1 = tmp1 ^ in2 ^ in5;
+ tmp2 = out1 ^ in0;
+ out2 = tmp2 ^ in6;
+ out3 = out2 ^ in1 ^ in7;
+ out4 = out3 ^ in2;
+ out5 = tmp1 ^ out4;
+ out6 = tmp2 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_BE(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3 ^ in6;
+ out4 = tmp0 ^ in5;
+ out7 = tmp0 ^ in2;
+ out3 = out4 ^ in4;
+ out1 = out3 ^ out7 ^ in0;
+ out2 = out3 ^ in3 ^ in7;
+ out0 = out2 ^ out4 ^ in1;
+ out5 = tmp0 ^ out0;
+ out6 = out1 ^ out5 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_BF(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in4;
+ out3 = tmp0 ^ in5 ^ in6;
+ out4 = out3 ^ in3;
+ tmp1 = out3 ^ in7;
+ out2 = tmp1 ^ in2;
+ out5 = tmp1 ^ in1;
+ tmp2 = out2 ^ in5;
+ out7 = tmp2 ^ in3 ^ in4;
+ tmp3 = tmp0 ^ out5;
+ out0 = tmp3 ^ out4;
+ out1 = tmp2 ^ tmp3;
+ out6 = tmp3 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C0(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out5 = in2 ^ in5;
+ tmp0 = in1 ^ in4;
+ tmp1 = in3 ^ in6;
+ out0 = out5 ^ in1;
+ out4 = tmp0 ^ in7;
+ out3 = tmp0 ^ tmp1;
+ out1 = tmp1 ^ in2;
+ out6 = tmp1 ^ in0;
+ out7 = out4 ^ in0;
+ out2 = out4 ^ out5 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C1(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out5 = in2;
+ tmp0 = in0 ^ in1;
+ out4 = in1 ^ in7;
+ out6 = in0 ^ in3;
+ out3 = in1 ^ in4 ^ in6;
+ tmp1 = tmp0 ^ in2;
+ out7 = tmp0 ^ in4;
+ out0 = tmp1 ^ in5;
+ out1 = tmp1 ^ out6 ^ in6;
+ out2 = out6 ^ out7 ^ in5 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C2(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in1 ^ in3 ^ in4;
+ tmp0 = in0 ^ in3 ^ in6;
+ out5 = in2 ^ in4 ^ in5;
+ tmp1 = out4 ^ in7;
+ out1 = tmp0 ^ in2;
+ out6 = tmp0 ^ in5;
+ out2 = out5 ^ in3;
+ out7 = tmp0 ^ tmp1;
+ out3 = tmp1 ^ in2 ^ in6;
+ out0 = tmp1 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C3(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in1 ^ in3;
+ tmp0 = in0 ^ in2;
+ tmp1 = in3 ^ in5;
+ out5 = in2 ^ in4;
+ tmp2 = tmp0 ^ out4;
+ out2 = tmp1 ^ in4;
+ out6 = tmp1 ^ in0;
+ out0 = tmp1 ^ tmp2 ^ in7;
+ out1 = tmp2 ^ in6;
+ out7 = out1 ^ out5 ^ in3;
+ out3 = tmp0 ^ out7 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C4(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in7;
+ out3 = tmp0 ^ in4;
+ tmp1 = tmp0 ^ in2;
+ out1 = tmp1 ^ in6;
+ out5 = tmp1 ^ in5;
+ out4 = out1 ^ out3 ^ in1;
+ out0 = out4 ^ in4 ^ in5;
+ out2 = out0 ^ out3 ^ in0;
+ out7 = out1 ^ out2 ^ in7;
+ out6 = tmp1 ^ out0 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C5(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in4 ^ in7;
+ tmp0 = in3 ^ in7;
+ out4 = in1 ^ in2 ^ in6;
+ out6 = in0 ^ in3 ^ in4;
+ out5 = tmp0 ^ in2;
+ out1 = tmp0 ^ out4;
+ out0 = out4 ^ in0 ^ in5;
+ out2 = out0 ^ out5 ^ in4;
+ out7 = tmp0 ^ out2 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C6(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in5 ^ in6;
+ tmp1 = in1 ^ in7;
+ tmp2 = tmp0 ^ in0;
+ tmp3 = tmp0 ^ tmp1;
+ tmp4 = tmp2 ^ in4;
+ out0 = tmp3 ^ in2;
+ out6 = tmp4 ^ in3;
+ out2 = out6 ^ in2;
+ out7 = tmp1 ^ tmp4;
+ out3 = tmp2 ^ out2;
+ tmp5 = out3 ^ in5;
+ out5 = tmp5 ^ in7;
+ out4 = tmp3 ^ tmp5;
+ out1 = tmp4 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C7(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in2 ^ in4;
+ tmp0 = in3 ^ in5;
+ tmp1 = out3 ^ in7;
+ out6 = tmp0 ^ in0 ^ in4;
+ out5 = tmp1 ^ in3;
+ out2 = out6 ^ in6;
+ out7 = out2 ^ in1 ^ in3;
+ out0 = tmp1 ^ out7;
+ out1 = tmp0 ^ out0;
+ out4 = out1 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C8(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out0 = in1 ^ in2;
+ out1 = in2 ^ in3;
+ tmp0 = in5 ^ in6;
+ tmp1 = in0 ^ in7;
+ out2 = out1 ^ in1 ^ in4;
+ out4 = tmp0 ^ in4;
+ out5 = tmp0 ^ in7;
+ out6 = tmp1 ^ in6;
+ out7 = tmp1 ^ in1;
+ out3 = out2 ^ in0 ^ in2 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_C9(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in5 ^ in6;
+ out7 = in0 ^ in1;
+ tmp0 = in1 ^ in3;
+ out5 = in6 ^ in7;
+ out6 = in0 ^ in7;
+ out0 = out7 ^ in2;
+ out3 = out7 ^ in4 ^ in5;
+ out1 = tmp0 ^ in2;
+ out2 = tmp0 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_CA(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in7;
+ tmp1 = in2 ^ in7;
+ tmp2 = tmp0 ^ in6;
+ out0 = tmp1 ^ in1;
+ tmp3 = tmp1 ^ in3;
+ out6 = tmp2 ^ in5;
+ out7 = tmp2 ^ in1;
+ out2 = tmp3 ^ in4;
+ out5 = out6 ^ in0 ^ in4;
+ out4 = out5 ^ in3;
+ out1 = tmp0 ^ tmp3;
+ out3 = tmp3 ^ out5 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_CB(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in4 ^ in7;
+ tmp1 = in5 ^ in7;
+ out7 = in0 ^ in1 ^ in6;
+ out5 = tmp0 ^ in6;
+ out2 = tmp0 ^ in3;
+ out6 = tmp1 ^ in0;
+ out4 = tmp1 ^ in3 ^ in6;
+ tmp2 = out5 ^ out7 ^ in2;
+ out1 = tmp2 ^ out2;
+ out0 = tmp2 ^ in4;
+ out3 = tmp2 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_CC(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in5;
+ tmp1 = in1 ^ in6;
+ out1 = in2 ^ in3 ^ in7;
+ out5 = tmp0 ^ in6;
+ out0 = tmp1 ^ in2;
+ tmp2 = out5 ^ in0 ^ in7;
+ out3 = tmp2 ^ in4;
+ out6 = tmp0 ^ out3;
+ out7 = tmp1 ^ tmp2 ^ in3;
+ tmp3 = out1 ^ out6;
+ out4 = tmp2 ^ tmp3;
+ out2 = tmp3 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_CD(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out5 = in3 ^ in6;
+ tmp0 = in0 ^ in1;
+ tmp1 = in2 ^ in7;
+ out6 = in0 ^ in4 ^ in7;
+ out2 = tmp0 ^ out5 ^ in4;
+ out7 = tmp0 ^ in5;
+ out0 = tmp0 ^ in2 ^ in6;
+ out4 = tmp1 ^ in5;
+ out1 = tmp1 ^ in1 ^ in3;
+ out3 = out6 ^ in5 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_CE(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in5;
+ tmp1 = tmp0 ^ in3;
+ out4 = tmp1 ^ in4;
+ tmp2 = out4 ^ in6;
+ out3 = tmp2 ^ in0;
+ out5 = tmp2 ^ in2;
+ out2 = out3 ^ in5 ^ in7;
+ out6 = tmp1 ^ out2;
+ out7 = out2 ^ out4 ^ in1;
+ out1 = tmp2 ^ out6;
+ out0 = tmp0 ^ out7 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_CF(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in6;
+ tmp1 = in0 ^ in1 ^ in5;
+ out4 = in2 ^ in3 ^ in5;
+ out5 = tmp0 ^ in4;
+ out7 = tmp1 ^ in6;
+ out1 = tmp1 ^ out4 ^ in7;
+ tmp2 = out5 ^ in0;
+ out2 = tmp2 ^ in7;
+ out3 = tmp2 ^ out4;
+ out6 = tmp0 ^ out2 ^ in5;
+ out0 = tmp0 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D0(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ tmp1 = in1 ^ in4;
+ tmp2 = in2 ^ in5;
+ out7 = tmp0 ^ tmp1;
+ out0 = tmp1 ^ tmp2;
+ tmp3 = tmp2 ^ in3;
+ out1 = tmp3 ^ in6;
+ tmp4 = out1 ^ in1;
+ out2 = tmp4 ^ in7;
+ out3 = out2 ^ in2;
+ out4 = tmp0 ^ out3;
+ out5 = tmp3 ^ out3;
+ out6 = tmp4 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D1(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in5 ^ in6;
+ tmp1 = tmp0 ^ in1;
+ out1 = tmp1 ^ in2;
+ out2 = tmp1 ^ in7;
+ out3 = out2 ^ in3;
+ out5 = out3 ^ in2;
+ tmp2 = out3 ^ in0;
+ out4 = tmp2 ^ in4;
+ out7 = tmp0 ^ out4;
+ out6 = tmp2 ^ out1 ^ in6;
+ out0 = out2 ^ out6 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D2(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in5 ^ in6;
+ out2 = tmp0 ^ in2 ^ in3;
+ out1 = out2 ^ in0;
+ out3 = out2 ^ in1;
+ out4 = out1 ^ in1 ^ in2;
+ out6 = out1 ^ in6 ^ in7;
+ out7 = out4 ^ in4 ^ in5;
+ out5 = out4 ^ out6 ^ in4;
+ out0 = tmp0 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D3(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in3 ^ in5 ^ in6;
+ tmp0 = out2 ^ in2;
+ tmp1 = tmp0 ^ in1;
+ out1 = tmp1 ^ in0;
+ out3 = tmp1 ^ in3;
+ out4 = out1 ^ in2 ^ in4;
+ tmp2 = out4 ^ in5;
+ out7 = tmp2 ^ in7;
+ out0 = tmp0 ^ out7;
+ tmp3 = out0 ^ in0;
+ out5 = tmp3 ^ in6;
+ out6 = tmp2 ^ tmp3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D4(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in3 ^ in5;
+ tmp0 = in1 ^ in5;
+ tmp1 = tmp0 ^ in2;
+ out4 = tmp1 ^ in0;
+ tmp2 = tmp1 ^ in6;
+ out2 = out4 ^ in3 ^ in7;
+ out0 = tmp2 ^ in4;
+ out5 = tmp2 ^ out3;
+ out1 = tmp0 ^ out5 ^ in7;
+ out6 = tmp0 ^ out2 ^ in4;
+ out7 = tmp1 ^ out6 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D5(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in5;
+ tmp0 = in0 ^ in4;
+ tmp1 = tmp0 ^ in1 ^ in5;
+ out4 = tmp1 ^ in2;
+ out0 = out4 ^ in6;
+ tmp2 = tmp0 ^ out0;
+ out5 = tmp2 ^ in3;
+ out1 = out5 ^ in7;
+ out6 = tmp1 ^ out1;
+ out7 = tmp2 ^ out6;
+ out2 = out7 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D6(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2 ^ in4 ^ in6;
+ out5 = tmp0 ^ in3;
+ out0 = tmp0 ^ in5 ^ in7;
+ out3 = out0 ^ out5 ^ in2;
+ tmp1 = out3 ^ in0;
+ out1 = tmp1 ^ in6;
+ out2 = tmp1 ^ in7;
+ out4 = tmp1 ^ in1;
+ out6 = tmp1 ^ in4;
+ out7 = tmp0 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D7(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in3;
+ out3 = in2 ^ in5 ^ in7;
+ out2 = tmp0 ^ in5;
+ tmp1 = tmp0 ^ out3 ^ in1;
+ out1 = tmp1 ^ in6;
+ out4 = tmp1 ^ in4;
+ tmp2 = out1 ^ in4;
+ out6 = tmp2 ^ in1;
+ out7 = tmp2 ^ in2;
+ out0 = tmp2 ^ in3;
+ out5 = tmp2 ^ in0 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D8(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in0;
+ out5 = in1;
+ tmp0 = in1 ^ in2;
+ out6 = in0 ^ in2;
+ out0 = tmp0 ^ in4;
+ tmp1 = tmp0 ^ in3;
+ out7 = tmp1 ^ out6;
+ out2 = tmp1 ^ in6;
+ out3 = out7 ^ in7;
+ out1 = tmp1 ^ in1 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_D9(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in0 ^ in4;
+ out5 = in1 ^ in5;
+ out2 = in1 ^ in3 ^ in6;
+ out3 = in0 ^ in1 ^ in7;
+ out6 = in0 ^ in2 ^ in6;
+ out0 = out4 ^ in1 ^ in2;
+ out1 = out5 ^ in2 ^ in3;
+ out7 = out3 ^ in3;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_DA(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out5 = in1 ^ in4;
+ tmp0 = in2 ^ in7;
+ tmp1 = in0 ^ in2 ^ in3;
+ out0 = tmp0 ^ out5;
+ out4 = tmp0 ^ tmp1;
+ out2 = tmp0 ^ in3 ^ in6;
+ out1 = tmp1 ^ in5;
+ out3 = tmp1 ^ in1;
+ out6 = out1 ^ in3;
+ out7 = out3 ^ in2 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_DB(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = in1 ^ in5;
+ tmp2 = in3 ^ in7;
+ out3 = tmp0 ^ in2;
+ out5 = tmp1 ^ in4;
+ out6 = tmp1 ^ out3 ^ in6;
+ out2 = tmp2 ^ in6;
+ tmp3 = tmp2 ^ in4;
+ tmp4 = out3 ^ in3;
+ out4 = tmp3 ^ in0;
+ out1 = tmp4 ^ in5;
+ out0 = tmp3 ^ tmp4;
+ out7 = tmp0 ^ out2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_DC(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in2;
+ tmp1 = in0 ^ in3;
+ out6 = tmp0 ^ in4;
+ tmp2 = tmp0 ^ in7;
+ out3 = tmp1 ^ in6;
+ tmp3 = tmp1 ^ in1;
+ out1 = tmp1 ^ tmp2 ^ in5;
+ out4 = tmp2 ^ in6;
+ out2 = tmp3 ^ in2;
+ out7 = tmp3 ^ in5;
+ out5 = tmp2 ^ out2;
+ out0 = out2 ^ out3 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_DD(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in0 ^ in6;
+ out2 = in0 ^ in1 ^ in3;
+ out6 = out3 ^ in2 ^ in4;
+ out7 = out2 ^ in5 ^ in7;
+ out0 = out6 ^ in1;
+ out4 = out6 ^ in7;
+ out5 = out7 ^ in0;
+ out1 = out5 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_DE(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3 ^ in6;
+ tmp1 = in3 ^ in4 ^ in7;
+ out4 = tmp0 ^ in0;
+ out5 = tmp1 ^ in1;
+ out3 = out4 ^ in7;
+ out2 = out3 ^ in6;
+ out1 = out2 ^ in5;
+ out6 = tmp1 ^ out1;
+ out0 = tmp0 ^ out5;
+ out7 = out0 ^ out1 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_DF(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in0 ^ in3 ^ in7;
+ tmp0 = out2 ^ in1 ^ in5;
+ out1 = tmp0 ^ in2;
+ out7 = tmp0 ^ in6;
+ out5 = tmp0 ^ in0 ^ in4;
+ tmp1 = out1 ^ out5 ^ in6;
+ out4 = tmp1 ^ in3;
+ out6 = tmp1 ^ in5;
+ tmp2 = tmp1 ^ in7;
+ out0 = tmp2 ^ in1;
+ out3 = tmp2 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E0(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in1 ^ in7;
+ tmp0 = in2 ^ in4;
+ out4 = out3 ^ in3 ^ in5;
+ out2 = tmp0 ^ in1;
+ tmp1 = tmp0 ^ in6;
+ out0 = out4 ^ in2;
+ out6 = out4 ^ in0;
+ out1 = tmp1 ^ in3;
+ out5 = tmp1 ^ in0;
+ out7 = out5 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E1(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in1 ^ in4;
+ tmp0 = in1 ^ in7;
+ out3 = tmp0 ^ in3;
+ tmp1 = out3 ^ in5;
+ out4 = tmp1 ^ in4;
+ tmp2 = tmp1 ^ in0;
+ out0 = tmp2 ^ in2;
+ out6 = tmp2 ^ in6;
+ tmp3 = out0 ^ out4 ^ in6;
+ out5 = tmp3 ^ in5;
+ out7 = tmp0 ^ tmp3;
+ out1 = tmp2 ^ out5 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E2(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in1 ^ in2;
+ out4 = in1 ^ in5;
+ out2 = in2 ^ in4 ^ in7;
+ out5 = in0 ^ in2 ^ in6;
+ out0 = out3 ^ in3 ^ in5;
+ out7 = out3 ^ in0 ^ in4;
+ out6 = out2 ^ out7 ^ in3;
+ out1 = out5 ^ in3 ^ in4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E3(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in4 ^ in7;
+ tmp0 = in1 ^ in3;
+ out3 = tmp0 ^ in2;
+ tmp1 = out3 ^ in0;
+ out0 = tmp1 ^ in5;
+ tmp2 = tmp1 ^ in4;
+ out1 = tmp2 ^ in6;
+ tmp3 = tmp2 ^ in3;
+ out7 = tmp3 ^ in7;
+ out6 = out1 ^ out2 ^ in2;
+ tmp4 = tmp0 ^ out0;
+ out5 = tmp4 ^ in6;
+ out4 = tmp3 ^ tmp4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E4(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in6;
+ tmp0 = in0 ^ in4;
+ tmp1 = tmp0 ^ in2 ^ in6;
+ out2 = tmp1 ^ in1;
+ out7 = out2 ^ in5;
+ tmp2 = tmp0 ^ out7;
+ out4 = tmp2 ^ in3;
+ out0 = out4 ^ in7;
+ out6 = tmp1 ^ out0;
+ out5 = tmp2 ^ out6;
+ out1 = out5 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E5(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in3 ^ in6;
+ tmp0 = in0 ^ in1;
+ tmp1 = in5 ^ in7;
+ out2 = tmp0 ^ in4 ^ in6;
+ tmp2 = tmp1 ^ out2;
+ out6 = tmp2 ^ in3;
+ out7 = tmp2 ^ in2;
+ out0 = out6 ^ in2 ^ in4;
+ out5 = out6 ^ in1 ^ in2;
+ out1 = tmp0 ^ out5 ^ in5;
+ out4 = tmp1 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E6(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in2 ^ in6 ^ in7;
+ out2 = out3 ^ in0 ^ in4;
+ out4 = out3 ^ in1 ^ in5;
+ out1 = out2 ^ in3;
+ out7 = out2 ^ out4 ^ in2;
+ out0 = out4 ^ in3 ^ in7;
+ out5 = out1 ^ in4;
+ out6 = out0 ^ out2 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E7(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in3;
+ out3 = tmp0 ^ in6 ^ in7;
+ tmp1 = out3 ^ in0;
+ out5 = tmp1 ^ in5;
+ tmp2 = tmp1 ^ in4;
+ tmp3 = out5 ^ in7;
+ out1 = tmp2 ^ in1;
+ out0 = tmp3 ^ in1;
+ out6 = out1 ^ in2;
+ out2 = tmp0 ^ tmp2;
+ tmp4 = tmp3 ^ out6;
+ out4 = tmp4 ^ in6;
+ out7 = tmp4 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E8(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in3 ^ in6;
+ tmp0 = in4 ^ in7;
+ out1 = in2 ^ in3 ^ in4;
+ out5 = tmp0 ^ in0;
+ tmp1 = tmp0 ^ in1;
+ tmp2 = tmp1 ^ in5;
+ out0 = tmp1 ^ out1;
+ out2 = tmp2 ^ in2;
+ out6 = tmp2 ^ out5;
+ tmp3 = out6 ^ in6;
+ out3 = tmp3 ^ in7;
+ out7 = tmp3 ^ in2 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_E9(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = in3 ^ in6;
+ tmp2 = tmp0 ^ in6;
+ out4 = tmp1 ^ in4;
+ out6 = tmp2 ^ in5;
+ out7 = tmp2 ^ in2 ^ in7;
+ out3 = out6 ^ in3 ^ in7;
+ out0 = tmp1 ^ out7;
+ out2 = out3 ^ out4 ^ in0;
+ out5 = tmp0 ^ out2;
+ out1 = out0 ^ out5 ^ in5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_EA(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in6 ^ in7;
+ out5 = in0 ^ in7;
+ out6 = in0 ^ in1;
+ out0 = in1 ^ in2 ^ in3;
+ out2 = in2 ^ in4 ^ in5;
+ out7 = out6 ^ in2;
+ out1 = out0 ^ out6 ^ in4;
+ out3 = out7 ^ in5 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_EB(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in4 ^ in5;
+ tmp0 = in0 ^ in1;
+ out4 = in4 ^ in6 ^ in7;
+ out5 = in0 ^ in5 ^ in7;
+ out6 = tmp0 ^ in6;
+ tmp1 = tmp0 ^ in2;
+ out0 = tmp1 ^ in3;
+ out7 = tmp1 ^ in7;
+ out1 = out0 ^ in4;
+ out3 = out0 ^ in5 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_EC(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out3 = in0 ^ in5;
+ out4 = in2 ^ in3 ^ in7;
+ out5 = in0 ^ in3 ^ in4;
+ out6 = out3 ^ in1 ^ in4;
+ out1 = out4 ^ in4;
+ out0 = out4 ^ in1 ^ in6;
+ out2 = out0 ^ out5 ^ in5;
+ out7 = out2 ^ in4 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_ED(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in2 ^ in4;
+ tmp1 = in3 ^ in5;
+ out4 = tmp0 ^ in3 ^ in7;
+ out3 = tmp1 ^ in0;
+ out1 = out4 ^ in1;
+ out5 = out3 ^ in4;
+ out7 = out1 ^ out5 ^ in6;
+ out2 = tmp0 ^ out7;
+ out0 = tmp1 ^ out7;
+ out6 = out2 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_EE(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in2;
+ tmp0 = in0 ^ in1;
+ out5 = in0 ^ in3;
+ tmp1 = tmp0 ^ in2;
+ out6 = tmp0 ^ in4;
+ tmp2 = tmp1 ^ out5;
+ out7 = tmp1 ^ in5;
+ out1 = tmp2 ^ out6 ^ in7;
+ out0 = tmp2 ^ in6;
+ tmp3 = out7 ^ in1;
+ out3 = tmp3 ^ in7;
+ out2 = tmp3 ^ in4 ^ in6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_EF(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out4 = in2 ^ in4;
+ tmp0 = in0 ^ in5;
+ tmp1 = in4 ^ in6;
+ out5 = tmp0 ^ in3;
+ out2 = tmp0 ^ tmp1;
+ out6 = tmp1 ^ in0 ^ in1;
+ out3 = out5 ^ in2 ^ in7;
+ out7 = out3 ^ in1 ^ in3;
+ out0 = out4 ^ out6 ^ in3;
+ out1 = tmp1 ^ out0 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F0(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2;
+ tmp1 = in4 ^ in5;
+ out2 = tmp0 ^ in6;
+ out3 = tmp1 ^ in1;
+ tmp2 = tmp1 ^ in7;
+ out1 = out2 ^ out3 ^ in3;
+ tmp3 = tmp0 ^ tmp2;
+ out0 = tmp3 ^ in3;
+ out5 = tmp3 ^ in0;
+ out4 = out1 ^ out5 ^ in4;
+ out7 = out4 ^ in2;
+ out6 = tmp2 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F1(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in1 ^ in6;
+ tmp0 = in3 ^ in5;
+ out3 = tmp0 ^ in1 ^ in4;
+ tmp1 = out3 ^ in2;
+ out1 = tmp1 ^ in6;
+ tmp2 = tmp1 ^ in0;
+ tmp3 = out1 ^ in5;
+ out0 = tmp2 ^ in7;
+ out6 = tmp2 ^ in4;
+ out7 = tmp3 ^ in0;
+ out5 = tmp0 ^ out0;
+ out4 = tmp3 ^ out5 ^ in1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F2(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in4 ^ in5;
+ out2 = in2 ^ in6 ^ in7;
+ tmp1 = tmp0 ^ in1;
+ tmp2 = tmp1 ^ in2;
+ out0 = tmp2 ^ in3;
+ out3 = tmp2 ^ in7;
+ out5 = out3 ^ in0 ^ in4;
+ tmp3 = tmp0 ^ out5;
+ out7 = tmp3 ^ in3;
+ out4 = tmp3 ^ out2;
+ out1 = out0 ^ out4 ^ in4;
+ out6 = tmp1 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F3(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in6 ^ in7;
+ tmp0 = in0 ^ in1;
+ out4 = tmp0 ^ in6;
+ tmp1 = tmp0 ^ in2;
+ out5 = tmp1 ^ in7;
+ out6 = tmp1 ^ in3;
+ out7 = out6 ^ in4;
+ out0 = out7 ^ in5;
+ out1 = out0 ^ in6;
+ out3 = out0 ^ in0 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F4(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in0 ^ in1 ^ in2;
+ tmp0 = out2 ^ in3;
+ out4 = tmp0 ^ in4;
+ out5 = out4 ^ in5;
+ out6 = out5 ^ in6;
+ out7 = out6 ^ in7;
+ out0 = out7 ^ in0;
+ out1 = out0 ^ in1;
+ out3 = tmp0 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F5(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in0 ^ in1;
+ tmp0 = out2 ^ in2;
+ out4 = tmp0 ^ in3;
+ out5 = out4 ^ in4;
+ out6 = out5 ^ in5;
+ out7 = out6 ^ in6;
+ out0 = out7 ^ in7;
+ out1 = out0 ^ in0;
+ out3 = tmp0 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F6(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in7;
+ out2 = tmp0 ^ in2;
+ out4 = out2 ^ in1 ^ in4;
+ out7 = out4 ^ in3 ^ in5;
+ out5 = out7 ^ in4 ^ in7;
+ out0 = tmp0 ^ out7 ^ in6;
+ tmp1 = out0 ^ in1;
+ out6 = out0 ^ in0 ^ in5;
+ out3 = tmp1 ^ in3;
+ out1 = tmp0 ^ tmp1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F7(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in0 ^ in7;
+ tmp0 = out2 ^ in1;
+ out4 = tmp0 ^ in2;
+ out5 = out4 ^ in3 ^ in7;
+ out6 = out5 ^ in4;
+ out7 = out6 ^ in5;
+ out0 = out7 ^ in6;
+ out1 = out0 ^ in7;
+ out3 = tmp0 ^ out1;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F8(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in4;
+ tmp1 = in3 ^ in5;
+ tmp2 = tmp0 ^ in6;
+ out4 = tmp0 ^ tmp1;
+ out1 = tmp1 ^ in2 ^ in4;
+ out3 = tmp2 ^ in1;
+ out5 = out3 ^ in5;
+ out7 = out1 ^ out5 ^ in7;
+ out6 = tmp1 ^ out7;
+ out0 = tmp2 ^ out7;
+ out2 = out6 ^ in0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_F9(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in3 ^ in5;
+ tmp1 = in0 ^ in6;
+ out4 = tmp0 ^ in0;
+ tmp2 = tmp1 ^ in4;
+ tmp3 = tmp1 ^ in2;
+ out5 = tmp2 ^ in1;
+ out3 = out5 ^ in3;
+ tmp4 = tmp3 ^ out3;
+ out1 = tmp4 ^ in5;
+ out0 = tmp4 ^ in0 ^ in7;
+ out6 = tmp0 ^ out0 ^ in4;
+ out7 = tmp2 ^ tmp4;
+ out2 = tmp3 ^ out6;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_FA(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in1;
+ tmp1 = tmp0 ^ in2;
+ tmp2 = tmp0 ^ in5;
+ tmp3 = tmp1 ^ in7;
+ out5 = tmp2 ^ in6;
+ out6 = tmp3 ^ in6;
+ out7 = tmp3 ^ in3;
+ out3 = out6 ^ in4;
+ out2 = tmp1 ^ out5;
+ out4 = out2 ^ out3 ^ in1;
+ out0 = out4 ^ out7 ^ in5;
+ out1 = tmp2 ^ out0;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_FB(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in5 ^ in6;
+ tmp0 = in0 ^ in1;
+ out4 = in0 ^ in5 ^ in7;
+ out5 = tmp0 ^ in6;
+ tmp1 = tmp0 ^ in2;
+ out6 = tmp1 ^ in7;
+ out7 = tmp1 ^ in3;
+ out0 = out7 ^ in4;
+ out1 = out0 ^ in5;
+ out3 = out0 ^ in6 ^ in7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_FC(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in1 ^ in2;
+ tmp1 = in0 ^ in7;
+ out2 = tmp0 ^ tmp1 ^ in5;
+ out3 = tmp1 ^ in4;
+ tmp2 = out2 ^ in6;
+ out6 = tmp2 ^ in4;
+ out7 = tmp2 ^ in3;
+ out4 = out6 ^ in1 ^ in3;
+ tmp3 = out4 ^ in0;
+ out1 = tmp3 ^ in6;
+ out0 = tmp3 ^ in1 ^ in5;
+ out5 = tmp0 ^ out4;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_FD(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in5;
+ tmp1 = in1 ^ in7;
+ out2 = tmp0 ^ tmp1;
+ out6 = out2 ^ in2 ^ in4;
+ tmp2 = out6 ^ in0;
+ out1 = tmp2 ^ in3;
+ out0 = tmp0 ^ out1 ^ in6;
+ out5 = out0 ^ in2;
+ tmp3 = out5 ^ in1;
+ out3 = tmp3 ^ in6;
+ out7 = tmp2 ^ tmp3;
+ out4 = tmp1 ^ out7;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_FE(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3, tmp4;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ tmp0 = in0 ^ in2;
+ out2 = tmp0 ^ in5;
+ out3 = tmp0 ^ in4;
+ tmp1 = out3 ^ in6;
+ out4 = tmp1 ^ in5;
+ tmp2 = tmp1 ^ in1;
+ out6 = tmp2 ^ in7;
+ tmp3 = tmp2 ^ in0;
+ out0 = tmp3 ^ in3;
+ tmp4 = out0 ^ out4 ^ in7;
+ out5 = tmp4 ^ in6;
+ out7 = tmp4 ^ in2;
+ out1 = tmp3 ^ out5;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void
+gf8_muladd_FF(void *out, void *in)
+{
+ unsigned int i;
+ uint64_t *in_ptr = (uint64_t *)in;
+ uint64_t *out_ptr = (uint64_t *)out;
+
+ for (i = 0; i < WIDTH; i++) {
+ uint64_t out0, out1, out2, out3, out4, out5, out6, out7;
+ uint64_t tmp0, tmp1, tmp2, tmp3;
+
+ uint64_t in0 = out_ptr[0];
+ uint64_t in1 = out_ptr[WIDTH];
+ uint64_t in2 = out_ptr[WIDTH * 2];
+ uint64_t in3 = out_ptr[WIDTH * 3];
+ uint64_t in4 = out_ptr[WIDTH * 4];
+ uint64_t in5 = out_ptr[WIDTH * 5];
+ uint64_t in6 = out_ptr[WIDTH * 6];
+ uint64_t in7 = out_ptr[WIDTH * 7];
+
+ out2 = in0 ^ in5;
+ tmp0 = in4 ^ in7;
+ tmp1 = out2 ^ in2;
+ out4 = tmp1 ^ in6;
+ out7 = tmp1 ^ in1 ^ in3;
+ out1 = tmp0 ^ out7;
+ tmp2 = out1 ^ in5;
+ out6 = tmp2 ^ in3;
+ tmp3 = tmp2 ^ in7;
+ out0 = tmp3 ^ in6;
+ out3 = tmp3 ^ in1;
+ out5 = tmp0 ^ out0 ^ in2;
+
+ out_ptr[0] = out0 ^ in_ptr[0];
+ out_ptr[WIDTH] = out1 ^ in_ptr[WIDTH];
+ out_ptr[WIDTH * 2] = out2 ^ in_ptr[WIDTH * 2];
+ out_ptr[WIDTH * 3] = out3 ^ in_ptr[WIDTH * 3];
+ out_ptr[WIDTH * 4] = out4 ^ in_ptr[WIDTH * 4];
+ out_ptr[WIDTH * 5] = out5 ^ in_ptr[WIDTH * 5];
+ out_ptr[WIDTH * 6] = out6 ^ in_ptr[WIDTH * 6];
+ out_ptr[WIDTH * 7] = out7 ^ in_ptr[WIDTH * 7];
+
+ in_ptr++;
+ out_ptr++;
+ }
+}
+
+static void (*gf8_muladd[])(void *out, void *in) = {
+ gf8_muladd_00, gf8_muladd_01, gf8_muladd_02, gf8_muladd_03, gf8_muladd_04,
+ gf8_muladd_05, gf8_muladd_06, gf8_muladd_07, gf8_muladd_08, gf8_muladd_09,
+ gf8_muladd_0A, gf8_muladd_0B, gf8_muladd_0C, gf8_muladd_0D, gf8_muladd_0E,
+ gf8_muladd_0F, gf8_muladd_10, gf8_muladd_11, gf8_muladd_12, gf8_muladd_13,
+ gf8_muladd_14, gf8_muladd_15, gf8_muladd_16, gf8_muladd_17, gf8_muladd_18,
+ gf8_muladd_19, gf8_muladd_1A, gf8_muladd_1B, gf8_muladd_1C, gf8_muladd_1D,
+ gf8_muladd_1E, gf8_muladd_1F, gf8_muladd_20, gf8_muladd_21, gf8_muladd_22,
+ gf8_muladd_23, gf8_muladd_24, gf8_muladd_25, gf8_muladd_26, gf8_muladd_27,
+ gf8_muladd_28, gf8_muladd_29, gf8_muladd_2A, gf8_muladd_2B, gf8_muladd_2C,
+ gf8_muladd_2D, gf8_muladd_2E, gf8_muladd_2F, gf8_muladd_30, gf8_muladd_31,
+ gf8_muladd_32, gf8_muladd_33, gf8_muladd_34, gf8_muladd_35, gf8_muladd_36,
+ gf8_muladd_37, gf8_muladd_38, gf8_muladd_39, gf8_muladd_3A, gf8_muladd_3B,
+ gf8_muladd_3C, gf8_muladd_3D, gf8_muladd_3E, gf8_muladd_3F, gf8_muladd_40,
+ gf8_muladd_41, gf8_muladd_42, gf8_muladd_43, gf8_muladd_44, gf8_muladd_45,
+ gf8_muladd_46, gf8_muladd_47, gf8_muladd_48, gf8_muladd_49, gf8_muladd_4A,
+ gf8_muladd_4B, gf8_muladd_4C, gf8_muladd_4D, gf8_muladd_4E, gf8_muladd_4F,
+ gf8_muladd_50, gf8_muladd_51, gf8_muladd_52, gf8_muladd_53, gf8_muladd_54,
+ gf8_muladd_55, gf8_muladd_56, gf8_muladd_57, gf8_muladd_58, gf8_muladd_59,
+ gf8_muladd_5A, gf8_muladd_5B, gf8_muladd_5C, gf8_muladd_5D, gf8_muladd_5E,
+ gf8_muladd_5F, gf8_muladd_60, gf8_muladd_61, gf8_muladd_62, gf8_muladd_63,
+ gf8_muladd_64, gf8_muladd_65, gf8_muladd_66, gf8_muladd_67, gf8_muladd_68,
+ gf8_muladd_69, gf8_muladd_6A, gf8_muladd_6B, gf8_muladd_6C, gf8_muladd_6D,
+ gf8_muladd_6E, gf8_muladd_6F, gf8_muladd_70, gf8_muladd_71, gf8_muladd_72,
+ gf8_muladd_73, gf8_muladd_74, gf8_muladd_75, gf8_muladd_76, gf8_muladd_77,
+ gf8_muladd_78, gf8_muladd_79, gf8_muladd_7A, gf8_muladd_7B, gf8_muladd_7C,
+ gf8_muladd_7D, gf8_muladd_7E, gf8_muladd_7F, gf8_muladd_80, gf8_muladd_81,
+ gf8_muladd_82, gf8_muladd_83, gf8_muladd_84, gf8_muladd_85, gf8_muladd_86,
+ gf8_muladd_87, gf8_muladd_88, gf8_muladd_89, gf8_muladd_8A, gf8_muladd_8B,
+ gf8_muladd_8C, gf8_muladd_8D, gf8_muladd_8E, gf8_muladd_8F, gf8_muladd_90,
+ gf8_muladd_91, gf8_muladd_92, gf8_muladd_93, gf8_muladd_94, gf8_muladd_95,
+ gf8_muladd_96, gf8_muladd_97, gf8_muladd_98, gf8_muladd_99, gf8_muladd_9A,
+ gf8_muladd_9B, gf8_muladd_9C, gf8_muladd_9D, gf8_muladd_9E, gf8_muladd_9F,
+ gf8_muladd_A0, gf8_muladd_A1, gf8_muladd_A2, gf8_muladd_A3, gf8_muladd_A4,
+ gf8_muladd_A5, gf8_muladd_A6, gf8_muladd_A7, gf8_muladd_A8, gf8_muladd_A9,
+ gf8_muladd_AA, gf8_muladd_AB, gf8_muladd_AC, gf8_muladd_AD, gf8_muladd_AE,
+ gf8_muladd_AF, gf8_muladd_B0, gf8_muladd_B1, gf8_muladd_B2, gf8_muladd_B3,
+ gf8_muladd_B4, gf8_muladd_B5, gf8_muladd_B6, gf8_muladd_B7, gf8_muladd_B8,
+ gf8_muladd_B9, gf8_muladd_BA, gf8_muladd_BB, gf8_muladd_BC, gf8_muladd_BD,
+ gf8_muladd_BE, gf8_muladd_BF, gf8_muladd_C0, gf8_muladd_C1, gf8_muladd_C2,
+ gf8_muladd_C3, gf8_muladd_C4, gf8_muladd_C5, gf8_muladd_C6, gf8_muladd_C7,
+ gf8_muladd_C8, gf8_muladd_C9, gf8_muladd_CA, gf8_muladd_CB, gf8_muladd_CC,
+ gf8_muladd_CD, gf8_muladd_CE, gf8_muladd_CF, gf8_muladd_D0, gf8_muladd_D1,
+ gf8_muladd_D2, gf8_muladd_D3, gf8_muladd_D4, gf8_muladd_D5, gf8_muladd_D6,
+ gf8_muladd_D7, gf8_muladd_D8, gf8_muladd_D9, gf8_muladd_DA, gf8_muladd_DB,
+ gf8_muladd_DC, gf8_muladd_DD, gf8_muladd_DE, gf8_muladd_DF, gf8_muladd_E0,
+ gf8_muladd_E1, gf8_muladd_E2, gf8_muladd_E3, gf8_muladd_E4, gf8_muladd_E5,
+ gf8_muladd_E6, gf8_muladd_E7, gf8_muladd_E8, gf8_muladd_E9, gf8_muladd_EA,
+ gf8_muladd_EB, gf8_muladd_EC, gf8_muladd_ED, gf8_muladd_EE, gf8_muladd_EF,
+ gf8_muladd_F0, gf8_muladd_F1, gf8_muladd_F2, gf8_muladd_F3, gf8_muladd_F4,
+ gf8_muladd_F5, gf8_muladd_F6, gf8_muladd_F7, gf8_muladd_F8, gf8_muladd_F9,
+ gf8_muladd_FA, gf8_muladd_FB, gf8_muladd_FC, gf8_muladd_FD, gf8_muladd_FE,
+ gf8_muladd_FF};
+
+static uint64_t zero[EC_METHOD_WORD_SIZE * 8] = {
+ 0,
+};
+
+void
+ec_code_c_prepare(ec_gf_t *gf, uint32_t *values, uint32_t count)
+{
+ uint32_t i, last, tmp;
+
+ last = 1;
+ for (i = count; i > 0; i--) {
+ if (values[i - 1] != 0) {
+ tmp = values[i - 1];
+ values[i - 1] = ec_gf_div(gf, tmp, last);
+ last = tmp;
+ }
+ }
+}
+
+void
+ec_code_c_linear(void *dst, void *src, uint64_t offset, uint32_t *values,
+ uint32_t count)
+{
+ src += offset;
+ gf8_muladd_00(dst, src);
+ while (--count > 0) {
+ src += EC_METHOD_CHUNK_SIZE;
+ gf8_muladd[*values](dst, src);
+ values++;
+ }
+}
+
+void
+ec_code_c_interleaved(void *dst, void **src, uint64_t offset, uint32_t *values,
+ uint32_t count)
+{
+ uint32_t i, last, tmp;
+
+ i = 0;
+ while ((last = *values++) == 0) {
+ i++;
+ }
+ gf8_muladd_00(dst, src[i++] + offset);
+ while (i < count) {
+ tmp = *values++;
+ if (tmp != 0) {
+ gf8_muladd[last](dst, src[i] + offset);
+ last = tmp;
+ }
+ i++;
+ }
+ gf8_muladd[last](dst, zero);
+}
diff --git a/xlators/cluster/ec/src/ec-code-c.h b/xlators/cluster/ec/src/ec-code-c.h
new file mode 100644
index 00000000000..42b5a064eb8
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-c.h
@@ -0,0 +1,27 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_CODE_C_H__
+#define __EC_CODE_C_H__
+
+#include "ec-types.h"
+
+void
+ec_code_c_prepare(ec_gf_t *gf, uint32_t *values, uint32_t count);
+
+void
+ec_code_c_linear(void *dst, void *src, uint64_t offset, uint32_t *values,
+ uint32_t count);
+
+void
+ec_code_c_interleaved(void *dst, void **src, uint64_t offset, uint32_t *values,
+ uint32_t count);
+
+#endif /* __EC_CODE_C_H__ */
diff --git a/xlators/cluster/ec/src/ec-code-intel.c b/xlators/cluster/ec/src/ec-code-intel.c
new file mode 100644
index 00000000000..f1c4e13e321
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-intel.c
@@ -0,0 +1,594 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <inttypes.h>
+#include <string.h>
+#include <errno.h>
+
+#include "ec-code-intel.h"
+
+static void
+ec_code_intel_init(ec_code_intel_t *intel)
+{
+ memset(intel, 0, sizeof(ec_code_intel_t));
+}
+
+static void
+ec_code_intel_prefix(ec_code_intel_t *intel, uint8_t prefix)
+{
+ intel->prefix.data[intel->prefix.bytes++] = prefix;
+}
+
+static void
+ec_code_intel_rex(ec_code_intel_t *intel, gf_boolean_t w)
+{
+ gf_boolean_t present = _gf_false;
+
+ if (w) {
+ intel->rex.w = 1;
+ present = _gf_true;
+ }
+ if (intel->modrm.present) {
+ if (intel->modrm.reg > 7) {
+ intel->modrm.reg &= 7;
+ intel->rex.r = 1;
+ present = _gf_true;
+ }
+ if (intel->sib.present) {
+ if (intel->sib.index > 7) {
+ intel->sib.index &= 7;
+ intel->rex.x = 1;
+ present = _gf_true;
+ }
+ if (intel->sib.base > 7) {
+ intel->sib.base &= 7;
+ intel->rex.b = 1;
+ present = _gf_true;
+ }
+ } else if (intel->modrm.rm > 7) {
+ intel->modrm.rm &= 7;
+ intel->rex.b = 1;
+ present = _gf_true;
+ }
+ } else if (intel->reg > 7) {
+ intel->reg &= 7;
+ intel->rex.b = 1;
+ present = _gf_true;
+ }
+ intel->rex.present = present;
+}
+
+static void
+ec_code_intel_vex(ec_code_intel_t *intel, gf_boolean_t w, gf_boolean_t l,
+ ec_code_vex_opcode_t opcode, ec_code_vex_prefix_t prefix,
+ uint32_t reg)
+{
+ ec_code_intel_rex(intel, w);
+ if (((intel->rex.w == 1) || (intel->rex.x == 0) || (intel->rex.b == 0)) ||
+ ((opcode != VEX_OPCODE_NONE) && (opcode != VEX_OPCODE_0F))) {
+ intel->rex.present = _gf_false;
+
+ intel->vex.bytes = 3;
+ intel->vex.data[0] = 0xC4;
+ intel->vex.data[1] = ((intel->rex.r << 7) | (intel->rex.x << 6) |
+ (intel->rex.b << 5) | opcode) ^
+ 0xE0;
+ intel->vex.data[2] = (intel->rex.w << 7) | ((~reg & 0x0F) << 3) |
+ (l ? 0x04 : 0x00) | prefix;
+ } else {
+ intel->vex.bytes = 2;
+ intel->vex.data[0] = 0xC5;
+ intel->vex.data[1] = (intel->rex.r << 7) | ((~reg & 0x0F) << 3) |
+ (l ? 0x04 : 0x00) | prefix;
+ }
+}
+
+static void
+ec_code_intel_modrm_reg(ec_code_intel_t *intel, uint32_t rm, uint32_t reg)
+{
+ intel->modrm.present = _gf_true;
+ intel->modrm.mod = 3;
+ intel->modrm.rm = rm;
+ intel->modrm.reg = reg;
+}
+
+static void
+ec_code_intel_modrm_mem(ec_code_intel_t *intel, uint32_t reg,
+ ec_code_intel_reg_t base, ec_code_intel_reg_t index,
+ uint32_t scale, int32_t offset)
+{
+ if (index == REG_SP) {
+ intel->invalid = _gf_true;
+ return;
+ }
+ if ((index != REG_NULL) && (scale != 1) && (scale != 2) && (scale != 4) &&
+ (scale != 8)) {
+ intel->invalid = _gf_true;
+ return;
+ }
+ scale >>= 1;
+ if (scale == 4) {
+ scale = 3;
+ }
+
+ intel->modrm.present = _gf_true;
+ intel->modrm.reg = reg;
+
+ intel->offset.value = offset;
+ if ((offset == 0) && (base != REG_BP)) {
+ intel->modrm.mod = 0;
+ intel->offset.bytes = 0;
+ } else if ((offset >= -128) && (offset <= 127)) {
+ intel->modrm.mod = 1;
+ intel->offset.bytes = 1;
+ } else {
+ intel->modrm.mod = 2;
+ intel->offset.bytes = 4;
+ }
+
+ intel->modrm.rm = base;
+ if ((index != REG_NULL) || (base == REG_SP)) {
+ intel->modrm.rm = 4;
+ intel->sib.present = _gf_true;
+ intel->sib.index = index;
+ if (index == REG_NULL) {
+ intel->sib.index = 4;
+ }
+ intel->sib.scale = scale;
+ intel->sib.base = base;
+ if (base == REG_NULL) {
+ intel->sib.base = 5;
+ intel->modrm.mod = 0;
+ intel->offset.bytes = 4;
+ }
+ } else if (base == REG_NULL) {
+ intel->modrm.mod = 0;
+ intel->modrm.rm = 5;
+ intel->offset.bytes = 4;
+ }
+}
+
+static void
+ec_code_intel_op_1(ec_code_intel_t *intel, uint8_t opcode, uint32_t reg)
+{
+ intel->reg = reg;
+ intel->opcode.bytes = 1;
+ intel->opcode.data[0] = opcode;
+}
+
+static void
+ec_code_intel_op_2(ec_code_intel_t *intel, uint8_t opcode1, uint8_t opcode2,
+ uint32_t reg)
+{
+ intel->reg = reg;
+ intel->opcode.bytes = 2;
+ intel->opcode.data[0] = opcode1;
+ intel->opcode.data[1] = opcode2;
+}
+
+static void
+ec_code_intel_immediate_1(ec_code_intel_t *intel, uint32_t value)
+{
+ intel->immediate.bytes = 1;
+ intel->immediate.value = value;
+}
+
+static void
+ec_code_intel_immediate_2(ec_code_intel_t *intel, uint32_t value)
+{
+ intel->immediate.bytes = 2;
+ intel->immediate.value = value;
+}
+
+static void
+ec_code_intel_immediate_4(ec_code_intel_t *intel, uint32_t value)
+{
+ intel->immediate.bytes = 4;
+ intel->immediate.value = value;
+}
+
+static void
+ec_code_intel_emit(ec_code_builder_t *builder, ec_code_intel_t *intel)
+{
+ uint8_t insn[15];
+ uint32_t i, count;
+
+ if (intel->invalid) {
+ ec_code_error(builder, EINVAL);
+ return;
+ }
+
+ count = 0;
+ for (i = 0; i < intel->prefix.bytes; i++) {
+ insn[count++] = intel->prefix.data[i];
+ }
+ for (i = 0; i < intel->vex.bytes; i++) {
+ insn[count++] = intel->vex.data[i];
+ }
+ if (intel->rex.present) {
+ insn[count++] = 0x40 | (intel->rex.w << 3) | (intel->rex.r << 2) |
+ (intel->rex.x << 1) | (intel->rex.b << 0);
+ }
+ for (i = 0; i < intel->opcode.bytes; i++) {
+ insn[count++] = intel->opcode.data[i];
+ }
+ if (intel->modrm.present) {
+ insn[count++] = (intel->modrm.mod << 6) | (intel->modrm.reg << 3) |
+ (intel->modrm.rm << 0);
+ if (intel->sib.present) {
+ insn[count++] = (intel->sib.scale << 6) | (intel->sib.index << 3) |
+ (intel->sib.base << 0);
+ }
+ }
+ for (i = 0; i < intel->offset.bytes; i++) {
+ insn[count++] = intel->offset.data[i];
+ }
+ for (i = 0; i < intel->immediate.bytes; i++) {
+ insn[count++] = intel->immediate.data[i];
+ }
+
+ ec_code_emit(builder, insn, count);
+}
+
+void
+ec_code_intel_op_push_r(ec_code_builder_t *builder, ec_code_intel_reg_t reg)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_op_1(&intel, 0x50 | (reg & 7), reg);
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_pop_r(ec_code_builder_t *builder, ec_code_intel_reg_t reg)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_op_1(&intel, 0x58 | (reg & 7), reg);
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_ret(ec_code_builder_t *builder, uint32_t size)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ if (size == 0) {
+ ec_code_intel_op_1(&intel, 0xC3, 0);
+ } else {
+ ec_code_intel_immediate_2(&intel, size);
+ ec_code_intel_op_1(&intel, 0xC2, 0);
+ }
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_r2r(ec_code_builder_t *builder, ec_code_intel_reg_t src,
+ ec_code_intel_reg_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_reg(&intel, dst, src);
+ ec_code_intel_op_1(&intel, 0x89, 0);
+ ec_code_intel_rex(&intel, _gf_true);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_r2m(ec_code_builder_t *builder, ec_code_intel_reg_t src,
+ ec_code_intel_reg_t base, ec_code_intel_reg_t index,
+ uint32_t scale, int32_t offset)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_mem(&intel, src, base, index, scale, offset);
+ ec_code_intel_op_1(&intel, 0x89, 0);
+ ec_code_intel_rex(&intel, _gf_true);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_m2r(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, ec_code_intel_reg_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_mem(&intel, dst, base, index, scale, offset);
+ ec_code_intel_op_1(&intel, 0x8B, 0);
+ ec_code_intel_rex(&intel, _gf_true);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_xor_r2r(ec_code_builder_t *builder, ec_code_intel_reg_t src,
+ ec_code_intel_reg_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_reg(&intel, dst, src);
+ ec_code_intel_op_1(&intel, 0x31, 0);
+ ec_code_intel_rex(&intel, _gf_true);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_xor_m2r(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, ec_code_intel_reg_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_mem(&intel, dst, base, index, scale, offset);
+ ec_code_intel_op_1(&intel, 0x33, 0);
+ ec_code_intel_rex(&intel, _gf_true);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_add_i2r(ec_code_builder_t *builder, int32_t value,
+ ec_code_intel_reg_t reg)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ if ((value >= -128) && (value < 128)) {
+ ec_code_intel_modrm_reg(&intel, reg, 0);
+ ec_code_intel_op_1(&intel, 0x83, 0);
+ ec_code_intel_immediate_1(&intel, value);
+ } else {
+ if (reg == REG_AX) {
+ ec_code_intel_op_1(&intel, 0x05, reg);
+ } else {
+ ec_code_intel_modrm_reg(&intel, reg, 0);
+ ec_code_intel_op_1(&intel, 0x81, 0);
+ }
+ ec_code_intel_immediate_4(&intel, value);
+ }
+ ec_code_intel_rex(&intel, _gf_true);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_test_i2r(ec_code_builder_t *builder, uint32_t value,
+ ec_code_intel_reg_t reg)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ if (reg == REG_AX) {
+ ec_code_intel_op_1(&intel, 0xA9, reg);
+ } else {
+ ec_code_intel_modrm_reg(&intel, reg, 0);
+ ec_code_intel_op_1(&intel, 0xF7, 0);
+ }
+ ec_code_intel_immediate_4(&intel, value);
+ ec_code_intel_rex(&intel, _gf_true);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_jne(ec_code_builder_t *builder, uint32_t address)
+{
+ ec_code_intel_t intel;
+ int32_t rel;
+
+ ec_code_intel_init(&intel);
+
+ rel = address - builder->address - 2;
+ if ((rel >= -128) && (rel < 128)) {
+ ec_code_intel_op_1(&intel, 0x75, 0);
+ ec_code_intel_immediate_1(&intel, rel);
+ } else {
+ rel -= 4;
+ ec_code_intel_op_2(&intel, 0x0F, 0x85, 0);
+ ec_code_intel_immediate_4(&intel, rel);
+ }
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_sse2sse(ec_code_builder_t *builder, uint32_t src,
+ uint32_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_prefix(&intel, 0x66);
+ ec_code_intel_modrm_reg(&intel, src, dst);
+ ec_code_intel_op_2(&intel, 0x0F, 0x6F, 0);
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_sse2m(ec_code_builder_t *builder, uint32_t src,
+ ec_code_intel_reg_t base, ec_code_intel_reg_t index,
+ uint32_t scale, int32_t offset)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_prefix(&intel, 0x66);
+ ec_code_intel_modrm_mem(&intel, src, base, index, scale, offset);
+ ec_code_intel_op_2(&intel, 0x0F, 0x7F, 0);
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_m2sse(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, uint32_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_prefix(&intel, 0x66);
+ ec_code_intel_modrm_mem(&intel, dst, base, index, scale, offset);
+ ec_code_intel_op_2(&intel, 0x0F, 0x6F, 0);
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_xor_sse2sse(ec_code_builder_t *builder, uint32_t src,
+ uint32_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_prefix(&intel, 0x66);
+ ec_code_intel_modrm_reg(&intel, src, dst);
+ ec_code_intel_op_2(&intel, 0x0F, 0xEF, 0);
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_xor_m2sse(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, uint32_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_prefix(&intel, 0x66);
+ ec_code_intel_modrm_mem(&intel, dst, base, index, scale, offset);
+ ec_code_intel_op_2(&intel, 0x0F, 0xEF, 0);
+ ec_code_intel_rex(&intel, _gf_false);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_avx2avx(ec_code_builder_t *builder, uint32_t src,
+ uint32_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_reg(&intel, src, dst);
+ ec_code_intel_op_1(&intel, 0x6F, 0);
+ ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, VEX_PREFIX_66,
+ VEX_REG_NONE);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_avx2m(ec_code_builder_t *builder, uint32_t src,
+ ec_code_intel_reg_t base, ec_code_intel_reg_t index,
+ uint32_t scale, int32_t offset)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_mem(&intel, src, base, index, scale, offset);
+ ec_code_intel_op_1(&intel, 0x7F, 0);
+ ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, VEX_PREFIX_66,
+ VEX_REG_NONE);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_mov_m2avx(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, uint32_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_mem(&intel, dst, base, index, scale, offset);
+ ec_code_intel_op_1(&intel, 0x6F, 0);
+ ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, VEX_PREFIX_66,
+ VEX_REG_NONE);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_xor_avx2avx(ec_code_builder_t *builder, uint32_t src,
+ uint32_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_reg(&intel, src, dst);
+ ec_code_intel_op_1(&intel, 0xEF, 0);
+ ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, VEX_PREFIX_66,
+ dst);
+
+ ec_code_intel_emit(builder, &intel);
+}
+
+void
+ec_code_intel_op_xor_m2avx(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, uint32_t dst)
+{
+ ec_code_intel_t intel;
+
+ ec_code_intel_init(&intel);
+
+ ec_code_intel_modrm_mem(&intel, dst, base, index, scale, offset);
+ ec_code_intel_op_1(&intel, 0xEF, 0);
+ ec_code_intel_vex(&intel, _gf_false, _gf_true, VEX_OPCODE_0F, VEX_PREFIX_66,
+ dst);
+
+ ec_code_intel_emit(builder, &intel);
+}
diff --git a/xlators/cluster/ec/src/ec-code-intel.h b/xlators/cluster/ec/src/ec-code-intel.h
new file mode 100644
index 00000000000..3fa4a174765
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-intel.h
@@ -0,0 +1,191 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_CODE_INTEL_H__
+#define __EC_CODE_INTEL_H__
+
+#include "ec-code.h"
+
+#define VEX_REG_NONE 0
+
+enum _ec_code_intel_reg;
+typedef enum _ec_code_intel_reg ec_code_intel_reg_t;
+
+enum _ec_code_vex_prefix;
+typedef enum _ec_code_vex_prefix ec_code_vex_prefix_t;
+
+enum _ec_code_vex_opcode;
+typedef enum _ec_code_vex_opcode ec_code_vex_opcode_t;
+
+struct _ec_code_intel_buffer;
+typedef struct _ec_code_intel_buffer ec_code_intel_buffer_t;
+
+struct _ec_code_intel_sib;
+typedef struct _ec_code_intel_sib ec_code_intel_sib_t;
+
+struct _ec_code_intel_modrm;
+typedef struct _ec_code_intel_modrm ec_code_intel_modrm_t;
+
+struct _ec_code_intel_rex;
+typedef struct _ec_code_intel_rex ec_code_intel_rex_t;
+
+struct _ec_code_intel;
+typedef struct _ec_code_intel ec_code_intel_t;
+
+enum _ec_code_intel_reg {
+ REG_NULL = -1,
+ REG_AX,
+ REG_CX,
+ REG_DX,
+ REG_BX,
+ REG_SP,
+ REG_BP,
+ REG_SI,
+ REG_DI,
+ REG_8,
+ REG_9,
+ REG_10,
+ REG_11,
+ REG_12,
+ REG_13,
+ REG_14,
+ REG_15
+};
+
+enum _ec_code_vex_prefix {
+ VEX_PREFIX_NONE = 0,
+ VEX_PREFIX_66,
+ VEX_PREFIX_F3,
+ VEX_PREFIX_F2
+};
+
+enum _ec_code_vex_opcode {
+ VEX_OPCODE_NONE = 0,
+ VEX_OPCODE_0F,
+ VEX_OPCODE_0F_38,
+ VEX_OPCODE_0F_3A
+};
+
+struct _ec_code_intel_buffer {
+ uint32_t bytes;
+ union {
+ uint8_t data[4];
+ uint32_t value;
+ };
+};
+
+struct _ec_code_intel_sib {
+ gf_boolean_t present;
+ uint32_t base;
+ uint32_t index;
+ uint32_t scale;
+};
+
+struct _ec_code_intel_modrm {
+ gf_boolean_t present;
+ uint32_t mod;
+ uint32_t rm;
+ uint32_t reg;
+};
+
+struct _ec_code_intel_rex {
+ gf_boolean_t present;
+ uint32_t w;
+ uint32_t r;
+ uint32_t x;
+ uint32_t b;
+};
+
+struct _ec_code_intel {
+ gf_boolean_t invalid;
+ ec_code_intel_buffer_t prefix;
+ ec_code_intel_buffer_t opcode;
+ ec_code_intel_buffer_t offset;
+ ec_code_intel_buffer_t immediate;
+ ec_code_intel_buffer_t vex;
+ ec_code_intel_rex_t rex;
+ ec_code_intel_modrm_t modrm;
+ ec_code_intel_sib_t sib;
+ uint32_t reg;
+};
+
+void
+ec_code_intel_op_push_r(ec_code_builder_t *builder, ec_code_intel_reg_t reg);
+void
+ec_code_intel_op_pop_r(ec_code_builder_t *builder, ec_code_intel_reg_t reg);
+void
+ec_code_intel_op_ret(ec_code_builder_t *builder, uint32_t size);
+
+void
+ec_code_intel_op_mov_r2r(ec_code_builder_t *builder, ec_code_intel_reg_t src,
+ ec_code_intel_reg_t dst);
+void
+ec_code_intel_op_mov_r2m(ec_code_builder_t *builder, ec_code_intel_reg_t src,
+ ec_code_intel_reg_t base, ec_code_intel_reg_t index,
+ uint32_t scale, int32_t offset);
+void
+ec_code_intel_op_mov_m2r(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, ec_code_intel_reg_t dst);
+void
+ec_code_intel_op_xor_r2r(ec_code_builder_t *builder, ec_code_intel_reg_t src,
+ ec_code_intel_reg_t dst);
+void
+ec_code_intel_op_xor_m2r(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, ec_code_intel_reg_t dst);
+void
+ec_code_intel_op_add_i2r(ec_code_builder_t *builder, int32_t value,
+ ec_code_intel_reg_t reg);
+void
+ec_code_intel_op_test_i2r(ec_code_builder_t *builder, uint32_t value,
+ ec_code_intel_reg_t reg);
+void
+ec_code_intel_op_jne(ec_code_builder_t *builder, uint32_t address);
+
+void
+ec_code_intel_op_mov_sse2sse(ec_code_builder_t *builder, uint32_t src,
+ uint32_t dst);
+void
+ec_code_intel_op_mov_sse2m(ec_code_builder_t *builder, uint32_t src,
+ ec_code_intel_reg_t base, ec_code_intel_reg_t index,
+ uint32_t scale, int32_t offset);
+void
+ec_code_intel_op_mov_m2sse(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, uint32_t dst);
+void
+ec_code_intel_op_xor_sse2sse(ec_code_builder_t *builder, uint32_t src,
+ uint32_t dst);
+void
+ec_code_intel_op_xor_m2sse(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, uint32_t dst);
+
+void
+ec_code_intel_op_mov_avx2avx(ec_code_builder_t *builder, uint32_t src,
+ uint32_t dst);
+void
+ec_code_intel_op_mov_avx2m(ec_code_builder_t *builder, uint32_t src,
+ ec_code_intel_reg_t base, ec_code_intel_reg_t index,
+ uint32_t scale, int32_t offset);
+void
+ec_code_intel_op_mov_m2avx(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, uint32_t dst);
+void
+ec_code_intel_op_xor_avx2avx(ec_code_builder_t *builder, uint32_t src,
+ uint32_t dst);
+void
+ec_code_intel_op_xor_m2avx(ec_code_builder_t *builder, ec_code_intel_reg_t base,
+ ec_code_intel_reg_t index, uint32_t scale,
+ int32_t offset, uint32_t dst);
+
+#endif /* __EC_CODE_INTEL_H__ */
diff --git a/xlators/cluster/ec/src/ec-code-sse.c b/xlators/cluster/ec/src/ec-code-sse.c
new file mode 100644
index 00000000000..e11e7ff8400
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-sse.c
@@ -0,0 +1,101 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <errno.h>
+
+#include "ec-code-intel.h"
+
+static void
+ec_code_sse_prolog(ec_code_builder_t *builder)
+{
+ builder->loop = builder->address;
+}
+
+static void
+ec_code_sse_epilog(ec_code_builder_t *builder)
+{
+ ec_code_intel_op_add_i2r(builder, 16, REG_DX);
+ ec_code_intel_op_add_i2r(builder, 16, REG_DI);
+ ec_code_intel_op_test_i2r(builder, builder->width - 1, REG_DX);
+ ec_code_intel_op_jne(builder, builder->loop);
+
+ ec_code_intel_op_ret(builder, 0);
+}
+
+static void
+ec_code_sse_load(ec_code_builder_t *builder, uint32_t dst, uint32_t idx,
+ uint32_t bit)
+{
+ if (builder->linear) {
+ ec_code_intel_op_mov_m2sse(
+ builder, REG_SI, REG_DX, 1,
+ idx * builder->width * builder->bits + bit * builder->width, dst);
+ } else {
+ if (builder->base != idx) {
+ ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8,
+ REG_AX);
+ builder->base = idx;
+ }
+ ec_code_intel_op_mov_m2sse(builder, REG_AX, REG_DX, 1,
+ bit * builder->width, dst);
+ }
+}
+
+static void
+ec_code_sse_store(ec_code_builder_t *builder, uint32_t src, uint32_t bit)
+{
+ ec_code_intel_op_mov_sse2m(builder, src, REG_DI, REG_NULL, 0,
+ bit * builder->width);
+}
+
+static void
+ec_code_sse_copy(ec_code_builder_t *builder, uint32_t dst, uint32_t src)
+{
+ ec_code_intel_op_mov_sse2sse(builder, src, dst);
+}
+
+static void
+ec_code_sse_xor2(ec_code_builder_t *builder, uint32_t dst, uint32_t src)
+{
+ ec_code_intel_op_xor_sse2sse(builder, src, dst);
+}
+
+static void
+ec_code_sse_xorm(ec_code_builder_t *builder, uint32_t dst, uint32_t idx,
+ uint32_t bit)
+{
+ if (builder->linear) {
+ ec_code_intel_op_xor_m2sse(
+ builder, REG_SI, REG_DX, 1,
+ idx * builder->width * builder->bits + bit * builder->width, dst);
+ } else {
+ if (builder->base != idx) {
+ ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8,
+ REG_AX);
+ builder->base = idx;
+ }
+ ec_code_intel_op_xor_m2sse(builder, REG_AX, REG_DX, 1,
+ bit * builder->width, dst);
+ }
+}
+
+static char *ec_code_sse_needed_flags[] = {"sse2", NULL};
+
+ec_code_gen_t ec_code_gen_sse = {.name = "sse",
+ .flags = ec_code_sse_needed_flags,
+ .width = 16,
+ .prolog = ec_code_sse_prolog,
+ .epilog = ec_code_sse_epilog,
+ .load = ec_code_sse_load,
+ .store = ec_code_sse_store,
+ .copy = ec_code_sse_copy,
+ .xor2 = ec_code_sse_xor2,
+ .xor3 = NULL,
+ .xorm = ec_code_sse_xorm};
diff --git a/xlators/cluster/ec/src/ec-code-sse.h b/xlators/cluster/ec/src/ec-code-sse.h
new file mode 100644
index 00000000000..f1acbcf894b
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-sse.h
@@ -0,0 +1,18 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_CODE_SSE_H__
+#define __EC_CODE_SSE_H__
+
+#include "ec-code.h"
+
+extern ec_code_gen_t ec_code_gen_sse;
+
+#endif /* __EC_CODE_SSE_H__ */
diff --git a/xlators/cluster/ec/src/ec-code-x64.c b/xlators/cluster/ec/src/ec-code-x64.c
new file mode 100644
index 00000000000..26565b4493f
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-x64.c
@@ -0,0 +1,144 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <errno.h>
+
+#include "ec-code-intel.h"
+
+static ec_code_intel_reg_t ec_code_x64_regmap[] = {
+ REG_AX, REG_CX, REG_BP, REG_8, REG_9, REG_10,
+ REG_11, REG_12, REG_13, REG_14, REG_15};
+
+static void
+ec_code_x64_prolog(ec_code_builder_t *builder)
+{
+ uint32_t i;
+
+ ec_code_intel_op_push_r(builder, REG_BP);
+ if (!builder->linear) {
+ ec_code_intel_op_push_r(builder, REG_BX);
+ }
+ if (builder->regs > 11) {
+ ec_code_error(builder, EINVAL);
+ return;
+ }
+ for (i = 7; i < builder->regs; i++) {
+ ec_code_intel_op_push_r(builder, ec_code_x64_regmap[i]);
+ }
+
+ builder->loop = builder->address;
+}
+
+static void
+ec_code_x64_epilog(ec_code_builder_t *builder)
+{
+ uint32_t i;
+
+ ec_code_intel_op_add_i2r(builder, 8, REG_DX);
+ ec_code_intel_op_add_i2r(builder, 8, REG_DI);
+ ec_code_intel_op_test_i2r(builder, builder->width - 1, REG_DX);
+ ec_code_intel_op_jne(builder, builder->loop);
+
+ if (builder->regs > 11) {
+ ec_code_error(builder, EINVAL);
+ return;
+ }
+ for (i = builder->regs; i > 7; i--) {
+ ec_code_intel_op_pop_r(builder, ec_code_x64_regmap[i - 1]);
+ }
+ if (!builder->linear) {
+ ec_code_intel_op_pop_r(builder, REG_BX);
+ }
+ ec_code_intel_op_pop_r(builder, REG_BP);
+ ec_code_intel_op_ret(builder, 0);
+}
+
+static void
+ec_code_x64_load(ec_code_builder_t *builder, uint32_t dst, uint32_t idx,
+ uint32_t bit)
+{
+ dst = ec_code_x64_regmap[dst];
+
+ if (builder->linear) {
+ ec_code_intel_op_mov_m2r(
+ builder, REG_SI, REG_DX, 1,
+ idx * builder->width * builder->bits + bit * builder->width, dst);
+ } else {
+ if (builder->base != idx) {
+ ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8,
+ REG_BX);
+ builder->base = idx;
+ }
+ ec_code_intel_op_mov_m2r(builder, REG_BX, REG_DX, 1,
+ bit * builder->width, dst);
+ }
+}
+
+static void
+ec_code_x64_store(ec_code_builder_t *builder, uint32_t src, uint32_t bit)
+{
+ src = ec_code_x64_regmap[src];
+
+ ec_code_intel_op_mov_r2m(builder, src, REG_DI, REG_NULL, 0,
+ bit * builder->width);
+}
+
+static void
+ec_code_x64_copy(ec_code_builder_t *builder, uint32_t dst, uint32_t src)
+{
+ dst = ec_code_x64_regmap[dst];
+ src = ec_code_x64_regmap[src];
+
+ ec_code_intel_op_mov_r2r(builder, src, dst);
+}
+
+static void
+ec_code_x64_xor2(ec_code_builder_t *builder, uint32_t dst, uint32_t src)
+{
+ dst = ec_code_x64_regmap[dst];
+ src = ec_code_x64_regmap[src];
+
+ ec_code_intel_op_xor_r2r(builder, src, dst);
+}
+
+static void
+ec_code_x64_xorm(ec_code_builder_t *builder, uint32_t dst, uint32_t idx,
+ uint32_t bit)
+{
+ dst = ec_code_x64_regmap[dst];
+
+ if (builder->linear) {
+ ec_code_intel_op_xor_m2r(
+ builder, REG_SI, REG_DX, 1,
+ idx * builder->width * builder->bits + bit * builder->width, dst);
+ } else {
+ if (builder->base != idx) {
+ ec_code_intel_op_mov_m2r(builder, REG_SI, REG_NULL, 0, idx * 8,
+ REG_BX);
+ builder->base = idx;
+ }
+ ec_code_intel_op_xor_m2r(builder, REG_BX, REG_DX, 1,
+ bit * builder->width, dst);
+ }
+}
+
+static char *ec_code_x64_needed_flags[] = {NULL};
+
+ec_code_gen_t ec_code_gen_x64 = {.name = "x64",
+ .flags = ec_code_x64_needed_flags,
+ .width = sizeof(uint64_t),
+ .prolog = ec_code_x64_prolog,
+ .epilog = ec_code_x64_epilog,
+ .load = ec_code_x64_load,
+ .store = ec_code_x64_store,
+ .copy = ec_code_x64_copy,
+ .xor2 = ec_code_x64_xor2,
+ .xor3 = NULL,
+ .xorm = ec_code_x64_xorm};
diff --git a/xlators/cluster/ec/src/ec-code-x64.h b/xlators/cluster/ec/src/ec-code-x64.h
new file mode 100644
index 00000000000..bd8174e4bf5
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code-x64.h
@@ -0,0 +1,18 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_CODE_X64_H__
+#define __EC_CODE_X64_H__
+
+#include "ec-code.h"
+
+extern ec_code_gen_t ec_code_gen_x64;
+
+#endif /* __EC_CODE_X64_H__ */
diff --git a/xlators/cluster/ec/src/ec-code.c b/xlators/cluster/ec/src/ec-code.c
new file mode 100644
index 00000000000..03162ae05a9
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code.c
@@ -0,0 +1,1060 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <ctype.h>
+
+#include <glusterfs/syscall.h>
+
+#include "ec-mem-types.h"
+#include "ec-code.h"
+#include "ec-messages.h"
+#include "ec-code-c.h"
+#include "ec-helpers.h"
+
+#ifdef USE_EC_DYNAMIC_X64
+#include "ec-code-x64.h"
+#endif
+
+#ifdef USE_EC_DYNAMIC_SSE
+#include "ec-code-sse.h"
+#endif
+
+#ifdef USE_EC_DYNAMIC_AVX
+#include "ec-code-avx.h"
+#endif
+
+#define EC_CODE_SIZE (1024 * 64)
+#define EC_CODE_ALIGN 4096
+
+#define EC_CODE_CHUNK_MIN_SIZE 512
+
+#define EC_PROC_BUFFER_SIZE 4096
+
+#define PROC_CPUINFO "/proc/cpuinfo"
+
+struct _ec_code_proc;
+typedef struct _ec_code_proc ec_code_proc_t;
+
+struct _ec_code_proc {
+ int32_t fd;
+ gf_boolean_t eof;
+ gf_boolean_t error;
+ gf_boolean_t skip;
+ ssize_t size;
+ ssize_t pos;
+ char buffer[EC_PROC_BUFFER_SIZE];
+};
+
+static ec_code_gen_t *ec_code_gen_table[] = {
+#ifdef USE_EC_DYNAMIC_AVX
+ &ec_code_gen_avx,
+#endif
+#ifdef USE_EC_DYNAMIC_SSE
+ &ec_code_gen_sse,
+#endif
+#ifdef USE_EC_DYNAMIC_X64
+ &ec_code_gen_x64,
+#endif
+ NULL};
+
+static void
+ec_code_arg_set(ec_code_arg_t *arg, uint32_t value)
+{
+ arg->value = value;
+}
+
+static void
+ec_code_arg_assign(ec_code_builder_t *builder, ec_code_op_t *op,
+ ec_code_arg_t *arg, uint32_t reg)
+{
+ arg->value = reg;
+
+ if (builder->regs <= reg) {
+ builder->regs = reg + 1;
+ }
+}
+
+static void
+ec_code_arg_use(ec_code_builder_t *builder, ec_code_op_t *op,
+ ec_code_arg_t *arg, uint32_t reg)
+{
+ arg->value = reg;
+}
+
+static void
+ec_code_arg_update(ec_code_builder_t *builder, ec_code_op_t *op,
+ ec_code_arg_t *arg, uint32_t reg)
+{
+ arg->value = reg;
+}
+
+static ec_code_op_t *
+ec_code_op_next(ec_code_builder_t *builder)
+{
+ ec_code_op_t *op;
+
+ op = &builder->ops[builder->count++];
+ memset(op, 0, sizeof(ec_code_op_t));
+
+ return op;
+}
+
+static void
+ec_code_load(ec_code_builder_t *builder, uint32_t bit, uint32_t offset)
+{
+ ec_code_op_t *op;
+
+ op = ec_code_op_next(builder);
+
+ op->op = EC_GF_OP_LOAD;
+ ec_code_arg_assign(builder, op, &op->arg1, builder->map[bit]);
+ ec_code_arg_set(&op->arg2, offset);
+ ec_code_arg_set(&op->arg3, bit);
+}
+
+static void
+ec_code_store(ec_code_builder_t *builder, uint32_t reg, uint32_t bit)
+{
+ ec_code_op_t *op;
+
+ op = ec_code_op_next(builder);
+
+ op->op = EC_GF_OP_STORE;
+ ec_code_arg_use(builder, op, &op->arg1, builder->map[reg]);
+ ec_code_arg_set(&op->arg2, 0);
+ ec_code_arg_set(&op->arg3, bit);
+}
+
+static void
+ec_code_copy(ec_code_builder_t *builder, uint32_t dst, uint32_t src)
+{
+ ec_code_op_t *op;
+
+ op = ec_code_op_next(builder);
+
+ op->op = EC_GF_OP_COPY;
+ ec_code_arg_assign(builder, op, &op->arg1, builder->map[dst]);
+ ec_code_arg_use(builder, op, &op->arg2, builder->map[src]);
+ ec_code_arg_set(&op->arg3, 0);
+}
+
+static void
+ec_code_xor2(ec_code_builder_t *builder, uint32_t dst, uint32_t src)
+{
+ ec_code_op_t *op;
+
+ op = ec_code_op_next(builder);
+
+ op->op = EC_GF_OP_XOR2;
+ ec_code_arg_update(builder, op, &op->arg1, builder->map[dst]);
+ ec_code_arg_use(builder, op, &op->arg2, builder->map[src]);
+ ec_code_arg_set(&op->arg3, 0);
+}
+
+static void
+ec_code_xor3(ec_code_builder_t *builder, uint32_t dst, uint32_t src1,
+ uint32_t src2)
+{
+ ec_code_op_t *op;
+
+ if (builder->code->gen->xor3 == NULL) {
+ ec_code_copy(builder, dst, src1);
+ ec_code_xor2(builder, dst, src2);
+
+ return;
+ }
+
+ op = ec_code_op_next(builder);
+
+ op->op = EC_GF_OP_XOR3;
+ ec_code_arg_assign(builder, op, &op->arg1, builder->map[dst]);
+ ec_code_arg_use(builder, op, &op->arg2, builder->map[src1]);
+ ec_code_arg_use(builder, op, &op->arg3, builder->map[src2]);
+}
+
+static void
+ec_code_xorm(ec_code_builder_t *builder, uint32_t bit, uint32_t offset)
+{
+ ec_code_op_t *op;
+
+ op = ec_code_op_next(builder);
+
+ op->op = EC_GF_OP_XORM;
+ ec_code_arg_update(builder, op, &op->arg1, builder->map[bit]);
+ ec_code_arg_set(&op->arg2, offset);
+ ec_code_arg_set(&op->arg3, bit);
+}
+
+static void
+ec_code_dup(ec_code_builder_t *builder, ec_gf_op_t *op)
+{
+ switch (op->op) {
+ case EC_GF_OP_COPY:
+ ec_code_copy(builder, op->arg1, op->arg2);
+ break;
+ case EC_GF_OP_XOR2:
+ ec_code_xor2(builder, op->arg1, op->arg2);
+ break;
+ case EC_GF_OP_XOR3:
+ ec_code_xor3(builder, op->arg1, op->arg2, op->arg3);
+ break;
+ default:
+ break;
+ }
+}
+
+static void
+ec_code_gf_load(ec_code_builder_t *builder, uint32_t offset)
+{
+ uint32_t i;
+
+ for (i = 0; i < builder->code->gf->bits; i++) {
+ ec_code_load(builder, i, offset);
+ }
+}
+
+static void
+ec_code_gf_load_xor(ec_code_builder_t *builder, uint32_t offset)
+{
+ uint32_t i;
+
+ for (i = 0; i < builder->code->gf->bits; i++) {
+ ec_code_xorm(builder, i, offset);
+ }
+}
+
+static void
+ec_code_gf_store(ec_code_builder_t *builder)
+{
+ uint32_t i;
+
+ for (i = 0; i < builder->code->gf->bits; i++) {
+ ec_code_store(builder, i, i);
+ }
+}
+
+static void
+ec_code_gf_clear(ec_code_builder_t *builder)
+{
+ uint32_t i;
+
+ ec_code_xor2(builder, 0, 0);
+ for (i = 0; i < builder->code->gf->bits; i++) {
+ ec_code_store(builder, 0, i);
+ }
+}
+
+static void
+ec_code_gf_mul(ec_code_builder_t *builder, uint32_t value)
+{
+ ec_gf_mul_t *mul;
+ ec_gf_op_t *op;
+ uint32_t map[EC_GF_MAX_REGS];
+ int32_t i;
+
+ mul = builder->code->gf->table[value];
+ for (op = mul->ops; op->op != EC_GF_OP_END; op++) {
+ ec_code_dup(builder, op);
+ }
+
+ for (i = 0; i < mul->regs; i++) {
+ map[i] = builder->map[mul->map[i]];
+ }
+ memcpy(builder->map, map, sizeof(uint32_t) * mul->regs);
+}
+
+static ec_code_builder_t *
+ec_code_prepare(ec_code_t *code, uint32_t count, uint32_t width,
+ gf_boolean_t linear)
+{
+ ec_code_builder_t *builder;
+ uint32_t i;
+
+ count *= code->gf->bits + code->gf->max_ops;
+ count += code->gf->bits;
+ builder = GF_MALLOC(
+ sizeof(ec_code_builder_t) + sizeof(ec_code_op_t) * count,
+ ec_mt_ec_code_builder_t);
+ if (builder == NULL) {
+ return EC_ERR(ENOMEM);
+ }
+
+ builder->address = 0;
+ builder->code = code;
+ builder->size = 0;
+ builder->count = 0;
+ builder->regs = 0;
+ builder->error = 0;
+ builder->bits = code->gf->bits;
+ builder->width = width;
+ builder->data = NULL;
+ builder->linear = linear;
+ builder->base = -1;
+
+ for (i = 0; i < EC_GF_MAX_REGS; i++) {
+ builder->map[i] = i;
+ }
+
+ return builder;
+}
+
+static size_t
+ec_code_space_size(void)
+{
+ return (sizeof(ec_code_space_t) + 15) & ~15;
+}
+
+static size_t
+ec_code_chunk_size(void)
+{
+ return (sizeof(ec_code_chunk_t) + 15) & ~15;
+}
+
+static ec_code_chunk_t *
+ec_code_chunk_from_space(ec_code_space_t *space)
+{
+ return (ec_code_chunk_t *)((uintptr_t)space + ec_code_space_size());
+}
+
+static void *
+ec_code_to_executable(ec_code_space_t *space, void *addr)
+{
+ return (void *)((uintptr_t)addr - (uintptr_t)space +
+ (uintptr_t)space->exec);
+}
+
+static void *
+ec_code_from_executable(ec_code_space_t *space, void *addr)
+{
+ return (void *)((uintptr_t)addr - (uintptr_t)space->exec +
+ (uintptr_t)space);
+}
+
+static void *
+ec_code_func_from_chunk(ec_code_chunk_t *chunk, void **exec)
+{
+ void *addr;
+
+ addr = (void *)((uintptr_t)chunk + ec_code_chunk_size());
+
+ *exec = ec_code_to_executable(chunk->space, addr);
+
+ return addr;
+}
+
+static ec_code_chunk_t *
+ec_code_chunk_from_func(ec_code_func_linear_t func)
+{
+ ec_code_chunk_t *chunk;
+
+ chunk = (ec_code_chunk_t *)((uintptr_t)func - ec_code_chunk_size());
+
+ return ec_code_from_executable(chunk->space, chunk);
+}
+
+static ec_code_chunk_t *
+ec_code_chunk_split(ec_code_chunk_t *chunk, size_t size)
+{
+ ec_code_chunk_t *extra;
+ ssize_t avail;
+
+ avail = chunk->size - size - ec_code_chunk_size();
+ if (avail > 0) {
+ extra = (ec_code_chunk_t *)((uintptr_t)chunk + chunk->size - avail);
+ extra->space = chunk->space;
+ extra->size = avail;
+ list_add(&extra->list, &chunk->list);
+ chunk->size = size;
+ }
+ list_del_init(&chunk->list);
+
+ return chunk;
+}
+
+static gf_boolean_t
+ec_code_chunk_touch(ec_code_chunk_t *prev, ec_code_chunk_t *next)
+{
+ uintptr_t end;
+
+ end = (uintptr_t)prev + ec_code_chunk_size() + prev->size;
+ return (end == (uintptr_t)next);
+}
+
+static ec_code_space_t *
+ec_code_space_create(ec_code_t *code, size_t size)
+{
+ char path[] = GLUSTERFS_LIBEXECDIR "/ec-code-dynamic.XXXXXX";
+ ec_code_space_t *space;
+ void *exec;
+ int32_t fd, err;
+
+ /* We need to create memory areas to store the generated dynamic code.
+ * Obviously these areas need to be written to be able to create the
+ * code and they also need to be executable to execute it.
+ *
+ * However it's a bad practice to have a memory region that is both
+ * writable *and* executable. In fact, selinux forbids this and causes
+ * attempts to do so to fail (unless specifically configured).
+ *
+ * To solve the problem we'll use two distinct memory areas mapped to
+ * the same physical storage. One of the memory areas will have write
+ * permission, and the other will have execute permission. Both areas
+ * will have the same contents. The physical storage will be a regular
+ * file that will be mmapped to both areas.
+ */
+
+ /* We need to create a temporary file as the backend storage for the
+ * memory mapped areas. */
+ /* coverity[secure_temp] mkstemp uses 0600 as the mode and is safe */
+ fd = mkstemp(path);
+ if (fd < 0) {
+ err = errno;
+ gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED,
+ "Unable to create a temporary file for the ec dynamic "
+ "code");
+ space = EC_ERR(err);
+ goto done;
+ }
+ /* Once created we don't need to keep it in the file system. It will
+ * still exist until we close the last file descriptor or unmap the
+ * memory areas bound to the file. */
+ sys_unlink(path);
+
+ size = (size + EC_CODE_ALIGN - 1) & ~(EC_CODE_ALIGN - 1);
+ if (sys_ftruncate(fd, size) < 0) {
+ err = errno;
+ gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED,
+ "Unable to resize the file for the ec dynamic code");
+ space = EC_ERR(err);
+ goto done_close;
+ }
+
+ /* This creates an executable memory area to be able to run the
+ * generated fragments of code. */
+ exec = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
+ if (exec == MAP_FAILED) {
+ err = errno;
+ gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED,
+ "Unable to map the executable area for the ec dynamic "
+ "code");
+ space = EC_ERR(err);
+ goto done_close;
+ }
+ /* It's not important to check the return value of mlock(). If it fails
+ * everything will continue to work normally. */
+ mlock(exec, size);
+
+ /* This maps a read/write memory area to be able to create the dynamici
+ * code. */
+ space = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (space == MAP_FAILED) {
+ err = errno;
+ gf_msg(THIS->name, GF_LOG_ERROR, err, EC_MSG_DYN_CREATE_FAILED,
+ "Unable to map the writable area for the ec dynamic "
+ "code");
+ space = EC_ERR(err);
+
+ munmap(exec, size);
+
+ goto done_close;
+ }
+
+ space->exec = exec;
+ space->size = size;
+ space->code = code;
+ list_add_tail(&space->list, &code->spaces);
+ INIT_LIST_HEAD(&space->chunks);
+
+done_close:
+ /* If everything has succeeded, we already have the memory areas
+ * mapped. We don't need the file descriptor anymore because the
+ * backend storage will be there until the mmap()'d regions are
+ * unmapped. */
+ sys_close(fd);
+done:
+ return space;
+}
+
+static void
+ec_code_space_destroy(ec_code_space_t *space)
+{
+ list_del_init(&space->list);
+
+ munmap(space->exec, space->size);
+ munmap(space, space->size);
+}
+
+static void
+ec_code_chunk_merge(ec_code_chunk_t *chunk)
+{
+ ec_code_chunk_t *item, *tmp;
+
+ list_for_each_entry_safe(item, tmp, &chunk->space->chunks, list)
+ {
+ if ((uintptr_t)item > (uintptr_t)chunk) {
+ list_add_tail(&chunk->list, &item->list);
+ if (ec_code_chunk_touch(chunk, item)) {
+ chunk->size += item->size + ec_code_chunk_size();
+ list_del_init(&item->list);
+ }
+
+ goto check;
+ }
+ if (ec_code_chunk_touch(item, chunk)) {
+ item->size += chunk->size + ec_code_chunk_size();
+ list_del_init(&item->list);
+ chunk = item;
+ }
+ }
+ list_add_tail(&chunk->list, &chunk->space->chunks);
+
+check:
+ if (chunk->size ==
+ chunk->space->size - ec_code_space_size() - ec_code_chunk_size()) {
+ ec_code_space_destroy(chunk->space);
+ }
+}
+
+static ec_code_chunk_t *
+ec_code_space_alloc(ec_code_t *code, size_t size)
+{
+ ec_code_space_t *space;
+ ec_code_chunk_t *chunk;
+ size_t map_size;
+
+ /* To minimize fragmentation, we only allocate chunks of sizes multiples
+ * of EC_CODE_CHUNK_MIN_SIZE. */
+ size = ((size + ec_code_chunk_size() + EC_CODE_CHUNK_MIN_SIZE - 1) &
+ ~(EC_CODE_CHUNK_MIN_SIZE - 1)) -
+ ec_code_chunk_size();
+ list_for_each_entry(space, &code->spaces, list)
+ {
+ list_for_each_entry(chunk, &space->chunks, list)
+ {
+ if (chunk->size >= size) {
+ goto out;
+ }
+ }
+ }
+
+ map_size = EC_CODE_SIZE - ec_code_space_size() - ec_code_chunk_size();
+ if (map_size < size) {
+ map_size = size;
+ }
+ space = ec_code_space_create(code, map_size);
+ if (EC_IS_ERR(space)) {
+ return (ec_code_chunk_t *)space;
+ }
+
+ chunk = ec_code_chunk_from_space(space);
+ chunk->size = map_size - ec_code_space_size() - ec_code_chunk_size();
+ list_add(&chunk->list, &space->chunks);
+
+out:
+ chunk->space = space;
+
+ return ec_code_chunk_split(chunk, size);
+}
+
+static ec_code_chunk_t *
+ec_code_alloc(ec_code_t *code, uint32_t size)
+{
+ ec_code_chunk_t *chunk;
+
+ LOCK(&code->lock);
+
+ chunk = ec_code_space_alloc(code, size);
+
+ UNLOCK(&code->lock);
+
+ return chunk;
+}
+
+static void
+ec_code_free(ec_code_chunk_t *chunk)
+{
+ gf_lock_t *lock;
+
+ lock = &chunk->space->code->lock;
+ LOCK(lock);
+
+ ec_code_chunk_merge(chunk);
+
+ UNLOCK(lock);
+}
+
+static int32_t
+ec_code_write(ec_code_builder_t *builder)
+{
+ ec_code_gen_t *gen;
+ ec_code_op_t *op;
+ uint32_t i;
+
+ builder->error = 0;
+ builder->size = 0;
+ builder->address = 0;
+ builder->base = -1;
+
+ gen = builder->code->gen;
+ gen->prolog(builder);
+ for (i = 0; i < builder->count; i++) {
+ op = &builder->ops[i];
+ switch (op->op) {
+ case EC_GF_OP_LOAD:
+ gen->load(builder, op->arg1.value, op->arg2.value,
+ op->arg3.value);
+ break;
+ case EC_GF_OP_STORE:
+ gen->store(builder, op->arg1.value, op->arg3.value);
+ break;
+ case EC_GF_OP_COPY:
+ gen->copy(builder, op->arg1.value, op->arg2.value);
+ break;
+ case EC_GF_OP_XOR2:
+ gen->xor2(builder, op->arg1.value, op->arg2.value);
+ break;
+ case EC_GF_OP_XOR3:
+ gen->xor3(builder, op->arg1.value, op->arg2.value,
+ op->arg3.value);
+ break;
+ case EC_GF_OP_XORM:
+ gen->xorm(builder, op->arg1.value, op->arg2.value,
+ op->arg3.value);
+ break;
+ default:
+ break;
+ }
+ }
+ gen->epilog(builder);
+
+ return builder->error;
+}
+
+static void *
+ec_code_compile(ec_code_builder_t *builder)
+{
+ ec_code_chunk_t *chunk;
+ void *func;
+ int32_t err;
+
+ err = ec_code_write(builder);
+ if (err != 0) {
+ return EC_ERR(err);
+ }
+
+ chunk = ec_code_alloc(builder->code, builder->size);
+ if (EC_IS_ERR(chunk)) {
+ return chunk;
+ }
+ builder->data = ec_code_func_from_chunk(chunk, &func);
+
+ err = ec_code_write(builder);
+ if (err != 0) {
+ ec_code_free(chunk);
+
+ return EC_ERR(err);
+ }
+
+ GF_FREE(builder);
+
+ return func;
+}
+
+ec_code_t *
+ec_code_create(ec_gf_t *gf, ec_code_gen_t *gen)
+{
+ ec_code_t *code;
+
+ code = GF_MALLOC(sizeof(ec_code_t), ec_mt_ec_code_t);
+ if (code == NULL) {
+ return EC_ERR(ENOMEM);
+ }
+ memset(code, 0, sizeof(ec_code_t));
+ INIT_LIST_HEAD(&code->spaces);
+ LOCK_INIT(&code->lock);
+
+ code->gf = gf;
+ code->gen = gen;
+
+ return code;
+}
+
+void
+ec_code_destroy(ec_code_t *code)
+{
+ if (!list_empty(&code->spaces)) {
+ }
+
+ LOCK_DESTROY(&code->lock);
+
+ GF_FREE(code);
+}
+
+static uint32_t
+ec_code_value_next(uint32_t *values, uint32_t count, uint32_t *offset)
+{
+ uint32_t i, next;
+
+ next = 0;
+ for (i = *offset + 1; i < count; i++) {
+ next = values[i];
+ if (next != 0) {
+ break;
+ }
+ }
+ *offset = i;
+
+ return next;
+}
+
+static void *
+ec_code_build_dynamic(ec_code_t *code, uint32_t width, uint32_t *values,
+ uint32_t count, gf_boolean_t linear)
+{
+ ec_code_builder_t *builder;
+ uint32_t offset, val, next;
+
+ builder = ec_code_prepare(code, count, width, linear);
+ if (EC_IS_ERR(builder)) {
+ return builder;
+ }
+
+ offset = -1;
+ next = ec_code_value_next(values, count, &offset);
+ if (next != 0) {
+ ec_code_gf_load(builder, offset);
+ do {
+ val = next;
+ next = ec_code_value_next(values, count, &offset);
+ if (next != 0) {
+ ec_code_gf_mul(builder, ec_gf_div(code->gf, val, next));
+ ec_code_gf_load_xor(builder, offset);
+ }
+ } while (next != 0);
+ ec_code_gf_mul(builder, val);
+ ec_code_gf_store(builder);
+ } else {
+ ec_code_gf_clear(builder);
+ }
+
+ return ec_code_compile(builder);
+}
+
+static void *
+ec_code_build(ec_code_t *code, uint32_t width, uint32_t *values, uint32_t count,
+ gf_boolean_t linear)
+{
+ void *func;
+
+ if (code->gen != NULL) {
+ func = ec_code_build_dynamic(code, width, values, count, linear);
+ if (!EC_IS_ERR(func)) {
+ return func;
+ }
+
+ gf_msg_debug(THIS->name, GF_LOG_DEBUG,
+ "Unable to generate dynamic code. Falling back "
+ "to precompiled code");
+
+ /* The dynamic code generation shouldn't fail in normal
+ * conditions, but if it fails at some point, it's very
+ * probable that it will fail again, so we completely disable
+ * dynamic code generation. */
+ code->gen = NULL;
+ }
+
+ ec_code_c_prepare(code->gf, values, count);
+
+ if (linear) {
+ return ec_code_c_linear;
+ }
+
+ return ec_code_c_interleaved;
+}
+
+ec_code_func_linear_t
+ec_code_build_linear(ec_code_t *code, uint32_t width, uint32_t *values,
+ uint32_t count)
+{
+ return (ec_code_func_linear_t)ec_code_build(code, width, values, count,
+ _gf_true);
+}
+
+ec_code_func_interleaved_t
+ec_code_build_interleaved(ec_code_t *code, uint32_t width, uint32_t *values,
+ uint32_t count)
+{
+ return (ec_code_func_interleaved_t)ec_code_build(code, width, values, count,
+ _gf_false);
+}
+
+void
+ec_code_release(ec_code_t *code, ec_code_func_t *func)
+{
+ if ((func->linear != ec_code_c_linear) &&
+ (func->interleaved != ec_code_c_interleaved)) {
+ ec_code_free(ec_code_chunk_from_func(func->linear));
+ }
+}
+
+void
+ec_code_error(ec_code_builder_t *builder, int32_t error)
+{
+ if (builder->error == 0) {
+ gf_msg(THIS->name, GF_LOG_ERROR, error, EC_MSG_DYN_CODEGEN_FAILED,
+ "Failed to generate dynamic code");
+ builder->error = error;
+ }
+}
+
+void
+ec_code_emit(ec_code_builder_t *builder, uint8_t *bytes, uint32_t count)
+{
+ if (builder->error != 0) {
+ return;
+ }
+
+ if (builder->data != NULL) {
+ memcpy(builder->data + builder->size, bytes, count);
+ }
+
+ builder->size += count;
+ builder->address += count;
+}
+
+static char *
+ec_code_proc_trim_left(char *text, ssize_t *length)
+{
+ ssize_t len;
+
+ for (len = *length; (len > 0) && isspace(*text); len--) {
+ text++;
+ }
+ *length = len;
+
+ return text;
+}
+
+static char *
+ec_code_proc_trim_right(char *text, ssize_t *length, char sep)
+{
+ char *last;
+ ssize_t len;
+
+ len = *length;
+
+ last = text;
+ for (len = *length; (len > 0) && (*text != sep); len--) {
+ if (!isspace(*text)) {
+ last = text + 1;
+ }
+ text++;
+ }
+ *last = 0;
+ *length = len;
+
+ return text;
+}
+
+static char *
+ec_code_proc_line_parse(ec_code_proc_t *file, ssize_t *length)
+{
+ char *text, *end;
+ ssize_t len;
+
+ len = file->size - file->pos;
+ text = ec_code_proc_trim_left(file->buffer + file->pos, &len);
+ end = ec_code_proc_trim_right(text, &len, '\n');
+ if (len == 0) {
+ if (!file->eof) {
+ if (text == file->buffer) {
+ file->size = file->pos = 0;
+ file->skip = _gf_true;
+ } else {
+ file->size = file->pos = end - text;
+ memmove(file->buffer, text, file->pos + 1);
+ }
+ len = sys_read(file->fd, file->buffer + file->pos,
+ sizeof(file->buffer) - file->pos - 1);
+ if (len > 0) {
+ file->size += len;
+ }
+ file->error = len < 0;
+ file->eof = len <= 0;
+
+ return NULL;
+ }
+ file->size = file->pos = 0;
+ } else {
+ file->pos = end - file->buffer + 1;
+ }
+
+ *length = end - text;
+
+ if (file->skip) {
+ file->skip = _gf_false;
+ text = NULL;
+ }
+
+ return text;
+}
+
+static char *
+ec_code_proc_line(ec_code_proc_t *file, ssize_t *length)
+{
+ char *text;
+
+ text = NULL;
+ while (!file->eof) {
+ text = ec_code_proc_line_parse(file, length);
+ if (text != NULL) {
+ break;
+ }
+ }
+
+ return text;
+}
+
+static char *
+ec_code_proc_split(char *text, ssize_t *length, char sep)
+{
+ text = ec_code_proc_trim_right(text, length, sep);
+ if (*length == 0) {
+ return NULL;
+ }
+ (*length)--;
+ text++;
+
+ return ec_code_proc_trim_left(text, length);
+}
+
+static uint32_t
+ec_code_cpu_check(uint32_t idx, char *list, uint32_t count)
+{
+ ec_code_gen_t *gen;
+ char **ptr;
+ char *table[count + 1];
+ uint32_t i;
+
+ for (i = 0; i < count; i++) {
+ table[i] = list;
+ list += strlen(list) + 1;
+ }
+
+ gen = ec_code_gen_table[idx];
+ while (gen != NULL) {
+ for (ptr = gen->flags; *ptr != NULL; ptr++) {
+ for (i = 0; i < count; i++) {
+ if (strcmp(*ptr, table[i]) == 0) {
+ break;
+ }
+ }
+ if (i >= count) {
+ gen = ec_code_gen_table[++idx];
+ break;
+ }
+ }
+ if (*ptr == NULL) {
+ break;
+ }
+ }
+
+ return idx;
+}
+
+ec_code_gen_t *
+ec_code_detect(xlator_t *xl, const char *def)
+{
+ ec_code_proc_t file;
+ ec_code_gen_t *gen = NULL;
+ char *line, *data, *list;
+ ssize_t length;
+ uint32_t count, base, select;
+
+ if (strcmp(def, "none") == 0) {
+ gf_msg(xl->name, GF_LOG_INFO, 0, EC_MSG_EXTENSION_NONE,
+ "Not using any cpu extensions");
+
+ return NULL;
+ }
+
+ file.fd = sys_open(PROC_CPUINFO, O_RDONLY, 0);
+ if (file.fd < 0) {
+ goto out;
+ }
+ file.size = file.pos = 0;
+ file.eof = file.error = file.skip = _gf_false;
+
+ select = 0;
+ if (strcmp(def, "auto") != 0) {
+ while (ec_code_gen_table[select] != NULL) {
+ if (strcmp(ec_code_gen_table[select]->name, def) == 0) {
+ break;
+ }
+ select++;
+ }
+ if (ec_code_gen_table[select] == NULL) {
+ gf_msg(xl->name, GF_LOG_WARNING, EINVAL, EC_MSG_EXTENSION_UNKNOWN,
+ "CPU extension '%s' is not known. Not using any cpu "
+ "extensions",
+ def);
+
+ return NULL;
+ }
+ } else {
+ def = NULL;
+ }
+
+ while ((line = ec_code_proc_line(&file, &length)) != NULL) {
+ data = ec_code_proc_split(line, &length, ':');
+ if ((data != NULL) && (strcmp(line, "flags") == 0)) {
+ list = data;
+ count = 0;
+ while ((data != NULL) && (*data != 0)) {
+ count++;
+ data = ec_code_proc_split(data, &length, ' ');
+ }
+ base = select;
+ select = ec_code_cpu_check(select, list, count);
+ if ((base != select) && (def != NULL)) {
+ gf_msg(xl->name, GF_LOG_WARNING, ENOTSUP,
+ EC_MSG_EXTENSION_UNSUPPORTED,
+ "CPU extension '%s' is not supported", def);
+ def = NULL;
+ }
+ }
+ }
+
+ if (file.error) {
+ gf_msg(xl->name, GF_LOG_WARNING, 0, EC_MSG_EXTENSION_FAILED,
+ "Unable to determine supported CPU extensions. Not using any "
+ "cpu extensions");
+
+ gen = NULL;
+ } else {
+ gen = ec_code_gen_table[select];
+ if (gen == NULL) {
+ gf_msg(xl->name, GF_LOG_INFO, 0, EC_MSG_EXTENSION_NONE,
+ "Not using any cpu extensions");
+ } else {
+ gf_msg(xl->name, GF_LOG_INFO, 0, EC_MSG_EXTENSION,
+ "Using '%s' CPU extensions", gen->name);
+ }
+ }
+
+ sys_close(file.fd);
+
+out:
+ return gen;
+}
diff --git a/xlators/cluster/ec/src/ec-code.h b/xlators/cluster/ec/src/ec-code.h
new file mode 100644
index 00000000000..75fb35d93e3
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-code.h
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_CODE_H__
+#define __EC_CODE_H__
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/list.h>
+
+#include "ec-types.h"
+#include "ec-galois.h"
+
+ec_code_gen_t *
+ec_code_detect(xlator_t *xl, const char *def);
+
+ec_code_t *
+ec_code_create(ec_gf_t *gf, ec_code_gen_t *gen);
+
+void
+ec_code_destroy(ec_code_t *code);
+
+ec_code_func_linear_t
+ec_code_build_linear(ec_code_t *code, uint32_t width, uint32_t *values,
+ uint32_t count);
+ec_code_func_interleaved_t
+ec_code_build_interleaved(ec_code_t *code, uint32_t width, uint32_t *values,
+ uint32_t count);
+void
+ec_code_release(ec_code_t *code, ec_code_func_t *func);
+
+void
+ec_code_error(ec_code_builder_t *builder, int32_t error);
+
+void
+ec_code_emit(ec_code_builder_t *builder, uint8_t *bytes, uint32_t count);
+
+#endif /* __EC_CODE_H__ */
diff --git a/xlators/cluster/ec/src/ec-combine.c b/xlators/cluster/ec/src/ec-combine.c
new file mode 100644
index 00000000000..703a30e2485
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-combine.c
@@ -0,0 +1,995 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <fnmatch.h>
+
+#include "libxlator.h"
+#include <glusterfs/byte-order.h>
+
+#include "ec-types.h"
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-combine.h"
+#include "ec-messages.h"
+#include <glusterfs/quota-common-utils.h>
+
+#define EC_QUOTA_PREFIX "trusted.glusterfs.quota."
+
+#define EC_MISSING_DATA ((data_t *)1ULL)
+
+struct _ec_dict_info;
+typedef struct _ec_dict_info ec_dict_info_t;
+
+struct _ec_dict_combine;
+typedef struct _ec_dict_combine ec_dict_combine_t;
+
+struct _ec_dict_info {
+ dict_t *dict;
+ int32_t count;
+};
+
+struct _ec_dict_combine {
+ ec_cbk_data_t *cbk;
+ int32_t which;
+};
+
+int32_t
+ec_combine_write(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
+{
+ int valid = 0;
+
+ if (!fop || !dst || !src)
+ return 0;
+
+ switch (fop->id) {
+ case GF_FOP_REMOVEXATTR:
+ case GF_FOP_FREMOVEXATTR:
+ case GF_FOP_SETXATTR:
+ case GF_FOP_FSETXATTR:
+ return 1;
+
+ case GF_FOP_SYMLINK:
+ case GF_FOP_LINK:
+ case GF_FOP_CREATE:
+ case GF_FOP_MKNOD:
+ case GF_FOP_MKDIR:
+ valid = 3;
+ break;
+ case GF_FOP_UNLINK:
+ case GF_FOP_RMDIR:
+ case GF_FOP_SETATTR:
+ case GF_FOP_FSETATTR:
+ case GF_FOP_TRUNCATE:
+ case GF_FOP_FTRUNCATE:
+ case GF_FOP_WRITE:
+ case GF_FOP_FALLOCATE:
+ case GF_FOP_DISCARD:
+ case GF_FOP_ZEROFILL:
+ valid = 2;
+ break;
+ case GF_FOP_RENAME:
+ valid = 5;
+ break;
+ default:
+ gf_msg_callingfn(fop->xl->name, GF_LOG_WARNING, EINVAL,
+ EC_MSG_INVALID_FOP, "Invalid fop %d", fop->id);
+ return 0;
+ break;
+ }
+
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, valid)) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH,
+ "Mismatching iatt in "
+ "answers of '%s'",
+ gf_fop_list[fop->id]);
+ return 0;
+ }
+ return 1;
+}
+
+void
+ec_iatt_time_merge(int64_t *dst_sec, uint32_t *dst_nsec, int64_t src_sec,
+ uint32_t src_nsec)
+{
+ if ((*dst_sec < src_sec) ||
+ ((*dst_sec == src_sec) && (*dst_nsec < src_nsec))) {
+ *dst_sec = src_sec;
+ *dst_nsec = src_nsec;
+ }
+}
+
+static gf_boolean_t
+ec_iatt_is_trusted(ec_fop_data_t *fop, struct iatt *iatt)
+{
+ uint64_t ino;
+ int32_t i;
+
+ /* Only the top level fop will have fop->locks filled. */
+ while (fop->parent != NULL) {
+ fop = fop->parent;
+ }
+
+ /* Lookups are special requests always done without locks taken but they
+ * require to be able to identify differences between bricks. Special
+ * handling of these differences is already done in lookup specific code
+ * so we shouldn't ignore any difference here and consider all iatt
+ * structures as trusted. */
+ if (fop->id == GF_FOP_LOOKUP) {
+ return _gf_true;
+ }
+
+ /* Check if the iatt references an inode locked by the current fop */
+ for (i = 0; i < fop->lock_count; i++) {
+ ino = gfid_to_ino(fop->locks[i].lock->loc.inode->gfid);
+ if (iatt->ia_ino == ino) {
+ return _gf_true;
+ }
+ }
+
+ return _gf_false;
+}
+
+int32_t
+ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src,
+ int32_t count)
+{
+ int32_t i;
+ gf_boolean_t failed = _gf_false;
+
+ for (i = 0; i < count; i++) {
+ /* Check for basic fields. These fields must be equal always, even if
+ * the inode is not locked because in these cases the parent inode
+ * will be locked and differences in these fields require changes in
+ * the parent directory. */
+ if ((dst[i].ia_ino != src[i].ia_ino) ||
+ (((dst[i].ia_type == IA_IFBLK) || (dst[i].ia_type == IA_IFCHR)) &&
+ (dst[i].ia_rdev != src[i].ia_rdev)) ||
+ (gf_uuid_compare(dst[i].ia_gfid, src[i].ia_gfid) != 0)) {
+ failed = _gf_true;
+ }
+ /* Check for not so stable fields. These fields can change if the
+ * inode is not locked. */
+ if (!failed && ((dst[i].ia_uid != src[i].ia_uid) ||
+ (dst[i].ia_gid != src[i].ia_gid) ||
+ (st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type) !=
+ st_mode_from_ia(src[i].ia_prot, src[i].ia_type)))) {
+ if (ec_iatt_is_trusted(fop, dst)) {
+ /* If the iatt contains information from an inode that is
+ * locked, these differences are real problems, so we need to
+ * report them. Otherwise we ignore them and don't care which
+ * data is returned. */
+ failed = _gf_true;
+ } else {
+ gf_msg_debug(fop->xl->name, 0,
+ "Ignoring iatt differences because inode is not "
+ "locked");
+ }
+ }
+ if (failed) {
+ gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_IATT_COMBINE_FAIL,
+ "Failed to combine iatt (inode: %" PRIu64 "-%" PRIu64
+ ", "
+ "links: %u-%u, uid: %u-%u, gid: %u-%u, "
+ "rdev: %" PRIu64 "-%" PRIu64 ", size: %" PRIu64 "-%" PRIu64
+ ", "
+ "mode: %o-%o), %s",
+ dst[i].ia_ino, src[i].ia_ino, dst[i].ia_nlink,
+ src[i].ia_nlink, dst[i].ia_uid, src[i].ia_uid, dst[i].ia_gid,
+ src[i].ia_gid, dst[i].ia_rdev, src[i].ia_rdev,
+ dst[i].ia_size, src[i].ia_size,
+ st_mode_from_ia(dst[i].ia_prot, dst[i].ia_type),
+ st_mode_from_ia(src[i].ia_prot, dst[i].ia_type),
+ ec_msg_str(fop));
+
+ return 0;
+ }
+ }
+
+ while (count-- > 0) {
+ dst[count].ia_blocks += src[count].ia_blocks;
+ if (dst[count].ia_blksize < src[count].ia_blksize) {
+ dst[count].ia_blksize = src[count].ia_blksize;
+ }
+
+ ec_iatt_time_merge(&dst[count].ia_atime, &dst[count].ia_atime_nsec,
+ src[count].ia_atime, src[count].ia_atime_nsec);
+ ec_iatt_time_merge(&dst[count].ia_mtime, &dst[count].ia_mtime_nsec,
+ src[count].ia_mtime, src[count].ia_mtime_nsec);
+ ec_iatt_time_merge(&dst[count].ia_ctime, &dst[count].ia_ctime_nsec,
+ src[count].ia_ctime, src[count].ia_ctime_nsec);
+ }
+
+ return 1;
+}
+
+void
+ec_iatt_rebuild(ec_t *ec, struct iatt *iatt, int32_t count, int32_t answers)
+{
+ uint64_t blocks;
+
+ while (count-- > 0) {
+ blocks = iatt[count].ia_blocks * ec->fragments + answers - 1;
+ blocks /= answers;
+ iatt[count].ia_blocks = blocks;
+ }
+}
+
+gf_boolean_t
+ec_xattr_match(dict_t *dict, char *key, data_t *value, void *arg)
+{
+ if ((fnmatch(GF_XATTR_STIME_PATTERN, key, 0) == 0) ||
+ (strcmp(key, GET_LINK_COUNT) == 0) ||
+ (strcmp(key, GLUSTERFS_INODELK_COUNT) == 0) ||
+ (strcmp(key, GLUSTERFS_ENTRYLK_COUNT) == 0) ||
+ (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0)) {
+ return _gf_false;
+ }
+
+ return _gf_true;
+}
+
+gf_boolean_t
+ec_value_ignore(char *key)
+{
+ if ((strcmp(key, GF_CONTENT_KEY) == 0) ||
+ (strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) ||
+ (strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0) ||
+ (strcmp(key, GF_XATTR_LOCKINFO_KEY) == 0) ||
+ (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0) ||
+ (strcmp(key, GLUSTERFS_INODELK_COUNT) == 0) ||
+ (strcmp(key, GLUSTERFS_ENTRYLK_COUNT) == 0) ||
+ (strncmp(key, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) == 0) ||
+ (strcmp(key, DHT_IATT_IN_XDATA_KEY) == 0) ||
+ (strncmp(key, EC_QUOTA_PREFIX, SLEN(EC_QUOTA_PREFIX)) == 0) ||
+ (fnmatch(MARKER_XATTR_PREFIX ".*." XTIME, key, 0) == 0) ||
+ (fnmatch(GF_XATTR_MARKER_KEY ".*", key, 0) == 0) ||
+ (XATTR_IS_NODE_UUID(key))) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+int32_t
+ec_dict_compare(dict_t *dict1, dict_t *dict2)
+{
+ if (are_dicts_equal(dict1, dict2, ec_xattr_match, ec_value_ignore))
+ return 1;
+ return 0;
+}
+
+static uint32_t
+ec_dict_list(data_t **list, ec_cbk_data_t *cbk, int32_t which, char *key,
+ gf_boolean_t global)
+{
+ ec_t *ec = cbk->fop->xl->private;
+ ec_cbk_data_t *ans = NULL;
+ dict_t *dict = NULL;
+ data_t *data;
+ uint32_t count;
+ int32_t i;
+
+ for (i = 0; i < ec->nodes; i++) {
+ /* We initialize the list with EC_MISSING_DATA if we are
+ * returning a global list or the current subvolume belongs
+ * to the group of the accepted answer. Note that if some
+ * subvolume is known to be down before issuing the request,
+ * we won't have any answer from it, so we set here the
+ * appropriate default value. */
+ if (global || ((cbk->mask & (1ULL << i)) != 0)) {
+ list[i] = EC_MISSING_DATA;
+ } else {
+ list[i] = NULL;
+ }
+ }
+
+ count = 0;
+ list_for_each_entry(ans, &cbk->fop->answer_list, answer_list)
+ {
+ if (global || ((cbk->mask & ans->mask) != 0)) {
+ dict = (which == EC_COMBINE_XDATA) ? ans->xdata : ans->dict;
+ data = dict_get(dict, key);
+ if (data != NULL) {
+ list[ans->idx] = data;
+ count++;
+ }
+ }
+ }
+
+ return count;
+}
+
+int32_t
+ec_concat_prepare(xlator_t *xl, char **str, char **sep, char **post,
+ const char *fmt, va_list args)
+{
+ char *tmp;
+ int32_t len;
+
+ len = gf_vasprintf(str, fmt, args);
+ if (len < 0) {
+ return -ENOMEM;
+ }
+
+ tmp = strchr(*str, '{');
+ if (tmp == NULL) {
+ goto out;
+ }
+ *tmp++ = 0;
+ *sep = tmp;
+ tmp = strchr(tmp, '}');
+ if (tmp == NULL) {
+ goto out;
+ }
+ *tmp++ = 0;
+ *post = tmp;
+
+ return 0;
+
+out:
+ gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_FORMAT,
+ "Invalid concat format");
+
+ GF_FREE(*str);
+
+ return -EINVAL;
+}
+
+static int32_t
+ec_dict_data_concat(ec_cbk_data_t *cbk, int32_t which, char *key, char *new_key,
+ const char *def, gf_boolean_t global, const char *fmt, ...)
+{
+ ec_t *ec = cbk->fop->xl->private;
+ data_t *data[ec->nodes];
+ char *str = NULL, *pre = NULL, *sep, *post;
+ dict_t *dict;
+ va_list args;
+ int32_t i, num, len, deflen, prelen, postlen, seplen, tmp;
+ int32_t err;
+
+ ec_dict_list(data, cbk, which, key, global);
+
+ va_start(args, fmt);
+ err = ec_concat_prepare(cbk->fop->xl, &pre, &sep, &post, fmt, args);
+ va_end(args);
+
+ if (err != 0) {
+ return err;
+ }
+
+ prelen = strlen(pre);
+ seplen = strlen(sep);
+ postlen = strlen(post);
+
+ deflen = 0;
+ if (def != NULL) {
+ deflen = strlen(def);
+ }
+
+ len = prelen + postlen + 1;
+ num = -1;
+ for (i = 0; i < ec->nodes; i++) {
+ if (data[i] == NULL) {
+ continue;
+ }
+ if (data[i] == EC_MISSING_DATA) {
+ if (def == NULL) {
+ continue;
+ }
+ len += deflen;
+ } else {
+ len += data[i]->len - 1;
+ }
+ if (num >= 0) {
+ len += seplen;
+ }
+ num++;
+ }
+
+ err = -ENOMEM;
+
+ str = GF_MALLOC(len, gf_common_mt_char);
+ if (str == NULL) {
+ goto out;
+ }
+
+ memcpy(str, pre, prelen);
+ len = prelen;
+ for (i = 0; i < ec->nodes; i++) {
+ if (data[i] == NULL) {
+ continue;
+ }
+ if (data[i] == EC_MISSING_DATA) {
+ if (deflen == 0) {
+ continue;
+ }
+ tmp = deflen;
+ memcpy(str + len, def, tmp);
+ } else {
+ tmp = data[i]->len - 1;
+ memcpy(str + len, data[i]->data, tmp);
+ }
+ len += tmp;
+ if (i < num) {
+ memcpy(str + len, sep, seplen);
+ len += seplen;
+ }
+ }
+ memcpy(str + len, post, postlen + 1);
+
+ dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+ if (new_key) {
+ key = new_key;
+ }
+ err = dict_set_dynstr(dict, key, str);
+ if (err != 0) {
+ goto out;
+ }
+
+ str = NULL;
+
+out:
+ GF_FREE(str);
+ GF_FREE(pre);
+
+ return err;
+}
+
+int32_t
+ec_dict_data_merge(ec_cbk_data_t *cbk, int32_t which, char *key)
+{
+ ec_t *ec = cbk->fop->xl->private;
+ data_t *data[ec->nodes];
+ dict_t *dict, *lockinfo, *tmp = NULL;
+ char *ptr = NULL;
+ int32_t i, len;
+ int32_t err;
+
+ ec_dict_list(data, cbk, which, key, _gf_false);
+
+ lockinfo = dict_new();
+ if (lockinfo == NULL) {
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < ec->nodes; i++) {
+ if ((data[i] == NULL) || (data[i] == EC_MISSING_DATA)) {
+ continue;
+ }
+
+ tmp = dict_new();
+ if (tmp == NULL) {
+ err = -ENOMEM;
+
+ goto out;
+ }
+ err = dict_unserialize(data[i]->data, data[i]->len, &tmp);
+ if (err != 0) {
+ goto out;
+ }
+ if (dict_copy(tmp, lockinfo) == NULL) {
+ err = -ENOMEM;
+
+ goto out;
+ }
+
+ dict_unref(tmp);
+ }
+
+ tmp = NULL;
+
+ err = dict_allocate_and_serialize(lockinfo, (char **)&ptr,
+ (unsigned int *)&len);
+ if (err != 0) {
+ goto out;
+ }
+
+ dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+ err = dict_set_dynptr(dict, key, ptr, len);
+ if (err != 0) {
+ goto out;
+ }
+
+ ptr = NULL;
+
+out:
+ GF_FREE(ptr);
+ dict_unref(lockinfo);
+ if (tmp != NULL) {
+ dict_unref(tmp);
+ }
+
+ return err;
+}
+
+int32_t
+ec_dict_data_uuid(ec_cbk_data_t *cbk, int32_t which, char *key)
+{
+ ec_cbk_data_t *ans, *min;
+ dict_t *src, *dst;
+ data_t *data;
+
+ min = cbk;
+ for (ans = cbk->next; ans != NULL; ans = ans->next) {
+ if (ans->idx < min->idx) {
+ min = ans;
+ }
+ }
+
+ if (min != cbk) {
+ src = (which == EC_COMBINE_XDATA) ? min->xdata : min->dict;
+ dst = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+
+ data = dict_get(src, key);
+ if (data == NULL) {
+ return -ENOENT;
+ }
+ if (dict_set(dst, key, data) != 0) {
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
+int32_t
+ec_dict_data_iatt(ec_cbk_data_t *cbk, int32_t which, char *key)
+{
+ ec_t *ec = cbk->fop->xl->private;
+ data_t *data[ec->nodes];
+ dict_t *dict;
+ struct iatt *stbuf, *tmp;
+ int32_t i, ret;
+
+ ec_dict_list(data, cbk, which, key, _gf_false);
+
+ stbuf = NULL;
+ for (i = 0; i < ec->nodes; i++) {
+ if ((data[i] == NULL) || (data[i] == EC_MISSING_DATA)) {
+ continue;
+ }
+ tmp = data_to_iatt(data[i], key);
+ if (tmp == NULL) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if (stbuf == NULL) {
+ stbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char);
+ if (stbuf == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ *stbuf = *tmp;
+ } else {
+ if (!ec_iatt_combine(cbk->fop, stbuf, tmp, 1)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+ }
+
+ if ((stbuf != NULL) && (stbuf->ia_type == IA_IFREG)) {
+ ec_iatt_rebuild(ec, stbuf, 1, cbk->count);
+ /* TODO: not sure if an iatt could come in xdata from a fop that takes
+ * no locks. */
+ if (!ec_get_inode_size(cbk->fop, cbk->fop->locks[0].lock->loc.inode,
+ &stbuf->ia_size)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ }
+
+ dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+ ret = dict_set_iatt(dict, key, stbuf, false);
+ if (ret >= 0) {
+ stbuf = NULL;
+ }
+
+out:
+ GF_FREE(stbuf);
+
+ return ret;
+}
+
+int32_t
+ec_dict_data_max32(ec_cbk_data_t *cbk, int32_t which, char *key)
+{
+ ec_t *ec = cbk->fop->xl->private;
+ data_t *data[ec->nodes];
+ dict_t *dict;
+ int32_t i;
+ uint32_t max, tmp;
+
+ ec_dict_list(data, cbk, which, key, _gf_false);
+
+ max = 0;
+ for (i = 0; i < ec->nodes; i++) {
+ if ((data[i] == NULL) || (data[i] == EC_MISSING_DATA)) {
+ continue;
+ }
+
+ tmp = data_to_uint32(data[i]);
+ if (max < tmp) {
+ max = tmp;
+ }
+ }
+
+ dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+ return dict_set_uint32(dict, key, max);
+}
+
+int32_t
+ec_dict_data_max64(ec_cbk_data_t *cbk, int32_t which, char *key)
+{
+ ec_t *ec = cbk->fop->xl->private;
+ data_t *data[ec->nodes];
+ dict_t *dict;
+ int32_t i;
+ uint64_t max, tmp;
+
+ ec_dict_list(data, cbk, which, key, _gf_false);
+
+ max = 0;
+ for (i = 0; i < ec->nodes; i++) {
+ if ((data[i] == NULL) || (data[i] == EC_MISSING_DATA)) {
+ continue;
+ }
+
+ tmp = data_to_uint64(data[i]);
+ if (max < tmp) {
+ max = tmp;
+ }
+ }
+
+ dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+ return dict_set_uint64(dict, key, max);
+}
+
+int32_t
+ec_dict_data_quota(ec_cbk_data_t *cbk, int32_t which, char *key)
+{
+ ec_t *ec = cbk->fop->xl->private;
+ data_t *data[ec->nodes];
+ dict_t *dict = NULL;
+ int32_t i = 0;
+ quota_meta_t size = {
+ 0,
+ };
+ quota_meta_t max_size = {
+ 0,
+ };
+
+ if (ec_dict_list(data, cbk, which, key, _gf_false) == 0) {
+ return 0;
+ }
+
+ /* Quota size xattr is managed outside of the control of the ec xlator.
+ * This means that it might not be updated at the same time on all
+ * bricks and we can receive slightly different values. If that's the
+ * case, we take the maximum of all received values.
+ */
+ for (i = 0; i < ec->nodes; i++) {
+ if ((data[i] == NULL) || (data[i] == EC_MISSING_DATA) ||
+ (quota_data_to_meta(data[i], &size) < 0)) {
+ continue;
+ }
+
+ if (size.size > max_size.size)
+ max_size.size = size.size;
+ if (size.file_count > max_size.file_count)
+ max_size.file_count = size.file_count;
+ if (size.dir_count > max_size.dir_count)
+ max_size.dir_count = size.dir_count;
+ }
+
+ max_size.size *= ec->fragments;
+
+ dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+ return quota_dict_set_meta(dict, key, &max_size, IA_IFDIR);
+}
+
+int32_t
+ec_dict_data_stime(ec_cbk_data_t *cbk, int32_t which, char *key)
+{
+ ec_t *ec = cbk->fop->xl->private;
+ data_t *data[ec->nodes];
+ dict_t *dict;
+ int32_t i, err;
+
+ ec_dict_list(data, cbk, which, key, _gf_false);
+
+ dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+ for (i = 0; i < ec->nodes; i++) {
+ if ((data[i] == NULL) || (data[i] == EC_MISSING_DATA)) {
+ continue;
+ }
+ err = gf_get_max_stime(cbk->fop->xl, dict, key, data[i]);
+ if (err != 0) {
+ gf_msg(cbk->fop->xl->name, GF_LOG_ERROR, -err,
+ EC_MSG_STIME_COMBINE_FAIL, "STIME combination failed");
+
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+int32_t
+ec_dict_data_combine(dict_t *dict, char *key, data_t *value, void *arg)
+{
+ ec_dict_combine_t *data = arg;
+
+ if ((strcmp(key, GF_XATTR_PATHINFO_KEY) == 0) ||
+ (strcmp(key, GF_XATTR_USER_PATHINFO_KEY) == 0)) {
+ return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL,
+ _gf_false, _gf_false, "(<EC:%s> { })",
+ data->cbk->fop->xl->name);
+ }
+
+ if (strncmp(key, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) == 0) {
+ return ec_dict_data_concat(data->cbk, data->which, key, NULL, NULL,
+ _gf_false, "{\n}");
+ }
+
+ if (strncmp(key, GF_XATTR_LOCKINFO_KEY, SLEN(GF_XATTR_LOCKINFO_KEY)) == 0) {
+ return ec_dict_data_merge(data->cbk, data->which, key);
+ }
+
+ if (strcmp(key, GET_LINK_COUNT) == 0) {
+ return ec_dict_data_max32(data->cbk, data->which, key);
+ }
+
+ if (strcmp(key, GLUSTERFS_OPEN_FD_COUNT) == 0) {
+ return ec_dict_data_max32(data->cbk, data->which, key);
+ }
+ if ((strcmp(key, GLUSTERFS_INODELK_COUNT) == 0) ||
+ (strcmp(key, GLUSTERFS_ENTRYLK_COUNT) == 0)) {
+ return ec_dict_data_max32(data->cbk, data->which, key);
+ }
+
+ if (strcmp(key, QUOTA_SIZE_KEY) == 0) {
+ return ec_dict_data_quota(data->cbk, data->which, key);
+ }
+ /* Ignore all other quota attributes */
+ if (strncmp(key, EC_QUOTA_PREFIX, SLEN(EC_QUOTA_PREFIX)) == 0) {
+ return 0;
+ }
+
+ if (XATTR_IS_NODE_UUID(key)) {
+ if (data->cbk->fop->int32) {
+ /* List of node uuid is requested */
+ return ec_dict_data_concat(data->cbk, data->which, key,
+ GF_XATTR_LIST_NODE_UUIDS_KEY, UUID0_STR,
+ _gf_true, "{ }");
+ } else {
+ return ec_dict_data_uuid(data->cbk, data->which, key);
+ }
+ }
+
+ if (fnmatch(GF_XATTR_STIME_PATTERN, key, FNM_NOESCAPE) == 0) {
+ return ec_dict_data_stime(data->cbk, data->which, key);
+ }
+
+ if (fnmatch(MARKER_XATTR_PREFIX ".*." XTIME, key, FNM_NOESCAPE) == 0) {
+ return ec_dict_data_max64(data->cbk, data->which, key);
+ }
+
+ if (strcmp(key, GF_PRESTAT) == 0 || strcmp(key, GF_POSTSTAT) == 0) {
+ return ec_dict_data_iatt(data->cbk, data->which, key);
+ }
+
+ return 0;
+}
+
+int32_t
+ec_dict_combine(ec_cbk_data_t *cbk, int32_t which)
+{
+ dict_t *dict = NULL;
+ ec_dict_combine_t data;
+ int32_t err = 0;
+
+ data.cbk = cbk;
+ data.which = which;
+
+ dict = (which == EC_COMBINE_XDATA) ? cbk->xdata : cbk->dict;
+ if (dict != NULL) {
+ err = dict_foreach(dict, ec_dict_data_combine, &data);
+ if (err != 0) {
+ gf_msg(cbk->fop->xl->name, GF_LOG_ERROR, -err,
+ EC_MSG_DICT_COMBINE_FAIL, "Dictionary combination failed");
+
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+int32_t
+ec_vector_compare(struct iovec *dst_vector, int32_t dst_count,
+ struct iovec *src_vector, int32_t src_count)
+{
+ int32_t dst_size = 0, src_size = 0;
+
+ if (dst_count > 0) {
+ dst_size = iov_length(dst_vector, dst_count);
+ }
+ if (src_count > 0) {
+ src_size = iov_length(src_vector, src_count);
+ }
+
+ return (dst_size == src_size);
+}
+
+int32_t
+ec_flock_compare(struct gf_flock *dst, struct gf_flock *src)
+{
+ if ((dst->l_type != src->l_type) || (dst->l_whence != src->l_whence) ||
+ (dst->l_start != src->l_start) || (dst->l_len != src->l_len) ||
+ (dst->l_pid != src->l_pid) ||
+ !is_same_lkowner(&dst->l_owner, &src->l_owner)) {
+ return 0;
+ }
+
+ return 1;
+}
+
+void
+ec_statvfs_combine(struct statvfs *dst, struct statvfs *src)
+{
+ if (dst->f_bsize < src->f_bsize) {
+ dst->f_bsize = src->f_bsize;
+ }
+
+ if (dst->f_frsize < src->f_frsize) {
+ dst->f_blocks *= dst->f_frsize;
+ dst->f_blocks /= src->f_frsize;
+
+ dst->f_bfree *= dst->f_frsize;
+ dst->f_bfree /= src->f_frsize;
+
+ dst->f_bavail *= dst->f_frsize;
+ dst->f_bavail /= src->f_frsize;
+
+ dst->f_frsize = src->f_frsize;
+ } else if (dst->f_frsize > src->f_frsize) {
+ src->f_blocks *= src->f_frsize;
+ src->f_blocks /= dst->f_frsize;
+
+ src->f_bfree *= src->f_frsize;
+ src->f_bfree /= dst->f_frsize;
+
+ src->f_bavail *= src->f_frsize;
+ src->f_bavail /= dst->f_frsize;
+ }
+ if (dst->f_blocks > src->f_blocks) {
+ dst->f_blocks = src->f_blocks;
+ }
+ if (dst->f_bfree > src->f_bfree) {
+ dst->f_bfree = src->f_bfree;
+ }
+ if (dst->f_bavail > src->f_bavail) {
+ dst->f_bavail = src->f_bavail;
+ }
+
+ if (dst->f_files < src->f_files) {
+ dst->f_files = src->f_files;
+ }
+ if (dst->f_ffree > src->f_ffree) {
+ dst->f_ffree = src->f_ffree;
+ }
+ if (dst->f_favail > src->f_favail) {
+ dst->f_favail = src->f_favail;
+ }
+ if (dst->f_namemax > src->f_namemax) {
+ dst->f_namemax = src->f_namemax;
+ }
+
+ if (dst->f_flag != src->f_flag) {
+ gf_msg_debug(THIS->name, 0,
+ "Mismatching file system flags "
+ "(%lX, %lX)",
+ dst->f_flag, src->f_flag);
+ }
+ dst->f_flag &= src->f_flag;
+}
+
+int32_t
+ec_combine_check(ec_cbk_data_t *dst, ec_cbk_data_t *src, ec_combine_f combine)
+{
+ ec_fop_data_t *fop = dst->fop;
+
+ if (dst->op_ret != src->op_ret) {
+ gf_msg_debug(fop->xl->name, 0,
+ "Mismatching return code in "
+ "answers of '%s': %d <-> %d",
+ ec_fop_name(fop->id), dst->op_ret, src->op_ret);
+
+ return 0;
+ }
+ if (dst->op_ret < 0) {
+ if (dst->op_errno != src->op_errno) {
+ gf_msg_debug(fop->xl->name, 0,
+ "Mismatching errno code in "
+ "answers of '%s': %d <-> %d",
+ ec_fop_name(fop->id), dst->op_errno, src->op_errno);
+
+ return 0;
+ }
+ }
+
+ if (!ec_dict_compare(dst->xdata, src->xdata)) {
+ gf_msg(fop->xl->name, GF_LOG_DEBUG, 0, EC_MSG_XDATA_MISMATCH,
+ "Mismatching xdata in answers "
+ "of '%s'",
+ ec_fop_name(fop->id));
+
+ return 0;
+ }
+
+ if ((dst->op_ret >= 0) && (combine != NULL)) {
+ return combine(fop, dst, src);
+ }
+
+ return 1;
+}
+
+void
+ec_combine(ec_cbk_data_t *newcbk, ec_combine_f combine)
+{
+ ec_fop_data_t *fop = newcbk->fop;
+ ec_cbk_data_t *cbk = NULL, *tmp = NULL;
+ struct list_head *item = NULL;
+ int32_t needed = 0;
+ char str[32];
+
+ LOCK(&fop->lock);
+
+ fop->received |= newcbk->mask;
+
+ item = fop->cbk_list.prev;
+ list_for_each_entry(cbk, &fop->cbk_list, list)
+ {
+ if (ec_combine_check(newcbk, cbk, combine)) {
+ newcbk->count += cbk->count;
+ newcbk->mask |= cbk->mask;
+
+ item = cbk->list.prev;
+ while (item != &fop->cbk_list) {
+ tmp = list_entry(item, ec_cbk_data_t, list);
+ if (tmp->count >= newcbk->count) {
+ break;
+ }
+ item = item->prev;
+ }
+ list_del(&cbk->list);
+
+ newcbk->next = cbk;
+
+ break;
+ }
+ }
+ list_add(&newcbk->list, item);
+
+ ec_trace("ANSWER", fop, "combine=%s[%d]",
+ ec_bin(str, sizeof(str), newcbk->mask, 0), newcbk->count);
+
+ cbk = list_entry(fop->cbk_list.next, ec_cbk_data_t, list);
+ if ((fop->mask ^ fop->remaining) == fop->received) {
+ needed = fop->minimum - cbk->count;
+ }
+
+ UNLOCK(&fop->lock);
+
+ if (needed > 0) {
+ ec_dispatch_next(fop, newcbk->idx);
+ }
+}
diff --git a/xlators/cluster/ec/src/ec-combine.h b/xlators/cluster/ec/src/ec-combine.h
new file mode 100644
index 00000000000..1010cc3be26
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-combine.h
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_COMBINE_H__
+#define __EC_COMBINE_H__
+
+#define EC_COMBINE_DICT 0
+#define EC_COMBINE_XDATA 1
+
+typedef int32_t (*ec_combine_f)(ec_fop_data_t *fop, ec_cbk_data_t *dst,
+ ec_cbk_data_t *src);
+
+void
+ec_iatt_rebuild(ec_t *ec, struct iatt *iatt, int32_t count, int32_t answers);
+
+int32_t
+ec_iatt_combine(ec_fop_data_t *fop, struct iatt *dst, struct iatt *src,
+ int32_t count);
+int32_t
+ec_dict_compare(dict_t *dict1, dict_t *dict2);
+int32_t
+ec_vector_compare(struct iovec *dst_vector, int32_t dst_count,
+ struct iovec *src_vector, int32_t src_count);
+int32_t
+ec_flock_compare(struct gf_flock *dst, struct gf_flock *src);
+void
+ec_statvfs_combine(struct statvfs *dst, struct statvfs *src);
+
+int32_t
+ec_dict_combine(ec_cbk_data_t *cbk, int32_t which);
+
+void
+ec_combine(ec_cbk_data_t *cbk, ec_combine_f combine);
+
+int32_t
+ec_combine_write(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src);
+#endif /* __EC_COMBINE_H__ */
diff --git a/xlators/cluster/ec/src/ec-common.c b/xlators/cluster/ec/src/ec-common.c
new file mode 100644
index 00000000000..b955efd8c2d
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-common.c
@@ -0,0 +1,3042 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/byte-order.h>
+#include <glusterfs/hashfn.h>
+
+#include "ec-mem-types.h"
+#include "ec-types.h"
+#include "ec-helpers.h"
+#include "ec-combine.h"
+#include "ec-common.h"
+#include "ec-fops.h"
+#include "ec-method.h"
+#include "ec.h"
+#include "ec-messages.h"
+
+#define EC_INVALID_INDEX UINT32_MAX
+
+void
+ec_update_fd_status(fd_t *fd, xlator_t *xl, int idx, int32_t ret_status)
+{
+ ec_fd_t *fd_ctx;
+
+ if (fd == NULL)
+ return;
+
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __ec_fd_get(fd, xl);
+ if (fd_ctx) {
+ if (ret_status >= 0)
+ fd_ctx->fd_status[idx] = EC_FD_OPENED;
+ else
+ fd_ctx->fd_status[idx] = EC_FD_NOT_OPENED;
+ }
+ }
+ UNLOCK(&fd->lock);
+}
+
+static uintptr_t
+ec_fd_ctx_need_open(fd_t *fd, xlator_t *this, uintptr_t mask)
+{
+ int i = 0;
+ int count = 0;
+ ec_t *ec = NULL;
+ ec_fd_t *fd_ctx = NULL;
+ uintptr_t need_open = 0;
+
+ ec = this->private;
+
+ fd_ctx = ec_fd_get(fd, this);
+ if (!fd_ctx)
+ return count;
+
+ LOCK(&fd->lock);
+ {
+ for (i = 0; i < ec->nodes; i++) {
+ if ((fd_ctx->fd_status[i] == EC_FD_NOT_OPENED) &&
+ ((ec->xl_up & (1 << i)) != 0) && ((mask & (1 << i)) != 0)) {
+ fd_ctx->fd_status[i] = EC_FD_OPENING;
+ need_open |= (1 << i);
+ count++;
+ }
+ }
+ }
+ UNLOCK(&fd->lock);
+
+ /* If fd needs to open on minimum number of nodes
+ * then ignore fixing the fd as it has been
+ * requested from heal operation.
+ */
+ if (count >= ec->fragments) {
+ need_open = 0;
+ }
+
+ return need_open;
+}
+
+static gf_boolean_t
+ec_is_fd_fixable(fd_t *fd)
+{
+ if (!fd || !fd->inode)
+ return _gf_false;
+ else if (fd_is_anonymous(fd))
+ return _gf_false;
+ else if (gf_uuid_is_null(fd->inode->gfid))
+ return _gf_false;
+
+ return _gf_true;
+}
+
+static void
+ec_fix_open(ec_fop_data_t *fop, uintptr_t mask)
+{
+ uintptr_t need_open = 0;
+ int ret = 0;
+ int32_t flags = 0;
+ loc_t loc = {
+ 0,
+ };
+
+ if (!ec_is_fd_fixable(fop->fd))
+ goto out;
+
+ /* Evaluate how many remote fd's to be opened */
+ need_open = ec_fd_ctx_need_open(fop->fd, fop->xl, mask);
+ if (need_open == 0) {
+ goto out;
+ }
+
+ loc.inode = inode_ref(fop->fd->inode);
+ gf_uuid_copy(loc.gfid, fop->fd->inode->gfid);
+ ret = loc_path(&loc, NULL);
+ if (ret < 0) {
+ goto out;
+ }
+
+ flags = fop->fd->flags & (~(O_TRUNC | O_APPEND | O_CREAT | O_EXCL));
+ if (IA_IFDIR == fop->fd->inode->ia_type) {
+ ec_opendir(fop->frame, fop->xl, need_open,
+ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL,
+ &fop->loc[0], fop->fd, NULL);
+ } else {
+ ec_open(fop->frame, fop->xl, need_open,
+ EC_MINIMUM_ONE | EC_FOP_NO_PROPAGATE_ERROR, NULL, NULL, &loc,
+ flags, fop->fd, NULL);
+ }
+
+out:
+ loc_wipe(&loc);
+}
+
+static off_t
+ec_range_end_get(off_t fl_start, uint64_t fl_size)
+{
+ if (fl_size > 0) {
+ if (fl_size >= EC_RANGE_FULL) {
+ /* Infinity */
+ fl_start = LLONG_MAX;
+ } else {
+ fl_start += fl_size - 1;
+ if (fl_start < 0) {
+ /* Overflow */
+ fl_start = LLONG_MAX;
+ }
+ }
+ }
+
+ return fl_start;
+}
+
+static gf_boolean_t
+ec_is_range_conflict(ec_lock_link_t *l1, ec_lock_link_t *l2)
+{
+ return ((l1->fl_end >= l2->fl_start) && (l2->fl_end >= l1->fl_start));
+}
+
+static gf_boolean_t
+ec_lock_conflict(ec_lock_link_t *l1, ec_lock_link_t *l2)
+{
+ ec_t *ec = l1->fop->xl->private;
+
+ /* Fops like access/stat won't have to worry what the other fops are
+ * modifying as the fop is wound only to one brick. So it can be
+ * executed in parallel*/
+ if (l1->fop->minimum == EC_MINIMUM_ONE ||
+ l2->fop->minimum == EC_MINIMUM_ONE)
+ return _gf_false;
+
+ if ((l1->fop->flags & EC_FLAG_LOCK_SHARED) &&
+ (l2->fop->flags & EC_FLAG_LOCK_SHARED))
+ return _gf_false;
+
+ if (!ec->parallel_writes) {
+ return _gf_true;
+ }
+
+ return ec_is_range_conflict(l1, l2);
+}
+
+uint32_t
+ec_select_first_by_read_policy(ec_t *ec, ec_fop_data_t *fop)
+{
+ if (ec->read_policy == EC_ROUND_ROBIN) {
+ return ec->idx;
+ } else if (ec->read_policy == EC_GFID_HASH) {
+ if (fop->use_fd) {
+ return SuperFastHash((char *)fop->fd->inode->gfid,
+ sizeof(fop->fd->inode->gfid)) %
+ ec->nodes;
+ } else {
+ if (gf_uuid_is_null(fop->loc[0].gfid))
+ loc_gfid(&fop->loc[0], fop->loc[0].gfid);
+ return SuperFastHash((char *)fop->loc[0].gfid,
+ sizeof(fop->loc[0].gfid)) %
+ ec->nodes;
+ }
+ }
+ return 0;
+}
+
+static gf_boolean_t
+ec_child_valid(ec_t *ec, ec_fop_data_t *fop, uint32_t idx)
+{
+ return (idx < ec->nodes) && (((fop->remaining >> idx) & 1) == 1);
+}
+
+static uint32_t
+ec_child_next(ec_t *ec, ec_fop_data_t *fop, uint32_t idx)
+{
+ while (!ec_child_valid(ec, fop, idx)) {
+ if (++idx >= ec->nodes) {
+ idx = 0;
+ }
+ if (idx == fop->first) {
+ return EC_INVALID_INDEX;
+ }
+ }
+
+ return idx;
+}
+
+int32_t
+ec_heal_report(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uintptr_t mask, uintptr_t good,
+ uintptr_t bad, uint32_t pending, dict_t *xdata)
+{
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_DEBUG, op_errno, EC_MSG_HEAL_FAIL,
+ "Heal failed");
+ } else {
+ if ((mask & ~good) != 0) {
+ gf_msg(this->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_SUCCESS,
+ "Heal succeeded on %d/%d "
+ "subvolumes",
+ gf_bits_count(mask & ~(good | bad)),
+ gf_bits_count(mask & ~good));
+ }
+ }
+
+ return 0;
+}
+
+static uintptr_t
+ec_fop_needs_name_heal(ec_fop_data_t *fop)
+{
+ ec_t *ec = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ ec_cbk_data_t *enoent_cbk = NULL;
+
+ ec = fop->xl->private;
+ if (fop->id != GF_FOP_LOOKUP)
+ return 0;
+
+ if (!fop->loc[0].name || strlen(fop->loc[0].name) == 0)
+ return 0;
+
+ list_for_each_entry(cbk, &fop->cbk_list, list)
+ {
+ if (cbk->op_ret < 0 && cbk->op_errno == ENOENT) {
+ enoent_cbk = cbk;
+ break;
+ }
+ }
+
+ if (!enoent_cbk)
+ return 0;
+
+ return ec->xl_up & ~enoent_cbk->mask;
+}
+
+int32_t
+ec_fop_needs_heal(ec_fop_data_t *fop)
+{
+ ec_t *ec = fop->xl->private;
+
+ if (fop->lock_count == 0) {
+ /*
+ * if fop->lock_count is zero that means it saw version mismatch
+ * without any locks so it can't be trusted. If we launch a heal
+ * based on this it will lead to INODELKs which will affect I/O
+ * performance. Considering self-heal-daemon and operations on
+ * the inode from client which take locks can still trigger the
+ * heal we can choose to not attempt a heal when fop->lock_count
+ * is zero.
+ */
+ return 0;
+ }
+ return (ec->xl_up & ~(fop->remaining | fop->good)) != 0;
+}
+
+void
+ec_check_status(ec_fop_data_t *fop)
+{
+ ec_t *ec = fop->xl->private;
+ int32_t partial = 0;
+ char str1[32], str2[32], str3[32], str4[32], str5[32];
+
+ if (!ec_fop_needs_name_heal(fop) && !ec_fop_needs_heal(fop)) {
+ return;
+ }
+
+ if (fop->answer && fop->answer->op_ret >= 0) {
+ if ((fop->id == GF_FOP_LOOKUP) || (fop->id == GF_FOP_STAT) ||
+ (fop->id == GF_FOP_FSTAT)) {
+ partial = fop->answer->iatt[0].ia_type == IA_IFDIR;
+ } else if (fop->id == GF_FOP_OPENDIR) {
+ partial = 1;
+ }
+ }
+
+ gf_msg(
+ fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_FAIL_ON_SUBVOLS,
+ "Operation failed on %d of %d subvolumes.(up=%s, mask=%s, "
+ "remaining=%s, good=%s, bad=%s,"
+ "(Least significant bit represents first client/brick of subvol), %s)",
+ gf_bits_count(ec->xl_up & ~(fop->remaining | fop->good)), ec->nodes,
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->remaining, ec->nodes),
+ ec_bin(str4, sizeof(str4), fop->good, ec->nodes),
+ ec_bin(str5, sizeof(str5), ec->xl_up & ~(fop->remaining | fop->good),
+ ec->nodes),
+ ec_msg_str(fop));
+ if (fop->use_fd) {
+ if (fop->fd != NULL) {
+ ec_fheal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
+ fop->fd, partial, NULL);
+ }
+ } else {
+ ec_heal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
+ &fop->loc[0], partial, NULL);
+
+ if (fop->loc[1].inode != NULL) {
+ ec_heal(NULL, fop->xl, -1, EC_MINIMUM_ONE, ec_heal_report, NULL,
+ &fop->loc[1], partial, NULL);
+ }
+ }
+}
+
+void
+ec_update_good(ec_fop_data_t *fop, uintptr_t good)
+{
+ fop->good = good;
+
+ /* Fops that are executed only on one brick do not have enough information
+ * to decide if healing is needed or not. */
+ if ((fop->expected != 1) && (fop->parent == NULL)) {
+ ec_check_status(fop);
+ }
+}
+
+void
+ec_lock_update_good(ec_lock_t *lock, ec_fop_data_t *fop)
+{
+ /* Fops that are executed only on one brick do not have enough information
+ * to update the global mask of good bricks. */
+ if (fop->expected == 1) {
+ return;
+ }
+
+ /* When updating the good mask of the lock, we only take into consideration
+ * those bits corresponding to the bricks where the fop has been executed.
+ * Bad bricks are removed from good_mask, but once marked as bad it's never
+ * set to good until the lock is released and reacquired */
+
+ lock->good_mask &= fop->good | fop->remaining;
+}
+
+void
+__ec_fop_set_error(ec_fop_data_t *fop, int32_t error)
+{
+ if ((error != 0) && (fop->error == 0)) {
+ fop->error = error;
+ }
+}
+
+void
+ec_fop_set_error(ec_fop_data_t *fop, int32_t error)
+{
+ LOCK(&fop->lock);
+
+ __ec_fop_set_error(fop, error);
+
+ UNLOCK(&fop->lock);
+}
+
+gf_boolean_t
+ec_cbk_set_error(ec_cbk_data_t *cbk, int32_t error, gf_boolean_t ro)
+{
+ if ((error != 0) && (cbk->op_ret >= 0)) {
+ /* If cbk->op_errno was 0, it means that the fop succeeded and this
+ * error has happened while processing the answer. If the operation was
+ * read-only, there's no problem (i.e. we simply return the generated
+ * error code). However if it caused a modification, we must return EIO
+ * to indicate that the operation has been partially executed. */
+ cbk->op_errno = ro ? error : EIO;
+ cbk->op_ret = -1;
+
+ ec_fop_set_error(cbk->fop, cbk->op_errno);
+ }
+
+ return (cbk->op_ret < 0);
+}
+
+ec_cbk_data_t *
+ec_fop_prepare_answer(ec_fop_data_t *fop, gf_boolean_t ro)
+{
+ ec_cbk_data_t *cbk;
+ int32_t err;
+
+ cbk = fop->answer;
+ if (cbk == NULL) {
+ ec_fop_set_error(fop, EIO);
+
+ return NULL;
+ }
+
+ if (cbk->op_ret < 0) {
+ ec_fop_set_error(fop, cbk->op_errno);
+ }
+
+ err = ec_dict_combine(cbk, EC_COMBINE_XDATA);
+ if (ec_cbk_set_error(cbk, -err, ro)) {
+ return NULL;
+ }
+
+ return cbk;
+}
+
+void
+ec_sleep(ec_fop_data_t *fop)
+{
+ LOCK(&fop->lock);
+
+ GF_ASSERT(fop->refs > 0);
+ fop->refs++;
+ fop->jobs++;
+
+ UNLOCK(&fop->lock);
+}
+
+int32_t
+ec_check_complete(ec_fop_data_t *fop, ec_resume_f resume)
+{
+ int32_t error = -1;
+
+ LOCK(&fop->lock);
+
+ GF_ASSERT(fop->resume == NULL);
+
+ if (--fop->jobs != 0) {
+ ec_trace("WAIT", fop, "resume=%p", resume);
+
+ fop->resume = resume;
+ } else {
+ error = fop->error;
+ fop->error = 0;
+ }
+
+ UNLOCK(&fop->lock);
+
+ return error;
+}
+
+void
+ec_resume(ec_fop_data_t *fop, int32_t error)
+{
+ ec_resume_f resume = NULL;
+
+ LOCK(&fop->lock);
+
+ __ec_fop_set_error(fop, error);
+
+ if (--fop->jobs == 0) {
+ resume = fop->resume;
+ fop->resume = NULL;
+ if (resume != NULL) {
+ ec_trace("RESUME", fop, "error=%d", error);
+
+ if (fop->error != 0) {
+ error = fop->error;
+ }
+ fop->error = 0;
+ }
+ }
+
+ UNLOCK(&fop->lock);
+
+ if (resume != NULL) {
+ resume(fop, error);
+ }
+
+ ec_fop_data_release(fop);
+}
+
+void
+ec_resume_parent(ec_fop_data_t *fop)
+{
+ ec_fop_data_t *parent;
+ int32_t error = 0;
+
+ parent = fop->parent;
+ if (parent != NULL) {
+ if ((fop->fop_flags & EC_FOP_NO_PROPAGATE_ERROR) == 0) {
+ error = fop->error;
+ }
+ ec_trace("RESUME_PARENT", fop, "error=%u", error);
+ fop->parent = NULL;
+ ec_resume(parent, error);
+ }
+}
+
+gf_boolean_t
+ec_is_recoverable_error(int32_t op_errno)
+{
+ switch (op_errno) {
+ case ENOTCONN:
+ case ESTALE:
+ case ENOENT:
+ case EBADFD: /*Opened fd but brick is disconnected*/
+ case EIO: /*Backend-fs crash like XFS/ext4 etc*/
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+void
+ec_complete(ec_fop_data_t *fop)
+{
+ ec_cbk_data_t *cbk = NULL;
+ int32_t resume = 0, update = 0;
+ int healing_count = 0;
+
+ LOCK(&fop->lock);
+
+ ec_trace("COMPLETE", fop, "");
+
+ if (--fop->winds == 0) {
+ if (fop->answer == NULL) {
+ if (!list_empty(&fop->cbk_list)) {
+ cbk = list_entry(fop->cbk_list.next, ec_cbk_data_t, list);
+ healing_count = gf_bits_count(cbk->mask & fop->healing);
+ /* fop shouldn't be treated as success if it is not
+ * successful on at least fop->minimum good copies*/
+ if ((cbk->count - healing_count) >= fop->minimum) {
+ fop->answer = cbk;
+
+ update = 1;
+ }
+ }
+
+ resume = 1;
+ }
+ }
+
+ UNLOCK(&fop->lock);
+
+ /* ec_update_good() locks inode->lock. This may cause deadlocks with
+ fop->lock when used in another order. Since ec_update_good() will not
+ be called more than once for each fop, it can be called from outside
+ the fop->lock locked region. */
+ if (update) {
+ ec_update_good(fop, cbk->mask);
+ }
+
+ if (resume) {
+ ec_resume(fop, 0);
+ }
+
+ ec_fop_data_release(fop);
+}
+
+/* There could be already granted locks sitting on the bricks, unlock for which
+ * must be wound at all costs*/
+static gf_boolean_t
+ec_must_wind(ec_fop_data_t *fop)
+{
+ if ((fop->id == GF_FOP_INODELK) || (fop->id == GF_FOP_FINODELK) ||
+ (fop->id == GF_FOP_LK)) {
+ if (fop->flock.l_type == F_UNLCK)
+ return _gf_true;
+ } else if ((fop->id == GF_FOP_ENTRYLK) || (fop->id == GF_FOP_FENTRYLK)) {
+ if (fop->entrylk_cmd == ENTRYLK_UNLOCK)
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+static gf_boolean_t
+ec_internal_op(ec_fop_data_t *fop)
+{
+ if (ec_must_wind(fop))
+ return _gf_true;
+ if (fop->id == GF_FOP_XATTROP)
+ return _gf_true;
+ if (fop->id == GF_FOP_FXATTROP)
+ return _gf_true;
+ if (fop->id == GF_FOP_OPEN)
+ return _gf_true;
+ return _gf_false;
+}
+
+char *
+ec_msg_str(ec_fop_data_t *fop)
+{
+ loc_t *loc1 = NULL;
+ loc_t *loc2 = NULL;
+ char gfid1[64] = {0};
+ char gfid2[64] = {0};
+ ec_fop_data_t *parent = fop->parent;
+
+ if (fop->errstr)
+ return fop->errstr;
+ if (!fop->use_fd) {
+ loc1 = &fop->loc[0];
+ loc2 = &fop->loc[1];
+
+ if (fop->id == GF_FOP_RENAME) {
+ gf_asprintf(&fop->errstr,
+ "FOP : '%s' failed on '%s' and '%s' with gfids "
+ "%s and %s respectively. Parent FOP: %s",
+ ec_fop_name(fop->id), loc1->path, loc2->path,
+ uuid_utoa_r(loc1->gfid, gfid1),
+ uuid_utoa_r(loc2->gfid, gfid2),
+ parent ? ec_fop_name(parent->id) : "No Parent");
+ } else {
+ gf_asprintf(
+ &fop->errstr,
+ "FOP : '%s' failed on '%s' with gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), loc1->path,
+ uuid_utoa_r(loc1->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
+ }
+ } else {
+ gf_asprintf(
+ &fop->errstr, "FOP : '%s' failed on gfid %s. Parent FOP: %s",
+ ec_fop_name(fop->id), uuid_utoa_r(fop->fd->inode->gfid, gfid1),
+ parent ? ec_fop_name(parent->id) : "No Parent");
+ }
+ return fop->errstr;
+}
+
+static void
+ec_log_insufficient_vol(ec_fop_data_t *fop, int32_t have, uint32_t need,
+ int32_t loglevel)
+{
+ ec_t *ec = fop->xl->private;
+ char str1[32], str2[32], str3[32];
+
+ gf_msg(ec->xl->name, loglevel, 0, EC_MSG_CHILDS_INSUFFICIENT,
+ "Insufficient available children for this request: "
+ "Have : %d, Need : %u : Child UP : %s "
+ "Mask: %s, Healing : %s : %s ",
+ have, need, ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->mask, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->healing, ec->nodes),
+ ec_msg_str(fop));
+}
+
+static int32_t
+ec_child_select(ec_fop_data_t *fop)
+{
+ ec_t *ec = fop->xl->private;
+ int32_t first = 0, num = 0;
+
+ ec_fop_cleanup(fop);
+
+ fop->mask &= ec->node_mask;
+ /* Wind the fop on same subvols as parent for any internal extra fops like
+ * head/tail read in case of writev fop. Unlocks shouldn't do this because
+ * unlock should go on all subvols where lock is performed*/
+ if (fop->parent && !ec_internal_op(fop)) {
+ fop->mask &= (fop->parent->mask & ~fop->parent->healing);
+ if (ec_is_data_fop(fop->id)) {
+ fop->healing |= fop->parent->healing;
+ }
+ }
+
+ if ((fop->mask & ~ec->xl_up) != 0) {
+ gf_msg(fop->xl->name, GF_LOG_WARNING, 0, EC_MSG_OP_EXEC_UNAVAIL,
+ "Executing operation with "
+ "some subvolumes unavailable. (%" PRIXPTR "). %s ",
+ fop->mask & ~ec->xl_up, ec_msg_str(fop));
+ fop->mask &= ec->xl_up;
+ }
+
+ switch (fop->minimum) {
+ case EC_MINIMUM_ALL:
+ fop->minimum = gf_bits_count(fop->mask);
+ if (fop->minimum >= ec->fragments) {
+ break;
+ }
+ case EC_MINIMUM_MIN:
+ fop->minimum = ec->fragments;
+ break;
+ case EC_MINIMUM_ONE:
+ fop->minimum = 1;
+ }
+
+ if (ec->read_policy == EC_ROUND_ROBIN) {
+ first = ec->idx;
+ if (++first >= ec->nodes) {
+ first = 0;
+ }
+ ec->idx = first;
+ }
+
+ num = gf_bits_count(fop->mask);
+ /*Unconditionally wind on healing subvolumes*/
+ fop->mask |= fop->healing;
+ fop->remaining = fop->mask;
+ fop->received = 0;
+
+ ec_trace("SELECT", fop, "");
+
+ if ((num < fop->minimum) && (num < ec->fragments)) {
+ ec_log_insufficient_vol(fop, num, fop->minimum, GF_LOG_ERROR);
+ return 0;
+ }
+
+ if (!fop->parent && fop->lock_count &&
+ (fop->locks[0].update[EC_DATA_TXN] ||
+ fop->locks[0].update[EC_METADATA_TXN])) {
+ if (ec->quorum_count && (num < ec->quorum_count)) {
+ ec_log_insufficient_vol(fop, num, ec->quorum_count, GF_LOG_ERROR);
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+void
+ec_dispatch_next(ec_fop_data_t *fop, uint32_t idx)
+{
+ uint32_t i = EC_INVALID_INDEX;
+ ec_t *ec = fop->xl->private;
+
+ LOCK(&fop->lock);
+
+ i = ec_child_next(ec, fop, idx);
+ if (i < EC_MAX_NODES) {
+ idx = i;
+
+ fop->remaining ^= 1ULL << idx;
+
+ ec_trace("EXECUTE", fop, "idx=%d", idx);
+
+ fop->winds++;
+ fop->refs++;
+ }
+
+ UNLOCK(&fop->lock);
+
+ if (i < EC_MAX_NODES) {
+ fop->wind(ec, fop, idx);
+ }
+}
+
+void
+ec_dispatch_mask(ec_fop_data_t *fop, uintptr_t mask)
+{
+ ec_t *ec = fop->xl->private;
+ int32_t count, idx;
+
+ count = gf_bits_count(mask);
+
+ LOCK(&fop->lock);
+
+ ec_trace("EXECUTE", fop, "mask=%lX", mask);
+
+ fop->remaining ^= mask;
+
+ fop->winds += count;
+ fop->refs += count;
+
+ UNLOCK(&fop->lock);
+
+ idx = 0;
+ while (mask != 0) {
+ if ((mask & 1) != 0) {
+ fop->wind(ec, fop, idx);
+ }
+ idx++;
+ mask >>= 1;
+ }
+}
+
+void
+ec_dispatch_start(ec_fop_data_t *fop)
+{
+ fop->answer = NULL;
+ fop->good = 0;
+
+ INIT_LIST_HEAD(&fop->cbk_list);
+
+ if (fop->lock_count > 0) {
+ ec_owner_copy(fop->frame, &fop->req_frame->root->lk_owner);
+ }
+}
+
+void
+ec_dispatch_one(ec_fop_data_t *fop)
+{
+ ec_dispatch_start(fop);
+
+ if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
+ fop->expected = 1;
+ fop->first = ec_select_first_by_read_policy(fop->xl->private, fop);
+
+ ec_dispatch_next(fop, fop->first);
+ }
+}
+
+gf_boolean_t
+ec_dispatch_one_retry(ec_fop_data_t *fop, ec_cbk_data_t **cbk)
+{
+ ec_cbk_data_t *tmp;
+
+ tmp = ec_fop_prepare_answer(fop, _gf_true);
+ if (cbk != NULL) {
+ *cbk = tmp;
+ }
+ if ((tmp != NULL) && (tmp->op_ret < 0) &&
+ ec_is_recoverable_error(tmp->op_errno)) {
+ GF_ASSERT(fop->mask & (1ULL << tmp->idx));
+ fop->mask ^= (1ULL << tmp->idx);
+ if (fop->mask) {
+ return _gf_true;
+ }
+ }
+
+ return _gf_false;
+}
+
+void
+ec_dispatch_inc(ec_fop_data_t *fop)
+{
+ ec_dispatch_start(fop);
+
+ if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
+ fop->expected = gf_bits_count(fop->remaining);
+ fop->first = 0;
+
+ ec_dispatch_next(fop, 0);
+ }
+}
+
+void
+ec_dispatch_all(ec_fop_data_t *fop)
+{
+ ec_dispatch_start(fop);
+
+ if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
+ fop->expected = gf_bits_count(fop->remaining);
+ fop->first = 0;
+
+ ec_dispatch_mask(fop, fop->remaining);
+ }
+}
+
+void
+ec_dispatch_min(ec_fop_data_t *fop)
+{
+ ec_t *ec = fop->xl->private;
+ uintptr_t mask;
+ uint32_t idx;
+ int32_t count;
+
+ ec_dispatch_start(fop);
+
+ if (ec_child_select(fop)) {
+ ec_sleep(fop);
+
+ fop->expected = count = ec->fragments;
+ fop->first = ec_select_first_by_read_policy(fop->xl->private, fop);
+ idx = fop->first - 1;
+ mask = 0;
+ while (count-- > 0) {
+ idx = ec_child_next(ec, fop, idx + 1);
+ if (idx < EC_MAX_NODES)
+ mask |= 1ULL << idx;
+ }
+
+ ec_dispatch_mask(fop, mask);
+ }
+}
+
+void
+ec_succeed_all(ec_fop_data_t *fop)
+{
+ ec_dispatch_start(fop);
+
+ if (ec_child_select(fop)) {
+ fop->expected = gf_bits_count(fop->remaining);
+ fop->first = 0;
+
+ /* Simulate a successful execution on all bricks */
+ ec_trace("SUCCEED", fop, "");
+
+ fop->good = fop->remaining;
+ fop->remaining = 0;
+ }
+}
+
+ec_lock_t *
+ec_lock_allocate(ec_fop_data_t *fop, loc_t *loc)
+{
+ ec_t *ec = fop->xl->private;
+ ec_lock_t *lock;
+ int32_t err;
+
+ if ((loc->inode == NULL) ||
+ (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid))) {
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_INODE,
+ "Trying to lock based on an invalid "
+ "inode");
+
+ __ec_fop_set_error(fop, EINVAL);
+
+ return NULL;
+ }
+
+ lock = mem_get0(ec->lock_pool);
+ if (lock != NULL) {
+ lock->good_mask = UINTPTR_MAX;
+ INIT_LIST_HEAD(&lock->owners);
+ INIT_LIST_HEAD(&lock->waiting);
+ INIT_LIST_HEAD(&lock->frozen);
+ err = ec_loc_from_loc(fop->xl, &lock->loc, loc);
+ if (err != 0) {
+ mem_put(lock);
+ lock = NULL;
+
+ __ec_fop_set_error(fop, -err);
+ }
+ }
+
+ return lock;
+}
+
+void
+ec_lock_destroy(ec_lock_t *lock)
+{
+ loc_wipe(&lock->loc);
+ if (lock->fd != NULL) {
+ fd_unref(lock->fd);
+ }
+
+ mem_put(lock);
+}
+
+int32_t
+ec_lock_compare(ec_lock_t *lock1, ec_lock_t *lock2)
+{
+ return gf_uuid_compare(lock1->loc.gfid, lock2->loc.gfid);
+}
+
+static void
+ec_lock_insert(ec_fop_data_t *fop, ec_lock_t *lock, uint32_t flags, loc_t *base,
+ off_t fl_start, uint64_t fl_size)
+{
+ ec_lock_link_t *link;
+
+ /* This check is only prepared for up to 2 locks per fop. If more locks
+ * are needed this must be changed. */
+ if ((fop->lock_count > 0) &&
+ (ec_lock_compare(fop->locks[0].lock, lock) < 0)) {
+ fop->first_lock = fop->lock_count;
+ } else {
+ /* When the first lock is added to the current fop, request lock
+ * counts from locks xlator to be able to determine if there is
+ * contention and release the lock sooner. */
+ if (fop->xdata == NULL) {
+ fop->xdata = dict_new();
+ if (fop->xdata == NULL) {
+ ec_fop_set_error(fop, ENOMEM);
+ return;
+ }
+ }
+ if (dict_set_str(fop->xdata, GLUSTERFS_INODELK_DOM_COUNT,
+ fop->xl->name) != 0) {
+ ec_fop_set_error(fop, ENOMEM);
+ return;
+ }
+ }
+
+ link = &fop->locks[fop->lock_count++];
+
+ link->lock = lock;
+ link->fop = fop;
+ link->update[EC_DATA_TXN] = (flags & EC_UPDATE_DATA) != 0;
+ link->update[EC_METADATA_TXN] = (flags & EC_UPDATE_META) != 0;
+ link->base = base;
+ link->fl_start = fl_start;
+ link->fl_end = ec_range_end_get(fl_start, fl_size);
+
+ lock->refs_pending++;
+}
+
+static void
+ec_lock_prepare_inode_internal(ec_fop_data_t *fop, loc_t *loc, uint32_t flags,
+ loc_t *base, off_t fl_start, uint64_t fl_size)
+{
+ ec_lock_t *lock = NULL;
+ ec_inode_t *ctx;
+
+ if ((fop->parent != NULL) || (fop->error != 0) || (loc->inode == NULL)) {
+ return;
+ }
+
+ LOCK(&loc->inode->lock);
+
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx == NULL) {
+ __ec_fop_set_error(fop, ENOMEM);
+
+ goto unlock;
+ }
+
+ if (ctx->inode_lock != NULL) {
+ lock = ctx->inode_lock;
+
+ /* If there's another lock, make sure that it's not the same. Otherwise
+ * do not insert it.
+ *
+ * This can only happen on renames where source and target names are
+ * in the same directory. */
+ if ((fop->lock_count > 0) && (fop->locks[0].lock == lock)) {
+ /* Combine data/meta updates */
+ fop->locks[0].update[EC_DATA_TXN] |= (flags & EC_UPDATE_DATA) != 0;
+ fop->locks[0].update[EC_METADATA_TXN] |= (flags & EC_UPDATE_META) !=
+ 0;
+
+ /* Only one base inode is allowed per fop, so there shouldn't be
+ * overwrites here. */
+ if (base != NULL) {
+ fop->locks[0].base = base;
+ }
+
+ goto update_query;
+ }
+
+ ec_trace("LOCK_INODELK", fop,
+ "lock=%p, inode=%p. Lock already "
+ "acquired",
+ lock, loc->inode);
+
+ goto insert;
+ }
+
+ lock = ec_lock_allocate(fop, loc);
+ if (lock == NULL) {
+ goto unlock;
+ }
+
+ ec_trace("LOCK_CREATE", fop, "lock=%p", lock);
+
+ lock->flock.l_type = F_WRLCK;
+ lock->flock.l_whence = SEEK_SET;
+
+ lock->ctx = ctx;
+ ctx->inode_lock = lock;
+
+insert:
+ ec_lock_insert(fop, lock, flags, base, fl_start, fl_size);
+update_query:
+ lock->query |= (flags & EC_QUERY_INFO) != 0;
+unlock:
+ UNLOCK(&loc->inode->lock);
+}
+
+void
+ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags,
+ off_t fl_start, uint64_t fl_size)
+{
+ ec_lock_prepare_inode_internal(fop, loc, flags, NULL, fl_start, fl_size);
+}
+
+void
+ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base,
+ uint32_t flags)
+{
+ loc_t tmp;
+ int32_t err;
+
+ if (fop->error != 0) {
+ return;
+ }
+
+ err = ec_loc_parent(fop->xl, loc, &tmp);
+ if (err != 0) {
+ ec_fop_set_error(fop, -err);
+
+ return;
+ }
+
+ if ((flags & EC_INODE_SIZE) != 0) {
+ flags ^= EC_INODE_SIZE;
+ } else {
+ base = NULL;
+ }
+
+ ec_lock_prepare_inode_internal(fop, &tmp, flags, base, 0, EC_RANGE_FULL);
+
+ loc_wipe(&tmp);
+}
+
+void
+ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags, off_t fl_start,
+ uint64_t fl_size)
+{
+ loc_t loc;
+ int32_t err;
+
+ if (fop->error != 0) {
+ return;
+ }
+
+ err = ec_loc_from_fd(fop->xl, &loc, fd);
+ if (err != 0) {
+ ec_fop_set_error(fop, -err);
+
+ return;
+ }
+
+ ec_lock_prepare_inode_internal(fop, &loc, flags, NULL, fl_start, fl_size);
+
+ loc_wipe(&loc);
+}
+
+gf_boolean_t
+ec_config_check(xlator_t *xl, ec_config_t *config)
+{
+ ec_t *ec;
+
+ ec = xl->private;
+ if ((config->version != EC_CONFIG_VERSION) ||
+ (config->algorithm != EC_CONFIG_ALGORITHM) ||
+ (config->gf_word_size != EC_GF_BITS) || (config->bricks != ec->nodes) ||
+ (config->redundancy != ec->redundancy) ||
+ (config->chunk_size != EC_METHOD_CHUNK_SIZE)) {
+ uint32_t data_bricks;
+
+ /* This combination of version/algorithm requires the following
+ values. Incorrect values for these fields are a sign of
+ corruption:
+
+ redundancy > 0
+ redundancy * 2 < bricks
+ gf_word_size must be a power of 2
+ chunk_size (in bits) must be a multiple of gf_word_size *
+ (bricks - redundancy) */
+
+ data_bricks = config->bricks - config->redundancy;
+ if ((config->redundancy < 1) ||
+ (config->redundancy * 2 >= config->bricks) ||
+ !ec_is_power_of_2(config->gf_word_size) ||
+ ((config->chunk_size * 8) % (config->gf_word_size * data_bricks) !=
+ 0)) {
+ gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_CONFIG,
+ "Invalid or corrupted config");
+ } else {
+ gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_CONFIG,
+ "Unsupported config "
+ "(V=%u, A=%u, W=%u, "
+ "N=%u, R=%u, S=%u)",
+ config->version, config->algorithm, config->gf_word_size,
+ config->bricks, config->redundancy, config->chunk_size);
+ }
+
+ return _gf_false;
+ }
+
+ return _gf_true;
+}
+
+gf_boolean_t
+ec_set_dirty_flag(ec_lock_link_t *link, ec_inode_t *ctx, uint64_t *dirty)
+{
+ gf_boolean_t set_dirty = _gf_false;
+
+ if (link->update[EC_DATA_TXN] && !ctx->dirty[EC_DATA_TXN]) {
+ if (!link->optimistic_changelog)
+ dirty[EC_DATA_TXN] = 1;
+ }
+
+ if (link->update[EC_METADATA_TXN] && !ctx->dirty[EC_METADATA_TXN]) {
+ if (!link->optimistic_changelog)
+ dirty[EC_METADATA_TXN] = 1;
+ }
+
+ if (dirty[EC_METADATA_TXN] || dirty[EC_DATA_TXN]) {
+ set_dirty = _gf_true;
+ }
+
+ return set_dirty;
+}
+
+int32_t
+ec_prepare_update_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ struct list_head list;
+ ec_fop_data_t *fop = cookie, *parent, *tmp;
+ ec_lock_link_t *parent_link = fop->data;
+ ec_lock_link_t *link = NULL;
+ ec_lock_t *lock = NULL;
+ ec_inode_t *ctx;
+ gf_boolean_t release = _gf_false;
+ uint64_t provided_flags = 0;
+ uint64_t dirty[EC_VERSION_SIZE] = {0, 0};
+ lock = parent_link->lock;
+ parent = parent_link->fop;
+ ctx = lock->ctx;
+
+ INIT_LIST_HEAD(&list);
+ provided_flags = EC_PROVIDED_FLAGS(parent_link->waiting_flags);
+
+ LOCK(&lock->loc.inode->lock);
+
+ list_for_each_entry(link, &lock->owners, owner_list)
+ {
+ if ((link->waiting_flags & provided_flags) != 0) {
+ link->waiting_flags ^= (link->waiting_flags & provided_flags);
+ if (EC_NEEDED_FLAGS(link->waiting_flags) == 0)
+ list_add_tail(&link->fop->cbk_list, &list);
+ }
+ }
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_SIZE_VERS_GET_FAIL,
+ "Failed to get size and version : %s", ec_msg_str(fop));
+
+ goto unlock;
+ }
+
+ if (EC_FLAGS_HAVE(provided_flags, EC_FLAG_XATTROP)) {
+ op_errno = -ec_dict_del_array(dict, EC_XATTR_VERSION, ctx->pre_version,
+ EC_VERSION_SIZE);
+ if (op_errno != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ EC_MSG_VER_XATTR_GET_FAIL, "Unable to get version xattr. %s",
+ ec_msg_str(fop));
+ goto unlock;
+ }
+ ctx->post_version[0] += ctx->pre_version[0];
+ ctx->post_version[1] += ctx->pre_version[1];
+
+ ctx->have_version = _gf_true;
+
+ if (lock->loc.inode->ia_type == IA_IFREG ||
+ lock->loc.inode->ia_type == IA_INVAL) {
+ op_errno = -ec_dict_del_number(dict, EC_XATTR_SIZE, &ctx->pre_size);
+ if (op_errno != 0) {
+ if (lock->loc.inode->ia_type == IA_IFREG) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ EC_MSG_SIZE_XATTR_GET_FAIL,
+ "Unable to get size xattr. %s", ec_msg_str(fop));
+ goto unlock;
+ }
+ } else {
+ ctx->post_size = ctx->pre_size;
+
+ ctx->have_size = _gf_true;
+ }
+
+ op_errno = -ec_dict_del_config(dict, EC_XATTR_CONFIG, &ctx->config);
+ if (op_errno != 0) {
+ if ((lock->loc.inode->ia_type == IA_IFREG) ||
+ (op_errno != ENODATA)) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ EC_MSG_CONFIG_XATTR_GET_FAIL,
+ "Unable to get config xattr. %s", ec_msg_str(fop));
+
+ goto unlock;
+ }
+ } else {
+ if (!ec_config_check(parent->xl, &ctx->config)) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ EC_MSG_CONFIG_XATTR_INVALID, "Invalid config xattr");
+
+ op_errno = EINVAL;
+
+ goto unlock;
+ }
+ ctx->have_config = _gf_true;
+ }
+ }
+ ctx->have_info = _gf_true;
+ }
+
+ ec_set_dirty_flag(fop->data, ctx, dirty);
+ if (dirty[EC_METADATA_TXN] &&
+ (EC_FLAGS_HAVE(provided_flags, EC_FLAG_METADATA_DIRTY))) {
+ GF_ASSERT(!ctx->dirty[EC_METADATA_TXN]);
+ ctx->dirty[EC_METADATA_TXN] = 1;
+ }
+
+ if (dirty[EC_DATA_TXN] &&
+ (EC_FLAGS_HAVE(provided_flags, EC_FLAG_DATA_DIRTY))) {
+ GF_ASSERT(!ctx->dirty[EC_DATA_TXN]);
+ ctx->dirty[EC_DATA_TXN] = 1;
+ }
+ op_errno = 0;
+unlock:
+
+ lock->waiting_flags ^= provided_flags;
+
+ if (op_errno == 0) {
+ /* If the fop fails on any of the good bricks, it is important to mark
+ * it dirty and update versions right away if dirty was not set before.
+ */
+ if (lock->good_mask & ~(fop->good | fop->remaining)) {
+ release = _gf_true;
+ }
+
+ if (parent_link->update[0] && !parent_link->dirty[0]) {
+ lock->release |= release;
+ }
+
+ if (parent_link->update[1] && !parent_link->dirty[1]) {
+ lock->release |= release;
+ }
+
+ /* We don't allow the main fop to be executed on bricks that have not
+ * succeeded the initial xattrop. */
+ ec_lock_update_good(lock, fop);
+
+ /*As of now only data healing marks bricks as healing*/
+ lock->healing |= fop->healing;
+ }
+
+ UNLOCK(&lock->loc.inode->lock);
+
+ while (!list_empty(&list)) {
+ tmp = list_entry(list.next, ec_fop_data_t, cbk_list);
+ list_del_init(&tmp->cbk_list);
+
+ if (op_errno == 0) {
+ tmp->mask &= fop->good;
+
+ /*As of now only data healing marks bricks as healing*/
+ if (ec_is_data_fop(tmp->id)) {
+ tmp->healing |= fop->healing;
+ }
+ }
+
+ ec_resume(tmp, op_errno);
+ }
+
+ return 0;
+}
+
+static gf_boolean_t
+ec_set_needed_flag(ec_lock_t *lock, ec_lock_link_t *link, uint64_t flag)
+{
+ uint64_t current;
+
+ link->waiting_flags |= EC_FLAG_NEEDS(flag);
+
+ current = EC_NEEDED_FLAGS(lock->waiting_flags);
+ if (!EC_FLAGS_HAVE(current, flag)) {
+ lock->waiting_flags |= EC_FLAG_NEEDS(flag);
+ link->waiting_flags |= EC_FLAG_PROVIDES(flag);
+
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+static uint64_t
+ec_set_xattrop_flags_and_params(ec_lock_t *lock, ec_lock_link_t *link,
+ uint64_t *dirty)
+{
+ uint64_t oldflags = 0;
+ uint64_t newflags = 0;
+ ec_inode_t *ctx = lock->ctx;
+
+ oldflags = EC_NEEDED_FLAGS(lock->waiting_flags);
+
+ if (lock->query && !ctx->have_info) {
+ ec_set_needed_flag(lock, link, EC_FLAG_XATTROP);
+ }
+
+ if (dirty[EC_DATA_TXN]) {
+ if (!ec_set_needed_flag(lock, link, EC_FLAG_DATA_DIRTY)) {
+ dirty[EC_DATA_TXN] = 0;
+ }
+ }
+
+ if (dirty[EC_METADATA_TXN]) {
+ if (!ec_set_needed_flag(lock, link, EC_FLAG_METADATA_DIRTY)) {
+ dirty[EC_METADATA_TXN] = 0;
+ }
+ }
+ newflags = EC_NEEDED_FLAGS(lock->waiting_flags);
+
+ return oldflags ^ newflags;
+}
+
+void
+ec_get_size_version(ec_lock_link_t *link)
+{
+ loc_t loc;
+ ec_lock_t *lock;
+ ec_inode_t *ctx;
+ ec_fop_data_t *fop;
+ dict_t *dict = NULL;
+ dict_t *xdata = NULL;
+ ec_t *ec = NULL;
+ int32_t error = 0;
+ gf_boolean_t set_dirty = _gf_false;
+ uint64_t allzero[EC_VERSION_SIZE] = {0, 0};
+ uint64_t dirty[EC_VERSION_SIZE] = {0, 0};
+ lock = link->lock;
+ ctx = lock->ctx;
+ fop = link->fop;
+ ec = fop->xl->private;
+ uint64_t changed_flags = 0;
+
+ if (ec->optimistic_changelog && !(ec->node_mask & ~link->lock->good_mask) &&
+ !ec_is_data_fop(fop->id))
+ link->optimistic_changelog = _gf_true;
+
+ memset(&loc, 0, sizeof(loc));
+
+ LOCK(&lock->loc.inode->lock);
+
+ set_dirty = ec_set_dirty_flag(link, ctx, dirty);
+
+ /* If ec metadata has already been retrieved, do not try again. */
+ if (ctx->have_info) {
+ if (ec_is_data_fop(fop->id)) {
+ fop->healing |= lock->healing;
+ }
+ if (!set_dirty)
+ goto unlock;
+ }
+
+ /* Determine if there's something we need to retrieve for the current
+ * operation. */
+ if (!set_dirty && !lock->query && (lock->loc.inode->ia_type != IA_IFREG) &&
+ (lock->loc.inode->ia_type != IA_INVAL)) {
+ goto unlock;
+ }
+
+ changed_flags = ec_set_xattrop_flags_and_params(lock, link, dirty);
+ if (link->waiting_flags) {
+ /* This fop needs to wait until all its flags are cleared which
+ * potentially can be cleared by other xattrops that are already
+ * wound*/
+ ec_sleep(fop);
+ } else {
+ GF_ASSERT(!changed_flags);
+ }
+
+unlock:
+ UNLOCK(&lock->loc.inode->lock);
+
+ if (!changed_flags)
+ goto out;
+
+ dict = dict_new();
+ if (dict == NULL) {
+ error = -ENOMEM;
+ goto out;
+ }
+
+ if (EC_FLAGS_HAVE(changed_flags, EC_FLAG_XATTROP)) {
+ /* Once we know that an xattrop will be needed,
+ * we try to get all available information in a
+ * single call. */
+ error = ec_dict_set_array(dict, EC_XATTR_VERSION, allzero,
+ EC_VERSION_SIZE);
+ if (error != 0) {
+ goto out;
+ }
+
+ if (lock->loc.inode->ia_type == IA_IFREG ||
+ lock->loc.inode->ia_type == IA_INVAL) {
+ error = ec_dict_set_number(dict, EC_XATTR_SIZE, 0);
+ if (error == 0) {
+ error = ec_dict_set_number(dict, EC_XATTR_CONFIG, 0);
+ }
+ if (error != 0) {
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (xdata == NULL || dict_set_int32(xdata, GF_GET_SIZE, 1)) {
+ error = -ENOMEM;
+ goto out;
+ }
+ }
+ }
+
+ if (memcmp(allzero, dirty, sizeof(allzero))) {
+ error = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE);
+ if (error != 0) {
+ goto out;
+ }
+ }
+
+ fop->frame->root->uid = 0;
+ fop->frame->root->gid = 0;
+
+ /* For normal fops, ec_[f]xattrop() must succeed on at least
+ * EC_MINIMUM_MIN bricks, however when this is called as part of a
+ * self-heal operation the mask of target bricks (fop->mask) could
+ * contain less than EC_MINIMUM_MIN bricks, causing the xattrop to
+ * always fail. Thus we always use the same minimum used for the main
+ * fop.
+ */
+ if (lock->fd == NULL) {
+ error = ec_loc_from_loc(fop->xl, &loc, &lock->loc);
+ if (error != 0) {
+ goto out;
+ }
+ if (gf_uuid_is_null(loc.pargfid)) {
+ if (loc.parent != NULL) {
+ inode_unref(loc.parent);
+ loc.parent = NULL;
+ }
+ GF_FREE((char *)loc.path);
+ loc.path = NULL;
+ loc.name = NULL;
+ }
+
+ ec_xattrop(fop->frame, fop->xl, fop->mask, fop->minimum,
+ ec_prepare_update_cbk, link, &loc, GF_XATTROP_ADD_ARRAY64,
+ dict, xdata);
+ } else {
+ ec_fxattrop(fop->frame, fop->xl, fop->mask, fop->minimum,
+ ec_prepare_update_cbk, link, lock->fd,
+ GF_XATTROP_ADD_ARRAY64, dict, xdata);
+ }
+
+ error = 0;
+
+out:
+ fop->frame->root->uid = fop->uid;
+ fop->frame->root->gid = fop->gid;
+
+ loc_wipe(&loc);
+
+ if (dict != NULL) {
+ dict_unref(dict);
+ }
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ if (error != 0) {
+ ec_fop_set_error(fop, -error);
+ }
+}
+
+gf_boolean_t
+__ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t *size)
+{
+ ec_inode_t *ctx;
+ gf_boolean_t found = _gf_false;
+
+ ctx = __ec_inode_get(inode, fop->xl);
+ if (ctx == NULL) {
+ goto out;
+ }
+
+ if (ctx->have_size) {
+ *size = ctx->post_size;
+ found = _gf_true;
+ }
+
+out:
+ return found;
+}
+
+gf_boolean_t
+ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t *size)
+{
+ gf_boolean_t found = _gf_false;
+
+ LOCK(&inode->lock);
+ {
+ found = __ec_get_inode_size(fop, inode, size);
+ }
+ UNLOCK(&inode->lock);
+
+ return found;
+}
+
+gf_boolean_t
+__ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t size)
+{
+ ec_inode_t *ctx;
+ gf_boolean_t found = _gf_false;
+
+ ctx = __ec_inode_get(inode, fop->xl);
+ if (ctx == NULL) {
+ goto out;
+ }
+
+ /* Normal fops always have ctx->have_size set. However self-heal calls this
+ * to prepare the inode, so ctx->have_size will be false. In this case we
+ * prepare both pre_size and post_size, and set have_size and have_info to
+ * true. */
+ if (!ctx->have_size) {
+ ctx->pre_size = size;
+ ctx->have_size = ctx->have_info = _gf_true;
+ }
+ ctx->post_size = size;
+
+ found = _gf_true;
+
+out:
+ return found;
+}
+
+gf_boolean_t
+ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t size)
+{
+ gf_boolean_t found = _gf_false;
+
+ LOCK(&inode->lock);
+ {
+ found = __ec_set_inode_size(fop, inode, size);
+ }
+ UNLOCK(&inode->lock);
+
+ return found;
+}
+
+static void
+ec_release_stripe_cache(ec_inode_t *ctx)
+{
+ ec_stripe_list_t *stripe_cache = NULL;
+ ec_stripe_t *stripe = NULL;
+
+ stripe_cache = &ctx->stripe_cache;
+ while (!list_empty(&stripe_cache->lru)) {
+ stripe = list_first_entry(&stripe_cache->lru, ec_stripe_t, lru);
+ list_del(&stripe->lru);
+ GF_FREE(stripe);
+ }
+ stripe_cache->count = 0;
+ stripe_cache->max = 0;
+}
+
+void
+ec_clear_inode_info(ec_fop_data_t *fop, inode_t *inode)
+{
+ ec_inode_t *ctx;
+
+ LOCK(&inode->lock);
+
+ ctx = __ec_inode_get(inode, fop->xl);
+ if (ctx == NULL) {
+ goto unlock;
+ }
+
+ ec_release_stripe_cache(ctx);
+ ctx->have_info = _gf_false;
+ ctx->have_config = _gf_false;
+ ctx->have_version = _gf_false;
+ ctx->have_size = _gf_false;
+
+ memset(&ctx->config, 0, sizeof(ctx->config));
+ memset(ctx->pre_version, 0, sizeof(ctx->pre_version));
+ memset(ctx->post_version, 0, sizeof(ctx->post_version));
+ ctx->pre_size = ctx->post_size = 0;
+ memset(ctx->dirty, 0, sizeof(ctx->dirty));
+
+unlock:
+ UNLOCK(&inode->lock);
+}
+
+int32_t
+ec_get_real_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ ec_fop_data_t *fop = cookie;
+ ec_lock_link_t *link;
+
+ if (op_ret >= 0) {
+ link = fop->data;
+ link->size = buf->ia_size;
+ } else {
+ /* Prevent failure of parent fop. */
+ fop->error = 0;
+ }
+
+ return 0;
+}
+
+/* This function is used to get the trusted.ec.size xattr from a file when
+ * no lock is needed on the inode. This is only required to maintain iatt
+ * structs on fops that manipulate directory entries but do not operate
+ * directly on the inode, like link, rename, ...
+ *
+ * Any error processing this request is ignored. In the worst case, an invalid
+ * or not up to date value in the iatt could cause some cache invalidation.
+ */
+void
+ec_get_real_size(ec_lock_link_t *link)
+{
+ ec_fop_data_t *fop;
+ dict_t *xdata;
+
+ if (link->base == NULL || link->base->inode == NULL) {
+ return;
+ }
+
+ if (link->base->inode->ia_type != IA_IFREG) {
+ return;
+ }
+
+ fop = link->fop;
+
+ if (ec_get_inode_size(fop, link->base->inode, &link->size)) {
+ return;
+ }
+
+ xdata = dict_new();
+ if (xdata == NULL) {
+ return;
+ }
+ if (ec_dict_set_number(xdata, EC_XATTR_SIZE, 0) != 0) {
+ goto out;
+ }
+
+ /* Send a simple lookup. A single answer is considered ok since this value
+ * is only used to return an iatt struct related to an inode that is not
+ * locked and have not suffered any operation. */
+ ec_lookup(fop->frame, fop->xl, fop->mask, 1, ec_get_real_size_cbk, link,
+ link->base, xdata);
+
+out:
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+}
+
+static void
+ec_lock_update_fd(ec_lock_t *lock, ec_fop_data_t *fop)
+{
+ /* If the fop has an fd available, attach it to the lock structure to be
+ * able to do fxattrop calls instead of xattrop. */
+ if (fop->use_fd && (lock->fd == NULL)) {
+ lock->fd = __fd_ref(fop->fd);
+ }
+}
+
+static gf_boolean_t
+ec_link_has_lock_conflict(ec_lock_link_t *link, gf_boolean_t waitlist_check)
+{
+ ec_lock_link_t *trav_link = NULL;
+
+ list_for_each_entry(trav_link, &link->lock->owners, owner_list)
+ {
+ if (ec_lock_conflict(trav_link, link))
+ return _gf_true;
+ }
+
+ if (!waitlist_check)
+ return _gf_false;
+
+ list_for_each_entry(trav_link, &link->lock->waiting, wait_list)
+ {
+ if (ec_lock_conflict(trav_link, link))
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+static void
+ec_lock_wake_shared(ec_lock_t *lock, struct list_head *list)
+{
+ ec_fop_data_t *fop;
+ ec_lock_link_t *link;
+ gf_boolean_t conflict = _gf_false;
+
+ while (!conflict && !list_empty(&lock->waiting)) {
+ link = list_entry(lock->waiting.next, ec_lock_link_t, wait_list);
+ fop = link->fop;
+
+ /* If lock is not acquired, at most one fop can be assigned as owner.
+ * The following fops will need to wait in the lock->waiting queue
+ * until the lock has been fully acquired. */
+ conflict = !lock->acquired;
+
+ /* If the fop is not shareable, only this fop can be assigned as owner.
+ * Other fops will need to wait until this one finishes. */
+ if (ec_link_has_lock_conflict(link, _gf_false)) {
+ conflict = _gf_true;
+ }
+
+ /* If only one fop is allowed, it can be assigned as the owner of the
+ * lock only if there weren't any other owner. */
+ if (conflict && !list_empty(&lock->owners)) {
+ break;
+ }
+
+ list_move_tail(&link->wait_list, list);
+
+ list_add_tail(&link->owner_list, &lock->owners);
+ lock->refs_owners++;
+
+ ec_lock_update_fd(lock, fop);
+ }
+}
+
+static void
+ec_lock_apply(ec_lock_link_t *link)
+{
+ ec_fop_data_t *fop = link->fop;
+
+ fop->mask &= link->lock->good_mask;
+ fop->locked++;
+
+ ec_get_size_version(link);
+ ec_get_real_size(link);
+}
+
+gf_boolean_t
+ec_lock_acquire(ec_lock_link_t *link);
+
+static void
+ec_lock_resume_shared(struct list_head *list)
+{
+ ec_lock_link_t *link;
+
+ while (!list_empty(list)) {
+ link = list_entry(list->next, ec_lock_link_t, wait_list);
+ list_del_init(&link->wait_list);
+
+ if (link->lock->acquired) {
+ ec_lock_apply(link);
+ ec_lock(link->fop);
+ } else {
+ GF_ASSERT(list_empty(list));
+
+ ec_lock_acquire(link);
+ }
+
+ ec_resume(link->fop, 0);
+ }
+}
+
+void
+ec_lock_acquired(ec_lock_link_t *link)
+{
+ struct list_head list;
+ ec_lock_t *lock;
+ ec_fop_data_t *fop;
+
+ lock = link->lock;
+ fop = link->fop;
+
+ ec_trace("LOCKED", fop, "lock=%p", lock);
+
+ INIT_LIST_HEAD(&list);
+
+ LOCK(&lock->loc.inode->lock);
+
+ lock->acquired = _gf_true;
+ if (lock->contention) {
+ lock->release = _gf_true;
+ lock->contention = _gf_false;
+ }
+
+ ec_lock_update_fd(lock, fop);
+ ec_lock_wake_shared(lock, &list);
+
+ UNLOCK(&lock->loc.inode->lock);
+
+ ec_lock_apply(link);
+
+ if (fop->use_fd &&
+ (link->update[EC_DATA_TXN] || link->update[EC_METADATA_TXN])) {
+ /* Try to reopen closed fd's only if lock has succeeded. */
+ ec_fix_open(fop, lock->mask);
+ }
+
+ ec_lock_resume_shared(&list);
+}
+
+int32_t
+ec_locked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ ec_lock_link_t *link = NULL;
+ ec_lock_t *lock = NULL;
+
+ link = fop->data;
+ lock = link->lock;
+ if (op_ret >= 0) {
+ lock->mask = lock->good_mask = fop->good;
+ lock->healing = 0;
+
+ ec_lock_acquired(link);
+ ec_lock(fop->parent);
+ } else {
+ LOCK(&lock->loc.inode->lock);
+ {
+ lock->contention = _gf_false;
+ }
+ UNLOCK(&lock->loc.inode->lock);
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_PREOP_LOCK_FAILED,
+ "Failed to complete preop lock");
+ }
+
+ return 0;
+}
+
+gf_boolean_t
+ec_lock_acquire(ec_lock_link_t *link)
+{
+ ec_lock_t *lock;
+ ec_fop_data_t *fop;
+ gf_lkowner_t lk_owner;
+
+ lock = link->lock;
+ fop = link->fop;
+
+ if (!lock->acquired) {
+ set_lk_owner_from_ptr(&lk_owner, lock);
+
+ ec_trace("LOCK_ACQUIRE", fop, "lock=%p, inode=%p", lock,
+ lock->loc.inode);
+
+ lock->flock.l_type = F_WRLCK;
+ ec_inodelk(fop->frame, fop->xl, &lk_owner, -1, EC_MINIMUM_ALL,
+ ec_locked, link, fop->xl->name, &lock->loc, F_SETLKW,
+ &lock->flock, NULL);
+
+ return _gf_false;
+ }
+
+ ec_trace("LOCK_REUSE", fop, "lock=%p", lock);
+
+ ec_lock_acquired(link);
+
+ return _gf_true;
+}
+
+static ec_lock_link_t *
+ec_lock_timer_cancel(xlator_t *xl, ec_lock_t *lock)
+{
+ ec_lock_link_t *timer_link;
+
+ /* If we don't have any timer, there's nothing to cancel. */
+ if (lock->timer == NULL) {
+ return NULL;
+ }
+
+ /* We are trying to access a lock that has an unlock timer active.
+ * This means that the lock must be idle, i.e. no fop can be in the
+ * owner, waiting or frozen lists. It also means that the lock cannot
+ * have been marked as being released (this is done without timers).
+ * There should only be one owner reference, but it's possible that
+ * some fops are being prepared to use this lock. */
+ GF_ASSERT((lock->refs_owners == 1) && list_empty(&lock->owners) &&
+ list_empty(&lock->waiting));
+
+ /* We take the timer_link before cancelling the timer, since a
+ * successful cancellation will destroy it. It must not be NULL
+ * because it references the fop responsible for the delayed unlock
+ * that we are currently trying to cancel. */
+ timer_link = lock->timer->data;
+ GF_ASSERT(timer_link != NULL);
+
+ if (gf_timer_call_cancel(xl->ctx, lock->timer) < 0) {
+ /* It's too late to avoid the execution of the timer callback.
+ * Since we need to be sure that the callback has access to all
+ * needed resources, we cannot resume the execution of the
+ * timer fop now. This will be done in the callback. */
+ timer_link = NULL;
+ } else {
+ /* The timer has been cancelled. The fop referenced by
+ * timer_link holds the last reference. The caller is
+ * responsible to release it when not needed anymore. */
+ ec_trace("UNLOCK_CANCELLED", timer_link->fop, "lock=%p", lock);
+ }
+
+ /* We have two options here:
+ *
+ * 1. The timer has been successfully cancelled.
+ *
+ * This is the easiest case and we can continue with the currently
+ * acquired lock.
+ *
+ * 2. The timer callback has already been fired.
+ *
+ * In this case we have not been able to cancel the timer before
+ * the timer callback has been fired, but we also know that
+ * lock->timer != NULL. This means that the timer callback is still
+ * trying to acquire the inode mutex that we currently own. We are
+ * safe until we release it. In this case we can safely clear
+ * lock->timer. This will cause that the timer callback does nothing
+ * once it acquires the mutex.
+ */
+ lock->timer = NULL;
+
+ return timer_link;
+}
+
+static gf_boolean_t
+ec_lock_assign_owner(ec_lock_link_t *link)
+{
+ ec_fop_data_t *fop;
+ ec_lock_t *lock;
+ ec_lock_link_t *timer_link = NULL;
+ gf_boolean_t assigned = _gf_false;
+
+ /* The link cannot be in any list because we have just finished preparing
+ * it. */
+ GF_ASSERT(list_empty(&link->wait_list));
+
+ fop = link->fop;
+ lock = link->lock;
+
+ LOCK(&lock->loc.inode->lock);
+
+ /* Since the link has just been prepared but it's not active yet, the
+ * refs_pending must be one at least (the ref owned by this link). */
+ GF_ASSERT(lock->refs_pending > 0);
+ /* The link is not pending any more. It will be assigned to the owner,
+ * waiting or frozen list. */
+ lock->refs_pending--;
+
+ if (lock->release) {
+ ec_trace("LOCK_QUEUE_FREEZE", fop, "lock=%p", lock);
+
+ /* When lock->release is set, we'll unlock the lock as soon as
+ * possible, meaning that we won't use a timer. */
+ GF_ASSERT(lock->timer == NULL);
+
+ /* The lock is marked to be released. We can still have owners and fops
+ * in the waiting ilist f they have been added before the lock has been
+ * marked to be released. However new fops are put into the frozen list
+ * to wait for the next unlock/lock cycle. */
+ list_add_tail(&link->wait_list, &lock->frozen);
+
+ goto unlock;
+ }
+
+ /* The lock is not marked to be released, so the frozen list should be
+ * empty. */
+ GF_ASSERT(list_empty(&lock->frozen));
+
+ timer_link = ec_lock_timer_cancel(fop->xl, lock);
+
+ if (!list_empty(&lock->owners)) {
+ /* There are other owners of this lock. We can only take ownership if
+ * the lock is already acquired and doesn't have conflict with existing
+ * owners, or waiters(to prevent starvation).
+ * Otherwise we need to wait.
+ */
+ if (!lock->acquired || ec_link_has_lock_conflict(link, _gf_true)) {
+ ec_trace("LOCK_QUEUE_WAIT", fop, "lock=%p", lock);
+
+ list_add_tail(&link->wait_list, &lock->waiting);
+
+ goto unlock;
+ }
+ }
+
+ list_add_tail(&link->owner_list, &lock->owners);
+
+ /* If timer_link is not NULL, it means that we have inherited the owner
+ * reference assigned to the timer fop. In this case we simply reuse it.
+ * Otherwise we need to increase the number of owners. */
+ if (timer_link == NULL) {
+ lock->refs_owners++;
+ }
+
+ assigned = _gf_true;
+
+unlock:
+ if (!assigned) {
+ /* We have not been able to take ownership of this lock. The fop must
+ * be put to sleep. */
+ ec_sleep(fop);
+ }
+
+ UNLOCK(&lock->loc.inode->lock);
+
+ /* If we have cancelled the timer, we need to resume the fop that was
+ * waiting for it. */
+ if (timer_link != NULL) {
+ ec_resume(timer_link->fop, 0);
+ }
+
+ return assigned;
+}
+
+static void
+ec_lock_next_owner(ec_lock_link_t *link, ec_cbk_data_t *cbk,
+ gf_boolean_t release)
+{
+ struct list_head list;
+ ec_lock_t *lock = link->lock;
+ ec_fop_data_t *fop = link->fop;
+ ec_inode_t *ctx = lock->ctx;
+
+ INIT_LIST_HEAD(&list);
+
+ LOCK(&lock->loc.inode->lock);
+
+ ec_trace("LOCK_DONE", fop, "lock=%p", lock);
+
+ /* Current link must belong to the owner list of the lock. We don't
+ * decrement lock->refs_owners here because the inode mutex is released
+ * before ec_unlock() is called and we need to know when the last owner
+ * unlocks the lock to do proper cleanup. lock->refs_owners is used for
+ * this task. */
+ GF_ASSERT((lock->refs_owners > 0) && !list_empty(&link->owner_list));
+ list_del_init(&link->owner_list);
+
+ lock->release |= release;
+
+ if ((fop->error == 0) && (cbk != NULL) && (cbk->op_ret >= 0)) {
+ if (link->update[0]) {
+ ctx->post_version[0]++;
+ }
+ if (link->update[1]) {
+ ctx->post_version[1]++;
+ }
+ /* If the fop fails on any of the good bricks, it is important to mark
+ * it dirty and update versions right away. */
+ if (link->update[0] || link->update[1]) {
+ if (lock->good_mask & ~(fop->good | fop->remaining)) {
+ lock->release = _gf_true;
+ }
+ }
+ }
+
+ if (fop->healing) {
+ lock->healing = fop->healing & (fop->good | fop->remaining);
+ }
+ ec_lock_update_good(lock, fop);
+
+ ec_lock_wake_shared(lock, &list);
+
+ UNLOCK(&lock->loc.inode->lock);
+
+ ec_lock_resume_shared(&list);
+}
+
+void
+ec_lock(ec_fop_data_t *fop)
+{
+ ec_lock_link_t *link;
+
+ /* There is a chance that ec_resume is called on fop even before ec_sleep.
+ * Which can result in refs == 0 for fop leading to use after free in this
+ * function when it calls ec_sleep so do ec_sleep at start and ec_resume at
+ * the end of this function.*/
+ ec_sleep(fop);
+
+ while (fop->locked < fop->lock_count) {
+ /* Since there are only up to 2 locks per fop, this xor will change
+ * the order of the locks if fop->first_lock is 1. */
+ link = &fop->locks[fop->locked ^ fop->first_lock];
+
+ if (!ec_lock_assign_owner(link) || !ec_lock_acquire(link)) {
+ break;
+ }
+ }
+
+ ec_resume(fop, 0);
+}
+
+void
+ec_lock_unfreeze(ec_lock_link_t *link)
+{
+ struct list_head list;
+ ec_lock_t *lock;
+ gf_boolean_t destroy = _gf_false;
+
+ lock = link->lock;
+
+ INIT_LIST_HEAD(&list);
+
+ LOCK(&lock->loc.inode->lock);
+
+ /* The lock must be marked to be released here, since we have just released
+ * it and any attempt to assign it to more fops must have added them to the
+ * frozen list. We can only have one active reference here: the one that
+ * is processing this unfreeze. */
+ GF_ASSERT(lock->release && (lock->refs_owners == 1));
+ lock->release = _gf_false;
+ lock->refs_owners = 0;
+
+ lock->acquired = _gf_false;
+
+ /* We are unfreezing a lock. This means that the lock has already been
+ * released. In this state it shouldn't have a pending timer nor have any
+ * owner, and the waiting list should be empty. Only the frozen list can
+ * contain some fop. */
+ GF_ASSERT((lock->timer == NULL) && list_empty(&lock->waiting) &&
+ list_empty(&lock->owners));
+
+ /* We move all frozen fops to the waiting list. */
+ list_splice_init(&lock->frozen, &lock->waiting);
+
+ /* If we don't have any fop waiting nor there are any prepared fops using
+ * this lock, we can finally dispose it. */
+ destroy = list_empty(&lock->waiting) && (lock->refs_pending == 0);
+ if (destroy) {
+ ec_trace("LOCK_DESTROY", link->fop, "lock=%p", lock);
+
+ lock->ctx->inode_lock = NULL;
+ } else {
+ ec_trace("LOCK_UNFREEZE", link->fop, "lock=%p", lock);
+
+ ec_lock_wake_shared(lock, &list);
+ }
+
+ UNLOCK(&lock->loc.inode->lock);
+
+ ec_lock_resume_shared(&list);
+
+ if (destroy) {
+ ec_lock_destroy(lock);
+ }
+}
+
+int32_t
+ec_unlocked(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ ec_lock_link_t *link = fop->data;
+
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_UNLOCK_FAILED,
+ "entry/inode unlocking failed :(%s)", ec_msg_str(link->fop));
+ } else {
+ ec_trace("UNLOCKED", link->fop, "lock=%p", link->lock);
+ }
+
+ ec_lock_unfreeze(link);
+
+ return 0;
+}
+
+void
+ec_unlock_lock(ec_lock_link_t *link)
+{
+ ec_lock_t *lock;
+ ec_fop_data_t *fop;
+ gf_lkowner_t lk_owner;
+
+ lock = link->lock;
+ fop = link->fop;
+
+ lock->unlock_now = _gf_false;
+ ec_clear_inode_info(fop, lock->loc.inode);
+
+ if ((lock->mask != 0) && lock->acquired) {
+ set_lk_owner_from_ptr(&lk_owner, lock);
+ lock->flock.l_type = F_UNLCK;
+ ec_trace("UNLOCK_INODELK", fop, "lock=%p, inode=%p", lock,
+ lock->loc.inode);
+
+ ec_inodelk(fop->frame, fop->xl, &lk_owner, lock->mask, EC_MINIMUM_ONE,
+ ec_unlocked, link, fop->xl->name, &lock->loc, F_SETLK,
+ &lock->flock, NULL);
+ } else {
+ ec_lock_unfreeze(link);
+ }
+}
+
+void
+ec_inode_bad_inc(inode_t *inode, xlator_t *xl)
+{
+ ec_inode_t *ctx = NULL;
+
+ LOCK(&inode->lock);
+ {
+ ctx = __ec_inode_get(inode, xl);
+ if (ctx == NULL) {
+ goto unlock;
+ }
+ ctx->bad_version++;
+ }
+unlock:
+ UNLOCK(&inode->lock);
+}
+
+int32_t
+ec_update_size_version_done(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ ec_lock_link_t *link;
+ ec_lock_t *lock;
+ ec_inode_t *ctx;
+
+ link = fop->data;
+ lock = link->lock;
+ ctx = lock->ctx;
+
+ if (op_ret < 0) {
+ if (link->lock->fd == NULL) {
+ ec_inode_bad_inc(link->lock->loc.inode, this);
+ } else {
+ ec_inode_bad_inc(link->lock->fd->inode, this);
+ }
+
+ gf_msg(fop->xl->name, fop_log_level(fop->id, op_errno), op_errno,
+ EC_MSG_SIZE_VERS_UPDATE_FAIL,
+ "Failed to update version and size. %s", ec_msg_str(fop));
+ } else {
+ fop->parent->good &= fop->good;
+
+ ec_lock_update_good(lock, fop);
+
+ if (ec_dict_del_array(xattr, EC_XATTR_VERSION, ctx->post_version,
+ EC_VERSION_SIZE) == 0) {
+ ctx->pre_version[0] = ctx->post_version[0];
+ ctx->pre_version[1] = ctx->post_version[1];
+
+ ctx->have_version = _gf_true;
+ }
+ if (ec_dict_del_number(xattr, EC_XATTR_SIZE, &ctx->post_size) == 0) {
+ ctx->pre_size = ctx->post_size;
+
+ ctx->have_size = _gf_true;
+ }
+ if ((ec_dict_del_config(xdata, EC_XATTR_CONFIG, &ctx->config) == 0) &&
+ ec_config_check(fop->xl, &ctx->config)) {
+ ctx->have_config = _gf_true;
+ }
+
+ ctx->have_info = _gf_true;
+ }
+ /* If we are here because of fop's and other than unlock request,
+ * that means we are still holding a lock. That make sure
+ * lock->unlock_now can not be modified.
+ */
+ if (lock->unlock_now) {
+ ec_unlock_lock(fop->data);
+ }
+
+ return 0;
+}
+
+void
+ec_update_size_version(ec_lock_link_t *link, uint64_t *version, uint64_t size,
+ uint64_t *dirty)
+{
+ ec_fop_data_t *fop;
+ ec_lock_t *lock;
+ ec_inode_t *ctx;
+ dict_t *dict = NULL;
+ uintptr_t update_on = 0;
+ int32_t err = -ENOMEM;
+
+ fop = link->fop;
+ lock = link->lock;
+ ctx = lock->ctx;
+
+ ec_trace("UPDATE", fop, "version=%ld/%ld, size=%ld, dirty=%ld/%ld",
+ version[0], version[1], size, dirty[0], dirty[1]);
+
+ dict = dict_new();
+ if (dict == NULL) {
+ goto out;
+ }
+
+ /* If we don't have version information or it has been modified, we
+ * update it. */
+ if (!ctx->have_version || (version[0] != 0) || (version[1] != 0)) {
+ err = ec_dict_set_array(dict, EC_XATTR_VERSION, version,
+ EC_VERSION_SIZE);
+ if (err != 0) {
+ goto out;
+ }
+ }
+
+ if (size != 0) {
+ /* If size has been changed, we should already
+ * know the previous size of the file. */
+ GF_ASSERT(ctx->have_size);
+
+ err = ec_dict_set_number(dict, EC_XATTR_SIZE, size);
+ if (err != 0) {
+ goto out;
+ }
+ }
+
+ if (dirty[0] || dirty[1]) {
+ err = ec_dict_set_array(dict, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE);
+ if (err != 0) {
+ goto out;
+ }
+ }
+
+ /* If config information is not known, we request it now. */
+ if ((lock->loc.inode->ia_type == IA_IFREG) && !ctx->have_config) {
+ /* A failure requesting this xattr is ignored because it's not
+ * absolutely required right now. */
+ (void)ec_dict_set_number(dict, EC_XATTR_CONFIG, 0);
+ }
+
+ fop->frame->root->uid = 0;
+ fop->frame->root->gid = 0;
+
+ update_on = lock->good_mask | lock->healing;
+
+ if (link->lock->fd == NULL) {
+ ec_xattrop(fop->frame, fop->xl, update_on, EC_MINIMUM_MIN,
+ ec_update_size_version_done, link, &link->lock->loc,
+ GF_XATTROP_ADD_ARRAY64, dict, NULL);
+ } else {
+ ec_fxattrop(fop->frame, fop->xl, update_on, EC_MINIMUM_MIN,
+ ec_update_size_version_done, link, link->lock->fd,
+ GF_XATTROP_ADD_ARRAY64, dict, NULL);
+ }
+
+ fop->frame->root->uid = fop->uid;
+ fop->frame->root->gid = fop->gid;
+
+ dict_unref(dict);
+
+ return;
+
+out:
+ if (dict != NULL) {
+ dict_unref(dict);
+ }
+
+ ec_fop_set_error(fop, -err);
+
+ gf_msg(fop->xl->name, GF_LOG_ERROR, -err, EC_MSG_SIZE_VERS_UPDATE_FAIL,
+ "Unable to update version and size. %s", ec_msg_str(fop));
+
+ if (lock->unlock_now) {
+ ec_unlock_lock(fop->data);
+ }
+}
+
+gf_boolean_t
+ec_update_info(ec_lock_link_t *link)
+{
+ ec_lock_t *lock;
+ ec_inode_t *ctx;
+ uint64_t version[2] = {0, 0};
+ uint64_t dirty[2] = {0, 0};
+ uint64_t size;
+ ec_t *ec = NULL;
+ uintptr_t mask;
+
+ lock = link->lock;
+ ctx = lock->ctx;
+ ec = link->fop->xl->private;
+
+ /* pre_version[*] will be 0 if have_version is false */
+ version[EC_DATA_TXN] = ctx->post_version[EC_DATA_TXN] -
+ ctx->pre_version[EC_DATA_TXN];
+ version[EC_METADATA_TXN] = ctx->post_version[EC_METADATA_TXN] -
+ ctx->pre_version[EC_METADATA_TXN];
+
+ size = ctx->post_size - ctx->pre_size;
+ /* If we set the dirty flag for update fop, we have to unset it.
+ * If fop has failed on some bricks, leave the dirty as marked. */
+
+ if (lock->unlock_now) {
+ if (version[EC_DATA_TXN]) {
+ /*A data fop will have difference in post and pre version
+ *and for data fop we send writes on healing bricks also */
+ mask = lock->good_mask | lock->healing;
+ } else {
+ mask = lock->good_mask;
+ }
+ /* Ensure that nodes are up while doing final
+ * metadata update.*/
+ if (!(ec->node_mask & ~(mask)) && !(ec->node_mask & ~ec->xl_up)) {
+ if (ctx->dirty[EC_DATA_TXN] != 0) {
+ dirty[EC_DATA_TXN] = -1;
+ }
+ if (ctx->dirty[EC_METADATA_TXN] != 0) {
+ dirty[EC_METADATA_TXN] = -1;
+ }
+ /*If everything is fine and we already
+ *have version xattr set on entry, there
+ *is no need to update version again*/
+ if (ctx->pre_version[EC_DATA_TXN]) {
+ version[EC_DATA_TXN] = 0;
+ }
+ if (ctx->pre_version[EC_METADATA_TXN]) {
+ version[EC_METADATA_TXN] = 0;
+ }
+ } else {
+ link->optimistic_changelog = _gf_false;
+ ec_set_dirty_flag(link, ctx, dirty);
+ }
+ memset(ctx->dirty, 0, sizeof(ctx->dirty));
+ }
+
+ if ((version[EC_DATA_TXN] != 0) || (version[EC_METADATA_TXN] != 0) ||
+ (dirty[EC_DATA_TXN] != 0) || (dirty[EC_METADATA_TXN] != 0)) {
+ ec_update_size_version(link, version, size, dirty);
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+void
+ec_unlock_now(ec_lock_link_t *link)
+{
+ ec_lock_t *lock;
+ lock = link->lock;
+
+ ec_trace("UNLOCK_NOW", link->fop, "lock=%p", link->lock);
+ /*At this point, lock is not being used by any fop and
+ *can not be reused by any fop as it is going to be released.
+ *lock->unlock_now can not be modified at any other place.
+ */
+ lock->unlock_now = _gf_true;
+
+ if (!ec_update_info(link)) {
+ ec_unlock_lock(link);
+ }
+
+ ec_resume(link->fop, 0);
+}
+
+void
+ec_lock_release(ec_t *ec, inode_t *inode)
+{
+ ec_lock_t *lock;
+ ec_inode_t *ctx;
+ ec_lock_link_t *timer_link = NULL;
+
+ LOCK(&inode->lock);
+
+ ctx = __ec_inode_get(inode, ec->xl);
+ if (ctx == NULL) {
+ goto done;
+ }
+ lock = ctx->inode_lock;
+ if ((lock == NULL) || lock->release) {
+ goto done;
+ }
+
+ gf_msg_debug(ec->xl->name, 0, "Releasing inode %p due to lock contention",
+ inode);
+
+ if (!lock->acquired) {
+ /* This happens if some bricks already got the lock while inodelk is in
+ * progress. Set release to true after lock is acquired*/
+ lock->contention = _gf_true;
+ goto done;
+ }
+
+ /* The lock is not marked to be released, so the frozen list should be
+ * empty. */
+ GF_ASSERT(list_empty(&lock->frozen));
+
+ timer_link = ec_lock_timer_cancel(ec->xl, lock);
+
+ /* We mark the lock to be released as soon as possible. */
+ lock->release = _gf_true;
+
+done:
+ UNLOCK(&inode->lock);
+
+ /* If we have cancelled the timer, we need to start the unlock of the
+ * inode. If there was a timer but we have been unable to cancel it
+ * because it was just triggered, the timer callback will take care
+ * of releasing the inode. */
+ if (timer_link != NULL) {
+ ec_unlock_now(timer_link);
+ }
+}
+
+void
+ec_unlock_timer_add(ec_lock_link_t *link);
+
+void
+ec_unlock_timer_del(ec_lock_link_t *link)
+{
+ ec_lock_t *lock;
+ inode_t *inode;
+ gf_boolean_t now = _gf_false;
+
+ /* If we are here, it means that the timer has expired before having
+ * been cancelled. This guarantees that 'link' is still valid because
+ * the fop that contains it must be pending (if timer cancellation in
+ * ec_lock_assign_owner() fails, the fop is left sleeping).
+ *
+ * At the same time, the fop still has a reference to the lock, so
+ * it must also be valid.
+ */
+ lock = link->lock;
+
+ /* 'lock' must have a valid inode since it can only be destroyed
+ * when the lock itself is destroyed, but we have a reference to the
+ * lock to avoid this.
+ */
+ inode = lock->loc.inode;
+
+ LOCK(&inode->lock);
+
+ if (lock->timer != NULL) {
+ ec_trace("UNLOCK_DELAYED", link->fop, "lock=%p", lock);
+
+ /* The unlock timer has expired without anyone cancelling it.
+ * This means that it shouldn't have any owner, and the waiting
+ * and frozen lists should be empty. It must have only one
+ * owner reference, but there can be fops being prepared
+ * though.
+ * */
+ GF_ASSERT(!lock->release && (lock->refs_owners == 1) &&
+ list_empty(&lock->owners) && list_empty(&lock->waiting) &&
+ list_empty(&lock->frozen));
+
+ gf_timer_call_cancel(link->fop->xl->ctx, lock->timer);
+ lock->timer = NULL;
+
+ /* Any fop being processed from now on, will need to wait
+ * until the next unlock/lock cycle. */
+ lock->release = now = _gf_true;
+ }
+
+ UNLOCK(&inode->lock);
+
+ if (now) {
+ ec_unlock_now(link);
+ } else {
+ /* The timer has been cancelled just after firing it but before
+ * getting here. This means that another fop has used the lock
+ * and everything should be handled as if this callback were
+ * have not been executed. However we still have an owner
+ * reference.
+ *
+ * We need to release our reference. If this is not the last
+ * reference (the most common case because another fop has
+ * taken another ref) we only need to decrement the counter.
+ * Otherwise we have been delayed enough so that the other fop
+ * has had time to acquire the reference, do its operation and
+ * release it. At the time of releasing it, the fop did found
+ * that the ref counter was > 1 (our reference), so the delayed
+ * unlock timer wasn't started. We need to start it again if we
+ * are the last reference.
+ *
+ * ec_unlock_timer_add() handles both cases.
+ */
+ ec_unlock_timer_add(link);
+
+ /* We need to resume the fop that was waiting for the delayed
+ * unlock.
+ */
+ ec_resume(link->fop, 0);
+ }
+}
+
+void
+ec_unlock_timer_cbk(void *data)
+{
+ ec_unlock_timer_del(data);
+}
+
+static gf_boolean_t
+ec_eager_lock_used(ec_t *ec, ec_fop_data_t *fop)
+{
+ /* Fops with no locks at this point mean that they are sent as sub-fops
+ * of other higher level fops. In this case we simply assume that the
+ * parent fop will take correct care of the eager lock. */
+ if (fop->lock_count == 0) {
+ return _gf_true;
+ }
+
+ /* We may have more than one lock, but this only happens in the rename
+ * fop, and both locks will reference an inode of the same type (a
+ * directory in this case), so we only need to check the first lock. */
+ if (fop->locks[0].lock->loc.inode->ia_type == IA_IFREG) {
+ return ec->eager_lock;
+ }
+
+ return ec->other_eager_lock;
+}
+
+static uint32_t
+ec_eager_lock_timeout(ec_t *ec, ec_lock_t *lock)
+{
+ if (lock->loc.inode->ia_type == IA_IFREG) {
+ return ec->eager_lock_timeout;
+ }
+
+ return ec->other_eager_lock_timeout;
+}
+
+static gf_boolean_t
+ec_lock_delay_create(ec_lock_link_t *link)
+{
+ struct timespec delay;
+ ec_fop_data_t *fop = link->fop;
+ ec_lock_t *lock = link->lock;
+
+ delay.tv_sec = ec_eager_lock_timeout(fop->xl->private, lock);
+ delay.tv_nsec = 0;
+ lock->timer = gf_timer_call_after(fop->xl->ctx, delay, ec_unlock_timer_cbk,
+ link);
+ if (lock->timer == NULL) {
+ gf_msg(fop->xl->name, GF_LOG_WARNING, ENOMEM,
+ EC_MSG_UNLOCK_DELAY_FAILED, "Unable to delay an unlock");
+
+ return _gf_false;
+ }
+
+ return _gf_true;
+}
+
+void
+ec_unlock_timer_add(ec_lock_link_t *link)
+{
+ ec_fop_data_t *fop = link->fop;
+ ec_lock_t *lock = link->lock;
+ gf_boolean_t now = _gf_false;
+
+ LOCK(&lock->loc.inode->lock);
+
+ /* We are trying to unlock the lock. We can have multiple scenarios here,
+ * but all of them need to have lock->timer == NULL:
+ *
+ * 1. There are other owners currently running that can call ec_unlock().
+ *
+ * None of them can have started the timer until the last one. But this
+ * call should be the consequence of this lastest one.
+ *
+ * 2. There are fops in the waiting or frozen lists.
+ *
+ * These fops cannot call ec_unlock(). So we should be here.
+ *
+ * We must reach here with at least one owner reference.
+ */
+ GF_ASSERT((lock->timer == NULL) && (lock->refs_owners > 0));
+
+ /* If the fop detects that a heal is needed, we mark the lock to be
+ * released as soon as possible. */
+ lock->release |= ec_fop_needs_heal(fop);
+
+ if (lock->refs_owners > 1) {
+ ec_trace("UNLOCK_SKIP", fop, "lock=%p", lock);
+
+ /* If there are other owners we cannot do anything else with the lock.
+ * Note that the current fop has already been removed from the owners
+ * list in ec_lock_reuse(). */
+ lock->refs_owners--;
+
+ UNLOCK(&lock->loc.inode->lock);
+ } else if (lock->acquired) {
+ /* There are no other owners and the lock is acquired. If there were
+ * fops waiting, at least one of them should have been promoted to an
+ * owner, so the waiting list should be empty. */
+ GF_ASSERT(list_empty(&lock->owners) && list_empty(&lock->waiting));
+
+ ec_t *ec = fop->xl->private;
+
+ /* If everything goes as expected this fop will be put to sleep until
+ * the timer callback is executed. */
+ ec_sleep(fop);
+
+ /* If the lock needs to be released, or ec is shutting down, do not
+ * delay lock release. */
+ if (!lock->release && !ec->shutdown) {
+ ec_trace("UNLOCK_DELAY", fop, "lock=%p, release=%d", lock,
+ lock->release);
+
+ if (!ec_lock_delay_create(link)) {
+ /* We are unable to create a new timer. We immediately release
+ * the lock. */
+ lock->release = now = _gf_true;
+ }
+
+ } else {
+ ec_trace("UNLOCK_FORCE", fop, "lock=%p, release=%d", lock,
+ lock->release);
+ lock->release = now = _gf_true;
+ }
+
+ UNLOCK(&lock->loc.inode->lock);
+
+ if (now) {
+ ec_unlock_now(link);
+ }
+ } else {
+ /* There are no owners and the lock is not acquired. This can only
+ * happen if a lock attempt has failed and we get to the unlock step
+ * of the fop. As in the previous case, the waiting list must be
+ * empty. */
+ GF_ASSERT(list_empty(&lock->owners) && list_empty(&lock->waiting));
+
+ /* We need to mark the lock to be released to correctly handle fops
+ * that may get in after we release the inode mutex but before
+ * ec_lock_unfreeze() is processed. */
+ lock->release = _gf_true;
+
+ UNLOCK(&lock->loc.inode->lock);
+
+ ec_lock_unfreeze(link);
+ }
+}
+
+void
+ec_unlock(ec_fop_data_t *fop)
+{
+ int32_t i;
+
+ for (i = 0; i < fop->lock_count; i++) {
+ ec_unlock_timer_add(&fop->locks[i]);
+ }
+}
+
+void
+ec_flush_size_version(ec_fop_data_t *fop)
+{
+ GF_ASSERT(fop->lock_count == 1);
+ ec_update_info(&fop->locks[0]);
+}
+
+static void
+ec_update_stripe(ec_t *ec, ec_stripe_list_t *stripe_cache, ec_stripe_t *stripe,
+ ec_fop_data_t *fop)
+{
+ off_t base;
+
+ /* On write fops, we only update existing fragments if the write has
+ * succeeded. Otherwise, we remove them from the cache. */
+ if ((fop->id == GF_FOP_WRITE) && (fop->answer != NULL) &&
+ (fop->answer->op_ret >= 0)) {
+ base = stripe->frag_offset - fop->frag_range.first;
+ base *= ec->fragments;
+
+ /* We check if the stripe offset falls inside the real region
+ * modified by the write fop (a write request is allowed,
+ * though uncommon, to write less bytes than requested). The
+ * current write fop implementation doesn't allow partial
+ * writes of fragments, so if there's no error, we are sure
+ * that a full stripe has been completely modified or not
+ * touched at all. The value of op_ret may not be a multiple
+ * of the stripe size because it depends on the requested
+ * size by the user, so we update the stripe if the write has
+ * modified at least one byte (meaning ec has written the full
+ * stripe). */
+ if (base < fop->answer->op_ret + fop->head) {
+ memcpy(stripe->data, fop->vector[0].iov_base + base,
+ ec->stripe_size);
+ list_move_tail(&stripe->lru, &stripe_cache->lru);
+
+ GF_ATOMIC_INC(ec->stats.stripe_cache.updates);
+ }
+ } else {
+ stripe->frag_offset = -1;
+ list_move(&stripe->lru, &stripe_cache->lru);
+
+ GF_ATOMIC_INC(ec->stats.stripe_cache.invals);
+ }
+}
+
+static void
+ec_update_cached_stripes(ec_fop_data_t *fop)
+{
+ uint64_t first;
+ uint64_t last;
+ ec_stripe_t *stripe = NULL;
+ ec_inode_t *ctx = NULL;
+ ec_stripe_list_t *stripe_cache = NULL;
+ inode_t *inode = NULL;
+ struct list_head *temp;
+ struct list_head sentinel;
+
+ first = fop->frag_range.first;
+ /* 'last' represents the first stripe not touched by the operation */
+ last = fop->frag_range.last;
+
+ /* If there are no modified stripes, we don't need to do anything
+ * else. */
+ if (last <= first) {
+ return;
+ }
+
+ if (!fop->use_fd) {
+ inode = fop->loc[0].inode;
+ } else {
+ inode = fop->fd->inode;
+ }
+
+ LOCK(&inode->lock);
+
+ ctx = __ec_inode_get(inode, fop->xl);
+ if (ctx == NULL) {
+ goto out;
+ }
+ stripe_cache = &ctx->stripe_cache;
+
+ /* Since we'll be moving elements of the list to the tail, we might
+ * end in an infinite loop. To avoid it, we insert a sentinel element
+ * into the list, so that it will be used to detect when we have
+ * traversed all existing elements once. */
+ list_add_tail(&sentinel, &stripe_cache->lru);
+ temp = stripe_cache->lru.next;
+ while (temp != &sentinel) {
+ stripe = list_entry(temp, ec_stripe_t, lru);
+ temp = temp->next;
+ if ((first <= stripe->frag_offset) && (stripe->frag_offset < last)) {
+ ec_update_stripe(fop->xl->private, stripe_cache, stripe, fop);
+ }
+ }
+ list_del(&sentinel);
+
+out:
+ UNLOCK(&inode->lock);
+}
+
+void
+ec_lock_reuse(ec_fop_data_t *fop)
+{
+ ec_cbk_data_t *cbk;
+ ec_t *ec = NULL;
+ int32_t i, count;
+ gf_boolean_t release = _gf_false;
+ ec = fop->xl->private;
+ cbk = fop->answer;
+
+ if (ec_eager_lock_used(ec, fop) && cbk != NULL) {
+ if (cbk->xdata != NULL) {
+ if ((dict_get_int32(cbk->xdata, GLUSTERFS_INODELK_COUNT, &count) ==
+ 0) &&
+ (count > 1)) {
+ release = _gf_true;
+ }
+ if (release) {
+ gf_msg_debug(fop->xl->name, 0, "Lock contention detected");
+ }
+ }
+ } else {
+ /* If eager lock is disabled or if we haven't get
+ * an answer with enough quorum, we always release
+ * the lock. */
+ release = _gf_true;
+ }
+ ec_update_cached_stripes(fop);
+
+ for (i = 0; i < fop->lock_count; i++) {
+ ec_lock_next_owner(&fop->locks[i], cbk, release);
+ }
+}
+
+void
+__ec_manager(ec_fop_data_t *fop, int32_t error)
+{
+ ec_t *ec = fop->xl->private;
+
+ do {
+ ec_trace("MANAGER", fop, "error=%d", error);
+
+ if (!ec_must_wind(fop)) {
+ if (ec->xl_up_count < ec->fragments) {
+ error = ENOTCONN;
+ }
+ }
+
+ if (error != 0) {
+ fop->error = error;
+ fop->state = -fop->state;
+ }
+
+ if ((fop->state == EC_STATE_END) || (fop->state == -EC_STATE_END)) {
+ ec_fop_data_release(fop);
+
+ break;
+ }
+
+ /* At each state, fop must not be used anywhere else and there
+ * shouldn't be any pending subfop going on. */
+ GF_ASSERT(fop->jobs == 0);
+
+ /* While the manager is running we need to avoid that subfops launched
+ * from it could finish and call ec_resume() before the fop->handler
+ * has completed. This could lead to the same manager being executed
+ * by two threads concurrently. ec_check_complete() will take care of
+ * this reference. */
+ fop->jobs = 1;
+
+ fop->state = fop->handler(fop, fop->state);
+ GF_ASSERT(fop->state >= 0);
+
+ error = ec_check_complete(fop, __ec_manager);
+ } while (error >= 0);
+}
+
+void
+ec_manager(ec_fop_data_t *fop, int32_t error)
+{
+ GF_ASSERT(fop->jobs == 0);
+ GF_ASSERT(fop->winds == 0);
+ GF_ASSERT(fop->error == 0);
+
+ if (fop->state == EC_STATE_START) {
+ fop->state = EC_STATE_INIT;
+ }
+
+ __ec_manager(fop, error);
+}
+
+gf_boolean_t
+__ec_is_last_fop(ec_t *ec)
+{
+ if ((list_empty(&ec->pending_fops)) &&
+ (GF_ATOMIC_GET(ec->async_fop_count) == 0)) {
+ return _gf_true;
+ }
+ return _gf_false;
+}
diff --git a/xlators/cluster/ec/src/ec-common.h b/xlators/cluster/ec/src/ec-common.h
new file mode 100644
index 00000000000..51493612ac6
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-common.h
@@ -0,0 +1,234 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_COMMON_H__
+#define __EC_COMMON_H__
+
+#include "glusterfs/compat-errno.h" // for ENODATA on BSD
+#include "ec-data.h"
+
+typedef enum { EC_DATA_TXN, EC_METADATA_TXN } ec_txn_t;
+
+#define EC_FOP_HEAL -1
+#define EC_FOP_FHEAL -2
+
+#define EC_CONFIG_VERSION 0
+
+#define EC_CONFIG_ALGORITHM 0
+
+#define EC_FLAG_LOCK_SHARED 0x0001
+
+#define QUORUM_CBK(fn, fop, frame, cookie, this, op_ret, op_errno, params...) \
+ do { \
+ ec_t *__ec = fop->xl->private; \
+ int32_t __op_ret = 0; \
+ int32_t __op_errno = 0; \
+ int32_t __success_count = gf_bits_count(fop->good); \
+ \
+ __op_ret = op_ret; \
+ __op_errno = op_errno; \
+ if (!fop->parent && frame && \
+ (GF_CLIENT_PID_SELF_HEALD != frame->root->pid) && \
+ __ec->quorum_count && (__success_count < __ec->quorum_count) && \
+ op_ret >= 0) { \
+ __op_ret = -1; \
+ __op_errno = EIO; \
+ gf_msg(__ec->xl->name, GF_LOG_ERROR, 0, \
+ EC_MSG_CHILDS_INSUFFICIENT, \
+ "Insufficient available children for this request " \
+ "(have %d, need %d). %s", \
+ __success_count, __ec->quorum_count, ec_msg_str(fop)); \
+ } \
+ fn(frame, cookie, this, __op_ret, __op_errno, params); \
+ } while (0)
+
+enum _ec_xattrop_flags {
+ EC_FLAG_XATTROP,
+ EC_FLAG_DATA_DIRTY,
+ EC_FLAG_METADATA_DIRTY,
+
+ /* Add any new flag here, before EC_FLAG_MAX. The maximum number of
+ * flags that can be defined is 16. */
+
+ EC_FLAG_MAX
+};
+
+/* We keep two sets of flags. One to determine what's really providing the
+ * current xattrop and the other to know what the parent fop of the xattrop
+ * needs to proceed. It might happen that a fop needs some information that
+ * is being already requested by a previous fop. The two sets are stored
+ * contiguously. */
+
+#define EC_FLAG_NEEDS(_flag) (1 << (_flag))
+#define EC_FLAG_PROVIDES(_flag) (1 << ((_flag) + EC_FLAG_MAX))
+
+#define EC_NEEDED_FLAGS(_flags) ((_flags) & ((1 << EC_FLAG_MAX) - 1))
+
+#define EC_PROVIDED_FLAGS(_flags) EC_NEEDED_FLAGS((_flags) >> EC_FLAG_MAX)
+
+#define EC_FLAGS_HAVE(_flags, _flag) (((_flags) & (1 << (_flag))) != 0)
+
+#define EC_SELFHEAL_BIT 62
+
+#define EC_MINIMUM_ONE (1 << 6)
+#define EC_MINIMUM_MIN (2 << 6)
+#define EC_MINIMUM_ALL (3 << 6)
+#define EC_FOP_NO_PROPAGATE_ERROR (1 << 8)
+#define EC_FOP_MINIMUM(_flags) ((_flags)&255)
+#define EC_FOP_FLAGS(_flags) ((_flags) & ~255)
+
+#define EC_UPDATE_DATA 1
+#define EC_UPDATE_META 2
+#define EC_QUERY_INFO 4
+#define EC_INODE_SIZE 8
+
+#define EC_STATE_START 0
+#define EC_STATE_END 0
+#define EC_STATE_INIT 1
+#define EC_STATE_LOCK 2
+#define EC_STATE_DISPATCH 3
+#define EC_STATE_PREPARE_ANSWER 4
+#define EC_STATE_REPORT 5
+#define EC_STATE_LOCK_REUSE 6
+#define EC_STATE_UNLOCK 7
+
+#define EC_STATE_DELAYED_START 100
+
+#define EC_STATE_HEAL_ENTRY_LOOKUP 200
+#define EC_STATE_HEAL_ENTRY_PREPARE 201
+#define EC_STATE_HEAL_PRE_INODELK_LOCK 202
+#define EC_STATE_HEAL_PRE_INODE_LOOKUP 203
+#define EC_STATE_HEAL_XATTRIBUTES_REMOVE 204
+#define EC_STATE_HEAL_XATTRIBUTES_SET 205
+#define EC_STATE_HEAL_ATTRIBUTES 206
+#define EC_STATE_HEAL_OPEN 207
+#define EC_STATE_HEAL_REOPEN_FD 208
+#define EC_STATE_HEAL_UNLOCK 209
+#define EC_STATE_HEAL_UNLOCK_ENTRY 210
+#define EC_STATE_HEAL_DATA_LOCK 211
+#define EC_STATE_HEAL_DATA_COPY 212
+#define EC_STATE_HEAL_DATA_UNLOCK 213
+#define EC_STATE_HEAL_POST_INODELK_LOCK 214
+#define EC_STATE_HEAL_POST_INODE_LOOKUP 215
+#define EC_STATE_HEAL_SETATTR 216
+#define EC_STATE_HEAL_POST_INODELK_UNLOCK 217
+#define EC_STATE_HEAL_DISPATCH 218
+
+/* Value to cover the full range of a file */
+#define EC_RANGE_FULL ((uint64_t)LLONG_MAX + 1)
+
+gf_boolean_t
+ec_dispatch_one_retry(ec_fop_data_t *fop, ec_cbk_data_t **cbk);
+void
+ec_dispatch_next(ec_fop_data_t *fop, uint32_t idx);
+
+void
+ec_complete(ec_fop_data_t *fop);
+
+void
+ec_update_good(ec_fop_data_t *fop, uintptr_t good);
+
+void
+ec_fop_set_error(ec_fop_data_t *fop, int32_t error);
+
+void
+__ec_fop_set_error(ec_fop_data_t *fop, int32_t error);
+
+ec_cbk_data_t *
+ec_fop_prepare_answer(ec_fop_data_t *fop, gf_boolean_t ro);
+
+gf_boolean_t
+ec_cbk_set_error(ec_cbk_data_t *cbk, int32_t error, gf_boolean_t ro);
+
+void
+ec_lock_prepare_inode(ec_fop_data_t *fop, loc_t *loc, uint32_t flags,
+ off_t fl_start, uint64_t fl_size);
+void
+ec_lock_prepare_parent_inode(ec_fop_data_t *fop, loc_t *loc, loc_t *base,
+ uint32_t flags);
+void
+ec_lock_prepare_fd(ec_fop_data_t *fop, fd_t *fd, uint32_t flags, off_t fl_start,
+ uint64_t fl_size);
+void
+ec_lock(ec_fop_data_t *fop);
+void
+ec_lock_reuse(ec_fop_data_t *fop);
+void
+ec_unlock(ec_fop_data_t *fop);
+void
+ec_lock_release(ec_t *ec, inode_t *inode);
+
+gf_boolean_t
+ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t *size);
+gf_boolean_t
+__ec_get_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t *size);
+gf_boolean_t
+ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t size);
+gf_boolean_t
+__ec_set_inode_size(ec_fop_data_t *fop, inode_t *inode, uint64_t size);
+void
+ec_clear_inode_info(ec_fop_data_t *fop, inode_t *inode);
+
+void
+ec_flush_size_version(ec_fop_data_t *fop);
+
+void
+ec_dispatch_all(ec_fop_data_t *fop);
+void
+ec_dispatch_inc(ec_fop_data_t *fop);
+void
+ec_dispatch_min(ec_fop_data_t *fop);
+void
+ec_dispatch_one(ec_fop_data_t *fop);
+
+void
+ec_succeed_all(ec_fop_data_t *fop);
+
+void
+ec_sleep(ec_fop_data_t *fop);
+void
+ec_resume(ec_fop_data_t *fop, int32_t error);
+void
+ec_resume_parent(ec_fop_data_t *fop);
+
+void
+ec_manager(ec_fop_data_t *fop, int32_t error);
+gf_boolean_t
+ec_is_recoverable_error(int32_t op_errno);
+void
+ec_handle_healers_done(ec_fop_data_t *fop);
+
+int32_t
+ec_heal_inspect(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *locked_on, gf_boolean_t self_locked,
+ gf_boolean_t thorough, ec_heal_need_t *need_heal);
+int32_t
+ec_get_heal_info(xlator_t *this, loc_t *loc, dict_t **dict);
+
+int32_t
+ec_lock_unlocked(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+void
+ec_update_fd_status(fd_t *fd, xlator_t *xl, int child_index,
+ int32_t ret_status);
+gf_boolean_t
+ec_is_entry_healing(ec_fop_data_t *fop);
+void
+ec_set_entry_healing(ec_fop_data_t *fop);
+void
+ec_reset_entry_healing(ec_fop_data_t *fop);
+char *
+ec_msg_str(ec_fop_data_t *fop);
+gf_boolean_t
+__ec_is_last_fop(ec_t *ec);
+void
+ec_lock_update_good(ec_lock_t *lock, ec_fop_data_t *fop);
+#endif /* __EC_COMMON_H__ */
diff --git a/xlators/cluster/ec/src/ec-data.c b/xlators/cluster/ec/src/ec-data.c
new file mode 100644
index 00000000000..06388833546
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-data.c
@@ -0,0 +1,288 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-data.h"
+#include "ec-messages.h"
+
+ec_cbk_data_t *
+ec_cbk_data_allocate(call_frame_t *frame, xlator_t *this, ec_fop_data_t *fop,
+ int32_t id, int32_t idx, int32_t op_ret, int32_t op_errno)
+{
+ ec_cbk_data_t *cbk;
+ ec_t *ec = this->private;
+
+ if (fop->xl != this) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_XLATOR_MISMATCH,
+ "Mismatching xlators between request "
+ "and answer (req=%s, ans=%s).",
+ fop->xl->name, this->name);
+
+ return NULL;
+ }
+ if (fop->frame != frame) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_FRAME_MISMATCH,
+ "Mismatching frames between request "
+ "and answer (req=%p, ans=%p).",
+ fop->frame, frame);
+
+ return NULL;
+ }
+ if (fop->id != id) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_FOP_MISMATCH,
+ "Mismatching fops between request "
+ "and answer (req=%d, ans=%d).",
+ fop->id, id);
+
+ return NULL;
+ }
+
+ cbk = mem_get0(ec->cbk_pool);
+ if (cbk == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to allocate memory for an "
+ "answer.");
+ return NULL;
+ }
+
+ cbk->fop = fop;
+ cbk->idx = idx;
+ cbk->mask = 1ULL << idx;
+ cbk->count = 1;
+ cbk->op_ret = op_ret;
+ cbk->op_errno = op_errno;
+ INIT_LIST_HEAD(&cbk->entries.list);
+
+ LOCK(&fop->lock);
+
+ list_add_tail(&cbk->answer_list, &fop->answer_list);
+
+ UNLOCK(&fop->lock);
+
+ return cbk;
+}
+
+void
+ec_cbk_data_destroy(ec_cbk_data_t *cbk)
+{
+ if (cbk->xdata != NULL) {
+ dict_unref(cbk->xdata);
+ }
+ if (cbk->dict != NULL) {
+ dict_unref(cbk->dict);
+ }
+ if (cbk->inode != NULL) {
+ inode_unref(cbk->inode);
+ }
+ if (cbk->fd != NULL) {
+ fd_unref(cbk->fd);
+ }
+ if (cbk->buffers != NULL) {
+ iobref_unref(cbk->buffers);
+ }
+ GF_FREE(cbk->vector);
+ gf_dirent_free(&cbk->entries);
+ GF_FREE(cbk->str);
+
+ mem_put(cbk);
+}
+
+ec_fop_data_t *
+ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id,
+ uint32_t flags, uintptr_t target, uint32_t fop_flags,
+ ec_wind_f wind, ec_handler_f handler, ec_cbk_t cbks,
+ void *data)
+{
+ ec_fop_data_t *fop, *parent;
+ ec_t *ec = this->private;
+
+ fop = mem_get0(ec->fop_pool);
+ if (fop == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to allocate memory for a "
+ "request.");
+
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&fop->cbk_list);
+ INIT_LIST_HEAD(&fop->healer);
+ INIT_LIST_HEAD(&fop->answer_list);
+ INIT_LIST_HEAD(&fop->pending_list);
+ INIT_LIST_HEAD(&fop->locks[0].owner_list);
+ INIT_LIST_HEAD(&fop->locks[0].wait_list);
+ INIT_LIST_HEAD(&fop->locks[1].owner_list);
+ INIT_LIST_HEAD(&fop->locks[1].wait_list);
+
+ fop->xl = this;
+ fop->req_frame = frame;
+
+ /* fops need a private frame to be able to execute some postop operations
+ * even if the original fop has completed and reported back to the upper
+ * xlator and it has destroyed the base frame.
+ *
+ * TODO: minimize usage of private frames. Reuse req_frame as much as
+ * possible.
+ */
+ if (frame != NULL) {
+ fop->frame = copy_frame(frame);
+ } else {
+ fop->frame = create_frame(this, this->ctx->pool);
+ }
+ if (fop->frame == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to create a private frame "
+ "for a request");
+
+ mem_put(fop);
+
+ return NULL;
+ }
+ fop->id = id;
+ fop->refs = 1;
+
+ fop->flags = flags;
+ fop->minimum = EC_FOP_MINIMUM(fop_flags);
+ fop->fop_flags = EC_FOP_FLAGS(fop_flags);
+ fop->mask = target;
+
+ fop->wind = wind;
+ fop->handler = handler;
+ fop->cbks = cbks;
+ fop->data = data;
+
+ fop->uid = fop->frame->root->uid;
+ fop->gid = fop->frame->root->gid;
+
+ LOCK_INIT(&fop->lock);
+
+ fop->frame->local = fop;
+
+ if (frame != NULL) {
+ parent = frame->local;
+ if (parent != NULL) {
+ ec_sleep(parent);
+ }
+
+ fop->parent = parent;
+ }
+
+ LOCK(&ec->lock);
+
+ list_add_tail(&fop->pending_list, &ec->pending_fops);
+
+ UNLOCK(&ec->lock);
+
+ return fop;
+}
+
+void
+ec_fop_data_acquire(ec_fop_data_t *fop)
+{
+ LOCK(&fop->lock);
+
+ ec_trace("ACQUIRE", fop, "");
+
+ fop->refs++;
+
+ UNLOCK(&fop->lock);
+}
+
+static void
+ec_handle_last_pending_fop_completion(ec_fop_data_t *fop, gf_boolean_t *notify)
+{
+ ec_t *ec = fop->xl->private;
+
+ *notify = _gf_false;
+
+ if (!list_empty(&fop->pending_list)) {
+ LOCK(&ec->lock);
+ {
+ list_del_init(&fop->pending_list);
+ *notify = __ec_is_last_fop(ec);
+ }
+ UNLOCK(&ec->lock);
+ }
+}
+
+void
+ec_fop_cleanup(ec_fop_data_t *fop)
+{
+ ec_cbk_data_t *cbk, *tmp;
+
+ list_for_each_entry_safe(cbk, tmp, &fop->answer_list, answer_list)
+ {
+ list_del_init(&cbk->answer_list);
+
+ ec_cbk_data_destroy(cbk);
+ }
+ INIT_LIST_HEAD(&fop->cbk_list);
+
+ fop->answer = NULL;
+}
+
+void
+ec_fop_data_release(ec_fop_data_t *fop)
+{
+ ec_t *ec = NULL;
+ int32_t refs;
+ gf_boolean_t notify = _gf_false;
+
+ LOCK(&fop->lock);
+
+ ec_trace("RELEASE", fop, "");
+
+ GF_ASSERT(fop->refs > 0);
+ refs = --fop->refs;
+
+ UNLOCK(&fop->lock);
+
+ if (refs == 0) {
+ fop->frame->local = NULL;
+ STACK_DESTROY(fop->frame->root);
+
+ LOCK_DESTROY(&fop->lock);
+
+ if (fop->xdata != NULL) {
+ dict_unref(fop->xdata);
+ }
+ if (fop->dict != NULL) {
+ dict_unref(fop->dict);
+ }
+ if (fop->inode != NULL) {
+ inode_unref(fop->inode);
+ }
+ if (fop->fd != NULL) {
+ fd_unref(fop->fd);
+ }
+ if (fop->buffers != NULL) {
+ iobref_unref(fop->buffers);
+ }
+ GF_FREE(fop->vector);
+ GF_FREE(fop->str[0]);
+ GF_FREE(fop->str[1]);
+ loc_wipe(&fop->loc[0]);
+ loc_wipe(&fop->loc[1]);
+ GF_FREE(fop->errstr);
+
+ ec_resume_parent(fop);
+
+ ec_fop_cleanup(fop);
+
+ ec = fop->xl->private;
+ ec_handle_last_pending_fop_completion(fop, &notify);
+ ec_handle_healers_done(fop);
+ mem_put(fop);
+ if (notify) {
+ ec_pending_fops_completed(ec);
+ }
+ }
+}
diff --git a/xlators/cluster/ec/src/ec-data.h b/xlators/cluster/ec/src/ec-data.h
new file mode 100644
index 00000000000..c8a74ffe1ed
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-data.h
@@ -0,0 +1,35 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_DATA_H__
+#define __EC_DATA_H__
+
+#include "ec-types.h"
+
+ec_cbk_data_t *
+ec_cbk_data_allocate(call_frame_t *frame, xlator_t *this, ec_fop_data_t *fop,
+ int32_t id, int32_t idx, int32_t op_ret, int32_t op_errno);
+ec_fop_data_t *
+ec_fop_data_allocate(call_frame_t *frame, xlator_t *this, int32_t id,
+ uint32_t flags, uintptr_t target, uint32_t fop_flags,
+ ec_wind_f wind, ec_handler_f handler, ec_cbk_t cbks,
+ void *data);
+void
+ec_fop_data_acquire(ec_fop_data_t *fop);
+void
+ec_fop_data_release(ec_fop_data_t *fop);
+
+void
+ec_fop_cleanup(ec_fop_data_t *fop);
+
+void
+ec_pending_fops_completed(ec_t *ec);
+
+#endif /* __EC_DATA_H__ */
diff --git a/xlators/cluster/ec/src/ec-dir-read.c b/xlators/cluster/ec/src/ec-dir-read.c
new file mode 100644
index 00000000000..f71dcfac293
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-dir-read.c
@@ -0,0 +1,647 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "ec.h"
+#include "ec-messages.h"
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-combine.h"
+#include "ec-fops.h"
+
+/****************************************************************
+ *
+ * File Operation: opendir
+ *
+ ***************************************************************/
+
+int32_t
+ec_combine_opendir(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
+{
+ if (dst->fd != src->fd) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_FD_MISMATCH,
+ "Mismatching fd in answers "
+ "of 'GF_FOP_OPENDIR': %p <-> %p",
+ dst->fd, src->fd);
+
+ return 0;
+ }
+
+ return 1;
+}
+
+int32_t
+ec_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_OPENDIR, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (fd != NULL) {
+ cbk->fd = fd_ref(fd);
+ if (cbk->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ }
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ ec_combine(cbk, ec_combine_opendir);
+
+ ec_update_fd_status(fd, this, idx, op_ret);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_opendir(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_opendir_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->opendir,
+ &fop->loc[0], fop->fd, fop->xdata);
+}
+
+int32_t
+ec_manager_opendir(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+ ec_fd_t *ctx;
+ int32_t err;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ LOCK(&fop->fd->lock);
+
+ ctx = __ec_fd_get(fop->fd, fop->xl);
+ if (ctx == NULL) {
+ UNLOCK(&fop->fd->lock);
+
+ fop->error = ENOMEM;
+
+ return EC_STATE_REPORT;
+ }
+ if (!ctx->loc.inode) {
+ err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
+ if (err != 0) {
+ UNLOCK(&fop->fd->lock);
+
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+ }
+
+ UNLOCK(&fop->fd->lock);
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ if (cbk != NULL) {
+ /* Save which subvolumes successfully opened the directory.
+ * If ctx->open is 0, it means that readdir cannot be
+ * processed in this directory.
+ */
+ LOCK(&fop->fd->lock);
+
+ ctx = __ec_fd_get(fop->fd, fop->xl);
+ if (ctx != NULL) {
+ ctx->open |= cbk->mask;
+ }
+
+ UNLOCK(&fop->fd->lock);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.opendir != NULL) {
+ fop->cbks.opendir(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->fd, cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.opendir != NULL) {
+ fop->cbks.opendir(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_opendir_cbk_t func, void *data, loc_t *loc,
+ fd_t *fd, dict_t *xdata)
+{
+ ec_cbk_t callback = {.opendir = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(OPENDIR) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_OPENDIR, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_opendir,
+ ec_manager_opendir, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
+ }
+}
+
+/* Returns -1 if client_id is invalid else index of child subvol in xl_list */
+int
+ec_deitransform(xlator_t *this, off_t offset)
+{
+ int idx = -1;
+ int client_id = -1;
+ ec_t *ec = this->private;
+ char id[32] = {0};
+ int err;
+
+ client_id = gf_deitransform(this, offset);
+ sprintf(id, "%d", client_id);
+ err = dict_get_int32(ec->leaf_to_subvolid, id, &idx);
+ if (err < 0) {
+ idx = err;
+ goto out;
+ }
+
+out:
+ if (idx < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_REQUEST,
+ "Invalid index %d in readdirp request", client_id);
+ idx = -EINVAL;
+ }
+ return idx;
+}
+
+/* FOP: readdir */
+
+void
+ec_adjust_readdirp(ec_t *ec, int32_t idx, gf_dirent_t *entries)
+{
+ gf_dirent_t *entry;
+
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ if (!entry->inode)
+ continue;
+
+ if (entry->d_stat.ia_type == IA_IFREG) {
+ if ((entry->dict == NULL) ||
+ (ec_dict_del_number(entry->dict, EC_XATTR_SIZE,
+ &entry->d_stat.ia_size) != 0)) {
+ inode_unref(entry->inode);
+ entry->inode = NULL;
+ } else {
+ ec_iatt_rebuild(ec, &entry->d_stat, 1, 1);
+ }
+ }
+ }
+}
+
+int32_t
+ec_common_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, fop->id, idx, op_ret,
+ op_errno);
+ if (cbk) {
+ if (xdata)
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->op_ret >= 0)
+ list_splice_init(&entries->list, &cbk->entries.list);
+ ec_combine(cbk, NULL);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_readdir(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_common_readdir_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->readdir,
+ fop->fd, fop->size, fop->offset, fop->xdata);
+}
+
+int32_t
+ec_manager_readdir(ec_fop_data_t *fop, int32_t state)
+{
+ ec_fd_t *ctx = NULL;
+ ec_cbk_data_t *cbk = NULL;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ /* Return error if opendir has not been successfully called on
+ * any subvolume. */
+ ctx = ec_fd_get(fop->fd, fop->xl);
+ if (ctx == NULL) {
+ fop->error = ENOMEM;
+ } else if (ctx->open == 0) {
+ fop->error = EBADFD;
+ }
+
+ if (fop->error) {
+ gf_msg(fop->xl->name, GF_LOG_ERROR, fop->error,
+ EC_MSG_INVALID_REQUEST, "EC is not winding readdir: %s",
+ ec_msg_str(fop));
+ return EC_STATE_REPORT;
+ }
+
+ if (fop->id == GF_FOP_READDIRP) {
+ int32_t err;
+
+ if (fop->xdata == NULL) {
+ fop->xdata = dict_new();
+ if (fop->xdata == NULL) {
+ fop->error = ENOMEM;
+
+ return EC_STATE_REPORT;
+ }
+ }
+
+ err = dict_set_uint64(fop->xdata, EC_XATTR_SIZE, 0);
+ if (err != 0) {
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+ }
+
+ if (fop->offset != 0) {
+ /* Non-zero offset is irrecoverable error as the offset may not
+ * be valid on other bricks*/
+ int32_t idx = -1;
+
+ idx = ec_deitransform(fop->xl, fop->offset);
+
+ if (idx < 0) {
+ fop->error = -idx;
+ return EC_STATE_REPORT;
+ }
+ fop->mask &= 1ULL << idx;
+ } else {
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
+ ec_lock(fop);
+ }
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_one(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ if (ec_dispatch_one_retry(fop, &cbk)) {
+ return EC_STATE_DISPATCH;
+ }
+
+ if ((cbk != NULL) && (cbk->op_ret > 0) &&
+ (fop->id == GF_FOP_READDIRP)) {
+ ec_adjust_readdirp(fop->xl->private, cbk->idx, &cbk->entries);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+ GF_ASSERT(cbk);
+ if (fop->id == GF_FOP_READDIR) {
+ if (fop->cbks.readdir != NULL) {
+ fop->cbks.readdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, &cbk->entries, cbk->xdata);
+ }
+ } else {
+ if (fop->cbks.readdirp != NULL) {
+ fop->cbks.readdirp(fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno,
+ &cbk->entries, cbk->xdata);
+ }
+ }
+ if (fop->offset == 0)
+ return EC_STATE_LOCK_REUSE;
+ else
+ return EC_STATE_END;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ if (fop->id == GF_FOP_READDIR) {
+ if (fop->cbks.readdir != NULL) {
+ fop->cbks.readdir(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL);
+ }
+ } else {
+ if (fop->cbks.readdirp != NULL) {
+ fop->cbks.readdirp(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL);
+ }
+ }
+ if (fop->offset == 0)
+ return EC_STATE_LOCK_REUSE;
+ else
+ return EC_STATE_END;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ GF_ASSERT(fop->offset == 0);
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ GF_ASSERT(fop->offset == 0);
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readdir_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
+{
+ ec_cbk_t callback = {.readdir = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(READDIR) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_READDIR, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_readdir,
+ ec_manager_readdir, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ fop->size = size;
+ fop->offset = offset;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
+ }
+}
+
+/* FOP: readdirp */
+
+void
+ec_wind_readdirp(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_common_readdir_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->readdirp,
+ fop->fd, fop->size, fop->offset, fop->xdata);
+}
+
+void
+ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readdirp_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata)
+{
+ ec_cbk_t callback = {.readdirp = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(READDIRP) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(
+ frame, this, GF_FOP_READDIRP, EC_FLAG_LOCK_SHARED, target, fop_flags,
+ ec_wind_readdirp, ec_manager_readdir, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ fop->size = size;
+ fop->offset = offset;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
+ }
+}
diff --git a/xlators/cluster/ec/src/ec-dir-write.c b/xlators/cluster/ec/src/ec-dir-write.c
new file mode 100644
index 00000000000..53d27d895c3
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-dir-write.c
@@ -0,0 +1,1487 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "ec.h"
+#include "ec-messages.h"
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-combine.h"
+#include "ec-method.h"
+#include "ec-fops.h"
+
+int
+ec_dir_write_cbk(call_frame_t *frame, xlator_t *this, void *cookie, int op_ret,
+ int op_errno, struct iatt *poststat, struct iatt *preparent,
+ struct iatt *postparent, struct iatt *preparent2,
+ struct iatt *postparent2, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int i = 0;
+ int idx = 0;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+ idx = (long)cookie;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, fop->id, idx, op_ret,
+ op_errno);
+ if (!cbk)
+ goto out;
+
+ if (xdata)
+ cbk->xdata = dict_ref(xdata);
+
+ if (op_ret < 0)
+ goto out;
+
+ if (poststat)
+ cbk->iatt[i++] = *poststat;
+
+ if (preparent)
+ cbk->iatt[i++] = *preparent;
+
+ if (postparent)
+ cbk->iatt[i++] = *postparent;
+
+ if (preparent2)
+ cbk->iatt[i++] = *preparent2;
+
+ if (postparent2)
+ cbk->iatt[i++] = *postparent2;
+
+out:
+ if (cbk)
+ ec_combine(cbk, ec_combine_write);
+
+ if (fop)
+ ec_complete(fop);
+ return 0;
+}
+
+/* FOP: create */
+
+int32_t
+ec_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
+}
+
+void
+ec_wind_create(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_create_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->create,
+ &fop->loc[0], fop->int32, fop->mode[0], fop->mode[1],
+ fop->fd, fop->xdata);
+}
+
+int32_t
+ec_manager_create(ec_fop_data_t *fop, int32_t state)
+{
+ ec_config_t config;
+ ec_t *ec;
+ ec_cbk_data_t *cbk;
+ ec_fd_t *ctx;
+ uint64_t version[2] = {0, 0};
+ int32_t err;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ LOCK(&fop->fd->lock);
+
+ ctx = __ec_fd_get(fop->fd, fop->xl);
+ if (ctx == NULL) {
+ UNLOCK(&fop->fd->lock);
+
+ fop->error = ENOMEM;
+
+ return EC_STATE_REPORT;
+ }
+ err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
+ if (err != 0) {
+ UNLOCK(&fop->fd->lock);
+
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+
+ ctx->flags = fop->int32;
+
+ UNLOCK(&fop->fd->lock);
+
+ if (fop->xdata == NULL) {
+ fop->xdata = dict_new();
+ if (fop->xdata == NULL) {
+ fop->error = ENOMEM;
+
+ return EC_STATE_REPORT;
+ }
+ }
+
+ ec = fop->xl->private;
+
+ config.version = EC_CONFIG_VERSION;
+ config.algorithm = EC_CONFIG_ALGORITHM;
+ config.gf_word_size = EC_GF_BITS;
+ config.bricks = ec->nodes;
+ config.redundancy = ec->redundancy;
+ config.chunk_size = EC_METHOD_CHUNK_SIZE;
+
+ err = ec_dict_set_config(fop->xdata, EC_XATTR_CONFIG, &config);
+ if (err != 0) {
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+ err = ec_dict_set_array(fop->xdata, EC_XATTR_VERSION, version,
+ EC_VERSION_SIZE);
+ if (err != 0) {
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+ err = ec_dict_set_number(fop->xdata, EC_XATTR_SIZE, 0);
+ if (err != 0) {
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+
+ /* We need to write to specific offsets on the bricks, so we
+ * need to remove O_APPEND from flags (if present) */
+ fop->int32 &= ~O_APPEND;
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,
+ EC_UPDATE_DATA | EC_UPDATE_META);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count);
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
+ &cbk->iatt[0]);
+ if (!ec_cbk_set_error(cbk, -err, _gf_false)) {
+ LOCK(&fop->fd->lock);
+
+ ctx = __ec_fd_get(fop->fd, fop->xl);
+ if (ctx != NULL) {
+ ctx->open |= cbk->mask;
+ }
+
+ UNLOCK(&fop->fd->lock);
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.create != NULL) {
+ QUORUM_CBK(fop->cbks.create, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->fd,
+ fop->loc[0].inode, &cbk->iatt[0], &cbk->iatt[1],
+ &cbk->iatt[2], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.create != NULL) {
+ fop->cbks.create(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_create_cbk_t func, void *data, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ ec_cbk_t callback = {.create = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(CREATE) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_CREATE, 0, target, fop_flags,
+ ec_wind_create, ec_manager_create, callback,
+ data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->int32 = flags;
+ fop->mode[0] = mode;
+ fop->mode[1] = umask;
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL, NULL);
+ }
+}
+
+/* FOP: link */
+
+int32_t
+ec_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
+}
+
+void
+ec_wind_link(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_link_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->link,
+ &fop->loc[0], &fop->loc[1], fop->xdata);
+}
+
+int32_t
+ec_manager_link(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ ec_lock_prepare_parent_inode(
+ fop, &fop->loc[1], &fop->loc[0],
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_INODE_SIZE);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count);
+
+ if (cbk->iatt[0].ia_type == IA_IFREG) {
+ cbk->iatt[0].ia_size = fop->locks[0].size;
+ }
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
+ &cbk->iatt[0]);
+ ec_cbk_set_error(cbk, -err, _gf_false);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.link != NULL) {
+ QUORUM_CBK(fop->cbks.link, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.link != NULL) {
+ fop->cbks.link(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_link_cbk_t func, void *data, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ ec_cbk_t callback = {.link = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(LINK) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_LINK, 0, target, fop_flags,
+ ec_wind_link, ec_manager_link, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ if (oldloc != NULL) {
+ if (loc_copy(&fop->loc[0], oldloc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (newloc != NULL) {
+ if (loc_copy(&fop->loc[1], newloc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL);
+ }
+}
+
+/* FOP: mkdir */
+
+int32_t
+ec_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
+}
+
+void
+ec_wind_mkdir(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_mkdir_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->mkdir,
+ &fop->loc[0], fop->mode[0], fop->mode[1], fop->xdata);
+}
+
+int32_t
+ec_manager_mkdir(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+ uint64_t version[2] = {0, 0};
+ int32_t err;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ if (fop->xdata == NULL) {
+ fop->xdata = dict_new();
+ if (fop->xdata == NULL) {
+ fop->error = ENOMEM;
+
+ return EC_STATE_REPORT;
+ }
+ }
+
+ err = ec_dict_set_array(fop->xdata, EC_XATTR_VERSION, version,
+ EC_VERSION_SIZE);
+ if (err != 0) {
+ fop->error = -err;
+ return EC_STATE_REPORT;
+ }
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,
+ EC_UPDATE_DATA | EC_UPDATE_META);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count);
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
+ &cbk->iatt[0]);
+ ec_cbk_set_error(cbk, -err, _gf_false);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.mkdir != NULL) {
+ QUORUM_CBK(fop->cbks.mkdir, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ cbk = fop->answer;
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.mkdir != NULL) {
+ fop->cbks.mkdir(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL, NULL,
+ ((cbk) ? cbk->xdata : NULL));
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mkdir_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata)
+{
+ ec_cbk_t callback = {.mkdir = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(MKDIR) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_MKDIR, 0, target, fop_flags,
+ ec_wind_mkdir, ec_manager_mkdir, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->mode[0] = mode;
+ fop->mode[1] = umask;
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL);
+ }
+}
+
+/* FOP: mknod */
+
+int32_t
+ec_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
+}
+
+void
+ec_wind_mknod(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_mknod_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->mknod,
+ &fop->loc[0], fop->mode[0], fop->dev, fop->mode[1],
+ fop->xdata);
+}
+
+int32_t
+ec_manager_mknod(ec_fop_data_t *fop, int32_t state)
+{
+ ec_config_t config;
+ ec_t *ec;
+ ec_cbk_data_t *cbk;
+ uint64_t version[2] = {0, 0};
+
+ switch (state) {
+ case EC_STATE_INIT:
+ if (S_ISREG(fop->mode[0])) {
+ int32_t err;
+
+ if (fop->xdata == NULL) {
+ fop->xdata = dict_new();
+ if (fop->xdata == NULL) {
+ fop->error = ENOMEM;
+
+ return EC_STATE_REPORT;
+ }
+ }
+
+ ec = fop->xl->private;
+
+ config.version = EC_CONFIG_VERSION;
+ config.algorithm = EC_CONFIG_ALGORITHM;
+ config.gf_word_size = EC_GF_BITS;
+ config.bricks = ec->nodes;
+ config.redundancy = ec->redundancy;
+ config.chunk_size = EC_METHOD_CHUNK_SIZE;
+
+ err = ec_dict_set_config(fop->xdata, EC_XATTR_CONFIG, &config);
+ if (err != 0) {
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+ err = ec_dict_set_array(fop->xdata, EC_XATTR_VERSION, version,
+ EC_VERSION_SIZE);
+ if (err != 0) {
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+ err = ec_dict_set_number(fop->xdata, EC_XATTR_SIZE, 0);
+ if (err != 0) {
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+ }
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,
+ EC_UPDATE_DATA | EC_UPDATE_META);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count);
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
+ &cbk->iatt[0]);
+ ec_cbk_set_error(cbk, -err, _gf_false);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.mknod != NULL) {
+ QUORUM_CBK(fop->cbks.mknod, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.mknod != NULL) {
+ fop->cbks.mknod(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mknod_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ ec_cbk_t callback = {.mknod = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(MKNOD) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_MKNOD, 0, target, fop_flags,
+ ec_wind_mknod, ec_manager_mknod, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->mode[0] = mode;
+ fop->dev = rdev;
+ fop->mode[1] = umask;
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL);
+ }
+}
+
+/* FOP: rename */
+
+int32_t
+ec_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, struct iatt *preoldparent,
+ struct iatt *postoldparent, struct iatt *prenewparent,
+ struct iatt *postnewparent, dict_t *xdata)
+{
+ return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
+}
+
+void
+ec_wind_rename(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_rename_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->rename,
+ &fop->loc[0], &fop->loc[1], fop->xdata);
+}
+
+int32_t
+ec_manager_rename(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ ec_lock_prepare_parent_inode(
+ fop, &fop->loc[0], &fop->loc[0],
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_INODE_SIZE);
+ ec_lock_prepare_parent_inode(fop, &fop->loc[1], NULL,
+ EC_UPDATE_DATA | EC_UPDATE_META);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 5, cbk->count);
+
+ if (cbk->iatt[0].ia_type == IA_IFREG) {
+ cbk->iatt[0].ia_size = fop->locks[0].size;
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.rename != NULL) {
+ QUORUM_CBK(fop->cbks.rename, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], &cbk->iatt[2], &cbk->iatt[3],
+ &cbk->iatt[4], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.rename != NULL) {
+ fop->cbks.rename(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_rename_cbk_t func, void *data, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ ec_cbk_t callback = {.rename = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(RENAME) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_RENAME, 0, target, fop_flags,
+ ec_wind_rename, ec_manager_rename, callback,
+ data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ if (oldloc != NULL) {
+ if (loc_copy(&fop->loc[0], oldloc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (newloc != NULL) {
+ if (loc_copy(&fop->loc[1], newloc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL, NULL);
+ }
+}
+
+/* FOP: rmdir */
+
+int32_t
+ec_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
+}
+
+void
+ec_wind_rmdir(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_rmdir_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->rmdir,
+ &fop->loc[0], fop->int32, fop->xdata);
+}
+
+int32_t
+ec_manager_rmdir(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,
+ EC_UPDATE_DATA | EC_UPDATE_META);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ ec_fop_prepare_answer(fop, _gf_false);
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.rmdir != NULL) {
+ QUORUM_CBK(fop->cbks.rmdir, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.rmdir != NULL) {
+ fop->cbks.rmdir(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_rmdir_cbk_t func, void *data, loc_t *loc,
+ int xflags, dict_t *xdata)
+{
+ ec_cbk_t callback = {.rmdir = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(RMDIR) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_RMDIR, 0, target, fop_flags,
+ ec_wind_rmdir, ec_manager_rmdir, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->int32 = xflags;
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+ }
+}
+
+/* FOP: symlink */
+
+int32_t
+ec_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, buf,
+ preparent, postparent, NULL, NULL, xdata);
+}
+
+void
+ec_wind_symlink(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_symlink_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->symlink,
+ fop->str[0], &fop->loc[0], fop->mode[0], fop->xdata);
+}
+
+int32_t
+ec_manager_symlink(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,
+ EC_UPDATE_DATA | EC_UPDATE_META);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 3, cbk->count);
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode,
+ &cbk->iatt[0]);
+ ec_cbk_set_error(cbk, -err, _gf_false);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.symlink != NULL) {
+ QUORUM_CBK(fop->cbks.symlink, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, fop->loc[0].inode,
+ &cbk->iatt[0], &cbk->iatt[1], &cbk->iatt[2],
+ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.symlink != NULL) {
+ fop->cbks.symlink(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_symlink_cbk_t func, void *data,
+ const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ ec_cbk_t callback = {.symlink = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(SYMLINK) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SYMLINK, 0, target,
+ fop_flags, ec_wind_symlink, ec_manager_symlink,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->mode[0] = umask;
+
+ if (linkname != NULL) {
+ fop->str[0] = gf_strdup(linkname);
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
+
+ goto out;
+ }
+ }
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL, NULL);
+ }
+}
+
+/* FOP: unlink */
+
+int32_t
+ec_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ return ec_dir_write_cbk(frame, this, cookie, op_ret, op_errno, NULL,
+ preparent, postparent, NULL, NULL, xdata);
+}
+
+void
+ec_wind_unlink(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_unlink_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->unlink,
+ &fop->loc[0], fop->int32, fop->xdata);
+}
+
+int32_t
+ec_manager_unlink(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ ec_lock_prepare_parent_inode(fop, &fop->loc[0], NULL,
+ EC_UPDATE_DATA | EC_UPDATE_META);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ ec_fop_prepare_answer(fop, _gf_false);
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.unlink != NULL) {
+ QUORUM_CBK(fop->cbks.unlink, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.unlink != NULL) {
+ fop->cbks.unlink(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_unlink_cbk_t func, void *data, loc_t *loc,
+ int xflags, dict_t *xdata)
+{
+ ec_cbk_t callback = {.unlink = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(UNLINK) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_UNLINK, 0, target, fop_flags,
+ ec_wind_unlink, ec_manager_unlink, callback,
+ data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->int32 = xflags;
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+ }
+}
diff --git a/xlators/cluster/ec/src/ec-fops.h b/xlators/cluster/ec/src/ec-fops.h
new file mode 100644
index 00000000000..07edf8a7fec
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-fops.h
@@ -0,0 +1,254 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_FOPS_H__
+#define __EC_FOPS_H__
+
+#include <glusterfs/xlator.h>
+
+#include "ec-types.h"
+#include "ec-common.h"
+
+void
+ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_access_cbk_t func, void *data, loc_t *loc,
+ int32_t mask, dict_t *xdata);
+
+void
+ec_create(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_create_cbk_t func, void *data, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+void
+ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_entrylk_cbk_t func, void *data,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+
+void
+ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fentrylk_cbk_t func, void *data,
+ const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata);
+
+void
+ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata);
+
+void
+ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsync_cbk_t func, void *data, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+void
+ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
+ int32_t datasync, dict_t *xdata);
+
+void
+ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_getxattr_cbk_t func, void *data, loc_t *loc,
+ const char *name, dict_t *xdata);
+
+void
+ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
+ const char *name, dict_t *xdata);
+
+void
+ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc,
+ int32_t partial, dict_t *xdata);
+
+void
+ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd,
+ int32_t partial, dict_t *xdata);
+
+void
+ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
+ uintptr_t target, uint32_t fop_flags, fop_inodelk_cbk_t func,
+ void *data, const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+void
+ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
+ uintptr_t target, uint32_t fop_flags, fop_finodelk_cbk_t func,
+ void *data, const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+void
+ec_link(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_link_cbk_t func, void *data, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+void
+ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, uint32_t fop_flags,
+ fop_lk_cbk_t func, void *data, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata);
+
+void
+ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_lookup_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata);
+
+void
+ec_mkdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mkdir_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata);
+
+void
+ec_mknod(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_mknod_cbk_t func, void *data, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata);
+
+void
+ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_open_cbk_t func, void *data, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata);
+
+void
+ec_opendir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_opendir_cbk_t func, void *data, loc_t *loc,
+ fd_t *fd, dict_t *xdata);
+
+void
+ec_readdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readdir_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata);
+
+void
+ec_readdirp(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readdirp_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, dict_t *xdata);
+
+void
+ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readlink_cbk_t func, void *data, loc_t *loc,
+ size_t size, dict_t *xdata);
+
+void
+ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readv_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata);
+
+void
+ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_removexattr_cbk_t func, void *data,
+ loc_t *loc, const char *name, dict_t *xdata);
+
+void
+ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fremovexattr_cbk_t func, void *data,
+ fd_t *fd, const char *name, dict_t *xdata);
+
+void
+ec_rename(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_rename_cbk_t func, void *data, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata);
+
+void
+ec_rmdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_rmdir_cbk_t func, void *data, loc_t *loc,
+ int xflags, dict_t *xdata);
+
+void
+ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_setattr_cbk_t func, void *data, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+void
+ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+void
+ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_setxattr_cbk_t func, void *data, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata);
+
+void
+ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata);
+
+void
+ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_stat_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata);
+
+void
+ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fstat_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata);
+
+void
+ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_statfs_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata);
+
+void
+ec_symlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_symlink_cbk_t func, void *data,
+ const char *linkname, loc_t *loc, mode_t umask, dict_t *xdata);
+
+void
+ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fallocate_cbk_t func, void *data, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata);
+
+void
+ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_discard_cbk_t func, void *data, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata);
+
+void
+ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_truncate_cbk_t func, void *data, loc_t *loc,
+ off_t offset, dict_t *xdata);
+
+void
+ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
+ off_t offset, dict_t *xdata);
+
+void
+ec_unlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_unlink_cbk_t func, void *data, loc_t *loc,
+ int xflags, dict_t *xdata);
+
+void
+ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_writev_cbk_t func, void *data, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata);
+
+void
+ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_xattrop_cbk_t func, void *data, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+void
+ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata);
+
+void
+ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_seek_cbk_t func, void *data, fd_t *fd,
+ off_t offset, gf_seek_what_t what, dict_t *xdata);
+
+void
+ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_ipc_cbk_t func, void *data, int32_t op,
+ dict_t *xdata);
+
+#endif /* __EC_FOPS_H__ */
diff --git a/xlators/cluster/ec/src/ec-galois.c b/xlators/cluster/ec/src/ec-galois.c
new file mode 100644
index 00000000000..6e4990c71f5
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-galois.c
@@ -0,0 +1,183 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <string.h>
+
+#include "ec-mem-types.h"
+#include "ec-gf8.h"
+#include "ec-helpers.h"
+
+static ec_gf_t *
+ec_gf_alloc(uint32_t bits, uint32_t mod)
+{
+ ec_gf_t *gf;
+
+ gf = GF_MALLOC(sizeof(ec_gf_t), ec_mt_ec_gf_t);
+ if (gf == NULL) {
+ goto failed;
+ }
+
+ gf->bits = bits;
+ gf->size = 1 << bits;
+ gf->mod = mod;
+
+ gf->log = GF_MALLOC(sizeof(uint32_t) * (gf->size * 2 - 1),
+ gf_common_mt_int);
+ if (gf->log == NULL) {
+ goto failed_gf;
+ }
+ gf->pow = GF_MALLOC(sizeof(uint32_t) * (gf->size * 2 - 1),
+ gf_common_mt_int);
+ if (gf->pow == NULL) {
+ goto failed_log;
+ }
+
+ return gf;
+
+failed_log:
+ GF_FREE(gf->log);
+failed_gf:
+ GF_FREE(gf);
+failed:
+ return EC_ERR(ENOMEM);
+}
+
+static void
+ec_gf_init_tables(ec_gf_t *gf)
+{
+ uint32_t i, tmp;
+
+ memset(gf->log, -1, sizeof(uint32_t) * gf->size);
+
+ gf->pow[0] = 1;
+ gf->log[0] = gf->size;
+ gf->log[1] = 0;
+ for (i = 1; i < gf->size; i++) {
+ tmp = gf->pow[i - 1] << 1;
+ if (tmp >= gf->size) {
+ tmp ^= gf->mod;
+ }
+ gf->pow[i + gf->size - 1] = gf->pow[i] = tmp;
+ gf->log[tmp + gf->size - 1] = gf->log[tmp] = i;
+ }
+}
+
+ec_gf_t *
+ec_gf_prepare(uint32_t bits, uint32_t mod)
+{
+ ec_gf_mul_t **tbl;
+ ec_gf_t *gf;
+ uint32_t i, j;
+
+ if (bits != 8) {
+ return EC_ERR(EINVAL);
+ }
+
+ tbl = ec_gf8_mul;
+ if (mod == 0) {
+ mod = 0x11d;
+ }
+
+ gf = ec_gf_alloc(bits, mod);
+ if (EC_IS_ERR(gf)) {
+ return gf;
+ }
+ ec_gf_init_tables(gf);
+
+ gf->table = tbl;
+ gf->min_ops = bits * bits;
+ gf->max_ops = 0;
+ gf->avg_ops = 0;
+ for (i = 1; i < gf->size; i++) {
+ for (j = 0; tbl[i]->ops[j].op != EC_GF_OP_END; j++) {
+ }
+ if (gf->max_ops < j) {
+ gf->max_ops = j;
+ }
+ if (gf->min_ops > j) {
+ gf->min_ops = j;
+ }
+ gf->avg_ops += j;
+ }
+ gf->avg_ops /= gf->size;
+
+ return gf;
+}
+
+void
+ec_gf_destroy(ec_gf_t *gf)
+{
+ GF_FREE(gf->pow);
+ GF_FREE(gf->log);
+ GF_FREE(gf);
+}
+
+uint32_t
+ec_gf_add(ec_gf_t *gf, uint32_t a, uint32_t b)
+{
+ if ((a >= gf->size) || (b >= gf->size)) {
+ return gf->size;
+ }
+
+ return a ^ b;
+}
+
+uint32_t
+ec_gf_mul(ec_gf_t *gf, uint32_t a, uint32_t b)
+{
+ if ((a >= gf->size) || (b >= gf->size)) {
+ return gf->size;
+ }
+
+ if ((a != 0) && (b != 0)) {
+ return gf->pow[gf->log[a] + gf->log[b]];
+ }
+
+ return 0;
+}
+
+uint32_t
+ec_gf_div(ec_gf_t *gf, uint32_t a, uint32_t b)
+{
+ if ((a >= gf->size) || (b >= gf->size)) {
+ return gf->size;
+ }
+
+ if (b != 0) {
+ if (a != 0) {
+ return gf->pow[gf->size - 1 + gf->log[a] - gf->log[b]];
+ }
+
+ return 0;
+ }
+
+ return gf->size;
+}
+
+uint32_t
+ec_gf_exp(ec_gf_t *gf, uint32_t a, uint32_t b)
+{
+ uint32_t r;
+
+ if ((a >= gf->size) || ((a == 0) && (b == 0))) {
+ return gf->size;
+ }
+
+ r = 1;
+ while (b != 0) {
+ if ((b & 1) != 0) {
+ r = ec_gf_mul(gf, r, a);
+ }
+ a = ec_gf_mul(gf, a, a);
+ b >>= 1;
+ }
+
+ return r;
+}
diff --git a/xlators/cluster/ec/src/ec-galois.h b/xlators/cluster/ec/src/ec-galois.h
new file mode 100644
index 00000000000..ed55d53e419
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-galois.h
@@ -0,0 +1,32 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_GALOIS_H__
+#define __EC_GALOIS_H__
+
+#include <inttypes.h>
+
+#include "ec-types.h"
+
+ec_gf_t *
+ec_gf_prepare(uint32_t bits, uint32_t mod);
+void
+ec_gf_destroy(ec_gf_t *gf);
+
+uint32_t
+ec_gf_add(ec_gf_t *gf, uint32_t a, uint32_t b);
+uint32_t
+ec_gf_mul(ec_gf_t *gf, uint32_t a, uint32_t b);
+uint32_t
+ec_gf_div(ec_gf_t *gf, uint32_t a, uint32_t b);
+uint32_t
+ec_gf_exp(ec_gf_t *gf, uint32_t a, uint32_t b);
+
+#endif /* __EC_GALOIS_H__ */
diff --git a/xlators/cluster/ec/src/ec-generic.c b/xlators/cluster/ec/src/ec-generic.c
new file mode 100644
index 00000000000..884deb93669
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-generic.c
@@ -0,0 +1,1591 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/byte-order.h>
+
+#include "ec.h"
+#include "ec-messages.h"
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-combine.h"
+#include "ec-fops.h"
+
+/* FOP: flush */
+
+int32_t
+ec_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FLUSH, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ ec_combine(cbk, NULL);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_flush(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_flush_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->flush, fop->fd,
+ fop->xdata);
+}
+
+int32_t
+ec_manager_flush(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ ec_lock_prepare_fd(fop, fop->fd, 0, 0, EC_RANGE_FULL);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_flush_size_version(fop);
+
+ return EC_STATE_DELAYED_START;
+
+ case EC_STATE_DELAYED_START:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ ec_fop_prepare_answer(fop, _gf_false);
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.flush != NULL) {
+ fop->cbks.flush(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DELAYED_START:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.flush != NULL) {
+ fop->cbks.flush(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+static int32_t
+ec_validate_fd(fd_t *fd, xlator_t *xl)
+{
+ uint64_t iversion = 0;
+ uint64_t fversion = 0;
+ ec_inode_t *inode_ctx = NULL;
+ ec_fd_t *fd_ctx = NULL;
+
+ LOCK(&fd->lock);
+ {
+ fd_ctx = __ec_fd_get(fd, xl);
+ if (fd_ctx) {
+ fversion = fd_ctx->bad_version;
+ }
+ }
+ UNLOCK(&fd->lock);
+
+ LOCK(&fd->inode->lock);
+ {
+ inode_ctx = __ec_inode_get(fd->inode, xl);
+ if (inode_ctx) {
+ iversion = inode_ctx->bad_version;
+ }
+ }
+ UNLOCK(&fd->inode->lock);
+ if (fversion < iversion) {
+ return EBADF;
+ }
+ return 0;
+}
+
+void
+ec_flush(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_flush_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata)
+{
+ ec_cbk_t callback = {.flush = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(FLUSH) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ if (fd) {
+ error = ec_validate_fd(fd, this);
+ if (error) {
+ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
+ "Failing %s on %s", gf_fop_list[GF_FOP_FLUSH],
+ fd->inode ? uuid_utoa(fd->inode->gfid) : "");
+ goto out;
+ }
+ }
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FLUSH, 0, target, fop_flags,
+ ec_wind_flush, ec_manager_flush, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
+ }
+}
+
+/* FOP: fsync */
+
+int32_t
+ec_combine_fsync(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
+{
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 2)) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH,
+ "Mismatching iatt in "
+ "answers of 'GF_FOP_FSYNC'");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+int32_t
+ec_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FSYNC, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (prebuf != NULL) {
+ cbk->iatt[0] = *prebuf;
+ }
+ if (postbuf != NULL) {
+ cbk->iatt[1] = *postbuf;
+ }
+ }
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ ec_combine(cbk, ec_combine_fsync);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_fsync(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_fsync_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->fsync, fop->fd,
+ fop->int32, fop->xdata);
+}
+
+int32_t
+ec_manager_fsync(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, 0, EC_RANGE_FULL);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_flush_size_version(fop);
+
+ return EC_STATE_DELAYED_START;
+
+ case EC_STATE_DELAYED_START:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop, fop->fd->inode,
+ &cbk->iatt[0].ia_size));
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.fsync != NULL) {
+ fop->cbks.fsync(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, &cbk->iatt[0], &cbk->iatt[1],
+ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ case -EC_STATE_DELAYED_START:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.fsync != NULL) {
+ fop->cbks.fsync(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_fsync(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsync_cbk_t func, void *data, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
+{
+ ec_cbk_t callback = {.fsync = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(FSYNC) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ if (fd) {
+ error = ec_validate_fd(fd, this);
+ if (error) {
+ gf_msg(this->name, GF_LOG_ERROR, EBADF, EC_MSG_FD_BAD,
+ "Failing %s on %s", gf_fop_list[GF_FOP_FSYNC],
+ fd->inode ? uuid_utoa(fd->inode->gfid) : "");
+ goto out;
+ }
+ }
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNC, 0, target, fop_flags,
+ ec_wind_fsync, ec_manager_fsync, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ fop->int32 = datasync;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+ }
+}
+
+/* FOP: fsyncdir */
+
+int32_t
+ec_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FSYNCDIR, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ ec_combine(cbk, NULL);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_fsyncdir(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_fsyncdir_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->fsyncdir,
+ fop->fd, fop->int32, fop->xdata);
+}
+
+int32_t
+ec_manager_fsyncdir(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ ec_lock_prepare_fd(fop, fop->fd, 0, 0, EC_RANGE_FULL);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_flush_size_version(fop);
+
+ return EC_STATE_DELAYED_START;
+
+ case EC_STATE_DELAYED_START:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ ec_fop_prepare_answer(fop, _gf_false);
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.fsyncdir != NULL) {
+ fop->cbks.fsyncdir(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ case -EC_STATE_DELAYED_START:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.fsyncdir != NULL) {
+ fop->cbks.fsyncdir(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_fsyncdir(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsyncdir_cbk_t func, void *data, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
+{
+ ec_cbk_t callback = {.fsyncdir = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(FSYNCDIR) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSYNCDIR, 0, target,
+ fop_flags, ec_wind_fsyncdir, ec_manager_fsyncdir,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ fop->int32 = datasync;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
+ }
+}
+
+/* FOP: lookup */
+
+void
+ec_lookup_rebuild(ec_t *ec, ec_fop_data_t *fop, ec_cbk_data_t *cbk)
+{
+ ec_inode_t *ctx = NULL;
+ uint64_t size = 0;
+ int32_t have_size = 0, err;
+
+ if (cbk->op_ret < 0) {
+ return;
+ }
+
+ ec_dict_del_array(cbk->xdata, EC_XATTR_VERSION, cbk->version,
+ EC_VERSION_SIZE);
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->inode, &cbk->iatt[0]);
+ if (ec_cbk_set_error(cbk, -err, _gf_true)) {
+ return;
+ }
+
+ LOCK(&cbk->inode->lock);
+
+ ctx = __ec_inode_get(cbk->inode, fop->xl);
+ if (ctx != NULL) {
+ if (ctx->have_version) {
+ cbk->version[0] = ctx->post_version[0];
+ cbk->version[1] = ctx->post_version[1];
+ }
+ if (ctx->have_size) {
+ size = ctx->post_size;
+ have_size = 1;
+ }
+ }
+
+ UNLOCK(&cbk->inode->lock);
+
+ if (cbk->iatt[0].ia_type == IA_IFREG) {
+ cbk->size = cbk->iatt[0].ia_size;
+ ec_dict_del_number(cbk->xdata, EC_XATTR_SIZE, &cbk->iatt[0].ia_size);
+ if (have_size) {
+ cbk->iatt[0].ia_size = size;
+ }
+ }
+}
+
+int32_t
+ec_combine_lookup(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
+{
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 2)) {
+ gf_msg(fop->xl->name, GF_LOG_DEBUG, 0, EC_MSG_IATT_MISMATCH,
+ "Mismatching iatt in "
+ "answers of 'GF_FOP_LOOKUP'");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+int32_t
+ec_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+ uint64_t dirty[2] = {0};
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_LOOKUP, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (inode != NULL) {
+ cbk->inode = inode_ref(inode);
+ if (cbk->inode == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_INODE_REF_FAIL,
+ "Failed to reference an inode.");
+
+ goto out;
+ }
+ }
+ if (buf != NULL) {
+ cbk->iatt[0] = *buf;
+ }
+ if (postparent != NULL) {
+ cbk->iatt[1] = *postparent;
+ }
+ }
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ ec_dict_del_array(xdata, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE);
+ }
+
+ ec_combine(cbk, ec_combine_lookup);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_lookup(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_lookup_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->lookup,
+ &fop->loc[0], fop->xdata);
+}
+
+int32_t
+ec_manager_lookup(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+ int32_t err;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ if (fop->xdata == NULL) {
+ fop->xdata = dict_new();
+ if (fop->xdata == NULL) {
+ gf_msg(fop->xl->name, GF_LOG_ERROR, ENOMEM,
+ EC_MSG_LOOKUP_REQ_PREP_FAIL,
+ "Unable to prepare "
+ "lookup request");
+
+ fop->error = ENOMEM;
+
+ return EC_STATE_REPORT;
+ }
+ } else {
+ /*TODO: To be handled once we have 'syndromes' */
+ dict_del(fop->xdata, GF_CONTENT_KEY);
+ }
+ err = dict_set_uint64(fop->xdata, EC_XATTR_SIZE, 0);
+ if (err == 0) {
+ err = dict_set_uint64(fop->xdata, EC_XATTR_VERSION, 0);
+ }
+ if (err == 0) {
+ err = dict_set_uint64(fop->xdata, EC_XATTR_DIRTY, 0);
+ }
+ if (err != 0) {
+ gf_msg(fop->xl->name, GF_LOG_ERROR, -err,
+ EC_MSG_LOOKUP_REQ_PREP_FAIL,
+ "Unable to prepare lookup "
+ "request");
+
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+
+ /* Fall through */
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ /*
+ * Lookup happens without any lock, so there is a chance that it
+ * will have answers before modification happened and after
+ * modification happened in the same response. So choose the next
+ * best answer when the answers don't match for EC_MINIMUM_MIN
+ */
+
+ if (!fop->answer && !list_empty(&fop->cbk_list)) {
+ fop->answer = list_entry(fop->cbk_list.next, ec_cbk_data_t,
+ list);
+ }
+
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ if (cbk != NULL) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
+
+ ec_lookup_rebuild(fop->xl->private, fop, cbk);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.lookup != NULL) {
+ fop->cbks.lookup(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->inode, &cbk->iatt[0],
+ cbk->xdata, &cbk->iatt[1]);
+ }
+
+ return EC_STATE_END;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.lookup != NULL) {
+ fop->cbks.lookup(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_lookup(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_lookup_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata)
+{
+ ec_cbk_t callback = {.lookup = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(LOOKUP) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_LOOKUP, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_lookup,
+ ec_manager_lookup, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ /* Do not log failures here as a memory problem would have already
+ * been logged by the corresponding alloc functions */
+ if (fop->xdata == NULL)
+ goto out;
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL, NULL);
+ }
+}
+
+/* FOP: statfs */
+
+int32_t
+ec_combine_statfs(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
+{
+ ec_statvfs_combine(&dst->statvfs, &src->statvfs);
+
+ return 1;
+}
+
+int32_t
+ec_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct statvfs *buf, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_STATFS, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (buf != NULL) {
+ cbk->statvfs = *buf;
+ }
+ }
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ ec_combine(cbk, ec_combine_statfs);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_statfs(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_statfs_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->statfs,
+ &fop->loc[0], fop->xdata);
+}
+
+int32_t
+ec_manager_statfs(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk = NULL;
+ gf_boolean_t deem_statfs_enabled = _gf_false;
+ int32_t err = 0;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ if (cbk != NULL) {
+ ec_t *ec = fop->xl->private;
+
+ if (cbk->xdata) {
+ err = dict_get_int8(cbk->xdata, "quota-deem-statfs",
+ (int8_t *)&deem_statfs_enabled);
+ if (err != -ENOENT) {
+ ec_cbk_set_error(cbk, -err, _gf_true);
+ }
+ }
+
+ if (err != 0 || deem_statfs_enabled == _gf_false) {
+ cbk->statvfs.f_blocks *= ec->fragments;
+ cbk->statvfs.f_bfree *= ec->fragments;
+ cbk->statvfs.f_bavail *= ec->fragments;
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.statfs != NULL) {
+ fop->cbks.statfs(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, &cbk->statvfs, cbk->xdata);
+ }
+
+ return EC_STATE_END;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.statfs != NULL) {
+ fop->cbks.statfs(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL);
+ }
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_statfs(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_statfs_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata)
+{
+ ec_cbk_t callback = {.statfs = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(STATFS) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_STATFS, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_statfs,
+ ec_manager_statfs, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
+ }
+}
+
+/* FOP: xattrop */
+
+int32_t
+ec_combine_xattrop(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
+{
+ if (!ec_dict_compare(dst->dict, src->dict)) {
+ gf_msg(fop->xl->name, GF_LOG_DEBUG, 0, EC_MSG_DICT_MISMATCH,
+ "Mismatching dictionary in "
+ "answers of 'GF_FOP_XATTROP'");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+int32_t
+ec_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_lock_link_t *link = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ uint64_t dirty[2] = {0};
+ data_t *data;
+ uint64_t *version;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, fop->id, idx, op_ret,
+ op_errno);
+ if (!cbk)
+ goto out;
+
+ if (op_ret >= 0) {
+ cbk->dict = dict_ref(xattr);
+
+ data = dict_get(cbk->dict, EC_XATTR_VERSION);
+ if ((data != NULL) && (data->len >= sizeof(uint64_t))) {
+ version = (uint64_t *)data->data;
+
+ if (((ntoh64(version[0]) >> EC_SELFHEAL_BIT) & 1) != 0) {
+ LOCK(&fop->lock);
+
+ fop->healing |= 1ULL << idx;
+
+ UNLOCK(&fop->lock);
+ }
+ }
+
+ ec_dict_del_array(xattr, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE);
+ link = fop->data;
+ if (link) {
+ /*Keep a note of if the dirty is already set or not*/
+ link->dirty[0] |= (dirty[0] != 0);
+ link->dirty[1] |= (dirty[1] != 0);
+ }
+ }
+
+ if (xdata)
+ cbk->xdata = dict_ref(xdata);
+
+ ec_combine(cbk, ec_combine_xattrop);
+
+out:
+ if (fop)
+ ec_complete(fop);
+
+ return 0;
+}
+
+void
+ec_wind_xattrop(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_xattrop_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->xattrop,
+ &fop->loc[0], fop->xattrop_flags, fop->dict, fop->xdata);
+}
+
+int32_t
+ec_manager_xattrop(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ if (fop->fd == NULL) {
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_UPDATE_META, 0,
+ EC_RANGE_FULL);
+ } else {
+ ec_lock_prepare_fd(fop, fop->fd, EC_UPDATE_META, 0,
+ EC_RANGE_FULL);
+ }
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ err = ec_dict_combine(cbk, EC_COMBINE_DICT);
+ ec_cbk_set_error(cbk, -err, _gf_false);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->id == GF_FOP_XATTROP) {
+ if (fop->cbks.xattrop != NULL) {
+ fop->cbks.xattrop(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->dict, cbk->xdata);
+ }
+ } else {
+ if (fop->cbks.fxattrop != NULL) {
+ fop->cbks.fxattrop(fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, cbk->dict,
+ cbk->xdata);
+ }
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->id == GF_FOP_XATTROP) {
+ if (fop->cbks.xattrop != NULL) {
+ fop->cbks.xattrop(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL);
+ }
+ } else {
+ if (fop->cbks.fxattrop != NULL) {
+ fop->cbks.fxattrop(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL);
+ }
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_xattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_xattrop_cbk_t func, void *data, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ ec_cbk_t callback = {.xattrop = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(XATTROP) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_XATTROP, 0, target,
+ fop_flags, ec_wind_xattrop, ec_manager_xattrop,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->xattrop_flags = optype;
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (xattr != NULL) {
+ fop->dict = dict_ref(xattr);
+ if (fop->dict == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
+ }
+}
+
+void
+ec_wind_fxattrop(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_xattrop_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->fxattrop,
+ fop->fd, fop->xattrop_flags, fop->dict, fop->xdata);
+}
+
+void
+ec_fxattrop(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fxattrop_cbk_t func, void *data, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ ec_cbk_t callback = {.fxattrop = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(FXATTROP) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FXATTROP, 0, target,
+ fop_flags, ec_wind_fxattrop, ec_manager_xattrop,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ fop->xattrop_flags = optype;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (xattr != NULL) {
+ fop->dict = dict_ref(xattr);
+ if (fop->dict == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
+ }
+}
+
+/* FOP: IPC */
+
+int32_t
+ec_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_IPC, idx, op_ret,
+ op_errno);
+
+ if (cbk != NULL) {
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ }
+
+ ec_combine(cbk, NULL);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_ipc(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_ipc_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->ipc, fop->int32,
+ fop->xdata);
+}
+
+int32_t
+ec_manager_ipc(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ ec_fop_prepare_answer(fop, _gf_true);
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+ if (fop->cbks.ipc != NULL) {
+ fop->cbks.ipc(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->xdata);
+ }
+
+ return EC_STATE_END;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.ipc != NULL) {
+ fop->cbks.ipc(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL);
+ }
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_ipc(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_ipc_cbk_t func, void *data, int32_t op,
+ dict_t *xdata)
+{
+ ec_cbk_t callback = {.ipc = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(IPC) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_IPC, 0, target, fop_flags,
+ ec_wind_ipc, ec_manager_ipc, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ }
+ fop->int32 = op;
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
+ }
+}
diff --git a/xlators/cluster/ec/src/ec-gf8.c b/xlators/cluster/ec/src/ec-gf8.c
new file mode 100644
index 00000000000..039adae5929
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-gf8.c
@@ -0,0 +1,5882 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "ec-gf8.h"
+
+static ec_gf_op_t ec_gf8_mul_00_ops[] = {{EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_00 = {0,
+ {
+ 0,
+ },
+ ec_gf8_mul_00_ops};
+
+static ec_gf_op_t ec_gf8_mul_01_ops[] = {{EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_01 = {8,
+ {
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_01_ops};
+
+static ec_gf_op_t ec_gf8_mul_02_ops[] = {{EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_02 = {8,
+ {
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_02_ops};
+
+static ec_gf_op_t ec_gf8_mul_03_ops[] = {
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_COPY, 8, 3, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_03 = {9,
+ {
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ },
+ ec_gf8_mul_03_ops};
+
+static ec_gf_op_t ec_gf8_mul_04_ops[] = {
+ {EC_GF_OP_XOR3, 8, 6, 7}, {EC_GF_OP_XOR2, 2, 8, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_04 = {9,
+ {
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 8,
+ },
+ ec_gf8_mul_04_ops};
+
+static ec_gf_op_t ec_gf8_mul_05_ops[] = {
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_05 = {8,
+ {
+ 0,
+ 1,
+ 2,
+ 6,
+ 7,
+ 3,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_05_ops};
+
+static ec_gf_op_t ec_gf8_mul_06_ops[] = {
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_COPY, 8, 2, 0},
+ {EC_GF_OP_XOR2, 8, 3, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_06 = {9,
+ {
+ 7,
+ 0,
+ 1,
+ 2,
+ 8,
+ 3,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_06_ops};
+
+static ec_gf_op_t ec_gf8_mul_07_ops[] = {
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_07 = {8,
+ {
+ 6,
+ 0,
+ 1,
+ 3,
+ 2,
+ 4,
+ 5,
+ 7,
+ },
+ ec_gf8_mul_07_ops};
+
+static ec_gf_op_t ec_gf8_mul_08_ops[] = {
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR3, 8, 6, 7},
+ {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 2, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_08 = {9,
+ {
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 8,
+ },
+ ec_gf8_mul_08_ops};
+
+static ec_gf_op_t ec_gf8_mul_09_ops[] = {
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_09 = {8,
+ {
+ 0,
+ 1,
+ 2,
+ 3,
+ 5,
+ 6,
+ 7,
+ 4,
+ },
+ ec_gf8_mul_09_ops};
+
+static ec_gf_op_t ec_gf8_mul_0A_ops[] = {
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_0A = {8,
+ {
+ 5,
+ 0,
+ 1,
+ 2,
+ 6,
+ 7,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_0A_ops};
+
+static ec_gf_op_t ec_gf8_mul_0B_ops[] = {
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_COPY, 9, 3, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_COPY, 8, 5, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR3, 3, 8, 6}, {EC_GF_OP_XOR2, 1, 9, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_0B = {10,
+ {
+ 7,
+ 1,
+ 5,
+ 2,
+ 4,
+ 3,
+ 0,
+ 6,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_0B_ops};
+
+static ec_gf_op_t ec_gf8_mul_0C_ops[] = {
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_COPY, 8, 1, 0},
+ {EC_GF_OP_XOR2, 8, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_0C = {9,
+ {
+ 5,
+ 7,
+ 0,
+ 1,
+ 8,
+ 2,
+ 3,
+ 4,
+ 6,
+ },
+ ec_gf8_mul_0C_ops};
+
+static ec_gf_op_t ec_gf8_mul_0D_ops[] = {
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR3, 8, 2, 4}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR3, 2, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_0D = {9,
+ {
+ 5,
+ 6,
+ 7,
+ 3,
+ 1,
+ 0,
+ 2,
+ 4,
+ 8,
+ },
+ ec_gf8_mul_0D_ops};
+
+static ec_gf_op_t ec_gf8_mul_0E_ops[] = {
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_0E = {8,
+ {
+ 7,
+ 0,
+ 6,
+ 1,
+ 3,
+ 2,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_0E_ops};
+
+static ec_gf_op_t ec_gf8_mul_0F_ops[] = {
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_0F = {8,
+ {
+ 1,
+ 0,
+ 5,
+ 6,
+ 7,
+ 2,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_0F_ops};
+
+static ec_gf_op_t ec_gf8_mul_10_ops[] = {
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_10 = {8,
+ {
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ },
+ ec_gf8_mul_10_ops};
+
+static ec_gf_op_t ec_gf8_mul_11_ops[] = {
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_11 = {8,
+ {
+ 4,
+ 1,
+ 2,
+ 6,
+ 0,
+ 5,
+ 7,
+ 3,
+ },
+ ec_gf8_mul_11_ops};
+
+static ec_gf_op_t ec_gf8_mul_12_ops[] = {
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_12 = {8,
+ {
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 5,
+ 6,
+ 4,
+ },
+ ec_gf8_mul_12_ops};
+
+static ec_gf_op_t ec_gf8_mul_13_ops[] = {
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR3, 8, 3, 7},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 6, 8, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 0, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_13 = {9,
+ {
+ 4,
+ 5,
+ 2,
+ 6,
+ 0,
+ 1,
+ 7,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_13_ops};
+
+static ec_gf_op_t ec_gf8_mul_14_ops[] = {
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_14 = {8,
+ {
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 4,
+ 5,
+ 3,
+ },
+ ec_gf8_mul_14_ops};
+
+static ec_gf_op_t ec_gf8_mul_15_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR3, 5, 8, 7},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_15 = {9,
+ {
+ 0,
+ 1,
+ 2,
+ 4,
+ 7,
+ 6,
+ 5,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_15_ops};
+
+static ec_gf_op_t ec_gf8_mul_16_ops[] = {
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_16 = {8,
+ {
+ 6,
+ 7,
+ 4,
+ 1,
+ 2,
+ 3,
+ 5,
+ 0,
+ },
+ ec_gf8_mul_16_ops};
+
+static ec_gf_op_t ec_gf8_mul_17_ops[] = {
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_17 = {8,
+ {
+ 5,
+ 7,
+ 0,
+ 1,
+ 3,
+ 2,
+ 4,
+ 6,
+ },
+ ec_gf8_mul_17_ops};
+
+static ec_gf_op_t ec_gf8_mul_18_ops[] = {
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_18 = {9,
+ {
+ 4,
+ 5,
+ 7,
+ 6,
+ 0,
+ 1,
+ 2,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_18_ops};
+
+static ec_gf_op_t ec_gf8_mul_19_ops[] = {
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_19 = {8,
+ {
+ 0,
+ 5,
+ 2,
+ 6,
+ 7,
+ 1,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_19_ops};
+
+static ec_gf_op_t ec_gf8_mul_1A_ops[] = {
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_1A = {8,
+ {
+ 7,
+ 0,
+ 4,
+ 5,
+ 3,
+ 1,
+ 2,
+ 6,
+ },
+ ec_gf8_mul_1A_ops};
+
+static ec_gf_op_t ec_gf8_mul_1B_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_1B = {8,
+ {
+ 7,
+ 4,
+ 5,
+ 6,
+ 3,
+ 1,
+ 2,
+ 0,
+ },
+ ec_gf8_mul_1B_ops};
+
+static ec_gf_op_t ec_gf8_mul_1C_ops[] = {
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_1C = {8,
+ {
+ 5,
+ 4,
+ 3,
+ 0,
+ 1,
+ 7,
+ 2,
+ 6,
+ },
+ ec_gf8_mul_1C_ops};
+
+static ec_gf_op_t ec_gf8_mul_1D_ops[] = {
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR3, 8, 4, 2},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_1D = {9,
+ {
+ 0,
+ 7,
+ 5,
+ 8,
+ 2,
+ 3,
+ 4,
+ 1,
+ 6,
+ },
+ ec_gf8_mul_1D_ops};
+
+static ec_gf_op_t ec_gf8_mul_1E_ops[] = {
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_1E = {8,
+ {
+ 4,
+ 7,
+ 5,
+ 1,
+ 6,
+ 0,
+ 2,
+ 3,
+ },
+ ec_gf8_mul_1E_ops};
+
+static ec_gf_op_t ec_gf8_mul_1F_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR3, 8, 3, 7},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_1F = {9,
+ {
+ 1,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 3,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_1F_ops};
+
+static ec_gf_op_t ec_gf8_mul_20_ops[] = {
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_20 = {8,
+ {
+ 7,
+ 4,
+ 5,
+ 6,
+ 3,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_20_ops};
+
+static ec_gf_op_t ec_gf8_mul_21_ops[] = {
+ {EC_GF_OP_COPY, 9, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR3, 8, 7, 5}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR2, 2, 8, 0},
+ {EC_GF_OP_XOR2, 4, 9, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_21 = {10,
+ {
+ 0,
+ 1,
+ 2,
+ 7,
+ 5,
+ 4,
+ 3,
+ 6,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_21_ops};
+
+static ec_gf_op_t ec_gf8_mul_22_ops[] = {
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_22 = {8,
+ {
+ 3,
+ 0,
+ 5,
+ 2,
+ 6,
+ 4,
+ 1,
+ 7,
+ },
+ ec_gf8_mul_22_ops};
+
+static ec_gf_op_t ec_gf8_mul_23_ops[] = {
+ {EC_GF_OP_COPY, 8, 2, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 3, 8, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_23 = {9,
+ {
+ 0,
+ 4,
+ 3,
+ 2,
+ 5,
+ 6,
+ 1,
+ 8,
+ 7,
+ },
+ ec_gf8_mul_23_ops};
+
+static ec_gf_op_t ec_gf8_mul_24_ops[] = {
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_24 = {8,
+ {
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 4,
+ 5,
+ 3,
+ },
+ ec_gf8_mul_24_ops};
+
+static ec_gf_op_t ec_gf8_mul_25_ops[] = {
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_25 = {8,
+ {
+ 2,
+ 7,
+ 0,
+ 1,
+ 3,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_25_ops};
+
+static ec_gf_op_t ec_gf8_mul_26_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_26 = {8,
+ {
+ 3,
+ 4,
+ 1,
+ 2,
+ 0,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_26_ops};
+
+static ec_gf_op_t ec_gf8_mul_27_ops[] = {
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_27 = {8,
+ {
+ 3,
+ 0,
+ 1,
+ 2,
+ 6,
+ 7,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_27_ops};
+
+static ec_gf_op_t ec_gf8_mul_28_ops[] = {
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_28 = {8,
+ {
+ 5,
+ 6,
+ 3,
+ 0,
+ 1,
+ 2,
+ 4,
+ 7,
+ },
+ ec_gf8_mul_28_ops};
+
+static ec_gf_op_t ec_gf8_mul_29_ops[] = {
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_29 = {8,
+ {
+ 4,
+ 6,
+ 3,
+ 5,
+ 7,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_29_ops};
+
+static ec_gf_op_t ec_gf8_mul_2A_ops[] = {
+ {EC_GF_OP_COPY, 8, 1, 0}, {EC_GF_OP_XOR2, 8, 0, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR3, 6, 8, 4}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_2A = {9,
+ {
+ 3,
+ 4,
+ 7,
+ 2,
+ 6,
+ 5,
+ 1,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_2A_ops};
+
+static ec_gf_op_t ec_gf8_mul_2B_ops[] = {
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_2B = {8,
+ {
+ 3,
+ 4,
+ 7,
+ 5,
+ 6,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_2B_ops};
+
+static ec_gf_op_t ec_gf8_mul_2C_ops[] = {
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_2C = {8,
+ {
+ 5,
+ 6,
+ 7,
+ 0,
+ 2,
+ 3,
+ 4,
+ 1,
+ },
+ ec_gf8_mul_2C_ops};
+
+static ec_gf_op_t ec_gf8_mul_2D_ops[] = {
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR3, 8, 4, 6},
+ {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_2D = {9,
+ {
+ 7,
+ 0,
+ 3,
+ 5,
+ 1,
+ 4,
+ 2,
+ 6,
+ 8,
+ },
+ ec_gf8_mul_2D_ops};
+
+static ec_gf_op_t ec_gf8_mul_2E_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_COPY, 8, 4, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 8, 7, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 2, 8, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_2E = {9,
+ {
+ 5,
+ 0,
+ 7,
+ 3,
+ 2,
+ 6,
+ 4,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_2E_ops};
+
+static ec_gf_op_t ec_gf8_mul_2F_ops[] = {
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR3, 8, 7, 6}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 3, 8, 0},
+ {EC_GF_OP_XOR2, 2, 8, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_2F = {9,
+ {
+ 6,
+ 3,
+ 2,
+ 5,
+ 7,
+ 0,
+ 1,
+ 4,
+ 8,
+ },
+ ec_gf8_mul_2F_ops};
+
+static ec_gf_op_t ec_gf8_mul_30_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 8, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR3, 6, 8, 7},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_30 = {9,
+ {
+ 3,
+ 4,
+ 7,
+ 5,
+ 0,
+ 6,
+ 1,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_30_ops};
+
+static ec_gf_op_t ec_gf8_mul_31_ops[] = {
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_31 = {8,
+ {
+ 7,
+ 1,
+ 4,
+ 5,
+ 6,
+ 0,
+ 2,
+ 3,
+ },
+ ec_gf8_mul_31_ops};
+
+static ec_gf_op_t ec_gf8_mul_32_ops[] = {
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_32 = {8,
+ {
+ 3,
+ 4,
+ 6,
+ 7,
+ 5,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_32_ops};
+
+static ec_gf_op_t ec_gf8_mul_33_ops[] = {
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_33 = {8,
+ {
+ 5,
+ 4,
+ 3,
+ 0,
+ 2,
+ 1,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_33_ops};
+
+static ec_gf_op_t ec_gf8_mul_34_ops[] = {
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_34 = {8,
+ {
+ 7,
+ 5,
+ 3,
+ 0,
+ 2,
+ 4,
+ 1,
+ 6,
+ },
+ ec_gf8_mul_34_ops};
+
+static ec_gf_op_t ec_gf8_mul_35_ops[] = {
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_35 = {8,
+ {
+ 6,
+ 7,
+ 5,
+ 4,
+ 2,
+ 0,
+ 1,
+ 3,
+ },
+ ec_gf8_mul_35_ops};
+
+static ec_gf_op_t ec_gf8_mul_36_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_36 = {8,
+ {
+ 6,
+ 7,
+ 4,
+ 1,
+ 2,
+ 3,
+ 0,
+ 5,
+ },
+ ec_gf8_mul_36_ops};
+
+static ec_gf_op_t ec_gf8_mul_37_ops[] = {
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR3, 8, 0, 1},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_37 = {9,
+ {
+ 6,
+ 7,
+ 2,
+ 1,
+ 0,
+ 3,
+ 4,
+ 5,
+ 8,
+ },
+ ec_gf8_mul_37_ops};
+
+static ec_gf_op_t ec_gf8_mul_38_ops[] = {
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR3, 8, 6, 7},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 4, 8, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_38 = {9,
+ {
+ 4,
+ 5,
+ 6,
+ 3,
+ 0,
+ 1,
+ 7,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_38_ops};
+
+static ec_gf_op_t ec_gf8_mul_39_ops[] = {
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_39 = {8,
+ {
+ 1,
+ 6,
+ 3,
+ 0,
+ 5,
+ 2,
+ 4,
+ 7,
+ },
+ ec_gf8_mul_39_ops};
+
+static ec_gf_op_t ec_gf8_mul_3A_ops[] = {
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_3A = {8,
+ {
+ 3,
+ 4,
+ 7,
+ 0,
+ 5,
+ 6,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_3A_ops};
+
+static ec_gf_op_t ec_gf8_mul_3B_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR3, 8, 7, 3}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_3B = {9,
+ {
+ 3,
+ 0,
+ 1,
+ 7,
+ 6,
+ 2,
+ 4,
+ 8,
+ 5,
+ },
+ ec_gf8_mul_3B_ops};
+
+static ec_gf_op_t ec_gf8_mul_3C_ops[] = {
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_3C = {8,
+ {
+ 3,
+ 6,
+ 4,
+ 1,
+ 7,
+ 2,
+ 0,
+ 5,
+ },
+ ec_gf8_mul_3C_ops};
+
+static ec_gf_op_t ec_gf8_mul_3D_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_3D = {8,
+ {
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ },
+ ec_gf8_mul_3D_ops};
+
+static ec_gf_op_t ec_gf8_mul_3E_ops[] = {
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_3E = {8,
+ {
+ 6,
+ 1,
+ 2,
+ 7,
+ 0,
+ 3,
+ 5,
+ 4,
+ },
+ ec_gf8_mul_3E_ops};
+
+static ec_gf_op_t ec_gf8_mul_3F_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_COPY, 10, 4, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_COPY, 9, 2, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR3, 4, 9, 7},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 3, 10, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_3F = {11,
+ {
+ 1,
+ 7,
+ 6,
+ 2,
+ 4,
+ 3,
+ 5,
+ 0,
+ 8,
+ 9,
+ 10,
+ },
+ ec_gf8_mul_3F_ops};
+
+static ec_gf_op_t ec_gf8_mul_40_ops[] = {
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR3, 8, 7, 6},
+ {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_40 = {9,
+ {
+ 5,
+ 7,
+ 4,
+ 6,
+ 2,
+ 3,
+ 0,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_40_ops};
+
+static ec_gf_op_t ec_gf8_mul_41_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 8, 4, 0},
+ {EC_GF_OP_XOR2, 8, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_41 = {9,
+ {
+ 0,
+ 7,
+ 6,
+ 5,
+ 3,
+ 4,
+ 8,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_41_ops};
+
+static ec_gf_op_t ec_gf8_mul_42_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 8, 3, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_42 = {9,
+ {
+ 2,
+ 7,
+ 1,
+ 6,
+ 4,
+ 3,
+ 0,
+ 5,
+ 8,
+ },
+ ec_gf8_mul_42_ops};
+
+static ec_gf_op_t ec_gf8_mul_43_ops[] = {
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_43 = {8,
+ {
+ 2,
+ 6,
+ 4,
+ 1,
+ 7,
+ 3,
+ 0,
+ 5,
+ },
+ ec_gf8_mul_43_ops};
+
+static ec_gf_op_t ec_gf8_mul_44_ops[] = {
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_44 = {8,
+ {
+ 2,
+ 3,
+ 4,
+ 1,
+ 6,
+ 5,
+ 0,
+ 7,
+ },
+ ec_gf8_mul_44_ops};
+
+static ec_gf_op_t ec_gf8_mul_45_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_45 = {8,
+ {
+ 2,
+ 3,
+ 0,
+ 1,
+ 7,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_45_ops};
+
+static ec_gf_op_t ec_gf8_mul_46_ops[] = {
+ {EC_GF_OP_XOR3, 8, 2, 4}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 8, 0, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_46 = {9,
+ {
+ 2,
+ 0,
+ 1,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 8,
+ },
+ ec_gf8_mul_46_ops};
+
+static ec_gf_op_t ec_gf8_mul_47_ops[] = {
+ {EC_GF_OP_XOR3, 8, 0, 1}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_47 = {9,
+ {
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_47_ops};
+
+static ec_gf_op_t ec_gf8_mul_48_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_48 = {8,
+ {
+ 4,
+ 5,
+ 6,
+ 0,
+ 1,
+ 3,
+ 7,
+ 2,
+ },
+ ec_gf8_mul_48_ops};
+
+static ec_gf_op_t ec_gf8_mul_49_ops[] = {
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR3, 8, 0, 6},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR3, 1, 8, 5},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_49 = {9,
+ {
+ 7,
+ 2,
+ 4,
+ 0,
+ 3,
+ 5,
+ 1,
+ 6,
+ 8,
+ },
+ ec_gf8_mul_49_ops};
+
+static ec_gf_op_t ec_gf8_mul_4A_ops[] = {
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_4A = {8,
+ {
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 3,
+ 4,
+ 2,
+ },
+ ec_gf8_mul_4A_ops};
+
+static ec_gf_op_t ec_gf8_mul_4B_ops[] = {
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR3, 8, 3, 7}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 8, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_4B = {9,
+ {
+ 5,
+ 3,
+ 6,
+ 7,
+ 0,
+ 2,
+ 4,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_4B_ops};
+
+static ec_gf_op_t ec_gf8_mul_4C_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_4C = {8,
+ {
+ 5,
+ 3,
+ 4,
+ 7,
+ 0,
+ 6,
+ 2,
+ 1,
+ },
+ ec_gf8_mul_4C_ops};
+
+static ec_gf_op_t ec_gf8_mul_4D_ops[] = {
+ {EC_GF_OP_COPY, 8, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR3, 9, 3, 1},
+ {EC_GF_OP_XOR2, 5, 9, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR3, 0, 8, 2},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_4D = {10,
+ {
+ 0,
+ 9,
+ 3,
+ 5,
+ 6,
+ 4,
+ 7,
+ 1,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_4D_ops};
+
+static ec_gf_op_t ec_gf8_mul_4E_ops[] = {
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_4E = {8,
+ {
+ 2,
+ 3,
+ 0,
+ 1,
+ 5,
+ 6,
+ 7,
+ 4,
+ },
+ ec_gf8_mul_4E_ops};
+
+static ec_gf_op_t ec_gf8_mul_4F_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_4F = {8,
+ {
+ 0,
+ 3,
+ 5,
+ 6,
+ 1,
+ 2,
+ 7,
+ 4,
+ },
+ ec_gf8_mul_4F_ops};
+
+static ec_gf_op_t ec_gf8_mul_50_ops[] = {
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_50 = {8,
+ {
+ 4,
+ 5,
+ 7,
+ 3,
+ 0,
+ 1,
+ 2,
+ 6,
+ },
+ ec_gf8_mul_50_ops};
+
+static ec_gf_op_t ec_gf8_mul_51_ops[] = {
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_51 = {8,
+ {
+ 0,
+ 1,
+ 7,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_51_ops};
+
+static ec_gf_op_t ec_gf8_mul_52_ops[] = {
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_COPY, 9, 4, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR3, 3, 5, 8},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 2, 9, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_52 = {10,
+ {
+ 2,
+ 3,
+ 1,
+ 4,
+ 6,
+ 7,
+ 0,
+ 5,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_52_ops};
+
+static ec_gf_op_t ec_gf8_mul_53_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_53 = {8,
+ {
+ 2,
+ 0,
+ 1,
+ 4,
+ 5,
+ 6,
+ 7,
+ 3,
+ },
+ ec_gf8_mul_53_ops};
+
+static ec_gf_op_t ec_gf8_mul_54_ops[] = {
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_54 = {8,
+ {
+ 7,
+ 3,
+ 0,
+ 4,
+ 2,
+ 6,
+ 5,
+ 1,
+ },
+ ec_gf8_mul_54_ops};
+
+static ec_gf_op_t ec_gf8_mul_55_ops[] = {
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_55 = {8,
+ {
+ 1,
+ 5,
+ 6,
+ 4,
+ 3,
+ 7,
+ 2,
+ 0,
+ },
+ ec_gf8_mul_55_ops};
+
+static ec_gf_op_t ec_gf8_mul_56_ops[] = {
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_56 = {8,
+ {
+ 2,
+ 3,
+ 0,
+ 4,
+ 5,
+ 6,
+ 7,
+ 1,
+ },
+ ec_gf8_mul_56_ops};
+
+static ec_gf_op_t ec_gf8_mul_57_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_57 = {8,
+ {
+ 2,
+ 3,
+ 0,
+ 1,
+ 4,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_57_ops};
+
+static ec_gf_op_t ec_gf8_mul_58_ops[] = {
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_58 = {8,
+ {
+ 4,
+ 3,
+ 2,
+ 7,
+ 0,
+ 1,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_58_ops};
+
+static ec_gf_op_t ec_gf8_mul_59_ops[] = {
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_59 = {8,
+ {
+ 7,
+ 3,
+ 5,
+ 6,
+ 1,
+ 2,
+ 0,
+ 4,
+ },
+ ec_gf8_mul_59_ops};
+
+static ec_gf_op_t ec_gf8_mul_5A_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_5A = {8,
+ {
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 5,
+ 4,
+ },
+ ec_gf8_mul_5A_ops};
+
+static ec_gf_op_t ec_gf8_mul_5B_ops[] = {
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_5B = {8,
+ {
+ 6,
+ 0,
+ 7,
+ 5,
+ 2,
+ 1,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_5B_ops};
+
+static ec_gf_op_t ec_gf8_mul_5C_ops[] = {
+ {EC_GF_OP_COPY, 8, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 2, 8, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_5C = {9,
+ {
+ 7,
+ 5,
+ 2,
+ 4,
+ 1,
+ 0,
+ 6,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_5C_ops};
+
+static ec_gf_op_t ec_gf8_mul_5D_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_5D = {8,
+ {
+ 1,
+ 3,
+ 5,
+ 4,
+ 6,
+ 7,
+ 2,
+ 0,
+ },
+ ec_gf8_mul_5D_ops};
+
+static ec_gf_op_t ec_gf8_mul_5E_ops[] = {
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_5E = {8,
+ {
+ 4,
+ 3,
+ 6,
+ 2,
+ 5,
+ 7,
+ 0,
+ 1,
+ },
+ ec_gf8_mul_5E_ops};
+
+static ec_gf_op_t ec_gf8_mul_5F_ops[] = {
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_5F = {8,
+ {
+ 6,
+ 1,
+ 3,
+ 4,
+ 5,
+ 7,
+ 2,
+ 0,
+ },
+ ec_gf8_mul_5F_ops};
+
+static ec_gf_op_t ec_gf8_mul_60_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_60 = {8,
+ {
+ 2,
+ 3,
+ 4,
+ 7,
+ 5,
+ 6,
+ 0,
+ 1,
+ },
+ ec_gf8_mul_60_ops};
+
+static ec_gf_op_t ec_gf8_mul_61_ops[] = {
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_61 = {8,
+ {
+ 0,
+ 5,
+ 6,
+ 7,
+ 4,
+ 2,
+ 1,
+ 3,
+ },
+ ec_gf8_mul_61_ops};
+
+static ec_gf_op_t ec_gf8_mul_62_ops[] = {
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_62 = {8,
+ {
+ 2,
+ 0,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 1,
+ },
+ ec_gf8_mul_62_ops};
+
+static ec_gf_op_t ec_gf8_mul_63_ops[] = {
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_63 = {8,
+ {
+ 3,
+ 4,
+ 6,
+ 5,
+ 7,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_63_ops};
+
+static ec_gf_op_t ec_gf8_mul_64_ops[] = {
+ {EC_GF_OP_COPY, 8, 1, 0}, {EC_GF_OP_XOR2, 8, 0, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 8, 7, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_64 = {9,
+ {
+ 2,
+ 3,
+ 4,
+ 6,
+ 5,
+ 7,
+ 8,
+ 1,
+ 0,
+ },
+ ec_gf8_mul_64_ops};
+
+static ec_gf_op_t ec_gf8_mul_65_ops[] = {
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_65 = {8,
+ {
+ 2,
+ 5,
+ 1,
+ 3,
+ 4,
+ 0,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_65_ops};
+
+static ec_gf_op_t ec_gf8_mul_66_ops[] = {
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_66 = {8,
+ {
+ 2,
+ 3,
+ 1,
+ 4,
+ 5,
+ 7,
+ 0,
+ 6,
+ },
+ ec_gf8_mul_66_ops};
+
+static ec_gf_op_t ec_gf8_mul_67_ops[] = {
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_67 = {8,
+ {
+ 2,
+ 4,
+ 5,
+ 6,
+ 7,
+ 3,
+ 1,
+ 0,
+ },
+ ec_gf8_mul_67_ops};
+
+static ec_gf_op_t ec_gf8_mul_68_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_68 = {8,
+ {
+ 5,
+ 7,
+ 2,
+ 3,
+ 0,
+ 6,
+ 4,
+ 1,
+ },
+ ec_gf8_mul_68_ops};
+
+static ec_gf_op_t ec_gf8_mul_69_ops[] = {
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_69 = {8,
+ {
+ 0,
+ 1,
+ 3,
+ 2,
+ 4,
+ 5,
+ 7,
+ 6,
+ },
+ ec_gf8_mul_69_ops};
+
+static ec_gf_op_t ec_gf8_mul_6A_ops[] = {
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_6A = {8,
+ {
+ 5,
+ 7,
+ 4,
+ 6,
+ 1,
+ 2,
+ 0,
+ 3,
+ },
+ ec_gf8_mul_6A_ops};
+
+static ec_gf_op_t ec_gf8_mul_6B_ops[] = {
+ {EC_GF_OP_COPY, 8, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_6B = {9,
+ {
+ 6,
+ 7,
+ 2,
+ 0,
+ 3,
+ 1,
+ 5,
+ 4,
+ 8,
+ },
+ ec_gf8_mul_6B_ops};
+
+static ec_gf_op_t ec_gf8_mul_6C_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_6C = {8,
+ {
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_6C_ops};
+
+static ec_gf_op_t ec_gf8_mul_6D_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR3, 8, 3, 4}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_6D = {9,
+ {
+ 3,
+ 6,
+ 7,
+ 0,
+ 4,
+ 5,
+ 1,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_6D_ops};
+
+static ec_gf_op_t ec_gf8_mul_6E_ops[] = {
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_6E = {8,
+ {
+ 5,
+ 6,
+ 3,
+ 1,
+ 7,
+ 2,
+ 0,
+ 4,
+ },
+ ec_gf8_mul_6E_ops};
+
+static ec_gf_op_t ec_gf8_mul_6F_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR3, 0, 8, 7}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_6F = {9,
+ {
+ 2,
+ 6,
+ 3,
+ 7,
+ 0,
+ 1,
+ 4,
+ 5,
+ 8,
+ },
+ ec_gf8_mul_6F_ops};
+
+static ec_gf_op_t ec_gf8_mul_70_ops[] = {
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_70 = {8,
+ {
+ 3,
+ 4,
+ 5,
+ 2,
+ 6,
+ 0,
+ 1,
+ 7,
+ },
+ ec_gf8_mul_70_ops};
+
+static ec_gf_op_t ec_gf8_mul_71_ops[] = {
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_71 = {8,
+ {
+ 4,
+ 7,
+ 5,
+ 3,
+ 6,
+ 0,
+ 2,
+ 1,
+ },
+ ec_gf8_mul_71_ops};
+
+static ec_gf_op_t ec_gf8_mul_72_ops[] = {
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_72 = {8,
+ {
+ 0,
+ 5,
+ 2,
+ 7,
+ 4,
+ 1,
+ 3,
+ 6,
+ },
+ ec_gf8_mul_72_ops};
+
+static ec_gf_op_t ec_gf8_mul_73_ops[] = {
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_73 = {8,
+ {
+ 6,
+ 0,
+ 1,
+ 7,
+ 4,
+ 5,
+ 2,
+ 3,
+ },
+ ec_gf8_mul_73_ops};
+
+static ec_gf_op_t ec_gf8_mul_74_ops[] = {
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_74 = {8,
+ {
+ 3,
+ 2,
+ 1,
+ 0,
+ 4,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_74_ops};
+
+static ec_gf_op_t ec_gf8_mul_75_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_75 = {8,
+ {
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ },
+ ec_gf8_mul_75_ops};
+
+static ec_gf_op_t ec_gf8_mul_76_ops[] = {
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR3, 8, 6, 2},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 0, 8, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_76 = {9,
+ {
+ 2,
+ 3,
+ 0,
+ 6,
+ 5,
+ 1,
+ 7,
+ 8,
+ 4,
+ },
+ ec_gf8_mul_76_ops};
+
+static ec_gf_op_t ec_gf8_mul_77_ops[] = {
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_77 = {8,
+ {
+ 7,
+ 4,
+ 3,
+ 6,
+ 0,
+ 1,
+ 5,
+ 2,
+ },
+ ec_gf8_mul_77_ops};
+
+static ec_gf_op_t ec_gf8_mul_78_ops[] = {
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR3, 8, 0, 2},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_78 = {9,
+ {
+ 4,
+ 7,
+ 3,
+ 2,
+ 5,
+ 1,
+ 6,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_78_ops};
+
+static ec_gf_op_t ec_gf8_mul_79_ops[] = {
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR3, 8, 4, 7},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_79 = {9,
+ {
+ 4,
+ 5,
+ 7,
+ 3,
+ 1,
+ 6,
+ 2,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_79_ops};
+
+static ec_gf_op_t ec_gf8_mul_7A_ops[] = {
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_7A = {8,
+ {
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ },
+ ec_gf8_mul_7A_ops};
+
+static ec_gf_op_t ec_gf8_mul_7B_ops[] = {
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR3, 8, 5, 3},
+ {EC_GF_OP_XOR2, 8, 0, 0}, {EC_GF_OP_COPY, 9, 4, 0},
+ {EC_GF_OP_XOR2, 8, 2, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR3, 4, 1, 9},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_7B = {10,
+ {
+ 1,
+ 2,
+ 3,
+ 4,
+ 8,
+ 5,
+ 6,
+ 0,
+ 7,
+ 9,
+ },
+ ec_gf8_mul_7B_ops};
+
+static ec_gf_op_t ec_gf8_mul_7C_ops[] = {
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_7C = {8,
+ {
+ 2,
+ 4,
+ 1,
+ 6,
+ 3,
+ 5,
+ 7,
+ 0,
+ },
+ ec_gf8_mul_7C_ops};
+
+static ec_gf_op_t ec_gf8_mul_7D_ops[] = {
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_7D = {8,
+ {
+ 1,
+ 0,
+ 3,
+ 5,
+ 6,
+ 7,
+ 2,
+ 4,
+ },
+ ec_gf8_mul_7D_ops};
+
+static ec_gf_op_t ec_gf8_mul_7E_ops[] = {
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_COPY, 8, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR3, 6, 2, 7},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_7E = {9,
+ {
+ 5,
+ 1,
+ 2,
+ 0,
+ 7,
+ 3,
+ 4,
+ 6,
+ 8,
+ },
+ ec_gf8_mul_7E_ops};
+
+static ec_gf_op_t ec_gf8_mul_7F_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR3, 9, 7, 5}, {EC_GF_OP_XOR2, 2, 9, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 6, 9, 0},
+ {EC_GF_OP_XOR3, 9, 6, 4}, {EC_GF_OP_XOR2, 7, 9, 0},
+ {EC_GF_OP_XOR2, 3, 9, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_7F = {10,
+ {
+ 4,
+ 1,
+ 0,
+ 5,
+ 6,
+ 7,
+ 2,
+ 3,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_7F_ops};
+
+static ec_gf_op_t ec_gf8_mul_80_ops[] = {
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_80 = {8,
+ {
+ 7,
+ 5,
+ 6,
+ 4,
+ 1,
+ 2,
+ 3,
+ 0,
+ },
+ ec_gf8_mul_80_ops};
+
+static ec_gf_op_t ec_gf8_mul_81_ops[] = {
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_81 = {8,
+ {
+ 2,
+ 7,
+ 4,
+ 1,
+ 5,
+ 6,
+ 3,
+ 0,
+ },
+ ec_gf8_mul_81_ops};
+
+static ec_gf_op_t ec_gf8_mul_82_ops[] = {
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_COPY, 8, 6, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR3, 5, 8, 7}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_82 = {9,
+ {
+ 6,
+ 2,
+ 7,
+ 5,
+ 1,
+ 3,
+ 4,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_82_ops};
+
+static ec_gf_op_t ec_gf8_mul_83_ops[] = {
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_83 = {8,
+ {
+ 3,
+ 5,
+ 6,
+ 7,
+ 1,
+ 2,
+ 4,
+ 0,
+ },
+ ec_gf8_mul_83_ops};
+
+static ec_gf_op_t ec_gf8_mul_84_ops[] = {
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_84 = {8,
+ {
+ 7,
+ 6,
+ 0,
+ 4,
+ 1,
+ 5,
+ 3,
+ 2,
+ },
+ ec_gf8_mul_84_ops};
+
+static ec_gf_op_t ec_gf8_mul_85_ops[] = {
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_85 = {8,
+ {
+ 7,
+ 6,
+ 0,
+ 3,
+ 2,
+ 4,
+ 5,
+ 1,
+ },
+ ec_gf8_mul_85_ops};
+
+static ec_gf_op_t ec_gf8_mul_86_ops[] = {
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_86 = {8,
+ {
+ 1,
+ 2,
+ 6,
+ 4,
+ 5,
+ 7,
+ 3,
+ 0,
+ },
+ ec_gf8_mul_86_ops};
+
+static ec_gf_op_t ec_gf8_mul_87_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_COPY, 8, 1, 0},
+ {EC_GF_OP_XOR2, 8, 6, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR3, 5, 8, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 8, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_87 = {9,
+ {
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 7,
+ 6,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_87_ops};
+
+static ec_gf_op_t ec_gf8_mul_88_ops[] = {
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_88 = {8,
+ {
+ 6,
+ 7,
+ 3,
+ 1,
+ 2,
+ 4,
+ 5,
+ 0,
+ },
+ ec_gf8_mul_88_ops};
+
+static ec_gf_op_t ec_gf8_mul_89_ops[] = {
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR3, 8, 5, 2},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_89 = {9,
+ {
+ 2,
+ 1,
+ 6,
+ 5,
+ 7,
+ 3,
+ 4,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_89_ops};
+
+static ec_gf_op_t ec_gf8_mul_8A_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_8A = {8,
+ {
+ 1,
+ 2,
+ 3,
+ 0,
+ 6,
+ 7,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_8A_ops};
+
+static ec_gf_op_t ec_gf8_mul_8B_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_8B = {8,
+ {
+ 6,
+ 1,
+ 2,
+ 3,
+ 5,
+ 7,
+ 4,
+ 0,
+ },
+ ec_gf8_mul_8B_ops};
+
+static ec_gf_op_t ec_gf8_mul_8C_ops[] = {
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_8C = {8,
+ {
+ 1,
+ 2,
+ 0,
+ 7,
+ 3,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_8C_ops};
+
+static ec_gf_op_t ec_gf8_mul_8D_ops[] = {
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_8D = {8,
+ {
+ 7,
+ 1,
+ 3,
+ 2,
+ 4,
+ 5,
+ 0,
+ 6,
+ },
+ ec_gf8_mul_8D_ops};
+
+static ec_gf_op_t ec_gf8_mul_8E_ops[] = {{EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_8E = {8,
+ {
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ },
+ ec_gf8_mul_8E_ops};
+
+static ec_gf_op_t ec_gf8_mul_8F_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_8F = {8,
+ {
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ },
+ ec_gf8_mul_8F_ops};
+
+static ec_gf_op_t ec_gf8_mul_90_ops[] = {
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_90 = {8,
+ {
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 3,
+ 2,
+ },
+ ec_gf8_mul_90_ops};
+
+static ec_gf_op_t ec_gf8_mul_91_ops[] = {
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_COPY, 9, 1, 0}, {EC_GF_OP_COPY, 8, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 7, 9, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR3, 5, 8, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_91 = {10,
+ {
+ 2,
+ 3,
+ 1,
+ 4,
+ 0,
+ 6,
+ 7,
+ 5,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_91_ops};
+
+static ec_gf_op_t ec_gf8_mul_92_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_92 = {8,
+ {
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 5,
+ 4,
+ },
+ ec_gf8_mul_92_ops};
+
+static ec_gf_op_t ec_gf8_mul_93_ops[] = {
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_93 = {8,
+ {
+ 6,
+ 4,
+ 5,
+ 1,
+ 7,
+ 2,
+ 3,
+ 0,
+ },
+ ec_gf8_mul_93_ops};
+
+static ec_gf_op_t ec_gf8_mul_94_ops[] = {
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_94 = {8,
+ {
+ 7,
+ 5,
+ 0,
+ 2,
+ 6,
+ 1,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_94_ops};
+
+static ec_gf_op_t ec_gf8_mul_95_ops[] = {
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_95 = {8,
+ {
+ 7,
+ 6,
+ 1,
+ 3,
+ 0,
+ 4,
+ 5,
+ 2,
+ },
+ ec_gf8_mul_95_ops};
+
+static ec_gf_op_t ec_gf8_mul_96_ops[] = {
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR3, 8, 0, 4}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 8, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_96 = {9,
+ {
+ 4,
+ 0,
+ 1,
+ 6,
+ 7,
+ 2,
+ 3,
+ 5,
+ 8,
+ },
+ ec_gf8_mul_96_ops};
+
+static ec_gf_op_t ec_gf8_mul_97_ops[] = {
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_COPY, 8, 2, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 8, 6, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_97 = {9,
+ {
+ 4,
+ 5,
+ 3,
+ 6,
+ 7,
+ 1,
+ 2,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_97_ops};
+
+static ec_gf_op_t ec_gf8_mul_98_ops[] = {
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_98 = {8,
+ {
+ 4,
+ 2,
+ 3,
+ 6,
+ 7,
+ 5,
+ 1,
+ 0,
+ },
+ ec_gf8_mul_98_ops};
+
+static ec_gf_op_t ec_gf8_mul_99_ops[] = {
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_99 = {8,
+ {
+ 6,
+ 5,
+ 3,
+ 7,
+ 0,
+ 1,
+ 4,
+ 2,
+ },
+ ec_gf8_mul_99_ops};
+
+static ec_gf_op_t ec_gf8_mul_9A_ops[] = {
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR3, 8, 4, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_9A = {9,
+ {
+ 6,
+ 3,
+ 4,
+ 0,
+ 5,
+ 1,
+ 2,
+ 7,
+ 8,
+ },
+ ec_gf8_mul_9A_ops};
+
+static ec_gf_op_t ec_gf8_mul_9B_ops[] = {
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_COPY, 9, 5, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR3, 8, 3, 2}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 3, 9, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_9B = {10,
+ {
+ 4,
+ 5,
+ 8,
+ 6,
+ 7,
+ 1,
+ 2,
+ 0,
+ 3,
+ 9,
+ },
+ ec_gf8_mul_9B_ops};
+
+static ec_gf_op_t ec_gf8_mul_9C_ops[] = {
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_9C = {8,
+ {
+ 3,
+ 2,
+ 1,
+ 0,
+ 4,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_9C_ops};
+
+static ec_gf_op_t ec_gf8_mul_9D_ops[] = {
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_9D = {8,
+ {
+ 0,
+ 1,
+ 2,
+ 3,
+ 7,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_9D_ops};
+
+static ec_gf_op_t ec_gf8_mul_9E_ops[] = {
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_COPY, 8, 7, 0},
+ {EC_GF_OP_XOR2, 8, 5, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_9E = {9,
+ {
+ 4,
+ 5,
+ 3,
+ 8,
+ 6,
+ 0,
+ 2,
+ 7,
+ 1,
+ },
+ ec_gf8_mul_9E_ops};
+
+static ec_gf_op_t ec_gf8_mul_9F_ops[] = {
+ {EC_GF_OP_XOR3, 8, 1, 2}, {EC_GF_OP_XOR2, 8, 3, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_9F = {9,
+ {
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_9F_ops};
+
+static ec_gf_op_t ec_gf8_mul_A0_ops[] = {
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A0 = {8,
+ {
+ 3,
+ 1,
+ 6,
+ 7,
+ 5,
+ 2,
+ 4,
+ 0,
+ },
+ ec_gf8_mul_A0_ops};
+
+static ec_gf_op_t ec_gf8_mul_A1_ops[] = {
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR3, 8, 0, 6},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A1 = {9,
+ {
+ 7,
+ 4,
+ 1,
+ 5,
+ 6,
+ 0,
+ 2,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_A1_ops};
+
+static ec_gf_op_t ec_gf8_mul_A2_ops[] = {
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A2 = {8,
+ {
+ 7,
+ 0,
+ 6,
+ 3,
+ 2,
+ 1,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_A2_ops};
+
+static ec_gf_op_t ec_gf8_mul_A3_ops[] = {
+ {EC_GF_OP_COPY, 8, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 3, 8, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A3 = {9,
+ {
+ 3,
+ 7,
+ 2,
+ 6,
+ 1,
+ 4,
+ 0,
+ 5,
+ 8,
+ },
+ ec_gf8_mul_A3_ops};
+
+static ec_gf_op_t ec_gf8_mul_A4_ops[] = {
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A4 = {8,
+ {
+ 5,
+ 6,
+ 7,
+ 2,
+ 4,
+ 3,
+ 0,
+ 1,
+ },
+ ec_gf8_mul_A4_ops};
+
+static ec_gf_op_t ec_gf8_mul_A5_ops[] = {
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR3, 8, 5, 6}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A5 = {9,
+ {
+ 1,
+ 4,
+ 2,
+ 5,
+ 6,
+ 7,
+ 3,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_A5_ops};
+
+static ec_gf_op_t ec_gf8_mul_A6_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A6 = {8,
+ {
+ 1,
+ 2,
+ 0,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_A6_ops};
+
+static ec_gf_op_t ec_gf8_mul_A7_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A7 = {8,
+ {
+ 0,
+ 1,
+ 2,
+ 5,
+ 6,
+ 7,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_A7_ops};
+
+static ec_gf_op_t ec_gf8_mul_A8_ops[] = {
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 8, 1, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_COPY, 9, 4, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 8, 3, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 9, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A8 = {10,
+ {
+ 1,
+ 7,
+ 5,
+ 8,
+ 6,
+ 3,
+ 4,
+ 0,
+ 2,
+ 9,
+ },
+ ec_gf8_mul_A8_ops};
+
+static ec_gf_op_t ec_gf8_mul_A9_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_A9 = {8,
+ {
+ 3,
+ 7,
+ 6,
+ 1,
+ 2,
+ 0,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_A9_ops};
+
+static ec_gf_op_t ec_gf8_mul_AA_ops[] = {
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_AA = {8,
+ {
+ 0,
+ 4,
+ 5,
+ 3,
+ 6,
+ 7,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_AA_ops};
+
+static ec_gf_op_t ec_gf8_mul_AB_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_COPY, 9, 6, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 8, 7, 0}, {EC_GF_OP_XOR2, 3, 8, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR3, 3, 9, 7},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_AB = {10,
+ {
+ 2,
+ 3,
+ 8,
+ 0,
+ 5,
+ 6,
+ 1,
+ 4,
+ 7,
+ 9,
+ },
+ ec_gf8_mul_AB_ops};
+
+static ec_gf_op_t ec_gf8_mul_AC_ops[] = {
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_AC = {8,
+ {
+ 3,
+ 2,
+ 1,
+ 0,
+ 4,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_AC_ops};
+
+static ec_gf_op_t ec_gf8_mul_AD_ops[] = {
+ {EC_GF_OP_XOR3, 8, 1, 2}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 4, 8, 0},
+ {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_AD = {9,
+ {
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_AD_ops};
+
+static ec_gf_op_t ec_gf8_mul_AE_ops[] = {
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_COPY, 8, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_AE = {9,
+ {
+ 7,
+ 0,
+ 5,
+ 6,
+ 3,
+ 4,
+ 1,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_AE_ops};
+
+static ec_gf_op_t ec_gf8_mul_AF_ops[] = {
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_AF = {8,
+ {
+ 0,
+ 1,
+ 2,
+ 7,
+ 3,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_AF_ops};
+
+static ec_gf_op_t ec_gf8_mul_B0_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B0 = {8,
+ {
+ 4,
+ 0,
+ 7,
+ 2,
+ 3,
+ 1,
+ 6,
+ 5,
+ },
+ ec_gf8_mul_B0_ops};
+
+static ec_gf_op_t ec_gf8_mul_B1_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_COPY, 8, 4, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR3, 5, 8, 1}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B1 = {9,
+ {
+ 2,
+ 6,
+ 4,
+ 7,
+ 0,
+ 1,
+ 3,
+ 5,
+ 8,
+ },
+ ec_gf8_mul_B1_ops};
+
+static ec_gf_op_t ec_gf8_mul_B2_ops[] = {
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR3, 8, 4, 5},
+ {EC_GF_OP_XOR2, 2, 8, 0}, {EC_GF_OP_XOR2, 8, 1, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR2, 3, 8, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B2 = {9,
+ {
+ 0,
+ 7,
+ 4,
+ 5,
+ 6,
+ 1,
+ 2,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_B2_ops};
+
+static ec_gf_op_t ec_gf8_mul_B3_ops[] = {
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_COPY, 9, 5, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR3, 8, 6, 4},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 8, 5, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR3, 1, 9, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B3 = {10,
+ {
+ 2,
+ 3,
+ 4,
+ 5,
+ 1,
+ 6,
+ 0,
+ 7,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_B3_ops};
+
+static ec_gf_op_t ec_gf8_mul_B4_ops[] = {
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B4 = {8,
+ {
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_B4_ops};
+
+static ec_gf_op_t ec_gf8_mul_B5_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_COPY, 8, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR3, 4, 8, 3}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B5 = {9,
+ {
+ 3,
+ 4,
+ 0,
+ 7,
+ 1,
+ 5,
+ 6,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_B5_ops};
+
+static ec_gf_op_t ec_gf8_mul_B6_ops[] = {
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B6 = {8,
+ {
+ 5,
+ 3,
+ 6,
+ 4,
+ 7,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_B6_ops};
+
+static ec_gf_op_t ec_gf8_mul_B7_ops[] = {
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B7 = {8,
+ {
+ 5,
+ 0,
+ 1,
+ 4,
+ 2,
+ 6,
+ 7,
+ 3,
+ },
+ ec_gf8_mul_B7_ops};
+
+static ec_gf_op_t ec_gf8_mul_B8_ops[] = {
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B8 = {8,
+ {
+ 6,
+ 4,
+ 5,
+ 1,
+ 2,
+ 0,
+ 7,
+ 3,
+ },
+ ec_gf8_mul_B8_ops};
+
+static ec_gf_op_t ec_gf8_mul_B9_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR3, 0, 8, 2}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_B9 = {9,
+ {
+ 6,
+ 7,
+ 0,
+ 2,
+ 1,
+ 4,
+ 5,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_B9_ops};
+
+static ec_gf_op_t ec_gf8_mul_BA_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_BA = {8,
+ {
+ 1,
+ 2,
+ 4,
+ 3,
+ 5,
+ 6,
+ 0,
+ 7,
+ },
+ ec_gf8_mul_BA_ops};
+
+static ec_gf_op_t ec_gf8_mul_BB_ops[] = {
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_COPY, 8, 3, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 8, 5, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 8, 7, 0}, {EC_GF_OP_XOR2, 2, 8, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_BB = {9,
+ {
+ 7,
+ 2,
+ 1,
+ 8,
+ 3,
+ 5,
+ 6,
+ 4,
+ 0,
+ },
+ ec_gf8_mul_BB_ops};
+
+static ec_gf_op_t ec_gf8_mul_BC_ops[] = {
+ {EC_GF_OP_COPY, 8, 1, 0}, {EC_GF_OP_XOR2, 8, 2, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR3, 2, 8, 4},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_BC = {9,
+ {
+ 2,
+ 6,
+ 3,
+ 4,
+ 5,
+ 1,
+ 7,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_BC_ops};
+
+static ec_gf_op_t ec_gf8_mul_BD_ops[] = {
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_BD = {8,
+ {
+ 4,
+ 5,
+ 0,
+ 2,
+ 7,
+ 1,
+ 6,
+ 3,
+ },
+ ec_gf8_mul_BD_ops};
+
+static ec_gf_op_t ec_gf8_mul_BE_ops[] = {
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_BE = {8,
+ {
+ 0,
+ 6,
+ 7,
+ 4,
+ 5,
+ 1,
+ 3,
+ 2,
+ },
+ ec_gf8_mul_BE_ops};
+
+static ec_gf_op_t ec_gf8_mul_BF_ops[] = {
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_BF = {8,
+ {
+ 5,
+ 6,
+ 1,
+ 7,
+ 3,
+ 0,
+ 2,
+ 4,
+ },
+ ec_gf8_mul_BF_ops};
+
+static ec_gf_op_t ec_gf8_mul_C0_ops[] = {
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C0 = {8,
+ {
+ 1,
+ 2,
+ 3,
+ 4,
+ 7,
+ 5,
+ 6,
+ 0,
+ },
+ ec_gf8_mul_C0_ops};
+
+static ec_gf_op_t ec_gf8_mul_C1_ops[] = {
+ {EC_GF_OP_XOR3, 8, 1, 2}, {EC_GF_OP_XOR2, 8, 3, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 6, 8, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C1 = {9,
+ {
+ 5,
+ 6,
+ 7,
+ 4,
+ 1,
+ 2,
+ 3,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_C1_ops};
+
+static ec_gf_op_t ec_gf8_mul_C2_ops[] = {
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C2 = {8,
+ {
+ 7,
+ 6,
+ 3,
+ 0,
+ 1,
+ 4,
+ 5,
+ 2,
+ },
+ ec_gf8_mul_C2_ops};
+
+static ec_gf_op_t ec_gf8_mul_C3_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR3, 0, 2, 6}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR3, 9, 1, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 7, 9, 0},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C3 = {10,
+ {
+ 5,
+ 6,
+ 4,
+ 7,
+ 1,
+ 2,
+ 3,
+ 0,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_C3_ops};
+
+static ec_gf_op_t ec_gf8_mul_C4_ops[] = {
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 1, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C4 = {8,
+ {
+ 0,
+ 2,
+ 1,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_C4_ops};
+
+static ec_gf_op_t ec_gf8_mul_C5_ops[] = {
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C5 = {8,
+ {
+ 4,
+ 3,
+ 5,
+ 7,
+ 6,
+ 2,
+ 0,
+ 1,
+ },
+ ec_gf8_mul_C5_ops};
+
+static ec_gf_op_t ec_gf8_mul_C6_ops[] = {
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_COPY, 8, 4, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR3, 9, 5, 4},
+ {EC_GF_OP_XOR2, 6, 9, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 7, 9, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 6, 8, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C6 = {10,
+ {
+ 6,
+ 3,
+ 0,
+ 4,
+ 5,
+ 7,
+ 2,
+ 1,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_C6_ops};
+
+static ec_gf_op_t ec_gf8_mul_C7_ops[] = {
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C7 = {8,
+ {
+ 7,
+ 0,
+ 6,
+ 2,
+ 5,
+ 3,
+ 4,
+ 1,
+ },
+ ec_gf8_mul_C7_ops};
+
+static ec_gf_op_t ec_gf8_mul_C8_ops[] = {
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C8 = {8,
+ {
+ 1,
+ 3,
+ 2,
+ 4,
+ 6,
+ 7,
+ 5,
+ 0,
+ },
+ ec_gf8_mul_C8_ops};
+
+static ec_gf_op_t ec_gf8_mul_C9_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_C9 = {8,
+ {
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ },
+ ec_gf8_mul_C9_ops};
+
+static ec_gf_op_t ec_gf8_mul_CA_ops[] = {
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 4, 5, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_CA = {8,
+ {
+ 1,
+ 2,
+ 5,
+ 7,
+ 3,
+ 4,
+ 0,
+ 6,
+ },
+ ec_gf8_mul_CA_ops};
+
+static ec_gf_op_t ec_gf8_mul_CB_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_CB = {8,
+ {
+ 2,
+ 3,
+ 4,
+ 5,
+ 7,
+ 6,
+ 0,
+ 1,
+ },
+ ec_gf8_mul_CB_ops};
+
+static ec_gf_op_t ec_gf8_mul_CC_ops[] = {
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_CC = {8,
+ {
+ 2,
+ 7,
+ 1,
+ 0,
+ 5,
+ 6,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_CC_ops};
+
+static ec_gf_op_t ec_gf8_mul_CD_ops[] = {
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_CD = {8,
+ {
+ 0,
+ 6,
+ 1,
+ 2,
+ 7,
+ 3,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_CD_ops};
+
+static ec_gf_op_t ec_gf8_mul_CE_ops[] = {
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_COPY, 8, 7, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR3, 3, 6, 8},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR3, 8, 2, 3},
+ {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 4, 8, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_CE = {9,
+ {
+ 5,
+ 7,
+ 3,
+ 0,
+ 2,
+ 6,
+ 4,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_CE_ops};
+
+static ec_gf_op_t ec_gf8_mul_CF_ops[] = {
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_CF = {8,
+ {
+ 3,
+ 6,
+ 7,
+ 0,
+ 2,
+ 4,
+ 5,
+ 1,
+ },
+ ec_gf8_mul_CF_ops};
+
+static ec_gf_op_t ec_gf8_mul_D0_ops[] = {
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D0 = {8,
+ {
+ 5,
+ 6,
+ 7,
+ 2,
+ 0,
+ 3,
+ 1,
+ 4,
+ },
+ ec_gf8_mul_D0_ops};
+
+static ec_gf_op_t ec_gf8_mul_D1_ops[] = {
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR3, 8, 6, 0},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D1 = {9,
+ {
+ 5,
+ 6,
+ 3,
+ 2,
+ 0,
+ 7,
+ 4,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_D1_ops};
+
+static ec_gf_op_t ec_gf8_mul_D2_ops[] = {
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D2 = {8,
+ {
+ 7,
+ 0,
+ 2,
+ 1,
+ 3,
+ 4,
+ 6,
+ 5,
+ },
+ ec_gf8_mul_D2_ops};
+
+static ec_gf_op_t ec_gf8_mul_D3_ops[] = {
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_COPY, 8, 4, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 8, 6, 0}, {EC_GF_OP_XOR2, 3, 8, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 1, 3, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D3 = {9,
+ {
+ 0,
+ 3,
+ 2,
+ 8,
+ 4,
+ 6,
+ 7,
+ 1,
+ 5,
+ },
+ ec_gf8_mul_D3_ops};
+
+static ec_gf_op_t ec_gf8_mul_D4_ops[] = {
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_COPY, 8, 1, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR3, 1, 7, 8},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D4 = {9,
+ {
+ 4,
+ 1,
+ 7,
+ 5,
+ 0,
+ 6,
+ 3,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_D4_ops};
+
+static ec_gf_op_t ec_gf8_mul_D5_ops[] = {
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D5 = {8,
+ {
+ 6,
+ 7,
+ 4,
+ 5,
+ 2,
+ 3,
+ 1,
+ 0,
+ },
+ ec_gf8_mul_D5_ops};
+
+static ec_gf_op_t ec_gf8_mul_D6_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D6 = {9,
+ {
+ 0,
+ 6,
+ 2,
+ 7,
+ 1,
+ 3,
+ 4,
+ 5,
+ 8,
+ },
+ ec_gf8_mul_D6_ops};
+
+static ec_gf_op_t ec_gf8_mul_D7_ops[] = {
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR3, 8, 3, 5}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR3, 6, 7, 8}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D7 = {9,
+ {
+ 3,
+ 4,
+ 6,
+ 5,
+ 0,
+ 7,
+ 1,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_D7_ops};
+
+static ec_gf_op_t ec_gf8_mul_D8_ops[] = {
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D8 = {8,
+ {
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ },
+ ec_gf8_mul_D8_ops};
+
+static ec_gf_op_t ec_gf8_mul_D9_ops[] = {
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 7, 0, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_D9 = {8,
+ {
+ 1,
+ 2,
+ 6,
+ 7,
+ 4,
+ 5,
+ 0,
+ 3,
+ },
+ ec_gf8_mul_D9_ops};
+
+static ec_gf_op_t ec_gf8_mul_DA_ops[] = {
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR3, 8, 2, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 2, 4, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_DA = {9,
+ {
+ 2,
+ 5,
+ 7,
+ 1,
+ 0,
+ 4,
+ 3,
+ 6,
+ 8,
+ },
+ ec_gf8_mul_DA_ops};
+
+static ec_gf_op_t ec_gf8_mul_DB_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 8, 4, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 6, 3, 0}, {EC_GF_OP_XOR2, 3, 8, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_DB = {9,
+ {
+ 7,
+ 5,
+ 6,
+ 2,
+ 3,
+ 4,
+ 1,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_DB_ops};
+
+static ec_gf_op_t ec_gf8_mul_DC_ops[] = {
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_DC = {8,
+ {
+ 4,
+ 5,
+ 2,
+ 6,
+ 7,
+ 1,
+ 0,
+ 3,
+ },
+ ec_gf8_mul_DC_ops};
+
+static ec_gf_op_t ec_gf8_mul_DD_ops[] = {
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_DD = {8,
+ {
+ 1,
+ 2,
+ 3,
+ 6,
+ 7,
+ 0,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_DD_ops};
+
+static ec_gf_op_t ec_gf8_mul_DE_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_DE = {8,
+ {
+ 0,
+ 5,
+ 2,
+ 6,
+ 7,
+ 1,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_DE_ops};
+
+static ec_gf_op_t ec_gf8_mul_DF_ops[] = {
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 8, 3, 0},
+ {EC_GF_OP_COPY, 9, 0, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 8, 7, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR3, 1, 9, 2}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_DF = {10,
+ {
+ 7,
+ 2,
+ 8,
+ 4,
+ 3,
+ 1,
+ 0,
+ 6,
+ 5,
+ 9,
+ },
+ ec_gf8_mul_DF_ops};
+
+static ec_gf_op_t ec_gf8_mul_E0_ops[] = {
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 4, 1, 0}, {EC_GF_OP_XOR2, 7, 1, 0},
+ {EC_GF_OP_XOR2, 5, 7, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E0 = {8,
+ {
+ 2,
+ 3,
+ 4,
+ 7,
+ 5,
+ 6,
+ 0,
+ 1,
+ },
+ ec_gf8_mul_E0_ops};
+
+static ec_gf_op_t ec_gf8_mul_E1_ops[] = {
+ {EC_GF_OP_COPY, 8, 1, 0}, {EC_GF_OP_XOR2, 8, 7, 0},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR3, 9, 5, 3},
+ {EC_GF_OP_XOR2, 0, 9, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 4, 9, 0}, {EC_GF_OP_XOR2, 0, 2, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 2, 8, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E1 = {10,
+ {
+ 0,
+ 7,
+ 1,
+ 3,
+ 4,
+ 5,
+ 6,
+ 2,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_E1_ops};
+
+static ec_gf_op_t ec_gf8_mul_E2_ops[] = {
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E2 = {8,
+ {
+ 2,
+ 3,
+ 7,
+ 1,
+ 5,
+ 6,
+ 0,
+ 4,
+ },
+ ec_gf8_mul_E2_ops};
+
+static ec_gf_op_t ec_gf8_mul_E3_ops[] = {
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 3, 1, 0},
+ {EC_GF_OP_XOR3, 8, 2, 7}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 0, 1, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_XOR2, 0, 8, 0}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR3, 6, 8, 4},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E3 = {9,
+ {
+ 5,
+ 4,
+ 7,
+ 2,
+ 1,
+ 3,
+ 6,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_E3_ops};
+
+static ec_gf_op_t ec_gf8_mul_E4_ops[] = {
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 4, 5, 0},
+ {EC_GF_OP_XOR2, 3, 4, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E4 = {8,
+ {
+ 7,
+ 0,
+ 1,
+ 6,
+ 3,
+ 4,
+ 2,
+ 5,
+ },
+ ec_gf8_mul_E4_ops};
+
+static ec_gf_op_t ec_gf8_mul_E5_ops[] = {
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_COPY, 8, 0, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E5 = {9,
+ {
+ 4,
+ 5,
+ 3,
+ 6,
+ 7,
+ 1,
+ 0,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_E5_ops};
+
+static ec_gf_op_t ec_gf8_mul_E6_ops[] = {
+ {EC_GF_OP_XOR2, 6, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 1, 4, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E6 = {8,
+ {
+ 5,
+ 4,
+ 3,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_E6_ops};
+
+static ec_gf_op_t ec_gf8_mul_E7_ops[] = {
+ {EC_GF_OP_COPY, 8, 6, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR3, 9, 0, 6}, {EC_GF_OP_XOR2, 4, 9, 0},
+ {EC_GF_OP_XOR2, 5, 9, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E7 = {10,
+ {
+ 1,
+ 4,
+ 3,
+ 6,
+ 7,
+ 5,
+ 2,
+ 0,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_E7_ops};
+
+static ec_gf_op_t ec_gf8_mul_E8_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 2, 5, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 1, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E8 = {8,
+ {
+ 1,
+ 4,
+ 2,
+ 7,
+ 3,
+ 0,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_E8_ops};
+
+static ec_gf_op_t ec_gf8_mul_E9_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_COPY, 8, 1, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 6, 3, 0},
+ {EC_GF_OP_XOR2, 4, 6, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR3, 1, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_E9 = {9,
+ {
+ 6,
+ 2,
+ 0,
+ 3,
+ 4,
+ 1,
+ 5,
+ 7,
+ 8,
+ },
+ ec_gf8_mul_E9_ops};
+
+static ec_gf_op_t ec_gf8_mul_EA_ops[] = {
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_EA = {8,
+ {
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_EA_ops};
+
+static ec_gf_op_t ec_gf8_mul_EB_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_EB = {8,
+ {
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ },
+ ec_gf8_mul_EB_ops};
+
+static ec_gf_op_t ec_gf8_mul_EC_ops[] = {
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR3, 8, 4, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_XOR2, 7, 3, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_EC = {9,
+ {
+ 7,
+ 4,
+ 3,
+ 0,
+ 2,
+ 5,
+ 1,
+ 6,
+ 8,
+ },
+ ec_gf8_mul_EC_ops};
+
+static ec_gf_op_t ec_gf8_mul_ED_ops[] = {
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 4, 0, 0},
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 3, 6, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 5, 2, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_ED = {8,
+ {
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 4,
+ 3,
+ 2,
+ },
+ ec_gf8_mul_ED_ops};
+
+static ec_gf_op_t ec_gf8_mul_EE_ops[] = {
+ {EC_GF_OP_XOR2, 5, 3, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR3, 8, 2, 3},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_XOR2, 4, 8, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 8, 5, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 1, 8, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_XOR2, 6, 0, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_EE = {9,
+ {
+ 6,
+ 4,
+ 5,
+ 7,
+ 2,
+ 3,
+ 0,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_EE_ops};
+
+static ec_gf_op_t ec_gf8_mul_EF_ops[] = {
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_COPY, 8, 0, 0},
+ {EC_GF_OP_XOR2, 8, 2, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 6, 8, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 7, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_EF = {9,
+ {
+ 6,
+ 4,
+ 5,
+ 7,
+ 2,
+ 0,
+ 3,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_EF_ops};
+
+static ec_gf_op_t ec_gf8_mul_F0_ops[] = {
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR3, 8, 3, 6},
+ {EC_GF_OP_XOR2, 5, 8, 0}, {EC_GF_OP_XOR2, 8, 4, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 7, 8, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 1, 8, 0},
+ {EC_GF_OP_XOR2, 0, 2, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F0 = {9,
+ {
+ 3,
+ 4,
+ 6,
+ 1,
+ 2,
+ 0,
+ 5,
+ 7,
+ 8,
+ },
+ ec_gf8_mul_F0_ops};
+
+static ec_gf_op_t ec_gf8_mul_F1_ops[] = {
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_COPY, 8, 3, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 2, 3, 0}, {EC_GF_OP_COPY, 9, 2, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 9, 0, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 5, 2, 0},
+ {EC_GF_OP_XOR2, 7, 9, 0}, {EC_GF_OP_XOR2, 4, 9, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR3, 9, 8, 7},
+ {EC_GF_OP_XOR2, 1, 9, 0}, {EC_GF_OP_XOR2, 5, 9, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F1 = {10,
+ {
+ 7,
+ 2,
+ 6,
+ 3,
+ 5,
+ 1,
+ 4,
+ 0,
+ 8,
+ 9,
+ },
+ ec_gf8_mul_F1_ops};
+
+static ec_gf_op_t ec_gf8_mul_F2_ops[] = {
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 7, 2, 0},
+ {EC_GF_OP_XOR2, 0, 6, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 2, 3, 0},
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_XOR3, 8, 6, 4},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 3, 8, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F2 = {9,
+ {
+ 1,
+ 0,
+ 6,
+ 7,
+ 4,
+ 5,
+ 2,
+ 3,
+ 8,
+ },
+ ec_gf8_mul_F2_ops};
+
+static ec_gf_op_t ec_gf8_mul_F3_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 6, 5, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F3 = {8,
+ {
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ },
+ ec_gf8_mul_F3_ops};
+
+static ec_gf_op_t ec_gf8_mul_F4_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 1, 7, 0}, {EC_GF_OP_XOR2, 3, 7, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F4 = {8,
+ {
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ 7,
+ },
+ ec_gf8_mul_F4_ops};
+
+static ec_gf_op_t ec_gf8_mul_F5_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F5 = {8,
+ {
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ 6,
+ },
+ ec_gf8_mul_F5_ops};
+
+static ec_gf_op_t ec_gf8_mul_F6_ops[] = {
+ {EC_GF_OP_XOR2, 3, 1, 0}, {EC_GF_OP_COPY, 8, 3, 0},
+ {EC_GF_OP_XOR2, 3, 5, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_COPY, 9, 3, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 2, 7, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 9, 4, 0}, {EC_GF_OP_XOR2, 4, 1, 0},
+ {EC_GF_OP_XOR2, 6, 9, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 5, 7, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR3, 7, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F6 = {10,
+ {
+ 0,
+ 6,
+ 2,
+ 7,
+ 4,
+ 3,
+ 5,
+ 9,
+ 1,
+ 8,
+ },
+ ec_gf8_mul_F6_ops};
+
+static ec_gf_op_t ec_gf8_mul_F7_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 3, 2, 0}, {EC_GF_OP_XOR2, 4, 3, 0},
+ {EC_GF_OP_XOR2, 5, 4, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F7 = {8,
+ {
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ 4,
+ 5,
+ },
+ ec_gf8_mul_F7_ops};
+
+static ec_gf_op_t ec_gf8_mul_F8_ops[] = {
+ {EC_GF_OP_XOR2, 4, 0, 0}, {EC_GF_OP_XOR2, 3, 5, 0},
+ {EC_GF_OP_XOR2, 6, 4, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 1, 6, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 5, 1, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 6, 7, 0},
+ {EC_GF_OP_XOR2, 0, 3, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F8 = {8,
+ {
+ 6,
+ 2,
+ 0,
+ 1,
+ 4,
+ 5,
+ 3,
+ 7,
+ },
+ ec_gf8_mul_F8_ops};
+
+static ec_gf_op_t ec_gf8_mul_F9_ops[] = {
+ {EC_GF_OP_XOR2, 1, 5, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 0, 5, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 6, 4, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR3, 8, 7, 1}, {EC_GF_OP_XOR2, 1, 3, 0},
+ {EC_GF_OP_XOR2, 4, 8, 0}, {EC_GF_OP_XOR2, 5, 8, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_F9 = {9,
+ {
+ 4,
+ 1,
+ 7,
+ 6,
+ 0,
+ 3,
+ 5,
+ 2,
+ 8,
+ },
+ ec_gf8_mul_F9_ops};
+
+static ec_gf_op_t ec_gf8_mul_FA_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 2, 1, 0}, {EC_GF_OP_XOR2, 0, 7, 0},
+ {EC_GF_OP_XOR2, 7, 2, 0}, {EC_GF_OP_XOR2, 1, 5, 0},
+ {EC_GF_OP_XOR2, 3, 7, 0}, {EC_GF_OP_XOR2, 5, 0, 0},
+ {EC_GF_OP_XOR2, 7, 6, 0}, {EC_GF_OP_XOR2, 0, 3, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_FA = {8,
+ {
+ 0,
+ 1,
+ 2,
+ 4,
+ 5,
+ 6,
+ 7,
+ 3,
+ },
+ ec_gf8_mul_FA_ops};
+
+static ec_gf_op_t ec_gf8_mul_FB_ops[] = {
+ {EC_GF_OP_XOR2, 1, 0, 0}, {EC_GF_OP_XOR2, 2, 1, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 3, 2, 0},
+ {EC_GF_OP_XOR2, 0, 7, 0}, {EC_GF_OP_XOR2, 2, 7, 0},
+ {EC_GF_OP_XOR2, 1, 6, 0}, {EC_GF_OP_XOR2, 7, 6, 0},
+ {EC_GF_OP_XOR2, 4, 3, 0}, {EC_GF_OP_XOR2, 6, 5, 0},
+ {EC_GF_OP_XOR2, 7, 4, 0}, {EC_GF_OP_XOR2, 5, 4, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_FB = {8,
+ {
+ 4,
+ 5,
+ 6,
+ 7,
+ 0,
+ 1,
+ 2,
+ 3,
+ },
+ ec_gf8_mul_FB_ops};
+
+static ec_gf_op_t ec_gf8_mul_FC_ops[] = {
+ {EC_GF_OP_XOR2, 7, 0, 0}, {EC_GF_OP_XOR2, 7, 4, 0},
+ {EC_GF_OP_XOR2, 5, 1, 0}, {EC_GF_OP_COPY, 9, 3, 0},
+ {EC_GF_OP_XOR3, 8, 5, 7}, {EC_GF_OP_XOR2, 3, 6, 0},
+ {EC_GF_OP_XOR2, 8, 3, 0}, {EC_GF_OP_XOR2, 2, 8, 0},
+ {EC_GF_OP_XOR2, 1, 2, 0}, {EC_GF_OP_XOR2, 4, 2, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 3, 4, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 6, 0, 0},
+ {EC_GF_OP_XOR3, 0, 9, 2}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_FC = {10,
+ {
+ 5,
+ 6,
+ 3,
+ 7,
+ 1,
+ 8,
+ 0,
+ 4,
+ 2,
+ 9,
+ },
+ ec_gf8_mul_FC_ops};
+
+static ec_gf_op_t ec_gf8_mul_FD_ops[] = {
+ {EC_GF_OP_XOR2, 7, 1, 0}, {EC_GF_OP_COPY, 8, 7, 0},
+ {EC_GF_OP_XOR2, 5, 0, 0}, {EC_GF_OP_XOR2, 7, 5, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 4, 7, 0},
+ {EC_GF_OP_XOR2, 5, 6, 0}, {EC_GF_OP_XOR2, 0, 4, 0},
+ {EC_GF_OP_XOR2, 3, 0, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 2, 5, 0}, {EC_GF_OP_XOR2, 1, 2, 0},
+ {EC_GF_OP_XOR2, 0, 1, 0}, {EC_GF_OP_XOR2, 6, 1, 0},
+ {EC_GF_OP_XOR3, 1, 8, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_FD = {9,
+ {
+ 5,
+ 3,
+ 7,
+ 6,
+ 1,
+ 2,
+ 4,
+ 0,
+ 8,
+ },
+ ec_gf8_mul_FD_ops};
+
+static ec_gf_op_t ec_gf8_mul_FE_ops[] = {
+ {EC_GF_OP_XOR2, 2, 0, 0}, {EC_GF_OP_COPY, 8, 2, 0},
+ {EC_GF_OP_XOR2, 2, 4, 0}, {EC_GF_OP_XOR2, 6, 2, 0},
+ {EC_GF_OP_XOR2, 8, 5, 0}, {EC_GF_OP_XOR2, 5, 6, 0},
+ {EC_GF_OP_XOR2, 6, 1, 0}, {EC_GF_OP_XOR2, 0, 6, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 7, 8, 0}, {EC_GF_OP_XOR2, 3, 0, 0},
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR2, 0, 4, 0}, {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_FE = {9,
+ {
+ 3,
+ 4,
+ 8,
+ 2,
+ 5,
+ 0,
+ 6,
+ 1,
+ 7,
+ },
+ ec_gf8_mul_FE_ops};
+
+static ec_gf_op_t ec_gf8_mul_FF_ops[] = {
+ {EC_GF_OP_XOR2, 4, 7, 0}, {EC_GF_OP_COPY, 9, 0, 0},
+ {EC_GF_OP_COPY, 8, 4, 0}, {EC_GF_OP_XOR2, 9, 1, 0},
+ {EC_GF_OP_XOR2, 4, 2, 0}, {EC_GF_OP_XOR2, 9, 4, 0},
+ {EC_GF_OP_XOR2, 0, 5, 0}, {EC_GF_OP_XOR2, 2, 0, 0},
+ {EC_GF_OP_XOR2, 3, 9, 0}, {EC_GF_OP_XOR2, 7, 3, 0},
+ {EC_GF_OP_XOR2, 2, 6, 0}, {EC_GF_OP_XOR2, 5, 3, 0},
+ {EC_GF_OP_XOR2, 6, 7, 0}, {EC_GF_OP_XOR2, 1, 7, 0},
+ {EC_GF_OP_XOR3, 3, 8, 5}, {EC_GF_OP_XOR2, 4, 6, 0},
+ {EC_GF_OP_END, 0, 0, 0}};
+
+static ec_gf_mul_t ec_gf8_mul_FF = {10,
+ {
+ 6,
+ 5,
+ 0,
+ 1,
+ 2,
+ 4,
+ 9,
+ 3,
+ 7,
+ 8,
+ },
+ ec_gf8_mul_FF_ops};
+
+ec_gf_mul_t *ec_gf8_mul[] = {
+ &ec_gf8_mul_00, &ec_gf8_mul_01, &ec_gf8_mul_02, &ec_gf8_mul_03,
+ &ec_gf8_mul_04, &ec_gf8_mul_05, &ec_gf8_mul_06, &ec_gf8_mul_07,
+ &ec_gf8_mul_08, &ec_gf8_mul_09, &ec_gf8_mul_0A, &ec_gf8_mul_0B,
+ &ec_gf8_mul_0C, &ec_gf8_mul_0D, &ec_gf8_mul_0E, &ec_gf8_mul_0F,
+ &ec_gf8_mul_10, &ec_gf8_mul_11, &ec_gf8_mul_12, &ec_gf8_mul_13,
+ &ec_gf8_mul_14, &ec_gf8_mul_15, &ec_gf8_mul_16, &ec_gf8_mul_17,
+ &ec_gf8_mul_18, &ec_gf8_mul_19, &ec_gf8_mul_1A, &ec_gf8_mul_1B,
+ &ec_gf8_mul_1C, &ec_gf8_mul_1D, &ec_gf8_mul_1E, &ec_gf8_mul_1F,
+ &ec_gf8_mul_20, &ec_gf8_mul_21, &ec_gf8_mul_22, &ec_gf8_mul_23,
+ &ec_gf8_mul_24, &ec_gf8_mul_25, &ec_gf8_mul_26, &ec_gf8_mul_27,
+ &ec_gf8_mul_28, &ec_gf8_mul_29, &ec_gf8_mul_2A, &ec_gf8_mul_2B,
+ &ec_gf8_mul_2C, &ec_gf8_mul_2D, &ec_gf8_mul_2E, &ec_gf8_mul_2F,
+ &ec_gf8_mul_30, &ec_gf8_mul_31, &ec_gf8_mul_32, &ec_gf8_mul_33,
+ &ec_gf8_mul_34, &ec_gf8_mul_35, &ec_gf8_mul_36, &ec_gf8_mul_37,
+ &ec_gf8_mul_38, &ec_gf8_mul_39, &ec_gf8_mul_3A, &ec_gf8_mul_3B,
+ &ec_gf8_mul_3C, &ec_gf8_mul_3D, &ec_gf8_mul_3E, &ec_gf8_mul_3F,
+ &ec_gf8_mul_40, &ec_gf8_mul_41, &ec_gf8_mul_42, &ec_gf8_mul_43,
+ &ec_gf8_mul_44, &ec_gf8_mul_45, &ec_gf8_mul_46, &ec_gf8_mul_47,
+ &ec_gf8_mul_48, &ec_gf8_mul_49, &ec_gf8_mul_4A, &ec_gf8_mul_4B,
+ &ec_gf8_mul_4C, &ec_gf8_mul_4D, &ec_gf8_mul_4E, &ec_gf8_mul_4F,
+ &ec_gf8_mul_50, &ec_gf8_mul_51, &ec_gf8_mul_52, &ec_gf8_mul_53,
+ &ec_gf8_mul_54, &ec_gf8_mul_55, &ec_gf8_mul_56, &ec_gf8_mul_57,
+ &ec_gf8_mul_58, &ec_gf8_mul_59, &ec_gf8_mul_5A, &ec_gf8_mul_5B,
+ &ec_gf8_mul_5C, &ec_gf8_mul_5D, &ec_gf8_mul_5E, &ec_gf8_mul_5F,
+ &ec_gf8_mul_60, &ec_gf8_mul_61, &ec_gf8_mul_62, &ec_gf8_mul_63,
+ &ec_gf8_mul_64, &ec_gf8_mul_65, &ec_gf8_mul_66, &ec_gf8_mul_67,
+ &ec_gf8_mul_68, &ec_gf8_mul_69, &ec_gf8_mul_6A, &ec_gf8_mul_6B,
+ &ec_gf8_mul_6C, &ec_gf8_mul_6D, &ec_gf8_mul_6E, &ec_gf8_mul_6F,
+ &ec_gf8_mul_70, &ec_gf8_mul_71, &ec_gf8_mul_72, &ec_gf8_mul_73,
+ &ec_gf8_mul_74, &ec_gf8_mul_75, &ec_gf8_mul_76, &ec_gf8_mul_77,
+ &ec_gf8_mul_78, &ec_gf8_mul_79, &ec_gf8_mul_7A, &ec_gf8_mul_7B,
+ &ec_gf8_mul_7C, &ec_gf8_mul_7D, &ec_gf8_mul_7E, &ec_gf8_mul_7F,
+ &ec_gf8_mul_80, &ec_gf8_mul_81, &ec_gf8_mul_82, &ec_gf8_mul_83,
+ &ec_gf8_mul_84, &ec_gf8_mul_85, &ec_gf8_mul_86, &ec_gf8_mul_87,
+ &ec_gf8_mul_88, &ec_gf8_mul_89, &ec_gf8_mul_8A, &ec_gf8_mul_8B,
+ &ec_gf8_mul_8C, &ec_gf8_mul_8D, &ec_gf8_mul_8E, &ec_gf8_mul_8F,
+ &ec_gf8_mul_90, &ec_gf8_mul_91, &ec_gf8_mul_92, &ec_gf8_mul_93,
+ &ec_gf8_mul_94, &ec_gf8_mul_95, &ec_gf8_mul_96, &ec_gf8_mul_97,
+ &ec_gf8_mul_98, &ec_gf8_mul_99, &ec_gf8_mul_9A, &ec_gf8_mul_9B,
+ &ec_gf8_mul_9C, &ec_gf8_mul_9D, &ec_gf8_mul_9E, &ec_gf8_mul_9F,
+ &ec_gf8_mul_A0, &ec_gf8_mul_A1, &ec_gf8_mul_A2, &ec_gf8_mul_A3,
+ &ec_gf8_mul_A4, &ec_gf8_mul_A5, &ec_gf8_mul_A6, &ec_gf8_mul_A7,
+ &ec_gf8_mul_A8, &ec_gf8_mul_A9, &ec_gf8_mul_AA, &ec_gf8_mul_AB,
+ &ec_gf8_mul_AC, &ec_gf8_mul_AD, &ec_gf8_mul_AE, &ec_gf8_mul_AF,
+ &ec_gf8_mul_B0, &ec_gf8_mul_B1, &ec_gf8_mul_B2, &ec_gf8_mul_B3,
+ &ec_gf8_mul_B4, &ec_gf8_mul_B5, &ec_gf8_mul_B6, &ec_gf8_mul_B7,
+ &ec_gf8_mul_B8, &ec_gf8_mul_B9, &ec_gf8_mul_BA, &ec_gf8_mul_BB,
+ &ec_gf8_mul_BC, &ec_gf8_mul_BD, &ec_gf8_mul_BE, &ec_gf8_mul_BF,
+ &ec_gf8_mul_C0, &ec_gf8_mul_C1, &ec_gf8_mul_C2, &ec_gf8_mul_C3,
+ &ec_gf8_mul_C4, &ec_gf8_mul_C5, &ec_gf8_mul_C6, &ec_gf8_mul_C7,
+ &ec_gf8_mul_C8, &ec_gf8_mul_C9, &ec_gf8_mul_CA, &ec_gf8_mul_CB,
+ &ec_gf8_mul_CC, &ec_gf8_mul_CD, &ec_gf8_mul_CE, &ec_gf8_mul_CF,
+ &ec_gf8_mul_D0, &ec_gf8_mul_D1, &ec_gf8_mul_D2, &ec_gf8_mul_D3,
+ &ec_gf8_mul_D4, &ec_gf8_mul_D5, &ec_gf8_mul_D6, &ec_gf8_mul_D7,
+ &ec_gf8_mul_D8, &ec_gf8_mul_D9, &ec_gf8_mul_DA, &ec_gf8_mul_DB,
+ &ec_gf8_mul_DC, &ec_gf8_mul_DD, &ec_gf8_mul_DE, &ec_gf8_mul_DF,
+ &ec_gf8_mul_E0, &ec_gf8_mul_E1, &ec_gf8_mul_E2, &ec_gf8_mul_E3,
+ &ec_gf8_mul_E4, &ec_gf8_mul_E5, &ec_gf8_mul_E6, &ec_gf8_mul_E7,
+ &ec_gf8_mul_E8, &ec_gf8_mul_E9, &ec_gf8_mul_EA, &ec_gf8_mul_EB,
+ &ec_gf8_mul_EC, &ec_gf8_mul_ED, &ec_gf8_mul_EE, &ec_gf8_mul_EF,
+ &ec_gf8_mul_F0, &ec_gf8_mul_F1, &ec_gf8_mul_F2, &ec_gf8_mul_F3,
+ &ec_gf8_mul_F4, &ec_gf8_mul_F5, &ec_gf8_mul_F6, &ec_gf8_mul_F7,
+ &ec_gf8_mul_F8, &ec_gf8_mul_F9, &ec_gf8_mul_FA, &ec_gf8_mul_FB,
+ &ec_gf8_mul_FC, &ec_gf8_mul_FD, &ec_gf8_mul_FE, &ec_gf8_mul_FF};
diff --git a/xlators/cluster/ec/src/ec-gf8.h b/xlators/cluster/ec/src/ec-gf8.h
new file mode 100644
index 00000000000..4aca91127fc
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-gf8.h
@@ -0,0 +1,18 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_GF8_H__
+#define __EC_GF8_H__
+
+#include "ec-galois.h"
+
+extern ec_gf_mul_t *ec_gf8_mul[];
+
+#endif /* __EC_GF8_H__ */
diff --git a/xlators/cluster/ec/src/ec-heal.c b/xlators/cluster/ec/src/ec-heal.c
new file mode 100644
index 00000000000..7d991f04aac
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-heal.c
@@ -0,0 +1,3367 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
+#include <glusterfs/cluster-syncop.h>
+
+#include "ec.h"
+#include "ec-types.h"
+#include "ec-messages.h"
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-combine.h"
+#include "ec-method.h"
+#include "ec-fops.h"
+#include "ec-heald.h"
+
+#define EC_COUNT(array, max) \
+ ({ \
+ int __i; \
+ int __res = 0; \
+ for (__i = 0; __i < max; __i++) \
+ if (array[__i]) \
+ __res++; \
+ __res; \
+ })
+#define EC_INTERSECT(dst, src1, src2, max) \
+ ({ \
+ int __i; \
+ for (__i = 0; __i < max; __i++) \
+ dst[__i] = src1[__i] && src2[__i]; \
+ })
+#define EC_ADJUST_SOURCE(source, sources, max) \
+ ({ \
+ int __i; \
+ if (sources[source] == 0) { \
+ source = -1; \
+ for (__i = 0; __i < max; __i++) \
+ if (sources[__i]) \
+ source = __i; \
+ } \
+ })
+#define IA_EQUAL(f, s, field) \
+ (memcmp(&(f.ia_##field), &(s.ia_##field), sizeof(s.ia_##field)) == 0)
+#define EC_REPLIES_ALLOC(replies, numsubvols) \
+ do { \
+ int __i = 0; \
+ replies = alloca0(numsubvols * sizeof(*replies)); \
+ for (__i = 0; __i < numsubvols; __i++) \
+ INIT_LIST_HEAD(&replies[__i].entries.list); \
+ } while (0)
+
+struct ec_name_data {
+ call_frame_t *frame;
+ unsigned char *participants;
+ unsigned char *failed_on;
+ unsigned char *gfidless;
+ unsigned char *enoent;
+ unsigned char *same;
+ char *name;
+ inode_t *parent;
+ default_args_cbk_t *replies;
+ uint32_t heal_pending;
+};
+
+static char *ec_ignore_xattrs[] = {GF_SELINUX_XATTR_KEY, QUOTA_SIZE_KEY, NULL};
+
+static gf_boolean_t
+ec_ignorable_key_match(dict_t *dict, char *key, data_t *val, void *mdata)
+{
+ int i = 0;
+
+ if (!key)
+ goto out;
+
+ if (strncmp(key, EC_XATTR_PREFIX, SLEN(EC_XATTR_PREFIX)) == 0)
+ return _gf_true;
+
+ for (i = 0; ec_ignore_xattrs[i]; i++) {
+ if (!strcmp(key, ec_ignore_xattrs[i]))
+ return _gf_true;
+ }
+
+out:
+ return _gf_false;
+}
+
+static gf_boolean_t
+ec_sh_key_match(dict_t *dict, char *key, data_t *val, void *mdata)
+{
+ return !ec_ignorable_key_match(dict, key, val, mdata);
+}
+/* FOP: heal */
+
+void
+ec_set_entry_healing(ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ loc_t *loc = NULL;
+
+ if (!fop)
+ return;
+
+ loc = &fop->loc[0];
+ LOCK(&loc->inode->lock);
+ {
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ ctx->heal_count += 1;
+ }
+ }
+ UNLOCK(&loc->inode->lock);
+}
+
+void
+ec_reset_entry_healing(ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ loc_t *loc = NULL;
+ int32_t heal_count = 0;
+ if (!fop)
+ return;
+
+ loc = &fop->loc[0];
+ LOCK(&loc->inode->lock);
+ {
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ ctx->heal_count += -1;
+ heal_count = ctx->heal_count;
+ }
+ }
+ UNLOCK(&loc->inode->lock);
+ GF_ASSERT(heal_count >= 0);
+}
+
+uintptr_t
+ec_heal_check(ec_fop_data_t *fop, uintptr_t *pgood)
+{
+ ec_cbk_data_t *cbk;
+ uintptr_t mask[2] = {0, 0};
+
+ list_for_each_entry(cbk, &fop->cbk_list, list)
+ {
+ mask[cbk->op_ret >= 0] |= cbk->mask;
+ }
+
+ if (pgood != NULL) {
+ *pgood = mask[1];
+ }
+
+ return mask[0];
+}
+
+void
+ec_heal_update(ec_fop_data_t *fop, int32_t is_open)
+{
+ ec_heal_t *heal = fop->data;
+ uintptr_t good, bad;
+
+ bad = ec_heal_check(fop, &good);
+
+ LOCK(&heal->lock);
+
+ heal->bad &= ~bad;
+ if (is_open) {
+ heal->open |= good;
+ }
+
+ UNLOCK(&heal->lock);
+
+ fop->error = 0;
+}
+
+void
+ec_heal_avoid(ec_fop_data_t *fop)
+{
+ ec_heal_t *heal = fop->data;
+ uintptr_t bad;
+
+ bad = ec_heal_check(fop, NULL);
+
+ LOCK(&heal->lock);
+
+ heal->good &= ~bad;
+
+ UNLOCK(&heal->lock);
+}
+
+int32_t
+ec_heal_lock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ ec_heal_t *heal = fop->data;
+
+ if (op_ret >= 0) {
+ GF_ASSERT(
+ ec_set_inode_size(heal->fop, heal->fd->inode, heal->total_size));
+ }
+
+ return 0;
+}
+
+void
+ec_heal_lock(ec_heal_t *heal, int32_t type, fd_t *fd, loc_t *loc, off_t offset,
+ size_t size)
+{
+ struct gf_flock flock;
+ fop_inodelk_cbk_t cbk = NULL;
+
+ flock.l_type = type;
+ flock.l_whence = SEEK_SET;
+ flock.l_start = offset;
+ flock.l_len = size;
+ flock.l_pid = 0;
+ flock.l_owner.len = 0;
+
+ if (type == F_UNLCK) {
+ /* Remove inode size information before unlocking it. */
+ if (fd == NULL) {
+ ec_clear_inode_info(heal->fop, heal->loc.inode);
+ } else {
+ ec_clear_inode_info(heal->fop, heal->fd->inode);
+ }
+ cbk = ec_lock_unlocked;
+ } else {
+ /* Otherwise use the callback to update size information. */
+ cbk = ec_heal_lock_cbk;
+ }
+
+ if (fd != NULL) {
+ ec_finodelk(heal->fop->frame, heal->xl,
+ &heal->fop->frame->root->lk_owner, heal->fop->mask,
+ EC_MINIMUM_ALL, cbk, heal, heal->xl->name, fd, F_SETLKW,
+ &flock, NULL);
+ } else {
+ ec_inodelk(heal->fop->frame, heal->xl,
+ &heal->fop->frame->root->lk_owner, heal->fop->mask,
+ EC_MINIMUM_ALL, cbk, heal, heal->xl->name, loc, F_SETLKW,
+ &flock, NULL);
+ }
+}
+
+void
+ec_heal_inodelk(ec_heal_t *heal, int32_t type, int32_t use_fd, off_t offset,
+ size_t size)
+{
+ ec_heal_lock(heal, type, use_fd ? heal->fd : NULL, &heal->loc, offset,
+ size);
+}
+
+int32_t
+ec_heal_xattr_clean(dict_t *dict, char *key, data_t *data, void *arg)
+{
+ dict_t *base = arg;
+
+ if (ec_ignorable_key_match(NULL, key, NULL, NULL)) {
+ dict_del(dict, key);
+ return 0;
+ }
+
+ if (dict_get(base, key) != NULL)
+ dict_del(dict, key);
+
+ return 0;
+}
+
+/********************************************************************
+ * ec_wind_xattrop_parallel:
+ * Helper function to update the extended attributes
+ * in parallel.
+ *
+ *******************************************************************/
+void
+ec_wind_xattrop_parallel(call_frame_t *frame, xlator_t *subvol, int child_index,
+ loc_t *loc, gf_xattrop_flags_t flags, dict_t **dict,
+ dict_t *xdata)
+{
+ gf_msg_debug("EC", 0, "WIND: on child %d ", child_index);
+ STACK_WIND_COOKIE(
+ frame, cluster_xattrop_cbk, (void *)(uintptr_t)child_index, subvol,
+ subvol->fops->xattrop, loc, flags, dict[child_index], xdata);
+}
+
+int32_t
+ec_heal_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ ec_heal_t *heal = fop->data;
+
+ ec_trace("WRITE_CBK", cookie, "ret=%d, errno=%d", op_ret, op_errno);
+
+ gf_msg_debug(fop->xl->name, 0,
+ "%s: write op_ret %d, op_errno %s"
+ " at %" PRIu64,
+ uuid_utoa(heal->fd->inode->gfid), op_ret, strerror(op_errno),
+ heal->offset);
+
+ ec_heal_update(cookie, 0);
+
+ return 0;
+}
+
+int32_t
+ec_heal_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ ec_heal_t *heal = fop->data;
+
+ ec_trace("READ_CBK", fop, "ret=%d, errno=%d", op_ret, op_errno);
+
+ ec_heal_avoid(fop);
+
+ if (op_ret > 0) {
+ gf_msg_debug(fop->xl->name, 0,
+ "%s: read succeeded, proceeding "
+ "to write at %" PRIu64,
+ uuid_utoa(heal->fd->inode->gfid), heal->offset);
+ ec_writev(heal->fop->frame, heal->xl, heal->bad, EC_MINIMUM_ONE,
+ ec_heal_writev_cbk, heal, heal->fd, vector, count,
+ heal->offset, 0, iobref, NULL);
+ } else {
+ if (op_ret < 0) {
+ gf_msg_debug(fop->xl->name, 0,
+ "%s: read failed %s, failing "
+ "to heal block at %" PRIu64,
+ uuid_utoa(heal->fd->inode->gfid), strerror(op_errno),
+ heal->offset);
+ heal->bad = 0;
+ }
+ heal->done = 1;
+ }
+
+ return 0;
+}
+
+void
+ec_heal_data_block(ec_heal_t *heal)
+{
+ ec_trace("DATA", heal->fop, "good=%lX, bad=%lX", heal->good, heal->bad);
+
+ if ((heal->good != 0) && (heal->bad != 0) &&
+ (heal->iatt.ia_type == IA_IFREG)) {
+ ec_readv(heal->fop->frame, heal->xl, heal->good, EC_MINIMUM_MIN,
+ ec_heal_readv_cbk, heal, heal->fd, heal->size, heal->offset, 0,
+ NULL);
+ }
+}
+
+/* FOP: fheal */
+
+void
+ec_fheal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fheal_cbk_t func, void *data, fd_t *fd,
+ int32_t partial, dict_t *xdata)
+{
+ ec_fd_t *ctx = ec_fd_get(fd, this);
+
+ if (ctx != NULL) {
+ gf_msg_trace("ec", 0, "FHEAL ctx: flags=%X, open=%" PRIXPTR, ctx->flags,
+ ctx->open);
+ ec_heal(frame, this, target, fop_flags, func, data, &ctx->loc, partial,
+ xdata);
+ }
+}
+
+/* Common heal code */
+void
+ec_mask_to_char_array(uintptr_t mask, unsigned char *array, int numsubvols)
+{
+ int i = 0;
+
+ for (i = 0; i < numsubvols; i++)
+ array[i] = ((mask >> i) & 1);
+}
+
+uintptr_t
+ec_char_array_to_mask(unsigned char *array, int numsubvols)
+{
+ int i = 0;
+ uintptr_t mask = 0;
+
+ if (array == NULL)
+ goto out;
+
+ for (i = 0; i < numsubvols; i++)
+ if (array[i])
+ mask |= (1ULL << i);
+out:
+ return mask;
+}
+
+int
+ec_heal_entry_find_direction(ec_t *ec, default_args_cbk_t *replies,
+ uint64_t *versions, uint64_t *dirty,
+ unsigned char *sources,
+ unsigned char *healed_sinks)
+{
+ uint64_t xattr[EC_VERSION_SIZE] = {0};
+ int source = -1;
+ uint64_t max_version = 0;
+ int ret = 0;
+ int i = 0;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret == -1)
+ continue;
+
+ if (source == -1)
+ source = i;
+
+ ret = ec_dict_get_array(replies[i].xdata, EC_XATTR_VERSION, xattr,
+ EC_VERSION_SIZE);
+ if (ret == 0) {
+ versions[i] = xattr[EC_DATA_TXN];
+ if (max_version < versions[i]) {
+ max_version = versions[i];
+ source = i;
+ }
+ }
+
+ memset(xattr, 0, sizeof(xattr));
+ ret = ec_dict_get_array(replies[i].xdata, EC_XATTR_DIRTY, xattr,
+ EC_VERSION_SIZE);
+ if (ret == 0) {
+ dirty[i] = xattr[EC_DATA_TXN];
+ }
+ }
+
+ if (source < 0)
+ goto out;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret == -1)
+ continue;
+
+ if (versions[i] == versions[source])
+ sources[i] = 1;
+ else
+ healed_sinks[i] = 1;
+ }
+
+out:
+ return source;
+}
+
+int
+ec_adjust_versions(call_frame_t *frame, ec_t *ec, ec_txn_t type, inode_t *inode,
+ int source, unsigned char *sources,
+ unsigned char *healed_sinks, uint64_t *versions,
+ uint64_t *dirty)
+{
+ int i = 0;
+ int ret = 0;
+ int call_count = 0;
+ dict_t **xattr = NULL;
+ int op_ret = 0;
+ loc_t loc = {0};
+ gf_boolean_t erase_dirty = _gf_false;
+ uint64_t *versions_xattr = NULL;
+ uint64_t *dirty_xattr = NULL;
+ uint64_t allzero[2] = {0};
+ unsigned char *on = NULL;
+ unsigned char *output = NULL;
+ default_args_cbk_t *replies = NULL;
+
+ /* Allocate the required memory */
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
+ if (!xattr) {
+ op_ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < ec->nodes; i++) {
+ xattr[i] = dict_new();
+ if (!xattr[i]) {
+ op_ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ /* dirty xattr represents if the file/dir needs heal. Unless all the
+ * copies are healed, don't erase it */
+ if (EC_COUNT(sources, ec->nodes) + EC_COUNT(healed_sinks, ec->nodes) ==
+ ec->nodes)
+ erase_dirty = _gf_true;
+ else
+ op_ret = -ENOTCONN;
+
+ /* Populate the xattr array */
+ for (i = 0; i < ec->nodes; i++) {
+ if (!sources[i] && !healed_sinks[i])
+ continue;
+ versions_xattr = GF_CALLOC(EC_VERSION_SIZE, sizeof(*versions_xattr),
+ gf_common_mt_pointer);
+ if (!versions_xattr) {
+ op_ret = -ENOMEM;
+ continue;
+ }
+
+ versions_xattr[type] = hton64(versions[source] - versions[i]);
+ ret = dict_set_bin(xattr[i], EC_XATTR_VERSION, versions_xattr,
+ (sizeof(*versions_xattr) * EC_VERSION_SIZE));
+ if (ret < 0) {
+ op_ret = -ENOMEM;
+ continue;
+ }
+
+ if (erase_dirty) {
+ dirty_xattr = GF_CALLOC(EC_VERSION_SIZE, sizeof(*dirty_xattr),
+ gf_common_mt_pointer);
+ if (!dirty_xattr) {
+ op_ret = -ENOMEM;
+ continue;
+ }
+
+ dirty_xattr[type] = hton64(-dirty[i]);
+ ret = dict_set_bin(xattr[i], EC_XATTR_DIRTY, dirty_xattr,
+ (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
+ if (ret < 0) {
+ op_ret = -ENOMEM;
+ continue;
+ }
+ }
+
+ if (memcmp(versions_xattr, allzero,
+ (sizeof(*versions_xattr) * EC_VERSION_SIZE)) == 0) {
+ if (!erase_dirty) {
+ continue;
+ }
+
+ if (memcmp(dirty_xattr, allzero,
+ (sizeof(*dirty_xattr) * EC_VERSION_SIZE)) == 0) {
+ continue;
+ }
+ }
+
+ on[i] = 1;
+ call_count++;
+ }
+
+ /* Update the bricks with xattr */
+ if (call_count) {
+ PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
+ ec_wind_xattrop_parallel, &loc,
+ GF_XATTROP_ADD_ARRAY64, xattr, NULL);
+ ret = cluster_fop_success_fill(replies, ec->nodes, output);
+ }
+
+ if (ret < call_count) {
+ op_ret = -ENOTCONN;
+ goto out;
+ }
+
+out:
+ /* Cleanup */
+ if (xattr) {
+ for (i = 0; i < ec->nodes; i++) {
+ if (xattr[i])
+ dict_unref(xattr[i]);
+ }
+ GF_FREE(xattr);
+ }
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ return op_ret;
+}
+
+int
+ec_heal_metadata_find_direction(ec_t *ec, default_args_cbk_t *replies,
+ uint64_t *versions, uint64_t *dirty,
+ unsigned char *sources,
+ unsigned char *healed_sinks)
+{
+ uint64_t xattr[EC_VERSION_SIZE] = {0};
+ uint64_t max_version = 0;
+ int same_count = 0;
+ int max_same_count = 0;
+ int same_source = -1;
+ int ret = 0;
+ int i = 0;
+ int j = 0;
+ int *groups = NULL;
+ struct iatt source_ia = {0};
+ struct iatt child_ia = {0};
+
+ groups = alloca0(ec->nodes * sizeof(*groups));
+ for (i = 0; i < ec->nodes; i++)
+ groups[i] = -1;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret < 0)
+ continue;
+ ret = ec_dict_get_array(replies[i].xdata, EC_XATTR_VERSION, xattr,
+ EC_VERSION_SIZE);
+ if (ret == 0) {
+ versions[i] = xattr[EC_METADATA_TXN];
+ }
+
+ memset(xattr, 0, sizeof(xattr));
+ ret = ec_dict_get_array(replies[i].xdata, EC_XATTR_DIRTY, xattr,
+ EC_VERSION_SIZE);
+ if (ret == 0) {
+ dirty[i] = xattr[EC_METADATA_TXN];
+ }
+ if (groups[i] >= 0) /*Already part of group*/
+ continue;
+ groups[i] = i;
+ same_count = 1;
+ source_ia = replies[i].stat;
+ for (j = i + 1; j < ec->nodes; j++) {
+ if (!replies[j].valid || replies[j].op_ret < 0)
+ continue;
+ child_ia = replies[j].stat;
+ if (!IA_EQUAL(source_ia, child_ia, gfid) ||
+ !IA_EQUAL(source_ia, child_ia, type) ||
+ !IA_EQUAL(source_ia, child_ia, prot) ||
+ !IA_EQUAL(source_ia, child_ia, uid) ||
+ !IA_EQUAL(source_ia, child_ia, gid))
+ continue;
+ if (!are_dicts_equal(replies[i].xdata, replies[j].xdata,
+ ec_sh_key_match, NULL))
+ continue;
+ groups[j] = i;
+ same_count++;
+ }
+
+ if (max_same_count < same_count) {
+ max_same_count = same_count;
+ same_source = i;
+ }
+ }
+
+ if (max_same_count < ec->fragments) {
+ ret = -EIO;
+ goto out;
+ }
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (groups[i] == groups[same_source])
+ sources[i] = 1;
+ else if (replies[i].valid && replies[i].op_ret >= 0)
+ healed_sinks[i] = 1;
+ }
+ for (i = 0; i < ec->nodes; i++) {
+ if (sources[i] && (versions[i] > max_version)) {
+ same_source = i;
+ max_version = versions[i];
+ }
+ }
+ ret = same_source;
+out:
+ return ret;
+}
+
+int
+__ec_heal_metadata_prepare(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *locked_on,
+ default_args_cbk_t *replies, uint64_t *versions,
+ uint64_t *dirty, unsigned char *sources,
+ unsigned char *healed_sinks)
+{
+ loc_t loc = {0};
+ unsigned char *output = NULL;
+ unsigned char *lookup_on = NULL;
+ int ret = 0;
+ int source = 0;
+ default_args_cbk_t *greplies = NULL;
+ int i = 0;
+ EC_REPLIES_ALLOC(greplies, ec->nodes);
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ output = alloca0(ec->nodes);
+ lookup_on = alloca0(ec->nodes);
+ ret = cluster_lookup(ec->xl_list, locked_on, ec->nodes, replies, output,
+ frame, ec->xl, &loc, NULL);
+ if (ret <= ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ memcpy(lookup_on, output, ec->nodes);
+ /*Use getxattr to get the filtered xattrs which filter internal xattrs*/
+ ret = cluster_getxattr(ec->xl_list, lookup_on, ec->nodes, greplies, output,
+ frame, ec->xl, &loc, NULL, NULL);
+ for (i = 0; i < ec->nodes; i++) {
+ if (lookup_on[i] && !output[i]) {
+ replies[i].valid = 0;
+ continue;
+ }
+ if (replies[i].xdata) {
+ dict_unref(replies[i].xdata);
+ replies[i].xdata = NULL;
+ if (greplies[i].xattr)
+ replies[i].xdata = dict_ref(greplies[i].xattr);
+ }
+ }
+
+ source = ec_heal_metadata_find_direction(ec, replies, versions, dirty,
+ sources, healed_sinks);
+ if (source < 0) {
+ ret = -EIO;
+ goto out;
+ }
+ ret = source;
+out:
+ cluster_replies_wipe(greplies, ec->nodes);
+ loc_wipe(&loc);
+ return ret;
+}
+
+/* Metadata heal */
+int
+__ec_removexattr_sinks(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ int source, unsigned char *sources,
+ unsigned char *healed_sinks, default_args_cbk_t *replies)
+{
+ int i = 0;
+ int ret = 0;
+ loc_t loc = {0};
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (i == source)
+ continue;
+ if (!sources[i] && !healed_sinks[i])
+ continue;
+ ret = dict_foreach(replies[i].xdata, ec_heal_xattr_clean,
+ replies[source].xdata);
+ if (ret < 0) {
+ sources[i] = 0;
+ healed_sinks[i] = 0;
+ continue;
+ }
+
+ if (replies[i].xdata->count == 0) {
+ continue;
+ } else if (sources[i]) {
+ /* This can happen if setxattr/removexattr succeeds on
+ * the bricks but fails to update the version. This
+ * will make sure that the xattrs are made equal after
+ * heal*/
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ }
+
+ ret = syncop_removexattr(ec->xl_list[i], &loc, "", replies[i].xdata,
+ NULL);
+ if (ret < 0)
+ healed_sinks[i] = 0;
+ }
+
+ loc_wipe(&loc);
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0)
+ return -ENOTCONN;
+ return 0;
+}
+
+int
+__ec_heal_metadata(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *locked_on, unsigned char *sources,
+ unsigned char *healed_sinks)
+{
+ loc_t loc = {0};
+ int ret = 0;
+ int source = 0;
+ default_args_cbk_t *replies = NULL;
+ default_args_cbk_t *sreplies = NULL;
+ uint64_t *versions = NULL;
+ uint64_t *dirty = NULL;
+ unsigned char *output = NULL;
+ dict_t *source_dict = NULL;
+ struct iatt source_buf = {0};
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ EC_REPLIES_ALLOC(sreplies, ec->nodes);
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ output = alloca0(ec->nodes);
+ versions = alloca0(ec->nodes * sizeof(*versions));
+ dirty = alloca0(ec->nodes * sizeof(*dirty));
+ source = __ec_heal_metadata_prepare(frame, ec, inode, locked_on, replies,
+ versions, dirty, sources, healed_sinks);
+ if (source < 0) {
+ ret = -EIO;
+ goto out;
+ }
+
+ if ((EC_COUNT(sources, ec->nodes) == ec->nodes) ||
+ (EC_COUNT(healed_sinks, ec->nodes) == 0)) {
+ ret = 0;
+ goto erase_dirty;
+ }
+
+ source_buf = replies[source].stat;
+ ret = cluster_setattr(ec->xl_list, healed_sinks, ec->nodes, sreplies,
+ output, frame, ec->xl, &loc, &source_buf,
+ GF_SET_ATTR_MODE | GF_SET_ATTR_UID | GF_SET_ATTR_GID,
+ NULL);
+ /*In case the operation fails on some of the subvols*/
+ memcpy(healed_sinks, output, ec->nodes);
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ ret = __ec_removexattr_sinks(frame, ec, inode, source, sources,
+ healed_sinks, replies);
+ if (ret < 0)
+ goto out;
+
+ source_dict = dict_ref(replies[source].xdata);
+ if (dict_foreach_match(source_dict, ec_ignorable_key_match, NULL,
+ dict_remove_foreach_fn, NULL) == -1) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = cluster_setxattr(ec->xl_list, healed_sinks, ec->nodes, replies,
+ output, frame, ec->xl, &loc, source_dict, 0, NULL);
+
+ EC_INTERSECT(healed_sinks, healed_sinks, output, ec->nodes);
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+erase_dirty:
+ ret = ec_adjust_versions(frame, ec, EC_METADATA_TXN, inode, source, sources,
+ healed_sinks, versions, dirty);
+out:
+ if (source_dict)
+ dict_unref(source_dict);
+
+ loc_wipe(&loc);
+ cluster_replies_wipe(replies, ec->nodes);
+ cluster_replies_wipe(sreplies, ec->nodes);
+ return ret;
+}
+
+int
+ec_heal_metadata(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *sources, unsigned char *healed_sinks)
+{
+ unsigned char *locked_on = NULL;
+ unsigned char *up_subvols = NULL;
+ unsigned char *output = NULL;
+ int ret = 0;
+ default_args_cbk_t *replies = NULL;
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ up_subvols = alloca0(ec->nodes);
+ ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes);
+ ret = cluster_inodelk(ec->xl_list, up_subvols, ec->nodes, replies,
+ locked_on, frame, ec->xl, ec->xl->name, inode, 0, 0);
+ {
+ if (ret <= ec->fragments) {
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: Skipping heal "
+ "as only %d number of subvolumes could "
+ "be locked",
+ uuid_utoa(inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ ret = __ec_heal_metadata(frame, ec, inode, locked_on, sources,
+ healed_sinks);
+ }
+unlock:
+ cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
+ ec->xl, ec->xl->name, inode, 0, 0);
+ cluster_replies_wipe(replies, ec->nodes);
+ return ret;
+}
+
+/*entry heal*/
+int
+__ec_heal_entry_prepare(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *locked_on, uint64_t *versions,
+ uint64_t *dirty, unsigned char *sources,
+ unsigned char *healed_sinks)
+{
+ loc_t loc = {0};
+ int source = 0;
+ int ret = 0;
+ default_args_cbk_t *replies = NULL;
+ unsigned char *output = NULL;
+ dict_t *xdata = NULL;
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ xdata = dict_new();
+ if (!xdata) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (dict_set_uint64(xdata, EC_XATTR_VERSION, 0) ||
+ dict_set_uint64(xdata, EC_XATTR_DIRTY, 0)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ output = alloca0(ec->nodes);
+ ret = cluster_lookup(ec->xl_list, locked_on, ec->nodes, replies, output,
+ frame, ec->xl, &loc, xdata);
+ if (ret <= ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ source = ec_heal_entry_find_direction(ec, replies, versions, dirty, sources,
+ healed_sinks);
+ if (source < 0) {
+ ret = -EIO;
+ goto out;
+ }
+ ret = source;
+out:
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+ cluster_replies_wipe(replies, ec->nodes);
+ return ret;
+}
+int32_t
+ec_set_new_entry_dirty(ec_t *ec, loc_t *loc, struct iatt *ia,
+ call_frame_t *frame, xlator_t *this, unsigned char *on)
+{
+ dict_t *xattr = NULL;
+ int32_t ret = -1;
+ default_args_cbk_t *replies = NULL;
+ unsigned char *output = NULL;
+ uint64_t dirty[EC_VERSION_SIZE] = {1, 1};
+ loc_t newloc = {0};
+
+ /*Symlinks don't have any data to be healed*/
+ if (ia->ia_type == IA_IFLNK)
+ dirty[EC_DATA_TXN] = 0;
+
+ newloc.inode = inode_ref(loc->inode);
+ gf_uuid_copy(newloc.gfid, ia->ia_gfid);
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ output = alloca0(ec->nodes);
+ xattr = dict_new();
+ if (!xattr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = ec_dict_set_array(xattr, EC_XATTR_DIRTY, dirty, EC_VERSION_SIZE);
+ if (ret)
+ goto out;
+
+ ret = cluster_xattrop(ec->xl_list, on, ec->nodes, replies, output, frame,
+ ec->xl, &newloc, GF_XATTROP_ADD_ARRAY64, xattr, NULL);
+
+ if (ret < ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&newloc);
+ return ret;
+}
+
+/*Name heal*/
+int
+ec_delete_stale_name(dict_t *gfid_db, char *key, data_t *d, void *data)
+{
+ struct ec_name_data *name_data = data;
+ struct iatt *ia = NULL;
+ ec_t *ec = NULL;
+ loc_t loc = {0};
+ unsigned char *same = data_to_bin(d);
+ default_args_cbk_t *replies = NULL;
+ unsigned char *output = NULL;
+ int ret = 0;
+ int estale_count = 0;
+ int i = 0;
+ call_frame_t *frame = name_data->frame;
+ uuid_t gfid;
+
+ ec = name_data->frame->this->private;
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ if (EC_COUNT(same, ec->nodes) >= ec->fragments) {
+ ret = 0;
+ goto out;
+ }
+
+ loc.parent = inode_ref(name_data->parent);
+ loc.inode = inode_new(name_data->parent->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_parse(key, gfid);
+ gf_uuid_copy(loc.pargfid, name_data->parent->gfid);
+ loc.name = name_data->name;
+ output = alloca0(ec->nodes);
+ ret = cluster_lookup(ec->xl_list, name_data->participants, ec->nodes,
+ replies, output, name_data->frame, ec->xl, &loc, NULL);
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret == -1) {
+ if (replies[i].op_errno == ESTALE || replies[i].op_errno == ENOENT)
+ estale_count++;
+ else
+ name_data->participants[i] = 0;
+ } else if (gf_uuid_compare(gfid, replies[i].stat.ia_gfid)) {
+ estale_count++;
+ gf_msg_debug(ec->xl->name, 0, "%s/%s: different gfid as %s",
+ uuid_utoa(name_data->parent->gfid), name_data->name,
+ key);
+ }
+ }
+
+ if (estale_count <= ec->redundancy) {
+ /* We have at least ec->fragments number of fragments, so the
+ * file is recoverable, so don't delete it*/
+
+ /* Please note that the lookup call above could fail with
+ * ENOTCONN on all subvoumes and still this branch will be
+ * true, but in those cases conservatively we decide to not
+ * delete the file until we are sure*/
+ ret = 0;
+ goto out;
+ }
+
+ /*Noway to recover, delete the name*/
+ loc_wipe(&loc);
+ loc.parent = inode_ref(name_data->parent);
+ gf_uuid_copy(loc.pargfid, loc.parent->gfid);
+ loc.name = name_data->name;
+ for (i = 0; i < ec->nodes; i++) {
+ if (same[i] && replies[i].valid && (replies[i].op_ret == 0)) {
+ ia = &replies[i].stat;
+ break;
+ }
+ }
+
+ if (!ia) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ if (IA_ISDIR(ia->ia_type)) {
+ ret = cluster_rmdir(ec->xl_list, same, ec->nodes, replies, output,
+ frame, ec->xl, &loc, 1, NULL);
+ gf_msg_debug(ec->xl->name, 0,
+ "cluster rmdir succeeded on %d "
+ "nodes",
+ ret);
+ } else {
+ ret = cluster_unlink(ec->xl_list, same, ec->nodes, replies, output,
+ frame, ec->xl, &loc, 0, NULL);
+ gf_msg_debug(ec->xl->name, 0,
+ "cluster unlink succeeded on %d "
+ "nodes",
+ ret);
+ }
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (output[i]) {
+ same[i] = 0;
+ name_data->enoent[i] = 1;
+ } else {
+ /*op failed*/
+ if (same[i])
+ name_data->participants[i] = 0;
+ }
+ }
+ ret = 0;
+ /*This will help in making decisions about creating names*/
+ dict_del(gfid_db, key);
+out:
+ if (ret < 0) {
+ gf_msg_debug(ec->xl->name, 0, "%s/%s: heal failed %s",
+ uuid_utoa(name_data->parent->gfid), name_data->name,
+ strerror(-ret));
+ }
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
+ec_delete_stale_names(call_frame_t *frame, ec_t *ec, inode_t *parent,
+ char *name, default_args_cbk_t *replies, dict_t *gfid_db,
+ unsigned char *enoent, unsigned char *gfidless,
+ unsigned char *participants)
+{
+ struct ec_name_data name_data = {0};
+
+ name_data.enoent = enoent;
+ name_data.gfidless = gfidless;
+ name_data.participants = participants;
+ name_data.name = name;
+ name_data.parent = parent;
+ name_data.frame = frame;
+ name_data.replies = replies;
+ return dict_foreach(gfid_db, ec_delete_stale_name, &name_data);
+}
+
+int
+_assign_same(dict_t *dict, char *key, data_t *value, void *data)
+{
+ struct ec_name_data *name_data = data;
+
+ name_data->same = data_to_bin(value);
+ return 0;
+}
+
+int
+ec_create_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
+ default_args_cbk_t *lookup_replies, dict_t *gfid_db,
+ unsigned char *enoent, unsigned char *participants)
+{
+ int ret = 0;
+ int i = 0;
+ struct ec_name_data name_data = {0};
+ struct iatt *ia = NULL;
+ unsigned char *output = 0;
+ unsigned char *output1 = 0;
+ unsigned char *on = NULL;
+ default_args_cbk_t *replies = NULL;
+ loc_t loc = {0};
+ loc_t srcloc = {0};
+ unsigned char *link = NULL;
+ unsigned char *create = NULL;
+ dict_t *xdata = NULL;
+ char *linkname = NULL;
+ ec_config_t config;
+
+ /* There should be just one gfid key */
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ if (gfid_db->count != 1) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = dict_foreach(gfid_db, _assign_same, &name_data);
+ if (ret < 0)
+ goto out;
+ /*There should at least be one valid success reply with gfid*/
+ for (i = 0; i < ec->nodes; i++)
+ if (name_data.same[i])
+ break;
+
+ if (i == ec->nodes) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ia = &lookup_replies[i].stat;
+ xdata = dict_new();
+ loc.parent = inode_ref(parent);
+ gf_uuid_copy(loc.pargfid, parent->gfid);
+ loc.inode = inode_new(parent->table);
+ if (loc.inode)
+ srcloc.inode = inode_ref(loc.inode);
+ gf_uuid_copy(srcloc.gfid, ia->ia_gfid);
+ if (!loc.inode || !xdata ||
+ dict_set_static_bin(xdata, "gfid-req", ia->ia_gfid,
+ sizeof(ia->ia_gfid))) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ loc.name = name;
+ link = alloca0(ec->nodes);
+ create = alloca0(ec->nodes);
+ on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ output1 = alloca0(ec->nodes);
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!lookup_replies[i].valid)
+ continue;
+ if (lookup_replies[i].op_ret)
+ continue;
+ on[i] = 1;
+ }
+ switch (ia->ia_type) {
+ case IA_IFDIR:
+ ec_set_new_entry_dirty(ec, &loc, ia, frame, ec->xl, on);
+ (void)cluster_mkdir(
+ ec->xl_list, enoent, ec->nodes, replies, output, frame, ec->xl,
+ &loc, st_mode_from_ia(ia->ia_prot, ia->ia_type), 0, xdata);
+ break;
+
+ case IA_IFLNK:
+ /*Check for hard links and create/link*/
+ ret = cluster_lookup(ec->xl_list, enoent, ec->nodes, replies,
+ output, frame, ec->xl, &srcloc, NULL);
+ for (i = 0; i < ec->nodes; i++) {
+ if (output[i]) {
+ link[i] = 1;
+ } else {
+ if (replies[i].op_errno == ENOENT ||
+ replies[i].op_errno == ESTALE) {
+ create[i] = 1;
+ }
+ }
+ }
+
+ if (EC_COUNT(link, ec->nodes)) {
+ cluster_link(ec->xl_list, link, ec->nodes, replies, output1,
+ frame, ec->xl, &srcloc, &loc, NULL);
+ }
+
+ if (EC_COUNT(create, ec->nodes)) {
+ cluster_readlink(ec->xl_list, name_data.same, ec->nodes,
+ replies, output, frame, ec->xl, &srcloc, 4096,
+ NULL);
+ if (EC_COUNT(output, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (output[i])
+ break;
+ }
+ linkname = alloca0(strlen(replies[i].buf) + 1);
+ strcpy(linkname, replies[i].buf);
+ ec_set_new_entry_dirty(ec, &loc, ia, frame, ec->xl, on);
+ cluster_symlink(ec->xl_list, create, ec->nodes, replies, output,
+ frame, ec->xl, linkname, &loc, 0, xdata);
+ }
+ for (i = 0; i < ec->nodes; i++)
+ if (output1[i])
+ output[i] = 1;
+ break;
+ case IA_IFREG:
+ ec_set_new_entry_dirty(ec, &loc, ia, frame, ec->xl, on);
+ config.version = EC_CONFIG_VERSION;
+ config.algorithm = EC_CONFIG_ALGORITHM;
+ config.gf_word_size = EC_GF_BITS;
+ config.bricks = ec->nodes;
+ config.redundancy = ec->redundancy;
+ config.chunk_size = EC_METHOD_CHUNK_SIZE;
+
+ ret = ec_dict_set_config(xdata, EC_XATTR_CONFIG, &config);
+ if (ret != 0) {
+ goto out;
+ }
+
+ /* Fall through */
+
+ default:
+ ret = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+ if (ret)
+ goto out;
+ ret = cluster_mknod(
+ ec->xl_list, enoent, ec->nodes, replies, output, frame, ec->xl,
+ &loc, st_mode_from_ia(ia->ia_prot, ia->ia_type),
+ makedev(ia_major(ia->ia_rdev), ia_minor(ia->ia_rdev)), 0,
+ xdata);
+ break;
+ }
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (enoent[i] && !output[i])
+ participants[i] = 0;
+ }
+
+ ret = 0;
+out:
+ if (ret < 0)
+ gf_msg_debug(ec->xl->name, 0, "%s/%s: heal failed %s",
+ uuid_utoa(parent->gfid), name, strerror(-ret));
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ loc_wipe(&srcloc);
+ if (xdata)
+ dict_unref(xdata);
+ return ret;
+}
+
+int
+__ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
+ unsigned char *participants)
+{
+ unsigned char *output = NULL;
+ unsigned char *enoent = NULL;
+ default_args_cbk_t *replies = NULL;
+ dict_t *xdata = NULL;
+ dict_t *gfid_db = NULL;
+ int ret = 0;
+ loc_t loc = {0};
+ int i = 0;
+ struct iatt *ia = NULL;
+ char gfid[64] = {0};
+ unsigned char *same = NULL;
+ unsigned char *gfidless = NULL;
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ loc.parent = inode_ref(parent);
+ loc.inode = inode_new(parent->table);
+ gf_uuid_copy(loc.pargfid, parent->gfid);
+ loc.name = name;
+ xdata = dict_new();
+ gfid_db = dict_new();
+ if (!xdata || !gfid_db || !loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_int32(xdata, GF_GFIDLESS_LOOKUP, 1);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ output = alloca0(ec->nodes);
+ gfidless = alloca0(ec->nodes);
+ enoent = alloca0(ec->nodes);
+ ret = cluster_lookup(ec->xl_list, participants, ec->nodes, replies, output,
+ frame, ec->xl, &loc, NULL);
+ for (i = 0; i < ec->nodes; i++) {
+ if (!replies[i].valid)
+ continue;
+
+ if (replies[i].op_ret == -1) {
+ /*If ESTALE comes here, that means parent dir is not
+ * present, nothing to do there, so reset participants
+ * for that brick*/
+ if (replies[i].op_errno == ENOENT)
+ enoent[i] = 1;
+ else
+ participants[i] = 0;
+ continue;
+ }
+ ia = &replies[i].stat;
+ if (gf_uuid_is_null(ia->ia_gfid)) {
+ if (IA_ISDIR(ia->ia_type) || ia->ia_size == 0)
+ gfidless[i] = 1;
+ else
+ participants[i] = 0;
+ } else {
+ uuid_utoa_r(ia->ia_gfid, gfid);
+ ret = dict_get_bin(gfid_db, gfid, (void **)&same);
+ if (ret < 0) {
+ same = alloca0(ec->nodes);
+ }
+ same[i] = 1;
+ if (ret < 0) {
+ ret = dict_set_static_bin(gfid_db, gfid, same, ec->nodes);
+ }
+ if (ret < 0)
+ goto out;
+ }
+ }
+
+ ret = ec_delete_stale_names(frame, ec, parent, name, replies, gfid_db,
+ enoent, gfidless, participants);
+
+ if (gfid_db->count == 0) {
+ /* All entries seem to be stale entries and deleted,
+ * nothing more to do.*/
+ goto out;
+ }
+
+ if (gfid_db->count > 1) {
+ gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
+ "%s/%s: Not able to heal", uuid_utoa(parent->gfid), name);
+ memset(participants, 0, ec->nodes);
+ goto out;
+ }
+
+ EC_INTERSECT(enoent, enoent, participants, ec->nodes);
+ if (EC_COUNT(enoent, ec->nodes) == 0) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = ec_create_name(frame, ec, parent, name, replies, gfid_db, enoent,
+ participants);
+ if (ret >= 0) {
+ /* If ec_create_name() succeeded we return 1 to indicate that a new
+ * file has been created and it will need to be healed. */
+ ret = 1;
+ }
+out:
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ if (xdata)
+ dict_unref(xdata);
+ if (gfid_db)
+ dict_unref(gfid_db);
+ return ret;
+}
+
+int
+ec_heal_name(call_frame_t *frame, ec_t *ec, inode_t *parent, char *name,
+ unsigned char *participants)
+{
+ int ret = 0;
+ default_args_cbk_t *replies = NULL;
+ unsigned char *output = NULL;
+ unsigned char *locked_on = NULL;
+ loc_t loc = {0};
+
+ loc.parent = inode_ref(parent);
+ loc.name = name;
+ loc.inode = inode_new(parent->table);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ output = alloca0(ec->nodes);
+ locked_on = alloca0(ec->nodes);
+ ret = cluster_inodelk(ec->xl_list, participants, ec->nodes, replies,
+ locked_on, frame, ec->xl, ec->xl->name, parent, 0, 0);
+ {
+ if (ret <= ec->fragments) {
+ gf_msg_debug(ec->xl->name, 0,
+ "%s/%s: Skipping "
+ "heal as only %d number of subvolumes could "
+ "be locked",
+ uuid_utoa(parent->gfid), name, ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ EC_INTERSECT(participants, participants, locked_on, ec->nodes);
+ ret = __ec_heal_name(frame, ec, parent, name, participants);
+ }
+unlock:
+ cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
+ ec->xl, ec->xl->name, parent, 0, 0);
+out:
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
+ec_name_heal_handler(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
+{
+ struct ec_name_data *name_data = data;
+ xlator_t *this = THIS;
+ ec_t *ec = this->private;
+ unsigned char *name_on = alloca0(ec->nodes);
+ int i = 0;
+ int ret = 0;
+
+ if (ec->shutdown) {
+ gf_msg_debug(this->name, 0,
+ "Cancelling directory heal "
+ "because EC is stopping.");
+ return -ENOTCONN;
+ }
+
+ memcpy(name_on, name_data->participants, ec->nodes);
+ ret = ec_heal_name(name_data->frame, ec, parent->inode, entry->d_name,
+ name_on);
+
+ if (ret < 0) {
+ memset(name_on, 0, ec->nodes);
+ } else {
+ name_data->heal_pending += ret;
+ }
+
+ for (i = 0; i < ec->nodes; i++)
+ if (name_data->participants[i] && !name_on[i])
+ name_data->failed_on[i] = 1;
+
+ return 0;
+}
+
+int
+ec_heal_names(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *participants, uint32_t *pending)
+{
+ int i = 0;
+ int j = 0;
+ loc_t loc = {0};
+ struct ec_name_data name_data = {0};
+ int ret = 0;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ name_data.frame = frame;
+ name_data.participants = participants;
+ name_data.failed_on = alloca0(ec->nodes);
+ name_data.heal_pending = 0;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!participants[i])
+ continue;
+ ret = syncop_dir_scan(ec->xl_list[i], &loc, GF_CLIENT_PID_SELF_HEALD,
+ &name_data, ec_name_heal_handler);
+ if (ret < 0) {
+ break;
+ }
+ for (j = 0; j < ec->nodes; j++)
+ if (name_data.failed_on[j])
+ participants[j] = 0;
+
+ if (EC_COUNT(participants, ec->nodes) <= ec->fragments) {
+ ret = -ENOTCONN;
+ break;
+ }
+ }
+ *pending += name_data.heal_pending;
+
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
+__ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *heal_on, unsigned char *sources,
+ unsigned char *healed_sinks, uint32_t *pending)
+{
+ unsigned char *locked_on = NULL;
+ unsigned char *output = NULL;
+ uint64_t *versions = NULL;
+ uint64_t *dirty = NULL;
+ unsigned char *participants = NULL;
+ default_args_cbk_t *replies = NULL;
+ int ret = 0;
+ int source = 0;
+ int i = 0;
+
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ versions = alloca0(ec->nodes * sizeof(*versions));
+ dirty = alloca0(ec->nodes * sizeof(*dirty));
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ ret = cluster_inodelk(ec->xl_list, heal_on, ec->nodes, replies, locked_on,
+ frame, ec->xl, ec->xl->name, inode, 0, 0);
+ {
+ if (ret <= ec->fragments) {
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: Skipping heal "
+ "as only %d number of subvolumes could "
+ "be locked",
+ uuid_utoa(inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ ret = __ec_heal_entry_prepare(frame, ec, inode, locked_on, versions,
+ dirty, sources, healed_sinks);
+ source = ret;
+ }
+unlock:
+ cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
+ ec->xl, ec->xl->name, inode, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ participants = alloca0(ec->nodes);
+ for (i = 0; i < ec->nodes; i++) {
+ if (sources[i] || healed_sinks[i])
+ participants[i] = 1;
+ }
+ ret = ec_heal_names(frame, ec, inode, participants, pending);
+
+ if (EC_COUNT(participants, ec->nodes) <= ec->fragments)
+ goto out;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!participants[i]) {
+ sources[i] = 0;
+ healed_sinks[i] = 0;
+ }
+ }
+
+ ec_adjust_versions(frame, ec, EC_DATA_TXN, inode, source, sources,
+ healed_sinks, versions, dirty);
+out:
+ cluster_replies_wipe(replies, ec->nodes);
+ return ret;
+}
+
+int
+ec_heal_entry(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *sources, unsigned char *healed_sinks,
+ uint32_t *pending)
+{
+ unsigned char *locked_on = NULL;
+ unsigned char *up_subvols = NULL;
+ unsigned char *output = NULL;
+ char selfheal_domain[1024] = {0};
+ int ret = 0;
+ default_args_cbk_t *replies = NULL;
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ up_subvols = alloca0(ec->nodes);
+
+ sprintf(selfheal_domain, "%s:self-heal", ec->xl->name);
+ ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes);
+ /*If other processes are already doing the heal, don't block*/
+ ret = cluster_tiebreaker_inodelk(ec->xl_list, up_subvols, ec->nodes,
+ replies, locked_on, frame, ec->xl,
+ selfheal_domain, inode, 0, 0);
+ {
+ if (ret <= ec->fragments) {
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: Skipping heal "
+ "as only %d number of subvolumes could "
+ "be locked",
+ uuid_utoa(inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ ret = __ec_heal_entry(frame, ec, inode, locked_on, sources,
+ healed_sinks, pending);
+ }
+unlock:
+ cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
+ ec->xl, selfheal_domain, inode, 0, 0);
+ cluster_replies_wipe(replies, ec->nodes);
+ return ret;
+}
+
+/*Find direction for data heal and heal info*/
+int
+ec_heal_data_find_direction(ec_t *ec, default_args_cbk_t *replies,
+ uint64_t *data_versions, uint64_t *dirty,
+ uint64_t *size, unsigned char *sources,
+ unsigned char *healed_sinks,
+ gf_boolean_t check_ondisksize, int which)
+{
+ uint64_t xattr[EC_VERSION_SIZE] = {0};
+ char version_size[128] = {0};
+ dict_t *version_size_db = NULL;
+ unsigned char *same = NULL;
+ int max_same_count = 0;
+ int source = 0;
+ int i = 0;
+ int ret = 0;
+ dict_t *dict = NULL;
+ uint64_t source_size = 0;
+
+ version_size_db = dict_new();
+ if (!version_size_db) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!replies[i].valid)
+ continue;
+ if (replies[i].op_ret < 0)
+ continue;
+ dict = (which == EC_COMBINE_XDATA) ? replies[i].xdata
+ : replies[i].xattr;
+
+ ret = ec_dict_get_array(dict, EC_XATTR_VERSION, xattr, EC_VERSION_SIZE);
+ if (ret == 0) {
+ data_versions[i] = xattr[EC_DATA_TXN];
+ }
+
+ memset(xattr, 0, sizeof(xattr));
+ ret = ec_dict_get_array(dict, EC_XATTR_DIRTY, xattr, EC_VERSION_SIZE);
+ if (ret == 0) {
+ dirty[i] = xattr[EC_DATA_TXN];
+ }
+ ret = ec_dict_del_number(dict, EC_XATTR_SIZE, &size[i]);
+ /*Build a db of same metadata and data version and size*/
+ snprintf(version_size, sizeof(version_size), "%" PRIu64 "-%" PRIu64,
+ data_versions[i], size[i]);
+
+ ret = dict_get_bin(version_size_db, version_size, (void **)&same);
+ if (ret < 0) {
+ same = alloca0(ec->nodes);
+ }
+
+ same[i] = 1;
+ if (max_same_count < EC_COUNT(same, ec->nodes)) {
+ max_same_count = EC_COUNT(same, ec->nodes);
+ source = i;
+ }
+
+ if (ret < 0) {
+ ret = dict_set_static_bin(version_size_db, version_size, same,
+ ec->nodes);
+ }
+
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ /* If we don't have ec->fragments number of same version,size it is not
+ * recoverable*/
+ if (max_same_count < ec->fragments) {
+ ret = -EIO;
+ goto out;
+ } else {
+ snprintf(version_size, sizeof(version_size), "%" PRIu64 "-%" PRIu64,
+ data_versions[source], size[source]);
+
+ ret = dict_get_bin(version_size_db, version_size, (void **)&same);
+ if (ret < 0)
+ goto out;
+ memcpy(sources, same, ec->nodes);
+ for (i = 0; i < ec->nodes; i++) {
+ if (replies[i].valid && (replies[i].op_ret == 0) && !sources[i])
+ healed_sinks[i] = 1;
+ }
+ }
+
+ /* There could be files with versions, size same but on disk ia_size
+ * could be different because of disk crashes, mark them as sinks as
+ * well*/
+
+ if (check_ondisksize) {
+ source_size = size[source];
+ ec_adjust_size_up(ec, &source_size, _gf_true);
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (sources[i]) {
+ if (replies[i].stat.ia_size != source_size) {
+ sources[i] = 0;
+ healed_sinks[i] = 1;
+ max_same_count--;
+ } else {
+ source = i;
+ }
+ }
+ }
+ if (max_same_count < ec->fragments) {
+ ret = -EIO;
+ goto out;
+ }
+ }
+
+ ret = source;
+out:
+ if (version_size_db)
+ dict_unref(version_size_db);
+ return ret;
+}
+
+int
+__ec_heal_data_prepare(call_frame_t *frame, ec_t *ec, fd_t *fd,
+ unsigned char *locked_on, uint64_t *versions,
+ uint64_t *dirty, uint64_t *size, unsigned char *sources,
+ unsigned char *healed_sinks, unsigned char *trim,
+ struct iatt *stbuf)
+{
+ default_args_cbk_t *replies = NULL;
+ default_args_cbk_t *fstat_replies = NULL;
+ unsigned char *output = NULL;
+ unsigned char *fstat_output = NULL;
+ dict_t *xattrs = NULL;
+ uint64_t zero_array[2] = {0};
+ int source = 0;
+ int ret = 0;
+ uint64_t zero_value = 0;
+ int i = 0;
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ EC_REPLIES_ALLOC(fstat_replies, ec->nodes);
+ output = alloca0(ec->nodes);
+ fstat_output = alloca0(ec->nodes);
+ xattrs = dict_new();
+ if (!xattrs ||
+ dict_set_static_bin(xattrs, EC_XATTR_VERSION, zero_array,
+ sizeof(zero_array)) ||
+ dict_set_static_bin(xattrs, EC_XATTR_DIRTY, zero_array,
+ sizeof(zero_array)) ||
+ dict_set_static_bin(xattrs, EC_XATTR_SIZE, &zero_value,
+ sizeof(zero_value))) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = cluster_fxattrop(ec->xl_list, locked_on, ec->nodes, replies, output,
+ frame, ec->xl, fd, GF_XATTROP_ADD_ARRAY64, xattrs,
+ NULL);
+
+ ret = cluster_fstat(ec->xl_list, locked_on, ec->nodes, fstat_replies,
+ fstat_output, frame, ec->xl, fd, NULL);
+
+ for (i = 0; i < ec->nodes; i++) {
+ output[i] = output[i] && fstat_output[i];
+ replies[i].valid = output[i];
+ if (output[i])
+ replies[i].stat = fstat_replies[i].stat;
+ }
+
+ if (EC_COUNT(output, ec->nodes) <= ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ source = ec_heal_data_find_direction(ec, replies, versions, dirty, size,
+ sources, healed_sinks, _gf_true,
+ EC_COMBINE_DICT);
+ ret = source;
+ if (ret < 0)
+ goto out;
+
+ if (stbuf)
+ *stbuf = replies[source].stat;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (healed_sinks[i]) {
+ if (replies[i].stat.ia_size)
+ trim[i] = 1;
+ }
+ }
+
+ if (EC_COUNT(sources, ec->nodes) < ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ ret = source;
+out:
+ if (xattrs)
+ dict_unref(xattrs);
+ cluster_replies_wipe(replies, ec->nodes);
+ cluster_replies_wipe(fstat_replies, ec->nodes);
+ if (ret < 0) {
+ gf_msg_debug(ec->xl->name, 0, "%s: heal failed %s",
+ uuid_utoa(fd->inode->gfid), strerror(-ret));
+ } else {
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: sources: %d, sinks: "
+ "%d",
+ uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes),
+ EC_COUNT(healed_sinks, ec->nodes));
+ }
+ return ret;
+}
+
+int
+__ec_heal_mark_sinks(call_frame_t *frame, ec_t *ec, fd_t *fd,
+ uint64_t *versions, unsigned char *healed_sinks)
+{
+ int i = 0;
+ int ret = 0;
+ unsigned char *mark = NULL;
+ dict_t *xattrs = NULL;
+ default_args_cbk_t *replies = NULL;
+ unsigned char *output = NULL;
+ uint64_t versions_xattr[2] = {0};
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ xattrs = dict_new();
+ if (!xattrs) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ mark = alloca0(ec->nodes);
+ for (i = 0; i < ec->nodes; i++) {
+ if (!healed_sinks[i])
+ continue;
+ if ((versions[i] >> EC_SELFHEAL_BIT) & 1)
+ continue;
+ mark[i] = 1;
+ }
+
+ if (EC_COUNT(mark, ec->nodes) == 0)
+ return 0;
+
+ versions_xattr[EC_DATA_TXN] = hton64(1ULL << EC_SELFHEAL_BIT);
+ if (dict_set_static_bin(xattrs, EC_XATTR_VERSION, versions_xattr,
+ sizeof(versions_xattr))) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ output = alloca0(ec->nodes);
+ ret = cluster_fxattrop(ec->xl_list, mark, ec->nodes, replies, output, frame,
+ ec->xl, fd, GF_XATTROP_ADD_ARRAY64, xattrs, NULL);
+ for (i = 0; i < ec->nodes; i++) {
+ if (!output[i]) {
+ if (mark[i])
+ healed_sinks[i] = 0;
+ continue;
+ }
+ versions[i] |= (1ULL << EC_SELFHEAL_BIT);
+ }
+
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+ ret = 0;
+
+out:
+ cluster_replies_wipe(replies, ec->nodes);
+ if (xattrs)
+ dict_unref(xattrs);
+ if (ret < 0)
+ gf_msg_debug(ec->xl->name, 0, "%s: heal failed %s",
+ uuid_utoa(fd->inode->gfid), strerror(-ret));
+ return ret;
+}
+
+int32_t
+ec_manager_heal_block(ec_fop_data_t *fop, int32_t state)
+{
+ ec_heal_t *heal = fop->data;
+ heal->fop = fop;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ ec_owner_set(fop->frame, fop->frame->root);
+
+ ec_heal_inodelk(heal, F_WRLCK, 1, 0, 0);
+
+ return EC_STATE_HEAL_DATA_COPY;
+
+ case EC_STATE_HEAL_DATA_COPY:
+ gf_msg_debug(fop->xl->name, 0, "%s: read/write starting",
+ uuid_utoa(heal->fd->inode->gfid));
+ ec_heal_data_block(heal);
+
+ return EC_STATE_HEAL_DATA_UNLOCK;
+
+ case -EC_STATE_HEAL_DATA_COPY:
+ case -EC_STATE_HEAL_DATA_UNLOCK:
+ case EC_STATE_HEAL_DATA_UNLOCK:
+ ec_heal_inodelk(heal, F_UNLCK, 1, 0, 0);
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ if (fop->cbks.heal) {
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, 0, 0,
+ (heal->good | heal->bad), heal->good, heal->bad,
+ 0, NULL);
+ }
+
+ return EC_STATE_END;
+ case -EC_STATE_REPORT:
+ if (fop->cbks.heal) {
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, -1,
+ fop->error, 0, 0, 0, 0, NULL);
+ }
+
+ return EC_STATE_END;
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, 0, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+/*Takes lock */
+void
+ec_heal_block(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_heal_cbk_t func, ec_heal_t *heal)
+{
+ ec_cbk_t callback = {.heal = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(HEAL) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags,
+ NULL, ec_manager_heal_block, callback, heal);
+ if (fop == NULL)
+ goto out;
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, heal, this, -1, error, 0, 0, 0, 0, NULL);
+ }
+}
+
+int32_t
+ec_heal_block_done(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uintptr_t mask,
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
+{
+ ec_heal_t *heal = cookie;
+
+ if (heal->fop) {
+ heal->fop->heal = NULL;
+ }
+ heal->fop = NULL;
+ heal->error = op_ret < 0 ? op_errno : 0;
+ syncbarrier_wake(heal->data);
+ return 0;
+}
+
+int
+ec_sync_heal_block(call_frame_t *frame, xlator_t *this, ec_heal_t *heal)
+{
+ ec_heal_block(frame, this, heal->bad | heal->good, EC_MINIMUM_ONE,
+ ec_heal_block_done, heal);
+ syncbarrier_wait(heal->data, 1);
+ if (heal->error != 0) {
+ return -heal->error;
+ }
+ if (heal->bad == 0)
+ return -ENOTCONN;
+ return 0;
+}
+
+int
+ec_rebuild_data(call_frame_t *frame, ec_t *ec, fd_t *fd, uint64_t size,
+ unsigned char *sources, unsigned char *healed_sinks)
+{
+ ec_heal_t *heal = NULL;
+ int ret = 0;
+ syncbarrier_t barrier;
+
+ if (syncbarrier_init(&barrier))
+ return -ENOMEM;
+
+ heal = alloca0(sizeof(*heal));
+ heal->fd = fd_ref(fd);
+ heal->xl = ec->xl;
+ heal->data = &barrier;
+ ec_adjust_size_up(ec, &size, _gf_false);
+ heal->total_size = size;
+ heal->size = (128 * GF_UNIT_KB * (ec->self_heal_window_size));
+ /* We need to adjust the size to a multiple of the stripe size of the
+ * volume. Otherwise writes would need to fill gaps (head and/or tail)
+ * with existent data from the bad bricks. This could be garbage on a
+ * damaged file or it could fail if there aren't enough bricks. */
+ heal->size -= heal->size % ec->stripe_size;
+ heal->bad = ec_char_array_to_mask(healed_sinks, ec->nodes);
+ heal->good = ec_char_array_to_mask(sources, ec->nodes);
+ heal->iatt.ia_type = IA_IFREG;
+ LOCK_INIT(&heal->lock);
+
+ for (heal->offset = 0; (heal->offset < size) && !heal->done;
+ heal->offset += heal->size) {
+ /* We immediately abort any heal if a shutdown request has been
+ * received to avoid delays. The healing of this file will be
+ * restarted by another SHD or other client that accesses the
+ * file. */
+ if (ec->shutdown) {
+ gf_msg_debug(ec->xl->name, 0,
+ "Cancelling heal because "
+ "EC is stopping.");
+ ret = -ENOTCONN;
+ break;
+ }
+
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: sources: %d, sinks: "
+ "%d, offset: %" PRIu64 " bsize: %" PRIu64,
+ uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes),
+ EC_COUNT(healed_sinks, ec->nodes), heal->offset,
+ heal->size);
+ ret = ec_sync_heal_block(frame, ec->xl, heal);
+ if (ret < 0)
+ break;
+ }
+ memset(healed_sinks, 0, ec->nodes);
+ ec_mask_to_char_array(heal->bad, healed_sinks, ec->nodes);
+ fd_unref(heal->fd);
+ LOCK_DESTROY(&heal->lock);
+ syncbarrier_destroy(heal->data);
+ if (ret < 0)
+ gf_msg_debug(ec->xl->name, 0, "%s: heal failed %s",
+ uuid_utoa(fd->inode->gfid), strerror(-ret));
+ return ret;
+}
+
+int
+__ec_heal_trim_sinks(call_frame_t *frame, ec_t *ec, fd_t *fd,
+ unsigned char *healed_sinks, unsigned char *trim,
+ uint64_t size)
+{
+ default_args_cbk_t *replies = NULL;
+ unsigned char *output = NULL;
+ int ret = 0;
+ int i = 0;
+ off_t trim_offset = 0;
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ output = alloca0(ec->nodes);
+
+ if (EC_COUNT(trim, ec->nodes) == 0) {
+ ret = 0;
+ goto out;
+ }
+ trim_offset = size;
+ ec_adjust_offset_up(ec, &trim_offset, _gf_true);
+ ret = cluster_ftruncate(ec->xl_list, trim, ec->nodes, replies, output,
+ frame, ec->xl, fd, trim_offset, NULL);
+ for (i = 0; i < ec->nodes; i++) {
+ if (!output[i] && trim[i])
+ healed_sinks[i] = 0;
+ }
+
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+out:
+ cluster_replies_wipe(replies, ec->nodes);
+ if (ret < 0)
+ gf_msg_debug(ec->xl->name, 0, "%s: heal failed %s",
+ uuid_utoa(fd->inode->gfid), strerror(-ret));
+ return ret;
+}
+
+int
+ec_data_undo_pending(call_frame_t *frame, ec_t *ec, fd_t *fd, dict_t *xattr,
+ uint64_t *versions, uint64_t *dirty, uint64_t *size,
+ int source, gf_boolean_t erase_dirty, int idx)
+{
+ uint64_t versions_xattr[2] = {0};
+ uint64_t dirty_xattr[2] = {0};
+ uint64_t allzero[2] = {0};
+ uint64_t size_xattr = 0;
+ int ret = 0;
+
+ versions_xattr[EC_DATA_TXN] = hton64(versions[source] - versions[idx]);
+ ret = dict_set_static_bin(xattr, EC_XATTR_VERSION, versions_xattr,
+ sizeof(versions_xattr));
+ if (ret < 0)
+ goto out;
+
+ size_xattr = hton64(size[source] - size[idx]);
+ ret = dict_set_static_bin(xattr, EC_XATTR_SIZE, &size_xattr,
+ sizeof(size_xattr));
+ if (ret < 0)
+ goto out;
+
+ if (erase_dirty) {
+ dirty_xattr[EC_DATA_TXN] = hton64(-dirty[idx]);
+ ret = dict_set_static_bin(xattr, EC_XATTR_DIRTY, dirty_xattr,
+ sizeof(dirty_xattr));
+ if (ret < 0)
+ goto out;
+ }
+
+ if ((memcmp(versions_xattr, allzero, sizeof(allzero)) == 0) &&
+ (memcmp(dirty_xattr, allzero, sizeof(allzero)) == 0) &&
+ (size_xattr == 0)) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = syncop_fxattrop(ec->xl_list[idx], fd, GF_XATTROP_ADD_ARRAY64, xattr,
+ NULL, NULL, NULL);
+out:
+ return ret;
+}
+
+int
+__ec_fd_data_adjust_versions(call_frame_t *frame, ec_t *ec, fd_t *fd,
+ unsigned char *sources,
+ unsigned char *healed_sinks, uint64_t *versions,
+ uint64_t *dirty, uint64_t *size)
+{
+ dict_t *xattr = NULL;
+ int i = 0;
+ int ret = 0;
+ int op_ret = 0;
+ int source = -1;
+ gf_boolean_t erase_dirty = _gf_false;
+
+ xattr = dict_new();
+ if (!xattr) {
+ op_ret = -ENOMEM;
+ goto out;
+ }
+
+ /* dirty xattr represents if the file needs heal. Unless all the
+ * copies are healed, don't erase it */
+ if (EC_COUNT(sources, ec->nodes) + EC_COUNT(healed_sinks, ec->nodes) ==
+ ec->nodes)
+ erase_dirty = _gf_true;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (sources[i]) {
+ source = i;
+ break;
+ }
+ }
+
+ if (source == -1) {
+ op_ret = -ENOTCONN;
+ goto out;
+ }
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (healed_sinks[i]) {
+ ret = ec_data_undo_pending(frame, ec, fd, xattr, versions, dirty,
+ size, source, erase_dirty, i);
+ if (ret < 0)
+ goto out;
+ }
+ }
+
+ if (!erase_dirty)
+ goto out;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (sources[i]) {
+ ret = ec_data_undo_pending(frame, ec, fd, xattr, versions, dirty,
+ size, source, erase_dirty, i);
+ if (ret < 0)
+ continue;
+ }
+ }
+out:
+ if (xattr)
+ dict_unref(xattr);
+ return op_ret;
+}
+
+int
+ec_restore_time_and_adjust_versions(call_frame_t *frame, ec_t *ec, fd_t *fd,
+ unsigned char *sources,
+ unsigned char *healed_sinks,
+ uint64_t *versions, uint64_t *dirty,
+ uint64_t *size)
+{
+ unsigned char *locked_on = NULL;
+ unsigned char *participants = NULL;
+ unsigned char *output = NULL;
+ default_args_cbk_t *replies = NULL;
+ unsigned char *postsh_sources = NULL;
+ unsigned char *postsh_healed_sinks = NULL;
+ unsigned char *postsh_trim = NULL;
+ uint64_t *postsh_versions = NULL;
+ uint64_t *postsh_dirty = NULL;
+ uint64_t *postsh_size = NULL;
+ int ret = 0;
+ int i = 0;
+ struct iatt source_buf = {0};
+ loc_t loc = {0};
+
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ participants = alloca0(ec->nodes);
+ postsh_sources = alloca0(ec->nodes);
+ postsh_healed_sinks = alloca0(ec->nodes);
+ postsh_trim = alloca0(ec->nodes);
+ postsh_versions = alloca0(ec->nodes * sizeof(*postsh_versions));
+ postsh_dirty = alloca0(ec->nodes * sizeof(*postsh_dirty));
+ postsh_size = alloca0(ec->nodes * sizeof(*postsh_size));
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (healed_sinks[i] || sources[i])
+ participants[i] = 1;
+ }
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ ret = cluster_inodelk(ec->xl_list, participants, ec->nodes, replies,
+ locked_on, frame, ec->xl, ec->xl->name, fd->inode, 0,
+ 0);
+ {
+ if (ret <= ec->fragments) {
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: Skipping heal "
+ "as only %d number of subvolumes could "
+ "be locked",
+ uuid_utoa(fd->inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __ec_heal_data_prepare(frame, ec, fd, locked_on, postsh_versions,
+ postsh_dirty, postsh_size, postsh_sources,
+ postsh_healed_sinks, postsh_trim,
+ &source_buf);
+ if (ret < 0)
+ goto unlock;
+
+ loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(loc.gfid, fd->inode->gfid);
+ ret = cluster_setattr(
+ ec->xl_list, healed_sinks, ec->nodes, replies, output, frame,
+ ec->xl, &loc, &source_buf,
+ GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME | GF_SET_ATTR_CTIME, NULL);
+ EC_INTERSECT(healed_sinks, healed_sinks, output, ec->nodes);
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ ret = __ec_fd_data_adjust_versions(frame, ec, fd, sources, healed_sinks,
+ versions, dirty, size);
+ }
+unlock:
+ cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
+ ec->xl, ec->xl->name, fd->inode, 0, 0);
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
+__ec_heal_data(call_frame_t *frame, ec_t *ec, fd_t *fd, unsigned char *heal_on,
+ unsigned char *sources, unsigned char *healed_sinks)
+{
+ unsigned char *locked_on = NULL;
+ unsigned char *output = NULL;
+ uint64_t *versions = NULL;
+ uint64_t *dirty = NULL;
+ uint64_t *size = NULL;
+ unsigned char *trim = NULL;
+ default_args_cbk_t *replies = NULL;
+ int ret = 0;
+ int source = 0;
+
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ trim = alloca0(ec->nodes);
+ versions = alloca0(ec->nodes * sizeof(*versions));
+ dirty = alloca0(ec->nodes * sizeof(*dirty));
+ size = alloca0(ec->nodes * sizeof(*size));
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ ret = cluster_inodelk(ec->xl_list, heal_on, ec->nodes, replies, locked_on,
+ frame, ec->xl, ec->xl->name, fd->inode, 0, 0);
+ {
+ if (ret <= ec->fragments) {
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: Skipping heal "
+ "as only %d number of subvolumes could "
+ "be locked",
+ uuid_utoa(fd->inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+
+ ret = __ec_heal_data_prepare(frame, ec, fd, locked_on, versions, dirty,
+ size, sources, healed_sinks, trim, NULL);
+ if (ret < 0)
+ goto unlock;
+
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0) {
+ ret = __ec_fd_data_adjust_versions(
+ frame, ec, fd, sources, healed_sinks, versions, dirty, size);
+ goto unlock;
+ }
+
+ source = ret;
+ ret = __ec_heal_mark_sinks(frame, ec, fd, versions, healed_sinks);
+ if (ret < 0)
+ goto unlock;
+
+ ret = __ec_heal_trim_sinks(frame, ec, fd, healed_sinks, trim,
+ size[source]);
+ }
+unlock:
+ cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
+ ec->xl, ec->xl->name, fd->inode, 0, 0);
+ if (ret < 0)
+ goto out;
+
+ if (EC_COUNT(healed_sinks, ec->nodes) == 0)
+ goto out;
+
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: sources: %d, sinks: "
+ "%d",
+ uuid_utoa(fd->inode->gfid), EC_COUNT(sources, ec->nodes),
+ EC_COUNT(healed_sinks, ec->nodes));
+
+ ret = ec_rebuild_data(frame, ec, fd, size[source], sources, healed_sinks);
+ if (ret < 0)
+ goto out;
+
+ ret = ec_restore_time_and_adjust_versions(
+ frame, ec, fd, sources, healed_sinks, versions, dirty, size);
+out:
+ cluster_replies_wipe(replies, ec->nodes);
+ return ret;
+}
+
+int
+ec_heal_data(call_frame_t *frame, ec_t *ec, gf_boolean_t block, inode_t *inode,
+ unsigned char *sources, unsigned char *healed_sinks)
+{
+ unsigned char *locked_on = NULL;
+ unsigned char *up_subvols = NULL;
+ unsigned char *output = NULL;
+ default_args_cbk_t *replies = NULL;
+ fd_t *fd = NULL;
+ loc_t loc = {0};
+ char selfheal_domain[1024] = {0};
+ int ret = 0;
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ up_subvols = alloca0(ec->nodes);
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ fd = fd_create(inode, 0);
+ if (!fd) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes);
+
+ ret = cluster_open(ec->xl_list, up_subvols, ec->nodes, replies, output,
+ frame, ec->xl, &loc, O_RDWR | O_LARGEFILE, fd, NULL);
+ if (ret <= ec->fragments) {
+ ret = -ENOTCONN;
+ goto out;
+ }
+
+ fd_bind(fd);
+ sprintf(selfheal_domain, "%s:self-heal", ec->xl->name);
+ /*If other processes are already doing the heal, don't block*/
+ if (block) {
+ ret = cluster_inodelk(ec->xl_list, output, ec->nodes, replies,
+ locked_on, frame, ec->xl, selfheal_domain, inode,
+ 0, 0);
+ } else {
+ ret = cluster_tiebreaker_inodelk(ec->xl_list, output, ec->nodes,
+ replies, locked_on, frame, ec->xl,
+ selfheal_domain, inode, 0, 0);
+ }
+ {
+ if (ret <= ec->fragments) {
+ gf_msg_debug(ec->xl->name, 0,
+ "%s: Skipping heal "
+ "as only %d number of subvolumes could "
+ "be locked",
+ uuid_utoa(inode->gfid), ret);
+ ret = -ENOTCONN;
+ goto unlock;
+ }
+ ret = __ec_heal_data(frame, ec, fd, locked_on, sources, healed_sinks);
+ }
+unlock:
+ cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
+ ec->xl, selfheal_domain, inode, 0, 0);
+out:
+ if (fd)
+ fd_unref(fd);
+ loc_wipe(&loc);
+ cluster_replies_wipe(replies, ec->nodes);
+ return ret;
+}
+
+int
+ec_heal_purge_stale_index(call_frame_t *frame, ec_t *ec, inode_t *inode)
+{
+ int i = 0;
+ int ret = 0;
+ dict_t **xattr = NULL;
+ loc_t loc = {0};
+ uint64_t dirty_xattr[EC_VERSION_SIZE] = {0};
+ unsigned char *on = NULL;
+ default_args_cbk_t *replies = NULL;
+ dict_t *dict = NULL;
+
+ /* Allocate the required memory */
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ on = alloca0(ec->nodes);
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ xattr = GF_CALLOC(ec->nodes, sizeof(*xattr), gf_common_mt_pointer);
+ if (!xattr) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ for (i = 0; i < ec->nodes; i++) {
+ xattr[i] = dict;
+ on[i] = 1;
+ }
+ ret = dict_set_static_bin(dict, EC_XATTR_DIRTY, dirty_xattr,
+ (sizeof(*dirty_xattr) * EC_VERSION_SIZE));
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ PARALLEL_FOP_ONLIST(ec->xl_list, on, ec->nodes, replies, frame,
+ ec_wind_xattrop_parallel, &loc, GF_XATTROP_ADD_ARRAY64,
+ xattr, NULL);
+out:
+ if (dict) {
+ dict_unref(dict);
+ }
+ if (xattr) {
+ GF_FREE(xattr);
+ }
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ return ret;
+}
+
+void
+ec_heal_do(xlator_t *this, void *data, loc_t *loc, int32_t partial)
+{
+ call_frame_t *frame = NULL;
+ unsigned char *participants = NULL;
+ unsigned char *msources = NULL;
+ unsigned char *mhealed_sinks = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *healed_sinks = NULL;
+ ec_t *ec = NULL;
+ int ret = 0;
+ int op_ret = 0;
+ int op_errno = 0;
+ intptr_t mgood = 0;
+ intptr_t mbad = 0;
+ intptr_t good = 0;
+ intptr_t bad = 0;
+ uint32_t pending = 0;
+ ec_fop_data_t *fop = data;
+ gf_boolean_t blocking = _gf_false;
+ ec_heal_need_t need_heal = EC_HEAL_NONEED;
+ unsigned char *up_subvols = NULL;
+ char up_bricks[32];
+
+ ec = this->private;
+
+ /* If it is heal request from getxattr, complete the heal and then
+ * unwind, if it is ec_heal with NULL as frame then no need to block
+ * the heal as the caller doesn't care about its completion. In case
+ * of heald whichever gets tiebreaking inodelk will take care of the
+ * heal, so no need to block*/
+ if (fop->req_frame && !ec->shd.iamshd)
+ blocking = _gf_true;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame)
+ goto out;
+
+ ec_owner_set(frame, frame->root);
+ /*Do heal as root*/
+ frame->root->uid = 0;
+ frame->root->gid = 0;
+ /*Mark the fops as internal*/
+ frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
+ participants = alloca0(ec->nodes);
+ ec_mask_to_char_array(ec->xl_up, participants, ec->nodes);
+
+ up_subvols = alloca0(ec->nodes);
+ ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes);
+
+ if (loc->name && strlen(loc->name)) {
+ ret = ec_heal_name(frame, ec, loc->parent, (char *)loc->name,
+ participants);
+ if (ret >= 0) {
+ gf_msg_debug(this->name, 0,
+ "%s: name heal "
+ "successful on %" PRIXPTR,
+ loc->path,
+ ec_char_array_to_mask(participants, ec->nodes));
+ } else {
+ gf_msg_debug(
+ this->name, 0,
+ "%s: name heal "
+ "failed. ret = %d, subvolumes up = %s",
+ loc->path, ret,
+ ec_bin(up_bricks, sizeof(up_bricks), ec->xl_up, ec->nodes));
+ }
+ }
+
+ /* Mount triggers heal only when it detects that it must need heal, shd
+ * triggers heals periodically which need not be thorough*/
+ if (ec->shd.iamshd && (ret <= 0)) {
+ ec_heal_inspect(frame, ec, loc->inode, up_subvols, _gf_false, _gf_false,
+ &need_heal);
+
+ if (need_heal == EC_HEAL_PURGE_INDEX) {
+ gf_msg(ec->xl->name, GF_LOG_INFO, 0, EC_MSG_HEAL_FAIL,
+ "Index entry needs to be purged for: %s ",
+ uuid_utoa(loc->gfid));
+ /* We need to send zero-xattrop so that stale index entry could be
+ * removed. We need not take lock on this entry to do so as
+ * xattrop on a brick is atomic. */
+ ec_heal_purge_stale_index(frame, ec, loc->inode);
+ goto out;
+ } else if (need_heal == EC_HEAL_NONEED) {
+ gf_msg(ec->xl->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
+ "Heal is not required for : %s ", uuid_utoa(loc->gfid));
+ goto out;
+ }
+ }
+
+ sources = alloca0(ec->nodes);
+ healed_sinks = alloca0(ec->nodes);
+ if (IA_ISREG(loc->inode->ia_type)) {
+ ret = ec_heal_data(frame, ec, blocking, loc->inode, sources,
+ healed_sinks);
+ } else if (IA_ISDIR(loc->inode->ia_type) && !partial) {
+ ret = ec_heal_entry(frame, ec, loc->inode, sources, healed_sinks,
+ &pending);
+ } else {
+ ret = 0;
+ memcpy(sources, participants, ec->nodes);
+ memcpy(healed_sinks, participants, ec->nodes);
+ }
+
+ if (ret == 0) {
+ good = ec_char_array_to_mask(sources, ec->nodes);
+ bad = ec_char_array_to_mask(healed_sinks, ec->nodes);
+ } else {
+ op_ret = -1;
+ op_errno = -ret;
+ }
+ msources = alloca0(ec->nodes);
+ mhealed_sinks = alloca0(ec->nodes);
+ ret = ec_heal_metadata(frame, ec, loc->inode, msources, mhealed_sinks);
+ if (ret == 0) {
+ mgood = ec_char_array_to_mask(msources, ec->nodes);
+ mbad = ec_char_array_to_mask(mhealed_sinks, ec->nodes);
+ } else {
+ op_ret = -1;
+ op_errno = -ret;
+ }
+
+out:
+ ec_reset_entry_healing(fop);
+ if (fop->cbks.heal) {
+ fop->cbks.heal(fop->req_frame, fop->data, fop->xl, op_ret, op_errno,
+ ec_char_array_to_mask(participants, ec->nodes),
+ mgood & good, mbad & bad, pending, NULL);
+ }
+ if (frame)
+ STACK_DESTROY(frame->root);
+
+ return;
+}
+
+int
+ec_synctask_heal_wrap(void *opaque)
+{
+ ec_fop_data_t *fop = opaque;
+ ec_heal_do(fop->xl, fop, &fop->loc[0], fop->int32);
+ return 0;
+}
+
+int
+ec_heal_done(int ret, call_frame_t *heal, void *opaque)
+{
+ if (opaque)
+ ec_fop_data_release(opaque);
+ return 0;
+}
+
+ec_fop_data_t *
+__ec_dequeue_heals(ec_t *ec)
+{
+ ec_fop_data_t *fop = NULL;
+
+ if (list_empty(&ec->heal_waiting))
+ goto none;
+
+ if ((ec->background_heals > 0) && (ec->healers >= ec->background_heals))
+ goto none;
+
+ fop = list_entry(ec->heal_waiting.next, ec_fop_data_t, healer);
+ ec->heal_waiters--;
+ list_del_init(&fop->healer);
+ list_add(&fop->healer, &ec->healing);
+ ec->healers++;
+ return fop;
+none:
+ gf_msg_debug(ec->xl->name, 0, "Num healers: %d, Num Waiters: %d",
+ ec->healers, ec->heal_waiters);
+ return NULL;
+}
+
+void
+ec_heal_fail(ec_t *ec, ec_fop_data_t *fop)
+{
+ if (fop->cbks.heal) {
+ fop->cbks.heal(fop->req_frame, fop->data, ec->xl, -1, fop->error, 0, 0,
+ 0, 0, NULL);
+ }
+ ec_fop_data_release(fop);
+}
+
+void
+ec_launch_heal(ec_t *ec, ec_fop_data_t *fop)
+{
+ int ret = 0;
+ call_frame_t *frame = NULL;
+
+ frame = create_frame(ec->xl, ec->xl->ctx->pool);
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+
+ ec_owner_set(frame, frame->root);
+ /*Do heal as root*/
+ frame->root->uid = 0;
+ frame->root->gid = 0;
+ /*Mark the fops as internal*/
+ frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
+
+ ret = synctask_new(ec->xl->ctx->env, ec_synctask_heal_wrap, ec_heal_done,
+ frame, fop);
+out:
+ if (ret < 0) {
+ ec_fop_set_error(fop, ENOMEM);
+ ec_heal_fail(ec, fop);
+ }
+
+ if (frame)
+ STACK_DESTROY(frame->root);
+}
+
+void
+ec_handle_healers_done(ec_fop_data_t *fop)
+{
+ ec_t *ec = fop->xl->private;
+ ec_fop_data_t *heal_fop = NULL;
+
+ if (list_empty(&fop->healer))
+ return;
+
+ LOCK(&ec->lock);
+
+ list_del_init(&fop->healer);
+
+ do {
+ ec->healers--;
+ heal_fop = __ec_dequeue_heals(ec);
+
+ if ((heal_fop != NULL) && ec->shutdown) {
+ /* This will prevent ec_handle_healers_done() to be
+ * called recursively. That would be problematic if
+ * the queue is too big. */
+ list_del_init(&heal_fop->healer);
+
+ UNLOCK(&ec->lock);
+
+ ec_fop_set_error(fop, ENOTCONN);
+ ec_heal_fail(ec, heal_fop);
+
+ LOCK(&ec->lock);
+ }
+ } while ((heal_fop != NULL) && ec->shutdown);
+
+ UNLOCK(&ec->lock);
+
+ if (heal_fop)
+ ec_launch_heal(ec, heal_fop);
+}
+
+gf_boolean_t
+ec_is_entry_healing(ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ int32_t heal_count = 0;
+ loc_t *loc = NULL;
+
+ loc = &fop->loc[0];
+
+ LOCK(&loc->inode->lock);
+ {
+ ctx = __ec_inode_get(loc->inode, fop->xl);
+ if (ctx) {
+ heal_count = ctx->heal_count;
+ }
+ }
+ UNLOCK(&loc->inode->lock);
+ GF_ASSERT(heal_count >= 0);
+ return heal_count;
+}
+
+void
+ec_heal_throttle(xlator_t *this, ec_fop_data_t *fop)
+{
+ gf_boolean_t can_heal = _gf_true;
+ ec_t *ec = this->private;
+ ec_fop_data_t *fop_rel = NULL;
+
+ if (fop->req_frame == NULL) {
+ LOCK(&ec->lock);
+ {
+ if ((ec->background_heals > 0) &&
+ (ec->heal_wait_qlen + ec->background_heals) >
+ (ec->heal_waiters + ec->healers)) {
+ if (!ec_is_entry_healing(fop)) {
+ list_add_tail(&fop->healer, &ec->heal_waiting);
+ ec->heal_waiters++;
+ ec_set_entry_healing(fop);
+ } else {
+ fop_rel = fop;
+ }
+ fop = __ec_dequeue_heals(ec);
+ } else {
+ can_heal = _gf_false;
+ }
+ }
+ UNLOCK(&ec->lock);
+ }
+
+ if (can_heal) {
+ if (fop) {
+ if (fop->req_frame != NULL) {
+ ec_set_entry_healing(fop);
+ }
+ ec_launch_heal(ec, fop);
+ }
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Max number of heals are "
+ "pending, background self-heal rejected");
+ ec_fop_set_error(fop, EBUSY);
+ ec_heal_fail(ec, fop);
+ }
+ if (fop_rel) {
+ ec_heal_done(0, NULL, fop_rel);
+ }
+}
+
+void
+ec_heal(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_heal_cbk_t func, void *data, loc_t *loc,
+ int32_t partial, dict_t *xdata)
+{
+ ec_cbk_t callback = {.heal = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t err = EINVAL;
+
+ gf_msg_trace("ec", 0, "EC(HEAL) %p", frame);
+
+ VALIDATE_OR_GOTO(this, fail);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, fail);
+
+ if (!loc || !loc->inode || gf_uuid_is_null(loc->inode->gfid))
+ goto fail;
+
+ if (frame && frame->local)
+ goto fail;
+ fop = ec_fop_data_allocate(frame, this, EC_FOP_HEAL, 0, target, fop_flags,
+ NULL, NULL, callback, data);
+
+ err = ENOMEM;
+
+ if (fop == NULL)
+ goto fail;
+
+ fop->int32 = partial;
+
+ if (loc) {
+ if (loc_copy(&fop->loc[0], loc) != 0)
+ goto fail;
+ }
+
+ if (xdata)
+ fop->xdata = dict_ref(xdata);
+
+ ec_heal_throttle(this, fop);
+
+ return;
+
+fail:
+ if (fop)
+ ec_fop_data_release(fop);
+ if (func)
+ func(frame, data, this, -1, err, 0, 0, 0, 0, NULL);
+}
+
+int
+ec_replace_heal_done(int ret, call_frame_t *heal, void *opaque)
+{
+ ec_t *ec = opaque;
+ gf_boolean_t last_fop = _gf_false;
+
+ if (GF_ATOMIC_DEC(ec->async_fop_count) == 0) {
+ LOCK(&ec->lock);
+ {
+ last_fop = __ec_is_last_fop(ec);
+ }
+ UNLOCK(&ec->lock);
+ }
+ gf_msg_debug(ec->xl->name, 0, "getxattr on bricks is done ret %d", ret);
+
+ if (last_fop)
+ ec_pending_fops_completed(ec);
+
+ return 0;
+}
+
+int32_t
+ec_replace_heal(ec_t *ec, inode_t *inode)
+{
+ loc_t loc = {0};
+ int ret = 0;
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ ret = syncop_getxattr(ec->xl, &loc, NULL, EC_XATTR_HEAL, NULL, NULL);
+ if (ret < 0)
+ gf_msg_debug(ec->xl->name, 0, "Heal failed for replace brick ret = %d",
+ ret);
+
+ /* Once the root inode has been checked, it might have triggered a
+ * self-heal on it after a replace brick command or for some other
+ * reason. It can also happen that the volume already had damaged
+ * files in the index, even if the heal on the root directory failed.
+ * In both cases we need to wake all index healers to continue
+ * healing remaining entries that are marked as dirty. */
+ ec_shd_index_healer_wake(ec);
+
+ loc_wipe(&loc);
+ return ret;
+}
+
+int32_t
+ec_replace_brick_heal_wrap(void *opaque)
+{
+ ec_t *ec = opaque;
+ inode_table_t *itable = NULL;
+ int32_t ret = -1;
+
+ if (ec->xl->itable)
+ itable = ec->xl->itable;
+ else
+ goto out;
+
+ if (xlator_is_cleanup_starting(ec->xl))
+ goto out;
+
+ ret = ec_replace_heal(ec, itable->root);
+out:
+ return ret;
+}
+
+int32_t
+ec_launch_replace_heal(ec_t *ec)
+{
+ int ret = -1;
+
+ ret = synctask_new(ec->xl->ctx->env, ec_replace_brick_heal_wrap,
+ ec_replace_heal_done, NULL, ec);
+
+ if (ret < 0) {
+ gf_msg_debug(ec->xl->name, 0, "Heal failed for replace brick ret = %d",
+ ret);
+ ec_replace_heal_done(-1, NULL, ec);
+ }
+
+ return ret;
+}
+
+int32_t
+ec_set_heal_info(dict_t **dict_rsp, char *status)
+{
+ dict_t *dict = NULL;
+ int ret = 0;
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = dict_set_str(dict, "heal-info", status);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_WARNING, -ret, EC_MSG_HEAL_FAIL,
+ "Failed to set heal-info key to "
+ "%s",
+ status);
+ dict_unref(dict);
+ dict = NULL;
+ }
+ *dict_rsp = dict;
+out:
+ return ret;
+}
+
+static int32_t
+_need_heal_calculate(ec_t *ec, uint64_t *dirty, unsigned char *sources,
+ gf_boolean_t self_locked, int32_t lock_count,
+ ec_heal_need_t *need_heal, uint64_t *versions)
+{
+ int i = 0;
+ int source_count = 0;
+
+ source_count = EC_COUNT(sources, ec->nodes);
+ if (source_count == ec->nodes) {
+ *need_heal = EC_HEAL_NONEED;
+ if (self_locked || lock_count == 0) {
+ for (i = 0; i < ec->nodes; i++) {
+ if (dirty[i] || (versions[i] != versions[0])) {
+ *need_heal = EC_HEAL_MUST;
+ goto out;
+ }
+ }
+ /* If lock count is 0, all dirty flags are 0 and all the
+ * versions are macthing then why are we here. It looks
+ * like something went wrong while removing the index entries
+ * after completing a successful heal or fop. In this case
+ * we need to remove this index entry to avoid triggering heal
+ * in a loop and causing lookups again and again*/
+ *need_heal = EC_HEAL_PURGE_INDEX;
+ } else {
+ for (i = 0; i < ec->nodes; i++) {
+ /* Since each lock can only increment the dirty
+ * count once, if dirty is > 1 it means that
+ * another operation has left the dirty count
+ * set and this indicates a problem in the
+ * inode.*/
+ if (dirty[i] > 1) {
+ *need_heal = EC_HEAL_MUST;
+ goto out;
+ }
+ if (dirty[i] != dirty[0] || (versions[i] != versions[0])) {
+ *need_heal = EC_HEAL_MAYBE;
+ }
+ }
+ }
+ } else {
+ *need_heal = EC_HEAL_MUST;
+ }
+
+out:
+ return source_count;
+}
+
+static int32_t
+ec_need_metadata_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
+ int32_t lock_count, gf_boolean_t self_locked,
+ gf_boolean_t thorough, ec_heal_need_t *need_heal)
+{
+ uint64_t *dirty = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *healed_sinks = NULL;
+ uint64_t *meta_versions = NULL;
+ int ret = 0;
+
+ sources = alloca0(ec->nodes);
+ healed_sinks = alloca0(ec->nodes);
+ dirty = alloca0(ec->nodes * sizeof(*dirty));
+ meta_versions = alloca0(ec->nodes * sizeof(*meta_versions));
+ ret = ec_heal_metadata_find_direction(ec, replies, meta_versions, dirty,
+ sources, healed_sinks);
+ if (ret < 0 && ret != -EIO) {
+ goto out;
+ }
+
+ ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
+ need_heal, meta_versions);
+out:
+ return ret;
+}
+
+static int32_t
+ec_need_data_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
+ int32_t lock_count, gf_boolean_t self_locked,
+ gf_boolean_t thorough, ec_heal_need_t *need_heal)
+{
+ uint64_t *dirty = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *healed_sinks = NULL;
+ uint64_t *data_versions = NULL;
+ uint64_t *size = NULL;
+ int ret = 0;
+
+ sources = alloca0(ec->nodes);
+ healed_sinks = alloca0(ec->nodes);
+ dirty = alloca0(ec->nodes * sizeof(*dirty));
+ data_versions = alloca0(ec->nodes * sizeof(*data_versions));
+ size = alloca0(ec->nodes * sizeof(*size));
+
+ /* When dd is going on and heal info is called there is a very good
+ * chance for on disk sizes to mismatch even though nothing is wrong
+ * we don't need ondisk size check there. But if the file is either
+ * self-locked or the caller wants a thorough check then make sure to
+ * perform on disk check also. */
+ ret = ec_heal_data_find_direction(
+ ec, replies, data_versions, dirty, size, sources, healed_sinks,
+ self_locked || thorough, EC_COMBINE_XDATA);
+ if (ret < 0 && ret != -EIO) {
+ goto out;
+ }
+
+ ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
+ need_heal, data_versions);
+out:
+ return ret;
+}
+
+static int32_t
+ec_need_entry_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
+ int32_t lock_count, gf_boolean_t self_locked,
+ gf_boolean_t thorough, ec_heal_need_t *need_heal)
+{
+ uint64_t *dirty = NULL;
+ unsigned char *sources = NULL;
+ unsigned char *healed_sinks = NULL;
+ uint64_t *data_versions = NULL;
+ int ret = 0;
+
+ sources = alloca0(ec->nodes);
+ healed_sinks = alloca0(ec->nodes);
+ dirty = alloca0(ec->nodes * sizeof(*dirty));
+ data_versions = alloca0(ec->nodes * sizeof(*data_versions));
+
+ ret = ec_heal_entry_find_direction(ec, replies, data_versions, dirty,
+ sources, healed_sinks);
+ if (ret < 0 && ret != -EIO) {
+ goto out;
+ }
+
+ ret = _need_heal_calculate(ec, dirty, sources, self_locked, lock_count,
+ need_heal, data_versions);
+out:
+ return ret;
+}
+
+static int32_t
+ec_need_heal(ec_t *ec, inode_t *inode, default_args_cbk_t *replies,
+ int32_t lock_count, gf_boolean_t self_locked,
+ gf_boolean_t thorough, ec_heal_need_t *need_heal)
+{
+ int ret = 0;
+
+ ret = ec_need_metadata_heal(ec, inode, replies, lock_count, self_locked,
+ thorough, need_heal);
+ if (ret < 0)
+ goto out;
+
+ if (*need_heal == EC_HEAL_MUST)
+ goto out;
+
+ if (inode->ia_type == IA_IFREG) {
+ ret = ec_need_data_heal(ec, inode, replies, lock_count, self_locked,
+ thorough, need_heal);
+ } else if (inode->ia_type == IA_IFDIR) {
+ ret = ec_need_entry_heal(ec, inode, replies, lock_count, self_locked,
+ thorough, need_heal);
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+ec_heal_inspect(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ unsigned char *locked_on, gf_boolean_t self_locked,
+ gf_boolean_t thorough, ec_heal_need_t *need_heal)
+{
+ loc_t loc = {0};
+ int i = 0;
+ int ret = 0;
+ dict_t *xdata = NULL;
+ uint64_t zero_array[2] = {0};
+ uint64_t zero_value = 0;
+ unsigned char *output = NULL;
+ default_args_cbk_t *replies = NULL;
+ int32_t lock_count = 0;
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ output = alloca0(ec->nodes);
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ xdata = dict_new();
+ if (!xdata ||
+ dict_set_static_bin(xdata, EC_XATTR_VERSION, zero_array,
+ sizeof(zero_array)) ||
+ dict_set_static_bin(xdata, EC_XATTR_DIRTY, zero_array,
+ sizeof(zero_array)) ||
+ dict_set_static_bin(xdata, EC_XATTR_SIZE, &zero_value,
+ sizeof(zero_value))) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ if (!self_locked) {
+ ret = dict_set_str(xdata, GLUSTERFS_INODELK_DOM_COUNT, ec->xl->name);
+ if (ret) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ ret = cluster_lookup(ec->xl_list, locked_on, ec->nodes, replies, output,
+ frame, ec->xl, &loc, xdata);
+
+ if (ret != ec->nodes) {
+ ret = ec->nodes;
+ *need_heal = EC_HEAL_MUST;
+ goto out;
+ }
+
+ if (self_locked)
+ goto need_heal;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (!output[i] || !replies[i].xdata) {
+ continue;
+ }
+ if ((dict_get_int32(replies[i].xdata, GLUSTERFS_INODELK_COUNT,
+ &lock_count) == 0) &&
+ lock_count > 0) {
+ break;
+ }
+ }
+need_heal:
+ ret = ec_need_heal(ec, inode, replies, lock_count, self_locked, thorough,
+ need_heal);
+out:
+ cluster_replies_wipe(replies, ec->nodes);
+ loc_wipe(&loc);
+ if (xdata) {
+ dict_unref(xdata);
+ }
+ return ret;
+}
+
+int32_t
+ec_heal_locked_inspect(call_frame_t *frame, ec_t *ec, inode_t *inode,
+ ec_heal_need_t *need_heal)
+{
+ unsigned char *locked_on = NULL;
+ unsigned char *up_subvols = NULL;
+ unsigned char *output = NULL;
+ default_args_cbk_t *replies = NULL;
+ int ret = 0;
+
+ EC_REPLIES_ALLOC(replies, ec->nodes);
+ locked_on = alloca0(ec->nodes);
+ output = alloca0(ec->nodes);
+ up_subvols = alloca0(ec->nodes);
+ ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes);
+
+ ret = cluster_inodelk(ec->xl_list, up_subvols, ec->nodes, replies,
+ locked_on, frame, ec->xl, ec->xl->name, inode, 0, 0);
+ if (ret != ec->nodes) {
+ *need_heal = EC_HEAL_MUST;
+ goto unlock;
+ }
+ ret = ec_heal_inspect(frame, ec, inode, locked_on, _gf_true, _gf_true,
+ need_heal);
+unlock:
+ cluster_uninodelk(ec->xl_list, locked_on, ec->nodes, replies, output, frame,
+ ec->xl, ec->xl->name, inode, 0, 0);
+ cluster_replies_wipe(replies, ec->nodes);
+ return ret;
+}
+
+int32_t
+ec_get_heal_info(xlator_t *this, loc_t *entry_loc, dict_t **dict_rsp)
+{
+ int ret = -ENOMEM;
+ ec_heal_need_t need_heal = EC_HEAL_NONEED;
+ call_frame_t *frame = NULL;
+ ec_t *ec = NULL;
+ unsigned char *up_subvols = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, entry_loc, out);
+
+ ec = this->private;
+ up_subvols = alloca0(ec->nodes);
+ ec_mask_to_char_array(ec->xl_up, up_subvols, ec->nodes);
+
+ if (EC_COUNT(up_subvols, ec->nodes) != ec->nodes) {
+ need_heal = EC_HEAL_MUST;
+ goto set_heal;
+ }
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ goto out;
+ }
+ ec_owner_set(frame, frame->root);
+ frame->root->uid = 0;
+ frame->root->gid = 0;
+ frame->root->pid = GF_CLIENT_PID_SELF_HEALD;
+
+ if (loc_copy(&loc, entry_loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+ goto out;
+ }
+ if (!loc.inode) {
+ ret = syncop_inode_find(this, this, loc.gfid, &loc.inode, NULL, NULL);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = ec_heal_inspect(frame, ec, loc.inode, up_subvols, _gf_false,
+ _gf_false, &need_heal);
+ if (ret == ec->nodes && need_heal != EC_HEAL_MAYBE) {
+ goto set_heal;
+ }
+ need_heal = EC_HEAL_NONEED;
+ ret = ec_heal_locked_inspect(frame, ec, loc.inode, &need_heal);
+ if (ret < 0)
+ goto out;
+set_heal:
+ if (need_heal == EC_HEAL_MUST) {
+ ret = ec_set_heal_info(dict_rsp, "heal");
+ } else {
+ ret = ec_set_heal_info(dict_rsp, "no-heal");
+ }
+out:
+ if (frame) {
+ STACK_DESTROY(frame->root);
+ }
+ loc_wipe(&loc);
+ return ret;
+}
diff --git a/xlators/cluster/ec/src/ec-heald.c b/xlators/cluster/ec/src/ec-heald.c
new file mode 100644
index 00000000000..5c1586bc9c5
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-heald.c
@@ -0,0 +1,681 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat-errno.h>
+#include "ec.h"
+#include "ec-messages.h"
+#include "ec-heald.h"
+#include "ec-mem-types.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
+#include "protocol-common.h"
+
+#define NTH_INDEX_HEALER(this, n) \
+ (&((((ec_t *)this->private))->shd.index_healers[n]))
+#define NTH_FULL_HEALER(this, n) \
+ (&((((ec_t *)this->private))->shd.full_healers[n]))
+
+gf_boolean_t
+ec_shd_is_subvol_local(xlator_t *this, int subvol)
+{
+ ec_t *ec = NULL;
+ gf_boolean_t is_local = _gf_false;
+ loc_t loc = {
+ 0,
+ };
+
+ ec = this->private;
+ loc.inode = this->itable->root;
+ syncop_is_subvol_local(ec->xl_list[subvol], &loc, &is_local);
+ return is_local;
+}
+
+char *
+ec_subvol_name(xlator_t *this, int subvol)
+{
+ ec_t *ec = NULL;
+
+ ec = this->private;
+ if (subvol < 0 || subvol > ec->nodes)
+ return NULL;
+
+ return ec->xl_list[subvol]->name;
+}
+
+int
+__ec_shd_healer_wait(struct subvol_healer *healer)
+{
+ ec_t *ec = NULL;
+ struct timespec wait_till = {
+ 0,
+ };
+ int ret = 0;
+
+ ec = healer->this->private;
+
+disabled_loop:
+ wait_till.tv_sec = gf_time() + ec->shd.timeout;
+
+ while (!healer->rerun) {
+ ret = pthread_cond_timedwait(&healer->cond, &healer->mutex, &wait_till);
+ if (ret == ETIMEDOUT)
+ break;
+ }
+
+ if (ec->shutdown) {
+ healer->running = _gf_false;
+ return -1;
+ }
+
+ ret = healer->rerun;
+ healer->rerun = 0;
+
+ if (!ec->shd.enabled || !ec->up)
+ goto disabled_loop;
+
+ return ret;
+}
+
+int
+ec_shd_healer_wait(struct subvol_healer *healer)
+{
+ int ret = 0;
+
+ pthread_mutex_lock(&healer->mutex);
+ {
+ ret = __ec_shd_healer_wait(healer);
+ }
+ pthread_mutex_unlock(&healer->mutex);
+
+ return ret;
+}
+
+int
+ec_shd_index_inode(xlator_t *this, xlator_t *subvol, inode_t **inode)
+{
+ loc_t rootloc = {
+ 0,
+ };
+ int ret = 0;
+ dict_t *xattr = NULL;
+ void *index_gfid = NULL;
+
+ *inode = NULL;
+ rootloc.inode = inode_ref(this->itable->root);
+ gf_uuid_copy(rootloc.gfid, rootloc.inode->gfid);
+
+ ret = syncop_getxattr(subvol, &rootloc, &xattr, GF_XATTROP_INDEX_GFID, NULL,
+ NULL);
+ if (ret < 0)
+ goto out;
+ if (!xattr) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = dict_get_ptr(xattr, GF_XATTROP_INDEX_GFID, &index_gfid);
+ if (ret)
+ goto out;
+
+ gf_msg_debug(this->name, 0, "index-dir gfid for %s: %s", subvol->name,
+ uuid_utoa(index_gfid));
+
+ ret = syncop_inode_find(this, subvol, index_gfid, inode, NULL, NULL);
+
+out:
+ loc_wipe(&rootloc);
+
+ if (xattr)
+ dict_unref(xattr);
+
+ return ret;
+}
+
+int
+ec_shd_index_purge(xlator_t *subvol, inode_t *inode, char *name)
+{
+ loc_t loc = {
+ 0,
+ };
+ int ret = 0;
+
+ loc.parent = inode_ref(inode);
+ loc.name = name;
+
+ ret = syncop_unlink(subvol, &loc, NULL, NULL);
+
+ loc_wipe(&loc);
+ return ret;
+}
+
+static gf_boolean_t
+ec_is_heal_completed(char *status)
+{
+ char *bad_pos = NULL;
+ char *zero_pos = NULL;
+
+ if (!status) {
+ return _gf_false;
+ }
+
+ /*Logic:
+ * Status will be of the form Good: <binary>, Bad: <binary>
+ * If heal completes, if we do strchr for '0' it should be present after
+ * 'Bad:' i.e. strRchr for ':'
+ * */
+
+ zero_pos = strchr(status, '0');
+ bad_pos = strrchr(status, ':');
+ if (!zero_pos || !bad_pos) {
+ /*malformed status*/
+ return _gf_false;
+ }
+
+ if (zero_pos > bad_pos) {
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+int
+ec_shd_selfheal(struct subvol_healer *healer, int child, loc_t *loc,
+ gf_boolean_t full)
+{
+ dict_t *xdata = NULL;
+ dict_t *dict = NULL;
+ uint32_t count;
+ int32_t ret;
+ char *heal_status = NULL;
+ ec_t *ec = healer->this->private;
+
+ GF_ATOMIC_INC(ec->stats.shd.attempted);
+ ret = syncop_getxattr(healer->this, loc, &dict, EC_XATTR_HEAL, NULL,
+ &xdata);
+ if (ret == 0) {
+ if (dict && (dict_get_str(dict, EC_XATTR_HEAL, &heal_status) == 0)) {
+ if (ec_is_heal_completed(heal_status)) {
+ GF_ATOMIC_INC(ec->stats.shd.completed);
+ }
+ }
+ }
+
+ if (!full && (loc->inode->ia_type == IA_IFDIR)) {
+ /* If we have just healed a directory, it's possible that
+ * other index entries have appeared to be healed. */
+ if ((xdata != NULL) &&
+ (dict_get_uint32(xdata, EC_XATTR_HEAL_NEW, &count) == 0) &&
+ (count > 0)) {
+ /* Force a rerun of the index healer. */
+ gf_msg_debug(healer->this->name, 0, "%d more entries to heal",
+ count);
+
+ healer->rerun = _gf_true;
+ }
+ }
+
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ if (dict) {
+ dict_unref(dict);
+ }
+
+ return ret;
+}
+
+int
+ec_shd_index_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
+{
+ struct subvol_healer *healer = data;
+ ec_t *ec = NULL;
+ loc_t loc = {0};
+ int ret = 0;
+
+ ec = healer->this->private;
+ if (ec->xl_up_count <= ec->fragments) {
+ return -ENOTCONN;
+ }
+ if (!ec->shd.enabled)
+ return -EBUSY;
+
+ gf_msg_debug(healer->this->name, 0, "got entry: %s", entry->d_name);
+
+ ret = gf_uuid_parse(entry->d_name, loc.gfid);
+ if (ret)
+ return 0;
+
+ /* If this fails with ENOENT/ESTALE index is stale */
+ ret = syncop_gfid_to_path(healer->this->itable, subvol, loc.gfid,
+ (char **)&loc.path);
+ if (ret < 0)
+ goto out;
+
+ ret = syncop_inode_find(healer->this, healer->this, loc.gfid, &loc.inode,
+ NULL, NULL);
+ if (ret < 0)
+ goto out;
+
+ ec_shd_selfheal(healer, healer->subvol, &loc, _gf_false);
+out:
+ if (ret == -ENOENT || ret == -ESTALE) {
+ gf_msg(healer->this->name, GF_LOG_DEBUG, 0, EC_MSG_HEAL_FAIL,
+ "Purging index for gfid %s:", uuid_utoa(loc.gfid));
+ ec_shd_index_purge(subvol, parent->inode, entry->d_name);
+ }
+ loc_wipe(&loc);
+
+ return 0;
+}
+
+int
+ec_shd_index_sweep(struct subvol_healer *healer)
+{
+ loc_t loc = {0};
+ ec_t *ec = NULL;
+ int ret = 0;
+ xlator_t *subvol = NULL;
+ dict_t *xdata = NULL;
+
+ ec = healer->this->private;
+ subvol = ec->xl_list[healer->subvol];
+
+ ret = ec_shd_index_inode(healer->this, subvol, &loc.inode);
+ if (ret < 0) {
+ gf_msg(healer->this->name, GF_LOG_WARNING, errno,
+ EC_MSG_INDEX_DIR_GET_FAIL, "unable to get index-dir on %s",
+ subvol->name);
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (!xdata || dict_set_int32(xdata, "get-gfid-type", 1)) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ _mask_cancellation();
+ ret = syncop_mt_dir_scan(NULL, subvol, &loc, GF_CLIENT_PID_SELF_HEALD,
+ healer, ec_shd_index_heal, xdata,
+ ec->shd.max_threads, ec->shd.wait_qlength);
+ _unmask_cancellation();
+out:
+ if (xdata)
+ dict_unref(xdata);
+ loc_wipe(&loc);
+
+ return ret;
+}
+
+int
+ec_shd_full_heal(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
+{
+ struct subvol_healer *healer = data;
+ xlator_t *this = healer->this;
+ ec_t *ec = NULL;
+ loc_t loc = {0};
+ int ret = 0;
+
+ ec = this->private;
+
+ if (this->cleanup_starting) {
+ return -ENOTCONN;
+ }
+
+ if (ec->xl_up_count <= ec->fragments) {
+ return -ENOTCONN;
+ }
+ if (!ec->shd.enabled)
+ return -EBUSY;
+
+ if (gf_uuid_is_null(entry->d_stat.ia_gfid)) {
+ /* It's possible that an entry has been removed just after
+ * being seen in a directory but before getting its stat info.
+ * In this case we'll receive a NULL gfid here. Since the file
+ * doesn't exist anymore, we can safely ignore it. */
+ return 0;
+ }
+
+ loc.parent = inode_ref(parent->inode);
+ loc.name = entry->d_name;
+ gf_uuid_copy(loc.gfid, entry->d_stat.ia_gfid);
+
+ /* If this fails with ENOENT/ESTALE index is stale */
+ ret = syncop_gfid_to_path(this->itable, subvol, loc.gfid,
+ (char **)&loc.path);
+ if (ret < 0)
+ goto out;
+
+ ret = syncop_inode_find(this, this, loc.gfid, &loc.inode, NULL, NULL);
+ if (ret < 0)
+ goto out;
+
+ ec_shd_selfheal(healer, healer->subvol, &loc, _gf_true);
+
+ ret = 0;
+
+out:
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
+ec_shd_full_sweep(struct subvol_healer *healer, inode_t *inode)
+{
+ ec_t *ec = NULL;
+ loc_t loc = {0};
+ int ret = -1;
+
+ ec = healer->this->private;
+ loc.inode = inode;
+ _mask_cancellation();
+ ret = syncop_ftw(ec->xl_list[healer->subvol], &loc,
+ GF_CLIENT_PID_SELF_HEALD, healer, ec_shd_full_heal);
+ _unmask_cancellation();
+ return ret;
+}
+
+void *
+ec_shd_index_healer(void *data)
+{
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+ int run = 0;
+
+ healer = data;
+ THIS = this = healer->this;
+ ec_t *ec = this->private;
+
+ for (;;) {
+ run = ec_shd_healer_wait(healer);
+ if (run == -1)
+ break;
+
+ if (ec->xl_up_count > ec->fragments) {
+ gf_msg_debug(this->name, 0, "starting index sweep on subvol %s",
+ ec_subvol_name(this, healer->subvol));
+ ec_shd_index_sweep(healer);
+ }
+ gf_msg_debug(this->name, 0, "finished index sweep on subvol %s",
+ ec_subvol_name(this, healer->subvol));
+ }
+
+ return NULL;
+}
+
+void *
+ec_shd_full_healer(void *data)
+{
+ struct subvol_healer *healer = NULL;
+ xlator_t *this = NULL;
+ loc_t rootloc = {0};
+
+ int run = 0;
+
+ healer = data;
+ THIS = this = healer->this;
+ ec_t *ec = this->private;
+
+ rootloc.inode = this->itable->root;
+ for (;;) {
+ run = ec_shd_healer_wait(healer);
+ if (run < 0) {
+ break;
+ } else if (run == 0) {
+ continue;
+ }
+
+ if (ec->xl_up_count > ec->fragments) {
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_START,
+ "starting full sweep on subvol %s",
+ ec_subvol_name(this, healer->subvol));
+
+ ec_shd_selfheal(healer, healer->subvol, &rootloc, _gf_true);
+ ec_shd_full_sweep(healer, this->itable->root);
+ }
+
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_FULL_SWEEP_STOP,
+ "finished full sweep on subvol %s",
+ ec_subvol_name(this, healer->subvol));
+ }
+
+ return NULL;
+}
+
+int
+ec_shd_healer_init(xlator_t *this, struct subvol_healer *healer)
+{
+ int ret = 0;
+
+ ret = pthread_mutex_init(&healer->mutex, NULL);
+ if (ret)
+ goto out;
+
+ ret = pthread_cond_init(&healer->cond, NULL);
+ if (ret)
+ goto out;
+
+ healer->this = this;
+ healer->running = _gf_false;
+ healer->rerun = _gf_false;
+out:
+ return ret;
+}
+
+int
+ec_shd_healer_spawn(xlator_t *this, struct subvol_healer *healer,
+ void *(threadfn)(void *))
+{
+ int ret = 0;
+
+ pthread_mutex_lock(&healer->mutex);
+ {
+ if (healer->running) {
+ pthread_cond_signal(&healer->cond);
+ } else {
+ ret = gf_thread_create(&healer->thread, NULL, threadfn, healer,
+ "ecshd");
+ if (ret)
+ goto unlock;
+ healer->running = 1;
+ }
+
+ healer->rerun = 1;
+ }
+unlock:
+ pthread_mutex_unlock(&healer->mutex);
+
+ return ret;
+}
+
+int
+ec_shd_full_healer_spawn(xlator_t *this, int subvol)
+{
+ if (xlator_is_cleanup_starting(this))
+ return -1;
+
+ return ec_shd_healer_spawn(this, NTH_FULL_HEALER(this, subvol),
+ ec_shd_full_healer);
+}
+
+int
+ec_shd_index_healer_spawn(xlator_t *this, int subvol)
+{
+ if (xlator_is_cleanup_starting(this))
+ return -1;
+
+ return ec_shd_healer_spawn(this, NTH_INDEX_HEALER(this, subvol),
+ ec_shd_index_healer);
+}
+
+void
+ec_shd_index_healer_wake(ec_t *ec)
+{
+ int32_t i;
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (((ec->xl_up >> i) & 1) != 0) {
+ ec_shd_index_healer_spawn(ec->xl, i);
+ }
+ }
+}
+
+int
+ec_selfheal_daemon_init(xlator_t *this)
+{
+ ec_t *ec = NULL;
+ ec_self_heald_t *shd = NULL;
+ int ret = -1;
+ int i = 0;
+
+ ec = this->private;
+ shd = &ec->shd;
+
+ shd->index_healers = GF_CALLOC(sizeof(*shd->index_healers), ec->nodes,
+ ec_mt_subvol_healer_t);
+ if (!shd->index_healers)
+ goto out;
+
+ for (i = 0; i < ec->nodes; i++) {
+ shd->index_healers[i].subvol = i;
+ ret = ec_shd_healer_init(this, &shd->index_healers[i]);
+ if (ret)
+ goto out;
+ }
+
+ shd->full_healers = GF_CALLOC(sizeof(*shd->full_healers), ec->nodes,
+ ec_mt_subvol_healer_t);
+ if (!shd->full_healers)
+ goto out;
+
+ for (i = 0; i < ec->nodes; i++) {
+ shd->full_healers[i].subvol = i;
+ ret = ec_shd_healer_init(this, &shd->full_healers[i]);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+ec_heal_op(xlator_t *this, dict_t *output, gf_xl_afr_op_t op, int xl_id)
+{
+ char key[64] = {0};
+ int op_ret = 0;
+ ec_t *ec = NULL;
+ int i = 0;
+ GF_UNUSED int ret = 0;
+
+ ec = this->private;
+
+ op_ret = -1;
+ for (i = 0; i < ec->nodes; i++) {
+ snprintf(key, sizeof(key), "%d-%d-status", xl_id, i);
+
+ if (((ec->xl_up >> i) & 1) == 0) {
+ ret = dict_set_str(output, key, "Brick is not connected");
+ } else if (!ec->up) {
+ ret = dict_set_str(output, key, "Disperse subvolume is not up");
+ } else if (!ec_shd_is_subvol_local(this, i)) {
+ ret = dict_set_str(output, key, "Brick is remote");
+ } else {
+ ret = dict_set_str(output, key, "Started self-heal");
+ if (op == GF_SHD_OP_HEAL_FULL) {
+ ec_shd_full_healer_spawn(this, i);
+ } else if (op == GF_SHD_OP_HEAL_INDEX) {
+ ec_shd_index_healer_spawn(this, i);
+ }
+ op_ret = 0;
+ }
+ }
+ return op_ret;
+}
+
+int
+ec_xl_op(xlator_t *this, dict_t *input, dict_t *output)
+{
+ gf_xl_afr_op_t op = GF_SHD_OP_INVALID;
+ int ret = 0;
+ int xl_id = 0;
+
+ ret = dict_get_int32(input, "xl-op", (int32_t *)&op);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32(input, this->name, &xl_id);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32(output, this->name, xl_id);
+ if (ret)
+ goto out;
+
+ switch (op) {
+ case GF_SHD_OP_HEAL_FULL:
+ ret = ec_heal_op(this, output, op, xl_id);
+ break;
+
+ case GF_SHD_OP_HEAL_INDEX:
+ ret = ec_heal_op(this, output, op, xl_id);
+ break;
+
+ default:
+ ret = -1;
+ break;
+ }
+out:
+ dict_del(output, this->name);
+ return ret;
+}
+
+void
+ec_destroy_healer_object(xlator_t *this, struct subvol_healer *healer)
+{
+ if (!healer)
+ return;
+
+ pthread_cond_destroy(&healer->cond);
+ pthread_mutex_destroy(&healer->mutex);
+}
+
+void
+ec_selfheal_daemon_fini(xlator_t *this)
+{
+ struct subvol_healer *healer = NULL;
+ ec_self_heald_t *shd = NULL;
+ ec_t *priv = NULL;
+ int i = 0;
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ shd = &priv->shd;
+ if (!shd->iamshd)
+ return;
+
+ for (i = 0; i < priv->nodes; i++) {
+ healer = &shd->index_healers[i];
+ ec_destroy_healer_object(this, healer);
+
+ healer = &shd->full_healers[i];
+ ec_destroy_healer_object(this, healer);
+ }
+
+ GF_FREE(shd->index_healers);
+ GF_FREE(shd->full_healers);
+}
diff --git a/xlators/cluster/ec/src/ec-heald.h b/xlators/cluster/ec/src/ec-heald.h
new file mode 100644
index 00000000000..6c7da4edc10
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-heald.h
@@ -0,0 +1,30 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_HEALD_H__
+#define __EC_HEALD_H__
+
+#include "ec-types.h" // for ec_t
+#include "glusterfs/dict.h" // for dict_t
+#include "glusterfs/globals.h" // for xlator_t
+
+int
+ec_xl_op(xlator_t *this, dict_t *input, dict_t *output);
+
+int
+ec_selfheal_daemon_init(xlator_t *this);
+
+void
+ec_shd_index_healer_wake(ec_t *ec);
+
+void
+ec_selfheal_daemon_fini(xlator_t *this);
+
+#endif /* __EC_HEALD_H__ */
diff --git a/xlators/cluster/ec/src/ec-helpers.c b/xlators/cluster/ec/src/ec-helpers.c
new file mode 100644
index 00000000000..48f54475e01
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-helpers.c
@@ -0,0 +1,867 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <libgen.h>
+
+#include <glusterfs/byte-order.h>
+
+#include "ec.h"
+#include "ec-mem-types.h"
+#include "ec-messages.h"
+#include "ec-fops.h"
+#include "ec-method.h"
+#include "ec-helpers.h"
+
+static const char *ec_fop_list[] = {[-EC_FOP_HEAL] = "HEAL"};
+
+const char *
+ec_bin(char *str, size_t size, uint64_t value, int32_t digits)
+{
+ str += size;
+
+ if (size-- < 1) {
+ goto failed;
+ }
+ *--str = 0;
+
+ while ((value != 0) || (digits > 0)) {
+ if (size-- < 1) {
+ goto failed;
+ }
+ *--str = '0' + (value & 1);
+ digits--;
+ value >>= 1;
+ }
+
+ return str;
+
+failed:
+ return "<buffer too small>";
+}
+
+const char *
+ec_fop_name(int32_t id)
+{
+ if (id >= 0) {
+ return gf_fop_list[id];
+ }
+
+ return ec_fop_list[-id];
+}
+
+void
+ec_trace(const char *event, ec_fop_data_t *fop, const char *fmt, ...)
+{
+ char str1[32], str2[32], str3[32];
+ char *msg;
+ ec_t *ec = fop->xl->private;
+ va_list args;
+ int32_t ret;
+
+ va_start(args, fmt);
+ ret = vasprintf(&msg, fmt, args);
+ va_end(args);
+
+ if (ret < 0) {
+ msg = "<memory allocation error>";
+ }
+
+ gf_msg_trace("ec", 0,
+ "%s(%s) %p(%p) [refs=%d, winds=%d, jobs=%d] "
+ "frame=%p/%p, min/exp=%d/%d, err=%d state=%d "
+ "{%s:%s:%s} %s",
+ event, ec_fop_name(fop->id), fop, fop->parent, fop->refs,
+ fop->winds, fop->jobs, fop->req_frame, fop->frame,
+ fop->minimum, fop->expected, fop->error, fop->state,
+ ec_bin(str1, sizeof(str1), fop->mask, ec->nodes),
+ ec_bin(str2, sizeof(str2), fop->remaining, ec->nodes),
+ ec_bin(str3, sizeof(str3), fop->good, ec->nodes), msg);
+
+ if (ret >= 0) {
+ free(msg);
+ }
+}
+
+int32_t
+ec_bits_consume(uint64_t *n)
+{
+ uint64_t tmp;
+
+ tmp = *n;
+ tmp &= -tmp;
+ *n ^= tmp;
+
+ return gf_bits_index(tmp);
+}
+
+size_t
+ec_iov_copy_to(void *dst, struct iovec *vector, int32_t count, off_t offset,
+ size_t size)
+{
+ int32_t i = 0;
+ size_t total = 0, len = 0;
+
+ while (i < count) {
+ if (offset < vector[i].iov_len) {
+ while ((i < count) && (size > 0)) {
+ len = size;
+ if (len > vector[i].iov_len - offset) {
+ len = vector[i].iov_len - offset;
+ }
+ memcpy(dst, vector[i++].iov_base + offset, len);
+ offset = 0;
+ dst += len;
+ total += len;
+ size -= len;
+ }
+
+ break;
+ }
+
+ offset -= vector[i].iov_len;
+ i++;
+ }
+
+ return total;
+}
+
+int32_t
+ec_buffer_alloc(xlator_t *xl, size_t size, struct iobref **piobref, void **ptr)
+{
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ int32_t ret = -ENOMEM;
+
+ iobuf = iobuf_get_page_aligned(xl->ctx->iobuf_pool, size,
+ EC_METHOD_WORD_SIZE);
+ if (iobuf == NULL) {
+ goto out;
+ }
+
+ iobref = *piobref;
+ if (iobref == NULL) {
+ iobref = iobref_new();
+ if (iobref == NULL) {
+ goto out;
+ }
+ }
+
+ ret = iobref_add(iobref, iobuf);
+ if (ret != 0) {
+ if (iobref != *piobref) {
+ iobref_unref(iobref);
+ }
+ iobref = NULL;
+
+ goto out;
+ }
+
+ GF_ASSERT(EC_ALIGN_CHECK(iobuf->ptr, EC_METHOD_WORD_SIZE));
+
+ *ptr = iobuf->ptr;
+
+out:
+ if (iobuf != NULL) {
+ iobuf_unref(iobuf);
+ }
+
+ if (iobref != NULL) {
+ *piobref = iobref;
+ }
+
+ return ret;
+}
+
+int32_t
+ec_dict_set_array(dict_t *dict, char *key, uint64_t value[], int32_t size)
+{
+ int ret = -1;
+ uint64_t *ptr = NULL;
+ int32_t vindex;
+
+ if (value == NULL) {
+ return -EINVAL;
+ }
+
+ ptr = GF_MALLOC(sizeof(uint64_t) * size, gf_common_mt_char);
+ if (ptr == NULL) {
+ return -ENOMEM;
+ }
+ for (vindex = 0; vindex < size; vindex++) {
+ ptr[vindex] = hton64(value[vindex]);
+ }
+ ret = dict_set_bin(dict, key, ptr, sizeof(uint64_t) * size);
+ if (ret)
+ GF_FREE(ptr);
+ return ret;
+}
+
+int32_t
+ec_dict_get_array(dict_t *dict, char *key, uint64_t value[], int32_t size)
+{
+ void *ptr;
+ int32_t len;
+ int32_t vindex;
+ int32_t old_size = 0;
+ int32_t err;
+
+ if (dict == NULL) {
+ return -EINVAL;
+ }
+ err = dict_get_ptr_and_len(dict, key, &ptr, &len);
+ if (err != 0) {
+ return err;
+ }
+
+ if (len > (size * sizeof(uint64_t)) || (len % sizeof(uint64_t))) {
+ return -EINVAL;
+ }
+
+ /* 3.6 version ec would have stored version in 64 bit. In that case treat
+ * metadata versions same as data*/
+ old_size = min(size, len / sizeof(uint64_t));
+ for (vindex = 0; vindex < old_size; vindex++) {
+ value[vindex] = ntoh64(*((uint64_t *)ptr + vindex));
+ }
+
+ if (old_size < size) {
+ for (vindex = old_size; vindex < size; vindex++) {
+ value[vindex] = value[old_size - 1];
+ }
+ }
+
+ return 0;
+}
+
+int32_t
+ec_dict_del_array(dict_t *dict, char *key, uint64_t value[], int32_t size)
+{
+ int ret = 0;
+
+ ret = ec_dict_get_array(dict, key, value, size);
+ if (ret == 0)
+ dict_del(dict, key);
+
+ return ret;
+}
+
+int32_t
+ec_dict_set_number(dict_t *dict, char *key, uint64_t value)
+{
+ int ret = -1;
+ uint64_t *ptr;
+
+ ptr = GF_MALLOC(sizeof(value), gf_common_mt_char);
+ if (ptr == NULL) {
+ return -ENOMEM;
+ }
+
+ *ptr = hton64(value);
+
+ ret = dict_set_bin(dict, key, ptr, sizeof(value));
+ if (ret)
+ GF_FREE(ptr);
+
+ return ret;
+}
+
+int32_t
+ec_dict_del_number(dict_t *dict, char *key, uint64_t *value)
+{
+ void *ptr;
+ int32_t len, err;
+
+ if (dict == NULL) {
+ return -EINVAL;
+ }
+ err = dict_get_ptr_and_len(dict, key, &ptr, &len);
+ if (err != 0) {
+ return err;
+ }
+ if (len != sizeof(uint64_t)) {
+ return -EINVAL;
+ }
+
+ *value = ntoh64(*(uint64_t *)ptr);
+
+ dict_del(dict, key);
+
+ return 0;
+}
+
+int32_t
+ec_dict_set_config(dict_t *dict, char *key, ec_config_t *config)
+{
+ int ret = -1;
+ uint64_t *ptr, data;
+
+ if (config->version > EC_CONFIG_VERSION) {
+ gf_msg("ec", GF_LOG_ERROR, EINVAL, EC_MSG_UNSUPPORTED_VERSION,
+ "Trying to store an unsupported config "
+ "version (%u)",
+ config->version);
+
+ return -EINVAL;
+ }
+
+ ptr = GF_MALLOC(sizeof(uint64_t), gf_common_mt_char);
+ if (ptr == NULL) {
+ return -ENOMEM;
+ }
+
+ data = ((uint64_t)config->version) << 56;
+ data |= ((uint64_t)config->algorithm) << 48;
+ data |= ((uint64_t)config->gf_word_size) << 40;
+ data |= ((uint64_t)config->bricks) << 32;
+ data |= ((uint64_t)config->redundancy) << 24;
+ data |= config->chunk_size;
+
+ *ptr = hton64(data);
+
+ ret = dict_set_bin(dict, key, ptr, sizeof(uint64_t));
+ if (ret)
+ GF_FREE(ptr);
+
+ return ret;
+}
+
+int32_t
+ec_dict_del_config(dict_t *dict, char *key, ec_config_t *config)
+{
+ void *ptr;
+ uint64_t data;
+ int32_t len, err;
+
+ if (dict == NULL) {
+ return -EINVAL;
+ }
+ err = dict_get_ptr_and_len(dict, key, &ptr, &len);
+ if (err != 0) {
+ return err;
+ }
+ if (len != sizeof(uint64_t)) {
+ return -EINVAL;
+ }
+
+ data = ntoh64(*(uint64_t *)ptr);
+ /* Currently we need to get the config xattr for entries of type IA_INVAL.
+ * These entries can later become IA_DIR entries (after inode_link()),
+ * which don't have a config xattr. However, since the xattr is requested
+ * using an xattrop() fop, it will always return a config full of 0's
+ * instead of saying that it doesn't exist.
+ *
+ * We need to filter out this case and consider that a config xattr == 0 is
+ * the same as a non-existent xattr. Otherwise ec_config_check() will fail.
+ */
+ if (data == 0) {
+ return -ENODATA;
+ }
+
+ config->version = (data >> 56) & 0xff;
+ if (config->version > EC_CONFIG_VERSION) {
+ gf_msg("ec", GF_LOG_ERROR, EINVAL, EC_MSG_UNSUPPORTED_VERSION,
+ "Found an unsupported config version (%u)", config->version);
+
+ return -EINVAL;
+ }
+
+ config->algorithm = (data >> 48) & 0xff;
+ config->gf_word_size = (data >> 40) & 0xff;
+ config->bricks = (data >> 32) & 0xff;
+ config->redundancy = (data >> 24) & 0xff;
+ config->chunk_size = data & 0xffffff;
+
+ dict_del(dict, key);
+
+ return 0;
+}
+
+gf_boolean_t
+ec_loc_gfid_check(xlator_t *xl, uuid_t dst, uuid_t src)
+{
+ if (gf_uuid_is_null(src)) {
+ return _gf_true;
+ }
+
+ if (gf_uuid_is_null(dst)) {
+ gf_uuid_copy(dst, src);
+
+ return _gf_true;
+ }
+
+ if (gf_uuid_compare(dst, src) != 0) {
+ gf_msg(xl->name, GF_LOG_WARNING, 0, EC_MSG_GFID_MISMATCH,
+ "Mismatching GFID's in loc");
+
+ return _gf_false;
+ }
+
+ return _gf_true;
+}
+
+int32_t
+ec_loc_setup_inode(xlator_t *xl, inode_table_t *table, loc_t *loc)
+{
+ int32_t ret = -EINVAL;
+
+ if (loc->inode != NULL) {
+ if (!ec_loc_gfid_check(xl, loc->gfid, loc->inode->gfid)) {
+ goto out;
+ }
+ } else if (table != NULL) {
+ if (!gf_uuid_is_null(loc->gfid)) {
+ loc->inode = inode_find(table, loc->gfid);
+ } else if (loc->path && strchr(loc->path, '/')) {
+ loc->inode = inode_resolve(table, (char *)loc->path);
+ }
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
+ec_loc_setup_parent(xlator_t *xl, inode_table_t *table, loc_t *loc)
+{
+ char *path, *parent;
+ int32_t ret = -EINVAL;
+
+ if (loc->parent != NULL) {
+ if (!ec_loc_gfid_check(xl, loc->pargfid, loc->parent->gfid)) {
+ goto out;
+ }
+ } else if (table != NULL) {
+ if (!gf_uuid_is_null(loc->pargfid)) {
+ loc->parent = inode_find(table, loc->pargfid);
+ } else if (loc->path && strchr(loc->path, '/')) {
+ path = gf_strdup(loc->path);
+ if (path == NULL) {
+ gf_msg(xl->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Unable to duplicate path '%s'", loc->path);
+
+ ret = -ENOMEM;
+
+ goto out;
+ }
+ parent = dirname(path);
+ loc->parent = inode_resolve(table, parent);
+ if (loc->parent != NULL) {
+ gf_uuid_copy(loc->pargfid, loc->parent->gfid);
+ }
+ GF_FREE(path);
+ }
+ }
+
+ /* If 'pargfid' has not been determined, clear 'name' to avoid resolutions
+ based on <gfid:pargfid>/name. */
+ if (gf_uuid_is_null(loc->pargfid)) {
+ loc->name = NULL;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
+ec_loc_setup_path(xlator_t *xl, loc_t *loc)
+{
+ static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ char *name;
+ int32_t ret = -EINVAL;
+
+ if (loc->path != NULL) {
+ name = strrchr(loc->path, '/');
+ if (name == NULL) {
+ /* Allow gfid paths: <gfid:...> */
+ if (strncmp(loc->path, "<gfid:", 6) == 0) {
+ ret = 0;
+ }
+ goto out;
+ }
+ if (name == loc->path) {
+ if (name[1] == 0) {
+ if (!ec_loc_gfid_check(xl, loc->gfid, root)) {
+ goto out;
+ }
+ } else {
+ if (!ec_loc_gfid_check(xl, loc->pargfid, root)) {
+ goto out;
+ }
+ }
+ }
+ name++;
+
+ if (loc->name != NULL) {
+ if (strcmp(loc->name, name) != 0) {
+ gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_LOC_NAME,
+ "Invalid name '%s' in loc", loc->name);
+
+ goto out;
+ }
+ } else {
+ loc->name = name;
+ }
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
+ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent)
+{
+ inode_table_t *table = NULL;
+ char *str = NULL;
+ int32_t ret = -ENOMEM;
+
+ memset(parent, 0, sizeof(loc_t));
+
+ if (loc->parent != NULL) {
+ table = loc->parent->table;
+ parent->inode = inode_ref(loc->parent);
+ } else if (loc->inode != NULL) {
+ table = loc->inode->table;
+ }
+ if (!gf_uuid_is_null(loc->pargfid)) {
+ gf_uuid_copy(parent->gfid, loc->pargfid);
+ }
+ if (loc->path && strchr(loc->path, '/')) {
+ str = gf_strdup(loc->path);
+ if (str == NULL) {
+ gf_msg(xl->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Unable to duplicate path '%s'", loc->path);
+
+ goto out;
+ }
+ parent->path = gf_strdup(dirname(str));
+ if (parent->path == NULL) {
+ gf_msg(xl->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Unable to duplicate path '%s'", dirname(str));
+
+ goto out;
+ }
+ }
+
+ ret = ec_loc_setup_path(xl, parent);
+ if (ret == 0) {
+ ret = ec_loc_setup_inode(xl, table, parent);
+ }
+ if (ret == 0) {
+ ret = ec_loc_setup_parent(xl, table, parent);
+ }
+ if (ret != 0) {
+ goto out;
+ }
+
+ if ((parent->inode == NULL) && (parent->path == NULL) &&
+ gf_uuid_is_null(parent->gfid)) {
+ gf_msg(xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_LOC_PARENT_INODE_MISSING,
+ "Parent inode missing for loc_t");
+
+ ret = -EINVAL;
+
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ GF_FREE(str);
+
+ if (ret != 0) {
+ loc_wipe(parent);
+ }
+
+ return ret;
+}
+
+int32_t
+ec_loc_update(xlator_t *xl, loc_t *loc, inode_t *inode, struct iatt *iatt)
+{
+ inode_table_t *table = NULL;
+ int32_t ret = -EINVAL;
+
+ if (inode != NULL) {
+ table = inode->table;
+ if (loc->inode != inode) {
+ if (loc->inode != NULL) {
+ inode_unref(loc->inode);
+ }
+ loc->inode = inode_ref(inode);
+ gf_uuid_copy(loc->gfid, inode->gfid);
+ }
+ } else if (loc->inode != NULL) {
+ table = loc->inode->table;
+ } else if (loc->parent != NULL) {
+ table = loc->parent->table;
+ }
+
+ if (iatt != NULL) {
+ if (!ec_loc_gfid_check(xl, loc->gfid, iatt->ia_gfid)) {
+ goto out;
+ }
+ }
+
+ ret = ec_loc_setup_path(xl, loc);
+ if (ret == 0) {
+ ret = ec_loc_setup_inode(xl, table, loc);
+ }
+ if (ret == 0) {
+ ret = ec_loc_setup_parent(xl, table, loc);
+ }
+ if (ret != 0) {
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+ec_loc_from_fd(xlator_t *xl, loc_t *loc, fd_t *fd)
+{
+ ec_fd_t *ctx;
+ int32_t ret = -ENOMEM;
+
+ memset(loc, 0, sizeof(*loc));
+
+ ctx = ec_fd_get(fd, xl);
+ if (ctx != NULL) {
+ if (loc_copy(loc, &ctx->loc) != 0) {
+ goto out;
+ }
+ }
+
+ ret = ec_loc_update(xl, loc, fd->inode, NULL);
+ if (ret != 0) {
+ goto out;
+ }
+
+out:
+ if (ret != 0) {
+ loc_wipe(loc);
+ }
+
+ return ret;
+}
+
+int32_t
+ec_loc_from_loc(xlator_t *xl, loc_t *dst, loc_t *src)
+{
+ int32_t ret = -ENOMEM;
+
+ memset(dst, 0, sizeof(*dst));
+
+ if (loc_copy(dst, src) != 0) {
+ goto out;
+ }
+
+ ret = ec_loc_update(xl, dst, NULL, NULL);
+ if (ret != 0) {
+ goto out;
+ }
+
+out:
+ if (ret != 0) {
+ loc_wipe(dst);
+ }
+
+ return ret;
+}
+
+void
+ec_owner_set(call_frame_t *frame, void *owner)
+{
+ set_lk_owner_from_ptr(&frame->root->lk_owner, owner);
+}
+
+void
+ec_owner_copy(call_frame_t *frame, gf_lkowner_t *owner)
+{
+ lk_owner_copy(&frame->root->lk_owner, owner);
+}
+
+static void
+ec_stripe_cache_init(ec_t *ec, ec_inode_t *ctx)
+{
+ ec_stripe_list_t *stripe_cache = NULL;
+
+ stripe_cache = &(ctx->stripe_cache);
+ if (stripe_cache->max == 0) {
+ stripe_cache->max = ec->stripe_cache;
+ }
+}
+
+ec_inode_t *
+__ec_inode_get(inode_t *inode, xlator_t *xl)
+{
+ ec_inode_t *ctx = NULL;
+ uint64_t value = 0;
+
+ if ((__inode_ctx_get(inode, xl, &value) != 0) || (value == 0)) {
+ ctx = GF_MALLOC(sizeof(*ctx), ec_mt_ec_inode_t);
+ if (ctx != NULL) {
+ memset(ctx, 0, sizeof(*ctx));
+ INIT_LIST_HEAD(&ctx->heal);
+ INIT_LIST_HEAD(&ctx->stripe_cache.lru);
+ ctx->heal_count = 0;
+ value = (uint64_t)(uintptr_t)ctx;
+ if (__inode_ctx_set(inode, xl, &value) != 0) {
+ GF_FREE(ctx);
+
+ return NULL;
+ }
+ }
+ } else {
+ ctx = (ec_inode_t *)(uintptr_t)value;
+ }
+ if (ctx)
+ ec_stripe_cache_init(xl->private, ctx);
+
+ return ctx;
+}
+
+ec_inode_t *
+ec_inode_get(inode_t *inode, xlator_t *xl)
+{
+ ec_inode_t *ctx = NULL;
+
+ LOCK(&inode->lock);
+
+ ctx = __ec_inode_get(inode, xl);
+
+ UNLOCK(&inode->lock);
+
+ return ctx;
+}
+
+ec_fd_t *
+__ec_fd_get(fd_t *fd, xlator_t *xl)
+{
+ int i = 0;
+ ec_fd_t *ctx = NULL;
+ ec_inode_t *ictx = NULL;
+ uint64_t value = 0;
+ ec_t *ec = xl->private;
+
+ if ((__fd_ctx_get(fd, xl, &value) != 0) || (value == 0)) {
+ ctx = GF_MALLOC(sizeof(*ctx) + (sizeof(ec_fd_status_t) * ec->nodes),
+ ec_mt_ec_fd_t);
+ if (ctx != NULL) {
+ memset(ctx, 0, sizeof(*ctx));
+
+ for (i = 0; i < ec->nodes; i++) {
+ if (fd_is_anonymous(fd)) {
+ ctx->fd_status[i] = EC_FD_OPENED;
+ } else {
+ ctx->fd_status[i] = EC_FD_NOT_OPENED;
+ }
+ }
+
+ value = (uint64_t)(uintptr_t)ctx;
+ if (__fd_ctx_set(fd, xl, value) != 0) {
+ GF_FREE(ctx);
+ return NULL;
+ }
+ /* Only refering bad-version so no need for lock
+ * */
+ ictx = __ec_inode_get(fd->inode, xl);
+ if (ictx) {
+ ctx->bad_version = ictx->bad_version;
+ }
+ }
+ } else {
+ ctx = (ec_fd_t *)(uintptr_t)value;
+ }
+
+ /* Treat anonymous fd specially */
+ if (fd->anonymous && ctx) {
+ /* Mark the fd open for all subvolumes. */
+ ctx->open = -1;
+ /* Try to populate ctx->loc with fd->inode information. */
+ ec_loc_update(xl, &ctx->loc, fd->inode, NULL);
+ }
+
+ return ctx;
+}
+
+ec_fd_t *
+ec_fd_get(fd_t *fd, xlator_t *xl)
+{
+ ec_fd_t *ctx = NULL;
+
+ LOCK(&fd->lock);
+
+ ctx = __ec_fd_get(fd, xl);
+
+ UNLOCK(&fd->lock);
+
+ return ctx;
+}
+
+gf_boolean_t
+ec_is_internal_xattr(dict_t *dict, char *key, data_t *value, void *data)
+{
+ if (key && (strncmp(key, EC_XATTR_PREFIX, SLEN(EC_XATTR_PREFIX)) == 0))
+ return _gf_true;
+
+ return _gf_false;
+}
+
+void
+ec_filter_internal_xattrs(dict_t *xattr)
+{
+ dict_foreach_match(xattr, ec_is_internal_xattr, NULL,
+ dict_remove_foreach_fn, NULL);
+}
+
+gf_boolean_t
+ec_is_data_fop(glusterfs_fop_t fop)
+{
+ switch (fop) {
+ case GF_FOP_WRITE:
+ case GF_FOP_TRUNCATE:
+ case GF_FOP_FTRUNCATE:
+ case GF_FOP_FALLOCATE:
+ case GF_FOP_DISCARD:
+ case GF_FOP_ZEROFILL:
+ return _gf_true;
+ default:
+ return _gf_false;
+ }
+ return _gf_false;
+}
+/*
+gf_boolean_t
+ec_is_metadata_fop (int32_t lock_kind, glusterfs_fop_t fop)
+{
+ if (lock_kind == EC_LOCK_ENTRY) {
+ return _gf_false;
+ }
+
+ switch (fop) {
+ case GF_FOP_SETATTR:
+ case GF_FOP_FSETATTR:
+ case GF_FOP_SETXATTR:
+ case GF_FOP_FSETXATTR:
+ case GF_FOP_REMOVEXATTR:
+ case GF_FOP_FREMOVEXATTR:
+ return _gf_true;
+ default:
+ return _gf_false;
+ }
+ return _gf_false;
+}*/
diff --git a/xlators/cluster/ec/src/ec-helpers.h b/xlators/cluster/ec/src/ec-helpers.h
new file mode 100644
index 00000000000..015db514e05
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-helpers.h
@@ -0,0 +1,200 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_HELPERS_H__
+#define __EC_HELPERS_H__
+
+#include "ec-types.h"
+
+#define EC_ERR(_x) ((void *)-(intptr_t)(_x))
+#define EC_IS_ERR(_x) (((uintptr_t)(_x) & ~0xfffULL) == ~0xfffULL)
+#define EC_GET_ERR(_x) ((int32_t)(intptr_t)(_x))
+
+#define EC_ALIGN_CHECK(_ptr, _align) ((((uintptr_t)(_ptr)) & ((_align)-1)) == 0)
+
+const char *
+ec_bin(char *str, size_t size, uint64_t value, int32_t digits);
+const char *
+ec_fop_name(int32_t id);
+void
+ec_trace(const char *event, ec_fop_data_t *fop, const char *fmt, ...);
+int32_t
+ec_bits_consume(uint64_t *n);
+size_t
+ec_iov_copy_to(void *dst, struct iovec *vector, int32_t count, off_t offset,
+ size_t size);
+int32_t
+ec_buffer_alloc(xlator_t *xl, size_t size, struct iobref **piobref, void **ptr);
+int32_t
+ec_dict_set_array(dict_t *dict, char *key, uint64_t *value, int32_t size);
+int32_t
+ec_dict_get_array(dict_t *dict, char *key, uint64_t value[], int32_t size);
+
+int32_t
+ec_dict_del_array(dict_t *dict, char *key, uint64_t *value, int32_t size);
+int32_t
+ec_dict_set_number(dict_t *dict, char *key, uint64_t value);
+int32_t
+ec_dict_del_number(dict_t *dict, char *key, uint64_t *value);
+int32_t
+ec_dict_set_config(dict_t *dict, char *key, ec_config_t *config);
+int32_t
+ec_dict_del_config(dict_t *dict, char *key, ec_config_t *config);
+
+int32_t
+ec_loc_parent(xlator_t *xl, loc_t *loc, loc_t *parent);
+int32_t
+ec_loc_update(xlator_t *xl, loc_t *loc, inode_t *inode, struct iatt *iatt);
+
+int32_t
+ec_loc_from_fd(xlator_t *xl, loc_t *loc, fd_t *fd);
+int32_t
+ec_loc_from_loc(xlator_t *xl, loc_t *dst, loc_t *src);
+
+void
+ec_owner_set(call_frame_t *frame, void *owner);
+void
+ec_owner_copy(call_frame_t *frame, gf_lkowner_t *owner);
+
+ec_inode_t *
+__ec_inode_get(inode_t *inode, xlator_t *xl);
+ec_inode_t *
+ec_inode_get(inode_t *inode, xlator_t *xl);
+ec_fd_t *
+__ec_fd_get(fd_t *fd, xlator_t *xl);
+ec_fd_t *
+ec_fd_get(fd_t *fd, xlator_t *xl);
+
+static inline uint32_t
+ec_adjust_size_down(ec_t *ec, uint64_t *value, gf_boolean_t scale)
+{
+ uint64_t head, tmp;
+
+ tmp = *value;
+ head = tmp % ec->stripe_size;
+ tmp -= head;
+
+ if (scale) {
+ tmp /= ec->fragments;
+ }
+
+ *value = tmp;
+
+ return (uint32_t)head;
+}
+
+/* This function can cause an overflow if the passed value is too near to the
+ * uint64_t limit. If this happens, it returns the tail in negative form and
+ * the value is set to UINT64_MAX. */
+static inline int32_t
+ec_adjust_size_up(ec_t *ec, uint64_t *value, gf_boolean_t scale)
+{
+ uint64_t tmp;
+ int32_t tail;
+
+ tmp = *value;
+ /* We first adjust the value down. This never causes overflow. */
+ tail = ec_adjust_size_down(ec, &tmp, scale);
+
+ /* If the value was already aligned, tail will be 0 and nothing else
+ * needs to be done. */
+ if (tail != 0) {
+ /* Otherwise, we need to compute the real tail and adjust the
+ * returned value to the next stripe. */
+ tail = ec->stripe_size - tail;
+ if (scale) {
+ tmp += ec->fragment_size;
+ } else {
+ tmp += ec->stripe_size;
+ /* If no scaling is requested there's a possibility of
+ * overflow. */
+ if (tmp < ec->stripe_size) {
+ tmp = UINT64_MAX;
+ tail = -tail;
+ }
+ }
+ }
+
+ *value = tmp;
+
+ return tail;
+}
+
+/* This function is equivalent to ec_adjust_size_down() but with a potentially
+ * different parameter size (off_t vs uint64_t). */
+static inline uint32_t
+ec_adjust_offset_down(ec_t *ec, off_t *value, gf_boolean_t scale)
+{
+ off_t head, tmp;
+
+ tmp = *value;
+ head = tmp % ec->stripe_size;
+ tmp -= head;
+
+ if (scale) {
+ tmp /= ec->fragments;
+ }
+
+ *value = tmp;
+
+ return (uint32_t)head;
+}
+
+/* This function is equivalent to ec_adjust_size_up() but with a potentially
+ * different parameter size (off_t vs uint64_t). */
+static inline int32_t
+ec_adjust_offset_up(ec_t *ec, off_t *value, gf_boolean_t scale)
+{
+ uint64_t tail, tmp;
+
+ /* An offset is a signed type that can only have positive values, so
+ * we take advantage of this to avoid overflows. We simply convert it
+ * to an unsigned integer and operate normally. This won't cause an
+ * overflow. Overflow is only checked when converting back to an
+ * off_t. */
+ tmp = *value;
+ tail = ec->stripe_size;
+ tail -= (tmp + tail - 1) % tail + 1;
+ tmp += tail;
+ if (scale) {
+ /* If we are scaling, we'll never get an overflow. */
+ tmp /= ec->fragments;
+ } else {
+ /* Check if there has been an overflow. */
+ if ((off_t)tmp < 0) {
+ tmp = GF_OFF_MAX;
+ tail = -tail;
+ }
+ }
+
+ *value = (off_t)tmp;
+
+ return (int32_t)tail;
+}
+
+static inline int32_t
+ec_is_power_of_2(uint32_t value)
+{
+ return (value != 0) && ((value & (value - 1)) == 0);
+}
+
+gf_boolean_t
+ec_is_internal_xattr(dict_t *dict, char *key, data_t *value, void *data);
+
+void
+ec_filter_internal_xattrs(dict_t *xattr);
+
+gf_boolean_t
+ec_is_data_fop(glusterfs_fop_t fop);
+
+int32_t
+ec_launch_replace_heal(ec_t *ec);
+
+#endif /* __EC_HELPERS_H__ */
diff --git a/xlators/cluster/ec/src/ec-inode-read.c b/xlators/cluster/ec/src/ec-inode-read.c
new file mode 100644
index 00000000000..dad5f4d7018
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-inode-read.c
@@ -0,0 +1,2046 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "ec.h"
+#include "ec-messages.h"
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-combine.h"
+#include "ec-method.h"
+#include "ec-fops.h"
+
+/* FOP: access */
+
+int32_t
+ec_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_ACCESS, idx, op_ret,
+ op_errno);
+ if (cbk) {
+ if (xdata)
+ cbk->xdata = dict_ref(xdata);
+ ec_combine(cbk, NULL);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_access(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_access_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->access,
+ &fop->loc[0], fop->int32, fop->xdata);
+}
+
+int32_t
+ec_manager_access(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk = NULL;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_one(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ if (ec_dispatch_one_retry(fop, NULL)) {
+ return EC_STATE_DISPATCH;
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+ GF_ASSERT(cbk);
+ if (fop->cbks.access != NULL) {
+ if (cbk) {
+ fop->cbks.access(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->xdata);
+ }
+ }
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ if (fop->cbks.access != NULL) {
+ fop->cbks.access(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL);
+ }
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_access(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_access_cbk_t func, void *data, loc_t *loc,
+ int32_t mask, dict_t *xdata)
+{
+ ec_cbk_t callback = {.access = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(ACCESS) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_ACCESS, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_access,
+ ec_manager_access, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->int32 = mask;
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
+ }
+}
+
+/* FOP: getxattr */
+
+int32_t
+ec_combine_getxattr(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
+{
+ if (!ec_dict_compare(dst->dict, src->dict)) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_DICT_MISMATCH,
+ "Mismatching dictionary in "
+ "answers of 'GF_FOP_GETXATTR'");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+int32_t
+ec_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_GETXATTR, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (dict != NULL) {
+ cbk->dict = dict_ref(dict);
+ if (cbk->dict == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+ }
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ ec_combine(cbk, ec_combine_getxattr);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_getxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_getxattr_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->getxattr,
+ &fop->loc[0], fop->str[0], fop->xdata);
+}
+
+void
+ec_handle_special_xattrs(ec_fop_data_t *fop)
+{
+ ec_cbk_data_t *cbk = NULL;
+ /* Stime may not be available on all the bricks, so even if some of the
+ * subvols succeed the operation, treat it as answer.*/
+ if (fop->str[0] && fnmatch(GF_XATTR_STIME_PATTERN, fop->str[0], 0) == 0) {
+ if (!fop->answer || (fop->answer->op_ret < 0)) {
+ list_for_each_entry(cbk, &fop->cbk_list, list)
+ {
+ if (cbk->op_ret >= 0) {
+ fop->answer = cbk;
+ break;
+ }
+ }
+ }
+ }
+}
+
+int32_t
+ec_manager_getxattr(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ /* clear-locks commands must be done without any locks acquired
+ to avoid interferences. */
+ if ((fop->str[0] == NULL) ||
+ (strncmp(fop->str[0], GF_XATTR_CLRLK_CMD,
+ SLEN(GF_XATTR_CLRLK_CMD)) != 0)) {
+ if (fop->fd == NULL) {
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
+ } else {
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
+ }
+ ec_lock(fop);
+ }
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ if (fop->minimum == EC_MINIMUM_ALL) {
+ ec_dispatch_all(fop);
+ } else {
+ ec_dispatch_one(fop);
+ }
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ ec_handle_special_xattrs(fop);
+ if (fop->minimum == EC_MINIMUM_ALL) {
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ } else {
+ if (ec_dispatch_one_retry(fop, &cbk)) {
+ return EC_STATE_DISPATCH;
+ }
+ }
+ if (cbk != NULL) {
+ int32_t err;
+
+ err = ec_dict_combine(cbk, EC_COMBINE_DICT);
+ if (!ec_cbk_set_error(cbk, -err, _gf_true)) {
+ if (cbk->xdata != NULL)
+ ec_filter_internal_xattrs(cbk->xdata);
+
+ if (cbk->dict != NULL)
+ ec_filter_internal_xattrs(cbk->dict);
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.getxattr != NULL) {
+ fop->cbks.getxattr(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->dict, cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.getxattr != NULL) {
+ fop->cbks.getxattr(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+int32_t
+ec_getxattr_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *xl,
+ int32_t op_ret, int32_t op_errno, uintptr_t mask,
+ uintptr_t good, uintptr_t bad, uint32_t pending,
+ dict_t *xdata)
+{
+ fop_getxattr_cbk_t func = cookie;
+ ec_t *ec = xl->private;
+ dict_t *dict = NULL;
+ char *str;
+ char bin1[65], bin2[65];
+
+ /* We try to return the 'pending' information in xdata, but if this cannot
+ * be set, we will ignore it silently. We prefer to report the success or
+ * failure of the heal itself. */
+ if (xdata == NULL) {
+ xdata = dict_new();
+ } else {
+ dict_ref(xdata);
+ }
+ if (xdata != NULL) {
+ if (dict_set_uint32(xdata, EC_XATTR_HEAL_NEW, pending) != 0) {
+ /* dict_set_uint32() is marked as 'warn_unused_result' and gcc
+ * enforces to check the result in this case. However we don't
+ * really care if it succeeded or not. We'll just do the same.
+ *
+ * This empty 'if' avoids the warning, and it will be removed by
+ * the optimizer. */
+ }
+ }
+
+ if (op_ret >= 0) {
+ dict = dict_new();
+ if (dict == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ } else {
+ if (gf_asprintf(&str, "Good: %s, Bad: %s",
+ ec_bin(bin1, sizeof(bin1), good, ec->nodes),
+ ec_bin(bin2, sizeof(bin2), mask & ~(good | bad),
+ ec->nodes)) < 0) {
+ dict_unref(dict);
+ dict = NULL;
+
+ op_ret = -1;
+ op_errno = ENOMEM;
+
+ goto out;
+ }
+
+ if (dict_set_dynstr(dict, EC_XATTR_HEAL, str) != 0) {
+ GF_FREE(str);
+ dict_unref(dict);
+ dict = NULL;
+
+ op_ret = -1;
+ op_errno = ENOMEM;
+
+ goto out;
+ }
+ }
+ }
+
+out:
+ func(frame, NULL, xl, op_ret, op_errno, dict, xdata);
+
+ if (dict != NULL) {
+ dict_unref(dict);
+ }
+ if (xdata != NULL) {
+ dict_unref(xdata);
+ }
+
+ return 0;
+}
+
+void
+ec_getxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_getxattr_cbk_t func, void *data, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ ec_cbk_t callback = {.getxattr = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(GETXATTR) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ /* Special handling of an explicit self-heal request */
+ if ((name != NULL) && (strcmp(name, EC_XATTR_HEAL) == 0)) {
+ ec_heal(frame, this, target, EC_MINIMUM_ONE, ec_getxattr_heal_cbk, func,
+ loc, 0, NULL);
+
+ return;
+ }
+
+ fop = ec_fop_data_allocate(
+ frame, this, GF_FOP_GETXATTR, EC_FLAG_LOCK_SHARED, target, fop_flags,
+ ec_wind_getxattr, ec_manager_getxattr, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (name != NULL) {
+ /* In case of list-node-uuids xattr, set flag to indicate
+ * the same and use node-uuid xattr for winding fop */
+ if (XATTR_IS_NODE_UUID_LIST(name)) {
+ fop->int32 = 1;
+ fop->str[0] = gf_strdup(GF_XATTR_NODE_UUID_KEY);
+ } else {
+ fop->str[0] = gf_strdup(name);
+ }
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
+ }
+}
+
+/* FOP: fgetxattr */
+
+int32_t
+ec_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FGETXATTR, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (dict != NULL) {
+ cbk->dict = dict_ref(dict);
+ if (cbk->dict == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+ }
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ ec_combine(cbk, ec_combine_getxattr);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_fgetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_fgetxattr_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->fgetxattr,
+ fop->fd, fop->str[0], fop->xdata);
+}
+
+void
+ec_fgetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fgetxattr_cbk_t func, void *data, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ ec_cbk_t callback = {.fgetxattr = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(FGETXATTR) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(
+ frame, this, GF_FOP_FGETXATTR, EC_FLAG_LOCK_SHARED, target, fop_flags,
+ ec_wind_fgetxattr, ec_manager_getxattr, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (name != NULL) {
+ fop->str[0] = gf_strdup(name);
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
+ }
+}
+
+/* FOP: open */
+
+int32_t
+ec_combine_open(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
+{
+ if (dst->fd != src->fd) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_FD_MISMATCH,
+ "Mismatching fd in answers "
+ "of 'GF_FOP_OPEN': %p <-> %p",
+ dst->fd, src->fd);
+
+ return 0;
+ }
+
+ return 1;
+}
+
+int32_t
+ec_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_OPEN, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (fd != NULL) {
+ cbk->fd = fd_ref(fd);
+ if (cbk->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ }
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ ec_combine(cbk, ec_combine_open);
+
+ ec_update_fd_status(fd, this, idx, op_ret);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_open(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_open_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->open,
+ &fop->loc[0], fop->int32, fop->fd, fop->xdata);
+}
+
+int32_t
+ec_open_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ int32_t error = 0;
+
+ fop = fop->data;
+ if (op_ret >= 0) {
+ fop->answer->iatt[0] = *postbuf;
+ } else {
+ error = op_errno;
+ }
+
+ ec_resume(fop, error);
+
+ return 0;
+}
+
+int32_t
+ec_manager_open(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+ ec_fd_t *ctx;
+ int32_t err;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ LOCK(&fop->fd->lock);
+
+ ctx = __ec_fd_get(fop->fd, fop->xl);
+ if (ctx == NULL) {
+ UNLOCK(&fop->fd->lock);
+
+ fop->error = ENOMEM;
+
+ return EC_STATE_REPORT;
+ }
+ if (!ctx->loc.inode) {
+ err = ec_loc_from_loc(fop->xl, &ctx->loc, &fop->loc[0]);
+ if (err != 0) {
+ UNLOCK(&fop->fd->lock);
+
+ fop->error = -err;
+
+ return EC_STATE_REPORT;
+ }
+ }
+
+ ctx->flags = fop->int32;
+
+ UNLOCK(&fop->fd->lock);
+
+ /* We need to write to specific offsets on the bricks, so we
+ need to remove O_APPEND from flags (if present).
+ If O_TRUNC is specified, we remove it from open and an
+ ftruncate will be executed later, which will correctly update
+ the file size taking appropriate locks. O_TRUNC flag is saved
+ into fop->uint32 to use it later.*/
+ fop->uint32 = fop->int32 & O_TRUNC;
+ fop->int32 &= ~(O_APPEND | O_TRUNC);
+
+ /* Fall through */
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ if (cbk != NULL) {
+ int32_t err;
+
+ err = ec_loc_update(fop->xl, &fop->loc[0], cbk->fd->inode,
+ NULL);
+ if (!ec_cbk_set_error(cbk, -err, _gf_true)) {
+ LOCK(&fop->fd->lock);
+
+ ctx = __ec_fd_get(fop->fd, fop->xl);
+ if (ctx != NULL) {
+ ctx->open |= cbk->mask;
+ }
+
+ UNLOCK(&fop->fd->lock);
+
+ /* If O_TRUNC was specified, call ftruncate to
+ effectively trunc the file with appropriate locks
+ acquired. We don't use ctx->flags because self-heal
+ can use the same fd with different flags. */
+ if (fop->uint32 != 0) {
+ ec_sleep(fop);
+ ec_ftruncate(fop->req_frame, fop->xl, cbk->mask,
+ fop->minimum, ec_open_truncate_cbk, fop,
+ cbk->fd, 0, NULL);
+ }
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.open != NULL) {
+ fop->cbks.open(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->fd, cbk->xdata);
+ }
+
+ return EC_STATE_END;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.open != NULL) {
+ fop->cbks.open(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL);
+ }
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_open(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_open_cbk_t func, void *data, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
+{
+ ec_cbk_t callback = {.open = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(OPEN) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_OPEN, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_open, ec_manager_open,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->int32 = flags;
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
+ }
+}
+
+/* FOP: readlink */
+
+int32_t
+ec_combine_readlink(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
+{
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH,
+ "Mismatching iatt in "
+ "answers of 'GF_FOP_READLINK'");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+int32_t
+ec_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, fop->id, idx, op_ret,
+ op_errno);
+ if (cbk) {
+ if (xdata)
+ cbk->xdata = dict_ref(xdata);
+
+ if (cbk->op_ret >= 0) {
+ cbk->iatt[0] = *buf;
+ cbk->str = gf_strdup(path);
+ if (!cbk->str) {
+ ec_cbk_set_error(cbk, ENOMEM, _gf_true);
+ }
+ }
+ ec_combine(cbk, NULL);
+ }
+
+out:
+ if (fop != NULL)
+ ec_complete(fop);
+
+ return 0;
+}
+
+void
+ec_wind_readlink(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_readlink_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->readlink,
+ &fop->loc[0], fop->size, fop->xdata);
+}
+
+int32_t
+ec_manager_readlink(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk = NULL;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
+ ec_lock(fop);
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_one(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ if (ec_dispatch_one_retry(fop, &cbk)) {
+ return EC_STATE_DISPATCH;
+ }
+
+ if ((cbk != NULL) && (cbk->op_ret >= 0)) {
+ ec_iatt_rebuild(fop->xl->private, &cbk->iatt[0], 1, 1);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+ GF_ASSERT(cbk);
+ if (fop->cbks.readlink != NULL) {
+ fop->cbks.readlink(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->str, &cbk->iatt[0],
+ cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ if (fop->cbks.readlink != NULL) {
+ fop->cbks.readlink(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL);
+ }
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_readlink(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readlink_cbk_t func, void *data, loc_t *loc,
+ size_t size, dict_t *xdata)
+{
+ ec_cbk_t callback = {.readlink = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(READLINK) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(
+ frame, this, GF_FOP_READLINK, EC_FLAG_LOCK_SHARED, target, fop_flags,
+ ec_wind_readlink, ec_manager_readlink, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->size = size;
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+ }
+}
+
+/* FOP: readv */
+
+int32_t
+ec_readv_rebuild(ec_t *ec, ec_fop_data_t *fop, ec_cbk_data_t *cbk)
+{
+ struct iovec vector[1];
+ ec_cbk_data_t *ans = NULL;
+ struct iobref *iobref = NULL;
+ void *ptr;
+ uint64_t fsize = 0, size = 0, max = 0;
+ int32_t pos, err = -ENOMEM;
+
+ if (cbk->op_ret < 0) {
+ err = -cbk->op_errno;
+
+ goto out;
+ }
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop, fop->fd->inode, &cbk->iatt[0].ia_size));
+
+ if (cbk->op_ret > 0) {
+ void *blocks[cbk->count];
+ uint32_t values[cbk->count];
+
+ fsize = cbk->op_ret;
+ size = fsize * ec->fragments;
+ for (ans = cbk; ans != NULL; ans = ans->next) {
+ pos = gf_bits_count(cbk->mask & ((1 << ans->idx) - 1));
+ values[pos] = ans->idx + 1;
+ blocks[pos] = ans->vector[0].iov_base;
+ if ((ans->int32 != 1) ||
+ !EC_ALIGN_CHECK(blocks[pos], EC_METHOD_WORD_SIZE)) {
+ if (iobref == NULL) {
+ err = ec_buffer_alloc(ec->xl, size, &iobref, &ptr);
+ if (err != 0) {
+ goto out;
+ }
+ }
+ ec_iov_copy_to(ptr, ans->vector, ans->int32, 0, fsize);
+ blocks[pos] = ptr;
+ ptr += fsize;
+ }
+ }
+
+ err = ec_buffer_alloc(ec->xl, size, &iobref, &ptr);
+ if (err != 0) {
+ goto out;
+ }
+
+ err = ec_method_decode(&ec->matrix, fsize, cbk->mask, values, blocks,
+ ptr);
+ if (err != 0) {
+ goto out;
+ }
+
+ vector[0].iov_base = ptr + fop->head;
+ vector[0].iov_len = size - fop->head;
+
+ max = fop->offset * ec->fragments + size;
+ if (max > cbk->iatt[0].ia_size) {
+ max = cbk->iatt[0].ia_size;
+ }
+ max -= fop->offset * ec->fragments + fop->head;
+ if (max > fop->user_size) {
+ max = fop->user_size;
+ }
+ size -= fop->head;
+ if (size > max) {
+ vector[0].iov_len -= size - max;
+ size = max;
+ }
+
+ cbk->op_ret = size;
+ cbk->int32 = 1;
+
+ iobref_unref(cbk->buffers);
+ cbk->buffers = iobref;
+
+ GF_FREE(cbk->vector);
+ cbk->vector = iov_dup(vector, 1);
+ if (cbk->vector == NULL) {
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+
+out:
+ if (iobref != NULL) {
+ iobref_unref(iobref);
+ }
+
+ return err;
+}
+
+int32_t
+ec_combine_readv(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
+{
+ if (!ec_vector_compare(dst->vector, dst->int32, src->vector, src->int32)) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_VECTOR_MISMATCH,
+ "Mismatching vector in "
+ "answers of 'GF_FOP_READ'");
+
+ return 0;
+ }
+
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH,
+ "Mismatching iatt in "
+ "answers of 'GF_FOP_READ'");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+int32_t
+ec_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ ec_t *ec = this->private;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_READ, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ cbk->int32 = count;
+
+ if (count > 0) {
+ cbk->vector = iov_dup(vector, count);
+ if (cbk->vector == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a "
+ "vector list.");
+
+ goto out;
+ }
+ cbk->int32 = count;
+ }
+ if (stbuf != NULL) {
+ cbk->iatt[0] = *stbuf;
+ }
+ if (iobref != NULL) {
+ cbk->buffers = iobref_ref(iobref);
+ if (cbk->buffers == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_BUF_REF_FAIL,
+ "Failed to reference a "
+ "buffer.");
+
+ goto out;
+ }
+ }
+ }
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ if ((op_ret > 0) && ((op_ret % ec->fragment_size) != 0)) {
+ ec_cbk_set_error(cbk, EIO, _gf_true);
+ }
+
+ ec_combine(cbk, ec_combine_readv);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_readv(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_readv_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->readv, fop->fd,
+ fop->size, fop->offset, fop->uint32, fop->xdata);
+}
+
+int32_t
+ec_manager_readv(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+ ec_t *ec = fop->xl->private;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ fop->user_size = fop->size;
+ fop->head = ec_adjust_offset_down(fop->xl->private, &fop->offset,
+ _gf_true);
+ fop->size += fop->head;
+ ec_adjust_size_up(fop->xl->private, &fop->size, _gf_true);
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, fop->offset,
+ fop->size);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ if (ec->read_mask) {
+ fop->mask &= ec->read_mask;
+ }
+ ec_dispatch_min(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 1, cbk->count);
+
+ err = ec_readv_rebuild(fop->xl->private, fop, cbk);
+ if (err != 0) {
+ ec_cbk_set_error(cbk, -err, _gf_true);
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.readv != NULL) {
+ fop->cbks.readv(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->vector, cbk->int32,
+ &cbk->iatt[0], cbk->buffers, cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.readv != NULL) {
+ fop->cbks.readv(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, 0, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_readv(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_readv_cbk_t func, void *data, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ ec_cbk_t callback = {.readv = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(READ) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_READ, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_readv,
+ ec_manager_readv, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ fop->size = size;
+ fop->offset = offset;
+ fop->uint32 = flags;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, 0, NULL, NULL, NULL);
+ }
+}
+
+/* FOP: seek */
+
+int32_t
+ec_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, off_t offset, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ ec_t *ec = this->private;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_SEEK, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ cbk->offset = offset;
+ }
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ }
+
+ if ((op_ret > 0) && ((cbk->offset % ec->fragment_size) != 0)) {
+ cbk->op_ret = -1;
+ cbk->op_errno = EIO;
+ }
+
+ ec_combine(cbk, NULL);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_seek(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_seek_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->seek, fop->fd,
+ fop->offset, fop->seek, fop->xdata);
+}
+
+int32_t
+ec_manager_seek(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+ uint64_t size;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ fop->user_size = fop->offset;
+ fop->head = ec_adjust_offset_down(fop->xl->private, &fop->offset,
+ _gf_true);
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, fop->offset,
+ EC_RANGE_FULL);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(
+ ec_get_inode_size(fop, fop->locks[0].lock->loc.inode, &size));
+
+ if (fop->user_size >= size) {
+ ec_fop_set_error(fop, ENXIO);
+
+ return EC_STATE_REPORT;
+ }
+
+ ec_dispatch_one(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ if (ec_dispatch_one_retry(fop, &cbk)) {
+ return EC_STATE_DISPATCH;
+ }
+ if ((cbk != NULL) && (cbk->op_ret >= 0)) {
+ ec_t *ec = fop->xl->private;
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
+ &size));
+
+ cbk->offset *= ec->fragments;
+ if (cbk->offset < fop->user_size) {
+ cbk->offset = fop->user_size;
+ }
+ if (cbk->offset > size) {
+ cbk->offset = size;
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.seek != NULL) {
+ fop->cbks.seek(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->offset, cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.seek != NULL) {
+ fop->cbks.seek(fop->req_frame, fop, fop->xl, -1, fop->error, 0,
+ NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, 0, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_seek(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_seek_cbk_t func, void *data, fd_t *fd,
+ off_t offset, gf_seek_what_t what, dict_t *xdata)
+{
+ ec_cbk_t callback = {.seek = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = EIO;
+
+ gf_msg_trace("ec", 0, "EC(SEEK) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SEEK, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_seek, ec_manager_seek,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ fop->offset = offset;
+ fop->seek = what;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, EIO, 0, NULL);
+ }
+}
+
+/* FOP: stat */
+
+int32_t
+ec_combine_stat(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
+{
+ if (!ec_iatt_combine(fop, dst->iatt, src->iatt, 1)) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_IATT_MISMATCH,
+ "Mismatching iatt in "
+ "answers of 'GF_FOP_STAT'");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+int32_t
+ec_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_STAT, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (buf != NULL) {
+ cbk->iatt[0] = *buf;
+ }
+ }
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ ec_combine(cbk, ec_combine_stat);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_stat(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_stat_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->stat,
+ &fop->loc[0], fop->xdata);
+}
+
+int32_t
+ec_manager_stat(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ if (fop->fd == NULL) {
+ ec_lock_prepare_inode(fop, &fop->loc[0], EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
+ } else {
+ ec_lock_prepare_fd(fop, fop->fd, EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
+ }
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_true);
+
+ if (cbk != NULL) {
+ if (cbk->iatt[0].ia_type == IA_IFREG) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 1, cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop,
+ fop->locks[0].lock->loc.inode,
+ &cbk->iatt[0].ia_size));
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->id == GF_FOP_STAT) {
+ if (fop->cbks.stat != NULL) {
+ fop->cbks.stat(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, &cbk->iatt[0], cbk->xdata);
+ }
+ } else {
+ if (fop->cbks.fstat != NULL) {
+ fop->cbks.fstat(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, &cbk->iatt[0], cbk->xdata);
+ }
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->id == GF_FOP_STAT) {
+ if (fop->cbks.stat != NULL) {
+ fop->cbks.stat(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL);
+ }
+ } else {
+ if (fop->cbks.fstat != NULL) {
+ fop->cbks.fstat(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL);
+ }
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_stat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_stat_cbk_t func, void *data, loc_t *loc,
+ dict_t *xdata)
+{
+ ec_cbk_t callback = {.stat = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(STAT) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_STAT, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_stat, ec_manager_stat,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
+ }
+}
+
+/* FOP: fstat */
+
+int32_t
+ec_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FSTAT, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (buf != NULL) {
+ cbk->iatt[0] = *buf;
+ }
+ }
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ ec_combine(cbk, ec_combine_stat);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_fstat(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_fstat_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->fstat, fop->fd,
+ fop->xdata);
+}
+
+void
+ec_fstat(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fstat_cbk_t func, void *data, fd_t *fd,
+ dict_t *xdata)
+{
+ ec_cbk_t callback = {.fstat = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(FSTAT) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSTAT, EC_FLAG_LOCK_SHARED,
+ target, fop_flags, ec_wind_fstat,
+ ec_manager_stat, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
+ }
+}
diff --git a/xlators/cluster/ec/src/ec-inode-write.c b/xlators/cluster/ec/src/ec-inode-write.c
new file mode 100644
index 00000000000..9b5fe2a7fdc
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-inode-write.c
@@ -0,0 +1,2369 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "ec-messages.h"
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-combine.h"
+#include "ec-method.h"
+#include "ec-fops.h"
+#include "ec-mem-types.h"
+
+int32_t
+ec_update_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ ec_cbk_data_t *cbk = NULL;
+ ec_fop_data_t *parent = fop->parent;
+ int i = 0;
+
+ ec_trace("UPDATE_WRITEV_CBK", cookie, "ret=%d, errno=%d, parent-fop=%s",
+ op_ret, op_errno, ec_fop_name(parent->id));
+
+ if (op_ret < 0) {
+ ec_fop_set_error(parent, op_errno);
+ goto out;
+ }
+ cbk = ec_cbk_data_allocate(parent->frame, this, parent, parent->id, 0,
+ op_ret, op_errno);
+ if (!cbk) {
+ ec_fop_set_error(parent, ENOMEM);
+ goto out;
+ }
+
+ if (xdata)
+ cbk->xdata = dict_ref(xdata);
+
+ if (prebuf)
+ cbk->iatt[i++] = *prebuf;
+
+ if (postbuf)
+ cbk->iatt[i++] = *postbuf;
+
+ LOCK(&parent->lock);
+ {
+ parent->good &= fop->good;
+
+ if (gf_bits_count(parent->good) < parent->minimum) {
+ __ec_fop_set_error(parent, EIO);
+ } else if (fop->error == 0 && parent->answer == NULL) {
+ parent->answer = cbk;
+ }
+ }
+ UNLOCK(&parent->lock);
+out:
+ return 0;
+}
+
+static int32_t
+ec_update_write(ec_fop_data_t *fop, uintptr_t mask, off_t offset, uint64_t size)
+{
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iovec vector;
+ int32_t err = -ENOMEM;
+
+ iobref = iobref_new();
+ if (iobref == NULL) {
+ goto out;
+ }
+ iobuf = iobuf_get(fop->xl->ctx->iobuf_pool);
+ if (iobuf == NULL) {
+ goto out;
+ }
+ err = iobref_add(iobref, iobuf);
+ if (err != 0) {
+ goto out;
+ }
+
+ if (fop->locks[0].lock)
+ ec_lock_update_good(fop->locks[0].lock, fop);
+ vector.iov_base = iobuf->ptr;
+ vector.iov_len = size;
+ memset(vector.iov_base, 0, vector.iov_len);
+
+ ec_writev(fop->frame, fop->xl, mask, fop->minimum, ec_update_writev_cbk,
+ NULL, fop->fd, &vector, 1, offset, 0, iobref, NULL);
+
+ err = 0;
+
+out:
+ if (iobuf != NULL) {
+ iobuf_unref(iobuf);
+ }
+ if (iobref != NULL) {
+ iobref_unref(iobref);
+ }
+
+ return err;
+}
+
+int
+ec_inode_write_cbk(call_frame_t *frame, xlator_t *this, void *cookie,
+ int op_ret, int op_errno, struct iatt *prestat,
+ struct iatt *poststat, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int i = 0;
+ int idx = 0;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+ idx = (int32_t)(uintptr_t)cookie;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, fop->id, idx, op_ret,
+ op_errno);
+ if (!cbk)
+ goto out;
+
+ if (op_ret < 0)
+ goto out;
+
+ if (xdata)
+ cbk->xdata = dict_ref(xdata);
+
+ if (prestat)
+ cbk->iatt[i++] = *prestat;
+
+ if (poststat)
+ cbk->iatt[i++] = *poststat;
+
+out:
+ if (cbk)
+ ec_combine(cbk, ec_combine_write);
+
+ if (fop)
+ ec_complete(fop);
+ return 0;
+}
+/* FOP: removexattr */
+
+int32_t
+ec_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, NULL, NULL,
+ xdata);
+}
+
+void
+ec_wind_removexattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_removexattr_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->removexattr,
+ &fop->loc[0], fop->str[0], fop->xdata);
+}
+
+void
+ec_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ switch (fop->id) {
+ case GF_FOP_SETXATTR:
+ if (fop->cbks.setxattr) {
+ QUORUM_CBK(fop->cbks.setxattr, fop, frame, cookie, this, op_ret,
+ op_errno, xdata);
+ }
+ break;
+ case GF_FOP_REMOVEXATTR:
+ if (fop->cbks.removexattr) {
+ QUORUM_CBK(fop->cbks.removexattr, fop, frame, cookie, this,
+ op_ret, op_errno, xdata);
+ }
+ break;
+ case GF_FOP_FSETXATTR:
+ if (fop->cbks.fsetxattr) {
+ QUORUM_CBK(fop->cbks.fsetxattr, fop, frame, cookie, this,
+ op_ret, op_errno, xdata);
+ }
+ break;
+ case GF_FOP_FREMOVEXATTR:
+ if (fop->cbks.fremovexattr) {
+ QUORUM_CBK(fop->cbks.fremovexattr, fop, frame, cookie, this,
+ op_ret, op_errno, xdata);
+ }
+ break;
+ }
+}
+
+int32_t
+ec_manager_xattr(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ if (fop->fd == NULL) {
+ ec_lock_prepare_inode(fop, &fop->loc[0],
+ EC_UPDATE_META | EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
+ } else {
+ ec_lock_prepare_fd(fop, fop->fd, EC_UPDATE_META | EC_QUERY_INFO,
+ 0, EC_RANGE_FULL);
+ }
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ ec_fop_prepare_answer(fop, _gf_false);
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ ec_xattr_cbk(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->xdata);
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ ec_xattr_cbk(fop->req_frame, fop, fop->xl, -1, fop->error, NULL);
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_removexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_removexattr_cbk_t func, void *data,
+ loc_t *loc, const char *name, dict_t *xdata)
+{
+ ec_cbk_t callback = {.removexattr = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(REMOVEXATTR) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_REMOVEXATTR, 0, target,
+ fop_flags, ec_wind_removexattr, ec_manager_xattr,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (name != NULL) {
+ fop->str[0] = gf_strdup(name);
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
+ }
+}
+
+/* FOP: fremovexattr */
+
+int32_t
+ec_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, NULL, NULL,
+ xdata);
+}
+
+void
+ec_wind_fremovexattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_fremovexattr_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->fremovexattr,
+ fop->fd, fop->str[0], fop->xdata);
+}
+
+void
+ec_fremovexattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fremovexattr_cbk_t func, void *data,
+ fd_t *fd, const char *name, dict_t *xdata)
+{
+ ec_cbk_t callback = {.fremovexattr = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(FREMOVEXATTR) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FREMOVEXATTR, 0, target,
+ fop_flags, ec_wind_fremovexattr,
+ ec_manager_xattr, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (name != NULL) {
+ fop->str[0] = gf_strdup(name);
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
+ }
+}
+
+/* FOP: setattr */
+
+int32_t
+ec_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prestat,
+ struct iatt *poststat, dict_t *xdata)
+{
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat,
+ poststat, xdata);
+}
+
+void
+ec_wind_setattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_setattr_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->setattr,
+ &fop->loc[0], &fop->iatt, fop->int32, fop->xdata);
+}
+
+int32_t
+ec_manager_setattr(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ if (fop->fd == NULL) {
+ ec_lock_prepare_inode(fop, &fop->loc[0],
+ EC_UPDATE_META | EC_QUERY_INFO, 0,
+ EC_RANGE_FULL);
+ } else {
+ ec_lock_prepare_fd(fop, fop->fd, EC_UPDATE_META | EC_QUERY_INFO,
+ 0, EC_RANGE_FULL);
+ }
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ if (cbk->iatt[0].ia_type == IA_IFREG) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop,
+ fop->locks[0].lock->loc.inode,
+ &cbk->iatt[0].ia_size));
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->id == GF_FOP_SETATTR) {
+ if (fop->cbks.setattr != NULL) {
+ QUORUM_CBK(fop->cbks.setattr, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
+ }
+ } else {
+ if (fop->cbks.fsetattr != NULL) {
+ QUORUM_CBK(fop->cbks.fsetattr, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
+ }
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->id == GF_FOP_SETATTR) {
+ if (fop->cbks.setattr != NULL) {
+ fop->cbks.setattr(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL, NULL);
+ }
+ } else {
+ if (fop->cbks.fsetattr != NULL) {
+ fop->cbks.fsetattr(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL, NULL);
+ }
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_setattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_setattr_cbk_t func, void *data, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ ec_cbk_t callback = {.setattr = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(SETATTR) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SETATTR, 0, target,
+ fop_flags, ec_wind_setattr, ec_manager_setattr,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->int32 = valid;
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (stbuf != NULL) {
+ fop->iatt = *stbuf;
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+ }
+}
+
+/* FOP: fsetattr */
+
+int32_t
+ec_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prestat,
+ struct iatt *poststat, dict_t *xdata)
+{
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat,
+ poststat, xdata);
+}
+
+void
+ec_wind_fsetattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_fsetattr_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->fsetattr,
+ fop->fd, &fop->iatt, fop->int32, fop->xdata);
+}
+
+void
+ec_fsetattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsetattr_cbk_t func, void *data, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ ec_cbk_t callback = {.fsetattr = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(FSETATTR) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETATTR, 0, target,
+ fop_flags, ec_wind_fsetattr, ec_manager_setattr,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ fop->int32 = valid;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (stbuf != NULL) {
+ fop->iatt = *stbuf;
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+ }
+}
+
+/* FOP: setxattr */
+
+int32_t
+ec_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, NULL, NULL,
+ xdata);
+}
+
+void
+ec_wind_setxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_setxattr_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->setxattr,
+ &fop->loc[0], fop->dict, fop->int32, fop->xdata);
+}
+
+void
+ec_setxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_setxattr_cbk_t func, void *data, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ ec_cbk_t callback = {.setxattr = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(SETXATTR) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_SETXATTR, 0, target,
+ fop_flags, ec_wind_setxattr, ec_manager_xattr,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->int32 = flags;
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (dict != NULL) {
+ fop->dict = dict_copy_with_ref(dict, NULL);
+ if (fop->dict == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
+ }
+}
+
+/* FOP: fsetxattr */
+
+int32_t
+ec_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FSETXATTR, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ ec_combine(cbk, NULL);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_fsetxattr(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_fsetxattr_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->fsetxattr,
+ fop->fd, fop->dict, fop->int32, fop->xdata);
+}
+
+void
+ec_fsetxattr(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fsetxattr_cbk_t func, void *data, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ ec_cbk_t callback = {.fsetxattr = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(FSETXATTR) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FSETXATTR, 0, target,
+ fop_flags, ec_wind_fsetxattr, ec_manager_xattr,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ fop->int32 = flags;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (dict != NULL) {
+ fop->dict = dict_copy_with_ref(dict, NULL);
+ if (fop->dict == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
+ }
+}
+
+/*********************************************************************
+ *
+ * File Operation : fallocate
+ *
+ *********************************************************************/
+
+int32_t
+ec_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+}
+
+void
+ec_wind_fallocate(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_fallocate_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->fallocate,
+ fop->fd, fop->int32, fop->offset, fop->size, fop->xdata);
+}
+
+int32_t
+ec_manager_fallocate(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk = NULL;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ if (fop->size == 0) {
+ ec_fop_set_error(fop, EINVAL);
+ return EC_STATE_REPORT;
+ }
+ if (fop->int32 &
+ (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE |
+ FALLOC_FL_ZERO_RANGE | FALLOC_FL_PUNCH_HOLE)) {
+ ec_fop_set_error(fop, ENOTSUP);
+ return EC_STATE_REPORT;
+ }
+ fop->user_size = fop->offset + fop->size;
+ fop->head = ec_adjust_offset_down(fop->xl->private, &fop->offset,
+ _gf_true);
+ fop->size += fop->head;
+ ec_adjust_size_up(fop->xl->private, &fop->size, _gf_true);
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ ec_lock_prepare_fd(fop, fop->fd,
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
+ fop->offset, fop->size);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ LOCK(&fop->locks[0].lock->loc.inode->lock);
+ {
+ GF_ASSERT(__ec_get_inode_size(fop,
+ fop->locks[0].lock->loc.inode,
+ &cbk->iatt[0].ia_size));
+
+ /*If mode has FALLOC_FL_KEEP_SIZE keep the size */
+ if (fop->int32 & FALLOC_FL_KEEP_SIZE) {
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ } else if (fop->user_size > cbk->iatt[0].ia_size) {
+ cbk->iatt[1].ia_size = fop->user_size;
+
+ /* This shouldn't fail because we have the inode
+ * locked. */
+ GF_ASSERT(__ec_set_inode_size(
+ fop, fop->locks[0].lock->loc.inode,
+ cbk->iatt[1].ia_size));
+ } else {
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ }
+ }
+ UNLOCK(&fop->locks[0].lock->loc.inode->lock);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.fallocate != NULL) {
+ QUORUM_CBK(fop->cbks.fallocate, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.fallocate != NULL) {
+ fop->cbks.fallocate(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_fallocate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fallocate_cbk_t func, void *data, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata)
+{
+ ec_cbk_t callback = {.fallocate = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(FALLOCATE) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FALLOCATE, 0, target,
+ fop_flags, ec_wind_fallocate,
+ ec_manager_fallocate, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+ fop->int32 = mode;
+ fop->offset = offset;
+ fop->size = len;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+ goto out;
+ }
+ }
+
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+ }
+}
+
+/*********************************************************************
+ *
+ * File Operation : Discard
+ *
+ *********************************************************************/
+void
+ec_update_discard_write(ec_fop_data_t *fop, uintptr_t mask)
+{
+ ec_t *ec = fop->xl->private;
+ off_t off_head = 0;
+ off_t off_tail = 0;
+ uint64_t size_head = 0;
+ uint64_t size_tail = 0;
+ int error = 0;
+
+ off_head = fop->offset * ec->fragments - fop->int32;
+ if (fop->size == 0) {
+ error = ec_update_write(fop, mask, off_head, fop->user_size);
+ } else {
+ size_head = fop->int32;
+ size_tail = (off_head + fop->user_size) % ec->stripe_size;
+ off_tail = off_head + fop->user_size - size_tail;
+ if (size_head) {
+ error = ec_update_write(fop, mask, off_head, size_head);
+ if (error) {
+ goto out;
+ }
+ }
+ if (size_tail) {
+ error = ec_update_write(fop, mask, off_tail, size_tail);
+ }
+ }
+out:
+ if (error)
+ ec_fop_set_error(fop, -error);
+}
+
+void
+ec_discard_adjust_offset_size(ec_fop_data_t *fop)
+{
+ ec_t *ec = fop->xl->private;
+
+ fop->user_size = fop->size;
+ /* If discard length covers at least a fragment on brick, we will
+ * perform discard operation(when fop->size is non-zero) else we just
+ * write zeros.
+ */
+ fop->int32 = ec_adjust_offset_up(ec, &fop->offset, _gf_true);
+ fop->frag_range.first = fop->offset;
+ if (fop->size < fop->int32) {
+ fop->size = 0;
+ } else {
+ fop->size -= fop->int32;
+ ec_adjust_size_down(ec, &fop->size, _gf_true);
+ }
+ fop->frag_range.last = fop->offset + fop->size;
+}
+
+int32_t
+ec_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+}
+
+void
+ec_wind_discard(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_discard_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->discard,
+ fop->fd, fop->offset, fop->size, fop->xdata);
+}
+
+int32_t
+ec_manager_discard(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk = NULL;
+ off_t fl_start = 0;
+ uint64_t fl_size = 0;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ if ((fop->size <= 0) || (fop->offset < 0)) {
+ ec_fop_set_error(fop, EINVAL);
+ return EC_STATE_REPORT;
+ }
+ /* Because of the head/tail writes, "discard" happens on the
+ * remaining regions, but we need to compute region including
+ * head/tail writes so compute them separately*/
+ fl_start = fop->offset;
+ fl_size = fop->size;
+ fl_size += ec_adjust_offset_down(fop->xl->private, &fl_start,
+ _gf_true);
+ ec_adjust_size_up(fop->xl->private, &fl_size, _gf_true);
+
+ ec_discard_adjust_offset_size(fop);
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ ec_lock_prepare_fd(fop, fop->fd,
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
+ fl_start, fl_size);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+
+ /* Dispatch discard fop only if we have whole fragment
+ * to deallocate */
+ if (fop->size) {
+ ec_dispatch_all(fop);
+ return EC_STATE_DELAYED_START;
+ } else {
+ /* Assume discard to have succeeded on all bricks */
+ ec_succeed_all(fop);
+ }
+
+ /* Fall through */
+
+ case EC_STATE_DELAYED_START:
+
+ if (fop->size) {
+ if (fop->answer && fop->answer->op_ret == 0)
+ ec_update_discard_write(fop, fop->answer->mask);
+ } else {
+ ec_update_discard_write(fop, fop->mask);
+ }
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
+ &cbk->iatt[0].ia_size));
+
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ }
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.discard != NULL) {
+ QUORUM_CBK(fop->cbks.discard, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_DELAYED_START:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.discard != NULL) {
+ fop->cbks.discard(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_discard(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_discard_cbk_t func, void *data, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ ec_cbk_t callback = {.discard = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(DISCARD) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_DISCARD, 0, target,
+ fop_flags, ec_wind_discard, ec_manager_discard,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+ fop->offset = offset;
+ fop->size = len;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ }
+
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+ }
+}
+
+/*********************************************************************
+ *
+ * File Operation : truncate
+ *
+ *********************************************************************/
+
+int32_t
+ec_update_truncate_write(ec_fop_data_t *fop, uintptr_t mask)
+{
+ ec_t *ec = fop->xl->private;
+ uint64_t size = fop->offset * ec->fragments - fop->user_size;
+ return ec_update_write(fop, mask, fop->user_size, size);
+}
+
+int32_t
+ec_truncate_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ ec_fop_data_t *fop = cookie;
+ int32_t err;
+
+ fop->parent->good &= fop->good;
+ if (op_ret >= 0) {
+ fd_bind(fd);
+ err = ec_update_truncate_write(fop->parent, fop->answer->mask);
+ if (err != 0) {
+ ec_fop_set_error(fop->parent, -err);
+ }
+ }
+
+ return 0;
+}
+
+int32_t
+ec_truncate_clean(ec_fop_data_t *fop)
+{
+ if (fop->fd == NULL) {
+ fop->fd = fd_create(fop->loc[0].inode, fop->frame->root->pid);
+ if (fop->fd == NULL) {
+ return -ENOMEM;
+ }
+
+ ec_open(fop->frame, fop->xl, fop->answer->mask, fop->minimum,
+ ec_truncate_open_cbk, fop, &fop->loc[0], O_RDWR, fop->fd, NULL);
+
+ return 0;
+ } else {
+ return ec_update_truncate_write(fop, fop->answer->mask);
+ }
+}
+
+int32_t
+ec_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prestat,
+ struct iatt *poststat, dict_t *xdata)
+{
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat,
+ poststat, xdata);
+}
+
+void
+ec_wind_truncate(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_truncate_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->truncate,
+ &fop->loc[0], fop->offset, fop->xdata);
+}
+
+int32_t
+ec_manager_truncate(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+ off_t offset_down;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ fop->user_size = fop->offset;
+ ec_adjust_offset_up(fop->xl->private, &fop->offset, _gf_true);
+ fop->frag_range.first = fop->offset;
+ fop->frag_range.last = UINT64_MAX;
+
+ /* Fall through */
+
+ case EC_STATE_LOCK:
+ offset_down = fop->user_size;
+ ec_adjust_offset_down(fop->xl->private, &offset_down, _gf_true);
+
+ if (fop->id == GF_FOP_TRUNCATE) {
+ ec_lock_prepare_inode(
+ fop, &fop->loc[0],
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
+ offset_down, EC_RANGE_FULL);
+ } else {
+ ec_lock_prepare_fd(
+ fop, fop->fd,
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
+ offset_down, EC_RANGE_FULL);
+ }
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ int32_t err;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ /* Inode size doesn't need to be updated under locks, because
+ * conflicting operations won't be in-flight
+ */
+ GF_ASSERT(ec_get_inode_size(fop, fop->locks[0].lock->loc.inode,
+ &cbk->iatt[0].ia_size));
+ cbk->iatt[1].ia_size = fop->user_size;
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_set_inode_size(fop, fop->locks[0].lock->loc.inode,
+ fop->user_size));
+ if ((cbk->iatt[0].ia_size > cbk->iatt[1].ia_size) &&
+ (fop->user_size != fop->offset)) {
+ err = ec_truncate_clean(fop);
+ if (err != 0) {
+ ec_cbk_set_error(cbk, -err, _gf_false);
+ }
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->id == GF_FOP_TRUNCATE) {
+ if (fop->cbks.truncate != NULL) {
+ QUORUM_CBK(fop->cbks.truncate, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
+ }
+ } else {
+ if (fop->cbks.ftruncate != NULL) {
+ QUORUM_CBK(fop->cbks.ftruncate, fop, fop->req_frame, fop,
+ fop->xl, cbk->op_ret, cbk->op_errno,
+ &cbk->iatt[0], &cbk->iatt[1], cbk->xdata);
+ }
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->id == GF_FOP_TRUNCATE) {
+ if (fop->cbks.truncate != NULL) {
+ fop->cbks.truncate(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL, NULL);
+ }
+ } else {
+ if (fop->cbks.ftruncate != NULL) {
+ fop->cbks.ftruncate(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL, NULL, NULL);
+ }
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_truncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_truncate_cbk_t func, void *data, loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ ec_cbk_t callback = {.truncate = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(TRUNCATE) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_TRUNCATE, 0, target,
+ fop_flags, ec_wind_truncate, ec_manager_truncate,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->offset = offset;
+
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+ }
+}
+
+/* FOP: ftruncate */
+
+int32_t
+ec_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prestat,
+ struct iatt *poststat, dict_t *xdata)
+{
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat,
+ poststat, xdata);
+}
+
+void
+ec_wind_ftruncate(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_ftruncate_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->ftruncate,
+ fop->fd, fop->offset, fop->xdata);
+}
+
+void
+ec_ftruncate(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_ftruncate_cbk_t func, void *data, fd_t *fd,
+ off_t offset, dict_t *xdata)
+{
+ ec_cbk_t callback = {.ftruncate = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(FTRUNCATE) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FTRUNCATE, 0, target,
+ fop_flags, ec_wind_ftruncate,
+ ec_manager_truncate, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ fop->offset = offset;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+ }
+}
+
+/* FOP: writev */
+static ec_stripe_t *
+ec_allocate_stripe(ec_t *ec, ec_stripe_list_t *stripe_cache)
+{
+ ec_stripe_t *stripe = NULL;
+
+ if (stripe_cache->count >= stripe_cache->max) {
+ GF_ASSERT(!list_empty(&stripe_cache->lru));
+ stripe = list_first_entry(&stripe_cache->lru, ec_stripe_t, lru);
+ list_move_tail(&stripe->lru, &stripe_cache->lru);
+ GF_ATOMIC_INC(ec->stats.stripe_cache.evicts);
+ } else {
+ stripe = GF_MALLOC(sizeof(ec_stripe_t) + ec->stripe_size,
+ ec_mt_ec_stripe_t);
+ if (stripe != NULL) {
+ stripe_cache->count++;
+ list_add_tail(&stripe->lru, &stripe_cache->lru);
+ GF_ATOMIC_INC(ec->stats.stripe_cache.allocs);
+ } else {
+ GF_ATOMIC_INC(ec->stats.stripe_cache.errors);
+ }
+ }
+
+ return stripe;
+}
+
+static void
+ec_write_stripe_data(ec_t *ec, ec_fop_data_t *fop, ec_stripe_t *stripe)
+{
+ off_t base;
+
+ base = fop->size - ec->stripe_size;
+ memcpy(stripe->data, fop->vector[0].iov_base + base, ec->stripe_size);
+ stripe->frag_offset = fop->frag_range.last - ec->fragment_size;
+}
+
+static void
+ec_add_stripe_in_cache(ec_t *ec, ec_fop_data_t *fop)
+{
+ ec_inode_t *ctx = NULL;
+ ec_stripe_t *stripe = NULL;
+ ec_stripe_list_t *stripe_cache = NULL;
+ gf_boolean_t failed = _gf_true;
+
+ LOCK(&fop->fd->inode->lock);
+
+ ctx = __ec_inode_get(fop->fd->inode, fop->xl);
+ if (ctx == NULL) {
+ goto out;
+ }
+
+ stripe_cache = &ctx->stripe_cache;
+ if (stripe_cache->max > 0) {
+ stripe = ec_allocate_stripe(ec, stripe_cache);
+ if (stripe == NULL) {
+ goto out;
+ }
+
+ ec_write_stripe_data(ec, fop, stripe);
+ }
+
+ failed = _gf_false;
+
+out:
+ UNLOCK(&fop->fd->inode->lock);
+
+ if (failed) {
+ gf_msg(ec->xl->name, GF_LOG_DEBUG, ENOMEM, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to create and add stripe in cache");
+ }
+}
+
+int32_t
+ec_writev_merge_tail(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ ec_t *ec = this->private;
+ ec_fop_data_t *fop = frame->local;
+ uint64_t size, base, tmp;
+
+ if (op_ret >= 0) {
+ tmp = 0;
+ size = fop->size - fop->user_size - fop->head;
+ base = ec->stripe_size - size;
+ if (op_ret > base) {
+ tmp = min(op_ret - base, size);
+ ec_iov_copy_to(fop->vector[0].iov_base + fop->size - size, vector,
+ count, base, tmp);
+
+ size -= tmp;
+ }
+
+ if (size > 0) {
+ memset(fop->vector[0].iov_base + fop->size - size, 0, size);
+ }
+
+ if (ec->stripe_cache) {
+ ec_add_stripe_in_cache(ec, fop);
+ }
+ }
+ return 0;
+}
+
+int32_t
+ec_writev_merge_head(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ ec_t *ec = this->private;
+ ec_fop_data_t *fop = frame->local;
+ uint64_t size, base;
+
+ if (op_ret >= 0) {
+ size = fop->head;
+ base = 0;
+
+ if (op_ret > 0) {
+ base = min(op_ret, size);
+ ec_iov_copy_to(fop->vector[0].iov_base, vector, count, 0, base);
+
+ size -= base;
+ }
+
+ if (size > 0) {
+ memset(fop->vector[0].iov_base + base, 0, size);
+ }
+
+ size = fop->size - fop->user_size - fop->head;
+ if ((size > 0) && (fop->size == ec->stripe_size)) {
+ ec_writev_merge_tail(frame, cookie, this, op_ret, op_errno, vector,
+ count, stbuf, iobref, xdata);
+ }
+ }
+
+ return 0;
+}
+
+static int
+ec_make_internal_fop_xdata(dict_t **xdata)
+{
+ dict_t *dict = NULL;
+
+ if (*xdata)
+ return 0;
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ if (dict_set_str(dict, GLUSTERFS_INTERNAL_FOP_KEY, "yes"))
+ goto out;
+
+ *xdata = dict;
+ return 0;
+out:
+ if (dict)
+ dict_unref(dict);
+ return -1;
+}
+
+static int32_t
+ec_writev_prepare_buffers(ec_t *ec, ec_fop_data_t *fop)
+{
+ struct iobref *iobref = NULL;
+ struct iovec *iov;
+ void *ptr;
+ int32_t err;
+
+ fop->user_size = iov_length(fop->vector, fop->int32);
+ fop->head = ec_adjust_offset_down(ec, &fop->offset, _gf_false);
+ fop->frag_range.first = fop->offset / ec->fragments;
+ fop->size = fop->user_size + fop->head;
+ ec_adjust_size_up(ec, &fop->size, _gf_false);
+ fop->frag_range.last = fop->frag_range.first + fop->size / ec->fragments;
+
+ if ((fop->int32 != 1) || (fop->head != 0) || (fop->size > fop->user_size) ||
+ !EC_ALIGN_CHECK(fop->vector[0].iov_base, EC_METHOD_WORD_SIZE)) {
+ err = ec_buffer_alloc(ec->xl, fop->size, &iobref, &ptr);
+ if (err != 0) {
+ goto out;
+ }
+
+ ec_iov_copy_to(ptr + fop->head, fop->vector, fop->int32, 0,
+ fop->user_size);
+
+ fop->vector[0].iov_base = ptr;
+ fop->vector[0].iov_len = fop->size;
+
+ iobref_unref(fop->buffers);
+ fop->buffers = iobref;
+ }
+
+ if (fop->int32 != 2) {
+ iov = GF_MALLOC(VECTORSIZE(2), gf_common_mt_iovec);
+ if (iov == NULL) {
+ err = -ENOMEM;
+
+ goto out;
+ }
+ iov[0].iov_base = fop->vector[0].iov_base;
+ iov[0].iov_len = fop->vector[0].iov_len;
+
+ GF_FREE(fop->vector);
+ fop->vector = iov;
+ }
+
+ fop->vector[1].iov_len = fop->size / ec->fragments;
+ err = ec_buffer_alloc(ec->xl, fop->vector[1].iov_len * ec->nodes,
+ &fop->buffers, &fop->vector[1].iov_base);
+ if (err != 0) {
+ goto out;
+ }
+
+ err = 0;
+
+out:
+ return err;
+}
+
+static void
+ec_merge_stripe_head_locked(ec_t *ec, ec_fop_data_t *fop, ec_stripe_t *stripe)
+{
+ uint32_t head, size;
+
+ head = fop->head;
+ memcpy(fop->vector[0].iov_base, stripe->data, head);
+
+ size = ec->stripe_size - head;
+ if (size > fop->user_size) {
+ head += fop->user_size;
+ size = ec->stripe_size - head;
+ memcpy(fop->vector[0].iov_base + head, stripe->data + head, size);
+ }
+}
+
+static void
+ec_merge_stripe_tail_locked(ec_t *ec, ec_fop_data_t *fop, ec_stripe_t *stripe)
+{
+ uint32_t head, tail;
+ off_t offset;
+
+ offset = fop->user_size + fop->head;
+ tail = fop->size - offset;
+ head = ec->stripe_size - tail;
+
+ memcpy(fop->vector[0].iov_base + offset, stripe->data + head, tail);
+}
+
+static ec_stripe_t *
+ec_get_stripe_from_cache_locked(ec_t *ec, ec_fop_data_t *fop,
+ uint64_t frag_offset)
+{
+ ec_inode_t *ctx = NULL;
+ ec_stripe_t *stripe = NULL;
+ ec_stripe_list_t *stripe_cache = NULL;
+
+ ctx = __ec_inode_get(fop->fd->inode, fop->xl);
+ if (ctx == NULL) {
+ GF_ATOMIC_INC(ec->stats.stripe_cache.errors);
+ return NULL;
+ }
+
+ stripe_cache = &ctx->stripe_cache;
+ list_for_each_entry(stripe, &stripe_cache->lru, lru)
+ {
+ if (stripe->frag_offset == frag_offset) {
+ list_move_tail(&stripe->lru, &stripe_cache->lru);
+ GF_ATOMIC_INC(ec->stats.stripe_cache.hits);
+ return stripe;
+ }
+ }
+
+ GF_ATOMIC_INC(ec->stats.stripe_cache.misses);
+
+ return NULL;
+}
+
+static gf_boolean_t
+ec_get_and_merge_stripe(ec_t *ec, ec_fop_data_t *fop, ec_stripe_part_t which)
+{
+ uint64_t frag_offset;
+ ec_stripe_t *stripe = NULL;
+ gf_boolean_t found = _gf_false;
+
+ if (!ec->stripe_cache) {
+ return found;
+ }
+
+ LOCK(&fop->fd->inode->lock);
+ if (which == EC_STRIPE_HEAD) {
+ frag_offset = fop->frag_range.first;
+ stripe = ec_get_stripe_from_cache_locked(ec, fop, frag_offset);
+ if (stripe) {
+ ec_merge_stripe_head_locked(ec, fop, stripe);
+ found = _gf_true;
+ }
+ }
+
+ if (which == EC_STRIPE_TAIL) {
+ frag_offset = fop->frag_range.last - ec->fragment_size;
+ stripe = ec_get_stripe_from_cache_locked(ec, fop, frag_offset);
+ if (stripe) {
+ ec_merge_stripe_tail_locked(ec, fop, stripe);
+ found = _gf_true;
+ }
+ }
+ UNLOCK(&fop->fd->inode->lock);
+
+ return found;
+}
+
+static uintptr_t
+ec_get_lock_good_mask(inode_t *inode, xlator_t *xl)
+{
+ ec_lock_t *lock = NULL;
+ ec_inode_t *ictx = NULL;
+ LOCK(&inode->lock);
+ {
+ ictx = __ec_inode_get(inode, xl);
+ if (ictx)
+ lock = ictx->inode_lock;
+ }
+ UNLOCK(&inode->lock);
+ if (lock)
+ return lock->good_mask;
+ return 0;
+}
+
+void
+ec_writev_start(ec_fop_data_t *fop)
+{
+ ec_t *ec = fop->xl->private;
+ ec_fd_t *ctx;
+ fd_t *fd;
+ dict_t *xdata = NULL;
+ uint64_t tail, current;
+ int32_t err = -ENOMEM;
+ gf_boolean_t found_stripe = _gf_false;
+
+ /* This shouldn't fail because we have the inode locked. */
+ GF_ASSERT(ec_get_inode_size(fop, fop->fd->inode, &current));
+
+ fd = fd_anonymous(fop->fd->inode);
+ if (fd == NULL) {
+ goto failed;
+ }
+
+ fop->frame->root->uid = 0;
+ fop->frame->root->gid = 0;
+
+ ctx = ec_fd_get(fop->fd, fop->xl);
+ if (ctx != NULL) {
+ if ((ctx->flags & O_APPEND) != 0) {
+ /* Appending writes take full locks so size won't change because
+ * of any parallel operations
+ */
+ fop->offset = current;
+ }
+ }
+
+ err = ec_writev_prepare_buffers(ec, fop);
+ if (err != 0) {
+ goto failed_fd;
+ }
+ tail = fop->size - fop->user_size - fop->head;
+ if (fop->head > 0) {
+ if (current > fop->offset) {
+ found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_HEAD);
+ if (!found_stripe) {
+ if (ec_make_internal_fop_xdata(&xdata)) {
+ err = -ENOMEM;
+ goto failed_xdata;
+ }
+ ec_readv(fop->frame, fop->xl,
+ ec_get_lock_good_mask(fop->fd->inode, fop->xl),
+ EC_MINIMUM_MIN, ec_writev_merge_head, NULL, fd,
+ ec->stripe_size, fop->offset, 0, xdata);
+ }
+ } else {
+ memset(fop->vector[0].iov_base, 0, fop->head);
+ memset(fop->vector[0].iov_base + fop->size - tail, 0, tail);
+ if (ec->stripe_cache && (fop->size <= ec->stripe_size)) {
+ ec_add_stripe_in_cache(ec, fop);
+ }
+ }
+ }
+
+ if ((tail > 0) && ((fop->head == 0) || (fop->size > ec->stripe_size))) {
+ /* Current locking scheme will make sure the 'current' below will
+ * never decrease while the fop is in progress, so the checks will
+ * work as expected
+ */
+ if (current > fop->offset + fop->head + fop->user_size) {
+ found_stripe = ec_get_and_merge_stripe(ec, fop, EC_STRIPE_TAIL);
+ if (!found_stripe) {
+ if (ec_make_internal_fop_xdata(&xdata)) {
+ err = -ENOMEM;
+ goto failed_xdata;
+ }
+ ec_readv(fop->frame, fop->xl,
+ ec_get_lock_good_mask(fop->fd->inode, fop->xl),
+ EC_MINIMUM_MIN, ec_writev_merge_tail, NULL, fd,
+ ec->stripe_size,
+ fop->offset + fop->size - ec->stripe_size, 0, xdata);
+ }
+ } else {
+ memset(fop->vector[0].iov_base + fop->size - tail, 0, tail);
+ if (ec->stripe_cache) {
+ ec_add_stripe_in_cache(ec, fop);
+ }
+ }
+ }
+
+ err = 0;
+
+failed_xdata:
+ if (xdata) {
+ dict_unref(xdata);
+ }
+failed_fd:
+ fd_unref(fd);
+failed:
+ ec_fop_set_error(fop, -err);
+}
+
+int32_t
+ec_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prestat, struct iatt *poststat,
+ dict_t *xdata)
+{
+ ec_t *ec = NULL;
+ if (this && this->private) {
+ ec = this->private;
+ if ((op_ret > 0) && ((op_ret % ec->fragment_size) != 0)) {
+ op_ret = -1;
+ op_errno = EIO;
+ }
+ }
+ return ec_inode_write_cbk(frame, this, cookie, op_ret, op_errno, prestat,
+ poststat, xdata);
+}
+
+void
+ec_wind_writev(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ struct iovec vector[1];
+ size_t size;
+
+ size = fop->vector[1].iov_len;
+
+ vector[0].iov_base = fop->vector[1].iov_base + idx * size;
+ vector[0].iov_len = size;
+
+ STACK_WIND_COOKIE(fop->frame, ec_writev_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->writev, fop->fd,
+ vector, 1, fop->offset / ec->fragments, fop->uint32,
+ fop->buffers, fop->xdata);
+}
+
+static void
+ec_writev_encode(ec_fop_data_t *fop)
+{
+ ec_t *ec = fop->xl->private;
+ void *blocks[ec->nodes];
+ uint32_t i;
+
+ blocks[0] = fop->vector[1].iov_base;
+ for (i = 1; i < ec->nodes; i++) {
+ blocks[i] = blocks[i - 1] + fop->vector[1].iov_len;
+ }
+ ec_method_encode(&ec->matrix, fop->vector[0].iov_len,
+ fop->vector[0].iov_base, blocks);
+}
+
+int32_t
+ec_manager_writev(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+ ec_fd_t *ctx = NULL;
+ ec_t *ec = fop->xl->private;
+ off_t fl_start = 0;
+ uint64_t fl_size = LONG_MAX;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ case EC_STATE_LOCK:
+ ctx = ec_fd_get(fop->fd, fop->xl);
+ if (ctx != NULL) {
+ if ((ctx->flags & O_APPEND) == 0) {
+ off_t user_size = 0;
+ off_t head = 0;
+
+ fl_start = fop->offset;
+ user_size = iov_length(fop->vector, fop->int32);
+ head = ec_adjust_offset_down(ec, &fl_start, _gf_true);
+ fl_size = user_size + head;
+ ec_adjust_size_up(ec, &fl_size, _gf_true);
+ }
+ }
+ ec_lock_prepare_fd(fop, fop->fd,
+ EC_UPDATE_DATA | EC_UPDATE_META | EC_QUERY_INFO,
+ fl_start, fl_size);
+ ec_lock(fop);
+
+ return EC_STATE_DISPATCH;
+
+ case EC_STATE_DISPATCH:
+ ec_writev_start(fop);
+
+ return EC_STATE_DELAYED_START;
+
+ case EC_STATE_DELAYED_START:
+ /* Restore uid, gid if they were changed to do some partial
+ * reads. */
+ fop->frame->root->uid = fop->uid;
+ fop->frame->root->gid = fop->gid;
+
+ ec_writev_encode(fop);
+
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ cbk = ec_fop_prepare_answer(fop, _gf_false);
+ if (cbk != NULL) {
+ ec_t *ec = fop->xl->private;
+ uint64_t size;
+
+ ec_iatt_rebuild(fop->xl->private, cbk->iatt, 2, cbk->count);
+
+ /* This shouldn't fail because we have the inode locked. */
+ LOCK(&fop->fd->inode->lock);
+ {
+ GF_ASSERT(__ec_get_inode_size(fop, fop->fd->inode,
+ &cbk->iatt[0].ia_size));
+ cbk->iatt[1].ia_size = cbk->iatt[0].ia_size;
+ size = fop->offset + fop->head + fop->user_size;
+ if (size > cbk->iatt[0].ia_size) {
+ /* Only update inode size if this is a top level fop.
+ * Otherwise this is an internal write and the top
+ * level fop should take care of the real inode size.
+ */
+ if (fop->parent == NULL) {
+ /* This shouldn't fail because we have the inode
+ * locked. */
+ GF_ASSERT(
+ __ec_set_inode_size(fop, fop->fd->inode, size));
+ }
+ cbk->iatt[1].ia_size = size;
+ }
+ }
+ UNLOCK(&fop->fd->inode->lock);
+
+ if (fop->error == 0) {
+ cbk->op_ret *= ec->fragments;
+ if (cbk->op_ret < fop->head) {
+ cbk->op_ret = 0;
+ } else {
+ cbk->op_ret -= fop->head;
+ }
+ if (cbk->op_ret > fop->user_size) {
+ cbk->op_ret = fop->user_size;
+ }
+ }
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.writev != NULL) {
+ QUORUM_CBK(fop->cbks.writev, fop, fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, &cbk->iatt[0],
+ &cbk->iatt[1], cbk->xdata);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_DELAYED_START:
+ /* We have failed while doing partial reads. We need to restore
+ * original uid, gid. */
+ fop->frame->root->uid = fop->uid;
+ fop->frame->root->gid = fop->gid;
+
+ /* Fall through */
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_LOCK:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.writev != NULL) {
+ fop->cbks.writev(fop->req_frame, fop, fop->xl, -1, fop->error,
+ NULL, NULL, NULL);
+ }
+
+ return EC_STATE_LOCK_REUSE;
+
+ case -EC_STATE_LOCK_REUSE:
+ case EC_STATE_LOCK_REUSE:
+ ec_lock_reuse(fop);
+
+ return EC_STATE_UNLOCK;
+
+ case -EC_STATE_UNLOCK:
+ case EC_STATE_UNLOCK:
+ ec_unlock(fop);
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_writev(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_writev_cbk_t func, void *data, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ ec_cbk_t callback = {.writev = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(WRITE) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_WRITE, 0, target, fop_flags,
+ ec_wind_writev, ec_manager_writev, callback,
+ data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->int32 = count;
+ fop->offset = offset;
+ fop->uint32 = flags;
+
+ fop->use_fd = 1;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (count > 0) {
+ fop->vector = iov_dup(vector, count);
+ if (fop->vector == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a "
+ "vector list.");
+
+ goto out;
+ }
+ fop->int32 = count;
+ }
+ if (iobref != NULL) {
+ fop->buffers = iobref_ref(iobref);
+ if (fop->buffers == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_BUF_REF_FAIL,
+ "Failed to reference a "
+ "buffer.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_copy_with_ref(xdata, NULL);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL, NULL);
+ }
+}
diff --git a/xlators/cluster/ec/src/ec-locks.c b/xlators/cluster/ec/src/ec-locks.c
new file mode 100644
index 00000000000..601960d6154
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-locks.c
@@ -0,0 +1,1128 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-combine.h"
+#include "ec-fops.h"
+#include "ec-messages.h"
+
+#define EC_LOCK_MODE_NONE 0
+#define EC_LOCK_MODE_INC 1
+#define EC_LOCK_MODE_ALL 2
+
+int32_t
+ec_lock_check(ec_fop_data_t *fop, uintptr_t *mask)
+{
+ ec_t *ec = fop->xl->private;
+ ec_cbk_data_t *ans = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ uintptr_t locked = 0;
+ int32_t good = 0;
+ int32_t eagain = 0;
+ int32_t estale = 0;
+ int32_t error = -1;
+
+ /* There are some errors that we'll handle in an special way while trying
+ * to acquire a lock.
+ *
+ * EAGAIN: If it's found during a parallel non-blocking lock request, we
+ * consider that there's contention on the inode, so we consider
+ * the acquisition a failure and try again with a sequential
+ * blocking lock request. This will ensure that we get a lock on
+ * as many bricks as possible (ignoring EAGAIN here would cause
+ * unnecessary triggers of self-healing).
+ *
+ * If it's found during a sequential blocking lock request, it's
+ * considered an error. Lock will only succeed if there are
+ * enough other bricks locked.
+ *
+ * ESTALE: This can appear during parallel or sequential lock request if
+ * the inode has just been unlinked. We consider this error is
+ * not recoverable, but we also don't consider it as fatal. So,
+ * if it happens during parallel lock, we won't attempt a
+ * sequential one unless there are EAGAIN errors on other
+ * bricks (and are enough to form a quorum), but if we reach
+ * quorum counting the ESTALE bricks, we consider the whole
+ * result of the operation is ESTALE instead of EIO.
+ */
+
+ list_for_each_entry(ans, &fop->cbk_list, list)
+ {
+ if (ans->op_ret >= 0) {
+ if (locked != 0) {
+ error = EIO;
+ }
+ locked |= ans->mask;
+ good = ans->count;
+ cbk = ans;
+ } else if (ans->op_errno == ESTALE) {
+ estale += ans->count;
+ } else if ((ans->op_errno == EAGAIN) &&
+ (fop->uint32 != EC_LOCK_MODE_INC)) {
+ eagain += ans->count;
+ }
+ }
+
+ if (error == -1) {
+ /* If we have enough quorum with succeeded and EAGAIN answers, we
+ * ignore for now any ESTALE answer. If there are EAGAIN answers,
+ * we retry with a sequential blocking lock request if needed.
+ * Otherwise we succeed. */
+ if ((good + eagain) >= ec->fragments) {
+ if (eagain == 0) {
+ if (fop->answer == NULL) {
+ fop->answer = cbk;
+ }
+
+ ec_update_good(fop, locked);
+
+ error = 0;
+ } else {
+ switch (fop->uint32) {
+ case EC_LOCK_MODE_NONE:
+ error = EAGAIN;
+ break;
+ case EC_LOCK_MODE_ALL:
+ fop->uint32 = EC_LOCK_MODE_INC;
+ break;
+ default:
+ /* This shouldn't happen because eagain cannot be > 0
+ * when fop->uint32 is EC_LOCK_MODE_INC. */
+ error = EIO;
+ break;
+ }
+ }
+ } else {
+ /* We have been unable to find enough candidates that will be able
+ * to take the lock. If we have quorum on some answer, we return
+ * it. Otherwise we check if ESTALE answers allow us to reach
+ * quorum. If so, we return ESTALE. */
+ if (fop->answer && fop->answer->op_ret < 0) {
+ error = fop->answer->op_errno;
+ } else if ((good + eagain + estale) >= ec->fragments) {
+ error = ESTALE;
+ } else {
+ error = EIO;
+ }
+ }
+ }
+
+ *mask = locked;
+
+ return error;
+}
+
+int32_t
+ec_lock_unlocked(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_UNLOCK_FAILED,
+ "Failed to unlock an entry/inode");
+ }
+
+ return 0;
+}
+
+int32_t
+ec_lock_lk_unlocked(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *flock,
+ dict_t *xdata)
+{
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, EC_MSG_LK_UNLOCK_FAILED,
+ "Failed to unlock an lk");
+ }
+
+ return 0;
+}
+
+/* FOP: entrylk */
+
+int32_t
+ec_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_ENTRYLK, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ ec_combine(cbk, NULL);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_entrylk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_entrylk_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->entrylk,
+ fop->str[0], &fop->loc[0], fop->str[1], fop->entrylk_cmd,
+ fop->entrylk_type, fop->xdata);
+}
+
+int32_t
+ec_manager_entrylk(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ if (fop->entrylk_cmd == ENTRYLK_LOCK) {
+ fop->uint32 = EC_LOCK_MODE_ALL;
+ fop->entrylk_cmd = ENTRYLK_LOCK_NB;
+ }
+
+ /* Fall through */
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_PREPARE_ANSWER:
+ if (fop->entrylk_cmd != ENTRYLK_UNLOCK) {
+ uintptr_t mask;
+
+ ec_fop_set_error(fop, ec_lock_check(fop, &mask));
+ if (fop->error != 0) {
+ if (mask != 0) {
+ if (fop->id == GF_FOP_ENTRYLK) {
+ ec_entrylk(
+ fop->frame, fop->xl, mask, 1, ec_lock_unlocked,
+ NULL, fop->str[0], &fop->loc[0], fop->str[1],
+ ENTRYLK_UNLOCK, fop->entrylk_type, fop->xdata);
+ } else {
+ ec_fentrylk(fop->frame, fop->xl, mask, 1,
+ ec_lock_unlocked, NULL, fop->str[0],
+ fop->fd, fop->str[1], ENTRYLK_UNLOCK,
+ fop->entrylk_type, fop->xdata);
+ }
+ }
+ if (fop->error < 0) {
+ fop->error = 0;
+
+ fop->entrylk_cmd = ENTRYLK_LOCK;
+
+ ec_dispatch_inc(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+ }
+ }
+ } else {
+ ec_fop_prepare_answer(fop, _gf_true);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->id == GF_FOP_ENTRYLK) {
+ if (fop->cbks.entrylk != NULL) {
+ fop->cbks.entrylk(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->xdata);
+ }
+ } else {
+ if (fop->cbks.fentrylk != NULL) {
+ fop->cbks.fentrylk(fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, cbk->xdata);
+ }
+ }
+
+ return EC_STATE_END;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->id == GF_FOP_ENTRYLK) {
+ if (fop->cbks.entrylk != NULL) {
+ fop->cbks.entrylk(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL);
+ }
+ } else {
+ if (fop->cbks.fentrylk != NULL) {
+ fop->cbks.fentrylk(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL);
+ }
+ }
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_entrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_entrylk_cbk_t func, void *data,
+ const char *volume, loc_t *loc, const char *basename,
+ entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+{
+ ec_cbk_t callback = {.entrylk = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(ENTRYLK) %p", frame);
+
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_ENTRYLK, 0, target,
+ fop_flags, ec_wind_entrylk, ec_manager_entrylk,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->entrylk_cmd = cmd;
+ fop->entrylk_type = type;
+
+ if (volume != NULL) {
+ fop->str[0] = gf_strdup(volume);
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
+
+ goto out;
+ }
+ }
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (basename != NULL) {
+ fop->str[1] = gf_strdup(basename);
+ if (fop->str[1] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
+ }
+}
+
+/* FOP: fentrylk */
+
+int32_t
+ec_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FENTRYLK, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ ec_combine(cbk, NULL);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_fentrylk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_fentrylk_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->fentrylk,
+ fop->str[0], fop->fd, fop->str[1], fop->entrylk_cmd,
+ fop->entrylk_type, fop->xdata);
+}
+
+void
+ec_fentrylk(call_frame_t *frame, xlator_t *this, uintptr_t target,
+ uint32_t fop_flags, fop_fentrylk_cbk_t func, void *data,
+ const char *volume, fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ ec_cbk_t callback = {.fentrylk = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(FENTRYLK) %p", frame);
+
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FENTRYLK, 0, target,
+ fop_flags, ec_wind_fentrylk, ec_manager_entrylk,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ fop->entrylk_cmd = cmd;
+ fop->entrylk_type = type;
+
+ if (volume != NULL) {
+ fop->str[0] = gf_strdup(volume);
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
+
+ goto out;
+ }
+ }
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (basename != NULL) {
+ fop->str[1] = gf_strdup(basename);
+ if (fop->str[1] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
+
+ goto out;
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
+ }
+}
+
+/* FOP: inodelk */
+
+int32_t
+ec_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_INODELK, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ ec_combine(cbk, NULL);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_inodelk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_inodelk_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->inodelk,
+ fop->str[0], &fop->loc[0], fop->int32, &fop->flock,
+ fop->xdata);
+}
+
+int32_t
+ec_manager_inodelk(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ fop->flock.l_len += ec_adjust_offset_down(
+ fop->xl->private, &fop->flock.l_start, _gf_true);
+ ec_adjust_offset_up(fop->xl->private, &fop->flock.l_len, _gf_true);
+ if ((fop->int32 == F_SETLKW) && (fop->flock.l_type != F_UNLCK)) {
+ fop->uint32 = EC_LOCK_MODE_ALL;
+ fop->int32 = F_SETLK;
+ }
+
+ /* Fall through */
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_PREPARE_ANSWER:
+ if (fop->flock.l_type != F_UNLCK) {
+ uintptr_t mask;
+
+ ec_fop_set_error(fop, ec_lock_check(fop, &mask));
+ if (fop->error != 0) {
+ if (mask != 0) {
+ ec_t *ec = fop->xl->private;
+ struct gf_flock flock;
+
+ flock.l_type = F_UNLCK;
+ flock.l_whence = fop->flock.l_whence;
+ flock.l_start = fop->flock.l_start * ec->fragments;
+ flock.l_len = fop->flock.l_len * ec->fragments;
+ flock.l_pid = 0;
+ flock.l_owner.len = 0;
+
+ if (fop->id == GF_FOP_INODELK) {
+ ec_inodelk(fop->frame, fop->xl,
+ &fop->frame->root->lk_owner, mask, 1,
+ ec_lock_unlocked, NULL, fop->str[0],
+ &fop->loc[0], F_SETLK, &flock,
+ fop->xdata);
+ } else {
+ ec_finodelk(fop->frame, fop->xl,
+ &fop->frame->root->lk_owner, mask, 1,
+ ec_lock_unlocked, NULL, fop->str[0],
+ fop->fd, F_SETLK, &flock, fop->xdata);
+ }
+ }
+ if (fop->error < 0) {
+ fop->error = 0;
+
+ fop->int32 = F_SETLKW;
+
+ ec_dispatch_inc(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+ }
+ }
+ } else {
+ ec_fop_prepare_answer(fop, _gf_true);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->id == GF_FOP_INODELK) {
+ if (fop->cbks.inodelk != NULL) {
+ fop->cbks.inodelk(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, cbk->xdata);
+ }
+ } else {
+ if (fop->cbks.finodelk != NULL) {
+ fop->cbks.finodelk(fop->req_frame, fop, fop->xl,
+ cbk->op_ret, cbk->op_errno, cbk->xdata);
+ }
+ }
+
+ return EC_STATE_END;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->id == GF_FOP_INODELK) {
+ if (fop->cbks.inodelk != NULL) {
+ fop->cbks.inodelk(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL);
+ }
+ } else {
+ if (fop->cbks.finodelk != NULL) {
+ fop->cbks.finodelk(fop->req_frame, fop, fop->xl, -1,
+ fop->error, NULL);
+ }
+ }
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_inodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
+ uintptr_t target, uint32_t fop_flags, fop_inodelk_cbk_t func,
+ void *data, const char *volume, loc_t *loc, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ ec_cbk_t callback = {.inodelk = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(INODELK) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_INODELK, 0, target,
+ fop_flags, ec_wind_inodelk, ec_manager_inodelk,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->int32 = cmd;
+ ec_owner_copy(fop->frame, owner);
+
+ if (volume != NULL) {
+ fop->str[0] = gf_strdup(volume);
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
+
+ goto out;
+ }
+ }
+ if (loc != NULL) {
+ if (loc_copy(&fop->loc[0], loc) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_LOC_COPY_FAIL,
+ "Failed to copy a location.");
+
+ goto out;
+ }
+ }
+ if (flock != NULL) {
+ fop->flock.l_type = flock->l_type;
+ fop->flock.l_whence = flock->l_whence;
+ fop->flock.l_start = flock->l_start;
+ fop->flock.l_len = flock->l_len;
+ fop->flock.l_pid = flock->l_pid;
+ fop->flock.l_owner.len = flock->l_owner.len;
+ if (flock->l_owner.len > 0) {
+ memcpy(fop->flock.l_owner.data, flock->l_owner.data,
+ flock->l_owner.len);
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
+ }
+}
+
+/* FOP: finodelk */
+
+int32_t
+ec_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_FINODELK, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ ec_combine(cbk, NULL);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_finodelk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_finodelk_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->finodelk,
+ fop->str[0], fop->fd, fop->int32, &fop->flock,
+ fop->xdata);
+}
+
+void
+ec_finodelk(call_frame_t *frame, xlator_t *this, gf_lkowner_t *owner,
+ uintptr_t target, uint32_t fop_flags, fop_finodelk_cbk_t func,
+ void *data, const char *volume, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ ec_cbk_t callback = {.finodelk = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(FINODELK) %p", frame);
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_FINODELK, 0, target,
+ fop_flags, ec_wind_finodelk, ec_manager_inodelk,
+ callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ fop->int32 = cmd;
+ ec_owner_copy(fop->frame, owner);
+
+ if (volume != NULL) {
+ fop->str[0] = gf_strdup(volume);
+ if (fop->str[0] == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to duplicate a string.");
+
+ goto out;
+ }
+ }
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (flock != NULL) {
+ fop->flock.l_type = flock->l_type;
+ fop->flock.l_whence = flock->l_whence;
+ fop->flock.l_start = flock->l_start;
+ fop->flock.l_len = flock->l_len;
+ fop->flock.l_pid = flock->l_pid;
+ fop->flock.l_owner.len = flock->l_owner.len;
+ if (flock->l_owner.len > 0) {
+ memcpy(fop->flock.l_owner.data, flock->l_owner.data,
+ flock->l_owner.len);
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL);
+ }
+}
+
+/* FOP: lk */
+
+int32_t
+ec_combine_lk(ec_fop_data_t *fop, ec_cbk_data_t *dst, ec_cbk_data_t *src)
+{
+ if (!ec_flock_compare(&dst->flock, &src->flock)) {
+ gf_msg(fop->xl->name, GF_LOG_NOTICE, 0, EC_MSG_LOCK_MISMATCH,
+ "Mismatching lock in "
+ "answers of 'GF_FOP_LK'");
+
+ return 0;
+ }
+
+ return 1;
+}
+
+int32_t
+ec_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *flock, dict_t *xdata)
+{
+ ec_fop_data_t *fop = NULL;
+ ec_cbk_data_t *cbk = NULL;
+ int32_t idx = (int32_t)(uintptr_t)cookie;
+
+ VALIDATE_OR_GOTO(this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame->local, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = frame->local;
+
+ ec_trace("CBK", fop, "idx=%d, frame=%p, op_ret=%d, op_errno=%d", idx, frame,
+ op_ret, op_errno);
+
+ cbk = ec_cbk_data_allocate(frame, this, fop, GF_FOP_LK, idx, op_ret,
+ op_errno);
+ if (cbk != NULL) {
+ if (op_ret >= 0) {
+ if (flock != NULL) {
+ cbk->flock.l_type = flock->l_type;
+ cbk->flock.l_whence = flock->l_whence;
+ cbk->flock.l_start = flock->l_start;
+ cbk->flock.l_len = flock->l_len;
+ cbk->flock.l_pid = flock->l_pid;
+ cbk->flock.l_owner.len = flock->l_owner.len;
+ if (flock->l_owner.len > 0) {
+ memcpy(cbk->flock.l_owner.data, flock->l_owner.data,
+ flock->l_owner.len);
+ }
+ }
+ }
+ if (xdata != NULL) {
+ cbk->xdata = dict_ref(xdata);
+ if (cbk->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ ec_combine(cbk, ec_combine_lk);
+ }
+
+out:
+ if (fop != NULL) {
+ ec_complete(fop);
+ }
+
+ return 0;
+}
+
+void
+ec_wind_lk(ec_t *ec, ec_fop_data_t *fop, int32_t idx)
+{
+ ec_trace("WIND", fop, "idx=%d", idx);
+
+ STACK_WIND_COOKIE(fop->frame, ec_lk_cbk, (void *)(uintptr_t)idx,
+ ec->xl_list[idx], ec->xl_list[idx]->fops->lk, fop->fd,
+ fop->int32, &fop->flock, fop->xdata);
+}
+
+int32_t
+ec_manager_lk(ec_fop_data_t *fop, int32_t state)
+{
+ ec_cbk_data_t *cbk;
+
+ switch (state) {
+ case EC_STATE_INIT:
+ if ((fop->int32 == F_SETLKW) && (fop->flock.l_type != F_UNLCK)) {
+ fop->uint32 = EC_LOCK_MODE_ALL;
+ fop->int32 = F_SETLK;
+ }
+
+ /* Fall through */
+
+ case EC_STATE_DISPATCH:
+ ec_dispatch_all(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+
+ case EC_STATE_PREPARE_ANSWER:
+ case -EC_STATE_PREPARE_ANSWER:
+ if (fop->flock.l_type != F_UNLCK) {
+ uintptr_t mask;
+
+ ec_fop_set_error(fop, ec_lock_check(fop, &mask));
+ if (fop->error != 0) {
+ if (mask != 0) {
+ struct gf_flock flock = {0};
+
+ flock.l_type = F_UNLCK;
+ flock.l_whence = fop->flock.l_whence;
+ flock.l_start = fop->flock.l_start;
+ flock.l_len = fop->flock.l_len;
+ flock.l_pid = fop->flock.l_pid;
+ lk_owner_copy(&flock.l_owner, &fop->flock.l_owner);
+
+ ec_lk(fop->frame, fop->xl, mask, 1, ec_lock_lk_unlocked,
+ NULL, fop->fd, F_SETLK, &flock, fop->xdata);
+ }
+
+ if (fop->error < 0) {
+ fop->error = 0;
+
+ fop->int32 = F_SETLKW;
+
+ ec_dispatch_inc(fop);
+
+ return EC_STATE_PREPARE_ANSWER;
+ }
+ }
+ } else {
+ ec_fop_prepare_answer(fop, _gf_true);
+ }
+
+ return EC_STATE_REPORT;
+
+ case EC_STATE_REPORT:
+ cbk = fop->answer;
+
+ GF_ASSERT(cbk != NULL);
+
+ if (fop->cbks.lk != NULL) {
+ fop->cbks.lk(fop->req_frame, fop, fop->xl, cbk->op_ret,
+ cbk->op_errno, &cbk->flock, cbk->xdata);
+ }
+
+ return EC_STATE_END;
+
+ case -EC_STATE_INIT:
+ case -EC_STATE_DISPATCH:
+ case -EC_STATE_REPORT:
+ GF_ASSERT(fop->error != 0);
+
+ if (fop->cbks.lk != NULL) {
+ fop->cbks.lk(fop->req_frame, fop, fop->xl, -1, fop->error, NULL,
+ NULL);
+ }
+
+ return EC_STATE_END;
+
+ default:
+ gf_msg(fop->xl->name, GF_LOG_ERROR, EINVAL, EC_MSG_UNHANDLED_STATE,
+ "Unhandled state %d for %s", state, ec_fop_name(fop->id));
+
+ return EC_STATE_END;
+ }
+}
+
+void
+ec_lk(call_frame_t *frame, xlator_t *this, uintptr_t target, uint32_t fop_flags,
+ fop_lk_cbk_t func, void *data, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ ec_cbk_t callback = {.lk = func};
+ ec_fop_data_t *fop = NULL;
+ int32_t error = ENOMEM;
+
+ gf_msg_trace("ec", 0, "EC(LK) %p", frame);
+
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ fop = ec_fop_data_allocate(frame, this, GF_FOP_LK, 0, target, fop_flags,
+ ec_wind_lk, ec_manager_lk, callback, data);
+ if (fop == NULL) {
+ goto out;
+ }
+
+ fop->use_fd = 1;
+
+ fop->int32 = cmd;
+
+ if (fd != NULL) {
+ fop->fd = fd_ref(fd);
+ if (fop->fd == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_FILE_DESC_REF_FAIL,
+ "Failed to reference a "
+ "file descriptor.");
+
+ goto out;
+ }
+ }
+ if (flock != NULL) {
+ fop->flock.l_type = flock->l_type;
+ fop->flock.l_whence = flock->l_whence;
+ fop->flock.l_start = flock->l_start;
+ fop->flock.l_len = flock->l_len;
+ fop->flock.l_pid = flock->l_pid;
+ fop->flock.l_owner.len = flock->l_owner.len;
+ if (flock->l_owner.len > 0) {
+ memcpy(fop->flock.l_owner.data, flock->l_owner.data,
+ flock->l_owner.len);
+ }
+ }
+ if (xdata != NULL) {
+ fop->xdata = dict_ref(xdata);
+ if (fop->xdata == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_DICT_REF_FAIL,
+ "Failed to reference a "
+ "dictionary.");
+
+ goto out;
+ }
+ }
+
+ error = 0;
+
+out:
+ if (fop != NULL) {
+ ec_manager(fop, error);
+ } else {
+ func(frame, NULL, this, -1, error, NULL, NULL);
+ }
+}
diff --git a/xlators/cluster/ec/src/ec-mem-types.h b/xlators/cluster/ec/src/ec-mem-types.h
new file mode 100644
index 00000000000..3252c4c1c58
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-mem-types.h
@@ -0,0 +1,30 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_MEM_TYPES_H__
+#define __EC_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_ec_mem_types_ {
+ ec_mt_ec_t = gf_common_mt_end + 1,
+ ec_mt_xlator_t,
+ ec_mt_ec_inode_t,
+ ec_mt_ec_fd_t,
+ ec_mt_subvol_healer_t,
+ ec_mt_ec_gf_t,
+ ec_mt_ec_code_t,
+ ec_mt_ec_code_builder_t,
+ ec_mt_ec_matrix_t,
+ ec_mt_ec_stripe_t,
+ ec_mt_end
+};
+
+#endif /* __EC_MEM_TYPES_H__ */
diff --git a/xlators/cluster/ec/src/ec-messages.h b/xlators/cluster/ec/src/ec-messages.h
new file mode 100644
index 00000000000..72e98f11286
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-messages.h
@@ -0,0 +1,61 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _EC_MESSAGES_H_
+#define _EC_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(EC, EC_MSG_INVALID_CONFIG, EC_MSG_HEAL_FAIL,
+ EC_MSG_DICT_COMBINE_FAIL, EC_MSG_STIME_COMBINE_FAIL,
+ EC_MSG_INVALID_DICT_NUMS, EC_MSG_IATT_COMBINE_FAIL,
+ EC_MSG_INVALID_FORMAT, EC_MSG_DICT_GET_FAILED,
+ EC_MSG_UNHANDLED_STATE, EC_MSG_FILE_DESC_REF_FAIL,
+ EC_MSG_LOC_COPY_FAIL, EC_MSG_BUF_REF_FAIL, EC_MSG_DICT_REF_FAIL,
+ EC_MSG_LK_UNLOCK_FAILED, EC_MSG_UNLOCK_FAILED,
+ EC_MSG_LOC_PARENT_INODE_MISSING, EC_MSG_INVALID_LOC_NAME,
+ EC_MSG_NO_MEMORY, EC_MSG_GFID_MISMATCH, EC_MSG_UNSUPPORTED_VERSION,
+ EC_MSG_FD_CREATE_FAIL, EC_MSG_READDIRP_REQ_PREP_FAIL,
+ EC_MSG_LOOKUP_REQ_PREP_FAIL, EC_MSG_INODE_REF_FAIL,
+ EC_MSG_LOOKUP_READAHEAD_FAIL, EC_MSG_FRAME_MISMATCH,
+ EC_MSG_XLATOR_MISMATCH, EC_MSG_VECTOR_MISMATCH, EC_MSG_IATT_MISMATCH,
+ EC_MSG_FD_MISMATCH, EC_MSG_DICT_MISMATCH, EC_MSG_INDEX_DIR_GET_FAIL,
+ EC_MSG_PREOP_LOCK_FAILED, EC_MSG_CHILDS_INSUFFICIENT,
+ EC_MSG_OP_EXEC_UNAVAIL, EC_MSG_UNLOCK_DELAY_FAILED,
+ EC_MSG_SIZE_VERS_UPDATE_FAIL, EC_MSG_INVALID_REQUEST,
+ EC_MSG_INVALID_LOCK_TYPE, EC_MSG_SIZE_VERS_GET_FAIL,
+ EC_MSG_FILE_SIZE_GET_FAIL, EC_MSG_FOP_MISMATCH,
+ EC_MSG_SUBVOL_ID_DICT_SET_FAIL, EC_MSG_SUBVOL_BUILD_FAIL,
+ EC_MSG_XLATOR_INIT_FAIL, EC_MSG_NO_PARENTS, EC_MSG_TIMER_CREATE_FAIL,
+ EC_MSG_TOO_MANY_SUBVOLS, EC_MSG_DATA_UNAVAILABLE,
+ EC_MSG_INODE_REMOVE_FAIL, EC_MSG_INVALID_REDUNDANCY,
+ EC_MSG_XLATOR_PARSE_OPT_FAIL, EC_MSG_OP_FAIL_ON_SUBVOLS,
+ EC_MSG_INVALID_INODE, EC_MSG_LOCK_MISMATCH, EC_MSG_XDATA_MISMATCH,
+ EC_MSG_HEALING_INFO, EC_MSG_HEAL_SUCCESS, EC_MSG_FULL_SWEEP_START,
+ EC_MSG_FULL_SWEEP_STOP, EC_MSG_INVALID_FOP, EC_MSG_EC_UP,
+ EC_MSG_EC_DOWN, EC_MSG_SIZE_XATTR_GET_FAIL,
+ EC_MSG_VER_XATTR_GET_FAIL, EC_MSG_CONFIG_XATTR_GET_FAIL,
+ EC_MSG_CONFIG_XATTR_INVALID, EC_MSG_EXTENSION, EC_MSG_EXTENSION_NONE,
+ EC_MSG_EXTENSION_UNKNOWN, EC_MSG_EXTENSION_UNSUPPORTED,
+ EC_MSG_EXTENSION_FAILED, EC_MSG_NO_GF, EC_MSG_MATRIX_FAILED,
+ EC_MSG_DYN_CREATE_FAILED, EC_MSG_DYN_CODEGEN_FAILED,
+ EC_MSG_THREAD_CLEANUP_FAILED, EC_MSG_FD_BAD);
+
+#endif /* !_EC_MESSAGES_H_ */
diff --git a/xlators/cluster/ec/src/ec-method.c b/xlators/cluster/ec/src/ec-method.c
new file mode 100644
index 00000000000..55faed0b193
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-method.c
@@ -0,0 +1,433 @@
+/*
+ Copyright (c) 2012-2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <string.h>
+#include <inttypes.h>
+
+#include "ec-types.h"
+#include "ec-mem-types.h"
+#include "ec-galois.h"
+#include "ec-code.h"
+#include "ec-method.h"
+#include "ec-helpers.h"
+
+static void
+ec_method_matrix_normal(ec_gf_t *gf, uint32_t *matrix, uint32_t columns,
+ uint32_t *values, uint32_t count)
+{
+ uint32_t i, j, v, tmp;
+
+ columns--;
+ for (i = 0; i < count; i++) {
+ v = *values++;
+ *matrix++ = tmp = ec_gf_exp(gf, v, columns);
+ for (j = 0; j < columns; j++) {
+ *matrix++ = tmp = ec_gf_div(gf, tmp, v);
+ }
+ }
+}
+
+static void
+ec_method_matrix_inverse(ec_gf_t *gf, uint32_t *matrix, uint32_t *values,
+ uint32_t count)
+{
+ uint32_t a[count];
+ uint32_t i, j, p, last, tmp;
+
+ last = count - 1;
+ for (i = 0; i < last; i++) {
+ a[i] = 1;
+ }
+ a[i] = values[0];
+ for (i = last; i > 0; i--) {
+ for (j = i - 1; j < last; j++) {
+ a[j] = a[j + 1] ^ ec_gf_mul(gf, values[i], a[j]);
+ }
+ a[j] = ec_gf_mul(gf, values[i], a[j]);
+ }
+ for (i = 0; i < count; i++) {
+ p = a[0];
+ matrix += count;
+ *matrix = tmp = p ^ values[i];
+ for (j = 1; j < last; j++) {
+ matrix += count;
+ *matrix = tmp = a[j] ^ ec_gf_mul(gf, values[i], tmp);
+ p = tmp ^ ec_gf_mul(gf, values[i], p);
+ }
+ for (j = 0; j < last; j++) {
+ *matrix = ec_gf_div(gf, *matrix, p);
+ matrix -= count;
+ }
+ *matrix = ec_gf_div(gf, 1, p);
+ matrix++;
+ }
+}
+
+static void
+ec_method_matrix_init(ec_matrix_list_t *list, ec_matrix_t *matrix,
+ uintptr_t mask, uint32_t *rows, gf_boolean_t inverse)
+{
+ uint32_t i;
+
+ matrix->refs = 1;
+ matrix->mask = mask;
+ matrix->code = list->code;
+ matrix->columns = list->columns;
+ INIT_LIST_HEAD(&matrix->lru);
+
+ if (inverse) {
+ matrix->rows = list->columns;
+ ec_method_matrix_inverse(matrix->code->gf, matrix->values, rows,
+ matrix->rows);
+ for (i = 0; i < matrix->rows; i++) {
+ matrix->row_data[i].values = matrix->values + i * matrix->columns;
+ matrix->row_data[i].func.interleaved = ec_code_build_interleaved(
+ matrix->code, EC_METHOD_WORD_SIZE, matrix->row_data[i].values,
+ matrix->columns);
+ }
+ } else {
+ matrix->rows = list->rows;
+ ec_method_matrix_normal(matrix->code->gf, matrix->values,
+ matrix->columns, rows, matrix->rows);
+ for (i = 0; i < matrix->rows; i++) {
+ matrix->row_data[i].values = matrix->values + i * matrix->columns;
+ matrix->row_data[i].func.linear = ec_code_build_linear(
+ matrix->code, EC_METHOD_WORD_SIZE, matrix->row_data[i].values,
+ matrix->columns);
+ }
+ }
+}
+
+static void
+ec_method_matrix_release(ec_matrix_t *matrix)
+{
+ uint32_t i;
+
+ for (i = 0; i < matrix->rows; i++) {
+ if (matrix->row_data[i].func.linear != NULL) {
+ ec_code_release(matrix->code, &matrix->row_data[i].func);
+ matrix->row_data[i].func.linear = NULL;
+ }
+ }
+}
+
+static void
+ec_method_matrix_destroy(ec_matrix_list_t *list, ec_matrix_t *matrix)
+{
+ list_del_init(&matrix->lru);
+
+ ec_method_matrix_release(matrix);
+
+ mem_put(matrix);
+
+ list->count--;
+}
+
+static void
+ec_method_matrix_unref(ec_matrix_list_t *list, ec_matrix_t *matrix)
+{
+ if (--matrix->refs == 0) {
+ list_add_tail(&matrix->lru, &list->lru);
+ if (list->count > list->max) {
+ matrix = list_first_entry(&list->lru, ec_matrix_t, lru);
+ ec_method_matrix_destroy(list, matrix);
+ }
+ }
+}
+
+static ec_matrix_t *
+ec_method_matrix_lookup(ec_matrix_list_t *list, uintptr_t mask, uint32_t *pos)
+{
+ ec_matrix_t *matrix;
+ uint32_t i, j, k;
+
+ i = 0;
+ j = list->count;
+ while (i < j) {
+ k = (i + j) >> 1;
+ matrix = list->objects[k];
+ if (matrix->mask == mask) {
+ *pos = k;
+ return matrix;
+ }
+ if (matrix->mask < mask) {
+ i = k + 1;
+ } else {
+ j = k;
+ }
+ }
+ *pos = i;
+
+ return NULL;
+}
+
+static void
+ec_method_matrix_remove(ec_matrix_list_t *list, uintptr_t mask)
+{
+ uint32_t pos;
+
+ if (ec_method_matrix_lookup(list, mask, &pos) != NULL) {
+ list->count--;
+ if (pos < list->count) {
+ memmove(list->objects + pos, list->objects + pos + 1,
+ sizeof(ec_matrix_t *) * (list->count - pos));
+ }
+ }
+}
+
+static void
+ec_method_matrix_insert(ec_matrix_list_t *list, ec_matrix_t *matrix)
+{
+ uint32_t pos;
+
+ GF_ASSERT(ec_method_matrix_lookup(list, matrix->mask, &pos) == NULL);
+
+ if (pos < list->count) {
+ memmove(list->objects + pos + 1, list->objects + pos,
+ sizeof(ec_matrix_t *) * (list->count - pos));
+ }
+ list->objects[pos] = matrix;
+ list->count++;
+}
+
+static ec_matrix_t *
+ec_method_matrix_get(ec_matrix_list_t *list, uintptr_t mask, uint32_t *rows)
+{
+ ec_matrix_t *matrix;
+ uint32_t pos;
+
+ LOCK(&list->lock);
+
+ matrix = ec_method_matrix_lookup(list, mask, &pos);
+ if (matrix != NULL) {
+ list_del_init(&matrix->lru);
+ matrix->refs++;
+
+ goto out;
+ }
+
+ if ((list->count >= list->max) && !list_empty(&list->lru)) {
+ matrix = list_first_entry(&list->lru, ec_matrix_t, lru);
+ list_del_init(&matrix->lru);
+
+ ec_method_matrix_remove(list, matrix->mask);
+
+ ec_method_matrix_release(matrix);
+ } else {
+ matrix = mem_get0(list->pool);
+ if (matrix == NULL) {
+ matrix = EC_ERR(ENOMEM);
+ goto out;
+ }
+ matrix->values = (uint32_t *)((uintptr_t)matrix + sizeof(ec_matrix_t) +
+ sizeof(ec_matrix_row_t) * list->columns);
+ }
+
+ ec_method_matrix_init(list, matrix, mask, rows, _gf_true);
+
+ if (list->count < list->max) {
+ ec_method_matrix_insert(list, matrix);
+ } else {
+ matrix->mask = 0;
+ }
+
+out:
+ UNLOCK(&list->lock);
+
+ return matrix;
+}
+
+static void
+ec_method_matrix_put(ec_matrix_list_t *list, ec_matrix_t *matrix)
+{
+ LOCK(&list->lock);
+
+ ec_method_matrix_unref(list, matrix);
+
+ UNLOCK(&list->lock);
+}
+
+static int32_t
+ec_method_setup(xlator_t *xl, ec_matrix_list_t *list, const char *gen)
+{
+ ec_matrix_t *matrix;
+ uint32_t values[list->rows];
+ uint32_t i;
+ int32_t err;
+
+ matrix = GF_MALLOC(sizeof(ec_matrix_t) +
+ sizeof(ec_matrix_row_t) * list->rows +
+ sizeof(uint32_t) * list->columns * list->rows,
+ ec_mt_ec_matrix_t);
+ if (matrix == NULL) {
+ err = -ENOMEM;
+ goto failed;
+ }
+ memset(matrix, 0, sizeof(ec_matrix_t));
+ matrix->values = (uint32_t *)((uintptr_t)matrix + sizeof(ec_matrix_t) +
+ sizeof(ec_matrix_row_t) * list->rows);
+
+ list->code = ec_code_create(list->gf, ec_code_detect(xl, gen));
+ if (EC_IS_ERR(list->code)) {
+ err = EC_GET_ERR(list->code);
+ list->code = NULL;
+ goto failed_matrix;
+ }
+
+ for (i = 0; i < list->rows; i++) {
+ values[i] = i + 1;
+ }
+ ec_method_matrix_init(list, matrix, 0, values, _gf_false);
+
+ list->encode = matrix;
+
+ return 0;
+
+failed_matrix:
+ GF_FREE(matrix);
+failed:
+ return err;
+}
+
+int32_t
+ec_method_init(xlator_t *xl, ec_matrix_list_t *list, uint32_t columns,
+ uint32_t rows, uint32_t max, const char *gen)
+{
+ list->columns = columns;
+ list->rows = rows;
+ list->max = max;
+ list->stripe = EC_METHOD_CHUNK_SIZE * list->columns;
+ INIT_LIST_HEAD(&list->lru);
+ int32_t err;
+
+ list->pool = mem_pool_new_fn(xl->ctx,
+ sizeof(ec_matrix_t) +
+ sizeof(ec_matrix_row_t) * columns +
+ sizeof(uint32_t) * columns * columns,
+ 128, "ec_matrix_t");
+ if (list->pool == NULL) {
+ err = -ENOMEM;
+ goto failed;
+ }
+
+ list->objects = GF_MALLOC(sizeof(ec_matrix_t *) * max, ec_mt_ec_matrix_t);
+ if (list->objects == NULL) {
+ err = -ENOMEM;
+ goto failed_pool;
+ }
+
+ list->gf = ec_gf_prepare(EC_GF_BITS, EC_GF_MOD);
+ if (EC_IS_ERR(list->gf)) {
+ err = EC_GET_ERR(list->gf);
+ goto failed_objects;
+ }
+
+ err = ec_method_setup(xl, list, gen);
+ if (err != 0) {
+ goto failed_gf;
+ }
+
+ LOCK_INIT(&list->lock);
+
+ return 0;
+
+failed_gf:
+ ec_gf_destroy(list->gf);
+failed_objects:
+ GF_FREE(list->objects);
+failed_pool:
+ mem_pool_destroy(list->pool);
+failed:
+ list->pool = NULL;
+ list->objects = NULL;
+ list->gf = NULL;
+
+ return err;
+}
+
+void
+ec_method_fini(ec_matrix_list_t *list)
+{
+ ec_matrix_t *matrix;
+
+ if (list->encode == NULL) {
+ return;
+ }
+
+ while (!list_empty(&list->lru)) {
+ matrix = list_first_entry(&list->lru, ec_matrix_t, lru);
+ ec_method_matrix_destroy(list, matrix);
+ }
+
+ GF_ASSERT(list->count == 0);
+
+ if (list->pool) /*Init was successful*/
+ LOCK_DESTROY(&list->lock);
+
+ ec_method_matrix_release(list->encode);
+ GF_FREE(list->encode);
+
+ ec_code_destroy(list->code);
+ ec_gf_destroy(list->gf);
+ GF_FREE(list->objects);
+
+ if (list->pool)
+ mem_pool_destroy(list->pool);
+}
+
+int32_t
+ec_method_update(xlator_t *xl, ec_matrix_list_t *list, const char *gen)
+{
+ /* TODO: Allow changing code generator */
+
+ return 0;
+}
+
+void
+ec_method_encode(ec_matrix_list_t *list, uint64_t size, void *in, void **out)
+{
+ ec_matrix_t *matrix;
+ uint64_t pos;
+ uint32_t i;
+
+ matrix = list->encode;
+ for (pos = 0; pos < size; pos += list->stripe) {
+ for (i = 0; i < matrix->rows; i++) {
+ matrix->row_data[i].func.linear(
+ out[i], in, pos, matrix->row_data[i].values, list->columns);
+ out[i] += EC_METHOD_CHUNK_SIZE;
+ }
+ }
+}
+
+int32_t
+ec_method_decode(ec_matrix_list_t *list, uint64_t size, uintptr_t mask,
+ uint32_t *rows, void **in, void *out)
+{
+ ec_matrix_t *matrix;
+ uint64_t pos;
+ uint32_t i;
+
+ matrix = ec_method_matrix_get(list, mask, rows);
+ if (EC_IS_ERR(matrix)) {
+ return EC_GET_ERR(matrix);
+ }
+ for (pos = 0; pos < size; pos += EC_METHOD_CHUNK_SIZE) {
+ for (i = 0; i < matrix->rows; i++) {
+ matrix->row_data[i].func.interleaved(
+ out, in, pos, matrix->row_data[i].values, list->columns);
+ out += EC_METHOD_CHUNK_SIZE;
+ }
+ }
+
+ ec_method_matrix_put(list, matrix);
+
+ return 0;
+}
diff --git a/xlators/cluster/ec/src/ec-method.h b/xlators/cluster/ec/src/ec-method.h
new file mode 100644
index 00000000000..f91233b2f88
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-method.h
@@ -0,0 +1,48 @@
+/*
+ Copyright (c) 2012-2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_METHOD_H__
+#define __EC_METHOD_H__
+
+#include "ec-types.h"
+#include "ec-galois.h"
+
+#define EC_GF_BITS 8
+#define EC_GF_MOD 0x11D
+
+#define EC_GF_SIZE (1 << EC_GF_BITS)
+
+/* Determines the maximum size of the matrix used to encode/decode data */
+#define EC_METHOD_MAX_FRAGMENTS 16
+/* Determines the maximum number of usable elements in the Galois Field */
+#define EC_METHOD_MAX_NODES (EC_GF_SIZE - 1)
+
+#define EC_METHOD_WORD_SIZE 64
+
+#define EC_METHOD_CHUNK_SIZE (EC_METHOD_WORD_SIZE * EC_GF_BITS)
+
+int32_t
+ec_method_init(xlator_t *xl, ec_matrix_list_t *list, uint32_t columns,
+ uint32_t rows, uint32_t max, const char *gen);
+
+void
+ec_method_fini(ec_matrix_list_t *list);
+
+int32_t
+ec_method_update(xlator_t *xl, ec_matrix_list_t *list, const char *gen);
+
+void
+ec_method_encode(ec_matrix_list_t *list, uint64_t size, void *in, void **out);
+
+int32_t
+ec_method_decode(ec_matrix_list_t *list, uint64_t size, uintptr_t mask,
+ uint32_t *rows, void **in, void *out);
+
+#endif /* __EC_METHOD_H__ */
diff --git a/xlators/cluster/ec/src/ec-types.h b/xlators/cluster/ec/src/ec-types.h
new file mode 100644
index 00000000000..de9b89bb2c9
--- /dev/null
+++ b/xlators/cluster/ec/src/ec-types.h
@@ -0,0 +1,690 @@
+/*
+ Copyright (c) 2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_TYPES_H__
+#define __EC_TYPES_H__
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/timer.h>
+#include "libxlator.h"
+#include <glusterfs/atomic.h>
+
+#define EC_GF_MAX_REGS 16
+
+enum _ec_heal_need;
+typedef enum _ec_heal_need ec_heal_need_t;
+
+enum _ec_stripe_part;
+typedef enum _ec_stripe_part ec_stripe_part_t;
+
+enum _ec_read_policy;
+typedef enum _ec_read_policy ec_read_policy_t;
+
+struct _ec_config;
+typedef struct _ec_config ec_config_t;
+
+struct _ec_fd;
+typedef struct _ec_fd ec_fd_t;
+
+struct _ec_fragment_range;
+typedef struct _ec_fragment_range ec_fragment_range_t;
+
+struct _ec_inode;
+typedef struct _ec_inode ec_inode_t;
+
+union _ec_cbk;
+typedef union _ec_cbk ec_cbk_t;
+
+struct _ec_lock;
+typedef struct _ec_lock ec_lock_t;
+
+struct _ec_lock_link;
+typedef struct _ec_lock_link ec_lock_link_t;
+
+struct _ec_fop_data;
+typedef struct _ec_fop_data ec_fop_data_t;
+
+struct _ec_cbk_data;
+typedef struct _ec_cbk_data ec_cbk_data_t;
+
+enum _ec_gf_opcode;
+typedef enum _ec_gf_opcode ec_gf_opcode_t;
+
+struct _ec_gf_op;
+typedef struct _ec_gf_op ec_gf_op_t;
+
+struct _ec_gf_mul;
+typedef struct _ec_gf_mul ec_gf_mul_t;
+
+struct _ec_gf;
+typedef struct _ec_gf ec_gf_t;
+
+struct _ec_code_gen;
+typedef struct _ec_code_gen ec_code_gen_t;
+
+struct _ec_code;
+typedef struct _ec_code ec_code_t;
+
+struct _ec_code_arg;
+typedef struct _ec_code_arg ec_code_arg_t;
+
+struct _ec_code_op;
+typedef struct _ec_code_op ec_code_op_t;
+
+struct _ec_code_builder;
+typedef struct _ec_code_builder ec_code_builder_t;
+
+struct _ec_code_chunk;
+typedef struct _ec_code_chunk ec_code_chunk_t;
+
+struct _ec_stripe;
+typedef struct _ec_stripe ec_stripe_t;
+
+struct _ec_stripe_list;
+typedef struct _ec_stripe_list ec_stripe_list_t;
+
+struct _ec_code_space;
+typedef struct _ec_code_space ec_code_space_t;
+
+typedef void (*ec_code_func_linear_t)(void *dst, void *src, uint64_t offset,
+ uint32_t *values, uint32_t count);
+
+typedef void (*ec_code_func_interleaved_t)(void *dst, void **src,
+ uint64_t offset, uint32_t *values,
+ uint32_t count);
+
+union _ec_code_func;
+typedef union _ec_code_func ec_code_func_t;
+
+struct _ec_matrix_row;
+typedef struct _ec_matrix_row ec_matrix_row_t;
+
+struct _ec_matrix;
+typedef struct _ec_matrix ec_matrix_t;
+
+struct _ec_matrix_list;
+typedef struct _ec_matrix_list ec_matrix_list_t;
+
+struct _ec_heal;
+typedef struct _ec_heal ec_heal_t;
+
+struct _ec_self_heald;
+typedef struct _ec_self_heald ec_self_heald_t;
+
+struct _ec_statistics;
+typedef struct _ec_statistics ec_statistics_t;
+
+struct _ec;
+typedef struct _ec ec_t;
+
+typedef void (*ec_wind_f)(ec_t *, ec_fop_data_t *, int32_t);
+typedef int32_t (*ec_handler_f)(ec_fop_data_t *, int32_t);
+typedef void (*ec_resume_f)(ec_fop_data_t *, int32_t);
+
+enum _ec_read_policy { EC_ROUND_ROBIN, EC_GFID_HASH, EC_READ_POLICY_MAX };
+
+enum _ec_heal_need {
+ EC_HEAL_NONEED,
+ EC_HEAL_MAYBE,
+ EC_HEAL_MUST,
+ EC_HEAL_PURGE_INDEX
+};
+
+enum _ec_stripe_part { EC_STRIPE_HEAD, EC_STRIPE_TAIL };
+
+/* Enumartions to indicate FD status. */
+typedef enum { EC_FD_NOT_OPENED, EC_FD_OPENED, EC_FD_OPENING } ec_fd_status_t;
+
+struct _ec_config {
+ uint32_t version;
+ uint8_t algorithm;
+ uint8_t gf_word_size;
+ uint8_t bricks;
+ uint8_t redundancy;
+ uint32_t chunk_size;
+};
+
+struct _ec_fd {
+ loc_t loc;
+ uintptr_t open;
+ int32_t flags;
+ uint64_t bad_version;
+ ec_fd_status_t fd_status[0];
+};
+
+struct _ec_stripe {
+ struct list_head lru; /* LRU list member */
+ uint64_t frag_offset; /* Fragment offset of this stripe */
+ char data[]; /* Contents of the stripe */
+};
+
+struct _ec_stripe_list {
+ struct list_head lru;
+ uint32_t count;
+ uint32_t max;
+};
+
+struct _ec_inode {
+ ec_lock_t *inode_lock;
+ gf_boolean_t have_info;
+ gf_boolean_t have_config;
+ gf_boolean_t have_version;
+ gf_boolean_t have_size;
+ int32_t heal_count;
+ ec_config_t config;
+ uint64_t pre_version[2];
+ uint64_t post_version[2];
+ uint64_t pre_size;
+ uint64_t post_size;
+ uint64_t dirty[2];
+ struct list_head heal;
+ ec_stripe_list_t stripe_cache;
+ uint64_t bad_version;
+};
+
+typedef int32_t (*fop_heal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
+ int32_t, uintptr_t, uintptr_t, uintptr_t,
+ uint32_t, dict_t *);
+typedef int32_t (*fop_fheal_cbk_t)(call_frame_t *, void *, xlator_t *, int32_t,
+ int32_t, uintptr_t, uintptr_t, uintptr_t,
+ uint32_t, dict_t *);
+
+union _ec_cbk {
+ fop_access_cbk_t access;
+ fop_create_cbk_t create;
+ fop_discard_cbk_t discard;
+ fop_entrylk_cbk_t entrylk;
+ fop_fentrylk_cbk_t fentrylk;
+ fop_fallocate_cbk_t fallocate;
+ fop_flush_cbk_t flush;
+ fop_fsync_cbk_t fsync;
+ fop_fsyncdir_cbk_t fsyncdir;
+ fop_getxattr_cbk_t getxattr;
+ fop_fgetxattr_cbk_t fgetxattr;
+ fop_heal_cbk_t heal;
+ fop_fheal_cbk_t fheal;
+ fop_inodelk_cbk_t inodelk;
+ fop_finodelk_cbk_t finodelk;
+ fop_link_cbk_t link;
+ fop_lk_cbk_t lk;
+ fop_lookup_cbk_t lookup;
+ fop_mkdir_cbk_t mkdir;
+ fop_mknod_cbk_t mknod;
+ fop_open_cbk_t open;
+ fop_opendir_cbk_t opendir;
+ fop_readdir_cbk_t readdir;
+ fop_readdirp_cbk_t readdirp;
+ fop_readlink_cbk_t readlink;
+ fop_readv_cbk_t readv;
+ fop_removexattr_cbk_t removexattr;
+ fop_fremovexattr_cbk_t fremovexattr;
+ fop_rename_cbk_t rename;
+ fop_rmdir_cbk_t rmdir;
+ fop_setattr_cbk_t setattr;
+ fop_fsetattr_cbk_t fsetattr;
+ fop_setxattr_cbk_t setxattr;
+ fop_fsetxattr_cbk_t fsetxattr;
+ fop_stat_cbk_t stat;
+ fop_fstat_cbk_t fstat;
+ fop_statfs_cbk_t statfs;
+ fop_symlink_cbk_t symlink;
+ fop_truncate_cbk_t truncate;
+ fop_ftruncate_cbk_t ftruncate;
+ fop_unlink_cbk_t unlink;
+ fop_writev_cbk_t writev;
+ fop_xattrop_cbk_t xattrop;
+ fop_fxattrop_cbk_t fxattrop;
+ fop_zerofill_cbk_t zerofill;
+ fop_seek_cbk_t seek;
+ fop_ipc_cbk_t ipc;
+};
+
+struct _ec_lock {
+ ec_inode_t *ctx;
+ gf_timer_t *timer;
+
+ /* List of owners of this lock. All fops added to this list are running
+ * concurrently. */
+ struct list_head owners;
+
+ /* List of fops waiting to be an owner of the lock. Fops are added to this
+ * list when the current owner has an incompatible access (conflicting lock)
+ * or the lock is not acquired yet. */
+ struct list_head waiting;
+
+ /* List of fops that will wait until the next unlock/lock cycle. This
+ * happens when the currently acquired lock is decided to be released as
+ * soon as possible. In this case, all frozen fops will be continued only
+ * after the lock is reacquired. */
+ struct list_head frozen;
+
+ uintptr_t mask;
+ uintptr_t good_mask;
+ uintptr_t healing;
+ uint32_t refs_owners; /* Refs for fops owning the lock */
+ uint32_t refs_pending; /* Refs assigned to fops being prepared */
+ uint32_t waiting_flags; /*Track xattrop/dirty marking*/
+ gf_boolean_t acquired;
+ gf_boolean_t contention;
+ gf_boolean_t unlock_now;
+ gf_boolean_t release;
+ gf_boolean_t query;
+ fd_t *fd;
+ loc_t loc;
+ union {
+ entrylk_type type;
+ struct gf_flock flock;
+ };
+};
+
+struct _ec_lock_link {
+ ec_lock_t *lock;
+ ec_fop_data_t *fop;
+ struct list_head owner_list;
+ struct list_head wait_list;
+ gf_boolean_t update[2];
+ gf_boolean_t dirty[2];
+ gf_boolean_t optimistic_changelog;
+ loc_t *base;
+ uint64_t size;
+ uint32_t waiting_flags;
+ off_t fl_start;
+ off_t fl_end;
+};
+
+/* This structure keeps a range of fragment offsets affected by a fop. Since
+ * real file offsets can be difficult to handle correctly because of overflows,
+ * we use the 'scaled' offset, which corresponds to the offset of the fragment
+ * seen by the bricks, which is always smaller and cannot overflow. */
+struct _ec_fragment_range {
+ uint64_t first; /* Address of the first affected fragment as seen by the
+ bricks (offset on brick) */
+ uint64_t last; /* Address of the first non affected fragment as seen by
+ the bricks (offset on brick) */
+};
+
+/* EC xlator data structure to collect all the data required to perform
+ * the file operation.*/
+struct _ec_fop_data {
+ int32_t id; /* ID of the file operation */
+ int32_t refs;
+ int32_t state;
+ uint32_t minimum; /* Minimum number of successful
+ operation required to conclude a
+ fop as successful */
+ int32_t expected;
+ int32_t winds;
+ int32_t jobs;
+ int32_t error;
+ ec_fop_data_t *parent;
+ xlator_t *xl; /* points to EC xlator */
+ call_frame_t *req_frame; /* frame of the calling xlator */
+ call_frame_t *frame; /* frame used by this fop */
+ struct list_head cbk_list; /* sorted list of groups of answers */
+ struct list_head answer_list; /* list of answers */
+ struct list_head pending_list; /* member of ec_t.pending_fops */
+ ec_cbk_data_t *answer; /* accepted answer */
+ int32_t lock_count;
+ int32_t locked;
+ gf_lock_t lock;
+ ec_lock_link_t locks[2];
+ int32_t first_lock;
+
+ uint32_t fop_flags; /* Flags passed by the caller. */
+ uint32_t flags; /* Internal flags. */
+ uint32_t first;
+ uintptr_t mask;
+ uintptr_t healing; /*Dispatch is done but call is successful only
+ if fop->minimum number of subvolumes succeed
+ which are not healing*/
+ uintptr_t remaining;
+ uintptr_t received; /* Mask of responses */
+ uintptr_t good;
+
+ uid_t uid;
+ gid_t gid;
+
+ ec_wind_f wind; /* Function to wind to */
+ ec_handler_f handler; /* FOP manager function */
+ ec_resume_f resume;
+ ec_cbk_t cbks; /* Callback function for this FOP */
+ void *data;
+ ec_heal_t *heal;
+ struct list_head healer;
+
+ uint64_t user_size;
+ uint32_t head;
+
+ int32_t use_fd; /* Indicates whether this FOP uses FD or
+ not */
+
+ dict_t *xdata;
+ dict_t *dict;
+ int32_t int32;
+ uint32_t uint32;
+ uint64_t size;
+ off_t offset;
+ mode_t mode[2];
+ entrylk_cmd entrylk_cmd;
+ entrylk_type entrylk_type;
+ gf_xattrop_flags_t xattrop_flags;
+ dev_t dev;
+ inode_t *inode;
+ fd_t *fd; /* FD of the file on which FOP is
+ being carried upon */
+ struct iatt iatt;
+ char *str[2];
+ loc_t loc[2]; /* Holds the location details for
+ the file */
+ struct gf_flock flock;
+ struct iovec *vector;
+ struct iobref *buffers;
+ gf_seek_what_t seek;
+ ec_fragment_range_t frag_range; /* This will hold the range of stripes
+ affected by the fop. */
+ char *errstr; /*String of fop name, path and gfid
+ to be used in gf_msg. */
+};
+
+struct _ec_cbk_data {
+ struct list_head list; /* item in the sorted list of groups */
+ struct list_head answer_list; /* item in the list of answers */
+ ec_fop_data_t *fop;
+ ec_cbk_data_t *next; /* next answer in the same group */
+ uint32_t idx;
+ int32_t op_ret;
+ int32_t op_errno;
+ int32_t count;
+ uintptr_t mask;
+
+ dict_t *xdata;
+ dict_t *dict;
+ int32_t int32;
+ uintptr_t uintptr[3];
+ uint64_t size;
+ uint64_t version[2];
+ inode_t *inode;
+ fd_t *fd;
+ struct statvfs statvfs;
+ struct iatt iatt[5];
+ struct gf_flock flock;
+ struct iovec *vector;
+ struct iobref *buffers;
+ char *str;
+ gf_dirent_t entries;
+ off_t offset;
+ gf_seek_what_t what;
+};
+
+enum _ec_gf_opcode {
+ EC_GF_OP_LOAD,
+ EC_GF_OP_STORE,
+ EC_GF_OP_COPY,
+ EC_GF_OP_XOR2,
+ EC_GF_OP_XOR3,
+ EC_GF_OP_XORM,
+ EC_GF_OP_END
+};
+
+struct _ec_gf_op {
+ ec_gf_opcode_t op;
+ uint32_t arg1;
+ uint32_t arg2;
+ uint32_t arg3;
+};
+
+struct _ec_gf_mul {
+ uint32_t regs;
+ uint32_t map[EC_GF_MAX_REGS];
+ ec_gf_op_t *ops;
+};
+
+struct _ec_gf {
+ uint32_t bits;
+ uint32_t size;
+ uint32_t mod;
+ uint32_t min_ops;
+ uint32_t max_ops;
+ uint32_t avg_ops;
+ uint32_t *log;
+ uint32_t *pow;
+ ec_gf_mul_t **table;
+};
+
+struct _ec_code_gen {
+ char *name;
+ char **flags;
+ uint32_t width;
+
+ void (*prolog)(ec_code_builder_t *builder);
+ void (*epilog)(ec_code_builder_t *builder);
+ void (*load)(ec_code_builder_t *builder, uint32_t reg, uint32_t offset,
+ uint32_t bit);
+ void (*store)(ec_code_builder_t *builder, uint32_t reg, uint32_t bit);
+ void (*copy)(ec_code_builder_t *builder, uint32_t dst, uint32_t src);
+ void (*xor2)(ec_code_builder_t *builder, uint32_t dst, uint32_t src);
+ void (*xor3)(ec_code_builder_t *builder, uint32_t dst, uint32_t src1,
+ uint32_t src2);
+ void (*xorm)(ec_code_builder_t *builder, uint32_t dst, uint32_t offset,
+ uint32_t bit);
+};
+
+struct _ec_code {
+ gf_lock_t lock;
+ struct list_head spaces;
+ ec_gf_t *gf;
+ ec_code_gen_t *gen;
+};
+
+struct _ec_code_arg {
+ uint32_t value;
+};
+
+struct _ec_code_op {
+ ec_gf_opcode_t op;
+ ec_code_arg_t arg1;
+ ec_code_arg_t arg2;
+ ec_code_arg_t arg3;
+};
+
+struct _ec_code_builder {
+ ec_code_t *code;
+ uint64_t address;
+ uint8_t *data;
+ uint32_t size;
+ int32_t error;
+ uint32_t regs;
+ uint32_t bits;
+ uint32_t width;
+ uint32_t count;
+ uint32_t base;
+ uint32_t map[EC_GF_MAX_REGS];
+ gf_boolean_t linear;
+ uint64_t loop;
+ ec_code_op_t ops[0];
+};
+
+struct _ec_code_chunk {
+ struct list_head list;
+ size_t size;
+ ec_code_space_t *space;
+};
+
+struct _ec_code_space {
+ struct list_head list;
+ struct list_head chunks;
+ ec_code_t *code;
+ void *exec;
+ size_t size;
+};
+
+union _ec_code_func {
+ ec_code_func_linear_t linear;
+ ec_code_func_interleaved_t interleaved;
+};
+
+struct _ec_matrix_row {
+ ec_code_func_t func;
+ uint32_t *values;
+};
+
+struct _ec_matrix {
+ struct list_head lru;
+ uint32_t refs;
+ uint32_t columns;
+ uint32_t rows;
+ uintptr_t mask;
+ ec_code_t *code;
+ uint32_t *values;
+ ec_matrix_row_t row_data[0];
+};
+
+struct _ec_matrix_list {
+ struct list_head lru;
+ gf_lock_t lock;
+ uint32_t columns;
+ uint32_t rows;
+ uint32_t max;
+ uint32_t count;
+ uint32_t stripe;
+ struct mem_pool *pool;
+ ec_gf_t *gf;
+ ec_code_t *code;
+ ec_matrix_t *encode;
+ ec_matrix_t **objects;
+};
+
+struct _ec_heal {
+ struct list_head list;
+ gf_lock_t lock;
+ xlator_t *xl;
+ ec_fop_data_t *fop;
+ void *data;
+ ec_fop_data_t *lookup;
+ loc_t loc;
+ struct iatt iatt;
+ char *symlink;
+ fd_t *fd;
+ int32_t partial;
+ int32_t done;
+ int32_t error;
+ gf_boolean_t nameheal;
+ uintptr_t available;
+ uintptr_t good;
+ uintptr_t bad;
+ uintptr_t open;
+ uintptr_t fixed;
+ uint64_t offset;
+ uint64_t size;
+ uint64_t total_size;
+ uint64_t version[2];
+ uint64_t raw_size;
+};
+
+struct subvol_healer {
+ xlator_t *this;
+ int subvol;
+ gf_boolean_t running;
+ gf_boolean_t rerun;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ pthread_t thread;
+};
+
+struct _ec_self_heald {
+ gf_boolean_t iamshd;
+ gf_boolean_t enabled;
+ int timeout;
+ uint32_t max_threads;
+ uint32_t wait_qlength;
+ struct subvol_healer *index_healers;
+ struct subvol_healer *full_healers;
+};
+
+struct _ec_statistics {
+ struct {
+ gf_atomic_t hits; /* Cache hits. */
+ gf_atomic_t misses; /* Cache misses. */
+ gf_atomic_t updates; /* Number of times an existing stripe has
+ been updated with new content. */
+ gf_atomic_t invals; /* Number of times an existing stripe has
+ been invalidated because of truncates
+ or discards. */
+ gf_atomic_t evicts; /* Number of times that an existing entry
+ has been evicted to make room for newer
+ entries. */
+ gf_atomic_t allocs; /* Number of memory allocations made to
+ store stripes. */
+ gf_atomic_t errors; /* Number of errors that have caused extra
+ requests. (Basically memory allocation
+ errors). */
+ } stripe_cache;
+ struct {
+ gf_atomic_t attempted; /*Number of heals attempted on
+ files/directories*/
+ gf_atomic_t completed; /*Number of heals complted on files/directories*/
+ } shd;
+};
+
+struct _ec {
+ xlator_t *xl;
+ int32_t healers;
+ int32_t heal_waiters;
+ int32_t nodes; /* Total number of bricks(n) */
+ int32_t bits_for_nodes;
+ int32_t fragments; /* Data bricks(k) */
+ int32_t redundancy; /* Redundant bricks(m) */
+ uint32_t fragment_size; /* Size of fragment/chunk on a
+ brick. */
+ uint32_t stripe_size; /* (fragment_size * fragments)
+ maximum size of user data
+ stored in one stripe. */
+ int32_t up; /* Represents whether EC volume is
+ up or not. */
+ uint32_t idx;
+ uint32_t xl_up_count; /* Number of UP bricks. */
+ uintptr_t xl_up; /* Bit flag representing UP
+ bricks */
+ uint32_t xl_notify_count; /* Number of notifications. */
+ uintptr_t xl_notify; /* Bit flag representing
+ notification for bricks. */
+ uintptr_t node_mask;
+ uintptr_t read_mask; /*Stores user defined read-mask*/
+ gf_atomic_t async_fop_count; /* Number of on going asynchronous fops. */
+ xlator_t **xl_list;
+ gf_lock_t lock;
+ gf_timer_t *timer;
+ gf_boolean_t shutdown;
+ gf_boolean_t eager_lock;
+ gf_boolean_t other_eager_lock;
+ gf_boolean_t optimistic_changelog;
+ gf_boolean_t parallel_writes;
+ uint32_t stripe_cache;
+ uint32_t quorum_count;
+ uint32_t background_heals;
+ uint32_t heal_wait_qlen;
+ uint32_t self_heal_window_size; /* max size of read/writes */
+ uint32_t eager_lock_timeout;
+ uint32_t other_eager_lock_timeout;
+ struct list_head pending_fops;
+ struct list_head heal_waiting;
+ struct list_head healing;
+ struct mem_pool *fop_pool;
+ struct mem_pool *cbk_pool;
+ struct mem_pool *lock_pool;
+ ec_self_heald_t shd;
+ char vol_uuid[UUID_SIZE + 1];
+ dict_t *leaf_to_subvolid;
+ ec_read_policy_t read_policy;
+ ec_matrix_list_t matrix;
+ ec_statistics_t stats;
+};
+
+#endif /* __EC_TYPES_H__ */
diff --git a/xlators/cluster/ec/src/ec.c b/xlators/cluster/ec/src/ec.c
new file mode 100644
index 00000000000..7344be4968d
--- /dev/null
+++ b/xlators/cluster/ec/src/ec.c
@@ -0,0 +1,1873 @@
+/*
+ Copyright (c) 2012-2015 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/defaults.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/upcall-utils.h>
+
+#include "ec.h"
+#include "ec-messages.h"
+#include "ec-mem-types.h"
+#include "ec-types.h"
+#include "ec-helpers.h"
+#include "ec-common.h"
+#include "ec-fops.h"
+#include "ec-method.h"
+#include "ec-code.h"
+#include "ec-heald.h"
+#include <glusterfs/events.h>
+
+static char *ec_read_policies[EC_READ_POLICY_MAX + 1] = {
+ [EC_ROUND_ROBIN] = "round-robin",
+ [EC_GFID_HASH] = "gfid-hash",
+ [EC_READ_POLICY_MAX] = NULL};
+
+#define EC_INTERNAL_XATTR_OR_GOTO(name, xattr, op_errno, label) \
+ do { \
+ if (ec_is_internal_xattr(NULL, (char *)name, NULL, NULL)) { \
+ op_errno = EPERM; \
+ goto label; \
+ } \
+ if (name && (strlen(name) == 0) && xattr) { \
+ /* Bulk [f]removexattr/[f]setxattr */ \
+ GF_IF_INTERNAL_XATTR_GOTO(EC_XATTR_PREFIX "*", xattr, op_errno, \
+ label); \
+ } \
+ } while (0)
+
+int32_t
+ec_parse_options(xlator_t *this)
+{
+ ec_t *ec = this->private;
+ int32_t error = EINVAL;
+ uintptr_t mask;
+
+ GF_OPTION_INIT("redundancy", ec->redundancy, int32, out);
+ ec->fragments = ec->nodes - ec->redundancy;
+ if ((ec->redundancy < 1) || (ec->redundancy >= ec->fragments) ||
+ (ec->fragments > EC_MAX_FRAGMENTS)) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_INVALID_REDUNDANCY,
+ "Invalid redundancy (must be between "
+ "1 and %d)",
+ (ec->nodes - 1) / 2);
+
+ goto out;
+ }
+
+ ec->bits_for_nodes = 1;
+ mask = 2;
+ while (ec->nodes > mask) {
+ ec->bits_for_nodes++;
+ mask <<= 1;
+ }
+ ec->node_mask = (1ULL << ec->nodes) - 1ULL;
+ ec->fragment_size = EC_METHOD_CHUNK_SIZE;
+ ec->stripe_size = ec->fragment_size * ec->fragments;
+
+ gf_msg_debug("ec", 0,
+ "Initialized with: nodes=%u, fragments=%u, "
+ "stripe_size=%u, node_mask=%" PRIxFAST32,
+ ec->nodes, ec->fragments, ec->stripe_size, ec->node_mask);
+
+ error = 0;
+
+out:
+ return error;
+}
+
+int32_t
+ec_prepare_childs(xlator_t *this)
+{
+ ec_t *ec = this->private;
+ xlator_list_t *child = NULL;
+ int32_t count = 0;
+
+ for (child = this->children; child != NULL; child = child->next) {
+ count++;
+ }
+ if (count > EC_MAX_NODES) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_TOO_MANY_SUBVOLS,
+ "Too many subvolumes");
+
+ return EINVAL;
+ }
+ ec->nodes = count;
+
+ ec->xl_list = GF_CALLOC(count, sizeof(ec->xl_list[0]), ec_mt_xlator_t);
+ if (ec->xl_list == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Allocation of xlator list failed");
+
+ return ENOMEM;
+ }
+ ec->xl_up = 0;
+ ec->xl_up_count = 0;
+
+ count = 0;
+ for (child = this->children; child != NULL; child = child->next) {
+ ec->xl_list[count++] = child->xlator;
+ }
+
+ return 0;
+}
+
+/* This function transforms the subvol to subvol-id*/
+static int
+_subvol_to_subvolid(dict_t *this, char *key, data_t *value, void *data)
+{
+ ec_t *ec = data;
+ xlator_t *subvol = NULL;
+ int i = 0;
+ int ret = -1;
+
+ subvol = data_to_ptr(value);
+ for (i = 0; i < ec->nodes; i++) {
+ if (ec->xl_list[i] == subvol) {
+ ret = dict_set_int32(this, key, i);
+ /* -1 stops dict_foreach and returns -1*/
+ if (ret < 0)
+ ret = -1;
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+int
+ec_subvol_to_subvol_id_transform(ec_t *ec, dict_t *leaf_to_subvolid)
+{
+ return dict_foreach(leaf_to_subvolid, _subvol_to_subvolid, ec);
+}
+
+void
+__ec_destroy_private(xlator_t *this)
+{
+ ec_t *ec = this->private;
+
+ if (ec != NULL) {
+ LOCK(&ec->lock);
+
+ if (ec->timer != NULL) {
+ gf_timer_call_cancel(this->ctx, ec->timer);
+ ec->timer = NULL;
+ }
+
+ UNLOCK(&ec->lock);
+
+ /* There is a race with timer because there is no way to know if
+ * timer callback has really been cancelled or it has been scheduled
+ * for execution. If it has been scheduled, it will crash if we
+ * destroy ec too fast.
+ *
+ * Not sure how this can be solved without using global variables or
+ * having support from gf_timer_call_cancel()
+ */
+ sleep(2);
+
+ this->private = NULL;
+ if (ec->xl_list != NULL) {
+ GF_FREE(ec->xl_list);
+ ec->xl_list = NULL;
+ }
+
+ if (ec->fop_pool != NULL) {
+ mem_pool_destroy(ec->fop_pool);
+ }
+
+ if (ec->cbk_pool != NULL) {
+ mem_pool_destroy(ec->cbk_pool);
+ }
+
+ if (ec->lock_pool != NULL) {
+ mem_pool_destroy(ec->lock_pool);
+ }
+
+ LOCK_DESTROY(&ec->lock);
+
+ if (ec->leaf_to_subvolid)
+ dict_unref(ec->leaf_to_subvolid);
+
+ ec_method_fini(&ec->matrix);
+
+ GF_FREE(ec);
+ }
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ if (xlator_mem_acct_init(this, ec_mt_end + 1) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Memory accounting initialization "
+ "failed.");
+
+ return -1;
+ }
+
+ return 0;
+}
+
+void
+ec_configure_background_heal_opts(ec_t *ec, int background_heals,
+ int heal_wait_qlen)
+{
+ if (background_heals == 0) {
+ ec->heal_wait_qlen = 0;
+ } else {
+ ec->heal_wait_qlen = heal_wait_qlen;
+ }
+ ec->background_heals = background_heals;
+}
+
+int
+ec_assign_read_policy(ec_t *ec, char *read_policy)
+{
+ int read_policy_idx = -1;
+
+ read_policy_idx = gf_get_index_by_elem(ec_read_policies, read_policy);
+ if (read_policy_idx < 0 || read_policy_idx >= EC_READ_POLICY_MAX)
+ return -1;
+
+ ec->read_policy = read_policy_idx;
+ return 0;
+}
+
+int32_t
+reconfigure(xlator_t *this, dict_t *options)
+{
+ ec_t *ec = this->private;
+ char *read_policy = NULL;
+ char *extensions = NULL;
+ uint32_t heal_wait_qlen = 0;
+ uint32_t background_heals = 0;
+ int32_t ret = -1;
+ int32_t err;
+
+ GF_OPTION_RECONF("cpu-extensions", extensions, options, str, failed);
+
+ GF_OPTION_RECONF("self-heal-daemon", ec->shd.enabled, options, bool,
+ failed);
+ GF_OPTION_RECONF("iam-self-heal-daemon", ec->shd.iamshd, options, bool,
+ failed);
+ GF_OPTION_RECONF("eager-lock", ec->eager_lock, options, bool, failed);
+ GF_OPTION_RECONF("other-eager-lock", ec->other_eager_lock, options, bool,
+ failed);
+ GF_OPTION_RECONF("eager-lock-timeout", ec->eager_lock_timeout, options,
+ uint32, failed);
+ GF_OPTION_RECONF("other-eager-lock-timeout", ec->other_eager_lock_timeout,
+ options, uint32, failed);
+ GF_OPTION_RECONF("background-heals", background_heals, options, uint32,
+ failed);
+ GF_OPTION_RECONF("heal-wait-qlength", heal_wait_qlen, options, uint32,
+ failed);
+ GF_OPTION_RECONF("self-heal-window-size", ec->self_heal_window_size,
+ options, uint32, failed);
+ GF_OPTION_RECONF("heal-timeout", ec->shd.timeout, options, int32, failed);
+ ec_configure_background_heal_opts(ec, background_heals, heal_wait_qlen);
+ GF_OPTION_RECONF("shd-max-threads", ec->shd.max_threads, options, uint32,
+ failed);
+ GF_OPTION_RECONF("shd-wait-qlength", ec->shd.wait_qlength, options, uint32,
+ failed);
+
+ GF_OPTION_RECONF("read-policy", read_policy, options, str, failed);
+
+ GF_OPTION_RECONF("optimistic-change-log", ec->optimistic_changelog, options,
+ bool, failed);
+ GF_OPTION_RECONF("parallel-writes", ec->parallel_writes, options, bool,
+ failed);
+ GF_OPTION_RECONF("stripe-cache", ec->stripe_cache, options, uint32, failed);
+ GF_OPTION_RECONF("quorum-count", ec->quorum_count, options, uint32, failed);
+ ret = 0;
+ if (ec_assign_read_policy(ec, read_policy)) {
+ ret = -1;
+ }
+
+ err = ec_method_update(this, &ec->matrix, extensions);
+ if (err != 0) {
+ ret = -1;
+ }
+
+failed:
+ return ret;
+}
+
+glusterfs_event_t
+ec_get_event_from_state(ec_t *ec)
+{
+ int down_count = 0;
+
+ if (ec->xl_up_count >= ec->fragments) {
+ /* If ec is up but some subvolumes are yet to notify, give
+ * grace time for other subvols to notify to prevent start of
+ * I/O which may result in self-heals */
+ if (ec->xl_notify_count < ec->nodes)
+ return GF_EVENT_MAXVAL;
+
+ return GF_EVENT_CHILD_UP;
+ } else {
+ down_count = ec->xl_notify_count - ec->xl_up_count;
+ if (down_count > ec->redundancy)
+ return GF_EVENT_CHILD_DOWN;
+ }
+
+ return GF_EVENT_MAXVAL;
+}
+
+void
+ec_up(xlator_t *this, ec_t *ec)
+{
+ char str1[32], str2[32];
+
+ if (ec->timer != NULL) {
+ gf_timer_call_cancel(this->ctx, ec->timer);
+ ec->timer = NULL;
+ }
+
+ ec->up = 1;
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_UP,
+ "Going UP : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
+
+ gf_event(EVENT_EC_MIN_BRICKS_UP, "subvol=%s", this->name);
+}
+
+void
+ec_down(xlator_t *this, ec_t *ec)
+{
+ char str1[32], str2[32];
+
+ if (ec->timer != NULL) {
+ gf_timer_call_cancel(this->ctx, ec->timer);
+ ec->timer = NULL;
+ }
+
+ ec->up = 0;
+ gf_msg(this->name, GF_LOG_INFO, 0, EC_MSG_EC_DOWN,
+ "Going DOWN : Child UP = %s Child Notify = %s",
+ ec_bin(str1, sizeof(str1), ec->xl_up, ec->nodes),
+ ec_bin(str2, sizeof(str2), ec->xl_notify, ec->nodes));
+
+ gf_event(EVENT_EC_MIN_BRICKS_NOT_UP, "subvol=%s", this->name);
+}
+
+void
+ec_notify_cbk(void *data)
+{
+ ec_t *ec = data;
+ glusterfs_event_t event = GF_EVENT_MAXVAL;
+ gf_boolean_t propagate = _gf_false;
+ gf_boolean_t launch_heal = _gf_false;
+
+ LOCK(&ec->lock);
+ {
+ if (!ec->timer) {
+ /*
+ * Either child_up/child_down is already sent to parent
+ * This is a spurious wake up.
+ */
+ goto unlock;
+ }
+
+ gf_timer_call_cancel(ec->xl->ctx, ec->timer);
+ ec->timer = NULL;
+
+ /* The timeout has expired, so any subvolume that has not
+ * already reported its state, will be considered to be down.
+ * We mark as if all bricks had reported. */
+ ec->xl_notify = (1ULL << ec->nodes) - 1ULL;
+ ec->xl_notify_count = ec->nodes;
+
+ /* Since we have marked all subvolumes as notified, it's
+ * guaranteed that ec_get_event_from_state() will return
+ * CHILD_UP or CHILD_DOWN, but not MAXVAL. */
+ event = ec_get_event_from_state(ec);
+ if (event == GF_EVENT_CHILD_UP) {
+ /* We are ready to bring the volume up. If there are
+ * still bricks DOWN, they will be healed when they
+ * come up. */
+ ec_up(ec->xl, ec);
+
+ if (ec->shd.iamshd && !ec->shutdown) {
+ launch_heal = _gf_true;
+ GF_ATOMIC_INC(ec->async_fop_count);
+ }
+ }
+
+ propagate = _gf_true;
+ }
+unlock:
+ UNLOCK(&ec->lock);
+
+ if (launch_heal) {
+ /* We have just brought the volume UP, so we trigger
+ * a self-heal check on the root directory. */
+ ec_launch_replace_heal(ec);
+ }
+ if (propagate) {
+ default_notify(ec->xl, event, NULL);
+ }
+}
+
+void
+ec_launch_notify_timer(xlator_t *this, ec_t *ec)
+{
+ struct timespec delay = {
+ 0,
+ };
+
+ gf_msg_debug(this->name, 0, "Initiating child-down timer");
+ delay.tv_sec = 10;
+ delay.tv_nsec = 0;
+ ec->timer = gf_timer_call_after(this->ctx, delay, ec_notify_cbk, ec);
+ if (ec->timer == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_TIMER_CREATE_FAIL,
+ "Cannot create timer "
+ "for delayed initialization");
+ }
+}
+
+gf_boolean_t
+ec_disable_delays(ec_t *ec)
+{
+ ec->shutdown = _gf_true;
+
+ return __ec_is_last_fop(ec);
+}
+
+void
+ec_cleanup_healer_object(ec_t *ec)
+{
+ struct subvol_healer *healer = NULL;
+ ec_self_heald_t *shd = NULL;
+ void *res = NULL;
+ int i = 0;
+ gf_boolean_t is_join = _gf_false;
+
+ shd = &ec->shd;
+ if (!shd->iamshd)
+ return;
+
+ for (i = 0; i < ec->nodes; i++) {
+ healer = &shd->index_healers[i];
+ pthread_mutex_lock(&healer->mutex);
+ {
+ healer->rerun = 1;
+ if (healer->running) {
+ pthread_cond_signal(&healer->cond);
+ is_join = _gf_true;
+ }
+ }
+ pthread_mutex_unlock(&healer->mutex);
+ if (is_join) {
+ pthread_join(healer->thread, &res);
+ is_join = _gf_false;
+ }
+
+ healer = &shd->full_healers[i];
+ pthread_mutex_lock(&healer->mutex);
+ {
+ healer->rerun = 1;
+ if (healer->running) {
+ pthread_cond_signal(&healer->cond);
+ is_join = _gf_true;
+ }
+ }
+ pthread_mutex_unlock(&healer->mutex);
+ if (is_join) {
+ pthread_join(healer->thread, &res);
+ is_join = _gf_false;
+ }
+ }
+}
+void
+ec_pending_fops_completed(ec_t *ec)
+{
+ if (ec->shutdown) {
+ default_notify(ec->xl, GF_EVENT_PARENT_DOWN, NULL);
+ }
+}
+
+static gf_boolean_t
+ec_set_up_state(ec_t *ec, uintptr_t index_mask, uintptr_t new_state)
+{
+ uintptr_t current_state = 0;
+
+ if (xlator_is_cleanup_starting(ec->xl))
+ return _gf_false;
+
+ if ((ec->xl_notify & index_mask) == 0) {
+ ec->xl_notify |= index_mask;
+ ec->xl_notify_count++;
+ }
+ current_state = ec->xl_up & index_mask;
+ if (current_state != new_state) {
+ ec->xl_up ^= index_mask;
+ ec->xl_up_count += (current_state ? -1 : 1);
+
+ return _gf_true;
+ }
+
+ return _gf_false;
+}
+
+static gf_boolean_t
+ec_upcall(ec_t *ec, struct gf_upcall *upcall)
+{
+ struct gf_upcall_cache_invalidation *ci = NULL;
+ struct gf_upcall_inodelk_contention *lc = NULL;
+ inode_t *inode;
+ inode_table_t *table;
+
+ switch (upcall->event_type) {
+ case GF_UPCALL_CACHE_INVALIDATION:
+ ci = upcall->data;
+ ci->flags |= UP_INVAL_ATTR;
+ return _gf_true;
+
+ case GF_UPCALL_INODELK_CONTENTION:
+ lc = upcall->data;
+ if (strcmp(lc->domain, ec->xl->name) != 0) {
+ /* The lock is not owned by EC, ignore it. */
+ return _gf_true;
+ }
+ table = ((xlator_t *)ec->xl->graph->top)->itable;
+ if (table == NULL) {
+ /* Self-heal daemon doesn't have an inode table on the top
+ * xlator because it doesn't need it. In this case we should
+ * use the inode table managed by EC itself where all inodes
+ * being healed should be present. However self-heal doesn't
+ * use eager-locking and inodelk's are already released as
+ * soon as possible. In this case we can safely ignore these
+ * notifications. */
+ return _gf_false;
+ }
+ inode = inode_find(table, upcall->gfid);
+ /* If inode is not found, it means that it's already released,
+ * so we can ignore it. Probably it has been released and
+ * destroyed while the contention notification was being sent.
+ */
+ if (inode != NULL) {
+ ec_lock_release(ec, inode);
+ inode_unref(inode);
+ }
+
+ return _gf_false;
+
+ default:
+ return _gf_true;
+ }
+}
+
+int32_t
+ec_notify(xlator_t *this, int32_t event, void *data, void *data2)
+{
+ ec_t *ec = this->private;
+ int32_t idx = 0;
+ int32_t error = 0;
+ glusterfs_event_t old_event = GF_EVENT_MAXVAL;
+ dict_t *input = NULL;
+ dict_t *output = NULL;
+ gf_boolean_t propagate = _gf_true;
+ gf_boolean_t needs_shd_check = _gf_false;
+ int32_t orig_event = event;
+ uintptr_t mask = 0;
+
+ gf_msg_trace(this->name, 0, "NOTIFY(%d): %p, %p", event, data, data2);
+
+ if (event == GF_EVENT_UPCALL) {
+ propagate = ec_upcall(ec, data);
+ goto done;
+ }
+
+ if (event == GF_EVENT_TRANSLATOR_OP) {
+ if (!ec->up) {
+ error = -1;
+ } else {
+ input = data;
+ output = data2;
+ error = ec_xl_op(this, input, output);
+ }
+ goto out;
+ }
+
+ for (idx = 0; idx < ec->nodes; idx++) {
+ if (ec->xl_list[idx] == data) {
+ break;
+ }
+ }
+
+ LOCK(&ec->lock);
+
+ if (event == GF_EVENT_PARENT_UP) {
+ /*
+ * Start a timer which sends appropriate event to parent
+ * xlator to prevent the 'mount' syscall from hanging.
+ */
+ ec_launch_notify_timer(this, ec);
+ goto unlock;
+ } else if (event == GF_EVENT_PARENT_DOWN) {
+ /* If there aren't pending fops running after we have waken up
+ * them, we immediately propagate the notification. */
+ propagate = ec_disable_delays(ec);
+ ec_cleanup_healer_object(ec);
+ goto unlock;
+ }
+
+ if (idx < ec->nodes) { /* CHILD_* events */
+ old_event = ec_get_event_from_state(ec);
+
+ mask = 1ULL << idx;
+ if (event == GF_EVENT_CHILD_UP) {
+ /* We need to trigger a selfheal if a brick changes
+ * to UP state. */
+ if (ec_set_up_state(ec, mask, mask) && ec->shd.iamshd &&
+ !ec->shutdown) {
+ needs_shd_check = _gf_true;
+ }
+ } else if (event == GF_EVENT_CHILD_DOWN) {
+ ec_set_up_state(ec, mask, 0);
+ }
+
+ event = ec_get_event_from_state(ec);
+
+ if (event == GF_EVENT_CHILD_UP) {
+ if (!ec->up) {
+ ec_up(this, ec);
+ }
+ } else {
+ /* If the volume is not UP, it's irrelevant if one
+ * brick has come up. We cannot heal anything. */
+ needs_shd_check = _gf_false;
+
+ if ((event == GF_EVENT_CHILD_DOWN) && ec->up) {
+ ec_down(this, ec);
+ }
+ }
+
+ if (event != GF_EVENT_MAXVAL) {
+ if (event == old_event) {
+ if (orig_event == GF_EVENT_CHILD_UP)
+ event = GF_EVENT_SOME_DESCENDENT_UP;
+ else /* orig_event has to be GF_EVENT_CHILD_DOWN */
+ event = GF_EVENT_SOME_DESCENDENT_DOWN;
+ }
+ } else {
+ propagate = _gf_false;
+ needs_shd_check = _gf_false;
+ }
+
+ if (needs_shd_check) {
+ GF_ATOMIC_INC(ec->async_fop_count);
+ }
+ }
+unlock:
+ UNLOCK(&ec->lock);
+
+done:
+ if (needs_shd_check) {
+ ec_launch_replace_heal(ec);
+ }
+ if (propagate) {
+ error = default_notify(this, event, data);
+ }
+
+out:
+ return error;
+}
+
+int32_t
+notify(xlator_t *this, int32_t event, void *data, ...)
+{
+ int ret = -1;
+ va_list ap;
+ void *data2 = NULL;
+
+ va_start(ap, data);
+ data2 = va_arg(ap, dict_t *);
+ va_end(ap);
+ ret = ec_notify(this, event, data, data2);
+
+ return ret;
+}
+
+static void
+ec_statistics_init(ec_t *ec)
+{
+ GF_ATOMIC_INIT(ec->stats.stripe_cache.hits, 0);
+ GF_ATOMIC_INIT(ec->stats.stripe_cache.misses, 0);
+ GF_ATOMIC_INIT(ec->stats.stripe_cache.updates, 0);
+ GF_ATOMIC_INIT(ec->stats.stripe_cache.invals, 0);
+ GF_ATOMIC_INIT(ec->stats.stripe_cache.evicts, 0);
+ GF_ATOMIC_INIT(ec->stats.stripe_cache.allocs, 0);
+ GF_ATOMIC_INIT(ec->stats.stripe_cache.errors, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.attempted, 0);
+ GF_ATOMIC_INIT(ec->stats.shd.completed, 0);
+}
+
+static int
+ec_assign_read_mask(ec_t *ec, char *read_mask_str)
+{
+ char *mask = NULL;
+ char *maskptr = NULL;
+ char *saveptr = NULL;
+ char *id_str = NULL;
+ int id = 0;
+ int ret = 0;
+ uintptr_t read_mask = 0;
+
+ if (!read_mask_str) {
+ ec->read_mask = 0;
+ ret = 0;
+ goto out;
+ }
+
+ mask = gf_strdup(read_mask_str);
+ if (!mask) {
+ ret = -1;
+ goto out;
+ }
+ maskptr = mask;
+
+ for (;;) {
+ id_str = strtok_r(maskptr, ":", &saveptr);
+ if (id_str == NULL)
+ break;
+ if (gf_string2int(id_str, &id)) {
+ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL,
+ "In read-mask \"%s\" id %s is not a valid integer",
+ read_mask_str, id_str);
+ ret = -1;
+ goto out;
+ }
+
+ if ((id < 0) || (id >= ec->nodes)) {
+ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL,
+ "In read-mask \"%s\" id %d is not in range [0 - %d]",
+ read_mask_str, id, ec->nodes - 1);
+ ret = -1;
+ goto out;
+ }
+ read_mask |= (1UL << id);
+ maskptr = NULL;
+ }
+
+ if (gf_bits_count(read_mask) < ec->fragments) {
+ gf_msg(ec->xl->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL,
+ "read-mask \"%s\" should contain at least %d ids", read_mask_str,
+ ec->fragments);
+ ret = -1;
+ goto out;
+ }
+ ec->read_mask = read_mask;
+ ret = 0;
+out:
+ GF_FREE(mask);
+ return ret;
+}
+
+int32_t
+init(xlator_t *this)
+{
+ ec_t *ec = NULL;
+ char *read_policy = NULL;
+ char *extensions = NULL;
+ int32_t err;
+ char *read_mask_str = NULL;
+
+ if (this->parents == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, EC_MSG_NO_PARENTS,
+ "Volume does not have parents.");
+ }
+
+ ec = GF_MALLOC(sizeof(*ec), ec_mt_ec_t);
+ if (ec == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to allocate private memory.");
+
+ return -1;
+ }
+ memset(ec, 0, sizeof(*ec));
+
+ this->private = ec;
+
+ ec->xl = this;
+ LOCK_INIT(&ec->lock);
+
+ GF_ATOMIC_INIT(ec->async_fop_count, 0);
+ INIT_LIST_HEAD(&ec->pending_fops);
+ INIT_LIST_HEAD(&ec->heal_waiting);
+ INIT_LIST_HEAD(&ec->healing);
+
+ ec->fop_pool = mem_pool_new(ec_fop_data_t, 1024);
+ ec->cbk_pool = mem_pool_new(ec_cbk_data_t, 4096);
+ ec->lock_pool = mem_pool_new(ec_lock_t, 1024);
+ if ((ec->fop_pool == NULL) || (ec->cbk_pool == NULL) ||
+ (ec->lock_pool == NULL)) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, EC_MSG_NO_MEMORY,
+ "Failed to create memory pools.");
+
+ goto failed;
+ }
+
+ if (ec_prepare_childs(this) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_XLATOR_INIT_FAIL,
+ "Failed to initialize xlator");
+
+ goto failed;
+ }
+
+ if (ec_parse_options(this) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, EC_MSG_XLATOR_PARSE_OPT_FAIL,
+ "Failed to parse xlator options");
+
+ goto failed;
+ }
+
+ GF_OPTION_INIT("cpu-extensions", extensions, str, failed);
+
+ err = ec_method_init(this, &ec->matrix, ec->fragments, ec->nodes,
+ ec->nodes * 2, extensions);
+ if (err != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, -err, EC_MSG_MATRIX_FAILED,
+ "Failed to initialize matrix management");
+
+ goto failed;
+ }
+
+ GF_OPTION_INIT("self-heal-daemon", ec->shd.enabled, bool, failed);
+ GF_OPTION_INIT("iam-self-heal-daemon", ec->shd.iamshd, bool, failed);
+ GF_OPTION_INIT("eager-lock", ec->eager_lock, bool, failed);
+ GF_OPTION_INIT("other-eager-lock", ec->other_eager_lock, bool, failed);
+ GF_OPTION_INIT("eager-lock-timeout", ec->eager_lock_timeout, uint32,
+ failed);
+ GF_OPTION_INIT("other-eager-lock-timeout", ec->other_eager_lock_timeout,
+ uint32, failed);
+ GF_OPTION_INIT("background-heals", ec->background_heals, uint32, failed);
+ GF_OPTION_INIT("heal-wait-qlength", ec->heal_wait_qlen, uint32, failed);
+ GF_OPTION_INIT("self-heal-window-size", ec->self_heal_window_size, uint32,
+ failed);
+ ec_configure_background_heal_opts(ec, ec->background_heals,
+ ec->heal_wait_qlen);
+ GF_OPTION_INIT("read-policy", read_policy, str, failed);
+ if (ec_assign_read_policy(ec, read_policy))
+ goto failed;
+
+ GF_OPTION_INIT("heal-timeout", ec->shd.timeout, int32, failed);
+ GF_OPTION_INIT("shd-max-threads", ec->shd.max_threads, uint32, failed);
+ GF_OPTION_INIT("shd-wait-qlength", ec->shd.wait_qlength, uint32, failed);
+ GF_OPTION_INIT("optimistic-change-log", ec->optimistic_changelog, bool,
+ failed);
+ GF_OPTION_INIT("parallel-writes", ec->parallel_writes, bool, failed);
+ GF_OPTION_INIT("stripe-cache", ec->stripe_cache, uint32, failed);
+ GF_OPTION_INIT("quorum-count", ec->quorum_count, uint32, failed);
+ GF_OPTION_INIT("ec-read-mask", read_mask_str, str, failed);
+
+ if (ec_assign_read_mask(ec, read_mask_str))
+ goto failed;
+
+ this->itable = inode_table_new(EC_SHD_INODE_LRU_LIMIT, this);
+ if (!this->itable)
+ goto failed;
+
+ if (ec->shd.iamshd)
+ ec_selfheal_daemon_init(this);
+ gf_msg_debug(this->name, 0, "Disperse translator initialized.");
+
+ ec->leaf_to_subvolid = dict_new();
+ if (!ec->leaf_to_subvolid)
+ goto failed;
+ if (glusterfs_reachable_leaves(this, ec->leaf_to_subvolid)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_SUBVOL_BUILD_FAIL,
+ "Failed to build subvol "
+ "dictionary");
+ goto failed;
+ }
+
+ if (ec_subvol_to_subvol_id_transform(ec, ec->leaf_to_subvolid) < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, EC_MSG_SUBVOL_ID_DICT_SET_FAIL,
+ "Failed to build subvol-id "
+ "dictionary");
+ goto failed;
+ }
+
+ ec_statistics_init(ec);
+
+ return 0;
+
+failed:
+ __ec_destroy_private(this);
+
+ return -1;
+}
+
+void
+fini(xlator_t *this)
+{
+ ec_selfheal_daemon_fini(this);
+ __ec_destroy_private(this);
+}
+
+int32_t
+ec_gf_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
+{
+ ec_access(frame, this, -1, EC_MINIMUM_ONE, default_access_cbk, NULL, loc,
+ mask, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ ec_create(frame, this, -1, EC_MINIMUM_MIN, default_create_cbk, NULL, loc,
+ flags, mode, umask, fd, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ ec_discard(frame, this, -1, EC_MINIMUM_MIN, default_discard_cbk, NULL, fd,
+ offset, len, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ uint32_t fop_flags = EC_MINIMUM_ALL;
+
+ if (cmd == ENTRYLK_UNLOCK)
+ fop_flags = EC_MINIMUM_ONE;
+ ec_entrylk(frame, this, -1, fop_flags, default_entrylk_cbk, NULL, volume,
+ loc, basename, cmd, type, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ uint32_t fop_flags = EC_MINIMUM_ALL;
+
+ if (cmd == ENTRYLK_UNLOCK)
+ fop_flags = EC_MINIMUM_ONE;
+ ec_fentrylk(frame, this, -1, fop_flags, default_fentrylk_cbk, NULL, volume,
+ fd, basename, cmd, type, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ ec_fallocate(frame, this, -1, EC_MINIMUM_MIN, default_fallocate_cbk, NULL,
+ fd, mode, offset, len, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ ec_flush(frame, this, -1, EC_MINIMUM_MIN, default_flush_cbk, NULL, fd,
+ xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ ec_fsync(frame, this, -1, EC_MINIMUM_MIN, default_fsync_cbk, NULL, fd,
+ datasync, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ ec_fsyncdir(frame, this, -1, EC_MINIMUM_MIN, default_fsyncdir_cbk, NULL, fd,
+ datasync, xdata);
+
+ return 0;
+}
+
+int
+ec_marker_populate_args(call_frame_t *frame, int type, int *gauge,
+ xlator_t **subvols)
+{
+ xlator_t *this = frame->this;
+ ec_t *ec = this->private;
+
+ memcpy(subvols, ec->xl_list, sizeof(*subvols) * ec->nodes);
+
+ if (type == MARKER_XTIME_TYPE) {
+ /*Don't error out on ENOENT/ENOTCONN */
+ gauge[MCNT_NOTFOUND] = 0;
+ gauge[MCNT_ENOTCONN] = 0;
+ }
+
+ return ec->nodes;
+}
+
+int32_t
+ec_handle_heal_commands(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ dict_t *dict_rsp = NULL;
+ int op_ret = -1;
+ int op_errno = ENOMEM;
+
+ if (!name || strcmp(name, GF_HEAL_INFO))
+ return -1;
+
+ op_errno = -ec_get_heal_info(this, loc, &dict_rsp);
+ if (op_errno <= 0) {
+ op_errno = op_ret = 0;
+ }
+
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict_rsp, NULL);
+ if (dict_rsp)
+ dict_unref(dict_rsp);
+ return 0;
+}
+
+int32_t
+ec_gf_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int error = 0;
+ ec_t *ec = this->private;
+ int32_t fop_flags = EC_MINIMUM_ONE;
+
+ if (name && strcmp(name, EC_XATTR_HEAL) != 0) {
+ EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out);
+ }
+
+ if (ec_handle_heal_commands(frame, this, loc, name, xdata) == 0)
+ return 0;
+
+ if (cluster_handle_marker_getxattr(frame, loc, name, ec->vol_uuid, NULL,
+ ec_marker_populate_args) == 0)
+ return 0;
+
+ if (name && ((fnmatch(GF_XATTR_STIME_PATTERN, name, 0) == 0) ||
+ XATTR_IS_NODE_UUID(name) || XATTR_IS_NODE_UUID_LIST(name))) {
+ fop_flags = EC_MINIMUM_ALL;
+ }
+
+ ec_getxattr(frame, this, -1, fop_flags, default_getxattr_cbk, NULL, loc,
+ name, xdata);
+
+ return 0;
+out:
+ error = ENODATA;
+ STACK_UNWIND_STRICT(getxattr, frame, -1, error, NULL, NULL);
+ return 0;
+}
+
+int32_t
+ec_gf_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ int error = 0;
+
+ EC_INTERNAL_XATTR_OR_GOTO(name, NULL, error, out);
+
+ ec_fgetxattr(frame, this, -1, EC_MINIMUM_ONE, default_fgetxattr_cbk, NULL,
+ fd, name, xdata);
+ return 0;
+out:
+ error = ENODATA;
+ STACK_UNWIND_STRICT(fgetxattr, frame, -1, error, NULL, NULL);
+ return 0;
+}
+
+int32_t
+ec_gf_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ int32_t fop_flags = EC_MINIMUM_ALL;
+
+ if (flock->l_type == F_UNLCK)
+ fop_flags = EC_MINIMUM_ONE;
+
+ ec_inodelk(frame, this, &frame->root->lk_owner, -1, fop_flags,
+ default_inodelk_cbk, NULL, volume, loc, cmd, flock, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ int32_t fop_flags = EC_MINIMUM_ALL;
+
+ if (flock->l_type == F_UNLCK)
+ fop_flags = EC_MINIMUM_ONE;
+ ec_finodelk(frame, this, &frame->root->lk_owner, -1, fop_flags,
+ default_finodelk_cbk, NULL, volume, fd, cmd, flock, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ ec_link(frame, this, -1, EC_MINIMUM_MIN, default_link_cbk, NULL, oldloc,
+ newloc, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ int32_t fop_flags = EC_MINIMUM_ALL;
+
+ if (flock->l_type == F_UNLCK)
+ fop_flags = EC_MINIMUM_ONE;
+ ec_lk(frame, this, -1, fop_flags, default_lk_cbk, NULL, fd, cmd, flock,
+ xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ ec_lookup(frame, this, -1, EC_MINIMUM_MIN, default_lookup_cbk, NULL, loc,
+ xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ ec_mkdir(frame, this, -1, EC_MINIMUM_MIN, default_mkdir_cbk, NULL, loc,
+ mode, umask, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ ec_mknod(frame, this, -1, EC_MINIMUM_MIN, default_mknod_cbk, NULL, loc,
+ mode, rdev, umask, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ ec_open(frame, this, -1, EC_MINIMUM_MIN, default_open_cbk, NULL, loc, flags,
+ fd, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ ec_opendir(frame, this, -1, EC_MINIMUM_MIN, default_opendir_cbk, NULL, loc,
+ fd, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ ec_readdir(frame, this, -1, EC_MINIMUM_ONE, default_readdir_cbk, NULL, fd,
+ size, offset, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ ec_readdirp(frame, this, -1, EC_MINIMUM_ONE, default_readdirp_cbk, NULL, fd,
+ size, offset, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ ec_readlink(frame, this, -1, EC_MINIMUM_ONE, default_readlink_cbk, NULL,
+ loc, size, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ ec_readv(frame, this, -1, EC_MINIMUM_MIN, default_readv_cbk, NULL, fd, size,
+ offset, flags, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int error = 0;
+
+ EC_INTERNAL_XATTR_OR_GOTO(name, xdata, error, out);
+
+ ec_removexattr(frame, this, -1, EC_MINIMUM_MIN, default_removexattr_cbk,
+ NULL, loc, name, xdata);
+
+ return 0;
+out:
+ STACK_UNWIND_STRICT(removexattr, frame, -1, error, NULL);
+ return 0;
+}
+
+int32_t
+ec_gf_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int error = 0;
+
+ EC_INTERNAL_XATTR_OR_GOTO(name, xdata, error, out);
+
+ ec_fremovexattr(frame, this, -1, EC_MINIMUM_MIN, default_fremovexattr_cbk,
+ NULL, fd, name, xdata);
+
+ return 0;
+out:
+ STACK_UNWIND_STRICT(fremovexattr, frame, -1, error, NULL);
+ return 0;
+}
+
+int32_t
+ec_gf_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ ec_rename(frame, this, -1, EC_MINIMUM_MIN, default_rename_cbk, NULL, oldloc,
+ newloc, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
+{
+ ec_rmdir(frame, this, -1, EC_MINIMUM_MIN, default_rmdir_cbk, NULL, loc,
+ xflags, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ ec_setattr(frame, this, -1, EC_MINIMUM_MIN, default_setattr_cbk, NULL, loc,
+ stbuf, valid, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ ec_fsetattr(frame, this, -1, EC_MINIMUM_MIN, default_fsetattr_cbk, NULL, fd,
+ stbuf, valid, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ int error = 0;
+
+ EC_INTERNAL_XATTR_OR_GOTO("", dict, error, out);
+
+ ec_setxattr(frame, this, -1, EC_MINIMUM_MIN, default_setxattr_cbk, NULL,
+ loc, dict, flags, xdata);
+
+ return 0;
+out:
+ STACK_UNWIND_STRICT(setxattr, frame, -1, error, NULL);
+ return 0;
+}
+
+int32_t
+ec_gf_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ int error = 0;
+
+ EC_INTERNAL_XATTR_OR_GOTO("", dict, error, out);
+
+ ec_fsetxattr(frame, this, -1, EC_MINIMUM_MIN, default_fsetxattr_cbk, NULL,
+ fd, dict, flags, xdata);
+
+ return 0;
+out:
+ STACK_UNWIND_STRICT(fsetxattr, frame, -1, error, NULL);
+ return 0;
+}
+
+int32_t
+ec_gf_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ ec_stat(frame, this, -1, EC_MINIMUM_MIN, default_stat_cbk, NULL, loc,
+ xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ ec_fstat(frame, this, -1, EC_MINIMUM_MIN, default_fstat_cbk, NULL, fd,
+ xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ ec_statfs(frame, this, -1, EC_MINIMUM_MIN, default_statfs_cbk, NULL, loc,
+ xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ ec_symlink(frame, this, -1, EC_MINIMUM_MIN, default_symlink_cbk, NULL,
+ linkname, loc, umask, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ ec_truncate(frame, this, -1, EC_MINIMUM_MIN, default_truncate_cbk, NULL,
+ loc, offset, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ ec_ftruncate(frame, this, -1, EC_MINIMUM_MIN, default_ftruncate_cbk, NULL,
+ fd, offset, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
+{
+ ec_unlink(frame, this, -1, EC_MINIMUM_MIN, default_unlink_cbk, NULL, loc,
+ xflags, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ ec_writev(frame, this, -1, EC_MINIMUM_MIN, default_writev_cbk, NULL, fd,
+ vector, count, offset, flags, iobref, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ ec_xattrop(frame, this, -1, EC_MINIMUM_MIN, default_xattrop_cbk, NULL, loc,
+ optype, xattr, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ ec_fxattrop(frame, this, -1, EC_MINIMUM_MIN, default_fxattrop_cbk, NULL, fd,
+ optype, xattr, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ default_zerofill_failure_cbk(frame, ENOTSUP);
+
+ return 0;
+}
+
+int32_t
+ec_gf_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ ec_seek(frame, this, -1, EC_MINIMUM_ONE, default_seek_cbk, NULL, fd, offset,
+ what, xdata);
+
+ return 0;
+}
+
+int32_t
+ec_gf_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
+{
+ ec_ipc(frame, this, -1, EC_MINIMUM_MIN, default_ipc_cbk, NULL, op, xdata);
+ return 0;
+}
+
+int32_t
+ec_gf_forget(xlator_t *this, inode_t *inode)
+{
+ uint64_t value = 0;
+ ec_inode_t *ctx = NULL;
+
+ if ((inode_ctx_del(inode, this, &value) == 0) && (value != 0)) {
+ ctx = (ec_inode_t *)(uintptr_t)value;
+ /* We can only forget an inode if it has been unlocked, so the stripe
+ * cache should also be empty. */
+ GF_ASSERT(list_empty(&ctx->stripe_cache.lru));
+ GF_FREE(ctx);
+ }
+
+ return 0;
+}
+
+void
+ec_gf_release_fd(xlator_t *this, fd_t *fd)
+{
+ uint64_t value = 0;
+ ec_fd_t *ctx = NULL;
+
+ if ((fd_ctx_del(fd, this, &value) == 0) && (value != 0)) {
+ ctx = (ec_fd_t *)(uintptr_t)value;
+ loc_wipe(&ctx->loc);
+ GF_FREE(ctx);
+ }
+}
+
+int32_t
+ec_gf_release(xlator_t *this, fd_t *fd)
+{
+ ec_gf_release_fd(this, fd);
+
+ return 0;
+}
+
+int32_t
+ec_gf_releasedir(xlator_t *this, fd_t *fd)
+{
+ ec_gf_release_fd(this, fd);
+
+ return 0;
+}
+
+int32_t
+ec_dump_private(xlator_t *this)
+{
+ ec_t *ec = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ char tmp[65];
+
+ GF_ASSERT(this);
+
+ ec = this->private;
+ GF_ASSERT(ec);
+
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+ gf_proc_dump_write("up", "%u", ec->up);
+ gf_proc_dump_write("nodes", "%u", ec->nodes);
+ gf_proc_dump_write("redundancy", "%u", ec->redundancy);
+ gf_proc_dump_write("fragment_size", "%u", ec->fragment_size);
+ gf_proc_dump_write("stripe_size", "%u", ec->stripe_size);
+ gf_proc_dump_write("childs_up", "%u", ec->xl_up_count);
+ gf_proc_dump_write("childs_up_mask", "%s",
+ ec_bin(tmp, sizeof(tmp), ec->xl_up, ec->nodes));
+ if (ec->read_mask) {
+ gf_proc_dump_write("read-mask", "%s",
+ ec_bin(tmp, sizeof(tmp), ec->read_mask, ec->nodes));
+ }
+ gf_proc_dump_write("background-heals", "%d", ec->background_heals);
+ gf_proc_dump_write("heal-wait-qlength", "%d", ec->heal_wait_qlen);
+ gf_proc_dump_write("self-heal-window-size", "%" PRIu32,
+ ec->self_heal_window_size);
+ gf_proc_dump_write("healers", "%d", ec->healers);
+ gf_proc_dump_write("heal-waiters", "%d", ec->heal_waiters);
+ gf_proc_dump_write("read-policy", "%s", ec_read_policies[ec->read_policy]);
+ gf_proc_dump_write("parallel-writes", "%d", ec->parallel_writes);
+ gf_proc_dump_write("quorum-count", "%u", ec->quorum_count);
+
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s.stats.stripe_cache",
+ this->type, this->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+
+ gf_proc_dump_write("hits", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.stripe_cache.hits));
+ gf_proc_dump_write("misses", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.stripe_cache.misses));
+ gf_proc_dump_write("updates", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.stripe_cache.updates));
+ gf_proc_dump_write("invalidations", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.stripe_cache.invals));
+ gf_proc_dump_write("evicts", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.stripe_cache.evicts));
+ gf_proc_dump_write("allocations", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.stripe_cache.allocs));
+ gf_proc_dump_write("errors", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.stripe_cache.errors));
+ gf_proc_dump_write("heals-attempted", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.attempted));
+ gf_proc_dump_write("heals-completed", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(ec->stats.shd.completed));
+
+ return 0;
+}
+
+struct xlator_fops fops = {.lookup = ec_gf_lookup,
+ .stat = ec_gf_stat,
+ .fstat = ec_gf_fstat,
+ .truncate = ec_gf_truncate,
+ .ftruncate = ec_gf_ftruncate,
+ .access = ec_gf_access,
+ .readlink = ec_gf_readlink,
+ .mknod = ec_gf_mknod,
+ .mkdir = ec_gf_mkdir,
+ .unlink = ec_gf_unlink,
+ .rmdir = ec_gf_rmdir,
+ .symlink = ec_gf_symlink,
+ .rename = ec_gf_rename,
+ .link = ec_gf_link,
+ .create = ec_gf_create,
+ .open = ec_gf_open,
+ .readv = ec_gf_readv,
+ .writev = ec_gf_writev,
+ .flush = ec_gf_flush,
+ .fsync = ec_gf_fsync,
+ .opendir = ec_gf_opendir,
+ .readdir = ec_gf_readdir,
+ .readdirp = ec_gf_readdirp,
+ .fsyncdir = ec_gf_fsyncdir,
+ .statfs = ec_gf_statfs,
+ .setxattr = ec_gf_setxattr,
+ .getxattr = ec_gf_getxattr,
+ .fsetxattr = ec_gf_fsetxattr,
+ .fgetxattr = ec_gf_fgetxattr,
+ .removexattr = ec_gf_removexattr,
+ .fremovexattr = ec_gf_fremovexattr,
+ .lk = ec_gf_lk,
+ .inodelk = ec_gf_inodelk,
+ .finodelk = ec_gf_finodelk,
+ .entrylk = ec_gf_entrylk,
+ .fentrylk = ec_gf_fentrylk,
+ .xattrop = ec_gf_xattrop,
+ .fxattrop = ec_gf_fxattrop,
+ .setattr = ec_gf_setattr,
+ .fsetattr = ec_gf_fsetattr,
+ .fallocate = ec_gf_fallocate,
+ .discard = ec_gf_discard,
+ .zerofill = ec_gf_zerofill,
+ .seek = ec_gf_seek,
+ .ipc = ec_gf_ipc};
+
+struct xlator_cbks cbks = {.forget = ec_gf_forget,
+ .release = ec_gf_release,
+ .releasedir = ec_gf_releasedir};
+
+struct xlator_dumpops dumpops = {.priv = ec_dump_private};
+
+struct volume_options options[] = {
+ {.key = {"redundancy"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "{{ volume.redundancy }}",
+ .description = "Maximum number of bricks that can fail "
+ "simultaneously without losing data."},
+ {
+ .key = {"self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "self-heal daemon enable/disable",
+ .default_value = "enable",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ },
+ {.key = {"iam-self-heal-daemon"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option differentiates if the disperse "
+ "translator is running as part of self-heal-daemon "
+ "or not."},
+ {.key = {"eager-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {GD_OP_VERSION_3_7_10},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description = "Enable/Disable eager lock for regular files on a "
+ "disperse volume. If a fop takes a lock and completes "
+ "its operation, it waits for next 1 second before "
+ "releasing the lock, to see if the lock can be reused "
+ "for next fop from the same client. If ec finds any lock "
+ "contention within 1 second it releases the lock "
+ "immediately before time expires. This improves the "
+ "performance of file operations. However, as it takes "
+ "lock on first brick, for few operations like read, "
+ "discovery of lock contention might take long time and "
+ "can actually degrade the performance. If eager lock is "
+ "disabled, lock will be released as soon as fop "
+ "completes."},
+ {.key = {"other-eager-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {GD_OP_VERSION_3_13_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description = "It's equivalent to the eager-lock option but for non "
+ "regular files."},
+ {.key = {"eager-lock-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 60,
+ .default_value = "1",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse", "locks", "timeout"},
+ .description = "Maximum time (in seconds) that a lock on an inode is "
+ "kept held if no new operations on the inode are "
+ "received."},
+ {.key = {"other-eager-lock-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 60,
+ .default_value = "1",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse", "locks", "timeout"},
+ .description = "It's equivalent to eager-lock-timeout option but for "
+ "non regular files."},
+ {
+ .key = {"background-heals"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0, /*Disabling background heals*/
+ .max = 256,
+ .default_value = "8",
+ .op_version = {GD_OP_VERSION_3_7_3},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description = "This option can be used to control number of parallel"
+ " heals",
+ },
+ {
+ .key = {"heal-wait-qlength"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max =
+ 65536, /*Around 100MB as of now with sizeof(ec_fop_data_t) at 1800*/
+ .default_value = "128",
+ .op_version = {GD_OP_VERSION_3_7_3},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description = "This option can be used to control number of heals"
+ " that can wait",
+ },
+ {.key = {"heal-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 60,
+ .max = INT_MAX,
+ .default_value = "600",
+ .op_version = {GD_OP_VERSION_3_7_3},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"disperse"},
+ .description = "time interval for checking the need to self-heal "
+ "in self-heal-daemon"},
+ {
+ .key = {"read-policy"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"round-robin", "gfid-hash"},
+ .default_value = "gfid-hash",
+ .op_version = {GD_OP_VERSION_3_7_6},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description =
+ "inode-read fops happen only on 'k' number of bricks in"
+ " n=k+m disperse subvolume. 'round-robin' selects the read"
+ " subvolume using round-robin algo. 'gfid-hash' selects read"
+ " subvolume based on hash of the gfid of that file/directory.",
+ },
+ {.key = {"shd-max-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 64,
+ .default_value = "1",
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description = "Maximum number of parallel heals SHD can do per local "
+ "brick. This can substantially lower heal times, "
+ "but can also crush your bricks if you don't have "
+ "the storage hardware to support this."},
+ {.key = {"shd-wait-qlength"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 65536,
+ .default_value = "1024",
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description = "This option can be used to control number of heals"
+ " that can wait in SHD per subvolume"},
+ {.key = {"cpu-extensions"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"none", "auto", "x64", "sse", "avx"},
+ .default_value = "auto",
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description = "force the cpu extensions to be used to accelerate the "
+ "galois field computations."},
+ {.key = {"self-heal-window-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 1,
+ .max = 1024,
+ .default_value = "1",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"disperse"},
+ .description = "Maximum number blocks(128KB) per file for which "
+ "self-heal process would be applied simultaneously."},
+ {.key = {"optimistic-change-log"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {GD_OP_VERSION_3_10_1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT,
+ .tags = {"disperse"},
+ .description = "Set/Unset dirty flag for every update fop at the start"
+ "of the fop. If OFF, this option impacts performance of"
+ "entry operations or metadata operations as it will"
+ "set dirty flag at the start and unset it at the end of"
+ "ALL update fop. If ON and all the bricks are good,"
+ "dirty flag will be set at the start only for file fops"
+ "For metadata and entry fops dirty flag will not be set"
+ "at the start, if all the bricks are good. This does"
+ "not impact performance for metadata operations and"
+ "entry operation but has a very small window to miss"
+ "marking entry as dirty in case it is required to be"
+ "healed"},
+ {.key = {"parallel-writes"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "This controls if writes can be wound in parallel as long"
+ "as it doesn't modify same stripes"},
+ {.key = {"stripe-cache"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0, /*Disabling stripe_cache*/
+ .max = EC_STRIPE_CACHE_MAX_SIZE,
+ .default_value = "4",
+ .description = "This option will keep the last stripe of write fop"
+ "in memory. If next write falls in this stripe, we need"
+ "not to read it again from backend and we can save READ"
+ "fop going over the network. This will improve performance,"
+ "specially for sequential writes. However, this will also"
+ "lead to extra memory consumption, maximum "
+ "(cache size * stripe size) Bytes per open file."},
+ {
+ .key = {"quorum-count"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "0",
+ .description =
+ "This option can be used to define how many successes on"
+ "the bricks constitute a success to the application. This"
+ " count should be in the range"
+ "[disperse-data-count, disperse-count] (inclusive)",
+ },
+ {
+ .key = {"ec-read-mask"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = NULL,
+ .description = "This option can be used to choose which bricks can be"
+ " used for reading data/metadata of a file/directory",
+ },
+ {
+ .key = {NULL},
+ },
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "disperse",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/cluster/ec/src/ec.h b/xlators/cluster/ec/src/ec.h
new file mode 100644
index 00000000000..6f6de6d5981
--- /dev/null
+++ b/xlators/cluster/ec/src/ec.h
@@ -0,0 +1,34 @@
+/*
+ Copyright (c) 2012-2014 DataLab, s.l. <http://www.datalab.es>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __EC_H__
+#define __EC_H__
+
+#include "ec-method.h"
+
+#define EC_XATTR_PREFIX "trusted.ec."
+#define EC_XATTR_CONFIG EC_XATTR_PREFIX "config"
+#define EC_XATTR_SIZE EC_XATTR_PREFIX "size"
+#define EC_XATTR_VERSION EC_XATTR_PREFIX "version"
+#define EC_XATTR_HEAL EC_XATTR_PREFIX "heal"
+#define EC_XATTR_HEAL_NEW EC_XATTR_PREFIX "heal-new"
+#define EC_XATTR_DIRTY EC_XATTR_PREFIX "dirty"
+#define EC_STRIPE_CACHE_MAX_SIZE 10
+#define EC_VERSION_SIZE 2
+#define EC_SHD_INODE_LRU_LIMIT 10
+
+#define EC_MAX_FRAGMENTS EC_METHOD_MAX_FRAGMENTS
+/* The maximum number of nodes is derived from the maximum allowed fragments
+ * using the rule that redundancy cannot be equal or greater than the number
+ * of fragments.
+ */
+#define EC_MAX_NODES min(EC_MAX_FRAGMENTS * 2 - 1, EC_METHOD_MAX_NODES)
+
+#endif /* __EC_H__ */
diff --git a/xlators/cluster/ha/Makefile.am b/xlators/cluster/ha/Makefile.am
deleted file mode 100644
index d471a3f9243..00000000000
--- a/xlators/cluster/ha/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/cluster/ha/src/Makefile.am b/xlators/cluster/ha/src/Makefile.am
deleted file mode 100644
index 5f78a296533..00000000000
--- a/xlators/cluster/ha/src/Makefile.am
+++ /dev/null
@@ -1,15 +0,0 @@
-xlator_LTLIBRARIES = ha.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/cluster
-
-ha_la_LDFLAGS = -module -avoidversion
-
-ha_la_SOURCES = ha-helpers.c ha.c
-ha_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = ha.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/cluster/ha/src/ha-helpers.c b/xlators/cluster/ha/src/ha-helpers.c
deleted file mode 100644
index 1e4af1b622f..00000000000
--- a/xlators/cluster/ha/src/ha-helpers.c
+++ /dev/null
@@ -1,204 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include "xlator.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "dict.h"
-#include "compat-errno.h"
-#include "ha.h"
-
-#define HA_TRANSPORT_NOTCONN(_ret, _errno, _fd) \
- ((_ret == -1) && (_fd ? (_errno == EBADFD):(_errno == ENOTCONN)))
-
-int ha_alloc_init_fd (call_frame_t *frame, fd_t *fd)
-{
- ha_local_t *local = NULL;
- int i = -1;
- ha_private_t *pvt = NULL;
- int child_count = 0;
- int ret = -1;
- hafd_t *hafdp = NULL;
- xlator_t *this = NULL;
- uint64_t tmp_hafdp = 0;
-
- this = frame->this;
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
-
- if (local == NULL) {
- ret = fd_ctx_get (fd, this, &tmp_hafdp);
- if (ret < 0) {
- goto out;
- }
- hafdp = (hafd_t *)(long)tmp_hafdp;
- local = frame->local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (local == NULL) {
- ret = -ENOMEM;
- goto out;
- }
- local->state = GF_CALLOC (1, child_count,
- gf_ha_mt_child_count);
- if (local->state == NULL) {
- ret = -ENOMEM;
- goto out;
- }
-
- /* take care of the preferred subvolume */
- if (pvt->pref_subvol == -1)
- local->active = hafdp->active;
- else
- local->active = pvt->pref_subvol;
-
- LOCK (&hafdp->lock);
- memcpy (local->state, hafdp->fdstate, child_count);
- UNLOCK (&hafdp->lock);
-
- /* in case the preferred subvolume is down */
- if ((local->active != -1) && (local->state[local->active] == 0))
- local->active = -1;
-
- for (i = 0; i < child_count; i++) {
- if (local->state[i]) {
- if (local->active == -1)
- local->active = i;
- local->tries++;
- }
- }
- if (local->active == -1) {
- ret = -ENOTCONN;
- goto out;
- }
- local->fd = fd_ref (fd);
- }
- ret = 0;
-out:
- return ret;
-}
-
-int ha_handle_cbk (call_frame_t *frame, void *cookie, int op_ret, int op_errno)
-{
- xlator_t *xl = NULL;
- ha_private_t *pvt = NULL;
- xlator_t **children = NULL;
- int prev_child = -1;
- hafd_t *hafdp = NULL;
- int ret = -1;
- call_stub_t *stub = NULL;
- ha_local_t *local = NULL;
- uint64_t tmp_hafdp = 0;
-
- xl = frame->this;
- pvt = xl->private;
- children = pvt->children;
- prev_child = (long) cookie;
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log (xl->name, GF_LOG_ERROR ,"(child=%s) (op_ret=%d op_errno=%s)",
- children[prev_child]->name, op_ret, strerror (op_errno));
- }
-
- if (HA_TRANSPORT_NOTCONN (op_ret, op_errno, (local->fd))) {
- ret = 0;
- if (local->fd) {
- ret = fd_ctx_get (local->fd, xl, &tmp_hafdp);
- }
- hafdp = (hafd_t *)(long)tmp_hafdp;
- if (ret == 0) {
- if (local->fd) {
- LOCK(&hafdp->lock);
- hafdp->fdstate[prev_child] = 0;
- UNLOCK(&hafdp->lock);
- }
- local->tries--;
- if (local->tries != 0) {
- while (1) {
- local->active = (local->active + 1) % pvt->child_count;
- if (local->state[local->active])
- break;
- }
- stub = local->stub;
- local->stub = NULL;
- call_resume (stub);
- return -1;
- }
- }
- }
- if (local->stub) {
- call_stub_destroy (local->stub);
- local->stub = NULL;
- }
-
- if (local->fd) {
- GF_FREE (local->state);
- local->state = NULL;
-
- fd_unref (local->fd);
- local->fd = NULL;
- }
- return 0;
-}
-
-int ha_alloc_init_inode (call_frame_t *frame, inode_t *inode)
-{
- int i = -1;
- ha_private_t *pvt = NULL;
- xlator_t *xl = NULL;
- int ret = -1;
- ha_local_t *local = NULL;
- uint64_t tmp_state = 0;
-
- xl = frame->this;
- pvt = xl->private;
- local = frame->local;
-
- if (local == NULL) {
- local = frame->local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (local == NULL) {
- ret = -ENOMEM;
- goto out;
- }
- local->active = pvt->pref_subvol;
- ret = inode_ctx_get (inode, xl, &tmp_state);
- if (ret < 0) {
- goto out;
- }
- local->state = (char *)(long)tmp_state;
- if (local->active != -1 && local->state[local->active] == 0)
- local->active = -1;
- for (i = 0; i < pvt->child_count; i++) {
- if (local->state[i]) {
- if (local->active == -1)
- local->active = i;
- local->tries++;
- }
- }
- if (local->active == -1) {
- ret = -ENOTCONN;
- goto out;
- }
- }
- ret = 0;
-out:
- return ret;
-}
diff --git a/xlators/cluster/ha/src/ha-mem-types.h b/xlators/cluster/ha/src/ha-mem-types.h
deleted file mode 100644
index 9bfb3972b27..00000000000
--- a/xlators/cluster/ha/src/ha-mem-types.h
+++ /dev/null
@@ -1,37 +0,0 @@
-
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __HA_MEM_TYPES_H__
-#define __HA_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_ha_mem_types_ {
- gf_ha_mt_ha_local_t = gf_common_mt_end + 1,
- gf_ha_mt_hafd_t,
- gf_ha_mt_char,
- gf_ha_mt_child_count,
- gf_ha_mt_xlator_t,
- gf_ha_mt_ha_private_t,
- gf_ha_mt_end
-};
-#endif
-
diff --git a/xlators/cluster/ha/src/ha.c b/xlators/cluster/ha/src/ha.c
deleted file mode 100644
index 38d4229d3cb..00000000000
--- a/xlators/cluster/ha/src/ha.c
+++ /dev/null
@@ -1,4023 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/* generate errors randomly, code is simple now, better alogorithm
- * can be written to decide what error to be returned and when
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "dict.h"
-#include "compat-errno.h"
-#include "ha.h"
-
-/*
- * TODO:
- * - dbench fails if ha over server side afr
- * - lock calls - lock on all subvols.
- * - support preferred-subvolume option. code already there.
- * - do not alloc the call-stub in case only one subvol is up.
- */
-
-void
-ha_local_wipe (ha_local_t *local)
-{
- if (local->stub) {
- call_stub_destroy (local->stub);
- local->stub = NULL;
- }
-
- if (local->state) {
- GF_FREE (local->state);
- local->state = NULL;
- }
-
- if (local->dict) {
- dict_unref (local->dict);
- local->dict = NULL;
- }
-
- loc_wipe (&local->loc);
-
- if (local->fd) {
- fd_unref (local->fd);
- local->fd = NULL;
- }
-
- if (local->inode) {
- inode_unref (local->inode);
- local->inode = NULL;
- }
-
- GF_FREE (local);
- return;
-}
-
-
-int
-ha_forget (xlator_t *this,
- inode_t *inode)
-{
- uint64_t stateino = 0;
- char *state = NULL;
- if (!inode_ctx_del (inode, this, &stateino)) {
- state = ((char *)(long)stateino);
- GF_FREE (state);
- }
-
- return 0;
-
-}
-
-int32_t
-ha_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- int child_count = 0, i = 0, callcnt = 0;
- char *state = NULL;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_state = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++) {
- if (pvt->children[i] == prev_frame->this)
- break;
- }
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_ERROR, "(child=%s) (op_ret=%d op_errno=%s)",
- children[i]->name, op_ret, strerror (op_errno));
- }
- inode_ctx_get (local->inode, this, &tmp_state);
- state = (char *)(long)tmp_state;
-
- LOCK (&frame->lock);
- if (local->revalidate == 1) {
- if ((!op_ret) != state[i]) {
- local->revalidate_error = 1;
- gf_log (this->name, GF_LOG_DEBUG, "revalidate error on %s",
- pvt->children[i]->name);
- }
- } else {
- if (op_ret == 0) {
- state[i] = 1;
- }
- }
- if (local->op_ret == -1 && op_ret == 0) {
- local->op_ret = 0;
- local->buf = *buf;
- local->postparent = *postparent;
- if (dict)
- local->dict = dict_ref (dict);
- }
- if (op_ret == -1 && op_ret != ENOTCONN)
- local->op_errno = op_errno;
- callcnt = --local->call_count;
- UNLOCK (&frame->lock);
-
- if (callcnt == 0) {
- dict_t *ctx = local->dict;
- inode_t *inode = local->inode;
- if (local->revalidate_error == 1) {
- local->op_ret = -1;
- local->op_errno = EIO;
- gf_log (this->name, GF_LOG_DEBUG, "revalidate error, returning EIO");
- }
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- inode,
- &local->buf,
- ctx,
- &local->postparent);
- if (inode)
- inode_unref (inode);
- if (ctx)
- dict_unref (ctx);
- }
- return 0;
-}
-
-int32_t
-ha_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- int child_count = 0, i = 0;
- char *state = NULL;
- xlator_t **children = NULL;
- int ret = -1;
- int32_t op_errno = EINVAL;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- children = pvt->children;
-
- frame->local = local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto unwind;
- }
-
- child_count = pvt->child_count;
- local->inode = inode_ref (loc->inode);
-
- ret = inode_ctx_get (loc->inode, this, NULL);
- if (ret) {
- state = GF_CALLOC (1, child_count, gf_ha_mt_child_count);
- if (state == NULL) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto unwind;
- }
-
- inode_ctx_put (loc->inode, this, (uint64_t)(long)state);
- } else
- local->revalidate = 1;
-
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->call_count = child_count;
-
- for (i = 0; i < child_count; i++) {
- STACK_WIND (frame,
- ha_lookup_cbk,
- children[i],
- children[i]->fops->lookup,
- loc,
- xattr_req);
- }
- return 0;
-
-unwind:
- local = frame->local;
- frame->local = NULL;
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_stat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- }
- return 0;
-}
-
-int32_t
-ha_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- ha_local_t *local = NULL;
- int op_errno = ENOTCONN;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_stat_stub (frame, ha_stat, loc);
-
- STACK_WIND_COOKIE (frame,
- ha_stat_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->stat,
- loc);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
-}
-
-int32_t
-ha_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame, op_ret, op_errno, statpre, statpost);
- }
- return 0;
-}
-
-
-int32_t
-ha_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
- int32_t valid)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_setattr_stub (frame, ha_setattr, loc, stbuf, valid);
-
- STACK_WIND_COOKIE (frame,
- ha_setattr_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->setattr,
- loc, stbuf, valid);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-ha_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
- int32_t valid)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_fsetattr_stub (frame, ha_fsetattr, fd, stbuf, valid);
-
- STACK_WIND_COOKIE (frame,
- ha_setattr_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->fsetattr,
- fd, stbuf, valid);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-ha_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- prebuf,
- postbuf);
- }
- return 0;
-}
-
-int32_t
-ha_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_truncate_stub (frame, ha_truncate, loc, offset);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_truncate_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->truncate,
- loc,
- offset);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
- int32_t
-ha_ftruncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- prebuf, postbuf);
- }
- return 0;
-}
-
-int32_t
-ha_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_ftruncate_stub (frame, ha_ftruncate, fd, offset);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_ftruncate_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->ftruncate,
- fd,
- offset);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
-int32_t
-ha_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- }
- return 0;
-}
-
-int32_t
-ha_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_access_stub (frame, ha_access, loc, mask);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_access_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->access,
- loc,
- mask);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno);
-
- ha_local_wipe (local);
- return 0;
-}
-
-
- int32_t
-ha_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *sbuf)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- path,
- sbuf);
- }
- return 0;
-}
-
-int32_t
-ha_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
-{
- ha_local_t *local = frame->local;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_readlink_stub (frame, ha_readlink, loc, size);
-
- STACK_WIND_COOKIE (frame,
- ha_readlink_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->readlink,
- loc,
- size);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int
-ha_mknod_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- int child_count = 0, i = 0, cnt = 0, ret = 0;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++)
- if (prev_frame->this == children[i])
- break;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "(path=%s) (op_ret=%d op_errno=%d)",
- local->stub->args.mknod.loc.path, op_ret, op_errno);
- }
- ret = inode_ctx_get (local->stub->args.mknod.loc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "unwind(-1), inode_ctx_get() error");
- /* It is difficult to handle this error at this stage
- * as we still expect more cbks, we can't return as
- * of now
- */
- } else if (op_ret == 0) {
- stateino[i] = 1;
- }
- LOCK (&frame->lock);
- cnt = --local->call_count;
- UNLOCK (&frame->lock);
-
- if (cnt == 0) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- local->stub->args.mknod.loc.inode,
- &local->buf, &local->preparent,
- &local->postparent);
- call_stub_destroy (stub);
- }
- return 0;
-}
-
-int32_t
-ha_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- int child_count = 0, i = 0, cnt = 0, ret = 0;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++)
- if (prev_frame->this == children[i])
- break;
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.mknod.loc.path, op_ret, op_errno);
- }
-
- ret = inode_ctx_get (local->stub->args.mknod.loc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "inode_ctx_get() error");
- /* FIXME: handle the case */
- }
- if (op_ret == 0) {
- stateino[i] = 1;
- local->op_ret = 0;
- local->first_success = 1;
- local->buf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
- }
- cnt = --local->call_count;
- for (i = local->active + 1; i < child_count; i++) {
- if (local->state[i])
- break;
- }
-
- if (cnt == 0 || i == child_count) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- stub = local->stub;
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->stub->args.mknod.loc.inode, &local->buf,
- &local->preparent, &local->postparent);
- call_stub_destroy (stub);
- return 0;
- }
-
- local->active = i;
-
- if (local->first_success == 0) {
- STACK_WIND (frame,
- ha_mknod_cbk,
- children[i],
- children[i]->fops->mknod,
- &local->stub->args.mknod.loc,
- local->stub->args.mknod.mode,
- local->stub->args.mknod.rdev);
- return 0;
- }
- cnt = local->call_count;
-
- for (; i < child_count; i++) {
- if (local->state[i]) {
- STACK_WIND (frame,
- ha_mknod_lookup_cbk,
- children[i],
- children[i]->fops->lookup,
- &local->stub->args.mknod.loc,
- 0);
- if (--cnt == 0)
- break;
- }
- }
- return 0;
-}
-
-int32_t
-ha_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t rdev)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- int child_count = 0, i = 0;
- char *stateino = NULL;
- int32_t op_errno = EINVAL;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
-
- frame->local = local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->stub = fop_mknod_stub (frame, ha_mknod, loc, mode, rdev);
- if (!local->stub) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!local->state) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- memcpy (local->state, pvt->state, child_count);
- local->active = -1;
-
- stateino = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!stateino) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
-
- for (i = 0; i < child_count; i++) {
- if (local->state[i]) {
- local->call_count++;
- if (local->active == -1)
- local->active = i;
- }
- }
-
- STACK_WIND (frame,
- ha_mknod_cbk,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->mknod,
- loc, mode, rdev);
- return 0;
-
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
- ha_local_wipe (local);
- return 0;
-}
-
-
-int
-ha_mkdir_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- int child_count = 0, i = 0, cnt = 0;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++)
- if (prev_frame->this == children[i])
- break;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.mkdir.loc.path, op_ret, op_errno);
- }
- inode_ctx_get (local->stub->args.mkdir.loc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (op_ret == 0)
- stateino[i] = 1;
-
- LOCK (&frame->lock);
- cnt = --local->call_count;
- UNLOCK (&frame->lock);
-
- if (cnt == 0) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- local->stub->args.mkdir.loc.inode, &local->buf,
- &local->preparent, &local->postparent);
- call_stub_destroy (stub);
- }
- return 0;
-}
-
-int32_t
-ha_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- int child_count = 0, i = 0, cnt = 0;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++)
- if (prev_frame->this == children[i])
- break;
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.mkdir.loc.path, op_ret, op_errno);
- }
-
- inode_ctx_get (local->stub->args.mkdir.loc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (op_ret == 0) {
- stateino[i] = 1;
- local->op_ret = 0;
- local->first_success = 1;
- local->buf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
- }
- cnt = --local->call_count;
- for (i = local->active + 1; i < child_count; i++) {
- if (local->state[i])
- break;
- }
-
- if (cnt == 0 || i == child_count) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- stub = local->stub;
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->stub->args.mkdir.loc.inode, &local->buf,
- &local->preparent, &local->postparent);
- call_stub_destroy (stub);
- return 0;
- }
-
- local->active = i;
-
- if (local->first_success == 0) {
- STACK_WIND (frame,
- ha_mkdir_cbk,
- children[i],
- children[i]->fops->mkdir,
- &local->stub->args.mkdir.loc,
- local->stub->args.mkdir.mode);
- return 0;
- }
- cnt = local->call_count;
-
- for (; i < child_count; i++) {
- if (local->state[i]) {
- STACK_WIND (frame,
- ha_mkdir_lookup_cbk,
- children[i],
- children[i]->fops->lookup,
- &local->stub->args.mkdir.loc,
- 0);
- if (--cnt == 0)
- break;
- }
- }
- return 0;
-}
-
-int32_t
-ha_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- int child_count = 0, i = 0;
- char *stateino = NULL;
- int32_t op_errno = EINVAL;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
-
- frame->local = local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!frame->local) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->stub = fop_mkdir_stub (frame, ha_mkdir, loc, mode);
- if (!local->stub) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!local->state) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- memcpy (local->state, pvt->state, child_count);
- local->active = -1;
-
- stateino = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!stateino) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
- for (i = 0; i < child_count; i++) {
- if (local->state[i]) {
- local->call_count++;
- if (local->active == -1)
- local->active = i;
- }
- }
-
- STACK_WIND (frame,
- ha_mkdir_cbk,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->mkdir,
- loc, mode);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent);
- }
- return 0;
-}
-
-int32_t
-ha_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
-
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_unlink_stub (frame, ha_unlink, loc);
- if (!local->stub) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_unlink_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->unlink,
- loc);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
- int32_t
-ha_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- preparent,
- postparent);
- }
- return 0;
-}
-
-int32_t
-ha_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- ha_local_t *local = frame->local;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_rmdir_stub (frame, ha_rmdir, loc);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_rmdir_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->rmdir,
- loc);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int
-ha_symlink_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- int child_count = 0, i = 0, cnt = 0;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++)
- if (prev_frame->this == children[i])
- break;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.symlink.loc.path, op_ret, op_errno);
- }
- inode_ctx_get (local->stub->args.symlink.loc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (op_ret == 0)
- stateino[i] = 1;
-
- LOCK (&frame->lock);
- cnt = --local->call_count;
- UNLOCK (&frame->lock);
-
- if (cnt == 0) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- local->stub->args.symlink.loc.inode, &local->buf,
- &local->preparent, &local->postparent);
- call_stub_destroy (stub);
- }
- return 0;
-}
-
-int32_t
-ha_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- int child_count = 0, i = 0, cnt = 0;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++)
- if (prev_frame->this == children[i])
- break;
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.symlink.loc.path, op_ret, op_errno);
- }
- inode_ctx_get (local->stub->args.symlink.loc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (op_ret == 0) {
- stateino[i] = 1;
- local->op_ret = 0;
- local->first_success = 1;
- local->buf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
- }
- cnt = --local->call_count;
- for (i = local->active + 1; i < child_count; i++) {
- if (local->state[i])
- break;
- }
-
- if (cnt == 0 || i == child_count) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- stub = local->stub;
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->stub->args.symlink.loc.inode, &local->buf,
- &local->preparent, &local->postparent);
- call_stub_destroy (stub);
- return 0;
- }
-
- local->active = i;
-
- if (local->first_success == 0) {
- STACK_WIND (frame,
- ha_symlink_cbk,
- children[i],
- children[i]->fops->symlink,
- local->stub->args.symlink.linkname,
- &local->stub->args.symlink.loc);
- return 0;
- }
- cnt = local->call_count;
-
- for (; i < child_count; i++) {
- if (local->state[i]) {
- STACK_WIND (frame,
- ha_symlink_lookup_cbk,
- children[i],
- children[i]->fops->lookup,
- &local->stub->args.symlink.loc,
- 0);
- if (--cnt == 0)
- break;
- }
- }
- return 0;
-}
-
-int32_t
-ha_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkname,
- loc_t *loc)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- int child_count = 0, i = 0;
- char *stateino = NULL;
- int32_t op_errno = EINVAL;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
-
- frame->local = local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->stub = fop_symlink_stub (frame, ha_symlink, linkname, loc);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!local->state) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- memcpy (local->state, pvt->state, child_count);
- local->active = -1;
-
- stateino = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!stateino) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
-
- for (i = 0; i < child_count; i++) {
- if (local->state[i]) {
- local->call_count++;
- if (local->active == -1) {
- local->active = i;
- }
- }
- }
-
- STACK_WIND (frame,
- ha_symlink_cbk,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->symlink,
- linkname, loc);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame, op_ret, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent);
- }
- return 0;
-}
-
-int32_t
-ha_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, oldloc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_rename_stub (frame, ha_rename, oldloc, newloc);
- if (!local->stub) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_rename_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->rename,
- oldloc, newloc);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-int
-ha_link_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- int child_count = 0, i = 0, cnt = 0;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++)
- if (prev_frame->this == children[i])
- break;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.link.newloc.path, op_ret, op_errno);
- }
- inode_ctx_get (local->stub->args.link.newloc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (op_ret == 0)
- stateino[i] = 1;
-
- LOCK (&frame->lock);
- cnt = --local->call_count;
- UNLOCK (&frame->lock);
-
- if (cnt == 0) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- local->stub->args.link.oldloc.inode, &local->buf,
- &local->preparent, &local->postparent);
- call_stub_destroy (stub);
- }
- return 0;
-}
-
-int32_t
-ha_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- int child_count = 0, i = 0, cnt = 0;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- for (i = 0; i < child_count; i++)
- if (prev_frame->this == children[i])
- break;
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.link.newloc.path, op_ret, op_errno);
- }
- inode_ctx_get (local->stub->args.link.newloc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (op_ret == 0) {
- stateino[i] = 1;
- local->op_ret = 0;
- local->first_success = 1;
- local->buf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
- }
- cnt = --local->call_count;
- for (i = local->active + 1; i < child_count; i++) {
- if (local->state[i])
- break;
- }
-
- if (cnt == 0 || i == child_count) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- stub = local->stub;
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local->stub->args.link.oldloc.inode, &local->buf,
- &local->preparent, &local->postparent);
- call_stub_destroy (stub);
- return 0;
- }
-
- local->active = i;
-
- if (local->first_success == 0) {
- STACK_WIND (frame,
- ha_link_cbk,
- children[i],
- children[i]->fops->link,
- &local->stub->args.link.oldloc,
- &local->stub->args.link.newloc);
- return 0;
- }
- cnt = local->call_count;
-
- for (; i < child_count; i++) {
- if (local->state[i]) {
- STACK_WIND (frame,
- ha_link_lookup_cbk,
- children[i],
- children[i]->fops->lookup,
- &local->stub->args.link.newloc,
- 0);
- if (--cnt == 0)
- break;
- }
- }
- return 0;
-}
-
-int32_t
-ha_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- int child_count = 0, i = 0;
- char *stateino = NULL;
- int32_t ret = 0, op_errno = 0;
- uint64_t tmp_stateino = 0;
-
- ret = inode_ctx_get (newloc->inode, this, &tmp_stateino);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "dict_ptr_error()");
- }
- stateino = (char *)(long)tmp_stateino;
-
- if (stateino == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "newloc->inode's ctx is NULL, returning EINVAL");
- STACK_UNWIND (frame, -1, EINVAL, oldloc->inode, NULL, NULL,
- NULL);
- return 0;
- }
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
-
- frame->local = local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!frame->local) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->stub = fop_link_stub (frame, ha_link, oldloc, newloc);
- if (!local->stub) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!local->state) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- memcpy (local->state, pvt->state, child_count);
- local->active = -1;
-
- for (i = 0; i < child_count; i++) {
- if (local->state[i]) {
- local->call_count++;
- if (local->active == -1)
- local->active = i;
- }
- }
-
- STACK_WIND (frame,
- ha_link_cbk,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->link,
- oldloc,
- newloc);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-ha_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- int i, child_count = 0, cnt = 0, ret = 0;
- char *stateino = NULL;
- hafd_t *hafdp = NULL;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- uint64_t tmp_stateino = 0;
- uint64_t tmp_hafdp = 0;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- prev_frame = cookie;
- children = pvt->children;
-
- ret = inode_ctx_get (local->stub->args.create.loc.inode,
- this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "dict_to_ptr() error");
- /* FIXME: handle */
- }
- ret = fd_ctx_get (local->stub->args.create.fd, this, &tmp_hafdp);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "dict_to_ptr() error");
- /* FIXME: handle */
- }
- hafdp = (hafd_t *)(long)tmp_hafdp;
-
- for (i = 0; i < child_count; i++) {
- if (prev_frame->this == children[i])
- break;
- }
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR, "(path=%s) (op_ret=%d op_errno=%d)", local->stub->args.create.loc.path, op_ret, op_errno);
- }
- if (op_ret != -1) {
- stateino[i] = 1;
- hafdp->fdstate[i] = 1;
- if (local->op_ret == -1) {
- local->op_ret = 0;
- local->buf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
- local->first_success = 1;
- }
- local->stub->args.create.flags &= (~O_EXCL);
- }
- LOCK (&frame->lock);
- cnt = --local->call_count;
- UNLOCK (&frame->lock);
-
- for (i = local->active + 1; i < child_count; i++) {
- if (local->state[i])
- break;
- }
-
- if (cnt == 0 || i == child_count) {
- char *state = local->state;
- call_stub_t *stub = local->stub;
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- stub->args.create.fd,
- stub->args.create.loc.inode, &local->buf,
- &local->preparent, &local->postparent);
- GF_FREE (state);
- call_stub_destroy (stub);
- return 0;
- }
- local->active = i;
- cnt = local->call_count;
- for (; i < child_count; i++) {
- if (local->state[i]) {
- STACK_WIND (frame,
- ha_create_cbk,
- children[i],
- children[i]->fops->create,
- &local->stub->args.create.loc,
- local->stub->args.create.flags,
- local->stub->args.create.mode,
- local->stub->args.create.fd);
- if ((local->first_success == 0) || (cnt == 0))
- break;
- }
- }
- return 0;
-}
-
-int32_t
-ha_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode, fd_t *fd)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- int i, child_count = 0;
- char *stateino = NULL;
- xlator_t **children = NULL;
- hafd_t *hafdp = NULL;
- int32_t op_errno = EINVAL;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- children = pvt->children;
-
- if (local == NULL) {
- frame->local = local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->stub = fop_create_stub (frame, ha_create, loc, flags, mode, fd);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!local->state) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->active = -1;
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- memcpy (local->state, pvt->state, child_count);
-
- for (i = 0; i < pvt->child_count; i++) {
- if (local->state[i]) {
- local->call_count++;
- if (local->active == -1)
- local->active = i;
- }
- }
- /* FIXME handle active -1 */
- stateino = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!stateino) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- hafdp = GF_CALLOC (1, sizeof (*hafdp), gf_ha_mt_hafd_t);
- if (!hafdp) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- hafdp->fdstate = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!hafdp->fdstate) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- hafdp->path = gf_strdup(loc->path);
- if (!hafdp->path) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- LOCK_INIT (&hafdp->lock);
- fd_ctx_set (fd, this, (uint64_t)(long)hafdp);
- inode_ctx_put (loc->inode, this, (uint64_t)(long)stateino);
- }
-
- STACK_WIND (frame,
- ha_create_cbk,
- children[local->active],
- children[local->active]->fops->create,
- loc, flags, mode, fd);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- ha_local_wipe (local);
-
- if (stateino) {
- GF_FREE (stateino);
- stateino = NULL;
- }
-
- if (hafdp) {
- if (hafdp->fdstate) {
- GF_FREE (hafdp->fdstate);
- }
-
- if (hafdp->path) {
- GF_FREE (hafdp->path);
- }
-
- GF_FREE (hafdp);
- }
-
- return 0;
-}
-
- int32_t
-ha_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- xlator_t **children = NULL;
- int i = 0, child_count = 0, callcnt = 0, ret = 0;
- call_frame_t *prev_frame = NULL;
- hafd_t *hafdp = NULL;
- uint64_t tmp_hafdp = 0;
-
- local = frame->local;
- pvt = this->private;
- children = pvt->children;
- child_count = pvt->child_count;
- prev_frame = cookie;
-
- ret = fd_ctx_get (local->fd, this, &tmp_hafdp);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "dict_ptr_error()");
- }
- hafdp = (hafd_t *)(long)tmp_hafdp;
-
- for (i = 0; i < child_count; i++)
- if (children[i] == prev_frame->this)
- break;
- LOCK (&frame->lock);
- if (op_ret != -1) {
- hafdp->fdstate[i] = 1;
- local->op_ret = 0;
- }
- if (op_ret == -1 && op_errno != ENOTCONN)
- local->op_errno = op_errno;
- callcnt = --local->call_count;
- UNLOCK (&frame->lock);
-
- if (callcnt == 0) {
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- local->fd);
- }
- return 0;
-}
-
-int32_t
-ha_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd, int wbflags)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- xlator_t **children = NULL;
- int cnt = 0, i, child_count = 0, ret = 0;
- hafd_t *hafdp = NULL;
- uint64_t tmp_stateino = 0;
- int32_t op_errno = ENOMEM;
-
- local = frame->local;
- pvt = this->private;
- children = pvt->children;
- child_count = pvt->child_count;
-
-
- frame->local = local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->fd = fd;
-
- hafdp = GF_CALLOC (1, sizeof (*hafdp), gf_ha_mt_hafd_t);
- if (!hafdp) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- hafdp->fdstate = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!hafdp->fdstate) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- hafdp->path = gf_strdup (loc->path);
- if (!hafdp->path) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- hafdp->active = -1;
- if (pvt->pref_subvol == -1) {
- hafdp->active = fd->inode->ino % child_count;
- }
-
- LOCK_INIT (&hafdp->lock);
- fd_ctx_set (fd, this, (uint64_t)(long)hafdp);
- ret = inode_ctx_get (loc->inode, this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- for (i = 0; i < child_count; i++)
- if (stateino[i])
- cnt++;
- local->call_count = cnt;
- for (i = 0; i < child_count; i++) {
- if (stateino[i]) {
- STACK_WIND (frame,
- ha_open_cbk,
- children[i],
- children[i]->fops->open,
- loc, flags, fd, wbflags);
- if (--cnt == 0)
- break;
- }
- }
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, fd);
- if (hafdp) {
- if (hafdp->fdstate) {
- GF_FREE (hafdp->fdstate);
- hafdp->fdstate = NULL;
- }
-
- if (hafdp->path) {
- GF_FREE (hafdp->path);
- hafdp->path = NULL;
- }
-
- GF_FREE (hafdp);
- }
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref)
-{
- int ret = 0;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- vector,
- count,
- stbuf,
- iobref);
- }
- return 0;
-}
-
-int32_t
-ha_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_readv_stub (frame, ha_readv, fd, size, offset);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_readv_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->readv,
- fd,
- size,
- offset);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- int ret = 0;
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- prebuf,
- postbuf);
- }
- return 0;
-}
-
-int32_t
-ha_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off,
- struct iobref *iobref)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_writev_stub (frame, ha_writev, fd, vector, count, off,
- iobref);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_writev_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->writev,
- fd,
- vector,
- count,
- off,
- iobref);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int ret = 0;
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- }
- return 0;
-}
-
-int32_t
-ha_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_flush_stub (frame, ha_flush, fd);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_flush_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->flush,
- fd);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno);
-
- ha_local_wipe (local);
- return 0;
-}
-
-
- int32_t
-ha_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- int ret = 0;
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- }
- return 0;
-}
-
-int32_t
-ha_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_fsync_stub (frame, ha_fsync, fd, flags);
- if (!local->stub) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- op_errno = ENOMEM;
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_fsync_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->fsync,
- fd,
- flags);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_fstat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- int ret = 0;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- }
- return 0;
-}
-
-int32_t
-ha_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
-
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_fstat_stub (frame, ha_fstat, fd);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_fstat_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->fstat,
- fd);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
-int32_t
-ha_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- xlator_t **children = NULL;
- int i = 0, child_count = 0, callcnt = 0, ret = 0;
- call_frame_t *prev_frame = NULL;
- hafd_t *hafdp = NULL;
- uint64_t tmp_hafdp = 0;
-
- local = frame->local;
- pvt = this->private;
- children = pvt->children;
- child_count = pvt->child_count;
- prev_frame = cookie;
-
- ret = fd_ctx_get (local->fd, this, &tmp_hafdp);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "dict_ptr_error()");
- }
- hafdp = (hafd_t *)(long)tmp_hafdp;
-
- for (i = 0; i < child_count; i++)
- if (children[i] == prev_frame->this)
- break;
- LOCK (&frame->lock);
- if (op_ret != -1) {
- hafdp->fdstate[i] = 1;
- local->op_ret = 0;
- }
- if (op_ret == -1 && op_errno != ENOTCONN)
- local->op_errno = op_errno;
- callcnt = --local->call_count;
- UNLOCK (&frame->lock);
-
- if (callcnt == 0) {
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- local->fd);
- }
- return 0;
-}
-
-int32_t
-ha_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- char *stateino = NULL;
- xlator_t **children = NULL;
- int cnt = 0, i, child_count = 0, ret = 0;
- hafd_t *hafdp = NULL;
- uint64_t tmp_stateino = 0;
- int32_t op_errno = EINVAL;
-
- local = frame->local;
- pvt = this->private;
- children = pvt->children;
- child_count = pvt->child_count;
-
- frame->local = local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->fd = fd;
-
- hafdp = GF_CALLOC (1, sizeof (*hafdp), gf_ha_mt_hafd_t);
- if (!hafdp) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- hafdp->fdstate = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!hafdp->fdstate) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- hafdp->path = gf_strdup (loc->path);
- if (!hafdp->path) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- LOCK_INIT (&hafdp->lock);
- fd_ctx_set (fd, this, (uint64_t)(long)hafdp);
- ret = inode_ctx_get (loc->inode, this, &tmp_stateino);
- stateino = (char *)(long)tmp_stateino;
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "inode_ctx_get() error");
- }
- for (i = 0; i < child_count; i++)
- if (stateino[i])
- cnt++;
- local->call_count = cnt;
- for (i = 0; i < child_count; i++) {
- if (stateino[i]) {
- STACK_WIND (frame,
- ha_opendir_cbk,
- children[i],
- children[i]->fops->opendir,
- loc, fd);
- if (--cnt == 0)
- break;
- }
- }
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL);
- ha_local_wipe (local);
- if (hafdp) {
- if (hafdp->fdstate) {
- GF_FREE (hafdp->fdstate);
- hafdp->fdstate = NULL;
- }
-
- if (hafdp->path) {
- GF_FREE (hafdp->path);
- hafdp->path = NULL;
- }
-
- GF_FREE (hafdp);
- }
- return 0;
-}
-
- int32_t
-ha_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count)
-{
- int ret = 0;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- entries,
- count);
- }
- return 0;
-}
-
-int32_t
-ha_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_getdents_stub (frame, ha_getdents, fd, size, offset,
- flag);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_getdents_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->getdents,
- fd,
- size,
- offset,
- flag);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL, 0);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int ret = 0;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- }
- return 0;
-}
-
-int32_t
-ha_setdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags,
- dir_entry_t *entries,
- int32_t count)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
-
- local->stub = fop_setdents_stub (frame, ha_setdents, fd, flags, entries,
- count);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_setdents_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->setdents,
- fd,
- flags,
- entries,
- count);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int ret = 0;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- }
- return 0;
-}
-
-int32_t
-ha_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_fsyncdir_stub (frame, ha_fsyncdir, fd, flags);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_fsyncdir_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->fsyncdir,
- fd,
- flags);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
-
- int32_t
-ha_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf)
-{
- ha_local_t *local = NULL;
- ha_private_t *priv = NULL;
-
- local = frame->local;
- if (-1 == op_ret) {
- local->active = (local->active + 1) % priv->child_count;
- local->tries--;
- if (!local->tries)
- goto out;
-
- STACK_WIND (frame, ha_statfs_cbk,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->statfs,
- &local->loc);
- return 0;
- }
-
- out:
- loc_wipe (&local->loc);
- STACK_UNWIND (frame, op_ret, op_errno, buf);
-
- return 0;
-}
-
-int32_t
-ha_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- ha_private_t *priv = NULL;
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- /* The normal way of handling failover doesn't work here
- * as loc->inode may be null in this case.
- */
- local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- priv = this->private;
- frame->local = local;
- local->active = priv->pref_subvol;
- if (-1 == local->active)
- local->active = 0;
- local->tries = priv->child_count;
- loc_copy (&local->loc, loc);
-
- STACK_WIND (frame, ha_statfs_cbk, HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->statfs, loc);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
- return 0;
-}
-
- int32_t
-ha_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- }
- return 0;
-}
-
-int32_t
-ha_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_setxattr_stub (frame, ha_setxattr, loc, dict, flags);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_setxattr_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->setxattr,
- loc,
- dict,
- flags);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
-
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- dict);
- }
- return 0;
-}
-
-int32_t
-ha_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_getxattr_stub (frame, ha_getxattr, loc, name);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_getxattr_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->getxattr,
- loc,
- name);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
-int32_t
-ha_xattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- int ret = -1;
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- }
- return 0;
-}
-
-
-int32_t
-ha_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
-
- local->stub = fop_xattrop_stub (frame, ha_xattrop, loc, flags, dict);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_xattrop_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->xattrop,
- loc,
- flags,
- dict);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, dict);
-
- ha_local_wipe (local);
- return 0;
-}
-
-int32_t
-ha_fxattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- int ret = -1;
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0)
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
-}
-
-int32_t
-ha_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_fxattrop_stub (frame, ha_fxattrop, fd, flags, dict);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_fxattrop_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->fxattrop,
- fd,
- flags,
- dict);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, dict);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int ret = -1;
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- }
- return 0;
-}
-
-int32_t
-ha_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
-
- local->stub = fop_removexattr_stub (frame, ha_removexattr, loc, name);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_removexattr_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->removexattr,
- loc,
- name);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno);
-
- ha_local_wipe (local);
- return 0;
-}
-
-int32_t
-ha_lk_setlk_unlck_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *lock)
-{
- ha_local_t *local = NULL;
- int cnt = 0;
- call_stub_t *stub = NULL;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- cnt = --local->call_count;
- if (op_ret == 0)
- local->op_ret = 0;
- UNLOCK (&frame->lock);
-
- if (cnt == 0) {
- stub = local->stub;
- GF_FREE (local->state);
- if (stub->args.lk.lock.l_type == F_UNLCK) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno, &stub->args.lk.lock);
- } else {
- STACK_UNWIND (frame, -1, EIO, NULL);
- }
- call_stub_destroy (stub);
- }
- return 0;
-}
-
-int32_t
-ha_lk_setlk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *lock)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- xlator_t **children = NULL;
- int i = 0, cnt = 0, j = 0;
- int child_count = 0;
- call_frame_t *prev_frame = NULL;
- char *state = NULL;
-
- local = frame->local;
- pvt = this->private;
- children = pvt->children;
- child_count = pvt->child_count;
- prev_frame = cookie;
- state = local->state;
-
- if (op_ret == 0)
- local->op_ret = 0;
-
- if ((op_ret == 0) || (op_ret == -1 && op_errno == ENOTCONN)) {
- for (i = 0; i < child_count; i++) {
- if (prev_frame->this == cookie)
- break;
- }
- i++;
- for (; i < child_count; i++) {
- if (local->state[i])
- break;
- }
- if (i == child_count) {
- call_stub_t *stub = local->stub;
- GF_FREE (local->state);
- STACK_UNWIND (frame, 0, op_errno, &stub->args.lk.lock);
- call_stub_destroy (stub);
- return 0;
- }
- STACK_WIND (frame,
- ha_lk_setlk_cbk,
- children[i],
- children[i]->fops->lk,
- local->stub->args.lk.fd,
- local->stub->args.lk.cmd,
- &local->stub->args.lk.lock);
- return 0;
- } else {
- for (i = 0; i < child_count; i++) {
- if (prev_frame->this == cookie)
- break;
- }
- cnt = 0;
- for (j = 0; j < i; j++) {
- if (state[i])
- cnt++;
- }
- if (cnt) {
- struct gf_flock lock;
- lock = local->stub->args.lk.lock;
- for (i = 0; i < child_count; i++) {
- if (state[i]) {
- STACK_WIND (frame,
- ha_lk_setlk_unlck_cbk,
- children[i],
- children[i]->fops->lk,
- local->stub->args.lk.fd,
- local->stub->args.lk.cmd,
- &lock);
- if (--cnt == 0)
- break;
- }
- }
- return 0;
- } else {
- GF_FREE (local->state);
- call_stub_destroy (local->stub);
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- lock);
- return 0;
- }
- }
-}
-
-int32_t
-ha_lk_getlk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *lock)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- fd_t *fd = NULL;
- int child_count = 0, i = 0;
- xlator_t **children = NULL;
- call_frame_t *prev_frame = NULL;
-
- local = frame->local;
- pvt = this->private;
- fd = local->stub->args.lk.fd;
- child_count = pvt->child_count;
- children = pvt->children;
- prev_frame = cookie;
-
- if (op_ret == 0) {
- GF_FREE (local->state);
- call_stub_destroy (local->stub);
- STACK_UNWIND (frame, 0, 0, lock);
- return 0;
- }
-
- for (i = 0; i < child_count; i++) {
- if (prev_frame->this == children[i])
- break;
- }
-
- for (; i < child_count; i++) {
- if (local->state[i])
- break;
- }
-
- if (i == child_count) {
- GF_FREE (local->state);
- call_stub_destroy (local->stub);
- STACK_UNWIND (frame, op_ret, op_errno, lock);
- return 0;
- }
-
- STACK_WIND (frame,
- ha_lk_getlk_cbk,
- children[i],
- children[i]->fops->lk,
- fd,
- local->stub->args.lk.cmd,
- &local->stub->args.lk.lock);
- return 0;
-}
-
-int32_t
-ha_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *lock)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- hafd_t *hafdp = NULL;
- char *state = NULL;
- int child_count = 0, i = 0, cnt = 0, ret = 0;
- xlator_t **children = NULL;
- uint64_t tmp_hafdp = 0;
- int32_t op_errno = EINVAL;
-
- local = frame->local;
- pvt = this->private;
- child_count = pvt->child_count;
- children = pvt->children;
- ret = fd_ctx_get (fd, this, &tmp_hafdp);
- if (ret < 0)
- gf_log (this->name, GF_LOG_ERROR, "fd_ctx_get failed");
-
- if (local == NULL) {
- local = frame->local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->active = -1;
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- }
- hafdp = (hafd_t *)(long)tmp_hafdp;
-
- if (local->active == -1) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- local->stub = fop_lk_stub (frame, ha_lk, fd, cmd, lock);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- local->state = GF_CALLOC (1, child_count, gf_ha_mt_char);
- if (!local->state) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- state = hafdp->fdstate;
- LOCK (&hafdp->lock);
- memcpy (local->state, state, child_count);
- UNLOCK (&hafdp->lock);
- if (cmd == F_GETLK) {
- for (i = 0; i < child_count; i++) {
- if (local->state[i])
- break;
- }
- STACK_WIND (frame,
- ha_lk_getlk_cbk,
- children[i],
- children[i]->fops->lk,
- fd,
- cmd,
- lock);
- } else if (cmd == F_SETLK && lock->l_type == F_UNLCK) {
- for (i = 0; i < child_count; i++) {
- if (local->state[i])
- local->call_count++;
- }
- cnt = local->call_count;
- for (i = 0; i < child_count; i++) {
- if (local->state[i]) {
- STACK_WIND (frame,
- ha_lk_setlk_unlck_cbk,
- children[i],
- children[i]->fops->lk,
- fd, cmd, lock);
- if (--cnt == 0)
- break;
- }
- }
- } else {
- for (i = 0; i < child_count; i++) {
- if (local->state[i])
- break;
- }
- STACK_WIND (frame,
- ha_lk_setlk_cbk,
- children[i],
- children[i]->fops->lk,
- fd,
- cmd,
- lock);
- }
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
- int32_t
-ha_inode_entry_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- }
- return 0;
-}
-
-int32_t
-ha_inodelk (call_frame_t *frame,
- xlator_t *this,
- const char *volume,
- loc_t *loc,
- int32_t cmd,
- struct gf_flock *lock)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_inodelk_stub (frame, ha_inodelk, volume,
- loc, cmd, lock);
- STACK_WIND_COOKIE (frame,
- ha_inode_entry_lk_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->inodelk,
- volume,
- loc,
- cmd,
- lock);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
-}
-
-int32_t
-ha_entrylk (call_frame_t *frame,
- xlator_t *this,
- const char *volume,
- loc_t *loc,
- const char *basename,
- entrylk_cmd cmd,
- entrylk_type type)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_entrylk_stub (frame, ha_entrylk, volume,
- loc, basename, cmd, type);
- STACK_WIND_COOKIE (frame,
- ha_inode_entry_lk_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->entrylk,
- volume, loc, basename, cmd, type);
- return 0;
-err:
- STACK_UNWIND (frame, -1, op_errno);
- return 0;
-}
-
- int32_t
-ha_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- int ret = -1;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0) {
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- file_checksum,
- dir_checksum);
- }
- return 0;
-}
-
-int32_t
-ha_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
-{
- int op_errno = 0;
- ha_local_t *local = NULL;
-
- op_errno = ha_alloc_init_inode (frame, loc->inode);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- local->stub = fop_checksum_stub (frame, ha_checksum, loc, flag);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- STACK_WIND_COOKIE (frame,
- ha_checksum_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->checksum,
- loc,
- flag);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
-int32_t
-ha_common_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
-{
- int ret = 0;
-
- ret = ha_handle_cbk (frame, cookie, op_ret, op_errno);
- if (ret == 0)
- STACK_UNWIND (frame, op_ret, op_errno, entries);
- return 0;
-}
-
-int32_t
-ha_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off);
-
-int32_t
-ha_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off);
-int32_t
-ha_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off, int whichop)
-{
- ha_local_t *local = NULL;
- int op_errno = 0;
-
- op_errno = ha_alloc_init_fd (frame, fd);
- if (op_errno < 0) {
- op_errno = -op_errno;
- goto err;
- }
- local = frame->local;
- if (whichop == GF_FOP_READDIR)
- local->stub = fop_readdir_stub (frame, ha_readdir, fd, size,
- off);
- else
- local->stub = fop_readdirp_stub (frame, ha_readdirp, fd, size,
- off);
- if (!local->stub) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- if (whichop == GF_FOP_READDIR)
- STACK_WIND_COOKIE (frame, ha_common_readdir_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->readdir,
- fd, size, off);
- else
- STACK_WIND_COOKIE (frame, ha_common_readdir_cbk,
- (void *)(long)local->active,
- HA_ACTIVE_CHILD(this, local),
- HA_ACTIVE_CHILD(this, local)->fops->readdirp,
- fd, size, off);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
-
- ha_local_wipe (local);
- return 0;
-}
-
-
-int32_t
-ha_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off)
-{
- ha_do_readdir (frame, this, fd, size, off, GF_FOP_READDIR);
- return 0;
-}
-
-int32_t
-ha_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off)
-{
- ha_do_readdir (frame, this, fd, size, off, GF_FOP_READDIRP);
- return 0;
-}
-
-/* Management operations */
-
- int32_t
-ha_stats_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct xlator_stats *stats)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- int i = 0;
-
- local = frame->local;
- pvt = this->private;
- prev_frame = cookie;
- children = pvt->children;
-
- if (op_ret == -1 && op_errno == ENOTCONN) {
- for (i = 0; i < pvt->child_count; i++) {
- if (prev_frame->this == children[i])
- break;
- }
- i++;
- for (; i < pvt->child_count; i++) {
- if (pvt->state[i])
- break;
- }
-
- if (i == pvt->child_count) {
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
- return 0;
- }
- STACK_WIND (frame,
- ha_stats_cbk,
- children[i],
- children[i]->mops->stats,
- local->flags);
- return 0;
- }
-
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- stats);
- return 0;
-}
-
-int32_t
-ha_stats (call_frame_t *frame,
- xlator_t *this,
- int32_t flags)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- xlator_t **children = NULL;
- int i = 0;
- int32_t op_errno = EINVAL;
-
- local = frame->local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- pvt = this->private;
- children = pvt->children;
- for (i = 0; i < pvt->child_count; i++) {
- if (pvt->state[i])
- break;
- }
-
- if (i == pvt->child_count) {
- op_errno = ENOTCONN;
- goto err;
- }
- local->flags = flags;
-
- STACK_WIND (frame,
- ha_stats_cbk,
- children[i],
- children[i]->mops->stats,
- flags);
- return 0;
-
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
-
- ha_local_wipe (local);
- return 0;
-
-}
-
-
-int32_t
-ha_getspec_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- char *spec_data)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- call_frame_t *prev_frame = NULL;
- xlator_t **children = NULL;
- int i = 0;
-
- local = frame->local;
- pvt = this->private;
- prev_frame = cookie;
- children = pvt->children;
-
- if (op_ret == -1 && op_errno == ENOTCONN) {
- for (i = 0; i < pvt->child_count; i++) {
- if (prev_frame->this == children[i])
- break;
- }
- i++;
- for (; i < pvt->child_count; i++) {
- if (pvt->state[i])
- break;
- }
-
- if (i == pvt->child_count) {
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
- return 0;
- }
- STACK_WIND (frame,
- ha_getspec_cbk,
- children[i],
- children[i]->mops->getspec,
- local->pattern,
- local->flags);
- return 0;
- }
-
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- spec_data);
- return 0;
-}
-
-int32_t
-ha_getspec (call_frame_t *frame,
- xlator_t *this,
- const char *key,
- int32_t flags)
-{
- ha_local_t *local = NULL;
- ha_private_t *pvt = NULL;
- xlator_t **children = NULL;
- int i = 0;
- int32_t op_errno = EINVAL;
-
- local = frame->local = GF_CALLOC (1, sizeof (*local),
- gf_ha_mt_ha_local_t);
- if (!local) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto err;
- }
-
- pvt = this->private;
- children = pvt->children;
-
- for (i = 0; i < pvt->child_count; i++) {
- if (pvt->state[i])
- break;
- }
-
- if (i == pvt->child_count) {
- op_errno = ENOTCONN;
- goto err;
- }
- local->flags = flags;
- local->pattern = (char *)key;
-
- STACK_WIND (frame,
- ha_getspec_cbk,
- children[i],
- children[i]->mops->getspec,
- key, flags);
- return 0;
-err:
- local = frame->local;
- frame->local = NULL;
-
- STACK_UNWIND (frame, -1, ENOTCONN, NULL);
-
- ha_local_wipe (local);
- return 0;
-
-}
-
-int32_t
-ha_closedir (xlator_t *this,
- fd_t *fd)
-{
- hafd_t *hafdp = NULL;
- int op_errno = 0;
- uint64_t tmp_hafdp = 0;
-
- op_errno = fd_ctx_del (fd, this, &tmp_hafdp);
- if (op_errno != 0) {
- gf_log (this->name, GF_LOG_ERROR, "fd_ctx_del() error");
- return 0;
- }
- hafdp = (hafd_t *)(long)tmp_hafdp;
-
- GF_FREE (hafdp->fdstate);
- GF_FREE (hafdp->path);
- LOCK_DESTROY (&hafdp->lock);
- return 0;
-}
-
-int32_t
-ha_close (xlator_t *this,
- fd_t *fd)
-{
- hafd_t *hafdp = NULL;
- int op_errno = 0;
- uint64_t tmp_hafdp = 0;
-
- op_errno = fd_ctx_del (fd, this, &tmp_hafdp);
- if (op_errno != 0) {
- gf_log (this->name, GF_LOG_ERROR, "fd_ctx_del() error");
- return 0;
- }
- hafdp = (hafd_t *)(long)tmp_hafdp;
-
- GF_FREE (hafdp->fdstate);
- GF_FREE (hafdp->path);
- LOCK_DESTROY (&hafdp->lock);
- return 0;
-}
-
-/* notify */
-int32_t
-notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
-{
- ha_private_t *pvt = NULL;
- int32_t i = 0, upcnt = 0;
-
- pvt = this->private;
- if (pvt == NULL) {
- gf_log (this->name, GF_LOG_DEBUG, "got notify before init()");
- return 0;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_DOWN:
- {
- for (i = 0; i < pvt->child_count; i++) {
- if (data == pvt->children[i])
- break;
- }
- gf_log (this->name, GF_LOG_DEBUG, "GF_EVENT_CHILD_DOWN from %s", pvt->children[i]->name);
- pvt->state[i] = 0;
- for (i = 0; i < pvt->child_count; i++) {
- if (pvt->state[i])
- break;
- }
- if (i == pvt->child_count) {
- default_notify (this, event, data);
- }
- }
- break;
- case GF_EVENT_CHILD_UP:
- {
- for (i = 0; i < pvt->child_count; i++) {
- if (data == pvt->children[i])
- break;
- }
-
- gf_log (this->name, GF_LOG_DEBUG, "GF_EVENT_CHILD_UP from %s", pvt->children[i]->name);
-
- pvt->state[i] = 1;
-
- for (i = 0; i < pvt->child_count; i++) {
- if (pvt->state[i])
- upcnt++;
- }
-
- if (upcnt == 1) {
- default_notify (this, event, data);
- }
- }
- break;
-
- default:
- {
- default_notify (this, event, data);
- }
- }
-
- return 0;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_ha_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-int
-init (xlator_t *this)
-{
- ha_private_t *pvt = NULL;
- xlator_list_t *trav = NULL;
- int count = 0, ret = 0;
-
-
- if (!this->children) {
- gf_log (this->name,GF_LOG_ERROR,
- "FATAL: ha should have one or more child defined");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- trav = this->children;
- pvt = GF_CALLOC (1, sizeof (ha_private_t), gf_ha_mt_ha_private_t);
-
- ret = dict_get_int32 (this->options, "preferred-subvolume",
- &pvt->pref_subvol);
- if (ret < 0) {
- pvt->pref_subvol = -1;
- }
-
- trav = this->children;
- while (trav) {
- count++;
- trav = trav->next;
- }
-
- pvt->child_count = count;
- pvt->children = GF_CALLOC (count, sizeof (xlator_t*),
- gf_ha_mt_xlator_t);
-
- trav = this->children;
- count = 0;
- while (trav) {
- pvt->children[count] = trav->xlator;
- count++;
- trav = trav->next;
- }
-
- pvt->state = GF_CALLOC (1, count, gf_ha_mt_char);
- this->private = pvt;
- return 0;
-}
-
-void
-fini (xlator_t *this)
-{
- ha_private_t *priv = NULL;
- priv = this->private;
- GF_FREE (priv);
- return;
-}
-
-
-struct xlator_fops fops = {
- .lookup = ha_lookup,
- .stat = ha_stat,
- .readlink = ha_readlink,
- .mknod = ha_mknod,
- .mkdir = ha_mkdir,
- .unlink = ha_unlink,
- .rmdir = ha_rmdir,
- .symlink = ha_symlink,
- .rename = ha_rename,
- .link = ha_link,
- .truncate = ha_truncate,
- .create = ha_create,
- .open = ha_open,
- .readv = ha_readv,
- .writev = ha_writev,
- .statfs = ha_statfs,
- .flush = ha_flush,
- .fsync = ha_fsync,
- .setxattr = ha_setxattr,
- .getxattr = ha_getxattr,
- .removexattr = ha_removexattr,
- .opendir = ha_opendir,
- .readdir = ha_readdir,
- .readdirp = ha_readdirp,
- .getdents = ha_getdents,
- .fsyncdir = ha_fsyncdir,
- .access = ha_access,
- .ftruncate = ha_ftruncate,
- .fstat = ha_fstat,
- .lk = ha_lk,
- .setdents = ha_setdents,
- .lookup_cbk = ha_lookup_cbk,
- .checksum = ha_checksum,
- .xattrop = ha_xattrop,
- .fxattrop = ha_fxattrop,
- .setattr = ha_setattr,
- .fsetattr = ha_fsetattr,
-};
-
-struct xlator_cbks cbks = {
- .release = ha_close,
- .releasedir = ha_closedir,
- .forget = ha_forget,
-};
diff --git a/xlators/cluster/ha/src/ha.h b/xlators/cluster/ha/src/ha.h
deleted file mode 100644
index 39b6851e7c2..00000000000
--- a/xlators/cluster/ha/src/ha.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __HA_H_
-#define __HA_H_
-
-#include "ha-mem-types.h"
-
-typedef struct {
- call_stub_t *stub;
- int32_t op_ret, op_errno;
- int32_t active, tries, revalidate, revalidate_error;
- int32_t call_count;
- char *state, *pattern;
- dict_t *dict;
- loc_t loc;
- struct iatt buf;
- struct iatt postparent;
- struct iatt preparent;
- fd_t *fd;
- inode_t *inode;
- int32_t flags;
- int32_t first_success;
-} ha_local_t;
-
-typedef struct {
- char *state;
- xlator_t **children;
- int child_count, pref_subvol;
-} ha_private_t;
-
-typedef struct {
- char *fdstate;
- char *path;
- gf_lock_t lock;
- int active;
-} hafd_t;
-
-#define HA_ACTIVE_CHILD(this, local) (((ha_private_t *)this->private)->children[local->active])
-
-extern int ha_alloc_init_fd (call_frame_t *frame, fd_t *fd);
-
-extern int ha_handle_cbk (call_frame_t *frame, void *cookie, int op_ret, int op_errno) ;
-
-extern int ha_alloc_init_inode (call_frame_t *frame, inode_t *inode);
-
-#endif
diff --git a/xlators/cluster/map/Makefile.am b/xlators/cluster/map/Makefile.am
deleted file mode 100644
index d471a3f9243..00000000000
--- a/xlators/cluster/map/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/cluster/map/src/Makefile.am b/xlators/cluster/map/src/Makefile.am
deleted file mode 100644
index 26e19137a8b..00000000000
--- a/xlators/cluster/map/src/Makefile.am
+++ /dev/null
@@ -1,15 +0,0 @@
-xlator_LTLIBRARIES = map.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/cluster
-
-map_la_LDFLAGS = -module -avoidversion
-
-map_la_SOURCES = map.c map-helper.c
-map_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = map.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/cluster/map/src/map-helper.c b/xlators/cluster/map/src/map-helper.c
deleted file mode 100644
index 81212fcfdf8..00000000000
--- a/xlators/cluster/map/src/map-helper.c
+++ /dev/null
@@ -1,358 +0,0 @@
-/*
- Copyright (c) 2009-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "map.h"
-
-
-xlator_t *
-map_subvol_next (xlator_t *this, xlator_t *prev)
-{
- map_private_t *priv = NULL;
- xlator_t *next = NULL;
- int i = 0;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (priv->xlarray[i].xl == prev) {
- if ((i + 1) < priv->child_count)
- next = priv->xlarray[i + 1].xl;
- break;
- }
- }
-
- return next;
-}
-
-int
-map_subvol_cnt (xlator_t *this, xlator_t *subvol)
-{
- int i = 0;
- int ret = -1;
- map_private_t *priv = NULL;
-
- priv = this->private;
-
- for (i = 0; i < priv->child_count; i++) {
- if (subvol == priv->xlarray[i].xl) {
- ret = i;
- break;
- }
- }
-
- return ret;
-}
-
-int
-map_itransform (xlator_t *this, xlator_t *subvol, uint64_t x, uint64_t *y_p)
-{
- map_private_t *priv = NULL;
- int cnt = 0;
- int max = 0;
- uint64_t y = 0;
-
- if (x == ((uint64_t) -1)) {
- y = (uint64_t) -1;
- goto out;
- }
-
- priv = this->private;
-
- max = priv->child_count;
- cnt = map_subvol_cnt (this, subvol);
-
- y = ((x * max) + cnt);
-
-out:
- if (y_p)
- *y_p = y;
-
- return 0;
-}
-
-
-int
-map_deitransform (xlator_t *this, uint64_t y, xlator_t **subvol_p,
- uint64_t *x_p)
-{
- int cnt = 0;
- int max = 0;
- uint64_t x = 0;
- xlator_t *subvol = 0;
- map_private_t *priv = NULL;
-
- priv = this->private;
- max = priv->child_count;
-
- cnt = y % max;
- x = y / max;
-
- subvol = priv->xlarray[cnt].xl;
-
- if (subvol_p)
- *subvol_p = subvol;
-
- if (x_p)
- *x_p = x;
-
- return 0;
-}
-
-
-xlator_t *
-get_mapping_subvol_from_path (xlator_t *this, const char *path)
-{
- map_private_t *priv = NULL;
- struct map_pattern *map = NULL;
-
- /* To make sure we handle '/' properly */
- if (!strcmp (path, "/"))
- return NULL;
-
- priv = this->private;
-
- map = priv->map;
- while (map) {
- if (!strncmp (map->directory, path, map->dir_len)) {
- if ((path[map->dir_len] == '/') ||
- (path[map->dir_len] == '\0')) {
- return map->xl;
- }
- }
-
- map = map->next;
- }
-
- return priv->default_xl;
-}
-
-xlator_t *
-get_mapping_subvol_from_ctx (xlator_t *this, inode_t *inode)
-{
- uint64_t subvol = 0;
- int ret = -1;
-
- ret = inode_ctx_get (inode, this, &subvol);
- if (ret != 0)
- return NULL;
-
- return (xlator_t *)(long)subvol;
-}
-
-int
-check_multiple_volume_entry (xlator_t *this,
- xlator_t *subvol)
-{
- int ret = -1;
- int idx = 0;
- map_private_t *priv = NULL;
-
- priv = this->private;
-
- for (idx = 0; idx < priv->child_count; idx++) {
- if (priv->xlarray[idx].xl == subvol) {
- if (priv->xlarray[idx].mapped) {
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume '%s' is already mapped",
- subvol->name);
- goto out;
- }
- priv->xlarray[idx].mapped = 1;
- ret = 0;
- goto out;
- }
- }
-
- gf_log (this->name, GF_LOG_ERROR,
- "subvolume '%s' is not found",
- subvol->name);
-
- out:
- return ret;
-}
-
-int
-verify_dir_and_assign_subvol (xlator_t *this,
- const char *directory,
- const char *subvol)
-{
- int default_flag = 0;
- int ret = -1;
- int idx = 0;
- map_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- struct map_pattern *tmp_map = NULL;
-
- priv = this->private;
-
- /* check if directory is valid, ie, its a top level dir, and
- * not includes a '*' in it.
- */
- if (!strcmp ("*", directory)) {
- default_flag = 1;
- } else {
- if (directory[0] != '/') {
- gf_log (this->name, GF_LOG_ERROR,
- "map takes absolute path, starting with '/'. "
- "not '%s'", directory);
- goto out;
- }
- for (idx = 1; idx < (strlen (directory) - 1); idx++) {
- if (directory[idx] == '/') {
- gf_log (this->name, GF_LOG_ERROR,
- "map takes only top level directory, "
- "not '%s'", directory);
- goto out;
- }
- }
- }
-
- /* Assign proper subvolume */
- trav = this->children;
- while (trav) {
- if (!strcmp (trav->xlator->name, subvol)) {
-
- /* Check if there is another directory for
- * same volume, if yes, return error.
- */
- ret = check_multiple_volume_entry (this,
- trav->xlator);
- if (ret != 0) {
- goto out;
- }
-
- ret = 0;
- if (default_flag) {
- if (priv->default_xl) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "'*' specified more than "
- "once. don't confuse me!!!");
- }
-
- priv->default_xl = trav->xlator;
- goto out;
- }
-
- tmp_map = GF_CALLOC (1, sizeof (struct map_pattern),
- gf_map_mt_map_pattern);
- tmp_map->xl = trav->xlator;
- tmp_map->dir_len = strlen (directory);
-
- /* make sure that the top level directory starts
- * with '/' and ends without '/'
- */
- tmp_map->directory = gf_strdup (directory);
- if (directory[tmp_map->dir_len - 1] == '/') {
- tmp_map->dir_len--;
- }
-
- if (!priv->map)
- priv->map = tmp_map;
- else {
- struct map_pattern *trav_map = NULL;
- trav_map = priv->map;
- while (trav_map->next)
- trav_map = trav_map->next;
- trav_map->next = tmp_map;
- }
-
- goto out;
- }
-
- trav = trav->next;
- }
-
- gf_log (this->name, GF_LOG_ERROR,
- "map volume '%s' is not proper subvolume", subvol);
-
- out:
- return ret;
-}
-
-int
-assign_default_subvol (xlator_t *this, const char *default_xl)
-{
- int ret = -1;
- map_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
-
- priv = this->private;
- trav = this->children;
-
- while (trav) {
- if (!strcmp (trav->xlator->name, default_xl)) {
- ret = check_multiple_volume_entry (this,
- trav->xlator);
- if (ret != 0) {
- goto out;
- }
- if (priv->default_xl)
- gf_log (this->name, GF_LOG_WARNING,
- "default-volume option provided, "
- "overriding earlier '*' option");
- priv->default_xl = trav->xlator;
- return 0;
- }
- trav = trav->next;
- }
-
- gf_log (this->name, GF_LOG_ERROR,
- "default-volume value is not an valid subvolume. check again");
- out:
- return -1;
-}
-
-void
-verify_if_all_subvolumes_got_used (xlator_t *this)
-{
- int idx = 0;
- map_private_t *priv = NULL;
-
- priv = this->private;
-
- for (idx = 0; idx < priv->child_count; idx++) {
- if (!priv->xlarray[idx].mapped) {
- if (!priv->default_xl) {
- priv->default_xl = priv->xlarray[idx].xl;
- priv->xlarray[idx].mapped = 1;
- } else {
- gf_log (this->name, GF_LOG_WARNING,
- "subvolume '%s' is not mapped to "
- "any directory",
- priv->xlarray[idx].xl->name);
- }
- }
- }
-
- if (!priv->default_xl) {
- gf_log (this->name, GF_LOG_WARNING,
- "default subvolume not specified, filesystem "
- "may not work properly. Check 'map' translator "
- "documentation for more info");
- }
-
- return ;
-}
diff --git a/xlators/cluster/map/src/map-mem-types.h b/xlators/cluster/map/src/map-mem-types.h
deleted file mode 100644
index 669b93dc2c2..00000000000
--- a/xlators/cluster/map/src/map-mem-types.h
+++ /dev/null
@@ -1,35 +0,0 @@
-
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __MAP_MEM_TYPES_H__
-#define __MAP_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_map_mem_types_ {
- gf_map_mt_map_private_t = gf_common_mt_end + 1,
- gf_map_mt_map_local_t,
- gf_map_mt_map_xlator_array,
- gf_map_mt_map_pattern,
- gf_map_mt_end
-};
-#endif
-
diff --git a/xlators/cluster/map/src/map.c b/xlators/cluster/map/src/map.c
deleted file mode 100644
index ead9da0b92c..00000000000
--- a/xlators/cluster/map/src/map.c
+++ /dev/null
@@ -1,2577 +0,0 @@
-/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "map.h"
-
-/* TODO :
- * -> support for 'get' 'put' API in through xattrs.
- * -> define the behavior of notify()
- */
-
-static int32_t
-map_stat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-static int32_t
-map_setattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *statpre,
- struct iatt *statpost)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, statpre->ia_ino, &statpre->ia_ino);
- map_itransform (this, prev->this, statpost->ia_ino, &statpost->ia_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, statpre, statpost);
- return 0;
-}
-
-static int32_t
-map_fsetattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *statpre,
- struct iatt *statpost)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, statpre->ia_ino, &statpre->ia_ino);
- map_itransform (this, prev->this, statpost->ia_ino, &statpost->ia_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, statpre, statpost);
- return 0;
-}
-
-static int32_t
-map_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, postbuf->ia_ino, &postbuf->ia_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
-
-static int32_t
-map_ftruncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, postbuf->ia_ino, &postbuf->ia_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
-
-
-static int32_t
-map_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-static int32_t
-map_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *sbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, path, sbuf);
- return 0;
-}
-
-static int32_t
-map_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent);
- return 0;
-}
-
-static int32_t
-map_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent);
- return 0;
-}
-
-
-static int32_t
-map_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-static int32_t
-map_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
-}
-
-static int32_t
-map_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
- return 0;
-}
-
-static int32_t
-map_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, stbuf->ia_ino, &stbuf->ia_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
- return 0;
-}
-
-static int32_t
-map_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, postbuf->ia_ino, &postbuf->ia_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
-
-static int32_t
-map_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-static int32_t
-map_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
-
-
-static int32_t
-map_fstat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-
-static int32_t
-map_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count)
-{
- STACK_UNWIND (frame, op_ret, op_errno, entries, count);
- return 0;
-}
-
-
-static int32_t
-map_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-static int32_t
-map_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-static int32_t
-map_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-static int32_t
-map_fsetxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-static int32_t
-map_fgetxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
-}
-
-
-
-static int32_t
-map_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
-}
-
-int32_t
-map_xattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
-}
-
-int32_t
-map_fxattrop_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
-}
-
-static int32_t
-map_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-static int32_t
-map_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *lock)
-{
- STACK_UNWIND (frame, op_ret, op_errno, lock);
- return 0;
-}
-
-
-static int32_t
-map_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-
-static int32_t
-map_finodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-static int32_t
-map_entrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-static int32_t
-map_fentrylk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno)
-
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-static int32_t
-map_newentry_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
-
-}
-
-
-static int32_t
-map_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
- return 0;
-}
-
-
-/*
- * map_normalize_stats -
- */
-void
-map_normalize_stats (struct statvfs *buf,
- unsigned long bsize,
- unsigned long frsize)
-{
- double factor;
-
- if (buf->f_bsize != bsize) {
- factor = ((double) buf->f_bsize) / bsize;
- buf->f_bsize = bsize;
- buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree);
- buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail);
- }
-
- if (buf->f_frsize != frsize) {
- factor = ((double) buf->f_frsize) / frsize;
- buf->f_frsize = frsize;
- buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks);
- }
-}
-
-
-int32_t
-map_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *stbuf)
-{
- struct statvfs *dict_buf = NULL;
- map_local_t *local = NULL;
- int this_call_cnt = 0;
- unsigned long bsize;
- unsigned long frsize;
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- this_call_cnt = --local->call_count;
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
- }
- local->op_ret = 0;
-
- /* when a call is successfull, add it to local->dict */
- dict_buf = &local->statvfs;
-
- if (dict_buf->f_bsize != 0) {
- bsize = max (dict_buf->f_bsize,
- stbuf->f_bsize);
-
- frsize = max (dict_buf->f_frsize,
- stbuf->f_frsize);
- map_normalize_stats(dict_buf, bsize, frsize);
- map_normalize_stats(stbuf, bsize, frsize);
- } else {
- dict_buf->f_bsize = stbuf->f_bsize;
- dict_buf->f_frsize = stbuf->f_frsize;
- }
-
- dict_buf->f_blocks += stbuf->f_blocks;
- dict_buf->f_bfree += stbuf->f_bfree;
- dict_buf->f_bavail += stbuf->f_bavail;
- dict_buf->f_files += stbuf->f_files;
- dict_buf->f_ffree += stbuf->f_ffree;
- dict_buf->f_favail += stbuf->f_favail;
- dict_buf->f_fsid = stbuf->f_fsid;
- dict_buf->f_flag = stbuf->f_flag;
- dict_buf->f_namemax = stbuf->f_namemax;
- }
-unlock:
- UNLOCK (&frame->lock);
-
- if (!this_call_cnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->statvfs);
- }
-
- return 0;
-}
-
-int32_t
-map_single_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- call_frame_t *prev = NULL;
- prev = cookie;
-
- map_itransform (this, prev->this, buf->ia_ino, &buf->ia_ino);
-
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf, dict);
-
- return 0;
-}
-
-int32_t
-map_root_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- int callcnt = 0;
- map_local_t *local = NULL;
- inode_t *tmp_inode = NULL;
- dict_t *tmp_dict = NULL;
-
- local = frame->local;
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if ((op_ret == 0) && (local->op_ret == -1)) {
- local->op_ret = 0;
- local->stbuf = *buf;
- if (dict)
- local->dict = dict_ref (dict);
- local->inode = inode_ref (inode);
- }
- if (op_ret == -1)
- local->op_errno = op_errno;
-
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- tmp_dict = local->dict;
- tmp_inode = local->inode;
-
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->inode,
- &local->stbuf, local->dict);
-
- inode_unref (local->inode);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
-
- return 0;
-}
-
-
-int32_t
-map_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int callcnt = 0;
- map_local_t *local = NULL;
- fd_t *local_fd = NULL;
-
- local = frame->local;
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- goto unlock;
- }
-
- local->op_ret = 0;
- }
- unlock:
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local_fd = local->fd;
- local->fd = NULL;
-
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local_fd);
-
- fd_unref (local_fd);
- }
- return 0;
-}
-
-int32_t
-map_single_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries)
-{
- call_frame_t *prev = NULL;
- gf_dirent_t *orig_entry = NULL;
-
- prev = cookie;
-
- list_for_each_entry (orig_entry, &entries->list, list) {
- map_itransform (this, prev->this, orig_entry->d_ino,
- &orig_entry->d_ino);
- }
- STACK_UNWIND (frame, op_ret, op_errno, entries);
-
- return 0;
-}
-
-
-int32_t
-map_single_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries)
-{
- call_frame_t *prev = NULL;
- gf_dirent_t *orig_entry = NULL;
-
- prev = cookie;
-
- list_for_each_entry (orig_entry, &entries->list, list) {
- map_itransform (this, prev->this, orig_entry->d_ino,
- &orig_entry->d_ino);
- orig_entry->d_stat.ia_ino = orig_entry->d_ino;
- }
- STACK_UNWIND (frame, op_ret, op_errno, entries);
- return 0;
-}
-
-int
-map_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *orig_entries);
-
-int
-map_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *orig_entries);
-
-int
-map_generic_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *orig_entries,
- int whichop)
-{
- map_local_t *local = NULL;
- gf_dirent_t entries;
- gf_dirent_t *orig_entry = NULL;
- gf_dirent_t *entry = NULL;
- call_frame_t *prev = NULL;
- xlator_t *subvol = NULL;
- xlator_t *next_subvol = NULL;
- off_t next_offset = 0;
- int count = 0;
- fd_t *local_fd = NULL;
-
- INIT_LIST_HEAD (&entries.list);
- prev = cookie;
- local = frame->local;
-
- if (op_ret < 0)
- goto done;
-
- list_for_each_entry (orig_entry, &orig_entries->list, list) {
- subvol = prev->this;
-
- entry = gf_dirent_for_name (orig_entry->d_name);
- if (!entry) {
- gf_log (this->name, GF_LOG_ERROR,
- "memory allocation failed :(");
- goto unwind;
- }
-
- map_itransform (this, subvol, orig_entry->d_ino,
- &entry->d_ino);
- map_itransform (this, subvol, orig_entry->d_off,
- &entry->d_off);
-
- if (whichop == GF_FOP_READDIRP)
- entry->d_stat.ia_ino = entry->d_ino;
- entry->d_type = orig_entry->d_type;
- entry->d_len = orig_entry->d_len;
-
- list_add_tail (&entry->list, &entries.list);
- count++;
- next_offset = orig_entry->d_off;
- }
-
- op_ret = count;
-
-done:
- if (count == 0) {
- /* non-zero next_offset means that
- EOF is not yet hit on the current subvol
- */
- if (next_offset == 0) {
- next_subvol = map_subvol_next (this, prev->this);
- } else {
- next_subvol = prev->this;
- }
-
- if (!next_subvol) {
- goto unwind;
- }
-
- if (whichop == GF_FOP_READDIR)
- STACK_WIND (frame, map_readdir_cbk, next_subvol,
- next_subvol->fops->readdir, local->fd,
- local->size, 0);
- else
- STACK_WIND (frame, map_readdirp_cbk, next_subvol,
- next_subvol->fops->readdirp, local->fd,
- local->size, 0);
- return 0;
- }
-
-unwind:
- if (op_ret < 0)
- op_ret = 0;
-
- local_fd = local->fd;
- local->fd = NULL;
-
- STACK_UNWIND (frame, op_ret, op_errno, &entries);
-
- fd_unref (local_fd);
-
- gf_dirent_free (&entries);
-
- return 0;
-}
-
-
-int
-map_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *orig_entries)
-{
- map_generic_readdir_cbk (frame, cookie, this, op_ret, op_errno,
- orig_entries, GF_FOP_READDIR);
- return 0;
-}
-
-
-int
-map_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *orig_entries)
-{
- map_generic_readdir_cbk (frame, cookie, this, op_ret, op_errno,
- orig_entries, GF_FOP_READDIRP);
- return 0;
-}
-
-
-/* Management operations */
-
-static int32_t
-map_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- STACK_UNWIND (frame, op_ret, op_errno, file_checksum, dir_checksum);
- return 0;
-}
-
-
-/* Fops starts here */
-
-int32_t
-map_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_stat_cbk, subvol, subvol->fops->stat, loc);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_setattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct iatt *stbuf,
- int32_t valid)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- GF_VALIDATE_OR_GOTO ("map", this, err);
- GF_VALIDATE_OR_GOTO (this->name, frame, err);
- GF_VALIDATE_OR_GOTO (this->name, loc, err);
- GF_VALIDATE_OR_GOTO (this->name, loc->inode, err);
- GF_VALIDATE_OR_GOTO (this->name, loc->path, err);
- GF_VALIDATE_OR_GOTO (this->name, stbuf, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_setattr_cbk, subvol,
- subvol->fops->setattr, loc, stbuf, valid);
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_fsetattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iatt *stbuf,
- int32_t valid)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- GF_VALIDATE_OR_GOTO ("map", this, err);
- GF_VALIDATE_OR_GOTO (this->name, frame, err);
- GF_VALIDATE_OR_GOTO (this->name, fd, err);
- GF_VALIDATE_OR_GOTO (this->name, stbuf, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_fsetattr_cbk, subvol,
- subvol->fops->fsetattr, fd, stbuf, valid);
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_truncate_cbk, subvol,
- subvol->fops->truncate, loc, offset);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_ftruncate_cbk, subvol,
- subvol->fops->ftruncate, fd, offset);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_access_cbk, subvol,
- subvol->fops->access, loc, mask);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int32_t
-map_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_readlink_cbk, subvol,
- subvol->fops->readlink, loc, size);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_unlink_cbk, subvol, subvol->fops->unlink, loc);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_rmdir_cbk, subvol, subvol->fops->rmdir, loc);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int32_t
-map_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- int32_t op_errno = 1;
- xlator_t *old_subvol = NULL;
- xlator_t *new_subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (oldloc, err);
- VALIDATE_OR_GOTO (oldloc->inode, err);
- VALIDATE_OR_GOTO (oldloc->path, err);
- VALIDATE_OR_GOTO (newloc, err);
-
- old_subvol = get_mapping_subvol_from_ctx (this, oldloc->inode);
- if (!old_subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- if (newloc->path) {
- new_subvol = get_mapping_subvol_from_path (this, newloc->path);
- if (new_subvol && (new_subvol != old_subvol)) {
- op_errno = EXDEV;
- goto err;
- }
- }
-
- STACK_WIND (frame, map_rename_cbk, old_subvol,
- old_subvol->fops->rename, oldloc, newloc);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int32_t
-map_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- int32_t op_errno = 1;
- xlator_t *old_subvol = NULL;
- xlator_t *new_subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (oldloc, err);
- VALIDATE_OR_GOTO (oldloc->inode, err);
- VALIDATE_OR_GOTO (oldloc->path, err);
- VALIDATE_OR_GOTO (newloc, err);
-
- old_subvol = get_mapping_subvol_from_ctx (this, oldloc->inode);
- if (!old_subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- if (newloc->path) {
- new_subvol = get_mapping_subvol_from_path (this, newloc->path);
- if (new_subvol && (new_subvol != old_subvol)) {
- op_errno = EXDEV;
- goto err;
- }
- }
-
- STACK_WIND (frame, map_link_cbk, old_subvol,
- old_subvol->fops->link, oldloc, newloc);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int32_t
-map_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd, int wbflags)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_open_cbk, subvol,
- subvol->fops->open, loc, flags, fd, wbflags);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_readv_cbk, subvol,
- subvol->fops->readv, fd, size, offset);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, 0, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off,
- struct iobref *iobref)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_writev_cbk, subvol,
- subvol->fops->writev, fd, vector, count, off, iobref);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_flush_cbk, subvol, subvol->fops->flush, fd);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int32_t
-map_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_fsync_cbk, subvol,
- subvol->fops->fsync, fd, flags);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_fstat_cbk, subvol, subvol->fops->fstat, fd);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_getdents_cbk, subvol,
- subvol->fops->getdents, fd, size, offset, flag);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_setdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags,
- dir_entry_t *entries,
- int32_t count)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_setdents_cbk, subvol,
- subvol->fops->setdents, fd, flags, entries, count);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int32_t
-map_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_fsyncdir_cbk, subvol,
- subvol->fops->fsyncdir, fd, flags);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-
-
-int32_t
-map_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_setxattr_cbk, subvol,
- subvol->fops->setxattr, loc, dict, flags);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_getxattr_cbk, subvol,
- subvol->fops->getxattr, loc, name);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int32_t
-map_fsetxattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- dict_t *dict,
- int32_t flags)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_fsetxattr_cbk, subvol,
- subvol->fops->fsetxattr, fd, dict, flags);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_fgetxattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_fgetxattr_cbk, subvol,
- subvol->fops->fgetxattr, fd, name);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int32_t
-map_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_xattrop_cbk, subvol,
- subvol->fops->xattrop, loc, flags, dict);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_fxattrop_cbk, subvol,
- subvol->fops->fxattrop, fd, flags, dict);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_removexattr_cbk, subvol,
- subvol->fops->removexattr, loc, name);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *lock)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_lk_cbk, subvol,
- subvol->fops->lk, fd, cmd, lock);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int32_t
-map_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_inodelk_cbk, subvol,
- subvol->fops->inodelk, volume, loc, cmd, lock);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int32_t
-map_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_finodelk_cbk, subvol,
- subvol->fops->finodelk, volume, fd, cmd, lock);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_entrylk_cbk, subvol,
- subvol->fops->entrylk, volume, loc, basename, cmd, type);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_fentrylk_cbk, subvol,
- subvol->fops->fentrylk, volume, fd, basename, cmd, type);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (loc->path, err);
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- STACK_WIND (frame, map_checksum_cbk, subvol,
- subvol->fops->checksum, loc, flag);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int32_t
-map_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t rdev)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- subvol = get_mapping_subvol_from_path (this, loc->path);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- op_errno = inode_ctx_put (loc->inode, this, (uint64_t)(long)subvol);
- if (op_errno != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set subvolume ptr in inode ctx",
- loc->path);
- }
-
- STACK_WIND (frame, map_newentry_cbk, subvol,
- subvol->fops->mknod, loc, mode, rdev);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- subvol = get_mapping_subvol_from_path (this, loc->path);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- op_errno = inode_ctx_put (loc->inode, this, (uint64_t)(long)subvol);
- if (op_errno != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set subvolume ptr in inode ctx",
- loc->path);
- }
-
- STACK_WIND (frame, map_newentry_cbk, subvol,
- subvol->fops->mkdir, loc, mode);
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int32_t
-map_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- subvol = get_mapping_subvol_from_path (this, loc->path);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- op_errno = inode_ctx_put (loc->inode, this, (uint64_t)(long)subvol);
- if (op_errno != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set subvolume ptr in inode ctx",
- loc->path);
- }
-
- STACK_WIND (frame, map_newentry_cbk, subvol,
- subvol->fops->symlink, linkpath, loc);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-map_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode, fd_t *fd)
-{
- int32_t op_errno = 1;
- xlator_t *subvol = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- subvol = get_mapping_subvol_from_path (this, loc->path);
- if (!subvol) {
- op_errno = EINVAL;
- goto err;
- }
-
- op_errno = inode_ctx_put (loc->inode, this, (uint64_t)(long)subvol);
- if (op_errno != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set subvolume ptr in inode ctx",
- loc->path);
- }
-
- STACK_WIND (frame, map_create_cbk, subvol,
- subvol->fops->create, loc, flags, mode, fd);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int32_t
-map_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- int32_t op_errno = EINVAL;
- xlator_t *subvol = NULL;
- map_local_t *local = NULL;
- map_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- priv = this->private;
-
- if (loc->inode->ino == 1)
- goto root_inode;
-
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- subvol = get_mapping_subvol_from_path (this, loc->path);
- if (!subvol) {
- goto err;
- }
-
- op_errno = inode_ctx_put (loc->inode, this,
- (uint64_t)(long)subvol);
- if (op_errno != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to set subvolume in inode ctx",
- loc->path);
- }
- }
-
- /* Just one callback */
- STACK_WIND (frame, map_single_lookup_cbk, subvol,
- subvol->fops->lookup, loc, xattr_req);
-
- return 0;
-
- root_inode:
- local = GF_CALLOC (1, sizeof (map_local_t),
- gf_map_mt_map_local_t);
-
- frame->local = local;
- local->call_count = priv->child_count;
- local->op_ret = -1;
-
- trav = this->children;
- while (trav) {
- STACK_WIND (frame, map_root_lookup_cbk, trav->xlator,
- trav->xlator->fops->lookup, loc, xattr_req);
- trav = trav->next;
- }
-
- return 0;
-
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL, NULL);
-
- return 0;
-}
-
-
-int32_t
-map_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- int32_t op_errno = EINVAL;
- xlator_t *subvol = NULL;
- map_local_t *local = NULL;
- map_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- if (loc->inode->ino == 1)
- goto root_inode;
- subvol = get_mapping_subvol_from_ctx (this, loc->inode);
- if (!subvol) {
- goto err;
- }
-
- /* Just one callback */
- STACK_WIND (frame, map_statfs_cbk, subvol, subvol->fops->statfs, loc);
-
- return 0;
-
- root_inode:
- local = GF_CALLOC (1, sizeof (map_local_t),
- gf_map_mt_map_local_t);
-
- priv = this->private;
- frame->local = local;
- local->call_count = priv->child_count;
- local->op_ret = -1;
-
- trav = this->children;
- while (trav) {
- STACK_WIND (frame, map_statfs_cbk, trav->xlator,
- trav->xlator->fops->statfs, loc);
- trav = trav->next;
- }
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
-
- return 0;
-}
-
-int32_t
-map_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd)
-{
- int32_t op_errno = EINVAL;
- xlator_t *subvol = NULL;
- map_local_t *local = NULL;
- map_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- if (loc->inode->ino == 1)
- goto root_inode;
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- goto err;
- }
-
- /* Just one callback */
- STACK_WIND (frame, map_opendir_cbk, subvol,
- subvol->fops->opendir, loc, fd);
-
- return 0;
-
- root_inode:
- local = GF_CALLOC (1, sizeof (map_local_t),
- gf_map_mt_map_local_t);
-
- priv = this->private;
- frame->local = local;
- local->call_count = priv->child_count;
- local->op_ret = -1;
- local->fd = fd_ref (fd);
-
- trav = this->children;
- while (trav) {
- STACK_WIND (frame, map_opendir_cbk, trav->xlator,
- trav->xlator->fops->opendir, loc, fd);
- trav = trav->next;
- }
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
-
- return 0;
-}
-
-
-int32_t
-map_do_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff, int whichop)
-{
- int32_t op_errno = EINVAL;
- xlator_t *subvol = NULL;
- map_local_t *local = NULL;
- map_private_t *priv = NULL;
- xlator_t *xvol = NULL;
- off_t xoff = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- if (fd->inode->ino == 1)
- goto root_inode;
-
- subvol = get_mapping_subvol_from_ctx (this, fd->inode);
- if (!subvol) {
- goto err;
- }
-
- /* Just one callback */
- if (whichop == GF_FOP_READDIR)
- STACK_WIND (frame, map_single_readdir_cbk, subvol,
- subvol->fops->readdir, fd, size, yoff);
- else
- STACK_WIND (frame, map_single_readdirp_cbk, subvol,
- subvol->fops->readdirp, fd, size, yoff);
-
- return 0;
-
- root_inode:
- /* readdir on '/' */
- local = GF_CALLOC (1, sizeof (map_local_t),
- gf_map_mt_map_local_t);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR,
- "memory allocation failed :(");
- op_errno = ENOMEM;
- goto err;
- }
-
- priv = this->private;
- frame->local = local;
- local->op_errno = ENOENT;
- local->op_ret = -1;
-
- local->fd = fd_ref (fd);
- local->size = size;
-
- map_deitransform (this, yoff, &xvol, (uint64_t *)&xoff);
-
- if (whichop == GF_FOP_READDIR)
- STACK_WIND (frame, map_readdir_cbk, xvol, xvol->fops->readdir,
- fd, size, xoff);
- else
- STACK_WIND (frame, map_readdirp_cbk, xvol, xvol->fops->readdirp,
- fd, size, xoff);
-
- return 0;
- err:
- STACK_UNWIND (frame, -1, op_errno, NULL);
-
- return 0;
-}
-
-
-
-int32_t
-map_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff)
-{
- map_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIR);
- return 0;
-}
-
-int32_t
-map_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t yoff)
-{
- map_do_readdir (frame, this, fd, size, yoff, GF_FOP_READDIRP);
- return 0;
-}
-
-
-void
-fini (xlator_t *this)
-{
- map_private_t *priv = NULL;
- struct map_pattern *trav_map = NULL;
- struct map_pattern *tmp_map = NULL;
-
- priv = this->private;
-
- if (priv) {
- if (priv->xlarray)
- GF_FREE (priv->xlarray);
-
- trav_map = priv->map;
- while (trav_map) {
- tmp_map = trav_map;
- trav_map = trav_map->next;
- GF_FREE (tmp_map);
- }
-
- GF_FREE(priv);
- }
-
- return;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_map_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-int
-init (xlator_t *this)
-{
- map_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int count = 0;
- int ret = -1;
- char *pattern_string = NULL;
- char *map_pair_str = NULL;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *dup_map_pair = NULL;
- char *dir_str = NULL;
- char *subvol_str = NULL;
- char *map_xl = NULL;
-
-
- if (!this->children) {
- gf_log (this->name,GF_LOG_ERROR,
- "FATAL: map should have one or more child defined");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- priv = GF_CALLOC (1, sizeof (map_private_t),
- gf_map_mt_map_private_t);
- this->private = priv;
-
- /* allocate xlator array */
- trav = this->children;
- while (trav) {
- count++;
- trav = trav->next;
- }
- priv->xlarray = GF_CALLOC (1, sizeof (struct map_xlator_array) * count,
- gf_map_mt_map_xlator_array);
- priv->child_count = count;
-
- /* build xlator array */
- count = 0;
- trav = this->children;
- while (trav) {
- priv->xlarray[count++].xl = trav->xlator;
- trav = trav->next;
- }
-
- /* map dir1:brick1;dir2:brick2;dir3:brick3;*:brick4 */
- ret = dict_get_str (this->options, "map-directory", &pattern_string);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "map.pattern not given, can't continue");
- goto err;
- }
- map_pair_str = strtok_r (pattern_string, ";", &tmp_str);
- while (map_pair_str) {
- dup_map_pair = gf_strdup (map_pair_str);
- dir_str = strtok_r (dup_map_pair, ":", &tmp_str1);
- if (!dir_str) {
- gf_log (this->name, GF_LOG_ERROR,
- "directory string invalid");
- goto err;
- }
- subvol_str = strtok_r (NULL, ":", &tmp_str1);
- if (!subvol_str) {
- gf_log (this->name, GF_LOG_ERROR,
- "mapping subvolume string invalid");
- goto err;
- }
- ret = verify_dir_and_assign_subvol (this,
- dir_str,
- subvol_str);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "verification failed");
- goto err;
- }
-
- GF_FREE (dup_map_pair);
-
- map_pair_str = strtok_r (NULL, ";", &tmp_str);
- }
-
- /* default-volume brick4 */
- ret = dict_get_str (this->options, "default-volume", &map_xl);
- if (ret == 0) {
- ret = assign_default_subvol (this, map_xl);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "assigning default failed");
- goto err;
- }
- }
-
- verify_if_all_subvolumes_got_used (this);
-
- return 0;
- err:
- fini (this);
- return -1;
-}
-
-
-struct xlator_fops fops = {
- .lookup = map_lookup,
- .mknod = map_mknod,
- .create = map_create,
-
- .stat = map_stat,
- .fstat = map_fstat,
- .truncate = map_truncate,
- .ftruncate = map_ftruncate,
- .access = map_access,
- .readlink = map_readlink,
- .setxattr = map_setxattr,
- .getxattr = map_getxattr,
- .fsetxattr = map_fsetxattr,
- .fgetxattr = map_fgetxattr,
- .removexattr = map_removexattr,
- .open = map_open,
- .readv = map_readv,
- .writev = map_writev,
- .flush = map_flush,
- .fsync = map_fsync,
- .statfs = map_statfs,
- .lk = map_lk,
- .opendir = map_opendir,
- .readdir = map_readdir,
- .readdirp = map_readdirp,
- .fsyncdir = map_fsyncdir,
- .symlink = map_symlink,
- .unlink = map_unlink,
- .link = map_link,
- .mkdir = map_mkdir,
- .rmdir = map_rmdir,
- .rename = map_rename,
- .inodelk = map_inodelk,
- .finodelk = map_finodelk,
- .entrylk = map_entrylk,
- .fentrylk = map_fentrylk,
- .xattrop = map_xattrop,
- .fxattrop = map_fxattrop,
- .setdents = map_setdents,
- .getdents = map_getdents,
- .checksum = map_checksum,
- .setattr = map_setattr,
- .fsetattr = map_fsetattr,
-};
-
-struct xlator_cbks cbks = {
-};
-
-struct volume_options options[] = {
- { .key = {"map-directory"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {"default-volume"},
- .type = GF_OPTION_TYPE_XLATOR
- },
-
- { .key = {NULL} }
-};
diff --git a/xlators/cluster/map/src/map.h b/xlators/cluster/map/src/map.h
deleted file mode 100644
index bccac437cdc..00000000000
--- a/xlators/cluster/map/src/map.h
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __MAP_H__
-#define __MAP_H__
-
-#include "xlator.h"
-#include "map-mem-types.h"
-
-struct map_pattern {
- struct map_pattern *next;
- xlator_t *xl;
- char *directory;
- int dir_len;
-};
-
-struct map_xlator_array {
- xlator_t *xl;
- int mapped; /* yes/no */
-};
-
-typedef struct {
- struct map_pattern *map;
- xlator_t *default_xl;
- struct map_xlator_array *xlarray;
- int child_count;
-} map_private_t;
-
-typedef struct {
- int32_t op_ret;
- int32_t op_errno;
- int call_count;
- struct statvfs statvfs;
- struct iatt stbuf;
- inode_t *inode;
- dict_t *dict;
- fd_t *fd;
-
- size_t size;
-} map_local_t;
-
-xlator_t *map_subvol_next (xlator_t *this, xlator_t *prev);
-int map_subvol_cnt (xlator_t *this, xlator_t *subvol);
-
-int map_itransform (xlator_t *this, xlator_t *subvol,
- uint64_t x, uint64_t *y_p);
-int map_deitransform (xlator_t *this, uint64_t y,
- xlator_t **subvol_p, uint64_t *x_p);
-
-
-xlator_t *get_mapping_subvol_from_path (xlator_t *this, const char *path);
-xlator_t *get_mapping_subvol_from_ctx (xlator_t *this, inode_t *inode);
-
-int check_multiple_volume_entry (xlator_t *this, xlator_t *subvol);
-int verify_dir_and_assign_subvol (xlator_t *this,
- const char *directory, const char *subvol);
-int assign_default_subvol (xlator_t *this, const char *default_xl);
-void verify_if_all_subvolumes_got_used (xlator_t *this);
-
-
-#endif /* __MAP_H__ */
diff --git a/xlators/cluster/stripe/Makefile.am b/xlators/cluster/stripe/Makefile.am
deleted file mode 100644
index d471a3f9243..00000000000
--- a/xlators/cluster/stripe/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/cluster/stripe/src/Makefile.am b/xlators/cluster/stripe/src/Makefile.am
deleted file mode 100644
index 8c48d341091..00000000000
--- a/xlators/cluster/stripe/src/Makefile.am
+++ /dev/null
@@ -1,19 +0,0 @@
-
-xlator_LTLIBRARIES = stripe.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/cluster
-
-stripe_la_LDFLAGS = -module -avoidversion
-
-stripe_la_SOURCES = stripe.c stripe-helpers.c \
- $(top_builddir)/xlators/lib/src/libxlator.c
-
-stripe_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = stripe.h stripe-mem-types.h $(top_builddir)/xlators/lib/src/libxlator.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
- -I$(top_srcdir)/xlators/lib/src
-
-CLEANFILES =
-
diff --git a/xlators/cluster/stripe/src/stripe-helpers.c b/xlators/cluster/stripe/src/stripe-helpers.c
deleted file mode 100644
index 336da793e55..00000000000
--- a/xlators/cluster/stripe/src/stripe-helpers.c
+++ /dev/null
@@ -1,590 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-#include <fnmatch.h>
-
-#include "stripe.h"
-#include "byte-order.h"
-
-void
-stripe_local_wipe (stripe_local_t *local)
-{
- if (!local)
- goto out;
-
- loc_wipe (&local->loc);
- loc_wipe (&local->loc2);
-
- if (local->fd)
- fd_unref (local->fd);
-
- if (local->inode)
- inode_unref (local->inode);
-
- if (local->xattr)
- dict_unref (local->xattr);
-
- if (local->xdata)
- dict_unref (local->xdata);
-
-out:
- return;
-}
-
-
-
-void
-stripe_aggregate (dict_t *this, char *key, data_t *value, void *data)
-{
- dict_t *dst = NULL;
- int64_t *ptr = 0, *size = NULL;
- int32_t ret = -1;
-
- dst = data;
-
- if (strcmp (key, GF_XATTR_QUOTA_SIZE_KEY) == 0) {
- ret = dict_get_bin (dst, key, (void **)&size);
- if (ret < 0) {
- size = GF_CALLOC (1, sizeof (int64_t),
- gf_common_mt_char);
- if (size == NULL) {
- gf_log ("stripe", GF_LOG_WARNING,
- "memory allocation failed");
- goto out;
- }
- ret = dict_set_bin (dst, key, size, sizeof (int64_t));
- if (ret < 0) {
- gf_log ("stripe", GF_LOG_WARNING,
- "stripe aggregate dict set failed");
- GF_FREE (size);
- goto out;
- }
- }
-
- ptr = data_to_bin (value);
- if (ptr == NULL) {
- gf_log ("stripe", GF_LOG_WARNING, "data to bin failed");
- goto out;
- }
-
- *size = hton64 (ntoh64 (*size) + ntoh64 (*ptr));
- } else if (strcmp (key, GF_CONTENT_KEY)) {
- /* No need to aggregate 'CONTENT' data */
- ret = dict_set (dst, key, value);
- if (ret)
- gf_log ("stripe", GF_LOG_WARNING, "xattr dict set failed");
- }
-
-out:
- return;
-}
-
-
-void
-stripe_aggregate_xattr (dict_t *dst, dict_t *src)
-{
- if ((dst == NULL) || (src == NULL)) {
- goto out;
- }
-
- dict_foreach (src, stripe_aggregate, dst);
-out:
- return;
-}
-
-
-int32_t
-stripe_xattr_aggregate (char *buffer, stripe_local_t *local, int32_t *total)
-{
- int32_t i = 0;
- int32_t ret = -1;
- int32_t len = 0;
- char *sbuf = NULL;
- stripe_xattr_sort_t *xattr = NULL;
-
- if (!buffer || !local || !local->xattr_list)
- goto out;
-
- sbuf = buffer;
-
- for (i = 0; i < local->nallocs; i++) {
- xattr = local->xattr_list + i;
- len = xattr->xattr_len;
-
- if (len && xattr && xattr->xattr_value) {
- memcpy (buffer, xattr->xattr_value, len);
- buffer += len;
- *buffer++ = ' ';
- }
- }
-
- *--buffer = '\0';
- if (total)
- *total = buffer - sbuf;
- ret = 0;
-
- out:
- return ret;
-}
-
-int32_t
-stripe_free_xattr_str (stripe_local_t *local)
-{
- int32_t i = 0;
- int32_t ret = -1;
- stripe_xattr_sort_t *xattr = NULL;
-
- if (!local || !local->xattr_list)
- goto out;
-
- for (i = 0; i < local->nallocs; i++) {
- xattr = local->xattr_list + i;
-
- if (xattr && xattr->xattr_value)
- GF_FREE (xattr->xattr_value);
- }
-
- ret = 0;
- out:
- return ret;
-}
-
-
-int32_t
-stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local,
- char **xattr_serz)
-{
- int ret = -1;
- int32_t padding = 0;
- int32_t tlen = 0;
- char stripe_size_str[20] = {0,};
- char *pathinfo_serz = NULL;
-
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR, "Possible NULL deref");
- goto out;
- }
-
- (void) snprintf (stripe_size_str, 20, "%ld",
- (local->fctx) ? local->fctx->stripe_size : 0);
-
- /* extra bytes for decorations (brackets and <>'s) */
- padding = strlen (this->name) + strlen (STRIPE_PATHINFO_HEADER)
- + strlen (stripe_size_str) + 7;
- local->xattr_total_len += (padding + 2);
-
- pathinfo_serz = GF_CALLOC (local->xattr_total_len, sizeof (char),
- gf_common_mt_char);
- if (!pathinfo_serz)
- goto out;
-
- /* xlator info */
- (void) sprintf (pathinfo_serz, "(<"STRIPE_PATHINFO_HEADER"%s:[%s]> ",
- this->name, stripe_size_str);
-
- ret = stripe_xattr_aggregate (pathinfo_serz + padding, local, &tlen);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Cannot aggregate pathinfo list");
- goto out;
- }
-
- *(pathinfo_serz + padding + tlen) = ')';
- *(pathinfo_serz + padding + tlen + 1) = '\0';
-
- *xattr_serz = pathinfo_serz;
-
- ret = 0;
- out:
- return ret;
-}
-
-/**
- * stripe_get_matching_bs - Get the matching block size for the given path.
- */
-int32_t
-stripe_get_matching_bs (const char *path, stripe_private_t *priv)
-{
- struct stripe_options *trav = NULL;
- uint64_t block_size = 0;
-
- GF_VALIDATE_OR_GOTO ("stripe", priv, out);
- GF_VALIDATE_OR_GOTO ("stripe", path, out);
-
- LOCK (&priv->lock);
- {
- block_size = priv->block_size;
- trav = priv->pattern;
- while (trav) {
- if (!fnmatch (trav->path_pattern, path, FNM_NOESCAPE)) {
- block_size = trav->block_size;
- break;
- }
- trav = trav->next;
- }
- }
- UNLOCK (&priv->lock);
-
-out:
- return block_size;
-}
-
-int32_t
-stripe_ctx_handle (xlator_t *this, call_frame_t *prev, stripe_local_t *local,
- dict_t *dict)
-{
- char key[256] = {0,};
- data_t *data = NULL;
- int32_t index = 0;
- stripe_private_t *priv = NULL;
-
- priv = this->private;
-
-
- if (!local->fctx) {
- local->fctx = GF_CALLOC (1, sizeof (stripe_fd_ctx_t),
- gf_stripe_mt_stripe_fd_ctx_t);
- if (!local->fctx) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto out;
- }
-
- local->fctx->static_array = 0;
- }
- /* Stripe block size */
- sprintf (key, "trusted.%s.stripe-size", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to get stripe-size");
- goto out;
- } else {
- if (!local->fctx->stripe_size) {
- local->fctx->stripe_size =
- data_to_int64 (data);
- }
-
- if (local->fctx->stripe_size != data_to_int64 (data)) {
- gf_log (this->name, GF_LOG_WARNING,
- "stripe-size mismatch in blocks");
- local->xattr_self_heal_needed = 1;
- }
- }
-
- /* Stripe count */
- sprintf (key, "trusted.%s.stripe-count", this->name);
- data = dict_get (dict, key);
-
- if (!data) {
- local->xattr_self_heal_needed = 1;
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to get stripe-count");
- goto out;
- }
- if (!local->fctx->xl_array) {
- local->fctx->stripe_count = data_to_int32 (data);
- if (!local->fctx->stripe_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-count xattr");
- local->op_ret = -1;
- local->op_errno = EIO;
- goto out;
- }
-
- local->fctx->xl_array = GF_CALLOC (local->fctx->stripe_count,
- sizeof (xlator_t *),
- gf_stripe_mt_xlator_t);
-
- if (!local->fctx->xl_array) {
- local->op_errno = ENOMEM;
- local->op_ret = -1;
- goto out;
- }
- }
- if (local->fctx->stripe_count != data_to_int32 (data)) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-count xattr (%d != %d)",
- local->fctx->stripe_count, data_to_int32 (data));
- local->op_ret = -1;
- local->op_errno = EIO;
- goto out;
- }
-
- /* index */
- sprintf (key, "trusted.%s.stripe-index", this->name);
- data = dict_get (dict, key);
- if (!data) {
- local->xattr_self_heal_needed = 1;
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to get stripe-index");
- goto out;
- }
- index = data_to_int32 (data);
- if (index > priv->child_count) {
- gf_log (this->name, GF_LOG_ERROR,
- "error with stripe-index xattr (%d)", index);
- local->op_ret = -1;
- local->op_errno = EIO;
- goto out;
- }
- if (local->fctx->xl_array) {
- if (!local->fctx->xl_array[index])
- local->fctx->xl_array[index] = prev->this;
- }
-
- sprintf(key, "trusted.%s.stripe-coalesce", this->name);
- data = dict_get(dict, key);
- if (!data) {
- /*
- * The file was probably created prior to coalesce support.
- * Assume non-coalesce mode for this file to maintain backwards
- * compatibility.
- */
- gf_log(this->name, GF_LOG_DEBUG, "missing stripe-coalesce "
- "attr, assume non-coalesce mode");
- local->fctx->stripe_coalesce = 0;
- } else {
- local->fctx->stripe_coalesce = data_to_int32(data);
- }
-
-
-out:
- return 0;
-}
-
-int32_t
-stripe_xattr_request_build (xlator_t *this, dict_t *dict, uint64_t stripe_size,
- uint32_t stripe_count, uint32_t stripe_index,
- uint32_t stripe_coalesce)
-{
- char key[256] = {0,};
- int32_t ret = -1;
-
- sprintf (key, "trusted.%s.stripe-size", this->name);
- ret = dict_set_int64 (dict, key, stripe_size);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set %s in xattr_req dict", key);
- goto out;
- }
-
- sprintf (key, "trusted.%s.stripe-count", this->name);
- ret = dict_set_int32 (dict, key, stripe_count);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set %s in xattr_req dict", key);
- goto out;
- }
-
- sprintf (key, "trusted.%s.stripe-index", this->name);
- ret = dict_set_int32 (dict, key, stripe_index);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set %s in xattr_req dict", key);
- goto out;
- }
-
- sprintf(key, "trusted.%s.stripe-coalesce", this->name);
- ret = dict_set_int32(dict, key, stripe_coalesce);
- if (ret) {
- gf_log(this->name, GF_LOG_WARNING,
- "failed to set %s in xattr_req_dict", key);
- goto out;
- }
-out:
- return ret;
-}
-
-
-static int
-set_default_block_size (stripe_private_t *priv, char *num)
-{
-
- int ret = -1;
- GF_VALIDATE_OR_GOTO ("stripe", THIS, out);
- GF_VALIDATE_OR_GOTO (THIS->name, priv, out);
- GF_VALIDATE_OR_GOTO (THIS->name, num, out);
-
-
- if (gf_string2bytesize (num, &priv->block_size) != 0) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "invalid number format \"%s\"", num);
- goto out;
- }
-
- ret = 0;
-
- out:
- return ret;
-
-}
-
-
-int
-set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data)
-{
- int ret = -1;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *dup_str = NULL;
- char *stripe_str = NULL;
- char *pattern = NULL;
- char *num = NULL;
- struct stripe_options *temp_stripeopt = NULL;
- struct stripe_options *stripe_opt = NULL;
-
- if (!this || !priv || !data)
- goto out;
-
- /* Get the pattern for striping.
- "option block-size *avi:10MB" etc */
- stripe_str = strtok_r (data, ",", &tmp_str);
- while (stripe_str) {
- dup_str = gf_strdup (stripe_str);
- stripe_opt = GF_CALLOC (1, sizeof (struct stripe_options),
- gf_stripe_mt_stripe_options);
- if (!stripe_opt) {
- goto out;
- }
-
- pattern = strtok_r (dup_str, ":", &tmp_str1);
- num = strtok_r (NULL, ":", &tmp_str1);
- if (!num) {
- num = pattern;
- pattern = "*";
- ret = set_default_block_size (priv, num);
- if (ret)
- goto out;
- }
- if (gf_string2bytesize (num, &stripe_opt->block_size) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"", num);
- goto out;
- }
-
- if (stripe_opt->block_size < STRIPE_MIN_BLOCK_SIZE) {
- gf_log (this->name, GF_LOG_ERROR, "Invalid Block-size: "
- "%s. Should be atleast %llu bytes", num,
- STRIPE_MIN_BLOCK_SIZE);
- goto out;
- }
- if (stripe_opt->block_size % 512) {
- gf_log (this->name, GF_LOG_ERROR, "Block-size: %s should"
- " be a multiple of 512 bytes", num);
- goto out;
- }
-
- memcpy (stripe_opt->path_pattern, pattern, strlen (pattern));
-
- gf_log (this->name, GF_LOG_DEBUG,
- "block-size : pattern %s : size %"PRId64,
- stripe_opt->path_pattern, stripe_opt->block_size);
-
- if (priv->pattern)
- temp_stripeopt = NULL;
- else
- temp_stripeopt = priv->pattern;
-
- stripe_opt->next = temp_stripeopt;
-
- priv->pattern = stripe_opt;
- stripe_opt = NULL;
-
- GF_FREE (dup_str);
- dup_str = NULL;
-
- stripe_str = strtok_r (NULL, ",", &tmp_str);
- }
-
- ret = 0;
-out:
-
- if (dup_str)
- GF_FREE (dup_str);
-
- if (stripe_opt)
- GF_FREE (stripe_opt);
-
- return ret;
-}
-
-int32_t
-stripe_iatt_merge (struct iatt *from, struct iatt *to)
-{
- if (to->ia_size < from->ia_size)
- to->ia_size = from->ia_size;
- if (to->ia_mtime < from->ia_mtime)
- to->ia_mtime = from->ia_mtime;
- if (to->ia_ctime < from->ia_ctime)
- to->ia_ctime = from->ia_ctime;
- if (to->ia_atime < from->ia_atime)
- to->ia_atime = from->ia_atime;
- return 0;
-}
-
-off_t
-coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count)
-{
- size_t line_size = 0;
- uint64_t stripe_num = 0;
- off_t coalesced_offset = 0;
-
- line_size = stripe_size * stripe_count;
- stripe_num = offset / line_size;
-
- coalesced_offset = (stripe_num * stripe_size) +
- (offset % stripe_size);
-
- return coalesced_offset;
-}
-
-off_t
-uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
- int stripe_index)
-{
- uint64_t nr_full_stripe_chunks = 0, mod = 0;
-
- if (!size)
- return size;
-
- /*
- * Estimate the number of fully written stripes from the
- * local file size. Each stripe_size chunk corresponds to
- * a stripe.
- */
- nr_full_stripe_chunks = (size / stripe_size) * stripe_count;
- mod = size % stripe_size;
-
- if (!mod) {
- /*
- * There is no remainder, thus we could have overestimated
- * the size of the file in terms of chunks. Trim the number
- * of chunks by the following stripe members and leave it
- * up to those nodes to respond with a larger size (if
- * necessary).
- */
- nr_full_stripe_chunks -= stripe_count -
- (stripe_index + 1);
- size = nr_full_stripe_chunks * stripe_size;
- } else {
- /*
- * There is a remainder and thus we own the last chunk of the
- * file. Add the preceding stripe members of the final stripe
- * along with the remainder to calculate the exact size.
- */
- nr_full_stripe_chunks += stripe_index;
- size = nr_full_stripe_chunks * stripe_size + mod;
- }
-
- return size;
-}
-
diff --git a/xlators/cluster/stripe/src/stripe-mem-types.h b/xlators/cluster/stripe/src/stripe-mem-types.h
deleted file mode 100644
index c8781d7d761..00000000000
--- a/xlators/cluster/stripe/src/stripe-mem-types.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-
-#ifndef __STRIPE_MEM_TYPES_H__
-#define __STRIPE_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_stripe_mem_types_ {
- gf_stripe_mt_iovec = gf_common_mt_end + 1,
- gf_stripe_mt_readv_replies,
- gf_stripe_mt_stripe_fd_ctx_t,
- gf_stripe_mt_char,
- gf_stripe_mt_int8_t,
- gf_stripe_mt_xlator_t,
- gf_stripe_mt_stripe_private_t,
- gf_stripe_mt_stripe_options,
- gf_stripe_mt_xattr_sort_t,
- gf_stripe_mt_end
-};
-#endif
-
diff --git a/xlators/cluster/stripe/src/stripe.c b/xlators/cluster/stripe/src/stripe.c
deleted file mode 100644
index efee9444e9e..00000000000
--- a/xlators/cluster/stripe/src/stripe.c
+++ /dev/null
@@ -1,4969 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-/**
- * xlators/cluster/stripe:
- * Stripe translator, stripes the data across its child nodes,
- * as per the options given in the volfile. The striping works
- * fairly simple. It writes files at different offset as per
- * calculation. So, 'ls -l' output at the real posix level will
- * show file size bigger than the actual size. But when one does
- * 'df' or 'du <file>', real size of the file on the server is shown.
- *
- * WARNING:
- * Stripe translator can't regenerate data if a child node gets disconnected.
- * So, no 'self-heal' for stripe. Hence the advice, use stripe only when its
- * very much necessary, or else, use it in combination with AFR, to have a
- * backup copy.
- */
-#include <fnmatch.h>
-
-#include "stripe.h"
-#include "libxlator.h"
-#include "byte-order.h"
-#include "statedump.h"
-
-struct volume_options options[];
-
-int32_t
-stripe_sh_chown_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
-{
- int callcnt = -1;
- stripe_local_t *local = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_DESTROY (frame);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_sh_make_entry_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!frame || !frame->local || !cookie || !this) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- STACK_WIND (frame, stripe_sh_chown_cbk, prev->this,
- prev->this->fops->setattr, &local->loc,
- &local->stbuf, (GF_SET_ATTR_UID | GF_SET_ATTR_GID), NULL);
-
-out:
- return 0;
-}
-
-int32_t
-stripe_entry_self_heal (call_frame_t *frame, xlator_t *this,
- stripe_local_t *local)
-{
- xlator_list_t *trav = NULL;
- call_frame_t *rframe = NULL;
- stripe_local_t *rlocal = NULL;
- stripe_private_t *priv = NULL;
- dict_t *xdata = NULL;
- int ret = 0;
-
- if (!local || !this || !frame) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- if (!(IA_ISREG (local->stbuf.ia_type) ||
- IA_ISDIR (local->stbuf.ia_type)))
- return 0;
-
- priv = this->private;
- trav = this->children;
- rframe = copy_frame (frame);
- if (!rframe) {
- goto out;
- }
- rlocal = mem_get0 (this->local_pool);
- if (!rlocal) {
- goto out;
- }
- rframe->local = rlocal;
- rlocal->call_count = priv->child_count;
- loc_copy (&rlocal->loc, &local->loc);
- memcpy (&rlocal->stbuf, &local->stbuf, sizeof (struct iatt));
-
- xdata = dict_new ();
- if (!xdata)
- goto out;
-
- ret = dict_set_static_bin (xdata, "gfid-req", local->stbuf.ia_gfid, 16);
- if (ret)
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to set gfid-req", local->loc.path);
-
- while (trav) {
- if (IA_ISREG (local->stbuf.ia_type)) {
- STACK_WIND (rframe, stripe_sh_make_entry_cbk,
- trav->xlator, trav->xlator->fops->mknod,
- &local->loc,
- st_mode_from_ia (local->stbuf.ia_prot,
- local->stbuf.ia_type),
- 0, 0, xdata);
- }
- if (IA_ISDIR (local->stbuf.ia_type)) {
- STACK_WIND (rframe, stripe_sh_make_entry_cbk,
- trav->xlator, trav->xlator->fops->mkdir,
- &local->loc,
- st_mode_from_ia (local->stbuf.ia_prot,
- local->stbuf.ia_type),
- 0, xdata);
- }
- trav = trav->next;
- }
-
- if (xdata)
- dict_unref (xdata);
- return 0;
-
-out:
- if (rframe)
- STRIPE_STACK_DESTROY (rframe);
- if (xdata)
- dict_unref (xdata);
-
- return 0;
-}
-
-
-int32_t
-stripe_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata, struct iatt *postparent)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
- int ret = 0;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- if (op_errno != ENOENT)
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name,
- strerror (op_errno));
- if (local->op_errno != ESTALE)
- local->op_errno = op_errno;
- if (((op_errno != ENOENT) && (op_errno != ENOTCONN)) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- if (op_errno == ENOENT)
- local->entry_self_heal_needed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
- if (IA_ISREG (buf->ia_type)) {
- ret = stripe_ctx_handle (this, prev, local,
- xdata);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "Error getting fctx info from"
- " dict");
- }
-
- if (FIRST_CHILD(this) == prev->this) {
- local->stbuf = *buf;
- local->postparent = *postparent;
- local->inode = inode_ref (inode);
- if (xdata)
- local->xdata = dict_ref (xdata);
- if (local->xattr) {
- stripe_aggregate_xattr (local->xdata,
- local->xattr);
- dict_unref (local->xattr);
- local->xattr = NULL;
- }
- }
-
- if (!local->xdata && !local->xattr) {
- local->xattr = dict_ref (xdata);
- } else if (local->xdata) {
- stripe_aggregate_xattr (local->xdata, xdata);
- } else if (local->xattr) {
- stripe_aggregate_xattr (local->xattr, xdata);
- }
-
- local->stbuf_blocks += buf->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
-
- if (uuid_is_null (local->ia_gfid))
- uuid_copy (local->ia_gfid, buf->ia_gfid);
-
- /* Make sure the gfid on all the nodes are same */
- if (uuid_compare (local->ia_gfid, buf->ia_gfid)) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: gfid different on subvolume %s",
- local->loc.path, prev->this->name);
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->op_ret == 0 && local->entry_self_heal_needed &&
- !uuid_is_null (local->loc.inode->gfid))
- stripe_entry_self_heal (frame, this, local);
-
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->stbuf.ia_blocks = local->stbuf_blocks;
- local->stbuf.ia_size = local->stbuf_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- inode_ctx_put (local->inode, this,
- (uint64_t) (long)local->fctx);
- }
-
- STRIPE_STACK_UNWIND (lookup, frame, local->op_ret,
- local->op_errno, local->inode,
- &local->stbuf, local->xdata,
- &local->postparent);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
- int64_t filesize = 0;
- int ret = 0;
- uint64_t tmpctx = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- loc_copy (&local->loc, loc);
-
- inode_ctx_get (local->inode, this, &tmpctx);
- if (tmpctx)
- local->fctx = (stripe_fd_ctx_t*) (long)tmpctx;
-
- /* quick-read friendly changes */
- if (xdata && dict_get (xdata, GF_CONTENT_KEY)) {
- ret = dict_get_int64 (xdata, GF_CONTENT_KEY, &filesize);
- if (!ret && (filesize > priv->block_size))
- dict_del (xdata, GF_CONTENT_KEY);
- }
-
- /* get stripe-size xattr on lookup. This would be required for
- * open/read/write/pathinfo calls. Hence we send down the request
- * even when type == IA_INVAL */
-
- /*
- * We aren't guaranteed to have xdata here. We need the format info for
- * the file, so allocate xdata if necessary.
- */
- if (!xdata)
- xdata = dict_new();
- else
- xdata = dict_ref(xdata);
-
- if (xdata && (IA_ISREG (loc->inode->ia_type) ||
- (loc->inode->ia_type == IA_INVAL))) {
- ret = stripe_xattr_request_build (this, xdata, 8, 4, 4, 0);
- if (ret)
- gf_log (this->name , GF_LOG_ERROR, "Failed to build"
- " xattr request for %s", loc->path);
-
- }
-
- /* Everytime in stripe lookup, all child nodes
- should be looked up */
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND (frame, stripe_lookup_cbk, trav->xlator,
- trav->xlator->fops->lookup, loc, xdata);
- trav = trav->next;
- }
-
- dict_unref(xdata);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-stripe_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
- prev = cookie;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (FIRST_CHILD(this) == prev->this) {
- local->stbuf = *buf;
- }
-
- local->stbuf_blocks += buf->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
-
- STRIPE_STACK_UNWIND (stat, frame, local->op_ret,
- local->op_errno, &local->stbuf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- if (IA_ISREG(loc->inode->ia_type)) {
- inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
- if (!fctx)
- goto err;
- local->fctx = fctx;
- }
-
- while (trav) {
- STACK_WIND (frame, stripe_stat_cbk, trav->xlator,
- trav->xlator->fops->stat, loc, NULL);
- trav = trav->next;
- }
-
- return 0;
-
-err:
- STRIPE_STACK_UNWIND (stat, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-stripe_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *stbuf, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- int32_t callcnt = 0;
-
- if (!this || !frame || !frame->local) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret && (op_errno != ENOTCONN)) {
- local->op_errno = op_errno;
- }
- if (op_ret == 0) {
- struct statvfs *dict_buf = &local->statvfs_buf;
- dict_buf->f_bsize = stbuf->f_bsize;
- dict_buf->f_frsize = stbuf->f_frsize;
- dict_buf->f_blocks += stbuf->f_blocks;
- dict_buf->f_bfree += stbuf->f_bfree;
- dict_buf->f_bavail += stbuf->f_bavail;
- dict_buf->f_files += stbuf->f_files;
- dict_buf->f_ffree += stbuf->f_ffree;
- dict_buf->f_favail += stbuf->f_favail;
- dict_buf->f_fsid = stbuf->f_fsid;
- dict_buf->f_flag = stbuf->f_flag;
- dict_buf->f_namemax = stbuf->f_namemax;
- local->op_ret = 0;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_UNWIND (statfs, frame, local->op_ret,
- local->op_errno, &local->statvfs_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
-
- trav = this->children;
- priv = this->private;
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- frame->local = local;
-
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND (frame, stripe_statfs_cbk, trav->xlator,
- trav->xlator->fops->statfs, loc, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (statfs, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-
-int32_t
-stripe_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
- if (FIRST_CHILD(this) == prev->this) {
- local->pre_buf = *prebuf;
- local->post_buf = *postbuf;
- }
-
- local->prebuf_blocks += prebuf->ia_blocks;
- local->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, local->fctx, prev);
- correct_file_size(postbuf, local->fctx, prev);
-
- if (local->prebuf_size < prebuf->ia_size)
- local->prebuf_size = prebuf->ia_size;
-
- if (local->postbuf_size < postbuf->ia_size)
- local->postbuf_size = postbuf->ia_size;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->pre_buf.ia_blocks = local->prebuf_blocks;
- local->pre_buf.ia_size = local->prebuf_size;
- local->post_buf.ia_blocks = local->postbuf_blocks;
- local->post_buf.ia_size = local->postbuf_size;
- }
-
- STRIPE_STACK_UNWIND (truncate, frame, local->op_ret,
- local->op_errno, &local->pre_buf,
- &local->post_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = EINVAL;
- int i, eof_idx;
- off_t dest_offset, tmp_offset;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- priv = this->private;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
- if (!fctx) {
- gf_log(this->name, GF_LOG_ERROR, "no stripe context");
- op_errno = EINVAL;
- goto err;
- }
-
- local->fctx = fctx;
- eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
-
- for (i = 0; i < fctx->stripe_count; i++) {
- if (!fctx->xl_array[i]) {
- gf_log(this->name, GF_LOG_ERROR,
- "no xlator at index %d", i);
- op_errno = EINVAL;
- goto err;
- }
-
- if (fctx->stripe_coalesce) {
- /*
- * The node that owns EOF is truncated to the exact
- * coalesced offset. Nodes prior to this index should
- * be rounded up to the size of the complete stripe,
- * while nodes after this index should be rounded down
- * to the size of the previous stripe.
- */
- if (i < eof_idx)
- tmp_offset = roof(offset, fctx->stripe_size *
- fctx->stripe_count);
- else if (i > eof_idx)
- tmp_offset = floor(offset, fctx->stripe_size *
- fctx->stripe_count);
- else
- tmp_offset = offset;
-
- dest_offset = coalesced_offset(tmp_offset,
- fctx->stripe_size, fctx->stripe_count);
- } else {
- dest_offset = offset;
- }
-
- STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
- fctx->xl_array[i]->fops->truncate, loc, dest_offset,
- NULL);
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (truncate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-stripe_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (FIRST_CHILD(this) == prev->this) {
- local->pre_buf = *preop;
- local->post_buf = *postop;
- }
-
- local->prebuf_blocks += preop->ia_blocks;
- local->postbuf_blocks += postop->ia_blocks;
-
- correct_file_size(preop, local->fctx, prev);
- correct_file_size(postop, local->fctx, prev);
-
- if (local->prebuf_size < preop->ia_size)
- local->prebuf_size = preop->ia_size;
- if (local->postbuf_size < postop->ia_size)
- local->postbuf_size = postop->ia_size;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->pre_buf.ia_blocks = local->prebuf_blocks;
- local->pre_buf.ia_size = local->prebuf_size;
- local->post_buf.ia_blocks = local->postbuf_blocks;
- local->post_buf.ia_size = local->postbuf_size;
- }
-
- STRIPE_STACK_UNWIND (setattr, frame, local->op_ret,
- local->op_errno, &local->pre_buf,
- &local->post_buf, NULL);
- }
-out:
- return 0;
-}
-
-
-int32_t
-stripe_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- if (!IA_ISDIR (loc->inode->ia_type) &&
- !IA_ISREG (loc->inode->ia_type)) {
- local->call_count = 1;
- STACK_WIND (frame, stripe_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr,
- loc, stbuf, valid, NULL);
- return 0;
- }
-
- if (IA_ISREG(loc->inode->ia_type)) {
- inode_ctx_get(loc->inode, this, (uint64_t *) &fctx);
- if (!fctx)
- goto err;
- local->fctx = fctx;
- }
-
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND (frame, stripe_setattr_cbk,
- trav->xlator, trav->xlator->fops->setattr,
- loc, stbuf, valid, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (setattr, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-stripe_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_setattr_cbk, trav->xlator,
- trav->xlator->fops->fsetattr, fd, stbuf, valid, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_stack_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- local->stbuf.ia_blocks += buf->ia_blocks;
- local->preparent.ia_blocks += preoldparent->ia_blocks;
- local->postparent.ia_blocks += postoldparent->ia_blocks;
- local->pre_buf.ia_blocks += prenewparent->ia_blocks;
- local->post_buf.ia_blocks += postnewparent->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf.ia_size < buf->ia_size)
- local->stbuf.ia_size = buf->ia_size;
-
- if (local->preparent.ia_size < preoldparent->ia_size)
- local->preparent.ia_size = preoldparent->ia_size;
-
- if (local->postparent.ia_size < postoldparent->ia_size)
- local->postparent.ia_size = postoldparent->ia_size;
-
- if (local->pre_buf.ia_size < prenewparent->ia_size)
- local->pre_buf.ia_size = prenewparent->ia_size;
-
- if (local->post_buf.ia_size < postnewparent->ia_size)
- local->post_buf.ia_size = postnewparent->ia_size;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- STRIPE_STACK_UNWIND (rename, frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->preparent,
- &local->postparent, &local->pre_buf,
- &local->post_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_first_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- op_errno = EINVAL;
- goto unwind;
- }
-
- if (op_ret == -1) {
- goto unwind;
- }
-
- local = frame->local;
- trav = this->children;
-
- local->stbuf = *buf;
- local->preparent = *preoldparent;
- local->postparent = *postoldparent;
- local->pre_buf = *prenewparent;
- local->post_buf = *postnewparent;
-
- local->op_ret = 0;
- local->call_count--;
-
- trav = trav->next; /* Skip first child */
- while (trav) {
- STACK_WIND (frame, stripe_stack_rename_cbk,
- trav->xlator, trav->xlator->fops->rename,
- &local->loc, &local->loc2, NULL);
- trav = trav->next;
- }
- return 0;
-
-unwind:
- STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent, NULL);
- return 0;
-}
-
-int32_t
-stripe_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
-{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (oldloc, err);
- VALIDATE_OR_GOTO (oldloc->path, err);
- VALIDATE_OR_GOTO (oldloc->inode, err);
- VALIDATE_OR_GOTO (newloc, err);
-
- priv = this->private;
- trav = this->children;
-
- /* If any one node is down, don't allow rename */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- loc_copy (&local->loc, oldloc);
- loc_copy (&local->loc2, newloc);
-
- local->call_count = priv->child_count;
-
- inode_ctx_get(oldloc->inode, this, (uint64_t *) &fctx);
- if (!fctx)
- goto err;
- local->fctx = fctx;
-
- frame->local = local;
-
- STACK_WIND (frame, stripe_first_rename_cbk, trav->xlator,
- trav->xlator->fops->rename, oldloc, newloc, NULL);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL, NULL);
- return 0;
-}
-int32_t
-stripe_first_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "%s returned %s",
- prev->this->name, strerror (op_errno));
- goto out;
- }
- local->op_ret = 0;
- local->preparent = *preparent;
- local->postparent = *postparent;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- STRIPE_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
- return 0;
-out:
- STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
-
- return 0;
-}
-
-
-
-
-int32_t
-stripe_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "%s returned %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- if (op_errno != ENOENT) {
- local->failed = 1;
- local->op_ret = op_ret;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (callcnt == 1) {
- if (local->failed) {
- op_errno = local->op_errno;
- goto out;
- }
- STACK_WIND(frame, stripe_first_unlink_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->unlink, &local->loc,
- local->xflag, local->xdata);
- }
- return 0;
-out:
- STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
-
- return 0;
-}
-
-int32_t
-stripe_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int xflag, dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Don't unlink a file if a node is down */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- loc_copy (&local->loc, loc);
- local->xflag = xflag;
-
- if (xdata)
- local->xdata = dict_ref (xdata);
-
- frame->local = local;
- local->call_count = priv->child_count;
- trav = trav->next; /* Skip the first child */
-
- while (trav) {
- STACK_WIND (frame, stripe_unlink_cbk,
- trav->xlator, trav->xlator->fops->unlink,
- loc, xflag, xdata);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (unlink, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-stripe_first_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- op_errno = EINVAL;
- goto err;
- }
-
- if (op_ret == -1) {
- goto err;
- }
-
- local = frame->local;
- local->op_ret = 0;
-
- local->call_count--; /* First child successful */
-
- local->preparent = *preparent;
- local->postparent = *postparent;
- local->preparent_size = preparent->ia_size;
- local->postparent_size = postparent->ia_size;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- STRIPE_STACK_UNWIND (rmdir, frame, local->op_ret, local->op_errno,
- &local->preparent, &local->postparent, xdata);
- return 0;
-err:
- STRIPE_STACK_UNWIND (rmdir, frame, op_ret, op_errno, NULL, NULL, NULL);
- return 0;
-
-}
-
-int32_t
-stripe_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "%s returned %s",
- prev->this->name, strerror (op_errno));
- if (op_errno != ENOENT)
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (callcnt == 1) {
- if (local->failed)
- goto out;
- STACK_WIND (frame, stripe_first_rmdir_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->rmdir, &local->loc,
- local->flags, NULL);
- }
- return 0;
-out:
- STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* don't delete a directory if any of the subvolume is down */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- loc_copy (&local->loc, loc);
- local->flags = flags;
- local->call_count = priv->child_count;
- trav = trav->next; /* skip the first child */
-
- while (trav) {
- STACK_WIND (frame, stripe_rmdir_cbk, trav->xlator,
- trav->xlator->fops->rmdir, loc, flags, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (rmdir, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-stripe_mknod_ifreg_fail_unlink_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf,
- &local->preparent, &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-
-/**
- */
-int32_t
-stripe_mknod_ifreg_setxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->op_ret == -1) {
- local->call_count = priv->child_count;
- while (trav) {
- STACK_WIND (frame,
- stripe_mknod_ifreg_fail_unlink_cbk,
- trav->xlator,
- trav->xlator->fops->unlink,
- &local->loc, 0, NULL);
- trav = trav->next;
- }
- return 0;
- }
-
- STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf,
- &local->preparent, &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_mknod_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- local->op_errno = op_errno;
- }
- if (op_ret >= 0) {
- local->op_ret = op_ret;
-
- /* Can be used as a mechanism to understand if mknod
- was successful in at least one place */
- if (uuid_is_null (local->ia_gfid))
- uuid_copy (local->ia_gfid, buf->ia_gfid);
-
- if (stripe_ctx_handle(this, prev, local, xdata))
- gf_log(this->name, GF_LOG_ERROR,
- "Error getting fctx info from dict");
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if ((local->op_ret == -1) && !uuid_is_null (local->ia_gfid)) {
- /* ia_gfid set means, at least on one node 'mknod'
- is successful */
- local->call_count = priv->child_count;
- trav = this->children;
- while (trav) {
- STACK_WIND (frame,
- stripe_mknod_ifreg_fail_unlink_cbk,
- trav->xlator,
- trav->xlator->fops->unlink,
- &local->loc, 0, NULL);
- trav = trav->next;
- }
- return 0;
- }
-
-
- if (local->op_ret != -1) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- inode_ctx_put (local->inode, this,
- (uint64_t)(long) local->fctx);
-
- }
- STRIPE_STACK_UNWIND (mknod, frame, local->op_ret, local->op_errno,
- local->inode, &local->stbuf,
- &local->preparent, &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-
-int32_t
-stripe_mknod_first_ifreg_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
- int i = 1;
- dict_t *dict = NULL;
- int ret = 0;
- int need_unref = 0;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
- trav = this->children;
-
- local->call_count--;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->failed = 1;
- local->op_errno = op_errno;
- goto out;
- }
-
- local->op_ret = op_ret;
-
- local->stbuf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
-
- if (uuid_is_null (local->ia_gfid))
- uuid_copy (local->ia_gfid, buf->ia_gfid);
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
-
- trav = trav->next;
- while (trav) {
- if (priv->xattr_supported) {
- dict = dict_new ();
- if (!dict) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to allocate dict %s", local->loc.path);
- }
- need_unref = 1;
-
- dict_copy (local->xattr, dict);
-
- ret = stripe_xattr_request_build (this, dict,
- local->stripe_size,
- priv->child_count, i,
- priv->coalesce);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to build xattr request");
-
- } else {
- dict = local->xattr;
- }
-
- STACK_WIND (frame, stripe_mknod_ifreg_cbk,
- trav->xlator, trav->xlator->fops->mknod,
- &local->loc, local->mode, local->rdev, 0, dict);
- trav = trav->next;
- i++;
-
- if (dict && need_unref)
- dict_unref (dict);
- }
-
- return 0;
-
-out:
-
- STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-stripe_single_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- STRIPE_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
-}
-
-
-int
-stripe_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata)
-{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- int32_t op_errno = EINVAL;
- int32_t i = 0;
- dict_t *dict = NULL;
- int ret = 0;
- int need_unref = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- priv = this->private;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- if (S_ISREG(mode)) {
- /* NOTE: on older kernels (older than 2.6.9),
- creat() fops is sent as mknod() + open(). Hence handling
- S_IFREG files is necessary */
- if (priv->nodes_down) {
- gf_log (this->name, GF_LOG_WARNING,
- "Some node down, returning EIO");
- op_errno = EIO;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs (loc->path, priv);
- frame->local = local;
- local->inode = inode_ref (loc->inode);
- loc_copy (&local->loc, loc);
- local->xattr = dict_copy_with_ref (xdata, NULL);
- local->mode = mode;
- local->umask = umask;
- local->rdev = rdev;
-
- /* Everytime in stripe lookup, all child nodes should
- be looked up */
- local->call_count = priv->child_count;
-
- if (priv->xattr_supported) {
- dict = dict_new ();
- if (!dict) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to allocate dict %s", loc->path);
- }
- need_unref = 1;
-
- dict_copy (xdata, dict);
-
- ret = stripe_xattr_request_build (this, dict,
- local->stripe_size,
- priv->child_count,
- i, priv->coalesce);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "failed to build xattr request");
- } else {
- dict = xdata;
- }
-
- STACK_WIND (frame, stripe_mknod_first_ifreg_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->mknod,
- loc, mode, rdev, umask, dict);
-
- if (dict && need_unref)
- dict_unref (dict);
- return 0;
- }
-
- STACK_WIND (frame, stripe_single_mknod_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, umask, xdata);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-stripe_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed != -1) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
- STRIPE_STACK_UNWIND (mkdir, frame, local->op_ret,
- local->op_errno, local->inode,
- &local->stbuf, &local->preparent,
- &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-
-int32_t
-stripe_first_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
- trav = this->children;
-
- local->call_count--; /* first child is successful */
- trav = trav->next; /* skip first child */
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- goto out;
- }
-
- local->op_ret = 0;
-
- local->inode = inode_ref (inode);
- local->stbuf = *buf;
- local->postparent = *postparent;
- local->preparent = *preparent;
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- local->stbuf_size = buf->ia_size;
- local->preparent_size = preparent->ia_size;
- local->postparent_size = postparent->ia_size;
-
- while (trav) {
- STACK_WIND (frame, stripe_mkdir_cbk, trav->xlator,
- trav->xlator->fops->mkdir, &local->loc, local->mode,
- local->umask, local->xdata);
- trav = trav->next;
- }
- return 0;
-out:
- STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
-
- return 0;
-
-}
-
-
-int
-stripe_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
-{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- local->call_count = priv->child_count;
- if (xdata)
- local->xdata = dict_ref (xdata);
- local->mode = mode;
- local->umask = umask;
- loc_copy (&local->loc, loc);
- frame->local = local;
-
- /* Everytime in stripe lookup, all child nodes should be looked up */
- STACK_WIND (frame, stripe_first_mkdir_cbk, trav->xlator,
- trav->xlator->fops->mkdir, loc, mode, umask, xdata);
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-stripe_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
- stripe_fd_ctx_t *fctx = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- inode_ctx_get(inode, this, (uint64_t *) &fctx);
- if (!fctx) {
- gf_log(this->name, GF_LOG_ERROR, "failed to get stripe "
- "context");
- op_ret = -1;
- op_errno = EINVAL;
- }
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- if (FIRST_CHILD(this) == prev->this) {
- local->inode = inode_ref (inode);
- local->stbuf = *buf;
- local->postparent = *postparent;
- local->preparent = *preparent;
- }
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- correct_file_size(buf, fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
- STRIPE_STACK_UNWIND (link, frame, local->op_ret,
- local->op_errno, local->inode,
- &local->stbuf, &local->preparent,
- &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (oldloc, err);
- VALIDATE_OR_GOTO (oldloc->path, err);
- VALIDATE_OR_GOTO (oldloc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* If any one node is down, don't allow link operation */
- if (priv->nodes_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- /* Everytime in stripe lookup, all child
- nodes should be looked up */
- while (trav) {
- STACK_WIND (frame, stripe_link_cbk,
- trav->xlator, trav->xlator->fops->link,
- oldloc, newloc, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_create_fail_unlink_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_UNWIND (create, frame, local->op_ret, local->op_errno,
- local->fd, local->inode, &local->stbuf,
- &local->preparent, &local->postparent, NULL);
- }
-out:
- return 0;
-}
-
-
-int32_t
-stripe_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- if (IA_ISREG(buf->ia_type)) {
- if (stripe_ctx_handle(this, prev, local, xdata))
- gf_log(this->name, GF_LOG_ERROR,
- "Error getting fctx info from "
- "dict");
- }
-
- local->op_ret = op_ret;
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret == -1) {
- local->call_count = priv->child_count;
- trav = this->children;
- while (trav) {
- STACK_WIND (frame,
- stripe_create_fail_unlink_cbk,
- trav->xlator,
- trav->xlator->fops->unlink,
- &local->loc, 0, NULL);
- trav = trav->next;
- }
-
- return 0;
- }
-
- if (local->op_ret >= 0) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
-
- stripe_copy_xl_array(local->fctx->xl_array,
- priv->xl_array,
- local->fctx->stripe_count);
- inode_ctx_put(local->inode, this,
- (uint64_t) local->fctx);
- }
-
- /* Create itself has failed.. so return
- without setxattring */
- STRIPE_STACK_UNWIND (create, frame, local->op_ret,
- local->op_errno, local->fd,
- local->inode, &local->stbuf,
- &local->preparent, &local->postparent, NULL);
- }
-
-out:
- return 0;
-}
-
-
-
-int32_t
-stripe_first_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- call_frame_t *prev = NULL;
- xlator_list_t *trav = NULL;
- int i = 1;
- dict_t *dict = NULL;
- loc_t *loc = NULL;
- int32_t need_unref = 0;
- int32_t ret = -1;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- priv = this->private;
- local = frame->local;
- trav = this->children;
- loc = &local->loc;
-
- --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- local->op_ret = 0;
- /* Get the mapping in inode private */
- /* Get the stat buf right */
- local->stbuf = *buf;
- local->preparent = *preparent;
- local->postparent = *postparent;
-
- local->stbuf_blocks += buf->ia_blocks;
- local->preparent_blocks += preparent->ia_blocks;
- local->postparent_blocks += postparent->ia_blocks;
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- if (local->preparent_size < preparent->ia_size)
- local->preparent_size = preparent->ia_size;
- if (local->postparent_size < postparent->ia_size)
- local->postparent_size = postparent->ia_size;
-
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret == -1) {
- local->call_count = 1;
- STACK_WIND (frame, stripe_create_fail_unlink_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->unlink,
- &local->loc, 0, NULL);
- return 0;
- }
-
- if (local->op_ret >= 0) {
- local->preparent.ia_blocks = local->preparent_blocks;
- local->preparent.ia_size = local->preparent_size;
- local->postparent.ia_blocks = local->postparent_blocks;
- local->postparent.ia_size = local->postparent_size;
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
-
- /* Send a setxattr request to nodes where the
- files are created */
- trav = trav->next;
- while (trav) {
- if (priv->xattr_supported) {
- dict = dict_new ();
- if (!dict) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to allocate dict %s", loc->path);
- }
- need_unref = 1;
-
- dict_copy (local->xattr, dict);
-
- ret = stripe_xattr_request_build (this, dict,
- local->stripe_size,
- priv->child_count,
- i, priv->coalesce);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "failed to build xattr request");
- } else {
- dict = local->xattr;
- }
-
- STACK_WIND (frame, stripe_create_cbk, trav->xlator,
- trav->xlator->fops->create, &local->loc,
- local->flags, local->mode, local->umask, local->fd,
- dict);
- trav = trav->next;
- if (need_unref && dict)
- dict_unref (dict);
- i++;
- }
-
-out:
- return 0;
-}
-
-
-
-/**
- * stripe_create - If a block-size is specified for the 'name', create the
- * file in all the child nodes. If not, create it in only first child.
- *
- * @name- complete path of the file to be created.
- */
-int32_t
-stripe_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
-{
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- int32_t op_errno = EINVAL;
- int ret = 0;
- int need_unref = 0;
- int i = 0;
- dict_t *dict = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- priv = this->private;
-
- /* files created in O_APPEND mode does not allow lseek() on fd */
- flags &= ~O_APPEND;
-
- if (priv->first_child_down || priv->nodes_down) {
- gf_log (this->name, GF_LOG_DEBUG,
- "First node down, returning EIO");
- op_errno = EIO;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- local->op_errno = ENOTCONN;
- local->stripe_size = stripe_get_matching_bs (loc->path, priv);
- frame->local = local;
- local->inode = inode_ref (loc->inode);
- loc_copy (&local->loc, loc);
- local->fd = fd_ref (fd);
- local->flags = flags;
- local->mode = mode;
- local->umask = umask;
- if (xdata)
- local->xattr = dict_ref (xdata);
-
- local->call_count = priv->child_count;
- /* Send a setxattr request to nodes where the
- files are created */
-
- if (priv->xattr_supported) {
- dict = dict_new ();
- if (!dict) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to allocate dict %s", loc->path);
- }
- need_unref = 1;
-
- dict_copy (xdata, dict);
-
- ret = stripe_xattr_request_build (this, dict,
- local->stripe_size,
- priv->child_count,
- i, priv->coalesce);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "failed to build xattr request");
- } else {
- dict = xdata;
- }
-
-
- STACK_WIND (frame, stripe_first_create_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->create, loc, flags, mode,
- umask, fd, dict);
-
- if (need_unref && dict)
- dict_unref (dict);
-
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL, xdata);
- return 0;
-}
-
-int32_t
-stripe_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0)
- local->op_ret = op_ret;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- STRIPE_STACK_UNWIND (open, frame, local->op_ret,
- local->op_errno, local->fd, xdata);
- }
-out:
- return 0;
-}
-
-
-/**
- * stripe_open -
- */
-int32_t
-stripe_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- /* files opened in O_APPEND mode does not allow lseek() on fd */
- flags &= ~O_APPEND;
-
- local->fd = fd_ref (fd);
- frame->local = local;
- loc_copy (&local->loc, loc);
-
- /* Striped files */
- local->flags = flags;
- local->call_count = priv->child_count;
- local->stripe_size = stripe_get_matching_bs (loc->path, priv);
-
- while (trav) {
- STACK_WIND (frame, stripe_open_cbk, trav->xlator,
- trav->xlator->fops->open,
- &local->loc, local->flags, local->fd,
- xdata);
- trav = trav->next;
- }
- return 0;
-err:
- STRIPE_STACK_UNWIND (open, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-stripe_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_ret = -1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0)
- local->op_ret = op_ret;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STRIPE_STACK_UNWIND (opendir, frame, local->op_ret,
- local->op_errno, local->fd, NULL);
- }
-out:
- return 0;
-}
-
-
-int32_t
-stripe_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
-{
- xlator_list_t *trav = NULL;
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
- local->call_count = priv->child_count;
- local->fd = fd_ref (fd);
-
- while (trav) {
- STACK_WIND (frame, stripe_opendir_cbk, trav->xlator,
- trav->xlator->fops->opendir, loc, fd, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (opendir, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- }
- if (op_ret >= 0) {
- if (FIRST_CHILD(this) == prev->this) {
- /* First successful call, copy the *lock */
- local->op_ret = op_ret;
- local->lock = *lock;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
- STRIPE_STACK_UNWIND (lk, frame, local->op_ret,
- local->op_errno, &local->lock, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- trav = this->children;
- priv = this->private;
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_lk_cbk, trav->xlator,
- trav->xlator->fops->lk, fd, cmd, lock, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (lk, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-stripe_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- }
- if (op_ret >= 0)
- local->op_ret = op_ret;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- STRIPE_STACK_UNWIND (flush, frame, local->op_ret,
- local->op_errno, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_flush_cbk, trav->xlator,
- trav->xlator->fops->flush, fd, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (flush, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-
-int32_t
-stripe_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- }
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (FIRST_CHILD(this) == prev->this) {
- local->pre_buf = *prebuf;
- local->post_buf = *postbuf;
- }
- local->prebuf_blocks += prebuf->ia_blocks;
- local->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, local->fctx, prev);
- correct_file_size(postbuf, local->fctx, prev);
-
- if (local->prebuf_size < prebuf->ia_size)
- local->prebuf_size = prebuf->ia_size;
-
- if (local->postbuf_size < postbuf->ia_size)
- local->postbuf_size = postbuf->ia_size;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->pre_buf.ia_blocks = local->prebuf_blocks;
- local->pre_buf.ia_size = local->prebuf_size;
- local->post_buf.ia_blocks = local->postbuf_blocks;
- local->post_buf.ia_size = local->postbuf_size;
- }
-
- STRIPE_STACK_UNWIND (fsync, frame, local->op_ret,
- local->op_errno, &local->pre_buf,
- &local->post_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
- if (!fctx) {
- op_errno = EINVAL;
- goto err;
- }
- local->fctx = fctx;
-
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_fsync_cbk, trav->xlator,
- trav->xlator->fops->fsync, fd, flags, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (fsync, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-int32_t
-stripe_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (FIRST_CHILD(this) == prev->this)
- local->stbuf = *buf;
-
- local->stbuf_blocks += buf->ia_blocks;
-
- correct_file_size(buf, local->fctx, prev);
-
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- if (local->op_ret != -1) {
- local->stbuf.ia_size = local->stbuf_size;
- local->stbuf.ia_blocks = local->stbuf_blocks;
- }
-
- STRIPE_STACK_UNWIND (fstat, frame, local->op_ret,
- local->op_errno, &local->stbuf, NULL);
- }
-
-out:
- return 0;
-}
-
-int32_t
-stripe_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- if (IA_ISREG(fd->inode->ia_type)) {
- inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
- if (!fctx)
- goto err;
- local->fctx = fctx;
- }
-
- while (trav) {
- STACK_WIND (frame, stripe_fstat_cbk, trav->xlator,
- trav->xlator->fops->fstat, fd, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (fstat, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-stripe_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int i, eof_idx;
- off_t dest_offset, tmp_offset;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- priv = this->private;
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- inode_ctx_get(fd->inode, this, (uint64_t *) &fctx);
- if (!fctx) {
- gf_log(this->name, GF_LOG_ERROR, "no stripe context");
- op_errno = EINVAL;
- goto err;
- }
- if (!fctx->stripe_count) {
- gf_log(this->name, GF_LOG_ERROR, "no stripe count");
- op_errno = EINVAL;
- goto err;
- }
-
- local->fctx = fctx;
- eof_idx = (offset / fctx->stripe_size) % fctx->stripe_count;
-
- for (i = 0; i < fctx->stripe_count; i++) {
- if (!fctx->xl_array[i]) {
- gf_log(this->name, GF_LOG_ERROR, "no xlator at index "
- "%d", i);
- op_errno = EINVAL;
- goto err;
- }
-
- if (fctx->stripe_coalesce) {
- if (i < eof_idx)
- tmp_offset = roof(offset, fctx->stripe_size *
- fctx->stripe_count);
- else if (i > eof_idx)
- tmp_offset = floor(offset, fctx->stripe_size *
- fctx->stripe_count);
- else
- tmp_offset = offset;
-
- dest_offset = coalesced_offset(tmp_offset,
- fctx->stripe_size, fctx->stripe_count);
- } else {
- dest_offset = offset;
- }
-
- STACK_WIND(frame, stripe_truncate_cbk, fctx->xl_array[i],
- fctx->xl_array[i]->fops->ftruncate, fd, dest_offset,
- NULL);
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-stripe_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- if ((op_errno != ENOENT) ||
- (prev->this == FIRST_CHILD (this)))
- local->failed = 1;
- }
- if (op_ret >= 0)
- local->op_ret = op_ret;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed)
- local->op_ret = -1;
-
- STRIPE_STACK_UNWIND (fsyncdir, frame, local->op_ret,
- local->op_errno, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int32_t op_errno = 1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- local->call_count = priv->child_count;
-
- while (trav) {
- STACK_WIND (frame, stripe_fsyncdir_cbk, trav->xlator,
- trav->xlator->fops->fsyncdir, fd, flags, NULL);
- trav = trav->next;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (fsyncdir, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-int32_t
-stripe_readv_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
-{
- int32_t i = 0;
- int32_t callcnt = 0;
- int32_t count = 0;
- stripe_local_t *local = NULL;
- struct iovec *vec = NULL;
- struct iatt tmp_stbuf = {0,};
- struct iobref *tmp_iobref = NULL;
- struct iobuf *iobuf = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- local = frame->local;
- prev = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret != -1) {
- correct_file_size(buf, local->fctx, prev);
- if (local->stbuf_size < buf->ia_size)
- local->stbuf_size = buf->ia_size;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- op_ret = 0;
-
- /* Keep extra space for filling in '\0's */
- vec = GF_CALLOC ((local->count * 2), sizeof (struct iovec),
- gf_stripe_mt_iovec);
- if (!vec) {
- op_ret = -1;
- goto done;
- }
-
- for (i = 0; i < local->wind_count; i++) {
- if (local->replies[i].op_ret) {
- memcpy ((vec + count), local->replies[i].vector,
- (local->replies[i].count * sizeof (struct iovec)));
- count += local->replies[i].count;
- op_ret += local->replies[i].op_ret;
- }
- if ((local->replies[i].op_ret <
- local->replies[i].requested_size) &&
- (local->stbuf_size > (local->offset + op_ret))) {
- /* Fill in 0s here */
- vec[count].iov_len =
- (local->replies[i].requested_size -
- local->replies[i].op_ret);
- iobuf = iobuf_get2 (this->ctx->iobuf_pool,
- vec[count].iov_len);
- if (!iobuf) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- op_ret = -1;
- op_errno = ENOMEM;
- goto done;
- }
- memset (iobuf->ptr, 0, vec[count].iov_len);
- vec[count].iov_base = iobuf->ptr;
-
- iobref_add (local->iobref, iobuf);
- iobuf_unref(iobuf);
-
- op_ret += vec[count].iov_len;
- count++;
- }
- GF_FREE (local->replies[i].vector);
- }
-
- /* FIXME: notice that st_ino, and st_dev (gen) will be
- * different than what inode will have. Make sure this doesn't
- * cause any bugs at higher levels */
- memcpy (&tmp_stbuf, &local->replies[0].stbuf,
- sizeof (struct iatt));
- tmp_stbuf.ia_size = local->stbuf_size;
-
- done:
- GF_FREE (local->replies);
- tmp_iobref = local->iobref;
- STRIPE_STACK_UNWIND (readv, frame, op_ret, op_errno, vec,
- count, &tmp_stbuf, tmp_iobref, NULL);
-
- iobref_unref (tmp_iobref);
- if (vec)
- GF_FREE (vec);
- }
-out:
- return 0;
-}
-
-/**
- * stripe_readv_cbk - get all the striped reads, and order it properly, send it
- * to above layer after putting it in a single vector.
- */
-int32_t
-stripe_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int32_t final_count = 0;
- int32_t need_to_check_proper_size = 0;
- call_frame_t *mframe = NULL;
- stripe_local_t *mlocal = NULL;
- stripe_local_t *local = NULL;
- struct iovec *final_vec = NULL;
- struct iatt tmp_stbuf = {0,};
- struct iatt *tmp_stbuf_p = NULL; //need it for a warning
- struct iobref *tmp_iobref = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto end;
- }
-
- local = frame->local;
- index = local->node_index;
- prev = cookie;
- mframe = local->orig_frame;
- if (!mframe)
- goto out;
-
- mlocal = mframe->local;
- if (!mlocal)
- goto out;
-
- fctx = mlocal->fctx;
-
- LOCK (&mframe->lock);
- {
- mlocal->replies[index].op_ret = op_ret;
- mlocal->replies[index].op_errno = op_errno;
- mlocal->replies[index].requested_size = local->readv_size;
- if (op_ret >= 0) {
- mlocal->replies[index].stbuf = *stbuf;
- mlocal->replies[index].count = count;
- mlocal->replies[index].vector = iov_dup (vector, count);
-
- correct_file_size(stbuf, fctx, prev);
-
- if (local->stbuf_size < stbuf->ia_size)
- local->stbuf_size = stbuf->ia_size;
- local->stbuf_blocks += stbuf->ia_blocks;
-
- if (!mlocal->iobref)
- mlocal->iobref = iobref_new ();
- iobref_merge (mlocal->iobref, iobref);
- }
- callcnt = ++mlocal->call_count;
- }
- UNLOCK(&mframe->lock);
-
- if (callcnt == mlocal->wind_count) {
- op_ret = 0;
-
- for (index=0; index < mlocal->wind_count; index++) {
- /* check whether each stripe returned
- * 'expected' number of bytes */
- if (mlocal->replies[index].op_ret == -1) {
- op_ret = -1;
- op_errno = mlocal->replies[index].op_errno;
- break;
- }
- /* TODO: handle the 'holes' within the read range
- properly */
- if (mlocal->replies[index].op_ret <
- mlocal->replies[index].requested_size) {
- need_to_check_proper_size = 1;
- }
-
- op_ret += mlocal->replies[index].op_ret;
- mlocal->count += mlocal->replies[index].count;
- }
- if (op_ret == -1)
- goto done;
- if (need_to_check_proper_size)
- goto check_size;
-
- final_vec = GF_CALLOC (mlocal->count, sizeof (struct iovec),
- gf_stripe_mt_iovec);
-
- if (!final_vec) {
- op_ret = -1;
- goto done;
- }
-
- for (index = 0; index < mlocal->wind_count; index++) {
- memcpy ((final_vec + final_count),
- mlocal->replies[index].vector,
- (mlocal->replies[index].count *
- sizeof (struct iovec)));
- final_count += mlocal->replies[index].count;
- GF_FREE (mlocal->replies[index].vector);
- }
-
- /* FIXME: notice that st_ino, and st_dev (gen) will be
- * different than what inode will have. Make sure this doesn't
- * cause any bugs at higher levels */
- memcpy (&tmp_stbuf, &mlocal->replies[0].stbuf,
- sizeof (struct iatt));
- tmp_stbuf.ia_size = local->stbuf_size;
- tmp_stbuf.ia_blocks = local->stbuf_blocks;
-
- done:
- /* */
- GF_FREE (mlocal->replies);
- tmp_iobref = mlocal->iobref;
- /* work around for nfs truncated read. Bug 3774 */
- tmp_stbuf_p = &tmp_stbuf;
- WIPE (tmp_stbuf_p);
- STRIPE_STACK_UNWIND (readv, mframe, op_ret, op_errno, final_vec,
- final_count, &tmp_stbuf, tmp_iobref, NULL);
-
- iobref_unref (tmp_iobref);
- if (final_vec)
- GF_FREE (final_vec);
- }
-
- goto out;
-
-check_size:
- mlocal->call_count = fctx->stripe_count;
-
- for (index = 0; index < fctx->stripe_count; index++) {
- STACK_WIND (mframe, stripe_readv_fstat_cbk,
- (fctx->xl_array[index]),
- (fctx->xl_array[index])->fops->fstat,
- mlocal->fd, NULL);
- }
-
-out:
- STRIPE_STACK_DESTROY (frame);
-end:
- return 0;
-}
-
-
-int32_t
-stripe_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset, uint32_t flags, dict_t *xdata)
-{
- int32_t op_errno = EINVAL;
- int32_t idx = 0;
- int32_t index = 0;
- int32_t num_stripe = 0;
- int32_t off_index = 0;
- size_t frame_size = 0;
- off_t rounded_end = 0;
- uint64_t tmp_fctx = 0;
- uint64_t stripe_size = 0;
- off_t rounded_start = 0;
- off_t frame_offset = offset;
- off_t dest_offset = 0;
- stripe_local_t *local = NULL;
- call_frame_t *rframe = NULL;
- stripe_local_t *rlocal = NULL;
- stripe_fd_ctx_t *fctx = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- inode_ctx_get (fd->inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- op_errno = EBADFD;
- goto err;
- }
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
- stripe_size = fctx->stripe_size;
-
- STRIPE_VALIDATE_FCTX (fctx, err);
-
- if (!stripe_size) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Wrong stripe size for the file");
- goto err;
- }
- /* The file is stripe across the child nodes. Send the read request
- * to the child nodes appropriately after checking which region of
- * the file is in which child node. Always '0-<stripe_size>' part of
- * the file resides in the first child.
- */
- rounded_start = floor (offset, stripe_size);
- rounded_end = roof (offset+size, stripe_size);
- num_stripe = (rounded_end- rounded_start)/stripe_size;
-
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
-
- /* This is where all the vectors should be copied. */
- local->replies = GF_CALLOC (num_stripe, sizeof (struct readv_replies),
- gf_stripe_mt_readv_replies);
- if (!local->replies) {
- op_errno = ENOMEM;
- goto err;
- }
-
- off_index = (offset / stripe_size) % fctx->stripe_count;
- local->wind_count = num_stripe;
- local->readv_size = size;
- local->offset = offset;
- local->fd = fd_ref (fd);
- local->fctx = fctx;
-
- for (index = off_index; index < (num_stripe + off_index); index++) {
- rframe = copy_frame (frame);
- rlocal = mem_get0 (this->local_pool);
- if (!rlocal) {
- op_errno = ENOMEM;
- goto err;
- }
-
- frame_size = min (roof (frame_offset+1, stripe_size),
- (offset + size)) - frame_offset;
-
- rlocal->node_index = index - off_index;
- rlocal->orig_frame = frame;
- rlocal->readv_size = frame_size;
- rframe->local = rlocal;
- idx = (index % fctx->stripe_count);
-
- if (fctx->stripe_coalesce)
- dest_offset = coalesced_offset(frame_offset,
- stripe_size, fctx->stripe_count);
- else
- dest_offset = frame_offset;
-
- STACK_WIND (rframe, stripe_readv_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->readv,
- fd, frame_size, dest_offset, flags, xdata);
-
- frame_offset += frame_size;
- }
-
- return 0;
-err:
- if (rframe)
- STRIPE_STACK_DESTROY (rframe);
-
- STRIPE_STACK_UNWIND (readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-stripe_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- int32_t callcnt = 0;
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
-
- prev = cookie;
- local = frame->local;
-
- LOCK(&frame->lock);
- {
- callcnt = ++local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- local->op_ret = -1;
- }
- if (op_ret >= 0) {
- local->op_ret += op_ret;
- local->post_buf = *postbuf;
- local->pre_buf = *prebuf;
-
- local->prebuf_blocks += prebuf->ia_blocks;
- local->postbuf_blocks += postbuf->ia_blocks;
-
- correct_file_size(prebuf, local->fctx, prev);
- correct_file_size(postbuf, local->fctx, prev);
-
- if (local->prebuf_size < prebuf->ia_size)
- local->prebuf_size = prebuf->ia_size;
- if (local->postbuf_size < postbuf->ia_size)
- local->postbuf_size = postbuf->ia_size;
- }
- }
- UNLOCK (&frame->lock);
-
- if ((callcnt == local->wind_count) && local->unwind) {
- local->pre_buf.ia_size = local->prebuf_size;
- local->pre_buf.ia_blocks = local->prebuf_blocks;
- local->post_buf.ia_size = local->postbuf_size;
- local->post_buf.ia_blocks = local->postbuf_blocks;
-
- STRIPE_STACK_UNWIND (writev, frame, local->op_ret,
- local->op_errno, &local->pre_buf,
- &local->post_buf, NULL);
- }
-out:
- return 0;
-}
-
-int32_t
-stripe_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- uint32_t flags, struct iobref *iobref, dict_t *xdata)
-{
- struct iovec *tmp_vec = NULL;
- stripe_local_t *local = NULL;
- stripe_fd_ctx_t *fctx = NULL;
- int32_t op_errno = 1;
- int32_t idx = 0;
- int32_t total_size = 0;
- int32_t offset_offset = 0;
- int32_t remaining_size = 0;
- int32_t tmp_count = count;
- off_t fill_size = 0;
- uint64_t stripe_size = 0;
- uint64_t tmp_fctx = 0;
- off_t dest_offset = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
- VALIDATE_OR_GOTO (fd->inode, err);
-
- inode_ctx_get (fd->inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- op_errno = EINVAL;
- goto err;
- }
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
- stripe_size = fctx->stripe_size;
-
- STRIPE_VALIDATE_FCTX (fctx, err);
-
- /* File has to be stripped across the child nodes */
- for (idx = 0; idx< count; idx ++) {
- total_size += vector[idx].iov_len;
- }
- remaining_size = total_size;
-
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- frame->local = local;
- local->stripe_size = stripe_size;
- local->fctx = fctx;
-
- if (!stripe_size) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Wrong stripe size for the file");
- op_errno = EINVAL;
- goto err;
- }
-
- while (1) {
- /* Send striped chunk of the vector to child
- nodes appropriately. */
- idx = (((offset + offset_offset) /
- local->stripe_size) % fctx->stripe_count);
-
- fill_size = (local->stripe_size -
- ((offset + offset_offset) % local->stripe_size));
- if (fill_size > remaining_size)
- fill_size = remaining_size;
-
- remaining_size -= fill_size;
-
- tmp_count = iov_subset (vector, count, offset_offset,
- offset_offset + fill_size, NULL);
- tmp_vec = GF_CALLOC (tmp_count, sizeof (struct iovec),
- gf_stripe_mt_iovec);
- if (!tmp_vec) {
- op_errno = ENOMEM;
- goto err;
- }
- tmp_count = iov_subset (vector, count, offset_offset,
- offset_offset + fill_size, tmp_vec);
-
- local->wind_count++;
- if (remaining_size == 0)
- local->unwind = 1;
-
- if (fctx->stripe_coalesce)
- dest_offset = coalesced_offset(offset + offset_offset,
- local->stripe_size, fctx->stripe_count);
- else
- dest_offset = offset + offset_offset;
-
- STACK_WIND (frame, stripe_writev_cbk, fctx->xl_array[idx],
- fctx->xl_array[idx]->fops->writev, fd, tmp_vec,
- tmp_count, dest_offset, flags, iobref,
- xdata);
-
- GF_FREE (tmp_vec);
- offset_offset += fill_size;
- if (remaining_size == 0)
- break;
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
-
-
-int32_t
-stripe_release (xlator_t *this, fd_t *fd)
-{
- return 0;
-}
-
-int
-stripe_forget (xlator_t *this, inode_t *inode)
-{
- uint64_t tmp_fctx = 0;
- stripe_fd_ctx_t *fctx = NULL;
-
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (inode, err);
-
- (void) inode_ctx_del (inode, this, &tmp_fctx);
- if (!tmp_fctx) {
- goto err;
- }
-
- fctx = (stripe_fd_ctx_t *)(long)tmp_fctx;
-
- if (!fctx->static_array)
- GF_FREE (fctx->xl_array);
-
- GF_FREE (fctx);
-err:
- return 0;
-}
-
-int32_t
-notify (xlator_t *this, int32_t event, void *data, ...)
-{
- stripe_private_t *priv = NULL;
- int down_client = 0;
- int i = 0;
-
- if (!this)
- return 0;
-
- priv = this->private;
- if (!priv)
- return 0;
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- case GF_EVENT_CHILD_CONNECTING:
- {
- /* get an index number to set */
- for (i = 0; i < priv->child_count; i++) {
- if (data == priv->xl_array[i])
- break;
- }
- priv->state[i] = 1;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->state[i])
- down_client++;
- }
-
- LOCK (&priv->lock);
- {
- priv->nodes_down = down_client;
- if (data == FIRST_CHILD (this))
- priv->first_child_down = 0;
- if (!priv->nodes_down)
- default_notify (this, event, data);
- }
- UNLOCK (&priv->lock);
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- /* get an index number to set */
- for (i = 0; i < priv->child_count; i++) {
- if (data == priv->xl_array[i])
- break;
- }
- priv->state[i] = 0;
- for (i = 0; i < priv->child_count; i++) {
- if (!priv->state[i])
- down_client++;
- }
-
- LOCK (&priv->lock);
- {
- priv->nodes_down = down_client;
-
- if (data == FIRST_CHILD (this))
- priv->first_child_down = 1;
- if (priv->nodes_down)
- default_notify (this, event, data);
- }
- UNLOCK (&priv->lock);
- }
- break;
-
- default:
- {
- /* */
- default_notify (this, event, data);
- }
- break;
- }
-
- return 0;
-}
-
-int
-stripe_setxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
-{
- int ret = -1;
- int call_cnt = 0;
- stripe_local_t *local = NULL;
-
- if (!frame || !frame->local || !this) {
- gf_log ("", GF_LOG_ERROR, "Possible NULL deref");
- return ret;
- }
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- call_cnt = --local->wind_count;
-
- /**
- * We overwrite ->op_* values here for subsequent faliure
- * conditions, hence we propogate the last errno down the
- * stack.
- */
- if (op_ret < 0) {
- local->op_ret = op_ret;
- local->op_errno = op_errno;
- goto unlock;
- }
- }
-
- unlock:
- UNLOCK (&frame->lock);
-
- if (!call_cnt) {
- STRIPE_STACK_UNWIND (setxattr, frame, local->op_ret,
- local->op_errno, xdata);
- }
-
- return 0;
-}
-
-int
-stripe_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
-{
- data_pair_t *pair = NULL;
- int32_t op_errno = EINVAL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- stripe_local_t *local = NULL;
- int i = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict,
- pair, op_errno, err);
-
- priv = this->private;
- trav = this->children;
-
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- frame->local = local;
- local->wind_count = priv->child_count;
- local->op_ret = local->op_errno = 0;
-
- /**
- * Set xattrs for directories on all subvolumes. Additionally
- * this power is only given to a special client.
- */
- if ((frame->root->pid == -1) && IA_ISDIR (loc->inode->ia_type)) {
- for (i = 0; i < priv->child_count; i++, trav = trav->next) {
- STACK_WIND (frame, stripe_setxattr_cbk,
- trav->xlator, trav->xlator->fops->setxattr,
- loc, dict, flags, xdata);
- }
- } else {
- local->wind_count = 1;
- STACK_WIND (frame, stripe_setxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags, xdata);
- }
-
- return 0;
-err:
- STRIPE_STACK_UNWIND (setxattr, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-int
-stripe_fsetxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
-{
- STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int
-stripe_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int flags, dict_t *xdata)
-{
- data_pair_t *trav = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.*stripe*", dict,
- trav, op_errno, err);
-
- STACK_WIND (frame, stripe_fsetxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr,
- fd, dict, flags, xdata);
- return 0;
- err:
- STRIPE_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL);
- return 0;
-}
-
-int
-stripe_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- STRIPE_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int
-stripe_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
-{
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (this, err);
-
- GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*",
- name, op_errno, err);
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (loc, err);
-
- STACK_WIND (frame, stripe_removexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- loc, name, xdata);
- return 0;
-err:
- STRIPE_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
- return 0;
-}
-
-
-int
-stripe_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int
-stripe_fremovexattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata)
-{
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- GF_IF_NATIVE_XATTR_GOTO ("trusted.*stripe*",
- name, op_errno, err);
-
- STACK_WIND (frame, stripe_fremovexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr,
- fd, name, xdata);
- return 0;
- err:
- STRIPE_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-int32_t
-stripe_readdirp_lookup_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno,
- inode_t *inode, struct iatt *stbuf,
- dict_t *xattr, struct iatt *parent)
-{
- stripe_local_t *local = NULL;
- call_frame_t *main_frame = NULL;
- stripe_local_t *main_local = NULL;
- gf_dirent_t *entry = NULL;
- call_frame_t *prev = NULL;
- int done = 0;
-
- local = frame->local;
- prev = cookie;
-
- entry = local->dirent;
-
- main_frame = local->orig_frame;
- main_local = main_frame->local;
- LOCK (&frame->lock);
- {
-
- local->call_count--;
- if (!local->call_count)
- done = 1;
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->op_ret = op_ret;
- goto unlock;
- }
-
- if (stripe_ctx_handle(this, prev, local, xattr))
- gf_log(this->name, GF_LOG_ERROR,
- "Error getting fctx info from dict.");
-
- correct_file_size(stbuf, local->fctx, prev);
-
- stripe_iatt_merge (stbuf, &entry->d_stat);
- local->stbuf_blocks += stbuf->ia_blocks;
- }
-unlock:
- UNLOCK(&frame->lock);
-
- if (done) {
- inode_ctx_put (entry->inode, this,
- (uint64_t) (long)local->fctx);
-
- done = 0;
- LOCK (&main_frame->lock);
- {
- main_local->wind_count--;
- if (!main_local->wind_count)
- done = 1;
- if (local->op_ret == -1) {
- main_local->op_errno = local->op_errno;
- main_local->op_ret = local->op_ret;
- }
- entry->d_stat.ia_blocks = local->stbuf_blocks;
- }
- UNLOCK (&main_frame->lock);
- if (done) {
- main_frame->local = NULL;
- STRIPE_STACK_UNWIND (readdir, main_frame,
- main_local->op_ret,
- main_local->op_errno,
- &main_local->entries, NULL);
- gf_dirent_free (&main_local->entries);
- stripe_local_wipe (main_local);
- mem_put (main_local);
- }
- frame->local = NULL;
- stripe_local_wipe (local);
- mem_put (local);
- STRIPE_STACK_DESTROY (frame);
- }
-
- return 0;
-}
-
-int32_t
-stripe_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- gf_dirent_t *orig_entries, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- call_frame_t *prev = NULL;
- gf_dirent_t *local_entry = NULL;
- gf_dirent_t *tmp_entry = NULL;
- xlator_list_t *trav = NULL;
- loc_t loc = {0, };
- int32_t count = 0;
- stripe_private_t *priv = NULL;
- int32_t subvols = 0;
- dict_t *xattrs = NULL;
- call_frame_t *local_frame = NULL;
- stripe_local_t *local_ent = NULL;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("stripe", GF_LOG_DEBUG, "possible NULL deref");
- goto out;
- }
- prev = cookie;
- local = frame->local;
- trav = this->children;
- priv = this->private;
-
- subvols = priv->child_count;
-
- LOCK (&frame->lock);
- {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s returned error %s",
- prev->this->name, strerror (op_errno));
- local->op_errno = op_errno;
- local->op_ret = op_ret;
- goto unlock;
- } else {
- local->op_ret = op_ret;
- list_splice_init (&orig_entries->list,
- &local->entries.list);
- local->wind_count = op_ret;
- }
-
- }
-unlock:
- UNLOCK (&frame->lock);
-
- if (op_ret == -1)
- goto out;
-
- xattrs = dict_new ();
- if (xattrs)
- (void) stripe_xattr_request_build (this, xattrs, 0, 0, 0, 0);
- count = op_ret;
- list_for_each_entry_safe (local_entry, tmp_entry,
- (&local->entries.list), list) {
-
- if (!local_entry)
- break;
- if (!IA_ISREG (local_entry->d_stat.ia_type)) {
- LOCK (&frame->lock);
- {
- local->wind_count--;
- count = local->wind_count;
- }
- UNLOCK (&frame->lock);
- continue;
- }
-
- local_frame = copy_frame (frame);
-
- if (!local_frame) {
- op_errno = ENOMEM;
- op_ret = -1;
- goto out;
- }
-
- local_ent = mem_get0 (this->local_pool);
- if (!local_ent) {
- op_errno = ENOMEM;
- op_ret = -1;
- goto out;
- }
-
- loc.inode = inode_ref (local_entry->inode);
-
- uuid_copy (loc.gfid, local_entry->d_stat.ia_gfid);
-
- local_ent->orig_frame = frame;
-
- local_ent->call_count = subvols;
-
- local_ent->dirent = local_entry;
-
- local_frame->local = local_ent;
-
- trav = this->children;
- while (trav) {
- STACK_WIND (local_frame, stripe_readdirp_lookup_cbk,
- trav->xlator, trav->xlator->fops->lookup,
- &loc, xattrs);
- trav = trav->next;
- }
- loc_wipe (&loc);
- }
-out:
- if (!count) {
- /* all entries are directories */
- frame->local = NULL;
- STRIPE_STACK_UNWIND (readdir, frame, local->op_ret,
- local->op_errno, &local->entries, NULL);
- gf_dirent_free (&local->entries);
- stripe_local_wipe (local);
- mem_put (local);
- }
- if (xattrs)
- dict_unref (xattrs);
- return 0;
-
-}
-int32_t
-stripe_readdirp (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- stripe_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- int op_errno = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- priv = this->private;
- trav = this->children;
-
- if (priv->first_child_down) {
- op_errno = ENOTCONN;
- goto err;
- }
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
-
- frame->local = local;
-
- local->fd = fd_ref (fd);
-
- local->wind_count = 0;
-
- local->count = 0;
- local->op_ret = -1;
- INIT_LIST_HEAD(&local->entries);
-
- if (!trav)
- goto err;
-
- STACK_WIND (frame, stripe_readdirp_cbk, trav->xlator,
- trav->xlator->fops->readdirp, fd, size, off, xdata);
- return 0;
-err:
- op_errno = (op_errno == -1) ? errno : op_errno;
- STRIPE_STACK_UNWIND (readdir, frame, -1, op_errno, NULL, NULL);
-
- return 0;
-
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- goto out;
-
- ret = xlator_mem_acct_init (this, gf_stripe_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- goto out;
- }
-
-out:
- return ret;
-}
-
-static int
-clear_pattern_list (stripe_private_t *priv)
-{
- struct stripe_options *prev = NULL;
- struct stripe_options *trav = NULL;
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("stripe", priv, out);
-
- trav = priv->pattern;
- priv->pattern = NULL;
- while (trav) {
- prev = trav;
- trav = trav->next;
- GF_FREE (prev);
- }
-
- ret = 0;
- out:
- return ret;
-
-
-}
-
-
-int
-reconfigure (xlator_t *this, dict_t *options)
-{
-
- stripe_private_t *priv = NULL;
- data_t *data = NULL;
- int ret = -1;
- volume_option_t *opt = NULL;
-
- GF_ASSERT (this);
- GF_ASSERT (this->private);
-
- priv = this->private;
-
-
- ret = 0;
- LOCK (&priv->lock);
- {
- ret = clear_pattern_list (priv);
- if (ret)
- goto unlock;
-
- data = dict_get (options, "block-size");
- if (data) {
- ret = set_stripe_block_size (this, priv, data->data);
- if (ret)
- goto unlock;
- } else {
- opt = xlator_volume_option_get (this, "block-size");
- if (!opt) {
- gf_log (this->name, GF_LOG_WARNING,
- "option 'block-size' not found");
- ret = -1;
- goto unlock;
- }
-
- if (gf_string2bytesize (opt->default_value, &priv->block_size)){
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to set default block-size ");
- ret = -1;
- goto unlock;
- }
- }
-
- GF_OPTION_RECONF("coalesce", priv->coalesce, options, bool,
- unlock);
- }
- unlock:
- UNLOCK (&priv->lock);
- if (ret)
- goto out;
-
- ret = 0;
- out:
- return ret;
-
-}
-
-/**
- * init - This function is called when xlator-graph gets initialized.
- * The option given in volfiles are parsed here.
- * @this -
- */
-int32_t
-init (xlator_t *this)
-{
- stripe_private_t *priv = NULL;
- volume_option_t *opt = NULL;
- xlator_list_t *trav = NULL;
- data_t *data = NULL;
- int32_t count = 0;
- int ret = -1;
-
- if (!this)
- goto out;
-
- trav = this->children;
- while (trav) {
- count++;
- trav = trav->next;
- }
-
- if (!count) {
- gf_log (this->name, GF_LOG_ERROR,
- "stripe configured without \"subvolumes\" option. "
- "exiting");
- goto out;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- if (count == 1) {
- gf_log (this->name, GF_LOG_ERROR,
- "stripe configured with only one \"subvolumes\" option."
- " please check the volume. exiting");
- goto out;
- }
-
- priv = GF_CALLOC (1, sizeof (stripe_private_t),
- gf_stripe_mt_stripe_private_t);
-
- if (!priv)
- goto out;
- priv->xl_array = GF_CALLOC (count, sizeof (xlator_t *),
- gf_stripe_mt_xlator_t);
- if (!priv->xl_array)
- goto out;
-
- priv->state = GF_CALLOC (count, sizeof (int8_t),
- gf_stripe_mt_int8_t);
- if (!priv->state)
- goto out;
-
- priv->child_count = count;
- LOCK_INIT (&priv->lock);
-
- trav = this->children;
- count = 0;
- while (trav) {
- priv->xl_array[count++] = trav->xlator;
- trav = trav->next;
- }
-
- if (count > 256) {
- gf_log (this->name, GF_LOG_ERROR,
- "maximum number of stripe subvolumes supported "
- "is 256");
- goto out;
- }
-
- ret = 0;
- LOCK (&priv->lock);
- {
- opt = xlator_volume_option_get (this, "block-size");
- if (!opt) {
- gf_log (this->name, GF_LOG_WARNING,
- "option 'block-size' not found");
- ret = -1;
- goto unlock;
- }
- if (gf_string2bytesize (opt->default_value, &priv->block_size)){
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to set default block-size ");
- ret = -1;
- goto unlock;
- }
- /* option stripe-pattern *avi:1GB,*pdf:16K */
- data = dict_get (this->options, "block-size");
- if (data) {
- ret = set_stripe_block_size (this, priv, data->data);
- if (ret)
- goto unlock;
- }
- }
- unlock:
- UNLOCK (&priv->lock);
- if (ret)
- goto out;
-
- GF_OPTION_INIT ("use-xattr", priv->xattr_supported, bool, out);
- /* notify related */
- priv->nodes_down = priv->child_count;
-
- GF_OPTION_INIT("coalesce", priv->coalesce, bool, out);
-
- this->local_pool = mem_pool_new (stripe_local_t, 128);
- if (!this->local_pool) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto out;
- }
-
- this->private = priv;
-
- ret = 0;
-out:
- if (ret) {
- if (priv) {
- if (priv->xl_array)
- GF_FREE (priv->xl_array);
- GF_FREE (priv);
- }
- }
- return ret;
-}
-
-/**
- * fini - Free all the private variables
- * @this -
- */
-void
-fini (xlator_t *this)
-{
- stripe_private_t *priv = NULL;
- struct stripe_options *prev = NULL;
- struct stripe_options *trav = NULL;
-
- if (!this)
- goto out;
-
- priv = this->private;
- if (priv) {
- this->private = NULL;
- if (priv->xl_array)
- GF_FREE (priv->xl_array);
-
- trav = priv->pattern;
- while (trav) {
- prev = trav;
- trav = trav->next;
- GF_FREE (prev);
- }
- LOCK_DESTROY (&priv->lock);
- GF_FREE (priv);
- }
-
-out:
- return;
-}
-
-int32_t
-stripe_getxattr_unwind (call_frame_t *frame,
- int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
-
-{
- STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
-
-int
-stripe_internal_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr,
- dict_t *xdata)
-{
-
- char size_key[256] = {0,};
- char index_key[256] = {0,};
- char count_key[256] = {0,};
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (frame->local, out);
-
- if (!xattr || (op_ret == -1))
- goto out;
-
- sprintf (size_key, "trusted.%s.stripe-size", this->name);
- sprintf (count_key, "trusted.%s.stripe-count", this->name);
- sprintf (index_key, "trusted.%s.stripe-index", this->name);
-
- dict_del (xattr, size_key);
- dict_del (xattr, count_key);
- dict_del (xattr, index_key);
-
-out:
- STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno, xattr, xdata);
-
- return 0;
-
-}
-
-int
-stripe_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
-{
- int call_cnt = 0;
- stripe_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (frame->local, out);
-
- local = frame->local;
-
- LOCK (&frame->lock);
- {
- call_cnt = --local->wind_count;
- }
- UNLOCK (&frame->lock);
-
- if (!xattr || (op_ret < 0))
- goto out;
-
- local->op_ret = 0;
-
- if (!local->xattr) {
- local->xattr = dict_ref (xattr);
- } else {
- stripe_aggregate_xattr (local->xattr, xattr);
- }
-
-out:
- if (!call_cnt) {
- STRIPE_STACK_UNWIND (getxattr, frame, local->op_ret, op_errno,
- local->xattr, xdata);
- }
-
- return 0;
-}
-
-int32_t
-stripe_vgetxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *dict, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- int32_t callcnt = 0;
- int32_t ret = -1;
- long cky = 0;
- char *xattr_val = NULL;
- char *xattr_serz = NULL;
- stripe_xattr_sort_t *xattr = NULL;
- dict_t *stripe_xattr = NULL;
-
- if (!frame || !frame->local || !this) {
- gf_log ("", GF_LOG_ERROR, "Possible NULL deref");
- return ret;
- }
-
- local = frame->local;
- cky = (long) cookie;
-
- if (local->xsel[0] == '\0') {
- gf_log (this->name, GF_LOG_ERROR, "Empty xattr in cbk");
- return ret;
- }
-
- LOCK (&frame->lock);
- {
- callcnt = --local->wind_count;
-
- if (!dict || (op_ret < 0))
- goto out;
-
- if (!local->xattr_list)
- local->xattr_list = (stripe_xattr_sort_t *)
- GF_CALLOC (local->nallocs,
- sizeof (stripe_xattr_sort_t),
- gf_stripe_mt_xattr_sort_t);
-
- if (local->xattr_list) {
- ret = dict_get_str (dict, local->xsel, &xattr_val);
- if (ret)
- goto out;
-
- xattr = local->xattr_list + (int32_t) cky;
-
- xattr_val = gf_strdup (xattr_val);
- xattr->pos = cky;
- xattr->xattr_value = xattr_val;
- xattr->xattr_len = strlen (xattr_val);
-
- local->xattr_total_len += xattr->xattr_len + 1;
- }
- }
- out:
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (!local->xattr_total_len)
- goto unwind;
-
- stripe_xattr = dict_new ();
- if (!stripe_xattr)
- goto unwind;
-
- /* select filler based on ->xsel */
- if (XATTR_IS_PATHINFO (local->xsel))
- ret = stripe_fill_pathinfo_xattr (this, local,
- &xattr_serz);
- else {
- gf_log (this->name, GF_LOG_WARNING,
- "Unknown xattr in xattr request");
- goto unwind;
- }
-
- if (!ret) {
- ret = dict_set_dynstr (stripe_xattr, local->xsel,
- xattr_serz);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "Can't set %s key in dict", local->xsel);
- }
-
- unwind:
- STRIPE_STACK_UNWIND (getxattr, frame, op_ret, op_errno,
- stripe_xattr, NULL);
-
- ret = stripe_free_xattr_str (local);
-
- if (local->xattr_list)
- GF_FREE (local->xattr_list);
-
- if (stripe_xattr)
- dict_unref (stripe_xattr);
- }
-
- return ret;
-}
-
-int32_t
-stripe_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
-{
- stripe_local_t *local = NULL;
- xlator_list_t *trav = NULL;
- stripe_private_t *priv = NULL;
- int32_t op_errno = EINVAL;
- int i = 0;
- xlator_t **sub_volumes;
- int ret = 0;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
-
- priv = this->private;
- trav = this->children;
-
- /* Initialization */
- local = mem_get0 (this->local_pool);
- if (!local) {
- op_errno = ENOMEM;
- goto err;
- }
- local->op_ret = -1;
- frame->local = local;
- loc_copy (&local->loc, loc);
-
-
- if (name && (strcmp (GF_XATTR_MARKER_KEY, name) == 0)
- && (-1 == frame->root->pid)) {
- local->marker.call_count = priv->child_count;
-
- sub_volumes = alloca ( priv->child_count *
- sizeof (xlator_t *));
- for (i = 0, trav = this->children; trav ;
- trav = trav->next, i++) {
-
- *(sub_volumes + i) = trav->xlator;
-
- }
-
- if (cluster_getmarkerattr (frame, this, loc, name,
- local, stripe_getxattr_unwind,
- sub_volumes, priv->child_count,
- MARKER_UUID_TYPE, priv->vol_uuid)) {
- op_errno = EINVAL;
- goto err;
- }
-
- return 0;
- }
-
- if (name && strncmp (name, GF_XATTR_QUOTA_SIZE_KEY,
- strlen (GF_XATTR_QUOTA_SIZE_KEY)) == 0) {
- local->wind_count = priv->child_count;
-
- for (i = 0, trav=this->children; i < priv->child_count; i++,
- trav = trav->next) {
- STACK_WIND (frame, stripe_getxattr_cbk,
- trav->xlator, trav->xlator->fops->getxattr,
- loc, name, xdata);
- }
-
- return 0;
- }
-
- if (name &&
- ((strncmp (name, GF_XATTR_PATHINFO_KEY,
- strlen (GF_XATTR_PATHINFO_KEY)) == 0))) {
- if (IA_ISREG (loc->inode->ia_type)) {
- ret = inode_ctx_get (loc->inode, this,
- (uint64_t *) &local->fctx);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "stripe size unavailable from fctx"
- " relying on pathinfo could lead to"
- " wrong results");
- }
-
- local->nallocs = local->wind_count = priv->child_count;
- (void) strncpy (local->xsel, name, strlen (name));
-
- /**
- * for xattrs that need info from all childs, fill ->xsel
- * as above and call the filler function in cbk based on
- * it
- */
- for (i = 0, trav = this->children; i < priv->child_count; i++,
- trav = trav->next) {
- STACK_WIND_COOKIE (frame, stripe_vgetxattr_cbk,
- (void *) (long) i, trav->xlator,
- trav->xlator->fops->getxattr,
- loc, name, xdata);
- }
-
- return 0;
- }
-
- if (name &&(*priv->vol_uuid)) {
- if ((match_uuid_local (name, priv->vol_uuid) == 0)
- && (-1 == frame->root->pid)) {
-
- if (!IA_FILE_OR_DIR (loc->inode->ia_type))
- local->marker.call_count = 1;
- else
- local->marker.call_count = priv->child_count;
-
- sub_volumes = alloca (local->marker.call_count *
- sizeof (xlator_t *));
-
- for (i = 0, trav = this->children;
- i < local->marker.call_count;
- i++, trav = trav->next) {
- *(sub_volumes + i) = trav->xlator;
-
- }
-
- if (cluster_getmarkerattr (frame, this, loc, name,
- local,
- stripe_getxattr_unwind,
- sub_volumes,
- local->marker.call_count,
- MARKER_XTIME_TYPE,
- priv->vol_uuid)) {
- op_errno = EINVAL;
- goto err;
- }
-
- return 0;
- }
- }
-
-
- STACK_WIND (frame, stripe_internal_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
-
- return 0;
-
-err:
- STRIPE_STACK_UNWIND (getxattr, frame, -1, op_errno, NULL, NULL);
- return 0;
-}
-
-
-
-int32_t
-stripe_priv_dump (xlator_t *this)
-{
- char key[GF_DUMP_MAX_BUF_LEN];
- int i = 0;
- stripe_private_t *priv = NULL;
- int ret = -1;
- struct stripe_options *options = NULL;
-
- GF_VALIDATE_OR_GOTO ("stripe", this, out);
-
- priv = this->private;
- if (!priv)
- goto out;
-
- ret = TRY_LOCK (&priv->lock);
- if (ret != 0)
- goto out;
-
- gf_proc_dump_add_section("xlator.cluster.stripe.%s.priv", this->name);
- gf_proc_dump_write("child_count","%d", priv->child_count);
-
- for (i = 0; i < priv->child_count; i++) {
- sprintf (key, "subvolumes[%d]", i);
- gf_proc_dump_write (key, "%s.%s", priv->xl_array[i]->type,
- priv->xl_array[i]->name);
- }
-
- options = priv->pattern;
- while (options != NULL) {
- gf_proc_dump_write ("path_pattern", "%s", priv->pattern->path_pattern);
- gf_proc_dump_write ("options_block_size", "%ul", options->block_size);
-
- options = options->next;
- }
-
- gf_proc_dump_write ("block_size", "%ul", priv->block_size);
- gf_proc_dump_write ("nodes-down", "%d", priv->nodes_down);
- gf_proc_dump_write ("first-child_down", "%d", priv->first_child_down);
- gf_proc_dump_write ("xattr_supported", "%d", priv->xattr_supported);
-
- UNLOCK (&priv->lock);
-
-out:
- return ret;
-}
-
-struct xlator_fops fops = {
- .stat = stripe_stat,
- .unlink = stripe_unlink,
- .rename = stripe_rename,
- .link = stripe_link,
- .truncate = stripe_truncate,
- .create = stripe_create,
- .open = stripe_open,
- .readv = stripe_readv,
- .writev = stripe_writev,
- .statfs = stripe_statfs,
- .flush = stripe_flush,
- .fsync = stripe_fsync,
- .ftruncate = stripe_ftruncate,
- .fstat = stripe_fstat,
- .mkdir = stripe_mkdir,
- .rmdir = stripe_rmdir,
- .lk = stripe_lk,
- .opendir = stripe_opendir,
- .fsyncdir = stripe_fsyncdir,
- .setattr = stripe_setattr,
- .fsetattr = stripe_fsetattr,
- .lookup = stripe_lookup,
- .mknod = stripe_mknod,
- .setxattr = stripe_setxattr,
- .fsetxattr = stripe_fsetxattr,
- .getxattr = stripe_getxattr,
- .removexattr = stripe_removexattr,
- .fremovexattr = stripe_fremovexattr,
- .readdirp = stripe_readdirp,
-};
-
-struct xlator_cbks cbks = {
- .release = stripe_release,
- .forget = stripe_forget,
-};
-
-struct xlator_dumpops dumpops = {
- .priv = stripe_priv_dump,
-};
-
-struct volume_options options[] = {
- { .key = {"block-size"},
- .type = GF_OPTION_TYPE_SIZE_LIST,
- .default_value = "128KB",
- .min = STRIPE_MIN_BLOCK_SIZE,
- .description = "Size of the stripe unit that would be read "
- "from or written to the striped servers."
- },
- { .key = {"use-xattr"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "true"
- },
- { .key = {"coalesce"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "false",
- .description = "Enable coalesce mode to flatten striped files as "
- "stored on the server (i.e., eliminate holes caused "
- "by the traditional format)."
- },
- { .key = {NULL} },
-};
diff --git a/xlators/cluster/stripe/src/stripe.h b/xlators/cluster/stripe/src/stripe.h
deleted file mode 100644
index 1b9e660c126..00000000000
--- a/xlators/cluster/stripe/src/stripe.h
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- This file is licensed to you under your choice of the GNU Lesser
- General Public License, version 3 or any later version (LGPLv3 or
- later), or the GNU General Public License, version 2 (GPLv2), in all
- cases as published by the Free Software Foundation.
-*/
-
-
-#ifndef _STRIPE_H_
-#define _STRIPE_H_
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "stripe-mem-types.h"
-#include "libxlator.h"
-#include <fnmatch.h>
-#include <signal.h>
-
-#define STRIPE_PATHINFO_HEADER "STRIPE:"
-#define STRIPE_MIN_BLOCK_SIZE (16*GF_UNIT_KB)
-
-#define STRIPE_STACK_UNWIND(fop, frame, params ...) do { \
- stripe_local_t *__local = NULL; \
- if (frame) { \
- __local = frame->local; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT (fop, frame, params); \
- if (__local) { \
- stripe_local_wipe(__local); \
- mem_put (__local); \
- } \
- } while (0)
-
-#define STRIPE_STACK_DESTROY(frame) do { \
- stripe_local_t *__local = NULL; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_DESTROY (frame->root); \
- if (__local) { \
- stripe_local_wipe (__local); \
- mem_put (__local); \
- } \
- } while (0)
-
-#define STRIPE_VALIDATE_FCTX(fctx, label) do { \
- int idx = 0; \
- if (!fctx) { \
- op_errno = EINVAL; \
- goto label; \
- } \
- for (idx = 0; idx < fctx->stripe_count; idx++) { \
- if (!fctx->xl_array[idx]) { \
- gf_log (this->name, GF_LOG_ERROR, \
- "fctx->xl_array[%d] is NULL", \
- idx); \
- op_errno = ESTALE; \
- goto label; \
- } \
- } \
- } while (0)
-
-typedef struct stripe_xattr_sort {
- int32_t pos;
- int32_t xattr_len;
- char *xattr_value;
-} stripe_xattr_sort_t;
-
-/**
- * struct stripe_options : This keeps the pattern and the block-size
- * information, which is used for striping on a file.
- */
-struct stripe_options {
- struct stripe_options *next;
- char path_pattern[256];
- uint64_t block_size;
-};
-
-/**
- * Private structure for stripe translator
- */
-struct stripe_private {
- struct stripe_options *pattern;
- xlator_t **xl_array;
- uint64_t block_size;
- gf_lock_t lock;
- uint8_t nodes_down;
- int8_t first_child_down;
- int8_t child_count;
- int8_t *state; /* Current state of child node */
- gf_boolean_t xattr_supported; /* default yes */
- gf_boolean_t coalesce;
- char vol_uuid[UUID_SIZE + 1];
-};
-
-/**
- * Used to keep info about the replies received from fops->readv calls
- */
-struct readv_replies {
- struct iovec *vector;
- int32_t count; //count of vector
- int32_t op_ret; //op_ret of readv
- int32_t op_errno;
- int32_t requested_size;
- struct iatt stbuf; /* 'stbuf' is also a part of reply */
-};
-
-typedef struct _stripe_fd_ctx {
- off_t stripe_size;
- int stripe_count;
- int stripe_coalesce;
- int static_array;
- xlator_t **xl_array;
-} stripe_fd_ctx_t;
-
-
-/**
- * Local structure to be passed with all the frames in case of STACK_WIND
- */
-struct stripe_local; /* this itself is used inside the structure; */
-
-struct stripe_local {
- struct stripe_local *next;
- call_frame_t *orig_frame;
-
- stripe_fd_ctx_t *fctx;
-
- /* Used by _cbk functions */
- struct iatt stbuf;
- struct iatt pre_buf;
- struct iatt post_buf;
- struct iatt preparent;
- struct iatt postparent;
-
- off_t stbuf_size;
- off_t prebuf_size;
- off_t postbuf_size;
- off_t preparent_size;
- off_t postparent_size;
-
- blkcnt_t stbuf_blocks;
- blkcnt_t prebuf_blocks;
- blkcnt_t postbuf_blocks;
- blkcnt_t preparent_blocks;
- blkcnt_t postparent_blocks;
-
- struct readv_replies *replies;
- struct statvfs statvfs_buf;
- dir_entry_t *entry;
-
- int8_t revalidate;
- int8_t failed;
- int8_t unwind;
-
- size_t readv_size;
- int32_t entry_count;
- int32_t node_index;
- int32_t call_count;
- int32_t wind_count; /* used instead of child_cound
- in case of read and write */
- int32_t op_ret;
- int32_t op_errno;
- int32_t count;
- int32_t flags;
- char *name;
- inode_t *inode;
-
- loc_t loc;
- loc_t loc2;
-
- mode_t mode;
- dev_t rdev;
- /* For File I/O fops */
- dict_t *xdata;
-
- stripe_xattr_sort_t *xattr_list;
- int32_t xattr_total_len;
- int32_t nallocs;
- char xsel[256];
-
- struct marker_str marker;
-
- /* General usage */
- off_t offset;
- off_t stripe_size;
-
- int xattr_self_heal_needed;
- int entry_self_heal_needed;
-
- int8_t *list;
- struct gf_flock lock;
- fd_t *fd;
- void *value;
- struct iobref *iobref;
- gf_dirent_t entries;
- gf_dirent_t *dirent;
- dict_t *xattr;
- uuid_t ia_gfid;
-
- int xflag;
- mode_t umask;
-};
-
-typedef struct stripe_local stripe_local_t;
-typedef struct stripe_private stripe_private_t;
-
-/*
- * Determine the stripe index of a particular frame based on the translator.
- */
-static inline int32_t stripe_get_frame_index(stripe_fd_ctx_t *fctx,
- call_frame_t *prev)
-{
- int32_t i, idx = -1;
-
- for (i = 0; i < fctx->stripe_count; i++) {
- if (fctx->xl_array[i] == prev->this) {
- idx = i;
- break;
- }
- }
-
- return idx;
-}
-
-static inline void stripe_copy_xl_array(xlator_t **dst, xlator_t **src,
- int count)
-{
- int i;
-
- for (i = 0; i < count; i++)
- dst[i] = src[i];
-}
-
-void stripe_local_wipe (stripe_local_t *local);
-int32_t stripe_ctx_handle (xlator_t *this, call_frame_t *prev,
- stripe_local_t *local, dict_t *dict);
-void stripe_aggregate_xattr (dict_t *dst, dict_t *src);
-int32_t stripe_xattr_request_build (xlator_t *this, dict_t *dict,
- uint64_t stripe_size, uint32_t stripe_count,
- uint32_t stripe_index,
- uint32_t stripe_coalesce);
-int32_t stripe_get_matching_bs (const char *path, stripe_private_t *priv);
-int set_stripe_block_size (xlator_t *this, stripe_private_t *priv, char *data);
-int32_t stripe_iatt_merge (struct iatt *from, struct iatt *to);
-int32_t stripe_fill_pathinfo_xattr (xlator_t *this, stripe_local_t *local,
- char **xattr_serz);
-int32_t stripe_free_xattr_str (stripe_local_t *local);
-int32_t stripe_xattr_aggregate (char *buffer, stripe_local_t *local,
- int32_t *total);
-off_t coalesced_offset(off_t offset, uint64_t stripe_size, int stripe_count);
-off_t uncoalesced_size(off_t size, uint64_t stripe_size, int stripe_count,
- int stripe_index);
-
-/*
- * Adjust the size attribute for files if coalesce is enabled.
- */
-static inline void correct_file_size(struct iatt *buf, stripe_fd_ctx_t *fctx,
- call_frame_t *prev)
-{
- int index;
-
- if (!IA_ISREG(buf->ia_type))
- return;
-
- if (!fctx || !fctx->stripe_coalesce)
- return;
-
- index = stripe_get_frame_index(fctx, prev);
- buf->ia_size = uncoalesced_size(buf->ia_size, fctx->stripe_size,
- fctx->stripe_count, index);
-}
-
-#endif /* _STRIPE_H_ */
diff --git a/xlators/cluster/unify/Makefile.am b/xlators/cluster/unify/Makefile.am
deleted file mode 100644
index d471a3f9243..00000000000
--- a/xlators/cluster/unify/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/cluster/unify/src/Makefile.am b/xlators/cluster/unify/src/Makefile.am
deleted file mode 100644
index 2a1fe837263..00000000000
--- a/xlators/cluster/unify/src/Makefile.am
+++ /dev/null
@@ -1,16 +0,0 @@
-
-xlator_LTLIBRARIES = unify.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/legacy/cluster
-
-unify_la_LDFLAGS = -module -avoidversion
-
-unify_la_SOURCES = unify.c unify-self-heal.c
-unify_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = unify.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/cluster/unify/src/unify-mem-types.h b/xlators/cluster/unify/src/unify-mem-types.h
deleted file mode 100644
index 13c9cc1f7b3..00000000000
--- a/xlators/cluster/unify/src/unify-mem-types.h
+++ /dev/null
@@ -1,41 +0,0 @@
-
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef __UNIFY_MEM_TYPES_H__
-#define __UNIFY_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_unify_mem_types_ {
- gf_unify_mt_char = gf_common_mt_end + 1,
- gf_unify_mt_int16_t,
- gf_unify_mt_xlator_t,
- gf_unify_mt_unify_private_t,
- gf_unify_mt_xlator_list_t,
- gf_unify_mt_dir_entry_t,
- gf_unify_mt_off_t,
- gf_unify_mt_int,
- gf_unify_mt_unify_self_heal_struct,
- gf_unify_mt_unify_local_t,
- gf_unify_mt_end
-};
-#endif
-
diff --git a/xlators/cluster/unify/src/unify-self-heal.c b/xlators/cluster/unify/src/unify-self-heal.c
deleted file mode 100644
index f99e4c7c360..00000000000
--- a/xlators/cluster/unify/src/unify-self-heal.c
+++ /dev/null
@@ -1,1239 +0,0 @@
-/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * unify-self-heal.c :
- * This file implements few functions which enables 'unify' translator
- * to be consistent in its behaviour when
- * > a node fails,
- * > a node gets added,
- * > a failed node comes back
- * > a new namespace server is added (ie, an fresh namespace server).
- *
- * This functionality of 'unify' will enable glusterfs to support storage
- * system failure, and maintain consistancy. This works both ways, ie, when
- * an entry (either file or directory) is found on namespace server, and not
- * on storage nodes, its created in storage nodes and vica-versa.
- *
- * The two fops, where it can be implemented are 'getdents ()' and 'lookup ()'
- *
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "unify.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "common-utils.h"
-
-int32_t
-unify_sh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_sh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_bgsh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-int32_t
-unify_bgsh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count);
-
-/**
- * unify_local_wipe - free all the extra allocation of local->* here.
- */
-static void
-unify_local_wipe (unify_local_t *local)
-{
- /* Free the strdup'd variables in the local structure */
- if (local->name) {
- GF_FREE (local->name);
- }
-
- if (local->sh_struct) {
- if (local->sh_struct->offset_list)
- GF_FREE (local->sh_struct->offset_list);
-
- if (local->sh_struct->entry_list)
- GF_FREE (local->sh_struct->entry_list);
-
- if (local->sh_struct->count_list)
- GF_FREE (local->sh_struct->count_list);
-
- GF_FREE (local->sh_struct);
- }
-
- loc_wipe (&local->loc1);
- loc_wipe (&local->loc2);
-}
-
-int32_t
-unify_sh_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- /* if local->call_count == 0, that means, setdents on
- * storagenodes is still pending.
- */
- if (local->call_count)
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (callcnt == 0) {
- if (local->sh_struct->entry_list[0]) {
- prev = entry = local->sh_struct->entry_list[0];
- if (!entry)
- return 0;
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
-
- if (!local->flags) {
- if (local->sh_struct->count_list[0] >=
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- /* count == size, that means, there are more entries
- to read from */
- //local->call_count = 0;
- local->sh_struct->offset_list[0] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[0],
- GF_GET_DIR_ONLY);
- }
- } else {
- inode = local->loc1.inode;
- fd_unref (local->fd);
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- inode, &local->stbuf, local->dict,
- &local->oldpostparent);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_sh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = 0;
- unsigned long final = 0;
- dir_entry_t *tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
-
- local->sh_struct->entry_list[0] = tmp;
- local->sh_struct->count_list[0] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
-
- if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) {
- final = 1;
- }
-
- LOCK (&frame->lock);
- {
- /* local->call_count will be '0' till now. make it 1 so, it
- can be UNWIND'ed for the last call. */
- local->call_count = priv->child_count;
- if (final)
- local->flags = 1;
- }
- UNLOCK (&frame->lock);
-
- for (index = 0; index < priv->child_count; index++)
- {
- STACK_WIND_COOKIE (frame,
- unify_sh_setdents_cbk,
- (void *)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setdents,
- local->fd, GF_SET_DIR_ONLY,
- local->sh_struct->entry_list[0], count);
- }
-
- return 0;
-}
-
-int32_t
-unify_sh_ns_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- if (local->sh_struct->entry_list[index]) {
- prev = entry = local->sh_struct->entry_list[index];
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
- }
- UNLOCK (&frame->lock);
-
- if (local->sh_struct->count_list[index] <
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-
-/**
- * unify_sh_getdents_cbk -
- */
-int32_t
-unify_sh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *tmp = NULL;
-
- if (op_ret >= 0 && count > 0) {
- /* There is some dentry found, just send the dentry to NS */
- tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
- local->sh_struct->entry_list[index] = tmp;
- local->sh_struct->count_list[index] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
- STACK_WIND_COOKIE (frame,
- unify_sh_ns_setdents_cbk,
- cookie,
- NS(this),
- NS(this)->fops->setdents,
- local->fd,
- GF_SET_IF_NOT_PRESENT,
- local->sh_struct->entry_list[index],
- count);
- return 0;
- }
-
- if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_sh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-/**
- * unify_sh_opendir_cbk -
- *
- * @cookie:
- */
-int32_t
-unify_sh_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- } else {
- gf_log (this->name, GF_LOG_WARNING, "failed");
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->call_count = priv->child_count + 1;
-
- if (!local->failed) {
- /* send getdents() namespace after finishing
- storage nodes */
- local->call_count--;
-
- fd_bind (fd);
-
- if (local->call_count) {
- /* Used as the offset index. This list keeps
- * track of offset sent to each node during
- * STACK_WIND.
- */
- local->sh_struct->offset_list =
- GF_CALLOC (priv->child_count,
- sizeof (off_t),
- gf_unify_mt_off_t);
- ERR_ABORT (local->sh_struct->offset_list);
-
- local->sh_struct->entry_list =
- GF_CALLOC (priv->child_count,
- sizeof (dir_entry_t *),
- gf_unify_mt_dir_entry_t);
- ERR_ABORT (local->sh_struct->entry_list);
-
- local->sh_struct->count_list =
- GF_CALLOC (priv->child_count,
- sizeof (int),
- gf_unify_mt_int);
- ERR_ABORT (local->sh_struct->count_list);
-
- /* Send getdents on all the fds */
- for (index = 0;
- index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_getdents_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_ALL);
- }
-
- /* did stack wind, so no need to unwind here */
- return 0;
- } /* (local->call_count) */
- } /* (!local->failed) */
-
- /* Opendir failed on one node. */
- inode = local->loc1.inode;
- fd_unref (local->fd);
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
- /* Only 'self-heal' failed, lookup() was successful. */
- local->op_ret = 0;
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame, local->op_ret, local->op_errno, inode,
- &local->stbuf, local->dict, &local->oldpostparent);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
-
- return 0;
-}
-
-/**
- * gf_sh_checksum_cbk -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-unify_sh_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int32_t callcnt = 0;
- inode_t *inode = NULL;
- dict_t *tmp_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (NS(this) == (xlator_t *)cookie) {
- memcpy (local->sh_struct->ns_file_checksum,
- file_checksum, NAME_MAX);
- memcpy (local->sh_struct->ns_dir_checksum,
- dir_checksum, NAME_MAX);
- } else {
- if (local->entry_count == 0) {
- /* Initialize the dir_checksum to be
- * used for comparision with other
- * storage nodes. Should be done for
- * the first successful call *only*.
- */
- /* Using 'entry_count' as a flag */
- local->entry_count = 1;
- memcpy (local->sh_struct->dir_checksum,
- dir_checksum, NAME_MAX);
- }
-
- /* Reply from the storage nodes */
- for (index = 0;
- index < NAME_MAX; index++) {
- /* Files should be present in
- only one node */
- local->sh_struct->file_checksum[index] ^= file_checksum[index];
-
- /* directory structure should be
- same accross */
- if (local->sh_struct->dir_checksum[index] != dir_checksum[index])
- local->failed = 1;
- }
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- for (index = 0; index < NAME_MAX ; index++) {
- if (local->sh_struct->file_checksum[index] !=
- local->sh_struct->ns_file_checksum[index]) {
- local->failed = 1;
- break;
- }
- if (local->sh_struct->dir_checksum[index] !=
- local->sh_struct->ns_dir_checksum[index]) {
- local->failed = 1;
- break;
- }
- }
-
- if (local->failed) {
- /* Log it, it should be a rare event */
- gf_log (this->name, GF_LOG_WARNING,
- "Self-heal triggered on directory %s",
- local->loc1.path);
-
- /* Any self heal will be done at directory level */
- local->call_count = 0;
- local->op_ret = -1;
- local->failed = 0;
-
- local->fd = fd_create (local->loc1.inode,
- frame->root->pid);
-
- local->call_count = priv->child_count + 1;
-
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_opendir_cbk,
- priv->xl_array[index]->name,
- priv->xl_array[index],
- priv->xl_array[index]->fops->opendir,
- &local->loc1,
- local->fd);
- }
- /* opendir can be done on the directory */
- return 0;
- }
-
- /* no mismatch */
- inode = local->loc1.inode;
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- inode,
- &local->stbuf,
- local->dict, &local->oldpostparent);
- if (tmp_dict)
- dict_unref (tmp_dict);
- }
-
- return 0;
-}
-
-/* Foreground self-heal part over */
-
-/* Background self-heal part */
-
-int32_t
-unify_bgsh_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- dir_entry_t *prev, *entry, *trav;
-
- LOCK (&frame->lock);
- {
- /* if local->call_count == 0, that means, setdents
- on storagenodes is still pending. */
- if (local->call_count)
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
-
- if (callcnt == 0) {
- if (local->sh_struct->entry_list[0]) {
- prev = entry = local->sh_struct->entry_list[0];
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
-
- if (!local->flags) {
- if (local->sh_struct->count_list[0] >=
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- /* count == size, that means, there are more
- entries to read from */
- //local->call_count = 0;
- local->sh_struct->offset_list[0] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[0],
- GF_GET_DIR_ONLY);
- }
- } else {
- fd_unref (local->fd);
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_bgsh_ns_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = 0;
- unsigned long final = 0;
- dir_entry_t *tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
-
- local->sh_struct->entry_list[0] = tmp;
- local->sh_struct->count_list[0] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
-
- if ((count < UNIFY_SELF_HEAL_GETDENTS_COUNT) || !entry) {
- final = 1;
- }
-
- LOCK (&frame->lock);
- {
- /* local->call_count will be '0' till now. make it 1 so,
- it can be UNWIND'ed for the last call. */
- local->call_count = priv->child_count;
- if (final)
- local->flags = 1;
- }
- UNLOCK (&frame->lock);
-
- for (index = 0; index < priv->child_count; index++)
- {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_setdents_cbk,
- (void *)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setdents,
- local->fd, GF_SET_DIR_ONLY,
- local->sh_struct->entry_list[0], count);
- }
-
- return 0;
-}
-
-int32_t
-unify_bgsh_ns_setdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *prev, *entry, *trav;
-
- if (local->sh_struct->entry_list[index]) {
- prev = entry = local->sh_struct->entry_list[index];
- if (!entry)
- return 0;
- trav = entry->next;
- while (trav) {
- prev->next = trav->next;
- GF_FREE (trav->name);
- if (IA_ISLNK (trav->buf.ia_type))
- GF_FREE (trav->link);
- GF_FREE (trav);
- trav = prev->next;
- }
- GF_FREE (entry);
- }
-
- if (local->sh_struct->count_list[index] <
- UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries
- to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-
-/**
- * unify_bgsh_getdents_cbk -
- */
-int32_t
-unify_bgsh_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- int32_t callcnt = -1;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- long index = (long)cookie;
- dir_entry_t *tmp = NULL;
-
- if (op_ret >= 0 && count > 0) {
- /* There is some dentry found, just send the dentry to NS */
- tmp = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_unify_mt_dir_entry_t);
- local->sh_struct->entry_list[index] = tmp;
- local->sh_struct->count_list[index] = count;
- if (entry) {
- tmp->next = entry->next;
- entry->next = NULL;
- }
- STACK_WIND_COOKIE (frame,
- unify_bgsh_ns_setdents_cbk,
- cookie,
- NS(this),
- NS(this)->fops->setdents,
- local->fd,
- GF_SET_IF_NOT_PRESENT,
- local->sh_struct->entry_list[index],
- count);
- return 0;
- }
-
- if (count < UNIFY_SELF_HEAL_GETDENTS_COUNT) {
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
- } else {
- /* count == size, that means, there are more entries to read from */
- local->sh_struct->offset_list[index] +=
- UNIFY_SELF_HEAL_GETDENTS_COUNT;
-
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- cookie,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- local->sh_struct->offset_list[index],
- GF_GET_ALL);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir on (%s) with offset %"PRId64"",
- priv->xl_array[index]->name,
- local->sh_struct->offset_list[index]);
- }
-
- if (!callcnt) {
- /* All storage nodes have done unified setdents on NS node.
- * Now, do getdents from NS and do setdents on storage nodes.
- */
-
- /* sh_struct->offset_list is no longer required for
- storage nodes now */
- local->sh_struct->offset_list[0] = 0; /* reset */
-
- STACK_WIND (frame,
- unify_bgsh_ns_getdents_cbk,
- NS(this),
- NS(this)->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_DIR_ONLY);
- }
-
- return 0;
-}
-
-/**
- * unify_bgsh_opendir_cbk -
- *
- * @cookie:
- */
-int32_t
-unify_bgsh_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int32_t callcnt = 0;
- int16_t index = 0;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- } else {
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->call_count = priv->child_count + 1;
-
- if (!local->failed) {
- /* send getdents() namespace after finishing
- storage nodes */
- local->call_count--;
- callcnt = local->call_count;
-
- fd_bind (fd);
-
- if (local->call_count) {
- /* Used as the offset index. This list keeps
- track of offset sent to each node during
- STACK_WIND. */
- local->sh_struct->offset_list =
- GF_CALLOC (priv->child_count,
- sizeof (off_t),
- gf_unify_mt_off_t);
- ERR_ABORT (local->sh_struct->offset_list);
-
- local->sh_struct->entry_list =
- GF_CALLOC (priv->child_count,
- sizeof (dir_entry_t *),
- gf_unify_mt_dir_entry_t);
- ERR_ABORT (local->sh_struct->entry_list);
-
- local->sh_struct->count_list =
- GF_CALLOC (priv->child_count,
- sizeof (int),
- gf_unify_mt_int);
- ERR_ABORT (local->sh_struct->count_list);
-
- /* Send getdents on all the fds */
- for (index = 0;
- index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_getdents_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getdents,
- local->fd,
- UNIFY_SELF_HEAL_GETDENTS_COUNT,
- 0, /* In this call, do send '0' as offset */
- GF_GET_ALL);
- }
- /* did a stack wind, so no need to unwind here */
- return 0;
- } /* (local->call_count) */
- } /* (!local->failed) */
-
- /* Opendir failed on one node. */
- fd_unref (local->fd);
-
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
-
- return 0;
-}
-
-/**
- * gf_bgsh_checksum_cbk -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-unify_bgsh_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *file_checksum,
- uint8_t *dir_checksum)
-{
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int32_t callcnt = 0;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (NS(this) == (xlator_t *)cookie) {
- memcpy (local->sh_struct->ns_file_checksum,
- file_checksum, NAME_MAX);
- memcpy (local->sh_struct->ns_dir_checksum,
- dir_checksum, NAME_MAX);
- } else {
- if (local->entry_count == 0) {
- /* Initialize the dir_checksum to be
- * used for comparision with other
- * storage nodes. Should be done for
- * the first successful call *only*.
- */
- /* Using 'entry_count' as a flag */
- local->entry_count = 1;
- memcpy (local->sh_struct->dir_checksum,
- dir_checksum, NAME_MAX);
- }
-
- /* Reply from the storage nodes */
- for (index = 0;
- index < NAME_MAX; index++) {
- /* Files should be present in only
- one node */
- local->sh_struct->file_checksum[index] ^= file_checksum[index];
-
- /* directory structure should be same
- accross */
- if (local->sh_struct->dir_checksum[index] != dir_checksum[index])
- local->failed = 1;
- }
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- for (index = 0; index < NAME_MAX ; index++) {
- if (local->sh_struct->file_checksum[index] !=
- local->sh_struct->ns_file_checksum[index]) {
- local->failed = 1;
- break;
- }
- if (local->sh_struct->dir_checksum[index] !=
- local->sh_struct->ns_dir_checksum[index]) {
- local->failed = 1;
- break;
- }
- }
-
- if (local->failed) {
- /* Log it, it should be a rare event */
- gf_log (this->name, GF_LOG_WARNING,
- "Self-heal triggered on directory %s",
- local->loc1.path);
-
- /* Any self heal will be done at the directory level */
- local->op_ret = -1;
- local->failed = 0;
-
- local->fd = fd_create (local->loc1.inode,
- frame->root->pid);
- local->call_count = priv->child_count + 1;
-
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_bgsh_opendir_cbk,
- priv->xl_array[index]->name,
- priv->xl_array[index],
- priv->xl_array[index]->fops->opendir,
- &local->loc1,
- local->fd);
- }
-
- /* opendir can be done on the directory */
- return 0;
- }
-
- /* no mismatch */
- unify_local_wipe (local);
- STACK_DESTROY (frame->root);
- }
-
- return 0;
-}
-
-/* Background self-heal part over */
-
-
-
-
-/**
- * zr_unify_self_heal -
- *
- * @frame: frame used in lookup. get a copy of it, and use that copy.
- * @this: pointer to unify xlator.
- * @inode: pointer to inode, for which the consistency check is required.
- *
- */
-int32_t
-zr_unify_self_heal (call_frame_t *frame,
- xlator_t *this,
- unify_local_t *local)
-{
- unify_private_t *priv = this->private;
- call_frame_t *bg_frame = NULL;
- unify_local_t *bg_local = NULL;
- inode_t *tmp_inode = NULL;
- dict_t *tmp_dict = NULL;
- int16_t index = 0;
-
- if (local->inode_generation < priv->inode_generation) {
- /* Any self heal will be done at the directory level */
- /* Update the inode's generation to the current generation
- value. */
- local->inode_generation = priv->inode_generation;
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->inode_generation);
-
- if (priv->self_heal == ZR_UNIFY_FG_SELF_HEAL) {
- local->op_ret = 0;
- local->failed = 0;
- local->call_count = priv->child_count + 1;
- local->sh_struct =
- GF_CALLOC (1, sizeof (struct unify_self_heal_struct),
- gf_unify_mt_unify_self_heal_struct);
-
- /* +1 is for NS */
- for (index = 0;
- index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (frame,
- unify_sh_checksum_cbk,
- priv->xl_array[index],
- priv->xl_array[index],
- priv->xl_array[index]->fops->checksum,
- &local->loc1,
- 0);
- }
-
- /* Self-heal in foreground, hence no need
- to UNWIND here */
- return 0;
- }
-
- /* Self Heal done in background */
- bg_frame = copy_frame (frame);
- INIT_LOCAL (bg_frame, bg_local);
- loc_copy (&bg_local->loc1, &local->loc1);
- bg_local->op_ret = 0;
- bg_local->failed = 0;
- bg_local->call_count = priv->child_count + 1;
- bg_local->sh_struct =
- GF_CALLOC (1, sizeof (struct unify_self_heal_struct),
- gf_unify_mt_unify_self_heal_struct);
-
- /* +1 is for NS */
- for (index = 0; index < (priv->child_count + 1); index++) {
- STACK_WIND_COOKIE (bg_frame,
- unify_bgsh_checksum_cbk,
- priv->xl_array[index],
- priv->xl_array[index],
- priv->xl_array[index]->fops->checksum,
- &bg_local->loc1,
- 0);
- }
- }
-
- /* generation number matches, self heal already done or
- * self heal done in background: just do STACK_UNWIND
- */
- tmp_inode = local->loc1.inode;
- tmp_dict = local->dict;
-
- unify_local_wipe (local);
-
- /* This is lookup_cbk ()'s UNWIND. */
- STACK_UNWIND (frame,
- local->op_ret,
- local->op_errno,
- tmp_inode,
- &local->stbuf,
- local->dict,
- &local->oldpostparent);
-
- if (tmp_dict)
- dict_unref (tmp_dict);
-
- return 0;
-}
-
diff --git a/xlators/cluster/unify/src/unify.c b/xlators/cluster/unify/src/unify.c
deleted file mode 100644
index f3da6d0bac7..00000000000
--- a/xlators/cluster/unify/src/unify.c
+++ /dev/null
@@ -1,4589 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/**
- * xlators/cluster/unify:
- * - This xlator is one of the main translator in GlusterFS, which
- * actually does the clustering work of the file system. One need to
- * understand that, unify assumes file to be existing in only one of
- * the child node, and directories to be present on all the nodes.
- *
- * NOTE:
- * Now, unify has support for global namespace, which is used to keep a
- * global view of fs's namespace tree. The stat for directories are taken
- * just from the namespace, where as for files, just 'ia_ino' is taken from
- * Namespace node, and other stat info is taken from the actual storage node.
- * Also Namespace node helps to keep consistant inode for files across
- * glusterfs (re-)mounts.
- */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "unify.h"
-#include "dict.h"
-#include "xlator.h"
-#include "hashfn.h"
-#include "logging.h"
-#include "stack.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include <signal.h>
-#include <libgen.h>
-#include "compat-errno.h"
-#include "compat.h"
-
-#define UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR(_loc) do { \
- if (!(_loc && _loc->inode)) { \
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-
-#define UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(_fd) do { \
- if (!(_fd && !fd_ctx_get (_fd, this, NULL))) { \
- STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-#define UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(_fd) do { \
- if (!_fd) { \
- STACK_UNWIND (frame, -1, EBADFD, NULL, NULL); \
- return 0; \
- } \
-} while(0)
-
-/**
- * unify_local_wipe - free all the extra allocation of local->* here.
- */
-static void
-unify_local_wipe (unify_local_t *local)
-{
- /* Free the strdup'd variables in the local structure */
- if (local->name) {
- GF_FREE (local->name);
- }
- loc_wipe (&local->loc1);
- loc_wipe (&local->loc2);
-}
-
-
-
-/*
- * unify_normalize_stats -
- */
-void
-unify_normalize_stats (struct statvfs *buf,
- unsigned long bsize,
- unsigned long frsize)
-{
- double factor;
-
- if (buf->f_bsize != bsize) {
- factor = ((double) buf->f_bsize) / bsize;
- buf->f_bsize = bsize;
- buf->f_bfree = (fsblkcnt_t) (factor * buf->f_bfree);
- buf->f_bavail = (fsblkcnt_t) (factor * buf->f_bavail);
- }
-
- if (buf->f_frsize != frsize) {
- factor = ((double) buf->f_frsize) / frsize;
- buf->f_frsize = frsize;
- buf->f_blocks = (fsblkcnt_t) (factor * buf->f_blocks);
- }
-}
-
-
-xlator_t *
-unify_loc_subvol (loc_t *loc, xlator_t *this)
-{
- unify_private_t *priv = NULL;
- xlator_t *subvol = NULL;
- int16_t *list = NULL;
- long index = 0;
- xlator_t *subvol_i = NULL;
- int ret = 0;
- uint64_t tmp_list = 0;
-
- priv = this->private;
- subvol = NS (this);
-
- if (!IA_ISDIR (loc->inode->ia_type)) {
- ret = inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
- if (!list)
- goto out;
-
- for (index = 0; list[index] != -1; index++) {
- subvol_i = priv->xl_array[list[index]];
- if (subvol_i != NS (this)) {
- subvol = subvol_i;
- break;
- }
- }
- }
-out:
- return subvol;
-}
-
-
-
-/**
- * unify_statfs_cbk -
- */
-int32_t
-unify_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *stbuf)
-{
- int32_t callcnt = 0;
- struct statvfs *dict_buf = NULL;
- unsigned long bsize;
- unsigned long frsize;
- unify_local_t *local = (unify_local_t *)frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- /* when a call is successfull, add it to local->dict */
- dict_buf = &local->statvfs_buf;
-
- if (dict_buf->f_bsize != 0) {
- bsize = max (dict_buf->f_bsize,
- stbuf->f_bsize);
-
- frsize = max (dict_buf->f_frsize,
- stbuf->f_frsize);
- unify_normalize_stats(dict_buf, bsize, frsize);
- unify_normalize_stats(stbuf, bsize, frsize);
- } else {
- dict_buf->f_bsize = stbuf->f_bsize;
- dict_buf->f_frsize = stbuf->f_frsize;
- }
-
- dict_buf->f_blocks += stbuf->f_blocks;
- dict_buf->f_bfree += stbuf->f_bfree;
- dict_buf->f_bavail += stbuf->f_bavail;
- dict_buf->f_files += stbuf->f_files;
- dict_buf->f_ffree += stbuf->f_ffree;
- dict_buf->f_favail += stbuf->f_favail;
- dict_buf->f_fsid = stbuf->f_fsid;
- dict_buf->f_flag = stbuf->f_flag;
- dict_buf->f_namemax = stbuf->f_namemax;
- local->op_ret = op_ret;
- } else {
- /* fop on storage node has failed due to some error */
- if (op_errno != ENOTCONN) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): %s",
- prev_frame->this->name,
- strerror (op_errno));
- }
- local->op_errno = op_errno;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->statvfs_buf);
- }
-
- return 0;
-}
-
-/**
- * unify_statfs -
- */
-int32_t
-unify_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
- xlator_list_t *trav = this->children;
-
- INIT_LOCAL (frame, local);
- local->call_count = ((unify_private_t *)this->private)->child_count;
-
- while(trav) {
- STACK_WIND (frame,
- unify_statfs_cbk,
- trav->xlator,
- trav->xlator->fops->statfs,
- loc);
- trav = trav->next;
- }
-
- return 0;
-}
-
-/**
- * unify_buf_cbk -
- */
-int32_t
-unify_buf_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s(): child(%s): path(%s): %s",
- gf_fop_list[frame->root->op],
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
-
- local->op_errno = op_errno;
- if ((op_errno == ENOENT) && priv->optimist)
- local->op_ret = 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- if (NS (this) == prev_frame->this) {
- local->ia_ino = buf->ia_ino;
- /* If the entry is directory, get the stat
- from NS node */
- if (IA_ISDIR (buf->ia_type) ||
- !local->stbuf.ia_blksize) {
- local->stbuf = *buf;
- }
- }
-
- if ((!IA_ISDIR (buf->ia_type)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from Storage
- node. */
- local->stbuf = *buf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* If the inode number is not filled, operation should
- fail */
- if (!local->ia_ino)
- local->op_ret = -1;
-
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
-
- return 0;
-}
-
-#define check_if_dht_linkfile(s) \
- ((st_mode_from_ia (s->ia_prot, s->ia_type) & ~S_IFMT) == S_ISVTX)
-
-/**
- * unify_lookup_cbk -
- */
-int32_t
-unify_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- inode_t *tmp_inode = NULL;
- dict_t *local_dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- if (local->revalidate &&
- (op_errno == ESTALE)) {
- /* ESTALE takes priority */
- local->op_errno = op_errno;
- local->failed = 1;
- }
-
- if ((op_errno != ENOTCONN)
- && (op_errno != ENOENT)
- && (local->op_errno != ESTALE)) {
- /* if local->op_errno is already ESTALE, then
- * ESTALE has to propogated to the parent first.
- * do not enter here.
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
-
- } else if (local->revalidate &&
- (local->op_errno != ESTALE) &&
- !(priv->optimist && (op_errno == ENOENT))) {
-
- gf_log (this->name,
- (op_errno == ENOTCONN) ?
- GF_LOG_DEBUG:GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
- }
-
- if (op_ret == 0) {
- local->op_ret = 0;
-
- if (check_if_dht_linkfile(buf)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "file %s may be DHT link file on %s, "
- "make sure the backend is not shared "
- "between unify and DHT",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- }
-
- if (local->stbuf.ia_type && local->stbuf.ia_blksize) {
- /* make sure we already have a stbuf
- stored in local->stbuf */
- if (IA_ISDIR (local->stbuf.ia_type) &&
- !IA_ISDIR (buf->ia_type)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] '%s' is directory "
- "on namespace, non-directory "
- "on node '%s', returning EIO",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- local->return_eio = 1;
- }
- if (!IA_ISDIR (local->stbuf.ia_type) &&
- IA_ISDIR (buf->ia_type)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] '%s' is directory "
- "on node '%s', non-directory "
- "on namespace, returning EIO",
- local->loc1.path,
- priv->xl_array[(long)cookie]->name);
- local->return_eio = 1;
- }
- }
-
- if (!local->revalidate && !IA_ISDIR (buf->ia_type)) {
- /* This is the first time lookup on file*/
- if (!local->list) {
- /* list is not allocated, allocate
- the max possible range */
- local->list = GF_CALLOC (1, 2 * (priv->child_count + 2),
- gf_unify_mt_int16_t);
- if (!local->list) {
- gf_log (this->name,
- GF_LOG_CRITICAL,
- "Not enough memory");
- STACK_UNWIND (frame, -1,
- ENOMEM, inode,
- NULL, NULL, NULL);
- return 0;
- }
- }
- /* update the index of the list */
- local->list [local->index++] =
- (int16_t)(long)cookie;
- }
-
- if (!local->revalidate && IA_ISDIR (buf->ia_type)) {
- /* fresh lookup of a directory */
- inode_ctx_put (local->loc1.inode, this,
- priv->inode_generation);
- }
-
- if ((!local->dict) && dict &&
- (priv->xl_array[(long)cookie] != NS(this))) {
- local->dict = dict_ref (dict);
- }
-
- /* index of NS node is == total child count */
- if (priv->child_count == (int16_t)(long)cookie) {
- /* Take the inode number from namespace */
- local->ia_ino = buf->ia_ino;
- if (IA_ISDIR (buf->ia_type) ||
- !(local->stbuf.ia_blksize)) {
- local->stbuf = *buf;
- local->oldpostparent = *postparent;
- }
- } else if (!IA_ISDIR (buf->ia_type)) {
- /* If file, then get the stat from
- storage node */
- local->stbuf = *buf;
- }
-
- if (local->ia_nlink < buf->ia_nlink) {
- local->ia_nlink = buf->ia_nlink;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local_dict = local->dict;
- if (local->return_eio) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "[CRITICAL] Unable to fix the path (%s) with "
- "self-heal, try manual verification. "
- "returning EIO.", local->loc1.path);
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EIO, inode, NULL, NULL);
- if (local_dict) {
- dict_unref (local_dict);
- }
- return 0;
- }
-
- if (!local->stbuf.ia_blksize) {
- /* Inode not present */
- local->op_ret = -1;
- } else {
- if (!local->revalidate &&
- !IA_ISDIR (local->stbuf.ia_type)) {
- /* If its a file, big array is useless,
- allocate the smaller one */
- int16_t *list = NULL;
- list = GF_CALLOC (1, 2 * (local->index + 1),
- gf_unify_mt_int16_t);
- ERR_ABORT (list);
- memcpy (list, local->list, 2 * local->index);
- /* Make the end of the list as -1 */
- GF_FREE (local->list);
- local->list = list;
- local->list [local->index] = -1;
- /* Update the inode's ctx with proper array */
- /* TODO: log on failure */
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->list);
- }
-
- if (IA_ISDIR(local->loc1.inode->ia_type)) {
- /* lookup is done for directory */
- if (local->failed && priv->self_heal) {
- /* Triggering self-heal */
- /* means, self-heal required for this
- inode */
- local->inode_generation = 0;
- priv->inode_generation++;
- }
- } else {
- local->stbuf.ia_ino = local->ia_ino;
- }
-
- local->stbuf.ia_nlink = local->ia_nlink;
- }
- if (local->op_ret == -1) {
- if (!local->revalidate && local->list)
- GF_FREE (local->list);
- }
-
- if ((local->op_ret >= 0) && local->failed &&
- local->revalidate) {
- /* Done revalidate, but it failed */
- if ((op_errno != ENOTCONN)
- && (local->op_errno != ESTALE)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Revalidate failed for path(%s): %s",
- local->loc1.path, strerror (op_errno));
- }
- local->op_ret = -1;
- }
-
- if ((priv->self_heal && !priv->optimist) &&
- (!local->revalidate && (local->op_ret == 0) &&
- IA_ISDIR(local->stbuf.ia_type))) {
- /* Let the self heal be done here */
- zr_unify_self_heal (frame, this, local);
- local_dict = NULL;
- } else {
- if (local->failed) {
- /* NOTE: directory lookup is sent to all
- * subvolumes and success from a subvolume
- * might set local->op_ret to 0 (zero) */
- local->op_ret = -1;
- }
-
- /* either no self heal, or op_ret == -1 (failure) */
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- tmp_inode, &local->stbuf, local->dict,
- &local->oldpostparent);
- }
- if (local_dict) {
- dict_unref (local_dict);
- }
- }
-
- return 0;
-}
-
-/**
- * unify_lookup -
- */
-int32_t
-unify_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- long index = 0;
-
- if (!(loc && loc->inode)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: Argument not right", loc?loc->path:"(null)");
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL, NULL, NULL);
- return 0;
- }
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL, NULL, NULL);
- return 0;
- }
-
- if (inode_ctx_get (loc->inode, this, NULL)
- && IA_ISDIR (loc->inode->ia_type)) {
- local->revalidate = 1;
- }
-
- if (!inode_ctx_get (loc->inode, this, NULL) &&
- loc->inode->ia_type &&
- !IA_ISDIR (loc->inode->ia_type)) {
- uint64_t tmp_list = 0;
- /* check if revalidate or fresh lookup */
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
- }
-
- if (local->list) {
- list = local->list;
- for (index = 0; list[index] != -1; index++);
- if (index != 2) {
- if (index < 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "returning ESTALE for %s: file "
- "count is %ld", loc->path, index);
- /* Print where all the file is present */
- for (index = 0;
- local->list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", loc->path,
- priv->xl_array[list[index]]->name);
- }
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, ESTALE,
- NULL, NULL, NULL, NULL);
- return 0;
- } else {
- /* There are more than 2 presences */
- /* Just log and continue */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: file count is %ld",
- loc->path, index);
- /* Print where all the file is present */
- for (index = 0;
- local->list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", loc->path,
- priv->xl_array[list[index]]->name);
- }
- }
- }
-
- /* is revalidate */
- local->revalidate = 1;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_lookup_cbk,
- (void *)(long)list[index], //cookie
- priv->xl_array [list[index]],
- priv->xl_array [list[index]]->fops->lookup,
- loc,
- xattr_req);
- if (need_break)
- break;
- }
- } else {
- if (loc->inode->ia_type) {
- if (inode_ctx_get (loc->inode, this, NULL)) {
- inode_ctx_get (loc->inode, this,
- &local->inode_generation);
- }
- }
- /* This is first call, there is no list */
- /* call count should be all child + 1 namespace */
- local->call_count = priv->child_count + 1;
-
- for (index = 0; index <= priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_lookup_cbk,
- (void *)index, //cookie
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- loc,
- xattr_req);
- }
- }
-
- return 0;
-}
-
-/**
- * unify_stat - if directory, get the stat directly from NameSpace child.
- * if file, check for a hint and send it only there (also to NS).
- * if its a fresh stat, then do it on all the nodes.
- *
- * NOTE: for all the call, sending cookie as xlator pointer, which will be
- * used in cbk.
- */
-int32_t
-unify_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int16_t index = 0;
- int16_t *list = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL);
- return 0;
- }
- local->ia_ino = loc->inode->ino;
- if (IA_ISDIR (loc->inode->ia_type)) {
- /* Directory */
- local->call_count = 1;
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->stat, loc);
- } else {
- /* File */
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND (frame,
- unify_buf_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->stat,
- loc);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-/**
- * unify_access_cbk -
- */
-int32_t
-unify_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-/**
- * unify_access - Send request to only namespace, which has all the
- * attributes set for the file.
- */
-int32_t
-unify_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
-{
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- STACK_WIND (frame,
- unify_access_cbk,
- NS(this),
- NS(this)->fops->access,
- loc,
- mask);
-
- return 0;
-}
-
-int32_t
-unify_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- inode_t *tmp_inode = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if ((op_ret == -1) && !(priv->optimist &&
- (op_errno == ENOENT ||
- op_errno == EEXIST))) {
- /* TODO: Decrement the inode_generation of
- * this->inode's parent inode, hence the missing
- * directory is created properly by self-heal.
- * Currently, there is no way to get the parent
- * inode directly.
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- if (op_errno != EEXIST)
- local->failed = 1;
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0)
- local->op_ret = 0;
-
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (!local->failed) {
- inode_ctx_put (local->loc1.inode, this,
- priv->inode_generation);
- }
-
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- tmp_inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
- }
-
- return 0;
-}
-
-/**
- * unify_ns_mkdir_cbk -
- */
-int32_t
-unify_ns_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- long index = 0;
-
- if (op_ret == -1) {
- /* No need to send mkdir request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->name, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, NULL,
- NULL, NULL);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->stbuf = *buf;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- local->call_count = priv->child_count;
-
- /* Send mkdir request to all the nodes now */
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND_COOKIE (frame,
- unify_mkdir_cbk,
- (void *)index, //cookie
- priv->xl_array[index],
- priv->xl_array[index]->fops->mkdir,
- &local->loc1,
- local->mode);
- }
-
- return 0;
-}
-
-
-/**
- * unify_mkdir -
- */
-int32_t
-unify_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
-
- loc_copy (&local->loc1, loc);
-
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_mkdir_cbk,
- NS(this),
- NS(this)->fops->mkdir,
- loc,
- mode);
- return 0;
-}
-
-/**
- * unify_rmdir_cbk -
- */
-int32_t
-unify_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == 0 || (priv->optimist && (op_errno == ENOENT)))
- local->op_ret = 0;
- if (op_ret == -1)
- local->op_errno = op_errno;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->oldpreparent, &local->oldpostparent);
- }
-
- return 0;
-}
-
-/**
- * unify_ns_rmdir_cbk -
- */
-int32_t
-unify_ns_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* No need to send rmdir request to other servers,
- * as namespace action failed
- */
- gf_log (this->name,
- ((op_errno != ENOTEMPTY) ?
- GF_LOG_ERROR : GF_LOG_DEBUG),
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, NULL, NULL);
- return 0;
- }
-
- local->call_count = priv->child_count;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_rmdir_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->rmdir,
- &local->loc1);
- }
-
- return 0;
-}
-
-/**
- * unify_rmdir -
- */
-int32_t
-unify_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_rmdir_cbk,
- NS(this),
- NS(this)->fops->rmdir,
- loc);
-
- return 0;
-}
-
-/**
- * unify_open_cbk -
- */
-int32_t
-unify_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (NS(this) != (xlator_t *)cookie) {
- /* Store child node's ptr, used in
- all the f*** / FileIO calls */
- fd_ctx_set (fd, this, (uint64_t)(long)cookie);
- }
- }
- if (op_ret == -1) {
- local->op_errno = op_errno;
- local->failed = 1;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if ((local->failed == 1) && (local->op_ret >= 0)) {
- local->call_count = 1;
- /* return -1 to user */
- local->op_ret = -1;
- //local->op_errno = EIO;
-
- if (!fd_ctx_get (local->fd, this, NULL)) {
- gf_log (this->name, GF_LOG_ERROR,
- "Open success on child node, "
- "failed on namespace");
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "Open success on namespace, "
- "failed on child node");
- }
- }
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- }
-
- return 0;
-}
-
-#ifdef GF_DARWIN_HOST_OS
-/**
- * unify_create_lookup_cbk -
- */
-int32_t
-unify_open_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if ((op_ret == -1) && (op_errno != ENOENT)) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->index++;
- if (NS(this) == priv->xl_array[(long)cookie]) {
- local->list[0] = (int16_t)(long)cookie;
- } else {
- local->list[1] = (int16_t)(long)cookie;
- }
- if (IA_ISDIR (buf->ia_type))
- local->failed = 1;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- int16_t file_list[3] = {0,};
- local->op_ret = -1;
-
- file_list[0] = local->list[0];
- file_list[1] = local->list[1];
- file_list[2] = -1;
-
- if (local->index != 2) {
- /* Lookup failed, can't do open */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: present on %d nodes",
- local->name, local->index);
-
- if (local->index < 2) {
- unify_local_wipe (local);
- gf_log (this->name, GF_LOG_ERROR,
- "returning as file found on less "
- "than 2 nodes");
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, local->fd);
- return 0;
- }
- }
-
- if (local->failed) {
- /* Open on directory, return EISDIR */
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EISDIR, local->fd);
- return 0;
- }
-
- /* Everything is perfect :) */
- local->call_count = 2;
-
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_open_cbk,
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- &local->loc1,
- local->flags,
- local->fd, local->wbflags);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_open_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *sbuf)
-{
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- STACK_UNWIND (frame, -1, ENOENT);
- return 0;
- }
-
- if (path[0] == '/') {
- local->name = gf_strdup (path);
- ERR_ABORT (local->name);
- } else {
- char *tmp_str = gf_strdup (local->loc1.path);
- char *tmp_base = dirname (tmp_str);
- local->name = GF_CALLOC (1, PATH_MAX, gf_unify_mt_char);
- strcpy (local->name, tmp_base);
- strncat (local->name, "/", 1);
- strcat (local->name, path);
- GF_FREE (tmp_str);
- }
-
- local->list = GF_CALLOC (1, sizeof (int16_t) * 3,
- gf_unify_mt_int16_t);
- ERR_ABORT (local->list);
- local->call_count = priv->child_count + 1;
- local->op_ret = -1;
- for (index = 0; index <= priv->child_count; index++) {
- /* Send the lookup to all the nodes including namespace */
- STACK_WIND_COOKIE (frame,
- unify_open_lookup_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- &local->loc1,
- NULL);
- }
-
- return 0;
-}
-#endif /* GF_DARWIN_HOST_OS */
-
-/**
- * unify_open -
- */
-int32_t
-unify_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- fd_t *fd,
- int32_t wbflags)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int16_t file_list[3] = {0,};
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Init */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->fd = fd;
- local->flags = flags;
- local->wbflags = wbflags;
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- local->list = list;
- file_list[0] = priv->child_count; /* Thats namespace */
- file_list[2] = -1;
- for (index = 0; list[index] != -1; index++) {
- local->call_count++;
- if (list[index] != priv->child_count)
- file_list[1] = list[index];
- }
-
- if (local->call_count != 2) {
- /* If the lookup was done for file */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: entry_count is %d",
- loc->path, local->call_count);
- for (index = 0; local->list[index] != -1; index++)
- gf_log (this->name, GF_LOG_ERROR, "%s: found on %s",
- loc->path, priv->xl_array[list[index]]->name);
-
- if (local->call_count < 2) {
- gf_log (this->name, GF_LOG_ERROR,
- "returning EIO as file found on onlyone node");
- STACK_UNWIND (frame, -1, EIO, fd);
- return 0;
- }
- }
-
-#ifdef GF_DARWIN_HOST_OS
- /* Handle symlink here */
- if (IA_ISLNK (loc->inode->ia_type)) {
- /* Callcount doesn't matter here */
- STACK_WIND (frame,
- unify_open_readlink_cbk,
- NS(this),
- NS(this)->fops->readlink,
- loc, PATH_MAX);
- return 0;
- }
-#endif /* GF_DARWIN_HOST_OS */
-
- local->call_count = 2;
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_open_cbk,
- priv->xl_array[file_list[index]], //cookie
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- loc,
- flags,
- fd, wbflags);
- if (need_break)
- break;
- }
-
- return 0;
-}
-
-
-int32_t
-unify_create_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
- inode_t *inode = local->loc1.inode;
-
- unify_local_wipe (local);
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd,
- inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_create_open_cbk -
- */
-int32_t
-unify_create_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- int ret = 0;
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- inode_t *inode = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_value = 0;
-
- LOCK (&frame->lock);
- {
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- if (NS(this) != (xlator_t *)cookie) {
- /* Store child node's ptr, used in all
- the f*** / FileIO calls */
- /* TODO: log on failure */
- ret = fd_ctx_get (fd, this, &tmp_value);
- cookie = (void *)(long)tmp_value;
- } else {
- /* NOTE: open successful on namespace.
- * fd's ctx can be used to identify open
- * failure on storage subvolume. cool
- * ide ;) */
- local->failed = 0;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- ((xlator_t *)cookie)->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed == 1 && (local->op_ret >= 0)) {
- local->call_count = 1;
- /* return -1 to user */
- local->op_ret = -1;
- local->op_errno = EIO;
- local->fd = fd;
- local->call_count = 1;
-
- if (!fd_ctx_get (local->fd, this, &tmp_value)) {
- child = (xlator_t *)(long)tmp_value;
-
- gf_log (this->name, GF_LOG_ERROR,
- "Create success on child node, "
- "failed on namespace");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- child,
- child->fops->unlink,
- &local->loc1);
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "Create success on namespace, "
- "failed on child node");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
- }
- return 0;
- }
- inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, fd,
- inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
- }
- return 0;
-}
-
-/**
- * unify_create_lookup_cbk -
- */
-int32_t
-unify_create_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- int16_t index = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- priv->xl_array[(long)cookie]->name,
- local->loc1.path, strerror (op_errno));
- local->op_errno = op_errno;
- local->failed = 1;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->list[local->index++] = (int16_t)(long)cookie;
- if (NS(this) == priv->xl_array[(long)cookie]) {
- local->ia_ino = buf->ia_ino;
- } else {
- local->stbuf = *buf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- int16_t *list = local->list;
- int16_t file_list[3] = {0,};
- local->op_ret = -1;
-
- local->list [local->index] = -1;
- file_list[0] = list[0];
- file_list[1] = list[1];
- file_list[2] = -1;
-
- local->stbuf.ia_ino = local->ia_ino;
- /* TODO: log on failure */
- inode_ctx_put (local->loc1.inode, this,
- (uint64_t)(long)local->list);
-
- if (local->index != 2) {
- /* Lookup failed, can't do open */
- gf_log (this->name, GF_LOG_ERROR,
- "%s: present on %d nodes",
- local->loc1.path, local->index);
- file_list[0] = priv->child_count;
- for (index = 0; list[index] != -1; index++) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: found on %s", local->loc1.path,
- priv->xl_array[list[index]]->name);
- if (list[index] != priv->child_count)
- file_list[1] = list[index];
- }
-
- if (local->index < 2) {
- unify_local_wipe (local);
- gf_log (this->name, GF_LOG_ERROR,
- "returning EIO as file found on "
- "only one node");
- STACK_UNWIND (frame, -1, EIO,
- local->fd, inode, NULL,
- NULL, NULL);
- return 0;
- }
- }
- /* Everything is perfect :) */
- local->call_count = 2;
-
- for (index = 0; file_list[index] != -1; index++) {
- char need_break = (file_list[index+1] == -1);
- STACK_WIND_COOKIE (frame,
- unify_create_open_cbk,
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]],
- priv->xl_array[file_list[index]]->fops->open,
- &local->loc1,
- local->flags,
- local->fd, 0);
- if (need_break)
- break;
- }
- }
-
- return 0;
-}
-
-
-/**
- * unify_create_cbk -
- */
-int32_t
-unify_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int ret = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
- inode_t *tmp_inode = NULL;
-
- if (op_ret == -1) {
- /* send unlink () on Namespace */
- local->op_errno = op_errno;
- local->op_ret = -1;
- local->call_count = 1;
- gf_log (this->name, GF_LOG_ERROR,
- "create failed on %s (file %s, error %s), "
- "sending unlink to namespace",
- prev_frame->this->name,
- local->loc1.path, strerror (op_errno));
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = op_ret;
- local->stbuf = *buf;
- /* Just inode number should be from NS node */
- local->stbuf.ia_ino = local->ia_ino;
-
- /* TODO: log on failure */
- ret = fd_ctx_set (fd, this, (uint64_t)(long)prev_frame->this);
- }
-
- tmp_inode = local->loc1.inode;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, local->fd,
- tmp_inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_ns_create_cbk -
- *
- */
-int32_t
-unify_ns_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send create request to other servers, as
- namespace action failed. Handle exclusive create here. */
- if ((op_errno != EEXIST) ||
- ((op_errno == EEXIST) &&
- ((local->flags & O_EXCL) == O_EXCL))) {
- /* If its just a create call without O_EXCL,
- don't do this */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
- return 0;
- }
- }
-
- if (op_ret >= 0) {
- /* Get the inode number from the NS node */
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- local->op_ret = -1;
-
- /* Start the mapping list */
- list = GF_CALLOC (1, sizeof (int16_t) * 3,
- gf_unify_mt_int16_t);
- ERR_ABORT (list);
- inode_ctx_put (inode, this, (uint64_t)(long)list);
- list[0] = priv->child_count;
- list[2] = -1;
-
- /* This means, file doesn't exist anywhere in the Filesystem */
- sched_ops = priv->sched_ops;
-
- /* Send create request to the scheduled node now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (sched_xl == NULL)
- {
- /* send unlink () on Namespace */
- local->op_errno = ENOTCONN;
- local->op_ret = -1;
- local->call_count = 1;
- gf_log (this->name, GF_LOG_ERROR,
- "no node online to schedule create:(file %s) "
- "sending unlink to namespace",
- (local->loc1.path)?local->loc1.path:"");
-
- STACK_WIND (frame,
- unify_create_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame, unify_create_cbk,
- sched_xl, sched_xl->fops->create,
- &local->loc1, local->flags, local->mode, fd);
- } else {
- /* File already exists, and there is no O_EXCL flag */
-
- gf_log (this->name, GF_LOG_DEBUG,
- "File(%s) already exists on namespace, sending "
- "open instead", local->loc1.path);
-
- local->list = GF_CALLOC (1, sizeof (int16_t) * 3,
- gf_unify_mt_int16_t);
- ERR_ABORT (local->list);
- local->call_count = priv->child_count + 1;
- local->op_ret = -1;
- for (index = 0; index <= priv->child_count; index++) {
- /* Send lookup() to all nodes including namespace */
- STACK_WIND_COOKIE (frame,
- unify_create_lookup_cbk,
- (void *)(long)index,
- priv->xl_array[index],
- priv->xl_array[index]->fops->lookup,
- &local->loc1,
- NULL);
- }
- }
- return 0;
-}
-
-/**
- * unify_create - create a file in global namespace first, so other
- * clients can see them. Create the file in storage nodes in background.
- */
-int32_t
-unify_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode,
- fd_t *fd)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
- local->flags = flags;
- local->fd = fd;
-
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, fd, loc->inode, NULL,
- NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_create_cbk,
- NS(this),
- NS(this)->fops->create,
- loc,
- flags | O_EXCL,
- mode,
- fd);
-
- return 0;
-}
-
-
-/**
- * unify_opendir_cbk -
- */
-int32_t
-unify_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
-
- return 0;
-}
-
-/**
- * unify_opendir -
- */
-int32_t
-unify_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- fd_t *fd)
-{
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- STACK_WIND (frame, unify_opendir_cbk,
- NS(this), NS(this)->fops->opendir, loc, fd);
-
- return 0;
-}
-
-
-int32_t
-unify_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s(): child(%s): path(%s): %s",
- gf_fop_list[frame->root->op],
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
-
- local->op_errno = op_errno;
- if ((op_errno == ENOENT) && priv->optimist)
- local->op_ret = 0;
- }
-
- if (op_ret >= 0) {
- local->op_ret = 0;
-
- if (NS (this) == prev_frame->this) {
- local->ia_ino = statpost->ia_ino;
- /* If the entry is directory, get the stat
- from NS node */
- if (IA_ISDIR (statpost->ia_type) ||
- !local->stpost.ia_blksize) {
- local->stpre = *statpre;
- local->stpost = *statpost;
- }
- }
-
- if ((!IA_ISDIR (statpost->ia_type)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from Storage
- node. */
- local->stpre = *statpre;
- local->stpost = *statpost;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- /* If the inode number is not filled, operation should
- fail */
- if (!local->ia_ino)
- local->op_ret = -1;
-
- local->stpre.ia_ino = local->ia_ino;
- local->stpost.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stpre, &local->stpost);
- }
-
- return 0;
-}
-
-
-int32_t
-unify_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- if (!(loc && loc->inode)) {
- STACK_UNWIND (frame, -1, EINVAL, NULL, NULL);
- return 0;
- }
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_setattr_cbk,
- NS (this),
- NS (this)->fops->setattr,
- loc, stbuf, valid);
- } else {
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- for (index = 0; local->list[index] != -1; index++) {
- STACK_WIND (frame,
- unify_setattr_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->setattr,
- loc, stbuf, valid);
-
- if (!--callcnt)
- break;
- }
- }
-
- return 0;
-}
-
-
-int32_t
-unify_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid)
-{
- unify_local_t *local = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- if (!fd_ctx_get (fd, this, &tmp_child)) {
- /* If its set, then its file */
- child = (xlator_t *)(long)tmp_child;
-
- local->call_count = 2;
-
- STACK_WIND (frame, unify_setattr_cbk, child,
- child->fops->fsetattr, fd, stbuf, valid);
-
- STACK_WIND (frame, unify_setattr_cbk, NS(this),
- NS(this)->fops->fsetattr, fd, stbuf, valid);
- } else {
- local->call_count = 1;
-
- STACK_WIND (frame, unify_setattr_cbk,
- NS(this), NS(this)->fops->fsetattr,
- fd, stbuf, valid);
- }
-
- return 0;
-}
-
-
-/**
- * unify_truncate_cbk -
- */
-int32_t
-unify_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- local->op_errno = op_errno;
- if (!((op_errno == ENOENT) && priv->optimist))
- local->op_ret = -1;
- }
-
- if (op_ret >= 0) {
- if (NS (this) == prev_frame->this) {
- local->ia_ino = postbuf->ia_ino;
- /* If the entry is directory, get the
- stat from NS node */
- if (IA_ISDIR (postbuf->ia_type) ||
- !local->stbuf.ia_blksize) {
- local->stbuf = *prebuf;
- local->poststbuf = *postbuf;
- }
- }
-
- if ((!IA_ISDIR (postbuf->ia_type)) &&
- (NS (this) != prev_frame->this)) {
- /* If file, take the stat info from
- Storage node. */
- local->stbuf = *prebuf;
- local->poststbuf = *postbuf;
- }
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->ia_ino) {
- local->stbuf.ia_ino = local->ia_ino;
- local->poststbuf.ia_ino = local->ia_ino;
- } else {
- local->op_ret = -1;
- }
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->poststbuf);
- }
-
- return 0;
-}
-
-
-/**
- * unify_truncate -
- */
-int32_t
-unify_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
-{
- unify_local_t *local = NULL;
- unify_private_t *priv = this->private;
- int32_t index = 0;
- int32_t callcnt = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->ia_ino = loc->inode->ino;
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_truncate_cbk,
- NS(this),
- NS(this)->fops->truncate,
- loc,
- 0);
- } else {
- local->op_ret = 0;
- inode_ctx_get (loc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- for (index = 0; local->list[index] != -1; index++) {
- local->call_count++;
- callcnt++;
- }
-
- /* Don't send offset to NS truncate */
- STACK_WIND (frame, unify_truncate_cbk, NS(this),
- NS(this)->fops->truncate, loc, 0);
- callcnt--;
-
- for (index = 0; local->list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[local->list[index]]) {
- STACK_WIND (frame,
- unify_truncate_cbk,
- priv->xl_array[local->list[index]],
- priv->xl_array[local->list[index]]->fops->truncate,
- loc,
- offset);
- if (!--callcnt)
- break;
- }
- }
- }
-
- return 0;
-}
-
-/**
- * unify_readlink_cbk -
- */
-int32_t
-unify_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *sbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, path, sbuf);
- return 0;
-}
-
-/**
- * unify_readlink - Read the link only from the storage node.
- */
-int32_t
-unify_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
-{
- unify_private_t *priv = this->private;
- int32_t entry_count = 0;
- int16_t *list = NULL;
- int16_t index = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- entry_count++;
-
- if (entry_count >= 2) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_readlink_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->readlink,
- loc,
- size);
- break;
- }
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "returning ENOENT, no softlink files found "
- "on storage node");
- STACK_UNWIND (frame, -1, ENOENT, NULL);
- }
-
- return 0;
-}
-
-
-/**
- * unify_unlink_cbk -
- */
-int32_t
-unify_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == 0 || ((op_errno == ENOENT) && priv->optimist))
- local->op_ret = 0;
- if (op_ret == -1)
- local->op_errno = op_errno;
-
- if (((call_frame_t *)cookie)->this == NS(this)) {
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->oldpreparent, &local->oldpostparent);
- }
-
- return 0;
-}
-
-
-/**
- * unify_unlink -
- */
-int32_t
-unify_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++)
- local->call_count++;
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- STACK_WIND (frame,
- unify_unlink_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->unlink,
- loc);
- if (need_break)
- break;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: returning ENOENT", loc->path);
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- }
-
- return 0;
-}
-
-
-/**
- * unify_readv_cbk -
- */
-int32_t
-unify_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref)
-{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count, stbuf, iobref);
- return 0;
-}
-
-/**
- * unify_readv -
- */
-int32_t
-unify_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame,
- unify_readv_cbk,
- child,
- child->fops->readv,
- fd,
- size,
- offset);
-
-
- return 0;
-}
-
-/**
- * unify_writev_cbk -
- */
-int32_t
-unify_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- unify_local_t *local = NULL;
-
- local = frame->local;
-
- local->stbuf = *prebuf;
- local->stbuf.ia_ino = local->ia_ino;
-
- local->poststbuf = *postbuf;
- local->poststbuf.ia_ino = local->ia_ino;
-
- STACK_UNWIND (frame, op_ret, op_errno,
- &local->stbuf, &local->poststbuf);
- return 0;
-}
-
-/**
- * unify_writev -
- */
-int32_t
-unify_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off,
- struct iobref *iobref)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
- unify_local_t *local = NULL;
-
- INIT_LOCAL (frame, local);
- local->ia_ino = fd->inode->ino;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame,
- unify_writev_cbk,
- child,
- child->fops->writev,
- fd,
- vector,
- count,
- off,
- iobref);
-
- return 0;
-}
-
-/**
- * unify_ftruncate -
- */
-int32_t
-unify_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
-{
- xlator_t *child = NULL;
- unify_local_t *local = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR(fd);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->op_ret = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- local->call_count = 2;
-
- STACK_WIND (frame, unify_truncate_cbk,
- child, child->fops->ftruncate,
- fd, offset);
-
- STACK_WIND (frame, unify_truncate_cbk,
- NS(this), NS(this)->fops->ftruncate,
- fd, 0);
-
- return 0;
-}
-
-
-/**
- * unify_flush_cbk -
- */
-int32_t
-unify_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_flush -
- */
-int32_t
-unify_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_flush_cbk, child,
- child->fops->flush, fd);
-
- return 0;
-}
-
-
-/**
- * unify_fsync_cbk -
- */
-int32_t
-unify_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
-
-/**
- * unify_fsync -
- */
-int32_t
-unify_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fsync_cbk, child,
- child->fops->fsync, fd, flags);
-
- return 0;
-}
-
-/**
- * unify_fstat - Send fstat FOP to Namespace only if its directory, and to
- * both namespace and the storage node if its a file.
- */
-int32_t
-unify_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- unify_local_t *local = NULL;
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR(fd);
-
- INIT_LOCAL (frame, local);
- local->ia_ino = fd->inode->ino;
-
- if (!fd_ctx_get (fd, this, &tmp_child)) {
- /* If its set, then its file */
- child = (xlator_t *)(long)tmp_child;
- local->call_count = 2;
-
- STACK_WIND (frame, unify_buf_cbk, child,
- child->fops->fstat, fd);
-
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fstat, fd);
-
- } else {
- /* this is an directory */
- local->call_count = 1;
- STACK_WIND (frame, unify_buf_cbk, NS(this),
- NS(this)->fops->fstat, fd);
- }
-
- return 0;
-}
-
-/**
- * unify_getdents_cbk -
- */
-int32_t
-unify_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entry,
- int32_t count)
-{
- STACK_UNWIND (frame, op_ret, op_errno, entry, count);
- return 0;
-}
-
-/**
- * unify_getdents - send the FOP request to all the nodes.
- */
-int32_t
-unify_getdents (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset,
- int32_t flag)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_getdents_cbk, NS(this),
- NS(this)->fops->getdents, fd, size, offset, flag);
-
- return 0;
-}
-
-
-/**
- * unify_readdir_cbk -
- */
-int32_t
-unify_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
-
- return 0;
-}
-
-/**
- * unify_readdir - send the FOP request to all the nodes.
- */
-int32_t
-unify_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_readdir_cbk, NS(this),
- NS(this)->fops->readdir, fd, size, offset);
-
- return 0;
-}
-
-
-int32_t
-unify_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
-
- return 0;
-}
-
-
-int32_t
-unify_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_readdirp_cbk, NS(this),
- NS(this)->fops->readdirp, fd, size, offset);
-
- return 0;
-}
-
-
-/**
- * unify_fsyncdir_cbk -
- */
-int32_t
-unify_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
-
- return 0;
-}
-
-/**
- * unify_fsyncdir -
- */
-int32_t
-unify_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags)
-{
- UNIFY_CHECK_FD_AND_UNWIND_ON_ERR (fd);
-
- STACK_WIND (frame, unify_fsyncdir_cbk,
- NS(this), NS(this)->fops->fsyncdir, fd, flags);
-
- return 0;
-}
-
-/**
- * unify_lk_cbk - UNWIND frame with the proper return arguments.
- */
-int32_t
-unify_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *lock)
-{
- STACK_UNWIND (frame, op_ret, op_errno, lock);
- return 0;
-}
-
-/**
- * unify_lk - Send it to all the storage nodes, (should be 1) which has file.
- */
-int32_t
-unify_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *lock)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_lk_cbk, child,
- child->fops->lk, fd, cmd, lock);
-
- return 0;
-}
-
-
-int32_t
-unify_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno);
-
-static int32_t
-unify_setxattr_file_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- unify_private_t *private = this->private;
- unify_local_t *local = frame->local;
- xlator_t *sched_xl = NULL;
- struct sched_ops *sched_ops = NULL;
-
- if (op_ret == -1) {
- if (!ENOTSUP)
- gf_log (this->name, GF_LOG_ERROR,
- "setxattr with XATTR_CREATE on ns: "
- "path(%s) key(%s): %s",
- local->loc1.path, local->name,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
- }
-
- LOCK (&frame->lock);
- {
- local->failed = 0;
- local->op_ret = 0;
- local->op_errno = 0;
- local->call_count = 1;
- }
- UNLOCK (&frame->lock);
-
- /* schedule XATTR_CREATE on one of the child node */
- sched_ops = private->sched_ops;
-
- /* Send create request to the scheduled node now */
- sched_xl = sched_ops->schedule (this, local->name);
- if (!sched_xl) {
- STACK_UNWIND (frame, -1, ENOTCONN);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_setxattr_cbk,
- sched_xl,
- sched_xl->fops->setxattr,
- &local->loc1,
- local->dict,
- local->flags);
- return 0;
-}
-
-/**
- * unify_setxattr_cbk - When all the child nodes return, UNWIND frame.
- */
-int32_t
-unify_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
- dict_t *dict = NULL;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- gf_log (this->name, (((op_errno == ENOENT) ||
- (op_errno == ENOTSUP))?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- if (local->failed == -1) {
- local->failed = 1;
- }
- local->op_errno = op_errno;
- } else {
- local->failed = 0;
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- if (local->failed && local->name &&
- ZR_FILE_CONTENT_REQUEST(local->name)) {
- dict = get_new_dict ();
- dict_set (dict, local->dict->members_list->key,
- data_from_dynptr(NULL, 0));
- dict_ref (dict);
-
- local->call_count = 1;
-
- STACK_WIND (frame,
- unify_setxattr_file_cbk,
- NS(this),
- NS(this)->fops->setxattr,
- &local->loc1,
- dict,
- XATTR_CREATE);
-
- dict_unref (dict);
- return 0;
- }
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-/**
- * unify_sexattr - This function should be sent to all the storage nodes,
- * which contains the file, (excluding namespace).
- */
-int32_t
-unify_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int32_t call_count = 0;
- uint64_t tmp_list = 0;
- data_pair_t *trav = dict->members_list;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->failed = -1;
- loc_copy (&local->loc1, loc);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
-
- if (trav && trav->key && ZR_FILE_CONTENT_REQUEST(trav->key)) {
- /* direct the storage xlators to change file
- content only if file exists */
- local->flags = flags;
- local->dict = dict;
- local->name = gf_strdup (trav->key);
- flags |= XATTR_REPLACE;
- }
-
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_setxattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->setxattr,
- loc, dict, flags);
- }
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- call_count++;
- }
- }
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_setxattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->setxattr,
- loc,
- dict,
- flags);
- if (!--call_count)
- break;
- }
- }
- return 0;
- }
-
- /* No entry in storage nodes */
- gf_log (this->name, GF_LOG_DEBUG,
- "returning ENOENT, file not found on storage node.");
- STACK_UNWIND (frame, -1, ENOENT);
-
- return 0;
-}
-
-
-/**
- * unify_getxattr_cbk - This function is called from only one child, so, no
- * need of any lock or anything else, just send it to above layer
- */
-int32_t
-unify_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *value)
-{
- int32_t callcnt = 0;
- dict_t *local_value = NULL;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (op_ret == -1) {
- local->op_errno = op_errno;
- gf_log (this->name,
- (((op_errno == ENOENT) ||
- (op_errno == ENODATA) ||
- (op_errno == ENOTSUP)) ?
- GF_LOG_DEBUG : GF_LOG_ERROR),
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- (local->loc1.path)?local->loc1.path:"",
- strerror (op_errno));
- } else {
- if (!local->dict)
- local->dict = dict_ref (value);
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local_value = local->dict;
- local->dict = NULL;
-
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- local_value);
-
- if (local_value)
- dict_unref (local_value);
- }
-
- return 0;
-}
-
-
-/**
- * unify_getxattr - This FOP is sent to only the storage node.
- */
-int32_t
-unify_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
- int16_t count = 0;
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
- INIT_LOCAL (frame, local);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++)
- STACK_WIND (frame,
- unify_getxattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->getxattr,
- loc,
- name);
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- count++;
- }
- }
-
- if (count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_getxattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->getxattr,
- loc,
- name);
- if (!--count)
- break;
- }
- }
- } else {
- dict_t *tmp_dict = get_new_dict ();
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: returning ENODATA, no file found on storage node",
- loc->path);
- STACK_UNWIND (frame, -1, ENODATA, tmp_dict);
- dict_destroy (tmp_dict);
- }
-
- return 0;
-}
-
-/**
- * unify_removexattr_cbk - Wait till all the child node returns the call
- * and then UNWIND to above layer.
- */
-int32_t
-unify_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret == -1) {
- local->op_errno = op_errno;
- if (op_errno != ENOTSUP)
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name,
- local->loc1.path, strerror (op_errno));
- } else {
- local->op_ret = op_ret;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- STACK_UNWIND (frame, local->op_ret, local->op_errno);
- }
-
- return 0;
-}
-
-/**
- * unify_removexattr - Send it to all the child nodes which has the files.
- */
-int32_t
-unify_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = NULL;
- int16_t *list = NULL;
- int16_t index = 0;
- int32_t call_count = 0;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (loc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- if (IA_ISDIR (loc->inode->ia_type)) {
- local->call_count = priv->child_count;
- for (index = 0; index < priv->child_count; index++)
- STACK_WIND (frame,
- unify_removexattr_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->removexattr,
- loc,
- name);
-
- return 0;
- }
-
- inode_ctx_get (loc->inode, this, &tmp_list);
- list = (int16_t *)(long)tmp_list;
-
- for (index = 0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- call_count++;
- }
- }
-
- if (local->call_count) {
- for (index = 0; list[index] != -1; index++) {
- if (priv->xl_array[list[index]] != NS(this)) {
- STACK_WIND (frame,
- unify_removexattr_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->removexattr,
- loc,
- name);
- if (!--call_count)
- break;
- }
- }
- return 0;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: returning ENOENT, not found on storage node.", loc->path);
- STACK_UNWIND (frame, -1, ENOENT);
-
- return 0;
-}
-
-
-int32_t
-unify_mknod_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: %s", local->loc1.path, strerror (op_errno));
-
- unify_local_wipe (local);
- /* No log required here as this -1 is for mknod call */
- STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL);
- return 0;
-}
-
-/**
- * unify_mknod_cbk -
- */
-int32_t
-unify_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "mknod failed on storage node, sending unlink to "
- "namespace");
- local->op_errno = op_errno;
- STACK_WIND (frame,
- unify_mknod_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
- return 0;
- }
-
- local->stbuf = *buf;
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
- return 0;
-}
-
-/**
- * unify_ns_mknod_cbk -
- */
-int32_t
-unify_ns_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t *list = NULL;
- int16_t index = 0;
- call_frame_t *prev_frame = cookie;
-
- if (op_ret == -1) {
- /* No need to send mknod request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s): %s",
- prev_frame->this->name, local->loc1.path,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->stbuf = *buf;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- list = GF_CALLOC (1, sizeof (int16_t) * 3, gf_unify_mt_int16_t);
- ERR_ABORT (list);
- list[0] = priv->child_count;
- list[2] = -1;
- inode_ctx_put (inode, this, (uint64_t)(long)list);
-
- sched_ops = priv->sched_ops;
-
- /* Send mknod request to scheduled node now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (!sched_xl) {
- gf_log (this->name, GF_LOG_ERROR,
- "mknod failed on storage node, no node online "
- "at the moment, sending unlink to NS");
- local->op_errno = ENOTCONN;
- STACK_WIND (frame,
- unify_mknod_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame, unify_mknod_cbk,
- sched_xl, sched_xl->fops->mknod,
- &local->loc1, local->mode, local->dev);
-
- return 0;
-}
-
-/**
- * unify_mknod - Create a device on namespace first, and later create on
- * the storage node.
- */
-int32_t
-unify_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t rdev)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- local->mode = mode;
- local->dev = rdev;
- loc_copy (&local->loc1, loc);
- if (local->loc1.path == NULL) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_mknod_cbk,
- NS(this),
- NS(this)->fops->mknod,
- loc,
- mode,
- rdev);
-
- return 0;
-}
-
-int32_t
-unify_symlink_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
- if (op_ret == -1)
- gf_log (this->name, GF_LOG_ERROR,
- "%s: %s", local->loc1.path, strerror (op_errno));
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, local->op_errno, NULL, NULL);
- return 0;
-}
-
-/**
- * unify_symlink_cbk -
- */
-int32_t
-unify_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* Symlink on storage node failed, hence send unlink
- to the NS node */
- local->op_errno = op_errno;
- gf_log (this->name, GF_LOG_ERROR,
- "symlink on storage node failed, sending unlink "
- "to namespace");
-
- STACK_WIND (frame,
- unify_symlink_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- local->stbuf = *buf;
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_ns_symlink_cbk -
- */
-int32_t
-unify_ns_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
-
- struct sched_ops *sched_ops = NULL;
- xlator_t *sched_xl = NULL;
- int16_t *list = NULL;
- unify_local_t *local = frame->local;
- unify_private_t *priv = this->private;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send symlink request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s): %s",
- local->loc1.path, strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, NULL, buf,
- preparent, postparent);
- return 0;
- }
-
- /* Create one inode for this entry */
- local->op_ret = 0;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- /* Start the mapping list */
-
- list = GF_CALLOC (1, sizeof (int16_t) * 3, gf_unify_mt_int16_t);
- ERR_ABORT (list);
- list[0] = priv->child_count; //namespace's index
- list[2] = -1;
- inode_ctx_put (inode, this, (uint64_t)(long)list);
-
- sched_ops = priv->sched_ops;
-
- /* Send symlink request to all the nodes now */
- sched_xl = sched_ops->schedule (this, local->loc1.path);
- if (!sched_xl) {
- /* Symlink on storage node failed, hence send unlink
- to the NS node */
- local->op_errno = ENOTCONN;
- gf_log (this->name, GF_LOG_ERROR,
- "symlink on storage node failed, no node online, "
- "sending unlink to namespace");
-
- STACK_WIND (frame,
- unify_symlink_unlink_cbk,
- NS(this),
- NS(this)->fops->unlink,
- &local->loc1);
-
- return 0;
- }
-
- for (index = 0; index < priv->child_count; index++)
- if (sched_xl == priv->xl_array[index])
- break;
- list[1] = index;
-
- STACK_WIND (frame,
- unify_symlink_cbk,
- sched_xl,
- sched_xl->fops->symlink,
- local->name,
- &local->loc1);
-
- return 0;
-}
-
-/**
- * unify_symlink -
- */
-int32_t
-unify_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc)
-{
- unify_local_t *local = NULL;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, loc);
- local->name = gf_strdup (linkpath);
-
- if ((local->name == NULL) ||
- (local->loc1.path == NULL)) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, loc->inode, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- unify_ns_symlink_cbk,
- NS(this),
- NS(this)->fops->symlink,
- linkpath,
- loc);
-
- return 0;
-}
-
-
-int32_t
-unify_rename_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int32_t callcnt = 0;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s -> %s): %s",
- prev_frame->this->name,
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
-
- }
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf);
- }
- return 0;
-}
-
-int32_t
-unify_ns_rename_undo_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- }
-
- local->stbuf.ia_ino = local->ia_ino;
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno, &local->stbuf);
- return 0;
-}
-
-int32_t
-unify_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int16_t *list = NULL;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- call_frame_t *prev_frame = cookie;
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- if (op_ret >= 0) {
- if (!IA_ISDIR (buf->ia_type))
- local->stbuf = *buf;
- local->op_ret = op_ret;
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "child(%s): path(%s -> %s): %s",
- prev_frame->this->name,
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- local->op_errno = op_errno;
- }
- }
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- local->stbuf.ia_ino = local->ia_ino;
- if (IA_ISDIR (local->loc1.inode->ia_type)) {
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret, local->op_errno,
- &local->stbuf, &local->oldpreparent,
- &local->oldpostparent, &local->newpreparent,
- &local->newpostparent);
- return 0;
- }
-
- if (local->op_ret == -1) {
- /* TODO: check this logic */
-
- /* Rename failed in storage node, successful on NS,
- * hence, rename back the entries in NS */
- /* NOTE: this will be done only if the destination
- * doesn't exists, if the destination exists, the
- * job of correcting NS is left to self-heal
- */
- if (!local->index) {
- loc_t tmp_oldloc = {
- /* its actual 'newloc->path' */
- .path = local->loc2.path,
- .inode = local->loc1.inode,
- .parent = local->loc2.parent
- };
-
- loc_t tmp_newloc = {
- /* Actual 'oldloc->path' */
- .path = local->loc1.path,
- .parent = local->loc1.parent
- };
-
- gf_log (this->name, GF_LOG_ERROR,
- "rename succussful on namespace, on "
- "stroage node failed, reverting back");
-
- STACK_WIND (frame,
- unify_ns_rename_undo_cbk,
- NS(this),
- NS(this)->fops->rename,
- &tmp_oldloc,
- &tmp_newloc);
- return 0;
- }
- } else {
- /* Rename successful on storage nodes */
-
- int32_t idx = 0;
- int16_t *tmp_list = NULL;
- uint64_t tmp_list_int64 = 0;
- if (local->loc2.inode) {
- inode_ctx_get (local->loc2.inode,
- this, &tmp_list_int64);
- list = (int16_t *)(long)tmp_list_int64;
-
- }
-
- if (list) {
- for (index = 0; list[index] != -1; index++);
- tmp_list = GF_CALLOC (1, index * 2,
- gf_unify_mt_int16_t);
- memcpy (tmp_list, list, index * 2);
-
- for (index = 0; list[index] != -1; index++) {
- /* TODO: Check this logic. */
- /* If the destination file exists in
- * the same storage node where we sent
- * 'rename' call, no need to send
- * unlink
- */
- for (idx = 0;
- local->list[idx] != -1; idx++) {
- if (tmp_list[index] == local->list[idx]) {
- tmp_list[index] = priv->child_count;
- continue;
- }
- }
-
- if (NS(this) != priv->xl_array[tmp_list[index]]) {
- local->call_count++;
- callcnt++;
- }
- }
-
- if (local->call_count) {
- if (callcnt > 1)
- gf_log (this->name,
- GF_LOG_ERROR,
- "%s->%s: more (%d) "
- "subvolumes have the "
- "newloc entry",
- local->loc1.path,
- local->loc2.path,
- callcnt);
-
- for (index=0;
- tmp_list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[tmp_list[index]]) {
- STACK_WIND (frame,
- unify_rename_unlink_cbk,
- priv->xl_array[tmp_list[index]],
- priv->xl_array[tmp_list[index]]->fops->unlink,
- &local->loc2);
- if (!--callcnt)
- break;
- }
- }
-
- GF_FREE (tmp_list);
- return 0;
- }
- if (tmp_list)
- GF_FREE (tmp_list);
- }
- }
-
- /* Need not send 'unlink' to storage node */
- unify_local_wipe (local);
- STACK_UNWIND (frame, local->op_ret,
- local->op_errno, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent,
- &local->newpreparent, &local->newpostparent);
- }
-
- return 0;
-}
-
-int32_t
-unify_ns_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- int32_t index = 0;
- int32_t callcnt = 0;
- int16_t *list = NULL;
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
-
- if (op_ret == -1) {
- /* Free local->new_inode */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, buf,
- preoldparent, postoldparent,
- prenewparent, postnewparent);
- return 0;
- }
-
- local->stbuf = *buf;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preoldparent;
- local->oldpostparent = *postoldparent;
- local->newpreparent = *prenewparent;
- local->newpostparent = *postnewparent;
-
- /* Everything is fine. */
- if (IA_ISDIR (buf->ia_type)) {
- local->call_count = priv->child_count;
- for (index=0; index < priv->child_count; index++) {
- STACK_WIND (frame,
- unify_rename_cbk,
- priv->xl_array[index],
- priv->xl_array[index]->fops->rename,
- &local->loc1,
- &local->loc2);
- }
-
- return 0;
- }
-
- local->call_count = 0;
- /* send rename */
- list = local->list;
- for (index=0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- local->call_count++;
- callcnt++;
- }
- }
-
- if (local->call_count) {
- for (index=0; list[index] != -1; index++) {
- if (NS(this) != priv->xl_array[list[index]]) {
- STACK_WIND (frame,
- unify_rename_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->rename,
- &local->loc1,
- &local->loc2);
- if (!--callcnt)
- break;
- }
- }
- } else {
- /* file doesn't seem to be present in storage nodes */
- gf_log (this->name, GF_LOG_CRITICAL,
- "CRITICAL: source file not in storage node, "
- "rename successful on namespace :O");
- unify_local_wipe (local);
- STACK_UNWIND (frame, -1, EIO, NULL,
- NULL, NULL, /* preoldparent, postoldparent */
- NULL, NULL); /* prenewparent, postnewparent */
- }
- return 0;
-}
-
-
-/**
- * unify_rename - One of the tricky function. The deadliest of all :O
- */
-int32_t
-unify_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- /* Initialization */
- INIT_LOCAL (frame, local);
- loc_copy (&local->loc1, oldloc);
- loc_copy (&local->loc2, newloc);
-
- if ((local->loc1.path == NULL) ||
- (local->loc2.path == NULL)) {
- gf_log (this->name, GF_LOG_CRITICAL, "Not enough memory :O");
- STACK_UNWIND (frame, -1, ENOMEM, NULL,
- NULL, NULL, /* preoldparent, postoldparent */
- NULL, NULL); /* prenewparent, postnewparent */
- return 0;
- }
-
- inode_ctx_get (oldloc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- STACK_WIND (frame,
- unify_ns_rename_cbk,
- NS(this),
- NS(this)->fops->rename,
- oldloc,
- newloc);
- return 0;
-}
-
-/**
- * unify_link_cbk -
- */
-int32_t
-unify_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_local_t *local = frame->local;
-
- if (op_ret >= 0)
- local->stbuf = *buf;
- local->stbuf.ia_ino = local->ia_ino;
-
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, &local->stbuf,
- &local->oldpreparent, &local->oldpostparent);
-
- return 0;
-}
-
-/**
- * unify_ns_link_cbk -
- */
-int32_t
-unify_ns_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- unify_private_t *priv = this->private;
- unify_local_t *local = frame->local;
- int16_t *list = local->list;
- int16_t index = 0;
-
- if (op_ret == -1) {
- /* No need to send link request to other servers,
- * as namespace action failed
- */
- gf_log (this->name, GF_LOG_ERROR,
- "namespace: path(%s -> %s): %s",
- local->loc1.path, local->loc2.path,
- strerror (op_errno));
- unify_local_wipe (local);
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
- }
-
- /* Update inode for this entry */
- local->op_ret = 0;
- local->ia_ino = buf->ia_ino;
-
- local->oldpreparent = *preparent;
- local->oldpostparent = *postparent;
-
- /* Send link request to the node now */
- for (index = 0; list[index] != -1; index++) {
- char need_break = (list[index+1] == -1);
- if (priv->xl_array[list[index]] != NS (this)) {
- STACK_WIND (frame,
- unify_link_cbk,
- priv->xl_array[list[index]],
- priv->xl_array[list[index]]->fops->link,
- &local->loc1,
- &local->loc2);
- break;
- }
- if (need_break)
- break;
- }
-
- return 0;
-}
-
-/**
- * unify_link -
- */
-int32_t
-unify_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- unify_local_t *local = NULL;
- uint64_t tmp_list = 0;
-
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (oldloc);
- UNIFY_CHECK_INODE_CTX_AND_UNWIND_ON_ERR (newloc);
-
- /* Initialization */
- INIT_LOCAL (frame, local);
-
- loc_copy (&local->loc1, oldloc);
- loc_copy (&local->loc2, newloc);
-
- inode_ctx_get (oldloc->inode, this, &tmp_list);
- local->list = (int16_t *)(long)tmp_list;
-
- STACK_WIND (frame,
- unify_ns_link_cbk,
- NS(this),
- NS(this)->fops->link,
- oldloc,
- newloc);
-
- return 0;
-}
-
-
-/**
- * unify_checksum_cbk -
- */
-int32_t
-unify_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *fchecksum,
- uint8_t *dchecksum)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
-
- return 0;
-}
-
-/**
- * unify_checksum -
- */
-int32_t
-unify_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
-{
- STACK_WIND (frame,
- unify_checksum_cbk,
- NS(this),
- NS(this)->fops->checksum,
- loc,
- flag);
-
- return 0;
-}
-
-
-/**
- * unify_finodelk_cbk -
- */
-int
-unify_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_finodelk
- */
-int
-unify_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int cmd, struct gf_flock *flock)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_finodelk_cbk,
- child, child->fops->finodelk,
- volume, fd, cmd, flock);
-
- return 0;
-}
-
-
-
-/**
- * unify_fentrylk_cbk -
- */
-int
-unify_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_fentrylk
- */
-int
-unify_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fentrylk_cbk,
- child, child->fops->fentrylk,
- volume, fd, basename, cmd, type);
-
- return 0;
-}
-
-
-
-/**
- * unify_fxattrop_cbk -
- */
-int
-unify_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- STACK_UNWIND (frame, op_ret, op_errno, xattr);
- return 0;
-}
-
-/**
- * unify_fxattrop
- */
-int
-unify_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t optype, dict_t *xattr)
-{
- UNIFY_CHECK_FD_CTX_AND_UNWIND_ON_ERR (fd);
- xlator_t *child = NULL;
- uint64_t tmp_child = 0;
-
- fd_ctx_get (fd, this, &tmp_child);
- child = (xlator_t *)(long)tmp_child;
-
- STACK_WIND (frame, unify_fxattrop_cbk,
- child, child->fops->fxattrop,
- fd, optype, xattr);
-
- return 0;
-}
-
-
-/**
- * unify_inodelk_cbk -
- */
-int
-unify_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-
-/**
- * unify_inodelk
- */
-int
-unify_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int cmd, struct gf_flock *flock)
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_inodelk_cbk,
- child, child->fops->inodelk,
- volume, loc, cmd, flock);
-
- return 0;
-}
-
-
-
-/**
- * unify_entrylk_cbk -
- */
-int
-unify_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/**
- * unify_entrylk
- */
-int
-unify_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_entrylk_cbk,
- child, child->fops->entrylk,
- volume, loc, basename, cmd, type);
-
- return 0;
-}
-
-
-
-/**
- * unify_xattrop_cbk -
- */
-int
-unify_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr)
-{
- STACK_UNWIND (frame, op_ret, op_errno, xattr);
- return 0;
-}
-
-/**
- * unify_xattrop
- */
-int
-unify_xattrop (call_frame_t *frame, xlator_t *this,
- loc_t *loc, gf_xattrop_flags_t optype, dict_t *xattr)
-{
- xlator_t *child = NULL;
-
- child = unify_loc_subvol (loc, this);
-
- STACK_WIND (frame, unify_xattrop_cbk,
- child, child->fops->xattrop,
- loc, optype, xattr);
-
- return 0;
-}
-
-int
-unify_forget (xlator_t *this,
- inode_t *inode)
-{
- int16_t *list = NULL;
- uint64_t tmp_list = 0;
-
- if (inode->ia_type && (!IA_ISDIR(inode->ia_type))) {
- inode_ctx_get (inode, this, &tmp_list);
- if (tmp_list) {
- list = (int16_t *)(long)tmp_list;
- GF_FREE (list);
- }
- }
-
- return 0;
-}
-
-/**
- * notify
- */
-int32_t
-notify (xlator_t *this,
- int32_t event,
- void *data,
- ...)
-{
- unify_private_t *priv = this->private;
- struct sched_ops *sched = NULL;
-
- if (!priv) {
- return 0;
- }
-
- sched = priv->sched_ops;
- if (!sched) {
- gf_log (this->name, GF_LOG_CRITICAL, "No scheduler :O");
- raise (SIGTERM);
- return 0;
- }
- if (priv->namespace == data) {
- if (event == GF_EVENT_CHILD_UP) {
- sched->notify (this, event, data);
- }
- return 0;
- }
-
- switch (event)
- {
- case GF_EVENT_CHILD_UP:
- {
- /* Call scheduler's update () to enable it for scheduling */
- sched->notify (this, event, data);
-
- LOCK (&priv->lock);
- {
- /* Increment the inode's generation, which is
- used for self_heal */
- ++priv->inode_generation;
- ++priv->num_child_up;
- }
- UNLOCK (&priv->lock);
-
- if (!priv->is_up) {
- default_notify (this, event, data);
- priv->is_up = 1;
- }
- }
- break;
- case GF_EVENT_CHILD_DOWN:
- {
- /* Call scheduler's update () to disable the child node
- * for scheduling
- */
- sched->notify (this, event, data);
- LOCK (&priv->lock);
- {
- --priv->num_child_up;
- }
- UNLOCK (&priv->lock);
-
- if (priv->num_child_up == 0) {
- /* Send CHILD_DOWN to upper layer */
- default_notify (this, event, data);
- priv->is_up = 0;
- }
- }
- break;
-
- default:
- {
- default_notify (this, event, data);
- }
- break;
- }
-
- return 0;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_unify_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-/**
- * init - This function is called first in the xlator, while initializing.
- * All the config file options are checked and appropriate flags are set.
- *
- * @this -
- */
-int32_t
-init (xlator_t *this)
-{
- int32_t ret = 0;
- int32_t count = 0;
- data_t *scheduler = NULL;
- data_t *data = NULL;
- xlator_t *ns_xl = NULL;
- xlator_list_t *trav = NULL;
- xlator_list_t *xlparent = NULL;
- xlator_list_t *parent = NULL;
- unify_private_t *_private = NULL;
-
-
- /* Check for number of child nodes, if there is no child nodes, exit */
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "No child nodes specified. check \"subvolumes \" "
- "option in volfile");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- /* Check for 'scheduler' in volume */
- scheduler = dict_get (this->options, "scheduler");
- if (!scheduler) {
- gf_log (this->name, GF_LOG_ERROR,
- "\"option scheduler <x>\" is missing in volfile");
- return -1;
- }
-
- /* Setting "option namespace <node>" */
- data = dict_get (this->options, "namespace");
- if(!data) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace option not specified, Exiting");
- return -1;
- }
- /* Search namespace in the child node, if found, exit */
- trav = this->children;
- while (trav) {
- if (strcmp (trav->xlator->name, data->data) == 0)
- break;
- trav = trav->next;
- }
- if (trav) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace node used as a subvolume, Exiting");
- return -1;
- }
-
- /* Search for the namespace node, if found, continue */
- ns_xl = this->next;
- while (ns_xl) {
- if (strcmp (ns_xl->name, data->data) == 0)
- break;
- ns_xl = ns_xl->next;
- }
- if (!ns_xl) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "namespace node not found in volfile, Exiting");
- return -1;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "namespace node specified as %s", data->data);
-
- _private = GF_CALLOC (1, sizeof (*_private),
- gf_unify_mt_unify_private_t);
- ERR_ABORT (_private);
- _private->sched_ops = get_scheduler (this, scheduler->data);
- if (!_private->sched_ops) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Error while loading scheduler. Exiting");
- GF_FREE (_private);
- return -1;
- }
-
- if (ns_xl->parents) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Namespace node should not be a child of any other node. Exiting");
- GF_FREE (_private);
- return -1;
- }
-
- _private->namespace = ns_xl;
-
- /* update _private structure */
- {
- count = 0;
- trav = this->children;
- /* Get the number of child count */
- while (trav) {
- count++;
- trav = trav->next;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Child node count is %d", count);
-
- _private->child_count = count;
- if (count == 1) {
- /* TODO: Should I error out here? */
- gf_log (this->name, GF_LOG_CRITICAL,
- "WARNING: You have defined only one "
- "\"subvolumes\" for unify volume. It may not "
- "be the desired config, review your volume "
- "volfile. If this is how you are testing it,"
- " you may hit some performance penalty");
- }
-
- _private->xl_array = GF_CALLOC (1,
- sizeof (xlator_t) * (count + 1),
- gf_unify_mt_xlator_t);
- ERR_ABORT (_private->xl_array);
-
- count = 0;
- trav = this->children;
- while (trav) {
- _private->xl_array[count++] = trav->xlator;
- trav = trav->next;
- }
- _private->xl_array[count] = _private->namespace;
-
- /* self-heal part, start with generation '1' */
- _private->inode_generation = 1;
- /* Because, Foreground part is tested well */
- _private->self_heal = ZR_UNIFY_FG_SELF_HEAL;
- data = dict_get (this->options, "self-heal");
- if (data) {
- if (strcasecmp (data->data, "off") == 0)
- _private->self_heal = ZR_UNIFY_SELF_HEAL_OFF;
-
- if (strcasecmp (data->data, "foreground") == 0)
- _private->self_heal = ZR_UNIFY_FG_SELF_HEAL;
-
- if (strcasecmp (data->data, "background") == 0)
- _private->self_heal = ZR_UNIFY_BG_SELF_HEAL;
- }
-
- /* optimist - ask bulde for more about it */
- data = dict_get (this->options, "optimist");
- if (data) {
- if (gf_string2boolean (data->data,
- &_private->optimist) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "optimist excepts only boolean "
- "options");
- }
- }
-
- LOCK_INIT (&_private->lock);
- }
-
- /* Now that everything is fine. */
- this->private = (void *)_private;
- {
- ret = _private->sched_ops->mem_acct_init (this);
-
- if (ret == -1) {
- return -1;
- }
-
- /* Initialize scheduler, if everything else is successful */
- ret = _private->sched_ops->init (this);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Initializing scheduler failed, Exiting");
- GF_FREE (_private);
- return -1;
- }
-
-
- ret = 0;
-
- /* This section is required because some fops may look
- * for 'xl->parent' variable
- */
- xlparent = GF_CALLOC (1, sizeof (*xlparent),
- gf_unify_mt_xlator_list_t);
- xlparent->xlator = this;
- if (!ns_xl->parents) {
- ns_xl->parents = xlparent;
- } else {
- parent = ns_xl->parents;
- while (parent->next)
- parent = parent->next;
- parent->next = xlparent;
- }
- }
-
- /* Tell namespace node that init is done */
- xlator_notify (ns_xl, GF_EVENT_PARENT_UP, this);
-
- return 0;
-}
-
-/**
- * fini - Free all the allocated memory
- */
-void
-fini (xlator_t *this)
-{
- unify_private_t *priv = this->private;
- priv->sched_ops->fini (this);
- this->private = NULL;
- LOCK_DESTROY (&priv->lock);
- GF_FREE (priv->xl_array);
- GF_FREE (priv);
- return;
-}
-
-
-struct xlator_fops fops = {
- .stat = unify_stat,
- .readlink = unify_readlink,
- .mknod = unify_mknod,
- .mkdir = unify_mkdir,
- .unlink = unify_unlink,
- .rmdir = unify_rmdir,
- .symlink = unify_symlink,
- .rename = unify_rename,
- .link = unify_link,
- .truncate = unify_truncate,
- .create = unify_create,
- .open = unify_open,
- .readv = unify_readv,
- .writev = unify_writev,
- .statfs = unify_statfs,
- .flush = unify_flush,
- .fsync = unify_fsync,
- .setxattr = unify_setxattr,
- .getxattr = unify_getxattr,
- .removexattr = unify_removexattr,
- .opendir = unify_opendir,
- .readdir = unify_readdir,
- .readdirp = unify_readdirp,
- .fsyncdir = unify_fsyncdir,
- .access = unify_access,
- .ftruncate = unify_ftruncate,
- .fstat = unify_fstat,
- .lk = unify_lk,
- .lookup = unify_lookup,
- .getdents = unify_getdents,
- .checksum = unify_checksum,
- .inodelk = unify_inodelk,
- .finodelk = unify_finodelk,
- .entrylk = unify_entrylk,
- .fentrylk = unify_fentrylk,
- .xattrop = unify_xattrop,
- .fxattrop = unify_fxattrop,
- .setattr = unify_setattr,
- .fsetattr = unify_fsetattr,
-};
-
-
-struct xlator_cbks cbks = {
- .forget = unify_forget,
-};
-
-struct volume_options options[] = {
- { .key = { "namespace" },
- .type = GF_OPTION_TYPE_XLATOR
- },
- { .key = { "scheduler" },
- .value = { "alu", "rr", "random", "nufa", "switch" },
- .type = GF_OPTION_TYPE_STR
- },
- { .key = {"self-heal"},
- .value = { "foreground", "background", "off" },
- .type = GF_OPTION_TYPE_STR
- },
- /* TODO: remove it some time later */
- { .key = {"optimist"},
- .type = GF_OPTION_TYPE_BOOL
- },
-
- { .key = {NULL} },
-};
diff --git a/xlators/cluster/unify/src/unify.h b/xlators/cluster/unify/src/unify.h
deleted file mode 100644
index dbd5e44a2f1..00000000000
--- a/xlators/cluster/unify/src/unify.h
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef _UNIFY_H
-#define _UNIFY_H
-
-#include "scheduler.h"
-#include "list.h"
-#include "unify-mem-types.h"
-
-#define MAX_DIR_ENTRY_STRING (32 * 1024)
-
-#define ZR_UNIFY_SELF_HEAL_OFF 0
-#define ZR_UNIFY_FG_SELF_HEAL 1
-#define ZR_UNIFY_BG_SELF_HEAL 2
-
-/* Sometimes one should use completely random numbers.. its good :p */
-#define UNIFY_SELF_HEAL_GETDENTS_COUNT 512
-
-#define NS(xl) (((unify_private_t *)xl->private)->namespace)
-
-/* This is used to allocate memory for local structure */
-#define INIT_LOCAL(fr, loc) \
-do { \
- loc = GF_CALLOC (1, sizeof (unify_local_t), gf_unify_mt_unify_local_t); \
- ERR_ABORT (loc); \
- if (!loc) { \
- STACK_UNWIND (fr, -1, ENOMEM); \
- return 0; \
- } \
- fr->local = loc; \
- loc->op_ret = -1; \
- loc->op_errno = ENOENT; \
-} while (0)
-
-
-
-struct unify_private {
- /* Update this structure depending on requirement */
- void *scheduler; /* THIS SHOULD BE THE FIRST VARIABLE,
- if xlator is using scheduler */
- struct sched_ops *sched_ops; /* Scheduler options */
- xlator_t *namespace; /* ptr to namespace xlator */
- xlator_t **xl_array;
- gf_boolean_t optimist;
- int16_t child_count;
- int16_t num_child_up;
- uint8_t self_heal;
- uint8_t is_up;
- uint64_t inode_generation;
- gf_lock_t lock;
-};
-typedef struct unify_private unify_private_t;
-
-struct unify_self_heal_struct {
- uint8_t dir_checksum[NAME_MAX];
- uint8_t ns_dir_checksum[NAME_MAX];
- uint8_t file_checksum[NAME_MAX];
- uint8_t ns_file_checksum[NAME_MAX];
- off_t *offset_list;
- int *count_list;
- dir_entry_t **entry_list;
-};
-
-
-struct _unify_local_t {
- int32_t call_count;
- int32_t op_ret;
- int32_t op_errno;
- mode_t mode;
- off_t offset;
- dev_t dev;
- uid_t uid;
- gid_t gid;
- int32_t flags;
- int32_t entry_count;
- int32_t count; // dir_entry_t count;
- fd_t *fd;
- struct iatt stbuf;
- struct iatt stpre;
- struct iatt stpost;
- struct statvfs statvfs_buf;
- struct timespec tv[2];
- char *name;
- int32_t revalidate;
-
- ino_t ia_ino;
- nlink_t ia_nlink;
-
- dict_t *dict;
-
- int16_t *list;
- int16_t *new_list; /* Used only in case of rename */
- int16_t index;
-
- int32_t failed;
- int32_t return_eio; /* Used in case of different st-mode
- present for a given path */
-
- uint64_t inode_generation; /* used to store the per directory
- * inode_generation. Got from inode's ctx
- * of directory inodes
- */
-
- struct unify_self_heal_struct *sh_struct;
- loc_t loc1, loc2;
-
- struct iatt poststbuf;
- /* When not used for rename, old*
- * are used as the attrs for the current
- * parent directory.
- */
- struct iatt oldpreparent;
- struct iatt oldpostparent;
- struct iatt newpreparent;
- struct iatt newpostparent;
- int32_t wbflags;
-};
-typedef struct _unify_local_t unify_local_t;
-
-int32_t zr_unify_self_heal (call_frame_t *frame,
- xlator_t *this,
- unify_local_t *local);
-
-#endif /* _UNIFY_H */
diff --git a/xlators/debug/Makefile.am b/xlators/debug/Makefile.am
index b655554efec..88fac1c6d9e 100644
--- a/xlators/debug/Makefile.am
+++ b/xlators/debug/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = trace error-gen io-stats
+SUBDIRS = error-gen io-stats sink trace delay-gen
CLEANFILES =
diff --git a/scheduler/random/Makefile.am b/xlators/debug/delay-gen/Makefile.am
index d471a3f9243..a985f42a877 100644
--- a/scheduler/random/Makefile.am
+++ b/xlators/debug/delay-gen/Makefile.am
@@ -1,3 +1,3 @@
SUBDIRS = src
-CLEANFILES =
+CLEANFILES =
diff --git a/xlators/debug/delay-gen/src/Makefile.am b/xlators/debug/delay-gen/src/Makefile.am
new file mode 100644
index 00000000000..8f758dec199
--- /dev/null
+++ b/xlators/debug/delay-gen/src/Makefile.am
@@ -0,0 +1,11 @@
+
+xlator_LTLIBRARIES = delay-gen.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
+delay_gen_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+delay_gen_la_SOURCES = delay-gen.c
+delay_gen_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+noinst_HEADERS = delay-gen.h delay-gen-mem-types.h delay-gen-messages.h
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
+CLEANFILES =
diff --git a/xlators/debug/delay-gen/src/delay-gen-mem-types.h b/xlators/debug/delay-gen/src/delay-gen-mem-types.h
new file mode 100644
index 00000000000..c89a9217193
--- /dev/null
+++ b/xlators/debug/delay-gen/src/delay-gen-mem-types.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __DELAY_GEN_MEM_TYPES_H__
+#define __DELAY_GEN_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_delay_gen_mem_types_ {
+ gf_delay_gen_mt_dg_t = gf_common_mt_end + 1,
+ gf_delay_gen_mt_end
+};
+
+#endif /* __DELAY_GEN_MEM_TYPES_H__ */
diff --git a/xlators/debug/delay-gen/src/delay-gen-messages.h b/xlators/debug/delay-gen/src/delay-gen-messages.h
new file mode 100644
index 00000000000..bc98cec2885
--- /dev/null
+++ b/xlators/debug/delay-gen/src/delay-gen-messages.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __DELAY_GEN_MESSAGES_H__
+#define __DELAY_GEN_MESSAGES_H__
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+#endif /* __DELAY_GEN_MESSAGES_H__ */
diff --git a/xlators/debug/delay-gen/src/delay-gen.c b/xlators/debug/delay-gen/src/delay-gen.c
new file mode 100644
index 00000000000..4698f1fd785
--- /dev/null
+++ b/xlators/debug/delay-gen/src/delay-gen.c
@@ -0,0 +1,697 @@
+/*
+ * Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#include "delay-gen.h"
+
+#define DELAY_GRANULARITY (1 << 20)
+
+#define DG_FOP(fop, name, frame, this, args...) \
+ do { \
+ delay_gen(this, fop); \
+ default_##name(frame, this, args); \
+ } while (0)
+
+int
+delay_gen(xlator_t *this, int fop)
+{
+ dg_t *dg = this->private;
+
+ if (!dg->enable[fop] || !dg->delay_ppm)
+ return 0;
+
+ if ((rand() % DELAY_GRANULARITY) < dg->delay_ppm)
+ gf_nanosleep(dg->delay_duration * GF_US_IN_NS);
+
+ return 0;
+}
+
+int32_t
+dg_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ DG_FOP(GF_FOP_RENAME, rename, frame, this, oldloc, newloc, xdata);
+ return 0;
+}
+
+int32_t
+dg_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_IPC, ipc, frame, this, op, xdata);
+ return 0;
+}
+
+int32_t
+dg_setactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_SETACTIVELK, setactivelk, frame, this, loc, locklist, xdata);
+ return 0;
+}
+
+int32_t
+dg_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_FLUSH, flush, frame, this, fd, xdata);
+ return 0;
+}
+
+int32_t
+dg_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_READDIR, readdir, frame, this, fd, size, off, xdata);
+ return 0;
+}
+
+int32_t
+dg_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_SETXATTR, setxattr, frame, this, loc, dict, flags, xdata);
+ return 0;
+}
+
+int32_t
+dg_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_MKNOD, mknod, frame, this, loc, mode, rdev, umask, xdata);
+ return 0;
+}
+
+int32_t
+dg_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_FSETXATTR, fsetxattr, frame, this, fd, dict, flags, xdata);
+ return 0;
+}
+
+int32_t
+dg_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_READ, readv, frame, this, fd, size, offset, flags, xdata);
+ return 0;
+}
+
+int32_t
+dg_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_INODELK, inodelk, frame, this, volume, loc, cmd, lock, xdata);
+ return 0;
+}
+
+int32_t
+dg_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ DG_FOP(GF_FOP_FREMOVEXATTR, fremovexattr, frame, this, fd, name, xdata);
+ return 0;
+}
+
+int32_t
+dg_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_OPEN, open, frame, this, loc, flags, fd, xdata);
+ return 0;
+}
+
+int32_t
+dg_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_XATTROP, xattrop, frame, this, loc, flags, dict, xdata);
+ return 0;
+}
+
+int32_t
+dg_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ DG_FOP(GF_FOP_ENTRYLK, entrylk, frame, this, volume, loc, basename, cmd,
+ type, xdata);
+ return 0;
+}
+
+int32_t
+dg_getactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_GETACTIVELK, getactivelk, frame, this, loc, xdata);
+ return 0;
+}
+
+int32_t
+dg_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_FINODELK, finodelk, frame, this, volume, fd, cmd, lock,
+ xdata);
+ return 0;
+}
+
+int32_t
+dg_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_CREATE, create, frame, this, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+}
+
+int32_t
+dg_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_DISCARD, discard, frame, this, fd, offset, len, xdata);
+ return 0;
+}
+
+int32_t
+dg_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_MKDIR, mkdir, frame, this, loc, mode, umask, xdata);
+ return 0;
+}
+
+int32_t
+dg_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_LK, lk, frame, this, fd, cmd, lock, xdata);
+ return 0;
+}
+
+int32_t
+dg_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t off, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ DG_FOP(GF_FOP_WRITE, writev, frame, this, fd, vector, count, off, flags,
+ iobref, xdata);
+ return 0;
+}
+
+int32_t
+dg_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
+{
+ DG_FOP(GF_FOP_ACCESS, access, frame, this, loc, mask, xdata);
+ return 0;
+}
+
+int32_t
+dg_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_LOOKUP, lookup, frame, this, loc, xdata);
+ return 0;
+}
+
+int32_t
+dg_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ dict_t *xdata)
+{
+ DG_FOP(GF_FOP_RMDIR, rmdir, frame, this, loc, flags, xdata);
+ return 0;
+}
+
+int32_t
+dg_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_FALLOCATE, fallocate, frame, this, fd, keep_size, offset, len,
+ xdata);
+ return 0;
+}
+
+int32_t
+dg_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_FSTAT, fstat, frame, this, fd, xdata);
+ return 0;
+}
+
+int32_t
+dg_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_LEASE, lease, frame, this, loc, lease, xdata);
+ return 0;
+}
+
+int32_t
+dg_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_STAT, stat, frame, this, loc, xdata);
+ return 0;
+}
+
+int32_t
+dg_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ DG_FOP(GF_FOP_TRUNCATE, truncate, frame, this, loc, offset, xdata);
+ return 0;
+}
+
+int32_t
+dg_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata)
+{
+ DG_FOP(GF_FOP_GETXATTR, getxattr, frame, this, loc, name, xdata);
+ return 0;
+}
+
+int32_t
+dg_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_SYMLINK, symlink, frame, this, linkpath, loc, umask, xdata);
+ return 0;
+}
+
+int32_t
+dg_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_ZEROFILL, zerofill, frame, this, fd, offset, len, xdata);
+ return 0;
+}
+
+int32_t
+dg_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ DG_FOP(GF_FOP_FSYNCDIR, fsyncdir, frame, this, fd, flags, xdata);
+ return 0;
+}
+
+int32_t
+dg_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ DG_FOP(GF_FOP_FGETXATTR, fgetxattr, frame, this, fd, name, xdata);
+ return 0;
+}
+
+int32_t
+dg_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_READDIRP, readdirp, frame, this, fd, size, off, xdata);
+ return 0;
+}
+
+int32_t
+dg_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ DG_FOP(GF_FOP_LINK, link, frame, this, oldloc, newloc, xdata);
+ return 0;
+}
+
+int32_t
+dg_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_FXATTROP, fxattrop, frame, this, fd, flags, dict, xdata);
+ return 0;
+}
+
+int32_t
+dg_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ DG_FOP(GF_FOP_FTRUNCATE, ftruncate, frame, this, fd, offset, xdata);
+ return 0;
+}
+
+int32_t
+dg_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_RCHECKSUM, rchecksum, frame, this, fd, offset, len, xdata);
+ return 0;
+}
+
+int32_t
+dg_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ dict_t *xdata)
+{
+ DG_FOP(GF_FOP_UNLINK, unlink, frame, this, loc, flags, xdata);
+ return 0;
+}
+
+int32_t
+dg_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ DG_FOP(GF_FOP_FENTRYLK, fentrylk, frame, this, volume, fd, basename, cmd,
+ type, xdata);
+ return 0;
+}
+
+int32_t
+dg_getspec(call_frame_t *frame, xlator_t *this, const char *key, int32_t flags)
+{
+ DG_FOP(GF_FOP_GETSPEC, getspec, frame, this, key, flags);
+ return 0;
+}
+
+int32_t
+dg_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_SETATTR, setattr, frame, this, loc, stbuf, valid, xdata);
+ return 0;
+}
+
+int32_t
+dg_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ DG_FOP(GF_FOP_FSYNC, fsync, frame, this, fd, flags, xdata);
+ return 0;
+}
+
+int32_t
+dg_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_STATFS, statfs, frame, this, loc, xdata);
+ return 0;
+}
+
+int32_t
+dg_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_SEEK, seek, frame, this, fd, offset, what, xdata);
+ return 0;
+}
+
+int32_t
+dg_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_FSETATTR, fsetattr, frame, this, fd, stbuf, valid, xdata);
+ return 0;
+}
+
+int32_t
+dg_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ DG_FOP(GF_FOP_OPENDIR, opendir, frame, this, loc, fd, xdata);
+ return 0;
+}
+
+int32_t
+dg_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ DG_FOP(GF_FOP_READLINK, readlink, frame, this, loc, size, xdata);
+ return 0;
+}
+
+int32_t
+dg_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ DG_FOP(GF_FOP_REMOVEXATTR, removexattr, frame, this, loc, name, xdata);
+ return 0;
+}
+
+int32_t
+dg_forget(xlator_t *this, inode_t *inode)
+{
+ return 0;
+}
+
+int32_t
+dg_release(xlator_t *this, fd_t *fd)
+{
+ return 0;
+}
+
+int32_t
+dg_releasedir(xlator_t *this, fd_t *fd)
+{
+ return 0;
+}
+
+static int
+delay_gen_parse_fill_fops(dg_t *dg, char *enable_fops)
+{
+ char *op_no_str = NULL;
+ int op_no = -1;
+ int i = 0;
+ int ret = 0;
+ xlator_t *this = THIS;
+ char *saveptr = NULL;
+ char *dup_enable_fops = NULL;
+
+ if (strlen(enable_fops) == 0) {
+ for (i = GF_FOP_NULL + 1; i < GF_FOP_MAXVALUE; i++)
+ dg->enable[i] = 1;
+ } else {
+ dup_enable_fops = gf_strdup(enable_fops);
+ if (!dup_enable_fops) {
+ ret = -1;
+ goto out;
+ }
+ op_no_str = strtok_r(dup_enable_fops, ",", &saveptr);
+ while (op_no_str) {
+ op_no = gf_fop_int(op_no_str);
+ if (op_no == -1) {
+ gf_log(this->name, GF_LOG_WARNING, "Wrong option value %s",
+ op_no_str);
+ ret = -1;
+ goto out;
+ } else {
+ dg->enable[op_no] = 1;
+ }
+
+ op_no_str = strtok_r(NULL, ",", &saveptr);
+ }
+ }
+out:
+ GF_FREE(dup_enable_fops);
+ return ret;
+}
+
+void
+delay_gen_set_delay_ppm(dg_t *dg, double percent)
+{
+ double ppm;
+
+ ppm = (percent / 100.0) * (double)DELAY_GRANULARITY;
+ dg->delay_ppm = ppm;
+}
+
+int32_t
+init(xlator_t *this)
+{
+ dg_t *dg = NULL;
+ int32_t ret = 0;
+ double delay_percent = 0;
+ char *delay_enable_fops = NULL;
+
+ if (!this->children || this->children->next) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "delay-gen not configured with one subvolume");
+ ret = -1;
+ goto out;
+ }
+
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
+ }
+
+ dg = GF_CALLOC(1, sizeof(*dg), gf_delay_gen_mt_dg_t);
+
+ if (!dg) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = -1;
+
+ GF_OPTION_INIT("delay-percentage", delay_percent, percent, out);
+ GF_OPTION_INIT("enable", delay_enable_fops, str, out);
+ GF_OPTION_INIT("delay-duration", dg->delay_duration, int32, out);
+
+ delay_gen_set_delay_ppm(dg, delay_percent);
+
+ ret = delay_gen_parse_fill_fops(dg, delay_enable_fops);
+ if (ret)
+ goto out;
+
+ this->private = dg;
+
+ ret = 0;
+out:
+ if (ret)
+ GF_FREE(dg);
+ return ret;
+}
+
+void
+fini(xlator_t *this)
+{
+ GF_FREE(this->private);
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, gf_delay_gen_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Memory accounting init"
+ " failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int32_t
+reconfigure(xlator_t *this, dict_t *dict)
+{
+ /*At the moment I don't see any need to implement this. In future
+ *if this is needed we can add code here.
+ */
+ return 0;
+}
+
+int
+notify(xlator_t *this, int event, void *data, ...)
+{
+ return default_notify(this, event, data);
+}
+
+struct xlator_fops fops = {
+ .rename = dg_rename,
+ .ipc = dg_ipc,
+ .setactivelk = dg_setactivelk,
+ .flush = dg_flush,
+ .readdir = dg_readdir,
+ .setxattr = dg_setxattr,
+ .mknod = dg_mknod,
+ .fsetxattr = dg_fsetxattr,
+ .readv = dg_readv,
+ .inodelk = dg_inodelk,
+ .fremovexattr = dg_fremovexattr,
+ .open = dg_open,
+ .xattrop = dg_xattrop,
+ .entrylk = dg_entrylk,
+ .getactivelk = dg_getactivelk,
+ .finodelk = dg_finodelk,
+ .create = dg_create,
+ .discard = dg_discard,
+ .mkdir = dg_mkdir,
+ .lk = dg_lk,
+ .writev = dg_writev,
+ .access = dg_access,
+ .lookup = dg_lookup,
+ .rmdir = dg_rmdir,
+ .fallocate = dg_fallocate,
+ .fstat = dg_fstat,
+ .lease = dg_lease,
+ .stat = dg_stat,
+ .truncate = dg_truncate,
+ .getxattr = dg_getxattr,
+ .symlink = dg_symlink,
+ .zerofill = dg_zerofill,
+ .fsyncdir = dg_fsyncdir,
+ .fgetxattr = dg_fgetxattr,
+ .readdirp = dg_readdirp,
+ .link = dg_link,
+ .fxattrop = dg_fxattrop,
+ .ftruncate = dg_ftruncate,
+ .rchecksum = dg_rchecksum,
+ .unlink = dg_unlink,
+ .fentrylk = dg_fentrylk,
+ .getspec = dg_getspec,
+ .setattr = dg_setattr,
+ .fsync = dg_fsync,
+ .statfs = dg_statfs,
+ .seek = dg_seek,
+ .fsetattr = dg_fsetattr,
+ .opendir = dg_opendir,
+ .readlink = dg_readlink,
+ .removexattr = dg_removexattr,
+};
+
+struct xlator_cbks cbks = {
+ .forget = dg_forget,
+ .release = dg_release,
+ .releasedir = dg_releasedir,
+};
+
+struct volume_options options[] = {
+ {
+ .key = {"delay-percentage"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .default_value = "10%",
+ .description = "Percentage delay of operations when enabled.",
+ .op_version = {GD_OP_VERSION_3_13_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"delay-gen"},
+ },
+
+ {
+ .key = {"delay-duration"},
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Delay duration in micro seconds",
+ .default_value = "100000",
+ .op_version = {GD_OP_VERSION_3_13_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"delay-gen"},
+ },
+
+ {
+ .key = {"enable"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Accepts a string which takes ',' separated fop "
+ "strings to denote which fops are enabled for delay",
+ .op_version = {GD_OP_VERSION_3_13_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"delay-gen"},
+ .default_value = "",
+ },
+
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {GD_OP_VERSION_3_12_0},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "delay-gen",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/debug/delay-gen/src/delay-gen.h b/xlators/debug/delay-gen/src/delay-gen.h
new file mode 100644
index 00000000000..afa95e5eb2d
--- /dev/null
+++ b/xlators/debug/delay-gen/src/delay-gen.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __DELAY_GEN_H__
+#define __DELAY_GEN_H__
+
+#include "delay-gen-mem-types.h"
+#include "delay-gen-messages.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+typedef struct {
+ int enable[GF_FOP_MAXVALUE];
+ int op_count;
+ int delay_ppm;
+ int delay_duration;
+} dg_t;
+
+#endif /* __DELAY_GEN_H__ */
diff --git a/xlators/debug/error-gen/src/Makefile.am b/xlators/debug/error-gen/src/Makefile.am
index df908035888..038d2e8e66d 100644
--- a/xlators/debug/error-gen/src/Makefile.am
+++ b/xlators/debug/error-gen/src/Makefile.am
@@ -2,15 +2,17 @@
xlator_LTLIBRARIES = error-gen.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-error_gen_la_LDFLAGS = -module -avoidversion
+error_gen_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
error_gen_la_SOURCES = error-gen.c
error_gen_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = error-gen.h error-gen-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/error-gen/src/error-gen-mem-types.h b/xlators/debug/error-gen/src/error-gen-mem-types.h
index b643dc5f71b..b9b713af8fc 100644
--- a/xlators/debug/error-gen/src/error-gen-mem-types.h
+++ b/xlators/debug/error-gen/src/error-gen-mem-types.h
@@ -1,30 +1,20 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __ERROR_GEN_MEM_TYPES_H__
#define __ERROR_GEN_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_error_gen_mem_types_ {
- gf_error_gen_mt_eg_t = gf_common_mt_end + 1,
- gf_error_gen_mt_end
+ gf_error_gen_mt_eg_t = gf_common_mt_end + 1,
+ gf_error_gen_mt_end
};
#endif
diff --git a/xlators/debug/error-gen/src/error-gen.c b/xlators/debug/error-gen/src/error-gen.c
index 6ecca2ee508..d45655ef4c3 100644
--- a/xlators/debug/error-gen/src/error-gen.c
+++ b/xlators/debug/error-gen/src/error-gen.c
@@ -1,2150 +1,1663 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#include "xlator.h"
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/xlator.h>
#include "error-gen.h"
+#include <glusterfs/statedump.h>
+#include <glusterfs/defaults.h>
-sys_error_t error_no_list[] = {
- [GF_FOP_LOOKUP] = { .error_no_count = 4,
- .error_no = {ENOENT,ENOTDIR,
- ENAMETOOLONG,EAGAIN}},
- [GF_FOP_STAT] = { .error_no_count = 7,
- .error_no = {EACCES,EBADF,EFAULT,
- ENAMETOOLONG,ENOENT,
- ENOMEM,ENOTDIR}},
- [GF_FOP_READLINK] = { .error_no_count = 8,
- .error_no = {EACCES,EFAULT,EINVAL,EIO,
- ENAMETOOLONG,ENOENT,ENOMEM,
- ENOTDIR}},
- [GF_FOP_MKNOD] = { .error_no_count = 11,
- .error_no = {EACCES,EEXIST,EFAULT,
- EINVAL,ENAMETOOLONG,
- ENOENT,ENOMEM,ENOSPC,
- ENOTDIR,EPERM,EROFS}},
- [GF_FOP_MKDIR] = { .error_no_count = 10,
- .error_no = {EACCES,EEXIST,EFAULT,
- ENAMETOOLONG,ENOENT,
- ENOMEM,ENOSPC,ENOTDIR,
- EPERM,EROFS}},
- [GF_FOP_UNLINK] = { .error_no_count = 10,
- .error_no = {EACCES,EBUSY,EFAULT,EIO,
- EISDIR,ENAMETOOLONG,
- ENOENT,ENOMEM,ENOTDIR,
- EPERM,EROFS}},
- [GF_FOP_RMDIR] = { .error_no_count = 8,
- .error_no = {EACCES,EBUSY,EFAULT,
- ENOMEM,ENOTDIR,ENOTEMPTY,
- EPERM,EROFS}},
- [GF_FOP_SYMLINK] = { .error_no_count = 11,
- .error_no = {EACCES,EEXIST,EFAULT,EIO,
- ENAMETOOLONG,ENOENT,ENOMEM,
- ENOSPC,ENOTDIR,EPERM,
- EROFS}},
- [GF_FOP_RENAME] = { .error_no_count = 13,
- .error_no = {EACCES,EBUSY,EFAULT,
- EINVAL,EISDIR,EMLINK,
- ENAMETOOLONG,ENOENT,ENOMEM,
- ENOSPC,ENOTDIR,EEXIST,
- EXDEV}},
- [GF_FOP_LINK] = { .error_no_count = 13,
- .error_no = {EACCES,EFAULT,EEXIST,EIO,
- EMLINK,ENAMETOOLONG,
- ENOENT,ENOMEM,ENOSPC,
- ENOTDIR,EPERM,EROFS,
- EXDEV}},
- [GF_FOP_TRUNCATE] = { .error_no_count = 10,
- .error_no = {EACCES,EFAULT,EFBIG,
- EINTR,EINVAL,EIO,EISDIR,
- ENAMETOOLONG,ENOENT,
- EISDIR}},
- [GF_FOP_CREATE] = {.error_no_count = 10,
- .error_no = {EACCES,EEXIST,EFAULT,
- EISDIR,EMFILE,ENAMETOOLONG,
- ENFILE,ENODEV,ENOENT,
- ENODEV}},
- [GF_FOP_OPEN] = { .error_no_count = 10,
- .error_no = {EACCES,EEXIST,EFAULT,
- EISDIR,EMFILE,
- ENAMETOOLONG,ENFILE,
- ENODEV,ENOENT,ENOMEM}},
- [GF_FOP_READ] = { .error_no_count = 5,
- .error_no = {EINVAL,EBADF,EFAULT,EISDIR,
- ENAMETOOLONG}},
- [GF_FOP_WRITE] = { .error_no_count = 5,
- .error_no = {EINVAL,EBADF,EFAULT,EISDIR,
- ENAMETOOLONG}},
- [GF_FOP_STATFS] = {.error_no_count = 10,
- .error_no = {EACCES,EBADF,EFAULT,EINTR,
- EIO,ENAMETOOLONG,ENOENT,
- ENOMEM,ENOSYS,ENOTDIR}},
- [GF_FOP_FLUSH] = { .error_no_count = 5,
- .error_no = {EACCES,EFAULT,
- ENAMETOOLONG,ENOSYS,
- ENOENT}},
- [GF_FOP_FSYNC] = { .error_no_count = 4,
- .error_no = {EBADF,EIO,EROFS,EINVAL}},
- [GF_FOP_SETXATTR] = { .error_no_count = 4,
- .error_no = {EACCES,EBADF,EINTR,
- ENAMETOOLONG}},
- [GF_FOP_GETXATTR] = { .error_no_count = 4,
- .error_no = {EACCES,EBADF,ENAMETOOLONG,
- EINTR}},
- [GF_FOP_REMOVEXATTR] = { .error_no_count = 4,
- .error_no = {EACCES,EBADF,ENAMETOOLONG,
- EINTR}},
- [GF_FOP_FSETXATTR] = { .error_no_count = 4,
- .error_no = {EACCES,EBADF,EINTR,
- ENAMETOOLONG}},
- [GF_FOP_FGETXATTR] = { .error_no_count = 4,
- .error_no = {EACCES,EBADF,ENAMETOOLONG,
- EINTR}},
- [GF_FOP_FREMOVEXATTR] = { .error_no_count = 4,
- .error_no = {EACCES,EBADF,ENAMETOOLONG,
- EINTR}},
- [GF_FOP_OPENDIR] = { .error_no_count = 8,
- .error_no = {EACCES,EEXIST,EFAULT,
- EISDIR,EMFILE,
- ENAMETOOLONG,ENFILE,
- ENODEV}},
- [GF_FOP_READDIR] = { .error_no_count = 5,
- .error_no = {EINVAL,EACCES,EBADF,
- EMFILE,ENOENT}},
- [GF_FOP_READDIRP] = { .error_no_count = 5,
- .error_no = {EINVAL,EACCES,EBADF,
- EMFILE,ENOENT}},
- [GF_FOP_FSYNCDIR] = { .error_no_count = 4,
- .error_no = {EBADF,EIO,EROFS,EINVAL}},
- [GF_FOP_ACCESS] = { .error_no_count = 8,
- .error_no = {EACCES,ENAMETOOLONG,
- ENOENT,ENOTDIR,EROFS,
- EFAULT,EINVAL,EIO}},
- [GF_FOP_FTRUNCATE] = { .error_no_count = 9,
- .error_no = {EACCES,EFAULT,EFBIG,
- EINTR,EINVAL,EIO,EISDIR,
- ENAMETOOLONG,ENOENT}},
- [GF_FOP_FSTAT] = { .error_no_count = 7,
- .error_no = {EACCES,EBADF,EFAULT,
- ENAMETOOLONG,ENOENT,
- ENOMEM,ENOTDIR}},
- [GF_FOP_LK] = { .error_no_count = 4,
- .error_no = {EACCES,EFAULT,ENOENT,
- EINTR}},
- [GF_FOP_XATTROP] = { .error_no_count = 5,
- .error_no = {EACCES,EFAULT,
- ENAMETOOLONG,ENOSYS,
- ENOENT}},
- [GF_FOP_FXATTROP] = { .error_no_count = 4,
- .error_no = {EBADF,EIO,EROFS,EINVAL}},
- [GF_FOP_INODELK] = { .error_no_count = 4,
- .error_no = {EACCES,EBADF,EINTR,
- ENAMETOOLONG}},
- [GF_FOP_FINODELK] = { .error_no_count = 4,
- .error_no = {EACCES,EBADF,EINTR,
- ENAMETOOLONG}},
- [GF_FOP_ENTRYLK] = { .error_no_count = 4,
- .error_no = {EACCES,EBADF,
- ENAMETOOLONG,EINTR}},
- [GF_FOP_FENTRYLK] = { .error_no_count = 10,
- .error_no = {EACCES,EEXIST,EFAULT,
- EISDIR,EMFILE,
- ENAMETOOLONG,ENFILE,
- ENODEV,ENOENT,ENOMEM}},
- [GF_FOP_SETATTR] = {.error_no_count = 11,
- .error_no = {EACCES,EFAULT,EIO,
- ENAMETOOLONG,ENOENT,
- ENOMEM,ENOTDIR,EPERM,
- EROFS,EBADF,EIO}},
- [GF_FOP_FSETATTR] = { .error_no_count = 11,
- .error_no = {EACCES,EFAULT,EIO,
- ENAMETOOLONG,ENOENT,
- ENOMEM,ENOTDIR,EPERM,
- EROFS,EBADF,EIO}},
- [GF_FOP_GETSPEC] = { .error_no_count = 4,
- .error_no = {EACCES,EBADF,ENAMETOOLONG,
- EINTR}}
-};
-
-int
-generate_rand_no (int op_no)
-{
- int rand_no = 0;
-
- if (op_no < GF_FOP_MAXVALUE)
- rand_no = rand () % error_no_list[op_no].error_no_count;
- return rand_no;
-}
-
-int
-conv_errno_to_int (char **error_no)
-{
- if (!strcmp ((*error_no), "ENOENT"))
- return ENOENT;
- else if (!strcmp ((*error_no), "ENOTDIR"))
- return ENOTDIR;
- else if (!strcmp ((*error_no), "ENAMETOOLONG"))
- return ENAMETOOLONG;
- else if (!strcmp ((*error_no), "EACCES"))
- return EACCES;
- else if (!strcmp ((*error_no), "EBADF"))
- return EBADF;
- else if (!strcmp ((*error_no), "EFAULT"))
- return EFAULT;
- else if (!strcmp ((*error_no), "ENOMEM"))
- return ENOMEM;
- else if (!strcmp ((*error_no), "EINVAL"))
- return EINVAL;
- else if (!strcmp ((*error_no), "EIO"))
- return EIO;
- else if (!strcmp ((*error_no), "EEXIST"))
- return EEXIST;
- else if (!strcmp ((*error_no), "ENOSPC"))
- return ENOSPC;
- else if (!strcmp ((*error_no), "EPERM"))
- return EPERM;
- else if (!strcmp ((*error_no), "EROFS"))
- return EROFS;
- else if (!strcmp ((*error_no), "EBUSY"))
- return EBUSY;
- else if (!strcmp ((*error_no), "EISDIR"))
- return EISDIR;
- else if (!strcmp ((*error_no), "ENOTEMPTY"))
- return ENOTEMPTY;
- else if (!strcmp ((*error_no), "EMLINK"))
- return EMLINK;
- else if (!strcmp ((*error_no), "ENODEV"))
- return ENODEV;
- else if (!strcmp ((*error_no), "EXDEV"))
- return EXDEV;
- else if (!strcmp ((*error_no), "EMFILE"))
- return EMFILE;
- else if (!strcmp ((*error_no), "ENFILE"))
- return ENFILE;
- else if (!strcmp ((*error_no), "ENOSYS"))
- return ENOSYS;
- else if (!strcmp ((*error_no), "EINTR"))
- return EINTR;
- else if (!strcmp ((*error_no), "EFBIG"))
- return EFBIG;
- else
- return EAGAIN;
-}
-
-int
-get_fop_int (char **op_no_str)
-{
- if (!strcmp ((*op_no_str), "lookup"))
- return GF_FOP_LOOKUP;
- else if (!strcmp ((*op_no_str), "stat"))
- return GF_FOP_STAT;
- else if (!strcmp ((*op_no_str), "readlink"))
- return GF_FOP_READLINK;
- else if (!strcmp ((*op_no_str), "mknod"))
- return GF_FOP_MKNOD;
- else if (!strcmp ((*op_no_str), "mkdir"))
- return GF_FOP_MKDIR;
- else if (!strcmp ((*op_no_str), "unlink"))
- return GF_FOP_UNLINK;
- else if (!strcmp ((*op_no_str), "rmdir"))
- return GF_FOP_RMDIR;
- else if (!strcmp ((*op_no_str), "symlink"))
- return GF_FOP_SYMLINK;
- else if (!strcmp ((*op_no_str), "rename"))
- return GF_FOP_RENAME;
- else if (!strcmp ((*op_no_str), "link"))
- return GF_FOP_LINK;
- else if (!strcmp ((*op_no_str), "truncate"))
- return GF_FOP_TRUNCATE;
- else if (!strcmp ((*op_no_str), "create"))
- return GF_FOP_CREATE;
- else if (!strcmp ((*op_no_str), "open"))
- return GF_FOP_OPEN;
- else if (!strcmp ((*op_no_str), "readv"))
- return GF_FOP_READ;
- else if (!strcmp ((*op_no_str), "writev"))
- return GF_FOP_WRITE;
- else if (!strcmp ((*op_no_str), "statfs"))
- return GF_FOP_STATFS;
- else if (!strcmp ((*op_no_str), "flush"))
- return GF_FOP_FLUSH;
- else if (!strcmp ((*op_no_str), "fsync"))
- return GF_FOP_FSYNC;
- else if (!strcmp ((*op_no_str), "setxattr"))
- return GF_FOP_SETXATTR;
- else if (!strcmp ((*op_no_str), "getxattr"))
- return GF_FOP_GETXATTR;
- else if (!strcmp ((*op_no_str), "removexattr"))
- return GF_FOP_REMOVEXATTR;
- else if (!strcmp ((*op_no_str), "fsetxattr"))
- return GF_FOP_FSETXATTR;
- else if (!strcmp ((*op_no_str), "fgetxattr"))
- return GF_FOP_FGETXATTR;
- else if (!strcmp ((*op_no_str), "fremovexattr"))
- return GF_FOP_FREMOVEXATTR;
- else if (!strcmp ((*op_no_str), "opendir"))
- return GF_FOP_OPENDIR;
- else if (!strcmp ((*op_no_str), "readdir"))
- return GF_FOP_READDIR;
- else if (!strcmp ((*op_no_str), "readdirp"))
- return GF_FOP_READDIRP;
- else if (!strcmp ((*op_no_str), "fsyncdir"))
- return GF_FOP_FSYNCDIR;
- else if (!strcmp ((*op_no_str), "access"))
- return GF_FOP_ACCESS;
- else if (!strcmp ((*op_no_str), "ftruncate"))
- return GF_FOP_FTRUNCATE;
- else if (!strcmp ((*op_no_str), "fstat"))
- return GF_FOP_FSTAT;
- else if (!strcmp ((*op_no_str), "lk"))
- return GF_FOP_LK;
- else if (!strcmp ((*op_no_str), "xattrop"))
- return GF_FOP_XATTROP;
- else if (!strcmp ((*op_no_str), "fxattrop"))
- return GF_FOP_FXATTROP;
- else if (!strcmp ((*op_no_str), "inodelk"))
- return GF_FOP_INODELK;
- else if (!strcmp ((*op_no_str), "finodelk"))
- return GF_FOP_FINODELK;
- else if (!strcmp ((*op_no_str), "etrylk"))
- return GF_FOP_ENTRYLK;
- else if (!strcmp ((*op_no_str), "fentrylk"))
- return GF_FOP_FENTRYLK;
- else if (!strcmp ((*op_no_str), "setattr"))
- return GF_FOP_SETATTR;
- else if (!strcmp ((*op_no_str), "fsetattr"))
- return GF_FOP_FSETATTR;
- else if (!strcmp ((*op_no_str), "getspec"))
- return GF_FOP_GETSPEC;
- else
- return -1;
-}
-
-int
-error_gen (xlator_t *this, int op_no)
-{
- eg_t *egp = NULL;
- int count = 0;
- int failure_iter_no = GF_FAILURE_DEFAULT;
- char *error_no = NULL;
- int rand_no = 0;
- int ret = 0;
-
- egp = this->private;
+/*
+ * The user can specify an error probability as a float percentage, but we
+ * store it internally as a numerator with this as the denominator. When it's
+ * used, it's like this:
+ *
+ * (rand() % FAILURE_GRANULARITY) < error_rate
+ *
+ * To minimize rounding errors from the modulo operation, it's good for this to
+ * be a power of two.
+ *
+ * (BTW this is just the normal case. If "random-failure" is set, that does
+ * something completely different and this number is irrelevant. See error_gen
+ * for the legacy code.)
+ */
+#define FAILURE_GRANULARITY (1 << 20)
- LOCK (&egp->lock);
+sys_error_t error_no_list[] = {
+ [GF_FOP_LOOKUP] = {.error_no_count = 4,
+ .error_no = {ENOENT, ENOTDIR, ENAMETOOLONG, EAGAIN}},
+ [GF_FOP_STAT] = {.error_no_count = 6,
+ .error_no = {EACCES, EFAULT, ENAMETOOLONG, ENOENT, ENOMEM,
+ ENOTDIR}},
+ [GF_FOP_READLINK] = {.error_no_count = 8,
+ .error_no = {EACCES, EFAULT, EINVAL, EIO, ENAMETOOLONG,
+ ENOENT, ENOMEM, ENOTDIR}},
+ [GF_FOP_MKNOD] = {.error_no_count = 11,
+ .error_no = {EACCES, EEXIST, EFAULT, EINVAL, ENAMETOOLONG,
+ ENOENT, ENOMEM, ENOSPC, ENOTDIR, EPERM,
+ EROFS}},
+ [GF_FOP_MKDIR] = {.error_no_count = 10,
+ .error_no = {EACCES, EEXIST, EFAULT, ENAMETOOLONG, ENOENT,
+ ENOMEM, ENOSPC, ENOTDIR, EPERM, EROFS}},
+ [GF_FOP_UNLINK] = {.error_no_count = 10,
+ .error_no = {EACCES, EBUSY, EFAULT, EIO, EISDIR,
+ ENAMETOOLONG, ENOENT, ENOMEM, ENOTDIR,
+ EPERM, EROFS}},
+ [GF_FOP_RMDIR] = {.error_no_count = 8,
+ .error_no = {EACCES, EBUSY, EFAULT, ENOMEM, ENOTDIR,
+ ENOTEMPTY, EPERM, EROFS}},
+ [GF_FOP_SYMLINK] = {.error_no_count = 11,
+ .error_no = {EACCES, EEXIST, EFAULT, EIO, ENAMETOOLONG,
+ ENOENT, ENOMEM, ENOSPC, ENOTDIR, EPERM,
+ EROFS}},
+ [GF_FOP_RENAME] = {.error_no_count = 13,
+ .error_no = {EACCES, EBUSY, EFAULT, EINVAL, EISDIR,
+ EMLINK, ENAMETOOLONG, ENOENT, ENOMEM,
+ ENOSPC, ENOTDIR, EEXIST, EXDEV}},
+ [GF_FOP_LINK] = {.error_no_count = 13,
+ .error_no = {EACCES, EFAULT, EEXIST, EIO, EMLINK,
+ ENAMETOOLONG, ENOENT, ENOMEM, ENOSPC, ENOTDIR,
+ EPERM, EROFS, EXDEV}},
+ [GF_FOP_TRUNCATE] = {.error_no_count = 10,
+ .error_no = {EACCES, EFAULT, EFBIG, EINTR, EINVAL, EIO,
+ EISDIR, ENAMETOOLONG, ENOENT, EISDIR}},
+ [GF_FOP_CREATE] = {.error_no_count = 10,
+ .error_no = {EACCES, EEXIST, EFAULT, EISDIR, EMFILE,
+ ENAMETOOLONG, ENFILE, ENODEV, ENOENT,
+ ENODEV}},
+ [GF_FOP_OPEN] = {.error_no_count = 10,
+ .error_no = {EACCES, EEXIST, EFAULT, EISDIR, EMFILE,
+ ENAMETOOLONG, ENFILE, ENODEV, ENOENT,
+ ENOMEM}},
+ [GF_FOP_READ] = {.error_no_count = 5,
+ .error_no = {EINVAL, EBADF, EFAULT, EISDIR, ENAMETOOLONG}},
+ [GF_FOP_WRITE] = {.error_no_count = 7,
+ .error_no = {EINVAL, EBADF, EFAULT, EISDIR, ENAMETOOLONG,
+ ENOSPC, GF_ERROR_SHORT_WRITE}},
+ [GF_FOP_STATFS] = {.error_no_count = 9,
+ .error_no = {EACCES, EFAULT, EINTR, EIO, ENAMETOOLONG,
+ ENOENT, ENOMEM, ENOSYS, ENOTDIR}},
+ [GF_FOP_FLUSH] = {.error_no_count = 5,
+ .error_no = {EACCES, EFAULT, ENAMETOOLONG, ENOSYS,
+ ENOENT}},
+ [GF_FOP_FSYNC] = {.error_no_count = 4,
+ .error_no = {EBADF, EIO, EROFS, EINVAL}},
+ [GF_FOP_SETXATTR] = {.error_no_count = 3,
+ .error_no = {EACCES, EINTR, ENAMETOOLONG}},
+ [GF_FOP_GETXATTR] = {.error_no_count = 3,
+ .error_no = {EACCES, ENAMETOOLONG, EINTR}},
+ [GF_FOP_REMOVEXATTR] = {.error_no_count = 3,
+ .error_no = {EACCES, ENAMETOOLONG, EINTR}},
+ [GF_FOP_FSETXATTR] = {.error_no_count = 4,
+ .error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}},
+ [GF_FOP_FGETXATTR] = {.error_no_count = 4,
+ .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}},
+ [GF_FOP_FREMOVEXATTR] = {.error_no_count = 4,
+ .error_no = {EACCES, EBADF, ENAMETOOLONG, EINTR}},
+ [GF_FOP_OPENDIR] = {.error_no_count = 8,
+ .error_no = {EACCES, EEXIST, EFAULT, EISDIR, EMFILE,
+ ENAMETOOLONG, ENFILE, ENODEV}},
+ [GF_FOP_READDIR] = {.error_no_count = 5,
+ .error_no = {EINVAL, EACCES, EBADF, EMFILE, ENOENT}},
+ [GF_FOP_READDIRP] = {.error_no_count = 5,
+ .error_no = {EINVAL, EACCES, EBADF, EMFILE, ENOENT}},
+ [GF_FOP_FSYNCDIR] = {.error_no_count = 4,
+ .error_no = {EBADF, EIO, EROFS, EINVAL}},
+ [GF_FOP_ACCESS] = {.error_no_count = 8,
+ .error_no = {EACCES, ENAMETOOLONG, ENOENT, ENOTDIR,
+ EROFS, EFAULT, EINVAL, EIO}},
+ [GF_FOP_FTRUNCATE] = {.error_no_count = 9,
+ .error_no = {EACCES, EFAULT, EFBIG, EINTR, EINVAL,
+ EIO, EISDIR, ENAMETOOLONG, ENOENT}},
+ [GF_FOP_FSTAT] = {.error_no_count = 7,
+ .error_no = {EACCES, EBADF, EFAULT, ENAMETOOLONG, ENOENT,
+ ENOMEM, ENOTDIR}},
+ [GF_FOP_LK] = {.error_no_count = 4,
+ .error_no = {EACCES, EFAULT, ENOENT, EINTR}},
+ [GF_FOP_XATTROP] = {.error_no_count = 5,
+ .error_no = {EACCES, EFAULT, ENAMETOOLONG, ENOSYS,
+ ENOENT}},
+ [GF_FOP_FXATTROP] = {.error_no_count = 4,
+ .error_no = {EBADF, EIO, EROFS, EINVAL}},
+ [GF_FOP_INODELK] = {.error_no_count = 3,
+ .error_no = {EACCES, EINTR, ENAMETOOLONG}},
+ [GF_FOP_FINODELK] = {.error_no_count = 4,
+ .error_no = {EACCES, EBADF, EINTR, ENAMETOOLONG}},
+ [GF_FOP_ENTRYLK] = {.error_no_count = 3,
+ .error_no = {EACCES, ENAMETOOLONG, EINTR}},
+ [GF_FOP_FENTRYLK] = {.error_no_count = 10,
+ .error_no = {EACCES, EEXIST, EFAULT, EISDIR, EMFILE,
+ ENAMETOOLONG, ENFILE, ENODEV, ENOENT,
+ ENOMEM}},
+ [GF_FOP_SETATTR] = {.error_no_count = 10,
+ .error_no = {EACCES, EFAULT, EIO, ENAMETOOLONG, ENOENT,
+ ENOMEM, ENOTDIR, EPERM, EROFS, EIO}},
+ [GF_FOP_FSETATTR] = {.error_no_count = 11,
+ .error_no = {EACCES, EFAULT, EIO, ENAMETOOLONG, ENOENT,
+ ENOMEM, ENOTDIR, EPERM, EROFS, EBADF,
+ EIO}},
+ [GF_FOP_GETSPEC] = {.error_no_count = 3,
+ .error_no = {EACCES, ENAMETOOLONG, EINTR}}};
+
+int
+generate_rand_no(int op_no)
+{
+ int rand_no = 0;
+ int error_no_list_size = 0;
+
+ error_no_list_size = sizeof(error_no_list) / sizeof(error_no_list[0]);
+
+ if (op_no < error_no_list_size)
+ /* coverity[DC.WEAK_CRYPTO] */
+ rand_no = rand() % error_no_list[op_no].error_no_count;
+ return rand_no;
+}
+
+int
+conv_errno_to_int(char **error_no)
+{
+ if (!strcmp((*error_no), "ENOENT"))
+ return ENOENT;
+ else if (!strcmp((*error_no), "ENOTDIR"))
+ return ENOTDIR;
+ else if (!strcmp((*error_no), "ENAMETOOLONG"))
+ return ENAMETOOLONG;
+ else if (!strcmp((*error_no), "EACCES"))
+ return EACCES;
+ else if (!strcmp((*error_no), "EBADF"))
+ return EBADF;
+ else if (!strcmp((*error_no), "EFAULT"))
+ return EFAULT;
+ else if (!strcmp((*error_no), "ENOMEM"))
+ return ENOMEM;
+ else if (!strcmp((*error_no), "EINVAL"))
+ return EINVAL;
+ else if (!strcmp((*error_no), "EIO"))
+ return EIO;
+ else if (!strcmp((*error_no), "EEXIST"))
+ return EEXIST;
+ else if (!strcmp((*error_no), "ENOSPC"))
+ return ENOSPC;
+ else if (!strcmp((*error_no), "EPERM"))
+ return EPERM;
+ else if (!strcmp((*error_no), "EROFS"))
+ return EROFS;
+ else if (!strcmp((*error_no), "EBUSY"))
+ return EBUSY;
+ else if (!strcmp((*error_no), "EISDIR"))
+ return EISDIR;
+ else if (!strcmp((*error_no), "ENOTEMPTY"))
+ return ENOTEMPTY;
+ else if (!strcmp((*error_no), "EMLINK"))
+ return EMLINK;
+ else if (!strcmp((*error_no), "ENODEV"))
+ return ENODEV;
+ else if (!strcmp((*error_no), "EXDEV"))
+ return EXDEV;
+ else if (!strcmp((*error_no), "EMFILE"))
+ return EMFILE;
+ else if (!strcmp((*error_no), "ENFILE"))
+ return ENFILE;
+ else if (!strcmp((*error_no), "ENOSYS"))
+ return ENOSYS;
+ else if (!strcmp((*error_no), "EINTR"))
+ return EINTR;
+ else if (!strcmp((*error_no), "EFBIG"))
+ return EFBIG;
+ else if (!strcmp((*error_no), "GF_ERROR_SHORT_WRITE"))
+ return GF_ERROR_SHORT_WRITE;
+ else
+ return EAGAIN;
+}
+
+int
+error_gen(xlator_t *this, int op_no)
+{
+ eg_t *egp = NULL;
+ int count = 0;
+ int error_no_int = 0;
+ int rand_no = 0;
+ int ret = 0;
+ gf_boolean_t should_err = _gf_false;
+ int error_no_list_size = 0;
+
+ egp = this->private;
+
+ if (egp->random_failure) {
+ /*
+ * I honestly don't know why anyone would use this "feature"
+ * but I'll try to preserve its functionality anyway. Without
+ * locking twice to update failure_iter_no and egp->op_count
+ * separately, then not locking at all to update
+ * egp->failure_iter_no. That's not needed for compatibility,
+ * and it's abhorrently wrong. I have *some* standards.
+ */
+ LOCK(&egp->lock);
{
- count = ++egp->op_count;
- failure_iter_no = egp->failure_iter_no;
- error_no = egp->error_no;
+ count = ++(egp->op_count);
+ error_no_int = egp->error_no_int;
+ if ((count % egp->failure_iter_no) == 0) {
+ egp->op_count = 0;
+ /* coverity[DC.WEAK_CRYPTO] */
+ egp->failure_iter_no = 3 + (rand() % GF_UNIVERSAL_ANSWER);
+ should_err = _gf_true;
+ }
}
- UNLOCK (&egp->lock);
-
- if((count % failure_iter_no) == 0) {
- LOCK (&egp->lock);
- {
- egp->op_count = 0;
- }
- UNLOCK (&egp->lock);
-
- if (error_no)
- ret = conv_errno_to_int (&error_no);
- else {
-
- rand_no = generate_rand_no (op_no);
- if (op_no >= GF_FOP_MAXVALUE)
- op_no = 0;
- if (rand_no >= error_no_list[op_no].error_no_count)
- rand_no = 0;
- ret = error_no_list[op_no].error_no[rand_no];
- }
- if (egp->random_failure == _gf_true)
- egp->failure_iter_no = 3 + (rand () % GF_UNIVERSAL_ANSWER);
+ UNLOCK(&egp->lock);
+ } else {
+ /*
+ * It turns out that rand() is almost universally implemented
+ * as a linear congruential PRNG, which is about as cheap as
+ * it gets. This gets us real random behavior, including
+ * phenomena like streaks and dry spells, with controllable
+ * long-term probability, cheaply.
+ */
+ if ((rand() % FAILURE_GRANULARITY) < egp->failure_iter_no) {
+ should_err = _gf_true;
}
- return ret;
-}
-
+ }
+
+ error_no_list_size = sizeof(error_no_list) / sizeof(error_no_list[0]);
+ if (should_err) {
+ if (error_no_int)
+ ret = error_no_int;
+ else {
+ rand_no = generate_rand_no(op_no);
+ if (op_no >= error_no_list_size)
+ op_no = 0;
+ if (rand_no >= error_no_list[op_no].error_no_count)
+ rand_no = 0;
+ ret = error_no_list[op_no].error_no[rand_no];
+ }
+ }
-int
-error_gen_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xdata, struct iatt *postparent)
-{
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode,
- buf, xdata, postparent);
- return 0;
+ return ret;
}
-
int
-error_gen_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xdata)
+error_gen_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_LOOKUP];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_LOOKUP];
- if (enable)
- op_errno = error_gen (this, GF_FOP_LOOKUP);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_LOOKUP);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (lookup, frame, -1, op_errno, NULL, NULL, NULL,
- NULL);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup,
+ loc, xdata);
+ return 0;
}
-
int
-error_gen_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+error_gen_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
-
-int
-error_gen_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_STAT];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_STAT];
- if (enable)
- op_errno = error_gen (this, GF_FOP_STAT);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_STAT);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (stat, frame, -1, op_errno, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(stat, frame, -1, op_errno, NULL, xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat,
+ loc, xdata);
+ return 0;
}
-
int
-error_gen_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
+error_gen_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, xdata);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_SETATTR];
+
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_SETATTR);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(setattr, frame, -1, op_errno, NULL, NULL, xdata);
return 0;
-}
+ }
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setattr,
+ loc, stbuf, valid, xdata);
+ return 0;
+}
int
-error_gen_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
+error_gen_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_SETATTR];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSETATTR];
- if (enable)
- op_errno = error_gen (this, GF_FOP_SETATTR);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_FSETATTR);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (setattr, frame, -1, op_errno, NULL, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(fsetattr, frame, -1, op_errno, NULL, NULL, xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_setattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+ return 0;
}
-
int
-error_gen_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+error_gen_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_FSETATTR];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_TRUNCATE];
- if (enable)
- op_errno = error_gen (this, GF_FOP_FSETATTR);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_TRUNCATE);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fsetattr, frame, -1, op_errno, NULL, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(truncate, frame, -1, op_errno, NULL, NULL, xdata);
return 0;
- }
-
- STACK_WIND (frame, error_gen_setattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetattr,
- fd, stbuf, valid, xdata);
- return 0;
-}
+ }
-
-int
-error_gen_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate,
+ loc, offset, xdata);
+ return 0;
}
-
int
-error_gen_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset, dict_t *xdata)
+error_gen_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_TRUNCATE];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FTRUNCATE];
- if (enable)
- op_errno = error_gen (this, GF_FOP_TRUNCATE);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_FTRUNCATE);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (truncate, frame, -1, op_errno,
- NULL, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(ftruncate, frame, -1, op_errno, NULL, NULL, xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc, offset, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
}
-
int
-error_gen_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
- return 0;
-}
-
-
-int
-error_gen_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd,
- off_t offset, dict_t *xdata)
+error_gen_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp =NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_FTRUNCATE];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_ACCESS];
- if (enable)
- op_errno = error_gen (this, GF_FOP_FTRUNCATE);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_ACCESS);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (ftruncate, frame, -1, op_errno,
- NULL, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(access, frame, -1, op_errno, xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd, offset, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->access,
+ loc, mask, xdata);
+ return 0;
}
-
int
-error_gen_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+error_gen_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
- return 0;
-}
-
-
-int
-error_gen_access (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t mask, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_ACCESS];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_READLINK];
- if (enable)
- op_errno = error_gen (this, GF_FOP_ACCESS);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_READLINK);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (access, frame, -1, op_errno, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(readlink, frame, -1, op_errno, NULL, NULL, xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_access_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access,
- loc, mask, xdata);
- return 0;
-}
-
-
-int
-error_gen_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *path, struct iatt *sbuf, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, sbuf, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readlink,
+ loc, size, xdata);
+ return 0;
}
-
int
-error_gen_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc,
- size_t size, dict_t *xdata)
+error_gen_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_READLINK];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_MKNOD];
- if (enable)
- op_errno = error_gen (this, GF_FOP_READLINK);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_MKNOD);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (readlink, frame, -1, op_errno, NULL, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_readlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink,
- loc, size, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
+ loc, mode, rdev, umask, xdata);
+ return 0;
}
-
int
-error_gen_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+error_gen_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno,
- inode, buf,
- preparent, postparent, xdata);
- return 0;
-}
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+ egp = this->private;
+ enable = egp->enable[GF_FOP_MKDIR];
-int
-error_gen_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
-
- egp = this->private;
- enable = egp->enable[GF_FOP_MKNOD];
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_MKDIR);
- if (enable)
- op_errno = error_gen (this, GF_FOP_MKNOD);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (mknod, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_mknod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, umask, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+ loc, mode, umask, xdata);
+ return 0;
}
-
int
-error_gen_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+error_gen_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno,
- inode, buf,
- preparent, postparent, xdata);
- return 0;
-}
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
-int
-error_gen_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
-
- egp = this->private;
- enable = egp->enable[GF_FOP_MKDIR];
-
- if (enable)
- op_errno = error_gen (this, GF_FOP_MKDIR);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (mkdir, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, xdata);
- return 0;
- }
-
- STACK_WIND (frame, error_gen_mkdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- loc, mode, umask, xdata);
- return 0;
-}
+ egp = this->private;
+ enable = egp->enable[GF_FOP_UNLINK];
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_UNLINK);
-int
-error_gen_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- preparent, postparent, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(unlink, frame, -1, op_errno, NULL, NULL, xdata);
return 0;
-}
+ }
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+ loc, xflag, xdata);
+ return 0;
+}
int
-error_gen_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
+error_gen_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
-
- egp = this->private;
- enable = egp->enable[GF_FOP_UNLINK];
-
- if (enable)
- op_errno = error_gen (this, GF_FOP_UNLINK);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL,
- xdata);
- return 0;
- }
-
- STACK_WIND (frame, error_gen_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc, xflag, xdata);
- return 0;
-}
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+ egp = this->private;
+ enable = egp->enable[GF_FOP_RMDIR];
-int
-error_gen_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- preparent, postparent, xdata);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_RMDIR);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(rmdir, frame, -1, op_errno, NULL, NULL, xdata);
return 0;
-}
+ }
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir,
+ loc, flags, xdata);
+ return 0;
+}
int
-error_gen_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- dict_t *xdata)
+error_gen_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_RMDIR];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_SYMLINK];
- if (enable)
- op_errno = error_gen (this, GF_FOP_RMDIR);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_SYMLINK);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (rmdir, frame, -1, op_errno, NULL, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(symlink, frame, -1, op_errno, NULL, NULL, NULL,
+ NULL, NULL); /* pre & post parent attr */
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_rmdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- loc, flags, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+ linkpath, loc, umask, xdata);
+ return 0;
}
-
int
-error_gen_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+error_gen_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
- STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
-}
-
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
-int
-error_gen_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, mode_t umask, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
-
- egp = this->private;
- enable = egp->enable[GF_FOP_SYMLINK];
-
- if (enable)
- op_errno = error_gen (this, GF_FOP_SYMLINK);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (symlink, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL); /* pre & post parent attr */
- return 0;
- }
-
- STACK_WIND (frame, error_gen_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, umask, xdata);
- return 0;
-}
+ egp = this->private;
+ enable = egp->enable[GF_FOP_RENAME];
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_RENAME);
-int
-error_gen_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
- preoldparent, postoldparent,
- prenewparent, postnewparent, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
-}
-
+ }
-int
-error_gen_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
-
- egp = this->private;
- enable = egp->enable[GF_FOP_RENAME];
-
- if (enable)
- op_errno = error_gen (this, GF_FOP_RENAME);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (rename, frame, -1, op_errno, NULL,
- NULL, NULL, NULL, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame, error_gen_rename_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename,
- oldloc, newloc, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename,
+ oldloc, newloc, xdata);
+ return 0;
}
-
int
-error_gen_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+error_gen_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
- STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
-}
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+ egp = this->private;
+ enable = egp->enable[GF_FOP_LINK];
-int
-error_gen_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
-
- egp = this->private;
- enable = egp->enable[GF_FOP_LINK];
-
- if (enable)
- op_errno = error_gen (this, GF_FOP_LINK);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (link, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame, error_gen_link_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link,
- oldloc, newloc, xdata);
- return 0;
-}
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_LINK);
-
-int
-error_gen_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(link, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
return 0;
-}
+ }
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+}
int
-error_gen_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
- dict_t *xdata)
+error_gen_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
-
- egp = this->private;
- enable = egp->enable[GF_FOP_CREATE];
-
- if (enable)
- op_errno = error_gen (this, GF_FOP_CREATE);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (create, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame, error_gen_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, umask, fd, xdata);
- return 0;
-}
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+ egp = this->private;
+ enable = egp->enable[GF_FOP_CREATE];
-int
-error_gen_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_CREATE);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
return 0;
-}
+ }
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
+}
int
-error_gen_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, dict_t *xdata)
+error_gen_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_OPEN];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_OPEN];
- if (enable)
- op_errno = error_gen (this, GF_FOP_OPEN);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_OPEN);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (open, frame, -1, op_errno, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(open, frame, -1, op_errno, NULL, xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_open_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc, flags, fd, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
+ loc, flags, fd, xdata);
+ return 0;
}
-
int
-error_gen_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+error_gen_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
- vector, count, stbuf, iobref, xdata);
- return 0;
-}
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+ egp = this->private;
+ enable = egp->enable[GF_FOP_READ];
-int
-error_gen_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
-
- egp = this->private;
- enable = egp->enable[GF_FOP_READ];
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_READ);
- if (enable)
- op_errno = error_gen (this, GF_FOP_READ);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (readv, frame, -1, op_errno, NULL, 0,
- NULL, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(readv, frame, -1, op_errno, NULL, 0, NULL, NULL,
+ xdata);
return 0;
- }
-
+ }
- STACK_WIND (frame, error_gen_readv_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd, size, offset, flags, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
+ fd, size, offset, flags, xdata);
+ return 0;
}
-
int
-error_gen_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+error_gen_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
- return 0;
-}
-
-
-int
-error_gen_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count,
- off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+ struct iovec *shortvec = NULL;
- egp = this->private;
- enable = egp->enable[GF_FOP_WRITE];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_WRITE];
- if (enable)
- op_errno = error_gen (this, GF_FOP_WRITE);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_WRITE);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL, xdata);
+ if (op_errno == GF_ERROR_SHORT_WRITE) {
+ /*
+ * A short write error returns some value less than what was
+ * requested from a write. To simulate this, replace the vector
+ * with one half the size;
+ */
+ shortvec = iov_dup(vector, 1);
+ shortvec->iov_len /= 2;
+ count = 1;
+ goto wind;
+ } else if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(writev, frame, -1, op_errno, NULL, NULL, xdata);
return 0;
- }
+ }
+wind:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
+ fd, shortvec ? shortvec : vector, count, off, flags, iobref,
+ xdata);
- STACK_WIND (frame, error_gen_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd, vector, count, off, flags, iobref, xdata);
- return 0;
-}
-
-
-int
-error_gen_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
- return 0;
+ if (shortvec)
+ GF_FREE(shortvec);
+ return 0;
}
-
int
-error_gen_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+error_gen_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_FLUSH];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FLUSH];
- if (enable)
- op_errno = error_gen (this, GF_FOP_FLUSH);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_FLUSH);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (flush, frame, -1, op_errno, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(flush, frame, -1, op_errno, xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_flush_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->flush,
+ fd, xdata);
+ return 0;
}
-
int
-error_gen_fsync_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+error_gen_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSYNC];
+
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_FSYNC);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(fsync, frame, -1, op_errno, NULL, NULL, xdata);
return 0;
-}
+ }
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
+ fd, flags, xdata);
+ return 0;
+}
int
-error_gen_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
+error_gen_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_FSYNC];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSTAT];
- if (enable)
- op_errno = error_gen (this, GF_FOP_FSYNC);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_FSTAT);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fsync, frame, -1, op_errno, NULL, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(fstat, frame, -1, op_errno, NULL, xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd, flags, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat,
+ fd, xdata);
+ return 0;
}
-
int
-error_gen_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+error_gen_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_OPENDIR];
+
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_OPENDIR);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(opendir, frame, -1, op_errno, NULL, xdata);
return 0;
-}
+ }
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir,
+ loc, fd, xdata);
+ return 0;
+}
int
-error_gen_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+error_gen_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_FSTAT];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSYNCDIR];
- if (enable)
- op_errno = error_gen (this, GF_FOP_FSTAT);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_FSYNCDIR);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fstat, frame, -1, op_errno, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(fsyncdir, frame, -1, op_errno, xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsyncdir,
+ fd, flags, xdata);
+ return 0;
}
-
int
-error_gen_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+error_gen_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_STATFS];
+
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_STATFS);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(statfs, frame, -1, op_errno, NULL, xdata);
return 0;
-}
+ }
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->statfs,
+ loc, xdata);
+ return 0;
+}
int
-error_gen_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
+error_gen_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_OPENDIR];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_SETXATTR];
- if (enable)
- op_errno = error_gen (this, GF_FOP_OPENDIR);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_SETXATTR);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (opendir, frame, -1, op_errno, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(setxattr, frame, -1, op_errno, xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_opendir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir,
- loc, fd, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
}
int
-error_gen_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+error_gen_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_GETXATTR];
+
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_GETXATTR);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(getxattr, frame, -1, op_errno, NULL, xdata);
return 0;
-}
+ }
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr,
+ loc, name, xdata);
+ return 0;
+}
int
-error_gen_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
+error_gen_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
int32_t flags, dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
-
- egp = this->private;
- enable = egp->enable[GF_FOP_FSYNCDIR];
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- if (enable)
- op_errno = error_gen (this, GF_FOP_FSYNCDIR);
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FSETXATTR];
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, op_errno, xdata);
- return 0;
- }
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_FSETXATTR);
- STACK_WIND (frame, error_gen_fsyncdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir,
- fd, flags, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(fsetxattr, frame, -1, op_errno, xdata);
return 0;
-}
-
+ }
-int
-error_gen_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
}
-
int
-error_gen_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+error_gen_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
-
- egp = this->private;
- enable = egp->enable[GF_FOP_STATFS];
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- if (enable)
- op_errno = error_gen (this, GF_FOP_STATFS);
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FGETXATTR];
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (statfs, frame, -1, op_errno, NULL, xdata);
- return 0;
- }
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_FGETXATTR);
- STACK_WIND (frame, error_gen_statfs_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs,
- loc, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(fgetxattr, frame, -1, op_errno, NULL, xdata);
return 0;
-}
-
+ }
-int
-error_gen_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
}
-
int
-error_gen_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *dict, int32_t flags, dict_t *xdata)
+error_gen_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_SETXATTR];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_XATTROP];
- if (enable)
- op_errno = error_gen (this, GF_FOP_SETXATTR);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_XATTROP);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (setxattr, frame, -1, op_errno, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(xattrop, frame, -1, op_errno, NULL, xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_setxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop,
+ loc, flags, dict, xdata);
+ return 0;
}
-
int
-error_gen_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+error_gen_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FXATTROP];
-int
-error_gen_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_FXATTROP);
- egp = this->private;
- enable = egp->enable[GF_FOP_GETXATTR];
-
- if (enable)
- op_errno = error_gen (this, GF_FOP_GETXATTR);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (getxattr, frame, -1, op_errno, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(fxattrop, frame, -1, op_errno, NULL, xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_getxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- loc, name, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fxattrop,
+ fd, flags, dict, xdata);
+ return 0;
}
int
-error_gen_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+error_gen_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+ egp = this->private;
+ enable = egp->enable[GF_FOP_REMOVEXATTR];
-int
-error_gen_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
-
- egp = this->private;
- enable = egp->enable[GF_FOP_FSETXATTR];
-
- if (enable)
- op_errno = error_gen (this, GF_FOP_FSETXATTR);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, op_errno, xdata);
- return 0;
- }
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_REMOVEXATTR);
- STACK_WIND (frame, error_gen_fsetxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr,
- fd, dict, flags, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(removexattr, frame, -1, op_errno, xdata);
return 0;
-}
-
+ }
-int
-error_gen_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ return 0;
}
-
int
-error_gen_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
+error_gen_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_FGETXATTR];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FREMOVEXATTR];
- if (enable)
- op_errno = error_gen (this, GF_FOP_FGETXATTR);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_FREMOVEXATTR);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fgetxattr, frame, -1, op_errno, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(fremovexattr, frame, -1, op_errno, xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_fgetxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr,
- fd, name, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ return 0;
}
-
int
-error_gen_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+error_gen_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+ egp = this->private;
+ enable = egp->enable[GF_FOP_LK];
-int
-error_gen_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_LK);
- egp = this->private;
- enable = egp->enable[GF_FOP_XATTROP];
-
- if (enable)
- op_errno = error_gen (this, GF_FOP_XATTROP);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (xattrop, frame, -1, op_errno, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(lk, frame, -1, op_errno, NULL, xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_xattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk, fd,
+ cmd, lock, xdata);
+ return 0;
}
-
int
-error_gen_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+error_gen_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
-
-
-int
-error_gen_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_FXATTROP];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_INODELK];
- if (enable)
- op_errno = error_gen (this, GF_FOP_FXATTROP);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_INODELK);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fxattrop, frame, -1, op_errno, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(inodelk, frame, -1, op_errno, xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_fxattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict, xdata);
- return 0;
-}
-
-
-int
-error_gen_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->inodelk,
+ volume, loc, cmd, lock, xdata);
+ return 0;
}
-
int
-error_gen_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
+error_gen_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_REMOVEXATTR];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FINODELK];
- if (enable)
- op_errno = error_gen (this, GF_FOP_REMOVEXATTR);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_FINODELK);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (removexattr, frame, -1, op_errno, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(finodelk, frame, -1, op_errno, xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_removexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- loc, name, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->finodelk,
+ volume, fd, cmd, lock, xdata);
+ return 0;
}
int
-error_gen_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+error_gen_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+ egp = this->private;
+ enable = egp->enable[GF_FOP_ENTRYLK];
-int
-error_gen_fremovexattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
-
- egp = this->private;
- enable = egp->enable[GF_FOP_FREMOVEXATTR];
-
- if (enable)
- op_errno = error_gen (this, GF_FOP_FREMOVEXATTR);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fremovexattr, frame, -1, op_errno, xdata);
- return 0;
- }
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_ENTRYLK);
- STACK_WIND (frame, error_gen_fremovexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr,
- fd, name, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(entrylk, frame, -1, op_errno, xdata);
return 0;
-}
-
+ }
-int
-error_gen_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->entrylk,
+ volume, loc, basename, cmd, type, xdata);
+ return 0;
}
-
int
-error_gen_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
+error_gen_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_LK];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_FENTRYLK];
- if (enable)
- op_errno = error_gen (this, GF_FOP_LK);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_FENTRYLK);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (lk, frame, -1, op_errno, NULL, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(fentrylk, frame, -1, op_errno, xdata);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_lk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk,
- fd, cmd, lock, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fentrylk,
+ volume, fd, basename, cmd, type, xdata);
+ return 0;
}
-
int
-error_gen_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+error_gen_getspec(call_frame_t *frame, xlator_t *this, const char *key,
+ int32_t flags)
{
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+ egp = this->private;
+ enable = egp->enable[GF_FOP_GETSPEC];
-int
-error_gen_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_GETSPEC);
- egp = this->private;
- enable = egp->enable[GF_FOP_INODELK];
-
- if (enable)
- op_errno = error_gen (this, GF_FOP_INODELK);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (inodelk, frame, -1, op_errno, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(getspec, frame, -1, op_errno, NULL);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_inodelk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- volume, loc, cmd, lock, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->getspec,
+ key, flags);
+ return 0;
}
-
int
-error_gen_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+error_gen_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
{
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
+
+ egp = this->private;
+ enable = egp->enable[GF_FOP_READDIR];
+
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_READDIR);
+
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(readdir, frame, -1, op_errno, NULL, xdata);
return 0;
-}
+ }
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir,
+ fd, size, off, xdata);
+ return 0;
+}
int
-error_gen_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
+error_gen_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ int op_errno = 0;
+ eg_t *egp = NULL;
+ int enable = 1;
- egp = this->private;
- enable = egp->enable[GF_FOP_FINODELK];
+ egp = this->private;
+ enable = egp->enable[GF_FOP_READDIRP];
- if (enable)
- op_errno = error_gen (this, GF_FOP_FINODELK);
+ if (enable)
+ op_errno = error_gen(this, GF_FOP_READDIRP);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (finodelk, frame, -1, op_errno, xdata);
+ if (op_errno) {
+ GF_ERROR(this, "unwind(-1, %s)", strerror(op_errno));
+ STACK_UNWIND_STRICT(readdirp, frame, -1, op_errno, NULL, NULL);
return 0;
- }
+ }
- STACK_WIND (frame, error_gen_finodelk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk,
- volume, fd, cmd, lock, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+ fd, size, off, dict);
+ return 0;
}
-
-int
-error_gen_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+static void
+error_gen_set_failure(eg_t *pvt, double percent)
{
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ double ppm;
+ GF_ASSERT(pvt);
-int
-error_gen_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ ppm = (percent / 100.0) * (double)FAILURE_GRANULARITY;
+ pvt->failure_iter_no = (int)ppm;
+}
- egp = this->private;
- enable = egp->enable[GF_FOP_ENTRYLK];
+static void
+error_gen_parse_fill_fops(eg_t *pvt, char *enable_fops)
+{
+ char *op_no_str = NULL;
+ int op_no = -1;
+ int i = 0;
+ xlator_t *this = THIS;
+ char *saveptr = NULL;
- if (enable)
- op_errno = error_gen (this, GF_FOP_ENTRYLK);
+ GF_ASSERT(pvt);
+ GF_ASSERT(this);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (entrylk, frame, -1, op_errno, xdata);
- return 0;
- }
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ pvt->enable[i] = 0;
- STACK_WIND (frame, error_gen_entrylk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->entrylk,
- volume, loc, basename, cmd, type, xdata);
- return 0;
+ if (!enable_fops) {
+ gf_log(this->name, GF_LOG_WARNING, "All fops are enabled.");
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ pvt->enable[i] = 1;
+ } else {
+ op_no_str = strtok_r(enable_fops, ",", &saveptr);
+ while (op_no_str) {
+ op_no = gf_fop_int(op_no_str);
+ if (op_no == -1) {
+ gf_log(this->name, GF_LOG_WARNING, "Wrong option value %s",
+ op_no_str);
+ } else
+ pvt->enable[op_no] = 1;
+
+ op_no_str = strtok_r(NULL, ",", &saveptr);
+ }
+ }
}
-
-int
-error_gen_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+int32_t
+error_gen_priv_dump(xlator_t *this)
{
- STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ char key_prefix[GF_DUMP_MAX_BUF_LEN];
+ int ret = -1;
+ eg_t *conf = NULL;
+ if (!this)
+ goto out;
-int
-error_gen_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
-
- egp = this->private;
- enable = egp->enable[GF_FOP_FENTRYLK];
-
- if (enable)
- op_errno = error_gen (this, GF_FOP_FENTRYLK);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (fentrylk, frame, -1, op_errno, xdata);
- return 0;
- }
-
- STACK_WIND (frame, error_gen_fentrylk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fentrylk,
- volume, fd, basename, cmd, type, xdata);
- return 0;
-}
+ conf = this->private;
+ if (!conf)
+ goto out;
+ ret = TRY_LOCK(&conf->lock);
+ if (ret != 0) {
+ return ret;
+ }
-/* Management operations */
+ gf_proc_dump_add_section("xlator.debug.error-gen.%s.priv", this->name);
+ gf_proc_dump_build_key(key_prefix, "xlator.debug.error-gen", "%s.priv",
+ this->name);
+ gf_proc_dump_write("op_count", "%d", conf->op_count);
+ gf_proc_dump_write("failure_iter_no", "%d", conf->failure_iter_no);
+ gf_proc_dump_write("error_no", "%d", conf->error_no_int);
+ gf_proc_dump_write("random_failure", "%d", conf->random_failure);
-int
-error_gen_getspec_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, char *spec_data)
-{
- STACK_UNWIND_STRICT (getspec, frame, op_ret, op_errno, spec_data);
- return 0;
+ UNLOCK(&conf->lock);
+out:
+ return ret;
}
-
-int
-error_gen_getspec (call_frame_t *frame, xlator_t *this, const char *key,
- int32_t flags)
+int32_t
+mem_acct_init(xlator_t *this)
{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
-
- egp = this->private;
- enable = egp->enable[GF_FOP_GETSPEC];
-
- if (enable)
- op_errno = error_gen (this, GF_FOP_GETSPEC);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (getspec, frame, -1, op_errno, NULL);
- return 0;
- }
-
- STACK_WIND (frame, error_gen_getspec_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getspec,
- key, flags);
- return 0;
-}
+ int ret = -1;
+ if (!this)
+ return ret;
-int
-error_gen_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
-{
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
- return 0;
-}
+ ret = xlator_mem_acct_init(this, gf_error_gen_mt_end + 1);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Memory accounting init"
+ " failed");
+ return ret;
+ }
-int
-error_gen_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t off, dict_t *xdata)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
-
- egp = this->private;
- enable = egp->enable[GF_FOP_READDIR];
-
- if (enable)
- op_errno = error_gen (this, GF_FOP_READDIR);
-
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (readdir, frame, -1, op_errno, NULL, xdata);
- return 0;
- }
-
- STACK_WIND (frame, error_gen_readdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir,
- fd, size, off, xdata);
- return 0;
+ return ret;
}
-
int
-error_gen_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
- dict_t *xdata)
+reconfigure(xlator_t *this, dict_t *options)
{
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
- return 0;
-}
+ eg_t *pvt = NULL;
+ int32_t ret = 0;
+ char *error_enable_fops = NULL;
+ char *error_no = NULL;
+ double failure_percent_dbl = 0.0;
+ if (!this || !this->private)
+ goto out;
-int
-error_gen_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t off, dict_t *dict)
-{
- int op_errno = 0;
- eg_t *egp = NULL;
- int enable = 1;
+ pvt = this->private;
- egp = this->private;
- enable = egp->enable[GF_FOP_READDIRP];
+ ret = -1;
- if (enable)
- op_errno = error_gen (this, GF_FOP_READDIRP);
+ GF_OPTION_RECONF("error-no", error_no, options, str, out);
- if (op_errno) {
- GF_ERROR(this, "unwind(-1, %s)", strerror (op_errno));
- STACK_UNWIND_STRICT (readdirp, frame, -1, op_errno, NULL, NULL);
- return 0;
- }
+ if (error_no)
+ pvt->error_no_int = conv_errno_to_int(&error_no);
- STACK_WIND (frame, error_gen_readdirp_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp,
- fd, size, off, dict);
- return 0;
-}
+ GF_OPTION_RECONF("failure", failure_percent_dbl, options, percent, out);
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
+ GF_OPTION_RECONF("enable", error_enable_fops, options, str, out);
- if (!this)
- return ret;
+ GF_OPTION_RECONF("random-failure", pvt->random_failure, options, bool, out);
- ret = xlator_mem_acct_init (this, gf_error_gen_mt_end + 1);
+ error_gen_parse_fill_fops(pvt, error_enable_fops);
+ error_gen_set_failure(pvt, failure_percent_dbl);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- " failed");
- return ret;
- }
-
- return ret;
+ ret = 0;
+out:
+ gf_log(this ? this->name : "error-gen", GF_LOG_DEBUG,
+ "reconfigure returning %d", ret);
+ return ret;
}
int
-init (xlator_t *this)
+init(xlator_t *this)
{
- eg_t *pvt = NULL;
- data_t *error_no = NULL;
- data_t *failure_percent = NULL;
- data_t *enable = NULL;
- gf_boolean_t random_failure = _gf_false;
- int32_t ret = 0;
- char *error_enable_fops = NULL;
- char *op_no_str = NULL;
- int op_no = -1;
- int i = 0;
- int32_t failure_percent_int = 0;
-
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "error-gen not configured with one subvolume");
- ret = -1;
- goto out;
- }
+ eg_t *pvt = NULL;
+ int32_t ret = 0;
+ char *error_enable_fops = NULL;
+ char *error_no = NULL;
+ double failure_percent_dbl = 0.0;
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
+ if (!this->children || this->children->next) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "error-gen not configured with one subvolume");
+ ret = -1;
+ goto out;
+ }
- error_no = dict_get (this->options, "error-no");
- failure_percent = dict_get (this->options, "failure");
- enable = dict_get (this->options, "enable");
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
+ }
- pvt = GF_CALLOC (1, sizeof (eg_t), gf_error_gen_mt_eg_t);
+ pvt = GF_CALLOC(1, sizeof(eg_t), gf_error_gen_mt_eg_t);
- if (!pvt) {
- gf_log (this->name, GF_LOG_ERROR,
- "out of memory.");
- ret = -1;
- goto out;
- }
+ if (!pvt) {
+ ret = -1;
+ goto out;
+ }
- LOCK_INIT (&pvt->lock);
+ LOCK_INIT(&pvt->lock);
- for (i = 0; i < GF_FOP_MAXVALUE; i++)
- pvt->enable[i] = 0;
- if (!error_no) {
- gf_log (this->name, GF_LOG_DEBUG,
- "error-no not specified.");
- } else {
- pvt->error_no = data_to_str (error_no);
- }
+ ret = -1;
- if (!failure_percent) {
- gf_log (this->name, GF_LOG_DEBUG,
- "failure percent not specified.");
- pvt->failure_iter_no = 100/GF_FAILURE_DEFAULT;
- } else {
- failure_percent_int = data_to_int32 (failure_percent);
- if (failure_percent_int)
- pvt->failure_iter_no = 100/failure_percent_int;
- else
- pvt->failure_iter_no = 100/GF_FAILURE_DEFAULT;
- }
+ GF_OPTION_INIT("error-no", error_no, str, out);
- if (!enable) {
- gf_log (this->name, GF_LOG_WARNING,
- "All fops are enabled.");
- for (i = 0; i < GF_FOP_MAXVALUE; i++)
- pvt->enable[i] = 1;
- } else {
- error_enable_fops = data_to_str (enable);
- op_no_str = error_enable_fops;
- while ((*error_enable_fops) != '\0') {
- error_enable_fops++;
- if (((*error_enable_fops) == ',') ||
- ((*error_enable_fops) == '\0')) {
- if ((*error_enable_fops) != '\0') {
- (*error_enable_fops) = '\0';
- error_enable_fops++;
- }
- op_no = get_fop_int (&op_no_str);
- if (op_no == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "Wrong option value %s",
- op_no_str);
- } else
- pvt->enable[op_no] = 1;
- op_no_str = error_enable_fops;
- }
- }
- }
+ if (error_no)
+ pvt->error_no_int = conv_errno_to_int(&error_no);
+
+ GF_OPTION_INIT("failure", failure_percent_dbl, percent, out);
- random_failure = dict_get_str_boolean (this->options, "random-failure",
- _gf_false);
- pvt->random_failure = random_failure;
+ GF_OPTION_INIT("enable", error_enable_fops, str, out);
- this->private = pvt;
+ GF_OPTION_INIT("random-failure", pvt->random_failure, bool, out);
- /* Give some seed value here */
- srand (time(NULL));
+ error_gen_parse_fill_fops(pvt, error_enable_fops);
+ error_gen_set_failure(pvt, failure_percent_dbl);
+
+ this->private = pvt;
+
+ /* Give some seed value here. */
+ srand(gf_time());
+
+ ret = 0;
out:
- return ret;
+ if (ret)
+ GF_FREE(pvt);
+ return ret;
}
-
void
-fini (xlator_t *this)
+fini(xlator_t *this)
{
- eg_t *pvt = NULL;
-
- if (!this)
- return;
- pvt = this->private;
+ eg_t *pvt = NULL;
- if (pvt) {
- LOCK_DESTROY (&pvt->lock);
- GF_FREE (pvt);
- gf_log (this->name, GF_LOG_DEBUG, "fini called");
- }
+ if (!this)
return;
+ pvt = this->private;
+
+ if (pvt) {
+ LOCK_DESTROY(&pvt->lock);
+ GF_FREE(pvt);
+ gf_log(this->name, GF_LOG_DEBUG, "fini called");
+ }
+ return;
}
-struct xlator_fops cbks = {
+struct xlator_dumpops dumpops = {
+ .priv = error_gen_priv_dump,
};
+struct xlator_cbks cbks;
+
struct xlator_fops fops = {
- .lookup = error_gen_lookup,
- .stat = error_gen_stat,
- .readlink = error_gen_readlink,
- .mknod = error_gen_mknod,
- .mkdir = error_gen_mkdir,
- .unlink = error_gen_unlink,
- .rmdir = error_gen_rmdir,
- .symlink = error_gen_symlink,
- .rename = error_gen_rename,
- .link = error_gen_link,
- .truncate = error_gen_truncate,
- .create = error_gen_create,
- .open = error_gen_open,
- .readv = error_gen_readv,
- .writev = error_gen_writev,
- .statfs = error_gen_statfs,
- .flush = error_gen_flush,
- .fsync = error_gen_fsync,
- .setxattr = error_gen_setxattr,
- .getxattr = error_gen_getxattr,
- .removexattr = error_gen_removexattr,
- .fsetxattr = error_gen_fsetxattr,
- .fgetxattr = error_gen_fgetxattr,
- .fremovexattr = error_gen_fremovexattr,
- .opendir = error_gen_opendir,
- .readdir = error_gen_readdir,
- .readdirp = error_gen_readdirp,
- .fsyncdir = error_gen_fsyncdir,
- .access = error_gen_access,
- .ftruncate = error_gen_ftruncate,
- .fstat = error_gen_fstat,
- .lk = error_gen_lk,
- .lookup_cbk = error_gen_lookup_cbk,
- .xattrop = error_gen_xattrop,
- .fxattrop = error_gen_fxattrop,
- .inodelk = error_gen_inodelk,
- .finodelk = error_gen_finodelk,
- .entrylk = error_gen_entrylk,
- .fentrylk = error_gen_fentrylk,
- .setattr = error_gen_setattr,
- .fsetattr = error_gen_fsetattr,
- .getspec = error_gen_getspec,
+ .lookup = error_gen_lookup,
+ .stat = error_gen_stat,
+ .readlink = error_gen_readlink,
+ .mknod = error_gen_mknod,
+ .mkdir = error_gen_mkdir,
+ .unlink = error_gen_unlink,
+ .rmdir = error_gen_rmdir,
+ .symlink = error_gen_symlink,
+ .rename = error_gen_rename,
+ .link = error_gen_link,
+ .truncate = error_gen_truncate,
+ .create = error_gen_create,
+ .open = error_gen_open,
+ .readv = error_gen_readv,
+ .writev = error_gen_writev,
+ .statfs = error_gen_statfs,
+ .flush = error_gen_flush,
+ .fsync = error_gen_fsync,
+ .setxattr = error_gen_setxattr,
+ .getxattr = error_gen_getxattr,
+ .removexattr = error_gen_removexattr,
+ .fsetxattr = error_gen_fsetxattr,
+ .fgetxattr = error_gen_fgetxattr,
+ .fremovexattr = error_gen_fremovexattr,
+ .opendir = error_gen_opendir,
+ .readdir = error_gen_readdir,
+ .readdirp = error_gen_readdirp,
+ .fsyncdir = error_gen_fsyncdir,
+ .access = error_gen_access,
+ .ftruncate = error_gen_ftruncate,
+ .fstat = error_gen_fstat,
+ .lk = error_gen_lk,
+ .xattrop = error_gen_xattrop,
+ .fxattrop = error_gen_fxattrop,
+ .inodelk = error_gen_inodelk,
+ .finodelk = error_gen_finodelk,
+ .entrylk = error_gen_entrylk,
+ .fentrylk = error_gen_fentrylk,
+ .setattr = error_gen_setattr,
+ .fsetattr = error_gen_fsetattr,
+ .getspec = error_gen_getspec,
};
struct volume_options options[] = {
- { .key = {"failure"},
- .type = GF_OPTION_TYPE_INT },
- { .key = {"error-no"},
- .value = {"ENOENT","ENOTDIR","ENAMETOOLONG","EACCES","EBADF",
- "EFAULT","ENOMEM","EINVAL","EIO","EEXIST","ENOSPC",
- "EPERM","EROFS","EBUSY","EISDIR","ENOTEMPTY","EMLINK"
- "ENODEV","EXDEV","EMFILE","ENFILE","ENOSYS","EINTR",
- "EFBIG","EAGAIN"},
- .type = GF_OPTION_TYPE_STR },
- { .key = {"random-failure"},
- .type = GF_OPTION_TYPE_BOOL},
- { .key = {"enable"},
- .type = GF_OPTION_TYPE_STR },
- { .key = {NULL} }
+ {
+ .key = {"failure"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .description = "Percentage failure of operations when enabled.",
+ },
+
+ {
+ .key = {"error-no"},
+ .value = {"ENOENT",
+ "ENOTDIR",
+ "ENAMETOOLONG",
+ "EACCES",
+ "EBADF",
+ "EFAULT",
+ "ENOMEM",
+ "EINVAL",
+ "EIO",
+ "EEXIST",
+ "ENOSPC",
+ "EPERM",
+ "EROFS",
+ "EBUSY",
+ "EISDIR",
+ "ENOTEMPTY",
+ "EMLINK"
+ "ENODEV",
+ "EXDEV",
+ "EMFILE",
+ "ENFILE",
+ "ENOSYS",
+ "EINTR",
+ "EFBIG",
+ "EAGAIN",
+ "GF_ERROR_SHORT_WRITE"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {3},
+ .tags = {"error-gen"},
+ .flags = OPT_FLAG_SETTABLE,
+
+ },
+
+ {
+ .key = {"random-failure"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {3},
+ .tags = {"error-gen"},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+
+ {
+ .key = {"enable", "error-fops"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Accepts a string which takes ',' separated fop "
+ "strings to denote which fops are enabled for error",
+ .op_version = {3},
+ .tags = {"error-gen"},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "error-gen",
+ .category = GF_TECH_PREVIEW,
};
diff --git a/xlators/debug/error-gen/src/error-gen.h b/xlators/debug/error-gen/src/error-gen.h
index bb3adb2ab98..2478cd5b21c 100644
--- a/xlators/debug/error-gen/src/error-gen.h
+++ b/xlators/debug/error-gen/src/error-gen.h
@@ -1,46 +1,49 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _ERROR_GEN_H
#define _ERROR_GEN_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "error-gen-mem-types.h"
#define GF_FAILURE_DEFAULT 10
+/*
+ * Pseudo-errors refer to errors beyond the scope of traditional <-1, op_errno>
+ * returns. This facilitates the ability to return unexpected, but not -1 values
+ * and/or to inject operations that lead to implicit error conditions. The range
+ * for pseudo errors resides at a high value to avoid conflicts with the errno
+ * range.
+ */
+enum GF_PSEUDO_ERRORS {
+ GF_ERROR_SHORT_WRITE = 1000, /* short writev return value */
+ GF_ERROR_MAX
+};
+
typedef struct {
- int enable[GF_FOP_MAXVALUE];
- int op_count;
- int failure_iter_no;
- char *error_no;
- gf_boolean_t random_failure;
- gf_lock_t lock;
+ int enable[GF_FOP_MAXVALUE];
+ int op_count;
+ /*
+ * This is only an iteration number in the random-failure case. For
+ * the normal controlled-probability case, it's actually a numerator
+ * for the failure probability (see FAILURE_GRANULARITY declaration).
+ * It's just not worth blowing up the diff by changing it.
+ */
+ int failure_iter_no;
+ int error_no_int;
+ gf_boolean_t random_failure;
+ gf_lock_t lock;
} eg_t;
typedef struct {
- int error_no_count;
- int error_no[20];
+ int error_no_count;
+ int error_no[20];
} sys_error_t;
#endif
diff --git a/xlators/debug/io-stats/src/Makefile.am b/xlators/debug/io-stats/src/Makefile.am
index b894e79c3fe..c69f3caf0fe 100644
--- a/xlators/debug/io-stats/src/Makefile.am
+++ b/xlators/debug/io-stats/src/Makefile.am
@@ -2,14 +2,18 @@
xlator_LTLIBRARIES = io-stats.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-io_stats_la_LDFLAGS = -module -avoidversion
+io_stats_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
io_stats_la_SOURCES = io-stats.c
io_stats_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = io-stats-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -DDATADIR=\"$(localstatedir)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/io-stats/src/io-stats-mem-types.h b/xlators/debug/io-stats/src/io-stats-mem-types.h
index 2063f6d6a07..51d38d8b97c 100644
--- a/xlators/debug/io-stats/src/io-stats-mem-types.h
+++ b/xlators/debug/io-stats/src/io-stats-mem-types.h
@@ -1,35 +1,27 @@
-
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __IO_STATS_MEM_TYPES_H__
#define __IO_STATS_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
+
+extern const char *__progname;
enum gf_io_stats_mem_types_ {
- gf_io_stats_mt_ios_conf = gf_common_mt_end + 1,
- gf_io_stats_mt_ios_fd,
- gf_io_stats_mt_ios_stat,
- gf_io_stats_mt_ios_stat_list,
- gf_io_stats_mt_end
+ gf_io_stats_mt_ios_conf = gf_common_mt_end + 1,
+ gf_io_stats_mt_ios_fd,
+ gf_io_stats_mt_ios_stat,
+ gf_io_stats_mt_ios_stat_list,
+ gf_io_stats_mt_ios_sample_buf,
+ gf_io_stats_mt_ios_sample,
+ gf_io_stats_mt_end
};
#endif
-
diff --git a/xlators/debug/io-stats/src/io-stats.c b/xlators/debug/io-stats/src/io-stats.c
index 2cf28f1ccff..aa00c446e5a 100644
--- a/xlators/debug/io-stats/src/io-stats.c
+++ b/xlators/debug/io-stats/src/io-stats.c
@@ -1,27 +1,14 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#include "xlator.h"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/xlator.h>
+#include <glusterfs/syscall.h>
/**
* xlators/debug/io_stats :
@@ -30,2859 +17,4464 @@
*
* a) total read data - since process start, last interval and per fd
* b) total write data - since process start, last interval and per fd
- * c) counts of read IO block size - since process start, last interval and per fd
- * d) counts of write IO block size - since process start, last interval and per fd
- * e) counts of all FOP types passing through it
+ * c) counts of read IO block size - since process start, last interval and per
+ * fd d) counts of write IO block size - since process start, last interval and
+ * per fd e) counts of all FOP types passing through it
*
- * Usage: setfattr -n io-stats-dump /tmp/filename /mnt/gluster
+ * Usage: setfattr -n trusted.io-stats-dump /tmp/filename /mnt/gluster
+ * output is written to /tmp/filename.<iostats xlator instance name>
*
*/
#include <fnmatch.h>
#include <errno.h>
-#include "glusterfs.h"
-#include "xlator.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
#include "io-stats-mem-types.h"
#include <stdarg.h>
-#include "defaults.h"
-#include "logging.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/syncop.h>
+#include <pwd.h>
+#include <grp.h>
+#include <glusterfs/upcall-utils.h>
+#include <glusterfs/async.h>
#define MAX_LIST_MEMBERS 100
+#define DEFAULT_PWD_BUF_SZ 16384
+#define DEFAULT_GRP_BUF_SZ 16384
+#define IOS_BLOCK_COUNT_SIZE 32
+
+#define IOS_STATS_DUMP_DIR DEFAULT_VAR_RUN_DIRECTORY
typedef enum {
- IOS_STATS_TYPE_NONE,
- IOS_STATS_TYPE_OPEN,
- IOS_STATS_TYPE_READ,
- IOS_STATS_TYPE_WRITE,
- IOS_STATS_TYPE_OPENDIR,
- IOS_STATS_TYPE_READDIRP,
- IOS_STATS_TYPE_READ_THROUGHPUT,
- IOS_STATS_TYPE_WRITE_THROUGHPUT,
- IOS_STATS_TYPE_MAX
-}ios_stats_type_t;
+ IOS_STATS_TYPE_NONE,
+ IOS_STATS_TYPE_OPEN,
+ IOS_STATS_TYPE_READ,
+ IOS_STATS_TYPE_WRITE,
+ IOS_STATS_TYPE_OPENDIR,
+ IOS_STATS_TYPE_READDIRP,
+ IOS_STATS_TYPE_READ_THROUGHPUT,
+ IOS_STATS_TYPE_WRITE_THROUGHPUT,
+ IOS_STATS_TYPE_MAX
+} ios_stats_type_t;
typedef enum {
- IOS_STATS_THRU_READ,
- IOS_STATS_THRU_WRITE,
- IOS_STATS_THRU_MAX,
-}ios_stats_thru_t;
+ IOS_STATS_THRU_READ,
+ IOS_STATS_THRU_WRITE,
+ IOS_STATS_THRU_MAX,
+} ios_stats_thru_t;
+
+/* This is same as gf1_cli_info_op */
+/* had to be defined here again, so we have modularity between
+ xdr, xlator, and library functions */
+typedef enum ios_info_op {
+ GF_IOS_INFO_NONE = 0,
+ GF_IOS_INFO_ALL = 1,
+ GF_IOS_INFO_INCREMENTAL = 2,
+ GF_IOS_INFO_CUMULATIVE = 3,
+ GF_IOS_INFO_CLEAR = 4,
+} ios_info_op_t;
struct ios_stat_lat {
- struct timeval time;
- double throughput;
+ struct timeval time;
+ double throughput;
};
struct ios_stat {
- gf_lock_t lock;
- uuid_t gfid;
- char *filename;
- uint64_t counters [IOS_STATS_TYPE_MAX];
- struct ios_stat_lat thru_counters [IOS_STATS_THRU_MAX];
- int refcnt;
+ gf_lock_t lock;
+ uuid_t gfid;
+ char *filename;
+ gf_atomic_t counters[IOS_STATS_TYPE_MAX];
+ struct ios_stat_lat thru_counters[IOS_STATS_THRU_MAX];
+ gf_atomic_t refcnt;
};
struct ios_stat_list {
- struct list_head list;
- struct ios_stat *iosstat;
- double value;
+ struct list_head list;
+ struct ios_stat *iosstat;
+ double value;
};
struct ios_stat_head {
- gf_lock_t lock;
- double min_cnt;
- uint64_t members;
- struct ios_stat_list *iosstats;
+ gf_lock_t lock;
+ double min_cnt;
+ uint64_t members;
+ struct ios_stat_list *iosstats;
};
+typedef struct _ios_sample_t {
+ uid_t uid;
+ gid_t gid;
+ char identifier[UNIX_PATH_MAX];
+ glusterfs_fop_t fop_type;
+ struct timeval timestamp;
+ double elapsed;
+} ios_sample_t;
+
+typedef struct _ios_sample_buf_t {
+ uint64_t pos; /* Position in write buffer */
+ uint64_t size; /* Size of ring buffer */
+ uint64_t collected; /* Number of samples we've collected */
+ uint64_t observed; /* Number of FOPs we've observed */
+ ios_sample_t *ios_samples; /* Our list of samples */
+} ios_sample_buf_t;
+
struct ios_lat {
- double min;
- double max;
- double avg;
+ double min;
+ double max;
+ double avg;
+ uint64_t total;
};
struct ios_global_stats {
- uint64_t data_written;
- uint64_t data_read;
- uint64_t block_count_write[32];
- uint64_t block_count_read[32];
- uint64_t fop_hits[GF_FOP_MAXVALUE];
- struct timeval started_at;
- struct ios_lat latency[GF_FOP_MAXVALUE];
- uint64_t nr_opens;
- uint64_t max_nr_opens;
- struct timeval max_openfd_time;
+ gf_atomic_t data_written;
+ gf_atomic_t data_read;
+ gf_atomic_t block_count_write[IOS_BLOCK_COUNT_SIZE];
+ gf_atomic_t block_count_read[IOS_BLOCK_COUNT_SIZE];
+ gf_atomic_t fop_hits[GF_FOP_MAXVALUE];
+ gf_atomic_t upcall_hits[GF_UPCALL_FLAGS_MAXVALUE];
+ time_t started_at;
+ struct ios_lat latency[GF_FOP_MAXVALUE];
+ uint64_t nr_opens;
+ uint64_t max_nr_opens;
+ struct timeval max_openfd_time;
};
+typedef enum {
+ IOS_DUMP_TYPE_NONE = 0,
+ IOS_DUMP_TYPE_FILE = 1,
+ IOS_DUMP_TYPE_DICT = 2,
+ IOS_DUMP_TYPE_JSON_FILE = 3,
+ IOS_DUMP_TYPE_SAMPLES = 4,
+ IOS_DUMP_TYPE_MAX = 5
+} ios_dump_type_t;
struct ios_conf {
- gf_lock_t lock;
- struct ios_global_stats cumulative;
- uint64_t increment;
- struct ios_global_stats incremental;
- gf_boolean_t dump_fd_stats;
- gf_boolean_t count_fop_hits;
- gf_boolean_t measure_latency;
- struct ios_stat_head list[IOS_STATS_TYPE_MAX];
- struct ios_stat_head thru_list[IOS_STATS_THRU_MAX];
+ gf_lock_t lock;
+ struct ios_global_stats cumulative;
+ uint64_t increment;
+ struct ios_global_stats incremental;
+ gf_boolean_t dump_fd_stats;
+ gf_boolean_t count_fop_hits;
+ gf_boolean_t measure_latency;
+ struct ios_stat_head list[IOS_STATS_TYPE_MAX];
+ struct ios_stat_head thru_list[IOS_STATS_THRU_MAX];
+ int32_t ios_dump_interval;
+ pthread_t dump_thread;
+ gf_boolean_t dump_thread_should_die;
+ gf_boolean_t dump_thread_running;
+ gf_lock_t ios_sampling_lock;
+ int32_t ios_sample_interval;
+ int32_t ios_sample_buf_size;
+ ios_sample_buf_t *ios_sample_buf;
+ struct dnscache *dnscache;
+ int32_t ios_dnscache_ttl_sec;
+ /*
+ * What we really need here is just a unique value to keep files
+ * created by this instance distinct from those created by any other.
+ * On the client side this isn't a problem, so we just use the
+ * translator name. On the server side conflicts can occur, so the
+ * volfile-generation code automatically sets this (via an option)
+ * to be the brick path.
+ *
+ * NB While the *field* name has changed, it didn't seem worth changing
+ * all of the cases where "xlator_name" is used as a *variable* name.
+ */
+ char *unique_id;
+ ios_dump_type_t dump_format;
};
-
struct ios_fd {
- char *filename;
- uint64_t data_written;
- uint64_t data_read;
- uint64_t block_count_write[32];
- uint64_t block_count_read[32];
- struct timeval opened_at;
+ char *filename;
+ gf_atomic_t data_written;
+ gf_atomic_t data_read;
+ gf_atomic_t block_count_write[IOS_BLOCK_COUNT_SIZE];
+ gf_atomic_t block_count_read[IOS_BLOCK_COUNT_SIZE];
+ struct timeval opened_at;
};
-typedef enum {
- IOS_DUMP_TYPE_NONE = 0,
- IOS_DUMP_TYPE_FILE = 1,
- IOS_DUMP_TYPE_DICT = 2,
- IOS_DUMP_TYPE_MAX = 3
-} ios_dump_type_t;
-
struct ios_dump_args {
- ios_dump_type_t type;
- union {
- FILE *logfp;
- dict_t *dict;
- } u;
+ ios_dump_type_t type;
+ union {
+ FILE *logfp;
+ dict_t *dict;
+ } u;
};
-typedef int (*block_dump_func) (xlator_t *, struct ios_dump_args*,
- int , int , uint64_t ) ;
+typedef int (*block_dump_func)(xlator_t *, struct ios_dump_args *, int, int,
+ uint64_t);
struct ios_local {
- struct timeval wind_at;
- struct timeval unwind_at;
+ struct timeval wind_at;
+ struct timeval unwind_at;
};
struct volume_options options[];
-inline static int
-is_fop_latency_started (call_frame_t *frame)
-{
- GF_ASSERT (frame);
- struct timeval epoch = {0,};
- return memcmp (&frame->begin, &epoch, sizeof (epoch));
-}
-
-#define END_FOP_LATENCY(frame, op) \
- do { \
- struct ios_conf *conf = NULL; \
- \
- conf = this->private; \
- if (conf && conf->measure_latency) { \
- gettimeofday (&frame->end, NULL); \
- update_ios_latency (conf, frame, GF_FOP_##op); \
- } \
- } while (0)
-
-#define START_FOP_LATENCY(frame) \
- do { \
- struct ios_conf *conf = NULL; \
- \
- conf = this->private; \
- if (conf && conf->measure_latency) { \
- gettimeofday (&frame->begin, NULL); \
- } else { \
- memset (&frame->begin, 0, sizeof (frame->begin));\
- } \
- } while (0)
-
-
-#define BUMP_FOP(op) \
- do { \
- struct ios_conf *conf = NULL; \
- \
- conf = this->private; \
- if (!conf) \
- break; \
- conf->cumulative.fop_hits[GF_FOP_##op]++; \
- conf->incremental.fop_hits[GF_FOP_##op]++; \
- } while (0)
-
-#define UPDATE_PROFILE_STATS(frame, op) \
- do { \
- struct ios_conf *conf = NULL; \
- \
- if (!is_fop_latency_started (frame)) \
- break; \
- conf = this->private; \
- LOCK (&conf->lock); \
- { \
- if (conf && conf->measure_latency && \
- conf->count_fop_hits) { \
- BUMP_FOP(op); \
- gettimeofday (&frame->end, NULL); \
- update_ios_latency (conf, frame, GF_FOP_##op);\
- } \
- } \
- UNLOCK (&conf->lock); \
- } while (0)
-
-#define BUMP_READ(fd, len) \
- do { \
- struct ios_conf *conf = NULL; \
- struct ios_fd *iosfd = NULL; \
- int lb2 = 0; \
- \
- conf = this->private; \
- lb2 = log_base2 (len); \
- ios_fd_ctx_get (fd, this, &iosfd); \
- if (!conf) \
- break; \
- \
- LOCK (&conf->lock); \
- { \
- conf->cumulative.data_read += len; \
- conf->incremental.data_read += len; \
- conf->cumulative.block_count_read[lb2]++; \
- conf->incremental.block_count_read[lb2]++; \
- \
- if (iosfd) { \
- iosfd->data_read += len; \
- iosfd->block_count_read[lb2]++; \
- } \
- } \
- UNLOCK (&conf->lock); \
- } while (0)
-
-
-#define BUMP_WRITE(fd, len) \
- do { \
- struct ios_conf *conf = NULL; \
- struct ios_fd *iosfd = NULL; \
- int lb2 = 0; \
- \
- conf = this->private; \
- lb2 = log_base2 (len); \
- ios_fd_ctx_get (fd, this, &iosfd); \
- if (!conf) \
- break; \
- \
- LOCK (&conf->lock); \
- { \
- conf->cumulative.data_written += len; \
- conf->incremental.data_written += len; \
- conf->cumulative.block_count_write[lb2]++; \
- conf->incremental.block_count_write[lb2]++; \
- \
- if (iosfd) { \
- iosfd->data_written += len; \
- iosfd->block_count_write[lb2]++; \
- } \
- } \
- UNLOCK (&conf->lock); \
- } while (0)
-
-
-#define BUMP_STATS(iosstat, type) \
- do { \
- struct ios_conf *conf = NULL; \
- uint64_t value = 0; \
- \
- conf = this->private; \
- \
- LOCK(&iosstat->lock); \
- { \
- iosstat->counters[type]++; \
- value = iosstat->counters[type]; \
- } \
- UNLOCK (&iosstat->lock); \
- ios_stat_add_to_list (&conf->list[type], \
- value, iosstat); \
- \
- } while (0)
-
-
-#define BUMP_THROUGHPUT(iosstat, type) \
- do { \
- struct ios_conf *conf = NULL; \
- double elapsed; \
- struct timeval *begin, *end; \
- double throughput; \
- int flag = 0; \
- \
- begin = &frame->begin; \
- end = &frame->end; \
- \
- elapsed = (end->tv_sec - begin->tv_sec) * 1e6 \
- + (end->tv_usec - begin->tv_usec); \
- throughput = op_ret / elapsed; \
- \
- conf = this->private; \
- LOCK(&iosstat->lock); \
- { \
- if (iosstat->thru_counters[type].throughput \
- <= throughput) { \
- iosstat->thru_counters[type].throughput = \
- throughput; \
- gettimeofday (&iosstat-> \
- thru_counters[type].time, NULL); \
- flag = 1; \
- } \
- } \
- UNLOCK (&iosstat->lock); \
- if (flag) \
- ios_stat_add_to_list (&conf->thru_list[type], \
- throughput, iosstat); \
- } while (0)
-
-int
-ios_fd_ctx_get (fd_t *fd, xlator_t *this, struct ios_fd **iosfd)
-{
- uint64_t iosfd64 = 0;
- unsigned long iosfdlong = 0;
- int ret = 0;
-
- ret = fd_ctx_get (fd, this, &iosfd64);
- iosfdlong = iosfd64;
- if (ret != -1)
- *iosfd = (void *) iosfdlong;
-
- return ret;
+static int
+is_fop_latency_started(call_frame_t *frame)
+{
+ GF_ASSERT(frame);
+ struct timeval epoch = {
+ 0,
+ };
+ return memcmp(&frame->begin, &epoch, sizeof(epoch));
}
+#define _IOS_SAMP_DIR DEFAULT_LOG_FILE_DIRECTORY "/samples"
+#ifdef GF_LINUX_HOST_OS
+#define _IOS_DUMP_DIR DATADIR "/lib/glusterd/stats"
+#else
+#define _IOS_DUMP_DIR DATADIR "/db/glusterd/stats"
+#endif
+#define END_FOP_LATENCY(frame, op) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ \
+ conf = this->private; \
+ if (conf && conf->measure_latency) { \
+ timespec_now(&frame->end); \
+ update_ios_latency(conf, frame, GF_FOP_##op); \
+ } \
+ } while (0)
+
+#define START_FOP_LATENCY(frame) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ \
+ conf = this->private; \
+ if (conf && conf->measure_latency) { \
+ timespec_now(&frame->begin); \
+ } else { \
+ memset(&frame->begin, 0, sizeof(frame->begin)); \
+ } \
+ } while (0)
+
+#define BUMP_FOP(op) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ \
+ conf = this->private; \
+ if (!conf) \
+ break; \
+ GF_ATOMIC_INC(conf->cumulative.fop_hits[GF_FOP_##op]); \
+ GF_ATOMIC_INC(conf->incremental.fop_hits[GF_FOP_##op]); \
+ } while (0)
+
+#define UPDATE_PROFILE_STATS(frame, op) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ \
+ if (!is_fop_latency_started(frame)) \
+ break; \
+ conf = this->private; \
+ if (conf && conf->measure_latency && conf->count_fop_hits) { \
+ BUMP_FOP(op); \
+ timespec_now(&frame->end); \
+ update_ios_latency(conf, frame, GF_FOP_##op); \
+ } \
+ } while (0)
+
+#define BUMP_THROUGHPUT(iosstat, type) \
+ do { \
+ struct ios_conf *conf = NULL; \
+ double elapsed; \
+ struct timespec *begin, *end; \
+ double throughput; \
+ int flag = 0; \
+ struct timeval tv = { \
+ 0, \
+ }; \
+ \
+ begin = &frame->begin; \
+ end = &frame->end; \
+ \
+ elapsed = gf_tsdiff(begin, end) / 1000.0; \
+ throughput = op_ret / elapsed; \
+ \
+ conf = this->private; \
+ gettimeofday(&tv, NULL); \
+ LOCK(&iosstat->lock); \
+ { \
+ if (iosstat->thru_counters[type].throughput <= throughput) { \
+ iosstat->thru_counters[type].throughput = throughput; \
+ memcpy(&iosstat->thru_counters[type].time, &tv, \
+ sizeof(struct timeval)); \
+ flag = 1; \
+ } \
+ } \
+ UNLOCK(&iosstat->lock); \
+ if (flag) \
+ ios_stat_add_to_list(&conf->thru_list[type], throughput, iosstat); \
+ } while (0)
-int
-ios_fd_ctx_set (fd_t *fd, xlator_t *this, struct ios_fd *iosfd)
+static int
+ios_fd_ctx_get(fd_t *fd, xlator_t *this, struct ios_fd **iosfd)
{
- uint64_t iosfd64 = 0;
- int ret = 0;
+ uint64_t iosfd64 = 0;
+ unsigned long iosfdlong = 0;
+ int ret = 0;
- iosfd64 = (unsigned long) iosfd;
- ret = fd_ctx_set (fd, this, iosfd64);
+ ret = fd_ctx_get(fd, this, &iosfd64);
+ iosfdlong = iosfd64;
+ if (ret != -1)
+ *iosfd = (void *)iosfdlong;
- return ret;
+ return ret;
}
-int
-ios_stat_ref (struct ios_stat *iosstat)
+static int
+ios_fd_ctx_set(fd_t *fd, xlator_t *this, struct ios_fd *iosfd)
{
- LOCK (&iosstat->lock);
- {
- iosstat->refcnt++;
+ uint64_t iosfd64 = 0;
+ int ret = 0;
+
+ iosfd64 = (unsigned long)iosfd;
+ ret = fd_ctx_set(fd, this, iosfd64);
+
+ return ret;
+}
+
+static int
+ios_stat_ref(struct ios_stat *iosstat)
+{
+ uint64_t refcnt = 0;
+ refcnt = GF_ATOMIC_INC(iosstat->refcnt);
+
+ return refcnt;
+}
+
+static int
+ios_stat_unref(struct ios_stat *iosstat)
+{
+ int cleanup = 0;
+ uint64_t refcnt = 0;
+
+ refcnt = GF_ATOMIC_DEC(iosstat->refcnt);
+ if (refcnt == 0) {
+ if (iosstat->filename) {
+ GF_FREE(iosstat->filename);
+ iosstat->filename = NULL;
}
- UNLOCK (&iosstat->lock);
+ cleanup = 1;
+ }
- return iosstat->refcnt;
+ if (cleanup) {
+ LOCK_DESTROY(&iosstat->lock);
+ GF_FREE(iosstat);
+ iosstat = NULL;
+ }
+
+ return 0;
}
-int
-ios_stat_unref (struct ios_stat *iosstat)
+static int
+ios_stat_add_to_list(struct ios_stat_head *list_head, uint64_t value,
+ struct ios_stat *iosstat)
{
- int cleanup = 0;
- LOCK (&iosstat->lock);
+ struct ios_stat_list *new = NULL;
+ struct ios_stat_list *entry = NULL;
+ struct ios_stat_list *t = NULL;
+ struct ios_stat_list *list_entry = NULL;
+ struct ios_stat_list *tmp = NULL;
+ struct ios_stat_list *last = NULL;
+ struct ios_stat *stat = NULL;
+ int cnt = 0;
+ int found = 0;
+ int reposition = 0;
+ double min_count = 0;
+
+ LOCK(&list_head->lock);
+ {
+ if (list_head->min_cnt == 0)
+ list_head->min_cnt = value;
+ if ((list_head->members == MAX_LIST_MEMBERS) &&
+ (list_head->min_cnt > value))
+ goto out;
+
+ list_for_each_entry_safe(entry, t, &list_head->iosstats->list, list)
{
- iosstat->refcnt--;
- if (iosstat->refcnt == 0) {
- if (iosstat->filename) {
- GF_FREE (iosstat->filename);
- iosstat->filename = NULL;
- }
- cleanup = 1;
+ cnt++;
+ if (cnt == list_head->members)
+ last = entry;
+
+ if (!gf_uuid_compare(iosstat->gfid, entry->iosstat->gfid)) {
+ list_entry = entry;
+ found = cnt;
+ entry->value = value;
+ if (!reposition) {
+ if (cnt == list_head->members)
+ list_head->min_cnt = value;
+ goto out;
}
+ break;
+ } else if (entry->value <= value && !reposition) {
+ reposition = cnt;
+ tmp = entry;
+ if (cnt == list_head->members - 1)
+ min_count = entry->value;
+ }
}
- UNLOCK (&iosstat->lock);
-
- if (cleanup) {
- GF_FREE (iosstat);
- iosstat = NULL;
+ if (found) {
+ list_del(&list_entry->list);
+ list_add_tail(&list_entry->list, &tmp->list);
+ if (min_count)
+ list_head->min_cnt = min_count;
+ goto out;
+ } else if (list_head->members == MAX_LIST_MEMBERS && reposition) {
+ new = GF_CALLOC(1, sizeof(*new), gf_io_stats_mt_ios_stat_list);
+ new->iosstat = iosstat;
+ new->value = value;
+ ios_stat_ref(iosstat);
+ list_add_tail(&new->list, &tmp->list);
+ if (last) {
+ stat = last->iosstat;
+ last->iosstat = NULL;
+ ios_stat_unref(stat);
+ list_del(&last->list);
+ GF_FREE(last);
+ }
+ if (reposition == MAX_LIST_MEMBERS)
+ list_head->min_cnt = value;
+ else if (min_count) {
+ list_head->min_cnt = min_count;
+ }
+ } else if (list_head->members < MAX_LIST_MEMBERS) {
+ new = GF_CALLOC(1, sizeof(*new), gf_io_stats_mt_ios_stat_list);
+ new->iosstat = iosstat;
+ new->value = value;
+ ios_stat_ref(iosstat);
+ if (reposition) {
+ list_add_tail(&new->list, &tmp->list);
+ } else {
+ list_add_tail(&new->list, &entry->list);
+ }
+ list_head->members++;
+ if (list_head->min_cnt > value)
+ list_head->min_cnt = value;
}
+ }
+out:
+ UNLOCK(&list_head->lock);
+ return 0;
+}
- return 0;
+static void
+ios_bump_read(xlator_t *this, fd_t *fd, size_t len)
+{
+ struct ios_conf *conf = NULL;
+ struct ios_fd *iosfd = NULL;
+ int lb2 = 0;
+
+ conf = this->private;
+ lb2 = log_base2(len);
+ ios_fd_ctx_get(fd, this, &iosfd);
+ if (!conf)
+ return;
+
+ GF_ATOMIC_ADD(conf->cumulative.data_read, len);
+ GF_ATOMIC_ADD(conf->incremental.data_read, len);
+ GF_ATOMIC_INC(conf->cumulative.block_count_read[lb2]);
+ GF_ATOMIC_INC(conf->incremental.block_count_read[lb2]);
+
+ if (iosfd) {
+ GF_ATOMIC_ADD(iosfd->data_read, len);
+ GF_ATOMIC_INC(iosfd->block_count_read[lb2]);
+ }
}
-int
-ios_inode_ctx_set (inode_t *inode, xlator_t *this, struct ios_stat *iosstat)
+static void
+ios_bump_write(xlator_t *this, fd_t *fd, size_t len)
{
- uint64_t iosstat64 = 0;
- int ret = 0;
+ struct ios_conf *conf = NULL;
+ struct ios_fd *iosfd = NULL;
+ int lb2 = 0;
+
+ conf = this->private;
+ lb2 = log_base2(len);
+ ios_fd_ctx_get(fd, this, &iosfd);
+ if (!conf)
+ return;
- ios_stat_ref (iosstat);
- iosstat64 = (unsigned long )iosstat;
- ret = inode_ctx_put (inode, this, iosstat64);
- return ret;
+ GF_ATOMIC_ADD(conf->cumulative.data_written, len);
+ GF_ATOMIC_ADD(conf->incremental.data_written, len);
+ GF_ATOMIC_INC(conf->cumulative.block_count_write[lb2]);
+ GF_ATOMIC_INC(conf->incremental.block_count_write[lb2]);
+
+ if (iosfd) {
+ GF_ATOMIC_ADD(iosfd->data_written, len);
+ GF_ATOMIC_INC(iosfd->block_count_write[lb2]);
+ }
}
-int
-ios_inode_ctx_get (inode_t *inode, xlator_t *this, struct ios_stat **iosstat)
+static void
+ios_bump_upcall(xlator_t *this, gf_upcall_flags_t event)
{
- uint64_t iosstat64 = 0;
- unsigned long iosstatlong = 0;
- int ret = 0;
+ struct ios_conf *conf = NULL;
+
+ conf = this->private;
+ if (!conf)
+ return;
+ if (conf->count_fop_hits) {
+ GF_ATOMIC_INC(conf->cumulative.upcall_hits[event]);
+ GF_ATOMIC_INC(conf->incremental.upcall_hits[event]);
+ }
+}
- ret = inode_ctx_get (inode, this, &iosstat64);
- iosstatlong = iosstat64;
- if (ret != -1)
- *iosstat = (void *) iosstatlong;
+static void
+ios_bump_stats(xlator_t *this, struct ios_stat *iosstat, ios_stats_type_t type)
+{
+ struct ios_conf *conf = NULL;
+ uint64_t value = 0;
- return ret;
+ conf = this->private;
+ value = GF_ATOMIC_INC(iosstat->counters[type]);
+ ios_stat_add_to_list(&conf->list[type], value, iosstat);
}
int
-ios_stat_add_to_list (struct ios_stat_head *list_head, uint64_t value,
- struct ios_stat *iosstat)
+ios_inode_ctx_set(inode_t *inode, xlator_t *this, struct ios_stat *iosstat)
{
- struct ios_stat_list *new = NULL;
- struct ios_stat_list *entry = NULL;
- struct ios_stat_list *t = NULL;
- struct ios_stat_list *list_entry = NULL;
- struct ios_stat_list *tmp = NULL;
- struct ios_stat_list *last = NULL;
- struct ios_stat *stat = NULL;
- int cnt = 0;
- int found = 0;
- int reposition = 0;
- double min_count = 0;
+ uint64_t iosstat64 = 0;
+ int ret = 0;
- LOCK (&list_head->lock);
- {
+ ios_stat_ref(iosstat);
+ iosstat64 = (unsigned long)iosstat;
+ ret = inode_ctx_put(inode, this, iosstat64);
+ return ret;
+}
- if (list_head->min_cnt == 0)
- list_head->min_cnt = value;
- if ((list_head->members == MAX_LIST_MEMBERS) &&
- (list_head->min_cnt > value))
- goto out;
-
- list_for_each_entry_safe (entry, t,
- &list_head->iosstats->list, list) {
- cnt++;
- if (cnt == list_head->members)
- last = entry;
-
- if (!uuid_compare (iosstat->gfid,
- entry->iosstat->gfid)) {
- list_entry = entry;
- found = cnt;
- entry->value = value;
- if (!reposition) {
- if (cnt == list_head->members)
- list_head->min_cnt = value;
- goto out;
- }
- break;
- } else if (entry->value <= value && !reposition) {
- reposition = cnt;
- tmp = entry;
- if (cnt == list_head->members - 1)
- min_count = entry->value;
- }
- }
- if (found) {
- list_del (&list_entry->list);
- list_add_tail (&list_entry->list, &tmp->list);
- if (min_count)
- list_head->min_cnt = min_count;
- goto out;
- } else if (list_head->members == MAX_LIST_MEMBERS && reposition) {
- new = GF_CALLOC (1, sizeof (*new),
- gf_io_stats_mt_ios_stat_list);
- new->iosstat = iosstat;
- new->value = value;
- ios_stat_ref (iosstat);
- list_add_tail (&new->list, &tmp->list);
- stat = last->iosstat;
- last->iosstat = NULL;
- ios_stat_unref (stat);
- list_del (&last->list);
- GF_FREE (last);
- if (reposition == MAX_LIST_MEMBERS)
- list_head->min_cnt = value;
- else if (min_count) {
- list_head->min_cnt = min_count;
- }
- } else if (list_head->members < MAX_LIST_MEMBERS) {
- new = GF_CALLOC (1, sizeof (*new),
- gf_io_stats_mt_ios_stat_list);
- new->iosstat = iosstat;
- new->value = value;
- ios_stat_ref (iosstat);
- if (reposition) {
- list_add_tail (&new->list, &tmp->list);
- } else {
- list_add_tail (&new->list, &entry->list);
- }
- list_head->members++;
- if (list_head->min_cnt > value)
- list_head->min_cnt = value;
- }
- }
-out:
- UNLOCK (&list_head->lock);
- return 0;
+int
+ios_inode_ctx_get(inode_t *inode, xlator_t *this, struct ios_stat **iosstat)
+{
+ uint64_t iosstat64 = 0;
+ unsigned long iosstatlong = 0;
+ int ret = 0;
+
+ ret = inode_ctx_get(inode, this, &iosstat64);
+ iosstatlong = iosstat64;
+ if (ret != -1)
+ *iosstat = (void *)iosstatlong;
+
+ return ret;
+}
+
+/*
+ * So why goto all this trouble? Why not just queue up some samples in
+ * a big list and malloc away? Well malloc is expensive relative
+ * to what we are measuring, so cannot have any malloc's (or worse
+ * callocs) in our measurement code paths. Instead, we are going to
+ * pre-allocate a circular buffer and collect a maximum number of samples.
+ * Prior to dumping them all we'll create a new buffer and swap the
+ * old buffer with the new, and then proceed to dump the statistics
+ * in our dump thread.
+ *
+ */
+ios_sample_buf_t *
+ios_create_sample_buf(size_t buf_size)
+{
+ ios_sample_buf_t *ios_sample_buf = NULL;
+ ios_sample_t *ios_samples = NULL;
+
+ ios_sample_buf = GF_CALLOC(1, sizeof(*ios_sample_buf),
+ gf_io_stats_mt_ios_sample_buf);
+ if (!ios_sample_buf)
+ goto err;
+
+ ios_samples = GF_CALLOC(buf_size, sizeof(*ios_samples),
+ gf_io_stats_mt_ios_sample);
+
+ if (!ios_samples)
+ goto err;
+
+ ios_sample_buf->ios_samples = ios_samples;
+ ios_sample_buf->size = buf_size;
+ ios_sample_buf->pos = 0;
+ ios_sample_buf->observed = 0;
+ ios_sample_buf->collected = 0;
+
+ return ios_sample_buf;
+err:
+ GF_FREE(ios_sample_buf);
+ return NULL;
+}
+
+void
+ios_destroy_sample_buf(ios_sample_buf_t *ios_sample_buf)
+{
+ GF_FREE(ios_sample_buf->ios_samples);
+ GF_FREE(ios_sample_buf);
}
-inline int
-ios_stats_cleanup (xlator_t *this, inode_t *inode)
+static int
+ios_init_sample_buf(struct ios_conf *conf)
{
+ int32_t ret = -1;
+
+ GF_ASSERT(conf);
+ LOCK(&conf->lock);
+ conf->ios_sample_buf = ios_create_sample_buf(conf->ios_sample_buf_size);
+ if (!conf->ios_sample_buf)
+ goto out;
+ ret = 0;
+out:
+ UNLOCK(&conf->lock);
+ return ret;
+}
- struct ios_stat *iosstat = NULL;
- uint64_t iosstat64 = 0;
+static int
+ios_stats_cleanup(xlator_t *this, inode_t *inode)
+{
+ struct ios_stat *iosstat = NULL;
+ uint64_t iosstat64 = 0;
- inode_ctx_del (inode, this, &iosstat64);
- if (!iosstat64) {
- gf_log (this->name, GF_LOG_WARNING,
- "could not get inode ctx");
- return 0;
- }
- iosstat = (void *) (long)iosstat64;
- if (iosstat) {
- ios_stat_unref (iosstat);
- }
+ inode_ctx_del(inode, this, &iosstat64);
+ if (!iosstat64) {
+ gf_log(this->name, GF_LOG_WARNING, "could not get inode ctx");
return 0;
+ }
+ iosstat = (void *)(long)iosstat64;
+ if (iosstat) {
+ ios_stat_unref(iosstat);
+ }
+ return 0;
}
-#define ios_log(this, logfp, fmt ...) \
- do { \
- if (logfp) { \
- fprintf (logfp, fmt); \
- fprintf (logfp, "\n"); \
- } \
- gf_log (this->name, GF_LOG_INFO, fmt); \
- } while (0)
+#define ios_log(this, logfp, fmt...) \
+ do { \
+ if (logfp) { \
+ fprintf(logfp, fmt); \
+ fprintf(logfp, "\n"); \
+ } \
+ gf_log(this->name, GF_LOG_DEBUG, fmt); \
+ } while (0)
int
-ios_dump_file_stats (struct ios_stat_head *list_head, xlator_t *this, FILE* logfp)
+ios_dump_file_stats(struct ios_stat_head *list_head, xlator_t *this,
+ FILE *logfp)
{
- struct ios_stat_list *entry = NULL;
+ struct ios_stat_list *entry = NULL;
- LOCK (&list_head->lock);
+ LOCK(&list_head->lock);
+ {
+ list_for_each_entry(entry, &list_head->iosstats->list, list)
{
- list_for_each_entry (entry, &list_head->iosstats->list, list) {
- ios_log (this, logfp, "%-12.0f %s",
- entry->value, entry->iosstat->filename);
- }
+ ios_log(this, logfp, "%-12.0f %s", entry->value,
+ entry->iosstat->filename);
}
- UNLOCK (&list_head->lock);
- return 0;
+ }
+ UNLOCK(&list_head->lock);
+ return 0;
}
int
-ios_dump_throughput_stats (struct ios_stat_head *list_head, xlator_t *this,
- FILE* logfp, ios_stats_type_t type)
+ios_dump_throughput_stats(struct ios_stat_head *list_head, xlator_t *this,
+ FILE *logfp, ios_stats_thru_t type)
{
- struct ios_stat_list *entry = NULL;
- struct timeval time = {0, };
- char timestr[256] = {0, };
-
- LOCK (&list_head->lock);
+ struct ios_stat_list *entry = NULL;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+
+ LOCK(&list_head->lock);
+ {
+ list_for_each_entry(entry, &list_head->iosstats->list, list)
{
- list_for_each_entry (entry, &list_head->iosstats->list, list) {
- gf_time_fmt (timestr, sizeof timestr,
- entry->iosstat->thru_counters[type].time.tv_sec,
- gf_timefmt_FT);
- snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
- ".%"GF_PRI_SUSECONDS, time.tv_usec);
-
- ios_log (this, logfp, "%s \t %-10.2f \t %s",
- timestr, entry->value, entry->iosstat->filename);
- }
+ gf_time_fmt_tv(timestr, sizeof timestr,
+ &entry->iosstat->thru_counters[type].time,
+ gf_timefmt_FT);
+
+ ios_log(this, logfp, "%s \t %-10.2f \t %s", timestr, entry->value,
+ entry->iosstat->filename);
}
- UNLOCK (&list_head->lock);
- return 0;
+ }
+ UNLOCK(&list_head->lock);
+ return 0;
}
int
-io_stats_dump_global_to_logfp (xlator_t *this, struct ios_global_stats *stats,
- struct timeval *now, int interval, FILE* logfp)
+_io_stats_get_key_prefix(xlator_t *this, char **key_prefix)
{
- int i = 0;
- int per_line = 0;
- int index = 0;
- struct ios_stat_head *list_head = NULL;
- struct ios_conf *conf = NULL;
- char timestr[256] = {0, };
- char str_header[128] = {0};
- char str_read[128] = {0};
- char str_write[128] = {0};
-
- conf = this->private;
+ char *key_root = "gluster";
+ char *xlator_name = NULL;
+ char *instance_name = NULL;
+ size_t key_len = 0;
+ int bytes_written = 0;
+ int i = 0;
+ int ret = 0;
+ struct ios_conf *conf = this->private;
+
+ xlator_name = strdupa(conf->unique_id);
+ for (i = 0; i < strlen(xlator_name); i++) {
+ if (xlator_name[i] == '/')
+ xlator_name[i] = '_';
+ }
+
+ instance_name = this->instance_name;
+ if (this->name && strcmp(this->name, "glustershd") == 0) {
+ xlator_name = "shd";
+ } else if (this->prev && strcmp(this->prev->name, "nfs-server") == 0) {
+ xlator_name = "nfsd";
+ if (this->prev->instance_name)
+ instance_name = strdupa(this->prev->instance_name);
+ }
+
+ if (strcmp(__progname, "glusterfsd") == 0)
+ key_root = "gluster.brick";
+
+ if (instance_name) {
+ /* +3 for 2 x "." + NULL */
+ key_len = strlen(key_root) + strlen(xlator_name) +
+ strlen(instance_name) + 3;
+ *key_prefix = GF_CALLOC(key_len, sizeof(char), gf_common_mt_char);
+ if (!*key_prefix) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ bytes_written = snprintf(*key_prefix, key_len, "%s.%s.%s", key_root,
+ xlator_name, instance_name);
+ if (bytes_written != key_len - 1) {
+ ret = -EINVAL;
+ goto err;
+ }
+ } else {
+ /* +2 for 1 x "." + NULL */
+ key_len = strlen(key_root) + strlen(xlator_name) + 2;
+ *key_prefix = GF_CALLOC(key_len, sizeof(char), gf_common_mt_char);
+ if (!*key_prefix) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ bytes_written = snprintf(*key_prefix, key_len, "%s.%s", key_root,
+ xlator_name);
+ if (bytes_written != key_len - 1) {
+ ret = -EINVAL;
+ goto err;
+ }
+ }
+ return 0;
+err:
+ GF_FREE(*key_prefix);
+ *key_prefix = NULL;
+ return ret;
+}
- if (interval == -1)
- ios_log (this, logfp, "\n=== Cumulative stats ===");
- else
- ios_log (this, logfp, "\n=== Interval %d stats ===",
- interval);
- ios_log (this, logfp, " Duration : %"PRId64" secs",
- (uint64_t) (now->tv_sec - stats->started_at.tv_sec));
- ios_log (this, logfp, " BytesRead : %"PRId64,
- stats->data_read);
- ios_log (this, logfp, " BytesWritten : %"PRId64"\n",
- stats->data_written);
-
- snprintf (str_header, sizeof (str_header), "%-12s %c", "Block Size", ':');
- snprintf (str_read, sizeof (str_read), "%-12s %c", "Read Count", ':');
- snprintf (str_write, sizeof (str_write), "%-12s %c", "Write Count", ':');
- index = 14;
- for (i = 0; i < 32; i++) {
- if ((stats->block_count_read[i] == 0) &&
- (stats->block_count_write[i] == 0))
- continue;
- per_line++;
-
- snprintf (str_header+index, sizeof (str_header)-index,
- "%16dB+", (1<<i));
- if (stats->block_count_read[i])
- snprintf (str_read+index, sizeof (str_read)-index,
- "%18"PRId64, stats->block_count_read[i]);
- else snprintf (str_read+index, sizeof (str_read)-index,
- "%18s", "0");
- if (stats->block_count_write[i])
- snprintf (str_write+index, sizeof (str_write)-index,
- "%18"PRId64, stats->block_count_write[i]);
- else snprintf (str_write+index, sizeof (str_write)-index,
- "%18s", "0");
-
- index += 18;
- if (per_line == 3) {
- ios_log (this, logfp, "%s", str_header);
- ios_log (this, logfp, "%s", str_read);
- ios_log (this, logfp, "%s\n", str_write);
-
- memset (str_header, 0, sizeof (str_header));
- memset (str_read, 0, sizeof (str_read));
- memset (str_write, 0, sizeof (str_write));
-
- snprintf (str_header, sizeof (str_header), "%-12s %c",
- "Block Size", ':');
- snprintf (str_read, sizeof (str_read), "%-12s %c",
- "Read Count", ':');
- snprintf (str_write, sizeof (str_write), "%-12s %c",
- "Write Count", ':');
-
- index = 14;
- per_line = 0;
- }
+int
+io_stats_dump_global_to_json_logfp(xlator_t *this,
+ struct ios_global_stats *stats, time_t now,
+ int interval, FILE *logfp)
+{
+ int i = 0;
+ int j = 0;
+ struct ios_conf *conf = NULL;
+ char *key_prefix = NULL;
+ char *str_prefix = NULL;
+ char *lc_fop_name = NULL;
+ int ret = 1; /* Default to error */
+ int rw_size;
+ char *rw_unit = NULL;
+ uint64_t fop_hits;
+ float fop_lat_ave;
+ float fop_lat_min;
+ float fop_lat_max;
+ double interval_sec;
+ double fop_ave_usec = 0.0;
+ double fop_ave_usec_sum = 0.0;
+ double weighted_fop_ave_usec = 0.0;
+ double weighted_fop_ave_usec_sum = 0.0;
+ long total_fop_hits = 0;
+ loc_t unused_loc = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+
+ interval_sec = (double)(now - stats->started_at);
+
+ conf = this->private;
+
+ ret = _io_stats_get_key_prefix(this, &key_prefix);
+ if (ret) {
+ goto out;
+ }
+
+ if (interval == -1) {
+ str_prefix = "aggr";
+
+ } else {
+ str_prefix = "inter";
+ }
+ ios_log(this, logfp, "{");
+
+ for (i = 0; i < 31; i++) {
+ rw_size = (1 << i);
+ if (rw_size >= 1024 * 1024) {
+ rw_size = rw_size / (1024 * 1024);
+ rw_unit = "mb";
+ } else if (rw_size >= 1024) {
+ rw_size = rw_size / 1024;
+ rw_unit = "kb";
+ } else {
+ rw_unit = "b";
}
- if (per_line != 0) {
- ios_log (this, logfp, "%s", str_header);
- ios_log (this, logfp, "%s", str_read);
- ios_log (this, logfp, "%s\n", str_write);
+ if (interval == -1) {
+ ios_log(this, logfp, "\"%s.%s.read_%d%s\": %" GF_PRI_ATOMIC ",",
+ key_prefix, str_prefix, rw_size, rw_unit,
+ GF_ATOMIC_GET(stats->block_count_read[i]));
+ ios_log(this, logfp, "\"%s.%s.write_%d%s\": %" GF_PRI_ATOMIC ",",
+ key_prefix, str_prefix, rw_size, rw_unit,
+ GF_ATOMIC_GET(stats->block_count_write[i]));
+ } else {
+ ios_log(this, logfp, "\"%s.%s.read_%d%s_per_sec\": %0.2lf,",
+ key_prefix, str_prefix, rw_size, rw_unit,
+ (double)(GF_ATOMIC_GET(stats->block_count_read[i]) /
+ interval_sec));
+ ios_log(this, logfp, "\"%s.%s.write_%d%s_per_sec\": %0.2lf,",
+ key_prefix, str_prefix, rw_size, rw_unit,
+ (double)(GF_ATOMIC_GET(stats->block_count_write[i]) /
+ interval_sec));
+ }
+ }
+
+ if (interval == -1) {
+ ios_log(this, logfp, "\"%s.%s.fds.open_count\": %" PRId64 ",",
+ key_prefix, str_prefix, conf->cumulative.nr_opens);
+ ios_log(this, logfp, "\"%s.%s.fds.max_open_count\": %" PRId64 ",",
+ key_prefix, str_prefix, conf->cumulative.max_nr_opens);
+ }
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ lc_fop_name = strdupa(gf_fop_list[i]);
+ for (j = 0; lc_fop_name[j]; j++) {
+ lc_fop_name[j] = tolower(lc_fop_name[j]);
}
- ios_log (this, logfp, "%-13s %10s %14s %14s %14s", "Fop",
- "Call Count", "Avg-Latency", "Min-Latency",
- "Max-Latency");
- ios_log (this, logfp, "%-13s %10s %14s %14s %14s", "---", "----------",
- "-----------", "-----------", "-----------");
-
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- if (stats->fop_hits[i] && !stats->latency[i].avg)
- ios_log (this, logfp, "%-13s %10"PRId64" %11s "
- "us %11s us %11s us", gf_fop_list[i],
- stats->fop_hits[i], "0", "0", "0");
- else if (stats->fop_hits[i] && stats->latency[i].avg)
- ios_log (this, logfp, "%-13s %10"PRId64" %11.2lf us "
- "%11.2lf us %11.2lf us", gf_fop_list[i],
- stats->fop_hits[i], stats->latency[i].avg,
- stats->latency[i].min, stats->latency[i].max);
+ fop_hits = GF_ATOMIC_GET(stats->fop_hits[i]);
+ fop_lat_ave = 0.0;
+ fop_lat_min = 0.0;
+ fop_lat_max = 0.0;
+ if (fop_hits) {
+ if (stats->latency[i].avg) {
+ fop_lat_ave = stats->latency[i].avg;
+ fop_lat_min = stats->latency[i].min;
+ fop_lat_max = stats->latency[i].max;
+ }
+ }
+ if (interval == -1) {
+ ios_log(this, logfp, "\"%s.%s.fop.%s.count\": %" GF_PRI_ATOMIC ",",
+ key_prefix, str_prefix, lc_fop_name, fop_hits);
+ } else {
+ ios_log(this, logfp, "\"%s.%s.fop.%s.per_sec\": %0.2lf,",
+ key_prefix, str_prefix, lc_fop_name,
+ (double)(fop_hits / interval_sec));
}
- ios_log (this, logfp, "------ ----- ----- ----- ----- ----- ----- ----- "
- " ----- ----- ----- -----\n");
+ ios_log(this, logfp, "\"%s.%s.fop.%s.latency_ave_usec\": %0.2lf,",
+ key_prefix, str_prefix, lc_fop_name, fop_lat_ave);
+ ios_log(this, logfp, "\"%s.%s.fop.%s.latency_min_usec\": %0.2lf,",
+ key_prefix, str_prefix, lc_fop_name, fop_lat_min);
+ ios_log(this, logfp, "\"%s.%s.fop.%s.latency_max_usec\": %0.2lf,",
+ key_prefix, str_prefix, lc_fop_name, fop_lat_max);
+
+ fop_ave_usec_sum += fop_lat_ave;
+ weighted_fop_ave_usec_sum += fop_hits * fop_lat_ave;
+ total_fop_hits += fop_hits;
+ }
+
+ if (total_fop_hits) {
+ weighted_fop_ave_usec = weighted_fop_ave_usec_sum / total_fop_hits;
+ /* Extra key that does not print out an entry w/ 0.00 for
+ * intervals with no data
+ */
+ ios_log(this, logfp,
+ "\"%s.%s.fop.weighted_latency_ave_usec_nozerofill\": "
+ "%0.4lf,",
+ key_prefix, str_prefix, weighted_fop_ave_usec);
+ }
+ ios_log(this, logfp, "\"%s.%s.fop.weighted_latency_ave_usec\": %0.4lf,",
+ key_prefix, str_prefix, weighted_fop_ave_usec);
+ ios_log(this, logfp, "\"%s.%s.fop.weighted_fop_count\": %ld,", key_prefix,
+ str_prefix, total_fop_hits);
+
+ fop_ave_usec = fop_ave_usec_sum / GF_FOP_MAXVALUE;
+ ios_log(this, logfp, "\"%s.%s.fop.unweighted_latency_ave_usec\":%0.4lf,",
+ key_prefix, str_prefix, fop_ave_usec);
+
+ for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) {
+ lc_fop_name = strdupa(gf_upcall_list[i]);
+ for (j = 0; lc_fop_name[j]; j++) {
+ lc_fop_name[j] = tolower(lc_fop_name[j]);
+ }
+ fop_hits = GF_ATOMIC_GET(stats->upcall_hits[i]);
if (interval == -1) {
- LOCK (&conf->lock);
- {
- gf_time_fmt (timestr, sizeof timestr,
- conf->cumulative.max_openfd_time.tv_sec,
- gf_timefmt_FT);
- snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
- ".%"GF_PRI_SUSECONDS,
- conf->cumulative.max_openfd_time.tv_usec);
- ios_log (this, logfp, "Current open fd's: %"PRId64
- " Max open fd's: %"PRId64" time %s",
- conf->cumulative.nr_opens,
- conf->cumulative.max_nr_opens, timestr);
- }
- UNLOCK (&conf->lock);
- ios_log (this, logfp, "\n==========Open File Stats========");
- ios_log (this, logfp, "\nCOUNT: \t FILE NAME");
- list_head = &conf->list[IOS_STATS_TYPE_OPEN];
- ios_dump_file_stats (list_head, this, logfp);
-
-
- ios_log (this, logfp, "\n==========Read File Stats========");
- ios_log (this, logfp, "\nCOUNT: \t FILE NAME");
- list_head = &conf->list[IOS_STATS_TYPE_READ];
- ios_dump_file_stats (list_head, this, logfp);
-
- ios_log (this, logfp, "\n==========Write File Stats========");
- ios_log (this, logfp, "\nCOUNT: \t FILE NAME");
- list_head = &conf->list[IOS_STATS_TYPE_WRITE];
- ios_dump_file_stats (list_head, this, logfp);
-
- ios_log (this, logfp, "\n==========Directory open stats========");
- ios_log (this, logfp, "\nCOUNT: \t DIRECTORY NAME");
- list_head = &conf->list[IOS_STATS_TYPE_OPENDIR];
- ios_dump_file_stats (list_head, this, logfp);
-
- ios_log (this, logfp, "\n========Directory readdirp Stats=======");
- ios_log (this, logfp, "\nCOUNT: \t DIRECTORY NAME");
- list_head = &conf->list[IOS_STATS_TYPE_READDIRP];
- ios_dump_file_stats (list_head, this, logfp);
-
- ios_log (this, logfp, "\n========Read Throughput File Stats=====");
- ios_log (this, logfp, "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)"
- "\tFILE NAME");
- list_head = &conf->thru_list[IOS_STATS_THRU_READ];
- ios_dump_throughput_stats(list_head, this, logfp, IOS_STATS_THRU_READ);
-
- ios_log (this, logfp, "\n======Write Throughput File Stats======");
- ios_log (this, logfp, "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)"
- "\tFILE NAME");
- list_head = &conf->thru_list[IOS_STATS_THRU_WRITE];
- ios_dump_throughput_stats (list_head, this, logfp, IOS_STATS_THRU_WRITE);
+ ios_log(this, logfp, "\"%s.%s.fop.%s.count\": %" GF_PRI_ATOMIC ",",
+ key_prefix, str_prefix, lc_fop_name, fop_hits);
+ } else {
+ ios_log(this, logfp, "\"%s.%s.fop.%s.per_sec\": %0.2lf,",
+ key_prefix, str_prefix, lc_fop_name,
+ (double)(fop_hits / interval_sec));
}
- return 0;
+ }
+
+ ret = syncop_getxattr(this, &unused_loc, &xattr, IO_THREADS_QUEUE_SIZE_KEY,
+ NULL, NULL);
+ if (xattr) {
+ /*
+ * Iterate over the dictionary returned to us by io-threads and
+ * dump the results to the stats file.
+ */
+ data_pair_t *curr = NULL;
+
+ dict_foreach_inline(xattr, curr)
+ {
+ ios_log(this, logfp, "\"%s.%s.%s.queue_size\": %d,", key_prefix,
+ str_prefix, curr->key, data_to_int32(curr->value));
+ }
+
+ /* Free the dictionary */
+ dict_unref(xattr);
+ } else {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Unable to get queue size counts from "
+ "the io-threads translator!");
+ }
+
+ if (interval == -1) {
+ ios_log(this, logfp, "\"%s.%s.uptime\": %" PRIu64 ",", key_prefix,
+ str_prefix, (uint64_t)(now - stats->started_at));
+ ios_log(this, logfp,
+ "\"%s.%s.bytes_read\": "
+ "%" GF_PRI_ATOMIC ",",
+ key_prefix, str_prefix, GF_ATOMIC_GET(stats->data_read));
+ ios_log(this, logfp,
+ "\"%s.%s.bytes_written\": "
+ "%" GF_PRI_ATOMIC "",
+ key_prefix, str_prefix, GF_ATOMIC_GET(stats->data_written));
+ } else {
+ ios_log(this, logfp, "\"%s.%s.sample_interval_sec\": %0.2lf,",
+ key_prefix, str_prefix, interval_sec);
+ ios_log(this, logfp, "\"%s.%s.bytes_read_per_sec\": %0.2lf,",
+ key_prefix, str_prefix,
+ (double)(GF_ATOMIC_GET(stats->data_read) / interval_sec));
+ ios_log(this, logfp, "\"%s.%s.bytes_written_per_sec\": %0.2lf",
+ key_prefix, str_prefix,
+ (double)(GF_ATOMIC_GET(stats->data_written) / interval_sec));
+ }
+
+ ios_log(this, logfp, "}");
+ ret = 0;
+out:
+ GF_FREE(key_prefix);
+ return ret;
}
+char *
+_resolve_username(xlator_t *this, uid_t uid)
+{
+ struct passwd pwd;
+ struct passwd *pwd_result = NULL;
+ size_t pwd_buf_len;
+ char *pwd_buf = NULL;
+ char *ret = NULL;
+
+ /* Prepare our buffer for the uid->username translation */
+#ifdef _SC_GETGR_R_SIZE_MAX
+ pwd_buf_len = sysconf(_SC_GETGR_R_SIZE_MAX);
+#else
+ pwd_buf_len = -1;
+#endif
+ if (pwd_buf_len == -1) {
+ pwd_buf_len = DEFAULT_PWD_BUF_SZ; /* per the man page */
+ }
+
+ pwd_buf = alloca(pwd_buf_len);
+ if (!pwd_buf)
+ goto err;
+
+ getpwuid_r(uid, &pwd, pwd_buf, pwd_buf_len, &pwd_result);
+ if (!pwd_result)
+ goto err;
+
+ ret = gf_strdup(pwd.pw_name);
+ if (ret)
+ return ret;
+ else
+ gf_log(this->name, GF_LOG_ERROR,
+ "gf_strdup failed, failing username "
+ "resolution.");
+err:
+ return ret;
+}
+
+char *
+_resolve_group_name(xlator_t *this, gid_t gid)
+{
+ struct group grp;
+ struct group *grp_result = NULL;
+ size_t grp_buf_len;
+ char *grp_buf = NULL;
+ char *ret = NULL;
+
+ /* Prepare our buffer for the gid->group name translation */
+#ifdef _SC_GETGR_R_SIZE_MAX
+ grp_buf_len = sysconf(_SC_GETGR_R_SIZE_MAX);
+#else
+ grp_buf_len = -1;
+#endif
+ if (grp_buf_len == -1) {
+ grp_buf_len = DEFAULT_GRP_BUF_SZ; /* per the man page */
+ }
+
+ grp_buf = alloca(grp_buf_len);
+ if (!grp_buf) {
+ goto err;
+ }
+
+ if (getgrgid_r(gid, &grp, grp_buf, grp_buf_len, &grp_result) != 0)
+ goto err;
+
+ if (!grp_result)
+ goto err;
+
+ ret = gf_strdup(grp.gr_name);
+ if (ret)
+ return ret;
+ else
+ gf_log(this->name, GF_LOG_ERROR,
+ "gf_strdup failed, failing username "
+ "resolution.");
+err:
+ return ret;
+}
+
+/*
+ * This function writes out a latency sample to a given file descriptor
+ * and beautifies the output in the process.
+ */
+void
+_io_stats_write_latency_sample(xlator_t *this, ios_sample_t *sample,
+ FILE *logfp)
+{
+ double epoch_time = 0.00;
+ char *xlator_name = NULL;
+ char *instance_name = NULL;
+ char *hostname = NULL;
+ char *identifier = NULL;
+ char *port = NULL;
+ char *port_pos = NULL;
+ char *group_name = NULL;
+ char *username = NULL;
+ struct ios_conf *conf = NULL;
+
+ conf = this->private;
+
+ epoch_time = (sample->timestamp).tv_sec +
+ ((sample->timestamp).tv_usec / 1000000.0);
+
+ if (strlen(sample->identifier) == 0) {
+ hostname = "Unknown";
+ port = "Unknown";
+ } else {
+ identifier = strdupa(sample->identifier);
+ port_pos = strrchr(identifier, ':');
+ if (!port_pos || strlen(port_pos) < 2)
+ goto err;
+ port = strdupa(port_pos + 1);
+ if (!port)
+ goto err;
+ *port_pos = '\0';
+ hostname = gf_rev_dns_lookup_cached(identifier, conf->dnscache);
+ if (!hostname)
+ hostname = "Unknown";
+ }
+
+ xlator_name = conf->unique_id;
+ if (!xlator_name || strlen(xlator_name) == 0)
+ xlator_name = "Unknown";
+
+ instance_name = this->instance_name;
+ if (!instance_name || strlen(instance_name) == 0)
+ instance_name = "N/A";
+
+ /* Resolve the UID to a string username */
+ username = _resolve_username(this, sample->uid);
+ if (!username) {
+ username = GF_MALLOC(30, gf_common_mt_char);
+ if (!username) {
+ goto out;
+ }
+ sprintf(username, "%d", (int32_t)sample->uid);
+ }
+
+ /* Resolve the GID to a string group name */
+ group_name = _resolve_group_name(this, sample->gid);
+ if (!group_name) {
+ group_name = GF_MALLOC(30, gf_common_mt_char);
+ if (!group_name) {
+ goto out;
+ }
+ sprintf(group_name, "%d", (int32_t)sample->gid);
+ }
+
+ ios_log(this, logfp, "%0.6lf,%s,%s,%0.4lf,%s,%s,%s,%s,%s,%s", epoch_time,
+ fop_enum_to_pri_string(sample->fop_type),
+ gf_fop_string(sample->fop_type), sample->elapsed, xlator_name,
+ instance_name, username, group_name, hostname, port);
+ goto out;
+err:
+ gf_log(this->name, GF_LOG_ERROR, "Error parsing socket identifier");
+out:
+ GF_FREE(group_name);
+ GF_FREE(username);
+}
+
+/*
+ * Takes our current sample buffer in conf->io_sample_buf, and saves
+ * a reference to this, init's a new buffer, and then dumps out the
+ * contents of the saved reference.
+ */
int
-io_stats_dump_global_to_dict (xlator_t *this, struct ios_global_stats *stats,
- struct timeval *now, int interval, dict_t *dict)
+io_stats_dump_latency_samples_logfp(xlator_t *this, FILE *logfp)
{
- int ret = 0;
- char key[256] = {0};
- uint64_t sec = 0;
- int i = 0;
- uint64_t count = 0;
+ uint64_t i = 0;
+ struct ios_conf *conf = NULL;
+ ios_sample_buf_t *sample_buf = NULL;
+ int ret = 1; /* Default to error */
+
+ conf = this->private;
+
+ /* Save pointer to old buffer; the CS equivalent of
+ * Indiana Jones: https://www.youtube.com/watch?v=Pr-8AP0To4k,
+ * though ours will end better I hope!
+ */
+ sample_buf = conf->ios_sample_buf;
+ if (!sample_buf) {
+ gf_log(this->name, GF_LOG_WARNING, "Sampling buffer is null, bailing!");
+ goto out;
+ }
+
+ /* Empty case, nothing to do, exit. */
+ if (sample_buf->collected == 0) {
+ gf_log(this->name, GF_LOG_DEBUG, "No samples, dump not required.");
+ ret = 0;
+ goto out;
+ }
+
+ /* Init a new buffer, so we are free to work on the one we saved a
+ * reference to above.
+ */
+ if (ios_init_sample_buf(conf) != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to init new sampling buffer, out of memory?");
+ goto out;
+ }
+
+ /* Wrap-around case, dump from pos to sample_buf->size -1
+ * and then from 0 to sample_buf->pos (covered off by
+ * "simple case")
+ */
+ if (sample_buf->collected > sample_buf->pos + 1) {
+ for (i = sample_buf->pos; i < sample_buf->size; i++) {
+ _io_stats_write_latency_sample(this, &(sample_buf->ios_samples[i]),
+ logfp);
+ }
+ }
- GF_ASSERT (stats);
- GF_ASSERT (now);
- GF_ASSERT (dict);
- GF_ASSERT (this);
+ /* Simple case: Dump from 0 to sample_buf->pos */
+ for (i = 0; i < sample_buf->pos; i++) {
+ _io_stats_write_latency_sample(this, &(sample_buf->ios_samples[i]),
+ logfp);
+ }
+ ios_destroy_sample_buf(sample_buf);
- if (interval == -1)
- snprintf (key, sizeof (key), "cumulative");
+out:
+ return ret;
+}
+
+int
+io_stats_dump_global_to_logfp(xlator_t *this, struct ios_global_stats *stats,
+ time_t now, int interval, FILE *logfp)
+{
+ int i = 0;
+ int per_line = 0;
+ int index = 0;
+ struct ios_stat_head *list_head = NULL;
+ struct ios_conf *conf = NULL;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char str_header[128] = {0};
+ char str_read[128] = {0};
+ char str_write[128] = {0};
+ uint64_t fop_hits = 0;
+ uint64_t block_count_read = 0;
+ uint64_t block_count_write = 0;
+
+ conf = this->private;
+
+ if (interval == -1)
+ ios_log(this, logfp, "\n=== Cumulative stats ===");
+ else
+ ios_log(this, logfp, "\n=== Interval %d stats ===", interval);
+ ios_log(this, logfp, " Duration : %" PRIu64 " secs",
+ (uint64_t)(now - stats->started_at));
+ ios_log(this, logfp, " BytesRead : %" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(stats->data_read));
+ ios_log(this, logfp, " BytesWritten : %" GF_PRI_ATOMIC "\n",
+ GF_ATOMIC_GET(stats->data_written));
+
+ snprintf(str_header, sizeof(str_header), "%-12s %c", "Block Size", ':');
+ snprintf(str_read, sizeof(str_read), "%-12s %c", "Read Count", ':');
+ snprintf(str_write, sizeof(str_write), "%-12s %c", "Write Count", ':');
+ index = 14;
+ for (i = 0; i < IOS_BLOCK_COUNT_SIZE; i++) {
+ block_count_read = GF_ATOMIC_GET(stats->block_count_read[i]);
+ block_count_write = GF_ATOMIC_GET(stats->block_count_write[i]);
+ if ((block_count_read == 0) && (block_count_write == 0))
+ continue;
+ per_line++;
+
+ snprintf(str_header + index, sizeof(str_header) - index, "%16dB+",
+ (1 << i));
+ if (block_count_read)
+ snprintf(str_read + index, sizeof(str_read) - index, "%18" PRId64,
+ block_count_read);
else
- snprintf (key, sizeof (key), "interval");
- ret = dict_set_int32 (dict, key, interval);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "failed to set "
- "interval %d", interval);
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-duration", interval);
- sec = (uint64_t) (now->tv_sec - stats->started_at.tv_sec);
- ret = dict_set_uint64 (dict, key, sec);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "failed to set "
- "duration(%d) - %"PRId64, interval, sec);
- goto out;
+ snprintf(str_read + index, sizeof(str_read) - index, "%18s", "0");
+ if (block_count_write)
+ snprintf(str_write + index, sizeof(str_write) - index,
+ "%18" GF_PRI_ATOMIC, block_count_write);
+ else
+ snprintf(str_write + index, sizeof(str_write) - index, "%18s", "0");
+
+ index += 18;
+ if (per_line == 3) {
+ ios_log(this, logfp, "%s", str_header);
+ ios_log(this, logfp, "%s", str_read);
+ ios_log(this, logfp, "%s\n", str_write);
+
+ snprintf(str_header, sizeof(str_header), "%-12s %c", "Block Size",
+ ':');
+ snprintf(str_read, sizeof(str_read), "%-12s %c", "Read Count", ':');
+ snprintf(str_write, sizeof(str_write), "%-12s %c", "Write Count",
+ ':');
+
+ index = 14;
+ per_line = 0;
+ }
+ }
+
+ if (per_line != 0) {
+ ios_log(this, logfp, "%s", str_header);
+ ios_log(this, logfp, "%s", str_read);
+ ios_log(this, logfp, "%s\n", str_write);
+ }
+
+ ios_log(this, logfp, "%-13s %10s %14s %14s %14s", "Fop", "Call Count",
+ "Avg-Latency", "Min-Latency", "Max-Latency");
+ ios_log(this, logfp, "%-13s %10s %14s %14s %14s", "---", "----------",
+ "-----------", "-----------", "-----------");
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ fop_hits = GF_ATOMIC_GET(stats->fop_hits[i]);
+ if (fop_hits && !stats->latency[i].avg)
+ ios_log(this, logfp,
+ "%-13s %10" GF_PRI_ATOMIC
+ " %11s "
+ "us %11s us %11s us",
+ gf_fop_list[i], fop_hits, "0", "0", "0");
+ else if (fop_hits && stats->latency[i].avg)
+ ios_log(this, logfp,
+ "%-13s %10" GF_PRI_ATOMIC
+ " "
+ "%11.2lf us %11.2lf us %11.2lf us",
+ gf_fop_list[i], fop_hits, stats->latency[i].avg,
+ stats->latency[i].min, stats->latency[i].max);
+ }
+
+ for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) {
+ fop_hits = GF_ATOMIC_GET(stats->upcall_hits[i]);
+ if (fop_hits)
+ ios_log(this, logfp,
+ "%-13s %10" PRId64
+ " %11s "
+ "us %11s us %11s us",
+ gf_upcall_list[i], fop_hits, "0", "0", "0");
+ }
+
+ ios_log(this, logfp,
+ "------ ----- ----- ----- ----- ----- ----- ----- "
+ " ----- ----- ----- -----\n");
+
+ if (interval == -1) {
+ LOCK(&conf->lock);
+ {
+ gf_time_fmt_tv(timestr, sizeof timestr,
+ &conf->cumulative.max_openfd_time, gf_timefmt_FT);
+ ios_log(this, logfp,
+ "Current open fd's: %" PRId64 " Max open fd's: %" PRId64
+ " time %s",
+ conf->cumulative.nr_opens, conf->cumulative.max_nr_opens,
+ timestr);
}
+ UNLOCK(&conf->lock);
+ ios_log(this, logfp, "\n==========Open File Stats========");
+ ios_log(this, logfp, "\nCOUNT: \t FILE NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_OPEN];
+ ios_dump_file_stats(list_head, this, logfp);
+
+ ios_log(this, logfp, "\n==========Read File Stats========");
+ ios_log(this, logfp, "\nCOUNT: \t FILE NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_READ];
+ ios_dump_file_stats(list_head, this, logfp);
+
+ ios_log(this, logfp, "\n==========Write File Stats========");
+ ios_log(this, logfp, "\nCOUNT: \t FILE NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_WRITE];
+ ios_dump_file_stats(list_head, this, logfp);
+
+ ios_log(this, logfp, "\n==========Directory open stats========");
+ ios_log(this, logfp, "\nCOUNT: \t DIRECTORY NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_OPENDIR];
+ ios_dump_file_stats(list_head, this, logfp);
+
+ ios_log(this, logfp, "\n========Directory readdirp Stats=======");
+ ios_log(this, logfp, "\nCOUNT: \t DIRECTORY NAME");
+ list_head = &conf->list[IOS_STATS_TYPE_READDIRP];
+ ios_dump_file_stats(list_head, this, logfp);
+
+ ios_log(this, logfp, "\n========Read Throughput File Stats=====");
+ ios_log(this, logfp,
+ "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)"
+ "\tFILE NAME");
+ list_head = &conf->thru_list[IOS_STATS_THRU_READ];
+ ios_dump_throughput_stats(list_head, this, logfp, IOS_STATS_THRU_READ);
+
+ ios_log(this, logfp, "\n======Write Throughput File Stats======");
+ ios_log(this, logfp,
+ "\nTIMESTAMP \t\t\t THROUGHPUT(KBPS)"
+ "\tFILE NAME");
+ list_head = &conf->thru_list[IOS_STATS_THRU_WRITE];
+ ios_dump_throughput_stats(list_head, this, logfp, IOS_STATS_THRU_WRITE);
+ }
+ return 0;
+}
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-total-read", interval);
- ret = dict_set_uint64 (dict, key, stats->data_read);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "failed to set total "
- "read(%d) - %"PRId64, interval, stats->data_read);
+int
+io_stats_dump_global_to_dict(xlator_t *this, struct ios_global_stats *stats,
+ time_t now, int interval, dict_t *dict)
+{
+ int ret = 0;
+ char key[64] = {0};
+ uint64_t sec = 0;
+ int i = 0;
+ uint64_t count = 0;
+ uint64_t fop_hits = 0;
+
+ GF_ASSERT(stats);
+ GF_ASSERT(now);
+ GF_ASSERT(dict);
+ GF_ASSERT(this);
+
+ if (interval == -1)
+ snprintf(key, sizeof(key), "cumulative");
+ else
+ snprintf(key, sizeof(key), "interval");
+ ret = dict_set_int32(dict, key, interval);
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set "
+ "interval %d",
+ interval);
+
+ snprintf(key, sizeof(key), "%d-duration", interval);
+ sec = now - stats->started_at;
+ ret = dict_set_uint64(dict, key, sec);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set "
+ "duration(%d) - %" PRId64,
+ interval, sec);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%d-total-read", interval);
+ ret = dict_set_uint64(dict, key, GF_ATOMIC_GET(stats->data_read));
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set total "
+ "read(%d) - %" GF_PRI_ATOMIC,
+ interval, GF_ATOMIC_GET(stats->data_read));
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%d-total-write", interval);
+ ret = dict_set_uint64(dict, key, GF_ATOMIC_GET(stats->data_written));
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set total "
+ "write(%d) - %" GF_PRI_ATOMIC,
+ interval, GF_ATOMIC_GET(stats->data_written));
+ goto out;
+ }
+ for (i = 0; i < 32; i++) {
+ count = GF_ATOMIC_GET(stats->block_count_read[i]);
+ if (count) {
+ snprintf(key, sizeof(key), "%d-read-%d", interval, (1 << i));
+ ret = dict_set_uint64(dict, key, count);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to "
+ "set read-%db+, with: %" PRId64,
+ (1 << i), count);
goto out;
+ }
}
-
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-total-write", interval);
- ret = dict_set_uint64 (dict, key, stats->data_written);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "failed to set total "
- "write(%d) - %"PRId64, interval, stats->data_written);
+ }
+
+ for (i = 0; i < IOS_BLOCK_COUNT_SIZE; i++) {
+ count = GF_ATOMIC_GET(stats->block_count_write[i]);
+ if (count) {
+ snprintf(key, sizeof(key), "%d-write-%d", interval, (1 << i));
+ ret = dict_set_uint64(dict, key, count);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to "
+ "set write-%db+, with: %" PRId64,
+ (1 << i), count);
goto out;
+ }
}
- for (i = 0; i < 32; i++) {
- if (stats->block_count_read[i]) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "%d-read-%d", interval,
- (1 << i));
- count = stats->block_count_read[i];
- ret = dict_set_uint64 (dict, key, count);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "failed to "
- "set read-%db+, with: %"PRId64,
- (1<<i), count);
- goto out;
- }
- }
+ }
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ fop_hits = GF_ATOMIC_GET(stats->fop_hits[i]);
+ if (fop_hits == 0)
+ continue;
+ snprintf(key, sizeof(key), "%d-%d-hits", interval, i);
+ ret = dict_set_uint64(dict, key, fop_hits);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set "
+ "%s-fop-hits: %" GF_PRI_ATOMIC,
+ gf_fop_list[i], fop_hits);
+ goto out;
}
- for (i = 0; i < 32; i++) {
- if (stats->block_count_write[i]) {
- snprintf (key, sizeof (key), "%d-write-%d", interval,
- (1<<i));
- count = stats->block_count_write[i];
- ret = dict_set_uint64 (dict, key, count);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "failed to "
- "set write-%db+, with: %"PRId64,
- (1<<i), count);
- goto out;
- }
- }
+ if (stats->latency[i].avg == 0)
+ continue;
+ snprintf(key, sizeof(key), "%d-%d-avglatency", interval, i);
+ ret = dict_set_double(dict, key, stats->latency[i].avg);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set %s "
+ "avglatency(%d) with %f",
+ gf_fop_list[i], interval, stats->latency[i].avg);
+ goto out;
}
-
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- if (stats->fop_hits[i] == 0)
- continue;
- snprintf (key, sizeof (key), "%d-%d-hits", interval, i);
- ret = dict_set_uint64 (dict, key, stats->fop_hits[i]);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "failed to "
- "set %s-fop-hits: %"PRIu64, gf_fop_list[i],
- stats->fop_hits[i]);
- goto out;
- }
-
- if (stats->latency[i].avg == 0)
- continue;
- snprintf (key, sizeof (key), "%d-%d-avglatency", interval, i);
- ret = dict_set_double (dict, key, stats->latency[i].avg);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "failed to set %s "
- "avglatency(%d) with %f", gf_fop_list[i],
- interval, stats->latency[i].avg);
- goto out;
- }
- snprintf (key, sizeof (key), "%d-%d-minlatency", interval, i);
- ret = dict_set_double (dict, key, stats->latency[i].min);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "failed to set %s "
- "minlatency(%d) with %f", gf_fop_list[i],
- interval, stats->latency[i].min);
- goto out;
- }
- snprintf (key, sizeof (key), "%d-%d-maxlatency", interval, i);
- ret = dict_set_double (dict, key, stats->latency[i].max);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "failed to set %s "
- "maxlatency(%d) with %f", gf_fop_list[i],
- interval, stats->latency[i].max);
- goto out;
- }
+ snprintf(key, sizeof(key), "%d-%d-minlatency", interval, i);
+ ret = dict_set_double(dict, key, stats->latency[i].min);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set %s "
+ "minlatency(%d) with %f",
+ gf_fop_list[i], interval, stats->latency[i].min);
+ goto out;
+ }
+ snprintf(key, sizeof(key), "%d-%d-maxlatency", interval, i);
+ ret = dict_set_double(dict, key, stats->latency[i].max);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to set %s "
+ "maxlatency(%d) with %f",
+ gf_fop_list[i], interval, stats->latency[i].max);
+ goto out;
+ }
+ }
+ for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++) {
+ fop_hits = GF_ATOMIC_GET(stats->upcall_hits[i]);
+ if (fop_hits == 0)
+ continue;
+ snprintf(key, sizeof(key), "%d-%d-upcall-hits", interval, i);
+ ret = dict_set_uint64(dict, key, fop_hits);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to "
+ "set %s-upcall-hits: %" PRIu64,
+ gf_upcall_list[i], fop_hits);
+ goto out;
}
+ }
out:
- gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
- return ret;
+ gf_log(this->name, GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
}
int
-io_stats_dump_global (xlator_t *this, struct ios_global_stats *stats,
- struct timeval *now, int interval,
- struct ios_dump_args *args)
+io_stats_dump_global(xlator_t *this, struct ios_global_stats *stats, time_t now,
+ int interval, struct ios_dump_args *args)
{
- int ret = -1;
-
- GF_ASSERT (args);
- GF_ASSERT (now);
- GF_ASSERT (stats);
- GF_ASSERT (this);
-
-
-
- switch (args->type) {
+ int ret = -1;
+
+ GF_ASSERT(args);
+ GF_ASSERT(now);
+ GF_ASSERT(stats);
+ GF_ASSERT(this);
+
+ switch (args->type) {
+ case IOS_DUMP_TYPE_JSON_FILE:
+ ret = io_stats_dump_global_to_json_logfp(this, stats, now, interval,
+ args->u.logfp);
+ break;
case IOS_DUMP_TYPE_FILE:
- ret = io_stats_dump_global_to_logfp (this, stats, now,
- interval, args->u.logfp);
- break;
+ ret = io_stats_dump_global_to_logfp(this, stats, now, interval,
+ args->u.logfp);
+ break;
case IOS_DUMP_TYPE_DICT:
- ret = io_stats_dump_global_to_dict (this, stats, now,
- interval, args->u.dict);
- break;
+ ret = io_stats_dump_global_to_dict(this, stats, now, interval,
+ args->u.dict);
+ break;
default:
- GF_ASSERT (0);
- ret = -1;
- break;
- }
- return ret;
+ GF_ASSERT(0);
+ ret = -1;
+ break;
+ }
+ return ret;
}
int
-ios_dump_args_init (struct ios_dump_args *args, ios_dump_type_t type,
- void *output)
+ios_dump_args_init(struct ios_dump_args *args, ios_dump_type_t type,
+ void *output)
{
- int ret = 0;
+ int ret = 0;
- GF_ASSERT (args);
- GF_ASSERT (type > IOS_DUMP_TYPE_NONE && type < IOS_DUMP_TYPE_MAX);
- GF_ASSERT (output);
+ GF_ASSERT(args);
+ GF_ASSERT(type > IOS_DUMP_TYPE_NONE && type < IOS_DUMP_TYPE_MAX);
+ GF_ASSERT(output);
- args->type = type;
- switch (args->type) {
+ args->type = type;
+ switch (args->type) {
+ case IOS_DUMP_TYPE_JSON_FILE:
case IOS_DUMP_TYPE_FILE:
- args->u.logfp = output;
- break;
+ args->u.logfp = output;
+ break;
case IOS_DUMP_TYPE_DICT:
- args->u.dict = output;
- break;
+ args->u.dict = output;
+ break;
default:
- GF_ASSERT (0);
- ret = -1;
- }
+ GF_ASSERT(0);
+ ret = -1;
+ }
- return ret;
+ return ret;
}
-int
-io_stats_dump (xlator_t *this, struct ios_dump_args *args)
+static void
+ios_global_stats_clear(struct ios_global_stats *stats, time_t now)
{
- struct ios_conf *conf = NULL;
- struct ios_global_stats cumulative = {0, };
- struct ios_global_stats incremental = {0, };
- int increment = 0;
- struct timeval now;
-
- GF_ASSERT (this);
- GF_ASSERT (args);
- GF_ASSERT (args->type > IOS_DUMP_TYPE_NONE);
- GF_ASSERT (args->type < IOS_DUMP_TYPE_MAX);
-
- conf = this->private;
+ GF_ASSERT(stats);
+ GF_ASSERT(now);
- gettimeofday (&now, NULL);
- LOCK (&conf->lock);
- {
- cumulative = conf->cumulative;
- incremental = conf->incremental;
+ memset(stats, 0, sizeof(*stats));
+ stats->started_at = now;
+}
+int
+io_stats_dump(xlator_t *this, struct ios_dump_args *args, ios_info_op_t op,
+ gf_boolean_t is_peek)
+{
+ struct ios_conf *conf = NULL;
+ struct ios_global_stats cumulative = {};
+ struct ios_global_stats incremental = {};
+ int increment = 0;
+ time_t now = 0;
+
+ GF_ASSERT(this);
+ GF_ASSERT(args);
+ GF_ASSERT(args->type > IOS_DUMP_TYPE_NONE);
+ GF_ASSERT(args->type < IOS_DUMP_TYPE_MAX);
+
+ conf = this->private;
+ now = gf_time();
+
+ LOCK(&conf->lock);
+ {
+ if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_CUMULATIVE)
+ cumulative = conf->cumulative;
+
+ if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_INCREMENTAL) {
+ incremental = conf->incremental;
+ increment = conf->increment;
+
+ if (!is_peek) {
increment = conf->increment++;
- memset (&conf->incremental, 0, sizeof (conf->incremental));
- conf->incremental.started_at = now;
+ ios_global_stats_clear(&conf->incremental, now);
+ }
}
- UNLOCK (&conf->lock);
+ }
+ UNLOCK(&conf->lock);
- io_stats_dump_global (this, &cumulative, &now, -1, args);
- io_stats_dump_global (this, &incremental, &now, increment, args);
+ if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_CUMULATIVE)
+ io_stats_dump_global(this, &cumulative, now, -1, args);
- return 0;
-}
+ if (op == GF_IOS_INFO_ALL || op == GF_IOS_INFO_INCREMENTAL)
+ io_stats_dump_global(this, &incremental, now, increment, args);
+ return 0;
+}
int
-io_stats_dump_fd (xlator_t *this, struct ios_fd *iosfd)
+io_stats_dump_fd(xlator_t *this, struct ios_fd *iosfd)
{
- struct ios_conf *conf = NULL;
- struct timeval now;
- uint64_t sec = 0;
- uint64_t usec = 0;
- int i = 0;
-
- conf = this->private;
-
- if (!conf->dump_fd_stats)
- return 0;
-
- if (!iosfd)
- return 0;
+ struct ios_conf *conf = NULL;
+ struct timeval now;
+ int i = 0;
+ double usecs = 0;
+ uint64_t data_read = 0;
+ uint64_t data_written = 0;
+ uint64_t block_count_read = 0;
+ uint64_t block_count_write = 0;
+
+ conf = this->private;
+
+ if (!conf->dump_fd_stats)
+ return 0;
- gettimeofday (&now, NULL);
+ if (!iosfd)
+ return 0;
- if (iosfd->opened_at.tv_usec > now.tv_usec) {
- now.tv_usec += 1000000;
- now.tv_usec--;
- }
+ gettimeofday(&now, NULL);
+ usecs = gf_tvdiff(&iosfd->opened_at, &now);
+
+ gf_log(this->name, GF_LOG_INFO, "--- fd stats ---");
+
+ if (iosfd->filename)
+ gf_log(this->name, GF_LOG_INFO, " Filename : %s", iosfd->filename);
+
+ if (usecs)
+ gf_log(this->name, GF_LOG_INFO, " Lifetime : %lf secs", usecs);
+
+ data_read = GF_ATOMIC_GET(iosfd->data_read);
+ if (data_read)
+ gf_log(this->name, GF_LOG_INFO, " BytesRead : %" PRId64 " bytes",
+ data_read);
+
+ data_written = GF_ATOMIC_GET(iosfd->data_written);
+ if (data_written)
+ gf_log(this->name, GF_LOG_INFO, " BytesWritten : %" PRId64 " bytes",
+ data_written);
+
+ for (i = 0; i < 32; i++) {
+ block_count_read = GF_ATOMIC_GET(iosfd->block_count_read[i]);
+ if (block_count_read)
+ gf_log(this->name, GF_LOG_INFO,
+ " Read %06db+ :"
+ "%" PRId64,
+ (1 << i), block_count_read);
+ }
+ for (i = 0; i < IOS_BLOCK_COUNT_SIZE; i++) {
+ block_count_write = GF_ATOMIC_GET(iosfd->block_count_write[i]);
+ if (block_count_write)
+ gf_log(this->name, GF_LOG_INFO, "Write %06db+ : %" PRId64, (1 << i),
+ block_count_write);
+ }
+ return 0;
+}
- sec = now.tv_sec - iosfd->opened_at.tv_sec;
- usec = now.tv_usec - iosfd->opened_at.tv_usec;
-
- gf_log (this->name, GF_LOG_INFO,
- "--- fd stats ---");
-
- if (iosfd->filename)
- gf_log (this->name, GF_LOG_INFO,
- " Filename : %s",
- iosfd->filename);
-
- if (sec)
- gf_log (this->name, GF_LOG_INFO,
- " Lifetime : %"PRId64"secs, %"PRId64"usecs",
- sec, usec);
-
- if (iosfd->data_read)
- gf_log (this->name, GF_LOG_INFO,
- " BytesRead : %"PRId64" bytes",
- iosfd->data_read);
-
- if (iosfd->data_written)
- gf_log (this->name, GF_LOG_INFO,
- " BytesWritten : %"PRId64" bytes",
- iosfd->data_written);
-
- for (i = 0; i < 32; i++) {
- if (iosfd->block_count_read[i])
- gf_log (this->name, GF_LOG_INFO,
- " Read %06db+ : %"PRId64,
- (1 << i), iosfd->block_count_read[i]);
- }
- for (i = 0; i < 32; i++) {
- if (iosfd->block_count_write[i])
- gf_log (this->name, GF_LOG_INFO,
- "Write %06db+ : %"PRId64,
- (1 << i), iosfd->block_count_write[i]);
- }
- return 0;
+void
+collect_ios_latency_sample(struct ios_conf *conf, glusterfs_fop_t fop_type,
+ double elapsed, call_frame_t *frame)
+{
+ ios_sample_buf_t *ios_sample_buf = NULL;
+ ios_sample_t *ios_sample = NULL;
+ struct timespec *timestamp = NULL;
+ call_stack_t *root = NULL;
+
+ ios_sample_buf = conf->ios_sample_buf;
+ LOCK(&conf->ios_sampling_lock);
+ if (conf->ios_sample_interval == 0 ||
+ ios_sample_buf->observed % conf->ios_sample_interval != 0)
+ goto out;
+
+ timestamp = &frame->begin;
+ root = frame->root;
+
+ ios_sample = &(ios_sample_buf->ios_samples[ios_sample_buf->pos]);
+ ios_sample->elapsed = elapsed;
+ ios_sample->fop_type = fop_type;
+ ios_sample->uid = root->uid;
+ ios_sample->gid = root->gid;
+ (ios_sample->timestamp).tv_sec = timestamp->tv_sec;
+ (ios_sample->timestamp).tv_usec = timestamp->tv_nsec / 1000;
+ memcpy(&ios_sample->identifier, &root->identifier,
+ sizeof(root->identifier));
+
+ /* We've reached the end of the circular buffer, start from the
+ * beginning. */
+ if (ios_sample_buf->pos == (ios_sample_buf->size - 1))
+ ios_sample_buf->pos = 0;
+ else
+ ios_sample_buf->pos++;
+ ios_sample_buf->collected++;
+out:
+ ios_sample_buf->observed++;
+ UNLOCK(&conf->ios_sampling_lock);
+ return;
}
static void
-update_ios_latency_stats (struct ios_global_stats *stats, double elapsed,
- glusterfs_fop_t op)
+update_ios_latency_stats(struct ios_global_stats *stats, double elapsed,
+ glusterfs_fop_t op)
{
- double avg;
+ double avg;
- GF_ASSERT (stats);
+ GF_ASSERT(stats);
- if (!stats->latency[op].min)
- stats->latency[op].min = elapsed;
- if (stats->latency[op].min > elapsed)
- stats->latency[op].min = elapsed;
- if (stats->latency[op].max < elapsed)
- stats->latency[op].max = elapsed;
+ stats->latency[op].total += elapsed;
- avg = stats->latency[op].avg;
+ if (!stats->latency[op].min)
+ stats->latency[op].min = elapsed;
+ if (stats->latency[op].min > elapsed)
+ stats->latency[op].min = elapsed;
+ if (stats->latency[op].max < elapsed)
+ stats->latency[op].max = elapsed;
- stats->latency[op].avg = avg + (elapsed - avg) / stats->fop_hits[op];
+ avg = stats->latency[op].avg;
+
+ stats->latency[op].avg = avg + (elapsed - avg) /
+ GF_ATOMIC_GET(stats->fop_hits[op]);
}
int
-update_ios_latency (struct ios_conf *conf, call_frame_t *frame,
- glusterfs_fop_t op)
+update_ios_latency(struct ios_conf *conf, call_frame_t *frame,
+ glusterfs_fop_t op)
{
- double elapsed;
- struct timeval *begin, *end;
+ double elapsed;
+ struct timespec *begin, *end;
- begin = &frame->begin;
- end = &frame->end;
+ begin = &frame->begin;
+ end = &frame->end;
- elapsed = (end->tv_sec - begin->tv_sec) * 1e6
- + (end->tv_usec - begin->tv_usec);
+ elapsed = gf_tsdiff(begin, end) / 1000.0;
- update_ios_latency_stats (&conf->cumulative, elapsed, op);
- update_ios_latency_stats (&conf->incremental, elapsed, op);
+ update_ios_latency_stats(&conf->cumulative, elapsed, op);
+ update_ios_latency_stats(&conf->incremental, elapsed, op);
+ collect_ios_latency_sample(conf, op, elapsed, frame);
- return 0;
+ return 0;
}
int32_t
-io_stats_dump_stats_to_dict (xlator_t *this, dict_t *resp,
- ios_stats_type_t flags, int32_t list_cnt)
-{
- struct ios_conf *conf = NULL;
- int cnt = 0;
- char key[256];
- struct ios_stat_head *list_head = NULL;
- struct ios_stat_list *entry = NULL;
- int ret = -1;
- ios_stats_thru_t index = IOS_STATS_THRU_MAX;
- char timestr[256] = {0, };
- char *dict_timestr = NULL;
-
- conf = this->private;
-
- switch (flags) {
- case IOS_STATS_TYPE_OPEN:
- list_head = &conf->list[IOS_STATS_TYPE_OPEN];
- LOCK (&conf->lock);
- {
- ret = dict_set_uint64 (resp, "current-open",
- conf->cumulative.nr_opens);
- if (ret)
- goto unlock;
- ret = dict_set_uint64 (resp, "max-open",
- conf->cumulative.max_nr_opens);
-
- gf_time_fmt (timestr, sizeof timestr,
- conf->cumulative.max_openfd_time.tv_sec,
- gf_timefmt_FT);
- snprintf (timestr + strlen (timestr), sizeof timestr - strlen (timestr),
- ".%"GF_PRI_SUSECONDS,
- conf->cumulative.max_openfd_time.tv_usec);
-
- dict_timestr = gf_strdup (timestr);
- if (!dict_timestr)
- goto unlock;
- ret = dict_set_dynstr (resp, "max-openfd-time",
- dict_timestr);
- if (ret)
- goto unlock;
- }
+io_stats_dump_stats_to_dict(xlator_t *this, dict_t *resp,
+ ios_stats_type_t flags, int32_t list_cnt)
+{
+ struct ios_conf *conf = NULL;
+ int cnt = 0;
+ char key[32];
+ int keylen;
+ struct ios_stat_head *list_head = NULL;
+ struct ios_stat_list *entry = NULL;
+ int ret = -1;
+ ios_stats_thru_t index = IOS_STATS_THRU_MAX;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char *dict_timestr = NULL;
+
+ conf = this->private;
+
+ switch (flags) {
+ case IOS_STATS_TYPE_OPEN:
+ list_head = &conf->list[IOS_STATS_TYPE_OPEN];
+ LOCK(&conf->lock);
+ {
+ ret = dict_set_uint64(resp, "current-open",
+ conf->cumulative.nr_opens);
+ if (ret)
+ goto unlock;
+ ret = dict_set_uint64(resp, "max-open",
+ conf->cumulative.max_nr_opens);
+
+ gf_time_fmt_tv(timestr, sizeof timestr,
+ &conf->cumulative.max_openfd_time,
+ gf_timefmt_FT);
+
+ dict_timestr = gf_strdup(timestr);
+ if (!dict_timestr)
+ goto unlock;
+ ret = dict_set_dynstr(resp, "max-openfd-time", dict_timestr);
+ if (ret)
+ goto unlock;
+ }
unlock:
- UNLOCK (&conf->lock);
- /* Do not proceed if we came here because of some error
- * during the dict operation */
- if (ret)
- goto out;
- break;
- case IOS_STATS_TYPE_READ:
- list_head = &conf->list[IOS_STATS_TYPE_READ];
- break;
- case IOS_STATS_TYPE_WRITE:
- list_head = &conf->list[IOS_STATS_TYPE_WRITE];
- break;
- case IOS_STATS_TYPE_OPENDIR:
- list_head = &conf->list[IOS_STATS_TYPE_OPENDIR];
- break;
- case IOS_STATS_TYPE_READDIRP:
- list_head = &conf->list[IOS_STATS_TYPE_READDIRP];
- break;
- case IOS_STATS_TYPE_READ_THROUGHPUT:
- list_head = &conf->thru_list[IOS_STATS_THRU_READ];
- index = IOS_STATS_THRU_READ;
- break;
- case IOS_STATS_TYPE_WRITE_THROUGHPUT:
- list_head = &conf->thru_list[IOS_STATS_THRU_WRITE];
- index = IOS_STATS_THRU_WRITE;
- break;
-
- default:
- goto out;
- }
- ret = dict_set_int32 (resp, "top-op", flags);
- if (!list_cnt)
+ UNLOCK(&conf->lock);
+ /* Do not proceed if we came here because of some error
+ * during the dict operation */
+ if (ret)
goto out;
- LOCK (&list_head->lock);
- {
- list_for_each_entry (entry, &list_head->iosstats->list, list) {
-
- cnt++;
- snprintf (key, 256, "%s-%d", "filename", cnt);
- ret = dict_set_str (resp, key, entry->iosstat->filename);
- if (ret)
- goto unlock_list_head;
- snprintf (key, 256, "%s-%d", "value",cnt);
- ret = dict_set_uint64 (resp, key, entry->value);
- if (ret)
- goto unlock_list_head;
- if (index != IOS_STATS_THRU_MAX) {
- snprintf (key, 256, "%s-%d", "time-sec", cnt);
- ret = dict_set_int32 (resp, key,
- entry->iosstat->thru_counters[index].time.tv_sec);
- if (ret)
- goto unlock_list_head;
- snprintf (key, 256, "%s-%d", "time-usec", cnt);
- ret = dict_set_int32 (resp, key,
- entry->iosstat->thru_counters[index].time.tv_usec);
- if (ret)
- goto unlock_list_head;
- }
- if (cnt == list_cnt)
- break;
+ break;
+ case IOS_STATS_TYPE_READ:
+ list_head = &conf->list[IOS_STATS_TYPE_READ];
+ break;
+ case IOS_STATS_TYPE_WRITE:
+ list_head = &conf->list[IOS_STATS_TYPE_WRITE];
+ break;
+ case IOS_STATS_TYPE_OPENDIR:
+ list_head = &conf->list[IOS_STATS_TYPE_OPENDIR];
+ break;
+ case IOS_STATS_TYPE_READDIRP:
+ list_head = &conf->list[IOS_STATS_TYPE_READDIRP];
+ break;
+ case IOS_STATS_TYPE_READ_THROUGHPUT:
+ list_head = &conf->thru_list[IOS_STATS_THRU_READ];
+ index = IOS_STATS_THRU_READ;
+ break;
+ case IOS_STATS_TYPE_WRITE_THROUGHPUT:
+ list_head = &conf->thru_list[IOS_STATS_THRU_WRITE];
+ index = IOS_STATS_THRU_WRITE;
+ break;
- }
+ default:
+ goto out;
+ }
+ ret = dict_set_int32_sizen(resp, "top-op", flags);
+ if (!list_cnt)
+ goto out;
+ LOCK(&list_head->lock);
+ {
+ list_for_each_entry(entry, &list_head->iosstats->list, list)
+ {
+ cnt++;
+ keylen = snprintf(key, sizeof(key), "filename-%d", cnt);
+ ret = dict_set_strn(resp, key, keylen, entry->iosstat->filename);
+ if (ret)
+ goto unlock_list_head;
+ snprintf(key, sizeof(key), "value-%d", cnt);
+ ret = dict_set_uint64(resp, key, entry->value);
+ if (ret)
+ goto unlock_list_head;
+ if (index != IOS_STATS_THRU_MAX) {
+ keylen = snprintf(key, sizeof(key), "time-sec-%d", cnt);
+ ret = dict_set_int32n(
+ resp, key, keylen,
+ entry->iosstat->thru_counters[index].time.tv_sec);
+ if (ret)
+ goto unlock_list_head;
+ keylen = snprintf(key, sizeof(key), "time-usec-%d", cnt);
+ ret = dict_set_int32n(
+ resp, key, keylen,
+ entry->iosstat->thru_counters[index].time.tv_usec);
+ if (ret)
+ goto unlock_list_head;
+ }
+ if (cnt == list_cnt)
+ break;
}
+ }
unlock_list_head:
- UNLOCK (&list_head->lock);
- /* ret is !=0 if some dict operation in the above critical region
- * failed. */
- if (ret)
- goto out;
- ret = dict_set_int32 (resp, "members", cnt);
- out:
- return ret;
+ UNLOCK(&list_head->lock);
+ /* ret is !=0 if some dict operation in the above critical region
+ * failed. */
+ if (ret)
+ goto out;
+ ret = dict_set_int32_sizen(resp, "members", cnt);
+out:
+ return ret;
}
-int
-io_stats_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+static struct ios_stat *
+ios_init_iosstat(xlator_t *this, char *path, uuid_t gfid, inode_t *inode)
{
- struct ios_fd *iosfd = NULL;
- char *path = NULL;
- struct ios_stat *iosstat = NULL;
- struct ios_conf *conf = NULL;
+ struct ios_stat *iosstat = NULL;
+ int i = 0;
- conf = this->private;
+ iosstat = GF_CALLOC(1, sizeof(*iosstat), gf_io_stats_mt_ios_stat);
+ if (!iosstat)
+ goto out;
- path = frame->local;
- frame->local = NULL;
+ iosstat->filename = gf_strdup(path);
+ gf_uuid_copy(iosstat->gfid, gfid);
+ LOCK_INIT(&iosstat->lock);
- if (!path)
- goto unwind;
+ for (i = 0; i < IOS_STATS_TYPE_MAX; i++)
+ GF_ATOMIC_INIT(iosstat->counters[i], 0);
- if (op_ret < 0) {
- GF_FREE (path);
- goto unwind;
- }
-
- iosfd = GF_CALLOC (1, sizeof (*iosfd), gf_io_stats_mt_ios_fd);
- if (!iosfd) {
- GF_FREE (path);
- goto unwind;
- }
+ ios_inode_ctx_set(inode, this, iosstat);
- iosfd->filename = path;
- gettimeofday (&iosfd->opened_at, NULL);
+out:
+ return iosstat;
+}
- ios_fd_ctx_set (fd, this, iosfd);
- LOCK (&conf->lock);
- {
- conf->cumulative.nr_opens++;
- if (conf->cumulative.nr_opens > conf->cumulative.max_nr_opens) {
- conf->cumulative.max_nr_opens = conf->cumulative.nr_opens;
- conf->cumulative.max_openfd_time = iosfd->opened_at;
- }
+int
+io_stats_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ struct ios_fd *iosfd = NULL;
+ char *path = NULL;
+ struct ios_stat *iosstat = NULL;
+ struct ios_conf *conf = NULL;
+
+ conf = this->private;
+
+ path = frame->local;
+ frame->local = NULL;
+
+ if (!path)
+ goto unwind;
+
+ if (op_ret < 0) {
+ GF_FREE(path);
+ goto unwind;
+ }
+
+ iosfd = GF_CALLOC(1, sizeof(*iosfd), gf_io_stats_mt_ios_fd);
+ if (!iosfd) {
+ GF_FREE(path);
+ goto unwind;
+ }
+
+ iosfd->filename = path;
+ gettimeofday(&iosfd->opened_at, NULL);
+
+ ios_fd_ctx_set(fd, this, iosfd);
+ LOCK(&conf->lock);
+ {
+ conf->cumulative.nr_opens++;
+ if (conf->cumulative.nr_opens > conf->cumulative.max_nr_opens) {
+ conf->cumulative.max_nr_opens = conf->cumulative.nr_opens;
+ conf->cumulative.max_openfd_time = iosfd->opened_at;
}
- UNLOCK (&conf->lock);
+ }
+ UNLOCK(&conf->lock);
- iosstat = GF_CALLOC (1, sizeof (*iosstat), gf_io_stats_mt_ios_stat);
- if (!iosstat) {
- GF_FREE (path);
- goto unwind;
- }
- iosstat->filename = gf_strdup (path);
- uuid_copy (iosstat->gfid, buf->ia_gfid);
- LOCK_INIT (&iosstat->lock);
- ios_inode_ctx_set (fd->inode, this, iosstat);
+ iosstat = ios_init_iosstat(this, path, buf->ia_gfid, inode);
+ if (!iosstat)
+ GF_FREE(path);
unwind:
- UPDATE_PROFILE_STATS (frame, CREATE);
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, CREATE);
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
}
-
int
-io_stats_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+io_stats_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- struct ios_fd *iosfd = NULL;
- char *path = NULL;
- struct ios_stat *iosstat = NULL;
- struct ios_conf *conf = NULL;
-
- conf = this->private;
- path = frame->local;
- frame->local = NULL;
-
- if (!path)
- goto unwind;
-
- if (op_ret < 0) {
- GF_FREE (path);
- goto unwind;
- }
-
- iosfd = GF_CALLOC (1, sizeof (*iosfd), gf_io_stats_mt_ios_fd);
- if (!iosfd) {
- GF_FREE (path);
- goto unwind;
- }
-
- iosfd->filename = path;
- gettimeofday (&iosfd->opened_at, NULL);
-
- ios_fd_ctx_set (fd, this, iosfd);
-
- ios_inode_ctx_get (fd->inode, this, &iosstat);
- if (!iosstat) {
- iosstat = GF_CALLOC (1, sizeof (*iosstat),
- gf_io_stats_mt_ios_stat);
- if (iosstat) {
- iosstat->filename = gf_strdup (path);
- uuid_copy (iosstat->gfid, fd->inode->gfid);
- LOCK_INIT (&iosstat->lock);
- ios_inode_ctx_set (fd->inode, this, iosstat);
- }
- }
-
- LOCK (&conf->lock);
- {
- conf->cumulative.nr_opens++;
- if (conf->cumulative.nr_opens > conf->cumulative.max_nr_opens) {
- conf->cumulative.max_nr_opens = conf->cumulative.nr_opens;
- conf->cumulative.max_openfd_time = iosfd->opened_at;
- }
- }
- UNLOCK (&conf->lock);
- if (iosstat) {
- BUMP_STATS (iosstat, IOS_STATS_TYPE_OPEN);
- iosstat = NULL;
+ struct ios_fd *iosfd = NULL;
+ char *path = NULL;
+ struct ios_stat *iosstat = NULL;
+ struct ios_conf *conf = NULL;
+ int i = 0;
+
+ conf = this->private;
+ path = frame->local;
+ frame->local = NULL;
+
+ if (!path)
+ goto unwind;
+
+ if (op_ret < 0) {
+ GF_FREE(path);
+ goto unwind;
+ }
+
+ iosfd = GF_CALLOC(1, sizeof(*iosfd), gf_io_stats_mt_ios_fd);
+ if (!iosfd) {
+ GF_FREE(path);
+ goto unwind;
+ }
+
+ iosfd->filename = path;
+ GF_ATOMIC_INIT(iosfd->data_read, 0);
+ GF_ATOMIC_INIT(iosfd->data_written, 0);
+ for (i = 0; i < IOS_BLOCK_COUNT_SIZE; i++) {
+ GF_ATOMIC_INIT(iosfd->block_count_write[i], 0);
+ GF_ATOMIC_INIT(iosfd->block_count_read[i], 0);
+ }
+ gettimeofday(&iosfd->opened_at, NULL);
+
+ ios_fd_ctx_set(fd, this, iosfd);
+
+ ios_inode_ctx_get(fd->inode, this, &iosstat);
+ if (!iosstat) {
+ iosstat = ios_init_iosstat(this, path, fd->inode->gfid, fd->inode);
+ }
+
+ LOCK(&conf->lock);
+ {
+ conf->cumulative.nr_opens++;
+ if (conf->cumulative.nr_opens > conf->cumulative.max_nr_opens) {
+ conf->cumulative.max_nr_opens = conf->cumulative.nr_opens;
+ conf->cumulative.max_openfd_time = iosfd->opened_at;
}
+ }
+ UNLOCK(&conf->lock);
+ if (iosstat) {
+ ios_bump_stats(this, iosstat, IOS_STATS_TYPE_OPEN);
+ iosstat = NULL;
+ }
unwind:
- UPDATE_PROFILE_STATS (frame, OPEN);
-
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, OPEN);
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
-
int
-io_stats_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+io_stats_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, STAT);
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, STAT);
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
+int
+io_stats_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *buf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ int len = 0;
+ fd_t *fd = NULL;
+ struct ios_stat *iosstat = NULL;
+
+ fd = frame->local;
+ frame->local = NULL;
+
+ if (op_ret > 0) {
+ len = iov_length(vector, count);
+ ios_bump_read(this, fd, len);
+ }
+
+ UPDATE_PROFILE_STATS(frame, READ);
+ ios_inode_ctx_get(fd->inode, this, &iosstat);
+
+ if (iosstat) {
+ ios_bump_stats(this, iosstat, IOS_STATS_TYPE_READ);
+ BUMP_THROUGHPUT(iosstat, IOS_STATS_THRU_READ);
+ iosstat = NULL;
+ }
+
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, buf,
+ iobref, xdata);
+ return 0;
+}
int
-io_stats_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count,
- struct iatt *buf, struct iobref *iobref, dict_t *xdata)
+io_stats_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- int len = 0;
- fd_t *fd = NULL;
- struct ios_stat *iosstat = NULL;
+ struct ios_stat *iosstat = NULL;
+ inode_t *inode = NULL;
- fd = frame->local;
+ UPDATE_PROFILE_STATS(frame, WRITE);
+ if (frame->local) {
+ inode = frame->local;
frame->local = NULL;
-
- if (op_ret > 0) {
- len = iov_length (vector, count);
- BUMP_READ (fd, len);
- }
-
- UPDATE_PROFILE_STATS (frame, READ);
- ios_inode_ctx_get (fd->inode, this, &iosstat);
-
+ ios_inode_ctx_get(inode, this, &iosstat);
if (iosstat) {
- BUMP_STATS (iosstat, IOS_STATS_TYPE_READ);
- BUMP_THROUGHPUT (iosstat, IOS_STATS_THRU_READ);
- iosstat = NULL;
+ ios_bump_stats(this, iosstat, IOS_STATS_TYPE_WRITE);
+ BUMP_THROUGHPUT(iosstat, IOS_STATS_THRU_WRITE);
+ inode = NULL;
+ iosstat = NULL;
}
+ }
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
- vector, count, buf, iobref, xdata);
- return 0;
-
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
-
int
-io_stats_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+io_stats_copy_file_range_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *stbuf, struct iatt *prebuf_dst,
+ struct iatt *postbuf_dst, dict_t *xdata)
{
- struct ios_stat *iosstat = NULL;
- inode_t *inode = NULL;
-
- UPDATE_PROFILE_STATS (frame, WRITE);
- if (frame->local){
- inode = frame->local;
- frame->local = NULL;
- ios_inode_ctx_get (inode, this, &iosstat);
- if (iosstat) {
- BUMP_STATS (iosstat, IOS_STATS_TYPE_WRITE);
- BUMP_THROUGHPUT (iosstat, IOS_STATS_THRU_WRITE);
- inode = NULL;
- iosstat = NULL;
- }
- }
-
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, COPY_FILE_RANGE);
+ STACK_UNWIND_STRICT(copy_file_range, frame, op_ret, op_errno, stbuf,
+ prebuf_dst, postbuf_dst, xdata);
+ return 0;
}
-
-
-
int
-io_stats_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata)
+io_stats_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
+ dict_t *xdata)
{
- struct ios_stat *iosstat = NULL;
- inode_t *inode = frame->local;
+ struct ios_stat *iosstat = NULL;
+ inode_t *inode = frame->local;
- frame->local = NULL;
+ frame->local = NULL;
- UPDATE_PROFILE_STATS (frame, READDIRP);
+ UPDATE_PROFILE_STATS(frame, READDIRP);
- ios_inode_ctx_get (inode, this, &iosstat);
+ ios_inode_ctx_get(inode, this, &iosstat);
- if (iosstat) {
- BUMP_STATS (iosstat, IOS_STATS_TYPE_READDIRP);
- iosstat = NULL;
- }
+ if (iosstat) {
+ ios_bump_stats(this, iosstat, IOS_STATS_TYPE_READDIRP);
+ iosstat = NULL;
+ }
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
-
int
-io_stats_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata)
+io_stats_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
+ dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, READDIR);
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, READDIR);
+ STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
-
int
-io_stats_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+io_stats_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, FSYNC);
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, FSYNC);
+ STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
-
int
-io_stats_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preop, struct iatt *postop, dict_t *xdata)
+io_stats_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preop,
+ struct iatt *postop, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, SETATTR);
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, preop, postop, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, SETATTR);
+ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, preop, postop, xdata);
+ return 0;
}
-
int
-io_stats_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+io_stats_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, UNLINK);
- STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- preparent, postparent, xdata);
- return 0;
-
+ UPDATE_PROFILE_STATS(frame, UNLINK);
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
}
-
int
-io_stats_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata)
+io_stats_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, RENAME);
- STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
- preoldparent, postoldparent,
- prenewparent, postnewparent, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, RENAME);
+ STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
+ return 0;
}
-
int
-io_stats_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *buf,
- struct iatt *sbuf, dict_t *xdata)
+io_stats_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *buf,
+ struct iatt *sbuf, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, READLINK);
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, sbuf, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, READLINK);
+ STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, buf, sbuf, xdata);
+ return 0;
}
-
int
-io_stats_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- dict_t *xdata, struct iatt *postparent)
+io_stats_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- UPDATE_PROFILE_STATS (frame, LOOKUP);
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf, xdata,
- postparent);
- return 0;
+ UPDATE_PROFILE_STATS(frame, LOOKUP);
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
}
-
int
-io_stats_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+io_stats_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, SYMLINK);
- STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, SYMLINK);
+ STACK_UNWIND_STRICT(symlink, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
}
-
int
-io_stats_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+io_stats_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, MKNOD);
- STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, MKNOD);
+ STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
}
-
int
-io_stats_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent,
- dict_t *xdata)
+io_stats_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- struct ios_stat *iosstat = NULL;
- char *path = frame->local;
+ char *path = frame->local;
- UPDATE_PROFILE_STATS (frame, MKDIR);
- if (op_ret < 0)
- goto unwind;
+ if (!path)
+ goto unwind;
- iosstat = GF_CALLOC (1, sizeof (*iosstat), gf_io_stats_mt_ios_stat);
- if (iosstat) {
- LOCK_INIT (&iosstat->lock);
- iosstat->filename = gf_strdup(path);
- uuid_copy (iosstat->gfid, buf->ia_gfid);
- ios_inode_ctx_set (inode, this, iosstat);
- }
+ UPDATE_PROFILE_STATS(frame, MKDIR);
+ if (op_ret < 0)
+ goto unwind;
+
+ /* allocate a struct ios_stat and set the inode ctx */
+ ios_init_iosstat(this, path, buf->ia_gfid, inode);
unwind:
- /* local is assigned with path */
- if (frame->local)
- GF_FREE (frame->local);
- frame->local = NULL;
- STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
+ /* local is assigned with path */
+ GF_FREE(frame->local);
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(mkdir, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
}
-
int
-io_stats_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+io_stats_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, LINK);
- STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, LINK);
+ STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
}
-
int
-io_stats_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+io_stats_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, FLUSH);
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, FLUSH);
+ STACK_UNWIND_STRICT(flush, frame, op_ret, op_errno, xdata);
+ return 0;
}
-
int
-io_stats_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+io_stats_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- struct ios_stat *iosstat = NULL;
- int ret = -1;
+ struct ios_stat *iosstat = NULL;
+ int ret = -1;
- UPDATE_PROFILE_STATS (frame, OPENDIR);
- if (op_ret < 0)
- goto unwind;
+ UPDATE_PROFILE_STATS(frame, OPENDIR);
+ if (op_ret < 0)
+ goto unwind;
- ios_fd_ctx_set (fd, this, 0);
+ ios_fd_ctx_set(fd, this, 0);
- ret = ios_inode_ctx_get (fd->inode, this, &iosstat);
- if (!ret)
- BUMP_STATS (iosstat, IOS_STATS_TYPE_OPENDIR);
+ ret = ios_inode_ctx_get(fd->inode, this, &iosstat);
+ if (!ret)
+ ios_bump_stats(this, iosstat, IOS_STATS_TYPE_OPENDIR);
unwind:
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
- return 0;
+ STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
-
int
-io_stats_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+io_stats_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
+ UPDATE_PROFILE_STATS(frame, RMDIR);
- UPDATE_PROFILE_STATS (frame, RMDIR);
-
- STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- preparent, postparent, xdata);
- return 0;
+ STACK_UNWIND_STRICT(rmdir, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
}
+int
+io_stats_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, TRUNCATE);
+ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
int
-io_stats_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+io_stats_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, TRUNCATE);
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, STATFS);
+ STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
+int
+io_stats_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, SETXATTR);
+ STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
int
-io_stats_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
+io_stats_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, STATFS);
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, GETXATTR);
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
+int
+io_stats_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, REMOVEXATTR);
+ STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
int
-io_stats_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+io_stats_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, SETXATTR);
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, FSETXATTR);
+ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
-
int
-io_stats_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+io_stats_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, GETXATTR);
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, FGETXATTR);
+ STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
-
int
-io_stats_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+io_stats_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, REMOVEXATTR);
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, FREMOVEXATTR);
+ STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
-io_stats_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+io_stats_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, FSETXATTR);
- STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, FSYNCDIR);
+ STACK_UNWIND_STRICT(fsyncdir, frame, op_ret, op_errno, xdata);
+ return 0;
}
-
int
-io_stats_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+io_stats_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, FGETXATTR);
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, ACCESS);
+ STACK_UNWIND_STRICT(access, frame, op_ret, op_errno, xdata);
+ return 0;
}
-
int
-io_stats_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+io_stats_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, FREMOVEXATTR);
- STACK_UNWIND_STRICT (fremovexattr, frame, op_ret, op_errno, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, FTRUNCATE);
+ STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
-
int
-io_stats_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+io_stats_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, FSYNCDIR);
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, FSTAT);
+ STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
-
int
-io_stats_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+io_stats_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, ACCESS);
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, FALLOCATE);
+ STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
-
int
-io_stats_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+io_stats_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, FTRUNCATE);
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, DISCARD);
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
-
int
-io_stats_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+io_stats_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, FSTAT);
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, ZEROFILL);
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
+int32_t
+io_stats_ipc_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, IPC);
+ STACK_UNWIND_STRICT(ipc, frame, op_ret, op_errno, xdata);
+ return 0;
+}
int
-io_stats_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+io_stats_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_flock *lock,
+ dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, LK);
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, LK);
+ STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
}
+int
+io_stats_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, ENTRYLK);
+ STACK_UNWIND_STRICT(entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
int
-io_stats_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+io_stats_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, ENTRYLK);
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, FENTRYLK);
+ STACK_UNWIND_STRICT(fentrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-
int
-io_stats_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+io_stats_rchecksum_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, XATTROP);
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, RCHECKSUM);
+ STACK_UNWIND_STRICT(rchecksum, frame, op_ret, op_errno, weak_checksum,
+ strong_checksum, xdata);
+ return 0;
}
+int
+io_stats_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, off_t offset, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, SEEK);
+ STACK_UNWIND_STRICT(seek, frame, op_ret, op_errno, offset, xdata);
+ return 0;
+}
int
-io_stats_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+io_stats_lease_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct gf_lease *lease,
+ dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, FXATTROP);
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, LEASE);
+ STACK_UNWIND_STRICT(lease, frame, op_ret, op_errno, lease, xdata);
+ return 0;
}
+int
+io_stats_getactivelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ lock_migration_info_t *locklist, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, GETACTIVELK);
+ STACK_UNWIND_STRICT(getactivelk, frame, op_ret, op_errno, locklist, xdata);
+ return 0;
+}
int
-io_stats_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+io_stats_setactivelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- UPDATE_PROFILE_STATS (frame, INODELK);
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
- return 0;
+ UPDATE_PROFILE_STATS(frame, SETACTIVELK);
+ STACK_UNWIND_STRICT(setactivelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
-io_stats_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+io_stats_compound_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, void *data,
+ dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ UPDATE_PROFILE_STATS(frame, COMPOUND);
+ STACK_UNWIND_STRICT(compound, frame, op_ret, op_errno, data, xdata);
+ return 0;
+}
- STACK_WIND (frame, io_stats_entrylk_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->entrylk,
- volume, loc, basename, cmd, type, xdata);
- return 0;
+int
+io_stats_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, XATTROP);
+ STACK_UNWIND_STRICT(xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
}
+int
+io_stats_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, FXATTROP);
+ STACK_UNWIND_STRICT(fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
int
-io_stats_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+io_stats_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ UPDATE_PROFILE_STATS(frame, INODELK);
+ STACK_UNWIND_STRICT(inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
- START_FOP_LATENCY (frame);
+int
+io_stats_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_inodelk_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->inodelk,
- volume, loc, cmd, flock, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, volume, loc, basename, cmd,
+ type, xdata);
+ return 0;
}
-
int
-io_stats_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+io_stats_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
+ START_FOP_LATENCY(frame);
- UPDATE_PROFILE_STATS (frame, FINODELK);
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_fentrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fentrylk, volume, fd, basename, cmd,
+ type, xdata);
+ return 0;
}
-
int
-io_stats_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+io_stats_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_finodelk_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->finodelk,
- volume, fd, cmd, flock, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_inodelk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk, volume, loc, cmd, flock,
+ xdata);
+ return 0;
}
+int
+io_stats_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ UPDATE_PROFILE_STATS(frame, FINODELK);
+ STACK_UNWIND_STRICT(finodelk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
int
-io_stats_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+io_stats_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_xattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_finodelk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk, volume, fd, cmd, flock,
+ xdata);
+ return 0;
}
-
int
-io_stats_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+io_stats_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_fxattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_xattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata);
+ return 0;
}
-
int
-io_stats_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xdata)
+io_stats_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_fxattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata);
+ return 0;
}
-
int
-io_stats_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+io_stats_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
}
-
int
-io_stats_readlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, size_t size, dict_t *xdata)
+io_stats_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_readlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink,
- loc, size, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
}
-
int
-io_stats_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
+io_stats_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_mknod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod,
- loc, mode, dev, umask, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_readlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
+ return 0;
}
+int
+io_stats_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, dev, umask, xdata);
+ return 0;
+}
int
-io_stats_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
+io_stats_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- frame->local = gf_strdup (loc->path);
+ if (loc->path)
+ frame->local = gf_strdup(loc->path);
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_mkdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- loc, mode, umask, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ return 0;
}
-
int
-io_stats_unlink (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata)
+io_stats_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc, xflag, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
}
-
int
-io_stats_rmdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int flags, dict_t *xdata)
+io_stats_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_rmdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- loc, flags, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
+ return 0;
}
-
int
-io_stats_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, mode_t umask, dict_t *xdata)
+io_stats_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, umask, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask, xdata);
+ return 0;
}
-
int
-io_stats_rename (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+io_stats_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_rename_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename,
- oldloc, newloc, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ return 0;
}
-
int
-io_stats_link (call_frame_t *frame, xlator_t *this,
- loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+io_stats_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_link_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link,
- oldloc, newloc, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
}
-
int
-io_stats_setattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, struct iatt *stbuf, int32_t valid, dict_t *xdata)
+io_stats_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_setattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
}
-
int
-io_stats_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset, dict_t *xdata)
+io_stats_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc, offset, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
}
-
int
-io_stats_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, dict_t *xdata)
+io_stats_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- frame->local = gf_strdup (loc->path);
+ if (loc->path)
+ frame->local = gf_strdup(loc->path);
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_open_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc, flags, fd, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
}
-
int
-io_stats_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata)
+io_stats_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- frame->local = gf_strdup (loc->path);
+ if (loc->path)
+ frame->local = gf_strdup(loc->path);
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, umask, fd, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
}
-
int
-io_stats_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+io_stats_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- frame->local = fd;
+ frame->local = fd;
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_readv_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd, size, offset, flags, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+ return 0;
}
-
int
-io_stats_writev (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iovec *vector,
- int32_t count, off_t offset,
- uint32_t flags, struct iobref *iobref, dict_t *xdata)
+io_stats_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- int len = 0;
+ int len = 0;
- if (fd->inode)
- frame->local = fd->inode;
- len = iov_length (vector, count);
-
- BUMP_WRITE (fd, len);
- START_FOP_LATENCY (frame);
+ if (fd->inode)
+ frame->local = fd->inode;
+ len = iov_length(vector, count);
- STACK_WIND (frame, io_stats_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, flags, iobref, xdata);
- return 0;
+ ios_bump_write(this, fd, len);
+ START_FOP_LATENCY(frame);
+ STACK_WIND(frame, io_stats_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
+ return 0;
}
-
int
-io_stats_statfs (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xdata)
+io_stats_copy_file_range(call_frame_t *frame, xlator_t *this, fd_t *fd_in,
+ off_t off_in, fd_t *fd_out, off_t off_out, size_t len,
+ uint32_t flags, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_statfs_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs,
- loc, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_copy_file_range_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->copy_file_range, fd_in, off_in, fd_out,
+ off_out, len, flags, xdata);
+ return 0;
}
-
int
-io_stats_flush (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *xdata)
+io_stats_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_flush_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_statfs_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
}
+int
+io_stats_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
+}
int
-io_stats_fsync (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t flags, dict_t *xdata)
+io_stats_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd, flags, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
}
+int
+conditional_dump(dict_t *dict, char *key, data_t *value, void *data)
+{
+ struct {
+ xlator_t *this;
+ inode_t *inode;
+ const char *path;
+ } * stub;
+ xlator_t *this = NULL;
+ char *filename = NULL;
+ FILE *logfp = NULL;
+ struct ios_dump_args args = {0};
+ int pid, namelen, dirlen;
+ char dump_key[100];
+ char *slash_ptr = NULL;
+ char *path_in_value = NULL;
+ char *identifier = NULL;
+ struct ios_conf *conf = NULL;
+
+ stub = data;
+ this = stub->this;
+ conf = this->private;
+
+ /* Don't do this on 'brick-side', only do this on client side */
+ /* Addresses CVE-2018-14659 */
+ if (this->ctx->process_mode != GF_CLIENT_PROCESS) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "taking io-stats dump using setxattr not permitted on brick."
+ " Use 'gluster profile' instead");
+ return -1;
+ }
+
+ /* Create a file name that is appended with the io-stats instance
+ name as well. This helps when there is more than a single io-stats
+ instance in the graph, or the client and server processes are running
+ on the same node */
+ /* For the sanity of where the file should be located, we should make
+ sure file is written only inside RUNDIR (ie, /var/run/gluster) */
+ /* TODO: provide an option to dump it to different directory of
+ choice, based on options */
+ /* name format: /var/run/gluster/<passed in path/filename>.<xlator name
+ * slashes to -> */
+
+ path_in_value = alloca0(value->len + 1);
+
+ /* We need a memcpy here because of the way dict_unserialize works */
+
+ memcpy(path_in_value, data_to_str(value), value->len);
+ path_in_value[value->len] = '\0';
+
+ if (strstr(path_in_value, "../")) {
+ gf_log(this->name, GF_LOG_ERROR, "%s: no \"../\" allowed in path",
+ path_in_value);
+ return -1;
+ }
+
+ if (path_in_value[0] == '/') {
+ path_in_value = path_in_value + 1;
+ }
+
+ dirlen = strlen(IOS_STATS_DUMP_DIR);
+ if (conf->unique_id) {
+ /* this->name will be the same for all bricks of the volume */
+ identifier = conf->unique_id;
+ } else {
+ identifier = this->name;
+ }
+
+ namelen = (dirlen + value->len + strlen(identifier) + 3);
+ /* +3 for '/', '.' and '\0' added in snprintf below*/
+
+ filename = alloca0(namelen);
+ snprintf(filename, namelen, "%s/%s.%s", IOS_STATS_DUMP_DIR, path_in_value,
+ identifier);
+
+ /* convert any slashes to '-' so that fopen works correctly */
+ slash_ptr = strchr(filename + dirlen + 1, '/');
+ while (slash_ptr) {
+ *slash_ptr = '-';
+ slash_ptr = strchr(slash_ptr, '/');
+ }
+
+ pid = getpid();
+
+ if (!strncmp(filename, "", 1)) {
+ gf_log(this->name, GF_LOG_ERROR, "No filename given");
+ return -1;
+ }
+ logfp = fopen(filename, "w+");
+ if (!logfp) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to open %s "
+ "for writing",
+ filename);
+ return -1;
+ }
+ sprintf(dump_key, "*io*stat*%d_json_dump", pid);
+ if (fnmatch(dump_key, key, 0) == 0) {
+ (void)ios_dump_args_init(&args, IOS_DUMP_TYPE_JSON_FILE, logfp);
+ } else {
+ (void)ios_dump_args_init(&args, IOS_DUMP_TYPE_FILE, logfp);
+ }
+ io_stats_dump(this, &args, GF_IOS_INFO_ALL, _gf_false);
+ fclose(logfp);
+ return 0;
+}
-void
-conditional_dump (dict_t *dict, char *key, data_t *value, void *data)
-{
- struct {
- xlator_t *this;
- inode_t *inode;
- const char *path;
- } *stub;
- xlator_t *this = NULL;
- char *filename = NULL;
- FILE *logfp = NULL;
- struct ios_dump_args args = {0};
-
- stub = data;
- this = stub->this;
-
- filename = alloca (value->len + 1);
- memset (filename, 0, value->len + 1);
- memcpy (filename, data_to_str (value), value->len);
-
- if (fnmatch ("*io*stat*dump", key, 0) == 0) {
-
- if (!strncmp (filename, "", 1)) {
- gf_log (this->name, GF_LOG_ERROR, "No filename given");
- return;
- }
- logfp = fopen (filename, "w+");
- GF_ASSERT (logfp);
- if (!logfp) {
- gf_log (this->name, GF_LOG_ERROR, "failed to open %s "
- "for writing", filename);
- return;
- }
- (void) ios_dump_args_init (&args, IOS_DUMP_TYPE_FILE,
- logfp);
- io_stats_dump (this, &args);
- fclose (logfp);
+int
+_ios_destroy_dump_thread(struct ios_conf *conf)
+{
+ conf->dump_thread_should_die = _gf_true;
+ if (conf->dump_thread_running) {
+ (void)pthread_cancel(conf->dump_thread);
+ (void)pthread_join(conf->dump_thread, NULL);
+ }
+ return 0;
+}
+
+void *
+_ios_dump_thread(xlator_t *this)
+{
+ struct ios_conf *conf = NULL;
+ FILE *stats_logfp = NULL;
+ FILE *samples_logfp = NULL;
+ struct ios_dump_args args = {0};
+ int i;
+ int stats_bytes_written = 0;
+ int samples_bytes_written = 0;
+ char stats_filename[PATH_MAX];
+ char samples_filename[PATH_MAX];
+ char *xlator_name;
+ char *instance_name;
+ gf_boolean_t log_stats_fopen_failure = _gf_true;
+ gf_boolean_t log_samples_fopen_failure = _gf_true;
+ int old_cancel_type;
+
+ conf = this->private;
+ gf_log(this->name, GF_LOG_INFO,
+ "IO stats dump thread started, "
+ "polling IO stats every %d seconds",
+ conf->ios_dump_interval);
+ xlator_name = strdupa(conf->unique_id);
+ for (i = 0; i < strlen(xlator_name); i++) {
+ if (xlator_name[i] == '/')
+ xlator_name[i] = '_';
+ }
+ instance_name = this->instance_name;
+ if (this->name && strcmp(this->name, "glustershd") == 0) {
+ xlator_name = "shd";
+ } else if (this->prev && strcmp(this->prev->name, "nfs-server") == 0) {
+ xlator_name = "nfsd";
+ instance_name = this->prev->instance_name;
+ }
+ if (sys_mkdir(_IOS_DUMP_DIR, S_IRWXU | S_IRWXO | S_IRWXG) == (-1)) {
+ if (errno != EEXIST) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "could not create stats-dump directory %s", _IOS_DUMP_DIR);
+ goto out;
+ }
+ }
+ if (sys_mkdir(_IOS_SAMP_DIR, S_IRWXU | S_IRWXO | S_IRWXG) == (-1)) {
+ if (errno != EEXIST) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "could not create stats-sample directory %s", _IOS_SAMP_DIR);
+ goto out;
}
+ }
+ if (instance_name) {
+ stats_bytes_written = snprintf(stats_filename, PATH_MAX,
+ "%s/%s_%s_%s.dump", _IOS_DUMP_DIR,
+ __progname, xlator_name, instance_name);
+ samples_bytes_written = snprintf(
+ samples_filename, PATH_MAX, "%s/%s_%s_%s.samp", _IOS_SAMP_DIR,
+ __progname, xlator_name, instance_name);
+ } else {
+ stats_bytes_written = snprintf(stats_filename, PATH_MAX,
+ "%s/%s_%s.dump", _IOS_DUMP_DIR,
+ __progname, xlator_name);
+ samples_bytes_written = snprintf(samples_filename, PATH_MAX,
+ "%s/%s_%s.samp", _IOS_SAMP_DIR,
+ __progname, xlator_name);
+ }
+ if ((stats_bytes_written >= PATH_MAX) ||
+ (samples_bytes_written >= PATH_MAX)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Invalid path for stats dump (%s) and/or latency "
+ "samples (%s)",
+ stats_filename, samples_filename);
+ goto out;
+ }
+ while (1) {
+ if (conf->dump_thread_should_die)
+ break;
+ (void)pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS,
+ &old_cancel_type);
+ sleep(conf->ios_dump_interval);
+ (void)pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, &old_cancel_type);
+ /*
+ * It's not clear whether we should reopen this each time, or
+ * just hold it open and rewind/truncate on each iteration.
+ * Leaving it alone for now.
+ */
+ stats_logfp = fopen(stats_filename, "w+");
+ if (stats_logfp) {
+ (void)ios_dump_args_init(&args, conf->dump_format, stats_logfp);
+ io_stats_dump(this, &args, GF_IOS_INFO_ALL, _gf_false);
+ fclose(stats_logfp);
+ log_stats_fopen_failure = _gf_true;
+ } else if (log_stats_fopen_failure) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "could not open stats-dump file %s (%s)", stats_filename,
+ strerror(errno));
+ log_stats_fopen_failure = _gf_false;
+ }
+ samples_logfp = fopen(samples_filename, "w+");
+ if (samples_logfp) {
+ io_stats_dump_latency_samples_logfp(this, samples_logfp);
+ fclose(samples_logfp);
+ log_samples_fopen_failure = _gf_true;
+ } else if (log_samples_fopen_failure) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "could not open samples-dump file %s (%s)", samples_filename,
+ strerror(errno));
+ log_samples_fopen_failure = _gf_false;
+ }
+ }
+out:
+ conf->dump_thread_running = _gf_false;
+ gf_log(this->name, GF_LOG_INFO, "IO stats dump thread terminated");
+ return NULL;
}
+static gf_boolean_t
+match_special_xattr(dict_t *d, char *k, data_t *val, void *mdata)
+{
+ gf_boolean_t ret = _gf_false;
+ if (fnmatch("*io*stat*dump", k, 0) == 0) {
+ ret = _gf_true;
+ }
+
+ return ret;
+}
int
-io_stats_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
+io_stats_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- struct {
- xlator_t *this;
- inode_t *inode;
- const char *path;
- } stub;
+ struct {
+ xlator_t *this;
+ inode_t *inode;
+ const char *path;
+ } stub;
- stub.this = this;
- stub.inode = loc->inode;
- stub.path = loc->path;
+ stub.this = this;
+ stub.inode = loc->inode;
+ stub.path = loc->path;
- dict_foreach (dict, conditional_dump, &stub);
+ (void)dict_foreach_match(dict, match_special_xattr, NULL, conditional_dump,
+ &stub);
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_setxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
+ return 0;
}
-
int
-io_stats_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
+io_stats_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_getxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- loc, name, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
}
-
int
-io_stats_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
+io_stats_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_removexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- loc, name, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ return 0;
}
-
int
-io_stats_fsetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
+io_stats_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_fsetxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr,
- fd, dict, flags, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
}
-
int
-io_stats_fgetxattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata)
+io_stats_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_fgetxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr,
- fd, name, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
}
-
int
-io_stats_fremovexattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata)
+io_stats_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_fremovexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr,
- fd, name, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_fremovexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ return 0;
}
-
int
-io_stats_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd, dict_t *xdata)
+io_stats_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
+ START_FOP_LATENCY(frame);
- START_FOP_LATENCY (frame);
+ STACK_WIND(frame, io_stats_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
+}
- STACK_WIND (frame, io_stats_opendir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir,
- loc, fd, xdata);
- return 0;
+int
+io_stats_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ frame->local = fd->inode;
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict);
+ return 0;
}
int
-io_stats_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *dict)
+io_stats_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- frame->local = fd->inode;
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_readdirp_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp,
- fd, size, offset, dict);
- return 0;
+ STACK_WIND(frame, io_stats_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
+ return 0;
}
+int
+io_stats_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t datasync, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_fsyncdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsyncdir, fd, datasync, xdata);
+ return 0;
+}
int
-io_stats_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, dict_t *xdata)
+io_stats_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_readdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir,
- fd, size, offset, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_access_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->access, loc, mask, xdata);
+ return 0;
}
+int
+io_stats_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+}
int
-io_stats_fsyncdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t datasync, dict_t *xdata)
+io_stats_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_fsyncdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir,
- fd, datasync, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
}
+int
+io_stats_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
+}
int
-io_stats_access (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t mask, dict_t *xdata)
+io_stats_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_access_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access,
- loc, mask, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+
+ return 0;
}
+int
+io_stats_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+
+ return 0;
+}
int
-io_stats_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset, dict_t *xdata)
+io_stats_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd, offset, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+
+ return 0;
}
+int32_t
+io_stats_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_ipc_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ipc, op, xdata);
+ return 0;
+}
int
-io_stats_fsetattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, struct iatt *stbuf, int32_t valid, dict_t *xdata)
+io_stats_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_setattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetattr,
- fd, stbuf, valid, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_lk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata);
+ return 0;
}
+int
+io_stats_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_rchecksum_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rchecksum, fd, offset, len, xdata);
+ return 0;
+}
int
-io_stats_fstat (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *xdata)
+io_stats_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_seek_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->seek, fd, offset, what, xdata);
+ return 0;
}
+int
+io_stats_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_lease_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lease, loc, lease, xdata);
+ return 0;
+}
int
-io_stats_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+io_stats_getactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
{
- START_FOP_LATENCY (frame);
+ START_FOP_LATENCY(frame);
- STACK_WIND (frame, io_stats_lk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk,
- fd, cmd, lock, xdata);
- return 0;
+ STACK_WIND(frame, io_stats_getactivelk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getactivelk, loc, xdata);
+ return 0;
}
+int
+io_stats_setactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata)
+{
+ START_FOP_LATENCY(frame);
+
+ STACK_WIND(frame, io_stats_setactivelk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setactivelk, loc, locklist, xdata);
+ return 0;
+}
int
-io_stats_release (xlator_t *this, fd_t *fd)
+io_stats_compound(call_frame_t *frame, xlator_t *this, void *args,
+ dict_t *xdata)
{
- struct ios_fd *iosfd = NULL;
- struct ios_conf *conf = NULL;
+ START_FOP_LATENCY(frame);
- BUMP_FOP (RELEASE);
+ STACK_WIND(frame, io_stats_compound_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->compound, args, xdata);
+ return 0;
+}
- conf = this->private;
+int
+io_stats_release(xlator_t *this, fd_t *fd)
+{
+ struct ios_fd *iosfd = NULL;
+ struct ios_conf *conf = NULL;
- LOCK (&conf->lock);
+ BUMP_FOP(RELEASE);
+
+ conf = this->private;
+ if (conf) {
+ LOCK(&conf->lock);
{
- conf->cumulative.nr_opens--;
+ conf->cumulative.nr_opens--;
}
- UNLOCK (&conf->lock);
+ UNLOCK(&conf->lock);
+ }
- ios_fd_ctx_get (fd, this, &iosfd);
- if (iosfd) {
- io_stats_dump_fd (this, iosfd);
+ ios_fd_ctx_get(fd, this, &iosfd);
+ if (iosfd) {
+ io_stats_dump_fd(this, iosfd);
- if (iosfd->filename)
- GF_FREE (iosfd->filename);
- GF_FREE (iosfd);
- }
+ GF_FREE(iosfd->filename);
+ GF_FREE(iosfd);
+ }
- return 0;
+ return 0;
}
-
int
-io_stats_releasedir (xlator_t *this, fd_t *fd)
+io_stats_releasedir(xlator_t *this, fd_t *fd)
{
- BUMP_FOP (RELEASEDIR);
+ BUMP_FOP(RELEASEDIR);
- return 0;
+ return 0;
}
-
int
-io_stats_forget (xlator_t *this, inode_t *inode)
+io_stats_forget(xlator_t *this, inode_t *inode)
{
- BUMP_FOP (FORGET);
- ios_stats_cleanup (this, inode);
- return 0;
+ BUMP_FOP(FORGET);
+ ios_stats_cleanup(this, inode);
+ return 0;
}
static int
-ios_init_top_stats (struct ios_conf *conf)
+ios_init_top_stats(struct ios_conf *conf)
{
- int i = 0;
+ int i = 0;
- GF_ASSERT (conf);
+ GF_ASSERT(conf);
- for (i = 0; i <IOS_STATS_TYPE_MAX; i++) {
- conf->list[i].iosstats = GF_CALLOC (1,
- sizeof(*conf->list[i].iosstats),
- gf_io_stats_mt_ios_stat);
+ for (i = 0; i < IOS_STATS_TYPE_MAX; i++) {
+ conf->list[i].iosstats = GF_CALLOC(1, sizeof(*conf->list[i].iosstats),
+ gf_io_stats_mt_ios_stat);
- if (!conf->list[i].iosstats)
- return -1;
+ if (!conf->list[i].iosstats)
+ return -1;
- INIT_LIST_HEAD(&conf->list[i].iosstats->list);
- LOCK_INIT (&conf->list[i].lock);
- }
+ INIT_LIST_HEAD(&conf->list[i].iosstats->list);
+ LOCK_INIT(&conf->list[i].lock);
+ }
- for (i = 0; i < IOS_STATS_THRU_MAX; i ++) {
- conf->thru_list[i].iosstats = GF_CALLOC (1,
- sizeof (*conf->thru_list[i].iosstats),
- gf_io_stats_mt_ios_stat);
+ for (i = 0; i < IOS_STATS_THRU_MAX; i++) {
+ conf->thru_list[i].iosstats = GF_CALLOC(
+ 1, sizeof(*conf->thru_list[i].iosstats), gf_io_stats_mt_ios_stat);
- if (!conf->thru_list[i].iosstats)
- return -1;
+ if (!conf->thru_list[i].iosstats)
+ return -1;
- INIT_LIST_HEAD(&conf->thru_list[i].iosstats->list);
- LOCK_INIT (&conf->thru_list[i].lock);
- }
+ INIT_LIST_HEAD(&conf->thru_list[i].iosstats->list);
+ LOCK_INIT(&conf->thru_list[i].lock);
+ }
- return 0;
+ return 0;
}
static void
-ios_destroy_top_stats (struct ios_conf *conf)
-{
- int i = 0;
- struct ios_stat_head *list_head = NULL;
- struct ios_stat_list *entry = NULL;
- struct ios_stat_list *tmp = NULL;
- struct ios_stat_list *list = NULL;
- struct ios_stat *stat = NULL;
-
- GF_ASSERT (conf);
-
- LOCK (&conf->lock);
-
- conf->cumulative.nr_opens = 0;
- conf->cumulative.max_nr_opens = 0;
- conf->cumulative.max_openfd_time.tv_sec = 0;
- conf->cumulative.max_openfd_time.tv_usec = 0;
-
- for (i = 0; i < IOS_STATS_TYPE_MAX; i++) {
- list_head = &conf->list[i];
- if (!list_head)
- continue;
- list_for_each_entry_safe (entry, tmp,
- &list_head->iosstats->list, list) {
- list = entry;
- stat = list->iosstat;
- ios_stat_unref (stat);
- list_del (&list->list);
- if (list)
- GF_FREE (list);
- list_head->members--;
- }
+ios_destroy_top_stats(struct ios_conf *conf)
+{
+ int i = 0;
+ struct ios_stat_head *list_head = NULL;
+ struct ios_stat_list *entry = NULL;
+ struct ios_stat_list *tmp = NULL;
+ struct ios_stat_list *list = NULL;
+ struct ios_stat *stat = NULL;
+
+ GF_ASSERT(conf);
+
+ LOCK(&conf->lock);
+
+ conf->cumulative.nr_opens = 0;
+ conf->cumulative.max_nr_opens = 0;
+ conf->cumulative.max_openfd_time.tv_sec = 0;
+ conf->cumulative.max_openfd_time.tv_usec = 0;
+
+ for (i = 0; i < IOS_STATS_TYPE_MAX; i++) {
+ list_head = &conf->list[i];
+ if (!list_head)
+ continue;
+ list_for_each_entry_safe(entry, tmp, &list_head->iosstats->list, list)
+ {
+ list = entry;
+ stat = list->iosstat;
+ ios_stat_unref(stat);
+ list_del(&list->list);
+ GF_FREE(list);
+ list_head->members--;
}
-
- for (i = 0; i < IOS_STATS_THRU_MAX; i++) {
- list_head = &conf->thru_list[i];
- if (!list_head)
- continue;
- list_for_each_entry_safe (entry, tmp,
- &list_head->iosstats->list, list) {
- list = entry;
- stat = list->iosstat;
- ios_stat_unref (stat);
- list_del (&list->list);
- if (list)
- GF_FREE (list);
- list_head->members--;
- }
+ GF_FREE(list_head->iosstats);
+ }
+
+ for (i = 0; i < IOS_STATS_THRU_MAX; i++) {
+ list_head = &conf->thru_list[i];
+ if (!list_head)
+ continue;
+ list_for_each_entry_safe(entry, tmp, &list_head->iosstats->list, list)
+ {
+ list = entry;
+ stat = list->iosstat;
+ ios_stat_unref(stat);
+ list_del(&list->list);
+ GF_FREE(list);
+ list_head->members--;
}
+ GF_FREE(list_head->iosstats);
+ }
- UNLOCK (&conf->lock);
+ UNLOCK(&conf->lock);
- return;
+ return;
}
-int
-reconfigure (xlator_t *this, dict_t *options)
+static void
+io_stats_clear(struct ios_conf *conf)
{
- struct ios_conf *conf = NULL;
- int ret = -1;
- char *sys_log_str = NULL;
- int sys_log_level = -1;
- char *log_str = NULL;
- int log_level = -1;
+ time_t now = 0;
+
+ GF_ASSERT(conf);
+ now = gf_time();
+
+ LOCK(&conf->lock);
+ {
+ ios_global_stats_clear(&conf->cumulative, now);
+ ios_global_stats_clear(&conf->incremental, now);
+ conf->increment = 0;
+ }
+ UNLOCK(&conf->lock);
+}
- if (!this || !this->private)
- goto out;
+int32_t
+io_priv(xlator_t *this)
+{
+ int i;
+ char key[GF_DUMP_MAX_BUF_LEN];
+ char key_prefix_cumulative[GF_DUMP_MAX_BUF_LEN];
+ char key_prefix_incremental[GF_DUMP_MAX_BUF_LEN];
+ double min, max, avg;
+ uint64_t count, total;
+ struct ios_conf *conf = NULL;
+
+ conf = this->private;
+ if (!conf)
+ return -1;
+
+ if (!conf->count_fop_hits || !conf->measure_latency)
+ return -1;
+
+ gf_proc_dump_write("cumulative.data_read", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(conf->cumulative.data_read));
+ gf_proc_dump_write("cumulative.data_written", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(conf->cumulative.data_written));
+
+ gf_proc_dump_write("incremental.data_read", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(conf->incremental.data_read));
+ gf_proc_dump_write("incremental.data_written", "%" GF_PRI_ATOMIC,
+ GF_ATOMIC_GET(conf->incremental.data_written));
+
+ snprintf(key_prefix_cumulative, GF_DUMP_MAX_BUF_LEN, "%s.cumulative",
+ this->name);
+ snprintf(key_prefix_incremental, GF_DUMP_MAX_BUF_LEN, "%s.incremental",
+ this->name);
+
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ count = GF_ATOMIC_GET(conf->cumulative.fop_hits[i]);
+ total = conf->cumulative.latency[i].total;
+ min = conf->cumulative.latency[i].min;
+ max = conf->cumulative.latency[i].max;
+ avg = conf->cumulative.latency[i].avg;
+
+ gf_proc_dump_build_key(key, key_prefix_cumulative, "%s",
+ (char *)gf_fop_list[i]);
+
+ gf_proc_dump_write(key, "%" PRId64 ",%" PRId64 ",%.03f,%.03f,%.03f",
+ count, total, min, max, avg);
+
+ count = GF_ATOMIC_GET(conf->incremental.fop_hits[i]);
+ total = conf->incremental.latency[i].total;
+ min = conf->incremental.latency[i].min;
+ max = conf->incremental.latency[i].max;
+ avg = conf->incremental.latency[i].avg;
+
+ gf_proc_dump_build_key(key, key_prefix_incremental, "%s",
+ (char *)gf_fop_list[i]);
+
+ gf_proc_dump_write(key, "%" PRId64 ",%" PRId64 ",%.03f,%.03f,%.03f",
+ count, total, min, max, avg);
+ }
+
+ return 0;
+}
- conf = this->private;
+static void
+ios_set_log_format_code(struct ios_conf *conf, char *dump_format_str)
+{
+ if (strcmp(dump_format_str, "json") == 0)
+ conf->dump_format = IOS_DUMP_TYPE_JSON_FILE;
+ else if (strcmp(dump_format_str, "text") == 0)
+ conf->dump_format = IOS_DUMP_TYPE_FILE;
+ else if (strcmp(dump_format_str, "dict") == 0)
+ conf->dump_format = IOS_DUMP_TYPE_DICT;
+ else if (strcmp(dump_format_str, "samples") == 0)
+ conf->dump_format = IOS_DUMP_TYPE_SAMPLES;
+}
- GF_OPTION_RECONF ("dump-fd-stats", conf->dump_fd_stats, options, bool,
- out);
+void
+xlator_set_loglevel(xlator_t *this, int log_level)
+{
+ glusterfs_ctx_t *ctx = NULL;
+ glusterfs_graph_t *active = NULL;
+ xlator_t *top = NULL;
+ xlator_t *trav = this;
- GF_OPTION_RECONF ("count-fop-hits", conf->count_fop_hits, options, bool,
- out);
+ ctx = this->ctx;
+ GF_ASSERT(ctx);
+ active = ctx->active;
+ top = active->first;
- GF_OPTION_RECONF ("latency-measurement", conf->measure_latency,
- options, bool, out);
+ if (log_level == -1)
+ return;
- GF_OPTION_RECONF ("sys-log-level", sys_log_str, options, str, out);
- if (sys_log_str) {
- sys_log_level = glusterd_check_log_level (sys_log_str);
- set_sys_log_level (sys_log_level);
- }
+ if (ctx->cmd_args.brick_mux) {
+ /* Set log-level for all brick xlators */
+ top->loglevel = log_level;
- GF_OPTION_RECONF ("log-level", log_str, options, str, out);
- if (log_str) {
- log_level = glusterd_check_log_level (log_str);
- gf_log_set_loglevel (log_level);
+ /* Set log-level for parent xlator */
+ if (this->parents)
+ this->parents->xlator->loglevel = log_level;
+
+ while (trav) {
+ trav->loglevel = log_level;
+ trav = trav->next;
}
+ } else {
+ gf_log_set_loglevel(this->ctx, log_level);
+ }
+}
- ret = 0;
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ struct ios_conf *conf = NULL;
+ int ret = -1;
+ char *sys_log_str = NULL;
+ char *log_format_str = NULL;
+ char *logger_str = NULL;
+ char *dump_format_str = NULL;
+ int sys_log_level = -1;
+ char *log_str = NULL;
+ int log_level = -1;
+ int log_format = -1;
+ int logger = -1;
+ uint32_t log_buf_size = 0;
+ uint32_t log_flush_timeout = 0;
+ int32_t old_dump_interval;
+ int32_t threads;
+
+ if (!this || !this->private)
+ goto out;
+
+ conf = this->private;
+
+ GF_OPTION_RECONF("dump-fd-stats", conf->dump_fd_stats, options, bool, out);
+
+ GF_OPTION_RECONF("count-fop-hits", conf->count_fop_hits, options, bool,
+ out);
+
+ GF_OPTION_RECONF("latency-measurement", conf->measure_latency, options,
+ bool, out);
+
+ old_dump_interval = conf->ios_dump_interval;
+ GF_OPTION_RECONF("ios-dump-interval", conf->ios_dump_interval, options,
+ int32, out);
+ if ((old_dump_interval <= 0) && (conf->ios_dump_interval > 0)) {
+ conf->dump_thread_running = _gf_true;
+ conf->dump_thread_should_die = _gf_false;
+ ret = gf_thread_create(&conf->dump_thread, NULL,
+ (void *)&_ios_dump_thread, this, "iosdump");
+ if (ret) {
+ conf->dump_thread_running = _gf_false;
+ gf_log(this ? this->name : "io-stats", GF_LOG_ERROR,
+ "Failed to start thread"
+ "while reconfigure. Returning %d",
+ ret);
+ goto out;
+ }
+ } else if ((old_dump_interval > 0) && (conf->ios_dump_interval == 0)) {
+ _ios_destroy_dump_thread(conf);
+ }
+
+ GF_OPTION_RECONF("ios-sample-interval", conf->ios_sample_interval, options,
+ int32, out);
+ GF_OPTION_RECONF("ios-dump-format", dump_format_str, options, str, out);
+ ios_set_log_format_code(conf, dump_format_str);
+ GF_OPTION_RECONF("ios-sample-buf-size", conf->ios_sample_buf_size, options,
+ int32, out);
+ GF_OPTION_RECONF("sys-log-level", sys_log_str, options, str, out);
+ if (sys_log_str) {
+ sys_log_level = glusterd_check_log_level(sys_log_str);
+ set_sys_log_level(sys_log_level);
+ }
+
+ GF_OPTION_RECONF("log-level", log_str, options, str, out);
+ if (log_str) {
+ log_level = glusterd_check_log_level(log_str);
+ /* Set loglevel for all children and server xlators */
+ xlator_set_loglevel(this, log_level);
+ }
+
+ GF_OPTION_RECONF("logger", logger_str, options, str, out);
+ if (logger_str) {
+ logger = gf_check_logger(logger_str);
+ gf_log_set_logger(logger);
+ }
+
+ GF_OPTION_RECONF("log-format", log_format_str, options, str, out);
+ if (log_format_str) {
+ log_format = gf_check_log_format(log_format_str);
+ gf_log_set_logformat(log_format);
+ }
+
+ GF_OPTION_RECONF("log-buf-size", log_buf_size, options, uint32, out);
+ gf_log_set_log_buf_size(log_buf_size);
+
+ GF_OPTION_RECONF("log-flush-timeout", log_flush_timeout, options, time,
+ out);
+ gf_log_set_log_flush_timeout(log_flush_timeout);
+
+ GF_OPTION_RECONF("threads", threads, options, int32, out);
+ gf_async_adjust_threads(threads);
+
+ ret = 0;
out:
- gf_log (this->name, GF_LOG_DEBUG, "reconfigure returning %d", ret);
- return ret;
+ gf_log(this ? this->name : "io-stats", GF_LOG_DEBUG,
+ "reconfigure returning %d", ret);
+ return ret;
}
-
int32_t
-mem_acct_init (xlator_t *this)
+mem_acct_init(xlator_t *this)
{
- int ret = -1;
-
- if (!this)
- return ret;
+ int ret = -1;
- ret = xlator_mem_acct_init (this, gf_io_stats_mt_end + 1);
+ if (!this)
+ return ret;
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- " failed");
- return ret;
- }
+ ret = xlator_mem_acct_init(this, gf_io_stats_mt_end + 1);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Memory accounting init"
+ " failed");
return ret;
+ }
+
+ return ret;
}
-int
-init (xlator_t *this)
+void
+ios_conf_destroy(struct ios_conf *conf)
{
- struct ios_conf *conf = NULL;
- char *sys_log_str = NULL;
- int sys_log_level = -1;
- char *log_str = NULL;
- int log_level = -1;
- int ret = -1;
-
- if (!this)
- return -1;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "io_stats translator requires atleast one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- /* This is very much valid as io-stats currently is loaded
- * on top of volumes on both client and server, hence this is
- * not an warning message */
- gf_log (this->name, GF_LOG_DEBUG,
- "dangling volume. check volfile ");
- }
-
- conf = GF_CALLOC (1, sizeof(*conf), gf_io_stats_mt_ios_conf);
-
- if (!conf) {
- gf_log (this->name, GF_LOG_ERROR,
- "Out of memory.");
- return -1;
- }
+ if (!conf)
+ return;
- LOCK_INIT (&conf->lock);
+ ios_destroy_top_stats(conf);
+ _ios_destroy_dump_thread(conf);
+ ios_destroy_sample_buf(conf->ios_sample_buf);
+ LOCK_DESTROY(&conf->lock);
+ gf_dnscache_deinit(conf->dnscache);
+ GF_FREE(conf);
+}
- gettimeofday (&conf->cumulative.started_at, NULL);
- gettimeofday (&conf->incremental.started_at, NULL);
+static void
+ios_init_stats(struct ios_global_stats *stats)
+{
+ int i = 0;
- ret = ios_init_top_stats (conf);
- if (ret)
- return -1;
+ GF_ATOMIC_INIT(stats->data_read, 0);
+ GF_ATOMIC_INIT(stats->data_written, 0);
- GF_OPTION_INIT ("dump-fd-stats", conf->dump_fd_stats, bool, out);
+ for (i = 0; i < IOS_BLOCK_COUNT_SIZE; i++) {
+ GF_ATOMIC_INIT(stats->block_count_write[i], 0);
+ GF_ATOMIC_INIT(stats->block_count_read[i], 0);
+ }
- GF_OPTION_INIT ("count-fop-hits", conf->count_fop_hits, bool, out);
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ GF_ATOMIC_INIT(stats->fop_hits[i], 0);
- GF_OPTION_INIT ("latency-measurement", conf->measure_latency,
- bool, out);
+ for (i = 0; i < GF_UPCALL_FLAGS_MAXVALUE; i++)
+ GF_ATOMIC_INIT(stats->upcall_hits[i], 0);
- GF_OPTION_INIT ("sys-log-level", sys_log_str, str, out);
- if (sys_log_str) {
- sys_log_level = glusterd_check_log_level (sys_log_str);
- set_sys_log_level (sys_log_level);
- }
+ stats->started_at = gf_time();
+}
- GF_OPTION_INIT ("log-level", log_str, str, out);
- if (log_str) {
- log_level = glusterd_check_log_level (log_str);
- gf_log_set_loglevel (log_level);
+int
+init(xlator_t *this)
+{
+ struct ios_conf *conf = NULL;
+ char *volume_id = NULL;
+ char *sys_log_str = NULL;
+ char *logger_str = NULL;
+ char *log_format_str = NULL;
+ char *dump_format_str = NULL;
+ int logger = -1;
+ int log_format = -1;
+ int sys_log_level = -1;
+ char *log_str = NULL;
+ int log_level = -1;
+ int ret = -1;
+ uint32_t log_buf_size = 0;
+ uint32_t log_flush_timeout = 0;
+ int32_t threads;
+
+ if (!this)
+ return -1;
+
+ if (!this->children) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "io_stats translator requires at least one subvolume");
+ return -1;
+ }
+
+ if (!this->parents) {
+ /* This is very much valid as io-stats currently is loaded
+ * on top of volumes on both client and server, hence this is
+ * not an warning message */
+ gf_log(this->name, GF_LOG_DEBUG, "dangling volume. check volfile ");
+ }
+
+ conf = GF_CALLOC(1, sizeof(*conf), gf_io_stats_mt_ios_conf);
+
+ if (!conf)
+ goto out;
+
+ if (dict_get_str(this->options, "unique-id", &conf->unique_id) != 0) {
+ /* This is always set on servers, so we must be a client. */
+ conf->unique_id = this->name;
+ }
+
+ ret = dict_get_strn(this->options, "volume-id", SLEN("volume-id"),
+ &volume_id);
+ if (!ret) {
+ strncpy(this->graph->volume_id, volume_id, GF_UUID_BUF_SIZE);
+ }
+ /*
+ * Init it just after calloc, so that we are sure the lock is inited
+ * in case of error paths.
+ */
+ LOCK_INIT(&conf->lock);
+ LOCK_INIT(&conf->ios_sampling_lock);
+
+ ios_init_stats(&conf->cumulative);
+ ios_init_stats(&conf->incremental);
+
+ ret = ios_init_top_stats(conf);
+ if (ret)
+ goto out;
+
+ GF_OPTION_INIT("dump-fd-stats", conf->dump_fd_stats, bool, out);
+
+ GF_OPTION_INIT("count-fop-hits", conf->count_fop_hits, bool, out);
+
+ GF_OPTION_INIT("latency-measurement", conf->measure_latency, bool, out);
+
+ GF_OPTION_INIT("ios-dump-interval", conf->ios_dump_interval, int32, out);
+
+ GF_OPTION_INIT("ios-sample-interval", conf->ios_sample_interval, int32,
+ out);
+
+ GF_OPTION_INIT("ios-dump-format", dump_format_str, str, out);
+ ios_set_log_format_code(conf, dump_format_str);
+
+ GF_OPTION_INIT("ios-sample-buf-size", conf->ios_sample_buf_size, int32,
+ out);
+
+ ret = ios_init_sample_buf(conf);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Out of memory.");
+ goto out;
+ }
+
+ GF_OPTION_INIT("ios-dnscache-ttl-sec", conf->ios_dnscache_ttl_sec, int32,
+ out);
+ conf->dnscache = gf_dnscache_init(conf->ios_dnscache_ttl_sec);
+ if (!conf->dnscache) {
+ ret = -1;
+ goto out;
+ }
+
+ GF_OPTION_INIT("sys-log-level", sys_log_str, str, out);
+ if (sys_log_str) {
+ sys_log_level = glusterd_check_log_level(sys_log_str);
+ set_sys_log_level(sys_log_level);
+ }
+
+ GF_OPTION_INIT("log-level", log_str, str, out);
+ if (log_str) {
+ log_level = glusterd_check_log_level(log_str);
+ if (DEFAULT_LOG_LEVEL != log_level)
+ gf_log_set_loglevel(this->ctx, log_level);
+ }
+
+ GF_OPTION_INIT("logger", logger_str, str, out);
+ if (logger_str) {
+ logger = gf_check_logger(logger_str);
+ gf_log_set_logger(logger);
+ }
+
+ GF_OPTION_INIT("log-format", log_format_str, str, out);
+ if (log_format_str) {
+ log_format = gf_check_log_format(log_format_str);
+ gf_log_set_logformat(log_format);
+ }
+
+ GF_OPTION_INIT("log-buf-size", log_buf_size, uint32, out);
+ gf_log_set_log_buf_size(log_buf_size);
+
+ GF_OPTION_INIT("log-flush-timeout", log_flush_timeout, time, out);
+ gf_log_set_log_flush_timeout(log_flush_timeout);
+
+ GF_OPTION_INIT("threads", threads, int32, out);
+ gf_async_adjust_threads(threads);
+
+ this->private = conf;
+ if (conf->ios_dump_interval > 0) {
+ conf->dump_thread_running = _gf_true;
+ conf->dump_thread_should_die = _gf_false;
+ ret = gf_thread_create(&conf->dump_thread, NULL,
+ (void *)&_ios_dump_thread, this, "iosdump");
+ if (ret) {
+ conf->dump_thread_running = _gf_false;
+ gf_log(this ? this->name : "io-stats", GF_LOG_ERROR,
+ "Failed to start thread"
+ "in init. Returning %d",
+ ret);
+ goto out;
}
-
- this->private = conf;
- ret = 0;
+ }
+ return 0;
out:
- return ret;
+ ios_conf_destroy(conf);
+ return ret;
}
-
void
-fini (xlator_t *this)
+fini(xlator_t *this)
{
- struct ios_conf *conf = NULL;
-
- if (!this)
- return;
-
- conf = this->private;
-
- if (!conf)
- return;
- this->private = NULL;
+ struct ios_conf *conf = NULL;
- ios_destroy_top_stats (conf);
+ if (!this)
+ return;
- if (conf)
- GF_FREE(conf);
+ conf = this->private;
- gf_log (this->name, GF_LOG_INFO,
- "io-stats translator unloaded");
- return;
+ ios_conf_destroy(conf);
+ this->private = NULL;
+ gf_log(this->name, GF_LOG_INFO, "io-stats translator unloaded");
+ return;
}
int
-notify (xlator_t *this, int32_t event, void *data, ...)
+notify(xlator_t *this, int32_t event, void *data, ...)
{
- int ret = 0;
- struct ios_dump_args args = {0};
- dict_t *output = NULL;
- dict_t *dict = NULL;
- int32_t top_op = 0;
- int32_t list_cnt = 0;
- double throughput = 0;
- double time = 0;
- va_list ap;
-
- dict = data;
- va_start (ap, data);
- output = va_arg (ap, dict_t*);
- va_end (ap);
- switch (event) {
+ int ret = 0;
+ struct ios_dump_args args = {0};
+ dict_t *output = NULL;
+ dict_t *dict = NULL;
+ int32_t op = 0;
+ int32_t list_cnt = 0;
+ double throughput = 0;
+ double time = 0;
+ gf_boolean_t is_peek = _gf_false;
+ va_list ap;
+ struct gf_upcall *up_data = NULL;
+ struct gf_upcall_cache_invalidation *up_ci = NULL;
+
+ dict = data;
+ va_start(ap, data);
+ output = va_arg(ap, dict_t *);
+ va_end(ap);
+ switch (event) {
case GF_EVENT_TRANSLATOR_INFO:
- ret = dict_get_str_boolean (dict, "clear-stats", _gf_false);
+ ret = dict_get_str_boolean(dict, "clear-stats", _gf_false);
+ if (ret) {
+ ret = dict_set_int32(output, "top-op", op);
if (ret) {
- ret = dict_set_int32 (output, "top-op", top_op);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to set top-op in dict");
- goto out;
- }
- ios_destroy_top_stats (this->private);
- ret = ios_init_top_stats (this->private);
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to set top-op in dict");
+ goto out;
+ }
+ ios_destroy_top_stats(this->private);
+ ret = ios_init_top_stats(this->private);
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to reset top stats");
+ ret = dict_set_int32(output, "stats-cleared", ret ? 0 : 1);
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to set stats-cleared"
+ " in dict");
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, "top-op", &op);
+ if (!ret) {
+ ret = dict_get_int32(dict, "list-cnt", &list_cnt);
+ if (op > IOS_STATS_TYPE_NONE && op < IOS_STATS_TYPE_MAX)
+ ret = io_stats_dump_stats_to_dict(this, output, op,
+ list_cnt);
+ if (op == IOS_STATS_TYPE_READ_THROUGHPUT ||
+ op == IOS_STATS_TYPE_WRITE_THROUGHPUT) {
+ ret = dict_get_double(dict, "throughput", &throughput);
+ if (!ret) {
+ ret = dict_get_double(dict, "time", &time);
if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to reset top stats");
- ret = dict_set_int32 (output, "stats-cleared",
- ret ? 0 : 1);
+ goto out;
+ ret = dict_set_double(output, "throughput", throughput);
if (ret)
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to set stats-cleared"
- " in dict");
- goto out;
+ goto out;
+ ret = dict_set_double(output, "time", time);
+ if (ret)
+ goto out;
+ }
+ ret = 0;
}
+ } else {
+ ret = dict_get_int32(dict, "info-op", &op);
+ if (ret || op < GF_IOS_INFO_ALL || GF_IOS_INFO_CLEAR < op)
+ op = GF_IOS_INFO_ALL;
+
+ ret = dict_set_int32(output, "info-op", op);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to set info-op in dict");
+ goto out;
+ }
+
+ if (GF_IOS_INFO_CLEAR == op) {
+ io_stats_clear(this->private);
- ret = dict_get_int32 (dict, "top-op", &top_op);
- if (!ret) {
- ret = dict_get_int32 (dict, "list-cnt", &list_cnt);
- if (top_op > IOS_STATS_TYPE_NONE &&
- top_op < IOS_STATS_TYPE_MAX)
- ret = io_stats_dump_stats_to_dict (this, output,
- top_op, list_cnt);
- if (top_op == IOS_STATS_TYPE_READ_THROUGHPUT ||
- top_op == IOS_STATS_TYPE_WRITE_THROUGHPUT) {
- ret = dict_get_double (dict, "throughput",
- &throughput);
- if (!ret) {
- ret = dict_get_double (dict, "time",
- &time);
- if (ret)
- goto out;
- ret = dict_set_double (output,
- "throughput", throughput);
- if (ret)
- goto out;
- ret = dict_set_double (output, "time",
- time);
- if (ret)
- goto out;
- }
- ret = 0;
-
- }
+ ret = dict_set_int32(output, "stats-cleared", 1);
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to set stats-cleared"
+ " in dict");
} else {
- (void) ios_dump_args_init (&args, IOS_DUMP_TYPE_DICT,
- output);
- ret = io_stats_dump (this, &args);
+ ret = dict_get_str_boolean(dict, "peek", _gf_false);
+ if (-1 != ret)
+ is_peek = ret;
+
+ (void)ios_dump_args_init(&args, IOS_DUMP_TYPE_DICT, output);
+ ret = io_stats_dump(this, &args, op, is_peek);
}
- break;
+ }
+ break;
+ case GF_EVENT_UPCALL:
+ up_data = (struct gf_upcall *)data;
+ ios_bump_upcall(this, GF_UPCALL);
+
+ switch (up_data->event_type) {
+ case GF_UPCALL_RECALL_LEASE:
+ ios_bump_upcall(this, GF_UPCALL_LEASE_RECALL);
+ break;
+ case GF_UPCALL_CACHE_INVALIDATION:
+ up_ci = (struct gf_upcall_cache_invalidation *)
+ up_data->data;
+ if (up_ci->flags & (UP_XATTR | UP_XATTR_RM))
+ ios_bump_upcall(this, GF_UPCALL_CI_XATTR);
+ if (up_ci->flags & IATT_UPDATE_FLAGS)
+ ios_bump_upcall(this, GF_UPCALL_CI_STAT);
+ if (up_ci->flags & UP_RENAME_FLAGS)
+ ios_bump_upcall(this, GF_UPCALL_CI_RENAME);
+ if (up_ci->flags & UP_FORGET)
+ ios_bump_upcall(this, GF_UPCALL_CI_FORGET);
+ if (up_ci->flags & UP_NLINK)
+ ios_bump_upcall(this, GF_UPCALL_CI_NLINK);
+ break;
+ default:
+ gf_msg_debug(this->name, 0,
+ "Unknown upcall event "
+ "type :%d",
+ up_data->event_type);
+ break;
+ }
+
+ default_notify(this, event, data);
+ break;
default:
- default_notify (this, event, data);
- break;
-
- }
+ default_notify(this, event, data);
+ break;
+ }
out:
- return ret;
+ return ret;
}
+struct xlator_dumpops dumpops = {.priv = io_priv};
+
struct xlator_fops fops = {
- .stat = io_stats_stat,
- .readlink = io_stats_readlink,
- .mknod = io_stats_mknod,
- .mkdir = io_stats_mkdir,
- .unlink = io_stats_unlink,
- .rmdir = io_stats_rmdir,
- .symlink = io_stats_symlink,
- .rename = io_stats_rename,
- .link = io_stats_link,
- .truncate = io_stats_truncate,
- .open = io_stats_open,
- .readv = io_stats_readv,
- .writev = io_stats_writev,
- .statfs = io_stats_statfs,
- .flush = io_stats_flush,
- .fsync = io_stats_fsync,
- .setxattr = io_stats_setxattr,
- .getxattr = io_stats_getxattr,
- .removexattr = io_stats_removexattr,
- .fsetxattr = io_stats_fsetxattr,
- .fgetxattr = io_stats_fgetxattr,
- .fremovexattr = io_stats_fremovexattr,
- .opendir = io_stats_opendir,
- .readdir = io_stats_readdir,
- .readdirp = io_stats_readdirp,
- .fsyncdir = io_stats_fsyncdir,
- .access = io_stats_access,
- .ftruncate = io_stats_ftruncate,
- .fstat = io_stats_fstat,
- .create = io_stats_create,
- .lk = io_stats_lk,
- .inodelk = io_stats_inodelk,
- .finodelk = io_stats_finodelk,
- .entrylk = io_stats_entrylk,
- .lookup = io_stats_lookup,
- .xattrop = io_stats_xattrop,
- .fxattrop = io_stats_fxattrop,
- .setattr = io_stats_setattr,
- .fsetattr = io_stats_fsetattr,
+ .stat = io_stats_stat,
+ .readlink = io_stats_readlink,
+ .mknod = io_stats_mknod,
+ .mkdir = io_stats_mkdir,
+ .unlink = io_stats_unlink,
+ .rmdir = io_stats_rmdir,
+ .symlink = io_stats_symlink,
+ .rename = io_stats_rename,
+ .link = io_stats_link,
+ .truncate = io_stats_truncate,
+ .open = io_stats_open,
+ .readv = io_stats_readv,
+ .writev = io_stats_writev,
+ .statfs = io_stats_statfs,
+ .flush = io_stats_flush,
+ .fsync = io_stats_fsync,
+ .setxattr = io_stats_setxattr,
+ .getxattr = io_stats_getxattr,
+ .removexattr = io_stats_removexattr,
+ .fsetxattr = io_stats_fsetxattr,
+ .fgetxattr = io_stats_fgetxattr,
+ .fremovexattr = io_stats_fremovexattr,
+ .opendir = io_stats_opendir,
+ .readdir = io_stats_readdir,
+ .readdirp = io_stats_readdirp,
+ .fsyncdir = io_stats_fsyncdir,
+ .access = io_stats_access,
+ .ftruncate = io_stats_ftruncate,
+ .fstat = io_stats_fstat,
+ .create = io_stats_create,
+ .lk = io_stats_lk,
+ .inodelk = io_stats_inodelk,
+ .finodelk = io_stats_finodelk,
+ .entrylk = io_stats_entrylk,
+ .fentrylk = io_stats_fentrylk,
+ .lookup = io_stats_lookup,
+ .xattrop = io_stats_xattrop,
+ .fxattrop = io_stats_fxattrop,
+ .setattr = io_stats_setattr,
+ .fsetattr = io_stats_fsetattr,
+ .fallocate = io_stats_fallocate,
+ .discard = io_stats_discard,
+ .zerofill = io_stats_zerofill,
+ .ipc = io_stats_ipc,
+ .rchecksum = io_stats_rchecksum,
+ .seek = io_stats_seek,
+ .lease = io_stats_lease,
+ .getactivelk = io_stats_getactivelk,
+ .setactivelk = io_stats_setactivelk,
+ .compound = io_stats_compound,
+ .copy_file_range = io_stats_copy_file_range,
};
struct xlator_cbks cbks = {
- .release = io_stats_release,
- .releasedir = io_stats_releasedir,
- .forget = io_stats_forget,
+ .release = io_stats_release,
+ .releasedir = io_stats_releasedir,
+ .forget = io_stats_forget,
};
struct volume_options options[] = {
- { .key = {"dump-fd-stats"},
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- .description = "If on stats related to file-operations would be "
- "tracked inside GlusterFS data-structures."
- },
- { .key = { "latency-measurement" },
- .type = GF_OPTION_TYPE_BOOL,
- .default_value = "off",
- .description = "If on stats related to the latency of each operation "
- "would be tracked inside GlusterFS data-structures. "
- },
- { .key = {"count-fop-hits"},
- .type = GF_OPTION_TYPE_BOOL,
- },
- { .key = {"log-level"},
- .type = GF_OPTION_TYPE_STR,
- .value = { "DEBUG", "WARNING", "ERROR", "INFO",
- "CRITICAL", "NONE", "TRACE"}
- },
-
- /* These are synthetic entries to assist validation of CLI's *
- * volume set command */
- { .key = {"client-log-level"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "INFO",
- .description = "Changes the log-level of the clients",
- .value = { "DEBUG", "WARNING", "ERROR", "INFO",
- "CRITICAL", "NONE", "TRACE"}
- },
- { .key = {"sys-log-level"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "CRITICAL",
- .description = "Gluster's syslog log-level",
- .value = { "WARNING", "ERROR", "CRITICAL"}
- },
- { .key = {"brick-log-level"},
- .type = GF_OPTION_TYPE_STR,
- .default_value = "INFO",
- .description = "Changes the log-level of the bricks",
- .value = { "DEBUG", "WARNING", "ERROR", "INFO",
- "CRITICAL", "NONE", "TRACE"}
- },
- { .key = {NULL} },
+ {.key = {"dump-fd-stats"},
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "If on stats related to file-operations would be "
+ "tracked inside GlusterFS data-structures."},
+ {.key = {"ios-dump-interval"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .min = 0,
+ .max = 3600,
+ .default_value = "0",
+ .description = "Interval (in seconds) at which to auto-dump "
+ "statistics. Zero disables automatic dumping."},
+ {.key = {"ios-sample-interval"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .min = 0,
+ .max = 65535,
+ .default_value = "0",
+ .description = "Interval in which we want to collect FOP latency "
+ "samples. 2 means collect a sample every 2nd FOP."},
+ {.key = {"ios-dump-format"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_3_12_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .default_value = "json",
+ .description = " The dump-format option specifies the format in which"
+ " to dump the statistics. Select between \"text\", "
+ "\"json\", \"dict\" and \"samples\". Default is "
+ "\"json\".",
+ .value = {"text", "json", "dict", "samples"}},
+ {.key = {"ios-sample-buf-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .min = 1024,
+ .max = 1024 * 1024,
+ .default_value = "65535",
+ .description = "The maximum size of our FOP sampling ring buffer."},
+ {.key = {"ios-dnscache-ttl-sec"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .min = 1,
+ .max = 3600 * 72,
+ .default_value = "86400",
+ .description = "The interval after wish a cached DNS entry will be "
+ "re-validated. Default: 24 hrs"},
+ {.key = {"latency-measurement"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .default_value = "off",
+ .description = "If on stats related to the latency of each operation "
+ "would be tracked inside GlusterFS data-structures. "},
+ {
+ .key = {"count-fop-hits"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"io-stats"},
+ },
+ {.key = {"log-level"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {"DEBUG", "WARNING", "ERROR", "INFO", "CRITICAL", "NONE",
+ "TRACE"}},
+
+ /* These are synthetic entries to assist validation of CLI's *
+ * volume set command */
+ {.key = {"client-log-level"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .default_value = "INFO",
+ .description = "Changes the log-level of the clients",
+ .value = {"DEBUG", "WARNING", "ERROR", "INFO", "CRITICAL", "NONE",
+ "TRACE"}},
+ {.key = {"sys-log-level"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "CRITICAL",
+ .description = "Gluster's syslog log-level",
+ .value = {"WARNING", "ERROR", "INFO", "CRITICAL"}},
+ {.key = {"brick-log-level"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .default_value = "INFO",
+ .description = "Changes the log-level of the bricks",
+ .value = {"DEBUG", "WARNING", "ERROR", "INFO", "CRITICAL", "NONE",
+ "TRACE"}},
+ {.key = {"logger"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {GF_LOGGER_GLUSTER_LOG, GF_LOGGER_SYSLOG}},
+ {.key = {"client-logger"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_3_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .default_value = GF_LOGGER_GLUSTER_LOG,
+ .description = "Changes the logging sub-system to log to, for the "
+ "clients",
+ .value = {GF_LOGGER_GLUSTER_LOG, GF_LOGGER_SYSLOG}},
+ {.key = {"brick-logger"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_3_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .default_value = GF_LOGGER_GLUSTER_LOG,
+ .description = "Changes the logging sub-system to log to, for the "
+ "bricks",
+ .value = {GF_LOGGER_GLUSTER_LOG, GF_LOGGER_SYSLOG}},
+ {.key = {"log-format"},
+ .type = GF_OPTION_TYPE_STR,
+ .value = {GF_LOG_FORMAT_NO_MSG_ID, GF_LOG_FORMAT_WITH_MSG_ID}},
+ {.key = {"client-log-format"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_3_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .default_value = GF_LOG_FORMAT_WITH_MSG_ID,
+ .description = "Changes log format for the clients",
+ .value = {GF_LOG_FORMAT_NO_MSG_ID, GF_LOG_FORMAT_WITH_MSG_ID}},
+ {.key = {"brick-log-format"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_3_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .default_value = GF_LOG_FORMAT_WITH_MSG_ID,
+ .description = "Changes the log format for the bricks",
+ .value = {GF_LOG_FORMAT_NO_MSG_ID, GF_LOG_FORMAT_WITH_MSG_ID}},
+ {
+ .key = {"log-buf-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = GF_LOG_LRU_BUFSIZE_MIN,
+ .max = GF_LOG_LRU_BUFSIZE_MAX,
+ .default_value = "5",
+ },
+ {.key = {"client-log-buf-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {GD_OP_VERSION_3_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .min = GF_LOG_LRU_BUFSIZE_MIN,
+ .max = GF_LOG_LRU_BUFSIZE_MAX,
+ .default_value = "5",
+ .description = "This option determines the maximum number of unique "
+ "log messages that can be buffered for a time equal to"
+ " the value of the option client-log-flush-timeout."},
+ {.key = {"brick-log-buf-size"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {GD_OP_VERSION_3_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .min = GF_LOG_LRU_BUFSIZE_MIN,
+ .max = GF_LOG_LRU_BUFSIZE_MAX,
+ .default_value = "5",
+ .description = "This option determines the maximum number of unique "
+ "log messages that can be buffered for a time equal to"
+ " the value of the option brick-log-flush-timeout."},
+ {
+ .key = {"log-flush-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = GF_LOG_FLUSH_TIMEOUT_MIN,
+ .max = GF_LOG_FLUSH_TIMEOUT_MAX,
+ .default_value = "120",
+ },
+ {.key = {"client-log-flush-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .op_version = {GD_OP_VERSION_3_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .min = GF_LOG_FLUSH_TIMEOUT_MIN,
+ .max = GF_LOG_FLUSH_TIMEOUT_MAX,
+ .default_value = "120",
+ .description = "This option determines the maximum number of unique "
+ "log messages that can be buffered for a time equal to"
+ " the value of the option client-log-flush-timeout."},
+ {.key = {"brick-log-flush-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .op_version = {GD_OP_VERSION_3_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats"},
+ .min = GF_LOG_FLUSH_TIMEOUT_MIN,
+ .max = GF_LOG_FLUSH_TIMEOUT_MAX,
+ .default_value = "120",
+ .description = "This option determines the maximum number of unique "
+ "log messages that can be buffered for a time equal to"
+ " the value of the option brick-log-flush-timeout."},
+ {.key = {"unique-id"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "/no/such/path",
+ .description = "Unique ID for our files."},
+ {.key = {"global-threading"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"io-stats", "threading"},
+ .description = "This option enables the global threading support for "
+ "bricks. If enabled, it's recommended to also enable "
+ "'performance.iot-pass-through'"},
+ {.key = {"threads"}, .type = GF_OPTION_TYPE_INT},
+ {.key = {"brick-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "16",
+ .min = 0,
+ .max = GF_ASYNC_MAX_THREADS,
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"io-stats", "threading"},
+ .description = "When global threading is used, this value determines the "
+ "maximum amount of threads that can be created on bricks"},
+ {.key = {"client-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = "16",
+ .min = 0,
+ .max = GF_ASYNC_MAX_THREADS,
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"io-stats", "threading"},
+ .description = "When global threading is used, this value determines the "
+ "maximum amount of threads that can be created on clients"},
+ {.key = {"volume-id"},
+ .type = GF_OPTION_TYPE_STR,
+ .op_version = {GD_OP_VERSION_7_1},
+ .tags = {"global", "volume-id"},
+ .description =
+ "This option points to the 'unique' UUID particular to this "
+ "volume, which would be set in 'graph->volume_id'"},
+ {.key = {NULL}},
+};
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "io-stats",
+ .category = GF_MAINTAINED,
};
diff --git a/scheduler/alu/Makefile.am b/xlators/debug/sink/Makefile.am
index d471a3f9243..f2689244371 100644
--- a/scheduler/alu/Makefile.am
+++ b/xlators/debug/sink/Makefile.am
@@ -1,3 +1,2 @@
SUBDIRS = src
-CLEANFILES =
diff --git a/xlators/debug/sink/src/Makefile.am b/xlators/debug/sink/src/Makefile.am
new file mode 100644
index 00000000000..f952c2ce6bc
--- /dev/null
+++ b/xlators/debug/sink/src/Makefile.am
@@ -0,0 +1,14 @@
+xlator_LTLIBRARIES = sink.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_builddir)/rpc/xdr/src
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+sink_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+sink_la_SOURCES = sink.c
+sink_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+CLEANFILES =
+
diff --git a/xlators/debug/sink/src/sink.c b/xlators/debug/sink/src/sink.c
new file mode 100644
index 00000000000..9822bbb732e
--- /dev/null
+++ b/xlators/debug/sink/src/sink.c
@@ -0,0 +1,94 @@
+/*
+ Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+int32_t
+init(xlator_t *this)
+{
+ return 0;
+}
+
+void
+fini(xlator_t *this)
+{
+ return;
+}
+
+/*
+ * notify - when parent sends PARENT_UP, send CHILD_UP event from here
+ */
+int32_t
+notify(xlator_t *this, int32_t event, void *data, ...)
+{
+ switch (event) {
+ case GF_EVENT_PARENT_UP:
+ /* Tell the parent that this xlator is up */
+ default_notify(this, GF_EVENT_CHILD_UP, data);
+ break;
+ case GF_EVENT_PARENT_DOWN:
+ /* Tell the parent that this xlator is down */
+ default_notify(this, GF_EVENT_CHILD_DOWN, data);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/*
+ * A lookup on "/" is done while mounting or glfs_init() is performed. This
+ * needs to return a valid directory for the root of the mountpoint.
+ *
+ * In case this xlator is used for more advanced debugging, it will need to be
+ * extended to support different LOOKUPs too.
+ */
+static int32_t
+sink_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ struct iatt stbuf = {
+ 0,
+ };
+ struct iatt postparent = {
+ 0,
+ };
+
+ /* the root of the volume always need to be a directory */
+ stbuf.ia_type = IA_IFDIR;
+
+ STACK_UNWIND_STRICT(lookup, frame, 0, 0, loc ? loc->inode : NULL, &stbuf,
+ xdata, &postparent);
+
+ return 0;
+}
+
+struct xlator_fops fops = {
+ .lookup = sink_lookup,
+};
+
+struct xlator_cbks cbks = {};
+
+struct volume_options options[] = {
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .op_version = {GD_OP_VERSION_3_12_0},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "sink",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/debug/trace/src/Makefile.am b/xlators/debug/trace/src/Makefile.am
index 0f1679a049d..a37ea63af04 100644
--- a/xlators/debug/trace/src/Makefile.am
+++ b/xlators/debug/trace/src/Makefile.am
@@ -2,13 +2,16 @@
xlator_LTLIBRARIES = trace.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/debug
-trace_la_LDFLAGS = -module -avoidversion
+trace_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
trace_la_SOURCES = trace.c
trace_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+noinst_HEADERS = trace.h trace-mem-types.h
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/debug/trace/src/trace-mem-types.h b/xlators/debug/trace/src/trace-mem-types.h
new file mode 100644
index 00000000000..18a7e0414a6
--- /dev/null
+++ b/xlators/debug/trace/src/trace-mem-types.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __TRACE_MEM_TYPES_H__
+#define __TRACE_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_trace_mem_types_ {
+ gf_trace_mt_trace_conf_t = gf_common_mt_end + 1,
+ gf_trace_mt_end
+};
+#endif
diff --git a/xlators/debug/trace/src/trace.c b/xlators/debug/trace/src/trace.c
index a1136a2e5b5..6ed0ca00342 100644
--- a/xlators/debug/trace/src/trace.c
+++ b/xlators/debug/trace/src/trace.c
@@ -1,26 +1,15 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#include "trace.h"
+#include "trace-mem-types.h"
/**
* xlators/debug/trace :
@@ -28,2400 +17,3518 @@
* their _cbk functions, which later passes the call to next layer.
* Very helpful translator for debugging.
*/
+#define TRACE_STAT_TO_STR(buf, str) trace_stat_to_str(buf, str, sizeof(str))
+
+static void
+trace_stat_to_str(struct iatt *buf, char *str, size_t len)
+{
+ char atime_buf[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char mtime_buf[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char ctime_buf[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+
+ if (!buf)
+ return;
-#include <time.h>
-#include <errno.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "common-utils.h"
+ gf_time_fmt(atime_buf, sizeof atime_buf, buf->ia_atime, gf_timefmt_dirent);
+
+ gf_time_fmt(mtime_buf, sizeof mtime_buf, buf->ia_mtime, gf_timefmt_dirent);
+
+ gf_time_fmt(ctime_buf, sizeof ctime_buf, buf->ia_ctime, gf_timefmt_dirent);
+
+ snprintf(str, len,
+ "gfid=%s ino=%" PRIu64
+ ", mode=%o, "
+ "nlink=%" GF_PRI_NLINK ", uid=%u, gid=%u, size=%" PRIu64
+ ", "
+ "blocks=%" PRIu64
+ ", atime=%s mtime=%s ctime=%s "
+ "atime_sec=%" PRId64 ", atime_nsec=%" PRIu32
+ ","
+ " mtime_sec=%" PRId64 ", mtime_nsec=%" PRIu32
+ ", "
+ "ctime_sec=%" PRId64 ", ctime_nsec=%" PRIu32 "",
+ uuid_utoa(buf->ia_gfid), buf->ia_ino,
+ st_mode_from_ia(buf->ia_prot, buf->ia_type), buf->ia_nlink,
+ buf->ia_uid, buf->ia_gid, buf->ia_size, buf->ia_blocks, atime_buf,
+ mtime_buf, ctime_buf, buf->ia_atime, buf->ia_atime_nsec,
+ buf->ia_mtime, buf->ia_mtime_nsec, buf->ia_ctime,
+ buf->ia_ctime_nsec);
+}
+
+int
+dump_history_trace(circular_buffer_t *cb, void *data)
+{
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+
+ /* Since we are continuing with adding entries to the buffer even when
+ gettimeofday () fails, it's safe to check tm and then dump the time
+ at which the entry was added to the buffer */
+
+ gf_time_fmt_tv(timestr, sizeof timestr, &cb->tv, gf_timefmt_Ymd_T);
+ gf_proc_dump_write("TIME", "%s", timestr);
+
+ gf_proc_dump_write("FOP", "%s\n", (char *)cb->data);
+
+ return 0;
+}
+
+int
+trace_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ char statstr[1024] = {
+ 0,
+ };
+ char preparentstr[1024] = {
+ 0,
+ };
+ char postparentstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_CREATE].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (op_ret >= 0) {
+ TRACE_STAT_TO_STR(buf, statstr);
+ TRACE_STAT_TO_STR(preparent, preparentstr);
+ TRACE_STAT_TO_STR(postparent, postparentstr);
+
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s (op_ret=%d, fd=%p"
+ "*stbuf {%s}, *preparent {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique, uuid_utoa(inode->gfid), op_ret, fd,
+ statstr, preparentstr, postparentstr);
+
+ /* for 'release' log */
+ fd_ctx_set(fd, this, 0);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret, op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+int
+trace_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
-struct {
- char *name;
- int enabled;
-} trace_fop_names[GF_FOP_MAXVALUE];
+ conf = this->private;
-int trace_log_level = GF_LOG_INFO;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_OPEN].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, op_errno=%d, "
+ "*fd=%p",
+ frame->root->unique, uuid_utoa(frame->local), op_ret, op_errno,
+ fd);
+
+ LOG_ELEMENT(conf, string);
+ }
+
+out:
+ /* for 'release' log */
+ if (op_ret >= 0)
+ fd_ctx_set(fd, this, 0);
+
+ TRACE_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int
+trace_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ char statstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_STAT].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR(buf, statstr);
+ (void)snprintf(
+ string, sizeof(string), "%" PRId64 ": gfid=%s op_ret=%d buf=%s",
+ frame->root->unique, uuid_utoa(frame->local), op_ret, statstr);
+ } else {
+ (void)snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int
+trace_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *buf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ char statstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READ].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (op_ret >= 0) {
+ TRACE_STAT_TO_STR(buf, statstr);
+ snprintf(
+ string, sizeof(string), "%" PRId64 ": gfid=%s op_ret=%d buf=%s",
+ frame->root->unique, uuid_utoa(frame->local), op_ret, statstr);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, buf,
+ iobref, xdata);
+ return 0;
+}
+
+int
+trace_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ char preopstr[1024] = {
+ 0,
+ };
+ char postopstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_WRITE].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (op_ret >= 0) {
+ TRACE_STAT_TO_STR(prebuf, preopstr);
+ TRACE_STAT_TO_STR(postbuf, postopstr);
+
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret, preopstr, postopstr);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
-static char *
-trace_stat_to_str (struct iatt *buf)
+int
+trace_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
+ dict_t *xdata)
{
- char *statstr = NULL;
- char atime_buf[64] = {0,};
- char mtime_buf[64] = {0,};
- char ctime_buf[64] = {0,};
- int asprint_ret_value = 0;
+ trace_conf_t *conf = NULL;
- if (!buf) {
- statstr = NULL;
- goto out;
- }
+ conf = this->private;
- gf_time_fmt (atime_buf, sizeof atime_buf, buf->ia_atime, gf_timefmt_bdT);
- gf_time_fmt (mtime_buf, sizeof mtime_buf, buf->ia_mtime, gf_timefmt_bdT);
- gf_time_fmt (ctime_buf, sizeof ctime_buf, buf->ia_ctime, gf_timefmt_bdT);
- asprint_ret_value = gf_asprintf (&statstr,
- "gfid=%s ino=%"PRIu64", mode=%o, "
- "nlink=%"GF_PRI_NLINK", uid=%u, "
- "gid=%u, size=%"PRIu64", "
- "blocks=%"PRIu64", atime=%s, "
- "mtime=%s, ctime=%s",
- uuid_utoa (buf->ia_gfid), buf->ia_ino,
- st_mode_from_ia (buf->ia_prot,
- buf->ia_type),
- buf->ia_nlink, buf->ia_uid,
- buf->ia_gid, buf->ia_size,
- buf->ia_blocks, atime_buf,
- mtime_buf, ctime_buf);
-
- if (asprint_ret_value < 0)
- statstr = NULL;
-
-out:
- return statstr;
-}
-
-
-int
-trace_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
-{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_CREATE].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d, fd=%p"
- "*stbuf {%s}, *preparent {%s}, *postparent = "
- "{%s})", frame->root->unique,
- uuid_utoa (inode->gfid), op_ret, fd,
- statstr, preparentstr, postparentstr);
-
- if (statstr)
- GF_FREE (statstr);
- if (preparentstr)
- GF_FREE (preparentstr);
- if (postparentstr)
- GF_FREE (postparentstr);
-
- /* for 'release' log */
- fd_ctx_set (fd, this, 0);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READDIR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 " : gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata);
- return 0;
-}
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(readdir, frame, op_ret, op_errno, buf, xdata);
+
+ return 0;
+}
+
+int
+trace_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *buf,
+ dict_t *xdata)
+{
+ int count = 0;
+ char statstr[1024] = {
+ 0,
+ };
+ char string[4096] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+ gf_dirent_t *entry = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READDIRP].enabled) {
+ snprintf(string, sizeof(string),
+ "%" PRId64 " : gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+
+ LOG_ELEMENT(conf, string);
+ }
+ if (op_ret < 0)
+ goto out;
+
+ list_for_each_entry(entry, &buf->list, list)
+ {
+ count++;
+ TRACE_STAT_TO_STR(&entry->d_stat, statstr);
+ snprintf(string, sizeof(string),
+ "entry no. %d, pargfid=%s, "
+ "bname=%s *buf {%s}",
+ count, uuid_utoa(frame->local), entry->d_name, statstr);
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(readdirp, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int
+trace_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ char preopstr[1024] = {
+ 0,
+ };
+ char postopstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSYNC].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR(prebuf, preopstr);
+ TRACE_STAT_TO_STR(postbuf, postopstr);
+
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s}",
+ frame->root->unique, op_ret, preopstr, postopstr);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+ return 0;
+}
+
+int
+trace_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ char preopstr[1024] = {
+ 0,
+ };
+ char postopstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SETATTR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR(statpre, preopstr);
+ TRACE_STAT_TO_STR(statpost, postopstr);
+
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret, preopstr, postopstr);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(setattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
+ return 0;
+}
+
+int
+trace_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ char preopstr[1024] = {
+ 0,
+ };
+ char postopstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR(statpre, preopstr);
+ TRACE_STAT_TO_STR(statpost, postopstr);
+
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s})",
+ frame->root->unique, op_ret, preopstr, postopstr);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s op_ret=%d, op_errno=%d)",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
+ return 0;
+}
+
+int
+trace_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ char preparentstr[1024] = {
+ 0,
+ };
+ char postparentstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_UNLINK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR(preparent, preparentstr);
+ TRACE_STAT_TO_STR(postparent, postparentstr);
+
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ " *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ preparentstr, postparentstr);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+int
+trace_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ char statstr[1024] = {
+ 0,
+ };
+ char preoldparentstr[1024] = {
+ 0,
+ };
+ char postoldparentstr[1024] = {
+ 0,
+ };
+ char prenewparentstr[1024] = {
+ 0,
+ };
+ char postnewparentstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RENAME].enabled) {
+ char string[6044] = {
+ 0,
+ };
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR(buf, statstr);
+ TRACE_STAT_TO_STR(preoldparent, preoldparentstr);
+ TRACE_STAT_TO_STR(postoldparent, postoldparentstr);
+ TRACE_STAT_TO_STR(prenewparent, prenewparentstr);
+ TRACE_STAT_TO_STR(postnewparent, postnewparentstr);
+
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": (op_ret=%d, "
+ "*stbuf = {%s}, *preoldparent = {%s},"
+ " *postoldparent = {%s}"
+ " *prenewparent = {%s}, "
+ "*postnewparent = {%s})",
+ frame->root->unique, op_ret, statstr, preoldparentstr,
+ postoldparentstr, prenewparentstr, postnewparentstr);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(rename, frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
+ return 0;
+}
+
+int
+trace_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *buf,
+ struct iatt *stbuf, dict_t *xdata)
+{
+ char statstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READLINK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR(stbuf, statstr);
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": (op_ret=%d, op_errno=%d,"
+ "buf=%s, stbuf = { %s })",
+ frame->root->unique, op_ret, op_errno, buf, statstr);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ }
+
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(readlink, frame, op_ret, op_errno, buf, stbuf, xdata);
+ return 0;
+}
+
+int
+trace_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ char statstr[1024] = {
+ 0,
+ };
+ char postparentstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR(buf, statstr);
+ TRACE_STAT_TO_STR(postparent, postparentstr);
+ /* print buf->ia_gfid instead of inode->gfid,
+ * since if the inode is not yet linked to the
+ * inode table (fresh lookup) then null gfid
+ * will be printed.
+ */
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s (op_ret=%d "
+ "*buf {%s}, *postparent {%s}",
+ frame->root->unique, uuid_utoa(buf->ia_gfid), op_ret,
+ statstr, postparentstr);
+
+ /* For 'forget' */
+ inode_ctx_put(inode, this, 0);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
+}
+
+int
+trace_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ char statstr[1024] = {
+ 0,
+ };
+ char preparentstr[1024] = {
+ 0,
+ };
+ char postparentstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR(buf, statstr);
+ TRACE_STAT_TO_STR(preparent, preparentstr);
+ TRACE_STAT_TO_STR(postparent, postparentstr);
+
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s (op_ret=%d "
+ "*stbuf = {%s}, *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique, uuid_utoa(inode->gfid), op_ret,
+ statstr, preparentstr, postparentstr);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": op_ret=%d, op_errno=%d", frame->root->unique,
+ op_ret, op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(symlink, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int
+trace_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ char statstr[1024] = {
+ 0,
+ };
+ char preparentstr[1024] = {
+ 0,
+ };
+ char postparentstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {
+ 0,
+ };
+ if (trace_fop_names[GF_FOP_MKNOD].enabled) {
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR(buf, statstr);
+ TRACE_STAT_TO_STR(preparent, preparentstr);
+ TRACE_STAT_TO_STR(postparent, postparentstr);
+
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s (op_ret=%d "
+ "*stbuf = {%s}, *preparent = {%s}, "
+ "*postparent = {%s})",
+ frame->root->unique, uuid_utoa(inode->gfid), op_ret,
+ statstr, preparentstr, postparentstr);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret, op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int
+trace_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ char statstr[1024] = {
+ 0,
+ };
+ char preparentstr[1024] = {
+ 0,
+ };
+ char postparentstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_MKDIR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR(buf, statstr);
+ TRACE_STAT_TO_STR(preparent, preparentstr);
+ TRACE_STAT_TO_STR(postparent, postparentstr);
+
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s (op_ret=%d "
+ ", *stbuf = {%s}, *prebuf = {%s}, "
+ "*postbuf = {%s} )",
+ frame->root->unique, uuid_utoa(inode->gfid), op_ret,
+ statstr, preparentstr, postparentstr);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": (op_ret=%d, op_errno=%d)",
+ frame->root->unique, op_ret, op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int
+trace_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ char statstr[1024] = {
+ 0,
+ };
+ char preparentstr[1024] = {
+ 0,
+ };
+ char postparentstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {
+ 0,
+ };
+ if (trace_fop_names[GF_FOP_LINK].enabled) {
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR(buf, statstr);
+ TRACE_STAT_TO_STR(preparent, preparentstr);
+ TRACE_STAT_TO_STR(postparent, postparentstr);
+
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": (op_ret=%d, "
+ "*stbuf = {%s}, *prebuf = {%s},"
+ " *postbuf = {%s})",
+ frame->root->unique, op_ret, statstr, preparentstr,
+ postparentstr);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
int
-trace_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+trace_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_OPEN].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, *fd=%p",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno, fd);
- }
+ conf = this->private;
- /* for 'release' log */
- if (op_ret >= 0)
- fd_ctx_set (fd, this, 0);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {
+ 0,
+ };
+ if (trace_fop_names[GF_FOP_FLUSH].enabled) {
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
- return 0;
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata);
+ return 0;
}
-
int
-trace_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+trace_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- char *statstr = NULL;
- if (trace_fop_names[GF_FOP_STAT].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d buf=%s",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, statstr);
+ trace_conf_t *conf = NULL;
- if (statstr)
- GF_FREE (statstr);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
- }
- }
+ conf = this->private;
- frame->local = NULL;
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ char string[4096] = {
+ 0,
+ };
+ if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, op_errno=%d,"
+ " fd=%p",
+ frame->root->unique, uuid_utoa(frame->local), op_ret, op_errno,
+ fd);
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ /* for 'releasedir' log */
+ if (op_ret >= 0)
+ fd_ctx_set(fd, this, 0);
+
+ TRACE_STACK_UNWIND(opendir, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int
+trace_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ char preparentstr[1024] = {
+ 0,
+ };
+ char postparentstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RMDIR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR(preparent, preparentstr);
+ TRACE_STAT_TO_STR(postparent, postparentstr);
+
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "*prebuf={%s}, *postbuf={%s}",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ preparentstr, postparentstr);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(rmdir, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+int
+trace_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ char preopstr[1024] = {
+ 0,
+ };
+ char postopstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR(prebuf, preopstr);
+ TRACE_STAT_TO_STR(postbuf, postopstr);
+
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": (op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s} )",
+ frame->root->unique, op_ret, preopstr, postopstr);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
+trace_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_STATFS].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (op_ret == 0) {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": ({f_bsize=%lu, "
+ "f_frsize=%lu, "
+ "f_blocks=%" GF_PRI_FSBLK ", f_bfree=%" GF_PRI_FSBLK
+ ", "
+ "f_bavail=%" GF_PRI_FSBLK
+ ", "
+ "f_files=%" GF_PRI_FSBLK
+ ", "
+ "f_ffree=%" GF_PRI_FSBLK
+ ", "
+ "f_favail=%" GF_PRI_FSBLK
+ ", "
+ "f_fsid=%lu, f_flag=%lu, "
+ "f_namemax=%lu}) => ret=%d",
+ frame->root->unique, buf->f_bsize, buf->f_frsize,
+ buf->f_blocks, buf->f_bfree, buf->f_bavail, buf->f_files,
+ buf->f_ffree, buf->f_favail, buf->f_fsid, buf->f_flag,
+ buf->f_namemax, op_ret);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": (op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique, op_ret, op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
int
-trace_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *buf, struct iobref *iobref, dict_t *xdata)
+trace_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- char *statstr = NULL;
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_READ].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d buf=%s",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, statstr);
+ conf = this->private;
- if (statstr)
- GF_FREE (statstr);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
- }
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- buf, iobref, xdata);
- return 0;
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
-
int
-trace_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+trace_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_WRITE].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (prebuf);
- postopstr = trace_stat_to_str (postbuf);
+ conf = this->private;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s})",
- frame->root->unique, op_ret,
- preopstr, postopstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, op_errno=%d,"
+ " dict=%p",
+ frame->root->unique, uuid_utoa(frame->local), op_ret, op_errno,
+ dict);
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
- }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
- frame->local = NULL;
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
- return 0;
+ return 0;
}
-
-
int
-trace_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata)
+trace_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_READDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
- }
+ trace_conf_t *conf = NULL;
- frame->local = NULL;
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, buf, xdata);
+ conf = this->private;
- return 0;
-}
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSETXATTR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
int
-trace_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *buf, dict_t *xdata)
+trace_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_READDIRP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64" : gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
- }
+ trace_conf_t *conf = NULL;
- frame->local = NULL;
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, buf, xdata);
+ conf = this->private;
- return 0;
-}
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FGETXATTR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, op_errno=%d,"
+ " dict=%p",
+ frame->root->unique, uuid_utoa(frame->local), op_ret, op_errno,
+ dict);
+
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
int
-trace_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+trace_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
-
- if (trace_fop_names[GF_FOP_FSYNC].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (prebuf);
- postopstr = trace_stat_to_str (postbuf);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s}",
- frame->root->unique, op_ret,
- preopstr, postopstr);
+ trace_conf_t *conf = NULL;
- if (preopstr)
- GF_FREE (preopstr);
+ conf = this->private;
- if (postopstr)
- GF_FREE (postopstr);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
- }
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata);
- return 0;
+ return 0;
}
-
int
-trace_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+trace_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_SETATTR].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (statpre);
- postopstr = trace_stat_to_str (statpost);
+ conf = this->private;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s})",
- frame->root->unique, op_ret,
- preopstr, postopstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
- if (preopstr)
- GF_FREE (preopstr);
-
- if (postopstr)
- GF_FREE (postopstr);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
- }
- }
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre, statpost, xdata);
- return 0;
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(fsyncdir, frame, op_ret, op_errno, xdata);
+ return 0;
}
-
int
-trace_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *statpre, struct iatt *statpost, dict_t *xdata)
+trace_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
-
- if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (statpre);
- postopstr = trace_stat_to_str (statpost);
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_ACCESS].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(access, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+trace_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ char prebufstr[1024] = {
+ 0,
+ };
+ char postbufstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR(prebuf, prebufstr);
+ TRACE_STAT_TO_STR(postbuf, postbufstr);
+
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": op_ret=%d, "
+ "*prebuf = {%s}, *postbuf = {%s} )",
+ frame->root->unique, op_ret, prebufstr, postbufstr);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int
+trace_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ char statstr[1024] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSTAT].enabled) {
+ char string[4096] = {0.};
+ if (op_ret == 0) {
+ TRACE_STAT_TO_STR(buf, statstr);
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d "
+ "buf=%s",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ statstr);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ }
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int
+trace_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (op_ret == 0) {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "{l_type=%d, l_whence=%d, "
+ "l_start=%" PRId64
+ ", "
+ "l_len=%" PRId64 ", l_pid=%u})",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ lock->l_type, lock->l_whence, lock->l_start, lock->l_len,
+ lock->l_pid);
+ } else {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d, "
+ "op_errno=%d)",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ }
+
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(lk, frame, op_ret, op_errno, lock, xdata);
+ return 0;
+}
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s})",
- frame->root->unique, op_ret,
- preopstr, postopstr);
+int
+trace_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
- if (preopstr)
- GF_FREE (preopstr);
+ conf = this->private;
- if (postopstr)
- GF_FREE (postopstr);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
- }
- }
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno,
- statpre, statpost, xdata);
- return 0;
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(entrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-
int
-trace_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+trace_fentrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- char *preparentstr = NULL;
- char *postparentstr = NULL;
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_UNLINK].enabled) {
- if (op_ret >= 0) {
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
+ conf = this->private;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, *preparent = {%s}, "
- "*postparent = {%s})",
- frame->root->unique, uuid_utoa (frame->local), op_ret, preparentstr,
- postparentstr);
-
- if (preparentstr)
- GF_FREE (preparentstr);
-
- if (postparentstr)
- GF_FREE (postparentstr);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
- }
- }
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FENTRYLK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno,
- preparent, postparent, xdata);
- return 0;
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(fentrylk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-
int
-trace_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent, dict_t *xdata)
+trace_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
{
- char *statstr = NULL;
- char *preoldparentstr = NULL;
- char *postoldparentstr = NULL;
- char *prenewparentstr = NULL;
- char *postnewparentstr = NULL;
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_RENAME].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preoldparentstr = trace_stat_to_str (preoldparent);
- postoldparentstr = trace_stat_to_str (postoldparent);
+ conf = this->private;
- prenewparentstr = trace_stat_to_str (prenewparent);
- postnewparentstr = trace_stat_to_str (postnewparent);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_XATTROP].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *stbuf = {%s}, "
- "*preoldparent = {%s}, *postoldparent = {%s}"
- " *prenewparent = {%s}, *postnewparent = {%s})",
- frame->root->unique, op_ret, statstr,
- preoldparentstr, postoldparentstr,
- prenewparentstr, postnewparentstr);
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
- if (statstr)
- GF_FREE (statstr);
+int
+trace_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
- if (preoldparentstr)
- GF_FREE (preoldparentstr);
+ conf = this->private;
- if (postoldparentstr)
- GF_FREE (postoldparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
- if (prenewparentstr)
- GF_FREE (prenewparentstr);
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(fxattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
- if (postnewparentstr)
- GF_FREE (postnewparentstr);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
- }
+int
+trace_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
- frame->local = NULL;
- STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
- preoldparent, postoldparent,
- prenewparent, postnewparent, xdata);
- return 0;
-}
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_INODELK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(inodelk, frame, op_ret, op_errno, xdata);
+ return 0;
+}
int
-trace_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- const char *buf, struct iatt *stbuf, dict_t *xdata)
+trace_finodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- char *statstr = NULL;
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_READLINK].enabled) {
+ conf = this->private;
- if (op_ret == 0) {
- statstr = trace_stat_to_str (stbuf);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d, buf=%s, "
- "stbuf = { %s })",
- frame->root->unique, op_ret, op_errno, buf,
- statstr);
- } else
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
-
- if (statstr)
- GF_FREE (statstr);
- }
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FINODELK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s op_ret=%d, op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
- frame->local = NULL;
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, buf, stbuf, xdata);
- return 0;
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(finodelk, frame, op_ret, op_errno, xdata);
+ return 0;
}
-
int
-trace_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- dict_t *xdata, struct iatt *postparent)
+trace_rchecksum_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata)
{
- char *statstr = NULL;
- char *postparentstr = NULL;
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- postparentstr = trace_stat_to_str (postparent);
+ conf = this->private;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d "
- "*buf {%s}, *postparent {%s}",
- frame->root->unique, uuid_utoa (inode->gfid),
- op_ret, statstr, postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RCHECKSUM].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s op_ret=%d op_errno=%d",
+ frame->root->unique, uuid_utoa(frame->local), op_ret,
+ op_errno);
- if (statstr)
- GF_FREE (statstr);
- if (postparentstr)
- GF_FREE (postparentstr);
+ LOG_ELEMENT(conf, string);
+ }
- /* For 'forget' */
- inode_ctx_put (inode, this, 0);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
- }
+out:
+ TRACE_STACK_UNWIND(rchecksum, frame, op_ret, op_errno, weak_checksum,
+ strong_checksum, xdata);
- frame->local = NULL;
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
- xdata, postparent);
- return 0;
+ return 0;
}
+/* *_cbk section over <----------> fop section start */
int
-trace_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+trace_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
+ trace_conf_t *conf = NULL;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d "
- "*stbuf = {%s}, *preparent = {%s}, "
- "*postparent = {%s})",
- frame->root->unique, uuid_utoa (inode->gfid),
- op_ret, statstr, preparentstr, postparentstr);
+ conf = this->private;
- if (statstr)
- GF_FREE (statstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s volume=%s, (path=%s "
+ "basename=%s, cmd=%s, type=%s)",
+ frame->root->unique, uuid_utoa(loc->inode->gfid), volume,
+ loc->path, basename,
+ ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" : "ENTRYLK_UNLOCK"),
+ ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK"));
- if (preparentstr)
- GF_FREE (preparentstr);
+ frame->local = loc->inode->gfid;
- if (postparentstr)
- GF_FREE (postparentstr);
-
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": op_ret=%d, op_errno=%d",
- frame->root->unique, op_ret, op_errno);
- }
- }
+ LOG_ELEMENT(conf, string);
+ }
- frame->local = NULL;
- STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
+out:
+ STACK_WIND(frame, trace_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, volume, loc, basename, cmd,
+ type, xdata);
+ return 0;
}
-
int
-trace_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+trace_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_MKNOD].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+ trace_conf_t *conf = NULL;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d "
- "*stbuf = {%s}, *preparent = {%s}, "
- "*postparent = {%s})",
- frame->root->unique, uuid_utoa (inode->gfid),
- op_ret, statstr, preparentstr, postparentstr);
+ conf = this->private;
- if (statstr)
- GF_FREE (statstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_INODELK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ switch (cmd) {
+#if F_GETLK != F_GETLK64
+ case F_GETLK64:
+#endif
+ case F_GETLK:
+ cmd_str = "GETLK";
+ break;
- if (preparentstr)
- GF_FREE (preparentstr);
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ cmd_str = "SETLK";
+ break;
- if (postparentstr)
- GF_FREE (postparentstr);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
+ cmd_str = "SETLKW";
+ break;
+
+ default:
+ cmd_str = "UNKNOWN";
+ break;
+ }
+
+ switch (flock->l_type) {
+ case F_RDLCK:
+ type_str = "READ";
+ break;
+ case F_WRLCK:
+ type_str = "WRITE";
+ break;
+ case F_UNLCK:
+ type_str = "UNLOCK";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf(
+ string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s volume=%s, (path=%s "
+ "cmd=%s, type=%s, start=%llu, len=%llu, "
+ "pid=%llu)",
+ frame->root->unique, uuid_utoa(loc->inode->gfid), volume, loc->path,
+ cmd_str, type_str, (unsigned long long)flock->l_start,
+ (unsigned long long)flock->l_len, (unsigned long long)flock->l_pid);
+
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT(conf, string);
+ }
- frame->local = NULL;
- STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
+out:
+ STACK_WIND(frame, trace_inodelk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk, volume, loc, cmd, flock,
+ xdata);
+ return 0;
}
-
int
-trace_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+trace_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
-
- if (trace_fop_names[GF_FOP_MKDIR].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
+ char *cmd_str = NULL;
+ char *type_str = NULL;
+ trace_conf_t *conf = NULL;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (op_ret=%d "
- ", *stbuf = {%s}, *prebuf = {%s}, "
- "*postbuf = {%s} )",
- frame->root->unique, uuid_utoa (inode->gfid),
- op_ret, statstr, preparentstr, postparentstr);
+ conf = this->private;
- if (statstr)
- GF_FREE (statstr);
-
- if (preparentstr)
- GF_FREE (preparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FINODELK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ switch (cmd) {
+#if F_GETLK != F_GETLK64
+ case F_GETLK64:
+#endif
+ case F_GETLK:
+ cmd_str = "GETLK";
+ break;
- if (postparentstr)
- GF_FREE (postparentstr);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
+#if F_SETLK != F_SETLK64
+ case F_SETLK64:
+#endif
+ case F_SETLK:
+ cmd_str = "SETLK";
+ break;
- frame->local = NULL;
- STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
+#if F_SETLKW != F_SETLKW64
+ case F_SETLKW64:
+#endif
+ case F_SETLKW:
+ cmd_str = "SETLKW";
+ break;
+
+ default:
+ cmd_str = "UNKNOWN";
+ break;
+ }
+
+ switch (flock->l_type) {
+ case F_RDLCK:
+ type_str = "READ";
+ break;
+ case F_WRLCK:
+ type_str = "WRITE";
+ break;
+ case F_UNLCK:
+ type_str = "UNLOCK";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s volume=%s, (fd =%p "
+ "cmd=%s, type=%s, start=%llu, len=%llu, "
+ "pid=%llu)",
+ frame->root->unique, uuid_utoa(fd->inode->gfid), volume, fd,
+ cmd_str, type_str, (unsigned long long)flock->l_start,
+ (unsigned long long)flock->l_len,
+ (unsigned long long)flock->l_pid);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ STACK_WIND(frame, trace_finodelk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk, volume, fd, cmd, flock,
+ xdata);
+ return 0;
}
-
int
-trace_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+trace_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- char *statstr = NULL;
- char *preparentstr = NULL;
- char *postparentstr = NULL;
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_LINK].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
+ conf = this->private;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *stbuf = {%s}, "
- " *prebuf = {%s}, *postbuf = {%s})",
- frame->root->unique, op_ret,
- statstr, preparentstr, postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_XATTROP].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s (path=%s flags=%d)", frame->root->unique,
+ uuid_utoa(loc->inode->gfid), loc->path, flags);
- if (statstr)
- GF_FREE (statstr);
+ frame->local = loc->inode->gfid;
- if (preparentstr)
- GF_FREE (preparentstr);
+ LOG_ELEMENT(conf, string);
+ }
- if (postparentstr)
- GF_FREE (postparentstr);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
- }
+out:
+ STACK_WIND(frame, trace_xattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata);
- frame->local = NULL;
- STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
- return 0;
+ return 0;
}
-
int
-trace_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+trace_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_FLUSH].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ trace_conf_t *conf = NULL;
- frame->local = NULL;
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string), "%" PRId64 ": gfid=%s fd=%p, flags=%d",
+ frame->root->unique, uuid_utoa(fd->inode->gfid), fd, flags);
-int
-trace_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, fd=%p",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno, fd);
- }
+ frame->local = fd->inode->gfid;
- /* for 'releasedir' log */
- if (op_ret >= 0)
- fd_ctx_set (fd, this, 0);
+ LOG_ELEMENT(conf, string);
+ }
- frame->local = NULL;
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
- return 0;
-}
+out:
+ STACK_WIND(frame, trace_fxattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata);
+ return 0;
+}
int
-trace_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+trace_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- char *preparentstr = NULL;
- char *postparentstr = NULL;
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_RMDIR].enabled) {
- if (op_ret >= 0) {
- preparentstr = trace_stat_to_str (preparent);
- postparentstr = trace_stat_to_str (postparent);
+ conf = this->private;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s}",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, preparentstr, postparentstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ /* TODO: print all the keys mentioned in xattr_req */
+ snprintf(string, sizeof(string), "%" PRId64 ": gfid=%s path=%s",
+ frame->root->unique, uuid_utoa(loc->inode->gfid), loc->path);
- if (preparentstr)
- GF_FREE (preparentstr);
+ frame->local = loc->inode->gfid;
- if (postparentstr)
- GF_FREE (postparentstr);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
- }
+ LOG_ELEMENT(conf, string);
+ }
- frame->local = NULL;
- STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno,
- preparent, postparent, xdata);
- return 0;
-}
+out:
+ STACK_WIND(frame, trace_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
int
-trace_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+trace_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- char *preopstr = NULL;
- char *postopstr = NULL;
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
- if (op_ret >= 0) {
- preopstr = trace_stat_to_str (prebuf);
- postopstr = trace_stat_to_str (postbuf);
+ conf = this->private;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s} )",
- frame->root->unique, op_ret, preopstr,
- postopstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_STAT].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string), "%" PRId64 ": gfid=%s path=%s",
+ frame->root->unique, uuid_utoa(loc->inode->gfid), loc->path);
- if (preopstr)
- GF_FREE (preopstr);
+ frame->local = loc->inode->gfid;
- if (postopstr)
- GF_FREE (postopstr);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
- }
+ LOG_ELEMENT(conf, string);
+ }
- frame->local = NULL;
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf, postbuf, xdata);
- return 0;
-}
-
-
-int
-trace_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_STATFS].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": ({f_bsize=%lu, f_frsize=%lu, f_blocks=%"GF_PRI_FSBLK
- ", f_bfree=%"GF_PRI_FSBLK", f_bavail=%"GF_PRI_FSBLK", "
- "f_files=%"GF_PRI_FSBLK", f_ffree=%"GF_PRI_FSBLK", f_favail=%"
- GF_PRI_FSBLK", f_fsid=%lu, f_flag=%lu, f_namemax=%lu}) => ret=%d",
- frame->root->unique, buf->f_bsize, buf->f_frsize, buf->f_blocks,
- buf->f_bfree, buf->f_bavail, buf->f_files, buf->f_ffree,
- buf->f_favail, buf->f_fsid, buf->f_flag, buf->f_namemax, op_ret);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": (op_ret=%d, op_errno=%d)",
- frame->root->unique, op_ret, op_errno);
- }
- }
+out:
+ STACK_WIND(frame, trace_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
- frame->local = NULL;
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ return 0;
}
-
int
-trace_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+trace_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ trace_conf_t *conf = NULL;
- frame->local = NULL;
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READLINK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s path=%s, "
+ "size=%" GF_PRI_SIZET ")",
+ frame->root->unique, uuid_utoa(loc->inode->gfid), loc->path,
+ size);
-int
-trace_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, dict=%p",
- frame->root->unique, uuid_utoa (frame->local), op_ret,
- op_errno, dict);
- }
+ frame->local = loc->inode->gfid;
- frame->local = NULL;
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+ LOG_ELEMENT(conf, string);
+ }
- return 0;
+out:
+ STACK_WIND(frame, trace_readlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
+
+ return 0;
}
int
-trace_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+trace_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_FSETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret,
- op_errno);
- }
+ trace_conf_t *conf = NULL;
- frame->local = NULL;
- STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_MKNOD].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s path=%s mode=%d "
+ "umask=0%o, dev=%" GF_PRI_DEV ")",
+ frame->root->unique, uuid_utoa(loc->inode->gfid), loc->path,
+ mode, umask, dev);
-int
-trace_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_FGETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d, dict=%p",
- frame->root->unique, uuid_utoa (frame->local), op_ret,
- op_errno, dict);
- }
+ LOG_ELEMENT(conf, string);
+ }
- frame->local = NULL;
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+out:
+ STACK_WIND(frame, trace_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, dev, umask, xdata);
- return 0;
+ return 0;
}
int
-trace_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+trace_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ trace_conf_t *conf = NULL;
- frame->local = NULL;
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+ conf = this->private;
- return 0;
-}
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_MKDIR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s path=%s mode=%d"
+ " umask=0%o",
+ frame->root->unique, uuid_utoa(loc->inode->gfid), loc->path,
+ mode, umask);
+ LOG_ELEMENT(conf, string);
+ }
+
+out:
+ STACK_WIND(frame, trace_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ return 0;
+}
int
-trace_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+trace_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ trace_conf_t *conf = NULL;
- frame->local = NULL;
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_UNLINK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string), "%" PRId64 ": gfid=%s path=%s flag=%d",
+ frame->root->unique, uuid_utoa(loc->inode->gfid), loc->path,
+ xflag);
-int
-trace_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_ACCESS].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ frame->local = loc->inode->gfid;
- frame->local = NULL;
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
- return 0;
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ STACK_WIND(frame, trace_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
}
-
int
-trace_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf, dict_t *xdata)
+trace_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- char *prebufstr = NULL;
- char *postbufstr = NULL;
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
- if (op_ret >= 0) {
- prebufstr = trace_stat_to_str (prebuf);
- postbufstr = trace_stat_to_str (postbuf);
+ conf = this->private;
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": op_ret=%d, *prebuf = {%s}, "
- "*postbuf = {%s} )",
- frame->root->unique, op_ret,
- prebufstr, postbufstr);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RMDIR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s path=%s flags=%d", frame->root->unique,
+ uuid_utoa(loc->inode->gfid), loc->path, flags);
- if (prebufstr)
- GF_FREE (prebufstr);
+ frame->local = loc->inode->gfid;
- if (postbufstr)
- GF_FREE (postbufstr);
+ LOG_ELEMENT(conf, string);
+ }
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
- }
+out:
+ STACK_WIND(frame, trace_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
- frame->local = NULL;
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf, postbuf, xdata);
- return 0;
+ return 0;
}
-
int
-trace_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+trace_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- char *statstr = NULL;
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_FSTAT].enabled) {
- if (op_ret >= 0) {
- statstr = trace_stat_to_str (buf);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d buf=%s",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, statstr);
+ conf = this->private;
- if (statstr)
- GF_FREE (statstr);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
- }
-
- frame->local = NULL;
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s linkpath=%s, path=%s"
+ " umask=0%o",
+ frame->root->unique, uuid_utoa(loc->inode->gfid), linkpath,
+ loc->path, umask);
+ LOG_ELEMENT(conf, string);
+ }
-int
-trace_lk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_LK].enabled) {
- if (op_ret >= 0) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, {l_type=%d, l_whence=%d, "
- "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, lock->l_type, lock->l_whence,
- lock->l_start, lock->l_len, lock->l_pid);
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d)",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
- }
+out:
+ STACK_WIND(frame, trace_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask, xdata);
- frame->local = NULL;
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, lock, xdata);
- return 0;
+ return 0;
}
-
-
int
-trace_entrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+trace_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char oldgfid[50] = {
+ 0,
+ };
+ char newgfid[50] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RENAME].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (newloc->inode)
+ uuid_utoa_r(newloc->inode->gfid, newgfid);
+ else
+ strcpy(newgfid, "0");
+
+ uuid_utoa_r(oldloc->inode->gfid, oldgfid);
+
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": oldgfid=%s oldpath=%s --> "
+ "newgfid=%s newpath=%s",
+ frame->root->unique, oldgfid, oldloc->path, newgfid,
+ newloc->path);
+
+ frame->local = oldloc->inode->gfid;
+
+ LOG_ELEMENT(conf, string);
+ }
+
+out:
+ STACK_WIND(frame, trace_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
- frame->local = NULL;
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, xdata);
- return 0;
+ return 0;
}
int
-trace_fentrylk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+trace_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_FENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char oldgfid[50] = {
+ 0,
+ };
+ char newgfid[50] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
- frame->local = NULL;
- STACK_UNWIND_STRICT (fentrylk, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LINK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (newloc->inode)
+ uuid_utoa_r(newloc->inode->gfid, newgfid);
+ else
+ strcpy(newgfid, "0");
-int
-trace_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_XATTROP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ uuid_utoa_r(oldloc->inode->gfid, oldgfid);
- frame->local = NULL;
- STACK_UNWIND_STRICT (xattrop, frame, op_ret, op_errno, dict, xdata);
- return 0;
-}
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": oldgfid=%s oldpath=%s --> "
+ "newgfid=%s newpath=%s",
+ frame->root->unique, oldgfid, oldloc->path, newgfid,
+ newloc->path);
+ frame->local = oldloc->inode->gfid;
-int
-trace_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ LOG_ELEMENT(conf, string);
+ }
- frame->local = NULL;
- STACK_UNWIND_STRICT (fxattrop, frame, op_ret, op_errno, dict, xdata);
- return 0;
+out:
+ STACK_WIND(frame, trace_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
}
-
int
-trace_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+trace_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_INODELK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local),
- op_ret, op_errno);
- }
+ char actime_str[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char modtime_str[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
- frame->local = NULL;
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ conf = this->private;
-int
-trace_finodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_FINODELK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d, op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
- }
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SETATTR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (valid & GF_SET_ATTR_MODE) {
+ snprintf(
+ string, sizeof(string), "%" PRId64 ": gfid=%s path=%s mode=%o)",
+ frame->root->unique, uuid_utoa(loc->inode->gfid), loc->path,
+ st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type));
- frame->local = NULL;
- STACK_UNWIND_STRICT (finodelk, frame, op_ret, op_errno, xdata);
- return 0;
-}
+ LOG_ELEMENT(conf, string);
+ memset(string, 0, sizeof(string));
+ }
+ if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s path=%s uid=%o,"
+ " gid=%o",
+ frame->root->unique, uuid_utoa(loc->inode->gfid),
+ loc->path, stbuf->ia_uid, stbuf->ia_gid);
-int
-trace_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- uint32_t weak_checksum, uint8_t *strong_checksum, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_RCHECKSUM].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s op_ret=%d op_errno=%d",
- frame->root->unique, uuid_utoa (frame->local), op_ret, op_errno);
+ LOG_ELEMENT(conf, string);
+ memset(string, 0, sizeof(string));
}
- frame->local = NULL;
- STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, weak_checksum,
- strong_checksum, xdata);
+ if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
+ gf_time_fmt(actime_str, sizeof actime_str, stbuf->ia_atime,
+ gf_timefmt_bdT);
- return 0;
-}
+ gf_time_fmt(modtime_str, sizeof modtime_str, stbuf->ia_mtime,
+ gf_timefmt_bdT);
-/* *_cbk section over <----------> fop section start */
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s path=%s "
+ "ia_atime=%s, ia_mtime=%s",
+ frame->root->unique, uuid_utoa(loc->inode->gfid),
+ loc->path, actime_str, modtime_str);
-int
-trace_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_ENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s volume=%s, (path=%s basename=%s, "
- "cmd=%s, type=%s)",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- volume, loc->path, basename,
- ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" : "ENTRYLK_UNLOCK"),
- ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK"));
- frame->local = loc->inode->gfid;
+ LOG_ELEMENT(conf, string);
+ memset(string, 0, sizeof(string));
}
+ frame->local = loc->inode->gfid;
+ }
- STACK_WIND (frame, trace_entrylk_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->entrylk,
- volume, loc, basename, cmd, type, xdata);
- return 0;
-}
+out:
+ STACK_WIND(frame, trace_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
+}
int
-trace_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+trace_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- char *cmd_str = NULL;
- char *type_str = NULL;
+ char actime_str[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char modtime_str[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_INODELK].enabled) {
- switch (cmd) {
-#if F_GETLK != F_GETLK64
- case F_GETLK64:
-#endif
- case F_GETLK:
- cmd_str = "GETLK";
- break;
+ conf = this->private;
-#if F_SETLK != F_SETLK64
- case F_SETLK64:
-#endif
- case F_SETLK:
- cmd_str = "SETLK";
- break;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ if (valid & GF_SET_ATTR_MODE) {
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s fd=%p, mode=%o", frame->root->unique,
+ uuid_utoa(fd->inode->gfid), fd,
+ st_mode_from_ia(stbuf->ia_prot, stbuf->ia_type));
-#if F_SETLKW != F_SETLKW64
- case F_SETLKW64:
-#endif
- case F_SETLKW:
- cmd_str = "SETLKW";
- break;
-
- default:
- cmd_str = "UNKNOWN";
- break;
- }
-
- switch (flock->l_type) {
- case F_RDLCK:
- type_str = "READ";
- break;
- case F_WRLCK:
- type_str = "WRITE";
- break;
- case F_UNLCK:
- type_str = "UNLOCK";
- break;
- default:
- type_str = "UNKNOWN";
- break;
- }
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s volume=%s, (path=%s "
- "cmd=%s, type=%s, start=%llu, len=%llu, pid=%llu)",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- volume, loc->path,
- cmd_str, type_str, (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid);
- frame->local = loc->inode->gfid;
+ LOG_ELEMENT(conf, string);
+ memset(string, 0, sizeof(string));
}
- STACK_WIND (frame, trace_inodelk_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->inodelk,
- volume, loc, cmd, flock, xdata);
- return 0;
-}
+ if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s fd=%p, uid=%o, "
+ "gid=%o",
+ frame->root->unique, uuid_utoa(fd->inode->gfid), fd,
+ stbuf->ia_uid, stbuf->ia_gid);
+ LOG_ELEMENT(conf, string);
+ memset(string, 0, sizeof(string));
+ }
-int
-trace_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
-{
- char *cmd_str = NULL, *type_str = NULL;
+ if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
+ gf_time_fmt(actime_str, sizeof actime_str, stbuf->ia_atime,
+ gf_timefmt_bdT);
- if (trace_fop_names[GF_FOP_FINODELK].enabled) {
- switch (cmd) {
-#if F_GETLK != F_GETLK64
- case F_GETLK64:
-#endif
- case F_GETLK:
- cmd_str = "GETLK";
- break;
+ gf_time_fmt(modtime_str, sizeof modtime_str, stbuf->ia_mtime,
+ gf_timefmt_bdT);
-#if F_SETLK != F_SETLK64
- case F_SETLK64:
-#endif
- case F_SETLK:
- cmd_str = "SETLK";
- break;
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s fd=%p "
+ "ia_atime=%s, ia_mtime=%s",
+ frame->root->unique, uuid_utoa(fd->inode->gfid), fd,
+ actime_str, modtime_str);
-#if F_SETLKW != F_SETLKW64
- case F_SETLKW64:
-#endif
- case F_SETLKW:
- cmd_str = "SETLKW";
- break;
-
- default:
- cmd_str = "UNKNOWN";
- break;
- }
-
- switch (flock->l_type) {
- case F_RDLCK:
- type_str = "READ";
- break;
- case F_WRLCK:
- type_str = "WRITE";
- break;
- case F_UNLCK:
- type_str = "UNLOCK";
- break;
- default:
- type_str = "UNKNOWN";
- break;
- }
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s volume=%s, (fd =%p "
- "cmd=%s, type=%s, start=%llu, len=%llu, pid=%llu)",
- frame->root->unique, uuid_utoa (fd->inode->gfid), volume, fd,
- cmd_str, type_str, (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid);
- frame->local = fd->inode->gfid;
+ LOG_ELEMENT(conf, string);
+ memset(string, 0, sizeof(string));
}
+ frame->local = fd->inode->gfid;
+ }
- STACK_WIND (frame, trace_finodelk_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->finodelk,
- volume, fd, cmd, flock, xdata);
- return 0;
-}
+out:
+ STACK_WIND(frame, trace_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+
+ return 0;
+}
+
+static int
+trace_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, off_t offset, dict_t *xdata)
+{
+ trace_conf_t *conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SEEK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s op_ret=%d op_errno=%d, "
+ "offset=%" PRId64 "",
+ frame->root->unique, uuid_utoa(frame->local), op_ret, op_errno,
+ offset);
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ TRACE_STACK_UNWIND(seek, frame, op_ret, op_errno, offset, xdata);
+ return 0;
+}
+
+static int
+trace_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ trace_conf_t *conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SEEK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s fd=%p "
+ "offset=%" PRId64 " what=%d",
+ frame->root->unique, uuid_utoa(fd->inode->gfid), fd, offset,
+ what);
+
+ frame->local = fd->inode->gfid;
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ STACK_WIND(frame, trace_seek_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->seek, fd, offset, what, xdata);
+ return 0;
+}
int
-trace_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+trace_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_XATTROP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s (path=%s flags=%d)",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, flags);
- frame->local = loc->inode->gfid;
- }
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame, trace_xattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop,
- loc, flags, dict, xdata);
+ conf = this->private;
- return 0;
-}
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s path=%s, "
+ "offset=%" PRId64 "",
+ frame->root->unique, uuid_utoa(loc->inode->gfid), loc->path,
+ offset);
+ frame->local = loc->inode->gfid;
-int
-trace_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_FXATTROP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, flags=%d",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, flags);
- frame->local = fd->inode->gfid;
- }
+ LOG_ELEMENT(conf, string);
+ }
- STACK_WIND (frame, trace_fxattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop,
- fd, flags, dict, xdata);
+out:
+ STACK_WIND(frame, trace_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
- return 0;
+ return 0;
}
-
int
-trace_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xdata)
+trace_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
- /* TODO: print all the keys mentioned in xattr_req */
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path);
- frame->local = loc->inode->gfid;
- }
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame, trace_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc, xdata);
+ conf = this->private;
- return 0;
-}
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_OPEN].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s path=%s flags=%d fd=%p",
+ frame->root->unique, uuid_utoa(loc->inode->gfid), loc->path,
+ flags, fd);
+ frame->local = loc->inode->gfid;
-int
-trace_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_STAT].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path);
- frame->local = loc->inode->gfid;
- }
-
- STACK_WIND (frame, trace_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc, xdata);
+ LOG_ELEMENT(conf, string);
+ }
- return 0;
+out:
+ STACK_WIND(frame, trace_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
}
-
int
-trace_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size, dict_t *xdata)
+trace_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_READLINK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s, size=%"GF_PRI_SIZET")",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, size);
- frame->local = loc->inode->gfid;
- }
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame, trace_readlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink,
- loc, size, xdata);
+ conf = this->private;
- return 0;
-}
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_CREATE].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s path=%s, fd=%p, "
+ "flags=0%o mode=0%o umask=0%o",
+ frame->root->unique, uuid_utoa(loc->inode->gfid), loc->path,
+ fd, flags, mode, umask);
+
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ STACK_WIND(frame, trace_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+}
int
-trace_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t dev, mode_t umask, dict_t *xdata)
+trace_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_MKNOD].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s mode=0%o umask=0%o "
- "dev=%"GF_PRI_DEV")",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, mode, umask, dev);
- }
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame, trace_mknod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod,
- loc, mode, dev, umask, xdata);
+ conf = this->private;
- return 0;
-}
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READ].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s fd=%p, size=%" GF_PRI_SIZET
+ "offset=%" PRId64 " flags=0%x)",
+ frame->root->unique, uuid_utoa(fd->inode->gfid), fd, size,
+ offset, flags);
+ frame->local = fd->inode->gfid;
-int
-trace_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_MKDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s mode=0%o umask=0%o",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, mode, umask);
- }
+ LOG_ELEMENT(conf, string);
+ }
- STACK_WIND (frame, trace_mkdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- loc, mode, umask, xdata);
- return 0;
+out:
+ STACK_WIND(frame, trace_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+ return 0;
}
-
int
-trace_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
+trace_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_UNLINK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s flag=%d",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, xflag);
- frame->local = loc->inode->gfid;
- }
+ trace_conf_t *conf = NULL;
+ int i = 0;
+ size_t total_size = 0;
- STACK_WIND (frame, trace_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc, xflag, xdata);
- return 0;
-}
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_WRITE].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ for (i = 0; i < count; i++)
+ total_size += vector[i].iov_len;
-int
-trace_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_RMDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s flags=%d",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, flags);
- frame->local = loc->inode->gfid;
- }
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s fd=%p, count=%d, "
+ " offset=%" PRId64 " flags=0%x write_size=%zu",
+ frame->root->unique, uuid_utoa(fd->inode->gfid), fd, count,
+ offset, flags, total_size);
- STACK_WIND (frame, trace_rmdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- loc, flags, xdata);
+ frame->local = fd->inode->gfid;
- return 0;
-}
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ STACK_WIND(frame, trace_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
+ return 0;
+}
int
-trace_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, mode_t umask, dict_t *xdata)
+trace_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_SYMLINK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s linkpath=%s, path=%s umask=0%o",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- linkpath, loc->path, umask);
- }
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame, trace_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, umask, xdata);
+ conf = this->private;
- return 0;
-}
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_STATFS].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string), "%" PRId64 ": gfid=%s path=%s",
+ frame->root->unique,
+ (loc->inode) ? uuid_utoa(loc->inode->gfid) : "0", loc->path);
+ LOG_ELEMENT(conf, string);
+ }
+
+out:
+ STACK_WIND(frame, trace_statfs_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
+}
int
-trace_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
+trace_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- char oldgfid[50] = {0,};
- char newgfid[50] = {0,};
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_RENAME].enabled) {
- if (newloc->inode)
- uuid_utoa_r (newloc->inode->gfid, newgfid);
- else
- strcpy (newgfid, "0");
+ conf = this->private;
- uuid_utoa_r (oldloc->inode->gfid, oldgfid);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FLUSH].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string), "%" PRId64 ": gfid=%s fd=%p",
+ frame->root->unique, uuid_utoa(fd->inode->gfid), fd);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": oldgfid=%s oldpath=%s --> newgfid=%s newpath=%s",
- frame->root->unique, oldgfid, oldloc->path, newgfid, newloc->path);
-
- frame->local = oldloc->inode->gfid;
- }
+ frame->local = fd->inode->gfid;
- STACK_WIND (frame, trace_rename_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename,
- oldloc, newloc, xdata);
+ LOG_ELEMENT(conf, string);
+ }
- return 0;
+out:
+ STACK_WIND(frame, trace_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
}
-
int
-trace_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+trace_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
dict_t *xdata)
{
- char oldgfid[50] = {0,};
- char newgfid[50] = {0,};
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_LINK].enabled) {
- if (newloc->inode)
- uuid_utoa_r (newloc->inode->gfid, newgfid);
- else
- strcpy (newgfid, "0");
+ conf = this->private;
- uuid_utoa_r (oldloc->inode->gfid, oldgfid);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSYNC].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string), "%" PRId64 ": gfid=%s flags=%d fd=%p",
+ frame->root->unique, uuid_utoa(fd->inode->gfid), flags, fd);
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": oldgfid=%s oldpath=%s --> newgfid=%s newpath=%s",
- frame->root->unique, oldgfid, oldloc->path,
- newgfid, newloc->path);
- frame->local = oldloc->inode->gfid;
- }
+ frame->local = fd->inode->gfid;
- STACK_WIND (frame, trace_link_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link,
- oldloc, newloc, xdata);
- return 0;
-}
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ STACK_WIND(frame, trace_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+}
int
-trace_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+trace_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- char actime_str[64] = {0,};
- char modtime_str[64] = {0,};
-
- if (trace_fop_names[GF_FOP_SETATTR].enabled) {
- if (valid & GF_SET_ATTR_MODE) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s mode=%o)",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type));
- }
-
- if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s uid=%o, gid=%o",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, stbuf->ia_uid, stbuf->ia_gid);
- }
-
- if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
- gf_time_fmt (actime_str, sizeof actime_str,
- stbuf->ia_atime, gf_timefmt_bdT);
- gf_time_fmt (modtime_str, sizeof modtime_str,
- stbuf->ia_mtime, gf_timefmt_bdT);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s ia_atime=%s, ia_mtime=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, actime_str, modtime_str);
- }
- frame->local = loc->inode->gfid;
- }
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame, trace_setattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr,
- loc, stbuf, valid, xdata);
+ conf = this->private;
- return 0;
-}
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s path=%s flags=%d", frame->root->unique,
+ uuid_utoa(loc->inode->gfid), loc->path, flags);
+ frame->local = loc->inode->gfid;
+
+ LOG_ELEMENT(conf, string);
+ }
+
+out:
+ STACK_WIND(frame, trace_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
+ return 0;
+}
int
-trace_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+trace_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- char actime_str[64] = {0,};
- char modtime_str[64] = {0,};
+ trace_conf_t *conf = NULL;
- if (trace_fop_names[GF_FOP_FSETATTR].enabled) {
- if (valid & GF_SET_ATTR_MODE) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, mode=%o",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd,
- st_mode_from_ia (stbuf->ia_prot, stbuf->ia_type));
- }
+ conf = this->private;
- if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, uid=%o, gid=%o",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, stbuf->ia_uid, stbuf->ia_gid);
- }
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string), "%" PRId64 ": gfid=%s path=%s name=%s",
+ frame->root->unique, uuid_utoa(loc->inode->gfid), loc->path,
+ name);
- if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
- gf_time_fmt (actime_str, sizeof actime_str,
- stbuf->ia_atime, gf_timefmt_bdT);
- gf_time_fmt (modtime_str, sizeof modtime_str,
- stbuf->ia_mtime, gf_timefmt_bdT);
-
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p ia_atime=%s, ia_mtime=%s",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, actime_str, modtime_str);
- }
- frame->local = fd->inode->gfid;
- }
+ frame->local = loc->inode->gfid;
- STACK_WIND (frame, trace_fsetattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetattr,
- fd, stbuf, valid, xdata);
+ LOG_ELEMENT(conf, string);
+ }
- return 0;
+out:
+ STACK_WIND(frame, trace_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
}
-
int
-trace_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset, dict_t *xdata)
+trace_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_TRUNCATE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s, offset=%"PRId64"",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, offset);
- frame->local = loc->inode->gfid;
- }
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame, trace_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc, offset, xdata);
+ conf = this->private;
- return 0;
-}
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string), "%" PRId64 ": gfid=%s path=%s name=%s",
+ frame->root->unique, uuid_utoa(loc->inode->gfid), loc->path,
+ name);
+ frame->local = loc->inode->gfid;
-int
-trace_open (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, fd_t *fd, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_OPEN].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s flags=%d fd=%p",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, flags, fd);
- frame->local = loc->inode->gfid;
- }
+ LOG_ELEMENT(conf, string);
+ }
- STACK_WIND (frame, trace_open_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc, flags, fd, xdata);
- return 0;
-}
+out:
+ STACK_WIND(frame, trace_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ return 0;
+}
int
-trace_create (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+trace_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_CREATE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s fd=%p flags=0%o mode=0%o "
- "umask=0%o",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, fd, flags, mode, umask);
- }
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame, trace_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, umask, fd, xdata);
+ conf = this->private;
- return 0;
-}
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string), "%" PRId64 ": gfid=%s path=%s fd=%p",
+ frame->root->unique, uuid_utoa(loc->inode->gfid), loc->path,
+ fd);
+ frame->local = loc->inode->gfid;
-int
-trace_readv (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset, uint32_t flags, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_READ].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET", "
- "offset=%"PRId64" flags=0%x)",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, size, offset, flags);
- frame->local = fd->inode->gfid;
- }
+ LOG_ELEMENT(conf, string);
+ }
- STACK_WIND (frame, trace_readv_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd, size, offset, flags, xdata);
- return 0;
+out:
+ STACK_WIND(frame, trace_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
}
-
int
-trace_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count,
- off_t offset, uint32_t flags, struct iobref *iobref, dict_t *xdata)
+trace_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
{
- if (trace_fop_names[GF_FOP_WRITE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, count=%d, offset=%"PRId64
- " flag=0%x)",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, count, offset, flags);
- frame->local = fd->inode->gfid;
- }
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame, trace_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset, flags, iobref, xdata);
- return 0;
-}
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READDIRP].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s fd=%p, size=%" GF_PRI_SIZET
+ ", offset=%" PRId64 " dict=%p",
+ frame->root->unique, uuid_utoa(fd->inode->gfid), fd, size,
+ offset, dict);
-int
-trace_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_STATFS].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s",
- frame->root->unique, (loc->inode)?
- uuid_utoa (loc->inode->gfid):"0", loc->path);
- }
+ frame->local = fd->inode->gfid;
- STACK_WIND (frame, trace_statfs_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs,
- loc, xdata);
- return 0;
-}
+ LOG_ELEMENT(conf, string);
+ }
+
+out:
+ STACK_WIND(frame, trace_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict);
+ return 0;
+}
int
-trace_flush (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+trace_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_FLUSH].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd);
- frame->local = fd->inode->gfid;
- }
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame, trace_flush_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd, xdata);
- return 0;
-}
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_READDIR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s fd=%p, size=%" GF_PRI_SIZET
+ ", offset=%" PRId64,
+ frame->root->unique, uuid_utoa(fd->inode->gfid), fd, size,
+ offset);
-int
-trace_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_FSYNC].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s flags=%d fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid), flags, fd);
- frame->local = fd->inode->gfid;
- }
+ frame->local = fd->inode->gfid;
- STACK_WIND (frame, trace_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd, flags, xdata);
- return 0;
-}
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ STACK_WIND(frame, trace_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
+
+ return 0;
+}
int
-trace_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int32_t flags, dict_t *xdata)
+trace_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_SETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s flags=%d",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, flags);
- frame->local = loc->inode->gfid;
- }
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame, trace_setxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags, xdata);
- return 0;
-}
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s datasync=%d fd=%p", frame->root->unique,
+ uuid_utoa(fd->inode->gfid), datasync, fd);
-int
-trace_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_GETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s name=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, name);
- frame->local = loc->inode->gfid;
- }
+ frame->local = fd->inode->gfid;
- STACK_WIND (frame, trace_getxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- loc, name, xdata);
- return 0;
-}
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ STACK_WIND(frame, trace_fsyncdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsyncdir, fd, datasync, xdata);
+ return 0;
+}
int
-trace_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
+trace_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_REMOVEXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s name=%s",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, name);
- frame->local = loc->inode->gfid;
- }
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame, trace_removexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- loc, name, xdata);
+ conf = this->private;
- return 0;
-}
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_ACCESS].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s path=%s mask=0%o", frame->root->unique,
+ uuid_utoa(loc->inode->gfid), loc->path, mask);
+ frame->local = loc->inode->gfid;
-int
-trace_opendir (call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s fd=%p",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, fd);
- frame->local = loc->inode->gfid;
- }
+ LOG_ELEMENT(conf, string);
+ }
- STACK_WIND (frame, trace_opendir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir,
- loc, fd, xdata);
- return 0;
+out:
+ STACK_WIND(frame, trace_access_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->access, loc, mask, xdata);
+ return 0;
}
-int
-trace_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *dict)
+int32_t
+trace_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_READDIRP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET", "
- "offset=%"PRId64" dict=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, size, offset, dict);
- frame->local = fd->inode->gfid;
- }
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame, trace_readdirp_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp,
- fd, size, offset, dict);
+ conf = this->private;
- return 0;
-}
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_RCHECKSUM].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s offset=%" PRId64 "len=%u fd=%p",
+ frame->root->unique, uuid_utoa(fd->inode->gfid), offset, len,
+ fd);
+ frame->local = fd->inode->gfid;
-int
-trace_readdir (call_frame_t *frame, xlator_t *this, fd_t *fd,
- size_t size, off_t offset, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_READDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, size=%"GF_PRI_SIZET", offset=%"PRId64,
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, size, offset);
- frame->local = fd->inode->gfid;
- }
+ LOG_ELEMENT(conf, string);
+ }
- STACK_WIND (frame, trace_readdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir,
- fd, size, offset, xdata);
+out:
+ STACK_WIND(frame, trace_rchecksum_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rchecksum, fd, offset, len, xdata);
- return 0;
+ return 0;
}
+int32_t
+trace_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FENTRYLK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s volume=%s, (fd=%p "
+ "basename=%s, cmd=%s, type=%s)",
+ frame->root->unique, uuid_utoa(fd->inode->gfid), volume, fd,
+ basename,
+ ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" : "ENTRYLK_UNLOCK"),
+ ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK"));
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT(conf, string);
+ }
-int
-trace_fsyncdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t datasync, dict_t *xdata)
+out:
+ STACK_WIND(frame, trace_fentrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fentrylk, volume, fd, basename, cmd,
+ type, xdata);
+ return 0;
+}
+
+int32_t
+trace_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_FSYNCDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s datasync=%d fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- datasync, fd);
- frame->local = fd->inode->gfid;
- }
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame, trace_fsyncdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir,
- fd, datasync, xdata);
- return 0;
-}
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FGETXATTR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string), "%" PRId64 ": gfid=%s fd=%p name=%s",
+ frame->root->unique, uuid_utoa(fd->inode->gfid), fd, name);
-int
-trace_access (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_ACCESS].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s path=%s mask=0%o",
- frame->root->unique, uuid_utoa (loc->inode->gfid),
- loc->path, mask);
- frame->local = loc->inode->gfid;
- }
+ frame->local = fd->inode->gfid;
- STACK_WIND (frame, trace_access_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access,
- loc, mask, xdata);
- return 0;
-}
+ LOG_ELEMENT(conf, string);
+ }
+out:
+ STACK_WIND(frame, trace_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
+}
int32_t
-trace_rchecksum (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- int32_t len, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_RCHECKSUM].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s offset=%"PRId64" len=%u fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- offset, len, fd);
- frame->local = fd->inode->gfid;
- }
+trace_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
- STACK_WIND (frame, trace_rchecksum_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rchecksum,
- fd, offset, len, xdata);
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSETXATTR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string), "%" PRId64 ": gfid=%s fd=%p flags=%d",
+ frame->root->unique, uuid_utoa(fd->inode->gfid), fd, flags);
- return 0;
+ frame->local = fd->inode->gfid;
+ LOG_ELEMENT(conf, string);
+ }
+
+out:
+ STACK_WIND(frame, trace_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
}
-int32_t
-trace_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, const char *basename, entrylk_cmd cmd,
- entrylk_type type, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_FENTRYLK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s volume=%s, (fd=%p basename=%s, "
- "cmd=%s, type=%s)",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- volume, fd, basename,
- ((cmd == ENTRYLK_LOCK) ? "ENTRYLK_LOCK" : "ENTRYLK_UNLOCK"),
- ((type == ENTRYLK_RDLCK) ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK"));
- frame->local = fd->inode->gfid;
- }
+int
+trace_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame, trace_fentrylk_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fentrylk,
- volume, fd, basename, cmd, type, xdata);
- return 0;
+ conf = this->private;
-}
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64 ": gfid=%s offset=%" PRId64 " fd=%p",
+ frame->root->unique, uuid_utoa(fd->inode->gfid), offset, fd);
-int32_t
-trace_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_FGETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p name=%s",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, name);
- frame->local = fd->inode->gfid;
- }
+ frame->local = fd->inode->gfid;
- STACK_WIND (frame, trace_fgetxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr,
- fd, name, xdata);
- return 0;
-}
+ LOG_ELEMENT(conf, string);
+ }
-int32_t
-trace_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int32_t flags, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_FSETXATTR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p flags=%d",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- fd, flags);
- frame->local = fd->inode->gfid;
- }
+out:
+ STACK_WIND(frame, trace_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
- STACK_WIND (frame, trace_fsetxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr,
- fd, dict, flags, xdata);
- return 0;
+ return 0;
}
int
-trace_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset, dict_t *xdata)
+trace_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_FTRUNCATE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s offset=%"PRId64" fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid),
- offset, fd);
- frame->local = fd->inode->gfid;
- }
+ trace_conf_t *conf = NULL;
- STACK_WIND (frame, trace_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd, offset, xdata);
+ conf = this->private;
- return 0;
-}
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_FSTAT].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string), "%" PRId64 ": gfid=%s fd=%p",
+ frame->root->unique, uuid_utoa(fd->inode->gfid), fd);
+ frame->local = fd->inode->gfid;
-int
-trace_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
-{
- if (trace_fop_names[GF_FOP_FSTAT].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd);
- frame->local = fd->inode->gfid;
- }
+ LOG_ELEMENT(conf, string);
+ }
- STACK_WIND (frame, trace_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd, xdata);
- return 0;
+out:
+ STACK_WIND(frame, trace_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
}
-
int
-trace_lk (call_frame_t *frame, xlator_t *this, fd_t *fd,
- int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+trace_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- if (trace_fop_names[GF_FOP_LK].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "%"PRId64": gfid=%s fd=%p, cmd=%d, lock {l_type=%d, l_whence=%d, "
- "l_start=%"PRId64", l_len=%"PRId64", l_pid=%u})",
- frame->root->unique, uuid_utoa (fd->inode->gfid), fd,
- cmd, lock->l_type, lock->l_whence,
- lock->l_start, lock->l_len, lock->l_pid);
- frame->local = fd->inode->gfid;
- }
+ trace_conf_t *conf = NULL;
+
+ conf = this->private;
- STACK_WIND (frame, trace_lk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk,
- fd, cmd, lock, xdata);
- return 0;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LK].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string),
+ "%" PRId64
+ ": gfid=%s fd=%p, cmd=%d, "
+ "lock {l_type=%d, "
+ "l_whence=%d, l_start=%" PRId64
+ ", "
+ "l_len=%" PRId64 ", l_pid=%u})",
+ frame->root->unique, uuid_utoa(fd->inode->gfid), fd, cmd,
+ lock->l_type, lock->l_whence, lock->l_start, lock->l_len,
+ lock->l_pid);
+
+ frame->local = fd->inode->gfid;
+
+ LOG_ELEMENT(conf, string);
+ }
+
+out:
+ STACK_WIND(frame, trace_lk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata);
+ return 0;
}
int32_t
-trace_forget (xlator_t *this, inode_t *inode)
+trace_forget(xlator_t *this, inode_t *inode)
{
- /* If user want to understand when a lookup happens,
- he should know about 'forget' too */
- if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "gfid=%s", uuid_utoa (inode->gfid));
- }
- return 0;
-}
+ trace_conf_t *conf = NULL;
+ conf = this->private;
+ /* If user want to understand when a lookup happens,
+ he should know about 'forget' too */
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_LOOKUP].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string), "gfid=%s", uuid_utoa(inode->gfid));
+
+ LOG_ELEMENT(conf, string);
+ }
+
+out:
+ return 0;
+}
int32_t
-trace_releasedir (xlator_t *this, fd_t *fd)
+trace_releasedir(xlator_t *this, fd_t *fd)
{
- if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "gfid=%s fd=%p", uuid_utoa (fd->inode->gfid), fd);
- }
+ trace_conf_t *conf = NULL;
- return 0;
+ conf = this->private;
+
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_OPENDIR].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string), "gfid=%s fd=%p",
+ uuid_utoa(fd->inode->gfid), fd);
+
+ LOG_ELEMENT(conf, string);
+ }
+
+out:
+ return 0;
}
int32_t
-trace_release (xlator_t *this, fd_t *fd)
+trace_release(xlator_t *this, fd_t *fd)
{
- if (trace_fop_names[GF_FOP_OPEN].enabled ||
- trace_fop_names[GF_FOP_CREATE].enabled) {
- gf_log (this->name, GF_LOG_INFO,
- "gfid=%s fd=%p", uuid_utoa (fd->inode->gfid), fd);
- }
- return 0;
-}
+ trace_conf_t *conf = NULL;
+ conf = this->private;
+ if (!conf->log_file && !conf->log_history)
+ goto out;
+ if (trace_fop_names[GF_FOP_OPEN].enabled ||
+ trace_fop_names[GF_FOP_CREATE].enabled) {
+ char string[4096] = {
+ 0,
+ };
+ snprintf(string, sizeof(string), "gfid=%s fd=%p",
+ uuid_utoa(fd->inode->gfid), fd);
+
+ LOG_ELEMENT(conf, string);
+ }
+
+out:
+ return 0;
+}
void
-enable_all_calls (int enabled)
+enable_all_calls(int enabled)
{
- int i;
+ int i;
- for (i = 0; i < GF_FOP_MAXVALUE; i++)
- trace_fop_names[i].enabled = enabled;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ trace_fop_names[i].enabled = enabled;
}
-
void
-enable_call (const char *name, int enabled)
+enable_call(const char *name, int enabled)
{
- int i;
- for (i = 0; i < GF_FOP_MAXVALUE; i++)
- if (!strcasecmp(trace_fop_names[i].name, name))
- trace_fop_names[i].enabled = enabled;
+ int i;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++)
+ if (!strcasecmp(trace_fop_names[i].name, name))
+ trace_fop_names[i].enabled = enabled;
}
-
/*
include = 1 for "include-ops"
= 0 for "exclude-ops"
*/
void
-process_call_list (const char *list, int include)
+process_call_list(const char *list, int include)
{
- enable_all_calls (include ? 0 : 1);
+ enable_all_calls(include ? 0 : 1);
- char *call = strsep ((char **)&list, ",");
+ char *call = strsep((char **)&list, ",");
- while (call) {
- enable_call (call, include);
- call = strsep ((char **)&list, ",");
- }
+ while (call) {
+ enable_call(call, include);
+ call = strsep((char **)&list, ",");
+ }
}
+int32_t
+trace_dump_history(xlator_t *this)
+{
+ int ret = -1;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ trace_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO("trace", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->history, out);
+
+ conf = this->private;
+ // Is it ok to return silently if log-history option his off?
+ if (conf && conf->log_history == _gf_true) {
+ gf_proc_dump_build_key(key_prefix, "xlator.debug.trace", "history");
+ gf_proc_dump_add_section("%s", key_prefix);
+ eh_dump(this->history, NULL, dump_history_trace);
+ }
+ ret = 0;
+
+out:
+ return ret;
+}
int32_t
-init (xlator_t *this)
+mem_acct_init(xlator_t *this)
{
- dict_t *options = NULL;
- char *includes = NULL, *excludes = NULL;
- char *forced_loglevel = NULL;
+ int ret = -1;
- if (!this)
- return -1;
+ if (!this)
+ return ret;
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "trace translator requires one subvolume");
- return -1;
- }
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
+ ret = xlator_mem_acct_init(this, gf_trace_mt_end + 1);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Memory accounting init"
+ " failed");
+ return ret;
+ }
- options = this->options;
- includes = data_to_str (dict_get (options, "include-ops"));
- excludes = data_to_str (dict_get (options, "exclude-ops"));
+ return ret;
+}
- {
- int i;
- for (i = 0; i < GF_FOP_MAXVALUE; i++) {
- trace_fop_names[i].name = (gf_fop_list[i] ?
- gf_fop_list[i] : ":O");
- trace_fop_names[i].enabled = 1;
- }
- }
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ int32_t ret = -1;
+ trace_conf_t *conf = NULL;
+ char *includes = NULL, *excludes = NULL;
- if (includes && excludes) {
- gf_log (this->name,
- GF_LOG_ERROR,
- "must specify only one of 'include-ops' and 'exclude-ops'");
- return -1;
- }
- if (includes)
- process_call_list (includes, 1);
- if (excludes)
- process_call_list (excludes, 0);
-
- if (dict_get (options, "force-log-level")) {
- forced_loglevel = data_to_str (dict_get (options,
- "force-log-level"));
- if (!forced_loglevel)
- goto setloglevel;
-
- if (strcmp (forced_loglevel, "INFO") == 0)
- trace_log_level = GF_LOG_INFO;
- else if (strcmp (forced_loglevel, "TRACE") == 0)
- trace_log_level = GF_LOG_TRACE;
- else if (strcmp (forced_loglevel, "ERROR") == 0)
- trace_log_level = GF_LOG_ERROR;
- else if (strcmp (forced_loglevel, "DEBUG") == 0)
- trace_log_level = GF_LOG_DEBUG;
- else if (strcmp (forced_loglevel, "WARNING") == 0)
- trace_log_level = GF_LOG_WARNING;
- else if (strcmp (forced_loglevel, "CRITICAL") == 0)
- trace_log_level = GF_LOG_CRITICAL;
- else if (strcmp (forced_loglevel, "NONE") == 0)
- trace_log_level = GF_LOG_NONE;
- }
+ GF_VALIDATE_OR_GOTO("quick-read", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, options, out);
+
+ conf = this->private;
+
+ includes = data_to_str(dict_get(options, "include-ops"));
+ excludes = data_to_str(dict_get(options, "exclude-ops"));
+
+ {
+ int i;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (gf_fop_list[i])
+ strncpy(trace_fop_names[i].name, gf_fop_list[i],
+ sizeof(trace_fop_names[i].name));
+ else
+ strncpy(trace_fop_names[i].name, ":0",
+ sizeof(trace_fop_names[i].name));
+ trace_fop_names[i].enabled = 1;
+ trace_fop_names[i].name[sizeof(trace_fop_names[i].name) - 1] = 0;
+ }
+ }
+
+ if (includes && excludes) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "must specify only one of 'include-ops' and "
+ "'exclude-ops'");
+ goto out;
+ }
+
+ if (includes)
+ process_call_list(includes, 1);
+ if (excludes)
+ process_call_list(excludes, 0);
+
+ /* Should resizing of the event-history be allowed in reconfigure?
+ * for which a new event_history might have to be allocated and the
+ * older history has to be freed.
+ */
+ GF_OPTION_RECONF("log-file", conf->log_file, options, bool, out);
+
+ GF_OPTION_RECONF("log-history", conf->log_history, options, bool, out);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
+init(xlator_t *this)
+{
+ dict_t *options = NULL;
+ char *includes = NULL, *excludes = NULL;
+ char *forced_loglevel = NULL;
+ eh_t *history = NULL;
+ int ret = -1;
+ uint64_t history_size = TRACE_DEFAULT_HISTORY_SIZE;
+ trace_conf_t *conf = NULL;
+
+ if (!this)
+ return -1;
+
+ if (!this->children || this->children->next) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "trace translator requires one subvolume");
+ return -1;
+ }
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
+ }
+
+ conf = GF_CALLOC(1, sizeof(trace_conf_t), gf_trace_mt_trace_conf_t);
+ if (!conf) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "cannot allocate "
+ "xl->private");
+ return -1;
+ }
+
+ options = this->options;
+ includes = data_to_str(dict_get(options, "include-ops"));
+ excludes = data_to_str(dict_get(options, "exclude-ops"));
+
+ {
+ int i;
+ for (i = 0; i < GF_FOP_MAXVALUE; i++) {
+ if (gf_fop_list[i])
+ strncpy(trace_fop_names[i].name, gf_fop_list[i],
+ sizeof(trace_fop_names[i].name));
+ else
+ strncpy(trace_fop_names[i].name, ":O",
+ sizeof(trace_fop_names[i].name));
+ trace_fop_names[i].enabled = 1;
+ trace_fop_names[i].name[sizeof(trace_fop_names[i].name) - 1] = 0;
+ }
+ }
+
+ if (includes && excludes) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "must specify only one of 'include-ops' and "
+ "'exclude-ops'");
+ return -1;
+ }
+
+ if (includes)
+ process_call_list(includes, 1);
+ if (excludes)
+ process_call_list(excludes, 0);
+
+ GF_OPTION_INIT("history-size", history_size, size, out);
+ conf->history_size = history_size;
+
+ gf_log(this->name, GF_LOG_INFO, "history size %" PRIu64, history_size);
+
+ GF_OPTION_INIT("log-file", conf->log_file, bool, out);
+
+ gf_log(this->name, GF_LOG_INFO, "logging to file %s",
+ (conf->log_file == _gf_true) ? "enabled" : "disabled");
+
+ GF_OPTION_INIT("log-history", conf->log_history, bool, out);
+
+ gf_log(this->name, GF_LOG_DEBUG, "logging to history %s",
+ (conf->log_history == _gf_true) ? "enabled" : "disabled");
+
+ history = eh_new(history_size, _gf_false, NULL);
+ if (!history) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "event history cannot be "
+ "initialized");
+ return -1;
+ }
+
+ this->history = history;
+
+ conf->trace_log_level = GF_LOG_INFO;
+
+ if (dict_get(options, "force-log-level")) {
+ forced_loglevel = data_to_str(dict_get(options, "force-log-level"));
+ if (!forced_loglevel)
+ goto setloglevel;
+
+ if (strcmp(forced_loglevel, "INFO") == 0)
+ conf->trace_log_level = GF_LOG_INFO;
+ else if (strcmp(forced_loglevel, "TRACE") == 0)
+ conf->trace_log_level = GF_LOG_TRACE;
+ else if (strcmp(forced_loglevel, "ERROR") == 0)
+ conf->trace_log_level = GF_LOG_ERROR;
+ else if (strcmp(forced_loglevel, "DEBUG") == 0)
+ conf->trace_log_level = GF_LOG_DEBUG;
+ else if (strcmp(forced_loglevel, "WARNING") == 0)
+ conf->trace_log_level = GF_LOG_WARNING;
+ else if (strcmp(forced_loglevel, "CRITICAL") == 0)
+ conf->trace_log_level = GF_LOG_CRITICAL;
+ else if (strcmp(forced_loglevel, "NONE") == 0)
+ conf->trace_log_level = GF_LOG_NONE;
+ }
setloglevel:
- gf_log_set_loglevel (trace_log_level);
+ gf_log_set_loglevel(this->ctx, conf->trace_log_level);
+ this->private = conf;
+ ret = 0;
+out:
+ if (ret == -1) {
+ if (history)
+ GF_FREE(history);
+ if (conf)
+ GF_FREE(conf);
+ }
- return 0;
+ return ret;
}
void
-fini (xlator_t *this)
+fini(xlator_t *this)
{
- if (!this)
- return;
-
- gf_log (this->name, GF_LOG_INFO,
- "trace translator unloaded");
+ if (!this)
return;
+
+ if (this->history)
+ eh_destroy(this->history);
+
+ gf_log(this->name, GF_LOG_INFO, "trace translator unloaded");
+ return;
}
struct xlator_fops fops = {
- .stat = trace_stat,
- .readlink = trace_readlink,
- .mknod = trace_mknod,
- .mkdir = trace_mkdir,
- .unlink = trace_unlink,
- .rmdir = trace_rmdir,
- .symlink = trace_symlink,
- .rename = trace_rename,
- .link = trace_link,
- .truncate = trace_truncate,
- .open = trace_open,
- .readv = trace_readv,
- .writev = trace_writev,
- .statfs = trace_statfs,
- .flush = trace_flush,
- .fsync = trace_fsync,
- .setxattr = trace_setxattr,
- .getxattr = trace_getxattr,
- .fsetxattr = trace_fsetxattr,
- .fgetxattr = trace_fgetxattr,
- .removexattr = trace_removexattr,
- .opendir = trace_opendir,
- .readdir = trace_readdir,
- .readdirp = trace_readdirp,
- .fsyncdir = trace_fsyncdir,
- .access = trace_access,
- .ftruncate = trace_ftruncate,
- .fstat = trace_fstat,
- .create = trace_create,
- .lk = trace_lk,
- .inodelk = trace_inodelk,
- .finodelk = trace_finodelk,
- .entrylk = trace_entrylk,
- .fentrylk = trace_fentrylk,
- .lookup = trace_lookup,
- .rchecksum = trace_rchecksum,
- .xattrop = trace_xattrop,
- .fxattrop = trace_fxattrop,
- .setattr = trace_setattr,
- .fsetattr = trace_fsetattr,
+ .stat = trace_stat,
+ .readlink = trace_readlink,
+ .mknod = trace_mknod,
+ .mkdir = trace_mkdir,
+ .unlink = trace_unlink,
+ .rmdir = trace_rmdir,
+ .symlink = trace_symlink,
+ .rename = trace_rename,
+ .link = trace_link,
+ .truncate = trace_truncate,
+ .open = trace_open,
+ .readv = trace_readv,
+ .writev = trace_writev,
+ .statfs = trace_statfs,
+ .flush = trace_flush,
+ .fsync = trace_fsync,
+ .setxattr = trace_setxattr,
+ .getxattr = trace_getxattr,
+ .fsetxattr = trace_fsetxattr,
+ .fgetxattr = trace_fgetxattr,
+ .removexattr = trace_removexattr,
+ .opendir = trace_opendir,
+ .readdir = trace_readdir,
+ .readdirp = trace_readdirp,
+ .fsyncdir = trace_fsyncdir,
+ .access = trace_access,
+ .ftruncate = trace_ftruncate,
+ .fstat = trace_fstat,
+ .create = trace_create,
+ .lk = trace_lk,
+ .inodelk = trace_inodelk,
+ .finodelk = trace_finodelk,
+ .entrylk = trace_entrylk,
+ .fentrylk = trace_fentrylk,
+ .lookup = trace_lookup,
+ .rchecksum = trace_rchecksum,
+ .xattrop = trace_xattrop,
+ .fxattrop = trace_fxattrop,
+ .setattr = trace_setattr,
+ .fsetattr = trace_fsetattr,
+ .seek = trace_seek,
};
-
struct xlator_cbks cbks = {
- .release = trace_release,
- .releasedir = trace_releasedir,
- .forget = trace_forget,
+ .release = trace_release,
+ .releasedir = trace_releasedir,
+ .forget = trace_forget,
};
struct volume_options options[] = {
- { .key = {"include-ops", "include"},
- .type = GF_OPTION_TYPE_STR,
- /*.value = { ""} */
- },
- { .key = {"exclude-ops", "exclude"},
- .type = GF_OPTION_TYPE_STR
- /*.value = { ""} */
- },
- { .key = {NULL} },
+ {
+ .key = {"include-ops", "include"},
+ .type = GF_OPTION_TYPE_STR,
+ /*.value = { ""} */
+ },
+ {
+ .key = {"exclude-ops", "exclude"},
+ .type = GF_OPTION_TYPE_STR
+ /*.value = { ""} */
+ },
+ {
+ .key = {"history-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "1024",
+ },
+ {
+ .key = {"log-file"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ },
+ {
+ .key = {"log-history"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "no",
+ },
+ {.key = {NULL}},
+};
+
+struct xlator_dumpops dumpops = {.history = trace_dump_history};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "trace",
+ .category = GF_TECH_PREVIEW,
};
diff --git a/xlators/debug/trace/src/trace.h b/xlators/debug/trace/src/trace.h
new file mode 100644
index 00000000000..b16304799da
--- /dev/null
+++ b/xlators/debug/trace/src/trace.h
@@ -0,0 +1,55 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <time.h>
+#include <errno.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/event-history.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/circ-buff.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/options.h>
+
+#define TRACE_DEFAULT_HISTORY_SIZE 1024
+
+typedef struct {
+ /* Since the longest fop name is fremovexattr i.e 12 characters, array size
+ * is kept 24, i.e double of the maximum.
+ */
+ char name[24];
+ int enabled;
+} trace_fop_name_t;
+
+trace_fop_name_t trace_fop_names[GF_FOP_MAXVALUE];
+
+typedef struct {
+ gf_boolean_t log_file;
+ gf_boolean_t log_history;
+ uint64_t history_size;
+ int trace_log_level;
+} trace_conf_t;
+
+#define TRACE_STACK_UNWIND(op, frame, params...) \
+ do { \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT(op, frame, params); \
+ } while (0);
+
+#define LOG_ELEMENT(_conf, _string) \
+ do { \
+ if (_conf) { \
+ if ((_conf->log_history) == _gf_true) \
+ gf_log_eh("%s", _string); \
+ if ((_conf->log_file) == _gf_true) \
+ gf_log(THIS->name, _conf->trace_log_level, "%s", _string); \
+ } \
+ } while (0);
diff --git a/xlators/encryption/Makefile.am b/xlators/encryption/Makefile.am
deleted file mode 100644
index 2cbde680fac..00000000000
--- a/xlators/encryption/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = rot-13
-
-CLEANFILES =
diff --git a/xlators/encryption/rot-13/Makefile.am b/xlators/encryption/rot-13/Makefile.am
deleted file mode 100644
index d471a3f9243..00000000000
--- a/xlators/encryption/rot-13/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/encryption/rot-13/src/rot-13.c b/xlators/encryption/rot-13/src/rot-13.c
deleted file mode 100644
index 697465fd313..00000000000
--- a/xlators/encryption/rot-13/src/rot-13.c
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <ctype.h>
-#include <sys/uio.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-
-#include "rot-13.h"
-
-/*
- * This is a rot13 ``encryption'' xlator. It rot13's data when
- * writing to disk and rot13's it back when reading it.
- * This xlator is meant as an example, NOT FOR PRODUCTION
- * USE ;) (hence no error-checking)
- */
-
-void
-rot13 (char *buf, int len)
-{
- int i;
- for (i = 0; i < len; i++) {
- if (buf[i] >= 'a' && buf[i] <= 'z')
- buf[i] = 'a' + ((buf[i] - 'a' + 13) % 26);
- else if (buf[i] >= 'A' && buf[i] <= 'Z')
- buf[i] = 'A' + ((buf[i] - 'A' + 13) % 26);
- }
-}
-
-void
-rot13_iovec (struct iovec *vector, int count)
-{
- int i;
- for (i = 0; i < count; i++) {
- rot13 (vector[i].iov_base, vector[i].iov_len);
- }
-}
-
-int32_t
-rot13_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref, dict_t *xdata)
-{
- rot_13_private_t *priv = (rot_13_private_t *)this->private;
-
- if (priv->decrypt_read)
- rot13_iovec (vector, count);
-
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref, xdata);
- return 0;
-}
-
-int32_t
-rot13_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
-{
- STACK_WIND (frame,
- rot13_readv_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->readv,
- fd, size, offset, flags, xdata);
- return 0;
-}
-
-int32_t
-rot13_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
-{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
- return 0;
-}
-
-int32_t
-rot13_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
-{
- rot_13_private_t *priv = (rot_13_private_t *)this->private;
- if (priv->encrypt_write)
- rot13_iovec (vector, count);
-
- STACK_WIND (frame,
- rot13_writev_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset, flags,
- iobref, xdata);
- return 0;
-}
-
-int32_t
-init (xlator_t *this)
-{
- data_t *data = NULL;
- rot_13_private_t *priv = NULL;
-
- if (!this->children || this->children->next) {
- gf_log ("rot13", GF_LOG_ERROR,
- "FATAL: rot13 should have exactly one child");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- priv = GF_CALLOC (sizeof (rot_13_private_t), 1, 0);
- if (!priv)
- return -1;
-
- priv->decrypt_read = 1;
- priv->encrypt_write = 1;
-
- data = dict_get (this->options, "encrypt-write");
- if (data) {
- if (gf_string2boolean (data->data, &priv->encrypt_write) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "encrypt-write takes only boolean options");
- return -1;
- }
- }
-
- data = dict_get (this->options, "decrypt-read");
- if (data) {
- if (gf_string2boolean (data->data, &priv->decrypt_read) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "decrypt-read takes only boolean options");
- return -1;
- }
- }
-
- this->private = priv;
- gf_log ("rot13", GF_LOG_DEBUG, "rot13 xlator loaded");
- return 0;
-}
-
-void
-fini (xlator_t *this)
-{
- rot_13_private_t *priv = this->private;
-
- if (!priv)
- return;
- this->private = NULL;
- GF_FREE (priv);
-
- return;
-}
-
-struct xlator_fops fops = {
- .readv = rot13_readv,
- .writev = rot13_writev
-};
-
-struct xlator_cbks cbks = {
-};
-
-struct volume_options options[] = {
- { .key = {"encrypt-write"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {"decrypt-read"},
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {NULL} },
-};
diff --git a/xlators/encryption/rot-13/src/rot-13.h b/xlators/encryption/rot-13/src/rot-13.h
deleted file mode 100644
index 8ef8162aeb5..00000000000
--- a/xlators/encryption/rot-13/src/rot-13.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __ROT_13_H__
-#define __ROT_13_H__
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-typedef struct {
- gf_boolean_t encrypt_write;
- gf_boolean_t decrypt_read;
-} rot_13_private_t;
-
-#endif /* __ROT_13_H__ */
diff --git a/xlators/features/Makefile.am b/xlators/features/Makefile.am
index be48f93fa52..c57897f11ea 100644
--- a/xlators/features/Makefile.am
+++ b/xlators/features/Makefile.am
@@ -1,3 +1,14 @@
-SUBDIRS = locks quota read-only mac-compat quiesce marker index # trash path-converter # filter
+if BUILD_CLOUDSYNC
+ CLOUDSYNC_DIR = cloudsync
+endif
+
+if BUILD_METADISP
+ METADISP_DIR = metadisp
+endif
+
+SUBDIRS = locks quota read-only quiesce marker index barrier arbiter upcall \
+ compress changelog gfid-access snapview-client snapview-server trash \
+ shard bit-rot leases selinux sdfs namespace $(CLOUDSYNC_DIR) thin-arbiter \
+ utime $(METADISP_DIR)
CLEANFILES =
diff --git a/xlators/features/arbiter/Makefile.am b/xlators/features/arbiter/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/arbiter/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/arbiter/src/Makefile.am b/xlators/features/arbiter/src/Makefile.am
new file mode 100644
index 00000000000..badc42f37be
--- /dev/null
+++ b/xlators/features/arbiter/src/Makefile.am
@@ -0,0 +1,19 @@
+if WITH_SERVER
+xlator_LTLIBRARIES = arbiter.la
+endif
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+arbiter_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+arbiter_la_SOURCES = arbiter.c
+arbiter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = arbiter.h arbiter-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/arbiter/src/arbiter-mem-types.h b/xlators/features/arbiter/src/arbiter-mem-types.h
new file mode 100644
index 00000000000..05d18374c46
--- /dev/null
+++ b/xlators/features/arbiter/src/arbiter-mem-types.h
@@ -0,0 +1,18 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __ARBITER_MEM_TYPES_H__
+#define __ARBITER_MEM_TYPES_H__
+#include <glusterfs/mem-types.h>
+
+typedef enum gf_arbiter_mem_types_ {
+ gf_arbiter_mt_inode_ctx_t = gf_common_mt_end + 1,
+ gf_arbiter_mt_end
+} gf_arbiter_mem_types_t;
+#endif
diff --git a/xlators/features/arbiter/src/arbiter.c b/xlators/features/arbiter/src/arbiter.c
new file mode 100644
index 00000000000..83a97e3354b
--- /dev/null
+++ b/xlators/features/arbiter/src/arbiter.c
@@ -0,0 +1,380 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "arbiter.h"
+#include "arbiter-mem-types.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+
+static arbiter_inode_ctx_t *
+__arbiter_inode_ctx_get(inode_t *inode, xlator_t *this)
+{
+ arbiter_inode_ctx_t *ctx = NULL;
+ int ret = 0;
+ uint64_t ctx_addr = 0;
+
+ ret = __inode_ctx_get(inode, this, &ctx_addr);
+ if (ret == 0) {
+ ctx = (arbiter_inode_ctx_t *)(long)ctx_addr;
+ goto out;
+ }
+
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_arbiter_mt_inode_ctx_t);
+ if (!ctx)
+ goto out;
+
+ ret = __inode_ctx_put(inode, this, (uint64_t)(uintptr_t)ctx);
+ if (ret) {
+ GF_FREE(ctx);
+ ctx = NULL;
+ gf_log_callingfn(this->name, GF_LOG_ERROR,
+ "failed to "
+ "set the inode ctx (%s)",
+ uuid_utoa(inode->gfid));
+ }
+out:
+ return ctx;
+}
+
+static arbiter_inode_ctx_t *
+arbiter_inode_ctx_get(inode_t *inode, xlator_t *this)
+{
+ arbiter_inode_ctx_t *ctx = NULL;
+
+ LOCK(&inode->lock);
+ {
+ ctx = __arbiter_inode_ctx_get(inode, this);
+ }
+ UNLOCK(&inode->lock);
+ return ctx;
+}
+
+int32_t
+arbiter_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ arbiter_inode_ctx_t *ctx = NULL;
+
+ if (op_ret != 0)
+ goto unwind;
+ ctx = arbiter_inode_ctx_get(inode, this);
+ if (!ctx) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ memcpy(&ctx->iattbuf, buf, sizeof(ctx->iattbuf));
+
+unwind:
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
+}
+
+int32_t
+arbiter_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ STACK_WIND(frame, arbiter_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
+
+int32_t
+arbiter_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ arbiter_inode_ctx_t *ctx = NULL;
+ struct iatt *buf = NULL;
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+
+ ctx = arbiter_inode_ctx_get(loc->inode, this);
+ if (!ctx) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ buf = &ctx->iattbuf;
+unwind:
+ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, buf, buf, NULL);
+ return 0;
+}
+
+int32_t
+arbiter_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+
+{
+ arbiter_inode_ctx_t *ctx = NULL;
+ struct iatt *buf = NULL;
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+
+ ctx = arbiter_inode_ctx_get(fd->inode, this);
+ if (!ctx) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ buf = &ctx->iattbuf;
+unwind:
+ STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, buf, buf, NULL);
+ return 0;
+}
+
+dict_t *
+arbiter_fill_writev_xdata(fd_t *fd, dict_t *xdata, xlator_t *this)
+{
+ dict_t *rsp_xdata = NULL;
+ int32_t ret = 0;
+ int is_append = 1;
+
+ if (!fd || !fd->inode || gf_uuid_is_null(fd->inode->gfid)) {
+ goto out;
+ }
+
+ if (!xdata)
+ goto out;
+
+ rsp_xdata = dict_new();
+ if (!rsp_xdata)
+ goto out;
+
+ if (dict_get(xdata, GLUSTERFS_OPEN_FD_COUNT)) {
+ ret = dict_set_uint32(rsp_xdata, GLUSTERFS_OPEN_FD_COUNT,
+ fd->inode->fd_count);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0,
+ "Failed to set dict value"
+ " for GLUSTERFS_OPEN_FD_COUNT");
+ }
+ }
+ if (dict_get(xdata, GLUSTERFS_WRITE_IS_APPEND)) {
+ ret = dict_set_uint32(rsp_xdata, GLUSTERFS_WRITE_IS_APPEND, is_append);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0,
+ "Failed to set dict value"
+ " for GLUSTERFS_WRITE_IS_APPEND");
+ }
+ }
+out:
+ return rsp_xdata;
+}
+
+int32_t
+arbiter_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ arbiter_inode_ctx_t *ctx = NULL;
+ struct iatt *buf = NULL;
+ dict_t *rsp_xdata = NULL;
+ int op_ret = 0;
+ int op_errno = 0;
+
+ ctx = arbiter_inode_ctx_get(fd->inode, this);
+ if (!ctx) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ buf = &ctx->iattbuf;
+ op_ret = iov_length(vector, count);
+ rsp_xdata = arbiter_fill_writev_xdata(fd, xdata, this);
+unwind:
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, buf, buf, rsp_xdata);
+ if (rsp_xdata)
+ dict_unref(rsp_xdata);
+ return 0;
+}
+
+int32_t
+arbiter_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
+{
+ arbiter_inode_ctx_t *ctx = NULL;
+ struct iatt *buf = NULL;
+ int op_ret = 0;
+ int op_errno = 0;
+
+ ctx = arbiter_inode_ctx_get(fd->inode, this);
+ if (!ctx) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ buf = &ctx->iattbuf;
+unwind:
+ STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, buf, buf, NULL);
+ return 0;
+}
+
+int32_t
+arbiter_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ arbiter_inode_ctx_t *ctx = NULL;
+ struct iatt *buf = NULL;
+ int op_ret = 0;
+ int op_errno = 0;
+
+ ctx = arbiter_inode_ctx_get(fd->inode, this);
+ if (!ctx) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ buf = &ctx->iattbuf;
+unwind:
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, buf, buf, NULL);
+ return 0;
+}
+
+int32_t
+arbiter_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ arbiter_inode_ctx_t *ctx = NULL;
+ struct iatt *buf = NULL;
+ int op_ret = 0;
+ int op_errno = 0;
+
+ ctx = arbiter_inode_ctx_get(fd->inode, this);
+ if (!ctx) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ buf = &ctx->iattbuf;
+unwind:
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, buf, buf, NULL);
+ return 0;
+}
+
+static int32_t
+arbiter_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(readv, frame, -1, ENOSYS, NULL, 0, NULL, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+arbiter_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(seek, frame, -1, ENOSYS, 0, xdata);
+ return 0;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ ret = xlator_mem_acct_init(this, gf_arbiter_mt_end + 1);
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR,
+ "Memory accounting "
+ "initialization failed.");
+ return ret;
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ return 0;
+}
+
+int
+arbiter_forget(xlator_t *this, inode_t *inode)
+{
+ arbiter_inode_ctx_t *ctx = NULL;
+ uint64_t ctx_addr = 0;
+
+ inode_ctx_del(inode, this, &ctx_addr);
+ if (!ctx_addr)
+ return 0;
+ ctx = (arbiter_inode_ctx_t *)(long)ctx_addr;
+ GF_FREE(ctx);
+ return 0;
+}
+
+int32_t
+init(xlator_t *this)
+{
+ if (!this->children || this->children->next) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'arbiter' not configured with exactly one child");
+ return -1;
+ }
+
+ if (!this->parents)
+ gf_log(this->name, GF_LOG_ERROR, "dangling volume. check volfile ");
+
+ return 0;
+}
+
+void
+fini(xlator_t *this)
+{
+ return;
+}
+
+struct xlator_fops fops = {
+ .lookup = arbiter_lookup,
+
+ /* Return success for these inode write FOPS without winding it down to
+ * posix; this is needed for AFR write transaction logic to work.*/
+ .truncate = arbiter_truncate,
+ .writev = arbiter_writev,
+ .ftruncate = arbiter_ftruncate,
+ .fallocate = arbiter_fallocate,
+ .discard = arbiter_discard,
+ .zerofill = arbiter_zerofill,
+
+ /* AFR is not expected to wind these inode read FOPS initiated by the
+ * application to the arbiter brick. But in case a bug causes them
+ * to be called, we return ENOSYS. */
+ .readv = arbiter_readv,
+ .seek = arbiter_seek,
+
+ /* The following inode read FOPS initiated by the application are not
+ * wound by AFR either but internal logic like shd, glfsheal and
+ * client side healing in AFR will send them for selfheal/ inode refresh
+ * operations etc.,so we need to wind them down to posix:
+ *
+ * (f)stat, readdir(p), readlink, (f)getxattr.*/
+
+ /* All other FOPs not listed here are safe to be wound down to posix.*/
+};
+
+struct xlator_cbks cbks = {
+ .forget = arbiter_forget,
+};
+
+struct volume_options options[] = {
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "arbiter",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/arbiter/src/arbiter.h b/xlators/features/arbiter/src/arbiter.h
new file mode 100644
index 00000000000..546db7b751a
--- /dev/null
+++ b/xlators/features/arbiter/src/arbiter.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _ARBITER_H
+#define _ARBITER_H
+
+#include <glusterfs/locking.h>
+#include <glusterfs/common-utils.h>
+
+typedef struct arbiter_inode_ctx_ {
+ struct iatt iattbuf;
+} arbiter_inode_ctx_t;
+
+#endif /* _ARBITER_H */
diff --git a/xlators/features/barrier/Makefile.am b/xlators/features/barrier/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/barrier/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/barrier/src/Makefile.am b/xlators/features/barrier/src/Makefile.am
new file mode 100644
index 00000000000..25099bc56e5
--- /dev/null
+++ b/xlators/features/barrier/src/Makefile.am
@@ -0,0 +1,17 @@
+xlator_LTLIBRARIES = barrier.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+barrier_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+barrier_la_SOURCES = barrier.c
+
+barrier_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = barrier.h barrier-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/barrier/src/barrier-mem-types.h b/xlators/features/barrier/src/barrier-mem-types.h
new file mode 100644
index 00000000000..71ed7898d9c
--- /dev/null
+++ b/xlators/features/barrier/src/barrier-mem-types.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BARRIER_MEM_TYPES_H__
+#define __BARRIER_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_barrier_mem_types_ {
+ gf_barrier_mt_priv_t = gf_common_mt_end + 1,
+ gf_barrier_mt_end
+};
+#endif
diff --git a/xlators/features/barrier/src/barrier.c b/xlators/features/barrier/src/barrier.c
new file mode 100644
index 00000000000..852bbacb99d
--- /dev/null
+++ b/xlators/features/barrier/src/barrier.c
@@ -0,0 +1,809 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "barrier.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/call-stub.h>
+
+#include <glusterfs/statedump.h>
+
+void
+barrier_local_set_gfid(call_frame_t *frame, uuid_t gfid, xlator_t *this)
+{
+ if (gfid) {
+ uuid_t *id = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!id) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Could not set gfid"
+ ". gfid will not be dumped in statedump file.");
+ return;
+ }
+ gf_uuid_copy(*id, gfid);
+ frame->local = id;
+ }
+}
+
+void
+barrier_local_free_gfid(call_frame_t *frame)
+{
+ if (frame->local) {
+ GF_FREE(frame->local);
+ frame->local = NULL;
+ }
+}
+
+int32_t
+barrier_truncate_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ barrier_local_free_gfid(frame);
+ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int32_t
+barrier_ftruncate_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ barrier_local_free_gfid(frame);
+ STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int32_t
+barrier_unlink_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ barrier_local_free_gfid(frame);
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+int32_t
+barrier_rmdir_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ barrier_local_free_gfid(frame);
+ STACK_UNWIND_STRICT(rmdir, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+int32_t
+barrier_rename_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ barrier_local_free_gfid(frame);
+ STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
+ return 0;
+}
+
+int32_t
+barrier_writev_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ barrier_local_free_gfid(frame);
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int32_t
+barrier_fsync_cbk_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ barrier_local_free_gfid(frame);
+ STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+barrier_removexattr_cbk_resume(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ dict_t *xdata)
+{
+ barrier_local_free_gfid(frame);
+ STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+barrier_fremovexattr_cbk_resume(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ barrier_local_free_gfid(frame);
+ STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+barrier_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ BARRIER_FOP_CBK(writev, out, frame, this, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+out:
+ return 0;
+}
+
+int32_t
+barrier_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ BARRIER_FOP_CBK(fremovexattr, out, frame, this, op_ret, op_errno, xdata);
+out:
+ return 0;
+}
+
+int32_t
+barrier_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ BARRIER_FOP_CBK(removexattr, out, frame, this, op_ret, op_errno, xdata);
+out:
+ return 0;
+}
+
+int32_t
+barrier_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ BARRIER_FOP_CBK(truncate, out, frame, this, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+out:
+ return 0;
+}
+
+int32_t
+barrier_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ BARRIER_FOP_CBK(ftruncate, out, frame, this, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+out:
+ return 0;
+}
+
+int32_t
+barrier_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ BARRIER_FOP_CBK(rename, out, frame, this, op_ret, op_errno, buf,
+ preoldparent, postoldparent, prenewparent, postnewparent,
+ xdata);
+out:
+ return 0;
+}
+
+int32_t
+barrier_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ BARRIER_FOP_CBK(rmdir, out, frame, this, op_ret, op_errno, preparent,
+ postparent, xdata);
+out:
+ return 0;
+}
+
+int32_t
+barrier_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ BARRIER_FOP_CBK(unlink, out, frame, this, op_ret, op_errno, preparent,
+ postparent, xdata);
+out:
+ return 0;
+}
+
+int32_t
+barrier_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ BARRIER_FOP_CBK(fsync, out, frame, this, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+out:
+ return 0;
+}
+
+int32_t
+barrier_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ if (!((flags | fd->flags) & (O_SYNC | O_DSYNC))) {
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, off,
+ flags, iobref, xdata);
+
+ return 0;
+ }
+
+ barrier_local_set_gfid(frame, fd->inode->gfid, this);
+ STACK_WIND(frame, barrier_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, off, flags,
+ iobref, xdata);
+ return 0;
+}
+
+int32_t
+barrier_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ barrier_local_set_gfid(frame, fd->inode->gfid, this);
+ STACK_WIND(frame, barrier_fremovexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ return 0;
+}
+
+int32_t
+barrier_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ barrier_local_set_gfid(frame, loc->inode->gfid, this);
+ STACK_WIND(frame, barrier_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ return 0;
+}
+
+int32_t
+barrier_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ barrier_local_set_gfid(frame, loc->inode->gfid, this);
+ STACK_WIND(frame, barrier_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
+}
+
+int32_t
+barrier_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ barrier_local_set_gfid(frame, oldloc->inode->gfid, this);
+ STACK_WIND(frame, barrier_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ return 0;
+}
+
+int
+barrier_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ barrier_local_set_gfid(frame, loc->inode->gfid, this);
+ STACK_WIND(frame, barrier_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
+ return 0;
+}
+
+int32_t
+barrier_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ barrier_local_set_gfid(frame, loc->inode->gfid, this);
+ STACK_WIND(frame, barrier_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+}
+
+int32_t
+barrier_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ barrier_local_set_gfid(frame, fd->inode->gfid, this);
+ STACK_WIND(frame, barrier_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+}
+
+int32_t
+barrier_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ barrier_local_set_gfid(frame, fd->inode->gfid, this);
+ STACK_WIND(frame, barrier_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+}
+
+call_stub_t *
+__barrier_dequeue(xlator_t *this, struct list_head *queue)
+{
+ call_stub_t *stub = NULL;
+ barrier_priv_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ if (list_empty(queue))
+ goto out;
+
+ stub = list_entry(queue->next, call_stub_t, list);
+ list_del_init(&stub->list);
+
+out:
+ return stub;
+}
+
+void
+barrier_dequeue_all(xlator_t *this, struct list_head *queue)
+{
+ call_stub_t *stub = NULL;
+
+ gf_log(this->name, GF_LOG_INFO, "Dequeuing all the barriered fops");
+
+ /* TODO: Start the below task in a new thread */
+ while ((stub = __barrier_dequeue(this, queue)))
+ call_resume(stub);
+
+ gf_log(this->name, GF_LOG_INFO,
+ "Dequeuing the barriered fops is "
+ "finished");
+ return;
+}
+
+void
+barrier_timeout(void *data)
+{
+ xlator_t *this = NULL;
+ barrier_priv_t *priv = NULL;
+ struct list_head queue = {
+ 0,
+ };
+
+ this = data;
+ THIS = this;
+ priv = this->private;
+
+ INIT_LIST_HEAD(&queue);
+
+ gf_log(this->name, GF_LOG_CRITICAL,
+ "Disabling barrier because of "
+ "the barrier timeout.");
+
+ LOCK(&priv->lock);
+ {
+ __barrier_disable(this, &queue);
+ }
+ UNLOCK(&priv->lock);
+
+ barrier_dequeue_all(this, &queue);
+
+ return;
+}
+
+void
+__barrier_enqueue(xlator_t *this, call_stub_t *stub)
+{
+ barrier_priv_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ list_add_tail(&stub->list, &priv->queue);
+ priv->queue_size++;
+
+ return;
+}
+
+void
+__barrier_disable(xlator_t *this, struct list_head *queue)
+{
+ GF_UNUSED int ret = 0;
+ barrier_priv_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ if (priv->timer) {
+ ret = gf_timer_call_cancel(this->ctx, priv->timer);
+ priv->timer = NULL;
+ }
+
+ list_splice_init(&priv->queue, queue);
+ priv->queue_size = 0;
+ priv->barrier_enabled = _gf_false;
+}
+
+int
+__barrier_enable(xlator_t *this, barrier_priv_t *priv)
+{
+ int ret = -1;
+
+ priv->timer = gf_timer_call_after(this->ctx, priv->timeout, barrier_timeout,
+ (void *)this);
+ if (!priv->timer) {
+ gf_log(this->name, GF_LOG_CRITICAL,
+ "Couldn't add barrier "
+ "timeout event.");
+ goto out;
+ }
+
+ priv->barrier_enabled = _gf_true;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+notify(xlator_t *this, int event, void *data, ...)
+{
+ barrier_priv_t *priv = this->private;
+ dict_t *dict = NULL;
+ int ret = -1;
+ int barrier_enabled = _gf_false;
+ struct list_head queue = {
+ 0,
+ };
+
+ GF_ASSERT(priv);
+ INIT_LIST_HEAD(&queue);
+
+ switch (event) {
+ case GF_EVENT_TRANSLATOR_OP: {
+ dict = data;
+ barrier_enabled = dict_get_str_boolean(dict, "barrier", -1);
+
+ if (barrier_enabled == -1) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Could not fetch "
+ " barrier key from the dictionary.");
+ goto out;
+ }
+
+ LOCK(&priv->lock);
+ {
+ if (!priv->barrier_enabled) {
+ if (barrier_enabled) {
+ ret = __barrier_enable(this, priv);
+ } else {
+ UNLOCK(&priv->lock);
+ gf_log(this->name, GF_LOG_ERROR, "Already disabled.");
+ goto post_unlock;
+ }
+ } else {
+ if (!barrier_enabled) {
+ __barrier_disable(this, &queue);
+ ret = 0;
+ } else {
+ UNLOCK(&priv->lock);
+ gf_log(this->name, GF_LOG_ERROR, "Already enabled");
+ goto post_unlock;
+ }
+ }
+ }
+ UNLOCK(&priv->lock);
+ post_unlock:
+ if (!list_empty(&queue))
+ barrier_dequeue_all(this, &queue);
+
+ break;
+ }
+ default: {
+ default_notify(this, event, data);
+ ret = 0;
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ barrier_priv_t *priv = NULL;
+ int ret = -1;
+ gf_boolean_t barrier_enabled = _gf_false;
+ uint32_t timeout = {
+ 0,
+ };
+ struct list_head queue = {
+ 0,
+ };
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GF_OPTION_RECONF("barrier", barrier_enabled, options, bool, out);
+ GF_OPTION_RECONF("barrier-timeout", timeout, options, time, out);
+
+ INIT_LIST_HEAD(&queue);
+
+ LOCK(&priv->lock);
+ {
+ if (!priv->barrier_enabled) {
+ if (barrier_enabled) {
+ ret = __barrier_enable(this, priv);
+ if (ret) {
+ goto unlock;
+ }
+ }
+ } else {
+ if (!barrier_enabled) {
+ __barrier_disable(this, &queue);
+ }
+ }
+ priv->timeout.tv_sec = timeout;
+ ret = 0;
+ }
+unlock:
+ UNLOCK(&priv->lock);
+
+ if (!list_empty(&queue))
+ barrier_dequeue_all(this, &queue);
+
+out:
+ return ret;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ ret = xlator_mem_acct_init(this, gf_barrier_mt_end + 1);
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR,
+ "Memory accounting "
+ "initialization failed.");
+
+ return ret;
+}
+
+int
+init(xlator_t *this)
+{
+ int ret = -1;
+ barrier_priv_t *priv = NULL;
+ uint32_t timeout = {
+ 0,
+ };
+
+ if (!this->children || this->children->next) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'barrier' not configured with exactly one child");
+ goto out;
+ }
+
+ if (!this->parents)
+ gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_barrier_mt_priv_t);
+ if (!priv)
+ goto out;
+
+ LOCK_INIT(&priv->lock);
+
+ GF_OPTION_INIT("barrier", priv->barrier_enabled, bool, out);
+ GF_OPTION_INIT("barrier-timeout", timeout, time, out);
+ priv->timeout.tv_sec = timeout;
+
+ INIT_LIST_HEAD(&priv->queue);
+
+ if (priv->barrier_enabled) {
+ ret = __barrier_enable(this, priv);
+ if (ret == -1)
+ goto out;
+ }
+
+ this->private = priv;
+ ret = 0;
+out:
+ if (ret && priv)
+ GF_FREE(priv);
+
+ return ret;
+}
+
+void
+fini(xlator_t *this)
+{
+ barrier_priv_t *priv = NULL;
+ struct list_head queue = {
+ 0,
+ };
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+
+ INIT_LIST_HEAD(&queue);
+
+ gf_log(this->name, GF_LOG_INFO,
+ "Disabling barriering and dequeuing "
+ "all the queued fops");
+ LOCK(&priv->lock);
+ {
+ __barrier_disable(this, &queue);
+ }
+ UNLOCK(&priv->lock);
+
+ if (!list_empty(&queue))
+ barrier_dequeue_all(this, &queue);
+
+ this->private = NULL;
+
+ LOCK_DESTROY(&priv->lock);
+ GF_FREE(priv);
+out:
+ return;
+}
+
+static void
+barrier_dump_stub(call_stub_t *stub, char *prefix)
+{
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+
+ gf_proc_dump_build_key(key, prefix, "fop");
+ gf_proc_dump_write(key, "%s", gf_fop_list[stub->fop]);
+
+ if (stub->frame->local) {
+ gf_proc_dump_build_key(key, prefix, "gfid");
+ gf_proc_dump_write(key, "%s",
+ uuid_utoa(*(uuid_t *)(stub->frame->local)));
+ }
+ if (stub->args.loc.path) {
+ gf_proc_dump_build_key(key, prefix, "path");
+ gf_proc_dump_write(key, "%s", stub->args.loc.path);
+ }
+ if (stub->args.loc.name) {
+ gf_proc_dump_build_key(key, prefix, "name");
+ gf_proc_dump_write(key, "%s", stub->args.loc.name);
+ }
+
+ return;
+}
+
+static void
+__barrier_dump_queue(barrier_priv_t *priv)
+{
+ call_stub_t *stub = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO("barrier", priv, out);
+
+ list_for_each_entry(stub, &priv->queue, list)
+ {
+ snprintf(key, sizeof(key), "stub.%d", i++);
+ gf_proc_dump_add_section("%s", key);
+ barrier_dump_stub(stub, key);
+ }
+
+out:
+ return;
+}
+
+int
+barrier_dump_priv(xlator_t *this)
+{
+ int ret = -1;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ barrier_priv_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("barrier", this, out);
+
+ priv = this->private;
+ if (!priv)
+ return 0;
+
+ gf_proc_dump_build_key(key, "xlator.features.barrier", "priv");
+ gf_proc_dump_add_section("%s", key);
+ gf_proc_dump_build_key(key, "barrier", "enabled");
+
+ LOCK(&priv->lock);
+ {
+ gf_proc_dump_write(key, "%d", priv->barrier_enabled);
+ gf_proc_dump_build_key(key, "barrier", "timeout");
+ gf_proc_dump_write(key, "%ld", priv->timeout.tv_sec);
+ if (priv->barrier_enabled) {
+ gf_proc_dump_build_key(key, "barrier", "queue_size");
+ gf_proc_dump_write(key, "%d", priv->queue_size);
+ __barrier_dump_queue(priv);
+ }
+ }
+ UNLOCK(&priv->lock);
+
+out:
+ return ret;
+}
+
+struct xlator_fops fops = {
+
+ /* Barrier Class fops */
+ .rmdir = barrier_rmdir,
+ .unlink = barrier_unlink,
+ .rename = barrier_rename,
+ .removexattr = barrier_removexattr,
+ .fremovexattr = barrier_fremovexattr,
+ .truncate = barrier_truncate,
+ .ftruncate = barrier_ftruncate,
+ .fsync = barrier_fsync,
+
+ /* Writes with only O_SYNC flag */
+ .writev = barrier_writev,
+};
+
+struct xlator_dumpops dumpops = {
+ .priv = barrier_dump_priv,
+};
+
+struct xlator_cbks cbks;
+
+struct volume_options options[] = {
+ {.key = {"barrier"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "disable",
+ .op_version = {GD_OP_VERSION_3_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "When \"enabled\", blocks acknowledgements to application "
+ "for file operations such as rmdir, rename, unlink, "
+ "removexattr, fremovexattr, truncate, ftruncate, "
+ "write (with O_SYNC), fsync. It is turned \"off\" by "
+ "default."},
+ {.key = {"barrier-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .default_value = BARRIER_TIMEOUT,
+ .op_version = {GD_OP_VERSION_3_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "After 'timeout' seconds since the time 'barrier' "
+ "option was set to \"on\", acknowledgements to file "
+ "operations are no longer blocked and previously "
+ "blocked acknowledgements are sent to the application"},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "barrier",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/barrier/src/barrier.h b/xlators/features/barrier/src/barrier.h
new file mode 100644
index 00000000000..1337f311f7d
--- /dev/null
+++ b/xlators/features/barrier/src/barrier.h
@@ -0,0 +1,89 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BARRIER_H__
+#define __BARRIER_H__
+
+#include "barrier-mem-types.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/timer.h>
+#include <glusterfs/call-stub.h>
+
+#define BARRIER_FOP_CBK(fop_name, label, frame, this, params...) \
+ do { \
+ barrier_priv_t *_priv = NULL; \
+ call_stub_t *_stub = NULL; \
+ gf_boolean_t _barrier_enabled = _gf_false; \
+ struct list_head queue = { \
+ 0, \
+ }; \
+ \
+ INIT_LIST_HEAD(&queue); \
+ \
+ _priv = this->private; \
+ GF_ASSERT(_priv); \
+ \
+ LOCK(&_priv->lock); \
+ { \
+ if (_priv->barrier_enabled) { \
+ _barrier_enabled = _priv->barrier_enabled; \
+ \
+ _stub = fop_##fop_name##_cbk_stub( \
+ frame, barrier_##fop_name##_cbk_resume, params); \
+ if (!_stub) { \
+ __barrier_disable(this, &queue); \
+ goto unlock; \
+ } \
+ \
+ __barrier_enqueue(this, _stub); \
+ } \
+ } \
+ unlock: \
+ UNLOCK(&_priv->lock); \
+ \
+ if (_stub) \
+ goto label; \
+ \
+ if (_barrier_enabled && !_stub) { \
+ gf_log(this->name, GF_LOG_CRITICAL, \
+ "Failed to barrier FOPs, disabling " \
+ "barrier. FOP: %s, ERROR: %s", \
+ #fop_name, strerror(ENOMEM)); \
+ barrier_dequeue_all(this, &queue); \
+ } \
+ barrier_local_free_gfid(frame); \
+ STACK_UNWIND_STRICT(fop_name, frame, params); \
+ goto label; \
+ } while (0)
+
+typedef struct {
+ gf_timer_t *timer;
+ gf_lock_t lock;
+ struct list_head queue;
+ struct timespec timeout;
+ uint32_t queue_size;
+ gf_boolean_t barrier_enabled;
+ char _pad[3]; /* manual padding */
+} barrier_priv_t;
+
+int
+__barrier_enable(xlator_t *this, barrier_priv_t *priv);
+void
+__barrier_enqueue(xlator_t *this, call_stub_t *stub);
+void
+__barrier_disable(xlator_t *this, struct list_head *queue);
+void
+barrier_timeout(void *data);
+void
+barrier_dequeue_all(xlator_t *this, struct list_head *queue);
+call_stub_t *
+__barrier_dequeue(xlator_t *this, struct list_head *queue);
+
+#endif
diff --git a/rpc/rpc-transport/rdma/Makefile.am b/xlators/features/bit-rot/Makefile.am
index f963effea22..f963effea22 100644
--- a/rpc/rpc-transport/rdma/Makefile.am
+++ b/xlators/features/bit-rot/Makefile.am
diff --git a/xlators/features/bit-rot/src/Makefile.am b/xlators/features/bit-rot/src/Makefile.am
new file mode 100644
index 00000000000..b5e4a7d62a0
--- /dev/null
+++ b/xlators/features/bit-rot/src/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = stub bitd
diff --git a/xlators/features/bit-rot/src/bitd/Makefile.am b/xlators/features/bit-rot/src/bitd/Makefile.am
new file mode 100644
index 00000000000..6db800e6565
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/Makefile.am
@@ -0,0 +1,23 @@
+if WITH_SERVER
+xlator_LTLIBRARIES = bit-rot.la
+endif
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+bit_rot_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src/ -I$(top_builddir)/rpc/xdr/src/ \
+ -I$(top_srcdir)/rpc/rpc-lib/src -I$(CONTRIBDIR)/timer-wheel \
+ -I$(top_srcdir)/xlators/features/bit-rot/src/stub
+
+bit_rot_la_SOURCES = bit-rot.c bit-rot-scrub.c bit-rot-ssm.c \
+ bit-rot-scrub-status.c
+bit_rot_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/xlators/features/changelog/lib/src/libgfchangelog.la
+
+noinst_HEADERS = bit-rot.h bit-rot-scrub.h bit-rot-bitd-messages.h bit-rot-ssm.h \
+ bit-rot-scrub-status.h
+
+AM_CFLAGS = -Wall -DBR_RATE_LIMIT_SIGNER $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h
new file mode 100644
index 00000000000..5bc5103a27c
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-bitd-messages.h
@@ -0,0 +1,101 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _BITROT_BITD_MESSAGES_H_
+#define _BITROT_BITD_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(BITROT_BITD, BRB_MSG_FD_CREATE_FAILED, BRB_MSG_READV_FAILED,
+ BRB_MSG_BLOCK_READ_FAILED, BRB_MSG_CALC_CHECKSUM_FAILED,
+ BRB_MSG_NO_MEMORY, BRB_MSG_GET_SIGN_FAILED, BRB_MSG_SET_SIGN_FAILED,
+ BRB_MSG_OP_FAILED, BRB_MSG_READ_AND_SIGN_FAILED, BRB_MSG_SIGN_FAILED,
+ BRB_MSG_GET_SUBVOL_FAILED, BRB_MSG_SET_TIMER_FAILED,
+ BRB_MSG_GET_INFO_FAILED, BRB_MSG_PATH_FAILED, BRB_MSG_MARK_BAD_FILE,
+ BRB_MSG_TRIGGER_SIGN, BRB_MSG_REGISTER_FAILED,
+ BRB_MSG_CRAWLING_START, BRB_MSG_SPAWN_FAILED,
+ BRB_MSG_INVALID_SUBVOL_CHILD, BRB_MSG_SKIP_OBJECT, BRB_MSG_NO_CHILD,
+ BRB_MSG_CHECKSUM_MISMATCH, BRB_MSG_MARK_CORRUPTED,
+ BRB_MSG_CRAWLING_FINISH, BRB_MSG_CALC_ERROR, BRB_MSG_LOOKUP_FAILED,
+ BRB_MSG_PARTIAL_VERSION_PRESENCE, BRB_MSG_MEM_ACNT_FAILED,
+ BRB_MSG_TIMER_WHEEL_UNAVAILABLE, BRB_MSG_BITROT_LOADED,
+ BRB_MSG_SCALE_DOWN_FAILED, BRB_MSG_SCALE_UP_FAILED,
+ BRB_MSG_SCALE_DOWN_SCRUBBER, BRB_MSG_SCALING_UP_SCRUBBER,
+ BRB_MSG_UNKNOWN_THROTTLE, BRB_MSG_RATE_LIMIT_INFO,
+ BRB_MSG_SCRUB_INFO, BRB_MSG_CONNECTED_TO_BRICK, BRB_MSG_BRICK_INFO,
+ BRB_MSG_SUBVOL_CONNECT_FAILED, BRB_MSG_INVALID_SUBVOL,
+ BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, BRB_MSG_SCRUB_START,
+ BRB_MSG_SCRUB_FINISH, BRB_MSG_SCRUB_RUNNING,
+ BRB_MSG_SCRUB_RESCHEDULED, BRB_MSG_SCRUB_TUNABLE,
+ BRB_MSG_SCRUB_THREAD_CLEANUP, BRB_MSG_SCRUBBER_CLEANED,
+ BRB_MSG_GENERIC_SSM_INFO, BRB_MSG_ZERO_TIMEOUT_BUG,
+ BRB_MSG_BAD_OBJ_READDIR_FAIL, BRB_MSG_SSM_FAILED,
+ BRB_MSG_SCRUB_WAIT_FAILED, BRB_MSG_TRIGGER_SIGN_FAILED,
+ BRB_MSG_EVENT_UNHANDLED, BRB_MSG_COULD_NOT_SCHEDULE_SCRUB,
+ BRB_MSG_THREAD_CREATION_FAILED, BRB_MSG_MEM_POOL_ALLOC,
+ BRB_MSG_SAVING_HASH_FAILED);
+
+#define BRB_MSG_FD_CREATE_FAILED_STR "failed to create fd for the inode"
+#define BRB_MSG_READV_FAILED_STR "readv failed"
+#define BRB_MSG_BLOCK_READ_FAILED_STR "reading block failed"
+#define BRB_MSG_NO_MEMORY_STR "failed to allocate memory"
+#define BRB_MSG_CALC_CHECKSUM_FAILED_STR "calculating checksum failed"
+#define BRB_MSG_GET_SIGN_FAILED_STR "failed to get the signature"
+#define BRB_MSG_SET_SIGN_FAILED_STR "signing failed"
+#define BRB_MSG_OP_FAILED_STR "failed on object"
+#define BRB_MSG_TRIGGER_SIGN_FAILED_STR "Could not trigger signing"
+#define BRB_MSG_READ_AND_SIGN_FAILED_STR "reading and signing of object failed"
+#define BRB_MSG_SET_TIMER_FAILED_STR "Failed to allocate object expiry timer"
+#define BRB_MSG_GET_SUBVOL_FAILED_STR \
+ "failed to get the subvolume for the brick"
+#define BRB_MSG_PATH_FAILED_STR "path failed"
+#define BRB_MSG_SKIP_OBJECT_STR "Entry is marked corrupted. skipping"
+#define BRB_MSG_PARTIAL_VERSION_PRESENCE_STR \
+ "PArtial version xattr presence detected, ignoring"
+#define BRB_MSG_TRIGGER_SIGN_STR "Triggering signing"
+#define BRB_MSG_CRAWLING_START_STR \
+ "Crawling brick, scanning for unsigned objects"
+#define BRB_MSG_CRAWLING_FINISH_STR "Completed crawling brick"
+#define BRB_MSG_REGISTER_FAILED_STR "Register to changelog failed"
+#define BRB_MSG_SPAWN_FAILED_STR "failed to spawn"
+#define BRB_MSG_CONNECTED_TO_BRICK_STR "Connected to brick"
+#define BRB_MSG_LOOKUP_FAILED_STR "lookup on root failed"
+#define BRB_MSG_GET_INFO_FAILED_STR "failed to get stub info"
+#define BRB_MSG_SCRUB_THREAD_CLEANUP_STR "Error cleaning up scanner thread"
+#define BRB_MSG_SCRUBBER_CLEANED_STR "clened up scrubber for brick"
+#define BRB_MSG_SUBVOL_CONNECT_FAILED_STR \
+ "callback handler for subvolume failed"
+#define BRB_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed"
+#define BRB_MSG_EVENT_UNHANDLED_STR "Event unhandled for child"
+#define BRB_MSG_INVALID_SUBVOL_STR "Got event from invalid subvolume"
+#define BRB_MSG_RESCHEDULE_SCRUBBER_FAILED_STR \
+ "on demand scrub schedule failed. Scrubber is not in pending state."
+#define BRB_MSG_COULD_NOT_SCHEDULE_SCRUB_STR \
+ "Could not schedule ondemand scrubbing. Scrubbing will continue " \
+ "according to old frequency."
+#define BRB_MSG_THREAD_CREATION_FAILED_STR "thread creation failed"
+#define BRB_MSG_RATE_LIMIT_INFO_STR "Rate Limit Info"
+#define BRB_MSG_MEM_POOL_ALLOC_STR "failed to allocate mem-pool for timer"
+#define BRB_MSG_NO_CHILD_STR "FATAL: no children"
+#define BRB_MSG_TIMER_WHEEL_UNAVAILABLE_STR "global timer wheel unavailable"
+#define BRB_MSG_BITROT_LOADED_STR "bit-rot xlator loaded"
+#define BRB_MSG_SAVING_HASH_FAILED_STR \
+ "failed to allocate memory for saving hash of the object"
+#endif /* !_BITROT_BITD_MESSAGES_H_ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
new file mode 100644
index 00000000000..5cef2ffa5e5
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.c
@@ -0,0 +1,78 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <string.h>
+#include <stdio.h>
+
+#include "bit-rot-scrub-status.h"
+
+void
+br_inc_unsigned_file_count(br_scrub_stats_t *scrub_stat)
+{
+ if (!scrub_stat)
+ return;
+
+ pthread_mutex_lock(&scrub_stat->lock);
+ {
+ scrub_stat->unsigned_files++;
+ }
+ pthread_mutex_unlock(&scrub_stat->lock);
+}
+
+void
+br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat)
+{
+ if (!scrub_stat)
+ return;
+
+ pthread_mutex_lock(&scrub_stat->lock);
+ {
+ scrub_stat->scrubbed_files++;
+ }
+ pthread_mutex_unlock(&scrub_stat->lock);
+}
+
+void
+br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time)
+{
+ if (!scrub_stat)
+ return;
+
+ pthread_mutex_lock(&scrub_stat->lock);
+ {
+ scrub_stat->scrub_start_time = time;
+ }
+ pthread_mutex_unlock(&scrub_stat->lock);
+}
+
+void
+br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
+ time_t time)
+{
+ int lst_size = 0;
+
+ if (!scrub_stat)
+ return;
+
+ lst_size = sizeof(scrub_stat->last_scrub_time);
+ if (strlen(timestr) >= lst_size)
+ return;
+
+ pthread_mutex_lock(&scrub_stat->lock);
+ {
+ scrub_stat->scrub_end_time = time;
+
+ scrub_stat->scrub_duration = scrub_stat->scrub_end_time -
+ scrub_stat->scrub_start_time;
+
+ snprintf(scrub_stat->last_scrub_time, lst_size, "%s", timestr);
+ }
+ pthread_mutex_unlock(&scrub_stat->lock);
+}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
new file mode 100644
index 00000000000..f022aa831eb
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub-status.h
@@ -0,0 +1,50 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_SCRUB_STATUS_H__
+#define __BIT_ROT_SCRUB_STATUS_H__
+
+#include <stdint.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+#include <glusterfs/common-utils.h>
+
+struct br_scrub_stats {
+ uint64_t scrubbed_files; /* Total number of scrubbed files. */
+
+ uint64_t unsigned_files; /* Total number of unsigned files. */
+
+ uint64_t scrub_duration; /* Duration of last scrub. */
+
+ char last_scrub_time[GF_TIMESTR_SIZE]; /* Last scrub completion time. */
+
+ time_t scrub_start_time; /* Scrubbing starting time. */
+
+ time_t scrub_end_time; /* Scrubbing finishing time. */
+
+ int8_t scrub_running; /* Whether scrub running or not. */
+
+ pthread_mutex_t lock;
+};
+
+typedef struct br_scrub_stats br_scrub_stats_t;
+
+void
+br_inc_unsigned_file_count(br_scrub_stats_t *scrub_stat);
+void
+br_inc_scrubbed_file(br_scrub_stats_t *scrub_stat);
+void
+br_update_scrub_start_time(br_scrub_stats_t *scrub_stat, time_t time);
+void
+br_update_scrub_finish_time(br_scrub_stats_t *scrub_stat, char *timestr,
+ time_t time);
+
+#endif /* __BIT_ROT_SCRUB_STATUS_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
new file mode 100644
index 00000000000..289dd53f610
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.c
@@ -0,0 +1,2070 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <math.h>
+#include <ctype.h>
+#include <sys/uio.h>
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
+
+#include "bit-rot-scrub.h"
+#include <pthread.h>
+#include "bit-rot-bitd-messages.h"
+#include "bit-rot-scrub-status.h"
+#include <glusterfs/events.h>
+
+struct br_scrubbers {
+ pthread_t scrubthread;
+
+ struct list_head list;
+};
+
+struct br_fsscan_entry {
+ void *data;
+
+ loc_t parent;
+
+ gf_dirent_t *entry;
+
+ struct br_scanfs *fsscan; /* backpointer to subvolume scanner */
+
+ struct list_head list;
+};
+
+/**
+ * fetch signature extended attribute from an object's fd.
+ * NOTE: On success @xattr is not unref'd as @sign points
+ * to the dictionary value.
+ */
+static int32_t
+bitd_fetch_signature(xlator_t *this, br_child_t *child, fd_t *fd,
+ dict_t **xattr, br_isignature_out_t **sign)
+{
+ int32_t ret = -1;
+
+ ret = syncop_fgetxattr(child->xl, fd, xattr, GLUSTERFS_GET_OBJECT_SIGNATURE,
+ NULL, NULL);
+ if (ret < 0) {
+ br_log_object(this, "fgetxattr", fd->inode->gfid, -ret);
+ goto out;
+ }
+
+ ret = dict_get_ptr(*xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void **)sign);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED,
+ "failed to extract signature info [GFID: %s]",
+ uuid_utoa(fd->inode->gfid));
+ goto unref_dict;
+ }
+
+ return 0;
+
+unref_dict:
+ dict_unref(*xattr);
+out:
+ return -1;
+}
+
+/**
+ * POST COMPUTE CHECK
+ *
+ * Checks to be performed before verifying calculated signature
+ * Object is skipped if:
+ * - has stale signature
+ * - mismatches versions caches in pre-compute check
+ */
+
+int32_t
+bitd_scrub_post_compute_check(xlator_t *this, br_child_t *child, fd_t *fd,
+ unsigned long version,
+ br_isignature_out_t **signature,
+ br_scrub_stats_t *scrub_stat,
+ gf_boolean_t skip_stat)
+{
+ int32_t ret = 0;
+ size_t signlen = 0;
+ dict_t *xattr = NULL;
+ br_isignature_out_t *signptr = NULL;
+
+ ret = bitd_fetch_signature(this, child, fd, &xattr, &signptr);
+ if (ret < 0) {
+ if (!skip_stat)
+ br_inc_unsigned_file_count(scrub_stat);
+ goto out;
+ }
+
+ /**
+ * Either the object got dirtied during the time the signature was
+ * calculated OR the version we saved during pre-compute check does
+ * not match now, implying that the object got dirtied and signed in
+ * between scrubs pre & post compute checks (checksum window).
+ *
+ * The log entry looks pretty ugly, but helps in debugging..
+ */
+ if (signptr->stale || (signptr->version != version)) {
+ if (!skip_stat)
+ br_inc_unsigned_file_count(scrub_stat);
+ gf_msg_debug(this->name, 0,
+ "<STAGE: POST> Object [GFID: %s] "
+ "either has a stale signature OR underwent "
+ "signing during checksumming {Stale: %d | "
+ "Version: %lu,%lu}",
+ uuid_utoa(fd->inode->gfid), (signptr->stale) ? 1 : 0,
+ version, signptr->version);
+ ret = -1;
+ goto unref_dict;
+ }
+
+ signlen = signptr->signaturelen;
+ *signature = GF_MALLOC(sizeof(br_isignature_out_t) + signlen,
+ gf_common_mt_char);
+
+ (void)memcpy(*signature, signptr, sizeof(br_isignature_out_t) + signlen);
+
+ (*signature)->signaturelen = signlen;
+
+unref_dict:
+ dict_unref(xattr);
+out:
+ return ret;
+}
+
+static int32_t
+bitd_signature_staleness(xlator_t *this, br_child_t *child, fd_t *fd,
+ int *stale, unsigned long *version,
+ br_scrub_stats_t *scrub_stat, gf_boolean_t skip_stat)
+{
+ int32_t ret = -1;
+ dict_t *xattr = NULL;
+ br_isignature_out_t *signptr = NULL;
+
+ ret = bitd_fetch_signature(this, child, fd, &xattr, &signptr);
+ if (ret < 0) {
+ if (!skip_stat)
+ br_inc_unsigned_file_count(scrub_stat);
+ goto out;
+ }
+
+ /**
+ * save version for validation in post compute stage
+ * c.f. bitd_scrub_post_compute_check()
+ */
+ *stale = signptr->stale ? 1 : 0;
+ *version = signptr->version;
+
+ dict_unref(xattr);
+
+out:
+ return ret;
+}
+
+/**
+ * PRE COMPUTE CHECK
+ *
+ * Checks to be performed before initiating object signature calculation.
+ * An object is skipped if:
+ * - it's already marked corrupted
+ * - has stale signature
+ */
+int32_t
+bitd_scrub_pre_compute_check(xlator_t *this, br_child_t *child, fd_t *fd,
+ unsigned long *version,
+ br_scrub_stats_t *scrub_stat,
+ gf_boolean_t skip_stat)
+{
+ int stale = 0;
+ int32_t ret = -1;
+
+ if (bitd_is_bad_file(this, child, NULL, fd)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT,
+ "Object [GFID: %s] is marked corrupted, skipping..",
+ uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+ ret = bitd_signature_staleness(this, child, fd, &stale, version, scrub_stat,
+ skip_stat);
+ if (!ret && stale) {
+ if (!skip_stat)
+ br_inc_unsigned_file_count(scrub_stat);
+ gf_msg_debug(this->name, 0,
+ "<STAGE: PRE> Object [GFID: %s] "
+ "has stale signature",
+ uuid_utoa(fd->inode->gfid));
+ ret = -1;
+ }
+
+out:
+ return ret;
+}
+
+/* static int */
+int
+bitd_compare_ckum(xlator_t *this, br_isignature_out_t *sign, unsigned char *md,
+ inode_t *linked_inode, gf_dirent_t *entry, fd_t *fd,
+ br_child_t *child, loc_t *loc)
+{
+ int ret = -1;
+ dict_t *xattr = NULL;
+
+ GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, sign, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, child, out);
+ GF_VALIDATE_OR_GOTO(this->name, linked_inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, md, out);
+ GF_VALIDATE_OR_GOTO(this->name, entry, out);
+
+ if (strncmp(sign->signature, (char *)md, sign->signaturelen) == 0) {
+ gf_msg_debug(this->name, 0,
+ "%s [GFID: %s | Brick: %s] "
+ "matches calculated checksum",
+ loc->path, uuid_utoa(linked_inode->gfid),
+ child->brick_path);
+ return 0;
+ }
+
+ gf_msg(this->name, GF_LOG_DEBUG, 0, BRB_MSG_CHECKSUM_MISMATCH,
+ "Object checksum mismatch: %s [GFID: %s | Brick: %s]", loc->path,
+ uuid_utoa(linked_inode->gfid), child->brick_path);
+ gf_msg(this->name, GF_LOG_ALERT, 0, BRB_MSG_CHECKSUM_MISMATCH,
+ "CORRUPTION DETECTED: Object %s {Brick: %s | GFID: %s}", loc->path,
+ child->brick_path, uuid_utoa(linked_inode->gfid));
+
+ /* Perform bad-file marking */
+ xattr = dict_new();
+ if (!xattr) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32(xattr, BITROT_OBJECT_BAD_KEY, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_MARK_BAD_FILE,
+ "Error setting bad-file marker for %s [GFID: %s | "
+ "Brick: %s]",
+ loc->path, uuid_utoa(linked_inode->gfid), child->brick_path);
+ goto dictfree;
+ }
+
+ gf_msg(this->name, GF_LOG_ALERT, 0, BRB_MSG_MARK_CORRUPTED,
+ "Marking"
+ " %s [GFID: %s | Brick: %s] as corrupted..",
+ loc->path, uuid_utoa(linked_inode->gfid), child->brick_path);
+ gf_event(EVENT_BITROT_BAD_FILE, "gfid=%s;path=%s;brick=%s",
+ uuid_utoa(linked_inode->gfid), loc->path, child->brick_path);
+ ret = syncop_fsetxattr(child->xl, fd, xattr, 0, NULL, NULL);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_MARK_BAD_FILE,
+ "Error marking object %s [GFID: %s] as corrupted", loc->path,
+ uuid_utoa(linked_inode->gfid));
+
+dictfree:
+ dict_unref(xattr);
+out:
+ return ret;
+}
+
+/**
+ * "The Scrubber"
+ *
+ * Perform signature validation for a given object with the assumption
+ * that the signature is SHA256 (because signer as of now _always_
+ * signs with SHA256).
+ */
+int
+br_scrubber_scrub_begin(xlator_t *this, struct br_fsscan_entry *fsentry)
+{
+ int32_t ret = -1;
+ fd_t *fd = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ struct iatt parent_buf = {
+ 0,
+ };
+ pid_t pid = 0;
+ br_child_t *child = NULL;
+ unsigned char *md = NULL;
+ inode_t *linked_inode = NULL;
+ br_isignature_out_t *sign = NULL;
+ unsigned long signedversion = 0;
+ gf_dirent_t *entry = NULL;
+ br_private_t *priv = NULL;
+ loc_t *parent = NULL;
+ gf_boolean_t skip_stat = _gf_false;
+ uuid_t shard_root_gfid = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("bit-rot", fsentry, out);
+
+ entry = fsentry->entry;
+ parent = &fsentry->parent;
+ child = fsentry->data;
+
+ priv = this->private;
+
+ GF_VALIDATE_OR_GOTO("bit-rot", entry, out);
+ GF_VALIDATE_OR_GOTO("bit-rot", parent, out);
+ GF_VALIDATE_OR_GOTO("bit-rot", child, out);
+ GF_VALIDATE_OR_GOTO("bit-rot", priv, out);
+
+ pid = GF_CLIENT_PID_SCRUB;
+
+ ret = br_prepare_loc(this, child, parent, entry, &loc);
+ if (!ret)
+ goto out;
+
+ syncopctx_setfspid(&pid);
+
+ ret = syncop_lookup(child->xl, &loc, &iatt, &parent_buf, NULL, NULL);
+ if (ret) {
+ br_log_object_path(this, "lookup", loc.path, -ret);
+ goto out;
+ }
+
+ linked_inode = inode_link(loc.inode, parent->inode, loc.name, &iatt);
+ if (linked_inode)
+ inode_lookup(linked_inode);
+
+ gf_msg_debug(this->name, 0, "Scrubbing object %s [GFID: %s]", entry->d_name,
+ uuid_utoa(linked_inode->gfid));
+
+ if (iatt.ia_type != IA_IFREG) {
+ gf_msg_debug(this->name, 0, "%s is not a regular file", entry->d_name);
+ ret = 0;
+ goto unref_inode;
+ }
+
+ if (IS_DHT_LINKFILE_MODE((&iatt))) {
+ gf_msg_debug(this->name, 0, "%s is a dht sticky bit file",
+ entry->d_name);
+ ret = 0;
+ goto unref_inode;
+ }
+
+ /* skip updating scrub statistics for shard entries */
+ gf_uuid_parse(SHARD_ROOT_GFID, shard_root_gfid);
+ if (gf_uuid_compare(loc.pargfid, shard_root_gfid) == 0)
+ skip_stat = _gf_true;
+
+ /**
+ * open() an fd for subsequent operations
+ */
+ fd = fd_create(linked_inode, 0);
+ if (!fd) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED,
+ "failed to create fd for inode %s",
+ uuid_utoa(linked_inode->gfid));
+ goto unref_inode;
+ }
+
+ ret = syncop_open(child->xl, &loc, O_RDWR, fd, NULL, NULL);
+ if (ret) {
+ br_log_object(this, "open", linked_inode->gfid, -ret);
+ ret = -1;
+ goto unrefd;
+ }
+
+ fd_bind(fd);
+
+ /**
+ * perform pre compute checks before initiating checksum
+ * computation
+ * - presence of bad object
+ * - signature staleness
+ */
+ ret = bitd_scrub_pre_compute_check(this, child, fd, &signedversion,
+ &priv->scrub_stat, skip_stat);
+ if (ret)
+ goto unrefd; /* skip this object */
+
+ /* if all's good, proceed to calculate the hash */
+ md = GF_MALLOC(SHA256_DIGEST_LENGTH, gf_common_mt_char);
+ if (!md)
+ goto unrefd;
+
+ ret = br_calculate_obj_checksum(md, child, fd, &iatt);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_CALC_ERROR,
+ "error calculating hash for object [GFID: %s]",
+ uuid_utoa(fd->inode->gfid));
+ ret = -1;
+ goto free_md;
+ }
+
+ /**
+ * perform post compute checks as an object's signature may have
+ * become stale while scrubber calculated checksum.
+ */
+ ret = bitd_scrub_post_compute_check(this, child, fd, signedversion, &sign,
+ &priv->scrub_stat, skip_stat);
+ if (ret)
+ goto free_md;
+
+ ret = bitd_compare_ckum(this, sign, md, linked_inode, entry, fd, child,
+ &loc);
+
+ if (!skip_stat)
+ br_inc_scrubbed_file(&priv->scrub_stat);
+
+ GF_FREE(sign); /* allocated on post-compute */
+
+ /** fd_unref() takes care of closing fd.. like syncop_close() */
+
+free_md:
+ GF_FREE(md);
+unrefd:
+ fd_unref(fd);
+unref_inode:
+ inode_unref(linked_inode);
+out:
+ loc_wipe(&loc);
+ return ret;
+}
+
+static void
+_br_lock_cleaner(void *arg)
+{
+ pthread_mutex_t *mutex = arg;
+
+ pthread_mutex_unlock(mutex);
+}
+
+static void
+wait_for_scrubbing(xlator_t *this, struct br_scanfs *fsscan)
+{
+ br_private_t *priv = NULL;
+ struct br_scrubber *fsscrub = NULL;
+
+ priv = this->private;
+ fsscrub = &priv->fsscrub;
+
+ pthread_cleanup_push(_br_lock_cleaner, &fsscan->waitlock);
+ pthread_mutex_lock(&fsscan->waitlock);
+ {
+ pthread_cleanup_push(_br_lock_cleaner, &fsscrub->mutex);
+ pthread_mutex_lock(&fsscrub->mutex);
+ {
+ list_replace_init(&fsscan->queued, &fsscan->ready);
+
+ /* wake up scrubbers */
+ pthread_cond_broadcast(&fsscrub->cond);
+ }
+ pthread_mutex_unlock(&fsscrub->mutex);
+ pthread_cleanup_pop(0);
+
+ while (fsscan->entries != 0)
+ pthread_cond_wait(&fsscan->waitcond, &fsscan->waitlock);
+ }
+ pthread_mutex_unlock(&fsscan->waitlock);
+ pthread_cleanup_pop(0);
+}
+
+static void
+_br_fsscan_inc_entry_count(struct br_scanfs *fsscan)
+{
+ fsscan->entries++;
+}
+
+static void
+_br_fsscan_dec_entry_count(struct br_scanfs *fsscan)
+{
+ if (--fsscan->entries == 0) {
+ pthread_mutex_lock(&fsscan->waitlock);
+ {
+ pthread_cond_signal(&fsscan->waitcond);
+ }
+ pthread_mutex_unlock(&fsscan->waitlock);
+ }
+}
+
+static void
+_br_fsscan_collect_entry(struct br_scanfs *fsscan,
+ struct br_fsscan_entry *fsentry)
+{
+ list_add_tail(&fsentry->list, &fsscan->queued);
+ _br_fsscan_inc_entry_count(fsscan);
+}
+
+#define NR_ENTRIES (1 << 7) /* ..bulk scrubbing */
+
+int
+br_fsscanner_handle_entry(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
+{
+ int32_t ret = -1;
+ int scrub = 0;
+ br_child_t *child = NULL;
+ xlator_t *this = NULL;
+ struct br_scanfs *fsscan = NULL;
+ struct br_fsscan_entry *fsentry = NULL;
+
+ GF_VALIDATE_OR_GOTO("bit-rot", subvol, error_return);
+ GF_VALIDATE_OR_GOTO("bit-rot", data, error_return);
+
+ child = data;
+ this = child->this;
+ fsscan = &child->fsscan;
+
+ _mask_cancellation();
+
+ fsentry = GF_CALLOC(1, sizeof(*fsentry), gf_br_mt_br_fsscan_entry_t);
+ if (!fsentry)
+ goto error_return;
+
+ {
+ fsentry->data = data;
+ fsentry->fsscan = &child->fsscan;
+
+ /* copy parent loc */
+ ret = loc_copy(&fsentry->parent, parent);
+ if (ret)
+ goto dealloc;
+
+ /* copy child entry */
+ fsentry->entry = entry_copy(entry);
+ if (!fsentry->entry)
+ goto locwipe;
+
+ INIT_LIST_HEAD(&fsentry->list);
+ }
+
+ LOCK(&fsscan->entrylock);
+ {
+ _br_fsscan_collect_entry(fsscan, fsentry);
+
+ /**
+ * need not be a equality check as entries may be pushed
+ * back onto the scanned queue when thread(s) are cleaned.
+ */
+ if (fsscan->entries >= NR_ENTRIES)
+ scrub = 1;
+ }
+ UNLOCK(&fsscan->entrylock);
+
+ _unmask_cancellation();
+
+ if (scrub)
+ wait_for_scrubbing(this, fsscan);
+
+ return 0;
+
+locwipe:
+ loc_wipe(&fsentry->parent);
+dealloc:
+ GF_FREE(fsentry);
+error_return:
+ return -1;
+}
+
+int32_t
+br_fsscan_deactivate(xlator_t *this)
+{
+ int ret = 0;
+ br_private_t *priv = NULL;
+ br_scrub_state_t nstate = 0;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ scrub_monitor = &priv->scrub_monitor;
+
+ ret = gf_tw_del_timer(priv->timer_wheel, scrub_monitor->timer);
+ if (ret == 0) {
+ nstate = BR_SCRUB_STATE_STALLED;
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+ "Volume is under active scrubbing. Pausing scrub..");
+ } else {
+ nstate = BR_SCRUB_STATE_PAUSED;
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+ "Scrubber paused");
+ }
+
+ _br_monitor_set_scrub_state(scrub_monitor, nstate);
+
+ return 0;
+}
+
+static void
+br_scrubber_log_time(xlator_t *this, const char *sfx)
+{
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ br_private_t *priv = NULL;
+ time_t now = 0;
+
+ now = gf_time();
+ priv = this->private;
+
+ gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT);
+
+ if (strcasecmp(sfx, "started") == 0) {
+ br_update_scrub_start_time(&priv->scrub_stat, now);
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_START,
+ "Scrubbing %s at %s", sfx, timestr);
+ } else {
+ br_update_scrub_finish_time(&priv->scrub_stat, timestr, now);
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_FINISH,
+ "Scrubbing %s at %s", sfx, timestr);
+ }
+}
+
+static void
+br_fsscanner_log_time(xlator_t *this, br_child_t *child, const char *sfx)
+{
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ time_t now = 0;
+
+ now = gf_time();
+ gf_time_fmt(timestr, sizeof(timestr), now, gf_timefmt_FT);
+
+ if (strcasecmp(sfx, "started") == 0) {
+ gf_msg_debug(this->name, 0, "Scrubbing \"%s\" %s at %s",
+ child->brick_path, sfx, timestr);
+ } else {
+ gf_msg_debug(this->name, 0, "Scrubbing \"%s\" %s at %s",
+ child->brick_path, sfx, timestr);
+ }
+}
+
+void
+br_child_set_scrub_state(br_child_t *child, gf_boolean_t state)
+{
+ child->active_scrubbing = state;
+}
+
+static void
+br_fsscanner_wait_until_kicked(xlator_t *this, br_child_t *child)
+{
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ scrub_monitor = &priv->scrub_monitor;
+
+ pthread_cleanup_push(_br_lock_cleaner, &scrub_monitor->wakelock);
+ pthread_mutex_lock(&scrub_monitor->wakelock);
+ {
+ while (!scrub_monitor->kick)
+ pthread_cond_wait(&scrub_monitor->wakecond,
+ &scrub_monitor->wakelock);
+
+ /* Child lock is to synchronize with disconnect events */
+ pthread_cleanup_push(_br_lock_cleaner, &child->lock);
+ pthread_mutex_lock(&child->lock);
+ {
+ scrub_monitor->active_child_count++;
+ br_child_set_scrub_state(child, _gf_true);
+ }
+ pthread_mutex_unlock(&child->lock);
+ pthread_cleanup_pop(0);
+ }
+ pthread_mutex_unlock(&scrub_monitor->wakelock);
+ pthread_cleanup_pop(0);
+}
+
+static void
+br_scrubber_entry_control(xlator_t *this)
+{
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ scrub_monitor = &priv->scrub_monitor;
+
+ LOCK(&scrub_monitor->lock);
+ {
+ /* Move the state to BR_SCRUB_STATE_ACTIVE */
+ if (scrub_monitor->state == BR_SCRUB_STATE_PENDING)
+ scrub_monitor->state = BR_SCRUB_STATE_ACTIVE;
+ br_scrubber_log_time(this, "started");
+ priv->scrub_stat.scrub_running = 1;
+ }
+ UNLOCK(&scrub_monitor->lock);
+}
+
+static void
+br_scrubber_exit_control(xlator_t *this)
+{
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ scrub_monitor = &priv->scrub_monitor;
+
+ LOCK(&scrub_monitor->lock);
+ {
+ br_scrubber_log_time(this, "finished");
+ priv->scrub_stat.scrub_running = 0;
+
+ if (scrub_monitor->state == BR_SCRUB_STATE_ACTIVE) {
+ (void)br_fsscan_activate(this);
+ } else {
+ UNLOCK(&scrub_monitor->lock);
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+ "Volume waiting to get rescheduled..");
+ return;
+ }
+ }
+ UNLOCK(&scrub_monitor->lock);
+}
+
+static void
+br_fsscanner_entry_control(xlator_t *this, br_child_t *child)
+{
+ br_fsscanner_log_time(this, child, "started");
+}
+
+static void
+br_fsscanner_exit_control(xlator_t *this, br_child_t *child)
+{
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ scrub_monitor = &priv->scrub_monitor;
+
+ if (!_br_is_child_connected(child)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SCRUB_INFO,
+ "Brick [%s] disconnected while scrubbing. Scrubbing "
+ "might be incomplete",
+ child->brick_path);
+ }
+
+ br_fsscanner_log_time(this, child, "finished");
+
+ pthread_cleanup_push(_br_lock_cleaner, &scrub_monitor->wakelock);
+ pthread_mutex_lock(&scrub_monitor->wakelock);
+ {
+ scrub_monitor->active_child_count--;
+ pthread_cleanup_push(_br_lock_cleaner, &child->lock);
+ pthread_mutex_lock(&child->lock);
+ {
+ br_child_set_scrub_state(child, _gf_false);
+ }
+ pthread_mutex_unlock(&child->lock);
+ pthread_cleanup_pop(0);
+
+ if (scrub_monitor->active_child_count == 0) {
+ /* The last child has finished scrubbing.
+ * Set the kick to false and wake up other
+ * children who are waiting for the last
+ * child to complete scrubbing.
+ */
+ scrub_monitor->kick = _gf_false;
+ pthread_cond_broadcast(&scrub_monitor->wakecond);
+
+ /* Signal monitor thread waiting for the all
+ * the children to finish scrubbing.
+ */
+ pthread_cleanup_push(_br_lock_cleaner, &scrub_monitor->donelock);
+ pthread_mutex_lock(&scrub_monitor->donelock);
+ {
+ scrub_monitor->done = _gf_true;
+ pthread_cond_signal(&scrub_monitor->donecond);
+ }
+ pthread_mutex_unlock(&scrub_monitor->donelock);
+ pthread_cleanup_pop(0);
+ } else {
+ while (scrub_monitor->active_child_count)
+ pthread_cond_wait(&scrub_monitor->wakecond,
+ &scrub_monitor->wakelock);
+ }
+ }
+ pthread_mutex_unlock(&scrub_monitor->wakelock);
+ pthread_cleanup_pop(0);
+}
+
+void *
+br_fsscanner(void *arg)
+{
+ loc_t loc = {
+ 0,
+ };
+ br_child_t *child = NULL;
+ xlator_t *this = NULL;
+ struct br_scanfs *fsscan = NULL;
+
+ child = arg;
+ this = child->this;
+ fsscan = &child->fsscan;
+
+ THIS = this;
+ loc.inode = child->table->root;
+
+ while (1) {
+ br_fsscanner_wait_until_kicked(this, child);
+ {
+ /* precursor for scrub */
+ br_fsscanner_entry_control(this, child);
+
+ /* scrub */
+ (void)syncop_ftw(child->xl, &loc, GF_CLIENT_PID_SCRUB, child,
+ br_fsscanner_handle_entry);
+ if (!list_empty(&fsscan->queued))
+ wait_for_scrubbing(this, fsscan);
+
+ /* scrub exit criteria */
+ br_fsscanner_exit_control(this, child);
+ }
+ }
+
+ return NULL;
+}
+
+/**
+ * Keep this routine extremely simple and do not ever try to acquire
+ * child->lock here: it may lead to deadlock. Scrubber state is
+ * modified in br_fsscanner(). An intermediate state change to pause
+ * changes the scrub state to the _correct_ state by identifying a
+ * non-pending timer.
+ */
+void
+br_kickstart_scanner(struct gf_tw_timer_list *timer, void *data,
+ unsigned long calltime)
+{
+ xlator_t *this = NULL;
+ struct br_monitor *scrub_monitor = data;
+ br_private_t *priv = NULL;
+
+ THIS = this = scrub_monitor->this;
+ priv = this->private;
+
+ /* Reset scrub statistics */
+ priv->scrub_stat.scrubbed_files = 0;
+ priv->scrub_stat.unsigned_files = 0;
+
+ /* Moves state from PENDING to ACTIVE */
+ (void)br_scrubber_entry_control(this);
+
+ /* kickstart scanning.. */
+ pthread_mutex_lock(&scrub_monitor->wakelock);
+ {
+ scrub_monitor->kick = _gf_true;
+ GF_ASSERT(scrub_monitor->active_child_count == 0);
+ pthread_cond_broadcast(&scrub_monitor->wakecond);
+ }
+ pthread_mutex_unlock(&scrub_monitor->wakelock);
+
+ return;
+}
+
+static uint32_t
+br_fsscan_calculate_delta(uint32_t times)
+{
+ return times;
+}
+
+#define BR_SCRUB_ONDEMAND (1)
+#define BR_SCRUB_MINUTE (60)
+#define BR_SCRUB_HOURLY (60 * 60)
+#define BR_SCRUB_DAILY (1 * 24 * 60 * 60)
+#define BR_SCRUB_WEEKLY (7 * 24 * 60 * 60)
+#define BR_SCRUB_BIWEEKLY (14 * 24 * 60 * 60)
+#define BR_SCRUB_MONTHLY (30 * 24 * 60 * 60)
+
+static unsigned int
+br_fsscan_calculate_timeout(scrub_freq_t freq)
+{
+ uint32_t timo = 0;
+
+ switch (freq) {
+ case BR_FSSCRUB_FREQ_MINUTE:
+ timo = br_fsscan_calculate_delta(BR_SCRUB_MINUTE);
+ break;
+ case BR_FSSCRUB_FREQ_HOURLY:
+ timo = br_fsscan_calculate_delta(BR_SCRUB_HOURLY);
+ break;
+ case BR_FSSCRUB_FREQ_DAILY:
+ timo = br_fsscan_calculate_delta(BR_SCRUB_DAILY);
+ break;
+ case BR_FSSCRUB_FREQ_WEEKLY:
+ timo = br_fsscan_calculate_delta(BR_SCRUB_WEEKLY);
+ break;
+ case BR_FSSCRUB_FREQ_BIWEEKLY:
+ timo = br_fsscan_calculate_delta(BR_SCRUB_BIWEEKLY);
+ break;
+ case BR_FSSCRUB_FREQ_MONTHLY:
+ timo = br_fsscan_calculate_delta(BR_SCRUB_MONTHLY);
+ break;
+ default:
+ timo = 0;
+ }
+
+ return timo;
+}
+
+int32_t
+br_fsscan_schedule(xlator_t *this)
+{
+ uint32_t timo = 0;
+ br_private_t *priv = NULL;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ struct br_scrubber *fsscrub = NULL;
+ struct gf_tw_timer_list *timer = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ fsscrub = &priv->fsscrub;
+ scrub_monitor = &priv->scrub_monitor;
+
+ scrub_monitor->boot = gf_time();
+
+ timo = br_fsscan_calculate_timeout(fsscrub->frequency);
+ if (timo == 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
+ "BUG: Zero schedule timeout");
+ goto error_return;
+ }
+
+ scrub_monitor->timer = GF_CALLOC(1, sizeof(*scrub_monitor->timer),
+ gf_br_stub_mt_br_scanner_freq_t);
+ if (!scrub_monitor->timer)
+ goto error_return;
+
+ timer = scrub_monitor->timer;
+ INIT_LIST_HEAD(&timer->entry);
+
+ timer->data = scrub_monitor;
+ timer->expires = timo;
+ timer->function = br_kickstart_scanner;
+
+ gf_tw_add_timer(priv->timer_wheel, timer);
+ _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PENDING);
+
+ gf_time_fmt(timestr, sizeof(timestr), (scrub_monitor->boot + timo),
+ gf_timefmt_FT);
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+ "Scrubbing is "
+ "scheduled to run at %s",
+ timestr);
+
+ return 0;
+
+error_return:
+ return -1;
+}
+
+int32_t
+br_fsscan_activate(xlator_t *this)
+{
+ uint32_t timo = 0;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ time_t now = 0;
+ br_private_t *priv = NULL;
+ struct br_scrubber *fsscrub = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ fsscrub = &priv->fsscrub;
+ scrub_monitor = &priv->scrub_monitor;
+
+ now = gf_time();
+ timo = br_fsscan_calculate_timeout(fsscrub->frequency);
+ if (timo == 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
+ "BUG: Zero schedule timeout");
+ return -1;
+ }
+
+ pthread_mutex_lock(&scrub_monitor->donelock);
+ {
+ scrub_monitor->done = _gf_false;
+ }
+ pthread_mutex_unlock(&scrub_monitor->donelock);
+
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
+ (void)gf_tw_mod_timer(priv->timer_wheel, scrub_monitor->timer, timo);
+
+ _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PENDING);
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+ "Scrubbing is "
+ "rescheduled to run at %s",
+ timestr);
+
+ return 0;
+}
+
+int32_t
+br_fsscan_reschedule(xlator_t *this)
+{
+ int32_t ret = 0;
+ uint32_t timo = 0;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ time_t now = 0;
+ br_private_t *priv = NULL;
+ struct br_scrubber *fsscrub = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ fsscrub = &priv->fsscrub;
+ scrub_monitor = &priv->scrub_monitor;
+
+ if (!fsscrub->frequency_reconf)
+ return 0;
+
+ now = gf_time();
+ timo = br_fsscan_calculate_timeout(fsscrub->frequency);
+ if (timo == 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_ZERO_TIMEOUT_BUG,
+ "BUG: Zero schedule timeout");
+ return -1;
+ }
+
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
+
+ pthread_mutex_lock(&scrub_monitor->donelock);
+ {
+ scrub_monitor->done = _gf_false;
+ }
+ pthread_mutex_unlock(&scrub_monitor->donelock);
+
+ ret = gf_tw_mod_timer_pending(priv->timer_wheel, scrub_monitor->timer,
+ timo);
+ if (ret == 0)
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+ "Scrubber is currently running and would be "
+ "rescheduled after completion");
+ else {
+ _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PENDING);
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+ "Scrubbing rescheduled to run at %s", timestr);
+ }
+
+ return 0;
+}
+
+int32_t
+br_fsscan_ondemand(xlator_t *this)
+{
+ int32_t ret = 0;
+ uint32_t timo = 0;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ time_t now = 0;
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ scrub_monitor = &priv->scrub_monitor;
+
+ now = gf_time();
+ timo = BR_SCRUB_ONDEMAND;
+ gf_time_fmt(timestr, sizeof(timestr), now + timo, gf_timefmt_FT);
+
+ pthread_mutex_lock(&scrub_monitor->donelock);
+ {
+ scrub_monitor->done = _gf_false;
+ }
+ pthread_mutex_unlock(&scrub_monitor->donelock);
+
+ ret = gf_tw_mod_timer_pending(priv->timer_wheel, scrub_monitor->timer,
+ timo);
+ if (ret == 0)
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+ "Scrubber is currently running and would be "
+ "rescheduled after completion");
+ else {
+ _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PENDING);
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+ "Ondemand Scrubbing scheduled to run at %s", timestr);
+ }
+
+ return 0;
+}
+
+#define BR_SCRUB_THREAD_SCALE_LAZY 0
+#define BR_SCRUB_THREAD_SCALE_NORMAL 0.4
+#define BR_SCRUB_THREAD_SCALE_AGGRESSIVE 1.0
+
+#ifndef M_E
+#define M_E 2.718
+#endif
+
+/**
+ * This is just a simple exponential scale to a fixed value selected
+ * per throttle config. We probably need to be more smart and select
+ * the scale based on the number of processor cores too.
+ */
+static unsigned int
+br_scrubber_calc_scale(xlator_t *this, br_private_t *priv,
+ scrub_throttle_t throttle)
+{
+ unsigned int scale = 0;
+
+ switch (throttle) {
+ case BR_SCRUB_THROTTLE_VOID:
+ case BR_SCRUB_THROTTLE_STALLED:
+ scale = 0;
+ break;
+ case BR_SCRUB_THROTTLE_LAZY:
+ scale = priv->child_count * pow(M_E, BR_SCRUB_THREAD_SCALE_LAZY);
+ break;
+ case BR_SCRUB_THROTTLE_NORMAL:
+ scale = priv->child_count * pow(M_E, BR_SCRUB_THREAD_SCALE_NORMAL);
+ break;
+ case BR_SCRUB_THROTTLE_AGGRESSIVE:
+ scale = priv->child_count *
+ pow(M_E, BR_SCRUB_THREAD_SCALE_AGGRESSIVE);
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_UNKNOWN_THROTTLE,
+ "Unknown throttle %d", throttle);
+ }
+
+ return scale;
+}
+
+static br_child_t *
+_br_scrubber_get_next_child(struct br_scrubber *fsscrub)
+{
+ br_child_t *child = NULL;
+
+ child = list_first_entry(&fsscrub->scrublist, br_child_t, list);
+ list_rotate_left(&fsscrub->scrublist);
+
+ return child;
+}
+
+static void
+_br_scrubber_get_entry(br_child_t *child, struct br_fsscan_entry **fsentry)
+{
+ struct br_scanfs *fsscan = &child->fsscan;
+
+ if (list_empty(&fsscan->ready))
+ return;
+ *fsentry = list_first_entry(&fsscan->ready, struct br_fsscan_entry, list);
+ list_del_init(&(*fsentry)->list);
+}
+
+static void
+_br_scrubber_find_scrubbable_entry(struct br_scrubber *fsscrub,
+ struct br_fsscan_entry **fsentry)
+{
+ br_child_t *child = NULL;
+ br_child_t *firstchild = NULL;
+
+ while (1) {
+ while (list_empty(&fsscrub->scrublist))
+ pthread_cond_wait(&fsscrub->cond, &fsscrub->mutex);
+
+ firstchild = NULL;
+ for (child = _br_scrubber_get_next_child(fsscrub); child != firstchild;
+ child = _br_scrubber_get_next_child(fsscrub)) {
+ if (!firstchild)
+ firstchild = child;
+
+ _br_scrubber_get_entry(child, fsentry);
+ if (*fsentry)
+ break;
+ }
+
+ if (*fsentry)
+ break;
+
+ /* nothing to work on.. wait till available */
+ pthread_cond_wait(&fsscrub->cond, &fsscrub->mutex);
+ }
+}
+
+static void
+br_scrubber_pick_entry(struct br_scrubber *fsscrub,
+ struct br_fsscan_entry **fsentry)
+{
+ pthread_cleanup_push(_br_lock_cleaner, &fsscrub->mutex);
+
+ pthread_mutex_lock(&fsscrub->mutex);
+ {
+ *fsentry = NULL;
+ _br_scrubber_find_scrubbable_entry(fsscrub, fsentry);
+ }
+ pthread_mutex_unlock(&fsscrub->mutex);
+
+ pthread_cleanup_pop(0);
+}
+
+struct br_scrub_entry {
+ gf_boolean_t scrubbed;
+ struct br_fsscan_entry *fsentry;
+};
+
+/**
+ * We need to be a bit careful here. These thread(s) are prone to cancellations
+ * when threads are scaled down (depending on the thottling value configured)
+ * and pausing scrub. A thread can get cancelled while it's waiting for entries
+ * in the ->pending queue or when an object is undergoing scrubbing.
+ */
+static void
+br_scrubber_entry_handle(void *arg)
+{
+ struct br_scanfs *fsscan = NULL;
+ struct br_scrub_entry *sentry = NULL;
+ struct br_fsscan_entry *fsentry = NULL;
+
+ sentry = arg;
+
+ fsentry = sentry->fsentry;
+ fsscan = fsentry->fsscan;
+
+ LOCK(&fsscan->entrylock);
+ {
+ if (sentry->scrubbed) {
+ _br_fsscan_dec_entry_count(fsscan);
+
+ /* cleanup ->entry */
+ fsentry->data = NULL;
+ fsentry->fsscan = NULL;
+ loc_wipe(&fsentry->parent);
+ gf_dirent_entry_free(fsentry->entry);
+
+ GF_FREE(sentry->fsentry);
+ } else {
+ /* (re)queue the entry again for scrub */
+ _br_fsscan_collect_entry(fsscan, sentry->fsentry);
+ }
+ }
+ UNLOCK(&fsscan->entrylock);
+}
+
+static void
+br_scrubber_scrub_entry(xlator_t *this, struct br_fsscan_entry *fsentry)
+{
+ struct br_scrub_entry sentry = {
+ 0,
+ };
+
+ sentry.scrubbed = 0;
+ sentry.fsentry = fsentry;
+
+ pthread_cleanup_push(br_scrubber_entry_handle, &sentry);
+ {
+ (void)br_scrubber_scrub_begin(this, fsentry);
+ sentry.scrubbed = 1;
+ }
+ pthread_cleanup_pop(1);
+}
+
+void *
+br_scrubber_proc(void *arg)
+{
+ xlator_t *this = NULL;
+ struct br_scrubber *fsscrub = NULL;
+ struct br_fsscan_entry *fsentry = NULL;
+
+ fsscrub = arg;
+ THIS = this = fsscrub->this;
+
+ while (1) {
+ br_scrubber_pick_entry(fsscrub, &fsentry);
+ br_scrubber_scrub_entry(this, fsentry);
+ sleep(1);
+ }
+
+ return NULL;
+}
+
+static int32_t
+br_scrubber_scale_up(xlator_t *this, struct br_scrubber *fsscrub,
+ unsigned int v1, unsigned int v2)
+{
+ int i = 0;
+ int32_t ret = -1;
+ int diff = 0;
+ struct br_scrubbers *scrub = NULL;
+
+ diff = (int)(v2 - v1);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCALING_UP_SCRUBBER,
+ "Scaling up scrubbers [%d => %d]", v1, v2);
+
+ for (i = 0; i < diff; i++) {
+ scrub = GF_CALLOC(diff, sizeof(*scrub), gf_br_mt_br_scrubber_t);
+ if (!scrub)
+ break;
+
+ INIT_LIST_HEAD(&scrub->list);
+ ret = gf_thread_create(&scrub->scrubthread, NULL, br_scrubber_proc,
+ fsscrub, "brsproc");
+ if (ret)
+ break;
+
+ fsscrub->nr_scrubbers++;
+ list_add_tail(&scrub->list, &fsscrub->scrubbers);
+ }
+
+ if ((i != diff) && !scrub)
+ goto error_return;
+
+ if (i != diff) /* degraded scaling.. */
+ gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SCALE_UP_FAILED,
+ "Could not fully scale up to %d scrubber(s). Spawned "
+ "%d/%d [total scrubber(s): %d]",
+ v2, i, diff, (v1 + i));
+
+ return 0;
+
+error_return:
+ return -1;
+}
+
+static int32_t
+br_scrubber_scale_down(xlator_t *this, struct br_scrubber *fsscrub,
+ unsigned int v1, unsigned int v2)
+{
+ int i = 0;
+ int diff = 0;
+ int32_t ret = -1;
+ struct br_scrubbers *scrub = NULL;
+
+ diff = (int)(v1 - v2);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCALE_DOWN_SCRUBBER,
+ "Scaling down scrubbers [%d => %d]", v1, v2);
+
+ for (i = 0; i < diff; i++) {
+ scrub = list_first_entry(&fsscrub->scrubbers, struct br_scrubbers,
+ list);
+
+ list_del_init(&scrub->list);
+ ret = gf_thread_cleanup_xint(scrub->scrubthread);
+ if (ret)
+ break;
+ GF_FREE(scrub);
+
+ fsscrub->nr_scrubbers--;
+ }
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SCALE_DOWN_FAILED,
+ "Could not fully scale down "
+ "to %d scrubber(s). Terminated %d/%d [total "
+ "scrubber(s): %d]",
+ v1, i, diff, (v2 - i));
+ ret = 0;
+ }
+
+ return ret;
+}
+
+static int32_t
+br_scrubber_configure(xlator_t *this, br_private_t *priv,
+ struct br_scrubber *fsscrub, scrub_throttle_t nthrottle)
+{
+ int32_t ret = 0;
+ unsigned int v1 = 0;
+ unsigned int v2 = 0;
+
+ v1 = fsscrub->nr_scrubbers;
+ v2 = br_scrubber_calc_scale(this, priv, nthrottle);
+
+ if (v1 == v2)
+ return 0;
+
+ if (v1 > v2)
+ ret = br_scrubber_scale_down(this, fsscrub, v1, v2);
+ else
+ ret = br_scrubber_scale_up(this, fsscrub, v1, v2);
+
+ return ret;
+}
+
+static int32_t
+br_scrubber_fetch_option(xlator_t *this, char *opt, dict_t *options,
+ char **value)
+{
+ if (options)
+ GF_OPTION_RECONF(opt, *value, options, str, error_return);
+ else
+ GF_OPTION_INIT(opt, *value, str, error_return);
+
+ return 0;
+
+error_return:
+ return -1;
+}
+
+/* internal "throttle" override */
+#define BR_SCRUB_STALLED "STALLED"
+
+/* TODO: token buket spec */
+static int32_t
+br_scrubber_handle_throttle(xlator_t *this, br_private_t *priv, dict_t *options,
+ gf_boolean_t scrubstall)
+{
+ int32_t ret = 0;
+ char *tmp = NULL;
+ struct br_scrubber *fsscrub = NULL;
+ scrub_throttle_t nthrottle = BR_SCRUB_THROTTLE_VOID;
+
+ fsscrub = &priv->fsscrub;
+ fsscrub->throttle_reconf = _gf_false;
+
+ ret = br_scrubber_fetch_option(this, "scrub-throttle", options, &tmp);
+ if (ret)
+ goto error_return;
+
+ if (scrubstall)
+ tmp = BR_SCRUB_STALLED;
+
+ if (strcasecmp(tmp, "lazy") == 0)
+ nthrottle = BR_SCRUB_THROTTLE_LAZY;
+ else if (strcasecmp(tmp, "normal") == 0)
+ nthrottle = BR_SCRUB_THROTTLE_NORMAL;
+ else if (strcasecmp(tmp, "aggressive") == 0)
+ nthrottle = BR_SCRUB_THROTTLE_AGGRESSIVE;
+ else if (strcasecmp(tmp, BR_SCRUB_STALLED) == 0)
+ nthrottle = BR_SCRUB_THROTTLE_STALLED;
+ else
+ goto error_return;
+
+ /* on failure old throttling value is preserved */
+ ret = br_scrubber_configure(this, priv, fsscrub, nthrottle);
+ if (ret)
+ goto error_return;
+
+ if (fsscrub->throttle != nthrottle)
+ fsscrub->throttle_reconf = _gf_true;
+
+ fsscrub->throttle = nthrottle;
+ return 0;
+
+error_return:
+ return -1;
+}
+
+static int32_t
+br_scrubber_handle_stall(xlator_t *this, br_private_t *priv, dict_t *options,
+ gf_boolean_t *scrubstall)
+{
+ int32_t ret = 0;
+ char *tmp = NULL;
+
+ ret = br_scrubber_fetch_option(this, "scrub-state", options, &tmp);
+ if (ret)
+ goto error_return;
+
+ if (strcasecmp(tmp, "pause") == 0) /* anything else is active */
+ *scrubstall = _gf_true;
+
+ return 0;
+
+error_return:
+ return -1;
+}
+
+static int32_t
+br_scrubber_handle_freq(xlator_t *this, br_private_t *priv, dict_t *options,
+ gf_boolean_t scrubstall)
+{
+ int32_t ret = -1;
+ char *tmp = NULL;
+ scrub_freq_t frequency = BR_FSSCRUB_FREQ_HOURLY;
+ struct br_scrubber *fsscrub = NULL;
+
+ fsscrub = &priv->fsscrub;
+ fsscrub->frequency_reconf = _gf_true;
+
+ ret = br_scrubber_fetch_option(this, "scrub-freq", options, &tmp);
+ if (ret)
+ goto error_return;
+
+ if (scrubstall)
+ tmp = BR_SCRUB_STALLED;
+
+ if (strcasecmp(tmp, "hourly") == 0) {
+ frequency = BR_FSSCRUB_FREQ_HOURLY;
+ } else if (strcasecmp(tmp, "daily") == 0) {
+ frequency = BR_FSSCRUB_FREQ_DAILY;
+ } else if (strcasecmp(tmp, "weekly") == 0) {
+ frequency = BR_FSSCRUB_FREQ_WEEKLY;
+ } else if (strcasecmp(tmp, "biweekly") == 0) {
+ frequency = BR_FSSCRUB_FREQ_BIWEEKLY;
+ } else if (strcasecmp(tmp, "monthly") == 0) {
+ frequency = BR_FSSCRUB_FREQ_MONTHLY;
+ } else if (strcasecmp(tmp, "minute") == 0) {
+ frequency = BR_FSSCRUB_FREQ_MINUTE;
+ } else if (strcasecmp(tmp, BR_SCRUB_STALLED) == 0) {
+ frequency = BR_FSSCRUB_FREQ_STALLED;
+ } else
+ goto error_return;
+
+ if (fsscrub->frequency == frequency)
+ fsscrub->frequency_reconf = _gf_false;
+ else
+ fsscrub->frequency = frequency;
+
+ return 0;
+
+error_return:
+ return -1;
+}
+
+static void
+br_scrubber_log_option(xlator_t *this, br_private_t *priv,
+ gf_boolean_t scrubstall)
+{
+ struct br_scrubber *fsscrub = &priv->fsscrub;
+ char *scrub_throttle_str[] = {
+ [BR_SCRUB_THROTTLE_LAZY] = "lazy",
+ [BR_SCRUB_THROTTLE_NORMAL] = "normal",
+ [BR_SCRUB_THROTTLE_AGGRESSIVE] = "aggressive",
+ [BR_SCRUB_THROTTLE_STALLED] = "stalled",
+ };
+
+ char *scrub_freq_str[] = {
+ [0] = "",
+ [BR_FSSCRUB_FREQ_HOURLY] = "hourly",
+ [BR_FSSCRUB_FREQ_DAILY] = "daily",
+ [BR_FSSCRUB_FREQ_WEEKLY] = "weekly",
+ [BR_FSSCRUB_FREQ_BIWEEKLY] = "biweekly",
+ [BR_FSSCRUB_FREQ_MONTHLY] = "monthly (30 days)",
+ [BR_FSSCRUB_FREQ_MINUTE] = "every minute",
+ };
+
+ if (scrubstall)
+ return; /* logged as pause */
+
+ if (fsscrub->frequency_reconf || fsscrub->throttle_reconf) {
+ if (fsscrub->throttle == BR_SCRUB_THROTTLE_VOID)
+ return;
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_TUNABLE,
+ "SCRUB TUNABLES:: [Frequency: %s, Throttle: %s]",
+ scrub_freq_str[fsscrub->frequency],
+ scrub_throttle_str[fsscrub->throttle]);
+ }
+}
+
+int32_t
+br_scrubber_handle_options(xlator_t *this, br_private_t *priv, dict_t *options)
+{
+ int32_t ret = 0;
+ gf_boolean_t scrubstall = _gf_false; /* not as dangerous as it sounds */
+
+ ret = br_scrubber_handle_stall(this, priv, options, &scrubstall);
+ if (ret)
+ goto error_return;
+
+ ret = br_scrubber_handle_throttle(this, priv, options, scrubstall);
+ if (ret)
+ goto error_return;
+
+ ret = br_scrubber_handle_freq(this, priv, options, scrubstall);
+ if (ret)
+ goto error_return;
+
+ br_scrubber_log_option(this, priv, scrubstall);
+
+ return 0;
+
+error_return:
+ return -1;
+}
+
+inode_t *
+br_lookup_bad_obj_dir(xlator_t *this, br_child_t *child, uuid_t gfid)
+{
+ struct iatt statbuf = {
+ 0,
+ };
+ inode_table_t *table = NULL;
+ int32_t ret = -1;
+ loc_t loc = {
+ 0,
+ };
+ inode_t *linked_inode = NULL;
+ int32_t op_errno = 0;
+
+ GF_VALIDATE_OR_GOTO("bit-rot-scrubber", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, child, out);
+
+ table = child->table;
+
+ loc.inode = inode_new(table);
+ if (!loc.inode) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
+ "failed to allocate a new inode for"
+ "bad object directory");
+ goto out;
+ }
+
+ gf_uuid_copy(loc.gfid, gfid);
+
+ ret = syncop_lookup(child->xl, &loc, &statbuf, NULL, NULL, NULL);
+ if (ret < 0) {
+ op_errno = -ret;
+ gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_LOOKUP_FAILED,
+ "failed to lookup the bad "
+ "objects directory (gfid: %s (%s))",
+ uuid_utoa(gfid), strerror(op_errno));
+ goto out;
+ }
+
+ linked_inode = inode_link(loc.inode, NULL, NULL, &statbuf);
+ if (linked_inode)
+ inode_lookup(linked_inode);
+
+out:
+ loc_wipe(&loc);
+ return linked_inode;
+}
+
+int32_t
+br_read_bad_object_dir(xlator_t *this, br_child_t *child, fd_t *fd,
+ dict_t *dict)
+{
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ int32_t ret = -1;
+ off_t offset = 0;
+ int32_t count = 0;
+ char key[32] = {
+ 0,
+ };
+ dict_t *out_dict = NULL;
+
+ INIT_LIST_HEAD(&entries.list);
+
+ while ((ret = syncop_readdir(child->xl, fd, 131072, offset, &entries, NULL,
+ &out_dict))) {
+ if (ret < 0)
+ goto out;
+
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ offset = entry->d_off;
+
+ snprintf(key, sizeof(key), "quarantine-%d", count);
+
+ /*
+ * ignore the dict_set errors for now. The intention is
+ * to get as many bad objects as possible instead of
+ * erroring out at the first failure.
+ */
+ ret = dict_set_dynstr_with_alloc(dict, key, entry->d_name);
+ if (!ret)
+ count++;
+
+ if (out_dict) {
+ dict_copy(out_dict, dict);
+ dict_unref(out_dict);
+ out_dict = NULL;
+ }
+ }
+
+ gf_dirent_free(&entries);
+ }
+
+ ret = count;
+ ret = dict_set_int32_sizen(dict, "count", count);
+
+out:
+ return ret;
+}
+
+int32_t
+br_get_bad_objects_from_child(xlator_t *this, dict_t *dict, br_child_t *child)
+{
+ inode_t *inode = NULL;
+ inode_table_t *table = NULL;
+ fd_t *fd = NULL;
+ int32_t ret = -1;
+ loc_t loc = {
+ 0,
+ };
+ int32_t op_errno = 0;
+
+ GF_VALIDATE_OR_GOTO("bit-rot-scrubber", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, child, out);
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+ table = child->table;
+
+ inode = inode_find(table, BR_BAD_OBJ_CONTAINER);
+ if (!inode) {
+ inode = br_lookup_bad_obj_dir(this, child, BR_BAD_OBJ_CONTAINER);
+ if (!inode)
+ goto out;
+ }
+
+ fd = fd_create(inode, 0);
+ if (!fd) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_FD_CREATE_FAILED,
+ "fd creation for the bad "
+ "objects directory failed (gfid: %s)",
+ uuid_utoa(BR_BAD_OBJ_CONTAINER));
+ goto out;
+ }
+
+ loc.inode = inode;
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ ret = syncop_opendir(child->xl, &loc, fd, NULL, NULL);
+ if (ret < 0) {
+ op_errno = -ret;
+ fd_unref(fd);
+ fd = NULL;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_FD_CREATE_FAILED,
+ "failed to open the bad "
+ "objects directory %s",
+ uuid_utoa(BR_BAD_OBJ_CONTAINER));
+ goto out;
+ }
+
+ fd_bind(fd);
+
+ ret = br_read_bad_object_dir(this, child, fd, dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, BRB_MSG_BAD_OBJ_READDIR_FAIL,
+ "readdir of the bad "
+ "objects directory (%s) failed ",
+ uuid_utoa(BR_BAD_OBJ_CONTAINER));
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ loc_wipe(&loc);
+ if (fd)
+ fd_unref(fd);
+ return ret;
+}
+
+int32_t
+br_collect_bad_objects_of_child(xlator_t *this, br_child_t *child, dict_t *dict,
+ dict_t *child_dict, int32_t total_count)
+{
+ int32_t ret = -1;
+ int32_t count = 0;
+ char key[32] = {
+ 0,
+ };
+ char main_key[32] = {
+ 0,
+ };
+ int32_t j = 0;
+ int32_t tmp_count = 0;
+ char *entry = NULL;
+ char tmp[PATH_MAX] = {
+ 0,
+ };
+ char *path = NULL;
+ int32_t len = 0;
+
+ ret = dict_get_int32_sizen(child_dict, "count", &count);
+ if (ret)
+ goto out;
+
+ tmp_count = total_count;
+
+ for (j = 0; j < count; j++) {
+ len = snprintf(key, sizeof(key), "quarantine-%d", j);
+ ret = dict_get_strn(child_dict, key, len, &entry);
+ if (ret)
+ continue;
+
+ ret = dict_get_str(child_dict, entry, &path);
+ len = snprintf(tmp, PATH_MAX, "%s ==> BRICK: %s\n path: %s", entry,
+ child->brick_path, path);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ continue;
+ }
+ snprintf(main_key, sizeof(main_key), "quarantine-%d", tmp_count);
+
+ ret = dict_set_dynstr_with_alloc(dict, main_key, tmp);
+ if (!ret)
+ tmp_count++;
+ path = NULL;
+ }
+
+ ret = tmp_count;
+
+out:
+ return ret;
+}
+
+int32_t
+br_collect_bad_objects_from_children(xlator_t *this, dict_t *dict)
+{
+ int32_t ret = -1;
+ dict_t *child_dict = NULL;
+ int32_t i = 0;
+ int32_t total_count = 0;
+ br_child_t *child = NULL;
+ br_private_t *priv = NULL;
+ dict_t *tmp_dict = NULL;
+
+ priv = this->private;
+ tmp_dict = dict;
+
+ for (i = 0; i < priv->child_count; i++) {
+ child = &priv->children[i];
+ GF_ASSERT(child);
+ if (!_br_is_child_connected(child))
+ continue;
+
+ child_dict = dict_new();
+ if (!child_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
+ "failed to allocate dict");
+ continue;
+ }
+ ret = br_get_bad_objects_from_child(this, child_dict, child);
+ /*
+ * Continue asking the remaining children for the list of
+ * bad objects even though getting the list from one of them
+ * fails.
+ */
+ if (ret) {
+ dict_unref(child_dict);
+ continue;
+ }
+
+ ret = br_collect_bad_objects_of_child(this, child, tmp_dict, child_dict,
+ total_count);
+ if (ret < 0) {
+ dict_unref(child_dict);
+ continue;
+ }
+
+ total_count = ret;
+ dict_unref(child_dict);
+ child_dict = NULL;
+ }
+
+ ret = dict_set_int32(tmp_dict, "total-count", total_count);
+
+ return ret;
+}
+
+int32_t
+br_get_bad_objects_list(xlator_t *this, dict_t **dict)
+{
+ int32_t ret = -1;
+ dict_t *tmp_dict = NULL;
+
+ GF_VALIDATE_OR_GOTO("bir-rot-scrubber", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+ tmp_dict = *dict;
+ if (!tmp_dict) {
+ tmp_dict = dict_new();
+ if (!tmp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
+ "failed to allocate dict");
+ goto out;
+ }
+ *dict = tmp_dict;
+ }
+
+ ret = br_collect_bad_objects_from_children(this, tmp_dict);
+
+out:
+ return ret;
+}
+
+static int
+wait_for_scrub_to_finish(xlator_t *this)
+{
+ int ret = -1;
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ scrub_monitor = &priv->scrub_monitor;
+
+ GF_VALIDATE_OR_GOTO("bit-rot", scrub_monitor, out);
+ GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_INFO,
+ "Waiting for all children to start and finish scrub");
+
+ pthread_mutex_lock(&scrub_monitor->donelock);
+ {
+ while (!scrub_monitor->done)
+ pthread_cond_wait(&scrub_monitor->donecond,
+ &scrub_monitor->donelock);
+ }
+ pthread_mutex_unlock(&scrub_monitor->donelock);
+ ret = 0;
+out:
+ return ret;
+}
+
+/**
+ * This function is executed in a separate thread. This is scrubber monitor
+ * thread that takes care of state machine.
+ */
+void *
+br_monitor_thread(void *arg)
+{
+ int32_t ret = 0;
+ xlator_t *this = NULL;
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ this = arg;
+ priv = this->private;
+
+ /*
+ * Since, this is the topmost xlator, THIS has to be set by bit-rot
+ * xlator itself (STACK_WIND won't help in this case). Also it has
+ * to be done for each thread that gets spawned. Otherwise, a new
+ * thread will get global_xlator's pointer when it does "THIS".
+ */
+ THIS = this;
+
+ scrub_monitor = &priv->scrub_monitor;
+
+ pthread_mutex_lock(&scrub_monitor->mutex);
+ {
+ while (!scrub_monitor->inited)
+ pthread_cond_wait(&scrub_monitor->cond, &scrub_monitor->mutex);
+ }
+ pthread_mutex_unlock(&scrub_monitor->mutex);
+
+ /* this needs to be serialized with reconfigure() */
+ pthread_mutex_lock(&priv->lock);
+ {
+ ret = br_scrub_state_machine(this, _gf_false);
+ }
+ pthread_mutex_unlock(&priv->lock);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_SSM_FAILED,
+ "Scrub state machine failed");
+ goto out;
+ }
+
+ while (1) {
+ /* Wait for all children to finish scrubbing */
+ ret = wait_for_scrub_to_finish(this);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_SCRUB_WAIT_FAILED,
+ "Scrub wait failed");
+ goto out;
+ }
+
+ /* scrub exit criteria: Move the state to PENDING */
+ br_scrubber_exit_control(this);
+ }
+
+out:
+ return NULL;
+}
+
+static void
+br_set_scrub_state(struct br_monitor *scrub_monitor, br_scrub_state_t state)
+{
+ LOCK(&scrub_monitor->lock);
+ {
+ _br_monitor_set_scrub_state(scrub_monitor, state);
+ }
+ UNLOCK(&scrub_monitor->lock);
+}
+
+int32_t
+br_scrubber_monitor_init(xlator_t *this, br_private_t *priv)
+{
+ struct br_monitor *scrub_monitor = NULL;
+ int ret = 0;
+
+ scrub_monitor = &priv->scrub_monitor;
+
+ LOCK_INIT(&scrub_monitor->lock);
+ scrub_monitor->this = this;
+
+ scrub_monitor->inited = _gf_false;
+ pthread_mutex_init(&scrub_monitor->mutex, NULL);
+ pthread_cond_init(&scrub_monitor->cond, NULL);
+
+ scrub_monitor->kick = _gf_false;
+ scrub_monitor->active_child_count = 0;
+ pthread_mutex_init(&scrub_monitor->wakelock, NULL);
+ pthread_cond_init(&scrub_monitor->wakecond, NULL);
+
+ scrub_monitor->done = _gf_false;
+ pthread_mutex_init(&scrub_monitor->donelock, NULL);
+ pthread_cond_init(&scrub_monitor->donecond, NULL);
+
+ /* Set the state to INACTIVE */
+ br_set_scrub_state(&priv->scrub_monitor, BR_SCRUB_STATE_INACTIVE);
+
+ /* Start the monitor thread */
+ ret = gf_thread_create(&scrub_monitor->thread, NULL, br_monitor_thread,
+ this, "brmon");
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_SPAWN_FAILED,
+ "monitor thread creation failed");
+ ret = -1;
+ goto err;
+ }
+
+ return 0;
+err:
+ pthread_mutex_destroy(&scrub_monitor->mutex);
+ pthread_cond_destroy(&scrub_monitor->cond);
+
+ pthread_mutex_destroy(&scrub_monitor->wakelock);
+ pthread_cond_destroy(&scrub_monitor->wakecond);
+
+ pthread_mutex_destroy(&scrub_monitor->donelock);
+ pthread_cond_destroy(&scrub_monitor->donecond);
+
+ LOCK_DESTROY(&scrub_monitor->lock);
+
+ return ret;
+}
+
+int32_t
+br_scrubber_init(xlator_t *this, br_private_t *priv)
+{
+ struct br_scrubber *fsscrub = NULL;
+ int ret = 0;
+
+ priv->tbf = tbf_init(NULL, 0);
+ if (!priv->tbf)
+ return -1;
+
+ ret = br_scrubber_monitor_init(this, priv);
+ if (ret)
+ return -1;
+
+ fsscrub = &priv->fsscrub;
+
+ fsscrub->this = this;
+ fsscrub->throttle = BR_SCRUB_THROTTLE_VOID;
+
+ pthread_mutex_init(&fsscrub->mutex, NULL);
+ pthread_cond_init(&fsscrub->cond, NULL);
+
+ fsscrub->nr_scrubbers = 0;
+ INIT_LIST_HEAD(&fsscrub->scrubbers);
+ INIT_LIST_HEAD(&fsscrub->scrublist);
+
+ return 0;
+}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
new file mode 100644
index 00000000000..4e5f67bc021
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-scrub.h
@@ -0,0 +1,46 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_SCRUB_H__
+#define __BIT_ROT_SCRUB_H__
+
+#include <glusterfs/xlator.h>
+#include "bit-rot.h"
+
+void *
+br_fsscanner(void *);
+
+int32_t
+br_fsscan_schedule(xlator_t *);
+int32_t
+br_fsscan_reschedule(xlator_t *);
+int32_t
+br_fsscan_activate(xlator_t *);
+int32_t
+br_fsscan_deactivate(xlator_t *);
+int32_t
+br_fsscan_ondemand(xlator_t *);
+
+int32_t
+br_scrubber_handle_options(xlator_t *, br_private_t *, dict_t *);
+
+int32_t
+br_scrubber_monitor_init(xlator_t *, br_private_t *);
+
+int32_t
+br_scrubber_init(xlator_t *, br_private_t *);
+
+int32_t
+br_collect_bad_objects_from_children(xlator_t *this, dict_t *dict);
+
+void
+br_child_set_scrub_state(br_child_t *, gf_boolean_t);
+
+#endif /* __BIT_ROT_SCRUB_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c
new file mode 100644
index 00000000000..753e31a3b23
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.c
@@ -0,0 +1,124 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "bit-rot-ssm.h"
+#include "bit-rot-scrub.h"
+#include "bit-rot-bitd-messages.h"
+
+int
+br_scrub_ssm_noop(xlator_t *this)
+{
+ return 0;
+}
+
+int
+br_scrub_ssm_state_pause(xlator_t *this)
+{
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ scrub_monitor = &priv->scrub_monitor;
+
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO,
+ "Scrubber paused");
+ _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_PAUSED);
+ return 0;
+}
+
+int
+br_scrub_ssm_state_ipause(xlator_t *this)
+{
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ scrub_monitor = &priv->scrub_monitor;
+
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO,
+ "Scrubber paused");
+ _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_IPAUSED);
+ return 0;
+}
+
+int
+br_scrub_ssm_state_active(xlator_t *this)
+{
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ scrub_monitor = &priv->scrub_monitor;
+
+ if (scrub_monitor->done) {
+ (void)br_fsscan_activate(this);
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO,
+ "Scrubbing resumed");
+ _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_ACTIVE);
+ }
+
+ return 0;
+}
+
+int
+br_scrub_ssm_state_stall(xlator_t *this)
+{
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ scrub_monitor = &priv->scrub_monitor;
+
+ gf_msg(this->name, GF_LOG_INFO, 0, BRB_MSG_GENERIC_SSM_INFO,
+ "Volume is under active scrubbing. Pausing scrub..");
+ _br_monitor_set_scrub_state(scrub_monitor, BR_SCRUB_STATE_STALLED);
+ return 0;
+}
+
+static br_scrub_ssm_call *br_scrub_ssm[BR_SCRUB_MAXSTATES][BR_SCRUB_MAXEVENTS] =
+ {
+ /* INACTIVE */
+ {br_fsscan_schedule, br_scrub_ssm_state_ipause, br_scrub_ssm_noop},
+ /* PENDING */
+ {br_fsscan_reschedule, br_fsscan_deactivate, br_fsscan_ondemand},
+ /* ACTIVE */
+ {br_scrub_ssm_noop, br_scrub_ssm_state_stall, br_scrub_ssm_noop},
+ /* PAUSED */
+ {br_fsscan_activate, br_scrub_ssm_noop, br_scrub_ssm_noop},
+ /* IPAUSED */
+ {br_fsscan_schedule, br_scrub_ssm_noop, br_scrub_ssm_noop},
+ /* STALLED */
+ {br_scrub_ssm_state_active, br_scrub_ssm_noop, br_scrub_ssm_noop},
+};
+
+int32_t
+br_scrub_state_machine(xlator_t *this, gf_boolean_t scrub_ondemand)
+{
+ br_private_t *priv = NULL;
+ br_scrub_ssm_call *call = NULL;
+ struct br_scrubber *fsscrub = NULL;
+ br_scrub_state_t currstate = 0;
+ br_scrub_event_t event = 0;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ fsscrub = &priv->fsscrub;
+ scrub_monitor = &priv->scrub_monitor;
+
+ currstate = scrub_monitor->state;
+ if (scrub_ondemand)
+ event = BR_SCRUB_EVENT_ONDEMAND;
+ else
+ event = _br_child_get_scrub_event(fsscrub);
+
+ call = br_scrub_ssm[currstate][event];
+ return call(this);
+}
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
new file mode 100644
index 00000000000..37b45a42eac
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot-ssm.h
@@ -0,0 +1,38 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_SSM_H__
+#define __BIT_ROT_SSM_H__
+
+#include <glusterfs/xlator.h>
+
+typedef enum br_scrub_state {
+ BR_SCRUB_STATE_INACTIVE = 0,
+ BR_SCRUB_STATE_PENDING,
+ BR_SCRUB_STATE_ACTIVE,
+ BR_SCRUB_STATE_PAUSED,
+ BR_SCRUB_STATE_IPAUSED,
+ BR_SCRUB_STATE_STALLED,
+ BR_SCRUB_MAXSTATES,
+} br_scrub_state_t;
+
+typedef enum br_scrub_event {
+ BR_SCRUB_EVENT_SCHEDULE = 0,
+ BR_SCRUB_EVENT_PAUSE,
+ BR_SCRUB_EVENT_ONDEMAND,
+ BR_SCRUB_MAXEVENTS,
+} br_scrub_event_t;
+
+struct br_monitor;
+
+int32_t
+br_scrub_state_machine(xlator_t *, gf_boolean_t);
+
+#endif /* __BIT_ROT_SSM_H__ */
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.c b/xlators/features/bit-rot/src/bitd/bit-rot.c
new file mode 100644
index 00000000000..a2f1c343a1d
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.c
@@ -0,0 +1,2232 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <ctype.h>
+
+#include <glusterfs/logging.h>
+#include <glusterfs/compat-errno.h>
+
+#include "bit-rot.h"
+#include "bit-rot-scrub.h"
+#include <pthread.h>
+#include "bit-rot-bitd-messages.h"
+
+#define BR_HASH_CALC_READ_SIZE (128 * 1024)
+
+typedef int32_t(br_child_handler)(xlator_t *, br_child_t *);
+
+struct br_child_event {
+ xlator_t *this;
+
+ br_child_t *child;
+
+ br_child_handler *call;
+
+ struct list_head list;
+};
+
+static int
+br_find_child_index(xlator_t *this, xlator_t *child)
+{
+ br_private_t *priv = NULL;
+ int i = -1;
+ int index = -1;
+
+ GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, child, out);
+
+ priv = this->private;
+
+ for (i = 0; i < priv->child_count; i++) {
+ if (child == priv->children[i].xl) {
+ index = i;
+ break;
+ }
+ }
+
+out:
+ return index;
+}
+
+br_child_t *
+br_get_child_from_brick_path(xlator_t *this, char *brick_path)
+{
+ br_private_t *priv = NULL;
+ br_child_t *child = NULL;
+ br_child_t *tmp = NULL;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, brick_path, out);
+
+ priv = this->private;
+
+ pthread_mutex_lock(&priv->lock);
+ {
+ for (i = 0; i < priv->child_count; i++) {
+ tmp = &priv->children[i];
+ if (!strcmp(tmp->brick_path, brick_path)) {
+ child = tmp;
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock(&priv->lock);
+
+out:
+ return child;
+}
+
+/**
+ * probably we'll encapsulate brick inside our own structure when
+ * needed -- later.
+ */
+void *
+br_brick_init(void *xl, struct gf_brick_spec *brick)
+{
+ return brick;
+}
+
+/**
+ * and cleanup things here when allocated br_brick_init().
+ */
+void
+br_brick_fini(void *xl, char *brick, void *data)
+{
+ return;
+}
+
+/**
+ * TODO: Signature can contain null terminators which causes bitrot
+ * stub to store truncated hash as it depends on string length of
+ * the hash.
+ *
+ * FIX: Send the string length as part of the signature struct and
+ * change stub to handle this change.
+ */
+static br_isignature_t *
+br_prepare_signature(const unsigned char *sign, unsigned long hashlen,
+ int8_t hashtype, br_object_t *object)
+{
+ br_isignature_t *signature = NULL;
+
+ /* TODO: use mem-pool */
+ signature = GF_CALLOC(1, signature_size(hashlen + 1),
+ gf_br_stub_mt_signature_t);
+ if (!signature)
+ return NULL;
+
+ /* object version */
+ signature->signedversion = object->signedversion;
+
+ /* signature length & type */
+ signature->signaturelen = hashlen;
+ signature->signaturetype = hashtype;
+
+ /* signature itself */
+ memcpy(signature->signature, (char *)sign, hashlen);
+ signature->signature[hashlen + 1] = '\0';
+
+ return signature;
+}
+
+gf_boolean_t
+bitd_is_bad_file(xlator_t *this, br_child_t *child, loc_t *loc, fd_t *fd)
+{
+ int32_t ret = -1;
+ dict_t *xattr = NULL;
+ inode_t *inode = NULL;
+ gf_boolean_t bad_file = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+
+ inode = (loc) ? loc->inode : fd->inode;
+
+ if (fd)
+ ret = syncop_fgetxattr(child->xl, fd, &xattr, BITROT_OBJECT_BAD_KEY,
+ NULL, NULL);
+ else if (loc)
+ ret = syncop_getxattr(child->xl, loc, &xattr, BITROT_OBJECT_BAD_KEY,
+ NULL, NULL);
+
+ if (!ret) {
+ gf_msg_debug(this->name, 0, "[GFID: %s] is marked corrupted",
+ uuid_utoa(inode->gfid));
+ bad_file = _gf_true;
+ }
+
+ if (xattr)
+ dict_unref(xattr);
+
+out:
+ return bad_file;
+}
+
+/**
+ * Do a lookup on the gfid present within the object.
+ */
+static int32_t
+br_object_lookup(xlator_t *this, br_object_t *object, struct iatt *iatt,
+ inode_t **linked_inode)
+{
+ int ret = -EINVAL;
+ loc_t loc = {
+ 0,
+ };
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, object, out);
+
+ inode = inode_find(object->child->table, object->gfid);
+
+ if (inode)
+ loc.inode = inode;
+ else
+ loc.inode = inode_new(object->child->table);
+
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_copy(loc.gfid, object->gfid);
+
+ ret = syncop_lookup(object->child->xl, &loc, iatt, NULL, NULL, NULL);
+ if (ret < 0)
+ goto out;
+
+ /*
+ * The file might have been deleted by the application
+ * after getting the event, but before doing a lookup.
+ * So use linked_inode after inode_link is done.
+ */
+ *linked_inode = inode_link(loc.inode, NULL, NULL, iatt);
+ if (*linked_inode)
+ inode_lookup(*linked_inode);
+
+out:
+ loc_wipe(&loc);
+ return ret;
+}
+
+/**
+ * open the object with O_RDONLY flags and return the fd. How to let brick
+ * know that open is being done by bitd because syncop framework does not allow
+ * passing xdata -- may be use frame->root->pid itself.
+ */
+static int32_t
+br_object_open(xlator_t *this, br_object_t *object, inode_t *inode,
+ fd_t **openfd)
+{
+ int32_t ret = -1;
+ fd_t *fd = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, object, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ ret = -EINVAL;
+ fd = fd_create(inode, 0);
+ if (!fd) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+ goto out;
+ }
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ ret = syncop_open(object->child->xl, &loc, O_RDONLY, fd, NULL, NULL);
+ if (ret) {
+ br_log_object(this, "open", inode->gfid, -ret);
+ fd_unref(fd);
+ fd = NULL;
+ } else {
+ fd_bind(fd);
+ *openfd = fd;
+ }
+
+ loc_wipe(&loc);
+
+out:
+ return ret;
+}
+
+/**
+ * read 128k block from the object @object from the offset @offset
+ * and return the buffer.
+ */
+static int32_t
+br_object_read_block_and_sign(xlator_t *this, fd_t *fd, br_child_t *child,
+ off_t offset, size_t size, SHA256_CTX *sha256)
+{
+ int32_t ret = -1;
+ tbf_t *tbf = NULL;
+ struct iovec *iovec = NULL;
+ struct iobref *iobref = NULL;
+ br_private_t *priv = NULL;
+ int count = 0;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd->inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, child, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ priv = this->private;
+
+ GF_VALIDATE_OR_GOTO(this->name, priv->tbf, out);
+ tbf = priv->tbf;
+
+ ret = syncop_readv(child->xl, fd, size, offset, 0, &iovec, &count, &iobref,
+ NULL, NULL, NULL);
+
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRB_MSG_READV_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ ret = -1;
+ goto out;
+ }
+
+ if (ret == 0)
+ goto out;
+
+ for (i = 0; i < count; i++) {
+ TBF_THROTTLE_BEGIN(tbf, TBF_OP_HASH, iovec[i].iov_len);
+ {
+ SHA256_Update(sha256, (const unsigned char *)(iovec[i].iov_base),
+ iovec[i].iov_len);
+ }
+ TBF_THROTTLE_BEGIN(tbf, TBF_OP_HASH, iovec[i].iov_len);
+ }
+
+out:
+ if (iovec)
+ GF_FREE(iovec);
+
+ if (iobref)
+ iobref_unref(iobref);
+
+ return ret;
+}
+
+int32_t
+br_calculate_obj_checksum(unsigned char *md, br_child_t *child, fd_t *fd,
+ struct iatt *iatt)
+{
+ int32_t ret = -1;
+ off_t offset = 0;
+ size_t block = BR_HASH_CALC_READ_SIZE;
+ xlator_t *this = NULL;
+
+ SHA256_CTX sha256;
+
+ GF_VALIDATE_OR_GOTO("bit-rot", child, out);
+ GF_VALIDATE_OR_GOTO("bit-rot", iatt, out);
+ GF_VALIDATE_OR_GOTO("bit-rot", fd, out);
+
+ this = child->this;
+
+ SHA256_Init(&sha256);
+
+ while (1) {
+ ret = br_object_read_block_and_sign(this, fd, child, offset, block,
+ &sha256);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_BLOCK_READ_FAILED,
+ "offset=%" PRIu64, offset, "object-gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
+ break;
+ }
+
+ if (ret == 0)
+ break;
+
+ offset += ret;
+ }
+
+ if (ret == 0)
+ SHA256_Final(md, &sha256);
+
+out:
+ return ret;
+}
+
+static int32_t
+br_object_checksum(unsigned char *md, br_object_t *object, fd_t *fd,
+ struct iatt *iatt)
+{
+ return br_calculate_obj_checksum(md, object->child, fd, iatt);
+}
+
+static int32_t
+br_object_read_sign(inode_t *linked_inode, fd_t *fd, br_object_t *object,
+ struct iatt *iatt)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ dict_t *xattr = NULL;
+ unsigned char *md = NULL;
+ br_isignature_t *sign = NULL;
+
+ GF_VALIDATE_OR_GOTO("bit-rot", object, out);
+ GF_VALIDATE_OR_GOTO("bit-rot", linked_inode, out);
+ GF_VALIDATE_OR_GOTO("bit-rot", fd, out);
+
+ this = object->this;
+
+ md = GF_MALLOC(SHA256_DIGEST_LENGTH, gf_common_mt_char);
+ if (!md) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_SAVING_HASH_FAILED,
+ "object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto out;
+ }
+
+ ret = br_object_checksum(md, object, fd, iatt);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_CALC_CHECKSUM_FAILED,
+ "object-gfid=%s", uuid_utoa(linked_inode->gfid), NULL);
+ goto free_signature;
+ }
+
+ sign = br_prepare_signature(md, SHA256_DIGEST_LENGTH,
+ BR_SIGNATURE_TYPE_SHA256, object);
+ if (!sign) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED,
+ "object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto free_signature;
+ }
+
+ xattr = dict_for_key_value(GLUSTERFS_SET_OBJECT_SIGNATURE, (void *)sign,
+ signature_size(SHA256_DIGEST_LENGTH), _gf_true);
+
+ if (!xattr) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
+ "dict-allocation object-gfid=%s", uuid_utoa(fd->inode->gfid),
+ NULL);
+ goto free_isign;
+ }
+
+ ret = syncop_fsetxattr(object->child->xl, fd, xattr, 0, NULL, NULL);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
+ "fsetxattr object-gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto unref_dict;
+ }
+
+ ret = 0;
+
+unref_dict:
+ dict_unref(xattr);
+free_isign:
+ GF_FREE(sign);
+free_signature:
+ GF_FREE(md);
+out:
+ return ret;
+}
+
+static int
+br_object_sign_softerror(int32_t op_errno)
+{
+ return ((op_errno == ENOENT) || (op_errno == ESTALE) ||
+ (op_errno == ENODATA));
+}
+
+void
+br_log_object(xlator_t *this, char *op, uuid_t gfid, int32_t op_errno)
+{
+ int softerror = br_object_sign_softerror(op_errno);
+ if (softerror) {
+ gf_msg_debug(this->name, 0,
+ "%s() failed on object %s "
+ "[reason: %s]",
+ op, uuid_utoa(gfid), strerror(op_errno));
+ } else {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, "op=%s",
+ op, "gfid=%s", uuid_utoa(gfid), NULL);
+ }
+}
+
+void
+br_log_object_path(xlator_t *this, char *op, const char *path, int32_t op_errno)
+{
+ int softerror = br_object_sign_softerror(op_errno);
+ if (softerror) {
+ gf_msg_debug(this->name, 0,
+ "%s() failed on object %s "
+ "[reason: %s]",
+ op, path, strerror(op_errno));
+ } else {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_OP_FAILED, "op=%s",
+ op, "path=%s", path, NULL);
+ }
+}
+
+static void
+br_trigger_sign(xlator_t *this, br_child_t *child, inode_t *linked_inode,
+ loc_t *loc, gf_boolean_t need_reopen)
+{
+ fd_t *fd = NULL;
+ int32_t ret = -1;
+ uint32_t val = 0;
+ dict_t *dict = NULL;
+ pid_t pid = GF_CLIENT_PID_BITD;
+
+ syncopctx_setfspid(&pid);
+
+ val = (need_reopen == _gf_true) ? BR_OBJECT_REOPEN : BR_OBJECT_RESIGN;
+
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ ret = dict_set_uint32(dict, BR_REOPEN_SIGN_HINT_KEY, val);
+ if (ret)
+ goto cleanup_dict;
+
+ ret = -1;
+ fd = fd_create(linked_inode, 0);
+ if (!fd) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_FD_CREATE_FAILED,
+ "gfid=%s", uuid_utoa(linked_inode->gfid), NULL);
+ goto cleanup_dict;
+ }
+
+ ret = syncop_open(child->xl, loc, O_RDWR, fd, NULL, NULL);
+ if (ret) {
+ br_log_object(this, "open", linked_inode->gfid, -ret);
+ goto unref_fd;
+ }
+
+ fd_bind(fd);
+
+ ret = syncop_fsetxattr(child->xl, fd, dict, 0, NULL, NULL);
+ if (ret)
+ br_log_object(this, "fsetxattr", linked_inode->gfid, -ret);
+
+ /* passthough: fd_unref() */
+
+unref_fd:
+ fd_unref(fd);
+cleanup_dict:
+ dict_unref(dict);
+out:
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_TRIGGER_SIGN_FAILED,
+ "gfid=%s", uuid_utoa(linked_inode->gfid), "reopen-hint-val=%d",
+ val, NULL);
+ }
+}
+
+static void
+br_object_resign(xlator_t *this, br_object_t *object, inode_t *linked_inode)
+{
+ loc_t loc = {
+ 0,
+ };
+
+ loc.inode = inode_ref(linked_inode);
+ gf_uuid_copy(loc.gfid, linked_inode->gfid);
+
+ br_trigger_sign(this, object->child, linked_inode, &loc, _gf_false);
+
+ loc_wipe(&loc);
+}
+
+/**
+ * Sign a given object. This routine runs full throttle. There needs to be
+ * some form of priority scheduling and/or read burstness to avoid starving
+ * (or kicking) client I/O's.
+ */
+static int32_t
+br_sign_object(br_object_t *object)
+{
+ int32_t ret = -1;
+ inode_t *linked_inode = NULL;
+ xlator_t *this = NULL;
+ fd_t *fd = NULL;
+ struct iatt iatt = {
+ 0,
+ };
+ pid_t pid = GF_CLIENT_PID_BITD;
+ br_sign_state_t sign_info = BR_SIGN_NORMAL;
+
+ GF_VALIDATE_OR_GOTO("bit-rot", object, out);
+
+ this = object->this;
+
+ /**
+ * FIXME: This is required as signing an object is restricted to
+ * clients with special frame->root->pid. Change the way client
+ * pid is set.
+ */
+ syncopctx_setfspid(&pid);
+
+ ret = br_object_lookup(this, object, &iatt, &linked_inode);
+ if (ret) {
+ br_log_object(this, "lookup", object->gfid, -ret);
+ goto out;
+ }
+
+ /**
+ * For fd's that have notified for reopening, we send an explicit
+ * open() followed by a dummy write() call. This triggers the
+ * actual signing of the object.
+ */
+ sign_info = ntohl(object->sign_info);
+ if (sign_info == BR_SIGN_REOPEN_WAIT) {
+ br_object_resign(this, object, linked_inode);
+ goto unref_inode;
+ }
+
+ ret = br_object_open(this, object, linked_inode, &fd);
+ if (!fd) {
+ br_log_object(this, "open", object->gfid, -ret);
+ goto unref_inode;
+ }
+
+ /**
+ * we have an open file descriptor on the object. from here on,
+ * do not be generous to file operation errors.
+ */
+ gf_msg_debug(this->name, 0, "Signing object [%s]",
+ uuid_utoa(linked_inode->gfid));
+
+ ret = br_object_read_sign(linked_inode, fd, object, &iatt);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_READ_AND_SIGN_FAILED,
+ "gfid=%s", uuid_utoa(linked_inode->gfid), NULL);
+ goto unref_fd;
+ }
+
+ ret = 0;
+
+unref_fd:
+ fd_unref(fd);
+unref_inode:
+ inode_unref(linked_inode);
+out:
+ return ret;
+}
+
+static br_object_t *
+__br_pick_object(br_private_t *priv)
+{
+ br_object_t *object = NULL;
+
+ while (list_empty(&priv->obj_queue->objects)) {
+ pthread_cond_wait(&priv->object_cond, &priv->lock);
+ }
+
+ object = list_first_entry(&priv->obj_queue->objects, br_object_t, list);
+ list_del_init(&object->list);
+
+ return object;
+}
+
+/**
+ * This is the place where the signing of the objects is triggered.
+ */
+void *
+br_process_object(void *arg)
+{
+ xlator_t *this = NULL;
+ br_object_t *object = NULL;
+ br_private_t *priv = NULL;
+ int32_t ret = -1;
+
+ this = arg;
+ priv = this->private;
+
+ THIS = this;
+
+ for (;;) {
+ pthread_mutex_lock(&priv->lock);
+ {
+ object = __br_pick_object(priv);
+ }
+ pthread_mutex_unlock(&priv->lock);
+
+ ret = br_sign_object(object);
+ if (ret && !br_object_sign_softerror(-ret))
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_SIGN_FAILED,
+ "gfid=%s", uuid_utoa(object->gfid), NULL);
+ GF_FREE(object);
+ }
+
+ return NULL;
+}
+
+/**
+ * This function gets kicked in once the object is expired from the
+ * timer wheel. This actually adds the object received via notification
+ * from the changelog to the queue from where the objects gets picked
+ * up for signing.
+ *
+ * This routine can be made lightweight by introducing an alternate
+ * timer-wheel API that dispatches _all_ expired objects in one-shot
+ * rather than an object at-a-time. This routine can then just simply
+ * be a call to list_splice_tail().
+ *
+ * NOTE: use call_time to instrument signing time in br_sign_object().
+ */
+void
+br_add_object_to_queue(struct gf_tw_timer_list *timer, void *data,
+ unsigned long call_time)
+{
+ br_object_t *object = NULL;
+ xlator_t *this = NULL;
+ br_private_t *priv = NULL;
+
+ object = data;
+ this = object->this;
+ priv = this->private;
+
+ THIS = this;
+
+ pthread_mutex_lock(&priv->lock);
+ {
+ list_add_tail(&object->list, &priv->obj_queue->objects);
+ pthread_cond_broadcast(&priv->object_cond);
+ }
+ pthread_mutex_unlock(&priv->lock);
+
+ if (timer)
+ mem_put(timer);
+ return;
+}
+
+static br_object_t *
+br_initialize_object(xlator_t *this, br_child_t *child, changelog_event_t *ev)
+{
+ br_object_t *object = NULL;
+
+ object = GF_CALLOC(1, sizeof(*object), gf_br_mt_br_object_t);
+ if (!object)
+ goto out;
+ INIT_LIST_HEAD(&object->list);
+
+ object->this = this;
+ object->child = child;
+ gf_uuid_copy(object->gfid, ev->u.releasebr.gfid);
+
+ /* NOTE: it's BE, but no worry */
+ object->signedversion = ev->u.releasebr.version;
+ object->sign_info = ev->u.releasebr.sign_info;
+
+out:
+ return object;
+}
+
+static struct gf_tw_timer_list *
+br_initialize_timer(xlator_t *this, br_object_t *object, br_child_t *child,
+ changelog_event_t *ev)
+{
+ br_private_t *priv = NULL;
+ struct gf_tw_timer_list *timer = NULL;
+
+ priv = this->private;
+
+ timer = mem_get0(child->timer_pool);
+ if (!timer)
+ goto out;
+ INIT_LIST_HEAD(&timer->entry);
+
+ timer->expires = priv->expiry_time;
+ if (!timer->expires)
+ timer->expires = 1;
+
+ timer->data = object;
+ timer->function = br_add_object_to_queue;
+ gf_tw_add_timer(priv->timer_wheel, timer);
+
+out:
+ return timer;
+}
+
+static int32_t
+br_schedule_object_reopen(xlator_t *this, br_object_t *object,
+ br_child_t *child, changelog_event_t *ev)
+{
+ struct gf_tw_timer_list *timer = NULL;
+
+ timer = br_initialize_timer(this, object, child, ev);
+ if (!timer)
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SET_TIMER_FAILED,
+ "gfid=%s", uuid_utoa(object->gfid), NULL);
+ return timer ? 0 : -1;
+}
+
+static int32_t
+br_object_quicksign(xlator_t *this, br_object_t *object)
+{
+ br_add_object_to_queue(NULL, object, 0ULL);
+ return 0;
+}
+
+/**
+ * This callback function registered with the changelog is executed
+ * whenever a notification from the changelog is received. This should
+ * add the object (or the gfid) on which the notification has come to
+ * the timer-wheel with some expiry time.
+ *
+ * TODO: use mem-pool for allocations and maybe allocate timer and
+ * object as a single alloc and bifurcate their respective pointers.
+ */
+void
+br_brick_callback(void *xl, char *brick, void *data, changelog_event_t *ev)
+{
+ int32_t ret = 0;
+ uuid_t gfid = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ br_object_t *object = NULL;
+ br_child_t *child = NULL;
+ br_sign_state_t sign_info = BR_SIGN_INVALID;
+
+ this = xl;
+
+ GF_VALIDATE_OR_GOTO(this->name, ev, out);
+ GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ GF_ASSERT(ev->ev_type == CHANGELOG_OP_TYPE_BR_RELEASE);
+ GF_ASSERT(!gf_uuid_is_null(ev->u.releasebr.gfid));
+
+ gf_uuid_copy(gfid, ev->u.releasebr.gfid);
+
+ gf_msg_debug(this->name, 0, "RELEASE EVENT [GFID %s]", uuid_utoa(gfid));
+
+ child = br_get_child_from_brick_path(this, brick);
+ if (!child) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SUBVOL_FAILED,
+ "brick=%s", brick, NULL);
+ goto out;
+ }
+
+ object = br_initialize_object(this, child, ev);
+ if (!object) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY,
+ "object-gfid=%s", uuid_utoa(gfid), NULL);
+ goto out;
+ }
+
+ /* sanity check */
+ sign_info = ntohl(object->sign_info);
+ GF_ASSERT(sign_info != BR_SIGN_NORMAL);
+
+ if (sign_info == BR_SIGN_REOPEN_WAIT)
+ ret = br_schedule_object_reopen(this, object, child, ev);
+ else
+ ret = br_object_quicksign(this, object);
+
+ if (ret)
+ goto free_object;
+
+ gf_msg_debug(this->name, 0, "->callback: brick [%s], type [%d]\n", brick,
+ ev->ev_type);
+ return;
+
+free_object:
+ GF_FREE(object);
+out:
+ return;
+}
+
+void
+br_fill_brick_spec(struct gf_brick_spec *brick, char *path)
+{
+ brick->brick_path = gf_strdup(path);
+ brick->filter = CHANGELOG_OP_TYPE_BR_RELEASE;
+
+ brick->init = br_brick_init;
+ brick->fini = br_brick_fini;
+ brick->callback = br_brick_callback;
+ brick->connected = NULL;
+ brick->disconnected = NULL;
+}
+
+static gf_boolean_t
+br_check_object_need_sign(xlator_t *this, dict_t *xattr, br_child_t *child)
+{
+ int32_t ret = -1;
+ gf_boolean_t need_sign = _gf_false;
+ br_isignature_out_t *sign = NULL;
+
+ GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, xattr, out);
+ GF_VALIDATE_OR_GOTO(this->name, child, out);
+
+ ret = dict_get_ptr(xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void **)&sign);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_SIGN_FAILED,
+ "object-info", NULL);
+ goto out;
+ }
+
+ /* Object has been opened and hence dirty. Do not sign it */
+ if (sign->stale)
+ need_sign = _gf_true;
+
+out:
+ return need_sign;
+}
+
+int32_t
+br_prepare_loc(xlator_t *this, br_child_t *child, loc_t *parent,
+ gf_dirent_t *entry, loc_t *loc)
+{
+ int32_t ret = -1;
+ inode_t *inode = NULL;
+
+ inode = inode_grep(child->table, parent->inode, entry->d_name);
+ if (!inode)
+ loc->inode = inode_new(child->table);
+ else {
+ loc->inode = inode;
+ if (loc->inode->ia_type != IA_IFREG) {
+ gf_msg_debug(this->name, 0,
+ "%s is not a regular "
+ "file",
+ entry->d_name);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ loc->parent = inode_ref(parent->inode);
+ gf_uuid_copy(loc->pargfid, parent->inode->gfid);
+
+ ret = inode_path(parent->inode, entry->d_name, (char **)&loc->path);
+ if (ret < 0 || !loc->path) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_PATH_FAILED,
+ "inode_path=%s", entry->d_name, "parent-gfid=%s",
+ uuid_utoa(parent->inode->gfid), NULL);
+ goto out;
+ }
+
+ loc->name = strrchr(loc->path, '/');
+ if (loc->name)
+ loc->name++;
+
+ ret = 1;
+
+out:
+ return ret;
+}
+
+/**
+ * Oneshot crawler
+ * ---------------
+ * This is a catchup mechanism. Objects that remained unsigned from the
+ * last run for whatever reason (node crashes, reboots, etc..) become
+ * candidates for signing. This allows the signature to "catch up" with
+ * the current state of the object. Triggering signing is easy: perform
+ * an open() followed by a close() thereby resulting in call boomerang.
+ * (though not back to itself :))
+ */
+int
+bitd_oneshot_crawl(xlator_t *subvol, gf_dirent_t *entry, loc_t *parent,
+ void *data)
+{
+ int op_errno = 0;
+ br_child_t *child = NULL;
+ xlator_t *this = NULL;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt iatt = {
+ 0,
+ };
+ struct iatt parent_buf = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ int32_t ret = -1;
+ inode_t *linked_inode = NULL;
+ gf_boolean_t need_signing = _gf_false;
+ gf_boolean_t need_reopen = _gf_true;
+
+ GF_VALIDATE_OR_GOTO("bit-rot", subvol, out);
+ GF_VALIDATE_OR_GOTO("bit-rot", data, out);
+
+ child = data;
+ this = child->this;
+
+ ret = br_prepare_loc(this, child, parent, entry, &loc);
+ if (!ret)
+ goto out;
+
+ ret = syncop_lookup(child->xl, &loc, &iatt, &parent_buf, NULL, NULL);
+ if (ret) {
+ br_log_object_path(this, "lookup", loc.path, -ret);
+ goto out;
+ }
+
+ linked_inode = inode_link(loc.inode, parent->inode, loc.name, &iatt);
+ if (linked_inode)
+ inode_lookup(linked_inode);
+
+ if (iatt.ia_type != IA_IFREG) {
+ gf_msg_debug(this->name, 0,
+ "%s is not a regular file, "
+ "skipping..",
+ entry->d_name);
+ ret = 0;
+ goto unref_inode;
+ }
+
+ /**
+ * As of now, 2 cases are possible and handled.
+ * 1) GlusterFS is upgraded from a previous version which does not
+ * have any idea about bit-rot and have data in the filesystem.
+ * In this case syncop_getxattr fails with ENODATA and the object
+ * is signed. (In real, when crawler sends lookup, bit-rot-stub
+ * creates the xattrs before returning lookup reply)
+ * 2) Bit-rot was not enabled or BitD was does for some reasons, during
+ * which some files were created, but since BitD was down, were not
+ * signed.
+ * If the file was just created and was being written some data when
+ * the down BitD came up, then bit-rot stub should be intelligent to
+ * identify this case (by comparing the ongoing version or by checking
+ * if there are any fds present for that inode) and handle properly.
+ */
+
+ if (bitd_is_bad_file(this, child, &loc, NULL)) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SKIP_OBJECT, "path=%s",
+ loc.path, NULL);
+ goto unref_inode;
+ }
+
+ ret = syncop_getxattr(child->xl, &loc, &xattr,
+ GLUSTERFS_GET_OBJECT_SIGNATURE, NULL, NULL);
+ if (ret < 0) {
+ op_errno = -ret;
+ br_log_object(this, "getxattr", linked_inode->gfid, op_errno);
+
+ /**
+ * No need to sign the zero byte objects as the signing
+ * happens upon first modification of the object.
+ */
+ if (op_errno == ENODATA && (iatt.ia_size != 0))
+ need_signing = _gf_true;
+ if (op_errno == EINVAL)
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ BRB_MSG_PARTIAL_VERSION_PRESENCE, "gfid=%s",
+ uuid_utoa(linked_inode->gfid), NULL);
+ } else {
+ need_signing = br_check_object_need_sign(this, xattr, child);
+
+ /*
+ * If we are here means, bitrot daemon has started. Is it just
+ * a simple restart of the daemon or is it started because the
+ * feature is enabled is something hard to determine. Hence,
+ * if need_signing is false (because bit-rot version and signature
+ * are present), then still go ahead and sign it.
+ */
+ if (!need_signing) {
+ need_signing = _gf_true;
+ need_reopen = _gf_true;
+ }
+ }
+
+ if (!need_signing)
+ goto unref_dict;
+
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_TRIGGER_SIGN, "path=%s",
+ loc.path, "gfid=%s", uuid_utoa(linked_inode->gfid), "Brick-path=%s",
+ child->brick_path, NULL);
+ br_trigger_sign(this, child, linked_inode, &loc, need_reopen);
+
+ ret = 0;
+
+unref_dict:
+ if (xattr)
+ dict_unref(xattr);
+unref_inode:
+ inode_unref(linked_inode);
+out:
+ loc_wipe(&loc);
+
+ return ret;
+}
+
+#define BR_CRAWL_THROTTLE_COUNT 50
+#define BR_CRAWL_THROTTLE_ZZZ 5
+
+void *
+br_oneshot_signer(void *arg)
+{
+ loc_t loc = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ br_child_t *child = NULL;
+
+ child = arg;
+ this = child->this;
+
+ THIS = this;
+
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_START, "brick-path=%s",
+ child->brick_path, NULL);
+
+ loc.inode = child->table->root;
+ (void)syncop_ftw_throttle(child->xl, &loc, GF_CLIENT_PID_BITD, child,
+ bitd_oneshot_crawl, BR_CRAWL_THROTTLE_COUNT,
+ BR_CRAWL_THROTTLE_ZZZ);
+
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CRAWLING_FINISH,
+ "brick-path=%s", child->brick_path, NULL);
+
+ return NULL;
+}
+
+static void
+br_set_child_state(br_child_t *child, br_child_state_t state)
+{
+ pthread_mutex_lock(&child->lock);
+ {
+ _br_set_child_state(child, state);
+ }
+ pthread_mutex_unlock(&child->lock);
+}
+
+/**
+ * At this point a thread is spawned to crawl the filesystem (in
+ * tortoise pace) to sign objects that were not signed in previous run(s).
+ * Such objects are identified by examining it's dirtyness and timestamp.
+ *
+ * pick object:
+ * signature_is_stale() && (object_timestamp() <= stub_init_time())
+ *
+ * Also, we register to the changelog library to subscribe for event
+ * notifications.
+ */
+static int32_t
+br_enact_signer(xlator_t *this, br_child_t *child, br_stub_init_t *stub)
+{
+ int32_t ret = 0;
+ br_private_t *priv = NULL;
+ struct gf_brick_spec *brick = NULL;
+
+ priv = this->private;
+
+ brick = GF_CALLOC(1, sizeof(struct gf_brick_spec),
+ gf_common_mt_gf_brick_spec_t);
+ if (!brick)
+ goto error_return;
+
+ br_fill_brick_spec(brick, stub->export);
+ ret = gf_changelog_register_generic(brick, 1, 1,
+ this->ctx->cmd_args.log_file, -1, this);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRB_MSG_REGISTER_FAILED, NULL);
+ goto dealloc;
+ }
+
+ child->threadrunning = 0;
+ ret = gf_thread_create(&child->thread, NULL, br_oneshot_signer, child,
+ "brosign");
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_SPAWN_FAILED,
+ "FS-crawler-thread", NULL);
+ else
+ child->threadrunning = 1;
+
+ /* it's OK to continue, "old" objects would be signed when modified */
+ list_add_tail(&child->list, &priv->signing);
+ return 0;
+
+dealloc:
+ GF_FREE(brick);
+error_return:
+ return -1;
+}
+
+static int32_t
+br_launch_scrubber(xlator_t *this, br_child_t *child, struct br_scanfs *fsscan,
+ struct br_scrubber *fsscrub)
+{
+ int32_t ret = -1;
+ br_private_t *priv = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+
+ scrub_monitor = &priv->scrub_monitor;
+ ret = gf_thread_create(&child->thread, NULL, br_fsscanner, child,
+ "brfsscan");
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ALERT, 0, BRB_MSG_SPAWN_FAILED,
+ "bitrot-scrubber-daemon Brick-path=%s", child->brick_path,
+ NULL);
+ goto error_return;
+ }
+
+ /* Signal monitor to kick off state machine*/
+ pthread_mutex_lock(&scrub_monitor->mutex);
+ {
+ if (!scrub_monitor->inited)
+ pthread_cond_signal(&scrub_monitor->cond);
+ scrub_monitor->inited = _gf_true;
+ }
+ pthread_mutex_unlock(&scrub_monitor->mutex);
+
+ /**
+ * Everything has been setup.. add this subvolume to scrubbers
+ * list.
+ */
+ pthread_mutex_lock(&fsscrub->mutex);
+ {
+ list_add_tail(&child->list, &fsscrub->scrublist);
+ pthread_cond_broadcast(&fsscrub->cond);
+ }
+ pthread_mutex_unlock(&fsscrub->mutex);
+
+ return 0;
+
+error_return:
+ return -1;
+}
+
+static int32_t
+br_enact_scrubber(xlator_t *this, br_child_t *child)
+{
+ int32_t ret = 0;
+ br_private_t *priv = NULL;
+ struct br_scanfs *fsscan = NULL;
+ struct br_scrubber *fsscrub = NULL;
+
+ priv = this->private;
+
+ fsscan = &child->fsscan;
+ fsscrub = &priv->fsscrub;
+
+ /**
+ * if this child already witnesses a successful connection earlier
+ * there's no need to initialize mutexes, condvars, etc..
+ */
+ if (_br_child_witnessed_connection(child))
+ return br_launch_scrubber(this, child, fsscan, fsscrub);
+
+ LOCK_INIT(&fsscan->entrylock);
+ pthread_mutex_init(&fsscan->waitlock, NULL);
+ pthread_cond_init(&fsscan->waitcond, NULL);
+
+ fsscan->entries = 0;
+ INIT_LIST_HEAD(&fsscan->queued);
+ INIT_LIST_HEAD(&fsscan->ready);
+
+ ret = br_launch_scrubber(this, child, fsscan, fsscrub);
+ if (ret)
+ goto error_return;
+
+ return 0;
+
+error_return:
+ LOCK_DESTROY(&fsscan->entrylock);
+ pthread_mutex_destroy(&fsscan->waitlock);
+ pthread_cond_destroy(&fsscan->waitcond);
+
+ return -1;
+}
+
+static int32_t
+br_child_enaction(xlator_t *this, br_child_t *child, br_stub_init_t *stub)
+{
+ int32_t ret = -1;
+ br_private_t *priv = this->private;
+
+ pthread_mutex_lock(&child->lock);
+ {
+ if (priv->iamscrubber)
+ ret = br_enact_scrubber(this, child);
+ else
+ ret = br_enact_signer(this, child, stub);
+
+ if (!ret) {
+ child->witnessed = 1;
+ _br_set_child_state(child, BR_CHILD_STATE_CONNECTED);
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_CONNECTED_TO_BRICK,
+ "brick-path=%s", child->brick_path, NULL);
+ }
+ }
+ pthread_mutex_unlock(&child->lock);
+
+ return ret;
+}
+
+/**
+ * This routine fetches various attributes associated with a child which
+ * is basically a subvolume. Attributes include brick path and the stub
+ * birth time. This is done by performing a lookup on the root followed
+ * by getxattr() on a virtual key. Depending on the configuration, the
+ * process either acts as a signer or a scrubber.
+ */
+int32_t
+br_brick_connect(xlator_t *this, br_child_t *child)
+{
+ int32_t ret = -1;
+ loc_t loc = {
+ 0,
+ };
+ struct iatt buf = {
+ 0,
+ };
+ struct iatt parent = {
+ 0,
+ };
+ br_stub_init_t *stub = NULL;
+ dict_t *xattr = NULL;
+ int op_errno = 0;
+
+ GF_VALIDATE_OR_GOTO("bit-rot", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, child, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ br_child_set_scrub_state(child, _gf_false);
+ br_set_child_state(child, BR_CHILD_STATE_INITIALIZING);
+
+ loc.inode = inode_ref(child->table->root);
+ gf_uuid_copy(loc.gfid, loc.inode->gfid);
+ loc.path = gf_strdup("/");
+
+ ret = syncop_lookup(child->xl, &loc, &buf, &parent, NULL, NULL);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_LOOKUP_FAILED,
+ NULL);
+ goto wipeloc;
+ }
+
+ ret = syncop_getxattr(child->xl, &loc, &xattr,
+ GLUSTERFS_GET_BR_STUB_INIT_TIME, NULL, NULL);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, BRB_MSG_GET_INFO_FAILED,
+ NULL);
+ goto wipeloc;
+ }
+
+ ret = dict_get_ptr(xattr, GLUSTERFS_GET_BR_STUB_INIT_TIME, (void **)&stub);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_GET_INFO_FAILED, NULL);
+ goto free_dict;
+ }
+
+ memcpy(child->brick_path, stub->export, strlen(stub->export) + 1);
+ child->tv.tv_sec = ntohl(stub->timebuf[0]);
+ child->tv.tv_usec = ntohl(stub->timebuf[1]);
+
+ ret = br_child_enaction(this, child, stub);
+
+free_dict:
+ dict_unref(xattr);
+wipeloc:
+ loc_wipe(&loc);
+out:
+ if (ret)
+ br_set_child_state(child, BR_CHILD_STATE_CONNFAILED);
+ return ret;
+}
+
+/* TODO: cleanup signer */
+static int32_t
+br_cleanup_signer(xlator_t *this, br_child_t *child)
+{
+ return 0;
+}
+
+static int32_t
+br_cleanup_scrubber(xlator_t *this, br_child_t *child)
+{
+ int32_t ret = 0;
+ br_private_t *priv = NULL;
+ struct br_scrubber *fsscrub = NULL;
+ struct br_monitor *scrub_monitor = NULL;
+
+ priv = this->private;
+ fsscrub = &priv->fsscrub;
+ scrub_monitor = &priv->scrub_monitor;
+
+ if (_br_is_child_scrub_active(child)) {
+ scrub_monitor->active_child_count--;
+ br_child_set_scrub_state(child, _gf_false);
+ }
+
+ /**
+ * 0x0: child (brick) goes out of rotation
+ *
+ * This is fully safe w.r.t. entries for this child being actively
+ * scrubbed. Each of the scrubber thread(s) would finish scrubbing
+ * the entry (probably failing due to disconnection) and either
+ * putting the entry back into the queue or continuing further.
+ * Either way, pending entries for this child's queue need not be
+ * drained; entries just sit there in the queued/ready list to be
+ * consumed later upon re-connection.
+ */
+ pthread_mutex_lock(&fsscrub->mutex);
+ {
+ list_del_init(&child->list);
+ }
+ pthread_mutex_unlock(&fsscrub->mutex);
+
+ /**
+ * 0x1: cleanup scanner thread
+ *
+ * The pending timer needs to be removed _after_ cleaning up the
+ * filesystem scanner (scheduling the next scrub time is not a
+ * cancellation point).
+ */
+ ret = gf_thread_cleanup_xint(child->thread);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUB_THREAD_CLEANUP, NULL);
+
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_SCRUBBER_CLEANED,
+ "brick-path=%s", child->brick_path, NULL);
+
+ return 0;
+}
+
+/**
+ * OK.. this child has made it's mind to go down the drain. So,
+ * let's clean up what it touched. (NOTE: there's no need to clean
+ * the inode table, it's just reused taking care of stale inodes)
+ */
+int32_t
+br_brick_disconnect(xlator_t *this, br_child_t *child)
+{
+ int32_t ret = 0;
+ struct br_monitor *scrub_monitor = NULL;
+ br_private_t *priv = this->private;
+
+ scrub_monitor = &priv->scrub_monitor;
+
+ /* Lock order should be wakelock and then child lock to
+ * dead locks.
+ */
+ pthread_mutex_lock(&scrub_monitor->wakelock);
+ {
+ pthread_mutex_lock(&child->lock);
+ {
+ if (!_br_is_child_connected(child))
+ goto unblock;
+
+ /* child is on death row.. */
+ _br_set_child_state(child, BR_CHILD_STATE_DISCONNECTED);
+
+ if (priv->iamscrubber)
+ ret = br_cleanup_scrubber(this, child);
+ else
+ ret = br_cleanup_signer(this, child);
+ }
+ unblock:
+ pthread_mutex_unlock(&child->lock);
+ }
+ pthread_mutex_unlock(&scrub_monitor->wakelock);
+
+ return ret;
+}
+
+/**
+ * This function is executed in a separate thread. The thread gets the
+ * brick from where CHILD_UP has received from the queue and gets the
+ * information regarding that brick (such as brick path).
+ */
+void *
+br_handle_events(void *arg)
+{
+ int32_t ret = 0;
+ xlator_t *this = NULL;
+ br_private_t *priv = NULL;
+ br_child_t *child = NULL;
+ struct br_child_event *childev = NULL;
+
+ this = arg;
+ priv = this->private;
+
+ /*
+ * Since, this is the topmost xlator, THIS has to be set by bit-rot
+ * xlator itself (STACK_WIND won't help in this case). Also it has
+ * to be done for each thread that gets spawned. Otherwise, a new
+ * thread will get global_xlator's pointer when it does "THIS".
+ */
+ THIS = this;
+
+ while (1) {
+ pthread_mutex_lock(&priv->lock);
+ {
+ while (list_empty(&priv->bricks))
+ pthread_cond_wait(&priv->cond, &priv->lock);
+
+ childev = list_first_entry(&priv->bricks, struct br_child_event,
+ list);
+ list_del_init(&childev->list);
+ }
+ pthread_mutex_unlock(&priv->lock);
+
+ child = childev->child;
+ ret = childev->call(this, child);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_SUBVOL_CONNECT_FAILED,
+ "name=%s", child->xl->name, NULL);
+ GF_FREE(childev);
+ }
+
+ return NULL;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int32_t ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, gf_br_stub_mt_end + 1);
+
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRB_MSG_MEM_ACNT_FAILED, NULL);
+ return ret;
+ }
+
+ return ret;
+}
+
+static void
+_br_qchild_event(xlator_t *this, br_child_t *child, br_child_handler *call)
+{
+ br_private_t *priv = NULL;
+ struct br_child_event *childev = NULL;
+
+ priv = this->private;
+
+ childev = GF_CALLOC(1, sizeof(*childev), gf_br_mt_br_child_event_t);
+ if (!childev) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_EVENT_UNHANDLED,
+ "Brick-name=%s", child->xl->name, NULL);
+ return;
+ }
+
+ INIT_LIST_HEAD(&childev->list);
+ childev->this = this;
+ childev->child = child;
+ childev->call = call;
+
+ list_add_tail(&childev->list, &priv->bricks);
+}
+
+int
+br_scrubber_status_get(xlator_t *this, dict_t **dict)
+{
+ int ret = -1;
+ br_private_t *priv = NULL;
+ struct br_scrub_stats *scrub_stats = NULL;
+
+ priv = this->private;
+
+ GF_VALIDATE_OR_GOTO("bit-rot", priv, out);
+
+ scrub_stats = &priv->scrub_stat;
+
+ ret = br_get_bad_objects_list(this, dict);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Failed to collect corrupt "
+ "files");
+ }
+
+ ret = dict_set_int8(*dict, "scrub-running", scrub_stats->scrub_running);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Failed setting scrub_running "
+ "entry to the dictionary");
+ }
+
+ ret = dict_set_uint64(*dict, "scrubbed-files", scrub_stats->scrubbed_files);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Failed to setting scrubbed file "
+ "entry to the dictionary");
+ }
+
+ ret = dict_set_uint64(*dict, "unsigned-files", scrub_stats->unsigned_files);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Failed to set unsigned file count"
+ " entry to the dictionary");
+ }
+
+ ret = dict_set_uint64(*dict, "scrub-duration", scrub_stats->scrub_duration);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Failed to set scrub duration"
+ " entry to the dictionary");
+ }
+
+ ret = dict_set_dynstr_with_alloc(*dict, "last-scrub-time",
+ scrub_stats->last_scrub_time);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Failed to set "
+ "last scrub time value");
+ }
+
+out:
+ return ret;
+}
+
+int
+notify(xlator_t *this, int32_t event, void *data, ...)
+{
+ int idx = -1;
+ int ret = -1;
+ xlator_t *subvol = NULL;
+ br_child_t *child = NULL;
+ br_private_t *priv = NULL;
+ dict_t *output = NULL;
+ va_list ap;
+ struct br_monitor *scrub_monitor = NULL;
+
+ subvol = (xlator_t *)data;
+ priv = this->private;
+ scrub_monitor = &priv->scrub_monitor;
+
+ gf_msg_trace(this->name, 0, "Notification received: %d", event);
+
+ idx = br_find_child_index(this, subvol);
+
+ switch (event) {
+ case GF_EVENT_CHILD_UP:
+ if (idx < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL,
+ "event=%d", event, NULL);
+ goto out;
+ }
+
+ pthread_mutex_lock(&priv->lock);
+ {
+ child = &priv->children[idx];
+ if (child->child_up == 1)
+ goto unblock_0;
+ priv->up_children++;
+
+ child->child_up = 1;
+ child->xl = subvol;
+ if (!child->table)
+ child->table = inode_table_new(4096, subvol);
+
+ _br_qchild_event(this, child, br_brick_connect);
+ pthread_cond_signal(&priv->cond);
+ }
+ unblock_0:
+ pthread_mutex_unlock(&priv->lock);
+
+ if (priv->up_children == priv->child_count)
+ default_notify(this, event, data);
+ break;
+
+ case GF_EVENT_CHILD_DOWN:
+ if (idx < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_INVALID_SUBVOL,
+ "event=%d", event, NULL);
+ goto out;
+ }
+
+ pthread_mutex_lock(&priv->lock);
+ {
+ child = &priv->children[idx];
+ if (child->child_up == 0)
+ goto unblock_1;
+
+ child->child_up = 0;
+ priv->up_children--;
+
+ _br_qchild_event(this, child, br_brick_disconnect);
+ pthread_cond_signal(&priv->cond);
+ }
+ unblock_1:
+ pthread_mutex_unlock(&priv->lock);
+
+ if (priv->up_children == 0)
+ default_notify(this, event, data);
+ break;
+
+ case GF_EVENT_SCRUB_STATUS:
+ gf_msg_debug(this->name, GF_LOG_INFO,
+ "BitRot scrub status "
+ "called");
+ va_start(ap, data);
+ output = va_arg(ap, dict_t *);
+ va_end(ap);
+
+ ret = br_scrubber_status_get(this, &output);
+ gf_msg_debug(this->name, 0, "returning %d", ret);
+ break;
+
+ case GF_EVENT_SCRUB_ONDEMAND:
+ gf_log(this->name, GF_LOG_INFO,
+ "BitRot scrub ondemand "
+ "called");
+
+ if (scrub_monitor->state != BR_SCRUB_STATE_PENDING) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRB_MSG_RESCHEDULE_SCRUBBER_FAILED, "Current-state=%d",
+ scrub_monitor->state, NULL);
+ return -2;
+ }
+
+ /* Needs synchronization with reconfigure thread */
+ pthread_mutex_lock(&priv->lock);
+ {
+ ret = br_scrub_state_machine(this, _gf_true);
+ }
+ pthread_mutex_unlock(&priv->lock);
+
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRB_MSG_COULD_NOT_SCHEDULE_SCRUB, NULL);
+ }
+ gf_msg_debug(this->name, 0, "returning %d", ret);
+ break;
+ default:
+ default_notify(this, event, data);
+ }
+
+out:
+ return 0;
+}
+
+static void
+br_fini_signer(xlator_t *this, br_private_t *priv)
+{
+ int i = 0;
+
+ if (priv == NULL)
+ return;
+
+ for (; i < priv->signer_th_count; i++) {
+ (void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]);
+ }
+ GF_FREE(priv->obj_queue->workers);
+
+ pthread_cond_destroy(&priv->object_cond);
+}
+
+/**
+ * Initialize signer specific structures, spawn worker threads.
+ */
+
+static int32_t
+br_init_signer(xlator_t *this, br_private_t *priv)
+{
+ int i = 0;
+ int32_t ret = -1;
+
+ /* initialize gfchangelog xlator context */
+ ret = gf_changelog_init(this);
+ if (ret)
+ goto out;
+
+ pthread_cond_init(&priv->object_cond, NULL);
+
+ priv->obj_queue = GF_CALLOC(1, sizeof(*priv->obj_queue),
+ gf_br_mt_br_ob_n_wk_t);
+ if (!priv->obj_queue)
+ goto cleanup_cond;
+ INIT_LIST_HEAD(&priv->obj_queue->objects);
+
+ priv->obj_queue->workers = GF_CALLOC(
+ priv->signer_th_count, sizeof(pthread_t), gf_br_mt_br_worker_t);
+ if (!priv->obj_queue->workers)
+ goto cleanup_obj_queue;
+
+ for (i = 0; i < priv->signer_th_count; i++) {
+ ret = gf_thread_create(&priv->obj_queue->workers[i], NULL,
+ br_process_object, this, "brpobj");
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret,
+ BRB_MSG_THREAD_CREATION_FAILED, NULL);
+ ret = -1;
+ goto cleanup_threads;
+ }
+ }
+
+ return 0;
+
+cleanup_threads:
+ for (i--; i >= 0; i--) {
+ (void)gf_thread_cleanup_xint(priv->obj_queue->workers[i]);
+ }
+ GF_FREE(priv->obj_queue->workers);
+
+cleanup_obj_queue:
+ GF_FREE(priv->obj_queue);
+
+cleanup_cond:
+ /* that's explicit */
+ pthread_cond_destroy(&priv->object_cond);
+out:
+ return -1;
+}
+
+/**
+ * For signer, only rate limit CPU usage (during hash calculation) when
+ * compiled with -DBR_RATE_LIMIT_SIGNER cflags, else let it run full
+ * throttle.
+ */
+static int32_t
+br_rate_limit_signer(xlator_t *this, int child_count, int numbricks)
+{
+ br_private_t *priv = NULL;
+ tbf_opspec_t spec = {
+ 0,
+ };
+
+ priv = this->private;
+
+ spec.op = TBF_OP_HASH;
+ spec.rate = 0;
+ spec.maxlimit = 0;
+
+ /**
+ * OK. Most implementations of TBF I've come across generate tokens
+ * every second (UML, etc..) and some chose sub-second granularity
+ * (blk-iothrottle cgroups). TBF algorithm itself does not enforce
+ * any logic for choosing generation interval and it seems pretty
+ * logical as one could jack up token count per interval w.r.t.
+ * generation rate.
+ *
+ * Value used here is chosen based on a series of test(s) performed
+ * to balance object signing time and not maxing out on all available
+ * CPU cores. It's obvious to have seconds granularity and jack up
+ * token count per interval, thereby achieving close to similar
+ * results. Let's stick to this as it seems to be working fine for
+ * the set of ops that are throttled.
+ **/
+ spec.token_gen_interval = 600000; /* In usec */
+
+#ifdef BR_RATE_LIMIT_SIGNER
+
+ double contribution = 0;
+ contribution = ((double)1 - ((double)child_count / (double)numbricks));
+ if (contribution == 0)
+ contribution = 1;
+ spec.rate = BR_HASH_CALC_READ_SIZE * contribution;
+ spec.maxlimit = priv->signer_th_count * BR_HASH_CALC_READ_SIZE;
+
+#endif
+
+ if (!spec.rate)
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO,
+ "FULL THROTTLE", NULL);
+ else
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_RATE_LIMIT_INFO,
+ "tokens/sec-rate=%lu", spec.rate, "maxlimit=%lu", spec.maxlimit,
+ NULL);
+
+ priv->tbf = tbf_init(&spec, 1);
+ return priv->tbf ? 0 : -1;
+}
+
+static int32_t
+br_signer_handle_options(xlator_t *this, br_private_t *priv, dict_t *options)
+{
+ if (options) {
+ GF_OPTION_RECONF("expiry-time", priv->expiry_time, options, uint32,
+ error_return);
+ GF_OPTION_RECONF("signer-threads", priv->signer_th_count, options,
+ uint32, error_return);
+ } else {
+ GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return);
+ GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32,
+ error_return);
+ }
+
+ return 0;
+
+error_return:
+ return -1;
+}
+
+static int32_t
+br_signer_init(xlator_t *this, br_private_t *priv)
+{
+ int32_t ret = 0;
+ int numbricks = 0;
+
+ GF_OPTION_INIT("expiry-time", priv->expiry_time, uint32, error_return);
+ GF_OPTION_INIT("brick-count", numbricks, int32, error_return);
+ GF_OPTION_INIT("signer-threads", priv->signer_th_count, uint32,
+ error_return);
+
+ ret = br_rate_limit_signer(this, priv->child_count, numbricks);
+ if (ret)
+ goto error_return;
+
+ ret = br_init_signer(this, priv);
+ if (ret)
+ goto cleanup_tbf;
+
+ return 0;
+
+cleanup_tbf:
+ /* cleanup TBF */
+error_return:
+ return -1;
+}
+
+static void
+br_free_scrubber_monitor(xlator_t *this, br_private_t *priv)
+{
+ struct br_monitor *scrub_monitor = &priv->scrub_monitor;
+
+ if (scrub_monitor->timer) {
+ (void)gf_tw_del_timer(priv->timer_wheel, scrub_monitor->timer);
+
+ GF_FREE(scrub_monitor->timer);
+ scrub_monitor->timer = NULL;
+ }
+
+ (void)gf_thread_cleanup_xint(scrub_monitor->thread);
+
+ /* Clean up cond and mutex variables */
+ pthread_mutex_destroy(&scrub_monitor->mutex);
+ pthread_cond_destroy(&scrub_monitor->cond);
+
+ pthread_mutex_destroy(&scrub_monitor->wakelock);
+ pthread_cond_destroy(&scrub_monitor->wakecond);
+
+ pthread_mutex_destroy(&scrub_monitor->donelock);
+ pthread_cond_destroy(&scrub_monitor->donecond);
+
+ LOCK_DESTROY(&scrub_monitor->lock);
+}
+
+static void
+br_free_children(xlator_t *this, br_private_t *priv, int count)
+{
+ br_child_t *child = NULL;
+
+ for (--count; count >= 0; count--) {
+ child = &priv->children[count];
+ mem_pool_destroy(child->timer_pool);
+ pthread_mutex_destroy(&child->lock);
+ }
+
+ GF_FREE(priv->children);
+ priv->children = NULL;
+}
+
+static int
+br_init_children(xlator_t *this, br_private_t *priv)
+{
+ int i = 0;
+ br_child_t *child = NULL;
+ xlator_list_t *trav = NULL;
+
+ priv->child_count = xlator_subvolume_count(this);
+ priv->children = GF_CALLOC(priv->child_count, sizeof(*priv->children),
+ gf_br_mt_br_child_t);
+ if (!priv->children)
+ goto err;
+
+ trav = this->children;
+ while (trav) {
+ child = &priv->children[i];
+
+ pthread_mutex_init(&child->lock, NULL);
+ child->witnessed = 0;
+
+ br_set_child_state(child, BR_CHILD_STATE_DISCONNECTED);
+
+ child->this = this;
+ child->xl = trav->xlator;
+
+ child->timer_pool = mem_pool_new(struct gf_tw_timer_list, 4096);
+ if (!child->timer_pool) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_MEM_POOL_ALLOC,
+ NULL);
+ errno = ENOMEM;
+ goto freechild;
+ }
+
+ INIT_LIST_HEAD(&child->list);
+
+ i++;
+ trav = trav->next;
+ }
+
+ return 0;
+
+freechild:
+ br_free_children(this, priv, i);
+err:
+ return -1;
+}
+
+int32_t
+init(xlator_t *this)
+{
+ int32_t ret = -1;
+ br_private_t *priv = NULL;
+
+ if (!this->children) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_NO_CHILD, NULL);
+ goto out;
+ }
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_br_mt_br_private_t);
+ if (!priv) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRB_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ GF_OPTION_INIT("scrubber", priv->iamscrubber, bool, free_priv);
+
+ ret = br_init_children(this, priv);
+ if (ret)
+ goto free_priv;
+
+ pthread_mutex_init(&priv->lock, NULL);
+ pthread_cond_init(&priv->cond, NULL);
+
+ INIT_LIST_HEAD(&priv->bricks);
+ INIT_LIST_HEAD(&priv->signing);
+
+ priv->timer_wheel = glusterfs_ctx_tw_get(this->ctx);
+ if (!priv->timer_wheel) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_TIMER_WHEEL_UNAVAILABLE,
+ NULL);
+ goto cleanup;
+ }
+
+ this->private = priv;
+
+ if (!priv->iamscrubber) {
+ ret = br_signer_init(this, priv);
+ if (!ret)
+ ret = br_signer_handle_options(this, priv, NULL);
+ } else {
+ ret = br_scrubber_init(this, priv);
+ if (!ret)
+ ret = br_scrubber_handle_options(this, priv, NULL);
+ }
+
+ if (ret)
+ goto cleanup;
+
+ ret = gf_thread_create(&priv->thread, NULL, br_handle_events, this,
+ "brhevent");
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, -ret, BRB_MSG_THREAD_CREATION_FAILED,
+ NULL);
+ ret = -1;
+ }
+
+ if (!ret) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRB_MSG_BITROT_LOADED, "mode=%s",
+ (priv->iamscrubber) ? "SCRUBBER" : "SIGNER", NULL);
+ return 0;
+ }
+
+cleanup:
+ (void)pthread_cond_destroy(&priv->cond);
+ (void)pthread_mutex_destroy(&priv->lock);
+
+ br_free_children(this, priv, priv->child_count);
+
+free_priv:
+ GF_FREE(priv);
+out:
+ this->private = NULL;
+ return -1;
+}
+
+void
+fini(xlator_t *this)
+{
+ br_private_t *priv = this->private;
+
+ if (!priv)
+ return;
+
+ if (!priv->iamscrubber)
+ br_fini_signer(this, priv);
+ else
+ (void)br_free_scrubber_monitor(this, priv);
+
+ br_free_children(this, priv, priv->child_count);
+
+ this->private = NULL;
+ GF_FREE(priv);
+
+ glusterfs_ctx_tw_put(this->ctx);
+
+ return;
+}
+
+static void
+br_reconfigure_monitor(xlator_t *this)
+{
+ int32_t ret = 0;
+
+ ret = br_scrub_state_machine(this, _gf_false);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRB_MSG_COULD_NOT_SCHEDULE_SCRUB,
+ NULL);
+ }
+}
+
+static int
+br_reconfigure_scrubber(xlator_t *this, dict_t *options)
+{
+ int32_t ret = -1;
+ br_private_t *priv = NULL;
+
+ priv = this->private;
+
+ pthread_mutex_lock(&priv->lock);
+ {
+ ret = br_scrubber_handle_options(this, priv, options);
+ }
+ pthread_mutex_unlock(&priv->lock);
+
+ if (ret)
+ goto err;
+
+ /* change state for all _up_ subvolume(s) */
+ pthread_mutex_lock(&priv->lock);
+ {
+ br_reconfigure_monitor(this);
+ }
+ pthread_mutex_unlock(&priv->lock);
+
+err:
+ return ret;
+}
+
+static int
+br_reconfigure_signer(xlator_t *this, dict_t *options)
+{
+ br_private_t *priv = this->private;
+
+ return br_signer_handle_options(this, priv, options);
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ int ret = 0;
+ br_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (priv->iamscrubber)
+ ret = br_reconfigure_scrubber(this, options);
+ else
+ ret = br_reconfigure_signer(this, options);
+
+ return ret;
+}
+
+struct xlator_fops fops;
+
+struct xlator_cbks cbks;
+
+struct volume_options options[] = {
+ {
+ .key = {"expiry-time"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = SIGNING_TIMEOUT,
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Waiting time for an object on which it waits "
+ "before it is signed",
+ },
+ {
+ .key = {"brick-count"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Total number of bricks for the current node for "
+ "all volumes in the trusted storage pool.",
+ },
+ {
+ .key = {"scrubber", "scrub"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_FORCE,
+ .description = "option to run as a scrubber",
+ },
+ {
+ .key = {"scrub-throttle"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "lazy",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Scrub-throttle value is a measure of how fast "
+ "or slow the scrubber scrubs the filesystem for "
+ "volume <VOLNAME>",
+ },
+ {
+ .key = {"scrub-freq"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "biweekly",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Scrub frequency for volume <VOLNAME>",
+ },
+ {
+ .key = {"scrub-state"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "active",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Pause/Resume scrub. Upon resume, scrubber "
+ "continues from where it left off.",
+ },
+ {
+ .key = {"signer-threads"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = BR_WORKERS,
+ .op_version = {GD_OP_VERSION_8_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Number of signing process threads. As a best "
+ "practice, set this to the number of processor cores",
+ },
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "bit-rot",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/bit-rot/src/bitd/bit-rot.h b/xlators/features/bit-rot/src/bitd/bit-rot.h
new file mode 100644
index 00000000000..8ac7dcdac3d
--- /dev/null
+++ b/xlators/features/bit-rot/src/bitd/bit-rot.h
@@ -0,0 +1,302 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_H__
+#define __BIT_ROT_H__
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
+#include "changelog.h"
+#include "timer-wheel.h"
+
+#include <glusterfs/throttle-tbf.h>
+#include "bit-rot-ssm.h"
+
+#include "bit-rot-common.h"
+#include "bit-rot-stub-mem-types.h"
+#include "bit-rot-scrub-status.h"
+
+#include <openssl/sha.h>
+
+typedef enum scrub_throttle {
+ BR_SCRUB_THROTTLE_VOID = -1,
+ BR_SCRUB_THROTTLE_LAZY = 0,
+ BR_SCRUB_THROTTLE_NORMAL = 1,
+ BR_SCRUB_THROTTLE_AGGRESSIVE = 2,
+ BR_SCRUB_THROTTLE_STALLED = 3,
+} scrub_throttle_t;
+
+typedef enum scrub_freq {
+ BR_FSSCRUB_FREQ_HOURLY = 1,
+ BR_FSSCRUB_FREQ_DAILY,
+ BR_FSSCRUB_FREQ_WEEKLY,
+ BR_FSSCRUB_FREQ_BIWEEKLY,
+ BR_FSSCRUB_FREQ_MONTHLY,
+ BR_FSSCRUB_FREQ_MINUTE,
+ BR_FSSCRUB_FREQ_STALLED,
+} scrub_freq_t;
+
+#define signature_size(hl) (sizeof(br_isignature_t) + hl + 1)
+
+struct br_scanfs {
+ gf_lock_t entrylock;
+
+ pthread_mutex_t waitlock;
+ pthread_cond_t waitcond;
+
+ unsigned int entries;
+ struct list_head queued;
+ struct list_head ready;
+};
+
+/* just need three states to track child status */
+typedef enum br_child_state {
+ BR_CHILD_STATE_CONNECTED = 1,
+ BR_CHILD_STATE_INITIALIZING,
+ BR_CHILD_STATE_CONNFAILED,
+ BR_CHILD_STATE_DISCONNECTED,
+} br_child_state_t;
+
+struct br_child {
+ pthread_mutex_t lock; /* protects child state */
+ char witnessed; /* witnessed at least one successful
+ connection */
+ br_child_state_t c_state; /* current state of this child */
+
+ char child_up; /* Indicates whether this child is
+ up or not */
+ xlator_t *xl; /* client xlator corresponding to
+ this child */
+ inode_table_t *table; /* inode table for this child */
+ char brick_path[PATH_MAX]; /* brick export directory of this
+ child */
+ struct list_head list; /* hook to attach to the list of
+ UP children */
+ xlator_t *this; /* Bit rot xlator */
+
+ pthread_t thread; /* initial crawler for unsigned
+ object(s) or scrub crawler */
+ int threadrunning; /* active thread */
+
+ struct mem_pool *timer_pool; /* timer-wheel's timer mem-pool */
+
+ struct timeval tv;
+
+ struct br_scanfs fsscan; /* per subvolume FS scanner */
+
+ gf_boolean_t active_scrubbing; /* Actively scrubbing or not */
+};
+
+typedef struct br_child br_child_t;
+
+struct br_obj_n_workers {
+ struct list_head objects; /* queue of objects expired from the
+ timer wheel and ready to be picked
+ up for signing */
+ pthread_t *workers; /* Threads which pick up the objects
+ from the above queue and start
+ signing each object */
+};
+
+struct br_scrubber {
+ xlator_t *this;
+
+ scrub_throttle_t throttle;
+
+ /**
+ * frequency of scanning for this subvolume. this should
+ * normally be per-child, but since all children follow the
+ * same frequency for a volume, this option ends up here
+ * instead of br_child_t.
+ */
+ scrub_freq_t frequency;
+
+ gf_boolean_t frequency_reconf;
+ gf_boolean_t throttle_reconf;
+
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+
+ unsigned int nr_scrubbers;
+ struct list_head scrubbers;
+
+ /**
+ * list of "rotatable" subvolume(s) undergoing scrubbing
+ */
+ struct list_head scrublist;
+};
+
+struct br_monitor {
+ gf_lock_t lock;
+ pthread_t thread; /* Monitor thread */
+
+ gf_boolean_t inited;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond; /* Thread starts and will be waiting on cond.
+ First child which is up wakes this up */
+
+ xlator_t *this;
+ /* scheduler */
+ uint32_t boot;
+
+ int32_t active_child_count; /* Number of children currently scrubbing */
+ gf_boolean_t kick; /* This variable tracks the scrubber is
+ * kicked or not. Both 'kick' and
+ * 'active_child_count' uses the same pair
+ * of mutex-cond variable, i.e, wakelock and
+ * wakecond. */
+
+ pthread_mutex_t wakelock;
+ pthread_cond_t wakecond;
+
+ gf_boolean_t done;
+ pthread_mutex_t donelock;
+ pthread_cond_t donecond;
+
+ struct gf_tw_timer_list *timer;
+ br_scrub_state_t state; /* current scrub state */
+};
+
+typedef struct br_obj_n_workers br_obj_n_workers_t;
+
+typedef struct br_private br_private_t;
+
+typedef void (*br_scrubbed_file_update)(br_private_t *priv);
+
+struct br_private {
+ pthread_mutex_t lock;
+
+ struct list_head bricks; /* list of bricks from which enents
+ have been received */
+
+ struct list_head signing;
+
+ pthread_cond_t object_cond; /* handling signing of objects */
+ int child_count;
+ br_child_t *children; /* list of subvolumes */
+ int up_children;
+
+ pthread_cond_t cond; /* handling CHILD_UP notifications */
+ pthread_t thread; /* thread for connecting each UP
+ child with changelog */
+
+ struct tvec_base *timer_wheel; /* timer wheel where the objects which
+ changelog has sent sits and waits
+ for expiry */
+ br_obj_n_workers_t *obj_queue; /* place holder for all the objects
+ that are expired from timer wheel
+ and ready to be picked up for
+ signing and the workers which sign
+ the objects */
+
+ uint32_t expiry_time; /* objects "wait" time */
+
+ uint32_t signer_th_count; /* Number of signing process threads */
+
+ tbf_t *tbf; /* token bucket filter */
+
+ gf_boolean_t iamscrubber; /* function as a fs scrubber */
+
+ struct br_scrub_stats scrub_stat; /* statistics of scrub*/
+
+ struct br_scrubber fsscrub; /* scrubbers for this subvolume */
+
+ struct br_monitor scrub_monitor; /* scrubber monitor */
+};
+
+struct br_object {
+ xlator_t *this;
+
+ uuid_t gfid;
+
+ unsigned long signedversion; /* version against which this object will
+ be signed */
+ br_child_t *child; /* object's subvolume */
+
+ int sign_info;
+
+ struct list_head list; /* hook to add to the queue once the
+ object is expired from timer wheel */
+ void *data;
+};
+
+typedef struct br_object br_object_t;
+typedef int32_t(br_scrub_ssm_call)(xlator_t *);
+
+void
+br_log_object(xlator_t *, char *, uuid_t, int32_t);
+
+void
+br_log_object_path(xlator_t *, char *, const char *, int32_t);
+
+int32_t
+br_calculate_obj_checksum(unsigned char *, br_child_t *, fd_t *, struct iatt *);
+
+int32_t
+br_prepare_loc(xlator_t *, br_child_t *, loc_t *, gf_dirent_t *, loc_t *);
+
+gf_boolean_t
+bitd_is_bad_file(xlator_t *, br_child_t *, loc_t *, fd_t *);
+
+static inline void
+_br_set_child_state(br_child_t *child, br_child_state_t state)
+{
+ child->c_state = state;
+}
+
+static inline int
+_br_is_child_connected(br_child_t *child)
+{
+ return (child->c_state == BR_CHILD_STATE_CONNECTED);
+}
+
+static inline int
+_br_is_child_scrub_active(br_child_t *child)
+{
+ return child->active_scrubbing;
+}
+
+static inline int
+_br_child_failed_conn(br_child_t *child)
+{
+ return (child->c_state == BR_CHILD_STATE_CONNFAILED);
+}
+
+static inline int
+_br_child_witnessed_connection(br_child_t *child)
+{
+ return (child->witnessed == 1);
+}
+
+/* scrub state */
+static inline void
+_br_monitor_set_scrub_state(struct br_monitor *scrub_monitor,
+ br_scrub_state_t state)
+{
+ scrub_monitor->state = state;
+}
+
+static inline br_scrub_event_t
+_br_child_get_scrub_event(struct br_scrubber *fsscrub)
+{
+ return (fsscrub->frequency == BR_FSSCRUB_FREQ_STALLED)
+ ? BR_SCRUB_EVENT_PAUSE
+ : BR_SCRUB_EVENT_SCHEDULE;
+}
+
+int32_t
+br_get_bad_objects_list(xlator_t *this, dict_t **dict);
+
+#endif /* __BIT_ROT_H__ */
diff --git a/xlators/features/bit-rot/src/stub/Makefile.am b/xlators/features/bit-rot/src/stub/Makefile.am
new file mode 100644
index 00000000000..f13de7145fc
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/Makefile.am
@@ -0,0 +1,20 @@
+if WITH_SERVER
+xlator_LTLIBRARIES = bitrot-stub.la
+endif
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+bitrot_stub_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+bitrot_stub_la_SOURCES = bit-rot-stub-helpers.c bit-rot-stub.c
+bitrot_stub_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = bit-rot-stub.h bit-rot-common.h bit-rot-stub-mem-types.h \
+ bit-rot-object-version.h bit-rot-stub-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-common.h b/xlators/features/bit-rot/src/stub/bit-rot-common.h
new file mode 100644
index 00000000000..20561aa7764
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-common.h
@@ -0,0 +1,178 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_COMMON_H__
+#define __BIT_ROT_COMMON_H__
+
+#include <glusterfs/glusterfs.h>
+#include "bit-rot-object-version.h"
+
+#define BR_VXATTR_VERSION (1 << 0)
+#define BR_VXATTR_SIGNATURE (1 << 1)
+
+#define BR_VXATTR_SIGN_MISSING (BR_VXATTR_SIGNATURE)
+#define BR_VXATTR_ALL_MISSING (BR_VXATTR_VERSION | BR_VXATTR_SIGNATURE)
+
+#define BR_BAD_OBJ_CONTAINER \
+ (uuid_t) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }
+
+typedef enum br_vxattr_state {
+ BR_VXATTR_STATUS_FULL = 0,
+ BR_VXATTR_STATUS_MISSING = 1,
+ BR_VXATTR_STATUS_UNSIGNED = 2,
+ BR_VXATTR_STATUS_INVALID = 3,
+} br_vxattr_status_t;
+
+typedef enum br_sign_state {
+ BR_SIGN_INVALID = -1,
+ BR_SIGN_NORMAL = 0,
+ BR_SIGN_REOPEN_WAIT = 1,
+ BR_SIGN_QUICK = 2,
+} br_sign_state_t;
+
+static inline br_vxattr_status_t
+br_version_xattr_state(dict_t *xattr, br_version_t **obuf,
+ br_signature_t **sbuf, gf_boolean_t *objbad)
+{
+ int32_t ret = 0;
+ int32_t vxattr = 0;
+ br_vxattr_status_t status;
+ void *data = NULL;
+
+ /**
+ * The key being present in the dict indicates the xattr was set on
+ * disk. The presence of xattr itself as of now is suffecient to say
+ * the the object is bad.
+ */
+ *objbad = _gf_false;
+ ret = dict_get_bin(xattr, BITROT_OBJECT_BAD_KEY, (void **)&data);
+ if (!ret)
+ *objbad = _gf_true;
+
+ ret = dict_get_bin(xattr, BITROT_CURRENT_VERSION_KEY, (void **)obuf);
+ if (ret)
+ vxattr |= BR_VXATTR_VERSION;
+
+ ret = dict_get_bin(xattr, BITROT_SIGNING_VERSION_KEY, (void **)sbuf);
+ if (ret)
+ vxattr |= BR_VXATTR_SIGNATURE;
+
+ switch (vxattr) {
+ case 0:
+ status = BR_VXATTR_STATUS_FULL;
+ break;
+ case BR_VXATTR_SIGN_MISSING:
+ status = BR_VXATTR_STATUS_UNSIGNED;
+ break;
+ case BR_VXATTR_ALL_MISSING:
+ status = BR_VXATTR_STATUS_MISSING;
+ break;
+ default:
+ status = BR_VXATTR_STATUS_INVALID;
+ }
+
+ return status;
+}
+
+/**
+ * in-memory representation of signature used by signer for object
+ * signing.
+ */
+typedef struct br_isignature_in {
+ int8_t signaturetype; /* signature type */
+
+ unsigned long signedversion; /* version against which the
+ object was signed */
+
+ size_t signaturelen; /* signature length */
+ char signature[0]; /* object signature */
+} br_isignature_t;
+
+/**
+ * in-memory representation of signature used by scrubber for object
+ * verification.
+ */
+typedef struct br_isignature_out {
+ char stale; /* stale signature? */
+
+ unsigned long version; /* current signed version */
+
+ uint32_t time[2]; /* time when the object
+ got dirtied */
+
+ int8_t signaturetype; /* hash type */
+ size_t signaturelen; /* signature length */
+ char signature[0]; /* signature (hash) */
+} br_isignature_out_t;
+
+typedef struct br_stub_init {
+ uint32_t timebuf[2];
+ char export[PATH_MAX];
+} br_stub_init_t;
+
+typedef enum {
+ BR_SIGNATURE_TYPE_VOID = -1, /* object is not signed */
+ BR_SIGNATURE_TYPE_ZERO = 0, /* min boundary */
+ BR_SIGNATURE_TYPE_SHA256 = 1, /* signed with SHA256 */
+ BR_SIGNATURE_TYPE_MAX = 2, /* max boundary */
+} br_signature_type;
+
+/* BitRot stub start time (virtual xattr) */
+#define GLUSTERFS_GET_BR_STUB_INIT_TIME "trusted.glusterfs.bit-rot.stub-init"
+
+/* signing/reopen hint */
+#define BR_OBJECT_RESIGN 0
+#define BR_OBJECT_REOPEN 1
+#define BR_REOPEN_SIGN_HINT_KEY "trusted.glusterfs.bit-rot.reopen-hint"
+
+static inline int
+br_is_signature_type_valid(int8_t signaturetype)
+{
+ return ((signaturetype > BR_SIGNATURE_TYPE_ZERO) &&
+ (signaturetype < BR_SIGNATURE_TYPE_MAX));
+}
+
+static inline void
+br_set_default_ongoingversion(br_version_t *buf, uint32_t *tv)
+{
+ buf->ongoingversion = BITROT_DEFAULT_CURRENT_VERSION;
+ buf->timebuf[0] = tv[0];
+ buf->timebuf[1] = tv[1];
+}
+
+static inline void
+br_set_default_signature(br_signature_t *buf, size_t *size)
+{
+ buf->signaturetype = (int8_t)BR_SIGNATURE_TYPE_VOID;
+ buf->signedversion = BITROT_DEFAULT_SIGNING_VERSION;
+
+ *size = sizeof(br_signature_t); /* no signature */
+}
+
+static inline void
+br_set_ongoingversion(br_version_t *buf, unsigned long version, uint32_t *tv)
+{
+ buf->ongoingversion = version;
+ buf->timebuf[0] = tv[0];
+ buf->timebuf[1] = tv[1];
+}
+
+static inline void
+br_set_signature(br_signature_t *buf, br_isignature_t *sign,
+ size_t signaturelen, size_t *size)
+{
+ buf->signaturetype = sign->signaturetype;
+ buf->signedversion = ntohl(sign->signedversion);
+
+ memcpy(buf->signature, sign->signature, signaturelen);
+ *size = sizeof(br_signature_t) + signaturelen;
+}
+
+#endif /* __BIT_ROT_COMMON_H__ */
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-object-version.h b/xlators/features/bit-rot/src/stub/bit-rot-object-version.h
new file mode 100644
index 00000000000..7ae6a5200df
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-object-version.h
@@ -0,0 +1,30 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __BIT_ROT_OBJECT_VERSION_H
+#define __BIT_ROT_OBJECT_VERSION_H
+
+/**
+ * on-disk formats for ongoing version and object signature.
+ */
+typedef struct br_version {
+ unsigned long ongoingversion;
+ uint32_t timebuf[2];
+} br_version_t;
+
+typedef struct __attribute__((__packed__)) br_signature {
+ int8_t signaturetype;
+
+ unsigned long signedversion;
+
+ char signature[0];
+} br_signature_t;
+
+#endif
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c b/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c
new file mode 100644
index 00000000000..8ac13a09941
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-helpers.c
@@ -0,0 +1,796 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "bit-rot-stub.h"
+
+br_stub_fd_t *
+br_stub_fd_new(void)
+{
+ br_stub_fd_t *br_stub_fd = NULL;
+
+ br_stub_fd = GF_CALLOC(1, sizeof(*br_stub_fd), gf_br_stub_mt_br_stub_fd_t);
+
+ return br_stub_fd;
+}
+
+int
+__br_stub_fd_ctx_set(xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd)
+{
+ uint64_t value = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("bit-rot-stub", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, br_stub_fd, out);
+
+ value = (uint64_t)(long)br_stub_fd;
+
+ ret = __fd_ctx_set(fd, this, value);
+
+out:
+ return ret;
+}
+
+br_stub_fd_t *
+__br_stub_fd_ctx_get(xlator_t *this, fd_t *fd)
+{
+ br_stub_fd_t *br_stub_fd = NULL;
+ uint64_t value = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("bit-rot-stub", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ ret = __fd_ctx_get(fd, this, &value);
+ if (ret)
+ return NULL;
+
+ br_stub_fd = (br_stub_fd_t *)((long)value);
+
+out:
+ return br_stub_fd;
+}
+
+br_stub_fd_t *
+br_stub_fd_ctx_get(xlator_t *this, fd_t *fd)
+{
+ br_stub_fd_t *br_stub_fd = NULL;
+
+ GF_VALIDATE_OR_GOTO("bit-rot-stub", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ LOCK(&fd->lock);
+ {
+ br_stub_fd = __br_stub_fd_ctx_get(this, fd);
+ }
+ UNLOCK(&fd->lock);
+
+out:
+ return br_stub_fd;
+}
+
+int32_t
+br_stub_fd_ctx_set(xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("bit-rot-stub", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, br_stub_fd, out);
+
+ LOCK(&fd->lock);
+ {
+ ret = __br_stub_fd_ctx_set(this, fd, br_stub_fd);
+ }
+ UNLOCK(&fd->lock);
+
+out:
+ return ret;
+}
+
+/**
+ * Adds an entry to the bad objects directory.
+ * @gfid: gfid of the bad object being added to the bad objects directory
+ */
+int
+br_stub_add(xlator_t *this, uuid_t gfid)
+{
+ char gfid_path[BR_PATH_MAX_PLUS] = {0};
+ char bad_gfid_path[BR_PATH_MAX_PLUS] = {0};
+ int ret = 0;
+ br_stub_private_t *priv = NULL;
+ struct stat st = {0};
+
+ priv = this->private;
+ GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, !gf_uuid_is_null(gfid), out,
+ errno, EINVAL);
+
+ snprintf(gfid_path, sizeof(gfid_path), "%s/%s", priv->stub_basepath,
+ uuid_utoa(gfid));
+
+ ret = sys_stat(gfid_path, &st);
+ if (!ret)
+ goto out;
+ snprintf(bad_gfid_path, sizeof(bad_gfid_path), "%s/stub-%s",
+ priv->stub_basepath, uuid_utoa(priv->bad_object_dir_gfid));
+
+ ret = sys_link(bad_gfid_path, gfid_path);
+ if (ret) {
+ if ((errno != ENOENT) && (errno != EMLINK) && (errno != EEXIST))
+ goto out;
+
+ /*
+ * Continue with success. At least we'll have half of the
+ * functionality, in the sense, object is marked bad and
+ * would be inaccessible. It's only scrub status that would
+ * show up less number of objects. That's fine as we'll have
+ * the log files that will have the missing information.
+ */
+ gf_smsg(this->name, GF_LOG_WARNING, errno, BRS_MSG_LINK_FAIL, "gfid=%s",
+ uuid_utoa(gfid), NULL);
+ }
+
+ return 0;
+out:
+ return -1;
+}
+
+int
+br_stub_del(xlator_t *this, uuid_t gfid)
+{
+ int32_t op_errno __attribute__((unused)) = 0;
+ br_stub_private_t *priv = NULL;
+ int ret = 0;
+ char gfid_path[BR_PATH_MAX_PLUS] = {0};
+
+ priv = this->private;
+ GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, !gf_uuid_is_null(gfid), out,
+ op_errno, EINVAL);
+ snprintf(gfid_path, sizeof(gfid_path), "%s/%s", priv->stub_basepath,
+ uuid_utoa(gfid));
+ ret = sys_unlink(gfid_path);
+ if (ret && (errno != ENOENT)) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJ_UNLINK_FAIL,
+ "path=%s", gfid_path, NULL);
+ ret = -errno;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static int
+br_stub_check_stub_directory(xlator_t *this, char *fullpath)
+{
+ int ret = 0;
+ struct stat st = {
+ 0,
+ };
+ char oldpath[BR_PATH_MAX_PLUS] = {0};
+ br_stub_private_t *priv = NULL;
+
+ priv = this->private;
+
+ snprintf(oldpath, sizeof(oldpath), "%s/%s", priv->export,
+ OLD_BR_STUB_QUARANTINE_DIR);
+
+ ret = sys_stat(fullpath, &st);
+ if (!ret && !S_ISDIR(st.st_mode))
+ goto error_return;
+ if (ret) {
+ if (errno != ENOENT)
+ goto error_return;
+ ret = sys_stat(oldpath, &st);
+ if (ret)
+ ret = mkdir_p(fullpath, 0600, _gf_true);
+ else
+ ret = sys_rename(oldpath, fullpath);
+ }
+
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
+ "create-path=%s", fullpath, NULL);
+ return ret;
+
+error_return:
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
+ "verify-path=%s", fullpath, NULL);
+ return -1;
+}
+
+/**
+ * Function to create the container for the bad objects within the bad objects
+ * directory.
+ */
+static int
+br_stub_check_stub_file(xlator_t *this, char *path)
+{
+ int ret = 0;
+ int fd = -1;
+ struct stat st = {
+ 0,
+ };
+
+ ret = sys_stat(path, &st);
+ if (!ret && !S_ISREG(st.st_mode))
+ goto error_return;
+ if (ret) {
+ if (errno != ENOENT)
+ goto error_return;
+ fd = sys_creat(path, 0);
+ if (fd < 0)
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ BRS_MSG_BAD_OBJECT_DIR_FAIL, "create-path=%s", path, NULL);
+ }
+
+ if (fd >= 0) {
+ sys_close(fd);
+ ret = 0;
+ }
+
+ return ret;
+
+error_return:
+ gf_smsg(this->name, GF_LOG_ERROR, errno, BRS_MSG_BAD_OBJECT_DIR_FAIL,
+ "verify-path=%s", path, NULL);
+ return -1;
+}
+
+int
+br_stub_dir_create(xlator_t *this, br_stub_private_t *priv)
+{
+ int ret = -1;
+ char fullpath[BR_PATH_MAX_PLUS] = {
+ 0,
+ };
+ char stub_gfid_path[BR_PATH_MAX_PLUS] = {
+ 0,
+ };
+
+ gf_uuid_copy(priv->bad_object_dir_gfid, BR_BAD_OBJ_CONTAINER);
+
+ if (snprintf(fullpath, sizeof(fullpath), "%s", priv->stub_basepath) >=
+ sizeof(fullpath))
+ goto out;
+
+ if (snprintf(stub_gfid_path, sizeof(stub_gfid_path), "%s/stub-%s",
+ priv->stub_basepath, uuid_utoa(priv->bad_object_dir_gfid)) >=
+ sizeof(stub_gfid_path))
+ goto out;
+
+ ret = br_stub_check_stub_directory(this, fullpath);
+ if (ret)
+ goto out;
+ ret = br_stub_check_stub_file(this, stub_gfid_path);
+ if (ret)
+ goto out;
+
+ return 0;
+
+out:
+ return -1;
+}
+
+call_stub_t *
+__br_stub_dequeue(struct list_head *callstubs)
+{
+ call_stub_t *stub = NULL;
+
+ if (!list_empty(callstubs)) {
+ stub = list_entry(callstubs->next, call_stub_t, list);
+ list_del_init(&stub->list);
+ }
+
+ return stub;
+}
+
+void
+__br_stub_enqueue(struct list_head *callstubs, call_stub_t *stub)
+{
+ list_add_tail(&stub->list, callstubs);
+}
+
+void
+br_stub_worker_enqueue(xlator_t *this, call_stub_t *stub)
+{
+ br_stub_private_t *priv = NULL;
+
+ priv = this->private;
+ pthread_mutex_lock(&priv->container.bad_lock);
+ {
+ __br_stub_enqueue(&priv->container.bad_queue, stub);
+ pthread_cond_signal(&priv->container.bad_cond);
+ }
+ pthread_mutex_unlock(&priv->container.bad_lock);
+}
+
+void *
+br_stub_worker(void *data)
+{
+ br_stub_private_t *priv = NULL;
+ xlator_t *this = NULL;
+ call_stub_t *stub = NULL;
+
+ THIS = data;
+ this = data;
+ priv = this->private;
+
+ for (;;) {
+ pthread_mutex_lock(&priv->container.bad_lock);
+ {
+ while (list_empty(&priv->container.bad_queue)) {
+ (void)pthread_cond_wait(&priv->container.bad_cond,
+ &priv->container.bad_lock);
+ }
+
+ stub = __br_stub_dequeue(&priv->container.bad_queue);
+ }
+ pthread_mutex_unlock(&priv->container.bad_lock);
+
+ if (stub) /* guard against spurious wakeups */
+ call_resume(stub);
+ }
+
+ return NULL;
+}
+
+int32_t
+br_stub_lookup_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xattr_req)
+{
+ br_stub_private_t *priv = NULL;
+ struct stat lstatbuf = {0};
+ int ret = 0;
+ int32_t op_errno = EINVAL;
+ int32_t op_ret = -1;
+ struct iatt stbuf = {
+ 0,
+ };
+ struct iatt postparent = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ gf_boolean_t ver_enabled = _gf_false;
+
+ BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled);
+ priv = this->private;
+ BR_STUB_VER_COND_GOTO(priv, (!ver_enabled), done);
+
+ VALIDATE_OR_GOTO(loc, done);
+ if (gf_uuid_compare(loc->gfid, priv->bad_object_dir_gfid))
+ goto done;
+
+ ret = sys_lstat(priv->stub_basepath, &lstatbuf);
+ if (ret) {
+ gf_msg_debug(this->name, errno,
+ "Stat failed on stub bad "
+ "object dir");
+ op_errno = errno;
+ goto done;
+ } else if (!S_ISDIR(lstatbuf.st_mode)) {
+ gf_msg_debug(this->name, errno,
+ "bad object container is not "
+ "a directory");
+ op_errno = ENOTDIR;
+ goto done;
+ }
+
+ iatt_from_stat(&stbuf, &lstatbuf);
+ gf_uuid_copy(stbuf.ia_gfid, priv->bad_object_dir_gfid);
+
+ op_ret = op_errno = 0;
+ xattr = dict_new();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
+
+done:
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, loc->inode, &stbuf,
+ xattr, &postparent);
+ if (xattr)
+ dict_unref(xattr);
+ return 0;
+}
+
+static int
+is_bad_gfid_file_current(char *filename, uuid_t gfid)
+{
+ char current_stub_gfid[GF_UUID_BUF_SIZE + 16] = {
+ 0,
+ };
+
+ snprintf(current_stub_gfid, sizeof current_stub_gfid, "stub-%s",
+ uuid_utoa(gfid));
+ return (!strcmp(filename, current_stub_gfid));
+}
+
+static void
+check_delete_stale_bad_file(xlator_t *this, char *filename)
+{
+ int ret = 0;
+ struct stat st = {0};
+ char filepath[BR_PATH_MAX_PLUS] = {0};
+ br_stub_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (is_bad_gfid_file_current(filename, priv->bad_object_dir_gfid))
+ return;
+
+ snprintf(filepath, sizeof(filepath), "%s/%s", priv->stub_basepath,
+ filename);
+
+ ret = sys_stat(filepath, &st);
+ if (!ret && st.st_nlink == 1)
+ sys_unlink(filepath);
+}
+
+static int
+br_stub_fill_readdir(fd_t *fd, br_stub_fd_t *fctx, DIR *dir, off_t off,
+ size_t size, gf_dirent_t *entries)
+{
+ off_t in_case = -1;
+ off_t last_off = 0;
+ size_t filled = 0;
+ int count = 0;
+ int32_t this_size = -1;
+ gf_dirent_t *this_entry = NULL;
+ xlator_t *this = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+
+ this = THIS;
+ if (!off) {
+ rewinddir(dir);
+ } else {
+ seekdir(dir, off);
+#ifndef GF_LINUX_HOST_OS
+ if ((u_long)telldir(dir) != off && off != fctx->bad_object.dir_eof) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL, "off=(0x%llx)", off,
+ "dir=%p", dir, NULL);
+ errno = EINVAL;
+ count = -1;
+ goto out;
+ }
+#endif /* GF_LINUX_HOST_OS */
+ }
+
+ while (filled <= size) {
+ in_case = (u_long)telldir(dir);
+
+ if (in_case == -1) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL, "dir=%p", dir, "err=%s",
+ strerror(errno), NULL);
+ goto out;
+ }
+
+ errno = 0;
+ entry = sys_readdir(dir, scratch);
+ if (!entry || errno != 0) {
+ if (errno == EBADF) {
+ gf_smsg(THIS->name, GF_LOG_WARNING, 0,
+ BRS_MSG_BAD_OBJECT_DIR_READ_FAIL, "dir=%p", dir,
+ "err=%s", strerror(errno), NULL);
+ goto out;
+ }
+ break;
+ }
+
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ continue;
+
+ if (!strncmp(entry->d_name, "stub-", strlen("stub-"))) {
+ check_delete_stale_bad_file(this, entry->d_name);
+ continue;
+ }
+
+ this_size = max(sizeof(gf_dirent_t), sizeof(gfs3_dirplist)) +
+ strlen(entry->d_name) + 1;
+
+ if (this_size + filled > size) {
+ seekdir(dir, in_case);
+#ifndef GF_LINUX_HOST_OS
+ if ((u_long)telldir(dir) != in_case &&
+ in_case != fctx->bad_object.dir_eof) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL, "in_case=(0x%llx)",
+ in_case, "dir=%p", dir, NULL);
+ errno = EINVAL;
+ count = -1;
+ goto out;
+ }
+#endif /* GF_LINUX_HOST_OS */
+ break;
+ }
+
+ this_entry = gf_dirent_for_name(entry->d_name);
+
+ if (!this_entry) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, 0,
+ BRS_MSG_CREATE_GF_DIRENT_FAILED, "entry-name=%s",
+ entry->d_name, "err=%s", strerror(errno), NULL);
+ goto out;
+ }
+ /*
+ * we store the offset of next entry here, which is
+ * probably not intended, but code using syncop_readdir()
+ * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it
+ * for directory read resumption.
+ */
+ last_off = (u_long)telldir(dir);
+ this_entry->d_off = last_off;
+ this_entry->d_ino = entry->d_ino;
+
+ list_add_tail(&this_entry->list, &entries->list);
+
+ filled += this_size;
+ count++;
+ }
+
+ if ((!sys_readdir(dir, scratch) && (errno == 0))) {
+ /* Indicate EOF */
+ errno = ENOENT;
+ /* Remember EOF offset for later detection */
+ fctx->bad_object.dir_eof = last_off;
+ }
+out:
+ return count;
+}
+
+int32_t
+br_stub_readdir_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *xdata)
+{
+ br_stub_fd_t *fctx = NULL;
+ DIR *dir = NULL;
+ int ret = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int count = 0;
+ gf_dirent_t entries;
+ gf_boolean_t xdata_unref = _gf_false;
+ dict_t *dict = NULL;
+
+ INIT_LIST_HEAD(&entries.list);
+
+ fctx = br_stub_fd_ctx_get(this, fd);
+ if (!fctx) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_FD_CONTEXT_FAILED,
+ "fd=%p", fd, NULL);
+ op_errno = -ret;
+ goto done;
+ }
+
+ dir = fctx->bad_object.dir;
+
+ if (!dir) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_BAD_HANDLE_DIR_NULL,
+ "fd=%p", fd, NULL);
+ op_errno = EINVAL;
+ goto done;
+ }
+
+ count = br_stub_fill_readdir(fd, fctx, dir, off, size, &entries);
+
+ /* pick ENOENT to indicate EOF */
+ op_errno = errno;
+ op_ret = count;
+
+ dict = xdata;
+ (void)br_stub_bad_objects_path(this, fd, &entries, &dict);
+ if (!xdata && dict) {
+ xdata = dict;
+ xdata_unref = _gf_true;
+ }
+
+done:
+ STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, &entries, xdata);
+ gf_dirent_free(&entries);
+ if (xdata_unref)
+ dict_unref(xdata);
+ return 0;
+}
+
+/**
+ * This function is called to mainly obtain the paths of the corrupt
+ * objects (files as of now). Currently scrub status prints only the
+ * gfid of the corrupted files. Reason is, bitrot-stub maintains the
+ * list of the corrupted objects as entries inside the quarantine
+ * directory (<brick export>/.glusterfs/quarantine)
+ *
+ * And the name of each entry in the qurantine directory is the gfid
+ * of the corrupted object. So scrub status will just show that info.
+ * But it helps the users a lot if the actual path to the object is
+ * also reported. Hence the below function to get that information.
+ * The function allocates a new dict to be returned (if it does not
+ * get one from the caller of readdir i.e. scrubber as of now), and
+ * stores the paths of each corrupted gfid there. The gfid is used as
+ * the key and path is used as the value.
+ *
+ * NOTE: The path will be there in following situations
+ * 1) gfid2path option has been enabled (posix xlator option)
+ * and the corrupted file contains the path as an extended
+ * attribute.
+ * 2) If the gfid2path option is not enabled, OR if the xattr
+ * is absent, then the inode table should have it.
+ * The path will be there if a name based lookup has happened
+ * on the file which has been corrupted. With lookup a inode and
+ * dentry would be created in the inode table. And the path is
+ * constructed using the in memory inode and dentry. If a lookup
+ * has not happened OR the inode corresponding to the corrupted
+ * file does not exist in the inode table (because it got purged
+ * as lru limit of the inodes exceeded) OR a nameless lookup had
+ * happened to populate the inode in the inode table, then the
+ * path will not be printed in scrub and only the gfid will be there.
+ **/
+int
+br_stub_bad_objects_path(xlator_t *this, fd_t *fd, gf_dirent_t *entries,
+ dict_t **dict)
+{
+ gf_dirent_t *entry = NULL;
+ inode_t *inode = NULL;
+ char *hpath = NULL;
+ uuid_t gfid = {0};
+ int ret = -1;
+ dict_t *tmp_dict = NULL;
+ char str_gfid[64] = {0};
+
+ if (list_empty(&entries->list))
+ return 0;
+
+ tmp_dict = *dict;
+
+ if (!tmp_dict) {
+ tmp_dict = dict_new();
+ /*
+ * If the allocation of dict fails then no need treat it
+ * it as a error. This path (or function) is executed when
+ * "gluster volume bitrot <volume name> scrub status" is
+ * executed, to get the list of the corrupted objects.
+ * And the motive of this function is to get the paths of
+ * the corrupted objects. If the dict allocation fails, then
+ * the scrub status will only show the gfids of those corrupted
+ * objects (which is the behavior as of the time of this patch
+ * being worked upon). So just return and only the gfids will
+ * be shown.
+ */
+ if (!tmp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_FAILED, NULL);
+ goto out;
+ }
+ }
+
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ gf_uuid_clear(gfid);
+ gf_uuid_parse(entry->d_name, gfid);
+
+ inode = inode_find(fd->inode->table, gfid);
+
+ /* No need to check the return value here.
+ * Because @hpath is examined.
+ */
+ (void)br_stub_get_path_of_gfid(this, fd->inode, inode, gfid, &hpath);
+
+ if (hpath) {
+ gf_msg_debug(this->name, 0,
+ "path of the corrupted "
+ "object (gfid: %s) is %s",
+ uuid_utoa(gfid), hpath);
+ br_stub_entry_xattr_fill(this, hpath, entry, tmp_dict);
+ } else
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED,
+ "gfid=%s", uuid_utoa_r(gfid, str_gfid), NULL);
+
+ inode = NULL;
+ hpath = NULL;
+ }
+
+ ret = 0;
+ *dict = tmp_dict;
+
+out:
+ return ret;
+}
+
+int
+br_stub_get_path_of_gfid(xlator_t *this, inode_t *parent, inode_t *inode,
+ uuid_t gfid, char **path)
+{
+ int32_t ret = -1;
+ char gfid_str[64] = {0};
+
+ GF_VALIDATE_OR_GOTO("bitrot-stub", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, parent, out);
+ GF_VALIDATE_OR_GOTO(this->name, path, out);
+
+ /* Above, No need to validate the @inode for hard resolution. Because
+ * inode can be NULL and if it is NULL, then syncop_gfid_to_path_hard
+ * will allocate a new inode and proceed. So no need to bother about
+ * @inode. Because we need it only to send a syncop_getxattr call
+ * from inside syncop_gfid_to_path_hard. And getxattr fetches the
+ * path from the backend.
+ */
+
+ ret = syncop_gfid_to_path_hard(parent->table, FIRST_CHILD(this), gfid,
+ inode, path, _gf_true);
+ if (ret < 0)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED,
+ "gfid=%s", uuid_utoa_r(gfid, gfid_str), NULL);
+
+ /*
+ * Try with soft resolution of path if hard resolve fails. Because
+ * checking the xattr on disk to get the path of a inode (or gfid)
+ * is dependent on whether that option is enabled in the posix
+ * xlator or not. If it is not enabled, then hard resolution by
+ * checking the on disk xattr fails.
+ *
+ * Thus in such situations fall back to the soft resolution which
+ * mainly depends on the inode_path() function. And for using
+ * inode_path, @inode has to be linked i.e. a successful lookup should
+ * have happened on the gfid (or the path) to link the inode to the
+ * inode table. And if @inode is NULL, means, the inode has not been
+ * found in the inode table and better not to do inode_path() on the
+ * inode which has not been linked.
+ */
+ if (ret < 0 && inode) {
+ ret = syncop_gfid_to_path_hard(parent->table, FIRST_CHILD(this), gfid,
+ inode, path, _gf_false);
+ if (ret < 0)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_PATH_GET_FAILED,
+ "from-memory gfid=%s", uuid_utoa_r(gfid, gfid_str), NULL);
+ }
+
+out:
+ return ret;
+}
+
+/**
+ * NOTE: If the file has multiple hardlinks (in gluster volume
+ * namespace), the path would be one of the hardlinks. Its up to
+ * the user to find the remaining hardlinks (using find -samefile)
+ * and remove them.
+ **/
+void
+br_stub_entry_xattr_fill(xlator_t *this, char *hpath, gf_dirent_t *entry,
+ dict_t *dict)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("bit-rot-stub", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, hpath, out);
+
+ /*
+ * Use the entry->d_name (which is nothing but the gfid of the
+ * corrupted object) as the key. And the value will be the actual
+ * path of that object (or file).
+ *
+ * ALso ignore the dict_set errors. scrubber will get the gfid of
+ * the corrupted object for sure. So, for now lets just log the
+ * dict_set_dynstr failure and move on.
+ */
+
+ ret = dict_set_dynstr(dict, entry->d_name, hpath);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_DICT_SET_FAILED,
+ "path=%s", hpath, "object-name=%s", entry->d_name, NULL);
+out:
+ return;
+}
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
new file mode 100644
index 00000000000..9d93caf069f
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-mem-types.h
@@ -0,0 +1,36 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _BR_MEM_TYPES_H
+#define _BR_MEM_TYPES_H
+
+#include <glusterfs/mem-types.h>
+
+enum br_mem_types {
+ gf_br_stub_mt_private_t = gf_common_mt_end + 1,
+ gf_br_stub_mt_version_t,
+ gf_br_stub_mt_inode_ctx_t,
+ gf_br_stub_mt_signature_t,
+ gf_br_mt_br_private_t,
+ gf_br_mt_br_child_t,
+ gf_br_mt_br_object_t,
+ gf_br_mt_br_ob_n_wk_t,
+ gf_br_mt_br_scrubber_t,
+ gf_br_mt_br_fsscan_entry_t,
+ gf_br_stub_mt_br_stub_fd_t,
+ gf_br_stub_mt_br_scanner_freq_t,
+ gf_br_stub_mt_sigstub_t,
+ gf_br_mt_br_child_event_t,
+ gf_br_stub_mt_misc,
+ gf_br_mt_br_worker_t,
+ gf_br_stub_mt_end,
+};
+
+#endif
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
new file mode 100644
index 00000000000..6c15a166f18
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub-messages.h
@@ -0,0 +1,117 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _BITROT_STUB_MESSAGES_H_
+#define _BITROT_STUB_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(BITROT_STUB, BRS_MSG_NO_MEMORY, BRS_MSG_SET_EVENT_FAILED,
+ BRS_MSG_MEM_ACNT_FAILED, BRS_MSG_CREATE_FRAME_FAILED,
+ BRS_MSG_SET_CONTEXT_FAILED, BRS_MSG_CHANGE_VERSION_FAILED,
+ BRS_MSG_ADD_FD_TO_LIST_FAILED, BRS_MSG_SET_FD_CONTEXT_FAILED,
+ BRS_MSG_CREATE_ANONYMOUS_FD_FAILED, BRS_MSG_NO_CHILD,
+ BRS_MSG_STUB_ALLOC_FAILED, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+ BRS_MSG_CANCEL_SIGN_THREAD_FAILED, BRS_MSG_ADD_FD_TO_INODE,
+ BRS_MSG_SIGN_VERSION_ERROR, BRS_MSG_BAD_OBJ_MARK_FAIL,
+ BRS_MSG_NON_SCRUB_BAD_OBJ_MARK, BRS_MSG_REMOVE_INTERNAL_XATTR,
+ BRS_MSG_SET_INTERNAL_XATTR, BRS_MSG_BAD_OBJECT_ACCESS,
+ BRS_MSG_BAD_CONTAINER_FAIL, BRS_MSG_BAD_OBJECT_DIR_FAIL,
+ BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL, BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL,
+ BRS_MSG_BAD_OBJECT_DIR_READ_FAIL, BRS_MSG_GET_FD_CONTEXT_FAILED,
+ BRS_MSG_BAD_HANDLE_DIR_NULL, BRS_MSG_BAD_OBJ_THREAD_FAIL,
+ BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL, BRS_MSG_LINK_FAIL,
+ BRS_MSG_BAD_OBJ_UNLINK_FAIL, BRS_MSG_DICT_SET_FAILED,
+ BRS_MSG_PATH_GET_FAILED, BRS_MSG_NULL_LOCAL,
+ BRS_MSG_SPAWN_SIGN_THRD_FAILED, BRS_MSG_KILL_SIGN_THREAD,
+ BRS_MSG_NON_BITD_PID, BRS_MSG_SIGN_PREPARE_FAIL,
+ BRS_MSG_USING_DEFAULT_THREAD_SIZE, BRS_MSG_ALLOC_MEM_FAILED,
+ BRS_MSG_DICT_ALLOC_FAILED, BRS_MSG_CREATE_GF_DIRENT_FAILED,
+ BRS_MSG_ALLOC_FAILED, BRS_MSG_PATH_XATTR_GET_FAILED,
+ BRS_MSG_VERSION_PREPARE_FAIL);
+
+#define BRS_MSG_MEM_ACNT_FAILED_STR "Memory accounting init failed"
+#define BRS_MSG_BAD_OBJ_THREAD_FAIL_STR "pthread_init failed"
+#define BRS_MSG_USING_DEFAULT_THREAD_SIZE_STR "Using default thread stack size"
+#define BRS_MSG_NO_CHILD_STR "FATAL: no children"
+#define BRS_MSG_SPAWN_SIGN_THRD_FAILED_STR \
+ "failed to create the new thread for signer"
+#define BRS_MSG_BAD_CONTAINER_FAIL_STR \
+ "failed to launch the thread for storing bad gfids"
+#define BRS_MSG_CANCEL_SIGN_THREAD_FAILED_STR \
+ "Could not cancel sign serializer thread"
+#define BRS_MSG_KILL_SIGN_THREAD_STR "killed the signer thread"
+#define BRS_MSG_GET_INODE_CONTEXT_FAILED_STR \
+ "failed to init the inode context for the inode"
+#define BRS_MSG_ADD_FD_TO_INODE_STR "failed to add fd to the inode"
+#define BRS_MSG_NO_MEMORY_STR "local allocation failed"
+#define BRS_MSG_BAD_OBJECT_ACCESS_STR "bad object accessed. Returning"
+#define BRS_MSG_SIGN_VERSION_ERROR_STR "Signing version exceeds current version"
+#define BRS_MSG_NON_BITD_PID_STR \
+ "PID from where signature request came, does not belong to bit-rot " \
+ "daemon. Unwinding the fop"
+#define BRS_MSG_SIGN_PREPARE_FAIL_STR \
+ "failed to prepare the signature. Unwinding the fop"
+#define BRS_MSG_VERSION_PREPARE_FAIL_STR \
+ "failed to prepare the version. Unwinding the fop"
+#define BRS_MSG_STUB_ALLOC_FAILED_STR "failed to allocate stub fop, Unwinding"
+#define BRS_MSG_BAD_OBJ_MARK_FAIL_STR "failed to mark object as bad"
+#define BRS_MSG_NON_SCRUB_BAD_OBJ_MARK_STR \
+ "bad object marking is not from the scrubber"
+#define BRS_MSG_ALLOC_MEM_FAILED_STR "failed to allocate memory"
+#define BRS_MSG_SET_INTERNAL_XATTR_STR "called on the internal xattr"
+#define BRS_MSG_REMOVE_INTERNAL_XATTR_STR "removexattr called on internal xattr"
+#define BRS_MSG_CREATE_ANONYMOUS_FD_FAILED_STR \
+ "failed to create anonymous fd for the inode"
+#define BRS_MSG_ADD_FD_TO_LIST_FAILED_STR "failed add fd to the list"
+#define BRS_MSG_SET_FD_CONTEXT_FAILED_STR \
+ "failed to set the fd context for the file"
+#define BRS_MSG_NULL_LOCAL_STR "local is NULL"
+#define BRS_MSG_DICT_ALLOC_FAILED_STR \
+ "dict allocation failed: cannot send IPC FOP to changelog"
+#define BRS_MSG_SET_EVENT_FAILED_STR "cannot set release event in dict"
+#define BRS_MSG_CREATE_FRAME_FAILED_STR "create_frame() failure"
+#define BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL_STR "closedir error"
+#define BRS_MSG_LINK_FAIL_STR "failed to record gfid"
+#define BRS_MSG_BAD_OBJ_UNLINK_FAIL_STR \
+ "failed to delete bad object link from quaratine directory"
+#define BRS_MSG_BAD_OBJECT_DIR_FAIL_STR "failed stub directory"
+#define BRS_MSG_BAD_OBJECT_DIR_SEEK_FAIL_STR \
+ "seekdir failed. Invalid argument (offset reused from another DIR * " \
+ "structure)"
+#define BRS_MSG_BAD_OBJECT_DIR_TELL_FAIL_STR "telldir failed on dir"
+#define BRS_MSG_BAD_OBJECT_DIR_READ_FAIL_STR "readdir failed on dir"
+#define BRS_MSG_CREATE_GF_DIRENT_FAILED_STR "could not create gf_dirent"
+#define BRS_MSG_GET_FD_CONTEXT_FAILED_STR "pfd is NULL"
+#define BRS_MSG_BAD_HANDLE_DIR_NULL_STR "dir if NULL"
+#define BRS_MSG_ALLOC_FAILED_STR \
+ "failed to allocate new dict for saving the paths of the corrupted " \
+ "objects. Scrub status will only display the gfid"
+#define BRS_MSG_PATH_GET_FAILED_STR "failed to get the path"
+#define BRS_MSG_PATH_XATTR_GET_FAILED_STR \
+ "failed to get the path xattr from disk for the gfid. Trying to get path " \
+ "from the memory"
+#define BRS_MSG_DICT_SET_FAILED_STR \
+ "failed to set the actual path as the value in the dict for the " \
+ "corrupted object"
+#define BRS_MSG_SET_CONTEXT_FAILED_STR \
+ "could not set fd context for release callback"
+#define BRS_MSG_CHANGE_VERSION_FAILED_STR "change version failed"
+#endif /* !_BITROT_STUB_MESSAGES_H_ */
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.c b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
new file mode 100644
index 00000000000..447dd47ff41
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.c
@@ -0,0 +1,3590 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <ctype.h>
+#include <sys/uio.h>
+#include <signal.h>
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include "changelog.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/call-stub.h>
+
+#include "bit-rot-stub.h"
+#include "bit-rot-stub-mem-types.h"
+#include "bit-rot-stub-messages.h"
+#include "bit-rot-common.h"
+
+#define BR_STUB_REQUEST_COOKIE 0x1
+
+void
+br_stub_lock_cleaner(void *arg)
+{
+ pthread_mutex_t *clean_mutex = arg;
+
+ pthread_mutex_unlock(clean_mutex);
+ return;
+}
+
+void *
+br_stub_signth(void *);
+
+struct br_stub_signentry {
+ unsigned long v;
+
+ call_stub_t *stub;
+
+ struct list_head list;
+};
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int32_t ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, gf_br_stub_mt_end + 1);
+
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_MEM_ACNT_FAILED, NULL);
+ return ret;
+ }
+
+ return ret;
+}
+
+int
+br_stub_bad_object_container_init(xlator_t *this, br_stub_private_t *priv)
+{
+ pthread_attr_t w_attr;
+ int ret = -1;
+
+ ret = pthread_cond_init(&priv->container.bad_cond, NULL);
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
+ "cond_init ret=%d", ret, NULL);
+ goto out;
+ }
+
+ ret = pthread_mutex_init(&priv->container.bad_lock, NULL);
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
+ "mutex_init ret=%d", ret, NULL);
+ goto cleanup_cond;
+ }
+
+ ret = pthread_attr_init(&w_attr);
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_THREAD_FAIL,
+ "attr_init ret=%d", ret, NULL);
+ goto cleanup_lock;
+ }
+
+ ret = pthread_attr_setstacksize(&w_attr, BAD_OBJECT_THREAD_STACK_SIZE);
+ if (ret == EINVAL) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ BRS_MSG_USING_DEFAULT_THREAD_SIZE, NULL);
+ }
+
+ INIT_LIST_HEAD(&priv->container.bad_queue);
+ ret = br_stub_dir_create(this, priv);
+ if (ret < 0)
+ goto cleanup_lock;
+
+ ret = gf_thread_create(&priv->container.thread, &w_attr, br_stub_worker,
+ this, "brswrker");
+ if (ret)
+ goto cleanup_attr;
+
+ return 0;
+
+cleanup_attr:
+ pthread_attr_destroy(&w_attr);
+cleanup_lock:
+ pthread_mutex_destroy(&priv->container.bad_lock);
+cleanup_cond:
+ pthread_cond_destroy(&priv->container.bad_cond);
+out:
+ return -1;
+}
+
+int32_t
+init(xlator_t *this)
+{
+ int ret = 0;
+ char *tmp = NULL;
+ struct timeval tv = {
+ 0,
+ };
+ br_stub_private_t *priv = NULL;
+
+ if (!this->children) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NO_CHILD, NULL);
+ goto error_return;
+ }
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_br_stub_mt_private_t);
+ if (!priv)
+ goto error_return;
+
+ priv->local_pool = mem_pool_new(br_stub_local_t, 512);
+ if (!priv->local_pool)
+ goto free_priv;
+
+ GF_OPTION_INIT("bitrot", priv->do_versioning, bool, free_mempool);
+
+ GF_OPTION_INIT("export", tmp, str, free_mempool);
+
+ if (snprintf(priv->export, PATH_MAX, "%s", tmp) >= PATH_MAX)
+ goto free_mempool;
+
+ if (snprintf(priv->stub_basepath, sizeof(priv->stub_basepath), "%s/%s",
+ priv->export,
+ BR_STUB_QUARANTINE_DIR) >= sizeof(priv->stub_basepath))
+ goto free_mempool;
+
+ (void)gettimeofday(&tv, NULL);
+
+ /* boot time is in network endian format */
+ priv->boot[0] = htonl(tv.tv_sec);
+ priv->boot[1] = htonl(tv.tv_usec);
+
+ pthread_mutex_init(&priv->lock, NULL);
+ pthread_cond_init(&priv->cond, NULL);
+ INIT_LIST_HEAD(&priv->squeue);
+
+ /* Thread creations need 'this' to be passed so that THIS can be
+ * assigned inside the thread. So setting this->private here.
+ */
+ this->private = priv;
+ if (!priv->do_versioning)
+ return 0;
+
+ ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
+ "brssign");
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SPAWN_SIGN_THRD_FAILED,
+ NULL);
+ goto cleanup_lock;
+ }
+
+ ret = br_stub_bad_object_container_init(this, priv);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_CONTAINER_FAIL, NULL);
+ goto cleanup_lock;
+ }
+
+ gf_msg_debug(this->name, 0, "bit-rot stub loaded");
+
+ return 0;
+
+cleanup_lock:
+ pthread_cond_destroy(&priv->cond);
+ pthread_mutex_destroy(&priv->lock);
+free_mempool:
+ mem_pool_destroy(priv->local_pool);
+ priv->local_pool = NULL;
+free_priv:
+ GF_FREE(priv);
+ this->private = NULL;
+error_return:
+ return -1;
+}
+
+/* TODO:
+ * As of now enabling bitrot option does 2 things.
+ * 1) Start the Bitrot Daemon which signs the objects (currently files only)
+ * upon getting notified by the stub.
+ * 2) Enable versioning of the objects. Object versions (again files only) are
+ * incremented upon modification.
+ * So object versioning is tied to bitrot daemon's signing. In future, object
+ * versioning might be necessary for other things as well apart from bit-rot
+ * detection (well that's the objective of bringing in object-versioning :)).
+ * In that case, better to make versioning a new option and letting it to be
+ * enabled despite bit-rot detection is not needed.
+ * Ex: ICAP.
+ */
+int32_t
+reconfigure(xlator_t *this, dict_t *options)
+{
+ int32_t ret = -1;
+ br_stub_private_t *priv = NULL;
+
+ priv = this->private;
+
+ GF_OPTION_RECONF("bitrot", priv->do_versioning, options, bool, err);
+ if (priv->do_versioning && !priv->signth) {
+ ret = gf_thread_create(&priv->signth, NULL, br_stub_signth, this,
+ "brssign");
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ BRS_MSG_SPAWN_SIGN_THRD_FAILED, NULL);
+ goto err;
+ }
+
+ ret = br_stub_bad_object_container_init(this, priv);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_CONTAINER_FAIL,
+ NULL);
+ goto err;
+ }
+ } else {
+ if (priv->signth) {
+ if (gf_thread_cleanup_xint(priv->signth)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+ } else {
+ gf_smsg(this->name, GF_LOG_INFO, 0, BRS_MSG_KILL_SIGN_THREAD,
+ NULL);
+ priv->signth = 0;
+ }
+ }
+
+ if (priv->container.thread) {
+ if (gf_thread_cleanup_xint(priv->container.thread)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+ }
+ priv->container.thread = 0;
+ }
+ }
+
+ ret = 0;
+ return ret;
+err:
+ if (priv->signth) {
+ if (gf_thread_cleanup_xint(priv->signth)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+ }
+ priv->signth = 0;
+ }
+
+ if (priv->container.thread) {
+ if (gf_thread_cleanup_xint(priv->container.thread)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_CANCEL_SIGN_THREAD_FAILED, NULL);
+ }
+ priv->container.thread = 0;
+ }
+ ret = -1;
+ return ret;
+}
+
+int
+notify(xlator_t *this, int event, void *data, ...)
+{
+ br_stub_private_t *priv = NULL;
+
+ if (!this)
+ return 0;
+
+ priv = this->private;
+ if (!priv)
+ return 0;
+
+ default_notify(this, event, data);
+ return 0;
+}
+
+void
+fini(xlator_t *this)
+{
+ int32_t ret = 0;
+ br_stub_private_t *priv = this->private;
+ struct br_stub_signentry *sigstub = NULL;
+ call_stub_t *stub = NULL;
+
+ if (!priv)
+ return;
+
+ if (!priv->do_versioning)
+ goto cleanup;
+
+ ret = gf_thread_cleanup_xint(priv->signth);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
+ NULL);
+ goto out;
+ }
+ priv->signth = 0;
+
+ while (!list_empty(&priv->squeue)) {
+ sigstub = list_first_entry(&priv->squeue, struct br_stub_signentry,
+ list);
+ list_del_init(&sigstub->list);
+
+ call_stub_destroy(sigstub->stub);
+ GF_FREE(sigstub);
+ }
+
+ ret = gf_thread_cleanup_xint(priv->container.thread);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CANCEL_SIGN_THREAD_FAILED,
+ NULL);
+ goto out;
+ }
+
+ priv->container.thread = 0;
+
+ while (!list_empty(&priv->container.bad_queue)) {
+ stub = list_first_entry(&priv->container.bad_queue, call_stub_t, list);
+ list_del_init(&stub->list);
+ call_stub_destroy(stub);
+ }
+
+ pthread_mutex_destroy(&priv->container.bad_lock);
+ pthread_cond_destroy(&priv->container.bad_cond);
+
+cleanup:
+ pthread_mutex_destroy(&priv->lock);
+ pthread_cond_destroy(&priv->cond);
+
+ if (priv->local_pool) {
+ mem_pool_destroy(priv->local_pool);
+ priv->local_pool = NULL;
+ }
+
+ this->private = NULL;
+ GF_FREE(priv);
+
+out:
+ return;
+}
+
+static int
+br_stub_alloc_versions(br_version_t **obuf, br_signature_t **sbuf,
+ size_t signaturelen)
+{
+ void *mem = NULL;
+ size_t size = 0;
+
+ if (obuf)
+ size += sizeof(br_version_t);
+ if (sbuf)
+ size += sizeof(br_signature_t) + signaturelen;
+
+ mem = GF_CALLOC(1, size, gf_br_stub_mt_version_t);
+ if (!mem)
+ goto error_return;
+
+ if (obuf) {
+ *obuf = (br_version_t *)mem;
+ mem = ((char *)mem + sizeof(br_version_t));
+ }
+ if (sbuf) {
+ *sbuf = (br_signature_t *)mem;
+ }
+
+ return 0;
+
+error_return:
+ return -1;
+}
+
+static void
+br_stub_dealloc_versions(void *mem)
+{
+ GF_FREE(mem);
+}
+
+static br_stub_local_t *
+br_stub_alloc_local(xlator_t *this)
+{
+ br_stub_private_t *priv = this->private;
+
+ return mem_get0(priv->local_pool);
+}
+
+static void
+br_stub_dealloc_local(br_stub_local_t *ptr)
+{
+ if (!ptr)
+ return;
+
+ mem_put(ptr);
+}
+
+static int
+br_stub_prepare_version_request(xlator_t *this, dict_t *dict,
+ br_version_t *obuf, unsigned long oversion)
+{
+ br_stub_private_t *priv = NULL;
+
+ priv = this->private;
+ br_set_ongoingversion(obuf, oversion, priv->boot);
+
+ return dict_set_bin(dict, BITROT_CURRENT_VERSION_KEY, (void *)obuf,
+ sizeof(br_version_t));
+}
+
+static int
+br_stub_prepare_signing_request(dict_t *dict, br_signature_t *sbuf,
+ br_isignature_t *sign, size_t signaturelen)
+{
+ size_t size = 0;
+
+ br_set_signature(sbuf, sign, signaturelen, &size);
+
+ return dict_set_bin(dict, BITROT_SIGNING_VERSION_KEY, (void *)sbuf, size);
+}
+
+/**
+ * initialize an inode context starting with a given ongoing version.
+ * a fresh lookup() or a first creat() call initializes the inode
+ * context, hence the inode is marked dirty. this routine also
+ * initializes the transient inode version.
+ */
+static int
+br_stub_init_inode_versions(xlator_t *this, fd_t *fd, inode_t *inode,
+ unsigned long version, gf_boolean_t markdirty,
+ gf_boolean_t bad_object, uint64_t *ctx_addr)
+{
+ int32_t ret = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+
+ ctx = GF_CALLOC(1, sizeof(br_stub_inode_ctx_t), gf_br_stub_mt_inode_ctx_t);
+ if (!ctx)
+ goto error_return;
+
+ INIT_LIST_HEAD(&ctx->fd_list);
+ (markdirty) ? __br_stub_mark_inode_dirty(ctx)
+ : __br_stub_mark_inode_synced(ctx);
+ __br_stub_set_ongoing_version(ctx, version);
+
+ if (bad_object)
+ __br_stub_mark_object_bad(ctx);
+
+ if (fd) {
+ ret = br_stub_add_fd_to_inode(this, fd, ctx);
+ if (ret)
+ goto free_ctx;
+ }
+
+ ret = br_stub_set_inode_ctx(this, inode, ctx);
+ if (ret)
+ goto free_ctx;
+
+ if (ctx_addr)
+ *ctx_addr = (uint64_t)(uintptr_t)ctx;
+ return 0;
+
+free_ctx:
+ GF_FREE(ctx);
+error_return:
+ return -1;
+}
+
+/**
+ * modify the ongoing version of an inode.
+ */
+static int
+br_stub_mod_inode_versions(xlator_t *this, fd_t *fd, inode_t *inode,
+ unsigned long version)
+{
+ int32_t ret = -1;
+ br_stub_inode_ctx_t *ctx = 0;
+
+ LOCK(&inode->lock);
+ {
+ ctx = __br_stub_get_ongoing_version_ctx(this, inode, NULL);
+ if (ctx == NULL)
+ goto unblock;
+ if (__br_stub_is_inode_dirty(ctx)) {
+ __br_stub_set_ongoing_version(ctx, version);
+ __br_stub_mark_inode_synced(ctx);
+ }
+
+ ret = 0;
+ }
+unblock:
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+static void
+br_stub_fill_local(br_stub_local_t *local, call_stub_t *stub, fd_t *fd,
+ inode_t *inode, uuid_t gfid, int versioningtype,
+ unsigned long memversion)
+{
+ local->fopstub = stub;
+ local->versioningtype = versioningtype;
+ local->u.context.version = memversion;
+ if (fd)
+ local->u.context.fd = fd_ref(fd);
+ if (inode)
+ local->u.context.inode = inode_ref(inode);
+ gf_uuid_copy(local->u.context.gfid, gfid);
+}
+
+static void
+br_stub_cleanup_local(br_stub_local_t *local)
+{
+ if (!local)
+ return;
+
+ local->fopstub = NULL;
+ local->versioningtype = 0;
+ local->u.context.version = 0;
+ if (local->u.context.fd) {
+ fd_unref(local->u.context.fd);
+ local->u.context.fd = NULL;
+ }
+ if (local->u.context.inode) {
+ inode_unref(local->u.context.inode);
+ local->u.context.inode = NULL;
+ }
+ memset(local->u.context.gfid, '\0', sizeof(uuid_t));
+}
+
+static int
+br_stub_need_versioning(xlator_t *this, fd_t *fd, gf_boolean_t *versioning,
+ gf_boolean_t *modified, br_stub_inode_ctx_t **ctx)
+{
+ int32_t ret = -1;
+ uint64_t ctx_addr = 0;
+ br_stub_inode_ctx_t *c = NULL;
+ unsigned long version = BITROT_DEFAULT_CURRENT_VERSION;
+
+ *versioning = _gf_false;
+ *modified = _gf_false;
+
+ /* Bitrot stub inode context was initialized only in lookup, create
+ * and mknod cbk path. Object versioning was enabled by default
+ * irrespective of bitrot enabled or not. But it's made optional now.
+ * As a consequence there could be cases where getting inode ctx would
+ * fail because it's not set yet.
+ * e.g., If versioning (with bitrot enable) is enabled while I/O is
+ * happening, it could directly get other fops like writev without
+ * lookup, where getting inode ctx would fail. Hence initialize the
+ * inode ctx on failure to get ctx. This is done in all places where
+ * applicable.
+ */
+ ret = br_stub_get_inode_ctx(this, fd->inode, &ctx_addr);
+ if (ret < 0) {
+ ret = br_stub_init_inode_versions(this, fd, fd->inode, version,
+ _gf_true, _gf_false, &ctx_addr);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
+ goto error_return;
+ }
+ }
+
+ c = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+ LOCK(&fd->inode->lock);
+ {
+ if (__br_stub_is_inode_dirty(c))
+ *versioning = _gf_true;
+ if (__br_stub_is_inode_modified(c))
+ *modified = _gf_true;
+ }
+ UNLOCK(&fd->inode->lock);
+
+ if (ctx)
+ *ctx = c;
+ return 0;
+
+error_return:
+ return -1;
+}
+
+static int32_t
+br_stub_anon_fd_ctx(xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx)
+{
+ int32_t ret = -1;
+ br_stub_fd_t *br_stub_fd = NULL;
+
+ br_stub_fd = br_stub_fd_ctx_get(this, fd);
+ if (!br_stub_fd) {
+ ret = br_stub_add_fd_to_inode(this, fd, ctx);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_INODE,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static int
+br_stub_versioning_prep(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ br_stub_inode_ctx_t *ctx)
+{
+ int32_t ret = -1;
+ br_stub_local_t *local = NULL;
+
+ local = br_stub_alloc_local(this);
+ if (!local) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_NO_MEMORY, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
+ goto error_return;
+ }
+
+ if (fd_is_anonymous(fd)) {
+ ret = br_stub_anon_fd_ctx(this, fd, ctx);
+ if (ret)
+ goto free_local;
+ }
+
+ frame->local = local;
+
+ return 0;
+
+free_local:
+ br_stub_dealloc_local(local);
+error_return:
+ return -1;
+}
+
+static int
+br_stub_mark_inode_modified(xlator_t *this, br_stub_local_t *local)
+{
+ fd_t *fd = NULL;
+ int32_t ret = 0;
+ uint64_t ctx_addr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+ unsigned long version = BITROT_DEFAULT_CURRENT_VERSION;
+
+ fd = local->u.context.fd;
+
+ ret = br_stub_get_inode_ctx(this, fd->inode, &ctx_addr);
+ if (ret < 0) {
+ ret = br_stub_init_inode_versions(this, fd, fd->inode, version,
+ _gf_true, _gf_false, &ctx_addr);
+ if (ret)
+ goto error_return;
+ }
+
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+ LOCK(&fd->inode->lock);
+ {
+ __br_stub_set_inode_modified(ctx);
+ }
+ UNLOCK(&fd->inode->lock);
+
+ return 0;
+
+error_return:
+ return -1;
+}
+
+/**
+ * The possible return values from br_stub_is_bad_object () are:
+ * 1) 0 => as per the inode context object is not bad
+ * 2) -1 => Failed to get the inode context itself
+ * 3) -2 => As per the inode context object is bad
+ * Both -ve values means the fop which called this function is failed
+ * and error is returned upwards.
+ */
+static int
+br_stub_check_bad_object(xlator_t *this, inode_t *inode, int32_t *op_ret,
+ int32_t *op_errno)
+{
+ int ret = -1;
+ unsigned long version = BITROT_DEFAULT_CURRENT_VERSION;
+
+ ret = br_stub_is_bad_object(this, inode);
+ if (ret == -2) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJECT_ACCESS,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+ *op_ret = -1;
+ *op_errno = EIO;
+ }
+
+ if (ret == -1) {
+ ret = br_stub_init_inode_versions(this, NULL, inode, version, _gf_true,
+ _gf_false, NULL);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(inode->gfid), NULL);
+ *op_ret = -1;
+ *op_errno = EINVAL;
+ }
+ }
+
+ return ret;
+}
+
+/**
+ * callback for inode/fd versioning
+ */
+int
+br_stub_fd_incversioning_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ fd_t *fd = NULL;
+ inode_t *inode = NULL;
+ unsigned long version = 0;
+ br_stub_local_t *local = NULL;
+
+ local = (br_stub_local_t *)frame->local;
+ if (op_ret < 0)
+ goto done;
+ fd = local->u.context.fd;
+ inode = local->u.context.inode;
+ version = local->u.context.version;
+
+ op_ret = br_stub_mod_inode_versions(this, fd, inode, version);
+ if (op_ret < 0)
+ op_errno = EINVAL;
+
+done:
+ if (op_ret < 0) {
+ frame->local = NULL;
+ call_unwind_error(local->fopstub, -1, op_errno);
+ br_stub_cleanup_local(local);
+ br_stub_dealloc_local(local);
+ } else {
+ call_resume(local->fopstub);
+ }
+ return 0;
+}
+
+/**
+ * Initial object versioning
+ *
+ * Version persists two (2) extended attributes as explained below:
+ * 1. Current (ongoing) version: This is incremented on an writev ()
+ * or truncate () and is the running version for an object.
+ * 2. Signing version: This is the version against which an object
+ * was signed (checksummed).
+ *
+ * During initial versioning, both ongoing and signing versions are
+ * set of one and zero respectively. A write() call increments the
+ * ongoing version as an indication of modification to the object.
+ * Additionally this needs to be persisted on disk and needs to be
+ * durable: fsync().. :-/
+ * As an optimization only the first write() synchronizes the ongoing
+ * version to disk, subsequent write()s before the *last* release()
+ * are no-op's.
+ *
+ * create(), just like lookup() initializes the object versions to
+ * the default. As an optimization this is not a durable operation:
+ * in case of a crash, hard reboot etc.. absence of versioning xattrs
+ * is ignored in scrubber along with the one time crawler explicitly
+ * triggering signing for such objects.
+ *
+ * c.f. br_stub_writev() / br_stub_truncate()
+ */
+
+/**
+ * perform full or incremental versioning on an inode pointd by an
+ * fd. incremental versioning is done when an inode is dirty and a
+ * writeback is triggered.
+ */
+
+int
+br_stub_fd_versioning(xlator_t *this, call_frame_t *frame, call_stub_t *stub,
+ dict_t *dict, fd_t *fd, br_stub_version_cbk *callback,
+ unsigned long memversion, int versioningtype, int durable)
+{
+ int32_t ret = -1;
+ int flags = 0;
+ dict_t *xdata = NULL;
+ br_stub_local_t *local = NULL;
+
+ xdata = dict_new();
+ if (!xdata)
+ goto done;
+
+ ret = dict_set_int32(xdata, GLUSTERFS_INTERNAL_FOP_KEY, 1);
+ if (ret)
+ goto dealloc_xdata;
+
+ if (durable) {
+ ret = dict_set_int32(xdata, GLUSTERFS_DURABLE_OP, 0);
+ if (ret)
+ goto dealloc_xdata;
+ }
+
+ local = frame->local;
+
+ br_stub_fill_local(local, stub, fd, fd->inode, fd->inode->gfid,
+ versioningtype, memversion);
+
+ STACK_WIND(frame, callback, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+
+ ret = 0;
+
+dealloc_xdata:
+ dict_unref(xdata);
+done:
+ return ret;
+}
+
+static int
+br_stub_perform_incversioning(xlator_t *this, call_frame_t *frame,
+ call_stub_t *stub, fd_t *fd,
+ br_stub_inode_ctx_t *ctx)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ br_version_t *obuf = NULL;
+ unsigned long writeback_version = 0;
+ int op_errno = 0;
+ br_stub_local_t *local = NULL;
+
+ op_errno = EINVAL;
+ local = frame->local;
+
+ writeback_version = __br_stub_writeback_version(ctx);
+
+ op_errno = ENOMEM;
+ dict = dict_new();
+ if (!dict)
+ goto out;
+ ret = br_stub_alloc_versions(&obuf, NULL, 0);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto out;
+ }
+ ret = br_stub_prepare_version_request(this, dict, obuf, writeback_version);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_VERSION_PREPARE_FAIL,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ br_stub_dealloc_versions(obuf);
+ goto out;
+ }
+
+ ret = br_stub_fd_versioning(
+ this, frame, stub, dict, fd, br_stub_fd_incversioning_cbk,
+ writeback_version, BR_STUB_INCREMENTAL_VERSIONING, !WRITEBACK_DURABLE);
+out:
+ if (dict)
+ dict_unref(dict);
+ if (ret) {
+ if (local)
+ frame->local = NULL;
+ call_unwind_error(stub, -1, op_errno);
+ if (local) {
+ br_stub_cleanup_local(local);
+ br_stub_dealloc_local(local);
+ }
+ }
+
+ return ret;
+}
+
+/** {{{ */
+
+/* fsetxattr() */
+
+int32_t
+br_stub_perform_objsign(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
+{
+ STACK_WIND(frame, default_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+
+ dict_unref(xdata);
+ return 0;
+}
+
+void *
+br_stub_signth(void *arg)
+{
+ xlator_t *this = arg;
+ br_stub_private_t *priv = this->private;
+ struct br_stub_signentry *sigstub = NULL;
+
+ THIS = this;
+ while (1) {
+ /*
+ * Disabling bit-rot feature leads to this particular thread
+ * getting cleaned up by reconfigure via a call to the function
+ * gf_thread_cleanup_xint (which in turn calls pthread_cancel
+ * and pthread_join). But, if this thread had held the mutex
+ * &priv->lock at the time of cancellation, then it leads to
+ * deadlock in future when bit-rot feature is enabled (which
+ * again spawns this thread which cant hold the lock as the
+ * mutex is still held by the previous instance of the thread
+ * which got killed). Also, the br_stub_handle_object_signature
+ * function which is called whenever file has to be signed
+ * also gets blocked as it too attempts to acquire &priv->lock.
+ *
+ * So, arrange for the lock to be unlocked as part of the
+ * cleanup of this thread using pthread_cleanup_push and
+ * pthread_cleanup_pop.
+ */
+ pthread_cleanup_push(br_stub_lock_cleaner, &priv->lock);
+ pthread_mutex_lock(&priv->lock);
+ {
+ while (list_empty(&priv->squeue))
+ pthread_cond_wait(&priv->cond, &priv->lock);
+
+ sigstub = list_first_entry(&priv->squeue, struct br_stub_signentry,
+ list);
+ list_del_init(&sigstub->list);
+ }
+ pthread_mutex_unlock(&priv->lock);
+ pthread_cleanup_pop(0);
+
+ call_resume(sigstub->stub);
+
+ GF_FREE(sigstub);
+ }
+
+ return NULL;
+}
+
+static gf_boolean_t
+br_stub_internal_xattr(dict_t *dict)
+{
+ if (dict_get(dict, GLUSTERFS_SET_OBJECT_SIGNATURE) ||
+ dict_get(dict, GLUSTERFS_GET_OBJECT_SIGNATURE) ||
+ dict_get(dict, BR_REOPEN_SIGN_HINT_KEY) ||
+ dict_get(dict, BITROT_OBJECT_BAD_KEY) ||
+ dict_get(dict, BITROT_SIGNING_VERSION_KEY) ||
+ dict_get(dict, BITROT_CURRENT_VERSION_KEY))
+ return _gf_true;
+
+ return _gf_false;
+}
+
+int
+orderq(struct list_head *elem1, struct list_head *elem2)
+{
+ struct br_stub_signentry *s1 = NULL;
+ struct br_stub_signentry *s2 = NULL;
+
+ s1 = list_entry(elem1, struct br_stub_signentry, list);
+ s2 = list_entry(elem2, struct br_stub_signentry, list);
+
+ return (s1->v > s2->v);
+}
+
+static int
+br_stub_compare_sign_version(xlator_t *this, inode_t *inode,
+ br_signature_t *sbuf, dict_t *dict,
+ int *fakesuccess)
+{
+ int32_t ret = -1;
+ uint64_t tmp_ctx = 0;
+ gf_boolean_t invalid = _gf_false;
+ br_stub_inode_ctx_t *ctx = NULL;
+
+ GF_VALIDATE_OR_GOTO("bit-rot-stub", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, sbuf, out);
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+ ret = br_stub_get_inode_ctx(this, inode, &tmp_ctx);
+ if (ret) {
+ dict_del(dict, BITROT_SIGNING_VERSION_KEY);
+ goto out;
+ }
+
+ ctx = (br_stub_inode_ctx_t *)(long)tmp_ctx;
+
+ LOCK(&inode->lock);
+ {
+ if (ctx->currentversion < sbuf->signedversion) {
+ invalid = _gf_true;
+ } else if (ctx->currentversion > sbuf->signedversion) {
+ gf_msg_debug(this->name, 0,
+ "\"Signing version\" "
+ "(%lu) lower than \"Current version \" "
+ "(%lu)",
+ ctx->currentversion, sbuf->signedversion);
+ *fakesuccess = 1;
+ }
+ }
+ UNLOCK(&inode->lock);
+
+ if (invalid) {
+ ret = -1;
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_VERSION_ERROR,
+ "Signing-ver=%lu", sbuf->signedversion, "current-ver=%lu",
+ ctx->currentversion, NULL);
+ }
+
+out:
+ return ret;
+}
+
+static int
+br_stub_prepare_signature(xlator_t *this, dict_t *dict, inode_t *inode,
+ br_isignature_t *sign, int *fakesuccess)
+{
+ int32_t ret = -1;
+ size_t signaturelen = 0;
+ br_signature_t *sbuf = NULL;
+
+ if (!br_is_signature_type_valid(sign->signaturetype))
+ goto out;
+
+ signaturelen = sign->signaturelen;
+ ret = br_stub_alloc_versions(NULL, &sbuf, signaturelen);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+ ret = -1;
+ goto out;
+ }
+ ret = br_stub_prepare_signing_request(dict, sbuf, sign, signaturelen);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SIGN_PREPARE_FAIL,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+ ret = -1;
+ br_stub_dealloc_versions(sbuf);
+ goto out;
+ }
+
+ /* At this point sbuf has been added to dict, so the memory will be freed
+ * when the data from the dict is destroyed
+ */
+ ret = br_stub_compare_sign_version(this, inode, sbuf, dict, fakesuccess);
+out:
+ return ret;
+}
+
+static void
+br_stub_handle_object_signature(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, br_isignature_t *sign,
+ dict_t *xdata)
+{
+ int32_t ret = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ int fakesuccess = 0;
+ br_stub_private_t *priv = NULL;
+ struct br_stub_signentry *sigstub = NULL;
+
+ priv = this->private;
+
+ if (frame->root->pid != GF_CLIENT_PID_BITD) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno, BRS_MSG_NON_BITD_PID,
+ "PID=%d", frame->root->pid, NULL);
+ goto dofop;
+ }
+
+ ret = br_stub_prepare_signature(this, dict, fd->inode, sign, &fakesuccess);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SIGN_PREPARE_FAIL,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto dofop;
+ }
+ if (fakesuccess) {
+ op_ret = op_errno = 0;
+ goto dofop;
+ }
+
+ dict_del(dict, GLUSTERFS_SET_OBJECT_SIGNATURE);
+
+ ret = -1;
+ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata)
+ goto dofop;
+ } else {
+ dict_ref(xdata);
+ }
+
+ ret = dict_set_int32(xdata, GLUSTERFS_DURABLE_OP, 0);
+ if (ret)
+ goto unref_dict;
+
+ /* prepare dispatch stub to order object signing */
+ sigstub = GF_CALLOC(1, sizeof(*sigstub), gf_br_stub_mt_sigstub_t);
+ if (!sigstub)
+ goto unref_dict;
+
+ INIT_LIST_HEAD(&sigstub->list);
+ sigstub->v = ntohl(sign->signedversion);
+ sigstub->stub = fop_fsetxattr_stub(frame, br_stub_perform_objsign, fd, dict,
+ 0, xdata);
+ if (!sigstub->stub)
+ goto cleanup_stub;
+
+ pthread_mutex_lock(&priv->lock);
+ {
+ list_add_order(&sigstub->list, &priv->squeue, orderq);
+ pthread_cond_signal(&priv->cond);
+ }
+ pthread_mutex_unlock(&priv->lock);
+
+ return;
+
+cleanup_stub:
+ GF_FREE(sigstub);
+unref_dict:
+ dict_unref(xdata);
+dofop:
+ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL);
+}
+
+int32_t
+br_stub_fsetxattr_resume(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int32_t ret = -1;
+ br_stub_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+
+ ret = br_stub_mark_inode_modified(this, local);
+ if (ret) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata);
+
+ br_stub_cleanup_local(local);
+ br_stub_dealloc_local(local);
+
+ return 0;
+}
+
+/**
+ * Handles object reopens. Object reopens can be of 3 types. 2 are from
+ * oneshot crawler and 1 from the regular signer.
+ * ONESHOT CRAWLER:
+ * For those objects which were created before bitrot was enabled. oneshow
+ * crawler crawls the namespace and signs all the objects. It has to do
+ * the versioning before making bit-rot-stub send a sign notification.
+ * So it sends fsetxattr with BR_OBJECT_REOPEN as the value. And bit-rot-stub
+ * upon getting BR_OBJECT_REOPEN value checks if the version has to be
+ * increased or not. By default the version will be increased. But if the
+ * object is modified before BR_OBJECT_REOPEN from oneshot crawler, then
+ * versioning need not be done. In that case simply a success is returned.
+ * SIGNER:
+ * Signer wait for 2 minutes upon getting the notification from bit-rot-stub
+ * and then it sends a dummy write (in reality a fsetxattr) call, to change
+ * the state of the inode from REOPEN_WAIT to SIGN_QUICK. The funny part here
+ * is though the inode's state is REOPEN_WAIT, the call sent by signer is
+ * BR_OBJECT_RESIGN. Once the state is changed to SIGN_QUICK, then yet another
+ * notification is sent upon release (RESIGN would have happened via fsetxattr,
+ * so a fd is needed) and the object is signed truly this time.
+ * There is a challenge in the above RESIGN method by signer. After sending
+ * the 1st notification, the inode could be forgotten before RESIGN request
+ * is received. In that case, the inode's context (the newly looked up inode)
+ * would not indicate the inode as being modified (it would be in the default
+ * state) and because of this, a SIGN_QUICK notification to truly sign the
+ * object would not be sent. So, this is how its handled.
+ * if (request == RESIGN) {
+ * if (inode->sign_info == NORMAL) {
+ * mark_inode_non_dirty;
+ * mark_inode_modified;
+ * }
+ * GOBACK (means unwind without doing versioning)
+ * }
+ */
+static void
+br_stub_handle_object_reopen(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ uint32_t val)
+{
+ int32_t ret = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ call_stub_t *stub = NULL;
+ gf_boolean_t inc_version = _gf_false;
+ gf_boolean_t modified = _gf_false;
+ br_stub_inode_ctx_t *ctx = NULL;
+ br_stub_local_t *local = NULL;
+ gf_boolean_t goback = _gf_true;
+
+ ret = br_stub_need_versioning(this, fd, &inc_version, &modified, &ctx);
+ if (ret)
+ goto unwind;
+
+ LOCK(&fd->inode->lock);
+ {
+ if ((val == BR_OBJECT_REOPEN) && inc_version)
+ goback = _gf_false;
+ if (val == BR_OBJECT_RESIGN && ctx->info_sign == BR_SIGN_NORMAL) {
+ __br_stub_mark_inode_synced(ctx);
+ __br_stub_set_inode_modified(ctx);
+ }
+ (void)__br_stub_inode_sign_state(ctx, GF_FOP_FSETXATTR, fd);
+ }
+ UNLOCK(&fd->inode->lock);
+
+ if (goback) {
+ op_ret = op_errno = 0;
+ goto unwind;
+ }
+
+ ret = br_stub_versioning_prep(frame, this, fd, ctx);
+ if (ret)
+ goto unwind;
+ local = frame->local;
+
+ stub = fop_fsetxattr_cbk_stub(frame, br_stub_fsetxattr_resume, 0, 0, NULL);
+ if (!stub) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+ "fsetxattr gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto cleanup_local;
+ }
+
+ (void)br_stub_perform_incversioning(this, frame, stub, fd, ctx);
+ return;
+
+cleanup_local:
+ br_stub_cleanup_local(local);
+ br_stub_dealloc_local(local);
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL);
+}
+
+/**
+ * This function only handles bad file identification. Instead of checking in
+ * fops like open, readv, writev whether the object is bad or not by doing
+ * getxattr calls, better to catch them when scrubber marks it as bad.
+ * So this callback is called only when the fsetxattr is sent by the scrubber
+ * to mark the object as bad.
+ */
+int
+br_stub_fsetxattr_bad_object_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ br_stub_local_t *local = NULL;
+ int32_t ret = -1;
+
+ local = frame->local;
+ frame->local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ /*
+ * What to do if marking the object as bad fails? (i.e. in memory
+ * marking within the inode context. If we are here means fsetxattr
+ * fop has succeeded on disk and the bad object xattr has been set).
+ * We can return failure to scruber, but there is nothing the scrubber
+ * can do with it (it might assume that the on disk setxattr itself has
+ * failed). The main purpose of this operation is to help identify the
+ * bad object by checking the inode context itself (thus avoiding the
+ * necessity of doing a getxattr fop on the disk).
+ *
+ * So as of now, success itself is being returned even though inode
+ * context set operation fails.
+ * In future if there is any change in the policy which can handle this,
+ * then appropriate response should be sent (i.e. success or error).
+ */
+ ret = br_stub_mark_object_bad(this, local->u.context.inode);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_MARK_FAIL,
+ "gfid=%s", uuid_utoa(local->u.context.inode->gfid), NULL);
+
+ ret = br_stub_add(this, local->u.context.inode->gfid);
+
+unwind:
+ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata);
+ br_stub_cleanup_local(local);
+ br_stub_dealloc_local(local);
+ return 0;
+}
+
+static int32_t
+br_stub_handle_bad_object_key(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int flags, dict_t *xdata)
+{
+ br_stub_local_t *local = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ if (frame->root->pid != GF_CLIENT_PID_SCRUB) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_NON_SCRUB_BAD_OBJ_MARK,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto unwind;
+ }
+
+ local = br_stub_alloc_local(this);
+ if (!local) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ALLOC_MEM_FAILED,
+ "fsetxattr gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ br_stub_fill_local(local, NULL, fd, fd->inode, fd->inode->gfid,
+ BR_STUB_NO_VERSIONING, 0);
+ frame->local = local;
+
+ STACK_WIND(frame, br_stub_fsetxattr_bad_object_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+/**
+ * As of now, versioning is done by the stub (though as a setxattr
+ * operation) as part of inode modification operations such as writev,
+ * truncate, ftruncate. And signing is done by BitD by a fsetxattr call.
+ * So any kind of setxattr coming on the versioning and the signing xattr is
+ * not allowed (i.e. BITROT_CURRENT_VERSION_KEY and BITROT_SIGNING_VERSION_KEY).
+ * In future if BitD/scrubber are allowed to change the versioning
+ * xattrs (though I cannot see a reason for it as of now), then the below
+ * function can be modified to block setxattr on version for only applications.
+ *
+ * NOTE: BitD sends sign request on GLUSTERFS_SET_OBJECT_SIGNATURE key.
+ * BITROT_SIGNING_VERSION_KEY is the xattr used to save the signature.
+ *
+ */
+static int32_t
+br_stub_handle_internal_xattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ char *key)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR,
+ "setxattr key=%s", key, "inode-gfid=%s", uuid_utoa(fd->inode->gfid),
+ NULL);
+
+ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+static void
+br_stub_dump_xattr(xlator_t *this, dict_t *dict, int *op_errno)
+{
+ char *format = "(%s:%s)";
+ char *dump = NULL;
+
+ dump = GF_CALLOC(1, BR_STUB_DUMP_STR_SIZE, gf_br_stub_mt_misc);
+ if (!dump) {
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ dict_dump_to_str(dict, dump, BR_STUB_DUMP_STR_SIZE, format);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_INTERNAL_XATTR,
+ "fsetxattr dump=%s", dump, NULL);
+out:
+ if (dump) {
+ GF_FREE(dump);
+ }
+ return;
+}
+
+int
+br_stub_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int flags, dict_t *xdata)
+{
+ int32_t ret = 0;
+ uint32_t val = 0;
+ br_isignature_t *sign = NULL;
+ br_stub_private_t *priv = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ priv = this->private;
+
+ if ((frame->root->pid != GF_CLIENT_PID_BITD &&
+ frame->root->pid != GF_CLIENT_PID_SCRUB) &&
+ br_stub_internal_xattr(dict)) {
+ br_stub_dump_xattr(this, dict, &op_errno);
+ goto unwind;
+ }
+
+ if (!priv->do_versioning)
+ goto wind;
+
+ if (!IA_ISREG(fd->inode->ia_type))
+ goto wind;
+
+ /* object signature request */
+ ret = dict_get_bin(dict, GLUSTERFS_SET_OBJECT_SIGNATURE, (void **)&sign);
+ if (!ret) {
+ gf_msg_debug(this->name, 0, "got SIGNATURE request on %s",
+ uuid_utoa(fd->inode->gfid));
+ br_stub_handle_object_signature(frame, this, fd, dict, sign, xdata);
+ goto done;
+ }
+
+ /* signing xattr */
+ if (dict_get(dict, BITROT_SIGNING_VERSION_KEY)) {
+ br_stub_handle_internal_xattr(frame, this, fd,
+ BITROT_SIGNING_VERSION_KEY);
+ goto done;
+ }
+
+ /* version xattr */
+ if (dict_get(dict, BITROT_CURRENT_VERSION_KEY)) {
+ br_stub_handle_internal_xattr(frame, this, fd,
+ BITROT_CURRENT_VERSION_KEY);
+ goto done;
+ }
+
+ if (dict_get(dict, GLUSTERFS_GET_OBJECT_SIGNATURE)) {
+ br_stub_handle_internal_xattr(frame, this, fd,
+ GLUSTERFS_GET_OBJECT_SIGNATURE);
+ goto done;
+ }
+
+ /* object reopen request */
+ ret = dict_get_uint32(dict, BR_REOPEN_SIGN_HINT_KEY, &val);
+ if (!ret) {
+ br_stub_handle_object_reopen(frame, this, fd, val);
+ goto done;
+ }
+
+ /* handle bad object */
+ if (dict_get(dict, BITROT_OBJECT_BAD_KEY)) {
+ br_stub_handle_bad_object_key(frame, this, fd, dict, flags, xdata);
+ goto done;
+ }
+
+wind:
+ STACK_WIND(frame, default_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL);
+
+done:
+ return 0;
+}
+
+/**
+ * Currently BitD and scrubber are doing fsetxattr to either sign the object
+ * or to mark it as bad. Hence setxattr on any of those keys is denied directly
+ * without checking from where the fop is coming.
+ * Later, if BitD or Scrubber does setxattr of those keys, then appropriate
+ * check has to be added below.
+ */
+int
+br_stub_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int flags, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ if (br_stub_internal_xattr(dict)) {
+ br_stub_dump_xattr(this, dict, &op_errno);
+ goto unwind;
+ }
+
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
+ loc, dict, flags, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* {f}removexattr() */
+
+int32_t
+br_stub_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ if (!strcmp(BITROT_OBJECT_BAD_KEY, name) ||
+ !strcmp(BITROT_SIGNING_VERSION_KEY, name) ||
+ !strcmp(BITROT_CURRENT_VERSION_KEY, name)) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR,
+ "name=%s", name, "file-path=%s", loc->path, NULL);
+ goto unwind;
+ }
+
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+int32_t
+br_stub_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ if (!strcmp(BITROT_OBJECT_BAD_KEY, name) ||
+ !strcmp(BITROT_SIGNING_VERSION_KEY, name) ||
+ !strcmp(BITROT_CURRENT_VERSION_KEY, name)) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_REMOVE_INTERNAL_XATTR,
+ "name=%s", name, "inode-gfid=%s", uuid_utoa(fd->inode->gfid),
+ NULL);
+ goto unwind;
+ }
+
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(fremovexattr, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* {f}getxattr() */
+
+int
+br_stub_listxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ if (op_ret < 0)
+ goto unwind;
+
+ br_stub_remove_vxattrs(xattr, _gf_true);
+
+unwind:
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, xattr, xdata);
+ return 0;
+}
+
+/**
+ * ONE SHOT CRAWLER from BitD signs the objects that it encounters while
+ * crawling, if the object is identified as stale by the stub. Stub follows
+ * the below logic to mark an object as stale or not.
+ * If the ongoing version and the signed_version match, then the object is not
+ * stale. Just return. Otherwise if they does not match, then it means one
+ * of the below things.
+ * 1) If the inode does not need write back of the version and the sign state is
+ * is NORMAL, then some active i/o is going on the object. So skip it.
+ * A notification will be sent to trigger the sign once the release is
+ * received on the object.
+ * 2) If inode does not need writeback of the version and the sign state is
+ * either reopen wait or quick sign, then it means:
+ * A) BitD restarted and it is not sure whether the object it encountered
+ * while crawling is in its timer wheel or not. Since there is no way to
+ * scan the timer wheel as of now, ONE SHOT CRAWLER just goes ahead and
+ * signs the object. Since the inode does not need writeback, version will
+ * not be incremented and directly the object will be signed.
+ * 3) If the inode needs writeback, then it means the inode was forgotten after
+ * the versioning and it has to be signed now.
+ *
+ * This is the algorithm followed:
+ * if (ongoing_version == signed_version); then
+ * object_is_not_stale;
+ * return;
+ * else; then
+ * if (!inode_needs_writeback && inode_sign_state != NORMAL); then
+ * object_is_stale;
+ * if (inode_needs_writeback); then
+ * object_is_stale;
+ *
+ * For SCRUBBER, no need to check for the sign state and inode writeback.
+ * If the ondisk ongoingversion and the ondisk signed version does not match,
+ * then treat the object as stale.
+ */
+char
+br_stub_is_object_stale(xlator_t *this, call_frame_t *frame, inode_t *inode,
+ br_version_t *obuf, br_signature_t *sbuf)
+{
+ uint64_t ctx_addr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+ int32_t ret = -1;
+ char stale = 0;
+
+ if (obuf->ongoingversion == sbuf->signedversion)
+ goto out;
+
+ if (frame->root->pid == GF_CLIENT_PID_SCRUB) {
+ stale = 1;
+ goto out;
+ }
+
+ ret = br_stub_get_inode_ctx(this, inode, &ctx_addr);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+ goto out;
+ }
+
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+ LOCK(&inode->lock);
+ {
+ if ((!__br_stub_is_inode_dirty(ctx) &&
+ ctx->info_sign != BR_SIGN_NORMAL) ||
+ __br_stub_is_inode_dirty(ctx))
+ stale = 1;
+ }
+ UNLOCK(&inode->lock);
+
+out:
+ return stale;
+}
+
+int
+br_stub_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xattr, dict_t *xdata)
+{
+ int32_t ret = 0;
+ size_t totallen = 0;
+ size_t signaturelen = 0;
+ br_stub_private_t *priv = NULL;
+ br_version_t *obuf = NULL;
+ br_signature_t *sbuf = NULL;
+ br_isignature_out_t *sign = NULL;
+ br_vxattr_status_t status;
+ br_stub_local_t *local = NULL;
+ inode_t *inode = NULL;
+ gf_boolean_t bad_object = _gf_false;
+ gf_boolean_t ver_enabled = _gf_false;
+
+ BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled);
+ priv = this->private;
+
+ if (op_ret < 0)
+ goto unwind;
+ BR_STUB_VER_COND_GOTO(priv, (!ver_enabled), delkeys);
+
+ if (cookie != (void *)BR_STUB_REQUEST_COOKIE)
+ goto unwind;
+
+ local = frame->local;
+ frame->local = NULL;
+ if (!local) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto unwind;
+ }
+ inode = local->u.context.inode;
+
+ op_ret = -1;
+ status = br_version_xattr_state(xattr, &obuf, &sbuf, &bad_object);
+
+ op_errno = EIO;
+ if (bad_object)
+ goto delkeys;
+
+ op_errno = EINVAL;
+ if (status == BR_VXATTR_STATUS_INVALID)
+ goto delkeys;
+
+ op_errno = ENODATA;
+ if ((status == BR_VXATTR_STATUS_MISSING) ||
+ (status == BR_VXATTR_STATUS_UNSIGNED))
+ goto delkeys;
+
+ /**
+ * okay.. we have enough information to satisfy the request,
+ * namely: version and signing extended attribute. what's
+ * pending is the signature length -- that's figured out
+ * indirectly via the size of the _whole_ xattr and the
+ * on-disk signing xattr header size.
+ */
+ op_errno = EINVAL;
+ ret = dict_get_uint32(xattr, BITROT_SIGNING_XATTR_SIZE_KEY,
+ (uint32_t *)&signaturelen);
+ if (ret)
+ goto delkeys;
+
+ signaturelen -= sizeof(br_signature_t);
+ totallen = sizeof(br_isignature_out_t) + signaturelen;
+
+ op_errno = ENOMEM;
+ sign = GF_CALLOC(1, totallen, gf_br_stub_mt_signature_t);
+ if (!sign)
+ goto delkeys;
+
+ sign->time[0] = obuf->timebuf[0];
+ sign->time[1] = obuf->timebuf[1];
+
+ /* Object's dirty state & current signed version */
+ sign->version = sbuf->signedversion;
+ sign->stale = br_stub_is_object_stale(this, frame, inode, obuf, sbuf);
+
+ /* Object's signature */
+ sign->signaturelen = signaturelen;
+ sign->signaturetype = sbuf->signaturetype;
+ (void)memcpy(sign->signature, sbuf->signature, signaturelen);
+
+ op_errno = EINVAL;
+ ret = dict_set_bin(xattr, GLUSTERFS_GET_OBJECT_SIGNATURE, (void *)sign,
+ totallen);
+ if (ret < 0) {
+ GF_FREE(sign);
+ goto delkeys;
+ }
+ op_errno = 0;
+ op_ret = totallen;
+
+delkeys:
+ br_stub_remove_vxattrs(xattr, _gf_true);
+
+unwind:
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, xattr, xdata);
+ br_stub_cleanup_local(local);
+ br_stub_dealloc_local(local);
+ return 0;
+}
+
+static void
+br_stub_send_stub_init_time(call_frame_t *frame, xlator_t *this)
+{
+ int op_ret = 0;
+ int op_errno = 0;
+ dict_t *xattr = NULL;
+ br_stub_init_t stub = {
+ {
+ 0,
+ },
+ };
+ br_stub_private_t *priv = NULL;
+
+ priv = this->private;
+
+ xattr = dict_new();
+ if (!xattr) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ stub.timebuf[0] = priv->boot[0];
+ stub.timebuf[1] = priv->boot[1];
+ memcpy(stub.export, priv->export, strlen(priv->export) + 1);
+
+ op_ret = dict_set_static_bin(xattr, GLUSTERFS_GET_BR_STUB_INIT_TIME,
+ (void *)&stub, sizeof(br_stub_init_t));
+ if (op_ret < 0) {
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ op_ret = sizeof(br_stub_init_t);
+
+unwind:
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, xattr, NULL);
+
+ if (xattr)
+ dict_unref(xattr);
+}
+
+int
+br_stub_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ void *cookie = NULL;
+ static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ fop_getxattr_cbk_t cbk = br_stub_getxattr_cbk;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ br_stub_local_t *local = NULL;
+ br_stub_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, loc, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, unwind);
+
+ if (!name) {
+ cbk = br_stub_listxattr_cbk;
+ goto wind;
+ }
+
+ if (br_stub_is_internal_xattr(name))
+ goto unwind;
+
+ priv = this->private;
+ BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ /**
+ * If xattr is node-uuid and the inode is marked bad, return EIO.
+ * Returning EIO would result in AFR to choose correct node-uuid
+ * corresponding to the subvolume * where the good copy of the
+ * file resides.
+ */
+ if (IA_ISREG(loc->inode->ia_type) && XATTR_IS_NODE_UUID(name) &&
+ br_stub_check_bad_object(this, loc->inode, &op_ret, &op_errno)) {
+ goto unwind;
+ }
+
+ /**
+ * this special extended attribute is allowed only on root
+ */
+ if (name &&
+ (strncmp(name, GLUSTERFS_GET_BR_STUB_INIT_TIME,
+ sizeof(GLUSTERFS_GET_BR_STUB_INIT_TIME) - 1) == 0) &&
+ ((gf_uuid_compare(loc->gfid, rootgfid) == 0) ||
+ (gf_uuid_compare(loc->inode->gfid, rootgfid) == 0))) {
+ BR_STUB_RESET_LOCAL_NULL(frame);
+ br_stub_send_stub_init_time(frame, this);
+ return 0;
+ }
+
+ if (!IA_ISREG(loc->inode->ia_type))
+ goto wind;
+
+ if (name && (strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE,
+ sizeof(GLUSTERFS_GET_OBJECT_SIGNATURE) - 1) == 0)) {
+ cookie = (void *)BR_STUB_REQUEST_COOKIE;
+
+ local = br_stub_alloc_local(this);
+ if (!local) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ br_stub_fill_local(local, NULL, NULL, loc->inode, loc->inode->gfid,
+ BR_STUB_NO_VERSIONING, 0);
+ frame->local = local;
+ }
+
+wind:
+ STACK_WIND_COOKIE(frame, cbk, cookie, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
+unwind:
+ BR_STUB_RESET_LOCAL_NULL(frame);
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, NULL, NULL);
+ return 0;
+}
+
+int
+br_stub_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ void *cookie = NULL;
+ static uuid_t rootgfid = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ fop_fgetxattr_cbk_t cbk = br_stub_getxattr_cbk;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ br_stub_local_t *local = NULL;
+ br_stub_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (!name) {
+ cbk = br_stub_listxattr_cbk;
+ goto wind;
+ }
+
+ if (br_stub_is_internal_xattr(name))
+ goto unwind;
+
+ BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ /**
+ * If xattr is node-uuid and the inode is marked bad, return EIO.
+ * Returning EIO would result in AFR to choose correct node-uuid
+ * corresponding to the subvolume * where the good copy of the
+ * file resides.
+ */
+ if (IA_ISREG(fd->inode->ia_type) && XATTR_IS_NODE_UUID(name) &&
+ br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno)) {
+ goto unwind;
+ }
+
+ /**
+ * this special extended attribute is allowed only on root
+ */
+ if (name &&
+ (strncmp(name, GLUSTERFS_GET_BR_STUB_INIT_TIME,
+ sizeof(GLUSTERFS_GET_BR_STUB_INIT_TIME) - 1) == 0) &&
+ (gf_uuid_compare(fd->inode->gfid, rootgfid) == 0)) {
+ BR_STUB_RESET_LOCAL_NULL(frame);
+ br_stub_send_stub_init_time(frame, this);
+ return 0;
+ }
+
+ if (!IA_ISREG(fd->inode->ia_type))
+ goto wind;
+
+ if (name && (strncmp(name, GLUSTERFS_GET_OBJECT_SIGNATURE,
+ sizeof(GLUSTERFS_GET_OBJECT_SIGNATURE) - 1) == 0)) {
+ cookie = (void *)BR_STUB_REQUEST_COOKIE;
+
+ local = br_stub_alloc_local(this);
+ if (!local) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ br_stub_fill_local(local, NULL, fd, fd->inode, fd->inode->gfid,
+ BR_STUB_NO_VERSIONING, 0);
+ frame->local = local;
+ }
+
+wind:
+ STACK_WIND_COOKIE(frame, cbk, cookie, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
+unwind:
+ BR_STUB_RESET_LOCAL_NULL(frame);
+ STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, NULL, NULL);
+ return 0;
+}
+
+int32_t
+br_stub_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ int32_t ret = -1;
+ br_stub_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, frame, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, fd, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, fd->inode, unwind);
+
+ priv = this->private;
+ if (!priv->do_versioning)
+ goto wind;
+
+ ret = br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno);
+ if (ret)
+ goto unwind;
+
+wind:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
+ fd, size, offset, flags, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, NULL, 0, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+/**
+ * The first write response on the first fd in the list of fds will set
+ * the flag to indicate that the inode is modified. The subsequent write
+ * respnses coming on either the first fd or some other fd will not change
+ * the fd. The inode-modified flag is unset only upon release of all the
+ * fds.
+ */
+int32_t
+br_stub_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t ret = 0;
+ br_stub_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ ret = br_stub_mark_inode_modified(this, local);
+ if (ret) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ br_stub_cleanup_local(local);
+ br_stub_dealloc_local(local);
+
+ return 0;
+}
+
+int32_t
+br_stub_writev_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ STACK_WIND(frame, br_stub_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
+ return 0;
+}
+
+/**
+ * This is probably the most crucial part about the whole versioning thing.
+ * There's absolutely no differentiation as such between an anonymous fd
+ * and a regular fd except the fd context initialization. Object versioning
+ * is performed when the inode is dirty. Parallel write operations are no
+ * special with each write performing object versioning followed by marking
+ * the inode as non-dirty (synced). This is followed by the actual operation
+ * (writev() in this case) which on a success marks the inode as modified.
+ * This prevents signing of objects that have not been modified.
+ */
+int32_t
+br_stub_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ gf_boolean_t inc_version = _gf_false;
+ gf_boolean_t modified = _gf_false;
+ br_stub_inode_ctx_t *ctx = NULL;
+ int32_t ret = -1;
+ fop_writev_cbk_t cbk = default_writev_cbk;
+ br_stub_local_t *local = NULL;
+ br_stub_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, frame, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, fd, unwind);
+
+ priv = this->private;
+ if (!priv->do_versioning)
+ goto wind;
+
+ ret = br_stub_need_versioning(this, fd, &inc_version, &modified, &ctx);
+ if (ret)
+ goto unwind;
+
+ ret = br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno);
+ if (ret)
+ goto unwind;
+
+ /**
+ * The inode is not dirty and also witnessed at least one successful
+ * modification operation. Therefore, subsequent operations need not
+ * perform any special tracking.
+ */
+ if (!inc_version && modified)
+ goto wind;
+
+ /**
+ * okay.. so, either the inode needs versioning or the modification
+ * needs to be tracked. ->cbk is set to the appropriate callback
+ * routine for this.
+ * NOTE: ->local needs to be deallocated on failures from here on.
+ */
+ ret = br_stub_versioning_prep(frame, this, fd, ctx);
+ if (ret)
+ goto unwind;
+
+ local = frame->local;
+ if (!inc_version) {
+ br_stub_fill_local(local, NULL, fd, fd->inode, fd->inode->gfid,
+ BR_STUB_NO_VERSIONING, 0);
+ cbk = br_stub_writev_cbk;
+ goto wind;
+ }
+
+ stub = fop_writev_stub(frame, br_stub_writev_resume, fd, vector, count,
+ offset, flags, iobref, xdata);
+
+ if (!stub) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+ "write gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto cleanup_local;
+ }
+
+ /* Perform Versioning */
+ return br_stub_perform_incversioning(this, frame, stub, fd, ctx);
+
+wind:
+ STACK_WIND(frame, cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
+ fd, vector, count, offset, flags, iobref, xdata);
+ return 0;
+
+cleanup_local:
+ br_stub_cleanup_local(local);
+ br_stub_dealloc_local(local);
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int32_t
+br_stub_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t ret = -1;
+ br_stub_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ ret = br_stub_mark_inode_modified(this, local);
+ if (ret) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ br_stub_cleanup_local(local);
+ br_stub_dealloc_local(local);
+
+ return 0;
+}
+
+int32_t
+br_stub_ftruncate_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata)
+{
+ STACK_WIND(frame, br_stub_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+}
+
+/* c.f. br_stub_writev() for explanation */
+int32_t
+br_stub_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ br_stub_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ gf_boolean_t inc_version = _gf_false;
+ gf_boolean_t modified = _gf_false;
+ br_stub_inode_ctx_t *ctx = NULL;
+ int32_t ret = -1;
+ fop_ftruncate_cbk_t cbk = default_ftruncate_cbk;
+ br_stub_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, frame, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, fd, unwind);
+
+ priv = this->private;
+ if (!priv->do_versioning)
+ goto wind;
+
+ ret = br_stub_need_versioning(this, fd, &inc_version, &modified, &ctx);
+ if (ret)
+ goto unwind;
+
+ ret = br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno);
+ if (ret)
+ goto unwind;
+
+ if (!inc_version && modified)
+ goto wind;
+
+ ret = br_stub_versioning_prep(frame, this, fd, ctx);
+ if (ret)
+ goto unwind;
+
+ local = frame->local;
+ if (!inc_version) {
+ br_stub_fill_local(local, NULL, fd, fd->inode, fd->inode->gfid,
+ BR_STUB_NO_VERSIONING, 0);
+ cbk = br_stub_ftruncate_cbk;
+ goto wind;
+ }
+
+ stub = fop_ftruncate_stub(frame, br_stub_ftruncate_resume, fd, offset,
+ xdata);
+ if (!stub) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+ "ftruncate gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto cleanup_local;
+ }
+
+ return br_stub_perform_incversioning(this, frame, stub, fd, ctx);
+
+wind:
+ STACK_WIND(frame, cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+
+cleanup_local:
+ br_stub_cleanup_local(local);
+ br_stub_dealloc_local(local);
+
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int32_t
+br_stub_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int32_t ret = 0;
+ br_stub_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ ret = br_stub_mark_inode_modified(this, local);
+ if (ret) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ br_stub_cleanup_local(local);
+ br_stub_dealloc_local(local);
+ return 0;
+}
+
+int32_t
+br_stub_truncate_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ br_stub_local_t *local = frame->local;
+
+ fd_unref(local->u.context.fd);
+ STACK_WIND(frame, br_stub_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
+}
+
+/**
+ * Bit-rot-stub depends heavily on the fd based operations to for doing
+ * versioning and sending notification. It starts tracking the operation
+ * upon getting first fd based modify operation by doing versioning and
+ * sends notification when last fd using which the inode was modified is
+ * released.
+ * But for truncate there is no fd and hence it becomes difficult to do
+ * the versioning and send notification. It is handled by doing versioning
+ * on an anonymous fd. The fd will be valid till the completion of the
+ * truncate call. It guarantees that release on this anonymous fd will happen
+ * after the truncate call and notification is sent after the truncate call.
+ *
+ * c.f. br_writev_cbk() for explanation
+ */
+int32_t
+br_stub_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ br_stub_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ gf_boolean_t inc_version = _gf_false;
+ gf_boolean_t modified = _gf_false;
+ br_stub_inode_ctx_t *ctx = NULL;
+ int32_t ret = -1;
+ fd_t *fd = NULL;
+ fop_truncate_cbk_t cbk = default_truncate_cbk;
+ br_stub_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, frame, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, loc, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, unwind);
+
+ priv = this->private;
+ if (!priv->do_versioning)
+ goto wind;
+
+ fd = fd_anonymous(loc->inode);
+ if (!fd) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_CREATE_ANONYMOUS_FD_FAILED,
+ "inode-gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
+ goto unwind;
+ }
+
+ ret = br_stub_need_versioning(this, fd, &inc_version, &modified, &ctx);
+ if (ret)
+ goto cleanup_fd;
+
+ ret = br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno);
+ if (ret)
+ goto unwind;
+
+ if (!inc_version && modified)
+ goto wind;
+
+ ret = br_stub_versioning_prep(frame, this, fd, ctx);
+ if (ret)
+ goto cleanup_fd;
+
+ local = frame->local;
+ if (!inc_version) {
+ br_stub_fill_local(local, NULL, fd, fd->inode, fd->inode->gfid,
+ BR_STUB_NO_VERSIONING, 0);
+ cbk = br_stub_truncate_cbk;
+ goto wind;
+ }
+
+ stub = fop_truncate_stub(frame, br_stub_truncate_resume, loc, offset,
+ xdata);
+ if (!stub) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_STUB_ALLOC_FAILED,
+ "truncate gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto cleanup_local;
+ }
+
+ return br_stub_perform_incversioning(this, frame, stub, fd, ctx);
+
+wind:
+ STACK_WIND(frame, cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate,
+ loc, offset, xdata);
+ if (fd)
+ fd_unref(fd);
+ return 0;
+
+cleanup_local:
+ br_stub_cleanup_local(local);
+ br_stub_dealloc_local(local);
+cleanup_fd:
+ fd_unref(fd);
+unwind:
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* open() */
+
+/**
+ * It's probably worth mentioning a bit about why some of the housekeeping
+ * work is done in open() call path, rather than the callback path.
+ * Two (or more) open()'s in parallel can race and lead to a situation
+ * where a release() gets triggered (possibly after a series of write()
+ * calls) when *other* open()'s have still not reached callback path
+ * thereby having an active fd on an inode that is in process of getting
+ * signed with the current version.
+ *
+ * Maintaining fd list in the call path ensures that a release() would
+ * not be triggered if an open() call races ahead (followed by a close())
+ * threby finding non-empty fd list.
+ */
+
+int
+br_stub_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ int32_t ret = -1;
+ br_stub_inode_ctx_t *ctx = NULL;
+ uint64_t ctx_addr = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ br_stub_private_t *priv = NULL;
+ unsigned long version = BITROT_DEFAULT_CURRENT_VERSION;
+
+ GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, loc, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, fd, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, fd->inode, unwind);
+
+ priv = this->private;
+
+ if (!priv->do_versioning)
+ goto wind;
+
+ ret = br_stub_get_inode_ctx(this, fd->inode, &ctx_addr);
+ if (ret) {
+ ret = br_stub_init_inode_versions(this, fd, fd->inode, version,
+ _gf_true, _gf_false, &ctx_addr);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ BRS_MSG_GET_INODE_CONTEXT_FAILED, "path=%s", loc->path,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto unwind;
+ }
+ }
+
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+ ret = br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno);
+ if (ret)
+ goto unwind;
+
+ if (frame->root->pid == GF_CLIENT_PID_SCRUB)
+ goto wind;
+
+ if (flags == O_RDONLY)
+ goto wind;
+
+ ret = br_stub_add_fd_to_inode(this, fd, ctx);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_ADD_FD_TO_LIST_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto unwind;
+ }
+
+wind:
+ STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, NULL, NULL);
+ return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* creat() */
+
+/**
+ * This routine registers a release callback for the given fd and adds the
+ * fd to the inode context fd tracking list.
+ */
+int32_t
+br_stub_add_fd_to_inode(xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx)
+{
+ int32_t ret = -1;
+ br_stub_fd_t *br_stub_fd = NULL;
+
+ ret = br_stub_require_release_call(this, fd, &br_stub_fd);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_SET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto out;
+ }
+
+ LOCK(&fd->inode->lock);
+ {
+ list_add_tail(&ctx->fd_list, &br_stub_fd->list);
+ }
+ UNLOCK(&fd->inode->lock);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int
+br_stub_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int32_t ret = 0;
+ uint64_t ctx_addr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+ unsigned long version = BITROT_DEFAULT_CURRENT_VERSION;
+ br_stub_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ if (!priv->do_versioning)
+ goto unwind;
+
+ ret = br_stub_get_inode_ctx(this, fd->inode, &ctx_addr);
+ if (ret < 0) {
+ ret = br_stub_init_inode_versions(this, fd, inode, version, _gf_true,
+ _gf_false, &ctx_addr);
+ if (ret) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+ } else {
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+ ret = br_stub_add_fd_to_inode(this, fd, ctx);
+ }
+
+unwind:
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, stbuf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int
+br_stub_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, loc, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, fd, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, fd->inode, unwind);
+
+ STACK_WIND(frame, br_stub_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+int
+br_stub_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ int32_t ret = -1;
+ unsigned long version = BITROT_DEFAULT_CURRENT_VERSION;
+ br_stub_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ if (!priv->do_versioning)
+ goto unwind;
+
+ ret = br_stub_init_inode_versions(this, NULL, inode, version, _gf_true,
+ _gf_false, NULL);
+ /**
+ * Like lookup, if init_inode_versions fail, return EINVAL
+ */
+ if (ret) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int
+br_stub_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *xdata)
+{
+ GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, loc, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, unwind);
+
+ STACK_WIND(frame, br_stub_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, dev, umask, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(mknod, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+/** }}} */
+
+/**
+ * As of now, only lookup searches for bad object xattr and marks the
+ * object as bad in its inode context if the xattr is present. But there
+ * is a possibility that, at the time of the lookup the object was not
+ * marked bad (i.e. bad object xattr was not set), and later its marked
+ * as bad. In this case, object is not bad, so when a fop such as open or
+ * readv or writev comes on the object, the fop will be sent downward instead
+ * of sending as error upwards.
+ * The solution for this is to do a getxattr for the below list of fops.
+ * lookup, readdirp, open, readv, writev.
+ * But doing getxattr for each of the above fops might be costly.
+ * So another method followed is to catch the bad file marking by the scrubber
+ * and set that info within the object's inode context. In this way getxattr
+ * calls can be avoided and bad objects can be caught instantly. Fetching the
+ * xattr is needed only in lookups when there is a brick restart or inode
+ * forget.
+ *
+ * If the dict (@xattr) is NULL, then how should that be handled? Fail the
+ * lookup operation? Or let it continue with version being initialized to
+ * BITROT_DEFAULT_CURRENT_VERSION. But what if the version was different
+ * on disk (and also a right signature was there), but posix failed to
+ * successfully allocate the dict? Posix does not treat call back xdata
+ * creattion failure as the lookup failure.
+ */
+static int32_t
+br_stub_lookup_version(xlator_t *this, uuid_t gfid, inode_t *inode,
+ dict_t *xattr)
+{
+ unsigned long version = 0;
+ br_version_t *obuf = NULL;
+ br_signature_t *sbuf = NULL;
+ br_vxattr_status_t status;
+ gf_boolean_t bad_object = _gf_false;
+
+ /**
+ * versioning xattrs were requested from POSIX. if available, figure
+ * out the correct version to use in the inode context (start with
+ * the default version if unavailable). As of now versions are not
+ * persisted on-disk. The inode is marked dirty, so that the first
+ * operation (such as write(), etc..) triggers synchronization to
+ * disk.
+ */
+ status = br_version_xattr_state(xattr, &obuf, &sbuf, &bad_object);
+ version = ((status == BR_VXATTR_STATUS_FULL) ||
+ (status == BR_VXATTR_STATUS_UNSIGNED))
+ ? obuf->ongoingversion
+ : BITROT_DEFAULT_CURRENT_VERSION;
+
+ /**
+ * If signature is there, but version is not there then that status is
+ * is treated as INVALID. So in that case, we should not initialize the
+ * inode context with wrong version names etc.
+ */
+ if (status == BR_VXATTR_STATUS_INVALID)
+ return -1;
+
+ return br_stub_init_inode_versions(this, NULL, inode, version, _gf_true,
+ bad_object, NULL);
+}
+
+/** {{{ */
+
+int32_t
+br_stub_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ br_stub_private_t *priv = NULL;
+ br_stub_fd_t *fd_ctx = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ priv = this->private;
+ if (gf_uuid_compare(fd->inode->gfid, priv->bad_object_dir_gfid))
+ goto normal;
+
+ fd_ctx = br_stub_fd_new();
+ if (!fd_ctx) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ fd_ctx->bad_object.dir_eof = -1;
+ fd_ctx->bad_object.dir = sys_opendir(priv->stub_basepath);
+ if (!fd_ctx->bad_object.dir) {
+ op_errno = errno;
+ goto err_freectx;
+ }
+
+ op_ret = br_stub_fd_ctx_set(this, fd, fd_ctx);
+ if (!op_ret)
+ goto unwind;
+
+ sys_closedir(fd_ctx->bad_object.dir);
+
+err_freectx:
+ GF_FREE(fd_ctx);
+unwind:
+ STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, NULL);
+ return 0;
+
+normal:
+ STACK_WIND(frame, default_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
+}
+
+int32_t
+br_stub_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ br_stub_private_t *priv = NULL;
+
+ priv = this->private;
+ if (!priv->do_versioning)
+ goto out;
+
+ if (gf_uuid_compare(fd->inode->gfid, priv->bad_object_dir_gfid))
+ goto out;
+ stub = fop_readdir_stub(frame, br_stub_readdir_wrapper, fd, size, off,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
+ br_stub_worker_enqueue(this, stub);
+ return 0;
+out:
+ STACK_WIND(frame, default_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata);
+ return 0;
+}
+
+int
+br_stub_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *dict)
+{
+ int32_t ret = 0;
+ uint64_t ctxaddr = 0;
+ gf_dirent_t *entry = NULL;
+ br_stub_private_t *priv = NULL;
+ gf_boolean_t ver_enabled = _gf_false;
+
+ BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled);
+ priv = this->private;
+ BR_STUB_VER_COND_GOTO(priv, (!ver_enabled), unwind);
+
+ if (op_ret < 0)
+ goto unwind;
+
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ if ((strcmp(entry->d_name, ".") == 0) ||
+ (strcmp(entry->d_name, "..") == 0))
+ continue;
+
+ if (!IA_ISREG(entry->d_stat.ia_type))
+ continue;
+
+ /*
+ * Readdirp for most part is a bulk lookup for all the entries
+ * present in the directory being read. Ideally, for each
+ * entry, the handling should be similar to that of a lookup
+ * callback. But for now, just keeping this as it has been
+ * until now (which means, this comment has been added much
+ * later as part of a change that wanted to send the flag
+ * of true/false to br_stub_remove_vxattrs to indicate whether
+ * the bad-object xattr should be removed from the entry->dict
+ * or not). Until this change, the function br_stub_remove_vxattrs
+ * was just removing all the xattrs associated with bit-rot-stub
+ * (like version, bad-object, signature etc). But, there are
+ * scenarios where we only want to send bad-object xattr and not
+ * others. So this comment is part of that change which also
+ * mentions about another possible change that might be needed
+ * in future.
+ * But for now, adding _gf_true means functionally its same as
+ * what this function was doing before. Just remove all the stub
+ * related xattrs.
+ */
+ ret = br_stub_get_inode_ctx(this, entry->inode, &ctxaddr);
+ if (ret < 0)
+ ctxaddr = 0;
+ if (ctxaddr) { /* already has the context */
+ br_stub_remove_vxattrs(entry->dict, _gf_true);
+ continue;
+ }
+
+ ret = br_stub_lookup_version(this, entry->inode->gfid, entry->inode,
+ entry->dict);
+ br_stub_remove_vxattrs(entry->dict, _gf_true);
+ if (ret) {
+ /**
+ * there's no per-file granularity support in case of
+ * failure. let's fail the entire request for now..
+ */
+ break;
+ }
+ }
+
+ if (ret) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, dict);
+
+ return 0;
+}
+
+int
+br_stub_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ int32_t ret = -1;
+ int op_errno = 0;
+ gf_boolean_t xref = _gf_false;
+ br_stub_private_t *priv = NULL;
+
+ priv = this->private;
+ BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ op_errno = ENOMEM;
+ if (!dict) {
+ dict = dict_new();
+ if (!dict)
+ goto unwind;
+ } else {
+ dict = dict_ref(dict);
+ }
+
+ xref = _gf_true;
+
+ op_errno = EINVAL;
+ ret = dict_set_uint32(dict, BITROT_CURRENT_VERSION_KEY, 0);
+ if (ret)
+ goto unwind;
+ ret = dict_set_uint32(dict, BITROT_SIGNING_VERSION_KEY, 0);
+ if (ret)
+ goto unwind;
+ ret = dict_set_uint32(dict, BITROT_OBJECT_BAD_KEY, 0);
+ if (ret)
+ goto unwind;
+
+wind:
+ STACK_WIND(frame, br_stub_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict);
+ goto unref_dict;
+
+unwind:
+ if (frame->local == (void *)0x1)
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(readdirp, frame, -1, op_errno, NULL, NULL);
+ return 0;
+
+unref_dict:
+ if (xref)
+ dict_unref(dict);
+ return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* lookup() */
+
+/**
+ * This function mainly handles the ENOENT error for the bad objects. Though
+ * br_stub_forget () handles removal of the link for the bad object from the
+ * quarantine directory, its better to handle it in lookup as well, where
+ * a failed lookup on a bad object with ENOENT, will trigger deletion of the
+ * link for the bad object from quarantine directory. So whoever comes first
+ * either forget () or lookup () will take care of removing the link.
+ */
+void
+br_stub_handle_lookup_error(xlator_t *this, inode_t *inode, int32_t op_errno)
+{
+ int32_t ret = -1;
+ uint64_t ctx_addr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+
+ if (op_errno != ENOENT)
+ goto out;
+
+ if (!inode_is_linked(inode))
+ goto out;
+
+ ret = br_stub_get_inode_ctx(this, inode, &ctx_addr);
+ if (ret)
+ goto out;
+
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+ LOCK(&inode->lock);
+ {
+ if (__br_stub_is_bad_object(ctx))
+ (void)br_stub_del(this, inode->gfid);
+ }
+ UNLOCK(&inode->lock);
+
+ if (__br_stub_is_bad_object(ctx)) {
+ /* File is not present, might be deleted for recovery,
+ * del the bitrot inode context
+ */
+ ctx_addr = 0;
+ inode_ctx_del(inode, this, &ctx_addr);
+ if (ctx_addr) {
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+ GF_FREE(ctx);
+ }
+ }
+
+out:
+ return;
+}
+
+int
+br_stub_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, inode_t *inode, struct iatt *stbuf,
+ dict_t *xattr, struct iatt *postparent)
+{
+ int32_t ret = 0;
+ br_stub_private_t *priv = NULL;
+ gf_boolean_t ver_enabled = _gf_false;
+ gf_boolean_t remove_bad_file_marker = _gf_true;
+
+ BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled);
+ priv = this->private;
+
+ if (op_ret < 0) {
+ (void)br_stub_handle_lookup_error(this, inode, op_errno);
+
+ /*
+ * If the lookup error is not ENOENT, then it is better
+ * to send the bad file marker to the higher layer (if
+ * it has been set)
+ */
+ if (op_errno != ENOENT)
+ remove_bad_file_marker = _gf_false;
+ goto delkey;
+ }
+
+ BR_STUB_VER_COND_GOTO(priv, (!ver_enabled), delkey);
+
+ if (!IA_ISREG(stbuf->ia_type))
+ goto unwind;
+
+ /**
+ * If the object is bad, then "bad inode" marker has to be sent back
+ * in resoinse, for revalidated lookups as well. Some xlators such as
+ * quick-read might cache the data in revalidated lookup as fresh
+ * lookup would anyway have sent "bad inode" marker.
+ * In general send bad inode marker for every lookup operation on the
+ * bad object.
+ */
+ if (cookie != (void *)BR_STUB_REQUEST_COOKIE) {
+ ret = br_stub_mark_xdata_bad_object(this, inode, xattr);
+ if (ret) {
+ op_ret = -1;
+ op_errno = EIO;
+ /*
+ * This flag ensures that in the label @delkey below,
+ * bad file marker is not removed from the dictinary,
+ * but other virtual xattrs (such as version, signature)
+ * are removed.
+ */
+ remove_bad_file_marker = _gf_false;
+ }
+ goto delkey;
+ }
+
+ ret = br_stub_lookup_version(this, stbuf->ia_gfid, inode, xattr);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto delkey;
+ }
+
+ /**
+ * If the object is bad, send "bad inode" marker back in response
+ * for xlator(s) to act accordingly (such as quick-read, etc..)
+ */
+ ret = br_stub_mark_xdata_bad_object(this, inode, xattr);
+ if (ret) {
+ /**
+ * aaha! bad object, but sorry we would not
+ * satisfy the request on allocation failures.
+ */
+ op_ret = -1;
+ op_errno = EIO;
+ goto delkey;
+ }
+
+delkey:
+ br_stub_remove_vxattrs(xattr, remove_bad_file_marker);
+unwind:
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+ postparent);
+
+ return 0;
+}
+
+int
+br_stub_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int32_t ret = 0;
+ int op_errno = 0;
+ void *cookie = NULL;
+ uint64_t ctx_addr = 0;
+ gf_boolean_t xref = _gf_false;
+ br_stub_private_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ GF_VALIDATE_OR_GOTO("bit-rot-stub", this, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, loc, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, unwind);
+
+ priv = this->private;
+
+ BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ if (!gf_uuid_compare(loc->gfid, priv->bad_object_dir_gfid) ||
+ !gf_uuid_compare(loc->pargfid, priv->bad_object_dir_gfid)) {
+ stub = fop_lookup_stub(frame, br_stub_lookup_wrapper, loc, xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ br_stub_worker_enqueue(this, stub);
+ return 0;
+ }
+
+ ret = br_stub_get_inode_ctx(this, loc->inode, &ctx_addr);
+ if (ret < 0)
+ ctx_addr = 0;
+ if (ctx_addr != 0)
+ goto wind;
+
+ /**
+ * fresh lookup: request version keys from POSIX
+ */
+ op_errno = ENOMEM;
+ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata)
+ goto unwind;
+ } else {
+ xdata = dict_ref(xdata);
+ }
+
+ xref = _gf_true;
+
+ /**
+ * Requesting both xattrs provides a way of sanity checking the
+ * object. Anomaly checking is done in cbk by examining absence
+ * of either or both xattrs.
+ */
+ op_errno = EINVAL;
+ ret = dict_set_uint32(xdata, BITROT_CURRENT_VERSION_KEY, 0);
+ if (ret)
+ goto unwind;
+ ret = dict_set_uint32(xdata, BITROT_SIGNING_VERSION_KEY, 0);
+ if (ret)
+ goto unwind;
+ ret = dict_set_uint32(xdata, BITROT_OBJECT_BAD_KEY, 0);
+ if (ret)
+ goto unwind;
+ cookie = (void *)BR_STUB_REQUEST_COOKIE;
+
+wind:
+ STACK_WIND_COOKIE(frame, br_stub_lookup_cbk, cookie, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ goto dealloc_dict;
+
+unwind:
+ if (frame->local == (void *)0x1)
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+dealloc_dict:
+ if (xref)
+ dict_unref(xdata);
+ return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* stat */
+int
+br_stub_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int32_t ret = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ br_stub_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (!priv->do_versioning)
+ goto wind;
+
+ if (!IA_ISREG(loc->inode->ia_type))
+ goto wind;
+
+ ret = br_stub_check_bad_object(this, loc->inode, &op_ret, &op_errno);
+ if (ret)
+ goto unwind;
+
+wind:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat,
+ loc, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, NULL, NULL);
+ return 0;
+}
+
+/* fstat */
+int
+br_stub_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int32_t ret = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ br_stub_private_t *priv = NULL;
+
+ priv = this->private;
+
+ if (!priv->do_versioning)
+ goto wind;
+
+ if (!IA_ISREG(fd->inode->ia_type))
+ goto wind;
+
+ ret = br_stub_check_bad_object(this, fd->inode, &op_ret, &op_errno);
+ if (ret)
+ goto unwind;
+
+wind:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat,
+ fd, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, NULL, NULL);
+ return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* unlink() */
+
+int
+br_stub_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ br_stub_local_t *local = NULL;
+ inode_t *inode = NULL;
+ uint64_t ctx_addr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+ int32_t ret = -1;
+ br_stub_private_t *priv = NULL;
+ gf_boolean_t ver_enabled = _gf_false;
+
+ BR_STUB_VER_ENABLED_IN_CALLPATH(frame, ver_enabled);
+ priv = this->private;
+ BR_STUB_VER_COND_GOTO(priv, (!ver_enabled), unwind);
+
+ local = frame->local;
+ frame->local = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ if (!local) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_NULL_LOCAL, NULL);
+ goto unwind;
+ }
+ inode = local->u.context.inode;
+ if (!IA_ISREG(inode->ia_type))
+ goto unwind;
+
+ ret = br_stub_get_inode_ctx(this, inode, &ctx_addr);
+ if (ret) {
+ /**
+ * If the inode is bad AND context is not there, then there
+ * is a possibility of the gfid of the object being listed
+ * in the quarantine directory and will be shown in the
+ * bad objects list. So continuing with the fop with a
+ * warning log. The entry from the quarantine directory
+ * has to be removed manually. Its not a good idea to fail
+ * the fop, as the object has already been deleted.
+ */
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode-gfid=%s", uuid_utoa(inode->gfid), NULL);
+ goto unwind;
+ }
+
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+ LOCK(&inode->lock);
+ {
+ /**
+ * Ignoring the return value of br_stub_del ().
+ * There is not much that can be done if unlinking
+ * of the entry in the quarantine directory fails.
+ * The failure is logged.
+ */
+ if (__br_stub_is_bad_object(ctx))
+ (void)br_stub_del(this, inode->gfid);
+ }
+ UNLOCK(&inode->lock);
+
+unwind:
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ br_stub_cleanup_local(local);
+ br_stub_dealloc_local(local);
+ return 0;
+}
+
+int
+br_stub_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+ dict_t *xdata)
+{
+ br_stub_local_t *local = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ br_stub_private_t *priv = NULL;
+
+ priv = this->private;
+ BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ local = br_stub_alloc_local(this);
+ if (!local) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, BRS_MSG_ALLOC_MEM_FAILED,
+ "local path=%s", loc->path, "gfid=%s",
+ uuid_utoa(loc->inode->gfid), NULL);
+ goto unwind;
+ }
+
+ br_stub_fill_local(local, NULL, NULL, loc->inode, loc->inode->gfid,
+ BR_STUB_NO_VERSIONING, 0);
+
+ frame->local = local;
+
+wind:
+ STACK_WIND(frame, br_stub_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, flag, xdata);
+ return 0;
+
+unwind:
+ if (frame->local == (void *)0x1)
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* forget() */
+
+int
+br_stub_forget(xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_addr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+
+ inode_ctx_del(inode, this, &ctx_addr);
+ if (!ctx_addr)
+ return 0;
+
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+ GF_FREE(ctx);
+
+ return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+int32_t
+br_stub_noop(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+static void
+br_stub_send_ipc_fop(xlator_t *this, fd_t *fd, unsigned long releaseversion,
+ int sign_info)
+{
+ int32_t op = 0;
+ int32_t ret = 0;
+ dict_t *xdata = NULL;
+ call_frame_t *frame = NULL;
+ changelog_event_t ev = {
+ 0,
+ };
+
+ ev.ev_type = CHANGELOG_OP_TYPE_BR_RELEASE;
+ ev.u.releasebr.version = releaseversion;
+ ev.u.releasebr.sign_info = sign_info;
+ gf_uuid_copy(ev.u.releasebr.gfid, fd->inode->gfid);
+
+ xdata = dict_new();
+ if (!xdata) {
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, BRS_MSG_DICT_ALLOC_FAILED,
+ NULL);
+ goto out;
+ }
+
+ ret = dict_set_static_bin(xdata, "RELEASE-EVENT", &ev, CHANGELOG_EV_SIZE);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_EVENT_FAILED, NULL);
+ goto dealloc_dict;
+ }
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_CREATE_FRAME_FAILED,
+ NULL);
+ goto dealloc_dict;
+ }
+
+ op = GF_IPC_TARGET_CHANGELOG;
+ STACK_WIND(frame, br_stub_noop, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ipc, op, xdata);
+
+dealloc_dict:
+ dict_unref(xdata);
+out:
+ return;
+}
+
+/**
+ * This is how the state machine of sign info works:
+ * 3 states:
+ * 1) BR_SIGN_NORMAL => The default State of the inode
+ * 2) BR_SIGN_REOPEN_WAIT => A release has been sent and is waiting for reopen
+ * 3) BR_SIGN_QUICK => reopen has happened and this release should trigger sign
+ * 2 events:
+ * 1) GF_FOP_RELEASE
+ * 2) GF_FOP_WRITE (actually a dummy write for BitD)
+ *
+ * This is how states are changed based on events:
+ * EVENT: GF_FOP_RELEASE:
+ * if (state == BR_SIGN_NORMAL) ; then
+ * set state = BR_SIGN_REOPEN_WAIT;
+ * if (state == BR_SIGN_QUICK); then
+ * set state = BR_SIGN_NORMAL;
+ * EVENT: GF_FOP_WRITE:
+ * if (state == BR_SIGN_REOPEN_WAIT); then
+ * set state = BR_SIGN_QUICK;
+ */
+br_sign_state_t
+__br_stub_inode_sign_state(br_stub_inode_ctx_t *ctx, glusterfs_fop_t fop,
+ fd_t *fd)
+{
+ br_sign_state_t sign_info = BR_SIGN_INVALID;
+
+ switch (fop) {
+ case GF_FOP_FSETXATTR:
+ sign_info = ctx->info_sign = BR_SIGN_QUICK;
+ break;
+
+ case GF_FOP_RELEASE:
+ GF_ASSERT(ctx->info_sign != BR_SIGN_REOPEN_WAIT);
+
+ if (ctx->info_sign == BR_SIGN_NORMAL) {
+ sign_info = ctx->info_sign = BR_SIGN_REOPEN_WAIT;
+ } else {
+ sign_info = ctx->info_sign;
+ ctx->info_sign = BR_SIGN_NORMAL;
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ return sign_info;
+}
+
+int32_t
+br_stub_release(xlator_t *this, fd_t *fd)
+{
+ int32_t ret = 0;
+ int32_t flags = 0;
+ inode_t *inode = NULL;
+ unsigned long releaseversion = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+ uint64_t tmp = 0;
+ br_stub_fd_t *br_stub_fd = NULL;
+ int32_t signinfo = 0;
+
+ inode = fd->inode;
+
+ LOCK(&inode->lock);
+ {
+ ctx = __br_stub_get_ongoing_version_ctx(this, inode, NULL);
+ if (ctx == NULL)
+ goto unblock;
+ br_stub_fd = br_stub_fd_ctx_get(this, fd);
+ if (br_stub_fd) {
+ list_del_init(&br_stub_fd->list);
+ }
+
+ ret = __br_stub_can_trigger_release(inode, ctx, &releaseversion);
+ if (!ret)
+ goto unblock;
+
+ signinfo = __br_stub_inode_sign_state(ctx, GF_FOP_RELEASE, fd);
+ signinfo = htonl(signinfo);
+
+ /* inode back to initital state: mark dirty */
+ if (ctx->info_sign == BR_SIGN_NORMAL) {
+ __br_stub_mark_inode_dirty(ctx);
+ __br_stub_unset_inode_modified(ctx);
+ }
+ }
+unblock:
+ UNLOCK(&inode->lock);
+
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "releaseversion: %lu | flags: %d "
+ "| signinfo: %d",
+ (unsigned long)ntohl(releaseversion), flags,
+ ntohl(signinfo));
+ br_stub_send_ipc_fop(this, fd, releaseversion, signinfo);
+ }
+
+ ret = fd_ctx_del(fd, this, &tmp);
+ br_stub_fd = (br_stub_fd_t *)(long)tmp;
+
+ GF_FREE(br_stub_fd);
+
+ return 0;
+}
+
+int32_t
+br_stub_releasedir(xlator_t *this, fd_t *fd)
+{
+ br_stub_fd_t *fctx = NULL;
+ uint64_t ctx = 0;
+ int ret = 0;
+
+ ret = fd_ctx_del(fd, this, &ctx);
+ if (ret < 0)
+ goto out;
+
+ fctx = (br_stub_fd_t *)(long)ctx;
+ if (fctx->bad_object.dir) {
+ ret = sys_closedir(fctx->bad_object.dir);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_BAD_OBJ_DIR_CLOSE_FAIL,
+ "error=%s", strerror(errno), NULL);
+ }
+
+ GF_FREE(fctx);
+out:
+ return 0;
+}
+
+/** }}} */
+
+/** {{{ */
+
+/* ictxmerge */
+
+void
+br_stub_ictxmerge(xlator_t *this, fd_t *fd, inode_t *inode,
+ inode_t *linked_inode)
+{
+ int32_t ret = 0;
+ uint64_t ctxaddr = 0;
+ uint64_t lctxaddr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+ br_stub_inode_ctx_t *lctx = NULL;
+ br_stub_fd_t *br_stub_fd = NULL;
+
+ ret = br_stub_get_inode_ctx(this, inode, &ctxaddr);
+ if (ret < 0)
+ goto done;
+ ctx = (br_stub_inode_ctx_t *)(uintptr_t)ctxaddr;
+
+ LOCK(&linked_inode->lock);
+ {
+ ret = __br_stub_get_inode_ctx(this, linked_inode, &lctxaddr);
+ if (ret < 0)
+ goto unblock;
+ lctx = (br_stub_inode_ctx_t *)(uintptr_t)lctxaddr;
+
+ GF_ASSERT(list_is_singular(&ctx->fd_list));
+ br_stub_fd = list_first_entry(&ctx->fd_list, br_stub_fd_t, list);
+ if (br_stub_fd) {
+ GF_ASSERT(br_stub_fd->fd == fd);
+ list_move_tail(&br_stub_fd->list, &lctx->fd_list);
+ }
+ }
+unblock:
+ UNLOCK(&linked_inode->lock);
+
+done:
+ return;
+}
+
+/** }}} */
+
+struct xlator_fops fops = {
+ .lookup = br_stub_lookup,
+ .stat = br_stub_stat,
+ .fstat = br_stub_fstat,
+ .open = br_stub_open,
+ .create = br_stub_create,
+ .readdirp = br_stub_readdirp,
+ .getxattr = br_stub_getxattr,
+ .fgetxattr = br_stub_fgetxattr,
+ .fsetxattr = br_stub_fsetxattr,
+ .writev = br_stub_writev,
+ .truncate = br_stub_truncate,
+ .ftruncate = br_stub_ftruncate,
+ .mknod = br_stub_mknod,
+ .readv = br_stub_readv,
+ .removexattr = br_stub_removexattr,
+ .fremovexattr = br_stub_fremovexattr,
+ .setxattr = br_stub_setxattr,
+ .opendir = br_stub_opendir,
+ .readdir = br_stub_readdir,
+ .unlink = br_stub_unlink,
+};
+
+struct xlator_cbks cbks = {
+ .forget = br_stub_forget,
+ .release = br_stub_release,
+ .ictxmerge = br_stub_ictxmerge,
+};
+
+struct volume_options options[] = {
+ {.key = {"bitrot"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_FORCE,
+ .tags = {"bitrot"},
+ .description = "enable/disable bitrot stub"},
+ {.key = {"export"},
+ .type = GF_OPTION_TYPE_PATH,
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .tags = {"bitrot"},
+ .description = "brick path for versioning",
+ .default_value = "{{ brick.path }}"},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "bitrot-stub",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/bit-rot/src/stub/bit-rot-stub.h b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
new file mode 100644
index 00000000000..edd79a77e4f
--- /dev/null
+++ b/xlators/features/bit-rot/src/stub/bit-rot-stub.h
@@ -0,0 +1,515 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __BIT_ROT_STUB_H__
+#define __BIT_ROT_STUB_H__
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/call-stub.h>
+#include "bit-rot-stub-mem-types.h"
+#include <glusterfs/syscall.h>
+#include <glusterfs/common-utils.h>
+#include "bit-rot-common.h"
+#include "bit-rot-stub-messages.h"
+#include "glusterfs3-xdr.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/syncop-utils.h>
+
+#define BAD_OBJECT_THREAD_STACK_SIZE ((size_t)(1024 * 1024))
+#define BR_STUB_DUMP_STR_SIZE 65536
+
+#define BR_PATH_MAX_EXTRA (PATH_MAX + 1024)
+#define BR_PATH_MAX_PLUS (PATH_MAX + 2048)
+
+/*
+ * Oops. Spelling mistake. Correcting it
+ */
+#define OLD_BR_STUB_QUARANTINE_DIR GF_HIDDEN_PATH "/quanrantine"
+#define BR_STUB_QUARANTINE_DIR GF_HIDDEN_PATH "/quarantine"
+
+/* do not reference frame->local in cbk unless initialized.
+ * Assigned 0x1 marks verisoning flag between call path and
+ * cbk path.
+ */
+#define BR_STUB_VER_NOT_ACTIVE_THEN_GOTO(frame, priv, label) \
+ do { \
+ if (priv->do_versioning) \
+ frame->local = (void *)0x1; \
+ else \
+ goto label; \
+ } while (0)
+
+#define BR_STUB_VER_COND_GOTO(priv, cond, label) \
+ do { \
+ if (!priv->do_versioning || cond) \
+ goto label; \
+ } while (0)
+
+#define BR_STUB_VER_ENABLED_IN_CALLPATH(frame, flag) \
+ do { \
+ if (frame->local) \
+ flag = _gf_true; \
+ if (frame->local == (void *)0x1) \
+ frame->local = NULL; \
+ } while (0)
+
+#define BR_STUB_RESET_LOCAL_NULL(frame) \
+ do { \
+ if (frame->local == (void *)0x1) \
+ frame->local = NULL; \
+ } while (0)
+
+typedef int(br_stub_version_cbk)(call_frame_t *, void *, xlator_t *, int32_t,
+ int32_t, dict_t *);
+
+typedef struct br_stub_inode_ctx {
+ int need_writeback; /* does the inode need
+ a writeback to disk? */
+ unsigned long currentversion; /* ongoing version */
+
+ int info_sign;
+ struct list_head fd_list; /* list of open fds or fds participating in
+ write operations */
+ gf_boolean_t bad_object;
+} br_stub_inode_ctx_t;
+
+typedef struct br_stub_fd {
+ fd_t *fd;
+ struct list_head list;
+ struct bad_object_dir {
+ DIR *dir;
+ off_t dir_eof;
+ } bad_object;
+} br_stub_fd_t;
+
+#define I_DIRTY (1 << 0) /* inode needs writeback */
+#define I_MODIFIED (1 << 1)
+#define WRITEBACK_DURABLE 1 /* writeback is durable */
+
+/**
+ * This could just have been a plain struct without unions and all,
+ * but we may need additional things in the future.
+ */
+typedef struct br_stub_local {
+ call_stub_t *fopstub; /* stub for original fop */
+
+ int versioningtype; /* not much used atm */
+
+ union {
+ struct br_stub_ctx {
+ fd_t *fd;
+ uuid_t gfid;
+ inode_t *inode;
+ unsigned long version;
+ } context;
+ } u;
+} br_stub_local_t;
+
+#define BR_STUB_NO_VERSIONING (1 << 0)
+#define BR_STUB_INCREMENTAL_VERSIONING (1 << 1)
+
+typedef struct br_stub_private {
+ gf_boolean_t do_versioning;
+
+ uint32_t boot[2];
+ char export[PATH_MAX];
+
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+
+ struct list_head squeue; /* ordered signing queue */
+ pthread_t signth;
+ struct bad_objects_container {
+ pthread_t thread;
+ pthread_mutex_t bad_lock;
+ pthread_cond_t bad_cond;
+ struct list_head bad_queue;
+ } container;
+ struct mem_pool *local_pool;
+
+ char stub_basepath[BR_PATH_MAX_EXTRA];
+
+ uuid_t bad_object_dir_gfid;
+} br_stub_private_t;
+
+br_stub_fd_t *
+br_stub_fd_new(void);
+
+int
+__br_stub_fd_ctx_set(xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd);
+
+br_stub_fd_t *
+__br_stub_fd_ctx_get(xlator_t *this, fd_t *fd);
+
+br_stub_fd_t *
+br_stub_fd_ctx_get(xlator_t *this, fd_t *fd);
+
+int32_t
+br_stub_fd_ctx_set(xlator_t *this, fd_t *fd, br_stub_fd_t *br_stub_fd);
+
+static inline gf_boolean_t
+__br_stub_is_bad_object(br_stub_inode_ctx_t *ctx)
+{
+ return ctx->bad_object;
+}
+
+static inline void
+__br_stub_mark_object_bad(br_stub_inode_ctx_t *ctx)
+{
+ ctx->bad_object = _gf_true;
+}
+
+/* inode writeback helpers */
+static inline void
+__br_stub_mark_inode_dirty(br_stub_inode_ctx_t *ctx)
+{
+ ctx->need_writeback |= I_DIRTY;
+}
+
+static inline void
+__br_stub_mark_inode_synced(br_stub_inode_ctx_t *ctx)
+{
+ ctx->need_writeback &= ~I_DIRTY;
+}
+
+static inline int
+__br_stub_is_inode_dirty(br_stub_inode_ctx_t *ctx)
+{
+ return (ctx->need_writeback & I_DIRTY);
+}
+
+/* inode mofification markers */
+static inline void
+__br_stub_set_inode_modified(br_stub_inode_ctx_t *ctx)
+{
+ ctx->need_writeback |= I_MODIFIED;
+}
+
+static inline void
+__br_stub_unset_inode_modified(br_stub_inode_ctx_t *ctx)
+{
+ ctx->need_writeback &= ~I_MODIFIED;
+}
+
+static inline int
+__br_stub_is_inode_modified(br_stub_inode_ctx_t *ctx)
+{
+ return (ctx->need_writeback & I_MODIFIED);
+}
+
+static inline int
+br_stub_require_release_call(xlator_t *this, fd_t *fd, br_stub_fd_t **fd_ctx)
+{
+ int32_t ret = 0;
+ br_stub_fd_t *br_stub_fd = NULL;
+
+ br_stub_fd = br_stub_fd_new();
+ if (!br_stub_fd)
+ return -1;
+
+ br_stub_fd->fd = fd;
+ INIT_LIST_HEAD(&br_stub_fd->list);
+
+ ret = br_stub_fd_ctx_set(this, fd, br_stub_fd);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, BRS_MSG_SET_CONTEXT_FAILED,
+ NULL);
+ else
+ *fd_ctx = br_stub_fd;
+
+ return ret;
+}
+
+/* get/set inode context helpers */
+
+static inline int
+__br_stub_get_inode_ctx(xlator_t *this, inode_t *inode, uint64_t *ctx)
+{
+ return __inode_ctx_get(inode, this, ctx);
+}
+
+static inline int
+br_stub_get_inode_ctx(xlator_t *this, inode_t *inode, uint64_t *ctx)
+{
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __br_stub_get_inode_ctx(this, inode, ctx);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+static inline int
+br_stub_set_inode_ctx(xlator_t *this, inode_t *inode, br_stub_inode_ctx_t *ctx)
+{
+ uint64_t ctx_addr = (uint64_t)(uintptr_t)ctx;
+ return inode_ctx_set(inode, this, &ctx_addr);
+}
+
+/* version get/set helpers */
+
+static inline unsigned long
+__br_stub_writeback_version(br_stub_inode_ctx_t *ctx)
+{
+ return (ctx->currentversion + 1);
+}
+
+static inline void
+__br_stub_set_ongoing_version(br_stub_inode_ctx_t *ctx, unsigned long version)
+{
+ if (ctx->currentversion < version)
+ ctx->currentversion = version;
+ else
+ gf_smsg("bit-rot-stub", GF_LOG_WARNING, 0,
+ BRS_MSG_CHANGE_VERSION_FAILED, "current version=%lu",
+ ctx->currentversion, "new version=%lu", version, NULL);
+}
+
+static inline int
+__br_stub_can_trigger_release(inode_t *inode, br_stub_inode_ctx_t *ctx,
+ unsigned long *version)
+{
+ /**
+ * If the inode is modified, then it has to be dirty. An inode is
+ * marked dirty once version is increased. Its marked as modified
+ * when the modification call (write/truncate) which triggered
+ * the versioning is successful.
+ */
+ if (__br_stub_is_inode_modified(ctx) && list_empty(&ctx->fd_list) &&
+ (ctx->info_sign != BR_SIGN_REOPEN_WAIT)) {
+ GF_ASSERT(__br_stub_is_inode_dirty(ctx) == 0);
+
+ if (version)
+ *version = htonl(ctx->currentversion);
+ return 1;
+ }
+
+ return 0;
+}
+
+static inline int32_t
+br_stub_get_ongoing_version(xlator_t *this, inode_t *inode,
+ unsigned long *version)
+{
+ int32_t ret = 0;
+ uint64_t ctx_addr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get(inode, this, &ctx_addr);
+ if (ret < 0)
+ goto unblock;
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+ *version = ctx->currentversion;
+ }
+unblock:
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+/**
+ * fetch the current version from inode and return the context.
+ * inode->lock should be held before invoking this as context
+ * *needs* to be valid in the caller.
+ */
+static inline br_stub_inode_ctx_t *
+__br_stub_get_ongoing_version_ctx(xlator_t *this, inode_t *inode,
+ unsigned long *version)
+{
+ int32_t ret = 0;
+ uint64_t ctx_addr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+
+ ret = __inode_ctx_get(inode, this, &ctx_addr);
+ if (ret < 0)
+ return NULL;
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+ if (version)
+ *version = ctx->currentversion;
+
+ return ctx;
+}
+
+/* filter for xattr fetch */
+static inline int
+br_stub_is_internal_xattr(const char *name)
+{
+ if (name && ((strncmp(name, BITROT_CURRENT_VERSION_KEY,
+ SLEN(BITROT_CURRENT_VERSION_KEY)) == 0) ||
+ (strncmp(name, BITROT_SIGNING_VERSION_KEY,
+ SLEN(BITROT_SIGNING_VERSION_KEY)) == 0)))
+ return 1;
+ return 0;
+}
+
+static inline void
+br_stub_remove_vxattrs(dict_t *xattr, gf_boolean_t remove_bad_marker)
+{
+ if (xattr) {
+ /*
+ * When a file is corrupted, bad-object should be
+ * set in the dict. But, other info such as version,
+ * signature etc should not be set. Hence the flag
+ * remove_bad_marker. The consumer should know whether
+ * to send the bad-object info in the dict or not.
+ */
+ if (remove_bad_marker)
+ dict_del(xattr, BITROT_OBJECT_BAD_KEY);
+ dict_del(xattr, BITROT_CURRENT_VERSION_KEY);
+ dict_del(xattr, BITROT_SIGNING_VERSION_KEY);
+ dict_del(xattr, BITROT_SIGNING_XATTR_SIZE_KEY);
+ }
+}
+
+/**
+ * This function returns the below values for different situations
+ * 0 => as per the inode context object is not bad
+ * -1 => Failed to get the inode context itself
+ * -2 => As per the inode context object is bad
+ * Both -ve values means the fop which called this function is failed
+ * and error is returned upwards.
+ * In future if needed or more errors have to be handled, then those
+ * errors can be made into enums.
+ */
+static inline int
+br_stub_is_bad_object(xlator_t *this, inode_t *inode)
+{
+ int bad_object = 0;
+ gf_boolean_t tmp = _gf_false;
+ uint64_t ctx_addr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+ int32_t ret = -1;
+
+ ret = br_stub_get_inode_ctx(this, inode, &ctx_addr);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode-gfid=%s", uuid_utoa(inode->gfid), NULL);
+ bad_object = -1;
+ goto out;
+ }
+
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+ LOCK(&inode->lock);
+ {
+ tmp = __br_stub_is_bad_object(ctx);
+ if (tmp)
+ bad_object = -2;
+ }
+ UNLOCK(&inode->lock);
+
+out:
+ return bad_object;
+}
+
+static inline int32_t
+br_stub_mark_object_bad(xlator_t *this, inode_t *inode)
+{
+ int32_t ret = -1;
+ uint64_t ctx_addr = 0;
+ br_stub_inode_ctx_t *ctx = NULL;
+
+ ret = br_stub_get_inode_ctx(this, inode, &ctx_addr);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, BRS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode-gfid=%s", uuid_utoa(inode->gfid), NULL);
+ goto out;
+ }
+
+ ctx = (br_stub_inode_ctx_t *)(long)ctx_addr;
+
+ LOCK(&inode->lock);
+ {
+ __br_stub_mark_object_bad(ctx);
+ }
+ UNLOCK(&inode->lock);
+
+out:
+ return ret;
+}
+
+/**
+ * There is a possibility that dict_set might fail. The o/p of dict_set is
+ * given to the caller and the caller has to decide what to do.
+ */
+static inline int32_t
+br_stub_mark_xdata_bad_object(xlator_t *this, inode_t *inode, dict_t *xdata)
+{
+ int32_t ret = 0;
+
+ if (br_stub_is_bad_object(this, inode) == -2)
+ ret = dict_set_int32(xdata, GLUSTERFS_BAD_INODE, 1);
+
+ return ret;
+}
+
+int32_t
+br_stub_add_fd_to_inode(xlator_t *this, fd_t *fd, br_stub_inode_ctx_t *ctx);
+
+br_sign_state_t
+__br_stub_inode_sign_state(br_stub_inode_ctx_t *ctx, glusterfs_fop_t fop,
+ fd_t *fd);
+
+int
+br_stub_dir_create(xlator_t *this, br_stub_private_t *priv);
+
+int
+br_stub_add(xlator_t *this, uuid_t gfid);
+
+int32_t
+br_stub_create_stub_gfid(xlator_t *this, char *stub_gfid_path, uuid_t gfid);
+
+int
+br_stub_dir_create(xlator_t *this, br_stub_private_t *priv);
+
+call_stub_t *
+__br_stub_dequeue(struct list_head *callstubs);
+
+void
+__br_stub_enqueue(struct list_head *callstubs, call_stub_t *stub);
+
+void
+br_stub_worker_enqueue(xlator_t *this, call_stub_t *stub);
+
+void *
+br_stub_worker(void *data);
+
+int32_t
+br_stub_lookup_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xattr_req);
+
+int32_t
+br_stub_readdir_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *xdata);
+
+int
+br_stub_del(xlator_t *this, uuid_t gfid);
+
+int
+br_stub_bad_objects_path(xlator_t *this, fd_t *fd, gf_dirent_t *entries,
+ dict_t **dict);
+
+void
+br_stub_entry_xattr_fill(xlator_t *this, char *hpath, gf_dirent_t *entry,
+ dict_t *dict);
+
+int
+br_stub_get_path_of_gfid(xlator_t *this, inode_t *parent, inode_t *inode,
+ uuid_t gfid, char **path);
+
+#endif /* __BIT_ROT_STUB_H__ */
diff --git a/xlators/features/changelog/Makefile.am b/xlators/features/changelog/Makefile.am
new file mode 100644
index 00000000000..153bb685076
--- /dev/null
+++ b/xlators/features/changelog/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src lib
+
+CLEANFILES =
diff --git a/xlators/features/changelog/lib/Makefile.am b/xlators/features/changelog/lib/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/changelog/lib/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/changelog/lib/examples/c/get-changes-multi.c b/xlators/features/changelog/lib/examples/c/get-changes-multi.c
new file mode 100644
index 00000000000..5ea5bbb6630
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/c/get-changes-multi.c
@@ -0,0 +1,90 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * Compile it using:
+ * gcc -o getchanges-multi `pkg-config --cflags libgfchangelog` \
+ * get-changes-multi.c `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <errno.h>
+
+#include "changelog.h"
+
+void *
+brick_init(void *xl, struct gf_brick_spec *brick)
+{
+ return brick;
+}
+
+void
+brick_fini(void *xl, char *brick, void *data)
+{
+ return;
+}
+
+void
+brick_callback(void *xl, char *brick, void *data, changelog_event_t *ev)
+{
+ printf("->callback: (brick,type) [%s:%d]\n", brick, ev->ev_type);
+}
+
+void
+fill_brick_spec(struct gf_brick_spec *brick, char *path)
+{
+ brick->brick_path = strdup(path);
+ brick->filter = CHANGELOG_OP_TYPE_BR_RELEASE;
+
+ brick->init = brick_init;
+ brick->fini = brick_fini;
+ brick->callback = brick_callback;
+ brick->connected = NULL;
+ brick->disconnected = NULL;
+}
+
+int
+main(int argc, char **argv)
+{
+ int ret = 0;
+ void *bricks = NULL;
+ struct gf_brick_spec *brick = NULL;
+
+ bricks = calloc(2, sizeof(struct gf_brick_spec));
+ if (!bricks)
+ goto error_return;
+
+ brick = (struct gf_brick_spec *)bricks;
+ fill_brick_spec(brick, "/export/z1/zwoop");
+
+ brick++;
+ fill_brick_spec(brick, "/export/z2/zwoop");
+
+ ret = gf_changelog_init(NULL);
+ if (ret)
+ goto error_return;
+
+ ret = gf_changelog_register_generic((struct gf_brick_spec *)bricks, 2, 0,
+ "/tmp/multi-changes.log", 9, NULL);
+ if (ret)
+ goto error_return;
+
+ /* let callbacks do the job */
+ select(0, NULL, NULL, NULL, NULL);
+
+error_return:
+ return -1;
+}
diff --git a/xlators/features/changelog/lib/examples/c/get-changes.c b/xlators/features/changelog/lib/examples/c/get-changes.c
new file mode 100644
index 00000000000..8bc651c24a4
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/c/get-changes.c
@@ -0,0 +1,93 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * get set of new changes every 10 seconds (just print the file names)
+ *
+ * Compile it using:
+ * gcc -o getchanges `pkg-config --cflags libgfchangelog` get-changes.c \
+ * `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <errno.h>
+
+#include "changelog.h"
+
+#define handle_error(fn) printf("%s (reason: %s)\n", fn, strerror(errno))
+
+int
+main(int argc, char **argv)
+{
+ int i = 0;
+ int ret = 0;
+ ssize_t nr_changes = 0;
+ ssize_t changes = 0;
+ char fbuf[PATH_MAX] = {
+ 0,
+ };
+
+ ret = gf_changelog_init(NULL);
+ if (ret) {
+ handle_error("Init failed");
+ goto out;
+ }
+
+ /* get changes for brick "/home/vshankar/export/yow/yow-1" */
+ ret = gf_changelog_register("/export/z1/zwoop", "/tmp/scratch",
+ "/tmp/change.log", 9, 5);
+ if (ret) {
+ handle_error("register failed");
+ goto out;
+ }
+
+ while (1) {
+ i = 0;
+ nr_changes = gf_changelog_scan();
+ if (nr_changes < 0) {
+ handle_error("scan(): ");
+ break;
+ }
+
+ if (nr_changes == 0)
+ goto next;
+
+ printf("Got %ld changelog files\n", nr_changes);
+
+ while ((changes = gf_changelog_next_change(fbuf, PATH_MAX)) > 0) {
+ printf("changelog file [%d]: %s\n", ++i, fbuf);
+
+ /* process changelog */
+ /* ... */
+ /* ... */
+ /* ... */
+ /* done processing */
+
+ ret = gf_changelog_done(fbuf);
+ if (ret)
+ handle_error("gf_changelog_done");
+ }
+
+ if (changes == -1)
+ handle_error("gf_changelog_next_change");
+
+ next:
+ sleep(10);
+ }
+
+out:
+ return ret;
+}
diff --git a/xlators/features/changelog/lib/examples/c/get-history.c b/xlators/features/changelog/lib/examples/c/get-history.c
new file mode 100644
index 00000000000..3e888d75ca6
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/c/get-history.c
@@ -0,0 +1,116 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/**
+ * get set of new changes every 10 seconds (just print the file names)
+ *
+ * Compile it using:
+ * gcc -o gethistory `pkg-config --cflags libgfchangelog` get-history.c \
+ * `pkg-config --libs libgfchangelog`
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/un.h>
+#include <limits.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <errno.h>
+
+#include "changelog.h"
+
+#define handle_error(fn) printf("%s (reason: %s)\n", fn, strerror(errno))
+
+int
+main(int argc, char **argv)
+{
+ int i = 0;
+ int ret = 0;
+ ssize_t nr_changes = 0;
+ ssize_t changes = 0;
+ char fbuf[PATH_MAX] = {
+ 0,
+ };
+ unsigned long end_ts = 0;
+
+ ret = gf_changelog_init(NULL);
+ if (ret) {
+ handle_error("init failed");
+ goto out;
+ }
+
+ ret = gf_changelog_register("/export/z1/zwoop", "/tmp/scratch_v1",
+ "/tmp/changes.log", 9, 5);
+ if (ret) {
+ handle_error("register failed");
+ goto out;
+ }
+
+ int a, b;
+ printf("give the two numbers start and end\t");
+ scanf("%d%d", &a, &b);
+ ret = gf_history_changelog("/export/z1/zwoop/.glusterfs/changelogs", a, b,
+ 3, &end_ts);
+ if (ret == -1) {
+ printf("history failed");
+ goto out;
+ }
+
+ printf("end time till when changelog available : %d , ret(%d) \t", end_ts,
+ ret);
+ fflush(stdout);
+
+ while (1) {
+ nr_changes = gf_history_changelog_scan();
+ printf("scanned, nr_changes : %d\n", nr_changes);
+ if (nr_changes < 0) {
+ handle_error("scan(): ");
+ break;
+ }
+
+ if (nr_changes == 0) {
+ printf("done scanning \n");
+ goto out;
+ }
+
+ printf("Got %ld changelog files\n", nr_changes);
+
+ while ((changes = gf_history_changelog_next_change(fbuf, PATH_MAX)) >
+ 0) {
+ printf("changelog file [%d]: %s\n", ++i, fbuf);
+
+ /* process changelog */
+ /* ... */
+ /* ... */
+ /* ... */
+ /* done processing */
+
+ ret = gf_history_changelog_done(fbuf);
+ if (ret)
+ handle_error("gf_changelog_done");
+ }
+ /*
+ if (changes == -1)
+ handle_error ("gf_changelog_next_change");
+ if (nr_changes ==1){
+ printf("continue scanning\n");
+ }
+
+ if(nr_changes == 0){
+ printf("done scanning \n");
+ goto out;
+ }
+ */
+ }
+
+out:
+ return ret;
+}
diff --git a/xlators/features/changelog/lib/examples/python/changes.py b/xlators/features/changelog/lib/examples/python/changes.py
new file mode 100755
index 00000000000..c410d3b000d
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/python/changes.py
@@ -0,0 +1,34 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+import os
+import sys
+import time
+import libgfchangelog
+
+cl = libgfchangelog.Changes()
+
+def get_changes(brick, scratch_dir, log_file, log_level, interval):
+ change_list = []
+ try:
+ cl.cl_init()
+ cl.cl_register(brick, scratch_dir, log_file, log_level)
+ while True:
+ cl.cl_scan()
+ change_list = cl.cl_getchanges()
+ if change_list:
+ print(change_list)
+ for change in change_list:
+ print(('done with %s' % (change)))
+ cl.cl_done(change)
+ time.sleep(interval)
+ except OSError:
+ ex = sys.exc_info()[1]
+ print(ex)
+
+if __name__ == '__main__':
+ if len(sys.argv) != 6:
+ print(("usage: %s <brick> <scratch-dir> <log-file> <fetch-interval>"
+ % (sys.argv[0])))
+ sys.exit(1)
+ get_changes(sys.argv[1], sys.argv[2], sys.argv[3], 9, int(sys.argv[4]))
diff --git a/xlators/features/changelog/lib/examples/python/libgfchangelog.py b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
new file mode 100644
index 00000000000..2da9f2d2a8c
--- /dev/null
+++ b/xlators/features/changelog/lib/examples/python/libgfchangelog.py
@@ -0,0 +1,71 @@
+import os
+from ctypes import *
+from ctypes.util import find_library
+
+class Changes(object):
+ libgfc = CDLL(find_library("gfchangelog"), mode=RTLD_GLOBAL,
+ use_errno=True)
+
+ @classmethod
+ def geterrno(cls):
+ return get_errno()
+
+ @classmethod
+ def raise_oserr(cls):
+ errn = cls.geterrno()
+ raise OSError(errn, os.strerror(errn))
+
+ @classmethod
+ def _get_api(cls, call):
+ return getattr(cls.libgfc, call)
+
+ @classmethod
+ def cl_init(cls):
+ ret = cls._get_api('gf_changelog_init')(None)
+ if ret == -1:
+ cls.raise_changelog_err()
+
+ @classmethod
+ def cl_register(cls, brick, path, log_file, log_level, retries = 0):
+ ret = cls._get_api('gf_changelog_register')(brick, path,
+ log_file, log_level, retries)
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def cl_scan(cls):
+ ret = cls._get_api('gf_changelog_scan')()
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def cl_startfresh(cls):
+ ret = cls._get_api('gf_changelog_start_fresh')()
+ if ret == -1:
+ cls.raise_oserr()
+
+ @classmethod
+ def cl_getchanges(cls):
+ """ remove hardcoding for path name length """
+ def clsort(f):
+ return f.split('.')[-1]
+ changes = []
+ buf = create_string_buffer('\0', 4096)
+ call = cls._get_api('gf_changelog_next_change')
+
+ while True:
+ ret = call(buf, 4096)
+ if ret in (0, -1):
+ break;
+ changes.append(buf.raw[:ret-1])
+ if ret == -1:
+ cls.raise_oserr()
+ # cleanup tracker
+ cls.cl_startfresh()
+ return sorted(changes, key=clsort)
+
+ @classmethod
+ def cl_done(cls, clfile):
+ ret = cls._get_api('gf_changelog_done')(clfile)
+ if ret == -1:
+ cls.raise_oserr()
diff --git a/xlators/features/changelog/lib/src/Makefile.am b/xlators/features/changelog/lib/src/Makefile.am
new file mode 100644
index 00000000000..c933ec53ed2
--- /dev/null
+++ b/xlators/features/changelog/lib/src/Makefile.am
@@ -0,0 +1,35 @@
+libgfchangelog_la_CFLAGS = -Wall $(GF_CFLAGS) $(GF_DARWIN_LIBGLUSTERFS_CFLAGS) \
+ -DDATADIR=\"$(localstatedir)\"
+
+libgfchangelog_la_CPPFLAGS = $(GF_CPPFLAGS) -D__USE_FILE_OFFSET64 -D__USE_LARGEFILE64 -fpic \
+ -I../../../src/ -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/features/changelog/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/rpc-transport/socket/src \
+ -DDATADIR=\"$(localstatedir)\"
+
+libgfchangelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
+
+libgfchangelog_la_LDFLAGS = $(GF_LDFLAGS) \
+ -version-info $(LIBGFCHANGELOG_LT_VERSION) \
+ $(GF_NO_UNDEFINED)
+
+lib_LTLIBRARIES = libgfchangelog.la
+
+CONTRIB_BUILDDIR = $(top_builddir)/contrib
+
+libgfchangelog_la_SOURCES = gf-changelog.c gf-changelog-journal-handler.c \
+ gf-changelog-helpers.c gf-changelog-api.c gf-history-changelog.c \
+ gf-changelog-rpc.c gf-changelog-reborp.c \
+ $(top_srcdir)/xlators/features/changelog/src/changelog-rpc-common.c
+
+noinst_HEADERS = gf-changelog-helpers.h gf-changelog-rpc.h \
+ gf-changelog-journal.h changelog-lib-messages.h
+
+CLEANFILES =
+
+$(top_builddir)/libglusterfs/src/libglusterfs.la:
+ $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
diff --git a/xlators/features/changelog/lib/src/changelog-lib-messages.h b/xlators/features/changelog/lib/src/changelog-lib-messages.h
new file mode 100644
index 00000000000..d7fe7274353
--- /dev/null
+++ b/xlators/features/changelog/lib/src/changelog-lib-messages.h
@@ -0,0 +1,74 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _CHANGELOG_LIB_MESSAGES_H_
+#define _CHANGELOG_LIB_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(
+ CHANGELOG_LIB, CHANGELOG_LIB_MSG_OPEN_FAILED,
+ CHANGELOG_LIB_MSG_FAILED_TO_RMDIR,
+ CHANGELOG_LIB_MSG_SCRATCH_DIR_ENTRIES_CREATION_ERROR,
+ CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED, CHANGELOG_LIB_MSG_OPENDIR_ERROR,
+ CHANGELOG_LIB_MSG_RENAME_FAILED, CHANGELOG_LIB_MSG_READ_ERROR,
+ CHANGELOG_LIB_MSG_HTIME_ERROR, CHANGELOG_LIB_MSG_GET_TIME_ERROR,
+ CHANGELOG_LIB_MSG_WRITE_FAILED, CHANGELOG_LIB_MSG_PTHREAD_ERROR,
+ CHANGELOG_LIB_MSG_MMAP_FAILED, CHANGELOG_LIB_MSG_MUNMAP_FAILED,
+ CHANGELOG_LIB_MSG_ASCII_ERROR, CHANGELOG_LIB_MSG_STAT_FAILED,
+ CHANGELOG_LIB_MSG_GET_XATTR_FAILED, CHANGELOG_LIB_MSG_PUBLISH_ERROR,
+ CHANGELOG_LIB_MSG_PARSE_ERROR, CHANGELOG_LIB_MSG_MIN_MAX_INFO,
+ CHANGELOG_LIB_MSG_CLEANUP_ERROR, CHANGELOG_LIB_MSG_UNLINK_FAILED,
+ CHANGELOG_LIB_MSG_NOTIFY_REGISTER_FAILED,
+ CHANGELOG_LIB_MSG_INVOKE_RPC_FAILED, CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO,
+ CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO,
+ CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO, CHANGELOG_LIB_MSG_XDR_DECODING_FAILED,
+ CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO,
+ CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING,
+ CHANGELOG_LIB_MSG_COPY_FROM_BUFFER_FAILED,
+ CHANGELOG_LIB_MSG_PTHREAD_JOIN_FAILED, CHANGELOG_LIB_MSG_HIST_FAILED,
+ CHANGELOG_LIB_MSG_DRAINED_EVENT_INFO, CHANGELOG_LIB_MSG_PARSE_ERROR_CEASED,
+ CHANGELOG_LIB_MSG_REQUESTING_INFO, CHANGELOG_LIB_MSG_FINAL_INFO);
+
+#define CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO_STR "Registering brick"
+#define CHANGELOG_LIB_MSG_RENAME_FAILED_STR "error moving changelog file"
+#define CHANGELOG_LIB_MSG_OPEN_FAILED_STR "cannot open changelog file"
+#define CHANGELOG_LIB_MSG_UNLINK_FAILED_STR "failed to unlink"
+#define CHANGELOG_LIB_MSG_FAILED_TO_RMDIR_STR "failed to rmdir"
+#define CHANGELOG_LIB_MSG_STAT_FAILED_STR "stat failed on changelog file"
+#define CHANGELOG_LIB_MSG_PARSE_ERROR_STR "could not parse changelog"
+#define CHANGELOG_LIB_MSG_PARSE_ERROR_CEASED_STR \
+ "parsing error, ceased publishing..."
+#define CHANGELOG_LIB_MSG_HTIME_ERROR_STR "fop failed on htime file"
+#define CHANGELOG_LIB_MSG_GET_XATTR_FAILED_STR \
+ "error extracting max timstamp from htime file"
+#define CHANGELOG_LIB_MSG_MIN_MAX_INFO_STR "changelogs min max"
+#define CHANGELOG_LIB_MSG_REQUESTING_INFO_STR "Requesting historical changelogs"
+#define CHANGELOG_LIB_MSG_FINAL_INFO_STR "FINAL"
+#define CHANGELOG_LIB_MSG_HIST_FAILED_STR \
+ "Requested changelog range is not available"
+#define CHANGELOG_LIB_MSG_GET_TIME_ERROR_STR "wrong result"
+#define CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO_STR \
+ "Cleaning brick entry for brick"
+#define CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO_STR "Draining event"
+#define CHANGELOG_LIB_MSG_DRAINED_EVENT_INFO_STR "Drained event"
+#define CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO_STR "freeing entry"
+
+#endif /* !_CHANGELOG_MESSAGES_H_ */
diff --git a/xlators/features/changelog/lib/src/gf-changelog-api.c b/xlators/features/changelog/lib/src/gf-changelog-api.c
new file mode 100644
index 00000000000..81a5cbfec10
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-api.c
@@ -0,0 +1,224 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/compat-uuid.h>
+#include <glusterfs/globals.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/syscall.h>
+
+#include "gf-changelog-helpers.h"
+#include "gf-changelog-journal.h"
+#include "changelog-mem-types.h"
+#include "changelog-lib-messages.h"
+
+int
+gf_changelog_done(char *file)
+{
+ int ret = -1;
+ char *buffer = NULL;
+ xlator_t *this = NULL;
+ gf_changelog_journal_t *jnl = NULL;
+ char to_path[PATH_MAX] = {
+ 0,
+ };
+
+ errno = EINVAL;
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this);
+ if (!jnl)
+ goto out;
+
+ if (!file || !strlen(file))
+ goto out;
+
+ /* make sure 'file' is inside ->jnl_working_dir */
+ buffer = realpath(file, NULL);
+ if (!buffer)
+ goto out;
+
+ if (strncmp(jnl->jnl_working_dir, buffer, strlen(jnl->jnl_working_dir)))
+ goto out;
+
+ (void)snprintf(to_path, PATH_MAX, "%s%s", jnl->jnl_processed_dir,
+ basename(buffer));
+ gf_msg_debug(this->name, 0, "moving %s to processed directory", file);
+ ret = sys_rename(buffer, to_path);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", file, "to=%s",
+ to_path, NULL);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (buffer)
+ free(buffer); /* allocated by realpath() */
+ return ret;
+}
+
+/**
+ * @API
+ * for a set of changelogs, start from the beginning
+ */
+int
+gf_changelog_start_fresh()
+{
+ xlator_t *this = NULL;
+ gf_changelog_journal_t *jnl = NULL;
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ errno = EINVAL;
+
+ jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this);
+ if (!jnl)
+ goto out;
+
+ if (gf_ftruncate(jnl->jnl_fd, 0))
+ goto out;
+
+ return 0;
+
+out:
+ return -1;
+}
+
+/**
+ * @API
+ * return the next changelog file entry. zero means all chanelogs
+ * consumed.
+ */
+ssize_t
+gf_changelog_next_change(char *bufptr, size_t maxlen)
+{
+ ssize_t size = -1;
+ int tracker_fd = 0;
+ xlator_t *this = NULL;
+ gf_changelog_journal_t *jnl = NULL;
+ char buffer[PATH_MAX] = {
+ 0,
+ };
+
+ errno = EINVAL;
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this);
+ if (!jnl)
+ goto out;
+
+ tracker_fd = jnl->jnl_fd;
+
+ size = gf_readline(tracker_fd, buffer, maxlen);
+ if (size < 0) {
+ size = -1;
+ goto out;
+ }
+
+ if (size == 0)
+ goto out;
+
+ memcpy(bufptr, buffer, size - 1);
+ bufptr[size - 1] = '\0';
+
+out:
+ return size;
+}
+
+/**
+ * @API
+ * gf_changelog_scan() - scan and generate a list of change entries
+ *
+ * calling this api multiple times (without calling gf_changlog_done())
+ * would result new changelogs(s) being refreshed in the tracker file.
+ * This call also acts as a cancellation point for the consumer.
+ */
+ssize_t
+gf_changelog_scan()
+{
+ int tracker_fd = 0;
+ size_t off = 0;
+ xlator_t *this = NULL;
+ size_t nr_entries = 0;
+ gf_changelog_journal_t *jnl = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char buffer[PATH_MAX] = {
+ 0,
+ };
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this);
+ if (!jnl)
+ goto out;
+ if (JNL_IS_API_DISCONNECTED(jnl)) {
+ errno = ENOTCONN;
+ goto out;
+ }
+
+ errno = EINVAL;
+
+ tracker_fd = jnl->jnl_fd;
+ if (gf_ftruncate(tracker_fd, 0))
+ goto out;
+
+ rewinddir(jnl->jnl_dir);
+
+ for (;;) {
+ errno = 0;
+ entry = sys_readdir(jnl->jnl_dir, scratch);
+ if (!entry || errno != 0)
+ break;
+
+ if (!strcmp(basename(entry->d_name), ".") ||
+ !strcmp(basename(entry->d_name), ".."))
+ continue;
+
+ nr_entries++;
+
+ GF_CHANGELOG_FILL_BUFFER(jnl->jnl_processing_dir, buffer, off,
+ strlen(jnl->jnl_processing_dir));
+ GF_CHANGELOG_FILL_BUFFER(entry->d_name, buffer, off,
+ strlen(entry->d_name));
+ GF_CHANGELOG_FILL_BUFFER("\n", buffer, off, 1);
+
+ if (gf_changelog_write(tracker_fd, buffer, off) != off) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_LIB_MSG_WRITE_FAILED,
+ "error writing changelog filename"
+ " to tracker file");
+ break;
+ }
+ off = 0;
+ }
+
+ if (!entry) {
+ if (gf_lseek(tracker_fd, 0, SEEK_SET) != -1)
+ return nr_entries;
+ }
+out:
+ return -1;
+}
diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.c b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
new file mode 100644
index 00000000000..75f8a6dfc08
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.c
@@ -0,0 +1,170 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-mem-types.h"
+#include "gf-changelog-helpers.h"
+#include "changelog-lib-messages.h"
+#include <glusterfs/syscall.h>
+
+size_t
+gf_changelog_write(int fd, char *buffer, size_t len)
+{
+ ssize_t size = 0;
+ size_t written = 0;
+
+ while (written < len) {
+ size = sys_write(fd, buffer + written, len - written);
+ if (size <= 0)
+ break;
+
+ written += size;
+ }
+
+ return written;
+}
+
+void
+gf_rfc3986_encode_space_newline(unsigned char *s, char *enc, char *estr)
+{
+ for (; *s; s++) {
+ if (estr[*s])
+ sprintf(enc, "%c", estr[*s]);
+ else
+ sprintf(enc, "%%%02X", *s);
+ while (*++enc)
+ ;
+ }
+}
+
+/**
+ * thread safe version of readline with buffering
+ * (taken from Unix Network Programming Volume I, W.R. Stevens)
+ *
+ * This is favoured over fgets() as we'd need to ftruncate()
+ * (see gf_changelog_scan() API) to record new changelog files.
+ * stream open functions does have a truncate like api (although
+ * that can be done via @fflush(fp), @ftruncate(fd) and @fseek(fp),
+ * but this involves mixing POSIX file descriptors and stream FILE *).
+ *
+ * NOTE: This implementation still does work with more than one fd's
+ * used to perform gf_readline(). For this very reason it's not
+ * made a part of libglusterfs.
+ */
+
+static __thread read_line_t thread_tsd = {};
+
+static ssize_t
+my_read(read_line_t *tsd, int fd, char *ptr)
+{
+ if (tsd->rl_cnt <= 0) {
+ tsd->rl_cnt = sys_read(fd, tsd->rl_buf, MAXLINE);
+
+ if (tsd->rl_cnt < 0)
+ return -1;
+ else if (tsd->rl_cnt == 0)
+ return 0;
+ tsd->rl_bufptr = tsd->rl_buf;
+ }
+
+ tsd->rl_cnt--;
+ *ptr = *tsd->rl_bufptr++;
+ return 1;
+}
+
+ssize_t
+gf_readline(int fd, void *vptr, size_t maxlen)
+{
+ size_t n = 0;
+ size_t rc = 0;
+ char c = ' ';
+ char *ptr = NULL;
+ read_line_t *tsd = &thread_tsd;
+
+ ptr = vptr;
+ for (n = 1; n < maxlen; n++) {
+ if ((rc = my_read(tsd, fd, &c)) == 1) {
+ *ptr++ = c;
+ if (c == '\n')
+ break;
+ } else if (rc == 0) {
+ *ptr = '\0';
+ return (n - 1);
+ } else
+ return -1;
+ }
+
+ *ptr = '\0';
+ return n;
+}
+
+off_t
+gf_lseek(int fd, off_t offset, int whence)
+{
+ off_t off = 0;
+ read_line_t *tsd = &thread_tsd;
+
+ off = sys_lseek(fd, offset, whence);
+ if (off == -1)
+ return -1;
+
+ tsd->rl_cnt = 0;
+ tsd->rl_bufptr = tsd->rl_buf;
+
+ return off;
+}
+
+int
+gf_ftruncate(int fd, off_t length)
+{
+ read_line_t *tsd = &thread_tsd;
+
+ if (sys_ftruncate(fd, 0))
+ return -1;
+
+ tsd->rl_cnt = 0;
+ tsd->rl_bufptr = tsd->rl_buf;
+
+ return 0;
+}
+
+int
+gf_thread_cleanup(xlator_t *this, pthread_t thread)
+{
+ int ret = 0;
+ void *res = NULL;
+
+ ret = pthread_cancel(thread);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING,
+ "Failed to send cancellation to thread");
+ goto error_return;
+ }
+
+ ret = pthread_join(thread, &res);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING,
+ "failed to join thread");
+ goto error_return;
+ }
+
+ if (res != PTHREAD_CANCELED) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ CHANGELOG_LIB_MSG_THREAD_CLEANUP_WARNING,
+ "Thread could not be cleaned up");
+ goto error_return;
+ }
+
+ return 0;
+
+error_return:
+ return -1;
+}
diff --git a/xlators/features/changelog/lib/src/gf-changelog-helpers.h b/xlators/features/changelog/lib/src/gf-changelog-helpers.h
new file mode 100644
index 00000000000..9c609d33172
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-helpers.h
@@ -0,0 +1,255 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GF_CHANGELOG_HELPERS_H
+#define _GF_CHANGELOG_HELPERS_H
+
+#include <unistd.h>
+#include <dirent.h>
+#include <limits.h>
+#include <glusterfs/locking.h>
+
+#include <glusterfs/xlator.h>
+
+#include "changelog.h"
+
+#include "changelog-rpc-common.h"
+#include "gf-changelog-journal.h"
+
+#define GF_CHANGELOG_TRACKER "tracker"
+
+#define GF_CHANGELOG_CURRENT_DIR ".current"
+#define GF_CHANGELOG_PROCESSED_DIR ".processed"
+#define GF_CHANGELOG_PROCESSING_DIR ".processing"
+#define GF_CHANGELOG_HISTORY_DIR ".history"
+#define TIMESTAMP_LENGTH 10
+
+#ifndef MAXLINE
+#define MAXLINE 4096
+#endif
+
+#define GF_CHANGELOG_FILL_BUFFER(ptr, ascii, off, len) \
+ do { \
+ memcpy(ascii + off, ptr, len); \
+ off += len; \
+ } while (0)
+
+typedef struct read_line {
+ int rl_cnt;
+ char *rl_bufptr;
+ char rl_buf[MAXLINE];
+} read_line_t;
+
+struct gf_changelog;
+struct gf_event;
+
+/**
+ * Event list for ordered event notification
+ *
+ * ->next_seq holds the next _expected_ sequence number.
+ */
+struct gf_event_list {
+ pthread_mutex_t lock; /* protects this structure */
+ pthread_cond_t cond;
+
+ pthread_t invoker;
+
+ unsigned long next_seq; /* next sequence number expected:
+ zero during bootstrap */
+
+ struct gf_changelog *entry; /* backpointer to it's brick
+ encapsulator (entry) */
+ struct list_head events; /* list of events */
+};
+
+/**
+ * include a refcount if it's of use by additional layers
+ */
+struct gf_event {
+ int count;
+
+ unsigned long seq;
+
+ struct list_head list;
+
+ struct iovec iov[0];
+};
+#define GF_EVENT_CALLOC_SIZE(cnt, len) \
+ (sizeof(struct gf_event) + (cnt * sizeof(struct iovec)) + len)
+
+/**
+ * assign the base address of the IO vector to the correct memory
+o * area and set it's addressable length.
+ */
+#define GF_EVENT_ASSIGN_IOVEC(vec, event, len, pos) \
+ do { \
+ vec->iov_base = ((char *)event) + sizeof(struct gf_event) + \
+ (event->count * sizeof(struct iovec)) + pos; \
+ vec->iov_len = len; \
+ pos += len; \
+ } while (0)
+
+typedef enum gf_changelog_conn_state {
+ GF_CHANGELOG_CONN_STATE_PENDING = 0,
+ GF_CHANGELOG_CONN_STATE_ACCEPTED,
+ GF_CHANGELOG_CONN_STATE_DISCONNECTED,
+} gf_changelog_conn_state_t;
+
+/**
+ * An instance of this structure is allocated for each brick for which
+ * notifications are streamed.
+ */
+typedef struct gf_changelog {
+ gf_lock_t statelock;
+ gf_changelog_conn_state_t connstate;
+
+ xlator_t *this;
+
+ struct list_head list; /* list of instances */
+
+ char brick[PATH_MAX]; /* brick path for this end-point */
+
+ changelog_rpc_t grpc; /* rpc{-clnt,svc} for this brick */
+#define RPC_PROBER(ent) ent->grpc.rpc
+#define RPC_REBORP(ent) ent->grpc.svc
+#define RPC_SOCK(ent) ent->grpc.sock
+
+ unsigned int notify; /* notification flag(s) */
+
+ FINI *fini; /* destructor callback */
+ CALLBACK *callback; /* event callback dispatcher */
+ CONNECT *connected; /* connect callback */
+ DISCONNECT *disconnected; /* disconnection callback */
+
+ void *ptr; /* owner specific private data */
+ xlator_t *invokerxl; /* consumers _this_, if valid,
+ assigned to THIS before cbk is
+ invoked */
+
+ gf_boolean_t ordered;
+
+ void (*queueevent)(struct gf_event_list *, struct gf_event *);
+ void (*pickevent)(struct gf_event_list *, struct gf_event **);
+
+ struct gf_event_list event;
+} gf_changelog_t;
+
+static inline int
+gf_changelog_filter_check(gf_changelog_t *entry, changelog_event_t *event)
+{
+ if (event->ev_type & entry->notify)
+ return 1;
+ return 0;
+}
+
+#define GF_NEED_ORDERED_EVENTS(ent) (ent->ordered == _gf_true)
+
+/** private structure */
+typedef struct gf_private {
+ pthread_mutex_t lock; /* protects ->connections, cleanups */
+ pthread_cond_t cond;
+
+ void *api; /* pointer for API access */
+
+ pthread_t poller; /* event poller thread */
+ pthread_t connectionjanitor; /* connection cleaner */
+
+ struct list_head connections; /* list of connections */
+ struct list_head cleanups; /* list of connection to be
+ cleaned up */
+} gf_private_t;
+
+#define GF_CHANGELOG_GET_API_PTR(this) (((gf_private_t *)this->private)->api)
+
+/**
+ * upcall: invoke callback with _correct_ THIS
+ */
+#define GF_CHANGELOG_INVOKE_CBK(this, cbk, brick, args...) \
+ do { \
+ xlator_t *old_this = NULL; \
+ xlator_t *invokerxl = NULL; \
+ \
+ invokerxl = entry->invokerxl; \
+ old_this = this; \
+ \
+ if (invokerxl) { \
+ THIS = invokerxl; \
+ } \
+ \
+ cbk(invokerxl, brick, args); \
+ THIS = old_this; \
+ \
+ } while (0)
+
+#define SAVE_THIS(xl) \
+ do { \
+ old_this = xl; \
+ THIS = master; \
+ } while (0)
+
+#define RESTORE_THIS() \
+ do { \
+ if (old_this) \
+ THIS = old_this; \
+ } while (0)
+
+/** APIs and the rest */
+
+void *
+gf_changelog_process(void *data);
+
+void
+gf_rfc3986_encode_space_newline(unsigned char *s, char *enc, char *estr);
+
+size_t
+gf_changelog_write(int fd, char *buffer, size_t len);
+
+ssize_t
+gf_readline(int fd, void *vptr, size_t maxlen);
+
+int
+gf_ftruncate(int fd, off_t length);
+
+off_t
+gf_lseek(int fd, off_t offset, int whence);
+
+int
+gf_changelog_consume(xlator_t *this, gf_changelog_journal_t *jnl,
+ char *from_path, gf_boolean_t no_publish);
+int
+gf_changelog_publish(xlator_t *this, gf_changelog_journal_t *jnl,
+ char *from_path);
+int
+gf_thread_cleanup(xlator_t *this, pthread_t thread);
+void *
+gf_changelog_callback_invoker(void *arg);
+
+int
+gf_cleanup_event(xlator_t *, struct gf_event_list *);
+
+/* (un)ordered event queueing */
+void
+queue_ordered_event(struct gf_event_list *, struct gf_event *);
+
+void
+queue_unordered_event(struct gf_event_list *, struct gf_event *);
+
+/* (un)ordered event picking */
+void
+pick_event_ordered(struct gf_event_list *, struct gf_event **);
+
+void
+pick_event_unordered(struct gf_event_list *, struct gf_event **);
+
+/* connection janitor thread */
+void *
+gf_changelog_connection_janitor(void *);
+
+#endif
diff --git a/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c b/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c
new file mode 100644
index 00000000000..7f6e2329e71
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-journal-handler.c
@@ -0,0 +1,1029 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/compat-uuid.h>
+#include <glusterfs/globals.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/compat-errno.h>
+
+#include "gf-changelog-helpers.h"
+
+/* from the changelog translator */
+#include "changelog-misc.h"
+#include "changelog-mem-types.h"
+
+#include "gf-changelog-journal.h"
+#include "changelog-lib-messages.h"
+
+extern int byebye;
+
+enum changelog_versions { VERSION_1_1 = 0, VERSION_1_2 = 1 };
+
+/**
+ * number of gfid records after fop number
+ */
+int nr_gfids[2][GF_FOP_MAXVALUE] = {{
+ [GF_FOP_MKNOD] = 1,
+ [GF_FOP_MKDIR] = 1,
+ [GF_FOP_UNLINK] = 1,
+ [GF_FOP_RMDIR] = 1,
+ [GF_FOP_SYMLINK] = 1,
+ [GF_FOP_RENAME] = 2,
+ [GF_FOP_LINK] = 1,
+ [GF_FOP_CREATE] = 1,
+ },
+ {
+ [GF_FOP_MKNOD] = 1,
+ [GF_FOP_MKDIR] = 1,
+ [GF_FOP_UNLINK] = 2,
+ [GF_FOP_RMDIR] = 2,
+ [GF_FOP_SYMLINK] = 1,
+ [GF_FOP_RENAME] = 2,
+ [GF_FOP_LINK] = 1,
+ [GF_FOP_CREATE] = 1,
+ }};
+
+int nr_extra_recs[2][GF_FOP_MAXVALUE] = {{
+ [GF_FOP_MKNOD] = 3,
+ [GF_FOP_MKDIR] = 3,
+ [GF_FOP_UNLINK] = 0,
+ [GF_FOP_RMDIR] = 0,
+ [GF_FOP_SYMLINK] = 0,
+ [GF_FOP_RENAME] = 0,
+ [GF_FOP_LINK] = 0,
+ [GF_FOP_CREATE] = 3,
+ },
+ {
+ [GF_FOP_MKNOD] = 3,
+ [GF_FOP_MKDIR] = 3,
+ [GF_FOP_UNLINK] = 0,
+ [GF_FOP_RMDIR] = 0,
+ [GF_FOP_SYMLINK] = 0,
+ [GF_FOP_RENAME] = 0,
+ [GF_FOP_LINK] = 0,
+ [GF_FOP_CREATE] = 3,
+ }};
+
+static char *
+binary_to_ascii(uuid_t uuid)
+{
+ return uuid_utoa(uuid);
+}
+
+static char *
+conv_noop(char *ptr)
+{
+ return ptr;
+}
+
+#define VERIFY_SEPARATOR(ptr, plen, perr) \
+ { \
+ if (*(ptr + plen) != '\0') { \
+ perr = 1; \
+ break; \
+ } \
+ }
+
+#define MOVER_MOVE(mover, nleft, bytes) \
+ { \
+ mover += bytes; \
+ nleft -= bytes; \
+ }
+
+#define PARSE_GFID(mov, ptr, le, fn, perr) \
+ { \
+ VERIFY_SEPARATOR(mov, le, perr); \
+ ptr = fn(mov); \
+ if (!ptr) { \
+ perr = 1; \
+ break; \
+ } \
+ }
+
+#define FILL_AND_MOVE(pt, buf, of, mo, nl, le) \
+ { \
+ GF_CHANGELOG_FILL_BUFFER(pt, buf, of, strlen(pt)); \
+ MOVER_MOVE(mo, nl, le); \
+ }
+
+#define PARSE_GFID_MOVE(ptr, uuid, mover, nleft, perr) \
+ { \
+ memcpy(uuid, mover, sizeof(uuid_t)); \
+ ptr = binary_to_ascii(uuid); \
+ if (!ptr) { \
+ perr = 1; \
+ break; \
+ } \
+ MOVER_MOVE(mover, nleft, sizeof(uuid_t)); \
+ }
+
+#define LINE_BUFSIZE (3 * PATH_MAX) /* enough buffer for extra chars too */
+
+/**
+ * using mmap() makes parsing easy. fgets() cannot be used here as
+ * the binary gfid could contain a line-feed (0x0A), in that case fgets()
+ * would read an incomplete line and parsing would fail. using POSIX fds
+ * would result is additional code to maintain state in case of partial
+ * reads of data (where multiple entries do not fit extirely in the buffer).
+ *
+ * mmap() gives the flexibility of pointing to an offset in the file
+ * without us worrying about reading it in memory (VM does that for us for
+ * free).
+ */
+
+static int
+gf_changelog_parse_binary(xlator_t *this, gf_changelog_journal_t *jnl,
+ int from_fd, int to_fd, size_t start_offset,
+ struct stat *stbuf, int version_idx)
+
+{
+ int ret = -1;
+ off_t off = 0;
+ off_t nleft = 0;
+ uuid_t uuid = {
+ 0,
+ };
+ char *ptr = NULL;
+ char *bname_start = NULL;
+ char *bname_end = NULL;
+ char *mover = NULL;
+ void *start = NULL;
+ char current_mover = ' ';
+ size_t blen = 0;
+ int parse_err = 0;
+ char *ascii = NULL;
+
+ ascii = GF_CALLOC(LINE_BUFSIZE, sizeof(char), gf_common_mt_char);
+
+ nleft = stbuf->st_size;
+
+ start = mmap(NULL, nleft, PROT_READ, MAP_PRIVATE, from_fd, 0);
+ if (start == MAP_FAILED) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_MMAP_FAILED,
+ "mmap() error");
+ goto out;
+ }
+
+ mover = start;
+
+ MOVER_MOVE(mover, nleft, start_offset);
+
+ while (nleft > 0) {
+ off = blen = 0;
+ ptr = bname_start = bname_end = NULL;
+
+ current_mover = *mover;
+
+ switch (current_mover) {
+ case 'D':
+ case 'M':
+ MOVER_MOVE(mover, nleft, 1);
+ PARSE_GFID_MOVE(ptr, uuid, mover, nleft, parse_err);
+
+ break;
+
+ case 'E':
+ MOVER_MOVE(mover, nleft, 1);
+ PARSE_GFID_MOVE(ptr, uuid, mover, nleft, parse_err);
+
+ bname_start = mover;
+ bname_end = strchr(mover, '\n');
+ if (bname_end == NULL) {
+ parse_err = 1;
+ break;
+ }
+
+ blen = bname_end - bname_start;
+ MOVER_MOVE(mover, nleft, blen);
+
+ break;
+
+ default:
+ parse_err = 1;
+ }
+
+ if (parse_err)
+ break;
+
+ GF_CHANGELOG_FILL_BUFFER(&current_mover, ascii, off, 1);
+ GF_CHANGELOG_FILL_BUFFER(" ", ascii, off, 1);
+ GF_CHANGELOG_FILL_BUFFER(ptr, ascii, off, strlen(ptr));
+ if (blen)
+ GF_CHANGELOG_FILL_BUFFER(bname_start, ascii, off, blen);
+ GF_CHANGELOG_FILL_BUFFER("\n", ascii, off, 1);
+
+ if (gf_changelog_write(to_fd, ascii, off) != off) {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_LIB_MSG_ASCII_ERROR,
+ "processing binary changelog failed due to "
+ " error in writing ascii change");
+ break;
+ }
+
+ MOVER_MOVE(mover, nleft, 1);
+ }
+
+ if ((nleft == 0) && (!parse_err))
+ ret = 0;
+
+ if (munmap(start, stbuf->st_size))
+ gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_MUNMAP_FAILED,
+ "munmap() error");
+out:
+ if (ascii)
+ GF_FREE(ascii);
+ return ret;
+}
+
+/**
+ * ascii decoder:
+ * - separate out one entry from another
+ * - use fop name rather than fop number
+ */
+static int
+gf_changelog_parse_ascii(xlator_t *this, gf_changelog_journal_t *jnl,
+ int from_fd, int to_fd, size_t start_offset,
+ struct stat *stbuf, int version_idx)
+{
+ int ng = 0;
+ int ret = -1;
+ int fop = 0;
+ int len = 0;
+ off_t off = 0;
+ off_t nleft = 0;
+ char *ptr = NULL;
+ char *eptr = NULL;
+ void *start = NULL;
+ char *mover = NULL;
+ int parse_err = 0;
+ char current_mover = ' ';
+ char *ascii = NULL;
+ const char *fopname = NULL;
+
+ ascii = GF_CALLOC(LINE_BUFSIZE, sizeof(char), gf_common_mt_char);
+
+ nleft = stbuf->st_size;
+
+ start = mmap(NULL, nleft, PROT_READ, MAP_PRIVATE, from_fd, 0);
+ if (start == MAP_FAILED) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_MMAP_FAILED,
+ "mmap() error");
+ goto out;
+ }
+
+ mover = start;
+
+ MOVER_MOVE(mover, nleft, start_offset);
+
+ while (nleft > 0) {
+ off = 0;
+ current_mover = *mover;
+
+ GF_CHANGELOG_FILL_BUFFER(&current_mover, ascii, off, 1);
+ GF_CHANGELOG_FILL_BUFFER(" ", ascii, off, 1);
+
+ switch (current_mover) {
+ case 'D':
+ MOVER_MOVE(mover, nleft, 1);
+
+ /* target gfid */
+ PARSE_GFID(mover, ptr, UUID_CANONICAL_FORM_LEN, conv_noop,
+ parse_err);
+ FILL_AND_MOVE(ptr, ascii, off, mover, nleft,
+ UUID_CANONICAL_FORM_LEN);
+ break;
+ case 'M':
+ MOVER_MOVE(mover, nleft, 1);
+
+ /* target gfid */
+ PARSE_GFID(mover, ptr, UUID_CANONICAL_FORM_LEN, conv_noop,
+ parse_err);
+ FILL_AND_MOVE(ptr, ascii, off, mover, nleft,
+ UUID_CANONICAL_FORM_LEN);
+ FILL_AND_MOVE(" ", ascii, off, mover, nleft, 1);
+
+ /* fop */
+ len = strlen(mover);
+ VERIFY_SEPARATOR(mover, len, parse_err);
+
+ fop = atoi(mover);
+ fopname = gf_fop_list[fop];
+ if (fopname == NULL) {
+ parse_err = 1;
+ break;
+ }
+
+ MOVER_MOVE(mover, nleft, len);
+
+ len = strlen(fopname);
+ GF_CHANGELOG_FILL_BUFFER(fopname, ascii, off, len);
+
+ break;
+
+ case 'E':
+ MOVER_MOVE(mover, nleft, 1);
+
+ /* target gfid */
+ PARSE_GFID(mover, ptr, UUID_CANONICAL_FORM_LEN, conv_noop,
+ parse_err);
+ FILL_AND_MOVE(ptr, ascii, off, mover, nleft,
+ UUID_CANONICAL_FORM_LEN);
+ FILL_AND_MOVE(" ", ascii, off, mover, nleft, 1);
+
+ /* fop */
+ len = strlen(mover);
+ VERIFY_SEPARATOR(mover, len, parse_err);
+
+ fop = atoi(mover);
+ fopname = gf_fop_list[fop];
+ if (fopname == NULL) {
+ parse_err = 1;
+ break;
+ }
+
+ MOVER_MOVE(mover, nleft, len);
+
+ len = strlen(fopname);
+ GF_CHANGELOG_FILL_BUFFER(fopname, ascii, off, len);
+
+ ng = nr_extra_recs[version_idx][fop];
+ for (; ng > 0; ng--) {
+ MOVER_MOVE(mover, nleft, 1);
+ len = strlen(mover);
+ VERIFY_SEPARATOR(mover, len, parse_err);
+
+ GF_CHANGELOG_FILL_BUFFER(" ", ascii, off, 1);
+ FILL_AND_MOVE(mover, ascii, off, mover, nleft, len);
+ }
+
+ /* pargfid + bname */
+ ng = nr_gfids[version_idx][fop];
+ while (ng-- > 0) {
+ MOVER_MOVE(mover, nleft, 1);
+ len = strlen(mover);
+ if (!len) {
+ MOVER_MOVE(mover, nleft, 1);
+ continue;
+ }
+
+ GF_CHANGELOG_FILL_BUFFER(" ", ascii, off, 1);
+
+ PARSE_GFID(mover, ptr, len, conv_noop, parse_err);
+ eptr = calloc(3, strlen(ptr));
+ if (!eptr) {
+ parse_err = 1;
+ break;
+ }
+
+ gf_rfc3986_encode_space_newline((unsigned char *)ptr, eptr,
+ jnl->rfc3986_space_newline);
+ FILL_AND_MOVE(eptr, ascii, off, mover, nleft, len);
+ free(eptr);
+ }
+
+ break;
+ default:
+ parse_err = 1;
+ }
+
+ if (parse_err)
+ break;
+
+ GF_CHANGELOG_FILL_BUFFER("\n", ascii, off, 1);
+
+ if (gf_changelog_write(to_fd, ascii, off) != off) {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_LIB_MSG_ASCII_ERROR,
+ "processing ascii changelog failed due to "
+ " error in writing change");
+ break;
+ }
+
+ MOVER_MOVE(mover, nleft, 1);
+ }
+
+ if ((nleft == 0) && (!parse_err))
+ ret = 0;
+
+ if (munmap(start, stbuf->st_size))
+ gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_MUNMAP_FAILED,
+ "munmap() error");
+
+out:
+ if (ascii)
+ GF_FREE(ascii);
+
+ return ret;
+}
+
+static int
+gf_changelog_decode(xlator_t *this, gf_changelog_journal_t *jnl, int from_fd,
+ int to_fd, struct stat *stbuf, int *zerob)
+{
+ int ret = -1;
+ int encoding = -1;
+ int major_version = -1;
+ int minor_version = -1;
+ int version_idx = -1;
+ size_t elen = 0;
+ char buffer[1024] = {
+ 0,
+ };
+
+ CHANGELOG_GET_HEADER_INFO(from_fd, buffer, sizeof(buffer), encoding,
+ major_version, minor_version, elen);
+ if (encoding == -1) /* unknown encoding */
+ goto out;
+
+ if (major_version == -1) /* unknown major version */
+ goto out;
+
+ if (minor_version == -1) /* unknown minor version */
+ goto out;
+
+ if (!CHANGELOG_VALID_ENCODING(encoding))
+ goto out;
+
+ if (elen == stbuf->st_size) {
+ *zerob = 1;
+ goto out;
+ }
+
+ if (major_version == 1 && minor_version == 1) {
+ version_idx = VERSION_1_1;
+ } else if (major_version == 1 && minor_version == 2) {
+ version_idx = VERSION_1_2;
+ }
+
+ if (version_idx == -1) /* unknown version number */
+ goto out;
+
+ /**
+ * start processing after the header
+ */
+ if (sys_lseek(from_fd, elen, SEEK_SET) < 0) {
+ goto out;
+ }
+ switch (encoding) {
+ case CHANGELOG_ENCODE_BINARY:
+ /**
+ * this ideally should have been a part of changelog-encoders.c
+ * (ie. part of the changelog translator).
+ */
+ ret = gf_changelog_parse_binary(this, jnl, from_fd, to_fd, elen,
+ stbuf, version_idx);
+ break;
+
+ case CHANGELOG_ENCODE_ASCII:
+ ret = gf_changelog_parse_ascii(this, jnl, from_fd, to_fd, elen,
+ stbuf, version_idx);
+ break;
+ }
+
+out:
+ return ret;
+}
+
+int
+gf_changelog_publish(xlator_t *this, gf_changelog_journal_t *jnl,
+ char *from_path)
+{
+ int ret = 0;
+ char dest[PATH_MAX] = {
+ 0,
+ };
+ char to_path[PATH_MAX] = {
+ 0,
+ };
+ struct stat stbuf = {
+ 0,
+ };
+
+ if (snprintf(to_path, PATH_MAX, "%s%s", jnl->jnl_current_dir,
+ basename(from_path)) >= PATH_MAX)
+ return -1;
+
+ /* handle zerob file that won't exist in current */
+ ret = sys_stat(to_path, &stbuf);
+ if (ret) {
+ if (errno == ENOENT)
+ ret = 0;
+ goto out;
+ }
+
+ if (snprintf(dest, PATH_MAX, "%s%s", jnl->jnl_processing_dir,
+ basename(from_path)) >= PATH_MAX)
+ return -1;
+
+ ret = sys_rename(to_path, dest);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", to_path, "to=%s",
+ dest, NULL);
+ }
+
+out:
+ return ret;
+}
+
+int
+gf_changelog_consume(xlator_t *this, gf_changelog_journal_t *jnl,
+ char *from_path, gf_boolean_t no_publish)
+{
+ int ret = -1;
+ int fd1 = 0;
+ int fd2 = 0;
+ int zerob = 0;
+ struct stat stbuf = {
+ 0,
+ };
+ char dest[PATH_MAX] = {
+ 0,
+ };
+ char to_path[PATH_MAX] = {
+ 0,
+ };
+
+ if (snprintf(to_path, PATH_MAX, "%s%s", jnl->jnl_current_dir,
+ basename(from_path)) >= PATH_MAX)
+ goto out;
+ if (snprintf(dest, PATH_MAX, "%s%s", jnl->jnl_processing_dir,
+ basename(from_path)) >= PATH_MAX)
+ goto out;
+
+ ret = sys_stat(from_path, &stbuf);
+ if (ret || !S_ISREG(stbuf.st_mode)) {
+ ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_STAT_FAILED,
+ "path=%s", from_path, NULL);
+ goto out;
+ }
+
+ fd1 = open(from_path, O_RDONLY);
+ if (fd1 < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_OPEN_FAILED,
+ "path=%s", from_path, NULL);
+ goto out;
+ }
+
+ fd2 = open(to_path, O_CREAT | O_TRUNC | O_RDWR,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (fd2 < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_OPEN_FAILED,
+ "path=%s", to_path, NULL);
+ goto close_fd;
+ } else {
+ ret = gf_changelog_decode(this, jnl, fd1, fd2, &stbuf, &zerob);
+
+ sys_close(fd2);
+
+ if (!ret) {
+ /* move it to processing on a successful
+ decode */
+ if (no_publish == _gf_true)
+ goto close_fd;
+ ret = sys_rename(to_path, dest);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", to_path,
+ "to=%s", dest, NULL);
+ }
+
+ /* remove it from .current if it's an empty file */
+ if (zerob) {
+ /* zerob changelogs must be unlinked */
+ ret = sys_unlink(to_path);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_LIB_MSG_UNLINK_FAILED, "name=empty changelog",
+ "path=%s", to_path, NULL);
+ }
+ }
+
+close_fd:
+ sys_close(fd1);
+
+out:
+ return ret;
+}
+
+void *
+gf_changelog_process(void *data)
+{
+ xlator_t *this = NULL;
+ gf_changelog_journal_t *jnl = NULL;
+ gf_changelog_entry_t *entry = NULL;
+ gf_changelog_processor_t *jnl_proc = NULL;
+
+ jnl = data;
+ jnl_proc = jnl->jnl_proc;
+ THIS = jnl->this;
+ this = jnl->this;
+
+ while (1) {
+ pthread_mutex_lock(&jnl_proc->lock);
+ {
+ while (list_empty(&jnl_proc->entries)) {
+ jnl_proc->waiting = _gf_true;
+ pthread_cond_wait(&jnl_proc->cond, &jnl_proc->lock);
+ }
+
+ entry = list_first_entry(&jnl_proc->entries, gf_changelog_entry_t,
+ list);
+ if (entry)
+ list_del(&entry->list);
+
+ jnl_proc->waiting = _gf_false;
+ }
+ pthread_mutex_unlock(&jnl_proc->lock);
+
+ if (entry) {
+ (void)gf_changelog_consume(this, jnl, entry->path, _gf_false);
+ GF_FREE(entry);
+ }
+ }
+
+ return NULL;
+}
+
+void
+gf_changelog_queue_journal(gf_changelog_processor_t *jnl_proc,
+ changelog_event_t *event)
+{
+ size_t len = 0;
+ gf_changelog_entry_t *entry = NULL;
+
+ entry = GF_CALLOC(1, sizeof(gf_changelog_entry_t),
+ gf_changelog_mt_libgfchangelog_entry_t);
+ if (!entry)
+ return;
+ INIT_LIST_HEAD(&entry->list);
+
+ len = strlen(event->u.journal.path);
+ (void)memcpy(entry->path, event->u.journal.path, len + 1);
+ entry->path[len] = '\0';
+
+ pthread_mutex_lock(&jnl_proc->lock);
+ {
+ list_add_tail(&entry->list, &jnl_proc->entries);
+ if (jnl_proc->waiting)
+ pthread_cond_signal(&jnl_proc->cond);
+ }
+ pthread_mutex_unlock(&jnl_proc->lock);
+
+ return;
+}
+
+void
+gf_changelog_handle_journal(void *xl, char *brick, void *cbkdata,
+ changelog_event_t *event)
+{
+ gf_changelog_journal_t *jnl = NULL;
+ gf_changelog_processor_t *jnl_proc = NULL;
+
+ jnl = cbkdata;
+ jnl_proc = jnl->jnl_proc;
+
+ gf_changelog_queue_journal(jnl_proc, event);
+}
+
+void
+gf_changelog_journal_disconnect(void *xl, char *brick, void *data)
+{
+ gf_changelog_journal_t *jnl = NULL;
+
+ jnl = data;
+
+ pthread_spin_lock(&jnl->lock);
+ {
+ JNL_SET_API_STATE(jnl, JNL_API_DISCONNECTED);
+ };
+ pthread_spin_unlock(&jnl->lock);
+}
+
+void
+gf_changelog_journal_connect(void *xl, char *brick, void *data)
+{
+ gf_changelog_journal_t *jnl = NULL;
+
+ jnl = data;
+
+ pthread_spin_lock(&jnl->lock);
+ {
+ JNL_SET_API_STATE(jnl, JNL_API_CONNECTED);
+ };
+ pthread_spin_unlock(&jnl->lock);
+
+ return;
+}
+
+void
+gf_changelog_cleanup_processor(gf_changelog_journal_t *jnl)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ gf_changelog_processor_t *jnl_proc = NULL;
+
+ this = THIS;
+ if (!this || !jnl || !jnl->jnl_proc)
+ goto error_return;
+
+ jnl_proc = jnl->jnl_proc;
+
+ ret = gf_thread_cleanup(this, jnl_proc->processor);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_LIB_MSG_CLEANUP_ERROR,
+ "failed to cleanup processor thread");
+ goto error_return;
+ }
+
+ (void)pthread_mutex_destroy(&jnl_proc->lock);
+ (void)pthread_cond_destroy(&jnl_proc->cond);
+
+ GF_FREE(jnl_proc);
+
+error_return:
+ return;
+}
+
+int
+gf_changelog_init_processor(gf_changelog_journal_t *jnl)
+{
+ int ret = -1;
+ gf_changelog_processor_t *jnl_proc = NULL;
+
+ jnl_proc = GF_CALLOC(1, sizeof(gf_changelog_processor_t),
+ gf_changelog_mt_libgfchangelog_t);
+ if (!jnl_proc)
+ goto error_return;
+
+ ret = pthread_mutex_init(&jnl_proc->lock, NULL);
+ if (ret != 0)
+ goto free_jnl_proc;
+ ret = pthread_cond_init(&jnl_proc->cond, NULL);
+ if (ret != 0)
+ goto cleanup_mutex;
+
+ INIT_LIST_HEAD(&jnl_proc->entries);
+ jnl_proc->waiting = _gf_false;
+ jnl->jnl_proc = jnl_proc;
+
+ ret = gf_thread_create(&jnl_proc->processor, NULL, gf_changelog_process,
+ jnl, "clogproc");
+ if (ret != 0) {
+ jnl->jnl_proc = NULL;
+ goto cleanup_cond;
+ }
+
+ return 0;
+
+cleanup_cond:
+ (void)pthread_cond_destroy(&jnl_proc->cond);
+cleanup_mutex:
+ (void)pthread_mutex_destroy(&jnl_proc->lock);
+free_jnl_proc:
+ GF_FREE(jnl_proc);
+error_return:
+ return -1;
+}
+
+static void
+gf_changelog_cleanup_fds(gf_changelog_journal_t *jnl)
+{
+ /* tracker fd */
+ if (jnl->jnl_fd != -1)
+ sys_close(jnl->jnl_fd);
+ /* processing dir */
+ if (jnl->jnl_dir)
+ sys_closedir(jnl->jnl_dir);
+
+ if (jnl->jnl_working_dir)
+ free(jnl->jnl_working_dir); /* allocated by realpath */
+}
+
+static int
+gf_changelog_open_dirs(xlator_t *this, gf_changelog_journal_t *jnl)
+{
+ int ret = -1;
+ DIR *dir = NULL;
+ int tracker_fd = 0;
+ char tracker_path[PATH_MAX] = {
+ 0,
+ };
+
+ /* .current */
+ (void)snprintf(jnl->jnl_current_dir, PATH_MAX,
+ "%s/" GF_CHANGELOG_CURRENT_DIR "/", jnl->jnl_working_dir);
+ ret = recursive_rmdir(jnl->jnl_current_dir);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, "path=%s",
+ jnl->jnl_current_dir, NULL);
+ goto out;
+ }
+ ret = mkdir_p(jnl->jnl_current_dir, 0600, _gf_false);
+ if (ret)
+ goto out;
+
+ /* .processed */
+ (void)snprintf(jnl->jnl_processed_dir, PATH_MAX,
+ "%s/" GF_CHANGELOG_PROCESSED_DIR "/", jnl->jnl_working_dir);
+ ret = mkdir_p(jnl->jnl_processed_dir, 0600, _gf_false);
+ if (ret)
+ goto out;
+
+ /* .processing */
+ (void)snprintf(jnl->jnl_processing_dir, PATH_MAX,
+ "%s/" GF_CHANGELOG_PROCESSING_DIR "/", jnl->jnl_working_dir);
+ ret = recursive_rmdir(jnl->jnl_processing_dir);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_LIB_MSG_FAILED_TO_RMDIR, "path=%s",
+ jnl->jnl_processing_dir, NULL);
+ goto out;
+ }
+
+ ret = mkdir_p(jnl->jnl_processing_dir, 0600, _gf_false);
+ if (ret)
+ goto out;
+
+ dir = sys_opendir(jnl->jnl_processing_dir);
+ if (!dir) {
+ gf_msg("", GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_OPENDIR_ERROR,
+ "opendir() error");
+ goto out;
+ }
+
+ jnl->jnl_dir = dir;
+
+ (void)snprintf(tracker_path, PATH_MAX, "%s/" GF_CHANGELOG_TRACKER,
+ jnl->jnl_working_dir);
+
+ tracker_fd = open(tracker_path, O_CREAT | O_APPEND | O_RDWR,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (tracker_fd < 0) {
+ sys_closedir(jnl->jnl_dir);
+ ret = -1;
+ goto out;
+ }
+
+ jnl->jnl_fd = tracker_fd;
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+gf_changelog_init_history(xlator_t *this, gf_changelog_journal_t *jnl,
+ char *brick_path)
+{
+ int i = 0;
+ int ret = 0;
+ char hist_scratch_dir[PATH_MAX] = {
+ 0,
+ };
+
+ jnl->hist_jnl = GF_CALLOC(1, sizeof(*jnl),
+ gf_changelog_mt_libgfchangelog_t);
+ if (!jnl->hist_jnl)
+ goto error_return;
+
+ jnl->hist_jnl->jnl_dir = NULL;
+ jnl->hist_jnl->jnl_fd = -1;
+
+ (void)snprintf(hist_scratch_dir, PATH_MAX,
+ "%s/" GF_CHANGELOG_HISTORY_DIR "/", jnl->jnl_working_dir);
+
+ ret = mkdir_p(hist_scratch_dir, 0600, _gf_false);
+ if (ret)
+ goto dealloc_hist;
+
+ jnl->hist_jnl->jnl_working_dir = realpath(hist_scratch_dir, NULL);
+ if (!jnl->hist_jnl->jnl_working_dir)
+ goto dealloc_hist;
+
+ ret = gf_changelog_open_dirs(this, jnl->hist_jnl);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_LIB_MSG_OPENDIR_ERROR,
+ "could not create entries in history scratch dir");
+ goto dealloc_hist;
+ }
+
+ if (snprintf(jnl->hist_jnl->jnl_brickpath, PATH_MAX, "%s", brick_path) >=
+ PATH_MAX)
+ goto dealloc_hist;
+
+ for (i = 0; i < 256; i++) {
+ jnl->hist_jnl->rfc3986_space_newline[i] = (i == ' ' || i == '\n' ||
+ i == '%')
+ ? 0
+ : i;
+ }
+
+ return 0;
+
+dealloc_hist:
+ GF_FREE(jnl->hist_jnl);
+ jnl->hist_jnl = NULL;
+error_return:
+ return -1;
+}
+
+void
+gf_changelog_journal_fini(void *xl, char *brick, void *data)
+{
+ gf_changelog_journal_t *jnl = NULL;
+
+ jnl = data;
+
+ gf_changelog_cleanup_processor(jnl);
+
+ gf_changelog_cleanup_fds(jnl);
+ if (jnl->hist_jnl)
+ gf_changelog_cleanup_fds(jnl->hist_jnl);
+
+ GF_FREE(jnl);
+}
+
+void *
+gf_changelog_journal_init(void *xl, struct gf_brick_spec *brick)
+{
+ int i = 0;
+ int ret = 0;
+ xlator_t *this = NULL;
+ struct stat buf = {
+ 0,
+ };
+ char *scratch_dir = NULL;
+ gf_changelog_journal_t *jnl = NULL;
+
+ this = xl;
+ scratch_dir = (char *)brick->ptr;
+
+ jnl = GF_CALLOC(1, sizeof(gf_changelog_journal_t),
+ gf_changelog_mt_libgfchangelog_t);
+ if (!jnl)
+ goto error_return;
+
+ if (snprintf(jnl->jnl_brickpath, PATH_MAX, "%s", brick->brick_path) >=
+ PATH_MAX)
+ goto dealloc_private;
+
+ if (sys_stat(scratch_dir, &buf) && errno == ENOENT) {
+ ret = mkdir_p(scratch_dir, 0600, _gf_true);
+ if (ret)
+ goto dealloc_private;
+ }
+
+ jnl->jnl_working_dir = realpath(scratch_dir, NULL);
+ if (!jnl->jnl_working_dir)
+ goto dealloc_private;
+
+ ret = gf_changelog_open_dirs(this, jnl);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_LIB_MSG_OPENDIR_ERROR,
+ "could not create entries in scratch dir");
+ goto dealloc_private;
+ }
+
+ /* RFC 3986 {de,en}coding */
+ for (i = 0; i < 256; i++) {
+ jnl->rfc3986_space_newline[i] = (i == ' ' || i == '\n' || i == '%') ? 0
+ : i;
+ }
+
+ ret = gf_changelog_init_history(this, jnl, brick->brick_path);
+ if (ret)
+ goto cleanup_fds;
+
+ /* initialize journal processor */
+ jnl->this = this;
+ ret = gf_changelog_init_processor(jnl);
+ if (ret)
+ goto cleanup_fds;
+
+ JNL_SET_API_STATE(jnl, JNL_API_CONN_INPROGESS);
+ ret = pthread_spin_init(&jnl->lock, 0);
+ if (ret != 0)
+ goto cleanup_processor;
+ return jnl;
+
+cleanup_processor:
+ gf_changelog_cleanup_processor(jnl);
+cleanup_fds:
+ gf_changelog_cleanup_fds(jnl);
+ if (jnl->hist_jnl)
+ gf_changelog_cleanup_fds(jnl->hist_jnl);
+dealloc_private:
+ GF_FREE(jnl);
+error_return:
+ return NULL;
+}
diff --git a/xlators/features/changelog/lib/src/gf-changelog-journal.h b/xlators/features/changelog/lib/src/gf-changelog-journal.h
new file mode 100644
index 00000000000..ba5b9bf827e
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-journal.h
@@ -0,0 +1,116 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __GF_CHANGELOG_JOURNAL_H
+#define __GF_CHANGELOG_JOURNAL_H
+
+#include <unistd.h>
+#include <pthread.h>
+
+#include "changelog.h"
+
+enum api_conn {
+ JNL_API_CONNECTED,
+ JNL_API_CONN_INPROGESS,
+ JNL_API_DISCONNECTED,
+};
+
+typedef struct gf_changelog_entry {
+ char path[PATH_MAX];
+
+ struct list_head list;
+} gf_changelog_entry_t;
+
+typedef struct gf_changelog_processor {
+ pthread_mutex_t lock; /* protects ->entries */
+ pthread_cond_t cond; /* waiter during empty list */
+ gf_boolean_t waiting;
+
+ pthread_t processor; /* thread-id of journal processing thread */
+
+ struct list_head entries;
+} gf_changelog_processor_t;
+
+typedef struct gf_changelog_journal {
+ DIR *jnl_dir; /* 'processing' directory stream */
+
+ int jnl_fd; /* fd to the tracker file */
+
+ char jnl_brickpath[PATH_MAX]; /* brick path for this end-point */
+
+ gf_changelog_processor_t *jnl_proc;
+
+ char *jnl_working_dir; /* scratch directory */
+
+ char jnl_current_dir[PATH_MAX];
+ char jnl_processed_dir[PATH_MAX];
+ char jnl_processing_dir[PATH_MAX];
+
+ char rfc3986_space_newline[256]; /* RFC 3986 string encoding */
+
+ struct gf_changelog_journal *hist_jnl;
+ int hist_done; /* holds 0 done scanning,
+ 1 keep scanning and -1 error */
+
+ pthread_spinlock_t lock;
+ int connected;
+ xlator_t *this;
+} gf_changelog_journal_t;
+
+#define JNL_SET_API_STATE(jnl, state) (jnl->connected = state)
+#define JNL_IS_API_DISCONNECTED(jnl) (jnl->connected == JNL_API_DISCONNECTED)
+
+/* History API */
+typedef struct gf_changelog_history_data {
+ int len;
+
+ int htime_fd;
+
+ /* parallelism count */
+ int n_parallel;
+
+ /* history from, to indexes */
+ unsigned long from;
+ unsigned long to;
+ xlator_t *this;
+} gf_changelog_history_data_t;
+
+typedef struct gf_changelog_consume_data {
+ /** set of inputs */
+
+ /* fd to read from */
+ int fd;
+
+ /* from @offset */
+ off_t offset;
+
+ xlator_t *this;
+
+ gf_changelog_journal_t *jnl;
+
+ /** set of outputs */
+
+ /* return value */
+ int retval;
+
+ /* journal processed */
+ char changelog[PATH_MAX];
+} gf_changelog_consume_data_t;
+
+/* event handler */
+CALLBACK gf_changelog_handle_journal;
+
+/* init, connect & disconnect handler */
+INIT gf_changelog_journal_init;
+FINI gf_changelog_journal_fini;
+CONNECT gf_changelog_journal_connect;
+DISCONNECT gf_changelog_journal_disconnect;
+
+#endif
diff --git a/xlators/features/changelog/lib/src/gf-changelog-reborp.c b/xlators/features/changelog/lib/src/gf-changelog-reborp.c
new file mode 100644
index 00000000000..56b11cbb705
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-reborp.c
@@ -0,0 +1,413 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-misc.h"
+#include "changelog-mem-types.h"
+
+#include "gf-changelog-helpers.h"
+#include "changelog-rpc-common.h"
+#include "changelog-lib-messages.h"
+
+#include <glusterfs/syscall.h>
+
+/**
+ * Reverse socket: actual data transfer handler. Connection
+ * initiator is PROBER, data transfer is REBORP.
+ */
+
+static struct rpcsvc_program *gf_changelog_reborp_programs[];
+
+void *
+gf_changelog_connection_janitor(void *arg)
+{
+ int32_t ret = 0;
+ xlator_t *this = NULL;
+ gf_private_t *priv = NULL;
+ gf_changelog_t *entry = NULL;
+ struct gf_event *event = NULL;
+ struct gf_event_list *ev = NULL;
+ unsigned long drained = 0;
+
+ this = arg;
+ THIS = this;
+
+ priv = this->private;
+
+ while (1) {
+ pthread_mutex_lock(&priv->lock);
+ {
+ while (list_empty(&priv->cleanups))
+ pthread_cond_wait(&priv->cond, &priv->lock);
+
+ entry = list_first_entry(&priv->cleanups, gf_changelog_t, list);
+ list_del_init(&entry->list);
+ }
+ pthread_mutex_unlock(&priv->lock);
+
+ drained = 0;
+ ev = &entry->event;
+
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_LIB_MSG_CLEANING_BRICK_ENTRY_INFO, "brick=%s",
+ entry->brick, NULL);
+
+ /* 0x0: disable rpc-clnt */
+ rpc_clnt_disable(RPC_PROBER(entry));
+
+ /* 0x1: cleanup callback invoker thread */
+ ret = gf_cleanup_event(this, ev);
+ if (ret)
+ continue;
+
+ /* 0x2: drain pending events */
+ while (!list_empty(&ev->events)) {
+ event = list_first_entry(&ev->events, struct gf_event, list);
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_LIB_MSG_DRAINING_EVENT_INFO, "seq=%lu",
+ event->seq, "payload=%d", event->count, NULL);
+
+ GF_FREE(event);
+ drained++;
+ }
+
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_LIB_MSG_DRAINED_EVENT_INFO, "num=%lu", drained, NULL);
+
+ /* 0x3: freeup brick entry */
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_LIB_MSG_FREEING_ENTRY_INFO, "entry=%p", entry, NULL);
+ LOCK_DESTROY(&entry->statelock);
+ GF_FREE(entry);
+ }
+
+ return NULL;
+}
+
+int
+gf_changelog_reborp_rpcsvc_notify(rpcsvc_t *rpc, void *mydata,
+ rpcsvc_event_t event, void *data)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ gf_changelog_t *entry = NULL;
+
+ if (!(event == RPCSVC_EVENT_ACCEPT || event == RPCSVC_EVENT_DISCONNECT))
+ return 0;
+
+ entry = mydata;
+ this = entry->this;
+
+ switch (event) {
+ case RPCSVC_EVENT_ACCEPT:
+ ret = sys_unlink(RPC_SOCK(entry));
+ if (ret != 0)
+ gf_smsg(this->name, GF_LOG_WARNING, errno,
+ CHANGELOG_LIB_MSG_UNLINK_FAILED, "name=reverse socket",
+ "path=%s", RPC_SOCK(entry), NULL);
+ if (entry->connected)
+ GF_CHANGELOG_INVOKE_CBK(this, entry->connected, entry->brick,
+ entry->ptr);
+ break;
+ case RPCSVC_EVENT_DISCONNECT:
+ if (entry->disconnected)
+ GF_CHANGELOG_INVOKE_CBK(this, entry->disconnected, entry->brick,
+ entry->ptr);
+ /* passthrough */
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+rpcsvc_t *
+gf_changelog_reborp_init_rpc_listner(xlator_t *this, char *path, char *sock,
+ void *cbkdata)
+{
+ CHANGELOG_MAKE_TMP_SOCKET_PATH(path, sock, UNIX_PATH_MAX);
+ return changelog_rpc_server_init(this, sock, cbkdata,
+ gf_changelog_reborp_rpcsvc_notify,
+ gf_changelog_reborp_programs);
+}
+
+/**
+ * This is dirty and painful as of now until there is event filtering in the
+ * server. The entire event buffer is scanned and interested events are picked,
+ * whereas we _should_ be notified with the events we were interested in
+ * (selected at the time of probe). As of now this is complete BS and needs
+ * fixture ASAP. I just made it work, it needs to be better.
+ *
+ * @FIXME: cleanup this bugger once server filters events.
+ */
+void
+gf_changelog_invoke_callback(gf_changelog_t *entry, struct iovec **vec,
+ int payloadcnt)
+{
+ int i = 0;
+ int evsize = 0;
+ xlator_t *this = NULL;
+ changelog_event_t *event = NULL;
+
+ this = entry->this;
+
+ for (; i < payloadcnt; i++) {
+ event = (changelog_event_t *)vec[i]->iov_base;
+ evsize = vec[i]->iov_len / CHANGELOG_EV_SIZE;
+
+ for (; evsize > 0; evsize--, event++) {
+ if (gf_changelog_filter_check(entry, event)) {
+ GF_CHANGELOG_INVOKE_CBK(this, entry->callback, entry->brick,
+ entry->ptr, event);
+ }
+ }
+ }
+}
+
+/**
+ * Ordered event handler is self-adaptive.. if the event sequence number
+ * is what's expected (->next_seq) there is no ordering list that's
+ * maintained. On out-of-order event notifications, event buffers are
+ * dynamically allocated and ordered.
+ */
+
+int
+__is_expected_sequence(struct gf_event_list *ev, struct gf_event *event)
+{
+ return (ev->next_seq == event->seq);
+}
+
+int
+__can_process_event(struct gf_event_list *ev, struct gf_event **event)
+{
+ *event = list_first_entry(&ev->events, struct gf_event, list);
+
+ if (__is_expected_sequence(ev, *event)) {
+ list_del(&(*event)->list);
+ ev->next_seq++;
+ return 1;
+ }
+
+ return 0;
+}
+
+void
+pick_event_ordered(struct gf_event_list *ev, struct gf_event **event)
+{
+ pthread_mutex_lock(&ev->lock);
+ {
+ while (list_empty(&ev->events) || !__can_process_event(ev, event))
+ pthread_cond_wait(&ev->cond, &ev->lock);
+ }
+ pthread_mutex_unlock(&ev->lock);
+}
+
+void
+pick_event_unordered(struct gf_event_list *ev, struct gf_event **event)
+{
+ pthread_mutex_lock(&ev->lock);
+ {
+ while (list_empty(&ev->events))
+ pthread_cond_wait(&ev->cond, &ev->lock);
+ *event = list_first_entry(&ev->events, struct gf_event, list);
+ list_del(&(*event)->list);
+ }
+ pthread_mutex_unlock(&ev->lock);
+}
+
+void *
+gf_changelog_callback_invoker(void *arg)
+{
+ xlator_t *this = NULL;
+ gf_changelog_t *entry = NULL;
+ struct iovec *vec = NULL;
+ struct gf_event *event = NULL;
+ struct gf_event_list *ev = NULL;
+
+ ev = arg;
+ entry = ev->entry;
+ THIS = this = entry->this;
+
+ while (1) {
+ entry->pickevent(ev, &event);
+
+ vec = (struct iovec *)&event->iov;
+ gf_changelog_invoke_callback(entry, &vec, event->count);
+
+ GF_FREE(event);
+ }
+
+ return NULL;
+}
+
+static int
+orderfn(struct list_head *pos1, struct list_head *pos2)
+{
+ struct gf_event *event1 = NULL;
+ struct gf_event *event2 = NULL;
+
+ event1 = list_entry(pos1, struct gf_event, list);
+ event2 = list_entry(pos2, struct gf_event, list);
+
+ if (event1->seq > event2->seq)
+ return 1;
+ return -1;
+}
+
+void
+queue_ordered_event(struct gf_event_list *ev, struct gf_event *event)
+{
+ /* add event to the ordered event list and wake up listener(s) */
+ pthread_mutex_lock(&ev->lock);
+ {
+ list_add_order(&event->list, &ev->events, orderfn);
+ if (!ev->next_seq)
+ ev->next_seq = event->seq;
+ if (ev->next_seq == event->seq)
+ pthread_cond_signal(&ev->cond);
+ }
+ pthread_mutex_unlock(&ev->lock);
+}
+
+void
+queue_unordered_event(struct gf_event_list *ev, struct gf_event *event)
+{
+ /* add event to the tail of the queue and wake up listener(s) */
+ pthread_mutex_lock(&ev->lock);
+ {
+ list_add_tail(&event->list, &ev->events);
+ pthread_cond_signal(&ev->cond);
+ }
+ pthread_mutex_unlock(&ev->lock);
+}
+
+int
+gf_changelog_event_handler(rpcsvc_request_t *req, xlator_t *this,
+ gf_changelog_t *entry)
+{
+ int i = 0;
+ size_t payloadlen = 0;
+ ssize_t len = 0;
+ int payloadcnt = 0;
+ changelog_event_req rpc_req = {
+ 0,
+ };
+ changelog_event_rsp rpc_rsp = {
+ 0,
+ };
+ struct iovec *vec = NULL;
+ struct gf_event *event = NULL;
+ struct gf_event_list *ev = NULL;
+
+ ev = &entry->event;
+
+ len = xdr_to_generic(req->msg[0], &rpc_req,
+ (xdrproc_t)xdr_changelog_event_req);
+ if (len < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_LIB_MSG_XDR_DECODING_FAILED, "xdr decoding failed");
+ req->rpc_err = GARBAGE_ARGS;
+ goto handle_xdr_error;
+ }
+
+ if (len < req->msg[0].iov_len) {
+ payloadcnt = 1;
+ payloadlen = (req->msg[0].iov_len - len);
+ }
+ for (i = 1; i < req->count; i++) {
+ payloadcnt++;
+ payloadlen += req->msg[i].iov_len;
+ }
+
+ event = GF_CALLOC(1, GF_EVENT_CALLOC_SIZE(payloadcnt, payloadlen),
+ gf_changelog_mt_libgfchangelog_event_t);
+ if (!event)
+ goto handle_xdr_error;
+ INIT_LIST_HEAD(&event->list);
+
+ payloadlen = 0;
+ event->seq = rpc_req.seq;
+ event->count = payloadcnt;
+
+ /* deep copy IO vectors */
+ vec = &event->iov[0];
+ GF_EVENT_ASSIGN_IOVEC(vec, event, (req->msg[0].iov_len - len), payloadlen);
+ (void)memcpy(vec->iov_base, req->msg[0].iov_base + len, vec->iov_len);
+
+ for (i = 1; i < req->count; i++) {
+ vec = &event->iov[i];
+ GF_EVENT_ASSIGN_IOVEC(vec, event, req->msg[i].iov_len, payloadlen);
+ (void)memcpy(event->iov[i].iov_base, req->msg[i].iov_base,
+ req->msg[i].iov_len);
+ }
+
+ gf_msg_debug(this->name, 0,
+ "seq: %" PRIu64 " [%s] (time: %" PRIu64 ".%" PRIu64
+ "), "
+ "(vec: %d, len: %zd)",
+ rpc_req.seq, entry->brick, rpc_req.tv_sec, rpc_req.tv_usec,
+ payloadcnt, payloadlen);
+
+ /* dispatch event */
+ entry->queueevent(ev, event);
+
+ /* ack sequence number */
+ rpc_rsp.op_ret = 0;
+ rpc_rsp.seq = rpc_req.seq;
+
+ goto submit_rpc;
+
+handle_xdr_error:
+ rpc_rsp.op_ret = -1;
+ rpc_rsp.seq = 0; /* invalid */
+submit_rpc:
+ return changelog_rpc_sumbit_reply(req, &rpc_rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_changelog_event_rsp);
+}
+
+int
+gf_changelog_reborp_handle_event(rpcsvc_request_t *req)
+{
+ xlator_t *this = NULL;
+ rpcsvc_t *svc = NULL;
+ gf_changelog_t *entry = NULL;
+
+ svc = rpcsvc_request_service(req);
+ entry = svc->mydata;
+
+ this = THIS = entry->this;
+
+ return gf_changelog_event_handler(req, this, entry);
+}
+
+static rpcsvc_actor_t gf_changelog_reborp_actors[CHANGELOG_REV_PROC_MAX] = {
+ [CHANGELOG_REV_PROC_EVENT] = {"CHANGELOG EVENT HANDLER",
+ gf_changelog_reborp_handle_event, NULL,
+ CHANGELOG_REV_PROC_EVENT, DRC_NA, 0},
+};
+
+/**
+ * Do not use synctask as the RPC layer dereferences ->mydata as THIS.
+ * In gf_changelog_setup_rpc(), @cbkdata is of type @gf_changelog_t,
+ * and that's required to invoke the callback with the appropriate
+ * brick path and it's private data.
+ */
+static struct rpcsvc_program gf_changelog_reborp_prog = {
+ .progname = "LIBGFCHANGELOG REBORP",
+ .prognum = CHANGELOG_REV_RPC_PROCNUM,
+ .progver = CHANGELOG_REV_RPC_PROCVER,
+ .numactors = CHANGELOG_REV_PROC_MAX,
+ .actors = gf_changelog_reborp_actors,
+ .synctask = _gf_false,
+};
+
+static struct rpcsvc_program *gf_changelog_reborp_programs[] = {
+ &gf_changelog_reborp_prog,
+ NULL,
+};
diff --git a/xlators/features/changelog/lib/src/gf-changelog-rpc.c b/xlators/features/changelog/lib/src/gf-changelog-rpc.c
new file mode 100644
index 00000000000..8ec6ffbcebc
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-rpc.c
@@ -0,0 +1,98 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "gf-changelog-rpc.h"
+#include "changelog-misc.h"
+#include "changelog-mem-types.h"
+
+struct rpc_clnt_program gf_changelog_clnt;
+
+/* TODO: piggyback reconnect to called (upcall) */
+int
+gf_changelog_rpc_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ break;
+ case RPC_CLNT_DISCONNECT:
+ case RPC_CLNT_MSG:
+ case RPC_CLNT_DESTROY:
+ case RPC_CLNT_PING:
+ break;
+ }
+
+ return 0;
+}
+
+struct rpc_clnt *
+gf_changelog_rpc_init(xlator_t *this, gf_changelog_t *entry)
+{
+ char sockfile[UNIX_PATH_MAX] = {
+ 0,
+ };
+
+ CHANGELOG_MAKE_SOCKET_PATH(entry->brick, sockfile, UNIX_PATH_MAX);
+ return changelog_rpc_client_init(this, entry, sockfile,
+ gf_changelog_rpc_notify);
+}
+
+/**
+ * remote procedure calls declarations.
+ */
+
+int
+gf_probe_changelog_cbk(struct rpc_req *req, struct iovec *iovec, int count,
+ void *myframe)
+{
+ return 0;
+}
+
+int
+gf_probe_changelog_filter(call_frame_t *frame, xlator_t *this, void *data)
+{
+ char *sock = NULL;
+ gf_changelog_t *entry = NULL;
+ changelog_probe_req req = {
+ 0,
+ };
+
+ entry = data;
+ sock = RPC_SOCK(entry);
+
+ (void)memcpy(&req.sock, sock, strlen(sock));
+ req.filter = entry->notify;
+
+ /* invoke RPC */
+ return changelog_rpc_sumbit_req(
+ RPC_PROBER(entry), (void *)&req, frame, &gf_changelog_clnt,
+ CHANGELOG_RPC_PROBE_FILTER, NULL, 0, NULL, this, gf_probe_changelog_cbk,
+ (xdrproc_t)xdr_changelog_probe_req);
+}
+
+int
+gf_changelog_invoke_rpc(xlator_t *this, gf_changelog_t *entry, int procidx)
+{
+ return changelog_invoke_rpc(this, RPC_PROBER(entry), &gf_changelog_clnt,
+ procidx, entry);
+}
+
+struct rpc_clnt_procedure gf_changelog_procs[CHANGELOG_RPC_PROC_MAX] = {
+ [CHANGELOG_RPC_PROC_NULL] = {"NULL", NULL},
+ [CHANGELOG_RPC_PROBE_FILTER] = {"PROBE FILTER", gf_probe_changelog_filter},
+};
+
+struct rpc_clnt_program gf_changelog_clnt = {
+ .progname = "LIBGFCHANGELOG",
+ .prognum = CHANGELOG_RPC_PROGNUM,
+ .progver = CHANGELOG_RPC_PROGVER,
+ .numproc = CHANGELOG_RPC_PROC_MAX,
+ .proctable = gf_changelog_procs,
+};
diff --git a/xlators/features/changelog/lib/src/gf-changelog-rpc.h b/xlators/features/changelog/lib/src/gf-changelog-rpc.h
new file mode 100644
index 00000000000..5c82d6f1c08
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog-rpc.h
@@ -0,0 +1,28 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __GF_CHANGELOG_RPC_H
+#define __GF_CHANGELOG_RPC_H
+
+#include <glusterfs/xlator.h>
+
+#include "gf-changelog-helpers.h"
+#include "changelog-rpc-common.h"
+
+struct rpc_clnt *
+gf_changelog_rpc_init(xlator_t *, gf_changelog_t *);
+
+int
+gf_changelog_invoke_rpc(xlator_t *, gf_changelog_t *, int);
+
+rpcsvc_t *
+gf_changelog_reborp_init_rpc_listner(xlator_t *, char *, char *, void *);
+
+#endif
diff --git a/xlators/features/changelog/lib/src/gf-changelog.c b/xlators/features/changelog/lib/src/gf-changelog.c
new file mode 100644
index 00000000000..57c3d39ef76
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-changelog.c
@@ -0,0 +1,652 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <errno.h>
+#include <dirent.h>
+#include <stddef.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <string.h>
+
+#include <glusterfs/globals.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/syncop.h>
+
+#include "gf-changelog-rpc.h"
+#include "gf-changelog-helpers.h"
+
+/* from the changelog translator */
+#include "changelog-misc.h"
+#include "changelog-mem-types.h"
+#include "changelog-lib-messages.h"
+
+/**
+ * Global singleton xlator pointer for the library, initialized
+ * during library load. This should probably be hidden inside
+ * an initialized object which is an handle for the consumer.
+ *
+ * TODO: do away with the global..
+ */
+xlator_t *master = NULL;
+
+static inline gf_private_t *
+gf_changelog_alloc_priv()
+{
+ int ret = 0;
+ gf_private_t *priv = NULL;
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_changelog_mt_priv_t);
+ if (!priv)
+ goto error_return;
+ INIT_LIST_HEAD(&priv->connections);
+ INIT_LIST_HEAD(&priv->cleanups);
+
+ ret = pthread_mutex_init(&priv->lock, NULL);
+ if (ret != 0)
+ goto free_priv;
+ ret = pthread_cond_init(&priv->cond, NULL);
+ if (ret != 0)
+ goto cleanup_mutex;
+
+ priv->api = NULL;
+ return priv;
+
+cleanup_mutex:
+ (void)pthread_mutex_destroy(&priv->lock);
+free_priv:
+ GF_FREE(priv);
+error_return:
+ return NULL;
+}
+
+#define GF_CHANGELOG_EVENT_POOL_SIZE 16384
+#define GF_CHANGELOG_EVENT_THREAD_COUNT 4
+
+static int
+gf_changelog_ctx_defaults_init(glusterfs_ctx_t *ctx)
+{
+ cmd_args_t *cmd_args = NULL;
+ struct rlimit lim = {
+ 0,
+ };
+ call_pool_t *pool = NULL;
+ int ret = -1;
+
+ ret = xlator_mem_acct_init(THIS, gf_changelog_mt_end);
+ if (ret != 0)
+ return -1;
+
+ ctx->process_uuid = generate_glusterfs_ctx_id();
+ if (!ctx->process_uuid)
+ return -1;
+
+ ctx->page_size = 128 * GF_UNIT_KB;
+
+ ctx->iobuf_pool = iobuf_pool_new();
+ if (!ctx->iobuf_pool)
+ goto free_pool;
+
+ ctx->event_pool = gf_event_pool_new(GF_CHANGELOG_EVENT_POOL_SIZE,
+ GF_CHANGELOG_EVENT_THREAD_COUNT);
+ if (!ctx->event_pool)
+ goto free_pool;
+
+ pool = GF_CALLOC(1, sizeof(call_pool_t),
+ gf_changelog_mt_libgfchangelog_call_pool_t);
+ if (!pool)
+ goto free_pool;
+
+ /* frame_mem_pool size 112 * 64 */
+ pool->frame_mem_pool = mem_pool_new(call_frame_t, 32);
+ if (!pool->frame_mem_pool)
+ goto free_pool;
+
+ /* stack_mem_pool size 256 * 128 */
+ pool->stack_mem_pool = mem_pool_new(call_stack_t, 16);
+
+ if (!pool->stack_mem_pool)
+ goto free_pool;
+
+ ctx->stub_mem_pool = mem_pool_new(call_stub_t, 16);
+ if (!ctx->stub_mem_pool)
+ goto free_pool;
+
+ ctx->dict_pool = mem_pool_new(dict_t, 32);
+ if (!ctx->dict_pool)
+ goto free_pool;
+
+ ctx->dict_pair_pool = mem_pool_new(data_pair_t, 512);
+ if (!ctx->dict_pair_pool)
+ goto free_pool;
+
+ ctx->dict_data_pool = mem_pool_new(data_t, 512);
+ if (!ctx->dict_data_pool)
+ goto free_pool;
+
+ ctx->logbuf_pool = mem_pool_new(log_buf_t, 256);
+ if (!ctx->logbuf_pool)
+ goto free_pool;
+
+ INIT_LIST_HEAD(&pool->all_frames);
+ LOCK_INIT(&pool->lock);
+ ctx->pool = pool;
+
+ LOCK_INIT(&ctx->lock);
+
+ cmd_args = &ctx->cmd_args;
+
+ INIT_LIST_HEAD(&cmd_args->xlator_options);
+
+ lim.rlim_cur = RLIM_INFINITY;
+ lim.rlim_max = RLIM_INFINITY;
+ setrlimit(RLIMIT_CORE, &lim);
+
+ return 0;
+
+free_pool:
+ if (pool) {
+ GF_FREE(pool->frame_mem_pool);
+
+ GF_FREE(pool->stack_mem_pool);
+
+ GF_FREE(pool);
+ }
+
+ GF_FREE(ctx->stub_mem_pool);
+
+ GF_FREE(ctx->dict_pool);
+
+ GF_FREE(ctx->dict_pair_pool);
+
+ GF_FREE(ctx->dict_data_pool);
+
+ GF_FREE(ctx->logbuf_pool);
+
+ GF_FREE(ctx->iobuf_pool);
+
+ GF_FREE(ctx->event_pool);
+
+ return -1;
+}
+
+/* TODO: cleanup ctx defaults */
+void
+gf_changelog_cleanup_this(xlator_t *this)
+{
+ glusterfs_ctx_t *ctx = NULL;
+
+ if (!this)
+ return;
+
+ ctx = this->ctx;
+ syncenv_destroy(ctx->env);
+ free(ctx);
+
+ this->private = NULL;
+ this->ctx = NULL;
+
+ mem_pools_fini();
+}
+
+static int
+gf_changelog_init_context()
+{
+ glusterfs_ctx_t *ctx = NULL;
+
+ ctx = glusterfs_ctx_new();
+ if (!ctx)
+ goto error_return;
+
+ if (glusterfs_globals_init(ctx))
+ goto free_ctx;
+
+ THIS->ctx = ctx;
+ if (gf_changelog_ctx_defaults_init(ctx))
+ goto free_ctx;
+
+ ctx->env = syncenv_new(0, 0, 0);
+ if (!ctx->env)
+ goto free_ctx;
+ return 0;
+
+free_ctx:
+ free(ctx);
+ THIS->ctx = NULL;
+error_return:
+ return -1;
+}
+
+static int
+gf_changelog_init_master()
+{
+ int ret = 0;
+
+ ret = gf_changelog_init_context();
+ mem_pools_init();
+
+ return ret;
+}
+
+/* TODO: cleanup clnt/svc on failure */
+int
+gf_changelog_setup_rpc(xlator_t *this, gf_changelog_t *entry, int proc)
+{
+ int ret = 0;
+ rpcsvc_t *svc = NULL;
+ struct rpc_clnt *rpc = NULL;
+
+ /**
+ * Initialize a connect back socket. A probe() RPC call to the server
+ * triggers a reverse connect.
+ */
+ svc = gf_changelog_reborp_init_rpc_listner(this, entry->brick,
+ RPC_SOCK(entry), entry);
+ if (!svc)
+ goto error_return;
+ RPC_REBORP(entry) = svc;
+
+ /* Initialize an RPC client */
+ rpc = gf_changelog_rpc_init(this, entry);
+ if (!rpc)
+ goto error_return;
+ RPC_PROBER(entry) = rpc;
+
+ /**
+ * @FIXME
+ * till we have connection state machine, let's delay the RPC call
+ * for now..
+ */
+ sleep(2);
+
+ /**
+ * Probe changelog translator for reverse connection. After a successful
+ * call, there's less use of the client and can be disconnected, but
+ * let's leave the connection active for any future RPC calls.
+ */
+ ret = gf_changelog_invoke_rpc(this, entry, proc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_LIB_MSG_INVOKE_RPC_FAILED,
+ "Could not initiate probe RPC, bailing out!!!");
+ goto error_return;
+ }
+
+ return 0;
+
+error_return:
+ return -1;
+}
+
+int
+gf_cleanup_event(xlator_t *this, struct gf_event_list *ev)
+{
+ int ret = 0;
+
+ ret = gf_thread_cleanup(this, ev->invoker);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret,
+ CHANGELOG_LIB_MSG_CLEANUP_ERROR,
+ "cannot cleanup callback invoker thread."
+ " Not freeing resources");
+ return -1;
+ }
+
+ ev->entry = NULL;
+
+ return 0;
+}
+
+static int
+gf_init_event(gf_changelog_t *entry)
+{
+ int ret = 0;
+ struct gf_event_list *ev = NULL;
+
+ ev = &entry->event;
+ ev->entry = entry;
+
+ ret = pthread_mutex_init(&ev->lock, NULL);
+ if (ret != 0)
+ goto error_return;
+ ret = pthread_cond_init(&ev->cond, NULL);
+ if (ret != 0)
+ goto cleanup_mutex;
+ INIT_LIST_HEAD(&ev->events);
+
+ ev->next_seq = 0; /* bootstrap sequencing */
+
+ if (GF_NEED_ORDERED_EVENTS(entry)) {
+ entry->pickevent = pick_event_ordered;
+ entry->queueevent = queue_ordered_event;
+ } else {
+ entry->pickevent = pick_event_unordered;
+ entry->queueevent = queue_unordered_event;
+ }
+
+ ret = gf_thread_create(&ev->invoker, NULL, gf_changelog_callback_invoker,
+ ev, "clogcbki");
+ if (ret != 0) {
+ entry->pickevent = NULL;
+ entry->queueevent = NULL;
+ goto cleanup_cond;
+ }
+
+ return 0;
+
+cleanup_cond:
+ (void)pthread_cond_destroy(&ev->cond);
+cleanup_mutex:
+ (void)pthread_mutex_destroy(&ev->lock);
+error_return:
+ return -1;
+}
+
+/**
+ * TODO:
+ * - cleanup invoker thread
+ * - cleanup event list
+ * - destroy rpc{-clnt, svc}
+ */
+int
+gf_cleanup_brick_connection(xlator_t *this, gf_changelog_t *entry)
+{
+ return 0;
+}
+
+int
+gf_cleanup_connections(xlator_t *this)
+{
+ return 0;
+}
+
+static int
+gf_setup_brick_connection(xlator_t *this, struct gf_brick_spec *brick,
+ gf_boolean_t ordered, void *xl)
+{
+ int ret = 0;
+ gf_private_t *priv = NULL;
+ gf_changelog_t *entry = NULL;
+
+ priv = this->private;
+
+ if (!brick->callback || !brick->init || !brick->fini)
+ goto error_return;
+
+ entry = GF_CALLOC(1, sizeof(*entry), gf_changelog_mt_libgfchangelog_t);
+ if (!entry)
+ goto error_return;
+ INIT_LIST_HEAD(&entry->list);
+
+ LOCK_INIT(&entry->statelock);
+ entry->connstate = GF_CHANGELOG_CONN_STATE_PENDING;
+
+ entry->notify = brick->filter;
+ if (snprintf(entry->brick, PATH_MAX, "%s", brick->brick_path) >= PATH_MAX)
+ goto free_entry;
+
+ entry->this = this;
+ entry->invokerxl = xl;
+
+ entry->ordered = ordered;
+ ret = gf_init_event(entry);
+ if (ret)
+ goto free_entry;
+
+ entry->fini = brick->fini;
+ entry->callback = brick->callback;
+ entry->connected = brick->connected;
+ entry->disconnected = brick->disconnected;
+
+ entry->ptr = brick->init(this, brick);
+ if (!entry->ptr)
+ goto cleanup_event;
+ priv->api = entry->ptr; /* pointer to API, if required */
+
+ pthread_mutex_lock(&priv->lock);
+ {
+ list_add_tail(&entry->list, &priv->connections);
+ }
+ pthread_mutex_unlock(&priv->lock);
+
+ ret = gf_changelog_setup_rpc(this, entry, CHANGELOG_RPC_PROBE_FILTER);
+ if (ret)
+ goto cleanup_event;
+ return 0;
+
+cleanup_event:
+ (void)gf_cleanup_event(this, &entry->event);
+free_entry:
+ gf_msg_debug(this->name, 0, "freeing entry %p", entry);
+ list_del(&entry->list); /* FIXME: kludge for now */
+ GF_FREE(entry);
+error_return:
+ return -1;
+}
+
+int
+gf_changelog_register_brick(xlator_t *this, struct gf_brick_spec *brick,
+ gf_boolean_t ordered, void *xl)
+{
+ return gf_setup_brick_connection(this, brick, ordered, xl);
+}
+
+static int
+gf_changelog_setup_logging(xlator_t *this, char *logfile, int loglevel)
+{
+ /* passing ident as NULL means to use default ident for syslog */
+ if (gf_log_init(this->ctx, logfile, NULL))
+ return -1;
+
+ gf_log_set_loglevel(this->ctx, (loglevel == -1) ? GF_LOG_INFO : loglevel);
+ return 0;
+}
+
+static int
+gf_changelog_set_master(xlator_t *master, void *xl)
+{
+ int32_t ret = 0;
+ xlator_t *this = NULL;
+ xlator_t *old_this = NULL;
+ gf_private_t *priv = NULL;
+
+ this = xl;
+ if (!this || !this->ctx) {
+ ret = gf_changelog_init_master();
+ if (ret)
+ return -1;
+ this = THIS;
+ }
+
+ master->ctx = this->ctx;
+
+ INIT_LIST_HEAD(&master->volume_options);
+ SAVE_THIS(THIS);
+
+ ret = xlator_mem_acct_init(THIS, gf_changelog_mt_end);
+ if (ret != 0)
+ goto restore_this;
+
+ priv = gf_changelog_alloc_priv();
+ if (!priv) {
+ ret = -1;
+ goto restore_this;
+ }
+
+ if (!xl) {
+ /* poller thread */
+ ret = gf_thread_create(&priv->poller, NULL, changelog_rpc_poller, THIS,
+ "clogpoll");
+ if (ret != 0) {
+ GF_FREE(priv);
+ gf_msg(master->name, GF_LOG_ERROR, 0,
+ CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED,
+ "failed to spawn poller thread");
+ goto restore_this;
+ }
+ }
+
+ master->private = priv;
+
+restore_this:
+ RESTORE_THIS();
+
+ return ret;
+}
+
+int
+gf_changelog_init(void *xl)
+{
+ int ret = 0;
+ gf_private_t *priv = NULL;
+
+ if (master)
+ return 0;
+
+ master = calloc(1, sizeof(*master));
+ if (!master)
+ goto error_return;
+
+ master->name = strdup("gfchangelog");
+ if (!master->name)
+ goto dealloc_master;
+
+ ret = gf_changelog_set_master(master, xl);
+ if (ret)
+ goto dealloc_name;
+
+ priv = master->private;
+ ret = gf_thread_create(&priv->connectionjanitor, NULL,
+ gf_changelog_connection_janitor, master, "clogjan");
+ if (ret != 0) {
+ /* TODO: cleanup priv, mutex (poller thread for !xl) */
+ goto dealloc_name;
+ }
+
+ return 0;
+
+dealloc_name:
+ free(master->name);
+dealloc_master:
+ free(master);
+ master = NULL;
+error_return:
+ return -1;
+}
+
+int
+gf_changelog_register_generic(struct gf_brick_spec *bricks, int count,
+ int ordered, char *logfile, int lvl, void *xl)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ xlator_t *old_this = NULL;
+ struct gf_brick_spec *brick = NULL;
+ gf_boolean_t need_order = _gf_false;
+
+ SAVE_THIS(xl);
+
+ this = THIS;
+ if (!this)
+ goto error_return;
+
+ ret = gf_changelog_setup_logging(this, logfile, lvl);
+ if (ret)
+ goto error_return;
+
+ need_order = (ordered) ? _gf_true : _gf_false;
+
+ brick = bricks;
+ while (count--) {
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_LIB_MSG_NOTIFY_REGISTER_INFO, "brick=%s",
+ brick->brick_path, "notify_filter=%d", brick->filter, NULL);
+
+ ret = gf_changelog_register_brick(this, brick, need_order, xl);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_LIB_MSG_NOTIFY_REGISTER_FAILED,
+ "Error registering with changelog xlator");
+ break;
+ }
+
+ brick++;
+ }
+
+ if (ret != 0)
+ goto cleanup_inited_bricks;
+
+ RESTORE_THIS();
+ return 0;
+
+cleanup_inited_bricks:
+ gf_cleanup_connections(this);
+error_return:
+ RESTORE_THIS();
+ return -1;
+}
+
+/**
+ * @API
+ * gf_changelog_register()
+ *
+ * This is _NOT_ a generic register API. It's a special API to handle
+ * updates at a journal granulality. This is used by consumers wanting
+ * to process persistent journal such as geo-replication via a set of
+ * APIs. All of this is required to maintain backward compatibility.
+ * Owner specific private data is stored in ->api (in gf_private_t),
+ * which is used by APIs to access it's private data. This limits
+ * the library access to a single brick, but that's how it used to
+ * be anyway. Furthermore, this API solely _owns_ "this", therefore
+ * callers already having a notion of "this" are expected to use the
+ * newer API.
+ *
+ * Newer applications wanting to use this library need not face this
+ * limitation and reply of the much more feature rich generic register
+ * API, which is purely callback based.
+ *
+ * NOTE: @max_reconnects is not used but required for backward compat.
+ *
+ * For generic API, refer gf_changelog_register_generic().
+ */
+int
+gf_changelog_register(char *brick_path, char *scratch_dir, char *log_file,
+ int log_level, int max_reconnects)
+{
+ struct gf_brick_spec brick = {
+ 0,
+ };
+
+ if (master)
+ THIS = master;
+ else
+ return -1;
+
+ brick.brick_path = brick_path;
+ brick.filter = CHANGELOG_OP_TYPE_JOURNAL;
+
+ brick.init = gf_changelog_journal_init;
+ brick.fini = gf_changelog_journal_fini;
+ brick.callback = gf_changelog_handle_journal;
+ brick.connected = gf_changelog_journal_connect;
+ brick.disconnected = gf_changelog_journal_disconnect;
+
+ brick.ptr = scratch_dir;
+
+ return gf_changelog_register_generic(&brick, 1, 1, log_file, log_level,
+ NULL);
+}
diff --git a/xlators/features/changelog/lib/src/gf-history-changelog.c b/xlators/features/changelog/lib/src/gf-history-changelog.c
new file mode 100644
index 00000000000..a16219f3664
--- /dev/null
+++ b/xlators/features/changelog/lib/src/gf-history-changelog.c
@@ -0,0 +1,1020 @@
+#include <errno.h>
+#include <dirent.h>
+#include <stddef.h>
+#include <sys/types.h>
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <string.h>
+
+#include <glusterfs/globals.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/syscall.h>
+
+#include "gf-changelog-helpers.h"
+#include "gf-changelog-journal.h"
+
+/* from the changelog translator */
+#include "changelog-misc.h"
+#include "changelog-lib-messages.h"
+#include "changelog-mem-types.h"
+
+/**
+ * @API
+ * gf_history_changelog_done:
+ * Move processed history changelog file from .processing
+ * to .processed
+ *
+ * ARGUMENTS:
+ * file(IN): path to processed history changelog file in
+ * .processing directory.
+ *
+ * RETURN VALUE:
+ * 0: On success.
+ * -1: On error.
+ */
+int
+gf_history_changelog_done(char *file)
+{
+ int ret = -1;
+ char *buffer = NULL;
+ xlator_t *this = NULL;
+ gf_changelog_journal_t *jnl = NULL;
+ gf_changelog_journal_t *hist_jnl = NULL;
+ char to_path[PATH_MAX] = {
+ 0,
+ };
+
+ errno = EINVAL;
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this);
+ if (!jnl)
+ goto out;
+
+ hist_jnl = jnl->hist_jnl;
+ if (!hist_jnl)
+ goto out;
+
+ if (!file || !strlen(file))
+ goto out;
+
+ /* make sure 'file' is inside ->jnl_working_dir */
+ buffer = realpath(file, NULL);
+ if (!buffer)
+ goto out;
+
+ if (strncmp(hist_jnl->jnl_working_dir, buffer,
+ strlen(hist_jnl->jnl_working_dir)))
+ goto out;
+
+ (void)snprintf(to_path, PATH_MAX, "%s%s", hist_jnl->jnl_processed_dir,
+ basename(buffer));
+ gf_msg_debug(this->name, 0, "moving %s to processed directory", file);
+ ret = sys_rename(buffer, to_path);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_LIB_MSG_RENAME_FAILED, "from=%s", file, "to=%s",
+ to_path, NULL);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (buffer)
+ free(buffer); /* allocated by realpath() */
+ return ret;
+}
+
+/**
+ * @API
+ * gf_history_changelog_start_fresh:
+ * For a set of changelogs, start from the beginning.
+ * It will truncates the history tracker fd.
+ *
+ * RETURN VALUES:
+ * 0: On success.
+ * -1: On error.
+ */
+int
+gf_history_changelog_start_fresh()
+{
+ xlator_t *this = NULL;
+ gf_changelog_journal_t *jnl = NULL;
+ gf_changelog_journal_t *hist_jnl = NULL;
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ errno = EINVAL;
+
+ jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this);
+ if (!jnl)
+ goto out;
+
+ hist_jnl = jnl->hist_jnl;
+ if (!hist_jnl)
+ goto out;
+
+ if (gf_ftruncate(hist_jnl->jnl_fd, 0))
+ goto out;
+
+ return 0;
+
+out:
+ return -1;
+}
+
+/**
+ * @API
+ * gf_history_changelog_next_change:
+ * Return the next history changelog file entry. Zero means all
+ * history chanelogs are consumed.
+ *
+ * ARGUMENTS:
+ * bufptr(OUT): Path to unprocessed history changelog file
+ * from tracker file.
+ * maxlen(IN): Usually PATH_MAX.
+ *
+ * RETURN VALUES:
+ * size: On success.
+ * -1 : On error.
+ */
+ssize_t
+gf_history_changelog_next_change(char *bufptr, size_t maxlen)
+{
+ ssize_t size = -1;
+ int tracker_fd = 0;
+ xlator_t *this = NULL;
+ gf_changelog_journal_t *jnl = NULL;
+ gf_changelog_journal_t *hist_jnl = NULL;
+ char buffer[PATH_MAX] = {
+ 0,
+ };
+
+ if (maxlen > PATH_MAX) {
+ errno = ENAMETOOLONG;
+ goto out;
+ }
+
+ errno = EINVAL;
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this);
+ if (!jnl)
+ goto out;
+
+ hist_jnl = jnl->hist_jnl;
+ if (!hist_jnl)
+ goto out;
+
+ tracker_fd = hist_jnl->jnl_fd;
+
+ size = gf_readline(tracker_fd, buffer, maxlen);
+ if (size < 0) {
+ size = -1;
+ goto out;
+ }
+
+ if (size == 0)
+ goto out;
+
+ memcpy(bufptr, buffer, size - 1);
+ bufptr[size - 1] = '\0';
+
+out:
+ return size;
+}
+
+/**
+ * @API
+ * gf_history_changelog_scan:
+ * Scan and generate a list of change entries.
+ * Calling this api multiple times (without calling gf_changlog_done())
+ * would result new changelogs(s) being refreshed in the tracker file.
+ * This call also acts as a cancellation point for the consumer.
+ *
+ * RETURN VALUES:
+ * +ve integer : success and keep scanning.(count of changelogs)
+ * 0 : success and done scanning.
+ * -1 : error.
+ *
+ * NOTE: After first 0 return call_get_next change for once more time
+ * to empty the tracker
+ *
+ */
+ssize_t
+gf_history_changelog_scan()
+{
+ int tracker_fd = 0;
+ size_t off = 0;
+ xlator_t *this = NULL;
+ size_t nr_entries = 0;
+ gf_changelog_journal_t *jnl = NULL;
+ gf_changelog_journal_t *hist_jnl = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char buffer[PATH_MAX] = {
+ 0,
+ };
+ static int is_last_scan;
+
+ this = THIS;
+ if (!this)
+ goto out;
+
+ jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this);
+ if (!jnl)
+ goto out;
+ if (JNL_IS_API_DISCONNECTED(jnl)) {
+ errno = ENOTCONN;
+ goto out;
+ }
+
+ hist_jnl = jnl->hist_jnl;
+ if (!hist_jnl)
+ goto out;
+
+retry:
+ if (is_last_scan == 1)
+ return 0;
+ if (hist_jnl->hist_done == 0)
+ is_last_scan = 1;
+
+ errno = EINVAL;
+ if (hist_jnl->hist_done == -1)
+ goto out;
+
+ tracker_fd = hist_jnl->jnl_fd;
+
+ if (gf_ftruncate(tracker_fd, 0))
+ goto out;
+
+ rewinddir(hist_jnl->jnl_dir);
+
+ for (;;) {
+ errno = 0;
+ entry = sys_readdir(hist_jnl->jnl_dir, scratch);
+ if (!entry || errno != 0)
+ break;
+
+ if (strcmp(basename(entry->d_name), ".") == 0 ||
+ strcmp(basename(entry->d_name), "..") == 0)
+ continue;
+
+ nr_entries++;
+
+ GF_CHANGELOG_FILL_BUFFER(hist_jnl->jnl_processing_dir, buffer, off,
+ strlen(hist_jnl->jnl_processing_dir));
+ GF_CHANGELOG_FILL_BUFFER(entry->d_name, buffer, off,
+ strlen(entry->d_name));
+ GF_CHANGELOG_FILL_BUFFER("\n", buffer, off, 1);
+
+ if (gf_changelog_write(tracker_fd, buffer, off) != off) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, CHANGELOG_LIB_MSG_WRITE_FAILED,
+ "error writing changelog filename"
+ " to tracker file");
+ break;
+ }
+ off = 0;
+ }
+
+ gf_msg_debug(this->name, 0, "hist_done %d, is_last_scan: %d",
+ hist_jnl->hist_done, is_last_scan);
+
+ if (!entry) {
+ if (gf_lseek(tracker_fd, 0, SEEK_SET) != -1) {
+ if (nr_entries > 0)
+ return nr_entries;
+ else {
+ sleep(1);
+ goto retry;
+ }
+ }
+ }
+out:
+ return -1;
+}
+
+/*
+ * Gets timestamp value at the changelog path at index.
+ * Returns 0 on success(updates given time-stamp), -1 on failure.
+ */
+int
+gf_history_get_timestamp(int fd, int index, int len, unsigned long *ts)
+{
+ xlator_t *this = NULL;
+ int n_read = -1;
+ char path_buf[PATH_MAX] = {
+ 0,
+ };
+ char *iter = path_buf;
+ size_t offset = index * (len + 1);
+ unsigned long value = 0;
+ int ret = 0;
+
+ this = THIS;
+ if (!this) {
+ return -1;
+ }
+
+ n_read = sys_pread(fd, path_buf, len, offset);
+ if (n_read < 0) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_READ_ERROR,
+ "could not read from htime file");
+ goto out;
+ }
+ iter += len - TIMESTAMP_LENGTH;
+ sscanf(iter, "%lu", &value);
+out:
+ if (ret == 0)
+ *ts = value;
+ return ret;
+}
+
+/*
+ * Function to ensure correctness of search
+ * Checks whether @value is there next to @target_index or not
+ */
+int
+gf_history_check(int fd, int target_index, unsigned long value, int len)
+{
+ int ret = 0;
+ unsigned long ts1 = 0;
+ unsigned long ts2 = 0;
+
+ if (target_index == 0) {
+ ret = gf_history_get_timestamp(fd, target_index, len, &ts1);
+ if (ret == -1)
+ goto out;
+ if (value <= ts1)
+ goto out;
+ else {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = gf_history_get_timestamp(fd, target_index, len, &ts1);
+ if (ret == -1)
+ goto out;
+ ret = gf_history_get_timestamp(fd, target_index - 1, len, &ts2);
+ if (ret == -1)
+ goto out;
+
+ if ((value <= ts1) && (value > ts2)) {
+ goto out;
+ } else
+ ret = -1;
+out:
+ return ret;
+}
+
+/*
+ * This is a "binary search" based search function which checks neighbours
+ * for in-range availability of the value to be searched and provides the
+ * index at which the changelog file nearest to the requested timestamp(value)
+ * can be read from.
+ *
+ * Actual offset can be calculated as (index* (len+1) ).
+ * "1" is because the changelog paths are null terminated.
+ *
+ * @path : Htime file to search in
+ * @value : time stamp to search
+ * @from : start index to search
+ * @to : end index to search
+ * @len : length of fixes length strings separated by null
+ */
+
+int
+gf_history_b_search(int fd, unsigned long value, unsigned long from,
+ unsigned long to, int len)
+{
+ int m_index = -1;
+ unsigned long cur_value = 0;
+ unsigned long ts1 = 0;
+ int ret = 0;
+
+ m_index = (from + to) / 2;
+
+ if ((to - from) <= 1) {
+ /* either one or 2 changelogs left */
+ if (to != from) {
+ /* check if value is less or greater than to
+ * return accordingly
+ */
+ ret = gf_history_get_timestamp(fd, from, len, &ts1);
+ if (ret == -1)
+ goto out;
+ if (ts1 >= value) {
+ /* actually compatision should be
+ * exactly == but considering
+ *
+ * case of only 2 changelogs in htime file
+ */
+ return from;
+ } else
+ return to;
+ } else
+ return to;
+ }
+
+ ret = gf_history_get_timestamp(fd, m_index, len, &cur_value);
+ if (ret == -1)
+ goto out;
+ if (cur_value == value) {
+ return m_index;
+ } else if (value > cur_value) {
+ ret = gf_history_get_timestamp(fd, m_index + 1, len, &cur_value);
+ if (ret == -1)
+ goto out;
+ if (value < cur_value)
+ return m_index + 1;
+ else
+ return gf_history_b_search(fd, value, m_index + 1, to, len);
+ } else {
+ if (m_index == 0) {
+ /* we are sure that values exists
+ * in this htime file
+ */
+ return 0;
+ } else {
+ ret = gf_history_get_timestamp(fd, m_index - 1, len, &cur_value);
+ if (ret == -1)
+ goto out;
+ if (value > cur_value) {
+ return m_index;
+ } else
+ return gf_history_b_search(fd, value, from, m_index - 1, len);
+ }
+ }
+out:
+ return -1;
+}
+
+/*
+ * Description: Checks if the changelog path is usable or not,
+ * which is differentiated by checking for "changelog"
+ * in the path and not "CHANGELOG".
+ *
+ * Returns:
+ * 1 : Yes, usable ( contains "CHANGELOG" )
+ * 0 : No, Not usable ( contains, "changelog")
+ */
+int
+gf_is_changelog_usable(char *cl_path)
+{
+ int ret = -1;
+ const char low_c[] = "changelog";
+ char *str_ret = NULL;
+ char *bname = NULL;
+
+ bname = basename(cl_path);
+
+ str_ret = strstr(bname, low_c);
+
+ if (str_ret != NULL)
+ ret = 0;
+ else
+ ret = 1;
+
+ return ret;
+}
+
+void *
+gf_changelog_consume_wrap(void *data)
+{
+ int ret = -1;
+ ssize_t nread = 0;
+ xlator_t *this = NULL;
+ gf_changelog_consume_data_t *ccd = NULL;
+
+ ccd = (gf_changelog_consume_data_t *)data;
+ this = ccd->this;
+
+ ccd->retval = -1;
+
+ nread = sys_pread(ccd->fd, ccd->changelog, PATH_MAX - 1, ccd->offset);
+ if (nread < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_READ_ERROR,
+ "cannot read from history metadata file");
+ goto out;
+ }
+
+ /* TODO: handle short reads and EOF. */
+ if (gf_is_changelog_usable(ccd->changelog) == 1) {
+ ret = gf_changelog_consume(ccd->this, ccd->jnl, ccd->changelog,
+ _gf_true);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_LIB_MSG_PARSE_ERROR,
+ "name=%s", ccd->changelog, NULL);
+ goto out;
+ }
+ }
+ ccd->retval = 0;
+
+out:
+ return NULL;
+}
+
+/**
+ * "gf_history_consume" is a worker function for history.
+ * parses and moves changelogs files from index "from"
+ * to index "to" in open htime file whose fd is "fd".
+ */
+
+#define MAX_PARALLELS 10
+
+void *
+gf_history_consume(void *data)
+{
+ xlator_t *this = NULL;
+ gf_changelog_journal_t *jnl = NULL;
+ gf_changelog_journal_t *hist_jnl = NULL;
+ int ret = 0;
+ int iter = 0;
+ int fd = -1;
+ int from = -1;
+ int to = -1;
+ int len = -1;
+ int n_parallel = 0;
+ int n_envoked = 0;
+ gf_boolean_t publish = _gf_true;
+ pthread_t th_id[MAX_PARALLELS] = {
+ 0,
+ };
+ gf_changelog_history_data_t *hist_data = NULL;
+ gf_changelog_consume_data_t ccd[MAX_PARALLELS] = {
+ {0},
+ };
+ gf_changelog_consume_data_t *curr = NULL;
+
+ hist_data = (gf_changelog_history_data_t *)data;
+ if (hist_data == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ fd = hist_data->htime_fd;
+ from = hist_data->from;
+ to = hist_data->to;
+ len = hist_data->len;
+ n_parallel = hist_data->n_parallel;
+
+ THIS = hist_data->this;
+ this = hist_data->this;
+ if (!this) {
+ ret = -1;
+ goto out;
+ }
+
+ jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this);
+ if (!jnl) {
+ ret = -1;
+ goto out;
+ }
+
+ hist_jnl = jnl->hist_jnl;
+ if (!hist_jnl) {
+ ret = -1;
+ goto out;
+ }
+
+ while (from <= to) {
+ n_envoked = 0;
+
+ for (iter = 0; (iter < n_parallel) && (from <= to); iter++) {
+ curr = &ccd[iter];
+
+ curr->this = this;
+ curr->jnl = hist_jnl;
+ curr->fd = fd;
+ curr->offset = from * (len + 1);
+
+ curr->retval = 0;
+ memset(curr->changelog, '\0', PATH_MAX);
+
+ ret = gf_thread_create(&th_id[iter], NULL,
+ gf_changelog_consume_wrap, curr,
+ "clogc%03hx", (iter + 1) & 0x3ff);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ret,
+ CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED,
+ "could not create consume-thread");
+ goto sync;
+ } else
+ n_envoked++;
+
+ from++;
+ }
+
+ sync:
+ for (iter = 0; iter < n_envoked; iter++) {
+ ret = pthread_join(th_id[iter], NULL);
+ if (ret) {
+ publish = _gf_false;
+ gf_msg(this->name, GF_LOG_ERROR, ret,
+ CHANGELOG_LIB_MSG_PTHREAD_JOIN_FAILED,
+ "pthread_join() error");
+ /* try to join the rest */
+ continue;
+ }
+
+ if (publish == _gf_false)
+ continue;
+
+ curr = &ccd[iter];
+ if (ccd->retval) {
+ publish = _gf_false;
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_LIB_MSG_PARSE_ERROR_CEASED, NULL);
+ continue;
+ }
+
+ ret = gf_changelog_publish(curr->this, curr->jnl, curr->changelog);
+ if (ret) {
+ publish = _gf_false;
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_LIB_MSG_PUBLISH_ERROR,
+ "publish error, ceased publishing...");
+ }
+ }
+ }
+
+ /* informing "parsing done". */
+ hist_jnl->hist_done = (publish == _gf_true) ? 0 : -1;
+
+out:
+ if (fd != -1)
+ (void)sys_close(fd);
+ GF_FREE(hist_data);
+ return NULL;
+}
+
+/**
+ * @API
+ * gf_history_changelog() : Get/parse historical changelogs and get them ready
+ * for consumption.
+ *
+ * Arguments:
+ * @changelog_dir : Directory location from where history changelogs are
+ * supposed to be consumed.
+ * @start: Unix timestamp FROM where changelogs should be consumed.
+ * @end: Unix timestamp TO where changelogsshould be consumed.
+ * @n_parallel : degree of parallelism while changelog parsing.
+ * @actual_end : the end time till where changelogs are available.
+ *
+ * Return:
+ * Returns <timestamp> on success, the last time till where changelogs are
+ * available.
+ * Returns -1 on failure(error).
+ */
+
+/**
+ * Extract timestamp range from a historical metadata file
+ * Returns:
+ * 0 : Success ({min,max}_ts with the appropriate values)
+ * -1 : Failure
+ * -2 : Ignore this metadata file and process next
+ */
+int
+gf_changelog_extract_min_max(const char *dname, const char *htime_dir, int *fd,
+ unsigned long *total, unsigned long *min_ts,
+ unsigned long *max_ts)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ char htime_file[PATH_MAX] = {
+ 0,
+ };
+ struct stat stbuf = {
+ 0,
+ };
+ char *iter = NULL;
+ char x_value[30] = {
+ 0,
+ };
+
+ this = THIS;
+
+ snprintf(htime_file, PATH_MAX, "%s/%s", htime_dir, dname);
+
+ iter = (htime_file + strlen(htime_file) - TIMESTAMP_LENGTH);
+ sscanf(iter, "%lu", min_ts);
+
+ ret = sys_stat(htime_file, &stbuf);
+ if (ret) {
+ ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HTIME_ERROR,
+ "op=stat", "path=%s", htime_file, NULL);
+ goto out;
+ }
+
+ /* ignore everything except regular files */
+ if (!S_ISREG(stbuf.st_mode)) {
+ ret = -2;
+ goto out;
+ }
+
+ *fd = open(htime_file, O_RDONLY);
+ if (*fd < 0) {
+ ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HTIME_ERROR,
+ "op=open", "path=%s", htime_file, NULL);
+ goto out;
+ }
+
+ /* Looks good, extract max timestamp */
+ ret = sys_fgetxattr(*fd, HTIME_KEY, x_value, sizeof(x_value));
+ if (ret < 0) {
+ ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_LIB_MSG_GET_XATTR_FAILED, "path=%s", htime_file,
+ NULL);
+ goto out;
+ }
+
+ sscanf(x_value, "%lu:%lu", max_ts, total);
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_MIN_MAX_INFO,
+ "min=%lu", *min_ts, "max=%lu", *max_ts, "total_changelogs=%lu",
+ *total, NULL);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/* gf_history_changelog returns actual_end and spawns threads to
+ * parse historical changelogs. The return values are as follows.
+ * 0 : On success
+ * 1 : Successful, but partial historical changelogs available,
+ * end time falls into different htime file or future time
+ * -2 : Error, requested historical changelog not available, not
+ * even partial
+ * -1 : On any error
+ */
+int
+gf_history_changelog(char *changelog_dir, unsigned long start,
+ unsigned long end, int n_parallel,
+ unsigned long *actual_end)
+{
+ int ret = 0;
+ int len = -1;
+ int fd = -1;
+ int n_read = -1;
+ unsigned long min_ts = 0;
+ unsigned long max_ts = 0;
+ unsigned long end2 = 0;
+ unsigned long ts1 = 0;
+ unsigned long ts2 = 0;
+ unsigned long to = 0;
+ unsigned long from = 0;
+ unsigned long total_changelog = 0;
+ xlator_t *this = NULL;
+ gf_changelog_journal_t *jnl = NULL;
+ gf_changelog_journal_t *hist_jnl = NULL;
+ gf_changelog_history_data_t *hist_data = NULL;
+ DIR *dirp = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ pthread_t consume_th = 0;
+ char htime_dir[PATH_MAX] = {
+ 0,
+ };
+ char buffer[PATH_MAX] = {
+ 0,
+ };
+ gf_boolean_t partial_history = _gf_false;
+
+ pthread_attr_t attr;
+
+ this = THIS;
+ if (!this) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = pthread_attr_init(&attr);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_PTHREAD_ERROR,
+ "Pthread init failed");
+ return -1;
+ }
+
+ jnl = (gf_changelog_journal_t *)GF_CHANGELOG_GET_API_PTR(this);
+ if (!jnl) {
+ ret = -1;
+ goto out;
+ }
+
+ hist_jnl = (gf_changelog_journal_t *)jnl->hist_jnl;
+ if (!hist_jnl) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_REQUESTING_INFO,
+ "start=%lu", start, "end=%lu", end, NULL);
+
+ /* basic sanity check */
+ if (start > end || n_parallel <= 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HIST_FAILED,
+ "start=%lu", start, "end=%lu", end, "thread_count=%d",
+ n_parallel, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ /* cap parallelism count */
+ if (n_parallel > MAX_PARALLELS)
+ n_parallel = MAX_PARALLELS;
+
+ CHANGELOG_FILL_HTIME_DIR(changelog_dir, htime_dir);
+
+ dirp = sys_opendir(htime_dir);
+ if (dirp == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_LIB_MSG_HTIME_ERROR,
+ "op=opendir", "path=%s", htime_dir, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ for (;;) {
+ errno = 0;
+
+ entry = sys_readdir(dirp, scratch);
+
+ if (!entry || errno != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_LIB_MSG_HIST_FAILED, "start=%lu", start,
+ "end=%lu", end, NULL);
+ ret = -2;
+ break;
+ }
+
+ ret = gf_changelog_extract_min_max(entry->d_name, htime_dir, &fd,
+ &total_changelog, &min_ts, &max_ts);
+ if (ret) {
+ if (-2 == ret)
+ continue;
+ goto out;
+ }
+
+ if (start >= min_ts && start < max_ts) {
+ /**
+ * TODO: handle short reads later...
+ */
+ n_read = sys_read(fd, buffer, PATH_MAX);
+ if (n_read < 0) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_LIB_MSG_READ_ERROR,
+ "unable to read htime file");
+ goto out;
+ }
+
+ len = strlen(buffer);
+
+ /**
+ * search @start in the htime file returning it's index
+ * (@from)
+ */
+ from = gf_history_b_search(fd, start, 0, total_changelog - 1, len);
+
+ /* ensuring correctness of gf_b_search */
+ if (gf_history_check(fd, from, start, len) != 0) {
+ ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_LIB_MSG_GET_TIME_ERROR, "for=start",
+ "start=%lu", start, "idx=%lu", from, NULL);
+ goto out;
+ }
+
+ end2 = (end <= max_ts) ? end : max_ts;
+
+ /* Check if end falls out of same HTIME file. The end
+ * falling to a different htime file or changelog
+ * disable-enable is detected only after 20 seconds.
+ * This is required because, applications generally
+ * asks historical changelogs till current time and
+ * it is possible changelog is not rolled over yet.
+ * So, buffer time of default rollover time plus 5
+ * seconds is subtracted. If the application requests
+ * the end time with in half a minute of changelog
+ * disable, it's not detected as changelog disable and
+ * it's application's responsibility to retry after
+ * 20 seconds before confirming it as partial history.
+ */
+ if ((end - 20) > max_ts) {
+ partial_history = _gf_true;
+ }
+
+ /**
+ * search @end2 in htime file returning it's index (@to)
+ */
+ to = gf_history_b_search(fd, end2, 0, total_changelog - 1, len);
+
+ if (gf_history_check(fd, to, end2, len) != 0) {
+ ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_LIB_MSG_GET_TIME_ERROR, "for=end",
+ "start=%lu", end2, "idx=%lu", to, NULL);
+ goto out;
+ }
+
+ ret = gf_history_get_timestamp(fd, from, len, &ts1);
+ if (ret == -1)
+ goto out;
+
+ ret = gf_history_get_timestamp(fd, to, len, &ts2);
+ if (ret == -1)
+ goto out;
+
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_LIB_MSG_FINAL_INFO,
+ "from=%lu", ts1, "to=%lu", ts2, "changes=%lu",
+ (to - from + 1), NULL);
+
+ hist_data = GF_CALLOC(1, sizeof(gf_changelog_history_data_t),
+ gf_changelog_mt_history_data_t);
+
+ hist_data->htime_fd = fd;
+ hist_data->from = from;
+ hist_data->to = to;
+ hist_data->len = len;
+ hist_data->n_parallel = n_parallel;
+ hist_data->this = this;
+
+ ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ret,
+ CHANGELOG_LIB_MSG_PTHREAD_ERROR,
+ "unable to sets the detach"
+ " state attribute");
+ ret = -1;
+ goto out;
+ }
+
+ /* spawn a thread for background parsing & publishing */
+ ret = gf_thread_create(&consume_th, &attr, gf_history_consume,
+ hist_data, "cloghcon");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ret,
+ CHANGELOG_LIB_MSG_THREAD_CREATION_FAILED,
+ "creation of consume parent-thread"
+ " failed.");
+ ret = -1;
+ goto out;
+ }
+
+ goto out;
+
+ } else { /* end of range check */
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_LIB_MSG_HIST_FAILED, "start=%lu", start,
+ "end=%lu", end, "chlog_min=%lu", min_ts, "chlog_max=%lu",
+ max_ts, NULL);
+ }
+ } /* end of readdir() */
+
+out:
+ if (dirp != NULL)
+ (void)sys_closedir(dirp);
+
+ if (ret < 0) {
+ if (fd != -1)
+ (void)sys_close(fd);
+ GF_FREE(hist_data);
+ (void)pthread_attr_destroy(&attr);
+
+ return ret;
+ }
+
+ hist_jnl->hist_done = 1;
+ *actual_end = ts2;
+
+ if (partial_history) {
+ ret = 1;
+ }
+
+ return ret;
+}
diff --git a/xlators/features/changelog/src/Makefile.am b/xlators/features/changelog/src/Makefile.am
new file mode 100644
index 00000000000..eee7dfa238d
--- /dev/null
+++ b/xlators/features/changelog/src/Makefile.am
@@ -0,0 +1,29 @@
+xlator_LTLIBRARIES = changelog.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+noinst_HEADERS = changelog-helpers.h changelog-mem-types.h changelog-rt.h \
+ changelog-rpc-common.h changelog-misc.h changelog-encoders.h \
+ changelog-rpc-common.h changelog-rpc.h changelog-ev-handle.h \
+ changelog-messages.h
+
+changelog_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+changelog_la_SOURCES = changelog.c changelog-rt.c changelog-helpers.c \
+ changelog-encoders.c changelog-rpc.c changelog-barrier.c \
+ changelog-rpc-common.c changelog-ev-handle.c
+changelog_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/rpc-transport/socket/src \
+ -I$(top_srcdir)/xlators/features/changelog/lib/src/ \
+ -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \
+ -DDATADIR=\"$(localstatedir)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/changelog/src/changelog-barrier.c b/xlators/features/changelog/src/changelog-barrier.c
new file mode 100644
index 00000000000..0fb89ddb127
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-barrier.c
@@ -0,0 +1,131 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-helpers.h"
+#include "changelog-messages.h"
+#include <glusterfs/call-stub.h>
+
+/* Enqueue a stub*/
+void
+__chlog_barrier_enqueue(xlator_t *this, call_stub_t *stub)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ list_add_tail(&stub->list, &priv->queue);
+ priv->queue_size++;
+
+ return;
+}
+
+/* Dequeue a stub */
+call_stub_t *
+__chlog_barrier_dequeue(xlator_t *this, struct list_head *queue)
+{
+ call_stub_t *stub = NULL;
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ if (list_empty(queue))
+ goto out;
+
+ stub = list_entry(queue->next, call_stub_t, list);
+ list_del_init(&stub->list);
+
+out:
+ return stub;
+}
+
+/* Dequeue all the stubs and call corresponding resume functions */
+void
+chlog_barrier_dequeue_all(xlator_t *this, struct list_head *queue)
+{
+ call_stub_t *stub = NULL;
+
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS,
+ NULL);
+
+ while ((stub = __chlog_barrier_dequeue(this, queue)))
+ call_resume(stub);
+
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED, NULL);
+ return;
+}
+
+/* Function called on changelog barrier timeout */
+void
+chlog_barrier_timeout(void *data)
+{
+ xlator_t *this = NULL;
+ changelog_priv_t *priv = NULL;
+ struct list_head queue = {
+ 0,
+ };
+
+ this = data;
+ THIS = this;
+ priv = this->private;
+
+ INIT_LIST_HEAD(&queue);
+
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_BARRIER_TIMEOUT, NULL);
+
+ LOCK(&priv->lock);
+ {
+ __chlog_barrier_disable(this, &queue);
+ }
+ UNLOCK(&priv->lock);
+
+ chlog_barrier_dequeue_all(this, &queue);
+
+ return;
+}
+
+/* Disable changelog barrier enable flag */
+void
+__chlog_barrier_disable(xlator_t *this, struct list_head *queue)
+{
+ changelog_priv_t *priv = this->private;
+ GF_ASSERT(priv);
+
+ if (priv->timer) {
+ gf_timer_call_cancel(this->ctx, priv->timer);
+ priv->timer = NULL;
+ }
+
+ list_splice_init(&priv->queue, queue);
+ priv->queue_size = 0;
+ priv->barrier_enabled = _gf_false;
+}
+
+/* Enable chagelog barrier enable with timer */
+int
+__chlog_barrier_enable(xlator_t *this, changelog_priv_t *priv)
+{
+ int ret = -1;
+
+ priv->timer = gf_timer_call_after(this->ctx, priv->timeout,
+ chlog_barrier_timeout, (void *)this);
+ if (!priv->timer) {
+ gf_smsg(this->name, GF_LOG_CRITICAL, 0,
+ CHANGELOG_MSG_TIMEOUT_ADD_FAILED, NULL);
+ goto out;
+ }
+
+ priv->barrier_enabled = _gf_true;
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/xlators/features/changelog/src/changelog-encoders.c b/xlators/features/changelog/src/changelog-encoders.c
new file mode 100644
index 00000000000..63754516c2e
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-encoders.c
@@ -0,0 +1,232 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-encoders.h"
+
+size_t
+entry_fn(void *data, char *buffer, gf_boolean_t encode)
+{
+ char *tmpbuf = NULL;
+ size_t bufsz = 0;
+ struct changelog_entry_fields *ce = NULL;
+
+ ce = (struct changelog_entry_fields *)data;
+
+ if (encode) {
+ tmpbuf = uuid_utoa(ce->cef_uuid);
+ CHANGELOG_FILL_BUFFER(buffer, bufsz, tmpbuf, strlen(tmpbuf));
+ } else {
+ CHANGELOG_FILL_BUFFER(buffer, bufsz, ce->cef_uuid, sizeof(uuid_t));
+ }
+
+ CHANGELOG_FILL_BUFFER(buffer, bufsz, "/", 1);
+ CHANGELOG_FILL_BUFFER(buffer, bufsz, ce->cef_bname, strlen(ce->cef_bname));
+ return bufsz;
+}
+
+size_t
+del_entry_fn(void *data, char *buffer, gf_boolean_t encode)
+{
+ char *tmpbuf = NULL;
+ size_t bufsz = 0;
+ struct changelog_entry_fields *ce = NULL;
+
+ ce = (struct changelog_entry_fields *)data;
+
+ if (encode) {
+ tmpbuf = uuid_utoa(ce->cef_uuid);
+ CHANGELOG_FILL_BUFFER(buffer, bufsz, tmpbuf, strlen(tmpbuf));
+ } else {
+ CHANGELOG_FILL_BUFFER(buffer, bufsz, ce->cef_uuid, sizeof(uuid_t));
+ }
+
+ CHANGELOG_FILL_BUFFER(buffer, bufsz, "/", 1);
+ CHANGELOG_FILL_BUFFER(buffer, bufsz, ce->cef_bname, strlen(ce->cef_bname));
+ CHANGELOG_FILL_BUFFER(buffer, bufsz, "\0", 1);
+
+ if (ce->cef_path[0] == '\0') {
+ CHANGELOG_FILL_BUFFER(buffer, bufsz, "\0", 1);
+ } else {
+ CHANGELOG_FILL_BUFFER(buffer, bufsz, ce->cef_path,
+ strlen(ce->cef_path));
+ }
+
+ return bufsz;
+}
+
+size_t
+fop_fn(void *data, char *buffer, gf_boolean_t encode)
+{
+ char buf[10] = {
+ 0,
+ };
+ size_t bufsz = 0;
+ glusterfs_fop_t fop = 0;
+
+ fop = *(glusterfs_fop_t *)data;
+
+ if (encode) {
+ (void)snprintf(buf, sizeof(buf), "%d", fop);
+ CHANGELOG_FILL_BUFFER(buffer, bufsz, buf, strlen(buf));
+ } else
+ CHANGELOG_FILL_BUFFER(buffer, bufsz, &fop, sizeof(fop));
+
+ return bufsz;
+}
+
+size_t
+number_fn(void *data, char *buffer, gf_boolean_t encode)
+{
+ size_t bufsz = 0;
+ unsigned int nr = 0;
+ char buf[20] = {
+ 0,
+ };
+
+ nr = *(unsigned int *)data;
+
+ if (encode) {
+ (void)snprintf(buf, sizeof(buf), "%u", nr);
+ CHANGELOG_FILL_BUFFER(buffer, bufsz, buf, strlen(buf));
+ } else
+ CHANGELOG_FILL_BUFFER(buffer, bufsz, &nr, sizeof(unsigned int));
+
+ return bufsz;
+}
+
+void
+entry_free_fn(void *data)
+{
+ changelog_opt_t *co = data;
+
+ if (!co)
+ return;
+
+ GF_FREE(co->co_entry.cef_bname);
+}
+
+void
+del_entry_free_fn(void *data)
+{
+ changelog_opt_t *co = data;
+
+ if (!co)
+ return;
+
+ GF_FREE(co->co_entry.cef_bname);
+ GF_FREE(co->co_entry.cef_path);
+}
+
+/**
+ * try to write all data in one shot
+ */
+
+static void
+changelog_encode_write_xtra(changelog_log_data_t *cld, char *buffer,
+ size_t *off, gf_boolean_t encode)
+{
+ int i = 0;
+ size_t offset = 0;
+ void *data = NULL;
+ changelog_opt_t *co = NULL;
+
+ offset = *off;
+
+ co = (changelog_opt_t *)cld->cld_ptr;
+
+ for (; i < cld->cld_xtra_records; i++, co++) {
+ CHANGELOG_FILL_BUFFER(buffer, offset, "\0", 1);
+
+ switch (co->co_type) {
+ case CHANGELOG_OPT_REC_FOP:
+ data = &co->co_fop;
+ break;
+ case CHANGELOG_OPT_REC_ENTRY:
+ data = &co->co_entry;
+ break;
+ case CHANGELOG_OPT_REC_UINT32:
+ data = &co->co_uint32;
+ break;
+ }
+
+ if (co->co_convert)
+ offset += co->co_convert(data, buffer + offset, encode);
+ else /* no coversion: write it out as it is */
+ CHANGELOG_FILL_BUFFER(buffer, offset, data, co->co_len);
+ }
+
+ *off = offset;
+}
+
+int
+changelog_encode_ascii(xlator_t *this, changelog_log_data_t *cld)
+{
+ size_t off = 0;
+ size_t gfid_len = 0;
+ char *gfid_str = NULL;
+ char *buffer = NULL;
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ gfid_str = uuid_utoa(cld->cld_gfid);
+ gfid_len = strlen(gfid_str);
+
+ /* extra bytes for decorations */
+ buffer = alloca(gfid_len + cld->cld_ptr_len + 10);
+ CHANGELOG_STORE_ASCII(priv, buffer, off, gfid_str, gfid_len, cld);
+
+ if (cld->cld_xtra_records)
+ changelog_encode_write_xtra(cld, buffer, &off, _gf_true);
+
+ CHANGELOG_FILL_BUFFER(buffer, off, "\0", 1);
+
+ return changelog_write_change(priv, buffer, off);
+}
+
+int
+changelog_encode_binary(xlator_t *this, changelog_log_data_t *cld)
+{
+ size_t off = 0;
+ char *buffer = NULL;
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ /* extra bytes for decorations */
+ buffer = alloca(sizeof(uuid_t) + cld->cld_ptr_len + 10);
+ CHANGELOG_STORE_BINARY(priv, buffer, off, cld->cld_gfid, cld);
+
+ if (cld->cld_xtra_records)
+ changelog_encode_write_xtra(cld, buffer, &off, _gf_false);
+
+ CHANGELOG_FILL_BUFFER(buffer, off, "\0", 1);
+
+ return changelog_write_change(priv, buffer, off);
+}
+
+static struct changelog_encoder cb_encoder[] = {
+ [CHANGELOG_ENCODE_BINARY] =
+ {
+ .encoder = CHANGELOG_ENCODE_BINARY,
+ .encode = changelog_encode_binary,
+ },
+ [CHANGELOG_ENCODE_ASCII] =
+ {
+ .encoder = CHANGELOG_ENCODE_ASCII,
+ .encode = changelog_encode_ascii,
+ },
+};
+
+void
+changelog_encode_change(changelog_priv_t *priv)
+{
+ priv->ce = &cb_encoder[priv->encode_mode];
+}
diff --git a/xlators/features/changelog/src/changelog-encoders.h b/xlators/features/changelog/src/changelog-encoders.h
new file mode 100644
index 00000000000..26252696d56
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-encoders.h
@@ -0,0 +1,50 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_ENCODERS_H
+#define _CHANGELOG_ENCODERS_H
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "changelog-helpers.h"
+
+#define CHANGELOG_STORE_ASCII(priv, buf, off, gfid, gfid_len, cld) \
+ do { \
+ CHANGELOG_FILL_BUFFER(buffer, off, priv->maps[cld->cld_type], 1); \
+ CHANGELOG_FILL_BUFFER(buffer, off, gfid, gfid_len); \
+ } while (0)
+
+#define CHANGELOG_STORE_BINARY(priv, buf, off, gfid, cld) \
+ do { \
+ CHANGELOG_FILL_BUFFER(buffer, off, priv->maps[cld->cld_type], 1); \
+ CHANGELOG_FILL_BUFFER(buffer, off, gfid, sizeof(uuid_t)); \
+ } while (0)
+
+size_t
+entry_fn(void *data, char *buffer, gf_boolean_t encode);
+size_t
+del_entry_fn(void *data, char *buffer, gf_boolean_t encode);
+size_t
+fop_fn(void *data, char *buffer, gf_boolean_t encode);
+size_t
+number_fn(void *data, char *buffer, gf_boolean_t encode);
+void
+entry_free_fn(void *data);
+void
+del_entry_free_fn(void *data);
+int
+changelog_encode_binary(xlator_t *, changelog_log_data_t *);
+int
+changelog_encode_ascii(xlator_t *, changelog_log_data_t *);
+void
+changelog_encode_change(changelog_priv_t *);
+
+#endif /* _CHANGELOG_ENCODERS_H */
diff --git a/xlators/features/changelog/src/changelog-ev-handle.c b/xlators/features/changelog/src/changelog-ev-handle.c
new file mode 100644
index 00000000000..aa94459de5a
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-ev-handle.c
@@ -0,0 +1,412 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-ev-handle.h"
+#include "changelog-rpc-common.h"
+#include "changelog-helpers.h"
+
+struct rpc_clnt_program changelog_ev_program;
+
+#define NR_IOVEC (MAX_IOVEC - 3)
+struct ev_rpc_vec {
+ int count;
+ struct iovec vector[NR_IOVEC];
+
+ /* sequence number */
+ unsigned long seq;
+};
+
+struct ev_rpc {
+ rbuf_list_t *rlist;
+ struct rpc_clnt *rpc;
+ struct ev_rpc_vec vec;
+};
+
+/**
+ * As of now this just does the minimal (retval logging). Going further
+ * un-acknowledges sequence numbers can be retransmitted and other
+ * intelligence can be built into the server.
+ */
+int
+changelog_event_dispatch_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ return 0;
+}
+
+/* dispatcher RPC */
+int
+changelog_dispatch_vec(call_frame_t *frame, xlator_t *this,
+ struct rpc_clnt *rpc, struct ev_rpc_vec *vec)
+{
+ struct timeval tv = {
+ 0,
+ };
+ changelog_event_req req = {
+ 0,
+ };
+
+ (void)gettimeofday(&tv, NULL);
+
+ /**
+ * Event dispatch RPC header contains a sequence number for each
+ * dispatch. This allows the receiver to order the request before
+ * processing.
+ */
+ req.seq = vec->seq;
+ req.tv_sec = tv.tv_sec;
+ req.tv_usec = tv.tv_usec;
+
+ return changelog_rpc_sumbit_req(
+ rpc, (void *)&req, frame, &changelog_ev_program,
+ CHANGELOG_REV_PROC_EVENT, vec->vector, vec->count, NULL, this,
+ changelog_event_dispatch_cbk, (xdrproc_t)xdr_changelog_event_req);
+}
+
+int
+changelog_event_dispatch_rpc(call_frame_t *frame, xlator_t *this, void *data)
+{
+ int idx = 0;
+ int count = 0;
+ int ret = 0;
+ unsigned long sequence = 0;
+ rbuf_iovec_t *rvec = NULL;
+ struct ev_rpc *erpc = NULL;
+ struct rlist_iter riter = {
+ {
+ 0,
+ },
+ };
+
+ /* dispatch NR_IOVEC IO vectors at a time. */
+
+ erpc = data;
+ sequence = erpc->rlist->seq[0];
+
+ rlist_iter_init(&riter, erpc->rlist);
+
+ rvec_for_each_entry(rvec, &riter)
+ {
+ idx = count % NR_IOVEC;
+ if (++count == NR_IOVEC) {
+ erpc->vec.vector[idx] = rvec->iov;
+ erpc->vec.seq = sequence++;
+ erpc->vec.count = NR_IOVEC;
+
+ ret = changelog_dispatch_vec(frame, this, erpc->rpc, &erpc->vec);
+ if (ret)
+ break;
+ count = 0;
+ continue;
+ }
+
+ erpc->vec.vector[idx] = rvec->iov;
+ }
+
+ if (ret)
+ goto error_return;
+
+ idx = count % NR_IOVEC;
+ if (idx) {
+ erpc->vec.seq = sequence;
+ erpc->vec.count = idx;
+
+ ret = changelog_dispatch_vec(frame, this, erpc->rpc, &erpc->vec);
+ }
+
+error_return:
+ return ret;
+}
+
+int
+changelog_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data)
+{
+ xlator_t *this = NULL;
+ changelog_rpc_clnt_t *crpc = NULL;
+ changelog_clnt_t *c_clnt = NULL;
+ changelog_priv_t *priv = NULL;
+ changelog_ev_selector_t *selection = NULL;
+ uint64_t clntcnt = 0;
+ uint64_t xprtcnt = 0;
+
+ crpc = mydata;
+ this = crpc->this;
+ c_clnt = crpc->c_clnt;
+
+ priv = this->private;
+
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ selection = &priv->ev_selection;
+ GF_ATOMIC_INC(priv->clntcnt);
+
+ LOCK(&c_clnt->wait_lock);
+ {
+ LOCK(&c_clnt->active_lock);
+ {
+ changelog_select_event(this, selection, crpc->filter);
+ list_move_tail(&crpc->list, &c_clnt->active);
+ }
+ UNLOCK(&c_clnt->active_lock);
+ }
+ UNLOCK(&c_clnt->wait_lock);
+
+ break;
+ case RPC_CLNT_DISCONNECT:
+ rpc_clnt_disable(crpc->rpc);
+
+ /* rpc_clnt_disable doesn't unref the rpc. It just marks
+ * the rpc as disabled and cancels reconnection timer.
+ * Hence unref the rpc object to free it.
+ */
+ rpc_clnt_unref(crpc->rpc);
+
+ if (priv)
+ selection = &priv->ev_selection;
+
+ LOCK(&crpc->lock);
+ {
+ if (selection)
+ changelog_deselect_event(this, selection, crpc->filter);
+ changelog_set_disconnect_flag(crpc, _gf_true);
+ }
+ UNLOCK(&crpc->lock);
+ LOCK(&c_clnt->active_lock);
+ {
+ list_del_init(&crpc->list);
+ }
+ UNLOCK(&c_clnt->active_lock);
+
+ break;
+ case RPC_CLNT_MSG:
+ case RPC_CLNT_DESTROY:
+ /* Free up mydata */
+ changelog_rpc_clnt_unref(crpc);
+ clntcnt = GF_ATOMIC_DEC(priv->clntcnt);
+ xprtcnt = GF_ATOMIC_GET(priv->xprtcnt);
+ if (this->cleanup_starting) {
+ if (!clntcnt && !xprtcnt)
+ changelog_process_cleanup_event(this);
+ }
+ break;
+ case RPC_CLNT_PING:
+ break;
+ }
+
+ return 0;
+}
+
+void *
+changelog_ev_connector(void *data)
+{
+ xlator_t *this = NULL;
+ changelog_clnt_t *c_clnt = NULL;
+ changelog_rpc_clnt_t *crpc = NULL;
+
+ c_clnt = data;
+ this = c_clnt->this;
+
+ while (1) {
+ pthread_mutex_lock(&c_clnt->pending_lock);
+ {
+ while (list_empty(&c_clnt->pending))
+ pthread_cond_wait(&c_clnt->pending_cond, &c_clnt->pending_lock);
+ crpc = list_first_entry(&c_clnt->pending, changelog_rpc_clnt_t,
+ list);
+ crpc->rpc = changelog_rpc_client_init(this, crpc, crpc->sock,
+ changelog_rpc_notify);
+ if (!crpc->rpc) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_RPC_CONNECT_ERROR, "path=%s", crpc->sock,
+ NULL);
+ crpc->cleanup(crpc);
+ goto mutex_unlock;
+ }
+
+ LOCK(&c_clnt->wait_lock);
+ {
+ list_move_tail(&crpc->list, &c_clnt->waitq);
+ }
+ UNLOCK(&c_clnt->wait_lock);
+ }
+ mutex_unlock:
+ pthread_mutex_unlock(&c_clnt->pending_lock);
+ }
+
+ return NULL;
+}
+
+void
+changelog_ev_cleanup_connections(xlator_t *this, changelog_clnt_t *c_clnt)
+{
+ changelog_rpc_clnt_t *crpc = NULL;
+
+ /* cleanup active connections */
+ LOCK(&c_clnt->active_lock);
+ {
+ list_for_each_entry(crpc, &c_clnt->active, list)
+ {
+ rpc_clnt_disable(crpc->rpc);
+ }
+ }
+ UNLOCK(&c_clnt->active_lock);
+}
+
+/**
+ * TODO: granularize lock
+ *
+ * If we have multiple threads dispatching events, doing it this way is
+ * a performance bottleneck.
+ */
+
+static changelog_rpc_clnt_t *
+get_client(changelog_clnt_t *c_clnt, struct list_head **next)
+{
+ changelog_rpc_clnt_t *crpc = NULL;
+
+ LOCK(&c_clnt->active_lock);
+ {
+ if (*next == &c_clnt->active)
+ goto unblock;
+ crpc = list_entry(*next, changelog_rpc_clnt_t, list);
+ /* ref rpc as DISCONNECT might unref the rpc asynchronously */
+ changelog_rpc_clnt_ref(crpc);
+ rpc_clnt_ref(crpc->rpc);
+ *next = (*next)->next;
+ }
+unblock:
+ UNLOCK(&c_clnt->active_lock);
+
+ return crpc;
+}
+
+static void
+put_client(changelog_clnt_t *c_clnt, changelog_rpc_clnt_t *crpc)
+{
+ LOCK(&c_clnt->active_lock);
+ {
+ rpc_clnt_unref(crpc->rpc);
+ changelog_rpc_clnt_unref(crpc);
+ }
+ UNLOCK(&c_clnt->active_lock);
+}
+
+void
+_dispatcher(rbuf_list_t *rlist, void *arg)
+{
+ xlator_t *this = NULL;
+ changelog_clnt_t *c_clnt = NULL;
+ changelog_rpc_clnt_t *crpc = NULL;
+ struct ev_rpc erpc = {
+ 0,
+ };
+ struct list_head *next = NULL;
+
+ c_clnt = arg;
+ this = c_clnt->this;
+
+ erpc.rlist = rlist;
+ next = c_clnt->active.next;
+
+ while (1) {
+ crpc = get_client(c_clnt, &next);
+ if (!crpc)
+ break;
+ erpc.rpc = crpc->rpc;
+ (void)changelog_invoke_rpc(this, crpc->rpc, &changelog_ev_program,
+ CHANGELOG_REV_PROC_EVENT, &erpc);
+ put_client(c_clnt, crpc);
+ }
+}
+
+/** this is called under rotbuff's lock */
+void
+sequencer(rbuf_list_t *rlist, void *mydata)
+{
+ unsigned long range = 0;
+ changelog_clnt_t *c_clnt = 0;
+
+ c_clnt = mydata;
+
+ range = (RLIST_ENTRY_COUNT(rlist)) / NR_IOVEC;
+ if ((RLIST_ENTRY_COUNT(rlist)) % NR_IOVEC)
+ range++;
+ RLIST_STORE_SEQ(rlist, c_clnt->sequence, range);
+
+ c_clnt->sequence += range;
+}
+
+void *
+changelog_ev_dispatch(void *data)
+{
+ int ret = 0;
+ void *opaque = NULL;
+ xlator_t *this = NULL;
+ changelog_clnt_t *c_clnt = NULL;
+ struct timeval tv = {
+ 0,
+ };
+
+ c_clnt = data;
+ this = c_clnt->this;
+
+ while (1) {
+ /* TODO: change this to be pthread cond based.. later */
+
+ tv.tv_sec = 1;
+ tv.tv_usec = 0;
+ select(0, NULL, NULL, NULL, &tv);
+
+ ret = rbuf_get_buffer(c_clnt->rbuf, &opaque, sequencer, c_clnt);
+ if (ret != RBUF_CONSUMABLE) {
+ if (ret != RBUF_EMPTY)
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_BUFFER_STARVATION_ERROR,
+ "Failed to get buffer for RPC dispatch",
+ "rbuf_retval=%d", ret, NULL);
+ continue;
+ }
+
+ ret = rbuf_wait_for_completion(c_clnt->rbuf, opaque, _dispatcher,
+ c_clnt);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_PUT_BUFFER_FAILED, NULL);
+ }
+
+ return NULL;
+}
+
+void
+changelog_ev_queue_connection(changelog_clnt_t *c_clnt,
+ changelog_rpc_clnt_t *crpc)
+{
+ pthread_mutex_lock(&c_clnt->pending_lock);
+ {
+ list_add_tail(&crpc->list, &c_clnt->pending);
+ pthread_cond_signal(&c_clnt->pending_cond);
+ }
+ pthread_mutex_unlock(&c_clnt->pending_lock);
+}
+
+struct rpc_clnt_procedure changelog_ev_procs[CHANGELOG_REV_PROC_MAX] = {
+ [CHANGELOG_REV_PROC_NULL] = {"NULL", NULL},
+ [CHANGELOG_REV_PROC_EVENT] = {"EVENT DISPATCH",
+ changelog_event_dispatch_rpc},
+};
+
+struct rpc_clnt_program changelog_ev_program = {
+ .progname = "CHANGELOG EVENT DISPATCHER",
+ .prognum = CHANGELOG_REV_RPC_PROCNUM,
+ .progver = CHANGELOG_REV_RPC_PROCVER,
+ .numproc = CHANGELOG_REV_PROC_MAX,
+ .proctable = changelog_ev_procs,
+};
diff --git a/xlators/features/changelog/src/changelog-ev-handle.h b/xlators/features/changelog/src/changelog-ev-handle.h
new file mode 100644
index 00000000000..cc1af58a276
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-ev-handle.h
@@ -0,0 +1,136 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CHANGELOG_EV_HANDLE_H
+#define __CHANGELOG_EV_HANDLE_H
+
+#include <glusterfs/list.h>
+#include <glusterfs/xlator.h>
+#include "rpc-clnt.h"
+
+#include <glusterfs/rot-buffs.h>
+
+struct changelog_clnt;
+
+typedef struct changelog_rpc_clnt {
+ xlator_t *this;
+
+ gf_lock_t lock;
+
+ gf_atomic_t ref;
+ gf_boolean_t disconnected;
+
+ unsigned int filter;
+ char sock[UNIX_PATH_MAX];
+
+ struct changelog_clnt *c_clnt; /* back pointer to list holder */
+
+ struct rpc_clnt *rpc; /* RPC client endpoint */
+
+ struct list_head list; /* ->pending, ->waitq, ->active */
+
+ void (*cleanup)(struct changelog_rpc_clnt *); /* cleanup handler */
+} changelog_rpc_clnt_t;
+
+static inline void
+changelog_rpc_clnt_ref(changelog_rpc_clnt_t *crpc)
+{
+ GF_ATOMIC_INC(crpc->ref);
+}
+
+static inline void
+changelog_set_disconnect_flag(changelog_rpc_clnt_t *crpc, gf_boolean_t flag)
+{
+ crpc->disconnected = flag;
+}
+
+static inline int
+changelog_rpc_clnt_is_disconnected(changelog_rpc_clnt_t *crpc)
+{
+ return (crpc->disconnected == _gf_true);
+}
+
+static inline void
+changelog_rpc_clnt_unref(changelog_rpc_clnt_t *crpc)
+{
+ gf_boolean_t gone = _gf_false;
+ uint64_t ref = 0;
+
+ ref = GF_ATOMIC_DEC(crpc->ref);
+
+ if (!ref && changelog_rpc_clnt_is_disconnected(crpc)) {
+ list_del(&crpc->list);
+ gone = _gf_true;
+ }
+
+ if (gone)
+ crpc->cleanup(crpc);
+}
+
+/**
+ * This structure holds pending and active clients. On probe RPC all
+ * an instance of the above structure (@changelog_rpc_clnt) is placed
+ * in ->pending and gets moved to ->active on a successful connect.
+ *
+ * locking rules:
+ *
+ * Manipulating ->pending
+ * ->pending_lock
+ * ->pending
+ *
+ * Manipulating ->active
+ * ->active_lock
+ * ->active
+ *
+ * Moving object from ->pending to ->active
+ * ->pending_lock
+ * ->active_lock
+ *
+ * Objects are _never_ moved from ->active to ->pending, i.e., during
+ * disconnection, the object is destroyed. Well, we could have tried
+ * to reconnect, but that's pure waste.. let the other end reconnect.
+ */
+
+typedef struct changelog_clnt {
+ xlator_t *this;
+
+ /* pending connections */
+ pthread_mutex_t pending_lock;
+ pthread_cond_t pending_cond;
+ struct list_head pending;
+
+ /* current active connections */
+ gf_lock_t active_lock;
+ struct list_head active;
+
+ gf_lock_t wait_lock;
+ struct list_head waitq;
+
+ /* consumer part of rot-buffs */
+ rbuf_t *rbuf;
+ unsigned long sequence;
+} changelog_clnt_t;
+
+void *
+changelog_ev_connector(void *);
+
+void *
+changelog_ev_dispatch(void *);
+
+/* APIs */
+void
+changelog_ev_queue_connection(changelog_clnt_t *, changelog_rpc_clnt_t *);
+
+void
+changelog_ev_cleanup_connections(xlator_t *, changelog_clnt_t *);
+
+void
+changelog_process_cleanup_event(xlator_t *);
+#endif
diff --git a/xlators/features/changelog/src/changelog-helpers.c b/xlators/features/changelog/src/changelog-helpers.c
new file mode 100644
index 00000000000..e561997d858
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-helpers.c
@@ -0,0 +1,1977 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/iobuf.h>
+#include <glusterfs/syscall.h>
+
+#include "changelog-helpers.h"
+#include "changelog-encoders.h"
+#include "changelog-mem-types.h"
+#include "changelog-messages.h"
+
+#include "changelog-encoders.h"
+#include "changelog-rpc-common.h"
+#include <pthread.h>
+#include <time.h>
+
+static void
+changelog_cleanup_free_mutex(void *arg_mutex)
+{
+ pthread_mutex_t *p_mutex = (pthread_mutex_t *)arg_mutex;
+
+ if (p_mutex)
+ pthread_mutex_unlock(p_mutex);
+}
+
+int
+changelog_thread_cleanup(xlator_t *this, pthread_t thr_id)
+{
+ int ret = 0;
+ void *retval = NULL;
+
+ /* send a cancel request to the thread */
+ ret = pthread_cancel(thr_id);
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, NULL);
+ goto out;
+ }
+
+ ret = pthread_join(thr_id, &retval);
+ if ((ret != 0) || (retval != PTHREAD_CANCELED)) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_PTHREAD_CANCEL_FAILED, NULL);
+ }
+
+out:
+ return ret;
+}
+
+void *
+changelog_get_usable_buffer(changelog_local_t *local)
+{
+ changelog_log_data_t *cld = NULL;
+
+ if (!local)
+ return NULL;
+
+ cld = &local->cld;
+ if (!cld->cld_iobuf)
+ return NULL;
+
+ return cld->cld_iobuf->ptr;
+}
+
+static int
+changelog_selector_index(unsigned int selector)
+{
+ return (ffs(selector) - 1);
+}
+
+int
+changelog_ev_selected(xlator_t *this, changelog_ev_selector_t *selection,
+ unsigned int selector)
+{
+ int idx = 0;
+
+ idx = changelog_selector_index(selector);
+ gf_msg_debug(this->name, 0, "selector ref count for %d (idx: %d): %d",
+ selector, idx, selection->ref[idx]);
+ /* this can be lockless */
+ return (idx < CHANGELOG_EV_SELECTION_RANGE && (selection->ref[idx] > 0));
+}
+
+void
+changelog_select_event(xlator_t *this, changelog_ev_selector_t *selection,
+ unsigned int selector)
+{
+ int idx = 0;
+
+ LOCK(&selection->reflock);
+ {
+ while (selector) {
+ idx = changelog_selector_index(selector);
+ if (idx < CHANGELOG_EV_SELECTION_RANGE) {
+ selection->ref[idx]++;
+ gf_msg_debug(this->name, 0, "selecting event %d", idx);
+ }
+ selector &= ~(1 << idx);
+ }
+ }
+ UNLOCK(&selection->reflock);
+}
+
+void
+changelog_deselect_event(xlator_t *this, changelog_ev_selector_t *selection,
+ unsigned int selector)
+{
+ int idx = 0;
+
+ LOCK(&selection->reflock);
+ {
+ while (selector) {
+ idx = changelog_selector_index(selector);
+ if (idx < CHANGELOG_EV_SELECTION_RANGE) {
+ selection->ref[idx]--;
+ gf_msg_debug(this->name, 0, "de-selecting event %d", idx);
+ }
+ selector &= ~(1 << idx);
+ }
+ }
+ UNLOCK(&selection->reflock);
+}
+
+int
+changelog_init_event_selection(xlator_t *this,
+ changelog_ev_selector_t *selection)
+{
+ int ret = 0;
+ int j = CHANGELOG_EV_SELECTION_RANGE;
+
+ ret = LOCK_INIT(&selection->reflock);
+ if (ret != 0)
+ return -1;
+
+ LOCK(&selection->reflock);
+ {
+ while (j--) {
+ selection->ref[j] = 0;
+ }
+ }
+ UNLOCK(&selection->reflock);
+
+ return 0;
+}
+
+static void
+changelog_perform_dispatch(xlator_t *this, changelog_priv_t *priv, void *mem,
+ size_t size)
+{
+ char *buf = NULL;
+ void *opaque = NULL;
+
+ buf = rbuf_reserve_write_area(priv->rbuf, size, &opaque);
+ if (!buf) {
+ gf_msg_callingfn(this->name, GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_DISPATCH_EVENT_FAILED,
+ "failed to dispatch event");
+ return;
+ }
+
+ memcpy(buf, mem, size);
+ rbuf_write_complete(opaque);
+}
+
+void
+changelog_dispatch_event(xlator_t *this, changelog_priv_t *priv,
+ changelog_event_t *ev)
+{
+ changelog_ev_selector_t *selection = NULL;
+
+ selection = &priv->ev_selection;
+ if (changelog_ev_selected(this, selection, ev->ev_type)) {
+ changelog_perform_dispatch(this, priv, ev, CHANGELOG_EV_SIZE);
+ }
+}
+
+void
+changelog_set_usable_record_and_length(changelog_local_t *local, size_t len,
+ int xr)
+{
+ changelog_log_data_t *cld = NULL;
+
+ cld = &local->cld;
+
+ cld->cld_ptr_len = len;
+ cld->cld_xtra_records = xr;
+}
+
+void
+changelog_local_cleanup(xlator_t *xl, changelog_local_t *local)
+{
+ int i = 0;
+ changelog_opt_t *co = NULL;
+ changelog_log_data_t *cld = NULL;
+
+ if (!local)
+ return;
+
+ cld = &local->cld;
+
+ /* cleanup dynamic allocation for extra records */
+ if (cld->cld_xtra_records) {
+ co = (changelog_opt_t *)cld->cld_ptr;
+ for (; i < cld->cld_xtra_records; i++, co++)
+ if (co->co_free)
+ co->co_free(co);
+ }
+
+ CHANGELOG_IOBUF_UNREF(cld->cld_iobuf);
+
+ if (local->inode)
+ inode_unref(local->inode);
+
+ mem_put(local);
+}
+
+int
+changelog_write(int fd, char *buffer, size_t len)
+{
+ ssize_t size = 0;
+ size_t written = 0;
+
+ while (written < len) {
+ size = sys_write(fd, buffer + written, len - written);
+ if (size <= 0)
+ break;
+
+ written += size;
+ }
+
+ return (written != len);
+}
+
+int
+htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer)
+{
+ char changelog_path[PATH_MAX + 1] = {
+ 0,
+ };
+ int len = -1;
+ char x_value[25] = {
+ 0,
+ };
+ /* time stamp(10) + : (1) + rolltime (12 ) + buffer (2) */
+ int ret = 0;
+
+ if (priv->htime_fd == -1) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
+ "reason=fd not available", NULL);
+ ret = -1;
+ goto out;
+ }
+ len = snprintf(changelog_path, PATH_MAX, "%s", buffer);
+ if (len >= PATH_MAX) {
+ ret = -1;
+ goto out;
+ }
+ if (changelog_write(priv->htime_fd, (void *)changelog_path, len + 1) < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
+ "reason=write failed", NULL);
+ ret = -1;
+ goto out;
+ }
+
+ len = snprintf(x_value, sizeof(x_value), "%ld:%d", ts,
+ priv->rollover_count);
+ if (len >= sizeof(x_value)) {
+ ret = -1;
+ goto out;
+ }
+
+ if (sys_fsetxattr(priv->htime_fd, HTIME_KEY, x_value, len, XATTR_REPLACE)) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR,
+ "reason=xattr updation failed", "XATTR_REPLACE=true",
+ "changelog=%s", changelog_path, NULL);
+
+ if (sys_fsetxattr(priv->htime_fd, HTIME_KEY, x_value, len, 0)) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_ERROR,
+ "reason=xattr updation failed", "changelog=%s",
+ changelog_path, NULL);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ priv->rollover_count += 1;
+
+out:
+ return ret;
+}
+
+/*
+ * Description: Check if the changelog to rollover is empty or not.
+ * It is assumed that fd passed is already verified.
+ *
+ * Returns:
+ * 1 : If found empty, changed path from "CHANGELOG.<TS>" to "changelog.<TS>"
+ * 0 : If NOT empty, proceed usual.
+ */
+int
+cl_is_empty(xlator_t *this, int fd)
+{
+ int ret = -1;
+ size_t elen = 0;
+ int encoding = -1;
+ char buffer[1024] = {
+ 0,
+ };
+ struct stat stbuf = {
+ 0,
+ };
+ int major_version = -1;
+ int minor_version = -1;
+
+ ret = sys_fstat(fd, &stbuf);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSTAT_OP_FAILED,
+ NULL);
+ goto out;
+ }
+
+ ret = sys_lseek(fd, 0, SEEK_SET);
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_LSEEK_OP_FAILED,
+ NULL);
+ goto out;
+ }
+
+ CHANGELOG_GET_HEADER_INFO(fd, buffer, sizeof(buffer), encoding,
+ major_version, minor_version, elen);
+
+ if (elen == stbuf.st_size) {
+ ret = 1;
+ } else {
+ ret = 0;
+ }
+
+out:
+ return ret;
+}
+
+/*
+ * Description: Updates "CHANGELOG" to "changelog" for writing changelog path
+ * to htime file.
+ *
+ * Returns:
+ * 0 : Success
+ * -1 : Error
+ */
+int
+update_path(xlator_t *this, char *cl_path)
+{
+ const char low_cl[] = "changelog";
+ const char up_cl[] = "CHANGELOG";
+ char *found = NULL;
+ int ret = -1;
+
+ found = strstr(cl_path, up_cl);
+
+ if (found == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PATH_NOT_FOUND,
+ NULL);
+ goto out;
+ } else {
+ memcpy(found, low_cl, sizeof(low_cl) - 1);
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+changelog_rollover_changelog(xlator_t *this, changelog_priv_t *priv, time_t ts)
+{
+ int ret = -1;
+ int notify = 0;
+ int cl_empty_flag = 0;
+ struct tm *gmt;
+ char yyyymmdd[40];
+ char ofile[PATH_MAX] = {
+ 0,
+ };
+ char nfile[PATH_MAX] = {
+ 0,
+ };
+ char nfile_dir[PATH_MAX] = {
+ 0,
+ };
+ changelog_event_t ev = {
+ 0,
+ };
+
+ if (priv->changelog_fd != -1) {
+ ret = sys_fsync(priv->changelog_fd);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_FSYNC_OP_FAILED, NULL);
+ }
+ ret = cl_is_empty(this, priv->changelog_fd);
+ if (ret == 1) {
+ cl_empty_flag = 1;
+ } else if (ret == -1) {
+ /* Log error but proceed as usual */
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED, NULL);
+ }
+ sys_close(priv->changelog_fd);
+ priv->changelog_fd = -1;
+ }
+
+ /* Get GMT time. */
+ gmt = gmtime(&ts);
+
+ strftime(yyyymmdd, sizeof(yyyymmdd), "%Y/%m/%d", gmt);
+
+ (void)snprintf(ofile, PATH_MAX, "%s/" CHANGELOG_FILE_NAME,
+ priv->changelog_dir);
+ (void)snprintf(nfile, PATH_MAX, "%s/%s/" CHANGELOG_FILE_NAME ".%ld",
+ priv->changelog_dir, yyyymmdd, ts);
+ (void)snprintf(nfile_dir, PATH_MAX, "%s/%s", priv->changelog_dir, yyyymmdd);
+
+ if (cl_empty_flag == 1) {
+ ret = sys_unlink(ofile);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_UNLINK_OP_FAILED, "path=%s", ofile, NULL);
+ ret = 0; /* Error in unlinking empty changelog should
+ not break further changelog operation, so
+ reset return value to 0*/
+ }
+ } else {
+ ret = sys_rename(ofile, nfile);
+
+ /* Changelog file rename gets ENOENT when parent dir doesn't exist */
+ if (errno == ENOENT) {
+ ret = mkdir_p(nfile_dir, 0600, _gf_true);
+
+ if ((ret == -1) && (EEXIST != errno)) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_MKDIR_ERROR, "%s", nfile_dir, NULL);
+ goto out;
+ }
+
+ ret = sys_rename(ofile, nfile);
+ }
+
+ if (ret && (errno == ENOENT)) {
+ ret = 0;
+ goto out;
+ }
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_RENAME_ERROR,
+ "from=%s", ofile, "to=%s", nfile, NULL);
+ }
+ }
+
+ if (!ret && (cl_empty_flag == 0)) {
+ notify = 1;
+ }
+
+ if (!ret) {
+ if (cl_empty_flag) {
+ update_path(this, nfile);
+ }
+ ret = htime_update(this, priv, ts, nfile);
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_HTIME_ERROR,
+ NULL);
+ goto out;
+ }
+ }
+
+ if (notify) {
+ ev.ev_type = CHANGELOG_OP_TYPE_JOURNAL;
+ memcpy(ev.u.journal.path, nfile, strlen(nfile) + 1);
+ changelog_dispatch_event(this, priv, &ev);
+ }
+out:
+ /* If this is explicit rollover initiated by snapshot,
+ * wakeup reconfigure thread waiting for changelog to
+ * rollover. This should happen even in failure cases as
+ * well otherwise snapshot will timeout and fail. Hence
+ * moved under out.
+ */
+ if (priv->explicit_rollover) {
+ priv->explicit_rollover = _gf_false;
+
+ pthread_mutex_lock(&priv->bn.bnotify_mutex);
+ {
+ if (ret) {
+ priv->bn.bnotify_error = _gf_true;
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED, NULL);
+ } else {
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BNOTIFY_INFO,
+ "changelog=%s", nfile, NULL);
+ }
+ priv->bn.bnotify = _gf_false;
+ pthread_cond_signal(&priv->bn.bnotify_cond);
+ }
+ pthread_mutex_unlock(&priv->bn.bnotify_mutex);
+ }
+ return ret;
+}
+
+int
+filter_cur_par_dirs(const struct dirent *entry)
+{
+ if (entry == NULL)
+ return 0;
+
+ if ((strcmp(entry->d_name, ".") == 0) || (strcmp(entry->d_name, "..") == 0))
+ return 0;
+ else
+ return 1;
+}
+
+/*
+ * find_current_htime:
+ * It finds the latest htime file and sets the HTIME_CURRENT
+ * xattr.
+ * RETURN VALUE:
+ * -1 : Error
+ * ret: Number of directory entries;
+ */
+
+int
+find_current_htime(int ht_dir_fd, const char *ht_dir_path, char *ht_file_bname)
+{
+ struct dirent **namelist = NULL;
+ int ret = 0;
+ int cnt = 0;
+ int i = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(ht_dir_path);
+
+ cnt = scandir(ht_dir_path, &namelist, filter_cur_par_dirs, alphasort);
+ if (cnt < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_SCAN_DIR_FAILED,
+ NULL);
+ } else if (cnt > 0) {
+ if (snprintf(ht_file_bname, NAME_MAX, "%s",
+ namelist[cnt - 1]->d_name) >= NAME_MAX) {
+ ret = -1;
+ goto out;
+ }
+ if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname,
+ strlen(ht_file_bname), 0)) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_FSETXATTR_FAILED, "HTIME_CURRENT", NULL);
+ ret = -1;
+ goto out;
+ }
+
+ if (sys_fsync(ht_dir_fd) < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_FSYNC_OP_FAILED, NULL);
+ ret = -1;
+ goto out;
+ }
+ }
+
+out:
+ for (i = 0; i < cnt; i++)
+ free(namelist[i]);
+ free(namelist);
+
+ if (ret)
+ cnt = ret;
+
+ return cnt;
+}
+
+/* Returns 0 on successful open of htime file
+ * returns -1 on failure or error
+ */
+int
+htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts)
+{
+ int ht_file_fd = -1;
+ int ht_dir_fd = -1;
+ int ret = 0;
+ int cnt = 0;
+ char ht_dir_path[PATH_MAX] = {
+ 0,
+ };
+ char ht_file_path[PATH_MAX] = {
+ 0,
+ };
+ char ht_file_bname[NAME_MAX] = {
+ 0,
+ };
+ char x_value[NAME_MAX] = {
+ 0,
+ };
+ int flags = 0;
+ unsigned long min_ts = 0;
+ unsigned long max_ts = 0;
+ unsigned long total = 0;
+ unsigned long total1 = 0;
+ ssize_t size = 0;
+ struct stat stat_buf = {
+ 0,
+ };
+ unsigned long record_len = 0;
+ int32_t len = 0;
+
+ CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path);
+
+ /* Open htime directory to get HTIME_CURRENT */
+ ht_dir_fd = open(ht_dir_path, O_RDONLY);
+ if (ht_dir_fd == -1) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
+ "path=%s", ht_dir_path, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ size = sys_fgetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname,
+ sizeof(ht_file_bname));
+ if (size < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FGETXATTR_FAILED,
+ "name=HTIME_CURRENT", NULL);
+
+ /* If upgrade scenario, find the latest HTIME.TSTAMP file
+ * and use the same. If error, create a new HTIME.TSTAMP
+ * file.
+ */
+ cnt = find_current_htime(ht_dir_fd, ht_dir_path, ht_file_bname);
+ if (cnt <= 0) {
+ gf_smsg(this->name, GF_LOG_INFO, errno,
+ CHANGELOG_MSG_NO_HTIME_CURRENT, NULL);
+ sys_close(ht_dir_fd);
+ return htime_create(this, priv, ts);
+ }
+
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_HTIME_CURRENT_ERROR, NULL);
+ }
+
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_HTIME_CURRENT, "path=%s",
+ ht_file_bname, NULL);
+ len = snprintf(ht_file_path, PATH_MAX, "%s/%s", ht_dir_path, ht_file_bname);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Open in append mode as existing htime file is used */
+ flags |= (O_RDWR | O_SYNC | O_APPEND);
+ ht_file_fd = open(ht_file_path, flags,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (ht_file_fd < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
+ "path=%s", ht_file_path, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ /* save this htime_fd in priv->htime_fd */
+ priv->htime_fd = ht_file_fd;
+
+ ret = sys_fstat(ht_file_fd, &stat_buf);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_HTIME_STAT_ERROR,
+ "path=%s", ht_file_path, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ /* Initialize rollover-number in priv to current number */
+ size = sys_fgetxattr(ht_file_fd, HTIME_KEY, x_value, sizeof(x_value));
+ if (size < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FGETXATTR_FAILED,
+ "name=%s", HTIME_KEY, "path=%s", ht_file_path, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ sscanf(x_value, "%lu:%lu", &max_ts, &total);
+
+ /* 22 = 1(/) + 20(CHANGELOG.TIMESTAMP) + 1(\x00) */
+ record_len = strlen(priv->changelog_dir) + 22;
+ total1 = stat_buf.st_size / record_len;
+ if (total != total1) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO,
+ "xattr_total=%lu", total, "size_total=%lu", total1, NULL);
+ }
+
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_TOTAL_LOG_INFO, "min=%lu",
+ min_ts, "max=%lu", max_ts, "total_changelogs=%lu", total, NULL);
+
+ if (total < total1)
+ priv->rollover_count = total1 + 1;
+ else
+ priv->rollover_count = total + 1;
+
+out:
+ if (ht_dir_fd != -1)
+ sys_close(ht_dir_fd);
+ return ret;
+}
+
+/* Returns 0 on successful creation of htime file
+ * returns -1 on failure or error
+ */
+int
+htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts)
+{
+ int ht_file_fd = -1;
+ int ht_dir_fd = -1;
+ int ret = 0;
+ char ht_dir_path[PATH_MAX] = {
+ 0,
+ };
+ char ht_file_path[PATH_MAX] = {
+ 0,
+ };
+ char ht_file_bname[NAME_MAX + 1] = {
+ 0,
+ };
+ int flags = 0;
+ int32_t len = 0;
+
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_NEW_HTIME_FILE,
+ "name=%ld", ts, NULL);
+
+ CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, ht_dir_path);
+
+ /* get the htime file name in ht_file_path */
+ len = snprintf(ht_file_path, PATH_MAX, "%s/%s.%ld", ht_dir_path,
+ HTIME_FILE_NAME, ts);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ ret = -1;
+ goto out;
+ }
+
+ flags |= (O_CREAT | O_RDWR | O_SYNC);
+ ht_file_fd = open(ht_file_path, flags,
+ S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (ht_file_fd < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
+ "path=%s", ht_file_path, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ if (sys_fsetxattr(ht_file_fd, HTIME_KEY, HTIME_INITIAL_VALUE,
+ sizeof(HTIME_INITIAL_VALUE) - 1, 0)) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_XATTR_INIT_FAILED, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ ret = sys_fsync(ht_file_fd);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED,
+ NULL);
+ goto out;
+ }
+
+ /* save this htime_fd in priv->htime_fd */
+ priv->htime_fd = ht_file_fd;
+
+ ht_file_fd = -1;
+
+ /* Set xattr HTIME_CURRENT on htime directory to htime filename */
+ ht_dir_fd = open(ht_dir_path, O_RDONLY);
+ if (ht_dir_fd == -1) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
+ "path=%s", ht_dir_path, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ (void)snprintf(ht_file_bname, sizeof(ht_file_bname), "%s.%ld",
+ HTIME_FILE_NAME, ts);
+ if (sys_fsetxattr(ht_dir_fd, HTIME_CURRENT, ht_file_bname,
+ strlen(ht_file_bname), 0)) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSETXATTR_FAILED,
+ " HTIME_CURRENT", NULL);
+ ret = -1;
+ goto out;
+ }
+
+ ret = sys_fsync(ht_dir_fd);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_FSYNC_OP_FAILED,
+ NULL);
+ goto out;
+ }
+
+ /* initialize rollover-number in priv to 1 */
+ priv->rollover_count = 1;
+
+out:
+ if (ht_dir_fd != -1)
+ sys_close(ht_dir_fd);
+ if (ht_file_fd != -1)
+ sys_close(ht_file_fd);
+ return ret;
+}
+
+/* Description:
+ * Opens the snap changelog to log call path fops in it.
+ * This changelos name is "CHANGELOG.SNAP", stored in
+ * path ".glusterfs/changelogs/csnap".
+ * Returns:
+ * 0 : On success.
+ * -1 : On failure.
+ */
+int
+changelog_snap_open(xlator_t *this, changelog_priv_t *priv)
+{
+ int fd = -1;
+ int ret = 0;
+ int flags = 0;
+ char buffer[1024] = {
+ 0,
+ };
+ char c_snap_path[PATH_MAX] = {
+ 0,
+ };
+ char csnap_dir_path[PATH_MAX] = {
+ 0,
+ };
+ int32_t len = 0;
+
+ CHANGELOG_FILL_CSNAP_DIR(priv->changelog_dir, csnap_dir_path);
+
+ len = snprintf(c_snap_path, PATH_MAX, "%s/" CSNAP_FILE_NAME,
+ csnap_dir_path);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ ret = -1;
+ goto out;
+ }
+
+ flags |= (O_CREAT | O_RDWR | O_TRUNC);
+
+ fd = open(c_snap_path, flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (fd < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
+ "path=%s", c_snap_path, NULL);
+ ret = -1;
+ goto out;
+ }
+ priv->c_snap_fd = fd;
+
+ (void)snprintf(buffer, 1024, CHANGELOG_HEADER, CHANGELOG_VERSION_MAJOR,
+ CHANGELOG_VERSION_MINOR, priv->ce->encoder);
+ ret = changelog_snap_write_change(priv, buffer, strlen(buffer));
+ if (ret < 0) {
+ sys_close(priv->c_snap_fd);
+ priv->c_snap_fd = -1;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+/*
+ * Description:
+ * Starts logging fop details in CSNAP journal.
+ * Returns:
+ * 0 : On success.
+ * -1 : On Failure.
+ */
+int
+changelog_snap_logging_start(xlator_t *this, changelog_priv_t *priv)
+{
+ int ret = 0;
+
+ ret = changelog_snap_open(this, priv);
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, "starting",
+ NULL);
+
+ return ret;
+}
+
+/*
+ * Description:
+ * Stops logging fop details in CSNAP journal.
+ * Returns:
+ * 0 : On success.
+ * -1 : On Failure.
+ */
+int
+changelog_snap_logging_stop(xlator_t *this, changelog_priv_t *priv)
+{
+ int ret = 0;
+
+ sys_close(priv->c_snap_fd);
+ priv->c_snap_fd = -1;
+
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_SNAP_INFO, "Stopped",
+ NULL);
+
+ return ret;
+}
+
+int
+changelog_open_journal(xlator_t *this, changelog_priv_t *priv)
+{
+ int fd = 0;
+ int ret = -1;
+ int flags = 0;
+ char buffer[1024] = {
+ 0,
+ };
+ char changelog_path[PATH_MAX] = {
+ 0,
+ };
+
+ (void)snprintf(changelog_path, PATH_MAX, "%s/" CHANGELOG_FILE_NAME,
+ priv->changelog_dir);
+
+ flags |= (O_CREAT | O_RDWR);
+ if (priv->fsync_interval == 0)
+ flags |= O_SYNC;
+
+ fd = open(changelog_path, flags, S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH);
+ if (fd < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_OPEN_FAILED,
+ "path=%s", changelog_path, NULL);
+ goto out;
+ }
+
+ priv->changelog_fd = fd;
+
+ (void)snprintf(buffer, 1024, CHANGELOG_HEADER, CHANGELOG_VERSION_MAJOR,
+ CHANGELOG_VERSION_MINOR, priv->ce->encoder);
+ ret = changelog_write_change(priv, buffer, strlen(buffer));
+ if (ret) {
+ sys_close(priv->changelog_fd);
+ priv->changelog_fd = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int
+changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts,
+ gf_boolean_t finale)
+{
+ int ret = -1;
+
+ ret = changelog_rollover_changelog(this, priv, ts);
+
+ if (!ret && !finale)
+ ret = changelog_open_journal(this, priv);
+
+ return ret;
+}
+
+/**
+ * return the length of entry
+ */
+size_t
+changelog_entry_length()
+{
+ return sizeof(changelog_log_data_t);
+}
+
+void
+changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last)
+{
+ cld->cld_type = CHANGELOG_TYPE_ROLLOVER;
+ cld->cld_roll_time = gf_time();
+ cld->cld_finale = is_last;
+}
+
+int
+changelog_snap_write_change(changelog_priv_t *priv, char *buffer, size_t len)
+{
+ return changelog_write(priv->c_snap_fd, buffer, len);
+}
+
+int
+changelog_write_change(changelog_priv_t *priv, char *buffer, size_t len)
+{
+ return changelog_write(priv->changelog_fd, buffer, len);
+}
+
+/*
+ * Descriptions:
+ * Writes fop details in ascii format to CSNAP.
+ * Issues:
+ * Not Encoding agnostic.
+ * Returns:
+ * 0 : On Success.
+ * -1 : On Failure.
+ */
+int
+changelog_snap_handle_ascii_change(xlator_t *this, changelog_log_data_t *cld)
+{
+ size_t off = 0;
+ size_t gfid_len = 0;
+ char *gfid_str = NULL;
+ char *buffer = NULL;
+ changelog_priv_t *priv = NULL;
+ int ret = 0;
+
+ if (this == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ priv = this->private;
+
+ if (priv == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ gfid_str = uuid_utoa(cld->cld_gfid);
+ gfid_len = strlen(gfid_str);
+
+ /* extra bytes for decorations */
+ buffer = alloca(gfid_len + cld->cld_ptr_len + 10);
+ CHANGELOG_STORE_ASCII(priv, buffer, off, gfid_str, gfid_len, cld);
+
+ CHANGELOG_FILL_BUFFER(buffer, off, "\0", 1);
+
+ ret = changelog_snap_write_change(priv, buffer, off);
+
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED,
+ "csnap", NULL);
+ }
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_WROTE_TO_CSNAP, NULL);
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+changelog_handle_change(xlator_t *this, changelog_priv_t *priv,
+ changelog_log_data_t *cld)
+{
+ int ret = 0;
+
+ if (CHANGELOG_TYPE_IS_ROLLOVER(cld->cld_type)) {
+ changelog_encode_change(priv);
+ ret = changelog_start_next_change(this, priv, cld->cld_roll_time,
+ cld->cld_finale);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_GET_TIME_OP_FAILED, NULL);
+ goto out;
+ }
+
+ /**
+ * case when there is reconfigure done (disabling changelog) and there
+ * are still fops that have updates in prgress.
+ */
+ if (priv->changelog_fd == -1)
+ return 0;
+
+ if (CHANGELOG_TYPE_IS_FSYNC(cld->cld_type)) {
+ ret = sys_fsync(priv->changelog_fd);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_FSYNC_OP_FAILED, NULL);
+ }
+ goto out;
+ }
+
+ ret = priv->ce->encode(this, cld);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_WRITE_FAILED,
+ "changelog", NULL);
+ }
+
+out:
+ return ret;
+}
+
+changelog_local_t *
+changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid,
+ int xtra_records, gf_boolean_t update_flag)
+{
+ changelog_local_t *local = NULL;
+ struct iobuf *iobuf = NULL;
+
+ /**
+ * We relax the presence of inode if @update_flag is true.
+ * The caller (implementation of the fop) needs to be careful to
+ * not blindly use local->inode.
+ */
+ if (!update_flag && !inode) {
+ gf_msg_callingfn(this->name, GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_INODE_NOT_FOUND,
+ "inode needed for version checking !!!");
+
+ goto out;
+ }
+
+ if (xtra_records) {
+ iobuf = iobuf_get2(this->ctx->iobuf_pool,
+ xtra_records * CHANGELOG_OPT_RECORD_LEN);
+ if (!iobuf)
+ goto out;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ CHANGELOG_IOBUF_UNREF(iobuf);
+ goto out;
+ }
+
+ local->update_no_check = update_flag;
+
+ gf_uuid_copy(local->cld.cld_gfid, gfid);
+
+ local->cld.cld_iobuf = iobuf;
+ local->cld.cld_xtra_records = 0; /* set by the caller */
+
+ if (inode)
+ local->inode = inode_ref(inode);
+
+out:
+ return local;
+}
+
+int
+changelog_forget(xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_addr = 0;
+ changelog_inode_ctx_t *ctx = NULL;
+
+ inode_ctx_del(inode, this, &ctx_addr);
+ if (!ctx_addr)
+ return 0;
+
+ ctx = (changelog_inode_ctx_t *)(long)ctx_addr;
+ GF_FREE(ctx);
+
+ return 0;
+}
+
+int
+changelog_inject_single_event(xlator_t *this, changelog_priv_t *priv,
+ changelog_log_data_t *cld)
+{
+ return priv->cd.dispatchfn(this, priv, priv->cd.cd_data, cld, NULL);
+}
+
+/* Wait till all the black fops are drained */
+void
+changelog_drain_black_fops(xlator_t *this, changelog_priv_t *priv)
+{
+ int ret = 0;
+
+ /* clean up framework of pthread_mutex is required here as
+ * 'reconfigure' terminates the changelog_rollover thread
+ * on graph change.
+ */
+ pthread_cleanup_push(changelog_cleanup_free_mutex,
+ &priv->dm.drain_black_mutex);
+ ret = pthread_mutex_lock(&priv->dm.drain_black_mutex);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR,
+ "error=%d", ret, NULL);
+ while (priv->dm.black_fop_cnt > 0) {
+ gf_msg_debug(this->name, 0, "Conditional wait on black fops: %ld",
+ priv->dm.black_fop_cnt);
+ priv->dm.drain_wait_black = _gf_true;
+ ret = pthread_cond_wait(&priv->dm.drain_black_cond,
+ &priv->dm.drain_black_mutex);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, "error=%d", ret,
+ NULL);
+ }
+ priv->dm.drain_wait_black = _gf_false;
+ ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR,
+ "error=%d", ret, NULL);
+ pthread_cleanup_pop(0);
+ gf_msg_debug(this->name, 0, "Woke up: Conditional wait on black fops");
+}
+
+/* Wait till all the white fops are drained */
+void
+changelog_drain_white_fops(xlator_t *this, changelog_priv_t *priv)
+{
+ int ret = 0;
+
+ /* clean up framework of pthread_mutex is required here as
+ * 'reconfigure' terminates the changelog_rollover thread
+ * on graph change.
+ */
+ pthread_cleanup_push(changelog_cleanup_free_mutex,
+ &priv->dm.drain_white_mutex);
+ ret = pthread_mutex_lock(&priv->dm.drain_white_mutex);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR,
+ "error=%d", ret, NULL);
+ while (priv->dm.white_fop_cnt > 0) {
+ gf_msg_debug(this->name, 0, "Conditional wait on white fops : %ld",
+ priv->dm.white_fop_cnt);
+ priv->dm.drain_wait_white = _gf_true;
+ ret = pthread_cond_wait(&priv->dm.drain_white_cond,
+ &priv->dm.drain_white_mutex);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, "error=%d", ret,
+ NULL);
+ }
+ priv->dm.drain_wait_white = _gf_false;
+ ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_PTHREAD_ERROR,
+ "error=%d", ret, NULL);
+ pthread_cleanup_pop(0);
+ gf_msg_debug(this->name, 0, "Woke up: Conditional wait on white fops");
+}
+
+/**
+ * TODO: these threads have many thing in common (wake up after
+ * a certain time etc..). move them into separate routine.
+ */
+void *
+changelog_rollover(void *data)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ struct timespec tv = {
+ 0,
+ };
+ changelog_log_data_t cld = {
+ 0,
+ };
+ changelog_time_slice_t *slice = NULL;
+ changelog_priv_t *priv = data;
+
+ this = priv->cr.this;
+ slice = &priv->slice;
+
+ while (1) {
+ (void)pthread_testcancel();
+
+ tv.tv_sec = gf_time() + priv->rollover_time;
+ tv.tv_nsec = 0;
+ ret = 0; /* Reset ret to zero */
+
+ /* The race between actual rollover and explicit rollover is
+ * handled. If actual rollover is being done and the
+ * explicit rollover event comes, the event is not missed.
+ * Since explicit rollover sets 'cr.notify' to true, this
+ * thread doesn't wait on 'pthread_cond_timedwait'.
+ */
+ pthread_cleanup_push(changelog_cleanup_free_mutex, &priv->cr.lock);
+ pthread_mutex_lock(&priv->cr.lock);
+ {
+ while (ret == 0 && !priv->cr.notify)
+ ret = pthread_cond_timedwait(&priv->cr.cond, &priv->cr.lock,
+ &tv);
+ if (ret == 0)
+ priv->cr.notify = _gf_false;
+ }
+ pthread_mutex_unlock(&priv->cr.lock);
+ pthread_cleanup_pop(0);
+
+ if (ret == 0) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_BARRIER_INFO,
+ NULL);
+ priv->explicit_rollover = _gf_true;
+ } else if (ret && ret != ETIMEDOUT) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_SELECT_FAILED, NULL);
+ continue;
+ } else if (ret && ret == ETIMEDOUT) {
+ gf_msg_debug(this->name, 0, "Wokeup on timeout");
+ }
+
+ /* Reading curent_color without lock is fine here
+ * as it is only modified here and is next to reading.
+ */
+ if (priv->current_color == FOP_COLOR_BLACK) {
+ LOCK(&priv->lock);
+ priv->current_color = FOP_COLOR_WHITE;
+ UNLOCK(&priv->lock);
+ gf_msg_debug(this->name, 0,
+ "Black fops"
+ " to be drained:%ld",
+ priv->dm.black_fop_cnt);
+ changelog_drain_black_fops(this, priv);
+ } else {
+ LOCK(&priv->lock);
+ priv->current_color = FOP_COLOR_BLACK;
+ UNLOCK(&priv->lock);
+ gf_msg_debug(this->name, 0,
+ "White fops"
+ " to be drained:%ld",
+ priv->dm.white_fop_cnt);
+ changelog_drain_white_fops(this, priv);
+ }
+
+ /* Adding delay of 1 second only during explicit rollover:
+ *
+ * Changelog rollover can happen either due to actual
+ * or the explicit rollover during snapshot. Actual
+ * rollover is controlled by tuneable called 'rollover-time'.
+ * The minimum granularity for rollover-time is 1 second.
+ * Explicit rollover is asynchronous in nature and happens
+ * during snapshot.
+ *
+ * Basically, rollover renames the current CHANGELOG file
+ * to CHANGELOG.TIMESTAMP. Let's assume, at time 't1',
+ * actual and explicit rollover raced against each
+ * other and actual rollover won the race renaming the
+ * CHANGELOG file to CHANGELOG.t1 and opens a new
+ * CHANGELOG file. There is high chance that, an immediate
+ * explicit rollover at time 't1' can happen with in the same
+ * second to rename CHANGELOG file to CHANGELOG.t1 resulting in
+ * purging the earlier CHANGELOG.t1 file created by actual
+ * rollover. So adding a delay of 1 second guarantees unique
+ * CHANGELOG.TIMESTAMP during explicit rollover.
+ */
+ if (priv->explicit_rollover == _gf_true)
+ sleep(1);
+
+ changelog_fill_rollover_data(&cld, _gf_false);
+
+ _mask_cancellation();
+
+ LOCK(&priv->lock);
+ {
+ ret = changelog_inject_single_event(this, priv, &cld);
+ if (!ret)
+ SLICE_VERSION_UPDATE(slice);
+ }
+ UNLOCK(&priv->lock);
+
+ _unmask_cancellation();
+ }
+
+ return NULL;
+}
+
+void *
+changelog_fsync_thread(void *data)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ struct timeval tv = {
+ 0,
+ };
+ changelog_log_data_t cld = {
+ 0,
+ };
+ changelog_priv_t *priv = data;
+
+ this = priv->cf.this;
+ cld.cld_type = CHANGELOG_TYPE_FSYNC;
+
+ while (1) {
+ (void)pthread_testcancel();
+
+ tv.tv_sec = priv->fsync_interval;
+ tv.tv_usec = 0;
+
+ ret = select(0, NULL, NULL, NULL, &tv);
+ if (ret)
+ continue;
+
+ _mask_cancellation();
+
+ ret = changelog_inject_single_event(this, priv, &cld);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_INJECT_FSYNC_FAILED, NULL);
+
+ _unmask_cancellation();
+ }
+
+ return NULL;
+}
+
+/* macros for inode/changelog version checks */
+
+#define INODE_VERSION_UPDATE(priv, inode, iver, slice, type) \
+ do { \
+ LOCK(&inode->lock); \
+ { \
+ LOCK(&priv->lock); \
+ { \
+ *iver = slice->changelog_version[type]; \
+ } \
+ UNLOCK(&priv->lock); \
+ } \
+ UNLOCK(&inode->lock); \
+ } while (0)
+
+#define INODE_VERSION_EQUALS_SLICE(priv, ver, slice, type, upd) \
+ do { \
+ LOCK(&priv->lock); \
+ { \
+ upd = (ver == slice->changelog_version[type]) ? _gf_false \
+ : _gf_true; \
+ } \
+ UNLOCK(&priv->lock); \
+ } while (0)
+
+static int
+__changelog_inode_ctx_set(xlator_t *this, inode_t *inode,
+ changelog_inode_ctx_t *ctx)
+{
+ uint64_t ctx_addr = (uint64_t)(uintptr_t)ctx;
+ return __inode_ctx_set(inode, this, &ctx_addr);
+}
+
+/**
+ * one shot routine to get the address and the value of a inode version
+ * for a particular type.
+ */
+changelog_inode_ctx_t *
+__changelog_inode_ctx_get(xlator_t *this, inode_t *inode, unsigned long **iver,
+ unsigned long *version, changelog_log_type type)
+{
+ int ret = 0;
+ uint64_t ctx_addr = 0;
+ changelog_inode_ctx_t *ctx = NULL;
+
+ ret = __inode_ctx_get(inode, this, &ctx_addr);
+ if (ret < 0)
+ ctx_addr = 0;
+ if (ctx_addr != 0) {
+ ctx = (changelog_inode_ctx_t *)(long)ctx_addr;
+ goto out;
+ }
+
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_changelog_mt_inode_ctx_t);
+ if (!ctx)
+ goto out;
+
+ ret = __changelog_inode_ctx_set(this, inode, ctx);
+ if (ret) {
+ GF_FREE(ctx);
+ ctx = NULL;
+ }
+
+out:
+ if (ctx && iver && version) {
+ *iver = CHANGELOG_INODE_VERSION_TYPE(ctx, type);
+ *version = **iver;
+ }
+
+ return ctx;
+}
+
+static changelog_inode_ctx_t *
+changelog_inode_ctx_get(xlator_t *this, inode_t *inode, unsigned long **iver,
+ unsigned long *version, changelog_log_type type)
+{
+ changelog_inode_ctx_t *ctx = NULL;
+
+ LOCK(&inode->lock);
+ {
+ ctx = __changelog_inode_ctx_get(this, inode, iver, version, type);
+ }
+ UNLOCK(&inode->lock);
+
+ return ctx;
+}
+
+/**
+ * This is the main update routine. Locking has been made granular so as to
+ * maximize parallelism of fops - I'll try to explain it below using execution
+ * timelines.
+ *
+ * Basically, the contention is between multiple execution threads of this
+ * routine and the roll-over thread. So, instead of having a big lock, we hold
+ * granular locks: inode->lock and priv->lock. Now I'll explain what happens
+ * when there is an update and a roll-over at just about the same time.
+ * NOTE:
+ * - the dispatcher itself synchronizes updates via it's own lock
+ * - the slice version in incremented by the roll-over thread
+ *
+ * Case 1: When the rollover thread wins before the inode version can be
+ * compared with the slice version.
+ *
+ * [updater] | [rollover]
+ * |
+ * | <SLICE: 1, 1, 1>
+ * <changelog_update> |
+ * <changelog_inode_ctx_get> |
+ * <CTX: 1, 1, 1> |
+ * | <dispatch-rollover-event>
+ * | LOCK (&priv->lock)
+ * | <SLICE_VERSION_UPDATE>
+ * | <SLICE: 2, 2, 2>
+ * | UNLOCK (&priv->lock)
+ * |
+ * LOCK (&priv->lock) |
+ * <INODE_VERSION_EQUALS_SLICE> |
+ * I: 1 <-> S: 2 |
+ * update: true |
+ * UNLOCK (&priv->lock) |
+ * |
+ * <if update == true> |
+ * <dispath-update-event> |
+ * <INODE_VERSION_UPDATE> |
+ * LOCK (&inode->lock) |
+ * LOCK (&priv->lock) |
+ * <CTX: 2, 1, 1> |
+ * UNLOCK (&priv->lock) |
+ * UNLOCK (&inode->lock) |
+ *
+ * Therefore, the change gets recorded in the next change (no lost change). If
+ * the slice version was ahead of the inode version (say I:1, S: 2), then
+ * anyway the comparison would result in a update (I: 1, S: 3).
+ *
+ * If the rollover time is too less, then there is another contention when the
+ * updater tries to bring up inode version to the slice version (this is also
+ * the case when the roll-over thread wakes up during INODE_VERSION_UPDATE.
+ *
+ * <CTX: 1, 1, 1> | <SLICE: 2, 2, 2>
+ * |
+ * |
+ * <dispath-update-event> |
+ * <INODE_VERSION_UPDATE> |
+ * LOCK (&inode->lock) |
+ * LOCK (&priv->lock) |
+ * <CTX: 2, 1, 1> |
+ * UNLOCK (&priv->lock) |
+ * UNLOCK (&inode->lock) |
+ * | <dispatch-rollover-event>
+ * | LOCK (&priv->lock)
+ * | <SLICE_VERSION_UPDATE>
+ * | <SLICE: 3, 3, 3>
+ * | UNLOCK (&priv->lock)
+ *
+ *
+ * Case 2: When the fop thread wins
+ *
+ * [updater] | [rollover]
+ * |
+ * | <SLICE: 1, 1, 1>
+ * <changelog_update> |
+ * <changelog_inode_ctx_get> |
+ * <CTX: 0, 0, 0> |
+ * |
+ * LOCK (&priv->lock) |
+ * <INODE_VERSION_EQUALS_SLICE> |
+ * I: 0 <-> S: 1 |
+ * update: true |
+ * UNLOCK (&priv->lock) |
+ * | <dispatch-rollover-event>
+ * | LOCK (&priv->lock)
+ * | <SLICE_VERSION_UPDATE>
+ * | <SLICE: 2, 2, 2>
+ * | UNLOCK (&priv->lock)
+ * <if update == true> |
+ * <dispath-update-event> |
+ * <INODE_VERSION_UPDATE> |
+ * LOCK (&inode->lock) |
+ * LOCK (&priv->lock) |
+ * <CTX: 2, 0, 0> |
+ * UNLOCK (&priv->lock) |
+ * UNLOCK (&inode->lock) |
+ *
+ * Here again, if the inode version was equal to the slice version (I: 1, S: 1)
+ * then there is no need to record an update (as the equality of the two version
+ * signifies an update was recorded in the current time slice).
+ */
+void
+changelog_update(xlator_t *this, changelog_priv_t *priv,
+ changelog_local_t *local, changelog_log_type type)
+{
+ int ret = 0;
+ unsigned long *iver = NULL;
+ unsigned long version = 0;
+ inode_t *inode = NULL;
+ changelog_time_slice_t *slice = NULL;
+ changelog_inode_ctx_t *ctx = NULL;
+ changelog_log_data_t *cld_0 = NULL;
+ changelog_log_data_t *cld_1 = NULL;
+ changelog_local_t *next_local = NULL;
+ gf_boolean_t need_upd = _gf_true;
+
+ slice = &priv->slice;
+
+ /**
+ * for fops that do not require inode version checking
+ */
+ if (local->update_no_check)
+ goto update;
+
+ inode = local->inode;
+
+ ctx = changelog_inode_ctx_get(this, inode, &iver, &version, type);
+ if (!ctx)
+ goto update;
+
+ INODE_VERSION_EQUALS_SLICE(priv, version, slice, type, need_upd);
+
+update:
+ if (need_upd) {
+ cld_0 = &local->cld;
+ cld_0->cld_type = type;
+
+ if ((next_local = local->prev_entry) != NULL) {
+ cld_1 = &next_local->cld;
+ cld_1->cld_type = type;
+ }
+
+ ret = priv->cd.dispatchfn(this, priv, priv->cd.cd_data, cld_0, cld_1);
+
+ /**
+ * update after the dispatcher has successfully done
+ * it's job.
+ */
+ if (!local->update_no_check && iver && !ret)
+ INODE_VERSION_UPDATE(priv, inode, iver, slice, type);
+ }
+
+ return;
+}
+
+/* Begin: Geo-rep snapshot dependency changes */
+
+/* changelog_color_fop_and_inc_cnt: Assign color and inc fop cnt.
+ *
+ * Assigning color and increment of corresponding fop count should happen
+ * in a lock (i.e., there should be no window between them). If it does not,
+ * we might miss draining those fops which are colored but not yet incremented
+ * the count. Let's assume black fops are draining. If the black fop count
+ * reaches zero, we say draining is completed but we miss black fops which are
+ * not incremented fop count but color is assigned black.
+ */
+
+void
+changelog_color_fop_and_inc_cnt(xlator_t *this, changelog_priv_t *priv,
+ changelog_local_t *local)
+{
+ if (!priv || !local)
+ return;
+
+ LOCK(&priv->lock);
+ {
+ local->color = priv->current_color;
+ changelog_inc_fop_cnt(this, priv, local);
+ }
+ UNLOCK(&priv->lock);
+}
+
+/* Increments the respective fop counter based on the fop color */
+void
+changelog_inc_fop_cnt(xlator_t *this, changelog_priv_t *priv,
+ changelog_local_t *local)
+{
+ int ret = 0;
+
+ if (local) {
+ if (local->color == FOP_COLOR_BLACK) {
+ ret = pthread_mutex_lock(&priv->dm.drain_black_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
+ {
+ priv->dm.black_fop_cnt++;
+ }
+ ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
+ } else {
+ ret = pthread_mutex_lock(&priv->dm.drain_white_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
+ {
+ priv->dm.white_fop_cnt++;
+ }
+ ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
+ }
+ }
+out:
+ return;
+}
+
+/* Decrements the respective fop counter based on the fop color */
+void
+changelog_dec_fop_cnt(xlator_t *this, changelog_priv_t *priv,
+ changelog_local_t *local)
+{
+ int ret = 0;
+
+ if (local) {
+ if (local->color == FOP_COLOR_BLACK) {
+ ret = pthread_mutex_lock(&priv->dm.drain_black_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
+ {
+ priv->dm.black_fop_cnt--;
+ if (priv->dm.black_fop_cnt == 0 &&
+ priv->dm.drain_wait_black == _gf_true) {
+ ret = pthread_cond_signal(&priv->dm.drain_black_cond);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_2(
+ ret, out, priv->dm.drain_black_mutex);
+ gf_msg_debug(this->name, 0,
+ "Signalled "
+ "draining of black");
+ }
+ }
+ ret = pthread_mutex_unlock(&priv->dm.drain_black_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
+ } else {
+ ret = pthread_mutex_lock(&priv->dm.drain_white_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
+ {
+ priv->dm.white_fop_cnt--;
+ if (priv->dm.white_fop_cnt == 0 &&
+ priv->dm.drain_wait_white == _gf_true) {
+ ret = pthread_cond_signal(&priv->dm.drain_white_cond);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_2(
+ ret, out, priv->dm.drain_white_mutex);
+ gf_msg_debug(this->name, 0,
+ "Signalled "
+ "draining of white");
+ }
+ }
+ ret = pthread_mutex_unlock(&priv->dm.drain_white_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
+ }
+ }
+out:
+ return;
+}
+
+/* Write to a pipe setup between changelog main thread and changelog
+ * rollover thread to initiate explicit rollover of changelog journal.
+ */
+int
+changelog_barrier_notify(changelog_priv_t *priv, char *buf)
+{
+ int ret = 0;
+
+ pthread_mutex_lock(&priv->cr.lock);
+ {
+ ret = pthread_cond_signal(&priv->cr.cond);
+ priv->cr.notify = _gf_true;
+ }
+ pthread_mutex_unlock(&priv->cr.lock);
+ return ret;
+}
+
+/* Clean up flags set on barrier notification */
+void
+changelog_barrier_cleanup(xlator_t *this, changelog_priv_t *priv,
+ struct list_head *queue)
+{
+ int ret = 0;
+
+ LOCK(&priv->bflags.lock);
+ priv->bflags.barrier_ext = _gf_false;
+ UNLOCK(&priv->bflags.lock);
+
+ ret = pthread_mutex_lock(&priv->bn.bnotify_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
+ {
+ priv->bn.bnotify = _gf_false;
+ }
+ ret = pthread_mutex_unlock(&priv->bn.bnotify_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, out);
+
+ /* Disable changelog barrier and dequeue fops */
+ LOCK(&priv->lock);
+ {
+ if (priv->barrier_enabled == _gf_true)
+ __chlog_barrier_disable(this, queue);
+ else
+ ret = -1;
+ }
+ UNLOCK(&priv->lock);
+ if (ret == 0)
+ chlog_barrier_dequeue_all(this, queue);
+
+out:
+ return;
+}
+/* End: Geo-Rep snapshot dependency changes */
+
+int32_t
+changelog_fill_entry_buf(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ changelog_local_t **local)
+{
+ changelog_opt_t *co = NULL;
+ size_t xtra_len = 0;
+ char *dup_path = NULL;
+ char *bname = NULL;
+ inode_t *parent = NULL;
+
+ GF_ASSERT(this);
+
+ parent = inode_parent(loc->inode, 0, 0);
+ if (!parent) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, CHANGELOG_MSG_INODE_NOT_FOUND,
+ "type=parent", "gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
+ goto err;
+ }
+
+ CHANGELOG_INIT_NOCHECK(this, *local, loc->inode, loc->inode->gfid, 5);
+ if (!(*local)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_LOCAL_INIT_FAILED,
+ NULL);
+ goto err;
+ }
+
+ co = changelog_get_usable_buffer(*local);
+ if (!co) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_GET_BUFFER_FAILED,
+ NULL);
+ goto err;
+ }
+
+ if (loc->inode->ia_type == IA_IFDIR) {
+ CHANGLOG_FILL_FOP_NUMBER(co, GF_FOP_MKDIR, fop_fn, xtra_len);
+ co++;
+ CHANGELOG_FILL_UINT32(co, S_IFDIR | 0755, number_fn, xtra_len);
+ co++;
+ } else {
+ CHANGLOG_FILL_FOP_NUMBER(co, GF_FOP_CREATE, fop_fn, xtra_len);
+ co++;
+ CHANGELOG_FILL_UINT32(co, S_IFREG | 0644, number_fn, xtra_len);
+ co++;
+ }
+
+ CHANGELOG_FILL_UINT32(co, frame->root->uid, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32(co, frame->root->gid, number_fn, xtra_len);
+ co++;
+
+ dup_path = gf_strdup(loc->path);
+ bname = basename(dup_path);
+
+ CHANGELOG_FILL_ENTRY(co, parent->gfid, bname, entry_fn, entry_free_fn,
+ xtra_len, err);
+ changelog_set_usable_record_and_length(*local, xtra_len, 5);
+
+ if (dup_path)
+ GF_FREE(dup_path);
+ if (parent)
+ inode_unref(parent);
+ return 0;
+
+err:
+ if (dup_path)
+ GF_FREE(dup_path);
+ if (parent)
+ inode_unref(parent);
+ return -1;
+}
+
+/*
+ * resolve_pargfid_to_path:
+ * It converts given pargfid to path by doing recursive readlinks at the
+ * backend. If bname is given, it suffixes bname to pargfid to form the
+ * complete path else it doesn't. It allocates memory for the path and is
+ * caller's responsibility to free the same. If bname is NULL and pargfid
+ * is ROOT, then it returns "."
+ */
+
+int
+resolve_pargfid_to_path(xlator_t *this, const uuid_t pgfid, char **path,
+ char *bname)
+{
+ char *linkname = NULL;
+ char *dir_handle = NULL;
+ char *pgfidstr = NULL;
+ char *saveptr = NULL;
+ ssize_t len = 0;
+ int ret = 0;
+ uuid_t tmp_gfid = {
+ 0,
+ };
+ uuid_t pargfid = {
+ 0,
+ };
+ changelog_priv_t *priv = NULL;
+ char gpath[PATH_MAX] = {
+ 0,
+ };
+ char result[PATH_MAX] = {
+ 0,
+ };
+ char *dir_name = NULL;
+ char pre_dir_name[PATH_MAX] = {
+ 0,
+ };
+
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ gf_uuid_copy(pargfid, pgfid);
+ if (!path || gf_uuid_is_null(pargfid)) {
+ ret = -1;
+ goto out;
+ }
+
+ if (__is_root_gfid(pargfid)) {
+ if (bname)
+ *path = gf_strdup(bname);
+ else
+ *path = gf_strdup(".");
+ return ret;
+ }
+
+ dir_handle = alloca(PATH_MAX);
+ linkname = alloca(PATH_MAX);
+ (void)snprintf(gpath, PATH_MAX, "%s/.glusterfs/", priv->changelog_brick);
+
+ while (!(__is_root_gfid(pargfid))) {
+ len = snprintf(dir_handle, PATH_MAX, "%s/%02x/%02x/%s", gpath,
+ pargfid[0], pargfid[1], uuid_utoa(pargfid));
+ if ((len < 0) || (len >= PATH_MAX)) {
+ ret = -1;
+ goto out;
+ }
+
+ len = sys_readlink(dir_handle, linkname, PATH_MAX);
+ if (len < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_READLINK_OP_FAILED,
+ "could not read the "
+ "link from the gfid handle",
+ "handle=%s", dir_handle, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ linkname[len] = '\0';
+
+ pgfidstr = strtok_r(linkname + strlen("../../00/00/"), "/", &saveptr);
+ dir_name = strtok_r(NULL, "/", &saveptr);
+
+ len = snprintf(result, PATH_MAX, "%s/%s", dir_name, pre_dir_name);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ ret = -1;
+ goto out;
+ }
+ if (snprintf(pre_dir_name, len + 1, "%s", result) >= len + 1) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_uuid_parse(pgfidstr, tmp_gfid);
+ gf_uuid_copy(pargfid, tmp_gfid);
+ }
+
+ if (bname)
+ strncat(result, bname, strlen(bname) + 1);
+
+ *path = gf_strdup(result);
+
+out:
+ return ret;
+}
diff --git a/xlators/features/changelog/src/changelog-helpers.h b/xlators/features/changelog/src/changelog-helpers.h
new file mode 100644
index 00000000000..38fa7590c32
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-helpers.h
@@ -0,0 +1,716 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_HELPERS_H
+#define _CHANGELOG_HELPERS_H
+
+#include <glusterfs/locking.h>
+#include <glusterfs/timer.h>
+#include "pthread.h"
+#include <glusterfs/iobuf.h>
+#include <glusterfs/rot-buffs.h>
+
+#include "changelog-misc.h"
+#include <glusterfs/call-stub.h>
+
+#include "rpcsvc.h"
+#include "changelog-ev-handle.h"
+
+#include "changelog.h"
+#include "changelog-messages.h"
+
+/**
+ * the changelog entry
+ */
+typedef struct changelog_log_data {
+ /* rollover related */
+ time_t cld_roll_time;
+
+ /* reopen changelog? */
+ gf_boolean_t cld_finale;
+
+ changelog_log_type cld_type;
+
+ /**
+ * sincd gfid is _always_ a necessity, it's not a part
+ * of the iobuf. by doing this we do not add any overhead
+ * for data and metadata related fops.
+ */
+ uuid_t cld_gfid;
+
+ /**
+ * iobufs are used for optionals records: pargfid, path,
+ * write offsets etc.. It's the fop implementers job
+ * to allocate (iobuf_get() in the fop) and get unref'ed
+ * in the callback (CHANGELOG_STACK_UNWIND).
+ */
+ struct iobuf *cld_iobuf;
+
+#define cld_ptr cld_iobuf->ptr
+
+ /**
+ * after allocation you can point this to the length of
+ * usable data, but make sure it does not exceed the
+ * the size of the requested iobuf.
+ */
+ size_t cld_iobuf_len;
+
+#define cld_ptr_len cld_iobuf_len
+
+ /**
+ * number of optional records
+ */
+ int cld_xtra_records;
+} changelog_log_data_t;
+
+/**
+ * holder for dispatch function and private data
+ */
+
+typedef struct changelog_priv changelog_priv_t;
+
+typedef struct changelog_dispatcher {
+ void *cd_data;
+ int (*dispatchfn)(xlator_t *, changelog_priv_t *, void *,
+ changelog_log_data_t *, changelog_log_data_t *);
+} changelog_dispatcher_t;
+
+struct changelog_bootstrap {
+ changelog_mode_t mode;
+ int (*ctor)(xlator_t *, changelog_dispatcher_t *);
+ int (*dtor)(xlator_t *, changelog_dispatcher_t *);
+};
+
+struct changelog_encoder {
+ changelog_encoder_t encoder;
+ int (*encode)(xlator_t *, changelog_log_data_t *);
+};
+
+/* xlator private */
+
+typedef struct changelog_time_slice {
+ /**
+ * version of changelog file, incremented each time changes
+ * rollover.
+ */
+ unsigned long changelog_version[CHANGELOG_MAX_TYPE];
+} changelog_time_slice_t;
+
+typedef struct changelog_rollover {
+ /* rollover thread */
+ pthread_t rollover_th;
+
+ xlator_t *this;
+
+ pthread_mutex_t lock;
+ pthread_cond_t cond;
+ gf_boolean_t notify;
+} changelog_rollover_t;
+
+typedef struct changelog_fsync {
+ /* fsync() thread */
+ pthread_t fsync_th;
+
+ xlator_t *this;
+} changelog_fsync_t;
+
+/* Draining during changelog rollover (for geo-rep snapshot dependency):
+ * --------------------------------------------------------------------
+ * The introduction of draining of in-transit fops during changelog rollover
+ * (both explicit/timeout triggered) requires coloring of fops. Basically the
+ * implementation requires two counters, one counter which keeps the count of
+ * current intransit fops which should end up in current changelog and the other
+ * counter to keep track of incoming fops which should be drained as part of
+ * next changelog rollover event. The fops are colored w.r.t these counters.
+ * The fops that are to be drained as part of current changelog rollover is
+ * given one color and the fops which keep incoming during this and not
+ * necessarily should end up in current changelog and should be drained as part
+ * of next changelog rollover are given other color. The color switching
+ * continues with each changelog rollover. Two colors(black and white) are
+ * chosen here and initially black is chosen is default.
+ */
+
+typedef enum chlog_fop_color {
+ FOP_COLOR_BLACK,
+ FOP_COLOR_WHITE
+} chlog_fop_color_t;
+
+/* Barrier notify variable */
+typedef struct barrier_notify {
+ pthread_mutex_t bnotify_mutex;
+ pthread_cond_t bnotify_cond;
+ gf_boolean_t bnotify;
+ gf_boolean_t bnotify_error;
+} barrier_notify_t;
+
+/* Two separate mutex and conditional variable set is used
+ * to drain white and black fops. */
+
+typedef struct drain_mgmt {
+ pthread_mutex_t drain_black_mutex;
+ pthread_cond_t drain_black_cond;
+ pthread_mutex_t drain_white_mutex;
+ pthread_cond_t drain_white_cond;
+ /* Represents black fops count in-transit */
+ unsigned long black_fop_cnt;
+ /* Represents white fops count in-transit */
+ unsigned long white_fop_cnt;
+ gf_boolean_t drain_wait_black;
+ gf_boolean_t drain_wait_white;
+} drain_mgmt_t;
+
+/* External barrier as a result of snap on/off indicating flag*/
+typedef struct barrier_flags {
+ gf_lock_t lock;
+ gf_boolean_t barrier_ext;
+} barrier_flags_t;
+
+/* Event selection */
+typedef struct changelog_ev_selector {
+ gf_lock_t reflock;
+
+ /**
+ * Array of references for each selection bit.
+ */
+ unsigned int ref[CHANGELOG_EV_SELECTION_RANGE];
+} changelog_ev_selector_t;
+
+/* changelog's private structure */
+struct changelog_priv {
+ /* changelog journalling */
+ gf_boolean_t active;
+
+ /* changelog live notifications */
+ gf_boolean_t rpc_active;
+
+ /* to generate unique socket file per brick */
+ char *changelog_brick;
+
+ /* logging directory */
+ char *changelog_dir;
+
+ /* htime directory */
+ char *htime_dir;
+
+ /* one file for all changelog types */
+ int changelog_fd;
+
+ /* htime fd for current changelog session */
+ int htime_fd;
+
+ /* c_snap_fd is fd for call-path changelog */
+ int c_snap_fd;
+
+ /* rollover_count used by htime */
+ int rollover_count;
+
+ gf_lock_t lock;
+
+ /* lock to synchronize CSNAP updation */
+ gf_lock_t c_snap_lock;
+
+ /* written end of the pipe */
+ int wfd;
+
+ /* rollover time */
+ int32_t rollover_time;
+
+ /* fsync() interval */
+ int32_t fsync_interval;
+
+ /* changelog type maps */
+ const char *maps[CHANGELOG_MAX_TYPE];
+
+ /* time slicer */
+ changelog_time_slice_t slice;
+
+ /* context of the updater */
+ changelog_dispatcher_t cd;
+
+ /* context of the rollover thread */
+ changelog_rollover_t cr;
+
+ /* context of fsync thread */
+ changelog_fsync_t cf;
+
+ /* operation mode */
+ changelog_mode_t op_mode;
+
+ /* bootstrap routine for 'current' logger */
+ struct changelog_bootstrap *cb;
+
+ /* encoder mode */
+ changelog_encoder_t encode_mode;
+
+ /* encoder */
+ struct changelog_encoder *ce;
+
+ /**
+ * snapshot dependency changes
+ */
+
+ /* Draining of fops*/
+ drain_mgmt_t dm;
+
+ /* Represents the active color. Initially by default black */
+ chlog_fop_color_t current_color;
+
+ /* flag to determine explicit rollover is triggered */
+ gf_boolean_t explicit_rollover;
+
+ /* barrier notification variable protected by mutex */
+ barrier_notify_t bn;
+
+ /* barrier on/off indicating flags */
+ barrier_flags_t bflags;
+
+ /* changelog barrier on/off indicating flag */
+ gf_boolean_t barrier_enabled;
+ struct list_head queue;
+ uint32_t queue_size;
+ gf_timer_t *timer;
+ struct timespec timeout;
+
+ /**
+ * buffers, RPC, event selection, notifications and other
+ * beasts.
+ */
+
+ /* epoll pthread */
+ pthread_t poller;
+
+ /* rotational buffer */
+ rbuf_t *rbuf;
+
+ /* changelog RPC server */
+ rpcsvc_t *rpc;
+
+ /* event selection */
+ changelog_ev_selector_t ev_selection;
+
+ /* client handling (reverse connection) */
+ pthread_t connector;
+
+ int nr_dispatchers;
+ pthread_t *ev_dispatcher;
+
+ changelog_clnt_t connections;
+
+ /* glusterfind dependency to capture paths on deleted entries*/
+ gf_boolean_t capture_del_path;
+
+ /* Save total no. of listners */
+ gf_atomic_t listnercnt;
+
+ /* Save total no. of xprt are associated with listner */
+ gf_atomic_t xprtcnt;
+
+ /* Save xprt list */
+ struct list_head xprt_list;
+
+ /* Save total no. of client connection */
+ gf_atomic_t clntcnt;
+
+ /* Save cleanup brick in victim */
+ xlator_t *victim;
+
+ /* Status to save cleanup notify status */
+ gf_boolean_t notify_down;
+};
+
+struct changelog_local {
+ inode_t *inode;
+ gf_boolean_t update_no_check;
+
+ changelog_log_data_t cld;
+
+ /**
+ * ->prev_entry is used in cases when there needs to be
+ * additional changelog entry for the parent (eg. rename)
+ * It's analogous to ->next in single linked list world,
+ * but we call it as ->prev_entry... ha ha ha
+ */
+ struct changelog_local *prev_entry;
+
+ /* snap dependency changes */
+ chlog_fop_color_t color;
+};
+
+typedef struct changelog_local changelog_local_t;
+
+/* inode version is stored in inode ctx */
+typedef struct changelog_inode_ctx {
+ unsigned long iversion[CHANGELOG_MAX_TYPE];
+} changelog_inode_ctx_t;
+
+#define CHANGELOG_INODE_VERSION_TYPE(ctx, type) &(ctx->iversion[type])
+
+/**
+ * Optional Records:
+ * fops that need to save additional information request a array of
+ * @changelog_opt_t struct. The array is allocated via @iobufs.
+ */
+typedef enum {
+ CHANGELOG_OPT_REC_FOP,
+ CHANGELOG_OPT_REC_ENTRY,
+ CHANGELOG_OPT_REC_UINT32,
+} changelog_optional_rec_type_t;
+
+struct changelog_entry_fields {
+ uuid_t cef_uuid;
+ char *cef_bname;
+ char *cef_path;
+};
+
+typedef struct {
+ /**
+ * @co_covert can be used to do post-processing of the record before
+ * it's persisted to the CHANGELOG. If this is NULL, then the record
+ * is persisted as per it's in memory format.
+ */
+ size_t (*co_convert)(void *data, char *buffer, gf_boolean_t encode);
+
+ /* release routines */
+ void (*co_free)(void *data);
+
+ /* type of the field */
+ changelog_optional_rec_type_t co_type;
+
+ /**
+ * sizeof of the 'valid' field in the union. This field is not used if
+ * @co_convert is specified.
+ */
+ size_t co_len;
+
+ union {
+ unsigned int co_uint32;
+ glusterfs_fop_t co_fop;
+ struct changelog_entry_fields co_entry;
+ };
+} changelog_opt_t;
+
+#define CHANGELOG_OPT_RECORD_LEN sizeof(changelog_opt_t)
+
+/**
+ * helpers routines
+ */
+
+int
+changelog_thread_cleanup(xlator_t *this, pthread_t thr_id);
+
+void *
+changelog_get_usable_buffer(changelog_local_t *local);
+
+void
+changelog_set_usable_record_and_length(changelog_local_t *local, size_t len,
+ int xr);
+void
+changelog_local_cleanup(xlator_t *xl, changelog_local_t *local);
+changelog_local_t *
+changelog_local_init(xlator_t *this, inode_t *inode, uuid_t gfid,
+ int xtra_records, gf_boolean_t update_flag);
+int
+changelog_start_next_change(xlator_t *this, changelog_priv_t *priv, time_t ts,
+ gf_boolean_t finale);
+int
+changelog_open_journal(xlator_t *this, changelog_priv_t *priv);
+void
+changelog_fill_rollover_data(changelog_log_data_t *cld, gf_boolean_t is_last);
+int
+changelog_inject_single_event(xlator_t *this, changelog_priv_t *priv,
+ changelog_log_data_t *cld);
+size_t
+changelog_entry_length();
+int
+changelog_write(int fd, char *buffer, size_t len);
+int
+changelog_write_change(changelog_priv_t *priv, char *buffer, size_t len);
+int
+changelog_handle_change(xlator_t *this, changelog_priv_t *priv,
+ changelog_log_data_t *cld);
+void
+changelog_update(xlator_t *this, changelog_priv_t *priv,
+ changelog_local_t *local, changelog_log_type type);
+void *
+changelog_rollover(void *data);
+void *
+changelog_fsync_thread(void *data);
+int
+changelog_forget(xlator_t *this, inode_t *inode);
+int
+htime_update(xlator_t *this, changelog_priv_t *priv, time_t ts, char *buffer);
+int
+htime_open(xlator_t *this, changelog_priv_t *priv, time_t ts);
+int
+htime_create(xlator_t *this, changelog_priv_t *priv, time_t ts);
+
+/* Geo-Rep snapshot dependency changes */
+void
+changelog_color_fop_and_inc_cnt(xlator_t *this, changelog_priv_t *priv,
+ changelog_local_t *local);
+void
+changelog_inc_fop_cnt(xlator_t *this, changelog_priv_t *priv,
+ changelog_local_t *local);
+void
+changelog_dec_fop_cnt(xlator_t *this, changelog_priv_t *priv,
+ changelog_local_t *local);
+int
+changelog_barrier_notify(changelog_priv_t *priv, char *buf);
+void
+changelog_barrier_cleanup(xlator_t *this, changelog_priv_t *priv,
+ struct list_head *queue);
+void
+changelog_drain_white_fops(xlator_t *this, changelog_priv_t *priv);
+void
+changelog_drain_black_fops(xlator_t *this, changelog_priv_t *priv);
+
+/* Crash consistency of changelog wrt snapshot */
+int
+changelog_snap_logging_stop(xlator_t *this, changelog_priv_t *priv);
+int
+changelog_snap_logging_start(xlator_t *this, changelog_priv_t *priv);
+int
+changelog_snap_open(xlator_t *this, changelog_priv_t *priv);
+int
+changelog_snap_handle_ascii_change(xlator_t *this, changelog_log_data_t *cld);
+int
+changelog_snap_write_change(changelog_priv_t *priv, char *buffer, size_t len);
+
+/* Changelog barrier routines */
+void
+__chlog_barrier_enqueue(xlator_t *this, call_stub_t *stub);
+void
+__chlog_barrier_disable(xlator_t *this, struct list_head *queue);
+void
+chlog_barrier_dequeue_all(xlator_t *this, struct list_head *queue);
+call_stub_t *
+__chlog_barrier_dequeue(xlator_t *this, struct list_head *queue);
+int
+__chlog_barrier_enable(xlator_t *this, changelog_priv_t *priv);
+
+int32_t
+changelog_fill_entry_buf(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ changelog_local_t **local);
+
+/* event selection routines */
+void
+changelog_select_event(xlator_t *, changelog_ev_selector_t *, unsigned int);
+void
+changelog_deselect_event(xlator_t *, changelog_ev_selector_t *, unsigned int);
+int
+changelog_init_event_selection(xlator_t *, changelog_ev_selector_t *);
+int
+changelog_ev_selected(xlator_t *, changelog_ev_selector_t *, unsigned int);
+void
+changelog_dispatch_event(xlator_t *, changelog_priv_t *, changelog_event_t *);
+
+changelog_inode_ctx_t *
+__changelog_inode_ctx_get(xlator_t *, inode_t *, unsigned long **,
+ unsigned long *, changelog_log_type);
+int
+resolve_pargfid_to_path(xlator_t *this, const uuid_t gfid, char **path,
+ char *bname);
+
+/* macros */
+
+#define CHANGELOG_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ changelog_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ __xl = frame->this; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ if (__local && __local->prev_entry) \
+ changelog_local_cleanup(__xl, __local->prev_entry); \
+ changelog_local_cleanup(__xl, __local); \
+ } while (0)
+
+#define CHANGELOG_IOBUF_REF(iobuf) \
+ do { \
+ if (iobuf) \
+ iobuf_ref(iobuf); \
+ } while (0)
+
+#define CHANGELOG_IOBUF_UNREF(iobuf) \
+ do { \
+ if (iobuf) \
+ iobuf_unref(iobuf); \
+ } while (0)
+
+#define CHANGELOG_FILL_BUFFER(buffer, off, val, len) \
+ do { \
+ memcpy(buffer + off, val, len); \
+ off += len; \
+ } while (0)
+
+#define SLICE_VERSION_UPDATE(slice) \
+ do { \
+ int i = 0; \
+ for (; i < CHANGELOG_MAX_TYPE; i++) { \
+ slice->changelog_version[i]++; \
+ } \
+ } while (0)
+
+#define CHANGELOG_FILL_UINT32(co, number, converter, xlen) \
+ do { \
+ co->co_convert = converter; \
+ co->co_free = NULL; \
+ co->co_type = CHANGELOG_OPT_REC_UINT32; \
+ co->co_uint32 = number; \
+ xlen += sizeof(unsigned int); \
+ } while (0)
+
+#define CHANGLOG_FILL_FOP_NUMBER(co, fop, converter, xlen) \
+ do { \
+ co->co_convert = converter; \
+ co->co_free = NULL; \
+ co->co_type = CHANGELOG_OPT_REC_FOP; \
+ co->co_fop = fop; \
+ xlen += sizeof(fop); \
+ } while (0)
+
+#define CHANGELOG_FILL_ENTRY(co, pargfid, bname, converter, freefn, xlen, \
+ label) \
+ do { \
+ co->co_convert = converter; \
+ co->co_free = freefn; \
+ co->co_type = CHANGELOG_OPT_REC_ENTRY; \
+ gf_uuid_copy(co->co_entry.cef_uuid, pargfid); \
+ co->co_entry.cef_bname = gf_strdup(bname); \
+ if (!co->co_entry.cef_bname) \
+ goto label; \
+ xlen += (UUID_CANONICAL_FORM_LEN + strlen(bname)); \
+ } while (0)
+
+#define CHANGELOG_FILL_ENTRY_DIR_PATH(co, pargfid, bname, converter, \
+ del_freefn, xlen, label, capture_del) \
+ do { \
+ co->co_convert = converter; \
+ co->co_free = del_freefn; \
+ co->co_type = CHANGELOG_OPT_REC_ENTRY; \
+ gf_uuid_copy(co->co_entry.cef_uuid, pargfid); \
+ co->co_entry.cef_bname = gf_strdup(bname); \
+ if (!co->co_entry.cef_bname) \
+ goto label; \
+ xlen += (UUID_CANONICAL_FORM_LEN + strlen(bname)); \
+ if (!capture_del || \
+ resolve_pargfid_to_path(this, pargfid, &(co->co_entry.cef_path), \
+ co->co_entry.cef_bname)) { \
+ co->co_entry.cef_path = gf_strdup("\0"); \
+ xlen += 1; \
+ } else { \
+ xlen += (strlen(co->co_entry.cef_path)); \
+ } \
+ } while (0)
+
+#define CHANGELOG_INIT(this, local, inode, gfid, xrec) \
+ local = changelog_local_init(this, inode, gfid, xrec, _gf_false)
+
+#define CHANGELOG_INIT_NOCHECK(this, local, inode, gfid, xrec) \
+ local = changelog_local_init(this, inode, gfid, xrec, _gf_true)
+
+#define CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, label) \
+ do { \
+ if (!priv->active) \
+ goto label; \
+ /* ignore rebalance process's activity. */ \
+ if ((frame->root->pid == GF_CLIENT_PID_DEFRAG) || \
+ (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG)) \
+ goto label; \
+ } while (0)
+
+/* If it is a METADATA entry and fop num being GF_FOP_NULL, don't
+ * log in the changelog as it is of no use. And also if it is
+ * logged, since slicing version checking is done for metadata
+ * entries, the subsequent entries with valid fop num which falls
+ * to same changelog will be missed. Hence check for boundary
+ * condition.
+ */
+#define CHANGELOG_OP_BOUNDARY_CHECK(frame, label) \
+ do { \
+ if (frame->root->op <= GF_FOP_NULL || \
+ frame->root->op >= GF_FOP_MAXVALUE) \
+ goto label; \
+ } while (0)
+
+/**
+ * ignore internal fops for all clients except AFR self-heal daemon
+ */
+#define CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, dict, label) \
+ do { \
+ if ((frame->root->pid != GF_CLIENT_PID_SELF_HEALD) && dict && \
+ dict_get(dict, GLUSTERFS_INTERNAL_FOP_KEY)) \
+ goto label; \
+ } while (0)
+
+#define CHANGELOG_COND_GOTO(priv, cond, label) \
+ do { \
+ if (!priv->active || cond) \
+ goto label; \
+ } while (0)
+
+/* Begin: Geo-Rep snapshot dependency changes */
+
+#define DICT_ERROR -1
+#define BARRIER_OFF 0
+#define BARRIER_ON 1
+#define DICT_DEFAULT 2
+
+#define CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, label) \
+ do { \
+ if (!priv->active) { \
+ gf_smsg(this->name, GF_LOG_WARNING, 0, \
+ CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE, NULL); \
+ ret = 0; \
+ goto label; \
+ } \
+ } while (0)
+
+/* Log pthread error and goto label */
+#define CHANGELOG_PTHREAD_ERROR_HANDLE_0(ret, label) \
+ do { \
+ if (ret) { \
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \
+ "error=%d", ret, NULL); \
+ ret = -1; \
+ goto label; \
+ } \
+ } while (0);
+
+/* Log pthread error, set flag and goto label */
+#define CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret, label, flag) \
+ do { \
+ if (ret) { \
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \
+ "error=%d", ret, NULL); \
+ ret = -1; \
+ flag = _gf_true; \
+ goto label; \
+ } \
+ } while (0)
+
+/* Log pthread error, unlock mutex and goto label */
+#define CHANGELOG_PTHREAD_ERROR_HANDLE_2(ret, label, mutex) \
+ do { \
+ if (ret) { \
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_PTHREAD_ERROR, \
+ "error=%d", ret, NULL); \
+ ret = -1; \
+ pthread_mutex_unlock(&mutex); \
+ goto label; \
+ } \
+ } while (0)
+
+/* End: Geo-Rep snapshot dependency changes */
+
+#endif /* _CHANGELOG_HELPERS_H */
diff --git a/xlators/features/changelog/src/changelog-mem-types.h b/xlators/features/changelog/src/changelog-mem-types.h
new file mode 100644
index 00000000000..a2d8a9cbe93
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-mem-types.h
@@ -0,0 +1,34 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_MEM_TYPES_H
+#define _CHANGELOG_MEM_TYPES_H
+
+#include <glusterfs/mem-types.h>
+
+enum gf_changelog_mem_types {
+ gf_changelog_mt_priv_t = gf_common_mt_end + 1,
+ gf_changelog_mt_str_t = gf_common_mt_end + 2,
+ gf_changelog_mt_batch_t = gf_common_mt_end + 3,
+ gf_changelog_mt_rt_t = gf_common_mt_end + 4,
+ gf_changelog_mt_inode_ctx_t = gf_common_mt_end + 5,
+ gf_changelog_mt_rpc_clnt_t = gf_common_mt_end + 6,
+ gf_changelog_mt_libgfchangelog_t = gf_common_mt_end + 7,
+ gf_changelog_mt_libgfchangelog_entry_t = gf_common_mt_end + 8,
+ gf_changelog_mt_libgfchangelog_rl_t = gf_common_mt_end + 9,
+ gf_changelog_mt_changelog_buffer_t = gf_common_mt_end + 10,
+ gf_changelog_mt_history_data_t = gf_common_mt_end + 11,
+ gf_changelog_mt_libgfchangelog_call_pool_t = gf_common_mt_end + 12,
+ gf_changelog_mt_libgfchangelog_event_t = gf_common_mt_end + 13,
+ gf_changelog_mt_ev_dispatcher_t = gf_common_mt_end + 14,
+ gf_changelog_mt_end
+};
+
+#endif
diff --git a/xlators/features/changelog/src/changelog-messages.h b/xlators/features/changelog/src/changelog-messages.h
new file mode 100644
index 00000000000..cb0e16c85d8
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-messages.h
@@ -0,0 +1,172 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _CHANGELOG_MESSAGES_H_
+#define _CHANGELOG_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(
+ CHANGELOG, CHANGELOG_MSG_OPEN_FAILED, CHANGELOG_MSG_BARRIER_FOP_FAILED,
+ CHANGELOG_MSG_VOL_MISCONFIGURED, CHANGELOG_MSG_RENAME_ERROR,
+ CHANGELOG_MSG_READ_ERROR, CHANGELOG_MSG_HTIME_ERROR,
+ CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED,
+ CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, CHANGELOG_MSG_CHILD_MISCONFIGURED,
+ CHANGELOG_MSG_DIR_OPTIONS_NOT_SET, CHANGELOG_MSG_CLOSE_ERROR,
+ CHANGELOG_MSG_PIPE_CREATION_ERROR, CHANGELOG_MSG_DICT_GET_FAILED,
+ CHANGELOG_MSG_BARRIER_INFO, CHANGELOG_MSG_BARRIER_ERROR,
+ CHANGELOG_MSG_GET_TIME_OP_FAILED, CHANGELOG_MSG_WRITE_FAILED,
+ CHANGELOG_MSG_PTHREAD_ERROR, CHANGELOG_MSG_INODE_NOT_FOUND,
+ CHANGELOG_MSG_FSYNC_OP_FAILED, CHANGELOG_MSG_TOTAL_LOG_INFO,
+ CHANGELOG_MSG_SNAP_INFO, CHANGELOG_MSG_SELECT_FAILED,
+ CHANGELOG_MSG_FCNTL_FAILED, CHANGELOG_MSG_BNOTIFY_INFO,
+ CHANGELOG_MSG_ENTRY_BUF_INFO, CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE,
+ CHANGELOG_MSG_LOCAL_INIT_FAILED, CHANGELOG_MSG_NOTIFY_REGISTER_FAILED,
+ CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED, CHANGELOG_MSG_HANDLE_PROBE_ERROR,
+ CHANGELOG_MSG_SET_FD_CONTEXT, CHANGELOG_MSG_FREEUP_FAILED,
+ CHANGELOG_MSG_RECONFIGURE, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED,
+ CHANGELOG_MSG_RPC_BUILD_ERROR, CHANGELOG_MSG_RPC_CONNECT_ERROR,
+ CHANGELOG_MSG_RPC_START_ERROR, CHANGELOG_MSG_BUFFER_STARVATION_ERROR,
+ CHANGELOG_MSG_SCAN_DIR_FAILED, CHANGELOG_MSG_FSETXATTR_FAILED,
+ CHANGELOG_MSG_FGETXATTR_FAILED, CHANGELOG_MSG_CLEANUP_ON_ACTIVE_REF,
+ CHANGELOG_MSG_DISPATCH_EVENT_FAILED, CHANGELOG_MSG_PUT_BUFFER_FAILED,
+ CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED, CHANGELOG_MSG_PTHREAD_CANCEL_FAILED,
+ CHANGELOG_MSG_INJECT_FSYNC_FAILED, CHANGELOG_MSG_CREATE_FRAME_FAILED,
+ CHANGELOG_MSG_FSTAT_OP_FAILED, CHANGELOG_MSG_LSEEK_OP_FAILED,
+ CHANGELOG_MSG_STRSTR_OP_FAILED, CHANGELOG_MSG_UNLINK_OP_FAILED,
+ CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED,
+ CHANGELOG_MSG_READLINK_OP_FAILED, CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED,
+ CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED, CHANGELOG_MSG_MEMORY_INIT_FAILED,
+ CHANGELOG_MSG_NO_MEMORY, CHANGELOG_MSG_HTIME_STAT_ERROR,
+ CHANGELOG_MSG_HTIME_CURRENT_ERROR, CHANGELOG_MSG_BNOTIFY_COND_INFO,
+ CHANGELOG_MSG_NO_HTIME_CURRENT, CHANGELOG_MSG_HTIME_CURRENT,
+ CHANGELOG_MSG_NEW_HTIME_FILE, CHANGELOG_MSG_MKDIR_ERROR,
+ CHANGELOG_MSG_PATH_NOT_FOUND, CHANGELOG_MSG_XATTR_INIT_FAILED,
+ CHANGELOG_MSG_WROTE_TO_CSNAP, CHANGELOG_MSG_UNUSED_0,
+ CHANGELOG_MSG_GET_BUFFER_FAILED, CHANGELOG_MSG_BARRIER_STATE_NOTIFY,
+ CHANGELOG_MSG_BARRIER_DISABLED, CHANGELOG_MSG_BARRIER_ALREADY_DISABLED,
+ CHANGELOG_MSG_BARRIER_ON_ERROR, CHANGELOG_MSG_BARRIER_ENABLE,
+ CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, CHANGELOG_MSG_ERROR_IN_DICT_GET,
+ CHANGELOG_MSG_UNUSED_1, CHANGELOG_MSG_UNUSED_2,
+ CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS,
+ CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED,
+ CHANGELOG_MSG_BARRIER_TIMEOUT, CHANGELOG_MSG_TIMEOUT_ADD_FAILED,
+ CHANGELOG_MSG_CLEANUP_ALREADY_SET);
+
+#define CHANGELOG_MSG_BARRIER_FOP_FAILED_STR \
+ "failed to barrier FOPs, disabling changelog barrier"
+#define CHANGELOG_MSG_MEMORY_INIT_FAILED_STR "memory accounting init failed"
+#define CHANGELOG_MSG_NO_MEMORY_STR "failed to create local memory pool"
+#define CHANGELOG_MSG_ENTRY_BUF_INFO_STR \
+ "Entry cannot be captured for gfid, Capturing DATA entry."
+#define CHANGELOG_MSG_PTHREAD_ERROR_STR "pthread error"
+#define CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED_STR "pthread_mutex_init failed"
+#define CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED_STR "pthread_cond_init failed"
+#define CHANGELOG_MSG_HTIME_ERROR_STR "failed to update HTIME file"
+#define CHANGELOG_MSG_HTIME_STAT_ERROR_STR "unable to stat htime file"
+#define CHANGELOG_MSG_HTIME_CURRENT_ERROR_STR "Error extracting HTIME_CURRENT."
+#define CHANGELOG_MSG_UNLINK_OP_FAILED_STR "error unlinking empty changelog"
+#define CHANGELOG_MSG_RENAME_ERROR_STR "error renaming"
+#define CHANGELOG_MSG_MKDIR_ERROR_STR "unable to create directory"
+#define CHANGELOG_MSG_BNOTIFY_INFO_STR \
+ "Explicit rollover changelog signaling bnotify"
+#define CHANGELOG_MSG_BNOTIFY_COND_INFO_STR "Woke up: bnotify conditional wait"
+#define CHANGELOG_MSG_RECONFIGURE_STR "Reconfigure: Changelog Enable"
+#define CHANGELOG_MSG_NO_HTIME_CURRENT_STR \
+ "HTIME_CURRENT not found. Changelog enabled before init"
+#define CHANGELOG_MSG_HTIME_CURRENT_STR "HTIME_CURRENT"
+#define CHANGELOG_MSG_NEW_HTIME_FILE_STR \
+ "Changelog enable: Creating new HTIME file"
+#define CHANGELOG_MSG_FGETXATTR_FAILED_STR "fgetxattr failed"
+#define CHANGELOG_MSG_TOTAL_LOG_INFO_STR "changelog info"
+#define CHANGELOG_MSG_PTHREAD_COND_WAIT_FAILED_STR "pthread cond wait failed"
+#define CHANGELOG_MSG_INODE_NOT_FOUND_STR "inode not found"
+#define CHANGELOG_MSG_READLINK_OP_FAILED_STR \
+ "could not read the link from the gfid handle"
+#define CHANGELOG_MSG_OPEN_FAILED_STR "unable to open file"
+#define CHANGELOG_MSG_RPC_CONNECT_ERROR_STR "failed to connect back"
+#define CHANGELOG_MSG_BUFFER_STARVATION_ERROR_STR \
+ "Failed to get buffer for RPC dispatch"
+#define CHANGELOG_MSG_PTHREAD_CANCEL_FAILED_STR "could not cancel thread"
+#define CHANGELOG_MSG_FSTAT_OP_FAILED_STR "Could not stat (CHANGELOG)"
+#define CHANGELOG_MSG_LSEEK_OP_FAILED_STR "Could not lseek (changelog)"
+#define CHANGELOG_MSG_PATH_NOT_FOUND_STR \
+ "Could not find CHANGELOG in changelog path"
+#define CHANGELOG_MSG_FSYNC_OP_FAILED_STR "fsync failed"
+#define CHANGELOG_MSG_DETECT_EMPTY_CHANGELOG_FAILED_STR \
+ "Error detecting empty changelog"
+#define CHANGELOG_MSG_EXPLICIT_ROLLOVER_FAILED_STR \
+ "Fail snapshot because of previous errors"
+#define CHANGELOG_MSG_SCAN_DIR_FAILED_STR "scandir failed"
+#define CHANGELOG_MSG_FSETXATTR_FAILED_STR "fsetxattr failed"
+#define CHANGELOG_MSG_XATTR_INIT_FAILED_STR "Htime xattr initialization failed"
+#define CHANGELOG_MSG_SNAP_INFO_STR "log in call path"
+#define CHANGELOG_MSG_WRITE_FAILED_STR "error writing to disk"
+#define CHANGELOG_MSG_WROTE_TO_CSNAP_STR "Successfully wrote to csnap"
+#define CHANGELOG_MSG_GET_TIME_OP_FAILED_STR "Problem rolling over changelog(s)"
+#define CHANGELOG_MSG_BARRIER_INFO_STR "Explicit wakeup on barrier notify"
+#define CHANGELOG_MSG_SELECT_FAILED_STR "pthread_cond_timedwait failed"
+#define CHANGELOG_MSG_INJECT_FSYNC_FAILED_STR "failed to inject fsync event"
+#define CHANGELOG_MSG_LOCAL_INIT_FAILED_STR \
+ "changelog local initialization failed"
+#define CHANGELOG_MSG_GET_BUFFER_FAILED_STR "Failed to get buffer"
+#define CHANGELOG_MSG_SET_FD_CONTEXT_STR \
+ "could not set fd context(for release cbk)"
+#define CHANGELOG_MSG_DICT_GET_FAILED_STR "Barrier failed"
+#define CHANGELOG_MSG_BARRIER_STATE_NOTIFY_STR "Barrier notification"
+#define CHANGELOG_MSG_BARRIER_ERROR_STR \
+ "Received another barrier off notification while already off"
+#define CHANGELOG_MSG_BARRIER_DISABLED_STR "disabled changelog barrier"
+#define CHANGELOG_MSG_BARRIER_ALREADY_DISABLED_STR \
+ "Changelog barrier already disabled"
+#define CHANGELOG_MSG_BARRIER_ON_ERROR_STR \
+ "Received another barrier on notification when last one is not served yet"
+#define CHANGELOG_MSG_BARRIER_ENABLE_STR "Enabled changelog barrier"
+#define CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND_STR "barrier key not found"
+#define CHANGELOG_MSG_ERROR_IN_DICT_GET_STR \
+ "Something went wrong in dict_get_str_boolean"
+#define CHANGELOG_MSG_DIR_OPTIONS_NOT_SET_STR "changelog-dir option is not set"
+#define CHANGELOG_MSG_FREEUP_FAILED_STR "could not cleanup bootstrapper"
+#define CHANGELOG_MSG_CHILD_MISCONFIGURED_STR \
+ "translator needs a single subvolume"
+#define CHANGELOG_MSG_VOL_MISCONFIGURED_STR \
+ "dangling volume. please check volfile"
+#define CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_STR \
+ "Dequeuing all the changelog barriered fops"
+#define CHANGELOG_MSG_DEQUEUING_BARRIER_FOPS_FINISHED_STR \
+ "Dequeuing changelog barriered fops is finished"
+#define CHANGELOG_MSG_BARRIER_TIMEOUT_STR \
+ "Disabling changelog barrier because of the timeout"
+#define CHANGELOG_MSG_TIMEOUT_ADD_FAILED_STR \
+ "Couldn't add changelog barrier timeout event"
+#define CHANGELOG_MSG_RPC_BUILD_ERROR_STR "failed to build rpc options"
+#define CHANGELOG_MSG_NOTIFY_REGISTER_FAILED_STR "failed to register notify"
+#define CHANGELOG_MSG_RPC_START_ERROR_STR "failed to start rpc"
+#define CHANGELOG_MSG_CREATE_FRAME_FAILED_STR "failed to create frame"
+#define CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED_STR "failed to serialize reply"
+#define CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED_STR "cannot register program"
+#define CHANGELOG_MSG_CHANGELOG_NOT_ACTIVE_STR \
+ "Changelog is not active, return success"
+#define CHANGELOG_MSG_PUT_BUFFER_FAILED_STR \
+ "failed to put buffer after consumption"
+#define CHANGELOG_MSG_CLEANUP_ALREADY_SET_STR \
+ "cleanup_starting flag is already set for xl"
+#define CHANGELOG_MSG_HANDLE_PROBE_ERROR_STR "xdr decoding error"
+#endif /* !_CHANGELOG_MESSAGES_H_ */
diff --git a/xlators/features/changelog/src/changelog-misc.h b/xlators/features/changelog/src/changelog-misc.h
new file mode 100644
index 00000000000..e2addc09414
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-misc.h
@@ -0,0 +1,131 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_MISC_H
+#define _CHANGELOG_MISC_H
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/common-utils.h>
+
+#define CHANGELOG_MAX_TYPE 4
+#define CHANGELOG_FILE_NAME "CHANGELOG"
+#define HTIME_FILE_NAME "HTIME"
+#define CSNAP_FILE_NAME "CHANGELOG.SNAP"
+#define HTIME_KEY "trusted.glusterfs.htime"
+#define HTIME_CURRENT "trusted.glusterfs.current_htime"
+#define HTIME_INITIAL_VALUE "0:0"
+
+#define CHANGELOG_VERSION_MAJOR 1
+#define CHANGELOG_VERSION_MINOR 2
+
+#define CHANGELOG_UNIX_SOCK DEFAULT_VAR_RUN_DIRECTORY "/changelog-%s.sock"
+#define CHANGELOG_TMP_UNIX_SOCK DEFAULT_VAR_RUN_DIRECTORY "/.%s%lu.sock"
+
+/**
+ * header starts with the version and the format of the changelog.
+ * 'version' not much of a use now.
+ */
+#define CHANGELOG_HEADER \
+ "GlusterFS Changelog | version: v%d.%d | encoding : %d\n"
+
+#define CHANGELOG_MAKE_SOCKET_PATH(brick_path, sockpath, len) \
+ do { \
+ char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = { \
+ 0, \
+ }; \
+ gf_xxh64_wrapper((unsigned char *)brick_path, strlen(brick_path), \
+ GF_XXHSUM64_DEFAULT_SEED, xxh64); \
+ (void)snprintf(sockpath, len, CHANGELOG_UNIX_SOCK, xxh64); \
+ } while (0)
+
+#define CHANGELOG_MAKE_TMP_SOCKET_PATH(brick_path, sockpath, len) \
+ do { \
+ unsigned long pid = 0; \
+ char xxh64[GF_XXH64_DIGEST_LENGTH * 2 + 1] = { \
+ 0, \
+ }; \
+ pid = (unsigned long)getpid(); \
+ gf_xxh64_wrapper((unsigned char *)brick_path, strlen(brick_path), \
+ GF_XXHSUM64_DEFAULT_SEED, xxh64); \
+ (void)snprintf(sockpath, len, CHANGELOG_TMP_UNIX_SOCK, xxh64, pid); \
+ } while (0)
+
+/**
+ * ... used by libgfchangelog.
+ */
+#define CHANGELOG_GET_HEADER_INFO(fd, buffer, len, enc, maj, min, elen) \
+ do { \
+ FILE *fp; \
+ int fd_dup; \
+ \
+ enc = -1; \
+ maj = -1; \
+ min = -1; \
+ fd_dup = dup(fd); \
+ \
+ if (fd_dup != -1) { \
+ fp = fdopen(fd_dup, "r"); \
+ if (fp) { \
+ if (fgets(buffer, len, fp)) { \
+ elen = strlen(buffer); \
+ sscanf(buffer, CHANGELOG_HEADER, &maj, &min, &enc); \
+ } \
+ fclose(fp); \
+ } else { \
+ sys_close(fd_dup); \
+ } \
+ } \
+ } while (0)
+
+#define CHANGELOG_FILL_HTIME_DIR(changelog_dir, path) \
+ do { \
+ snprintf(path, sizeof(path), "%s/htime", changelog_dir); \
+ } while (0)
+
+#define CHANGELOG_FILL_CSNAP_DIR(changelog_dir, path) \
+ do { \
+ snprintf(path, sizeof(path), "%s/csnap", changelog_dir); \
+ } while (0)
+/**
+ * everything after 'CHANGELOG_TYPE_METADATA_XATTR' are internal types
+ * (ie. none of the fops trigger this type of event), hence
+ * CHANGELOG_MAX_TYPE = 4
+ */
+typedef enum {
+ CHANGELOG_TYPE_DATA = 0,
+ CHANGELOG_TYPE_METADATA,
+ CHANGELOG_TYPE_ENTRY,
+ CHANGELOG_TYPE_METADATA_XATTR,
+ CHANGELOG_TYPE_ROLLOVER,
+ CHANGELOG_TYPE_FSYNC,
+} changelog_log_type;
+
+/* operation modes - RT for now */
+typedef enum {
+ CHANGELOG_MODE_RT = 0,
+} changelog_mode_t;
+
+/* encoder types */
+
+typedef enum {
+ CHANGELOG_ENCODE_MIN = 0,
+ CHANGELOG_ENCODE_BINARY,
+ CHANGELOG_ENCODE_ASCII,
+ CHANGELOG_ENCODE_MAX,
+} changelog_encoder_t;
+
+#define CHANGELOG_VALID_ENCODING(enc) \
+ (enc > CHANGELOG_ENCODE_MIN && enc < CHANGELOG_ENCODE_MAX)
+
+#define CHANGELOG_TYPE_IS_ENTRY(type) (type == CHANGELOG_TYPE_ENTRY)
+#define CHANGELOG_TYPE_IS_ROLLOVER(type) (type == CHANGELOG_TYPE_ROLLOVER)
+#define CHANGELOG_TYPE_IS_FSYNC(type) (type == CHANGELOG_TYPE_FSYNC)
+
+#endif /* _CHANGELOG_MISC_H */
diff --git a/xlators/features/changelog/src/changelog-rpc-common.c b/xlators/features/changelog/src/changelog-rpc-common.c
new file mode 100644
index 00000000000..125246a17e1
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rpc-common.c
@@ -0,0 +1,359 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "changelog-rpc-common.h"
+#include "changelog-messages.h"
+
+#include <glusterfs/syscall.h>
+/**
+*****************************************************
+ Client Interface
+*****************************************************
+*/
+
+/**
+ * Initialize and return an RPC client object for a given unix
+ * domain socket.
+ */
+
+void *
+changelog_rpc_poller(void *arg)
+{
+ xlator_t *this = arg;
+
+ (void)gf_event_dispatch(this->ctx->event_pool);
+ return NULL;
+}
+
+struct rpc_clnt *
+changelog_rpc_client_init(xlator_t *this, void *cbkdata, char *sockfile,
+ rpc_clnt_notify_t fn)
+{
+ int ret = 0;
+ struct rpc_clnt *rpc = NULL;
+ dict_t *options = NULL;
+
+ if (!cbkdata)
+ cbkdata = this;
+
+ options = dict_new();
+ if (!options)
+ goto error_return;
+
+ ret = rpc_transport_unix_options_build(options, sockfile, 0);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_BUILD_ERROR,
+ NULL);
+ goto dealloc_dict;
+ }
+
+ rpc = rpc_clnt_new(options, this, this->name, 16);
+ if (!rpc)
+ goto dealloc_dict;
+
+ ret = rpc_clnt_register_notify(rpc, fn, cbkdata);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, NULL);
+ goto dealloc_rpc_clnt;
+ }
+
+ ret = rpc_clnt_start(rpc);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR,
+ NULL);
+ goto dealloc_rpc_clnt;
+ }
+
+ dict_unref(options);
+ return rpc;
+
+dealloc_rpc_clnt:
+ rpc_clnt_unref(rpc);
+dealloc_dict:
+ dict_unref(options);
+error_return:
+ return NULL;
+}
+
+/**
+ * Generic RPC client routine to dispatch a request to an
+ * RPC server.
+ */
+int
+changelog_rpc_sumbit_req(struct rpc_clnt *rpc, void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog, int procnum,
+ struct iovec *payload, int payloadcnt,
+ struct iobref *iobref, xlator_t *this,
+ fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+ int ret = 0;
+ int count = 0;
+ struct iovec iov = {
+ 0,
+ };
+ struct iobuf *iobuf = NULL;
+ char new_iobref = 0;
+ ssize_t xdr_size = 0;
+
+ GF_ASSERT(this);
+
+ if (req) {
+ xdr_size = xdr_sizeof(xdrproc, req);
+
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ };
+
+ if (!iobref) {
+ iobref = iobref_new();
+ if (!iobref) {
+ goto out;
+ }
+
+ new_iobref = 1;
+ }
+
+ iobref_add(iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_size(iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic(iov, req, xdrproc);
+ if (ret == -1) {
+ goto out;
+ }
+
+ iov.iov_len = ret;
+ count = 1;
+ }
+
+ ret = rpc_clnt_submit(rpc, prog, procnum, cbkfn, &iov, count, payload,
+ payloadcnt, iobref, frame, NULL, 0, NULL, 0, NULL);
+
+out:
+ if (new_iobref)
+ iobref_unref(iobref);
+ if (iobuf)
+ iobuf_unref(iobuf);
+ return ret;
+}
+
+/**
+ * Entry point to perform a remote procedure call
+ */
+int
+changelog_invoke_rpc(xlator_t *this, struct rpc_clnt *rpc,
+ rpc_clnt_prog_t *prog, int procidx, void *arg)
+{
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+
+ if (!this || !prog)
+ goto error_return;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CREATE_FRAME_FAILED,
+ NULL);
+ goto error_return;
+ }
+
+ proc = &prog->proctable[procidx];
+ if (proc->fn)
+ ret = proc->fn(frame, this, arg);
+
+ STACK_DESTROY(frame->root);
+ return ret;
+
+error_return:
+ return -1;
+}
+
+/**
+*****************************************************
+ Server Interface
+*****************************************************
+*/
+
+struct iobuf *
+__changelog_rpc_serialize_reply(rpcsvc_request_t *req, void *arg,
+ struct iovec *outmsg, xdrproc_t xdrproc)
+{
+ struct iobuf *iob = NULL;
+ ssize_t retlen = 0;
+ ssize_t rsp_size = 0;
+
+ rsp_size = xdr_sizeof(xdrproc, arg);
+ iob = iobuf_get2(req->svc->ctx->iobuf_pool, rsp_size);
+ if (!iob)
+ goto error_return;
+
+ iobuf_to_iovec(iob, outmsg);
+
+ retlen = xdr_serialize_generic(*outmsg, arg, xdrproc);
+ if (retlen == -1)
+ goto unref_iob;
+
+ outmsg->iov_len = retlen;
+ return iob;
+
+unref_iob:
+ iobuf_unref(iob);
+error_return:
+ return NULL;
+}
+
+int
+changelog_rpc_sumbit_reply(rpcsvc_request_t *req, void *arg,
+ struct iovec *payload, int payloadcount,
+ struct iobref *iobref, xdrproc_t xdrproc)
+{
+ int ret = -1;
+ struct iobuf *iob = NULL;
+ struct iovec iov = {
+ 0,
+ };
+ char new_iobref = 0;
+
+ if (!req)
+ goto return_ret;
+
+ if (!iobref) {
+ iobref = iobref_new();
+ if (!iobref)
+ goto return_ret;
+ new_iobref = 1;
+ }
+
+ iob = __changelog_rpc_serialize_reply(req, arg, &iov, xdrproc);
+ if (!iob)
+ gf_smsg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_SUBMIT_REPLY_FAILED,
+ NULL);
+ else
+ iobref_add(iobref, iob);
+
+ ret = rpcsvc_submit_generic(req, &iov, 1, payload, payloadcount, iobref);
+
+ if (new_iobref)
+ iobref_unref(iobref);
+ if (iob)
+ iobuf_unref(iob);
+return_ret:
+ return ret;
+}
+
+void
+changelog_rpc_server_destroy(xlator_t *this, rpcsvc_t *rpc, char *sockfile,
+ rpcsvc_notify_t fn, struct rpcsvc_program **progs)
+{
+ rpcsvc_listener_t *listener = NULL;
+ rpcsvc_listener_t *next = NULL;
+ struct rpcsvc_program *prog = NULL;
+ rpc_transport_t *trans = NULL;
+
+ if (!rpc)
+ return;
+
+ while (*progs) {
+ prog = *progs;
+ (void)rpcsvc_program_unregister(rpc, prog);
+ progs++;
+ }
+
+ list_for_each_entry_safe(listener, next, &rpc->listeners, list)
+ {
+ if (listener->trans) {
+ trans = listener->trans;
+ rpc_transport_disconnect(trans, _gf_false);
+ }
+ }
+
+ (void)rpcsvc_unregister_notify(rpc, fn, this);
+
+ /* TODO Avoid freeing rpc object in case of brick multiplex
+ after freeing rpc object svc->rpclock corrupted and it takes
+ more time to detach a brick
+ */
+ if (!this->cleanup_starting) {
+ if (rpc->rxpool) {
+ mem_pool_destroy(rpc->rxpool);
+ rpc->rxpool = NULL;
+ }
+ GF_FREE(rpc);
+ }
+}
+
+rpcsvc_t *
+changelog_rpc_server_init(xlator_t *this, char *sockfile, void *cbkdata,
+ rpcsvc_notify_t fn, struct rpcsvc_program **progs)
+{
+ int ret = 0;
+ rpcsvc_t *rpc = NULL;
+ dict_t *options = NULL;
+ struct rpcsvc_program *prog = NULL;
+
+ if (!cbkdata)
+ cbkdata = this;
+
+ options = dict_new();
+ if (!options)
+ return NULL;
+
+ ret = rpcsvc_transport_unix_options_build(options, sockfile);
+ if (ret)
+ goto dealloc_dict;
+
+ rpc = rpcsvc_init(this, this->ctx, options, 8);
+ if (rpc == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_RPC_START_ERROR,
+ NULL);
+ goto dealloc_dict;
+ }
+
+ ret = rpcsvc_register_notify(rpc, fn, cbkdata);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_NOTIFY_REGISTER_FAILED, NULL);
+ goto dealloc_rpc;
+ }
+
+ ret = rpcsvc_create_listeners(rpc, options, this->name);
+ if (ret != 1) {
+ gf_msg_debug(this->name, 0, "failed to create listeners");
+ goto dealloc_rpc;
+ }
+
+ while (*progs) {
+ prog = *progs;
+ ret = rpcsvc_program_register(rpc, prog, _gf_false);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_PROGRAM_NAME_REG_FAILED, "name%s",
+ prog->progname, "prognum=%d", prog->prognum, "pogver=%d",
+ prog->progver, NULL);
+ goto dealloc_rpc;
+ }
+
+ progs++;
+ }
+
+ dict_unref(options);
+ return rpc;
+
+dealloc_rpc:
+ GF_FREE(rpc);
+dealloc_dict:
+ dict_unref(options);
+ return NULL;
+}
diff --git a/xlators/features/changelog/src/changelog-rpc-common.h b/xlators/features/changelog/src/changelog-rpc-common.h
new file mode 100644
index 00000000000..4d9aa2c694b
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rpc-common.h
@@ -0,0 +1,85 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CHANGELOG_RPC_COMMON_H
+#define __CHANGELOG_RPC_COMMON_H
+
+#include "rpcsvc.h"
+#include "rpc-clnt.h"
+#include <glusterfs/gf-event.h>
+#include <glusterfs/call-stub.h>
+
+#include "changelog-xdr.h"
+#include "xdr-generic.h"
+
+#include "changelog.h"
+
+/**
+ * Let's keep this non-configurable for now.
+ */
+#define NR_ROTT_BUFFS 4
+#define NR_DISPATCHERS (NR_ROTT_BUFFS - 1)
+
+enum changelog_rpc_procnum {
+ CHANGELOG_RPC_PROC_NULL = 0,
+ CHANGELOG_RPC_PROBE_FILTER = 1,
+ CHANGELOG_RPC_PROC_MAX = 2,
+};
+
+#define CHANGELOG_RPC_PROGNUM 1885957735
+#define CHANGELOG_RPC_PROGVER 1
+
+/**
+ * reverse connection: data xfer path
+ */
+enum changelog_reverse_rpc_procnum {
+ CHANGELOG_REV_PROC_NULL = 0,
+ CHANGELOG_REV_PROC_EVENT = 1,
+ CHANGELOG_REV_PROC_MAX = 2,
+};
+
+#define CHANGELOG_REV_RPC_PROCNUM 1886350951
+#define CHANGELOG_REV_RPC_PROCVER 1
+
+typedef struct changelog_rpc {
+ rpcsvc_t *svc;
+ struct rpc_clnt *rpc;
+ char sock[UNIX_PATH_MAX]; /* tied to server */
+} changelog_rpc_t;
+
+/* event poller */
+void *
+changelog_rpc_poller(void *);
+
+/* CLIENT API */
+struct rpc_clnt *
+changelog_rpc_client_init(xlator_t *, void *, char *, rpc_clnt_notify_t);
+
+int
+changelog_rpc_sumbit_req(struct rpc_clnt *, void *, call_frame_t *,
+ rpc_clnt_prog_t *, int, struct iovec *, int,
+ struct iobref *, xlator_t *, fop_cbk_fn_t, xdrproc_t);
+
+int
+changelog_invoke_rpc(xlator_t *, struct rpc_clnt *, rpc_clnt_prog_t *, int,
+ void *);
+
+/* SERVER API */
+int
+changelog_rpc_sumbit_reply(rpcsvc_request_t *, void *, struct iovec *, int,
+ struct iobref *, xdrproc_t);
+rpcsvc_t *
+changelog_rpc_server_init(xlator_t *, char *, void *, rpcsvc_notify_t,
+ struct rpcsvc_program **);
+void
+changelog_rpc_server_destroy(xlator_t *, rpcsvc_t *, char *, rpcsvc_notify_t,
+ struct rpcsvc_program **);
+
+#endif
diff --git a/xlators/features/changelog/src/changelog-rpc.c b/xlators/features/changelog/src/changelog-rpc.c
new file mode 100644
index 00000000000..440b88091a6
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rpc.c
@@ -0,0 +1,440 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/syscall.h>
+#include "changelog-rpc.h"
+#include "changelog-mem-types.h"
+#include "changelog-ev-handle.h"
+
+static struct rpcsvc_program *changelog_programs[];
+
+static void
+changelog_cleanup_dispatchers(xlator_t *this, changelog_priv_t *priv, int count)
+{
+ for (count--; count >= 0; count--) {
+ (void)changelog_thread_cleanup(this, priv->ev_dispatcher[count]);
+ priv->ev_dispatcher[count] = 0;
+ }
+}
+
+int
+changelog_cleanup_rpc_threads(xlator_t *this, changelog_priv_t *priv)
+{
+ int ret = 0;
+ changelog_clnt_t *conn = NULL;
+
+ conn = &priv->connections;
+ if (!conn)
+ return 0;
+
+ /** terminate RPC thread(s) */
+ ret = changelog_thread_cleanup(this, priv->connector);
+ if (ret != 0)
+ goto error_return;
+ priv->connector = 0;
+
+ /** terminate dispatcher thread(s) */
+ changelog_cleanup_dispatchers(this, priv, priv->nr_dispatchers);
+
+ /* destroy locks */
+ ret = pthread_mutex_destroy(&conn->pending_lock);
+ if (ret != 0)
+ goto error_return;
+ ret = pthread_cond_destroy(&conn->pending_cond);
+ if (ret != 0)
+ goto error_return;
+ ret = LOCK_DESTROY(&conn->active_lock);
+ if (ret != 0)
+ goto error_return;
+ ret = LOCK_DESTROY(&conn->wait_lock);
+ if (ret != 0)
+ goto error_return;
+ return 0;
+
+error_return:
+ return -1;
+}
+
+static int
+changelog_init_rpc_threads(xlator_t *this, changelog_priv_t *priv, rbuf_t *rbuf,
+ int nr_dispatchers)
+{
+ int j = 0;
+ int ret = 0;
+ changelog_clnt_t *conn = NULL;
+
+ conn = &priv->connections;
+
+ conn->this = this;
+ conn->rbuf = rbuf;
+ conn->sequence = 1; /* start with sequence number one */
+
+ INIT_LIST_HEAD(&conn->pending);
+ INIT_LIST_HEAD(&conn->active);
+ INIT_LIST_HEAD(&conn->waitq);
+
+ ret = pthread_mutex_init(&conn->pending_lock, NULL);
+ if (ret)
+ goto error_return;
+ ret = pthread_cond_init(&conn->pending_cond, NULL);
+ if (ret)
+ goto cleanup_pending_lock;
+
+ ret = LOCK_INIT(&conn->active_lock);
+ if (ret)
+ goto cleanup_pending_cond;
+ ret = LOCK_INIT(&conn->wait_lock);
+ if (ret)
+ goto cleanup_active_lock;
+
+ /* spawn reverse connection thread */
+ ret = gf_thread_create(&priv->connector, NULL, changelog_ev_connector, conn,
+ "clogecon");
+ if (ret != 0)
+ goto cleanup_wait_lock;
+
+ /* spawn dispatcher thread(s) */
+ priv->ev_dispatcher = GF_CALLOC(nr_dispatchers, sizeof(pthread_t),
+ gf_changelog_mt_ev_dispatcher_t);
+ if (!priv->ev_dispatcher)
+ goto cleanup_connector;
+
+ /* spawn dispatcher threads */
+ for (; j < nr_dispatchers; j++) {
+ ret = gf_thread_create(&priv->ev_dispatcher[j], NULL,
+ changelog_ev_dispatch, conn, "clogd%03hx",
+ j & 0x3ff);
+ if (ret != 0) {
+ changelog_cleanup_dispatchers(this, priv, j);
+ break;
+ }
+ }
+
+ if (ret != 0)
+ goto cleanup_connector;
+
+ priv->nr_dispatchers = nr_dispatchers;
+ return 0;
+
+cleanup_connector:
+ (void)pthread_cancel(priv->connector);
+cleanup_wait_lock:
+ LOCK_DESTROY(&conn->wait_lock);
+cleanup_active_lock:
+ LOCK_DESTROY(&conn->active_lock);
+cleanup_pending_cond:
+ (void)pthread_cond_destroy(&conn->pending_cond);
+cleanup_pending_lock:
+ (void)pthread_mutex_destroy(&conn->pending_lock);
+error_return:
+ return -1;
+}
+
+int
+changelog_rpcsvc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
+ void *data)
+{
+ xlator_t *this = NULL;
+ rpc_transport_t *trans = NULL;
+ rpc_transport_t *xprt = NULL;
+ rpc_transport_t *xp_next = NULL;
+ changelog_priv_t *priv = NULL;
+ uint64_t listnercnt = 0;
+ uint64_t xprtcnt = 0;
+ uint64_t clntcnt = 0;
+ rpcsvc_listener_t *listener = NULL;
+ rpcsvc_listener_t *next = NULL;
+ gf_boolean_t listner_found = _gf_false;
+ socket_private_t *sockpriv = NULL;
+
+ if (!xl || !data || !rpc) {
+ gf_msg_callingfn("changelog", GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED,
+ "Calling rpc_notify without initializing");
+ goto out;
+ }
+
+ this = xl;
+ trans = data;
+ priv = this->private;
+
+ if (!priv) {
+ gf_msg_callingfn("changelog", GF_LOG_WARNING, 0,
+ CHANGELOG_MSG_RPCSVC_NOTIFY_FAILED,
+ "Calling rpc_notify without priv initializing");
+ goto out;
+ }
+
+ if (event == RPCSVC_EVENT_ACCEPT) {
+ GF_ATOMIC_INC(priv->xprtcnt);
+ LOCK(&priv->lock);
+ {
+ list_add_tail(&trans->list, &priv->xprt_list);
+ }
+ UNLOCK(&priv->lock);
+ goto out;
+ }
+
+ if (event == RPCSVC_EVENT_DISCONNECT) {
+ list_for_each_entry_safe(listener, next, &rpc->listeners, list)
+ {
+ if (listener && listener->trans) {
+ if (listener->trans == trans) {
+ listnercnt = GF_ATOMIC_DEC(priv->listnercnt);
+ listner_found = _gf_true;
+ rpcsvc_listener_destroy(listener);
+ }
+ }
+ }
+
+ if (listnercnt > 0) {
+ goto out;
+ }
+ if (listner_found) {
+ LOCK(&priv->lock);
+ list_for_each_entry_safe(xprt, xp_next, &priv->xprt_list, list)
+ {
+ sockpriv = (socket_private_t *)(xprt->private);
+ gf_log("changelog", GF_LOG_INFO,
+ "Send disconnect"
+ " on socket %d",
+ sockpriv->sock);
+ rpc_transport_disconnect(xprt, _gf_false);
+ }
+ UNLOCK(&priv->lock);
+ goto out;
+ }
+ LOCK(&priv->lock);
+ {
+ list_del_init(&trans->list);
+ }
+ UNLOCK(&priv->lock);
+
+ xprtcnt = GF_ATOMIC_DEC(priv->xprtcnt);
+ clntcnt = GF_ATOMIC_GET(priv->clntcnt);
+ if (!xprtcnt && !clntcnt) {
+ changelog_process_cleanup_event(this);
+ }
+ }
+
+out:
+ return 0;
+}
+
+void
+changelog_process_cleanup_event(xlator_t *this)
+{
+ gf_boolean_t cleanup_notify = _gf_false;
+ changelog_priv_t *priv = NULL;
+ char sockfile[UNIX_PATH_MAX] = {
+ 0,
+ };
+
+ if (!this)
+ return;
+ priv = this->private;
+ if (!priv)
+ return;
+
+ LOCK(&priv->lock);
+ {
+ cleanup_notify = priv->notify_down;
+ priv->notify_down = _gf_true;
+ }
+ UNLOCK(&priv->lock);
+
+ if (priv->victim && !cleanup_notify) {
+ default_notify(this, GF_EVENT_PARENT_DOWN, priv->victim);
+
+ if (priv->rpc) {
+ /* sockfile path could have been saved to avoid this */
+ CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile,
+ UNIX_PATH_MAX);
+ sys_unlink(sockfile);
+ (void)rpcsvc_unregister_notify(priv->rpc, changelog_rpcsvc_notify,
+ this);
+ if (priv->rpc->rxpool) {
+ mem_pool_destroy(priv->rpc->rxpool);
+ priv->rpc->rxpool = NULL;
+ }
+ GF_FREE(priv->rpc);
+ priv->rpc = NULL;
+ }
+ }
+}
+
+void
+changelog_destroy_rpc_listner(xlator_t *this, changelog_priv_t *priv)
+{
+ char sockfile[UNIX_PATH_MAX] = {
+ 0,
+ };
+
+ /* sockfile path could have been saved to avoid this */
+ CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, UNIX_PATH_MAX);
+ changelog_rpc_server_destroy(this, priv->rpc, sockfile,
+ changelog_rpcsvc_notify, changelog_programs);
+}
+
+rpcsvc_t *
+changelog_init_rpc_listener(xlator_t *this, changelog_priv_t *priv,
+ rbuf_t *rbuf, int nr_dispatchers)
+{
+ int ret = 0;
+ char sockfile[UNIX_PATH_MAX] = {
+ 0,
+ };
+ rpcsvc_t *svcp;
+
+ ret = changelog_init_rpc_threads(this, priv, rbuf, nr_dispatchers);
+ if (ret)
+ return NULL;
+
+ CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile, UNIX_PATH_MAX);
+ (void)sys_unlink(sockfile);
+ svcp = changelog_rpc_server_init(
+ this, sockfile, NULL, changelog_rpcsvc_notify, changelog_programs);
+ return svcp;
+}
+
+void
+changelog_rpc_clnt_cleanup(changelog_rpc_clnt_t *crpc)
+{
+ if (!crpc)
+ return;
+ crpc->c_clnt = NULL;
+ LOCK_DESTROY(&crpc->lock);
+ GF_FREE(crpc);
+}
+
+static changelog_rpc_clnt_t *
+changelog_rpc_clnt_init(xlator_t *this, changelog_probe_req *rpc_req,
+ changelog_clnt_t *c_clnt)
+{
+ int ret = 0;
+ changelog_rpc_clnt_t *crpc = NULL;
+
+ crpc = GF_CALLOC(1, sizeof(*crpc), gf_changelog_mt_rpc_clnt_t);
+ if (!crpc)
+ goto error_return;
+ INIT_LIST_HEAD(&crpc->list);
+
+ /* Take a ref, the last unref will be on RPC_CLNT_DESTROY
+ * which comes as a result of last rpc_clnt_unref.
+ */
+ GF_ATOMIC_INIT(crpc->ref, 1);
+ changelog_set_disconnect_flag(crpc, _gf_false);
+
+ crpc->filter = rpc_req->filter;
+ (void)memcpy(crpc->sock, rpc_req->sock, strlen(rpc_req->sock));
+
+ crpc->this = this;
+ crpc->c_clnt = c_clnt;
+ crpc->cleanup = changelog_rpc_clnt_cleanup;
+
+ ret = LOCK_INIT(&crpc->lock);
+ if (ret != 0)
+ goto dealloc_crpc;
+ return crpc;
+
+dealloc_crpc:
+ GF_FREE(crpc);
+error_return:
+ return NULL;
+}
+
+/**
+ * Actor declarations
+ */
+
+/**
+ * @probe_handler
+ * A probe RPC call spawns a connect back to the caller. Caller also
+ * passes an hint which acts as a filter for selecting updates.
+ */
+
+int
+changelog_handle_probe(rpcsvc_request_t *req)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ rpcsvc_t *svc = NULL;
+ changelog_priv_t *priv = NULL;
+ changelog_clnt_t *c_clnt = NULL;
+ changelog_rpc_clnt_t *crpc = NULL;
+
+ changelog_probe_req rpc_req = {
+ 0,
+ };
+ changelog_probe_rsp rpc_rsp = {
+ 0,
+ };
+
+ this = req->trans->xl;
+ if (this->cleanup_starting) {
+ gf_smsg(this->name, GF_LOG_DEBUG, 0, CHANGELOG_MSG_CLEANUP_ALREADY_SET,
+ NULL);
+ return 0;
+ }
+
+ ret = xdr_to_generic(req->msg[0], &rpc_req,
+ (xdrproc_t)xdr_changelog_probe_req);
+ if (ret < 0) {
+ gf_smsg("", GF_LOG_ERROR, 0, CHANGELOG_MSG_HANDLE_PROBE_ERROR, NULL);
+ req->rpc_err = GARBAGE_ARGS;
+ goto handle_xdr_error;
+ }
+
+ /* ->xl hidden in rpcsvc */
+ svc = rpcsvc_request_service(req);
+ this = svc->xl;
+ priv = this->private;
+ c_clnt = &priv->connections;
+
+ crpc = changelog_rpc_clnt_init(this, &rpc_req, c_clnt);
+ if (!crpc)
+ goto handle_xdr_error;
+
+ changelog_ev_queue_connection(c_clnt, crpc);
+ rpc_rsp.op_ret = 0;
+
+ goto submit_rpc;
+
+handle_xdr_error:
+ rpc_rsp.op_ret = -1;
+submit_rpc:
+ (void)changelog_rpc_sumbit_reply(req, &rpc_rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_changelog_probe_rsp);
+ return 0;
+}
+
+/**
+ * RPC declarations
+ */
+
+static rpcsvc_actor_t changelog_svc_actors[CHANGELOG_RPC_PROC_MAX] = {
+ [CHANGELOG_RPC_PROBE_FILTER] = {"CHANGELOG PROBE FILTER",
+ changelog_handle_probe, NULL,
+ CHANGELOG_RPC_PROBE_FILTER, DRC_NA, 0},
+};
+
+static struct rpcsvc_program changelog_svc_prog = {
+ .progname = CHANGELOG_RPC_PROGNAME,
+ .prognum = CHANGELOG_RPC_PROGNUM,
+ .progver = CHANGELOG_RPC_PROGVER,
+ .numactors = CHANGELOG_RPC_PROC_MAX,
+ .actors = changelog_svc_actors,
+ .synctask = _gf_true,
+};
+
+static struct rpcsvc_program *changelog_programs[] = {
+ &changelog_svc_prog,
+ NULL,
+};
diff --git a/xlators/features/changelog/src/changelog-rpc.h b/xlators/features/changelog/src/changelog-rpc.h
new file mode 100644
index 00000000000..b1707565249
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rpc.h
@@ -0,0 +1,31 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CHANGELOG_RPC_H
+#define __CHANGELOG_RPC_H
+
+#include <glusterfs/xlator.h>
+#include "changelog-helpers.h"
+
+/* one time */
+#include "socket.h"
+#include "changelog-rpc-common.h"
+
+#define CHANGELOG_RPC_PROGNAME "GlusterFS Changelog"
+
+rpcsvc_t *
+changelog_init_rpc_listener(xlator_t *, changelog_priv_t *, rbuf_t *, int);
+
+void
+changelog_destroy_rpc_listner(xlator_t *, changelog_priv_t *);
+
+int
+changelog_cleanup_rpc_threads(xlator_t *this, changelog_priv_t *priv);
+#endif
diff --git a/xlators/features/changelog/src/changelog-rt.c b/xlators/features/changelog/src/changelog-rt.c
new file mode 100644
index 00000000000..841545ae359
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rt.c
@@ -0,0 +1,66 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/logging.h>
+
+#include "changelog-rt.h"
+#include "changelog-mem-types.h"
+
+int
+changelog_rt_init(xlator_t *this, changelog_dispatcher_t *cd)
+{
+ changelog_rt_t *crt = NULL;
+
+ crt = GF_CALLOC(1, sizeof(*crt), gf_changelog_mt_rt_t);
+ if (!crt)
+ return -1;
+
+ LOCK_INIT(&crt->lock);
+
+ cd->cd_data = crt;
+ cd->dispatchfn = &changelog_rt_enqueue;
+
+ return 0;
+}
+
+int
+changelog_rt_fini(xlator_t *this, changelog_dispatcher_t *cd)
+{
+ changelog_rt_t *crt = NULL;
+
+ crt = cd->cd_data;
+
+ LOCK_DESTROY(&crt->lock);
+ GF_FREE(crt);
+
+ return 0;
+}
+
+int
+changelog_rt_enqueue(xlator_t *this, changelog_priv_t *priv, void *cbatch,
+ changelog_log_data_t *cld_0, changelog_log_data_t *cld_1)
+{
+ int ret = 0;
+ changelog_rt_t *crt = NULL;
+
+ crt = (changelog_rt_t *)cbatch;
+
+ LOCK(&crt->lock);
+ {
+ ret = changelog_handle_change(this, priv, cld_0);
+ if (!ret && cld_1)
+ ret = changelog_handle_change(this, priv, cld_1);
+ }
+ UNLOCK(&crt->lock);
+
+ return ret;
+}
diff --git a/xlators/features/changelog/src/changelog-rt.h b/xlators/features/changelog/src/changelog-rt.h
new file mode 100644
index 00000000000..28b9827d85b
--- /dev/null
+++ b/xlators/features/changelog/src/changelog-rt.h
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CHANGELOG_RT_H
+#define _CHANGELOG_RT_H
+
+#include <glusterfs/locking.h>
+#include <glusterfs/timer.h>
+#include "pthread.h"
+
+#include "changelog-helpers.h"
+
+/* unused as of now - may be you would need it later */
+typedef struct changelog_rt {
+ gf_lock_t lock;
+} changelog_rt_t;
+
+int
+changelog_rt_init(xlator_t *this, changelog_dispatcher_t *cd);
+int
+changelog_rt_fini(xlator_t *this, changelog_dispatcher_t *cd);
+int
+changelog_rt_enqueue(xlator_t *this, changelog_priv_t *priv, void *cbatch,
+ changelog_log_data_t *cld_0, changelog_log_data_t *cld_1);
+
+#endif /* _CHANGELOG_RT_H */
diff --git a/xlators/features/changelog/src/changelog.c b/xlators/features/changelog/src/changelog.c
new file mode 100644
index 00000000000..6a6e5af859e
--- /dev/null
+++ b/xlators/features/changelog/src/changelog.c
@@ -0,0 +1,2989 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/iobuf.h>
+
+#include "changelog-rt.h"
+
+#include "changelog-encoders.h"
+#include "changelog-mem-types.h"
+#include "changelog-messages.h"
+
+#include <pthread.h>
+#include <signal.h>
+
+#include "changelog-rpc.h"
+#include "errno.h"
+
+static struct changelog_bootstrap cb_bootstrap[] = {
+ {
+ .mode = CHANGELOG_MODE_RT,
+ .ctor = changelog_rt_init,
+ .dtor = changelog_rt_fini,
+ },
+};
+
+static int
+changelog_init_rpc(xlator_t *this, changelog_priv_t *priv);
+
+static int
+changelog_init(xlator_t *this, changelog_priv_t *priv);
+
+/* Entry operations - TYPE III */
+
+/**
+ * entry operations do not undergo inode version checking.
+ */
+
+/* {{{ */
+
+/* rmdir */
+
+int32_t
+changelog_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(rmdir, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_rmdir_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflags, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ gf_msg_debug(this->name, 0, "Dequeue rmdir");
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata);
+ return 0;
+}
+
+int32_t
+changelog_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
+{
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ call_stub_t *stub = NULL;
+ struct list_head queue = {
+ 0,
+ };
+ gf_boolean_t barrier_enabled = _gf_false;
+
+ INIT_LIST_HEAD(&queue);
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, loc->inode->gfid, 2);
+
+ co = changelog_get_usable_buffer(frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ if (priv->capture_del_path) {
+ CHANGELOG_FILL_ENTRY_DIR_PATH(co, loc->pargfid, loc->name, del_entry_fn,
+ del_entry_free_fn, xtra_len, wind,
+ _gf_true);
+ } else {
+ CHANGELOG_FILL_ENTRY_DIR_PATH(co, loc->pargfid, loc->name, del_entry_fn,
+ del_entry_free_fn, xtra_len, wind,
+ _gf_false);
+ }
+
+ changelog_set_usable_record_and_length(frame->local, xtra_len, 2);
+
+ /* changelog barrier */
+ /* Color assignment and increment of fop_cnt for rmdir/unlink/rename
+ * should be made with in priv lock if changelog barrier is not enabled.
+ * Because if counter is not incremented yet, draining wakes up and
+ * publishes the changelog but later these fops might hit the disk and
+ * present in snapped volume but where as the intention is these fops
+ * should not be present in snapped volume.
+ */
+ LOCK(&priv->lock);
+ {
+ if ((barrier_enabled = priv->barrier_enabled)) {
+ stub = fop_rmdir_stub(frame, changelog_rmdir_resume, loc, xflags,
+ xdata);
+ if (!stub)
+ __chlog_barrier_disable(this, &queue);
+ else
+ __chlog_barrier_enqueue(this, stub);
+ } else {
+ ((changelog_local_t *)frame->local)->color = priv->current_color;
+ changelog_inc_fop_cnt(this, priv, frame->local);
+ }
+ }
+ UNLOCK(&priv->lock);
+
+ if (barrier_enabled && stub) {
+ gf_msg_debug(this->name, 0, "Enqueue rmdir");
+ goto out;
+ }
+ if (barrier_enabled && !stub) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=rmdir", NULL);
+ chlog_barrier_dequeue_all(this, &queue);
+ }
+
+ /* changelog barrier */
+
+wind:
+ STACK_WIND(frame, changelog_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata);
+out:
+ return 0;
+}
+
+/* unlink */
+
+int32_t
+changelog_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(unlink, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_unlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflags, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ gf_msg_debug(this->name, 0, "Dequeue unlink");
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflags, xdata);
+ return 0;
+}
+
+int32_t
+changelog_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
+{
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ call_stub_t *stub = NULL;
+ struct list_head queue = {
+ 0,
+ };
+ gf_boolean_t barrier_enabled = _gf_false;
+ dht_changelog_rename_info_t *info = NULL;
+ int ret = 0;
+ char *old_name = NULL;
+ char *new_name = NULL;
+ char *nname = NULL;
+
+ INIT_LIST_HEAD(&queue);
+ priv = this->private;
+
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ ret = dict_get_bin(xdata, DHT_CHANGELOG_RENAME_OP_KEY, (void **)&info);
+ if (!ret) { /* special case: unlink considered as rename */
+ /* 3 == fop + oldloc + newloc */
+ old_name = alloca(info->oldname_len);
+ new_name = alloca(info->newname_len);
+ CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, loc->inode->gfid, 3);
+
+ co = changelog_get_usable_buffer(frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER(co, GF_FOP_RENAME, fop_fn, xtra_len);
+
+ co++;
+ strncpy(old_name, info->buffer, info->oldname_len);
+ CHANGELOG_FILL_ENTRY(co, info->old_pargfid, old_name, entry_fn,
+ entry_free_fn, xtra_len, wind);
+
+ co++;
+ /* new name resides just after old name */
+ nname = info->buffer + info->oldname_len;
+ strncpy(new_name, nname, info->newname_len);
+ CHANGELOG_FILL_ENTRY(co, info->new_pargfid, new_name, entry_fn,
+ entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length(frame->local, xtra_len, 3);
+ } else { /* default unlink */
+ CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, wind);
+ CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, loc->inode->gfid, 2);
+
+ co = changelog_get_usable_buffer(frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ if (priv->capture_del_path) {
+ CHANGELOG_FILL_ENTRY_DIR_PATH(co, loc->pargfid, loc->name,
+ del_entry_fn, del_entry_free_fn,
+ xtra_len, wind, _gf_true);
+ } else {
+ CHANGELOG_FILL_ENTRY_DIR_PATH(co, loc->pargfid, loc->name,
+ del_entry_fn, del_entry_free_fn,
+ xtra_len, wind, _gf_false);
+ }
+
+ changelog_set_usable_record_and_length(frame->local, xtra_len, 2);
+ }
+
+ /* changelog barrier */
+ LOCK(&priv->lock);
+ {
+ if ((barrier_enabled = priv->barrier_enabled)) {
+ stub = fop_unlink_stub(frame, changelog_unlink_resume, loc, xflags,
+ xdata);
+ if (!stub)
+ __chlog_barrier_disable(this, &queue);
+ else
+ __chlog_barrier_enqueue(this, stub);
+ } else {
+ ((changelog_local_t *)frame->local)->color = priv->current_color;
+ changelog_inc_fop_cnt(this, priv, frame->local);
+ }
+ }
+ UNLOCK(&priv->lock);
+
+ if (barrier_enabled && stub) {
+ gf_msg_debug(this->name, 0, "Enqueue unlink");
+ goto out;
+ }
+ if (barrier_enabled && !stub) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=unlink", NULL);
+ chlog_barrier_dequeue_all(this, &queue);
+ }
+
+ /* changelog barrier */
+
+wind:
+ STACK_WIND(frame, changelog_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflags, xdata);
+out:
+ return 0;
+}
+
+/* rename */
+
+int32_t
+changelog_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+ changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY);
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(rename, frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_rename_resume(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ gf_msg_debug(this->name, 0, "Dequeue rename");
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ return 0;
+}
+
+int32_t
+changelog_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ call_stub_t *stub = NULL;
+ struct list_head queue = {
+ 0,
+ };
+ gf_boolean_t barrier_enabled = _gf_false;
+ dht_changelog_rename_info_t *info = NULL;
+ int ret = 0;
+
+ INIT_LIST_HEAD(&queue);
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ ret = dict_get_bin(xdata, DHT_CHANGELOG_RENAME_OP_KEY, (void **)&info);
+ if (ret && oldloc->inode->ia_type != IA_IFDIR) {
+ /* xdata "NOT" set for a non-directory,
+ * Special rename => avoid logging */
+ goto wind;
+ }
+
+ /* 3 == fop + oldloc + newloc */
+ CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, oldloc->inode->gfid, 3);
+
+ co = changelog_get_usable_buffer(frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY(co, oldloc->pargfid, oldloc->name, entry_fn,
+ entry_free_fn, xtra_len, wind);
+
+ co++;
+ CHANGELOG_FILL_ENTRY(co, newloc->pargfid, newloc->name, entry_fn,
+ entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length(frame->local, xtra_len, 3);
+ /* changelog barrier */
+ LOCK(&priv->lock);
+ {
+ if ((barrier_enabled = priv->barrier_enabled)) {
+ stub = fop_rename_stub(frame, changelog_rename_resume, oldloc,
+ newloc, xdata);
+ if (!stub)
+ __chlog_barrier_disable(this, &queue);
+ else
+ __chlog_barrier_enqueue(this, stub);
+ } else {
+ ((changelog_local_t *)frame->local)->color = priv->current_color;
+ changelog_inc_fop_cnt(this, priv, frame->local);
+ }
+ }
+ UNLOCK(&priv->lock);
+
+ if (barrier_enabled && stub) {
+ gf_msg_debug(this->name, 0, "Enqueue rename");
+ goto out;
+ }
+ if (barrier_enabled && !stub) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=rename", NULL);
+ chlog_barrier_dequeue_all(this, &queue);
+ }
+ /* changelog barrier */
+
+wind:
+ STACK_WIND(frame, changelog_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+out:
+ return 0;
+}
+
+/* link */
+
+int32_t
+changelog_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_link_resume(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("changelog", this, out);
+ GF_VALIDATE_OR_GOTO("changelog", this->fops, out);
+ GF_VALIDATE_OR_GOTO("changelog", frame, out);
+
+ priv = this->private;
+
+ gf_msg_debug(this->name, 0, "Dequeuing link");
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
+out:
+ return -1;
+}
+int32_t
+changelog_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ call_stub_t *stub = NULL;
+ struct list_head queue = {
+ 0,
+ };
+ gf_boolean_t barrier_enabled = _gf_false;
+
+ priv = this->private;
+
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+ CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, wind);
+
+ CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, oldloc->gfid, 2);
+
+ co = changelog_get_usable_buffer(frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len);
+
+ co++;
+ CHANGELOG_FILL_ENTRY(co, newloc->pargfid, newloc->name, entry_fn,
+ entry_free_fn, xtra_len, wind);
+
+ changelog_set_usable_record_and_length(frame->local, xtra_len, 2);
+
+ LOCK(&priv->lock);
+ {
+ if ((barrier_enabled = priv->barrier_enabled)) {
+ stub = fop_link_stub(frame, changelog_link_resume, oldloc, newloc,
+ xdata);
+ if (!stub)
+ __chlog_barrier_disable(this, &queue);
+ else
+ __chlog_barrier_enqueue(this, stub);
+ } else {
+ ((changelog_local_t *)frame->local)->color = priv->current_color;
+ changelog_inc_fop_cnt(this, priv, frame->local);
+ }
+ }
+ UNLOCK(&priv->lock);
+
+ if (barrier_enabled && stub) {
+ gf_msg_debug(this->name, 0, "Enqueued link");
+ goto out;
+ }
+
+ if (barrier_enabled && !stub) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_BARRIER_FOP_FAILED,
+ "fop=link", NULL);
+ chlog_barrier_dequeue_all(this, &queue);
+ }
+wind:
+ STACK_WIND(frame, changelog_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+out:
+ return 0;
+}
+
+/* mkdir */
+
+int32_t
+changelog_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_mkdir_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, mode_t umask, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("changelog", this, out);
+ GF_VALIDATE_OR_GOTO("changelog", this->fops, out);
+ GF_VALIDATE_OR_GOTO("changelog", frame, out);
+
+ priv = this->private;
+
+ gf_msg_debug(this->name, 0, "Dequeuing mkdir");
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ return 0;
+out:
+ return -1;
+}
+
+int32_t
+changelog_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ int ret = -1;
+ uuid_t gfid = {
+ 0,
+ };
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ call_stub_t *stub = NULL;
+ struct list_head queue = {
+ 0,
+ };
+ gf_boolean_t barrier_enabled = _gf_false;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ ret = dict_get_gfuuid(xdata, "gfid-req", &gfid);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "failed to get gfid from dict");
+ goto wind;
+ }
+
+ CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, gfid, 5);
+
+ co = changelog_get_usable_buffer(frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32(co, S_IFDIR | mode, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32(co, frame->root->uid, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32(co, frame->root->gid, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_ENTRY(co, loc->pargfid, loc->name, entry_fn, entry_free_fn,
+ xtra_len, wind);
+
+ changelog_set_usable_record_and_length(frame->local, xtra_len, 5);
+
+ LOCK(&priv->lock);
+ {
+ if ((barrier_enabled = priv->barrier_enabled)) {
+ stub = fop_mkdir_stub(frame, changelog_mkdir_resume, loc, mode,
+ umask, xdata);
+ if (!stub)
+ __chlog_barrier_disable(this, &queue);
+ else
+ __chlog_barrier_enqueue(this, stub);
+ } else {
+ ((changelog_local_t *)frame->local)->color = priv->current_color;
+ changelog_inc_fop_cnt(this, priv, frame->local);
+ }
+ }
+ UNLOCK(&priv->lock);
+
+ if (barrier_enabled && stub) {
+ gf_msg_debug(this->name, 0, "Enqueued mkdir");
+ goto out;
+ }
+
+ if (barrier_enabled && !stub) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=mkdir", NULL);
+ chlog_barrier_dequeue_all(this, &queue);
+ }
+
+wind:
+ STACK_WIND(frame, changelog_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+out:
+ return 0;
+}
+
+/* symlink */
+
+int32_t
+changelog_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(symlink, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_symlink_resume(call_frame_t *frame, xlator_t *this,
+ const char *linkname, loc_t *loc, mode_t umask,
+ dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("changelog", this, out);
+ GF_VALIDATE_OR_GOTO("changelog", this->fops, out);
+ GF_VALIDATE_OR_GOTO("changelog", frame, out);
+
+ priv = this->private;
+
+ gf_msg_debug(this->name, 0, "Dequeuing symlink");
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata);
+ return 0;
+out:
+ return -1;
+}
+
+int32_t
+changelog_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ int ret = -1;
+ size_t xtra_len = 0;
+ uuid_t gfid = {
+ 0,
+ };
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ call_stub_t *stub = NULL;
+ struct list_head queue = {
+ 0,
+ };
+ gf_boolean_t barrier_enabled = _gf_false;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ ret = dict_get_gfuuid(xdata, "gfid-req", &gfid);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "failed to get gfid from dict");
+ goto wind;
+ }
+
+ CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, gfid, 2);
+
+ co = changelog_get_usable_buffer(frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_ENTRY(co, loc->pargfid, loc->name, entry_fn, entry_free_fn,
+ xtra_len, wind);
+
+ changelog_set_usable_record_and_length(frame->local, xtra_len, 2);
+
+ LOCK(&priv->lock);
+ {
+ if ((barrier_enabled = priv->barrier_enabled)) {
+ stub = fop_symlink_stub(frame, changelog_symlink_resume, linkname,
+ loc, umask, xdata);
+ if (!stub)
+ __chlog_barrier_disable(this, &queue);
+ else
+ __chlog_barrier_enqueue(this, stub);
+ } else {
+ ((changelog_local_t *)frame->local)->color = priv->current_color;
+ changelog_inc_fop_cnt(this, priv, frame->local);
+ }
+ }
+ UNLOCK(&priv->lock);
+
+ if (barrier_enabled && stub) {
+ gf_msg_debug(this->name, 0, "Enqueued symlink");
+ goto out;
+ }
+
+ if (barrier_enabled && !stub) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=symlink", NULL);
+ chlog_barrier_dequeue_all(this, &queue);
+ }
+
+wind:
+ STACK_WIND(frame, changelog_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata);
+out:
+ return 0;
+}
+
+/* mknod */
+
+int32_t
+changelog_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_mknod_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("changelog", this, out);
+ GF_VALIDATE_OR_GOTO("changelog", this->fops, out);
+ GF_VALIDATE_OR_GOTO("changelog", frame, out);
+
+ priv = this->private;
+
+ gf_msg_debug(this->name, 0, "Dequeuing mknod");
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
+ return 0;
+out:
+ return -1;
+}
+
+int32_t
+changelog_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *xdata)
+{
+ int ret = -1;
+ uuid_t gfid = {
+ 0,
+ };
+ size_t xtra_len = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ call_stub_t *stub = NULL;
+ struct list_head queue = {
+ 0,
+ };
+ gf_boolean_t barrier_enabled = _gf_false;
+
+ priv = this->private;
+
+ /* Check whether changelog active */
+ if (!(priv->active))
+ goto wind;
+
+ /* Check whether rebalance activity */
+ if (frame->root->pid == GF_CLIENT_PID_DEFRAG)
+ goto wind;
+
+ /* If tier-dht linkto is SET, ignore about verifiying :
+ * 1. Whether internal fop AND
+ * 2. Whether tier rebalance process activity (this will help in
+ * recording mknod if tier rebalance process calls this mknod) */
+ if (!(dict_get(xdata, "trusted.tier.tier-dht.linkto"))) {
+ CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, wind);
+ if (frame->root->pid == GF_CLIENT_PID_TIER_DEFRAG)
+ goto wind;
+ }
+
+ ret = dict_get_gfuuid(xdata, "gfid-req", &gfid);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "failed to get gfid from dict");
+ goto wind;
+ }
+
+ CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, gfid, 5);
+
+ co = changelog_get_usable_buffer(frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32(co, mode, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32(co, frame->root->uid, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32(co, frame->root->gid, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_ENTRY(co, loc->pargfid, loc->name, entry_fn, entry_free_fn,
+ xtra_len, wind);
+
+ changelog_set_usable_record_and_length(frame->local, xtra_len, 5);
+
+ LOCK(&priv->lock);
+ {
+ if ((barrier_enabled = priv->barrier_enabled)) {
+ stub = fop_mknod_stub(frame, changelog_mknod_resume, loc, mode, dev,
+ umask, xdata);
+ if (!stub)
+ __chlog_barrier_disable(this, &queue);
+ else
+ __chlog_barrier_enqueue(this, stub);
+ } else {
+ ((changelog_local_t *)frame->local)->color = priv->current_color;
+ changelog_inc_fop_cnt(this, priv, frame->local);
+ }
+ }
+ UNLOCK(&priv->lock);
+
+ if (barrier_enabled && stub) {
+ gf_msg_debug(this->name, 0, "Enqueued mknod");
+ goto out;
+ }
+
+ if (barrier_enabled && !stub) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=mknod", NULL);
+ chlog_barrier_dequeue_all(this, &queue);
+ }
+
+wind:
+ STACK_WIND(frame, changelog_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, dev, umask, xdata);
+out:
+ return 0;
+}
+
+/* create */
+
+int32_t
+changelog_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int32_t ret = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+ changelog_event_t ev = {
+ 0,
+ };
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+
+ /* fill the event structure.. similar to open() */
+ ev.ev_type = CHANGELOG_OP_TYPE_CREATE;
+ gf_uuid_copy(ev.u.create.gfid, buf->ia_gfid);
+ ev.u.create.flags = fd->flags;
+ changelog_dispatch_event(this, priv, &ev);
+
+ if (changelog_ev_selected(this, &priv->ev_selection,
+ CHANGELOG_OP_TYPE_RELEASE)) {
+ ret = fd_ctx_set(fd, this, (uint64_t)(long)0x1);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT,
+ NULL);
+ }
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+changelog_create_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("changelog", this, out);
+ GF_VALIDATE_OR_GOTO("changelog", this->fops, out);
+ GF_VALIDATE_OR_GOTO("changelog", frame, out);
+
+ priv = this->private;
+
+ gf_msg_debug(this->name, 0, "Dequeuing create");
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+
+out:
+ return -1;
+}
+
+int32_t
+changelog_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ int ret = -1;
+ uuid_t gfid = {
+ 0,
+ };
+ changelog_opt_t *co = NULL;
+ changelog_priv_t *priv = NULL;
+ size_t xtra_len = 0;
+ call_stub_t *stub = NULL;
+ struct list_head queue = {
+ 0,
+ };
+ gf_boolean_t barrier_enabled = _gf_false;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ ret = dict_get_gfuuid(xdata, "gfid-req", &gfid);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "failed to get gfid from dict");
+ goto wind;
+ }
+
+ /* init with two extra records */
+ CHANGELOG_INIT_NOCHECK(this, frame->local, NULL, gfid, 5);
+ if (!frame->local)
+ goto wind;
+
+ co = changelog_get_usable_buffer(frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32(co, mode, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32(co, frame->root->uid, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_UINT32(co, frame->root->gid, number_fn, xtra_len);
+ co++;
+
+ CHANGELOG_FILL_ENTRY(co, loc->pargfid, loc->name, entry_fn, entry_free_fn,
+ xtra_len, wind);
+
+ changelog_set_usable_record_and_length(frame->local, xtra_len, 5);
+
+ LOCK(&priv->lock);
+ {
+ if ((barrier_enabled = priv->barrier_enabled)) {
+ stub = fop_create_stub(frame, changelog_create_resume, loc, flags,
+ mode, umask, fd, xdata);
+ if (!stub)
+ __chlog_barrier_disable(this, &queue);
+ else
+ __chlog_barrier_enqueue(this, stub);
+ } else {
+ ((changelog_local_t *)frame->local)->color = priv->current_color;
+ changelog_inc_fop_cnt(this, priv, frame->local);
+ }
+ }
+ UNLOCK(&priv->lock);
+
+ if (barrier_enabled && stub) {
+ gf_msg_debug(this->name, 0, "Enqueued create");
+ goto out;
+ }
+
+ if (barrier_enabled && !stub) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM,
+ CHANGELOG_MSG_BARRIER_FOP_FAILED, "fop=create", NULL);
+ chlog_barrier_dequeue_all(this, &queue);
+ }
+
+wind:
+ STACK_WIND(frame, changelog_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+out:
+ return 0;
+}
+
+/* }}} */
+
+/* Metadata modification fops - TYPE II */
+
+/* {{{ */
+
+/* {f}setattr */
+
+int32_t
+changelog_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop_stbuf, struct iatt *postop_stbuf,
+ dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, preop_stbuf,
+ postop_stbuf, xdata);
+
+ return 0;
+}
+
+int32_t
+changelog_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ size_t xtra_len = 0;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ CHANGELOG_OP_BOUNDARY_CHECK(frame, wind);
+
+ CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 1);
+ if (!frame->local)
+ goto wind;
+
+ co = changelog_get_usable_buffer(frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len);
+
+ changelog_set_usable_record_and_length(frame->local, xtra_len, 1);
+
+wind:
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
+}
+
+int32_t
+changelog_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preop_stbuf, struct iatt *postop_stbuf,
+ dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(setattr, frame, op_ret, op_errno, preop_stbuf,
+ postop_stbuf, xdata);
+
+ return 0;
+}
+
+int32_t
+changelog_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ size_t xtra_len = 0;
+ uuid_t shard_root_gfid = {
+ 0,
+ };
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, wind);
+
+ /* Do not record META on .shard */
+ gf_uuid_parse(SHARD_ROOT_GFID, shard_root_gfid);
+ if (gf_uuid_compare(loc->gfid, shard_root_gfid) == 0) {
+ goto wind;
+ }
+
+ CHANGELOG_OP_BOUNDARY_CHECK(frame, wind);
+
+ CHANGELOG_INIT(this, frame->local, loc->inode, loc->inode->gfid, 1);
+ if (!frame->local)
+ goto wind;
+
+ co = changelog_get_usable_buffer(frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len);
+
+ changelog_set_usable_record_and_length(frame->local, xtra_len, 1);
+
+wind:
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
+}
+
+/* {f}removexattr */
+
+int32_t
+changelog_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA_XATTR);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int32_t
+changelog_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ size_t xtra_len = 0;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ CHANGELOG_OP_BOUNDARY_CHECK(frame, wind);
+
+ CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 1);
+
+ co = changelog_get_usable_buffer(frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len);
+
+ changelog_set_usable_record_and_length(frame->local, xtra_len, 1);
+
+wind:
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_fremovexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ return 0;
+}
+
+int32_t
+changelog_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA_XATTR);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int32_t
+changelog_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ size_t xtra_len = 0;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ CHANGELOG_OP_BOUNDARY_CHECK(frame, wind);
+
+ CHANGELOG_INIT(this, frame->local, loc->inode, loc->inode->gfid, 1);
+
+ co = changelog_get_usable_buffer(frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len);
+
+ changelog_set_usable_record_and_length(frame->local, xtra_len, 1);
+
+wind:
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ return 0;
+}
+
+/* {f}setxattr */
+
+int32_t
+changelog_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA_XATTR);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+/* changelog_handle_virtual_xattr:
+ * Handles virtual setxattr 'glusterfs.geo-rep.trigger-sync' on files.
+ * Following is the behaviour based on the value of xattr.
+ * 1: Captures only DATA entry in changelog.
+ * 2: Tries to captures both ENTRY and DATA entry in
+ * changelog. If failed to get pargfid, only DATA
+ * entry is captured.
+ * any other value: ENOTSUP is returned.
+ */
+static void
+changelog_handle_virtual_xattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *dict)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+ int32_t value = 0;
+ int ret = 0;
+ int dict_ret = 0;
+ gf_boolean_t valid = _gf_false;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ dict_ret = dict_get_int32(dict, GF_XATTR_TRIGGER_SYNC, &value);
+
+ if ((dict_ret == 0 && value == 1) && ((loc->inode->ia_type == IA_IFDIR) ||
+ (loc->inode->ia_type == IA_IFREG)))
+ valid = _gf_true;
+
+ if (valid) {
+ ret = changelog_fill_entry_buf(frame, this, loc, &local);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_ENTRY_BUF_INFO,
+ "gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
+ goto unwind;
+ }
+ changelog_update(this, priv, local, CHANGELOG_TYPE_ENTRY);
+
+ unwind:
+ /* Capture DATA only if it's a file. */
+ if (loc->inode->ia_type != IA_IFDIR)
+ changelog_update(this, priv, frame->local, CHANGELOG_TYPE_DATA);
+ /* Assign local to prev_entry, so unwind will take
+ * care of cleanup. */
+ ((changelog_local_t *)(frame->local))->prev_entry = local;
+ CHANGELOG_STACK_UNWIND(setxattr, frame, 0, 0, NULL);
+ return;
+ } else {
+ CHANGELOG_STACK_UNWIND(setxattr, frame, -1, ENOTSUP, NULL);
+ return;
+ }
+}
+
+int32_t
+changelog_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ size_t xtra_len = 0;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ CHANGELOG_OP_BOUNDARY_CHECK(frame, wind);
+
+ CHANGELOG_INIT(this, frame->local, loc->inode, loc->inode->gfid, 1);
+
+ /* On setting this virtual xattr on a file, an explicit data
+ * sync is triggered from geo-rep as CREATE|DATA entry is
+ * recorded in changelog based on xattr value.
+ */
+ if (dict_get(dict, GF_XATTR_TRIGGER_SYNC)) {
+ changelog_handle_virtual_xattr(frame, this, loc, dict);
+ return 0;
+ }
+
+ co = changelog_get_usable_buffer(frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len);
+
+ changelog_set_usable_record_and_length(frame->local, xtra_len, 1);
+
+wind:
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
+ return 0;
+}
+
+int32_t
+changelog_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA_XATTR);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int32_t
+changelog_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ size_t xtra_len = 0;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+ CHANGELOG_IF_INTERNAL_FOP_THEN_GOTO(frame, xdata, wind);
+
+ CHANGELOG_OP_BOUNDARY_CHECK(frame, wind);
+
+ CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 1);
+
+ co = changelog_get_usable_buffer(frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len);
+
+ changelog_set_usable_record_and_length(frame->local, xtra_len, 1);
+
+wind:
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
+}
+
+int32_t
+changelog_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(xattrop, frame, op_ret, op_errno, xattr, xdata);
+
+ return 0;
+}
+
+int32_t
+changelog_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ size_t xtra_len = 0;
+ int ret = 0;
+ void *size_attr = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+ ret = dict_get_ptr(xattr, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
+ if (ret)
+ goto wind;
+
+ CHANGELOG_OP_BOUNDARY_CHECK(frame, wind);
+
+ CHANGELOG_INIT(this, frame->local, loc->inode, loc->inode->gfid, 1);
+
+ co = changelog_get_usable_buffer(frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len);
+
+ changelog_set_usable_record_and_length(frame->local, xtra_len, 1);
+
+wind:
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_xattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc, optype, xattr, xdata);
+ return 0;
+}
+
+int32_t
+changelog_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_METADATA_XATTR);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(fxattrop, frame, op_ret, op_errno, xattr, xdata);
+
+ return 0;
+}
+
+int32_t
+changelog_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_opt_t *co = NULL;
+ size_t xtra_len = 0;
+ void *size_attr = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+ ret = dict_get_ptr(xattr, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
+ if (ret)
+ goto wind;
+
+ CHANGELOG_OP_BOUNDARY_CHECK(frame, wind);
+
+ CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 1);
+
+ co = changelog_get_usable_buffer(frame->local);
+ if (!co)
+ goto wind;
+
+ CHANGLOG_FILL_FOP_NUMBER(co, frame->root->op, fop_fn, xtra_len);
+
+ changelog_set_usable_record_and_length(frame->local, xtra_len, 1);
+
+wind:
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_fxattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fd, optype, xattr, xdata);
+ return 0;
+}
+/* }}} */
+
+/* Data modification fops - TYPE I */
+
+/* {{{ */
+
+/* {f}truncate() */
+
+int32_t
+changelog_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_DATA);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int32_t
+changelog_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ CHANGELOG_INIT(this, frame->local, loc->inode, loc->inode->gfid, 0);
+ LOCK(&priv->c_snap_lock);
+ {
+ if (priv->c_snap_fd != -1 && priv->barrier_enabled == _gf_true) {
+ changelog_snap_handle_ascii_change(
+ this, &(((changelog_local_t *)(frame->local))->cld));
+ }
+ }
+ UNLOCK(&priv->c_snap_lock);
+
+wind:
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
+}
+
+int32_t
+changelog_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !local), unwind);
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_DATA);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int32_t
+changelog_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 0);
+ LOCK(&priv->c_snap_lock);
+ {
+ if (priv->c_snap_fd != -1 && priv->barrier_enabled == _gf_true) {
+ changelog_snap_handle_ascii_change(
+ this, &(((changelog_local_t *)(frame->local))->cld));
+ }
+ }
+ UNLOCK(&priv->c_snap_lock);
+
+wind:
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+}
+
+/* writev() */
+
+int32_t
+changelog_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+ changelog_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret <= 0) || !local), unwind);
+
+ changelog_update(this, priv, local, CHANGELOG_TYPE_DATA);
+
+unwind:
+ changelog_dec_fop_cnt(this, priv, local);
+ CHANGELOG_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int32_t
+changelog_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ CHANGELOG_INIT(this, frame->local, fd->inode, fd->inode->gfid, 0);
+ LOCK(&priv->c_snap_lock);
+ {
+ if (priv->c_snap_fd != -1 && priv->barrier_enabled == _gf_true) {
+ changelog_snap_handle_ascii_change(
+ this, &(((changelog_local_t *)(frame->local))->cld));
+ }
+ }
+ UNLOCK(&priv->c_snap_lock);
+
+wind:
+ changelog_color_fop_and_inc_cnt(this, priv, frame->local);
+ STACK_WIND(frame, changelog_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
+ return 0;
+}
+
+/* }}} */
+
+/* open, release and other beasts */
+
+/* {{{ */
+
+int
+changelog_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, fd_t *fd, dict_t *xdata)
+{
+ int ret = 0;
+ changelog_priv_t *priv = NULL;
+ changelog_event_t ev = {
+ 0,
+ };
+ gf_boolean_t logopen = _gf_false;
+
+ priv = this->private;
+ if (frame->local) {
+ frame->local = NULL;
+ logopen = _gf_true;
+ }
+
+ CHANGELOG_COND_GOTO(priv, ((op_ret < 0) || !logopen), unwind);
+
+ /* fill the event structure */
+ ev.ev_type = CHANGELOG_OP_TYPE_OPEN;
+ gf_uuid_copy(ev.u.open.gfid, fd->inode->gfid);
+ ev.u.open.flags = fd->flags;
+ changelog_dispatch_event(this, priv, &ev);
+
+ if (changelog_ev_selected(this, &priv->ev_selection,
+ CHANGELOG_OP_TYPE_RELEASE)) {
+ ret = fd_ctx_set(fd, this, (uint64_t)(long)0x1);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_WARNING, 0, CHANGELOG_MSG_SET_FD_CONTEXT,
+ NULL);
+ }
+
+unwind:
+ CHANGELOG_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int
+changelog_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ fd_t *fd, dict_t *xdata)
+{
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+ CHANGELOG_NOT_ACTIVE_THEN_GOTO(frame, priv, wind);
+
+ frame->local = (void *)0x1; /* do not dereference in ->cbk */
+
+wind:
+ STACK_WIND(frame, changelog_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+}
+
+/* }}} */
+
+/* {{{ */
+
+/* }}} */
+
+int32_t
+_changelog_generic_dispatcher(dict_t *dict, char *key, data_t *value,
+ void *data)
+{
+ xlator_t *this = NULL;
+ changelog_priv_t *priv = NULL;
+
+ this = data;
+ priv = this->private;
+
+ changelog_dispatch_event(this, priv, (changelog_event_t *)value->data);
+ return 0;
+}
+
+/**
+ * changelog ipc dispatches events, pointers of which are passed in
+ * @xdata. Dispatching is orderless (whatever order dict_foreach()
+ * traverses the dictionary).
+ */
+int32_t
+changelog_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
+{
+ if (op != GF_IPC_TARGET_CHANGELOG)
+ goto wind;
+
+ /* it's for us, do the job */
+ if (xdata)
+ (void)dict_foreach(xdata, _changelog_generic_dispatcher, this);
+
+ STACK_UNWIND_STRICT(ipc, frame, 0, 0, NULL);
+ return 0;
+
+wind:
+ STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ipc, op, xdata);
+ return 0;
+}
+
+/* {{{ */
+
+int32_t
+changelog_release(xlator_t *this, fd_t *fd)
+{
+ changelog_event_t ev = {
+ 0,
+ };
+ changelog_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ ev.ev_type = CHANGELOG_OP_TYPE_RELEASE;
+ gf_uuid_copy(ev.u.release.gfid, fd->inode->gfid);
+ changelog_dispatch_event(this, priv, &ev);
+
+ (void)fd_ctx_del(fd, this, NULL);
+
+ return 0;
+}
+
+/* }}} */
+
+/**
+ * The
+ * - @init ()
+ * - @fini ()
+ * - @reconfigure ()
+ * ... and helper routines
+ */
+
+/**
+ * needed if there are more operation modes in the future.
+ */
+static void
+changelog_assign_opmode(changelog_priv_t *priv, char *mode)
+{
+ if (strncmp(mode, "realtime", 8) == 0) {
+ priv->op_mode = CHANGELOG_MODE_RT;
+ }
+}
+
+static void
+changelog_assign_encoding(changelog_priv_t *priv, char *enc)
+{
+ if (strncmp(enc, "binary", 6) == 0) {
+ priv->encode_mode = CHANGELOG_ENCODE_BINARY;
+ } else if (strncmp(enc, "ascii", 5) == 0) {
+ priv->encode_mode = CHANGELOG_ENCODE_ASCII;
+ }
+}
+
+static void
+changelog_assign_barrier_timeout(changelog_priv_t *priv, uint32_t timeout)
+{
+ LOCK(&priv->lock);
+ {
+ priv->timeout.tv_sec = timeout;
+ }
+ UNLOCK(&priv->lock);
+}
+
+/* cleanup any helper threads that are running */
+static void
+changelog_cleanup_helper_threads(xlator_t *this, changelog_priv_t *priv)
+{
+ if (priv->cr.rollover_th) {
+ (void)changelog_thread_cleanup(this, priv->cr.rollover_th);
+ priv->cr.rollover_th = 0;
+ }
+
+ if (priv->cf.fsync_th) {
+ (void)changelog_thread_cleanup(this, priv->cf.fsync_th);
+ priv->cf.fsync_th = 0;
+ }
+}
+
+/* spawn helper thread; cleaning up in case of errors */
+static int
+changelog_spawn_helper_threads(xlator_t *this, changelog_priv_t *priv)
+{
+ int ret = 0;
+
+ /* Geo-Rep snapshot dependency:
+ *
+ * To implement explicit rollover of changlog journal on barrier
+ * notification, a pipe is created to communicate between
+ * 'changelog_rollover' thread and changelog main thread. The select
+ * call used to wait till roll-over time in changelog_rollover thread
+ * is modified to wait on read end of the pipe. When barrier
+ * notification comes (i.e, in 'reconfigure'), select in
+ * changelog_rollover thread is woken up explicitly by writing into
+ * the write end of the pipe in 'reconfigure'.
+ */
+
+ priv->cr.notify = _gf_false;
+ priv->cr.this = this;
+ ret = gf_thread_create(&priv->cr.rollover_th, NULL, changelog_rollover,
+ priv, "clogro");
+ if (ret)
+ goto out;
+
+ if (priv->fsync_interval) {
+ priv->cf.this = this;
+ ret = gf_thread_create(&priv->cf.fsync_th, NULL, changelog_fsync_thread,
+ priv, "clogfsyn");
+ }
+
+ if (ret)
+ changelog_cleanup_helper_threads(this, priv);
+
+out:
+ return ret;
+}
+
+int
+notify(xlator_t *this, int event, void *data, ...)
+{
+ changelog_priv_t *priv = NULL;
+ dict_t *dict = NULL;
+ char buf[1] = {1};
+ int barrier = DICT_DEFAULT;
+ gf_boolean_t bclean_req = _gf_false;
+ int ret = 0;
+ int ret1 = 0;
+ struct list_head queue = {
+ 0,
+ };
+ uint64_t xprtcnt = 0;
+ uint64_t clntcnt = 0;
+ changelog_clnt_t *conn = NULL;
+ gf_boolean_t cleanup_notify = _gf_false;
+ char sockfile[UNIX_PATH_MAX] = {
+ 0,
+ };
+ rpcsvc_listener_t *listener = NULL;
+ rpcsvc_listener_t *next = NULL;
+
+ INIT_LIST_HEAD(&queue);
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+
+ if (event == GF_EVENT_PARENT_DOWN) {
+ priv->victim = data;
+ gf_log(this->name, GF_LOG_INFO,
+ "cleanup changelog rpc connection of brick %s",
+ priv->victim->name);
+
+ if (priv->rpc_active) {
+ this->cleanup_starting = 1;
+ changelog_destroy_rpc_listner(this, priv);
+ conn = &priv->connections;
+ if (conn)
+ changelog_ev_cleanup_connections(this, conn);
+ xprtcnt = GF_ATOMIC_GET(priv->xprtcnt);
+ clntcnt = GF_ATOMIC_GET(priv->clntcnt);
+ if (!xprtcnt && !clntcnt) {
+ LOCK(&priv->lock);
+ {
+ cleanup_notify = priv->notify_down;
+ priv->notify_down = _gf_true;
+ }
+ UNLOCK(&priv->lock);
+ if (priv->rpc) {
+ list_for_each_entry_safe(listener, next,
+ &priv->rpc->listeners, list)
+ {
+ if (listener->trans) {
+ rpc_transport_unref(listener->trans);
+ }
+ }
+ rpcsvc_destroy(priv->rpc);
+ priv->rpc = NULL;
+ }
+ CHANGELOG_MAKE_SOCKET_PATH(priv->changelog_brick, sockfile,
+ UNIX_PATH_MAX);
+ sys_unlink(sockfile);
+ if (!cleanup_notify)
+ default_notify(this, GF_EVENT_PARENT_DOWN, data);
+ }
+ } else {
+ default_notify(this, GF_EVENT_PARENT_DOWN, data);
+ }
+ goto out;
+ }
+
+ if (event == GF_EVENT_TRANSLATOR_OP) {
+ dict = data;
+
+ barrier = dict_get_str_boolean(dict, "barrier", DICT_DEFAULT);
+
+ switch (barrier) {
+ case DICT_ERROR:
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_DICT_GET_FAILED, "dict_get_str_boolean",
+ NULL);
+ ret = -1;
+ goto out;
+
+ case BARRIER_OFF:
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_BARRIER_STATE_NOTIFY, "off", NULL);
+
+ CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, out);
+ LOCK(&priv->c_snap_lock);
+ {
+ changelog_snap_logging_stop(this, priv);
+ }
+ UNLOCK(&priv->c_snap_lock);
+
+ LOCK(&priv->bflags.lock);
+ {
+ if (priv->bflags.barrier_ext == _gf_false)
+ ret = -1;
+ }
+ UNLOCK(&priv->bflags.lock);
+
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_BARRIER_ERROR, NULL);
+ goto out;
+ }
+
+ /* Stop changelog barrier and dequeue all fops */
+ LOCK(&priv->lock);
+ {
+ if (priv->barrier_enabled == _gf_true)
+ __chlog_barrier_disable(this, &queue);
+ else
+ ret = -1;
+ }
+ UNLOCK(&priv->lock);
+ /* If ret = -1, then changelog barrier is already
+ * disabled because of error or timeout.
+ */
+ if (ret == 0) {
+ chlog_barrier_dequeue_all(this, &queue);
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_BARRIER_DISABLED, NULL);
+ } else {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_BARRIER_ALREADY_DISABLED, NULL);
+ }
+
+ LOCK(&priv->bflags.lock);
+ {
+ priv->bflags.barrier_ext = _gf_false;
+ }
+ UNLOCK(&priv->bflags.lock);
+
+ goto out;
+
+ case BARRIER_ON:
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_BARRIER_STATE_NOTIFY, "on", NULL);
+
+ CHANGELOG_NOT_ON_THEN_GOTO(priv, ret, out);
+ LOCK(&priv->c_snap_lock);
+ {
+ changelog_snap_logging_start(this, priv);
+ }
+ UNLOCK(&priv->c_snap_lock);
+
+ LOCK(&priv->bflags.lock);
+ {
+ if (priv->bflags.barrier_ext == _gf_true)
+ ret = -1;
+ else
+ priv->bflags.barrier_ext = _gf_true;
+ }
+ UNLOCK(&priv->bflags.lock);
+
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_BARRIER_ON_ERROR, NULL);
+ goto out;
+ }
+
+ ret = pthread_mutex_lock(&priv->bn.bnotify_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret, out, bclean_req);
+ {
+ priv->bn.bnotify = _gf_true;
+ }
+ ret = pthread_mutex_unlock(&priv->bn.bnotify_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret, out, bclean_req);
+
+ /* Start changelog barrier */
+ LOCK(&priv->lock);
+ {
+ ret = __chlog_barrier_enable(this, priv);
+ }
+ UNLOCK(&priv->lock);
+ if (ret == -1) {
+ changelog_barrier_cleanup(this, priv, &queue);
+ goto out;
+ }
+
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_BARRIER_ENABLE, NULL);
+
+ ret = changelog_barrier_notify(priv, buf);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_WRITE_FAILED, "Explicit roll over",
+ NULL);
+ changelog_barrier_cleanup(this, priv, &queue);
+ ret = -1;
+ goto out;
+ }
+
+ ret = pthread_mutex_lock(&priv->bn.bnotify_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret, out, bclean_req);
+ {
+ /* The while condition check is required here to
+ * handle spurious wakeup of cond wait that can
+ * happen with pthreads. See man page */
+ while (priv->bn.bnotify == _gf_true) {
+ ret = pthread_cond_wait(&priv->bn.bnotify_cond,
+ &priv->bn.bnotify_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret, out, bclean_req);
+ }
+ if (priv->bn.bnotify_error == _gf_true) {
+ ret = -1;
+ priv->bn.bnotify_error = _gf_false;
+ }
+ }
+ ret1 = pthread_mutex_unlock(&priv->bn.bnotify_mutex);
+ CHANGELOG_PTHREAD_ERROR_HANDLE_1(ret1, out, bclean_req);
+ gf_smsg(this->name, GF_LOG_INFO, 0,
+ CHANGELOG_MSG_BNOTIFY_COND_INFO, NULL);
+
+ goto out;
+
+ case DICT_DEFAULT:
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ CHANGELOG_MSG_BARRIER_KEY_NOT_FOUND, NULL);
+ ret = -1;
+ goto out;
+
+ default:
+ gf_smsg(this->name, GF_LOG_ERROR, EINVAL,
+ CHANGELOG_MSG_ERROR_IN_DICT_GET, NULL);
+ ret = -1;
+ goto out;
+ }
+ } else {
+ ret = default_notify(this, event, data);
+ }
+
+out:
+ if (bclean_req)
+ changelog_barrier_cleanup(this, priv, &queue);
+
+ return ret;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, gf_changelog_mt_end + 1);
+
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM,
+ CHANGELOG_MSG_MEMORY_INIT_FAILED, NULL);
+ return ret;
+ }
+
+ return ret;
+}
+
+static int
+changelog_init(xlator_t *this, changelog_priv_t *priv)
+{
+ int i = 0;
+ int ret = 0;
+ changelog_log_data_t cld = {
+ 0,
+ };
+
+ priv->maps[CHANGELOG_TYPE_DATA] = "D ";
+ priv->maps[CHANGELOG_TYPE_METADATA] = "M ";
+ priv->maps[CHANGELOG_TYPE_METADATA_XATTR] = "M ";
+ priv->maps[CHANGELOG_TYPE_ENTRY] = "E ";
+
+ for (; i < CHANGELOG_MAX_TYPE; i++) {
+ /* start with version 1 */
+ priv->slice.changelog_version[i] = 1;
+ }
+
+ if (!priv->active)
+ return ret;
+
+ /**
+ * start with a fresh changelog file every time. this is done
+ * in case there was an encoding change. so... things are kept
+ * simple here.
+ */
+ changelog_fill_rollover_data(&cld, _gf_false);
+
+ ret = htime_open(this, priv, cld.cld_roll_time);
+ /* call htime open with cld's rollover_time */
+ if (ret)
+ goto out;
+
+ LOCK(&priv->lock);
+ {
+ ret = changelog_inject_single_event(this, priv, &cld);
+ }
+ UNLOCK(&priv->lock);
+
+ /* ... and finally spawn the helpers threads */
+ ret = changelog_spawn_helper_threads(this, priv);
+
+out:
+ return ret;
+}
+
+/**
+ * Init barrier related condition variables and locks
+ */
+static int
+changelog_barrier_pthread_init(xlator_t *this, changelog_priv_t *priv)
+{
+ gf_boolean_t bn_mutex_init = _gf_false;
+ gf_boolean_t bn_cond_init = _gf_false;
+ gf_boolean_t dm_mutex_black_init = _gf_false;
+ gf_boolean_t dm_cond_black_init = _gf_false;
+ gf_boolean_t dm_mutex_white_init = _gf_false;
+ gf_boolean_t dm_cond_white_init = _gf_false;
+ gf_boolean_t cr_mutex_init = _gf_false;
+ gf_boolean_t cr_cond_init = _gf_false;
+ int ret = 0;
+
+ if ((ret = pthread_mutex_init(&priv->bn.bnotify_mutex, NULL)) != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=bnotify",
+ "ret=%d", ret, NULL);
+ ret = -1;
+ goto out;
+ }
+ bn_mutex_init = _gf_true;
+
+ if ((ret = pthread_cond_init(&priv->bn.bnotify_cond, NULL)) != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=bnotify",
+ "ret=%d", ret, NULL);
+ ret = -1;
+ goto out;
+ }
+ bn_cond_init = _gf_true;
+
+ if ((ret = pthread_mutex_init(&priv->dm.drain_black_mutex, NULL)) != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=drain_black",
+ "ret=%d", ret, NULL);
+ ret = -1;
+ goto out;
+ }
+ dm_mutex_black_init = _gf_true;
+
+ if ((ret = pthread_cond_init(&priv->dm.drain_black_cond, NULL)) != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=drain_black",
+ "ret=%d", ret, NULL);
+ ret = -1;
+ goto out;
+ }
+ dm_cond_black_init = _gf_true;
+
+ if ((ret = pthread_mutex_init(&priv->dm.drain_white_mutex, NULL)) != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED, "name=drain_white",
+ "ret=%d", ret, NULL);
+ ret = -1;
+ goto out;
+ }
+ dm_mutex_white_init = _gf_true;
+
+ if ((ret = pthread_cond_init(&priv->dm.drain_white_cond, NULL)) != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED, "name=drain_white",
+ "ret=%d", ret, NULL);
+ ret = -1;
+ goto out;
+ }
+ dm_cond_white_init = _gf_true;
+
+ if ((pthread_mutex_init(&priv->cr.lock, NULL)) != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_PTHREAD_MUTEX_INIT_FAILED,
+ "name=changelog_rollover", "ret=%d", ret, NULL);
+ ret = -1;
+ goto out;
+ }
+ cr_mutex_init = _gf_true;
+
+ if ((pthread_cond_init(&priv->cr.cond, NULL)) != 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ CHANGELOG_MSG_PTHREAD_COND_INIT_FAILED,
+ "changelog_rollover cond init failed", "ret=%d", ret, NULL);
+ ret = -1;
+ goto out;
+ }
+ cr_cond_init = _gf_true;
+out:
+ if (ret) {
+ if (bn_mutex_init)
+ pthread_mutex_destroy(&priv->bn.bnotify_mutex);
+ if (bn_cond_init)
+ pthread_cond_destroy(&priv->bn.bnotify_cond);
+ if (dm_mutex_black_init)
+ pthread_mutex_destroy(&priv->dm.drain_black_mutex);
+ if (dm_cond_black_init)
+ pthread_cond_destroy(&priv->dm.drain_black_cond);
+ if (dm_mutex_white_init)
+ pthread_mutex_destroy(&priv->dm.drain_white_mutex);
+ if (dm_cond_white_init)
+ pthread_cond_destroy(&priv->dm.drain_white_cond);
+ if (cr_mutex_init)
+ pthread_mutex_destroy(&priv->cr.lock);
+ if (cr_cond_init)
+ pthread_cond_destroy(&priv->cr.cond);
+ }
+ return ret;
+}
+
+/* Destroy barrier related condition variables and locks */
+static void
+changelog_barrier_pthread_destroy(changelog_priv_t *priv)
+{
+ pthread_mutex_destroy(&priv->bn.bnotify_mutex);
+ pthread_cond_destroy(&priv->bn.bnotify_cond);
+ pthread_mutex_destroy(&priv->dm.drain_black_mutex);
+ pthread_cond_destroy(&priv->dm.drain_black_cond);
+ pthread_mutex_destroy(&priv->dm.drain_white_mutex);
+ pthread_cond_destroy(&priv->dm.drain_white_cond);
+ pthread_mutex_destroy(&priv->cr.lock);
+ pthread_cond_destroy(&priv->cr.cond);
+ LOCK_DESTROY(&priv->bflags.lock);
+}
+
+static void
+changelog_cleanup_rpc(xlator_t *this, changelog_priv_t *priv)
+{
+ /* terminate rpc server */
+ if (!this->cleanup_starting)
+ changelog_destroy_rpc_listner(this, priv);
+
+ (void)changelog_cleanup_rpc_threads(this, priv);
+ /* cleanup rot buffs */
+ rbuf_dtor(priv->rbuf);
+
+ /* cleanup poller thread */
+ if (priv->poller)
+ (void)changelog_thread_cleanup(this, priv->poller);
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ int ret = 0;
+ char *tmp = NULL;
+ changelog_priv_t *priv = NULL;
+ gf_boolean_t active_earlier = _gf_true;
+ gf_boolean_t active_now = _gf_true;
+ gf_boolean_t rpc_active_earlier = _gf_true;
+ gf_boolean_t rpc_active_now = _gf_true;
+ gf_boolean_t iniate_rpc = _gf_false;
+ changelog_time_slice_t *slice = NULL;
+ changelog_log_data_t cld = {
+ 0,
+ };
+ char htime_dir[PATH_MAX] = {
+ 0,
+ };
+ char csnap_dir[PATH_MAX] = {
+ 0,
+ };
+ uint32_t timeout = 0;
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+
+ ret = -1;
+ active_earlier = priv->active;
+ rpc_active_earlier = priv->rpc_active;
+
+ /* first stop the rollover and the fsync thread */
+ changelog_cleanup_helper_threads(this, priv);
+
+ GF_OPTION_RECONF("changelog-dir", tmp, options, str, out);
+ if (!tmp) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_DIR_OPTIONS_NOT_SET,
+ NULL);
+ goto out;
+ }
+
+ GF_FREE(priv->changelog_dir);
+ priv->changelog_dir = gf_strdup(tmp);
+ if (!priv->changelog_dir)
+ goto out;
+
+ ret = mkdir_p(priv->changelog_dir, 0600, _gf_true);
+
+ if (ret)
+ goto out;
+ CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, htime_dir);
+ ret = mkdir_p(htime_dir, 0600, _gf_true);
+
+ if (ret)
+ goto out;
+
+ CHANGELOG_FILL_CSNAP_DIR(priv->changelog_dir, csnap_dir);
+ ret = mkdir_p(csnap_dir, 0600, _gf_true);
+
+ if (ret)
+ goto out;
+
+ GF_OPTION_RECONF("changelog", active_now, options, bool, out);
+ GF_OPTION_RECONF("changelog-notification", rpc_active_now, options, bool,
+ out);
+
+ /* If journalling is enabled, enable rpc notifications */
+ if (active_now && !active_earlier) {
+ if (!rpc_active_earlier)
+ iniate_rpc = _gf_true;
+ }
+
+ if (rpc_active_now && !rpc_active_earlier) {
+ iniate_rpc = _gf_true;
+ }
+
+ /* TODO: Disable of changelog-notifications is not supported for now
+ * as there is no clean way of cleaning up of rpc resources
+ */
+
+ if (iniate_rpc) {
+ ret = changelog_init_rpc(this, priv);
+ if (ret)
+ goto out;
+ priv->rpc_active = _gf_true;
+ }
+
+ /**
+ * changelog_handle_change() handles changes that could possibly
+ * have been submit changes before changelog deactivation.
+ */
+ if (!active_now)
+ priv->active = _gf_false;
+
+ GF_OPTION_RECONF("op-mode", tmp, options, str, out);
+ changelog_assign_opmode(priv, tmp);
+
+ tmp = NULL;
+
+ GF_OPTION_RECONF("encoding", tmp, options, str, out);
+ changelog_assign_encoding(priv, tmp);
+
+ GF_OPTION_RECONF("rollover-time", priv->rollover_time, options, int32, out);
+ GF_OPTION_RECONF("fsync-interval", priv->fsync_interval, options, int32,
+ out);
+ GF_OPTION_RECONF("changelog-barrier-timeout", timeout, options, time, out);
+ changelog_assign_barrier_timeout(priv, timeout);
+
+ GF_OPTION_RECONF("capture-del-path", priv->capture_del_path, options, bool,
+ out);
+
+ if (active_now || active_earlier) {
+ changelog_fill_rollover_data(&cld, !active_now);
+
+ slice = &priv->slice;
+
+ LOCK(&priv->lock);
+ {
+ ret = changelog_inject_single_event(this, priv, &cld);
+ if (!ret && active_now)
+ SLICE_VERSION_UPDATE(slice);
+ }
+ UNLOCK(&priv->lock);
+
+ if (ret)
+ goto out;
+
+ if (active_now) {
+ if (!active_earlier) {
+ gf_smsg(this->name, GF_LOG_INFO, 0, CHANGELOG_MSG_RECONFIGURE,
+ NULL);
+ htime_create(this, priv, gf_time());
+ }
+ ret = changelog_spawn_helper_threads(this, priv);
+ }
+ }
+
+out:
+ if (ret) {
+ /* TODO */
+ } else {
+ gf_msg_debug(this->name, 0, "changelog reconfigured");
+ if (active_now && priv)
+ priv->active = _gf_true;
+ }
+
+ return ret;
+}
+
+static void
+changelog_freeup_options(xlator_t *this, changelog_priv_t *priv)
+{
+ int ret = 0;
+
+ ret = priv->cb->dtor(this, &priv->cd);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_FREEUP_FAILED, NULL);
+ GF_FREE(priv->changelog_brick);
+ GF_FREE(priv->changelog_dir);
+}
+
+static int
+changelog_init_options(xlator_t *this, changelog_priv_t *priv)
+{
+ int ret = 0;
+ char *tmp = NULL;
+ uint32_t timeout = 0;
+ char htime_dir[PATH_MAX] = {
+ 0,
+ };
+ char csnap_dir[PATH_MAX] = {
+ 0,
+ };
+
+ GF_OPTION_INIT("changelog-brick", tmp, str, error_return);
+ priv->changelog_brick = gf_strdup(tmp);
+ if (!priv->changelog_brick)
+ goto error_return;
+
+ tmp = NULL;
+
+ GF_OPTION_INIT("changelog-dir", tmp, str, dealloc_1);
+ priv->changelog_dir = gf_strdup(tmp);
+ if (!priv->changelog_dir)
+ goto dealloc_1;
+
+ tmp = NULL;
+
+ /**
+ * create the directory even if change-logging would be inactive
+ * so that consumers can _look_ into it (finding nothing...)
+ */
+ ret = mkdir_p(priv->changelog_dir, 0600, _gf_true);
+
+ if (ret)
+ goto dealloc_2;
+
+ CHANGELOG_FILL_HTIME_DIR(priv->changelog_dir, htime_dir);
+ ret = mkdir_p(htime_dir, 0600, _gf_true);
+ if (ret)
+ goto dealloc_2;
+
+ CHANGELOG_FILL_CSNAP_DIR(priv->changelog_dir, csnap_dir);
+ ret = mkdir_p(csnap_dir, 0600, _gf_true);
+ if (ret)
+ goto dealloc_2;
+
+ GF_OPTION_INIT("changelog", priv->active, bool, dealloc_2);
+ GF_OPTION_INIT("changelog-notification", priv->rpc_active, bool, dealloc_2);
+ GF_OPTION_INIT("capture-del-path", priv->capture_del_path, bool, dealloc_2);
+
+ GF_OPTION_INIT("op-mode", tmp, str, dealloc_2);
+ changelog_assign_opmode(priv, tmp);
+
+ tmp = NULL;
+
+ GF_OPTION_INIT("encoding", tmp, str, dealloc_2);
+ changelog_assign_encoding(priv, tmp);
+ changelog_encode_change(priv);
+
+ GF_OPTION_INIT("rollover-time", priv->rollover_time, int32, dealloc_2);
+
+ GF_OPTION_INIT("fsync-interval", priv->fsync_interval, int32, dealloc_2);
+
+ GF_OPTION_INIT("changelog-barrier-timeout", timeout, time, dealloc_2);
+ changelog_assign_barrier_timeout(priv, timeout);
+
+ GF_ASSERT(cb_bootstrap[priv->op_mode].mode == priv->op_mode);
+ priv->cb = &cb_bootstrap[priv->op_mode];
+
+ /* ... now bootstrap the logger */
+ ret = priv->cb->ctor(this, &priv->cd);
+ if (ret)
+ goto dealloc_2;
+
+ priv->changelog_fd = -1;
+
+ return 0;
+
+dealloc_2:
+ GF_FREE(priv->changelog_dir);
+dealloc_1:
+ GF_FREE(priv->changelog_brick);
+error_return:
+ return -1;
+}
+
+static int
+changelog_init_rpc(xlator_t *this, changelog_priv_t *priv)
+{
+ rpcsvc_t *rpc = NULL;
+ changelog_ev_selector_t *selection = NULL;
+
+ selection = &priv->ev_selection;
+
+ /* initialize event selection */
+ changelog_init_event_selection(this, selection);
+
+ priv->rbuf = rbuf_init(NR_ROTT_BUFFS);
+ if (!priv->rbuf)
+ goto cleanup_thread;
+
+ rpc = changelog_init_rpc_listener(this, priv, priv->rbuf, NR_DISPATCHERS);
+ if (!rpc)
+ goto cleanup_rbuf;
+ priv->rpc = rpc;
+
+ return 0;
+
+cleanup_rbuf:
+ rbuf_dtor(priv->rbuf);
+cleanup_thread:
+ if (priv->poller)
+ (void)changelog_thread_cleanup(this, priv->poller);
+
+ return -1;
+}
+
+int32_t
+init(xlator_t *this)
+{
+ int ret = -1;
+ changelog_priv_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("changelog", this, error_return);
+
+ if (!this->children || this->children->next) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_CHILD_MISCONFIGURED,
+ NULL);
+ goto error_return;
+ }
+
+ if (!this->parents) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, CHANGELOG_MSG_VOL_MISCONFIGURED,
+ NULL);
+ goto error_return;
+ }
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_changelog_mt_priv_t);
+ if (!priv)
+ goto error_return;
+
+ this->local_pool = mem_pool_new(changelog_local_t, 64);
+ if (!this->local_pool) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, CHANGELOG_MSG_NO_MEMORY,
+ NULL);
+ goto cleanup_priv;
+ }
+
+ LOCK_INIT(&priv->lock);
+ LOCK_INIT(&priv->c_snap_lock);
+ GF_ATOMIC_INIT(priv->listnercnt, 0);
+ GF_ATOMIC_INIT(priv->clntcnt, 0);
+ GF_ATOMIC_INIT(priv->xprtcnt, 0);
+ INIT_LIST_HEAD(&priv->xprt_list);
+ priv->htime_fd = -1;
+
+ ret = changelog_init_options(this, priv);
+ if (ret)
+ goto cleanup_mempool;
+
+ /* snap dependency changes */
+ priv->dm.black_fop_cnt = 0;
+ priv->dm.white_fop_cnt = 0;
+ priv->dm.drain_wait_black = _gf_false;
+ priv->dm.drain_wait_white = _gf_false;
+ priv->current_color = FOP_COLOR_BLACK;
+ priv->explicit_rollover = _gf_false;
+
+ priv->cr.notify = _gf_false;
+ /* Mutex is not needed as threads are not spawned yet */
+ priv->bn.bnotify = _gf_false;
+ priv->bn.bnotify_error = _gf_false;
+ ret = changelog_barrier_pthread_init(this, priv);
+ if (ret)
+ goto cleanup_options;
+ LOCK_INIT(&priv->bflags.lock);
+ priv->bflags.barrier_ext = _gf_false;
+
+ /* Changelog barrier init */
+ INIT_LIST_HEAD(&priv->queue);
+ priv->barrier_enabled = _gf_false;
+
+ if (priv->rpc_active || priv->active) {
+ /* RPC ball rolling.. */
+ ret = changelog_init_rpc(this, priv);
+ if (ret)
+ goto cleanup_barrier;
+ priv->rpc_active = _gf_true;
+ }
+
+ ret = changelog_init(this, priv);
+ if (ret)
+ goto cleanup_rpc;
+
+ gf_msg_debug(this->name, 0, "changelog translator loaded");
+
+ this->private = priv;
+ return 0;
+
+cleanup_rpc:
+ if (priv->rpc_active) {
+ changelog_cleanup_rpc(this, priv);
+ }
+cleanup_barrier:
+ changelog_barrier_pthread_destroy(priv);
+cleanup_options:
+ changelog_freeup_options(this, priv);
+cleanup_mempool:
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+cleanup_priv:
+ GF_FREE(priv);
+error_return:
+ this->private = NULL;
+ return -1;
+}
+
+void
+fini(xlator_t *this)
+{
+ changelog_priv_t *priv = NULL;
+ struct list_head queue = {
+ 0,
+ };
+
+ priv = this->private;
+
+ if (priv) {
+ if (priv->active || priv->rpc_active) {
+ /* terminate RPC server/threads */
+ changelog_cleanup_rpc(this, priv);
+ GF_FREE(priv->ev_dispatcher);
+ }
+ /* call barrier_disable to cancel timer */
+ if (priv->barrier_enabled)
+ __chlog_barrier_disable(this, &queue);
+
+ /* cleanup barrier related objects */
+ changelog_barrier_pthread_destroy(priv);
+
+ /* cleanup helper threads */
+ changelog_cleanup_helper_threads(this, priv);
+
+ /* cleanup allocated options */
+ changelog_freeup_options(this, priv);
+
+ /* deallocate mempool */
+ mem_pool_destroy(this->local_pool);
+
+ if (priv->htime_fd != -1) {
+ sys_close(priv->htime_fd);
+ }
+
+ /* finally, dealloac private variable */
+ GF_FREE(priv);
+ }
+
+ this->private = NULL;
+ this->local_pool = NULL;
+
+ return;
+}
+
+struct xlator_fops fops = {
+ .open = changelog_open,
+ .mknod = changelog_mknod,
+ .mkdir = changelog_mkdir,
+ .create = changelog_create,
+ .symlink = changelog_symlink,
+ .writev = changelog_writev,
+ .truncate = changelog_truncate,
+ .ftruncate = changelog_ftruncate,
+ .link = changelog_link,
+ .rename = changelog_rename,
+ .unlink = changelog_unlink,
+ .rmdir = changelog_rmdir,
+ .setattr = changelog_setattr,
+ .fsetattr = changelog_fsetattr,
+ .setxattr = changelog_setxattr,
+ .fsetxattr = changelog_fsetxattr,
+ .removexattr = changelog_removexattr,
+ .fremovexattr = changelog_fremovexattr,
+ .ipc = changelog_ipc,
+ .xattrop = changelog_xattrop,
+ .fxattrop = changelog_fxattrop,
+};
+
+struct xlator_cbks cbks = {
+ .forget = changelog_forget,
+ .release = changelog_release,
+};
+
+struct volume_options options[] = {
+ {.key = {"changelog"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable change-logging",
+ .op_version = {3},
+ .flags = OPT_FLAG_SETTABLE,
+ .level = OPT_STATUS_BASIC,
+ .tags = {"journal", "georep", "glusterfind"}},
+ {.key = {"changelog-notification"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable changelog live notification",
+ .op_version = {3},
+ .level = OPT_STATUS_BASIC,
+ .tags = {"bitrot", "georep"}},
+ {.key = {"changelog-brick"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "brick path to generate unique socket file name."
+ " should be the export directory of the volume strictly.",
+ .default_value = "{{ brick.path }}",
+ .op_version = {3},
+ .tags = {"journal"}},
+ {.key = {"changelog-dir"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "directory for the changelog files",
+ .default_value = "{{ brick.path }}/.glusterfs/changelogs",
+ .op_version = {3},
+ .flags = OPT_FLAG_SETTABLE,
+ .level = OPT_STATUS_ADVANCED,
+ .tags = {"journal", "georep", "glusterfind"}},
+ {.key = {"op-mode"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "realtime",
+ .value = {"realtime"},
+ .description = "operation mode - futuristic operation modes",
+ .op_version = {3},
+ .tags = {"journal"}},
+ {.key = {"encoding"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "ascii",
+ .value = {"binary", "ascii"},
+ .description = "encoding type for changelogs",
+ .op_version = {3},
+ .flags = OPT_FLAG_SETTABLE,
+ .level = OPT_STATUS_ADVANCED,
+ .tags = {"journal"}},
+ {.key = {"rollover-time"},
+ .default_value = "15",
+ .type = GF_OPTION_TYPE_TIME,
+ .description = "time to switch to a new changelog file (in seconds)",
+ .op_version = {3},
+ .flags = OPT_FLAG_SETTABLE,
+ .level = OPT_STATUS_ADVANCED,
+ .tags = {"journal", "georep", "glusterfind"}},
+ {.key = {"fsync-interval"},
+ .type = GF_OPTION_TYPE_TIME,
+ .default_value = "5",
+ .description = "do not open CHANGELOG file with O_SYNC mode."
+ " instead perform fsync() at specified intervals",
+ .op_version = {3},
+ .flags = OPT_FLAG_SETTABLE,
+ .level = OPT_STATUS_ADVANCED,
+ .tags = {"journal"}},
+ {.key = {"changelog-barrier-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .default_value = BARRIER_TIMEOUT,
+ .description = "After 'timeout' seconds since the time 'barrier' "
+ "option was set to \"on\", unlink/rmdir/rename "
+ "operations are no longer blocked and previously "
+ "blocked fops are allowed to go through",
+ .op_version = {3},
+ .flags = OPT_FLAG_SETTABLE,
+ .level = OPT_STATUS_ADVANCED,
+ .tags = {"journal"}},
+ {.key = {"capture-del-path"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable capturing paths of deleted entries",
+ .op_version = {3},
+ .flags = OPT_FLAG_SETTABLE,
+ .level = OPT_STATUS_BASIC,
+ .tags = {"journal", "glusterfind"}},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "changelog",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/cloudsync/Makefile.am b/xlators/features/cloudsync/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/cloudsync/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/cloudsync/src/Makefile.am b/xlators/features/cloudsync/src/Makefile.am
new file mode 100644
index 00000000000..e2a277e372b
--- /dev/null
+++ b/xlators/features/cloudsync/src/Makefile.am
@@ -0,0 +1,46 @@
+SUBDIRS = cloudsync-plugins
+
+xlator_LTLIBRARIES = cloudsync.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+cloudsync_sources = cloudsync.c
+
+CLOUDSYNC_SRC = $(top_srcdir)/xlators/features/cloudsync/src
+CLOUDSYNC_BLD = $(top_builddir)/xlators/features/cloudsync/src
+
+cloudsynccommon_sources = $(CLOUDSYNC_SRC)/cloudsync-common.c
+
+noinst_HEADERS = $(CLOUDSYNC_BLD)/cloudsync.h \
+ $(CLOUDSYNC_BLD)/cloudsync-mem-types.h \
+ $(CLOUDSYNC_BLD)/cloudsync-messages.h \
+ $(CLOUDSYNC_BLD)/cloudsync-common.h
+
+cloudsync_la_SOURCES = $(cloudsync_sources) $(cloudsynccommon_sources)
+
+nodist_cloudsync_la_SOURCES = cloudsync-autogen-fops.c cloudsync-autogen-fops.h
+BUILT_SOURCES = cloudsync-autogen-fops.h
+
+cloudsync_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+cloudsync_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(LIB_DL)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -DCS_PLUGINDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins\"
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
+
+noinst_PYTHON = cloudsync-fops-c.py cloudsync-fops-h.py
+EXTRA_DIST = cloudsync-autogen-fops-tmpl.c cloudsync-autogen-fops-tmpl.h
+
+cloudsync-autogen-fops.c: cloudsync-fops-c.py cloudsync-autogen-fops-tmpl.c
+ $(PYTHON) $(CLOUDSYNC_SRC)/cloudsync-fops-c.py \
+ $(CLOUDSYNC_SRC)/cloudsync-autogen-fops-tmpl.c > $@
+
+cloudsync-autogen-fops.h: cloudsync-fops-h.py cloudsync-autogen-fops-tmpl.h
+ $(PYTHON) $(CLOUDSYNC_SRC)/cloudsync-fops-h.py \
+ $(CLOUDSYNC_SRC)/cloudsync-autogen-fops-tmpl.h > $@
+
+CLEANFILES = $(nodist_cloudsync_la_SOURCES)
+
+uninstall-local:
+ rm -f $(DESTDIR)$(xlatordir)/cloudsync.so
diff --git a/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c b/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c
new file mode 100644
index 00000000000..ee63f983980
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.c
@@ -0,0 +1,30 @@
+/*
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* File: cloudsync-autogen-fops-tmpl.c
+ * This file contains the CLOUDSYNC autogenerated FOPs. This is run through
+ * the code generator, generator.py to generate the required FOPs.
+ */
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <dlfcn.h>
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include "cloudsync.h"
+#include "cloudsync-common.h"
+#include <glusterfs/call-stub.h>
+
+#pragma generate
diff --git a/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h b/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h
new file mode 100644
index 00000000000..d922c77d8aa
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-autogen-fops-tmpl.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2008-2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* File: clousync-autogen-fops-tmpl.h
+ * This file contains the cloudsync autogenerated FOPs declarations.
+ */
+
+#ifndef _CLOUDSYNC_AUTOGEN_FOPS_H
+#define _CLOUDSYNC_AUTOGEN_FOPS_H
+
+#include <glusterfs/xlator.h>
+#include "cloudsync.h"
+#include "cloudsync-common.h"
+
+#pragma generate
+
+#endif /* _CLOUDSYNC_AUTOGEN_FOPS_H */
diff --git a/xlators/features/cloudsync/src/cloudsync-common.c b/xlators/features/cloudsync/src/cloudsync-common.c
new file mode 100644
index 00000000000..445a31b90e7
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-common.c
@@ -0,0 +1,60 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "cloudsync-common.h"
+
+void
+cs_xattrinfo_wipe(cs_local_t *local)
+{
+ if (local->xattrinfo.lxattr) {
+ if (local->xattrinfo.lxattr->file_path)
+ GF_FREE(local->xattrinfo.lxattr->file_path);
+
+ if (local->xattrinfo.lxattr->volname)
+ GF_FREE(local->xattrinfo.lxattr->volname);
+
+ GF_FREE(local->xattrinfo.lxattr);
+ }
+}
+
+void
+cs_local_wipe(xlator_t *this, cs_local_t *local)
+{
+ if (!local)
+ return;
+
+ loc_wipe(&local->loc);
+
+ if (local->fd) {
+ fd_unref(local->fd);
+ local->fd = NULL;
+ }
+
+ if (local->stub) {
+ call_stub_destroy(local->stub);
+ local->stub = NULL;
+ }
+
+ if (local->xattr_req)
+ dict_unref(local->xattr_req);
+
+ if (local->xattr_rsp)
+ dict_unref(local->xattr_rsp);
+
+ if (local->dlfd)
+ fd_unref(local->dlfd);
+
+ if (local->remotepath)
+ GF_FREE(local->remotepath);
+
+ cs_xattrinfo_wipe(local);
+
+ mem_put(local);
+}
diff --git a/xlators/features/cloudsync/src/cloudsync-common.h b/xlators/features/cloudsync/src/cloudsync-common.h
new file mode 100644
index 00000000000..11d233460a4
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-common.h
@@ -0,0 +1,134 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _CLOUDSYNC_COMMON_H
+#define _CLOUDSYNC_COMMON_H
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/compat-errno.h>
+#include "cloudsync-mem-types.h"
+#include "cloudsync-messages.h"
+
+typedef struct cs_loc_xattr {
+ char *file_path;
+ uuid_t uuid;
+ uuid_t gfid;
+ char *volname;
+} cs_loc_xattr_t;
+
+typedef struct cs_size_xattr {
+ uint64_t size;
+ uint64_t blksize;
+ uint64_t blocks;
+} cs_size_xattr_t;
+
+typedef struct cs_local {
+ loc_t loc;
+ fd_t *fd;
+ call_stub_t *stub;
+ call_frame_t *main_frame;
+ int op_errno;
+ int op_ret;
+ fd_t *dlfd;
+ off_t dloffset;
+ struct iatt stbuf;
+ dict_t *xattr_rsp;
+ dict_t *xattr_req;
+ glusterfs_fop_t fop;
+ gf_boolean_t locked;
+ int call_cnt;
+ inode_t *inode;
+ char *remotepath;
+
+ struct {
+ /* offset, flags and size are the information needed
+ * by read fop for remote read operation. These will be
+ * populated in cloudsync read fop, before being passed
+ * on to the plugin performing remote read.
+ */
+ off_t offset;
+ uint32_t flags;
+ size_t size;
+ cs_loc_xattr_t *lxattr;
+ } xattrinfo;
+
+} cs_local_t;
+
+typedef int (*fop_download_t)(call_frame_t *frame, void *config);
+
+typedef int (*fop_remote_read_t)(call_frame_t *, void *);
+
+typedef void *(*store_init)(xlator_t *this);
+
+typedef int (*store_reconfigure)(xlator_t *this, dict_t *options);
+
+typedef void (*store_fini)(void *config);
+
+struct cs_remote_stores {
+ char *name; /* store name */
+ void *config; /* store related information */
+ fop_download_t dlfop; /* store specific download function */
+ fop_remote_read_t rdfop; /* store specific read function */
+ store_init init; /* store init to initialize store config */
+ store_reconfigure reconfigure; /* reconfigure store config */
+ store_fini fini;
+ void *handle; /* shared library handle*/
+};
+
+typedef struct cs_private {
+ xlator_t *this;
+ struct cs_remote_stores *stores;
+ gf_boolean_t abortdl;
+ pthread_spinlock_t lock;
+ gf_boolean_t remote_read;
+} cs_private_t;
+
+void
+cs_local_wipe(xlator_t *this, cs_local_t *local);
+
+void
+cs_xattrinfo_wipe(cs_local_t *local);
+
+#define CS_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ cs_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ cs_local_wipe(__xl, __local); \
+ } while (0)
+
+#define CS_STACK_DESTROY(frame) \
+ do { \
+ cs_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY(frame->root); \
+ cs_local_wipe(__xl, __local); \
+ } while (0)
+
+typedef struct store_methods {
+ int (*fop_download)(call_frame_t *frame, void *config);
+ int (*fop_remote_read)(call_frame_t *, void *);
+ /* return type should be the store config */
+ void *(*fop_init)(xlator_t *this);
+ int (*fop_reconfigure)(xlator_t *this, dict_t *options);
+ void (*fop_fini)(void *config);
+} store_methods_t;
+
+#endif /* _CLOUDSYNC_COMMON_H */
diff --git a/xlators/features/cloudsync/src/cloudsync-fops-c.py b/xlators/features/cloudsync/src/cloudsync-fops-c.py
new file mode 100755
index 00000000000..c27df97ae58
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-fops-c.py
@@ -0,0 +1,324 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+import os
+import sys
+
+curdir = os.path.dirname(sys.argv[0])
+gendir = os.path.join(curdir, '../../../../libglusterfs/src')
+sys.path.append(gendir)
+from generator import ops, fop_subs, cbk_subs, generate
+
+FD_DATA_MODIFYING_OP_FOP_TEMPLATE = """
+int32_t
+cs_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ int op_errno = EINVAL ;
+ cs_local_t *local = NULL;
+ int ret = 0;
+ cs_inode_ctx_t *ctx = NULL;
+ gf_cs_obj_state state = -1;
+
+ VALIDATE_OR_GOTO (frame, err);
+ VALIDATE_OR_GOTO (this, err);
+ VALIDATE_OR_GOTO (fd, err);
+
+ local = cs_local_init (this, frame, NULL, fd, GF_FOP_@UPNAME@);
+ if (!local) {
+
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "local init failed");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ __cs_inode_ctx_get (this, fd->inode, &ctx);
+
+ if (ctx)
+ state = __cs_get_file_state (fd->inode, ctx);
+ else
+ state = GF_CS_LOCAL;
+
+ xdata = xdata ? dict_ref (xdata) : dict_new ();
+
+ if (!xdata) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->xattr_req = xdata;
+
+ ret = dict_set_uint32 (local->xattr_req, GF_CS_OBJECT_STATUS, 1);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "dict_set failed key:"
+ " %s", GF_CS_OBJECT_STATUS);
+ goto err;
+ }
+
+ local->stub = fop_@NAME@_stub (frame, cs_resume_@NAME@,
+ @SHORT_ARGS@);
+ if (!local->stub) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+
+ if (state == GF_CS_LOCAL) {
+ STACK_WIND (frame, cs_@NAME@_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ } else {
+ local->call_cnt++;
+ ret = locate_and_execute (frame);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ CS_STACK_UNWIND (@NAME@, frame, -1, op_errno, @CBK_ERROR_ARGS@);
+
+ return 0;
+}
+"""
+
+FD_DATA_MODIFYING_RESUME_OP_FOP_TEMPLATE = """
+int32_t
+cs_resume_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ int ret = 0;
+
+ ret = cs_resume_postprocess (this, frame, fd->inode);
+ if (ret) {
+ goto unwind;
+ }
+
+ cs_inodelk_unlock (frame);
+
+ STACK_WIND (frame, cs_@NAME@_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+
+ return 0;
+
+unwind:
+
+ cs_inodelk_unlock (frame);
+
+ cs_common_cbk (frame);
+
+ return 0;
+}
+"""
+FD_DATA_MODIFYING_OP_FOP_CBK_TEMPLATE = """
+int32_t
+cs_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ @LONG_ARGS@)
+{
+ cs_local_t *local = NULL;
+ int ret = 0;
+ uint64_t val = 0;
+ fd_t *fd = NULL;
+
+ local = frame->local;
+ fd = local->fd;
+
+ /* Do we need lock here? */
+ local->call_cnt++;
+
+ if (op_ret == -1) {
+ ret = dict_get_uint64 (xdata, GF_CS_OBJECT_STATUS, &val);
+ if (ret == 0) {
+ if (val == GF_CS_ERROR) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0,
+ "could not get file state, unwinding");
+ op_ret = -1;
+ op_errno = EIO;
+ goto unwind;
+ } else {
+ __cs_inode_ctx_update (this, fd->inode, val);
+ gf_msg (this->name, GF_LOG_INFO, 0, 0,
+ " state = %" PRIu64, val);
+
+ if (local->call_cnt == 1 &&
+ (val == GF_CS_REMOTE ||
+ val == GF_CS_DOWNLOADING)) {
+ gf_msg (this->name, GF_LOG_INFO, 0,
+ 0, " will repair and download "
+ "the file, current state : %"
+ PRIu64, val);
+ goto repair;
+ } else {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0,
+ "second @NAME@, Unwinding");
+ goto unwind;
+ }
+ }
+ } else {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "file state "
+ "could not be figured, unwinding");
+ goto unwind;
+ }
+ } else {
+ /* successful @NAME@ => file is local */
+ __cs_inode_ctx_update (this, fd->inode, GF_CS_LOCAL);
+ gf_msg (this->name, GF_LOG_INFO, 0, 0, "state : GF_CS_LOCAL"
+ ", @NAME@ successful");
+
+ goto unwind;
+ }
+
+repair:
+ ret = locate_and_execute (frame);
+ if (ret) {
+ goto unwind;
+ }
+
+ return 0;
+
+unwind:
+ CS_STACK_UNWIND (@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@);
+
+ return 0;
+}
+"""
+
+LOC_STAT_OP_FOP_TEMPLATE = """
+int32_t
+cs_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ int op_errno = EINVAL;
+ cs_local_t *local = NULL;
+ int ret = 0;
+
+ local = cs_local_init (this, frame, loc, NULL, GF_FOP_@UPNAME@);
+ if (!local) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "local is NULL");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ if (loc->inode->ia_type == IA_IFDIR)
+ goto wind;
+
+ xdata = xdata ? dict_ref (xdata) : dict_new ();
+
+ if (!xdata) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->xattr_req = xdata;
+
+ ret = dict_set_uint32 (local->xattr_req, GF_CS_OBJECT_STATUS, 1);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0, "dict_set failed key:"
+ " %s", GF_CS_OBJECT_STATUS);
+ goto err;
+ }
+
+wind:
+ STACK_WIND (frame, cs_@NAME@_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+
+ return 0;
+err:
+ CS_STACK_UNWIND (@NAME@, frame, -1, op_errno, @CBK_ERROR_ARGS@);
+
+ return 0;
+}
+"""
+
+LOC_STAT_OP_FOP_CBK_TEMPLATE = """
+int32_t
+cs_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ @LONG_ARGS@)
+{
+ int ret = 0;
+ uint64_t val = 0;
+ loc_t *loc = NULL;
+ cs_local_t *local = NULL;
+
+ local = frame->local;
+
+ loc = &local->loc;
+
+ if (op_ret == 0) {
+ ret = dict_get_uint64 (xdata, GF_CS_OBJECT_STATUS, &val);
+ if (!ret) {
+ ret = __cs_inode_ctx_update (this, loc->inode, val);
+ if (ret) {
+ gf_msg (this->name, GF_LOG_ERROR, 0, 0,
+ "ctx update failed");
+ }
+ }
+ } else {
+ cs_inode_ctx_reset (this, loc->inode);
+ }
+
+ CS_STACK_UNWIND (@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@);
+
+ return 0;
+}
+"""
+
+# All xlator FOPs are covered in the following section just to create a clarity
+# The lists themselves are not used.
+entry_ops = ['mknod', 'mkdir', 'unlink', 'rmdir', 'symlink', 'rename', 'link',
+ 'create']
+special_ops = ['statfs', 'lookup', 'ipc', 'compound', 'icreate', 'namelink']
+ignored_ops = ['getspec']
+inode_ops = ['stat', 'readlink', 'truncate', 'open', 'setxattr', 'getxattr',
+ 'removexattr', 'opendir', 'access', 'inodelk', 'entrylk',
+ 'xattrop', 'setattr', 'lease', 'getactivelk', 'setactivelk',
+ 'discover']
+fd_ops = ['readv', 'writev', 'flush', 'fsync', 'fsyncdir', 'ftruncate',
+ 'fstat', 'lk', 'readdir', 'finodelk', 'fentrylk', 'fxattrop',
+ 'fsetxattr', 'fgetxattr', 'rchecksum', 'fsetattr', 'readdirp',
+ 'fremovexattr', 'fallocate', 'discard', 'zerofill', 'seek']
+
+
+# These are the current actual lists used to generate the code
+
+# The following list contains fops which are fd based that modifies data
+fd_data_modify_op_fop_template = ['writev', 'flush', 'fsync',
+ 'ftruncate', 'rchecksum', 'fallocate',
+ 'discard', 'zerofill', 'seek']
+
+# The following list contains fops which are entry based that does not change
+# data
+loc_stat_op_fop_template = ['lookup', 'stat', 'discover', 'access', 'setattr',
+ 'getattr']
+
+# These fops need a separate implementation
+special_fops = ['statfs', 'setxattr', 'unlink', 'getxattr',
+ 'truncate', 'fstat', 'readv', 'readdirp']
+
+def gen_defaults():
+ for name in ops:
+ if name in fd_data_modify_op_fop_template:
+ print(generate(FD_DATA_MODIFYING_OP_FOP_CBK_TEMPLATE, name, cbk_subs))
+ print(generate(FD_DATA_MODIFYING_RESUME_OP_FOP_TEMPLATE, name, fop_subs))
+ print(generate(FD_DATA_MODIFYING_OP_FOP_TEMPLATE, name, fop_subs))
+ elif name in loc_stat_op_fop_template:
+ print(generate(LOC_STAT_OP_FOP_CBK_TEMPLATE, name, cbk_subs))
+ print(generate(LOC_STAT_OP_FOP_TEMPLATE, name, fop_subs))
+
+for l in open(sys.argv[1], 'r').readlines():
+ if l.find('#pragma generate') != -1:
+ print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
+ gen_defaults()
+ print("/* END GENERATED CODE */")
+ else:
+ print(l[:-1])
diff --git a/xlators/features/cloudsync/src/cloudsync-fops-h.py b/xlators/features/cloudsync/src/cloudsync-fops-h.py
new file mode 100755
index 00000000000..faa2de651a7
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-fops-h.py
@@ -0,0 +1,31 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+import os
+import sys
+
+curdir = os.path.dirname(sys.argv[0])
+gendir = os.path.join(curdir, '../../../../libglusterfs/src')
+sys.path.append(gendir)
+from generator import ops, fop_subs, cbk_subs, generate
+
+OP_FOP_TEMPLATE = """
+int32_t
+cs_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@);
+"""
+
+def gen_defaults():
+ for name, value in ops.items():
+ if name == 'getspec':
+ continue
+ print(generate(OP_FOP_TEMPLATE, name, fop_subs))
+
+
+for l in open(sys.argv[1], 'r').readlines():
+ if l.find('#pragma generate') != -1:
+ print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
+ gen_defaults()
+ print("/* END GENERATED CODE */")
+ else:
+ print(l[:-1])
diff --git a/xlators/features/cloudsync/src/cloudsync-mem-types.h b/xlators/features/cloudsync/src/cloudsync-mem-types.h
new file mode 100644
index 00000000000..220346405d0
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-mem-types.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __CLOUDSYNC_MEM_TYPES_H__
+#define __CLOUDSYNC_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+enum cs_mem_types_ {
+ gf_cs_mt_cs_private_t = gf_common_mt_end + 1,
+ gf_cs_mt_cs_remote_stores_t,
+ gf_cs_mt_cs_inode_ctx_t,
+ gf_cs_mt_cs_lxattr_t,
+ gf_cs_mt_end
+};
+#endif /* __CLOUDSYNC_MEM_TYPES_H__ */
diff --git a/xlators/features/cloudsync/src/cloudsync-messages.h b/xlators/features/cloudsync/src/cloudsync-messages.h
new file mode 100644
index 00000000000..fb08f72de7f
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-messages.h
@@ -0,0 +1,16 @@
+/*
+ * Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __CLOUDSYNC_MESSAGES_H__
+#define __CLOUDSYNC_MESSAGES_H__
+
+/*TODO: define relevant message ids */
+
+#endif /* __CLOUDSYNC_MESSAGES_H__ */
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am
new file mode 100644
index 00000000000..fb6b0580c6d
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/Makefile.am
@@ -0,0 +1,11 @@
+if BUILD_AMAZONS3_PLUGIN
+ AMAZONS3_DIR = cloudsyncs3
+endif
+
+if BUILD_CVLT_PLUGIN
+ CVLT_DIR = cvlt
+endif
+
+SUBDIRS = ${AMAZONS3_DIR} ${CVLT_DIR}
+
+CLEANFILES =
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile.am
new file mode 100644
index 00000000000..6509426ef87
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/Makefile.am
@@ -0,0 +1,12 @@
+csp_LTLIBRARIES = cloudsyncs3.la
+cspdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins
+
+cloudsyncs3_la_SOURCES = libcloudsyncs3.c $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-common.c
+cloudsyncs3_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+cloudsyncs3_la_LDFLAGS = -module -export-symbols $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.sym $(GF_XLATOR_LDFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src -lcurlpp -lcryptopp
+noinst_HEADERS = libcloudsyncs3.h libcloudsyncs3-mem-types.h
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) -lcurl -lcrypto -I$(top_srcdir)/xlators/features/cloudsync/src
+CLEANFILES =
+
+EXTRA_DIST = libcloudsyncs3.sym
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h
new file mode 100644
index 00000000000..7ccfcc9f4b6
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3-mem-types.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __LIBAWS_MEM_TYPES_H__
+#define __LIBAWS_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+enum libaws_mem_types_ {
+ gf_libaws_mt_aws_private_t = gf_common_mt_end + 1,
+ gf_libaws_mt_end
+};
+#endif /* __CLOUDSYNC_MEM_TYPES_H__ */
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c
new file mode 100644
index 00000000000..23c3599825a
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.c
@@ -0,0 +1,584 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdlib.h>
+#include <openssl/hmac.h>
+#include <openssl/evp.h>
+#include <openssl/bio.h>
+#include <openssl/buffer.h>
+#include <openssl/crypto.h>
+#include <curl/curl.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include "libcloudsyncs3.h"
+#include "cloudsync-common.h"
+
+#define RESOURCE_SIZE 4096
+
+store_methods_t store_ops = {
+ .fop_download = aws_download_s3,
+ .fop_init = aws_init,
+ .fop_reconfigure = aws_reconfigure,
+ .fop_fini = aws_fini,
+};
+
+typedef struct aws_private {
+ char *hostname;
+ char *bucketid;
+ char *awssekey;
+ char *awskeyid;
+ gf_boolean_t abortdl;
+ pthread_spinlock_t lock;
+} aws_private_t;
+
+void *
+aws_init(xlator_t *this)
+{
+ aws_private_t *priv = NULL;
+ char *temp_str = NULL;
+ int ret = 0;
+
+ priv = GF_CALLOC(1, sizeof(aws_private_t), gf_libaws_mt_aws_private_t);
+ if (!priv) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
+ return NULL;
+ }
+
+ priv->abortdl = _gf_false;
+
+ pthread_spin_init(&priv->lock, PTHREAD_PROCESS_PRIVATE);
+
+ pthread_spin_lock(&(priv->lock));
+ {
+ if (dict_get_str(this->options, "s3plugin-seckey", &temp_str) == 0) {
+ priv->awssekey = gf_strdup(temp_str);
+ if (!priv->awssekey) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "initializing aws secret key failed");
+ ret = -1;
+ goto unlock;
+ }
+ }
+
+ if (dict_get_str(this->options, "s3plugin-keyid", &temp_str) == 0) {
+ priv->awskeyid = gf_strdup(temp_str);
+ if (!priv->awskeyid) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "initializing aws key ID failed");
+ ret = -1;
+ goto unlock;
+ }
+ }
+
+ if (dict_get_str(this->options, "s3plugin-bucketid", &temp_str) == 0) {
+ priv->bucketid = gf_strdup(temp_str);
+ if (!priv->bucketid) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "initializing aws bucketid failed");
+
+ ret = -1;
+ goto unlock;
+ }
+ }
+
+ if (dict_get_str(this->options, "s3plugin-hostname", &temp_str) == 0) {
+ priv->hostname = gf_strdup(temp_str);
+ if (!priv->hostname) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "initializing aws hostname failed");
+
+ ret = -1;
+ goto unlock;
+ }
+ }
+
+ gf_msg_debug(this->name, 0,
+ "stored key: %s id: %s "
+ "bucketid %s hostname: %s",
+ priv->awssekey, priv->awskeyid, priv->bucketid,
+ priv->hostname);
+ }
+unlock:
+ pthread_spin_unlock(&(priv->lock));
+
+ if (ret == -1) {
+ GF_FREE(priv->awskeyid);
+ GF_FREE(priv->awssekey);
+ GF_FREE(priv->bucketid);
+ GF_FREE(priv->hostname);
+ GF_FREE(priv);
+ priv = NULL;
+ }
+
+ return (void *)priv;
+}
+
+int
+aws_reconfigure(xlator_t *this, dict_t *options)
+{
+ aws_private_t *priv = NULL;
+ char *temp_str = NULL;
+ int ret = 0;
+ cs_private_t *cspriv = NULL;
+
+ cspriv = this->private;
+
+ priv = cspriv->stores->config;
+
+ if (!priv) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "null priv");
+ return -1;
+ }
+
+ pthread_spin_lock(&(priv->lock));
+ {
+ if (dict_get_str(options, "s3plugin-seckey", &temp_str) == 0) {
+ priv->awssekey = gf_strdup(temp_str);
+ if (!priv->awssekey) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "initializing aws secret key failed");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (dict_get_str(options, "s3plugin-keyid", &temp_str) == 0) {
+ priv->awskeyid = gf_strdup(temp_str);
+ if (!priv->awskeyid) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "initializing aws key ID failed");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (dict_get_str(options, "s3plugin-bucketid", &temp_str) == 0) {
+ priv->bucketid = gf_strdup(temp_str);
+ if (!priv->bucketid) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "initializing aws bucketid failed");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (dict_get_str(options, "s3plugin-hostname", &temp_str) == 0) {
+ priv->hostname = gf_strdup(temp_str);
+ if (!priv->hostname) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "initializing aws hostname failed");
+ ret = -1;
+ goto out;
+ }
+ }
+ }
+out:
+ pthread_spin_unlock(&(priv->lock));
+
+ gf_msg_debug(this->name, 0,
+ "stored key: %s id: %s "
+ "bucketid %s hostname: %s",
+ priv->awssekey, priv->awskeyid, priv->bucketid,
+ priv->hostname);
+
+ return ret;
+}
+
+void
+aws_fini(void *config)
+{
+ aws_private_t *priv = NULL;
+
+ priv = (aws_private_t *)priv;
+
+ if (priv) {
+ GF_FREE(priv->hostname);
+ GF_FREE(priv->bucketid);
+ GF_FREE(priv->awssekey);
+ GF_FREE(priv->awskeyid);
+
+ pthread_spin_destroy(&priv->lock);
+ GF_FREE(priv);
+ }
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("dht", this, out);
+
+ ret = xlator_mem_acct_init(this, gf_libaws_mt_end + 1);
+
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "Memory accounting init failed");
+ return ret;
+ }
+out:
+ return ret;
+}
+char *
+aws_form_request(char *resource, char **date, char *reqtype, char *bucketid,
+ char *filepath)
+{
+ char httpdate[256];
+ time_t ctime;
+ struct tm *gtime = NULL;
+ char *sign_req = NULL;
+ int signreq_len = -1;
+ int date_len = -1;
+ int res_len = -1;
+
+ ctime = gf_time();
+ gtime = gmtime(&ctime);
+
+ date_len = strftime(httpdate, sizeof(httpdate),
+ "%a, %d %b %Y %H:%M:%S +0000", gtime);
+
+ *date = gf_strndup(httpdate, date_len);
+ if (*date == NULL) {
+ gf_msg("CS", GF_LOG_ERROR, ENOMEM, 0,
+ "memory allocation "
+ "failure for date");
+ goto out;
+ }
+
+ res_len = snprintf(resource, RESOURCE_SIZE, "%s/%s", bucketid, filepath);
+
+ gf_msg_debug("CS", 0, "resource %s", resource);
+
+ /* 6 accounts for the 4 new line chars, one forward slash and
+ * one null char */
+ signreq_len = res_len + date_len + strlen(reqtype) + 6;
+
+ sign_req = GF_MALLOC(signreq_len, gf_common_mt_char);
+ if (sign_req == NULL) {
+ gf_msg("CS", GF_LOG_ERROR, ENOMEM, 0,
+ "memory allocation "
+ "failure for sign_req");
+ goto out;
+ }
+
+ snprintf(sign_req, signreq_len, "%s\n\n%s\n%s\n/%s", reqtype, "", *date,
+ resource);
+
+out:
+ return sign_req;
+}
+
+char *
+aws_b64_encode(const unsigned char *input, int length)
+{
+ BIO *bio, *b64;
+ BUF_MEM *bptr;
+ char *buff = NULL;
+
+ b64 = BIO_new(BIO_f_base64());
+ bio = BIO_new(BIO_s_mem());
+ b64 = BIO_push(b64, bio);
+ BIO_write(b64, input, length);
+ BIO_flush(b64);
+ BIO_get_mem_ptr(b64, &bptr);
+
+ buff = GF_MALLOC(bptr->length, gf_common_mt_char);
+ memcpy(buff, bptr->data, bptr->length - 1);
+ buff[bptr->length - 1] = 0;
+
+ BIO_free_all(b64);
+
+ return buff;
+}
+
+char *
+aws_sign_request(char *const str, char *awssekey)
+{
+#if (OPENSSL_VERSION_NUMBER < 0x1010002f)
+ HMAC_CTX ctx;
+#endif
+ HMAC_CTX *pctx = NULL;
+ ;
+
+ unsigned char md[256];
+ unsigned len;
+ char *base64 = NULL;
+
+#if (OPENSSL_VERSION_NUMBER < 0x1010002f)
+ HMAC_CTX_init(&ctx);
+ pctx = &ctx;
+#else
+ pctx = HMAC_CTX_new();
+#endif
+ HMAC_Init_ex(pctx, awssekey, strlen(awssekey), EVP_sha1(), NULL);
+ HMAC_Update(pctx, (unsigned char *)str, strlen(str));
+ HMAC_Final(pctx, (unsigned char *)md, &len);
+
+#if (OPENSSL_VERSION_NUMBER < 0x1010002f)
+ HMAC_CTX_cleanup(pctx);
+#else
+ HMAC_CTX_free(pctx);
+#endif
+ base64 = aws_b64_encode(md, len);
+
+ return base64;
+}
+
+int
+aws_dlwritev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ aws_private_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, op_errno,
+ "write failed "
+ ". Aborting Download");
+
+ priv = this->private;
+ pthread_spin_lock(&(priv->lock));
+ {
+ priv->abortdl = _gf_true;
+ }
+ pthread_spin_unlock(&(priv->lock));
+ }
+
+ CS_STACK_DESTROY(frame);
+
+ return op_ret;
+}
+
+size_t
+aws_write_callback(void *dlbuf, size_t size, size_t nitems, void *mainframe)
+{
+ call_frame_t *frame = NULL;
+ fd_t *dlfd = NULL;
+ int ret = 0;
+ cs_local_t *local = NULL;
+ struct iovec iov = {
+ 0,
+ };
+ struct iobref *iobref = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iovec dliov = {
+ 0,
+ };
+ size_t tsize = 0;
+ xlator_t *this = NULL;
+ cs_private_t *xl_priv = NULL;
+ aws_private_t *priv = NULL;
+ call_frame_t *dlframe = NULL;
+
+ frame = (call_frame_t *)mainframe;
+ this = frame->this;
+ xl_priv = this->private;
+ priv = xl_priv->stores->config;
+
+ pthread_spin_lock(&(priv->lock));
+ {
+ /* returning size other than the size passed from curl will
+ * abort further download*/
+ if (priv->abortdl) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "aborting download");
+ pthread_spin_unlock(&(priv->lock));
+ return 0;
+ }
+ }
+ pthread_spin_unlock(&(priv->lock));
+
+ local = frame->local;
+ dlfd = local->dlfd;
+ tsize = size * nitems;
+
+ dliov.iov_base = (void *)dlbuf;
+ dliov.iov_len = tsize;
+
+ ret = iobuf_copy(this->ctx->iobuf_pool, &dliov, 1, &iobref, &iobuf, &iov);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "iobuf_copy failed");
+ goto out;
+ }
+
+ /* copy frame */
+ dlframe = copy_frame(frame);
+ if (!dlframe) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "copy_frame failed");
+ tsize = 0;
+ goto out;
+ }
+
+ STACK_WIND(dlframe, aws_dlwritev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, dlfd, &iov, 1, local->dloffset,
+ 0, iobref, NULL);
+
+ local->dloffset += tsize;
+
+out:
+ if (iobuf)
+ iobuf_unref(iobuf);
+ if (iobref)
+ iobref_unref(iobref);
+
+ return tsize;
+}
+
+int
+aws_download_s3(call_frame_t *frame, void *config)
+{
+ char *buf;
+ int bufsize = -1;
+ CURL *handle = NULL;
+ struct curl_slist *slist = NULL;
+ struct curl_slist *tmp = NULL;
+ xlator_t *this = NULL;
+ int ret = 0;
+ int debug = 1;
+ CURLcode res;
+ char errbuf[CURL_ERROR_SIZE];
+ size_t len = 0;
+ long responsecode;
+ char *sign_req = NULL;
+ char *date = NULL;
+ char *const reqtype = "GET";
+ char *signature = NULL;
+ cs_local_t *local = NULL;
+ char resource[RESOURCE_SIZE] = {
+ 0,
+ };
+ aws_private_t *priv = NULL;
+
+ this = frame->this;
+
+ local = frame->local;
+
+ priv = (aws_private_t *)config;
+
+ if (!priv->bucketid || !priv->hostname || !priv->awssekey ||
+ !priv->awskeyid) {
+ ret = -1;
+ goto out;
+ }
+
+ sign_req = aws_form_request(resource, &date, reqtype, priv->bucketid,
+ local->remotepath);
+ if (!sign_req) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "null sign_req, "
+ "aborting download");
+ ret = -1;
+ goto out;
+ }
+
+ gf_msg_debug("CS", 0, "sign_req %s date %s", sign_req, date);
+
+ signature = aws_sign_request(sign_req, priv->awssekey);
+ if (!signature) {
+ gf_msg("CS", GF_LOG_ERROR, 0, 0,
+ "null signature, "
+ "aborting download");
+ ret = -1;
+ goto out;
+ }
+
+ handle = curl_easy_init();
+ this = frame->this;
+
+ /* special numbers 6, 20, 10 accounts for static characters in the
+ * below snprintf string format arguments*/
+ bufsize = strlen(date) + 6 + strlen(priv->awskeyid) + strlen(signature) +
+ 20 + strlen(priv->hostname) + 10;
+
+ buf = (char *)alloca(bufsize);
+ if (!buf) {
+ gf_msg("CS", GF_LOG_ERROR, ENOMEM, 0,
+ "mem allocation "
+ "failed for buf");
+ ret = -1;
+ goto out;
+ }
+
+ snprintf(buf, bufsize, "Date: %s", date);
+ slist = curl_slist_append(slist, buf);
+ snprintf(buf, bufsize, "Authorization: AWS %s:%s", priv->awskeyid,
+ signature);
+ slist = curl_slist_append(slist, buf);
+ snprintf(buf, bufsize, "https://%s/%s", priv->hostname, resource);
+
+ if (gf_log_get_loglevel() >= GF_LOG_DEBUG) {
+ tmp = slist;
+ while (tmp) {
+ gf_msg_debug(this->name, 0, "slist for curl - %s", tmp->data);
+ tmp = tmp->next;
+ }
+ }
+
+ curl_easy_setopt(handle, CURLOPT_HTTPHEADER, slist);
+ curl_easy_setopt(handle, CURLOPT_URL, buf);
+ curl_easy_setopt(handle, CURLOPT_WRITEFUNCTION, aws_write_callback);
+ curl_easy_setopt(handle, CURLOPT_WRITEDATA, frame);
+ curl_easy_setopt(handle, CURLOPT_VERBOSE, debug);
+ curl_easy_setopt(handle, CURLOPT_ERRORBUFFER, errbuf);
+
+ res = curl_easy_perform(handle);
+ if (res != CURLE_OK) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "download failed. err: %s\n",
+ curl_easy_strerror(res));
+ ret = -1;
+ len = strlen(errbuf);
+ if (len) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "curl failure %s", errbuf);
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "curl error "
+ "%s\n",
+ curl_easy_strerror(res));
+ }
+ }
+
+ if (res == CURLE_OK) {
+ curl_easy_getinfo(handle, CURLINFO_RESPONSE_CODE, &responsecode);
+ gf_msg_debug(this->name, 0, "response code %ld", responsecode);
+ if (responsecode != 200) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "curl download failed");
+ }
+ }
+
+ curl_slist_free_all(slist);
+ curl_easy_cleanup(handle);
+
+out:
+ if (sign_req)
+ GF_FREE(sign_req);
+ if (date)
+ GF_FREE(date);
+ if (signature)
+ GF_FREE(signature);
+
+ return ret;
+}
+
+struct volume_options cs_options[] = {
+ {.key = {"s3plugin-seckey"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "aws secret key"},
+ {.key = {"s3plugin-keyid"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "aws key ID"
+
+ },
+ {.key = {"s3plugin-bucketid"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "aws bucketid"},
+ {.key = {"s3plugin-hostname"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "aws hostname e.g. s3.amazonaws.com"},
+ {.key = {NULL}},
+};
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h
new file mode 100644
index 00000000000..85ae669486b
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.h
@@ -0,0 +1,50 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _LIBAWS_H
+#define _LIBAWS_H
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/syncop.h>
+#include <curl/curl.h>
+#include "cloudsync-common.h"
+#include "libcloudsyncs3-mem-types.h"
+
+char *
+aws_b64_encode(const unsigned char *input, int length);
+
+size_t
+aws_write_callback(void *dlbuf, size_t size, size_t nitems, void *mainframe);
+
+int
+aws_download_s3(call_frame_t *frame, void *config);
+
+int
+aws_dlwritev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata);
+
+void *
+aws_init(xlator_t *this);
+
+int
+aws_reconfigure(xlator_t *this, dict_t *options);
+
+char *
+aws_form_request(char *resource, char **date, char *reqtype, char *bucketid,
+ char *filepath);
+char *
+aws_sign_request(char *const str, char *awssekey);
+
+void
+aws_fini(void *config);
+
+#endif
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.sym b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.sym
new file mode 100644
index 00000000000..0bc273670d5
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cloudsyncs3/src/libcloudsyncs3.sym
@@ -0,0 +1 @@
+store_ops
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am
new file mode 100644
index 00000000000..b512464f157
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/Makefile.am
@@ -0,0 +1,12 @@
+csp_LTLIBRARIES = cloudsynccvlt.la
+cspdir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/cloudsync-plugins
+
+cloudsynccvlt_la_SOURCES = libcvlt.c $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-common.c
+cloudsynccvlt_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+cloudsynccvlt_la_LDFLAGS = -module -avoid-version -export-symbols $(top_srcdir)/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+noinst_HEADERS = archivestore.h libcvlt.h libcvlt-mem-types.h cvlt-messages.h
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS) -I$(top_srcdir)/xlators/features/cloudsync/src
+CLEANFILES =
+
+EXTRA_DIST = libcloudsynccvlt.sym
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h
new file mode 100644
index 00000000000..7230ef77337
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/archivestore.h
@@ -0,0 +1,203 @@
+/*
+ Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __ARCHIVESTORE_H__
+#define __ARCHIVESTORE_H__
+
+#include <stdlib.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <dlfcn.h>
+#include <uuid/uuid.h>
+
+#define CS_XATTR_ARCHIVE_UUID "trusted.cloudsync.uuid"
+#define CS_XATTR_PRODUCT_ID "trusted.cloudsync.product-id"
+#define CS_XATTR_STORE_ID "trusted.cloudsync.store-id"
+
+struct _archstore_methods;
+typedef struct _archstore_methods archstore_methods_t;
+
+struct _archstore_desc {
+ void *priv; /* Private field for store mgmt. */
+ /* To be used only by archive store*/
+};
+typedef struct _archstore_desc archstore_desc_t;
+
+struct _archstore_info {
+ char *id; /* Identifier for the archivestore */
+ uint32_t idlen; /* Length of identifier string */
+ char *prod; /* Name of the data mgmt. product */
+ uint32_t prodlen; /* Length of the product string */
+};
+typedef struct _archstore_info archstore_info_t;
+
+struct _archstore_fileinfo {
+ uuid_t uuid; /* uuid of the file */
+ char *path; /* file path */
+ uint32_t pathlength; /* length of file path */
+};
+typedef struct _archstore_fileinfo archstore_fileinfo_t;
+
+struct _app_callback_info {
+ archstore_info_t *src_archstore;
+ archstore_fileinfo_t *src_archfile;
+ archstore_info_t *dest_archstore;
+ archstore_fileinfo_t *dest_archfile;
+};
+typedef struct _app_callback_info app_callback_info_t;
+
+typedef void (*app_callback_t)(archstore_desc_t *, app_callback_info_t *,
+ void *, int64_t, int32_t);
+
+enum _archstore_scan_type { FULL = 1, INCREMENTAL = 2 };
+typedef enum _archstore_scan_type archstore_scan_type_t;
+
+typedef int32_t archstore_errno_t;
+
+/*
+ * Initialize archive store.
+ * arg1 pointer to structure containing archive store information
+ * arg2 error number if any generated during the initialization
+ * arg3 name of the log file
+ */
+typedef int32_t (*init_archstore_t)(archstore_desc_t *, archstore_errno_t *,
+ const char *);
+
+/*
+ * Clean up archive store.
+ * arg1 pointer to structure containing archive store information
+ * arg2 error number if any generated during the cleanup
+ */
+typedef int32_t (*term_archstore_t)(archstore_desc_t *, archstore_errno_t *);
+
+/*
+ * Read the contents of the file from archive store
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing archive store information
+ * arg3 pointer to structure containing information about file to be read
+ * arg4 offset in the file from which data should be read
+ * arg5 buffer where the data should be read
+ * arg6 number of bytes of data to be read
+ * arg7 error number if any generated during the read from file
+ * arg8 callback handler to be invoked after the data is read
+ * arg9 cookie to be passed when callback is invoked
+ */
+typedef int32_t (*read_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_fileinfo_t *, off_t, char *,
+ size_t, archstore_errno_t *, app_callback_t,
+ void *);
+
+/*
+ * Restore the contents of the file from archive store
+ * This is basically in-place restore
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing archive store information
+ * arg3 pointer to structure containing information about file to be restored
+ * arg4 error number if any generated during the file restore
+ * arg5 callback to be invoked after the file is restored
+ * arg6 cookie to be passed when callback is invoked
+ */
+typedef int32_t (*recall_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_errno_t *, app_callback_t,
+ void *);
+
+/*
+ * Restore the contents of the file from archive store to a different store
+ * This is basically out-of-place restore
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing source archive store information
+ * arg3 pointer to structure containing information about file to be restored
+ * arg4 pointer to structure containing destination archive store information
+ * arg5 pointer to structure containing information about the location to
+ which the file will be restored
+ * arg6 error number if any generated during the file restore
+ * arg7 callback to be invoked after the file is restored
+ * arg8 cookie to be passed when callback is invoked
+ */
+typedef int32_t (*restore_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_errno_t *, app_callback_t,
+ void *);
+
+/*
+ * Archive the contents of the file to archive store
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing source archive store information
+ * arg3 pointer to structure containing information about files to be archived
+ * arg4 pointer to structure containing destination archive store information
+ * arg5 pointer to structure containing information about files that failed
+ * to be archived
+ * arg6 error number if any generated during the file archival
+ * arg7 callback to be invoked after the file is archived
+ * arg8 cookie to be passed when callback is invoked
+ */
+typedef int32_t (*archive_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_errno_t *, app_callback_t,
+ void *);
+
+/*
+ * Backup list of files provided in the input file
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing source archive store information
+ * arg3 pointer to structure containing information about files to be backed up
+ * arg4 pointer to structure containing destination archive store information
+ * arg5 pointer to structure containing information about files that failed
+ * to be backed up
+ * arg6 error number if any generated during the file archival
+ * arg7 callback to be invoked after the file is archived
+ * arg8 cookie to be passed when callback is invoked
+ */
+typedef int32_t (*backup_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_info_t *,
+ archstore_fileinfo_t *,
+ archstore_errno_t *, app_callback_t,
+ void *);
+
+/*
+ * Scan the contents of a store and determine the files which need to be
+ * backed up.
+ * arg1 pointer to structure containing archive store description
+ * arg2 pointer to structure containing archive store information
+ * arg3 type of scan whether full or incremental
+ * arg4 path to file that contains list of files to be backed up
+ * arg5 error number if any generated during scan operation
+ */
+typedef int32_t (*scan_archstore_t)(archstore_desc_t *, archstore_info_t *,
+ archstore_scan_type_t, char *,
+ archstore_errno_t *);
+
+struct _archstore_methods {
+ init_archstore_t init;
+ term_archstore_t fini;
+ backup_archstore_t backup;
+ archive_archstore_t archive;
+ scan_archstore_t scan;
+ restore_archstore_t restore;
+ recall_archstore_t recall;
+ read_archstore_t read;
+};
+
+typedef int (*get_archstore_methods_t)(archstore_methods_t *);
+
+/*
+ * Single function that will be invoked by applications for extracting
+ * the function pointers to all data management functions.
+ */
+int32_t
+get_archstore_methods(archstore_methods_t *);
+
+#endif /* End of __ARCHIVESTORE_H__ */
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h
new file mode 100644
index 00000000000..57c9aa77da0
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/cvlt-messages.h
@@ -0,0 +1,30 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _CVLT_MESSAGES_H_
+#define _CVLT_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(CVLT, CVLT_EXTRACTION_FAILED, CVLT_FREE,
+ CVLT_RESOURCE_ALLOCATION_FAILED, CVLT_RESTORE_FAILED,
+ CVLT_READ_FAILED, CVLT_NO_MEMORY, CVLT_DLOPEN_FAILED);
+
+#endif /* !_CVLT_MESSAGES_H_ */
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym
new file mode 100644
index 00000000000..0bc273670d5
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcloudsynccvlt.sym
@@ -0,0 +1 @@
+store_ops
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h
new file mode 100644
index 00000000000..c24fab8bfe7
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt-mem-types.h
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __LIBCVLT_MEM_TYPES_H__
+#define __LIBCVLT_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+enum libcvlt_mem_types_ {
+ gf_libcvlt_mt_cvlt_private_t = gf_common_mt_end + 1,
+ gf_libcvlt_mt_end
+};
+#endif /* __LIBCVLT_MEM_TYPES_H__ */
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c
new file mode 100644
index 00000000000..5b7272bb448
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.c
@@ -0,0 +1,842 @@
+#include <stdlib.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include "libcvlt.h"
+#include "cloudsync-common.h"
+#include "cvlt-messages.h"
+
+#define LIBARCHIVE_SO "libopenarchive.so"
+#define ALIGN_SIZE 4096
+#define CVLT_TRAILER "cvltv1"
+
+store_methods_t store_ops = {
+ .fop_download = cvlt_download,
+ .fop_init = cvlt_init,
+ .fop_reconfigure = cvlt_reconfigure,
+ .fop_fini = cvlt_fini,
+ .fop_remote_read = cvlt_read,
+};
+
+static const int32_t num_req = 32;
+static const int32_t num_iatt = 32;
+static char *plugin = "cvlt_cloudSync";
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, gf_libcvlt_mt_end + 1);
+
+ if (ret != 0) {
+ return ret;
+ }
+
+ return ret;
+}
+
+static void
+cvlt_free_resources(archive_t *arch)
+{
+ /*
+ * We will release all the resources that were allocated by the xlator.
+ * Check whether there are any buffers which have not been released
+ * back to a mempool.
+ */
+
+ if (arch->handle) {
+ dlclose(arch->handle);
+ }
+
+ if (arch->iobuf_pool) {
+ iobuf_pool_destroy(arch->iobuf_pool);
+ }
+
+ if (arch->req_pool) {
+ mem_pool_destroy(arch->req_pool);
+ arch->req_pool = NULL;
+ }
+
+ return;
+}
+
+static int32_t
+cvlt_extract_store_fops(xlator_t *this, archive_t *arch)
+{
+ int32_t op_ret = -1;
+ get_archstore_methods_t get_archstore_methods;
+
+ /*
+ * libopenarchive.so defines methods for performing data management
+ * operations. We will extract the methods from library and these
+ * methods will be invoked for moving data between glusterfs volume
+ * and the data management product.
+ */
+
+ VALIDATE_OR_GOTO(arch, err);
+
+ arch->handle = dlopen(LIBARCHIVE_SO, RTLD_NOW);
+ if (!arch->handle) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_DLOPEN_FAILED,
+ " failed to open %s ", LIBARCHIVE_SO);
+ return op_ret;
+ }
+
+ dlerror(); /* Clear any existing error */
+
+ get_archstore_methods = dlsym(arch->handle, "get_archstore_methods");
+ if (!get_archstore_methods) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " Error extracting get_archstore_methods()");
+ dlclose(arch->handle);
+ arch->handle = NULL;
+ return op_ret;
+ }
+
+ op_ret = get_archstore_methods(&(arch->fops));
+ if (op_ret) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " Failed to extract methods in get_archstore_methods");
+ dlclose(arch->handle);
+ arch->handle = NULL;
+ return op_ret;
+ }
+
+err:
+ return op_ret;
+}
+
+static int32_t
+cvlt_alloc_resources(xlator_t *this, archive_t *arch, int num_req, int num_iatt)
+{
+ /*
+ * Initialize information about all the memory pools that will be
+ * used by this xlator.
+ */
+ arch->nreqs = 0;
+
+ arch->req_pool = NULL;
+
+ arch->handle = NULL;
+ arch->xl = this;
+
+ arch->req_pool = mem_pool_new(cvlt_request_t, num_req);
+ if (!arch->req_pool) {
+ goto err;
+ }
+
+ arch->iobuf_pool = iobuf_pool_new();
+ if (!arch->iobuf_pool) {
+ goto err;
+ }
+
+ if (cvlt_extract_store_fops(this, arch)) {
+ goto err;
+ }
+
+ return 0;
+
+err:
+
+ return -1;
+}
+
+static void
+cvlt_req_init(cvlt_request_t *req)
+{
+ sem_init(&(req->sem), 0, 0);
+
+ return;
+}
+
+static void
+cvlt_req_destroy(cvlt_request_t *req)
+{
+ if (req->iobuf) {
+ iobuf_unref(req->iobuf);
+ }
+
+ if (req->iobref) {
+ iobref_unref(req->iobref);
+ }
+
+ sem_destroy(&(req->sem));
+
+ return;
+}
+
+static cvlt_request_t *
+cvlt_alloc_req(archive_t *arch)
+{
+ cvlt_request_t *reqptr = NULL;
+
+ if (!arch) {
+ goto err;
+ }
+
+ if (arch->req_pool) {
+ reqptr = mem_get0(arch->req_pool);
+ if (reqptr) {
+ cvlt_req_init(reqptr);
+ }
+ }
+
+ if (reqptr) {
+ LOCK(&(arch->lock));
+ arch->nreqs++;
+ UNLOCK(&(arch->lock));
+ }
+
+err:
+ return reqptr;
+}
+
+static int32_t
+cvlt_free_req(archive_t *arch, cvlt_request_t *reqptr)
+{
+ if (!reqptr) {
+ goto err;
+ }
+
+ if (!arch) {
+ goto err;
+ }
+
+ if (arch->req_pool) {
+ /*
+ * Free the request resources if they exist.
+ */
+
+ cvlt_req_destroy(reqptr);
+ mem_put(reqptr);
+
+ LOCK(&(arch->lock));
+ arch->nreqs--;
+ UNLOCK(&(arch->lock));
+ }
+
+ return 0;
+
+err:
+ return -1;
+}
+
+static int32_t
+cvlt_init_xlator(xlator_t *this, archive_t *arch, int num_req, int num_iatt)
+{
+ int32_t ret = -1;
+ int32_t errnum = -1;
+ int32_t locked = 0;
+
+ /*
+ * Perform all the initializations needed for brining up the xlator.
+ */
+ if (!arch) {
+ goto err;
+ }
+
+ LOCK_INIT(&(arch->lock));
+ LOCK(&(arch->lock));
+
+ locked = 1;
+
+ ret = cvlt_alloc_resources(this, arch, num_req, num_iatt);
+
+ if (ret) {
+ goto err;
+ }
+
+ /*
+ * Now that the fops have been extracted initialize the store
+ */
+ ret = arch->fops.init(&(arch->descinfo), &errnum, plugin);
+ if (ret) {
+ goto err;
+ }
+
+ UNLOCK(&(arch->lock));
+ locked = 0;
+ ret = 0;
+
+ return ret;
+
+err:
+ if (arch) {
+ cvlt_free_resources(arch);
+
+ if (locked) {
+ UNLOCK(&(arch->lock));
+ }
+ }
+
+ return ret;
+}
+
+static int32_t
+cvlt_term_xlator(archive_t *arch)
+{
+ int32_t errnum = -1;
+
+ if (!arch) {
+ goto err;
+ }
+
+ LOCK(&(arch->lock));
+
+ /*
+ * Release the resources that have been allocated inside store
+ */
+ arch->fops.fini(&(arch->descinfo), &errnum);
+
+ cvlt_free_resources(arch);
+
+ UNLOCK(&(arch->lock));
+
+ GF_FREE(arch);
+
+ return 0;
+
+err:
+ return -1;
+}
+
+static int32_t
+cvlt_init_store_info(archive_t *priv, archstore_info_t *store_info)
+{
+ if (!store_info) {
+ return -1;
+ }
+
+ store_info->prod = priv->product_id;
+ store_info->prodlen = strlen(priv->product_id);
+
+ store_info->id = priv->store_id;
+ store_info->idlen = strlen(priv->store_id);
+
+ return 0;
+}
+
+static int32_t
+cvlt_init_file_info(cs_loc_xattr_t *xattr, archstore_fileinfo_t *file_info)
+{
+ if (!xattr || !file_info) {
+ return -1;
+ }
+
+ gf_uuid_copy(file_info->uuid, xattr->uuid);
+ file_info->path = xattr->file_path;
+ file_info->pathlength = strlen(xattr->file_path);
+
+ return 0;
+}
+
+static int32_t
+cvlt_init_gluster_store_info(cs_loc_xattr_t *xattr,
+ archstore_info_t *store_info)
+{
+ static char *product = "glusterfs";
+
+ if (!xattr || !store_info) {
+ return -1;
+ }
+
+ store_info->prod = product;
+ store_info->prodlen = strlen(product);
+
+ store_info->id = xattr->volname;
+ store_info->idlen = strlen(xattr->volname);
+
+ return 0;
+}
+
+static int32_t
+cvlt_init_gluster_file_info(cs_loc_xattr_t *xattr,
+ archstore_fileinfo_t *file_info)
+{
+ if (!xattr || !file_info) {
+ return -1;
+ }
+
+ gf_uuid_copy(file_info->uuid, xattr->gfid);
+ file_info->path = xattr->file_path;
+ file_info->pathlength = strlen(xattr->file_path);
+
+ return 0;
+}
+
+static void
+cvlt_copy_stat_info(struct iatt *buf, cs_size_xattr_t *xattrs)
+{
+ /*
+ * If the file was archived then the reported size will not be a
+ * correct one. We need to fix this.
+ */
+ if (buf && xattrs) {
+ buf->ia_size = xattrs->size;
+ buf->ia_blksize = xattrs->blksize;
+ buf->ia_blocks = xattrs->blocks;
+ }
+
+ return;
+}
+
+static void
+cvlt_readv_complete(archstore_desc_t *desc, app_callback_info_t *cbkinfo,
+ void *cookie, int64_t op_ret, int32_t op_errno)
+{
+ struct iovec iov;
+ xlator_t *this = NULL;
+ struct iatt postbuf = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ cvlt_request_t *req = (cvlt_request_t *)cookie;
+ cs_local_t *local = NULL;
+ cs_private_t *cspriv = NULL;
+ archive_t *priv = NULL;
+
+ frame = req->frame;
+ this = frame->this;
+ local = frame->local;
+
+ cspriv = this->private;
+ priv = (archive_t *)cspriv->stores->config;
+
+ if (strcmp(priv->trailer, CVLT_TRAILER)) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_msg_debug(plugin, 0,
+ " Read callback invoked offset:%" PRIu64 "bytes: %" PRIu64
+ " op : %d ret : %" PRId64 " errno : %d",
+ req->offset, req->bytes, req->op_type, op_ret, op_errno);
+
+ if (op_ret < 0) {
+ goto out;
+ }
+
+ req->iobref = iobref_new();
+ if (!req->iobref) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ iobref_add(req->iobref, req->iobuf);
+ iov.iov_base = iobuf_ptr(req->iobuf);
+ iov.iov_len = op_ret;
+
+ cvlt_copy_stat_info(&postbuf, &(req->szxattr));
+
+ /*
+ * Hack to notify higher layers of EOF.
+ */
+ if (!postbuf.ia_size || (req->offset + iov.iov_len >= postbuf.ia_size)) {
+ gf_msg_debug(plugin, 0, " signalling end-of-file for uuid=%s",
+ uuid_utoa(req->file_info.uuid));
+ op_errno = ENOENT;
+ }
+
+out:
+
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1, &postbuf,
+ req->iobref, local->xattr_rsp);
+
+ cvlt_free_req(priv, req);
+
+ return;
+}
+
+static void
+cvlt_download_complete(archstore_desc_t *store, app_callback_info_t *cbk_info,
+ void *cookie, int64_t ret, int errcode)
+{
+ cvlt_request_t *req = (cvlt_request_t *)cookie;
+
+ gf_msg_debug(plugin, 0,
+ " Download callback invoked ret : %" PRId64 " errno : %d",
+ ret, errcode);
+
+ req->op_ret = ret;
+ req->op_errno = errcode;
+ sem_post(&(req->sem));
+
+ return;
+}
+
+void *
+cvlt_init(xlator_t *this)
+{
+ int ret = 0;
+ archive_t *priv = NULL;
+
+ if (!this->children || this->children->next) {
+ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0,
+ "should have exactly one child");
+ ret = -1;
+ goto out;
+ }
+
+ if (!this->parents) {
+ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0,
+ "dangling volume. check volfile");
+ ret = -1;
+ goto out;
+ }
+
+ priv = GF_CALLOC(1, sizeof(archive_t), gf_libcvlt_mt_cvlt_private_t);
+ if (!priv) {
+ ret = -1;
+ goto out;
+ }
+
+ priv->trailer = CVLT_TRAILER;
+ if (cvlt_init_xlator(this, priv, num_req, num_iatt)) {
+ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, 0, "xlator init failed");
+ ret = -1;
+ goto out;
+ }
+
+ GF_OPTION_INIT("cloudsync-store-id", priv->store_id, str, out);
+ GF_OPTION_INIT("cloudsync-product-id", priv->product_id, str, out);
+
+ gf_msg(plugin, GF_LOG_INFO, 0, 0,
+ "store id is : %s "
+ "product id is : %s.",
+ priv->store_id, priv->product_id);
+out:
+ if (ret == -1) {
+ cvlt_term_xlator(priv);
+ return (NULL);
+ }
+ return priv;
+}
+
+int
+cvlt_reconfigure(xlator_t *this, dict_t *options)
+{
+ cs_private_t *cspriv = NULL;
+ archive_t *priv = NULL;
+
+ cspriv = this->private;
+ priv = (archive_t *)cspriv->stores->config;
+
+ if (strcmp(priv->trailer, CVLT_TRAILER))
+ goto out;
+
+ GF_OPTION_RECONF("cloudsync-store-id", priv->store_id, options, str, out);
+
+ GF_OPTION_RECONF("cloudsync-product-id", priv->product_id, options, str,
+ out);
+ gf_msg_debug(plugin, 0,
+ "store id is : %s "
+ "product id is : %s.",
+ priv->store_id, priv->product_id);
+ return 0;
+out:
+ return -1;
+}
+
+void
+cvlt_fini(void *config)
+{
+ archive_t *priv = NULL;
+
+ priv = (archive_t *)config;
+
+ if (strcmp(priv->trailer, CVLT_TRAILER))
+ return;
+
+ cvlt_term_xlator(priv);
+ gf_msg(plugin, GF_LOG_INFO, 0, CVLT_FREE, " released xlator resources");
+ return;
+}
+
+int
+cvlt_download(call_frame_t *frame, void *config)
+{
+ archive_t *parch = NULL;
+ cs_local_t *local = frame->local;
+ cs_loc_xattr_t *locxattr = local->xattrinfo.lxattr;
+ cvlt_request_t *req = NULL;
+ archstore_info_t dest_storeinfo;
+ archstore_fileinfo_t dest_fileinfo;
+ int32_t op_ret, op_errno;
+
+ parch = (archive_t *)config;
+
+ if (strcmp(parch->trailer, CVLT_TRAILER)) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ gf_msg_debug(plugin, 0, " download invoked for uuid = %s gfid=%s ",
+ locxattr->uuid, uuid_utoa(locxattr->gfid));
+
+ if (!(parch->fops.restore)) {
+ op_errno = ELIBBAD;
+ goto err;
+ }
+
+ /*
+ * Download needs to be processed. Allocate a request.
+ */
+ req = cvlt_alloc_req(parch);
+
+ if (!req) {
+ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, CVLT_RESOURCE_ALLOCATION_FAILED,
+ " failed to allocated request for gfid=%s",
+ uuid_utoa(locxattr->gfid));
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /*
+ * Initialize the request object.
+ */
+ req->op_type = CVLT_RESTORE_OP;
+ req->frame = frame;
+
+ /*
+ * The file is currently residing inside a data management store.
+ * To restore the file contents we need to provide the information
+ * about data management store.
+ */
+ op_ret = cvlt_init_store_info(parch, &(req->store_info));
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " failed to extract store info for gfid=%s",
+ uuid_utoa(locxattr->gfid));
+ goto err;
+ }
+
+ op_ret = cvlt_init_file_info(locxattr, &(req->file_info));
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " failed to extract file info for gfid=%s",
+ uuid_utoa(locxattr->gfid));
+ goto err;
+ }
+
+ /*
+ * We need to perform in-place restore of the file from data management
+ * store to gusterfs volume.
+ */
+ op_ret = cvlt_init_gluster_store_info(locxattr, &dest_storeinfo);
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " failed to extract destination store info for gfid=%s",
+ uuid_utoa(locxattr->gfid));
+ goto err;
+ }
+
+ op_ret = cvlt_init_gluster_file_info(locxattr, &dest_fileinfo);
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " failed to extract file info for gfid=%s",
+ uuid_utoa(locxattr->gfid));
+ goto err;
+ }
+
+ /*
+ * Submit the restore request.
+ */
+ op_ret = parch->fops.restore(&(parch->descinfo), &(req->store_info),
+ &(req->file_info), &dest_storeinfo,
+ &dest_fileinfo, &op_errno,
+ cvlt_download_complete, req);
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_RESTORE_FAILED,
+ " failed to restore file gfid=%s from data management store",
+ uuid_utoa(locxattr->gfid));
+ goto err;
+ }
+
+ /*
+ * Wait for the restore to complete.
+ */
+ sem_wait(&(req->sem));
+
+ if (req->op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_RESTORE_FAILED,
+ " restored failed for gfid=%s", uuid_utoa(locxattr->gfid));
+ goto err;
+ }
+
+ if (req) {
+ cvlt_free_req(parch, req);
+ }
+
+ return 0;
+
+err:
+
+ if (req) {
+ cvlt_free_req(parch, req);
+ }
+
+ return -1;
+}
+
+int
+cvlt_read(call_frame_t *frame, void *config)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ archive_t *parch = NULL;
+ cvlt_request_t *req = NULL;
+ struct iovec iov = {
+ 0,
+ };
+ struct iobref *iobref;
+ size_t size = 0;
+ off_t off = 0;
+
+ cs_local_t *local = frame->local;
+ cs_loc_xattr_t *locxattr = local->xattrinfo.lxattr;
+
+ size = local->xattrinfo.size;
+ off = local->xattrinfo.offset;
+
+ parch = (archive_t *)config;
+
+ if (strcmp(parch->trailer, CVLT_TRAILER)) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ gf_msg_debug(plugin, 0,
+ " read invoked for gfid = %s offset = %" PRIu64
+ " file_size = %" PRIu64,
+ uuid_utoa(locxattr->gfid), off, local->stbuf.ia_size);
+
+ if (off >= local->stbuf.ia_size) {
+ /*
+ * Hack to notify higher layers of EOF.
+ */
+
+ op_errno = ENOENT;
+ op_ret = 0;
+
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_READ_FAILED,
+ " reporting end-of-file for gfid=%s", uuid_utoa(locxattr->gfid));
+
+ goto err;
+ }
+
+ if (!size) {
+ op_errno = EINVAL;
+
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_READ_FAILED,
+ " zero size read attempted on gfid=%s",
+ uuid_utoa(locxattr->gfid));
+ goto err;
+ }
+
+ if (!(parch->fops.read)) {
+ op_errno = ELIBBAD;
+ goto err;
+ }
+
+ /*
+ * The read request need to be processed. Allocate a request.
+ */
+ req = cvlt_alloc_req(parch);
+
+ if (!req) {
+ gf_msg(plugin, GF_LOG_ERROR, ENOMEM, CVLT_NO_MEMORY,
+ " failed to allocated request for gfid=%s",
+ uuid_utoa(locxattr->gfid));
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ req->iobuf = iobuf_get_page_aligned(parch->iobuf_pool, size, ALIGN_SIZE);
+ if (!req->iobuf) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ /*
+ * Initialize the request object.
+ */
+ req->op_type = CVLT_READ_OP;
+ req->offset = off;
+ req->bytes = size;
+ req->frame = frame;
+ req->szxattr.size = local->stbuf.ia_size;
+ req->szxattr.blocks = local->stbuf.ia_blocks;
+ req->szxattr.blksize = local->stbuf.ia_blksize;
+
+ /*
+ * The file is currently residing inside a data management store.
+ * To read the file contents we need to provide the information
+ * about data management store.
+ */
+ op_ret = cvlt_init_store_info(parch, &(req->store_info));
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " failed to extract store info for gfid=%s"
+ " offset=%" PRIu64 " size=%" GF_PRI_SIZET
+ ", "
+ " buf=%p",
+ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr);
+ goto err;
+ }
+
+ op_ret = cvlt_init_file_info(locxattr, &(req->file_info));
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " failed to extract file info for gfid=%s"
+ " offset=%" PRIu64 " size=%" GF_PRI_SIZET
+ ", "
+ " buf=%p",
+ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr);
+ goto err;
+ }
+
+ /*
+ * Submit the read request.
+ */
+ op_ret = parch->fops.read(&(parch->descinfo), &(req->store_info),
+ &(req->file_info), off, req->iobuf->ptr, size,
+ &op_errno, cvlt_readv_complete, req);
+
+ if (op_ret < 0) {
+ gf_msg(plugin, GF_LOG_ERROR, 0, CVLT_EXTRACTION_FAILED,
+ " read failed on gfid=%s"
+ " offset=%" PRIu64 " size=%" GF_PRI_SIZET
+ ", "
+ " buf=%p",
+ uuid_utoa(locxattr->gfid), off, size, req->iobuf->ptr);
+ goto err;
+ }
+
+ return 0;
+
+err:
+
+ iobref = iobref_new();
+ gf_msg_debug(plugin, 0, " read unwinding stack op_ret = %d, op_errno = %d",
+ op_ret, op_errno);
+
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &iov, 1,
+ &(local->stbuf), iobref, local->xattr_rsp);
+
+ if (iobref) {
+ iobref_unref(iobref);
+ }
+
+ if (req) {
+ cvlt_free_req(parch, req);
+ }
+
+ return 0;
+}
diff --git a/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h
new file mode 100644
index 00000000000..c45ac948f6c
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync-plugins/src/cvlt/src/libcvlt.h
@@ -0,0 +1,84 @@
+/*
+ Copyright (c) 2018 Commvault Systems, Inc. <http://www.commvault.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _LIBCVLT_H
+#define _LIBCVLT_H
+
+#include <semaphore.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/compat-errno.h>
+#include "cloudsync-common.h"
+#include "libcvlt-mem-types.h"
+#include "archivestore.h"
+
+enum _cvlt_op {
+ CVLT_READ_OP = 1,
+ CVLT_WRITE_OP = 2,
+ CVLT_RESTORE_OP = 3,
+ CVLT_ARCHIVE_OP = 4,
+ CVLT_LOOKUP_OP = 5,
+ CVLT_XATTR_OP = 6,
+ CVLT_STAT_OP = 7,
+ CVLT_FSTAT_op = 8,
+ CVLT_UNDEF_OP = 127
+};
+typedef enum _cvlt_op cvlt_op_t;
+
+struct _archive;
+struct _cvlt_request {
+ uint64_t offset;
+ uint64_t bytes;
+ struct iobuf *iobuf;
+ struct iobref *iobref;
+ call_frame_t *frame;
+ cvlt_op_t op_type;
+ int32_t op_ret;
+ int32_t op_errno;
+ xlator_t *this;
+ sem_t sem;
+ archstore_info_t store_info;
+ archstore_fileinfo_t file_info;
+ cs_size_xattr_t szxattr;
+};
+typedef struct _cvlt_request cvlt_request_t;
+
+struct _archive {
+ gf_lock_t lock; /* lock for controlling access */
+ xlator_t *xl; /* xlator */
+ void *handle; /* handle returned from dlopen */
+ int32_t nreqs; /* num requests active */
+ struct mem_pool *req_pool; /* pool for requests */
+ struct iobuf_pool *iobuf_pool; /* iobuff pool */
+ archstore_desc_t descinfo; /* Archive store descriptor info */
+ archstore_methods_t fops; /* function pointers */
+ char *product_id;
+ char *store_id;
+ char *trailer;
+};
+typedef struct _archive archive_t;
+
+void *
+cvlt_init(xlator_t *);
+
+int
+cvlt_reconfigure(xlator_t *, dict_t *);
+
+void
+cvlt_fini(void *);
+
+int
+cvlt_download(call_frame_t *, void *);
+
+int
+cvlt_read(call_frame_t *, void *);
+
+#endif
diff --git a/xlators/features/cloudsync/src/cloudsync.c b/xlators/features/cloudsync/src/cloudsync.c
new file mode 100644
index 00000000000..7f0b9e563b8
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync.c
@@ -0,0 +1,2076 @@
+/*
+ * Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include "cloudsync.h"
+#include "cloudsync-common.h"
+#include <glusterfs/call-stub.h>
+#include "cloudsync-autogen-fops.h"
+
+#include <string.h>
+#include <dlfcn.h>
+
+static void
+cs_cleanup_private(cs_private_t *priv)
+{
+ if (priv) {
+ if (priv->stores) {
+ priv->stores->fini(priv->stores->config);
+ GF_FREE(priv->stores);
+ }
+
+ pthread_spin_destroy(&priv->lock);
+ GF_FREE(priv);
+ }
+
+ return;
+}
+
+static struct cs_plugin plugins[] = {
+ {.name = "cloudsyncs3",
+ .library = "cloudsyncs3.so",
+ .description = "cloudsync s3 store."},
+#if defined(__linux__)
+ {.name = "cvlt",
+ .library = "cloudsynccvlt.so",
+ .description = "Commvault content store."},
+#endif
+ {.name = NULL},
+};
+
+int
+cs_init(xlator_t *this)
+{
+ cs_private_t *priv = NULL;
+ gf_boolean_t per_vol = _gf_false;
+ int ret = 0;
+ char *libpath = NULL;
+ store_methods_t *store_methods = NULL;
+ void *handle = NULL;
+ char *temp_str = NULL;
+ int index = 0;
+ char *libname = NULL;
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_cs_mt_cs_private_t);
+ if (!priv) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
+ goto out;
+ }
+
+ priv->this = this;
+
+ this->local_pool = mem_pool_new(cs_local_t, 512);
+ if (!this->local_pool) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, "initialisation failed.");
+ ret = -1;
+ goto out;
+ }
+
+ this->private = priv;
+
+ GF_OPTION_INIT("cloudsync-remote-read", priv->remote_read, bool, out);
+
+ /* temp workaround. Should be configurable through glusterd*/
+ per_vol = _gf_true;
+
+ if (per_vol) {
+ if (dict_get_str_sizen(this->options, "cloudsync-storetype",
+ &temp_str) == 0) {
+ for (index = 0; plugins[index].name; index++) {
+ if (!strcmp(temp_str, plugins[index].name)) {
+ libname = plugins[index].library;
+ break;
+ }
+ }
+ } else {
+ ret = 0;
+ }
+
+ if (!libname) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, 0, "no plugin enabled");
+ ret = 0;
+ goto out;
+ }
+
+ ret = gf_asprintf(&libpath, "%s/%s", CS_PLUGINDIR, libname);
+ if (ret == -1) {
+ goto out;
+ }
+
+ handle = dlopen(libpath, RTLD_NOW);
+ if (!handle) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, 0,
+ "could not "
+ "load the required library. %s",
+ dlerror());
+ ret = 0;
+ goto out;
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "loading library:%s successful", libname);
+ }
+
+ priv->stores = GF_CALLOC(1, sizeof(struct cs_remote_stores),
+ gf_cs_mt_cs_remote_stores_t);
+ if (!priv->stores) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "Could not "
+ "allocate memory for priv->stores");
+ ret = -1;
+ goto out;
+ }
+
+ (void)dlerror(); /* clear out previous error string */
+
+ /* load library methods */
+ store_methods = (store_methods_t *)dlsym(handle, "store_ops");
+ if (!store_methods) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "null store_methods %s",
+ dlerror());
+ ret = -1;
+ goto out;
+ }
+
+ (void)dlerror();
+
+ if (priv->remote_read) {
+ priv->stores->rdfop = store_methods->fop_remote_read;
+ if (!priv->stores->rdfop) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "failed to get"
+ " read fop %s",
+ dlerror());
+ ret = -1;
+ goto out;
+ }
+ }
+
+ priv->stores->dlfop = store_methods->fop_download;
+ if (!priv->stores->dlfop) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "failed to get"
+ " download fop %s",
+ dlerror());
+ ret = -1;
+ goto out;
+ }
+
+ (void)dlerror();
+ priv->stores->init = store_methods->fop_init;
+ if (!priv->stores->init) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "failed to get"
+ " init fop %s",
+ dlerror());
+ ret = -1;
+ goto out;
+ }
+
+ (void)dlerror();
+ priv->stores->reconfigure = store_methods->fop_reconfigure;
+ if (!priv->stores->reconfigure) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "failed to get"
+ " reconfigure fop %s",
+ dlerror());
+ ret = -1;
+ goto out;
+ }
+
+ priv->stores->handle = handle;
+
+ priv->stores->config = (void *)((priv->stores->init)(this));
+ if (!priv->stores->config) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "null config");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+out:
+ if (ret == -1) {
+ if (this->local_pool) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
+
+ cs_cleanup_private(priv);
+
+ if (handle) {
+ dlclose(handle);
+ }
+ }
+
+ GF_FREE(libpath);
+
+ return ret;
+}
+
+int
+cs_forget(xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_int = 0;
+ cs_inode_ctx_t *ctx = NULL;
+
+ inode_ctx_del(inode, this, &ctx_int);
+ if (!ctx_int)
+ return 0;
+
+ ctx = (cs_inode_ctx_t *)(uintptr_t)ctx_int;
+
+ GF_FREE(ctx);
+ return 0;
+}
+
+void
+cs_fini(xlator_t *this)
+{
+ cs_private_t *priv = NULL;
+ priv = this->private;
+
+ cs_cleanup_private(priv);
+}
+
+int
+cs_reconfigure(xlator_t *this, dict_t *options)
+{
+ cs_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ if (!priv) {
+ ret = -1;
+ goto out;
+ }
+
+ GF_OPTION_RECONF("cloudsync-remote-read", priv->remote_read, options, bool,
+ out);
+
+ /* needed only for per volume configuration*/
+ ret = priv->stores->reconfigure(this, options);
+
+out:
+ return ret;
+}
+
+int32_t
+cs_mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("cloudsync", this, out);
+
+ ret = xlator_mem_acct_init(this, gf_cs_mt_end + 1);
+
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "Memory accounting init failed");
+ return ret;
+ }
+out:
+ return ret;
+}
+
+int32_t
+cs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ int ret = 0;
+ int op_errno = ENOMEM;
+
+ if (!xdata) {
+ xdata = dict_new();
+ if (!xdata) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
+ "failed to create "
+ "dict");
+ goto err;
+ }
+ }
+
+ ret = dict_set_uint32(xdata, GF_CS_OBJECT_STATUS, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "dict_set failed key:"
+ " %s",
+ GF_CS_OBJECT_STATUS);
+ goto err;
+ }
+
+ STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT(readdirp, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+int32_t
+cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ cs_local_t *local = NULL;
+ int ret = 0;
+ uint64_t val = 0;
+
+ local = frame->local;
+
+ local->call_cnt++;
+
+ if (op_ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "truncate failed");
+ ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val);
+ if (ret == 0) {
+ if (val == GF_CS_ERROR) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "could not get file state, unwinding");
+ op_ret = -1;
+ op_errno = EIO;
+ goto unwind;
+ } else {
+ __cs_inode_ctx_update(this, local->loc.inode, val);
+ gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %" PRIu64, val);
+
+ if (local->call_cnt == 1 &&
+ (val == GF_CS_REMOTE || val == GF_CS_DOWNLOADING)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, 0,
+ "will repair and download "
+ "the file, current state : %" PRIu64,
+ val);
+ goto repair;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "second truncate, Unwinding");
+ goto unwind;
+ }
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state "
+ "could not be figured, unwinding");
+ goto unwind;
+ }
+ } else {
+ /* successful write => file is local */
+ __cs_inode_ctx_update(this, local->loc.inode, GF_CS_LOCAL);
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "state : GF_CS_LOCAL"
+ ", truncate successful");
+
+ goto unwind;
+ }
+
+repair:
+ ret = locate_and_execute(frame);
+ if (ret) {
+ goto unwind;
+ }
+
+ return 0;
+
+unwind:
+ CS_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+cs_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ cs_local_t *local = NULL;
+ int ret = 0;
+ cs_inode_ctx_t *ctx = NULL;
+ gf_cs_obj_state state = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ local = cs_local_init(this, frame, loc, NULL, GF_FOP_TRUNCATE);
+ if (!local) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "local init failed");
+ goto err;
+ }
+
+ __cs_inode_ctx_get(this, loc->inode, &ctx);
+
+ if (ctx)
+ state = __cs_get_file_state(loc->inode, ctx);
+ else
+ state = GF_CS_LOCAL;
+
+ local->xattr_req = xdata ? dict_ref(xdata) : (xdata = dict_new());
+
+ ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_STATUS, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "dict_set failed key:"
+ " %s",
+ GF_CS_OBJECT_STATUS);
+ goto err;
+ }
+
+ local->stub = fop_truncate_stub(frame, cs_resume_truncate, loc, offset,
+ xdata);
+ if (!local->stub) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
+ goto err;
+ }
+
+ if (state == GF_CS_LOCAL) {
+ STACK_WIND(frame, cs_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+
+ } else {
+ local->call_cnt++;
+ ret = locate_and_execute(frame);
+ if (ret) {
+ goto err;
+ }
+ }
+
+ return 0;
+err:
+ CS_STACK_UNWIND(truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+cs_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct statvfs *buf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+cs_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ STACK_WIND(frame, cs_statfs_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
+}
+
+int32_t
+cs_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int32_t
+cs_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xattr_req)
+{
+ STACK_WIND(frame, cs_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xattr_req);
+ return 0;
+}
+
+int32_t
+cs_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ cs_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->locked)
+ cs_inodelk_unlock(frame);
+
+ CS_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int32_t
+cs_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ data_t *tmp = NULL;
+ cs_local_t *local = NULL;
+ int ret = 0;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+
+ local = cs_local_init(this, frame, loc, NULL, GF_FOP_SETXATTR);
+ if (!local) {
+ ret = -1;
+ goto err;
+ }
+
+ local->xattr_req = xdata ? dict_ref(xdata) : (xdata = dict_new());
+
+ tmp = dict_get_sizen(dict, GF_CS_OBJECT_UPLOAD_COMPLETE);
+ if (tmp) {
+ /* Value of key should be the atime */
+ local->stub = fop_setxattr_stub(frame, cs_resume_setxattr, loc, dict,
+ flags, xdata);
+
+ if (!local->stub)
+ goto err;
+
+ ret = locate_and_execute(frame);
+ if (ret) {
+ goto err;
+ }
+
+ return 0;
+ }
+
+ STACK_WIND(frame, cs_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
+ return 0;
+err:
+ CS_STACK_UNWIND(setxattr, frame, -1, errno, NULL);
+ return 0;
+}
+
+int32_t
+cs_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int32_t
+cs_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ STACK_WIND(frame, cs_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
+}
+
+int32_t
+cs_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+cs_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ STACK_WIND(frame, cs_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
+}
+
+int32_t
+cs_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+int32_t
+cs_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ dict_t *xattr_req)
+{
+ cs_local_t *local = NULL;
+ int ret = 0;
+
+ local = cs_local_init(this, frame, loc, NULL, GF_FOP_UNLINK);
+ if (!local)
+ goto err;
+
+ local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new();
+
+ ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_STATUS, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "dict_set failed key:"
+ " %s",
+ GF_CS_OBJECT_STATUS);
+ goto err;
+ }
+ STACK_WIND(frame, cs_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, flags, local->xattr_req);
+ return 0;
+err:
+ CS_STACK_UNWIND(unlink, frame, -1, errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+cs_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ int ret = 0;
+ uint64_t val = 0;
+
+ if (op_ret == 0) {
+ ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val);
+ if (!ret) {
+ ret = __cs_inode_ctx_update(this, fd->inode, val);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed");
+ }
+ }
+ } else {
+ cs_inode_ctx_reset(this, fd->inode);
+ }
+
+ CS_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int32_t
+cs_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xattr_req)
+{
+ cs_local_t *local = NULL;
+ int ret = 0;
+
+ local = cs_local_init(this, frame, NULL, fd, GF_FOP_OPEN);
+ if (!local)
+ goto err;
+
+ local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new();
+
+ ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_STATUS, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "dict_set failed key:"
+ " %s",
+ GF_CS_OBJECT_STATUS);
+ goto err;
+ }
+
+ STACK_WIND(frame, cs_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, local->xattr_req);
+ return 0;
+err:
+ CS_STACK_UNWIND(open, frame, -1, errno, NULL, NULL);
+ return 0;
+}
+
+int32_t
+cs_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ int ret = 0;
+ uint64_t val = 0;
+ fd_t *fd = NULL;
+ cs_local_t *local = NULL;
+
+ local = frame->local;
+
+ fd = local->fd;
+
+ if (op_ret == 0) {
+ ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val);
+ if (!ret) {
+ gf_msg_debug(this->name, 0, "state %" PRIu64, val);
+ ret = __cs_inode_ctx_update(this, fd->inode, val);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed");
+ }
+ }
+ } else {
+ cs_inode_ctx_reset(this, fd->inode);
+ }
+
+ CS_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata);
+
+ return 0;
+}
+
+int32_t
+cs_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xattr_req)
+{
+ cs_local_t *local = NULL;
+ int ret = 0;
+
+ local = cs_local_init(this, frame, NULL, fd, GF_FOP_FSTAT);
+ if (!local)
+ goto err;
+
+ if (fd->inode->ia_type == IA_IFDIR)
+ goto wind;
+
+ local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new();
+
+ ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_STATUS, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "dict_set failed key:"
+ " %s",
+ GF_CS_OBJECT_STATUS);
+ goto err;
+ }
+
+wind:
+ STACK_WIND(frame, cs_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req);
+ return 0;
+err:
+ CS_STACK_UNWIND(fstat, frame, -1, errno, NULL, NULL);
+ return 0;
+}
+
+cs_local_t *
+cs_local_init(xlator_t *this, call_frame_t *frame, loc_t *loc, fd_t *fd,
+ glusterfs_fop_t fop)
+{
+ cs_local_t *local = NULL;
+ int ret = 0;
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto out;
+
+ if (loc) {
+ ret = loc_copy(&local->loc, loc);
+ if (ret)
+ goto out;
+ }
+
+ if (fd) {
+ local->fd = fd_ref(fd);
+ }
+
+ local->op_ret = -1;
+ local->op_errno = EUCLEAN;
+ local->fop = fop;
+ local->dloffset = 0;
+ frame->local = local;
+ local->locked = _gf_false;
+ local->call_cnt = 0;
+out:
+ if (ret) {
+ if (local)
+ mem_put(local);
+ local = NULL;
+ }
+
+ return local;
+}
+
+call_frame_t *
+cs_lock_frame(call_frame_t *parent_frame)
+{
+ call_frame_t *lock_frame = NULL;
+
+ lock_frame = copy_frame(parent_frame);
+
+ if (lock_frame == NULL)
+ goto out;
+
+ set_lk_owner_from_ptr(&lock_frame->root->lk_owner, parent_frame->root);
+
+out:
+ return lock_frame;
+}
+
+void
+cs_lock_wipe(call_frame_t *lock_frame)
+{
+ CS_STACK_DESTROY(lock_frame);
+}
+
+int32_t
+cs_inodelk_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ cs_lock_wipe(frame);
+
+ return 0;
+}
+
+int
+cs_inodelk_unlock(call_frame_t *main_frame)
+{
+ xlator_t *this = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+ call_frame_t *lock_frame = NULL;
+ cs_local_t *lock_local = NULL;
+ cs_local_t *main_local = NULL;
+ int ret = 0;
+
+ this = main_frame->this;
+ main_local = main_frame->local;
+
+ lock_frame = cs_lock_frame(main_frame);
+ if (!lock_frame)
+ goto out;
+
+ lock_local = cs_local_init(this, lock_frame, NULL, NULL, 0);
+ if (!lock_local)
+ goto out;
+
+ ret = cs_build_loc(&lock_local->loc, main_frame);
+ if (ret) {
+ goto out;
+ }
+
+ flock.l_type = F_UNLCK;
+
+ main_local->locked = _gf_false;
+
+ STACK_WIND(lock_frame, cs_inodelk_unlock_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk, CS_LOCK_DOMAIN,
+ &lock_local->loc, F_SETLKW, &flock, NULL);
+
+ return 0;
+
+out:
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "Stale lock would be found on"
+ " server");
+
+ if (lock_frame)
+ cs_lock_wipe(lock_frame);
+
+ return 0;
+}
+
+int
+cs_download_task(void *arg)
+{
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ cs_private_t *priv = NULL;
+ int ret = -1;
+ char *sign_req = NULL;
+ fd_t *fd = NULL;
+ cs_local_t *local = NULL;
+ dict_t *dict = NULL;
+
+ frame = (call_frame_t *)arg;
+
+ this = frame->this;
+
+ priv = this->private;
+
+ if (!priv->stores) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "No remote store "
+ "plugins found");
+ ret = -1;
+ goto out;
+ }
+
+ local = frame->local;
+
+ if (local->fd)
+ fd = fd_anonymous(local->fd->inode);
+ else
+ fd = fd_anonymous(local->loc.inode);
+
+ if (!fd) {
+ gf_msg("CS", GF_LOG_ERROR, 0, 0, "fd creation failed");
+ ret = -1;
+ goto out;
+ }
+
+ local->dlfd = fd;
+ local->dloffset = 0;
+
+ dict = dict_new();
+ if (!dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
+ "failed to create "
+ "dict");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_uint32(dict, GF_CS_OBJECT_DOWNLOADING, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "dict_set failed");
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_fsetxattr(this, local->fd, dict, 0, NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "fsetxattr failed "
+ "key %s",
+ GF_CS_OBJECT_DOWNLOADING);
+ ret = -1;
+ goto out;
+ }
+ /*this calling method is for per volume setting */
+ ret = priv->stores->dlfop(frame, priv->stores->config);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "download failed"
+ ", remotepath: %s",
+ local->remotepath);
+
+ /*using dlfd as it is anonymous and have RDWR flag*/
+ ret = syncop_ftruncate(FIRST_CHILD(this), local->dlfd, 0, NULL, NULL,
+ NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, -ret, "ftruncate failed");
+ } else {
+ gf_msg_debug(this->name, 0, "ftruncate succeed");
+ }
+
+ ret = -1;
+ goto out;
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "download success, path"
+ " : %s",
+ local->remotepath);
+
+ ret = syncop_fremovexattr(this, local->fd, GF_CS_OBJECT_REMOTE, NULL,
+ NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, -ret,
+ "removexattr failed, remotexattr");
+ ret = -1;
+ goto out;
+ } else {
+ gf_msg_debug(this->name, 0,
+ "fremovexattr success, "
+ "path : %s",
+ local->remotepath);
+ }
+
+ ret = syncop_fremovexattr(this, local->fd, GF_CS_OBJECT_DOWNLOADING,
+ NULL, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, -ret,
+ "removexattr failed, downloading xattr, path %s",
+ local->remotepath);
+ ret = -1;
+ goto out;
+ } else {
+ gf_msg_debug(this->name, 0,
+ "fremovexattr success"
+ " path %s",
+ local->remotepath);
+ }
+ }
+
+out:
+ GF_FREE(sign_req);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (fd) {
+ fd_unref(fd);
+ local->dlfd = NULL;
+ }
+
+ return ret;
+}
+
+int
+cs_download(call_frame_t *frame)
+{
+ int ret = 0;
+ cs_local_t *local = NULL;
+ xlator_t *this = NULL;
+
+ local = frame->local;
+ this = frame->this;
+
+ if (!local->remotepath) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "remote path not"
+ " available. Check posix logs to resolve");
+ goto out;
+ }
+
+ ret = cs_download_task((void *)frame);
+out:
+ return ret;
+}
+
+int
+cs_set_xattr_req(call_frame_t *frame)
+{
+ cs_local_t *local = NULL;
+ GF_UNUSED int ret = 0;
+
+ local = frame->local;
+
+ /* When remote reads are performed (i.e. reads on remote store),
+ * there needs to be a way to associate a file on gluster volume
+ * with its correspnding file on the remote store. In order to do
+ * that, a unique key can be maintained as an xattr
+ * (GF_CS_XATTR_ARCHIVE_UUID)on the stub file on gluster bricks.
+ * This xattr should be provided to the plugin to
+ * perform the read fop on the correct file. This assumes that the file
+ * hierarchy and name need not be the same on remote store as that of
+ * the gluster volume.
+ */
+ ret = dict_set_sizen_str_sizen(local->xattr_req, GF_CS_XATTR_ARCHIVE_UUID,
+ "1");
+
+ return 0;
+}
+
+int
+cs_update_xattrs(call_frame_t *frame, dict_t *xdata)
+{
+ cs_local_t *local = NULL;
+ xlator_t *this = NULL;
+ int size = -1;
+ GF_UNUSED int ret = 0;
+
+ local = frame->local;
+ this = frame->this;
+
+ local->xattrinfo.lxattr = GF_CALLOC(1, sizeof(cs_loc_xattr_t),
+ gf_cs_mt_cs_lxattr_t);
+ if (!local->xattrinfo.lxattr) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+
+ gf_uuid_copy(local->xattrinfo.lxattr->gfid, local->loc.gfid);
+
+ if (local->remotepath) {
+ local->xattrinfo.lxattr->file_path = gf_strdup(local->remotepath);
+ if (!local->xattrinfo.lxattr->file_path) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+ }
+
+ ret = dict_get_gfuuid(xdata, GF_CS_XATTR_ARCHIVE_UUID,
+ &(local->xattrinfo.lxattr->uuid));
+
+ if (ret) {
+ gf_uuid_clear(local->xattrinfo.lxattr->uuid);
+ }
+ size = strlen(this->name) - strlen("-cloudsync") + 1;
+ local->xattrinfo.lxattr->volname = GF_CALLOC(1, size, gf_common_mt_char);
+ if (!local->xattrinfo.lxattr->volname) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+ strncpy(local->xattrinfo.lxattr->volname, this->name, size - 1);
+ local->xattrinfo.lxattr->volname[size - 1] = '\0';
+
+ return 0;
+err:
+ cs_xattrinfo_wipe(local);
+ return -1;
+}
+
+int
+cs_serve_readv(call_frame_t *frame, off_t offset, size_t size, uint32_t flags)
+{
+ xlator_t *this = NULL;
+ cs_private_t *priv = NULL;
+ int ret = -1;
+ fd_t *fd = NULL;
+ cs_local_t *local = NULL;
+
+ local = frame->local;
+ this = frame->this;
+ priv = this->private;
+
+ if (!local->remotepath) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "remote path not"
+ " available. Check posix logs to resolve");
+ goto out;
+ }
+
+ if (!priv->stores) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "No remote store "
+ "plugins found");
+ ret = -1;
+ goto out;
+ }
+
+ if (local->fd) {
+ fd = fd_anonymous(local->fd->inode);
+ } else {
+ fd = fd_anonymous(local->loc.inode);
+ }
+
+ local->xattrinfo.size = size;
+ local->xattrinfo.offset = offset;
+ local->xattrinfo.flags = flags;
+
+ if (!fd) {
+ gf_msg("CS", GF_LOG_ERROR, 0, 0, "fd creation failed");
+ ret = -1;
+ goto out;
+ }
+
+ local->dlfd = fd;
+ local->dloffset = offset;
+
+ /*this calling method is for per volume setting */
+ ret = priv->stores->rdfop(frame, priv->stores->config);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "read failed"
+ ", remotepath: %s",
+ local->remotepath);
+ ret = -1;
+ goto out;
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "read success, path"
+ " : %s",
+ local->remotepath);
+ }
+
+out:
+ if (fd) {
+ fd_unref(fd);
+ local->dlfd = NULL;
+ }
+ return ret;
+}
+
+int32_t
+cs_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+ cs_local_t *local = NULL;
+ int ret = 0;
+ uint64_t val = 0;
+ fd_t *fd = NULL;
+
+ local = frame->local;
+ fd = local->fd;
+
+ local->call_cnt++;
+
+ if (op_ret == -1) {
+ ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val);
+ if (ret == 0) {
+ if (val == GF_CS_ERROR) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "could not get file state, unwinding");
+ op_ret = -1;
+ op_errno = EIO;
+ goto unwind;
+ } else {
+ __cs_inode_ctx_update(this, fd->inode, val);
+ gf_msg(this->name, GF_LOG_INFO, 0, 0, " state = %" PRIu64, val);
+
+ if (local->call_cnt == 1 &&
+ (val == GF_CS_REMOTE || val == GF_CS_DOWNLOADING)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ " will read from remote : %" PRIu64, val);
+ goto repair;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "second readv, Unwinding");
+ goto unwind;
+ }
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "file state "
+ "could not be figured, unwinding");
+ goto unwind;
+ }
+ } else {
+ /* successful readv => file is local */
+ __cs_inode_ctx_update(this, fd->inode, GF_CS_LOCAL);
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "state : GF_CS_LOCAL"
+ ", readv successful");
+
+ goto unwind;
+ }
+
+repair:
+ ret = locate_and_execute(frame);
+ if (ret) {
+ goto unwind;
+ }
+
+ return 0;
+
+unwind:
+ CS_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
+
+ return 0;
+}
+
+int32_t
+cs_resume_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int ret = 0;
+
+ ret = cs_resume_postprocess(this, frame, fd->inode);
+ if (ret) {
+ goto unwind;
+ }
+
+ cs_inodelk_unlock(frame);
+
+ STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+
+ return 0;
+
+unwind:
+ cs_inodelk_unlock(frame);
+
+ cs_common_cbk(frame);
+
+ return 0;
+}
+
+int32_t
+cs_resume_remote_readv(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int ret = 0;
+ cs_local_t *local = NULL;
+ gf_cs_obj_state state = -1;
+ cs_inode_ctx_t *ctx = NULL;
+
+ cs_inodelk_unlock(frame);
+
+ local = frame->local;
+ if (!local) {
+ ret = -1;
+ goto unwind;
+ }
+
+ __cs_inode_ctx_get(this, fd->inode, &ctx);
+
+ state = __cs_get_file_state(fd->inode, ctx);
+ if (state == GF_CS_ERROR) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "status is GF_CS_ERROR."
+ " Aborting readv");
+ local->op_ret = -1;
+ local->op_errno = EREMOTE;
+ ret = -1;
+ goto unwind;
+ }
+
+ /* Serve readv from remote store only if it is remote. */
+ gf_msg_debug(this->name, 0, "status of file %s is %d",
+ local->remotepath ? local->remotepath : "", state);
+
+ /* We will reach this condition if local inode ctx had REMOTE
+ * state when the control was in cs_readv but after stat
+ * we got an updated state saying that the file is LOCAL.
+ */
+ if (state == GF_CS_LOCAL) {
+ STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ } else if (state == GF_CS_REMOTE) {
+ ret = cs_resume_remote_readv_postprocess(this, frame, fd->inode, offset,
+ size, flags);
+ /* Failed to submit the remote readv fop to plugin */
+ if (ret) {
+ local->op_ret = -1;
+ local->op_errno = EREMOTE;
+ goto unwind;
+ }
+ /* When the file is in any other intermediate state,
+ * we should not perform remote reads.
+ */
+ } else {
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto unwind;
+ }
+
+ return 0;
+
+unwind:
+ cs_common_cbk(frame);
+
+ return 0;
+}
+
+int32_t
+cs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ cs_local_t *local = NULL;
+ int ret = 0;
+ cs_inode_ctx_t *ctx = NULL;
+ gf_cs_obj_state state = -1;
+ cs_private_t *priv = NULL;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ priv = this->private;
+
+ local = cs_local_init(this, frame, NULL, fd, GF_FOP_READ);
+ if (!local) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "local init failed");
+ goto err;
+ }
+
+ __cs_inode_ctx_get(this, fd->inode, &ctx);
+
+ if (ctx)
+ state = __cs_get_file_state(fd->inode, ctx);
+ else
+ state = GF_CS_LOCAL;
+
+ local->xattr_req = xdata ? dict_ref(xdata) : (xdata = dict_new());
+
+ ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_STATUS, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "dict_set failed key:"
+ " %s",
+ GF_CS_OBJECT_STATUS);
+ goto err;
+ }
+
+ if (priv->remote_read) {
+ local->stub = fop_readv_stub(frame, cs_resume_remote_readv, fd, size,
+ offset, flags, xdata);
+ } else {
+ local->stub = fop_readv_stub(frame, cs_resume_readv, fd, size, offset,
+ flags, xdata);
+ }
+ if (!local->stub) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insufficient memory");
+ goto err;
+ }
+
+ if (state == GF_CS_LOCAL) {
+ STACK_WIND(frame, cs_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ } else {
+ local->call_cnt++;
+ ret = locate_and_execute(frame);
+ if (ret) {
+ goto err;
+ }
+ }
+
+ return 0;
+
+err:
+ CS_STACK_UNWIND(readv, frame, -1, op_errno, NULL, -1, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int
+cs_resume_remote_readv_postprocess(xlator_t *this, call_frame_t *frame,
+ inode_t *inode, off_t offset, size_t size,
+ uint32_t flags)
+{
+ int ret = 0;
+
+ ret = cs_serve_readv(frame, offset, size, flags);
+
+ return ret;
+}
+
+int
+cs_stat_check_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *stbuf, dict_t *xdata)
+{
+ cs_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ char *filepath = NULL;
+ int ret = 0;
+ inode_t *inode = NULL;
+ uint64_t val = 0;
+
+ local = frame->local;
+
+ if (op_ret == -1) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ gf_msg(this->name, GF_LOG_ERROR, 0, op_errno, "stat check failed");
+ goto err;
+ } else {
+ if (local->fd)
+ inode = local->fd->inode;
+ else
+ inode = local->loc.inode;
+
+ if (!inode) {
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "null inode "
+ "returned");
+ goto err;
+ }
+
+ ret = dict_get_uint64(xdata, GF_CS_OBJECT_STATUS, &val);
+ if (ret == 0) {
+ if (val == GF_CS_ERROR) {
+ cs_inode_ctx_reset(this, inode);
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "status = GF_CS_ERROR. failed to get "
+ " file state");
+ goto err;
+ } else {
+ ret = __cs_inode_ctx_update(this, inode, val);
+ gf_msg_debug(this->name, 0, "status : %" PRIu64, val);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx update failed");
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+ }
+ } else {
+ gf_msg_debug(this->name, 0, "status not found in dict");
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = dict_get_str_sizen(xdata, GF_CS_OBJECT_REMOTE, &filepath);
+ if (filepath) {
+ gf_msg_debug(this->name, 0, "filepath returned %s", filepath);
+ local->remotepath = gf_strdup(filepath);
+ if (!local->remotepath) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+ } else {
+ gf_msg_debug(this->name, 0, "NULL filepath");
+ }
+
+ ret = cs_update_xattrs(frame, xdata);
+ if (ret)
+ goto err;
+
+ local->op_ret = 0;
+ local->xattr_rsp = dict_ref(xdata);
+ memcpy(&local->stbuf, stbuf, sizeof(struct iatt));
+ }
+
+ stub = local->stub;
+ local->stub = NULL;
+ call_resume(stub);
+
+ return 0;
+err:
+ cs_inodelk_unlock(frame);
+
+ cs_common_cbk(frame);
+
+ return 0;
+}
+
+int
+cs_do_stat_check(call_frame_t *main_frame)
+{
+ cs_local_t *local = NULL;
+ xlator_t *this = NULL;
+ int ret = 0;
+
+ local = main_frame->local;
+ this = main_frame->this;
+
+ ret = dict_set_uint32(local->xattr_req, GF_CS_OBJECT_REPAIR, 256);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "dict_set failed");
+ goto err;
+ }
+
+ cs_set_xattr_req(main_frame);
+
+ if (local->fd) {
+ STACK_WIND(main_frame, cs_stat_check_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, local->fd, local->xattr_req);
+ } else {
+ STACK_WIND(main_frame, cs_stat_check_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, &local->loc,
+ local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ cs_inodelk_unlock(main_frame);
+
+ cs_common_cbk(main_frame);
+
+ return 0;
+}
+
+void
+cs_common_cbk(call_frame_t *frame)
+{
+ glusterfs_fop_t fop = -1;
+ cs_local_t *local = NULL;
+
+ local = frame->local;
+
+ fop = local->fop;
+
+ /*Note: Only the failure case needs to be handled here. Since for
+ * successful stat check the fop will resume anyway. The unwind can
+ * happen from the fop_cbk and each cbk can unlock the inodelk in case
+ * a lock was taken before. The lock status can be stored in frame */
+
+ /* for failure case */
+
+ /*TODO: add other fops*/
+ switch (fop) {
+ case GF_FOP_WRITE:
+ CS_STACK_UNWIND(writev, frame, local->op_ret, local->op_errno, NULL,
+ NULL, NULL);
+ break;
+
+ case GF_FOP_SETXATTR:
+ CS_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ NULL);
+ break;
+ case GF_FOP_READ:
+ CS_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno, NULL,
+ 0, NULL, NULL, NULL);
+ break;
+ case GF_FOP_FTRUNCATE:
+ CS_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno,
+ NULL, NULL, NULL);
+ break;
+
+ case GF_FOP_TRUNCATE:
+ CS_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno,
+ NULL, NULL, NULL);
+ break;
+ default:
+ break;
+ }
+
+ return;
+}
+
+int
+cs_blocking_inodelk_cbk(call_frame_t *lock_frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ cs_local_t *main_local = NULL;
+ call_frame_t *main_frame = NULL;
+ cs_local_t *lock_local = NULL;
+
+ lock_local = lock_frame->local;
+
+ main_frame = lock_local->main_frame;
+ main_local = main_frame->local;
+
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "inodelk failed");
+ main_local->op_errno = op_errno;
+ main_local->op_ret = op_ret;
+ goto err;
+ }
+
+ main_local->locked = _gf_true;
+
+ cs_lock_wipe(lock_frame);
+
+ cs_do_stat_check(main_frame);
+
+ return 0;
+err:
+ cs_common_cbk(main_frame);
+
+ cs_lock_wipe(lock_frame);
+
+ return 0;
+}
+
+int
+cs_build_loc(loc_t *loc, call_frame_t *frame)
+{
+ cs_local_t *local = NULL;
+ int ret = -1;
+
+ local = frame->local;
+
+ if (local->fd) {
+ loc->inode = inode_ref(local->fd->inode);
+ if (loc->inode) {
+ gf_uuid_copy(loc->gfid, loc->inode->gfid);
+ ret = 0;
+ goto out;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ } else {
+ loc->inode = inode_ref(local->loc.inode);
+ if (loc->inode) {
+ gf_uuid_copy(loc->gfid, loc->inode->gfid);
+ ret = 0;
+ goto out;
+ } else {
+ ret = -1;
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+int
+cs_blocking_inodelk(call_frame_t *parent_frame)
+{
+ call_frame_t *lock_frame = NULL;
+ cs_local_t *lock_local = NULL;
+ xlator_t *this = NULL;
+ struct gf_flock flock = {
+ 0,
+ };
+ int ret = 0;
+
+ this = parent_frame->this;
+
+ lock_frame = cs_lock_frame(parent_frame);
+ if (!lock_frame) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "insuffcient memory");
+ goto err;
+ }
+
+ lock_local = cs_local_init(this, lock_frame, NULL, NULL, 0);
+ if (!lock_local) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "local init failed");
+ goto err;
+ }
+
+ lock_local->main_frame = parent_frame;
+
+ flock.l_type = F_WRLCK;
+
+ ret = cs_build_loc(&lock_local->loc, parent_frame);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "build_loc failed");
+ goto err;
+ }
+
+ STACK_WIND(lock_frame, cs_blocking_inodelk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk, CS_LOCK_DOMAIN,
+ &lock_local->loc, F_SETLKW, &flock, NULL);
+
+ return 0;
+err:
+ if (lock_frame)
+ cs_lock_wipe(lock_frame);
+
+ return -1;
+}
+
+int
+locate_and_execute(call_frame_t *frame)
+{
+ int ret = 0;
+
+ ret = cs_blocking_inodelk(frame);
+
+ if (ret)
+ return -1;
+ else
+ return 0;
+}
+
+int32_t
+cs_resume_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xattr_req)
+{
+ cs_local_t *local = NULL;
+ int ret = 0;
+
+ local = frame->local;
+
+ ret = cs_resume_postprocess(this, frame, loc->inode);
+ if (ret) {
+ goto unwind;
+ }
+
+ cs_inodelk_unlock(frame);
+
+ STACK_WIND(frame, cs_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset,
+ local->xattr_req);
+
+ return 0;
+
+unwind:
+ cs_inodelk_unlock(frame);
+
+ cs_common_cbk(frame);
+
+ return 0;
+}
+
+int32_t
+cs_resume_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ cs_local_t *local = NULL;
+ cs_inode_ctx_t *ctx = NULL;
+ gf_cs_obj_state state = GF_CS_ERROR;
+
+ local = frame->local;
+
+ __cs_inode_ctx_get(this, loc->inode, &ctx);
+
+ state = __cs_get_file_state(loc->inode, ctx);
+
+ if (state == GF_CS_ERROR) {
+ /* file is already remote */
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_WARNING, 0, 0,
+ "file %s , could not figure file state", loc->path);
+ goto unwind;
+ }
+
+ if (state == GF_CS_REMOTE) {
+ /* file is already remote */
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_WARNING, 0, EINVAL,
+ "file %s is already remote", loc->path);
+ goto unwind;
+ }
+
+ if (state == GF_CS_DOWNLOADING) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, 0,
+ " file is in downloading state.");
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto unwind;
+ }
+
+ STACK_WIND(frame, cs_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+ local->xattr_req);
+
+ return 0;
+unwind:
+ cs_inodelk_unlock(frame);
+
+ cs_common_cbk(frame);
+
+ return 0;
+}
+
+gf_cs_obj_state
+__cs_get_file_state(inode_t *inode, cs_inode_ctx_t *ctx)
+{
+ gf_cs_obj_state state = -1;
+
+ if (!ctx)
+ return GF_CS_ERROR;
+
+ LOCK(&inode->lock);
+ {
+ state = ctx->state;
+ }
+ UNLOCK(&inode->lock);
+
+ return state;
+}
+
+void
+__cs_inode_ctx_get(xlator_t *this, inode_t *inode, cs_inode_ctx_t **ctx)
+{
+ uint64_t ctxint = 0;
+ int ret = 0;
+
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get(inode, this, &ctxint);
+ }
+ UNLOCK(&inode->lock);
+
+ if (ret)
+ *ctx = NULL;
+ else
+ *ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint;
+
+ return;
+}
+
+int
+__cs_inode_ctx_update(xlator_t *this, inode_t *inode, uint64_t val)
+{
+ cs_inode_ctx_t *ctx = NULL;
+ uint64_t ctxint = 0;
+ int ret = 0;
+
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get(inode, this, &ctxint);
+ if (ret) {
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_cs_mt_cs_inode_ctx_t);
+ if (!ctx) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "ctx allocation failed");
+ ret = -1;
+ goto out;
+ }
+
+ ctx->state = val;
+
+ ctxint = (uint64_t)(uintptr_t)ctx;
+
+ ret = __inode_ctx_set(inode, this, &ctxint);
+ if (ret) {
+ GF_FREE(ctx);
+ goto out;
+ }
+ } else {
+ ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint;
+
+ ctx->state = val;
+ }
+ }
+
+out:
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+int
+cs_inode_ctx_reset(xlator_t *this, inode_t *inode)
+{
+ cs_inode_ctx_t *ctx = NULL;
+ uint64_t ctxint = 0;
+
+ inode_ctx_del(inode, this, &ctxint);
+ if (!ctxint) {
+ return 0;
+ }
+
+ ctx = (cs_inode_ctx_t *)(uintptr_t)ctxint;
+
+ GF_FREE(ctx);
+ return 0;
+}
+
+int
+cs_resume_postprocess(xlator_t *this, call_frame_t *frame, inode_t *inode)
+{
+ cs_local_t *local = NULL;
+ gf_cs_obj_state state = -1;
+ cs_inode_ctx_t *ctx = NULL;
+ int ret = 0;
+
+ local = frame->local;
+ if (!local) {
+ ret = -1;
+ goto out;
+ }
+
+ __cs_inode_ctx_get(this, inode, &ctx);
+
+ state = __cs_get_file_state(inode, ctx);
+ if (state == GF_CS_ERROR) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "status is GF_CS_ERROR."
+ " Aborting write");
+ local->op_ret = -1;
+ local->op_errno = EREMOTE;
+ ret = -1;
+ goto out;
+ }
+
+ if (state == GF_CS_REMOTE || state == GF_CS_DOWNLOADING) {
+ gf_msg_debug(this->name, 0, "status is %d", state);
+ ret = cs_download(frame);
+ if (ret == 0) {
+ gf_msg_debug(this->name, 0, "Winding for Final Write");
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ " download failed, unwinding writev");
+ local->op_ret = -1;
+ local->op_errno = EREMOTE;
+ ret = -1;
+ }
+ }
+out:
+ return ret;
+}
+
+int32_t
+cs_fdctx_to_dict(xlator_t *this, fd_t *fd, dict_t *dict)
+{
+ return 0;
+}
+
+int32_t
+cs_inode(xlator_t *this)
+{
+ return 0;
+}
+
+int32_t
+cs_inode_to_dict(xlator_t *this, dict_t *dict)
+{
+ return 0;
+}
+
+int32_t
+cs_history(xlator_t *this)
+{
+ return 0;
+}
+
+int32_t
+cs_fd(xlator_t *this)
+{
+ return 0;
+}
+
+int32_t
+cs_fd_to_dict(xlator_t *this, dict_t *dict)
+{
+ return 0;
+}
+
+int32_t
+cs_fdctx(xlator_t *this, fd_t *fd)
+{
+ return 0;
+}
+
+int32_t
+cs_inodectx(xlator_t *this, inode_t *ino)
+{
+ return 0;
+}
+
+int32_t
+cs_inodectx_to_dict(xlator_t *this, inode_t *ino, dict_t *dict)
+{
+ return 0;
+}
+
+int32_t
+cs_priv_to_dict(xlator_t *this, dict_t *dict, char *brickname)
+{
+ return 0;
+}
+
+int32_t
+cs_priv(xlator_t *this)
+{
+ return 0;
+}
+
+int
+cs_notify(xlator_t *this, int event, void *data, ...)
+{
+ return default_notify(this, event, data);
+}
+
+struct xlator_fops cs_fops = {
+ .stat = cs_stat,
+ .readdirp = cs_readdirp,
+ .truncate = cs_truncate,
+ .seek = cs_seek,
+ .statfs = cs_statfs,
+ .fallocate = cs_fallocate,
+ .discard = cs_discard,
+ .getxattr = cs_getxattr,
+ .writev = cs_writev,
+ .setxattr = cs_setxattr,
+ .fgetxattr = cs_fgetxattr,
+ .lookup = cs_lookup,
+ .fsetxattr = cs_fsetxattr,
+ .readv = cs_readv,
+ .ftruncate = cs_ftruncate,
+ .rchecksum = cs_rchecksum,
+ .unlink = cs_unlink,
+ .open = cs_open,
+ .fstat = cs_fstat,
+ .zerofill = cs_zerofill,
+};
+
+struct xlator_cbks cs_cbks = {
+ .forget = cs_forget,
+};
+
+struct xlator_dumpops cs_dumpops = {
+ .fdctx_to_dict = cs_fdctx_to_dict,
+ .inode = cs_inode,
+ .inode_to_dict = cs_inode_to_dict,
+ .history = cs_history,
+ .fd = cs_fd,
+ .fd_to_dict = cs_fd_to_dict,
+ .fdctx = cs_fdctx,
+ .inodectx = cs_inodectx,
+ .inodectx_to_dict = cs_inodectx_to_dict,
+ .priv_to_dict = cs_priv_to_dict,
+ .priv = cs_priv,
+};
+
+struct volume_options cs_options[] = {
+ {.key = {"cloudsync-storetype"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Defines which remote store is enabled"},
+ {.key = {"cloudsync-remote-read"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "Defines a remote read fop when on"},
+ {.key = {"cloudsync-store-id"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Defines a volume wide store id"},
+ {.key = {"cloudsync-product-id"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Defines a volume wide product id"},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = cs_init,
+ .fini = cs_fini,
+ .notify = cs_notify,
+ .reconfigure = cs_reconfigure,
+ .mem_acct_init = cs_mem_acct_init,
+ .dumpops = &cs_dumpops,
+ .fops = &cs_fops,
+ .cbks = &cs_cbks,
+ .options = cs_options,
+ .identifier = "cloudsync",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/cloudsync/src/cloudsync.h b/xlators/features/cloudsync/src/cloudsync.h
new file mode 100644
index 00000000000..d24141978d6
--- /dev/null
+++ b/xlators/features/cloudsync/src/cloudsync.h
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __CLOUDSYNC_H__
+#define __CLOUDSYNC_H__
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/call-stub.h>
+#include "cloudsync-common.h"
+#include "cloudsync-autogen-fops.h"
+
+#define ALIGN_SIZE 4096
+#define CS_LOCK_DOMAIN "cs.protect.file.stat"
+typedef struct cs_dlstore {
+ off_t off;
+ struct iovec *vector;
+ int32_t count;
+ struct iobref *iobref;
+ uint32_t flags;
+} cs_dlstore;
+
+typedef struct cs_inode_ctx {
+ cs_loc_xattr_t locxattr;
+ gf_cs_obj_state state;
+} cs_inode_ctx_t;
+
+struct cs_plugin {
+ char *name; /* store name */
+ char *library; /* library to load for the given store */
+ char *description; /* description about the store */
+};
+
+cs_local_t *
+cs_local_init(xlator_t *this, call_frame_t *frame, loc_t *loc, fd_t *fd,
+ glusterfs_fop_t fop);
+
+int
+locate_and_execute(call_frame_t *frame);
+
+int32_t
+cs_resume_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata);
+
+int32_t
+cs_inodelk_unlock_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+size_t
+cs_write_callback(void *lcurlbuf, size_t size, size_t nitems, void *frame);
+
+void
+cs_common_cbk(call_frame_t *frame);
+
+gf_boolean_t
+cs_is_file_remote(struct iatt *stbuf, dict_t *xattr);
+
+int32_t
+cs_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+int
+cs_build_loc(loc_t *loc, call_frame_t *frame);
+
+int
+cs_blocking_inodelk_cbk(call_frame_t *lock_frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata);
+
+int
+cs_read_authinfo(xlator_t *this);
+
+int
+__cs_inode_ctx_update(xlator_t *this, inode_t *inode, uint64_t val);
+
+int
+cs_inode_ctx_reset(xlator_t *this, inode_t *inode);
+
+void
+__cs_inode_ctx_get(xlator_t *this, inode_t *inode, cs_inode_ctx_t **ctx);
+
+gf_cs_obj_state
+__cs_get_file_state(inode_t *inode, cs_inode_ctx_t *ctx);
+
+int
+cs_inodelk_unlock(call_frame_t *main_frame);
+
+int
+cs_resume_postprocess(xlator_t *this, call_frame_t *frame, inode_t *inode);
+
+int32_t
+cs_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
+int32_t
+cs_resume_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xattr_req);
+
+int32_t
+cs_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata);
+int32_t
+cs_resume_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+int32_t
+cs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata);
+
+int
+cs_resume_remote_readv_postprocess(xlator_t *this, call_frame_t *frame,
+ inode_t *inode, off_t offset, size_t size,
+ uint32_t flags);
+int
+cs_serve_readv(call_frame_t *frame, off_t offset, size_t size, uint32_t flags);
+#endif /* __CLOUDSYNC_H__ */
diff --git a/xlators/features/compress/Makefile.am b/xlators/features/compress/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/compress/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/compress/src/Makefile.am b/xlators/features/compress/src/Makefile.am
new file mode 100644
index 00000000000..98271a9f3fc
--- /dev/null
+++ b/xlators/features/compress/src/Makefile.am
@@ -0,0 +1,19 @@
+xlator_LTLIBRARIES = cdc.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+noinst_HEADERS = cdc.h cdc-mem-types.h
+
+cdc_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+cdc_la_SOURCES = cdc.c cdc-helper.c
+cdc_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la $(ZLIB_LIBS)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS) \
+ $(LIBZ_CFLAGS)
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/compress/src/cdc-helper.c b/xlators/features/compress/src/cdc-helper.c
new file mode 100644
index 00000000000..f973ff56cf5
--- /dev/null
+++ b/xlators/features/compress/src/cdc-helper.c
@@ -0,0 +1,527 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/syscall.h>
+
+#include "cdc.h"
+#include "cdc-mem-types.h"
+
+#ifdef HAVE_LIB_Z
+#include "zlib.h"
+#endif
+
+#ifdef HAVE_LIB_Z
+/* gzip header looks something like this
+ * (RFC 1950)
+ *
+ * +---+---+---+---+---+---+---+---+---+---+
+ * |ID1|ID2|CM |FLG| MTIME |XFL|OS |
+ * +---+---+---+---+---+---+---+---+---+---+
+ *
+ * Data is usually sent without this header i.e
+ * Data sent = <compressed-data> + trailer(8)
+ * The trailer contains the checksum.
+ *
+ * gzip_header is added only during debugging.
+ * Refer to the function cdc_dump_iovec_to_disk
+ */
+static const char gzip_header[10] = {'\037', '\213', Z_DEFLATED, 0, 0, 0, 0,
+ 0, 0, GF_CDC_OS_ID};
+
+static int32_t
+cdc_next_iovec(xlator_t *this, cdc_info_t *ci)
+{
+ int ret = -1;
+
+ ci->ncount++;
+ /* check for iovec overflow -- should not happen */
+ if (ci->ncount == MAX_IOVEC) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Zlib output buffer overflow"
+ " ->ncount (%d) | ->MAX_IOVEC (%d)",
+ ci->ncount, MAX_IOVEC);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static void
+cdc_put_long(unsigned char *string, unsigned long x)
+{
+ string[0] = (unsigned char)(x & 0xff);
+ string[1] = (unsigned char)((x & 0xff00) >> 8);
+ string[2] = (unsigned char)((x & 0xff0000) >> 16);
+ string[3] = (unsigned char)((x & 0xff000000) >> 24);
+}
+
+static unsigned long
+cdc_get_long(unsigned char *buf)
+{
+ return ((unsigned long)buf[0]) | (((unsigned long)buf[1]) << 8) |
+ (((unsigned long)buf[2]) << 16) | (((unsigned long)buf[3]) << 24);
+}
+
+static int32_t
+cdc_init_gzip_trailer(xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci)
+{
+ int ret = -1;
+ char *buf = NULL;
+
+ ret = cdc_next_iovec(this, ci);
+ if (ret)
+ goto out;
+
+ buf = CURR_VEC(ci).iov_base = (char *)GF_CALLOC(1, GF_CDC_VALIDATION_SIZE,
+ gf_cdc_mt_gzip_trailer_t);
+
+ if (!CURR_VEC(ci).iov_base)
+ goto out;
+
+ CURR_VEC(ci).iov_len = GF_CDC_VALIDATION_SIZE;
+
+ cdc_put_long((unsigned char *)&buf[0], ci->crc);
+ cdc_put_long((unsigned char *)&buf[4], ci->stream.total_in);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static int32_t
+cdc_alloc_iobuf_and_init_vec(xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci,
+ int size)
+{
+ int ret = -1;
+ int alloc_len = 0;
+ struct iobuf *iobuf = NULL;
+
+ ret = cdc_next_iovec(this, ci);
+ if (ret)
+ goto out;
+
+ alloc_len = size ? size : ci->buffer_size;
+
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, alloc_len);
+ if (!iobuf)
+ goto out;
+
+ ret = iobref_add(ci->iobref, iobuf);
+ if (ret)
+ goto out;
+
+ /* Initialize this iovec */
+ CURR_VEC(ci).iov_base = iobuf->ptr;
+ CURR_VEC(ci).iov_len = alloc_len;
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+static void
+cdc_init_zlib_output_stream(cdc_priv_t *priv, cdc_info_t *ci, int size)
+{
+ ci->stream.next_out = (unsigned char *)CURR_VEC(ci).iov_base;
+ ci->stream.avail_out = size ? size : ci->buffer_size;
+}
+
+/* This routine is for testing and debugging only.
+ * Data written = header(10) + <compressed-data> + trailer(8)
+ * So each gzip dump file is at least 18 bytes in size.
+ */
+void
+cdc_dump_iovec_to_disk(xlator_t *this, cdc_info_t *ci, const char *file)
+{
+ int i = 0;
+ int fd = 0;
+ size_t written = 0;
+ size_t total_written = 0;
+
+ fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0777);
+ if (fd < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "Cannot open file: %s", file);
+ return;
+ }
+
+ written = sys_write(fd, (char *)gzip_header, 10);
+ total_written += written;
+ for (i = 0; i < ci->ncount; i++) {
+ written = sys_write(fd, (char *)ci->vec[i].iov_base,
+ ci->vec[i].iov_len);
+ total_written += written;
+ }
+
+ gf_log(this->name, GF_LOG_DEBUG, "dump'd %zu bytes to %s", total_written,
+ GF_CDC_DEBUG_DUMP_FILE);
+
+ sys_close(fd);
+}
+
+static int32_t
+cdc_flush_libz_buffer(cdc_priv_t *priv, xlator_t *this, cdc_info_t *ci,
+ int (*libz_func)(z_streamp, int), int flush)
+{
+ int32_t ret = Z_OK;
+ int done = 0;
+ unsigned int deflate_len = 0;
+
+ for (;;) {
+ deflate_len = ci->buffer_size - ci->stream.avail_out;
+
+ if (deflate_len != 0) {
+ CURR_VEC(ci).iov_len = deflate_len;
+
+ ret = cdc_alloc_iobuf_and_init_vec(this, priv, ci, 0);
+ if (ret) {
+ ret = Z_MEM_ERROR;
+ break;
+ }
+
+ /* Re-position Zlib output buffer */
+ cdc_init_zlib_output_stream(priv, ci, 0);
+ }
+
+ if (done) {
+ ci->ncount--;
+ break;
+ }
+
+ ret = libz_func(&ci->stream, flush);
+
+ if (ret == Z_BUF_ERROR) {
+ ret = Z_OK;
+ ci->ncount--;
+ break;
+ }
+
+ done = (ci->stream.avail_out != 0 || ret == Z_STREAM_END);
+
+ if (ret != Z_OK && ret != Z_STREAM_END)
+ break;
+ }
+
+ return ret;
+}
+
+static int32_t
+do_cdc_compress(struct iovec *vec, xlator_t *this, cdc_priv_t *priv,
+ cdc_info_t *ci)
+{
+ int ret = -1;
+
+ /* Initialize defalte */
+ ret = deflateInit2(&ci->stream, priv->cdc_level, Z_DEFLATED,
+ priv->window_size, priv->mem_level, Z_DEFAULT_STRATEGY);
+
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "unable to init Zlib (retval: %d)",
+ ret);
+ goto out;
+ }
+
+ ret = cdc_alloc_iobuf_and_init_vec(this, priv, ci, 0);
+ if (ret)
+ goto out;
+
+ /* setup output buffer */
+ cdc_init_zlib_output_stream(priv, ci, 0);
+
+ /* setup input buffer */
+ ci->stream.next_in = (unsigned char *)vec->iov_base;
+ ci->stream.avail_in = vec->iov_len;
+
+ ci->crc = crc32(ci->crc, (const Bytef *)vec->iov_base, vec->iov_len);
+
+ gf_log(this->name, GF_LOG_DEBUG, "crc=%lu len=%d buffer_size=%d", ci->crc,
+ ci->stream.avail_in, ci->buffer_size);
+
+ /* compress !! */
+ while (ci->stream.avail_in != 0) {
+ if (ci->stream.avail_out == 0) {
+ CURR_VEC(ci).iov_len = ci->buffer_size;
+
+ ret = cdc_alloc_iobuf_and_init_vec(this, priv, ci, 0);
+ if (ret)
+ break;
+
+ /* Re-position Zlib output buffer */
+ cdc_init_zlib_output_stream(priv, ci, 0);
+ }
+
+ ret = deflate(&ci->stream, Z_NO_FLUSH);
+ if (ret != Z_OK)
+ break;
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+cdc_compress(xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci, dict_t **xdata)
+{
+ int ret = -1;
+ int i = 0;
+
+ ci->iobref = iobref_new();
+ if (!ci->iobref)
+ goto out;
+
+ if (!*xdata) {
+ *xdata = dict_new();
+ if (!*xdata) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Cannot allocate xdata"
+ " dict");
+ goto out;
+ }
+ }
+
+ /* data */
+ for (i = 0; i < ci->count; i++) {
+ ret = do_cdc_compress(&ci->vector[i], this, priv, ci);
+ if (ret != Z_OK)
+ goto deflate_cleanup_out;
+ }
+
+ /* flush zlib buffer */
+ ret = cdc_flush_libz_buffer(priv, this, ci, deflate, Z_FINISH);
+ if (!(ret == Z_OK || ret == Z_STREAM_END)) {
+ gf_log(this->name, GF_LOG_ERROR, "Compression Error: ret (%d)", ret);
+ ret = -1;
+ goto deflate_cleanup_out;
+ }
+
+ /* trailer */
+ ret = cdc_init_gzip_trailer(this, priv, ci);
+ if (ret)
+ goto deflate_cleanup_out;
+
+ gf_log(this->name, GF_LOG_DEBUG, "Compressed %ld to %ld bytes",
+ ci->stream.total_in, ci->stream.total_out);
+
+ ci->nbytes = ci->stream.total_out + GF_CDC_VALIDATION_SIZE;
+
+ /* set deflated canary value for identification */
+ ret = dict_set_int32(*xdata, GF_CDC_DEFLATE_CANARY_VAL, 1);
+ if (ret) {
+ /* Send uncompressed data if we can't _tell_ the client
+ * that deflated data is on it's way. So, we just log
+ * the failure and continue as usual.
+ */
+ gf_log(this->name, GF_LOG_ERROR,
+ "Data deflated, but could not set canary"
+ " value in dict for identification");
+ }
+
+ /* This is to be used in testing */
+ if (priv->debug) {
+ cdc_dump_iovec_to_disk(this, ci, GF_CDC_DEBUG_DUMP_FILE);
+ }
+
+deflate_cleanup_out:
+ (void)deflateEnd(&ci->stream);
+
+out:
+ return ret;
+}
+
+/* deflate content is checked by the presence of a canary
+ * value in the dict as the key
+ */
+static int32_t
+cdc_check_content_for_deflate(dict_t *xdata)
+{
+ return dict_get(xdata, GF_CDC_DEFLATE_CANARY_VAL) ? -1 : 0;
+}
+
+static unsigned long
+cdc_extract_crc(char *trailer)
+{
+ return cdc_get_long((unsigned char *)&trailer[0]);
+}
+
+static unsigned long
+cdc_extract_size(char *trailer)
+{
+ return cdc_get_long((unsigned char *)&trailer[4]);
+}
+
+static int32_t
+cdc_validate_inflate(cdc_info_t *ci, unsigned long crc, unsigned long len)
+{
+ return !((crc == ci->crc)
+ /* inflated length is hidden inside
+ * Zlib stream struct */
+ && (len == ci->stream.total_out));
+}
+
+static int32_t
+do_cdc_decompress(xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci)
+{
+ int ret = -1;
+ int i = 0;
+ int len = 0;
+ char *inflte = NULL;
+ char *trailer = NULL;
+ struct iovec vec = {
+ 0,
+ };
+ unsigned long computed_crc = 0;
+ unsigned long computed_len = 0;
+
+ ret = inflateInit2(&ci->stream, priv->window_size);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Zlib: Unable to initialize inflate");
+ goto out;
+ }
+
+ vec = THIS_VEC(ci, 0);
+
+ trailer = (char *)(((char *)vec.iov_base) + vec.iov_len -
+ GF_CDC_VALIDATION_SIZE);
+
+ /* CRC of uncompressed data */
+ computed_crc = cdc_extract_crc(trailer);
+
+ /* size of uncomrpessed data */
+ computed_len = cdc_extract_size(trailer);
+
+ gf_log(this->name, GF_LOG_DEBUG, "crc=%lu len=%lu buffer_size=%d",
+ computed_crc, computed_len, ci->buffer_size);
+
+ inflte = vec.iov_base;
+ len = vec.iov_len - GF_CDC_VALIDATION_SIZE;
+
+ /* allocate buffer of the original length of the data */
+ ret = cdc_alloc_iobuf_and_init_vec(this, priv, ci, 0);
+ if (ret)
+ goto out;
+
+ /* setup output buffer */
+ cdc_init_zlib_output_stream(priv, ci, 0);
+
+ /* setup input buffer */
+ ci->stream.next_in = (unsigned char *)inflte;
+ ci->stream.avail_in = len;
+
+ while (ci->stream.avail_in != 0) {
+ if (ci->stream.avail_out == 0) {
+ CURR_VEC(ci).iov_len = ci->buffer_size;
+
+ ret = cdc_alloc_iobuf_and_init_vec(this, priv, ci, 0);
+ if (ret)
+ break;
+
+ /* Re-position Zlib output buffer */
+ cdc_init_zlib_output_stream(priv, ci, 0);
+ }
+
+ ret = inflate(&ci->stream, Z_NO_FLUSH);
+ if (ret == Z_STREAM_ERROR)
+ break;
+ }
+
+ /* flush zlib buffer */
+ ret = cdc_flush_libz_buffer(priv, this, ci, inflate, Z_SYNC_FLUSH);
+ if (!(ret == Z_OK || ret == Z_STREAM_END)) {
+ gf_log(this->name, GF_LOG_ERROR, "Decompression Error: ret (%d)", ret);
+ ret = -1;
+ goto out;
+ }
+
+ /* compute CRC of the uncompresses data to check for
+ * correctness */
+
+ for (i = 0; i < ci->ncount; i++) {
+ ci->crc = crc32(ci->crc, (const Bytef *)ci->vec[i].iov_base,
+ ci->vec[i].iov_len);
+ }
+
+ /* validate inflated data */
+ ret = cdc_validate_inflate(ci, computed_crc, computed_len);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Checksum or length mismatched in inflated data");
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+cdc_decompress(xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci, dict_t *xdata)
+{
+ int32_t ret = -1;
+
+ /* check for deflate content */
+ if (!cdc_check_content_for_deflate(xdata)) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Content not deflated, passing through ...");
+ goto passthrough_out;
+ }
+
+ ci->iobref = iobref_new();
+ if (!ci->iobref)
+ goto passthrough_out;
+
+ /* do we need to do this? can we assume that one iovec
+ * will hold per request data every time?
+ *
+ * server/client protocol seems to deal with a single
+ * iovec even if op_ret > 1M. So, it looks ok to
+ * assume that a single iovec will contain all the
+ * data (This saves us a lot from finding the trailer
+ * and the data since it could have been split-up onto
+ * two adjacent iovec's.
+ *
+ * But, in case this translator is loaded above quick-read
+ * for some reason, then it's entirely possible that we get
+ * multiple iovec's...
+ *
+ * This case (handled below) is not tested. (by loading the
+ * xlator below quick-read)
+ */
+
+ /* @@ I_HOPE_THIS_IS_NEVER_HIT */
+ if (ci->count > 1) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "unable to handle"
+ " multiple iovecs (%d in number)",
+ ci->count);
+ goto inflate_cleanup_out;
+ /* TODO: coallate all iovecs in one */
+ }
+
+ ret = do_cdc_decompress(this, priv, ci);
+ if (ret)
+ goto inflate_cleanup_out;
+
+ ci->nbytes = ci->stream.total_out;
+
+ gf_log(this->name, GF_LOG_DEBUG, "Inflated %ld to %ld bytes",
+ ci->stream.total_in, ci->stream.total_out);
+
+inflate_cleanup_out:
+ (void)inflateEnd(&ci->stream);
+
+passthrough_out:
+ return ret;
+}
+
+#endif
diff --git a/xlators/features/compress/src/cdc-mem-types.h b/xlators/features/compress/src/cdc-mem-types.h
new file mode 100644
index 00000000000..928afdd2efe
--- /dev/null
+++ b/xlators/features/compress/src/cdc-mem-types.h
@@ -0,0 +1,23 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CDC_MEM_TYPES_H
+#define __CDC_MEM_TYPES_H
+
+#include <glusterfs/mem-types.h>
+
+enum gf_cdc_mem_types {
+ gf_cdc_mt_priv_t = gf_common_mt_end + 1,
+ gf_cdc_mt_vec_t = gf_common_mt_end + 2,
+ gf_cdc_mt_gzip_trailer_t = gf_common_mt_end + 3,
+ gf_cdc_mt_end = gf_common_mt_end + 4,
+};
+
+#endif
diff --git a/xlators/features/compress/src/cdc.c b/xlators/features/compress/src/cdc.c
new file mode 100644
index 00000000000..b0b51e914ed
--- /dev/null
+++ b/xlators/features/compress/src/cdc.c
@@ -0,0 +1,348 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <sys/uio.h>
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/logging.h>
+
+#include "cdc.h"
+#include "cdc-mem-types.h"
+
+static void
+cdc_cleanup_iobref(cdc_info_t *ci)
+{
+ assert(ci->iobref != NULL);
+ iobref_clear(ci->iobref);
+}
+
+int32_t
+cdc_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+ int ret = -1;
+ cdc_priv_t *priv = NULL;
+ cdc_info_t ci = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("cdc", this, default_out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, default_out);
+
+ priv = this->private;
+
+ if (op_ret <= 0)
+ goto default_out;
+
+ if ((priv->min_file_size != 0) && (op_ret < priv->min_file_size))
+ goto default_out;
+
+ ci.count = count;
+ ci.ibytes = op_ret;
+ ci.vector = vector;
+ ci.buf = NULL;
+ ci.iobref = NULL;
+ ci.ncount = 0;
+ ci.crc = 0;
+ ci.buffer_size = GF_CDC_DEF_BUFFERSIZE;
+
+ /* A readv compresses on the server side and decompresses on the client side
+ */
+ if (priv->op_mode == GF_CDC_MODE_SERVER) {
+ ret = cdc_compress(this, priv, &ci, &xdata);
+ } else if (priv->op_mode == GF_CDC_MODE_CLIENT) {
+ ret = cdc_decompress(this, priv, &ci, xdata);
+ } else {
+ gf_log(this->name, GF_LOG_ERROR, "Invalid operation mode (%d)",
+ priv->op_mode);
+ }
+
+ if (ret)
+ goto default_out;
+
+ STACK_UNWIND_STRICT(readv, frame, ci.nbytes, op_errno, ci.vec, ci.ncount,
+ stbuf, iobref, xdata);
+ cdc_cleanup_iobref(&ci);
+ return 0;
+
+default_out:
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
+ return 0;
+}
+
+int32_t
+cdc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ fop_readv_cbk_t cbk = NULL;
+
+#ifdef HAVE_LIB_Z
+ cbk = cdc_readv_cbk;
+#else
+ cbk = default_readv_cbk;
+#endif
+ STACK_WIND(frame, cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
+ fd, size, offset, flags, xdata);
+ return 0;
+}
+
+int32_t
+cdc_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int32_t
+cdc_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ int ret = -1;
+ cdc_priv_t *priv = NULL;
+ cdc_info_t ci = {
+ 0,
+ };
+ size_t isize = 0;
+
+ GF_VALIDATE_OR_GOTO("cdc", this, err);
+ GF_VALIDATE_OR_GOTO(this->name, frame, err);
+
+ priv = this->private;
+
+ isize = iov_length(vector, count);
+
+ if (isize <= 0)
+ goto default_out;
+
+ if ((priv->min_file_size != 0) && (isize < priv->min_file_size))
+ goto default_out;
+
+ ci.count = count;
+ ci.ibytes = isize;
+ ci.vector = vector;
+ ci.buf = NULL;
+ ci.iobref = NULL;
+ ci.ncount = 0;
+ ci.crc = 0;
+ ci.buffer_size = GF_CDC_DEF_BUFFERSIZE;
+
+ /* A writev compresses on the client side and decompresses on the server
+ * side
+ */
+ if (priv->op_mode == GF_CDC_MODE_CLIENT) {
+ ret = cdc_compress(this, priv, &ci, &xdata);
+ } else if (priv->op_mode == GF_CDC_MODE_SERVER) {
+ ret = cdc_decompress(this, priv, &ci, xdata);
+ } else {
+ gf_log(this->name, GF_LOG_ERROR, "Invalid operation mode (%d) ",
+ priv->op_mode);
+ }
+
+ if (ret)
+ goto default_out;
+
+ STACK_WIND(frame, cdc_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, ci.vec, ci.ncount, offset,
+ flags, iobref, xdata);
+
+ cdc_cleanup_iobref(&ci);
+ return 0;
+
+default_out:
+ STACK_WIND(frame, cdc_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT(writev, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, gf_cdc_mt_end);
+
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int32_t
+init(xlator_t *this)
+{
+ int ret = -1;
+ char *temp_str = NULL;
+ cdc_priv_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("cdc", this, err);
+
+ if (!this->children || this->children->next) {
+ gf_log(this->name, GF_LOG_ERROR, "Need subvolume == 1");
+ goto err;
+ }
+
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_WARNING, "Dangling volume. Check volfile");
+ }
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_cdc_mt_priv_t);
+ if (!priv) {
+ goto err;
+ }
+
+ /* Check if debug mode is turned on */
+ GF_OPTION_INIT("debug", priv->debug, bool, err);
+ if (priv->debug) {
+ gf_log(this->name, GF_LOG_DEBUG, "CDC debug option turned on");
+ }
+
+ /* Set Gzip Window Size */
+ GF_OPTION_INIT("window-size", priv->window_size, int32, err);
+ if ((priv->window_size > GF_CDC_MAX_WINDOWSIZE) ||
+ (priv->window_size < GF_CDC_DEF_WINDOWSIZE)) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Invalid gzip window size (%d), using default",
+ priv->window_size);
+ priv->window_size = GF_CDC_DEF_WINDOWSIZE;
+ }
+
+ /* Set Gzip (De)Compression Level */
+ GF_OPTION_INIT("compression-level", priv->cdc_level, int32, err);
+ if (((priv->cdc_level < 1) || (priv->cdc_level > 9)) &&
+ (priv->cdc_level != GF_CDC_DEF_COMPRESSION)) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Invalid gzip (de)compression level (%d),"
+ " using default",
+ priv->cdc_level);
+ priv->cdc_level = GF_CDC_DEF_COMPRESSION;
+ }
+
+ /* Set Gzip Memory Level */
+ GF_OPTION_INIT("mem-level", priv->mem_level, int32, err);
+ if ((priv->mem_level < 1) || (priv->mem_level > 9)) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Invalid gzip memory level, using the default");
+ priv->mem_level = GF_CDC_DEF_MEMLEVEL;
+ }
+
+ /* Set min file size to enable compression */
+ GF_OPTION_INIT("min-size", priv->min_file_size, int32, err);
+
+ /* Mode of operation - Server/Client */
+ ret = dict_get_str(this->options, "mode", &temp_str);
+ if (ret) {
+ gf_log(this->name, GF_LOG_CRITICAL, "Operation mode not specified !!");
+ goto err;
+ }
+
+ if (GF_CDC_MODE_IS_CLIENT(temp_str)) {
+ priv->op_mode = GF_CDC_MODE_CLIENT;
+ } else if (GF_CDC_MODE_IS_SERVER(temp_str)) {
+ priv->op_mode = GF_CDC_MODE_SERVER;
+ } else {
+ gf_log(this->name, GF_LOG_CRITICAL,
+ "Bogus operation mode (%s) specified", temp_str);
+ goto err;
+ }
+
+ this->private = priv;
+ gf_log(this->name, GF_LOG_DEBUG, "CDC xlator loaded in (%s) mode",
+ temp_str);
+ return 0;
+
+err:
+ if (priv)
+ GF_FREE(priv);
+
+ return -1;
+}
+
+void
+fini(xlator_t *this)
+{
+ cdc_priv_t *priv = this->private;
+
+ if (priv)
+ GF_FREE(priv);
+ this->private = NULL;
+ return;
+}
+
+struct xlator_fops fops = {
+ .readv = cdc_readv,
+ .writev = cdc_writev,
+};
+
+struct xlator_cbks cbks = {};
+
+struct volume_options options[] = {
+ {.key = {"window-size"},
+ .default_value = "-15",
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Size of the zlib history buffer."},
+ {.key = {"mem-level"},
+ .default_value = "8",
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Memory allocated for internal compression state. "
+ "1 uses minimum memory but is slow and reduces "
+ "compression ratio; memLevel=9 uses maximum memory "
+ "for optimal speed. The default value is 8."},
+ {.key = {"compression-level"},
+ .default_value = "-1",
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Compression levels \n"
+ "0 : no compression, 1 : best speed, \n"
+ "9 : best compression, -1 : default compression "},
+ {.key = {"min-size"},
+ .default_value = "0",
+ .type = GF_OPTION_TYPE_INT,
+ .description = "Data is compressed only when its size exceeds this."},
+ {.key = {"mode"},
+ .value = {"server", "client"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Set on the basis of where the xlator is loaded. "
+ "This option should NOT be configured by user."},
+ {.key = {"debug"},
+ .default_value = "false",
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "This is used in testing. Will dump compressed data "
+ "to disk as a gzip file."},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "cdc",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/compress/src/cdc.h b/xlators/features/compress/src/cdc.h
new file mode 100644
index 00000000000..cb87b06a989
--- /dev/null
+++ b/xlators/features/compress/src/cdc.h
@@ -0,0 +1,99 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __CDC_H
+#define __CDC_H
+
+#ifdef HAVE_LIB_Z
+#include "zlib.h"
+#endif
+
+#include <glusterfs/xlator.h>
+
+#ifndef MAX_IOVEC
+#define MAX_IOVEC 16
+#endif
+
+typedef struct cdc_priv {
+ int window_size;
+ int mem_level;
+ int cdc_level;
+ int min_file_size;
+ int op_mode;
+ gf_boolean_t debug;
+ gf_lock_t lock;
+} cdc_priv_t;
+
+typedef struct cdc_info {
+ /* input bits */
+ int count;
+ int32_t ibytes;
+ struct iovec *vector;
+ struct iatt *buf;
+
+ /* output bits */
+ int ncount;
+ int nbytes;
+ int buffer_size;
+ struct iovec vec[MAX_IOVEC];
+ struct iobref *iobref;
+
+ /* zlib bits */
+#ifdef HAVE_LIB_Z
+ z_stream stream;
+#endif
+ unsigned long crc;
+} cdc_info_t;
+
+#define NVEC(ci) (ci->ncount - 1)
+#define CURR_VEC(ci) ci->vec[ci->ncount - 1]
+#define THIS_VEC(ci, i) ci->vector[i]
+
+/* Gzip defaults */
+#define GF_CDC_DEF_WINDOWSIZE -15 /* default value */
+#define GF_CDC_MAX_WINDOWSIZE -8 /* max value */
+
+#ifdef HAVE_LIB_Z
+#define GF_CDC_DEF_COMPRESSION Z_DEFAULT_COMPRESSION
+#else
+#define GF_CDC_DEF_COMPRESSION -1
+#endif
+
+#define GF_CDC_DEF_MEMLEVEL 8
+#define GF_CDC_DEF_BUFFERSIZE 262144 // 256K - default compression buffer size
+
+/* Operation mode
+ * If xlator is loaded on client, readv decompresses and writev compresses
+ * If xlator is loaded on server, readv compresses and writev decompresses
+ */
+#define GF_CDC_MODE_CLIENT 0
+#define GF_CDC_MODE_SERVER 1
+
+/* min size of data to do cmpression
+ * 0 == compress even 1byte
+ */
+#define GF_CDC_MIN_CHUNK_SIZE 0
+
+#define GF_CDC_VALIDATION_SIZE 8
+
+#define GF_CDC_OS_ID 0xFF
+#define GF_CDC_DEFLATE_CANARY_VAL "deflate"
+#define GF_CDC_DEBUG_DUMP_FILE "/tmp/cdcdump.gz"
+
+#define GF_CDC_MODE_IS_CLIENT(m) (strcmp(m, "client") == 0)
+
+#define GF_CDC_MODE_IS_SERVER(m) (strcmp(m, "server") == 0)
+
+int32_t
+cdc_compress(xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci, dict_t **xdata);
+int32_t
+cdc_decompress(xlator_t *this, cdc_priv_t *priv, cdc_info_t *ci, dict_t *xdata);
+
+#endif
diff --git a/xlators/features/filter/Makefile.am b/xlators/features/filter/Makefile.am
deleted file mode 100644
index d471a3f9243..00000000000
--- a/xlators/features/filter/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/features/filter/src/Makefile.am b/xlators/features/filter/src/Makefile.am
deleted file mode 100644
index d473b9ea16d..00000000000
--- a/xlators/features/filter/src/Makefile.am
+++ /dev/null
@@ -1,15 +0,0 @@
-xlator_LTLIBRARIES = filter.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features
-
-filter_la_LDFLAGS = -module -avoidversion
-
-filter_la_SOURCES = filter.c
-filter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-noinst_HEADERS = filter-mem-types.h
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/features/filter/src/filter-mem-types.h b/xlators/features/filter/src/filter-mem-types.h
deleted file mode 100644
index de2cb9665dc..00000000000
--- a/xlators/features/filter/src/filter-mem-types.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __FILTER_MEM_TYPES_H__
-#define __FILTER_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_filter_mem_types_ {
- gf_filter_mt_gf_filter = gf_common_mt_end + 1,
- gf_filter_mt_end
-};
-#endif
-
diff --git a/xlators/features/filter/src/filter.c b/xlators/features/filter/src/filter.c
deleted file mode 100644
index eda042f35f5..00000000000
--- a/xlators/features/filter/src/filter.c
+++ /dev/null
@@ -1,1744 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "filter-mem-types.h"
-
-#define GF_FILTER_NOBODY_UID 65534
-#define GF_FILTER_NOBODY_GID 65534
-#define GF_FILTER_ROOT_UID 0
-#define GF_FILTER_ROOT_GID 0
-
-#define GF_MAXIMUM_FILTERING_ALLOWED 32
-
-/*
- option root-filtering on (off by default)
- option translate-uid <uid-range=newuid,uid=newuid>
- option translate-gid <gid-range=newgid,gid=newgid>
- option read-only <yes|true>
- option fixed-uid <uid>
- option fixed-gid <gid>
- option filter-uid <uid-range,uid>
- option filter-gid <gid-range,gid> // not supported yet
-
-*/
-
-struct gf_filter {
- /* Flags */
- gf_boolean_t complete_read_only;
- char fixed_uid_set;
- char fixed_gid_set;
- char partial_filter;
-
- /* Options */
- /* Mapping/Filtering/Translate whatever you want to call */
- int translate_num_uid_entries;
- int translate_num_gid_entries;
- int translate_input_uid[GF_MAXIMUM_FILTERING_ALLOWED][2];
- int translate_output_uid[GF_MAXIMUM_FILTERING_ALLOWED];
- int translate_input_gid[GF_MAXIMUM_FILTERING_ALLOWED][2];
- int translate_output_gid[GF_MAXIMUM_FILTERING_ALLOWED];
-
- /* Fixed uid/gid */
- int fixed_uid;
- int fixed_gid;
-
- /* Filter */
- int filter_num_uid_entries;
- int filter_num_gid_entries;
- int filter_input_uid[GF_MAXIMUM_FILTERING_ALLOWED][2];
- int filter_input_gid[GF_MAXIMUM_FILTERING_ALLOWED][2];
-
-};
-
-/* update_frame: The main logic of the whole translator.
- Return values:
- 0: no change
- // TRANSLATE
- 1: only uid changed
- 2: only gid changed
- 3: both uid/gid changed
- // FILTER
- 4: uid in filter range
- 5: gid in filter range // not supported yet
- 6: complete fs is readonly
-*/
-
-#define GF_FILTER_NO_CHANGE 0
-#define GF_FILTER_MAP_UID 1
-#define GF_FILTER_MAP_GID 2
-#define GF_FILTER_MAP_BOTH 3
-#define GF_FILTER_FILTER_UID 4
-#define GF_FILTER_FILTER_GID 5
-#define GF_FILTER_RO_FS 6
-
-static int32_t
-update_frame (call_frame_t *frame,
- inode_t *inode,
- struct gf_filter *filter)
-{
- uid_t uid = 0;
- int32_t idx = 0;
- int32_t ret = 0;
- int32_t dictret = 0;
- uint64_t tmp_uid = 0;
-
- for (idx = 0; idx < filter->translate_num_uid_entries; idx++) {
- if ((frame->root->uid >=filter->translate_input_uid[idx][0]) &&
- (frame->root->uid <=filter->translate_input_uid[idx][1])) {
- dictret = inode_ctx_get (inode, frame->this, &tmp_uid);
- uid = (uid_t)tmp_uid;
- if (dictret == 0) {
- if (frame->root->uid != uid)
- ret = GF_FILTER_MAP_UID;
- } else {
- ret = GF_FILTER_MAP_UID;
- }
- break;
- }
- }
-
- for (idx = 0; idx < filter->translate_num_gid_entries; idx++) {
- if ((frame->root->gid >=filter->translate_input_gid[idx][0]) &&
- (frame->root->gid <=filter->translate_input_gid[idx][1])) {
- if (ret == GF_FILTER_NO_CHANGE)
- ret = GF_FILTER_MAP_GID;
- else
- ret = GF_FILTER_MAP_BOTH;
- break;
- }
- }
-
-
- if (filter->complete_read_only)
- return GF_FILTER_RO_FS;
-
- if (filter->partial_filter) {
- dictret = inode_ctx_get (inode, frame->this, &tmp_uid);
- uid = (uid_t)tmp_uid;
- if (dictret != -1) {
- for (idx = 0; idx < filter->filter_num_uid_entries;
- idx++) {
- if ((uid >=filter->filter_input_uid[idx][0]) &&
- (uid <=filter->filter_input_uid[idx][1])) {
- return GF_FILTER_FILTER_UID;
- }
- }
- }
- }
-
- return ret;
-}
-
-/* if 'root' don't change the uid/gid */
-static int32_t
-update_stat (struct iatt *stbuf,
- struct gf_filter *filter)
-{
- int32_t idx = 0;
- for (idx = 0; idx < filter->translate_num_uid_entries; idx++) {
- if (stbuf->ia_uid == GF_FILTER_ROOT_UID)
- continue;
- if ((stbuf->ia_uid >= filter->translate_input_uid[idx][0]) &&
- (stbuf->ia_uid <= filter->translate_input_uid[idx][1])) {
- stbuf->ia_uid = filter->translate_output_uid[idx];
- break;
- }
- }
-
- for (idx = 0; idx < filter->translate_num_gid_entries; idx++) {
- if (stbuf->ia_gid == GF_FILTER_ROOT_GID)
- continue;
- if ((stbuf->ia_gid >= filter->translate_input_gid[idx][0]) &&
- (stbuf->ia_gid <= filter->translate_input_gid[idx][1])) {
- stbuf->ia_gid = filter->translate_output_gid[idx];
- break;
- }
- }
-
- if (filter->fixed_uid_set) {
- stbuf->ia_uid = filter->fixed_uid;
- }
-
- if (filter->fixed_gid_set) {
- stbuf->ia_gid = filter->fixed_gid;
- }
-
- return 0;
-}
-
-static int32_t
-filter_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
-{
- int ret = 0;
- if (op_ret >= 0) {
- update_stat (buf, this->private);
- ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "couldn't set context");
- }
-
- update_stat (postparent, this->private);
- }
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf, dict, postparent);
- return 0;
-}
-
-int32_t
-filter_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- STACK_WIND (frame,
- filter_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc,
- xattr_req);
- return 0;
-}
-
-
-static int32_t
-filter_stat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- if (op_ret >= 0) {
- update_stat (buf, this->private);
- }
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-filter_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- STACK_WIND (frame,
- filter_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc);
- return 0;
-}
-
-static int32_t
-filter_setattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preop,
- struct iatt *postop)
-{
- if (op_ret >= 0) {
- update_stat (preop, this->private);
- update_stat (postop, this->private);
- }
- STACK_UNWIND (frame, op_ret, op_errno, preop, postop);
- return 0;
-}
-
-int32_t
-filter_setattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct iatt *stbuf,
- int32_t valid)
-{
- int32_t ret = 0;
- ret = update_frame (frame, loc->inode, this->private);
- switch (ret) {
- case GF_FILTER_MAP_UID:
- if (loc->inode->st_mode & S_IWGRP)
- break;
- case GF_FILTER_MAP_BOTH:
- if (loc->inode->st_mode & S_IWOTH)
- break;
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM, NULL, NULL, NULL);
- return 0;
-
- case GF_FILTER_FILTER_UID:
- case GF_FILTER_FILTER_GID:
- case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
- return 0;
- default:
- break;
- }
-
- STACK_WIND (frame,
- filter_setattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr,
- loc,
- stbuf, valid);
- return 0;
-}
-
-static int32_t
-filter_fsetattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preop,
- struct iatt *postop)
-{
- if (op_ret >= 0) {
- update_stat (preop, this->private);
- update_stat (postop, this->private);
- }
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- preop, postop);
- return 0;
-}
-
-int32_t
-filter_fsetattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iatt *stbuf,
- int32_t valid)
-{
- STACK_WIND (frame,
- filter_fsetattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetattr,
- fd,
- stbuf, valid);
- return 0;
-}
-
-
-static int32_t
-filter_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- if (op_ret >= 0) {
- update_stat (prebuf, this->private);
- update_stat (postbuf, this->private);
- }
- STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
-
-int32_t
-filter_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
-{
- int32_t ret = 0;
- ret = update_frame (frame, loc->inode, this->private);
- switch (ret) {
- case GF_FILTER_MAP_UID:
- if (loc->inode->st_mode & S_IWGRP)
- break;
- case GF_FILTER_MAP_BOTH:
- if (loc->inode->st_mode & S_IWOTH)
- break;
- gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM, NULL, NULL);
- return 0;
-
- case GF_FILTER_FILTER_UID:
- case GF_FILTER_FILTER_GID:
- case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- filter_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc,
- offset);
- return 0;
-}
-
-static int32_t
-filter_ftruncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- if (op_ret >= 0) {
- update_stat (prebuf, this->private);
- update_stat (postbuf, this->private);
- }
- STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
-
-int32_t
-filter_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset)
-{
- STACK_WIND (frame,
- filter_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset);
- return 0;
-}
-
-
-static int32_t
-filter_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *path,
- struct iatt *sbuf)
-{
- if (op_ret >= 0)
- update_stat (sbuf, this->private);
-
- STACK_UNWIND (frame, op_ret, op_errno, path, sbuf);
- return 0;
-}
-
-int32_t
-filter_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
-{
- int32_t ret = 0;
- ret = update_frame (frame, loc->inode, this->private);
- switch (ret) {
- case GF_FILTER_MAP_UID:
- if (loc->inode->st_mode & S_IRGRP)
- break;
- case GF_FILTER_MAP_BOTH:
- if (loc->inode->st_mode & S_IROTH)
- break;
- gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM, NULL);
- return 0;
- }
- STACK_WIND (frame,
- filter_readlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink,
- loc,
- size);
- return 0;
-}
-
-
-static int32_t
-filter_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int ret = 0;
-
- if (op_ret >= 0) {
- update_stat (buf, this->private);
- ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "couldn't set context");
- }
-
- update_stat (preparent, this->private);
- update_stat (postparent, this->private);
- }
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
-}
-
-int32_t
-filter_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t rdev)
-{
- int ret = 0;
- inode_t *parent = loc->parent;
- ret = update_frame (frame, loc->inode, this->private);
- switch (ret) {
- case GF_FILTER_MAP_UID:
- if (parent->st_mode & S_IWGRP)
- break;
- case GF_FILTER_MAP_BOTH:
- if (parent->st_mode & S_IWOTH)
- break;
- gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM, NULL, NULL,
- NULL, NULL);
- return 0;
-
- case GF_FILTER_FILTER_UID:
- case GF_FILTER_FILTER_GID:
- case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL, NULL,
- NULL, NULL);
- return 0;
- }
- STACK_WIND (frame,
- filter_mknod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev);
- return 0;
-}
-
-static int32_t
-filter_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int ret = 0;
- if (op_ret >= 0) {
- update_stat (buf, this->private);
- ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "couldn't set context");
- }
-
- update_stat (preparent, this->private);
- update_stat (postparent, this->private);
- }
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
-}
-
-int32_t
-filter_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- int ret = 0;
- inode_t *parent = loc->parent;
- ret = update_frame (frame, loc->inode, this->private);
- switch (ret) {
- case GF_FILTER_MAP_UID:
- if (parent->st_mode & S_IWGRP)
- break;
- case GF_FILTER_MAP_BOTH:
- if (parent->st_mode & S_IWOTH)
- break;
- gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM, NULL, NULL,
- NULL, NULL);
- return 0;
-
- case GF_FILTER_FILTER_UID:
- case GF_FILTER_FILTER_GID:
- case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL, NULL,
- NULL, NULL);
- return 0;
- }
- STACK_WIND (frame,
- filter_mkdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- loc, mode);
- return 0;
-}
-
-static int32_t
-filter_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- if (op_ret >= 0) {
- update_stat (preparent, this->private);
- update_stat (postparent, this->private);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent);
- return 0;
-}
-
-int32_t
-filter_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- int32_t ret = 0;
- inode_t *parent = loc->parent;
- if (!parent)
- parent = inode_parent (loc->inode, 0, NULL);
- ret = update_frame (frame, loc->inode, this->private);
- switch (ret) {
- case GF_FILTER_MAP_UID:
- if (parent->st_mode & S_IWGRP)
- break;
- if (loc->inode->st_mode & S_IWGRP)
- break;
- case GF_FILTER_MAP_BOTH:
- if (parent->st_mode & S_IWOTH)
- break;
- if (loc->inode->st_mode & S_IWOTH)
- break;
- gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM, NULL, NULL);
- return 0;
- case GF_FILTER_FILTER_UID:
- case GF_FILTER_FILTER_GID:
- case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
- return 0;
- }
- STACK_WIND (frame,
- filter_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc);
- return 0;
-}
-
-static int32_t
-filter_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- if (op_ret >= 0) {
- update_stat (preparent, this->private);
- update_stat (postparent, this->private);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, preparent, postparent);
- return 0;
-}
-
-int32_t
-filter_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- int32_t ret = 0;
- inode_t *parent = loc->parent;
- if (!parent)
- parent = inode_parent (loc->inode, 0, NULL);
- ret = update_frame (frame, loc->inode, this->private);
- switch (ret) {
- case GF_FILTER_MAP_UID:
- if (parent->st_mode & S_IWGRP)
- break;
- if (loc->inode->st_mode & S_IWGRP)
- break;
- case GF_FILTER_MAP_BOTH:
- if (parent->st_mode & S_IWOTH)
- break;
- if (loc->inode->st_mode & S_IWOTH)
- break;
- gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM, NULL, NULL);
- return 0;
- case GF_FILTER_FILTER_UID:
- case GF_FILTER_FILTER_GID:
- case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
- return 0;
- }
- STACK_WIND (frame,
- filter_rmdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- loc);
- return 0;
-}
-
-static int32_t
-filter_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int ret = 0;
- if (op_ret >= 0) {
- update_stat (buf, this->private);
- ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "couldn't set context");
- }
-
- update_stat (preparent, this->private);
- update_stat (postparent, this->private);
- }
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
-}
-
-int32_t
-filter_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc)
-{
- int ret = 0;
- inode_t *parent = loc->parent;
- ret = update_frame (frame, loc->inode, this->private);
- switch (ret) {
- case GF_FILTER_MAP_UID:
- if (parent->st_mode & S_IWGRP)
- break;
- case GF_FILTER_MAP_BOTH:
- if (parent->st_mode & S_IWOTH)
- break;
- gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM, NULL, NULL,
- NULL, NULL);
- return 0;
-
- case GF_FILTER_FILTER_UID:
- case GF_FILTER_FILTER_GID:
- case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL, NULL,
- NULL, NULL);
- return 0;
- }
- STACK_WIND (frame,
- filter_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- linkpath, loc);
- return 0;
-}
-
-
-static int32_t
-filter_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- if (op_ret >= 0) {
- update_stat (buf, this->private);
-
- update_stat (preoldparent, this->private);
- update_stat (postoldparent, this->private);
-
- update_stat (prenewparent, this->private);
- update_stat (postnewparent, this->private);
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, buf,
- preoldparent, postoldparent,
- prenewparent, postnewparent);
- return 0;
-}
-
-int32_t
-filter_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- int32_t ret = 0;
- inode_t *parent = oldloc->parent;
- if (!parent)
- parent = inode_parent (oldloc->inode, 0, NULL);
- ret = update_frame (frame, oldloc->inode, this->private);
- switch (ret) {
- case GF_FILTER_MAP_UID:
- if (parent->st_mode & S_IWGRP)
- break;
- if (oldloc->inode->st_mode & S_IWGRP)
- break;
- case GF_FILTER_MAP_BOTH:
- if (parent->st_mode & S_IWOTH)
- break;
- if (oldloc->inode->st_mode & S_IWOTH)
- break;
- gf_log (this->name, GF_LOG_DEBUG,
- "%s -> %s: returning permission denied", oldloc->path, newloc->path);
- STACK_UNWIND (frame, -1, EPERM, NULL,
- NULL, NULL,
- NULL, NULL);
- return 0;
-
- case GF_FILTER_FILTER_UID:
- case GF_FILTER_FILTER_GID:
- case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL,
- NULL, NULL,
- NULL, NULL);
- return 0;
- }
- STACK_WIND (frame,
- filter_rename_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename,
- oldloc, newloc);
- return 0;
-}
-
-
-static int32_t
-filter_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int ret = 0;
- if (op_ret >= 0) {
- update_stat (buf, this->private);
- ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "couldn't set context");
- }
-
- update_stat (preparent, this->private);
- update_stat (postparent, this->private);
- }
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf,
- preparent, postparent);
- return 0;
-}
-
-int32_t
-filter_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- int ret = 0;
- ret = update_frame (frame, oldloc->inode, this->private);
- switch (ret) {
- case GF_FILTER_FILTER_UID:
- case GF_FILTER_FILTER_GID:
- case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL, NULL,
- NULL, NULL);
- return 0;
- }
- STACK_WIND (frame,
- filter_link_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link,
- oldloc, newloc);
- return 0;
-}
-
-
-static int32_t
-filter_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- int ret = 0;
- if (op_ret >= 0) {
- update_stat (buf, this->private);
- ret = inode_ctx_put (inode, this, (uint64_t)(long)buf->ia_uid);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "couldn't set context");
- }
- update_stat (preparent, this->private);
- update_stat (postparent, this->private);
- }
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent);
- return 0;
-}
-
-int32_t
-filter_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode, fd_t *fd)
-{
- int ret = 0;
- inode_t *parent = loc->parent;
- ret = update_frame (frame, loc->inode, this->private);
- switch (ret) {
- case GF_FILTER_MAP_UID:
- if (parent->st_mode & S_IWGRP)
- break;
- case GF_FILTER_MAP_BOTH:
- if (parent->st_mode & S_IWOTH)
- break;
- gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
-
- case GF_FILTER_FILTER_UID:
- case GF_FILTER_FILTER_GID:
- case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL, NULL, NULL,
- NULL, NULL);
- return 0;
- }
- STACK_WIND (frame, filter_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, flags, mode, fd);
- return 0;
-}
-
-static int32_t
-filter_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
- return 0;
-}
-
-int32_t
-filter_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- fd_t *fd,
- int32_t wbflags)
-{
- int32_t ret = 0;
- ret = update_frame (frame, loc->inode, this->private);
- switch (ret) {
- case GF_FILTER_MAP_UID:
- if (loc->inode->st_mode & S_IWGRP)
- break;
- if (!(((flags & O_ACCMODE) == O_WRONLY)
- || ((flags & O_ACCMODE) == O_RDWR))
- && (loc->inode->st_mode & S_IRGRP))
- break;
- case GF_FILTER_MAP_BOTH:
- if (loc->inode->st_mode & S_IWOTH)
- break;
- if (!(((flags & O_ACCMODE) == O_WRONLY)
- || ((flags & O_ACCMODE) == O_RDWR))
- && (loc->inode->st_mode & S_IROTH))
- break;
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: returning permission denied (mode: 0%o, flag=0%o)",
- loc->path, loc->inode->st_mode, flags);
- STACK_UNWIND (frame, -1, EPERM, fd);
- return 0;
- case GF_FILTER_FILTER_UID:
- case GF_FILTER_FILTER_GID:
- case GF_FILTER_RO_FS:
- if (!(((flags & O_ACCMODE) == O_WRONLY)
- || ((flags & O_ACCMODE) == O_RDWR)))
- break;
- STACK_UNWIND (frame, -1, EROFS, NULL);
- return 0;
-
- }
- STACK_WIND (frame,
- filter_open_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc, flags, fd, wbflags);
- return 0;
-}
-
-static int32_t
-filter_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count,
- struct iatt *stbuf,
- struct iobref *iobref)
-{
- if (op_ret >= 0) {
- update_stat (stbuf, this->private);
- }
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- vector,
- count,
- stbuf,
- iobref);
- return 0;
-}
-
-int32_t
-filter_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset)
-{
- STACK_WIND (frame,
- filter_readv_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd,
- size,
- offset);
- return 0;
-}
-
-
-static int32_t
-filter_writev_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *prebuf,
- struct iatt *postbuf)
-{
- if (op_ret >= 0) {
- update_stat (prebuf, this->private);
- update_stat (postbuf, this->private);
- }
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- prebuf,
- postbuf);
- return 0;
-}
-
-int32_t
-filter_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off,
- struct iobref *iobref)
-{
- int32_t ret = 0;
- ret = update_frame (frame, fd->inode, this->private);
- switch (ret) {
- case GF_FILTER_FILTER_UID:
- case GF_FILTER_FILTER_GID:
- case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS, NULL, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- filter_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd,
- vector,
- count,
- off,
- iobref);
- return 0;
-}
-
-static int32_t
-filter_fstat_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- if (op_ret >= 0) {
- update_stat (buf, this->private);
- }
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-
-int32_t
-filter_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd)
-{
- STACK_WIND (frame,
- filter_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd);
- return 0;
-}
-
-static int32_t
-filter_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- fd);
- return 0;
-}
-
-int32_t
-filter_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd)
-{
- int32_t ret = 0;
- ret = update_frame (frame, loc->inode, this->private);
- switch (ret) {
- case GF_FILTER_MAP_UID:
- if (loc->inode->st_mode & S_IWGRP)
- break;
- if (loc->inode->st_mode & S_IRGRP)
- break;
- case GF_FILTER_MAP_BOTH:
- if (loc->inode->st_mode & S_IWOTH)
- break;
- if (loc->inode->st_mode & S_IROTH)
- break;
- gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM, fd);
- return 0;
- }
- STACK_WIND (frame,
- filter_opendir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir,
- loc, fd);
- return 0;
-}
-
-
-static int32_t
-filter_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
-
-int32_t
-filter_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
-{
-
- int32_t ret = 0;
- ret = update_frame (frame, loc->inode, this->private);
- switch (ret) {
- case GF_FILTER_MAP_UID:
- if (loc->inode->st_mode & S_IWGRP)
- break;
- case GF_FILTER_MAP_BOTH:
- if (loc->inode->st_mode & S_IWOTH)
- break;
- gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM);
- return 0;
- case GF_FILTER_FILTER_UID:
- case GF_FILTER_FILTER_GID:
- case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS);
- return 0;
- }
-
- STACK_WIND (frame,
- filter_setxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- loc,
- dict,
- flags);
- return 0;
-}
-
-static int32_t
-filter_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- dict);
- return 0;
-}
-
-int32_t
-filter_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- int32_t ret = 0;
- ret = update_frame (frame, loc->inode, this->private);
- switch (ret) {
- case GF_FILTER_MAP_UID:
- if (loc->inode->st_mode & S_IRGRP)
- break;
- case GF_FILTER_MAP_BOTH:
- if (loc->inode->st_mode & S_IROTH)
- break;
- gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM, NULL);
- return 0;
- }
-
- STACK_WIND (frame,
- filter_getxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- loc,
- name);
- return 0;
-}
-
-static int32_t
-filter_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-filter_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- int32_t ret = 0;
- ret = update_frame (frame, loc->inode, this->private);
- switch (ret) {
- case GF_FILTER_MAP_UID:
- if (loc->inode->st_mode & S_IWGRP)
- break;
- case GF_FILTER_MAP_BOTH:
- if (loc->inode->st_mode & S_IWOTH)
- break;
- gf_log (this->name, GF_LOG_DEBUG, "%s: returning permission denied", loc->path);
- STACK_UNWIND (frame, -1, EPERM);
- return 0;
- case GF_FILTER_FILTER_UID:
- case GF_FILTER_FILTER_GID:
- case GF_FILTER_RO_FS:
- STACK_UNWIND (frame, -1, EROFS);
- return 0;
- }
-
- STACK_WIND (frame,
- filter_removexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- loc,
- name);
- return 0;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_filter_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-int32_t
-init (xlator_t *this)
-{
- char *value = NULL;
- char *tmp_str = NULL;
- char *tmp_str1 = NULL;
- char *tmp_str2 = NULL;
- char *dup_str = NULL;
- char *input_value_str1 = NULL;
- char *input_value_str2 = NULL;
- char *output_value_str = NULL;
- int32_t input_value = 0;
- int32_t output_value = 0;
- data_t *option_data = NULL;
- struct gf_filter *filter = NULL;
- gf_boolean_t tmp_bool = 0;
-
- if (!this->children || this->children->next) {
- gf_log (this->name,
- GF_LOG_ERROR,
- "translator not configured with exactly one child");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- filter = GF_CALLOC (sizeof (*filter), 1, gf_filter_mt_gf_filter);
- ERR_ABORT (filter);
-
- if (dict_get (this->options, "read-only")) {
- value = data_to_str (dict_get (this->options, "read-only"));
- if (gf_string2boolean (value, &filter->complete_read_only) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "wrong value provided for 'read-only'");
- return -1;
- }
- }
-
- if (dict_get (this->options, "root-squashing")) {
- value = data_to_str (dict_get (this->options, "root-squashing"));
- if (gf_string2boolean (value, &tmp_bool) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "wrong value provided for 'root-squashing'");
- return -1;
- }
- if (tmp_bool) {
- filter->translate_num_uid_entries = 1;
- filter->translate_num_gid_entries = 1;
- filter->translate_input_uid[0][0] = GF_FILTER_ROOT_UID; /* root */
- filter->translate_input_uid[0][1] = GF_FILTER_ROOT_UID; /* root */
- filter->translate_input_gid[0][0] = GF_FILTER_ROOT_GID; /* root */
- filter->translate_input_gid[0][1] = GF_FILTER_ROOT_GID; /* root */
- filter->translate_output_uid[0] = GF_FILTER_NOBODY_UID;
- filter->translate_output_gid[0] = GF_FILTER_NOBODY_GID;
- }
- }
-
- if (dict_get (this->options, "translate-uid")) {
- option_data = dict_get (this->options, "translate-uid");
- value = strtok_r (option_data->data, ",", &tmp_str);
- while (value) {
- dup_str = gf_strdup (value);
- input_value_str1 = strtok_r (dup_str, "=", &tmp_str1);
- if (input_value_str1) {
- /* Check for n-m */
- char *temp_string = gf_strdup (input_value_str1);
- input_value_str2 = strtok_r (temp_string, "-", &tmp_str2);
- if (gf_string2int (input_value_str2, &input_value) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"",
- input_value_str2);
- return -1;
- }
- filter->translate_input_uid[filter->translate_num_uid_entries][0] = input_value;
- input_value_str2 = strtok_r (NULL, "-", &tmp_str2);
- if (input_value_str2) {
- if (gf_string2int (input_value_str2, &input_value) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"",
- input_value_str2);
- return -1;
- }
- }
- filter->translate_input_uid[filter->translate_num_uid_entries][1] = input_value;
- GF_FREE (temp_string);
- output_value_str = strtok_r (NULL, "=", &tmp_str1);
- if (output_value_str) {
- if (gf_string2int (output_value_str, &output_value) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"",
- output_value_str);
- return -1;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "mapping string not valid");
- return -1;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "mapping string not valid");
- return -1;
- }
- filter->translate_output_uid[filter->translate_num_uid_entries] = output_value;
- gf_log (this->name,
- GF_LOG_DEBUG,
- "pair %d: input uid '%d' will be changed to uid '%d'",
- filter->translate_num_uid_entries, input_value, output_value);
-
- filter->translate_num_uid_entries++;
- if (filter->translate_num_uid_entries == GF_MAXIMUM_FILTERING_ALLOWED)
- break;
- value = strtok_r (NULL, ",", &tmp_str);
- GF_FREE (dup_str);
- }
- }
-
- tmp_str1 = NULL;
- tmp_str2 = NULL;
- tmp_str = NULL;
-
- if (dict_get (this->options, "translate-gid")) {
- option_data = dict_get (this->options, "translate-gid");
- value = strtok_r (option_data->data, ",", &tmp_str);
- while (value) {
- dup_str = gf_strdup (value);
- input_value_str1 = strtok_r (dup_str, "=", &tmp_str1);
- if (input_value_str1) {
- /* Check for n-m */
- char *temp_string = gf_strdup (input_value_str1);
- input_value_str2 = strtok_r (temp_string, "-", &tmp_str2);
- if (gf_string2int (input_value_str2, &input_value) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"",
- input_value_str2);
- return -1;
- }
- filter->translate_input_gid[filter->translate_num_gid_entries][0] = input_value;
- input_value_str2 = strtok_r (NULL, "-", &tmp_str2);
- if (input_value_str2) {
- if (gf_string2int (input_value_str2, &input_value) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"",
- input_value_str2);
- return -1;
- }
- }
- filter->translate_input_gid[filter->translate_num_gid_entries][1] = input_value;
- GF_FREE (temp_string);
- output_value_str = strtok_r (NULL, "=", &tmp_str1);
- if (output_value_str) {
- if (gf_string2int (output_value_str, &output_value) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"",
- output_value_str);
- return -1;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "translate-gid value not valid");
- return -1;
- }
- } else {
- gf_log (this->name, GF_LOG_ERROR,
- "translate-gid value not valid");
- return -1;
- }
-
- filter->translate_output_gid[filter->translate_num_gid_entries] = output_value;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "pair %d: input gid '%d' will be changed to gid '%d'",
- filter->translate_num_gid_entries, input_value, output_value);
-
- filter->translate_num_gid_entries++;
- if (filter->translate_num_gid_entries == GF_MAXIMUM_FILTERING_ALLOWED)
- break;
- value = strtok_r (NULL, ",", &tmp_str);
- GF_FREE (dup_str);
- }
- }
-
- tmp_str = NULL;
- tmp_str1 = NULL;
-
- if (dict_get (this->options, "filter-uid")) {
- option_data = dict_get (this->options, "filter-uid");
- value = strtok_r (option_data->data, ",", &tmp_str);
- while (value) {
- dup_str = gf_strdup (value);
- /* Check for n-m */
- input_value_str1 = strtok_r (dup_str, "-", &tmp_str1);
- if (gf_string2int (input_value_str1, &input_value) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"",
- input_value_str1);
- return -1;
- }
- filter->filter_input_uid[filter->filter_num_uid_entries][0] = input_value;
- input_value_str1 = strtok_r (NULL, "-", &tmp_str1);
- if (input_value_str1) {
- if (gf_string2int (input_value_str1, &input_value) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"",
- input_value_str1);
- return -1;
- }
- }
- filter->filter_input_uid[filter->filter_num_uid_entries][1] = input_value;
-
- gf_log (this->name,
- GF_LOG_DEBUG,
- "filter [%d]: input uid(s) '%s' will be filtered",
- filter->filter_num_uid_entries, dup_str);
-
- filter->filter_num_uid_entries++;
- if (filter->filter_num_uid_entries == GF_MAXIMUM_FILTERING_ALLOWED)
- break;
- value = strtok_r (NULL, ",", &tmp_str);
- GF_FREE (dup_str);
- }
- filter->partial_filter = 1;
- }
-
- tmp_str = NULL;
- tmp_str1 = NULL;
-
- if (dict_get (this->options, "filter-gid")) {
- option_data = dict_get (this->options, "filter-gid");
- value = strtok_r (option_data->data, ",", &tmp_str);
- while (value) {
- dup_str = gf_strdup (value);
- /* Check for n-m */
- input_value_str1 = strtok_r (dup_str, "-", &tmp_str1);
- if (gf_string2int (input_value_str1, &input_value) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"",
- input_value_str1);
- return -1;
- }
- filter->filter_input_gid[filter->filter_num_gid_entries][0] = input_value;
- input_value_str1 = strtok_r (NULL, "-", &tmp_str1);
- if (input_value_str1) {
- if (gf_string2int (input_value_str1, &input_value) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"",
- input_value_str1);
- return -1;
- }
- }
- filter->filter_input_gid[filter->filter_num_gid_entries][1] = input_value;
-
- gf_log (this->name,
- GF_LOG_DEBUG,
- "filter [%d]: input gid(s) '%s' will be filtered",
- filter->filter_num_gid_entries, dup_str);
-
- filter->filter_num_gid_entries++;
- if (filter->filter_num_gid_entries == GF_MAXIMUM_FILTERING_ALLOWED)
- break;
- value = strtok_r (NULL, ",", &tmp_str);
- GF_FREE (dup_str);
- }
- gf_log (this->name, GF_LOG_ERROR, "this option is not supported currently.. exiting");
- return -1;
- filter->partial_filter = 1;
- }
-
- if (dict_get (this->options, "fixed-uid")) {
- option_data = dict_get (this->options, "fixed-uid");
- if (gf_string2int (option_data->data, &input_value) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"",
- option_data->data);
- return -1;
- }
- filter->fixed_uid = input_value;
- filter->fixed_uid_set = 1;
- }
-
- if (dict_get (this->options, "fixed-gid")) {
- option_data = dict_get (this->options, "fixed-gid");
- if (gf_string2int (option_data->data, &input_value) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid number format \"%s\"",
- option_data->data);
- return -1;
- }
- filter->fixed_gid = input_value;
- filter->fixed_gid_set = 1;
- }
-
- this->private = filter;
- return 0;
-}
-
-
-void
-fini (xlator_t *this)
-{
- struct gf_filter *filter = this->private;
-
- GF_FREE (filter);
-
- return;
-}
-
-
-struct xlator_fops fops = {
- .lookup = filter_lookup,
- .stat = filter_stat,
- .fstat = filter_fstat,
- .readlink = filter_readlink,
- .mknod = filter_mknod,
- .mkdir = filter_mkdir,
- .unlink = filter_unlink,
- .rmdir = filter_rmdir,
- .symlink = filter_symlink,
- .rename = filter_rename,
- .link = filter_link,
- .truncate = filter_truncate,
- .ftruncate = filter_ftruncate,
- .create = filter_create,
- .open = filter_open,
- .readv = filter_readv,
- .writev = filter_writev,
- .setxattr = filter_setxattr,
- .getxattr = filter_getxattr,
- .removexattr = filter_removexattr,
- .opendir = filter_opendir,
- .setattr = filter_setattr,
- .fsetattr = filter_fsetattr,
-};
-
-struct xlator_cbks cbks = {
-};
-
-struct volume_options options[] = {
- { .key = { "root-squashing" },
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = { "read-only" },
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = { "fixed-uid" },
- .type = GF_OPTION_TYPE_INT
- },
- { .key = { "fixed-gid" },
- .type = GF_OPTION_TYPE_INT
- },
- { .key = { "translate-uid" },
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = { "translate-gid" },
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = { "filter-uid" },
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = { "filter-gid" },
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {NULL} },
-};
diff --git a/xlators/bindings/python/Makefile.am b/xlators/features/gfid-access/Makefile.am
index af437a64d6d..af437a64d6d 100644
--- a/xlators/bindings/python/Makefile.am
+++ b/xlators/features/gfid-access/Makefile.am
diff --git a/xlators/features/gfid-access/src/Makefile.am b/xlators/features/gfid-access/src/Makefile.am
new file mode 100644
index 00000000000..ff95604c4de
--- /dev/null
+++ b/xlators/features/gfid-access/src/Makefile.am
@@ -0,0 +1,16 @@
+xlator_LTLIBRARIES = gfid-access.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+gfid_access_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+gfid_access_la_SOURCES = gfid-access.c
+gfid_access_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = gfid-access.h gfid-access-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/gfid-access/src/gfid-access-mem-types.h b/xlators/features/gfid-access/src/gfid-access-mem-types.h
new file mode 100644
index 00000000000..1c4d0b93de2
--- /dev/null
+++ b/xlators/features/gfid-access/src/gfid-access-mem-types.h
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GFID_ACCESS_MEM_TYPES_H
+#define _GFID_ACCESS_MEM_TYPES_H
+
+#include <glusterfs/mem-types.h>
+
+enum gf_changelog_mem_types {
+ gf_gfid_access_mt_priv_t = gf_common_mt_end + 1,
+ gf_gfid_access_mt_gfid_t,
+ gf_gfid_access_mt_end
+};
+
+#endif
diff --git a/xlators/features/gfid-access/src/gfid-access.c b/xlators/features/gfid-access/src/gfid-access.c
new file mode 100644
index 00000000000..3fea5672a21
--- /dev/null
+++ b/xlators/features/gfid-access/src/gfid-access.c
@@ -0,0 +1,1420 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "gfid-access.h"
+#include <glusterfs/inode.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/statedump.h>
+
+int
+ga_valid_inode_loc_copy(loc_t *dst, loc_t *src, xlator_t *this)
+{
+ int ret = 0;
+ uint64_t value = 0;
+
+ /* if its an entry operation, on the virtual */
+ /* directory inode as parent, we need to handle */
+ /* it properly */
+ ret = loc_copy(dst, src);
+ if (ret < 0)
+ goto out;
+
+ /*
+ * Change ALL virtual inodes with real-inodes in loc
+ */
+ if (dst->parent) {
+ ret = inode_ctx_get(dst->parent, this, &value);
+ if (ret < 0) {
+ ret = 0; // real-inode
+ goto out;
+ }
+ inode_unref(dst->parent);
+ dst->parent = inode_ref((inode_t *)(uintptr_t)value);
+ gf_uuid_copy(dst->pargfid, dst->parent->gfid);
+ }
+
+ if (dst->inode) {
+ ret = inode_ctx_get(dst->inode, this, &value);
+ if (ret < 0) {
+ ret = 0; // real-inode
+ goto out;
+ }
+ inode_unref(dst->inode);
+ dst->inode = inode_ref((inode_t *)(uintptr_t)value);
+ gf_uuid_copy(dst->gfid, dst->inode->gfid);
+ }
+out:
+
+ return ret;
+}
+
+void
+ga_newfile_args_free(ga_newfile_args_t *args)
+{
+ if (!args)
+ goto out;
+
+ GF_FREE(args->bname);
+
+ if (S_ISLNK(args->st_mode) && args->args.symlink.linkpath) {
+ GF_FREE(args->args.symlink.linkpath);
+ args->args.symlink.linkpath = NULL;
+ }
+
+ mem_put(args);
+out:
+ return;
+}
+
+void
+ga_heal_args_free(ga_heal_args_t *args)
+{
+ if (!args)
+ goto out;
+
+ GF_FREE(args->bname);
+
+ mem_put(args);
+out:
+ return;
+}
+
+ga_newfile_args_t *
+ga_newfile_parse_args(xlator_t *this, data_t *data)
+{
+ ga_newfile_args_t *args = NULL;
+ ga_private_t *priv = NULL;
+ int len = 0;
+ int blob_len = 0;
+ int min_len = 0;
+ void *blob = NULL;
+
+ priv = this->private;
+
+ blob = data->data;
+ blob_len = data->len;
+
+ min_len = sizeof(args->uid) + sizeof(args->gid) + sizeof(args->gfid) +
+ sizeof(args->st_mode) + 2 + 2;
+ if (blob_len < min_len) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Invalid length: Total length is less "
+ "than minimum length.");
+ goto err;
+ }
+
+ args = mem_get0(priv->newfile_args_pool);
+ if (args == NULL)
+ goto err;
+
+ args->uid = ntoh32(*(uint32_t *)blob);
+ blob += sizeof(uint32_t);
+ blob_len -= sizeof(uint32_t);
+
+ args->gid = ntoh32(*(uint32_t *)blob);
+ blob += sizeof(uint32_t);
+ blob_len -= sizeof(uint32_t);
+
+ memcpy(args->gfid, blob, sizeof(args->gfid));
+ blob += sizeof(args->gfid);
+ blob_len -= sizeof(args->gfid);
+
+ args->st_mode = ntoh32(*(uint32_t *)blob);
+ blob += sizeof(uint32_t);
+ blob_len -= sizeof(uint32_t);
+
+ len = strnlen(blob, blob_len);
+ if (len == blob_len) {
+ gf_log(this->name, GF_LOG_ERROR, "gfid: %s. No null byte present.",
+ args->gfid);
+ goto err;
+ }
+
+ args->bname = GF_MALLOC(len + 1, gf_common_mt_char);
+ if (args->bname == NULL)
+ goto err;
+
+ memcpy(args->bname, blob, (len + 1));
+ blob += (len + 1);
+ blob_len -= (len + 1);
+
+ if (S_ISDIR(args->st_mode)) {
+ if (blob_len < sizeof(uint32_t)) {
+ gf_log(this->name, GF_LOG_ERROR, "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mkdir.mode = ntoh32(*(uint32_t *)blob);
+ blob += sizeof(uint32_t);
+ blob_len -= sizeof(uint32_t);
+
+ if (blob_len < sizeof(uint32_t)) {
+ gf_log(this->name, GF_LOG_ERROR, "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mkdir.umask = ntoh32(*(uint32_t *)blob);
+ blob_len -= sizeof(uint32_t);
+ if (blob_len < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ } else if (S_ISLNK(args->st_mode)) {
+ len = strnlen(blob, blob_len);
+ if (len == blob_len) {
+ gf_log(this->name, GF_LOG_ERROR, "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.symlink.linkpath = GF_MALLOC(len + 1, gf_common_mt_char);
+ if (args->args.symlink.linkpath == NULL)
+ goto err;
+
+ memcpy(args->args.symlink.linkpath, blob, (len + 1));
+ blob_len -= (len + 1);
+ } else {
+ if (blob_len < sizeof(uint32_t)) {
+ gf_log(this->name, GF_LOG_ERROR, "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mknod.mode = ntoh32(*(uint32_t *)blob);
+ blob += sizeof(uint32_t);
+ blob_len -= sizeof(uint32_t);
+
+ if (blob_len < sizeof(uint32_t)) {
+ gf_log(this->name, GF_LOG_ERROR, "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mknod.rdev = ntoh32(*(uint32_t *)blob);
+ blob += sizeof(uint32_t);
+ blob_len -= sizeof(uint32_t);
+
+ if (blob_len < sizeof(uint32_t)) {
+ gf_log(this->name, GF_LOG_ERROR, "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+ args->args.mknod.umask = ntoh32(*(uint32_t *)blob);
+ blob_len -= sizeof(uint32_t);
+ }
+
+ if (blob_len) {
+ gf_log(this->name, GF_LOG_ERROR, "gfid: %s. Invalid length",
+ args->gfid);
+ goto err;
+ }
+
+ return args;
+
+err:
+ if (args)
+ ga_newfile_args_free(args);
+
+ return NULL;
+}
+
+ga_heal_args_t *
+ga_heal_parse_args(xlator_t *this, data_t *data)
+{
+ ga_heal_args_t *args = NULL;
+ ga_private_t *priv = NULL;
+ void *blob = NULL;
+ int len = 0;
+ int blob_len = 0;
+
+ blob = data->data;
+ blob_len = data->len;
+
+ priv = this->private;
+
+ /* bname should at least contain a character */
+ if (blob_len < (sizeof(args->gfid) + 2))
+ goto err;
+
+ args = mem_get0(priv->heal_args_pool);
+ if (!args)
+ goto err;
+
+ memcpy(args->gfid, blob, sizeof(args->gfid));
+ blob += sizeof(args->gfid);
+ blob_len -= sizeof(args->gfid);
+
+ len = strnlen(blob, blob_len);
+ if (len == blob_len)
+ goto err;
+
+ args->bname = GF_MALLOC(len + 1, gf_common_mt_char);
+ if (!args->bname)
+ goto err;
+
+ memcpy(args->bname, blob, len);
+ args->bname[len] = '\0';
+ blob_len -= (len + 1);
+
+ if (blob_len)
+ goto err;
+
+ return args;
+
+err:
+ if (args)
+ ga_heal_args_free(args);
+
+ return NULL;
+}
+
+static int32_t
+ga_fill_tmp_loc(loc_t *loc, xlator_t *this, uuid_t gfid, char *bname,
+ dict_t *xdata, loc_t *new_loc)
+{
+ int ret = -1;
+ uint64_t value = 0;
+ inode_t *parent = NULL;
+ unsigned char *gfid_ptr = NULL;
+
+ parent = loc->inode;
+ ret = inode_ctx_get(loc->inode, this, &value);
+ if (!ret) {
+ parent = (void *)(uintptr_t)value;
+ if (gf_uuid_is_null(parent->gfid))
+ parent = loc->inode;
+ }
+
+ /* parent itself should be looked up */
+ gf_uuid_copy(new_loc->pargfid, parent->gfid);
+ new_loc->parent = inode_ref(parent);
+
+ new_loc->inode = inode_grep(parent->table, parent, bname);
+ if (!new_loc->inode) {
+ new_loc->inode = inode_new(parent->table);
+ gf_uuid_copy(new_loc->inode->gfid, gfid);
+ }
+
+ loc_path(new_loc, bname);
+ if (new_loc->path) {
+ new_loc->name = strrchr(new_loc->path, '/');
+ if (new_loc->name)
+ new_loc->name++;
+ }
+
+ gfid_ptr = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!gfid_ptr) {
+ ret = -1;
+ goto out;
+ }
+ gf_uuid_copy(gfid_ptr, gfid);
+ ret = dict_set_gfuuid(xdata, "gfid-req", gfid_ptr, false);
+ if (ret < 0)
+ goto out;
+
+ ret = 0;
+
+out:
+ if (ret && gfid_ptr)
+ GF_FREE(gfid_ptr);
+ return ret;
+}
+
+static gf_boolean_t
+__is_gfid_access_dir(uuid_t gfid)
+{
+ static uuid_t aux_gfid = {0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, GF_AUX_GFID};
+
+ if (gf_uuid_compare(gfid, aux_gfid) == 0)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+int32_t
+ga_forget(xlator_t *this, inode_t *inode)
+{
+ int ret = -1;
+ uint64_t value = 0;
+ inode_t *tmp_inode = NULL;
+
+ ret = inode_ctx_del(inode, this, &value);
+ if (ret)
+ goto out;
+
+ tmp_inode = (void *)(uintptr_t)value;
+ inode_unref(tmp_inode);
+
+out:
+ return 0;
+}
+
+static int
+ga_heal_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *stat, dict_t *dict,
+ struct iatt *postparent)
+{
+ call_frame_t *orig_frame = NULL;
+
+ orig_frame = frame->local;
+ frame->local = NULL;
+
+ /* don't worry about inode linking and other stuff. They'll happen on
+ * the next lookup.
+ */
+ STACK_DESTROY(frame->root);
+
+ STACK_UNWIND_STRICT(setxattr, orig_frame, op_ret, op_errno, dict);
+
+ return 0;
+}
+
+static int
+ga_newentry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ ga_local_t *local = NULL;
+
+ local = frame->local;
+
+ /* don't worry about inode linking and other stuff. They'll happen on
+ * the next lookup.
+ */
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+
+ STACK_UNWIND_STRICT(setxattr, local->orig_frame, op_ret, op_errno, xdata);
+
+ if (local->xdata)
+ dict_unref(local->xdata);
+ loc_wipe(&local->loc);
+ mem_put(local);
+
+ return 0;
+}
+
+static int
+ga_newentry_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stat, dict_t *xdata,
+ struct iatt *postparent)
+
+{
+ ga_local_t *local = NULL;
+
+ local = frame->local;
+
+ if ((op_ret < 0) && ((op_errno != ENOENT) && (op_errno != ESTALE)))
+ goto err;
+
+ STACK_WIND(frame, ga_newentry_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, &local->loc, local->mode,
+ local->rdev, local->umask, local->xdata);
+ return 0;
+
+err:
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+ STACK_UNWIND_STRICT(setxattr, local->orig_frame, op_ret, op_errno, xdata);
+ if (local->xdata)
+ dict_unref(local->xdata);
+ loc_wipe(&local->loc);
+ mem_put(local);
+
+ return 0;
+}
+
+int32_t
+ga_new_entry(call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data,
+ dict_t *xdata)
+{
+ int ret = -1;
+ ga_newfile_args_t *args = NULL;
+ loc_t tmp_loc = {
+ 0,
+ };
+ call_frame_t *new_frame = NULL;
+ ga_local_t *local = NULL;
+ uuid_t gfid = {
+ 0,
+ };
+
+ if (!xdata) {
+ xdata = dict_new();
+ } else {
+ xdata = dict_ref(xdata);
+ }
+
+ if (!xdata) {
+ ret = -1;
+ goto out;
+ }
+
+ args = ga_newfile_parse_args(this, data);
+ if (!args)
+ goto out;
+
+ ret = gf_uuid_parse(args->gfid, gfid);
+ if (ret)
+ goto out;
+
+ ret = ga_fill_tmp_loc(loc, this, gfid, args->bname, xdata, &tmp_loc);
+ if (ret)
+ goto out;
+
+ new_frame = copy_frame(frame);
+ if (!new_frame)
+ goto out;
+
+ local = mem_get0(this->local_pool);
+ local->orig_frame = frame;
+
+ loc_copy(&local->loc, &tmp_loc);
+
+ new_frame->local = local;
+ new_frame->root->uid = args->uid;
+ new_frame->root->gid = args->gid;
+
+ if (S_ISDIR(args->st_mode)) {
+ STACK_WIND(new_frame, ga_newentry_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, &tmp_loc,
+ args->args.mkdir.mode, args->args.mkdir.umask, xdata);
+ } else if (S_ISLNK(args->st_mode)) {
+ STACK_WIND(new_frame, ga_newentry_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink,
+ args->args.symlink.linkpath, &tmp_loc, 0, xdata);
+ } else {
+ /* use 07777 (4 7s) for considering the Sticky bits etc) */
+ ((ga_local_t *)new_frame->local)->mode = (S_IFMT & args->st_mode) |
+ (07777 &
+ args->args.mknod.mode);
+
+ ((ga_local_t *)new_frame->local)->umask = args->args.mknod.umask;
+ ((ga_local_t *)new_frame->local)->rdev = args->args.mknod.rdev;
+ ((ga_local_t *)new_frame->local)->xdata = dict_ref(xdata);
+
+ /* send a named lookup, so that dht can cleanup up stale linkto
+ * files etc.
+ */
+ STACK_WIND(new_frame, ga_newentry_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &tmp_loc, NULL);
+ }
+
+ ret = 0;
+out:
+ ga_newfile_args_free(args);
+
+ if (xdata)
+ dict_unref(xdata);
+
+ loc_wipe(&tmp_loc);
+
+ return ret;
+}
+
+int32_t
+ga_heal_entry(call_frame_t *frame, xlator_t *this, loc_t *loc, data_t *data,
+ dict_t *xdata)
+{
+ int ret = -1;
+ ga_heal_args_t *args = NULL;
+ loc_t tmp_loc = {
+ 0,
+ };
+ call_frame_t *new_frame = NULL;
+ uuid_t gfid = {
+ 0,
+ };
+
+ args = ga_heal_parse_args(this, data);
+ if (!args)
+ goto out;
+
+ ret = gf_uuid_parse(args->gfid, gfid);
+ if (ret)
+ goto out;
+
+ if (!xdata)
+ xdata = dict_new();
+ else
+ xdata = dict_ref(xdata);
+
+ if (!xdata) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = ga_fill_tmp_loc(loc, this, gfid, args->bname, xdata, &tmp_loc);
+ if (ret)
+ goto out;
+
+ new_frame = copy_frame(frame);
+ if (!new_frame)
+ goto out;
+
+ new_frame->local = (void *)frame;
+
+ STACK_WIND(new_frame, ga_heal_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &tmp_loc, xdata);
+
+ ret = 0;
+out:
+ if (args)
+ ga_heal_args_free(args);
+
+ loc_wipe(&tmp_loc);
+
+ if (xdata)
+ dict_unref(xdata);
+
+ return ret;
+}
+
+int32_t
+ga_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+ga_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ data_t *data = NULL;
+ int op_errno = ENOMEM;
+ int ret = 0;
+ loc_t ga_loc = {
+ 0,
+ };
+
+ GFID_ACCESS_INODE_OP_CHECK(loc, op_errno, err);
+
+ data = dict_get(dict, GF_FUSE_AUX_GFID_NEWFILE);
+ if (data) {
+ ret = ga_new_entry(frame, this, loc, data, xdata);
+ if (ret)
+ goto err;
+ return 0;
+ }
+
+ data = dict_get(dict, GF_FUSE_AUX_GFID_HEAL);
+ if (data) {
+ ret = ga_heal_entry(frame, this, loc, data, xdata);
+ if (ret)
+ goto err;
+ return 0;
+ }
+
+ // If the inode is a virtual inode change the inode otherwise perform
+ // the operation on same inode
+ ret = ga_valid_inode_loc_copy(&ga_loc, loc, this);
+ if (ret < 0)
+ goto err;
+
+ STACK_WIND(frame, ga_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, &ga_loc, dict, flags, xdata);
+
+ loc_wipe(&ga_loc);
+ return 0;
+err:
+ STACK_UNWIND_STRICT(setxattr, frame, -1, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+ga_virtual_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ int ret = 0;
+ inode_t *cbk_inode = NULL;
+ inode_t *true_inode = NULL;
+ uuid_t random_gfid = {
+ 0,
+ };
+ inode_t *linked_inode = NULL;
+
+ if (frame->local)
+ cbk_inode = frame->local;
+ else
+ cbk_inode = inode_ref(inode);
+
+ frame->local = NULL;
+ if (op_ret)
+ goto unwind;
+
+ if (!IA_ISDIR(buf->ia_type))
+ goto unwind;
+
+ /* need to send back a different inode for linking in itable */
+ if (cbk_inode == inode) {
+ /* check if the inode is in the 'itable' or
+ if its just previously discover()'d inode */
+ true_inode = inode_find(inode->table, buf->ia_gfid);
+ if (!true_inode) {
+ /* This unref is for 'inode_ref()' done in beginning.
+ This is needed as cbk_inode is allocated new inode
+ whose unref is taken at the end*/
+ inode_unref(cbk_inode);
+ cbk_inode = inode_new(inode->table);
+
+ if (!cbk_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ /* the inode is not present in itable, ie, the actual
+ path is not yet looked up. Use the current inode
+ itself for now */
+
+ linked_inode = inode_link(inode, NULL, NULL, buf);
+ inode = linked_inode;
+ } else {
+ /* 'inode_ref()' has been done in inode_find() */
+ inode = true_inode;
+ }
+
+ ret = inode_ctx_put(cbk_inode, this, (uint64_t)(uintptr_t)inode);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "failed to set the inode ctx with"
+ "the actual inode");
+ if (inode)
+ inode_unref(inode);
+ }
+ inode = NULL;
+ }
+
+ if (!gf_uuid_is_null(cbk_inode->gfid)) {
+ /* if the previous linked inode is used, use the
+ same gfid */
+ gf_uuid_copy(random_gfid, cbk_inode->gfid);
+ } else {
+ /* replace the buf->ia_gfid to a random gfid
+ for directory, for files, what we received is fine */
+ gf_uuid_generate(random_gfid);
+ }
+
+ gf_uuid_copy(buf->ia_gfid, random_gfid);
+
+ buf->ia_ino = gfid_to_ino(buf->ia_gfid);
+
+unwind:
+ /* Lookup on non-existing gfid returns ESTALE.
+ Convert into ENOENT for virtual lookup*/
+ if (op_errno == ESTALE)
+ op_errno = ENOENT;
+
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, cbk_inode, buf, xdata,
+ postparent);
+
+ /* Also handles inode_unref of frame->local if done in ga_lookup */
+ if (cbk_inode)
+ inode_unref(cbk_inode);
+
+ return 0;
+}
+
+int32_t
+ga_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ ga_private_t *priv = NULL;
+
+ /* if the entry in question is not 'root',
+ then follow the normal path */
+ if (op_ret || !__is_root_gfid(buf->ia_gfid))
+ goto unwind;
+
+ priv = this->private;
+
+ /* do we need to copy root stbuf every time? */
+ /* mostly yes, as we want to have the 'stat' info show latest
+ in every _cbk() */
+
+ /* keep the reference for root stat buf */
+ priv->root_stbuf = *buf;
+ priv->gfiddir_stbuf = priv->root_stbuf;
+ priv->gfiddir_stbuf.ia_gfid[15] = GF_AUX_GFID;
+ priv->gfiddir_stbuf.ia_ino = GF_AUX_GFID;
+
+unwind:
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
+}
+
+int32_t
+ga_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ ga_private_t *priv = NULL;
+ int ret = -1;
+ uuid_t tmp_gfid = {
+ 0,
+ };
+ loc_t tmp_loc = {
+ 0,
+ };
+ uint64_t value = 0;
+ inode_t *inode = NULL;
+ inode_t *true_inode = NULL;
+ int32_t op_errno = ENOENT;
+
+ priv = this->private;
+
+ /* Handle nameless lookup on ".gfid" */
+ if (!loc->parent && __is_gfid_access_dir(loc->gfid)) {
+ STACK_UNWIND_STRICT(lookup, frame, 0, 0, loc->inode,
+ &priv->gfiddir_stbuf, xdata, &priv->root_stbuf);
+ return 0;
+ }
+
+ /* if its discover(), no need for any action here */
+ if (!loc->name)
+ goto wind;
+
+ /* if its revalidate, and inode is not of type directory,
+ proceed with 'wind' */
+ if (loc->inode && loc->inode->ia_type && !IA_ISDIR(loc->inode->ia_type)) {
+ /* a revalidate on ".gfid/<dentry>" is possible, check for it */
+ if (((loc->parent && __is_gfid_access_dir(loc->parent->gfid)) ||
+ __is_gfid_access_dir(loc->pargfid))) {
+ /* here, just send 'loc->gfid' and 'loc->inode' */
+ tmp_loc.inode = inode_ref(loc->inode);
+ gf_uuid_copy(tmp_loc.gfid, loc->inode->gfid);
+
+ STACK_WIND(frame, default_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &tmp_loc, xdata);
+
+ inode_unref(tmp_loc.inode);
+
+ return 0;
+ }
+
+ /* not something to bother, continue the flow */
+ goto wind;
+ }
+
+ /* need to check if the lookup is on virtual dir */
+ if ((loc->name && !strcmp(GF_GFID_DIR, loc->name)) &&
+ ((loc->parent && __is_root_gfid(loc->parent->gfid)) ||
+ __is_root_gfid(loc->pargfid))) {
+ /* this means, the query is on '/.gfid', return the fake stat,
+ and say success */
+
+ STACK_UNWIND_STRICT(lookup, frame, 0, 0, loc->inode,
+ &priv->gfiddir_stbuf, xdata, &priv->root_stbuf);
+ return 0;
+ }
+
+ /* now, check if the lookup() is on an existing entry,
+ but on gfid-path */
+ if (!((loc->parent && __is_gfid_access_dir(loc->parent->gfid)) ||
+ __is_gfid_access_dir(loc->pargfid))) {
+ if (!loc->parent)
+ goto wind;
+
+ ret = inode_ctx_get(loc->parent, this, &value);
+ if (ret)
+ goto wind;
+
+ inode = (inode_t *)(uintptr_t)value;
+
+ ret = loc_copy_overload_parent(&tmp_loc, loc, inode);
+ if (ret)
+ goto err;
+
+ STACK_WIND(frame, ga_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &tmp_loc, xdata);
+
+ loc_wipe(&tmp_loc);
+ return 0;
+ }
+
+ /* make sure the 'basename' is actually a 'canonical-gfid',
+ otherwise, return error */
+ ret = gf_uuid_parse(loc->name, tmp_gfid);
+ if (ret)
+ goto err;
+
+ /* if its fresh lookup, go ahead and send it down, if not,
+ for directory, we need indirection to actual dir inode */
+ if (!(loc->inode && loc->inode->ia_type))
+ goto discover;
+
+ /* revalidate on directory */
+ ret = inode_ctx_get(loc->inode, this, &value);
+ if (ret)
+ goto err;
+
+ inode = (void *)(uintptr_t)value;
+
+ /* valid inode, already looked up, work on that */
+ if (inode->ia_type)
+ goto discover;
+
+ /* check if the inode is in the 'itable' or
+ if its just previously discover()'d inode */
+ true_inode = inode_find(loc->inode->table, tmp_gfid);
+ if (true_inode) {
+ /* time do another lookup and update the context
+ with proper inode */
+ op_errno = ESTALE;
+ /* 'inode_ref()' done in inode_find */
+ inode_unref(true_inode);
+ goto err;
+ }
+
+discover:
+ /* for the virtual entries, we don't need to send 'gfid-req' key, as
+ for these entries, we don't want to 'set' a new gfid */
+ if (xdata)
+ dict_del(xdata, "gfid-req");
+
+ gf_uuid_copy(tmp_loc.gfid, tmp_gfid);
+
+ /* if revalidate, then we need to have the proper reference */
+ if (inode) {
+ tmp_loc.inode = inode_ref(inode);
+ frame->local = inode_ref(loc->inode);
+ } else {
+ tmp_loc.inode = inode_ref(loc->inode);
+ }
+
+ STACK_WIND(frame, ga_virtual_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &tmp_loc, xdata);
+
+ inode_unref(tmp_loc.inode);
+
+ return 0;
+
+wind:
+ /* used for all the normal lookup path */
+ STACK_WIND(frame, ga_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, loc->inode,
+ &priv->gfiddir_stbuf, xdata, &priv->root_stbuf);
+ return 0;
+}
+
+int
+ga_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+
+ GFID_ACCESS_ENTRY_OP_CHECK(loc, op_errno, err);
+
+ STACK_WIND(frame, default_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(mkdir, frame, -1, op_errno, loc->inode, NULL, NULL,
+ NULL, xdata);
+ return 0;
+}
+
+int
+ga_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+
+ GFID_ACCESS_ENTRY_OP_CHECK(loc, op_errno, err);
+
+ STACK_WIND(frame, default_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT(create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, xdata);
+
+ return 0;
+}
+
+int
+ga_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+
+ GFID_ACCESS_ENTRY_OP_CHECK(loc, op_errno, err);
+
+ STACK_WIND(frame, default_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT(symlink, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ xdata);
+
+ return 0;
+}
+
+int
+ga_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+
+ GFID_ACCESS_ENTRY_OP_CHECK(loc, op_errno, err);
+
+ STACK_WIND(frame, default_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ xdata);
+
+ return 0;
+}
+
+int
+ga_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+ dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ int ret = -1;
+ loc_t ga_loc = {
+ 0,
+ };
+
+ GFID_ACCESS_ENTRY_OP_CHECK(loc, op_errno, err);
+
+ ret = ga_valid_inode_loc_copy(&ga_loc, loc, this);
+ if (ret < 0)
+ goto err;
+
+ STACK_WIND(frame, default_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, &ga_loc, flag, xdata);
+
+ loc_wipe(&ga_loc);
+ return 0;
+err:
+ STACK_UNWIND_STRICT(rmdir, frame, -1, op_errno, NULL, NULL, xdata);
+
+ return 0;
+}
+
+int
+ga_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t xflag,
+ dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ int ret = -1;
+ loc_t ga_loc = {
+ 0,
+ };
+
+ GFID_ACCESS_ENTRY_OP_CHECK(loc, op_errno, err);
+
+ ret = ga_valid_inode_loc_copy(&ga_loc, loc, this);
+ if (ret < 0)
+ goto err;
+
+ STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &ga_loc, xflag, xdata);
+
+ loc_wipe(&ga_loc);
+ return 0;
+err:
+ STACK_UNWIND_STRICT(unlink, frame, -1, op_errno, NULL, NULL, xdata);
+
+ return 0;
+}
+
+int
+ga_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ int ret = 0;
+ loc_t ga_oldloc = {
+ 0,
+ };
+ loc_t ga_newloc = {
+ 0,
+ };
+
+ GFID_ACCESS_ENTRY_OP_CHECK(oldloc, op_errno, err);
+ GFID_ACCESS_ENTRY_OP_CHECK(newloc, op_errno, err);
+
+ ret = ga_valid_inode_loc_copy(&ga_oldloc, oldloc, this);
+ if (ret < 0)
+ goto err;
+
+ ret = ga_valid_inode_loc_copy(&ga_newloc, newloc, this);
+ if (ret < 0) {
+ loc_wipe(&ga_oldloc);
+ goto err;
+ }
+
+ STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, &ga_oldloc, &ga_newloc, xdata);
+
+ loc_wipe(&ga_newloc);
+ loc_wipe(&ga_oldloc);
+ return 0;
+err:
+ STACK_UNWIND_STRICT(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, xdata);
+
+ return 0;
+}
+
+int
+ga_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ int ret = 0;
+ loc_t ga_oldloc = {
+ 0,
+ };
+ loc_t ga_newloc = {
+ 0,
+ };
+
+ GFID_ACCESS_ENTRY_OP_CHECK(oldloc, op_errno, err);
+ GFID_ACCESS_ENTRY_OP_CHECK(newloc, op_errno, err);
+
+ ret = ga_valid_inode_loc_copy(&ga_oldloc, oldloc, this);
+ if (ret < 0)
+ goto err;
+
+ ret = ga_valid_inode_loc_copy(&ga_newloc, newloc, this);
+ if (ret < 0) {
+ loc_wipe(&ga_oldloc);
+ goto err;
+ }
+
+ STACK_WIND(frame, default_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, &ga_oldloc, &ga_newloc, xdata);
+
+ loc_wipe(&ga_newloc);
+ loc_wipe(&ga_oldloc);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(link, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ xdata);
+
+ return 0;
+}
+
+int32_t
+ga_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+
+ GFID_ACCESS_INODE_OP_CHECK(loc, op_errno, err);
+
+ /* also check if the loc->inode itself is virtual
+ inode, if yes, return with failure, mainly because we
+ can't handle all the readdirp and other things on it. */
+ if (inode_ctx_get(loc->inode, this, NULL) == 0) {
+ op_errno = ENOTSUP;
+ goto err;
+ }
+
+ STACK_WIND(frame, default_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT(opendir, frame, -1, op_errno, NULL, xdata);
+
+ return 0;
+}
+
+int32_t
+ga_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ int ret = -1;
+ loc_t ga_loc = {
+ 0,
+ };
+
+ GFID_ACCESS_INODE_OP_CHECK(loc, op_errno, err);
+ ret = ga_valid_inode_loc_copy(&ga_loc, loc, this);
+ if (ret < 0)
+ goto err;
+
+ STACK_WIND(frame, default_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, &ga_loc, name, xdata);
+
+ loc_wipe(&ga_loc);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(getxattr, frame, -1, op_errno, NULL, xdata);
+
+ return 0;
+}
+
+int32_t
+ga_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ int ret = -1;
+ loc_t ga_loc = {
+ 0,
+ };
+ ga_private_t *priv = NULL;
+
+ priv = this->private;
+ /* If stat is on ".gfid" itself, do not wind further,
+ * return fake stat and return success.
+ */
+ if (__is_gfid_access_dir(loc->gfid))
+ goto out;
+
+ ret = ga_valid_inode_loc_copy(&ga_loc, loc, this);
+ if (ret < 0)
+ goto err;
+
+ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, &ga_loc, xdata);
+
+ loc_wipe(&ga_loc);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(stat, frame, -1, op_errno, NULL, xdata);
+
+ return 0;
+
+out:
+ STACK_UNWIND_STRICT(stat, frame, 0, 0, &priv->gfiddir_stbuf, xdata);
+ return 0;
+}
+
+int32_t
+ga_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ int ret = -1;
+ loc_t ga_loc = {
+ 0,
+ };
+
+ GFID_ACCESS_INODE_OP_CHECK(loc, op_errno, err);
+ ret = ga_valid_inode_loc_copy(&ga_loc, loc, this);
+ if (ret < 0)
+ goto err;
+
+ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, &ga_loc, stbuf, valid, xdata);
+
+ loc_wipe(&ga_loc);
+ return 0;
+err:
+ STACK_UNWIND_STRICT(setattr, frame, -1, op_errno, NULL, NULL, xdata);
+
+ return 0;
+}
+
+int32_t
+ga_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int op_errno = ENOMEM;
+ int ret = -1;
+ loc_t ga_loc = {
+ 0,
+ };
+
+ GFID_ACCESS_INODE_OP_CHECK(loc, op_errno, err);
+ ret = ga_valid_inode_loc_copy(&ga_loc, loc, this);
+ if (ret < 0)
+ goto err;
+
+ STACK_WIND(frame, default_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, &ga_loc, name, xdata);
+
+ loc_wipe(&ga_loc);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(removexattr, frame, -1, op_errno, xdata);
+
+ return 0;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, gf_gfid_access_mt_end + 1);
+
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Memory accounting"
+ " init failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int32_t
+init(xlator_t *this)
+{
+ ga_private_t *priv = NULL;
+ int ret = -1;
+
+ if (!this->children || this->children->next) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "not configured with exactly one child. exiting");
+ goto out;
+ }
+
+ /* This can be the top of graph in certain cases */
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_DEBUG, "dangling volume. check volfile ");
+ }
+
+ /* TODO: define a mem-type structure */
+ priv = GF_CALLOC(1, sizeof(*priv), gf_gfid_access_mt_priv_t);
+ if (!priv)
+ goto out;
+
+ priv->newfile_args_pool = mem_pool_new(ga_newfile_args_t, 512);
+ if (!priv->newfile_args_pool)
+ goto out;
+
+ priv->heal_args_pool = mem_pool_new(ga_heal_args_t, 512);
+ if (!priv->heal_args_pool)
+ goto out;
+
+ this->local_pool = mem_pool_new(ga_local_t, 16);
+ if (!this->local_pool) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
+ this->private = priv;
+
+ ret = 0;
+out:
+ if (ret && priv) {
+ if (priv->newfile_args_pool)
+ mem_pool_destroy(priv->newfile_args_pool);
+ GF_FREE(priv);
+ }
+
+ return ret;
+}
+
+void
+fini(xlator_t *this)
+{
+ ga_private_t *priv = NULL;
+ priv = this->private;
+ this->private = NULL;
+
+ if (priv) {
+ if (priv->newfile_args_pool)
+ mem_pool_destroy(priv->newfile_args_pool);
+ if (priv->heal_args_pool)
+ mem_pool_destroy(priv->heal_args_pool);
+ GF_FREE(priv);
+ }
+
+ return;
+}
+
+int32_t
+ga_dump_inodectx(xlator_t *this, inode_t *inode)
+{
+ int ret = -1;
+ uint64_t value = 0;
+ inode_t *tmp_inode = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+
+ ret = inode_ctx_get(inode, this, &value);
+ if (ret == 0) {
+ tmp_inode = (void *)(uintptr_t)value;
+ gf_proc_dump_build_key(key_prefix, this->name, "inode");
+ gf_proc_dump_add_section("%s", key_prefix);
+ gf_proc_dump_write("real-gfid", "%s", uuid_utoa(tmp_inode->gfid));
+ }
+
+ return 0;
+}
+
+struct xlator_fops fops = {
+ .lookup = ga_lookup,
+
+ /* entry fops */
+ .mkdir = ga_mkdir,
+ .mknod = ga_mknod,
+ .create = ga_create,
+ .symlink = ga_symlink,
+ .link = ga_link,
+ .unlink = ga_unlink,
+ .rmdir = ga_rmdir,
+ .rename = ga_rename,
+
+ /* handle any other directory operations here */
+ .opendir = ga_opendir,
+ .stat = ga_stat,
+ .setattr = ga_setattr,
+ .getxattr = ga_getxattr,
+ .removexattr = ga_removexattr,
+
+ /* special fop to handle more entry creations */
+ .setxattr = ga_setxattr,
+};
+
+struct xlator_cbks cbks = {
+ .forget = ga_forget,
+};
+
+struct xlator_dumpops dumpops = {
+ .inodectx = ga_dump_inodectx,
+};
+
+struct volume_options options[] = {
+ /* This translator doesn't take any options, or provide any options */
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "gfid-access",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/gfid-access/src/gfid-access.h b/xlators/features/gfid-access/src/gfid-access.h
new file mode 100644
index 00000000000..b1e255e56c0
--- /dev/null
+++ b/xlators/features/gfid-access/src/gfid-access.h
@@ -0,0 +1,107 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __GFID_ACCESS_H__
+#define __GFID_ACCESS_H__
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include "gfid-access-mem-types.h"
+
+#define UUID_CANONICAL_FORM_LEN 36
+
+#define GF_FUSE_AUX_GFID_NEWFILE "glusterfs.gfid.newfile"
+#define GF_FUSE_AUX_GFID_HEAL "glusterfs.gfid.heal"
+
+#define GF_GFID_KEY "GLUSTERFS_GFID"
+#define GF_GFID_DIR ".gfid"
+#define GF_AUX_GFID 0xd
+
+#define GFID_ACCESS_ENTRY_OP_CHECK(loc, err, lbl) \
+ do { \
+ /* need to check if the lookup is on virtual dir */ \
+ if ((loc->name && !strcmp(GF_GFID_DIR, loc->name)) && \
+ ((loc->parent && __is_root_gfid(loc->parent->gfid)) || \
+ __is_root_gfid(loc->pargfid))) { \
+ err = ENOTSUP; \
+ goto lbl; \
+ } \
+ \
+ /* now, check if the lookup() is on an existing */ \
+ /* entry, but on gfid-path */ \
+ if ((loc->parent && __is_gfid_access_dir(loc->parent->gfid)) || \
+ __is_gfid_access_dir(loc->pargfid)) { \
+ err = EPERM; \
+ goto lbl; \
+ } \
+ } while (0)
+
+#define GFID_ACCESS_INODE_OP_CHECK(loc, err, lbl) \
+ do { \
+ /*Check if it is on .gfid*/ \
+ if (__is_gfid_access_dir(loc->gfid)) { \
+ err = ENOTSUP; \
+ goto lbl; \
+ } \
+ } while (0)
+typedef struct {
+ unsigned int uid;
+ unsigned int gid;
+ char gfid[UUID_CANONICAL_FORM_LEN + 1];
+ unsigned int st_mode;
+ char *bname;
+
+ union {
+ struct _symlink_in {
+ char *linkpath;
+ } __attribute__((__packed__)) symlink;
+
+ struct _mknod_in {
+ unsigned int mode;
+ unsigned int rdev;
+ unsigned int umask;
+ } __attribute__((__packed__)) mknod;
+
+ struct _mkdir_in {
+ unsigned int mode;
+ unsigned int umask;
+ } __attribute__((__packed__)) mkdir;
+ } __attribute__((__packed__)) args;
+} __attribute__((__packed__)) ga_newfile_args_t;
+
+typedef struct {
+ char gfid[UUID_CANONICAL_FORM_LEN + 1];
+ char *bname; /* a null terminated basename */
+} __attribute__((__packed__)) ga_heal_args_t;
+
+struct ga_private {
+ /* root inode's stbuf */
+ struct iatt root_stbuf;
+ struct iatt gfiddir_stbuf;
+ struct mem_pool *newfile_args_pool;
+ struct mem_pool *heal_args_pool;
+};
+typedef struct ga_private ga_private_t;
+
+struct __ga_local {
+ call_frame_t *orig_frame;
+ unsigned int uid;
+ unsigned int gid;
+ loc_t loc;
+ mode_t mode;
+ dev_t rdev;
+ mode_t umask;
+ dict_t *xdata;
+};
+typedef struct __ga_local ga_local_t;
+
+#endif /* __GFID_ACCESS_H__ */
diff --git a/xlators/features/index/src/Makefile.am b/xlators/features/index/src/Makefile.am
index 5d037c7eca6..c71c238c163 100644
--- a/xlators/features/index/src/Makefile.am
+++ b/xlators/features/index/src/Makefile.am
@@ -1,15 +1,19 @@
+if WITH_SERVER
xlator_LTLIBRARIES = index.la
+endif
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-index_la_LDFLAGS = -module -avoidversion
+index_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
index_la_SOURCES = index.c
index_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = index.h index-mem-types.h
+noinst_HEADERS = index.h index-mem-types.h index-messages.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/rpc/xdr/src \
- -I$(top_srcdir)/rpc/rpc-lib/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/index/src/index-mem-types.h b/xlators/features/index/src/index-mem-types.h
index 964f3b09029..58833d0ec9b 100644
--- a/xlators/features/index/src/index-mem-types.h
+++ b/xlators/features/index/src/index-mem-types.h
@@ -1,32 +1,23 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#ifndef __INDEX_MEM_TYPES_H__
+#define __INDEX_MEM_TYPES_H__
-#ifndef __QUIESCE_MEM_TYPES_H__
-#define __QUIESCE_MEM_TYPES_H__
-
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_index_mem_types_ {
- gf_index_mt_priv_t = gf_common_mt_end + 1,
- gf_index_inode_ctx_t = gf_common_mt_end + 2,
- gf_index_fd_ctx_t = gf_common_mt_end + 3,
- gf_index_mt_end
+ gf_index_mt_priv_t = gf_common_mt_end + 1,
+ gf_index_inode_ctx_t,
+ gf_index_fd_ctx_t,
+ gf_index_mt_local_t,
+ gf_index_mt_end
};
#endif
diff --git a/xlators/features/index/src/index-messages.h b/xlators/features/index/src/index-messages.h
new file mode 100644
index 00000000000..364f17cd34e
--- /dev/null
+++ b/xlators/features/index/src/index-messages.h
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _INDEX_MESSAGES_H_
+#define _INDEX_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(INDEX, INDEX_MSG_INDEX_DIR_CREATE_FAILED,
+ INDEX_MSG_INDEX_READDIR_FAILED, INDEX_MSG_INDEX_ADD_FAILED,
+ INDEX_MSG_INDEX_DEL_FAILED, INDEX_MSG_DICT_SET_FAILED,
+ INDEX_MSG_INODE_CTX_GET_SET_FAILED, INDEX_MSG_INVALID_ARGS,
+ INDEX_MSG_FD_OP_FAILED, INDEX_MSG_WORKER_THREAD_CREATE_FAILED,
+ INDEX_MSG_INVALID_GRAPH);
+
+#endif /* !_INDEX_MESSAGES_H_ */
diff --git a/xlators/features/index/src/index.c b/xlators/features/index/src/index.c
index 508eb91a373..4abb2c73ce5 100644
--- a/xlators/features/index/src/index.c
+++ b/xlators/features/index/src/index.c
@@ -1,1184 +1,2682 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "index.h"
-#include "options.h"
+#include <glusterfs/options.h>
#include "glusterfs3-xdr.h"
+#include <glusterfs/syscall.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/common-utils.h>
+#include "index-messages.h"
+#include <ftw.h>
+#include <libgen.h> /* for dirname() */
+#include <signal.h>
#define XATTROP_SUBDIR "xattrop"
+#define DIRTY_SUBDIR "dirty"
+#define ENTRY_CHANGES_SUBDIR "entry-changes"
-call_stub_t *
-__index_dequeue (struct list_head *callstubs)
+struct index_syncop_args {
+ inode_t *parent;
+ gf_dirent_t *entries;
+ char *path;
+};
+
+static char *index_vgfid_xattrs[XATTROP_TYPE_END] = {
+ [XATTROP] = GF_XATTROP_INDEX_GFID,
+ [DIRTY] = GF_XATTROP_DIRTY_GFID,
+ [ENTRY_CHANGES] = GF_XATTROP_ENTRY_CHANGES_GFID};
+
+static char *index_subdirs[XATTROP_TYPE_END] = {
+ [XATTROP] = XATTROP_SUBDIR,
+ [DIRTY] = DIRTY_SUBDIR,
+ [ENTRY_CHANGES] = ENTRY_CHANGES_SUBDIR};
+
+int
+index_get_type_from_vgfid(index_priv_t *priv, uuid_t vgfid)
{
- call_stub_t *stub = NULL;
+ int i = 0;
- if (!list_empty (callstubs)) {
- stub = list_entry (callstubs->next, call_stub_t, list);
- list_del_init (&stub->list);
- }
+ for (i = 0; i < XATTROP_TYPE_END; i++) {
+ if (gf_uuid_compare(priv->internal_vgfid[i], vgfid) == 0)
+ return i;
+ }
+ return -1;
+}
- return stub;
+gf_boolean_t
+index_is_virtual_gfid(index_priv_t *priv, uuid_t vgfid)
+{
+ if (index_get_type_from_vgfid(priv, vgfid) < 0)
+ return _gf_false;
+ return _gf_true;
}
-inline static void
-__index_enqueue (struct list_head *callstubs, call_stub_t *stub)
+static int
+__index_inode_ctx_get(inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx)
{
- list_add_tail (&stub->list, callstubs);
+ int ret = 0;
+ index_inode_ctx_t *ictx = NULL;
+ uint64_t tmpctx = 0;
+
+ ret = __inode_ctx_get(inode, this, &tmpctx);
+ if (!ret) {
+ ictx = (index_inode_ctx_t *)(long)tmpctx;
+ goto out;
+ }
+ ictx = GF_CALLOC(1, sizeof(*ictx), gf_index_inode_ctx_t);
+ if (!ictx) {
+ ret = -1;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&ictx->callstubs);
+ ret = __inode_ctx_put(inode, this, (uint64_t)(uintptr_t)ictx);
+ if (ret) {
+ GF_FREE(ictx);
+ ictx = NULL;
+ goto out;
+ }
+out:
+ if (ictx)
+ *ctx = ictx;
+ return ret;
}
-static void
-worker_enqueue (xlator_t *this, call_stub_t *stub)
+static int
+index_inode_ctx_get(inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx)
{
- index_priv_t *priv = NULL;
+ int ret = 0;
- priv = this->private;
- pthread_mutex_lock (&priv->mutex);
- {
- __index_enqueue (&priv->callstubs, stub);
- pthread_cond_signal (&priv->cond);
- }
- pthread_mutex_unlock (&priv->mutex);
+ LOCK(&inode->lock);
+ {
+ ret = __index_inode_ctx_get(inode, this, ctx);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
}
-void *
-index_worker (void *data)
-{
- index_priv_t *priv = NULL;
- xlator_t *this = NULL;
- call_stub_t *stub = NULL;
- int ret = 0;
-
- THIS = data;
- this = data;
- priv = this->private;
-
- for (;;) {
- pthread_mutex_lock (&priv->mutex);
- {
- while (list_empty (&priv->callstubs)) {
- ret = pthread_cond_wait (&priv->cond,
- &priv->mutex);
- }
-
- stub = __index_dequeue (&priv->callstubs);
- }
- pthread_mutex_unlock (&priv->mutex);
+static gf_boolean_t
+index_is_subdir_of_entry_changes(xlator_t *this, inode_t *inode)
+{
+ index_inode_ctx_t *ctx = NULL;
+ int ret = 0;
- if (stub) /* guard against spurious wakeups */
- call_resume (stub);
- }
+ if (!inode)
+ return _gf_false;
- return NULL;
+ ret = index_inode_ctx_get(inode, this, &ctx);
+ if ((ret == 0) && !gf_uuid_is_null(ctx->virtual_pargfid))
+ return _gf_true;
+ return _gf_false;
}
-int
-__index_inode_ctx_get (inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx)
+
+static int
+index_get_type_from_vgfid_xattr(const char *name)
{
- int ret = 0;
- index_inode_ctx_t *ictx = NULL;
- uint64_t tmpctx = 0;
+ int i = 0;
- ret = __inode_ctx_get (inode, this, &tmpctx);
- if (!ret) {
- ictx = (index_inode_ctx_t*) (long) tmpctx;
- goto out;
- }
- ictx = GF_CALLOC (1, sizeof (*ictx), gf_index_inode_ctx_t);
- if (!ictx) {
- ret = -1;
- goto out;
- }
+ for (i = 0; i < XATTROP_TYPE_END; i++) {
+ if (strcmp(name, index_vgfid_xattrs[i]) == 0)
+ return i;
+ }
+ return -1;
+}
- INIT_LIST_HEAD (&ictx->callstubs);
- ret = __inode_ctx_put (inode, this, (uint64_t)ictx);
- if (ret) {
- GF_FREE (ictx);
- ictx = NULL;
- goto out;
- }
-out:
- if (ictx)
- *ctx = ictx;
- return ret;
+gf_boolean_t
+index_is_fop_on_internal_inode(xlator_t *this, inode_t *inode, uuid_t gfid)
+{
+ index_priv_t *priv = this->private;
+ uuid_t vgfid = {0};
+
+ if (!inode)
+ return _gf_false;
+
+ if (gfid && !gf_uuid_is_null(gfid))
+ gf_uuid_copy(vgfid, gfid);
+ else
+ gf_uuid_copy(vgfid, inode->gfid);
+
+ if (index_is_virtual_gfid(priv, vgfid))
+ return _gf_true;
+ if (index_is_subdir_of_entry_changes(this, inode))
+ return _gf_true;
+ return _gf_false;
}
-int
-index_inode_ctx_get (inode_t *inode, xlator_t *this, index_inode_ctx_t **ctx)
+static gf_boolean_t
+index_is_vgfid_xattr(const char *name)
{
- int ret = 0;
+ if (index_get_type_from_vgfid_xattr(name) < 0)
+ return _gf_false;
+ return _gf_true;
+}
- LOCK (&inode->lock);
- {
- ret = __index_inode_ctx_get (inode, this, ctx);
- }
- UNLOCK (&inode->lock);
+call_stub_t *
+__index_dequeue(struct list_head *callstubs)
+{
+ call_stub_t *stub = NULL;
+
+ if (!list_empty(callstubs)) {
+ stub = list_entry(callstubs->next, call_stub_t, list);
+ list_del_init(&stub->list);
+ }
- return ret;
+ return stub;
}
static void
-make_index_dir_path (char *base, const char *subdir,
- char *index_dir, size_t len)
+__index_enqueue(struct list_head *callstubs, call_stub_t *stub)
{
- snprintf (index_dir, len, "%s/%s", base, subdir);
+ list_add_tail(&stub->list, callstubs);
}
-int
-index_dir_create (xlator_t *this, const char *subdir)
-{
- int ret = 0;
- struct stat st = {0};
- char fullpath[PATH_MAX] = {0};
- char path[PATH_MAX] = {0};
- char *dir = NULL;
- index_priv_t *priv = NULL;
- size_t len = 0;
- size_t pathlen = 0;
-
- priv = this->private;
- make_index_dir_path (priv->index_basepath, subdir, fullpath,
- sizeof (fullpath));
- ret = stat (fullpath, &st);
- if (!ret) {
- if (!S_ISDIR (st.st_mode))
- ret = -2;
- goto out;
+static void
+worker_enqueue(xlator_t *this, call_stub_t *stub)
+{
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+ pthread_mutex_lock(&priv->mutex);
+ {
+ __index_enqueue(&priv->callstubs, stub);
+ GF_ATOMIC_INC(priv->stub_cnt);
+ pthread_cond_signal(&priv->cond);
+ }
+ pthread_mutex_unlock(&priv->mutex);
+}
+
+void *
+index_worker(void *data)
+{
+ index_priv_t *priv = NULL;
+ xlator_t *this = NULL;
+ call_stub_t *stub = NULL;
+ gf_boolean_t bye = _gf_false;
+
+ THIS = data;
+ this = data;
+ priv = this->private;
+
+ for (;;) {
+ pthread_mutex_lock(&priv->mutex);
+ {
+ while (list_empty(&priv->callstubs)) {
+ if (priv->down) {
+ bye = _gf_true; /*Avoid wait*/
+ break;
+ }
+ (void)pthread_cond_wait(&priv->cond, &priv->mutex);
+ if (priv->down) {
+ bye = _gf_true;
+ break;
+ }
+ }
+ if (!bye)
+ stub = __index_dequeue(&priv->callstubs);
+ if (bye) {
+ priv->curr_count--;
+ if (priv->curr_count == 0)
+ pthread_cond_broadcast(&priv->cond);
+ }
}
+ pthread_mutex_unlock(&priv->mutex);
- pathlen = strlen (fullpath);
- if ((pathlen > 1) && fullpath[pathlen - 1] == '/')
- fullpath[pathlen - 1] = '\0';
- dir = strchr (fullpath, '/');
- while (dir) {
- dir = strchr (dir + 1, '/');
- if (dir)
- len = pathlen - strlen (dir);
- else
- len = pathlen;
- strncpy (path, fullpath, len);
- path[len] = '\0';
- ret = mkdir (path, 0600);
- if (ret && (errno != EEXIST))
- goto out;
+ if (stub) { /* guard against spurious wakeups */
+ call_resume(stub);
+ GF_ATOMIC_DEC(priv->stub_cnt);
}
- ret = 0;
+ stub = NULL;
+ if (bye)
+ break;
+ }
+
+ return NULL;
+}
+
+static void
+make_index_dir_path(char *base, const char *subdir, char *index_dir, size_t len)
+{
+ snprintf(index_dir, len, "%s/%s", base, subdir);
+}
+
+int
+index_dir_create(xlator_t *this, const char *subdir)
+{
+ int ret = 0;
+ struct stat st = {0};
+ char fullpath[PATH_MAX] = {0};
+ char path[PATH_MAX] = {0};
+ char *dir = NULL;
+ index_priv_t *priv = NULL;
+ size_t len = 0;
+ size_t pathlen = 0;
+
+ priv = this->private;
+ make_index_dir_path(priv->index_basepath, subdir, fullpath,
+ sizeof(fullpath));
+ ret = sys_stat(fullpath, &st);
+ if (!ret) {
+ if (!S_ISDIR(st.st_mode))
+ ret = -2;
+ goto out;
+ }
+
+ pathlen = strlen(fullpath);
+ if ((pathlen > 1) && fullpath[pathlen - 1] == '/')
+ fullpath[pathlen - 1] = '\0';
+ dir = strchr(fullpath, '/');
+ while (dir) {
+ dir = strchr(dir + 1, '/');
+ if (dir)
+ len = pathlen - strlen(dir);
+ else
+ len = pathlen;
+ strncpy(path, fullpath, len);
+ path[len] = '\0';
+ ret = sys_mkdir(path, 0600);
+ if (ret && (errno != EEXIST))
+ goto out;
+ }
+ ret = 0;
out:
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s/%s: Failed to "
- "create (%s)", priv->index_basepath, subdir,
- strerror (errno));
- } else if (ret == -2) {
- gf_log (this->name, GF_LOG_ERROR, "%s/%s: Failed to create, "
- "path exists, not a directory ", priv->index_basepath,
- subdir);
- }
- return ret;
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ INDEX_MSG_INDEX_DIR_CREATE_FAILED,
+ "%s/%s: Failed to "
+ "create",
+ priv->index_basepath, subdir);
+ } else if (ret == -2) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOTDIR,
+ INDEX_MSG_INDEX_DIR_CREATE_FAILED,
+ "%s/%s: Failed to "
+ "create, path exists, not a directory ",
+ priv->index_basepath, subdir);
+ }
+ return ret;
}
void
-index_get_index (index_priv_t *priv, uuid_t index)
+index_get_index(index_priv_t *priv, uuid_t index)
{
- LOCK (&priv->lock);
- {
- uuid_copy (index, priv->index);
- }
- UNLOCK (&priv->lock);
+ LOCK(&priv->lock);
+ {
+ gf_uuid_copy(index, priv->index);
+ }
+ UNLOCK(&priv->lock);
}
void
-index_generate_index (index_priv_t *priv, uuid_t index)
+index_generate_index(index_priv_t *priv, uuid_t index)
{
- LOCK (&priv->lock);
- {
- //To prevent duplicate generates.
- //This method fails if number of contending threads is greater
- //than MAX_LINK count of the fs
- if (!uuid_compare (priv->index, index))
- uuid_generate (priv->index);
- uuid_copy (index, priv->index);
- }
- UNLOCK (&priv->lock);
+ LOCK(&priv->lock);
+ {
+ // To prevent duplicate generates.
+ // This method fails if number of contending threads is greater
+ // than MAX_LINK count of the fs
+ if (!gf_uuid_compare(priv->index, index))
+ gf_uuid_generate(priv->index);
+ gf_uuid_copy(index, priv->index);
+ }
+ UNLOCK(&priv->lock);
}
static void
-make_index_path (char *base, const char *subdir, uuid_t index,
- char *index_path, size_t len)
+make_index_path(char *base, const char *subdir, uuid_t index, char *index_path,
+ size_t len)
{
- make_index_dir_path (base, subdir, index_path, len);
- snprintf (index_path + strlen (index_path), len - strlen (index_path),
- "/%s-%s", subdir, uuid_utoa (index));
+ make_index_dir_path(base, subdir, index_path, len);
+ snprintf(index_path + strlen(index_path), len - strlen(index_path),
+ "/%s-%s", subdir, uuid_utoa(index));
}
static void
-make_gfid_path (char *base, const char *subdir, uuid_t gfid,
- char *gfid_path, size_t len)
+make_gfid_path(char *base, const char *subdir, uuid_t gfid, char *gfid_path,
+ size_t len)
{
- make_index_dir_path (base, subdir, gfid_path, len);
- snprintf (gfid_path + strlen (gfid_path), len - strlen (gfid_path),
- "/%s", uuid_utoa (gfid));
+ make_index_dir_path(base, subdir, gfid_path, len);
+ snprintf(gfid_path + strlen(gfid_path), len - strlen(gfid_path), "/%s",
+ uuid_utoa(gfid));
}
static void
-make_file_path (char *base, const char *subdir, const char *filename,
- char *file_path, size_t len)
+make_file_path(char *base, const char *subdir, const char *filename,
+ char *file_path, size_t len)
{
- make_index_dir_path (base, subdir, file_path, len);
- snprintf (file_path + strlen (file_path), len - strlen (file_path),
- "/%s", filename);
+ make_index_dir_path(base, subdir, file_path, len);
+ snprintf(file_path + strlen(file_path), len - strlen(file_path), "/%s",
+ filename);
}
-static void
-check_delete_stale_index_file (xlator_t *this, char *filename)
+static int
+is_index_file_current(char *filename, uuid_t priv_index, char *subdir)
{
- int ret = 0;
- struct stat st = {0};
- char filepath[PATH_MAX] = {0};
- index_priv_t *priv = NULL;
+ char current_index[GF_UUID_BUF_SIZE + 16] = {
+ 0,
+ };
- priv = this->private;
- make_file_path (priv->index_basepath, XATTROP_SUBDIR,
- filename, filepath, sizeof (filepath));
- ret = stat (filepath, &st);
- if (!ret && st.st_nlink == 1)
- unlink (filepath);
+ snprintf(current_index, sizeof current_index, "%s-%s", subdir,
+ uuid_utoa(priv_index));
+ return (!strcmp(filename, current_index));
}
-static int
-index_fill_readdir (fd_t *fd, DIR *dir, off_t off,
- size_t size, gf_dirent_t *entries)
-{
- off_t in_case = -1;
- size_t filled = 0;
- int count = 0;
- char entrybuf[sizeof(struct dirent) + 256 + 8];
- struct dirent *entry = NULL;
- int32_t this_size = -1;
- gf_dirent_t *this_entry = NULL;
- xlator_t *this = NULL;
-
- this = THIS;
- if (!off) {
- rewinddir (dir);
- } else {
- seekdir (dir, off);
- }
-
- while (filled <= size) {
- in_case = telldir (dir);
+static void
+check_delete_stale_index_file(xlator_t *this, char *filename, char *subdir)
+{
+ int ret = 0;
+ struct stat st = {0};
+ char filepath[PATH_MAX] = {0};
+ index_priv_t *priv = NULL;
- if (in_case == -1) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "telldir failed on dir=%p: %s",
- dir, strerror (errno));
- goto out;
- }
+ priv = this->private;
- errno = 0;
- entry = NULL;
- readdir_r (dir, (struct dirent *)entrybuf, &entry);
-
- if (!entry) {
- if (errno == EBADF) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "readdir failed on dir=%p: %s",
- dir, strerror (errno));
- goto out;
- }
- break;
- }
+ if (is_index_file_current(filename, priv->index, subdir))
+ return;
- if (!strncmp (entry->d_name, XATTROP_SUBDIR"-",
- strlen (XATTROP_SUBDIR"-"))) {
- check_delete_stale_index_file (this, entry->d_name);
- continue;
- }
+ make_file_path(priv->index_basepath, subdir, filename, filepath,
+ sizeof(filepath));
+ ret = sys_stat(filepath, &st);
+ if (!ret && st.st_nlink == 1)
+ sys_unlink(filepath);
+}
- this_size = max (sizeof (gf_dirent_t),
- sizeof (gfs3_dirplist))
- + strlen (entry->d_name) + 1;
+static void
+index_set_link_count(index_priv_t *priv, int64_t count,
+ index_xattrop_type_t type)
+{
+ switch (type) {
+ case XATTROP:
+ LOCK(&priv->lock);
+ {
+ priv->pending_count = count;
+ }
+ UNLOCK(&priv->lock);
+ break;
+ default:
+ break;
+ }
+}
- if (this_size + filled > size) {
- seekdir (dir, in_case);
- break;
- }
+static void
+index_get_link_count(index_priv_t *priv, int64_t *count,
+ index_xattrop_type_t type)
+{
+ switch (type) {
+ case XATTROP:
+ LOCK(&priv->lock);
+ {
+ *count = priv->pending_count;
+ }
+ UNLOCK(&priv->lock);
+ break;
+ default:
+ break;
+ }
+}
- this_entry = gf_dirent_for_name (entry->d_name);
+static void
+index_dec_link_count(index_priv_t *priv, index_xattrop_type_t type)
+{
+ switch (type) {
+ case XATTROP:
+ LOCK(&priv->lock);
+ {
+ priv->pending_count--;
+ if (priv->pending_count == 0)
+ priv->pending_count--;
+ }
+ UNLOCK(&priv->lock);
+ break;
+ default:
+ break;
+ }
+}
- if (!this_entry) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "could not create gf_dirent for entry %s: (%s)",
- entry->d_name, strerror (errno));
- goto out;
- }
- this_entry->d_off = telldir (dir);
- this_entry->d_ino = entry->d_ino;
+char *
+index_get_subdir_from_type(index_xattrop_type_t type)
+{
+ if (type < XATTROP || type >= XATTROP_TYPE_END)
+ return NULL;
+ return index_subdirs[type];
+}
- list_add_tail (&this_entry->list, &entries->list);
+char *
+index_get_subdir_from_vgfid(index_priv_t *priv, uuid_t vgfid)
+{
+ return index_get_subdir_from_type(index_get_type_from_vgfid(priv, vgfid));
+}
- filled += this_size;
- count ++;
+static int
+index_fill_readdir(fd_t *fd, index_fd_ctx_t *fctx, DIR *dir, off_t off,
+ size_t size, gf_dirent_t *entries)
+{
+ off_t in_case = -1;
+ off_t last_off = 0;
+ size_t filled = 0;
+ int count = 0;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ int32_t this_size = -1;
+ gf_dirent_t *this_entry = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ if (!off) {
+ rewinddir(dir);
+ } else {
+ seekdir(dir, off);
+#ifndef GF_LINUX_HOST_OS
+ if ((u_long)telldir(dir) != off && off != fctx->dir_eof) {
+ gf_msg(THIS->name, GF_LOG_ERROR, EINVAL,
+ INDEX_MSG_INDEX_READDIR_FAILED,
+ "seekdir(0x%llx) failed on dir=%p: "
+ "Invalid argument (offset reused from "
+ "another DIR * structure?)",
+ off, dir);
+ errno = EINVAL;
+ count = -1;
+ goto out;
}
+#endif /* GF_LINUX_HOST_OS */
+ }
- if ((!readdir (dir) && (errno == 0)))
- /* Indicate EOF */
- errno = ENOENT;
-out:
- return count;
-}
+ while (filled <= size) {
+ in_case = (u_long)telldir(dir);
-int
-index_add (xlator_t *this, uuid_t gfid, const char *subdir)
-{
- int32_t op_errno = 0;
- char gfid_path[PATH_MAX] = {0};
- char index_path[PATH_MAX] = {0};
- int ret = 0;
- uuid_t index = {0};
- index_priv_t *priv = NULL;
- struct stat st = {0};
- int fd = 0;
-
- priv = this->private;
- GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !uuid_is_null (gfid),
- out, op_errno, EINVAL);
-
- make_gfid_path (priv->index_basepath, subdir, gfid,
- gfid_path, sizeof (gfid_path));
-
- ret = stat (gfid_path, &st);
- if (!ret)
- goto out;
- index_get_index (priv, index);
- make_index_path (priv->index_basepath, subdir,
- index, index_path, sizeof (index_path));
- ret = link (index_path, gfid_path);
- if (!ret || (errno == EEXIST)) {
- ret = 0;
- goto out;
+ if (in_case == -1) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno,
+ INDEX_MSG_INDEX_READDIR_FAILED, "telldir failed on dir=%p",
+ dir);
+ goto out;
}
- op_errno = errno;
- if (op_errno == ENOENT) {
- ret = index_dir_create (this, subdir);
- if (ret)
- goto out;
- } else if (op_errno == EMLINK) {
- index_generate_index (priv, index);
- make_index_path (priv->index_basepath, subdir,
- index, index_path, sizeof (index_path));
- } else {
+ errno = 0;
+ entry = sys_readdir(dir, scratch);
+ if (!entry || errno != 0) {
+ if (errno == EBADF) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno,
+ INDEX_MSG_INDEX_READDIR_FAILED,
+ "readdir failed on dir=%p", dir);
goto out;
+ }
+ break;
}
- fd = creat (index_path, 0);
- if ((fd < 0) && (errno != EEXIST)) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR, "%s: Not able to "
- "create index (%s)", uuid_utoa (gfid),
- strerror (errno));
+ if (!strncmp(entry->d_name, XATTROP_SUBDIR "-",
+ strlen(XATTROP_SUBDIR "-"))) {
+ check_delete_stale_index_file(this, entry->d_name, XATTROP_SUBDIR);
+ continue;
+ } else if (!strncmp(entry->d_name, DIRTY_SUBDIR "-",
+ strlen(DIRTY_SUBDIR "-"))) {
+ check_delete_stale_index_file(this, entry->d_name, DIRTY_SUBDIR);
+ continue;
+ }
+
+ this_size = max(sizeof(gf_dirent_t), sizeof(gfs3_dirplist)) +
+ strlen(entry->d_name) + 1;
+
+ if (this_size + filled > size) {
+ seekdir(dir, in_case);
+#ifndef GF_LINUX_HOST_OS
+ if ((u_long)telldir(dir) != in_case && in_case != fctx->dir_eof) {
+ gf_msg(THIS->name, GF_LOG_ERROR, EINVAL,
+ INDEX_MSG_INDEX_READDIR_FAILED,
+ "seekdir(0x%llx) failed on dir=%p: "
+ "Invalid argument (offset reused from "
+ "another DIR * structure?)",
+ in_case, dir);
+ errno = EINVAL;
+ count = -1;
goto out;
+ }
+#endif /* GF_LINUX_HOST_OS */
+ break;
}
- if (fd >= 0)
- close (fd);
+ this_entry = gf_dirent_for_name(entry->d_name);
- ret = link (index_path, gfid_path);
- if (ret && (errno != EEXIST)) {
- gf_log (this->name, GF_LOG_ERROR, "%s: Not able to "
- "add to index (%s)", uuid_utoa (gfid),
- strerror (errno));
- goto out;
+ if (!this_entry) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno,
+ INDEX_MSG_INDEX_READDIR_FAILED,
+ "could not create gf_dirent for entry %s", entry->d_name);
+ goto out;
}
+ /*
+ * we store the offset of next entry here, which is
+ * probably not intended, but code using syncop_readdir()
+ * (glfs-heal.c, afr-self-heald.c, pump.c) rely on it
+ * for directory read resumption.
+ */
+ last_off = (u_long)telldir(dir);
+ this_entry->d_off = last_off;
+ this_entry->d_ino = entry->d_ino;
+
+ list_add_tail(&this_entry->list, &entries->list);
+
+ filled += this_size;
+ count++;
+ }
+
+ errno = 0;
+
+ if ((!sys_readdir(dir, scratch) && (errno == 0))) {
+ /* Indicate EOF */
+ errno = ENOENT;
+ /* Remember EOF offset for later detection */
+ fctx->dir_eof = last_off;
+ }
+out:
+ return count;
+}
+int
+index_link_to_base(xlator_t *this, char *fpath, const char *subdir)
+{
+ int ret = 0;
+ int fd = 0;
+ int op_errno = 0;
+ uuid_t index = {0};
+ index_priv_t *priv = this->private;
+ char base[PATH_MAX] = {0};
+
+ index_get_index(priv, index);
+ make_index_path(priv->index_basepath, subdir, index, base, sizeof(base));
+
+ ret = sys_link(base, fpath);
+ if (!ret || (errno == EEXIST)) {
ret = 0;
+ goto out;
+ }
+
+ op_errno = errno;
+ if (op_errno == ENOENT) {
+ ret = index_dir_create(this, subdir);
+ if (ret) {
+ op_errno = errno;
+ goto out;
+ }
+ } else if (op_errno == EMLINK) {
+ index_generate_index(priv, index);
+ make_index_path(priv->index_basepath, subdir, index, base,
+ sizeof(base));
+ } else {
+ goto out;
+ }
+
+ op_errno = 0;
+ fd = sys_creat(base, 0);
+ if ((fd < 0) && (errno != EEXIST)) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, INDEX_MSG_INDEX_ADD_FAILED,
+ "%s: Not able to "
+ "create index",
+ fpath);
+ goto out;
+ }
+
+ if (fd >= 0)
+ sys_close(fd);
+
+ ret = sys_link(base, fpath);
+ if (ret && (errno != EEXIST)) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, INDEX_MSG_INDEX_ADD_FAILED,
+ "%s: Not able to "
+ "add to index",
+ fpath);
+ goto out;
+ }
out:
- return ret;
+ return -op_errno;
}
int
-index_del (xlator_t *this, uuid_t gfid, const char *subdir)
-{
- int32_t op_errno __attribute__((unused)) = 0;
- index_priv_t *priv = NULL;
- int ret = 0;
- char gfid_path[PATH_MAX] = {0};
-
- priv = this->private;
- GF_ASSERT_AND_GOTO_WITH_ERROR (this->name, !uuid_is_null (gfid),
- out, op_errno, EINVAL);
- make_gfid_path (priv->index_basepath, subdir, gfid,
- gfid_path, sizeof (gfid_path));
- ret = unlink (gfid_path);
- if (ret && (errno != ENOENT)) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s: failed to delete from index (%s)",
- gfid_path, strerror (errno));
- ret = -errno;
- goto out;
- }
- ret = 0;
+index_add(xlator_t *this, uuid_t gfid, const char *subdir,
+ index_xattrop_type_t type)
+{
+ char gfid_path[PATH_MAX] = {0};
+ int ret = -1;
+ index_priv_t *priv = NULL;
+ struct stat st = {0};
+
+ priv = this->private;
+
+ if (gf_uuid_is_null(gfid)) {
+ GF_ASSERT(0);
+ goto out;
+ }
+
+ make_gfid_path(priv->index_basepath, subdir, gfid, gfid_path,
+ sizeof(gfid_path));
+
+ ret = sys_stat(gfid_path, &st);
+ if (!ret)
+ goto out;
+ ret = index_link_to_base(this, gfid_path, subdir);
out:
- return ret;
+ return ret;
}
-void
-_xattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr)
-{
- data_pair_t *trav = NULL;
- gf_boolean_t zero_xattr = _gf_true;
- index_inode_ctx_t *ctx = NULL;
- int ret = 0;
-
- trav = xattr->members_list;
- while (trav && inode) {
- if (mem_0filled ((const char*)trav->value->data,
- trav->value->len)) {
- zero_xattr = _gf_false;
- break;
- }
- trav = trav->next;
- }
- ret = index_inode_ctx_get (inode, this, &ctx);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Not able to %s %s -> index",
- zero_xattr?"add":"del", uuid_utoa (inode->gfid));
- goto out;
- }
- if (zero_xattr) {
- if (ctx->state == NOTIN)
- goto out;
- ret = index_del (this, inode->gfid, XATTROP_SUBDIR);
- if (!ret)
- ctx->state = NOTIN;
- } else {
- if (ctx->state == IN)
- goto out;
- ret = index_add (this, inode->gfid, XATTROP_SUBDIR);
- if (!ret)
- ctx->state = IN;
+int
+index_del(xlator_t *this, uuid_t gfid, const char *subdir, int type)
+{
+ int32_t op_errno __attribute__((unused)) = 0;
+ index_priv_t *priv = NULL;
+ int ret = 0;
+ char gfid_path[PATH_MAX] = {0};
+ char rename_dst[PATH_MAX] = {
+ 0,
+ };
+ uuid_t uuid;
+
+ priv = this->private;
+ GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, !gf_uuid_is_null(gfid), out,
+ op_errno, EINVAL);
+ make_gfid_path(priv->index_basepath, subdir, gfid, gfid_path,
+ sizeof(gfid_path));
+
+ if ((strcmp(subdir, ENTRY_CHANGES_SUBDIR)) == 0) {
+ ret = sys_rmdir(gfid_path);
+ /* rmdir above could fail with ENOTEMPTY if the indices under
+ * it were created when granular-entry-heal was enabled, whereas
+ * the actual heal that happened was non-granular (or full) in
+ * nature, resulting in name indices getting left out. To
+ * clean up this directory without it affecting the IO path perf,
+ * the directory is renamed to a unique name under
+ * indices/entry-changes. Self-heal will pick up this entry
+ * during crawl and on lookup into the file system figure that
+ * the index is stale and subsequently wipe it out using rmdir().
+ */
+ if ((ret) && (errno == ENOTEMPTY)) {
+ gf_uuid_generate(uuid);
+ make_gfid_path(priv->index_basepath, subdir, uuid, rename_dst,
+ sizeof(rename_dst));
+ ret = sys_rename(gfid_path, rename_dst);
}
+ } else {
+ ret = sys_unlink(gfid_path);
+ }
+
+ if (ret && (errno != ENOENT)) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, INDEX_MSG_INDEX_DEL_FAILED,
+ "%s: failed to delete"
+ " from index",
+ gfid_path);
+ ret = -errno;
+ goto out;
+ }
+
+ index_dec_link_count(priv, type);
+ ret = 0;
out:
- return;
+ return ret;
}
-void
-fop_xattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr)
+static gf_boolean_t
+_is_xattr_in_watchlist(dict_t *d, char *k, data_t *v, void *tmp)
{
- _xattrop_index_action (this, inode, xattr);
+ if (!strncmp(k, tmp, strlen(k)))
+ return _gf_true;
+
+ return _gf_false;
}
-void
-fop_fxattrop_index_action (xlator_t *this, inode_t *inode, dict_t *xattr)
+static gf_boolean_t
+is_xattr_in_watchlist(dict_t *this, char *key, data_t *value, void *matchdata)
{
- _xattrop_index_action (this, inode, xattr);
+ int ret = -1;
+
+ // matchdata is a list of xattrs
+ // key is strncmp'ed with each xattr in matchdata.
+ // ret will be 0 if key pattern is not present in the matchdata
+ // else ret will be count number of xattrs the key pattern-matches with.
+ ret = dict_foreach_match(matchdata, _is_xattr_in_watchlist, key,
+ dict_null_foreach_fn, NULL);
+
+ if (ret > 0)
+ return _gf_true;
+ return _gf_false;
}
-inline gf_boolean_t
-index_xattrop_track (loc_t *loc, gf_xattrop_flags_t flags, dict_t *dict)
+static int
+index_find_xattr_type(dict_t *d, char *k, data_t *v)
{
- return (flags == GF_XATTROP_ADD_ARRAY);
+ int idx = -1;
+ index_priv_t *priv = THIS->private;
+
+ if (priv->dirty_watchlist &&
+ is_xattr_in_watchlist(d, k, v, priv->dirty_watchlist))
+ idx = DIRTY;
+ else if (priv->pending_watchlist &&
+ is_xattr_in_watchlist(d, k, v, priv->pending_watchlist))
+ idx = XATTROP;
+
+ return idx;
}
-inline gf_boolean_t
-index_fxattrop_track (fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict)
+int
+index_fill_zero_array(dict_t *d, char *k, data_t *v, void *adata)
{
- return (flags == GF_XATTROP_ADD_ARRAY);
+ int idx = -1;
+ int *zfilled = adata;
+ // zfilled array contains `state` for all types xattrs.
+ // state : whether the gfid file of this file exists in
+ // corresponding xattr directory or not.
+
+ idx = index_find_xattr_type(d, k, v);
+ if (idx == -1)
+ return 0;
+ zfilled[idx] = 0;
+ return 0;
}
-int
-__index_fd_ctx_get (fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx)
+static int
+_check_key_is_zero_filled(dict_t *d, char *k, data_t *v, void *tmp)
{
- int ret = 0;
- index_fd_ctx_t *fctx = NULL;
- uint64_t tmpctx = 0;
- char index_dir[PATH_MAX] = {0};
- index_priv_t *priv = NULL;
+ int *zfilled = tmp;
+ int idx = -1;
- priv = this->private;
- if (uuid_compare (fd->inode->gfid, priv->xattrop_vgfid)) {
- ret = -EINVAL;
- goto out;
- }
+ idx = index_find_xattr_type(d, k, v);
+ if (idx == -1)
+ return 0;
- ret = __fd_ctx_get (fd, this, &tmpctx);
- if (!ret) {
- fctx = (index_fd_ctx_t*) (long) tmpctx;
- goto out;
- }
+ /* Along with checking that the value of a key is zero filled
+ * the key's corresponding index should be assigned
+ * appropriate value.
+ * zfilled[idx] will be 0(false) if value not zero.
+ * will be 1(true) if value is zero.
+ */
+ if (mem_0filled((const char *)v->data, v->len)) {
+ zfilled[idx] = 0;
+ return 0;
+ }
+
+ /* If zfilled[idx] was previously 0, it means at least
+ * one xattr of its "kind" is non-zero. Keep its value
+ * the same.
+ */
+ if (zfilled[idx])
+ zfilled[idx] = 1;
+ return 0;
+}
- fctx = GF_CALLOC (1, sizeof (*fctx), gf_index_fd_ctx_t);
- if (!fctx) {
- ret = -ENOMEM;
- goto out;
+int
+index_entry_create(xlator_t *this, inode_t *inode, char *filename)
+{
+ int ret = -1;
+ int op_errno = 0;
+ char pgfid_path[PATH_MAX] = {0};
+ char entry_path[PATH_MAX] = {0};
+ index_priv_t *priv = NULL;
+ index_inode_ctx_t *ctx = NULL;
+ int32_t len = 0;
+
+ priv = this->private;
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, !gf_uuid_is_null(inode->gfid),
+ out, op_errno, EINVAL);
+ GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, filename, out, op_errno, EINVAL);
+
+ ret = index_inode_ctx_get(inode, this, &ctx);
+ if (ret) {
+ op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ INDEX_MSG_INODE_CTX_GET_SET_FAILED,
+ "Not able to get inode ctx for %s", uuid_utoa(inode->gfid));
+ goto out;
+ }
+
+ make_gfid_path(priv->index_basepath, ENTRY_CHANGES_SUBDIR, inode->gfid,
+ pgfid_path, sizeof(pgfid_path));
+
+ if (ctx->state[ENTRY_CHANGES] != IN) {
+ ret = sys_mkdir(pgfid_path, 0600);
+ if (ret != 0 && errno != EEXIST) {
+ op_errno = errno;
+ goto out;
}
+ ctx->state[ENTRY_CHANGES] = IN;
+ }
+
+ if (strchr(filename, '/')) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, INDEX_MSG_INDEX_ADD_FAILED,
+ "Got invalid entry (%s) for pargfid path (%s)", filename,
+ pgfid_path);
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ len = snprintf(entry_path, sizeof(entry_path), "%s/%s", pgfid_path,
+ filename);
+ if ((len < 0) || (len >= sizeof(entry_path))) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ op_errno = 0;
+
+ ret = index_link_to_base(this, entry_path, ENTRY_CHANGES_SUBDIR);
+out:
+ if (op_errno)
+ ret = -op_errno;
+ return ret;
+}
- make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR,
- index_dir, sizeof (index_dir));
- fctx->dir = opendir (index_dir);
- if (!fctx->dir) {
- ret = -errno;
- GF_FREE (fctx);
- fctx = NULL;
- goto out;
- }
+int
+index_entry_delete(xlator_t *this, uuid_t pgfid, char *filename)
+{
+ int ret = 0;
+ int op_errno = 0;
+ char pgfid_path[PATH_MAX] = {0};
+ char entry_path[PATH_MAX] = {0};
+ index_priv_t *priv = NULL;
+ int32_t len = 0;
+
+ priv = this->private;
+
+ GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, !gf_uuid_is_null(pgfid), out,
+ op_errno, EINVAL);
+ GF_ASSERT_AND_GOTO_WITH_ERROR(this->name, filename, out, op_errno, EINVAL);
+
+ make_gfid_path(priv->index_basepath, ENTRY_CHANGES_SUBDIR, pgfid,
+ pgfid_path, sizeof(pgfid_path));
+
+ if (strchr(filename, '/')) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, INDEX_MSG_INDEX_DEL_FAILED,
+ "Got invalid entry (%s) for pargfid path (%s)", filename,
+ pgfid_path);
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ len = snprintf(entry_path, sizeof(entry_path), "%s/%s", pgfid_path,
+ filename);
+ if ((len < 0) || (len >= sizeof(entry_path))) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = sys_unlink(entry_path);
+ if (ret && (errno != ENOENT)) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, INDEX_MSG_INDEX_DEL_FAILED,
+ "%s: failed to delete from index/entry-changes", entry_path);
+ }
- ret = __fd_ctx_set (fd, this, (uint64_t)(long)fctx);
- if (ret) {
- GF_FREE (fctx);
- fctx = NULL;
- ret = -EINVAL;
- goto out;
+out:
+ return -op_errno;
+}
+
+int
+index_entry_action(xlator_t *this, inode_t *inode, dict_t *xdata, char *key)
+{
+ int ret = 0;
+ char *filename = NULL;
+
+ ret = dict_get_str(xdata, key, &filename);
+ if (ret != 0) {
+ ret = 0;
+ goto out;
+ }
+
+ if (strcmp(key, GF_XATTROP_ENTRY_IN_KEY) == 0)
+ ret = index_entry_create(this, inode, filename);
+ else if (strcmp(key, GF_XATTROP_ENTRY_OUT_KEY) == 0)
+ ret = index_entry_delete(this, inode->gfid, filename);
+
+out:
+ return ret;
+}
+
+void
+_index_action(xlator_t *this, inode_t *inode, int *zfilled)
+{
+ int ret = 0;
+ int i = 0;
+ index_inode_ctx_t *ctx = NULL;
+ char *subdir = NULL;
+
+ ret = index_inode_ctx_get(inode, this, &ctx);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ INDEX_MSG_INODE_CTX_GET_SET_FAILED,
+ "Not able to get"
+ " inode context for %s.",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+
+ for (i = 0; i < XATTROP_TYPE_END; i++) {
+ subdir = index_get_subdir_from_type(i);
+ if (zfilled[i] == 1) {
+ if (ctx->state[i] == NOTIN)
+ continue;
+ ret = index_del(this, inode->gfid, subdir, i);
+ if (!ret)
+ ctx->state[i] = NOTIN;
+ } else if (zfilled[i] == 0) {
+ if (ctx->state[i] == IN)
+ continue;
+ ret = index_add(this, inode->gfid, subdir, i);
+ if (!ret)
+ ctx->state[i] = IN;
}
+ }
+out:
+ return;
+}
+
+static void
+index_init_state(xlator_t *this, inode_t *inode, index_inode_ctx_t *ctx,
+ char *subdir)
+{
+ int ret = -1;
+ char pgfid_path[PATH_MAX] = {0};
+ struct stat st = {0};
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ make_gfid_path(priv->index_basepath, subdir, inode->gfid, pgfid_path,
+ sizeof(pgfid_path));
+
+ ret = sys_stat(pgfid_path, &st);
+ if (ret == 0)
+ ctx->state[ENTRY_CHANGES] = IN;
+ else if (ret != 0 && errno == ENOENT)
+ ctx->state[ENTRY_CHANGES] = NOTIN;
+
+ return;
+}
+
+void
+xattrop_index_action(xlator_t *this, index_local_t *local, dict_t *xattr,
+ dict_match_t match, void *match_data)
+{
+ int ret = 0;
+ int zfilled[XATTROP_TYPE_END] = {
+ 0,
+ };
+ int8_t value = 0;
+ char *subdir = NULL;
+ dict_t *req_xdata = NULL;
+ inode_t *inode = NULL;
+ index_inode_ctx_t *ctx = NULL;
+
+ inode = local->inode;
+ req_xdata = local->xdata;
+
+ memset(zfilled, -1, sizeof(zfilled));
+ ret = dict_foreach_match(xattr, match, match_data,
+ _check_key_is_zero_filled, zfilled);
+ _index_action(this, inode, zfilled);
+
+ if (req_xdata) {
+ ret = index_entry_action(this, inode, req_xdata,
+ GF_XATTROP_ENTRY_OUT_KEY);
+
+ ret = dict_get_int8(req_xdata, GF_XATTROP_PURGE_INDEX, &value);
+ if ((ret) || (value == 0))
+ goto out;
+ }
+
+ if (zfilled[XATTROP] != 1)
+ goto out;
+
+ if (inode->ia_type != IA_IFDIR)
+ goto out;
+
+ subdir = index_get_subdir_from_type(ENTRY_CHANGES);
+ ret = index_inode_ctx_get(inode, this, &ctx);
+ if (ctx->state[ENTRY_CHANGES] == UNKNOWN)
+ index_init_state(this, inode, ctx, subdir);
+ if (ctx->state[ENTRY_CHANGES] == IN) {
+ ret = index_del(this, inode->gfid, subdir, ENTRY_CHANGES);
+ ctx->state[ENTRY_CHANGES] = NOTIN;
+ }
+
out:
- if (fctx)
- *ctx = fctx;
- return ret;
+ return;
+}
+
+static gf_boolean_t
+index_xattrop_track(xlator_t *this, gf_xattrop_flags_t flags, dict_t *dict)
+{
+ index_priv_t *priv = this->private;
+
+ if (flags == GF_XATTROP_ADD_ARRAY)
+ return _gf_true;
+
+ if (flags != GF_XATTROP_ADD_ARRAY64)
+ return _gf_false;
+
+ if (!priv->pending_watchlist)
+ return _gf_false;
+
+ if (dict_foreach_match(dict, is_xattr_in_watchlist, priv->pending_watchlist,
+ dict_null_foreach_fn, NULL) > 0)
+ return _gf_true;
+
+ return _gf_false;
}
int
-index_fd_ctx_get (fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx)
+index_inode_path(xlator_t *this, inode_t *inode, char *dirpath, size_t len)
{
- int ret = 0;
- LOCK (&fd->lock);
- {
- ret = __index_fd_ctx_get (fd, this, ctx);
+ char *subdir = NULL;
+ int ret = 0;
+ index_priv_t *priv = NULL;
+ index_inode_ctx_t *ictx = NULL;
+
+ priv = this->private;
+ if (!index_is_fop_on_internal_inode(this, inode, NULL)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ subdir = index_get_subdir_from_vgfid(priv, inode->gfid);
+ if (subdir) {
+ if (len <= strlen(priv->index_basepath) + 1 /*'/'*/ + strlen(subdir)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ make_index_dir_path(priv->index_basepath, subdir, dirpath, len);
+ } else {
+ ret = index_inode_ctx_get(inode, this, &ictx);
+ if (ret)
+ goto out;
+ if (gf_uuid_is_null(ictx->virtual_pargfid)) {
+ ret = -EINVAL;
+ goto out;
+ }
+ make_index_dir_path(priv->index_basepath, ENTRY_CHANGES_SUBDIR, dirpath,
+ len);
+ if (len <= strlen(dirpath) + 1 /*'/'*/ + SLEN(UUID0_STR)) {
+ ret = -EINVAL;
+ goto out;
}
- UNLOCK (&fd->lock);
- return ret;
+ strcat(dirpath, "/");
+ strcat(dirpath, uuid_utoa(ictx->virtual_pargfid));
+ }
+out:
+ return ret;
+}
+
+int
+__index_fd_ctx_get(fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx)
+{
+ int ret = 0;
+ index_fd_ctx_t *fctx = NULL;
+ uint64_t tmpctx = 0;
+ char dirpath[PATH_MAX] = {0};
+
+ ret = __fd_ctx_get(fd, this, &tmpctx);
+ if (!ret) {
+ fctx = (index_fd_ctx_t *)(long)tmpctx;
+ *ctx = fctx;
+ goto out;
+ }
+
+ ret = index_inode_path(this, fd->inode, dirpath, sizeof(dirpath));
+ if (ret)
+ goto out;
+
+ fctx = GF_CALLOC(1, sizeof(*fctx), gf_index_fd_ctx_t);
+ if (!fctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fctx->dir = sys_opendir(dirpath);
+ if (!fctx->dir) {
+ ret = -errno;
+ GF_FREE(fctx);
+ fctx = NULL;
+ goto out;
+ }
+ fctx->dir_eof = -1;
+
+ ret = __fd_ctx_set(fd, this, (uint64_t)(long)fctx);
+ if (ret) {
+ (void)sys_closedir(fctx->dir);
+ GF_FREE(fctx);
+ fctx = NULL;
+ ret = -EINVAL;
+ goto out;
+ }
+ *ctx = fctx;
+out:
+ return ret;
}
-//new - Not NULL means start a fop
-//new - NULL means done processing the fop
+int
+index_fd_ctx_get(fd_t *fd, xlator_t *this, index_fd_ctx_t **ctx)
+{
+ int ret = 0;
+ LOCK(&fd->lock);
+ {
+ ret = __index_fd_ctx_get(fd, this, ctx);
+ }
+ UNLOCK(&fd->lock);
+ return ret;
+}
+
+// new - Not NULL means start a fop
+// new - NULL means done processing the fop
void
-index_queue_process (xlator_t *this, inode_t *inode, call_stub_t *new)
+index_queue_process(xlator_t *this, inode_t *inode, call_stub_t *new)
{
- call_stub_t *stub = NULL;
- index_inode_ctx_t *ctx = NULL;
- int ret = 0;
- call_frame_t *frame = NULL;
+ call_stub_t *stub = NULL;
+ index_inode_ctx_t *ctx = NULL;
+ int ret = 0;
+ call_frame_t *frame = NULL;
+
+ LOCK(&inode->lock);
+ {
+ ret = __index_inode_ctx_get(inode, this, &ctx);
+ if (ret)
+ goto unlock;
- LOCK (&inode->lock);
- {
- ret = __index_inode_ctx_get (inode, this, &ctx);
- if (ret)
- goto unlock;
-
- if (new) {
- __index_enqueue (&ctx->callstubs, new);
- new = NULL;
- } else {
- ctx->processing = _gf_false;
- }
+ if (new) {
+ __index_enqueue(&ctx->callstubs, new);
+ new = NULL;
+ } else {
+ ctx->processing = _gf_false;
+ }
- if (!ctx->processing) {
- stub = __index_dequeue (&ctx->callstubs);
- if (stub)
- ctx->processing = _gf_true;
- else
- ctx->processing = _gf_false;
- }
+ if (!ctx->processing) {
+ stub = __index_dequeue(&ctx->callstubs);
+ if (stub)
+ ctx->processing = _gf_true;
+ else
+ ctx->processing = _gf_false;
}
+ }
unlock:
- UNLOCK (&inode->lock);
-
- if (ret && new) {
- frame = new->frame;
- if (new->fop == GF_FOP_XATTROP) {
- INDEX_STACK_UNWIND (xattrop, frame, -1, ENOMEM,
- NULL, NULL);
- } else if (new->fop == GF_FOP_FXATTROP) {
- INDEX_STACK_UNWIND (fxattrop, frame, -1, ENOMEM,
- NULL, NULL);
- }
- call_stub_destroy (new);
- } else if (stub) {
- call_resume (stub);
+ UNLOCK(&inode->lock);
+
+ if (ret && new) {
+ frame = new->frame;
+ if (new->fop == GF_FOP_XATTROP) {
+ INDEX_STACK_UNWIND(xattrop, frame, -1, ENOMEM, NULL, NULL);
+ } else if (new->fop == GF_FOP_FXATTROP) {
+ INDEX_STACK_UNWIND(fxattrop, frame, -1, ENOMEM, NULL, NULL);
}
- return;
+ call_stub_destroy(new);
+ } else if (stub) {
+ call_resume(stub);
+ }
+ return;
}
-int32_t
-index_xattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xattr, dict_t *xdata)
+static int
+xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xattr, dict_t *xdata, dict_match_t match,
+ dict_t *matchdata)
{
- inode_t *inode = NULL;
+ inode_t *inode = NULL;
+ index_local_t *local = NULL;
- inode = inode_ref (frame->local);
- if (op_ret < 0)
- goto out;
- fop_xattrop_index_action (this, frame->local, xattr);
+ local = frame->local;
+ inode = inode_ref(local->inode);
+
+ if (op_ret < 0)
+ goto out;
+
+ xattrop_index_action(this, local, xattr, match, matchdata);
out:
- INDEX_STACK_UNWIND (xattrop, frame, op_ret, op_errno, xattr, xdata);
- index_queue_process (this, inode, NULL);
- inode_unref (inode);
+ INDEX_STACK_UNWIND(xattrop, frame, op_ret, op_errno, xattr, xdata);
+ index_queue_process(this, inode, NULL);
+ inode_unref(inode);
- return 0;
+ return 0;
}
int32_t
-index_fxattrop_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+index_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xattr,
+ dict_t *xdata)
+{
+ index_priv_t *priv = this->private;
+
+ xattrop_cbk(frame, cookie, this, op_ret, op_errno, xattr, xdata,
+ is_xattr_in_watchlist, priv->complete_watchlist);
+ return 0;
+}
+
+int32_t
+index_xattrop64_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int32_t op_ret, int32_t op_errno, dict_t *xattr,
dict_t *xdata)
{
- inode_t *inode = NULL;
-
- inode = inode_ref (frame->local);
- if (op_ret < 0)
- goto out;
+ index_priv_t *priv = this->private;
- fop_fxattrop_index_action (this, frame->local, xattr);
-out:
- INDEX_STACK_UNWIND (fxattrop, frame, op_ret, op_errno, xattr, xdata);
- index_queue_process (this, inode, NULL);
- inode_unref (inode);
+ return xattrop_cbk(frame, cookie, this, op_ret, op_errno, xattr, xdata,
+ is_xattr_in_watchlist, priv->pending_watchlist);
+}
- return 0;
+void
+index_xattrop_do(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ int ret = -1;
+ int zfilled[XATTROP_TYPE_END] = {
+ 0,
+ };
+ index_local_t *local = NULL;
+ fop_xattrop_cbk_t x_cbk = NULL;
+
+ local = frame->local;
+
+ if (optype == GF_XATTROP_ADD_ARRAY)
+ x_cbk = index_xattrop_cbk;
+ else
+ x_cbk = index_xattrop64_cbk;
+
+ // In wind phase bring the gfid into index. This way if the brick crashes
+ // just after posix performs xattrop before _cbk reaches index xlator
+ // we will still have the gfid in index.
+ memset(zfilled, -1, sizeof(zfilled));
+
+ /* Foreach xattr, set corresponding index of zfilled to 1
+ * zfilled[index] = 1 implies the xattr's value is zero filled
+ * and should be added in its corresponding subdir.
+ *
+ * zfilled should be set to 1 only for those index that
+ * exist in xattr variable. This is to distinguish
+ * between different types of volumes.
+ * For e.g., if the check is not made,
+ * zfilled[DIRTY] is set to 1 for EC volumes,
+ * index file will be tried to create in indices/dirty dir
+ * which doesn't exist for an EC volume.
+ */
+ ret = dict_foreach(xattr, index_fill_zero_array, zfilled);
+
+ _index_action(this, local->inode, zfilled);
+ if (xdata)
+ ret = index_entry_action(this, local->inode, xdata,
+ GF_XATTROP_ENTRY_IN_KEY);
+ if (ret < 0) {
+ x_cbk(frame, NULL, this, -1, -ret, NULL, NULL);
+ return;
+ }
+
+ if (loc)
+ STACK_WIND(frame, x_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc, optype, xattr, xdata);
+ else
+ STACK_WIND(frame, x_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fd, optype, xattr, xdata);
}
int
-index_xattrop_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+index_xattrop_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- STACK_WIND (frame, index_xattrop_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->xattrop, loc, optype, xattr,
- xdata);
- return 0;
+ index_xattrop_do(frame, this, loc, NULL, optype, xattr, xdata);
+ return 0;
}
int
-index_fxattrop_wrapper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+index_fxattrop_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
{
- STACK_WIND (frame, index_fxattrop_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fxattrop, fd, optype, xattr,
- xdata);
- return 0;
+ index_xattrop_do(frame, this, NULL, fd, optype, xattr, xdata);
+ return 0;
}
int32_t
-index_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+index_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- call_stub_t *stub = NULL;
-
- if (!index_xattrop_track (loc, flags, dict))
- goto out;
+ call_stub_t *stub = NULL;
+ index_local_t *local = NULL;
+
+ if (!index_xattrop_track(this, flags, dict))
+ goto out;
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+ local->inode = inode_ref(loc->inode);
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ stub = fop_xattrop_stub(frame, index_xattrop_wrapper, loc, flags, dict,
+ xdata);
+
+err:
+ if ((!local) || (!stub)) {
+ INDEX_STACK_UNWIND(xattrop, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
- frame->local = inode_ref (loc->inode);
- stub = fop_xattrop_stub (frame, index_xattrop_wrapper,
- loc, flags, dict, xdata);
- if (!stub) {
- INDEX_STACK_UNWIND (xattrop, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
+ index_queue_process(this, loc->inode, stub);
+ return 0;
+out:
+ STACK_WIND(frame, default_xattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata);
+ return 0;
+}
- index_queue_process (this, loc->inode, stub);
+int32_t
+index_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ index_local_t *local = NULL;
+
+ if (!index_xattrop_track(this, flags, dict))
+ goto out;
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+ local->inode = inode_ref(fd->inode);
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ stub = fop_fxattrop_stub(frame, index_fxattrop_wrapper, fd, flags, dict,
+ xdata);
+
+err:
+ if ((!local) || (!stub)) {
+ INDEX_STACK_UNWIND(fxattrop, frame, -1, ENOMEM, NULL, xdata);
return 0;
+ }
+
+ index_queue_process(this, fd->inode, stub);
+ return 0;
out:
- STACK_WIND (frame, default_xattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata);
- return 0;
+ STACK_WIND(frame, default_fxattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata);
+ return 0;
}
-int32_t
-index_fxattrop (call_frame_t *frame, xlator_t *this, fd_t *fd,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+uint64_t
+index_entry_count(xlator_t *this, char *subdir)
{
- call_stub_t *stub = NULL;
+ uint64_t count = 0;
+ index_priv_t *priv = NULL;
+ DIR *dirp = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char index_dir[PATH_MAX] = {
+ 0,
+ };
- if (!index_fxattrop_track (fd, flags, dict))
- goto out;
+ priv = this->private;
- frame->local = inode_ref (fd->inode);
- stub = fop_fxattrop_stub (frame, index_fxattrop_wrapper,
- fd, flags, dict, xdata);
- if (!stub) {
- INDEX_STACK_UNWIND (fxattrop, frame, -1, ENOMEM, NULL, xdata);
- return 0;
- }
+ make_index_dir_path(priv->index_basepath, subdir, index_dir,
+ sizeof(index_dir));
- index_queue_process (this, fd->inode, stub);
- return 0;
-out:
- STACK_WIND (frame, default_fxattrop_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata);
+ dirp = sys_opendir(index_dir);
+ if (!dirp)
return 0;
+
+ for (;;) {
+ errno = 0;
+ entry = sys_readdir(dirp, scratch);
+ if (!entry || errno != 0)
+ break;
+
+ if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
+ continue;
+
+ if (!strncmp(entry->d_name, subdir, strlen(subdir)))
+ continue;
+
+ count++;
+ }
+
+ (void)sys_closedir(dirp);
+
+ return count;
}
int32_t
-index_getxattr_wrapper (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
+index_getxattr_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- index_priv_t *priv = NULL;
- dict_t *xattr = NULL;
- int ret = 0;
+ index_priv_t *priv = NULL;
+ dict_t *xattr = NULL;
+ int ret = 0;
+ int vgfid_type = 0;
+ uint64_t count = 0;
+
+ priv = this->private;
+
+ xattr = dict_new();
+ if (!xattr) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ vgfid_type = index_get_type_from_vgfid_xattr(name);
+ if (vgfid_type >= 0) {
+ ret = dict_set_static_bin(xattr, (char *)name,
+ priv->internal_vgfid[vgfid_type],
+ sizeof(priv->internal_vgfid[vgfid_type]));
+ if (ret) {
+ ret = -EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, INDEX_MSG_DICT_SET_FAILED,
+ "xattrop index "
+ "gfid set failed");
+ goto done;
+ }
+ }
- priv = this->private;
+ /* TODO: Need to check what kind of link-counts are needed for
+ * ENTRY-CHANGES before refactor of this block with array*/
+ if (strcmp(name, GF_XATTROP_INDEX_COUNT) == 0) {
+ count = index_entry_count(this, XATTROP_SUBDIR);
- xattr = dict_new ();
- if (!xattr) {
- ret = -ENOMEM;
- goto done;
+ ret = dict_set_uint64(xattr, (char *)name, count);
+ if (ret) {
+ ret = -EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, INDEX_MSG_DICT_SET_FAILED,
+ "xattrop index "
+ "count set failed");
+ goto done;
}
+ } else if (strcmp(name, GF_XATTROP_DIRTY_COUNT) == 0) {
+ count = index_entry_count(this, DIRTY_SUBDIR);
- ret = dict_set_static_bin (xattr, (char*)name, priv->xattrop_vgfid,
- sizeof (priv->xattrop_vgfid));
+ ret = dict_set_uint64(xattr, (char *)name, count);
if (ret) {
- ret = -ENOMEM;
- gf_log (THIS->name, GF_LOG_ERROR, "xattrop index "
- "gfid set failed");
- goto done;
+ ret = -EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, INDEX_MSG_DICT_SET_FAILED,
+ "dirty index "
+ "count set failed");
+ goto done;
}
+ }
done:
- if (ret)
- STACK_UNWIND_STRICT (getxattr, frame, -1, -ret, xattr, xdata);
- else
- STACK_UNWIND_STRICT (getxattr, frame, 0, 0, xattr, xdata);
+ if (ret)
+ STACK_UNWIND_STRICT(getxattr, frame, -1, -ret, xattr, NULL);
+ else
+ STACK_UNWIND_STRICT(getxattr, frame, 0, 0, xattr, NULL);
- if (xattr)
- dict_unref (xattr);
+ if (xattr)
+ dict_unref(xattr);
+ return 0;
+}
+
+static int
+index_save_pargfid_for_entry_changes(xlator_t *this, loc_t *loc, char *path)
+{
+ index_priv_t *priv = NULL;
+ index_inode_ctx_t *ctx = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ if (!loc)
+ return -1;
+ if (gf_uuid_compare(loc->pargfid, priv->internal_vgfid[ENTRY_CHANGES]))
return 0;
+
+ ret = index_inode_ctx_get(loc->inode, this, &ctx);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ INDEX_MSG_INODE_CTX_GET_SET_FAILED,
+ "Unable to get inode context for %s", path);
+ return -EINVAL;
+ }
+ ret = gf_uuid_parse(loc->name, ctx->virtual_pargfid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ INDEX_MSG_INODE_CTX_GET_SET_FAILED,
+ "Unable to store "
+ "virtual gfid in inode context for %s",
+ path);
+ return -EINVAL;
+ }
+ return 0;
}
int32_t
-index_lookup_wrapper (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
-{
- index_priv_t *priv = NULL;
- struct stat lstatbuf = {0};
- int ret = 0;
- int32_t op_errno = EINVAL;
- int32_t op_ret = -1;
- char path[PATH_MAX] = {0};
- struct iatt stbuf = {0, };
- struct iatt postparent = {0,};
- dict_t *xattr = NULL;
- gf_boolean_t is_dir = _gf_false;
-
- priv = this->private;
-
- VALIDATE_OR_GOTO (loc, done);
- if (!uuid_compare (loc->gfid, priv->xattrop_vgfid)) {
- make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR,
- path, sizeof (path));
- is_dir = _gf_true;
- } else if (!uuid_compare (loc->pargfid, priv->xattrop_vgfid)) {
- make_file_path (priv->index_basepath, XATTROP_SUBDIR,
- loc->name, path, sizeof (path));
+index_lookup_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xattr_req)
+{
+ index_priv_t *priv = NULL;
+ struct stat lstatbuf = {0};
+ int ret = 0;
+ int32_t op_errno = EINVAL;
+ int32_t op_ret = -1;
+ uint64_t val = IA_INVAL;
+ char path[PATH_MAX] = {0};
+ struct iatt stbuf = {
+ 0,
+ };
+ struct iatt postparent = {
+ 0,
+ };
+ dict_t *xattr = NULL;
+ gf_boolean_t is_dir = _gf_false;
+ char *subdir = NULL;
+ loc_t iloc = {0};
+
+ priv = this->private;
+ loc_copy(&iloc, loc);
+
+ VALIDATE_OR_GOTO(loc, done);
+ if (index_is_fop_on_internal_inode(this, loc->parent, loc->pargfid)) {
+ subdir = index_get_subdir_from_vgfid(priv, loc->pargfid);
+ ret = index_inode_path(this, loc->parent, path, sizeof(path));
+ if (ret < 0) {
+ op_errno = -ret;
+ goto done;
}
+ ret = snprintf(path + strlen(path), PATH_MAX - strlen(path), "/%s",
+ loc->name);
- ret = lstat (path, &lstatbuf);
+ if ((ret < 0) || (ret > (PATH_MAX - strlen(path)))) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto done;
+ }
+
+ } else if (index_is_virtual_gfid(priv, loc->gfid)) {
+ subdir = index_get_subdir_from_vgfid(priv, loc->gfid);
+ make_index_dir_path(priv->index_basepath, subdir, path, sizeof(path));
+ is_dir = _gf_true;
+
+ if ((xattr_req) && (dict_get(xattr_req, GF_INDEX_IA_TYPE_GET_REQ))) {
+ if (0 == strcmp(subdir, index_get_subdir_from_type(ENTRY_CHANGES)))
+ val = IA_IFDIR;
+ else
+ val = IA_IFREG;
+ }
+ } else {
+ if (!inode_is_linked(loc->inode)) {
+ inode_unref(iloc.inode);
+ iloc.inode = inode_find(loc->inode->table, loc->gfid);
+ }
+ ret = index_inode_path(this, iloc.inode, path, sizeof(path));
+ if (ret < 0) {
+ op_errno = -ret;
+ goto done;
+ }
+ }
+ ret = sys_lstat(path, &lstatbuf);
+ if (ret) {
+ gf_msg_debug(this->name, errno, "Stat failed on %s dir ", path);
+ op_errno = errno;
+ goto done;
+ } else if (!S_ISDIR(lstatbuf.st_mode) && is_dir) {
+ op_errno = ENOTDIR;
+ gf_msg_debug(this->name, op_errno,
+ "Stat failed on %s dir, "
+ "not a directory",
+ path);
+ goto done;
+ }
+ xattr = dict_new();
+ if (!xattr) {
+ op_errno = ENOMEM;
+ goto done;
+ }
+
+ if (val != IA_INVAL) {
+ ret = dict_set_uint64(xattr, GF_INDEX_IA_TYPE_GET_RSP, val);
if (ret) {
- gf_log (this->name, GF_LOG_DEBUG, "Stat failed on index dir "
- "(%s)", strerror (errno));
- op_errno = errno;
- goto done;
- } else if (!S_ISDIR (lstatbuf.st_mode) && is_dir) {
- gf_log (this->name, GF_LOG_DEBUG, "Stat failed on index dir, "
- "not a directory");
- op_errno = ENOENT;
- goto done;
+ op_ret = -1;
+ op_errno = -ret;
+ goto done;
+ }
+ }
+
+ iatt_from_stat(&stbuf, &lstatbuf);
+ if (is_dir || inode_is_linked(iloc.inode))
+ loc_gfid(&iloc, stbuf.ia_gfid);
+ else
+ gf_uuid_generate(stbuf.ia_gfid);
+
+ ret = index_save_pargfid_for_entry_changes(this, &iloc, path);
+ if (ret) {
+ op_ret = -1;
+ op_errno = -ret;
+ goto done;
+ }
+
+ stbuf.ia_ino = -1;
+ op_ret = 0;
+done:
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno,
+ loc ? loc->inode : NULL, &stbuf, xattr, &postparent);
+ if (xattr)
+ dict_unref(xattr);
+ loc_wipe(&iloc);
+ return 0;
+}
+
+int
+index_get_gfid_type(void *opaque)
+{
+ gf_dirent_t *entry = NULL;
+ xlator_t *this = THIS;
+ struct index_syncop_args *args = opaque;
+ loc_t loc = {0};
+ struct iatt iatt = {0};
+ int ret = 0;
+
+ list_for_each_entry(entry, &args->entries->list, list)
+ {
+ if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
+ continue;
+
+ loc_wipe(&loc);
+
+ entry->d_type = gf_d_type_from_ia_type(IA_INVAL);
+ entry->d_stat.ia_type = IA_INVAL;
+ if (gf_uuid_parse(entry->d_name, loc.gfid))
+ continue;
+
+ loc.inode = inode_find(args->parent->table, loc.gfid);
+ if (loc.inode) {
+ entry->d_stat.ia_type = loc.inode->ia_type;
+ entry->d_type = gf_d_type_from_ia_type(loc.inode->ia_type);
+ continue;
}
- xattr = dict_new ();
- if (!xattr) {
- op_errno = ENOMEM;
- goto done;
+ loc.inode = inode_new(args->parent->table);
+ if (!loc.inode)
+ continue;
+ ret = syncop_lookup(FIRST_CHILD(this), &loc, &iatt, 0, 0, 0);
+ if (ret == 0) {
+ entry->d_type = gf_d_type_from_ia_type(iatt.ia_type);
+ entry->d_stat = iatt;
}
+ }
+ loc_wipe(&loc);
- iatt_from_stat (&stbuf, &lstatbuf);
- if (is_dir)
- uuid_copy (stbuf.ia_gfid, priv->xattrop_vgfid);
- else
- uuid_generate (stbuf.ia_gfid);
- stbuf.ia_ino = -1;
- op_ret = 0;
-done:
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno,
- loc->inode, &stbuf, xattr, &postparent);
- if (xattr)
- dict_unref (xattr);
- return 0;
+ return 0;
}
int32_t
-index_readdir_wrapper (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off, dict_t *xdata)
+index_readdir_wrapper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *xdata)
{
- index_fd_ctx_t *fctx = NULL;
- DIR *dir = NULL;
- int ret = -1;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int count = 0;
- gf_dirent_t entries;
+ index_fd_ctx_t *fctx = NULL;
+ index_priv_t *priv = NULL;
+ DIR *dir = NULL;
+ int ret = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int count = 0;
+ gf_dirent_t entries;
+ struct index_syncop_args args = {0};
+
+ priv = this->private;
+ INIT_LIST_HEAD(&entries.list);
+
+ ret = index_fd_ctx_get(fd, this, &fctx);
+ if (ret < 0) {
+ op_errno = -ret;
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, INDEX_MSG_FD_OP_FAILED,
+ "pfd is NULL, fd=%p", fd);
+ goto done;
+ }
+
+ dir = fctx->dir;
+ if (!dir) {
+ op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ INDEX_MSG_INDEX_READDIR_FAILED, "dir is NULL for fd=%p", fd);
+ goto done;
+ }
+
+ count = index_fill_readdir(fd, fctx, dir, off, size, &entries);
+
+ /* pick ENOENT to indicate EOF */
+ op_errno = errno;
+ op_ret = count;
+ if (index_is_virtual_gfid(priv, fd->inode->gfid) && xdata &&
+ dict_get(xdata, "get-gfid-type")) {
+ args.parent = fd->inode;
+ args.entries = &entries;
+ ret = synctask_new(this->ctx->env, index_get_gfid_type, NULL, NULL,
+ &args);
+ }
+done:
+ STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, &entries, NULL);
+ gf_dirent_free(&entries);
+ return 0;
+}
- INIT_LIST_HEAD (&entries.list);
+int
+deletion_handler(const char *fpath, const struct stat *sb, int typeflag,
+ struct FTW *ftwbuf)
+{
+ ia_type_t type = IA_INVAL;
+
+ switch (sb->st_mode & S_IFMT) {
+ case S_IFREG:
+ sys_unlink(fpath);
+ break;
+
+ case S_IFDIR:
+ sys_rmdir(fpath);
+ break;
+ default:
+ type = ia_type_from_st_mode(sb->st_mode);
+ gf_msg(THIS->name, GF_LOG_WARNING, EINVAL, INDEX_MSG_INVALID_ARGS,
+ "%s neither a regular file nor a directory - type:%s", fpath,
+ gf_inode_type_to_str(type));
+ break;
+ }
+ return 0;
+}
- ret = index_fd_ctx_get (fd, this, &fctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "pfd is NULL, fd=%p", fd);
- op_errno = -ret;
- goto done;
- }
+static int
+index_wipe_index_subdir(void *opaque)
+{
+ struct index_syncop_args *args = opaque;
- dir = fctx->dir;
+ nftw(args->path, deletion_handler, 1, FTW_DEPTH | FTW_PHYS);
+ return 0;
+}
- if (!dir) {
- gf_log (this->name, GF_LOG_WARNING,
- "dir is NULL for fd=%p", fd);
- op_errno = EINVAL;
- goto done;
- }
+static void
+index_get_parent_iatt(struct iatt *parent, char *path, loc_t *loc,
+ int32_t *op_ret, int32_t *op_errno)
+{
+ int ret = -1;
+ struct stat lstatbuf = {
+ 0,
+ };
+
+ ret = sys_lstat(path, &lstatbuf);
+ if (ret < 0) {
+ *op_ret = -1;
+ *op_errno = errno;
+ return;
+ }
- count = index_fill_readdir (fd, dir, off, size, &entries);
+ iatt_from_stat(parent, &lstatbuf);
+ gf_uuid_copy(parent->ia_gfid, loc->pargfid);
+ parent->ia_ino = -1;
- /* pick ENOENT to indicate EOF */
- op_errno = errno;
- op_ret = count;
-done:
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, &entries, xdata);
- gf_dirent_free (&entries);
- return 0;
+ return;
}
int
-index_unlink_wrapper (call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
- dict_t *xdata)
-{
- index_priv_t *priv = NULL;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- int ret = 0;
- struct iatt preparent = {0};
- struct iatt postparent = {0};
- char index_dir[PATH_MAX] = {0};
- struct stat lstatbuf = {0};
- uuid_t gfid = {0};
-
- priv = this->private;
- make_index_dir_path (priv->index_basepath, XATTROP_SUBDIR,
- index_dir, sizeof (index_dir));
- ret = lstat (index_dir, &lstatbuf);
+index_rmdir_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+ dict_t *xdata)
+{
+ int ret = 0;
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ char *subdir = NULL;
+ char index_dir[PATH_MAX] = {0};
+ char index_subdir[PATH_MAX] = {0};
+ uuid_t gfid = {0};
+ struct iatt preparent = {0};
+ struct iatt postparent = {0};
+ index_priv_t *priv = NULL;
+ index_xattrop_type_t type = XATTROP_TYPE_UNSET;
+ struct index_syncop_args args = {
+ 0,
+ };
+
+ priv = this->private;
+
+ type = index_get_type_from_vgfid(priv, loc->pargfid);
+ subdir = index_get_subdir_from_vgfid(priv, loc->pargfid);
+ make_index_dir_path(priv->index_basepath, subdir, index_dir,
+ sizeof(index_dir));
+
+ index_get_parent_iatt(&preparent, index_dir, loc, &op_ret, &op_errno);
+ if (op_ret < 0)
+ goto done;
+
+ gf_uuid_parse(loc->name, gfid);
+ make_gfid_path(priv->index_basepath, subdir, gfid, index_subdir,
+ sizeof(index_subdir));
+
+ if (flag == 0) {
+ ret = index_del(this, gfid, subdir, type);
if (ret < 0) {
- op_ret = -1;
- op_errno = errno;
- goto done;
+ op_ret = -1;
+ op_errno = -ret;
+ goto done;
}
+ } else {
+ args.path = index_subdir;
+ ret = synctask_new(this->ctx->env, index_wipe_index_subdir, NULL, NULL,
+ &args);
+ }
- iatt_from_stat (&preparent, &lstatbuf);
- uuid_copy (preparent.ia_gfid, priv->xattrop_vgfid);
- preparent.ia_ino = -1;
- uuid_parse (loc->name, gfid);
- ret = index_del (this, gfid, XATTROP_SUBDIR);
- if (ret < 0) {
- op_ret = -1;
- op_errno = -ret;
- goto done;
+ index_get_parent_iatt(&postparent, index_dir, loc, &op_ret, &op_errno);
+ if (op_ret < 0)
+ goto done;
+
+done:
+ INDEX_STACK_UNWIND(rmdir, frame, op_ret, op_errno, &preparent, &postparent,
+ xdata);
+ return 0;
+}
+
+int
+index_unlink_wrapper(call_frame_t *frame, xlator_t *this, loc_t *loc, int flag,
+ dict_t *xdata)
+{
+ index_priv_t *priv = NULL;
+ index_inode_ctx_t *ictx = NULL;
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ int ret = 0;
+ index_xattrop_type_t type = XATTROP_TYPE_UNSET;
+ struct iatt preparent = {0};
+ struct iatt postparent = {0};
+ char index_dir[PATH_MAX] = {0};
+ char filepath[PATH_MAX] = {0};
+ uuid_t gfid = {0};
+ char *subdir = NULL;
+
+ priv = this->private;
+ type = index_get_type_from_vgfid(priv, loc->pargfid);
+ ret = index_inode_path(this, loc->parent, index_dir, sizeof(index_dir));
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+ goto done;
+ }
+
+ index_get_parent_iatt(&preparent, index_dir, loc, &op_ret, &op_errno);
+ if (op_ret < 0)
+ goto done;
+
+ if (type <= XATTROP_TYPE_UNSET) {
+ ret = index_inode_ctx_get(loc->parent, this, &ictx);
+ if ((ret == 0) && gf_uuid_is_null(ictx->virtual_pargfid)) {
+ ret = -EINVAL;
}
- memset (&lstatbuf, 0, sizeof (lstatbuf));
- ret = lstat (index_dir, &lstatbuf);
- if (ret < 0) {
- op_ret = -1;
- op_errno = errno;
- goto done;
+ if (ret == 0) {
+ ret = index_entry_delete(this, ictx->virtual_pargfid,
+ (char *)loc->name);
}
- iatt_from_stat (&postparent, &lstatbuf);
- uuid_copy (postparent.ia_gfid, priv->xattrop_vgfid);
- postparent.ia_ino = -1;
+ } else if (type == ENTRY_CHANGES) {
+ make_file_path(priv->index_basepath, ENTRY_CHANGES_SUBDIR,
+ (char *)loc->name, filepath, sizeof(filepath));
+ ret = sys_unlink(filepath);
+ } else {
+ subdir = index_get_subdir_from_type(type);
+ gf_uuid_parse(loc->name, gfid);
+ ret = index_del(this, gfid, subdir, type);
+ }
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+ goto done;
+ }
+
+ index_get_parent_iatt(&postparent, index_dir, loc, &op_ret, &op_errno);
+ if (op_ret < 0)
+ goto done;
done:
- INDEX_STACK_UNWIND (unlink, frame, op_ret, op_errno, &preparent,
- &postparent, xdata);
- return 0;
+ INDEX_STACK_UNWIND(unlink, frame, op_ret, op_errno, &preparent, &postparent,
+ xdata);
+ return 0;
}
int32_t
-index_getxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
+index_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- call_stub_t *stub = NULL;
+ call_stub_t *stub = NULL;
- if (!name || strcmp (GF_XATTROP_INDEX_GFID, name))
- goto out;
+ if (!name ||
+ (!index_is_vgfid_xattr(name) && strcmp(GF_XATTROP_INDEX_COUNT, name) &&
+ strcmp(GF_XATTROP_DIRTY_COUNT, name)))
+ goto out;
- stub = fop_getxattr_stub (frame, index_getxattr_wrapper, loc, name,
- xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
- worker_enqueue (this, stub);
+ stub = fop_getxattr_stub(frame, index_getxattr_wrapper, loc, name, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(getxattr, frame, -1, ENOMEM, NULL, NULL);
return 0;
+ }
+ worker_enqueue(this, stub);
+ return 0;
out:
- STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
- return 0;
+ STACK_WIND(frame, default_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
}
-int32_t
-index_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+int64_t
+index_fetch_link_count(xlator_t *this, index_xattrop_type_t type)
{
- call_stub_t *stub = NULL;
- index_priv_t *priv = NULL;
+ index_priv_t *priv = this->private;
+ char *subdir = NULL;
+ struct stat lstatbuf = {
+ 0,
+ };
+ int ret = -1;
+ int64_t count = -1;
+ DIR *dirp = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char index_dir[PATH_MAX] = {
+ 0,
+ };
+ char index_path[PATH_MAX] = {
+ 0,
+ };
+
+ subdir = index_get_subdir_from_type(type);
+ make_index_dir_path(priv->index_basepath, subdir, index_dir,
+ sizeof(index_dir));
+
+ dirp = sys_opendir(index_dir);
+ if (!dirp)
+ goto out;
+
+ for (;;) {
+ errno = 0;
+ entry = sys_readdir(dirp, scratch);
+ if (!entry || errno != 0) {
+ if (count == -1)
+ count = 0;
+ goto out;
+ }
- priv = this->private;
+ if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
+ continue;
- if (uuid_compare (loc->gfid, priv->xattrop_vgfid) &&
- uuid_compare (loc->pargfid, priv->xattrop_vgfid))
- goto normal;
+ make_file_path(priv->index_basepath, subdir, entry->d_name, index_path,
+ sizeof(index_path));
- stub = fop_lookup_stub (frame, index_lookup_wrapper, loc, xattr_req);
- if (!stub) {
- STACK_UNWIND_STRICT (lookup, frame, -1, ENOMEM, loc->inode,
- NULL, NULL, NULL);
- return 0;
+ ret = sys_lstat(index_path, &lstatbuf);
+ if (ret < 0) {
+ count = -2;
+ continue;
+ } else {
+ count = lstatbuf.st_nlink - 1;
+ if (count == 0)
+ continue;
+ else
+ break;
}
- worker_enqueue (this, stub);
- return 0;
-normal:
- STACK_WIND (frame, default_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+ }
+out:
+ if (dirp)
+ (void)sys_closedir(dirp);
+ return count;
+}
- return 0;
+dict_t *
+index_fill_link_count(xlator_t *this, dict_t *xdata)
+{
+ int ret = -1;
+ index_priv_t *priv = NULL;
+ int64_t count = -1;
+
+ priv = this->private;
+ xdata = (xdata) ? dict_ref(xdata) : dict_new();
+ if (!xdata)
+ goto out;
+
+ index_get_link_count(priv, &count, XATTROP);
+ if (count < 0) {
+ count = index_fetch_link_count(this, XATTROP);
+ index_set_link_count(priv, count, XATTROP);
+ }
+
+ if (count == 0) {
+ ret = dict_set_int8(xdata, "link-count", 0);
+ if (ret < 0)
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, INDEX_MSG_DICT_SET_FAILED,
+ "Unable to set link-count");
+ } else {
+ ret = dict_set_int8(xdata, "link-count", 1);
+ if (ret < 0)
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, INDEX_MSG_DICT_SET_FAILED,
+ "Unable to set link-count");
+ }
+
+out:
+ return xdata;
}
int32_t
-index_readdir (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t off, dict_t *xdata)
+index_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- call_stub_t *stub = NULL;
- index_priv_t *priv = NULL;
+ xdata = index_fill_link_count(this, xdata);
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ if (xdata)
+ dict_unref(xdata);
+ return 0;
+}
- priv = this->private;
- if (uuid_compare (fd->inode->gfid, priv->xattrop_vgfid))
- goto out;
- stub = fop_readdir_stub (frame, index_readdir_wrapper, fd, size, off,
- xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM, NULL, NULL);
- return 0;
+int32_t
+index_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ inode_t *inode = NULL;
+ call_stub_t *stub = NULL;
+ char *flag = NULL;
+ int ret = -1;
+
+ if (!index_is_fop_on_internal_inode(this, loc->parent, loc->pargfid) &&
+ !index_is_fop_on_internal_inode(this, loc->inode, loc->gfid)) {
+ if (!inode_is_linked(loc->inode)) {
+ inode = inode_find(loc->inode->table, loc->gfid);
+ if (!index_is_fop_on_internal_inode(this, inode, loc->gfid)) {
+ inode_unref(inode);
+ goto normal;
+ }
+ inode_unref(inode);
+ } else {
+ goto normal;
}
- worker_enqueue (this, stub);
- return 0;
-out:
- STACK_WIND (frame, default_readdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata);
+ }
+
+ stub = fop_lookup_stub(frame, index_lookup_wrapper, loc, xattr_req);
+ if (!stub) {
+ STACK_UNWIND_STRICT(lookup, frame, -1, ENOMEM, loc->inode, NULL, NULL,
+ NULL);
return 0;
+ }
+ worker_enqueue(this, stub);
+ return 0;
+normal:
+ ret = dict_get_str_sizen(xattr_req, "link-count", &flag);
+ if ((ret == 0) && (strcmp(flag, GF_XATTROP_INDEX_COUNT) == 0)) {
+ STACK_WIND(frame, index_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+ } else {
+ STACK_WIND(frame, default_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+ }
+
+ return 0;
}
-int
-index_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+int32_t
+index_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ xdata = index_fill_link_count(this, xdata);
+ STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, buf, xdata);
+ if (xdata)
+ dict_unref(xdata);
+ return 0;
+}
+
+int32_t
+index_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int ret = -1;
+ char *flag = NULL;
+
+ ret = dict_get_str(xdata, "link-count", &flag);
+ if ((ret == 0) && (strcmp(flag, GF_XATTROP_INDEX_COUNT) == 0)) {
+ STACK_WIND(frame, index_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ } else {
+ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ }
+
+ return 0;
+}
+
+int32_t
+index_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
dict_t *xdata)
{
- call_stub_t *stub = NULL;
- index_priv_t *priv = NULL;
+ if (!index_is_fop_on_internal_inode(this, fd->inode, NULL))
+ goto normal;
- priv = this->private;
- if (uuid_compare (loc->pargfid, priv->xattrop_vgfid))
- goto out;
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(opendir, frame, 0, 0, fd, NULL);
+ return 0;
- stub = fop_unlink_stub (frame, index_unlink_wrapper, loc, xflag, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL,
- NULL);
- return 0;
- }
- worker_enqueue (this, stub);
+normal:
+ STACK_WIND(frame, default_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
+}
+
+int32_t
+index_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ if (!index_is_fop_on_internal_inode(this, fd->inode, NULL))
+ goto out;
+
+ stub = fop_readdir_stub(frame, index_readdir_wrapper, fd, size, off, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL);
return 0;
+ }
+ worker_enqueue(this, stub);
+ return 0;
out:
- STACK_WIND (frame, default_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
- return 0;
+ STACK_WIND(frame, default_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata);
+ return 0;
}
-int32_t
-mem_acct_init (xlator_t *this)
+int
+index_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- int ret = -1;
+ call_stub_t *stub = NULL;
- ret = xlator_mem_acct_init (this, gf_index_mt_end + 1);
+ if (!index_is_fop_on_internal_inode(this, loc->parent, NULL))
+ goto out;
- return ret;
+ stub = fop_unlink_stub(frame, index_unlink_wrapper, loc, xflag, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
+ worker_enqueue(this, stub);
+ return 0;
+out:
+ STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
}
int
-init (xlator_t *this)
+index_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ dict_t *xdata)
{
- int ret = -1;
- index_priv_t *priv = NULL;
- pthread_t thread;
+ call_stub_t *stub = NULL;
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "'index' not configured with exactly one child");
- goto out;
- }
+ if (!index_is_fop_on_internal_inode(this, loc->parent, NULL))
+ goto out;
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
+ stub = fop_rmdir_stub(frame, index_rmdir_wrapper, loc, flags, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(rmdir, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
+ worker_enqueue(this, stub);
+ return 0;
+out:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rmdir,
+ loc, flags, xdata);
+ return 0;
+}
- priv = GF_CALLOC (1, sizeof (*priv), gf_index_mt_priv_t);
- if (!priv)
- goto out;
+int
+index_make_xattrop_watchlist(xlator_t *this, index_priv_t *priv,
+ char *watchlist, index_xattrop_type_t type)
+{
+ char *delim = NULL;
+ char *dup_watchlist = NULL;
+ char *key = NULL;
+ char *saveptr = NULL;
+ dict_t *xattrs = NULL;
+ data_t *dummy = NULL;
+ int ret = 0;
+
+ if (!watchlist)
+ return 0;
- LOCK_INIT (&priv->lock);
- if ((ret = pthread_cond_init(&priv->cond, NULL)) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "pthread_cond_init failed (%d)", ret);
- goto out;
+ dup_watchlist = gf_strdup(watchlist);
+ if (!dup_watchlist)
+ return -1;
+
+ xattrs = dict_new();
+ if (!xattrs) {
+ ret = -1;
+ goto out;
+ }
+
+ dummy = int_to_data(1);
+ if (!dummy) {
+ ret = -1;
+ goto out;
+ }
+
+ data_ref(dummy);
+
+ delim = ",";
+ key = strtok_r(dup_watchlist, delim, &saveptr);
+ while (key) {
+ if (strlen(key) == 0) {
+ ret = -1;
+ goto out;
}
- if ((ret = pthread_mutex_init(&priv->mutex, NULL)) != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "pthread_mutex_init failed (%d)", ret);
- goto out;
- }
+ ret = dict_set(xattrs, key, dummy);
+ if (ret)
+ goto out;
- pthread_attr_init (&priv->w_attr);
- ret = pthread_attr_setstacksize (&priv->w_attr,
- INDEX_THREAD_STACK_SIZE);
- if (ret == EINVAL) {
- gf_log (this->name, GF_LOG_WARNING,
- "Using default thread stack size");
- }
- GF_OPTION_INIT ("index-base", priv->index_basepath, path, out);
- uuid_generate (priv->index);
- uuid_generate (priv->xattrop_vgfid);
- INIT_LIST_HEAD (&priv->callstubs);
+ key = strtok_r(NULL, delim, &saveptr);
+ }
- ret = pthread_create (&thread, &priv->w_attr, index_worker, this);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING, "Failed to create "
- "worker thread, aborting");
+ switch (type) {
+ case DIRTY:
+ priv->dirty_watchlist = dict_copy_with_ref(xattrs,
+ priv->dirty_watchlist);
+ if (!priv->dirty_watchlist) {
+ ret = -1;
goto out;
- }
- this->private = priv;
- ret = 0;
+ }
+ break;
+ case XATTROP:
+ priv->pending_watchlist = dict_copy_with_ref(
+ xattrs, priv->pending_watchlist);
+ if (!priv->pending_watchlist) {
+ ret = -1;
+ goto out;
+ }
+ break;
+ default:
+ break;
+ }
+
+ ret = 0;
out:
- if (!this->private && priv)
- GF_FREE (priv);
+ if (xattrs)
+ dict_unref(xattrs);
+
+ GF_FREE(dup_watchlist);
- return ret;
+ if (dummy)
+ data_unref(dummy);
+
+ return ret;
}
-void
-fini (xlator_t *this)
+int32_t
+mem_acct_init(xlator_t *this)
{
- index_priv_t *priv = NULL;
+ int ret = -1;
- priv = this->private;
- if (!priv)
- goto out;
+ ret = xlator_mem_acct_init(this, gf_index_mt_end + 1);
+
+ return ret;
+}
+
+int
+init(xlator_t *this)
+{
+ int i = 0;
+ int ret = -1;
+ int64_t count = -1;
+ index_priv_t *priv = NULL;
+ pthread_attr_t w_attr;
+ gf_boolean_t mutex_inited = _gf_false;
+ gf_boolean_t cond_inited = _gf_false;
+ gf_boolean_t attr_inited = _gf_false;
+ char *watchlist = NULL;
+ char *dirtylist = NULL;
+ char *pendinglist = NULL;
+ char *index_base_parent = NULL;
+ char *tmp = NULL;
+
+ if (!this->children || this->children->next) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, INDEX_MSG_INVALID_GRAPH,
+ "'index' not configured with exactly one child");
+ goto out;
+ }
+
+ if (!this->parents) {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, INDEX_MSG_INVALID_GRAPH,
+ "dangling volume. check volfile ");
+ }
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_index_mt_priv_t);
+ if (!priv)
+ goto out;
+
+ LOCK_INIT(&priv->lock);
+ if ((ret = pthread_cond_init(&priv->cond, NULL)) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ret, INDEX_MSG_INVALID_ARGS,
+ "pthread_cond_init failed");
+ goto out;
+ }
+ cond_inited = _gf_true;
+
+ if ((ret = pthread_mutex_init(&priv->mutex, NULL)) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ret, INDEX_MSG_INVALID_ARGS,
+ "pthread_mutex_init failed");
+ goto out;
+ }
+ mutex_inited = _gf_true;
+
+ if ((ret = pthread_attr_init(&w_attr)) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ret, INDEX_MSG_INVALID_ARGS,
+ "pthread_attr_init failed");
+ goto out;
+ }
+ attr_inited = _gf_true;
+
+ ret = pthread_attr_setstacksize(&w_attr, INDEX_THREAD_STACK_SIZE);
+ if (ret == EINVAL) {
+ gf_msg(this->name, GF_LOG_WARNING, ret, INDEX_MSG_INVALID_ARGS,
+ "Using default thread stack size");
+ }
+
+ GF_OPTION_INIT("index-base", priv->index_basepath, path, out);
+ tmp = gf_strdup(priv->index_basepath);
+ index_base_parent = dirname(tmp);
+ if (gf_lstat_dir(index_base_parent, NULL) != 0) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ INDEX_MSG_INDEX_DIR_CREATE_FAILED,
+ "Failed to find parent dir (%s) of index basepath %s.",
+ index_base_parent, priv->index_basepath);
+ goto out;
+ }
+
+ GF_OPTION_INIT("xattrop64-watchlist", watchlist, str, out);
+ ret = index_make_xattrop_watchlist(this, priv, watchlist, XATTROP);
+ if (ret)
+ goto out;
+
+ GF_OPTION_INIT("xattrop-dirty-watchlist", dirtylist, str, out);
+ ret = index_make_xattrop_watchlist(this, priv, dirtylist, DIRTY);
+ if (ret)
+ goto out;
+
+ GF_OPTION_INIT("xattrop-pending-watchlist", pendinglist, str, out);
+ ret = index_make_xattrop_watchlist(this, priv, pendinglist, XATTROP);
+ if (ret)
+ goto out;
+
+ if (priv->dirty_watchlist)
+ priv->complete_watchlist = dict_copy_with_ref(priv->dirty_watchlist,
+ priv->complete_watchlist);
+ if (priv->pending_watchlist)
+ priv->complete_watchlist = dict_copy_with_ref(priv->pending_watchlist,
+ priv->complete_watchlist);
+
+ gf_uuid_generate(priv->index);
+ for (i = 0; i < XATTROP_TYPE_END; i++)
+ gf_uuid_generate(priv->internal_vgfid[i]);
+
+ INIT_LIST_HEAD(&priv->callstubs);
+ GF_ATOMIC_INIT(priv->stub_cnt, 0);
+
+ this->local_pool = mem_pool_new(index_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ goto out;
+ }
+
+ this->private = priv;
+
+ ret = index_dir_create(this, XATTROP_SUBDIR);
+ if (ret < 0)
+ goto out;
+
+ if (priv->dirty_watchlist) {
+ ret = index_dir_create(this, DIRTY_SUBDIR);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = index_dir_create(this, ENTRY_CHANGES_SUBDIR);
+ if (ret < 0)
+ goto out;
+
+ /*init indices files counts*/
+ count = index_fetch_link_count(this, XATTROP);
+ index_set_link_count(priv, count, XATTROP);
+ priv->down = _gf_false;
+
+ priv->curr_count = 0;
+ ret = gf_thread_create(&priv->thread, &w_attr, index_worker, this,
+ "idxwrker");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ret,
+ INDEX_MSG_WORKER_THREAD_CREATE_FAILED,
+ "Failed to create worker thread, aborting");
+ goto out;
+ }
+ priv->curr_count++;
+ ret = 0;
+out:
+ GF_FREE(tmp);
+
+ if (ret) {
+ if (cond_inited)
+ pthread_cond_destroy(&priv->cond);
+ if (mutex_inited)
+ pthread_mutex_destroy(&priv->mutex);
+ if (priv && priv->dirty_watchlist)
+ dict_unref(priv->dirty_watchlist);
+ if (priv && priv->pending_watchlist)
+ dict_unref(priv->pending_watchlist);
+ if (priv && priv->complete_watchlist)
+ dict_unref(priv->complete_watchlist);
+ if (priv)
+ GF_FREE(priv);
this->private = NULL;
- LOCK_DESTROY (&priv->lock);
- GF_FREE (priv);
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
+
+ if (attr_inited)
+ pthread_attr_destroy(&w_attr);
+ return ret;
+}
+
+void
+fini(xlator_t *this)
+{
+ index_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+
+ priv->down = _gf_true;
+ pthread_cond_broadcast(&priv->cond);
+ if (priv->thread) {
+ gf_thread_cleanup_xint(priv->thread);
+ priv->thread = 0;
+ }
+ this->private = NULL;
+ LOCK_DESTROY(&priv->lock);
+ pthread_cond_destroy(&priv->cond);
+ pthread_mutex_destroy(&priv->mutex);
+ if (priv->dirty_watchlist)
+ dict_unref(priv->dirty_watchlist);
+ if (priv->pending_watchlist)
+ dict_unref(priv->pending_watchlist);
+ if (priv->complete_watchlist)
+ dict_unref(priv->complete_watchlist);
+ GF_FREE(priv);
+
+ if (this->local_pool) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
out:
- return;
+ return;
}
int
-index_forget (xlator_t *this, inode_t *inode)
+index_forget(xlator_t *this, inode_t *inode)
{
- uint64_t tmp_cache = 0;
- if (!inode_ctx_del (inode, this, &tmp_cache))
- GF_FREE ((index_inode_ctx_t*) (long)tmp_cache);
+ uint64_t tmp_cache = 0;
+ if (!inode_ctx_del(inode, this, &tmp_cache))
+ GF_FREE((index_inode_ctx_t *)(long)tmp_cache);
- return 0;
+ return 0;
}
int32_t
-index_releasedir (xlator_t *this, fd_t *fd)
+index_releasedir(xlator_t *this, fd_t *fd)
{
- index_fd_ctx_t *fctx = NULL;
- uint64_t ctx = 0;
- int ret = 0;
+ index_fd_ctx_t *fctx = NULL;
+ uint64_t ctx = 0;
+ int ret = 0;
- ret = fd_ctx_del (fd, this, &ctx);
- if (ret < 0)
- goto out;
+ ret = fd_ctx_del(fd, this, &ctx);
+ if (ret < 0)
+ goto out;
- fctx = (index_fd_ctx_t*) (long) ctx;
- if (fctx->dir)
- closedir (fctx->dir);
+ fctx = (index_fd_ctx_t *)(long)ctx;
+ if (fctx->dir) {
+ ret = sys_closedir(fctx->dir);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, errno, INDEX_MSG_FD_OP_FAILED,
+ "closedir error");
+ }
- GF_FREE (fctx);
+ GF_FREE(fctx);
out:
- return 0;
+ return 0;
}
int32_t
-index_release (xlator_t *this, fd_t *fd)
+index_release(xlator_t *this, fd_t *fd)
{
- index_fd_ctx_t *fctx = NULL;
- uint64_t ctx = 0;
- int ret = 0;
+ index_fd_ctx_t *fctx = NULL;
+ uint64_t ctx = 0;
+ int ret = 0;
- ret = fd_ctx_del (fd, this, &ctx);
- if (ret < 0)
- goto out;
+ ret = fd_ctx_del(fd, this, &ctx);
+ if (ret < 0)
+ goto out;
- fctx = (index_fd_ctx_t*) (long) ctx;
- GF_FREE (fctx);
+ fctx = (index_fd_ctx_t *)(long)ctx;
+ GF_FREE(fctx);
out:
- return 0;
+ return 0;
}
int
-notify (xlator_t *this, int event, void *data, ...)
+notify(xlator_t *this, int event, void *data, ...)
{
- int ret = 0;
- ret = default_notify (this, event, data);
- return ret;
+ int ret = 0;
+ index_priv_t *priv = NULL;
+ uint64_t stub_cnt = 0;
+ xlator_t *victim = data;
+ struct timespec sleep_till = {
+ 0,
+ };
+
+ if (!this)
+ return 0;
+
+ priv = this->private;
+ if (!priv)
+ return 0;
+
+ if ((event == GF_EVENT_PARENT_DOWN) && victim->cleanup_starting) {
+ stub_cnt = GF_ATOMIC_GET(priv->stub_cnt);
+ timespec_now_realtime(&sleep_till);
+ sleep_till.tv_sec += 1;
+
+ /* Wait for draining stub from queue before notify PARENT_DOWN */
+ pthread_mutex_lock(&priv->mutex);
+ {
+ while (stub_cnt) {
+ (void)pthread_cond_timedwait(&priv->cond, &priv->mutex,
+ &sleep_till);
+ stub_cnt = GF_ATOMIC_GET(priv->stub_cnt);
+ }
+ }
+ pthread_mutex_unlock(&priv->mutex);
+ gf_log(this->name, GF_LOG_INFO,
+ "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name);
+ }
+
+ if ((event == GF_EVENT_CHILD_DOWN) && victim->cleanup_starting) {
+ pthread_mutex_lock(&priv->mutex);
+ {
+ priv->down = _gf_true;
+ pthread_cond_broadcast(&priv->cond);
+ while (priv->curr_count)
+ pthread_cond_wait(&priv->cond, &priv->mutex);
+ }
+ pthread_mutex_unlock(&priv->mutex);
+
+ gf_log(this->name, GF_LOG_INFO,
+ "Notify GF_EVENT_CHILD_DOWN for brick %s", victim->name);
+ }
+
+ ret = default_notify(this, event, data);
+ return ret;
}
struct xlator_fops fops = {
- .xattrop = index_xattrop,
- .fxattrop = index_fxattrop,
-
- //interface functions follow
- .getxattr = index_getxattr,
- .lookup = index_lookup,
- .readdir = index_readdir,
- .unlink = index_unlink
+ .xattrop = index_xattrop,
+ .fxattrop = index_fxattrop,
+
+ // interface functions follow
+ .getxattr = index_getxattr,
+ .lookup = index_lookup,
+ .opendir = index_opendir,
+ .readdir = index_readdir,
+ .unlink = index_unlink,
+ .rmdir = index_rmdir,
+ .fstat = index_fstat,
};
-struct xlator_dumpops dumpops = {
-};
+struct xlator_dumpops dumpops;
-struct xlator_cbks cbks = {
- .forget = index_forget,
- .release = index_release,
- .releasedir = index_releasedir
-};
+struct xlator_cbks cbks = {.forget = index_forget,
+ .release = index_release,
+ .releasedir = index_releasedir};
struct volume_options options[] = {
- { .key = {"index-base" },
- .type = GF_OPTION_TYPE_PATH,
- .description = "path where the index files need to be stored",
- },
- { .key = {NULL} },
+ {.key = {"index-base"},
+ .type = GF_OPTION_TYPE_PATH,
+ .description = "path where the index files need to be stored",
+ .default_value = "{{ brick.path }}/.glusterfs/indices"},
+ {.key = {"xattrop64-watchlist"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Comma separated list of xattrs that are watched",
+ .default_value = "trusted.ec.dirty"},
+ {.key = {"xattrop-dirty-watchlist"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Comma separated list of xattrs that are watched",
+ .default_value = "trusted.afr.dirty"},
+ {.key = {"xattrop-pending-watchlist"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Comma separated list of xattrs that are watched",
+ .default_value = "trusted.afr.{{ volume.name }}"},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "index",
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/features/index/src/index.h b/xlators/features/index/src/index.h
index 5d27b81f85b..a2b6e6e2570 100644
--- a/xlators/features/index/src/index.h
+++ b/xlators/features/index/src/index.h
@@ -1,70 +1,86 @@
/*
- Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __INDEX_H__
#define __INDEX_H__
-#include "xlator.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "byte-order.h"
-#include "common-utils.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/common-utils.h>
#include "index-mem-types.h"
-#define INDEX_THREAD_STACK_SIZE ((size_t)(1024*1024))
+#define INDEX_THREAD_STACK_SIZE ((size_t)(1024 * 1024))
+
+typedef enum { UNKNOWN, IN, NOTIN } index_state_t;
typedef enum {
- UNKNOWN,
- IN,
- NOTIN
-} index_state_t;
+ XATTROP_TYPE_UNSET = -1,
+ XATTROP,
+ DIRTY,
+ ENTRY_CHANGES,
+ XATTROP_TYPE_END
+} index_xattrop_type_t;
typedef struct index_inode_ctx {
- gf_boolean_t processing;
- struct list_head callstubs;
- index_state_t state;
+ gf_boolean_t processing;
+ struct list_head callstubs;
+ int state[XATTROP_TYPE_END];
+ uuid_t virtual_pargfid; /* virtual gfid of dir under
+ .glusterfs/indices/entry-changes. */
} index_inode_ctx_t;
typedef struct index_fd_ctx {
- DIR *dir;
+ DIR *dir;
+ off_t dir_eof;
} index_fd_ctx_t;
typedef struct index_priv {
- char *index_basepath;
- uuid_t index;
- gf_lock_t lock;
- uuid_t xattrop_vgfid;//virtual gfid of the xattrop index dir
- struct list_head callstubs;
- pthread_mutex_t mutex;
- pthread_cond_t cond;
- pthread_attr_t w_attr;
+ char *index_basepath;
+ char *dirty_basepath;
+ uuid_t index;
+ gf_lock_t lock;
+ uuid_t internal_vgfid[XATTROP_TYPE_END];
+ struct list_head callstubs;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ dict_t *dirty_watchlist;
+ dict_t *pending_watchlist;
+ dict_t *complete_watchlist;
+ int64_t pending_count;
+ pthread_t thread;
+ gf_boolean_t down;
+ gf_atomic_t stub_cnt;
+ int32_t curr_count;
} index_priv_t;
-#define INDEX_STACK_UNWIND(fop, frame, params ...) \
-do { \
- if (frame) { \
- inode_t *_inode = frame->local; \
- frame->local = NULL; \
- inode_unref (_inode); \
- } \
- STACK_UNWIND_STRICT (fop, frame, params); \
-} while (0)
+typedef struct index_local {
+ inode_t *inode;
+ dict_t *xdata;
+} index_local_t;
+
+#define INDEX_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ index_local_t *__local = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ if (__local) { \
+ inode_unref(__local->inode); \
+ if (__local->xdata) \
+ dict_unref(__local->xdata); \
+ mem_put(__local); \
+ } \
+ } while (0)
#endif
diff --git a/xlators/features/leases/Makefile.am b/xlators/features/leases/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/leases/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/leases/src/Makefile.am b/xlators/features/leases/src/Makefile.am
new file mode 100644
index 00000000000..a1aef10e299
--- /dev/null
+++ b/xlators/features/leases/src/Makefile.am
@@ -0,0 +1,20 @@
+if WITH_SERVER
+xlator_LTLIBRARIES = leases.la
+endif
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+leases_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+leases_la_SOURCES = leases.c leases-internal.c
+
+leases_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = leases.h leases-mem-types.h leases-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(CONTRIBDIR)/timer-wheel
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/leases/src/leases-internal.c b/xlators/features/leases/src/leases-internal.c
new file mode 100644
index 00000000000..56dee244281
--- /dev/null
+++ b/xlators/features/leases/src/leases-internal.c
@@ -0,0 +1,1412 @@
+/*
+ Copyright (c) 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "leases.h"
+
+/* Mutex locks used in this xlator and their order of acquisition:
+ * Check lease conflict:
+ * lease_ctx lock
+ * add_timer => internal timer locks
+ * lease_ctx unlock
+ *
+ * Add/remove lease:
+ * lease_ctx lock
+ * add_timer => internal timer locks
+ * OR
+ * priv lock => Adding/removing to/from the cleanup client list
+ * priv unlock
+ * lease_ctx unlock
+ *
+ * Timer thread:
+ * Timer internal lock
+ * priv lock => By timer handler
+ * priv unlock
+ * Timer internal unlock
+ *
+ * Expired recall cleanup thread:
+ * priv lock
+ * priv condwait
+ * priv unlock
+ * lease_ctx lock
+ * priv lock
+ * priv unlock
+ * lease_ctx unlock
+ */
+
+/*
+ * Check if lease_lk is enabled
+ * Return Value:
+ * _gf_true - lease lock option enabled
+ * _gf_false - lease lock option disabled
+ */
+gf_boolean_t
+is_leases_enabled(xlator_t *this)
+{
+ leases_private_t *priv = NULL;
+ gf_boolean_t is_enabled = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("leases", this, out);
+
+ if (this->private) {
+ priv = (leases_private_t *)this->private;
+ is_enabled = priv->leases_enabled;
+ }
+out:
+ return is_enabled;
+}
+
+/*
+ * Get the recall_leaselk_timeout
+ * Return Value:
+ * timeout value(in seconds) set as an option to this xlator.
+ * -1 error case
+ */
+static int32_t
+get_recall_lease_timeout(xlator_t *this)
+{
+ leases_private_t *priv = NULL;
+ int32_t timeout = -1;
+
+ GF_VALIDATE_OR_GOTO("leases", this, out);
+
+ if (this->private) {
+ priv = (leases_private_t *)this->private;
+ timeout = priv->recall_lease_timeout;
+ }
+out:
+ return timeout;
+}
+
+static void
+__dump_leases_info(xlator_t *this, lease_inode_ctx_t *lease_ctx)
+{
+ lease_id_entry_t *lease_entry = NULL;
+ lease_id_entry_t *tmp = NULL;
+
+ GF_VALIDATE_OR_GOTO("leases", this, out);
+ GF_VALIDATE_OR_GOTO("leases", lease_ctx, out);
+
+ gf_msg_debug(this->name, 0,
+ "Lease held on this inode, lease_type: %d,"
+ " lease_cnt:%" PRIu64
+ ", RD lease:%d, RW lease:%d, "
+ "openfd cnt:%" PRIu64,
+ lease_ctx->lease_type, lease_ctx->lease_cnt,
+ lease_ctx->lease_type_cnt[GF_RD_LEASE],
+ lease_ctx->lease_type_cnt[GF_RW_LEASE], lease_ctx->openfd_cnt);
+
+ list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list,
+ lease_id_list)
+ {
+ gf_msg_debug(this->name, 0,
+ "Leases held by client: %s, lease "
+ "ID:%s, RD lease:%d, RW lease:%d, lease_type: %d, "
+ "lease_cnt:%" PRIu64,
+ lease_entry->client_uid, lease_entry->lease_id,
+ lease_entry->lease_type_cnt[GF_RD_LEASE],
+ lease_entry->lease_type_cnt[GF_RW_LEASE],
+ lease_entry->lease_type, lease_entry->lease_cnt);
+ }
+out:
+ return;
+}
+
+static int
+__lease_ctx_set(inode_t *inode, xlator_t *this)
+{
+ lease_inode_ctx_t *inode_ctx = NULL;
+ int ret = -1;
+ uint64_t ctx = 0;
+
+ GF_VALIDATE_OR_GOTO("leases", inode, out);
+ GF_VALIDATE_OR_GOTO("leases", this, out);
+
+ ret = __inode_ctx_get(inode, this, &ctx);
+ if (!ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, LEASE_MSG_INVAL_INODE_CTX,
+ "inode_ctx_get failed");
+ goto out;
+ }
+
+ inode_ctx = GF_CALLOC(1, sizeof(*inode_ctx),
+ gf_leases_mt_lease_inode_ctx_t);
+ GF_CHECK_ALLOC(inode_ctx, ret, out);
+
+ pthread_mutex_init(&inode_ctx->lock, NULL);
+ INIT_LIST_HEAD(&inode_ctx->lease_id_list);
+ INIT_LIST_HEAD(&inode_ctx->blocked_list);
+
+ inode_ctx->lease_cnt = 0;
+
+ ret = __inode_ctx_set(inode, this, (uint64_t *)inode_ctx);
+ if (ret) {
+ GF_FREE(inode_ctx);
+ gf_msg(this->name, GF_LOG_INFO, 0, LEASE_MSG_INVAL_INODE_CTX,
+ "failed to set inode ctx (%p)", inode);
+ }
+out:
+ return ret;
+}
+
+static lease_inode_ctx_t *
+__lease_ctx_get(inode_t *inode, xlator_t *this)
+{
+ lease_inode_ctx_t *inode_ctx = NULL;
+ uint64_t ctx = 0;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("leases", inode, out);
+ GF_VALIDATE_OR_GOTO("leases", this, out);
+
+ ret = __inode_ctx_get(inode, this, &ctx);
+ if (ret < 0) {
+ ret = __lease_ctx_set(inode, this);
+ if (ret < 0)
+ goto out;
+
+ ret = __inode_ctx_get(inode, this, &ctx);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, LEASE_MSG_INVAL_INODE_CTX,
+ "failed to get inode ctx (%p)", inode);
+ goto out;
+ }
+ }
+
+ inode_ctx = (lease_inode_ctx_t *)(long)ctx;
+out:
+ return inode_ctx;
+}
+
+lease_inode_ctx_t *
+lease_ctx_get(inode_t *inode, xlator_t *this)
+{
+ lease_inode_ctx_t *inode_ctx = NULL;
+
+ GF_VALIDATE_OR_GOTO("leases", inode, out);
+ GF_VALIDATE_OR_GOTO("leases", this, out);
+
+ LOCK(&inode->lock);
+ {
+ inode_ctx = __lease_ctx_get(inode, this);
+ }
+ UNLOCK(&inode->lock);
+out:
+ return inode_ctx;
+}
+
+static lease_id_entry_t *
+new_lease_id_entry(call_frame_t *frame, const char *lease_id)
+{
+ lease_id_entry_t *lease_entry = NULL;
+
+ GF_VALIDATE_OR_GOTO("leases", frame, out);
+ GF_VALIDATE_OR_GOTO("leases", lease_id, out);
+
+ lease_entry = GF_CALLOC(1, sizeof(*lease_entry),
+ gf_leases_mt_lease_id_entry_t);
+ if (!lease_entry) {
+ gf_msg(frame->this->name, GF_LOG_ERROR, ENOMEM, LEASE_MSG_NO_MEM,
+ "Memory allocation for lease_entry failed");
+ return NULL;
+ }
+
+ INIT_LIST_HEAD(&lease_entry->lease_id_list);
+ lease_entry->lease_type = NONE;
+ lease_entry->lease_cnt = 0;
+ lease_entry->recall_time = get_recall_lease_timeout(frame->this);
+ lease_entry->client_uid = gf_strdup(frame->root->client->client_uid);
+ if (!lease_entry->client_uid) {
+ gf_msg(frame->this->name, GF_LOG_ERROR, ENOMEM, LEASE_MSG_NO_MEM,
+ "Memory allocation for client_uid failed");
+ GF_FREE(lease_entry);
+ lease_entry = NULL;
+ goto out;
+ }
+
+ memcpy(lease_entry->lease_id, lease_id, LEASE_ID_SIZE);
+out:
+ return lease_entry;
+}
+
+static void
+__destroy_lease_id_entry(lease_id_entry_t *lease_entry)
+{
+ GF_VALIDATE_OR_GOTO("leases", lease_entry, out);
+
+ list_del_init(&lease_entry->lease_id_list);
+ GF_FREE(lease_entry->client_uid);
+ GF_FREE(lease_entry);
+out:
+ return;
+}
+
+static inline gf_boolean_t
+__is_same_lease_id(const char *k1, const char *k2)
+{
+ if (memcmp(k1, k2, strlen(k1)) == 0)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+/* Checks if there are any leases, other than the leases taken
+ * by the given lease_id
+ */
+static gf_boolean_t
+__another_lease_found(lease_inode_ctx_t *lease_ctx, const char *lease_id)
+{
+ lease_id_entry_t *lease_entry = NULL;
+ lease_id_entry_t *tmp = NULL;
+ gf_boolean_t found_lease = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("leases", lease_id, out);
+ GF_VALIDATE_OR_GOTO("leases", lease_ctx, out);
+
+ list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list,
+ lease_id_list)
+ {
+ if (!__is_same_lease_id(lease_id, lease_entry->lease_id)) {
+ if (lease_entry->lease_cnt > 0) {
+ found_lease = _gf_true;
+ break;
+ }
+ }
+ }
+out:
+ return found_lease;
+}
+
+/* Returns the lease_id_entry for a given lease_id and a given inode.
+ * Return values:
+ * NULL - If no client entry found
+ * lease_id_entry_t* - a pointer to the client entry if found
+ */
+static lease_id_entry_t *
+__get_lease_id_entry(lease_inode_ctx_t *lease_ctx, const char *lease_id)
+{
+ lease_id_entry_t *lease_entry = NULL;
+ lease_id_entry_t *tmp = NULL;
+ lease_id_entry_t *found = NULL;
+
+ GF_VALIDATE_OR_GOTO("leases", lease_id, out);
+ GF_VALIDATE_OR_GOTO("leases", lease_ctx, out);
+
+ list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list,
+ lease_id_list)
+ {
+ if (__is_same_lease_id(lease_id, lease_entry->lease_id)) {
+ found = lease_entry;
+ gf_msg_debug("leases", 0,
+ "lease ID entry found "
+ "Client UID:%s, lease id:%s",
+ lease_entry->client_uid,
+ leaseid_utoa(lease_entry->lease_id));
+ break;
+ }
+ }
+out:
+ return found;
+}
+
+/* Returns the lease_id_entry for a given lease_id and a given inode,
+ * if none found creates one.
+ * Return values:
+ * lease_id_entry_t* - a pointer to the client entry
+ */
+static lease_id_entry_t *
+__get_or_new_lease_entry(call_frame_t *frame, const char *lease_id,
+ lease_inode_ctx_t *lease_ctx)
+{
+ lease_id_entry_t *lease_entry = NULL;
+
+ GF_VALIDATE_OR_GOTO("leases", frame, out);
+ GF_VALIDATE_OR_GOTO("leases", lease_id, out);
+ GF_VALIDATE_OR_GOTO("leases", lease_ctx, out);
+
+ lease_entry = __get_lease_id_entry(lease_ctx, lease_id);
+ if (!lease_entry) { /* create one */
+ lease_entry = new_lease_id_entry(frame, lease_id);
+ if (!lease_entry)
+ goto out;
+
+ list_add_tail(&lease_entry->lease_id_list, &lease_ctx->lease_id_list);
+
+ gf_msg_debug(frame->this->name, 0,
+ "lease ID entry added,"
+ " Client UID:%s, lease id:%s",
+ lease_entry->client_uid,
+ leaseid_utoa(lease_entry->lease_id));
+ }
+out:
+ return lease_entry;
+}
+
+static lease_inode_t *
+new_lease_inode(inode_t *inode)
+{
+ lease_inode_t *l_inode = GF_MALLOC(sizeof(*l_inode),
+ gf_leases_mt_lease_inode_t);
+ if (!l_inode)
+ goto out;
+
+ INIT_LIST_HEAD(&l_inode->list);
+ l_inode->inode = inode_ref(inode);
+out:
+ return l_inode;
+}
+
+static void
+__destroy_lease_inode(lease_inode_t *l_inode)
+{
+ list_del_init(&l_inode->list);
+ inode_unref(l_inode->inode);
+ GF_FREE(l_inode);
+}
+
+static lease_client_t *
+new_lease_client(const char *client_uid)
+{
+ lease_client_t *clnt = GF_MALLOC(sizeof(*clnt),
+ gf_leases_mt_lease_client_t);
+ if (!clnt)
+ goto out;
+
+ INIT_LIST_HEAD(&clnt->client_list);
+ INIT_LIST_HEAD(&clnt->inode_list);
+ clnt->client_uid = gf_strdup(client_uid);
+out:
+ return clnt;
+}
+
+static void
+__destroy_lease_client(lease_client_t *clnt)
+{
+ list_del_init(&clnt->inode_list);
+ list_del_init(&clnt->client_list);
+ GF_FREE(clnt);
+
+ return;
+}
+
+static lease_client_t *
+__get_lease_client(xlator_t *this, leases_private_t *priv,
+ const char *client_uid)
+{
+ lease_client_t *clnt = NULL;
+ lease_client_t *tmp = NULL;
+ lease_client_t *found = NULL;
+
+ list_for_each_entry_safe(clnt, tmp, &priv->client_list, client_list)
+ {
+ if ((strcmp(clnt->client_uid, client_uid) == 0)) {
+ found = clnt;
+ gf_msg_debug(this->name, 0,
+ "Client:%s already found "
+ "in the cleanup list",
+ client_uid);
+ break;
+ }
+ }
+ return found;
+}
+
+static lease_client_t *
+__get_or_new_lease_client(xlator_t *this, leases_private_t *priv,
+ const char *client_uid)
+{
+ lease_client_t *found = NULL;
+
+ found = __get_lease_client(this, priv, client_uid);
+ if (!found) {
+ found = new_lease_client(client_uid);
+ if (!found)
+ goto out;
+ list_add_tail(&found->client_list, &priv->client_list);
+ gf_msg_debug(this->name, 0,
+ "Adding a new client:%s entry "
+ "to the cleanup list",
+ client_uid);
+ }
+out:
+ return found;
+}
+
+static int
+add_inode_to_client_list(xlator_t *this, inode_t *inode, const char *client_uid)
+{
+ leases_private_t *priv = this->private;
+ lease_client_t *clnt = NULL;
+
+ lease_inode_t *lease_inode = new_lease_inode(inode);
+ if (!lease_inode)
+ return -ENOMEM;
+
+ pthread_mutex_lock(&priv->mutex);
+ {
+ clnt = __get_or_new_lease_client(this, priv, client_uid);
+ if (!clnt) {
+ pthread_mutex_unlock(&priv->mutex);
+ __destroy_lease_inode(lease_inode);
+ return -ENOMEM;
+ }
+ list_add_tail(&clnt->inode_list, &lease_inode->list);
+ }
+ pthread_mutex_unlock(&priv->mutex);
+ gf_msg_debug(this->name, 0,
+ "Added a new inode:%p to the client(%s) "
+ "cleanup list, gfid(%s)",
+ inode, client_uid, uuid_utoa(inode->gfid));
+ return 0;
+}
+
+/* Add lease entry to the corresponding client entry.
+ * Return values:
+ * 0 Success
+ * -1 Failure
+ */
+static int
+__add_lease(call_frame_t *frame, inode_t *inode, lease_inode_ctx_t *lease_ctx,
+ const char *client_uid, struct gf_lease *lease)
+{
+ lease_id_entry_t *lease_entry = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("leases", frame, out);
+ GF_VALIDATE_OR_GOTO("leases", client_uid, out);
+ GF_VALIDATE_OR_GOTO("leases", lease_ctx, out);
+ GF_VALIDATE_OR_GOTO("leases", inode, out);
+ GF_VALIDATE_OR_GOTO("leases", lease, out);
+
+ gf_msg_trace(frame->this->name, 0,
+ "Granting lease lock to client %s with lease id %s"
+ " on gfid(%s)",
+ client_uid, leaseid_utoa(lease->lease_id),
+ uuid_utoa(inode->gfid));
+
+ lease_entry = __get_or_new_lease_entry(frame, lease->lease_id, lease_ctx);
+ if (!lease_entry) {
+ errno = ENOMEM;
+ goto out;
+ }
+
+ lease_entry->lease_type_cnt[lease->lease_type]++;
+ lease_entry->lease_cnt++;
+ lease_entry->lease_type |= lease->lease_type;
+ /* If this is the first lease taken by the client on the file, then
+ * add this inode/file to the client disconnect cleanup list
+ */
+ if (lease_entry->lease_cnt == 1) {
+ add_inode_to_client_list(frame->this, inode, client_uid);
+ }
+
+ lease_ctx->lease_cnt++;
+ lease_ctx->lease_type_cnt[lease->lease_type]++;
+ lease_ctx->lease_type |= lease->lease_type;
+
+ /* Take a ref for the first lock taken on this inode. Corresponding
+ * unref when all the leases are unlocked or during DISCONNECT
+ * Ref is required because the inode on which lease is acquired should
+ * not be deleted when lru cleanup kicks in*/
+ if (lease_ctx->lease_cnt == 1) {
+ lease_ctx->inode = inode_ref(inode);
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static gf_boolean_t
+__is_clnt_lease_none(const char *client_uid, lease_inode_ctx_t *lease_ctx)
+{
+ gf_boolean_t lease_none = _gf_true;
+ lease_id_entry_t *lease_entry = NULL;
+ lease_id_entry_t *tmp = NULL;
+
+ list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list,
+ lease_id_list)
+ {
+ if ((strcmp(client_uid, lease_entry->client_uid) == 0) &&
+ (lease_entry->lease_cnt != 0)) {
+ lease_none = _gf_false;
+ break;
+ }
+ }
+
+ return lease_none;
+}
+
+static int
+__remove_inode_from_clnt_list(xlator_t *this, lease_client_t *clnt,
+ inode_t *inode)
+{
+ int ret = -1;
+ lease_inode_t *l_inode = NULL;
+ lease_inode_t *tmp1 = NULL;
+
+ list_for_each_entry_safe(l_inode, tmp1, &clnt->inode_list, list)
+ {
+ if (l_inode->inode == inode) {
+ __destroy_lease_inode(l_inode);
+ gf_msg_debug(this->name, 0,
+ "Removed the inode from the client cleanup list");
+ ret = 0;
+ }
+ }
+ /* TODO: Remove the client entry from the cleanup list */
+
+ return ret;
+}
+
+static int
+remove_from_clnt_list(xlator_t *this, const char *client_uid, inode_t *inode)
+{
+ leases_private_t *priv = NULL;
+ int ret = -1;
+ lease_client_t *clnt = NULL;
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+
+ pthread_mutex_lock(&priv->mutex);
+ {
+ clnt = __get_lease_client(this, priv, client_uid);
+ if (!clnt) {
+ pthread_mutex_unlock(&priv->mutex);
+ gf_msg(this->name, GF_LOG_ERROR, 0, LEASE_MSG_CLNT_NOTFOUND,
+ "There is no client entry found in the cleanup list");
+ goto out;
+ }
+ ret = __remove_inode_from_clnt_list(this, clnt, inode);
+ if (ret) {
+ pthread_mutex_unlock(&priv->mutex);
+ gf_msg(this->name, GF_LOG_ERROR, 0, LEASE_MSG_INODE_NOTFOUND,
+ "There is no inode entry found in the cleanup list");
+ goto out;
+ }
+ }
+ pthread_mutex_unlock(&priv->mutex);
+out:
+ return ret;
+}
+
+/* Remove lease entry in the corresponding client entry.
+ */
+static int
+__remove_lease(xlator_t *this, inode_t *inode, lease_inode_ctx_t *lease_ctx,
+ const char *client_uid, struct gf_lease *lease)
+{
+ lease_id_entry_t *lease_entry = NULL;
+ int ret = 0;
+ int32_t lease_type = 0;
+ leases_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("leases", lease_ctx, out);
+ GF_VALIDATE_OR_GOTO("leases", lease, out);
+
+ priv = this->private;
+
+ gf_msg_trace(this->name, 0,
+ "Removing lease entry for client: %s, "
+ "lease type:%d, lease id:%s",
+ client_uid, lease->lease_type, leaseid_utoa(lease->lease_id));
+
+ /* There could be a race where in server recalled the lease and by the time
+ * client sends lease_unlock request, server may have revoked it. To handle
+ * such cases, if lease doesnt exist treat it as noop and return success.
+ */
+ lease_entry = __get_lease_id_entry(lease_ctx, lease->lease_id);
+ if (!lease_entry) {
+ gf_msg(this->name, GF_LOG_INFO, 0, LEASE_MSG_INVAL_UNLK_LEASE,
+ "Got unlock lease request from client:%s, but has no "
+ "corresponding lock",
+ client_uid);
+ ret = 0;
+ goto out;
+ }
+
+ if (!(lease_entry->lease_type & lease->lease_type)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, LEASE_MSG_INVAL_UNLK_LEASE,
+ "Got unlock lease request from client:%s for an invalid "
+ "lease_type",
+ client_uid);
+ ret = -EINVAL;
+ errno = EINVAL;
+ goto out;
+ }
+ lease_type = lease->lease_type;
+ lease_entry->lease_type_cnt[lease_type]--;
+ lease_entry->lease_cnt--;
+
+ lease_ctx->lease_type_cnt[lease_type]--;
+ lease_ctx->lease_cnt--;
+
+ if (lease_entry->lease_type_cnt[lease_type] == 0)
+ lease_entry->lease_type = lease_entry->lease_type & (~lease_type);
+
+ if (lease_ctx->lease_type_cnt[lease_type] == 0)
+ lease_ctx->lease_type = lease_ctx->lease_type & (~lease_type);
+
+ if (lease_entry->lease_cnt == 0) {
+ if (__is_clnt_lease_none(client_uid, lease_ctx)) {
+ gf_msg_trace(this->name, 0,
+ "Client(%s) has no leases"
+ " on gfid (%s), hence removing the inode"
+ " from the client cleanup list",
+ client_uid, uuid_utoa(inode->gfid));
+ remove_from_clnt_list(this, client_uid, lease_ctx->inode);
+ }
+ __destroy_lease_id_entry(lease_entry);
+ lease_ctx->blocked_fops_resuming = _gf_true;
+ }
+
+ if (lease_ctx->lease_cnt == 0 && lease_ctx->timer) {
+ ret = gf_tw_del_timer(priv->timer_wheel, lease_ctx->timer);
+ lease_ctx->recall_in_progress = _gf_false;
+ lease_ctx->timer = NULL;
+ }
+out:
+ return ret;
+}
+
+static gf_boolean_t
+__is_lease_grantable(xlator_t *this, lease_inode_ctx_t *lease_ctx,
+ struct gf_lease *lease, inode_t *inode)
+{
+ uint32_t fd_count = 0;
+ int32_t flags = 0;
+ fd_t *iter_fd = NULL;
+ gf_boolean_t grant = _gf_false;
+ int ret = 0;
+ lease_fd_ctx_t *fd_ctx = NULL;
+ uint64_t ctx = 0;
+
+ GF_VALIDATE_OR_GOTO("leases", lease_ctx, out);
+ GF_VALIDATE_OR_GOTO("leases", lease, out);
+ GF_VALIDATE_OR_GOTO("leases", inode, out);
+
+ if (lease_ctx->recall_in_progress) {
+ gf_msg_debug(this->name, 0,
+ "Recall in progress, hence "
+ "failing the lease request");
+ grant = _gf_false;
+ goto out;
+ }
+
+ if (lease_ctx->blocked_fops_resuming) {
+ gf_msg_debug(this->name, 0,
+ "Previously blocked fops resuming, hence "
+ "failing the lease request");
+ grant = _gf_false;
+ goto out;
+ }
+
+ LOCK(&inode->lock);
+ {
+ list_for_each_entry(iter_fd, &inode->fd_list, inode_list)
+ {
+ ret = fd_ctx_get(iter_fd, this, &ctx);
+ if (ret < 0) {
+ grant = _gf_false;
+ UNLOCK(&inode->lock);
+ gf_msg(this->name, GF_LOG_ERROR, 0, LEASE_MSG_INVAL_FD_CTX,
+ "Unable to get fd ctx");
+ goto out;
+ }
+ fd_ctx = (lease_fd_ctx_t *)(long)ctx;
+
+ /* Check for open fd conflict, note that open fds from
+ * the same lease id is not checked for conflict, as it is
+ * lease id based lease.
+ */
+ if (fd_ctx->client_uid != NULL &&
+ !__is_same_lease_id(fd_ctx->lease_id, lease->lease_id)) {
+ fd_count++;
+ flags |= iter_fd->flags;
+ }
+ }
+ }
+ UNLOCK(&inode->lock);
+
+ gf_msg_debug(this->name, 0, "open fd count:%d flags:%d", fd_count, flags);
+
+ __dump_leases_info(this, lease_ctx);
+
+ switch (lease->lease_type) {
+ case GF_RD_LEASE:
+ /* check open fd conflict */
+ if ((fd_count > 0) && ((flags & O_WRONLY) || (flags & O_RDWR))) {
+ grant = _gf_false;
+ break;
+ }
+
+ /* check for conflict with existing leases */
+ if (lease_ctx->lease_type == NONE ||
+ lease_ctx->lease_type == GF_RD_LEASE ||
+ !(__another_lease_found(lease_ctx, lease->lease_id)))
+ grant = _gf_true;
+ else
+ grant = _gf_false;
+ break;
+
+ case GF_RW_LEASE:
+ /* check open fd conflict; conflict if there are any fds open
+ * other than the client on which the lease is requested. */
+ if (fd_count > 0) {
+ grant = _gf_false;
+ break;
+ }
+
+ /* check existing lease conflict */
+ if (lease_ctx->lease_type == NONE ||
+ !(__another_lease_found(lease_ctx, lease->lease_id)))
+ grant = _gf_true;
+ else
+ grant = _gf_false;
+ break;
+
+ default:
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, LEASE_MSG_INVAL_LEASE_TYPE,
+ "Invalid lease type specified");
+ break;
+ }
+out:
+ return grant;
+}
+
+static void
+do_blocked_fops(xlator_t *this, lease_inode_ctx_t *lease_ctx)
+{
+ struct list_head wind_list;
+ fop_stub_t *blk_fop = NULL;
+ fop_stub_t *tmp = NULL;
+
+ INIT_LIST_HEAD(&wind_list);
+
+ pthread_mutex_lock(&lease_ctx->lock);
+ {
+ if (!lease_ctx->blocked_fops_resuming) {
+ /* lease_ctx->blocked_fops_resuming will be set
+ * only when the last lease is released. That
+ * is when we need to resume blocked fops and unref
+ * the inode taken in __add_lease (when lease_cnt == 1).
+ * Return otherwise.
+ */
+ pthread_mutex_unlock(&lease_ctx->lock);
+ return;
+ }
+
+ list_for_each_entry_safe(blk_fop, tmp, &lease_ctx->blocked_list, list)
+ {
+ list_del_init(&blk_fop->list);
+ list_add_tail(&blk_fop->list, &wind_list);
+ }
+ }
+ pthread_mutex_unlock(&lease_ctx->lock);
+
+ gf_msg_trace(this->name, 0, "Executing the blocked stubs on gfid(%s)",
+ uuid_utoa(lease_ctx->inode->gfid));
+ list_for_each_entry_safe(blk_fop, tmp, &wind_list, list)
+ {
+ list_del_init(&blk_fop->list);
+ gf_msg_trace(this->name, 0, "Executing fop:%d", blk_fop->stub->fop);
+ call_resume(blk_fop->stub);
+ GF_FREE(blk_fop);
+ }
+
+ pthread_mutex_lock(&lease_ctx->lock);
+ {
+ lease_ctx->lease_type = NONE;
+ /* unref the inode taken in __add_lease
+ * (when lease_cnt == 1) */
+ lease_ctx->blocked_fops_resuming = _gf_false;
+ inode_unref(lease_ctx->inode);
+ lease_ctx->inode = NULL;
+ }
+ pthread_mutex_unlock(&lease_ctx->lock);
+
+ return;
+}
+
+void
+recall_lease_timer_handler(struct gf_tw_timer_list *timer, void *data,
+ unsigned long calltime)
+{
+ inode_t *inode = NULL;
+ lease_inode_t *lease_inode = NULL;
+ leases_private_t *priv = NULL;
+ lease_timer_data_t *timer_data = NULL;
+
+ timer_data = data;
+
+ priv = timer_data->this->private;
+ inode = timer_data->inode;
+ lease_inode = new_lease_inode(inode);
+ if (!lease_inode) {
+ errno = ENOMEM;
+ goto out;
+ }
+ pthread_mutex_lock(&priv->mutex);
+ {
+ list_add_tail(&lease_inode->list, &priv->recall_list);
+ pthread_cond_broadcast(&priv->cond);
+ }
+ pthread_mutex_unlock(&priv->mutex);
+out:
+ /* unref the inode_ref taken by timer_data in __recall_lease */
+ inode_unref(timer_data->inode);
+
+ GF_FREE(timer);
+}
+
+static void
+__recall_lease(xlator_t *this, lease_inode_ctx_t *lease_ctx)
+{
+ lease_id_entry_t *lease_entry = NULL;
+ lease_id_entry_t *tmp = NULL;
+ struct gf_upcall up_req = {
+ 0,
+ };
+ struct gf_upcall_recall_lease recall_req = {
+ 0,
+ };
+ int notify_ret = -1;
+ struct gf_tw_timer_list *timer = NULL;
+ leases_private_t *priv = NULL;
+ lease_timer_data_t *timer_data = NULL;
+ time_t recall_time;
+
+ if (lease_ctx->recall_in_progress) {
+ gf_msg_debug(this->name, 0,
+ "Lease recall is already in "
+ "progress, hence not sending another recall");
+ goto out;
+ }
+
+ priv = this->private;
+ recall_time = gf_time();
+ list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list,
+ lease_id_list)
+ {
+ gf_uuid_copy(up_req.gfid, lease_ctx->inode->gfid);
+ up_req.client_uid = lease_entry->client_uid;
+ up_req.event_type = GF_UPCALL_RECALL_LEASE;
+ up_req.data = &recall_req;
+
+ notify_ret = this->notify(this, GF_EVENT_UPCALL, &up_req);
+ if (notify_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, LEASE_MSG_RECALL_FAIL,
+ "Recall notification to client: %s failed",
+ lease_entry->client_uid);
+ /* Do not return from here, continue registering the timer,
+ this is required mostly o keep replicas in sync*/
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Recall lease (all)"
+ "notification sent to client %s",
+ lease_entry->client_uid);
+ }
+
+ lease_ctx->recall_in_progress = _gf_true;
+ lease_entry->recall_time = recall_time;
+ }
+ timer = GF_MALLOC(sizeof(*timer), gf_common_mt_tw_timer_list);
+ if (!timer) {
+ goto out;
+ }
+ timer_data = GF_MALLOC(sizeof(lease_timer_data_t),
+ gf_leases_mt_timer_data_t);
+ if (!timer_data) {
+ GF_FREE(timer);
+ goto out;
+ }
+
+ timer_data->inode = inode_ref(lease_ctx->inode);
+ timer_data->this = this;
+ timer->data = timer_data;
+
+ INIT_LIST_HEAD(&timer->entry);
+ timer->expires = get_recall_lease_timeout(this);
+ timer->function = recall_lease_timer_handler;
+ lease_ctx->timer = timer;
+ gf_tw_add_timer(priv->timer_wheel, timer);
+ gf_msg_trace(this->name, 0,
+ "Registering timer "
+ "%p, after "
+ "sending recall",
+ timer);
+out:
+ return;
+}
+
+/* ret = 0; STACK_UNWIND Success
+ * ret = -1; STACK_UNWIND failure
+ */
+int
+process_lease_req(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ struct gf_lease *lease)
+{
+ int ret = 0;
+ char *client_uid = NULL;
+ lease_inode_ctx_t *lease_ctx = NULL;
+
+ GF_VALIDATE_OR_GOTO("leases", frame, out);
+ GF_VALIDATE_OR_GOTO("leases", this, out);
+ GF_VALIDATE_OR_GOTO("leases", inode, out);
+ GF_VALIDATE_OR_GOTO("leases", lease, out);
+
+ client_uid = frame->root->client->client_uid;
+
+ if (!is_valid_lease_id(lease->lease_id)) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, LEASE_MSG_INVAL_LEASE_ID,
+ "Invalid lease id, from"
+ "client:%s",
+ client_uid);
+ ret = -EINVAL;
+ errno = EINVAL;
+ goto out;
+ }
+
+ lease_ctx = lease_ctx_get(inode, this);
+ if (!lease_ctx) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM,
+ "Unable to create/get inode ctx, "
+ "inode:%p",
+ inode);
+ ret = -ENOMEM;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0,
+ "Lease request from client: %s, "
+ "lease type:%d, lease cmd:%d, lease ID:%s, gfid:%s",
+ client_uid, lease->lease_type, lease->cmd,
+ leaseid_utoa(lease->lease_id), uuid_utoa(inode->gfid));
+
+ pthread_mutex_lock(&lease_ctx->lock);
+ {
+ switch (lease->cmd) {
+ case GF_GET_LEASE:
+ lease->lease_type = lease_ctx->lease_type;
+ gf_msg_debug(this->name, 0,
+ "Get lease, existing lease"
+ "type: %d",
+ lease_ctx->lease_type);
+ /*TODO:Should it consider lease id or client_uid?*/
+ break;
+
+ case GF_SET_LEASE:
+ if (__is_lease_grantable(this, lease_ctx, lease, inode)) {
+ __add_lease(frame, inode, lease_ctx, client_uid, lease);
+ ret = 0;
+ } else {
+ gf_msg_debug(this->name, GF_LOG_DEBUG,
+ "Not granting the conflicting lease"
+ " request from %s on gfid(%s)",
+ client_uid, uuid_utoa(inode->gfid));
+ __recall_lease(this, lease_ctx);
+ ret = -1;
+ }
+ break;
+ case GF_UNLK_LEASE:
+ ret = __remove_lease(this, inode, lease_ctx, client_uid, lease);
+ if ((ret >= 0) && (lease_ctx->lease_cnt == 0)) {
+ pthread_mutex_unlock(&lease_ctx->lock);
+ goto unblock;
+ }
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ }
+ pthread_mutex_unlock(&lease_ctx->lock);
+
+ return ret;
+
+unblock:
+ do_blocked_fops(this, lease_ctx);
+out:
+ return ret;
+}
+
+/* ret = 1 conflict
+ * ret = 0 no conflict
+ */
+gf_boolean_t
+__check_lease_conflict(call_frame_t *frame, lease_inode_ctx_t *lease_ctx,
+ const char *lease_id, gf_boolean_t is_write)
+{
+ gf_lease_types_t lease_type = {
+ 0,
+ };
+ gf_boolean_t conflicts = _gf_false;
+ lease_id_entry_t *lease_entry = NULL;
+
+ GF_VALIDATE_OR_GOTO("leases", frame, out);
+ GF_VALIDATE_OR_GOTO("leases", lease_ctx, out);
+
+ lease_type = lease_ctx->lease_type;
+
+ /* If the fop is rename or unlink conflict the lease even if its
+ * from the same client??
+ */
+ if ((frame->root->op == GF_FOP_RENAME) ||
+ (frame->root->op == GF_FOP_UNLINK)) {
+ conflicts = _gf_true;
+ goto recall;
+ }
+
+ /* As internal fops are used to maintain data integrity but do not
+ * make modififications to the client data, no need to conflict with
+ * them.
+ *
+ * @todo: like for locks, even lease state has to be handled by
+ * rebalance or self-heal daemon process. */
+ if (frame->root->pid < 0) {
+ conflicts = _gf_false;
+ goto recall;
+ }
+
+ /* If lease_id is not sent, set conflicts = true if there is
+ * an existing lease */
+ if (!lease_id && (lease_ctx->lease_cnt > 0)) {
+ conflicts = _gf_true;
+ goto recall;
+ }
+
+ switch (lease_type) {
+ case (GF_RW_LEASE | GF_RD_LEASE):
+ case GF_RW_LEASE:
+ lease_entry = __get_lease_id_entry(lease_ctx, lease_id);
+ if (lease_entry && (lease_entry->lease_type & GF_RW_LEASE))
+ conflicts = _gf_false;
+ else
+ conflicts = _gf_true;
+ break;
+ case GF_RD_LEASE:
+ if (is_write && __another_lease_found(lease_ctx, lease_id))
+ conflicts = _gf_true;
+ else
+ conflicts = _gf_false;
+ break;
+ default:
+ break;
+ }
+
+recall:
+ /* If there is a conflict found and recall is not already sent to all
+ * the clients, then send recall to each of the client holding lease.
+ */
+ if (conflicts)
+ __recall_lease(frame->this, lease_ctx);
+out:
+ return conflicts;
+}
+
+/* Return values:
+ * -1 : error, unwind the fop
+ * WIND_FOP: No conflict, wind the fop
+ * BLOCK_FOP: Found a conflicting lease, block the fop
+ */
+int
+check_lease_conflict(call_frame_t *frame, inode_t *inode, const char *lease_id,
+ uint32_t fop_flags)
+{
+ lease_inode_ctx_t *lease_ctx = NULL;
+ gf_boolean_t is_blocking_fop = _gf_false;
+ gf_boolean_t is_write_fop = _gf_false;
+ gf_boolean_t conflicts = _gf_false;
+ int ret = WIND_FOP;
+
+ lease_ctx = lease_ctx_get(inode, frame->this);
+ if (!lease_ctx) {
+ gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM,
+ "Unable to create/get inode ctx");
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ is_blocking_fop = ((fop_flags & BLOCKING_FOP) != 0);
+ is_write_fop = ((fop_flags & DATA_MODIFY_FOP) != 0);
+
+ pthread_mutex_lock(&lease_ctx->lock);
+ {
+ if (lease_ctx->lease_type == NONE) {
+ pthread_mutex_unlock(&lease_ctx->lock);
+ gf_msg_debug(frame->this->name, 0,
+ "No leases found continuing with the"
+ " fop:%s",
+ gf_fop_list[frame->root->op]);
+ ret = WIND_FOP;
+ goto out;
+ }
+ conflicts = __check_lease_conflict(frame, lease_ctx, lease_id,
+ is_write_fop);
+ if (conflicts) {
+ if (is_blocking_fop) {
+ gf_msg_debug(frame->this->name, 0,
+ "Fop: %s "
+ "conflicting existing "
+ "lease: %d, blocking the"
+ "fop",
+ gf_fop_list[frame->root->op],
+ lease_ctx->lease_type);
+ ret = BLOCK_FOP;
+ } else {
+ gf_msg_debug(frame->this->name, 0,
+ "Fop: %s "
+ "conflicting existing "
+ "lease: %d, sending "
+ "EAGAIN",
+ gf_fop_list[frame->root->op],
+ lease_ctx->lease_type);
+ errno = EAGAIN;
+ ret = -1;
+ }
+ }
+ }
+ pthread_mutex_unlock(&lease_ctx->lock);
+out:
+ return ret;
+}
+
+static int
+remove_clnt_leases(const char *client_uid, inode_t *inode, xlator_t *this)
+{
+ lease_inode_ctx_t *lease_ctx = NULL;
+ lease_id_entry_t *lease_entry = NULL;
+ lease_id_entry_t *tmp = NULL;
+ int ret = 0;
+ int i = 0;
+
+ lease_ctx = lease_ctx_get(inode, this);
+ if (!lease_ctx) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_INVAL_INODE_CTX,
+ "Unable to create/get inode ctx");
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ pthread_mutex_lock(&lease_ctx->lock);
+ {
+ list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list,
+ lease_id_list)
+ {
+ if (strcmp(client_uid, lease_entry->client_uid) == 0) {
+ for (i = 0; i < GF_LEASE_MAX_TYPE; i++) {
+ lease_ctx->lease_type_cnt[i] -= lease_entry
+ ->lease_type_cnt[i];
+ }
+ lease_ctx->lease_cnt -= lease_entry->lease_cnt;
+ __destroy_lease_id_entry(lease_entry);
+ if (lease_ctx->lease_cnt == 0) {
+ lease_ctx->blocked_fops_resuming = _gf_true;
+ pthread_mutex_unlock(&lease_ctx->lock);
+ goto unblock;
+ }
+ }
+ }
+ }
+ pthread_mutex_unlock(&lease_ctx->lock);
+out:
+ return ret;
+
+unblock:
+ do_blocked_fops(this, lease_ctx);
+ return ret;
+}
+
+int
+cleanup_client_leases(xlator_t *this, const char *client_uid)
+{
+ lease_client_t *clnt = NULL;
+ lease_client_t *tmp = NULL;
+ struct list_head cleanup_list = {
+ 0,
+ };
+ lease_inode_t *l_inode = NULL;
+ lease_inode_t *tmp1 = NULL;
+ leases_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ if (!priv) {
+ ret = -1;
+ errno = EINVAL;
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&cleanup_list);
+ pthread_mutex_lock(&priv->mutex);
+ {
+ list_for_each_entry_safe(clnt, tmp, &priv->client_list, client_list)
+ {
+ if ((strcmp(clnt->client_uid, client_uid) == 0)) {
+ list_for_each_entry_safe(l_inode, tmp1, &clnt->inode_list, list)
+ {
+ list_del_init(&l_inode->list);
+ list_add_tail(&l_inode->list, &cleanup_list);
+ }
+ __destroy_lease_client(clnt);
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock(&priv->mutex);
+
+ l_inode = tmp1 = NULL;
+ list_for_each_entry_safe(l_inode, tmp1, &cleanup_list, list)
+ {
+ remove_clnt_leases(client_uid, l_inode->inode, this);
+ __destroy_lease_inode(l_inode);
+ }
+out:
+ return ret;
+}
+
+static void
+__remove_all_leases(xlator_t *this, lease_inode_ctx_t *lease_ctx)
+{
+ int i = 0;
+ lease_id_entry_t *lease_entry = NULL;
+ lease_id_entry_t *tmp = NULL;
+
+ if (lease_ctx->lease_cnt == 0) {
+ /* No leases to remove. Return */
+ return;
+ }
+ __dump_leases_info(this, lease_ctx);
+
+ list_for_each_entry_safe(lease_entry, tmp, &lease_ctx->lease_id_list,
+ lease_id_list)
+ {
+ lease_entry->lease_cnt = 0;
+ remove_from_clnt_list(this, lease_entry->client_uid, lease_ctx->inode);
+ __destroy_lease_id_entry(lease_entry);
+ }
+ INIT_LIST_HEAD(&lease_ctx->lease_id_list);
+ for (i = 0; i <= GF_LEASE_MAX_TYPE; i++)
+ lease_ctx->lease_type_cnt[i] = 0;
+ lease_ctx->lease_type = 0;
+ lease_ctx->lease_cnt = 0;
+ lease_ctx->recall_in_progress = _gf_false;
+ lease_ctx->timer = NULL;
+ lease_ctx->blocked_fops_resuming = _gf_true;
+
+ /* TODO:
+ * - Mark the corresponding fd bad. Could be done on client side
+ * as a result of recall
+ * - Free the lease_ctx
+ */
+ return;
+}
+
+static int
+remove_all_leases(xlator_t *this, inode_t *inode)
+{
+ lease_inode_ctx_t *lease_ctx = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("leases", inode, out);
+
+ lease_ctx = lease_ctx_get(inode, this);
+ if (!lease_ctx) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_INVAL_INODE_CTX,
+ "Unable to create/get inode ctx");
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ pthread_mutex_lock(&lease_ctx->lock);
+ {
+ __remove_all_leases(this, lease_ctx);
+ }
+ pthread_mutex_unlock(&lease_ctx->lock);
+
+ do_blocked_fops(this, lease_ctx);
+out:
+ return ret;
+}
+
+void *
+expired_recall_cleanup(void *data)
+{
+ struct timespec sleep_till = {
+ 0,
+ };
+ struct list_head recall_cleanup_list;
+ lease_inode_t *recall_entry = NULL;
+ lease_inode_t *tmp = NULL;
+ leases_private_t *priv = NULL;
+ xlator_t *this = NULL;
+ time_t time_now;
+
+ GF_VALIDATE_OR_GOTO("leases", data, out);
+
+ this = data;
+ priv = this->private;
+
+ gf_msg_debug(this->name, 0, "Started the expired_recall_cleanup thread");
+
+ while (1) {
+ time_now = gf_time();
+ pthread_mutex_lock(&priv->mutex);
+ {
+ if (priv->fini) {
+ pthread_mutex_unlock(&priv->mutex);
+ goto out;
+ }
+ INIT_LIST_HEAD(&recall_cleanup_list);
+ if (list_empty(&priv->recall_list)) {
+ sleep_till.tv_sec = time_now + 600;
+ pthread_cond_timedwait(&priv->cond, &priv->mutex, &sleep_till);
+ }
+ if (!list_empty(&priv->recall_list)) {
+ gf_msg_debug(this->name, 0, "Found expired recalls");
+ list_for_each_entry_safe(recall_entry, tmp, &priv->recall_list,
+ list)
+ {
+ list_del_init(&recall_entry->list);
+ list_add_tail(&recall_entry->list, &recall_cleanup_list);
+ }
+ }
+ }
+ pthread_mutex_unlock(&priv->mutex);
+
+ recall_entry = tmp = NULL;
+ list_for_each_entry_safe(recall_entry, tmp, &recall_cleanup_list, list)
+ {
+ gf_msg_debug(this->name, 0,
+ "Recall lease was sent on"
+ " inode:%p, recall timer has expired"
+ " and clients haven't unlocked the lease"
+ " hence cleaning up leases on the inode",
+ recall_entry->inode);
+ remove_all_leases(this, recall_entry->inode);
+ /* no need to take priv->mutex lock as this entry
+ * reference is removed from global recall list. */
+ __destroy_lease_inode(recall_entry);
+ }
+ }
+
+out:
+ return NULL;
+}
diff --git a/xlators/features/leases/src/leases-mem-types.h b/xlators/features/leases/src/leases-mem-types.h
new file mode 100644
index 00000000000..25664b44156
--- /dev/null
+++ b/xlators/features/leases/src/leases-mem-types.h
@@ -0,0 +1,27 @@
+/*
+ Copyright (c) 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __LEASES_MEM_TYPES_H__
+#define __LEASES_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_leases_mem_types_ {
+ gf_leases_mt_private_t = gf_common_mt_end + 1,
+ gf_leases_mt_lease_client_t,
+ gf_leases_mt_lease_inode_t,
+ gf_leases_mt_fd_ctx_t,
+ gf_leases_mt_lease_inode_ctx_t,
+ gf_leases_mt_lease_id_entry_t,
+ gf_leases_mt_fop_stub_t,
+ gf_leases_mt_timer_data_t,
+ gf_leases_mt_end
+};
+#endif
diff --git a/xlators/features/leases/src/leases-messages.h b/xlators/features/leases/src/leases-messages.h
new file mode 100644
index 00000000000..da696b832de
--- /dev/null
+++ b/xlators/features/leases/src/leases-messages.h
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _LEASES_MESSAGES_H_
+#define _LEASES_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(LEASES, LEASE_MSG_NO_MEM, LEASE_MSG_RECALL_FAIL,
+ LEASE_MSG_INVAL_LEASE_ID, LEASE_MSG_INVAL_UNLK_LEASE,
+ LEASE_MSG_INVAL_INODE_CTX, LEASE_MSG_NOT_ENABLED,
+ LEASE_MSG_NO_TIMER_WHEEL, LEASE_MSG_CLNT_NOTFOUND,
+ LEASE_MSG_INODE_NOTFOUND, LEASE_MSG_INVAL_FD_CTX,
+ LEASE_MSG_INVAL_LEASE_TYPE);
+
+#endif /* !_LEASES_MESSAGES_H_ */
diff --git a/xlators/features/leases/src/leases.c b/xlators/features/leases/src/leases.c
new file mode 100644
index 00000000000..04bee50ba3f
--- /dev/null
+++ b/xlators/features/leases/src/leases.c
@@ -0,0 +1,1168 @@
+/*
+ Copyright (c) 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include "leases.h"
+
+int32_t
+leases_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
+
+ return 0;
+}
+
+int32_t
+leases_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ int32_t op_errno = EINVAL;
+ int ret = 0;
+ lease_fd_ctx_t *fd_ctx = NULL;
+ char *lease_id = NULL;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ fd_ctx = GF_CALLOC(1, sizeof(*fd_ctx), gf_leases_mt_fd_ctx_t);
+ if (!fd_ctx) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ fd_ctx->client_uid = gf_strdup(frame->root->client->client_uid);
+ if (!fd_ctx->client_uid) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ GET_FLAGS(frame->root->op, flags);
+ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
+ if (lease_id != NULL)
+ memcpy(fd_ctx->lease_id, lease_id, LEASE_ID_SIZE);
+ else
+ memset(fd_ctx->lease_id, 0, LEASE_ID_SIZE);
+
+ ret = fd_ctx_set(fd, this, (uint64_t)(uintptr_t)fd_ctx);
+ if (ret) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP(fd->inode, open, frame, this, loc, flags, fd, xdata);
+ return 0;
+
+out:
+ STACK_WIND(frame, leases_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+
+err:
+ if (fd_ctx) {
+ GF_FREE(fd_ctx->client_uid);
+ GF_FREE(fd_ctx);
+ }
+
+ STACK_UNWIND_STRICT(open, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+int32_t
+leases_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+int32_t
+leases_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
+ GET_FLAGS(frame->root->op, fd->flags);
+
+ ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP(fd->inode, writev, frame, this, fd, vector, count, off,
+ flags, iobref, xdata);
+ return 0;
+
+out:
+ STACK_WIND(frame, leases_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, off, flags,
+ iobref, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(writev, frame, -1, errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+leases_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iovec *vector, int count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
+
+ return 0;
+}
+
+int32_t
+leases_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
+ GET_FLAGS(frame->root->op, fd->flags);
+
+ ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP(fd->inode, readv, frame, this, fd, size, offset, flags,
+ xdata);
+ return 0;
+
+out:
+ STACK_WIND(frame, leases_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(readv, frame, -1, errno, NULL, 0, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+leases_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(lk, frame, op_ret, op_errno, lock, xdata);
+
+ return 0;
+}
+
+int32_t
+leases_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
+ GET_FLAGS_LK(cmd, flock->l_type, fd->flags);
+
+ ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP(fd->inode, lk, frame, this, fd, cmd, flock, xdata);
+ return 0;
+
+out:
+ STACK_WIND(frame, leases_lk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lk, fd, cmd, flock, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(lk, frame, -1, errno, NULL, NULL);
+ return 0;
+}
+
+int32_t
+leases_lease(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct gf_lease *lease, dict_t *xdata)
+{
+ int32_t op_errno = 0;
+ int ret = 0;
+ struct gf_lease nullease = {
+ 0,
+ };
+ int32_t op_ret = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ ret = process_lease_req(frame, this, loc->inode, lease);
+ if (ret < 0) {
+ op_errno = -ret;
+ op_ret = -1;
+ }
+ goto unwind;
+
+out:
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, LEASE_MSG_NOT_ENABLED,
+ "\"features/leases\" translator is not enabled. "
+ "You need to enable it for proper functioning of your "
+ "application");
+ op_errno = ENOSYS;
+ op_ret = -1;
+
+unwind:
+ STACK_UNWIND_STRICT(lease, frame, op_ret, op_errno,
+ (op_errno == ENOSYS) ? &nullease : lease, xdata);
+ return 0;
+}
+
+int32_t
+leases_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+int32_t
+leases_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
+ GET_FLAGS(frame->root->op, 0);
+
+ ret = check_lease_conflict(frame, loc->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP(loc->inode, truncate, frame, this, loc, offset, xdata);
+ return 0;
+
+out:
+ STACK_WIND(frame, leases_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(truncate, frame, -1, errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+leases_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
+
+ return 0;
+}
+
+int32_t
+leases_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
+ GET_FLAGS(frame->root->op, 0);
+
+ ret = check_lease_conflict(frame, loc->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP(loc->inode, setattr, frame, this, loc, stbuf, valid, xdata);
+ return 0;
+
+out:
+ STACK_WIND(frame, leases_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(setattr, frame, -1, errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+leases_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, stbuf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
+
+ return 0;
+}
+
+int32_t
+leases_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ /* should the lease be also checked for newloc */
+ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
+ GET_FLAGS(frame->root->op, 0);
+
+ ret = check_lease_conflict(frame, oldloc->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP(oldloc->inode, rename, frame, this, oldloc, newloc, xdata);
+ return 0;
+
+out:
+ STACK_WIND(frame, leases_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(rename, frame, -1, errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+int32_t
+leases_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+
+ return 0;
+}
+
+int32_t
+leases_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
+ GET_FLAGS(frame->root->op, 0);
+
+ ret = check_lease_conflict(frame, loc->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP(loc->inode, unlink, frame, this, loc, xflag, xdata);
+ return 0;
+
+out:
+ STACK_WIND(frame, leases_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(unlink, frame, -1, errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+leases_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, xdata);
+
+ return 0;
+}
+
+int32_t
+leases_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
+ GET_FLAGS(frame->root->op, 0);
+
+ ret = check_lease_conflict(frame, oldloc->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP(oldloc->inode, link, frame, this, oldloc, newloc, xdata);
+ return 0;
+out:
+ STACK_WIND(frame, leases_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(link, frame, -1, errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+leases_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, stbuf,
+ preparent, postparent, xdata);
+
+ return 0;
+}
+
+int32_t
+leases_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
+ GET_FLAGS(frame->root->op, flags);
+
+ ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP(fd->inode, create, frame, this, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+
+out:
+ STACK_WIND(frame, leases_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(create, frame, -1, errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+int32_t
+leases_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+leases_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
+ GET_FLAGS(frame->root->op, fd->flags);
+
+ ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP(fd->inode, fsync, frame, this, fd, flags, xdata);
+ return 0;
+
+out:
+ STACK_WIND(frame, leases_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT(fsync, frame, -1, errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+leases_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
+
+int32_t
+leases_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
+ GET_FLAGS(frame->root->op, 0); /* TODO:fd->flags?*/
+
+ ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP(fd->inode, ftruncate, frame, this, fd, offset, xdata);
+ return 0;
+
+out:
+ STACK_WIND(frame, leases_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(ftruncate, frame, -1, errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+leases_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
+ return 0;
+}
+
+int32_t
+leases_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
+ GET_FLAGS(frame->root->op, fd->flags);
+
+ ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP(fd->inode, fsetattr, frame, this, fd, stbuf, valid, xdata);
+ return 0;
+
+out:
+ STACK_WIND(frame, leases_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(fsetattr, frame, -1, errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+leases_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, pre, post, xdata);
+
+ return 0;
+}
+
+int32_t
+leases_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
+ GET_FLAGS(frame->root->op, fd->flags);
+
+ ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP(fd->inode, fallocate, frame, this, fd, mode, offset, len,
+ xdata);
+ return 0;
+
+out:
+ STACK_WIND(frame, leases_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(fallocate, frame, -1, errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+leases_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, pre, post, xdata);
+
+ return 0;
+}
+
+int32_t
+leases_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
+ GET_FLAGS(frame->root->op, fd->flags);
+
+ ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP(fd->inode, discard, frame, this, fd, offset, len, xdata);
+ return 0;
+
+out:
+ STACK_WIND(frame, leases_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(discard, frame, -1, errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+leases_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, pre, post, xdata);
+
+ return 0;
+}
+
+int
+leases_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ char *lease_id = NULL;
+ int ret = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
+ GET_FLAGS(frame->root->op, fd->flags);
+
+ ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP(fd->inode, zerofill, frame, this, fd, offset, len, xdata);
+ return 0;
+
+out:
+ STACK_WIND(frame, leases_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(zerofill, frame, -1, errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+leases_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(flush, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+int
+leases_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ uint32_t fop_flags = 0;
+ char *lease_id = NULL;
+ int ret = 0;
+ lease_fd_ctx_t *fd_ctx = NULL;
+ uint64_t ctx = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+ EXIT_IF_INTERNAL_FOP(frame, xdata, out);
+
+ GET_LEASE_ID(xdata, lease_id, frame->root->client->client_uid);
+ GET_FLAGS(frame->root->op, fd->flags);
+
+ ret = check_lease_conflict(frame, fd->inode, lease_id, fop_flags);
+ if (ret < 0)
+ goto err;
+ else if (ret == BLOCK_FOP)
+ goto block;
+ else if (ret == WIND_FOP)
+ goto out;
+
+block:
+ LEASE_BLOCK_FOP(fd->inode, flush, frame, this, fd, xdata);
+ return 0;
+
+out:
+ /* *
+ * currently release is not called after the close fop from the
+ * application. Hence lease fd ctx is reset on here.
+ * This is actually not the right way, since flush can be called
+ * not only from the close op.
+ * TODO :
+ * - Either identify the flush is called from close call on fd from
+ * from the application.
+ * OR
+ * - Find why release is not called post the last close call
+ */
+ ret = fd_ctx_get(fd, this, &ctx);
+ if (ret == 0) {
+ fd_ctx = (lease_fd_ctx_t *)(long)ctx;
+ if (fd_ctx->client_uid) {
+ GF_FREE(fd_ctx->client_uid);
+ fd_ctx->client_uid = NULL;
+ }
+ memset(fd_ctx->lease_id, 0, LEASE_ID_SIZE);
+ }
+ STACK_WIND(frame, leases_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(create, frame, -1, errno, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, gf_leases_mt_end + 1);
+
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM,
+ "mem account init failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+static int
+leases_init_priv(xlator_t *this)
+{
+ int ret = 0;
+ leases_private_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ if (!priv->timer_wheel) {
+ priv->timer_wheel = glusterfs_ctx_tw_get(this->ctx);
+ if (!priv->timer_wheel) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (!priv->inited_recall_thr) {
+ ret = gf_thread_create(&priv->recall_thr, NULL, expired_recall_cleanup,
+ this, "leasercl");
+ if (!ret)
+ priv->inited_recall_thr = _gf_true;
+ }
+
+out:
+ return ret;
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ leases_private_t *priv = NULL;
+ int ret = -1;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ /* TODO: In case of reconfigure, if its enabling the leases
+ * its not an issue, but if its disabling the leases, there
+ * is more to it, like recall all the existing leases, wait
+ * for unlock of all the leases etc., hence not supporting the
+ * reconfigure for now.
+
+ GF_OPTION_RECONF ("leases", priv->leases_enabled,
+ options, bool, out);
+
+ if (priv->leases_enabled) {
+ ret = leases_init_priv (this);
+ if (ret)
+ goto out;
+ }
+ */
+
+ GF_OPTION_RECONF("lease-lock-recall-timeout", priv->recall_lease_timeout,
+ options, int32, out);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+init(xlator_t *this)
+{
+ int ret = -1;
+ leases_private_t *priv = NULL;
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_leases_mt_private_t);
+ if (!priv) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM,
+ "Leases init failed");
+ goto out;
+ }
+
+ GF_OPTION_INIT("leases", priv->leases_enabled, bool, out);
+ GF_OPTION_INIT("lease-lock-recall-timeout", priv->recall_lease_timeout,
+ int32, out);
+ pthread_mutex_init(&priv->mutex, NULL);
+ INIT_LIST_HEAD(&priv->client_list);
+ INIT_LIST_HEAD(&priv->recall_list);
+
+ this->private = priv;
+
+ if (priv->leases_enabled) {
+ ret = leases_init_priv(this);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (ret) {
+ GF_FREE(priv);
+ this->private = NULL;
+ }
+
+ return ret;
+}
+
+void
+fini(xlator_t *this)
+{
+ leases_private_t *priv = NULL;
+
+ priv = this->private;
+ if (!priv) {
+ return;
+ }
+ this->private = NULL;
+
+ priv->fini = _gf_true;
+ pthread_cond_broadcast(&priv->cond);
+ if (priv->recall_thr) {
+ gf_thread_cleanup_xint(priv->recall_thr);
+ priv->recall_thr = 0;
+ priv->inited_recall_thr = _gf_false;
+ }
+
+ if (priv->timer_wheel) {
+ glusterfs_ctx_tw_put(this->ctx);
+ }
+
+ GF_FREE(priv);
+ return;
+}
+
+static int
+leases_forget(xlator_t *this, inode_t *inode)
+{
+ /* TODO:leases_cleanup_inode_ctx (this, inode); */
+ return 0;
+}
+
+static int
+leases_release(xlator_t *this, fd_t *fd)
+{
+ int ret = -1;
+ uint64_t tmp = 0;
+ lease_fd_ctx_t *fd_ctx = NULL;
+
+ if (fd == NULL) {
+ goto out;
+ }
+
+ gf_log(this->name, GF_LOG_TRACE, "Releasing all leases with fd %p", fd);
+
+ ret = fd_ctx_del(fd, this, &tmp);
+ if (ret) {
+ gf_log(this->name, GF_LOG_DEBUG, "Could not get fdctx");
+ goto out;
+ }
+
+ fd_ctx = (lease_fd_ctx_t *)(long)tmp;
+ if (fd_ctx)
+ GF_FREE(fd_ctx);
+out:
+ return ret;
+}
+
+static int
+leases_clnt_disconnect_cbk(xlator_t *this, client_t *client)
+{
+ int ret = 0;
+
+ EXIT_IF_LEASES_OFF(this, out);
+
+ ret = cleanup_client_leases(this, client->client_uid);
+out:
+ return ret;
+}
+
+struct xlator_fops fops = {
+ /* Metadata modifying fops */
+ .fsetattr = leases_fsetattr,
+ .setattr = leases_setattr,
+
+ /* File Data reading fops */
+ .open = leases_open,
+ .readv = leases_readv,
+
+ /* File Data modifying fops */
+ .truncate = leases_truncate,
+ .ftruncate = leases_ftruncate,
+ .writev = leases_writev,
+ .zerofill = leases_zerofill,
+ .fallocate = leases_fallocate,
+ .discard = leases_discard,
+ .lk = leases_lk,
+ .fsync = leases_fsync,
+ .flush = leases_flush,
+ .lease = leases_lease,
+
+ /* Directory Data modifying fops */
+ .create = leases_create,
+ .rename = leases_rename,
+ .unlink = leases_unlink,
+ .link = leases_link,
+
+#ifdef NOT_SUPPORTED
+ /* internal lk fops */
+ .inodelk = leases_inodelk,
+ .finodelk = leases_finodelk,
+ .entrylk = leases_entrylk,
+ .fentrylk = leases_fentrylk,
+
+ /* Internal special fops*/
+ .xattrop = leases_xattrop,
+ .fxattrop = leases_fxattrop,
+#endif
+};
+
+struct xlator_cbks cbks = {
+ .forget = leases_forget,
+ .release = leases_release,
+ .client_disconnect = leases_clnt_disconnect_cbk,
+};
+
+struct volume_options options[] = {
+ {.key = {"leases"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {GD_OP_VERSION_3_8_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .description = "When \"on\", enables leases support"},
+ {.key = {"lease-lock-recall-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = RECALL_LEASE_LK_TIMEOUT,
+ .op_version = {GD_OP_VERSION_3_8_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .description = "After 'timeout' seconds since the recall_lease"
+ " request has been sent to the client, the lease lock"
+ " will be forcefully purged by the server."},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "leases",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/leases/src/leases.h b/xlators/features/leases/src/leases.h
new file mode 100644
index 00000000000..a6e8a6824cc
--- /dev/null
+++ b/xlators/features/leases/src/leases.h
@@ -0,0 +1,259 @@
+/*
+ Copyright (c) 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _LEASES_H
+#define _LEASES_H
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <glusterfs/common-utils.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/client_t.h>
+#include <glusterfs/lkowner.h>
+#include <glusterfs/locking.h>
+#include <glusterfs/upcall-utils.h>
+#include "timer-wheel.h"
+#include "leases-mem-types.h"
+#include "leases-messages.h"
+
+/* The time period for which a client lease lock will be stored after its been
+ * recalled for the first time. */
+#define RECALL_LEASE_LK_TIMEOUT "60"
+
+#define DATA_MODIFY_FOP 0x0001
+#define BLOCKING_FOP 0x0002
+
+#define BLOCK_FOP 0x0001
+#define WIND_FOP 0x0002
+
+#define EXIT_IF_LEASES_OFF(this, label) \
+ do { \
+ if (!is_leases_enabled(this)) \
+ goto label; \
+ } while (0)
+
+#define EXIT_IF_INTERNAL_FOP(frame, xdata, label) \
+ do { \
+ if (frame->root->pid < 0) \
+ goto label; \
+ if (xdata && dict_get(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) \
+ goto label; \
+ } while (0)
+
+#define GET_LEASE_ID(xdata, lease_id, client_uid) \
+ do { \
+ int ret_val = -1; \
+ ret_val = dict_get_bin(xdata, "lease-id", (void **)&lease_id); \
+ if (ret_val) { \
+ ret_val = 0; \
+ gf_msg_debug("leases", 0, "Lease id is not set for client:%s", \
+ client_uid); \
+ } \
+ } while (0)
+
+#define GET_FLAGS(fop, fd_flags) \
+ do { \
+ if ((fd_flags & (O_WRONLY | O_RDWR)) && fop == GF_FOP_OPEN) \
+ fop_flags = DATA_MODIFY_FOP; \
+ \
+ if (fop == GF_FOP_UNLINK || fop == GF_FOP_RENAME || \
+ fop == GF_FOP_TRUNCATE || fop == GF_FOP_FTRUNCATE || \
+ fop == GF_FOP_FLUSH || fop == GF_FOP_FSYNC || \
+ fop == GF_FOP_WRITE || fop == GF_FOP_FALLOCATE || \
+ fop == GF_FOP_DISCARD || fop == GF_FOP_ZEROFILL || \
+ fop == GF_FOP_SETATTR || fop == GF_FOP_FSETATTR || \
+ fop == GF_FOP_LINK) \
+ fop_flags = DATA_MODIFY_FOP; \
+ \
+ if (!(fd_flags & (O_NONBLOCK | O_NDELAY))) \
+ fop_flags |= BLOCKING_FOP; \
+ \
+ } while (0)
+
+#define GET_FLAGS_LK(cmd, l_type, fd_flags) \
+ do { \
+ /* TODO: handle F_RESLK_LCK and other glusterfs_lk_recovery_cmds_t */ \
+ if ((cmd == F_SETLKW || cmd == F_SETLKW64 || cmd == F_SETLK || \
+ cmd == F_SETLK64) && \
+ l_type == F_WRLCK) \
+ fop_flags = DATA_MODIFY_FOP; \
+ \
+ if (fd_flags & (O_NONBLOCK | O_NDELAY) && \
+ (cmd == F_SETLKW || cmd == F_SETLKW64)) \
+ fop_flags |= BLOCKING_FOP; \
+ \
+ } while (0)
+
+#define LEASE_BLOCK_FOP(inode, fop_name, frame, this, params...) \
+ do { \
+ call_stub_t *__stub = NULL; \
+ fop_stub_t *blk_fop = NULL; \
+ lease_inode_ctx_t *lease_ctx = NULL; \
+ \
+ __stub = fop_##fop_name##_stub(frame, default_##fop_name##_resume, \
+ params); \
+ if (!__stub) { \
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM, \
+ "Unable to create stub"); \
+ ret = -ENOMEM; \
+ goto __out; \
+ } \
+ \
+ blk_fop = GF_CALLOC(1, sizeof(*blk_fop), gf_leases_mt_fop_stub_t); \
+ if (!blk_fop) { \
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM, \
+ "Unable to create lease fop stub"); \
+ ret = -ENOMEM; \
+ goto __out; \
+ } \
+ \
+ lease_ctx = lease_ctx_get(inode, this); \
+ if (!lease_ctx) { \
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM, \
+ "Unable to create/get inode ctx"); \
+ ret = -ENOMEM; \
+ goto __out; \
+ } \
+ \
+ blk_fop->stub = __stub; \
+ pthread_mutex_lock(&lease_ctx->lock); \
+ { \
+ /*TODO: If the lease is unlocked btw check lease conflict and \
+ * by now, then this fop shouldn't be add to the blocked fop \
+ * list, can use generation number for the same?*/ \
+ list_add_tail(&blk_fop->list, &lease_ctx->blocked_list); \
+ } \
+ pthread_mutex_unlock(&lease_ctx->lock); \
+ \
+ __out: \
+ if (ret < 0) { \
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, LEASE_MSG_NO_MEM, \
+ "Unable to create stub for blocking the fop:%s (%s)", \
+ gf_fop_list[frame->root->op], strerror(ENOMEM)); \
+ if (__stub != NULL) { \
+ call_stub_destroy(__stub); \
+ } \
+ GF_FREE(blk_fop); \
+ goto err; \
+ } \
+ } while (0)
+
+struct _leases_private {
+ struct list_head client_list;
+ struct list_head recall_list;
+ struct tvec_base *timer_wheel; /* timer wheel where the recall request
+ is qued and waits for unlock/expiry */
+ pthread_t recall_thr;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ int32_t recall_lease_timeout;
+ gf_boolean_t inited_recall_thr;
+ gf_boolean_t fini;
+ gf_boolean_t leases_enabled;
+
+ char _pad[1]; /* manual padding */
+};
+typedef struct _leases_private leases_private_t;
+
+struct _lease_client {
+ char *client_uid;
+ struct list_head client_list;
+ struct list_head inode_list;
+};
+typedef struct _lease_client lease_client_t;
+
+struct _lease_inode {
+ inode_t *inode;
+ struct list_head
+ list; /* This can be part of both inode_list and recall_list */
+};
+typedef struct _lease_inode lease_inode_t;
+
+struct _lease_fd_ctx {
+ char *client_uid;
+ char lease_id[LEASE_ID_SIZE];
+};
+typedef struct _lease_fd_ctx lease_fd_ctx_t;
+
+struct _lease_inode_ctx {
+ struct list_head lease_id_list; /* clients that have taken leases */
+ int lease_type_cnt[GF_LEASE_MAX_TYPE + 1];
+ uint64_t lease_cnt; /* Total number of leases on this inode */
+ uint64_t openfd_cnt; /* number of fds open */
+ struct list_head blocked_list; /* List of fops blocked until the
+ lease recall is complete */
+ inode_t *inode; /* this represents the inode on which the
+ lock was taken, required mainly during
+ disconnect cleanup */
+ struct gf_tw_timer_list *timer;
+ pthread_mutex_t lock;
+ int lease_type; /* Types of leases acquired */
+ gf_boolean_t recall_in_progress; /* if lease recall is sent on this inode */
+ gf_boolean_t blocked_fops_resuming; /* if blocked fops are being resumed */
+
+ char _pad[2]; /* manual padding */
+};
+typedef struct _lease_inode_ctx lease_inode_ctx_t;
+
+struct _lease_id_entry {
+ struct list_head lease_id_list;
+ char lease_id[LEASE_ID_SIZE];
+ char *client_uid; /* uid of the client that has
+ taken the lease */
+ int lease_type_cnt[GF_LEASE_MAX_TYPE + 1]; /* count of each lease type */
+ uint64_t lease_cnt; /* Number of leases taken under the
+ given lease id */
+ time_t recall_time; /* time @ which recall was sent */
+ int lease_type; /* Union of all the leases taken
+ under the given lease id */
+ char _pad[4]; /* manual padding */
+};
+typedef struct _lease_id_entry lease_id_entry_t;
+
+/* Required? as stub itself will have list */
+struct __fop_stub {
+ struct list_head list;
+ call_stub_t *stub;
+};
+typedef struct __fop_stub fop_stub_t;
+
+struct __lease_timer_data {
+ inode_t *inode;
+ xlator_t *this;
+};
+typedef struct __lease_timer_data lease_timer_data_t;
+
+gf_boolean_t
+is_leases_enabled(xlator_t *this);
+
+lease_inode_ctx_t *
+lease_ctx_get(inode_t *inode, xlator_t *this);
+
+int
+process_lease_req(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ struct gf_lease *lease);
+
+int
+check_lease_conflict(call_frame_t *frame, inode_t *inode, const char *lease_id,
+ uint32_t fop_flags);
+
+int
+cleanup_client_leases(xlator_t *this, const char *client_uid);
+
+void *
+expired_recall_cleanup(void *data);
+
+#endif /* _LEASES_H */
diff --git a/xlators/features/locks/src/Makefile.am b/xlators/features/locks/src/Makefile.am
index e39676826b4..0b174c19d2d 100644
--- a/xlators/features/locks/src/Makefile.am
+++ b/xlators/features/locks/src/Makefile.am
@@ -1,22 +1,29 @@
+if WITH_SERVER
xlator_LTLIBRARIES = locks.la
+endif
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-locks_la_LDFLAGS = -module -avoidversion
+locks_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
locks_la_SOURCES = common.c posix.c entrylk.c inodelk.c reservelk.c \
- clear.c
+ clear.c
+
locks_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = locks.h common.h locks-mem-types.h clear.h
+noinst_HEADERS = locks.h common.h locks-mem-types.h clear.h pl-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
- -fno-strict-aliasing -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src $(GF_CFLAGS) -shared -nostartfiles
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
CLEANFILES =
+if WITH_SERVER
uninstall-local:
rm -f $(DESTDIR)$(xlatordir)/posix-locks.so
install-data-hook:
ln -sf locks.so $(DESTDIR)$(xlatordir)/posix-locks.so
+endif
diff --git a/xlators/features/locks/src/clear.c b/xlators/features/locks/src/clear.c
index 5533c6bdcb9..ab1eac68a53 100644
--- a/xlators/features/locks/src/clear.c
+++ b/xlators/features/locks/src/clear.c
@@ -1,434 +1,460 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "inode.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
#include "locks.h"
#include "common.h"
-#include "statedump.h"
+#include <glusterfs/statedump.h>
#include "clear.h"
+const char *clrlk_type_names[CLRLK_TYPE_MAX] = {
+ [CLRLK_INODE] = "inode",
+ [CLRLK_ENTRY] = "entry",
+ [CLRLK_POSIX] = "posix",
+};
+
int
-clrlk_get_kind (char *kind)
+clrlk_get_kind(char *kind)
{
- char *clrlk_kinds[CLRLK_KIND_MAX] = {"dummy", "blocked", "granted",
- "all"};
- int ret_kind = CLRLK_KIND_MAX;
- int i = 0;
-
- for (i = CLRLK_BLOCKED; i < CLRLK_KIND_MAX; i++) {
- if (!strcmp (clrlk_kinds[i], kind)) {
- ret_kind = i;
- break;
- }
+ char *clrlk_kinds[CLRLK_KIND_MAX] = {"dummy", "blocked", "granted", "all"};
+ int ret_kind = CLRLK_KIND_MAX;
+ int i = 0;
+
+ for (i = CLRLK_BLOCKED; i < CLRLK_KIND_MAX; i++) {
+ if (!strcmp(clrlk_kinds[i], kind)) {
+ ret_kind = i;
+ break;
}
+ }
- return ret_kind;
+ return ret_kind;
}
int
-clrlk_get_type (char *type)
+clrlk_get_type(char *type)
{
- char *clrlk_types[CLRLK_TYPE_MAX] = {"inode", "entry", "posix"};
- int ret_type = CLRLK_TYPE_MAX;
- int i = 0;
-
- for (i = CLRLK_INODE; i < CLRLK_TYPE_MAX; i++) {
- if (!strcmp (clrlk_types[i], type)) {
- ret_type = i;
- break;
- }
+ char *clrlk_types[CLRLK_TYPE_MAX] = {"inode", "entry", "posix"};
+ int ret_type = CLRLK_TYPE_MAX;
+ int i = 0;
+
+ for (i = CLRLK_INODE; i < CLRLK_TYPE_MAX; i++) {
+ if (!strcmp(clrlk_types[i], type)) {
+ ret_type = i;
+ break;
}
+ }
- return ret_type;
+ return ret_type;
}
int
-clrlk_get_lock_range (char *range_str, struct gf_flock *ulock,
- gf_boolean_t *chk_range)
+clrlk_get_lock_range(char *range_str, struct gf_flock *ulock,
+ gf_boolean_t *chk_range)
{
- int ret = -1;
-
- if (!chk_range)
- goto out;
+ int ret = -1;
- if (!range_str) {
- ret = 0;
- *chk_range = _gf_false;
- goto out;
- }
-
- if (sscanf (range_str, "%hd,%"PRId64"-""%"PRId64, &ulock->l_whence,
- &ulock->l_start, &ulock->l_len) != 3) {
- goto out;
- }
+ if (!chk_range)
+ goto out;
+ if (!range_str) {
ret = 0;
- *chk_range = _gf_true;
+ *chk_range = _gf_false;
+ goto out;
+ }
+
+ if (sscanf(range_str,
+ "%hd,%" PRId64 "-"
+ "%" PRId64,
+ &ulock->l_whence, &ulock->l_start, &ulock->l_len) != 3) {
+ goto out;
+ }
+
+ ret = 0;
+ *chk_range = _gf_true;
out:
- return ret;
+ return ret;
}
int
-clrlk_parse_args (const char* cmd, clrlk_args *args)
+clrlk_parse_args(const char *cmd, clrlk_args *args)
{
- char *opts = NULL;
- char *cur = NULL;
- char *tok = NULL;
- char *sptr = NULL;
- char *free_ptr = NULL;
- char kw[KW_MAX] = {[KW_TYPE] = 't',
- [KW_KIND] = 'k',
- };
- int ret = -1;
- int i = 0;
-
- GF_ASSERT (cmd);
- free_ptr = opts = GF_CALLOC (1, strlen (cmd), gf_common_mt_char);
- if (!opts)
- goto out;
-
- if (sscanf (cmd, GF_XATTR_CLRLK_CMD".%s", opts) < 1) {
- ret = -1;
- goto out;
+ char *opts = NULL;
+ char *cur = NULL;
+ char *tok = NULL;
+ char *sptr = NULL;
+ char *free_ptr = NULL;
+ char kw[KW_MAX] = {
+ [KW_TYPE] = 't',
+ [KW_KIND] = 'k',
+ };
+ int ret = -1;
+ int i = 0;
+
+ GF_ASSERT(cmd);
+ free_ptr = opts = GF_CALLOC(1, strlen(cmd), gf_common_mt_char);
+ if (!opts)
+ goto out;
+
+ if (sscanf(cmd, GF_XATTR_CLRLK_CMD ".%s", opts) < 1) {
+ ret = -1;
+ goto out;
+ }
+
+ /*clr_lk_prefix.ttype.kkind.args, args - type specific*/
+ cur = opts;
+ for (i = 0; i < KW_MAX && (tok = strtok_r(cur, ".", &sptr));
+ cur = NULL, i++) {
+ if (tok[0] != kw[i]) {
+ ret = -1;
+ goto out;
}
-
- /*clr_lk_prefix.ttype.kkind.args, args - type specific*/
- cur = opts;
- for (i = 0; i < KW_MAX && (tok = strtok_r (cur, ".", &sptr));
- cur = NULL, i++) {
- if (tok[0] != kw[i]) {
- ret = -1;
- goto out;
- }
- if (i == KW_TYPE)
- args->type = clrlk_get_type (tok+1);
- if (i == KW_KIND)
- args->kind = clrlk_get_kind (tok+1);
- }
-
- if ((args->type == CLRLK_TYPE_MAX) || (args->kind == CLRLK_KIND_MAX))
- goto out;
-
- /*optional args, neither range nor basename can 'legally' contain
- * "/" in them*/
- tok = strtok_r (NULL, "/", &sptr);
- if (tok)
- args->opts = gf_strdup (tok);
-
- ret = 0;
+ if (i == KW_TYPE)
+ args->type = clrlk_get_type(tok + 1);
+ if (i == KW_KIND)
+ args->kind = clrlk_get_kind(tok + 1);
+ }
+
+ if ((args->type == CLRLK_TYPE_MAX) || (args->kind == CLRLK_KIND_MAX))
+ goto out;
+
+ /*optional args, neither range nor basename can 'legally' contain
+ * "/" in them*/
+ tok = strtok_r(NULL, "/", &sptr);
+ if (tok)
+ args->opts = gf_strdup(tok);
+
+ ret = 0;
out:
- if (free_ptr)
- GF_FREE (free_ptr);
- return ret;
+ GF_FREE(free_ptr);
+ return ret;
}
int
-clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
- int *blkd, int *granted, int *op_errno)
+clrlk_clear_posixlk(xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
+ int *blkd, int *granted, int *op_errno)
{
- posix_lock_t *plock = NULL;
- posix_lock_t *tmp = NULL;
- struct gf_flock ulock = {0, };
- int ret = -1;
- int bcount = 0;
- int gcount = 0;
- gf_boolean_t chk_range = _gf_false;
-
- if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) {
- *op_errno = EINVAL;
- goto out;
- }
-
- pthread_mutex_lock (&pl_inode->mutex);
+ posix_lock_t *plock = NULL;
+ posix_lock_t *tmp = NULL;
+ struct gf_flock ulock = {
+ 0,
+ };
+ int ret = -1;
+ int bcount = 0;
+ int gcount = 0;
+ gf_boolean_t chk_range = _gf_false;
+
+ if (clrlk_get_lock_range(args->opts, &ulock, &chk_range)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ list_for_each_entry_safe(plock, tmp, &pl_inode->ext_list, list)
{
- list_for_each_entry_safe (plock, tmp, &pl_inode->ext_list,
- list) {
- if ((plock->blocked &&
- !(args->kind & CLRLK_BLOCKED)) ||
- (!plock->blocked &&
- !(args->kind & CLRLK_GRANTED)))
- continue;
-
- if (chk_range &&
- (plock->user_flock.l_whence != ulock.l_whence
- || plock->user_flock.l_start != ulock.l_start
- || plock->user_flock.l_len != ulock.l_len))
- continue;
-
- list_del_init (&plock->list);
- if (plock->blocked) {
- bcount++;
- pl_trace_out (this, plock->frame, NULL, NULL,
- F_SETLKW, &plock->user_flock,
- -1, EAGAIN, NULL);
-
- STACK_UNWIND_STRICT (lk, plock->frame, -1, EAGAIN,
- &plock->user_flock, NULL);
-
- } else {
- gcount++;
- }
- GF_FREE (plock);
- }
+ if ((plock->blocked && !(args->kind & CLRLK_BLOCKED)) ||
+ (!plock->blocked && !(args->kind & CLRLK_GRANTED)))
+ continue;
+
+ if (chk_range && (plock->user_flock.l_whence != ulock.l_whence ||
+ plock->user_flock.l_start != ulock.l_start ||
+ plock->user_flock.l_len != ulock.l_len))
+ continue;
+
+ list_del_init(&plock->list);
+ if (plock->blocked) {
+ bcount++;
+ pl_trace_out(this, plock->frame, NULL, NULL, F_SETLKW,
+ &plock->user_flock, -1, EINTR, NULL);
+
+ STACK_UNWIND_STRICT(lk, plock->frame, -1, EINTR,
+ &plock->user_flock, NULL);
+
+ } else {
+ gcount++;
+ }
+ __destroy_lock(plock);
}
- pthread_mutex_unlock (&pl_inode->mutex);
- grant_blocked_locks (this, pl_inode);
- ret = 0;
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ grant_blocked_locks(this, pl_inode);
+ ret = 0;
out:
- *blkd = bcount;
- *granted = gcount;
- return ret;
+ *blkd = bcount;
+ *granted = gcount;
+ return ret;
}
/* Returns 0 on success and -1 on failure */
int
-clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
- clrlk_args *args, int *blkd, int *granted, int *op_errno)
+clrlk_clear_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno)
{
- pl_inode_lock_t *ilock = NULL;
- pl_inode_lock_t *tmp = NULL;
- struct gf_flock ulock = {0, };
- int ret = -1;
- int bcount = 0;
- int gcount = 0;
- gf_boolean_t chk_range = _gf_false;
- struct list_head released;
-
- INIT_LIST_HEAD (&released);
- if (clrlk_get_lock_range (args->opts, &ulock, &chk_range)) {
- *op_errno = EINVAL;
- goto out;
- }
-
- if (args->kind & CLRLK_BLOCKED)
- goto blkd;
-
- if (args->kind & CLRLK_GRANTED)
- goto granted;
+ posix_locks_private_t *priv;
+ pl_inode_lock_t *ilock = NULL;
+ pl_inode_lock_t *tmp = NULL;
+ struct gf_flock ulock = {
+ 0,
+ };
+ int ret = -1;
+ int bcount = 0;
+ int gcount = 0;
+ gf_boolean_t chk_range = _gf_false;
+ struct list_head *pcontend = NULL;
+ struct list_head released;
+ struct list_head contend;
+ struct timespec now = {};
+
+ INIT_LIST_HEAD(&released);
+
+ priv = this->private;
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD(pcontend);
+ timespec_now(&now);
+ }
+
+ if (clrlk_get_lock_range(args->opts, &ulock, &chk_range)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ if (args->kind & CLRLK_BLOCKED)
+ goto blkd;
+
+ if (args->kind & CLRLK_GRANTED)
+ goto granted;
blkd:
- pthread_mutex_lock (&pl_inode->mutex);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ list_for_each_entry_safe(ilock, tmp, &dom->blocked_inodelks,
+ blocked_locks)
{
- list_for_each_entry_safe (ilock, tmp, &dom->blocked_inodelks,
- blocked_locks) {
- if (chk_range &&
- (ilock->user_flock.l_whence != ulock.l_whence
- || ilock->user_flock.l_start != ulock.l_start
- || ilock->user_flock.l_len != ulock.l_len))
- continue;
-
- bcount++;
- list_del_init (&ilock->blocked_locks);
- list_add (&ilock->blocked_locks, &released);
- }
- }
- pthread_mutex_unlock (&pl_inode->mutex);
-
- list_for_each_entry_safe (ilock, tmp, &released, blocked_locks) {
- list_del_init (&ilock->blocked_locks);
- pl_trace_out (this, ilock->frame, NULL, NULL, F_SETLKW,
- &ilock->user_flock, -1, EAGAIN,
- ilock->volume);
- STACK_UNWIND_STRICT (inodelk, ilock->frame, -1,
- EAGAIN, NULL);
- //No need to take lock as the locks are only in one list
- __pl_inodelk_unref (ilock);
+ if (chk_range && (ilock->user_flock.l_whence != ulock.l_whence ||
+ ilock->user_flock.l_start != ulock.l_start ||
+ ilock->user_flock.l_len != ulock.l_len))
+ continue;
+
+ bcount++;
+ list_del_init(&ilock->client_list);
+ list_del_init(&ilock->blocked_locks);
+ list_add(&ilock->blocked_locks, &released);
}
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
- if (!(args->kind & CLRLK_GRANTED)) {
- ret = 0;
- goto out;
+ if (!list_empty(&released)) {
+ list_for_each_entry_safe(ilock, tmp, &released, blocked_locks)
+ {
+ list_del_init(&ilock->blocked_locks);
+ pl_trace_out(this, ilock->frame, NULL, NULL, F_SETLKW,
+ &ilock->user_flock, -1, EAGAIN, ilock->volume);
+ STACK_UNWIND_STRICT(inodelk, ilock->frame, -1, EAGAIN, NULL);
+ // No need to take lock as the locks are only in one list
+ __pl_inodelk_unref(ilock);
}
+ }
+
+ if (!(args->kind & CLRLK_GRANTED)) {
+ ret = 0;
+ goto out;
+ }
granted:
- pthread_mutex_lock (&pl_inode->mutex);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ list_for_each_entry_safe(ilock, tmp, &dom->inodelk_list, list)
{
- list_for_each_entry_safe (ilock, tmp, &dom->inodelk_list,
- list) {
- if (chk_range &&
- (ilock->user_flock.l_whence != ulock.l_whence
- || ilock->user_flock.l_start != ulock.l_start
- || ilock->user_flock.l_len != ulock.l_len))
- continue;
-
- gcount++;
- list_del_init (&ilock->list);
- list_add (&ilock->list, &released);
- }
+ if (chk_range && (ilock->user_flock.l_whence != ulock.l_whence ||
+ ilock->user_flock.l_start != ulock.l_start ||
+ ilock->user_flock.l_len != ulock.l_len))
+ continue;
+
+ gcount++;
+ list_del_init(&ilock->client_list);
+ list_del_init(&ilock->list);
+ list_add(&ilock->list, &released);
}
- pthread_mutex_unlock (&pl_inode->mutex);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
- list_for_each_entry_safe (ilock, tmp, &released, list) {
- list_del_init (&ilock->list);
- //No need to take lock as the locks are only in one list
- __pl_inodelk_unref (ilock);
- }
+ list_for_each_entry_safe(ilock, tmp, &released, list)
+ {
+ list_del_init(&ilock->list);
+ // No need to take lock as the locks are only in one list
+ __pl_inodelk_unref(ilock);
+ }
- ret = 0;
+ ret = 0;
out:
- grant_blocked_inode_locks (this, pl_inode, dom);
- *blkd = bcount;
- *granted = gcount;
- return ret;
+ grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend);
+ if (pcontend != NULL) {
+ inodelk_contention_notify(this, pcontend);
+ }
+ *blkd = bcount;
+ *granted = gcount;
+ return ret;
}
/* Returns 0 on success and -1 on failure */
int
-clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
- clrlk_args *args, int *blkd, int *granted, int *op_errno)
+clrlk_clear_entrylk(xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno)
{
- pl_entry_lock_t *elock = NULL;
- pl_entry_lock_t *tmp = NULL;
- int bcount = 0;
- int gcount = 0;
- int ret = -1;
- struct list_head removed;
- struct list_head released;
-
- INIT_LIST_HEAD (&released);
- if (args->kind & CLRLK_BLOCKED)
- goto blkd;
-
- if (args->kind & CLRLK_GRANTED)
- goto granted;
+ posix_locks_private_t *priv;
+ pl_entry_lock_t *elock = NULL;
+ pl_entry_lock_t *tmp = NULL;
+ int bcount = 0;
+ int gcount = 0;
+ int ret = -1;
+ struct list_head *pcontend = NULL;
+ struct list_head removed;
+ struct list_head released;
+ struct list_head contend;
+ struct timespec now;
+
+ INIT_LIST_HEAD(&released);
+
+ priv = this->private;
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD(pcontend);
+ timespec_now(&now);
+ }
+
+ if (args->kind & CLRLK_BLOCKED)
+ goto blkd;
+
+ if (args->kind & CLRLK_GRANTED)
+ goto granted;
blkd:
- pthread_mutex_lock (&pl_inode->mutex);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ list_for_each_entry_safe(elock, tmp, &dom->blocked_entrylks,
+ blocked_locks)
{
- list_for_each_entry_safe (elock, tmp, &dom->blocked_entrylks,
- blocked_locks) {
- if (args->opts) {
- if (!elock->basename ||
- strcmp (elock->basename, args->opts))
- continue;
- }
-
- bcount++;
-
- list_del_init (&elock->blocked_locks);
- list_add_tail (&elock->blocked_locks, &released);
- }
- }
- pthread_mutex_unlock (&pl_inode->mutex);
-
- list_for_each_entry_safe (elock, tmp, &released, blocked_locks) {
- list_del_init (&elock->blocked_locks);
- entrylk_trace_out (this, elock->frame, elock->volume, NULL, NULL,
- elock->basename, ENTRYLK_LOCK, elock->type,
- -1, EAGAIN);
- STACK_UNWIND_STRICT (entrylk, elock->frame, -1, EAGAIN, NULL);
- GF_FREE ((char *) elock->basename);
- GF_FREE (elock);
+ if (args->opts) {
+ if (!elock->basename || strcmp(elock->basename, args->opts))
+ continue;
+ }
+
+ bcount++;
+
+ list_del_init(&elock->client_list);
+ list_del_init(&elock->blocked_locks);
+ list_add_tail(&elock->blocked_locks, &released);
}
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (!list_empty(&released)) {
+ list_for_each_entry_safe(elock, tmp, &released, blocked_locks)
+ {
+ list_del_init(&elock->blocked_locks);
+ entrylk_trace_out(this, elock->frame, elock->volume, NULL, NULL,
+ elock->basename, ENTRYLK_LOCK, elock->type, -1,
+ EAGAIN);
+ STACK_UNWIND_STRICT(entrylk, elock->frame, -1, EAGAIN, NULL);
- if (!(args->kind & CLRLK_GRANTED)) {
- ret = 0;
- goto out;
+ __pl_entrylk_unref(elock);
}
+ }
+
+ if (!(args->kind & CLRLK_GRANTED)) {
+ ret = 0;
+ goto out;
+ }
granted:
- INIT_LIST_HEAD (&removed);
- pthread_mutex_lock (&pl_inode->mutex);
+ INIT_LIST_HEAD(&removed);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ list_for_each_entry_safe(elock, tmp, &dom->entrylk_list, domain_list)
{
- list_for_each_entry_safe (elock, tmp, &dom->entrylk_list,
- domain_list) {
- if (args->opts) {
- if (!elock->basename ||
- strcmp (elock->basename, args->opts))
- continue;
- }
-
- gcount++;
- list_del_init (&elock->domain_list);
- list_add_tail (&elock->domain_list, &removed);
- }
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ if (args->opts) {
+ if (!elock->basename || strcmp(elock->basename, args->opts))
+ continue;
+ }
- list_for_each_entry_safe (elock, tmp, &removed, domain_list) {
- grant_blocked_entry_locks (this, pl_inode, elock, dom);
+ gcount++;
+ list_del_init(&elock->client_list);
+ list_del_init(&elock->domain_list);
+ list_add_tail(&elock->domain_list, &removed);
+
+ __pl_entrylk_unref(elock);
}
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
- ret = 0;
+ grant_blocked_entry_locks(this, pl_inode, dom, &now, pcontend);
+ if (pcontend != NULL) {
+ entrylk_contention_notify(this, pcontend);
+ }
+
+ ret = 0;
out:
- *blkd = bcount;
- *granted = gcount;
- return ret;
+ *blkd = bcount;
+ *granted = gcount;
+ return ret;
}
int
-clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode,
- clrlk_args *args, int *blkd, int *granted,
- int *op_errno)
+clrlk_clear_lks_in_all_domains(xlator_t *this, pl_inode_t *pl_inode,
+ clrlk_args *args, int *blkd, int *granted,
+ int *op_errno)
{
- pl_dom_list_t *dom = NULL;
- int ret = -1;
- int tmp_bcount = 0;
- int tmp_gcount = 0;
-
- if (list_empty (&pl_inode->dom_list)) {
- ret = 0;
- goto out;
- }
+ pl_dom_list_t *dom = NULL;
+ int ret = -1;
+ int tmp_bcount = 0;
+ int tmp_gcount = 0;
- list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
- tmp_bcount = tmp_gcount = 0;
-
- switch (args->type)
- {
- case CLRLK_INODE:
- ret = clrlk_clear_inodelk (this, pl_inode, dom, args,
- &tmp_bcount, &tmp_gcount,
- op_errno);
- if (ret)
- goto out;
- break;
- case CLRLK_ENTRY:
- ret = clrlk_clear_entrylk (this, pl_inode, dom, args,
- &tmp_bcount, &tmp_gcount,
- op_errno);
- if (ret)
- goto out;
- break;
- }
-
- *blkd += tmp_bcount;
- *granted += tmp_gcount;
+ if (list_empty(&pl_inode->dom_list)) {
+ ret = 0;
+ goto out;
+ }
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ tmp_bcount = tmp_gcount = 0;
+
+ switch (args->type) {
+ case CLRLK_INODE:
+ ret = clrlk_clear_inodelk(this, pl_inode, dom, args,
+ &tmp_bcount, &tmp_gcount, op_errno);
+ if (ret)
+ goto out;
+ break;
+ case CLRLK_ENTRY:
+ ret = clrlk_clear_entrylk(this, pl_inode, dom, args,
+ &tmp_bcount, &tmp_gcount, op_errno);
+ if (ret)
+ goto out;
+ break;
}
- ret = 0;
+ *blkd += tmp_bcount;
+ *granted += tmp_gcount;
+ }
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
diff --git a/xlators/features/locks/src/clear.h b/xlators/features/locks/src/clear.h
index 95572a9710e..bc118cb1b81 100644
--- a/xlators/features/locks/src/clear.h
+++ b/xlators/features/locks/src/clear.h
@@ -1,86 +1,73 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __CLEAR_H__
#define __CLEAR_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "compat-errno.h"
-#include "stack.h"
-#include "call-stub.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/stack.h>
+#include <glusterfs/call-stub.h>
#include "locks.h"
typedef enum {
- CLRLK_INODE,
- CLRLK_ENTRY,
- CLRLK_POSIX,
- CLRLK_TYPE_MAX
+ CLRLK_INODE,
+ CLRLK_ENTRY,
+ CLRLK_POSIX,
+ CLRLK_TYPE_MAX
} clrlk_type;
+extern const char *clrlk_type_names[];
+
typedef enum {
- CLRLK_BLOCKED = 1,
- CLRLK_GRANTED,
- CLRLK_ALL,
- CLRLK_KIND_MAX
+ CLRLK_BLOCKED = 1,
+ CLRLK_GRANTED,
+ CLRLK_ALL,
+ CLRLK_KIND_MAX
} clrlk_kind;
typedef enum {
- KW_TYPE,
- KW_KIND,
- /*add new keywords here*/
- KW_MAX
+ KW_TYPE,
+ KW_KIND,
+ /*add new keywords here*/
+ KW_MAX
} clrlk_opts;
struct _clrlk_args;
typedef struct _clrlk_args clrlk_args;
struct _clrlk_args {
- int type;
- int kind;
- char *opts;
+ int type;
+ int kind;
+ char *opts;
};
int
-clrlk_get__kind (char *kind);
+clrlk_get__kind(char *kind);
int
-clrlk_get_type (char *type);
+clrlk_get_type(char *type);
int
-clrlk_get_lock_range (char *range_str, struct gf_flock *ulock,
- gf_boolean_t *chk_range);
+clrlk_get_lock_range(char *range_str, struct gf_flock *ulock,
+ gf_boolean_t *chk_range);
int
-clrlk_parse_args (const char* cmd, clrlk_args *args);
+clrlk_parse_args(const char *cmd, clrlk_args *args);
int
-clrlk_clear_posixlk (xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
- int *blkd, int *granted, int *op_errno);
+clrlk_clear_posixlk(xlator_t *this, pl_inode_t *pl_inode, clrlk_args *args,
+ int *blkd, int *granted, int *op_errno);
int
-clrlk_clear_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
- clrlk_args *args, int *blkd, int *granted, int *op_errno);
+clrlk_clear_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno);
int
-clrlk_clear_entrylk (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
- clrlk_args *args, int *blkd, int *granted, int *op_errno);
+clrlk_clear_entrylk(xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom,
+ clrlk_args *args, int *blkd, int *granted, int *op_errno);
int
-clrlk_clear_lks_in_all_domains (xlator_t *this, pl_inode_t *pl_inode,
- clrlk_args *args, int *blkd, int *granted,
- int *op_errno);
+clrlk_clear_lks_in_all_domains(xlator_t *this, pl_inode_t *pl_inode,
+ clrlk_args *args, int *blkd, int *granted,
+ int *op_errno);
#endif /* __CLEAR_H__ */
diff --git a/xlators/features/locks/src/common.c b/xlators/features/locks/src/common.c
index f59d0882c1a..a2c6be93e03 100644
--- a/xlators/features/locks/src/common.c
+++ b/xlators/features/locks/src/common.c
@@ -1,728 +1,786 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012, 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "inode.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/syncop.h>
#include "locks.h"
#include "common.h"
-
static int
-__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock);
+__is_lock_grantable(pl_inode_t *pl_inode, posix_lock_t *lock);
static void
-__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock);
+__insert_and_merge(pl_inode_t *pl_inode, posix_lock_t *lock);
static int
-pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode,
- posix_lock_t *old_lock);
+pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *old_lock);
+
static pl_dom_list_t *
-__allocate_domain (const char *volume)
+__allocate_domain(const char *volume)
{
- pl_dom_list_t *dom = NULL;
+ pl_dom_list_t *dom = NULL;
- dom = GF_CALLOC (1, sizeof (*dom),
- gf_locks_mt_pl_dom_list_t);
- if (!dom)
- goto out;
+ dom = GF_CALLOC(1, sizeof(*dom), gf_locks_mt_pl_dom_list_t);
+ if (!dom)
+ goto out;
- dom->domain = gf_strdup(volume);
- if (!dom->domain)
- goto out;
+ dom->domain = gf_strdup(volume);
+ if (!dom->domain)
+ goto out;
- gf_log ("posix-locks", GF_LOG_TRACE,
- "New domain allocated: %s", dom->domain);
+ gf_log("posix-locks", GF_LOG_TRACE, "New domain allocated: %s",
+ dom->domain);
- INIT_LIST_HEAD (&dom->inode_list);
- INIT_LIST_HEAD (&dom->entrylk_list);
- INIT_LIST_HEAD (&dom->blocked_entrylks);
- INIT_LIST_HEAD (&dom->inodelk_list);
- INIT_LIST_HEAD (&dom->blocked_inodelks);
+ INIT_LIST_HEAD(&dom->inode_list);
+ INIT_LIST_HEAD(&dom->entrylk_list);
+ INIT_LIST_HEAD(&dom->blocked_entrylks);
+ INIT_LIST_HEAD(&dom->inodelk_list);
+ INIT_LIST_HEAD(&dom->blocked_inodelks);
out:
- if (dom && (NULL == dom->domain)) {
- GF_FREE (dom);
- dom = NULL;
- }
+ if (dom && (NULL == dom->domain)) {
+ GF_FREE(dom);
+ dom = NULL;
+ }
- return dom;
+ return dom;
}
/* Returns domain for the lock. If domain is not present,
* allocates a domain and returns it
*/
pl_dom_list_t *
-get_domain (pl_inode_t *pl_inode, const char *volume)
+get_domain(pl_inode_t *pl_inode, const char *volume)
{
- pl_dom_list_t *dom = NULL;
+ pl_dom_list_t *dom = NULL;
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, pl_inode, out);
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, volume, out);
+ GF_VALIDATE_OR_GOTO("posix-locks", pl_inode, out);
+ GF_VALIDATE_OR_GOTO("posix-locks", volume, out);
- pthread_mutex_lock (&pl_inode->mutex);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
{
- list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
- if (strcmp (dom->domain, volume) == 0)
- goto unlock;
- }
-
- dom = __allocate_domain (volume);
- if (dom)
- list_add (&dom->inode_list, &pl_inode->dom_list);
+ if (strcmp(dom->domain, volume) == 0)
+ goto unlock;
}
+
+ dom = __allocate_domain(volume);
+ if (dom)
+ list_add(&dom->inode_list, &pl_inode->dom_list);
+ }
unlock:
- pthread_mutex_unlock (&pl_inode->mutex);
- if (dom) {
- gf_log (POSIX_LOCKS, GF_LOG_TRACE, "Domain %s found", volume);
- } else {
- gf_log (POSIX_LOCKS, GF_LOG_TRACE, "Domain %s not found", volume);
- }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ if (dom) {
+ gf_log("posix-locks", GF_LOG_TRACE, "Domain %s found", volume);
+ } else {
+ gf_log("posix-locks", GF_LOG_TRACE, "Domain %s not found", volume);
+ }
out:
- return dom;
+ return dom;
}
unsigned long
-fd_to_fdnum (fd_t *fd)
+fd_to_fdnum(fd_t *fd)
{
- return ((unsigned long) fd);
+ return ((unsigned long)fd);
}
fd_t *
-fd_from_fdnum (posix_lock_t *lock)
+fd_from_fdnum(posix_lock_t *lock)
{
- return ((fd_t *) lock->fd_num);
+ return ((fd_t *)lock->fd_num);
}
int
-__pl_inode_is_empty (pl_inode_t *pl_inode)
+__pl_inode_is_empty(pl_inode_t *pl_inode)
{
- pl_dom_list_t *dom = NULL;
- int is_empty = 1;
-
- if (!list_empty (&pl_inode->ext_list))
- is_empty = 0;
-
- list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
- if (!list_empty (&dom->entrylk_list))
- is_empty = 0;
-
- if (!list_empty (&dom->inodelk_list))
- is_empty = 0;
- }
-
- return is_empty;
+ return (list_empty(&pl_inode->ext_list));
}
void
-pl_print_locker (char *str, int size, xlator_t *this, call_frame_t *frame)
+pl_print_locker(char *str, int size, xlator_t *this, call_frame_t *frame)
{
- snprintf (str, size, "Pid=%llu, lk-owner=%s, Transport=%p, Frame=%llu",
- (unsigned long long) frame->root->pid,
- lkowner_utoa (&frame->root->lk_owner),
- (void *)frame->root->trans,
- (unsigned long long) frame->root->unique);
+ snprintf(str, size, "Pid=%llu, lk-owner=%s, Client=%p, Frame=%llu",
+ (unsigned long long)frame->root->pid,
+ lkowner_utoa(&frame->root->lk_owner), frame->root->client,
+ (unsigned long long)frame->root->unique);
}
-
void
-pl_print_lockee (char *str, int size, fd_t *fd, loc_t *loc)
+pl_print_lockee(char *str, int size, fd_t *fd, loc_t *loc)
{
- inode_t *inode = NULL;
- char *ipath = NULL;
- int ret = 0;
+ inode_t *inode = NULL;
+ char *ipath = NULL;
+ int ret = 0;
- if (fd)
- inode = fd->inode;
- if (loc)
- inode = loc->inode;
+ if (fd)
+ inode = fd->inode;
+ if (loc)
+ inode = loc->inode;
- if (!inode) {
- snprintf (str, size, "<nul>");
- return;
- }
+ if (!inode) {
+ snprintf(str, size, "<nul>");
+ return;
+ }
- if (loc && loc->path) {
- ipath = gf_strdup (loc->path);
- } else {
- ret = inode_path (inode, NULL, &ipath);
- if (ret <= 0)
- ipath = NULL;
- }
+ if (loc && loc->path) {
+ ipath = gf_strdup(loc->path);
+ } else {
+ ret = inode_path(inode, NULL, &ipath);
+ if (ret <= 0)
+ ipath = NULL;
+ }
- snprintf (str, size, "gfid=%s, fd=%p, path=%s",
- uuid_utoa (inode->gfid), fd,
- ipath ? ipath : "<nul>");
+ snprintf(str, size, "gfid=%s, fd=%p, path=%s", uuid_utoa(inode->gfid), fd,
+ ipath ? ipath : "<nul>");
- if (ipath)
- GF_FREE (ipath);
+ GF_FREE(ipath);
}
-
void
-pl_print_lock (char *str, int size, int cmd,
- struct gf_flock *flock, gf_lkowner_t *owner)
+pl_print_lock(char *str, int size, int cmd, struct gf_flock *flock,
+ gf_lkowner_t *owner)
{
- char *cmd_str = NULL;
- char *type_str = NULL;
+ char *cmd_str = NULL;
+ char *type_str = NULL;
- switch (cmd) {
+ switch (cmd) {
#if F_GETLK != F_GETLK64
case F_GETLK64:
#endif
case F_GETLK:
- cmd_str = "GETLK";
- break;
+ cmd_str = "GETLK";
+ break;
#if F_SETLK != F_SETLK64
case F_SETLK64:
#endif
case F_SETLK:
- cmd_str = "SETLK";
- break;
+ cmd_str = "SETLK";
+ break;
#if F_SETLKW != F_SETLKW64
case F_SETLKW64:
#endif
case F_SETLKW:
- cmd_str = "SETLKW";
- break;
+ cmd_str = "SETLKW";
+ break;
default:
- cmd_str = "UNKNOWN";
- break;
- }
+ cmd_str = "UNKNOWN";
+ break;
+ }
- switch (flock->l_type) {
+ switch (flock->l_type) {
case F_RDLCK:
- type_str = "READ";
- break;
+ type_str = "READ";
+ break;
case F_WRLCK:
- type_str = "WRITE";
- break;
+ type_str = "WRITE";
+ break;
case F_UNLCK:
- type_str = "UNLOCK";
- break;
+ type_str = "UNLOCK";
+ break;
default:
- type_str = "UNKNOWN";
- break;
- }
-
- snprintf (str, size, "lock=FCNTL, cmd=%s, type=%s, "
- "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
- cmd_str, type_str, (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid,
- lkowner_utoa (owner));
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf(str, size,
+ "lock=FCNTL, cmd=%s, type=%s, "
+ "start=%llu, len=%llu, pid=%llu, lk-owner=%s",
+ cmd_str, type_str, (unsigned long long)flock->l_start,
+ (unsigned long long)flock->l_len, (unsigned long long)flock->l_pid,
+ lkowner_utoa(owner));
}
-
void
-pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
- int cmd, struct gf_flock *flock, const char *domain)
+pl_trace_in(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, int cmd,
+ struct gf_flock *flock, const char *domain)
{
- posix_locks_private_t *priv = NULL;
- char pl_locker[256];
- char pl_lockee[256];
- char pl_lock[256];
-
- priv = this->private;
+ posix_locks_private_t *priv = this->private;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_lock[256];
- if (!priv->trace)
- return;
+ if (!priv->trace)
+ return;
- pl_print_locker (pl_locker, 256, this, frame);
- pl_print_lockee (pl_lockee, 256, fd, loc);
- if (domain)
- pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
- else
- pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
+ pl_print_locker(pl_locker, 256, this, frame);
+ pl_print_lockee(pl_lockee, 256, fd, loc);
+ if (domain)
+ pl_print_inodelk(pl_lock, 256, cmd, flock, domain);
+ else
+ pl_print_lock(pl_lock, 256, cmd, flock, &frame->root->lk_owner);
- gf_log (this->name, GF_LOG_INFO,
- "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}",
- pl_locker, pl_lockee, pl_lock);
+ gf_log(this->name, GF_LOG_INFO,
+ "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}", pl_locker,
+ pl_lockee, pl_lock);
}
-
void
-pl_print_verdict (char *str, int size, int op_ret, int op_errno)
+pl_print_verdict(char *str, int size, int op_ret, int op_errno)
{
- char *verdict = NULL;
-
- if (op_ret == 0) {
- verdict = "GRANTED";
- } else {
- switch (op_errno) {
- case EAGAIN:
- verdict = "TRYAGAIN";
- break;
- default:
- verdict = strerror (op_errno);
- }
+ char *verdict = NULL;
+
+ if (op_ret == 0) {
+ verdict = "GRANTED";
+ } else {
+ switch (op_errno) {
+ case EAGAIN:
+ verdict = "TRYAGAIN";
+ break;
+ default:
+ verdict = strerror(op_errno);
}
+ }
- snprintf (str, size, "%s", verdict);
+ snprintf(str, size, "%s", verdict);
}
-
void
-pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
- int cmd, struct gf_flock *flock, int op_ret, int op_errno, const char *domain)
+pl_trace_out(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, int cmd,
+ struct gf_flock *flock, int op_ret, int op_errno,
+ const char *domain)
{
- posix_locks_private_t *priv = NULL;
- char pl_locker[256];
- char pl_lockee[256];
- char pl_lock[256];
- char verdict[32];
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_lock[256];
+ char verdict[32];
- priv = this->private;
+ priv = this->private;
- if (!priv->trace)
- return;
+ if (!priv->trace)
+ return;
- pl_print_locker (pl_locker, 256, this, frame);
- pl_print_lockee (pl_lockee, 256, fd, loc);
- if (domain)
- pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
- else
- pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
+ pl_print_locker(pl_locker, 256, this, frame);
+ pl_print_lockee(pl_lockee, 256, fd, loc);
+ if (domain)
+ pl_print_inodelk(pl_lock, 256, cmd, flock, domain);
+ else
+ pl_print_lock(pl_lock, 256, cmd, flock, &frame->root->lk_owner);
- pl_print_verdict (verdict, 32, op_ret, op_errno);
+ pl_print_verdict(verdict, 32, op_ret, op_errno);
- gf_log (this->name, GF_LOG_INFO,
- "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}",
- verdict, pl_locker, pl_lockee, pl_lock);
+ gf_log(this->name, GF_LOG_INFO,
+ "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}", verdict, pl_locker,
+ pl_lockee, pl_lock);
}
-
void
-pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
- int cmd, struct gf_flock *flock, const char *domain)
+pl_trace_block(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
+ int cmd, struct gf_flock *flock, const char *domain)
{
- posix_locks_private_t *priv = NULL;
- char pl_locker[256];
- char pl_lockee[256];
- char pl_lock[256];
-
- priv = this->private;
+ posix_locks_private_t *priv = this->private;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_lock[256];
- if (!priv->trace)
- return;
+ if (!priv->trace)
+ return;
- pl_print_locker (pl_locker, 256, this, frame);
- pl_print_lockee (pl_lockee, 256, fd, loc);
- if (domain)
- pl_print_inodelk (pl_lock, 256, cmd, flock, domain);
- else
- pl_print_lock (pl_lock, 256, cmd, flock, &frame->root->lk_owner);
+ pl_print_locker(pl_locker, 256, this, frame);
+ pl_print_lockee(pl_lockee, 256, fd, loc);
+ if (domain)
+ pl_print_inodelk(pl_lock, 256, cmd, flock, domain);
+ else
+ pl_print_lock(pl_lock, 256, cmd, flock, &frame->root->lk_owner);
- gf_log (this->name, GF_LOG_INFO,
- "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}",
- pl_locker, pl_lockee, pl_lock);
+ gf_log(this->name, GF_LOG_INFO,
+ "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}", pl_locker,
+ pl_lockee, pl_lock);
}
-
void
-pl_trace_flush (xlator_t *this, call_frame_t *frame, fd_t *fd)
+pl_trace_flush(xlator_t *this, call_frame_t *frame, fd_t *fd)
{
- posix_locks_private_t *priv = NULL;
- char pl_locker[256];
- char pl_lockee[256];
- pl_inode_t *pl_inode = NULL;
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ pl_inode_t *pl_inode = NULL;
- priv = this->private;
+ priv = this->private;
- if (!priv->trace)
- return;
+ if (!priv->trace)
+ return;
- pl_inode = pl_inode_get (this, fd->inode);
+ pl_inode = pl_inode_get(this, fd->inode, NULL);
- if (pl_inode && __pl_inode_is_empty (pl_inode))
- return;
+ if (pl_inode && __pl_inode_is_empty(pl_inode))
+ return;
- pl_print_locker (pl_locker, 256, this, frame);
- pl_print_lockee (pl_lockee, 256, fd, NULL);
+ pl_print_locker(pl_locker, 256, this, frame);
+ pl_print_lockee(pl_lockee, 256, fd, NULL);
- gf_log (this->name, GF_LOG_INFO,
- "[FLUSH] Locker = {%s} Lockee = {%s}",
- pl_locker, pl_lockee);
+ gf_log(this->name, GF_LOG_INFO, "[FLUSH] Locker = {%s} Lockee = {%s}",
+ pl_locker, pl_lockee);
}
void
-pl_trace_release (xlator_t *this, fd_t *fd)
+pl_trace_release(xlator_t *this, fd_t *fd)
{
- posix_locks_private_t *priv = NULL;
- char pl_lockee[256];
+ posix_locks_private_t *priv = NULL;
+ char pl_lockee[256];
- priv = this->private;
+ priv = this->private;
- if (!priv->trace)
- return;
+ if (!priv->trace)
+ return;
- pl_print_lockee (pl_lockee, 256, fd, NULL);
+ pl_print_lockee(pl_lockee, 256, fd, NULL);
- gf_log (this->name, GF_LOG_INFO,
- "[RELEASE] Lockee = {%s}", pl_lockee);
+ gf_log(this->name, GF_LOG_INFO, "[RELEASE] Lockee = {%s}", pl_lockee);
}
-
void
-pl_update_refkeeper (xlator_t *this, inode_t *inode)
+pl_update_refkeeper(xlator_t *this, inode_t *inode)
{
- pl_inode_t *pl_inode = NULL;
- int is_empty = 0;
- int need_unref = 0;
- int need_ref = 0;
+ pl_inode_t *pl_inode = NULL;
+ int is_empty = 0;
+ int need_unref = 0;
+ int need_ref = 0;
- pl_inode = pl_inode_get (this, inode);
+ pl_inode = pl_inode_get(this, inode, NULL);
+ if (!pl_inode)
+ return;
- pthread_mutex_lock (&pl_inode->mutex);
- {
- is_empty = __pl_inode_is_empty (pl_inode);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ is_empty = __pl_inode_is_empty(pl_inode);
- if (is_empty && pl_inode->refkeeper) {
- need_unref = 1;
- pl_inode->refkeeper = NULL;
- }
+ if (is_empty && pl_inode->refkeeper) {
+ need_unref = 1;
+ pl_inode->refkeeper = NULL;
+ }
- if (!is_empty && !pl_inode->refkeeper) {
- need_ref = 1;
- pl_inode->refkeeper = inode;
- }
+ if (!is_empty && !pl_inode->refkeeper) {
+ need_ref = 1;
+ pl_inode->refkeeper = inode;
}
- pthread_mutex_unlock (&pl_inode->mutex);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
- if (need_unref)
- inode_unref (inode);
+ if (need_unref)
+ inode_unref(inode);
- if (need_ref)
- inode_ref (inode);
+ if (need_ref)
+ inode_ref(inode);
}
-
-pl_inode_t *
-pl_inode_get (xlator_t *this, inode_t *inode)
+/* Get lock enforcement info from disk */
+int
+pl_fetch_mlock_info_from_disk(xlator_t *this, pl_inode_t *pl_inode,
+ pl_local_t *local)
{
- uint64_t tmp_pl_inode = 0;
- pl_inode_t *pl_inode = NULL;
- int ret = 0;
-
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &tmp_pl_inode);
- if (ret == 0) {
- pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
- goto unlock;
- }
- pl_inode = GF_CALLOC (1, sizeof (*pl_inode),
- gf_locks_mt_pl_inode_t);
- if (!pl_inode) {
- goto unlock;
- }
+ dict_t *xdata_rsp = NULL;
+ int ret = 0;
+ int op_ret = 0;
+
+ if (!local) {
+ return -1;
+ }
+
+ if (local->fd) {
+ op_ret = syncop_fgetxattr(this, local->fd, &xdata_rsp,
+ GF_ENFORCE_MANDATORY_LOCK, NULL, NULL);
+ } else {
+ op_ret = syncop_getxattr(this, &local->loc[0], &xdata_rsp,
+ GF_ENFORCE_MANDATORY_LOCK, NULL, NULL);
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (op_ret >= 0) {
+ pl_inode->mlock_enforced = _gf_true;
+ pl_inode->check_mlock_info = _gf_false;
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, -op_ret, 0,
+ "getxattr failed with %d", op_ret);
+ pl_inode->mlock_enforced = _gf_false;
+
+ if (-op_ret == ENODATA) {
+ pl_inode->check_mlock_info = _gf_false;
+ } else {
+ pl_inode->check_mlock_info = _gf_true;
+ }
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
- gf_log (this->name, GF_LOG_TRACE,
- "Allocating new pl inode");
+ return ret;
+}
- pthread_mutex_init (&pl_inode->mutex, NULL);
+pl_inode_t *
+pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local)
+{
+ uint64_t tmp_pl_inode = 0;
+ pl_inode_t *pl_inode = NULL;
+ int ret = 0;
+
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get(inode, this, &tmp_pl_inode);
+ if (ret == 0) {
+ pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
+ goto unlock;
+ }
- INIT_LIST_HEAD (&pl_inode->dom_list);
- INIT_LIST_HEAD (&pl_inode->ext_list);
- INIT_LIST_HEAD (&pl_inode->rw_list);
- INIT_LIST_HEAD (&pl_inode->reservelk_list);
- INIT_LIST_HEAD (&pl_inode->blocked_reservelks);
- INIT_LIST_HEAD (&pl_inode->blocked_calls);
+ pl_inode = GF_CALLOC(1, sizeof(*pl_inode), gf_locks_mt_pl_inode_t);
+ if (!pl_inode) {
+ goto unlock;
+ }
- __inode_ctx_put (inode, this, (uint64_t)(long)(pl_inode));
+ gf_log(this->name, GF_LOG_TRACE, "Allocating new pl inode");
+
+ pthread_mutex_init(&pl_inode->mutex, NULL);
+ pthread_cond_init(&pl_inode->check_fop_wind_count, 0);
+
+ INIT_LIST_HEAD(&pl_inode->dom_list);
+ INIT_LIST_HEAD(&pl_inode->ext_list);
+ INIT_LIST_HEAD(&pl_inode->rw_list);
+ INIT_LIST_HEAD(&pl_inode->reservelk_list);
+ INIT_LIST_HEAD(&pl_inode->blocked_reservelks);
+ INIT_LIST_HEAD(&pl_inode->blocked_calls);
+ INIT_LIST_HEAD(&pl_inode->metalk_list);
+ INIT_LIST_HEAD(&pl_inode->queued_locks);
+ INIT_LIST_HEAD(&pl_inode->waiting);
+ gf_uuid_copy(pl_inode->gfid, inode->gfid);
+
+ pl_inode->check_mlock_info = _gf_true;
+ pl_inode->mlock_enforced = _gf_false;
+
+ /* -2 means never looked up. -1 means something went wrong and link
+ * tracking is disabled. */
+ pl_inode->links = -2;
+
+ ret = __inode_ctx_put(inode, this, (uint64_t)(long)(pl_inode));
+ if (ret) {
+ pthread_mutex_destroy(&pl_inode->mutex);
+ GF_FREE(pl_inode);
+ pl_inode = NULL;
+ goto unlock;
}
+ }
unlock:
- UNLOCK (&inode->lock);
+ UNLOCK(&inode->lock);
- return pl_inode;
-}
+ if ((pl_inode != NULL) && pl_is_mandatory_locking_enabled(pl_inode) &&
+ pl_inode->check_mlock_info && local) {
+ /* Note: The lock enforcement information per file can be stored in the
+ attribute flag of stat(x) in posix. With that there won't be a need
+ for doing getxattr post a reboot
+ */
+ pl_fetch_mlock_info_from_disk(this, pl_inode, local);
+ }
+ return pl_inode;
+}
/* Create a new posix_lock_t */
posix_lock_t *
-new_posix_lock (struct gf_flock *flock, void *transport, pid_t client_pid,
- gf_lkowner_t *owner, fd_t *fd)
+new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
+ gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking,
+ int32_t *op_errno)
{
- posix_lock_t *lock = NULL;
+ posix_lock_t *lock = NULL;
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, flock, out);
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, transport, out);
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, fd, out);
+ GF_VALIDATE_OR_GOTO("posix-locks", flock, out);
+ GF_VALIDATE_OR_GOTO("posix-locks", client, out);
+ GF_VALIDATE_OR_GOTO("posix-locks", fd, out);
- lock = GF_CALLOC (1, sizeof (posix_lock_t),
- gf_locks_mt_posix_lock_t);
- if (!lock) {
- goto out;
- }
+ if (!pl_is_lk_owner_valid(owner, client)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
- lock->fl_start = flock->l_start;
- lock->fl_type = flock->l_type;
+ lock = GF_CALLOC(1, sizeof(posix_lock_t), gf_locks_mt_posix_lock_t);
+ if (!lock) {
+ *op_errno = ENOMEM;
+ goto out;
+ }
- if (flock->l_len == 0)
- lock->fl_end = LLONG_MAX;
- else
- lock->fl_end = flock->l_start + flock->l_len - 1;
+ lock->fl_start = flock->l_start;
+ lock->fl_type = flock->l_type;
- lock->transport = transport;
- lock->fd_num = fd_to_fdnum (fd);
- lock->fd = fd;
- lock->client_pid = client_pid;
- lock->owner = *owner;
+ if (flock->l_len == 0)
+ lock->fl_end = LLONG_MAX;
+ else
+ lock->fl_end = flock->l_start + flock->l_len - 1;
- INIT_LIST_HEAD (&lock->list);
+ lock->client = client;
+
+ lock->client_uid = gf_strdup(client->client_uid);
+ if (lock->client_uid == NULL) {
+ GF_FREE(lock);
+ lock = NULL;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ lock->fd_num = fd_to_fdnum(fd);
+ lock->fd = fd;
+ lock->client_pid = client_pid;
+ lock->owner = *owner;
+ lock->lk_flags = lk_flags;
+
+ lock->blocking = blocking;
+ memcpy(&lock->user_flock, flock, sizeof(lock->user_flock));
+
+ INIT_LIST_HEAD(&lock->list);
out:
- return lock;
+ return lock;
}
-
/* Delete a lock from the inode's lock list */
void
-__delete_lock (pl_inode_t *pl_inode, posix_lock_t *lock)
+__delete_lock(posix_lock_t *lock)
{
- list_del_init (&lock->list);
+ list_del_init(&lock->list);
}
-
/* Destroy a posix_lock */
void
-__destroy_lock (posix_lock_t *lock)
+__destroy_lock(posix_lock_t *lock)
{
- GF_FREE (lock);
+ GF_FREE(lock->client_uid);
+ GF_FREE(lock);
}
+static posix_lock_t *
+__copy_lock(posix_lock_t *src)
+{
+ posix_lock_t *dst;
+
+ dst = GF_MALLOC(sizeof(posix_lock_t), gf_locks_mt_posix_lock_t);
+ if (dst != NULL) {
+ memcpy(dst, src, sizeof(posix_lock_t));
+ dst->client_uid = gf_strdup(src->client_uid);
+ if (dst->client_uid == NULL) {
+ GF_FREE(dst);
+ dst = NULL;
+ }
+
+ if (dst != NULL)
+ INIT_LIST_HEAD(&dst->list);
+ }
+
+ return dst;
+}
/* Convert a posix_lock to a struct gf_flock */
void
-posix_lock_to_flock (posix_lock_t *lock, struct gf_flock *flock)
+posix_lock_to_flock(posix_lock_t *lock, struct gf_flock *flock)
{
- flock->l_pid = lock->client_pid;
- flock->l_type = lock->fl_type;
- flock->l_start = lock->fl_start;
- flock->l_owner = lock->owner;
-
- if (lock->fl_end == LLONG_MAX)
- flock->l_len = 0;
- else
- flock->l_len = lock->fl_end - lock->fl_start + 1;
+ flock->l_pid = lock->user_flock.l_pid;
+ flock->l_type = lock->fl_type;
+ flock->l_start = lock->fl_start;
+ flock->l_owner = lock->owner;
+
+ if (lock->fl_end == LLONG_MAX)
+ flock->l_len = 0;
+ else
+ flock->l_len = lock->fl_end - lock->fl_start + 1;
}
/* Insert the lock into the inode's lock list */
static void
-__insert_lock (pl_inode_t *pl_inode, posix_lock_t *lock)
+__insert_lock(pl_inode_t *pl_inode, posix_lock_t *lock)
{
- if (lock->blocked)
- gettimeofday (&lock->blkd_time, NULL);
- else
- gettimeofday (&lock->granted_time, NULL);
+ if (lock->blocked)
+ lock->blkd_time = gf_time();
+ else
+ lock->granted_time = gf_time();
- list_add_tail (&lock->list, &pl_inode->ext_list);
-
- return;
+ list_add_tail(&lock->list, &pl_inode->ext_list);
}
-
/* Return true if the locks overlap, false otherwise */
int
-locks_overlap (posix_lock_t *l1, posix_lock_t *l2)
+locks_overlap(posix_lock_t *l1, posix_lock_t *l2)
{
- /*
- Note:
- FUSE always gives us absolute offsets, so no need to worry
- about SEEK_CUR or SEEK_END
- */
+ /*
+ Note:
+ FUSE always gives us absolute offsets, so no need to worry
+ about SEEK_CUR or SEEK_END
+ */
- return ((l1->fl_end >= l2->fl_start) &&
- (l2->fl_end >= l1->fl_start));
+ return ((l1->fl_end >= l2->fl_start) && (l2->fl_end >= l1->fl_start));
}
-
/* Return true if the locks have the same owner */
int
-same_owner (posix_lock_t *l1, posix_lock_t *l2)
+same_owner(posix_lock_t *l1, posix_lock_t *l2)
{
-
- return (is_same_lkowner (&l1->owner, &l2->owner) &&
- (l1->transport == l2->transport));
-
+ return (is_same_lkowner(&l1->owner, &l2->owner) &&
+ (l1->client == l2->client));
}
-
/* Delete all F_UNLCK locks */
void
-__delete_unlck_locks (pl_inode_t *pl_inode)
+__delete_unlck_locks(pl_inode_t *pl_inode)
{
- posix_lock_t *l = NULL;
- posix_lock_t *tmp = NULL;
-
- list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
- if (l->fl_type == F_UNLCK) {
- __delete_lock (pl_inode, l);
- __destroy_lock (l);
- }
+ posix_lock_t *l = NULL;
+ posix_lock_t *tmp = NULL;
+
+ list_for_each_entry_safe(l, tmp, &pl_inode->ext_list, list)
+ {
+ if (l->fl_type == F_UNLCK) {
+ __delete_lock(l);
+ __destroy_lock(l);
}
+ }
}
-
/* Add two locks */
static posix_lock_t *
-add_locks (posix_lock_t *l1, posix_lock_t *l2)
+add_locks(posix_lock_t *l1, posix_lock_t *l2, posix_lock_t *dst)
{
- posix_lock_t *sum = NULL;
+ posix_lock_t *sum = NULL;
- sum = GF_CALLOC (1, sizeof (posix_lock_t),
- gf_locks_mt_posix_lock_t);
- if (!sum)
- return NULL;
+ sum = __copy_lock(dst);
+ if (!sum)
+ return NULL;
- sum->fl_start = min (l1->fl_start, l2->fl_start);
- sum->fl_end = max (l1->fl_end, l2->fl_end);
+ sum->fl_start = min(l1->fl_start, l2->fl_start);
+ sum->fl_end = max(l1->fl_end, l2->fl_end);
- return sum;
+ posix_lock_to_flock(sum, &sum->user_flock);
+
+ return sum;
}
/* Subtract two locks */
struct _values {
- posix_lock_t *locks[3];
+ posix_lock_t *locks[3];
};
/* {big} must always be contained inside {small} */
static struct _values
-subtract_locks (posix_lock_t *big, posix_lock_t *small)
+subtract_locks(posix_lock_t *big, posix_lock_t *small)
{
+ struct _values v = {.locks = {0, 0, 0}};
- struct _values v = { .locks = {0, 0, 0} };
-
- if ((big->fl_start == small->fl_start) &&
- (big->fl_end == small->fl_end)) {
- /* both edges coincide with big */
- v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
- gf_locks_mt_posix_lock_t);
- if (!v.locks[0])
- goto out;
- memcpy (v.locks[0], big, sizeof (posix_lock_t));
- v.locks[0]->fl_type = small->fl_type;
- goto done;
+ if ((big->fl_start == small->fl_start) && (big->fl_end == small->fl_end)) {
+ /* both edges coincide with big */
+ v.locks[0] = __copy_lock(big);
+ if (!v.locks[0]) {
+ goto out;
}
- if ((small->fl_start > big->fl_start) &&
- (small->fl_end < big->fl_end)) {
- /* both edges lie inside big */
- v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
- gf_locks_mt_posix_lock_t);
- if (!v.locks[0])
- goto out;
-
- v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t),
- gf_locks_mt_posix_lock_t);
- if (!v.locks[1])
- goto out;
-
- v.locks[2] = GF_CALLOC (1, sizeof (posix_lock_t),
- gf_locks_mt_posix_lock_t);
- if (!v.locks[1])
- goto out;
-
- memcpy (v.locks[0], big, sizeof (posix_lock_t));
- v.locks[0]->fl_end = small->fl_start - 1;
+ v.locks[0]->fl_type = small->fl_type;
+ v.locks[0]->user_flock.l_type = small->fl_type;
+ goto done;
+ }
+
+ if ((small->fl_start > big->fl_start) && (small->fl_end < big->fl_end)) {
+ /* both edges lie inside big */
+ v.locks[0] = __copy_lock(big);
+ v.locks[1] = __copy_lock(small);
+ v.locks[2] = __copy_lock(big);
+ if ((v.locks[0] == NULL) || (v.locks[1] == NULL) ||
+ (v.locks[2] == NULL)) {
+ goto out;
+ }
- memcpy (v.locks[1], small, sizeof (posix_lock_t));
+ v.locks[0]->fl_end = small->fl_start - 1;
+ v.locks[2]->fl_start = small->fl_end + 1;
+ posix_lock_to_flock(v.locks[0], &v.locks[0]->user_flock);
+ posix_lock_to_flock(v.locks[2], &v.locks[2]->user_flock);
+ goto done;
+ }
+
+ /* one edge coincides with big */
+ if (small->fl_start == big->fl_start) {
+ v.locks[0] = __copy_lock(big);
+ v.locks[1] = __copy_lock(small);
+ if ((v.locks[0] == NULL) || (v.locks[1] == NULL)) {
+ goto out;
+ }
- memcpy (v.locks[2], big, sizeof (posix_lock_t));
- v.locks[2]->fl_start = small->fl_end + 1;
- goto done;
+ v.locks[0]->fl_start = small->fl_end + 1;
+ posix_lock_to_flock(v.locks[0], &v.locks[0]->user_flock);
+ goto done;
+ }
+ if (small->fl_end == big->fl_end) {
+ v.locks[0] = __copy_lock(big);
+ v.locks[1] = __copy_lock(small);
+ if ((v.locks[0] == NULL) || (v.locks[1] == NULL)) {
+ goto out;
}
- /* one edge coincides with big */
- if (small->fl_start == big->fl_start) {
- v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
- gf_locks_mt_posix_lock_t);
- if (!v.locks[0])
- goto out;
+ v.locks[0]->fl_end = small->fl_start - 1;
+ posix_lock_to_flock(v.locks[0], &v.locks[0]->user_flock);
+ goto done;
+ }
- v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t),
- gf_locks_mt_posix_lock_t);
- if (!v.locks[1])
- goto out;
+ GF_ASSERT(0);
+ gf_log("posix-locks", GF_LOG_ERROR, "Unexpected case in subtract_locks");
- memcpy (v.locks[0], big, sizeof (posix_lock_t));
- v.locks[0]->fl_start = small->fl_end + 1;
-
- memcpy (v.locks[1], small, sizeof (posix_lock_t));
- goto done;
- }
-
- if (small->fl_end == big->fl_end) {
- v.locks[0] = GF_CALLOC (1, sizeof (posix_lock_t),
- gf_locks_mt_posix_lock_t);
- if (!v.locks[0])
- goto out;
+out:
+ if (v.locks[0]) {
+ __destroy_lock(v.locks[0]);
+ v.locks[0] = NULL;
+ }
+ if (v.locks[1]) {
+ __destroy_lock(v.locks[1]);
+ v.locks[1] = NULL;
+ }
+ if (v.locks[2]) {
+ __destroy_lock(v.locks[2]);
+ v.locks[2] = NULL;
+ }
- v.locks[1] = GF_CALLOC (1, sizeof (posix_lock_t),
- gf_locks_mt_posix_lock_t);
- if (!v.locks[1])
- goto out;
+done:
+ return v;
+}
- memcpy (v.locks[0], big, sizeof (posix_lock_t));
- v.locks[0]->fl_end = small->fl_start - 1;
+static posix_lock_t *
+first_conflicting_overlap(pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+ posix_lock_t *l = NULL;
+ posix_lock_t *conf = NULL;
- memcpy (v.locks[1], small, sizeof (posix_lock_t));
- goto done;
- }
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ list_for_each_entry(l, &pl_inode->ext_list, list)
+ {
+ if (l->blocked)
+ continue;
- GF_ASSERT (0);
- gf_log (POSIX_LOCKS, GF_LOG_ERROR, "Unexpected case in subtract_locks");
+ if (locks_overlap(l, lock)) {
+ if (same_owner(l, lock))
+ continue;
-out:
- if (v.locks[0]) {
- GF_FREE (v.locks[0]);
- v.locks[0] = NULL;
- }
- if (v.locks[1]) {
- GF_FREE (v.locks[1]);
- v.locks[1] = NULL;
- }
- if (v.locks[2]) {
- GF_FREE (v.locks[2]);
- v.locks[2] = NULL;
+ if ((l->fl_type == F_WRLCK) || (lock->fl_type == F_WRLCK)) {
+ conf = l;
+ goto unlock;
+ }
+ }
}
+ }
+unlock:
+ pthread_mutex_unlock(&pl_inode->mutex);
-done:
- return v;
+ return conf;
}
/*
@@ -731,350 +789,803 @@ done:
If {begin} is NULL, then start from the beginning of the list
*/
static posix_lock_t *
-first_overlap (pl_inode_t *pl_inode, posix_lock_t *lock)
+first_overlap(pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *l = NULL;
+ posix_lock_t *l = NULL;
- list_for_each_entry (l, &pl_inode->ext_list, list) {
- if (l->blocked)
- continue;
+ list_for_each_entry(l, &pl_inode->ext_list, list)
+ {
+ if (l->blocked)
+ continue;
- if (locks_overlap (l, lock))
- return l;
- }
+ if (locks_overlap(l, lock))
+ return l;
+ }
- return NULL;
+ return NULL;
}
-
-
/* Return true if lock is grantable */
static int
-__is_lock_grantable (pl_inode_t *pl_inode, posix_lock_t *lock)
+__is_lock_grantable(pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *l = NULL;
- int ret = 1;
-
- list_for_each_entry (l, &pl_inode->ext_list, list) {
- if (!l->blocked && locks_overlap (lock, l)) {
- if (((l->fl_type == F_WRLCK)
- || (lock->fl_type == F_WRLCK))
- && (lock->fl_type != F_UNLCK)
- && !same_owner (l, lock)) {
- ret = 0;
- break;
- }
- }
+ posix_lock_t *l = NULL;
+ int ret = 1;
+
+ list_for_each_entry(l, &pl_inode->ext_list, list)
+ {
+ if (!l->blocked && locks_overlap(lock, l)) {
+ if (((l->fl_type == F_WRLCK) || (lock->fl_type == F_WRLCK)) &&
+ (lock->fl_type != F_UNLCK) && !same_owner(l, lock)) {
+ ret = 0;
+ break;
+ }
}
- return ret;
+ }
+ return ret;
}
-
-extern void do_blocked_rw (pl_inode_t *);
-
+extern void
+do_blocked_rw(pl_inode_t *);
static void
-__insert_and_merge (pl_inode_t *pl_inode, posix_lock_t *lock)
+__insert_and_merge(pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *conf = NULL;
- posix_lock_t *t = NULL;
- posix_lock_t *sum = NULL;
- int i = 0;
- struct _values v = { .locks = {0, 0, 0} };
-
- list_for_each_entry_safe (conf, t, &pl_inode->ext_list, list) {
- if (conf->blocked)
- continue;
- if (!locks_overlap (conf, lock))
- continue;
+ posix_lock_t *conf = NULL;
+ posix_lock_t *t = NULL;
+ posix_lock_t *sum = NULL;
+ int i = 0;
+ struct _values v = {.locks = {0, 0, 0}};
+
+ list_for_each_entry_safe(conf, t, &pl_inode->ext_list, list)
+ {
+ if (conf->blocked)
+ continue;
+ if (!locks_overlap(conf, lock))
+ continue;
+
+ if (same_owner(conf, lock)) {
+ if (conf->fl_type == lock->fl_type &&
+ conf->lk_flags == lock->lk_flags) {
+ sum = add_locks(lock, conf, lock);
+
+ __delete_lock(conf);
+ __destroy_lock(conf);
+
+ __destroy_lock(lock);
+ INIT_LIST_HEAD(&sum->list);
+ posix_lock_to_flock(sum, &sum->user_flock);
+ __insert_and_merge(pl_inode, sum);
- if (same_owner (conf, lock)) {
- if (conf->fl_type == lock->fl_type) {
- sum = add_locks (lock, conf);
+ return;
+ } else {
+ sum = add_locks(lock, conf, conf);
+
+ v = subtract_locks(sum, lock);
+
+ __delete_lock(conf);
+ __destroy_lock(conf);
+
+ __delete_lock(lock);
+ __destroy_lock(lock);
- sum->fl_type = lock->fl_type;
- sum->transport = lock->transport;
- sum->fd_num = lock->fd_num;
- sum->client_pid = lock->client_pid;
- sum->owner = lock->owner;
+ __destroy_lock(sum);
- __delete_lock (pl_inode, conf);
- __destroy_lock (conf);
+ for (i = 0; i < 3; i++) {
+ if (!v.locks[i])
+ continue;
- __destroy_lock (lock);
- INIT_LIST_HEAD (&sum->list);
- posix_lock_to_flock (sum, &sum->user_flock);
- __insert_and_merge (pl_inode, sum);
+ __insert_and_merge(pl_inode, v.locks[i]);
+ }
- return;
- } else {
- sum = add_locks (lock, conf);
+ __delete_unlck_locks(pl_inode);
+ return;
+ }
+ }
- sum->fl_type = conf->fl_type;
- sum->transport = conf->transport;
- sum->fd_num = conf->fd_num;
- sum->client_pid = conf->client_pid;
- sum->owner = conf->owner;
+ if (lock->fl_type == F_UNLCK) {
+ continue;
+ }
- v = subtract_locks (sum, lock);
+ if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) {
+ __insert_lock(pl_inode, lock);
+ return;
+ }
+ }
+
+ /* no conflicts, so just insert */
+ if (lock->fl_type != F_UNLCK) {
+ __insert_lock(pl_inode, lock);
+ } else {
+ __destroy_lock(lock);
+ }
+}
- __delete_lock (pl_inode, conf);
- __destroy_lock (conf);
+void
+__grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted)
+{
+ struct list_head tmp_list;
+ posix_lock_t *l = NULL;
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *conf = NULL;
+
+ INIT_LIST_HEAD(&tmp_list);
+
+ list_for_each_entry_safe(l, tmp, &pl_inode->ext_list, list)
+ {
+ if (l->blocked) {
+ conf = first_overlap(pl_inode, l);
+ if (conf)
+ continue;
+
+ l->blocked = 0;
+ list_move_tail(&l->list, &tmp_list);
+ }
+ }
- __delete_lock (pl_inode, lock);
- __destroy_lock (lock);
+ list_for_each_entry_safe(l, tmp, &tmp_list, list)
+ {
+ list_del_init(&l->list);
- __destroy_lock (sum);
+ if (__is_lock_grantable(pl_inode, l)) {
+ conf = GF_CALLOC(1, sizeof(*conf), gf_locks_mt_posix_lock_t);
- for (i = 0; i < 3; i++) {
- if (!v.locks[i])
- continue;
+ if (!conf) {
+ l->blocked = 1;
+ __insert_lock(pl_inode, l);
+ continue;
+ }
- INIT_LIST_HEAD (&v.locks[i]->list);
- posix_lock_to_flock (v.locks[i],
- &v.locks[i]->user_flock);
- __insert_and_merge (pl_inode,
- v.locks[i]);
- }
+ conf->frame = l->frame;
+ l->frame = NULL;
- __delete_unlck_locks (pl_inode);
- return;
- }
- }
+ posix_lock_to_flock(l, &conf->user_flock);
- if (lock->fl_type == F_UNLCK) {
- continue;
- }
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%s %" PRId64 " - %" PRId64
+ " => Granted",
+ l->fl_type == F_UNLCK ? "Unlock" : "Lock", l->client_pid,
+ lkowner_utoa(&l->owner), l->user_flock.l_start,
+ l->user_flock.l_len);
- if ((conf->fl_type == F_RDLCK) && (lock->fl_type == F_RDLCK)) {
- __insert_lock (pl_inode, lock);
- return;
- }
- }
+ __insert_and_merge(pl_inode, l);
- /* no conflicts, so just insert */
- if (lock->fl_type != F_UNLCK) {
- __insert_lock (pl_inode, lock);
+ list_add(&conf->list, granted);
} else {
- __destroy_lock (lock);
+ l->blocked = 1;
+ __insert_lock(pl_inode, l);
}
+ }
}
-
void
-__grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode, struct list_head *granted)
+grant_blocked_locks(xlator_t *this, pl_inode_t *pl_inode)
{
- struct list_head tmp_list;
- posix_lock_t *l = NULL;
- posix_lock_t *tmp = NULL;
- posix_lock_t *conf = NULL;
+ struct list_head granted_list;
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *lock = NULL;
+ pl_local_t *local = NULL;
+ INIT_LIST_HEAD(&granted_list);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __grant_blocked_locks(this, pl_inode, &granted_list);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ list_for_each_entry_safe(lock, tmp, &granted_list, list)
+ {
+ list_del_init(&lock->list);
+
+ pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
+ 0, 0, NULL);
+ local = lock->frame->local;
+ PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
+ __destroy_lock(lock);
+ }
+
+ return;
+}
- INIT_LIST_HEAD (&tmp_list);
+static int
+pl_send_prelock_unlock(xlator_t *this, pl_inode_t *pl_inode,
+ posix_lock_t *old_lock)
+{
+ struct gf_flock flock = {
+ 0,
+ };
+ posix_lock_t *unlock_lock = NULL;
+ int32_t op_errno = 0;
- list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
- if (l->blocked) {
- conf = first_overlap (pl_inode, l);
- if (conf)
- continue;
+ struct list_head granted_list;
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *lock = NULL;
+ pl_local_t *local = NULL;
- l->blocked = 0;
- list_move_tail (&l->list, &tmp_list);
- }
- }
+ int ret = -1;
- list_for_each_entry_safe (l, tmp, &tmp_list, list) {
- list_del_init (&l->list);
+ INIT_LIST_HEAD(&granted_list);
- if (__is_lock_grantable (pl_inode, l)) {
- conf = GF_CALLOC (1, sizeof (*conf),
- gf_locks_mt_posix_lock_t);
+ flock.l_type = F_UNLCK;
+ flock.l_whence = old_lock->user_flock.l_whence;
+ flock.l_start = old_lock->user_flock.l_start;
+ flock.l_len = old_lock->user_flock.l_len;
+ flock.l_pid = old_lock->user_flock.l_pid;
- if (!conf) {
- l->blocked = 1;
- __insert_lock (pl_inode, l);
- continue;
- }
+ unlock_lock = new_posix_lock(&flock, old_lock->client, old_lock->client_pid,
+ &old_lock->owner, old_lock->fd,
+ old_lock->lk_flags, 0, &op_errno);
+ GF_VALIDATE_OR_GOTO(this->name, unlock_lock, out);
+ ret = 0;
- conf->frame = l->frame;
- l->frame = NULL;
+ __insert_and_merge(pl_inode, unlock_lock);
- posix_lock_to_flock (l, &conf->user_flock);
+ __grant_blocked_locks(this, pl_inode, &granted_list);
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Granted",
- l->fl_type == F_UNLCK ? "Unlock" : "Lock",
- l->client_pid, lkowner_utoa (&l->owner),
- l->user_flock.l_start,
- l->user_flock.l_len);
+ list_for_each_entry_safe(lock, tmp, &granted_list, list)
+ {
+ list_del_init(&lock->list);
- __insert_and_merge (pl_inode, l);
+ pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
+ 0, 0, NULL);
+ local = lock->frame->local;
+ PL_STACK_UNWIND_AND_FREE(local, lk, lock->frame, 0, 0,
+ &lock->user_flock, NULL);
+ __destroy_lock(lock);
+ }
- list_add (&conf->list, granted);
- } else {
- l->blocked = 1;
- __insert_lock (pl_inode, l);
- }
+out:
+ return ret;
+}
+
+int
+pl_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ int can_block)
+{
+ int ret = 0;
+
+ errno = 0;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ /* Send unlock before the actual lock to
+ prevent lock upgrade / downgrade
+ problems only if:
+ - it is a blocking call
+ - it has other conflicting locks
+ */
+
+ if (can_block && !(__is_lock_grantable(pl_inode, lock))) {
+ ret = pl_send_prelock_unlock(this, pl_inode, lock);
+ if (ret)
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Could not send pre-lock "
+ "unlock");
+ }
+
+ if (__is_lock_grantable(pl_inode, lock)) {
+ if (pl_metalock_is_active(pl_inode)) {
+ __pl_queue_lock(pl_inode, lock);
+ pthread_mutex_unlock(&pl_inode->mutex);
+ ret = -2;
+ goto out;
+ }
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%s %" PRId64 " - %" PRId64 " => OK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa(&lock->owner),
+ lock->user_flock.l_start, lock->user_flock.l_len);
+ __insert_and_merge(pl_inode, lock);
+ } else if (can_block) {
+ if (pl_metalock_is_active(pl_inode)) {
+ __pl_queue_lock(pl_inode, lock);
+ pthread_mutex_unlock(&pl_inode->mutex);
+ ret = -2;
+ goto out;
+ }
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%s %" PRId64 " - %" PRId64
+ " => Blocked",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa(&lock->owner),
+ lock->user_flock.l_start, lock->user_flock.l_len);
+
+ pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW,
+ &lock->user_flock, NULL);
+
+ lock->blocked = 1;
+ __insert_lock(pl_inode, lock);
+ ret = -1;
+ } else {
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%s %" PRId64 " - %" PRId64 " => NOK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa(&lock->owner),
+ lock->user_flock.l_start, lock->user_flock.l_len);
+ errno = EAGAIN;
+ ret = -1;
}
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ grant_blocked_locks(this, pl_inode);
+
+ do_blocked_rw(pl_inode);
+
+out:
+ return ret;
}
+posix_lock_t *
+pl_getlk(pl_inode_t *pl_inode, posix_lock_t *lock)
+{
+ posix_lock_t *conf = first_conflicting_overlap(pl_inode, lock);
+ if (conf == NULL) {
+ lock->fl_type = F_UNLCK;
+ return lock;
+ }
+
+ return conf;
+}
-void
-grant_blocked_locks (xlator_t *this, pl_inode_t *pl_inode)
+gf_boolean_t
+pl_does_monkey_want_stuck_lock()
{
- struct list_head granted_list;
- posix_lock_t *tmp = NULL;
- posix_lock_t *lock = NULL;
+ long int monkey_unlock_rand = 0;
+ long int monkey_unlock_rand_rem = 0;
+
+ /* coverity[DC.WEAK_CRYPTO] */
+ monkey_unlock_rand = random();
+ monkey_unlock_rand_rem = monkey_unlock_rand % 100;
+ if (monkey_unlock_rand_rem == 0)
+ return _gf_true;
+ return _gf_false;
+}
+
+int
+pl_lock_preempt(pl_inode_t *pl_inode, posix_lock_t *reqlock)
+{
+ posix_lock_t *lock = NULL;
+ posix_lock_t *i = NULL;
+ pl_rw_req_t *rw = NULL;
+ pl_rw_req_t *itr = NULL;
+ struct list_head unwind_blist = {
+ 0,
+ };
+ struct list_head unwind_rw_list = {
+ 0,
+ };
+ int ret = 0;
+
+ INIT_LIST_HEAD(&unwind_blist);
+ INIT_LIST_HEAD(&unwind_rw_list);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ /*
+ - go through the lock list
+ - remove all locks from different owners
+ - same owner locks will be added or substracted based on
+ the new request
+ - add the new lock
+ */
+ list_for_each_entry_safe(lock, i, &pl_inode->ext_list, list)
+ {
+ if (lock->blocked) {
+ list_del_init(&lock->list);
+ list_add(&lock->list, &unwind_blist);
+ continue;
+ }
+
+ if (locks_overlap(lock, reqlock)) {
+ if (same_owner(lock, reqlock))
+ continue;
+
+ /* remove conflicting locks */
+ list_del_init(&lock->list);
+ __delete_lock(lock);
+ __destroy_lock(lock);
+ }
+ }
- INIT_LIST_HEAD (&granted_list);
+ __insert_and_merge(pl_inode, reqlock);
- pthread_mutex_lock (&pl_inode->mutex);
+ list_for_each_entry_safe(rw, itr, &pl_inode->rw_list, list)
{
- __grant_blocked_locks (this, pl_inode, &granted_list);
+ list_del_init(&rw->list);
+ list_add(&rw->list, &unwind_rw_list);
}
- pthread_mutex_unlock (&pl_inode->mutex);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ /* unwind blocked locks */
+ list_for_each_entry_safe(lock, i, &unwind_blist, list)
+ {
+ PL_STACK_UNWIND_AND_FREE(((pl_local_t *)lock->frame->local), lk,
+ lock->frame, -1, EBUSY, &lock->user_flock,
+ NULL);
+ __destroy_lock(lock);
+ }
+
+ /* unwind blocked IOs */
+ list_for_each_entry_safe(rw, itr, &unwind_rw_list, list)
+ {
+ pl_clean_local(rw->stub->frame->local);
+ call_unwind_error(rw->stub, -1, EBUSY);
+ }
+
+ return ret;
+}
- list_for_each_entry_safe (lock, tmp, &granted_list, list) {
- list_del_init (&lock->list);
+/* Return true in case we need to ensure mandatory-locking
+ * semantics under different modes.
+ */
+gf_boolean_t
+pl_is_mandatory_locking_enabled(pl_inode_t *pl_inode)
+{
+ posix_locks_private_t *priv = THIS->private;
- pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
- &lock->user_flock, 0, 0, NULL);
+ if (priv->mandatory_mode == MLK_FILE_BASED && pl_inode->mandatory)
+ return _gf_true;
+ else if (priv->mandatory_mode == MLK_FORCED ||
+ priv->mandatory_mode == MLK_OPTIMAL)
+ return _gf_true;
- STACK_UNWIND_STRICT (lk, lock->frame, 0, 0,
- &lock->user_flock, NULL);
+ return _gf_false;
+}
- GF_FREE (lock);
+void
+pl_clean_local(pl_local_t *local)
+{
+ if (!local)
+ return;
+
+ if (local->inodelk_dom_count_req)
+ data_unref(local->inodelk_dom_count_req);
+ loc_wipe(&local->loc[0]);
+ loc_wipe(&local->loc[1]);
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->inode)
+ inode_unref(local->inode);
+ mem_put(local);
+}
+
+/*
+TODO: detach local initialization from PL_LOCAL_GET_REQUESTS and add it here
+*/
+int
+pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd)
+{
+ pl_local_t *local = NULL;
+
+ if (!loc && !fd) {
+ return -1;
+ }
+
+ if (!frame->local) {
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, 0,
+ "mem allocation failed");
+ return -1;
}
- return;
+ local->inode = (loc ? inode_ref(loc->inode) : inode_ref(fd->inode));
+
+ frame->local = local;
+ }
+
+ return 0;
}
-static int
-pl_send_prelock_unlock (xlator_t *this, pl_inode_t *pl_inode,
- posix_lock_t *old_lock)
+gf_boolean_t
+pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client)
{
- struct gf_flock flock = {0,};
- posix_lock_t *unlock_lock = NULL;
+ if (client && (client->opversion < GD_OP_VERSION_7_0)) {
+ return _gf_true;
+ }
+
+ if (is_lk_owner_null(owner)) {
+ return _gf_false;
+ }
+ return _gf_true;
+}
- struct list_head granted_list;
- posix_lock_t *tmp = NULL;
- posix_lock_t *lock = NULL;
+static int32_t
+pl_inode_from_loc(loc_t *loc, inode_t **pinode)
+{
+ inode_t *inode = NULL;
+ int32_t error = 0;
+
+ if (loc->inode != NULL) {
+ inode = inode_ref(loc->inode);
+ goto done;
+ }
+
+ if (loc->parent == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+
+ if (!gf_uuid_is_null(loc->gfid)) {
+ inode = inode_find(loc->parent->table, loc->gfid);
+ if (inode != NULL) {
+ goto done;
+ }
+ }
- int ret = -1;
+ if (loc->name == NULL) {
+ error = EINVAL;
+ goto done;
+ }
- INIT_LIST_HEAD (&granted_list);
+ inode = inode_grep(loc->parent->table, loc->parent, loc->name);
+ if (inode == NULL) {
+ /* We haven't found any inode. This means that the file doesn't exist
+ * or that even if it exists, we don't have any knowledge about it, so
+ * we don't have locks on it either, which is fine for our purposes. */
+ goto done;
+ }
- flock.l_type = F_UNLCK;
- flock.l_whence = old_lock->user_flock.l_whence;
- flock.l_start = old_lock->user_flock.l_start;
- flock.l_len = old_lock->user_flock.l_len;
+done:
+ *pinode = inode;
+ return error;
+}
- unlock_lock = new_posix_lock (&flock, old_lock->transport,
- old_lock->client_pid, &old_lock->owner,
- old_lock->fd);
- GF_VALIDATE_OR_GOTO (this->name, unlock_lock, out);
- ret = 0;
+static gf_boolean_t
+pl_inode_has_owners(xlator_t *xl, client_t *client, pl_inode_t *pl_inode,
+ struct timespec *now, struct list_head *contend)
+{
+ pl_dom_list_t *dom;
+ pl_inode_lock_t *lock;
+ gf_boolean_t has_owners = _gf_false;
- __insert_and_merge (pl_inode, unlock_lock);
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ list_for_each_entry(lock, &dom->inodelk_list, list)
+ {
+ /* If the lock belongs to the same client, we assume it's related
+ * to the same operation, so we allow the removal to continue. */
+ if (lock->client == client) {
+ continue;
+ }
+ /* If the lock belongs to an internal process, we don't block the
+ * removal. */
+ if (lock->client_pid < 0) {
+ continue;
+ }
+ if (contend == NULL) {
+ return _gf_true;
+ }
+ has_owners = _gf_true;
+ inodelk_contention_notify_check(xl, lock, now, contend);
+ }
+ }
- __grant_blocked_locks (this, pl_inode, &granted_list);
+ return has_owners;
+}
+
+int32_t
+pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
+ pl_inode_t **ppl_inode, struct list_head *contend)
+{
+ struct timespec now;
+ inode_t *inode;
+ pl_inode_t *pl_inode;
+ int32_t error;
+
+ pl_inode = NULL;
+
+ error = pl_inode_from_loc(loc, &inode);
+ if ((error != 0) || (inode == NULL)) {
+ goto done;
+ }
+
+ pl_inode = pl_inode_get(xl, inode, NULL);
+ if (pl_inode == NULL) {
+ inode_unref(inode);
+ error = ENOMEM;
+ goto done;
+ }
+
+ /* pl_inode_from_loc() already increments ref count for inode, so
+ * we only assign here our reference. */
+ pl_inode->inode = inode;
+
+ timespec_now(&now);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ if (pl_inode->removed) {
+ error = ESTALE;
+ goto unlock;
+ }
+
+ if (pl_inode_has_owners(xl, frame->root->client, pl_inode, &now, contend)) {
+ error = -1;
+ /* We skip the unlock here because the caller must create a stub when
+ * we return -1 and do a call to pl_inode_remove_complete(), which
+ * assumes the lock is still acquired and will release it once
+ * everything else is prepared. */
+ goto done;
+ }
+
+ pl_inode->is_locked = _gf_true;
+ pl_inode->remove_running++;
- list_for_each_entry_safe (lock, tmp, &granted_list, list) {
- list_del_init (&lock->list);
+unlock:
+ pthread_mutex_unlock(&pl_inode->mutex);
- pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
- &lock->user_flock, 0, 0, NULL);
+done:
+ *ppl_inode = pl_inode;
- STACK_UNWIND_STRICT (lk, lock->frame, 0, 0,
- &lock->user_flock, NULL);
+ return error;
+}
- GF_FREE (lock);
+int32_t
+pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub,
+ struct list_head *contend)
+{
+ pl_inode_lock_t *lock;
+ int32_t error = -1;
+
+ if (stub != NULL) {
+ list_add_tail(&stub->list, &pl_inode->waiting);
+ pl_inode->is_locked = _gf_true;
+ } else {
+ error = ENOMEM;
+
+ while (!list_empty(contend)) {
+ lock = list_first_entry(contend, pl_inode_lock_t, list);
+ list_del_init(&lock->list);
+ __pl_inodelk_unref(lock);
}
+ }
-out:
- return ret;
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (error < 0) {
+ inodelk_contention_notify(xl, contend);
+ }
+
+ inode_unref(pl_inode->inode);
+
+ return error;
}
-int
-pl_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
- int can_block)
+void
+pl_inode_remove_wake(struct list_head *list)
{
- int ret = 0;
+ call_stub_t *stub;
- errno = 0;
+ while (!list_empty(list)) {
+ stub = list_first_entry(list, call_stub_t, list);
+ list_del_init(&stub->list);
- pthread_mutex_lock (&pl_inode->mutex);
- {
- /* Send unlock before the actual lock to
- prevent lock upgrade / downgrade
- problems only if:
- - it is a blocking call
- - it has other conflicting locks
- */
-
- if (can_block &&
- !(__is_lock_grantable (pl_inode, lock))) {
- ret = pl_send_prelock_unlock (this, pl_inode,
- lock);
- if (ret)
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not send pre-lock "
- "unlock");
- }
+ call_resume(stub);
+ }
+}
- if (__is_lock_grantable (pl_inode, lock)) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => OK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->user_flock.l_start,
- lock->user_flock.l_len);
- __insert_and_merge (pl_inode, lock);
- } else if (can_block) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->user_flock.l_start,
- lock->user_flock.l_len);
- lock->blocked = 1;
- __insert_lock (pl_inode, lock);
- ret = -1;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => NOK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->user_flock.l_start,
- lock->user_flock.l_len);
- errno = EAGAIN;
- ret = -1;
- }
+void
+pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error)
+{
+ struct list_head contend, granted;
+ struct timespec now;
+ pl_dom_list_t *dom;
+
+ if (pl_inode == NULL) {
+ return;
+ }
+
+ INIT_LIST_HEAD(&contend);
+ INIT_LIST_HEAD(&granted);
+ timespec_now(&now);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ if (error == 0) {
+ if (pl_inode->links >= 0) {
+ pl_inode->links--;
+ }
+ if (pl_inode->links == 0) {
+ pl_inode->removed = _gf_true;
}
- pthread_mutex_unlock (&pl_inode->mutex);
+ }
- grant_blocked_locks (this, pl_inode);
+ pl_inode->remove_running--;
- do_blocked_rw (pl_inode);
+ if ((pl_inode->remove_running == 0) && list_empty(&pl_inode->waiting)) {
+ pl_inode->is_locked = _gf_false;
- return ret;
-}
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ __grant_blocked_inode_locks(xl, pl_inode, &granted, dom, &now,
+ &contend);
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
-posix_lock_t *
-pl_getlk (pl_inode_t *pl_inode, posix_lock_t *lock)
+ unwind_granted_inodes(xl, pl_inode, &granted);
+
+ inodelk_contention_notify(xl, &contend);
+
+ inode_unref(pl_inode->inode);
+}
+
+void
+pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode,
+ struct list_head *list)
{
- posix_lock_t *conf = NULL;
+ call_stub_t *stub, *tmp;
+
+ if (!pl_inode->is_locked) {
+ return;
+ }
- conf = first_overlap (pl_inode, lock);
+ list_for_each_entry_safe(stub, tmp, &pl_inode->waiting, list)
+ {
+ if (!pl_inode_has_owners(xl, stub->frame->root->client, pl_inode, NULL,
+ NULL)) {
+ list_move_tail(&stub->list, list);
+ }
+ }
+}
- if (conf == NULL) {
- lock->fl_type = F_UNLCK;
- return lock;
+/* This function determines if an inodelk attempt can be done now or it needs
+ * to wait.
+ *
+ * Possible return values:
+ * < 0: An error occurred. Currently only -ESTALE can be returned if the
+ * inode has been deleted previously by unlink/rmdir/rename
+ * = 0: The lock can be attempted.
+ * > 0: The lock needs to wait because a conflicting remove operation is
+ * ongoing.
+ */
+int32_t
+pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock)
+{
+ pl_dom_list_t *dom;
+ pl_inode_lock_t *ilock;
+
+ /* If the inode has been deleted, we won't allow any lock. */
+ if (pl_inode->removed) {
+ return -ESTALE;
+ }
+
+ /* We only synchronize with locks made for regular operations coming from
+ * the user. Locks done for internal purposes are hard to control and could
+ * lead to long delays or deadlocks quite easily. */
+ if (lock->client_pid < 0) {
+ return 0;
+ }
+ if (!pl_inode->is_locked) {
+ return 0;
+ }
+ if (pl_inode->remove_running > 0) {
+ return 1;
+ }
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ list_for_each_entry(ilock, &dom->inodelk_list, list)
+ {
+ /* If a lock from the same client is already granted, we allow this
+ * one to continue. This is necessary to prevent deadlocks when
+ * multiple locks are taken for the same operation.
+ *
+ * On the other side it's unlikely that the same client sends
+ * completely unrelated locks for the same inode.
+ */
+ if (ilock->client == lock->client) {
+ return 0;
+ }
}
+ }
- return conf;
+ return 1;
}
diff --git a/xlators/features/locks/src/common.h b/xlators/features/locks/src/common.h
index 2cf9aa38093..281223bf3b8 100644
--- a/xlators/features/locks/src/common.h
+++ b/xlators/features/locks/src/common.h
@@ -1,154 +1,262 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012, 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef __COMMON_H__
#define __COMMON_H__
-#include "lkowner.h"
/*dump locks format strings */
-#define RANGE_FMT "type=%s, whence=%hd, start=%llu, len=%llu"
-#define ENTRY_FMT "type=%s on basename=%s"
-#define DUMP_GEN_FMT "pid = %llu, owner=%s, transport=%p, "
-#define GRNTD_AT "granted at %s"
-#define BLKD_AT "blocked at %s"
-#define DUMP_BLKD_FMT DUMP_GEN_FMT", "BLKD_AT
-#define DUMP_GRNTD_FMT DUMP_GEN_FMT", "GRNTD_AT
-#define DUMP_BLKD_GRNTD_FMT DUMP_GEN_FMT", "BLKD_AT", "GRNTD_AT
-#define ENTRY_BLKD_FMT ENTRY_FMT", "DUMP_BLKD_FMT
-#define ENTRY_GRNTD_FMT ENTRY_FMT", "DUMP_GRNTD_FMT
-#define ENTRY_BLKD_GRNTD_FMT ENTRY_FMT", "DUMP_BLKD_GRNTD_FMT
-
-#define RANGE_BLKD_FMT RANGE_FMT", "DUMP_BLKD_FMT
-#define RANGE_GRNTD_FMT RANGE_FMT", "DUMP_GRNTD_FMT
-#define RANGE_BLKD_GRNTD_FMT RANGE_FMT", "DUMP_BLKD_GRNTD_FMT
+#define RANGE_FMT "type=%s, whence=%hd, start=%llu, len=%llu"
+#define ENTRY_FMT "type=%s on basename=%s"
+#define DUMP_GEN_FMT "pid = %llu, owner=%s, client=%p"
+#define GRNTD_AT "granted at %s"
+#define BLKD_AT "blocked at %s"
+#define CONN_ID "connection-id=%s"
+#define DUMP_BLKD_FMT DUMP_GEN_FMT ", " CONN_ID ", " BLKD_AT
+#define DUMP_GRNTD_FMT DUMP_GEN_FMT ", " CONN_ID ", " GRNTD_AT
+#define DUMP_BLKD_GRNTD_FMT DUMP_GEN_FMT ", " CONN_ID ", " BLKD_AT ", " GRNTD_AT
+
+#define ENTRY_BLKD_FMT ENTRY_FMT ", " DUMP_BLKD_FMT
+#define ENTRY_GRNTD_FMT ENTRY_FMT ", " DUMP_GRNTD_FMT
+#define ENTRY_BLKD_GRNTD_FMT ENTRY_FMT ", " DUMP_BLKD_GRNTD_FMT
+
+#define RANGE_BLKD_FMT RANGE_FMT ", " DUMP_BLKD_FMT
+#define RANGE_GRNTD_FMT RANGE_FMT ", " DUMP_GRNTD_FMT
+#define RANGE_BLKD_GRNTD_FMT RANGE_FMT ", " DUMP_BLKD_GRNTD_FMT
#define SET_FLOCK_PID(flock, lock) ((flock)->l_pid = lock->client_pid)
+
+#define PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params...) \
+ do { \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT(fop, frame, op_ret, params); \
+ if (__local) { \
+ if (__local->inodelk_dom_count_req) \
+ data_unref(__local->inodelk_dom_count_req); \
+ loc_wipe(&__local->loc[0]); \
+ loc_wipe(&__local->loc[1]); \
+ if (__local->fd) \
+ fd_unref(__local->fd); \
+ if (__local->inode) \
+ inode_unref(__local->inode); \
+ if (__local->xdata) { \
+ dict_unref(__local->xdata); \
+ __local->xdata = NULL; \
+ } \
+ mem_put(__local); \
+ } \
+ } while (0)
+
posix_lock_t *
-new_posix_lock (struct gf_flock *flock, void *transport, pid_t client_pid,
- gf_lkowner_t *owner, fd_t *fd);
+new_posix_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
+ gf_lkowner_t *owner, fd_t *fd, uint32_t lk_flags, int blocking,
+ int32_t *op_errno);
pl_inode_t *
-pl_inode_get (xlator_t *this, inode_t *inode);
+pl_inode_get(xlator_t *this, inode_t *inode, pl_local_t *local);
posix_lock_t *
-pl_getlk (pl_inode_t *inode, posix_lock_t *lock);
+pl_getlk(pl_inode_t *inode, posix_lock_t *lock);
int
-pl_setlk (xlator_t *this, pl_inode_t *inode, posix_lock_t *lock,
- int can_block);
+pl_setlk(xlator_t *this, pl_inode_t *inode, posix_lock_t *lock, int can_block);
+
+int
+pl_lock_preempt(pl_inode_t *pl_inode, posix_lock_t *reqlock);
void
-grant_blocked_locks (xlator_t *this, pl_inode_t *inode);
+grant_blocked_locks(xlator_t *this, pl_inode_t *inode);
void
-posix_lock_to_flock (posix_lock_t *lock, struct gf_flock *flock);
+posix_lock_to_flock(posix_lock_t *lock, struct gf_flock *flock);
int
-locks_overlap (posix_lock_t *l1, posix_lock_t *l2);
+locks_overlap(posix_lock_t *l1, posix_lock_t *l2);
int
-same_owner (posix_lock_t *l1, posix_lock_t *l2);
+same_owner(posix_lock_t *l1, posix_lock_t *l2);
-void __delete_lock (pl_inode_t *, posix_lock_t *);
+void
+__delete_lock(posix_lock_t *);
-void __destroy_lock (posix_lock_t *);
+void
+__destroy_lock(posix_lock_t *);
pl_dom_list_t *
-get_domain (pl_inode_t *pl_inode, const char *volume);
+get_domain(pl_inode_t *pl_inode, const char *volume);
+
+void
+grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend);
+
+void
+inodelk_contention_notify(xlator_t *this, struct list_head *contend);
+
+void
+__delete_inode_lock(pl_inode_lock_t *lock);
+
+void
+__pl_inodelk_unref(pl_inode_lock_t *lock);
void
-grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom);
+__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted, pl_dom_list_t *dom,
+ struct timespec *now, struct list_head *contend);
void
-__delete_inode_lock (pl_inode_lock_t *lock);
+unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted);
void
-__pl_inodelk_unref (pl_inode_lock_t *lock);
+grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend);
void
-grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_entry_lock_t *unlocked, pl_dom_list_t *dom);
+entrylk_contention_notify(xlator_t *this, struct list_head *contend);
-void pl_update_refkeeper (xlator_t *this, inode_t *inode);
+void
+pl_update_refkeeper(xlator_t *this, inode_t *inode);
int32_t
-get_inodelk_count (xlator_t *this, inode_t *inode);
+__get_inodelk_count(xlator_t *this, pl_inode_t *pl_inode, char *domname);
+int32_t
+get_inodelk_count(xlator_t *this, inode_t *inode, char *domname);
int32_t
-get_entrylk_count (xlator_t *this, inode_t *inode);
+__get_entrylk_count(xlator_t *this, pl_inode_t *pl_inode);
+int32_t
+get_entrylk_count(xlator_t *this, inode_t *inode);
-void pl_trace_in (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
- int cmd, struct gf_flock *flock, const char *domain);
+void
+pl_trace_in(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, int cmd,
+ struct gf_flock *flock, const char *domain);
-void pl_trace_out (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
- int cmd, struct gf_flock *flock, int op_ret, int op_errno, const char *domain);
+void
+pl_trace_out(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc, int cmd,
+ struct gf_flock *flock, int op_ret, int op_errno,
+ const char *domain);
-void pl_trace_block (xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
- int cmd, struct gf_flock *flock, const char *domain);
+void
+pl_trace_block(xlator_t *this, call_frame_t *frame, fd_t *fd, loc_t *loc,
+ int cmd, struct gf_flock *flock, const char *domain);
-void pl_trace_flush (xlator_t *this, call_frame_t *frame, fd_t *fd);
+void
+pl_trace_flush(xlator_t *this, call_frame_t *frame, fd_t *fd);
-void entrylk_trace_in (xlator_t *this, call_frame_t *frame, const char *volume,
- fd_t *fd, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+void
+entrylk_trace_in(xlator_t *this, call_frame_t *frame, const char *volume,
+ fd_t *fd, loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type);
-void entrylk_trace_out (xlator_t *this, call_frame_t *frame, const char *volume,
- fd_t *fd, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type,
- int op_ret, int op_errno);
+void
+entrylk_trace_out(xlator_t *this, call_frame_t *frame, const char *volume,
+ fd_t *fd, loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, int op_ret, int op_errno);
-void entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume,
- fd_t *fd, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type);
+void
+entrylk_trace_block(xlator_t *this, call_frame_t *frame, const char *volume,
+ fd_t *fd, loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type);
void
-pl_print_verdict (char *str, int size, int op_ret, int op_errno);
+pl_print_verdict(char *str, int size, int op_ret, int op_errno);
void
-pl_print_lockee (char *str, int size, fd_t *fd, loc_t *loc);
+pl_print_lockee(char *str, int size, fd_t *fd, loc_t *loc);
void
-pl_print_locker (char *str, int size, xlator_t *this, call_frame_t *frame);
+pl_print_locker(char *str, int size, xlator_t *this, call_frame_t *frame);
void
-pl_print_inodelk (char *str, int size, int cmd, struct gf_flock *flock, const char *domain);
+pl_print_inodelk(char *str, int size, int cmd, struct gf_flock *flock,
+ const char *domain);
void
-pl_trace_release (xlator_t *this, fd_t *fd);
+pl_trace_release(xlator_t *this, fd_t *fd);
unsigned long
-fd_to_fdnum (fd_t *fd);
+fd_to_fdnum(fd_t *fd);
fd_t *
-fd_from_fdnum (posix_lock_t *lock);
+fd_from_fdnum(posix_lock_t *lock);
int
-pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
- int can_block);
+pl_reserve_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ int can_block);
int
-reservelks_equal (posix_lock_t *l1, posix_lock_t *l2);
+reservelks_equal(posix_lock_t *l1, posix_lock_t *l2);
int
-pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode,
- posix_lock_t *lock, int can_block);
+pl_verify_reservelk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ int can_block);
int
-pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *reqlock);
-uint32_t
-check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename);
+pl_reserve_unlock(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *reqlock);
+
+int32_t
+check_entrylk_on_basename(xlator_t *this, inode_t *parent, char *basename);
+
+void
+__pl_inodelk_unref(pl_inode_lock_t *lock);
+void
+__pl_entrylk_unref(pl_entry_lock_t *lock);
+
+int
+pl_metalock_is_active(pl_inode_t *pl_inode);
+
+void
+__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock);
+
+void
+inodelk_contention_notify_check(xlator_t *xl, pl_inode_lock_t *lock,
+ struct timespec *now,
+ struct list_head *contend);
+
+void
+entrylk_contention_notify_check(xlator_t *xl, pl_entry_lock_t *lock,
+ struct timespec *now,
+ struct list_head *contend);
+
+gf_boolean_t
+pl_does_monkey_want_stuck_lock();
+
+gf_boolean_t
+pl_is_mandatory_locking_enabled(pl_inode_t *pl_inode);
+
+void
+pl_clean_local(pl_local_t *local);
+
+int
+pl_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd);
+
+gf_boolean_t
+pl_is_lk_owner_valid(gf_lkowner_t *owner, client_t *client);
+
+int32_t
+pl_inode_remove_prepare(xlator_t *xl, call_frame_t *frame, loc_t *loc,
+ pl_inode_t **ppl_inode, struct list_head *contend);
+
+int32_t
+pl_inode_remove_complete(xlator_t *xl, pl_inode_t *pl_inode, call_stub_t *stub,
+ struct list_head *contend);
+
+void
+pl_inode_remove_wake(struct list_head *list);
+
+void
+pl_inode_remove_cbk(xlator_t *xl, pl_inode_t *pl_inode, int32_t error);
+
+void
+pl_inode_remove_unlocked(xlator_t *xl, pl_inode_t *pl_inode,
+ struct list_head *list);
+
+int32_t
+pl_inode_remove_inodelk(pl_inode_t *pl_inode, pl_inode_lock_t *lock);
+
#endif /* __COMMON_H__ */
diff --git a/xlators/features/locks/src/entrylk.c b/xlators/features/locks/src/entrylk.c
index 2a47657dbbb..fd772c850dd 100644
--- a/xlators/features/locks/src/entrylk.c
+++ b/xlators/features/locks/src/entrylk.c
@@ -1,67 +1,83 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "inode.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/list.h>
+#include <glusterfs/upcall-utils.h>
#include "locks.h"
+#include "clear.h"
#include "common.h"
+#include "pl-messages.h"
-static pl_entry_lock_t *
-new_entrylk_lock (pl_inode_t *pinode, const char *basename, entrylk_type type,
- void *trans, pid_t client_pid, gf_lkowner_t *owner,
- const char *volume)
-
+void
+__pl_entrylk_unref(pl_entry_lock_t *lock)
{
- pl_entry_lock_t *newlock = NULL;
-
- newlock = GF_CALLOC (1, sizeof (pl_entry_lock_t),
- gf_locks_mt_pl_entry_lock_t);
- if (!newlock) {
- goto out;
- }
-
- newlock->basename = basename ? gf_strdup (basename) : NULL;
- newlock->type = type;
- newlock->trans = trans;
- newlock->volume = volume;
- newlock->client_pid = client_pid;
- newlock->owner = *owner;
+ lock->ref--;
+ if (!lock->ref) {
+ GF_FREE((char *)lock->basename);
+ GF_FREE(lock->connection_id);
+ GF_FREE(lock);
+ }
+}
- INIT_LIST_HEAD (&newlock->domain_list);
- INIT_LIST_HEAD (&newlock->blocked_locks);
+static void
+__pl_entrylk_ref(pl_entry_lock_t *lock)
+{
+ lock->ref++;
+}
+static pl_entry_lock_t *
+new_entrylk_lock(pl_inode_t *pinode, const char *basename, entrylk_type type,
+ const char *domain, call_frame_t *frame, char *conn_id,
+ int32_t *op_errno)
+{
+ pl_entry_lock_t *newlock = NULL;
+
+ if (!pl_is_lk_owner_valid(&frame->root->lk_owner, frame->root->client)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ newlock = GF_CALLOC(1, sizeof(pl_entry_lock_t),
+ gf_locks_mt_pl_entry_lock_t);
+ if (!newlock) {
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ newlock->basename = basename ? gf_strdup(basename) : NULL;
+ newlock->type = type;
+ newlock->client = frame->root->client;
+ newlock->client_pid = frame->root->pid;
+ newlock->volume = domain;
+ newlock->owner = frame->root->lk_owner;
+ newlock->frame = frame;
+ newlock->this = frame->this;
+
+ if (conn_id) {
+ newlock->connection_id = gf_strdup(conn_id);
+ }
+
+ INIT_LIST_HEAD(&newlock->domain_list);
+ INIT_LIST_HEAD(&newlock->blocked_locks);
+ INIT_LIST_HEAD(&newlock->client_list);
+
+ __pl_entrylk_ref(newlock);
out:
- return newlock;
+ return newlock;
}
-
/**
* all_names - does a basename represent all names?
* @basename: name to check
@@ -76,206 +92,411 @@ out:
*/
static int
-names_conflict (const char *n1, const char *n2)
+names_conflict(const char *n1, const char *n2)
{
- return all_names (n1) || all_names (n2) || !strcmp (n1, n2);
+ return all_names(n1) || all_names(n2) || !strcmp(n1, n2);
}
+static int
+__same_entrylk_owner(pl_entry_lock_t *l1, pl_entry_lock_t *l2)
+{
+ return (is_same_lkowner(&l1->owner, &l2->owner) &&
+ (l1->client == l2->client));
+}
+
+/* Just as in inodelk, allow conflicting name locks from same (lk_owner, conn)*/
+static int
+__conflicting_entrylks(pl_entry_lock_t *l1, pl_entry_lock_t *l2)
+{
+ if (names_conflict(l1->basename, l2->basename) &&
+ !__same_entrylk_owner(l1, l2))
+ return 1;
+
+ return 0;
+}
+
+/* See comments in inodelk.c for details */
+static inline gf_boolean_t
+__stale_entrylk(xlator_t *this, pl_entry_lock_t *candidate_lock,
+ pl_entry_lock_t *requested_lock, time_t *lock_age_sec)
+{
+ posix_locks_private_t *priv = NULL;
+
+ priv = this->private;
+
+ /* Question: Should we just prune them all given the
+ * chance? Or just the locks we are attempting to acquire?
+ */
+ if (names_conflict(candidate_lock->basename, requested_lock->basename)) {
+ *lock_age_sec = gf_time() - candidate_lock->granted_time;
+ if (*lock_age_sec > priv->revocation_secs)
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+/* See comments in inodelk.c for details */
+static gf_boolean_t
+__entrylk_prune_stale(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
+ pl_entry_lock_t *lock)
+{
+ posix_locks_private_t *priv = NULL;
+ pl_entry_lock_t *tmp = NULL;
+ pl_entry_lock_t *lk = NULL;
+ gf_boolean_t revoke_lock = _gf_false;
+ int bcount = 0;
+ int gcount = 0;
+ int op_errno = 0;
+ clrlk_args args;
+ args.opts = NULL;
+ time_t lk_age_sec = 0;
+ uint32_t max_blocked = 0;
+ char *reason_str = NULL;
+
+ priv = this->private;
+ args.type = CLRLK_ENTRY;
+ if (priv->revocation_clear_all == _gf_true)
+ args.kind = CLRLK_ALL;
+ else
+ args.kind = CLRLK_GRANTED;
+
+ if (list_empty(&dom->entrylk_list))
+ goto out;
+
+ pthread_mutex_lock(&pinode->mutex);
+ lock->pinode = pinode;
+ list_for_each_entry_safe(lk, tmp, &dom->entrylk_list, domain_list)
+ {
+ if (__stale_entrylk(this, lk, lock, &lk_age_sec) == _gf_true) {
+ revoke_lock = _gf_true;
+ reason_str = "age";
+ break;
+ }
+ }
+ max_blocked = priv->revocation_max_blocked;
+ if (max_blocked != 0 && revoke_lock == _gf_false) {
+ list_for_each_entry_safe(lk, tmp, &dom->blocked_entrylks, blocked_locks)
+ {
+ max_blocked--;
+ if (max_blocked == 0) {
+ revoke_lock = _gf_true;
+ reason_str = "max blocked";
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock(&pinode->mutex);
+
+out:
+ if (revoke_lock == _gf_true) {
+ clrlk_clear_entrylk(this, pinode, dom, &args, &bcount, &gcount,
+ &op_errno);
+ gf_log(this->name, GF_LOG_WARNING,
+ "Lock revocation [reason: %s; gfid: %s; domain: %s; "
+ "age: %ld sec] - Entry lock revoked: %d granted & %d "
+ "blocked locks cleared",
+ reason_str, uuid_utoa(pinode->gfid), dom->domain, lk_age_sec,
+ gcount, bcount);
+ }
+
+ return revoke_lock;
+}
-static inline int
-__same_entrylk_owner (pl_entry_lock_t *l1, pl_entry_lock_t *l2)
+void
+entrylk_contention_notify_check(xlator_t *this, pl_entry_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
+ posix_locks_private_t *priv;
+ int64_t elapsed;
+
+ priv = this->private;
+
+ /* If this lock is in a list, it means that we are about to send a
+ * notification for it, so no need to do anything else. */
+ if (!list_empty(&lock->contend)) {
+ return;
+ }
+
+ elapsed = now->tv_sec;
+ elapsed -= lock->contention_time.tv_sec;
+ if (now->tv_nsec < lock->contention_time.tv_nsec) {
+ elapsed--;
+ }
+ if (elapsed < priv->notify_contention_delay) {
+ return;
+ }
+
+ /* All contention notifications will be sent outside of the locked
+ * region. This means that currently granted locks might have already
+ * been unlocked by that time. To avoid the lock or the inode to be
+ * destroyed before we process them, we take an additional reference
+ * on both. */
+ inode_ref(lock->pinode->inode);
+ __pl_entrylk_ref(lock);
+
+ lock->contention_time = *now;
- return (is_same_lkowner (&l1->owner, &l2->owner) &&
- (l1->trans == l2->trans));
+ list_add_tail(&lock->contend, contend);
}
+void
+entrylk_contention_notify(xlator_t *this, struct list_head *contend)
+{
+ struct gf_upcall up;
+ struct gf_upcall_entrylk_contention lc;
+ pl_entry_lock_t *lock;
+ pl_inode_t *pl_inode;
+ client_t *client;
+ gf_boolean_t notify;
+
+ while (!list_empty(contend)) {
+ lock = list_first_entry(contend, pl_entry_lock_t, contend);
+
+ pl_inode = lock->pinode;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ /* If the lock has already been released, no notification is
+ * sent. We clear the notification time in this case. */
+ notify = !list_empty(&lock->domain_list);
+ if (!notify) {
+ lock->contention_time.tv_sec = 0;
+ lock->contention_time.tv_nsec = 0;
+ } else {
+ lc.type = lock->type;
+ lc.name = lock->basename;
+ lc.pid = lock->client_pid;
+ lc.domain = lock->volume;
+ lc.xdata = NULL;
+
+ gf_uuid_copy(up.gfid, lock->pinode->gfid);
+ client = (client_t *)lock->client;
+ if (client == NULL) {
+ /* A NULL client can be found if the entrylk
+ * was issued by a server side xlator. */
+ up.client_uid = NULL;
+ } else {
+ up.client_uid = client->client_uid;
+ }
+ }
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (notify) {
+ up.event_type = GF_UPCALL_ENTRYLK_CONTENTION;
+ up.data = &lc;
+
+ if (this->notify(this, GF_EVENT_UPCALL, &up) < 0) {
+ gf_msg_debug(this->name, 0,
+ "Entrylk contention notification "
+ "failed");
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Entrylk contention notification "
+ "sent");
+ }
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ list_del_init(&lock->contend);
+ __pl_entrylk_unref(lock);
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ inode_unref(pl_inode->inode);
+ }
+}
/**
- * lock_grantable - is this lock grantable?
+ * entrylk_grantable - is this lock grantable?
* @inode: inode in which to look
* @basename: name we're trying to lock
* @type: type of lock
*/
static pl_entry_lock_t *
-__lock_grantable (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+__entrylk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_entry_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
- pl_entry_lock_t *lock = NULL;
-
- if (list_empty (&dom->entrylk_list))
- return NULL;
-
- list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
- if (names_conflict (lock->basename, basename))
- return lock;
+ pl_entry_lock_t *tmp = NULL;
+ pl_entry_lock_t *ret = NULL;
+
+ list_for_each_entry(tmp, &dom->entrylk_list, domain_list)
+ {
+ if (__conflicting_entrylks(tmp, lock)) {
+ if (ret == NULL) {
+ ret = tmp;
+ if (contend == NULL) {
+ break;
+ }
+ }
+ entrylk_contention_notify_check(this, tmp, now, contend);
}
+ }
- return NULL;
+ return ret;
}
static pl_entry_lock_t *
-__blocked_lock_conflict (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+__blocked_entrylk_conflict(pl_dom_list_t *dom, pl_entry_lock_t *lock)
{
- pl_entry_lock_t *lock = NULL;
+ pl_entry_lock_t *tmp = NULL;
- if (list_empty (&dom->blocked_entrylks))
- return NULL;
+ list_for_each_entry(tmp, &dom->blocked_entrylks, blocked_locks)
+ {
+ if (names_conflict(tmp->basename, lock->basename))
+ return lock;
+ }
- list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
- if (names_conflict (lock->basename, basename))
- return lock;
- }
-
- return NULL;
+ return NULL;
}
static int
-__owner_has_lock (pl_dom_list_t *dom, pl_entry_lock_t *newlock)
+__owner_has_lock(pl_dom_list_t *dom, pl_entry_lock_t *newlock)
{
- pl_entry_lock_t *lock = NULL;
+ pl_entry_lock_t *lock = NULL;
- list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
- if (__same_entrylk_owner (lock, newlock))
- return 1;
- }
+ list_for_each_entry(lock, &dom->entrylk_list, domain_list)
+ {
+ if (__same_entrylk_owner(lock, newlock))
+ return 1;
+ }
- list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
- if (__same_entrylk_owner (lock, newlock))
- return 1;
- }
+ list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks)
+ {
+ if (__same_entrylk_owner(lock, newlock))
+ return 1;
+ }
- return 0;
+ return 0;
}
static int
-names_equal (const char *n1, const char *n2)
+names_equal(const char *n1, const char *n2)
{
- return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp (n1, n2));
+ return (n1 == NULL && n2 == NULL) || (n1 && n2 && !strcmp(n1, n2));
}
void
-pl_print_entrylk (char *str, int size, entrylk_cmd cmd, entrylk_type type,
- const char *basename, const char *domain)
+pl_print_entrylk(char *str, int size, entrylk_cmd cmd, entrylk_type type,
+ const char *basename, const char *domain)
{
- char *cmd_str = NULL;
- char *type_str = NULL;
+ char *cmd_str = NULL;
+ char *type_str = NULL;
- switch (cmd) {
+ switch (cmd) {
case ENTRYLK_LOCK:
- cmd_str = "LOCK";
- break;
+ cmd_str = "LOCK";
+ break;
case ENTRYLK_LOCK_NB:
- cmd_str = "LOCK_NB";
- break;
+ cmd_str = "LOCK_NB";
+ break;
case ENTRYLK_UNLOCK:
- cmd_str = "UNLOCK";
- break;
+ cmd_str = "UNLOCK";
+ break;
default:
- cmd_str = "UNKNOWN";
- break;
- }
+ cmd_str = "UNKNOWN";
+ break;
+ }
- switch (type) {
+ switch (type) {
case ENTRYLK_RDLCK:
- type_str = "READ";
- break;
+ type_str = "READ";
+ break;
case ENTRYLK_WRLCK:
- type_str = "WRITE";
- break;
+ type_str = "WRITE";
+ break;
default:
- type_str = "UNKNOWN";
- break;
- }
+ type_str = "UNKNOWN";
+ break;
+ }
- snprintf (str, size, "lock=ENTRYLK, cmd=%s, type=%s, basename=%s, domain: %s",
- cmd_str, type_str, basename, domain);
+ snprintf(str, size,
+ "lock=ENTRYLK, cmd=%s, type=%s, basename=%s, domain: %s", cmd_str,
+ type_str, basename, domain);
}
-
void
-entrylk_trace_in (xlator_t *this, call_frame_t *frame, const char *domain,
- fd_t *fd, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+entrylk_trace_in(xlator_t *this, call_frame_t *frame, const char *domain,
+ fd_t *fd, loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type)
{
- posix_locks_private_t *priv = NULL;
- char pl_locker[256];
- char pl_lockee[256];
- char pl_entrylk[256];
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_entrylk[256];
- priv = this->private;
+ priv = this->private;
- if (!priv->trace)
- return;
+ if (!priv->trace)
+ return;
- pl_print_locker (pl_locker, 256, this, frame);
- pl_print_lockee (pl_lockee, 256, fd, loc);
- pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, domain);
+ pl_print_locker(pl_locker, 256, this, frame);
+ pl_print_lockee(pl_lockee, 256, fd, loc);
+ pl_print_entrylk(pl_entrylk, 256, cmd, type, basename, domain);
- gf_log (this->name, GF_LOG_INFO,
- "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}",
- pl_locker, pl_lockee, pl_entrylk);
+ gf_log(this->name, GF_LOG_INFO,
+ "[REQUEST] Locker = {%s} Lockee = {%s} Lock = {%s}", pl_locker,
+ pl_lockee, pl_entrylk);
}
-
void
-entrylk_trace_out (xlator_t *this, call_frame_t *frame, const char *domain,
- fd_t *fd, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, int op_ret, int op_errno)
+entrylk_trace_out(xlator_t *this, call_frame_t *frame, const char *domain,
+ fd_t *fd, loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, int op_ret, int op_errno)
{
- posix_locks_private_t *priv = NULL;
- char pl_locker[256];
- char pl_lockee[256];
- char pl_entrylk[256];
- char verdict[32];
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_entrylk[256];
+ char verdict[32];
- priv = this->private;
+ priv = this->private;
- if (!priv->trace)
- return;
+ if (!priv->trace)
+ return;
- pl_print_locker (pl_locker, 256, this, frame);
- pl_print_lockee (pl_lockee, 256, fd, loc);
- pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, domain);
- pl_print_verdict (verdict, 32, op_ret, op_errno);
+ pl_print_locker(pl_locker, 256, this, frame);
+ pl_print_lockee(pl_lockee, 256, fd, loc);
+ pl_print_entrylk(pl_entrylk, 256, cmd, type, basename, domain);
+ pl_print_verdict(verdict, 32, op_ret, op_errno);
- gf_log (this->name, GF_LOG_INFO,
- "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}",
- verdict, pl_locker, pl_lockee, pl_entrylk);
+ gf_log(this->name, GF_LOG_INFO,
+ "[%s] Locker = {%s} Lockee = {%s} Lock = {%s}", verdict, pl_locker,
+ pl_lockee, pl_entrylk);
}
-
void
-entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume,
- fd_t *fd, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+entrylk_trace_block(xlator_t *this, call_frame_t *frame, const char *volume,
+ fd_t *fd, loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type)
{
- posix_locks_private_t *priv = NULL;
- char pl_locker[256];
- char pl_lockee[256];
- char pl_entrylk[256];
+ posix_locks_private_t *priv = NULL;
+ char pl_locker[256];
+ char pl_lockee[256];
+ char pl_entrylk[256];
- priv = this->private;
+ priv = this->private;
- if (!priv->trace)
- return;
+ if (!priv->trace)
+ return;
- pl_print_locker (pl_locker, 256, this, frame);
- pl_print_lockee (pl_lockee, 256, fd, loc);
- pl_print_entrylk (pl_entrylk, 256, cmd, type, basename, volume);
+ pl_print_locker(pl_locker, 256, this, frame);
+ pl_print_lockee(pl_lockee, 256, fd, loc);
+ pl_print_entrylk(pl_entrylk, 256, cmd, type, basename, volume);
- gf_log (this->name, GF_LOG_INFO,
- "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}",
- pl_locker, pl_lockee, pl_entrylk);
+ gf_log(this->name, GF_LOG_INFO,
+ "[BLOCKED] Locker = {%s} Lockee = {%s} Lock = {%s}", pl_locker,
+ pl_lockee, pl_entrylk);
}
/**
- * __find_most_matching_lock - find the lock struct which most matches in order of:
- * lock on the exact basename ||
- * an all_names lock
+ * __find_most_matching_lock - find the lock struct which most matches in order
+ * of: lock on the exact basename || an all_names lock
*
*
* @inode: inode in which to look
@@ -283,27 +504,61 @@ entrylk_trace_block (xlator_t *this, call_frame_t *frame, const char *volume,
*/
static pl_entry_lock_t *
-__find_most_matching_lock (pl_dom_list_t *dom, const char *basename)
+__find_most_matching_lock(pl_dom_list_t *dom, const char *basename)
{
- pl_entry_lock_t *lock;
- pl_entry_lock_t *all = NULL;
- pl_entry_lock_t *exact = NULL;
+ pl_entry_lock_t *lock;
+ pl_entry_lock_t *all = NULL;
+ pl_entry_lock_t *exact = NULL;
- if (list_empty (&dom->entrylk_list))
- return NULL;
+ if (list_empty(&dom->entrylk_list))
+ return NULL;
- list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
- if (all_names (lock->basename))
- all = lock;
- else if (names_equal (lock->basename, basename))
- exact = lock;
- }
+ list_for_each_entry(lock, &dom->entrylk_list, domain_list)
+ {
+ if (all_names(lock->basename))
+ all = lock;
+ else if (names_equal(lock->basename, basename))
+ exact = lock;
+ }
+
+ return (exact ? exact : all);
+}
- return (exact ? exact : all);
+static pl_entry_lock_t *
+__find_matching_lock(pl_dom_list_t *dom, pl_entry_lock_t *lock)
+{
+ pl_entry_lock_t *tmp = NULL;
+
+ list_for_each_entry(tmp, &dom->entrylk_list, domain_list)
+ {
+ if (names_equal(lock->basename, tmp->basename) &&
+ __same_entrylk_owner(lock, tmp) && (lock->type == tmp->type))
+ return tmp;
+ }
+ return NULL;
+}
+
+static int
+__lock_blocked_add(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
+ pl_entry_lock_t *lock, int nonblock)
+{
+ if (nonblock)
+ goto out;
+
+ lock->blkd_time = gf_time();
+ list_add_tail(&lock->blocked_locks, &dom->blocked_entrylks);
+
+ gf_msg_trace(this->name, 0, "Blocking lock: {pinode=%p, basename=%s}",
+ pinode, lock->basename);
+
+ entrylk_trace_block(this, lock->frame, NULL, NULL, NULL, lock->basename,
+ ENTRYLK_LOCK, lock->type);
+out:
+ return -EAGAIN;
}
/**
- * __lock_name - lock a name in a directory
+ * __lock_entrylk - lock a name in a directory
* @inode: inode for the directory in which to lock
* @basename: name of the entry to lock
* if null, lock the entire directory
@@ -314,436 +569,375 @@ __find_most_matching_lock (pl_dom_list_t *dom, const char *basename)
*/
int
-__lock_name (pl_inode_t *pinode, const char *basename, entrylk_type type,
- call_frame_t *frame, pl_dom_list_t *dom, xlator_t *this, int nonblock)
-{
- pl_entry_lock_t *lock = NULL;
- pl_entry_lock_t *conf = NULL;
- void *trans = NULL;
- pid_t client_pid = 0;
- int ret = -EINVAL;
-
- trans = frame->root->trans;
- client_pid = frame->root->pid;
-
- lock = new_entrylk_lock (pinode, basename, type, trans, client_pid,
- &frame->root->lk_owner, dom->domain);
- if (!lock) {
- ret = -ENOMEM;
- goto out;
- }
-
- lock->frame = frame;
- lock->this = this;
- lock->trans = trans;
-
- conf = __lock_grantable (dom, basename, type);
- if (conf) {
- ret = -EAGAIN;
- if (nonblock){
- if (lock->basename)
- GF_FREE ((char *)lock->basename);
- GF_FREE (lock);
- goto out;
-
- }
-
- gettimeofday (&lock->blkd_time, NULL);
- list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
-
- gf_log (this->name, GF_LOG_TRACE,
- "Blocking lock: {pinode=%p, basename=%s}",
- pinode, basename);
-
- goto out;
- }
-
- if ( __blocked_lock_conflict (dom, basename, type) && !(__owner_has_lock (dom, lock))) {
- ret = -EAGAIN;
- if (nonblock) {
- if (lock->basename)
- GF_FREE ((char *) lock->basename);
- GF_FREE (lock);
- goto out;
-
- }
- lock->frame = frame;
- lock->this = this;
-
- gettimeofday (&lock->blkd_time, NULL);
- list_add_tail (&lock->blocked_locks, &dom->blocked_entrylks);
-
- gf_log (this->name, GF_LOG_TRACE,
- "Lock is grantable, but blocking to prevent starvation");
- gf_log (this->name, GF_LOG_TRACE,
- "Blocking lock: {pinode=%p, basename=%s}",
- pinode, basename);
-
- ret = -EAGAIN;
- goto out;
+__lock_entrylk(xlator_t *this, pl_inode_t *pinode, pl_entry_lock_t *lock,
+ int nonblock, pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
+{
+ pl_entry_lock_t *conf = NULL;
+ int ret = -EAGAIN;
+
+ conf = __entrylk_grantable(this, dom, lock, now, contend);
+ if (conf) {
+ ret = __lock_blocked_add(this, pinode, dom, lock, nonblock);
+ goto out;
+ }
+
+ /* To prevent blocked locks starvation, check if there are any blocked
+ * locks thay may conflict with this lock. If there is then don't grant
+ * the lock. BUT grant the lock if the owner already has lock to allow
+ * nested locks.
+ * Example: SHD from Machine1 takes (gfid, basename=257-length-name)
+ * and is granted.
+ * SHD from machine2 takes (gfid, basename=NULL) and is blocked.
+ * When SHD from Machine1 takes (gfid, basename=NULL) it needs to be
+ * granted, without which self-heal can't progress.
+ * TODO: Find why 'owner_has_lock' is checked even for blocked locks.
+ */
+ if (__blocked_entrylk_conflict(dom, lock) &&
+ !(__owner_has_lock(dom, lock))) {
+ if (nonblock == 0) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Lock is grantable, but blocking to prevent "
+ "starvation");
}
- switch (type) {
-
- case ENTRYLK_WRLCK:
- gettimeofday (&lock->granted_time, NULL);
- list_add_tail (&lock->domain_list, &dom->entrylk_list);
- break;
- default:
+ ret = __lock_blocked_add(this, pinode, dom, lock, nonblock);
+ goto out;
+ }
- gf_log (this->name, GF_LOG_DEBUG,
- "Invalid type for entrylk specified: %d", type);
- ret = -EINVAL;
- goto out;
- }
+ __pl_entrylk_ref(lock);
+ lock->granted_time = gf_time();
+ list_add(&lock->domain_list, &dom->entrylk_list);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
/**
- * __unlock_name - unlock a name in a directory
+ * __unlock_entrylk - unlock a name in a directory
* @inode: inode for the directory to unlock in
* @basename: name of the entry to unlock
* if null, unlock the entire directory
*/
pl_entry_lock_t *
-__unlock_name (pl_dom_list_t *dom, const char *basename, entrylk_type type)
+__unlock_entrylk(pl_dom_list_t *dom, pl_entry_lock_t *lock)
{
- pl_entry_lock_t *lock = NULL;
- pl_entry_lock_t *ret_lock = NULL;
+ pl_entry_lock_t *ret_lock = NULL;
- lock = __find_most_matching_lock (dom, basename);
+ ret_lock = __find_matching_lock(dom, lock);
- if (!lock) {
- gf_log ("locks", GF_LOG_DEBUG,
- "unlock on %s (type=ENTRYLK_WRLCK) attempted but no matching lock found",
- basename);
- goto out;
- }
-
- if (names_equal (lock->basename, basename)
- && lock->type == type) {
-
- if (type == ENTRYLK_WRLCK) {
- list_del_init (&lock->domain_list);
- ret_lock = lock;
- }
- } else {
- gf_log ("locks", GF_LOG_DEBUG,
- "Unlock for a non-existing lock!");
- goto out;
- }
+ if (ret_lock) {
+ list_del_init(&ret_lock->domain_list);
+ } else {
+ gf_log("locks", GF_LOG_ERROR,
+ "unlock on %s "
+ "(type=ENTRYLK_WRLCK) attempted but no matching lock "
+ "found",
+ lock->basename);
+ }
-out:
- return ret_lock;
+ return ret_lock;
}
-uint32_t
-check_entrylk_on_basename (xlator_t *this, inode_t *parent, char *basename)
+int32_t
+check_entrylk_on_basename(xlator_t *this, inode_t *parent, char *basename)
{
- uint32_t entrylk = 0;
- pl_inode_t *pinode = 0;
- pl_dom_list_t *dom = NULL;
- pl_entry_lock_t *conf = NULL;
-
- pinode = pl_inode_get (this, parent);
- if (!pinode)
- goto out;
- pthread_mutex_lock (&pinode->mutex);
+ int32_t entrylk = 0;
+ pl_dom_list_t *dom = NULL;
+ pl_entry_lock_t *conf = NULL;
+
+ pl_inode_t *pinode = pl_inode_get(this, parent, NULL);
+ if (!pinode)
+ goto out;
+ pthread_mutex_lock(&pinode->mutex);
+ {
+ list_for_each_entry(dom, &pinode->dom_list, inode_list)
{
- list_for_each_entry (dom, &pinode->dom_list, inode_list) {
- conf = __lock_grantable (dom, basename, ENTRYLK_WRLCK);
- if (conf && conf->basename) {
- entrylk = 1;
- break;
- }
- }
+ conf = __find_most_matching_lock(dom, basename);
+ if (conf && conf->basename) {
+ entrylk = 1;
+ break;
+ }
}
- pthread_mutex_unlock (&pinode->mutex);
+ }
+ pthread_mutex_unlock(&pinode->mutex);
out:
- return entrylk;
+ return entrylk;
}
void
-__grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_dom_list_t *dom, struct list_head *granted)
+__grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom, struct list_head *granted,
+ struct timespec *now, struct list_head *contend)
{
- int bl_ret = 0;
- pl_entry_lock_t *bl = NULL;
- pl_entry_lock_t *tmp = NULL;
-
- struct list_head blocked_list;
+ int bl_ret = 0;
+ pl_entry_lock_t *bl = NULL;
+ pl_entry_lock_t *tmp = NULL;
- INIT_LIST_HEAD (&blocked_list);
- list_splice_init (&dom->blocked_entrylks, &blocked_list);
+ struct list_head blocked_list;
- list_for_each_entry_safe (bl, tmp, &blocked_list,
- blocked_locks) {
+ INIT_LIST_HEAD(&blocked_list);
+ list_splice_init(&dom->blocked_entrylks, &blocked_list);
- list_del_init (&bl->blocked_locks);
+ list_for_each_entry_safe(bl, tmp, &blocked_list, blocked_locks)
+ {
+ list_del_init(&bl->blocked_locks);
+ bl_ret = __lock_entrylk(bl->this, pl_inode, bl, 0, dom, now, contend);
- gf_log ("locks", GF_LOG_TRACE,
- "Trying to unblock: {pinode=%p, basename=%s}",
- pl_inode, bl->basename);
-
- bl_ret = __lock_name (pl_inode, bl->basename, bl->type,
- bl->frame, dom, bl->this, 0);
-
- if (bl_ret == 0) {
- list_add (&bl->blocked_locks, granted);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "should never happen");
- if (bl->basename)
- GF_FREE ((char *)bl->basename);
- GF_FREE (bl);
- }
+ if (bl_ret == 0) {
+ list_add_tail(&bl->blocked_locks, granted);
}
- return;
+ }
}
/* Grants locks if possible which are blocked on a lock */
void
-grant_blocked_entry_locks (xlator_t *this, pl_inode_t *pl_inode,
- pl_entry_lock_t *unlocked, pl_dom_list_t *dom)
-{
- struct list_head granted_list;
- pl_entry_lock_t *tmp = NULL;
- pl_entry_lock_t *lock = NULL;
-
- INIT_LIST_HEAD (&granted_list);
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- __grant_blocked_entry_locks (this, pl_inode, dom, &granted_list);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
-
- list_for_each_entry_safe (lock, tmp, &granted_list, blocked_locks) {
- list_del_init (&lock->blocked_locks);
-
- entrylk_trace_out (this, lock->frame, NULL, NULL, NULL,
- lock->basename, ENTRYLK_LOCK, lock->type,
- 0, 0);
-
- STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL);
-
- }
-
- GF_FREE ((char *)unlocked->basename);
- GF_FREE (unlocked);
-
- return;
-}
-
-/**
- * release_entry_locks_for_transport: release all entry locks from this
- * transport for this loc_t
- */
-
-static int
-release_entry_locks_for_transport (xlator_t *this, pl_inode_t *pinode,
- pl_dom_list_t *dom, void *trans)
+grant_blocked_entry_locks(xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
{
- pl_entry_lock_t *lock = NULL;
- pl_entry_lock_t *tmp = NULL;
- struct list_head granted;
- struct list_head released;
-
- INIT_LIST_HEAD (&granted);
- INIT_LIST_HEAD (&released);
-
- pthread_mutex_lock (&pinode->mutex);
+ struct list_head granted_list;
+ pl_entry_lock_t *tmp = NULL;
+ pl_entry_lock_t *lock = NULL;
+
+ INIT_LIST_HEAD(&granted_list);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __grant_blocked_entry_locks(this, pl_inode, dom, &granted_list, now,
+ contend);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ list_for_each_entry_safe(lock, tmp, &granted_list, blocked_locks)
+ {
+ entrylk_trace_out(this, lock->frame, NULL, NULL, NULL, lock->basename,
+ ENTRYLK_LOCK, lock->type, 0, 0);
+
+ STACK_UNWIND_STRICT(entrylk, lock->frame, 0, 0, NULL);
+ lock->frame = NULL;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ list_for_each_entry_safe(lock, tmp, &granted_list, blocked_locks)
{
- list_for_each_entry_safe (lock, tmp, &dom->blocked_entrylks,
- blocked_locks) {
- if (lock->trans != trans)
- continue;
-
- list_del_init (&lock->blocked_locks);
-
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on held by "
- "{transport=%p}",trans);
-
- list_add (&lock->blocked_locks, &released);
-
- }
-
- list_for_each_entry_safe (lock, tmp, &dom->entrylk_list,
- domain_list) {
- if (lock->trans != trans)
- continue;
-
- list_del_init (&lock->domain_list);
-
- gf_log (this->name, GF_LOG_TRACE,
- "releasing lock on held by "
- "{transport=%p}",trans);
-
- GF_FREE ((char *)lock->basename);
- GF_FREE (lock);
- }
-
- __grant_blocked_entry_locks (this, pinode, dom, &granted);
-
+ list_del_init(&lock->blocked_locks);
+ __pl_entrylk_unref(lock);
}
-
- pthread_mutex_unlock (&pinode->mutex);
-
- list_for_each_entry_safe (lock, tmp, &released, blocked_locks) {
- list_del_init (&lock->blocked_locks);
-
- STACK_UNWIND_STRICT (entrylk, lock->frame, -1, EAGAIN, NULL);
-
- if (lock->basename)
- GF_FREE ((char *)lock->basename);
- GF_FREE (lock);
-
- }
-
- list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
- list_del_init (&lock->blocked_locks);
-
- STACK_UNWIND_STRICT (entrylk, lock->frame, 0, 0, NULL);
-
- if (lock->basename)
- GF_FREE ((char *)lock->basename);
- GF_FREE (lock);
- }
-
- return 0;
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
}
/* Common entrylk code called by pl_entrylk and pl_fentrylk */
int
-pl_common_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, inode_t *inode, const char *basename,
- entrylk_cmd cmd, entrylk_type type, loc_t *loc, fd_t *fd)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- void * transport = NULL;
-
- pl_inode_t * pinode = NULL;
- int ret = -1;
- pl_entry_lock_t *unlocked = NULL;
- char unwind = 1;
-
- pl_dom_list_t *dom = NULL;
-
- pinode = pl_inode_get (this, inode);
- if (!pinode) {
- op_errno = ENOMEM;
- goto out;
- }
+pl_common_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ inode_t *inode, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, loc_t *loc, fd_t *fd, dict_t *xdata)
- dom = get_domain (pinode, volume);
- if (!dom){
- op_errno = ENOMEM;
- goto out;
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int ret = -1;
+ char unwind = 1;
+ GF_UNUSED int dict_ret = -1;
+ pl_inode_t *pinode = NULL;
+ pl_entry_lock_t *reqlock = NULL;
+ pl_entry_lock_t *unlocked = NULL;
+ pl_dom_list_t *dom = NULL;
+ char *conn_id = NULL;
+ pl_ctx_t *ctx = NULL;
+ int nonblock = 0;
+ gf_boolean_t need_inode_unref = _gf_false;
+ posix_locks_private_t *priv = NULL;
+ struct list_head *pcontend = NULL;
+ struct list_head contend;
+ struct timespec now = {};
+
+ priv = this->private;
+
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD(pcontend);
+ timespec_now(&now);
+ }
+
+ if (xdata)
+ dict_ret = dict_get_str(xdata, "connection-id", &conn_id);
+
+ pinode = pl_inode_get(this, inode, NULL);
+ if (!pinode) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (frame->root->client) {
+ ctx = pl_ctx_get(frame->root->client, this);
+ if (!ctx) {
+ op_errno = ENOMEM;
+ gf_log(this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto unwind;
}
-
- entrylk_trace_in (this, frame, volume, fd, loc, basename, cmd, type);
-
- transport = frame->root->trans;
-
- if (frame->root->lk_owner.len == 0) {
- /*
- this is a special case that means release
- all locks from this transport
- */
-
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing locks for transport %p", transport);
-
- release_entry_locks_for_transport (this, pinode, dom, transport);
+ }
+
+ dom = get_domain(pinode, volume);
+ if (!dom) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ entrylk_trace_in(this, frame, volume, fd, loc, basename, cmd, type);
+
+ reqlock = new_entrylk_lock(pinode, basename, type, dom->domain, frame,
+ conn_id, &op_errno);
+ if (!reqlock) {
+ op_ret = -1;
+ goto unwind;
+ }
+
+ /* Ideally, AFTER a successful lock (both blocking and non-blocking) or
+ * an unsuccessful blocking lock operation, the inode needs to be ref'd.
+ *
+ * But doing so might give room to a race where the lock-requesting
+ * client could send a DISCONNECT just before this thread refs the inode
+ * after the locking is done, and the epoll thread could unref the inode
+ * in cleanup which means the inode's refcount would come down to 0, and
+ * the call to pl_forget() at this point destroys @pinode. Now when
+ * the io-thread executing this function tries to access pinode,
+ * it could crash on account of illegal memory access.
+ *
+ * To get around this problem, the inode is ref'd once even before
+ * adding the lock into client_list as a precautionary measure.
+ * This way even if there are DISCONNECTs, there will always be 1 extra
+ * ref on the inode, so @pinode is still alive until after the
+ * current stack unwinds.
+ */
+ pinode->inode = inode_ref(inode);
+ if (priv->revocation_secs != 0) {
+ if (cmd != ENTRYLK_UNLOCK) {
+ __entrylk_prune_stale(this, pinode, dom, reqlock);
+ } else if (priv->monkey_unlocking == _gf_true) {
+ if (pl_does_monkey_want_stuck_lock()) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "MONKEY LOCKING (forcing stuck lock)!");
op_ret = 0;
-
+ need_inode_unref = _gf_true;
+ pthread_mutex_lock(&pinode->mutex);
+ {
+ __pl_entrylk_unref(reqlock);
+ }
+ pthread_mutex_unlock(&pinode->mutex);
goto out;
+ }
}
+ }
- switch (cmd) {
+ switch (cmd) {
+ case ENTRYLK_LOCK_NB:
+ nonblock = 1;
+ /* fall through */
case ENTRYLK_LOCK:
- pthread_mutex_lock (&pinode->mutex);
- {
- ret = __lock_name (pinode, basename, type,
- frame, dom, this, 0);
- }
- pthread_mutex_unlock (&pinode->mutex);
-
- op_errno = -ret;
- if (ret < 0) {
- if (ret == -EAGAIN)
- unwind = 0;
- else
- unwind = 1;
- goto out;
+ if (ctx)
+ pthread_mutex_lock(&ctx->lock);
+ pthread_mutex_lock(&pinode->mutex);
+ {
+ reqlock->pinode = pinode;
+
+ ret = __lock_entrylk(this, pinode, reqlock, nonblock, dom, &now,
+ pcontend);
+ if (ret == 0) {
+ reqlock->frame = NULL;
+ op_ret = 0;
} else {
- op_ret = 0;
- op_errno = 0;
- unwind = 1;
- goto out;
+ op_errno = -ret;
}
- break;
+ if (ctx && (!ret || !nonblock))
+ list_add(&reqlock->client_list, &ctx->entrylk_lockers);
- case ENTRYLK_LOCK_NB:
- unwind = 1;
- pthread_mutex_lock (&pinode->mutex);
- {
- ret = __lock_name (pinode, basename, type,
- frame, dom, this, 1);
- }
- pthread_mutex_unlock (&pinode->mutex);
-
- if (ret < 0) {
- op_errno = -ret;
- goto out;
+ if (ret == -EAGAIN && !nonblock) {
+ /* blocked */
+ unwind = 0;
+ } else {
+ __pl_entrylk_unref(reqlock);
}
- break;
+ /* For all but the case where a non-blocking lock
+ * attempt fails, the extra ref taken before the switch
+ * block must be negated.
+ */
+ if ((ret == -EAGAIN) && (nonblock))
+ need_inode_unref = _gf_true;
+ }
+ pthread_mutex_unlock(&pinode->mutex);
+ if (ctx)
+ pthread_mutex_unlock(&ctx->lock);
+ break;
case ENTRYLK_UNLOCK:
- pthread_mutex_lock (&pinode->mutex);
- {
- unlocked = __unlock_name (dom, basename, type);
+ if (ctx)
+ pthread_mutex_lock(&ctx->lock);
+ pthread_mutex_lock(&pinode->mutex);
+ {
+ /* Irrespective of whether unlock succeeds or not,
+ * the extra inode ref that was done before the switch
+ * block must be negated. Towards this,
+ * @need_inode_unref flag is set unconditionally here.
+ */
+ need_inode_unref = _gf_true;
+ unlocked = __unlock_entrylk(dom, reqlock);
+ if (unlocked) {
+ list_del_init(&unlocked->client_list);
+ __pl_entrylk_unref(unlocked);
+ op_ret = 0;
+ } else {
+ op_errno = EINVAL;
}
- pthread_mutex_unlock (&pinode->mutex);
+ __pl_entrylk_unref(reqlock);
+ }
+ pthread_mutex_unlock(&pinode->mutex);
+ if (ctx)
+ pthread_mutex_unlock(&ctx->lock);
- if (unlocked)
- grant_blocked_entry_locks (this, pinode, unlocked, dom);
+ grant_blocked_entry_locks(this, pinode, dom, &now, pcontend);
- break;
+ break;
default:
- gf_log (this->name, GF_LOG_ERROR,
- "Unexpected case in entrylk (cmd=%d). Please file"
- "a bug report at http://bugs.gluster.com", cmd);
- goto out;
- }
+ need_inode_unref = _gf_true;
+ gf_log(this->name, GF_LOG_ERROR,
+ "Unexpected case in entrylk (cmd=%d). Please file"
+ "a bug report at http://bugs.gluster.com",
+ cmd);
+ goto out;
+ }
+ /* The following (extra) unref corresponds to the ref that
+ * was done at the time the lock was granted.
+ */
+ if ((cmd == ENTRYLK_UNLOCK) && (op_ret == 0))
+ inode_unref(pinode->inode);
- op_ret = 0;
out:
- pl_update_refkeeper (this, inode);
- if (unwind) {
- entrylk_trace_out (this, frame, volume, fd, loc, basename,
- cmd, type, op_ret, op_errno);
- STACK_UNWIND_STRICT (entrylk, frame, op_ret, op_errno, NULL);
- } else {
- entrylk_trace_block (this, frame, volume, fd, loc, basename,
- cmd, type);
- }
+ if (need_inode_unref)
+ inode_unref(pinode->inode);
+
+ if (unwind) {
+ entrylk_trace_out(this, frame, volume, fd, loc, basename, cmd, type,
+ op_ret, op_errno);
+ unwind:
+ STACK_UNWIND_STRICT(entrylk, frame, op_ret, op_errno, NULL);
+ }
+ if (pcontend != NULL) {
+ entrylk_contention_notify(this, pcontend);
+ }
- return 0;
+ return 0;
}
/**
@@ -753,17 +947,16 @@ out:
*/
int
-pl_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+pl_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
{
+ pl_common_entrylk(frame, this, volume, loc->inode, basename, cmd, type, loc,
+ NULL, xdata);
- pl_common_entrylk (frame, this, volume, loc->inode, basename, cmd, type, loc, NULL);
-
- return 0;
+ return 0;
}
-
/**
* pl_fentrylk:
*
@@ -771,73 +964,190 @@ pl_entrylk (call_frame_t *frame, xlator_t *this,
*/
int
-pl_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
+pl_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
{
+ pl_common_entrylk(frame, this, volume, fd->inode, basename, cmd, type, NULL,
+ fd, xdata);
- pl_common_entrylk (frame, this, volume, fd->inode, basename, cmd, type, NULL, fd);
-
- return 0;
+ return 0;
}
-
-static int32_t
-__get_entrylk_count (xlator_t *this, pl_inode_t *pl_inode)
+static void
+pl_entrylk_log_cleanup(pl_entry_lock_t *lock)
{
- int32_t count = 0;
- pl_entry_lock_t *lock = NULL;
- pl_dom_list_t *dom = NULL;
+ pl_inode_t *pinode = NULL;
+
+ pinode = lock->pinode;
- list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
- list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "releasing lock on %s held by "
+ "{client=%p, pid=%" PRId64 " lk-owner=%s}",
+ uuid_utoa(pinode->gfid), lock->client, (uint64_t)lock->client_pid,
+ lkowner_utoa(&lock->owner));
+}
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s on %s state = Active",
- dom->domain,
- lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
- "ENTRYLK_WRLCK", lock->basename);
- count++;
+/* Release all entrylks from this client */
+int
+pl_entrylk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
+{
+ posix_locks_private_t *priv;
+ pl_entry_lock_t *tmp = NULL;
+ pl_entry_lock_t *l = NULL;
+ pl_dom_list_t *dom = NULL;
+ pl_inode_t *pinode = NULL;
+ struct list_head *pcontend = NULL;
+ struct list_head released;
+ struct list_head unwind;
+ struct list_head contend;
+ struct timespec now = {};
+
+ INIT_LIST_HEAD(&released);
+ INIT_LIST_HEAD(&unwind);
+
+ priv = this->private;
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD(pcontend);
+ timespec_now(&now);
+ }
+
+ pthread_mutex_lock(&ctx->lock);
+ {
+ list_for_each_entry_safe(l, tmp, &ctx->entrylk_lockers, client_list)
+ {
+ pl_entrylk_log_cleanup(l);
+
+ pinode = l->pinode;
+
+ pthread_mutex_lock(&pinode->mutex);
+ {
+ /* If the entrylk object is part of granted list but not
+ * blocked list, then perform the following actions:
+ * i. delete the object from granted list;
+ * ii. grant other locks (from other clients) that may
+ * have been blocked on this entrylk; and
+ * iii. unref the object.
+ *
+ * If the entrylk object (L1) is part of both granted
+ * and blocked lists, then this means that a parallel
+ * unlock on another entrylk (L2 say) may have 'granted'
+ * L1 and added it to 'granted' list in
+ * __grant_blocked_entry_locks() (although using the
+ * 'blocked_locks' member). In that case, the cleanup
+ * codepath must try and grant other overlapping
+ * blocked entrylks from other clients, now that L1 is
+ * out of their way and then unref L1 in the end, and
+ * leave it to the other thread (the one executing
+ * unlock codepath) to unwind L1's frame, delete it from
+ * blocked_locks list, and perform the last unref on L1.
+ *
+ * If the entrylk object (L1) is part of blocked list
+ * only, the cleanup code path must:
+ * i. delete it from the blocked_locks list inside
+ * this critical section,
+ * ii. unwind its frame with EAGAIN,
+ * iii. try and grant blocked entry locks from other
+ * clients that were otherwise grantable, but were
+ * blocked to avoid leaving L1 to starve forever.
+ * iv. unref the object.
+ */
+ list_del_init(&l->client_list);
+
+ if (!list_empty(&l->domain_list)) {
+ list_del_init(&l->domain_list);
+ list_add_tail(&l->client_list, &released);
+ } else {
+ list_del_init(&l->blocked_locks);
+ list_add_tail(&l->client_list, &unwind);
}
+ }
+ pthread_mutex_unlock(&pinode->mutex);
+ }
+ }
+ pthread_mutex_unlock(&ctx->lock);
- list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
+ if (!list_empty(&unwind)) {
+ list_for_each_entry_safe(l, tmp, &unwind, client_list)
+ {
+ list_del_init(&l->client_list);
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s on %s state = Blocked",
- dom->domain,
- lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
- "ENTRYLK_WRLCK", lock->basename);
- count++;
- }
+ if (l->frame)
+ STACK_UNWIND_STRICT(entrylk, l->frame, -1, EAGAIN, NULL);
+ list_add_tail(&l->client_list, &released);
+ }
+ }
+
+ if (!list_empty(&released)) {
+ list_for_each_entry_safe(l, tmp, &released, client_list)
+ {
+ list_del_init(&l->client_list);
+
+ pinode = l->pinode;
+ dom = get_domain(pinode, l->volume);
+
+ grant_blocked_entry_locks(this, pinode, dom, &now, pcontend);
+
+ pthread_mutex_lock(&pinode->mutex);
+ {
+ __pl_entrylk_unref(l);
+ }
+ pthread_mutex_unlock(&pinode->mutex);
+
+ inode_unref(pinode->inode);
}
+ }
+
+ if (pcontend != NULL) {
+ entrylk_contention_notify(this, pcontend);
+ }
- return count;
+ return 0;
}
int32_t
-get_entrylk_count (xlator_t *this, inode_t *inode)
+__get_entrylk_count(xlator_t *this, pl_inode_t *pl_inode)
{
- pl_inode_t *pl_inode = NULL;
- uint64_t tmp_pl_inode = 0;
- int ret = 0;
- int32_t count = 0;
+ int32_t count = 0;
+ pl_entry_lock_t *lock = NULL;
+ pl_dom_list_t *dom = NULL;
- ret = inode_ctx_get (inode, this, &tmp_pl_inode);
- if (ret != 0) {
- goto out;
- }
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ list_for_each_entry(lock, &dom->entrylk_list, domain_list) { count++; }
- pl_inode = (pl_inode_t *)(long) tmp_pl_inode;
-
- pthread_mutex_lock (&pl_inode->mutex);
+ list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks)
{
- count = __get_entrylk_count (this, pl_inode);
+ count++;
}
- pthread_mutex_unlock (&pl_inode->mutex);
+ }
+
+ return count;
+}
+
+int32_t
+get_entrylk_count(xlator_t *this, inode_t *inode)
+{
+ pl_inode_t *pl_inode = NULL;
+ uint64_t tmp_pl_inode = 0;
+ int ret = 0;
+ int32_t count = 0;
+
+ ret = inode_ctx_get(inode, this, &tmp_pl_inode);
+ if (ret != 0) {
+ goto out;
+ }
+
+ pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ count = __get_entrylk_count(this, pl_inode);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
out:
- return count;
+ return count;
}
diff --git a/xlators/features/locks/src/inodelk.c b/xlators/features/locks/src/inodelk.c
index a2e65d55fc3..d4e51d6e0a1 100644
--- a/xlators/features/locks/src/inodelk.c
+++ b/xlators/features/locks/src/inodelk.c
@@ -1,766 +1,1174 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "inode.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/list.h>
+#include <glusterfs/upcall-utils.h>
#include "locks.h"
+#include "clear.h"
#include "common.h"
-inline void
-__delete_inode_lock (pl_inode_lock_t *lock)
+void
+__delete_inode_lock(pl_inode_lock_t *lock)
{
- list_del (&lock->list);
+ list_del_init(&lock->list);
}
-static inline void
-__pl_inodelk_ref (pl_inode_lock_t *lock)
+static void
+__pl_inodelk_ref(pl_inode_lock_t *lock)
{
- lock->ref++;
+ lock->ref++;
}
-inline void
-__pl_inodelk_unref (pl_inode_lock_t *lock)
+void
+__pl_inodelk_unref(pl_inode_lock_t *lock)
{
- lock->ref--;
- if (!lock->ref)
- GF_FREE (lock);
+ lock->ref--;
+ if (!lock->ref) {
+ GF_FREE(lock->connection_id);
+ GF_FREE(lock);
+ }
}
-/* Check if 2 inodelks are conflicting on type. Only 2 shared locks don't conflict */
-static inline int
-inodelk_type_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+/* Check if 2 inodelks are conflicting on type. Only 2 shared locks don't
+ * conflict */
+static int
+inodelk_type_conflict(pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
- if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK)
- return 1;
+ if (l2->fl_type == F_WRLCK || l1->fl_type == F_WRLCK)
+ return 1;
- return 0;
+ return 0;
}
void
-pl_print_inodelk (char *str, int size, int cmd, struct gf_flock *flock, const char *domain)
+pl_print_inodelk(char *str, int size, int cmd, struct gf_flock *flock,
+ const char *domain)
{
- char *cmd_str = NULL;
- char *type_str = NULL;
+ char *cmd_str = NULL;
+ char *type_str = NULL;
- switch (cmd) {
+ switch (cmd) {
#if F_GETLK != F_GETLK64
case F_GETLK64:
#endif
case F_GETLK:
- cmd_str = "GETLK";
- break;
+ cmd_str = "GETLK";
+ break;
#if F_SETLK != F_SETLK64
case F_SETLK64:
#endif
case F_SETLK:
- cmd_str = "SETLK";
- break;
+ cmd_str = "SETLK";
+ break;
#if F_SETLKW != F_SETLKW64
case F_SETLKW64:
#endif
case F_SETLKW:
- cmd_str = "SETLKW";
- break;
+ cmd_str = "SETLKW";
+ break;
default:
- cmd_str = "UNKNOWN";
- break;
- }
+ cmd_str = "UNKNOWN";
+ break;
+ }
- switch (flock->l_type) {
+ switch (flock->l_type) {
case F_RDLCK:
- type_str = "READ";
- break;
+ type_str = "READ";
+ break;
case F_WRLCK:
- type_str = "WRITE";
- break;
+ type_str = "WRITE";
+ break;
case F_UNLCK:
- type_str = "UNLOCK";
- break;
+ type_str = "UNLOCK";
+ break;
default:
- type_str = "UNKNOWN";
- break;
- }
-
- snprintf (str, size, "lock=INODELK, cmd=%s, type=%s, "
- "domain: %s, start=%llu, len=%llu, pid=%llu",
- cmd_str, type_str, domain,
- (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid);
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ snprintf(str, size,
+ "lock=INODELK, cmd=%s, type=%s, "
+ "domain: %s, start=%llu, len=%llu, pid=%llu",
+ cmd_str, type_str, domain, (unsigned long long)flock->l_start,
+ (unsigned long long)flock->l_len,
+ (unsigned long long)flock->l_pid);
}
/* Determine if the two inodelks overlap reach other's lock regions */
static int
-inodelk_overlap (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+inodelk_overlap(pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
- return ((l1->fl_end >= l2->fl_start) &&
- (l2->fl_end >= l1->fl_start));
+ return ((l1->fl_end >= l2->fl_start) && (l2->fl_end >= l1->fl_start));
}
/* Returns true if the 2 inodelks have the same owner */
-static inline int
-same_inodelk_owner (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+static int
+same_inodelk_owner(pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
- return (is_same_lkowner (&l1->owner, &l2->owner) &&
- (l1->transport == l2->transport));
+ return (is_same_lkowner(&l1->owner, &l2->owner) &&
+ (l1->client == l2->client));
}
/* Returns true if the 2 inodelks conflict with each other */
static int
-inodelk_conflict (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+inodelk_conflict(pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
- return (inodelk_overlap (l1, l2) &&
- inodelk_type_conflict (l1, l2));
+ return (inodelk_overlap(l1, l2) && inodelk_type_conflict(l1, l2));
}
-/* Determine if lock is grantable or not */
-static pl_inode_lock_t *
-__inodelk_grantable (pl_dom_list_t *dom, pl_inode_lock_t *lock)
+/*
+ * Check to see if the candidate lock overlaps/conflicts with the
+ * requested lock. If so, determine how old the lock is and return
+ * true if it exceeds the configured threshold, false otherwise.
+ */
+static inline gf_boolean_t
+__stale_inodelk(xlator_t *this, pl_inode_lock_t *candidate_lock,
+ pl_inode_lock_t *requested_lock, time_t *lock_age_sec)
{
- pl_inode_lock_t *l = NULL;
- pl_inode_lock_t *ret = NULL;
- if (list_empty (&dom->inodelk_list))
- goto out;
- list_for_each_entry (l, &dom->inodelk_list, list){
- if (inodelk_conflict (lock, l) &&
- !same_inodelk_owner (lock, l)) {
- ret = l;
- goto out;
- }
- }
-out:
- return ret;
+ posix_locks_private_t *priv = NULL;
+
+ priv = this->private;
+ /* Question: Should we just prune them all given the
+ * chance? Or just the locks we are attempting to acquire?
+ */
+ if (inodelk_conflict(candidate_lock, requested_lock)) {
+ *lock_age_sec = gf_time() - candidate_lock->granted_time;
+ if (*lock_age_sec > priv->revocation_secs)
+ return _gf_true;
+ }
+ return _gf_false;
}
-static pl_inode_lock_t *
-__blocked_lock_conflict (pl_dom_list_t *dom, pl_inode_lock_t *lock)
+/* Examine any locks held on this inode and potentially revoke the lock
+ * if the age exceeds revocation_secs. We will clear _only_ those locks
+ * which are granted, and then grant those locks which are blocked.
+ *
+ * Depending on how this patch works in the wild, we may expand this and
+ * introduce a heuristic which clears blocked locks as well if they
+ * are beyond a threshold.
+ */
+static gf_boolean_t
+__inodelk_prune_stale(xlator_t *this, pl_inode_t *pinode, pl_dom_list_t *dom,
+ pl_inode_lock_t *lock)
{
- pl_inode_lock_t *l = NULL;
- pl_inode_lock_t *ret = NULL;
-
- if (list_empty (&dom->blocked_inodelks))
- return NULL;
+ posix_locks_private_t *priv = NULL;
+ pl_inode_lock_t *tmp = NULL;
+ pl_inode_lock_t *lk = NULL;
+ gf_boolean_t revoke_lock = _gf_false;
+ int bcount = 0;
+ int gcount = 0;
+ int op_errno = 0;
+ clrlk_args args;
+ args.opts = NULL;
+ time_t lk_age_sec = 0;
+ uint32_t max_blocked = 0;
+ char *reason_str = NULL;
+
+ priv = this->private;
+
+ args.type = CLRLK_INODE;
+ if (priv->revocation_clear_all == _gf_true)
+ args.kind = CLRLK_ALL;
+ else
+ args.kind = CLRLK_GRANTED;
+
+ if (list_empty(&dom->inodelk_list))
+ goto out;
+
+ pthread_mutex_lock(&pinode->mutex);
+ list_for_each_entry_safe(lk, tmp, &dom->inodelk_list, list)
+ {
+ if (__stale_inodelk(this, lk, lock, &lk_age_sec) == _gf_true) {
+ revoke_lock = _gf_true;
+ reason_str = "age";
+ break;
+ }
+ }
- list_for_each_entry (l, &dom->blocked_inodelks, blocked_locks) {
- if (inodelk_conflict (lock, l)) {
- ret = l;
- goto out;
- }
+ max_blocked = priv->revocation_max_blocked;
+ if (max_blocked != 0 && revoke_lock == _gf_false) {
+ list_for_each_entry_safe(lk, tmp, &dom->blocked_inodelks, blocked_locks)
+ {
+ max_blocked--;
+ if (max_blocked == 0) {
+ revoke_lock = _gf_true;
+ reason_str = "max blocked";
+ break;
+ }
}
+ }
+ pthread_mutex_unlock(&pinode->mutex);
out:
- return ret;
+ if (revoke_lock == _gf_true) {
+ clrlk_clear_inodelk(this, pinode, dom, &args, &bcount, &gcount,
+ &op_errno);
+ gf_log(this->name, GF_LOG_WARNING,
+ "Lock revocation [reason: %s; gfid: %s; domain: %s; "
+ "age: %ld sec] - Inode lock revoked: %d granted & %d "
+ "blocked locks cleared",
+ reason_str, uuid_utoa(pinode->gfid), dom->domain, lk_age_sec,
+ gcount, bcount);
+ }
+ return revoke_lock;
}
-static int
-__owner_has_lock (pl_dom_list_t *dom, pl_inode_lock_t *newlock)
+void
+inodelk_contention_notify_check(xlator_t *this, pl_inode_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
{
- pl_inode_lock_t *lock = NULL;
+ posix_locks_private_t *priv;
+ int64_t elapsed;
- list_for_each_entry (lock, &dom->inodelk_list, list) {
- if (same_inodelk_owner (lock, newlock))
- return 1;
- }
+ priv = this->private;
- list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
- if (same_inodelk_owner (lock, newlock))
- return 1;
- }
+ /* If this lock is in a list, it means that we are about to send a
+ * notification for it, so no need to do anything else. */
+ if (!list_empty(&lock->contend)) {
+ return;
+ }
+
+ elapsed = now->tv_sec;
+ elapsed -= lock->contention_time.tv_sec;
+ if (now->tv_nsec < lock->contention_time.tv_nsec) {
+ elapsed--;
+ }
+ if (elapsed < priv->notify_contention_delay) {
+ return;
+ }
- return 0;
-}
+ /* All contention notifications will be sent outside of the locked
+ * region. This means that currently granted locks might have already
+ * been unlocked by that time. To avoid the lock or the inode to be
+ * destroyed before we process them, we take an additional reference
+ * on both. */
+ inode_ref(lock->pl_inode->inode);
+ __pl_inodelk_ref(lock);
+ lock->contention_time = *now;
-/* Determines if lock can be granted and adds the lock. If the lock
- * is blocking, adds it to the blocked_inodelks list of the domain.
- */
-static int
-__lock_inodelk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
- int can_block, pl_dom_list_t *dom)
+ list_add_tail(&lock->contend, contend);
+}
+
+void
+inodelk_contention_notify(xlator_t *this, struct list_head *contend)
{
- pl_inode_lock_t *conf = NULL;
- int ret = -EINVAL;
+ struct gf_upcall up;
+ struct gf_upcall_inodelk_contention lc;
+ pl_inode_lock_t *lock;
+ pl_inode_t *pl_inode;
+ client_t *client;
+ gf_boolean_t notify;
+
+ while (!list_empty(contend)) {
+ lock = list_first_entry(contend, pl_inode_lock_t, contend);
+
+ pl_inode = lock->pl_inode;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ /* If the lock has already been released, no notification is
+ * sent. We clear the notification time in this case. */
+ notify = !list_empty(&lock->list);
+ if (!notify) {
+ lock->contention_time.tv_sec = 0;
+ lock->contention_time.tv_nsec = 0;
+ } else {
+ memcpy(&lc.flock, &lock->user_flock, sizeof(lc.flock));
+ lc.pid = lock->client_pid;
+ lc.domain = lock->volume;
+ lc.xdata = NULL;
+
+ gf_uuid_copy(up.gfid, lock->pl_inode->gfid);
+ client = (client_t *)lock->client;
+ if (client == NULL) {
+ /* A NULL client can be found if the inodelk
+ * was issued by a server side xlator. */
+ up.client_uid = NULL;
+ } else {
+ up.client_uid = client->client_uid;
+ }
+ }
- conf = __inodelk_grantable (dom, lock);
- if (conf){
- ret = -EAGAIN;
- if (can_block == 0)
- goto out;
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (notify) {
+ up.event_type = GF_UPCALL_INODELK_CONTENTION;
+ up.data = &lc;
+
+ if (this->notify(this, GF_EVENT_UPCALL, &up) < 0) {
+ gf_msg_debug(this->name, 0,
+ "Inodelk contention notification "
+ "failed");
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Inodelk contention notification "
+ "sent");
+ }
+ }
- gettimeofday (&lock->blkd_time, NULL);
- list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
+ pthread_mutex_lock(&pl_inode->mutex);
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->user_flock.l_start,
- lock->user_flock.l_len);
+ list_del_init(&lock->contend);
+ __pl_inodelk_unref(lock);
+ pthread_mutex_unlock(&pl_inode->mutex);
- goto out;
+ inode_unref(pl_inode->inode);
+ }
+}
+
+/* Determine if lock is grantable or not */
+static pl_inode_lock_t *
+__inodelk_grantable(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
+ struct timespec *now, struct list_head *contend)
+{
+ pl_inode_lock_t *l = NULL;
+ pl_inode_lock_t *ret = NULL;
+
+ list_for_each_entry(l, &dom->inodelk_list, list)
+ {
+ if (inodelk_conflict(lock, l) && !same_inodelk_owner(lock, l)) {
+ if (ret == NULL) {
+ ret = l;
+ if (contend == NULL) {
+ break;
+ }
+ }
+ inodelk_contention_notify_check(this, l, now, contend);
}
+ }
- if (__blocked_lock_conflict (dom, lock) && !(__owner_has_lock (dom, lock))) {
- ret = -EAGAIN;
- if (can_block == 0)
- goto out;
+ return ret;
+}
- gettimeofday (&lock->blkd_time, NULL);
- list_add_tail (&lock->blocked_locks, &dom->blocked_inodelks);
+static pl_inode_lock_t *
+__blocked_lock_conflict(pl_dom_list_t *dom, pl_inode_lock_t *lock)
+{
+ pl_inode_lock_t *l = NULL;
- gf_log (this->name, GF_LOG_TRACE,
- "Lock is grantable, but blocking to prevent starvation");
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Blocked",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->user_flock.l_start,
- lock->user_flock.l_len);
+ list_for_each_entry(l, &dom->blocked_inodelks, blocked_locks)
+ {
+ if (inodelk_conflict(lock, l)) {
+ return l;
+ }
+ }
+ return NULL;
+}
- goto out;
- }
- __pl_inodelk_ref (lock);
- gettimeofday (&lock->granted_time, NULL);
- list_add (&lock->list, &dom->inodelk_list);
+static int
+__owner_has_lock(pl_dom_list_t *dom, pl_inode_lock_t *newlock)
+{
+ pl_inode_lock_t *lock = NULL;
- ret = 0;
+ list_for_each_entry(lock, &dom->inodelk_list, list)
+ {
+ if (same_inodelk_owner(lock, newlock))
+ return 1;
+ }
+ list_for_each_entry(lock, &dom->blocked_inodelks, blocked_locks)
+ {
+ if (same_inodelk_owner(lock, newlock))
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+__lock_blocked_add(xlator_t *this, pl_dom_list_t *dom, pl_inode_lock_t *lock,
+ int can_block)
+{
+ if (can_block == 0) {
+ goto out;
+ }
+
+ lock->blkd_time = gf_time();
+ list_add_tail(&lock->blocked_locks, &dom->blocked_inodelks);
+
+ gf_msg_trace(this->name, 0,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64
+ " - "
+ "%" PRId64 " => Blocked",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
+ lkowner_utoa(&lock->owner), lock->user_flock.l_start,
+ lock->user_flock.l_len);
+
+ pl_trace_block(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
+ lock->volume);
out:
+ return -EAGAIN;
+}
+
+/* Determines if lock can be granted and adds the lock. If the lock
+ * is blocking, adds it to the blocked_inodelks list of the domain.
+ */
+static int
+__lock_inodelk(xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
+ int can_block, pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
+{
+ pl_inode_lock_t *conf = NULL;
+ int ret;
+
+ ret = pl_inode_remove_inodelk(pl_inode, lock);
+ if (ret < 0) {
return ret;
+ }
+ if (ret == 0) {
+ conf = __inodelk_grantable(this, dom, lock, now, contend);
+ }
+ if ((ret > 0) || (conf != NULL)) {
+ return __lock_blocked_add(this, dom, lock, can_block);
+ }
+
+ /* To prevent blocked locks starvation, check if there are any blocked
+ * locks thay may conflict with this lock. If there is then don't grant
+ * the lock. BUT grant the lock if the owner already has lock to allow
+ * nested locks.
+ * Example:
+ * SHD from Machine1 takes (gfid, 0-infinity) and is granted.
+ * SHD from machine2 takes (gfid, 0-infinity) and is blocked.
+ * When SHD from Machine1 takes (gfid, 0-128KB) it
+ * needs to be granted, without which the earlier lock on 0-infinity
+ * will not be unlocked by SHD from Machine1.
+ * TODO: Find why 'owner_has_lock' is checked even for blocked locks.
+ */
+ if (__blocked_lock_conflict(dom, lock) && !(__owner_has_lock(dom, lock))) {
+ if (can_block != 0) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Lock is grantable, but blocking to prevent "
+ "starvation");
+ }
+
+ return __lock_blocked_add(this, dom, lock, can_block);
+ }
+ __pl_inodelk_ref(lock);
+ lock->granted_time = gf_time();
+ list_add(&lock->list, &dom->inodelk_list);
+
+ return 0;
}
/* Return true if the two inodelks have exactly same lock boundaries */
static int
-inodelks_equal (pl_inode_lock_t *l1, pl_inode_lock_t *l2)
+inodelks_equal(pl_inode_lock_t *l1, pl_inode_lock_t *l2)
{
- if ((l1->fl_start == l2->fl_start) &&
- (l1->fl_end == l2->fl_end))
- return 1;
+ if ((l1->fl_start == l2->fl_start) && (l1->fl_end == l2->fl_end))
+ return 1;
- return 0;
+ return 0;
}
-
static pl_inode_lock_t *
-find_matching_inodelk (pl_inode_lock_t *lock, pl_dom_list_t *dom)
+find_matching_inodelk(pl_inode_lock_t *lock, pl_dom_list_t *dom)
{
- pl_inode_lock_t *l = NULL;
- list_for_each_entry (l, &dom->inodelk_list, list) {
- if (inodelks_equal (l, lock) &&
- same_inodelk_owner (l, lock))
- return l;
- }
- return NULL;
+ pl_inode_lock_t *l = NULL;
+ list_for_each_entry(l, &dom->inodelk_list, list)
+ {
+ if (inodelks_equal(l, lock) && same_inodelk_owner(l, lock))
+ return l;
+ }
+ return NULL;
}
/* Set F_UNLCK removes a lock which has the exact same lock boundaries
* as the UNLCK lock specifies. If such a lock is not found, returns invalid
*/
static pl_inode_lock_t *
-__inode_unlock_lock (xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom)
+__inode_unlock_lock(xlator_t *this, pl_inode_lock_t *lock, pl_dom_list_t *dom)
{
-
- pl_inode_lock_t *conf = NULL;
-
- conf = find_matching_inodelk (lock, dom);
- if (!conf) {
- gf_log (this->name, GF_LOG_ERROR,
- " Matching lock not found for unlock %llu-%llu, by %s "
- "on %p", (unsigned long long)lock->fl_start,
- (unsigned long long)lock->fl_end,
- lkowner_utoa (&lock->owner), lock->transport);
- goto out;
- }
- __delete_inode_lock (conf);
- gf_log (this->name, GF_LOG_DEBUG,
- " Matching lock found for unlock %llu-%llu, by %s on %p",
- (unsigned long long)lock->fl_start,
- (unsigned long long)lock->fl_end, lkowner_utoa (&lock->owner),
- lock->transport);
+ pl_inode_lock_t *conf = NULL;
+ inode_t *inode = NULL;
+
+ inode = lock->pl_inode->inode;
+
+ conf = find_matching_inodelk(lock, dom);
+ if (!conf) {
+ gf_log(this->name, GF_LOG_ERROR,
+ " Matching lock not found for unlock %llu-%llu, by %s "
+ "on %p for gfid:%s",
+ (unsigned long long)lock->fl_start,
+ (unsigned long long)lock->fl_end, lkowner_utoa(&lock->owner),
+ lock->client, inode ? uuid_utoa(inode->gfid) : "UNKNOWN");
+ goto out;
+ }
+ __delete_inode_lock(conf);
+ gf_log(this->name, GF_LOG_DEBUG,
+ " Matching lock found for unlock %llu-%llu, by %s on %p for gfid:%s",
+ (unsigned long long)lock->fl_start, (unsigned long long)lock->fl_end,
+ lkowner_utoa(&lock->owner), lock->client,
+ inode ? uuid_utoa(inode->gfid) : "UNKNOWN");
out:
- return conf;
+ return conf;
}
-static void
-__grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode,
- struct list_head *granted, pl_dom_list_t *dom)
-{
- int bl_ret = 0;
- pl_inode_lock_t *bl = NULL;
- pl_inode_lock_t *tmp = NULL;
- struct list_head blocked_list;
+void
+__grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted, pl_dom_list_t *dom,
+ struct timespec *now, struct list_head *contend)
+{
+ pl_inode_lock_t *bl = NULL;
+ pl_inode_lock_t *tmp = NULL;
- INIT_LIST_HEAD (&blocked_list);
- list_splice_init (&dom->blocked_inodelks, &blocked_list);
+ struct list_head blocked_list;
- list_for_each_entry_safe (bl, tmp, &blocked_list, blocked_locks) {
+ INIT_LIST_HEAD(&blocked_list);
+ list_splice_init(&dom->blocked_inodelks, &blocked_list);
- list_del_init (&bl->blocked_locks);
+ list_for_each_entry_safe(bl, tmp, &blocked_list, blocked_locks)
+ {
+ list_del_init(&bl->blocked_locks);
- bl_ret = __lock_inodelk (this, pl_inode, bl, 1, dom);
+ bl->status = __lock_inodelk(this, pl_inode, bl, 1, dom, now, contend);
- if (bl_ret == 0) {
- list_add (&bl->blocked_locks, granted);
- }
+ if (bl->status != -EAGAIN) {
+ list_add_tail(&bl->blocked_locks, granted);
}
- return;
+ }
}
-/* Grant all inodelks blocked on a lock */
void
-grant_blocked_inode_locks (xlator_t *this, pl_inode_t *pl_inode, pl_dom_list_t *dom)
+unwind_granted_inodes(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted)
{
- struct list_head granted;
- pl_inode_lock_t *lock;
- pl_inode_lock_t *tmp;
+ pl_inode_lock_t *lock;
+ pl_inode_lock_t *tmp;
+ int32_t op_ret;
+ int32_t op_errno;
+
+ list_for_each_entry_safe(lock, tmp, granted, blocked_locks)
+ {
+ if (lock->status == 0) {
+ op_ret = 0;
+ op_errno = 0;
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64
+ " => Granted",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa(&lock->owner),
+ lock->user_flock.l_start, lock->user_flock.l_len);
+ } else {
+ op_ret = -1;
+ op_errno = -lock->status;
+ }
+ pl_trace_out(this, lock->frame, NULL, NULL, F_SETLKW, &lock->user_flock,
+ op_ret, op_errno, lock->volume);
- INIT_LIST_HEAD (&granted);
+ STACK_UNWIND_STRICT(inodelk, lock->frame, op_ret, op_errno, NULL);
+ lock->frame = NULL;
+ }
- pthread_mutex_lock (&pl_inode->mutex);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ list_for_each_entry_safe(lock, tmp, granted, blocked_locks)
{
- __grant_blocked_inode_locks (this, pl_inode, &granted, dom);
+ list_del_init(&lock->blocked_locks);
+ __pl_inodelk_unref(lock);
}
- pthread_mutex_unlock (&pl_inode->mutex);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+}
- list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->user_flock.l_start,
- lock->user_flock.l_len);
+/* Grant all inodelks blocked on a lock */
+void
+grant_blocked_inode_locks(xlator_t *this, pl_inode_t *pl_inode,
+ pl_dom_list_t *dom, struct timespec *now,
+ struct list_head *contend)
+{
+ struct list_head granted;
- pl_trace_out (this, lock->frame, NULL, NULL, F_SETLKW,
- &lock->user_flock, 0, 0, lock->volume);
+ INIT_LIST_HEAD(&granted);
- STACK_UNWIND_STRICT (inodelk, lock->frame, 0, 0, NULL);
- }
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __grant_blocked_inode_locks(this, pl_inode, &granted, dom, now,
+ contend);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
- pthread_mutex_lock (&pl_inode->mutex);
- {
- list_for_each_entry_safe (lock, tmp, &granted, blocked_locks) {
- list_del_init (&lock->blocked_locks);
- __pl_inodelk_unref (lock);
- }
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ unwind_granted_inodes(this, pl_inode, &granted);
}
-/* Release all inodelks from this transport */
-static int
-release_inode_locks_of_transport (xlator_t *this, pl_dom_list_t *dom,
- inode_t *inode, void *trans)
+static void
+pl_inodelk_log_cleanup(pl_inode_lock_t *lock)
{
- pl_inode_lock_t *tmp = NULL;
- pl_inode_lock_t *l = NULL;
+ pl_inode_t *pl_inode = NULL;
- pl_inode_t * pinode = NULL;
+ pl_inode = lock->pl_inode;
- struct list_head released;
+ gf_log(THIS->name, GF_LOG_WARNING,
+ "releasing lock on %s held by "
+ "{client=%p, pid=%" PRId64 " lk-owner=%s}",
+ uuid_utoa(pl_inode->gfid), lock->client, (uint64_t)lock->client_pid,
+ lkowner_utoa(&lock->owner));
+}
- char *path = NULL;
- char *file = NULL;
+/* Release all inodelks from this client */
+int
+pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
+{
+ posix_locks_private_t *priv;
+ pl_inode_lock_t *tmp = NULL;
+ pl_inode_lock_t *l = NULL;
+ pl_dom_list_t *dom = NULL;
+ pl_inode_t *pl_inode = NULL;
+ struct list_head *pcontend = NULL;
+ struct list_head released;
+ struct list_head unwind;
+ struct list_head contend;
+ struct timespec now = {};
+
+ priv = this->private;
+
+ INIT_LIST_HEAD(&released);
+ INIT_LIST_HEAD(&unwind);
+
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD(pcontend);
+ timespec_now(&now);
+ }
+
+ pthread_mutex_lock(&ctx->lock);
+ {
+ list_for_each_entry_safe(l, tmp, &ctx->inodelk_lockers, client_list)
+ {
+ pl_inodelk_log_cleanup(l);
+
+ pl_inode = l->pl_inode;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ /* If the inodelk object is part of granted list but not
+ * blocked list, then perform the following actions:
+ * i. delete the object from granted list;
+ * ii. grant other locks (from other clients) that may
+ * have been blocked on this inodelk; and
+ * iii. unref the object.
+ *
+ * If the inodelk object (L1) is part of both granted
+ * and blocked lists, then this means that a parallel
+ * unlock on another inodelk (L2 say) may have 'granted'
+ * L1 and added it to 'granted' list in
+ * __grant_blocked_inode_locks() (although using the
+ * 'blocked_locks' member). In that case, the cleanup
+ * codepath must try and grant other overlapping
+ * blocked inodelks from other clients, now that L1 is
+ * out of their way and then unref L1 in the end, and
+ * leave it to the other thread (the one executing
+ * unlock codepath) to unwind L1's frame, delete it from
+ * blocked_locks list, and perform the last unref on L1.
+ *
+ * If the inodelk object (L1) is part of blocked list
+ * only, the cleanup code path must:
+ * i. delete it from the blocked_locks list inside
+ * this critical section,
+ * ii. unwind its frame with EAGAIN,
+ * iii. try and grant blocked inode locks from other
+ * clients that were otherwise grantable, but just
+ * got blocked to avoid leaving L1 to starve
+ * forever.
+ * iv. unref the object.
+ */
+ list_del_init(&l->client_list);
+
+ if (!list_empty(&l->list)) {
+ __delete_inode_lock(l);
+ list_add_tail(&l->client_list, &released);
+ } else {
+ list_del_init(&l->blocked_locks);
+ list_add_tail(&l->client_list, &unwind);
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+ }
+ pthread_mutex_unlock(&ctx->lock);
- INIT_LIST_HEAD (&released);
+ if (!list_empty(&unwind)) {
+ list_for_each_entry_safe(l, tmp, &unwind, client_list)
+ {
+ list_del_init(&l->client_list);
- pinode = pl_inode_get (this, inode);
+ if (l->frame)
+ STACK_UNWIND_STRICT(inodelk, l->frame, -1, EAGAIN, NULL);
+ list_add_tail(&l->client_list, &released);
+ }
+ }
- pthread_mutex_lock (&pinode->mutex);
+ if (!list_empty(&released)) {
+ list_for_each_entry_safe(l, tmp, &released, client_list)
{
+ list_del_init(&l->client_list);
- list_for_each_entry_safe (l, tmp, &dom->blocked_inodelks, blocked_locks) {
- if (l->transport != trans)
- continue;
-
- list_del_init (&l->blocked_locks);
-
- inode_path (inode, NULL, &path);
- if (path)
- file = path;
- else
- file = uuid_utoa (inode->gfid);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "releasing blocking lock on %s held by "
- "{transport=%p, pid=%"PRId64" lk-owner=%s}",
- file, trans, (uint64_t) l->client_pid,
- lkowner_utoa (&l->owner));
-
- list_add (&l->blocked_locks, &released);
- if (path) {
- GF_FREE (path);
- path = NULL;
- }
- }
-
- list_for_each_entry_safe (l, tmp, &dom->inodelk_list, list) {
- if (l->transport != trans)
- continue;
-
- inode_path (inode, NULL, &path);
- if (path)
- file = path;
- else
- file = uuid_utoa (inode->gfid);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "releasing granted lock on %s held by "
- "{transport=%p, pid=%"PRId64" lk-owner=%s}",
- file, trans, (uint64_t) l->client_pid,
- lkowner_utoa (&l->owner));
-
- if (path) {
- GF_FREE (path);
- path = NULL;
- }
-
- __delete_inode_lock (l);
- __pl_inodelk_unref (l);
- }
- }
- if (path)
- GF_FREE (path);
+ pl_inode = l->pl_inode;
- pthread_mutex_unlock (&pinode->mutex);
+ dom = get_domain(pl_inode, l->volume);
- list_for_each_entry_safe (l, tmp, &released, blocked_locks) {
- list_del_init (&l->blocked_locks);
+ grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend);
- STACK_UNWIND_STRICT (inodelk, l->frame, -1, EAGAIN, NULL);
- //No need to take lock as the locks are only in one list
- __pl_inodelk_unref (l);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __pl_inodelk_unref(l);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ inode_unref(pl_inode->inode);
}
+ }
- grant_blocked_inode_locks (this, pinode, dom);
- return 0;
-}
+ if (pcontend != NULL) {
+ inodelk_contention_notify(this, pcontend);
+ }
+ return 0;
+}
static int
-pl_inode_setlk (xlator_t *this, pl_inode_t *pl_inode, pl_inode_lock_t *lock,
- int can_block, pl_dom_list_t *dom)
+pl_inode_setlk(xlator_t *this, pl_ctx_t *ctx, pl_inode_t *pl_inode,
+ pl_inode_lock_t *lock, int can_block, pl_dom_list_t *dom,
+ inode_t *inode)
{
- int ret = -EINVAL;
- pl_inode_lock_t *retlock = NULL;
- gf_boolean_t unref = _gf_true;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- if (lock->fl_type != F_UNLCK) {
- ret = __lock_inodelk (this, pl_inode, lock, can_block, dom);
- if (ret == 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->fl_start,
- lock->fl_end);
- } else if (ret == -EAGAIN) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->user_flock.l_start,
- lock->user_flock.l_len);
- if (can_block)
- unref = _gf_false;
- }
- } else {
- retlock = __inode_unlock_lock (this, lock, dom);
- if (!retlock) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Bad Unlock issued on Inode lock");
- ret = -EINVAL;
- goto out;
- }
- __pl_inodelk_unref (retlock);
-
- ret = 0;
+ posix_locks_private_t *priv = NULL;
+ int ret = -EINVAL;
+ pl_inode_lock_t *retlock = NULL;
+ gf_boolean_t unref = _gf_true;
+ gf_boolean_t need_inode_unref = _gf_false;
+ struct list_head *pcontend = NULL;
+ struct list_head contend;
+ struct list_head wake;
+ struct timespec now = {};
+ short fl_type;
+
+ lock->pl_inode = pl_inode;
+ fl_type = lock->fl_type;
+
+ priv = this->private;
+
+ /* Ideally, AFTER a successful lock (both blocking and non-blocking) or
+ * an unsuccessful blocking lock operation, the inode needs to be ref'd.
+ *
+ * But doing so might give room to a race where the lock-requesting
+ * client could send a DISCONNECT just before this thread refs the inode
+ * after the locking is done, and the epoll thread could unref the inode
+ * in cleanup which means the inode's refcount would come down to 0, and
+ * the call to pl_forget() at this point destroys @pl_inode. Now when
+ * the io-thread executing this function tries to access pl_inode,
+ * it could crash on account of illegal memory access.
+ *
+ * To get around this problem, the inode is ref'd once even before
+ * adding the lock into client_list as a precautionary measure.
+ * This way even if there are DISCONNECTs, there will always be 1 extra
+ * ref on the inode, so @pl_inode is still alive until after the
+ * current stack unwinds.
+ */
+ pl_inode->inode = inode_ref(inode);
+
+ if (priv->revocation_secs != 0) {
+ if (lock->fl_type != F_UNLCK) {
+ __inodelk_prune_stale(this, pl_inode, dom, lock);
+ } else if (priv->monkey_unlocking == _gf_true) {
+ if (pl_does_monkey_want_stuck_lock()) {
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __pl_inodelk_unref(lock);
}
+ pthread_mutex_unlock(&pl_inode->mutex);
+ inode_unref(pl_inode->inode);
+ gf_log(this->name, GF_LOG_WARNING,
+ "MONKEY LOCKING (forcing stuck lock)!");
+ return 0;
+ }
}
-out:
+ }
+
+ if (priv->notify_contention) {
+ pcontend = &contend;
+ INIT_LIST_HEAD(pcontend);
+ timespec_now(&now);
+ }
+
+ INIT_LIST_HEAD(&wake);
+
+ if (ctx)
+ pthread_mutex_lock(&ctx->lock);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (lock->fl_type != F_UNLCK) {
+ ret = __lock_inodelk(this, pl_inode, lock, can_block, dom, &now,
+ pcontend);
+ if (ret == 0) {
+ lock->frame = NULL;
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64
+ " => OK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa(&lock->owner),
+ lock->fl_start, lock->fl_end);
+ } else if (ret == -EAGAIN) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64
+ " => NOK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
+ lock->client_pid, lkowner_utoa(&lock->owner),
+ lock->user_flock.l_start, lock->user_flock.l_len);
+ if (can_block) {
+ unref = _gf_false;
+ }
+ }
+ /* For all but the case where a non-blocking lock attempt fails
+ * with -EAGAIN, the extra ref taken at the start of this function
+ * must be negated. */
+ need_inode_unref = (ret != 0) && ((ret != -EAGAIN) || !can_block);
+ if (ctx && !need_inode_unref) {
+ list_add_tail(&lock->client_list, &ctx->inodelk_lockers);
+ }
+ } else {
+ /* Irrespective of whether unlock succeeds or not,
+ * the extra inode ref that was done at the start of
+ * this function must be negated. Towards this,
+ * @need_inode_unref flag is set unconditionally here.
+ */
+ need_inode_unref = _gf_true;
+ retlock = __inode_unlock_lock(this, lock, dom);
+ if (!retlock) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Bad Unlock issued on Inode lock");
+ ret = -EINVAL;
+ goto out;
+ }
+ list_del_init(&retlock->client_list);
+ __pl_inodelk_unref(retlock);
+
+ pl_inode_remove_unlocked(this, pl_inode, &wake);
+
+ ret = 0;
+ }
+ out:
if (unref)
- __pl_inodelk_unref (lock);
- pthread_mutex_unlock (&pl_inode->mutex);
- grant_blocked_inode_locks (this, pl_inode, dom);
- return ret;
+ __pl_inodelk_unref(lock);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ if (ctx)
+ pthread_mutex_unlock(&ctx->lock);
+
+ pl_inode_remove_wake(&wake);
+
+ /* The following (extra) unref corresponds to the ref that
+ * was done at the time the lock was granted.
+ */
+ if ((fl_type == F_UNLCK) && (ret == 0)) {
+ inode_unref(pl_inode->inode);
+ grant_blocked_inode_locks(this, pl_inode, dom, &now, pcontend);
+ }
+
+ if (need_inode_unref) {
+ inode_unref(pl_inode->inode);
+ }
+
+ if (pcontend != NULL) {
+ inodelk_contention_notify(this, pcontend);
+ }
+
+ return ret;
}
/* Create a new inode_lock_t */
-pl_inode_lock_t *
-new_inode_lock (struct gf_flock *flock, void *transport, pid_t client_pid,
- gf_lkowner_t *owner, const char *volume)
+static pl_inode_lock_t *
+new_inode_lock(struct gf_flock *flock, client_t *client, pid_t client_pid,
+ call_frame_t *frame, xlator_t *this, const char *volume,
+ char *conn_id, int32_t *op_errno)
{
- pl_inode_lock_t *lock = NULL;
+ pl_inode_lock_t *lock = NULL;
+
+ if (!pl_is_lk_owner_valid(&frame->root->lk_owner, frame->root->client)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ lock = GF_CALLOC(1, sizeof(*lock), gf_locks_mt_pl_inode_lock_t);
+ if (!lock) {
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ lock->fl_start = flock->l_start;
+ lock->fl_type = flock->l_type;
+
+ if (flock->l_len == 0)
+ lock->fl_end = LLONG_MAX;
+ else
+ lock->fl_end = flock->l_start + flock->l_len - 1;
+
+ lock->client = client;
+ lock->client_pid = client_pid;
+ lock->volume = volume;
+ lock->owner = frame->root->lk_owner;
+ lock->frame = frame;
+ lock->this = this;
+
+ if (conn_id) {
+ lock->connection_id = gf_strdup(conn_id);
+ }
+
+ INIT_LIST_HEAD(&lock->list);
+ INIT_LIST_HEAD(&lock->blocked_locks);
+ INIT_LIST_HEAD(&lock->client_list);
+ INIT_LIST_HEAD(&lock->contend);
+ __pl_inodelk_ref(lock);
- lock = GF_CALLOC (1, sizeof (*lock),
- gf_locks_mt_pl_inode_lock_t);
- if (!lock) {
- return NULL;
- }
+out:
+ return lock;
+}
+
+int32_t
+_pl_convert_volume(const char *volume, char **res)
+{
+ char *mdata_vol = NULL;
+ int ret = 0;
- lock->fl_start = flock->l_start;
- lock->fl_type = flock->l_type;
+ mdata_vol = strrchr(volume, ':');
+ // if the volume already ends with :metadata don't bother
+ if (mdata_vol && (strcmp(mdata_vol, ":metadata") == 0))
+ return 0;
- if (flock->l_len == 0)
- lock->fl_end = LLONG_MAX;
- else
- lock->fl_end = flock->l_start + flock->l_len - 1;
+ ret = gf_asprintf(res, "%s:metadata", volume);
+ if (ret <= 0)
+ return ENOMEM;
+ return 0;
+}
- lock->transport = transport;
- lock->client_pid = client_pid;
- lock->volume = volume;
- lock->owner = *owner;
+int32_t
+_pl_convert_volume_for_special_range(struct gf_flock *flock, const char *volume,
+ char **res)
+{
+ int32_t ret = 0;
- INIT_LIST_HEAD (&lock->list);
- INIT_LIST_HEAD (&lock->blocked_locks);
- __pl_inodelk_ref (lock);
+ if ((flock->l_start == LLONG_MAX - 1) && (flock->l_len == 0)) {
+ ret = _pl_convert_volume(volume, res);
+ }
- return lock;
+ return ret;
}
/* Common inodelk code called from pl_inodelk and pl_finodelk */
int
-pl_common_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, inode_t *inode, int32_t cmd,
- struct gf_flock *flock, loc_t *loc, fd_t *fd)
-{
- int32_t op_ret = -1;
- int32_t op_errno = 0;
- int ret = -1;
- int can_block = 0;
- pid_t client_pid = -1;
- void * transport = NULL;
- pl_inode_t * pinode = NULL;
- pl_inode_lock_t * reqlock = NULL;
- pl_dom_list_t * dom = NULL;
-
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (inode, unwind);
- VALIDATE_OR_GOTO (flock, unwind);
-
- if ((flock->l_start < 0) || (flock->l_len < 0)) {
- op_errno = EINVAL;
- goto unwind;
- }
-
- pl_trace_in (this, frame, fd, loc, cmd, flock, volume);
-
- transport = frame->root->trans;
- client_pid = frame->root->pid;
-
- pinode = pl_inode_get (this, inode);
- if (!pinode) {
- op_errno = ENOMEM;
- goto unwind;
- }
-
- dom = get_domain (pinode, volume);
- if (!dom) {
- op_errno = ENOMEM;
- goto unwind;
+pl_common_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ inode_t *inode, int32_t cmd, struct gf_flock *flock,
+ loc_t *loc, fd_t *fd, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int ret = -1;
+ GF_UNUSED int dict_ret = -1;
+ int can_block = 0;
+ short lock_type = 0;
+ pl_inode_t *pinode = NULL;
+ pl_inode_lock_t *reqlock = NULL;
+ pl_dom_list_t *dom = NULL;
+ char *res = NULL;
+ char *res1 = NULL;
+ char *conn_id = NULL;
+ pl_ctx_t *ctx = NULL;
+
+ if (xdata)
+ dict_ret = dict_get_str(xdata, "connection-id", &conn_id);
+
+ VALIDATE_OR_GOTO(frame, out);
+ VALIDATE_OR_GOTO(inode, unwind);
+ VALIDATE_OR_GOTO(flock, unwind);
+
+ if ((flock->l_start < 0) || (flock->l_len < 0)) {
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ op_errno = _pl_convert_volume_for_special_range(flock, volume, &res);
+ if (op_errno)
+ goto unwind;
+ if (res)
+ volume = res;
+
+ pl_trace_in(this, frame, fd, loc, cmd, flock, volume);
+
+ if (frame->root->client) {
+ ctx = pl_ctx_get(frame->root->client, this);
+ if (!ctx) {
+ op_errno = ENOMEM;
+ gf_log(this->name, GF_LOG_INFO, "pl_ctx_get() failed");
+ goto unwind;
}
+ }
- if (frame->root->lk_owner.len == 0) {
- /*
- special case: this means release all locks
- from this transport
- */
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing all locks from transport %p", transport);
-
- release_inode_locks_of_transport (this, dom, inode, transport);
-
- op_ret = 0;
- goto unwind;
- }
+ pinode = pl_inode_get(this, inode, NULL);
+ if (!pinode) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- reqlock = new_inode_lock (flock, transport, client_pid,
- &frame->root->lk_owner, volume);
+ dom = get_domain(pinode, volume);
+ if (!dom) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- if (!reqlock) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
+ reqlock = new_inode_lock(flock, frame->root->client, frame->root->pid,
+ frame, this, dom->domain, conn_id, &op_errno);
- reqlock->frame = frame;
- reqlock->this = this;
+ if (!reqlock) {
+ op_ret = -1;
+ goto unwind;
+ }
- switch (cmd) {
+ switch (cmd) {
case F_SETLKW:
- can_block = 1;
- reqlock->frame = frame;
- reqlock->this = this;
+ can_block = 1;
- /* fall through */
+ /* fall through */
case F_SETLK:
- memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock));
- ret = pl_inode_setlk (this, pinode, reqlock,
- can_block, dom);
-
- if (ret < 0) {
- if ((can_block) && (F_UNLCK != flock->l_type)) {
- pl_trace_block (this, frame, fd, loc,
- cmd, flock, volume);
- goto out;
- }
- gf_log (this->name, GF_LOG_TRACE, "returning EAGAIN");
- op_errno = -ret;
- goto unwind;
+ lock_type = flock->l_type;
+ memcpy(&reqlock->user_flock, flock, sizeof(struct gf_flock));
+ ret = pl_inode_setlk(this, ctx, pinode, reqlock, can_block, dom,
+ inode);
+
+ if (ret < 0) {
+ if (ret == -EAGAIN) {
+ if (can_block && (F_UNLCK != lock_type)) {
+ goto out;
+ }
+ gf_log(this->name, GF_LOG_TRACE, "returning EAGAIN");
+ } else {
+ gf_log(this->name, GF_LOG_TRACE, "returning %d", ret);
}
- break;
+ op_errno = -ret;
+ goto unwind;
+ }
+ break;
default:
- op_errno = ENOTSUP;
- gf_log (this->name, GF_LOG_DEBUG,
- "Lock command F_GETLK not supported for [f]inodelk "
- "(cmd=%d)",
- cmd);
- goto unwind;
- }
+ op_errno = ENOTSUP;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Lock command F_GETLK not supported for [f]inodelk "
+ "(cmd=%d)",
+ cmd);
+ goto unwind;
+ }
- op_ret = 0;
+ op_ret = 0;
unwind:
- if ((inode != NULL) && (flock !=NULL)) {
- pl_update_refkeeper (this, inode);
- pl_trace_out (this, frame, fd, loc, cmd, flock, op_ret, op_errno, volume);
- }
+ if (flock != NULL)
+ pl_trace_out(this, frame, fd, loc, cmd, flock, op_ret, op_errno,
+ volume);
- STACK_UNWIND_STRICT (inodelk, frame, op_ret, op_errno, NULL);
+ STACK_UNWIND_STRICT(inodelk, frame, op_ret, op_errno, NULL);
out:
- return 0;
+ GF_FREE(res);
+ GF_FREE(res1);
+ return 0;
}
int
-pl_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock)
+pl_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
+ pl_common_inodelk(frame, this, volume, loc->inode, cmd, flock, loc, NULL,
+ xdata);
- pl_common_inodelk (frame, this, volume, loc->inode, cmd, flock, loc, NULL);
-
- return 0;
+ return 0;
}
int
-pl_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock)
+pl_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
{
+ pl_common_inodelk(frame, this, volume, fd->inode, cmd, flock, NULL, fd,
+ xdata);
- pl_common_inodelk (frame, this, volume, fd->inode, cmd, flock, NULL, fd);
-
- return 0;
-
+ return 0;
}
-
static int32_t
-__get_inodelk_count (xlator_t *this, pl_inode_t *pl_inode)
-{
- int32_t count = 0;
- pl_inode_lock_t *lock = NULL;
- pl_dom_list_t *dom = NULL;
-
- list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
- list_for_each_entry (lock, &dom->inodelk_list, list) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" "
- "state = Active",
- dom->domain,
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->user_flock.l_start,
- lock->user_flock.l_len);
-
- count++;
- }
-
- list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- " domain: %s %s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" "
- "state = Blocked",
- dom->domain,
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->user_flock.l_start,
- lock->user_flock.l_len);
+__get_inodelk_dom_count(pl_dom_list_t *dom)
+{
+ pl_inode_lock_t *lock = NULL;
+ int32_t count = 0;
+
+ list_for_each_entry(lock, &dom->inodelk_list, list) { count++; }
+ list_for_each_entry(lock, &dom->blocked_inodelks, blocked_locks)
+ {
+ count++;
+ }
+ return count;
+}
- count++;
- }
+/* Returns the no. of locks (blocked/granted) held on a given domain name
+ * If @domname is NULL, returns the no. of locks in all the domains present.
+ * If @domname is non-NULL and non-existent, returns 0 */
+int32_t
+__get_inodelk_count(xlator_t *this, pl_inode_t *pl_inode, char *domname)
+{
+ int32_t count = 0;
+ pl_dom_list_t *dom = NULL;
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ if (domname) {
+ if (strcmp(domname, dom->domain) == 0) {
+ count = __get_inodelk_dom_count(dom);
+ goto out;
+ }
+ } else {
+ /* Counting locks from all domains */
+ count += __get_inodelk_dom_count(dom);
}
+ }
- return count;
+out:
+ return count;
}
int32_t
-get_inodelk_count (xlator_t *this, inode_t *inode)
+get_inodelk_count(xlator_t *this, inode_t *inode, char *domname)
{
- pl_inode_t *pl_inode = NULL;
- uint64_t tmp_pl_inode = 0;
- int ret = 0;
- int32_t count = 0;
+ pl_inode_t *pl_inode = NULL;
+ uint64_t tmp_pl_inode = 0;
+ int ret = 0;
+ int32_t count = 0;
- ret = inode_ctx_get (inode, this, &tmp_pl_inode);
- if (ret != 0) {
- goto out;
- }
+ ret = inode_ctx_get(inode, this, &tmp_pl_inode);
+ if (ret != 0) {
+ goto out;
+ }
- pl_inode = (pl_inode_t *)(long) tmp_pl_inode;
+ pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
- pthread_mutex_lock (&pl_inode->mutex);
- {
- count = __get_inodelk_count (this, pl_inode);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ count = __get_inodelk_count(this, pl_inode, domname);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
out:
- return count;
+ return count;
}
diff --git a/xlators/features/locks/src/locks-mem-types.h b/xlators/features/locks/src/locks-mem-types.h
index 5b29cbc7257..a76605027b3 100644
--- a/xlators/features/locks/src/locks-mem-types.h
+++ b/xlators/features/locks/src/locks-mem-types.h
@@ -1,39 +1,28 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __LOCKS_MEM_TYPES_H__
#define __LOCKS_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_locks_mem_types_ {
- gf_locks_mt_pl_dom_list_t = gf_common_mt_end + 1,
- gf_locks_mt_pl_inode_t,
- gf_locks_mt_posix_lock_t,
- gf_locks_mt_pl_entry_lock_t,
- gf_locks_mt_pl_inode_lock_t,
- gf_locks_mt_truncate_ops,
- gf_locks_mt_pl_rw_req_t,
- gf_locks_mt_posix_locks_private_t,
- gf_locks_mt_pl_fdctx_t,
- gf_locks_mt_end
+ gf_locks_mt_pl_dom_list_t = gf_common_mt_end + 1,
+ gf_locks_mt_pl_inode_t,
+ gf_locks_mt_posix_lock_t,
+ gf_locks_mt_pl_entry_lock_t,
+ gf_locks_mt_pl_inode_lock_t,
+ gf_locks_mt_pl_rw_req_t,
+ gf_locks_mt_posix_locks_private_t,
+ gf_locks_mt_pl_fdctx_t,
+ gf_locks_mt_pl_meta_lock_t,
+ gf_locks_mt_end
};
#endif
-
diff --git a/xlators/features/locks/src/locks.h b/xlators/features/locks/src/locks.h
index f64c2690241..c868eb494a2 100644
--- a/xlators/features/locks/src/locks.h
+++ b/xlators/features/locks/src/locks.h
@@ -1,182 +1,292 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012, 2015-2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef __POSIX_LOCKS_H__
#define __POSIX_LOCKS_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "compat-errno.h"
-#include "stack.h"
-#include "call-stub.h"
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/stack.h>
+#include <glusterfs/call-stub.h>
#include "locks-mem-types.h"
+#include <glusterfs/client_t.h>
-#include "lkowner.h"
+#include <glusterfs/lkowner.h>
+
+typedef enum {
+ MLK_NONE,
+ MLK_FILE_BASED,
+ MLK_FORCED,
+ MLK_OPTIMAL
+} mlk_mode_t; /* defines different mandatory locking modes*/
-#define POSIX_LOCKS "posix-locks"
struct __pl_fd;
struct __posix_lock {
- struct list_head list;
+ struct list_head list;
+
+ off_t fl_start;
+ off_t fl_end;
+ uint32_t lk_flags;
+
+ short fl_type;
+ short blocked; /* waiting to acquire */
+ struct gf_flock user_flock; /* the flock supplied by the user */
+ xlator_t *this; /* required for blocked locks */
+ unsigned long fd_num;
- short fl_type;
- off_t fl_start;
- off_t fl_end;
+ fd_t *fd;
+ call_frame_t *frame;
- short blocked; /* waiting to acquire */
- struct gf_flock user_flock; /* the flock supplied by the user */
- xlator_t *this; /* required for blocked locks */
- unsigned long fd_num;
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
- fd_t *fd;
- call_frame_t *frame;
+ /* These two together serve to uniquely identify each process
+ across nodes */
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval granted_time; /*time at which lock was queued into active list*/
+ void *client; /* to identify client node */
- /* These two together serve to uniquely identify each process
- across nodes */
+ /* This field uniquely identifies the client the lock belongs to. As
+ * lock migration is handled by rebalance, the client_t object will be
+ * overwritten by rebalance and can't be deemed as the owner of the
+ * lock on destination. Hence, the below field is migrated from
+ * source to destination by lock_migration_info_t and updated on the
+ * destination. So that on client-server disconnection, server can
+ * cleanup the locks proper;y. */
- void *transport; /* to identify client node */
- gf_lkowner_t owner;
- pid_t client_pid; /* pid of client process */
+ char *client_uid;
+ gf_lkowner_t owner;
+ pid_t client_pid; /* pid of client process */
+
+ int blocking;
};
typedef struct __posix_lock posix_lock_t;
struct __pl_inode_lock {
- struct list_head list;
- struct list_head blocked_locks; /* list_head pointing to blocked_inodelks */
- int ref;
+ struct list_head list;
+ struct list_head blocked_locks; /* list_head pointing to blocked_inodelks */
+ struct list_head contend; /* list of contending locks */
+ int ref;
+
+ off_t fl_start;
+ off_t fl_end;
+
+ const char *volume;
- short fl_type;
- off_t fl_start;
- off_t fl_end;
+ struct gf_flock user_flock; /* the flock supplied by the user */
+ xlator_t *this; /* required for blocked locks */
+ struct __pl_inode *pl_inode;
- const char *volume;
+ call_frame_t *frame;
- struct gf_flock user_flock; /* the flock supplied by the user */
- xlator_t *this; /* required for blocked locks */
- fd_t *fd;
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
- call_frame_t *frame;
+ /*last time at which lock contention was detected and notified*/
+ struct timespec contention_time;
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval granted_time; /*time at which lock was queued into active list*/
+ /* These two together serve to uniquely identify each process
+ across nodes */
- /* These two together serve to uniquely identify each process
- across nodes */
+ void *client; /* to identify client node */
+ gf_lkowner_t owner;
+ pid_t client_pid; /* pid of client process */
- void *transport; /* to identify client node */
- gf_lkowner_t owner;
- pid_t client_pid; /* pid of client process */
+ char *connection_id; /* stores the client connection id */
+
+ struct list_head client_list; /* list of all locks from a client */
+ short fl_type;
+
+ int32_t status; /* Error code when we try to grant a lock in blocked
+ state */
};
typedef struct __pl_inode_lock pl_inode_lock_t;
-struct __pl_rw_req_t {
- struct list_head list;
- call_stub_t *stub;
- posix_lock_t region;
+struct _pl_rw_req {
+ struct list_head list;
+ call_stub_t *stub;
+ posix_lock_t region;
};
-typedef struct __pl_rw_req_t pl_rw_req_t;
-
-struct __pl_dom_list_t {
- struct list_head inode_list; /* list_head back to pl_inode_t */
- const char *domain;
- struct list_head entrylk_list; /* List of entry locks */
- struct list_head blocked_entrylks; /* List of all blocked entrylks */
- struct list_head inodelk_list; /* List of inode locks */
- struct list_head blocked_inodelks; /* List of all blocked inodelks */
+typedef struct _pl_rw_req pl_rw_req_t;
+
+struct _pl_dom_list {
+ struct list_head inode_list; /* list_head back to pl_inode_t */
+ const char *domain;
+ struct list_head entrylk_list; /* List of entry locks */
+ struct list_head blocked_entrylks; /* List of all blocked entrylks */
+ struct list_head inodelk_list; /* List of inode locks */
+ struct list_head blocked_inodelks; /* List of all blocked inodelks */
};
-typedef struct __pl_dom_list_t pl_dom_list_t;
+typedef struct _pl_dom_list pl_dom_list_t;
struct __entry_lock {
- struct list_head domain_list; /* list_head back to pl_dom_list_t */
- struct list_head blocked_locks; /* list_head back to blocked_entrylks */
+ struct list_head domain_list; /* list_head back to pl_dom_list_t */
+ struct list_head blocked_locks; /* list_head back to blocked_entrylks */
+ struct list_head contend; /* list of contending locks */
+ int ref;
+
+ call_frame_t *frame;
+ xlator_t *this;
+ struct __pl_inode *pinode;
- call_frame_t *frame;
- xlator_t *this;
+ const char *volume;
- const char *volume;
+ const char *basename;
- const char *basename;
- entrylk_type type;
+ time_t blkd_time; /* time at which lock was queued into blkd list */
+ time_t granted_time; /* time at which lock was queued into active list */
- struct timeval blkd_time; /*time at which lock was queued into blkd list*/
- struct timeval granted_time; /*time at which lock was queued into active list*/
+ /*last time at which lock contention was detected and notified*/
+ struct timespec contention_time;
- void *trans;
- gf_lkowner_t owner;
- pid_t client_pid; /* pid of client process */
+ void *client;
+ gf_lkowner_t owner;
+ pid_t client_pid; /* pid of client process */
+
+ char *connection_id; /* stores the client connection id */
+
+ struct list_head client_list; /* list of all locks from a client */
+ entrylk_type type;
};
typedef struct __entry_lock pl_entry_lock_t;
-
/* The "simulated" inode. This contains a list of all the locks associated
with this file */
struct __pl_inode {
- pthread_mutex_t mutex;
-
- struct list_head dom_list; /* list of domains */
- struct list_head ext_list; /* list of fcntl locks */
- struct list_head rw_list; /* list of waiting r/w requests */
- struct list_head reservelk_list; /* list of reservelks */
- struct list_head blocked_reservelks; /* list of blocked reservelks */
- struct list_head blocked_calls; /* List of blocked lock calls while a reserve is held*/
- int mandatory; /* if mandatory locking is enabled */
-
- inode_t *refkeeper; /* hold refs on an inode while locks are
- held to prevent pruning */
+ pthread_mutex_t mutex;
+
+ struct list_head dom_list; /* list of domains */
+ struct list_head ext_list; /* list of fcntl locks */
+ struct list_head rw_list; /* list of waiting r/w requests */
+ struct list_head reservelk_list; /* list of reservelks */
+ struct list_head blocked_reservelks; /* list of blocked reservelks */
+ struct list_head blocked_calls; /* List of blocked lock calls while a
+ reserve is held*/
+ struct list_head metalk_list; /* Meta lock list */
+ struct list_head queued_locks; /* This is to store the incoming lock
+ requests while meta lock is enabled */
+ struct list_head waiting; /* List of pending fops waiting to unlink/rmdir
+ the inode. */
+ int mandatory; /* if mandatory locking is enabled */
+
+ inode_t *refkeeper; /* hold refs on an inode while locks are
+ held to prevent pruning */
+ uuid_t gfid; /* placeholder for gfid of the inode */
+ inode_t *inode; /* pointer to be used for ref and unref
+ of inode_t as long as there are
+ locks on it */
+ gf_boolean_t migrated;
+
+ /* Flag to indicate whether to read mlock-enforce xattr from disk */
+ gf_boolean_t check_mlock_info;
+
+ /* Mandatory_lock enforce: IO will be allowed if and only if the lkowner has
+ held the lock.
+
+ Note: An xattr is set on the file to recover this information post
+ reboot. If client does not want mandatory lock to be enforced, then it
+ should remove this xattr explicitly
+ */
+ gf_boolean_t mlock_enforced;
+ /* There are scenarios where mandatory lock is granted but there are IOs
+ pending at posix level. To avoid this before preempting the previous lock
+ owner, we wait for all the fops to be unwound.
+ */
+ int fop_wind_count;
+ pthread_cond_t check_fop_wind_count;
+
+ gf_boolean_t track_fop_wind_count;
+
+ int32_t links; /* Number of hard links the inode has. */
+ uint32_t remove_running; /* Number of remove operations running. */
+ gf_boolean_t is_locked; /* Regular locks will be blocked. */
+ gf_boolean_t removed; /* The inode has been deleted. */
};
typedef struct __pl_inode pl_inode_t;
+struct __pl_metalk {
+ pthread_mutex_t mutex;
+ /* For pl_inode meta lock list */
+ struct list_head list;
+ /* For pl_ctx_t list */
+ struct list_head client_list;
+ char *client_uid;
-struct __pl_fd {
- gf_boolean_t nonblocking; /* whether O_NONBLOCK has been set */
+ pl_inode_t *pl_inode;
+ int ref;
};
-typedef struct __pl_fd pl_fd_t;
-
+typedef struct __pl_metalk pl_meta_lock_t;
typedef struct {
- gf_boolean_t mandatory; /* if mandatory locking is enabled */
- gf_boolean_t trace; /* trace lock requests in and out */
+ char *brickname;
+ uint32_t revocation_secs;
+ uint32_t revocation_max_blocked;
+ uint32_t notify_contention_delay;
+ mlk_mode_t mandatory_mode; /* holds current mandatory locking mode */
+ gf_boolean_t trace; /* trace lock requests in and out */
+ gf_boolean_t monkey_unlocking;
+ gf_boolean_t revocation_clear_all;
+ gf_boolean_t notify_contention;
+ gf_boolean_t mlock_enforced;
} posix_locks_private_t;
typedef struct {
- gf_boolean_t entrylk_count_req;
- gf_boolean_t inodelk_count_req;
- gf_boolean_t posixlk_count_req;
- gf_boolean_t parent_entrylk_req;
-
- /* used by {f,}truncate */
- loc_t loc;
- fd_t *fd;
- off_t offset;
- dict_t *xdata;
- enum {TRUNCATE, FTRUNCATE} op;
+ data_t *inodelk_dom_count_req;
+
+ dict_t *xdata;
+ loc_t loc[2];
+ fd_t *fd;
+ inode_t *inode;
+ off_t offset;
+ glusterfs_fop_t op;
+ gf_boolean_t entrylk_count_req;
+ gf_boolean_t inodelk_count_req;
+ gf_boolean_t posixlk_count_req;
+ gf_boolean_t parent_entrylk_req;
+ gf_boolean_t multiple_dom_lk_requests;
+ int update_mlock_enforced_flag;
} pl_local_t;
typedef struct {
- struct list_head locks_list;
+ struct list_head locks_list;
} pl_fdctx_t;
+struct _locker {
+ struct list_head lockers;
+ char *volume;
+ inode_t *inode;
+ gf_lkowner_t owner;
+};
+
+typedef struct _locks_ctx {
+ pthread_mutex_t lock;
+ struct list_head inodelk_lockers;
+ struct list_head entrylk_lockers;
+ struct list_head metalk_list;
+} pl_ctx_t;
+
+typedef struct _multi_dom_lk_data {
+ xlator_t *this;
+ inode_t *inode;
+ dict_t *xdata_rsp;
+ gf_boolean_t keep_max;
+} multi_dom_lk_data;
+
+typedef enum { DECREMENT, INCREMENT } pl_count_op_t;
+
+pl_ctx_t *
+pl_ctx_get(client_t *client, xlator_t *xlator);
+
+int
+pl_inodelk_client_cleanup(xlator_t *this, pl_ctx_t *ctx);
+
+int
+pl_entrylk_client_cleanup(xlator_t *this, pl_ctx_t *ctx);
+
#endif /* __POSIX_LOCKS_H__ */
diff --git a/xlators/features/locks/src/pl-messages.h b/xlators/features/locks/src/pl-messages.h
new file mode 100644
index 00000000000..e2d3d7ca974
--- /dev/null
+++ b/xlators/features/locks/src/pl-messages.h
@@ -0,0 +1,29 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _PL_MESSAGES_H_
+#define _PL_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(PL, PL_MSG_LOCK_NUMBER, PL_MSG_INODELK_CONTENTION_FAILED,
+ PL_MSG_ENTRYLK_CONTENTION_FAILED);
+
+#endif /* !_PL_MESSAGES_H_ */
diff --git a/xlators/features/locks/src/posix.c b/xlators/features/locks/src/posix.c
index 3da40ffacac..cf0ae4c57dd 100644
--- a/xlators/features/locks/src/posix.c
+++ b/xlators/features/locks/src/posix.c
@@ -1,2203 +1,5095 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012, 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <unistd.h>
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "inode.h"
-#include "logging.h"
-#include "common-utils.h"
+#include <glusterfs/compat.h>
+#include <glusterfs/logging.h>
#include "locks.h"
#include "common.h"
-#include "statedump.h"
+#include <glusterfs/statedump.h>
#include "clear.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/syncop.h>
#ifndef LLONG_MAX
#define LLONG_MAX LONG_LONG_MAX /* compat with old gcc */
-#endif /* LLONG_MAX */
+#endif /* LLONG_MAX */
/* Forward declarations */
+void
+do_blocked_rw(pl_inode_t *);
+static int
+__rw_allowable(pl_inode_t *, posix_lock_t *, glusterfs_fop_t);
+static int
+format_brickname(char *);
+int
+pl_lockinfo_get_brickname(xlator_t *, inode_t *, int32_t *);
+static int
+fetch_pathinfo(xlator_t *, inode_t *, int32_t *, char **);
-void do_blocked_rw (pl_inode_t *);
-static int __rw_allowable (pl_inode_t *, posix_lock_t *, glusterfs_fop_t);
+/*
+ * The client is always requesting data, but older
+ * servers were not returning it. Newer ones are, so
+ * the client is receiving a mix of NULL and non-NULL
+ * xdata in the answers when bricks are of different
+ * versions. This triggers a bug in older clients.
+ * To prevent that, we avoid returning extra xdata to
+ * older clients (making the newer brick to behave as
+ * an old brick).
+ */
+#define PL_STACK_UNWIND_FOR_CLIENT(fop, xdata, frame, op_ret, params...) \
+ do { \
+ pl_local_t *__local = NULL; \
+ if (frame->root->client && \
+ (frame->root->client->opversion < GD_OP_VERSION_3_10_0)) { \
+ __local = frame->local; \
+ PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params); \
+ } else { \
+ PL_STACK_UNWIND(fop, xdata, frame, op_ret, params); \
+ } \
+ } while (0)
+
+#define PL_STACK_UNWIND(fop, xdata, frame, op_ret, params...) \
+ do { \
+ pl_local_t *__local = NULL; \
+ inode_t *__parent = NULL; \
+ inode_t *__inode = NULL; \
+ char *__name = NULL; \
+ dict_t *__unref = NULL; \
+ int __i = 0; \
+ __local = frame->local; \
+ if (op_ret >= 0 && pl_needs_xdata_response(frame->local)) { \
+ if (xdata) \
+ dict_ref(xdata); \
+ else \
+ xdata = dict_new(); \
+ if (xdata) { \
+ __unref = xdata; \
+ while (__local->fd || __local->loc[__i].inode) { \
+ pl_get_xdata_rsp_args(__local, #fop, &__parent, &__inode, \
+ &__name, __i); \
+ pl_set_xdata_response(frame->this, __local, __parent, \
+ __inode, __name, xdata, __i > 0); \
+ if (__local->fd || __i == 1) \
+ break; \
+ __i++; \
+ } \
+ } \
+ } \
+ PL_STACK_UNWIND_AND_FREE(__local, fop, frame, op_ret, params); \
+ if (__unref) \
+ dict_unref(__unref); \
+ } while (0)
+
+#define PL_LOCAL_GET_REQUESTS(frame, this, xdata, __fd, __loc, __newloc) \
+ do { \
+ if (pl_has_xdata_requests(xdata)) { \
+ if (!frame->local) \
+ frame->local = mem_get0(this->local_pool); \
+ pl_local_t *__local = frame->local; \
+ if (__local) { \
+ if (__fd) { \
+ __local->fd = fd_ref(__fd); \
+ __local->inode = inode_ref(__fd->inode); \
+ } else { \
+ if (__loc) \
+ loc_copy(&__local->loc[0], __loc); \
+ if (__newloc) \
+ loc_copy(&__local->loc[1], __newloc); \
+ __local->inode = inode_ref(__local->loc[0].inode); \
+ } \
+ pl_get_xdata_requests(__local, xdata); \
+ } \
+ } \
+ } while (0)
+
+#define PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, loc, fd, priv) \
+ do { \
+ if ((dict && (dict_get(dict, GF_ENFORCE_MANDATORY_LOCK))) || \
+ (name && (strcmp(name, GF_ENFORCE_MANDATORY_LOCK) == 0))) { \
+ inode_t *__inode = (loc ? loc->inode : fd->inode); \
+ pl_inode_t *__pl_inode = pl_inode_get(this, __inode, NULL); \
+ if (__pl_inode == NULL) { \
+ op_ret = -1; \
+ op_errno = ENOMEM; \
+ goto unwind; \
+ } \
+ if (!pl_is_mandatory_locking_enabled(__pl_inode) || \
+ !priv->mlock_enforced) { \
+ op_ret = -1; \
+ gf_msg(this->name, GF_LOG_DEBUG, EINVAL, 0, \
+ "option %s would need mandatory lock to be enabled " \
+ "and feature.enforce-mandatory-lock option to be set " \
+ "to on", \
+ GF_ENFORCE_MANDATORY_LOCK); \
+ op_errno = EINVAL; \
+ goto unwind; \
+ } \
+ \
+ op_ret = pl_local_init(frame, this, loc, fd); \
+ if (op_ret) { \
+ op_errno = ENOMEM; \
+ goto unwind; \
+ } \
+ \
+ ((pl_local_t *)(frame->local))->update_mlock_enforced_flag = 1; \
+ } \
+ } while (0)
+
+#define PL_INODE_REMOVE(_fop, _frame, _xl, _loc1, _loc2, _cont, _cbk, \
+ _args...) \
+ ({ \
+ struct list_head contend; \
+ pl_inode_t *__pl_inode; \
+ call_stub_t *__stub; \
+ int32_t __error; \
+ INIT_LIST_HEAD(&contend); \
+ __error = pl_inode_remove_prepare(_xl, _frame, _loc2 ? _loc2 : _loc1, \
+ &__pl_inode, &contend); \
+ if (__error < 0) { \
+ __stub = fop_##_fop##_stub(_frame, _cont, ##_args); \
+ __error = pl_inode_remove_complete(_xl, __pl_inode, __stub, \
+ &contend); \
+ } else if (__error == 0) { \
+ PL_LOCAL_GET_REQUESTS(_frame, _xl, xdata, ((fd_t *)NULL), _loc1, \
+ _loc2); \
+ STACK_WIND_COOKIE(_frame, _cbk, __pl_inode, FIRST_CHILD(_xl), \
+ FIRST_CHILD(_xl)->fops->_fop, ##_args); \
+ } \
+ __error; \
+ })
+
+gf_boolean_t
+pl_has_xdata_requests(dict_t *xdata)
+{
+ static char *reqs[] = {GLUSTERFS_ENTRYLK_COUNT,
+ GLUSTERFS_INODELK_COUNT,
+ GLUSTERFS_INODELK_DOM_COUNT,
+ GLUSTERFS_POSIXLK_COUNT,
+ GLUSTERFS_PARENT_ENTRYLK,
+ GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS,
+ NULL};
+ static int reqs_size[] = {SLEN(GLUSTERFS_ENTRYLK_COUNT),
+ SLEN(GLUSTERFS_INODELK_COUNT),
+ SLEN(GLUSTERFS_INODELK_DOM_COUNT),
+ SLEN(GLUSTERFS_POSIXLK_COUNT),
+ SLEN(GLUSTERFS_PARENT_ENTRYLK),
+ SLEN(GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS),
+ 0};
+ int i = 0;
+
+ if (!xdata)
+ return _gf_false;
+
+ for (i = 0; reqs[i]; i++)
+ if (dict_getn(xdata, reqs[i], reqs_size[i]))
+ return _gf_true;
+
+ return _gf_false;
+}
-static pl_fdctx_t *
-pl_new_fdctx ()
+static int
+dict_delete_domain_key(dict_t *dict, char *key, data_t *value, void *data)
{
- pl_fdctx_t *fdctx = NULL;
+ dict_del(dict, key);
+ return 0;
+}
- fdctx = GF_CALLOC (1, sizeof (*fdctx),
- gf_locks_mt_pl_fdctx_t);
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, fdctx, out);
+void
+pl_get_xdata_requests(pl_local_t *local, dict_t *xdata)
+{
+ if (!local || !xdata)
+ return;
- INIT_LIST_HEAD (&fdctx->locks_list);
+ GF_ASSERT(local->xdata == NULL);
+ local->xdata = dict_copy_with_ref(xdata, NULL);
+
+ if (dict_get_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT)) {
+ local->entrylk_count_req = 1;
+ dict_del_sizen(xdata, GLUSTERFS_ENTRYLK_COUNT);
+ }
+ if (dict_get_sizen(xdata, GLUSTERFS_INODELK_COUNT)) {
+ local->inodelk_count_req = 1;
+ dict_del_sizen(xdata, GLUSTERFS_INODELK_COUNT);
+ }
+ if (dict_get_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS)) {
+ local->multiple_dom_lk_requests = 1;
+ dict_del_sizen(xdata, GLUSTERFS_MULTIPLE_DOM_LK_CNT_REQUESTS);
+ dict_foreach_fnmatch(xdata, GLUSTERFS_INODELK_DOM_PREFIX "*",
+ dict_delete_domain_key, NULL);
+ }
+
+ local->inodelk_dom_count_req = dict_get_sizen(xdata,
+ GLUSTERFS_INODELK_DOM_COUNT);
+ if (local->inodelk_dom_count_req) {
+ data_ref(local->inodelk_dom_count_req);
+ dict_del_sizen(xdata, GLUSTERFS_INODELK_DOM_COUNT);
+ }
+
+ if (dict_get_sizen(xdata, GLUSTERFS_POSIXLK_COUNT)) {
+ local->posixlk_count_req = 1;
+ dict_del_sizen(xdata, GLUSTERFS_POSIXLK_COUNT);
+ }
+
+ if (dict_get_sizen(xdata, GLUSTERFS_PARENT_ENTRYLK)) {
+ local->parent_entrylk_req = 1;
+ dict_del_sizen(xdata, GLUSTERFS_PARENT_ENTRYLK);
+ }
+}
-out:
- return fdctx;
+gf_boolean_t
+pl_needs_xdata_response(pl_local_t *local)
+{
+ if (!local)
+ return _gf_false;
+
+ if (local->parent_entrylk_req || local->entrylk_count_req ||
+ local->inodelk_dom_count_req || local->inodelk_count_req ||
+ local->posixlk_count_req || local->multiple_dom_lk_requests)
+ return _gf_true;
+
+ return _gf_false;
}
-static pl_fdctx_t *
-pl_check_n_create_fdctx (xlator_t *this, fd_t *fd)
+void
+pl_get_xdata_rsp_args(pl_local_t *local, char *fop, inode_t **parent,
+ inode_t **inode, char **name, int i)
{
- int ret = 0;
- uint64_t tmp = 0;
- pl_fdctx_t *fdctx = NULL;
+ if (strcmp(fop, "lookup") == 0) {
+ *parent = local->loc[0].parent;
+ *inode = local->loc[0].inode;
+ *name = (char *)local->loc[0].name;
+ } else {
+ if (local->fd) {
+ *inode = local->fd->inode;
+ } else {
+ *inode = local->loc[i].parent;
+ }
+ }
+}
- GF_VALIDATE_OR_GOTO (POSIX_LOCKS, this, out);
- GF_VALIDATE_OR_GOTO (this->name, fd, out);
+static inline int
+pl_track_io_fop_count(pl_local_t *local, xlator_t *this, pl_count_op_t op)
+{
+ pl_inode_t *pl_inode = NULL;
- LOCK (&fd->lock);
- {
- ret = __fd_ctx_get (fd, this, &tmp);
- if ((ret != 0) || (tmp == 0)) {
- fdctx = pl_new_fdctx ();
- if (fdctx == NULL) {
- goto unlock;
- }
- }
+ if (!local)
+ return -1;
- ret = __fd_ctx_set (fd, this, (uint64_t)(long)fdctx);
- if (ret != 0) {
- GF_FREE (fdctx);
- fdctx = NULL;
- gf_log (this->name, GF_LOG_DEBUG,
- "failed to set fd ctx");
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode)
+ return -1;
+
+ if (pl_inode->mlock_enforced && pl_inode->track_fop_wind_count) {
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (op == DECREMENT) {
+ pl_inode->fop_wind_count--;
+ /* fop_wind_count can go negative when lock enforcement is
+ * enabled on unwind path of an IO. Hence the "<" comparision.
+ */
+ if (pl_inode->fop_wind_count <= 0) {
+ pthread_cond_broadcast(&pl_inode->check_fop_wind_count);
+ pl_inode->track_fop_wind_count = _gf_false;
+ pl_inode->fop_wind_count = 0;
}
+ } else {
+ pl_inode->fop_wind_count++;
+ }
}
-unlock:
- UNLOCK (&fd->lock);
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
-out:
- return fdctx;
+ return 0;
}
-int
-pl_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+static int32_t
+__get_posixlk_count(pl_inode_t *pl_inode)
{
- pl_local_t *local = NULL;
+ posix_lock_t *lock = NULL;
+ int32_t count = 0;
- local = frame->local;
+ list_for_each_entry(lock, &pl_inode->ext_list, list) { count++; }
- if (local->op == TRUNCATE)
- loc_wipe (&local->loc);
+ return count;
+}
- if (local->xdata)
- dict_unref (local->xdata);
- if (local->fd)
- fd_unref (local->fd);
+int32_t
+get_posixlk_count(xlator_t *this, inode_t *inode)
+{
+ pl_inode_t *pl_inode = NULL;
+ uint64_t tmp_pl_inode = 0;
+ int32_t count = 0;
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno,
- prebuf, postbuf, xdata);
- return 0;
+ int ret = inode_ctx_get(inode, this, &tmp_pl_inode);
+ if (ret != 0) {
+ goto out;
+ }
+
+ pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ count = __get_posixlk_count(pl_inode);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+out:
+ return count;
+}
+
+void
+pl_parent_entrylk_xattr_fill(xlator_t *this, inode_t *parent, char *basename,
+ dict_t *dict, gf_boolean_t keep_max)
+{
+ int32_t entrylk = 0;
+ int32_t maxcount = -1;
+ int ret = -1;
+
+ if (!parent || !basename)
+ goto out;
+ if (keep_max) {
+ ret = dict_get_int32_sizen(dict, GLUSTERFS_PARENT_ENTRYLK, &maxcount);
+ if (ret < 0)
+ gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
+ GLUSTERFS_PARENT_ENTRYLK);
+ }
+ entrylk = check_entrylk_on_basename(this, parent, basename);
+ if (maxcount >= entrylk)
+ return;
+out:
+ ret = dict_set_int32_sizen(dict, GLUSTERFS_PARENT_ENTRYLK, entrylk);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0, " dict_set failed on key %s",
+ GLUSTERFS_PARENT_ENTRYLK);
+ }
}
+void
+pl_entrylk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
+ gf_boolean_t keep_max)
+{
+ int32_t count = 0;
+ int32_t maxcount = -1;
+ int ret = -1;
+
+ if (keep_max) {
+ ret = dict_get_int32_sizen(dict, GLUSTERFS_ENTRYLK_COUNT, &maxcount);
+ if (ret < 0)
+ gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
+ GLUSTERFS_ENTRYLK_COUNT);
+ }
+ count = get_entrylk_count(this, inode);
+ if (maxcount >= count)
+ return;
-static int
-truncate_allowed (pl_inode_t *pl_inode,
- void *transport, pid_t client_pid,
- gf_lkowner_t *owner, off_t offset)
+ ret = dict_set_int32_sizen(dict, GLUSTERFS_ENTRYLK_COUNT, count);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0, " dict_set failed on key %s",
+ GLUSTERFS_ENTRYLK_COUNT);
+ }
+}
+
+void
+pl_inodelk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
+ char *domname, gf_boolean_t keep_max)
{
- posix_lock_t *l = NULL;
- posix_lock_t region = {.list = {0, }, };
- int ret = 1;
+ int32_t count = 0;
+ int32_t maxcount = -1;
+ int ret = -1;
+
+ if (keep_max) {
+ ret = dict_get_int32_sizen(dict, GLUSTERFS_INODELK_COUNT, &maxcount);
+ if (ret < 0)
+ gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
+ GLUSTERFS_INODELK_COUNT);
+ }
+ count = get_inodelk_count(this, inode, domname);
+ if (maxcount >= count)
+ return;
- region.fl_start = offset;
- region.fl_end = LLONG_MAX;
- region.transport = transport;
- region.client_pid = client_pid;
- region.owner = *owner;
+ ret = dict_set_int32_sizen(dict, GLUSTERFS_INODELK_COUNT, count);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0,
+ "Failed to set count for "
+ "key %s",
+ GLUSTERFS_INODELK_COUNT);
+ }
- pthread_mutex_lock (&pl_inode->mutex);
- {
- list_for_each_entry (l, &pl_inode->ext_list, list) {
- if (!l->blocked
- && locks_overlap (&region, l)
- && !same_owner (&region, l)) {
- ret = 0;
- gf_log (POSIX_LOCKS, GF_LOG_TRACE, "Truncate "
- "allowed");
- break;
- }
- }
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ return;
+}
- return ret;
+void
+pl_posixlk_xattr_fill(xlator_t *this, inode_t *inode, dict_t *dict,
+ gf_boolean_t keep_max)
+{
+ int32_t count = 0;
+ int32_t maxcount = -1;
+ int ret = -1;
+
+ if (keep_max) {
+ ret = dict_get_int32_sizen(dict, GLUSTERFS_POSIXLK_COUNT, &maxcount);
+ if (ret < 0)
+ gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
+ GLUSTERFS_POSIXLK_COUNT);
+ }
+ count = get_posixlk_count(this, inode);
+ if (maxcount >= count)
+ return;
+
+ ret = dict_set_int32_sizen(dict, GLUSTERFS_POSIXLK_COUNT, count);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0, " dict_set failed on key %s",
+ GLUSTERFS_POSIXLK_COUNT);
+ }
}
+void
+pl_inodelk_xattr_fill_each(xlator_t *this, inode_t *inode, dict_t *dict,
+ char *domname, gf_boolean_t keep_max, char *key)
+{
+ int32_t count = 0;
+ int32_t maxcount = -1;
+ int ret = -1;
+
+ if (keep_max) {
+ ret = dict_get_int32(dict, key, &maxcount);
+ if (ret < 0)
+ gf_msg_debug(this->name, 0, " Failed to fetch the value for key %s",
+ GLUSTERFS_INODELK_COUNT);
+ }
+ count = get_inodelk_count(this, inode, domname);
+ if (maxcount >= count)
+ return;
+
+ ret = dict_set_int32(dict, key, count);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0,
+ "Failed to set count for "
+ "key %s",
+ key);
+ }
+
+ return;
+}
static int
-truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
+pl_inodelk_xattr_fill_multiple(dict_t *this, char *key, data_t *value,
+ void *data)
{
- posix_locks_private_t *priv = NULL;
- pl_local_t *local = NULL;
- inode_t *inode = NULL;
- pl_inode_t *pl_inode = NULL;
+ multi_dom_lk_data *d = data;
+ char *tmp_key = NULL;
+ char *save_ptr = NULL;
+
+ tmp_key = gf_strdup(key);
+ if (!tmp_key)
+ return -1;
+
+ strtok_r(tmp_key, ":", &save_ptr);
+ if (!*save_ptr) {
+ if (tmp_key)
+ GF_FREE(tmp_key);
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, EINVAL,
+ "Could not tokenize domain string from key %s", key);
+ return -1;
+ }
+
+ pl_inodelk_xattr_fill_each(d->this, d->inode, d->xdata_rsp, save_ptr,
+ d->keep_max, key);
+ if (tmp_key)
+ GF_FREE(tmp_key);
+
+ return 0;
+}
+void
+pl_fill_multiple_dom_lk_requests(xlator_t *this, pl_local_t *local,
+ inode_t *inode, dict_t *dict,
+ gf_boolean_t keep_max)
+{
+ multi_dom_lk_data data;
- priv = this->private;
- local = frame->local;
+ data.this = this;
+ data.inode = inode;
+ data.xdata_rsp = dict;
+ data.keep_max = keep_max;
- if (op_ret != 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "got error (errno=%d, stderror=%s) from child",
- op_errno, strerror (op_errno));
- goto unwind;
- }
+ dict_foreach_fnmatch(local->xdata, GLUSTERFS_INODELK_DOM_PREFIX "*",
+ pl_inodelk_xattr_fill_multiple, &data);
+}
- if (local->op == TRUNCATE)
- inode = local->loc.inode;
- else
- inode = local->fd->inode;
+void
+pl_set_xdata_response(xlator_t *this, pl_local_t *local, inode_t *parent,
+ inode_t *inode, char *name, dict_t *xdata,
+ gf_boolean_t max_lock)
+{
+ if (!xdata || !local)
+ return;
- pl_inode = pl_inode_get (this, inode);
- if (!pl_inode) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
+ if (local->parent_entrylk_req && parent && name && name[0] != '\0')
+ pl_parent_entrylk_xattr_fill(this, parent, name, xdata, max_lock);
- if (priv->mandatory
- && pl_inode->mandatory
- && !truncate_allowed (pl_inode, frame->root->trans,
- frame->root->pid, &frame->root->lk_owner,
- local->offset)) {
- op_ret = -1;
- op_errno = EAGAIN;
- goto unwind;
+ if (!inode)
+ return;
+
+ if (local->entrylk_count_req)
+ pl_entrylk_xattr_fill(this, inode, xdata, max_lock);
+
+ if (local->inodelk_dom_count_req)
+ pl_inodelk_xattr_fill(this, inode, xdata,
+ data_to_str(local->inodelk_dom_count_req),
+ max_lock);
+
+ if (local->inodelk_count_req)
+ pl_inodelk_xattr_fill(this, inode, xdata, NULL, max_lock);
+
+ if (local->posixlk_count_req)
+ pl_posixlk_xattr_fill(this, inode, xdata, max_lock);
+
+ if (local->multiple_dom_lk_requests)
+ pl_fill_multiple_dom_lk_requests(this, local, inode, xdata, max_lock);
+}
+
+/* Checks whether the region where fop is acting upon conflicts
+ * with existing locks. If there is no conflict function returns
+ * 1 else returns 0 with can_block boolean set accordingly to
+ * indicate block/fail the fop.
+ */
+int
+pl_is_fop_allowed(pl_inode_t *pl_inode, posix_lock_t *region, fd_t *fd,
+ glusterfs_fop_t op, gf_boolean_t *can_block)
+{
+ int ret = 0;
+
+ if (!__rw_allowable(pl_inode, region, op)) {
+ if (pl_inode->mlock_enforced) {
+ *can_block = _gf_false;
+ } else if ((!fd) || (fd && (fd->flags & O_NONBLOCK))) {
+ gf_log("locks", GF_LOG_TRACE,
+ "returning EAGAIN"
+ " because fd is O_NONBLOCK");
+ *can_block = _gf_false;
+ } else {
+ *can_block = _gf_true;
}
+ } else {
+ ret = 1;
+ }
- switch (local->op) {
- case TRUNCATE:
- STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->truncate,
- &local->loc, local->offset, local->xdata);
- break;
- case FTRUNCATE:
- STACK_WIND (frame, pl_truncate_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->ftruncate,
- local->fd, local->offset, local->xdata);
- break;
+ return ret;
+}
+
+static pl_fdctx_t *
+pl_new_fdctx()
+{
+ pl_fdctx_t *fdctx = GF_MALLOC(sizeof(*fdctx), gf_locks_mt_pl_fdctx_t);
+ GF_VALIDATE_OR_GOTO("posix-locks", fdctx, out);
+
+ INIT_LIST_HEAD(&fdctx->locks_list);
+
+out:
+ return fdctx;
+}
+
+static pl_fdctx_t *
+pl_check_n_create_fdctx(xlator_t *this, fd_t *fd)
+{
+ int ret = 0;
+ uint64_t tmp = 0;
+ pl_fdctx_t *fdctx = NULL;
+
+ GF_VALIDATE_OR_GOTO("posix-locks", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ LOCK(&fd->lock);
+ {
+ ret = __fd_ctx_get(fd, this, &tmp);
+ if ((ret != 0) || (tmp == 0)) {
+ fdctx = pl_new_fdctx();
+ if (fdctx == NULL) {
+ goto unlock;
+ }
}
- return 0;
+ ret = __fd_ctx_set(fd, this, (uint64_t)(long)fdctx);
+ if (ret != 0) {
+ GF_FREE(fdctx);
+ fdctx = NULL;
+ UNLOCK(&fd->lock);
+ gf_log(this->name, GF_LOG_DEBUG, "failed to set fd ctx");
+ goto out;
+ }
+ }
+unlock:
+ UNLOCK(&fd->lock);
-unwind:
- gf_log (this->name, GF_LOG_ERROR, "truncate failed with ret: %d, "
- "error: %s", op_ret, strerror (op_errno));
- if (local->op == TRUNCATE)
- loc_wipe (&local->loc);
- if (local->xdata)
- dict_unref (local->xdata);
- if (local->fd)
- fd_unref (local->fd);
-
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, buf, NULL, xdata);
- return 0;
+out:
+ return fdctx;
}
+int32_t
+pl_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ pl_track_io_fop_count(frame->local, this, DECREMENT);
+
+ PL_STACK_UNWIND(discard, xdata, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
int
-pl_truncate (call_frame_t *frame, xlator_t *this,
- loc_t *loc, off_t offset, dict_t *xdata)
+pl_discard_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- pl_local_t *local = NULL;
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
- local = mem_get0 (this->local_pool);
- GF_VALIDATE_OR_GOTO (this->name, local, unwind);
+ STACK_WIND(frame, pl_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+ return 0;
+}
- local->op = TRUNCATE;
- local->offset = offset;
- loc_copy (&local->loc, loc);
- if (xdata)
- local->xdata = dict_ref (xdata);
+int32_t
+pl_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+ pl_rw_req_t *rw = NULL;
+ posix_lock_t region = {
+ .list =
+ {
+ 0,
+ },
+ };
+ gf_boolean_t enabled = _gf_false;
+ gf_boolean_t can_block = _gf_true;
+ int op_ret = 0;
+ int op_errno = 0;
+ int allowed = 1;
+
+ GF_VALIDATE_OR_GOTO("locks", this, unwind);
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ frame->local = local;
+ local->inode = inode_ref(fd->inode);
+ local->fd = fd_ref(fd);
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (frame->root->pid < 0)
+ enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
+
+ if (enabled) {
+ region.fl_start = offset;
+ region.fl_end = offset + len - 1;
+ region.client = frame->root->client;
+ region.fd_num = fd_to_fdnum(fd);
+ region.client_pid = frame->root->pid;
+ region.owner = frame->root->lk_owner;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ allowed = pl_is_fop_allowed(pl_inode, &region, fd, GF_FOP_DISCARD,
+ &can_block);
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
+ goto unlock;
+ } else if (!can_block) {
+ op_errno = EAGAIN;
+ op_ret = -1;
+ goto unlock;
+ }
- frame->local = local;
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ if (!rw) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unlock;
+ }
- STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->stat, loc, NULL);
+ rw->stub = fop_discard_stub(frame, pl_discard_cont, fd, offset, len,
+ xdata);
+ if (!rw->stub) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ GF_FREE(rw);
+ goto unlock;
+ }
- return 0;
+ rw->region = region;
+
+ list_add_tail(&rw->list, &pl_inode->rw_list);
+ }
+ unlock:
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+ if (allowed == 1)
+ STACK_WIND(frame, pl_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
unwind:
- gf_log (this->name, GF_LOG_ERROR, "truncate for %s failed with ret: %d, "
- "error: %s", loc->path, -1, strerror (ENOMEM));
- STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ if (op_ret == -1)
+ PL_STACK_UNWIND(discard, xdata, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
- return 0;
+ return 0;
}
+int32_t
+pl_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ pl_track_io_fop_count(frame->local, this, DECREMENT);
+
+ PL_STACK_UNWIND(zerofill, xdata, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
int
-pl_ftruncate (call_frame_t *frame, xlator_t *this,
- fd_t *fd, off_t offset, dict_t *xdata)
+pl_zerofill_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
{
- pl_local_t *local = NULL;
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
- local = mem_get0 (this->local_pool);
- GF_VALIDATE_OR_GOTO (this->name, local, unwind);
+ STACK_WIND(frame, pl_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
+}
- local->op = FTRUNCATE;
- local->offset = offset;
- local->fd = fd_ref (fd);
- if (xdata)
- local->xdata = dict_ref (xdata);
+int32_t
+pl_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+ pl_rw_req_t *rw = NULL;
+ posix_lock_t region = {
+ .list =
+ {
+ 0,
+ },
+ };
+ gf_boolean_t enabled = _gf_false;
+ gf_boolean_t can_block = _gf_true;
+ int op_ret = 0;
+ int op_errno = 0;
+ int allowed = 1;
+
+ GF_VALIDATE_OR_GOTO("locks", this, unwind);
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ frame->local = local;
+ local->inode = inode_ref(fd->inode);
+ local->fd = fd_ref(fd);
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (frame->root->pid < 0)
+ enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
+
+ if (enabled) {
+ region.fl_start = offset;
+ region.fl_end = offset + len - 1;
+ region.client = frame->root->client;
+ region.fd_num = fd_to_fdnum(fd);
+ region.client_pid = frame->root->pid;
+ region.owner = frame->root->lk_owner;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ allowed = pl_is_fop_allowed(pl_inode, &region, fd, GF_FOP_ZEROFILL,
+ &can_block);
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
+ goto unlock;
+ } else if (!can_block) {
+ op_errno = EAGAIN;
+ op_ret = -1;
+ goto unlock;
+ }
- frame->local = local;
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ if (!rw) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unlock;
+ }
- STACK_WIND (frame, truncate_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
- return 0;
+ rw->stub = fop_zerofill_stub(frame, pl_zerofill_cont, fd, offset,
+ len, xdata);
+ if (!rw->stub) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ GF_FREE(rw);
+ goto unlock;
+ }
+
+ rw->region = region;
+ list_add_tail(&rw->list, &pl_inode->rw_list);
+ }
+ unlock:
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+ if (allowed == 1)
+ STACK_WIND(frame, pl_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
unwind:
- gf_log (this->name, GF_LOG_ERROR, "ftruncate failed with ret: %d, "
- "error: %s", -1, strerror (ENOMEM));
- STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ if (op_ret == -1)
+ PL_STACK_UNWIND(zerofill, xdata, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
- return 0;
+ return 0;
}
int
-pl_locks_by_fd (pl_inode_t *pl_inode, fd_t *fd)
+pl_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- posix_lock_t *l = NULL;
- int found = 0;
+ pl_local_t *local = frame->local;
+
+ pl_track_io_fop_count(local, this, DECREMENT);
- pthread_mutex_lock (&pl_inode->mutex);
- {
+ if (local->op == GF_FOP_TRUNCATE)
+ PL_STACK_UNWIND(truncate, xdata, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ else
+ PL_STACK_UNWIND(ftruncate, xdata, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
- list_for_each_entry (l, &pl_inode->ext_list, list) {
- if ((l->fd_num == fd_to_fdnum(fd))) {
- found = 1;
- break;
- }
- }
+int
+pl_ftruncate_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
- return found;
+ STACK_WIND(frame, pl_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
}
-static void
-delete_locks_of_fd (xlator_t *this, pl_inode_t *pl_inode, fd_t *fd)
+int
+pl_truncate_cont(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- posix_lock_t *tmp = NULL;
- posix_lock_t *l = NULL;
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
+
+ STACK_WIND(frame, pl_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
+}
+
+static int
+truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ pl_local_t *local = frame->local;
+ inode_t *inode = NULL;
+ pl_inode_t *pl_inode = NULL;
+ pl_rw_req_t *rw = NULL;
+ posix_lock_t region = {
+ .list =
+ {
+ 0,
+ },
+ };
+ gf_boolean_t enabled = _gf_false;
+ gf_boolean_t can_block = _gf_true;
+ int allowed = 1;
+
+ GF_VALIDATE_OR_GOTO("locks", this, unwind);
+
+ if (op_ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "got error (errno=%d, stderror=%s) from child", op_errno,
+ strerror(op_errno));
+ goto unwind;
+ }
+
+ if (local->op == GF_FOP_TRUNCATE)
+ inode = local->loc[0].inode;
+ else
+ inode = local->fd->inode;
+
+ local->inode = inode_ref(inode);
+
+ pl_inode = pl_inode_get(this, inode, local);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (frame->root->pid < 0)
+ enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
+
+ if (enabled) {
+ region.fl_start = local->offset;
+ region.fl_end = LLONG_MAX;
+ region.client = frame->root->client;
+ region.fd_num = fd_to_fdnum(local->fd);
+ region.client_pid = frame->root->pid;
+ region.owner = frame->root->lk_owner;
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ allowed = pl_is_fop_allowed(pl_inode, &region, local->fd, local->op,
+ &can_block);
+
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
+ goto unlock;
+ } else if (!can_block) {
+ op_errno = EAGAIN;
+ op_ret = -1;
+ goto unlock;
+ }
+
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ if (!rw) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unlock;
+ }
+
+ if (local->op == GF_FOP_TRUNCATE)
+ rw->stub = fop_truncate_stub(frame, pl_truncate_cont,
+ &local->loc[0], local->offset,
+ local->xdata);
+ else
+ rw->stub = fop_ftruncate_stub(frame, pl_ftruncate_cont,
+ local->fd, local->offset,
+ local->xdata);
+ if (!rw->stub) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ GF_FREE(rw);
+ goto unlock;
+ }
+
+ rw->region = region;
- struct list_head blocked_list;
+ list_add_tail(&rw->list, &pl_inode->rw_list);
+ }
+ unlock:
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
- INIT_LIST_HEAD (&blocked_list);
+ if (allowed == 1) {
+ switch (local->op) {
+ case GF_FOP_TRUNCATE:
+ STACK_WIND(frame, pl_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, &local->loc[0],
+ local->offset, local->xdata);
+ break;
+ case GF_FOP_FTRUNCATE:
+ STACK_WIND(frame, pl_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, local->fd,
+ local->offset, local->xdata);
+ break;
+ default:
+ break;
+ }
+ }
+unwind:
+ if (op_ret == -1) {
+ gf_log(this ? this->name : "locks", GF_LOG_ERROR,
+ "truncate failed with "
+ "ret: %d, error: %s",
+ op_ret, strerror(op_errno));
- pthread_mutex_lock (&pl_inode->mutex);
- {
+ switch (local->op) {
+ case GF_FOP_TRUNCATE:
+ PL_STACK_UNWIND(truncate, xdata, frame, op_ret, op_errno, buf,
+ NULL, xdata);
+ break;
+ case GF_FOP_FTRUNCATE:
+ PL_STACK_UNWIND(ftruncate, xdata, frame, op_ret, op_errno, buf,
+ NULL, xdata);
+ break;
+ default:
+ break;
+ }
+ }
+ return 0;
+}
- list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
- if ((l->fd_num == fd_to_fdnum(fd))) {
- if (l->blocked) {
- list_move_tail (&l->list, &blocked_list);
- continue;
- }
- __delete_lock (pl_inode, l);
- __destroy_lock (l);
- }
- }
+int
+pl_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ pl_local_t *local = NULL;
+ int ret = -1;
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ GF_VALIDATE_OR_GOTO("locks", this, unwind);
- list_for_each_entry_safe (l, tmp, &blocked_list, list) {
- list_del_init(&l->list);
- STACK_UNWIND_STRICT (lk, l->frame, -1, EAGAIN, &l->user_flock,
- NULL);
- __destroy_lock (l);
- }
+ local = mem_get0(this->local_pool);
+ GF_VALIDATE_OR_GOTO(this->name, local, unwind);
- grant_blocked_locks (this, pl_inode);
+ local->op = GF_FOP_TRUNCATE;
+ local->offset = offset;
+ loc_copy(&local->loc[0], loc);
+ if (xdata)
+ local->xdata = dict_ref(xdata);
- do_blocked_rw (pl_inode);
+ frame->local = local;
+ STACK_WIND(frame, truncate_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, NULL);
+ ret = 0;
+
+unwind:
+ if (ret == -1) {
+ gf_log(this ? this->name : "locks", GF_LOG_ERROR,
+ "truncate on %s failed with"
+ " ret: %d, error: %s",
+ loc->path, -1, strerror(ENOMEM));
+ STACK_UNWIND_STRICT(truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ }
+ return 0;
+}
+
+int
+pl_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ pl_local_t *local = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("locks", this, unwind);
+ local = mem_get0(this->local_pool);
+ GF_VALIDATE_OR_GOTO(this->name, local, unwind);
+
+ local->op = GF_FOP_FTRUNCATE;
+ local->offset = offset;
+ local->fd = fd_ref(fd);
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ frame->local = local;
+
+ STACK_WIND(frame, truncate_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ ret = 0;
+unwind:
+ if (ret == -1) {
+ gf_log(this ? this->name : "locks", GF_LOG_ERROR,
+ "ftruncate failed with"
+ " ret: %d, error: %s",
+ -1, strerror(ENOMEM));
+ STACK_UNWIND_STRICT(ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ }
+ return 0;
+}
+
+int
+pl_locks_by_fd(pl_inode_t *pl_inode, fd_t *fd)
+{
+ posix_lock_t *l = NULL;
+ int found = 0;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ list_for_each_entry(l, &pl_inode->ext_list, list)
+ {
+ if (l->fd_num == fd_to_fdnum(fd)) {
+ found = 1;
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ return found;
}
static void
-__delete_locks_of_owner (pl_inode_t *pl_inode,
- void *transport, gf_lkowner_t *owner)
-{
- posix_lock_t *tmp = NULL;
- posix_lock_t *l = NULL;
-
- /* TODO: what if it is a blocked lock with pending l->frame */
-
- list_for_each_entry_safe (l, tmp, &pl_inode->ext_list, list) {
- if (l->blocked)
- continue;
- if ((l->transport == transport) &&
- is_same_lkowner (&l->owner, owner)) {
- gf_log ("posix-locks", GF_LOG_TRACE,
- " Flushing lock"
- "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" state: %s",
- l->fl_type == F_UNLCK ? "Unlock" : "Lock",
- l->client_pid,
- lkowner_utoa (&l->owner),
- l->user_flock.l_start,
- l->user_flock.l_len,
- l->blocked == 1 ? "Blocked" : "Active");
-
- __delete_lock (pl_inode, l);
- __destroy_lock (l);
+delete_locks_of_fd(xlator_t *this, pl_inode_t *pl_inode, fd_t *fd)
+{
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *l = NULL;
+
+ struct list_head blocked_list;
+
+ INIT_LIST_HEAD(&blocked_list);
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ list_for_each_entry_safe(l, tmp, &pl_inode->ext_list, list)
+ {
+ if (l->fd_num == fd_to_fdnum(fd)) {
+ if (l->blocked) {
+ list_move_tail(&l->list, &blocked_list);
+ continue;
}
+ __delete_lock(l);
+ __destroy_lock(l);
+ }
}
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
- return;
-}
+ list_for_each_entry_safe(l, tmp, &blocked_list, list)
+ {
+ list_del_init(&l->list);
+ STACK_UNWIND_STRICT(lk, l->frame, -1, EAGAIN, &l->user_flock, NULL);
+ __destroy_lock(l);
+ }
+ grant_blocked_locks(this, pl_inode);
-int32_t
-pl_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+ do_blocked_rw(pl_inode);
+}
+
+static void
+__delete_locks_of_owner(pl_inode_t *pl_inode, client_t *client,
+ gf_lkowner_t *owner)
{
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ posix_lock_t *tmp = NULL;
+ posix_lock_t *l = NULL;
+
+ /* TODO: what if it is a blocked lock with pending l->frame */
+
+ list_for_each_entry_safe(l, tmp, &pl_inode->ext_list, list)
+ {
+ if (l->blocked)
+ continue;
+ if ((l->client == client) && is_same_lkowner(&l->owner, owner)) {
+ gf_log("posix-locks", GF_LOG_TRACE,
+ " Flushing lock"
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64
+ " state: %s",
+ l->fl_type == F_UNLCK ? "Unlock" : "Lock", l->client_pid,
+ lkowner_utoa(&l->owner), l->user_flock.l_start,
+ l->user_flock.l_len, l->blocked == 1 ? "Blocked" : "Active");
+
+ __delete_lock(l);
+ __destroy_lock(l);
+ }
+ }
+ return;
}
int32_t
-pl_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- int op_errno = EINVAL;
- int op_ret = -1;
- int32_t bcount = 0;
- int32_t gcount = 0;
- char key[PATH_MAX] = {0, };
- char *lk_summary = NULL;
- pl_inode_t *pl_inode = NULL;
- dict_t *dict = NULL;
- clrlk_args args = {0,};
-
- if (!name)
- goto usual;
-
- if (strncmp (name, GF_XATTR_CLRLK_CMD, strlen (GF_XATTR_CLRLK_CMD)))
- goto usual;
-
- if (clrlk_parse_args (name, &args)) {
- op_errno = EINVAL;
- goto out;
- }
+pl_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
- dict = dict_new ();
- if (!dict) {
- op_errno = ENOMEM;
- goto out;
+static int32_t
+pl_getxattr_clrlk(xlator_t *this, const char *name, inode_t *inode,
+ dict_t **dict, int32_t *op_errno)
+{
+ int32_t bcount = 0;
+ int32_t gcount = 0;
+ char *key = NULL;
+ char *lk_summary = NULL;
+ pl_inode_t *pl_inode = NULL;
+ clrlk_args args = {
+ 0,
+ };
+ char *brickname = NULL;
+ int32_t op_ret = -1;
+
+ *op_errno = EINVAL;
+
+ if (clrlk_parse_args(name, &args)) {
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ *dict = dict_new();
+ if (!*dict) {
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ pl_inode = pl_inode_get(this, inode, NULL);
+ if (!pl_inode) {
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ switch (args.type) {
+ case CLRLK_INODE:
+ case CLRLK_ENTRY:
+ op_ret = clrlk_clear_lks_in_all_domains(this, pl_inode, &args,
+ &bcount, &gcount, op_errno);
+ break;
+ case CLRLK_POSIX:
+ op_ret = clrlk_clear_posixlk(this, pl_inode, &args, &bcount,
+ &gcount, op_errno);
+ break;
+ default:
+ op_ret = -1;
+ *op_errno = EINVAL;
+ }
+ if (op_ret) {
+ if (args.type >= CLRLK_TYPE_MAX) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "clear locks: invalid lock type %d", args.type);
+ } else {
+ gf_log(this->name, GF_LOG_ERROR,
+ "clear locks of type %s failed: %s",
+ clrlk_type_names[args.type], strerror(*op_errno));
}
- pl_inode = pl_inode_get (this, loc->inode);
- if (!pl_inode) {
- op_errno = ENOMEM;
- goto out;
+ goto out;
+ }
+
+ op_ret = fetch_pathinfo(this, inode, op_errno, &brickname);
+ if (op_ret) {
+ gf_log(this->name, GF_LOG_WARNING, "Couldn't get brickname");
+ } else {
+ op_ret = format_brickname(brickname);
+ if (op_ret) {
+ gf_log(this->name, GF_LOG_WARNING, "Couldn't format brickname");
+ GF_FREE(brickname);
+ brickname = NULL;
}
+ }
- switch (args.type) {
- case CLRLK_INODE:
- case CLRLK_ENTRY:
- op_ret = clrlk_clear_lks_in_all_domains (this, pl_inode,
- &args, &bcount,
- &gcount,
- &op_errno);
- if (op_ret)
- goto out;
- break;
- case CLRLK_POSIX:
- op_ret = clrlk_clear_posixlk (this, pl_inode, &args,
- &bcount, &gcount,
- &op_errno);
- if (op_ret)
- goto out;
- break;
- case CLRLK_TYPE_MAX:
- op_errno = EINVAL;
- goto out;
+ if (!gcount && !bcount) {
+ if (gf_asprintf(&lk_summary, "No locks cleared.") == -1) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
}
+ } else if (gf_asprintf(&lk_summary,
+ "%s: %s blocked locks=%d "
+ "granted locks=%d",
+ (brickname == NULL) ? this->name : brickname,
+ clrlk_type_names[args.type], bcount, gcount) == -1) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ gf_log(this->name, GF_LOG_DEBUG, "%s", lk_summary);
+
+ key = gf_strdup(name);
+ if (!key) {
+ op_ret = -1;
+ goto out;
+ }
+ if (dict_set_dynstr(*dict, key, lk_summary)) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = 0;
- if (!gcount && !bcount) {
- if (gf_asprintf (&lk_summary, "No locks cleared.") == -1) {
- op_errno = ENOMEM;
- goto out;
- }
- } else if (gf_asprintf (&lk_summary, "%s: %s blocked locks=%d "
- "granted locks=%d", this->name,
- (args.type == CLRLK_INODE)? "inode":
- (args.type == CLRLK_ENTRY)? "entry":
- (args.type == CLRLK_POSIX)? "posix": " ",
- bcount, gcount) == -1) {
- op_errno = ENOMEM;
- goto out;
- }
+out:
+ GF_FREE(brickname);
+ GF_FREE(args.opts);
+ GF_FREE(key);
+ if (op_ret) {
+ GF_FREE(lk_summary);
+ }
+
+ return op_ret;
+}
- strncpy (key, name, strlen (name));
- if (dict_set_dynstr (dict, key, lk_summary)) {
- op_errno = ENOMEM;
- goto out;
- }
+int32_t
+pl_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ int32_t op_ret = -1;
+ dict_t *dict = NULL;
- op_ret = 0;
-out:
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
-
- if (args.opts)
- GF_FREE (args.opts);
- if (op_ret && lk_summary)
- GF_FREE (lk_summary);
- if (dict)
- dict_unref (dict);
- return 0;
+ if (!name)
+ goto usual;
+
+ if (strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)))
+ goto usual;
+
+ op_ret = pl_getxattr_clrlk(this, name, loc->inode, &dict, &op_errno);
+
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ if (dict)
+ dict_unref(dict);
+ return 0;
usual:
- STACK_WIND (frame, pl_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
- return 0;
+ STACK_WIND(frame, pl_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
}
-int32_t
-pl_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd, dict_t *xdata)
+static int
+format_brickname(char *brickname)
{
- pl_fdctx_t *fdctx = NULL;
+ int ret = -1;
+ char *hostname = NULL;
+ char *volume = NULL;
+ char *saveptr = NULL;
- if (op_ret < 0)
- goto unwind;
+ if (!brickname)
+ goto out;
- fdctx = pl_check_n_create_fdctx (this, fd);
- if (!fdctx) {
- op_errno = ENOMEM;
- op_ret = -1;
- goto unwind;
- }
+ strtok_r(brickname, ":", &saveptr);
+ hostname = gf_strdup(strtok_r(NULL, ":", &saveptr));
+ if (hostname == NULL)
+ goto out;
+ volume = gf_strdup(strtok_r(NULL, ".", &saveptr));
+ if (volume == NULL)
+ goto out;
-unwind:
- STACK_UNWIND_STRICT (opendir,
- frame,
- op_ret,
- op_errno,
- fd, xdata);
- return 0;
+ sprintf(brickname, "%s:%s", hostname, volume);
+
+ ret = 0;
+out:
+ GF_FREE(hostname);
+ GF_FREE(volume);
+ return ret;
}
-int32_t
-pl_opendir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, fd_t *fd, dict_t *xdata)
-{
- STACK_WIND (frame,
- pl_opendir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir,
- loc, fd, xdata);
- return 0;
+static int
+fetch_pathinfo(xlator_t *this, inode_t *inode, int32_t *op_errno,
+ char **brickname)
+{
+ int ret = -1;
+ loc_t loc = {
+ 0,
+ };
+ dict_t *dict = NULL;
+
+ if (!brickname)
+ goto out;
+
+ if (!op_errno)
+ goto out;
+
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ loc.inode = inode_ref(inode);
+
+ ret = syncop_getxattr(FIRST_CHILD(this), &loc, &dict, GF_XATTR_PATHINFO_KEY,
+ NULL, NULL);
+ if (ret < 0) {
+ *op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(dict, GF_XATTR_PATHINFO_KEY, brickname);
+ if (ret)
+ goto out;
+
+ *brickname = gf_strdup(*brickname);
+ if (*brickname == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (dict != NULL) {
+ dict_unref(dict);
+ }
+ loc_wipe(&loc);
+ return ret;
}
int
-pl_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+pl_lockinfo_get_brickname(xlator_t *this, inode_t *inode, int32_t *op_errno)
{
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
+ posix_locks_private_t *priv = this->private;
+ char *brickname = NULL;
+ char *end = NULL;
+ char *tmp = NULL;
+
+ int ret = fetch_pathinfo(this, inode, op_errno, &brickname);
+ if (ret)
+ goto out;
+
+ end = strrchr(brickname, ':');
+ if (!end) {
+ GF_FREE(brickname);
+ ret = -1;
+ goto out;
+ }
+
+ tmp = brickname;
+ brickname = gf_strndup(brickname, (end - brickname));
+ if (brickname == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ priv->brickname = brickname;
+ ret = 0;
+out:
+ GF_FREE(tmp);
+ return ret;
+}
- return 0;
+char *
+pl_lockinfo_key(xlator_t *this, inode_t *inode, int32_t *op_errno)
+{
+ posix_locks_private_t *priv = this->private;
+ char *key = NULL;
+ int ret = 0;
+
+ if (priv->brickname == NULL) {
+ ret = pl_lockinfo_get_brickname(this, inode, op_errno);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING, "cannot get brickname");
+ goto out;
+ }
+ }
+
+ key = priv->brickname;
+out:
+ return key;
}
+int32_t
+pl_fgetxattr_handle_lockinfo(xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t *op_errno)
+{
+ char *key = NULL, *buf = NULL;
+ int32_t op_ret = 0;
+ unsigned long fdnum = 0;
+ int32_t len = 0;
+ dict_t *tmp = NULL;
+
+ pl_inode_t *pl_inode = pl_inode_get(this, fd->inode, NULL);
+
+ if (!pl_inode) {
+ gf_log(this->name, GF_LOG_DEBUG, "Could not get inode.");
+ *op_errno = EBADFD;
+ op_ret = -1;
+ goto out;
+ }
+
+ if (!pl_locks_by_fd(pl_inode, fd)) {
+ op_ret = 0;
+ goto out;
+ }
+
+ fdnum = fd_to_fdnum(fd);
+
+ key = pl_lockinfo_key(this, fd->inode, op_errno);
+ if (key == NULL) {
+ op_ret = -1;
+ goto out;
+ }
+
+ tmp = dict_new();
+ if (tmp == NULL) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = dict_set_uint64(tmp, key, fdnum);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log(this->name, GF_LOG_WARNING,
+ "setting lockinfo value "
+ "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)",
+ fdnum, fd, uuid_utoa(fd->inode->gfid), strerror(*op_errno));
+ goto out;
+ }
+
+ op_ret = dict_allocate_and_serialize(tmp, (char **)&buf,
+ (unsigned int *)&len);
+ if (op_ret != 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log(this->name, GF_LOG_WARNING,
+ "dict_serialized_length failed (%s) while handling "
+ "lockinfo for fd (ptr:%p inode-gfid:%s)",
+ strerror(*op_errno), fd, uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+ op_ret = dict_set_dynptr(dict, GF_XATTR_LOCKINFO_KEY, buf, len);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ gf_log(this->name, GF_LOG_WARNING,
+ "setting lockinfo value "
+ "(%lu) for fd (ptr:%p inode-gfid:%s) failed (%s)",
+ fdnum, fd, uuid_utoa(fd->inode->gfid), strerror(*op_errno));
+ goto out;
+ }
+
+ buf = NULL;
+out:
+ if (tmp != NULL) {
+ dict_unref(tmp);
+ }
-int
-pl_flush (call_frame_t *frame, xlator_t *this,
- fd_t *fd, dict_t *xdata)
+ if (buf != NULL) {
+ GF_FREE(buf);
+ }
+
+ return op_ret;
+}
+
+int32_t
+pl_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
{
- pl_inode_t *pl_inode = NULL;
+ int32_t op_ret = 0, op_errno = 0;
+ dict_t *dict = NULL;
+
+ if (!name) {
+ goto usual;
+ }
+
+ if (strcmp(name, GF_XATTR_LOCKINFO_KEY) == 0) {
+ dict = dict_new();
+ if (dict == NULL) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- pl_inode = pl_inode_get (this, fd->inode);
+ op_ret = pl_fgetxattr_handle_lockinfo(this, fd, dict, &op_errno);
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "getting lockinfo on fd (ptr:%p inode-gfid:%s) "
+ "failed (%s)",
+ fd, uuid_utoa(fd->inode->gfid), strerror(op_errno));
+ }
- if (!pl_inode) {
- gf_log (this->name, GF_LOG_DEBUG, "Could not get inode.");
- STACK_UNWIND_STRICT (flush, frame, -1, EBADFD, NULL);
- return 0;
+ goto unwind;
+ } else if (strncmp(name, GF_XATTR_CLRLK_CMD, SLEN(GF_XATTR_CLRLK_CMD)) ==
+ 0) {
+ op_ret = pl_getxattr_clrlk(this, name, fd->inode, &dict, &op_errno);
+
+ goto unwind;
+ } else {
+ goto usual;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, NULL);
+ if (dict != NULL) {
+ dict_unref(dict);
+ }
+
+ return 0;
+
+usual:
+ STACK_WIND(frame, default_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
+}
+
+int32_t
+pl_migrate_locks(call_frame_t *frame, fd_t *newfd, uint64_t oldfd_num,
+ int32_t *op_errno)
+{
+ posix_lock_t *l = NULL;
+ int32_t op_ret = 0;
+ uint64_t newfd_num = fd_to_fdnum(newfd);
+
+ pl_inode_t *pl_inode = pl_inode_get(frame->this, newfd->inode, NULL);
+ if (pl_inode == NULL) {
+ op_ret = -1;
+ *op_errno = EBADFD;
+ goto out;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ list_for_each_entry(l, &pl_inode->ext_list, list)
+ {
+ if (l->fd_num == oldfd_num) {
+ l->fd_num = newfd_num;
+ l->client = frame->root->client;
+ }
}
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
- pl_trace_flush (this, frame, fd);
+ op_ret = 0;
+out:
+ return op_ret;
+}
- if (frame->root->lk_owner.len == 0) {
- /* Handle special case when protocol/server sets lk-owner to zero.
- * This usually happens due to a client disconnection. Hence, free
- * all locks opened with this fd.
- */
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing all locks with fd %p", fd);
- delete_locks_of_fd (this, pl_inode, fd);
- goto wind;
+int32_t
+pl_fsetxattr_handle_lockinfo(call_frame_t *frame, fd_t *fd, char *lockinfo_buf,
+ int len, int32_t *op_errno)
+{
+ int32_t op_ret = -1;
+ uint64_t oldfd_num = 0;
+ char *key = NULL;
+
+ dict_t *lockinfo = dict_new();
+ if (lockinfo == NULL) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_ret = dict_unserialize(lockinfo_buf, len, &lockinfo);
+ if (op_ret < 0) {
+ *op_errno = -op_ret;
+ op_ret = -1;
+ goto out;
+ }
+
+ key = pl_lockinfo_key(frame->this, fd->inode, op_errno);
+ if (key == NULL) {
+ op_ret = -1;
+ goto out;
+ }
+
+ op_ret = dict_get_uint64(lockinfo, key, &oldfd_num);
+
+ if (oldfd_num == 0) {
+ op_ret = 0;
+ goto out;
+ }
+
+ op_ret = pl_migrate_locks(frame, fd, oldfd_num, op_errno);
+ if (op_ret < 0) {
+ gf_log(frame->this->name, GF_LOG_WARNING,
+ "migration of locks from oldfd (ptr:%p) to newfd "
+ "(ptr:%p) (inode-gfid:%s)",
+ (void *)(uintptr_t)oldfd_num, fd, uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+out:
+ dict_unref(lockinfo);
+
+ return op_ret;
+}
+int32_t
+pl_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+
+ local = frame->local;
+ if (local && local->update_mlock_enforced_flag && op_ret != -1) {
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
}
- pthread_mutex_lock (&pl_inode->mutex);
+
+ pthread_mutex_lock(&pl_inode->mutex);
{
- __delete_locks_of_owner (pl_inode, frame->root->trans,
- &frame->root->lk_owner);
+ pl_inode->mlock_enforced = _gf_true;
+ pl_inode->check_mlock_info = _gf_false;
}
- pthread_mutex_unlock (&pl_inode->mutex);
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
- grant_blocked_locks (this, pl_inode);
+unwind:
+ PL_STACK_UNWIND_FOR_CLIENT(fsetxattr, xdata, frame, op_ret, op_errno,
+ xdata);
+ return 0;
+}
- do_blocked_rw (pl_inode);
+int32_t
+pl_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ int32_t op_errno = 0;
+ void *lockinfo_buf = NULL;
+ int len = 0;
+ char *name = NULL;
+ posix_locks_private_t *priv = this->private;
+
+ int32_t op_ret = dict_get_ptr_and_len(dict, GF_XATTR_LOCKINFO_KEY,
+ &lockinfo_buf, &len);
+ if (lockinfo_buf == NULL) {
+ goto usual;
+ }
+
+ op_ret = pl_fsetxattr_handle_lockinfo(frame, fd, lockinfo_buf, len,
+ &op_errno);
+ if (op_ret < 0) {
+ goto unwind;
+ }
-wind:
- STACK_WIND (frame, pl_flush_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush, fd, xdata);
- return 0;
-}
+usual:
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+ PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, ((loc_t *)NULL), fd,
+ priv);
-int
-pl_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+ STACK_WIND(frame, pl_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
+
+unwind:
+ PL_STACK_UNWIND_FOR_CLIENT(fsetxattr, xdata, frame, op_ret, op_errno, NULL);
+
+ return 0;
+}
+
+int32_t
+pl_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- pl_fdctx_t *fdctx = NULL;
+ pl_fdctx_t *fdctx = NULL;
- if (op_ret < 0)
- goto unwind;
+ if (op_ret < 0)
+ goto unwind;
- fdctx = pl_check_n_create_fdctx (this, fd);
- if (!fdctx) {
- op_errno = ENOMEM;
- op_ret = -1;
- goto unwind;
- }
+ fdctx = pl_check_n_create_fdctx(this, fd);
+ if (!fdctx) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unwind;
+ }
unwind:
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ PL_STACK_UNWIND(opendir, xdata, frame, op_ret, op_errno, fd, xdata);
- return 0;
+ return 0;
}
+int32_t
+pl_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+ STACK_WIND(frame, pl_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
+}
int
-pl_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
+pl_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
{
- STACK_WIND (frame, pl_open_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
- loc, flags, fd, xdata);
+ PL_STACK_UNWIND_FOR_CLIENT(flush, xdata, frame, op_ret, op_errno, xdata);
- return 0;
+ return 0;
}
+int
+pl_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ pl_inode_t *pl_inode = pl_inode_get(this, fd->inode, NULL);
+ if (!pl_inode) {
+ gf_log(this->name, GF_LOG_DEBUG, "Could not get inode.");
+ STACK_UNWIND_STRICT(flush, frame, -1, EBADFD, NULL);
+ return 0;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (pl_inode->migrated) {
+ pthread_mutex_unlock(&pl_inode->mutex);
+ STACK_UNWIND_STRICT(flush, frame, -1, EREMOTE, NULL);
+ return 0;
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ pl_trace_flush(this, frame, fd);
+
+ if (frame->root->lk_owner.len == 0) {
+ /* Handle special case when protocol/server sets lk-owner to zero.
+ * This usually happens due to a client disconnection. Hence, free
+ * all locks opened with this fd.
+ */
+ gf_log(this->name, GF_LOG_TRACE, "Releasing all locks with fd %p", fd);
+ delete_locks_of_fd(this, pl_inode, fd);
+ goto wind;
+ }
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __delete_locks_of_owner(pl_inode, frame->root->client,
+ &frame->root->lk_owner);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ grant_blocked_locks(this, pl_inode);
+
+ do_blocked_rw(pl_inode);
+
+wind:
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+ STACK_WIND(frame, pl_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
+}
int
-pl_create_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- fd_t *fd, inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+pl_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- pl_fdctx_t *fdctx = NULL;
+ pl_fdctx_t *fdctx = NULL;
- if (op_ret < 0)
- goto unwind;
+ if (op_ret < 0)
+ goto unwind;
- fdctx = pl_check_n_create_fdctx (this, fd);
- if (!fdctx) {
- op_errno = ENOMEM;
- op_ret = -1;
- goto unwind;
- }
+ fdctx = pl_check_n_create_fdctx(this, fd);
+ if (!fdctx) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unwind;
+ }
unwind:
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata);
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
- return 0;
+ return 0;
}
-
int
-pl_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
- dict_t *xdata)
+pl_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- STACK_WIND (frame, pl_create_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->create,
- loc, flags, mode, umask, fd, xdata);
- return 0;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ pl_inode_t *pl_inode = NULL;
+ posix_lock_t *l = NULL;
+ posix_locks_private_t *priv = this->private;
+
+ GF_VALIDATE_OR_GOTO("locks", this, unwind);
+
+ op_ret = 0, op_errno = 0;
+ pl_inode = pl_inode_get(this, fd->inode, NULL);
+ if (!pl_inode) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM, "Could not get inode");
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ /* As per design, under forced and file-based mandatory locking modes
+ * it doesn't matter whether inodes's lock list contain advisory or
+ * mandatory type locks. So we just check whether inode's lock list is
+ * empty or not to make sure that no locks are being held for the file.
+ * Whereas under optimal mandatory locking mode, we strictly fail open
+ * if and only if lock list contain mandatory locks.
+ */
+ if (((priv->mandatory_mode == MLK_FILE_BASED) && pl_inode->mandatory) ||
+ priv->mandatory_mode == MLK_FORCED) {
+ if (fd->flags & O_TRUNC) {
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (!list_empty(&pl_inode->ext_list)) {
+ op_ret = -1;
+ op_errno = EAGAIN;
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+ } else if (priv->mandatory_mode == MLK_OPTIMAL) {
+ if (fd->flags & O_TRUNC) {
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ list_for_each_entry(l, &pl_inode->ext_list, list)
+ {
+ if ((l->lk_flags & GF_LK_MANDATORY)) {
+ op_ret = -1;
+ op_errno = EAGAIN;
+ break;
+ }
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+ }
+
+unwind:
+ if (op_ret == -1)
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, NULL, NULL);
+ else
+ STACK_WIND(frame, pl_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
}
+int
+pl_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ pl_fdctx_t *fdctx = NULL;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ fdctx = pl_check_n_create_fdctx(this, fd);
+ if (!fdctx) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unwind;
+ }
+
+unwind:
+ PL_STACK_UNWIND(create, xdata, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+
+ return 0;
+}
int
-pl_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count, struct iatt *stbuf,
- struct iobref *iobref, dict_t *xdata)
+pl_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno,
- vector, count, stbuf, iobref, xdata);
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
- return 0;
+ STACK_WIND(frame, pl_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
}
int
-pl_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+pl_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iovec *vector, int32_t count,
+ struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ pl_track_io_fop_count(frame->local, this, DECREMENT);
- return 0;
+ PL_STACK_UNWIND(readv, xdata, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
+
+ return 0;
}
+int
+pl_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ pl_track_io_fop_count(frame->local, this, DECREMENT);
+
+ PL_STACK_UNWIND(writev, xdata, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
void
-do_blocked_rw (pl_inode_t *pl_inode)
+do_blocked_rw(pl_inode_t *pl_inode)
{
- struct list_head wind_list;
- pl_rw_req_t *rw = NULL;
- pl_rw_req_t *tmp = NULL;
+ struct list_head wind_list;
+ pl_rw_req_t *rw = NULL;
+ pl_rw_req_t *tmp = NULL;
- INIT_LIST_HEAD (&wind_list);
+ INIT_LIST_HEAD(&wind_list);
- pthread_mutex_lock (&pl_inode->mutex);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ list_for_each_entry_safe(rw, tmp, &pl_inode->rw_list, list)
{
- list_for_each_entry_safe (rw, tmp, &pl_inode->rw_list, list) {
- if (__rw_allowable (pl_inode, &rw->region,
- rw->stub->fop)) {
- list_del_init (&rw->list);
- list_add_tail (&rw->list, &wind_list);
- }
+ if (__rw_allowable(pl_inode, &rw->region, rw->stub->fop)) {
+ list_del_init(&rw->list);
+ list_add_tail(&rw->list, &wind_list);
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
}
+ }
}
- pthread_mutex_unlock (&pl_inode->mutex);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
- list_for_each_entry_safe (rw, tmp, &wind_list, list) {
- list_del_init (&rw->list);
- call_resume (rw->stub);
- GF_FREE (rw);
- }
+ list_for_each_entry_safe(rw, tmp, &wind_list, list)
+ {
+ list_del_init(&rw->list);
+ call_resume(rw->stub);
+ GF_FREE(rw);
+ }
- return;
+ return;
}
+/* when mandatory lock is enforced:
+ If an IO request comes on a region which is out of the boundary of the
+ granted mandatory lock, it will be rejected.
+
+ Note: There is no IO blocking with mandatory lock enforced as it may be
+ a stale data from an old client.
+ */
+gf_boolean_t static within_range(posix_lock_t *existing, posix_lock_t *new)
+{
+ if (existing->fl_start <= new->fl_start && existing->fl_end >= new->fl_end)
+ return _gf_true;
+
+ return _gf_false;
+}
static int
-__rw_allowable (pl_inode_t *pl_inode, posix_lock_t *region,
- glusterfs_fop_t op)
+__rw_allowable(pl_inode_t *pl_inode, posix_lock_t *region, glusterfs_fop_t op)
{
- posix_lock_t *l = NULL;
- int ret = 1;
+ posix_lock_t *l = NULL;
+ posix_locks_private_t *priv = THIS->private;
+ int ret = 1;
- list_for_each_entry (l, &pl_inode->ext_list, list) {
- if (locks_overlap (l, region) && !same_owner (l, region)) {
- if ((op == GF_FOP_READ) && (l->fl_type != F_WRLCK))
- continue;
- ret = 0;
- break;
+ if (pl_inode->mlock_enforced) {
+ list_for_each_entry(l, &pl_inode->ext_list, list)
+ {
+ /*
+ with lock enforced (fencing) there should not be any blocking
+ lock coexisting.
+ */
+ if (same_owner(l, region)) {
+ /* Should range check be strict for same owner with fencing? */
+ if (locks_overlap(l, region)) {
+ if (within_range(l, region)) {
+ return 1;
+ } else {
+ /*
+ Should we allow read fop if it does not fit it in the
+ range?
+ if (op == GF_FOP_READ && l->fl_type != F_WRLCK) {
+ return 1;
+ }
+ */
+ return 0;
+ }
+ }
+ } else {
+ if (locks_overlap(l, region)) {
+ /*
+ with fencing should a read from a different owner be
+ allowed if the mandatory lock taken is F_RDLCK?
+ if (op == GF_FOP_READ && l->fl_type != F_WRLCK) {
+ return 1;
+ }
+ */
+ return 0;
}
+ }
}
- return ret;
-}
+ /* No lock has been taken by this owner */
+ return 0;
+ }
+
+ list_for_each_entry(l, &pl_inode->ext_list, list)
+ {
+ if (!l->blocked && locks_overlap(l, region) && !same_owner(l, region)) {
+ if ((op == GF_FOP_READ) && (l->fl_type != F_WRLCK))
+ continue;
+ /* Check for mandatory lock under optimal
+ * mandatory-locking mode */
+ if (priv->mandatory_mode == MLK_OPTIMAL &&
+ !(l->lk_flags & GF_LK_MANDATORY))
+ continue;
+ ret = 0;
+ break;
+ }
+ }
+ return ret;
+}
int
-pl_readv_cont (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
+pl_readv_cont(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- STACK_WIND (frame, pl_readv_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
- fd, size, offset, flags, xdata);
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
- return 0;
-}
+ STACK_WIND(frame, pl_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+ return 0;
+}
int
-pl_readv (call_frame_t *frame, xlator_t *this,
- fd_t *fd, size_t size, off_t offset, uint32_t flags, dict_t *xdata)
-{
- posix_locks_private_t *priv = NULL;
- pl_inode_t *pl_inode = NULL;
- pl_rw_req_t *rw = NULL;
- posix_lock_t region = {.list = {0, }, };
- int op_ret = 0;
- int op_errno = 0;
- char wind_needed = 1;
-
-
- priv = this->private;
- pl_inode = pl_inode_get (this, fd->inode);
-
- if (priv->mandatory && pl_inode->mandatory) {
- region.fl_start = offset;
- region.fl_end = offset + size - 1;
- region.transport = frame->root->trans;
- region.fd_num = fd_to_fdnum(fd);
- region.client_pid = frame->root->pid;
- region.owner = frame->root->lk_owner;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- wind_needed = __rw_allowable (pl_inode, &region,
- GF_FOP_READ);
- if (wind_needed) {
- goto unlock;
- }
-
- if (fd->flags & O_NONBLOCK) {
- gf_log (this->name, GF_LOG_TRACE,
- "returning EAGAIN as fd is O_NONBLOCK");
- op_errno = EAGAIN;
- op_ret = -1;
- goto unlock;
- }
-
- rw = GF_CALLOC (1, sizeof (*rw),
- gf_locks_mt_pl_rw_req_t);
- if (!rw) {
- op_errno = ENOMEM;
- op_ret = -1;
- goto unlock;
- }
-
- rw->stub = fop_readv_stub (frame, pl_readv_cont,
- fd, size, offset, flags,
- xdata);
- if (!rw->stub) {
- op_errno = ENOMEM;
- op_ret = -1;
- GF_FREE (rw);
- goto unlock;
- }
+pl_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+ pl_rw_req_t *rw = NULL;
+ posix_lock_t region = {
+ .list =
+ {
+ 0,
+ },
+ };
+ gf_boolean_t enabled = _gf_false;
+ gf_boolean_t can_block = _gf_true;
+ int op_ret = 0;
+ int op_errno = 0;
+ int allowed = 1;
+
+ GF_VALIDATE_OR_GOTO("locks", this, unwind);
+
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+
+ if (!frame->local) {
+ frame->local = mem_get0(this->local_pool);
+ local = frame->local;
+ local->inode = inode_ref(fd->inode);
+ local->fd = fd_ref(fd);
+ }
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (frame->root->pid < 0)
+ enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
+
+ if (enabled) {
+ region.fl_start = offset;
+ region.fl_end = offset + size - 1;
+ region.client = frame->root->client;
+ region.fd_num = fd_to_fdnum(fd);
+ region.client_pid = frame->root->pid;
+ region.owner = frame->root->lk_owner;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ allowed = pl_is_fop_allowed(pl_inode, &region, fd, GF_FOP_READ,
+ &can_block);
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
+ goto unlock;
+ } else if (!can_block) {
+ op_errno = EAGAIN;
+ op_ret = -1;
+ goto unlock;
+ }
- rw->region = region;
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ if (!rw) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unlock;
+ }
- list_add_tail (&rw->list, &pl_inode->rw_list);
- }
- unlock:
- pthread_mutex_unlock (&pl_inode->mutex);
- }
+ rw->stub = fop_readv_stub(frame, pl_readv_cont, fd, size, offset,
+ flags, xdata);
+ if (!rw->stub) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ GF_FREE(rw);
+ goto unlock;
+ }
+ rw->region = region;
- if (wind_needed) {
- STACK_WIND (frame, pl_readv_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
- fd, size, offset, flags, xdata);
+ list_add_tail(&rw->list, &pl_inode->rw_list);
}
+ unlock:
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+ if (allowed == 1) {
+ STACK_WIND(frame, pl_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ }
+unwind:
+ if (op_ret == -1)
+ PL_STACK_UNWIND(readv, xdata, frame, op_ret, op_errno, NULL, 0, NULL,
+ NULL, NULL);
- if (op_ret == -1)
- STACK_UNWIND_STRICT (readv, frame, -1, op_errno,
- NULL, 0, NULL, NULL, NULL);
-
- return 0;
+ return 0;
}
-
int
-pl_writev_cont (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int count, off_t offset,
- uint32_t flags, struct iobref *iobref, dict_t *xdata)
+pl_writev_cont(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
- STACK_WIND (frame, pl_writev_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset, flags, iobref, xdata);
+ pl_track_io_fop_count(frame->local, this, INCREMENT);
- return 0;
-}
+ STACK_WIND(frame, pl_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
+ return 0;
+}
int
-pl_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t offset,
- uint32_t flags, struct iobref *iobref, dict_t *xdata)
-{
- posix_locks_private_t *priv = NULL;
- pl_inode_t *pl_inode = NULL;
- pl_rw_req_t *rw = NULL;
- posix_lock_t region = {.list = {0, }, };
- int op_ret = 0;
- int op_errno = 0;
- char wind_needed = 1;
-
- priv = this->private;
- pl_inode = pl_inode_get (this, fd->inode);
-
- if (priv->mandatory && pl_inode->mandatory) {
- region.fl_start = offset;
- region.fl_end = offset + iov_length (vector, count) - 1;
- region.transport = frame->root->trans;
- region.fd_num = fd_to_fdnum(fd);
- region.client_pid = frame->root->pid;
- region.owner = frame->root->lk_owner;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- wind_needed = __rw_allowable (pl_inode, &region,
- GF_FOP_WRITE);
- if (wind_needed)
- goto unlock;
-
- if (fd->flags & O_NONBLOCK) {
- gf_log (this->name, GF_LOG_TRACE,
- "returning EAGAIN because fd is "
- "O_NONBLOCK");
- op_errno = EAGAIN;
- op_ret = -1;
- goto unlock;
- }
-
- rw = GF_CALLOC (1, sizeof (*rw),
- gf_locks_mt_pl_rw_req_t);
- if (!rw) {
- op_errno = ENOMEM;
- op_ret = -1;
- goto unlock;
- }
-
- rw->stub = fop_writev_stub (frame, pl_writev_cont,
- fd, vector, count, offset,
- flags, iobref, xdata);
- if (!rw->stub) {
- op_errno = ENOMEM;
- op_ret = -1;
- GF_FREE (rw);
- goto unlock;
- }
+pl_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+ pl_rw_req_t *rw = NULL;
+ posix_lock_t region = {
+ .list =
+ {
+ 0,
+ },
+ };
+ gf_boolean_t enabled = _gf_false;
+ gf_boolean_t can_block = _gf_true;
+ int op_ret = 0;
+ int op_errno = 0;
+ int allowed = 1;
+
+ GF_VALIDATE_OR_GOTO("locks", this, unwind);
+
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+
+ if (!frame->local) {
+ frame->local = mem_get0(this->local_pool);
+ local = frame->local;
+ local->inode = inode_ref(fd->inode);
+ local->fd = fd_ref(fd);
+ }
+
+ pl_inode = pl_inode_get(this, fd->inode, local);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (frame->root->pid < 0)
+ enabled = _gf_false;
+ else
+ enabled = pl_is_mandatory_locking_enabled(pl_inode);
+
+ if (enabled) {
+ region.fl_start = offset;
+ region.fl_end = offset + iov_length(vector, count) - 1;
+ region.client = frame->root->client;
+ region.fd_num = fd_to_fdnum(fd);
+ region.client_pid = frame->root->pid;
+ region.owner = frame->root->lk_owner;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ allowed = pl_is_fop_allowed(pl_inode, &region, fd, GF_FOP_WRITE,
+ &can_block);
+ if (allowed == 1) {
+ if (pl_inode->mlock_enforced &&
+ pl_inode->track_fop_wind_count) {
+ pl_inode->fop_wind_count++;
+ }
+ goto unlock;
+ } else if (!can_block) {
+ if (pl_inode->mlock_enforced) {
+ op_errno = EBUSY;
+ } else {
+ op_errno = EAGAIN;
+ }
- rw->region = region;
+ op_ret = -1;
+ goto unlock;
+ }
- list_add_tail (&rw->list, &pl_inode->rw_list);
- }
- unlock:
- pthread_mutex_unlock (&pl_inode->mutex);
- }
+ rw = GF_MALLOC(sizeof(*rw), gf_locks_mt_pl_rw_req_t);
+ if (!rw) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ goto unlock;
+ }
+ rw->stub = fop_writev_stub(frame, pl_writev_cont, fd, vector, count,
+ offset, flags, iobref, xdata);
+ if (!rw->stub) {
+ op_errno = ENOMEM;
+ op_ret = -1;
+ GF_FREE(rw);
+ goto unlock;
+ }
- if (wind_needed)
- STACK_WIND (frame, pl_writev_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->writev,
- fd, vector, count, offset, flags, iobref, xdata);
+ rw->region = region;
- if (op_ret == -1)
- STACK_UNWIND_STRICT (writev, frame, -1, op_errno, NULL, NULL,
- NULL);
+ list_add_tail(&rw->list, &pl_inode->rw_list);
+ }
+ unlock:
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+ if (allowed == 1) {
+ STACK_WIND(frame, pl_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
+ }
+unwind:
+ if (op_ret == -1)
+ PL_STACK_UNWIND(writev, xdata, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
- return 0;
+ return 0;
}
static int
-__fd_has_locks (pl_inode_t *pl_inode, fd_t *fd)
+__fd_has_locks(pl_inode_t *pl_inode, fd_t *fd)
{
- int found = 0;
- posix_lock_t *l = NULL;
+ posix_lock_t *l = NULL;
- list_for_each_entry (l, &pl_inode->ext_list, list) {
- if ((l->fd_num == fd_to_fdnum(fd))) {
- found = 1;
- break;
- }
+ list_for_each_entry(l, &pl_inode->ext_list, list)
+ {
+ if (l->fd_num == fd_to_fdnum(fd)) {
+ return 1;
}
+ }
- return found;
+ return 0;
}
static posix_lock_t *
-lock_dup (posix_lock_t *lock)
+lock_dup(posix_lock_t *lock)
{
- posix_lock_t *new_lock = NULL;
-
- new_lock = new_posix_lock (&lock->user_flock, lock->transport,
- lock->client_pid, &lock->owner,
- (fd_t *)lock->fd_num);
- return new_lock;
+ int32_t op_errno = 0;
+ return new_posix_lock(&lock->user_flock, lock->client, lock->client_pid,
+ &lock->owner, (fd_t *)lock->fd_num, lock->lk_flags,
+ lock->blocking, &op_errno);
}
static int
-__dup_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd,
- pl_fdctx_t *fdctx)
-{
- posix_lock_t *l = NULL;
- posix_lock_t *duplock = NULL;
- int ret = 0;
-
- list_for_each_entry (l, &pl_inode->ext_list, list) {
- if ((l->fd_num == fd_to_fdnum(fd))) {
- duplock = lock_dup (l);
- if (!duplock) {
- ret = -1;
- break;
- }
+__dup_locks_to_fdctx(pl_inode_t *pl_inode, fd_t *fd, pl_fdctx_t *fdctx)
+{
+ posix_lock_t *l = NULL;
+ posix_lock_t *duplock = NULL;
+ int ret = 0;
+
+ list_for_each_entry(l, &pl_inode->ext_list, list)
+ {
+ if (l->fd_num == fd_to_fdnum(fd)) {
+ duplock = lock_dup(l);
+ if (!duplock) {
+ ret = -1;
+ break;
+ }
- list_add_tail (&duplock->list, &fdctx->locks_list);
- }
+ list_add_tail(&duplock->list, &fdctx->locks_list);
}
+ }
- return ret;
+ return ret;
}
static int
-__copy_locks_to_fdctx (pl_inode_t *pl_inode, fd_t *fd,
- pl_fdctx_t *fdctx)
+__copy_locks_to_fdctx(pl_inode_t *pl_inode, fd_t *fd, pl_fdctx_t *fdctx)
{
- int ret = 0;
-
- ret = __dup_locks_to_fdctx (pl_inode, fd, fdctx);
- if (ret)
- goto out;
-
-out:
- return ret;
-
+ return __dup_locks_to_fdctx(pl_inode, fd, fdctx);
}
static void
-pl_mark_eol_lock (posix_lock_t *lock)
+pl_mark_eol_lock(posix_lock_t *lock)
{
- lock->user_flock.l_type = GF_LK_EOL;
- return;
+ lock->user_flock.l_type = GF_LK_EOL;
+ return;
}
static posix_lock_t *
-__get_next_fdctx_lock (pl_fdctx_t *fdctx)
+__get_next_fdctx_lock(pl_fdctx_t *fdctx)
{
- posix_lock_t *lock = NULL;
+ posix_lock_t *lock = NULL;
- GF_ASSERT (fdctx);
+ GF_ASSERT(fdctx);
- if (list_empty (&fdctx->locks_list)) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "fdctx lock list empty");
- goto out;
- }
+ if (list_empty(&fdctx->locks_list)) {
+ gf_log(THIS->name, GF_LOG_DEBUG, "fdctx lock list empty");
+ goto out;
+ }
- lock = list_entry (fdctx->locks_list.next, typeof (*lock),
- list);
+ lock = list_entry(fdctx->locks_list.next, typeof(*lock), list);
- GF_ASSERT (lock);
+ GF_ASSERT(lock);
- list_del_init (&lock->list);
+ list_del_init(&lock->list);
out:
- return lock;
+ return lock;
}
static int
-__set_next_lock_fd (pl_fdctx_t *fdctx, posix_lock_t *reqlock)
+__set_next_lock_fd(pl_fdctx_t *fdctx, posix_lock_t *reqlock)
{
- posix_lock_t *lock = NULL;
- int ret = 0;
+ posix_lock_t *lock = NULL;
+ int ret = 0;
- GF_ASSERT (fdctx);
+ GF_ASSERT(fdctx);
- lock = __get_next_fdctx_lock (fdctx);
- if (!lock) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "marking EOL in reqlock");
- pl_mark_eol_lock (reqlock);
- goto out;
- }
+ lock = __get_next_fdctx_lock(fdctx);
+ if (!lock) {
+ gf_log(THIS->name, GF_LOG_DEBUG, "marking EOL in reqlock");
+ pl_mark_eol_lock(reqlock);
+ goto out;
+ }
- reqlock->user_flock = lock->user_flock;
- reqlock->fl_start = lock->fl_start;
- reqlock->fl_type = lock->fl_type;
- reqlock->fl_end = lock->fl_end;
- reqlock->owner = lock->owner;
+ reqlock->user_flock = lock->user_flock;
+ reqlock->fl_start = lock->fl_start;
+ reqlock->fl_type = lock->fl_type;
+ reqlock->fl_end = lock->fl_end;
+ reqlock->owner = lock->owner;
out:
- if (lock)
- __destroy_lock (lock);
+ if (lock)
+ __destroy_lock(lock);
- return ret;
+ return ret;
}
static int
-pl_getlk_fd (xlator_t *this, pl_inode_t *pl_inode,
- fd_t *fd, posix_lock_t *reqlock)
+pl_getlk_fd(xlator_t *this, pl_inode_t *pl_inode, fd_t *fd,
+ posix_lock_t *reqlock)
{
- uint64_t tmp = 0;
- pl_fdctx_t *fdctx = NULL;
- int ret = 0;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- if (!__fd_has_locks (pl_inode, fd)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "fd=%p has no active locks", fd);
- ret = 0;
- goto unlock;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "There are active locks on fd");
-
- ret = fd_ctx_get (fd, this, &tmp);
- fdctx = (pl_fdctx_t *)(long) tmp;
-
- if (list_empty (&fdctx->locks_list)) {
- gf_log (this->name, GF_LOG_TRACE,
- "no fdctx -> copying all locks on fd");
+ uint64_t tmp = 0;
+ pl_fdctx_t *fdctx = NULL;
+ int ret = 0;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (!__fd_has_locks(pl_inode, fd)) {
+ pthread_mutex_unlock(&pl_inode->mutex);
+ gf_log(this->name, GF_LOG_DEBUG, "fd=%p has no active locks", fd);
+ ret = 0;
+ goto out;
+ }
- ret = __copy_locks_to_fdctx (pl_inode, fd, fdctx);
- if (ret) {
- goto unlock;
- }
+ gf_log(this->name, GF_LOG_DEBUG, "There are active locks on fd");
- ret = __set_next_lock_fd (fdctx, reqlock);
+ ret = fd_ctx_get(fd, this, &tmp);
+ fdctx = (pl_fdctx_t *)(long)tmp;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "fdctx present -> returning the next lock");
- ret = __set_next_lock_fd (fdctx, reqlock);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "could not get next lock of fd");
- goto unlock;
- }
- }
+ if (list_empty(&fdctx->locks_list)) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "no fdctx -> copying all locks on fd");
+
+ ret = __copy_locks_to_fdctx(pl_inode, fd, fdctx);
+ if (ret) {
+ goto unlock;
+ }
+
+ ret = __set_next_lock_fd(fdctx, reqlock);
+
+ } else {
+ gf_log(this->name, GF_LOG_TRACE,
+ "fdctx present -> returning the next lock");
+ ret = __set_next_lock_fd(fdctx, reqlock);
+ if (ret) {
+ pthread_mutex_unlock(&pl_inode->mutex);
+ gf_log(this->name, GF_LOG_DEBUG,
+ "could not get next lock of fd");
+ goto out;
+ }
}
+ }
unlock:
- pthread_mutex_unlock (&pl_inode->mutex);
- return ret;
-
+ pthread_mutex_unlock(&pl_inode->mutex);
+out:
+ return ret;
}
int
-pl_lk (call_frame_t *frame, xlator_t *this,
- fd_t *fd, int32_t cmd, struct gf_flock *flock, dict_t *xdata)
-{
- void *transport = NULL;
- pid_t client_pid = 0;
- pl_inode_t *pl_inode = NULL;
- int op_ret = 0;
- int op_errno = 0;
- int can_block = 0;
- posix_lock_t *reqlock = NULL;
- posix_lock_t *conf = NULL;
- int ret = 0;
-
- transport = frame->root->trans;
- client_pid = frame->root->pid;
-
- if ((flock->l_start < 0) || (flock->l_len < 0)) {
- op_ret = -1;
- op_errno = EINVAL;
- goto unwind;
- }
+pl_metalock_is_active(pl_inode_t *pl_inode)
+{
+ if (list_empty(&pl_inode->metalk_list))
+ return 0;
+ else
+ return 1;
+}
- pl_inode = pl_inode_get (this, fd->inode);
- if (!pl_inode) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
+void
+__pl_queue_lock(pl_inode_t *pl_inode, posix_lock_t *reqlock)
+{
+ list_add_tail(&reqlock->list, &pl_inode->queued_locks);
+}
- reqlock = new_posix_lock (flock, transport, client_pid,
- &frame->root->lk_owner, fd);
+int
+pl_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ pl_inode_t *pl_inode = NULL;
+ int op_ret = 0;
+ int op_errno = 0;
+ int can_block = 0;
+ posix_lock_t *reqlock = NULL;
+ posix_lock_t *conf = NULL;
+ uint32_t lk_flags = 0;
+ posix_locks_private_t *priv = this->private;
+ pl_local_t *local = NULL;
+ short lock_type = 0;
+
+ int ret = dict_get_uint32(xdata, GF_LOCK_MODE, &lk_flags);
+ if (ret == 0) {
+ if (priv->mandatory_mode == MLK_NONE)
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Lock flags received "
+ "in a non-mandatory locking environment, "
+ "continuing");
+ else
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Lock flags received, "
+ "continuing");
+ }
+
+ if ((flock->l_start < 0) || ((flock->l_start + flock->l_len) < 0)) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ /* As per 'man 3 fcntl', the value of l_len may be
+ * negative. In such cases, lock request should be
+ * considered for the range starting at 'l_start+l_len'
+ * and ending at 'l_start-1'. Update the fields accordingly.
+ */
+ if (flock->l_len < 0) {
+ flock->l_start += flock->l_len;
+ flock->l_len = labs(flock->l_len);
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ } else {
+ frame->local = local;
+ local->fd = fd_ref(fd);
+ }
- if (!reqlock) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
+ pl_inode = pl_inode_get(this, fd->inode, local);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ reqlock = new_posix_lock(flock, frame->root->client, frame->root->pid,
+ &frame->root->lk_owner, fd, lk_flags, can_block,
+ &op_errno);
- pl_trace_in (this, frame, fd, NULL, cmd, flock, NULL);
+ if (!reqlock) {
+ op_ret = -1;
+ goto unwind;
+ }
- switch (cmd) {
+ pl_trace_in(this, frame, fd, NULL, cmd, flock, NULL);
+ switch (cmd) {
case F_RESLK_LCKW:
- can_block = 1;
+ can_block = 1;
- /* fall through */
+ /* fall through */
case F_RESLK_LCK:
- memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock));
- reqlock->frame = frame;
- reqlock->this = this;
+ reqlock->frame = frame;
+ reqlock->this = this;
- ret = pl_reserve_setlk (this, pl_inode, reqlock,
- can_block);
- if (ret < 0) {
- if (can_block)
- goto out;
+ ret = pl_reserve_setlk(this, pl_inode, reqlock, can_block);
+ if (ret < 0) {
+ if (can_block)
+ goto out;
- op_ret = -1;
- op_errno = -ret;
- __destroy_lock (reqlock);
- goto unwind;
- }
- /* Finally a getlk and return the call */
- conf = pl_getlk (pl_inode, reqlock);
- if (conf)
- posix_lock_to_flock (conf, flock);
- break;
+ op_ret = -1;
+ op_errno = -ret;
+ __destroy_lock(reqlock);
+ goto unwind;
+ }
+ /* Finally a getlk and return the call */
+ conf = pl_getlk(pl_inode, reqlock);
+ if (conf)
+ posix_lock_to_flock(conf, flock);
+ break;
case F_RESLK_UNLCK:
- reqlock->frame = frame;
- reqlock->this = this;
- ret = pl_reserve_unlock (this, pl_inode, reqlock);
- if (ret < 0) {
- op_ret = -1;
- op_errno = -ret;
- }
- __destroy_lock (reqlock);
- goto unwind;
+ reqlock->frame = frame;
+ reqlock->this = this;
+ ret = pl_reserve_unlock(this, pl_inode, reqlock);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = -ret;
+ }
+ __destroy_lock(reqlock);
+ goto unwind;
- break;
+ break;
case F_GETLK_FD:
- reqlock->frame = frame;
- reqlock->this = this;
- ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block);
- GF_ASSERT (ret >= 0);
-
- ret = pl_getlk_fd (this, pl_inode, fd, reqlock);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "getting locks on fd failed");
- op_ret = -1;
- op_errno = ENOLCK;
- goto unwind;
- }
+ reqlock->frame = frame;
+ reqlock->this = this;
+ ret = pl_verify_reservelk(this, pl_inode, reqlock, can_block);
+ GF_ASSERT(ret >= 0);
+
+ ret = pl_getlk_fd(this, pl_inode, fd, reqlock);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_DEBUG, "getting locks on fd failed");
+ op_ret = -1;
+ op_errno = ENOLCK;
+ goto unwind;
+ }
- gf_log (this->name, GF_LOG_TRACE,
- "Replying with a lock on fd for healing");
+ gf_log(this->name, GF_LOG_TRACE,
+ "Replying with a lock on fd for healing");
- posix_lock_to_flock (reqlock, flock);
- __destroy_lock (reqlock);
+ posix_lock_to_flock(reqlock, flock);
+ __destroy_lock(reqlock);
- break;
+ break;
#if F_GETLK != F_GETLK64
case F_GETLK64:
#endif
case F_GETLK:
- conf = pl_getlk (pl_inode, reqlock);
- posix_lock_to_flock (conf, flock);
- __destroy_lock (reqlock);
+ conf = pl_getlk(pl_inode, reqlock);
+ posix_lock_to_flock(conf, flock);
+ __destroy_lock(reqlock);
- break;
+ break;
#if F_SETLKW != F_SETLKW64
case F_SETLKW64:
#endif
case F_SETLKW:
- can_block = 1;
- reqlock->frame = frame;
- reqlock->this = this;
-
- /* fall through */
+ can_block = 1;
+ reqlock->frame = frame;
+ reqlock->this = this;
+ reqlock->blocking = can_block;
+ /* fall through */
#if F_SETLK != F_SETLK64
case F_SETLK64:
#endif
case F_SETLK:
- memcpy (&reqlock->user_flock, flock, sizeof (struct gf_flock));
- ret = pl_verify_reservelk (this, pl_inode, reqlock, can_block);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_TRACE,
- "Lock blocked due to conflicting reserve lock");
- goto out;
+ reqlock->frame = frame;
+ reqlock->this = this;
+ lock_type = flock->l_type;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (pl_inode->migrated) {
+ op_errno = EREMOTE;
+ pthread_mutex_unlock(&pl_inode->mutex);
+ STACK_UNWIND_STRICT(lk, frame, -1, op_errno, flock, xdata);
+
+ __destroy_lock(reqlock);
+ goto out;
}
- ret = pl_setlk (this, pl_inode, reqlock,
- can_block);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ ret = pl_verify_reservelk(this, pl_inode, reqlock, can_block);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "Lock blocked due to conflicting reserve lock");
+ goto out;
+ }
+
+ if (reqlock->fl_type != F_UNLCK && pl_inode->mlock_enforced) {
+ ret = pl_lock_preempt(pl_inode, reqlock);
if (ret == -1) {
- if ((can_block) && (F_UNLCK != flock->l_type)) {
- pl_trace_block (this, frame, fd, NULL, cmd, flock, NULL);
- goto out;
- }
- gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN");
- op_ret = -1;
- op_errno = EAGAIN;
- __destroy_lock (reqlock);
-
- } else if ((0 == ret) && (F_UNLCK == flock->l_type)) {
- /* For NLM's last "unlock on fd" detection */
- if (pl_locks_by_fd (pl_inode, fd))
- flock->l_type = F_RDLCK;
- else
- flock->l_type = F_UNLCK;
+ gf_log(this->name, GF_LOG_ERROR, "lock preempt failed");
+ op_ret = -1;
+ op_errno = EAGAIN;
+ __destroy_lock(reqlock);
+ goto out;
}
- }
-unwind:
- pl_trace_out (this, frame, fd, NULL, cmd, flock, op_ret, op_errno, NULL);
- pl_update_refkeeper (this, fd->inode);
+ pl_trace_block(this, frame, fd, NULL, cmd, flock, NULL);
+ goto unwind;
+ }
+
+ ret = pl_setlk(this, pl_inode, reqlock, can_block);
+ if (ret == -1) {
+ if ((can_block) && (F_UNLCK != lock_type)) {
+ goto out;
+ }
+ gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN");
+ op_ret = -1;
+ op_errno = EAGAIN;
+ __destroy_lock(reqlock);
+ } else if (ret == -2) {
+ goto out;
+ } else if ((0 == ret) && (F_UNLCK == flock->l_type)) {
+ /* For NLM's last "unlock on fd" detection */
+ if (pl_locks_by_fd(pl_inode, fd))
+ flock->l_type = F_RDLCK;
+ else
+ flock->l_type = F_UNLCK;
+ }
+ }
+unwind:
+ pl_trace_out(this, frame, fd, NULL, cmd, flock, op_ret, op_errno, NULL);
+ pl_update_refkeeper(this, fd->inode);
- STACK_UNWIND_STRICT (lk, frame, op_ret, op_errno, flock, xdata);
+ PL_STACK_UNWIND(lk, xdata, frame, op_ret, op_errno, flock, xdata);
out:
- return 0;
+ return 0;
}
-
/* TODO: this function just logs, no action required?? */
int
-pl_forget (xlator_t *this,
- inode_t *inode)
+pl_forget(xlator_t *this, inode_t *inode)
{
- pl_inode_t *pl_inode = NULL;
+ pl_inode_t *pl_inode = NULL;
- posix_lock_t *ext_tmp = NULL;
- posix_lock_t *ext_l = NULL;
- struct list_head posixlks_released;
+ posix_lock_t *ext_tmp = NULL;
+ posix_lock_t *ext_l = NULL;
+ struct list_head posixlks_released;
- pl_inode_lock_t *ino_tmp = NULL;
- pl_inode_lock_t *ino_l = NULL;
- struct list_head inodelks_released;
+ pl_inode_lock_t *ino_tmp = NULL;
+ pl_inode_lock_t *ino_l = NULL;
+ struct list_head inodelks_released;
- pl_rw_req_t *rw_tmp = NULL;
- pl_rw_req_t *rw_req = NULL;
+ pl_rw_req_t *rw_tmp = NULL;
+ pl_rw_req_t *rw_req = NULL;
- pl_entry_lock_t *entry_tmp = NULL;
- pl_entry_lock_t *entry_l = NULL;
- struct list_head entrylks_released;
+ pl_entry_lock_t *entry_tmp = NULL;
+ pl_entry_lock_t *entry_l = NULL;
+ struct list_head entrylks_released;
- pl_dom_list_t *dom = NULL;
- pl_dom_list_t *dom_tmp = NULL;
+ pl_dom_list_t *dom = NULL;
+ pl_dom_list_t *dom_tmp = NULL;
- INIT_LIST_HEAD (&posixlks_released);
- INIT_LIST_HEAD (&inodelks_released);
- INIT_LIST_HEAD (&entrylks_released);
+ INIT_LIST_HEAD(&posixlks_released);
+ INIT_LIST_HEAD(&inodelks_released);
+ INIT_LIST_HEAD(&entrylks_released);
- pl_inode = pl_inode_get (this, inode);
+ pl_inode = pl_inode_get(this, inode, NULL);
+ if (!pl_inode)
+ return 0;
- pthread_mutex_lock (&pl_inode->mutex);
- {
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (!list_empty(&pl_inode->rw_list)) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Pending R/W requests found, releasing.");
+
+ list_for_each_entry_safe(rw_req, rw_tmp, &pl_inode->rw_list, list)
+ {
+ list_del(&rw_req->list);
+ call_stub_destroy(rw_req->stub);
+ GF_FREE(rw_req);
+ }
+ }
- if (!list_empty (&pl_inode->rw_list)) {
- gf_log (this->name, GF_LOG_WARNING,
- "Pending R/W requests found, releasing.");
+ if (!list_empty(&pl_inode->ext_list)) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Pending fcntl locks found, releasing.");
- list_for_each_entry_safe (rw_req, rw_tmp, &pl_inode->rw_list,
- list) {
+ list_for_each_entry_safe(ext_l, ext_tmp, &pl_inode->ext_list, list)
+ {
+ __delete_lock(ext_l);
+ if (ext_l->blocked) {
+ list_add_tail(&ext_l->list, &posixlks_released);
+ continue;
+ }
+ __destroy_lock(ext_l);
+ }
+ }
- list_del (&rw_req->list);
- GF_FREE (rw_req);
- }
+ list_for_each_entry_safe(dom, dom_tmp, &pl_inode->dom_list, inode_list)
+ {
+ if (!list_empty(&dom->inodelk_list)) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Pending inode locks found, releasing.");
+
+ list_for_each_entry_safe(ino_l, ino_tmp, &dom->inodelk_list,
+ list)
+ {
+ __delete_inode_lock(ino_l);
+ __pl_inodelk_unref(ino_l);
}
- if (!list_empty (&pl_inode->ext_list)) {
- gf_log (this->name, GF_LOG_WARNING,
- "Pending fcntl locks found, releasing.");
+ list_splice_init(&dom->blocked_inodelks, &inodelks_released);
+ }
+ if (!list_empty(&dom->entrylk_list)) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Pending entry locks found, releasing.");
- list_for_each_entry_safe (ext_l, ext_tmp, &pl_inode->ext_list,
- list) {
+ list_for_each_entry_safe(entry_l, entry_tmp, &dom->entrylk_list,
+ domain_list)
+ {
+ list_del_init(&entry_l->domain_list);
- __delete_lock (pl_inode, ext_l);
- if (ext_l->blocked) {
- list_add_tail (&ext_l->list, &posixlks_released);
- continue;
- }
- __destroy_lock (ext_l);
- }
+ GF_FREE((char *)entry_l->basename);
+ GF_FREE(entry_l->connection_id);
+ GF_FREE(entry_l);
}
+ list_splice_init(&dom->blocked_entrylks, &entrylks_released);
+ }
- list_for_each_entry_safe (dom, dom_tmp, &pl_inode->dom_list, inode_list) {
+ list_del(&dom->inode_list);
+ gf_log("posix-locks", GF_LOG_TRACE, " Cleaning up domain: %s",
+ dom->domain);
+ GF_FREE((char *)(dom->domain));
+ GF_FREE(dom);
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
- if (!list_empty (&dom->inodelk_list)) {
- gf_log (this->name, GF_LOG_WARNING,
- "Pending inode locks found, releasing.");
+ if (!list_empty(&posixlks_released)) {
+ list_for_each_entry_safe(ext_l, ext_tmp, &posixlks_released, list)
+ {
+ STACK_UNWIND_STRICT(lk, ext_l->frame, -1, 0, &ext_l->user_flock,
+ NULL);
+ __destroy_lock(ext_l);
+ }
+ }
- list_for_each_entry_safe (ino_l, ino_tmp, &dom->inodelk_list, list) {
- __delete_inode_lock (ino_l);
- __pl_inodelk_unref (ino_l);
- }
+ if (!list_empty(&inodelks_released)) {
+ list_for_each_entry_safe(ino_l, ino_tmp, &inodelks_released,
+ blocked_locks)
+ {
+ STACK_UNWIND_STRICT(inodelk, ino_l->frame, -1, 0, NULL);
+ __pl_inodelk_unref(ino_l);
+ }
+ }
- list_splice_init (&dom->blocked_inodelks, &inodelks_released);
+ if (!list_empty(&entrylks_released)) {
+ list_for_each_entry_safe(entry_l, entry_tmp, &entrylks_released,
+ blocked_locks)
+ {
+ STACK_UNWIND_STRICT(entrylk, entry_l->frame, -1, 0, NULL);
+ GF_FREE((char *)entry_l->basename);
+ GF_FREE(entry_l->connection_id);
+ GF_FREE(entry_l);
+ }
+ }
+ pthread_mutex_destroy(&pl_inode->mutex);
- }
- if (!list_empty (&dom->entrylk_list)) {
- gf_log (this->name, GF_LOG_WARNING,
- "Pending entry locks found, releasing.");
+ GF_FREE(pl_inode);
- list_for_each_entry_safe (entry_l, entry_tmp, &dom->entrylk_list, domain_list) {
- list_del_init (&entry_l->domain_list);
+ return 0;
+}
- if (entry_l->basename)
- GF_FREE ((char *)entry_l->basename);
- GF_FREE (entry_l);
- }
+int
+pl_release(xlator_t *this, fd_t *fd)
+{
+ pl_inode_t *pl_inode = NULL;
+ uint64_t tmp_pl_inode = 0;
+ int ret = -1;
+ uint64_t tmp = 0;
+ pl_fdctx_t *fdctx = NULL;
- list_splice_init (&dom->blocked_entrylks, &entrylks_released);
- }
+ if (fd == NULL) {
+ goto out;
+ }
- list_del (&dom->inode_list);
- gf_log ("posix-locks", GF_LOG_TRACE,
- " Cleaning up domain: %s", dom->domain);
- GF_FREE ((char *)(dom->domain));
- GF_FREE (dom);
- }
+ ret = inode_ctx_get(fd->inode, this, &tmp_pl_inode);
+ if (ret != 0)
+ goto clean;
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
- list_for_each_entry_safe (ext_l, ext_tmp, &posixlks_released, list) {
+ pl_trace_release(this, fd);
- STACK_UNWIND_STRICT (lk, ext_l->frame, -1, 0,
- &ext_l->user_flock, NULL);
- __destroy_lock (ext_l);
- }
+ gf_log(this->name, GF_LOG_TRACE, "Releasing all locks with fd %p", fd);
+
+ delete_locks_of_fd(this, pl_inode, fd);
+ pl_update_refkeeper(this, fd->inode);
+
+clean:
+ ret = fd_ctx_del(fd, this, &tmp);
+ if (ret) {
+ gf_log(this->name, GF_LOG_DEBUG, "Could not get fdctx");
+ goto out;
+ }
+
+ fdctx = (pl_fdctx_t *)(long)tmp;
+
+ GF_FREE(fdctx);
+out:
+ return ret;
+}
+
+int
+pl_releasedir(xlator_t *this, fd_t *fd)
+{
+ int ret = -1;
+ uint64_t tmp = 0;
+ pl_fdctx_t *fdctx = NULL;
+
+ if (fd == NULL) {
+ goto out;
+ }
+
+ ret = fd_ctx_del(fd, this, &tmp);
+ if (ret) {
+ gf_log(this->name, GF_LOG_DEBUG, "Could not get fdctx");
+ goto out;
+ }
+
+ fdctx = (pl_fdctx_t *)(long)tmp;
+
+ GF_FREE(fdctx);
+out:
+ return ret;
+}
- list_for_each_entry_safe (ino_l, ino_tmp, &inodelks_released, blocked_locks) {
+static int32_t
+pl_request_link_count(dict_t **pxdata)
+{
+ dict_t *xdata;
- STACK_UNWIND_STRICT (inodelk, ino_l->frame, -1, 0, NULL);
- __pl_inodelk_unref (ino_l);
+ xdata = *pxdata;
+ if (xdata == NULL) {
+ xdata = dict_new();
+ if (xdata == NULL) {
+ return ENOMEM;
}
+ } else {
+ dict_ref(xdata);
+ }
+
+ if (dict_set_uint32(xdata, GET_LINK_COUNT, 0) != 0) {
+ dict_unref(xdata);
+ return ENOMEM;
+ }
+
+ *pxdata = xdata;
- list_for_each_entry_safe (entry_l, entry_tmp, &entrylks_released, blocked_locks) {
+ return 0;
+}
+
+static int32_t
+pl_check_link_count(dict_t *xdata)
+{
+ int32_t count;
- STACK_UNWIND_STRICT (entrylk, entry_l->frame, -1, 0, NULL);
- if (entry_l->basename)
- GF_FREE ((char *)entry_l->basename);
- GF_FREE (entry_l);
+ /* In case we are unable to read the link count from xdata, we take a
+ * conservative approach and return -2, which will prevent the inode from
+ * being considered deleted. In fact it will cause link tracking for this
+ * inode to be disabled completely to avoid races. */
+ if (xdata == NULL) {
+ return -2;
+ }
+
+ if (dict_get_int32(xdata, GET_LINK_COUNT, &count) != 0) {
+ return -2;
+ }
+
+ return count;
+}
+
+int32_t
+pl_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ pl_inode_t *pl_inode;
+
+ if (op_ret >= 0) {
+ pl_inode = pl_inode_get(this, inode, NULL);
+ if (pl_inode == NULL) {
+ PL_STACK_UNWIND(lookup, xdata, frame, -1, ENOMEM, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
- GF_FREE (pl_inode);
+ pthread_mutex_lock(&pl_inode->mutex);
+
+ /* We only update the link count if we previously didn't know it.
+ * Doing it always can lead to races since lookup is not executed
+ * atomically most of the times. */
+ if (pl_inode->links == -2) {
+ pl_inode->links = pl_check_link_count(xdata);
+ if (buf->ia_type == IA_IFDIR) {
+ /* Directories have at least 2 links. To avoid special handling
+ * for directories, we simply decrement the value here to make
+ * them equivalent to regular files. */
+ pl_inode->links--;
+ }
+ }
- return 0;
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+ PL_STACK_UNWIND(lookup, xdata, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
+}
+
+int32_t
+pl_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int32_t error;
+
+ error = pl_request_link_count(&xdata);
+ if (error == 0) {
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+ STACK_WIND(frame, pl_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ dict_unref(xdata);
+ } else {
+ STACK_UNWIND_STRICT(lookup, frame, -1, error, NULL, NULL, NULL, NULL);
+ }
+ return 0;
+}
+
+int32_t
+pl_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ PL_STACK_UNWIND(fstat, xdata, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+pl_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+ STACK_WIND(frame, pl_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
}
int
-pl_release (xlator_t *this, fd_t *fd)
+pl_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, gf_dirent_t *entries, dict_t *xdata)
{
- pl_inode_t *pl_inode = NULL;
- uint64_t tmp_pl_inode = 0;
- int ret = -1;
- uint64_t tmp = 0;
- pl_fdctx_t *fdctx = NULL;
+ pl_local_t *local = NULL;
+ gf_dirent_t *entry = NULL;
- if (fd == NULL) {
- goto out;
- }
+ if (op_ret <= 0)
+ goto unwind;
- ret = inode_ctx_get (fd->inode, this, &tmp_pl_inode);
- if (ret != 0)
- goto out;
+ local = frame->local;
+ if (!local)
+ goto unwind;
- pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ pl_set_xdata_response(this, local, local->fd->inode, entry->inode,
+ entry->d_name, entry->dict, 0);
+ }
- pl_trace_release (this, fd);
+unwind:
+ PL_STACK_UNWIND(readdirp, xdata, frame, op_ret, op_errno, entries, xdata);
- gf_log (this->name, GF_LOG_TRACE,
- "Releasing all locks with fd %p", fd);
+ return 0;
+}
- delete_locks_of_fd (this, pl_inode, fd);
- pl_update_refkeeper (this, fd->inode);
+int
+pl_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+ STACK_WIND(frame, pl_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, xdata);
- ret = fd_ctx_del (fd, this, &tmp);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not get fdctx");
- goto out;
- }
+ return 0;
+}
- fdctx = (pl_fdctx_t *)(long)tmp;
+lock_migration_info_t *
+gf_mig_info_for_lock(posix_lock_t *lock)
+{
+ lock_migration_info_t *new = GF_MALLOC(sizeof(lock_migration_info_t),
+ gf_common_mt_lock_mig);
+ if (new == NULL) {
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&new->list);
+
+ posix_lock_to_flock(lock, &new->flock);
+
+ new->lk_flags = lock->lk_flags;
+
+ new->client_uid = gf_strdup(lock->client_uid);
- GF_FREE (fdctx);
out:
- return ret;
+ return new;
}
int
-pl_releasedir (xlator_t *this, fd_t *fd)
+pl_fill_active_locks(pl_inode_t *pl_inode, lock_migration_info_t *lmi)
{
- int ret = -1;
- uint64_t tmp = 0;
- pl_fdctx_t *fdctx = NULL;
-
- if (fd == NULL) {
- goto out;
+ posix_lock_t *temp = NULL;
+ lock_migration_info_t *newlock = NULL;
+ int count = 0;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (list_empty(&pl_inode->ext_list)) {
+ count = 0;
+ goto unlock;
}
- ret = fd_ctx_del (fd, this, &tmp);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Could not get fdctx");
+ list_for_each_entry(temp, &pl_inode->ext_list, list)
+ {
+ if (temp->blocked)
+ continue;
+
+ newlock = gf_mig_info_for_lock(temp);
+ if (!newlock) {
+ pthread_mutex_unlock(&pl_inode->mutex);
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "lock_dup failed");
+ count = -1;
goto out;
+ }
+
+ list_add_tail(&newlock->list, &lmi->list);
+ count++;
}
+ }
- fdctx = (pl_fdctx_t *)(long)tmp;
+unlock:
+ pthread_mutex_unlock(&pl_inode->mutex);
+out:
+ return count;
+}
+
+/* This function reads only active locks */
+static int
+pl_getactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ pl_inode_t *pl_inode = NULL;
+ lock_migration_info_t locks;
+ int op_ret = 0;
+ int op_errno = 0;
+ int count = 0;
+
+ INIT_LIST_HEAD(&locks.list);
+
+ pl_inode = pl_inode_get(this, loc->inode, NULL);
+ if (!pl_inode) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "pl_inode_get failed");
+
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ count = pl_fill_active_locks(pl_inode, &locks);
+
+ op_ret = count;
- GF_FREE (fdctx);
out:
- return ret;
+ STACK_UNWIND_STRICT(getactivelk, frame, op_ret, op_errno, &locks, NULL);
+
+ gf_free_mig_locks(&locks);
+
+ return 0;
}
-static int32_t
-__get_posixlk_count (xlator_t *this, pl_inode_t *pl_inode)
+void
+pl_metalk_unref(pl_meta_lock_t *lock)
+{
+ lock->ref--;
+ if (!lock->ref) {
+ GF_FREE(lock->client_uid);
+ GF_FREE(lock);
+ }
+}
+
+void
+__pl_metalk_ref(pl_meta_lock_t *lock)
{
- posix_lock_t *lock = NULL;
- int32_t count = 0;
+ lock->ref++;
+}
- list_for_each_entry (lock, &pl_inode->ext_list, list) {
+pl_meta_lock_t *
+new_meta_lock(call_frame_t *frame, xlator_t *this)
+{
+ pl_meta_lock_t *lock = GF_CALLOC(1, sizeof(*lock),
+ gf_locks_mt_pl_meta_lock_t);
+
+ if (!lock) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
+ "mem allocation"
+ " failed for meta lock");
+ goto out;
+ }
+
+ INIT_LIST_HEAD(&lock->list);
+ INIT_LIST_HEAD(&lock->client_list);
+
+ lock->client_uid = gf_strdup(frame->root->client->client_uid);
+ if (!lock->client_uid) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
+ "mem allocation"
+ " failed for client_uid");
+ GF_FREE(lock);
+ lock = NULL;
+ goto out;
+ }
+
+ __pl_metalk_ref(lock);
+out:
+ return lock;
+}
- gf_log (this->name, GF_LOG_DEBUG,
- " XATTR DEBUG"
- "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" state: %s",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->user_flock.l_start,
- lock->user_flock.l_len,
- lock->blocked == 1 ? "Blocked" : "Active");
+int
+pl_insert_metalk(pl_inode_t *pl_inode, pl_ctx_t *ctx, pl_meta_lock_t *lock)
+{
+ int ret = 0;
- count++;
+ if (!pl_inode || !ctx || !lock) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, 0, "NULL parameter");
+ ret = -1;
+ goto out;
+ }
+
+ lock->pl_inode = pl_inode;
+
+ /* refer function pl_inode_setlk for more info for this ref.
+ * This should be unrefed on meta-unlock triggered by rebalance or
+ * in cleanup with client disconnect*/
+ /*TODO: unref this in cleanup code for disconnect and meta-unlock*/
+ pl_inode->inode = inode_ref(pl_inode->inode);
+
+ /* NOTE:In case of a client-server disconnect we need to cleanup metalk.
+ * Hence, adding the metalk to pl_ctx_t as well. The mutex lock order
+ * should always be on ctx and then on pl_inode*/
+
+ pthread_mutex_lock(&ctx->lock);
+ {
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ list_add_tail(&lock->list, &pl_inode->metalk_list);
}
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ list_add_tail(&lock->client_list, &ctx->metalk_list);
+ }
+ pthread_mutex_unlock(&ctx->lock);
- return count;
+out:
+ return ret;
}
int32_t
-get_posixlk_count (xlator_t *this, inode_t *inode)
+pl_metalk(call_frame_t *frame, xlator_t *this, inode_t *inode)
{
- pl_inode_t *pl_inode = NULL;
- uint64_t tmp_pl_inode = 0;
- int ret = 0;
- int32_t count = 0;
-
- ret = inode_ctx_get (inode, this, &tmp_pl_inode);
- if (ret != 0) {
- goto out;
+ pl_inode_t *pl_inode = NULL;
+ int ret = 0;
+ pl_meta_lock_t *reqlk = NULL;
+ pl_ctx_t *ctx = NULL;
+
+ pl_inode = pl_inode_get(this, inode, NULL);
+ if (!pl_inode) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, ENOMEM,
+ "pl_inode mem allocation failedd");
+
+ ret = -1;
+ goto out;
+ }
+
+ /* Non rebalance process trying to do metalock */
+ if (frame->root->pid != GF_CLIENT_PID_DEFRAG) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Note: In the current scheme of glusterfs where lock migration is
+ * experimental, (ideally) the rebalance process which is migrating
+ * the file should request for a metalock. Hence, the metalock count
+ * should not be more than one for an inode. In future, if there is a
+ * need for meta-lock from other clients, the following block can be
+ * removed.
+ *
+ * Since pl_metalk is called as part of setxattr operation, any client
+ * process(non-rebalance) residing outside trusted network can exhaust
+ * memory of the server node by issuing setxattr repetitively on the
+ * metalock key. The following code makes sure that more than
+ * one metalock cannot be granted on an inode*/
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (pl_metalock_is_active(pl_inode)) {
+ ret = -1;
+ }
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, 0,
+ "More than one meta-lock cannot be granted on"
+ " the inode");
+ goto out;
+ }
+
+ if (frame->root->client) {
+ ctx = pl_ctx_get(frame->root->client, this);
+ if (!ctx) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "pl_ctx_get failed");
+
+ ret = -1;
+ goto out;
}
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "frame-root-client "
+ "is NULL");
+
+ ret = -1;
+ goto out;
+ }
+
+ reqlk = new_meta_lock(frame, this);
+ if (!reqlk) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = pl_insert_metalk(pl_inode, ctx, reqlk);
+ if (ret < 0) {
+ pl_metalk_unref(reqlk);
+ }
+
+out:
+ return ret;
+}
+
+static void
+__unwind_queued_locks(pl_inode_t *pl_inode, struct list_head *tmp_list)
+{
+ if (list_empty(&pl_inode->queued_locks))
+ return;
+
+ list_splice_init(&pl_inode->queued_locks, tmp_list);
+}
- pl_inode = (pl_inode_t *)(long) tmp_pl_inode;
+static void
+__unwind_blocked_locks(pl_inode_t *pl_inode, struct list_head *tmp_list)
+{
+ posix_lock_t *lock = NULL;
+ posix_lock_t *tmp = NULL;
- pthread_mutex_lock (&pl_inode->mutex);
+ if (list_empty(&pl_inode->ext_list))
+ return;
+
+ list_for_each_entry_safe(lock, tmp, &pl_inode->ext_list, list)
+ {
+ if (!lock->blocking)
+ continue;
+
+ list_del_init(&lock->list);
+ list_add_tail(&lock->list, tmp_list);
+ }
+}
+
+int
+pl_metaunlock(call_frame_t *frame, xlator_t *this, inode_t *inode, dict_t *dict)
+{
+ pl_inode_t *pl_inode = NULL;
+ int ret = 0;
+ pl_meta_lock_t *meta_lock = NULL;
+ pl_meta_lock_t *tmp_metalk = NULL;
+ pl_ctx_t *ctx = NULL;
+ posix_lock_t *posix_lock = NULL;
+ posix_lock_t *tmp_posixlk = NULL;
+ struct list_head tmp_posixlk_list;
+
+ INIT_LIST_HEAD(&tmp_posixlk_list);
+
+ if (frame->root->client) {
+ ctx = pl_ctx_get(frame->root->client, this);
+ if (!ctx) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "pl_ctx_get failed");
+
+ ret = -1;
+ goto out;
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0,
+ "frame-root-client is "
+ "NULL");
+ ret = -1;
+ goto out;
+ }
+
+ pl_inode = pl_inode_get(this, inode, NULL);
+ if (!pl_inode) {
+ ret = -1;
+ goto out;
+ }
+
+ pthread_mutex_lock(&ctx->lock);
+ {
+ pthread_mutex_lock(&pl_inode->mutex);
{
- count =__get_posixlk_count (this, pl_inode);
+ /* Unwind queued locks regardless of migration status */
+ __unwind_queued_locks(pl_inode, &tmp_posixlk_list);
+
+ /* Unwind blocked locks only for successful migration */
+ if (dict_get_sizen(dict, "status")) {
+ /* unwind all blocked locks */
+ __unwind_blocked_locks(pl_inode, &tmp_posixlk_list);
+ }
+
+ /* unlock metalk */
+ /* if this list is empty then pl_inode->metalk_list
+ * should be empty too. meta lock should in all cases
+ * be added/removed from both pl_ctx_t and pl_inode */
+
+ if (list_empty(&ctx->metalk_list))
+ goto unlock;
+
+ list_for_each_entry_safe(meta_lock, tmp_metalk, &ctx->metalk_list,
+ client_list)
+ {
+ list_del_init(&meta_lock->client_list);
+
+ pl_inode = meta_lock->pl_inode;
+
+ list_del_init(&meta_lock->list);
+
+ pl_metalk_unref(meta_lock);
+
+ /* The corresponding ref is taken in
+ * pl_insert_metalk*/
+ inode_unref(pl_inode->inode);
+ }
+
+ if (dict_get_sizen(dict, "status"))
+ pl_inode->migrated = _gf_true;
+ else
+ pl_inode->migrated = _gf_false;
}
- pthread_mutex_unlock (&pl_inode->mutex);
+ unlock:
+
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+ pthread_mutex_unlock(&ctx->lock);
out:
- return count;
+ list_for_each_entry_safe(posix_lock, tmp_posixlk, &tmp_posixlk_list, list)
+ {
+ list_del_init(&posix_lock->list);
+
+ STACK_UNWIND_STRICT(lk, posix_lock->frame, -1, EREMOTE,
+ &posix_lock->user_flock, NULL);
+
+ __destroy_lock(posix_lock);
+ }
+
+ return ret;
}
-void
-pl_parent_entrylk_xattr_fill (xlator_t *this, inode_t *parent,
- char *basename, dict_t *dict)
+int32_t
+pl_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- uint32_t entrylk = 0;
- int ret = -1;
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+ local = frame->local;
+ if (local && local->update_mlock_enforced_flag && op_ret != -1) {
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- if (!parent || !basename || !strlen (basename))
- goto out;
- entrylk = check_entrylk_on_basename (this, parent, basename);
-out:
- ret = dict_set_uint32 (dict, GLUSTERFS_PARENT_ENTRYLK, entrylk);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- " dict_set failed on key %s", GLUSTERFS_PARENT_ENTRYLK);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ while (pl_inode->fop_wind_count > 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, 0,
+ "waiting for existing fops (count %d) to drain for "
+ "gfid %s",
+ pl_inode->fop_wind_count, uuid_utoa(pl_inode->gfid));
+ pthread_cond_wait(&pl_inode->check_fop_wind_count,
+ &pl_inode->mutex);
+ }
+ pl_inode->mlock_enforced = _gf_true;
+ pl_inode->check_mlock_info = _gf_false;
}
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+
+unwind:
+ PL_STACK_UNWIND_FOR_CLIENT(setxattr, xdata, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+pl_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int flags, dict_t *xdata)
+{
+ int op_ret = 0;
+ int op_errno = EINVAL;
+ dict_t *xdata_rsp = NULL;
+ char *name = NULL;
+ posix_locks_private_t *priv = this->private;
+
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+
+ if (dict_get_sizen(dict, GF_META_LOCK_KEY)) {
+ op_ret = pl_metalk(frame, this, loc->inode);
+
+ } else if (dict_get_sizen(dict, GF_META_UNLOCK_KEY)) {
+ op_ret = pl_metaunlock(frame, this, loc->inode, dict);
+ } else {
+ goto usual;
+ }
+
+ PL_STACK_UNWIND_FOR_CLIENT(setxattr, xdata_rsp, frame, op_ret, op_errno,
+ xdata_rsp);
+ return 0;
+
+usual:
+ PL_CHECK_LOCK_ENFORCE_KEY(frame, dict, name, this, loc, ((fd_t *)NULL),
+ priv);
+
+ STACK_WIND(frame, pl_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
+ return 0;
+
+unwind:
+ PL_STACK_UNWIND_FOR_CLIENT(setxattr, xdata, frame, op_ret, op_errno, xdata);
+
+ return 0;
}
void
-pl_entrylk_xattr_fill (xlator_t *this, inode_t *inode,
- dict_t *dict)
+pl_dump_lock(char *str, int size, struct gf_flock *flock, gf_lkowner_t *owner,
+ void *trans, char *conn_id, time_t *granted_time,
+ time_t *blkd_time, gf_boolean_t active)
{
- int32_t count = 0;
- int ret = -1;
+ char *type_str = NULL;
+ char granted[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char blocked[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+
+ if (granted_time)
+ gf_time_fmt(granted, sizeof(granted), *granted_time, gf_timefmt_FT);
+ if (blkd_time)
+ gf_time_fmt(blocked, sizeof(blocked), *blkd_time, gf_timefmt_FT);
+ switch (flock->l_type) {
+ case F_RDLCK:
+ type_str = "READ";
+ break;
+ case F_WRLCK:
+ type_str = "WRITE";
+ break;
+ case F_UNLCK:
+ type_str = "UNLOCK";
+ break;
+ default:
+ type_str = "UNKNOWN";
+ break;
+ }
+
+ if (active) {
+ if (blkd_time && *blkd_time == 0) {
+ snprintf(str, size, RANGE_GRNTD_FMT, type_str, flock->l_whence,
+ (unsigned long long)flock->l_start,
+ (unsigned long long)flock->l_len,
+ (unsigned long long)flock->l_pid, lkowner_utoa(owner),
+ trans, conn_id, granted);
+ } else {
+ snprintf(str, size, RANGE_BLKD_GRNTD_FMT, type_str, flock->l_whence,
+ (unsigned long long)flock->l_start,
+ (unsigned long long)flock->l_len,
+ (unsigned long long)flock->l_pid, lkowner_utoa(owner),
+ trans, conn_id, blocked, granted);
+ }
+ } else {
+ snprintf(str, size, RANGE_BLKD_FMT, type_str, flock->l_whence,
+ (unsigned long long)flock->l_start,
+ (unsigned long long)flock->l_len,
+ (unsigned long long)flock->l_pid, lkowner_utoa(owner), trans,
+ conn_id, blocked);
+ }
+}
- count = get_entrylk_count (this, inode);
- ret = dict_set_int32 (dict, GLUSTERFS_ENTRYLK_COUNT, count);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- " dict_set failed on key %s", GLUSTERFS_ENTRYLK_COUNT);
+void
+__dump_entrylks(pl_inode_t *pl_inode)
+{
+ pl_dom_list_t *dom = NULL;
+ pl_entry_lock_t *lock = NULL;
+ char blocked[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char granted[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ int count = 0;
+ char key[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ char *k = "xlator.feature.locks.lock-dump.domain.entrylk";
+
+ char tmp[4098];
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ count = 0;
+
+ gf_proc_dump_build_key(key, "lock-dump.domain", "domain");
+ gf_proc_dump_write(key, "%s", dom->domain);
+
+ list_for_each_entry(lock, &dom->entrylk_list, domain_list)
+ {
+ gf_time_fmt(granted, sizeof(granted), lock->granted_time,
+ gf_timefmt_FT);
+ gf_proc_dump_build_key(key, k, "entrylk[%d](ACTIVE)", count);
+ if (lock->blkd_time == 0) {
+ snprintf(tmp, sizeof(tmp), ENTRY_GRNTD_FMT,
+ lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK"
+ : "ENTRYLK_WRLCK",
+ lock->basename, (unsigned long long)lock->client_pid,
+ lkowner_utoa(&lock->owner), lock->client,
+ lock->connection_id, granted);
+ } else {
+ gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time,
+ gf_timefmt_FT);
+ snprintf(tmp, sizeof(tmp), ENTRY_BLKD_GRNTD_FMT,
+ lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK"
+ : "ENTRYLK_WRLCK",
+ lock->basename, (unsigned long long)lock->client_pid,
+ lkowner_utoa(&lock->owner), lock->client,
+ lock->connection_id, blocked, granted);
+ }
+
+ gf_proc_dump_write(key, "%s", tmp);
+
+ count++;
}
+ list_for_each_entry(lock, &dom->blocked_entrylks, blocked_locks)
+ {
+ gf_time_fmt(blocked, sizeof(blocked), lock->blkd_time,
+ gf_timefmt_FT);
+
+ gf_proc_dump_build_key(key, k, "entrylk[%d](BLOCKED)", count);
+ snprintf(
+ tmp, sizeof(tmp), ENTRY_BLKD_FMT,
+ lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" : "ENTRYLK_WRLCK",
+ lock->basename, (unsigned long long)lock->client_pid,
+ lkowner_utoa(&lock->owner), lock->client, lock->connection_id,
+ blocked);
+
+ gf_proc_dump_write(key, "%s", tmp);
+
+ count++;
+ }
+ }
}
void
-pl_inodelk_xattr_fill (xlator_t *this, inode_t *inode,
- dict_t *dict)
+dump_entrylks(pl_inode_t *pl_inode)
{
- int32_t count = 0;
- int ret = -1;
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __dump_entrylks(pl_inode);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+}
- count = get_inodelk_count (this, inode);
- ret = dict_set_int32 (dict, GLUSTERFS_INODELK_COUNT, count);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- " dict_set failed on key %s", GLUSTERFS_INODELK_COUNT);
+void
+__dump_inodelks(pl_inode_t *pl_inode)
+{
+ pl_dom_list_t *dom = NULL;
+ pl_inode_lock_t *lock = NULL;
+ int count = 0;
+ char key[GF_DUMP_MAX_BUF_LEN];
+
+ char tmp[4098];
+
+ list_for_each_entry(dom, &pl_inode->dom_list, inode_list)
+ {
+ count = 0;
+
+ gf_proc_dump_build_key(key, "lock-dump.domain", "domain");
+ gf_proc_dump_write(key, "%s", dom->domain);
+
+ list_for_each_entry(lock, &dom->inodelk_list, list)
+ {
+ gf_proc_dump_build_key(key, "inodelk", "inodelk[%d](ACTIVE)",
+ count);
+
+ SET_FLOCK_PID(&lock->user_flock, lock);
+ pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
+ lock->client, lock->connection_id, &lock->granted_time,
+ &lock->blkd_time, _gf_true);
+ gf_proc_dump_write(key, "%s", tmp);
+
+ count++;
+ }
+
+ list_for_each_entry(lock, &dom->blocked_inodelks, blocked_locks)
+ {
+ gf_proc_dump_build_key(key, "inodelk", "inodelk[%d](BLOCKED)",
+ count);
+ SET_FLOCK_PID(&lock->user_flock, lock);
+ pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
+ lock->client, lock->connection_id, 0, &lock->blkd_time,
+ _gf_false);
+ gf_proc_dump_write(key, "%s", tmp);
+
+ count++;
}
+ }
+}
+void
+dump_inodelks(pl_inode_t *pl_inode)
+{
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __dump_inodelks(pl_inode);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
}
void
-pl_posixlk_xattr_fill (xlator_t *this, inode_t *inode,
- dict_t *dict)
+__dump_posixlks(pl_inode_t *pl_inode)
{
- int32_t count = 0;
- int ret = -1;
+ posix_lock_t *lock = NULL;
+ int count = 0;
+ char key[GF_DUMP_MAX_BUF_LEN];
+
+ char tmp[4098];
+
+ list_for_each_entry(lock, &pl_inode->ext_list, list)
+ {
+ SET_FLOCK_PID(&lock->user_flock, lock);
+ gf_proc_dump_build_key(key, "posixlk", "posixlk[%d](%s)", count,
+ lock->blocked ? "BLOCKED" : "ACTIVE");
+ pl_dump_lock(tmp, sizeof(tmp), &lock->user_flock, &lock->owner,
+ lock->client, lock->client_uid, &lock->granted_time,
+ &lock->blkd_time, (lock->blocked) ? _gf_false : _gf_true);
+ gf_proc_dump_write(key, "%s", tmp);
+
+ count++;
+ }
+}
- count = get_posixlk_count (this, inode);
- ret = dict_set_int32 (dict, GLUSTERFS_POSIXLK_COUNT, count);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- " dict_set failed on key %s", GLUSTERFS_POSIXLK_COUNT);
+void
+dump_posixlks(pl_inode_t *pl_inode)
+{
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __dump_posixlks(pl_inode);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+}
+
+int32_t
+pl_dump_inode_priv(xlator_t *this, inode_t *inode)
+{
+ int ret = -1;
+ uint64_t tmp_pl_inode = 0;
+ pl_inode_t *pl_inode = NULL;
+ char *pathname = NULL;
+ gf_boolean_t section_added = _gf_false;
+
+ int count = 0;
+
+ if (!inode) {
+ errno = EINVAL;
+ goto out;
+ }
+
+ ret = TRY_LOCK(&inode->lock);
+ if (ret)
+ goto out;
+ {
+ ret = __inode_ctx_get(inode, this, &tmp_pl_inode);
+ if (ret)
+ goto unlock;
+ }
+unlock:
+ UNLOCK(&inode->lock);
+ if (ret)
+ goto out;
+
+ pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
+ if (!pl_inode) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_proc_dump_add_section("xlator.features.locks.%s.inode", this->name);
+ section_added = _gf_true;
+
+ /*We are safe to call __inode_path since we have the
+ * inode->table->lock */
+ __inode_path(inode, NULL, &pathname);
+ if (pathname)
+ gf_proc_dump_write("path", "%s", pathname);
+
+ gf_proc_dump_write("mandatory", "%d", pl_inode->mandatory);
+
+ ret = pthread_mutex_trylock(&pl_inode->mutex);
+ if (ret)
+ goto out;
+ {
+ count = __get_entrylk_count(this, pl_inode);
+ if (count) {
+ gf_proc_dump_write("entrylk-count", "%d", count);
+ __dump_entrylks(pl_inode);
+ }
+
+ count = __get_inodelk_count(this, pl_inode, NULL);
+ if (count) {
+ gf_proc_dump_write("inodelk-count", "%d", count);
+ __dump_inodelks(pl_inode);
+ }
+
+ count = __get_posixlk_count(pl_inode);
+ if (count) {
+ gf_proc_dump_write("posixlk-count", "%d", count);
+ __dump_posixlks(pl_inode);
}
+ gf_proc_dump_write("links", "%d", pl_inode->links);
+ gf_proc_dump_write("removes_pending", "%u", pl_inode->remove_running);
+ gf_proc_dump_write("removed", "%u", pl_inode->removed);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+out:
+ GF_FREE(pathname);
+
+ if (ret && inode) {
+ if (!section_added)
+ gf_proc_dump_add_section(
+ "xlator.features.locks.%s."
+ "inode",
+ this->name);
+ gf_proc_dump_write("Unable to print lock state",
+ "(Lock "
+ "acquisition failure) %s",
+ uuid_utoa(inode->gfid));
+ }
+ return ret;
}
int32_t
-pl_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *xdata,
- struct iatt *postparent)
+mem_acct_init(xlator_t *this)
{
- pl_local_t *local = NULL;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO (this->name, frame->local, out);
+ if (!this)
+ return ret;
- if (op_ret)
- goto out;
+ ret = xlator_mem_acct_init(this, gf_locks_mt_end + 1);
- local = frame->local;
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+pl_ctx_t *
+pl_ctx_get(client_t *client, xlator_t *xlator)
+{
+ void *tmp = NULL;
+ pl_ctx_t *ctx = NULL;
+ pl_ctx_t *setted_ctx = NULL;
+
+ client_ctx_get(client, xlator, &tmp);
+
+ ctx = tmp;
- if (local->parent_entrylk_req)
- pl_parent_entrylk_xattr_fill (this, local->loc.parent,
- (char*)local->loc.name, xdata);
- if (local->entrylk_count_req)
- pl_entrylk_xattr_fill (this, inode, xdata);
- if (local->inodelk_count_req)
- pl_inodelk_xattr_fill (this, inode, xdata);
- if (local->posixlk_count_req)
- pl_posixlk_xattr_fill (this, inode, xdata);
+ if (ctx != NULL)
+ goto out;
+ ctx = GF_CALLOC(1, sizeof(pl_ctx_t), gf_locks_mt_posix_lock_t);
+ if (ctx == NULL)
+ goto out;
+
+ pthread_mutex_init(&ctx->lock, NULL);
+ INIT_LIST_HEAD(&ctx->inodelk_lockers);
+ INIT_LIST_HEAD(&ctx->entrylk_lockers);
+ INIT_LIST_HEAD(&ctx->metalk_list);
+
+ setted_ctx = client_ctx_set(client, xlator, ctx);
+ if (ctx != setted_ctx) {
+ pthread_mutex_destroy(&ctx->lock);
+ GF_FREE(ctx);
+ ctx = setted_ctx;
+ }
out:
- local = frame->local;
- frame->local = NULL;
-
- if (local != NULL) {
- loc_wipe (&local->loc);
- mem_put (local);
- }
-
- STACK_UNWIND_STRICT (
- lookup,
- frame,
- op_ret,
- op_errno,
- inode,
- buf,
- xdata,
- postparent);
- return 0;
+ return ctx;
}
-int32_t
-pl_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xdata)
+int
+pl_metalk_client_cleanup(xlator_t *this, pl_ctx_t *ctx)
{
- pl_local_t *local = NULL;
- int ret = -1;
+ pl_meta_lock_t *meta_lock = NULL;
+ pl_meta_lock_t *tmp_metalk = NULL;
+ pl_inode_t *pl_inode = NULL;
+ posix_lock_t *posix_lock = NULL;
+ posix_lock_t *tmp_posixlk = NULL;
+ struct list_head tmp_posixlk_list;
+
+ INIT_LIST_HEAD(&tmp_posixlk_list);
+
+ pthread_mutex_lock(&ctx->lock);
+ {
+ /* if this list is empty then pl_inode->metalk_list should be
+ * empty too. meta lock should in all cases be added/removed
+ * from both pl_ctx_t and pl_inode */
+ if (list_empty(&ctx->metalk_list))
+ goto unlock;
+
+ list_for_each_entry_safe(meta_lock, tmp_metalk, &ctx->metalk_list,
+ client_list)
+ {
+ list_del_init(&meta_lock->client_list);
+
+ pl_inode = meta_lock->pl_inode;
- VALIDATE_OR_GOTO (frame, out);
- VALIDATE_OR_GOTO (this, out);
- VALIDATE_OR_GOTO (loc, out);
+ pthread_mutex_lock(&pl_inode->mutex);
- local = mem_get0 (this->local_pool);
- GF_VALIDATE_OR_GOTO (this->name, local, out);
+ {
+ /* Since the migration status is unknown here
+ * unwind all queued and blocked locks to check
+ * migration status and find the correct
+ * destination */
+ __unwind_queued_locks(pl_inode, &tmp_posixlk_list);
- if (xdata) {
- if (dict_get (xdata, GLUSTERFS_ENTRYLK_COUNT))
- local->entrylk_count_req = 1;
- if (dict_get (xdata, GLUSTERFS_INODELK_COUNT))
- local->inodelk_count_req = 1;
- if (dict_get (xdata, GLUSTERFS_POSIXLK_COUNT))
- local->posixlk_count_req = 1;
- if (dict_get (xdata, GLUSTERFS_PARENT_ENTRYLK))
- local->parent_entrylk_req = 1;
+ __unwind_blocked_locks(pl_inode, &tmp_posixlk_list);
+
+ list_del_init(&meta_lock->list);
+
+ pl_metalk_unref(meta_lock);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ /* The corresponding ref is taken in
+ * pl_insert_metalk*/
+ inode_unref(pl_inode->inode);
}
+ }
- frame->local = local;
- loc_copy (&local->loc, loc);
-
- STACK_WIND (frame,
- pl_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc, xdata);
- ret = 0;
-out:
- if (ret == -1)
- STACK_UNWIND_STRICT (lookup, frame, -1, 0, NULL,
- NULL, NULL, NULL);
+unlock:
+ pthread_mutex_unlock(&ctx->lock);
+
+ list_for_each_entry_safe(posix_lock, tmp_posixlk, &tmp_posixlk_list, list)
+ {
+ list_del_init(&posix_lock->list);
+
+ STACK_UNWIND_STRICT(lk, posix_lock->frame, -1, EREMOTE,
+ &posix_lock->user_flock, NULL);
+ __destroy_lock(posix_lock);
+ }
+ return 0;
+}
+
+static int
+pl_client_disconnect_cbk(xlator_t *this, client_t *client)
+{
+ pl_ctx_t *pl_ctx = pl_ctx_get(client, this);
+ if (pl_ctx) {
+ pl_inodelk_client_cleanup(this, pl_ctx);
+ pl_entrylk_client_cleanup(this, pl_ctx);
+ pl_metalk_client_cleanup(this, pl_ctx);
+ }
+
+ return 0;
+}
+
+static int
+pl_client_destroy_cbk(xlator_t *this, client_t *client)
+{
+ void *tmp = NULL;
+ pl_ctx_t *pl_ctx = NULL;
+
+ pl_client_disconnect_cbk(this, client);
+
+ client_ctx_del(client, this, &tmp);
+
+ if (tmp == NULL)
return 0;
+
+ pl_ctx = tmp;
+
+ GF_ASSERT(list_empty(&pl_ctx->inodelk_lockers));
+ GF_ASSERT(list_empty(&pl_ctx->entrylk_lockers));
+
+ pthread_mutex_destroy(&pl_ctx->lock);
+ GF_FREE(pl_ctx);
+
+ return 0;
}
+
int
-pl_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries, dict_t *xdata)
+reconfigure(xlator_t *this, dict_t *options)
{
- pl_local_t *local = NULL;
- gf_dirent_t *entry = NULL;
+ posix_locks_private_t *priv = this->private;
+ int ret = -1;
+ char *tmp_str = NULL;
- local = frame->local;
+ GF_OPTION_RECONF("trace", priv->trace, options, bool, out);
- if (op_ret <= 0)
- goto unwind;
+ GF_OPTION_RECONF("monkey-unlocking", priv->monkey_unlocking, options, bool,
+ out);
- list_for_each_entry (entry, &entries->list, list) {
- if (local->entrylk_count_req)
- pl_entrylk_xattr_fill (this, entry->inode, entry->dict);
- if (local->inodelk_count_req)
- pl_inodelk_xattr_fill (this, entry->inode, entry->dict);
- if (local->posixlk_count_req)
- pl_posixlk_xattr_fill (this, entry->inode, entry->dict);
- }
+ GF_OPTION_RECONF("revocation-secs", priv->revocation_secs, options, uint32,
+ out);
-unwind:
- frame->local = NULL;
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+ GF_OPTION_RECONF("revocation-clear-all", priv->revocation_clear_all,
+ options, bool, out);
- if (local)
- mem_put (local);
+ GF_OPTION_RECONF("revocation-max-blocked", priv->revocation_max_blocked,
+ options, uint32, out);
- return 0;
+ GF_OPTION_RECONF("notify-contention", priv->notify_contention, options,
+ bool, out);
+
+ GF_OPTION_RECONF("notify-contention-delay", priv->notify_contention_delay,
+ options, uint32, out);
+
+ GF_OPTION_RECONF("mandatory-locking", tmp_str, options, str, out);
+
+ GF_OPTION_RECONF("enforce-mandatory-lock", priv->mlock_enforced, options,
+ bool, out);
+
+ if (!strcmp(tmp_str, "forced"))
+ priv->mandatory_mode = MLK_FORCED;
+ else if (!strcmp(tmp_str, "file"))
+ priv->mandatory_mode = MLK_FILE_BASED;
+ else if (!strcmp(tmp_str, "optimal"))
+ priv->mandatory_mode = MLK_OPTIMAL;
+ else
+ priv->mandatory_mode = MLK_NONE;
+
+ ret = 0;
+
+out:
+ return ret;
}
int
-pl_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *dict)
+init(xlator_t *this)
{
- pl_local_t *local = NULL;
+ posix_locks_private_t *priv = NULL;
+ xlator_list_t *trav = NULL;
+ char *tmp_str = NULL;
+ int ret = -1;
- local = mem_get0 (this->local_pool);
- GF_VALIDATE_OR_GOTO (this->name, local, out);
+ if (!this->children || this->children->next) {
+ gf_log(this->name, GF_LOG_CRITICAL,
+ "FATAL: posix-locks should have exactly one child");
+ goto out;
+ }
- if (dict) {
- if (dict_get (dict, GLUSTERFS_ENTRYLK_COUNT))
- local->entrylk_count_req = 1;
- if (dict_get (dict, GLUSTERFS_INODELK_COUNT))
- local->inodelk_count_req = 1;
- if (dict_get (dict, GLUSTERFS_POSIXLK_COUNT))
- local->posixlk_count_req = 1;
- }
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Volume is dangling. Please check the volume file.");
+ }
- frame->local = local;
+ trav = this->children;
+ while (trav->xlator->children)
+ trav = trav->xlator->children;
- STACK_WIND (frame, pl_readdirp_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
- fd, size, offset, dict);
+ if (strncmp("storage/", trav->xlator->type, 8)) {
+ gf_log(this->name, GF_LOG_CRITICAL,
+ "'locks' translator is not loaded over a storage "
+ "translator");
+ goto out;
+ }
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_locks_mt_posix_locks_private_t);
+
+ GF_OPTION_INIT("mandatory-locking", tmp_str, str, out);
+ if (!strcmp(tmp_str, "forced"))
+ priv->mandatory_mode = MLK_FORCED;
+ else if (!strcmp(tmp_str, "file"))
+ priv->mandatory_mode = MLK_FILE_BASED;
+ else if (!strcmp(tmp_str, "optimal"))
+ priv->mandatory_mode = MLK_OPTIMAL;
+ else
+ priv->mandatory_mode = MLK_NONE;
+
+ tmp_str = NULL;
+
+ GF_OPTION_INIT("trace", priv->trace, bool, out);
+
+ GF_OPTION_INIT("monkey-unlocking", priv->monkey_unlocking, bool, out);
+
+ GF_OPTION_INIT("revocation-secs", priv->revocation_secs, uint32, out);
+
+ GF_OPTION_INIT("revocation-clear-all", priv->revocation_clear_all, bool,
+ out);
+
+ GF_OPTION_INIT("revocation-max-blocked", priv->revocation_max_blocked,
+ uint32, out);
+
+ GF_OPTION_INIT("notify-contention", priv->notify_contention, bool, out);
+
+ GF_OPTION_INIT("notify-contention-delay", priv->notify_contention_delay,
+ uint32, out);
+
+ GF_OPTION_INIT("enforce-mandatory-lock", priv->mlock_enforced, bool, out);
+
+ this->local_pool = mem_pool_new(pl_local_t, 32);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto out;
+ }
+
+ this->private = priv;
+ ret = 0;
- return 0;
out:
- STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, NULL, NULL);
- return 0;
+ if (ret) {
+ GF_FREE(priv);
+ }
+ return ret;
}
-
void
-pl_dump_lock (char *str, int size, struct gf_flock *flock,
- gf_lkowner_t *owner, void *trans, time_t *granted_time,
- time_t *blkd_time, gf_boolean_t active)
+fini(xlator_t *this)
{
- char *type_str = NULL;
- char granted[32] = {0,};
- char blocked[32] = {0,};
+ posix_locks_private_t *priv = this->private;
+ if (!priv)
+ return;
+ this->private = NULL;
+ if (this->local_pool) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
+ GF_FREE(priv->brickname);
+ GF_FREE(priv);
+
+ return;
+}
- switch (flock->l_type) {
- case F_RDLCK:
- type_str = "READ";
- break;
- case F_WRLCK:
- type_str = "WRITE";
- break;
- case F_UNLCK:
- type_str = "UNLOCK";
- break;
- default:
- type_str = "UNKNOWN";
- break;
- }
+int
+pl_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata);
- if (active) {
- if (blkd_time && *blkd_time == 0) {
- snprintf (str, size, RANGE_GRNTD_FMT,
- type_str, flock->l_whence,
- (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid,
- lkowner_utoa (owner), trans,
- ctime_r (granted_time, granted));
- } else {
- snprintf (str, size, RANGE_BLKD_GRNTD_FMT,
- type_str, flock->l_whence,
- (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid,
- lkowner_utoa (owner), trans,
- ctime_r (blkd_time, blocked),
- ctime_r (granted_time, granted));
- }
- }
- else {
- snprintf (str, size, RANGE_BLKD_FMT,
- type_str, flock->l_whence,
- (unsigned long long) flock->l_start,
- (unsigned long long) flock->l_len,
- (unsigned long long) flock->l_pid,
- lkowner_utoa (owner), trans,
- ctime_r (blkd_time, blocked));
- }
+int
+pl_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata);
+
+int
+pl_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata);
+int
+pl_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata);
+
+int32_t
+pl_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, struct iatt *preoldparent,
+ struct iatt *postoldparent, struct iatt *prenewparent,
+ struct iatt *postnewparent, dict_t *xdata)
+{
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
+
+ PL_STACK_UNWIND(rename, xdata, frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
+
+ return 0;
}
-void
-__dump_entrylks (pl_inode_t *pl_inode)
-{
- pl_dom_list_t *dom = NULL;
- pl_entry_lock_t *lock = NULL;
- char blocked[32] = {0,};
- char granted[32] = {0,};
- int count = 0;
- char key[GF_DUMP_MAX_BUF_LEN] = {0,};
-
- char tmp[256];
-
- list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
-
- count = 0;
-
- gf_proc_dump_build_key(key,
- "lock-dump.domain",
- "domain");
- gf_proc_dump_write(key, "%s", dom->domain);
-
- list_for_each_entry (lock, &dom->entrylk_list, domain_list) {
-
- gf_proc_dump_build_key(key,
- "xlator.feature.locks.lock-dump.domain.entrylk",
- "entrylk[%d](ACTIVE)", count );
- if (lock->blkd_time.tv_sec == 0 && lock->blkd_time.tv_usec == 0) {
- snprintf (tmp, 256, ENTRY_GRNTD_FMT,
- lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
- "ENTRYLK_WRLCK", lock->basename,
- (unsigned long long) lock->client_pid,
- lkowner_utoa (&lock->owner), lock->trans,
- ctime_r (&lock->granted_time.tv_sec, granted));
- } else {
- snprintf (tmp, 256, ENTRY_BLKD_GRNTD_FMT,
- lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
- "ENTRYLK_WRLCK", lock->basename,
- (unsigned long long) lock->client_pid,
- lkowner_utoa (&lock->owner), lock->trans,
- ctime_r (&lock->blkd_time.tv_sec, blocked),
- ctime_r (&lock->granted_time.tv_sec, granted));
- }
+int32_t
+pl_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ int32_t error;
- gf_proc_dump_write(key, tmp);
+ error = PL_INODE_REMOVE(rename, frame, this, oldloc, newloc, pl_rename,
+ pl_rename_cbk, oldloc, newloc, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(rename, frame, -1, error, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ }
- count++;
- }
+ return 0;
+}
- list_for_each_entry (lock, &dom->blocked_entrylks, blocked_locks) {
+posix_lock_t *
+gf_lkmig_info_to_posix_lock(call_frame_t *frame, lock_migration_info_t *lmi)
+{
+ posix_lock_t *lock = GF_CALLOC(1, sizeof(posix_lock_t),
+ gf_locks_mt_posix_lock_t);
+ if (!lock)
+ goto out;
- gf_proc_dump_build_key(key,
- "xlator.feature.locks.lock-dump.domain.entrylk",
- "entrylk[%d](BLOCKED)", count );
- snprintf (tmp, 256, ENTRY_BLKD_FMT,
- lock->type == ENTRYLK_RDLCK ? "ENTRYLK_RDLCK" :
- "ENTRYLK_WRLCK", lock->basename,
- (unsigned long long) lock->client_pid,
- lkowner_utoa (&lock->owner), lock->trans,
- ctime_r (&lock->blkd_time.tv_sec, blocked));
+ lock->fl_start = lmi->flock.l_start;
+ lock->fl_type = lmi->flock.l_type;
- gf_proc_dump_write(key, tmp);
+ if (lmi->flock.l_len == 0)
+ lock->fl_end = LLONG_MAX;
+ else
+ lock->fl_end = lmi->flock.l_start + lmi->flock.l_len - 1;
- count++;
- }
+ lock->client = frame->root->client;
- }
+ lock->lk_flags = lmi->lk_flags;
+
+ lock->client_uid = gf_strdup(lmi->client_uid);
+ if (lock->client_uid == NULL) {
+ GF_FREE(lock);
+ lock = NULL;
+ goto out;
+ }
+ lock->client_pid = lmi->flock.l_pid;
+ lock->owner = lmi->flock.l_owner;
+
+ INIT_LIST_HEAD(&lock->list);
+
+out:
+ return lock;
}
-void
-dump_entrylks (pl_inode_t *pl_inode)
+/* This function is supposed to write the active locks from the source brick(in
+ * rebalance context) and write here. Hence, will add the locks directly to the
+ * pl_inode->ext_list*/
+int
+pl_write_active_locks(call_frame_t *frame, pl_inode_t *pl_inode,
+ lock_migration_info_t *locklist)
{
- pthread_mutex_lock (&pl_inode->mutex);
+ posix_lock_t *newlock = NULL;
+ lock_migration_info_t *temp = NULL;
+ int ret = 0;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ /* Just making sure the activelk list is empty. Should not
+ * happen though*/
+ if (!list_empty(&pl_inode->ext_list)) {
+ pthread_mutex_unlock(&pl_inode->mutex);
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "invalid locks found");
+
+ ret = -1;
+ goto out;
+ }
+
+ /* This list also should not be empty */
+ if (list_empty(&locklist->list)) {
+ pthread_mutex_unlock(&pl_inode->mutex);
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, 0, "empty lock list");
+
+ ret = -1;
+ goto out;
+ }
+
+ list_for_each_entry(temp, &locklist->list, list)
{
- __dump_entrylks (pl_inode);
+ newlock = gf_lkmig_info_to_posix_lock(frame, temp);
+ if (!newlock) {
+ pthread_mutex_unlock(&pl_inode->mutex);
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, 0,
+ "mem allocation failed for newlock");
+
+ ret = -1;
+ goto out;
+ }
+ list_add_tail(&newlock->list, &pl_inode->ext_list);
}
- pthread_mutex_unlock (&pl_inode->mutex);
+ }
+ /*TODO: What if few lock add failed with ENOMEM. Should the already
+ * added locks be clearted */
+ pthread_mutex_unlock(&pl_inode->mutex);
+out:
+ return ret;
+}
+
+static int
+pl_setactivelk(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ lock_migration_info_t *locklist, dict_t *xdata)
+{
+ int op_ret = 0;
+ int op_errno = 0;
+ int ret = 0;
+
+ pl_inode_t *pl_inode = pl_inode_get(this, loc->inode, NULL);
+ if (!pl_inode) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, 0, "pl_inode_get failed");
+
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
+ ret = pl_write_active_locks(frame, pl_inode, locklist);
+
+ op_ret = ret;
+out:
+ STACK_UNWIND_STRICT(setactivelk, frame, op_ret, op_errno, NULL);
+
+ return 0;
}
-void
-__dump_inodelks (pl_inode_t *pl_inode)
+int32_t
+pl_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- pl_dom_list_t *dom = NULL;
- pl_inode_lock_t *lock = NULL;
- int count = 0;
- char key[GF_DUMP_MAX_BUF_LEN];
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
- char tmp[256];
+ PL_STACK_UNWIND(unlink, xdata, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
- list_for_each_entry (dom, &pl_inode->dom_list, inode_list) {
+ return 0;
+}
- count = 0;
+int32_t
+pl_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ int32_t error;
- gf_proc_dump_build_key(key,
- "lock-dump.domain",
- "domain");
- gf_proc_dump_write(key, "%s", dom->domain);
+ error = PL_INODE_REMOVE(unlink, frame, this, loc, NULL, pl_unlink,
+ pl_unlink_cbk, loc, xflag, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(unlink, frame, -1, error, NULL, NULL, NULL);
+ }
- list_for_each_entry (lock, &dom->inodelk_list, list) {
+ return 0;
+}
- gf_proc_dump_build_key(key,
- "inodelk",
- "inodelk[%d](ACTIVE)",count );
+int32_t
+pl_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ PL_STACK_UNWIND_FOR_CLIENT(mkdir, xdata, frame, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
+}
- SET_FLOCK_PID (&lock->user_flock, lock);
- pl_dump_lock (tmp, 256, &lock->user_flock,
- &lock->owner,
- lock->transport,
- &lock->granted_time.tv_sec,
- &lock->blkd_time.tv_sec,
- _gf_true);
- gf_proc_dump_write(key, tmp);
+int
+pl_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+ STACK_WIND(frame, pl_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ return 0;
+}
- count++;
- }
+int32_t
+pl_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ PL_STACK_UNWIND_FOR_CLIENT(stat, xdata, frame, op_ret, op_errno, buf,
+ xdata);
+ return 0;
+}
- list_for_each_entry (lock, &dom->blocked_inodelks, blocked_locks) {
+int
+pl_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+ STACK_WIND(frame, pl_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+}
- gf_proc_dump_build_key(key,
- "inodelk",
- "inodelk[%d](BLOCKED)",count );
- SET_FLOCK_PID (&lock->user_flock, lock);
- pl_dump_lock (tmp, 256, &lock->user_flock,
- &lock->owner,
- lock->transport,
- 0, &lock->blkd_time.tv_sec,
- _gf_false);
- gf_proc_dump_write(key, tmp);
+int32_t
+pl_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ PL_STACK_UNWIND_FOR_CLIENT(mknod, xdata, frame, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
+}
- count++;
- }
+int
+pl_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+ STACK_WIND(frame, pl_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
+ return 0;
+}
- }
+int32_t
+pl_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ pl_inode_remove_cbk(this, cookie, op_ret < 0 ? op_errno : 0);
+
+ PL_STACK_UNWIND_FOR_CLIENT(rmdir, xdata, frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+ return 0;
}
-void
-dump_inodelks (pl_inode_t *pl_inode)
+int
+pl_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
{
- pthread_mutex_lock (&pl_inode->mutex);
- {
- __dump_inodelks (pl_inode);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ int32_t error;
+
+ error = PL_INODE_REMOVE(rmdir, frame, this, loc, NULL, pl_rmdir,
+ pl_rmdir_cbk, loc, xflags, xdata);
+ if (error > 0) {
+ STACK_UNWIND_STRICT(rmdir, frame, -1, error, NULL, NULL, NULL);
+ }
+ return 0;
}
-void
-__dump_posixlks (pl_inode_t *pl_inode)
+int32_t
+pl_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- posix_lock_t *lock = NULL;
- int count = 0;
- char key[GF_DUMP_MAX_BUF_LEN];
+ PL_STACK_UNWIND_FOR_CLIENT(symlink, xdata, frame, op_ret, op_errno, inode,
+ buf, preparent, postparent, xdata);
+ return 0;
+}
- char tmp[256];
+int
+pl_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+ STACK_WIND(frame, pl_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata);
+ return 0;
+}
- list_for_each_entry (lock, &pl_inode->ext_list, list) {
+int32_t
+pl_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ pl_inode_t *pl_inode = (pl_inode_t *)cookie;
- SET_FLOCK_PID (&lock->user_flock, lock);
- gf_proc_dump_build_key(key,
- "posixlk",
- "posixlk[%d](%s)",
- count,
- lock->blocked ? "BLOCKED" : "ACTIVE");
- pl_dump_lock (tmp, 256, &lock->user_flock,
- &lock->owner, lock->transport,
- &lock->granted_time.tv_sec, &lock->blkd_time.tv_sec,
- (lock->blocked)? _gf_false: _gf_true);
- gf_proc_dump_write(key, tmp);
+ if (op_ret >= 0) {
+ pthread_mutex_lock(&pl_inode->mutex);
- count++;
+ /* TODO: can happen pl_inode->links == 0 ? */
+ if (pl_inode->links >= 0) {
+ pl_inode->links++;
}
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
+ PL_STACK_UNWIND_FOR_CLIENT(link, xdata, frame, op_ret, op_errno, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+int
+pl_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ pl_inode_t *pl_inode;
+
+ pl_inode = pl_inode_get(this, oldloc->inode, NULL);
+ if (pl_inode == NULL) {
+ STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
+
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), oldloc, newloc);
+ STACK_WIND_COOKIE(frame, pl_link_cbk, pl_inode, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
}
-void
-dump_posixlks (pl_inode_t *pl_inode)
+int32_t
+pl_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- pthread_mutex_lock (&pl_inode->mutex);
- {
- __dump_posixlks (pl_inode);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
+ PL_STACK_UNWIND_FOR_CLIENT(fsync, xdata, frame, op_ret, op_errno, prebuf,
+ postbuf, xdata);
+ return 0;
+}
+int
+pl_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+ STACK_WIND(frame, pl_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
}
int32_t
-pl_dump_inode_priv (xlator_t *this, inode_t *inode)
+pl_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
+ PL_STACK_UNWIND_FOR_CLIENT(readdir, xdata, frame, op_ret, op_errno, entries,
+ xdata);
+ return 0;
+}
- int ret = -1;
- uint64_t tmp_pl_inode = 0;
- pl_inode_t *pl_inode = NULL;
- char *pathname = NULL;
+int
+pl_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+ STACK_WIND(frame, pl_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
+ return 0;
+}
- int count = 0;
+int32_t
+pl_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ PL_STACK_UNWIND_FOR_CLIENT(fsyncdir, xdata, frame, op_ret, op_errno, xdata);
+ return 0;
+}
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
+int
+pl_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+ STACK_WIND(frame, pl_fsyncdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsyncdir, fd, datasync, xdata);
+ return 0;
+}
- ret = inode_ctx_get (inode, this, &tmp_pl_inode);
+int32_t
+pl_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct statvfs *buf, dict_t *xdata)
+{
+ PL_STACK_UNWIND_FOR_CLIENT(statfs, xdata, frame, op_ret, op_errno, buf,
+ xdata);
+ return 0;
+}
- if (ret != 0)
- goto out;
+int
+pl_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+ STACK_WIND(frame, pl_statfs_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
+}
- pl_inode = (pl_inode_t *)(long)tmp_pl_inode;
+int32_t
+pl_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
+ local = frame->local;
+ if (local && local->update_mlock_enforced_flag && op_ret != -1) {
+ pl_inode = pl_inode_get(this, local->inode, NULL);
if (!pl_inode) {
- ret = -1;
- goto out;
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
}
- gf_proc_dump_add_section("xlator.features.locks.%s.inode", this->name);
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ pl_inode->mlock_enforced = _gf_false;
+ pl_inode->check_mlock_info = _gf_false;
+ pl_inode->track_fop_wind_count = _gf_true;
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
- /*We are safe to call __inode_path since we have the
- * inode->table->lock */
- __inode_path (inode, NULL, &pathname);
- if (pathname)
- gf_proc_dump_write ("path", "%s", pathname);
+unwind:
+ PL_STACK_UNWIND_FOR_CLIENT(removexattr, xdata, frame, op_ret, op_errno,
+ xdata);
+ return 0;
+}
- gf_proc_dump_write("mandatory", "%d", pl_inode->mandatory);
+int
+pl_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int op_ret = 0;
+ int op_errno = EINVAL;
+ posix_locks_private_t *priv = this->private;
- count = get_entrylk_count (this, inode);
- if (count) {
- gf_proc_dump_write("entrylk-count", "%d", count);
- dump_entrylks(pl_inode);
- }
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
- count = get_inodelk_count (this, inode);
- if (count) {
- gf_proc_dump_write("inodelk-count", "%d", count);
- dump_inodelks(pl_inode);
- }
+ PL_CHECK_LOCK_ENFORCE_KEY(frame, ((dict_t *)NULL), name, this, loc,
+ ((fd_t *)NULL), priv);
- count = get_posixlk_count (this, inode);
- if (count) {
- gf_proc_dump_write("posixlk-count", "%d", count);
- dump_posixlks(pl_inode);
- }
+ STACK_WIND(frame, pl_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ return 0;
-out:
- if (pathname)
- GF_FREE (pathname);
+unwind:
+ PL_STACK_UNWIND_FOR_CLIENT(removexattr, xdata, frame, op_ret, op_errno,
+ NULL);
- return ret;
+ return 0;
}
int32_t
-mem_acct_init (xlator_t *this)
+pl_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- int ret = -1;
-
- if (!this)
- return ret;
+ pl_local_t *local = NULL;
+ pl_inode_t *pl_inode = NULL;
- ret = xlator_mem_acct_init (this, gf_locks_mt_end + 1);
+ local = frame->local;
+ if (local && local->update_mlock_enforced_flag && op_ret != -1) {
+ pl_inode = pl_inode_get(this, local->inode, NULL);
+ if (!pl_inode) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ pl_inode->mlock_enforced = _gf_false;
+ pl_inode->check_mlock_info = _gf_false;
}
+ pthread_mutex_unlock(&pl_inode->mutex);
+ }
- return ret;
+unwind:
+ PL_STACK_UNWIND_FOR_CLIENT(fremovexattr, xdata, frame, op_ret, op_errno,
+ xdata);
+ return 0;
}
int
-init (xlator_t *this)
+pl_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
{
- posix_locks_private_t *priv = NULL;
- xlator_list_t *trav = NULL;
- data_t *mandatory = NULL;
- data_t *trace = NULL;
- int ret = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ posix_locks_private_t *priv = this->private;
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "FATAL: posix-locks should have exactly one child");
- goto out;
- }
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling. Please check the volume file.");
- }
+ PL_CHECK_LOCK_ENFORCE_KEY(frame, ((dict_t *)NULL), name, this,
+ ((loc_t *)NULL), fd, priv);
- trav = this->children;
- while (trav->xlator->children)
- trav = trav->xlator->children;
+ STACK_WIND(frame, pl_fremovexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ return 0;
- if (strncmp ("storage/", trav->xlator->type, 8)) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "'locks' translator is not loaded over a storage "
- "translator");
- goto out;;
- }
+unwind:
+ PL_STACK_UNWIND_FOR_CLIENT(fremovexattr, xdata, frame, op_ret, op_errno,
+ NULL);
+ return 0;
+}
- priv = GF_CALLOC (1, sizeof (*priv),
- gf_locks_mt_posix_locks_private_t);
+int32_t
+pl_rchecksum_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uint32_t weak_cksum,
+ uint8_t *strong_cksum, dict_t *xdata)
+{
+ PL_STACK_UNWIND_FOR_CLIENT(rchecksum, xdata, frame, op_ret, op_errno,
+ weak_cksum, strong_cksum, xdata);
+ return 0;
+}
- mandatory = dict_get (this->options, "mandatory-locks");
- if (mandatory)
- gf_log (this->name, GF_LOG_WARNING,
- "mandatory locks not supported in this minor release.");
+int
+pl_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+ STACK_WIND(frame, pl_rchecksum_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rchecksum, fd, offset, len, xdata);
+ return 0;
+}
- trace = dict_get (this->options, "trace");
- if (trace) {
- if (gf_string2boolean (trace->data,
- &priv->trace) == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "'trace' takes on only boolean values.");
- goto out;
- }
- }
+int32_t
+pl_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ PL_STACK_UNWIND_FOR_CLIENT(xattrop, xdata, frame, op_ret, op_errno, dict,
+ xdata);
+ return 0;
+}
- this->local_pool = mem_pool_new (pl_local_t, 32);
- if (!this->local_pool) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto out;
- }
+int
+pl_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+ STACK_WIND(frame, pl_xattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc, optype, xattr, xdata);
+ return 0;
+}
- this->private = priv;
- ret = 0;
+int32_t
+pl_fxattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ PL_STACK_UNWIND_FOR_CLIENT(fxattrop, xdata, frame, op_ret, op_errno, dict,
+ xdata);
+ return 0;
+}
-out:
- if (ret) {
- if (priv)
- GF_FREE (priv);
- }
- return ret;
+int
+pl_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+ STACK_WIND(frame, pl_fxattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fd, optype, xattr, xdata);
+ return 0;
}
+int32_t
+pl_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ PL_STACK_UNWIND_FOR_CLIENT(setattr, xdata, frame, op_ret, op_errno, statpre,
+ statpost, xdata);
+ return 0;
+}
int
-fini (xlator_t *this)
+pl_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
{
- posix_locks_private_t *priv = NULL;
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+ STACK_WIND(frame, pl_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
+}
- priv = this->private;
- if (!priv)
- return 0;
- this->private = NULL;
- GF_FREE (priv);
+int32_t
+pl_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ PL_STACK_UNWIND_FOR_CLIENT(fsetattr, xdata, frame, op_ret, op_errno,
+ statpre, statpost, xdata);
+ return 0;
+}
- return 0;
+int
+pl_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+ STACK_WIND(frame, pl_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
}
+int32_t
+pl_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ PL_STACK_UNWIND_FOR_CLIENT(fallocate, xdata, frame, op_ret, op_errno, pre,
+ post, xdata);
+ return 0;
+}
int
-pl_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *flock,
- dict_t *xdata);
+pl_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+ STACK_WIND(frame, pl_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, keep_size, offset, len,
+ xdata);
+ return 0;
+}
-int
-pl_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *flock,
- dict_t *xdata);
+int32_t
+pl_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
+{
+ PL_STACK_UNWIND_FOR_CLIENT(readlink, xdata, frame, op_ret, op_errno, path,
+ buf, xdata);
+ return 0;
+}
int
-pl_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+pl_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+ STACK_WIND(frame, pl_readlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
+ return 0;
+}
+
+int32_t
+pl_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ PL_STACK_UNWIND_FOR_CLIENT(access, xdata, frame, op_ret, op_errno, xdata);
+ return 0;
+}
int
-pl_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata);
+pl_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, ((fd_t *)NULL), loc, NULL);
+ STACK_WIND(frame, pl_access_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->access, loc, mask, xdata);
+ return 0;
+}
+
+int32_t
+pl_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, off_t offset, dict_t *xdata)
+{
+ PL_STACK_UNWIND_FOR_CLIENT(seek, xdata, frame, op_ret, op_errno, offset,
+ xdata);
+ return 0;
+}
+
+int32_t
+pl_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ PL_LOCAL_GET_REQUESTS(frame, this, xdata, fd, NULL, NULL);
+ STACK_WIND(frame, pl_seek_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->seek, fd, offset, what, xdata);
+ return 0;
+}
struct xlator_fops fops = {
- .lookup = pl_lookup,
- .create = pl_create,
- .truncate = pl_truncate,
- .ftruncate = pl_ftruncate,
- .open = pl_open,
- .readv = pl_readv,
- .writev = pl_writev,
- .lk = pl_lk,
- .inodelk = pl_inodelk,
- .finodelk = pl_finodelk,
- .entrylk = pl_entrylk,
- .fentrylk = pl_fentrylk,
- .flush = pl_flush,
- .opendir = pl_opendir,
- .readdirp = pl_readdirp,
- .getxattr = pl_getxattr,
+ .lookup = pl_lookup,
+ .create = pl_create,
+ .fstat = pl_fstat,
+ .truncate = pl_truncate,
+ .ftruncate = pl_ftruncate,
+ .discard = pl_discard,
+ .zerofill = pl_zerofill,
+ .open = pl_open,
+ .readv = pl_readv,
+ .writev = pl_writev,
+ .lk = pl_lk,
+ .inodelk = pl_inodelk,
+ .finodelk = pl_finodelk,
+ .entrylk = pl_entrylk,
+ .fentrylk = pl_fentrylk,
+ .flush = pl_flush,
+ .opendir = pl_opendir,
+ .readdirp = pl_readdirp,
+ .setxattr = pl_setxattr,
+ .fsetxattr = pl_fsetxattr,
+ .getxattr = pl_getxattr,
+ .fgetxattr = pl_fgetxattr,
+ .removexattr = pl_removexattr,
+ .fremovexattr = pl_fremovexattr,
+ .rename = pl_rename,
+ .getactivelk = pl_getactivelk,
+ .setactivelk = pl_setactivelk,
+ .unlink = pl_unlink,
+ .access = pl_access,
+ .readlink = pl_readlink,
+ .fallocate = pl_fallocate,
+ .fsetattr = pl_fsetattr,
+ .setattr = pl_setattr,
+ .fxattrop = pl_fxattrop,
+ .xattrop = pl_xattrop,
+ .rchecksum = pl_rchecksum,
+ .statfs = pl_statfs,
+ .fsyncdir = pl_fsyncdir,
+ .readdir = pl_readdir,
+ .symlink = pl_symlink,
+ .link = pl_link,
+ .rmdir = pl_rmdir,
+ .mknod = pl_mknod,
+ .stat = pl_stat,
+ .seek = pl_seek,
};
struct xlator_dumpops dumpops = {
- .inodectx = pl_dump_inode_priv,
+ .inodectx = pl_dump_inode_priv,
};
struct xlator_cbks cbks = {
- .forget = pl_forget,
- .release = pl_release,
- .releasedir = pl_releasedir,
+ .forget = pl_forget,
+ .release = pl_release,
+ .releasedir = pl_releasedir,
+ .client_destroy = pl_client_destroy_cbk,
+ .client_disconnect = pl_client_disconnect_cbk,
};
-
struct volume_options options[] = {
- { .key = { "mandatory-locks", "mandatory" },
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = { "trace" },
- .type = GF_OPTION_TYPE_BOOL
- },
- { .key = {NULL} },
+ {.key = {"mandatory-locking"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "off",
+ .op_version = {GD_OP_VERSION_3_8_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"locks"},
+ .description = "Specifies the mandatory-locking mode. Valid options "
+ "are 'file' to use linux style mandatory locks, "
+ "'forced' to use volume strictly under mandatory lock "
+ "semantics only and 'optimal' to treat advisory and "
+ "mandatory locks separately on their own."},
+ {.key = {"trace"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"locks"},
+ .description = "Trace the different lock requests "
+ "to logs."},
+ {.key = {"monkey-unlocking"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"locks"},
+ .description = "Ignore a random number of unlock requests. Useful "
+ "for testing/creating robust lock recovery mechanisms."},
+ {
+ .key = {"revocation-secs"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "0",
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"locks"},
+ .description = "Maximum time a lock can be taken out, before"
+ "being revoked.",
+ },
+ {
+ .key = {"revocation-clear-all"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "false",
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"locks"},
+ .description = "If set to true, will revoke BOTH granted and blocked "
+ "(pending) lock requests if a revocation threshold is "
+ "hit.",
+ },
+ {.key = {"revocation-max-blocked"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0,
+ .max = INT_MAX,
+ .default_value = "0",
+ .op_version = {GD_OP_VERSION_3_9_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"locks"},
+ .description = "A number of blocked lock requests after which a lock "
+ "will be revoked to allow the others to proceed. Can "
+ "be used in conjunction w/ revocation-clear-all."},
+ {.key = {"notify-contention"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "yes",
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .tags = {"locks", "contention"},
+ .description = "When this option is enabled and a lock request "
+ "conflicts with a currently granted lock, an upcall "
+ "notification will be sent to the current owner of "
+ "the lock to request it to be released as soon as "
+ "possible."},
+ {.key = {"notify-contention-delay"},
+ .type = GF_OPTION_TYPE_INT,
+ .min = 0, /* An upcall notification is sent every time a conflict is
+ * detected. */
+ .max = 60,
+ .default_value = "5",
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .tags = {"locks", "contention", "timeout"},
+ .description = "This value determines the minimum amount of time "
+ "(in seconds) between upcall contention notifications "
+ "on the same inode. If multiple lock requests are "
+ "received during this period, only one upcall will "
+ "be sent."},
+ {.key = {"enforce-mandatory-lock"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .flags = OPT_FLAG_SETTABLE,
+ .op_version = {GD_OP_VERSION_6_0},
+ .description = "option to enable lock enforcement"},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "locks",
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/features/locks/src/reservelk.c b/xlators/features/locks/src/reservelk.c
index 11b724bc562..604691fd887 100644
--- a/xlators/features/locks/src/reservelk.c
+++ b/xlators/features/locks/src/reservelk.c
@@ -1,453 +1,382 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "inode.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/list.h>
#include "locks.h"
#include "common.h"
-void
-__delete_reserve_lock (posix_lock_t *lock)
-{
- list_del (&lock->list);
-}
-
-void
-__destroy_reserve_lock (posix_lock_t *lock)
-{
- GF_FREE (lock);
-}
-
/* Return true if the two reservelks have exactly same lock boundaries */
int
-reservelks_equal (posix_lock_t *l1, posix_lock_t *l2)
+reservelks_equal(posix_lock_t *l1, posix_lock_t *l2)
{
- if ((l1->fl_start == l2->fl_start) &&
- (l1->fl_end == l2->fl_end))
- return 1;
+ if ((l1->fl_start == l2->fl_start) && (l1->fl_end == l2->fl_end))
+ return 1;
- return 0;
+ return 0;
}
/* Determine if lock is grantable or not */
static posix_lock_t *
-__reservelk_grantable (pl_inode_t *pl_inode, posix_lock_t *lock)
+__reservelk_grantable(pl_inode_t *pl_inode, posix_lock_t *lock)
{
- xlator_t *this = NULL;
- posix_lock_t *l = NULL;
- posix_lock_t *ret_lock = NULL;
-
- this = THIS;
-
- if (list_empty (&pl_inode->reservelk_list)) {
- gf_log (this->name, GF_LOG_TRACE,
- "No reservelks in list");
- goto out;
- }
- list_for_each_entry (l, &pl_inode->reservelk_list, list){
- if (reservelks_equal (lock, l)) {
- ret_lock = l;
- break;
- }
+ xlator_t *this = THIS;
+ posix_lock_t *l = NULL;
+ posix_lock_t *ret_lock = NULL;
+
+ if (list_empty(&pl_inode->reservelk_list)) {
+ gf_log(this->name, GF_LOG_TRACE, "No reservelks in list");
+ goto out;
+ }
+ list_for_each_entry(l, &pl_inode->reservelk_list, list)
+ {
+ if (reservelks_equal(lock, l)) {
+ ret_lock = l;
+ break;
}
+ }
out:
- return ret_lock;
+ return ret_lock;
}
-static inline int
-__same_owner_reservelk (posix_lock_t *l1, posix_lock_t *l2)
+static int
+__same_owner_reservelk(posix_lock_t *l1, posix_lock_t *l2)
{
- return (is_same_lkowner (&l1->owner, &l2->owner));
-
+ return (is_same_lkowner(&l1->owner, &l2->owner));
}
static posix_lock_t *
-__matching_reservelk (pl_inode_t *pl_inode, posix_lock_t *lock)
+__matching_reservelk(pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *l = NULL;
+ posix_lock_t *l = NULL;
- if (list_empty (&pl_inode->reservelk_list)) {
- gf_log ("posix-locks", GF_LOG_TRACE,
- "reservelk list empty");
- return NULL;
- }
+ if (list_empty(&pl_inode->reservelk_list)) {
+ gf_log("posix-locks", GF_LOG_TRACE, "reservelk list empty");
+ return NULL;
+ }
- list_for_each_entry (l, &pl_inode->reservelk_list, list) {
- if (reservelks_equal (l, lock)) {
- gf_log ("posix-locks", GF_LOG_TRACE,
- "equal reservelk found");
- break;
- }
+ list_for_each_entry(l, &pl_inode->reservelk_list, list)
+ {
+ if (reservelks_equal(l, lock)) {
+ gf_log("posix-locks", GF_LOG_TRACE, "equal reservelk found");
+ break;
}
+ }
- return l;
+ return l;
}
static int
-__reservelk_conflict (xlator_t *this, pl_inode_t *pl_inode,
- posix_lock_t *lock)
+__reservelk_conflict(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *conf = NULL;
- int ret = 0;
-
- conf = __matching_reservelk (pl_inode, lock);
- if (conf) {
- gf_log (this->name, GF_LOG_TRACE,
- "Matching reservelk found");
- if (__same_owner_reservelk (lock, conf)) {
- list_del_init (&conf->list);
- gf_log (this->name, GF_LOG_TRACE,
- "Removing the matching reservelk for setlk to progress");
- GF_FREE (conf);
- ret = 0;
- } else {
- gf_log (this->name, GF_LOG_TRACE,
- "Conflicting reservelk found");
- ret = 1;
- }
-
+ int ret = 0;
+
+ posix_lock_t *conf = __matching_reservelk(pl_inode, lock);
+ if (conf) {
+ gf_log(this->name, GF_LOG_TRACE, "Matching reservelk found");
+ if (__same_owner_reservelk(lock, conf)) {
+ list_del_init(&conf->list);
+ gf_log(this->name, GF_LOG_TRACE,
+ "Removing the matching reservelk for setlk to progress");
+ __destroy_lock(conf);
+ ret = 0;
+ } else {
+ gf_log(this->name, GF_LOG_TRACE, "Conflicting reservelk found");
+ ret = 1;
}
- return ret;
-
+ }
+ return ret;
}
int
-pl_verify_reservelk (xlator_t *this, pl_inode_t *pl_inode,
- posix_lock_t *lock, int can_block)
+pl_verify_reservelk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ const int can_block)
{
- int ret = 0;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- if (__reservelk_conflict (this, pl_inode, lock)) {
- gf_log (this->name, GF_LOG_TRACE,
- "Found conflicting reservelk. Blocking until reservelk is unlocked.");
- lock->blocked = can_block;
- list_add_tail (&lock->list, &pl_inode->blocked_calls);
- ret = -1;
- goto unlock;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "no conflicting reservelk found. Call continuing");
- ret = 0;
-
+ int ret = 0;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ if (__reservelk_conflict(this, pl_inode, lock)) {
+ lock->blocked = can_block;
+ list_add_tail(&lock->list, &pl_inode->blocked_calls);
+ pthread_mutex_unlock(&pl_inode->mutex);
+ gf_log(this->name, GF_LOG_TRACE,
+ "Found conflicting reservelk. Blocking until reservelk is "
+ "unlocked.");
+ ret = -1;
+ goto out;
}
-unlock:
- pthread_mutex_unlock (&pl_inode->mutex);
-
- return ret;
-
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+ gf_log(this->name, GF_LOG_TRACE,
+ "no conflicting reservelk found. Call continuing");
+ ret = 0;
+out:
+ return ret;
}
-
/* Determines if lock can be granted and adds the lock. If the lock
* is blocking, adds it to the blocked_reservelks.
*/
static int
-__lock_reservelk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
- int can_block)
+__lock_reservelk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ const int can_block)
{
- posix_lock_t *conf = NULL;
- int ret = -EINVAL;
+ int ret = -EINVAL;
- conf = __reservelk_grantable (pl_inode, lock);
- if (conf){
- ret = -EAGAIN;
- if (can_block == 0)
- goto out;
+ posix_lock_t *conf = __reservelk_grantable(pl_inode, lock);
+ if (conf) {
+ ret = -EAGAIN;
+ if (can_block == 0)
+ goto out;
- list_add_tail (&lock->list, &pl_inode->blocked_reservelks);
+ list_add_tail(&lock->list, &pl_inode->blocked_reservelks);
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) lk-owner:%s %"PRId64" - %"PRId64" => Blocked",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->user_flock.l_start,
- lock->user_flock.l_len);
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) lk-owner:%s %" PRId64 " - %" PRId64 " => Blocked",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
+ lkowner_utoa(&lock->owner), lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ goto out;
+ }
- goto out;
- }
-
- list_add (&lock->list, &pl_inode->reservelk_list);
+ list_add(&lock->list, &pl_inode->reservelk_list);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
static posix_lock_t *
-find_matching_reservelk (posix_lock_t *lock, pl_inode_t *pl_inode)
+find_matching_reservelk(posix_lock_t *lock, pl_inode_t *pl_inode)
{
- posix_lock_t *l = NULL;
- list_for_each_entry (l, &pl_inode->reservelk_list, list) {
- if (reservelks_equal (l, lock))
- return l;
- }
- return NULL;
+ posix_lock_t *l = NULL;
+ list_for_each_entry(l, &pl_inode->reservelk_list, list)
+ {
+ if (reservelks_equal(l, lock))
+ return l;
+ }
+ return NULL;
}
/* Set F_UNLCK removes a lock which has the exact same lock boundaries
* as the UNLCK lock specifies. If such a lock is not found, returns invalid
*/
static posix_lock_t *
-__reserve_unlock_lock (xlator_t *this, posix_lock_t *lock, pl_inode_t *pl_inode)
+__reserve_unlock_lock(xlator_t *this, posix_lock_t *lock, pl_inode_t *pl_inode)
{
-
- posix_lock_t *conf = NULL;
-
- conf = find_matching_reservelk (lock, pl_inode);
- if (!conf) {
- gf_log (this->name, GF_LOG_DEBUG,
- " Matching lock not found for unlock");
- goto out;
- }
- __delete_reserve_lock (conf);
- gf_log (this->name, GF_LOG_DEBUG,
- " Matching lock found for unlock");
+ posix_lock_t *conf = find_matching_reservelk(lock, pl_inode);
+ if (!conf) {
+ gf_log(this->name, GF_LOG_DEBUG, " Matching lock not found for unlock");
+ goto out;
+ }
+ __delete_lock(conf);
+ gf_log(this->name, GF_LOG_DEBUG, " Matching lock found for unlock");
out:
- return conf;
-
-
+ return conf;
}
static void
-__grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode,
- struct list_head *granted)
+__grant_blocked_reserve_locks(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted)
{
- int bl_ret = 0;
- posix_lock_t *bl = NULL;
- posix_lock_t *tmp = NULL;
+ int bl_ret = 0;
+ posix_lock_t *bl = NULL;
+ posix_lock_t *tmp = NULL;
- struct list_head blocked_list;
+ struct list_head blocked_list;
- INIT_LIST_HEAD (&blocked_list);
- list_splice_init (&pl_inode->blocked_reservelks, &blocked_list);
+ INIT_LIST_HEAD(&blocked_list);
+ list_splice_init(&pl_inode->blocked_reservelks, &blocked_list);
- list_for_each_entry_safe (bl, tmp, &blocked_list, list) {
+ list_for_each_entry_safe(bl, tmp, &blocked_list, list)
+ {
+ list_del_init(&bl->list);
- list_del_init (&bl->list);
+ bl_ret = __lock_reservelk(this, pl_inode, bl, 1);
- bl_ret = __lock_reservelk (this, pl_inode, bl, 1);
-
- if (bl_ret == 0) {
- list_add (&bl->list, granted);
- }
+ if (bl_ret == 0) {
+ list_add(&bl->list, granted);
}
- return;
+ }
+ return;
}
/* Grant all reservelks blocked on lock(s) */
void
-grant_blocked_reserve_locks (xlator_t *this, pl_inode_t *pl_inode)
+grant_blocked_reserve_locks(xlator_t *this, pl_inode_t *pl_inode)
{
- struct list_head granted;
- posix_lock_t *lock = NULL;
- posix_lock_t *tmp = NULL;
+ struct list_head granted;
+ posix_lock_t *lock = NULL;
+ posix_lock_t *tmp = NULL;
- INIT_LIST_HEAD (&granted);
-
- if (list_empty (&pl_inode->blocked_reservelks)) {
- gf_log (this->name, GF_LOG_TRACE,
- "No blocked locks to be granted");
- return;
- }
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- __grant_blocked_reserve_locks (this, pl_inode, &granted);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
-
- list_for_each_entry_safe (lock, tmp, &granted, list) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => Granted",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->user_flock.l_start,
- lock->user_flock.l_len);
-
- STACK_UNWIND_STRICT (lk, lock->frame, 0, 0, &lock->user_flock,
- NULL);
- }
+ INIT_LIST_HEAD(&granted);
+ if (list_empty(&pl_inode->blocked_reservelks)) {
+ gf_log(this->name, GF_LOG_TRACE, "No blocked locks to be granted");
+ return;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __grant_blocked_reserve_locks(this, pl_inode, &granted);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ list_for_each_entry_safe(lock, tmp, &granted, list)
+ {
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => Granted",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
+ lkowner_utoa(&lock->owner), lock->user_flock.l_start,
+ lock->user_flock.l_len);
+
+ STACK_UNWIND_STRICT(lk, lock->frame, 0, 0, &lock->user_flock, NULL);
+ }
}
static void
-__grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode,
- struct list_head *granted)
+__grant_blocked_lock_calls(xlator_t *this, pl_inode_t *pl_inode,
+ struct list_head *granted)
{
- int bl_ret = 0;
- posix_lock_t *bl = NULL;
- posix_lock_t *tmp = NULL;
-
- struct list_head blocked_list;
+ int bl_ret = 0;
+ posix_lock_t *bl = NULL;
+ posix_lock_t *tmp = NULL;
- INIT_LIST_HEAD (&blocked_list);
- list_splice_init (&pl_inode->blocked_reservelks, &blocked_list);
+ struct list_head blocked_list;
- list_for_each_entry_safe (bl, tmp, &blocked_list, list) {
+ INIT_LIST_HEAD(&blocked_list);
+ list_splice_init(&pl_inode->blocked_reservelks, &blocked_list);
- list_del_init (&bl->list);
+ list_for_each_entry_safe(bl, tmp, &blocked_list, list)
+ {
+ list_del_init(&bl->list);
- bl_ret = pl_verify_reservelk (this, pl_inode, bl, bl->blocked);
+ bl_ret = pl_verify_reservelk(this, pl_inode, bl, bl->blocked);
- if (bl_ret == 0) {
- list_add_tail (&bl->list, granted);
- }
+ if (bl_ret == 0) {
+ list_add_tail(&bl->list, granted);
}
- return;
+ }
+ return;
}
void
-grant_blocked_lock_calls (xlator_t *this, pl_inode_t *pl_inode)
+grant_blocked_lock_calls(xlator_t *this, pl_inode_t *pl_inode)
{
- struct list_head granted;
- posix_lock_t *lock = NULL;
- posix_lock_t *tmp = NULL;
- fd_t *fd = NULL;
-
- int can_block = 0;
- int32_t cmd = 0;
- int ret = 0;
-
- if (list_empty (&pl_inode->blocked_calls)) {
- gf_log (this->name, GF_LOG_TRACE,
- "No blocked lock calls to be granted");
- return;
- }
+ struct list_head granted;
+ posix_lock_t *lock = NULL;
+ posix_lock_t *tmp = NULL;
+ fd_t *fd = NULL;
- pthread_mutex_lock (&pl_inode->mutex);
- {
- __grant_blocked_lock_calls (this, pl_inode, &granted);
- }
- pthread_mutex_unlock (&pl_inode->mutex);
-
- list_for_each_entry_safe (lock, tmp, &granted, list) {
- fd = fd_from_fdnum (lock);
-
- if (lock->blocked) {
- can_block = 1;
- cmd = F_SETLKW;
- }
- else
- cmd = F_SETLK;
-
- lock->blocked = 0;
- ret = pl_setlk (this, pl_inode, lock, can_block);
- if (ret == -1) {
- if (can_block) {
- pl_trace_block (this, lock->frame, fd, NULL,
- cmd, &lock->user_flock, NULL);
- continue;
- } else {
- gf_log (this->name, GF_LOG_DEBUG, "returning EAGAIN");
- pl_trace_out (this, lock->frame, fd, NULL, cmd,
- &lock->user_flock, -1, EAGAIN, NULL);
- pl_update_refkeeper (this, fd->inode);
- STACK_UNWIND_STRICT (lk, lock->frame, -1,
- EAGAIN, &lock->user_flock,
- NULL);
- __destroy_lock (lock);
- }
- }
+ int can_block = 0;
+ int32_t cmd = 0;
+ int ret = 0;
+ if (list_empty(&pl_inode->blocked_calls)) {
+ gf_log(this->name, GF_LOG_TRACE, "No blocked lock calls to be granted");
+ return;
+ }
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ __grant_blocked_lock_calls(this, pl_inode, &granted);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ list_for_each_entry_safe(lock, tmp, &granted, list)
+ {
+ fd = fd_from_fdnum(lock);
+
+ if (lock->blocked) {
+ can_block = 1;
+ cmd = F_SETLKW;
+ } else
+ cmd = F_SETLK;
+
+ lock->blocked = 0;
+ ret = pl_setlk(this, pl_inode, lock, can_block);
+ if (ret == -1) {
+ if (can_block) {
+ continue;
+ } else {
+ gf_log(this->name, GF_LOG_DEBUG, "returning EAGAIN");
+ pl_trace_out(this, lock->frame, fd, NULL, cmd,
+ &lock->user_flock, -1, EAGAIN, NULL);
+ pl_update_refkeeper(this, fd->inode);
+ STACK_UNWIND_STRICT(lk, lock->frame, -1, EAGAIN,
+ &lock->user_flock, NULL);
+ __destroy_lock(lock);
+ }
}
-
+ }
}
-
int
-pl_reserve_unlock (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
+pl_reserve_unlock(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock)
{
- posix_lock_t *retlock = NULL;
- int ret = -1;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
- retlock = __reserve_unlock_lock (this, lock, pl_inode);
- if (!retlock) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Bad Unlock issued on Inode lock");
- ret = -EINVAL;
- goto out;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "Reservelk Unlock successful");
- __destroy_reserve_lock (retlock);
- ret = 0;
+ posix_lock_t *retlock = NULL;
+ int ret = -1;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ retlock = __reserve_unlock_lock(this, lock, pl_inode);
+ if (!retlock) {
+ pthread_mutex_unlock(&pl_inode->mutex);
+ gf_log(this->name, GF_LOG_DEBUG, "Bad Unlock issued on Inode lock");
+ ret = -EINVAL;
+ goto out;
}
-out:
- pthread_mutex_unlock (&pl_inode->mutex);
- grant_blocked_reserve_locks (this, pl_inode);
- grant_blocked_lock_calls (this, pl_inode);
-
- return ret;
+ gf_log(this->name, GF_LOG_TRACE, "Reservelk Unlock successful");
+ __destroy_lock(retlock);
+ ret = 0;
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+out:
+ grant_blocked_reserve_locks(this, pl_inode);
+ grant_blocked_lock_calls(this, pl_inode);
+ return ret;
}
int
-pl_reserve_setlk (xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
- int can_block)
+pl_reserve_setlk(xlator_t *this, pl_inode_t *pl_inode, posix_lock_t *lock,
+ int can_block)
{
- int ret = -EINVAL;
-
- pthread_mutex_lock (&pl_inode->mutex);
- {
-
- ret = __lock_reservelk (this, pl_inode, lock, can_block);
- if (ret < 0)
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => NOK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->user_flock.l_start,
- lock->user_flock.l_len);
- else
- gf_log (this->name, GF_LOG_TRACE,
- "%s (pid=%d) (lk-owner=%s) %"PRId64" - %"PRId64" => OK",
- lock->fl_type == F_UNLCK ? "Unlock" : "Lock",
- lock->client_pid,
- lkowner_utoa (&lock->owner),
- lock->fl_start,
- lock->fl_end);
-
- }
- pthread_mutex_unlock (&pl_inode->mutex);
- return ret;
+ int ret = -EINVAL;
+
+ pthread_mutex_lock(&pl_inode->mutex);
+ {
+ ret = __lock_reservelk(this, pl_inode, lock, can_block);
+ }
+ pthread_mutex_unlock(&pl_inode->mutex);
+
+ if (ret < 0)
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => NOK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
+ lkowner_utoa(&lock->owner), lock->user_flock.l_start,
+ lock->user_flock.l_len);
+ else
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s (pid=%d) (lk-owner=%s) %" PRId64 " - %" PRId64 " => OK",
+ lock->fl_type == F_UNLCK ? "Unlock" : "Lock", lock->client_pid,
+ lkowner_utoa(&lock->owner), lock->fl_start, lock->fl_end);
+
+ return ret;
}
diff --git a/xlators/features/locks/tests/unit-test.c b/xlators/features/locks/tests/unit-test.c
index e95612ad46d..d285b12b5aa 100644
--- a/xlators/features/locks/tests/unit-test.c
+++ b/xlators/features/locks/tests/unit-test.c
@@ -1,75 +1,77 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "compat.h"
-#include "xlator.h"
-#include "inode.h"
-#include "logging.h"
-#include "common-utils.h"
-#include "list.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/list.h>
#include "locks.h"
#include "common.h"
-#define expect(cond) if (!(cond)) { goto out; }
+#define expect(cond) \
+ if (!(cond)) { \
+ goto out; \
+ }
-extern int lock_name (pl_inode_t *, const char *, entrylk_type);
-extern int unlock_name (pl_inode_t *, const char *, entrylk_type);
+extern int
+lock_name(pl_inode_t *, const char *, entrylk_type);
+extern int
+unlock_name(pl_inode_t *, const char *, entrylk_type);
-int main (int argc, char **argv)
+int
+main(int argc, char **argv)
{
- int ret = 1;
- int r = -1;
+ int ret = 1;
+ int r = -1;
+
+ pl_inode_t *pinode = CALLOC(sizeof(pl_inode_t), 1);
+ pthread_mutex_init(&pinode->dir_lock_mutex, NULL);
+ INIT_LIST_HEAD(&pinode->gf_dir_locks);
- pl_inode_t *pinode = CALLOC (sizeof (pl_inode_t), 1);
- pthread_mutex_init (&pinode->dir_lock_mutex, NULL);
- INIT_LIST_HEAD (&pinode->gf_dir_locks);
+ r = lock_name(pinode, NULL, ENTRYLK_WRLCK);
+ expect(r == 0);
+ {
+ r = lock_name(pinode, "foo", ENTRYLK_WRLCK);
+ expect(r == -EAGAIN);
+ }
+ r = unlock_name(pinode, NULL, ENTRYLK_WRLCK);
+ expect(r == 0);
- r = lock_name (pinode, NULL, ENTRYLK_WRLCK); expect (r == 0);
- {
- r = lock_name (pinode, "foo", ENTRYLK_WRLCK); expect (r == -EAGAIN);
- }
- r = unlock_name (pinode, NULL, ENTRYLK_WRLCK); expect (r == 0);
+ r = lock_name(pinode, "foo", ENTRYLK_RDLCK);
+ expect(r == 0);
+ {
+ r = lock_name(pinode, "foo", ENTRYLK_RDLCK);
+ expect(r == 0);
+ {
+ r = lock_name(pinode, "foo", ENTRYLK_WRLCK);
+ expect(r == -EAGAIN);
+ }
+ r = unlock_name(pinode, "foo", ENTRYLK_RDLCK);
+ expect(r == 0);
+ }
+ r = unlock_name(pinode, "foo", ENTRYLK_RDLCK);
+ expect(r == 0);
- r = lock_name (pinode, "foo", ENTRYLK_RDLCK); expect (r == 0);
- {
- r = lock_name (pinode, "foo", ENTRYLK_RDLCK); expect (r == 0);
- {
- r = lock_name (pinode, "foo", ENTRYLK_WRLCK); expect (r == -EAGAIN);
- }
- r = unlock_name (pinode, "foo", ENTRYLK_RDLCK); expect (r == 0);
- }
- r = unlock_name (pinode, "foo", ENTRYLK_RDLCK); expect (r == 0);
-
- r = lock_name (pinode, "foo", ENTRYLK_WRLCK); expect (r == 0);
- r = unlock_name (pinode, "foo", ENTRYLK_WRLCK); expect (r == 0);
+ r = lock_name(pinode, "foo", ENTRYLK_WRLCK);
+ expect(r == 0);
+ r = unlock_name(pinode, "foo", ENTRYLK_WRLCK);
+ expect(r == 0);
- r = lock_name (pinode, "baz", ENTRYLK_WRLCK); expect (r == 0);
- r = lock_name (pinode, "baz", ENTRYLK_RDLCK); expect (r == -EAGAIN);
+ r = lock_name(pinode, "baz", ENTRYLK_WRLCK);
+ expect(r == 0);
+ r = lock_name(pinode, "baz", ENTRYLK_RDLCK);
+ expect(r == -EAGAIN);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
diff --git a/xlators/features/mac-compat/Makefile.am b/xlators/features/mac-compat/Makefile.am
deleted file mode 100644
index d471a3f9243..00000000000
--- a/xlators/features/mac-compat/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/features/mac-compat/src/Makefile.am b/xlators/features/mac-compat/src/Makefile.am
deleted file mode 100644
index 915c13e308f..00000000000
--- a/xlators/features/mac-compat/src/Makefile.am
+++ /dev/null
@@ -1,13 +0,0 @@
-xlator_LTLIBRARIES = mac-compat.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-
-mac_compat_la_LDFLAGS = -module -avoidversion
-
-mac_compat_la_SOURCES = mac-compat.c
-mac_compat_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/features/mac-compat/src/mac-compat.c b/xlators/features/mac-compat/src/mac-compat.c
deleted file mode 100644
index cce621e6723..00000000000
--- a/xlators/features/mac-compat/src/mac-compat.c
+++ /dev/null
@@ -1,248 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "defaults.h"
-#include "compat-errno.h"
-
-
-enum apple_xattr {
- GF_FINDER_INFO_XATTR,
- GF_RESOURCE_FORK_XATTR,
- GF_XATTR_ALL,
- GF_XATTR_NONE
-};
-
-static char *apple_xattr_name[] = {
- [GF_FINDER_INFO_XATTR] = "com.apple.FinderInfo",
- [GF_RESOURCE_FORK_XATTR] = "com.apple.ResourceFork"
-};
-
-static const char *apple_xattr_value[] = {
- [GF_FINDER_INFO_XATTR] =
- /* 1 2 3 4 5 6 7 8 */
- "\0\0\0\0\0\0\0\0"
- "\0\0\0\0\0\0\0\0"
- "\0\0\0\0\0\0\0\0"
- "\0\0\0\0\0\0\0\0",
- [GF_RESOURCE_FORK_XATTR] = ""
-};
-
-static int32_t apple_xattr_len[] = {
- [GF_FINDER_INFO_XATTR] = 32,
- [GF_RESOURCE_FORK_XATTR] = 1
-};
-
-
-int32_t
-maccomp_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
-{
- intptr_t ax = (intptr_t)this->private;
- int i = 0;
-
- if ((ax == GF_XATTR_ALL && op_ret >= 0) || ax != GF_XATTR_NONE) {
- op_ret = op_errno = 0;
-
- for (i = 0; i < GF_XATTR_ALL; i++) {
- if (dict_get (dict, apple_xattr_name[i]))
- continue;
-
- if (dict_set (dict, apple_xattr_name[i],
- bin_to_data ((void *)apple_xattr_value[i],
- apple_xattr_len[i])) == -1) {
- op_ret = -1;
- op_errno = ENOMEM;
-
- break;
- }
- }
- }
-
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
-
- return 0;
-}
-
-
-int32_t
-maccomp_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- intptr_t ax = GF_XATTR_NONE;
- int i = 0;
-
- if (name) {
- for (i = 0; i < GF_XATTR_ALL; i++) {
- if (strcmp (apple_xattr_name[i], name) == 0) {
- ax = i;
-
- break;
- }
- }
- } else
- ax = GF_XATTR_ALL;
-
- this->private = (void *)ax;
-
- STACK_WIND (frame, maccomp_getxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- loc, name, xdata);
- return 0;
-}
-
-
-int32_t
-maccomp_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
-{
- intptr_t ax = GF_XATTR_NONE;
- int i = 0;
-
- if (name) {
- for (i = 0; i < GF_XATTR_ALL; i++) {
- if (strcmp (apple_xattr_name[i], name) == 0) {
- ax = i;
-
- break;
- }
- }
- } else
- ax = GF_XATTR_ALL;
-
- this->private = (void *)ax;
-
- STACK_WIND (frame, maccomp_getxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr,
- fd, name, xdata);
- return 0;
-}
-
-
-int32_t
-maccomp_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- intptr_t ax = (intptr_t)this->private;
-
- if (op_ret == -1 && ax != GF_XATTR_NONE)
- op_ret = op_errno = 0;
-
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
-
- return 0;
-}
-
-
-int32_t
-maccomp_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- intptr_t ax = GF_XATTR_NONE;
- int i = 0;
-
- for (i = 0; i < GF_XATTR_ALL; i++) {
- if (dict_get (dict, apple_xattr_name[i])) {
- ax = i;
-
- break;
- }
- }
-
- this->private = (void *)ax;
-
- STACK_WIND (frame, maccomp_setxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- loc, dict, flags, xdata);
- return 0;
-}
-
-
-int32_t
-maccomp_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
-{
- intptr_t ax = GF_XATTR_NONE;
- int i = 0;
-
- for (i = 0; i < GF_XATTR_ALL; i++) {
- if (dict_get (dict, apple_xattr_name[i])) {
- ax = i;
-
- break;
- }
- }
-
- this->private = (void *)ax;
-
- STACK_WIND (frame, maccomp_setxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr,
- fd, dict, flags, xdata);
- return 0;
-}
-
-
-int32_t
-init (xlator_t *this)
-{
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "translator not configured with exactly one child");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- return 0;
-}
-
-
-void
-fini (xlator_t *this)
-{
- return;
-}
-
-
-struct xlator_fops fops = {
- .getxattr = maccomp_getxattr,
- .fgetxattr = maccomp_fgetxattr,
- .setxattr = maccomp_setxattr,
- .fsetxattr = maccomp_fsetxattr,
-};
-
-struct xlator_cbks cbks = {
-};
-
-struct volume_options options[] = {
- { .key = {NULL} },
-};
diff --git a/xlators/features/marker/Makefile.am b/xlators/features/marker/Makefile.am
index a6ba2de16ae..a985f42a877 100644
--- a/xlators/features/marker/Makefile.am
+++ b/xlators/features/marker/Makefile.am
@@ -1,3 +1,3 @@
-SUBDIRS = src @SYNCDAEMON_SUBDIR@
+SUBDIRS = src
CLEANFILES =
diff --git a/xlators/features/marker/src/Makefile.am b/xlators/features/marker/src/Makefile.am
index 501586a76b6..58056b36511 100644
--- a/xlators/features/marker/src/Makefile.am
+++ b/xlators/features/marker/src/Makefile.am
@@ -1,15 +1,24 @@
+if WITH_SERVER
xlator_LTLIBRARIES = marker.la
+endif
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-marker_la_LDFLAGS = -module -avoidversion
+marker_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+marker_la_SOURCES = marker.c marker-quota.c marker-quota-helper.c \
+ marker-common.c
-marker_la_SOURCES = marker.c marker-quota.c marker-quota-helper.c marker-common.c
marker_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = marker-mem-types.h marker.h marker-quota.h marker-quota-helper.h marker-common.h $(top_builddir)/xlators/lib/src/libxlator.h
+noinst_HEADERS = marker-mem-types.h marker.h marker-quota.h \
+ marker-quota-helper.h marker-common.h \
+ $(top_builddir)/xlators/lib/src/libxlator.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/xlators/lib/src
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -fno-strict-aliasing -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src $(GF_CFLAGS) -shared -nostartfiles
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/marker/src/marker-common.c b/xlators/features/marker/src/marker-common.c
index a413781bc0c..9c9047005d6 100644
--- a/xlators/features/marker/src/marker-common.c
+++ b/xlators/features/marker/src/marker-common.c
@@ -1,86 +1,57 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
#include <fnmatch.h>
#include "marker-common.h"
marker_inode_ctx_t *
-marker_inode_ctx_new ()
+marker_inode_ctx_new()
{
- marker_inode_ctx_t *ctx = NULL;
+ marker_inode_ctx_t *ctx = NULL;
- ctx = GF_CALLOC (1, sizeof (marker_inode_ctx_t),
- gf_marker_mt_marker_inode_ctx_t);
- if (ctx == NULL)
- goto out;
+ ctx = GF_CALLOC(1, sizeof(marker_inode_ctx_t),
+ gf_marker_mt_marker_inode_ctx_t);
+ if (ctx == NULL)
+ goto out;
- ctx->quota_ctx = NULL;
+ ctx->quota_ctx = NULL;
out:
- return ctx;
+ return ctx;
}
int32_t
-marker_force_inode_ctx_get (inode_t *inode, xlator_t *this,
- marker_inode_ctx_t **ctx)
+marker_force_inode_ctx_get(inode_t *inode, xlator_t *this,
+ marker_inode_ctx_t **ctx)
{
- int32_t ret = -1;
- uint64_t ctx_int = 0;
+ int32_t ret = -1;
+ uint64_t ctx_int = 0;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx_int);
- if (ret == 0)
- *ctx = (marker_inode_ctx_t *) (unsigned long)ctx_int;
- else {
- *ctx = marker_inode_ctx_new ();
- if (*ctx == NULL)
- goto unlock;
-
- ret = __inode_ctx_put (inode, this,
- (uint64_t )(unsigned long) *ctx);
- if (ret == -1) {
- GF_FREE (*ctx);
- goto unlock;
- }
- ret = 0;
- }
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get(inode, this, &ctx_int);
+ if (ret == 0)
+ *ctx = (marker_inode_ctx_t *)(unsigned long)ctx_int;
+ else {
+ *ctx = marker_inode_ctx_new();
+ if (*ctx == NULL)
+ goto unlock;
+
+ ret = __inode_ctx_put(inode, this, (uint64_t)(unsigned long)*ctx);
+ if (ret == -1) {
+ GF_FREE(*ctx);
+ goto unlock;
+ }
+ ret = 0;
}
-unlock: UNLOCK (&inode->lock);
+ }
+unlock:
+ UNLOCK(&inode->lock);
- return ret;
-}
-
-void
-marker_filter_quota_xattr (dict_t *dict, char *key,
- data_t *value, void *data)
-{
- int ret = -1;
-
- GF_VALIDATE_OR_GOTO ("marker", dict, out);
- GF_VALIDATE_OR_GOTO ("marker", key, out);
-
- ret = fnmatch ("trusted.glusterfs.quota*", key, 0);
- if (ret == 0)
- dict_del (dict, key);
-out:
- return;
+ return ret;
}
diff --git a/xlators/features/marker/src/marker-common.h b/xlators/features/marker/src/marker-common.h
index 0a7ee261948..7f8cffe7d35 100644
--- a/xlators/features/marker/src/marker-common.h
+++ b/xlators/features/marker/src/marker-common.h
@@ -1,36 +1,19 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _MARKER_COMMON_H
#define _MARKER_COMMON_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "inode.h"
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "marker.h"
int32_t
-marker_force_inode_ctx_get (inode_t *, xlator_t *, marker_inode_ctx_t **);
+marker_force_inode_ctx_get(inode_t *, xlator_t *, marker_inode_ctx_t **);
-void
-marker_filter_quota_xattr (dict_t *, char *, data_t *, void *);
#endif
diff --git a/xlators/features/marker/src/marker-mem-types.h b/xlators/features/marker/src/marker-mem-types.h
index 2f49c0d9d4e..aedfdb4a1b7 100644
--- a/xlators/features/marker/src/marker-mem-types.h
+++ b/xlators/features/marker/src/marker-mem-types.h
@@ -1,35 +1,28 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __MARKER_MEM_TYPES_H__
#define __MARKER_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_marker_mem_types_ {
- gf_marker_mt_marker_conf_t = gf_common_mt_end + 1,
- gf_marker_mt_loc_t,
- gf_marker_mt_volume_mark,
- gf_marker_mt_int64_t,
- gf_marker_mt_quota_inode_ctx_t,
- gf_marker_mt_marker_inode_ctx_t,
- gf_marker_mt_inode_contribution_t,
- gf_marker_mt_end
+ /* Those are used by ALLOCATE_OR_GOTO macro */
+ gf_marker_mt_marker_conf_t = gf_common_mt_end + 1,
+ gf_marker_mt_loc_t,
+ gf_marker_mt_volume_mark,
+ gf_marker_mt_int64_t,
+ gf_marker_mt_quota_inode_ctx_t,
+ gf_marker_mt_marker_inode_ctx_t,
+ gf_marker_mt_inode_contribution_t,
+ gf_marker_mt_quota_meta_t,
+ gf_marker_mt_quota_synctask_t,
+ gf_marker_mt_end
};
#endif
diff --git a/xlators/features/marker/src/marker-quota-helper.c b/xlators/features/marker/src/marker-quota-helper.c
index 8d4ff7786a6..ecd85d67b2b 100644
--- a/xlators/features/marker/src/marker-quota-helper.c
+++ b/xlators/features/marker/src/marker-quota-helper.c
@@ -1,415 +1,380 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "locking.h"
+#include <glusterfs/locking.h>
#include "marker-quota.h"
#include "marker-common.h"
#include "marker-quota-helper.h"
#include "marker-mem-types.h"
int
-mq_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
+mq_loc_fill(loc_t *loc, inode_t *inode, inode_t *parent, char *path)
{
- int ret = -1;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("marker", loc, out);
- GF_VALIDATE_OR_GOTO ("marker", inode, out);
- GF_VALIDATE_OR_GOTO ("marker", path, out);
- /* Not checking for parent because while filling
- * loc of root, parent will be NULL
- */
+ GF_VALIDATE_OR_GOTO("marker", loc, out);
+ GF_VALIDATE_OR_GOTO("marker", inode, out);
+ GF_VALIDATE_OR_GOTO("marker", path, out);
+ /* Not checking for parent because while filling
+ * loc of root, parent will be NULL
+ */
- if (inode) {
- loc->inode = inode_ref (inode);
- }
+ if (inode) {
+ loc->inode = inode_ref(inode);
+ }
- if (parent)
- loc->parent = inode_ref (parent);
+ if (parent)
+ loc->parent = inode_ref(parent);
- loc->path = gf_strdup (path);
- if (!loc->path) {
- gf_log ("loc fill", GF_LOG_ERROR, "strdup failed");
- goto loc_wipe;
- }
+ if (!gf_uuid_is_null(inode->gfid))
+ gf_uuid_copy(loc->gfid, inode->gfid);
- loc->name = strrchr (loc->path, '/');
- if (loc->name)
- loc->name++;
- else
- goto loc_wipe;
+ loc->path = gf_strdup(path);
+ if (!loc->path) {
+ gf_log("loc fill", GF_LOG_ERROR, "strdup failed");
+ goto out;
+ }
+
+ loc->name = strrchr(loc->path, '/');
+ if (loc->name)
+ loc->name++;
+ else
+ goto out;
+
+ ret = 0;
- ret = 0;
-loc_wipe:
- if (ret < 0)
- loc_wipe (loc);
out:
- return ret;
-}
+ if (ret < 0)
+ loc_wipe(loc);
+ return ret;
+}
int32_t
-mq_inode_loc_fill (const char *parent_gfid, inode_t *inode, loc_t *loc)
+mq_inode_loc_fill(const char *parent_gfid, inode_t *inode, loc_t *loc)
{
- char *resolvedpath = NULL;
- inode_t *parent = NULL;
- int ret = -1;
+ char *resolvedpath = NULL;
+ inode_t *parent = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+
+ this = THIS;
+
+ if (inode == NULL) {
+ gf_log_callingfn("marker", GF_LOG_ERROR,
+ "loc fill failed, "
+ "inode is NULL");
+ return ret;
+ }
- if ((!inode) || (!loc))
- return ret;
+ if (loc == NULL)
+ return ret;
- if ((inode) && __is_root_gfid (inode->gfid)) {
- loc->parent = NULL;
- goto ignore_parent;
- }
+ if ((inode) && __is_root_gfid(inode->gfid)) {
+ loc->parent = NULL;
+ goto ignore_parent;
+ }
- if (parent_gfid == NULL)
- parent = inode_parent (inode, 0, NULL);
- else
- parent = inode_find (inode->table,
- (unsigned char *) parent_gfid);
+ if (parent_gfid == NULL)
+ parent = inode_parent(inode, 0, NULL);
+ else
+ parent = inode_find(inode->table, (unsigned char *)parent_gfid);
- if (parent == NULL)
- goto err;
+ if (parent == NULL) {
+ gf_log("marker", GF_LOG_ERROR, "parent is NULL for %s",
+ uuid_utoa(inode->gfid));
+ goto err;
+ }
ignore_parent:
- ret = inode_path (inode, NULL, &resolvedpath);
- if (ret < 0)
- goto err;
-
- ret = mq_loc_fill (loc, inode, parent, resolvedpath);
- if (ret < 0)
- goto err;
+ ret = inode_path(inode, NULL, &resolvedpath);
+ if (ret < 0) {
+ gf_log("marker", GF_LOG_ERROR, "failed to resolve path for %s",
+ uuid_utoa(inode->gfid));
+ goto err;
+ }
+
+ ret = mq_loc_fill(loc, inode, parent, resolvedpath);
+ if (ret < 0)
+ goto err;
+
+ ret = mq_inode_ctx_get(inode, this, &ctx);
+ if (ret < 0 || ctx == NULL)
+ ctx = mq_inode_ctx_new(inode, this);
+ if (ctx == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "mq_inode_ctx_new "
+ "failed for %s",
+ uuid_utoa(inode->gfid));
+ ret = -1;
+ goto err;
+ }
+ ret = 0;
err:
- if (parent)
- inode_unref (parent);
+ if (parent)
+ inode_unref(parent);
- GF_FREE (resolvedpath);
+ GF_FREE(resolvedpath);
- return ret;
+ return ret;
}
-
quota_inode_ctx_t *
-mq_alloc_inode_ctx ()
-{
- int32_t ret = -1;
- quota_inode_ctx_t *ctx = NULL;
-
- QUOTA_ALLOC (ctx, quota_inode_ctx_t, ret);
- if (ret == -1)
- goto out;
-
- ctx->size = 0;
- ctx->dirty = 0;
- ctx->updation_status = _gf_false;
- LOCK_INIT (&ctx->lock);
- INIT_LIST_HEAD (&ctx->contribution_head);
-out:
- return ctx;
-}
-
-inode_contribution_t *
-mq_get_contribution_node (inode_t *inode, quota_inode_ctx_t *ctx)
+mq_alloc_inode_ctx()
{
- inode_contribution_t *contri = NULL;
- inode_contribution_t *temp = NULL;
-
- if (!inode || !ctx)
- goto out;
-
- list_for_each_entry (temp, &ctx->contribution_head, contri_list) {
- if (uuid_compare (temp->gfid, inode->gfid) == 0) {
- contri = temp;
- goto out;
- }
- }
+ int32_t ret = -1;
+ quota_inode_ctx_t *ctx = NULL;
+
+ QUOTA_ALLOC(ctx, quota_inode_ctx_t, ret);
+ if (ret == -1)
+ goto out;
+
+ ctx->size = 0;
+ ctx->dirty = 0;
+ ctx->updation_status = _gf_false;
+ LOCK_INIT(&ctx->lock);
+ INIT_LIST_HEAD(&ctx->contribution_head);
out:
- return contri;
+ return ctx;
}
-
-int32_t
-mq_delete_contribution_node (dict_t *dict, char *key,
- inode_contribution_t *contribution)
+static void
+mq_contri_fini(inode_contribution_t *contri)
{
- if (dict_get (dict, key) != NULL)
- goto out;
-
- QUOTA_FREE_CONTRIBUTION_NODE (contribution);
-out:
- return 0;
+ LOCK_DESTROY(&contri->lock);
+ GF_FREE(contri);
}
-
inode_contribution_t *
-__mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc)
+mq_contri_init(inode_t *inode)
{
- int32_t ret = 0;
- inode_contribution_t *contribution = NULL;
+ inode_contribution_t *contri = NULL;
+ int32_t ret = 0;
- if (!loc->parent)
- goto out;
-
- list_for_each_entry (contribution, &ctx->contribution_head, contri_list) {
- if (loc->parent &&
- uuid_compare (contribution->gfid, loc->parent->gfid) == 0) {
- goto out;
- }
- }
+ QUOTA_ALLOC(contri, inode_contribution_t, ret);
+ if (ret == -1)
+ goto out;
- QUOTA_ALLOC (contribution, inode_contribution_t, ret);
- if (ret == -1)
- goto out;
+ GF_REF_INIT(contri, mq_contri_fini);
- contribution->contribution = 0;
+ contri->contribution = 0;
+ contri->file_count = 0;
+ contri->dir_count = 0;
+ gf_uuid_copy(contri->gfid, inode->gfid);
- uuid_copy (contribution->gfid, loc->parent->gfid);
-
- LOCK_INIT (&contribution->lock);
- INIT_LIST_HEAD (&contribution->contri_list);
-
- list_add_tail (&contribution->contri_list, &ctx->contribution_head);
+ LOCK_INIT(&contri->lock);
+ INIT_LIST_HEAD(&contri->contri_list);
out:
- return contribution;
+ return contri;
}
-
inode_contribution_t *
-mq_add_new_contribution_node (xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc)
+mq_get_contribution_node(inode_t *inode, quota_inode_ctx_t *ctx)
{
- inode_contribution_t *contribution = NULL;
+ inode_contribution_t *contri = NULL;
+ inode_contribution_t *temp = NULL;
- if ((ctx == NULL) || (loc == NULL))
- return NULL;
+ if (!inode || !ctx)
+ goto out;
- if (strcmp (loc->path, "/") == 0)
- return NULL;
+ LOCK(&ctx->lock);
+ {
+ if (list_empty(&ctx->contribution_head))
+ goto unlock;
- LOCK (&ctx->lock);
+ list_for_each_entry(temp, &ctx->contribution_head, contri_list)
{
- contribution = __mq_add_new_contribution_node (this, ctx, loc);
- }
- UNLOCK (&ctx->lock);
-
- return contribution;
-}
-
-
-int32_t
-mq_dict_set_contribution (xlator_t *this, dict_t *dict,
- loc_t *loc)
-{
- int32_t ret = -1;
- char contri_key [512] = {0, };
-
- GF_VALIDATE_OR_GOTO ("marker", this, out);
- GF_VALIDATE_OR_GOTO ("marker", dict, out);
- GF_VALIDATE_OR_GOTO ("marker", loc, out);
- GF_VALIDATE_OR_GOTO ("marker", loc->parent, out);
-
- GET_CONTRI_KEY (contri_key, loc->parent->gfid, ret);
- if (ret < 0) {
- ret = -1;
- goto out;
- }
-
- ret = dict_set_int64 (dict, contri_key, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "unable to set dict value on %s.",
- loc->path);
- goto out;
+ if (gf_uuid_compare(temp->gfid, inode->gfid) == 0) {
+ contri = temp;
+ GF_REF_GET(contri);
+ break;
+ }
}
+ }
+unlock:
+ UNLOCK(&ctx->lock);
- ret = 0;
out:
- return ret;
+ return contri;
}
-
-int32_t
-mq_inode_ctx_get (inode_t *inode, xlator_t *this,
- quota_inode_ctx_t **ctx)
+inode_contribution_t *
+__mq_add_new_contribution_node(xlator_t *this, quota_inode_ctx_t *ctx,
+ loc_t *loc)
{
- int32_t ret = -1;
- uint64_t ctx_int = 0;
- marker_inode_ctx_t *mark_ctx = NULL;
+ inode_contribution_t *contribution = NULL;
- GF_VALIDATE_OR_GOTO ("marker", inode, out);
- GF_VALIDATE_OR_GOTO ("marker", this, out);
- GF_VALIDATE_OR_GOTO ("marker", ctx, out);
+ if (!loc->parent) {
+ if (!gf_uuid_is_null(loc->pargfid))
+ loc->parent = inode_find(loc->inode->table, loc->pargfid);
- ret = inode_ctx_get (inode, this, &ctx_int);
- if (ret < 0) {
- ret = -1;
- *ctx = NULL;
- goto out;
- }
-
- mark_ctx = (marker_inode_ctx_t *) (unsigned long)ctx_int;
- if (mark_ctx->quota_ctx == NULL) {
- ret = -1;
- goto out;
+ if (!loc->parent)
+ loc->parent = inode_parent(loc->inode, loc->pargfid, loc->name);
+ if (!loc->parent)
+ goto out;
+ }
+
+ list_for_each_entry(contribution, &ctx->contribution_head, contri_list)
+ {
+ if (loc->parent &&
+ gf_uuid_compare(contribution->gfid, loc->parent->gfid) == 0) {
+ goto out;
}
+ }
- *ctx = mark_ctx->quota_ctx;
+ contribution = mq_contri_init(loc->parent);
+ if (contribution == NULL)
+ goto out;
- ret = 0;
+ list_add_tail(&contribution->contri_list, &ctx->contribution_head);
out:
- return ret;
+ return contribution;
}
-
-quota_inode_ctx_t *
-__mq_inode_ctx_new (inode_t *inode, xlator_t *this)
-{
- int32_t ret = -1;
- quota_inode_ctx_t *quota_ctx = NULL;
- marker_inode_ctx_t *mark_ctx = NULL;
-
- ret = marker_force_inode_ctx_get (inode, this, &mark_ctx);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "marker_force_inode_ctx_get() failed");
- goto out;
- }
-
- LOCK (&inode->lock);
- {
- if (mark_ctx->quota_ctx == NULL) {
- quota_ctx = mq_alloc_inode_ctx ();
- if (quota_ctx == NULL) {
- ret = -1;
- goto unlock;
- }
- mark_ctx->quota_ctx = quota_ctx;
- } else {
- quota_ctx = mark_ctx->quota_ctx;
- }
-
- ret = 0;
- }
-unlock:
- UNLOCK (&inode->lock);
-out:
- return quota_ctx;
-}
-
-
-quota_inode_ctx_t *
-mq_inode_ctx_new (inode_t * inode, xlator_t *this)
-{
- return __mq_inode_ctx_new (inode, this);
-}
-
-quota_local_t *
-mq_local_new ()
+inode_contribution_t *
+mq_add_new_contribution_node(xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc)
{
- quota_local_t *local = NULL;
+ inode_contribution_t *contribution = NULL;
- local = mem_get0 (THIS->local_pool);
- if (!local)
- goto out;
+ if ((ctx == NULL) || (loc == NULL))
+ return NULL;
- local->ref = 1;
- LOCK_INIT (&local->lock);
+ if (((loc->path) && (strcmp(loc->path, "/") == 0)) ||
+ (!loc->path && gf_uuid_is_null(loc->pargfid)))
+ return NULL;
- local->ctx = NULL;
- local->contri = NULL;
+ LOCK(&ctx->lock);
+ {
+ contribution = __mq_add_new_contribution_node(this, ctx, loc);
+ if (contribution)
+ GF_REF_GET(contribution);
+ }
+ UNLOCK(&ctx->lock);
-out:
- return local;
+ return contribution;
}
-quota_local_t *
-mq_local_ref (quota_local_t *local)
+int32_t
+mq_dict_set_contribution(xlator_t *this, dict_t *dict, loc_t *loc, uuid_t gfid,
+ char *contri_key)
{
- LOCK (&local->lock);
- {
- local->ref ++;
+ int32_t ret = -1;
+ char key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("marker", this, out);
+ GF_VALIDATE_OR_GOTO("marker", dict, out);
+ GF_VALIDATE_OR_GOTO("marker", loc, out);
+
+ if (gfid && !gf_uuid_is_null(gfid)) {
+ GET_CONTRI_KEY(this, key, gfid, ret);
+ } else if (loc->parent) {
+ GET_CONTRI_KEY(this, key, loc->parent->gfid, ret);
+ } else {
+ /* nameless lookup, fetch contributions to all parents */
+ GET_CONTRI_KEY(this, key, NULL, ret);
+ }
+
+ if (ret < 0)
+ goto out;
+
+ ret = dict_set_int64(dict, key, 0);
+ if (ret < 0)
+ goto out;
+
+ if (contri_key)
+ if (snprintf(contri_key, QUOTA_KEY_MAX, "%s", key) >= QUOTA_KEY_MAX) {
+ ret = -1;
+ goto out;
}
- UNLOCK (&local->lock);
- return local;
-}
+out:
+ if (ret < 0)
+ gf_log_callingfn(this ? this->name : "Marker", GF_LOG_ERROR,
+ "dict set failed");
+ return ret;
+}
int32_t
-mq_local_unref (xlator_t *this, quota_local_t *local)
+mq_inode_ctx_get(inode_t *inode, xlator_t *this, quota_inode_ctx_t **ctx)
{
- int32_t ref = 0;
- if (local == NULL)
- goto out;
-
- QUOTA_SAFE_DECREMENT (&local->lock, local->ref, ref);
+ int32_t ret = -1;
+ uint64_t ctx_int = 0;
+ marker_inode_ctx_t *mark_ctx = NULL;
- if (ref != 0)
- goto out;
+ GF_VALIDATE_OR_GOTO("marker", inode, out);
+ GF_VALIDATE_OR_GOTO("marker", this, out);
+ GF_VALIDATE_OR_GOTO("marker", ctx, out);
- if (local->fd != NULL)
- fd_unref (local->fd);
+ ret = inode_ctx_get(inode, this, &ctx_int);
+ if (ret < 0) {
+ ret = -1;
+ *ctx = NULL;
+ goto out;
+ }
- loc_wipe (&local->loc);
+ mark_ctx = (marker_inode_ctx_t *)(unsigned long)ctx_int;
+ if (mark_ctx->quota_ctx == NULL) {
+ ret = -1;
+ goto out;
+ }
- loc_wipe (&local->parent_loc);
+ *ctx = mark_ctx->quota_ctx;
- LOCK_DESTROY (&local->lock);
+ ret = 0;
- mem_put (local);
out:
- return 0;
+ return ret;
}
-
-inode_contribution_t *
-mq_get_contribution_from_loc (xlator_t *this, loc_t *loc)
+quota_inode_ctx_t *
+__mq_inode_ctx_new(inode_t *inode, xlator_t *this)
{
- int32_t ret = 0;
- quota_inode_ctx_t *ctx = NULL;
- inode_contribution_t *contribution = NULL;
-
- ret = mq_inode_ctx_get (loc->inode, this, &ctx);
- if (ret < 0) {
- gf_log_callingfn (this->name, GF_LOG_WARNING,
- "cannot get marker-quota context from inode "
- "(gfid:%s, path:%s)",
- uuid_utoa (loc->inode->gfid), loc->path);
- goto err;
+ int32_t ret = -1;
+ quota_inode_ctx_t *quota_ctx = NULL;
+ marker_inode_ctx_t *mark_ctx = NULL;
+
+ ret = marker_force_inode_ctx_get(inode, this, &mark_ctx);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "marker_force_inode_ctx_get() failed");
+ goto out;
+ }
+
+ LOCK(&inode->lock);
+ {
+ if (mark_ctx->quota_ctx == NULL) {
+ quota_ctx = mq_alloc_inode_ctx();
+ if (quota_ctx == NULL) {
+ ret = -1;
+ goto unlock;
+ }
+ mark_ctx->quota_ctx = quota_ctx;
+ } else {
+ quota_ctx = mark_ctx->quota_ctx;
}
- contribution = mq_get_contribution_node (loc->parent, ctx);
- if (contribution == NULL) {
- gf_log_callingfn (this->name, GF_LOG_WARNING,
- "inode (gfid:%s, path:%s) has "
- "no contribution towards parent (gfid:%s)",
- uuid_utoa (loc->inode->gfid),
- loc->path, uuid_utoa (loc->parent->gfid));
- goto err;
- }
+ ret = 0;
+ }
+unlock:
+ UNLOCK(&inode->lock);
+out:
+ return quota_ctx;
+}
-err:
- return contribution;
+quota_inode_ctx_t *
+mq_inode_ctx_new(inode_t *inode, xlator_t *this)
+{
+ return __mq_inode_ctx_new(inode, this);
}
diff --git a/xlators/features/marker/src/marker-quota-helper.h b/xlators/features/marker/src/marker-quota-helper.h
index e019fbd52ad..d4091dd2180 100644
--- a/xlators/features/marker/src/marker-quota-helper.h
+++ b/xlators/features/marker/src/marker-quota-helper.h
@@ -1,83 +1,66 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _MARKER_QUOTA_HELPER_H
-#define _MARKER_QUOTA_HELPER
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+#ifndef _MARKER_QUOTA_HELPER_H
+#define _MARKER_QUOTA_HELPER_H
#include "marker.h"
-#define QUOTA_FREE_CONTRIBUTION_NODE(_contribution) \
- do { \
- list_del (&_contribution->contri_list); \
- GF_FREE (_contribution); \
- } while (0)
-
-#define QUOTA_SAFE_INCREMENT(lock, var) \
- do { \
- LOCK (lock); \
- var ++; \
- UNLOCK (lock); \
- } while (0)
-
-#define QUOTA_SAFE_DECREMENT(lock, var, value) \
- do { \
- LOCK (lock); \
- { \
- value = --var; \
- } \
- UNLOCK (lock); \
- } while (0)
+#define QUOTA_FREE_CONTRIBUTION_NODE(ctx, _contribution) \
+ do { \
+ LOCK(&ctx->lock); \
+ { \
+ list_del_init(&_contribution->contri_list); \
+ GF_REF_PUT(_contribution); \
+ } \
+ UNLOCK(&ctx->lock); \
+ } while (0)
+
+#define QUOTA_SAFE_INCREMENT(lock, var) \
+ do { \
+ LOCK(lock); \
+ var++; \
+ UNLOCK(lock); \
+ } while (0)
+
+#define QUOTA_SAFE_DECREMENT(lock, var, value) \
+ do { \
+ LOCK(lock); \
+ { \
+ value = --var; \
+ } \
+ UNLOCK(lock); \
+ } while (0)
inode_contribution_t *
-mq_add_new_contribution_node (xlator_t *, quota_inode_ctx_t *, loc_t *);
+mq_add_new_contribution_node(xlator_t *, quota_inode_ctx_t *, loc_t *);
int32_t
-mq_dict_set_contribution (xlator_t *, dict_t *, loc_t *);
+mq_dict_set_contribution(xlator_t *, dict_t *, loc_t *, uuid_t, char *);
quota_inode_ctx_t *
-mq_inode_ctx_new (inode_t *, xlator_t *);
-
-int32_t
-mq_inode_ctx_get (inode_t *, xlator_t *, quota_inode_ctx_t **);
+mq_inode_ctx_new(inode_t *, xlator_t *);
int32_t
-mq_delete_contribution_node (dict_t *, char *, inode_contribution_t *);
+mq_inode_ctx_get(inode_t *, xlator_t *, quota_inode_ctx_t **);
int32_t
-mq_inode_loc_fill (const char *, inode_t *, loc_t *);
-
-quota_local_t *
-mq_local_new ();
-
-quota_local_t *
-mq_local_ref (quota_local_t *);
+mq_delete_contribution_node(dict_t *, char *, inode_contribution_t *);
int32_t
-mq_local_unref (xlator_t *, quota_local_t *);
+mq_inode_loc_fill(const char *, inode_t *, loc_t *);
inode_contribution_t *
-mq_get_contribution_node (inode_t *, quota_inode_ctx_t *);
+mq_contri_init(inode_t *inode);
inode_contribution_t *
-mq_get_contribution_from_loc (xlator_t *this, loc_t *loc);
+mq_get_contribution_node(inode_t *, quota_inode_ctx_t *);
#endif
diff --git a/xlators/features/marker/src/marker-quota.c b/xlators/features/marker/src/marker-quota.c
index fdce7129e40..3de2ea1c92c 100644
--- a/xlators/features/marker/src/marker-quota.c
+++ b/xlators/features/marker/src/marker-quota.c
@@ -1,2528 +1,2297 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "dict.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "libxlator.h"
-#include "common-utils.h"
-#include "byte-order.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/byte-order.h>
#include "marker-quota.h"
#include "marker-quota-helper.h"
+#include <glusterfs/syncop.h>
+#include <glusterfs/quota-common-utils.h>
int
-mq_loc_copy (loc_t *dst, loc_t *src)
+mq_loc_copy(loc_t *dst, loc_t *src)
{
- int ret = -1;
+ int ret = -1;
- GF_VALIDATE_OR_GOTO ("marker", dst, out);
- GF_VALIDATE_OR_GOTO ("marker", src, out);
+ GF_VALIDATE_OR_GOTO("marker", dst, out);
+ GF_VALIDATE_OR_GOTO("marker", src, out);
- if (src->inode == NULL ||
- src->path == NULL) {
- gf_log ("marker", GF_LOG_WARNING,
- "src loc is not valid");
- goto out;
- }
+ if (src->inode == NULL ||
+ ((src->parent == NULL) && (gf_uuid_is_null(src->pargfid)) &&
+ !__is_root_gfid(src->inode->gfid))) {
+ gf_log("marker", GF_LOG_WARNING, "src loc is not valid");
+ goto out;
+ }
- ret = loc_copy (dst, src);
+ ret = loc_copy(dst, src);
out:
- return ret;
+ return ret;
}
-int32_t
-mq_get_local_err (quota_local_t *local,
- int32_t *val)
+static void
+mq_set_ctx_status(quota_inode_ctx_t *ctx, gf_boolean_t *flag,
+ gf_boolean_t status)
{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO ("marker", local, out);
- GF_VALIDATE_OR_GOTO ("marker", val, out);
-
- LOCK (&local->lock);
- {
- *val = local->err;
- }
- UNLOCK (&local->lock);
-
- ret = 0;
-out:
- return ret;
+ LOCK(&ctx->lock);
+ {
+ *flag = status;
+ }
+ UNLOCK(&ctx->lock);
}
-int32_t
-mq_get_ctx_updation_status (quota_inode_ctx_t *ctx,
- gf_boolean_t *status)
+static void
+mq_test_and_set_ctx_status(quota_inode_ctx_t *ctx, gf_boolean_t *flag,
+ gf_boolean_t *status)
{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO ("marker", ctx, out);
- GF_VALIDATE_OR_GOTO ("marker", status, out);
-
- LOCK (&ctx->lock);
- {
- *status = ctx->updation_status;
- }
- UNLOCK (&ctx->lock);
-
- ret = 0;
-out:
- return ret;
+ gf_boolean_t temp = _gf_false;
+
+ LOCK(&ctx->lock);
+ {
+ temp = *status;
+ *status = *flag;
+ *flag = temp;
+ }
+ UNLOCK(&ctx->lock);
}
+static void
+mq_get_ctx_status(quota_inode_ctx_t *ctx, gf_boolean_t *flag,
+ gf_boolean_t *status)
+{
+ LOCK(&ctx->lock);
+ {
+ *status = *flag;
+ }
+ UNLOCK(&ctx->lock);
+}
int32_t
-mq_set_ctx_updation_status (quota_inode_ctx_t *ctx,
- gf_boolean_t status)
+mq_get_ctx_updation_status(quota_inode_ctx_t *ctx, gf_boolean_t *status)
{
- int32_t ret = -1;
-
- if (ctx == NULL)
- goto out;
-
- LOCK (&ctx->lock);
- {
- ctx->updation_status = status;
- }
- UNLOCK (&ctx->lock);
+ GF_VALIDATE_OR_GOTO("marker", ctx, out);
+ GF_VALIDATE_OR_GOTO("marker", status, out);
- ret = 0;
+ mq_get_ctx_status(ctx, &ctx->updation_status, status);
+ return 0;
out:
- return ret;
+ return -1;
}
int32_t
-mq_test_and_set_ctx_updation_status (quota_inode_ctx_t *ctx,
- gf_boolean_t *status)
+mq_set_ctx_updation_status(quota_inode_ctx_t *ctx, gf_boolean_t status)
{
- int32_t ret = -1;
- gf_boolean_t temp = _gf_false;
-
- GF_VALIDATE_OR_GOTO ("marker", ctx, out);
- GF_VALIDATE_OR_GOTO ("marker", status, out);
-
- LOCK (&ctx->lock);
- {
- temp = *status;
- *status = ctx->updation_status;
- ctx->updation_status = temp;
- }
- UNLOCK (&ctx->lock);
+ GF_VALIDATE_OR_GOTO("marker", ctx, out);
- ret = 0;
+ mq_set_ctx_status(ctx, &ctx->updation_status, status);
+ return 0;
out:
- return ret;
+ return -1;
}
-void
-mq_assign_lk_owner (xlator_t *this, call_frame_t *frame)
+int32_t
+mq_test_and_set_ctx_updation_status(quota_inode_ctx_t *ctx,
+ gf_boolean_t *status)
{
- marker_conf_t *conf = NULL;
- uint64_t lk_owner = 0;
-
- conf = this->private;
-
- LOCK (&conf->lock);
- {
- if (++conf->quota_lk_owner == 0) {
- ++conf->quota_lk_owner;
- }
-
- lk_owner = conf->quota_lk_owner;
- }
- UNLOCK (&conf->lock);
-
- set_lk_owner_from_uint64 (&frame->root->lk_owner, lk_owner);
+ GF_VALIDATE_OR_GOTO("marker", ctx, out);
+ GF_VALIDATE_OR_GOTO("marker", status, out);
- return;
+ mq_test_and_set_ctx_status(ctx, &ctx->updation_status, status);
+ return 0;
+out:
+ return -1;
}
-
int32_t
-mq_loc_fill_from_name (xlator_t *this, loc_t *newloc, loc_t *oldloc,
- uint64_t ino, char *name)
+mq_set_ctx_create_status(quota_inode_ctx_t *ctx, gf_boolean_t status)
{
- int32_t ret = -1;
- int32_t len = 0;
- char *path = NULL;
-
- GF_VALIDATE_OR_GOTO ("marker", this, out);
- GF_VALIDATE_OR_GOTO ("marker", newloc, out);
- GF_VALIDATE_OR_GOTO ("marker", oldloc, out);
- GF_VALIDATE_OR_GOTO ("marker", name, out);
+ GF_VALIDATE_OR_GOTO("marker", ctx, out);
- newloc->inode = inode_new (oldloc->inode->table);
-
- if (!newloc->inode) {
- ret = -1;
- goto out;
- }
-
- newloc->parent = inode_ref (oldloc->inode);
- uuid_copy (newloc->pargfid, oldloc->inode->gfid);
-
- len = strlen (oldloc->path);
-
- if (oldloc->path [len - 1] == '/')
- ret = gf_asprintf ((char **) &path, "%s%s",
- oldloc->path, name);
- else
- ret = gf_asprintf ((char **) &path, "%s/%s",
- oldloc->path, name);
-
- if (ret < 0)
- goto out;
-
- newloc->path = path;
-
- newloc->name = strrchr (newloc->path, '/');
-
- if (newloc->name)
- newloc->name++;
-
- gf_log (this->name, GF_LOG_DEBUG, "path = %s name =%s",
- newloc->path, newloc->name);
+ mq_set_ctx_status(ctx, &ctx->create_status, status);
+ return 0;
out:
- return ret;
+ return -1;
}
int32_t
-mq_dirty_inode_updation_done (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+mq_test_and_set_ctx_create_status(quota_inode_ctx_t *ctx, gf_boolean_t *status)
{
- QUOTA_STACK_DESTROY (frame, this);
+ GF_VALIDATE_OR_GOTO("marker", ctx, out);
+ GF_VALIDATE_OR_GOTO("marker", status, out);
- return 0;
+ mq_test_and_set_ctx_status(ctx, &ctx->create_status, status);
+ return 0;
+out:
+ return -1;
}
-int32_t
-mq_release_lock_on_dirty_inode (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+static void
+mq_set_ctx_dirty_status(quota_inode_ctx_t *ctx, gf_boolean_t status)
{
- struct gf_flock lock = {0, };
- quota_local_t *local = NULL;
- loc_t loc = {0, };
- int ret = -1;
-
- local = frame->local;
+ GF_VALIDATE_OR_GOTO("marker", ctx, out);
- if (op_ret == -1) {
- local->err = -1;
-
- mq_dirty_inode_updation_done (frame, NULL, this, 0, 0, NULL);
-
- return 0;
- }
-
- if (op_ret == 0)
- local->ctx->dirty = 0;
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- ret = loc_copy (&loc, &local->loc);
- if (ret == -1) {
- local->err = -1;
- frame->local = NULL;
- mq_dirty_inode_updation_done (frame, NULL, this, 0, 0, NULL);
- return 0;
- }
-
- if (local->loc.inode == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "Inode is NULL, so can't stackwind.");
- goto out;
- }
-
- STACK_WIND (frame,
- mq_dirty_inode_updation_done,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- this->name, &loc, F_SETLKW, &lock, NULL);
-
- loc_wipe (&loc);
-
- return 0;
+ mq_set_ctx_status(ctx, &ctx->dirty_status, status);
out:
- mq_dirty_inode_updation_done (frame, NULL, this, -1, 0, NULL);
-
- return 0;
+ return;
}
-int32_t
-mq_mark_inode_undirty (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
+int
+mq_build_ancestry(xlator_t *this, loc_t *loc)
{
- int32_t ret = -1;
- int64_t *size = NULL;
- dict_t *newdict = NULL;
- quota_local_t *local = NULL;
-
- local = (quota_local_t *) frame->local;
-
- if (op_ret == -1)
- goto err;
-
- if (!dict)
- goto wind;
-
- ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size);
- if (ret)
- goto wind;
+ int32_t ret = -1;
+ fd_t *fd = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ dict_t *xdata = NULL;
+ inode_t *tmp_parent = NULL;
+ inode_t *tmp_inode = NULL;
+ inode_t *linked_inode = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ INIT_LIST_HEAD(&entries.list);
+
+ xdata = dict_new();
+ if (xdata == NULL) {
+ gf_log(this->name, GF_LOG_ERROR, "dict_new failed");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_int8(xdata, GET_ANCESTRY_DENTRY_KEY, 1);
+ if (ret < 0)
+ goto out;
+
+ fd = fd_anonymous(loc->inode);
+ if (fd == NULL) {
+ gf_log(this->name, GF_LOG_ERROR, "fd creation failed");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ fd_bind(fd);
+
+ ret = syncop_readdirp(this, fd, 131072, 0, &entries, xdata, NULL);
+ if (ret < 0) {
+ gf_log(this->name,
+ (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR,
+ "readdirp failed "
+ "for %s: %s",
+ loc->path, strerror(-ret));
+ goto out;
+ }
+
+ if (list_empty(&entries.list)) {
+ ret = -1;
+ goto out;
+ }
+
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ if (__is_root_gfid(entry->inode->gfid)) {
+ /* The list contains a sub-list for each possible path
+ * to the target inode. Each sub-list starts with the
+ * root entry of the tree and is followed by the child
+ * entries for a particular path to the target entry.
+ * The root entry is an implied sub-list delimiter,
+ * as it denotes we have started processing a new path.
+ * Reset the parent pointer and continue
+ */
+
+ tmp_parent = NULL;
+ } else {
+ linked_inode = inode_link(entry->inode, tmp_parent, entry->d_name,
+ &entry->d_stat);
+ if (linked_inode) {
+ tmp_inode = entry->inode;
+ entry->inode = linked_inode;
+ inode_unref(tmp_inode);
+ } else {
+ gf_log(this->name, GF_LOG_ERROR, "inode link failed");
+ ret = -EINVAL;
+ goto out;
+ }
+ }
- LOCK (&local->ctx->lock);
- {
- local->ctx->size = ntoh64 (*size);
+ ctx = mq_inode_ctx_new(entry->inode, this);
+ if (ctx == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "mq_inode_ctx_new "
+ "failed for %s",
+ uuid_utoa(entry->inode->gfid));
+ ret = -ENOMEM;
+ goto out;
}
- UNLOCK (&local->ctx->lock);
-wind:
- newdict = dict_new ();
- if (!newdict)
- goto err;
+ /* For non-directory, posix_get_ancestry_non_directory returns
+ * all hard-links that are represented by nodes adjacent to
+ * each other in the dentry-list.
+ * (Unlike the directory case where adjacent nodes either have
+ * a parent/child relationship or belong to different paths).
+ */
+ if (entry->inode->ia_type == IA_IFDIR)
+ tmp_parent = entry->inode;
+ }
- ret = dict_set_int8 (newdict, QUOTA_DIRTY_KEY, 0);
- if (ret)
- goto err;
+ if (loc->parent)
+ inode_unref(loc->parent);
- if (uuid_is_null (local->loc.gfid))
- uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+ loc->parent = inode_parent(loc->inode, 0, NULL);
+ if (loc->parent == NULL) {
+ ret = -1;
+ goto out;
+ }
- GF_UUID_ASSERT (local->loc.gfid);
- STACK_WIND (frame, mq_release_lock_on_dirty_inode,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- &local->loc, newdict, 0, NULL);
- ret = 0;
+ ret = 0;
-err:
- if (op_ret == -1 || ret == -1) {
- local->err = -1;
+out:
+ gf_dirent_free(&entries);
- mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
- }
+ if (fd)
+ fd_unref(fd);
- if (newdict)
- dict_unref (newdict);
+ if (xdata)
+ dict_unref(xdata);
- return 0;
+ return ret;
}
-int32_t
-mq_update_size_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
+/* This function should be used only in inspect_directory and inspect_file
+ * function to heal quota xattrs.
+ * Inode quota feature is introduced in 3.7.
+ * If gluster setup is upgraded from 3.6 to 3.7, there can be a
+ * getxattr and setxattr spikes with quota heal as inode quota is missing.
+ * So this wrapper function is to avoid xattrs spikes during upgrade.
+ * This function returns success even is inode-quota xattrs are missing and
+ * hence no healing performed.
+ */
+static int32_t
+_quota_dict_get_meta(xlator_t *this, dict_t *dict, char *key, const int keylen,
+ quota_meta_t *meta, ia_type_t ia_type,
+ gf_boolean_t add_delta)
{
- int32_t ret = -1;
- dict_t *new_dict = NULL;
- int64_t *size = NULL;
- int64_t *delta = NULL;
- quota_local_t *local = NULL;
-
- local = frame->local;
-
- if (op_ret == -1)
- goto err;
-
- if (dict == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "Dict is null while updating the size xattr %s",
- local->loc.path?local->loc.path:"");
- goto err;
- }
-
- ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size);
- if (!size) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to get the size, %s",
- local->loc.path?local->loc.path:"");
- goto err;
- }
+ int32_t ret = 0;
+ marker_conf_t *priv = NULL;
- QUOTA_ALLOC_OR_GOTO (delta, int64_t, ret, err);
+ priv = this->private;
- *delta = hton64 (local->sum - ntoh64 (*size));
-
- gf_log (this->name, GF_LOG_DEBUG, "calculated size = %"PRId64", "
- "original size = %"PRIu64
- " path = %s diff = %"PRIu64, local->sum, ntoh64 (*size),
- local->loc.path, ntoh64 (*delta));
-
- new_dict = dict_new ();
- if (!new_dict);
-
- ret = dict_set_bin (new_dict, QUOTA_SIZE_KEY, delta, 8);
- if (ret)
- goto err;
-
- if (uuid_is_null (local->loc.gfid))
- uuid_copy (local->loc.gfid, buf->ia_gfid);
-
- GF_UUID_ASSERT (local->loc.gfid);
-
- STACK_WIND (frame, mq_mark_inode_undirty, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop, &local->loc,
- GF_XATTROP_ADD_ARRAY64, new_dict, NULL);
+ ret = quota_dict_get_inode_meta(dict, key, keylen, meta);
+ if (ret == -2 && (priv->feature_enabled & GF_INODE_QUOTA) == 0) {
+ /* quota_dict_get_inode_meta returns -2 if
+ * inode quota xattrs are not present.
+ * if inode quota self heal is turned off,
+ * then we should skip healing inode quotas
+ */
+ gf_log(this->name, GF_LOG_DEBUG,
+ "inode quota disabled. "
+ "inode quota self heal will not be performed");
ret = 0;
-
-err:
- if (op_ret == -1 || ret == -1) {
- local->err = -1;
-
- mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
+ if (add_delta) {
+ if (ia_type == IA_IFDIR)
+ meta->dir_count = 1;
+ else
+ meta->file_count = 1;
}
+ }
- if (new_dict)
- dict_unref (new_dict);
-
- return 0;
+ return ret;
}
int32_t
-mq_test_and_set_local_err(quota_local_t *local,
- int32_t *val)
+quota_dict_set_size_meta(xlator_t *this, dict_t *dict, const quota_meta_t *meta)
{
- int tmp = 0;
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO ("marker", local, out);
- GF_VALIDATE_OR_GOTO ("marker", val, out);
-
- LOCK (&local->lock);
- {
- tmp = local->err;
- local->err = *val;
- *val = tmp;
- }
- UNLOCK (&local->lock);
-
- ret = 0;
+ int32_t ret = -ENOMEM;
+ quota_meta_t *value = NULL;
+ char size_key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+
+ value = GF_MALLOC(2 * sizeof(quota_meta_t), gf_common_quota_meta_t);
+ if (value == NULL) {
+ goto out;
+ }
+ value[0].size = hton64(meta->size);
+ value[0].file_count = hton64(meta->file_count);
+ value[0].dir_count = hton64(meta->dir_count);
+
+ value[1].size = 0;
+ value[1].file_count = 0;
+ value[1].dir_count = hton64(1);
+
+ GET_SIZE_KEY(this, size_key, ret);
+ if (ret < 0)
+ goto out;
+ ret = dict_set_bin(dict, size_key, value, (sizeof(quota_meta_t) * 2));
+ if (ret < 0) {
+ gf_log_callingfn("quota", GF_LOG_ERROR, "dict set failed");
+ GF_FREE(value);
+ }
out:
- return ret;
+ return ret;
}
-int32_t
-mq_get_dirty_inode_size (call_frame_t *frame, xlator_t *this)
+void
+mq_compute_delta(quota_meta_t *delta, const quota_meta_t *op1,
+ const quota_meta_t *op2)
{
- int32_t ret = -1;
- dict_t *dict = NULL;
- quota_local_t *local = NULL;
-
- local = (quota_local_t *) frame->local;
-
- dict = dict_new ();
- if (!dict) {
- ret = -1;
- goto err;
- }
-
- ret = dict_set_int64 (dict, QUOTA_SIZE_KEY, 0);
- if (ret)
- goto err;
-
- if (uuid_is_null (local->loc.gfid))
- uuid_copy (local->loc.gfid, local->loc.inode->gfid);
-
- GF_UUID_ASSERT (local->loc.gfid);
-
- STACK_WIND (frame, mq_update_size_xattr, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, &local->loc, dict);
- ret =0;
-
-err:
- if (ret) {
- local->err = -1;
-
- mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
- }
+ delta->size = op1->size - op2->size;
+ delta->file_count = op1->file_count - op2->file_count;
+ delta->dir_count = op1->dir_count - op2->dir_count;
+}
- if (dict)
- dict_unref (dict);
+void
+mq_add_meta(quota_meta_t *dst, const quota_meta_t *src)
+{
+ dst->size += src->size;
+ dst->file_count += src->file_count;
+ dst->dir_count += src->dir_count;
+}
- return 0;
+void
+mq_sub_meta(quota_meta_t *dst, const quota_meta_t *src)
+{
+ if (src == NULL) {
+ dst->size = -dst->size;
+ dst->file_count = -dst->file_count;
+ dst->dir_count = -dst->dir_count;
+ } else {
+ dst->size = src->size - dst->size;
+ dst->file_count = src->file_count - dst->file_count;
+ dst->dir_count = src->dir_count - dst->dir_count;
+ }
}
int32_t
-mq_get_child_contribution (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
+mq_are_xattrs_set(xlator_t *this, loc_t *loc, gf_boolean_t *contri_set,
+ gf_boolean_t *size_set)
{
- int32_t ret = -1;
- int32_t val = 0;
- char contri_key [512] = {0, };
- int64_t *contri = NULL;
- quota_local_t *local = NULL;
-
- local = frame->local;
-
- frame->local = NULL;
-
- QUOTA_STACK_DESTROY (frame, this);
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s",
- strerror (op_errno));
- val = -2;
- if (!mq_test_and_set_local_err (local, &val) &&
- val != -2)
- mq_release_lock_on_dirty_inode (local->frame, NULL,
- this, 0, 0, NULL);
-
- goto exit;
- }
-
- ret = mq_get_local_err (local, &val);
- if (!ret && val == -2)
- goto exit;
-
- GET_CONTRI_KEY (contri_key, local->loc.inode->gfid, ret);
+ int32_t ret = -1;
+ char contri_key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+ char size_key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+ quota_meta_t meta = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+
+ dict = dict_new();
+ if (dict == NULL) {
+ gf_log(this->name, GF_LOG_ERROR, "dict_new failed");
+ goto out;
+ }
+
+ ret = mq_req_xattr(this, loc, dict, contri_key, size_key);
+ if (ret < 0)
+ goto out;
+
+ ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, dict, &rsp_dict);
+ if (ret < 0) {
+ gf_log_callingfn(
+ this->name,
+ (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR,
+ "lookup failed "
+ "for %s: %s",
+ loc->path, strerror(-ret));
+ goto out;
+ }
+
+ if (rsp_dict == NULL)
+ goto out;
+
+ *contri_set = _gf_true;
+ *size_set = _gf_true;
+ if (loc->inode->ia_type == IA_IFDIR) {
+ ret = quota_dict_get_inode_meta(rsp_dict, size_key, strlen(size_key),
+ &meta);
+ if (ret < 0 || meta.dir_count == 0)
+ *size_set = _gf_false;
+ }
+
+ if (!loc_is_root(loc)) {
+ ret = quota_dict_get_inode_meta(rsp_dict, contri_key,
+ strlen(contri_key), &meta);
if (ret < 0)
- goto out;
-
- if (!dict)
- goto out;
-
- if (dict_get_bin (dict, contri_key, (void **) &contri) == 0)
- local->sum += ntoh64 (*contri);
+ *contri_set = _gf_false;
+ }
+ ret = 0;
out:
- LOCK (&local->lock);
- {
- val = --local->dentry_child_count;
- }
- UNLOCK (&local->lock);
+ if (dict)
+ dict_unref(dict);
- if (val == 0) {
- mq_dirty_inode_readdir (local->frame, NULL, this,
- 0, 0, NULL, NULL);
- }
- mq_local_unref (this, local);
+ if (rsp_dict)
+ dict_unref(rsp_dict);
- return 0;
-exit:
- mq_local_unref (this, local);
- return 0;
+ return ret;
}
int32_t
-mq_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *entries, dict_t *xdata)
+mq_create_size_xattrs(xlator_t *this, quota_inode_ctx_t *ctx, loc_t *loc)
{
- char contri_key [512] = {0, };
- int32_t ret = 0;
- int32_t val = 0;
- off_t offset = 0;
- int32_t count = 0;
- dict_t *dict = NULL;
- quota_local_t *local = NULL;
- gf_dirent_t *entry = NULL;
- call_frame_t *newframe = NULL;
- loc_t loc = {0, };
-
- local = mq_local_ref (frame->local);
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "readdir failed %s", strerror (op_errno));
- local->err = -1;
-
- mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
-
- goto end;
- } else if (op_ret == 0) {
- mq_get_dirty_inode_size (frame, this);
-
- goto end;
- }
-
- local->dentry_child_count = 0;
-
- list_for_each_entry (entry, (&entries->list), list) {
- gf_log (this->name, GF_LOG_DEBUG, "entry = %s", entry->d_name);
-
- if ((!strcmp (entry->d_name, ".")) || (!strcmp (entry->d_name,
- ".."))) {
- gf_log (this->name, GF_LOG_DEBUG, "entry = %s",
- entry->d_name);
- continue;
- }
-
- offset = entry->d_off;
- count++;
- }
-
- if (count == 0) {
- mq_get_dirty_inode_size (frame, this);
- goto end;
-
- }
-
- local->frame = frame;
+ int32_t ret = -1;
+ quota_meta_t size = {
+ 0,
+ };
+ dict_t *dict = NULL;
- LOCK (&local->lock);
- {
- local->dentry_child_count = count;
- local->d_off = offset;
- }
- UNLOCK (&local->lock);
-
-
- list_for_each_entry (entry, (&entries->list), list) {
- gf_log (this->name, GF_LOG_DEBUG, "entry = %s", entry->d_name);
-
- if ((!strcmp (entry->d_name, ".")) || (!strcmp (entry->d_name,
- ".."))) {
- gf_log (this->name, GF_LOG_DEBUG, "entry = %s",
- entry->d_name);
- continue;
- }
+ GF_VALIDATE_OR_GOTO("marker", loc, out);
+ GF_VALIDATE_OR_GOTO("marker", loc->inode, out);
- ret = mq_loc_fill_from_name (this, &loc, &local->loc,
- entry->d_ino, entry->d_name);
- if (ret < 0)
- goto out;
-
- ret = 0;
-
- LOCK (&local->lock);
- {
- if (local->err != -2) {
- newframe = copy_frame (frame);
- if (!newframe) {
- ret = -1;
- }
- } else
- ret = -1;
- }
- UNLOCK (&local->lock);
-
- if (ret == -1)
- goto out;
-
- newframe->local = mq_local_ref (local);
-
- dict = dict_new ();
- if (!dict) {
- ret = -1;
- goto out;
- }
-
- GET_CONTRI_KEY (contri_key, local->loc.inode->gfid, ret);
- if (ret < 0)
- goto out;
-
- ret = dict_set_int64 (dict, contri_key, 0);
- if (ret)
- goto out;
-
- STACK_WIND (newframe,
- mq_get_child_contribution,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- &loc, dict);
-
- offset = entry->d_off;
-
- loc_wipe (&loc);
-
- newframe = NULL;
-
- out:
- if (dict) {
- dict_unref (dict);
- dict = NULL;
- }
-
- if (ret) {
- val = -2;
- mq_test_and_set_local_err (local, &val);
-
- if (newframe) {
- newframe->local = NULL;
- mq_local_unref(this, local);
- QUOTA_STACK_DESTROY (newframe, this);
- }
-
- break;
- }
- }
+ if (loc->inode->ia_type != IA_IFDIR) {
+ ret = 0;
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_log(this->name, GF_LOG_ERROR, "dict_new failed");
+ ret = -1;
+ goto out;
+ }
+
+ ret = quota_dict_set_size_meta(this, dict, &size);
+ if (ret < 0)
+ goto out;
+
+ ret = syncop_xattrop(FIRST_CHILD(this), loc,
+ GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT, dict, NULL, NULL,
+ NULL);
+
+ if (ret < 0) {
+ gf_log_callingfn(
+ this->name,
+ (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR,
+ "xattrop failed "
+ "for %s: %s",
+ loc->path, strerror(-ret));
+ goto out;
+ }
- if (ret && val != -2) {
- mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
- }
-end:
- mq_local_unref (this, local);
+out:
+ if (dict)
+ dict_unref(dict);
- return 0;
+ return ret;
}
int32_t
-mq_dirty_inode_readdir (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd, dict_t *xdata)
+mq_lock(xlator_t *this, loc_t *loc, short l_type)
{
- quota_local_t *local = NULL;
-
- local = frame->local;
-
- if (op_ret == -1) {
- local->err = -1;
- mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
- return 0;
- }
-
- if (local->fd == NULL)
- local->fd = fd_ref (fd);
+ struct gf_flock lock = {
+ 0,
+ };
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("marker", loc, out);
+ GF_VALIDATE_OR_GOTO("marker", loc->inode, out);
+
+ gf_log(this->name, GF_LOG_DEBUG, "set lock type %d on %s", l_type,
+ loc->path);
+
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = l_type;
+ lock.l_whence = SEEK_SET;
+
+ ret = syncop_inodelk(FIRST_CHILD(this), this->name, loc, F_SETLKW, &lock,
+ NULL, NULL);
+ if (ret < 0)
+ gf_log_callingfn(
+ this->name,
+ (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR,
+ "inodelk failed "
+ "for %s: %s",
+ loc->path, strerror(-ret));
- STACK_WIND (frame,
- mq_readdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir,
- local->fd, READDIR_BUF, local->d_off, xdata);
+out:
- return 0;
+ return ret;
}
int32_t
-mq_check_if_still_dirty (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *dict,
- struct iatt *postparent)
+mq_get_dirty(xlator_t *this, loc_t *loc, int32_t *dirty)
{
- int8_t dirty = -1;
- int32_t ret = -1;
- fd_t *fd = NULL;
- quota_local_t *local = NULL;
-
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "failed to get "
- "the dirty xattr for %s", local->loc.path);
- goto err;
- }
-
- if (!dict) {
- ret = -1;
- goto err;
- }
-
- ret = dict_get_int8 (dict, QUOTA_DIRTY_KEY, &dirty);
- if (ret)
- goto err;
-
- //the inode is not dirty anymore
- if (dirty == 0) {
- mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
-
- return 0;
- }
-
- fd = fd_create (local->loc.inode, frame->root->pid);
-
- local->d_off = 0;
-
- if (uuid_is_null (local->loc.gfid))
- uuid_copy (local->loc.gfid, buf->ia_gfid);
-
- GF_UUID_ASSERT (local->loc.gfid);
- STACK_WIND(frame,
- mq_dirty_inode_readdir,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir,
- &local->loc, fd, NULL);
+ int32_t ret = -1;
+ int8_t value = 0;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+
+ dict = dict_new();
+ if (dict == NULL) {
+ gf_log(this->name, GF_LOG_ERROR, "dict_new failed");
+ goto out;
+ }
+
+ ret = dict_set_int64(dict, QUOTA_DIRTY_KEY, 0);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING, "dict set failed");
+ goto out;
+ }
+
+ ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, dict, &rsp_dict);
+ if (ret < 0) {
+ gf_log_callingfn(
+ this->name,
+ (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR,
+ "lookup failed "
+ "for %s: %s",
+ loc->path, strerror(-ret));
+ goto out;
+ }
+
+ ret = dict_get_int8(rsp_dict, QUOTA_DIRTY_KEY, &value);
+ if (ret < 0)
+ goto out;
+
+ *dirty = value;
- ret = 0;
-
-err:
- if (op_ret == -1 || ret == -1) {
- local->err = -1;
- mq_release_lock_on_dirty_inode (frame, NULL, this, 0, 0, NULL);
- }
+out:
+ if (dict)
+ dict_unref(dict);
- if (fd != NULL) {
- fd_unref (fd);
- }
+ if (rsp_dict)
+ dict_unref(rsp_dict);
- return 0;
+ return ret;
}
int32_t
-mq_get_dirty_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+mq_get_set_dirty(xlator_t *this, loc_t *loc, int32_t dirty, int32_t *prev_dirty)
{
- int32_t ret = -1;
- dict_t *xattr_req = NULL;
- quota_local_t *local = NULL;
-
- if (op_ret == -1) {
- mq_dirty_inode_updation_done (frame, NULL, this, 0, 0, NULL);
- return 0;
- }
-
- local = frame->local;
-
- xattr_req = dict_new ();
- if (xattr_req == NULL) {
- ret = -1;
- goto err;
- }
-
- ret = dict_set_int8 (xattr_req, QUOTA_DIRTY_KEY, 0);
- if (ret)
- goto err;
+ int32_t ret = -1;
+ int8_t value = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+
+ GF_VALIDATE_OR_GOTO("marker", loc, out);
+ GF_VALIDATE_OR_GOTO("marker", loc->inode, out);
+ GF_VALIDATE_OR_GOTO("marker", prev_dirty, out);
+
+ ret = mq_inode_ctx_get(loc->inode, this, &ctx);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to get inode ctx for "
+ "%s",
+ loc->path);
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_log(this->name, GF_LOG_ERROR, "dict_new failed");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int8(dict, QUOTA_DIRTY_KEY, dirty);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "dict_set failed");
+ goto out;
+ }
+
+ ret = syncop_xattrop(FIRST_CHILD(this), loc, GF_XATTROP_GET_AND_SET, dict,
+ NULL, NULL, &rsp_dict);
+ if (ret < 0) {
+ gf_log_callingfn(
+ this->name,
+ (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR,
+ "xattrop failed "
+ "for %s: %s",
+ loc->path, strerror(-ret));
+ goto out;
+ }
+
+ *prev_dirty = 0;
+ if (rsp_dict) {
+ ret = dict_get_int8(rsp_dict, QUOTA_DIRTY_KEY, &value);
+ if (ret == 0)
+ *prev_dirty = value;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ ctx->dirty = dirty;
+ }
+ UNLOCK(&ctx->lock);
+ ret = 0;
+out:
+ if (dict)
+ dict_unref(dict);
- if (uuid_is_null (local->loc.gfid))
- uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+ if (rsp_dict)
+ dict_unref(rsp_dict);
- GF_UUID_ASSERT (local->loc.gfid);
+ return ret;
+}
- STACK_WIND (frame,
- mq_check_if_still_dirty,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- &local->loc,
- xattr_req);
+int32_t
+mq_mark_dirty(xlator_t *this, loc_t *loc, int32_t dirty)
+{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ GF_VALIDATE_OR_GOTO("marker", loc, out);
+ GF_VALIDATE_OR_GOTO("marker", loc->inode, out);
+
+ ret = mq_inode_ctx_get(loc->inode, this, &ctx);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to get inode ctx for "
+ "%s",
+ loc->path);
ret = 0;
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ gf_log(this->name, GF_LOG_ERROR, "dict_new failed");
+ goto out;
+ }
+
+ ret = dict_set_int8(dict, QUOTA_DIRTY_KEY, dirty);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "dict_set failed");
+ goto out;
+ }
+
+ ret = syncop_setxattr(FIRST_CHILD(this), loc, dict, 0, NULL, NULL);
+ if (ret < 0) {
+ gf_log_callingfn(
+ this->name,
+ (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR,
+ "setxattr dirty = %d "
+ "failed for %s: %s",
+ dirty, loc->path, strerror(-ret));
+ goto out;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ ctx->dirty = dirty;
+ }
+ UNLOCK(&ctx->lock);
-err:
- if (ret) {
- local->err = -1;
- mq_release_lock_on_dirty_inode(frame, NULL, this, 0, 0, NULL);
- }
-
- if (xattr_req)
- dict_unref (xattr_req);
+out:
+ if (dict)
+ dict_unref(dict);
- return 0;
+ return ret;
}
-/* return 1 when dirty updation started
- * 0 other wise
- */
int32_t
-mq_update_dirty_inode (xlator_t *this,
- loc_t *loc,
- quota_inode_ctx_t *ctx,
- inode_contribution_t *contribution)
+_mq_get_metadata(xlator_t *this, loc_t *loc, quota_meta_t *contri,
+ quota_meta_t *size, uuid_t contri_gfid)
{
- int32_t ret = -1;
- quota_local_t *local = NULL;
- gf_boolean_t status = _gf_false;
- struct gf_flock lock = {0, };
- call_frame_t *frame = NULL;
-
- ret = mq_get_ctx_updation_status (ctx, &status);
- if (ret == -1 || status == _gf_true) {
- ret = 0;
- goto out;
- }
-
- frame = create_frame (this, this->ctx->pool);
- if (frame == NULL) {
- ret = -1;
- goto out;
+ int32_t ret = -1;
+ quota_meta_t meta = {
+ 0,
+ };
+ char contri_key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+ char size_key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+ int keylen = 0;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("marker", loc, out);
+ GF_VALIDATE_OR_GOTO("marker", loc->inode, out);
+
+ if (size == NULL && contri == NULL)
+ goto out;
+
+ dict = dict_new();
+ if (dict == NULL) {
+ gf_log(this->name, GF_LOG_ERROR, "dict_new failed");
+ goto out;
+ }
+
+ if (size && loc->inode->ia_type == IA_IFDIR) {
+ GET_SIZE_KEY(this, size_key, keylen);
+ if (keylen < 0)
+ goto out;
+ ret = dict_set_int64(dict, size_key, 0);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "dict_set failed.");
+ goto out;
}
+ }
- mq_assign_lk_owner (this, frame);
-
- local = mq_local_new ();
- if (local == NULL)
- goto fr_destroy;
-
- frame->local = local;
- ret = mq_loc_copy (&local->loc, loc);
+ if (contri && !loc_is_root(loc)) {
+ ret = mq_dict_set_contribution(this, dict, loc, contri_gfid,
+ contri_key);
if (ret < 0)
- goto fr_destroy;
-
- local->ctx = ctx;
-
- local->contri = contribution;
+ goto out;
+ }
+
+ ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL, dict, &rsp_dict);
+ if (ret < 0) {
+ gf_log_callingfn(
+ this->name,
+ (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR,
+ "lookup failed "
+ "for %s: %s",
+ loc->path, strerror(-ret));
+ goto out;
+ }
+
+ if (size) {
+ if (loc->inode->ia_type == IA_IFDIR) {
+ ret = quota_dict_get_meta(rsp_dict, size_key, keylen, &meta);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "dict_get failed.");
+ goto out;
+ }
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
+ size->size = meta.size;
+ size->file_count = meta.file_count;
+ size->dir_count = meta.dir_count;
+ } else {
+ size->size = stbuf.ia_blocks * 512;
+ size->file_count = 1;
+ size->dir_count = 0;
+ }
+ }
- if (local->loc.inode == NULL) {
- ret = -1;
- gf_log (this->name, GF_LOG_WARNING,
- "Inode is NULL, so can't stackwind.");
- goto fr_destroy;
+ if (contri && !loc_is_root(loc)) {
+ ret = quota_dict_get_meta(rsp_dict, contri_key, strlen(contri_key),
+ &meta);
+ if (ret < 0) {
+ contri->size = 0;
+ contri->file_count = 0;
+ contri->dir_count = 0;
+ } else {
+ contri->size = meta.size;
+ contri->file_count = meta.file_count;
+ contri->dir_count = meta.dir_count;
}
+ }
- STACK_WIND (frame,
- mq_get_dirty_xattr,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->loc, F_SETLKW, &lock, NULL);
- return 1;
+ ret = 0;
-fr_destroy:
- QUOTA_STACK_DESTROY (frame, this);
out:
+ if (dict)
+ dict_unref(dict);
- return 0;
-}
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+ return ret;
+}
int32_t
-mq_inode_creation_done (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+mq_get_metadata(xlator_t *this, loc_t *loc, quota_meta_t *contri,
+ quota_meta_t *size, quota_inode_ctx_t *ctx,
+ inode_contribution_t *contribution)
{
- quota_local_t *local = NULL;
+ int32_t ret = -1;
- if (frame == NULL)
- return 0;
+ GF_VALIDATE_OR_GOTO("marker", loc, out);
+ GF_VALIDATE_OR_GOTO("marker", loc->inode, out);
+ GF_VALIDATE_OR_GOTO("marker", ctx, out);
+ GF_VALIDATE_OR_GOTO("marker", contribution, out);
- local = frame->local;
+ if (size == NULL && contri == NULL) {
+ ret = 0;
+ goto out;
+ }
- if (local != NULL) {
- mq_initiate_quota_txn (this, &local->loc);
+ ret = _mq_get_metadata(this, loc, contri, size, contribution->gfid);
+ if (ret < 0)
+ goto out;
+
+ if (size) {
+ LOCK(&ctx->lock);
+ {
+ ctx->size = size->size;
+ ctx->file_count = size->file_count;
+ ctx->dir_count = size->dir_count;
}
+ UNLOCK(&ctx->lock);
+ }
- QUOTA_STACK_DESTROY (frame, this);
+ if (contri) {
+ LOCK(&contribution->lock);
+ {
+ contribution->contribution = contri->size;
+ contribution->file_count = contri->file_count;
+ contribution->dir_count = contri->dir_count;
+ }
+ UNLOCK(&contribution->lock);
+ }
- return 0;
+out:
+ return ret;
}
-
int32_t
-mq_xattr_creation_release_lock (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+mq_get_delta(xlator_t *this, loc_t *loc, quota_meta_t *delta,
+ quota_inode_ctx_t *ctx, inode_contribution_t *contribution)
{
- struct gf_flock lock = {0, };
- quota_local_t *local = NULL;
+ int32_t ret = -1;
+ quota_meta_t size = {
+ 0,
+ };
+ quota_meta_t contri = {
+ 0,
+ };
- local = frame->local;
+ GF_VALIDATE_OR_GOTO("marker", loc, out);
+ GF_VALIDATE_OR_GOTO("marker", loc->inode, out);
+ GF_VALIDATE_OR_GOTO("marker", ctx, out);
+ GF_VALIDATE_OR_GOTO("marker", contribution, out);
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
+ ret = mq_get_metadata(this, loc, &contri, &size, ctx, contribution);
+ if (ret < 0)
+ goto out;
- STACK_WIND (frame,
- mq_inode_creation_done,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->loc,
- F_SETLKW, &lock, NULL);
+ mq_compute_delta(delta, &size, &contri);
- return 0;
+out:
+ return ret;
}
-
int32_t
-mq_create_dirty_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
+mq_remove_contri(xlator_t *this, loc_t *loc, quota_inode_ctx_t *ctx,
+ inode_contribution_t *contri, quota_meta_t *delta,
+ uint32_t nlink)
{
- int32_t ret = -1;
- dict_t *newdict = NULL;
- quota_local_t *local = NULL;
-
- if (op_ret < 0) {
- goto err;
- }
+ int32_t ret = -1;
+ char contri_key[QUOTA_KEY_MAX] = {
+ 0,
+ };
- local = frame->local;
-
- if (local->loc.inode->ia_type == IA_IFDIR) {
- newdict = dict_new ();
- if (!newdict) {
- goto err;
- }
-
- ret = dict_set_int8 (newdict, QUOTA_DIRTY_KEY, 0);
- if (ret == -1) {
- goto err;
- }
-
- uuid_copy (local->loc.gfid, local->loc.inode->gfid);
- GF_UUID_ASSERT (local->loc.gfid);
-
- STACK_WIND (frame, mq_xattr_creation_release_lock,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- &local->loc, newdict, 0, NULL);
+ if (nlink == 1) {
+ /*File was a last link and has been deleted */
+ ret = 0;
+ goto done;
+ }
+
+ GET_CONTRI_KEY(this, contri_key, contri->gfid, ret);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "get contri_key "
+ "failed for %s",
+ uuid_utoa(contri->gfid));
+ goto out;
+ }
+
+ ret = syncop_removexattr(FIRST_CHILD(this), loc, contri_key, 0, NULL);
+ if (ret < 0) {
+ if (-ret == ENOENT || -ret == ESTALE || -ret == ENODATA ||
+ -ret == ENOATTR) {
+ /* Remove contri in done when unlink operation is
+ * performed, so return success on ENOENT/ESTSLE
+ * rename operation removes xattr earlier,
+ * so return success on ENODATA
+ */
+ ret = 0;
} else {
- mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
+ gf_log_callingfn(this->name, GF_LOG_ERROR,
+ "removexattr %s failed for %s: %s", contri_key,
+ loc->path, strerror(-ret));
+ goto out;
}
+ }
- ret = 0;
+done:
+ LOCK(&contri->lock);
+ {
+ contri->contribution += delta->size;
+ contri->file_count += delta->file_count;
+ contri->dir_count += delta->dir_count;
+ }
+ UNLOCK(&contri->lock);
-err:
- if (ret < 0) {
- mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
- }
+ ret = 0;
- if (newdict != NULL)
- dict_unref (newdict);
+out:
+ QUOTA_FREE_CONTRIBUTION_NODE(ctx, contri);
- return 0;
+ return ret;
}
-
int32_t
-mq_create_xattr (xlator_t *this, call_frame_t *frame)
+mq_update_contri(xlator_t *this, loc_t *loc, inode_contribution_t *contri,
+ quota_meta_t *delta)
{
- int32_t ret = 0;
- int64_t *value = NULL;
- int64_t *size = NULL;
- dict_t *dict = NULL;
- char key[512] = {0, };
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
- inode_contribution_t *contri = NULL;
-
- if (frame == NULL || this == NULL)
- return 0;
-
- local = frame->local;
-
- ret = mq_inode_ctx_get (local->loc.inode, this, &ctx);
- if (ret < 0) {
- ctx = mq_inode_ctx_new (local->loc.inode, this);
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "mq_inode_ctx_new failed");
- ret = -1;
- goto out;
- }
- }
-
- dict = dict_new ();
- if (!dict)
- goto out;
-
- if (local->loc.inode->ia_type == IA_IFDIR) {
- QUOTA_ALLOC_OR_GOTO (size, int64_t, ret, err);
- ret = dict_set_bin (dict, QUOTA_SIZE_KEY, size, 8);
- if (ret < 0)
- goto free_size;
- }
-
- if (strcmp (local->loc.path, "/") != 0) {
- contri = mq_add_new_contribution_node (this, ctx, &local->loc);
- if (contri == NULL)
- goto err;
-
- QUOTA_ALLOC_OR_GOTO (value, int64_t, ret, err);
- GET_CONTRI_KEY (key, local->loc.parent->gfid, ret);
-
- ret = dict_set_bin (dict, key, value, 8);
- if (ret < 0)
- goto free_value;
- }
-
- GF_UUID_ASSERT (local->loc.gfid);
-
- STACK_WIND (frame, mq_create_dirty_xattr, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop, &local->loc,
- GF_XATTROP_ADD_ARRAY64, dict, NULL);
+ int32_t ret = -1;
+ char contri_key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+ dict_t *dict = NULL;
+
+ GF_VALIDATE_OR_GOTO("marker", loc, out);
+ GF_VALIDATE_OR_GOTO("marker", loc->inode, out);
+ GF_VALIDATE_OR_GOTO("marker", delta, out);
+ GF_VALIDATE_OR_GOTO("marker", contri, out);
+
+ if (quota_meta_is_null(delta)) {
ret = 0;
-
-free_size:
- if (ret < 0) {
- GF_FREE (size);
- }
-
-free_value:
- if (ret < 0) {
- GF_FREE (value);
- }
-
-err:
- dict_unref (dict);
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_log(this->name, GF_LOG_ERROR, "dict_new failed");
+ ret = -1;
+ goto out;
+ }
+
+ GET_CONTRI_KEY(this, contri_key, contri->gfid, ret);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "get contri_key "
+ "failed for %s",
+ uuid_utoa(contri->gfid));
+ goto out;
+ }
+
+ ret = quota_dict_set_meta(dict, contri_key, delta, loc->inode->ia_type);
+ if (ret < 0)
+ goto out;
+
+ ret = syncop_xattrop(FIRST_CHILD(this), loc, GF_XATTROP_ADD_ARRAY64, dict,
+ NULL, NULL, NULL);
+ if (ret < 0) {
+ gf_log_callingfn(
+ this->name,
+ (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR,
+ "xattrop failed "
+ "for %s: %s",
+ loc->path, strerror(-ret));
+ goto out;
+ }
+
+ LOCK(&contri->lock);
+ {
+ contri->contribution += delta->size;
+ contri->file_count += delta->file_count;
+ contri->dir_count += delta->dir_count;
+ }
+ UNLOCK(&contri->lock);
out:
- if (ret < 0) {
- mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
- }
+ if (dict)
+ dict_unref(dict);
- return 0;
+ return ret;
}
-
int32_t
-mq_check_n_set_inode_xattr (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- inode_t *inode, struct iatt *buf, dict_t *dict,
- struct iatt *postparent)
+mq_update_size(xlator_t *this, loc_t *loc, quota_meta_t *delta)
{
- quota_local_t *local = NULL;
- int64_t *size = NULL, *contri = NULL;
- int8_t dirty = 0;
- int32_t ret = 0;
- char contri_key[512] = {0, };
-
- if (op_ret < 0) {
- goto out;
- }
-
- local = frame->local;
-
- ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size);
- if (ret < 0)
- goto create_xattr;
+ int32_t ret = -1;
+ quota_inode_ctx_t *ctx = NULL;
+ dict_t *dict = NULL;
- ret = dict_get_int8 (dict, QUOTA_DIRTY_KEY, &dirty);
- if (ret < 0)
- goto create_xattr;
-
- //check contribution xattr if not root
- if (strcmp (local->loc.path, "/") != 0) {
- GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret);
- if (ret < 0)
- goto out;
+ GF_VALIDATE_OR_GOTO("marker", loc, out);
+ GF_VALIDATE_OR_GOTO("marker", loc->inode, out);
+ GF_VALIDATE_OR_GOTO("marker", delta, out);
- ret = dict_get_bin (dict, contri_key, (void **) &contri);
- if (ret < 0)
- goto create_xattr;
- }
+ if (quota_meta_is_null(delta)) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = mq_inode_ctx_get(loc->inode, this, &ctx);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to get inode ctx for "
+ "%s",
+ loc->path);
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_log(this->name, GF_LOG_ERROR, "dict_new failed");
+ ret = -1;
+ goto out;
+ }
+
+ ret = quota_dict_set_size_meta(this, dict, delta);
+ if (ret < 0)
+ goto out;
+
+ ret = syncop_xattrop(FIRST_CHILD(this), loc,
+ GF_XATTROP_ADD_ARRAY64_WITH_DEFAULT, dict, NULL, NULL,
+ NULL);
+ if (ret < 0) {
+ gf_log_callingfn(
+ this->name,
+ (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR,
+ "xattrop failed "
+ "for %s: %s",
+ loc->path, strerror(-ret));
+ goto out;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ ctx->size += delta->size;
+ ctx->file_count += delta->file_count;
+ if (ctx->dir_count == 0)
+ ctx->dir_count += delta->dir_count + 1;
+ else
+ ctx->dir_count += delta->dir_count;
+ }
+ UNLOCK(&ctx->lock);
out:
- mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
- return 0;
+ if (dict)
+ dict_unref(dict);
-create_xattr:
- if (uuid_is_null (local->loc.gfid)) {
- uuid_copy (local->loc.gfid, buf->ia_gfid);
- }
-
- mq_create_xattr (this, frame);
- return 0;
+ return ret;
}
-
-int32_t
-mq_get_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+int
+mq_synctask_cleanup(int ret, call_frame_t *frame, void *opaque)
{
- dict_t *xattr_req = NULL;
- quota_local_t *local = NULL;
- int32_t ret = 0;
-
- if (op_ret < 0) {
- goto lock_err;
- }
-
- local = frame->local;
-
- xattr_req = dict_new ();
- if (xattr_req == NULL) {
- goto err;
- }
-
- ret = mq_req_xattr (this, &local->loc, xattr_req);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "cannot request xattr");
- goto err;
- }
-
- if (uuid_is_null (local->loc.gfid))
- uuid_copy (local->loc.gfid, local->loc.inode->gfid);
-
- GF_UUID_ASSERT (local->loc.gfid);
+ quota_synctask_t *args = NULL;
- STACK_WIND (frame, mq_check_n_set_inode_xattr, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, &local->loc, xattr_req);
+ GF_ASSERT(opaque);
- dict_unref (xattr_req);
+ args = (quota_synctask_t *)opaque;
+ loc_wipe(&args->loc);
- return 0;
+ if (args->stub)
+ call_resume(args->stub);
-err:
- mq_xattr_creation_release_lock (frame, NULL, this, 0, 0, NULL);
+ if (!args->is_static)
+ GF_FREE(args);
- if (xattr_req)
- dict_unref (xattr_req);
- return 0;
-
-lock_err:
- mq_inode_creation_done (frame, NULL, this, 0, 0, NULL);
- return 0;
+ return 0;
}
-
-int32_t
-mq_set_inode_xattr (xlator_t *this, loc_t *loc)
+int
+mq_synctask1(xlator_t *this, synctask_fn_t task, gf_boolean_t spawn, loc_t *loc,
+ quota_meta_t *contri, uint32_t nlink, call_stub_t *stub)
{
- struct gf_flock lock = {0, };
- quota_local_t *local = NULL;
- int32_t ret = 0;
- call_frame_t *frame = NULL;
-
- frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- ret = -1;
- goto err;
- }
-
- local = mq_local_new ();
- if (local == NULL) {
- goto err;
- }
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret < 0) {
- goto err;
+ int32_t ret = -1;
+ quota_synctask_t *args = NULL;
+ quota_synctask_t static_args = {
+ 0,
+ };
+
+ if (spawn) {
+ QUOTA_ALLOC_OR_GOTO(args, quota_synctask_t, ret, out);
+ args->is_static = _gf_false;
+ } else {
+ args = &static_args;
+ args->is_static = _gf_true;
+ }
+
+ args->this = this;
+ args->stub = stub;
+ loc_copy(&args->loc, loc);
+ args->ia_nlink = nlink;
+
+ if (contri) {
+ args->contri = *contri;
+ } else {
+ args->contri.size = -1;
+ args->contri.file_count = -1;
+ args->contri.dir_count = -1;
+ }
+
+ if (spawn) {
+ ret = synctask_new1(this->ctx->env, 1024 * 16, task,
+ mq_synctask_cleanup, NULL, args);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to spawn "
+ "new synctask");
+ mq_synctask_cleanup(ret, NULL, args);
}
+ } else {
+ ret = task(args);
+ mq_synctask_cleanup(ret, NULL, args);
+ }
- frame->local = local;
-
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND (frame,
- mq_get_xattr,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->loc, F_SETLKW, &lock, NULL);
-
- return 0;
-
-err:
- QUOTA_STACK_DESTROY (frame, this);
-
- return 0;
+out:
+ return ret;
}
+int
+mq_synctask(xlator_t *this, synctask_fn_t task, gf_boolean_t spawn, loc_t *loc)
+{
+ return mq_synctask1(this, task, spawn, loc, NULL, -1, NULL);
+}
int32_t
-mq_get_parent_inode_local (xlator_t *this, quota_local_t *local)
+mq_prevalidate_txn(xlator_t *this, loc_t *origin_loc, loc_t *loc,
+ quota_inode_ctx_t **ctx, struct iatt *buf)
{
- int32_t ret = -1;
- quota_inode_ctx_t *ctx = NULL;
+ int32_t ret = -1;
+ quota_inode_ctx_t *ctxtmp = NULL;
+
+ if (buf) {
+ if (buf->ia_type == IA_IFREG && IS_DHT_LINKFILE_MODE(buf))
+ goto out;
+
+ if (buf->ia_type != IA_IFREG && buf->ia_type != IA_IFLNK &&
+ buf->ia_type != IA_IFDIR)
+ goto out;
+ }
+
+ if (origin_loc == NULL || origin_loc->inode == NULL ||
+ gf_uuid_is_null(origin_loc->inode->gfid))
+ goto out;
+
+ loc_copy(loc, origin_loc);
+
+ if (gf_uuid_is_null(loc->gfid))
+ gf_uuid_copy(loc->gfid, loc->inode->gfid);
+
+ if (!loc_is_root(loc) && loc->parent == NULL)
+ loc->parent = inode_parent(loc->inode, 0, NULL);
+
+ ret = mq_inode_ctx_get(loc->inode, this, &ctxtmp);
+ if (ret < 0) {
+ gf_log_callingfn(this->name, GF_LOG_WARNING,
+ "inode ctx for "
+ "is NULL for %s",
+ loc->path);
+ goto out;
+ }
+ if (ctx)
+ *ctx = ctxtmp;
+
+ ret = 0;
+out:
+ return ret;
+}
- GF_VALIDATE_OR_GOTO ("marker", this, out);
- GF_VALIDATE_OR_GOTO ("marker", local, out);
+int
+mq_create_xattrs_task(void *opaque)
+{
+ int32_t ret = -1;
+ gf_boolean_t locked = _gf_false;
+ gf_boolean_t contri_set = _gf_false;
+ gf_boolean_t size_set = _gf_false;
+ gf_boolean_t need_txn = _gf_false;
+ quota_synctask_t *args = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ xlator_t *this = NULL;
+ loc_t *loc = NULL;
+ gf_boolean_t status = _gf_false;
+
+ GF_ASSERT(opaque);
+
+ args = (quota_synctask_t *)opaque;
+ loc = &args->loc;
+ this = args->this;
+ THIS = this;
+
+ ret = mq_inode_ctx_get(loc->inode, this, &ctx);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to"
+ "get inode ctx, aborting quota create txn");
+ goto out;
+ }
+
+ if (loc->inode->ia_type == IA_IFDIR) {
+ /* lock not required for files */
+ ret = mq_lock(this, loc, F_WRLCK);
+ if (ret < 0)
+ goto out;
+ locked = _gf_true;
+ }
- local->contri = NULL;
+ ret = mq_are_xattrs_set(this, loc, &contri_set, &size_set);
+ if (ret < 0 || (contri_set && size_set))
+ goto out;
- loc_wipe (&local->loc);
+ mq_set_ctx_create_status(ctx, _gf_false);
+ status = _gf_true;
- ret = mq_loc_copy (&local->loc, &local->parent_loc);
- if (ret < 0) {
- gf_log_callingfn (this->name, GF_LOG_WARNING,
- "loc copy failed");
- goto out;
- }
+ if (loc->inode->ia_type == IA_IFDIR && size_set == _gf_false) {
+ ret = mq_create_size_xattrs(this, ctx, loc);
+ if (ret < 0)
+ goto out;
+ }
- loc_wipe (&local->parent_loc);
+ need_txn = _gf_true;
+out:
+ if (locked)
+ ret = mq_lock(this, loc, F_UNLCK);
- ret = mq_inode_loc_fill (NULL, local->loc.parent,
- &local->parent_loc);
- if (ret < 0) {
- gf_log_callingfn (this->name, GF_LOG_WARNING,
- "failed to build parent loc of %s",
- local->loc.path);
- goto out;
- }
+ if (status == _gf_false)
+ mq_set_ctx_create_status(ctx, _gf_false);
- ret = mq_inode_ctx_get (local->loc.inode, this, &ctx);
- if (ret < 0) {
- gf_log_callingfn (this->name, GF_LOG_WARNING,
- "inode ctx get failed");
- goto out;
- }
+ if (need_txn)
+ ret = mq_initiate_quota_blocking_txn(this, loc, NULL);
- local->ctx = ctx;
+ return ret;
+}
- if (list_empty (&ctx->contribution_head)) {
- gf_log_callingfn (this->name, GF_LOG_WARNING,
- "contribution node list is empty which "
- "is an error");
- ret = -1;
- goto out;
+static int
+_mq_create_xattrs_txn(xlator_t *this, loc_t *origin_loc, struct iatt *buf,
+ gf_boolean_t spawn)
+{
+ int32_t ret = -1;
+ quota_inode_ctx_t *ctx = NULL;
+ gf_boolean_t status = _gf_true;
+ loc_t loc = {
+ 0,
+ };
+ inode_contribution_t *contribution = NULL;
+
+ ret = mq_prevalidate_txn(this, origin_loc, &loc, &ctx, buf);
+ if (ret < 0)
+ goto out;
+
+ ret = mq_test_and_set_ctx_create_status(ctx, &status);
+ if (ret < 0 || status == _gf_true)
+ goto out;
+
+ if (!loc_is_root(&loc) && loc.parent) {
+ contribution = mq_add_new_contribution_node(this, ctx, &loc);
+ if (contribution == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "cannot add a new contribution node "
+ "(%s)",
+ uuid_utoa(loc.gfid));
+ ret = -1;
+ goto out;
+ } else {
+ GF_REF_PUT(contribution);
}
+ }
- local->contri = (inode_contribution_t *) ctx->contribution_head.next;
-
- ret = 0;
+ ret = mq_synctask(this, mq_create_xattrs_task, spawn, &loc);
out:
- return ret;
-}
-
+ if (ret < 0 && status == _gf_false)
+ mq_set_ctx_create_status(ctx, _gf_false);
-int32_t
-mq_xattr_updation_done (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict, dict_t *xdata)
-{
- QUOTA_STACK_DESTROY (frame, this);
- return 0;
+ loc_wipe(&loc);
+ return ret;
}
-
-int32_t
-mq_inodelk_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+int
+mq_create_xattrs_txn(xlator_t *this, loc_t *loc, struct iatt *buf)
{
- int32_t ret = 0;
- gf_boolean_t status = _gf_false;
- quota_local_t *local = NULL;
-
- local = frame->local;
-
- if (op_ret == -1 || local->err) {
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "unlocking failed on path (%s)(%s)",
- local->parent_loc.path, strerror (op_errno));
- }
- mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL, NULL);
-
- return 0;
- }
+ int32_t ret = -1;
- gf_log (this->name, GF_LOG_DEBUG,
- "inodelk released on %s", local->parent_loc.path);
+ GF_VALIDATE_OR_GOTO("marker", loc, out);
+ GF_VALIDATE_OR_GOTO("marker", loc->inode, out);
- if ((strcmp (local->parent_loc.path, "/") == 0)
- || (local->delta == 0)) {
- mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL, NULL);
- } else {
- ret = mq_get_parent_inode_local (this, local);
- if (ret < 0) {
- mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL,
- NULL);
- goto out;
- }
- status = _gf_true;
-
- ret = mq_test_and_set_ctx_updation_status (local->ctx, &status);
- if (ret == 0 && status == _gf_false) {
- mq_get_lock_on_parent (frame, this);
- } else {
- mq_xattr_updation_done (frame, NULL, this, 0, 0, NULL,
- NULL);
- }
- }
+ ret = _mq_create_xattrs_txn(this, loc, buf, _gf_true);
out:
- return 0;
+ return ret;
}
-
-//now release lock on the parent inode
int32_t
-mq_release_parent_lock (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+mq_reduce_parent_size_task(void *opaque)
{
- int32_t ret = 0;
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
- struct gf_flock lock = {0, };
-
- local = frame->local;
-
- if (local->err != 0) {
- gf_log_callingfn (this->name,
- (local->err == ENOENT) ? GF_LOG_DEBUG
- : GF_LOG_WARNING,
- "An operation during quota updation "
- "of path (%s) failed (%s)", local->loc.path,
- strerror (local->err));
+ int32_t ret = -1;
+ int32_t prev_dirty = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_inode_ctx_t *parent_ctx = NULL;
+ inode_contribution_t *contribution = NULL;
+ quota_meta_t delta = {
+ 0,
+ };
+ quota_meta_t contri = {
+ 0,
+ };
+ loc_t parent_loc = {
+ 0,
+ };
+ gf_boolean_t locked = _gf_false;
+ gf_boolean_t dirty = _gf_false;
+ quota_synctask_t *args = NULL;
+ xlator_t *this = NULL;
+ loc_t *loc = NULL;
+ gf_boolean_t remove_xattr = _gf_true;
+ uint32_t nlink = 0;
+
+ GF_ASSERT(opaque);
+
+ args = (quota_synctask_t *)opaque;
+ loc = &args->loc;
+ contri = args->contri;
+ nlink = args->ia_nlink;
+ this = args->this;
+ THIS = this;
+
+ ret = mq_inode_loc_fill(NULL, loc->parent, &parent_loc);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "parent_loc fill failed for "
+ "child inode %s: ",
+ uuid_utoa(loc->inode->gfid));
+ goto out;
+ }
+
+ ret = mq_lock(this, &parent_loc, F_WRLCK);
+ if (ret < 0)
+ goto out;
+ locked = _gf_true;
+
+ if (contri.size >= 0) {
+ /* contri parameter is supplied only for rename operation.
+ * remove xattr is alreday performed, we need to skip
+ * removexattr for rename operation
+ */
+ remove_xattr = _gf_false;
+ delta.size = contri.size;
+ delta.file_count = contri.file_count;
+ delta.dir_count = contri.dir_count;
+ } else {
+ remove_xattr = _gf_true;
+
+ ret = mq_inode_ctx_get(loc->inode, this, &ctx);
+ if (ret < 0) {
+ gf_log_callingfn(this->name, GF_LOG_WARNING,
+ "ctx for"
+ " the node %s is NULL",
+ loc->path);
+ goto out;
}
- ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx);
- if (ret < 0)
- goto wind;
-
- LOCK (&ctx->lock);
- {
- ctx->dirty = 0;
+ contribution = mq_get_contribution_node(loc->parent, ctx);
+ if (contribution == NULL) {
+ ret = -1;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "contribution for the node %s is NULL", loc->path);
+ goto out;
}
- UNLOCK (&ctx->lock);
- if (local->parent_loc.inode == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Invalid parent inode.");
- goto err;
+ LOCK(&contribution->lock);
+ {
+ delta.size = contribution->contribution;
+ delta.file_count = contribution->file_count;
+ delta.dir_count = contribution->dir_count;
}
+ UNLOCK(&contribution->lock);
+ }
-wind:
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND (frame,
- mq_inodelk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->parent_loc,
- F_SETLKW, &lock, NULL);
-
- return 0;
-err:
- mq_xattr_updation_done (frame, NULL, this,
- 0, 0 , NULL, NULL);
- return 0;
-}
-
-
-int32_t
-mq_mark_undirty (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict, dict_t *xdata)
-{
- int32_t ret = -1;
- int64_t *size = NULL;
- dict_t *newdict = NULL;
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
-
- local = frame->local;
+ ret = mq_get_set_dirty(this, &parent_loc, 1, &prev_dirty);
+ if (ret < 0)
+ goto out;
+ dirty = _gf_true;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_WARNING, "%s occurred while"
- " updating the size of %s", strerror (op_errno),
- local->parent_loc.path);
+ mq_sub_meta(&delta, NULL);
- goto err;
- }
+ if (remove_xattr) {
+ ret = mq_remove_contri(this, loc, ctx, contribution, &delta, nlink);
+ if (ret < 0)
+ goto out;
+ }
- //update the size of the parent inode
- if (dict != NULL) {
- ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx);
- if (ret < 0) {
- op_errno = EINVAL;
- goto err;
- }
-
- ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size);
- if (ret < 0) {
- op_errno = EINVAL;
- goto err;
- }
-
- LOCK (&ctx->lock);
- {
- if (size)
- ctx->size = ntoh64 (*size);
- gf_log (this->name, GF_LOG_DEBUG, "%s %"PRId64,
- local->parent_loc.path, ctx->size);
- }
- UNLOCK (&ctx->lock);
- }
+ if (quota_meta_is_null(&delta))
+ goto out;
- newdict = dict_new ();
- if (!newdict) {
- op_errno = ENOMEM;
- goto err;
- }
+ ret = mq_update_size(this, &parent_loc, &delta);
+ if (ret < 0)
+ goto out;
- ret = dict_set_int8 (newdict, QUOTA_DIRTY_KEY, 0);
-
- if (ret == -1) {
- op_errno = -ret;
- goto err;
+out:
+ if (dirty) {
+ if (ret < 0 || prev_dirty) {
+ /* On failure clear dirty status flag.
+ * In the next lookup inspect_directory_xattr
+ * can set the status flag and fix the
+ * dirty directory.
+ * Do the same if dir was dirty before
+ * the txn
+ */
+ ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx);
+ if (ret == 0)
+ mq_set_ctx_dirty_status(parent_ctx, _gf_false);
+ } else {
+ ret = mq_mark_dirty(this, &parent_loc, 0);
}
+ }
- uuid_copy (local->parent_loc.gfid, local->parent_loc.inode->gfid);
- GF_UUID_ASSERT (local->parent_loc.gfid);
+ if (locked)
+ ret = mq_lock(this, &parent_loc, F_UNLCK);
- STACK_WIND (frame, mq_release_parent_lock,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- &local->parent_loc, newdict, 0, NULL);
+ if (ret >= 0)
+ ret = mq_initiate_quota_blocking_txn(this, &parent_loc, NULL);
- ret = 0;
-err:
- if (op_ret == -1 || ret == -1) {
- local->err = op_errno;
+ loc_wipe(&parent_loc);
- mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
- }
+ if (contribution)
+ GF_REF_PUT(contribution);
- if (newdict)
- dict_unref (newdict);
-
- return 0;
+ return ret;
}
-
int32_t
-mq_update_parent_size (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict, dict_t *xdata)
+mq_reduce_parent_size_txn(xlator_t *this, loc_t *origin_loc,
+ quota_meta_t *contri, uint32_t nlink,
+ call_stub_t *stub)
{
- int64_t *size = NULL;
- int32_t ret = -1;
- dict_t *newdict = NULL;
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
-
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, ((op_errno == ENOENT) ? GF_LOG_DEBUG :
- GF_LOG_WARNING),
- "xattrop call failed: %s", strerror (op_errno));
-
- goto err;
- }
+ int32_t ret = -1;
+ loc_t loc = {
+ 0,
+ };
+ gf_boolean_t resume_stub = _gf_true;
- LOCK (&local->contri->lock);
- {
- local->contri->contribution += local->delta;
- }
- UNLOCK (&local->contri->lock);
+ GF_VALIDATE_OR_GOTO("marker", this, out);
+ GF_VALIDATE_OR_GOTO("marker", origin_loc, out);
- gf_log (this->name, GF_LOG_DEBUG, "%s %"PRId64 "%"PRId64,
- local->loc.path, local->ctx->size,
- local->contri->contribution);
-
- if (dict == NULL) {
- op_errno = EINVAL;
- goto err;
- }
+ ret = mq_prevalidate_txn(this, origin_loc, &loc, NULL, NULL);
+ if (ret < 0)
+ goto out;
- ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx);
- if (ret < 0) {
- op_errno = EINVAL;
- goto err;
- }
-
- newdict = dict_new ();
- if (!newdict) {
- op_errno = ENOMEM;
- ret = -1;
- goto err;
- }
-
- QUOTA_ALLOC_OR_GOTO (size, int64_t, ret, err);
-
- *size = hton64 (local->delta);
+ if (loc_is_root(&loc)) {
+ ret = 0;
+ goto out;
+ }
- ret = dict_set_bin (newdict, QUOTA_SIZE_KEY, size, 8);
- if (ret < 0) {
- op_errno = -ret;
- goto err;
- }
+ resume_stub = _gf_false;
+ ret = mq_synctask1(this, mq_reduce_parent_size_task, _gf_true, &loc, contri,
+ nlink, stub);
+out:
+ loc_wipe(&loc);
- if (uuid_is_null (local->parent_loc.gfid))
- uuid_copy (local->parent_loc.gfid,
- local->parent_loc.inode->gfid);
- GF_UUID_ASSERT (local->parent_loc.gfid);
-
- STACK_WIND (frame,
- mq_mark_undirty,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop,
- &local->parent_loc,
- GF_XATTROP_ADD_ARRAY64,
- newdict, NULL);
- ret = 0;
-err:
- if (op_ret == -1 || ret < 0) {
- local->err = op_errno;
- mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
- }
+ if (resume_stub && stub)
+ call_resume(stub);
- if (newdict)
- dict_unref (newdict);
+ if (ret)
+ gf_log_callingfn(this ? this->name : "Marker", GF_LOG_ERROR,
+ "mq_reduce_parent_size_txn failed");
- return 0;
+ return ret;
}
-int32_t
-mq_update_inode_contribution (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict,
- struct iatt *postparent)
+int
+mq_initiate_quota_task(void *opaque)
{
- int32_t ret = -1;
- int64_t *size = NULL, size_int = 0, contri_int = 0;
- int64_t *contri = NULL;
- int64_t *delta = NULL;
- char contri_key [512] = {0, };
- dict_t *newdict = NULL;
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
- inode_contribution_t *contribution = NULL;
-
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, ((op_errno == ENOENT) ? GF_LOG_DEBUG :
- GF_LOG_WARNING),
- "failed to get size and contribution of path (%s)(%s)",
- local->loc.path, strerror (op_errno));
- goto err;
- }
-
- ctx = local->ctx;
- contribution = local->contri;
-
- //prepare to update size & contribution of the inode
- GET_CONTRI_KEY (contri_key, contribution->gfid, ret);
- if (ret == -1) {
- op_errno = ENOMEM;
- goto err;
- }
-
- LOCK (&ctx->lock);
- {
- if (local->loc.inode->ia_type == IA_IFDIR ) {
- ret = dict_get_bin (dict, QUOTA_SIZE_KEY,
- (void **) &size);
- if (ret < 0) {
- op_errno = EINVAL;
- goto unlock;
- }
-
- ctx->size = ntoh64 (*size);
- } else
- ctx->size = buf->ia_blocks * 512;
-
- size_int = ctx->size;
- }
-unlock:
- UNLOCK (&ctx->lock);
-
+ int32_t ret = -1;
+ int32_t prev_dirty = 0;
+ loc_t child_loc = {
+ 0,
+ };
+ loc_t parent_loc = {
+ 0,
+ };
+ gf_boolean_t locked = _gf_false;
+ gf_boolean_t dirty = _gf_false;
+ gf_boolean_t status = _gf_false;
+ quota_meta_t delta = {
+ 0,
+ };
+ quota_synctask_t *args = NULL;
+ xlator_t *this = NULL;
+ loc_t *loc = NULL;
+ inode_contribution_t *contri = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_inode_ctx_t *parent_ctx = NULL;
+ inode_t *tmp_parent = NULL;
+
+ GF_VALIDATE_OR_GOTO("marker", opaque, out);
+
+ args = (quota_synctask_t *)opaque;
+ loc = &args->loc;
+ this = args->this;
+
+ GF_VALIDATE_OR_GOTO("marker", this, out);
+ THIS = this;
+
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ ret = mq_loc_copy(&child_loc, loc);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "loc copy failed");
+ goto out;
+ }
+
+ while (!__is_root_gfid(child_loc.gfid)) {
+ ret = mq_inode_ctx_get(child_loc.inode, this, &ctx);
if (ret < 0) {
- goto err;
+ gf_log(this->name, GF_LOG_WARNING,
+ "inode ctx get failed for %s, "
+ "aborting update txn",
+ child_loc.path);
+ goto out;
}
- ret = dict_get_bin (dict, contri_key, (void **) &contri);
-
- LOCK (&contribution->lock);
- {
- if (ret < 0)
- contribution->contribution = 0;
- else
- contribution->contribution = ntoh64 (*contri);
-
- contri_int = contribution->contribution;
- }
- UNLOCK (&contribution->lock);
-
- gf_log (this->name, GF_LOG_DEBUG, "%s %"PRId64 "%"PRId64,
- local->loc.path, size_int, contri_int);
-
- local->delta = size_int - contri_int;
-
- if (local->delta == 0) {
- mq_mark_undirty (frame, NULL, this, 0, 0, NULL, NULL);
- return 0;
+ /* To improve performance, abort current transaction
+ * if one is already in progress for same inode
+ */
+ if (status == _gf_true) {
+ /* status will already set before txn start,
+ * so it should not be set in first
+ * loop iteration
+ */
+ ret = mq_test_and_set_ctx_updation_status(ctx, &status);
+ if (ret < 0 || status == _gf_true)
+ goto out;
}
- newdict = dict_new ();
- if (newdict == NULL) {
- op_errno = ENOMEM;
+ if (child_loc.parent == NULL) {
+ ret = mq_build_ancestry(this, &child_loc);
+ if (ret < 0 || child_loc.parent == NULL) {
+ /* If application performs parallel remove
+ * operations on same set of files/directories
+ * then we may get ENOENT/ESTALE
+ */
+ gf_log(this->name,
+ (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG
+ : GF_LOG_ERROR,
+ "build ancestry failed for inode %s",
+ uuid_utoa(child_loc.inode->gfid));
ret = -1;
- goto err;
+ goto out;
+ }
}
- QUOTA_ALLOC_OR_GOTO (delta, int64_t, ret, err);
-
- *delta = hton64 (local->delta);
-
- ret = dict_set_bin (newdict, contri_key, delta, 8);
+ ret = mq_inode_loc_fill(NULL, child_loc.parent, &parent_loc);
if (ret < 0) {
- op_errno = -ret;
- ret = -1;
- goto err;
+ gf_log(this->name, GF_LOG_ERROR,
+ "parent_loc fill "
+ "failed for child inode %s: ",
+ uuid_utoa(child_loc.inode->gfid));
+ goto out;
}
- if (uuid_is_null (local->loc.gfid))
- uuid_copy (local->loc.gfid, buf->ia_gfid);
-
- GF_UUID_ASSERT (local->loc.gfid);
-
- STACK_WIND (frame,
- mq_update_parent_size,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop,
- &local->loc,
- GF_XATTROP_ADD_ARRAY64,
- newdict, NULL);
- ret = 0;
-
-err:
- if (op_ret == -1 || ret < 0) {
- local->err = op_errno;
-
- mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
- }
-
- if (newdict)
- dict_unref (newdict);
+ ret = mq_lock(this, &parent_loc, F_WRLCK);
+ if (ret < 0)
+ goto out;
+ locked = _gf_true;
- return 0;
-}
+ mq_set_ctx_updation_status(ctx, _gf_false);
+ status = _gf_true;
-int32_t
-mq_fetch_child_size_and_contri (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- int32_t ret = -1;
- char contri_key [512] = {0, };
- dict_t *newdict = NULL;
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
-
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, (op_errno == ENOENT) ? GF_LOG_DEBUG
- : GF_LOG_WARNING,
- "couldnt mark inode corresponding to path (%s) dirty "
- "(%s)", local->parent_loc.path, strerror (op_errno));
- goto err;
+ /* Contribution node can be NULL in below scenarios and
+ create if needed:
+
+ Scenario 1)
+ In this case create a new contribution node
+ Suppose hard link for a file f1 present in a directory d1 is
+ created in the directory d2 (as f2). Now, since d2's
+ contribution is not there in f1's inode ctx, d2's
+ contribution xattr won't be created and will create problems
+ for quota operations.
+
+ Don't create contribution if parent has been changed after
+ taking a lock, this can happen when rename is performed
+ and writes is still in-progress for the same file
+
+ Scenario 2)
+ When a rename operation is performed, contribution node
+ for olp path will be removed.
+
+ Create contribution node only if oldparent is same as
+ newparent.
+ Consider below example
+ 1) rename FOP invoked on file 'x'
+ 2) write is still in progress for file 'x'
+ 3) rename takes a lock on old-parent
+ 4) write-update txn blocked on old-parent to acquire lock
+ 5) in rename_cbk, contri xattrs are removed and contribution
+ is deleted and lock is released
+ 6) now write-update txn gets the lock and updates the
+ wrong parent as it was holding lock on old parent
+ so validate parent once the lock is acquired
+
+ For more information on this problem, please see
+ doc for marker_rename in file marker.c
+ */
+ contri = mq_get_contribution_node(child_loc.parent, ctx);
+ if (contri == NULL) {
+ tmp_parent = inode_parent(child_loc.inode, 0, NULL);
+ if (tmp_parent == NULL) {
+ /* This can happen if application performs
+ * parallel remove operations on same set
+ * of files/directories
+ */
+ gf_log(this->name, GF_LOG_WARNING,
+ "parent is "
+ "NULL for inode %s",
+ uuid_utoa(child_loc.inode->gfid));
+ ret = -1;
+ goto out;
+ }
+ if (gf_uuid_compare(tmp_parent->gfid, parent_loc.gfid)) {
+ /* abort txn if parent has changed */
+ ret = 0;
+ goto out;
+ }
+
+ inode_unref(tmp_parent);
+ tmp_parent = NULL;
+
+ contri = mq_add_new_contribution_node(this, ctx, &child_loc);
+ if (contri == NULL) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to "
+ "create contribution node for %s, "
+ "abort update txn",
+ child_loc.path);
+ ret = -1;
+ goto out;
+ }
}
- VALIDATE_OR_GOTO (local->ctx, err);
- VALIDATE_OR_GOTO (local->contri, err);
-
- gf_log (this->name, GF_LOG_DEBUG, "%s marked dirty", local->parent_loc.path);
-
- //update parent ctx
- ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx);
- if (ret == -1) {
- op_errno = EINVAL;
- goto err;
- }
+ ret = mq_get_delta(this, &child_loc, &delta, ctx, contri);
+ if (ret < 0)
+ goto out;
- LOCK (&ctx->lock);
- {
- ctx->dirty = 1;
- }
- UNLOCK (&ctx->lock);
+ if (quota_meta_is_null(&delta))
+ goto out;
- newdict = dict_new ();
- if (newdict == NULL) {
- op_errno = ENOMEM;
- goto err;
- }
+ ret = mq_get_set_dirty(this, &parent_loc, 1, &prev_dirty);
+ if (ret < 0)
+ goto out;
+ dirty = _gf_true;
- if (local->loc.inode->ia_type == IA_IFDIR) {
- ret = dict_set_int64 (newdict, QUOTA_SIZE_KEY, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "dict_set failed.");
- goto err;
- }
- }
+ ret = mq_update_contri(this, &child_loc, contri, &delta);
+ if (ret < 0)
+ goto out;
- GET_CONTRI_KEY (contri_key, local->contri->gfid, ret);
+ ret = mq_update_size(this, &parent_loc, &delta);
if (ret < 0) {
- op_errno = ENOMEM;
- goto err;
+ gf_log(this->name, GF_LOG_DEBUG,
+ "rollback "
+ "contri updation");
+ mq_sub_meta(&delta, NULL);
+ mq_update_contri(this, &child_loc, contri, &delta);
+ goto out;
}
- ret = dict_set_int64 (newdict, contri_key, 0);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "dict_set failed.");
- goto err;
+ if (prev_dirty == 0) {
+ ret = mq_mark_dirty(this, &parent_loc, 0);
+ } else {
+ ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx);
+ if (ret == 0)
+ mq_set_ctx_dirty_status(parent_ctx, _gf_false);
}
+ dirty = _gf_false;
+ prev_dirty = 0;
- mq_set_ctx_updation_status (local->ctx, _gf_false);
+ ret = mq_lock(this, &parent_loc, F_UNLCK);
+ locked = _gf_false;
- if (uuid_is_null (local->loc.gfid))
- uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+ if (__is_root_gfid(parent_loc.gfid))
+ break;
- GF_UUID_ASSERT (local->loc.gfid);
+ /* Repeate above steps upwards till the root */
+ loc_wipe(&child_loc);
+ ret = mq_loc_copy(&child_loc, &parent_loc);
+ if (ret < 0)
+ goto out;
- STACK_WIND (frame, mq_update_inode_contribution, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, &local->loc, newdict);
+ loc_wipe(&parent_loc);
+ GF_REF_PUT(contri);
+ contri = NULL;
+ }
- ret = 0;
+out:
+ if ((dirty) && (ret < 0)) {
+ /* On failure clear dirty status flag.
+ * In the next lookup inspect_directory_xattr
+ * can set the status flag and fix the
+ * dirty directory.
+ * Do the same if the dir was dirty before
+ * txn
+ */
+ ret = mq_inode_ctx_get(parent_loc.inode, this, &parent_ctx);
+ if (ret == 0)
+ mq_set_ctx_dirty_status(parent_ctx, _gf_false);
+ }
-err:
- if ((op_ret == -1) || (ret < 0)) {
- local->err = op_errno;
+ if (locked)
+ ret = mq_lock(this, &parent_loc, F_UNLCK);
- mq_set_ctx_updation_status (local->ctx, _gf_false);
+ if (ctx && status == _gf_false)
+ mq_set_ctx_updation_status(ctx, _gf_false);
- mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
- }
+ loc_wipe(&child_loc);
+ loc_wipe(&parent_loc);
- if (newdict)
- dict_unref (newdict);
+ if (tmp_parent)
+ inode_unref(tmp_parent);
- return 0;
+ if (contri)
+ GF_REF_PUT(contri);
+
+ return 0;
}
-int32_t
-mq_markdirty (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno, dict_t *xdata)
+int
+_mq_initiate_quota_txn(xlator_t *this, loc_t *origin_loc, struct iatt *buf,
+ gf_boolean_t spawn)
{
- int32_t ret = -1;
- dict_t *dict = NULL;
- quota_local_t *local = NULL;
-
- local = frame->local;
-
- if (op_ret == -1){
- gf_log (this->name, (op_errno == ENOENT) ? GF_LOG_DEBUG
- : GF_LOG_WARNING, "acquiring locks failed on %s (%s)",
- local->parent_loc.path, strerror (op_errno));
-
- local->err = op_errno;
-
- mq_set_ctx_updation_status (local->ctx, _gf_false);
-
- mq_inodelk_cbk (frame, NULL, this, 0, 0, NULL);
-
- return 0;
- }
-
- gf_log (this->name, GF_LOG_TRACE,
- "inodelk succeeded on %s", local->parent_loc.path);
-
- dict = dict_new ();
- if (!dict) {
- ret = -1;
- goto err;
- }
-
- ret = dict_set_int8 (dict, QUOTA_DIRTY_KEY, 1);
- if (ret == -1)
- goto err;
-
- uuid_copy (local->parent_loc.gfid,
- local->parent_loc.inode->gfid);
- GF_UUID_ASSERT (local->parent_loc.gfid);
-
- STACK_WIND (frame, mq_fetch_child_size_and_contri,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- &local->parent_loc, dict, 0, NULL);
-
+ int32_t ret = -1;
+ quota_inode_ctx_t *ctx = NULL;
+ gf_boolean_t status = _gf_true;
+ loc_t loc = {
+ 0,
+ };
+
+ ret = mq_prevalidate_txn(this, origin_loc, &loc, &ctx, buf);
+ if (ret < 0)
+ goto out;
+
+ if (loc_is_root(&loc)) {
ret = 0;
-err:
- if (ret == -1) {
- local->err = 1;
+ goto out;
+ }
- mq_set_ctx_updation_status (local->ctx, _gf_false);
+ ret = mq_test_and_set_ctx_updation_status(ctx, &status);
+ if (ret < 0 || status == _gf_true)
+ goto out;
- mq_release_parent_lock (frame, NULL, this, 0, 0, NULL);
- }
+ ret = mq_synctask(this, mq_initiate_quota_task, spawn, &loc);
- if (dict)
- dict_unref (dict);
+out:
+ if (ret < 0 && status == _gf_false)
+ mq_set_ctx_updation_status(ctx, _gf_false);
- return 0;
+ loc_wipe(&loc);
+ return ret;
}
-
-int32_t
-mq_get_lock_on_parent (call_frame_t *frame, xlator_t *this)
+int
+mq_initiate_quota_txn(xlator_t *this, loc_t *loc, struct iatt *buf)
{
- struct gf_flock lock = {0, };
- quota_local_t *local = NULL;
-
- GF_VALIDATE_OR_GOTO ("marker", frame, fr_destroy);
-
- local = frame->local;
- gf_log (this->name, GF_LOG_DEBUG, "taking lock on %s",
- local->parent_loc.path);
-
- if (local->parent_loc.inode == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "parent inode is not valid, aborting "
- "transaction.");
- goto fr_destroy;
- }
-
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND (frame,
- mq_markdirty,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->parent_loc, F_SETLKW, &lock, NULL);
-
- return 0;
+ int32_t ret = -1;
-fr_destroy:
- QUOTA_STACK_DESTROY (frame, this);
+ GF_VALIDATE_OR_GOTO("marker", this, out);
+ GF_VALIDATE_OR_GOTO("marker", loc, out);
+ GF_VALIDATE_OR_GOTO("marker", loc->inode, out);
- return -1;
+ ret = _mq_initiate_quota_txn(this, loc, buf, _gf_true);
+out:
+ return ret;
}
-
int
-mq_start_quota_txn (xlator_t *this, loc_t *loc,
- quota_inode_ctx_t *ctx,
- inode_contribution_t *contri)
+mq_initiate_quota_blocking_txn(xlator_t *this, loc_t *loc, struct iatt *buf)
{
- int32_t ret = -1;
- call_frame_t *frame = NULL;
- quota_local_t *local = NULL;
-
- frame = create_frame (this, this->ctx->pool);
- if (frame == NULL)
- goto err;
-
- mq_assign_lk_owner (this, frame);
-
- local = mq_local_new ();
- if (local == NULL)
- goto fr_destroy;
-
- frame->local = local;
-
- ret = mq_loc_copy (&local->loc, loc);
- if (ret < 0)
- goto fr_destroy;
-
- ret = mq_inode_loc_fill (NULL, local->loc.parent,
- &local->parent_loc);
- if (ret < 0)
- goto fr_destroy;
-
- local->ctx = ctx;
- local->contri = contri;
+ int32_t ret = -1;
- ret = mq_get_lock_on_parent (frame, this);
- if (ret == -1)
- goto err;
+ GF_VALIDATE_OR_GOTO("marker", this, out);
+ GF_VALIDATE_OR_GOTO("marker", loc, out);
+ GF_VALIDATE_OR_GOTO("marker", loc->inode, out);
- return 0;
-
-fr_destroy:
- QUOTA_STACK_DESTROY (frame, this);
-err:
- mq_set_ctx_updation_status (ctx, _gf_false);
-
- return -1;
+ ret = _mq_initiate_quota_txn(this, loc, buf, _gf_false);
+out:
+ return ret;
}
-
int
-mq_initiate_quota_txn (xlator_t *this, loc_t *loc)
+mq_update_dirty_inode_task(void *opaque)
{
- int32_t ret = -1;
- gf_boolean_t status = _gf_false;
- quota_inode_ctx_t *ctx = NULL;
- inode_contribution_t *contribution = NULL;
-
- GF_VALIDATE_OR_GOTO ("marker", this, out);
- GF_VALIDATE_OR_GOTO ("marker", loc, out);
- GF_VALIDATE_OR_GOTO ("marker", loc->inode, out);
-
- ret = mq_inode_ctx_get (loc->inode, this, &ctx);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "inode ctx get failed, aborting quota txn");
- ret = -1;
- goto out;
- }
-
- contribution = mq_get_contribution_node (loc->parent, ctx);
- if (contribution == NULL)
- goto out;
-
- /* To improve performance, donot start another transaction
- * if one is already in progress for same inode
- */
- status = _gf_true;
-
- ret = mq_test_and_set_ctx_updation_status (ctx, &status);
- if (ret < 0)
- goto out;
-
- if (status == _gf_false) {
- mq_start_quota_txn (this, loc, ctx, contribution);
- }
-
+ int32_t ret = -1;
+ fd_t *fd = NULL;
+ off_t offset = 0;
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ gf_boolean_t locked = _gf_false;
+ gf_boolean_t updated = _gf_false;
+ int32_t dirty = 0;
+ quota_meta_t contri = {
+ 0,
+ };
+ quota_meta_t size = {
+ 0,
+ };
+ quota_meta_t contri_sum = {
+ 0,
+ };
+ quota_meta_t delta = {
+ 0,
+ };
+ quota_synctask_t *args = NULL;
+ xlator_t *this = NULL;
+ loc_t *loc = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ dict_t *xdata = NULL;
+ char contri_key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+ int keylen = 0;
+
+ GF_ASSERT(opaque);
+
+ args = (quota_synctask_t *)opaque;
+ loc = &args->loc;
+ this = args->this;
+ THIS = this;
+ INIT_LIST_HEAD(&entries.list);
+
+ ret = mq_inode_ctx_get(loc->inode, this, &ctx);
+ if (ret < 0)
+ goto out;
+
+ GET_CONTRI_KEY(this, contri_key, loc->gfid, keylen);
+ if (keylen < 0) {
+ ret = keylen;
+ goto out;
+ }
+
+ xdata = dict_new();
+ if (xdata == NULL) {
+ gf_log(this->name, GF_LOG_ERROR, "dict_new failed");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int64(xdata, contri_key, 0);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "dict_set failed");
+ goto out;
+ }
+
+ ret = mq_lock(this, loc, F_WRLCK);
+ if (ret < 0)
+ goto out;
+ locked = _gf_true;
+
+ ret = mq_get_dirty(this, loc, &dirty);
+ if (ret < 0 || dirty == 0) {
ret = 0;
-out:
- return ret;
-}
-
+ goto out;
+ }
+
+ fd = fd_create(loc->inode, 0);
+ if (!fd) {
+ gf_log(this->name, GF_LOG_ERROR, "Failed to create fd");
+ ret = -1;
+ goto out;
+ }
+
+ ret = syncop_opendir(this, loc, fd, NULL, NULL);
+ if (ret < 0) {
+ gf_log(this->name,
+ (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG : GF_LOG_ERROR,
+ "opendir failed "
+ "for %s: %s",
+ loc->path, strerror(-ret));
+ goto out;
+ }
+
+ fd_bind(fd);
+ while ((ret = syncop_readdirp(this, fd, 131072, offset, &entries, xdata,
+ NULL)) != 0) {
+ if (ret < 0) {
+ gf_log(this->name,
+ (-ret == ENOENT || -ret == ESTALE) ? GF_LOG_DEBUG
+ : GF_LOG_ERROR,
+ "readdirp failed "
+ "for %s: %s",
+ loc->path, strerror(-ret));
+ goto out;
+ }
-/* int32_t */
-/* validate_inode_size_contribution (xlator_t *this, loc_t *loc, int64_t size, */
-/* int64_t contribution) */
-/* { */
-/* if (size != contribution) { */
-/* mq_initiate_quota_txn (this, loc); */
-/* } */
+ if (list_empty(&entries.list))
+ break;
-/* return 0; */
-/* } */
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ offset = entry->d_off;
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ continue;
-int32_t
-mq_inspect_directory_xattr (xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- struct iatt buf)
-{
- int32_t ret = 0;
- int8_t dirty = -1;
- int64_t *size = NULL, size_int = 0;
- int64_t *contri = NULL, contri_int = 0;
- char contri_key [512] = {0, };
- gf_boolean_t not_root = _gf_false;
- quota_inode_ctx_t *ctx = NULL;
- inode_contribution_t *contribution = NULL;
-
- ret = mq_inode_ctx_get (loc->inode, this, &ctx);
- if (ret < 0) {
- ctx = mq_inode_ctx_new (loc->inode, this);
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "mq_inode_ctx_new failed");
- ret = -1;
- goto err;
- }
- }
+ memset(&contri, 0, sizeof(contri));
+ quota_dict_get_meta(entry->dict, contri_key, keylen, &contri);
+ if (quota_meta_is_null(&contri))
+ continue;
- if (strcmp (loc->path, "/") != 0) {
- contribution = mq_add_new_contribution_node (this, ctx, loc);
- if (contribution == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot add a new contribution node");
- ret = -1;
- goto err;
- }
+ mq_add_meta(&contri_sum, &contri);
}
- ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size);
- if (ret < 0)
- goto out;
-
- ret = dict_get_int8 (dict, QUOTA_DIRTY_KEY, &dirty);
- if (ret < 0)
- goto out;
+ gf_dirent_free(&entries);
+ }
+ /* Inculde for self */
+ contri_sum.dir_count++;
- if (strcmp (loc->path, "/") != 0) {
- not_root = _gf_true;
+ ret = _mq_get_metadata(this, loc, NULL, &size, 0);
+ if (ret < 0)
+ goto out;
- GET_CONTRI_KEY (contri_key, contribution->gfid, ret);
- if (ret < 0)
- goto out;
+ mq_compute_delta(&delta, &contri_sum, &size);
- ret = dict_get_bin (dict, contri_key, (void **) &contri);
- if (ret < 0)
- goto out;
+ if (quota_meta_is_null(&delta))
+ goto out;
- LOCK (&contribution->lock);
- {
- contribution->contribution = ntoh64 (*contri);
- contri_int = contribution->contribution;
- }
- UNLOCK (&contribution->lock);
- }
+ gf_log(this->name, GF_LOG_INFO,
+ "calculated size = %" PRId64 ", original size = %" PRIu64
+ ", diff = %" PRIu64 ", path = %s ",
+ contri_sum.size, size.size, delta.size, loc->path);
- LOCK (&ctx->lock);
- {
- ctx->size = ntoh64 (*size);
- ctx->dirty = dirty;
- size_int = ctx->size;
- }
- UNLOCK (&ctx->lock);
+ gf_log(this->name, GF_LOG_INFO,
+ "calculated f_count = %" PRId64 ", original f_count = %" PRIu64
+ ", diff = %" PRIu64 ", path = %s ",
+ contri_sum.file_count, size.file_count, delta.file_count, loc->path);
- gf_log (this->name, GF_LOG_DEBUG, "size=%"PRId64
- " contri=%"PRId64, size_int, contri_int);
+ gf_log(this->name, GF_LOG_INFO,
+ "calculated d_count = %" PRId64 ", original d_count = %" PRIu64
+ ", diff = %" PRIu64 ", path = %s ",
+ contri_sum.dir_count, size.dir_count, delta.dir_count, loc->path);
- if (dirty) {
- ret = mq_update_dirty_inode (this, loc, ctx, contribution);
- }
+ ret = mq_update_size(this, loc, &delta);
+ if (ret < 0)
+ goto out;
- if ((!dirty || ret == 0) && (not_root == _gf_true) &&
- (size_int != contri_int)) {
- mq_initiate_quota_txn (this, loc);
- }
+ updated = _gf_true;
- ret = 0;
out:
- if (ret)
- mq_set_inode_xattr (this, loc);
-err:
- return ret;
-}
+ gf_dirent_free(&entries);
-int32_t
-mq_inspect_file_xattr (xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- struct iatt buf)
-{
- int32_t ret = -1;
- uint64_t contri_int = 0, size = 0;
- int64_t *contri_ptr = NULL;
- char contri_key [512] = {0, };
- quota_inode_ctx_t *ctx = NULL;
- inode_contribution_t *contribution = NULL;
-
- ret = mq_inode_ctx_get (loc->inode, this, &ctx);
- if (ret < 0) {
- ctx = mq_inode_ctx_new (loc->inode, this);
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "mq_inode_ctx_new failed");
- ret = -1;
- goto out;
- }
- }
+ if (fd)
+ fd_unref(fd);
- contribution = mq_add_new_contribution_node (this, ctx, loc);
- if (contribution == NULL)
- goto out;
+ if (xdata)
+ dict_unref(xdata);
- LOCK (&ctx->lock);
- {
- ctx->size = 512 * buf.ia_blocks;
- size = ctx->size;
- }
- UNLOCK (&ctx->lock);
-
- list_for_each_entry (contribution, &ctx->contribution_head,
- contri_list) {
- GET_CONTRI_KEY (contri_key, contribution->gfid, ret);
- if (ret < 0)
- continue;
-
- ret = dict_get_bin (dict, contri_key, (void **) &contri_int);
- if (ret == 0) {
- contri_ptr = (int64_t *)(unsigned long)contri_int;
-
- LOCK (&contribution->lock);
- {
- contribution->contribution = ntoh64 (*contri_ptr);
- contri_int = contribution->contribution;
- }
- UNLOCK (&contribution->lock);
-
- gf_log (this->name, GF_LOG_DEBUG,
- "size=%"PRId64 " contri=%"PRId64, size, contri_int);
-
- if (size != contri_int) {
- mq_initiate_quota_txn (this, loc);
- }
- } else
- mq_initiate_quota_txn (this, loc);
- }
+ if (ret < 0) {
+ /* On failure clear dirty status flag.
+ * In the next lookup inspect_directory_xattr
+ * can set the status flag and fix the
+ * dirty directory
+ */
+ if (ctx)
+ mq_set_ctx_dirty_status(ctx, _gf_false);
+ } else if (dirty) {
+ mq_mark_dirty(this, loc, 0);
+ }
-out:
- return ret;
-}
+ if (locked)
+ mq_lock(this, loc, F_UNLCK);
-int32_t
-mq_xattr_state (xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- struct iatt buf)
-{
- if (buf.ia_type == IA_IFREG ||
- buf.ia_type == IA_IFLNK) {
- mq_inspect_file_xattr (this, loc, dict, buf);
- } else if (buf.ia_type == IA_IFDIR)
- mq_inspect_directory_xattr (this, loc, dict, buf);
+ if (updated)
+ mq_initiate_quota_blocking_txn(this, loc, NULL);
- return 0;
+ return ret;
}
int32_t
-mq_req_xattr (xlator_t *this,
- loc_t *loc,
- dict_t *dict)
+mq_update_dirty_inode_txn(xlator_t *this, loc_t *loc, quota_inode_ctx_t *ctx)
{
- int32_t ret = -1;
-
- GF_VALIDATE_OR_GOTO ("marker", this, out);
- GF_VALIDATE_OR_GOTO ("marker", dict, out);
-
- if (!loc)
- goto set_size;
-
- //if not "/" then request contribution
- if (strcmp (loc->path, "/") == 0)
- goto set_size;
-
- ret = mq_dict_set_contribution (this, dict, loc);
- if (ret == -1)
- goto out;
-
-set_size:
- ret = dict_set_uint64 (dict, QUOTA_SIZE_KEY, 0);
- if (ret < 0) {
- ret = -1;
- goto out;
- }
+ int32_t ret = -1;
+ gf_boolean_t status = _gf_true;
- ret = dict_set_int8 (dict, QUOTA_DIRTY_KEY, 0);
- if (ret < 0) {
- ret = -1;
- goto out;
- }
+ GF_VALIDATE_OR_GOTO("marker", loc, out);
+ GF_VALIDATE_OR_GOTO("marker", loc->inode, out);
- ret = 0;
+ mq_test_and_set_ctx_status(ctx, &ctx->dirty_status, &status);
+ if (status == _gf_true)
+ goto out;
+ ret = mq_synctask(this, mq_update_dirty_inode_task, _gf_true, loc);
out:
- return ret;
-}
-
+ if (ret < 0 && status == _gf_false)
+ mq_set_ctx_dirty_status(ctx, _gf_false);
-int32_t
-mq_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- QUOTA_STACK_DESTROY (frame, this);
-
- return 0;
+ return ret;
}
int32_t
-_mq_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+mq_inspect_directory_xattr(xlator_t *this, quota_inode_ctx_t *ctx,
+ inode_contribution_t *contribution, loc_t *loc,
+ dict_t *dict)
{
- int32_t ret = 0;
- char contri_key [512] = {0, };
- quota_local_t *local = NULL;
+ int32_t ret = -1;
+ int8_t dirty = -1;
+ quota_meta_t size = {
+ 0,
+ };
+ quota_meta_t contri = {
+ 0,
+ };
+ quota_meta_t delta = {
+ 0,
+ };
+ char contri_key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+ char size_key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+ int keylen = 0;
+ gf_boolean_t status = _gf_false;
+
+ ret = dict_get_int8(dict, QUOTA_DIRTY_KEY, &dirty);
+ if (ret < 0) {
+ /* dirty is set only on the first file write operation
+ * so ignore this error
+ */
+ ret = 0;
+ dirty = 0;
+ }
+
+ GET_SIZE_KEY(this, size_key, keylen);
+ if (keylen < 0) {
+ ret = -1;
+ goto out;
+ }
+ ret = _quota_dict_get_meta(this, dict, size_key, keylen, &size, IA_IFDIR,
+ _gf_false);
+ if (ret < 0)
+ goto create_xattr;
+
+ if (!contribution)
+ goto create_xattr;
+
+ if (!loc_is_root(loc)) {
+ GET_CONTRI_KEY(this, contri_key, contribution->gfid, keylen);
+ if (keylen < 0) {
+ ret = -1;
+ goto out;
+ }
+ ret = _quota_dict_get_meta(this, dict, contri_key, keylen, &contri,
+ IA_IFDIR, _gf_false);
+ if (ret < 0)
+ goto create_xattr;
- local = (quota_local_t *) frame->local;
+ LOCK(&contribution->lock);
+ {
+ contribution->contribution = contri.size;
+ contribution->file_count = contri.file_count;
+ contribution->dir_count = contri.dir_count;
+ }
+ UNLOCK(&contribution->lock);
+ }
+
+ LOCK(&ctx->lock);
+ {
+ ctx->size = size.size;
+ ctx->file_count = size.file_count;
+ ctx->dir_count = size.dir_count;
+ ctx->dirty = dirty;
+ }
+ UNLOCK(&ctx->lock);
+
+ ret = mq_get_ctx_updation_status(ctx, &status);
+ if (ret < 0 || status == _gf_true) {
+ /* If the update txn is in progress abort inspection */
+ ret = 0;
+ goto out;
+ }
- if (op_ret == -1 || local->err == -1) {
- mq_removexattr_cbk (frame, NULL, this, -1, 0, NULL);
- return 0;
- }
+ mq_compute_delta(&delta, &size, &contri);
- frame->local = NULL;
+ if (dirty) {
+ ret = mq_update_dirty_inode_txn(this, loc, ctx);
+ goto out;
+ }
- if (local->hl_count > 1) {
- GET_CONTRI_KEY (contri_key, local->contri->gfid, ret);
+ if (!loc_is_root(loc) && !quota_meta_is_null(&delta))
+ mq_initiate_quota_txn(this, loc, NULL);
- STACK_WIND (frame, mq_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- &local->loc, contri_key, NULL);
- ret = 0;
- } else {
- mq_removexattr_cbk (frame, NULL, this, 0, 0, NULL);
- }
+ ret = 0;
+ goto out;
- if (strcmp (local->parent_loc.path, "/") != 0) {
- ret = mq_get_parent_inode_local (this, local);
- if (ret < 0)
- goto out;
+create_xattr:
+ if (ret < 0)
+ ret = mq_create_xattrs_txn(this, loc, NULL);
- mq_start_quota_txn (this, &local->loc, local->ctx, local->contri);
- }
out:
- mq_local_unref (this, local);
-
- return 0;
+ return ret;
}
int32_t
-mq_inode_remove_done (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
+mq_inspect_file_xattr(xlator_t *this, quota_inode_ctx_t *ctx,
+ inode_contribution_t *contribution, loc_t *loc,
+ dict_t *dict, struct iatt *buf)
{
- int32_t ret = -1;
- struct gf_flock lock = {0, };
- quota_inode_ctx_t *ctx = NULL;
- quota_local_t *local = NULL;
- int64_t contribution = 0;
-
- local = frame->local;
- if (op_ret == -1)
- local->err = -1;
-
- ret = mq_inode_ctx_get (local->parent_loc.inode, this, &ctx);
-
- LOCK (&local->contri->lock);
+ int32_t ret = -1;
+ quota_meta_t size = {
+ 0,
+ };
+ quota_meta_t contri = {
+ 0,
+ };
+ quota_meta_t delta = {
+ 0,
+ };
+ char contri_key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+ int keylen = 0;
+ gf_boolean_t status = _gf_false;
+
+ if (!buf || !contribution || !ctx)
+ goto out;
+
+ LOCK(&ctx->lock);
+ {
+ ctx->size = 512 * buf->ia_blocks;
+ ctx->file_count = 1;
+ ctx->dir_count = 0;
+
+ size.size = ctx->size;
+ size.file_count = ctx->file_count;
+ size.dir_count = ctx->dir_count;
+ }
+ UNLOCK(&ctx->lock);
+
+ GET_CONTRI_KEY(this, contri_key, contribution->gfid, keylen);
+ if (keylen < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = _quota_dict_get_meta(this, dict, contri_key, keylen, &contri,
+ IA_IFREG, _gf_true);
+ if (ret < 0) {
+ ret = mq_create_xattrs_txn(this, loc, NULL);
+ } else {
+ LOCK(&contribution->lock);
{
- contribution = local->contri->contribution;
- }
- UNLOCK (&local->contri->lock);
-
- if (contribution == local->size) {
- if (ret == 0) {
- LOCK (&ctx->lock);
- {
- ctx->size -= contribution;
- }
- UNLOCK (&ctx->lock);
-
- LOCK (&local->contri->lock);
- {
- local->contri->contribution = 0;
- }
- UNLOCK (&local->contri->lock);
- }
+ contribution->contribution = contri.size;
+ contribution->file_count = contri.file_count;
+ contribution->dir_count = contri.dir_count;
}
+ UNLOCK(&contribution->lock);
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND (frame,
- _mq_inode_remove_done,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->parent_loc,
- F_SETLKW, &lock, NULL);
- return 0;
-}
-
-int32_t
-mq_reduce_parent_size_xattr (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
-{
- int32_t ret = -1;
- int64_t *size = NULL;
- dict_t *dict = NULL;
- quota_local_t *local = NULL;
-
- local = frame->local;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "inodelk set failed on %s", local->parent_loc.path);
- QUOTA_STACK_DESTROY (frame, this);
- return 0;
- }
-
- VALIDATE_OR_GOTO (local->contri, err);
-
- dict = dict_new ();
- if (dict == NULL) {
- ret = -1;
- goto err;
+ ret = mq_get_ctx_updation_status(ctx, &status);
+ if (ret < 0 || status == _gf_true) {
+ /* If the update txn is in progress abort inspection */
+ ret = 0;
+ goto out;
}
- QUOTA_ALLOC_OR_GOTO (size, int64_t, ret, err);
-
- *size = hton64 (-local->size);
+ mq_compute_delta(&delta, &size, &contri);
+ if (!quota_meta_is_null(&delta))
+ mq_initiate_quota_txn(this, loc, NULL);
+ }
+ /* TODO: revist this code when fixing hardlinks */
- ret = dict_set_bin (dict, QUOTA_SIZE_KEY, size, 8);
- if (ret < 0)
- goto err;
-
- uuid_copy (local->parent_loc.gfid,
- local->parent_loc.inode->gfid);
- GF_UUID_ASSERT (local->parent_loc.gfid);
-
- STACK_WIND (frame, mq_inode_remove_done, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop, &local->parent_loc,
- GF_XATTROP_ADD_ARRAY64, dict, NULL);
- dict_unref (dict);
- return 0;
-
-err:
- local->err = 1;
- mq_inode_remove_done (frame, NULL, this, -1, 0, NULL, NULL);
- if (dict)
- dict_unref (dict);
- return 0;
+out:
+ return ret;
}
int32_t
-mq_reduce_parent_size (xlator_t *this, loc_t *loc, int64_t contri)
+mq_xattr_state(xlator_t *this, loc_t *origin_loc, dict_t *dict,
+ struct iatt *buf)
{
- int32_t ret = -1;
- struct gf_flock lock = {0,};
- call_frame_t *frame = NULL;
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
- inode_contribution_t *contribution = NULL;
-
- GF_VALIDATE_OR_GOTO ("marker", this, out);
- GF_VALIDATE_OR_GOTO ("marker", loc, out);
-
- ret = mq_inode_ctx_get (loc->inode, this, &ctx);
- if (ret < 0)
- goto out;
-
- contribution = mq_get_contribution_node (loc->parent, ctx);
- if (contribution == NULL)
- goto out;
-
- local = mq_local_new ();
- if (local == NULL) {
- ret = -1;
- goto out;
- }
-
- if (contri >= 0) {
- local->size = contri;
- } else {
- LOCK (&contribution->lock);
- {
- local->size = contribution->contribution;
- }
- UNLOCK (&contribution->lock);
- }
-
- if (local->size == 0) {
- ret = 0;
- goto out;
- }
-
- ret = mq_loc_copy (&local->loc, loc);
- if (ret < 0)
- goto out;
-
- local->ctx = ctx;
- local->contri = contribution;
-
- ret = mq_inode_loc_fill (NULL, loc->parent, &local->parent_loc);
- if (ret < 0)
- goto out;
-
- frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- ret = -1;
- goto out;
- }
-
- mq_assign_lk_owner (this, frame);
-
- frame->local = local;
-
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
-
- if (local->parent_loc.inode == NULL) {
- ret = -1;
- gf_log (this->name, GF_LOG_DEBUG,
- "Inode is NULL, so can't stackwind.");
- goto out;
- }
-
- STACK_WIND (frame,
- mq_reduce_parent_size_xattr,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->parent_loc, F_SETLKW, &lock, NULL);
- local = NULL;
- ret = 0;
+ int32_t ret = -1;
+ quota_inode_ctx_t *ctx = NULL;
+ loc_t loc = {
+ 0,
+ };
+ inode_contribution_t *contribution = NULL;
+
+ ret = mq_prevalidate_txn(this, origin_loc, &loc, &ctx, buf);
+ if (ret < 0 || loc.parent == NULL)
+ goto out;
+
+ if (!loc_is_root(&loc)) {
+ contribution = mq_add_new_contribution_node(this, ctx, &loc);
+ if (contribution == NULL) {
+ if (!gf_uuid_is_null(loc.inode->gfid))
+ gf_log(this->name, GF_LOG_WARNING,
+ "cannot add a new contribution node "
+ "(%s)",
+ uuid_utoa(loc.gfid));
+ ret = -1;
+ goto out;
+ }
+ if (buf->ia_type == IA_IFDIR)
+ mq_inspect_directory_xattr(this, ctx, contribution, &loc, dict);
+ else
+ mq_inspect_file_xattr(this, ctx, contribution, &loc, dict, buf);
+ } else {
+ mq_inspect_directory_xattr(this, ctx, 0, &loc, dict);
+ }
out:
- if (local != NULL)
- mq_local_unref (this, local);
-
- return ret;
-}
+ loc_wipe(&loc);
+ if (contribution)
+ GF_REF_PUT(contribution);
-int32_t
-init_quota_priv (xlator_t *this)
-{
- return 0;
+ return ret;
}
-
int32_t
-mq_rename_update_newpath (xlator_t *this, loc_t *loc)
+mq_req_xattr(xlator_t *this, loc_t *loc, dict_t *dict, char *contri_key,
+ char *size_key)
{
- int32_t ret = -1;
- quota_inode_ctx_t *ctx = NULL;
- inode_contribution_t *contribution = NULL;
+ int32_t ret = -1;
+ char key[QUOTA_KEY_MAX] = {
+ 0,
+ };
- GF_VALIDATE_OR_GOTO ("marker", this, out);
- GF_VALIDATE_OR_GOTO ("marker", loc, out);
- GF_VALIDATE_OR_GOTO ("marker", loc->inode, out);
+ GF_VALIDATE_OR_GOTO("marker", this, out);
+ GF_VALIDATE_OR_GOTO("marker", loc, out);
+ GF_VALIDATE_OR_GOTO("marker", dict, out);
- ret = mq_inode_ctx_get (loc->inode, this, &ctx);
+ if (!loc_is_root(loc)) {
+ ret = mq_dict_set_contribution(this, dict, loc, NULL, contri_key);
if (ret < 0)
- goto out;
+ goto out;
+ }
- contribution = mq_add_new_contribution_node (this, ctx, loc);
- if (contribution == NULL) {
- ret = -1;
- goto out;
+ GET_SIZE_KEY(this, key, ret);
+ if (ret < 0)
+ goto out;
+ if (size_key)
+ if (snprintf(size_key, QUOTA_KEY_MAX, "%s", key) >= QUOTA_KEY_MAX) {
+ ret = -1;
+ goto out;
}
- mq_initiate_quota_txn (this, loc);
+ ret = dict_set_uint64(dict, key, 0);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_set_int8(dict, QUOTA_DIRTY_KEY, 0);
+
out:
- return ret;
+ if (ret < 0)
+ gf_log_callingfn(this ? this->name : "Marker", GF_LOG_ERROR,
+ "dict set failed");
+ return ret;
}
int32_t
-mq_forget (xlator_t *this, quota_inode_ctx_t *ctx)
+mq_forget(xlator_t *this, quota_inode_ctx_t *ctx)
{
- inode_contribution_t *contri = NULL;
- inode_contribution_t *next = NULL;
+ inode_contribution_t *contri = NULL;
+ inode_contribution_t *next = NULL;
- GF_VALIDATE_OR_GOTO ("marker", this, out);
- GF_VALIDATE_OR_GOTO ("marker", ctx, out);
+ GF_VALIDATE_OR_GOTO("marker", this, out);
+ GF_VALIDATE_OR_GOTO("marker", ctx, out);
- list_for_each_entry_safe (contri, next, &ctx->contribution_head,
- contri_list) {
- list_del (&contri->contri_list);
- GF_FREE (contri);
- }
+ list_for_each_entry_safe(contri, next, &ctx->contribution_head, contri_list)
+ {
+ list_del_init(&contri->contri_list);
+ GF_REF_PUT(contri);
+ }
- LOCK_DESTROY (&ctx->lock);
- GF_FREE (ctx);
+ LOCK_DESTROY(&ctx->lock);
+ GF_FREE(ctx);
out:
- return 0;
+ return 0;
}
diff --git a/xlators/features/marker/src/marker-quota.h b/xlators/features/marker/src/marker-quota.h
index 4dd855fd730..4bbf6878b22 100644
--- a/xlators/features/marker/src/marker-quota.h
+++ b/xlators/features/marker/src/marker-quota.h
@@ -1,139 +1,140 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _MARKER_QUOTA_H
#define _MARKER_QUOTA_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "marker-mem-types.h"
+#include <glusterfs/refcount.h>
+#include <glusterfs/quota-common-utils.h>
+#include <glusterfs/call-stub.h>
#define QUOTA_XATTR_PREFIX "trusted.glusterfs"
#define QUOTA_DIRTY_KEY "trusted.glusterfs.quota.dirty"
#define CONTRIBUTION "contri"
-#define CONTRI_KEY_MAX 512
+#define QUOTA_KEY_MAX 512
#define READDIR_BUF 4096
-
-#define QUOTA_STACK_DESTROY(_frame, _this) \
- do { \
- quota_local_t *_local = NULL; \
- _local = _frame->local; \
- _frame->local = NULL; \
- STACK_DESTROY (_frame->root); \
- mq_local_unref (_this, _local); \
- } while (0)
-
-
-#define QUOTA_ALLOC(var, type, ret) \
- do { \
- ret = 0; \
- var = GF_CALLOC (sizeof (type), 1, \
- gf_marker_mt_##type); \
- if (!var) { \
- gf_log ("", GF_LOG_ERROR, \
- "out of memory"); \
- ret = -1; \
- } \
- } while (0);
-
-#define QUOTA_ALLOC_OR_GOTO(var, type, ret, label) \
- do { \
- var = GF_CALLOC (sizeof (type), 1, \
- gf_marker_mt_##type); \
- if (!var) { \
- gf_log ("", GF_LOG_ERROR, \
- "out of memory"); \
- ret = -1; \
- goto label; \
- } \
- ret = 0; \
- } while (0);
-
-#define GET_CONTRI_KEY(var, _gfid, _ret) \
- do { \
- char _gfid_unparsed[40]; \
- uuid_unparse (_gfid, _gfid_unparsed); \
- _ret = snprintf (var, CONTRI_KEY_MAX, QUOTA_XATTR_PREFIX \
- ".%s.%s." CONTRIBUTION, "quota", \
- _gfid_unparsed); \
- } while (0);
-
-#define QUOTA_SAFE_INCREMENT(lock, var) \
- do { \
- LOCK (lock); \
- var ++; \
- UNLOCK (lock); \
- } while (0)
+#define QUOTA_ALLOC(var, type, ret) \
+ do { \
+ ret = 0; \
+ var = GF_CALLOC(sizeof(type), 1, gf_marker_mt_##type); \
+ if (!var) { \
+ ret = -1; \
+ } \
+ } while (0);
+
+#define QUOTA_ALLOC_OR_GOTO(var, type, ret, label) \
+ do { \
+ var = GF_CALLOC(sizeof(type), 1, gf_marker_mt_##type); \
+ if (!var) { \
+ gf_log("", GF_LOG_ERROR, "out of memory"); \
+ ret = -1; \
+ goto label; \
+ } \
+ ret = 0; \
+ } while (0);
+
+#define GET_QUOTA_KEY(_this, var, key, _ret) \
+ do { \
+ marker_conf_t *_priv = _this->private; \
+ if (_priv->version > 0) \
+ _ret = snprintf(var, QUOTA_KEY_MAX, "%s.%d", key, _priv->version); \
+ else \
+ _ret = snprintf(var, QUOTA_KEY_MAX, "%s", key); \
+ } while (0)
+
+#define GET_CONTRI_KEY(_this, var, _gfid, _ret) \
+ do { \
+ char _tmp_var[QUOTA_KEY_MAX] = { \
+ 0, \
+ }; \
+ if (_gfid != NULL) { \
+ char _gfid_unparsed[40]; \
+ gf_uuid_unparse(_gfid, _gfid_unparsed); \
+ _ret = snprintf(_tmp_var, QUOTA_KEY_MAX, \
+ QUOTA_XATTR_PREFIX ".%s.%s." CONTRIBUTION, \
+ "quota", _gfid_unparsed); \
+ } else { \
+ _ret = snprintf(_tmp_var, QUOTA_KEY_MAX, \
+ QUOTA_XATTR_PREFIX ".%s.." CONTRIBUTION, "quota"); \
+ } \
+ GET_QUOTA_KEY(_this, var, _tmp_var, _ret); \
+ } while (0)
+
+#define GET_SIZE_KEY(_this, var, _ret) \
+ { \
+ GET_QUOTA_KEY(_this, var, QUOTA_SIZE_KEY, _ret); \
+ }
+
+#define QUOTA_SAFE_INCREMENT(lock, var) \
+ do { \
+ LOCK(lock); \
+ var++; \
+ UNLOCK(lock); \
+ } while (0)
struct quota_inode_ctx {
- int64_t size;
- int8_t dirty;
- gf_boolean_t updation_status;
- gf_lock_t lock;
- struct list_head contribution_head;
+ int64_t size;
+ int64_t file_count;
+ int64_t dir_count;
+ int8_t dirty;
+ gf_boolean_t create_status;
+ gf_boolean_t updation_status;
+ gf_boolean_t dirty_status;
+ gf_lock_t lock;
+ struct list_head contribution_head;
};
typedef struct quota_inode_ctx quota_inode_ctx_t;
+struct quota_synctask {
+ xlator_t *this;
+ loc_t loc;
+ quota_meta_t contri;
+ gf_boolean_t is_static;
+ uint32_t ia_nlink;
+ call_stub_t *stub;
+};
+typedef struct quota_synctask quota_synctask_t;
+
struct inode_contribution {
- struct list_head contri_list;
- int64_t contribution;
- uuid_t gfid;
- gf_lock_t lock;
+ struct list_head contri_list;
+ int64_t contribution;
+ int64_t file_count;
+ int64_t dir_count;
+ uuid_t gfid;
+ gf_lock_t lock;
+ GF_REF_DECL;
};
typedef struct inode_contribution inode_contribution_t;
int32_t
-mq_get_lock_on_parent (call_frame_t *, xlator_t *);
-
-int32_t
-mq_req_xattr (xlator_t *, loc_t *, dict_t *);
-
-int32_t
-init_quota_priv (xlator_t *);
-
-int32_t
-mq_xattr_state (xlator_t *, loc_t *, dict_t *, struct iatt);
+mq_req_xattr(xlator_t *, loc_t *, dict_t *, char *, char *);
int32_t
-mq_set_inode_xattr (xlator_t *, loc_t *);
+mq_xattr_state(xlator_t *, loc_t *, dict_t *, struct iatt *);
int
-mq_initiate_quota_txn (xlator_t *, loc_t *);
-
-int32_t
-mq_dirty_inode_readdir (call_frame_t *, void *, xlator_t *,
- int32_t, int32_t, fd_t *, dict_t *);
+mq_initiate_quota_txn(xlator_t *, loc_t *, struct iatt *);
-int32_t
-mq_reduce_parent_size (xlator_t *, loc_t *, int64_t);
+int
+mq_initiate_quota_blocking_txn(xlator_t *, loc_t *, struct iatt *);
-int32_t
-mq_rename_update_newpath (xlator_t *, loc_t *);
+int
+mq_create_xattrs_txn(xlator_t *this, loc_t *loc, struct iatt *buf);
int32_t
-mq_inspect_file_xattr (xlator_t *this, loc_t *loc, dict_t *dict, struct iatt buf);
+mq_reduce_parent_size_txn(xlator_t *, loc_t *, quota_meta_t *, uint32_t nlink,
+ call_stub_t *stub);
int32_t
-mq_forget (xlator_t *, quota_inode_ctx_t *);
+mq_forget(xlator_t *, quota_inode_ctx_t *);
#endif
diff --git a/xlators/features/marker/src/marker.c b/xlators/features/marker/src/marker.c
index 534437479a0..1375ccc498c 100644
--- a/xlators/features/marker/src/marker.c
+++ b/xlators/features/marker/src/marker.c
@@ -1,1811 +1,2320 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "libxlator.h"
#include "marker.h"
#include "marker-mem-types.h"
#include "marker-quota.h"
#include "marker-quota-helper.h"
#include "marker-common.h"
-#include "byte-order.h"
+#include <glusterfs/byte-order.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/syscall.h>
+
+#include <fnmatch.h>
#define _GF_UID_GID_CHANGED 1
+static char *mq_ext_xattrs[] = {
+ QUOTA_SIZE_KEY,
+ QUOTA_LIMIT_KEY,
+ QUOTA_LIMIT_OBJECTS_KEY,
+ NULL,
+};
+
void
-fini (xlator_t *this);
+fini(xlator_t *this);
int32_t
-marker_start_setxattr (call_frame_t *, xlator_t *);
+marker_start_setxattr(call_frame_t *, xlator_t *);
+
+/* When client/quotad request for quota xattrs,
+ * replace the key-name by adding the version number
+ * in end of the key-name.
+ * In the cbk, result value of xattrs for original
+ * key-name.
+ * Below function marker_key_replace_with_ver and
+ * marker_key_set_ver is used for setting/removing
+ * version for the key-name
+ */
+int
+marker_key_replace_with_ver(xlator_t *this, dict_t *dict)
+{
+ int ret = -1;
+ int i = 0;
+ marker_conf_t *priv = NULL;
+ char key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+
+ priv = this->private;
+
+ if (dict == NULL || priv->version <= 0) {
+ ret = 0;
+ goto out;
+ }
+
+ for (i = 0; mq_ext_xattrs[i]; i++) {
+ if (dict_get(dict, mq_ext_xattrs[i])) {
+ GET_QUOTA_KEY(this, key, mq_ext_xattrs[i], ret);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_set(dict, key, dict_get(dict, mq_ext_xattrs[i]));
+ if (ret < 0)
+ goto out;
+
+ dict_del(dict, mq_ext_xattrs[i]);
+ }
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int
+marker_key_set_ver(xlator_t *this, dict_t *dict)
+{
+ int ret = -1;
+ int i = -1;
+ marker_conf_t *priv = NULL;
+ char key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+
+ priv = this->private;
+
+ if (dict == NULL || priv->version <= 0) {
+ ret = 0;
+ goto out;
+ }
+
+ for (i = 0; mq_ext_xattrs[i]; i++) {
+ GET_QUOTA_KEY(this, key, mq_ext_xattrs[i], ret);
+ if (ret < 0)
+ goto out;
+
+ if (dict_get(dict, key))
+ dict_set(dict, mq_ext_xattrs[i], dict_get(dict, key));
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
marker_local_t *
-marker_local_ref (marker_local_t *local)
+marker_local_ref(marker_local_t *local)
{
- GF_VALIDATE_OR_GOTO ("marker", local, err);
+ GF_VALIDATE_OR_GOTO("marker", local, err);
- LOCK (&local->lock);
- {
- local->ref++;
- }
- UNLOCK (&local->lock);
+ LOCK(&local->lock);
+ {
+ local->ref++;
+ }
+ UNLOCK(&local->lock);
- return local;
+ return local;
err:
- return NULL;
+ return NULL;
}
int
-marker_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
+marker_loc_fill(loc_t *loc, inode_t *inode, inode_t *parent, char *path)
{
- int ret = -1;
+ int ret = -1;
- if (!loc)
- return ret;
+ if (!loc)
+ return ret;
- if (inode) {
- loc->inode = inode_ref (inode);
- if (uuid_is_null (loc->gfid)) {
- uuid_copy (loc->gfid, loc->inode->gfid);
- }
+ if (inode) {
+ loc->inode = inode_ref(inode);
+ if (gf_uuid_is_null(loc->gfid)) {
+ gf_uuid_copy(loc->gfid, loc->inode->gfid);
}
+ }
- if (parent)
- loc->parent = inode_ref (parent);
-
- if (path) {
- loc->path = gf_strdup (path);
- if (!loc->path) {
- gf_log ("loc fill", GF_LOG_ERROR, "strdup failed");
- goto loc_wipe;
- }
-
- loc->name = strrchr (loc->path, '/');
- if (loc->name)
- loc->name++;
+ if (parent)
+ loc->parent = inode_ref(parent);
+
+ if (path) {
+ loc->path = gf_strdup(path);
+ if (!loc->path) {
+ gf_log("loc fill", GF_LOG_ERROR, "strdup failed");
+ goto loc_wipe;
}
- ret = 0;
+ loc->name = strrchr(loc->path, '/');
+ if (loc->name)
+ loc->name++;
+ }
+
+ ret = 0;
loc_wipe:
- if (ret < 0)
- loc_wipe (loc);
+ if (ret < 0)
+ loc_wipe(loc);
- return ret;
+ return ret;
}
int
-marker_inode_loc_fill (inode_t *inode, loc_t *loc)
+_marker_inode_loc_fill(inode_t *inode, inode_t *parent, char *name, loc_t *loc)
{
- char *resolvedpath = NULL;
- int ret = -1;
- inode_t *parent = NULL;
+ char *resolvedpath = NULL;
+ int ret = -1;
+ gf_boolean_t free_parent = _gf_false;
- if ((!inode) || (!loc))
- return ret;
+ if ((!inode) || (!loc))
+ return ret;
- parent = inode_parent (inode, NULL, NULL);
+ if (parent && name)
+ ret = inode_path(parent, name, &resolvedpath);
+ else
+ ret = inode_path(inode, NULL, &resolvedpath);
+ if (ret < 0)
+ goto err;
- ret = inode_path (inode, NULL, &resolvedpath);
- if (ret < 0)
- goto err;
+ if (parent == NULL) {
+ parent = inode_parent(inode, NULL, NULL);
+ free_parent = _gf_true;
+ }
- ret = marker_loc_fill (loc, inode, parent, resolvedpath);
- if (ret < 0)
- goto err;
+ ret = marker_loc_fill(loc, inode, parent, resolvedpath);
+ if (ret < 0)
+ goto err;
err:
- if (parent)
- inode_unref (parent);
+ if (free_parent)
+ inode_unref(parent);
- if (resolvedpath)
- GF_FREE (resolvedpath);
+ GF_FREE(resolvedpath);
- return ret;
+ return ret;
+}
+
+int
+marker_inode_loc_fill(inode_t *inode, loc_t *loc)
+{
+ return _marker_inode_loc_fill(inode, NULL, NULL, loc);
}
int32_t
-marker_trav_parent (marker_local_t *local)
+marker_trav_parent(marker_local_t *local)
{
- int32_t ret = 0;
- loc_t loc = {0, };
- inode_t *parent = NULL;
- int8_t need_unref = 0;
+ int32_t ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ inode_t *parent = NULL;
+ int8_t need_unref = 0;
- if (!local->loc.parent) {
- parent = inode_parent (local->loc.inode, NULL, NULL);
- if (parent)
- need_unref = 1;
- } else
- parent = local->loc.parent;
+ if (!local->loc.parent) {
+ parent = inode_parent(local->loc.inode, NULL, NULL);
+ if (parent)
+ need_unref = 1;
+ } else
+ parent = local->loc.parent;
- ret = marker_inode_loc_fill (parent, &loc);
+ ret = marker_inode_loc_fill(parent, &loc);
- if (ret < 0) {
- ret = -1;
- goto out;
- }
+ if (ret < 0) {
+ ret = -1;
+ goto out;
+ }
- loc_wipe (&local->loc);
+ loc_wipe(&local->loc);
- local->loc = loc;
+ local->loc = loc;
out:
- if (need_unref)
- inode_unref (parent);
+ if (need_unref)
+ inode_unref(parent);
- return ret;
+ return ret;
}
-int32_t
-marker_error_handler (xlator_t *this)
+void
+marker_error_handler(xlator_t *this, marker_local_t *local, int32_t op_errno)
{
- marker_conf_t *priv = NULL;
-
- priv = (marker_conf_t *) this->private;
-
- unlink (priv->timestamp_file);
+ marker_conf_t *priv = (marker_conf_t *)this->private;
+ const char *path = local ? ((local->loc.path) ? local->loc.path
+ : uuid_utoa(local->loc.gfid))
+ : "<nul>";
- return 0;
+ gf_log(this->name, GF_LOG_CRITICAL,
+ "Indexing gone corrupt at %s (reason: %s)."
+ " Geo-replication slave content needs to be revalidated",
+ path, strerror(op_errno));
+ sys_unlink(priv->timestamp_file);
}
int32_t
-marker_local_unref (marker_local_t *local)
+marker_local_unref(marker_local_t *local)
{
- int32_t var = 0;
-
- if (local == NULL)
- return -1;
+ int32_t var = 0;
- LOCK (&local->lock);
- {
- var = --local->ref;
- }
- UNLOCK (&local->lock);
-
- if (var != 0)
- goto out;
-
- loc_wipe (&local->loc);
- loc_wipe (&local->parent_loc);
+ if (local == NULL)
+ return -1;
- if (local->oplocal) {
- marker_local_unref (local->oplocal);
- local->oplocal = NULL;
- }
- mem_put (local);
+ LOCK(&local->lock);
+ {
+ var = --local->ref;
+ }
+ UNLOCK(&local->lock);
+
+ if (var != 0)
+ goto out;
+
+ loc_wipe(&local->loc);
+ loc_wipe(&local->parent_loc);
+ if (local->xdata)
+ dict_unref(local->xdata);
+
+ if (local->lk_frame) {
+ STACK_DESTROY(local->lk_frame->root);
+ local->lk_frame = NULL;
+ }
+
+ if (local->oplocal) {
+ marker_local_unref(local->oplocal);
+ local->oplocal = NULL;
+ }
+ mem_put(local);
out:
- return 0;
+ return 0;
}
int32_t
-stat_stampfile (xlator_t *this, marker_conf_t *priv,
- struct volume_mark **status)
+stat_stampfile(xlator_t *this, marker_conf_t *priv, struct volume_mark **status)
{
- struct stat buf = {0, };
- struct volume_mark *vol_mark = NULL;
+ struct stat buf = {
+ 0,
+ };
+ struct volume_mark *vol_mark = NULL;
- vol_mark = GF_CALLOC (sizeof (struct volume_mark), 1,
- gf_marker_mt_volume_mark);
+ vol_mark = GF_CALLOC(sizeof(struct volume_mark), 1,
+ gf_marker_mt_volume_mark);
- vol_mark->major = 1;
- vol_mark->minor = 0;
+ vol_mark->major = 1;
+ vol_mark->minor = 0;
- GF_ASSERT (sizeof (priv->volume_uuid_bin) == 16);
- memcpy (vol_mark->uuid, priv->volume_uuid_bin, 16);
+ GF_ASSERT(sizeof(priv->volume_uuid_bin) == 16);
+ memcpy(vol_mark->uuid, priv->volume_uuid_bin, 16);
- if (stat (priv->timestamp_file, &buf) != -1) {
- vol_mark->retval = 0;
- vol_mark->sec = htonl (buf.st_ctime);
- vol_mark->usec = htonl (ST_CTIM_NSEC (&buf)/1000);
- } else
- vol_mark->retval = 1;
+ if (sys_stat(priv->timestamp_file, &buf) != -1) {
+ vol_mark->retval = 0;
+ vol_mark->sec = htonl(buf.st_mtime);
+ vol_mark->usec = htonl(ST_MTIM_NSEC(&buf) / 1000);
+ } else
+ vol_mark->retval = 1;
- *status = vol_mark;
+ *status = vol_mark;
- return 0;
+ return 0;
}
int32_t
-marker_getxattr_stampfile_cbk (call_frame_t *frame, xlator_t *this,
- const char *name, struct volume_mark *vol_mark,
- dict_t *xdata)
+marker_getxattr_stampfile_cbk(call_frame_t *frame, xlator_t *this,
+ const char *name, struct volume_mark *vol_mark,
+ dict_t *xdata)
{
- int32_t ret = -1;
- dict_t *dict = NULL;
+ int32_t ret = -1;
+ dict_t *dict = NULL;
- if (vol_mark == NULL){
- STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
+ if (vol_mark == NULL) {
+ STACK_UNWIND_STRICT(getxattr, frame, -1, ENOMEM, NULL, NULL);
- goto out;
- }
+ goto out;
+ }
- dict = dict_new ();
+ dict = dict_new();
- ret = dict_set_bin (dict, (char *)name, vol_mark,
- sizeof (struct volume_mark));
- if (ret)
- gf_log (this->name, GF_LOG_WARNING, "failed to set key %s",
- name);
+ ret = dict_set_bin(dict, (char *)name, vol_mark,
+ sizeof(struct volume_mark));
+ if (ret) {
+ GF_FREE(vol_mark);
+ gf_log(this->name, GF_LOG_WARNING, "failed to set key %s", name);
+ }
- STACK_UNWIND_STRICT (getxattr, frame, 0, 0, dict, xdata);
+ STACK_UNWIND_STRICT(getxattr, frame, 0, 0, dict, xdata);
- dict_unref (dict);
+ if (dict)
+ dict_unref(dict);
out:
- return 0;
+ return 0;
}
-int32_t
-call_from_special_client (call_frame_t *frame, xlator_t *this, const char *name)
+gf_boolean_t
+call_from_special_client(call_frame_t *frame, xlator_t *this, const char *name)
{
- struct volume_mark *vol_mark = NULL;
- marker_conf_t *priv = NULL;
- gf_boolean_t ret = _gf_true;
+ struct volume_mark *vol_mark = NULL;
+ marker_conf_t *priv = NULL;
+ gf_boolean_t is_true = _gf_true;
- priv = (marker_conf_t *)this->private;
+ priv = (marker_conf_t *)this->private;
- if (frame->root->pid != GF_CLIENT_PID_GSYNCD || name == NULL ||
- strcmp (name, MARKER_XATTR_PREFIX "." VOLUME_MARK) != 0) {
- ret = _gf_false;
- goto out;
- }
+ if (frame->root->pid != GF_CLIENT_PID_GSYNCD || name == NULL ||
+ strcmp(name, MARKER_XATTR_PREFIX "." VOLUME_MARK) != 0) {
+ is_true = _gf_false;
+ goto out;
+ }
- stat_stampfile (this, priv, &vol_mark);
+ stat_stampfile(this, priv, &vol_mark);
- marker_getxattr_stampfile_cbk (frame, this, name, vol_mark, NULL);
+ marker_getxattr_stampfile_cbk(frame, this, name, vol_mark, NULL);
out:
- return ret;
+ return is_true;
}
-int32_t
-marker_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
+static gf_boolean_t
+_is_quota_internal_xattr(dict_t *d, char *k, data_t *v, void *data)
{
- if (cookie) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Filtering the quota extended attributes");
+ int i = 0;
+ char **external_xattrs = data;
- dict_foreach (dict, marker_filter_quota_xattr, NULL);
- }
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
- return 0;
+ for (i = 0; external_xattrs && external_xattrs[i]; i++) {
+ if (strcmp(k, external_xattrs[i]) == 0)
+ return _gf_false;
+ }
+
+ if (fnmatch("trusted.glusterfs.quota*", k, 0) == 0)
+ return _gf_true;
+
+ /* It would be nice if posix filters pgfid xattrs. But since marker
+ * also takes up responsibility to clean these up, adding the filtering
+ * here (Check 'quota_xattr_cleaner')
+ */
+ if (fnmatch(PGFID_XATTR_KEY_PREFIX "*", k, 0) == 0)
+ return _gf_true;
+
+ return _gf_false;
}
-int32_t
-marker_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
+static void
+marker_filter_internal_xattrs(xlator_t *this, dict_t *xattrs)
{
- gf_boolean_t ret = _gf_false;
- marker_conf_t *priv = NULL;
- unsigned long cookie = 0;
+ marker_conf_t *priv = NULL;
+ char **ext = NULL;
- priv = this->private;
+ priv = this->private;
+ if (priv->feature_enabled & GF_QUOTA)
+ ext = mq_ext_xattrs;
- if (priv == NULL || (priv->feature_enabled & GF_XTIME) == 0)
- goto wind;
+ dict_foreach_match(xattrs, _is_quota_internal_xattr, ext,
+ dict_remove_foreach_fn, NULL);
+}
- gf_log (this->name, GF_LOG_DEBUG, "USER:PID = %d", frame->root->pid);
+static void
+marker_filter_gsyncd_xattrs(call_frame_t *frame, xlator_t *this, dict_t *xattrs)
+{
+ marker_conf_t *priv = NULL;
- ret = call_from_special_client (frame, this, name);
-wind:
- if (ret == _gf_false) {
- if (name == NULL) {
- /* Signifies that marker translator
- * has to filter the quota's xattr's,
- * this is to prevent afr from performing
- * self healing on marker-quota xattrs'
- */
- cookie = 1;
- }
- STACK_WIND_COOKIE (frame, marker_getxattr_cbk, (void *)cookie,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc,
- name, xdata);
- }
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(frame);
- return 0;
+ if (xattrs && frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+ GF_REMOVE_INTERNAL_XATTR(GF_XATTR_XTIME_PATTERN, xattrs);
+ }
+ return;
}
+int32_t
+marker_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ int32_t ret = -1;
+ if (op_ret < 0)
+ goto unwind;
+
+ ret = marker_key_set_ver(this, dict);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (cookie) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Filtering the quota extended attributes");
+
+ /* If the getxattr is from a non special client, then do not
+ copy the quota related xattrs (except the quota limit key
+ i.e trusted.glusterfs.quota.limit-set which has been set by
+ glusterd on the directory on which quota limit is set.) for
+ directories. Let the healing of xattrs happen upon lookup.
+ NOTE: setting of trusted.glusterfs.quota.limit-set as of now
+ happens from glusterd. It should be moved to quotad. Also
+ trusted.glusterfs.quota.limit-set is set on directory which
+ is permanent till quota is removed on that directory or limit
+ is changed. So let that xattr be healed by other xlators
+ properly whenever directory healing is done.
+ */
+ /*
+ * Except limit-set xattr, rest of the xattrs are maintained
+ * by quota xlator. Don't expose them to other xlators.
+ * This filter makes sure quota xattrs are not healed as part of
+ * metadata self-heal
+ */
+ marker_filter_internal_xattrs(frame->this, dict);
+ }
+
+ /* Filter gsyncd xtime xattr for non gsyncd clients */
+ marker_filter_gsyncd_xattrs(frame, frame->this, dict);
+
+unwind:
+ MARKER_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
int32_t
-marker_setxattr_done (call_frame_t *frame)
-{
- marker_local_t *local = NULL;
+marker_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ gf_boolean_t is_true = _gf_false;
+ marker_conf_t *priv = NULL;
+ unsigned long cookie = 0;
+ marker_local_t *local = NULL;
+ char key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+ int32_t ret = -1;
+ int32_t i = 0;
+
+ priv = this->private;
+
+ if (name) {
+ for (i = 0; mq_ext_xattrs[i]; i++) {
+ if (strcmp(name, mq_ext_xattrs[i]))
+ continue;
+
+ GET_QUOTA_KEY(this, key, mq_ext_xattrs[i], ret);
+ if (ret < 0)
+ goto out;
+ name = key;
+ break;
+ }
+ }
- local = (marker_local_t *) frame->local;
+ frame->local = mem_get0(this->local_pool);
+ local = frame->local;
+ if (local == NULL)
+ goto out;
- frame->local = NULL;
+ MARKER_INIT_LOCAL(frame, local);
- STACK_DESTROY (frame->root);
+ if ((loc_copy(&local->loc, loc)) < 0)
+ goto out;
- marker_local_unref (local);
+ gf_log(this->name, GF_LOG_DEBUG, "USER:PID = %d", frame->root->pid);
- return 0;
+ if (priv && priv->feature_enabled & GF_XTIME)
+ is_true = call_from_special_client(frame, this, name);
+
+ if (is_true == _gf_false) {
+ if (name == NULL) {
+ /* Signifies that marker translator
+ * has to filter the quota's xattr's,
+ * this is to prevent afr from performing
+ * self healing on marker-quota xattrs'
+ */
+ cookie = 1;
+ }
+ STACK_WIND_COOKIE(frame, marker_getxattr_cbk, (void *)cookie,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr,
+ loc, name, xdata);
+ }
+
+ return 0;
+out:
+ MARKER_STACK_UNWIND(getxattr, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
}
-int
-marker_specific_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+int32_t
+marker_setxattr_done(call_frame_t *frame)
{
- int32_t ret = 0;
- int32_t done = 0;
- marker_local_t *local = NULL;
+ marker_local_t *local = NULL;
- local = (marker_local_t*) frame->local;
+ local = (marker_local_t *)frame->local;
- if (op_ret == -1 && op_errno == ENOSPC) {
- marker_error_handler (this);
- done = 1;
- goto out;
- }
+ frame->local = NULL;
- if (local) {
- if (local->loc.path && strcmp (local->loc.path, "/") == 0) {
- done = 1;
- goto out;
- }
- if (__is_root_gfid (local->loc.gfid)) {
- done = 1;
- goto out;
- }
- }
+ STACK_DESTROY(frame->root);
- ret = marker_trav_parent (local);
+ marker_local_unref(local);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG, "Error occurred "
- "while traversing to the parent, stopping marker");
+ return 0;
+}
- done = 1;
+int
+marker_specific_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int32_t ret = 0;
+ int32_t done = 1;
+ marker_local_t *local = NULL;
- goto out;
+ local = (marker_local_t *)frame->local;
+
+ if (op_ret == -1 && op_errno == ENOSPC) {
+ marker_error_handler(this, local, op_errno);
+ goto out;
+ }
+
+ if (local) {
+ if (local->loc.path && strcmp(local->loc.path, "/") == 0) {
+ goto out;
+ }
+ if (__is_root_gfid(local->loc.gfid)) {
+ goto out;
}
+ }
- marker_start_setxattr (frame, this);
+ ret = (local) ? marker_trav_parent(local) : -1;
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Error occurred "
+ "while traversing to the parent, stopping marker");
+ goto out;
+ }
+
+ marker_start_setxattr(frame, this);
+ done = 0;
out:
- if (done) {
- marker_setxattr_done (frame);
- }
+ if (done) {
+ marker_setxattr_done(frame);
+ }
- return 0;
+ return 0;
}
int32_t
-marker_start_setxattr (call_frame_t *frame, xlator_t *this)
+marker_start_setxattr(call_frame_t *frame, xlator_t *this)
{
- int32_t ret = -1;
- dict_t *dict = NULL;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- local = (marker_local_t*) frame->local;
+ local = (marker_local_t *)frame->local;
- if (!local)
- goto out;
+ if (!local)
+ goto out;
- dict = dict_new ();
+ dict = dict_new();
- if (!dict)
- goto out;
+ if (!dict)
+ goto out;
- if (local->loc.inode && uuid_is_null (local->loc.gfid))
- uuid_copy (local->loc.gfid, local->loc.inode->gfid);
+ if (local->loc.inode && gf_uuid_is_null(local->loc.gfid))
+ gf_uuid_copy(local->loc.gfid, local->loc.inode->gfid);
- GF_UUID_ASSERT (local->loc.gfid);
+ GF_UUID_ASSERT(local->loc.gfid);
- ret = dict_set_static_bin (dict, priv->marker_xattr,
- (void *)local->timebuf, 8);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to set marker xattr (%s)", local->loc.path);
- goto out;
- }
+ ret = dict_set_static_bin(dict, priv->marker_xattr, (void *)local->timebuf,
+ 8);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING, "failed to set marker xattr (%s)",
+ local->loc.path);
+ goto out;
+ }
- STACK_WIND (frame, marker_specific_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, &local->loc, dict, 0,
- NULL);
+ STACK_WIND(frame, marker_specific_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, &local->loc, dict, 0, NULL);
- ret = 0;
+ ret = 0;
out:
- if (dict)
- dict_unref (dict);
+ if (dict)
+ dict_unref(dict);
- return ret;
+ return ret;
}
void
-marker_gettimeofday (marker_local_t *local)
+marker_gettimeofday(marker_local_t *local)
{
- struct timeval tv = {0, };
+ struct timeval tv = {
+ 0,
+ };
- gettimeofday (&tv, NULL);
+ gettimeofday(&tv, NULL);
- local->timebuf [0] = htonl (tv.tv_sec);
- local->timebuf [1] = htonl (tv.tv_usec);
+ local->timebuf[0] = htonl(tv.tv_sec);
+ local->timebuf[1] = htonl(tv.tv_usec);
- return;
+ return;
}
int32_t
-marker_create_frame (xlator_t *this, marker_local_t *local)
+marker_create_frame(xlator_t *this, marker_local_t *local)
{
- call_frame_t *frame = NULL;
+ call_frame_t *frame = NULL;
+
+ frame = create_frame(this, this->ctx->pool);
- frame = create_frame (this, this->ctx->pool);
+ if (!frame)
+ return -1;
- frame->local = (void *) local;
+ frame->local = (void *)local;
- marker_start_setxattr (frame, this);
+ marker_start_setxattr(frame, this);
- return 0;
+ return 0;
}
int32_t
-marker_xtime_update_marks (xlator_t *this, marker_local_t *local)
+marker_xtime_update_marks(xlator_t *this, marker_local_t *local)
{
- GF_VALIDATE_OR_GOTO ("marker", this, out);
- GF_VALIDATE_OR_GOTO (this->name, local, out);
+ marker_conf_t *priv = NULL;
- if ((local->pid == GF_CLIENT_PID_GSYNCD) ||
- (local->pid == GF_CLIENT_PID_DEFRAG))
- goto out;
+ GF_VALIDATE_OR_GOTO("marker", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, local, out);
- marker_gettimeofday (local);
+ priv = this->private;
- marker_local_ref (local);
+ if ((local->pid == GF_CLIENT_PID_GSYNCD &&
+ !(priv->feature_enabled & GF_XTIME_GSYNC_FORCE)) ||
+ (local->pid == GF_CLIENT_PID_DEFRAG))
+ goto out;
- marker_create_frame (this, local);
+ marker_gettimeofday(local);
+
+ marker_local_ref(local);
+
+ marker_create_frame(this, local);
out:
- return 0;
+ return 0;
}
-
int32_t
-marker_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+marker_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- marker_conf_t *priv = NULL;
- marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "error occurred "
- "while Creating a file %s", strerror (op_errno));
- }
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "error occurred "
+ "while creating directory %s",
+ strerror(op_errno));
+ }
- local = (marker_local_t *) frame->local;
+ local = (marker_local_t *)frame->local;
- frame->local = NULL;
+ frame->local = NULL;
+ priv = this->private;
- STACK_UNWIND_STRICT (mkdir, frame, op_ret, op_errno, inode,
- buf, preparent, postparent, xdata);
+ if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) {
+ ctx = mq_inode_ctx_new(inode, this);
+ if (ctx == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "mq_inode_ctx_new "
+ "failed for %s",
+ uuid_utoa(inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
+ }
- if (op_ret == -1 || local == NULL)
- goto out;
+ STACK_UNWIND_STRICT(mkdir, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
- if (uuid_is_null (local->loc.gfid))
- uuid_copy (local->loc.gfid, buf->ia_gfid);
+ if (op_ret == -1 || local == NULL)
+ goto out;
- priv = this->private;
+ if (gf_uuid_is_null(local->loc.gfid))
+ gf_uuid_copy(local->loc.gfid, buf->ia_gfid);
- if (priv->feature_enabled & GF_QUOTA)
- mq_set_inode_xattr (this, &local->loc);
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_create_xattrs_txn(this, &local->loc, NULL);
- if (priv->feature_enabled & GF_XTIME)
- marker_xtime_update_marks (this, local);
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
out:
- marker_local_unref (local);
+ marker_local_unref(local);
- return 0;
+ return 0;
}
int
-marker_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
+marker_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled == 0)
- goto wind;
+ if (priv->feature_enabled == 0)
+ goto wind;
- local = mem_get0 (this->local_pool);
+ local = mem_get0(this->local_pool);
- MARKER_INIT_LOCAL (frame, local);
+ MARKER_INIT_LOCAL(frame, local);
- ret = loc_copy (&local->loc, loc);
+ ret = loc_copy(&local->loc, loc);
- if (ret == -1)
- goto err;
+ if (ret == -1)
+ goto err;
wind:
- STACK_WIND (frame, marker_mkdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ STACK_WIND(frame, marker_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
- return 0;
+ return 0;
err:
- STACK_UNWIND_STRICT (mkdir, frame, -1, ENOMEM, NULL,
- NULL, NULL, NULL, NULL);
- return 0;
-}
+ MARKER_STACK_UNWIND(mkdir, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
+ return 0;
+}
int32_t
-marker_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+marker_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+ quota_inode_ctx_t *ctx = NULL;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "error occurred "
- "while Creating a file %s", strerror (op_errno));
- }
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "error occurred "
+ "while creating file %s",
+ strerror(op_errno));
+ }
- local = (marker_local_t *) frame->local;
+ local = (marker_local_t *)frame->local;
- frame->local = NULL;
+ frame->local = NULL;
+ priv = this->private;
- STACK_UNWIND_STRICT (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata);
+ if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) {
+ ctx = mq_inode_ctx_new(inode, this);
+ if (ctx == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "mq_inode_ctx_new "
+ "failed for %s",
+ uuid_utoa(inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
+ }
- if (op_ret == -1 || local == NULL)
- goto out;
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
- if (uuid_is_null (local->loc.gfid))
- uuid_copy (local->loc.gfid, buf->ia_gfid);
+ if (op_ret == -1 || local == NULL)
+ goto out;
- priv = this->private;
+ if (gf_uuid_is_null(local->loc.gfid))
+ gf_uuid_copy(local->loc.gfid, buf->ia_gfid);
- if (priv->feature_enabled & GF_QUOTA)
- mq_set_inode_xattr (this, &local->loc);
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_create_xattrs_txn(this, &local->loc, buf);
- if (priv->feature_enabled & GF_XTIME)
- marker_xtime_update_marks (this, local);
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
out:
- marker_local_unref (local);
+ marker_local_unref(local);
- return 0;
+ return 0;
}
int32_t
-marker_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+marker_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled == 0)
- goto wind;
+ if (priv->feature_enabled == 0)
+ goto wind;
- local = mem_get0 (this->local_pool);
+ local = mem_get0(this->local_pool);
- MARKER_INIT_LOCAL (frame, local);
+ MARKER_INIT_LOCAL(frame, local);
- ret = loc_copy (&local->loc, loc);
+ ret = loc_copy(&local->loc, loc);
- if (ret == -1)
- goto err;
+ if (ret == -1)
+ goto err;
wind:
- STACK_WIND (frame, marker_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
- fd, xdata);
- return 0;
+ STACK_WIND(frame, marker_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
err:
- STACK_UNWIND_STRICT (create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
- NULL, NULL);
+ MARKER_STACK_UNWIND(create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL,
+ NULL);
- return 0;
+ return 0;
}
-
int32_t
-marker_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+marker_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- marker_conf_t *priv = NULL;
- marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "error occurred "
- "while write, %s", strerror (op_errno));
- }
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "error occurred "
+ "while write, %s",
+ strerror(op_errno));
+ }
- local = (marker_local_t *) frame->local;
+ local = (marker_local_t *)frame->local;
- frame->local = NULL;
+ frame->local = NULL;
- STACK_UNWIND_STRICT (writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
+ STACK_UNWIND_STRICT(writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
- if (op_ret == -1 || local == NULL)
- goto out;
+ if (op_ret == -1 || local == NULL)
+ goto out;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled & GF_QUOTA)
- mq_initiate_quota_txn (this, &local->loc);
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_initiate_quota_txn(this, &local->loc, postbuf);
- if (priv->feature_enabled & GF_XTIME)
- marker_xtime_update_marks (this, local);
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
out:
- marker_local_unref (local);
+ marker_local_unref(local);
- return 0;
+ return 0;
}
int32_t
-marker_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t offset, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
+marker_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled == 0)
- goto wind;
+ if (priv->feature_enabled == 0)
+ goto wind;
- local = mem_get0 (this->local_pool);
+ local = mem_get0(this->local_pool);
- MARKER_INIT_LOCAL (frame, local);
+ MARKER_INIT_LOCAL(frame, local);
- ret = marker_inode_loc_fill (fd->inode, &local->loc);
+ ret = marker_inode_loc_fill(fd->inode, &local->loc);
- if (ret == -1)
- goto err;
+ if (ret == -1)
+ goto err;
wind:
- STACK_WIND (frame, marker_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
- flags, iobref, xdata);
- return 0;
+ STACK_WIND(frame, marker_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
+ return 0;
err:
- STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL);
+ MARKER_STACK_UNWIND(writev, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
int32_t
-marker_rmdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+marker_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- marker_conf_t *priv = NULL;
- marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ call_stub_t *stub = NULL;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "error occurred "
- "rmdir %s", strerror (op_errno));
- }
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "error occurred "
+ "rmdir %s",
+ strerror(op_errno));
+ }
- local = (marker_local_t *) frame->local;
+ local = (marker_local_t *)frame->local;
- frame->local = NULL;
+ frame->local = NULL;
+ priv = this->private;
- STACK_UNWIND_STRICT (rmdir, frame, op_ret, op_errno, preparent,
- postparent, xdata);
+ if (op_ret == -1 || local == NULL)
+ goto out;
- if (op_ret == -1 || local == NULL)
- goto out;
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
- priv = this->private;
+ if (priv->feature_enabled & GF_QUOTA) {
+ /* If a 'rm -rf' is performed by a client, rmdir can be faster
+ than marker background mq_reduce_parent_size_txn.
+ In this case, as part of rmdir parent child association
+ will be removed in the server protocol.
+ This can lead to mq_reduce_parent_size_txn failures.
- if (priv->feature_enabled & GF_QUOTA)
- mq_reduce_parent_size (this, &local->loc, -1);
+ So perform mq_reduce_parent_size_txn in foreground
+ and unwind to server once txn is complete
+ */
+
+ stub = fop_rmdir_cbk_stub(frame, default_rmdir_cbk, op_ret, op_errno,
+ preparent, postparent, xdata);
+ mq_reduce_parent_size_txn(this, &local->loc, NULL, 1, stub);
+
+ if (stub) {
+ marker_local_unref(local);
+ return 0;
+ }
+ }
- if (priv->feature_enabled & GF_XTIME)
- marker_xtime_update_marks (this, local);
out:
- marker_local_unref (local);
+ STACK_UNWIND_STRICT(rmdir, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
- return 0;
+ marker_local_unref(local);
+
+ return 0;
}
int32_t
-marker_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- dict_t *xdata)
+marker_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled == 0)
- goto wind;
+ if (priv->feature_enabled == 0)
+ goto wind;
- local = mem_get0 (this->local_pool);
+ local = mem_get0(this->local_pool);
- MARKER_INIT_LOCAL (frame, local);
+ MARKER_INIT_LOCAL(frame, local);
- ret = loc_copy (&local->loc, loc);
+ ret = loc_copy(&local->loc, loc);
- if (ret == -1)
- goto err;
+ if (ret == -1)
+ goto err;
wind:
- STACK_WIND (frame, marker_rmdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
- return 0;
+ STACK_WIND(frame, marker_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
+ return 0;
err:
- STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL, NULL, NULL);
+ MARKER_STACK_UNWIND(rmdir, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
int32_t
-marker_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+marker_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- marker_conf_t *priv = NULL;
- marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ uint32_t nlink = -1;
+ GF_UNUSED int32_t ret = 0;
+ call_stub_t *stub = NULL;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE,
- "%s occurred in unlink", strerror (op_errno));
- }
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE, "%s occurred in unlink",
+ strerror(op_errno));
+ }
- local = (marker_local_t *) frame->local;
+ local = (marker_local_t *)frame->local;
- frame->local = NULL;
+ frame->local = NULL;
+ priv = this->private;
- STACK_UNWIND_STRICT (unlink, frame, op_ret, op_errno, preparent,
- postparent, xdata);
+ if (op_ret == -1 || local == NULL)
+ goto out;
- if (op_ret == -1 || local == NULL)
- goto out;
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
- priv = this->private;
+ if (priv->feature_enabled & GF_QUOTA) {
+ if (local->skip_txn)
+ goto out;
- if ((priv->feature_enabled & GF_QUOTA) && (local->ia_nlink == 1))
- mq_reduce_parent_size (this, &local->loc, -1);
-
- if (priv->feature_enabled & GF_XTIME)
- marker_xtime_update_marks (this, local);
-out:
- marker_local_unref (local);
+ if (xdata) {
+ ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, &nlink);
+ if (ret) {
+ gf_log(this->name, GF_LOG_TRACE, "dict get failed %s ",
+ strerror(-ret));
+ }
+ }
- return 0;
-}
+ /* If a 'rm -rf' is performed by a client, unlink can be faster
+ than marker background mq_reduce_parent_size_txn.
+ In this case, as part of unlink parent child association
+ will be removed in the server protocol.
+ This can lead to mq_reduce_parent_size_txn failures.
+ So perform mq_reduce_parent_size_txn in foreground
+ and unwind to server once txn is complete
+ */
-int32_t
-marker_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
-{
- marker_local_t *local = NULL;
+ stub = fop_unlink_cbk_stub(frame, default_unlink_cbk, op_ret, op_errno,
+ preparent, postparent, xdata);
+ mq_reduce_parent_size_txn(this, &local->loc, NULL, nlink, stub);
- local = frame->local;
- if (op_ret < 0) {
- goto err;
+ if (stub) {
+ marker_local_unref(local);
+ return 0;
}
+ }
- if (local == NULL) {
- op_errno = EINVAL;
- goto err;
- }
+out:
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
- local->ia_nlink = buf->ia_nlink;
+ marker_local_unref(local);
- STACK_WIND (frame, marker_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag,
- NULL);
- return 0;
-err:
- frame->local = NULL;
- STACK_UNWIND_STRICT (unlink, frame, -1, op_errno, NULL, NULL, NULL);
- marker_local_unref (local);
- return 0;
+ return 0;
}
-
int32_t
-marker_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
+marker_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+ gf_boolean_t dict_free = _gf_false;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled == 0)
- goto unlink_wind;
+ if (priv->feature_enabled == 0)
+ goto unlink_wind;
- local = mem_get0 (this->local_pool);
- local->xflag = xflag;
- MARKER_INIT_LOCAL (frame, local);
+ local = mem_get0(this->local_pool);
+ local->xflag = xflag;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+ MARKER_INIT_LOCAL(frame, local);
- ret = loc_copy (&local->loc, loc);
+ ret = loc_copy(&local->loc, loc);
- if (ret == -1)
- goto err;
+ if (ret == -1)
+ goto err;
- if (uuid_is_null (loc->gfid) && loc->inode)
- uuid_copy (loc->gfid, loc->inode->gfid);
+ if (xdata && dict_get(xdata, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY)) {
+ local->skip_txn = 1;
+ goto unlink_wind;
+ }
- STACK_WIND (frame, marker_unlink_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
- return 0;
+ if (xdata == NULL) {
+ xdata = dict_new();
+ dict_free = _gf_true;
+ }
+
+ ret = dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
+ if (ret < 0)
+ goto err;
unlink_wind:
- STACK_WIND (frame, marker_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
- return 0;
+ STACK_WIND(frame, marker_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ goto out;
+
err:
- frame->local = NULL;
- STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
- marker_local_unref (local);
- return 0;
-}
+ MARKER_STACK_UNWIND(unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+out:
+ if (dict_free)
+ dict_unref(xdata);
+ return 0;
+}
int32_t
-marker_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+marker_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
- "linking a file ", strerror (op_errno));
- }
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s occurred while "
+ "linking a file ",
+ strerror(op_errno));
+ }
- local = (marker_local_t *) frame->local;
+ local = (marker_local_t *)frame->local;
- frame->local = NULL;
+ frame->local = NULL;
- STACK_UNWIND_STRICT (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
+ STACK_UNWIND_STRICT(link, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
- if (op_ret == -1 || local == NULL)
- goto out;
+ if (op_ret == -1 || local == NULL)
+ goto out;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled & GF_QUOTA)
- mq_initiate_quota_txn (this, &local->loc);
+ if (priv->feature_enabled & GF_QUOTA) {
+ if (!local->skip_txn)
+ mq_create_xattrs_txn(this, &local->loc, buf);
+ }
- if (priv->feature_enabled & GF_XTIME)
- marker_xtime_update_marks (this, local);
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
out:
- marker_local_unref (local);
+ marker_local_unref(local);
- return 0;
+ return 0;
}
int32_t
-marker_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
+marker_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled == 0)
- goto wind;
+ if (priv->feature_enabled == 0)
+ goto wind;
- local = mem_get0 (this->local_pool);
+ local = mem_get0(this->local_pool);
- MARKER_INIT_LOCAL (frame, local);
+ MARKER_INIT_LOCAL(frame, local);
- ret = loc_copy (&local->loc, newloc);
+ ret = loc_copy(&local->loc, newloc);
- if (ret == -1)
- goto err;
+ if (ret == -1)
+ goto err;
+
+ if (xdata && dict_get(xdata, GLUSTERFS_MARKER_DONT_ACCOUNT_KEY))
+ local->skip_txn = 1;
wind:
- STACK_WIND (frame, marker_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
- return 0;
+ STACK_WIND(frame, marker_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
err:
- STACK_UNWIND_STRICT (link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
- NULL);
+ MARKER_STACK_UNWIND(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
int32_t
-marker_rename_done (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+marker_rename_done(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- marker_local_t *local = NULL, *oplocal = NULL;
- loc_t newloc = {0, };
- marker_conf_t *priv = NULL;
-
- local = frame->local;
- oplocal = local->oplocal;
+ marker_local_t *local = NULL, *oplocal = NULL;
+ loc_t newloc = {
+ 0,
+ };
+ marker_conf_t *priv = NULL;
- priv = this->private;
+ local = frame->local;
+ oplocal = local->oplocal;
- frame->local = NULL;
+ priv = this->private;
- if (op_ret < 0) {
- if (local->err == 0) {
- local->err = op_errno;
- }
+ frame->local = NULL;
- gf_log (this->name, GF_LOG_WARNING,
- "inodelk (UNLOCK) failed on path:%s (gfid:%s) (%s)",
- local->parent_loc.path,
- uuid_utoa (local->parent_loc.inode->gfid),
- strerror (op_errno));
- }
+ if (op_ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "inodelk (UNLOCK) failed on path:%s (gfid:%s) (%s)",
+ oplocal->parent_loc.path,
+ uuid_utoa(oplocal->parent_loc.inode->gfid), strerror(op_errno));
+ }
- if (local->stub != NULL) {
- call_resume (local->stub);
- local->stub = NULL;
- } else if (local->err != 0) {
- STACK_UNWIND_STRICT (rename, frame, -1, local->err, NULL, NULL,
- NULL, NULL, NULL, NULL);
- }
+ if (local->err != 0)
+ goto err;
- mq_reduce_parent_size (this, &oplocal->loc, oplocal->contribution);
+ mq_reduce_parent_size_txn(this, &oplocal->loc, &oplocal->contribution, -1,
+ NULL);
- if (local->loc.inode != NULL) {
- mq_reduce_parent_size (this, &local->loc, local->contribution);
- }
+ if (local->loc.inode != NULL) {
+ /* If destination file exits before rename, it would have
+ * been unlinked while renaming a file
+ */
+ mq_reduce_parent_size_txn(this, &local->loc, NULL, local->ia_nlink,
+ NULL);
+ }
+
+ newloc.inode = inode_ref(oplocal->loc.inode);
+ newloc.path = gf_strdup(local->loc.path);
+ newloc.name = strrchr(newloc.path, '/');
+ if (newloc.name)
+ newloc.name++;
+ newloc.parent = inode_ref(local->loc.parent);
+
+ mq_create_xattrs_txn(this, &newloc, &local->buf);
+
+ loc_wipe(&newloc);
+
+ if (priv->feature_enabled & GF_XTIME) {
+ if (!local->loc.inode)
+ local->loc.inode = inode_ref(oplocal->loc.inode);
+ // update marks on oldpath
+ gf_uuid_copy(local->loc.gfid, oplocal->loc.inode->gfid);
+ marker_xtime_update_marks(this, oplocal);
+ marker_xtime_update_marks(this, local);
+ }
- newloc.inode = inode_ref (oplocal->loc.inode);
- newloc.path = gf_strdup (local->loc.path);
- newloc.name = strrchr (newloc.path, '/');
- if (newloc.name)
- newloc.name++;
- newloc.parent = inode_ref (local->loc.parent);
+err:
+ marker_local_unref(local);
+ marker_local_unref(oplocal);
- mq_rename_update_newpath (this, &newloc);
+ return 0;
+}
- loc_wipe (&newloc);
+void
+marker_rename_release_oldp_lock(marker_local_t *local, xlator_t *this)
+{
+ marker_local_t *oplocal = NULL;
+ call_frame_t *lk_frame = NULL;
+ struct gf_flock lock = {
+ 0,
+ };
- if (priv->feature_enabled & GF_XTIME) {
- //update marks on oldpath
- uuid_copy (local->loc.gfid, oplocal->loc.inode->gfid);
- marker_xtime_update_marks (this, oplocal);
- marker_xtime_update_marks (this, local);
- }
+ oplocal = local->oplocal;
+ lk_frame = local->lk_frame;
- marker_local_unref (local);
- marker_local_unref (oplocal);
- return 0;
-}
+ if (lk_frame == NULL)
+ goto err;
+ lock.l_type = F_UNLCK;
+ lock.l_whence = SEEK_SET;
+ lock.l_start = 0;
+ lock.l_len = 0;
+ lock.l_pid = 0;
-int32_t
-marker_rename_release_newp_lock (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
-{
- marker_local_t *local = NULL, *oplocal = NULL;
- struct gf_flock lock = {0, };
+ STACK_WIND(lk_frame, marker_rename_done, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk, this->name,
+ &oplocal->parent_loc, F_SETLKW, &lock, NULL);
- local = frame->local;
- oplocal = local->oplocal;
+ return;
- if (op_ret < 0) {
- if (local->err == 0) {
- local->err = op_errno;
- }
+err:
+ marker_local_unref(local);
+ marker_local_unref(oplocal);
+}
- gf_log (this->name, GF_LOG_WARNING,
- "inodelk (UNLOCK) failed on %s (gfid:%s) (%s)",
- oplocal->parent_loc.path,
- uuid_utoa (oplocal->parent_loc.inode->gfid),
- strerror (op_errno));
- }
+int32_t
+marker_rename_unwind(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_local_t *oplocal = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ inode_contribution_t *contri = NULL;
- if (local->next_lock_on == NULL) {
- marker_rename_done (frame, NULL, this, 0, 0, NULL);
- goto out;
- }
+ local = frame->local;
+ oplocal = local->oplocal;
+ frame->local = NULL;
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
+ // Reset frame uid and gid if set.
+ if (cookie == (void *)_GF_UID_GID_CHANGED)
+ MARKER_RESET_UID_GID(frame, frame->root, local);
- STACK_WIND (frame,
- marker_rename_done,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- this->name, &local->parent_loc, F_SETLKW, &lock, NULL);
+ if (op_ret < 0)
+ local->err = op_errno ? op_errno : EINVAL;
-out:
- return 0;
+ if (local->stub != NULL) {
+ /* Remove contribution node from in-memory even if
+ * remove-xattr has failed as the rename is already performed
+ * if local->stub is set, which means rename was successful
+ */
+ (void)mq_inode_ctx_get(oplocal->loc.inode, this, &ctx);
+ if (ctx) {
+ contri = mq_get_contribution_node(oplocal->loc.parent, ctx);
+ if (contri) {
+ QUOTA_FREE_CONTRIBUTION_NODE(ctx, contri);
+ GF_REF_PUT(contri);
+ }
+ }
+
+ call_resume(local->stub);
+ local->stub = NULL;
+ local->err = 0;
+ } else if (local->err != 0) {
+ STACK_UNWIND_STRICT(rename, frame, -1, local->err, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ } else {
+ gf_log(this->name, GF_LOG_CRITICAL,
+ "continuation stub to unwind the call is absent, hence "
+ "call will be hung (call-stack id = %" PRIu64 ")",
+ frame->root->unique);
+ }
+
+ /* If there are in-progress writes on old-path when during rename
+ * operation, update txn will update the wrong path if lock
+ * is released before rename unwind.
+ * So release lock only after rename unwind
+ */
+ marker_rename_release_oldp_lock(local, this);
+
+ return 0;
}
-
int32_t
-marker_rename_release_oldp_lock (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+marker_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- marker_local_t *local = NULL, *oplocal = NULL;
- struct gf_flock lock = {0, };
-
- local = frame->local;
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ marker_local_t *oplocal = NULL;
+ call_stub_t *stub = NULL;
+ int32_t ret = 0;
+ char contri_key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+ loc_t newloc = {
+ 0,
+ };
+
+ local = (marker_local_t *)frame->local;
+
+ if (local != NULL) {
oplocal = local->oplocal;
+ }
- if ((op_ret < 0) && (op_errno != ENOATTR)) {
- local->err = op_errno;
+ priv = this->private;
+
+ if (op_ret < 0) {
+ if (local != NULL) {
+ local->err = op_errno;
}
- //Reset frame uid and gid if set.
- if (cookie == (void *) _GF_UID_GID_CHANGED)
- MARKER_RESET_UID_GID (frame, frame->root, local);
-
- lock.l_type = F_UNLCK;
- lock.l_whence = SEEK_SET;
- lock.l_start = 0;
- lock.l_len = 0;
- lock.l_pid = 0;
-
- STACK_WIND (frame,
- marker_rename_release_newp_lock,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- this->name, &oplocal->parent_loc, F_SETLKW, &lock, NULL);
- return 0;
-}
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s occurred while "
+ "renaming a file ",
+ strerror(op_errno));
+ }
+
+ if (priv->feature_enabled & GF_QUOTA) {
+ if ((op_ret < 0) || (local == NULL)) {
+ goto quota_err;
+ }
+ local->ia_nlink = 0;
+ if (xdata)
+ ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA,
+ &local->ia_nlink);
-int32_t
-marker_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- marker_conf_t *priv = NULL;
- marker_local_t *local = NULL;
- marker_local_t *oplocal = NULL;
- call_stub_t *stub = NULL;
- int32_t ret = 0;
- char contri_key [512] = {0, };
- loc_t newloc = {0, };
-
- local = (marker_local_t *) frame->local;
+ local->buf = *buf;
+ stub = fop_rename_cbk_stub(frame, default_rename_cbk, op_ret, op_errno,
+ buf, preoldparent, postoldparent,
+ prenewparent, postnewparent, xdata);
+ if (stub == NULL) {
+ local->err = ENOMEM;
+ goto quota_err;
+ }
- if (local != NULL) {
- oplocal = local->oplocal;
+ local->stub = stub;
+
+ GET_CONTRI_KEY(this, contri_key, oplocal->loc.parent->gfid, ret);
+ if (ret < 0) {
+ local->err = ENOMEM;
+ goto quota_err;
}
- priv = this->private;
+ /* Removexattr requires uid and gid to be 0,
+ * reset them in the callback.
+ */
+ MARKER_SET_UID_GID(frame, local, frame->root);
+
+ newloc.inode = inode_ref(oplocal->loc.inode);
+ newloc.path = gf_strdup(local->loc.path);
+ newloc.name = strrchr(newloc.path, '/');
+ if (newloc.name)
+ newloc.name++;
+ newloc.parent = inode_ref(local->loc.parent);
+ gf_uuid_copy(newloc.gfid, oplocal->loc.inode->gfid);
- if (op_ret < 0) {
- if (local != NULL) {
- local->err = op_errno;
- }
+ STACK_WIND_COOKIE(
+ frame, marker_rename_unwind, frame->cookie, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, &newloc, contri_key, NULL);
- gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
- "renaming a file ", strerror (op_errno));
+ loc_wipe(&newloc);
+ } else {
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
+
+ if ((op_ret < 0) || (local == NULL)) {
+ goto out;
}
- if (priv->feature_enabled & GF_QUOTA) {
- if ((op_ret < 0) || (local == NULL)) {
- goto quota_err;
- }
-
- stub = fop_rename_cbk_stub (frame, default_rename_cbk, op_ret,
- op_errno, buf, preoldparent,
- postoldparent, prenewparent,
- postnewparent, xdata);
- if (stub == NULL) {
- local->err = ENOMEM;
- goto quota_err;
- }
-
- local->stub = stub;
-
- GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret);
- if (ret < 0) {
- local->err = ENOMEM;
- goto quota_err;
- }
-
- /* Removexattr requires uid and gid to be 0,
- * reset them in the callback.
- */
- MARKER_SET_UID_GID (frame, local, frame->root);
-
- newloc.inode = inode_ref (oplocal->loc.inode);
- newloc.path = gf_strdup (local->loc.path);
- newloc.name = strrchr (newloc.path, '/');
- if (newloc.name)
- newloc.name++;
- newloc.parent = inode_ref (local->loc.parent);
- uuid_copy (newloc.gfid, oplocal->loc.inode->gfid);
-
- STACK_WIND_COOKIE (frame, marker_rename_release_oldp_lock,
- frame->cookie, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- &newloc, contri_key, NULL);
-
- loc_wipe (&newloc);
- } else {
- frame->local = NULL;
-
- STACK_UNWIND_STRICT (rename, frame, op_ret, op_errno, buf,
- preoldparent, postoldparent, prenewparent,
- postnewparent, xdata);
-
- if ((op_ret < 0) || (local == NULL)) {
- goto out;
- }
-
- if (priv->feature_enabled & GF_XTIME) {
- //update marks on oldpath
- uuid_copy (local->loc.gfid, oplocal->loc.inode->gfid);
- marker_xtime_update_marks (this, oplocal);
- marker_xtime_update_marks (this, local);
- }
+ if (priv->feature_enabled & GF_XTIME) {
+ // update marks on oldpath
+ if (!local->loc.inode)
+ local->loc.inode = inode_ref(oplocal->loc.inode);
+ gf_uuid_copy(local->loc.gfid, oplocal->loc.inode->gfid);
+ marker_xtime_update_marks(this, oplocal);
+ marker_xtime_update_marks(this, local);
}
+ }
out:
- if (!(priv->feature_enabled & GF_QUOTA)) {
- marker_local_unref (local);
- marker_local_unref (oplocal);
- }
+ if (!(priv->feature_enabled & GF_QUOTA)) {
+ marker_local_unref(local);
+ marker_local_unref(oplocal);
+ }
- return 0;
+ return 0;
quota_err:
- marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
- return 0;
+ marker_rename_unwind(frame, NULL, this, 0, 0, NULL);
+ return 0;
}
+int32_t
+marker_do_rename(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_local_t *oplocal = NULL;
+ char contri_key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+ int keylen = 0;
+ quota_meta_t contribution = {
+ 0,
+ };
+
+ local = frame->local;
+ oplocal = local->oplocal;
+
+ // Reset frame uid and gid if set.
+ if (cookie == (void *)_GF_UID_GID_CHANGED)
+ MARKER_RESET_UID_GID(frame, frame->root, local);
+
+ if ((op_ret < 0) && (op_errno != ENOATTR) && (op_errno != ENODATA)) {
+ local->err = op_errno ? op_errno : EINVAL;
+ gf_log(this->name, GF_LOG_WARNING,
+ "fetching contribution values from %s (gfid:%s) "
+ "failed (%s)",
+ oplocal->loc.path, uuid_utoa(oplocal->loc.inode->gfid),
+ strerror(op_errno));
+ goto err;
+ }
+
+ GET_CONTRI_KEY(this, contri_key, oplocal->loc.parent->gfid, keylen);
+ if (keylen < 0) {
+ local->err = errno ? errno : ENOMEM;
+ goto err;
+ }
+ quota_dict_get_meta(dict, contri_key, keylen, &contribution);
+ oplocal->contribution = contribution;
+
+ STACK_WIND(frame, marker_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, &oplocal->loc, &local->loc,
+ local->xdata);
+
+ return 0;
+
+err:
+ marker_rename_unwind(frame, NULL, this, 0, 0, NULL);
+ return 0;
+}
int32_t
-marker_do_rename (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+marker_get_oldpath_contribution(call_frame_t *lk_frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *xdata)
+{
+ call_frame_t *frame = NULL;
+ marker_local_t *local = NULL;
+ marker_local_t *oplocal = NULL;
+ char contri_key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+ int32_t ret = 0;
+
+ local = lk_frame->local;
+ oplocal = local->oplocal;
+ frame = local->frame;
+
+ if (op_ret < 0) {
+ local->err = op_errno ? op_errno : EINVAL;
+ gf_log(this->name, GF_LOG_WARNING,
+ "cannot hold inodelk on %s (gfid:%s) (%s)", oplocal->loc.path,
+ uuid_utoa(oplocal->loc.inode->gfid), strerror(op_errno));
+ if (local->lk_frame) {
+ STACK_DESTROY(local->lk_frame->root);
+ local->lk_frame = NULL;
+ }
+ goto err;
+ }
+
+ GET_CONTRI_KEY(this, contri_key, oplocal->loc.parent->gfid, ret);
+ if (ret < 0) {
+ local->err = errno ? errno : ENOMEM;
+ goto err;
+ }
+
+ /* getxattr requires uid and gid to be 0,
+ * reset them in the callback.
+ */
+ MARKER_SET_UID_GID(frame, local, frame->root);
+
+ if (gf_uuid_is_null(oplocal->loc.gfid))
+ gf_uuid_copy(oplocal->loc.gfid, oplocal->loc.inode->gfid);
+
+ GF_UUID_ASSERT(oplocal->loc.gfid);
+
+ STACK_WIND_COOKIE(frame, marker_do_rename, frame->cookie, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, &oplocal->loc,
+ contri_key, NULL);
+
+ return 0;
+err:
+ marker_rename_unwind(frame, NULL, this, 0, 0, NULL);
+ return 0;
+}
+
+/* For a marker_rename FOP, following is the algorithm used for Quota
+ * accounting. The use-case considered is:
+ * 1. rename (src, dst)
+ * 2. both src and dst exist
+ * 3. there are parallel operations on src and dst (lets say through fds
+ * opened on them before rename was initiated).
+ *
+ * PS: We've not thought through whether this algo works in the presence of
+ * hardlinks to src and/or dst.
+ *
+ * Algorithm:
+ * ==========
+ *
+ * 1) set inodelk on src-parent
+ * As part of rename operation, parent can change for the file.
+ * We need to remove contribution (both on disk xattr and in-memory one)
+ * to src-parent (and its ancestors) and add the contribution to dst-parent
+ * (and its ancestors). While we are doing these operations, contribution of
+ * the file/directory shouldn't be changing as we want to be sure that
+ * a) what we subtract from src-parent is exactly what we add to dst-parent
+ * b) we should subtract from src-parent exactly what we contributed to
+ * src-parent
+ * So, We hold a lock on src-parent to block any parallel transcations on
+ * src-inode (since that's the one which survives rename).
+ *
+ * If there are any parallel transactions on dst-inode they keep succeeding
+ * till the association of dst-inode with dst-parent is broken because of an
+ * inode_rename after unwind of rename fop from marker. Only after unwind
+ * (and hence inode_rename), we delete and subtract the contribution of
+ * dst-inode to dst-parent. That way we are making sure we subtract exactly
+ * what dst-inode contributed to dst-parent.
+ *
+ * 2) lookup contribution to src-parent on src-inode.
+ * We need to save the contribution info for use at step-8.
+ *
+ * 3) wind rename
+ * Perform rename on disk
+ *
+ * 4) remove xattr on src-loc
+ * After rename, parent can change, so
+ * need to remove xattrs storing contribution to src-parent.
+ *
+ * 5) remove contribution node corresponding to src-parent from the in-memory
+ * list.
+ * After rename, contri gfid can change and we have
+ * also removed xattr from file.
+ * We need to remove in-memory contribution node to prevent updations to
+ * src-parent even after a successful rename
+ *
+ * 6) unwind rename
+ * This will ensure that rename is done in the server
+ * inode table. An inode_rename disassociates src-inode from src-parent and
+ * associates it with dst-parent. It also disassociates dst-inode from
+ * dst-parent. After inode_rename, inode_parent on src-inode will give
+ * dst-parent and inode_parent on dst-inode will return NULL (assuming
+ * dst-inode doesn't have any hardlinks).
+ *
+ * 7) release inodelk on src-parent
+ * Lock on src-parent should be released only after
+ * rename on disk, remove xattr and rename_unwind (and hence inode_rename)
+ * operations. If lock is released before inode_rename, a parallel
+ * transaction on src-inode can still update src-parent (as inode_parent on
+ * src-inode can still return src-parent). This would make the
+ * contribution from src-inode to src-parent stored in step-2 stale.
+ *
+ * 8) Initiate mq_reduce_parent_size_txn on src-parent to remove contribution
+ * of src-inode to src-parent. We use the contribution stored in step-2.
+ * Since, we had acquired the lock on src-parent all along step-2 through
+ * inode_rename, we can be sure that a parallel transaction wouldn't have
+ * added a delta to src-parent.
+ *
+ * 9) Initiate mq_reduce_parent_size_txn on dst-parent if dst-inode exists.
+ * The size reduced from dst-parent and its ancestors is the
+ * size stored as contribution to dst-parent in dst-inode.
+ * If the destination file had existed, rename will unlink the
+ * destination file as part of its operation.
+ * We need to reduce the size on the dest parent similarly to
+ * unlink. Since, we are initiating reduce-parent-size transaction after
+ * inode_rename, we can be sure that a parallel transaction wouldn't add
+ * delta to dst-parent while we are reducing the contribution of dst-inode
+ * from its ancestors before rename.
+ *
+ * 10) create contribution xattr to dst-parent on src-inode.
+ */
+int32_t
+marker_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- marker_local_t *local = NULL, *oplocal = NULL;
- char contri_key[512] = {0, };
- int32_t ret = 0;
- int64_t *contribution = 0;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_local_t *oplocal = NULL;
+ marker_conf_t *priv = NULL;
+ struct gf_flock lock = {
+ 0,
+ };
- local = frame->local;
- oplocal = local->oplocal;
+ priv = this->private;
- //Reset frame uid and gid if set.
- if (cookie == (void *) _GF_UID_GID_CHANGED)
- MARKER_RESET_UID_GID (frame, frame->root, local);
-
- if ((op_ret < 0) && (op_errno != ENOATTR)) {
- local->err = op_errno;
- gf_log (this->name, GF_LOG_WARNING,
- "fetching contribution values from %s (gfid:%s) "
- "failed (%s)", local->loc.path,
- uuid_utoa (local->loc.inode->gfid),
- strerror (op_errno));
- goto err;
- }
+ if (priv->feature_enabled == 0)
+ goto rename_wind;
- if (local->loc.inode != NULL) {
- GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret);
- if (ret < 0) {
- local->err = errno;
- goto err;
- }
-
- if (dict_get_bin (dict, contri_key,
- (void **) &contribution) == 0) {
- local->contribution = ntoh64 (*contribution);
- }
- }
+ local = mem_get0(this->local_pool);
- STACK_WIND (frame, marker_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, &oplocal->loc,
- &local->loc, NULL);
+ MARKER_INIT_LOCAL(frame, local);
- return 0;
+ oplocal = mem_get0(this->local_pool);
+
+ MARKER_INIT_LOCAL(frame, oplocal);
+
+ frame->local = local;
+
+ local->oplocal = marker_local_ref(oplocal);
+
+ ret = loc_copy(&local->loc, newloc);
+ if (ret < 0)
+ goto err;
+
+ ret = loc_copy(&oplocal->loc, oldloc);
+ if (ret < 0)
+ goto err;
+
+ if (!(priv->feature_enabled & GF_QUOTA)) {
+ goto rename_wind;
+ }
+
+ ret = mq_inode_loc_fill(NULL, newloc->parent, &local->parent_loc);
+ if (ret < 0)
+ goto err;
+
+ ret = mq_inode_loc_fill(NULL, oldloc->parent, &oplocal->parent_loc);
+ if (ret < 0)
+ goto err;
+ lock.l_len = 0;
+ lock.l_start = 0;
+ lock.l_type = F_WRLCK;
+ lock.l_whence = SEEK_SET;
+
+ local->xdata = xdata ? dict_ref(xdata) : dict_new();
+ ret = dict_set_int32(local->xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
+ if (ret < 0)
+ goto err;
+
+ local->frame = frame;
+ local->lk_frame = create_frame(this, this->ctx->pool);
+ if (local->lk_frame == NULL)
+ goto err;
+
+ local->lk_frame->root->uid = 0;
+ local->lk_frame->root->gid = 0;
+ local->lk_frame->local = local;
+ set_lk_owner_from_ptr(&local->lk_frame->root->lk_owner,
+ local->lk_frame->root);
+
+ STACK_WIND(local->lk_frame, marker_get_oldpath_contribution,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->inodelk, this->name,
+ &oplocal->parent_loc, F_SETLKW, &lock, NULL);
+
+ return 0;
+
+rename_wind:
+ STACK_WIND(frame, marker_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+
+ return 0;
err:
- marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
- return 0;
+ MARKER_STACK_UNWIND(rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ marker_local_unref(oplocal);
+
+ return 0;
}
+int32_t
+marker_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s occurred while "
+ "truncating a file ",
+ strerror(op_errno));
+ }
+
+ local = (marker_local_t *)frame->local;
+
+ frame->local = NULL;
+
+ STACK_UNWIND_STRICT(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ if (op_ret == -1 || local == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->feature_enabled & GF_QUOTA) {
+ /* DHT Rebalance process, at the end of migration will
+ * first make the src file as a linkto file and then
+ * truncate the file. By doing a truncate after making the
+ * src file as linkto file, the contri which is already
+ * accounted is left over.
+ * So, we need to account for the linkto file when a truncate
+ * happens, thereby updating the contri properly.
+ * By passing NULL for postbuf, mq_prevalidate does not check
+ * for linkto file.
+ * Same happens with ftruncate as well.
+ */
+ if (postbuf && IS_DHT_LINKFILE_MODE(postbuf))
+ mq_initiate_quota_txn(this, &local->loc, NULL);
+ else
+ mq_initiate_quota_txn(this, &local->loc, postbuf);
+ }
+
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
+
+out:
+ marker_local_unref(local);
+
+ return 0;
+}
int32_t
-marker_get_newpath_contribution (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *dict, dict_t *xdata)
+marker_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- marker_local_t *local = NULL, *oplocal = NULL;
- char contri_key[512] = {0, };
- int32_t ret = 0;
- int64_t *contribution = 0;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- local = frame->local;
- oplocal = local->oplocal;
+ priv = this->private;
- //Reset frame uid and gid if set.
- if (cookie == (void *) _GF_UID_GID_CHANGED)
- MARKER_RESET_UID_GID (frame, frame->root, local);
-
- if ((op_ret < 0) && (op_errno != ENOATTR)) {
- local->err = op_errno;
- gf_log (this->name, GF_LOG_WARNING,
- "fetching contribution values from %s (gfid:%s) "
- "failed (%s)", oplocal->loc.path,
- uuid_utoa (oplocal->loc.inode->gfid),
- strerror (op_errno));
- goto err;
- }
+ if (priv->feature_enabled == 0)
+ goto wind;
- GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret);
- if (ret < 0) {
- local->err = errno;
- goto err;
- }
+ local = mem_get0(this->local_pool);
- if (dict_get_bin (dict, contri_key, (void **) &contribution) == 0)
- oplocal->contribution = ntoh64 (*contribution);
-
- if (local->loc.inode != NULL) {
- GET_CONTRI_KEY (contri_key, local->loc.parent->gfid, ret);
- if (ret < 0) {
- local->err = errno;
- goto err;
- }
-
- /* getxattr requires uid and gid to be 0,
- * reset them in the callback.
- */
- MARKER_SET_UID_GID (frame, local, frame->root);
- if (uuid_is_null (local->loc.gfid))
- uuid_copy (local->loc.gfid, local->loc.inode->gfid);
-
- GF_UUID_ASSERT (local->loc.gfid);
-
- STACK_WIND_COOKIE (frame, marker_do_rename,
- frame->cookie, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- &local->loc, contri_key, NULL);
- } else {
- marker_do_rename (frame, NULL, this, 0, 0, NULL, NULL);
- }
+ MARKER_INIT_LOCAL(frame, local);
- return 0;
+ ret = loc_copy(&local->loc, loc);
+
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND(frame, marker_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
err:
- marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
- return 0;
-}
+ MARKER_STACK_UNWIND(truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+}
int32_t
-marker_get_oldpath_contribution (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, dict_t *xdata)
+marker_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- marker_local_t *local = NULL, *oplocal = NULL;
- char contri_key[512] = {0, };
- int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- local = frame->local;
- oplocal = local->oplocal;
-
- if (op_ret < 0) {
- local->err = op_errno;
- gf_log (this->name, GF_LOG_WARNING,
- "cannot hold inodelk on %s (gfid:%s) (%s)",
- local->next_lock_on->path,
- uuid_utoa (local->next_lock_on->inode->gfid),
- strerror (op_errno));
- goto lock_err;
- }
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s occurred while "
+ "truncating a file ",
+ strerror(op_errno));
+ }
- GET_CONTRI_KEY (contri_key, oplocal->loc.parent->gfid, ret);
- if (ret < 0) {
- local->err = errno;
- goto quota_err;
- }
+ local = (marker_local_t *)frame->local;
- /* getxattr requires uid and gid to be 0,
- * reset them in the callback.
- */
- MARKER_SET_UID_GID (frame, local, frame->root);
+ frame->local = NULL;
- if (uuid_is_null (oplocal->loc.gfid))
- uuid_copy (oplocal->loc.gfid,
- oplocal->loc.inode->gfid);
+ STACK_UNWIND_STRICT(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
- GF_UUID_ASSERT (oplocal->loc.gfid);
+ if (op_ret == -1 || local == NULL)
+ goto out;
- STACK_WIND_COOKIE (frame, marker_get_newpath_contribution,
- frame->cookie, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- &oplocal->loc, contri_key, NULL);
- return 0;
+ priv = this->private;
-quota_err:
- marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
- return 0;
+ if (priv->feature_enabled & GF_QUOTA) {
+ if (postbuf && IS_DHT_LINKFILE_MODE(postbuf))
+ mq_initiate_quota_txn(this, &local->loc, NULL);
+ else
+ mq_initiate_quota_txn(this, &local->loc, postbuf);
+ }
-lock_err:
- if ((local->next_lock_on == NULL)
- || (local->next_lock_on == &local->parent_loc)) {
- local->next_lock_on = NULL;
- marker_rename_release_oldp_lock (frame, NULL, this, 0, 0, NULL);
- } else {
- marker_rename_release_newp_lock (frame, NULL, this, 0, 0, NULL);
- }
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
+out:
+ marker_local_unref(local);
- return 0;
+ return 0;
}
-
int32_t
-marker_rename_inodelk_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+marker_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- marker_local_t *local = NULL, *oplocal = NULL;
- loc_t *loc = NULL;
- struct gf_flock lock = {0, };
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- local = frame->local;
- oplocal = local->oplocal;
+ priv = this->private;
- if (op_ret < 0) {
- if (local->next_lock_on != &oplocal->parent_loc) {
- loc = &oplocal->parent_loc;
- } else {
- loc = &local->parent_loc;
- }
-
- local->err = op_errno;
- gf_log (this->name, GF_LOG_WARNING,
- "cannot hold inodelk on %s (gfid:%s) (%s)",
- loc->path, uuid_utoa (loc->inode->gfid),
- strerror (op_errno));
- goto err;
- }
+ if (priv->feature_enabled == 0)
+ goto wind;
- if (local->next_lock_on != NULL) {
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
-
- STACK_WIND (frame,
- marker_get_oldpath_contribution,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- this->name, local->next_lock_on,
- F_SETLKW, &lock, NULL);
- } else {
- marker_get_oldpath_contribution (frame, 0, this, 0, 0, NULL);
- }
+ local = mem_get0(this->local_pool);
- return 0;
+ MARKER_INIT_LOCAL(frame, local);
+
+ ret = marker_inode_loc_fill(fd->inode, &local->loc);
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND(frame, marker_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
err:
- marker_rename_done (frame, NULL, this, 0, 0, NULL);
- return 0;
-}
+ MARKER_STACK_UNWIND(ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+}
int32_t
-marker_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
+marker_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_local_t *oplocal = NULL;
- marker_conf_t *priv = NULL;
- struct gf_flock lock = {0, };
- loc_t *lock_on = NULL;
-
- priv = this->private;
-
- if (priv->feature_enabled == 0)
- goto rename_wind;
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
- local = mem_get0 (this->local_pool);
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s occurred while "
+ "creating symlinks ",
+ strerror(op_errno));
+ }
- MARKER_INIT_LOCAL (frame, local);
+ local = (marker_local_t *)frame->local;
- oplocal = mem_get0 (this->local_pool);
+ frame->local = NULL;
+ priv = this->private;
- MARKER_INIT_LOCAL (frame, oplocal);
+ if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) {
+ ctx = mq_inode_ctx_new(inode, this);
+ if (ctx == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "mq_inode_ctx_new "
+ "failed for %s",
+ uuid_utoa(inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
+ }
- frame->local = local;
+ STACK_UNWIND_STRICT(symlink, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
- local->oplocal = marker_local_ref (oplocal);
+ if (op_ret == -1 || local == NULL)
+ goto out;
- ret = loc_copy (&local->loc, newloc);
- if (ret < 0)
- goto err;
+ if (gf_uuid_is_null(local->loc.gfid))
+ gf_uuid_copy(local->loc.gfid, buf->ia_gfid);
- ret = loc_copy (&oplocal->loc, oldloc);
- if (ret < 0)
- goto err;
+ if (priv->feature_enabled & GF_QUOTA) {
+ mq_create_xattrs_txn(this, &local->loc, buf);
+ }
- if (!(priv->feature_enabled & GF_QUOTA)) {
- goto rename_wind;
- }
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
+out:
+ marker_local_unref(local);
- ret = mq_inode_loc_fill (NULL, newloc->parent, &local->parent_loc);
- if (ret < 0)
- goto err;
+ return 0;
+}
- ret = mq_inode_loc_fill (NULL, oldloc->parent, &oplocal->parent_loc);
- if (ret < 0)
- goto err;
+int
+marker_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- if ((newloc->inode != NULL) && (newloc->parent != oldloc->parent)
- && (uuid_compare (newloc->parent->gfid,
- oldloc->parent->gfid) < 0)) {
- lock_on = &local->parent_loc;
- local->next_lock_on = &oplocal->parent_loc;
- } else {
- lock_on = &oplocal->parent_loc;
- if ((newloc->inode != NULL) && (newloc->parent
- != oldloc->parent)) {
- local->next_lock_on = &local->parent_loc;
- }
- }
+ priv = this->private;
- lock.l_len = 0;
- lock.l_start = 0;
- lock.l_type = F_WRLCK;
- lock.l_whence = SEEK_SET;
+ if (priv->feature_enabled == 0)
+ goto wind;
- STACK_WIND (frame,
- marker_rename_inodelk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- this->name, lock_on,
- F_SETLKW, &lock, NULL);
+ local = mem_get0(this->local_pool);
- return 0;
+ MARKER_INIT_LOCAL(frame, local);
-rename_wind:
- STACK_WIND (frame, marker_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ ret = loc_copy(&local->loc, loc);
- return 0;
+ if (ret == -1)
+ goto err;
+wind:
+ STACK_WIND(frame, marker_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask, xdata);
+ return 0;
err:
- STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM, NULL,
- NULL, NULL, NULL, NULL, NULL);
+ MARKER_STACK_UNWIND(symlink, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
- return 0;
+ return 0;
}
-
int32_t
-marker_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+marker_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+ quota_inode_ctx_t *ctx = NULL;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
- "truncating a file ", strerror (op_errno));
- }
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s occurred with "
+ "mknod ",
+ strerror(op_errno));
+ }
- local = (marker_local_t *) frame->local;
+ local = (marker_local_t *)frame->local;
- frame->local = NULL;
+ frame->local = NULL;
+ priv = this->private;
- STACK_UNWIND_STRICT (truncate, frame, op_ret, op_errno, prebuf,
- postbuf, xdata);
+ if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) {
+ ctx = mq_inode_ctx_new(inode, this);
+ if (ctx == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "mq_inode_ctx_new "
+ "failed for %s",
+ uuid_utoa(inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
+ }
- if (op_ret == -1 || local == NULL)
- goto out;
+ STACK_UNWIND_STRICT(mknod, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
- priv = this->private;
+ if (op_ret == -1 || local == NULL)
+ goto out;
- if (priv->feature_enabled & GF_QUOTA)
- mq_initiate_quota_txn (this, &local->loc);
+ if (gf_uuid_is_null(local->loc.gfid))
+ gf_uuid_copy(local->loc.gfid, buf->ia_gfid);
- if (priv->feature_enabled & GF_XTIME)
- marker_xtime_update_marks (this, local);
+ if ((priv->feature_enabled & GF_QUOTA) && (S_ISREG(local->mode))) {
+ mq_create_xattrs_txn(this, &local->loc, buf);
+ }
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
out:
- marker_local_unref (local);
+ marker_local_unref(local);
- return 0;
+ return 0;
}
-int32_t
-marker_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
+int
+marker_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled == 0)
- goto wind;
+ if (priv->feature_enabled == 0)
+ goto wind;
- local = mem_get0 (this->local_pool);
+ local = mem_get0(this->local_pool);
- MARKER_INIT_LOCAL (frame, local);
+ MARKER_INIT_LOCAL(frame, local);
- ret = loc_copy (&local->loc, loc);
+ ret = loc_copy(&local->loc, loc);
- if (ret == -1)
- goto err;
+ local->mode = mode;
+
+ if (ret == -1)
+ goto err;
wind:
- STACK_WIND (frame, marker_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
- return 0;
+ STACK_WIND(frame, marker_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
+ return 0;
err:
- STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ MARKER_STACK_UNWIND(mknod, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
int32_t
-marker_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+marker_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
- "truncating a file ", strerror (op_errno));
- }
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s occurred while "
+ "fallocating a file ",
+ strerror(op_errno));
+ }
- local = (marker_local_t *) frame->local;
+ local = (marker_local_t *)frame->local;
- frame->local = NULL;
+ frame->local = NULL;
- STACK_UNWIND_STRICT (ftruncate, frame, op_ret, op_errno, prebuf,
- postbuf, xdata);
+ STACK_UNWIND_STRICT(fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
- if (op_ret == -1 || local == NULL)
- goto out;
+ if (op_ret == -1 || local == NULL)
+ goto out;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled & GF_QUOTA)
- mq_initiate_quota_txn (this, &local->loc);
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_initiate_quota_txn(this, &local->loc, postbuf);
- if (priv->feature_enabled & GF_XTIME)
- marker_xtime_update_marks (this, local);
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
out:
- marker_local_unref (local);
+ marker_local_unref(local);
- return 0;
+ return 0;
}
int32_t
-marker_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
+marker_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled == 0)
- goto wind;
+ if (priv->feature_enabled == 0)
+ goto wind;
- local = mem_get0 (this->local_pool);
+ local = mem_get0(this->local_pool);
- MARKER_INIT_LOCAL (frame, local);
+ MARKER_INIT_LOCAL(frame, local);
- ret = marker_inode_loc_fill (fd->inode, &local->loc);
+ ret = marker_inode_loc_fill(fd->inode, &local->loc);
- if (ret == -1)
- goto err;
+ if (ret == -1)
+ goto err;
wind:
- STACK_WIND (frame, marker_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
- return 0;
+ STACK_WIND(frame, marker_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+ return 0;
err:
- STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ MARKER_STACK_UNWIND(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
int32_t
-marker_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+marker_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- marker_conf_t *priv = NULL;
- marker_local_t *local = NULL;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
- "creating symlinks ", strerror (op_errno));
- }
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE, "%s occurred during discard",
+ strerror(op_errno));
+ }
- local = (marker_local_t *) frame->local;
-
- frame->local = NULL;
+ local = (marker_local_t *)frame->local;
- STACK_UNWIND_STRICT (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
+ frame->local = NULL;
- if (op_ret == -1 || local == NULL)
- goto out;
+ STACK_UNWIND_STRICT(discard, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
- if (uuid_is_null (local->loc.gfid))
- uuid_copy (local->loc.gfid, buf->ia_gfid);
+ if (op_ret == -1 || local == NULL)
+ goto out;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled & GF_QUOTA)
- mq_set_inode_xattr (this, &local->loc);
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_initiate_quota_txn(this, &local->loc, postbuf);
- if (priv->feature_enabled & GF_XTIME)
- marker_xtime_update_marks (this, local);
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
out:
- marker_local_unref (local);
+ marker_local_unref(local);
- return 0;
+ return 0;
}
-int
-marker_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, mode_t umask, dict_t *xdata)
+int32_t
+marker_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled == 0)
- goto wind;
+ if (priv->feature_enabled == 0)
+ goto wind;
- local = mem_get0 (this->local_pool);
+ local = mem_get0(this->local_pool);
- MARKER_INIT_LOCAL (frame, local);
+ MARKER_INIT_LOCAL(frame, local);
- ret = loc_copy (&local->loc, loc);
+ ret = marker_inode_loc_fill(fd->inode, &local->loc);
- if (ret == -1)
- goto err;
+ if (ret == -1)
+ goto err;
wind:
- STACK_WIND (frame, marker_symlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
- xdata);
- return 0;
+ STACK_WIND(frame, marker_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+ return 0;
err:
- STACK_UNWIND_STRICT (symlink, frame, -1, ENOMEM, NULL,
- NULL, NULL, NULL, NULL);
- return 0;
-}
+ MARKER_STACK_UNWIND(discard, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+}
int32_t
-marker_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+marker_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
- "creating symlinks ", strerror (op_errno));
- }
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE, "%s occurred during zerofill",
+ strerror(op_errno));
+ }
- local = (marker_local_t *) frame->local;
-
- frame->local = NULL;
+ local = (marker_local_t *)frame->local;
- STACK_UNWIND_STRICT (mknod, frame, op_ret, op_errno, inode,
- buf, preparent, postparent, xdata);
+ frame->local = NULL;
- if (op_ret == -1 || local == NULL)
- goto out;
+ STACK_UNWIND_STRICT(zerofill, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
- if (uuid_is_null (local->loc.gfid))
- uuid_copy (local->loc.gfid, buf->ia_gfid);
+ if (op_ret == -1 || local == NULL)
+ goto out;
- priv = this->private;
+ priv = this->private;
- if ((priv->feature_enabled & GF_QUOTA) && (S_ISREG (local->mode))) {
- mq_set_inode_xattr (this, &local->loc);
- }
+ if (priv->feature_enabled & GF_QUOTA)
+ mq_initiate_quota_txn(this, &local->loc, postbuf);
- if (priv->feature_enabled & GF_XTIME)
- marker_xtime_update_marks (this, local);
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
out:
- marker_local_unref (local);
+ marker_local_unref(local);
- return 0;
+ return 0;
}
-int
-marker_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata)
+int32_t
+marker_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled == 0)
- goto wind;
+ if (priv->feature_enabled == 0)
+ goto wind;
- local = mem_get0 (this->local_pool);
+ local = mem_get0(this->local_pool);
- MARKER_INIT_LOCAL (frame, local);
+ MARKER_INIT_LOCAL(frame, local);
- ret = loc_copy (&local->loc, loc);
+ ret = marker_inode_loc_fill(fd->inode, &local->loc);
- local->mode = mode;
-
- if (ret == -1)
- goto err;
+ if (ret == -1)
+ goto err;
wind:
- STACK_WIND (frame, marker_mknod_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
- xdata);
- return 0;
+ STACK_WIND(frame, marker_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
err:
- STACK_UNWIND_STRICT (mknod, frame, -1, ENOMEM, NULL,
- NULL, NULL, NULL, NULL);
- return 0;
-}
+ MARKER_STACK_UNWIND(zerofill, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+}
/* when a call from the special client is received on
* key trusted.glusterfs.volume-mark with value "RESET"
@@ -1814,821 +2323,1246 @@ err:
* timestamp file.
*/
int32_t
-call_from_sp_client_to_reset_tmfile (call_frame_t *frame,
- xlator_t *this,
- dict_t *dict)
+call_from_sp_client_to_reset_tmfile(call_frame_t *frame, xlator_t *this,
+ dict_t *dict)
{
- int32_t fd = 0;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- data_t *data = NULL;
- marker_conf_t *priv = NULL;
+ int32_t fd = 0;
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ data_t *data = NULL;
+ marker_conf_t *priv = NULL;
- if (frame == NULL || this == NULL || dict == NULL)
- return -1;
+ if (frame == NULL || this == NULL || dict == NULL)
+ return -1;
- priv = this->private;
+ priv = this->private;
- data = dict_get (dict, "trusted.glusterfs.volume-mark");
- if (data == NULL)
- return -1;
+ data = dict_get(dict, "trusted.glusterfs.volume-mark");
+ if (data == NULL)
+ return -1;
- if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
- op_ret = -1;
- op_errno = EPERM;
+ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+ op_ret = -1;
+ op_errno = EPERM;
- goto out;
+ goto out;
+ }
+
+ if (data->len == 0 ||
+ (data->len == 5 && memcmp(data->data, "RESET", 5) == 0)) {
+ fd = open(priv->timestamp_file, O_WRONLY | O_TRUNC);
+ if (fd != -1) {
+ /* TODO check whether the O_TRUNC would update the
+ * timestamps on a zero length file on all machies.
+ */
+ sys_close(fd);
}
- if (data->len == 0 || (data->len == 5 &&
- memcmp (data->data, "RESET", 5) == 0)) {
- fd = open (priv->timestamp_file, O_WRONLY|O_TRUNC);
- if (fd != -1) {
- /* TODO check whether the O_TRUNC would update the
- * timestamps on a zero length file on all machies.
- */
- close (fd);
- }
-
- if (fd != -1 || errno == ENOENT) {
- op_ret = 0;
- op_errno = 0;
- } else {
- op_ret = -1;
- op_errno = errno;
- }
+ if (fd != -1 || errno == ENOENT) {
+ op_ret = 0;
+ op_errno = 0;
} else {
- op_ret = -1;
- op_errno = EINVAL;
+ op_ret = -1;
+ op_errno = errno;
}
+ } else {
+ op_ret = -1;
+ op_errno = EINVAL;
+ }
out:
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, NULL);
+ STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, NULL);
- return 0;
+ return 0;
}
-
int32_t
-marker_setxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+marker_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "%s occurred in "
- "setxattr ", strerror (op_errno));
- }
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s occurred in "
+ "setxattr ",
+ strerror(op_errno));
+ }
- local = (marker_local_t *) frame->local;
+ local = (marker_local_t *)frame->local;
- frame->local = NULL;
+ frame->local = NULL;
- STACK_UNWIND_STRICT (setxattr, frame, op_ret, op_errno, xdata);
+ STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata);
- if (op_ret == -1 || local == NULL)
- goto out;
+ if (op_ret == -1 || local == NULL)
+ goto out;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled & GF_XTIME)
- marker_xtime_update_marks (this, local);
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
out:
- marker_local_unref (local);
+ marker_local_unref(local);
- return 0;
+ return 0;
+}
+
+int
+remove_quota_keys(dict_t *dict, char *k, data_t *v, void *data)
+{
+ call_frame_t *frame = data;
+ marker_local_t *local = frame->local;
+ xlator_t *this = frame->this;
+ marker_conf_t *priv = NULL;
+ char ver_str[NAME_MAX] = {
+ 0,
+ };
+ char *dot = NULL;
+ int ret = -1;
+
+ priv = this->private;
+
+ /* If quota is enabled immediately after disable.
+ * quota healing starts creating new xattrs
+ * before completing the cleanup operation.
+ * So we should check if the xattr is the new.
+ * Do not remove xattr if its xattr
+ * version is same as current version
+ */
+ if ((priv->feature_enabled & GF_QUOTA) && priv->version > 0) {
+ snprintf(ver_str, sizeof(ver_str), ".%d", priv->version);
+ dot = strrchr(k, '.');
+ if (dot && !strcmp(dot, ver_str))
+ return 0;
+ }
+
+ ret = syncop_removexattr(FIRST_CHILD(this), &local->loc, k, 0, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "%s: Failed to remove "
+ "extended attribute: %s",
+ local->loc.path, k);
+ return -1;
+ }
+ return 0;
+}
+
+int
+quota_xattr_cleaner_cbk(int ret, call_frame_t *frame, void *args)
+{
+ dict_t *xdata = args;
+ int op_ret = -1;
+ int op_errno = 0;
+
+ op_ret = (ret < 0) ? -1 : 0;
+ op_errno = -ret;
+
+ MARKER_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+ return ret;
+}
+
+int
+quota_xattr_cleaner(void *args)
+{
+ struct synctask *task = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ marker_local_t *local = NULL;
+ dict_t *xdata = NULL;
+ int ret = -1;
+
+ task = synctask_get();
+ if (!task)
+ goto out;
+
+ frame = task->frame;
+ this = frame->this;
+ local = frame->local;
+
+ ret = syncop_listxattr(FIRST_CHILD(this), &local->loc, &xdata, NULL, NULL);
+ if (ret == -1) {
+ ret = -errno;
+ goto out;
+ }
+
+ ret = dict_foreach_fnmatch(xdata, "trusted.glusterfs.quota.*",
+ remove_quota_keys, frame);
+ if (ret == -1) {
+ ret = -errno;
+ goto out;
+ }
+ ret = dict_foreach_fnmatch(xdata, PGFID_XATTR_KEY_PREFIX "*",
+ remove_quota_keys, frame);
+ if (ret == -1) {
+ ret = -errno;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (xdata)
+ dict_unref(xdata);
+
+ return ret;
+}
+
+int
+marker_do_xattr_cleanup(call_frame_t *frame, xlator_t *this, dict_t *xdata,
+ loc_t *loc)
+{
+ int ret = -1;
+ marker_local_t *local = NULL;
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto out;
+
+ MARKER_INIT_LOCAL(frame, local);
+
+ loc_copy(&local->loc, loc);
+ ret = synctask_new(this->ctx->env, quota_xattr_cleaner,
+ quota_xattr_cleaner_cbk, frame, xdata);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to create synctask "
+ "for cleaning up quota extended attributes");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret)
+ MARKER_STACK_UNWIND(setxattr, frame, -1, ENOMEM, xdata);
+
+ return ret;
+}
+
+static gf_boolean_t
+marker_xattr_cleanup_cmd(dict_t *dict)
+{
+ return (dict_get(dict, VIRTUAL_QUOTA_XATTR_CLEANUP_KEY) != NULL);
}
int32_t
-marker_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
+marker_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+ int op_errno = ENOMEM;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled == 0)
- goto wind;
+ if (marker_xattr_cleanup_cmd(dict)) {
+ if (frame->root->uid != 0 || frame->root->gid != 0) {
+ op_errno = EPERM;
+ ret = -1;
+ goto err;
+ }
- ret = call_from_sp_client_to_reset_tmfile (frame, this, dict);
- if (ret == 0)
- return 0;
+ /* The following function does the cleanup and then unwinds the
+ * corresponding call*/
+ loc_path(loc, NULL);
+ marker_do_xattr_cleanup(frame, this, xdata, loc);
+ return 0;
+ }
- local = mem_get0 (this->local_pool);
+ ret = marker_key_replace_with_ver(this, dict);
+ if (ret < 0)
+ goto err;
- MARKER_INIT_LOCAL (frame, local);
+ if (priv->feature_enabled == 0)
+ goto wind;
- ret = loc_copy (&local->loc, loc);
+ ret = call_from_sp_client_to_reset_tmfile(frame, this, dict);
+ if (ret == 0)
+ return 0;
- if (ret == -1)
- goto err;
+ local = mem_get0(this->local_pool);
+
+ MARKER_INIT_LOCAL(frame, local);
+
+ ret = loc_copy(&local->loc, loc);
+
+ if (ret == -1)
+ goto err;
wind:
- STACK_WIND (frame, marker_setxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
- return 0;
+ STACK_WIND(frame, marker_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
+ return 0;
err:
- STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM, NULL);
+ MARKER_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
- return 0;
+ return 0;
}
-
int32_t
-marker_fsetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+marker_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "%s occurred while "
- "creating symlinks ", strerror (op_errno));
- }
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s occurred in "
+ "fsetxattr",
+ strerror(op_errno));
+ }
- local = (marker_local_t *) frame->local;
+ local = (marker_local_t *)frame->local;
- frame->local = NULL;
+ frame->local = NULL;
- STACK_UNWIND_STRICT (fsetxattr, frame, op_ret, op_errno, xdata);
+ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata);
- if (op_ret == -1 || local == NULL)
- goto out;
+ if (op_ret == -1 || local == NULL)
+ goto out;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled & GF_XTIME)
- marker_xtime_update_marks (this, local);
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
out:
- marker_local_unref (local);
+ marker_local_unref(local);
- return 0;
+ return 0;
}
int32_t
-marker_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
+marker_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled == 0)
- goto wind;
+ if (priv->feature_enabled == 0)
+ goto wind;
- ret = call_from_sp_client_to_reset_tmfile (frame, this, dict);
- if (ret == 0)
- return 0;
+ ret = call_from_sp_client_to_reset_tmfile(frame, this, dict);
+ if (ret == 0)
+ return 0;
- local = mem_get0 (this->local_pool);
+ local = mem_get0(this->local_pool);
- MARKER_INIT_LOCAL (frame, local);
+ MARKER_INIT_LOCAL(frame, local);
- ret = marker_inode_loc_fill (fd->inode, &local->loc);
+ ret = marker_inode_loc_fill(fd->inode, &local->loc);
- if (ret == -1)
- goto err;
+ if (ret == -1)
+ goto err;
wind:
- STACK_WIND (frame, marker_fsetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
- return 0;
+ STACK_WIND(frame, marker_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
err:
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM, NULL);
+ MARKER_STACK_UNWIND(fsetxattr, frame, -1, ENOMEM, NULL);
- return 0;
+ return 0;
}
-
int32_t
-marker_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+marker_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
{
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s occurred while "
- "creating symlinks ", strerror (op_errno));
- }
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s occurred in "
+ "fsetattr ",
+ strerror(op_errno));
+ }
- local = (marker_local_t *) frame->local;
+ local = (marker_local_t *)frame->local;
- frame->local = NULL;
+ frame->local = NULL;
- STACK_UNWIND_STRICT (fsetattr, frame, op_ret, op_errno, statpre,
- statpost, xdata);
+ STACK_UNWIND_STRICT(fsetattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
- if (op_ret == -1 || local == NULL)
- goto out;
+ if (op_ret == -1 || local == NULL)
+ goto out;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled & GF_XTIME)
- marker_xtime_update_marks (this, local);
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
out:
- marker_local_unref (local);
+ marker_local_unref(local);
- return 0;
+ return 0;
}
-
int32_t
-marker_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+marker_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled == 0)
- goto wind;
+ if (priv->feature_enabled == 0)
+ goto wind;
- local = mem_get0 (this->local_pool);
+ local = mem_get0(this->local_pool);
- MARKER_INIT_LOCAL (frame, local);
+ MARKER_INIT_LOCAL(frame, local);
- ret = marker_inode_loc_fill (fd->inode, &local->loc);
+ ret = marker_inode_loc_fill(fd->inode, &local->loc);
- if (ret == -1)
- goto err;
+ if (ret == -1)
+ goto err;
wind:
- STACK_WIND (frame, marker_fsetattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, xdata);
- return 0;
+ STACK_WIND(frame, marker_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
err:
- STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+ MARKER_STACK_UNWIND(fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
int32_t
-marker_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+marker_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
{
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- local = (marker_local_t *) frame->local;
+ local = (marker_local_t *)frame->local;
- frame->local = NULL;
+ frame->local = NULL;
- if (op_ret == -1) {
- gf_log (this->name, ((op_errno == ENOENT) ? GF_LOG_DEBUG :
- GF_LOG_ERROR),
- "%s occurred during setattr of %s",
- strerror (op_errno),
- (local ? local->loc.path : "<nul>"));
- }
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE, "%s occurred during setattr of %s",
+ strerror(op_errno), (local ? local->loc.path : "<nul>"));
+ }
- STACK_UNWIND_STRICT (setattr, frame, op_ret, op_errno, statpre,
- statpost, xdata);
+ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
- if (op_ret == -1 || local == NULL)
- goto out;
+ if (op_ret == -1 || local == NULL)
+ goto out;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled & GF_XTIME)
- marker_xtime_update_marks (this, local);
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
out:
- marker_local_unref (local);
+ marker_local_unref(local);
- return 0;
+ return 0;
}
int32_t
-marker_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+marker_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled == 0)
- goto wind;
+ if (priv->feature_enabled == 0)
+ goto wind;
- local = mem_get0 (this->local_pool);
+ local = mem_get0(this->local_pool);
- MARKER_INIT_LOCAL (frame, local);
+ MARKER_INIT_LOCAL(frame, local);
- ret = loc_copy (&local->loc, loc);
+ ret = loc_copy(&local->loc, loc);
- if (ret == -1)
- goto err;
+ if (ret == -1)
+ goto err;
wind:
- STACK_WIND (frame, marker_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
- return 0;
+ STACK_WIND(frame, marker_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
err:
- STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+ MARKER_STACK_UNWIND(setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
+ return 0;
}
-
int32_t
-marker_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+marker_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_ERROR, "%s occurred while "
- "creating symlinks ", strerror (op_errno));
- }
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE,
+ "%s occurred while "
+ "removing extended attribute",
+ strerror(op_errno));
+ }
- local = (marker_local_t *) frame->local;
+ local = (marker_local_t *)frame->local;
- frame->local = NULL;
+ frame->local = NULL;
- STACK_UNWIND_STRICT (removexattr, frame, op_ret, op_errno, xdata);
+ STACK_UNWIND_STRICT(removexattr, frame, op_ret, op_errno, xdata);
- if (op_ret == -1 || local == NULL)
- goto out;
+ if (op_ret == -1 || local == NULL)
+ goto out;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled & GF_XTIME)
- marker_xtime_update_marks (this, local);
+ if (priv->feature_enabled & GF_XTIME)
+ marker_xtime_update_marks(this, local);
out:
- marker_local_unref (local);
+ marker_local_unref(local);
- return 0;
+ return 0;
}
int32_t
-marker_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
-{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
-
- priv = this->private;
+marker_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int32_t ret = -1;
+ int32_t i = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
+ char key[QUOTA_KEY_MAX] = {
+ 0,
+ };
+
+ priv = this->private;
+
+ if (name) {
+ for (i = 0; mq_ext_xattrs[i]; i++) {
+ if (strcmp(name, mq_ext_xattrs[i]))
+ continue;
+
+ GET_QUOTA_KEY(this, key, mq_ext_xattrs[i], ret);
+ if (ret < 0)
+ goto err;
+ name = key;
+ break;
+ }
+ }
- if (priv->feature_enabled == 0)
- goto wind;
+ if (priv->feature_enabled == 0)
+ goto wind;
- local = mem_get0 (this->local_pool);
+ local = mem_get0(this->local_pool);
- MARKER_INIT_LOCAL (frame, local);
+ MARKER_INIT_LOCAL(frame, local);
- ret = loc_copy (&local->loc, loc);
+ ret = loc_copy(&local->loc, loc);
- if (ret == -1)
- goto err;
+ if (ret == -1)
+ goto err;
wind:
- STACK_WIND (frame, marker_removexattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
- return 0;
+ STACK_WIND(frame, marker_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ return 0;
err:
- STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM, NULL);
+ MARKER_STACK_UNWIND(removexattr, frame, -1, ENOMEM, NULL);
- return 0;
+ return 0;
}
-
-int32_t
-marker_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
+static gf_boolean_t
+__has_quota_xattrs(dict_t *xattrs)
{
- marker_conf_t *priv = NULL;
- marker_local_t *local = NULL;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_TRACE, "lookup failed with %s",
- strerror (op_errno));
- }
+ if (dict_foreach_match(xattrs, _is_quota_internal_xattr, NULL,
+ dict_null_foreach_fn, NULL) > 0)
+ return _gf_true;
- local = (marker_local_t *) frame->local;
-
- frame->local = NULL;
+ return _gf_false;
+}
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
- dict, postparent);
+int32_t
+marker_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *dict, struct iatt *postparent)
+{
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ dict_t *xattrs = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ int32_t ret = -1;
+
+ priv = this->private;
+ local = (marker_local_t *)frame->local;
+ frame->local = NULL;
+
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_TRACE, "lookup failed with %s",
+ strerror(op_errno));
+ goto unwind;
+ }
+
+ ret = marker_key_set_ver(this, dict);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ if (dict && __has_quota_xattrs(dict)) {
+ xattrs = dict_copy_with_ref(dict, NULL);
+ if (!xattrs) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ } else {
+ marker_filter_internal_xattrs(this, xattrs);
+ }
+ } else if (dict) {
+ xattrs = dict_ref(dict);
+ }
- if (op_ret == -1 || local == NULL)
- goto out;
+ if (op_ret >= 0 && inode && (priv->feature_enabled & GF_QUOTA)) {
+ ctx = mq_inode_ctx_new(inode, this);
+ if (ctx == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "mq_inode_ctx_new "
+ "failed for %s",
+ uuid_utoa(inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ }
+ }
- /* copy the gfid from the stat structure instead of inode,
- * since if the lookup is fresh lookup, then the inode
- * would have not yet linked to the inode table which happens
- * in protocol/server.
- */
- if (uuid_is_null (local->loc.gfid))
- uuid_copy (local->loc.gfid, buf->ia_gfid);
+unwind:
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xattrs,
+ postparent);
+ if (op_ret == -1 || local == NULL)
+ goto out;
- priv = this->private;
+ /* copy the gfid from the stat structure instead of inode,
+ * since if the lookup is fresh lookup, then the inode
+ * would have not yet linked to the inode table which happens
+ * in protocol/server.
+ */
+ if (gf_uuid_is_null(local->loc.gfid))
+ gf_uuid_copy(local->loc.gfid, buf->ia_gfid);
- if (priv->feature_enabled & GF_QUOTA) {
- mq_xattr_state (this, &local->loc, dict, *buf);
- }
+ if (priv->feature_enabled & GF_QUOTA) {
+ mq_xattr_state(this, &local->loc, dict, buf);
+ }
out:
- marker_local_unref (local);
+ marker_local_unref(local);
+ if (xattrs)
+ dict_unref(xattrs);
- return 0;
+ return 0;
}
int32_t
-marker_lookup (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *xattr_req)
+marker_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xattr_req)
{
- int32_t ret = 0;
- marker_local_t *local = NULL;
- marker_conf_t *priv = NULL;
+ int32_t ret = 0;
+ marker_local_t *local = NULL;
+ marker_conf_t *priv = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->feature_enabled == 0)
- goto wind;
+ xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new();
+ if (!xattr_req)
+ goto err;
- local = mem_get0 (this->local_pool);
+ ret = marker_key_replace_with_ver(this, xattr_req);
+ if (ret < 0)
+ goto err;
- MARKER_INIT_LOCAL (frame, local);
+ if (priv->feature_enabled == 0)
+ goto wind;
- ret = loc_copy (&local->loc, loc);
- if (ret == -1)
- goto err;
+ local = mem_get0(this->local_pool);
+ if (local == NULL)
+ goto err;
+
+ MARKER_INIT_LOCAL(frame, local);
+
+ ret = loc_copy(&local->loc, loc);
+ if (ret == -1)
+ goto err;
+
+ if ((priv->feature_enabled & GF_QUOTA))
+ mq_req_xattr(this, loc, xattr_req, NULL, NULL);
- if ((priv->feature_enabled & GF_QUOTA) && xattr_req)
- mq_req_xattr (this, loc, xattr_req);
wind:
- STACK_WIND (frame, marker_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
- return 0;
+ STACK_WIND(frame, marker_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+
+ dict_unref(xattr_req);
+
+ return 0;
err:
- STACK_UNWIND_STRICT (lookup, frame, -1, 0, NULL, NULL, NULL, NULL);
+ MARKER_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
- return 0;
+ if (xattr_req)
+ dict_unref(xattr_req);
+
+ return 0;
}
int
-marker_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries,
- dict_t *xdata)
+marker_build_ancestry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- gf_dirent_t *entry = NULL;
+ gf_dirent_t *entry = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ int ret = -1;
- if (op_ret <= 0)
- goto unwind;
+ if ((op_ret <= 0) || (entries == NULL)) {
+ goto out;
+ }
- list_for_each_entry (entry, &entries->list, list) {
- /* TODO: fill things */
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ if (entry->inode == NULL)
+ continue;
+
+ ret = marker_key_set_ver(this, entry->dict);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ break;
}
-unwind:
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+ ctx = mq_inode_ctx_new(entry->inode, this);
+ if (ctx == NULL)
+ gf_log(this->name, GF_LOG_WARNING,
+ "mq_inode_ctx_new "
+ "failed for %s",
+ uuid_utoa(entry->inode->gfid));
+ }
- return 0;
+out:
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
+ return 0;
}
int
-marker_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *dict)
-{
- marker_conf_t *priv = NULL;
+marker_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+ marker_conf_t *priv = NULL;
+ marker_local_t *local = NULL;
+ loc_t loc = {
+ 0,
+ };
+ int ret = -1;
+ char *resolvedpath = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+
+ if (op_ret <= 0)
+ goto unwind;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (!(priv->feature_enabled & GF_QUOTA) || (local == NULL)) {
+ goto unwind;
+ }
+
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ if ((strcmp(entry->d_name, ".") == 0) ||
+ (strcmp(entry->d_name, "..") == 0) || entry->inode == NULL)
+ continue;
+
+ loc.parent = inode_ref(local->loc.inode);
+ loc.inode = inode_ref(entry->inode);
+ ret = inode_path(loc.parent, entry->d_name, &resolvedpath);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to get the "
+ "path for the entry %s",
+ entry->d_name);
+ loc_wipe(&loc);
+ continue;
+ }
- priv = this->private;
+ loc.path = resolvedpath;
+ resolvedpath = NULL;
- if (priv->feature_enabled == 0)
- goto wind;
+ ctx = mq_inode_ctx_new(loc.inode, this);
+ if (ctx == NULL)
+ gf_log(this->name, GF_LOG_WARNING,
+ "mq_inode_ctx_new "
+ "failed for %s",
+ uuid_utoa(loc.inode->gfid));
- if ((priv->feature_enabled & GF_QUOTA) && dict)
- mq_req_xattr (this, NULL, dict);
+ mq_xattr_state(this, &loc, entry->dict, &entry->d_stat);
+ loc_wipe(&loc);
-wind:
- STACK_WIND (frame, marker_readdirp_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
- fd, size, offset, dict);
+ ret = marker_key_set_ver(this, entry->dict);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+ }
- return 0;
+unwind:
+ MARKER_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata);
+
+ return 0;
}
+int
+marker_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ marker_conf_t *priv = NULL;
+ loc_t loc = {
+ 0,
+ };
+ marker_local_t *local = NULL;
+ int ret = -1;
+
+ priv = this->private;
+
+ dict = dict ? dict_ref(dict) : dict_new();
+ if (!dict)
+ goto unwind;
+
+ ret = marker_key_replace_with_ver(this, dict);
+ if (ret < 0)
+ goto unwind;
+
+ if (dict_get(dict, GET_ANCESTRY_DENTRY_KEY)) {
+ STACK_WIND(frame, marker_build_ancestry_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict);
+ } else {
+ if (priv->feature_enabled & GF_QUOTA) {
+ local = mem_get0(this->local_pool);
+
+ MARKER_INIT_LOCAL(frame, local);
+
+ loc.parent = local->loc.inode = inode_ref(fd->inode);
+
+ mq_req_xattr(this, &loc, dict, NULL, NULL);
+ }
+
+ STACK_WIND(frame, marker_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict);
+ }
+
+ dict_unref(dict);
+ return 0;
+unwind:
+ MARKER_STACK_UNWIND(readdirp, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+}
int32_t
-mem_acct_init (xlator_t *this)
+mem_acct_init(xlator_t *this)
{
- int ret = -1;
-
- if (!this)
- return ret;
+ int ret = -1;
- ret = xlator_mem_acct_init (this, gf_marker_mt_end + 1);
+ if (!this)
+ return ret;
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
+ ret = xlator_mem_acct_init(this, gf_marker_mt_end + 1);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Memory accounting init"
+ " failed");
return ret;
-}
+ }
+ return ret;
+}
int32_t
-init_xtime_priv (xlator_t *this, dict_t *options)
+init_xtime_priv(xlator_t *this, dict_t *options)
{
- data_t *data = NULL;
- int32_t ret = -1;
- marker_conf_t *priv = NULL;
+ int32_t ret = -1;
+ marker_conf_t *priv = NULL;
+ char *tmp_opt = NULL;
- GF_VALIDATE_OR_GOTO ("marker", this, out);
- GF_VALIDATE_OR_GOTO (this->name, options, out);
- GF_VALIDATE_OR_GOTO (this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO("marker", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, options, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
- priv = this->private;
+ priv = this->private;
- if((data = dict_get (options, VOLUME_UUID)) != NULL) {
- priv->volume_uuid = data->data;
+ ret = dict_get_str(options, "volume-uuid", &tmp_opt);
- ret = uuid_parse (priv->volume_uuid, priv->volume_uuid_bin);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid volume uuid %s", priv->volume_uuid);
- goto out;
- }
+ if (ret) {
+ priv->volume_uuid = NULL;
+ tmp_opt = "";
- ret = gf_asprintf (& (priv->marker_xattr), "%s.%s.%s",
- MARKER_XATTR_PREFIX, priv->volume_uuid,
- XTIME);
+ gf_log(this->name, GF_LOG_ERROR,
+ "please specify the volume-uuid"
+ "in the translator options");
- if (ret == -1){
- priv->marker_xattr = NULL;
+ return -1;
+ }
+ gf_asprintf(&priv->volume_uuid, "%s", tmp_opt);
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to allocate memory");
- goto out;
- }
+ ret = gf_uuid_parse(priv->volume_uuid, priv->volume_uuid_bin);
- gf_log (this->name, GF_LOG_DEBUG,
- "the volume-uuid = %s", priv->volume_uuid);
- } else {
- priv->volume_uuid = NULL;
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_ERROR, "invalid volume uuid %s",
+ priv->volume_uuid);
+ goto out;
+ }
- gf_log (this->name, GF_LOG_ERROR,
- "please specify the volume-uuid"
- "in the translator options");
+ ret = gf_asprintf(&(priv->marker_xattr), "%s.%s.%s", MARKER_XATTR_PREFIX,
+ priv->volume_uuid, XTIME);
- return -1;
- }
+ if (ret == -1) {
+ priv->marker_xattr = NULL;
+ goto out;
+ }
- if ((data = dict_get (options, TIMESTAMP_FILE)) != NULL) {
- priv->timestamp_file = data->data;
+ gf_log(this->name, GF_LOG_DEBUG, "volume-uuid = %s", priv->volume_uuid);
- gf_log (this->name, GF_LOG_DEBUG,
- "the timestamp-file is = %s",
- priv->timestamp_file);
+ ret = dict_get_str(options, "timestamp-file", &tmp_opt);
+ if (ret) {
+ priv->timestamp_file = NULL;
+ tmp_opt = "";
- } else {
- priv->timestamp_file = NULL;
+ gf_log(this->name, GF_LOG_ERROR,
+ "please specify the timestamp-file"
+ "in the translator options");
- gf_log (this->name, GF_LOG_ERROR,
- "please specify the timestamp-file"
- "in the translator options");
+ goto out;
+ }
- goto out;
- }
+ ret = gf_asprintf(&priv->timestamp_file, "%s", tmp_opt);
+ if (ret == -1) {
+ priv->timestamp_file = NULL;
+ goto out;
+ }
- ret = 0;
+ gf_log(this->name, GF_LOG_DEBUG, "the timestamp-file is = %s",
+ priv->timestamp_file);
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
void
-marker_xtime_priv_cleanup (xlator_t *this)
+marker_xtime_priv_cleanup(xlator_t *this)
{
- marker_conf_t *priv = NULL;
+ marker_conf_t *priv = NULL;
- GF_VALIDATE_OR_GOTO ("marker", this, out);
+ GF_VALIDATE_OR_GOTO("marker", this, out);
- priv = (marker_conf_t *) this->private;
+ priv = (marker_conf_t *)this->private;
- GF_VALIDATE_OR_GOTO (this->name, priv, out);
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
- if (priv->volume_uuid != NULL)
- GF_FREE (priv->volume_uuid);
+ GF_FREE(priv->volume_uuid);
- if (priv->timestamp_file != NULL)
- GF_FREE (priv->timestamp_file);
+ GF_FREE(priv->timestamp_file);
- if (priv->marker_xattr != NULL)
- GF_FREE (priv->marker_xattr);
+ GF_FREE(priv->marker_xattr);
out:
- return;
+ return;
}
void
-marker_priv_cleanup (xlator_t *this)
+marker_priv_cleanup(xlator_t *this)
{
- marker_conf_t *priv = NULL;
+ marker_conf_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("marker", this, out);
+
+ priv = (marker_conf_t *)this->private;
- GF_VALIDATE_OR_GOTO ("marker", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
- priv = (marker_conf_t *) this->private;
+ marker_xtime_priv_cleanup(this);
- GF_VALIDATE_OR_GOTO (this->name, priv, out);
+ LOCK_DESTROY(&priv->lock);
- marker_xtime_priv_cleanup (this);
+ GF_FREE(priv);
- LOCK_DESTROY (&priv->lock);
+ if (this->local_pool) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
- GF_FREE (priv);
out:
- return;
+ return;
}
int32_t
-reconfigure (xlator_t *this, dict_t *options)
-{
- int32_t ret = -1;
- data_t *data = NULL;
- gf_boolean_t flag = _gf_false;
- marker_conf_t *priv = NULL;
-
- GF_ASSERT (this);
- GF_ASSERT (this->private);
-
- priv = this->private;
-
- priv->feature_enabled = 0;
-
- GF_VALIDATE_OR_GOTO (this->name, options, out);
-
- data = dict_get (options, "quota");
- if (data) {
- ret = gf_string2boolean (data->data, &flag);
- if (ret == 0 && flag == _gf_true) {
- ret = init_quota_priv (this);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to initialize quota private");
- } else {
- priv->feature_enabled |= GF_QUOTA;
- }
- }
- }
-
- data = dict_get (options, "xtime");
- if (data) {
- ret = gf_string2boolean (data->data, &flag);
- if (ret == 0 && flag == _gf_true) {
- marker_xtime_priv_cleanup (this);
-
- ret = init_xtime_priv (this, options);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "failed to initialize xtime private, "
- "xtime updation will fail");
- } else {
- priv->feature_enabled |= GF_XTIME;
- }
- }
- }
+reconfigure(xlator_t *this, dict_t *options)
+{
+ int32_t ret = 0;
+ data_t *data = NULL;
+ gf_boolean_t flag = _gf_false;
+ marker_conf_t *priv = NULL;
+ int32_t version = 0;
+
+ GF_ASSERT(this);
+ GF_ASSERT(this->private);
+
+ priv = this->private;
+
+ priv->feature_enabled = 0;
+
+ GF_VALIDATE_OR_GOTO(this->name, options, out);
+
+ data = dict_get(options, "quota");
+ if (data) {
+ ret = gf_string2boolean(data->data, &flag);
+ if (ret == 0 && flag == _gf_true)
+ priv->feature_enabled |= GF_QUOTA;
+ }
+
+ data = dict_get(options, "inode-quota");
+ if (data) {
+ ret = gf_string2boolean(data->data, &flag);
+ if (ret == 0 && flag == _gf_true)
+ priv->feature_enabled |= GF_INODE_QUOTA;
+ }
+
+ data = dict_get(options, "quota-version");
+ if (data)
+ ret = gf_string2int32(data->data, &version);
+
+ if (priv->feature_enabled) {
+ if (version >= 0)
+ priv->version = version;
+ else
+ gf_log(this->name, GF_LOG_ERROR,
+ "Invalid quota "
+ "version %d",
+ priv->version);
+ }
+
+ data = dict_get(options, "xtime");
+ if (data) {
+ ret = gf_string2boolean(data->data, &flag);
+ if (ret == 0 && flag == _gf_true) {
+ marker_xtime_priv_cleanup(this);
+
+ ret = init_xtime_priv(this, options);
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "failed to initialize xtime private, "
+ "xtime updation will fail");
+ } else {
+ priv->feature_enabled |= GF_XTIME;
+ data = dict_get(options, "gsync-force-xtime");
+ if (!data)
+ goto out;
+ ret = gf_string2boolean(data->data, &flag);
+ if (ret == 0 && flag)
+ priv->feature_enabled |= GF_XTIME_GSYNC_FORCE;
+ }
+ }
+ }
out:
- return 0;
+ return ret;
}
-
int32_t
-init (xlator_t *this)
-{
- dict_t *options = NULL;
- data_t *data = NULL;
- int32_t ret = 0;
- gf_boolean_t flag = _gf_false;
- marker_conf_t *priv = NULL;
-
- if (!this->children) {
- gf_log (this->name, GF_LOG_ERROR,
- "marker translator needs subvolume defined.");
- return -1;
- }
+init(xlator_t *this)
+{
+ dict_t *options = NULL;
+ data_t *data = NULL;
+ int32_t ret = 0;
+ gf_boolean_t flag = _gf_false;
+ marker_conf_t *priv = NULL;
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "Volume is dangling.");
- return -1;
- }
+ if (!this->children) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "marker translator needs subvolume defined.");
+ return -1;
+ }
- options = this->options;
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_WARNING, "Volume is dangling.");
+ return -1;
+ }
- ALLOCATE_OR_GOTO (this->private, marker_conf_t, err);
+ options = this->options;
- priv = this->private;
+ ALLOCATE_OR_GOTO(this->private, marker_conf_t, err);
- priv->feature_enabled = 0;
+ priv = this->private;
- LOCK_INIT (&priv->lock);
+ priv->feature_enabled = 0;
+ priv->version = 0;
- data = dict_get (options, "quota");
- if (data) {
- ret = gf_string2boolean (data->data, &flag);
- if (ret == 0 && flag == _gf_true) {
- ret = init_quota_priv (this);
- if (ret < 0)
- goto err;
+ LOCK_INIT(&priv->lock);
- priv->feature_enabled |= GF_QUOTA;
- }
- }
+ data = dict_get(options, "quota");
+ if (data) {
+ ret = gf_string2boolean(data->data, &flag);
+ if (ret == 0 && flag == _gf_true)
+ priv->feature_enabled |= GF_QUOTA;
+ }
- data = dict_get (options, "xtime");
- if (data) {
- ret = gf_string2boolean (data->data, &flag);
- if (ret == 0 && flag == _gf_true) {
- ret = init_xtime_priv (this, options);
- if (ret < 0)
- goto err;
+ data = dict_get(options, "inode-quota");
+ if (data) {
+ ret = gf_string2boolean(data->data, &flag);
+ if (ret == 0 && flag == _gf_true)
+ priv->feature_enabled |= GF_INODE_QUOTA;
+ }
- priv->feature_enabled |= GF_XTIME;
- }
- }
+ data = dict_get(options, "quota-version");
+ if (data)
+ ret = gf_string2int32(data->data, &priv->version);
- this->local_pool = mem_pool_new (marker_local_t, 128);
- if (!this->local_pool) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
+ if ((ret == 0) && priv->feature_enabled && priv->version < 0) {
+ gf_log(this->name, GF_LOG_ERROR, "Invalid quota version %d",
+ priv->version);
+ goto err;
+ }
+
+ data = dict_get(options, "xtime");
+ if (data) {
+ ret = gf_string2boolean(data->data, &flag);
+ if (ret == 0 && flag == _gf_true) {
+ ret = init_xtime_priv(this, options);
+ if (ret < 0)
goto err;
- }
- return 0;
+ priv->feature_enabled |= GF_XTIME;
+ data = dict_get(options, "gsync-force-xtime");
+ if (!data)
+ goto cont;
+ ret = gf_string2boolean(data->data, &flag);
+ if (ret == 0 && flag)
+ priv->feature_enabled |= GF_XTIME_GSYNC_FORCE;
+ }
+ }
+
+cont:
+ this->local_pool = mem_pool_new(marker_local_t, 128);
+ if (!this->local_pool) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ goto err;
+ }
+
+ return 0;
err:
- marker_priv_cleanup (this);
+ marker_priv_cleanup(this);
- return -1;
+ return -1;
}
int32_t
-marker_forget (xlator_t *this, inode_t *inode)
+marker_forget(xlator_t *this, inode_t *inode)
{
- marker_inode_ctx_t *ctx = NULL;
- uint64_t value = 0;
+ marker_inode_ctx_t *ctx = NULL;
+ uint64_t value = 0;
- if (inode_ctx_del (inode, this, &value) != 0)
- goto out;
+ if (inode_ctx_del(inode, this, &value) != 0)
+ goto out;
- ctx = (marker_inode_ctx_t *)(unsigned long)value;
- if (ctx == NULL) {
- goto out;
- }
+ ctx = (marker_inode_ctx_t *)(unsigned long)value;
+ if (ctx == NULL) {
+ goto out;
+ }
- mq_forget (this, ctx->quota_ctx);
+ mq_forget(this, ctx->quota_ctx);
- GF_FREE (ctx);
+ GF_FREE(ctx);
out:
- return 0;
+ return 0;
}
void
-fini (xlator_t *this)
+fini(xlator_t *this)
{
- marker_priv_cleanup (this);
+ marker_priv_cleanup(this);
}
struct xlator_fops fops = {
- .lookup = marker_lookup,
- .create = marker_create,
- .mkdir = marker_mkdir,
- .writev = marker_writev,
- .truncate = marker_truncate,
- .ftruncate = marker_ftruncate,
- .symlink = marker_symlink,
- .link = marker_link,
- .unlink = marker_unlink,
- .rmdir = marker_rmdir,
- .rename = marker_rename,
- .mknod = marker_mknod,
- .setxattr = marker_setxattr,
- .fsetxattr = marker_fsetxattr,
- .setattr = marker_setattr,
- .fsetattr = marker_fsetattr,
- .removexattr = marker_removexattr,
- .getxattr = marker_getxattr,
- .readdirp = marker_readdirp,
+ .lookup = marker_lookup,
+ .create = marker_create,
+ .mkdir = marker_mkdir,
+ .writev = marker_writev,
+ .truncate = marker_truncate,
+ .ftruncate = marker_ftruncate,
+ .symlink = marker_symlink,
+ .link = marker_link,
+ .unlink = marker_unlink,
+ .rmdir = marker_rmdir,
+ .rename = marker_rename,
+ .mknod = marker_mknod,
+ .setxattr = marker_setxattr,
+ .fsetxattr = marker_fsetxattr,
+ .setattr = marker_setattr,
+ .fsetattr = marker_fsetattr,
+ .removexattr = marker_removexattr,
+ .getxattr = marker_getxattr,
+ .readdirp = marker_readdirp,
+ .fallocate = marker_fallocate,
+ .discard = marker_discard,
+ .zerofill = marker_zerofill,
};
-struct xlator_cbks cbks = {
- .forget = marker_forget
-};
+struct xlator_cbks cbks = {.forget = marker_forget};
struct volume_options options[] = {
- {.key = {"volume-uuid"}},
- {.key = {"timestamp-file"}},
- {.key = {"quota"}},
- {.key = {"xtime"}},
- {.key = {NULL}}
+ {.key = {"volume-uuid"}, .default_value = "{{ volume.id }}"},
+ {.key = {"timestamp-file"}},
+ {
+ .key = {"quota"},
+ .op_version = {1},
+ .flags = OPT_FLAG_NONE,
+ .tags = {},
+ },
+ {
+ .key = {"inode-quota"},
+ .op_version = {1},
+ .flags = OPT_FLAG_NONE,
+ .tags = {},
+ },
+ {
+ .key = {"xtime"},
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_FORCE,
+ .tags = {},
+ },
+ {
+ .key = {"gsync-force-xtime"},
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_FORCE,
+ .tags = {},
+ },
+ {
+ .key = {"quota-version"},
+ .flags = OPT_FLAG_NONE,
+ },
+ {.key = {NULL}}};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "marker",
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/features/marker/src/marker.h b/xlators/features/marker/src/marker.h
index cf0f92b7908..4821094c14b 100644
--- a/xlators/features/marker/src/marker.h
+++ b/xlators/features/marker/src/marker.h
@@ -1,144 +1,147 @@
-/*Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _MARKER_H
#define _MARKER_H
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include "marker-quota.h"
-#include "xlator.h"
-#include "defaults.h"
-#include "uuid.h"
-#include "call-stub.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat-uuid.h>
+#include <glusterfs/call-stub.h>
#define MARKER_XATTR_PREFIX "trusted.glusterfs"
-#define XTIME "xtime"
-#define VOLUME_MARK "volume-mark"
-#define VOLUME_UUID "volume-uuid"
-#define TIMESTAMP_FILE "timestamp-file"
+#define XTIME "xtime"
+#define VOLUME_MARK "volume-mark"
+#define VOLUME_UUID "volume-uuid"
+#define TIMESTAMP_FILE "timestamp-file"
enum {
- GF_QUOTA=1,
- GF_XTIME=2
+ GF_QUOTA = 1,
+ GF_XTIME = 2,
+ GF_XTIME_GSYNC_FORCE = 4,
+ GF_INODE_QUOTA = 8,
};
/*initialize the local variable*/
-#define MARKER_INIT_LOCAL(_frame,_local) do { \
- _frame->local = _local; \
- _local->pid = _frame->root->pid; \
- memset (&_local->loc, 0, sizeof (loc_t)); \
- _local->ref = 1; \
- _local->uid = -1; \
- _local->gid = -1; \
- LOCK_INIT (&_local->lock); \
- _local->oplocal = NULL; \
- } while (0)
+#define MARKER_INIT_LOCAL(_frame, _local) \
+ do { \
+ _frame->local = _local; \
+ _local->pid = _frame->root->pid; \
+ memset(&_local->loc, 0, sizeof(loc_t)); \
+ _local->ref = 1; \
+ _local->uid = -1; \
+ _local->gid = -1; \
+ LOCK_INIT(&_local->lock); \
+ _local->oplocal = NULL; \
+ } while (0)
/* try alloc and if it fails, goto label */
-#define ALLOCATE_OR_GOTO(var, type, label) do { \
- var = GF_CALLOC (sizeof (type), 1, \
- gf_marker_mt_##type); \
- if (!var) { \
- gf_log (this->name, GF_LOG_ERROR, \
- "out of memory :("); \
- goto label; \
- } \
- } while (0)
-
-#define _MARKER_SET_UID_GID(dest, src) \
- do { \
- if (src->uid != -1 && \
- src->gid != -1) { \
- dest->uid = src->uid; \
- dest->gid = src->gid; \
- } \
- } while (0)
-
-#define MARKER_SET_UID_GID(frame, dest, src) \
- do { \
- _MARKER_SET_UID_GID (dest, src); \
- frame->root->uid = 0; \
- frame->root->gid = 0; \
- frame->cookie = (void *) _GF_UID_GID_CHANGED; \
- } while (0)
-
-#define MARKER_RESET_UID_GID(frame, dest, src) \
- do { \
- _MARKER_SET_UID_GID (dest, src); \
- frame->cookie = NULL; \
- } while (0)
-
-struct marker_local{
- uint32_t timebuf[2];
- pid_t pid;
- loc_t loc;
- loc_t parent_loc;
- loc_t *next_lock_on;
- uid_t uid;
- gid_t gid;
- int32_t ref;
- int32_t ia_nlink;
- gf_lock_t lock;
- mode_t mode;
- int32_t err;
- call_stub_t *stub;
- int64_t contribution;
- struct marker_local *oplocal;
-
- /* marker quota specific */
- int64_t delta;
- int64_t d_off;
- int64_t sum;
- int64_t size;
- int32_t hl_count;
- int32_t dentry_child_count;
-
- fd_t *fd;
- call_frame_t *frame;
-
- quota_inode_ctx_t *ctx;
- inode_contribution_t *contri;
-
- int xflag;
+#define ALLOCATE_OR_GOTO(var, type, label) \
+ do { \
+ var = GF_CALLOC(sizeof(type), 1, gf_marker_mt_##type); \
+ if (!var) { \
+ gf_log(this->name, GF_LOG_ERROR, "out of memory :("); \
+ goto label; \
+ } \
+ } while (0)
+
+#define _MARKER_SET_UID_GID(dest, src) \
+ do { \
+ if (src->uid != -1 && src->gid != -1) { \
+ dest->uid = src->uid; \
+ dest->gid = src->gid; \
+ } \
+ } while (0)
+
+#define MARKER_SET_UID_GID(frame, dest, src) \
+ do { \
+ _MARKER_SET_UID_GID(dest, src); \
+ frame->root->uid = 0; \
+ frame->root->gid = 0; \
+ frame->cookie = (void *)_GF_UID_GID_CHANGED; \
+ } while (0)
+
+#define MARKER_RESET_UID_GID(frame, dest, src) \
+ do { \
+ _MARKER_SET_UID_GID(dest, src); \
+ frame->cookie = NULL; \
+ } while (0)
+
+#define MARKER_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ quota_local_t *_local = NULL; \
+ if (frame) { \
+ _local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ if (_local) \
+ marker_local_unref(_local); \
+ } while (0)
+
+struct marker_local {
+ uint32_t timebuf[2];
+ pid_t pid;
+ loc_t loc;
+ loc_t parent_loc;
+ uid_t uid;
+ gid_t gid;
+ int32_t ref;
+ uint32_t ia_nlink;
+ struct iatt buf;
+ gf_lock_t lock;
+ mode_t mode;
+ int32_t err;
+ call_stub_t *stub;
+ call_frame_t *lk_frame;
+ quota_meta_t contribution;
+ struct marker_local *oplocal;
+
+ /* marker quota specific */
+ int64_t delta;
+ int64_t d_off;
+ int64_t sum;
+ int64_t size;
+ int32_t hl_count;
+ int32_t dentry_child_count;
+
+ fd_t *fd;
+ call_frame_t *frame;
+
+ quota_inode_ctx_t *ctx;
+ inode_contribution_t *contri;
+
+ int xflag;
+ dict_t *xdata;
+ gf_boolean_t skip_txn;
};
typedef struct marker_local marker_local_t;
#define quota_local_t marker_local_t
struct marker_inode_ctx {
- struct quota_inode_ctx *quota_ctx;
+ struct quota_inode_ctx *quota_ctx;
};
typedef struct marker_inode_ctx marker_inode_ctx_t;
-struct marker_conf{
- char feature_enabled;
- char *size_key;
- char *dirty_key;
- char *volume_uuid;
- uuid_t volume_uuid_bin;
- char *timestamp_file;
- char *marker_xattr;
- uint64_t quota_lk_owner;
- gf_lock_t lock;
+struct marker_conf {
+ char feature_enabled;
+ char *size_key;
+ char *dirty_key;
+ char *volume_uuid;
+ uuid_t volume_uuid_bin;
+ char *timestamp_file;
+ char *marker_xattr;
+ uint64_t quota_lk_owner;
+ gf_lock_t lock;
+ int32_t version;
};
typedef struct marker_conf marker_conf_t;
diff --git a/xlators/features/marker/utils/Makefile.am b/xlators/features/marker/utils/Makefile.am
deleted file mode 100644
index 556951d9fb7..00000000000
--- a/xlators/features/marker/utils/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = syncdaemon src
-
-CLEANFILES =
diff --git a/xlators/features/marker/utils/src/Makefile.am b/xlators/features/marker/utils/src/Makefile.am
deleted file mode 100644
index 73c99cb76d8..00000000000
--- a/xlators/features/marker/utils/src/Makefile.am
+++ /dev/null
@@ -1,22 +0,0 @@
-gsyncddir = $(libexecdir)/glusterfs
-
-gsyncd_PROGRAMS = gsyncd
-
-gsyncd_SOURCES = gsyncd.c procdiggy.c
-
-gsyncd_LDADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-gsyncd_LDFLAGS = $(GF_LDFLAGS) $(GF_GLUSTERFS_LDFLAGS)
-
-noinst_HEADERS = procdiggy.h
-
-AM_CFLAGS = -fPIC -Wall -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src\
- -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\
- -DSBIN_DIR=\"$(sbindir)\" -DPYTHON=\"$(PYTHON)\"
-
-
-CLEANFILES =
-
-$(top_builddir)/libglusterfs/src/libglusterfs.la:
- $(MAKE) -C $(top_builddir)/libglusterfs/src/ all
diff --git a/xlators/features/marker/utils/src/gsyncd.c b/xlators/features/marker/utils/src/gsyncd.c
deleted file mode 100644
index 7da2c983cff..00000000000
--- a/xlators/features/marker/utils/src/gsyncd.c
+++ /dev/null
@@ -1,345 +0,0 @@
-/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <string.h>
-#include <sys/param.h> /* for PATH_MAX */
-
-
-#include "common-utils.h"
-#include "run.h"
-#include "procdiggy.h"
-
-#define _GLUSTERD_CALLED_ "_GLUSTERD_CALLED_"
-#define _GSYNCD_DISPATCHED_ "_GSYNCD_DISPATCHED_"
-#define GSYNCD_CONF "geo-replication/gsyncd.conf"
-#define GSYNCD_PY "gsyncd.py"
-#define RSYNC "rsync"
-
-int restricted = 0;
-
-static int
-duplexpand (void **buf, size_t tsiz, size_t *len)
-{
- size_t osiz = tsiz * *len;
-
- *buf = realloc (*buf, osiz << 1);
- if (!buf)
- return -1;
-
- memset ((char *)*buf + osiz, 0, osiz);
- *len <<= 1;
-
- return 0;
-}
-
-static int
-str2argv (char *str, char ***argv)
-{
- char *p = NULL;
- char *savetok = NULL;
- int argc = 0;
- size_t argv_len = 32;
- int ret = 0;
-
- assert (str);
- str = strdup (str);
- if (!str)
- return -1;
-
- *argv = calloc (argv_len, sizeof (**argv));
- if (!*argv)
- goto error;
-
- while ((p = strtok_r (str, " ", &savetok))) {
- str = NULL;
-
- argc++;
- if (argc == argv_len) {
- ret = duplexpand ((void *)argv,
- sizeof (**argv),
- &argv_len);
- if (ret == -1)
- goto error;
- }
- (*argv)[argc - 1] = p;
- }
-
- return argc;
-
- error:
- fprintf (stderr, "out of memory\n");
- return -1;
-}
-
-static int
-invoke_gsyncd (int argc, char **argv)
-{
- char config_file[PATH_MAX] = {0,};
- size_t gluster_workdir_len = 0;
- runner_t runner = {0,};
- int i = 0;
- int j = 0;
- char *nargv[argc + 4];
-
- if (restricted) {
- /* in restricted mode we forcibly use the system-wide config */
- runinit (&runner);
- runner_add_args (&runner, SBIN_DIR"/gluster",
- "--log-file=-", "system::", "getwd",
- NULL);
- runner_redir (&runner, STDOUT_FILENO, RUN_PIPE);
- if (runner_start (&runner) == 0 &&
- fgets (config_file, PATH_MAX,
- runner_chio (&runner, STDOUT_FILENO)) != NULL &&
- config_file[strlen (config_file) - 1] == '\n' &&
- runner_end (&runner) == 0)
- gluster_workdir_len = strlen (config_file) - 1;
-
- if (gluster_workdir_len) {
- if (gluster_workdir_len + 1 + strlen (GSYNCD_CONF) + 1 >
- PATH_MAX)
- goto error;
- config_file[gluster_workdir_len] = '/';
- strcat (config_file, GSYNCD_CONF);
- } else
- goto error;
-
- if (setenv ("_GSYNCD_RESTRICTED_", "1", 1) == -1)
- goto error;
- }
-
- if (chdir ("/") == -1)
- goto error;
-
- j = 0;
- nargv[j++] = PYTHON;
- nargv[j++] = GSYNCD_PREFIX"/python/syncdaemon/"GSYNCD_PY;
- for (i = 1; i < argc; i++)
- nargv[j++] = argv[i];
- if (config_file[0]) {
- nargv[j++] = "-c";
- nargv[j++] = config_file;
- }
- nargv[j++] = NULL;
-
- execvp (PYTHON, nargv);
-
- fprintf (stderr, "exec of "PYTHON" failed\n");
- return 127;
-
- error:
- fprintf (stderr, "gsyncd initializaion failed\n");
- return 1;
-}
-
-
-static int
-find_gsyncd (pid_t pid, pid_t ppid, char *name, void *data)
-{
- char buf[NAME_MAX * 2] = {0,};
- char path[PATH_MAX] = {0,};
- char *p = NULL;
- int zeros = 0;
- int ret = 0;
- int fd = -1;
- pid_t *pida = (pid_t *)data;
-
- if (ppid != pida[0])
- return 0;
-
- sprintf (path, PROC"/%d/cmdline", pid);
- fd = open (path, O_RDONLY);
- if (fd == -1)
- return 0;
- ret = read (fd, buf, sizeof (buf));
- close (fd);
- if (ret == -1)
- return 0;
- for (zeros = 0, p = buf; zeros < 2 && p < buf + ret; p++)
- zeros += !*p;
-
- ret = 0;
- switch (zeros) {
- case 2:
- if ((strcmp (basename (buf), basename (PYTHON)) ||
- strcmp (basename (buf + strlen (buf) + 1), GSYNCD_PY)) == 0) {
- ret = 1;
- break;
- }
- /* fallthrough */
- case 1:
- if (strcmp (basename (buf), GSYNCD_PY) == 0)
- ret = 1;
- }
-
- if (ret == 1) {
- if (pida[1] != -1) {
- fprintf (stderr, GSYNCD_PY" sibling is not unique");
- return -1;
- }
- pida[1] = pid;
- }
-
- return 0;
-}
-
-static int
-invoke_rsync (int argc, char **argv)
-{
- int i = 0;
- char path[PATH_MAX] = {0,};
- pid_t pid = -1;
- pid_t ppid = -1;
- pid_t pida[] = {-1, -1};
- char *name = NULL;
- char buf[PATH_MAX + 1] = {0,};
- int ret = 0;
-
- assert (argv[argc] == NULL);
-
- if (argc < 2 || strcmp (argv[1], "--server") != 0)
- goto error;
-
- for (i = 2; i < argc && argv[i][0] == '-'; i++);
-
- if (!(i == argc - 2 && strcmp (argv[i], ".") == 0 && argv[i + 1][0] == '/')) {
- fprintf (stderr, "need an rsync invocation without protected args\n");
- goto error;
- }
-
- /* look up sshd we are spawned from */
- for (pid = getpid () ;; pid = ppid) {
- ppid = pidinfo (pid, &name);
- if (ppid < 0) {
- fprintf (stderr, "sshd ancestor not found\n");
- goto error;
- }
- if (strcmp (name, "sshd") == 0) {
- GF_FREE (name);
- break;
- }
- GF_FREE (name);
- }
- /* look up "ssh-sibling" gsyncd */
- pida[0] = pid;
- ret = prociter (find_gsyncd, pida);
- if (ret == -1 || pida[1] == -1) {
- fprintf (stderr, "gsyncd sibling not found\n");
- goto error;
- }
- /* check if rsync target matches gsyncd target */
- sprintf (path, PROC"/%d/cwd", pida[1]);
- ret = readlink (path, buf, sizeof (buf));
- if (ret == -1 || ret == sizeof (buf))
- goto error;
- if (strcmp (argv[argc - 1], "/") == 0 /* root dir cannot be a target */ ||
- (strcmp (argv[argc - 1], path) /* match against gluster target */ &&
- strcmp (argv[argc - 1], buf) /* match against file target */) != 0) {
- fprintf (stderr, "rsync target does not match "GEOREP" session\n");
- goto error;
- }
-
- argv[0] = RSYNC;
-
- execvp (RSYNC, argv);
-
- fprintf (stderr, "exec of "RSYNC" failed\n");
- return 127;
-
- error:
- fprintf (stderr, "disallowed "RSYNC" invocation\n");
- return 1;
-}
-
-
-struct invocable {
- char *name;
- int (*invoker) (int argc, char **argv);
-};
-
-struct invocable invocables[] = {
- { "rsync", invoke_rsync },
- { "gsyncd", invoke_gsyncd },
- { NULL, NULL}
-};
-
-int
-main (int argc, char **argv)
-{
- char *evas = NULL;
- struct invocable *i = NULL;
- char *b = NULL;
- char *sargv = NULL;
-
- evas = getenv (_GLUSTERD_CALLED_);
- if (evas && strcmp (evas, "1") == 0)
- /* OK, we know glusterd called us, no need to look for further config
- * ... altough this conclusion should not inherit to our children
- */
- unsetenv (_GLUSTERD_CALLED_);
- else {
- /* we regard all gsyncd invocations unsafe
- * that do not come from glusterd and
- * therefore restrict it
- */
- restricted = 1;
-
- if (!getenv (_GSYNCD_DISPATCHED_)) {
- evas = getenv ("SSH_ORIGINAL_COMMAND");
- if (evas)
- sargv = evas;
- else {
- evas = getenv ("SHELL");
- if (evas && strcmp (basename (evas), "gsyncd") == 0 &&
- argc == 3 && strcmp (argv[1], "-c") == 0)
- sargv = argv[2];
- }
- }
-
- }
-
- if (!(sargv && restricted))
- return invoke_gsyncd (argc, argv);
-
- argc = str2argv (sargv, &argv);
- if (argc == -1 || setenv (_GSYNCD_DISPATCHED_, "1", 1) == -1) {
- fprintf (stderr, "internal error\n");
- return 1;
- }
-
- b = basename (argv[0]);
- for (i = invocables; i->name; i++) {
- if (strcmp (b, i->name) == 0)
- return i->invoker (argc, argv);
- }
-
- fprintf (stderr, "invoking %s in restricted SSH session is not allowed\n",
- b);
-
- return 1;
-}
diff --git a/xlators/features/marker/utils/src/procdiggy.c b/xlators/features/marker/utils/src/procdiggy.c
deleted file mode 100644
index 0baab966dc5..00000000000
--- a/xlators/features/marker/utils/src/procdiggy.c
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <string.h>
-#include <ctype.h>
-#include <sys/param.h> /* for PATH_MAX */
-
-#include "common-utils.h"
-#include "procdiggy.h"
-
-pid_t
-pidinfo (pid_t pid, char **name)
-{
- char buf[NAME_MAX * 2] = {0,};
- FILE *f = NULL;
- char path[PATH_MAX] = {0,};
- char *p = NULL;
- int ret = 0;
-
- sprintf (path, PROC"/%d/status", pid);
-
- f = fopen (path, "r");
- if (!f)
- return -1;
-
- if (name)
- *name = NULL;
- for (;;) {
- memset (buf, 0, sizeof (buf));
- if (fgets (buf, sizeof (buf), f) == NULL ||
- buf[strlen (buf) - 1] != '\n') {
- pid = -1;
- goto out;
- }
- buf[strlen (buf) -1] = '\0';
-
- if (name && !*name) {
- p = strtail (buf, "Name:");
- if (p) {
- while (isspace (*++p));
- *name = gf_strdup (p);
- if (!*name) {
- pid = -2;
- goto out;
- }
- continue;
- }
- }
-
- p = strtail (buf, "PPid:");
- if (p)
- break;
- }
-
- while (isspace (*++p));
- ret = gf_string2int (p, &pid);
- if (ret == -1)
- pid = -1;
-
- out:
- fclose (f);
- if (pid == -1 && name && *name)
- GF_FREE (name);
- if (pid == -2)
- fprintf (stderr, "out of memory\n");
- return pid;
-}
-
-int
-prociter (int (*proch) (pid_t pid, pid_t ppid, char *tmpname, void *data),
- void *data)
-{
- char *name = NULL;
- DIR *d = NULL;
- struct dirent *de = NULL;
- pid_t pid = -1;
- pid_t ppid = -1;
- int ret = 0;
-
- d = opendir (PROC);
- if (!d)
- return -1;
- while (errno = 0, de = readdir (d)) {
- if (gf_string2int (de->d_name, &pid) != -1 && pid >= 0) {
- ppid = pidinfo (pid, &name);
- switch (ppid) {
- case -1: continue;
- case -2: ret = -1; break;
- }
- ret = proch (pid, ppid, name, data);
- GF_FREE (name);
- if (ret)
- break;
- }
- }
- closedir (d);
- if (!de && errno) {
- fprintf (stderr, "failed to traverse "PROC" (%s)\n",
- strerror (errno));
- ret = -1;
- }
-
- return ret;
-}
diff --git a/xlators/features/marker/utils/src/procdiggy.h b/xlators/features/marker/utils/src/procdiggy.h
deleted file mode 100644
index ee87b0e3916..00000000000
--- a/xlators/features/marker/utils/src/procdiggy.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifdef __NetBSD__
-#include <sys/syslimits.h>
-#endif /* __NetBSD__ */
-
-#define PROC "/proc"
-
-pid_t pidinfo (pid_t pid, char **name);
-
-int prociter (int (*proch) (pid_t pid, pid_t ppid, char *name, void *data),
- void *data);
-
diff --git a/xlators/features/marker/utils/syncdaemon/Makefile.am b/xlators/features/marker/utils/syncdaemon/Makefile.am
deleted file mode 100644
index cc7cee102ea..00000000000
--- a/xlators/features/marker/utils/syncdaemon/Makefile.am
+++ /dev/null
@@ -1,6 +0,0 @@
-syncdaemondir = $(libexecdir)/glusterfs/python/syncdaemon
-
-syncdaemon_PYTHON = gconf.py gsyncd.py __init__.py master.py README.md repce.py resource.py configinterface.py syncdutils.py monitor.py libcxattr.py \
- $(top_builddir)/contrib/ipaddr-py/ipaddr.py
-
-CLEANFILES =
diff --git a/xlators/features/marker/utils/syncdaemon/__init__.py b/xlators/features/marker/utils/syncdaemon/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000
--- a/xlators/features/marker/utils/syncdaemon/__init__.py
+++ /dev/null
diff --git a/xlators/features/marker/utils/syncdaemon/configinterface.py b/xlators/features/marker/utils/syncdaemon/configinterface.py
deleted file mode 100644
index e55bec519e9..00000000000
--- a/xlators/features/marker/utils/syncdaemon/configinterface.py
+++ /dev/null
@@ -1,224 +0,0 @@
-try:
- import ConfigParser
-except ImportError:
- # py 3
- import configparser as ConfigParser
-import re
-from string import Template
-
-from syncdutils import escape, unescape, norm, update_file, GsyncdError
-
-SECT_ORD = '__section_order__'
-SECT_META = '__meta__'
-config_version = 2.0
-
-re_type = type(re.compile(''))
-
-
-class MultiDict(object):
- """a virtual dict-like class which functions as the union of underlying dicts"""
-
- def __init__(self, *dd):
- self.dicts = dd
-
- def __getitem__(self, key):
- val = None
- for d in self.dicts:
- if d.get(key):
- val = d[key]
- if not val:
- raise KeyError(key)
- return val
-
-
-class GConffile(object):
- """A high-level interface to ConfigParser which flattens the two-tiered
- config layout by implenting automatic section dispatch based on initial
- parameters.
-
- Also ensure section ordering in terms of their time of addition -- a compat
- hack for Python < 2.7.
- """
-
- def _normconfig(self):
- """normalize config keys by s/-/_/g"""
- for n, s in self.config._sections.items():
- if n.find('__') == 0:
- continue
- s2 = type(s)()
- for k, v in s.items():
- if k.find('__') != 0:
- k = norm(k)
- s2[k] = v
- self.config._sections[n] = s2
-
- def __init__(self, path, peers, *dd):
- """
- - .path: location of config file
- - .config: underlying ConfigParser instance
- - .peers: on behalf of whom we flatten .config
- (master, or master-slave url pair)
- - .auxdicts: template subtituents
- """
- self.peers = peers
- self.path = path
- self.auxdicts = dd
- self.config = ConfigParser.RawConfigParser()
- self.config.read(path)
- self._normconfig()
-
- def section(self, rx=False):
- """get the section name of the section representing .peers in .config"""
- peers = self.peers
- if not peers:
- peers = ['.', '.']
- rx = True
- if rx:
- st = 'peersrx'
- else:
- st = 'peers'
- return ' '.join([st] + [escape(u) for u in peers])
-
- @staticmethod
- def parse_section(section):
- """retrieve peers sequence encoded by section name
- (as urls or regexen, depending on section type)
- """
- sl = section.split()
- st = sl.pop(0)
- sl = [unescape(u) for u in sl]
- if st == 'peersrx':
- sl = [re.compile(u) for u in sl]
- return sl
-
- def ord_sections(self):
- """Return an ordered list of sections.
-
- Ordering happens based on the auxiliary
- SECT_ORD section storing indices for each
- section added through the config API.
-
- To not to go corrupt in case of manually
- written config files, we take care to append
- also those sections which are not registered
- in SECT_ORD.
-
- Needed for python 2.{4,5,6} where ConfigParser
- cannot yet order sections/options internally.
- """
- so = {}
- if self.config.has_section(SECT_ORD):
- so = self.config._sections[SECT_ORD]
- so2 = {}
- for k, v in so.items():
- if k != '__name__':
- so2[k] = int(v)
- tv = 0
- if so2:
- tv = max(so2.values()) + 1
- ss = [s for s in self.config.sections() if s.find('__') != 0]
- for s in ss:
- if s in so.keys():
- continue
- so2[s] = tv
- tv += 1
- def scmp(x, y):
- return cmp(*(so2[s] for s in (x, y)))
- ss.sort(scmp)
- return ss
-
- def update_to(self, dct, allow_unresolved=False):
- """update @dct from key/values of ours.
-
- key/values are collected from .config by filtering the regexp sections
- according to match, and from .section. The values are treated as templates,
- which are substituted from .auxdicts and (in case of regexp sections)
- match groups.
- """
- if not self.peers:
- raise GsyncdError('no peers given, cannot select matching options')
- def update_from_sect(sect, mud):
- for k, v in self.config._sections[sect].items():
- if k == '__name__':
- continue
- if allow_unresolved:
- dct[k] = Template(v).safe_substitute(mud)
- else:
- dct[k] = Template(v).substitute(mud)
- for sect in self.ord_sections():
- sp = self.parse_section(sect)
- if isinstance(sp[0], re_type) and len(sp) == len(self.peers):
- match = True
- mad = {}
- for i in range(len(sp)):
- m = sp[i].search(self.peers[i])
- if not m:
- match = False
- break
- for j in range(len(m.groups())):
- mad['match%d_%d' % (i+1, j+1)] = m.groups()[j]
- if match:
- update_from_sect(sect, MultiDict(dct, mad, *self.auxdicts))
- if self.config.has_section(self.section()):
- update_from_sect(self.section(), MultiDict(dct, *self.auxdicts))
-
- def get(self, opt=None):
- """print the matching key/value pairs from .config,
- or if @opt given, the value for @opt (according to the
- logic described in .update_to)
- """
- d = {}
- self.update_to(d, allow_unresolved = True)
- if opt:
- opt = norm(opt)
- v = d.get(opt)
- if v:
- print(v)
- else:
- for k, v in d.iteritems():
- if k == '__name__':
- continue
- print("%s: %s" % (k, v))
-
- def write(self, trfn, opt, *a, **kw):
- """update on-disk config transactionally
-
- @trfn is the transaction function
- """
- def mergeconf(f):
- self.config = ConfigParser.RawConfigParser()
- self.config.readfp(f)
- self._normconfig()
- if not self.config.has_section(SECT_META):
- self.config.add_section(SECT_META)
- self.config.set(SECT_META, 'version', config_version)
- return trfn(norm(opt), *a, **kw)
- def updateconf(f):
- self.config.write(f)
- update_file(self.path, updateconf, mergeconf)
-
- def _set(self, opt, val, rx=False):
- """set @opt to @val in .section"""
- sect = self.section(rx)
- if not self.config.has_section(sect):
- self.config.add_section(sect)
- # regarding SECT_ORD, cf. ord_sections
- if not self.config.has_section(SECT_ORD):
- self.config.add_section(SECT_ORD)
- self.config.set(SECT_ORD, sect, len(self.config._sections[SECT_ORD]))
- self.config.set(sect, opt, val)
- return True
-
- def set(self, opt, *a, **kw):
- """perform ._set transactionally"""
- self.write(self._set, opt, *a, **kw)
-
- def _delete(self, opt, rx=False):
- """delete @opt from .section"""
- sect = self.section(rx)
- if self.config.has_section(sect):
- return self.config.remove_option(sect, opt)
-
- def delete(self, opt, *a, **kw):
- """perform ._delete transactionally"""
- self.write(self._delete, opt, *a, **kw)
diff --git a/xlators/features/marker/utils/syncdaemon/gconf.py b/xlators/features/marker/utils/syncdaemon/gconf.py
deleted file mode 100644
index 146c72a1825..00000000000
--- a/xlators/features/marker/utils/syncdaemon/gconf.py
+++ /dev/null
@@ -1,20 +0,0 @@
-import os
-
-class GConf(object):
- """singleton class to store globals
- shared between gsyncd modules"""
-
- ssh_ctl_dir = None
- ssh_ctl_args = None
- cpid = None
- pid_file_owned = False
- log_exit = False
- permanent_handles = []
- log_metadata = {}
-
- @classmethod
- def setup_ssh_ctl(cls, ctld):
- cls.ssh_ctl_dir = ctld
- cls.ssh_ctl_args = ["-oControlMaster=auto", "-S", os.path.join(ctld, "gsycnd-ssh-%r@%h:%p")]
-
-gconf = GConf()
diff --git a/xlators/features/marker/utils/syncdaemon/gsyncd.py b/xlators/features/marker/utils/syncdaemon/gsyncd.py
deleted file mode 100644
index 165aebda1f9..00000000000
--- a/xlators/features/marker/utils/syncdaemon/gsyncd.py
+++ /dev/null
@@ -1,400 +0,0 @@
-#!/usr/bin/env python
-
-import os
-import os.path
-import sys
-import time
-import logging
-import signal
-import optparse
-import fcntl
-import fnmatch
-from optparse import OptionParser, SUPPRESS_HELP
-from logging import Logger
-from errno import EEXIST, ENOENT
-
-from ipaddr import IPAddress, IPNetwork
-
-from gconf import gconf
-from syncdutils import FreeObject, norm, grabpidfile, finalize, log_raise_exception
-from syncdutils import GsyncdError, select, set_term_handler
-from configinterface import GConffile
-import resource
-from monitor import monitor
-
-class GLogger(Logger):
- """Logger customizations for gsyncd.
-
- It implements a log format similar to that of glusterfs.
- """
-
- def makeRecord(self, name, level, *a):
- rv = Logger.makeRecord(self, name, level, *a)
- rv.nsecs = (rv.created - int(rv.created)) * 1000000
- fr = sys._getframe(4)
- callee = fr.f_locals.get('self')
- if callee:
- ctx = str(type(callee)).split("'")[1].split('.')[-1]
- else:
- ctx = '<top>'
- if not hasattr(rv, 'funcName'):
- rv.funcName = fr.f_code.co_name
- rv.lvlnam = logging.getLevelName(level)[0]
- rv.ctx = ctx
- return rv
-
- @classmethod
- def setup(cls, **kw):
- lbl = kw.get('label', "")
- if lbl:
- lbl = '(' + lbl + ')'
- lprm = {'datefmt': "%Y-%m-%d %H:%M:%S",
- 'format': "[%(asctime)s.%(nsecs)d] %(lvlnam)s [%(module)s" + lbl + ":%(lineno)s:%(funcName)s] %(ctx)s: %(message)s"}
- lprm.update(kw)
- lvl = kw.get('level', logging.INFO)
- lprm['level'] = lvl
- logging.root = cls("root", lvl)
- logging.setLoggerClass(cls)
- logging.getLogger().handlers = []
- logging.basicConfig(**lprm)
-
- @classmethod
- def _gsyncd_loginit(cls, **kw):
- lkw = {}
- if gconf.log_level:
- lkw['level'] = gconf.log_level
- if kw.get('log_file'):
- if kw['log_file'] in ('-', '/dev/stderr'):
- lkw['stream'] = sys.stderr
- elif kw['log_file'] == '/dev/stdout':
- lkw['stream'] = sys.stdout
- else:
- lkw['filename'] = kw['log_file']
-
- cls.setup(label=kw.get('label'), **lkw)
-
- lkw.update({'saved_label': kw.get('label')})
- gconf.log_metadata = lkw
- gconf.log_exit = True
-
-def startup(**kw):
- """set up logging, pidfile grabbing, daemonization"""
- if getattr(gconf, 'pid_file', None) and kw.get('go_daemon') != 'postconn':
- if not grabpidfile():
- sys.stderr.write("pidfile is taken, exiting.\n")
- sys.exit(2)
- gconf.pid_file_owned = True
-
- if kw.get('go_daemon') == 'should':
- x, y = os.pipe()
- gconf.cpid = os.fork()
- if gconf.cpid:
- os.close(x)
- sys.exit()
- os.close(y)
- os.setsid()
- dn = os.open(os.devnull, os.O_RDWR)
- for f in (sys.stdin, sys.stdout, sys.stderr):
- os.dup2(dn, f.fileno())
- if getattr(gconf, 'pid_file', None):
- if not grabpidfile(gconf.pid_file + '.tmp'):
- raise GsyncdError("cannot grab temporary pidfile")
- os.rename(gconf.pid_file + '.tmp', gconf.pid_file)
- # wait for parent to terminate
- # so we can start up with
- # no messing from the dirty
- # ol' bustard
- select((x,), (), ())
- os.close(x)
-
- GLogger._gsyncd_loginit(**kw)
-
-def main():
- """main routine, signal/exception handling boilerplates"""
- gconf.starttime = time.time()
- set_term_handler()
- GLogger.setup()
- excont = FreeObject(exval = 0)
- try:
- try:
- main_i()
- except:
- log_raise_exception(excont)
- finally:
- finalize(exval = excont.exval)
-
-def main_i():
- """internal main routine
-
- parse command line, decide what action will be taken;
- we can either:
- - query/manipulate configuration
- - format gsyncd urls using gsyncd's url parsing engine
- - start service in following modes, in given stages:
- - monitor: startup(), monitor()
- - master: startup(), connect_remote(), connect(), service_loop()
- - slave: startup(), connect(), service_loop()
- """
- rconf = {'go_daemon': 'should'}
-
- def store_abs(opt, optstr, val, parser):
- if val and val != '-':
- val = os.path.abspath(val)
- setattr(parser.values, opt.dest, val)
- def store_local(opt, optstr, val, parser):
- rconf[opt.dest] = val
- def store_local_curry(val):
- return lambda o, oo, vx, p: store_local(o, oo, val, p)
- def store_local_obj(op, dmake):
- return lambda o, oo, vx, p: store_local(o, oo, FreeObject(op=op, **dmake(vx)), p)
-
- op = OptionParser(usage="%prog [options...] <master> <slave>", version="%prog 0.0.1")
- op.add_option('--gluster-command-dir', metavar='DIR', default='')
- op.add_option('--gluster-log-file', metavar='LOGF', default=os.devnull, type=str, action='callback', callback=store_abs)
- op.add_option('--gluster-log-level', metavar='LVL')
- op.add_option('--gluster-params', metavar='PRMS', default='')
- op.add_option('--gluster-cli-options', metavar='OPTS', default='--log-file=-')
- op.add_option('--mountbroker', metavar='LABEL')
- op.add_option('-p', '--pid-file', metavar='PIDF', type=str, action='callback', callback=store_abs)
- op.add_option('-l', '--log-file', metavar='LOGF', type=str, action='callback', callback=store_abs)
- op.add_option('--state-file', metavar='STATF', type=str, action='callback', callback=store_abs)
- op.add_option('--ignore-deletes', default=False, action='store_true')
- op.add_option('-L', '--log-level', metavar='LVL')
- op.add_option('-r', '--remote-gsyncd', metavar='CMD', default=os.path.abspath(sys.argv[0]))
- op.add_option('--volume-id', metavar='UUID')
- op.add_option('--session-owner', metavar='ID')
- op.add_option('-s', '--ssh-command', metavar='CMD', default='ssh')
- op.add_option('--rsync-command', metavar='CMD', default='rsync')
- op.add_option('--timeout', metavar='SEC', type=int, default=120)
- op.add_option('--connection-timeout', metavar='SEC', type=int, default=60, help=SUPPRESS_HELP)
- op.add_option('--sync-jobs', metavar='N', type=int, default=3)
- op.add_option('--turns', metavar='N', type=int, default=0, help=SUPPRESS_HELP)
- op.add_option('--allow-network', metavar='IPS', default='')
- op.add_option('--state-socket-unencoded', metavar='SOCKF', type=str, action='callback', callback=store_abs)
- op.add_option('--checkpoint', metavar='LABEL', default='')
- # tunables for failover/failback mechanism:
- # None - gsyncd behaves as normal
- # blind - gsyncd works with xtime pairs to identify
- # candidates for synchronization
- # wrapup - same as normal mode but does not assign
- # xtimes to orphaned files
- # see crawl() for usage of the above tunables
- op.add_option('--special-sync-mode', type=str, help=SUPPRESS_HELP)
-
- op.add_option('-c', '--config-file', metavar='CONF', type=str, action='callback', callback=store_local)
- # duh. need to specify dest or value will be mapped to None :S
- op.add_option('--monitor', dest='monitor', action='callback', callback=store_local_curry(True))
- op.add_option('--feedback-fd', dest='feedback_fd', type=int, help=SUPPRESS_HELP, action='callback', callback=store_local)
- op.add_option('--listen', dest='listen', help=SUPPRESS_HELP, action='callback', callback=store_local_curry(True))
- op.add_option('-N', '--no-daemon', dest="go_daemon", action='callback', callback=store_local_curry('dont'))
- op.add_option('--debug', dest="go_daemon", action='callback', callback=lambda *a: (store_local_curry('dont')(*a),
- setattr(a[-1].values, 'log_file', '-'),
- setattr(a[-1].values, 'log_level', 'DEBUG'))),
-
- for a in ('check', 'get'):
- op.add_option('--config-' + a, metavar='OPT', type=str, dest='config', action='callback',
- callback=store_local_obj(a, lambda vx: {'opt': vx}))
- op.add_option('--config-get-all', dest='config', action='callback', callback=store_local_obj('get', lambda vx: {'opt': None}))
- for m in ('', '-rx', '-glob'):
- # call this code 'Pythonic' eh?
- # have to define a one-shot local function to be able to inject (a value depending on the)
- # iteration variable into the inner lambda
- def conf_mod_opt_regex_variant(rx):
- op.add_option('--config-set' + m, metavar='OPT VAL', type=str, nargs=2, dest='config', action='callback',
- callback=store_local_obj('set', lambda vx: {'opt': vx[0], 'val': vx[1], 'rx': rx}))
- op.add_option('--config-del' + m, metavar='OPT', type=str, dest='config', action='callback',
- callback=store_local_obj('del', lambda vx: {'opt': vx, 'rx': rx}))
- conf_mod_opt_regex_variant(m and m[1:] or False)
-
- op.add_option('--normalize-url', dest='url_print', action='callback', callback=store_local_curry('normal'))
- op.add_option('--canonicalize-url', dest='url_print', action='callback', callback=store_local_curry('canon'))
- op.add_option('--canonicalize-escape-url', dest='url_print', action='callback', callback=store_local_curry('canon_esc'))
-
- tunables = [ norm(o.get_opt_string()[2:]) for o in op.option_list if o.callback in (store_abs, 'store_true', None) and o.get_opt_string() not in ('--version', '--help') ]
- remote_tunables = [ 'listen', 'go_daemon', 'timeout', 'session_owner', 'config_file' ]
- rq_remote_tunables = { 'listen': True }
-
- # precedence for sources of values: 1) commandline, 2) cfg file, 3) defaults
- # -- for this to work out we need to tell apart defaults from explicitly set
- # options... so churn out the defaults here and call the parser with virgin
- # values container.
- defaults = op.get_default_values()
- opts, args = op.parse_args(values=optparse.Values())
- confdata = rconf.get('config')
- if not (len(args) == 2 or \
- (len(args) == 1 and rconf.get('listen')) or \
- (len(args) <= 2 and confdata) or \
- rconf.get('url_print')):
- sys.stderr.write("error: incorrect number of arguments\n\n")
- sys.stderr.write(op.get_usage() + "\n")
- sys.exit(1)
-
- restricted = os.getenv('_GSYNCD_RESTRICTED_')
-
- if restricted:
- allopts = {}
- allopts.update(opts.__dict__)
- allopts.update(rconf)
- bannedtuns = set(allopts.keys()) - set(remote_tunables)
- if bannedtuns:
- raise GsyncdError('following tunables cannot be set with restricted SSH invocaton: ' + \
- ', '.join(bannedtuns))
- for k, v in rq_remote_tunables.items():
- if not k in allopts or allopts[k] != v:
- raise GsyncdError('tunable %s is not set to value %s required for restricted SSH invocaton' % \
- (k, v))
-
- confrx = getattr(confdata, 'rx', None)
- if confrx:
- # peers are regexen, don't try to parse them
- if confrx == 'glob':
- args = [ '\A' + fnmatch.translate(a) for a in args ]
- canon_peers = args
- namedict = {}
- else:
- rscs = [resource.parse_url(u) for u in args]
- dc = rconf.get('url_print')
- if dc:
- for r in rscs:
- print(r.get_url(**{'normal': {},
- 'canon': {'canonical': True},
- 'canon_esc': {'canonical': True, 'escaped': True}}[dc]))
- return
- local = remote = None
- if rscs:
- local = rscs[0]
- if len(rscs) > 1:
- remote = rscs[1]
- if not local.can_connect_to(remote):
- raise GsyncdError("%s cannot work with %s" % (local.path, remote and remote.path))
- pa = ([], [], [])
- urlprms = ({}, {'canonical': True}, {'canonical': True, 'escaped': True})
- for x in rscs:
- for i in range(len(pa)):
- pa[i].append(x.get_url(**urlprms[i]))
- peers, canon_peers, canon_esc_peers = pa
- # creating the namedict, a dict representing various ways of referring to / repreenting
- # peers to be fillable in config templates
- mods = (lambda x: x, lambda x: x[0].upper() + x[1:], lambda x: 'e' + x[0].upper() + x[1:])
- if remote:
- rmap = { local: ('local', 'master'), remote: ('remote', 'slave') }
- else:
- rmap = { local: ('local', 'slave') }
- namedict = {}
- for i in range(len(rscs)):
- x = rscs[i]
- for name in rmap[x]:
- for j in range(3):
- namedict[mods[j](name)] = pa[j][i]
- if x.scheme == 'gluster':
- namedict[name + 'vol'] = x.volume
- if not 'config_file' in rconf:
- rconf['config_file'] = os.path.join(os.path.dirname(sys.argv[0]), "conf/gsyncd.conf")
- gcnf = GConffile(rconf['config_file'], canon_peers, defaults.__dict__, opts.__dict__, namedict)
-
- checkpoint_change = False
- if confdata:
- opt_ok = norm(confdata.opt) in tunables + [None]
- if confdata.op == 'check':
- if opt_ok:
- sys.exit(0)
- else:
- sys.exit(1)
- elif not opt_ok:
- raise GsyncdError("not a valid option: " + confdata.opt)
- if confdata.op == 'get':
- gcnf.get(confdata.opt)
- elif confdata.op == 'set':
- gcnf.set(confdata.opt, confdata.val, confdata.rx)
- elif confdata.op == 'del':
- gcnf.delete(confdata.opt, confdata.rx)
- # when modifying checkpoint, it's important to make a log
- # of that, so in that case we go on to set up logging even
- # if its just config invocation
- if confdata.opt == 'checkpoint' and confdata.op in ('set', 'del') and \
- not confdata.rx:
- checkpoint_change = True
- if not checkpoint_change:
- return
-
- gconf.__dict__.update(defaults.__dict__)
- gcnf.update_to(gconf.__dict__)
- gconf.__dict__.update(opts.__dict__)
- gconf.configinterface = gcnf
-
- if restricted and gconf.allow_network:
- ssh_conn = os.getenv('SSH_CONNECTION')
- if not ssh_conn:
- #legacy env var
- ssh_conn = os.getenv('SSH_CLIENT')
- if ssh_conn:
- allowed_networks = [ IPNetwork(a) for a in gconf.allow_network.split(',') ]
- client_ip = IPAddress(ssh_conn.split()[0])
- allowed = False
- for nw in allowed_networks:
- if client_ip in nw:
- allowed = True
- break
- if not allowed:
- raise GsyncdError("client IP address is not allowed")
-
- ffd = rconf.get('feedback_fd')
- if ffd:
- fcntl.fcntl(ffd, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
-
- #normalize loglevel
- lvl0 = gconf.log_level
- if isinstance(lvl0, str):
- lvl1 = lvl0.upper()
- lvl2 = logging.getLevelName(lvl1)
- # I have _never_ _ever_ seen such an utterly braindead
- # error condition
- if lvl2 == "Level " + lvl1:
- raise GsyncdError('cannot recognize log level "%s"' % lvl0)
- gconf.log_level = lvl2
-
- if checkpoint_change:
- GLogger._gsyncd_loginit(log_file=gconf.log_file, label='conf')
- if confdata.op == 'set':
- logging.info('checkpoint %s set' % confdata.val)
- elif confdata.op == 'del':
- logging.info('checkpoint info was reset')
- return
-
- go_daemon = rconf['go_daemon']
- be_monitor = rconf.get('monitor')
-
- if not be_monitor and isinstance(remote, resource.SSH) and \
- go_daemon == 'should':
- go_daemon = 'postconn'
- log_file = None
- else:
- log_file = gconf.log_file
- if be_monitor:
- label = 'monitor'
- elif remote:
- #master
- label = ''
- else:
- label = 'slave'
- startup(go_daemon=go_daemon, log_file=log_file, label=label)
-
- if be_monitor:
- return monitor()
-
- logging.info("syncing: %s" % " -> ".join(peers))
- resource.Popen.init_errhandler()
- if remote:
- go_daemon = remote.connect_remote(go_daemon=go_daemon)
- if go_daemon:
- startup(go_daemon=go_daemon, log_file=gconf.log_file)
- # complete remote connection in child
- remote.connect_remote(go_daemon='done')
- local.connect()
- if ffd:
- os.close(ffd)
- local.service_loop(*[r for r in [remote] if r])
-
-
-if __name__ == "__main__":
- main()
diff --git a/xlators/features/marker/utils/syncdaemon/libcxattr.py b/xlators/features/marker/utils/syncdaemon/libcxattr.py
deleted file mode 100644
index f0a9d22920a..00000000000
--- a/xlators/features/marker/utils/syncdaemon/libcxattr.py
+++ /dev/null
@@ -1,72 +0,0 @@
-import os
-from ctypes import *
-from ctypes.util import find_library
-
-class Xattr(object):
- """singleton that wraps the extended attribues system
- interface for python using ctypes
-
- Just implement it to the degree we need it, in particular
- - we need just the l*xattr variants, ie. we never want symlinks to be
- followed
- - don't need size discovery for getxattr, as we always know the exact
- sizes we expect
- """
-
- libc = CDLL(find_library("libc"))
-
- @classmethod
- def geterrno(cls):
- return c_int.in_dll(cls.libc, 'errno').value
-
- @classmethod
- def raise_oserr(cls):
- errn = cls.geterrno()
- raise OSError(errn, os.strerror(errn))
-
- @classmethod
- def _query_xattr(cls, path, siz, syscall, *a):
- if siz:
- buf = create_string_buffer('\0' * siz)
- else:
- buf = None
- ret = getattr(cls.libc, syscall)(*((path,) + a + (buf, siz)))
- if ret == -1:
- cls.raise_oserr()
- if siz:
- return buf.raw[:ret]
- else:
- return ret
-
- @classmethod
- def lgetxattr(cls, path, attr, siz=0):
- return cls._query_xattr( path, siz, 'lgetxattr', attr)
-
- @classmethod
- def llistxattr(cls, path, siz=0):
- ret = cls._query_xattr(path, siz, 'llistxattr')
- if isinstance(ret, str):
- ret = ret.split('\0')
- return ret
-
- @classmethod
- def lsetxattr(cls, path, attr, val):
- ret = cls.libc.lsetxattr(path, attr, val, len(val), 0)
- if ret == -1:
- cls.raise_oserr()
-
- @classmethod
- def lremovexattr(cls, path, attr):
- ret = cls.libc.lremovexattr(path, attr)
- if ret == -1:
- cls.raise_oserr()
-
- @classmethod
- def llistxattr_buf(cls, path):
- """listxattr variant with size discovery"""
- size = cls.llistxattr(path)
- if size == -1:
- cls.raise_oserr()
- if size == 0:
- return []
- return cls.llistxattr(path, size)
diff --git a/xlators/features/marker/utils/syncdaemon/master.py b/xlators/features/marker/utils/syncdaemon/master.py
deleted file mode 100644
index 945ebb75dd2..00000000000
--- a/xlators/features/marker/utils/syncdaemon/master.py
+++ /dev/null
@@ -1,886 +0,0 @@
-import os
-import sys
-import time
-import stat
-import random
-import signal
-import logging
-import socket
-import errno
-from errno import ENOENT, ENODATA, EPIPE
-from threading import currentThread, Condition, Lock
-from datetime import datetime
-try:
- from hashlib import md5 as md5
-except ImportError:
- # py 2.4
- from md5 import new as md5
-
-from gconf import gconf
-from syncdutils import FreeObject, Thread, GsyncdError, boolify, \
- escape, unescape, select
-
-URXTIME = (-1, 0)
-
-# Utility functions to help us to get to closer proximity
-# of the DRY principle (no, don't look for elevated or
-# perspectivistic things here)
-
-def _xtime_now():
- t = time.time()
- sec = int(t)
- nsec = int((t - sec) * 1000000)
- return (sec, nsec)
-
-def _volinfo_hook_relax_foreign(self):
- volinfo_sys = self.get_sys_volinfo()
- fgn_vi = volinfo_sys[self.KFGN]
- if fgn_vi:
- expiry = fgn_vi['timeout'] - int(time.time()) + 1
- logging.info('foreign volume info found, waiting %d sec for expiry' % \
- expiry)
- time.sleep(expiry)
- volinfo_sys = self.get_sys_volinfo()
- self.volinfo_state, state_change = self.volinfo_state_machine(self.volinfo_state,
- volinfo_sys)
- if self.inter_master:
- raise GsyncdError("cannot be intermediate master in special mode")
- return (volinfo_sys, state_change)
-
-
-# The API!
-
-def gmaster_builder():
- """produce the GMaster class variant corresponding
- to sync mode"""
- this = sys.modules[__name__]
- mixin = gconf.special_sync_mode
- if not mixin:
- mixin = 'normal'
- logging.info('setting up master for %s sync mode' % mixin)
- mixin = getattr(this, mixin.capitalize() + 'Mixin')
- class _GMaster(GMasterBase, mixin):
- pass
- return _GMaster
-
-
-# Mixin classes that implement the data format
-# and logic particularities of the certain
-# sync modes
-
-class NormalMixin(object):
- """normal geo-rep behavior"""
-
- minus_infinity = URXTIME
-
- # following staticmethods ideally would be
- # methods of an xtime object (in particular,
- # implementing the hooks needed for comparison
- # operators), but at this point we don't yet
- # have a dedicated xtime class
-
- @staticmethod
- def serialize_xtime(xt):
- return "%d.%d" % tuple(xt)
-
- @staticmethod
- def deserialize_xtime(xt):
- return tuple(int(x) for x in xt.split("."))
-
- @staticmethod
- def native_xtime(xt):
- return xt
-
- @staticmethod
- def xtime_geq(xt0, xt1):
- return xt0 >= xt1
-
- def make_xtime_opts(self, is_master, opts):
- if not 'create' in opts:
- opts['create'] = is_master and not self.inter_master
- if not 'default_xtime' in opts:
- if is_master and self.inter_master:
- opts['default_xtime'] = ENODATA
- else:
- opts['default_xtime'] = URXTIME
-
- def xtime_low(self, server, path, **opts):
- xt = server.xtime(path, self.uuid)
- if isinstance(xt, int) and xt != ENODATA:
- return xt
- if xt == ENODATA or xt < self.volmark:
- if opts['create']:
- xt = _xtime_now()
- server.set_xtime(path, self.uuid, xt)
- else:
- xt = opts['default_xtime']
- return xt
-
- def keepalive_payload_hook(self, timo, gap):
- # first grab a reference as self.volinfo
- # can be changed in main thread
- vi = self.volinfo
- if vi:
- # then have a private copy which we can mod
- vi = vi.copy()
- vi['timeout'] = int(time.time()) + timo
- else:
- # send keep-alives more frequently to
- # avoid a delay in announcing our volume info
- # to slave if it becomes established in the
- # meantime
- gap = min(10, gap)
- return (vi, gap)
-
- def volinfo_hook(self):
- volinfo_sys = self.get_sys_volinfo()
- self.volinfo_state, state_change = self.volinfo_state_machine(self.volinfo_state,
- volinfo_sys)
- return (volinfo_sys, state_change)
-
- def xtime_reversion_hook(self, path, xtl, xtr):
- if xtr > xtl:
- raise GsyncdError("timestamp corruption for " + path)
-
- def need_sync(self, e, xte, xtrd):
- return xte > xtrd
-
- def set_slave_xtime(self, path, mark):
- self.slave.server.set_xtime(path, self.uuid, mark)
-
-class WrapupMixin(NormalMixin):
- """a variant that differs from normal in terms
- of ignoring non-indexed files"""
-
- @staticmethod
- def make_xtime_opts(is_master, opts):
- if not 'create' in opts:
- opts['create'] = False
- if not 'default_xtime' in opts:
- opts['default_xtime'] = URXTIME
-
- @staticmethod
- def keepalive_payload_hook(timo, gap):
- return (None, gap)
-
- def volinfo_hook(self):
- return _volinfo_hook_relax_foreign(self)
-
-class BlindMixin(object):
- """Geo-rep flavor using vectored xtime.
-
- Coordinates are the master, slave uuid pair;
- in master coordinate behavior is normal,
- in slave coordinate we force synchronization
- on any value difference (these are in disjunctive
- relation, ie. if either orders the entry to be
- synced, it shall be synced.
- """
-
- minus_infinity = (URXTIME, None)
-
- @staticmethod
- def serialize_xtime(xt):
- a = []
- for x in xt:
- if not x:
- x = ('None', '')
- a.extend(x)
- return '.'.join(str(n) for n in a)
-
- @staticmethod
- def deserialize_xtime(xt):
- a = xt.split(".")
- a = (tuple(a[0:2]), tuple(a[3:4]))
- b = []
- for p in a:
- if p[0] == 'None':
- p = None
- else:
- p = tuple(int(x) for x in p)
- b.append(p)
- return tuple(b)
-
- @staticmethod
- def native_xtime(xt):
- return xt[0]
-
- @staticmethod
- def xtime_geq(xt0, xt1):
- return (not xt1[0] or xt0[0] >= xt1[0]) and \
- (not xt1[1] or xt0[1] >= xt1[1])
-
- @property
- def ruuid(self):
- if self.volinfo_r:
- return self.volinfo_r['uuid']
-
- @staticmethod
- def make_xtime_opts(is_master, opts):
- if not 'create' in opts:
- opts['create'] = is_master
- if not 'default_xtime' in opts:
- opts['default_xtime'] = URXTIME
-
- def xtime_low(self, server, path, **opts):
- xtd = server.xtime_vec(path, self.uuid, self.ruuid)
- if isinstance(xtd, int):
- return xtd
- xt = (xtd[self.uuid], xtd[self.ruuid])
- if not xt[1] and (not xt[0] or xt[0] < self.volmark):
- if opts['create']:
- # not expected, but can happen if file originates
- # from interrupted gsyncd transfer
- logging.warn('have to fix up missing xtime on ' + path)
- xt0 = _xtime_now()
- server.set_xtime(path, self.uuid, xt0)
- else:
- xt0 = opts['default_xtime']
- xt = (xt0, xt[1])
- return xt
-
- @staticmethod
- def keepalive_payload_hook(timo, gap):
- return (None, gap)
-
- def volinfo_hook(self):
- res = _volinfo_hook_relax_foreign(self)
- volinfo_r_new = self.slave.server.native_volume_info()
- if volinfo_r_new['retval']:
- raise GsyncdError("slave is corrupt")
- if getattr(self, 'volinfo_r', None):
- if self.volinfo_r['uuid'] != volinfo_r_new['uuid']:
- raise GsyncdError("uuid mismatch on slave")
- self.volinfo_r = volinfo_r_new
- return res
-
- def xtime_reversion_hook(self, path, xtl, xtr):
- if not isinstance(xtr[0], int) and \
- (isinstance(xtl[0], int) or xtr[0] > xtl[0]):
- raise GsyncdError("timestamp corruption for " + path)
-
- def need_sync(self, e, xte, xtrd):
- if xte[0]:
- if not xtrd[0] or xte[0] > xtrd[0]:
- # there is outstanding diff at 0th pos,
- # we can short-cut to true
- return True
- # we arrived to this point by either of these
- # two possiblilites:
- # - no outstanding difference at 0th pos,
- # wanna see 1st pos if he raises veto
- # against "no need to sync" proposal
- # - no data at 0th pos, 1st pos will have
- # to decide (due to xtime assignment,
- # in this case 1st pos does carry data
- # -- iow, if 1st pos did not have data,
- # and 0th neither, 0th would have been
- # force-feeded)
- if not xte[1]:
- # no data, no veto
- return False
- # the hard work: for 1st pos,
- # the conduct is fetch corresponding
- # slave data and do a "blind" comparison
- # (ie. do not care who is newer, we trigger
- # sync on non-identical xitmes)
- xtr = self.xtime(e, self.slave)
- return isinstance(xtr, int) or xte[1] != xtr[1]
-
- def set_slave_xtime(self, path, mark):
- xtd = {}
- for (u, t) in zip((self.uuid, self.ruuid), mark):
- if t:
- xtd[u] = t
- self.slave.server.set_xtime_vec(path, xtd)
-
-
-class GMasterBase(object):
- """abstract class impementling master role"""
-
- KFGN = 0
- KNAT = 1
-
- def get_sys_volinfo(self):
- """query volume marks on fs root
-
- err out on multiple foreign masters
- """
- fgn_vis, nat_vi = self.master.server.foreign_volume_infos(), \
- self.master.server.native_volume_info()
- fgn_vi = None
- if fgn_vis:
- if len(fgn_vis) > 1:
- raise GsyncdError("cannot work with multiple foreign masters")
- fgn_vi = fgn_vis[0]
- return fgn_vi, nat_vi
-
- @property
- def uuid(self):
- if self.volinfo:
- return self.volinfo['uuid']
-
- @property
- def volmark(self):
- if self.volinfo:
- return self.volinfo['volume_mark']
-
- @property
- def inter_master(self):
- """decide if we are an intermediate master
- in a cascading setup
- """
- return self.volinfo_state[self.KFGN] and True or False
-
- def xtime(self, path, *a, **opts):
- """get amended xtime
-
- as of amending, we can create missing xtime, or
- determine a valid value if what we get is expired
- (as of the volume mark expiry); way of amendig
- depends on @opts and on subject of query (master
- or slave).
- """
- if a:
- rsc = a[0]
- else:
- rsc = self.master
- self.make_xtime_opts(rsc == self.master, opts)
- return self.xtime_low(rsc.server, path, **opts)
-
- def __init__(self, master, slave):
- self.master = master
- self.slave = slave
- self.jobtab = {}
- self.syncer = Syncer(slave)
- # crawls vs. turns:
- # - self.crawls is simply the number of crawl() invocations on root
- # - one turn is a maximal consecutive sequence of crawls so that each
- # crawl in it detects a change to be synced
- # - self.turns is the number of turns since start
- # - self.total_turns is a limit so that if self.turns reaches it, then
- # we exit (for diagnostic purposes)
- # so, eg., if the master fs changes unceasingly, self.turns will remain 0.
- self.crawls = 0
- self.turns = 0
- self.total_turns = int(gconf.turns)
- self.lastreport = {'crawls': 0, 'turns': 0}
- self.start = None
- self.change_seen = None
- # the authoritative (foreign, native) volinfo pair
- # which lets us deduce what to do when we refetch
- # the volinfos from system
- uuid_preset = getattr(gconf, 'volume_id', None)
- self.volinfo_state = (uuid_preset and {'uuid': uuid_preset}, None)
- # the actual volinfo we make use of
- self.volinfo = None
- self.terminate = False
- self.checkpoint_thread = None
-
- @classmethod
- def _checkpt_param(cls, chkpt, prm, xtimish=True):
- """use config backend to lookup a parameter belonging to
- checkpoint @chkpt"""
- cprm = getattr(gconf, 'checkpoint_' + prm, None)
- if not cprm:
- return
- chkpt_mapped, val = cprm.split(':', 1)
- if unescape(chkpt_mapped) != chkpt:
- return
- if xtimish:
- val = cls.deserialize_xtime(val)
- return val
-
- @classmethod
- def _set_checkpt_param(cls, chkpt, prm, val, xtimish=True):
- """use config backend to store a parameter associated
- with checkpoint @chkpt"""
- if xtimish:
- val = cls.serialize_xtime(val)
- gconf.configinterface.set('checkpoint_' + prm, "%s:%s" % (escape(chkpt), val))
-
- @staticmethod
- def humantime(*tpair):
- """format xtime-like (sec, nsec) pair to human readable format"""
- ts = datetime.fromtimestamp(float('.'.join(str(n) for n in tpair))).\
- strftime("%Y-%m-%d %H:%M:%S")
- if len(tpair) > 1:
- ts += '.' + str(tpair[1])
- return ts
-
- def checkpt_service(self, chan, chkpt, tgt):
- """checkpoint service loop
-
- monitor and verify checkpoint status for @chkpt, and listen
- for incoming requests for whom we serve a pretty-formatted
- status report"""
- if not chkpt:
- # dummy loop for the case when there is no checkpt set
- while True:
- select([chan], [], [])
- conn, _ = chan.accept()
- conn.send('\0')
- conn.close()
- completed = self._checkpt_param(chkpt, 'completed', xtimish=False)
- if completed:
- completed = tuple(int(x) for x in completed.split('.'))
- while True:
- s,_,_ = select([chan], [], [], (not completed) and 5 or None)
- # either request made and we re-check to not
- # give back stale data, or we still hunting for completion
- if self.native_xtime(tgt) and self.native_xtime(tgt) < self.volmark:
- # indexing has been reset since setting the checkpoint
- status = "is invalid"
- else:
- xtr = self.xtime('.', self.slave)
- if isinstance(xtr, int):
- raise GsyncdError("slave root directory is unaccessible (%s)",
- os.strerror(xtr))
- ncompleted = self.xtime_geq(xtr, tgt)
- if completed and not ncompleted: # stale data
- logging.warn("completion time %s for checkpoint %s became stale" % \
- (self.humantime(*completed), chkpt))
- completed = None
- gconf.confdata.delete('checkpoint-completed')
- if ncompleted and not completed: # just reaching completion
- completed = "%.6f" % time.time()
- self._set_checkpt_param(chkpt, 'completed', completed, xtimish=False)
- completed = tuple(int(x) for x in completed.split('.'))
- logging.info("checkpoint %s completed" % chkpt)
- status = completed and \
- "completed at " + self.humantime(completed[0]) or \
- "not reached yet"
- if s:
- conn = None
- try:
- conn, _ = chan.accept()
- try:
- conn.send(" | checkpoint %s %s\0" % (chkpt, status))
- except:
- exc = sys.exc_info()[1]
- if (isinstance(exc, OSError) or isinstance(exc, IOError)) and \
- exc.errno == EPIPE:
- logging.debug('checkpoint client disconnected')
- else:
- raise
- finally:
- if conn:
- conn.close()
-
- def start_checkpoint_thread(self):
- """prepare and start checkpoint service"""
- if self.checkpoint_thread or not getattr(gconf, 'state_socket_unencoded', None):
- return
- chan = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
- state_socket = "/tmp/%s.socket" % md5(gconf.state_socket_unencoded).hexdigest()
- try:
- os.unlink(state_socket)
- except:
- if sys.exc_info()[0] == OSError:
- pass
- chan.bind(state_socket)
- chan.listen(1)
- checkpt_tgt = None
- if gconf.checkpoint:
- checkpt_tgt = self._checkpt_param(gconf.checkpoint, 'target')
- if not checkpt_tgt:
- checkpt_tgt = self.xtime('.')
- if isinstance(checkpt_tgt, int):
- raise GsyncdError("master root directory is unaccessible (%s)",
- os.strerror(checkpt_tgt))
- self._set_checkpt_param(gconf.checkpoint, 'target', checkpt_tgt)
- logging.debug("checkpoint target %s has been determined for checkpoint %s" % \
- (repr(checkpt_tgt), gconf.checkpoint))
- t = Thread(target=self.checkpt_service, args=(chan, gconf.checkpoint, checkpt_tgt))
- t.start()
- self.checkpoint_thread = t
-
- def crawl_loop(self):
- """start the keep-alive thread and iterate .crawl"""
- timo = int(gconf.timeout or 0)
- if timo > 0:
- def keep_alive():
- while True:
- vi, gap = self.keepalive_payload_hook(timo, timo * 0.5)
- self.slave.server.keep_alive(vi)
- time.sleep(gap)
- t = Thread(target=keep_alive)
- t.start()
- self.lastreport['time'] = time.time()
- while not self.terminate:
- self.crawl()
-
- def add_job(self, path, label, job, *a, **kw):
- """insert @job function to job table at @path with @label"""
- if self.jobtab.get(path) == None:
- self.jobtab[path] = []
- self.jobtab[path].append((label, a, lambda : job(*a, **kw)))
-
- def add_failjob(self, path, label):
- """invoke .add_job with a job that does nothing just fails"""
- logging.debug('salvaged: ' + label)
- self.add_job(path, label, lambda: False)
-
- def wait(self, path, *args):
- """perform jobs registered for @path
-
- Reset jobtab entry for @path,
- determine success as the conjuction of
- success of all the jobs. In case of
- success, call .sendmark on @path
- """
- jobs = self.jobtab.pop(path, [])
- succeed = True
- for j in jobs:
- ret = j[-1]()
- if not ret:
- succeed = False
- if succeed:
- self.sendmark(path, *args)
- return succeed
-
- def sendmark(self, path, mark, adct=None):
- """update slave side xtime for @path to master side xtime
-
- also can send a setattr payload (see Server.setattr).
- """
- if adct:
- self.slave.server.setattr(path, adct)
- self.set_slave_xtime(path, mark)
-
- @staticmethod
- def volinfo_state_machine(volinfo_state, volinfo_sys):
- """compute new volinfo_state from old one and incoming
- as of current system state, also indicating if there was a
- change regarding which volume mark is the authoritative one
-
- @volinfo_state, @volinfo_sys are pairs of volume mark dicts
- (foreign, native).
-
- Note this method is marked as static, ie. the computation is
- pure, without reliance on any excess implicit state. State
- transitions which are deemed as ambiguous or banned will raise
- an exception.
-
- """
- # store the value below "boxed" to emulate proper closures
- # (variables of the enclosing scope are available inner functions
- # provided they are no reassigned; mutation is OK).
- param = FreeObject(relax_mismatch = False, state_change = None, index=-1)
- def select_vi(vi0, vi):
- param.index += 1
- if vi and (not vi0 or vi0['uuid'] == vi['uuid']):
- if not vi0 and not param.relax_mismatch:
- param.state_change = param.index
- # valid new value found; for the rest, we are graceful about
- # uuid mismatch
- param.relax_mismatch = True
- return vi
- if vi0 and vi and vi0['uuid'] != vi['uuid'] and not param.relax_mismatch:
- # uuid mismatch for master candidate, bail out
- raise GsyncdError("aborting on uuid change from %s to %s" % \
- (vi0['uuid'], vi['uuid']))
- # fall back to old
- return vi0
- newstate = tuple(select_vi(*vip) for vip in zip(volinfo_state, volinfo_sys))
- srep = lambda vi: vi and vi['uuid'][0:8]
- logging.debug('(%s, %s) << (%s, %s) -> (%s, %s)' % \
- tuple(srep(vi) for vi in volinfo_state + volinfo_sys + newstate))
- return newstate, param.state_change
-
- def crawl(self, path='.', xtl=None):
- """crawling...
-
- Standing around
- All the right people
- Crawling
- Tennis on Tuesday
- The ladder is long
- It is your nature
- You've gotta suntan
- Football on Sunday
- Society boy
-
- Recursively walk the master side tree and check if updates are
- needed due to xtime differences. One invocation of crawl checks
- children of @path and do a recursive enter only on
- those directory children where there is an update needed.
-
- Way of updates depend on file type:
- - for symlinks, sync them directy and synchronously
- - for regular children, register jobs for @path (cf. .add_job) to start
- and wait on their rsync
- - for directory children, register a job for @path which waits (.wait)
- on jobs for the given child
- (other kind of filesystem nodes are not considered)
-
- Those slave side children which do not exist on master are simply
- purged (see Server.purge).
-
- Behavior is fault tolerant, synchronization is adaptive: if some action fails,
- just go on relentlessly, adding a fail job (see .add_failjob) which will prevent
- the .sendmark on @path, so when the next crawl will arrive to @path it will not
- see it as up-to-date and will try to sync it again. While this semantics can be
- supported by funky design principles (http://c2.com/cgi/wiki?LazinessImpatienceHubris),
- the ultimate reason which excludes other possibilities is simply transience: we cannot
- assert that the file systems (master / slave) underneath do not change and actions
- taken upon some condition will not lose their context by the time they are performed.
- """
- if path == '.':
- if self.start:
- self.crawls += 1
- logging.debug("... crawl #%d done, took %.6f seconds" % \
- (self.crawls, time.time() - self.start))
- time.sleep(1)
- self.start = time.time()
- should_display_info = self.start - self.lastreport['time'] >= 60
- if should_display_info:
- logging.info("completed %d crawls, %d turns",
- self.crawls - self.lastreport['crawls'],
- self.turns - self.lastreport['turns'])
- self.lastreport.update(crawls = self.crawls,
- turns = self.turns,
- time = self.start)
- volinfo_sys, state_change = self.volinfo_hook()
- if self.inter_master:
- self.volinfo = volinfo_sys[self.KFGN]
- else:
- self.volinfo = volinfo_sys[self.KNAT]
- if state_change == self.KFGN or (state_change == self.KNAT and not self.inter_master):
- logging.info('new master is %s', self.uuid)
- if self.volinfo:
- logging.info("%s master with volume id %s ..." % \
- (self.inter_master and "intermediate" or "primary",
- self.uuid))
- if state_change == self.KFGN:
- gconf.configinterface.set('volume_id', self.uuid)
- if self.volinfo:
- if self.volinfo['retval']:
- raise GsyncdError ("master is corrupt")
- self.start_checkpoint_thread()
- else:
- if should_display_info or self.crawls == 0:
- if self.inter_master:
- logging.info("waiting for being synced from %s ..." % \
- self.volinfo_state[self.KFGN]['uuid'])
- else:
- logging.info("waiting for volume info ...")
- return
- logging.debug("entering " + path)
- if not xtl:
- xtl = self.xtime(path)
- if isinstance(xtl, int):
- self.add_failjob(path, 'no-local-node')
- return
- xtr = self.xtime(path, self.slave)
- if isinstance(xtr, int):
- if xtr != ENOENT:
- self.slave.server.purge(path)
- try:
- self.slave.server.mkdir(path)
- except OSError:
- self.add_failjob(path, 'no-remote-node')
- return
- xtr = self.minus_infinity
- else:
- self.xtime_reversion_hook(path, xtl, xtr)
- if xtl == xtr:
- if path == '.' and self.change_seen:
- self.turns += 1
- self.change_seen = False
- if self.total_turns:
- logging.info("finished turn #%s/%s" % \
- (self.turns, self.total_turns))
- if self.turns == self.total_turns:
- logging.info("reached turn limit")
- self.terminate = True
- return
- if path == '.':
- self.change_seen = True
- try:
- dem = self.master.server.entries(path)
- except OSError:
- self.add_failjob(path, 'local-entries-fail')
- return
- random.shuffle(dem)
- try:
- des = self.slave.server.entries(path)
- except OSError:
- self.slave.server.purge(path)
- try:
- self.slave.server.mkdir(path)
- des = self.slave.server.entries(path)
- except OSError:
- self.add_failjob(path, 'remote-entries-fail')
- return
- dd = set(des) - set(dem)
- if dd and not boolify(gconf.ignore_deletes):
- self.slave.server.purge(path, dd)
- chld = []
- for e in dem:
- e = os.path.join(path, e)
- xte = self.xtime(e)
- if isinstance(xte, int):
- logging.warn("irregular xtime for %s: %s" % (e, errno.errorcode[xte]))
- elif self.need_sync(e, xte, xtr):
- chld.append((e, xte))
- def indulgently(e, fnc, blame=None):
- if not blame:
- blame = path
- try:
- return fnc(e)
- except (IOError, OSError):
- ex = sys.exc_info()[1]
- if ex.errno == ENOENT:
- logging.warn("salvaged ENOENT for " + e)
- self.add_failjob(blame, 'by-indulgently')
- return False
- else:
- raise
- for e, xte in chld:
- st = indulgently(e, lambda e: os.lstat(e))
- if st == False:
- continue
- mo = st.st_mode
- adct = {'own': (st.st_uid, st.st_gid)}
- if stat.S_ISLNK(mo):
- if indulgently(e, lambda e: self.slave.server.symlink(os.readlink(e), e)) == False:
- continue
- self.sendmark(e, xte, adct)
- elif stat.S_ISREG(mo):
- logging.debug("syncing %s ..." % e)
- pb = self.syncer.add(e)
- def regjob(e, xte, pb):
- if pb.wait():
- logging.debug("synced " + e)
- self.sendmark(e, xte)
- return True
- else:
- logging.warn("failed to sync " + e)
- self.add_job(path, 'reg', regjob, e, xte, pb)
- elif stat.S_ISDIR(mo):
- adct['mode'] = mo
- if indulgently(e, lambda e: (self.add_job(path, 'cwait', self.wait, e, xte, adct),
- self.crawl(e, xte),
- True)[-1], blame=e) == False:
- continue
- else:
- # ignore fifos, sockets and special files
- pass
- if path == '.':
- self.wait(path, xtl)
-
-class BoxClosedErr(Exception):
- pass
-
-class PostBox(list):
- """synchronized collection for storing things thought of as "requests" """
-
- def __init__(self, *a):
- list.__init__(self, *a)
- # too bad Python stdlib does not have read/write locks...
- # it would suffivce to grab the lock in .append as reader, in .close as writer
- self.lever = Condition()
- self.open = True
- self.done = False
-
- def wait(self):
- """wait on requests to be processed"""
- self.lever.acquire()
- if not self.done:
- self.lever.wait()
- self.lever.release()
- return self.result
-
- def wakeup(self, data):
- """wake up requestors with the result"""
- self.result = data
- self.lever.acquire()
- self.done = True
- self.lever.notifyAll()
- self.lever.release()
-
- def append(self, e):
- """post a request"""
- self.lever.acquire()
- if not self.open:
- raise BoxClosedErr
- list.append(self, e)
- self.lever.release()
-
- def close(self):
- """prohibit the posting of further requests"""
- self.lever.acquire()
- self.open = False
- self.lever.release()
-
-class Syncer(object):
- """a staged queue to relay rsync requests to rsync workers
-
- By "staged queue" its meant that when a consumer comes to the
- queue, it takes _all_ entries, leaving the queue empty.
- (I don't know if there is an official term for this pattern.)
-
- The queue uses a PostBox to accumulate incoming items.
- When a consumer (rsync worker) comes, a new PostBox is
- set up and the old one is passed on to the consumer.
-
- Instead of the simplistic scheme of having one big lock
- which synchronizes both the addition of new items and
- PostBox exchanges, use a separate lock to arbitrate consumers,
- and rely on PostBox's synchronization mechanisms take
- care about additions.
-
- There is a corner case racy situation, producers vs. consumers,
- which is not handled by this scheme: namely, when the PostBox
- exchange occurs in between being passed to the producer for posting
- and the post placement. But that's what Postbox.close is for:
- such a posting will find the PostBox closed, in which case
- the producer can re-try posting against the actual PostBox of
- the queue.
-
- To aid accumlation of items in the PostBoxen before grabbed
- by an rsync worker, the worker goes to sleep a bit after
- each completed syncjob.
- """
-
- def __init__(self, slave):
- """spawn worker threads"""
- self.slave = slave
- self.lock = Lock()
- self.pb = PostBox()
- for i in range(int(gconf.sync_jobs)):
- t = Thread(target=self.syncjob)
- t.start()
-
- def syncjob(self):
- """the life of a worker"""
- while True:
- pb = None
- while True:
- self.lock.acquire()
- if self.pb:
- pb, self.pb = self.pb, PostBox()
- self.lock.release()
- if pb:
- break
- time.sleep(0.5)
- pb.close()
- po = self.slave.rsync(pb)
- if po.returncode == 0:
- ret = True
- elif po.returncode in (23, 24):
- # partial transfer (cf. rsync(1)), that's normal
- ret = False
- else:
- po.errfail()
- pb.wakeup(ret)
-
- def add(self, e):
- while True:
- pb = self.pb
- try:
- pb.append(e)
- return pb
- except BoxClosedErr:
- pass
diff --git a/xlators/features/marker/utils/syncdaemon/monitor.py b/xlators/features/marker/utils/syncdaemon/monitor.py
deleted file mode 100644
index b8956dcc2b9..00000000000
--- a/xlators/features/marker/utils/syncdaemon/monitor.py
+++ /dev/null
@@ -1,129 +0,0 @@
-import os
-import sys
-import time
-import signal
-import logging
-from gconf import gconf
-from syncdutils import update_file, select, waitpid, set_term_handler
-
-class Monitor(object):
- """class which spawns and manages gsyncd workers"""
-
- def __init__(self):
- self.state = None
-
- def set_state(self, state):
- """set the state that can be used by external agents
- like glusterd for status reporting"""
- if state == self.state:
- return
- self.state = state
- logging.info('new state: %s' % state)
- if getattr(gconf, 'state_file', None):
- update_file(gconf.state_file, lambda f: f.write(state + '\n'))
-
- def monitor(self):
- """the monitor loop
-
- Basic logic is a blantantly simple blunt heuristics:
- if spawned client survives 60 secs, it's considered OK.
- This servers us pretty well as it's not vulneralbe to
- any kind of irregular behavior of the child...
-
- ... well, except for one: if children is hung up on
- waiting for some event, it can survive aeons, still
- will be defunct. So we tweak the above logic to
- expect the worker to send us a signal within 60 secs
- (in the form of closing its end of a pipe). The worker
- does this when it's done with the setup stage
- ready to enter the service loop (note it's the setup
- stage which is vulnerable to hangs -- the full
- blown worker blows up on EPIPE if the net goes down,
- due to the keep-alive thread)
- """
- def sigcont_handler(*a):
- """
- Re-init logging and send group kill signal
- """
- md = gconf.log_metadata
- logging.shutdown()
- lcls = logging.getLoggerClass()
- lcls.setup(label=md.get('saved_label'), **md)
- pid = os.getpid()
- os.kill(-pid, signal.SIGUSR1)
- signal.signal(signal.SIGUSR1, lambda *a: ())
- signal.signal(signal.SIGCONT, sigcont_handler)
-
- argv = sys.argv[:]
- for o in ('-N', '--no-daemon', '--monitor'):
- while o in argv:
- argv.remove(o)
- argv.extend(('-N', '-p', ''))
- argv.insert(0, os.path.basename(sys.executable))
-
- self.set_state('starting...')
- ret = 0
- def nwait(p, o=0):
- p2, r = waitpid(p, o)
- if not p2:
- return
- return r
- def exit_signalled(s):
- """ child teminated due to receipt of SIGUSR1 """
- return (os.WIFSIGNALED(s) and (os.WTERMSIG(s) == signal.SIGUSR1))
- def exit_status(s):
- if os.WIFEXITED(s):
- return os.WEXITSTATUS(s)
- return 1
- conn_timeout = int(gconf.connection_timeout)
- while ret in (0, 1):
- logging.info('-' * conn_timeout)
- logging.info('starting gsyncd worker')
- pr, pw = os.pipe()
- cpid = os.fork()
- if cpid == 0:
- os.close(pr)
- os.execv(sys.executable, argv + ['--feedback-fd', str(pw)])
- os.close(pw)
- t0 = time.time()
- so = select((pr,), (), (), conn_timeout)[0]
- os.close(pr)
- if so:
- ret = nwait(cpid, os.WNOHANG)
- if ret != None:
- logging.debug("worker died before establishing connection")
- else:
- logging.debug("worker seems to be connected (?? racy check)")
- while time.time() < t0 + conn_timeout:
- ret = nwait(cpid, os.WNOHANG)
- if ret != None:
- logging.debug("worker died in startup phase")
- break
- time.sleep(1)
- else:
- logging.debug("worker not confirmed in %d sec, aborting it" % \
- conn_timeout)
- # relax one SIGTERM by setting a handler that sets back
- # standard handler
- set_term_handler(lambda *a: set_term_handler())
- # give a chance to graceful exit
- os.kill(-os.getpid(), signal.SIGTERM)
- time.sleep(1)
- os.kill(cpid, signal.SIGKILL)
- ret = nwait(cpid)
- if ret == None:
- self.set_state('OK')
- ret = nwait(cpid)
- if exit_signalled(ret):
- ret = 0
- else:
- ret = exit_status(ret)
- if ret in (0,1):
- self.set_state('faulty')
- time.sleep(10)
- self.set_state('inconsistent')
- return ret
-
-def monitor():
- """oh yeah, actually Monitor is used as singleton, too"""
- return Monitor().monitor()
diff --git a/xlators/features/marker/utils/syncdaemon/resource.py b/xlators/features/marker/utils/syncdaemon/resource.py
deleted file mode 100644
index 7e62fd48ca9..00000000000
--- a/xlators/features/marker/utils/syncdaemon/resource.py
+++ /dev/null
@@ -1,945 +0,0 @@
-import re
-import os
-import sys
-import stat
-import time
-import fcntl
-import errno
-import struct
-import socket
-import logging
-import tempfile
-import threading
-import subprocess
-from errno import EEXIST, ENOENT, ENODATA, ENOTDIR, ELOOP, EISDIR
-from select import error as selecterror
-
-from gconf import gconf
-import repce
-from repce import RepceServer, RepceClient
-from master import gmaster_builder
-import syncdutils
-from syncdutils import GsyncdError, select, privileged
-
-UrlRX = re.compile('\A(\w+)://([^ *?[]*)\Z')
-HostRX = re.compile('[a-z\d](?:[a-z\d.-]*[a-z\d])?', re.I)
-UserRX = re.compile("[\w!\#$%&'*+-\/=?^_`{|}~]+")
-
-def sup(x, *a, **kw):
- """a rubyesque "super" for python ;)
-
- invoke caller method in parent class with given args.
- """
- return getattr(super(type(x), x), sys._getframe(1).f_code.co_name)(*a, **kw)
-
-def desugar(ustr):
- """transform sugared url strings to standard <scheme>://<urlbody> form
-
- parsing logic enforces the constraint that sugared forms should contatin
- a ':' or a '/', which ensures that sugared urls do not conflict with
- gluster volume names.
- """
- m = re.match('([^:]*):(.*)', ustr)
- if m:
- if not m.groups()[0]:
- return "gluster://localhost" + ustr
- elif '@' in m.groups()[0] or re.search('[:/]', m.groups()[1]):
- return "ssh://" + ustr
- else:
- return "gluster://" + ustr
- else:
- if ustr[0] != '/':
- raise GsyncdError("cannot resolve sugared url '%s'" % ustr)
- ap = os.path.normpath(ustr)
- if ap.startswith('//'):
- ap = ap[1:]
- return "file://" + ap
-
-def gethostbyname(hnam):
- """gethostbyname wrapper"""
- try:
- return socket.gethostbyname(hnam)
- except socket.gaierror:
- ex = sys.exc_info()[1]
- raise GsyncdError("failed to resolve %s: %s" % \
- (hnam, ex.strerror))
-
-def parse_url(ustr):
- """instantiate an url object by scheme-to-class dispatch
-
- The url classes taken into consideration are the ones in
- this module whose names are full-caps.
- """
- m = UrlRX.match(ustr)
- if not m:
- ustr = desugar(ustr)
- m = UrlRX.match(ustr)
- if not m:
- raise GsyncdError("malformed url")
- sch, path = m.groups()
- this = sys.modules[__name__]
- if not hasattr(this, sch.upper()):
- raise GsyncdError("unknown url scheme " + sch)
- return getattr(this, sch.upper())(path)
-
-
-class _MetaXattr(object):
- """singleton class, a lazy wrapper around the
- libcxattr module
-
- libcxattr (a heavy import due to ctypes) is
- loaded only when when the single
- instance is tried to be used.
-
- This reduces runtime for those invocations
- which do not need filesystem manipulation
- (eg. for config, url parsing)
- """
-
- def __getattr__(self, meth):
- from libcxattr import Xattr as LXattr
- xmeth = [ m for m in dir(LXattr) if m[0] != '_' ]
- if not meth in xmeth:
- return
- for m in xmeth:
- setattr(self, m, getattr(LXattr, m))
- return getattr(self, meth)
-
-Xattr = _MetaXattr()
-
-
-class Popen(subprocess.Popen):
- """customized subclass of subprocess.Popen with a ring
- buffer for children error output"""
-
- @classmethod
- def init_errhandler(cls):
- """start the thread which handles children's error output"""
- cls.errstore = {}
- def tailer():
- while True:
- errstore = cls.errstore.copy()
- try:
- poe, _ ,_ = select([po.stderr for po in errstore], [], [], 1)
- except ValueError, selecterror:
- continue
- for po in errstore:
- if po.stderr not in poe:
- next
- po.lock.acquire()
- try:
- la = errstore.get(po)
- if la == None:
- continue
- try:
- fd = po.stderr.fileno()
- except ValueError: # file is already closed
- continue
- l = os.read(fd, 1024)
- if not l:
- continue
- tots = len(l)
- for lx in la:
- tots += len(lx)
- while tots > 1<<20 and la:
- tots -= len(la.pop(0))
- la.append(l)
- finally:
- po.lock.release()
- t = syncdutils.Thread(target = tailer)
- t.start()
- cls.errhandler = t
-
- @classmethod
- def fork(cls):
- """fork wrapper that restarts errhandler thread in child"""
- pid = os.fork()
- if not pid:
- cls.init_errhandler()
- return pid
-
- def __init__(self, args, *a, **kw):
- """customizations for subprocess.Popen instantiation
-
- - 'close_fds' is taken to be the default
- - if child's stderr is chosen to be managed,
- register it with the error handler thread
- """
- self.args = args
- if 'close_fds' not in kw:
- kw['close_fds'] = True
- self.lock = threading.Lock()
- try:
- sup(self, args, *a, **kw)
- except:
- ex = sys.exc_info()[1]
- if not isinstance(ex, OSError):
- raise
- raise GsyncdError("""execution of "%s" failed with %s (%s)""" % \
- (args[0], errno.errorcode[ex.errno], os.strerror(ex.errno)))
- if kw['stderr'] == subprocess.PIPE:
- assert(getattr(self, 'errhandler', None))
- self.errstore[self] = []
-
- def errlog(self):
- """make a log about child's failure event"""
- filling = ""
- if self.elines:
- filling = ", saying:"
- logging.error("""command "%s" returned with %s%s""" % \
- (" ".join(self.args), repr(self.returncode), filling))
- for l in self.elines:
- for ll in l.rstrip().split("\n"):
- logging.error(self.args[0] + "> " + ll.rstrip())
-
- def errfail(self):
- """fail nicely if child did not terminate with success"""
- self.errlog()
- syncdutils.finalize(exval = 1)
-
- def terminate_geterr(self, fail_on_err = True):
- """kill child, finalize stderr harvesting (unregister
- from errhandler, set up .elines), fail on error if
- asked for
- """
- self.lock.acquire()
- try:
- elines = self.errstore.pop(self)
- finally:
- self.lock.release()
- if self.poll() == None:
- self.terminate()
- if self.poll() == None:
- time.sleep(0.1)
- self.kill()
- self.wait()
- while True:
- b = os.read(self.stderr.fileno(), 1024)
- if b:
- elines.append(b)
- else:
- break
- self.stderr.close()
- self.elines = elines
- if fail_on_err and self.returncode != 0:
- self.errfail()
-
-
-class Server(object):
- """singleton implemening those filesystem access primitives
- which are needed for geo-replication functionality
-
- (Singleton in the sense it's a class which has only static
- and classmethods and is used directly, without instantiation.)
- """
-
- GX_NSPACE = (privileged() and "trusted" or "system") + ".glusterfs"
- NTV_FMTSTR = "!" + "B"*19 + "II"
- FRGN_XTRA_FMT = "I"
- FRGN_FMTSTR = NTV_FMTSTR + FRGN_XTRA_FMT
-
- def _pathguard(f):
- """decorator method that checks
- the path argument of the decorated
- functions to make sure it does not
- point out of the managed tree
- """
-
- fc = getattr(f, 'func_code', None)
- if not fc:
- # python 3
- fc = f.__code__
- pi = list(fc.co_varnames).index('path')
- def ff(*a):
- path = a[pi]
- ps = path.split('/')
- if path[0] == '/' or '..' in ps:
- raise ValueError('unsafe path')
- return f(*a)
- return ff
-
- @staticmethod
- @_pathguard
- def entries(path):
- """directory entries in an array"""
- # prevent symlinks being followed
- if not stat.S_ISDIR(os.lstat(path).st_mode):
- raise OSError(ENOTDIR, os.strerror(ENOTDIR))
- return os.listdir(path)
-
- @classmethod
- @_pathguard
- def purge(cls, path, entries=None):
- """force-delete subtrees
-
- If @entries is not specified, delete
- the whole subtree under @path (including
- @path).
-
- Otherwise, @entries should be a
- a sequence of children of @path, and
- the effect is identical with a joint
- @entries-less purge on them, ie.
-
- for e in entries:
- cls.purge(os.path.join(path, e))
- """
- me_also = entries == None
- if not entries:
- try:
- # if it's a symlink, prevent
- # following it
- try:
- os.unlink(path)
- return
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno == EISDIR:
- entries = os.listdir(path)
- else:
- raise
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno in (ENOTDIR, ENOENT, ELOOP):
- try:
- os.unlink(path)
- return
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno == ENOENT:
- return
- raise
- else:
- raise
- for e in entries:
- cls.purge(os.path.join(path, e))
- if me_also:
- os.rmdir(path)
-
- @classmethod
- @_pathguard
- def _create(cls, path, ctor):
- """path creation backend routine"""
- try:
- ctor(path)
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno == EEXIST:
- cls.purge(path)
- return ctor(path)
- raise
-
- @classmethod
- @_pathguard
- def mkdir(cls, path):
- cls._create(path, os.mkdir)
-
- @classmethod
- @_pathguard
- def symlink(cls, lnk, path):
- cls._create(path, lambda p: os.symlink(lnk, p))
-
- @classmethod
- @_pathguard
- def xtime(cls, path, uuid):
- """query xtime extended attribute
-
- Return xtime of @path for @uuid as a pair of integers.
- "Normal" errors due to non-existent @path or extended attribute
- are tolerated and errno is returned in such a case.
- """
-
- try:
- return struct.unpack('!II', Xattr.lgetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), 8))
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno in (ENOENT, ENODATA, ENOTDIR):
- return ex.errno
- else:
- raise
-
- @classmethod
- def xtime_vec(cls, path, *uuids):
- """vectored version of @xtime
-
- accepts a list of uuids and returns a dictionary
- with uuid as key(s) and xtime as value(s)
- """
- xt = {}
- for uuid in uuids:
- xtu = cls.xtime(path, uuid)
- if xtu == ENODATA:
- xtu = None
- if isinstance(xtu, int):
- return xtu
- xt[uuid] = xtu
- return xt
-
- @classmethod
- @_pathguard
- def set_xtime(cls, path, uuid, mark):
- """set @mark as xtime for @uuid on @path"""
- Xattr.lsetxattr(path, '.'.join([cls.GX_NSPACE, uuid, 'xtime']), struct.pack('!II', *mark))
-
- @classmethod
- def set_xtime_vec(cls, path, mark_dct):
- """vectored (or dictered) version of set_xtime
-
- ignore values that match @ignore
- """
- for u,t in mark_dct.items():
- cls.set_xtime(path, u, t)
-
- @staticmethod
- @_pathguard
- def setattr(path, adct):
- """set file attributes
-
- @adct is a dict, where 'own', 'mode' and 'times'
- keys are looked for and values used to perform
- chown, chmod or utimes on @path.
- """
- own = adct.get('own')
- if own:
- os.lchown(path, *own)
- mode = adct.get('mode')
- if mode:
- os.chmod(path, stat.S_IMODE(mode))
- times = adct.get('times')
- if times:
- os.utime(path, times)
-
- @staticmethod
- def pid():
- return os.getpid()
-
- last_keep_alive = 0
- @classmethod
- def keep_alive(cls, dct):
- """process keepalive messages.
-
- Return keep-alive counter (number of received keep-alive
- messages).
-
- Now the "keep-alive" message can also have a payload which is
- used to set a foreign volume-mark on the underlying file system.
- """
- if dct:
- key = '.'.join([cls.GX_NSPACE, 'volume-mark', dct['uuid']])
- val = struct.pack(cls.FRGN_FMTSTR,
- *(dct['version'] +
- tuple(int(x,16) for x in re.findall('(?:[\da-f]){2}', dct['uuid'])) +
- (dct['retval'],) + dct['volume_mark'][0:2] + (dct['timeout'],)))
- Xattr.lsetxattr('.', key, val)
- cls.last_keep_alive += 1
- return cls.last_keep_alive
-
- @staticmethod
- def version():
- """version used in handshake"""
- return 1.0
-
-
-class SlaveLocal(object):
- """mix-in class to implement some factes of a slave server
-
- ("mix-in" is sort of like "abstract class", ie. it's not
- instantiated just included in the ancesty DAG. I use "mix-in"
- to indicate that it's not used as an abstract base class,
- rather just taken in to implement additional functionality
- on the basis of the assumed availability of certain interfaces.)
- """
-
- def can_connect_to(self, remote):
- """determine our position in the connectibility matrix"""
- return not remote
-
- def service_loop(self):
- """start a RePCe server serving self's server
-
- stop servicing if a timeout is configured and got no
- keep-alime in that inteval
- """
- repce = RepceServer(self.server, sys.stdin, sys.stdout, int(gconf.sync_jobs))
- t = syncdutils.Thread(target=lambda: (repce.service_loop(),
- syncdutils.finalize()))
- t.start()
- logging.info("slave listening")
- if gconf.timeout and int(gconf.timeout) > 0:
- while True:
- lp = self.server.last_keep_alive
- time.sleep(int(gconf.timeout))
- if lp == self.server.last_keep_alive:
- logging.info("connection inactive for %d seconds, stopping" % int(gconf.timeout))
- break
- else:
- select((), (), ())
-
-class SlaveRemote(object):
- """mix-in class to implement an interface to a remote slave"""
-
- def connect_remote(self, rargs=[], **opts):
- """connects to a remote slave
-
- Invoke an auxiliary utility (slave gsyncd, possibly wrapped)
- which sets up the connection and set up a RePCe client to
- communicate throuh its stdio.
- """
- slave = opts.get('slave', self.url)
- so = getattr(gconf, 'session_owner', None)
- if so:
- so_args = ['--session-owner', so]
- else:
- so_args = []
- po = Popen(rargs + gconf.remote_gsyncd.split() + so_args + \
- ['-N', '--listen', '--timeout', str(gconf.timeout), slave],
- stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
- gconf.transport = po
- return self.start_fd_client(po.stdout, po.stdin, **opts)
-
- def start_fd_client(self, i, o, **opts):
- """set up RePCe client, handshake with server
-
- It's cut out as a separate method to let
- subclasses hook into client startup
- """
- self.server = RepceClient(i, o)
- rv = self.server.__version__()
- exrv = {'proto': repce.repce_version, 'object': Server.version()}
- da0 = (rv, exrv)
- da1 = ({}, {})
- for i in range(2):
- for k, v in da0[i].iteritems():
- da1[i][k] = int(v)
- if da1[0] != da1[1]:
- raise GsyncdError("RePCe major version mismatch: local %s, remote %s" % (exrv, rv))
-
- def rsync(self, files, *args):
- """invoke rsync"""
- if not files:
- raise GsyncdError("no files to sync")
- logging.debug("files: " + ", ".join(files))
- argv = gconf.rsync_command.split() + ['-aRS', '--super', '--numeric-ids'] + files + list(args)
- po = Popen(argv, stderr=subprocess.PIPE)
- po.wait()
- po.terminate_geterr(fail_on_err = False)
- return po
-
-
-class AbstractUrl(object):
- """abstract base class for url scheme classes"""
-
- def __init__(self, path, pattern):
- m = re.search(pattern, path)
- if not m:
- raise GsyncdError("malformed path")
- self.path = path
- return m.groups()
-
- @property
- def scheme(self):
- return type(self).__name__.lower()
-
- def canonical_path(self):
- return self.path
-
- def get_url(self, canonical=False, escaped=False):
- """format self's url in various styles"""
- if canonical:
- pa = self.canonical_path()
- else:
- pa = self.path
- u = "://".join((self.scheme, pa))
- if escaped:
- u = syncdutils.escape(u)
- return u
-
- @property
- def url(self):
- return self.get_url()
-
-
- ### Concrete resource classes ###
-
-
-class FILE(AbstractUrl, SlaveLocal, SlaveRemote):
- """scheme class for file:// urls
-
- can be used to represent a file slave server
- on slave side, or interface to a remote file
- file server on master side
- """
-
- class FILEServer(Server):
- """included server flavor"""
- pass
-
- server = FILEServer
-
- def __init__(self, path):
- sup(self, path, '^/')
-
- def connect(self):
- """inhibit the resource beyond"""
- os.chdir(self.path)
-
- def rsync(self, files):
- return sup(self, files, self.path)
-
-
-class GLUSTER(AbstractUrl, SlaveLocal, SlaveRemote):
- """scheme class for gluster:// urls
-
- can be used to represent a gluster slave server
- on slave side, or interface to a remote gluster
- slave on master side, or to represent master
- (slave-ish features come from the mixins, master
- functionality is outsourced to GMaster from master)
- """
-
- class GLUSTERServer(Server):
- "server enhancements for a glusterfs backend"""
-
- @classmethod
- def _attr_unpack_dict(cls, xattr, extra_fields = ''):
- """generic volume mark fetching/parsing backed"""
- fmt_string = cls.NTV_FMTSTR + extra_fields
- buf = Xattr.lgetxattr('.', xattr, struct.calcsize(fmt_string))
- vm = struct.unpack(fmt_string, buf)
- m = re.match('(.{8})(.{4})(.{4})(.{4})(.{12})', "".join(['%02x' % x for x in vm[2:18]]))
- uuid = '-'.join(m.groups())
- volinfo = { 'version': vm[0:2],
- 'uuid' : uuid,
- 'retval' : vm[18],
- 'volume_mark': vm[19:21],
- }
- if extra_fields:
- return volinfo, vm[-len(extra_fields):]
- else:
- return volinfo
-
- @classmethod
- def foreign_volume_infos(cls):
- """return list of valid (not expired) foreign volume marks"""
- dict_list = []
- xattr_list = Xattr.llistxattr_buf('.')
- for ele in xattr_list:
- if ele.find('.'.join([cls.GX_NSPACE, 'volume-mark', ''])) == 0:
- d, x = cls._attr_unpack_dict(ele, cls.FRGN_XTRA_FMT)
- now = int(time.time())
- if x[0] > now:
- logging.debug("volinfo[%s] expires: %d (%d sec later)" % \
- (d['uuid'], x[0], x[0] - now))
- d['timeout'] = x[0]
- dict_list.append(d)
- else:
- try:
- Xattr.lremovexattr('.', ele)
- except OSError:
- pass
- return dict_list
-
- @classmethod
- def native_volume_info(cls):
- """get the native volume mark of the underlying gluster volume"""
- try:
- return cls._attr_unpack_dict('.'.join([cls.GX_NSPACE, 'volume-mark']))
- except OSError:
- ex = sys.exc_info()[1]
- if ex.errno != ENODATA:
- raise
-
- server = GLUSTERServer
-
- def __init__(self, path):
- self.host, self.volume = sup(self, path, '^(%s):(.+)' % HostRX.pattern)
-
- def canonical_path(self):
- return ':'.join([gethostbyname(self.host), self.volume])
-
- def can_connect_to(self, remote):
- """determine our position in the connectibility matrix"""
- return True
-
- class Mounter(object):
- """Abstract base class for mounter backends"""
-
- def __init__(self, params):
- self.params = params
- self.mntpt = None
-
- @classmethod
- def get_glusterprog(cls):
- return os.path.join(gconf.gluster_command_dir, cls.glusterprog)
-
- def umount_l(self, d):
- """perform lazy umount"""
- po = Popen(self.make_umount_argv(d), stderr=subprocess.PIPE)
- po.wait()
- return po
-
- @classmethod
- def make_umount_argv(cls, d):
- raise NotImplementedError
-
- def make_mount_argv(self, *a):
- raise NotImplementedError
-
- def cleanup_mntpt(self, *a):
- pass
-
- def handle_mounter(self, po):
- po.wait()
-
- def inhibit(self, *a):
- """inhibit a gluster filesystem
-
- Mount glusterfs over a temporary mountpoint,
- change into the mount, and lazy unmount the
- filesystem.
- """
-
- mpi, mpo = os.pipe()
- mh = Popen.fork()
- if mh:
- os.close(mpi)
- fcntl.fcntl(mpo, fcntl.F_SETFD, fcntl.FD_CLOEXEC)
- d = None
- margv = self.make_mount_argv(*a)
- if self.mntpt:
- # mntpt is determined pre-mount
- d = self.mntpt
- os.write(mpo, d + '\0')
- po = Popen(margv, **self.mountkw)
- self.handle_mounter(po)
- po.terminate_geterr()
- logging.debug('auxiliary glusterfs mount in place')
- if not d:
- # mntpt is determined during mount
- d = self.mntpt
- os.write(mpo, d + '\0')
- os.write(mpo, 'M')
- t = syncdutils.Thread(target=lambda: os.chdir(d))
- t.start()
- tlim = gconf.starttime + int(gconf.connection_timeout)
- while True:
- if not t.isAlive():
- break
- if time.time() >= tlim:
- syncdutils.finalize(exval = 1)
- time.sleep(1)
- os.close(mpo)
- _, rv = syncdutils.waitpid(mh, 0)
- if rv:
- rv = (os.WIFEXITED(rv) and os.WEXITSTATUS(rv) or 0) - \
- (os.WIFSIGNALED(rv) and os.WTERMSIG(rv) or 0)
- logging.warn('stale mount possibly left behind on ' + d)
- raise GsyncdError("cleaning up temp mountpoint %s failed with status %d" % \
- (d, rv))
- else:
- rv = 0
- try:
- os.setsid()
- os.close(mpo)
- mntdata = ''
- while True:
- c = os.read(mpi, 1)
- if not c:
- break
- mntdata += c
- if mntdata:
- mounted = False
- if mntdata[-1] == 'M':
- mntdata = mntdata[:-1]
- assert(mntdata)
- mounted = True
- assert(mntdata[-1] == '\0')
- mntpt = mntdata[:-1]
- assert(mntpt)
- if mounted:
- po = self.umount_l(mntpt)
- po.terminate_geterr(fail_on_err = False)
- if po.returncode != 0:
- po.errlog()
- rv = po.returncode
- self.cleanup_mntpt(mntpt)
- except:
- logging.exception('mount cleanup failure:')
- rv = 200
- os._exit(rv)
- logging.debug('auxiliary glusterfs mount prepared')
-
- class DirectMounter(Mounter):
- """mounter backend which calls mount(8), umount(8) directly"""
-
- mountkw = {'stderr': subprocess.PIPE}
- glusterprog = 'glusterfs'
-
- @staticmethod
- def make_umount_argv(d):
- return ['umount', '-l', d]
-
- def make_mount_argv(self):
- self.mntpt = tempfile.mkdtemp(prefix = 'gsyncd-aux-mount-')
- return [self.get_glusterprog()] + ['--' + p for p in self.params] + [self.mntpt]
-
- def cleanup_mntpt(self, mntpt = None):
- if not mntpt:
- mntpt = self.mntpt
- os.rmdir(mntpt)
-
- class MountbrokerMounter(Mounter):
- """mounter backend using the mountbroker gluster service"""
-
- mountkw = {'stderr': subprocess.PIPE, 'stdout': subprocess.PIPE}
- glusterprog = 'gluster'
-
- @classmethod
- def make_cli_argv(cls):
- return [cls.get_glusterprog()] + gconf.gluster_cli_options.split() + ['system::']
-
- @classmethod
- def make_umount_argv(cls, d):
- return cls.make_cli_argv() + ['umount', d, 'lazy']
-
- def make_mount_argv(self, label):
- return self.make_cli_argv() + \
- ['mount', label, 'user-map-root=' + syncdutils.getusername()] + self.params
-
- def handle_mounter(self, po):
- self.mntpt = po.stdout.readline()[:-1]
- po.stdout.close()
- sup(self, po)
- if po.returncode != 0:
- # if cli terminated with error due to being
- # refused by glusterd, what it put
- # out on stdout is a diagnostic message
- logging.error('glusterd answered: %s' % self.mntpt)
-
- def connect(self):
- """inhibit the resource beyond
-
- Choose mounting backend (direct or mountbroker),
- set up glusterfs parameters and perform the mount
- with given backend
- """
-
- label = getattr(gconf, 'mountbroker', None)
- if not label and not privileged():
- label = syncdutils.getusername()
- mounter = label and self.MountbrokerMounter or self.DirectMounter
- params = gconf.gluster_params.split() + \
- (gconf.gluster_log_level and ['log-level=' + gconf.gluster_log_level] or []) + \
- ['log-file=' + gconf.gluster_log_file, 'volfile-server=' + self.host,
- 'volfile-id=' + self.volume, 'client-pid=-1']
- mounter(params).inhibit(*[l for l in [label] if l])
-
- def connect_remote(self, *a, **kw):
- sup(self, *a, **kw)
- self.slavedir = "/proc/%d/cwd" % self.server.pid()
-
- def service_loop(self, *args):
- """enter service loop
-
- - if slave given, instantiate GMaster and
- pass control to that instance, which implements
- master behavior
- - else do that's what's inherited
- """
- if args:
- gmaster_builder()(self, args[0]).crawl_loop()
- else:
- sup(self, *args)
-
- def rsync(self, files):
- return sup(self, files, self.slavedir)
-
-
-class SSH(AbstractUrl, SlaveRemote):
- """scheme class for ssh:// urls
-
- interface to remote slave on master side
- implementing an ssh based proxy
- """
-
- def __init__(self, path):
- self.remote_addr, inner_url = sup(self, path,
- '^((?:%s@)?%s):(.+)' % tuple([ r.pattern for r in (UserRX, HostRX) ]))
- self.inner_rsc = parse_url(inner_url)
-
- def canonical_path(self):
- m = re.match('([^@]+)@(.+)', self.remote_addr)
- if m:
- u, h = m.groups()
- else:
- u, h = syncdutils.getusername(), self.remote_addr
- remote_addr = '@'.join([u, gethostbyname(h)])
- return ':'.join([remote_addr, self.inner_rsc.get_url(canonical=True)])
-
- def can_connect_to(self, remote):
- """determine our position in the connectibility matrix"""
- return False
-
- def start_fd_client(self, *a, **opts):
- """customizations for client startup
-
- - be a no-op if we are to daemonize (client startup is deferred
- to post-daemon stage)
- - determine target url for rsync after consulting server
- """
- if opts.get('deferred'):
- return a
- sup(self, *a)
- ityp = type(self.inner_rsc)
- if ityp == FILE:
- slavepath = self.inner_rsc.path
- elif ityp == GLUSTER:
- slavepath = "/proc/%d/cwd" % self.server.pid()
- else:
- raise NotImplementedError
- self.slaveurl = ':'.join([self.remote_addr, slavepath])
-
- def connect_remote(self, go_daemon=None):
- """connect to inner slave url through outer ssh url
-
- Wrap the connecting utility in ssh.
-
- Much care is put into daemonizing: in that case
- ssh is started before daemonization, but
- RePCe client is to be created after that (as ssh
- interactive password auth would be defeated by
- a daemonized ssh, while client should be present
- only in the final process). In that case the action
- is taken apart to two parts, this method is ivoked
- once pre-daemon, once post-daemon. Use @go_daemon
- to deiced what part to perform.
-
- [NB. ATM gluster product does not makes use of interactive
- authentication.]
- """
- if go_daemon == 'done':
- return self.start_fd_client(*self.fd_pair)
- gconf.setup_ssh_ctl(tempfile.mkdtemp(prefix='gsyncd-aux-ssh-'))
- deferred = go_daemon == 'postconn'
- ret = sup(self, gconf.ssh_command.split() + gconf.ssh_ctl_args + [self.remote_addr], slave=self.inner_rsc.url, deferred=deferred)
- if deferred:
- # send a message to peer so that we can wait for
- # the answer from which we know connection is
- # established and we can proceed with daemonization
- # (doing that too early robs the ssh passwd prompt...)
- # However, we'd better not start the RepceClient
- # before daemonization (that's not preserved properly
- # in daemon), we just do a an ad-hoc linear put/get.
- i, o = ret
- inf = os.fdopen(i)
- repce.send(o, None, '__repce_version__')
- select((inf,), (), ())
- repce.recv(inf)
- # hack hack hack: store a global reference to the file
- # to save it from getting GC'd which implies closing it
- gconf.permanent_handles.append(inf)
- self.fd_pair = (i, o)
- return 'should'
-
- def rsync(self, files):
- return sup(self, files, '-ze', " ".join(gconf.ssh_command.split() + gconf.ssh_ctl_args), self.slaveurl)
diff --git a/xlators/features/marker/utils/syncdaemon/syncdutils.py b/xlators/features/marker/utils/syncdaemon/syncdutils.py
deleted file mode 100644
index 1d4eb20032c..00000000000
--- a/xlators/features/marker/utils/syncdaemon/syncdutils.py
+++ /dev/null
@@ -1,282 +0,0 @@
-import os
-import sys
-import pwd
-import time
-import fcntl
-import shutil
-import logging
-from threading import Lock, Thread as baseThread
-from errno import EACCES, EAGAIN, EPIPE, ENOTCONN, ECONNABORTED, EINTR, errorcode
-from signal import signal, SIGTERM, SIGKILL
-from time import sleep
-import select as oselect
-from os import waitpid as owaitpid
-try:
- from cPickle import PickleError
-except ImportError:
- # py 3
- from pickle import PickleError
-
-from gconf import gconf
-
-try:
- # py 3
- from urllib import parse as urllib
-except ImportError:
- import urllib
-
-def escape(s):
- """the chosen flavor of string escaping, used all over
- to turn whatever data to creatable representation"""
- return urllib.quote_plus(s)
-
-def unescape(s):
- """inverse of .escape"""
- return urllib.unquote_plus(s)
-
-def norm(s):
- if s:
- return s.replace('-', '_')
-
-def update_file(path, updater, merger = lambda f: True):
- """update a file in a transaction-like manner"""
-
- fr = fw = None
- try:
- fd = os.open(path, os.O_CREAT|os.O_RDWR)
- try:
- fr = os.fdopen(fd, 'r+b')
- except:
- os.close(fd)
- raise
- fcntl.lockf(fr, fcntl.LOCK_EX)
- if not merger(fr):
- return
-
- tmpp = path + '.tmp.' + str(os.getpid())
- fd = os.open(tmpp, os.O_CREAT|os.O_EXCL|os.O_WRONLY)
- try:
- fw = os.fdopen(fd, 'wb', 0)
- except:
- os.close(fd)
- raise
- updater(fw)
- os.fsync(fd)
- os.rename(tmpp, path)
- finally:
- for fx in (fr, fw):
- if fx:
- fx.close()
-
-def grabfile(fname, content=None):
- """open @fname + contest for its fcntl lock
-
- @content: if given, set the file content to it
- """
- # damn those messy open() mode codes
- fd = os.open(fname, os.O_CREAT|os.O_RDWR)
- f = os.fdopen(fd, 'r+b', 0)
- try:
- fcntl.lockf(f, fcntl.LOCK_EX|fcntl.LOCK_NB)
- except:
- ex = sys.exc_info()[1]
- f.close()
- if isinstance(ex, IOError) and ex.errno in (EACCES, EAGAIN):
- # cannot grab, it's taken
- return
- raise
- if content:
- try:
- f.truncate()
- f.write(content)
- except:
- f.close()
- raise
- gconf.permanent_handles.append(f)
- return f
-
-def grabpidfile(fname=None, setpid=True):
- """.grabfile customization for pid files"""
- if not fname:
- fname = gconf.pid_file
- content = None
- if setpid:
- content = str(os.getpid()) + '\n'
- return grabfile(fname, content=content)
-
-final_lock = Lock()
-
-def finalize(*a, **kw):
- """all those messy final steps we go trough upon termination
-
- Do away with pidfile, ssh control dir and logging.
- """
- final_lock.acquire()
- if getattr(gconf, 'pid_file', None):
- rm_pidf = gconf.pid_file_owned
- if gconf.cpid:
- # exit path from parent branch of daemonization
- rm_pidf = False
- while True:
- f = grabpidfile(setpid=False)
- if not f:
- # child has already taken over pidfile
- break
- if os.waitpid(gconf.cpid, os.WNOHANG)[0] == gconf.cpid:
- # child has terminated
- rm_pidf = True
- break;
- time.sleep(0.1)
- if rm_pidf:
- try:
- os.unlink(gconf.pid_file)
- except:
- ex = sys.exc_info()[1]
- if ex.errno == ENOENT:
- pass
- else:
- raise
- if gconf.ssh_ctl_dir and not gconf.cpid:
- shutil.rmtree(gconf.ssh_ctl_dir)
- if getattr(gconf, 'state_socket', None):
- try:
- os.unlink(gconf.state_socket)
- except:
- if sys.exc_info()[0] == OSError:
- pass
- if gconf.log_exit:
- logging.info("exiting.")
- sys.stdout.flush()
- sys.stderr.flush()
- os._exit(kw.get('exval', 0))
-
-def log_raise_exception(excont):
- """top-level exception handler
-
- Try to some fancy things to cover up we face with an error.
- Translate some weird sounding but well understood exceptions
- into human-friendly lingo
- """
- is_filelog = False
- for h in logging.getLogger().handlers:
- fno = getattr(getattr(h, 'stream', None), 'fileno', None)
- if fno and not os.isatty(fno()):
- is_filelog = True
-
- exc = sys.exc_info()[1]
- if isinstance(exc, SystemExit):
- excont.exval = exc.code or 0
- raise
- else:
- logtag = None
- if isinstance(exc, GsyncdError):
- if is_filelog:
- logging.error(exc.message)
- sys.stderr.write('failure: ' + exc.message + "\n")
- elif isinstance(exc, PickleError) or isinstance(exc, EOFError) or \
- ((isinstance(exc, OSError) or isinstance(exc, IOError)) and \
- exc.errno == EPIPE):
- logging.error('connection to peer is broken')
- if hasattr(gconf, 'transport'):
- gconf.transport.wait()
- gconf.transport.terminate_geterr()
- elif isinstance(exc, OSError) and exc.errno in (ENOTCONN, ECONNABORTED):
- logging.error('glusterfs session went down [%s]', errorcode[exc.errno])
- else:
- logtag = "FAIL"
- if not logtag and logging.getLogger().isEnabledFor(logging.DEBUG):
- logtag = "FULL EXCEPTION TRACE"
- if logtag:
- logging.exception(logtag + ": ")
- sys.stderr.write("failed with %s.\n" % type(exc).__name__)
- excont.exval = 1
- sys.exit(excont.exval)
-
-
-class FreeObject(object):
- """wildcard class for which any attribute can be set"""
-
- def __init__(self, **kw):
- for k,v in kw.items():
- setattr(self, k, v)
-
-class Thread(baseThread):
- """thread class flavor for gsyncd
-
- - always a daemon thread
- - force exit for whole program if thread
- function coughs up an exception
- """
- def __init__(self, *a, **kw):
- tf = kw.get('target')
- if tf:
- def twrap(*aa):
- excont = FreeObject(exval = 0)
- try:
- tf(*aa)
- except:
- try:
- log_raise_exception(excont)
- finally:
- finalize(exval = excont.exval)
- kw['target'] = twrap
- baseThread.__init__(self, *a, **kw)
- self.setDaemon(True)
-
-class GsyncdError(Exception):
- pass
-
-def getusername(uid = None):
- if uid == None:
- uid = os.geteuid()
- return pwd.getpwuid(uid).pw_name
-
-def privileged():
- return os.geteuid() == 0
-
-def boolify(s):
- """
- Generic string to boolean converter
-
- return
- - Quick return if string 's' is of type bool
- - True if it's in true_list
- - False if it's in false_list
- - Warn if it's not present in either and return False
- """
- true_list = ['true', 'yes', '1', 'on']
- false_list = ['false', 'no', '0', 'off']
-
- if isinstance(s, bool):
- return s
-
- rv = False
- lstr = s.lower()
- if lstr in true_list:
- rv = True
- elif not lstr in false_list:
- logging.warn("Unknown string (%s) in string to boolean conversion defaulting to False\n" % (s))
-
- return rv
-
-def eintr_wrap(func, exc, *a):
- """
- wrapper around syscalls resilient to interrupt caused
- by signals
- """
- while True:
- try:
- return func(*a)
- except exc:
- ex = sys.exc_info()[1]
- if not ex.args[0] == EINTR:
- raise
-
-def select(*a):
- return eintr_wrap(oselect.select, oselect.error, *a)
-
-def waitpid (*a):
- return eintr_wrap(owaitpid, OSError, *a)
-
-def set_term_handler(hook=lambda *a: finalize(*a, **{'exval': 1})):
- signal(SIGTERM, hook)
diff --git a/xlators/features/metadisp/Makefile.am b/xlators/features/metadisp/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/metadisp/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/metadisp/src/Makefile.am b/xlators/features/metadisp/src/Makefile.am
new file mode 100644
index 00000000000..1520ad8c424
--- /dev/null
+++ b/xlators/features/metadisp/src/Makefile.am
@@ -0,0 +1,38 @@
+noinst_PYTHON = gen-fops.py
+
+EXTRA_DIST = fops-tmpl.c
+
+xlator_LTLIBRARIES = metadisp.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+nodist_metadisp_la_SOURCES = fops.c
+
+BUILT_SOURCES = fops.c
+
+metadisp_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+metadisp_la_SOURCES = metadisp.c \
+ metadisp-unlink.c \
+ metadisp-stat.c \
+ metadisp-lookup.c \
+ metadisp-readdir.c \
+ metadisp-create.c \
+ metadisp-open.c \
+ metadisp-fsync.c \
+ metadisp-setattr.c \
+ backend.c
+
+metadisp_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = metadisp.h metadisp-fops.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+fops.c: fops-tmpl.c $(top_srcdir)/libglusterfs/src/generator.py gen-fops.py
+ PYTHONPATH=$(top_srcdir)/libglusterfs/src \
+ $(PYTHON) $(srcdir)/gen-fops.py $(srcdir)/fops-tmpl.c > $@
+
+CLEANFILES = $(nodist_metadisp_la_SOURCES)
diff --git a/xlators/features/metadisp/src/backend.c b/xlators/features/metadisp/src/backend.c
new file mode 100644
index 00000000000..ee2c25bfaa7
--- /dev/null
+++ b/xlators/features/metadisp/src/backend.c
@@ -0,0 +1,45 @@
+#define GFID_STR_LEN 37
+
+#include "metadisp.h"
+
+/*
+ * backend.c
+ *
+ * functions responsible for converting user-facing paths to backend-style
+ * "/$GFID" paths.
+ */
+
+int32_t
+build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc)
+{
+ static uuid_t root = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ char gfid_buf[GFID_STR_LEN + 1] = {
+ 0,
+ };
+ char *path = NULL;
+
+ GF_VALIDATE_OR_GOTO("metadisp", src_loc, out);
+ GF_VALIDATE_OR_GOTO("metadisp", dst_loc, out);
+
+ loc_copy(dst_loc, src_loc);
+ memcpy(dst_loc->pargfid, root, sizeof(root));
+ GF_FREE((char *)dst_loc->path); // we are overwriting path so nuke
+ // whatever loc_copy gave us
+
+ uuid_utoa_r(gfid, gfid_buf);
+
+ path = GF_CALLOC(GFID_STR_LEN + 1, sizeof(char),
+ gf_common_mt_char); // freed via loc_wipe
+
+ path[0] = '/';
+ strncpy(path + 1, gfid_buf, GFID_STR_LEN);
+ path[GFID_STR_LEN] = 0;
+ dst_loc->path = path;
+ if (src_loc->name)
+ dst_loc->name = strrchr(dst_loc->path, '/');
+ if (dst_loc->name)
+ dst_loc->name++;
+ return 0;
+out:
+ return -1;
+}
diff --git a/xlators/features/metadisp/src/fops-tmpl.c b/xlators/features/metadisp/src/fops-tmpl.c
new file mode 100644
index 00000000000..4385b7dd5b7
--- /dev/null
+++ b/xlators/features/metadisp/src/fops-tmpl.c
@@ -0,0 +1,10 @@
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <glusterfs/xlator.h>
+#include "metadisp.h"
+#include "metadisp-fops.h"
+
+#pragma generate
diff --git a/xlators/features/metadisp/src/gen-fops.py b/xlators/features/metadisp/src/gen-fops.py
new file mode 100644
index 00000000000..8b5e120fdec
--- /dev/null
+++ b/xlators/features/metadisp/src/gen-fops.py
@@ -0,0 +1,160 @@
+#!/usr/bin/python
+
+import sys
+from generator import fop_subs, generate
+
+FN_METADATA_CHILD_GENERIC = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ metadata");
+ STACK_WIND (frame, default_@NAME@_cbk,
+ METADATA_CHILD(this), METADATA_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FN_GENERIC_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ generic");
+ STACK_WIND (frame, default_@NAME@_cbk,
+ DATA_CHILD(this), DATA_CHILD(this)->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FN_DATAFD_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ datafd");
+ xlator_t *child = NULL;
+ child = DATA_CHILD(this);
+ STACK_WIND (frame, default_@NAME@_cbk,
+ child, child->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FN_DATALOC_TEMPLATE = """
+int32_t
+metadisp_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ METADISP_TRACE("@NAME@ dataloc");
+ loc_t backend_loc = {
+ 0,
+ };
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+ xlator_t *child = NULL;
+ child = DATA_CHILD(this);
+ STACK_WIND (frame, default_@NAME@_cbk,
+ child, child->fops->@NAME@,
+ @SHORT_ARGS@);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+"""
+
+FOPS_LINE_TEMPLATE = "\t.@NAME@ = metadisp_@NAME@,"
+
+skipped = [
+ "readdir",
+ "readdirp",
+ "lookup",
+ "fsync",
+ "stat",
+ "open",
+ "create",
+ "unlink",
+ "setattr",
+ # TODO: implement "inodelk",
+]
+
+
+def gen_fops():
+ done = skipped
+
+ #
+ # these are fops that wind to the DATA_CHILD
+ #
+ # NOTE: re-written in order from google doc:
+ # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q
+ for name in [
+ "writev",
+ "readv",
+ "ftruncate",
+ "zerofill",
+ "discard",
+ "seek",
+ "fstat",
+ ]:
+ done = done + [name]
+ print(generate(FN_DATAFD_TEMPLATE, name, fop_subs))
+
+ for name in ["truncate"]:
+ done = done + [name]
+ print(generate(FN_DATALOC_TEMPLATE, name, fop_subs))
+
+ # these are fops that operate solely on dentries, folders,
+ # or extended attributes. Therefore, they must always
+ # wind to METADATA_CHILD and should never perform
+ # any path rewriting
+ #
+ # NOTE: re-written in order from google doc:
+ # https://docs.google.com/document/d/1KEwVtSNvDhs4qb63gWx2ulCp5GJjge77NGJk4p_Ms4Q
+ for name in [
+ "mkdir",
+ "symlink",
+ "link",
+ "rename",
+ "mknod",
+ "opendir",
+ # "readdir, # special-cased
+ # "readdirp, # special-cased
+ "fsyncdir",
+ # "setattr", # special-cased
+ "readlink",
+ "fentrylk",
+ "access",
+ # TODO: these wind to both,
+ # data for backend-attributes and metadata for the rest
+ "xattrop",
+ "setxattr",
+ "getxattr",
+ "removexattr",
+ "fgetxattr",
+ "fsetxattr",
+ "fremovexattr",
+ ]:
+
+ done = done + [name]
+ print(generate(FN_METADATA_CHILD_GENERIC, name, fop_subs))
+
+ print("struct xlator_fops fops = {")
+ for name in done:
+ print(generate(FOPS_LINE_TEMPLATE, name, fop_subs))
+
+ print("};")
+
+
+for l in open(sys.argv[1], "r").readlines():
+ if l.find("#pragma generate") != -1:
+ print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
+ gen_fops()
+ print("/* END GENERATED CODE */")
+ else:
+ print(l[:-1])
diff --git a/xlators/features/metadisp/src/metadisp-create.c b/xlators/features/metadisp/src/metadisp-create.c
new file mode 100644
index 00000000000..f8c9798dd59
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-create.c
@@ -0,0 +1,101 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * Create, like stat, is a two-step process. We send a create
+ * to the METADATA_CHILD, then send another create to the DATA_CHILD.
+ *
+ * We do the metadata child first to ensure that the ACLs are enforced.
+ */
+
+int32_t
+metadisp_create_dentry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_create_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
+{
+ // create the backend data inode
+ STACK_WIND(frame, metadisp_create_dentry_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+}
+
+int32_t
+metadisp_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ METADISP_TRACE("%d %d", op_ret, op_errno);
+ call_stub_t *stub = cookie;
+ if (op_ret != 0) {
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+ }
+
+ if (stub == NULL) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+int32_t
+metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ METADISP_TRACE(".");
+
+ loc_t backend_loc = {
+ 0,
+ };
+ call_stub_t *stub = NULL;
+ uuid_t *gfid_req = NULL;
+
+ RESOLVE_GFID_REQ(xdata, gfid_req, out);
+
+ if (build_backend_loc(*gfid_req, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ frame->local = loc;
+
+ stub = fop_create_stub(frame, metadisp_create_resume, &backend_loc, flags,
+ mode, umask, fd, xdata);
+
+ STACK_WIND_COOKIE(frame, metadisp_create_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->create, loc, flags, mode,
+ umask, fd, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(create, frame, -1, EINVAL, NULL, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+out:
+ return -1;
+}
diff --git a/xlators/features/metadisp/src/metadisp-fops.h b/xlators/features/metadisp/src/metadisp-fops.h
new file mode 100644
index 00000000000..56dd427cf34
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-fops.h
@@ -0,0 +1,51 @@
+#ifndef GF_METADISP_FOPS_H_
+#define GF_METADISP_FOPS_H_
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/glusterfs.h>
+
+#include <sys/types.h>
+
+/* fops in here are defined in their own file. Every other fop is just defined
+ * inline of fops.c */
+
+int
+metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata);
+
+int
+metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict);
+
+int
+metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+metadisp_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+
+int
+metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
+
+int
+metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata);
+
+int
+metadisp_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+
+int
+metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata);
+
+int
+metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata);
+
+int
+metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata);
+
+#endif
diff --git a/xlators/features/metadisp/src/metadisp-fsync.c b/xlators/features/metadisp/src/metadisp-fsync.c
new file mode 100644
index 00000000000..2e46fa84eac
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-fsync.c
@@ -0,0 +1,54 @@
+
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+int32_t
+metadisp_fsync_resume(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ STACK_WIND(frame, default_fsync_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ stub = fop_fsync_stub(frame, metadisp_fsync_resume, fd, flags, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_fsync_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-lookup.c b/xlators/features/metadisp/src/metadisp-lookup.c
new file mode 100644
index 00000000000..27d90c9f746
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-lookup.c
@@ -0,0 +1,90 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * Lookup, like stat, is a two-step process for grabbing the metadata details
+ * as well as the data details.
+ */
+
+int32_t
+metadisp_backend_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ METADISP_TRACE("backend_lookup_cbk");
+ if (op_errno == ENOENT) {
+ op_errno = ENODATA;
+ op_ret = -1;
+ }
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
+}
+
+int32_t
+metadisp_backend_lookup_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ METADISP_TRACE("backend_lookup_resume");
+ loc_t backend_loc = {
+ 0,
+ };
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ STACK_WIND(frame, metadisp_backend_lookup_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->lookup, &backend_loc, xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(lookup, frame, -1, EINVAL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+metadisp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ METADISP_TRACE("%d %d", op_ret, op_errno);
+ call_stub_t *stub = NULL;
+ stub = cookie;
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (!IA_ISREG(buf->ia_type)) {
+ goto unwind;
+ } else if (!stub) {
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ METADISP_TRACE("resuming stub");
+
+ // memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t));
+ call_resume(stub);
+ return 0;
+unwind:
+ METADISP_TRACE("unwinding %d %d", op_ret, op_errno);
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ return 0;
+}
+
+int32_t
+metadisp_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ METADISP_TRACE("lookup");
+ call_stub_t *stub = NULL;
+ stub = fop_lookup_stub(frame, metadisp_backend_lookup_resume, loc, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_lookup_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-open.c b/xlators/features/metadisp/src/metadisp-open.c
new file mode 100644
index 00000000000..64814afe636
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-open.c
@@ -0,0 +1,70 @@
+#include <glusterfs/call-stub.h>
+#include "metadisp.h"
+
+int32_t
+metadisp_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ METADISP_TRACE("got open results %d %d", op_ret, op_errno);
+
+ call_stub_t *stub = NULL;
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (!stub) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_open_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND_COOKIE(frame, metadisp_open_cbk, NULL, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ loc_t backend_loc = {
+ 0,
+ };
+
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ stub = fop_open_stub(frame, metadisp_open_resume, &backend_loc, flags, fd,
+ xdata);
+ STACK_WIND_COOKIE(frame, metadisp_open_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(open, frame, -1, EINVAL, NULL, NULL);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-readdir.c b/xlators/features/metadisp/src/metadisp-readdir.c
new file mode 100644
index 00000000000..5f840b1e88f
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-readdir.c
@@ -0,0 +1,65 @@
+#include "metadisp.h"
+
+/**
+ * With a change to the posix xlator, readdir and readdirp are shockingly
+ * simple.
+ *
+ * The issue with separating the backend data of the files
+ * with the metadata is that readdirs must now read from multiple sources
+ * to coalesce the directory entries.
+ *
+ * The way we do this is to tell the METADATA_CHILD that when it's
+ * running readdirp, each file entry should have a stat wound to
+ * 'stat-source-of-truth'.
+ *
+ * see metadisp_stat for how it handles winds _from_posix.
+ */
+
+int32_t
+metadisp_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ METADISP_TRACE(".");
+ /*
+ * Always use readdirp, even if the original was readdir. Why? Because NFS.
+ * There are multiple translations between Gluster, UNIX, and NFS stat
+ * structures in that path. One of them uses the type etc. from the stat
+ * structure, which is only filled in by readdirp. If we use readdir, the
+ * entries do actually go all the way back to the client and are visible in
+ * getdents, but then the readdir throws them away because of the
+ * uninitialized type.
+ */
+ GF_UNUSED int32_t ret;
+ if (!xdata) {
+ xdata = dict_new();
+ }
+
+ // ret = dict_set_int32 (xdata, "list-xattr", 1);
+
+ // I'm my own source of truth!
+ ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this);
+
+ STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+
+ return 0;
+}
+
+int32_t
+metadisp_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ METADISP_TRACE(".");
+ if (!xdata) {
+ xdata = dict_new();
+ }
+ GF_UNUSED int32_t ret;
+ // ret = dict_set_int32 (xdata, "list-xattr", 1);
+
+ // I'm my own source of truth!
+ ret = dict_set_static_ptr(xdata, "stat-source-of-truth", (void *)this);
+
+ STACK_WIND(frame, default_readdirp_cbk, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-setattr.c b/xlators/features/metadisp/src/metadisp-setattr.c
new file mode 100644
index 00000000000..6991cf644f3
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-setattr.c
@@ -0,0 +1,90 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+int32_t
+metadisp_backend_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
+
+{
+ METADISP_TRACE("backend_setattr_cbk");
+ if (op_errno == ENOENT) {
+ op_errno = ENODATA;
+ op_ret = -1;
+ }
+ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
+ return 0;
+}
+
+int32_t
+metadisp_backend_setattr_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid,
+ dict_t *xdata)
+
+{
+ METADISP_TRACE("backend_setattr_resume");
+ loc_t backend_loc = {
+ 0,
+ };
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ STACK_WIND(frame, metadisp_backend_setattr_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->setattr, &backend_loc, stbuf, valid,
+ xdata);
+ return 0;
+
+unwind:
+ STACK_UNWIND_STRICT(setattr, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+metadisp_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
+{
+ METADISP_TRACE("%d %d", op_ret, op_errno);
+ call_stub_t *stub = NULL;
+ stub = cookie;
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (!IA_ISREG(statpost->ia_type)) {
+ goto unwind;
+ } else if (!stub) {
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ METADISP_TRACE("resuming stub");
+ call_resume(stub);
+ return 0;
+unwind:
+ METADISP_TRACE("unwinding %d %d", op_ret, op_errno);
+ STACK_UNWIND_STRICT(setattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ return 0;
+}
+
+int32_t
+metadisp_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ METADISP_TRACE("setattr");
+ call_stub_t *stub = NULL;
+ stub = fop_setattr_stub(frame, metadisp_backend_setattr_resume, loc, stbuf,
+ valid, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_setattr_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->setattr, loc, stbuf, valid,
+ xdata);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-stat.c b/xlators/features/metadisp/src/metadisp-stat.c
new file mode 100644
index 00000000000..b06d0dbcddd
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-stat.c
@@ -0,0 +1,124 @@
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * The stat flow in METADISP is complicated because we must
+ * do ensure a few things:
+ * 1. stat, on the path within the metadata layer,
+ * MUST get the backend FD of the data layer.
+ * --- we wind to the metadata layer, then the data layer.
+ *
+ * 2. the metadata layer MUST be able to ask the data
+ * layer for stat information.
+ * --- this is 'syncop-internal-from-posix'
+ *
+ * 3. when the metadata exists BUT the data is missing,
+ * we MUST mark the backend file as bad and heal it.
+ */
+
+int32_t
+metadisp_stat_backend_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ METADISP_TRACE("got backend stat results %d %d", op_ret, op_errno);
+ if (op_errno == ENOENT) {
+ STACK_UNWIND_STRICT(open, frame, -1, ENODATA, NULL, NULL);
+ return 0;
+ }
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_stat_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ METADISP_TRACE("winding stat to path %s", loc->path);
+ if (gf_uuid_is_null(loc->gfid)) {
+ METADISP_TRACE("bad object, sending EUCLEAN");
+ STACK_UNWIND_STRICT(open, frame, -1, EUCLEAN, NULL, NULL);
+ return 0;
+ }
+
+ STACK_WIND(frame, metadisp_stat_backend_cbk, SECOND_CHILD(this),
+ SECOND_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+
+ METADISP_TRACE("got stat results %d %d", op_ret, op_errno);
+
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ // only use the stub for the files
+ if (!IA_ISREG(buf->ia_type)) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ int32_t ret = 0;
+ loc_t backend_loc = {
+ 0,
+ };
+ METADISP_FILTER_ROOT(stat, loc, xdata);
+
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ if (dict_get_int32(xdata, "syncop-internal-from-posix", &ret) == 0) {
+ // if we've just been sent a stat from posix, then we know
+ // that we must send down a stat for a file to the second child.
+ //
+ // that means we can skip the stat for the first child and just
+ // send to the data disk.
+ METADISP_TRACE("got syncop-internal-from-posix");
+ STACK_WIND(frame, default_stat_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->stat, &backend_loc, xdata);
+ return 0;
+ }
+
+ // we do not know if the request is for a file, folder, etc. wind
+ // to first child to find out.
+ stub = fop_stat_stub(frame, metadisp_stat_resume, &backend_loc, xdata);
+ METADISP_TRACE("winding stat to first child %s", loc->path);
+ STACK_WIND_COOKIE(frame, metadisp_stat_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(stat, frame, -1, EINVAL, NULL, NULL);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp-unlink.c b/xlators/features/metadisp/src/metadisp-unlink.c
new file mode 100644
index 00000000000..1f6a8eb35ce
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp-unlink.c
@@ -0,0 +1,160 @@
+
+#include "metadisp.h"
+#include <glusterfs/call-stub.h>
+
+/**
+ * The unlink flow in metadisp is complicated because we must
+ * do ensure that UNLINK causes both the metadata objects
+ * to get removed and the data objects to get removed.
+ */
+
+int32_t
+metadisp_unlink_resume(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int xflag, dict_t *xdata)
+{
+ METADISP_TRACE("winding backend unlink to path %s", loc->path);
+ STACK_WIND(frame, default_unlink_cbk, DATA_CHILD(this),
+ DATA_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+}
+
+int32_t
+metadisp_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ METADISP_TRACE(". %d %d", op_ret, op_errno);
+
+ int ret = 0;
+ call_stub_t *stub = NULL;
+ int nlink = 0;
+
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ ret = dict_get_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, &nlink);
+ if (ret != 0) {
+ op_errno = EINVAL;
+ op_ret = -1;
+ goto unwind;
+ }
+ METADISP_TRACE("frontend hardlink count %d %d", ret, nlink);
+ if (nlink > 1) {
+ goto unwind;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
+
+int32_t
+metadisp_unlink_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ call_stub_t *stub = NULL;
+
+ if (cookie) {
+ stub = cookie;
+ }
+
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ // fail fast on empty gfid so we don't loop forever
+ if (gf_uuid_is_null(buf->ia_gfid)) {
+ op_ret = -1;
+ op_errno = ENODATA;
+ goto unwind;
+ }
+
+ // fill gfid since the stub is incomplete
+ memcpy(stub->args.loc.gfid, buf->ia_gfid, sizeof(uuid_t));
+ memcpy(stub->args.loc.pargfid, postparent->ia_gfid, sizeof(uuid_t));
+
+ if (stub->poison) {
+ call_stub_destroy(stub);
+ stub = NULL;
+ return 0;
+ }
+
+ call_resume(stub);
+ return 0;
+
+unwind:
+ if (stub) {
+ call_stub_destroy(stub);
+ }
+ STACK_UNWIND_STRICT(unlink, frame, op_ret, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+metadisp_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ loc_t backend_loc = {
+ 0,
+ };
+
+ if (gf_uuid_is_null(loc->gfid)) {
+ METADISP_TRACE("winding lookup for unlink to path %s", loc->path);
+
+ // loop back to ourselves after a lookup
+ stub = fop_unlink_stub(frame, metadisp_unlink, loc, xflag, xdata);
+ STACK_WIND_COOKIE(frame, metadisp_unlink_lookup_cbk, stub,
+ METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+ }
+
+ if (build_backend_loc(loc->gfid, loc, &backend_loc)) {
+ goto unwind;
+ }
+
+ //
+ // ensure we get the link count on the unlink response, so we can
+ // account for hardlinks before winding to the backend.
+ // NOTE:
+ // multiple xlators use GF_REQUEST_LINK_COUNT_XDATA. confirmation
+ // is needed to ensure that multiple requests will work in the same
+ // xlator stack.
+ //
+ if (!xdata) {
+ xdata = dict_new();
+ }
+ dict_set_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, 1);
+
+ METADISP_TRACE("winding frontend unlink to path %s", loc->path);
+ stub = fop_unlink_stub(frame, metadisp_unlink_resume, &backend_loc, xflag,
+ xdata);
+
+ STACK_WIND_COOKIE(frame, metadisp_unlink_cbk, stub, METADATA_CHILD(this),
+ METADATA_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+unwind:
+ STACK_UNWIND_STRICT(unlink, frame, -1, EINVAL, NULL, NULL, NULL);
+ return 0;
+}
diff --git a/xlators/features/metadisp/src/metadisp.c b/xlators/features/metadisp/src/metadisp.c
new file mode 100644
index 00000000000..3c8f150cebc
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp.c
@@ -0,0 +1,46 @@
+#include <glusterfs/call-stub.h>
+
+#include "metadisp.h"
+#include "metadisp-fops.h"
+
+int32_t
+init(xlator_t *this)
+{
+ if (!this->children) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "not configured with children. exiting");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
+ }
+
+ return 0;
+}
+
+void
+fini(xlator_t *this)
+{
+ return;
+}
+
+/* defined in fops.c */
+struct xlator_fops fops;
+
+struct xlator_cbks cbks = {};
+
+struct volume_options options[] = {
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .op_version = {1},
+ .identifier = "metadisp",
+ .category = GF_EXPERIMENTAL,
+};
diff --git a/xlators/features/metadisp/src/metadisp.h b/xlators/features/metadisp/src/metadisp.h
new file mode 100644
index 00000000000..c8fd7a13c04
--- /dev/null
+++ b/xlators/features/metadisp/src/metadisp.h
@@ -0,0 +1,45 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef GF_METADISP_H_
+#define GF_METADISP_H_
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#define METADATA_CHILD(_this) FIRST_CHILD(_this)
+#define DATA_CHILD(_this) SECOND_CHILD(_this)
+
+int32_t
+build_backend_loc(uuid_t gfid, loc_t *src_loc, loc_t *dst_loc);
+
+#define METADISP_TRACE(_args...) gf_log("metadisp", GF_LOG_INFO, _args)
+
+#define METADISP_FILTER_ROOT(_op, _args...) \
+ if (strcmp(loc->path, "/") == 0) { \
+ STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \
+ METADATA_CHILD(this)->fops->_op, _args); \
+ return 0; \
+ }
+
+#define METADISP_FILTER_ROOT_BY_GFID(_op, _gfid, _args...) \
+ if (__is_root_gfid(_gfid)) { \
+ STACK_WIND(frame, default_##_op##_cbk, METADATA_CHILD(this), \
+ METADATA_CHILD(this)->fops->_op, _args); \
+ return 0; \
+ }
+
+#define RESOLVE_GFID_REQ(_dict, _dest, _lbl) \
+ VALIDATE_OR_GOTO(dict_get_ptr(_dict, "gfid-req", (void **)&_dest) == 0, \
+ _lbl)
+
+#endif /* __TEMPLATE_H__ */
diff --git a/xlators/features/namespace/Makefile.am b/xlators/features/namespace/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/namespace/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/namespace/src/Makefile.am b/xlators/features/namespace/src/Makefile.am
new file mode 100644
index 00000000000..e355d42cf4e
--- /dev/null
+++ b/xlators/features/namespace/src/Makefile.am
@@ -0,0 +1,17 @@
+xlator_LTLIBRARIES = namespace.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+namespace_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+namespace_la_SOURCES = namespace.c
+namespace_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = namespace.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/xlators/lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/namespace/src/namespace.c b/xlators/features/namespace/src/namespace.c
new file mode 100644
index 00000000000..86c5ebee900
--- /dev/null
+++ b/xlators/features/namespace/src/namespace.c
@@ -0,0 +1,1344 @@
+/*
+ * Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ *
+ * xlators/features/namespace:
+ * This translator tags each request with a namespace hash,
+ * which then can be used in later translators to track and
+ * throttle fops per namespace.
+ */
+
+#include <sys/types.h>
+
+#include <glusterfs/defaults.h>
+#include <glusterfs/hashfn.h>
+#include <glusterfs/logging.h>
+#include "namespace.h"
+
+/* Return codes for common path parsing functions. */
+enum _path_parse_result {
+ PATH_PARSE_RESULT_NO_PATH = 0,
+ PATH_PARSE_RESULT_FOUND = 1,
+ PATH_PARSE_RESULT_IS_GFID = 2,
+};
+
+typedef enum _path_parse_result path_parse_result_t;
+
+/* Clean up an ns_local struct. Wipe a loc (its inode is ref'd, so we're good.)
+ */
+static inline void
+ns_local_cleanup(ns_local_t *local)
+{
+ if (!local) {
+ return;
+ }
+
+ loc_wipe(&local->loc);
+ GF_FREE(local);
+}
+
+/* Create a new ns_local. We ref the inode, fake a new loc struct, and stash
+ * the stub given to us. */
+static inline ns_local_t *
+ns_local_new(call_stub_t *stub, inode_t *inode)
+{
+ ns_local_t *local = NULL;
+ loc_t loc = {
+ 0,
+ };
+
+ if (!stub || !inode) {
+ goto out;
+ }
+
+ local = GF_CALLOC(1, sizeof(ns_local_t), 0);
+ if (local == NULL) {
+ goto out;
+ }
+
+ /* Set up a fake loc_t struct to give to the getxattr call. */
+ gf_uuid_copy(loc.gfid, inode->gfid);
+ loc.inode = inode_ref(inode);
+
+ /* If for some reason inode_ref() fails, then just give up. */
+ if (!loc.inode) {
+ GF_FREE(local);
+ goto out;
+ }
+
+ local->stub = stub;
+ local->loc = loc;
+
+out:
+ return local;
+}
+
+/* Try parsing a path string. If the path string is a GFID, then return
+ * with PATH_PARSE_RESULT_IS_GFID. If we have no namespace (i.e. '/') then
+ * return PATH_PARSE_RESULT_NO_PATH and set the hash to 1. Otherwise, hash the
+ * namespace and store it in the info struct. */
+static path_parse_result_t
+parse_path(ns_info_t *info, const char *path)
+{
+ int len = 0;
+ const char *ns_begin = path;
+ const char *ns_end = NULL;
+
+ if (!path || strlen(path) == 0) {
+ return PATH_PARSE_RESULT_NO_PATH;
+ }
+
+ if (path[0] == '<') {
+ return PATH_PARSE_RESULT_IS_GFID;
+ }
+
+ /* Right now we only want the top-level directory, so
+ * skip the initial '/' and read until the next '/'. */
+ while (*ns_begin == '/') {
+ ns_begin++;
+ }
+
+ /* ns_end will point to the next '/' or NULL if there is no delimiting
+ * '/' (i.e. "/directory" or the top level "/") */
+ ns_end = strchr(ns_begin, '/');
+ len = ns_end ? (ns_end - ns_begin) : strlen(ns_begin);
+
+ if (len != 0) {
+ info->hash = SuperFastHash(ns_begin, len);
+ } else {
+ /* If our substring is empty, then we can hash '/' instead.
+ * '/' is used in the namespace config for the top-level
+ * namespace. */
+ info->hash = SuperFastHash("/", 1);
+ }
+
+ info->found = _gf_true;
+ return PATH_PARSE_RESULT_FOUND;
+}
+
+/* Cache namespace info stored in the stack (info) into the inode. */
+static int
+ns_inode_ctx_put(inode_t *inode, xlator_t *this, ns_info_t *info)
+{
+ ns_info_t *cached_ns_info = NULL;
+ uint64_t ns_as_64 = 0;
+ int ret = -1;
+
+ if (!inode || !this) {
+ gf_log(this ? this->name : "namespace", GF_LOG_WARNING,
+ "Need a valid inode and xlator to cache ns_info.");
+ ret = -1;
+ goto out;
+ }
+
+ cached_ns_info = GF_CALLOC(1, sizeof(ns_info_t), 0);
+
+ /* If we've run out of memory, then return ENOMEM. */
+ if (cached_ns_info == NULL) {
+ gf_log(this->name, GF_LOG_WARNING, "No memory to cache ns_info.");
+ ret = -(ENOMEM);
+ goto out;
+ }
+
+ *cached_ns_info = *info;
+ ns_as_64 = (uint64_t)(uintptr_t)cached_ns_info;
+
+ ret = inode_ctx_put(inode, this, ns_as_64);
+
+ if (ret) {
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && cached_ns_info) {
+ GF_FREE(cached_ns_info);
+ }
+
+ return ret;
+}
+
+/* Retrieve namespace info cached in the inode into the stack for use in later
+ * translators. */
+static int
+ns_inode_ctx_get(inode_t *inode, xlator_t *this, ns_info_t *info)
+{
+ ns_info_t *cached_ns_info = NULL;
+ uint64_t ns_as_64 = 0;
+ int ret = -1;
+
+ if (!inode) {
+ ret = -ENOENT;
+ goto out;
+ }
+
+ ret = inode_ctx_get(inode, this, &ns_as_64);
+
+ if (!ret) {
+ cached_ns_info = (ns_info_t *)(uintptr_t)ns_as_64;
+ *info = *cached_ns_info;
+ }
+
+out:
+ return ret;
+}
+
+/* This callback is the top of the unwind path of our attempt to get the path
+ * manually from the posix translator. We'll try to parse the path returned
+ * if it exists, then cache the hash if possible. Then just return to the
+ * default stub that we provide in the local, since there's nothing else to do
+ * once we've gotten the namespace hash. */
+int32_t
+get_path_resume_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ path_parse_result_t ret = PATH_PARSE_RESULT_NO_PATH;
+ call_frame_t *resume_frame = NULL;
+ ns_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ ns_info_t *info = NULL;
+ char *path = NULL;
+
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ local = frame->local;
+
+ GF_VALIDATE_OR_GOTO(this->name, local, out);
+ stub = local->stub;
+
+ GF_VALIDATE_OR_GOTO(this->name, stub, out);
+ /* Get the ns_info from the frame that we will eventually resume,
+ * not the frame that we're going to destroy (frame). */
+ resume_frame = stub->frame;
+
+ GF_VALIDATE_OR_GOTO(this->name, resume_frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, resume_frame->root, out);
+ info = &resume_frame->root->ns_info;
+
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+ /* If we get a value back for the GET_ANCESTRY_PATH_KEY, then we
+ * try to access it and parse it like a path. */
+ if (!op_ret && !dict_get_str(dict, GET_ANCESTRY_PATH_KEY, &path)) {
+ gf_log(this->name, GF_LOG_DEBUG, "G>P %s retrieved path %s",
+ uuid_utoa(local->loc.gfid), path);
+ /* Now let's parse a path, finally. */
+ ret = parse_path(info, path);
+ }
+
+ if (ret == PATH_PARSE_RESULT_FOUND) {
+ /* If we finally found namespace, then stash it. */
+ ns_inode_ctx_put(local->loc.inode, this, info);
+
+ gf_log(this->name, GF_LOG_DEBUG, "G>P %s %10u namespace found %s",
+ uuid_utoa(local->loc.inode->gfid), info->hash, path);
+ } else if (ret == PATH_PARSE_RESULT_NO_PATH) {
+ gf_log(this->name, GF_LOG_WARNING, "G>P %s has no path",
+ uuid_utoa(local->loc.inode->gfid));
+ } else if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "G>P %s winding failed, still have gfid",
+ uuid_utoa(local->loc.inode->gfid));
+ }
+
+out:
+ /* Make sure to clean up local finally. */
+
+ if (frame) {
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+ }
+
+ if (local) {
+ ns_local_cleanup(local);
+ }
+
+ if (stub) {
+ call_resume(stub);
+ }
+
+ return 0;
+}
+
+/* This function tries first to set a namespace based on the information that
+ * it can retrieve from an `loc_t`. This includes first looking for a cached
+ * namespace in the inode, then trying to parse the path string in the `loc_t`
+ * struct. If this fails, then it will try to call inode_path. */
+static path_parse_result_t
+set_ns_from_loc(const char *fn, call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ path_parse_result_t ret = PATH_PARSE_RESULT_NO_PATH;
+ ns_private_t *priv = (ns_private_t *)this->private;
+ ns_info_t *info = &frame->root->ns_info;
+ char *path = NULL;
+
+ info->hash = 0;
+ info->found = _gf_false;
+
+ if (!priv->tag_namespaces) {
+ return ret;
+ }
+
+ /* This is our first pass at trying to get a path. Try getting
+ * from the inode context, then from the loc's path itself. */
+ if (!loc || !loc->path || !loc->inode) {
+ ret = PATH_PARSE_RESULT_NO_PATH;
+ } else if (!ns_inode_ctx_get(loc->inode, this, info)) {
+ ret = PATH_PARSE_RESULT_FOUND;
+ } else {
+ ret = parse_path(info, loc->path);
+ gf_log(this->name, GF_LOG_DEBUG, "%s: LOC retrieved path %s", fn,
+ loc->path);
+
+ if (ret == PATH_PARSE_RESULT_FOUND) {
+ ns_inode_ctx_put(loc->inode, this, info);
+ }
+ }
+
+ /* Keep trying by calling inode_path next, making sure to copy
+ the loc's gfid into its inode if necessary. */
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ if (gf_uuid_is_null(loc->inode->gfid)) {
+ gf_uuid_copy(loc->inode->gfid, loc->gfid);
+ }
+
+ if (inode_path(loc->inode, NULL, &path) >= 0 && path) {
+ ret = parse_path(info, loc->path);
+ gf_log(this->name, GF_LOG_DEBUG, "%s: LOC retrieved path %s", fn,
+ path);
+
+ if (ret == PATH_PARSE_RESULT_FOUND) {
+ ns_inode_ctx_put(loc->inode, this, info);
+ }
+ }
+
+ if (path) {
+ GF_FREE(path);
+ }
+ }
+
+ /* Report our status, and if we have a GFID, we'll eventually try a
+ * GET_ANCESTRY_PATH_KEY wind when we return from this function. */
+ if (ret == PATH_PARSE_RESULT_FOUND) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "%s: LOC %s %10u namespace found for %s", fn,
+ uuid_utoa(loc->inode->gfid), info->hash, loc->path);
+ } else if (ret == PATH_PARSE_RESULT_NO_PATH) {
+ gf_log(this->name, GF_LOG_WARNING, "%s: LOC has no path", fn);
+ } else if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ /* Make sure to copy the inode's gfid for the eventual wind. */
+ if (gf_uuid_is_null(loc->inode->gfid)) {
+ gf_uuid_copy(loc->inode->gfid, loc->gfid);
+ }
+
+ gf_log(this->name, GF_LOG_DEBUG, "%s: LOC %s winding, looking for path",
+ fn, uuid_utoa(loc->inode->gfid));
+ }
+
+ return ret;
+}
+
+/* This function tries first to set a namespace based on the information that
+ * it can retrieve from an `fd_t`. This includes first looking for a cached
+ * namespace in the inode, then trying to call inode_path manually. */
+static path_parse_result_t
+set_ns_from_fd(const char *fn, call_frame_t *frame, xlator_t *this, fd_t *fd)
+{
+ path_parse_result_t ret = PATH_PARSE_RESULT_NO_PATH;
+ ns_private_t *priv = (ns_private_t *)this->private;
+ ns_info_t *info = &frame->root->ns_info;
+ char *path = NULL;
+
+ info->hash = 0;
+ info->found = _gf_false;
+
+ if (!priv->tag_namespaces) {
+ return ret;
+ }
+
+ /* This is our first pass at trying to get a path. Try getting
+ * from the inode context, then inode_path. */
+ if (!fd || !fd->inode) {
+ ret = PATH_PARSE_RESULT_NO_PATH;
+ } else if (!ns_inode_ctx_get(fd->inode, this, info)) {
+ ret = PATH_PARSE_RESULT_FOUND;
+ } else if (inode_path(fd->inode, NULL, &path) >= 0 && path) {
+ ret = parse_path(info, path);
+ gf_log(this->name, GF_LOG_DEBUG, "%s: FD retrieved path %s", fn, path);
+
+ if (ret == PATH_PARSE_RESULT_FOUND) {
+ ns_inode_ctx_put(fd->inode, this, info);
+ }
+ }
+
+ if (path) {
+ GF_FREE(path);
+ }
+
+ /* Report our status, and if we have a GFID, we'll eventually try a
+ * GET_ANCESTRY_PATH_KEY wind when we return from this function. */
+ if (ret == PATH_PARSE_RESULT_FOUND) {
+ gf_log(this->name, GF_LOG_DEBUG, "%s: FD %s %10u namespace found", fn,
+ uuid_utoa(fd->inode->gfid), info->hash);
+ } else if (ret == PATH_PARSE_RESULT_NO_PATH) {
+ gf_log(this->name, GF_LOG_WARNING, "%s: FD has no path", fn);
+ } else if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ gf_log(this->name, GF_LOG_DEBUG, "%s: FD %s winding, looking for path",
+ fn, uuid_utoa(fd->inode->gfid));
+ }
+
+ return ret;
+}
+
+/* This macro does the work of winding down a call of `getxattr` in the case
+ * that we have to retrieve the path manually. It assumes that there is a label
+ * called `wind` and the existence of several basic variables (frame, this),
+ * but otherwise is general enough for any fop (fd- or loc-based.) */
+#define GET_ANCESTRY_PATH_WIND(fop, inode, args...) \
+ do { \
+ ns_info_t *info = &frame->root->ns_info; \
+ call_frame_t *new_frame = NULL; \
+ ns_local_t *local = NULL; \
+ call_stub_t *stub = NULL; \
+ \
+ gf_log(this->name, GF_LOG_DEBUG, " %s winding, looking for path", \
+ uuid_utoa(inode->gfid)); \
+ \
+ new_frame = create_frame(this, this->ctx->pool); \
+ if (!new_frame) { \
+ gf_log(this->name, GF_LOG_ERROR, \
+ "Cannot allocate new call frame."); \
+ goto wind; \
+ } \
+ \
+ stub = fop_##fop##_stub(frame, default_##fop, args); \
+ if (!stub) { \
+ gf_log(this->name, GF_LOG_ERROR, \
+ "Cannot allocate function stub."); \
+ goto wind; \
+ } \
+ \
+ new_frame->root->uid = 0; \
+ new_frame->root->gid = 0; \
+ /* Put a phony "not found" NS info into this call. */ \
+ new_frame->root->ns_info = *info; \
+ \
+ local = ns_local_new(stub, inode); \
+ if (!local) { \
+ gf_log(this->name, GF_LOG_ERROR, \
+ "Cannot allocate function local."); \
+ goto wind; \
+ } \
+ \
+ new_frame->local = local; \
+ /* After allocating a new frame, a call stub (to \
+ * resume our current fop), and a local variables \
+ * struct (for our loc to getxattr and our resume \
+ * stub), call getxattr and unwind to get_path_resume_cbk. \
+ */ \
+ STACK_WIND(new_frame, get_path_resume_cbk, FIRST_CHILD(this), \
+ FIRST_CHILD(this)->fops->getxattr, &local->loc, \
+ GET_ANCESTRY_PATH_KEY, NULL); \
+ } while (0)
+
+int32_t
+ns_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(rmdir, loc->inode, loc, xflags, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, xflags, xdata);
+ return 0;
+}
+
+int32_t
+ns_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(unlink, loc->inode, loc, xflags, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflags, xdata);
+ return 0;
+}
+
+int32_t
+ns_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this,
+ newloc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(rename, newloc->inode, oldloc, newloc, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ return 0;
+}
+
+int32_t
+ns_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this,
+ newloc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(link, newloc->inode, oldloc, newloc, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
+}
+
+int32_t
+ns_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(mkdir, loc->inode, loc, mode, umask, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ return 0;
+}
+
+int32_t
+ns_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(symlink, loc->inode, linkname, loc, umask,
+ xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata);
+ return 0;
+}
+
+int32_t
+ns_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t dev, mode_t umask, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(mknod, loc->inode, loc, mode, dev, umask, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, dev, umask, xdata);
+ return 0;
+}
+
+int32_t
+ns_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(create, loc->inode, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+}
+
+int32_t
+ns_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(fsetattr, fd->inode, fd, stbuf, valid, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
+}
+
+int32_t
+ns_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(setattr, loc->inode, loc, stbuf, valid, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
+}
+
+int32_t
+ns_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(fremovexattr, fd->inode, fd, name, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_fremovexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ return 0;
+}
+
+int32_t
+ns_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(removexattr, loc->inode, loc, name, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ return 0;
+}
+
+int32_t
+ns_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(setxattr, loc->inode, loc, dict, flags, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
+ return 0;
+}
+
+int32_t
+ns_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(fsetxattr, fd->inode, fd, dict, flags, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
+}
+
+int32_t
+ns_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(truncate, loc->inode, loc, offset, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
+}
+
+int32_t
+ns_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(ftruncate, fd->inode, fd, offset, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+}
+
+int32_t
+ns_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(writev, fd->inode, fd, vector, count, offset,
+ flags, iobref, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, offset,
+ flags, iobref, xdata);
+ return 0;
+}
+
+int32_t
+ns_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(lookup, loc->inode, loc, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+}
+
+int32_t
+ns_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(stat, loc->inode, loc, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+}
+
+int32_t
+ns_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(fstat, fd->inode, fd, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
+}
+
+int32_t
+ns_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(readlink, loc->inode, loc, size, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_readlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
+ return 0;
+}
+
+int32_t
+ns_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(access, loc->inode, loc, mask, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_access_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->access, loc, mask, xdata);
+ return 0;
+}
+
+int32_t
+ns_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(open, fd->inode, loc, flags, fd, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+}
+
+int32_t
+ns_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(readv, fd->inode, fd, size, offset, flags,
+ xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+ return 0;
+}
+
+int32_t
+ns_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(flush, fd->inode, fd, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
+}
+
+int32_t
+ns_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(fsync, fd->inode, fd, datasync, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
+}
+
+int32_t
+ns_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(opendir, loc->inode, loc, fd, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
+}
+
+int32_t
+ns_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(fsyncdir, fd->inode, fd, datasync, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_fsyncdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsyncdir, fd, datasync, xdata);
+ return 0;
+}
+
+int32_t
+ns_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(rchecksum, fd->inode, fd, offset, len, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_rchecksum_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rchecksum, fd, offset, len, xdata);
+ return 0;
+}
+
+int32_t
+ns_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(statfs, loc->inode, loc, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_statfs_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
+}
+
+int32_t
+ns_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(inodelk, loc->inode, volume, loc, cmd, flock,
+ xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_inodelk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk, volume, loc, cmd, flock,
+ xdata);
+ return 0;
+}
+
+int32_t
+ns_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *flock, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(finodelk, fd->inode, volume, fd, cmd, flock,
+ xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_finodelk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk, volume, fd, cmd, flock,
+ xdata);
+ return 0;
+}
+
+int32_t
+ns_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(entrylk, loc->inode, volume, loc, basename, cmd,
+ type, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, volume, loc, basename, cmd,
+ type, xdata);
+ return 0;
+}
+
+int32_t
+ns_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(fentrylk, fd->inode, volume, fd, basename, cmd,
+ type, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_fentrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fentrylk, volume, fd, basename, cmd,
+ type, xdata);
+ return 0;
+}
+
+int32_t
+ns_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(fgetxattr, fd->inode, fd, name, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
+}
+
+int32_t
+ns_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(getxattr, loc->inode, loc, name, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
+}
+
+int32_t
+ns_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(lk, fd->inode, fd, cmd, flock, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_lk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lk, fd, cmd, flock, xdata);
+ return 0;
+}
+
+int32_t
+ns_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(readdir, fd->inode, fd, size, offset, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
+
+ return 0;
+}
+
+int32_t
+ns_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(readdirp, fd->inode, fd, size, offset, dict);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict);
+ return 0;
+}
+
+int32_t
+ns_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_loc(__FUNCTION__, frame, this, loc);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(xattrop, loc->inode, loc, flags, dict, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_xattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata);
+
+ return 0;
+}
+
+int32_t
+ns_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(fxattrop, fd->inode, fd, flags, dict, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_fxattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata);
+
+ return 0;
+}
+
+int32_t
+ns_getspec(call_frame_t *frame, xlator_t *this, const char *key, int32_t flag)
+{
+ STACK_WIND(frame, default_getspec_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getspec, key, flag);
+ return 0;
+}
+
+int32_t
+ns_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(fallocate, fd->inode, fd, keep_size, offset, len,
+ xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, keep_size, offset, len,
+ xdata);
+ return 0;
+}
+
+int32_t
+ns_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(discard, fd->inode, fd, offset, len, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+ return 0;
+}
+
+int32_t
+ns_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ path_parse_result_t ret = set_ns_from_fd(__FUNCTION__, frame, this, fd);
+
+ if (ret == PATH_PARSE_RESULT_IS_GFID) {
+ GET_ANCESTRY_PATH_WIND(zerofill, fd->inode, fd, offset, len, xdata);
+ return 0;
+ }
+wind:
+ STACK_WIND(frame, default_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+ return 0;
+}
+
+int
+ns_forget(xlator_t *this, inode_t *inode)
+{
+ uint64_t ns_as_64 = 0;
+ ns_info_t *info = NULL;
+
+ inode_ctx_del(inode, this, &ns_as_64);
+
+ if (!ns_as_64) {
+ return 0;
+ }
+
+ info = (ns_info_t *)(uintptr_t)ns_as_64;
+ GF_FREE(info);
+
+ return 0;
+}
+
+int32_t
+init(xlator_t *this)
+{
+ int32_t ret = -1;
+ ns_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO(GF_NAMESPACE, this, out);
+
+ if (!this->children || this->children->next) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "translator needs a single subvolume.");
+ goto out;
+ }
+
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "dangling volume. please check volfile.");
+ goto out;
+ }
+
+ priv = GF_CALLOC(1, sizeof(ns_private_t), 0);
+
+ if (!priv) {
+ gf_log(this->name, GF_LOG_ERROR, "Can't allocate ns_priv structure.");
+ goto out;
+ }
+
+ GF_OPTION_INIT("tag-namespaces", priv->tag_namespaces, bool, out);
+
+ gf_log(this->name, GF_LOG_INFO, "Namespace xlator loaded");
+ this->private = priv;
+ ret = 0;
+
+out:
+ if (ret) {
+ GF_FREE(priv);
+ }
+
+ return ret;
+}
+
+void
+fini(xlator_t *this)
+{
+ GF_FREE(this->private);
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ int ret = -1;
+ ns_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, options, out);
+
+ priv = (ns_private_t *)this->private;
+
+ GF_OPTION_RECONF("tag-namespaces", priv->tag_namespaces, options, bool,
+ out);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+struct xlator_fops fops = {
+ .lookup = ns_lookup,
+ .stat = ns_stat,
+ .fstat = ns_fstat,
+ .truncate = ns_truncate,
+ .ftruncate = ns_ftruncate,
+ .access = ns_access,
+ .readlink = ns_readlink,
+ .mknod = ns_mknod,
+ .mkdir = ns_mkdir,
+ .unlink = ns_unlink,
+ .rmdir = ns_rmdir,
+ .symlink = ns_symlink,
+ .rename = ns_rename,
+ .link = ns_link,
+ .create = ns_create,
+ .open = ns_open,
+ .readv = ns_readv,
+ .writev = ns_writev,
+ .flush = ns_flush,
+ .fsync = ns_fsync,
+ .opendir = ns_opendir,
+ .readdir = ns_readdir,
+ .readdirp = ns_readdirp,
+ .fsyncdir = ns_fsyncdir,
+ .statfs = ns_statfs,
+ .setxattr = ns_setxattr,
+ .getxattr = ns_getxattr,
+ .fsetxattr = ns_fsetxattr,
+ .fgetxattr = ns_fgetxattr,
+ .removexattr = ns_removexattr,
+ .fremovexattr = ns_fremovexattr,
+ .lk = ns_lk,
+ .inodelk = ns_inodelk,
+ .finodelk = ns_finodelk,
+ .entrylk = ns_entrylk,
+ .fentrylk = ns_fentrylk,
+ .rchecksum = ns_rchecksum,
+ .xattrop = ns_xattrop,
+ .fxattrop = ns_fxattrop,
+ .setattr = ns_setattr,
+ .fsetattr = ns_fsetattr,
+ .getspec = ns_getspec,
+ .fallocate = ns_fallocate,
+ .discard = ns_discard,
+ .zerofill = ns_zerofill,
+};
+
+struct xlator_cbks cbks = {
+ .forget = ns_forget,
+};
+
+struct xlator_dumpops dumpops;
+
+struct volume_options options[] = {
+ {
+ .key = {"tag-namespaces"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "This option enables this translator's functionality "
+ "that tags every fop with a namespace hash for later "
+ "throttling, stats collection, logging, etc.",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .tags = {"namespace"},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ },
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .op_version = {GD_OP_VERSION_3_12_0},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "namespace",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/namespace/src/namespace.h b/xlators/features/namespace/src/namespace.h
new file mode 100644
index 00000000000..3a9b84d6426
--- /dev/null
+++ b/xlators/features/namespace/src/namespace.h
@@ -0,0 +1,23 @@
+#ifndef __NAMESPACE_H__
+#define __NAMESPACE_H__
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/call-stub.h>
+
+#define GF_NAMESPACE "namespace"
+
+typedef struct {
+ gf_boolean_t tag_namespaces;
+} ns_private_t;
+
+typedef struct {
+ loc_t loc; /* We store a "fake" loc_t for the getxattr wind. */
+ call_stub_t *stub; /* A stub back to the function we're resuming. */
+} ns_local_t;
+
+#endif /* __NAMESPACE_H__ */
diff --git a/xlators/features/path-convertor/Makefile.am b/xlators/features/path-convertor/Makefile.am
deleted file mode 100644
index d471a3f9243..00000000000
--- a/xlators/features/path-convertor/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = src
-
-CLEANFILES =
diff --git a/xlators/features/path-convertor/src/Makefile.am b/xlators/features/path-convertor/src/Makefile.am
deleted file mode 100644
index 58cfed0f983..00000000000
--- a/xlators/features/path-convertor/src/Makefile.am
+++ /dev/null
@@ -1,14 +0,0 @@
-
-xlator_LTLIBRARIES = path-converter.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features
-
-path_converter_la_LDFLAGS = -module -avoidversion
-
-path_converter_la_SOURCES = path.c
-path_converter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
-
-CLEANFILES =
-
diff --git a/xlators/features/path-convertor/src/path-mem-types.h b/xlators/features/path-convertor/src/path-mem-types.h
deleted file mode 100644
index c071513b682..00000000000
--- a/xlators/features/path-convertor/src/path-mem-types.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __PATH_MEM_TYPES_H__
-#define __PATH_MEM_TYPES_H__
-
-#include "mem-types.h"
-
-enum gf_path_mem_types_ {
- gf_path_mt_path_private_t = gf_common_mt_end + 1,
- gf_path_mt_char,
- gf_path_mt_regex_t,
- gf_path_mt_end
-};
-#endif
-
diff --git a/xlators/features/path-convertor/src/path.c b/xlators/features/path-convertor/src/path.c
deleted file mode 100644
index f2423092e56..00000000000
--- a/xlators/features/path-convertor/src/path.c
+++ /dev/null
@@ -1,1239 +0,0 @@
-/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-/* TODO: add gf_log to all the cases returning errors */
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-/**
- * xlators/features/path-translator:
- * This translator converts the path it gets into user specified targets.
- */
-
-#include <sys/types.h>
-#include <regex.h>
-#include <time.h>
-#include <errno.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "path-mem-types.h"
-
-typedef struct path_private
-{
- int32_t this_len;
- int32_t start_off;
- int32_t end_off;
- char *this;
- char *that;
- char *path;
- regex_t *preg;
-} path_private_t;
-
-static char *
-name_this_to_that (xlator_t *xl, const char *path, const char *name)
-{
- path_private_t *priv = xl->private;
- char priv_path[PATH_MAX] = {0,};
- char *tmp_name = NULL;
- int32_t path_len = strlen (path);
- int32_t name_len = strlen (name) - ZR_FILE_CONTENT_STRLEN;
- int32_t total_len = path_len + name_len;
- int32_t i = 0, j = 0;
-
- if (path_len >= priv->end_off)
- return (char *)name;
-
- if (priv->end_off && (total_len > priv->end_off)) {
- j = priv->start_off;
- tmp_name = GF_CALLOC (1, (total_len +
- ZR_FILE_CONTENT_STRLEN),
- gf_path_mt_char);
- ERR_ABORT (tmp_name);
-
- /* Get the complete path for the file first */
- strcpy (tmp_name, path);
- strcat (tmp_name, name + ZR_FILE_CONTENT_STRLEN);
-
- strncpy (priv_path, tmp_name, priv->start_off);
- for (i = priv->start_off; i < priv->end_off; i++) {
- if (tmp_name[i] == '/')
- continue;
- priv_path[j++] = tmp_name[i];
- }
- memcpy ((priv_path + j),
- (tmp_name + priv->end_off),
- (total_len - priv->end_off));
- priv_path[(total_len - (priv->end_off - j))] = '\0';
-
- strcpy (tmp_name, ZR_FILE_CONTENT_STR);
- strcat (tmp_name, priv_path);
-
- return tmp_name;
- }
-
- return (char *)name;
-}
-
-/* This function should return
- * NULL -
- * converted path - if path match
- * same path - if it doesn't match
- */
-static char *
-path_this_to_that (xlator_t *xl, const char *path)
-{
- path_private_t *priv = xl->private;
- char *priv_path = NULL;
- int32_t path_len = strlen (path);
- int32_t i = 0, j = 0;
-
- if (priv->end_off && (path_len > priv->start_off)) {
- priv_path = GF_CALLOC (1, path_len, gf_path_mt_char);
- ERR_ABORT (priv_path);
-
- if (priv->start_off && (path_len > priv->start_off))
- memcpy (priv_path, path, priv->start_off);
- if (path_len > priv->end_off) {
- j = priv->start_off;
- for (i = priv->start_off; i < priv->end_off; i++) {
- if (path[i] == '/')
- continue;
- priv_path[j++] = path[i];
- }
- memcpy ((priv_path + j),
- (path + priv->end_off),
- (path_len - priv->end_off));
- priv_path[(path_len - (priv->end_off - j))] = '\0';
- }
- return priv_path;
- }
- return (char *)path;
-}
-
-int32_t
-path_create_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fd, inode, buf);
- return 0;
-}
-
-int32_t
-path_open_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
- return 0;
-}
-
-int32_t
-path_getdents_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count)
-{
- STACK_UNWIND (frame, op_ret, op_errno, entries, count);
- return 0;
-}
-
-int32_t
-path_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- gf_dirent_t *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-
-int32_t
-path_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- const char *buf,
- struct iatt *sbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf, sbuf);
- return 0;
-}
-
-int32_t
-path_lookup_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- dict_t *xattr,
- struct iatt *postparent)
-{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf, xattr);
- return 0;
-}
-
-
-int32_t
-path_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
-}
-
-int32_t
-path_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
-}
-
-
-int32_t
-path_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
-}
-
-int32_t
-path_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- inode_t *inode,
- struct iatt *buf,
- struct iatt *preparent,
- struct iatt *postparent)
-{
- STACK_UNWIND (frame, op_ret, op_errno, inode, buf);
- return 0;
-}
-
-int32_t
-path_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- fd_t *fd)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fd);
- return 0;
-}
-
-
-int32_t
-path_rename_buf_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf,
- struct iatt *preoldparent,
- struct iatt *postoldparent,
- struct iatt *prenewparent,
- struct iatt *postnewparent)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-
-
-int32_t
-path_common_buf_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-path_common_dict_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *dict)
-{
- STACK_UNWIND (frame, op_ret, op_errno, dict);
- return 0;
-}
-
-int32_t
-path_common_remove_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,struct iatt *preparent,
- struct iatt *postparent)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-path_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,struct iatt *prebuf,
- struct iatt *postbuf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
-}
-
-
-int32_t
-path_common_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-/* */
-int32_t
-path_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame, path_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-int32_t
-path_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_common_buf_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-int32_t
-path_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_readlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink,
- loc,
- size);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-int32_t
-path_mknod (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode,
- dev_t dev)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_mknod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod,
- loc,
- mode,
- dev);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-int32_t
-path_mkdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- mode_t mode)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_mkdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- loc,
- mode);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-int32_t
-path_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_common_remove_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-int32_t
-path_rmdir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_common_remove_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- loc);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-int32_t
-path_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *linkpath,
- loc_t *loc)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- linkpath,
- loc);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-int32_t
-path_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- char *oldloc_path = (char *)oldloc->path;
- char *tmp_oldloc_path = NULL;
-
- char *newloc_path = (char *)newloc->path;
- char *tmp_newloc_path = NULL;
-
- if (!(tmp_oldloc_path = path_this_to_that (this, oldloc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- oldloc->path = tmp_oldloc_path;
-
- if (!(tmp_newloc_path = path_this_to_that (this, newloc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- newloc->path = tmp_newloc_path;
-
- STACK_WIND (frame,
- path_rename_buf_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename,
- oldloc,
- newloc);
-
- oldloc->path = oldloc_path;
- if (tmp_oldloc_path != oldloc_path)
- GF_FREE (tmp_oldloc_path);
-
- newloc->path = newloc_path;
- if (tmp_newloc_path != newloc_path)
- GF_FREE (tmp_newloc_path);
-
- return 0;
-}
-
-int32_t
-path_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc)
-{
- char *oldloc_path = (char *)oldloc->path;
- char *tmp_oldloc_path = NULL;
-
- char *newloc_path = (char *)newloc->path;
- char *tmp_newloc_path = NULL;
-
- if (!(tmp_oldloc_path = path_this_to_that (this, oldloc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- oldloc->path = tmp_oldloc_path;
-
- if (!(tmp_newloc_path = path_this_to_that (this, newloc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- newloc->path = tmp_newloc_path;
-
- STACK_WIND (frame,
- path_link_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link,
- oldloc,
- newloc);
-
- oldloc->path = oldloc_path;
- if (tmp_oldloc_path != oldloc_path)
- GF_FREE (tmp_oldloc_path);
-
- newloc->path = newloc_path;
- if (tmp_newloc_path != newloc_path)
- GF_FREE (tmp_newloc_path);
-
- return 0;
-}
-
-int32_t
-path_setattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iatt *preop,
- struct iatt *postop)
-{
- STACK_UNWIND (frame, op_ret, op_errno, preop, postop);
- return 0;
-}
-
-int32_t
-path_setattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct iatt *stbuf,
- int32_t valid)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_setattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setattr,
- loc,
- stbuf, valid);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-
-int32_t
-path_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc,
- offset);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-
-int32_t
-path_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- fd_t *fd,
- int32_t wbflags)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_open_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc,
- flags,
- fd,
- wbflags);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-int32_t
-path_create (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags,
- mode_t mode,
- fd_t *fd)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc,
- flags,
- mode,
- fd);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-int32_t
-path_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags)
-{
- char *tmp_name = NULL;
- data_pair_t *trav = dict->members_list;
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- if (ZR_FILE_CONTENT_REQUEST(trav->key)) {
- tmp_name = name_this_to_that (this, loc->path, trav->key);
- if (tmp_name != trav->key) {
- trav->key = tmp_name;
- } else {
- tmp_name = NULL;
- }
- }
-
- STACK_WIND (frame,
- path_common_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- loc,
- dict,
- flags);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- if (tmp_name)
- GF_FREE (tmp_name);
-
- return 0;
-}
-
-int32_t
-path_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- char *tmp_name = (char *)name;
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- if (ZR_FILE_CONTENT_REQUEST(name)) {
- tmp_name = name_this_to_that (this, loc->path, name);
- }
-
- STACK_WIND (frame,
- path_common_dict_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- loc,
- tmp_name);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- if (tmp_name != name)
- GF_FREE (tmp_name);
-
- return 0;
-}
-
-int32_t
-path_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name)
-{
- char *tmp_name = (char *)name;
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- if (ZR_FILE_CONTENT_REQUEST(name)) {
- tmp_name = name_this_to_that (this, loc->path, name);
- }
-
- STACK_WIND (frame,
- path_common_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- loc,
- tmp_name);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- if (tmp_name != name)
- GF_FREE (tmp_name);
-
- return 0;
-}
-
-int32_t
-path_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- fd_t *fd)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_opendir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir,
- loc,
- fd);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-int32_t
-path_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_common_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access,
- loc,
- mask);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-int32_t
-path_checksum_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- uint8_t *fchecksum,
- uint8_t *dchecksum)
-{
- STACK_UNWIND (frame, op_ret, op_errno, fchecksum, dchecksum);
- return 0;
-}
-
-int32_t
-path_checksum (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flag)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_checksum_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->checksum,
- loc,
- flag);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-
-int32_t
-path_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame, path_common_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->entrylk,
- volume, loc, basename, cmd, type);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-int32_t
-path_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd, struct gf_flock *lock)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_common_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- volume, loc, cmd, lock);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-
-int32_t
-path_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict)
-{
- char *loc_path = (char *)loc->path;
- char *tmp_path = NULL;
-
- if (!(tmp_path = path_this_to_that (this, loc->path))) {
- STACK_UNWIND (frame, -1, ENOENT, NULL, NULL);
- return 0;
- }
- loc->path = tmp_path;
-
- STACK_WIND (frame,
- path_common_dict_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop,
- loc,
- flags,
- dict);
-
- loc->path = loc_path;
- if (tmp_path != loc_path)
- GF_FREE (tmp_path);
-
- return 0;
-}
-
-int32_t
-mem_acct_init (xlator_t *this)
-{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_path_mt_end + 1);
-
- if (ret != 0) {
- gf_log (this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
-}
-
-int32_t
-init (xlator_t *this)
-{
- dict_t *options = this->options;
- path_private_t *priv = NULL;
-
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "path translator requires exactly one subvolume");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- priv = GF_CALLOC (1, sizeof (*priv), gf_path_mt_path_private_t);
- ERR_ABORT (priv);
- if (dict_get (options, "start-offset")) {
- priv->start_off = data_to_int32 (dict_get (options,
- "start-offset"));
- }
- if (dict_get (options, "end-offset")) {
- priv->end_off = data_to_int32 (dict_get (options,
- "end-offset"));
- }
-
- if (dict_get (options, "regex")) {
- int32_t ret = 0;
- priv->preg = GF_CALLOC (1, sizeof (regex_t),
- gf_path_mt_regex_t);
- ERR_ABORT (priv->preg);
- ret = regcomp (priv->preg,
- data_to_str (dict_get (options, "regex")),
- REG_EXTENDED);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Failed to compile the 'option regex'");
- GF_FREE (priv);
- return -1;
- }
- if (dict_get (options, "replace-with")) {
- priv->that = data_to_str (dict_get (options,
- "replace-with"));
- } else {
- priv->that = "";
- }
- }
-
- this->private = priv;
- return 0;
-}
-
-void
-fini (xlator_t *this)
-{
- return;
-}
-
-struct xlator_fops fops = {
- .stat = path_stat,
- .readlink = path_readlink,
- .mknod = path_mknod,
- .mkdir = path_mkdir,
- .unlink = path_unlink,
- .rmdir = path_rmdir,
- .symlink = path_symlink,
- .rename = path_rename,
- .link = path_link,
- .truncate = path_truncate,
- .open = path_open,
- .setxattr = path_setxattr,
- .getxattr = path_getxattr,
- .removexattr = path_removexattr,
- .opendir = path_opendir,
- .access = path_access,
- .create = path_create,
- .lookup = path_lookup,
- .checksum = path_checksum,
- .xattrop = path_xattrop,
- .entrylk = path_entrylk,
- .inodelk = path_inodelk,
- .setattr = path_setattr,
-};
-
-struct xlator_cbks cbks = {
-};
-
-struct volume_options options[] = {
- { .key = {"start-offset"},
- .type = GF_OPTION_TYPE_INT,
- .min = 0,
- .max = 4095
- },
- { .key = {"end-offset"},
- .type = GF_OPTION_TYPE_INT,
- .min = 1,
- .max = 4096
- },
- { .key = {"replace-with"},
- .type = GF_OPTION_TYPE_ANY
- },
- { .key = {NULL} },
-};
diff --git a/xlators/features/quiesce/src/Makefile.am b/xlators/features/quiesce/src/Makefile.am
index e8ab4cb2417..74ea999c045 100644
--- a/xlators/features/quiesce/src/Makefile.am
+++ b/xlators/features/quiesce/src/Makefile.am
@@ -1,14 +1,16 @@
xlator_LTLIBRARIES = quiesce.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-quiesce_la_LDFLAGS = -module -avoidversion
+quiesce_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
quiesce_la_SOURCES = quiesce.c
quiesce_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-noinst_HEADERS = quiesce.h quiesce-mem-types.h
+noinst_HEADERS = quiesce.h quiesce-mem-types.h quiesce-messages.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/quiesce/src/quiesce-mem-types.h b/xlators/features/quiesce/src/quiesce-mem-types.h
index 00f7aa4f16b..416456b13af 100644
--- a/xlators/features/quiesce/src/quiesce-mem-types.h
+++ b/xlators/features/quiesce/src/quiesce-mem-types.h
@@ -1,30 +1,21 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __QUIESCE_MEM_TYPES_H__
#define __QUIESCE_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_quiesce_mem_types_ {
- gf_quiesce_mt_priv_t = gf_common_mt_end + 1,
- gf_quiesce_mt_end
+ gf_quiesce_mt_priv_t = gf_common_mt_end + 1,
+ gf_quiesce_mt_failover_hosts,
+ gf_quiesce_mt_end
};
#endif
diff --git a/xlators/features/quiesce/src/quiesce-messages.h b/xlators/features/quiesce/src/quiesce-messages.h
new file mode 100644
index 00000000000..32ffd409807
--- /dev/null
+++ b/xlators/features/quiesce/src/quiesce-messages.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ * This file is part of GlusterFS.
+ *
+ * This file is licensed to you under your choice of the GNU Lesser
+ * General Public License, version 3 or any later version (LGPLv3 or
+ * later), or the GNU General Public License, version 2 (GPLv2), in all
+ * cases as published by the Free Software Foundation.
+ */
+
+#ifndef __QUIESCE_MESSAGES_H__
+#define __QUIESCE_MESSAGES_H__
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(QUIESCE, QUIESCE_MSG_INVAL_HOST, QUIESCE_MSG_FAILOVER_FAILED);
+
+#endif /* __NL_CACHE_MESSAGES_H__ */
diff --git a/xlators/features/quiesce/src/quiesce.c b/xlators/features/quiesce/src/quiesce.c
index d355a19fa7f..0e5eb60a16f 100644
--- a/xlators/features/quiesce/src/quiesce.c
+++ b/xlators/features/quiesce/src/quiesce.c
@@ -1,688 +1,821 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "quiesce.h"
-#include "defaults.h"
-#include "call-stub.h"
+#include <glusterfs/defaults.h>
+#include <glusterfs/call-stub.h>
/* TODO: */
/* Think about 'writev/_*_lk/setattr/xattrop/' fops to do re-transmittion */
+void
+gf_quiesce_timeout(void *data);
/* Quiesce Specific Functions */
void
-gf_quiesce_local_wipe (xlator_t *this, quiesce_local_t *local)
+gf_quiesce_local_wipe(xlator_t *this, quiesce_local_t *local)
{
- if (!local || !this || !this->private)
- return;
+ if (!local || !this || !this->private)
+ return;
- if (local->loc.inode)
- loc_wipe (&local->loc);
- if (local->fd)
- fd_unref (local->fd);
- if (local->name)
- GF_FREE (local->name);
- if (local->volname)
- GF_FREE (local->volname);
- if (local->dict)
- dict_unref (local->dict);
- if (local->iobref)
- iobref_unref (local->iobref);
- if (local->vector)
- GF_FREE (local->vector);
+ if (local->loc.inode)
+ loc_wipe(&local->loc);
+ if (local->fd)
+ fd_unref(local->fd);
+ GF_FREE(local->name);
+ GF_FREE(local->volname);
+ if (local->dict)
+ dict_unref(local->dict);
+ if (local->iobref)
+ iobref_unref(local->iobref);
+ GF_FREE(local->vector);
- mem_put (local);
+ mem_put(local);
}
-call_stub_t *
-gf_quiesce_dequeue (xlator_t *this)
+void
+__gf_quiesce_start_timer(xlator_t *this, quiesce_priv_t *priv)
{
- call_stub_t *stub = NULL;
- quiesce_priv_t *priv = NULL;
-
- priv = this->private;
+ struct timespec timeout = {
+ 0,
+ };
- if (!priv || list_empty (&priv->req))
- return NULL;
+ if (!priv->timer) {
+ timeout.tv_sec = priv->timeout;
+ timeout.tv_nsec = 0;
- LOCK (&priv->lock);
- {
- stub = list_entry (priv->req.next, call_stub_t, list);
- list_del_init (&stub->list);
- priv->queue_size--;
+ priv->timer = gf_timer_call_after(this->ctx, timeout,
+ gf_quiesce_timeout, (void *)this);
+ if (priv->timer == NULL) {
+ gf_log(this->name, GF_LOG_ERROR, "Cannot create timer");
}
- UNLOCK (&priv->lock);
+ }
+}
+
+static void
+__gf_quiesce_cleanup_failover_hosts(xlator_t *this, quiesce_priv_t *priv)
+{
+ quiesce_failover_hosts_t *tmp = NULL;
+ quiesce_failover_hosts_t *failover_host = NULL;
- return stub;
+ list_for_each_entry_safe(failover_host, tmp, &priv->failover_list, list)
+ {
+ GF_FREE(failover_host->addr);
+ list_del(&failover_host->list);
+ GF_FREE(failover_host);
+ }
+ return;
}
-void *
-gf_quiesce_dequeue_start (void *data)
+void
+gf_quiesce_populate_failover_hosts(xlator_t *this, quiesce_priv_t *priv,
+ const char *value)
+{
+ char *dup_val = NULL;
+ char *addr_tok = NULL;
+ char *save_ptr = NULL;
+ quiesce_failover_hosts_t *failover_host = NULL;
+
+ if (!value)
+ goto out;
+
+ dup_val = gf_strdup(value);
+ if (!dup_val)
+ goto out;
+
+ addr_tok = strtok_r(dup_val, ",", &save_ptr);
+ LOCK(&priv->lock);
+ {
+ if (!list_empty(&priv->failover_list))
+ __gf_quiesce_cleanup_failover_hosts(this, priv);
+
+ while (addr_tok) {
+ if (!valid_internet_address(addr_tok, _gf_true, _gf_false)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, QUIESCE_MSG_INVAL_HOST,
+ "Specified "
+ "invalid internet address:%s",
+ addr_tok);
+ continue;
+ }
+ failover_host = GF_CALLOC(1, sizeof(*failover_host),
+ gf_quiesce_mt_failover_hosts);
+ failover_host->addr = gf_strdup(addr_tok);
+ INIT_LIST_HEAD(&failover_host->list);
+ list_add(&failover_host->list, &priv->failover_list);
+ addr_tok = strtok_r(NULL, ",", &save_ptr);
+ }
+ }
+ UNLOCK(&priv->lock);
+ GF_FREE(dup_val);
+out:
+ return;
+}
+
+int32_t
+gf_quiesce_failover_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- xlator_t *this = NULL;
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
- this = data;
- priv = this->private;
- THIS = this;
+ if (op_ret < 0) {
+ /* Failure here doesn't mean the failover to another host didn't
+ * succeed, we will know if failover succeeds or not by the
+ * CHILD_UP/CHILD_DOWN event. A failure here indicates something
+ * went wrong with the submission of failover command, hence
+ * just abort the failover attempts without retrying with other
+ * hosts.
+ */
+ gf_msg(this->name, GF_LOG_INFO, op_errno, QUIESCE_MSG_FAILOVER_FAILED,
+ "Initiating failover to host:%s failed:", (char *)cookie);
+ }
- while (!list_empty (&priv->req)) {
- stub = gf_quiesce_dequeue (this);
- if (stub) {
- call_resume (stub);
- }
- }
+ GF_FREE(cookie);
+ STACK_DESTROY(frame->root);
- return 0;
+ priv = this->private;
+ __gf_quiesce_start_timer(this, priv);
+
+ return 0;
}
+int
+__gf_quiesce_perform_failover(xlator_t *this)
+{
+ int ret = 0;
+ call_frame_t *frame = NULL;
+ dict_t *dict = NULL;
+ quiesce_priv_t *priv = NULL;
+ quiesce_failover_hosts_t *failover_host = NULL;
+ quiesce_failover_hosts_t *host = NULL;
+
+ priv = this->private;
+
+ if (priv->pass_through) {
+ gf_msg_trace(this->name, 0,
+ "child is up, hence not "
+ "performing any failover");
+ goto out;
+ }
+
+ list_for_each_entry(failover_host, &priv->failover_list, list)
+ {
+ if (failover_host->tried == 0) {
+ host = failover_host;
+ failover_host->tried = 1;
+ break;
+ }
+ }
+ if (!host) {
+ /*TODO: Keep trying until any of the gfproxy comes back up.
+ Currently it tries failing over once for each host,
+ if it doesn't succeed then returns error to mount point
+ list_for_each_entry (failover_host,
+ &priv->failover_list, list) {
+ failover_host->tried = 0;
+ }*/
+ gf_msg_debug(this->name, 0,
+ "all the failover hosts have "
+ "been tried and looks like didn't succeed");
+ ret = -1;
+ goto out;
+ }
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ gf_msg_debug(this->name, 0, "failed to create the frame");
+ ret = -1;
+ goto out;
+ }
+
+ dict = dict_new();
+
+ ret = dict_set_dynstr(dict, CLIENT_CMD_CONNECT, gf_strdup(host->addr));
+
+ gf_msg_trace(this->name, 0, "Initiating failover to:%s", host->addr);
+
+ STACK_WIND_COOKIE(frame, gf_quiesce_failover_cbk, NULL, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, NULL, dict, 0, NULL);
+out:
-void
-gf_quiesce_timeout (void *data)
+ if (dict)
+ dict_unref(dict);
+
+ return ret;
+}
+
+call_stub_t *
+gf_quiesce_dequeue(xlator_t *this)
{
- xlator_t *this = NULL;
- quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
- this = data;
- priv = this->private;
- THIS = this;
+ priv = this->private;
- LOCK (&priv->lock);
- {
- priv->pass_through = _gf_true;
- }
- UNLOCK (&priv->lock);
+ if (!priv || list_empty(&priv->req))
+ return NULL;
- gf_quiesce_dequeue_start (this);
+ LOCK(&priv->lock);
+ {
+ stub = list_entry(priv->req.next, call_stub_t, list);
+ list_del_init(&stub->list);
+ priv->queue_size--;
+ }
+ UNLOCK(&priv->lock);
- return;
+ return stub;
}
-void
-gf_quiesce_enqueue (xlator_t *this, call_stub_t *stub)
+void *
+gf_quiesce_dequeue_start(void *data)
{
- quiesce_priv_t *priv = NULL;
- struct timeval timeout = {0,};
+ xlator_t *this = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
- if (!priv) {
- gf_log_callingfn (this->name, GF_LOG_ERROR,
- "this->private == NULL");
- return;
- }
+ this = data;
+ priv = this->private;
+ THIS = this;
- LOCK (&priv->lock);
- {
- list_add_tail (&stub->list, &priv->req);
- priv->queue_size++;
+ while (!list_empty(&priv->req)) {
+ stub = gf_quiesce_dequeue(this);
+ if (stub) {
+ call_resume(stub);
}
- UNLOCK (&priv->lock);
+ }
+
+ return 0;
+}
+
+void
+gf_quiesce_timeout(void *data)
+{
+ xlator_t *this = NULL;
+ quiesce_priv_t *priv = NULL;
+ int ret = -1;
- if (!priv->timer) {
- timeout.tv_sec = 20;
- timeout.tv_usec = 0;
+ this = data;
+ priv = this->private;
+ THIS = this;
- priv->timer = gf_timer_call_after (this->ctx,
- timeout,
- gf_quiesce_timeout,
- (void *) this);
+ LOCK(&priv->lock);
+ {
+ priv->timer = NULL;
+ if (priv->pass_through) {
+ UNLOCK(&priv->lock);
+ goto out;
}
+ ret = __gf_quiesce_perform_failover(THIS);
+ }
+ UNLOCK(&priv->lock);
- return;
+ if (ret < 0) {
+ priv->pass_through = _gf_true;
+ gf_quiesce_dequeue_start(this);
+ }
+
+out:
+ return;
}
+void
+gf_quiesce_enqueue(xlator_t *this, call_stub_t *stub)
+{
+ quiesce_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (!priv) {
+ gf_log_callingfn(this->name, GF_LOG_ERROR, "this->private == NULL");
+ return;
+ }
+ LOCK(&priv->lock);
+ {
+ list_add_tail(&stub->list, &priv->req);
+ priv->queue_size++;
+ __gf_quiesce_start_timer(this, priv);
+ }
+ UNLOCK(&priv->lock);
+
+ return;
+}
/* _CBK function section */
int32_t
-quiesce_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
+quiesce_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *dict, struct iatt *postparent)
{
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
-
- local = frame->local;
- frame->local = NULL;
- if ((op_ret == -1) && (op_errno == ENOTCONN)) {
- /* Re-transmit (by putting in the queue) */
- stub = fop_lookup_stub (frame, default_lookup_resume,
- &local->loc, local->dict);
- if (!stub) {
- STACK_UNWIND_STRICT (lookup, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL);
- goto out;
- }
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- gf_quiesce_enqueue (this, stub);
- goto out;
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_lookup_stub(frame, default_lookup_resume, &local->loc,
+ local->dict);
+ if (!stub) {
+ STACK_UNWIND_STRICT(lookup, frame, -1, ENOMEM, NULL, NULL, NULL,
+ NULL);
+ goto out;
}
- STACK_UNWIND_STRICT (lookup, frame, op_ret, op_errno, inode, buf,
- dict, postparent);
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, buf, dict,
+ postparent);
out:
- gf_quiesce_local_wipe (this, local);
+ gf_quiesce_local_wipe(this, local);
- return 0;
+ return 0;
}
int32_t
-quiesce_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
+quiesce_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- local = frame->local;
- frame->local = NULL;
- if ((op_ret == -1) && (op_errno == ENOTCONN)) {
- /* Re-transmit (by putting in the queue) */
- stub = fop_stat_stub (frame, default_stat_resume,
- &local->loc, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM,
- NULL, NULL);
- goto out;
- }
-
- gf_quiesce_enqueue (this, stub);
- goto out;
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_stat_stub(frame, default_stat_resume, &local->loc, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(stat, frame, -1, ENOMEM, NULL, NULL);
+ goto out;
}
- STACK_UNWIND_STRICT (stat, frame, op_ret, op_errno, buf, xdata);
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, buf, xdata);
out:
- gf_quiesce_local_wipe (this, local);
+ gf_quiesce_local_wipe(this, local);
- return 0;
+ return 0;
}
int32_t
-quiesce_access_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+quiesce_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- local = frame->local;
- frame->local = NULL;
- if ((op_ret == -1) && (op_errno == ENOTCONN)) {
- /* Re-transmit (by putting in the queue) */
- stub = fop_access_stub (frame, default_access_resume,
- &local->loc, local->flag, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (access, frame, -1, ENOMEM, NULL);
- goto out;
- }
-
- gf_quiesce_enqueue (this, stub);
- goto out;
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_access_stub(frame, default_access_resume, &local->loc,
+ local->flag, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(access, frame, -1, ENOMEM, NULL);
+ goto out;
}
- STACK_UNWIND_STRICT (access, frame, op_ret, op_errno, xdata);
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(access, frame, op_ret, op_errno, xdata);
out:
- gf_quiesce_local_wipe (this, local);
+ gf_quiesce_local_wipe(this, local);
- return 0;
+ return 0;
}
int32_t
-quiesce_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *buf, dict_t *xdata)
+quiesce_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
-
- local = frame->local;
- frame->local = NULL;
- if ((op_ret == -1) && (op_errno == ENOTCONN)) {
- /* Re-transmit (by putting in the queue) */
- stub = fop_readlink_stub (frame, default_readlink_resume,
- &local->loc, local->size, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (readlink, frame, -1, ENOMEM,
- NULL, NULL, NULL);
- goto out;
- }
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- gf_quiesce_enqueue (this, stub);
- goto out;
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_readlink_stub(frame, default_readlink_resume, &local->loc,
+ local->size, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(readlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+ goto out;
}
- STACK_UNWIND_STRICT (readlink, frame, op_ret, op_errno, path, buf, xdata);
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, path, buf, xdata);
out:
- gf_quiesce_local_wipe (this, local);
+ gf_quiesce_local_wipe(this, local);
- return 0;
+ return 0;
}
int32_t
-quiesce_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+quiesce_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- local = frame->local;
- frame->local = NULL;
- if ((op_ret == -1) && (op_errno == ENOTCONN)) {
- /* Re-transmit (by putting in the queue) */
- stub = fop_open_stub (frame, default_open_resume,
- &local->loc, local->flag, local->fd,
- xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (open, frame, -1, ENOMEM,
- NULL, NULL);
- goto out;
- }
-
- gf_quiesce_enqueue (this, stub);
- goto out;
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_open_stub(frame, default_open_resume, &local->loc,
+ local->flag, local->fd, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(open, frame, -1, ENOMEM, NULL, NULL);
+ goto out;
}
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
out:
- gf_quiesce_local_wipe (this, local);
+ gf_quiesce_local_wipe(this, local);
- return 0;
+ return 0;
}
int32_t
-quiesce_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *stbuf, struct iobref *iobref, dict_t *xdata)
+quiesce_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- local = frame->local;
- frame->local = NULL;
- if ((op_ret == -1) && (op_errno == ENOTCONN)) {
- /* Re-transmit (by putting in the queue) */
- stub = fop_readv_stub (frame, default_readv_resume,
- local->fd, local->size, local->offset,
- local->io_flag, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
- NULL, 0, NULL, NULL, NULL);
- goto out;
- }
-
- gf_quiesce_enqueue (this, stub);
- goto out;
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_readv_stub(frame, default_readv_resume, local->fd,
+ local->size, local->offset, local->io_flag,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL,
+ NULL);
+ goto out;
}
- STACK_UNWIND_STRICT (readv, frame, op_ret, op_errno, vector, count,
- stbuf, iobref, xdata);
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
out:
- gf_quiesce_local_wipe (this, local);
+ gf_quiesce_local_wipe(this, local);
- return 0;
+ return 0;
}
int32_t
-quiesce_flush_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+quiesce_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- local = frame->local;
- frame->local = NULL;
- if ((op_ret == -1) && (op_errno == ENOTCONN)) {
- /* Re-transmit (by putting in the queue) */
- stub = fop_flush_stub (frame, default_flush_resume,
- local->fd, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL);
- goto out;
- }
-
- gf_quiesce_enqueue (this, stub);
- goto out;
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_flush_stub(frame, default_flush_resume, local->fd, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, NULL);
+ goto out;
}
- STACK_UNWIND_STRICT (flush, frame, op_ret, op_errno, xdata);
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(flush, frame, op_ret, op_errno, xdata);
out:
- gf_quiesce_local_wipe (this, local);
+ gf_quiesce_local_wipe(this, local);
- return 0;
+ return 0;
}
-
-
int32_t
-quiesce_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+quiesce_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- local = frame->local;
- frame->local = NULL;
- if ((op_ret == -1) && (op_errno == ENOTCONN)) {
- /* Re-transmit (by putting in the queue) */
- stub = fop_fsync_stub (frame, default_fsync_resume,
- local->fd, local->flag, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM,
- NULL, NULL, NULL);
- goto out;
- }
-
- gf_quiesce_enqueue (this, stub);
- goto out;
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_fsync_stub(frame, default_fsync_resume, local->fd,
+ local->flag, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
+ goto out;
}
- STACK_UNWIND_STRICT (fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
out:
- gf_quiesce_local_wipe (this, local);
+ gf_quiesce_local_wipe(this, local);
- return 0;
+ return 0;
}
int32_t
-quiesce_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+quiesce_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- local = frame->local;
- frame->local = NULL;
- if ((op_ret == -1) && (op_errno == ENOTCONN)) {
- /* Re-transmit (by putting in the queue) */
- stub = fop_fstat_stub (frame, default_fstat_resume,
- local->fd, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM,
- NULL, NULL);
- goto out;
- }
-
- gf_quiesce_enqueue (this, stub);
- goto out;
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_fstat_stub(frame, default_fstat_resume, local->fd, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, NULL, NULL);
+ goto out;
}
- STACK_UNWIND_STRICT (fstat, frame, op_ret, op_errno, buf, xdata);
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, buf, xdata);
out:
- gf_quiesce_local_wipe (this, local);
+ gf_quiesce_local_wipe(this, local);
- return 0;
+ return 0;
}
int32_t
-quiesce_opendir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+quiesce_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- local = frame->local;
- frame->local = NULL;
- if ((op_ret == -1) && (op_errno == ENOTCONN)) {
- /* Re-transmit (by putting in the queue) */
- stub = fop_opendir_stub (frame, default_opendir_resume,
- &local->loc, local->fd, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (opendir, frame, -1, ENOMEM,
- NULL, NULL);
- goto out;
- }
-
- gf_quiesce_enqueue (this, stub);
- goto out;
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_opendir_stub(frame, default_opendir_resume, &local->loc,
+ local->fd, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(opendir, frame, -1, ENOMEM, NULL, NULL);
+ goto out;
}
- STACK_UNWIND_STRICT (opendir, frame, op_ret, op_errno, fd, xdata);
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata);
out:
- gf_quiesce_local_wipe (this, local);
+ gf_quiesce_local_wipe(this, local);
- return 0;
+ return 0;
}
int32_t
-quiesce_fsyncdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+quiesce_fsyncdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- local = frame->local;
- frame->local = NULL;
- if ((op_ret == -1) && (op_errno == ENOTCONN)) {
- /* Re-transmit (by putting in the queue) */
- stub = fop_fsyncdir_stub (frame, default_fsyncdir_resume,
- local->fd, local->flag, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM, NULL);
- goto out;
- }
-
- gf_quiesce_enqueue (this, stub);
- goto out;
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_fsyncdir_stub(frame, default_fsyncdir_resume, local->fd,
+ local->flag, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(fsyncdir, frame, -1, ENOMEM, NULL);
+ goto out;
}
- STACK_UNWIND_STRICT (fsyncdir, frame, op_ret, op_errno, xdata);
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(fsyncdir, frame, op_ret, op_errno, xdata);
out:
- gf_quiesce_local_wipe (this, local);
+ gf_quiesce_local_wipe(this, local);
- return 0;
+ return 0;
}
int32_t
-quiesce_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf, dict_t *xdata)
+quiesce_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- local = frame->local;
- frame->local = NULL;
- if ((op_ret == -1) && (op_errno == ENOTCONN)) {
- /* Re-transmit (by putting in the queue) */
- stub = fop_statfs_stub (frame, default_statfs_resume,
- &local->loc, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (statfs, frame, -1, ENOMEM,
- NULL, NULL);
- goto out;
- }
-
- gf_quiesce_enqueue (this, stub);
- goto out;
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_statfs_stub(frame, default_statfs_resume, &local->loc,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(statfs, frame, -1, ENOMEM, NULL, NULL);
+ goto out;
}
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, buf, xdata);
out:
- gf_quiesce_local_wipe (this, local);
+ gf_quiesce_local_wipe(this, local);
- return 0;
+ return 0;
}
int32_t
-quiesce_fgetxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+quiesce_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
-
- local = frame->local;
- frame->local = NULL;
- if ((op_ret == -1) && (op_errno == ENOTCONN)) {
- /* Re-transmit (by putting in the queue) */
- stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume,
- local->fd, local->name, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM,
- NULL, NULL);
- goto out;
- }
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- gf_quiesce_enqueue (this, stub);
- goto out;
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_fgetxattr_stub(frame, default_fgetxattr_resume, local->fd,
+ local->name, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(fgetxattr, frame, -1, ENOMEM, NULL, NULL);
+ goto out;
}
- STACK_UNWIND_STRICT (fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xdata);
out:
- gf_quiesce_local_wipe (this, local);
+ gf_quiesce_local_wipe(this, local);
- return 0;
+ return 0;
}
-
int32_t
-quiesce_getxattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+quiesce_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- local = frame->local;
- frame->local = NULL;
- if ((op_ret == -1) && (op_errno == ENOTCONN)) {
- /* Re-transmit (by putting in the queue) */
- stub = fop_getxattr_stub (frame, default_getxattr_resume,
- &local->loc, local->name, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM,
- NULL, NULL);
- goto out;
- }
-
- gf_quiesce_enqueue (this, stub);
- goto out;
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_getxattr_stub(frame, default_getxattr_resume, &local->loc,
+ local->name, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(getxattr, frame, -1, ENOMEM, NULL, NULL);
+ goto out;
}
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno, dict, xdata);
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata);
out:
- gf_quiesce_local_wipe (this, local);
+ gf_quiesce_local_wipe(this, local);
- return 0;
+ return 0;
}
-
int32_t
-quiesce_rchecksum_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
- uint8_t *strong_checksum, dict_t *xdata)
+quiesce_rchecksum_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, uint32_t weak_checksum,
+ uint8_t *strong_checksum, dict_t *xdata)
{
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- local = frame->local;
- frame->local = NULL;
- if ((op_ret == -1) && (op_errno == ENOTCONN)) {
- /* Re-transmit (by putting in the queue) */
- stub = fop_rchecksum_stub (frame, default_rchecksum_resume,
- local->fd, local->offset, local->flag, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOMEM,
- 0, NULL, NULL);
- goto out;
- }
-
- gf_quiesce_enqueue (this, stub);
- goto out;
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_rchecksum_stub(frame, default_rchecksum_resume, local->fd,
+ local->offset, local->flag, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(rchecksum, frame, -1, ENOMEM, 0, NULL, NULL);
+ goto out;
}
- STACK_UNWIND_STRICT (rchecksum, frame, op_ret, op_errno, weak_checksum,
- strong_checksum, xdata);
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(rchecksum, frame, op_ret, op_errno, weak_checksum,
+ strong_checksum, xdata);
out:
- gf_quiesce_local_wipe (this, local);
+ gf_quiesce_local_wipe(this, local);
- return 0;
+ return 0;
}
-
int32_t
-quiesce_readdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata)
+quiesce_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- local = frame->local;
- frame->local = NULL;
- if ((op_ret == -1) && (op_errno == ENOTCONN)) {
- /* Re-transmit (by putting in the queue) */
- stub = fop_readdir_stub (frame, default_readdir_resume,
- local->fd, local->size, local->offset, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM,
- NULL, NULL);
- goto out;
- }
-
- gf_quiesce_enqueue (this, stub);
- goto out;
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_readdir_stub(frame, default_readdir_resume, local->fd,
+ local->size, local->offset, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL);
+ goto out;
}
- STACK_UNWIND_STRICT (readdir, frame, op_ret, op_errno, entries, xdata);
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, entries, xdata);
out:
- gf_quiesce_local_wipe (this, local);
+ gf_quiesce_local_wipe(this, local);
- return 0;
+ return 0;
}
-
int32_t
-quiesce_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, gf_dirent_t *entries, dict_t *xdata)
+quiesce_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- local = frame->local;
- frame->local = NULL;
- if ((op_ret == -1) && (op_errno == ENOTCONN)) {
- /* Re-transmit (by putting in the queue) */
- stub = fop_readdirp_stub (frame, default_readdirp_resume,
- local->fd, local->size, local->offset,
- local->dict);
- if (!stub) {
- STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM,
- NULL, NULL);
- goto out;
- }
-
- gf_quiesce_enqueue (this, stub);
- goto out;
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_readdirp_stub(frame, default_readdirp_resume, local->fd,
+ local->size, local->offset, local->dict);
+ if (!stub) {
+ STACK_UNWIND_STRICT(readdirp, frame, -1, ENOMEM, NULL, NULL);
+ goto out;
}
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, entries, xdata);
out:
- gf_quiesce_local_wipe (this, local);
+ gf_quiesce_local_wipe(this, local);
- return 0;
+ return 0;
}
-
#if 0
int32_t
@@ -1028,1598 +1161,1544 @@ out:
#endif /* if 0 */
-
/* FOP */
/* No retransmittion */
int32_t
-quiesce_removexattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata)
+quiesce_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->pass_through) {
- STACK_WIND (frame,
- default_removexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- loc,
- name, xdata);
- return 0;
- }
+ if (priv->pass_through) {
+ STACK_WIND(frame, default_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ return 0;
+ }
- stub = fop_removexattr_stub (frame, default_removexattr_resume,
- loc, name, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (removexattr, frame, -1, ENOMEM, NULL);
- return 0;
- }
+ stub = fop_removexattr_stub(frame, default_removexattr_resume, loc, name,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(removexattr, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
int32_t
-quiesce_truncate (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- off_t offset, dict_t *xdata)
+quiesce_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->pass_through) {
- STACK_WIND (frame,
- default_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- loc,
- offset, xdata);
- return 0;
- }
+ if (priv->pass_through) {
+ STACK_WIND(frame, default_fremovexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ return 0;
+ }
- stub = fop_truncate_stub (frame, default_truncate_resume, loc, offset, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
- }
+ stub = fop_fremovexattr_stub(frame, default_fremovexattr_resume, fd, name,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(fremovexattr, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
int32_t
-quiesce_fsetxattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- dict_t *dict,
- int32_t flags, dict_t *xdata)
+quiesce_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->pass_through) {
- STACK_WIND (frame,
- default_fsetxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr,
- fd,
- dict,
- flags, xdata);
- return 0;
- }
+ if (priv->pass_through) {
+ STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
+ }
- stub = fop_fsetxattr_stub (frame, default_fsetxattr_resume,
- fd, dict, flags, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, ENOMEM, NULL);
- return 0;
- }
+ stub = fop_truncate_stub(frame, default_truncate_resume, loc, offset,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
int32_t
-quiesce_setxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *dict,
- int32_t flags, dict_t *xdata)
+quiesce_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->pass_through) {
- STACK_WIND (frame,
- default_setxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- loc,
- dict,
- flags, xdata);
- return 0;
- }
+ if (priv->pass_through) {
+ STACK_WIND(frame, default_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
+ }
- stub = fop_setxattr_stub (frame, default_setxattr_resume,
- loc, dict, flags, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (setxattr, frame, -1, ENOMEM, NULL);
- return 0;
- }
+ stub = fop_fsetxattr_stub(frame, default_fsetxattr_resume, fd, dict, flags,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(fsetxattr, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
int32_t
-quiesce_create (call_frame_t *frame, xlator_t *this,
- loc_t *loc, int32_t flags, mode_t mode,
- mode_t umask, fd_t *fd, dict_t *xdata)
+quiesce_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->pass_through) {
- /* Don't send O_APPEND below, as write() re-transmittions can
- fail with O_APPEND */
- STACK_WIND (frame, default_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- loc, (flags & ~O_APPEND), mode, umask, fd, xdata);
- return 0;
- }
+ if (priv->pass_through) {
+ STACK_WIND(frame, default_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
+ return 0;
+ }
- stub = fop_create_stub (frame, default_create_resume,
- loc, (flags & ~O_APPEND), mode, umask, fd, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (create, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL, NULL, NULL);
- return 0;
- }
+ stub = fop_setxattr_stub(frame, default_setxattr_resume, loc, dict, flags,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(setxattr, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
int32_t
-quiesce_link (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
+quiesce_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->pass_through) {
- STACK_WIND (frame,
- default_link_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link,
- oldloc, newloc, xdata);
- return 0;
- }
+ if (priv->pass_through) {
+ /* Don't send O_APPEND below, as write() re-transmittions can
+ fail with O_APPEND */
+ STACK_WIND(frame, default_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, (flags & ~O_APPEND),
+ mode, umask, fd, xdata);
+ return 0;
+ }
- stub = fop_link_stub (frame, default_link_resume, oldloc, newloc, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (link, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL, NULL);
- return 0;
- }
+ stub = fop_create_stub(frame, default_create_resume, loc,
+ (flags & ~O_APPEND), mode, umask, fd, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
int32_t
-quiesce_rename (call_frame_t *frame,
- xlator_t *this,
- loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
+quiesce_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->pass_through) {
- STACK_WIND (frame,
- default_rename_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename,
- oldloc, newloc, xdata);
- return 0;
- }
+ if (priv->pass_through) {
+ STACK_WIND(frame, default_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
+ }
- stub = fop_rename_stub (frame, default_rename_resume, oldloc, newloc, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (rename, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL, NULL, NULL);
- return 0;
- }
+ stub = fop_link_stub(frame, default_link_resume, oldloc, newloc, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(link, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
+int32_t
+quiesce_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ if (priv->pass_through) {
+ STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ return 0;
+ }
+
+ stub = fop_rename_stub(frame, default_rename_resume, oldloc, newloc, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(rename, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
+}
int
-quiesce_symlink (call_frame_t *frame, xlator_t *this,
- const char *linkpath, loc_t *loc, mode_t umask, dict_t *xdata)
+quiesce_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->pass_through) {
- STACK_WIND (frame, default_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- linkpath, loc, umask, xdata);
- return 0;
- }
+ if (priv->pass_through) {
+ STACK_WIND(frame, default_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
+ xdata);
+ return 0;
+ }
- stub = fop_symlink_stub (frame, default_symlink_resume,
- linkpath, loc, umask, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (symlink, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL, NULL);
- return 0;
- }
+ stub = fop_symlink_stub(frame, default_symlink_resume, linkpath, loc, umask,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(symlink, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
-
int
-quiesce_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, dict_t *xdata)
+quiesce_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->pass_through) {
- STACK_WIND (frame, default_rmdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- loc, flags, xdata);
- return 0;
- }
+ if (priv->pass_through) {
+ STACK_WIND(frame, default_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
+ return 0;
+ }
- stub = fop_rmdir_stub (frame, default_rmdir_resume, loc, flags, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (rmdir, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
- }
+ stub = fop_rmdir_stub(frame, default_rmdir_resume, loc, flags, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(rmdir, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
int32_t
-quiesce_unlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, int xflag, dict_t *xdata)
+quiesce_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->pass_through) {
- STACK_WIND (frame,
- default_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- loc, xflag, xdata);
- return 0;
- }
+ if (priv->pass_through) {
+ STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+ }
- stub = fop_unlink_stub (frame, default_unlink_resume, loc, xflag, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
- }
+ stub = fop_unlink_stub(frame, default_unlink_resume, loc, xflag, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(unlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
int
-quiesce_mkdir (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, mode_t umask, dict_t *xdata)
+quiesce_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->pass_through) {
- STACK_WIND (frame, default_mkdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- loc, mode, umask, xdata);
- return 0;
- }
+ if (priv->pass_through) {
+ STACK_WIND(frame, default_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ return 0;
+ }
- stub = fop_mkdir_stub (frame, default_mkdir_resume,
- loc, mode, umask, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (mkdir, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL, NULL);
- return 0;
- }
+ stub = fop_mkdir_stub(frame, default_mkdir_resume, loc, mode, umask, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(mkdir, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
-
int
-quiesce_mknod (call_frame_t *frame, xlator_t *this,
- loc_t *loc, mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+quiesce_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->pass_through) {
- STACK_WIND (frame, default_mknod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod,
- loc, mode, rdev, umask, xdata);
- return 0;
- }
+ if (priv->pass_through) {
+ STACK_WIND(frame, default_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
+ xdata);
+ return 0;
+ }
- stub = fop_mknod_stub (frame, default_mknod_resume,
- loc, mode, rdev, umask, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (mknod, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL, NULL);
- return 0;
- }
+ stub = fop_mknod_stub(frame, default_mknod_resume, loc, mode, rdev, umask,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(mknod, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
int32_t
-quiesce_ftruncate (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- off_t offset, dict_t *xdata)
+quiesce_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv->pass_through) {
- STACK_WIND (frame,
- default_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset, xdata);
- return 0;
- }
+ if (priv->pass_through) {
+ STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+ }
- stub = fop_ftruncate_stub (frame, default_ftruncate_resume, fd, offset, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
- }
+ stub = fop_ftruncate_stub(frame, default_ftruncate_resume, fd, offset,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
/* Re-transmittion */
int32_t
-quiesce_readlink (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- size_t size, dict_t *xdata)
+quiesce_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- local = mem_get0 (priv->local_pool);
- loc_dup (loc, &local->loc);
- local->size = size;
- frame->local = local;
-
- STACK_WIND (frame,
- quiesce_readlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink,
- loc,
- size, xdata);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ loc_dup(loc, &local->loc);
+ local->size = size;
+ frame->local = local;
- stub = fop_readlink_stub (frame, default_readlink_resume, loc, size, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (readlink, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
- }
-
- gf_quiesce_enqueue (this, stub);
+ STACK_WIND(frame, quiesce_readlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
+ return 0;
+ }
+ stub = fop_readlink_stub(frame, default_readlink_resume, loc, size, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(readlink, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
-}
+ }
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
+}
int32_t
-quiesce_access (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t mask, dict_t *xdata)
+quiesce_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
-
- priv = this->private;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- if (priv && priv->pass_through) {
- local = mem_get0 (priv->local_pool);
- loc_dup (loc, &local->loc);
- local->flag = mask;
- frame->local = local;
-
- STACK_WIND (frame,
- quiesce_access_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access,
- loc,
- mask, xdata);
- return 0;
- }
+ priv = this->private;
- stub = fop_access_stub (frame, default_access_resume, loc, mask, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (access, frame, -1, ENOMEM, NULL);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ loc_dup(loc, &local->loc);
+ local->flag = mask;
+ frame->local = local;
- gf_quiesce_enqueue (this, stub);
+ STACK_WIND(frame, quiesce_access_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->access, loc, mask, xdata);
+ return 0;
+ }
+ stub = fop_access_stub(frame, default_access_resume, loc, mask, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(access, frame, -1, ENOMEM, NULL);
return 0;
+ }
+
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
}
int32_t
-quiesce_fgetxattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- const char *name, dict_t *xdata)
+quiesce_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- local = mem_get0 (priv->local_pool);
- local->fd = fd_ref (fd);
- if (name)
- local->name = gf_strdup (name);
-
- frame->local = local;
-
- STACK_WIND (frame,
- quiesce_fgetxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr,
- fd,
- name, xdata);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ local->fd = fd_ref(fd);
+ if (name)
+ local->name = gf_strdup(name);
- stub = fop_fgetxattr_stub (frame, default_fgetxattr_resume, fd, name, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (fgetxattr, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
+ frame->local = local;
- gf_quiesce_enqueue (this, stub);
+ STACK_WIND(frame, quiesce_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
+ }
+ stub = fop_fgetxattr_stub(frame, default_fgetxattr_resume, fd, name, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(fgetxattr, frame, -1, ENOMEM, NULL, NULL);
return 0;
+ }
+
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
}
int32_t
-quiesce_statfs (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata)
+quiesce_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- local = mem_get0 (priv->local_pool);
- loc_dup (loc, &local->loc);
- frame->local = local;
-
- STACK_WIND (frame,
- quiesce_statfs_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs,
- loc, xdata);
- return 0;
- }
-
- stub = fop_statfs_stub (frame, default_statfs_resume, loc, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (statfs, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ loc_dup(loc, &local->loc);
+ frame->local = local;
- gf_quiesce_enqueue (this, stub);
+ STACK_WIND(frame, quiesce_statfs_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
+ }
+ stub = fop_statfs_stub(frame, default_statfs_resume, loc, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(statfs, frame, -1, ENOMEM, NULL, NULL);
return 0;
+ }
+
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
}
int32_t
-quiesce_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags, dict_t *xdata)
+quiesce_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- local = mem_get0 (priv->local_pool);
- local->fd = fd_ref (fd);
- local->flag = flags;
- frame->local = local;
-
- STACK_WIND (frame,
- quiesce_fsyncdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir,
- fd,
- flags, xdata);
- return 0;
- }
-
- stub = fop_fsyncdir_stub (frame, default_fsyncdir_resume, fd, flags, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, ENOMEM, NULL);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ local->fd = fd_ref(fd);
+ local->flag = flags;
+ frame->local = local;
- gf_quiesce_enqueue (this, stub);
+ STACK_WIND(frame, quiesce_fsyncdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata);
+ return 0;
+ }
+ stub = fop_fsyncdir_stub(frame, default_fsyncdir_resume, fd, flags, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(fsyncdir, frame, -1, ENOMEM, NULL);
return 0;
+ }
+
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
}
int32_t
-quiesce_opendir (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, fd_t *fd, dict_t *xdata)
+quiesce_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- local = mem_get0 (priv->local_pool);
- loc_dup (loc, &local->loc);
- local->fd = fd_ref (fd);
- frame->local = local;
-
- STACK_WIND (frame,
- quiesce_opendir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->opendir,
- loc, fd, xdata);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ loc_dup(loc, &local->loc);
+ local->fd = fd_ref(fd);
+ frame->local = local;
- stub = fop_opendir_stub (frame, default_opendir_resume, loc, fd, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (opendir, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
-
- gf_quiesce_enqueue (this, stub);
+ STACK_WIND(frame, quiesce_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+ return 0;
+ }
+ stub = fop_opendir_stub(frame, default_opendir_resume, loc, fd, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(opendir, frame, -1, ENOMEM, NULL, NULL);
return 0;
+ }
+
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
}
int32_t
-quiesce_fstat (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata)
+quiesce_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- priv = this->private;
-
- if (priv && priv->pass_through) {
- local = mem_get0 (priv->local_pool);
- local->fd = fd_ref (fd);
- frame->local = local;
-
- STACK_WIND (frame,
- quiesce_fstat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat,
- fd, xdata);
- return 0;
- }
+ priv = this->private;
- stub = fop_fstat_stub (frame, default_fstat_resume, fd, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (fstat, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ local->fd = fd_ref(fd);
+ frame->local = local;
- gf_quiesce_enqueue (this, stub);
+ STACK_WIND(frame, quiesce_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
+ }
+ stub = fop_fstat_stub(frame, default_fstat_resume, fd, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(fstat, frame, -1, ENOMEM, NULL, NULL);
return 0;
+ }
+
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
}
int32_t
-quiesce_fsync (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t flags, dict_t *xdata)
+quiesce_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- local = mem_get0 (priv->local_pool);
- local->fd = fd_ref (fd);
- local->flag = flags;
- frame->local = local;
-
- STACK_WIND (frame,
- quiesce_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd,
- flags, xdata);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ local->fd = fd_ref(fd);
+ local->flag = flags;
+ frame->local = local;
- stub = fop_fsync_stub (frame, default_fsync_resume, fd, flags, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
- }
-
- gf_quiesce_enqueue (this, stub);
+ STACK_WIND(frame, quiesce_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
+ }
+ stub = fop_fsync_stub(frame, default_fsync_resume, fd, flags, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
return 0;
+ }
+
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
}
int32_t
-quiesce_flush (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, dict_t *xdata)
+quiesce_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
-
- priv = this->private;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- if (priv && priv->pass_through) {
- local = mem_get0 (priv->local_pool);
- local->fd = fd_ref (fd);
- frame->local = local;
-
- STACK_WIND (frame,
- quiesce_flush_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->flush,
- fd, xdata);
- return 0;
- }
+ priv = this->private;
- stub = fop_flush_stub (frame, default_flush_resume, fd, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (flush, frame, -1, ENOMEM, NULL);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ local->fd = fd_ref(fd);
+ frame->local = local;
- gf_quiesce_enqueue (this, stub);
+ STACK_WIND(frame, quiesce_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
+ }
+ stub = fop_flush_stub(frame, default_flush_resume, fd, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(flush, frame, -1, ENOMEM, NULL);
return 0;
+ }
+
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
}
int32_t
-quiesce_writev (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iovec *vector,
- int32_t count,
- off_t off, uint32_t flags,
- struct iobref *iobref, dict_t *xdata)
+quiesce_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- STACK_WIND (frame,
- default_writev_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- fd,
- vector,
- count,
- off, flags,
- iobref, xdata);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ STACK_WIND(frame, default_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, off,
+ flags, iobref, xdata);
+ return 0;
+ }
- stub = fop_writev_stub (frame, default_writev_resume,
- fd, vector, count, off, flags, iobref, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (writev, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
- }
+ stub = fop_writev_stub(frame, default_writev_resume, fd, vector, count, off,
+ flags, iobref, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(writev, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
int32_t
-quiesce_readv (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
+quiesce_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
-
- priv = this->private;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- if (priv && priv->pass_through) {
- local = mem_get0 (priv->local_pool);
- local->fd = fd_ref (fd);
- local->size = size;
- local->offset = offset;
- local->io_flag = flags;
- frame->local = local;
-
- STACK_WIND (frame,
- quiesce_readv_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv,
- fd,
- size,
- offset, flags, xdata);
- return 0;
- }
+ priv = this->private;
- stub = fop_readv_stub (frame, default_readv_resume, fd, size, offset,
- flags, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (readv, frame, -1, ENOMEM,
- NULL, 0, NULL, NULL, NULL);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ local->fd = fd_ref(fd);
+ local->size = size;
+ local->offset = offset;
+ local->io_flag = flags;
+ frame->local = local;
- gf_quiesce_enqueue (this, stub);
+ STACK_WIND(frame, quiesce_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ return 0;
+ }
+ stub = fop_readv_stub(frame, default_readv_resume, fd, size, offset, flags,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(readv, frame, -1, ENOMEM, NULL, 0, NULL, NULL,
+ NULL);
return 0;
-}
+ }
+
+ gf_quiesce_enqueue(this, stub);
+ return 0;
+}
int32_t
-quiesce_open (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- int32_t flags, fd_t *fd,
- dict_t *xdata)
+quiesce_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- local = mem_get0 (priv->local_pool);
- loc_dup (loc, &local->loc);
- local->fd = fd_ref (fd);
-
- /* Don't send O_APPEND below, as write() re-transmittions can
- fail with O_APPEND */
- local->flag = (flags & ~O_APPEND);
- frame->local = local;
-
- STACK_WIND (frame,
- quiesce_open_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open,
- loc, (flags & ~O_APPEND), fd, xdata);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ loc_dup(loc, &local->loc);
+ local->fd = fd_ref(fd);
- stub = fop_open_stub (frame, default_open_resume, loc,
- (flags & ~O_APPEND), fd, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (open, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
+ /* Don't send O_APPEND below, as write() re-transmittions can
+ fail with O_APPEND */
+ local->flag = (flags & ~O_APPEND);
+ frame->local = local;
- gf_quiesce_enqueue (this, stub);
+ STACK_WIND(frame, quiesce_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, (flags & ~O_APPEND), fd,
+ xdata);
+ return 0;
+ }
+ stub = fop_open_stub(frame, default_open_resume, loc, (flags & ~O_APPEND),
+ fd, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(open, frame, -1, ENOMEM, NULL, NULL);
return 0;
+ }
+
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
}
int32_t
-quiesce_getxattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- const char *name, dict_t *xdata)
+quiesce_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- local = mem_get0 (priv->local_pool);
- loc_dup (loc, &local->loc);
- if (name)
- local->name = gf_strdup (name);
-
- frame->local = local;
-
- STACK_WIND (frame,
- quiesce_getxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- loc,
- name, xdata);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ loc_dup(loc, &local->loc);
+ if (name)
+ local->name = gf_strdup(name);
- stub = fop_getxattr_stub (frame, default_getxattr_resume, loc, name, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (getxattr, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
+ frame->local = local;
- gf_quiesce_enqueue (this, stub);
+ STACK_WIND(frame, quiesce_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
+ }
+ stub = fop_getxattr_stub(frame, default_getxattr_resume, loc, name, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(getxattr, frame, -1, ENOMEM, NULL, NULL);
return 0;
-}
+ }
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
+}
int32_t
-quiesce_xattrop (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata)
+quiesce_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- STACK_WIND (frame,
- default_xattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->xattrop,
- loc,
- flags,
- dict, xdata);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ STACK_WIND(frame, default_xattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc, flags, dict, xdata);
+ return 0;
+ }
- stub = fop_xattrop_stub (frame, default_xattrop_resume,
- loc, flags, dict, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (xattrop, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
+ stub = fop_xattrop_stub(frame, default_xattrop_resume, loc, flags, dict,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(xattrop, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
int32_t
-quiesce_fxattrop (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- gf_xattrop_flags_t flags,
- dict_t *dict, dict_t *xdata)
+quiesce_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- STACK_WIND (frame,
- default_fxattrop_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fxattrop,
- fd,
- flags,
- dict, xdata);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ STACK_WIND(frame, default_fxattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict, xdata);
+ return 0;
+ }
- stub = fop_fxattrop_stub (frame, default_fxattrop_resume,
- fd, flags, dict, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (fxattrop, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
+ stub = fop_fxattrop_stub(frame, default_fxattrop_resume, fd, flags, dict,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(fxattrop, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
int32_t
-quiesce_lk (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
+quiesce_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- STACK_WIND (frame,
- default_lk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk,
- fd,
- cmd,
- lock, xdata);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ STACK_WIND(frame, default_lk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lk, fd, cmd, lock, xdata);
+ return 0;
+ }
- stub = fop_lk_stub (frame, default_lk_resume, fd, cmd, lock, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (lk, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
+ stub = fop_lk_stub(frame, default_lk_resume, fd, cmd, lock, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(lk, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
-
int32_t
-quiesce_inodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, int32_t cmd,
- struct gf_flock *lock, dict_t *xdata)
+quiesce_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- STACK_WIND (frame,
- default_inodelk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->inodelk,
- volume, loc, cmd, lock, xdata);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ STACK_WIND(frame, default_inodelk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk, volume, loc, cmd, lock,
+ xdata);
+ return 0;
+ }
- stub = fop_inodelk_stub (frame, default_inodelk_resume,
- volume, loc, cmd, lock, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (inodelk, frame, -1, ENOMEM, NULL);
- return 0;
- }
+ stub = fop_inodelk_stub(frame, default_inodelk_resume, volume, loc, cmd,
+ lock, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(inodelk, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
int32_t
-quiesce_finodelk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+quiesce_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- STACK_WIND (frame,
- default_finodelk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->finodelk,
- volume, fd, cmd, lock, xdata);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ STACK_WIND(frame, default_finodelk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->finodelk, volume, fd, cmd, lock,
+ xdata);
+ return 0;
+ }
- stub = fop_finodelk_stub (frame, default_finodelk_resume,
- volume, fd, cmd, lock, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (finodelk, frame, -1, ENOMEM, NULL);
- return 0;
- }
+ stub = fop_finodelk_stub(frame, default_finodelk_resume, volume, fd, cmd,
+ lock, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(finodelk, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
int32_t
-quiesce_entrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, loc_t *loc, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+quiesce_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- STACK_WIND (frame, default_entrylk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->entrylk,
- volume, loc, basename, cmd, type, xdata);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ STACK_WIND(frame, default_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, volume, loc, basename, cmd,
+ type, xdata);
+ return 0;
+ }
- stub = fop_entrylk_stub (frame, default_entrylk_resume,
- volume, loc, basename, cmd, type, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (entrylk, frame, -1, ENOMEM, NULL);
- return 0;
- }
+ stub = fop_entrylk_stub(frame, default_entrylk_resume, volume, loc,
+ basename, cmd, type, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(entrylk, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
int32_t
-quiesce_fentrylk (call_frame_t *frame, xlator_t *this,
- const char *volume, fd_t *fd, const char *basename,
- entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+quiesce_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- STACK_WIND (frame, default_fentrylk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fentrylk,
- volume, fd, basename, cmd, type, xdata);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ STACK_WIND(frame, default_fentrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fentrylk, volume, fd, basename, cmd,
+ type, xdata);
+ return 0;
+ }
- stub = fop_fentrylk_stub (frame, default_fentrylk_resume,
- volume, fd, basename, cmd, type, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (fentrylk, frame, -1, ENOMEM, NULL);
- return 0;
- }
+ stub = fop_fentrylk_stub(frame, default_fentrylk_resume, volume, fd,
+ basename, cmd, type, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(fentrylk, frame, -1, ENOMEM, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
int32_t
-quiesce_rchecksum (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd, off_t offset,
- int32_t len, dict_t *xdata)
+quiesce_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ int32_t len, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- priv = this->private;
-
- if (priv && priv->pass_through) {
- local = mem_get0 (priv->local_pool);
- local->fd = fd_ref (fd);
- local->offset = offset;
- local->flag = len;
- frame->local = local;
-
- STACK_WIND (frame,
- quiesce_rchecksum_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rchecksum,
- fd, offset, len, xdata);
- return 0;
- }
+ priv = this->private;
- stub = fop_rchecksum_stub (frame, default_rchecksum_resume,
- fd, offset, len, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (rchecksum, frame, -1, ENOMEM, 0, NULL, NULL);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ local->fd = fd_ref(fd);
+ local->offset = offset;
+ local->flag = len;
+ frame->local = local;
- gf_quiesce_enqueue (this, stub);
+ STACK_WIND(frame, quiesce_rchecksum_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rchecksum, fd, offset, len, xdata);
+ return 0;
+ }
+ stub = fop_rchecksum_stub(frame, default_rchecksum_resume, fd, offset, len,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(rchecksum, frame, -1, ENOMEM, 0, NULL, NULL);
return 0;
-}
+ }
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
+}
int32_t
-quiesce_readdir (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t off, dict_t *xdata)
+quiesce_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
-
- priv = this->private;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- if (priv && priv->pass_through) {
- local = mem_get0 (priv->local_pool);
- local->fd = fd_ref (fd);
- local->size = size;
- local->offset = off;
- frame->local = local;
-
- STACK_WIND (frame,
- quiesce_readdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdir,
- fd, size, off, xdata);
- return 0;
- }
+ priv = this->private;
- stub = fop_readdir_stub (frame, default_readdir_resume, fd, size, off, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (readdir, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ local->fd = fd_ref(fd);
+ local->size = size;
+ local->offset = off;
+ frame->local = local;
- gf_quiesce_enqueue (this, stub);
+ STACK_WIND(frame, quiesce_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata);
+ return 0;
+ }
+ stub = fop_readdir_stub(frame, default_readdir_resume, fd, size, off,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL);
return 0;
-}
+ }
+
+ gf_quiesce_enqueue(this, stub);
+ return 0;
+}
int32_t
-quiesce_readdirp (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- size_t size,
- off_t off, dict_t *dict)
+quiesce_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- priv = this->private;
-
- if (priv && priv->pass_through) {
- local = mem_get0 (priv->local_pool);
- local->fd = fd_ref (fd);
- local->size = size;
- local->offset = off;
- local->dict = dict_ref (dict);
- frame->local = local;
-
- STACK_WIND (frame,
- quiesce_readdirp_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readdirp,
- fd, size, off, dict);
- return 0;
- }
+ priv = this->private;
- stub = fop_readdirp_stub (frame, default_readdirp_resume, fd, size,
- off, dict);
- if (!stub) {
- STACK_UNWIND_STRICT (readdirp, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ local->fd = fd_ref(fd);
+ local->size = size;
+ local->offset = off;
+ local->dict = dict_ref(dict);
+ frame->local = local;
- gf_quiesce_enqueue (this, stub);
+ STACK_WIND(frame, quiesce_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, off, dict);
+ return 0;
+ }
+ stub = fop_readdirp_stub(frame, default_readdirp_resume, fd, size, off,
+ dict);
+ if (!stub) {
+ STACK_UNWIND_STRICT(readdirp, frame, -1, ENOMEM, NULL, NULL);
return 0;
+ }
+
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
}
int32_t
-quiesce_setattr (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
+quiesce_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- STACK_WIND (frame,
- default_setattr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr,
- loc, stbuf, valid, xdata);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
+ }
- stub = fop_setattr_stub (frame, default_setattr_resume,
- loc, stbuf, valid, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
- }
+ stub = fop_setattr_stub(frame, default_setattr_resume, loc, stbuf, valid,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
-
int32_t
-quiesce_stat (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc, dict_t *xdata)
+quiesce_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- local = mem_get0 (priv->local_pool);
- loc_dup (loc, &local->loc);
- frame->local = local;
-
- STACK_WIND (frame,
- quiesce_stat_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat,
- loc, xdata);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ loc_dup(loc, &local->loc);
+ frame->local = local;
- stub = fop_stat_stub (frame, default_stat_resume, loc, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (stat, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
-
- gf_quiesce_enqueue (this, stub);
+ STACK_WIND(frame, quiesce_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+ }
+ stub = fop_stat_stub(frame, default_stat_resume, loc, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(stat, frame, -1, ENOMEM, NULL, NULL);
return 0;
+ }
+
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
}
int32_t
-quiesce_lookup (call_frame_t *frame,
- xlator_t *this,
- loc_t *loc,
- dict_t *xattr_req)
+quiesce_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xattr_req)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quiesce_local_t *local = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- local = mem_get0 (priv->local_pool);
- loc_dup (loc, &local->loc);
- local->dict = dict_ref (xattr_req);
- frame->local = local;
-
- STACK_WIND (frame,
- quiesce_lookup_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup,
- loc, xattr_req);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ loc_dup(loc, &local->loc);
+ local->dict = dict_ref(xattr_req);
+ frame->local = local;
- stub = fop_lookup_stub (frame, default_lookup_resume, loc, xattr_req);
- if (!stub) {
- STACK_UNWIND_STRICT (lookup, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL);
- return 0;
- }
-
- gf_quiesce_enqueue (this, stub);
+ STACK_WIND(frame, quiesce_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+ return 0;
+ }
+ stub = fop_lookup_stub(frame, default_lookup_resume, loc, xattr_req);
+ if (!stub) {
+ STACK_UNWIND_STRICT(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
return 0;
+ }
+
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
}
int32_t
-quiesce_fsetattr (call_frame_t *frame,
- xlator_t *this,
- fd_t *fd,
- struct iatt *stbuf,
- int32_t valid, dict_t *xdata)
+quiesce_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- quiesce_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- priv = this->private;
+ priv = this->private;
- if (priv && priv->pass_through) {
- STACK_WIND (frame,
- default_fsetattr_cbk,
- FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr,
- fd, stbuf, valid, xdata);
- return 0;
- }
+ if (priv && priv->pass_through) {
+ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
+ }
- stub = fop_fsetattr_stub (frame, default_fsetattr_resume,
- fd, stbuf, valid, xdata);
- if (!stub) {
- STACK_UNWIND_STRICT (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
- }
+ stub = fop_fsetattr_stub(frame, default_fsetattr_resume, fd, stbuf, valid,
+ xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
- gf_quiesce_enqueue (this, stub);
+ gf_quiesce_enqueue(this, stub);
- return 0;
+ return 0;
}
int32_t
-mem_acct_init (xlator_t *this)
+quiesce_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
- int ret = -1;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
- ret = xlator_mem_acct_init (this, gf_quiesce_mt_end + 1);
+ priv = this->private;
- return ret;
+ if (priv && priv->pass_through) {
+ STACK_WIND(frame, default_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+ return 0;
+ }
+
+ stub = fop_fallocate_stub(frame, default_fallocate_resume, fd, mode, offset,
+ len, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(fallocate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
+
+ gf_quiesce_enqueue(this, stub);
+
+ return 0;
}
int
-init (xlator_t *this)
+quiesce_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, off_t offset, dict_t *xdata)
+{
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
+
+ local = frame->local;
+ frame->local = NULL;
+ if ((op_ret == -1) && (op_errno == ENOTCONN)) {
+ /* Re-transmit (by putting in the queue) */
+ stub = fop_seek_stub(frame, default_seek_resume, local->fd,
+ local->offset, local->what, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(seek, frame, -1, ENOMEM, 0, NULL);
+ goto out;
+ }
+
+ gf_quiesce_enqueue(this, stub);
+ goto out;
+ }
+
+ STACK_UNWIND_STRICT(seek, frame, op_ret, op_errno, offset, xdata);
+out:
+ gf_quiesce_local_wipe(this, local);
+
+ return 0;
+}
+
+int
+quiesce_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
{
- int ret = -1;
- quiesce_priv_t *priv = NULL;
+ quiesce_priv_t *priv = NULL;
+ call_stub_t *stub = NULL;
+ quiesce_local_t *local = NULL;
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "'quiesce' not configured with exactly one child");
- goto out;
- }
+ priv = this->private;
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
+ if (priv && priv->pass_through) {
+ local = mem_get0(priv->local_pool);
+ local->fd = fd_ref(fd);
+ local->offset = offset;
+ local->what = what;
- priv = GF_CALLOC (1, sizeof (*priv), gf_quiesce_mt_priv_t);
- if (!priv)
- goto out;
+ frame->local = local;
- priv->local_pool = mem_pool_new (quiesce_local_t,
- GF_FOPS_EXPECTED_IN_PARALLEL);
+ STACK_WIND(frame, quiesce_seek_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->seek, fd, offset, what, xdata);
+ return 0;
+ }
- LOCK_INIT (&priv->lock);
- priv->pass_through = _gf_false;
+ stub = fop_seek_stub(frame, default_seek_resume, fd, offset, what, xdata);
+ if (!stub) {
+ STACK_UNWIND_STRICT(seek, frame, -1, ENOMEM, 0, NULL);
+ return 0;
+ }
- INIT_LIST_HEAD (&priv->req);
+ gf_quiesce_enqueue(this, stub);
- this->private = priv;
- ret = 0;
-out:
- return ret;
+ return 0;
}
-void
-fini (xlator_t *this)
+int32_t
+mem_acct_init(xlator_t *this)
{
- quiesce_priv_t *priv = NULL;
+ int ret = -1;
- priv = this->private;
- if (!priv)
- goto out;
- this->private = NULL;
+ ret = xlator_mem_acct_init(this, gf_quiesce_mt_end + 1);
- mem_pool_destroy (priv->local_pool);
- LOCK_DESTROY (&priv->lock);
- GF_FREE (priv);
+ return ret;
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ int32_t ret = -1;
+ quiesce_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ GF_OPTION_RECONF("timeout", priv->timeout, options, time, out);
+ GF_OPTION_RECONF("failover-hosts", priv->failover_hosts, options, str, out);
+ gf_quiesce_populate_failover_hosts(this, priv, priv->failover_hosts);
+
+ ret = 0;
out:
- return;
+ return ret;
}
int
-notify (xlator_t *this, int event, void *data, ...)
+init(xlator_t *this)
{
- int ret = 0;
- quiesce_priv_t *priv = NULL;
- struct timeval timeout = {0,};
+ int ret = -1;
+ quiesce_priv_t *priv = NULL;
- priv = this->private;
- if (!priv)
- goto out;
+ if (!this->children || this->children->next) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'quiesce' not configured with exactly one child");
+ goto out;
+ }
- switch (event) {
- case GF_EVENT_CHILD_UP:
- {
- ret = pthread_create (&priv->thr, NULL, gf_quiesce_dequeue_start,
- this);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create the quiesce-dequeue thread");
- }
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
+ }
- LOCK (&priv->lock);
- {
- priv->pass_through = _gf_true;
- }
- UNLOCK (&priv->lock);
- break;
- }
- case GF_EVENT_CHILD_DOWN:
- LOCK (&priv->lock);
- {
- priv->pass_through = _gf_false;
- }
- UNLOCK (&priv->lock);
+ priv = GF_CALLOC(1, sizeof(*priv), gf_quiesce_mt_priv_t);
+ if (!priv)
+ goto out;
- if (priv->timer)
- break;
- timeout.tv_sec = 20;
- timeout.tv_usec = 0;
+ INIT_LIST_HEAD(&priv->failover_list);
- priv->timer = gf_timer_call_after (this->ctx,
- timeout,
- gf_quiesce_timeout,
- (void *) this);
+ GF_OPTION_INIT("timeout", priv->timeout, time, out);
+ GF_OPTION_INIT("failover-hosts", priv->failover_hosts, str, out);
+ gf_quiesce_populate_failover_hosts(this, priv, priv->failover_hosts);
- if (priv->timer == NULL) {
- gf_log (this->name, GF_LOG_ERROR,
- "Cannot create timer");
- }
+ priv->local_pool = mem_pool_new(quiesce_local_t,
+ GF_FOPS_EXPECTED_IN_PARALLEL);
- break;
- default:
- break;
- }
+ LOCK_INIT(&priv->lock);
+ priv->pass_through = _gf_false;
+
+ INIT_LIST_HEAD(&priv->req);
- ret = default_notify (this, event, data);
+ this->private = priv;
+ ret = 0;
out:
- return ret;
+ return ret;
}
+void
+fini(xlator_t *this)
+{
+ quiesce_priv_t *priv = NULL;
-struct xlator_fops fops = {
- /* write/modifying fops */
- .mknod = quiesce_mknod,
- .create = quiesce_create,
- .truncate = quiesce_truncate,
- .ftruncate = quiesce_ftruncate,
- .setxattr = quiesce_setxattr,
- .removexattr = quiesce_removexattr,
- .symlink = quiesce_symlink,
- .unlink = quiesce_unlink,
- .link = quiesce_link,
- .mkdir = quiesce_mkdir,
- .rmdir = quiesce_rmdir,
- .rename = quiesce_rename,
-
- /* The below calls are known to change state, hence
- re-transmittion is not advised */
- .lk = quiesce_lk,
- .inodelk = quiesce_inodelk,
- .finodelk = quiesce_finodelk,
- .entrylk = quiesce_entrylk,
- .fentrylk = quiesce_fentrylk,
- .xattrop = quiesce_xattrop,
- .fxattrop = quiesce_fxattrop,
- .setattr = quiesce_setattr,
- .fsetattr = quiesce_fsetattr,
-
- /* Special case, re-transmittion is not harmful *
- * as offset is properly sent from above layers */
- /* TODO: not re-transmitted as of now */
- .writev = quiesce_writev,
-
- /* re-transmittable fops */
- .lookup = quiesce_lookup,
- .stat = quiesce_stat,
- .fstat = quiesce_fstat,
- .access = quiesce_access,
- .readlink = quiesce_readlink,
- .getxattr = quiesce_getxattr,
- .open = quiesce_open,
- .readv = quiesce_readv,
- .flush = quiesce_flush,
- .fsync = quiesce_fsync,
- .statfs = quiesce_statfs,
- .opendir = quiesce_opendir,
- .readdir = quiesce_readdir,
- .readdirp = quiesce_readdirp,
- .fsyncdir = quiesce_fsyncdir,
+ priv = this->private;
+ if (!priv)
+ goto out;
+ this->private = NULL;
-};
+ mem_pool_destroy(priv->local_pool);
+ priv->local_pool = NULL;
+ LOCK_DESTROY(&priv->lock);
+ GF_FREE(priv);
+out:
+ return;
+}
-struct xlator_dumpops dumpops = {
-};
+int
+notify(xlator_t *this, int event, void *data, ...)
+{
+ int ret = 0;
+ quiesce_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+
+ switch (event) {
+ case GF_EVENT_CHILD_UP: {
+ ret = gf_thread_create(&priv->thr, NULL, gf_quiesce_dequeue_start,
+ this, "quiesce");
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to create the quiesce-dequeue thread");
+ }
+
+ LOCK(&priv->lock);
+ {
+ priv->pass_through = _gf_true;
+ }
+ UNLOCK(&priv->lock);
+ break;
+ }
+ case GF_EVENT_CHILD_DOWN:
+ LOCK(&priv->lock);
+ {
+ priv->pass_through = _gf_false;
+ __gf_quiesce_start_timer(this, priv);
+ }
+ UNLOCK(&priv->lock);
+ break;
+ default:
+ break;
+ }
+ ret = default_notify(this, event, data);
+out:
+ return ret;
+}
-struct xlator_cbks cbks = {
+struct xlator_fops fops = {
+ /* write/modifying fops */
+ .mknod = quiesce_mknod,
+ .create = quiesce_create,
+ .truncate = quiesce_truncate,
+ .ftruncate = quiesce_ftruncate,
+ .setxattr = quiesce_setxattr,
+ .fsetxattr = quiesce_fsetxattr,
+ .removexattr = quiesce_removexattr,
+ .fremovexattr = quiesce_fremovexattr,
+ .symlink = quiesce_symlink,
+ .unlink = quiesce_unlink,
+ .link = quiesce_link,
+ .mkdir = quiesce_mkdir,
+ .rmdir = quiesce_rmdir,
+ .rename = quiesce_rename,
+ .fallocate = quiesce_fallocate,
+
+ /* The below calls are known to change state, hence
+ re-transmittion is not advised */
+ .lk = quiesce_lk,
+ .inodelk = quiesce_inodelk,
+ .finodelk = quiesce_finodelk,
+ .entrylk = quiesce_entrylk,
+ .fentrylk = quiesce_fentrylk,
+ .xattrop = quiesce_xattrop,
+ .fxattrop = quiesce_fxattrop,
+ .setattr = quiesce_setattr,
+ .fsetattr = quiesce_fsetattr,
+
+ /* Special case, re-transmittion is not harmful *
+ * as offset is properly sent from above layers */
+ /* TODO: not re-transmitted as of now */
+ .writev = quiesce_writev,
+
+ /* re-transmittable fops */
+ .lookup = quiesce_lookup,
+ .stat = quiesce_stat,
+ .fstat = quiesce_fstat,
+ .access = quiesce_access,
+ .readlink = quiesce_readlink,
+ .getxattr = quiesce_getxattr,
+ .fgetxattr = quiesce_fgetxattr,
+ .open = quiesce_open,
+ .readv = quiesce_readv,
+ .flush = quiesce_flush,
+ .fsync = quiesce_fsync,
+ .statfs = quiesce_statfs,
+ .opendir = quiesce_opendir,
+ .readdir = quiesce_readdir,
+ .readdirp = quiesce_readdirp,
+ .fsyncdir = quiesce_fsyncdir,
+ .seek = quiesce_seek,
};
+struct xlator_dumpops dumpops;
+
+struct xlator_cbks cbks;
struct volume_options options[] = {
- { .key = {NULL} },
+ {
+ .key = {"timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .default_value = "45",
+ .description =
+ "After 'timeout' seconds since the time 'quiesce' "
+ "option was set to \"!pass-through\", acknowledgements to file "
+ "operations are no longer quiesced and previously "
+ "quiesced acknowledgements are sent to the application",
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ },
+ {.key = {"failover-hosts"},
+ .type = GF_OPTION_TYPE_INTERNET_ADDRESS_LIST,
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .flags = OPT_FLAG_CLIENT_OPT | OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .description = "It is a comma separated list of hostname/IP "
+ "addresses. It Specifies the list of hosts where "
+ "the gfproxy daemons are running, to which the "
+ "the thin clients can failover to."},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {GD_OP_VERSION_3_12_0},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "quiesce",
+ .category = GF_TECH_PREVIEW,
};
diff --git a/xlators/features/quiesce/src/quiesce.h b/xlators/features/quiesce/src/quiesce.h
index 08f87e2aeae..6ab2af40a56 100644
--- a/xlators/features/quiesce/src/quiesce.h
+++ b/xlators/features/quiesce/src/quiesce.h
@@ -1,61 +1,65 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __QUIESCE_H__
#define __QUIESCE_H__
#include "quiesce-mem-types.h"
-#include "xlator.h"
-#include "timer.h"
+#include "quiesce-messages.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/timer.h>
#define GF_FOPS_EXPECTED_IN_PARALLEL 512
typedef struct {
- gf_timer_t *timer;
- gf_boolean_t pass_through;
- gf_lock_t lock;
- struct list_head req;
- int queue_size;
- pthread_t thr;
- struct mem_pool *local_pool;
+ struct list_head list;
+ char *addr;
+ gf_boolean_t tried; /* indicates attempted connecting */
+} quiesce_failover_hosts_t;
+
+typedef struct {
+ gf_timer_t *timer;
+ gf_boolean_t pass_through;
+ gf_lock_t lock;
+ struct list_head req;
+ int queue_size;
+ pthread_t thr;
+ struct mem_pool *local_pool;
+ uint32_t timeout;
+ char *failover_hosts;
+ struct list_head failover_list;
} quiesce_priv_t;
typedef struct {
- fd_t *fd;
- char *name;
- char *volname;
- loc_t loc;
- off_t size;
- off_t offset;
- mode_t mode;
- int32_t flag;
- struct iatt stbuf;
- struct iovec *vector;
- struct iobref *iobref;
- dict_t *dict;
- struct gf_flock flock;
- entrylk_cmd cmd;
- entrylk_type type;
- gf_xattrop_flags_t xattrop_flags;
- int32_t wbflags;
- uint32_t io_flag;
+ fd_t *fd;
+ char *name;
+ char *volname;
+ loc_t loc;
+ off_t size;
+ off_t offset;
+ mode_t mode;
+ int32_t flag;
+ struct iatt stbuf;
+ struct iovec *vector;
+ struct iobref *iobref;
+ dict_t *dict;
+ struct gf_flock flock;
+ entrylk_cmd cmd;
+ entrylk_type type;
+ gf_xattrop_flags_t xattrop_flags;
+ int32_t wbflags;
+ uint32_t io_flag;
+ /* for fallocate */
+ size_t len;
+ /* for lseek */
+ gf_seek_what_t what;
} quiesce_local_t;
#endif
diff --git a/xlators/features/quota/src/Makefile.am b/xlators/features/quota/src/Makefile.am
index 4baa5f06e35..1c2dcef0ca3 100644
--- a/xlators/features/quota/src/Makefile.am
+++ b/xlators/features/quota/src/Makefile.am
@@ -1,16 +1,29 @@
-xlator_LTLIBRARIES = quota.la
+if WITH_SERVER
+xlator_LTLIBRARIES = quota.la quotad.la
+endif
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-quota_la_LDFLAGS = -module -avoidversion
+quota_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+quotad_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-quota_la_SOURCES = quota.c
-quota_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+quota_la_SOURCES = quota.c quota-enforcer-client.c
+quota_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
-noinst_HEADERS = quota-mem-types.h quota.h
+quotad_la_SOURCES = quotad.c quotad-helpers.c quotad-aggregator.c
+quotad_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS) \
+noinst_HEADERS = quota-mem-types.h quota.h quotad-aggregator.h \
+ quotad-helpers.h quota-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src/ -I$(top_builddir)/rpc/xdr/src/ \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
-I$(top_srcdir)/xlators/cluster/dht/src
-CLEANFILES =
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+CLEANFILES =
diff --git a/xlators/features/quota/src/quota-enforcer-client.c b/xlators/features/quota/src/quota-enforcer-client.c
new file mode 100644
index 00000000000..480d64ade27
--- /dev/null
+++ b/xlators/features/quota/src/quota-enforcer-client.c
@@ -0,0 +1,503 @@
+/*
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <stdio.h>
+#include <string.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <sys/file.h>
+#include <netdb.h>
+#include <signal.h>
+#include <libgen.h>
+
+#include <sys/utsname.h>
+
+#include <stdint.h>
+#include <pthread.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <semaphore.h>
+#include <errno.h>
+
+#ifdef HAVE_MALLOC_H
+#include <malloc.h>
+#endif
+
+#include "quota.h"
+#include "quota-messages.h"
+
+extern struct rpc_clnt_program quota_enforcer_clnt;
+
+int32_t
+quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent);
+
+int
+quota_enforcer_submit_request(void *req, call_frame_t *frame,
+ rpc_clnt_prog_t *prog, int procnum,
+ struct iobref *iobref, xlator_t *this,
+ fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
+{
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {
+ 0,
+ };
+ struct iobuf *iobuf = NULL;
+ char new_iobref = 0;
+ ssize_t xdr_size = 0;
+ quota_priv_t *priv = NULL;
+
+ GF_ASSERT(this);
+
+ priv = this->private;
+
+ if (req) {
+ xdr_size = xdr_sizeof(xdrproc, req);
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ }
+
+ if (!iobref) {
+ iobref = iobref_new();
+ if (!iobref) {
+ goto out;
+ }
+
+ new_iobref = 1;
+ }
+
+ iobref_add(iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_size(iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic(iov, req, xdrproc);
+ if (ret == -1) {
+ goto out;
+ }
+ iov.iov_len = ret;
+ count = 1;
+ }
+
+ /* Send the msg */
+ ret = rpc_clnt_submit(priv->rpc_clnt, prog, procnum, cbkfn, &iov, count,
+ NULL, 0, iobref, frame, NULL, 0, NULL, 0, NULL);
+ ret = 0;
+
+out:
+ if (new_iobref)
+ iobref_unref(iobref);
+ if (iobuf)
+ iobuf_unref(iobuf);
+
+ return ret;
+}
+
+int
+quota_enforcer_lookup_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ quota_local_t *local = NULL;
+ call_frame_t *frame = NULL;
+ int ret = 0;
+ gfs3_lookup_rsp rsp = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+ struct iatt postparent = {
+ 0,
+ };
+ int op_errno = EINVAL;
+ dict_t *xdata = NULL;
+ inode_t *inode = NULL;
+ xlator_t *this = NULL;
+ quota_priv_t *priv = NULL;
+ struct timespec retry_delay = {
+ 0,
+ };
+ gf_timer_t *timer = NULL;
+
+ this = THIS;
+
+ frame = myframe;
+ local = frame->local;
+ inode = local->validate_loc.inode;
+ priv = this->private;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gfs3_lookup_rsp);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, Q_MSG_XDR_DECODING_FAILED,
+ "XDR decoding failed");
+ rsp.op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ op_errno = gf_error_to_errno(rsp.op_errno);
+ gf_stat_to_iatt(&rsp.postparent, &postparent);
+
+ if (rsp.op_ret == -1)
+ goto out;
+
+ rsp.op_ret = -1;
+ gf_stat_to_iatt(&rsp.stat, &stbuf);
+
+ GF_PROTOCOL_DICT_UNSERIALIZE(frame->this, xdata, (rsp.xdata.xdata_val),
+ (rsp.xdata.xdata_len), rsp.op_ret, op_errno,
+ out);
+
+ if ((!gf_uuid_is_null(inode->gfid)) &&
+ (gf_uuid_compare(stbuf.ia_gfid, inode->gfid) != 0)) {
+ gf_msg_debug(frame->this->name, ESTALE, "gfid changed for %s",
+ local->validate_loc.path);
+ rsp.op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ rsp.op_ret = 0;
+
+out:
+ rsp.op_errno = op_errno;
+
+ /* We need to retry connecting to quotad on ENOTCONN error.
+ * Suppose if there are two volumes vol1 and vol2,
+ * and quota is enabled and limit is set on vol1.
+ * Now if IO is happening on vol1 and quota is enabled/disabled
+ * on vol2, quotad gets restarted and client will receive
+ * ENOTCONN in the IO path of vol1
+ */
+ if (rsp.op_ret == -1 && rsp.op_errno == ENOTCONN) {
+ if (local->quotad_conn_retry >= 12) {
+ priv->quotad_conn_status = 1;
+ gf_log(this->name, GF_LOG_WARNING,
+ "failed to connect "
+ "to quotad after retry count %d)",
+ local->quotad_conn_retry);
+ } else {
+ local->quotad_conn_retry++;
+ }
+
+ if (priv->quotad_conn_status == 0) {
+ /* retry connecting after 5secs for 12 retries
+ * (up to 60sec).
+ */
+ gf_log(this->name, GF_LOG_DEBUG,
+ "retry connecting to "
+ "quotad (retry count %d)",
+ local->quotad_conn_retry);
+
+ retry_delay.tv_sec = 5;
+ retry_delay.tv_nsec = 0;
+ timer = gf_timer_call_after(this->ctx, retry_delay,
+ _quota_enforcer_lookup, (void *)frame);
+ if (timer == NULL) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "failed to "
+ "set quota_enforcer_lookup with timer");
+ } else {
+ goto clean;
+ }
+ }
+ } else {
+ priv->quotad_conn_status = 0;
+ }
+
+ if (rsp.op_ret == -1) {
+ /* any error other than ENOENT */
+ if (rsp.op_errno != ENOENT)
+ gf_msg(
+ this->name, GF_LOG_WARNING, rsp.op_errno, Q_MSG_LOOKUP_FAILED,
+ "Getting cluster-wide size of directory failed "
+ "(path: %s gfid:%s)",
+ local->validate_loc.path, loc_gfid_utoa(&local->validate_loc));
+ else
+ gf_msg_trace(this->name, ENOENT, "not found on remote node");
+
+ } else if (local->quotad_conn_retry) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "connected to quotad after "
+ "retry count %d",
+ local->quotad_conn_retry);
+ }
+
+ local->validate_cbk(frame, NULL, this, rsp.op_ret, rsp.op_errno, inode,
+ &stbuf, xdata, &postparent);
+
+clean:
+ if (xdata)
+ dict_unref(xdata);
+
+ free(rsp.xdata.xdata_val);
+
+ return 0;
+}
+
+void
+_quota_enforcer_lookup(void *data)
+{
+ quota_local_t *local = NULL;
+ gfs3_lookup_req req = {
+ {
+ 0,
+ },
+ };
+ int ret = 0;
+ int op_errno = ESTALE;
+ quota_priv_t *priv = NULL;
+ call_frame_t *frame = NULL;
+ loc_t *loc = NULL;
+ xlator_t *this = NULL;
+ char *dir_path = NULL;
+
+ frame = data;
+ local = frame->local;
+ this = local->this;
+ loc = &local->validate_loc;
+
+ priv = this->private;
+
+ if (!(loc && loc->inode))
+ goto unwind;
+
+ if (!gf_uuid_is_null(loc->inode->gfid))
+ memcpy(req.gfid, loc->inode->gfid, 16);
+ else
+ memcpy(req.gfid, loc->gfid, 16);
+
+ if (local->validate_xdata) {
+ GF_PROTOCOL_DICT_SERIALIZE(this, local->validate_xdata,
+ (&req.xdata.xdata_val), req.xdata.xdata_len,
+ op_errno, unwind);
+ }
+
+ if (loc->name)
+ req.bname = (char *)loc->name;
+ else
+ req.bname = "";
+
+ if (loc->path)
+ dir_path = (char *)loc->path;
+ else
+ dir_path = "";
+
+ ret = quota_enforcer_submit_request(
+ &req, frame, priv->quota_enforcer, GF_AGGREGATOR_LOOKUP, NULL, this,
+ quota_enforcer_lookup_cbk, (xdrproc_t)xdr_gfs3_lookup_req);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_RPC_SUBMIT_FAILED,
+ "Couldn't send the request to "
+ "fetch cluster wide size of directory (path:%s gfid:%s)",
+ dir_path, req.gfid);
+ }
+
+ GF_FREE(req.xdata.xdata_val);
+
+ return;
+
+unwind:
+ local->validate_cbk(frame, NULL, this, -1, op_errno, NULL, NULL, NULL,
+ NULL);
+
+ GF_FREE(req.xdata.xdata_val);
+
+ return;
+}
+
+int
+quota_enforcer_lookup(call_frame_t *frame, xlator_t *this, dict_t *xdata,
+ fop_lookup_cbk_t validate_cbk)
+{
+ quota_local_t *local = NULL;
+
+ if (!frame || !this)
+ goto unwind;
+
+ local = frame->local;
+ local->this = this;
+ local->validate_cbk = validate_cbk;
+ local->validate_xdata = dict_ref(xdata);
+
+ _quota_enforcer_lookup(frame);
+
+ return 0;
+
+unwind:
+ validate_cbk(frame, NULL, this, -1, ESTALE, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+int
+quota_enforcer_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ xlator_t *this = NULL;
+ int ret = 0;
+ quota_priv_t *priv = NULL;
+
+ this = mydata;
+ priv = this->private;
+ switch (event) {
+ case RPC_CLNT_CONNECT: {
+ pthread_mutex_lock(&priv->conn_mutex);
+ {
+ priv->conn_status = _gf_true;
+ }
+ pthread_mutex_unlock(&priv->conn_mutex);
+ gf_msg_trace(this->name, 0, "got RPC_CLNT_CONNECT");
+ break;
+ }
+
+ case RPC_CLNT_DISCONNECT: {
+ pthread_mutex_lock(&priv->conn_mutex);
+ {
+ priv->conn_status = _gf_false;
+ pthread_cond_signal(&priv->conn_cond);
+ }
+ pthread_mutex_unlock(&priv->conn_mutex);
+ gf_msg_trace(this->name, 0, "got RPC_CLNT_DISCONNECT");
+ break;
+ }
+
+ default:
+ gf_msg_trace(this->name, 0, "got some other RPC event %d", event);
+ ret = 0;
+ break;
+ }
+
+ return ret;
+}
+
+int
+quota_enforcer_blocking_connect(rpc_clnt_t *rpc)
+{
+ dict_t *options = NULL;
+ int ret = -1;
+
+ options = dict_new();
+ if (options == NULL)
+ goto out;
+
+ ret = dict_set_sizen_str_sizen(options, "non-blocking-io", "no");
+ if (ret)
+ goto out;
+
+ rpc->conn.trans->reconfigure(rpc->conn.trans, options);
+
+ rpc_clnt_start(rpc);
+
+ ret = dict_set_sizen_str_sizen(options, "non-blocking-io", "yes");
+ if (ret)
+ goto out;
+
+ rpc->conn.trans->reconfigure(rpc->conn.trans, options);
+
+ ret = 0;
+out:
+ if (options)
+ dict_unref(options);
+
+ return ret;
+}
+
+// Returns a started rpc_clnt. Creates a new rpc_clnt if quota_priv doesn't have
+// one already
+struct rpc_clnt *
+quota_enforcer_init(xlator_t *this, dict_t *options)
+{
+ struct rpc_clnt *rpc = NULL;
+ quota_priv_t *priv = NULL;
+ int ret = -1;
+
+ priv = this->private;
+
+ LOCK(&priv->lock);
+ {
+ if (priv->rpc_clnt) {
+ ret = 0;
+ rpc = priv->rpc_clnt;
+ }
+ }
+ UNLOCK(&priv->lock);
+
+ if (rpc)
+ goto out;
+
+ priv->quota_enforcer = &quota_enforcer_clnt;
+
+ ret = dict_set_sizen_str_sizen(options, "transport.address-family", "unix");
+ if (ret)
+ goto out;
+
+ ret = dict_set_sizen_str_sizen(options, "transport-type", "socket");
+ if (ret)
+ goto out;
+
+ ret = dict_set_sizen_str_sizen(options, "transport.socket.connect-path",
+ "/var/run/gluster/quotad.socket");
+ if (ret)
+ goto out;
+
+ rpc = rpc_clnt_new(options, this, this->name, 16);
+ if (!rpc) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = rpc_clnt_register_notify(rpc, quota_enforcer_notify, this);
+ if (ret) {
+ gf_msg("quota", GF_LOG_ERROR, 0, Q_MSG_RPCCLNT_REGISTER_NOTIFY_FAILED,
+ "failed to register notify");
+ goto out;
+ }
+
+ ret = quota_enforcer_blocking_connect(rpc);
+ if (ret)
+ goto out;
+
+ ret = 0;
+out:
+ if (ret) {
+ if (rpc)
+ rpc_clnt_unref(rpc);
+ rpc = NULL;
+ }
+
+ return rpc;
+}
+
+struct rpc_clnt_procedure quota_enforcer_actors[GF_AGGREGATOR_MAXVALUE] = {
+ [GF_AGGREGATOR_NULL] = {"NULL", NULL},
+ [GF_AGGREGATOR_LOOKUP] = {"LOOKUP", NULL},
+};
+
+struct rpc_clnt_program quota_enforcer_clnt = {
+ .progname = "Quota enforcer",
+ .prognum = GLUSTER_AGGREGATOR_PROGRAM,
+ .progver = GLUSTER_AGGREGATOR_VERSION,
+ .numproc = GF_AGGREGATOR_MAXVALUE,
+ .proctable = quota_enforcer_actors,
+};
diff --git a/xlators/features/quota/src/quota-mem-types.h b/xlators/features/quota/src/quota-mem-types.h
index ed70c29285e..782a7de96bb 100644
--- a/xlators/features/quota/src/quota-mem-types.h
+++ b/xlators/features/quota/src/quota-mem-types.h
@@ -1,37 +1,30 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __QUOTA_MEM_TYPES_H__
#define __QUOTA_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_quota_mem_types_ {
- gf_quota_mt_quota_priv_t = gf_common_mt_end + 1,
- gf_quota_mt_quota_inode_ctx_t,
- gf_quota_mt_loc_t,
- gf_quota_mt_char,
- gf_quota_mt_int64_t,
- gf_quota_mt_int32_t,
- gf_quota_mt_limits_t,
- gf_quota_mt_quota_dentry_t,
- gf_quota_mt_end
+ /* Those are used by QUOTA_ALLOC_OR_GOTO macro */
+ gf_quota_mt_quota_priv_t = gf_common_mt_end + 1,
+ gf_quota_mt_quota_inode_ctx_t,
+ gf_quota_mt_loc_t,
+ gf_quota_mt_char,
+ gf_quota_mt_int64_t,
+ gf_quota_mt_int32_t,
+ gf_quota_mt_limits_t,
+ gf_quota_mt_quota_dentry_t,
+ gf_quota_mt_quota_limits_level_t,
+ gf_quota_mt_qd_vols_conf_t,
+ gf_quota_mt_aggregator_state_t,
+ gf_quota_mt_end
};
#endif
-
diff --git a/xlators/features/quota/src/quota-messages.h b/xlators/features/quota/src/quota-messages.h
new file mode 100644
index 00000000000..d434ed75e76
--- /dev/null
+++ b/xlators/features/quota/src/quota-messages.h
@@ -0,0 +1,39 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _QUOTA_MESSAGES_H_
+#define _QUOTA_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(QUOTA, Q_MSG_ENFORCEMENT_FAILED, Q_MSG_ENOMEM, Q_MSG_PARENT_NULL,
+ Q_MSG_CROSSED_SOFT_LIMIT, Q_MSG_QUOTA_ENFORCER_RPC_INIT_FAILED,
+ Q_MSG_REMOTE_OPERATION_FAILED, Q_MSG_FAILED_TO_SEND_FOP,
+ Q_MSG_INVALID_VOLFILE, Q_MSG_INODE_PARENT_NOT_FOUND,
+ Q_MSG_XDR_DECODE_ERROR, Q_MSG_DICT_UNSERIALIZE_FAIL,
+ Q_MSG_DICT_SERIALIZE_FAIL, Q_MSG_RPCSVC_INIT_FAILED,
+ Q_MSG_RPCSVC_LISTENER_CREATION_FAILED, Q_MSG_RPCSVC_REGISTER_FAILED,
+ Q_MSG_XDR_DECODING_FAILED, Q_MSG_RPCCLNT_REGISTER_NOTIFY_FAILED,
+ Q_MSG_ANCESTRY_BUILD_FAILED, Q_MSG_SIZE_KEY_MISSING,
+ Q_MSG_INODE_CTX_GET_FAILED, Q_MSG_INODE_CTX_SET_FAILED,
+ Q_MSG_LOOKUP_FAILED, Q_MSG_RPC_SUBMIT_FAILED,
+ Q_MSG_ENFORCEMENT_SKIPPED, Q_MSG_INTERNAL_FOP_KEY_MISSING);
+
+#endif /* !_QUOTA_MESSAGES_H_ */
diff --git a/xlators/features/quota/src/quota.c b/xlators/features/quota/src/quota.c
index a5ff2373fb6..18df9ae6d19 100644
--- a/xlators/features/quota/src/quota.c
+++ b/xlators/features/quota/src/quota.c
@@ -1,3323 +1,5336 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#include <fnmatch.h>
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "quota.h"
-#include "common-utils.h"
-#include "defaults.h"
+#include <glusterfs/statedump.h>
+#include "quota-messages.h"
+#include <glusterfs/events.h>
-int32_t
-quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
- char *name, uuid_t par);
struct volume_options options[];
-int
-quota_loc_fill (loc_t *loc, inode_t *inode, inode_t *parent, char *path)
+static int32_t
+__quota_init_inode_ctx(inode_t *inode, xlator_t *this,
+ quota_inode_ctx_t **context)
{
- int ret = -1;
+ int32_t ret = -1;
+ quota_inode_ctx_t *ctx = NULL;
- if (!loc) {
- return ret;
- }
+ if (inode == NULL) {
+ goto out;
+ }
- if (inode) {
- loc->inode = inode_ref (inode);
- }
+ QUOTA_ALLOC_OR_GOTO(ctx, quota_inode_ctx_t, out);
- if (parent) {
- loc->parent = inode_ref (parent);
- }
+ LOCK_INIT(&ctx->lock);
- loc->path = gf_strdup (path);
- if (!loc->path) {
- goto loc_wipe;
- }
+ if (context != NULL) {
+ *context = ctx;
+ }
- loc->name = strrchr (loc->path, '/');
- if (loc->name) {
- loc->name++;
- } else {
- goto loc_wipe;
- }
+ INIT_LIST_HEAD(&ctx->parents);
- ret = 0;
+ ret = __inode_ctx_put(inode, this, (uint64_t)(long)ctx);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_INODE_CTX_SET_FAILED,
+ "cannot set quota context "
+ "in inode (gfid:%s)",
+ uuid_utoa(inode->gfid));
+ GF_FREE(ctx);
+ }
+out:
+ return ret;
+}
-loc_wipe:
- if (ret < 0) {
- loc_wipe (loc);
+static int32_t
+quota_inode_ctx_get(inode_t *inode, xlator_t *this, quota_inode_ctx_t **ctx,
+ char create_if_absent)
+{
+ int32_t ret = 0;
+ uint64_t ctx_int;
+
+ LOCK(&inode->lock);
+ {
+ ret = __inode_ctx_get(inode, this, &ctx_int);
+
+ if ((ret == 0) && (ctx != NULL)) {
+ *ctx = (quota_inode_ctx_t *)(unsigned long)ctx_int;
+ } else if (create_if_absent) {
+ ret = __quota_init_inode_ctx(inode, this, ctx);
}
+ }
+ UNLOCK(&inode->lock);
- return ret;
+ return ret;
}
-
int
-quota_inode_loc_fill (inode_t *inode, loc_t *loc)
+quota_loc_fill(loc_t *loc, inode_t *inode, inode_t *parent, char *path)
{
- char *resolvedpath = NULL;
- inode_t *parent = NULL;
- int ret = -1;
- xlator_t *this = NULL;
+ int ret = -1;
- if ((!inode) || (!loc)) {
- return ret;
- }
+ if (!loc || (inode == NULL))
+ return ret;
- this = THIS;
+ if (inode) {
+ loc->inode = inode_ref(inode);
+ gf_uuid_copy(loc->gfid, inode->gfid);
+ }
- if ((inode) && __is_root_gfid (inode->gfid)) {
- loc->parent = NULL;
- goto ignore_parent;
- }
+ if (parent) {
+ loc->parent = inode_ref(parent);
+ }
- parent = inode_parent (inode, 0, NULL);
- if (!parent) {
- gf_log (this->name, GF_LOG_DEBUG,
- "cannot find parent for inode (gfid:%s)",
- uuid_utoa (inode->gfid));
- goto err;
- }
+ if (path != NULL) {
+ loc->path = gf_strdup(path);
-ignore_parent:
- ret = inode_path (inode, NULL, &resolvedpath);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "cannot construct path for inode (gfid:%s)",
- uuid_utoa (inode->gfid));
- goto err;
+ loc->name = strrchr(loc->path, '/');
+ if (loc->name) {
+ loc->name++;
}
+ }
- ret = quota_loc_fill (loc, inode, parent, resolvedpath);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "cannot fill loc");
- goto err;
- }
+ ret = 0;
-err:
- if (parent) {
- inode_unref (parent);
- }
+ return ret;
+}
- GF_FREE (resolvedpath);
+int
+quota_inode_loc_fill(inode_t *inode, loc_t *loc)
+{
+ char *resolvedpath = NULL;
+ inode_t *parent = NULL;
+ int ret = -1;
+ xlator_t *this = NULL;
+ if ((!inode) || (!loc)) {
return ret;
-}
+ }
+
+ this = THIS;
+
+ if ((inode) && __is_root_gfid(inode->gfid)) {
+ loc->parent = NULL;
+ goto ignore_parent;
+ }
+ parent = inode_parent(inode, 0, NULL);
+ if (!parent) {
+ gf_msg_debug(this->name, 0,
+ "cannot find parent for "
+ "inode (gfid:%s)",
+ uuid_utoa(inode->gfid));
+ }
+
+ignore_parent:
+ ret = inode_path(inode, NULL, &resolvedpath);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0,
+ "cannot construct path for "
+ "inode (gfid:%s)",
+ uuid_utoa(inode->gfid));
+ }
+
+ ret = quota_loc_fill(loc, inode, parent, resolvedpath);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "cannot fill loc");
+ goto err;
+ }
+
+err:
+ if (parent) {
+ inode_unref(parent);
+ }
+
+ GF_FREE(resolvedpath);
+
+ return ret;
+}
int32_t
-quota_local_cleanup (xlator_t *this, quota_local_t *local)
+quota_local_cleanup(quota_local_t *local)
{
- if (local == NULL) {
- goto out;
- }
+ if (local == NULL) {
+ goto out;
+ }
+
+ loc_wipe(&local->loc);
+ loc_wipe(&local->newloc);
+ loc_wipe(&local->oldloc);
+ loc_wipe(&local->validate_loc);
+
+ inode_unref(local->inode);
+
+ if (local->xdata)
+ dict_unref(local->xdata);
- loc_wipe (&local->loc);
- loc_wipe (&local->newloc);
- loc_wipe (&local->oldloc);
- loc_wipe (&local->validate_loc);
+ if (local->validate_xdata)
+ dict_unref(local->validate_xdata);
- inode_unref (local->inode);
- LOCK_DESTROY (&local->lock);
+ if (local->stub)
+ call_stub_destroy(local->stub);
+ LOCK_DESTROY(&local->lock);
+
+ mem_put(local);
out:
- return 0;
+ return 0;
}
-
-static inline quota_local_t *
-quota_local_new ()
+static quota_local_t *
+quota_local_new()
{
- quota_local_t *local = NULL;
- local = mem_get0 (THIS->local_pool);
- if (local)
- LOCK_INIT (&local->lock);
- return local;
-}
+ quota_local_t *local = NULL;
+ local = mem_get0(THIS->local_pool);
+ if (local == NULL)
+ goto out;
+ LOCK_INIT(&local->lock);
+ local->space_available = -1;
+
+out:
+ return local;
+}
quota_dentry_t *
-__quota_dentry_new (quota_inode_ctx_t *ctx, char *name, uuid_t par)
+__quota_dentry_new(quota_inode_ctx_t *ctx, char *name, uuid_t par)
{
- quota_dentry_t *dentry = NULL;
- GF_UNUSED int32_t ret = 0;
+ quota_dentry_t *dentry = NULL;
+ GF_UNUSED int32_t ret = 0;
- QUOTA_ALLOC_OR_GOTO (dentry, quota_dentry_t, err);
+ QUOTA_ALLOC_OR_GOTO(dentry, quota_dentry_t, err);
- INIT_LIST_HEAD (&dentry->next);
+ INIT_LIST_HEAD(&dentry->next);
- dentry->name = gf_strdup (name);
- if (dentry->name == NULL) {
- GF_FREE (dentry);
- goto err;
- }
+ dentry->name = gf_strdup(name);
+ if (dentry->name == NULL) {
+ GF_FREE(dentry);
+ dentry = NULL;
+ goto err;
+ }
- uuid_copy (dentry->par, par);
+ gf_uuid_copy(dentry->par, par);
+
+ if (ctx != NULL)
+ list_add_tail(&dentry->next, &ctx->parents);
- list_add_tail (&dentry->next, &ctx->parents);
err:
- return dentry;
+ return dentry;
}
-
void
-__quota_dentry_free (quota_dentry_t *dentry)
+__quota_dentry_free(quota_dentry_t *dentry)
{
- if (dentry == NULL) {
- goto out;
- }
+ if (dentry == NULL) {
+ goto out;
+ }
- list_del_init (&dentry->next);
+ list_del_init(&dentry->next);
- GF_FREE (dentry->name);
- GF_FREE (dentry);
+ GF_FREE(dentry->name);
+ GF_FREE(dentry);
out:
- return;
+ return;
}
-
-int32_t
-quota_validate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *dict,
- dict_t *xdata)
+void
+__quota_dentry_del(quota_inode_ctx_t *ctx, const char *name, uuid_t par)
{
- quota_local_t *local = NULL;
- uint32_t validate_count = 0, link_count = 0;
- int32_t ret = 0;
- quota_inode_ctx_t *ctx = NULL;
- int64_t *size = 0;
- uint64_t value = 0;
- call_stub_t *stub = NULL;
+ quota_dentry_t *dentry = NULL;
+ quota_dentry_t *tmp = NULL;
- local = frame->local;
-
- if (op_ret < 0) {
- goto unwind;
- }
-
- GF_ASSERT (local);
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO_WITH_ERROR ("quota", this, unwind, op_errno,
- EINVAL);
- GF_VALIDATE_OR_GOTO_WITH_ERROR (this->name, dict, unwind, op_errno,
- EINVAL);
-
- ret = inode_ctx_get (local->validate_loc.inode, this, &value);
-
- ctx = (quota_inode_ctx_t *)(unsigned long)value;
- if ((ret == -1) || (ctx == NULL)) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context is not present in inode (gfid:%s)",
- uuid_utoa (local->validate_loc.inode->gfid));
- op_errno = EINVAL;
- goto unwind;
+ list_for_each_entry_safe(dentry, tmp, &ctx->parents, next)
+ {
+ if ((strcmp(dentry->name, name) == 0) &&
+ (gf_uuid_compare(dentry->par, par) == 0)) {
+ __quota_dentry_free(dentry);
+ break;
}
+ }
+}
- ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "size key not present in dict");
- op_errno = EINVAL;
- goto unwind;
- }
+void
+quota_dentry_del(quota_inode_ctx_t *ctx, const char *name, uuid_t par)
+{
+ LOCK(&ctx->lock);
+ {
+ __quota_dentry_del(ctx, name, par);
+ }
+ UNLOCK(&ctx->lock);
+}
- local->just_validated = 1; /* so that we don't go into infinite
- * loop of validation and checking
- * limit when timeout is zero.
- */
- LOCK (&ctx->lock);
- {
- ctx->size = ntoh64 (*size);
- gettimeofday (&ctx->tv, NULL);
- }
- UNLOCK (&ctx->lock);
+static inode_t *
+__quota_inode_parent(inode_t *inode, uuid_t pargfid, const char *name)
+{
+ inode_t *parent = NULL;
- quota_check_limit (frame, local->validate_loc.inode, this, NULL, NULL);
- return 0;
+ parent = inode_parent(inode, pargfid, name);
+ inode_unref(inode);
+ return parent;
+}
-unwind:
- LOCK (&local->lock);
- {
- local->op_ret = -1;
- local->op_errno = op_errno;
+static inode_t *
+quota_inode_parent(inode_t *inode, uuid_t pargfid, const char *name)
+{
+ inode_t *parent = NULL;
- validate_count = --local->validate_count;
- link_count = local->link_count;
+ parent = __quota_inode_parent(inode, pargfid, name);
+ if (!parent)
+ gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0, Q_MSG_PARENT_NULL,
+ "Failed to find "
+ "ancestor for inode (%s)",
+ uuid_utoa(inode->gfid));
- if ((validate_count == 0) && (link_count == 0)) {
- stub = local->stub;
- local->stub = NULL;
- }
- }
- UNLOCK (&local->lock);
+ return parent;
+}
- if (stub != NULL) {
- call_resume (stub);
- }
+int32_t
+quota_inode_depth(inode_t *inode)
+{
+ int depth = 0;
+ inode_t *cur_inode = NULL;
- return 0;
-}
+ cur_inode = inode_ref(inode);
+ while (cur_inode && !__is_root_gfid(cur_inode->gfid)) {
+ depth++;
+ cur_inode = quota_inode_parent(cur_inode, 0, NULL);
+ if (!cur_inode)
+ depth = -1;
+ }
+ if (cur_inode)
+ inode_unref(cur_inode);
-static inline uint64_t
-quota_time_elapsed (struct timeval *now, struct timeval *then)
-{
- return (now->tv_sec - then->tv_sec);
+ return depth;
}
-
int32_t
-quota_timeout (struct timeval *tv, int32_t timeout)
+quota_find_common_ancestor(inode_t *inode1, inode_t *inode2,
+ uuid_t *common_ancestor)
{
- struct timeval now = {0,};
- int32_t timed_out = 0;
+ int32_t depth1 = 0;
+ int32_t depth2 = 0;
+ int32_t ret = -1;
+ inode_t *cur_inode1 = NULL;
+ inode_t *cur_inode2 = NULL;
+
+ depth1 = quota_inode_depth(inode1);
+ if (depth1 < 0)
+ goto out;
+
+ depth2 = quota_inode_depth(inode2);
+ if (depth2 < 0)
+ goto out;
+
+ cur_inode1 = inode_ref(inode1);
+ cur_inode2 = inode_ref(inode2);
+
+ while (cur_inode1 && depth1 > depth2) {
+ cur_inode1 = quota_inode_parent(cur_inode1, 0, NULL);
+ depth1--;
+ }
+
+ while (cur_inode2 && depth2 > depth1) {
+ cur_inode2 = quota_inode_parent(cur_inode2, 0, NULL);
+ depth2--;
+ }
+
+ while (depth1 && cur_inode1 && cur_inode2 && cur_inode1 != cur_inode2) {
+ cur_inode1 = quota_inode_parent(cur_inode1, 0, NULL);
+ cur_inode2 = quota_inode_parent(cur_inode2, 0, NULL);
+ depth1--;
+ }
+
+ if (cur_inode1 && cur_inode2) {
+ gf_uuid_copy(*common_ancestor, cur_inode1->gfid);
+ ret = 0;
+ }
+out:
+ if (cur_inode1)
+ inode_unref(cur_inode1);
+
+ if (cur_inode2)
+ inode_unref(cur_inode2);
- gettimeofday (&now, NULL);
+ return ret;
+}
- if (quota_time_elapsed (&now, tv) >= timeout) {
- timed_out = 1;
+void
+check_ancestory_continue(struct list_head *parents, inode_t *inode,
+ int32_t op_ret, int32_t op_errno, void *data)
+{
+ call_frame_t *frame = NULL;
+ quota_local_t *local = NULL;
+ uint32_t link_count = 0;
+
+ frame = data;
+ local = frame->local;
+
+ if (parents && list_empty(parents)) {
+ gf_msg(THIS->name, GF_LOG_WARNING, EIO, Q_MSG_ANCESTRY_BUILD_FAILED,
+ "Couldn't build ancestry for inode (gfid:%s). "
+ "Without knowing ancestors till root, quota "
+ "cannot be enforced. "
+ "Hence, failing fop with EIO",
+ uuid_utoa(inode->gfid));
+ op_errno = EIO;
+ op_ret = -1;
+ }
+
+ LOCK(&local->lock);
+ {
+ link_count = --local->link_count;
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
}
+ }
+ UNLOCK(&local->lock);
- return timed_out;
+ if (link_count == 0)
+ local->fop_continue_cbk(frame);
}
-
-int32_t
-quota_check_limit (call_frame_t *frame, inode_t *inode, xlator_t *this,
- char *name, uuid_t par)
+void
+check_ancestory(call_frame_t *frame, inode_t *inode)
{
- int32_t ret = -1;
- inode_t *_inode = NULL, *parent = NULL;
- quota_inode_ctx_t *ctx = NULL;
- quota_priv_t *priv = NULL;
- quota_local_t *local = NULL;
- char need_validate = 0, need_unwind = 0;
- int64_t delta = 0;
- call_stub_t *stub = NULL;
- int32_t validate_count = 0, link_count = 0;
- uint64_t value = 0;
- char just_validated = 0;
- uuid_t trav_uuid = {0,};
+ inode_t *cur_inode = NULL;
+ inode_t *parent = NULL;
- GF_VALIDATE_OR_GOTO ("quota", this, out);
- GF_VALIDATE_OR_GOTO (this->name, frame, out);
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ cur_inode = inode_ref(inode);
+ while (cur_inode && !__is_root_gfid(cur_inode->gfid)) {
+ parent = inode_parent(cur_inode, 0, NULL);
+ if (!parent) {
+ quota_build_ancestry(cur_inode, check_ancestory_continue, frame);
+ inode_unref(cur_inode);
+ return;
+ }
+ inode_unref(cur_inode);
+ cur_inode = parent;
+ }
+
+ if (cur_inode) {
+ inode_unref(cur_inode);
+ check_ancestory_continue(NULL, NULL, 0, 0, frame);
+ } else {
+ check_ancestory_continue(NULL, NULL, -1, ESTALE, frame);
+ }
+}
- local = frame->local;
- GF_VALIDATE_OR_GOTO (this->name, local, out);
+void
+check_ancestory_2_cbk(struct list_head *parents, inode_t *inode, int32_t op_ret,
+ int32_t op_errno, void *data)
+{
+ inode_t *this_inode = NULL;
+ quota_inode_ctx_t *ctx = NULL;
- delta = local->delta;
+ this_inode = data;
- GF_VALIDATE_OR_GOTO (this->name, local->stub, out);
+ if (op_ret < 0)
+ goto out;
- priv = this->private;
+ if (parents == NULL || list_empty(parents)) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, Q_MSG_ENFORCEMENT_FAILED,
+ "Couldn't build ancestry for inode (gfid:%s). "
+ "Without knowing ancestors till root, quota "
+ "cannot be enforced.",
+ uuid_utoa(this_inode->gfid));
+ goto out;
+ }
- inode_ctx_get (inode, this, &value);
- ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ quota_inode_ctx_get(this_inode, THIS, &ctx, 0);
+ if (ctx)
+ ctx->ancestry_built = _gf_true;
- _inode = inode_ref (inode);
+out:
+ inode_unref(this_inode);
+}
- LOCK (&local->lock);
- {
- just_validated = local->just_validated;
- local->just_validated = 0;
+void
+check_ancestory_2(xlator_t *this, quota_local_t *local, inode_t *inode)
+{
+ inode_t *cur_inode = NULL;
+ inode_t *parent = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ char *name = NULL;
+ uuid_t pgfid = {0};
+
+ name = (char *)local->loc.name;
+ if (local->loc.parent) {
+ gf_uuid_copy(pgfid, local->loc.parent->gfid);
+ }
+
+ cur_inode = inode_ref(inode);
+ while (cur_inode && !__is_root_gfid(cur_inode->gfid)) {
+ quota_inode_ctx_get(cur_inode, this, &ctx, 0);
+ /* build ancestry is required only on the first lookup,
+ * so stop crawling when the inode_ctx is set for an inode
+ */
+ if (ctx && ctx->ancestry_built)
+ goto setctx;
+
+ parent = inode_parent(cur_inode, pgfid, name);
+ if (!parent) {
+ quota_build_ancestry(cur_inode, check_ancestory_2_cbk,
+ inode_ref(inode));
+ goto out;
+ }
- if (just_validated) {
- local->validate_count--;
- }
+ if (name != NULL) {
+ name = NULL;
+ gf_uuid_clear(pgfid);
}
- UNLOCK (&local->lock);
-
- if ( par != NULL ) {
- uuid_copy (trav_uuid, par);
- }
-
- do {
- if (ctx != NULL) {
- LOCK (&ctx->lock);
- {
- if (ctx->limit >= 0) {
- if (!just_validated
- && quota_timeout (&ctx->tv,
- priv->timeout)) {
- need_validate = 1;
- } else if ((ctx->size + delta)
- >= ctx->limit) {
- local->op_ret = -1;
- local->op_errno = EDQUOT;
- need_unwind = 1;
- }
- }
- }
- UNLOCK (&ctx->lock);
-
- if (need_validate) {
- goto validate;
- }
-
- if (need_unwind) {
- break;
- }
- }
- if (__is_root_gfid (_inode->gfid)) {
- break;
- }
+ inode_unref(cur_inode);
+ cur_inode = parent;
+ }
- parent = inode_parent (_inode, trav_uuid, name);
+setctx:
+ if (cur_inode && cur_inode != inode) {
+ quota_inode_ctx_get(inode, this, &ctx, 0);
+ if (ctx)
+ ctx->ancestry_built = _gf_true;
+ }
+out:
+ if (cur_inode)
+ inode_unref(cur_inode);
+}
- if (name != NULL) {
- name = NULL;
- uuid_clear (trav_uuid);
- }
+static void
+quota_link_count_decrement(call_frame_t *frame)
+{
+ call_frame_t *tmpframe = NULL;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+ int link_count = -1;
+
+ local = frame->local;
+ if (local && local->par_frame) {
+ local = local->par_frame->local;
+ tmpframe = frame;
+ }
+
+ if (local == NULL)
+ goto out;
+
+ LOCK(&local->lock);
+ {
+ link_count = --local->link_count;
+ if (link_count == 0) {
+ stub = local->stub;
+ local->stub = NULL;
+ }
+ }
+ UNLOCK(&local->lock);
+
+ if (stub != NULL) {
+ call_resume(stub);
+ }
- if (parent == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "cannot find parent for inode (gfid:%s), hence "
- "aborting enforcing quota-limits and continuing"
- " with the fop", uuid_utoa (_inode->gfid));
- }
+out:
+ if (tmpframe) {
+ local = tmpframe->local;
+ tmpframe->local = NULL;
- inode_unref (_inode);
- _inode = parent;
- just_validated = 0;
+ STACK_DESTROY(frame->root);
+ if (local)
+ quota_local_cleanup(local);
+ }
- if (_inode == NULL) {
- break;
- }
+ return;
+}
- value = 0;
- inode_ctx_get (_inode, this, &value);
- ctx = (quota_inode_ctx_t *)(unsigned long)value;
- } while (1);
+static void
+quota_handle_validate_error(call_frame_t *frame, int32_t op_ret,
+ int32_t op_errno)
+{
+ quota_local_t *local;
- ret = 0;
+ local = frame->local;
+ if (local && local->par_frame)
+ local = local->par_frame->local;
- if (_inode != NULL) {
- inode_unref (_inode);
- }
+ if (local == NULL)
+ goto out;
- LOCK (&local->lock);
+ if (op_ret < 0) {
+ LOCK(&local->lock);
{
- validate_count = local->validate_count;
- link_count = local->link_count;
- if ((validate_count == 0) && (link_count == 0)) {
- stub = local->stub;
- local->stub = NULL;
- }
- }
- UNLOCK (&local->lock);
-
- if (stub != NULL) {
- call_resume (stub);
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
}
-
+ UNLOCK(&local->lock);
+ }
+ /* we abort checking limits on this path to root */
+ quota_link_count_decrement(frame);
out:
- return ret;
+ return;
+}
-validate:
- LOCK (&local->lock);
- {
- loc_wipe (&local->validate_loc);
+int32_t
+quota_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ quota_local_t *local = NULL;
+ int32_t ret = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ uint64_t value = 0;
+ quota_meta_t size = {
+ 0,
+ };
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ goto unwind;
+ }
+
+ GF_ASSERT(local);
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR("quota", this, unwind, op_errno, EINVAL);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, xdata, unwind, op_errno, EINVAL);
+
+ ret = inode_ctx_get(local->validate_loc.inode, this, &value);
+
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, Q_MSG_INODE_CTX_GET_FAILED,
+ "quota context is"
+ " not present in inode (gfid:%s)",
+ uuid_utoa(local->validate_loc.inode->gfid));
+ op_errno = EINVAL;
+ goto unwind;
+ }
+
+ ret = quota_dict_get_meta(xdata, QUOTA_SIZE_KEY, SLEN(QUOTA_SIZE_KEY),
+ &size);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, Q_MSG_SIZE_KEY_MISSING,
+ "quota size key not present "
+ "in dict");
+ op_errno = EINVAL;
+ }
+
+ local->just_validated = 1; /* so that we don't go into infinite
+ * loop of validation and checking
+ * limit when timeout is zero.
+ */
+ LOCK(&ctx->lock);
+ {
+ ctx->size = size.size;
+ ctx->validate_time = gf_time();
+ ctx->file_count = size.file_count;
+ ctx->dir_count = size.dir_count;
+ }
+ UNLOCK(&ctx->lock);
+
+ quota_check_limit(frame, local->validate_loc.inode, this);
+ return 0;
- if (just_validated) {
- local->validate_count--;
- }
+unwind:
+ quota_handle_validate_error(frame, op_ret, op_errno);
+ return 0;
+}
- local->validate_count++;
- ret = quota_inode_loc_fill (_inode, &local->validate_loc);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot fill loc for inode (gfid:%s), hence "
- "aborting quota-checks and continuing with fop",
- uuid_utoa (_inode->gfid));
- local->validate_count--;
- }
- }
- UNLOCK (&local->lock);
+static inline gf_boolean_t
+quota_timeout(time_t t, uint32_t timeout)
+{
+ return (gf_time() - t) >= timeout;
+}
- if (ret < 0) {
- goto loc_fill_failed;
+/* Return: 1 if new entry added
+ * 0 no entry added
+ * -1 on errors
+ */
+static int32_t
+quota_add_parent(struct list_head *list, char *name, uuid_t pgfid)
+{
+ quota_dentry_t *entry = NULL;
+ gf_boolean_t found = _gf_false;
+ int ret = 0;
+
+ if (!list_empty(list)) {
+ list_for_each_entry(entry, list, next)
+ {
+ if (gf_uuid_compare(pgfid, entry->par) == 0) {
+ found = _gf_true;
+ goto out;
+ }
}
+ }
- STACK_WIND (frame, quota_validate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, &local->validate_loc,
- QUOTA_SIZE_KEY, NULL);
+ entry = __quota_dentry_new(NULL, name, pgfid);
+ if (entry)
+ list_add_tail(&entry->next, list);
+ else
+ ret = -1;
-loc_fill_failed:
- inode_unref (_inode);
+out:
+ if (found)
return 0;
+ else if (ret == 0)
+ return 1;
+ else
+ return -1;
}
-
-int32_t
-quota_get_limit_value (inode_t *inode, xlator_t *this, int64_t *n)
+/* This function iterates the parent list in inode
+ * context and add unique parent to the list
+ * Returns number of dentry added to the list, or -1 on errors
+ */
+static int32_t
+quota_add_parents_from_ctx(quota_inode_ctx_t *ctx, struct list_head *list)
{
- int32_t ret = 0;
- char *path = NULL;
- limits_t *limit_node = NULL;
- quota_priv_t *priv = NULL;
+ int ret = 0;
+ quota_dentry_t *dentry = NULL;
+ int32_t count = 0;
- if (inode == NULL || n == NULL) {
- ret = -1;
- goto out;
+ if (ctx == NULL || list == NULL)
+ goto out;
+
+ LOCK(&ctx->lock);
+ {
+ list_for_each_entry(dentry, &ctx->parents, next)
+ {
+ ret = quota_add_parent(list, dentry->name, dentry->par);
+ if (ret == 1)
+ count++;
+ else if (ret == -1)
+ break;
}
+ }
+ UNLOCK(&ctx->lock);
- *n = 0;
+out:
+ return (ret == -1) ? -1 : count;
+}
- ret = inode_path (inode, NULL, &path);
- if (ret < 0) {
- ret = -1;
- goto out;
+int32_t
+quota_build_ancestry_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ inode_t *parent = NULL;
+ inode_t *tmp_parent = NULL;
+ inode_t *linked_inode = NULL;
+ inode_t *tmp_inode = NULL;
+ gf_dirent_t *entry = NULL;
+ loc_t loc = {
+ 0,
+ };
+ quota_dentry_t *dentry = NULL;
+ quota_dentry_t *tmp = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ struct list_head parents;
+ quota_local_t *local = NULL;
+ int ret;
+
+ INIT_LIST_HEAD(&parents);
+
+ local = frame->local;
+ frame->local = NULL;
+
+ if (op_ret < 0)
+ goto err;
+
+ if ((op_ret > 0) && (entries != NULL)) {
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ if (__is_root_gfid(entry->inode->gfid)) {
+ /* The list contains a sub-list for each
+ * possible path to the target inode. Each
+ * sub-list starts with the root entry of the
+ * tree and is followed by the child entries
+ * for a particular path to the target entry.
+ * The root entry is an implied sub-list
+ * delimiter, as it denotes we have started
+ * processing a new path. Reset the parent
+ * pointer and continue
+ */
+
+ tmp_parent = NULL;
+ } else {
+ /* For a non-root entry, link this inode */
+ linked_inode = inode_link(entry->inode, tmp_parent,
+ entry->d_name, &entry->d_stat);
+ if (linked_inode) {
+ tmp_inode = entry->inode;
+ entry->inode = linked_inode;
+ inode_unref(tmp_inode);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL,
+ Q_MSG_PARENT_NULL, "inode link failed");
+ op_errno = EINVAL;
+ goto err;
+ }
+ }
+
+ gf_uuid_copy(loc.gfid, entry->d_stat.ia_gfid);
+
+ loc.inode = inode_ref(entry->inode);
+ loc.parent = inode_ref(tmp_parent);
+ loc.name = entry->d_name;
+
+ quota_fill_inodectx(this, entry->inode, entry->dict, &loc,
+ &entry->d_stat, &op_errno);
+
+ /* For non-directory, posix_get_ancestry_non_directory
+ * returns all hard-links that are represented by nodes
+ * adjacent to each other in the dentry-list.
+ * (Unlike the directory case where adjacent nodes
+ * either have a parent/child relationship or belong to
+ * different paths).
+ */
+ if (entry->inode->ia_type == IA_IFDIR)
+ tmp_parent = entry->inode;
+
+ loc_wipe(&loc);
+ }
+ }
+
+ parent = inode_parent(local->loc.inode, 0, NULL);
+ if (parent == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, Q_MSG_PARENT_NULL,
+ "parent is NULL");
+ op_errno = EINVAL;
+ goto err;
+ }
+
+ quota_inode_ctx_get(local->loc.inode, this, &ctx, 0);
+
+ ret = quota_add_parents_from_ctx(ctx, &parents);
+ if (ret == -1) {
+ op_errno = errno;
+ goto err;
+ }
+
+ if (list_empty(&parents)) {
+ /* we built ancestry for a directory */
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ if (entry->inode == local->loc.inode)
+ break;
}
- priv = this->private;
+ /* Getting assertion here, need to investigate
+ comment for now
+ GF_ASSERT (&entry->list != &entries->list);
+ */
- list_for_each_entry (limit_node, &priv->limit_head, limit_list) {
- if (strcmp (limit_node->path, path) == 0) {
- *n = limit_node->value;
- break;
- }
+ ret = quota_add_parent(&parents, entry->d_name, parent->gfid);
+ if (ret == -1) {
+ op_errno = errno;
+ goto err;
}
+ }
-out:
- if (path)
- GF_FREE (path);
+ local->ancestry_cbk(&parents, local->loc.inode, 0, 0, local->ancestry_data);
+ goto cleanup;
- return ret;
-}
+err:
+ local->ancestry_cbk(NULL, NULL, -1, op_errno, local->ancestry_data);
+cleanup:
+ STACK_DESTROY(frame->root);
+ quota_local_cleanup(local);
-static int32_t
-__quota_init_inode_ctx (inode_t *inode, int64_t limit, xlator_t *this,
- dict_t *dict, struct iatt *buf,
- quota_inode_ctx_t **context)
-{
- int32_t ret = -1;
- int64_t *size = 0;
- quota_inode_ctx_t *ctx = NULL;
+ if (parent != NULL) {
+ inode_unref(parent);
+ parent = NULL;
+ }
- if (inode == NULL) {
- goto out;
+ if (!list_empty(&parents)) {
+ list_for_each_entry_safe(dentry, tmp, &parents, next)
+ {
+ __quota_dentry_free(dentry);
}
+ }
- QUOTA_ALLOC_OR_GOTO (ctx, quota_inode_ctx_t, out);
+ return 0;
+}
- ctx->limit = limit;
- if (buf)
- ctx->buf = *buf;
+int
+quota_build_ancestry(inode_t *inode, quota_ancestry_built_t ancestry_cbk,
+ void *data)
+{
+ fd_t *fd = NULL;
+ quota_local_t *local = NULL;
+ call_frame_t *new_frame = NULL;
+ int op_errno = ENOMEM;
+ int op_ret = -1;
+ xlator_t *this = NULL;
+ dict_t *xdata_req = NULL;
+
+ this = THIS;
+
+ xdata_req = dict_new();
+ if (xdata_req == NULL)
+ goto err;
+
+ fd = fd_anonymous(inode);
+ if (fd == NULL)
+ goto err;
+
+ new_frame = create_frame(this, this->ctx->pool);
+ if (new_frame == NULL)
+ goto err;
+
+ local = quota_local_new();
+ if (local == NULL)
+ goto err;
+
+ new_frame->root->uid = new_frame->root->gid = 0;
+ new_frame->local = local;
+ local->ancestry_cbk = ancestry_cbk;
+ local->ancestry_data = data;
+ local->loc.inode = inode_ref(inode);
+
+ op_ret = dict_set_int8(xdata_req, QUOTA_LIMIT_KEY, 1);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto err;
+ }
+
+ op_ret = dict_set_int8(xdata_req, QUOTA_LIMIT_OBJECTS_KEY, 1);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto err;
+ }
+
+ op_ret = dict_set_int8(xdata_req, GET_ANCESTRY_DENTRY_KEY, 1);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ goto err;
+ }
+
+ /* This would ask posix layer to construct dentry chain till root
+ * We don't need to do a opendir, we can use the anonymous fd
+ * here for the readidrp.
+ * avoiding opendir also reduces the window size where another FOP
+ * can be executed before completion of build ancestry
+ */
+ STACK_WIND(new_frame, quota_build_ancestry_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, 0, 0, xdata_req);
+
+ op_ret = 0;
- LOCK_INIT(&ctx->lock);
+err:
+ if (fd)
+ fd_unref(fd);
- if (context != NULL) {
- *context = ctx;
- }
+ if (xdata_req)
+ dict_unref(xdata_req);
- INIT_LIST_HEAD (&ctx->parents);
+ if (op_ret < 0) {
+ ancestry_cbk(NULL, NULL, -1, op_errno, data);
- if (dict != NULL) {
- ret = dict_get_bin (dict, QUOTA_SIZE_KEY, (void **) &size);
- if (ret == 0) {
- ctx->size = ntoh64 (*size);
- gettimeofday (&ctx->tv, NULL);
- }
+ if (new_frame) {
+ local = new_frame->local;
+ new_frame->local = NULL;
+ STACK_DESTROY(new_frame->root);
}
- ret = __inode_ctx_put (inode, this, (uint64_t )(long)ctx);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot set quota context in inode (gfid:%s)",
- uuid_utoa (inode->gfid));
- }
-out:
- return ret;
-}
+ if (local)
+ quota_local_cleanup(local);
+ }
+ return 0;
+}
-static int32_t
-quota_inode_ctx_get (inode_t *inode, int64_t limit, xlator_t *this,
- dict_t *dict, struct iatt *buf, quota_inode_ctx_t **ctx,
- char create_if_absent)
+int
+quota_validate(call_frame_t *frame, inode_t *inode, xlator_t *this,
+ fop_lookup_cbk_t cbk_fn)
{
- int32_t ret = 0;
- uint64_t ctx_int;
+ quota_local_t *local = NULL;
+ int ret = 0;
+ dict_t *xdata = NULL;
+ quota_priv_t *priv = NULL;
- LOCK (&inode->lock);
- {
- ret = __inode_ctx_get (inode, this, &ctx_int);
+ local = frame->local;
+ priv = this->private;
- if ((ret == 0) && (ctx != NULL)) {
- *ctx = (quota_inode_ctx_t *) (unsigned long)ctx_int;
- } else if (create_if_absent) {
- ret = __quota_init_inode_ctx (inode, limit, this, dict,
- buf, ctx);
- }
- }
- UNLOCK (&inode->lock);
+ LOCK(&local->lock);
+ {
+ loc_wipe(&local->validate_loc);
- return ret;
-}
+ ret = quota_inode_loc_fill(inode, &local->validate_loc);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENFORCEMENT_FAILED,
+ "cannot fill loc for inode (gfid:%s), hence "
+ "aborting quota-checks and continuing with fop",
+ uuid_utoa(inode->gfid));
+ }
+ }
+ UNLOCK(&local->lock);
+
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ xdata = dict_new();
+ if (xdata == NULL) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = dict_set_int8(xdata, QUOTA_SIZE_KEY, 1);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "dict set failed");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = dict_set_str(xdata, "volume-uuid", priv->volume_uuid);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "dict set failed");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = quota_enforcer_lookup(frame, this, xdata, cbk_fn);
+ if (ret < 0) {
+ ret = -ENOTCONN;
+ goto err;
+ }
+
+ ret = 0;
+err:
+ if (xdata)
+ dict_unref(xdata);
+ return ret;
+}
-int32_t
-quota_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *dict, struct iatt *postparent)
+void
+quota_check_limit_continuation(struct list_head *parents, inode_t *inode,
+ int32_t op_ret, int32_t op_errno, void *data)
{
- int32_t ret = -1;
- char found = 0;
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
- quota_dentry_t *dentry = NULL;
- int64_t *size = 0;
- uint64_t value = 0;
- limits_t *limit_node = NULL;
- quota_priv_t *priv = NULL;
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ quota_local_t *local = NULL;
+ quota_local_t *par_local = NULL;
+ quota_dentry_t *entry = NULL;
+ inode_t *parent = NULL;
+ int parent_count = 0;
+
+ frame = data;
+ local = frame->local;
+ this = THIS;
+
+ if (local->par_frame)
+ par_local = local->par_frame->local;
+ else
+ par_local = local;
+
+ if ((op_ret < 0) || list_empty(parents)) {
+ if (op_ret >= 0) {
+ gf_msg(this->name, GF_LOG_WARNING, EIO, Q_MSG_ANCESTRY_BUILD_FAILED,
+ "Couldn't build ancestry for inode (gfid:%s). "
+ "Without knowing ancestors till root, quota"
+ "cannot be enforced. "
+ "Hence, failing fop with EIO",
+ uuid_utoa(inode->gfid));
+ op_errno = EIO;
+ }
+
+ quota_handle_validate_error(frame, -1, op_errno);
+ goto out;
+ }
+
+ list_for_each_entry(entry, parents, next) { parent_count++; }
+
+ LOCK(&par_local->lock);
+ {
+ par_local->link_count += (parent_count - 1);
+ }
+ UNLOCK(&par_local->lock);
+
+ if (local->par_frame) {
+ list_for_each_entry(entry, parents, next)
+ {
+ parent = inode_find(inode->table, entry->par);
+ quota_check_limit(frame, parent, this);
+ inode_unref(parent);
+ }
+ } else {
+ list_for_each_entry(entry, parents, next)
+ {
+ parent = do_quota_check_limit(frame, inode, this, entry, _gf_true);
+ if (parent)
+ inode_unref(parent);
+ else
+ quota_link_count_decrement(frame);
+ }
+ }
- local = frame->local;
+out:
+ return;
+}
- priv = this->private;
+int32_t
+quota_check_object_limit(call_frame_t *frame, quota_inode_ctx_t *ctx,
+ quota_priv_t *priv, inode_t *_inode, xlator_t *this,
+ int32_t *op_errno, int just_validated,
+ quota_local_t *local, gf_boolean_t *skip_check)
+{
+ int32_t ret = -1;
+ uint32_t timeout = 0;
+ char need_validate = 0;
+ gf_boolean_t hard_limit_exceeded = 0;
+ int64_t object_aggr_count = 0;
+
+ GF_ASSERT(frame);
+ GF_ASSERT(priv);
+ GF_ASSERT(_inode);
+ GF_ASSERT(this);
+ GF_ASSERT(local);
+
+ if (ctx != NULL && (ctx->object_hard_lim > 0 || ctx->object_soft_lim)) {
+ LOCK(&ctx->lock);
+ {
+ timeout = priv->soft_timeout;
- inode_ctx_get (inode, this, &value);
- ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ object_aggr_count = ctx->file_count + ctx->dir_count + 1;
+ if (((ctx->object_soft_lim >= 0) &&
+ (object_aggr_count) > ctx->object_soft_lim)) {
+ timeout = priv->hard_timeout;
+ }
- if ((op_ret < 0) || (local == NULL)
- || (((ctx == NULL) || (ctx->limit == local->limit))
- && (local->limit < 0) && !((IA_ISREG (buf->ia_type))
- || (IA_ISLNK (buf->ia_type))))) {
- goto unwind;
+ if (!just_validated && quota_timeout(ctx->validate_time, timeout)) {
+ need_validate = 1;
+ } else if ((object_aggr_count) > ctx->object_hard_lim) {
+ hard_limit_exceeded = 1;
+ }
}
+ UNLOCK(&ctx->lock);
- LOCK (&priv->lock);
- {
- list_for_each_entry (limit_node, &priv->limit_head,
- limit_list) {
- if (strcmp (local->loc.path, limit_node->path) == 0) {
- uuid_copy (limit_node->gfid, buf->ia_gfid);
- break;
- }
- }
+ if (need_validate && *skip_check != _gf_true) {
+ *skip_check = _gf_true;
+ ret = quota_validate(frame, _inode, this, quota_validate_cbk);
+ if (ret < 0) {
+ *op_errno = -ret;
+ *skip_check = _gf_false;
+ }
+ goto out;
}
- UNLOCK (&priv->lock);
- ret = quota_inode_ctx_get (local->loc.inode, local->limit, this, dict,
- buf, &ctx, 1);
- if ((ret == -1) || (ctx == NULL)) {
- gf_log (this->name, GF_LOG_WARNING, "cannot create quota "
- "context in inode(gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
+ if (hard_limit_exceeded) {
+ local->op_ret = -1;
+ local->op_errno = EDQUOT;
+ *op_errno = EDQUOT;
+ goto out;
}
- LOCK (&ctx->lock);
+ /*We log usage only if quota limit is configured on
+ that inode
+ */
+ quota_log_usage(this, ctx, _inode, 0);
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
+quota_check_size_limit(call_frame_t *frame, quota_inode_ctx_t *ctx,
+ quota_priv_t *priv, inode_t *_inode, xlator_t *this,
+ int32_t *op_errno, int just_validated, int64_t delta,
+ quota_local_t *local, gf_boolean_t *skip_check)
+{
+ int32_t ret = -1;
+ uint32_t timeout = 0;
+ char need_validate = 0;
+ gf_boolean_t hard_limit_exceeded = 0;
+ int64_t space_available = 0;
+ int64_t wouldbe_size = 0;
+
+ GF_ASSERT(frame);
+ GF_ASSERT(priv);
+ GF_ASSERT(_inode);
+ GF_ASSERT(this);
+ GF_ASSERT(local);
+
+ if (ctx != NULL && (ctx->hard_lim > 0 || ctx->soft_lim > 0)) {
+ wouldbe_size = ctx->size + delta;
+
+ LOCK(&ctx->lock);
{
+ timeout = priv->soft_timeout;
- if (dict != NULL) {
- ret = dict_get_bin (dict, QUOTA_SIZE_KEY,
- (void **) &size);
- if (ret == 0) {
- ctx->size = ntoh64 (*size);
- gettimeofday (&ctx->tv, NULL);
- }
- }
+ if ((ctx->soft_lim >= 0) && (wouldbe_size > ctx->soft_lim)) {
+ timeout = priv->hard_timeout;
+ }
- if (local->limit != ctx->limit) {
- ctx->limit = local->limit;
- }
+ if (!just_validated && quota_timeout(ctx->validate_time, timeout)) {
+ need_validate = 1;
+ } else if (wouldbe_size >= ctx->hard_lim) {
+ hard_limit_exceeded = 1;
+ }
+ }
+ UNLOCK(&ctx->lock);
- ctx->buf = *buf;
+ if (need_validate && *skip_check != _gf_true) {
+ *skip_check = _gf_true;
+ ret = quota_validate(frame, _inode, this, quota_validate_cbk);
+ if (ret < 0) {
+ *op_errno = -ret;
+ *skip_check = _gf_false;
+ }
+ goto out;
+ }
- if (!(IA_ISREG (buf->ia_type) || IA_ISLNK (buf->ia_type))) {
- goto unlock;
- }
+ if (hard_limit_exceeded) {
+ local->op_ret = -1;
+ local->op_errno = EDQUOT;
- if (local->loc.name == NULL)
- goto unlock;
+ space_available = ctx->hard_lim - ctx->size;
- list_for_each_entry (dentry, &ctx->parents, next) {
- if ((strcmp (dentry->name, local->loc.name) == 0) &&
- (uuid_compare (local->loc.parent->gfid,
- dentry->par) == 0)) {
- found = 1;
- break;
- }
- }
+ if (space_available < 0)
+ space_available = 0;
- if (!found) {
- dentry = __quota_dentry_new (ctx,
- (char *)local->loc.name,
- local->loc.parent->gfid);
- if (dentry == NULL) {
- /*
- gf_log (this->name, GF_LOG_WARNING,
- "cannot create a new dentry (par:%"
- PRId64", name:%s) for inode(ino:%"
- PRId64", gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
- */
- op_ret = -1;
- op_errno = ENOMEM;
- goto unlock;
- }
- }
+ if ((local->space_available < 0) ||
+ (local->space_available > space_available)) {
+ local->space_available = space_available;
+ }
+
+ if (space_available == 0) {
+ *op_errno = EDQUOT;
+ goto out;
+ }
}
-unlock:
- UNLOCK (&ctx->lock);
-unwind:
- QUOTA_STACK_UNWIND (lookup, frame, op_ret, op_errno, inode, buf,
- dict, postparent);
- return 0;
-}
+ /* We log usage only if quota limit is configured on
+ that inode. */
+ quota_log_usage(this, ctx, _inode, delta);
+ }
+ ret = 0;
+out:
+ return ret;
+}
int32_t
-quota_lookup (call_frame_t *frame, xlator_t *this, loc_t *loc,
- dict_t *xattr_req)
+quota_check_limit(call_frame_t *frame, inode_t *inode, xlator_t *this)
{
- int32_t ret = -1;
- int64_t limit = -1;
- limits_t *limit_node = NULL;
- gf_boolean_t dict_newed = _gf_false;
- quota_priv_t *priv = NULL;
- quota_local_t *local = NULL;
+ int32_t ret = -1, op_errno = EINVAL;
+ inode_t *_inode = NULL, *parent = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_priv_t *priv = NULL;
+ quota_local_t *local = NULL;
+ quota_local_t *par_local = NULL;
+ char just_validated = 0;
+ int64_t delta = 0;
+ int8_t object_delta = 0;
+ uint64_t value = 0;
+ gf_boolean_t skip_check = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("quota", this, err);
+ GF_VALIDATE_OR_GOTO(this->name, frame, err);
+ GF_VALIDATE_OR_GOTO(this->name, inode, err);
+
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO(this->name, local, err);
+
+ if (local->par_frame) {
+ par_local = local->par_frame->local;
+ GF_VALIDATE_OR_GOTO(this->name, par_local, err);
+ } else {
+ par_local = local;
+ }
+
+ delta = par_local->delta;
+ object_delta = par_local->object_delta;
+
+ GF_VALIDATE_OR_GOTO(this->name, par_local->stub, err);
+ /* Allow all the trusted clients
+ * Don't block the gluster internal processes like rebalance, gsyncd,
+ * self heal etc from the disk quotas.
+ *
+ * Method: Allow all the clients with PID negative. This is by the
+ * assumption that any kernel assigned pid doesn't have the negative
+ * number.
+ */
+ if (0 > frame->root->pid) {
+ ret = 0;
+ quota_link_count_decrement(frame);
+ goto done;
+ }
- priv = this->private;
+ priv = this->private;
- list_for_each_entry (limit_node, &priv->limit_head, limit_list) {
- if (strcmp (limit_node->path, loc->path) == 0) {
- limit = limit_node->value;
- }
- }
+ inode_ctx_get(inode, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
- local = quota_local_new ();
- if (local == NULL) {
- goto err;
- }
+ _inode = inode_ref(inode);
- ret = loc_copy (&local->loc, loc);
- if (ret == -1) {
- goto err;
+ LOCK(&local->lock);
+ {
+ just_validated = local->just_validated;
+ local->just_validated = 0;
+ }
+ UNLOCK(&local->lock);
+
+ do {
+ /* In a rename operation, enforce should be stopped at common
+ ancestor */
+ if (!gf_uuid_is_null(par_local->common_ancestor) &&
+ !gf_uuid_compare(_inode->gfid, par_local->common_ancestor)) {
+ quota_link_count_decrement(frame);
+ break;
}
- frame->local = local;
+ if (object_delta <= 0)
+ goto skip_check_object_limit;
- local->limit = limit;
+ ret = quota_check_object_limit(frame, ctx, priv, _inode, this,
+ &op_errno, just_validated, par_local,
+ &skip_check);
+ if (skip_check == _gf_true)
+ goto done;
- if (limit < 0) {
- goto wind;
+ if (ret) {
+ if (op_errno != EDQUOT)
+ gf_msg(this->name, GF_LOG_ERROR, 0, Q_MSG_ENFORCEMENT_FAILED,
+ "Failed to "
+ "check quota object limit");
+ goto err;
}
- if (xattr_req == NULL) {
- xattr_req = dict_new ();
- dict_newed = _gf_true;
+ skip_check_object_limit:
+ ret = quota_check_size_limit(frame, ctx, priv, _inode, this, &op_errno,
+ just_validated, delta, par_local,
+ &skip_check);
+ if (skip_check == _gf_true)
+ goto done;
+
+ if (ret) {
+ if (op_errno != EDQUOT)
+ gf_msg(this->name, GF_LOG_ERROR, 0, Q_MSG_ENFORCEMENT_FAILED,
+ "Failed to "
+ "check quota size limit");
+ goto err;
}
- ret = dict_set_uint64 (xattr_req, QUOTA_SIZE_KEY, 0);
- if (ret < 0) {
+ if (__is_root_gfid(_inode->gfid)) {
+ quota_link_count_decrement(frame);
+ break;
+ }
+
+ parent = inode_parent(_inode, 0, NULL);
+ if (parent == NULL) {
+ ret = quota_build_ancestry(_inode, quota_check_limit_continuation,
+ frame);
+ if (ret < 0) {
+ op_errno = -ret;
goto err;
+ }
+
+ break;
}
-wind:
- STACK_WIND (frame, quota_lookup_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+ inode_unref(_inode);
+ _inode = parent;
+ just_validated = 0;
- ret = 0;
+ value = 0;
+ inode_ctx_get(_inode, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ } while (1);
+
+done:
+ if (_inode != NULL) {
+ inode_unref(_inode);
+ _inode = NULL;
+ }
+ return 0;
err:
- if (ret < 0) {
- QUOTA_STACK_UNWIND (lookup, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL);
+ quota_handle_validate_error(frame, -1, op_errno);
+
+ inode_unref(_inode);
+ return 0;
+}
+
+inode_t *
+do_quota_check_limit(call_frame_t *frame, inode_t *inode, xlator_t *this,
+ quota_dentry_t *dentry, gf_boolean_t force)
+{
+ int32_t ret = -1;
+ inode_t *parent = NULL;
+ call_frame_t *new_frame = NULL;
+ quota_local_t *new_local = NULL;
+
+ parent = inode_parent(inode, dentry->par, dentry->name);
+ if (parent == NULL) {
+ if (force)
+ parent = inode_find(inode->table, dentry->par);
+ else
+ goto out;
+ }
+ if (parent == NULL)
+ goto out;
+
+ new_frame = copy_frame(frame);
+ if (new_frame == NULL)
+ goto out;
+
+ new_local = quota_local_new();
+ if (new_local == NULL)
+ goto out;
+
+ new_frame->local = new_local;
+ new_local->par_frame = frame;
+
+ quota_check_limit(new_frame, parent, this);
+
+ ret = 0;
+out:
+ if (ret < 0) {
+ if (parent) {
+ /* Caller should decrement link_count, in case parent is
+ * NULL
+ */
+ quota_handle_validate_error(frame, -1, ENOMEM);
}
- if (dict_newed == _gf_true) {
- dict_unref (xattr_req);
+ if (new_frame) {
+ new_frame->local = NULL;
+ STACK_DESTROY(new_frame->root);
}
+ }
- return 0;
+ return parent;
}
-
-void
-quota_update_size (xlator_t *this, inode_t *inode, char *name, uuid_t par,
- int64_t delta)
+static int
+quota_get_limits(xlator_t *this, dict_t *dict, int64_t *hard_lim,
+ int64_t *soft_lim, int64_t *object_hard_limit,
+ int64_t *object_soft_limit)
{
- inode_t *_inode = NULL;
- inode_t *parent = NULL;
- uint64_t value = 0;
- quota_inode_ctx_t *ctx = NULL;
- uuid_t trav_uuid = {0,};
+ quota_limits_t *limit = NULL;
+ quota_limits_t *object_limit = NULL;
+ quota_priv_t *priv = NULL;
+ int64_t soft_lim_percent = 0;
+ int64_t *ptr = NULL;
+ int ret = 0;
- GF_VALIDATE_OR_GOTO ("quota", this, out);
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
+ if ((this == NULL) || (dict == NULL) || (hard_lim == NULL) ||
+ (soft_lim == NULL))
+ goto out;
- inode_ctx_get (inode, this, &value);
- ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ priv = this->private;
- _inode = inode_ref (inode);
+ ret = dict_get_bin(dict, QUOTA_LIMIT_KEY, (void **)&ptr);
+ limit = (quota_limits_t *)ptr;
- if ( par != NULL ) {
- uuid_copy (trav_uuid, par);
- }
-
- do {
- if ((ctx != NULL) && (ctx->limit >= 0)) {
- LOCK (&ctx->lock);
- {
- ctx->size += delta;
- }
- UNLOCK (&ctx->lock);
- }
+ if (limit) {
+ *hard_lim = ntoh64(limit->hl);
+ soft_lim_percent = ntoh64(limit->sl);
+ }
- if (__is_root_gfid (_inode->gfid)) {
- break;
- }
+ if (soft_lim_percent < 0) {
+ soft_lim_percent = priv->default_soft_lim;
+ }
- parent = inode_parent (_inode, trav_uuid, name);
- if (parent == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "cannot find parent for inode (gfid:%s), hence "
- "aborting size updation of parents",
- uuid_utoa (_inode->gfid));
- }
+ if ((*hard_lim > 0) && (soft_lim_percent > 0)) {
+ *soft_lim = (soft_lim_percent * (*hard_lim)) / 100;
+ }
- if (name != NULL) {
- name = NULL;
- uuid_clear (trav_uuid);
- }
+ ret = dict_get_bin(dict, QUOTA_LIMIT_OBJECTS_KEY, (void **)&ptr);
+ if (ret)
+ return 0;
+ object_limit = (quota_limits_t *)ptr;
- inode_unref (_inode);
- _inode = parent;
+ if (object_limit) {
+ *object_hard_limit = ntoh64(object_limit->hl);
+ soft_lim_percent = ntoh64(object_limit->sl);
+ }
- if (_inode == NULL) {
- break;
- }
+ if (soft_lim_percent < 0) {
+ soft_lim_percent = priv->default_soft_lim;
+ }
- inode_ctx_get (_inode, this, &value);
- ctx = (quota_inode_ctx_t *)(unsigned long)value;
- } while (1);
+ if ((*object_hard_limit > 0) && (soft_lim_percent > 0)) {
+ *object_soft_limit = (soft_lim_percent * (*object_hard_limit)) / 100;
+ }
out:
- return;
+ return 0;
}
-
-int32_t
-quota_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+int
+quota_fill_inodectx(xlator_t *this, inode_t *inode, dict_t *dict, loc_t *loc,
+ struct iatt *buf, int32_t *op_errno)
{
- int32_t ret = 0;
- uint64_t ctx_int = 0;
- quota_inode_ctx_t *ctx = NULL;
- quota_local_t *local = NULL;
- quota_dentry_t *dentry = NULL;
- int64_t delta = 0;
-
- local = frame->local;
-
- if ((op_ret < 0) || (local == NULL)) {
- goto out;
- }
-
- ret = inode_ctx_get (local->loc.inode, this, &ctx_int);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "%s: failed to get the context", local->loc.path);
- goto out;
+ int32_t ret = -1;
+ char found = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL;
+ uint64_t value = 0;
+ int64_t hard_lim = 0;
+ int64_t soft_lim = 0;
+ int64_t object_hard_limit = 0;
+ int64_t object_soft_limit = 0;
+
+ quota_get_limits(this, dict, &hard_lim, &soft_lim, &object_hard_limit,
+ &object_soft_limit);
+
+ inode_ctx_get(inode, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+
+ if ((((ctx == NULL) || (ctx->hard_lim == hard_lim)) && (hard_lim < 0) &&
+ !QUOTA_REG_OR_LNK_FILE(buf->ia_type))) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = quota_inode_ctx_get(inode, this, &ctx, 1);
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_INODE_CTX_GET_FAILED,
+ "cannot create quota "
+ "context in inode(gfid:%s)",
+ uuid_utoa(inode->gfid));
+ ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ ctx->hard_lim = hard_lim;
+ ctx->soft_lim = soft_lim;
+ ctx->object_hard_lim = object_hard_limit;
+ ctx->object_soft_lim = object_soft_limit;
+
+ ctx->buf = *buf;
+
+ if (!QUOTA_REG_OR_LNK_FILE(buf->ia_type)) {
+ goto unlock;
+ }
+
+ /* do nothing if it is a nameless lookup */
+ if (loc->name == NULL || !loc->parent)
+ goto unlock;
+
+ list_for_each_entry(dentry, &ctx->parents, next)
+ {
+ if ((strcmp(dentry->name, loc->name) == 0) &&
+ (gf_uuid_compare(loc->parent->gfid, dentry->par) == 0)) {
+ found = 1;
+ break;
+ }
+ }
+
+ if (!found) {
+ dentry = __quota_dentry_new(ctx, (char *)loc->name,
+ loc->parent->gfid);
+ if (dentry == NULL) {
+ /*
+ gf_msg (this->name, GF_LOG_WARNING, ENOMEM,
+ Q_MSG_ENOMEM,
+ "cannot create a new dentry (par:%"
+- PRId64", name:%s) for inode(ino:%"
+- PRId64", gfid:%s)",
+- uuid_utoa (local->loc.inode->gfid));
+ */
+ ret = -1;
+ *op_errno = ENOMEM;
+ goto unlock;
+ }
}
+ }
+unlock:
+ UNLOCK(&ctx->lock);
- ctx = (quota_inode_ctx_t *)(unsigned long) ctx_int;
-
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in %s (gfid:%s)",
- local->loc.path, uuid_utoa (local->loc.inode->gfid));
- goto out;
- }
+out:
+ return ret;
+}
- LOCK (&ctx->lock);
- {
- ctx->buf = *postbuf;
- }
- UNLOCK (&ctx->lock);
+/*
+ * return _gf_true if enforcement is needed and _gf_false otherwise
+ */
+gf_boolean_t
+should_quota_enforce(xlator_t *this, dict_t *dict, glusterfs_fop_t fop)
+{
+ int ret = 0;
- list_for_each_entry (dentry, &ctx->parents, next) {
- delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512;
- quota_update_size (this, local->loc.inode,
- dentry->name, dentry->par, delta);
- }
+ ret = dict_check_flag(dict, GF_INTERNAL_CTX_KEY, GF_DHT_HEAL_DIR);
+ if (fop == GF_FOP_MKDIR && ret == DICT_FLAG_SET) {
+ return _gf_false;
+ } else if (ret == -ENOENT) {
+ gf_msg(this->name, GF_LOG_DEBUG, EINVAL, Q_MSG_INTERNAL_FOP_KEY_MISSING,
+ "No internal fop context present");
+ goto out;
+ }
out:
- QUOTA_STACK_UNWIND (writev, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
-
- return 0;
+ return _gf_true;
}
-
int32_t
-quota_writev_helper (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t off,
- uint32_t flags, struct iobref *iobref, dict_t *xdata)
+quota_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *dict, struct iatt *postparent)
{
- quota_local_t *local = NULL;
- int32_t op_errno = EINVAL;
+ quota_local_t *local = NULL;
+ inode_t *this_inode = NULL;
- local = frame->local;
- if (local == NULL) {
- gf_log (this->name, GF_LOG_WARNING, "local is NULL");
- goto unwind;
- }
+ local = frame->local;
+ frame->local = NULL;
- if (local->op_ret == -1) {
- op_errno = local->op_errno;
- goto unwind;
- }
+ if (op_ret >= 0 && inode) {
+ this_inode = inode_ref(inode);
- STACK_WIND (frame, quota_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev, fd, vector, count, off,
- flags, iobref, xdata);
- return 0;
+ op_ret = quota_fill_inodectx(this, inode, dict, &local->loc, buf,
+ &op_errno);
+ if (op_ret < 0)
+ op_errno = ENOMEM;
+ }
-unwind:
- QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
+ QUOTA_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, dict,
+ postparent);
+
+ if (op_ret < 0 || this_inode == NULL || gf_uuid_is_null(this_inode->gfid))
+ goto out;
+ check_ancestory_2(this, local, this_inode);
+
+out:
+ if (this_inode)
+ inode_unref(this_inode);
+
+ quota_local_cleanup(local);
+
+ return 0;
+}
int32_t
-quota_writev (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iovec *vector, int32_t count, off_t off,
- uint32_t flags, struct iobref *iobref, dict_t *xdata)
-{
- int32_t ret = -1, op_errno = EINVAL;
- int32_t parents = 0;
- uint64_t size = 0;
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
- quota_priv_t *priv = NULL;
- call_stub_t *stub = NULL;
- quota_dentry_t *dentry = NULL;
-
- GF_ASSERT (frame);
- GF_VALIDATE_OR_GOTO ("quota", this, unwind);
- GF_VALIDATE_OR_GOTO (this->name, fd, unwind);
-
- local = quota_local_new ();
- if (local == NULL) {
- goto unwind;
- }
+quota_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
- frame->local = local;
- local->loc.inode = inode_ref (fd->inode);
+ priv = this->private;
- ret = quota_inode_ctx_get (fd->inode, -1, this, NULL, NULL, &ctx, 0);
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
- uuid_utoa (fd->inode->gfid));
- goto unwind;
- }
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
- stub = fop_writev_stub (frame, quota_writev_helper, fd, vector, count,
- off, flags, iobref, xdata);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto unwind;
- }
+ xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new();
+ if (!xattr_req)
+ goto err;
- priv = this->private;
- GF_VALIDATE_OR_GOTO (this->name, priv, unwind);
+ local = quota_local_new();
+ if (local == NULL) {
+ goto err;
+ }
- size = iov_length (vector, count);
- LOCK (&ctx->lock);
- {
- list_for_each_entry (dentry, &ctx->parents, next) {
- parents++;
- }
- }
- UNLOCK (&ctx->lock);
+ frame->local = local;
+ loc_copy(&local->loc, loc);
- local->delta = size;
- local->stub = stub;
- local->link_count = parents;
+ ret = dict_set_int8(xattr_req, QUOTA_LIMIT_KEY, 1);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "dict set of key for "
+ "hard-limit failed");
+ goto err;
+ }
- list_for_each_entry (dentry, &ctx->parents, next) {
- ret = quota_check_limit (frame, fd->inode, this, dentry->name,
- dentry->par);
- if (ret == -1) {
- break;
- }
- }
+ ret = dict_set_int8(xattr_req, QUOTA_LIMIT_OBJECTS_KEY, 1);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "dict set of key for quota object limit failed");
+ goto err;
+ }
- stub = NULL;
+ STACK_WIND(frame, quota_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
- LOCK (&local->lock);
- {
- local->link_count = 0;
- if (local->validate_count == 0) {
- stub = local->stub;
- local->stub = NULL;
- }
- }
- UNLOCK (&local->lock);
+ ret = 0;
- if (stub != NULL) {
- call_resume (stub);
- }
+err:
+ if (xattr_req)
+ dict_unref(xattr_req);
- return 0;
+ if (ret < 0) {
+ QUOTA_STACK_UNWIND(lookup, frame, -1, ENOMEM, NULL, NULL, NULL, NULL);
+ }
-unwind:
- QUOTA_STACK_UNWIND (writev, frame, -1, op_errno, NULL, NULL, NULL);
- return 0;
-}
+ return 0;
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup,
+ loc, xattr_req);
+ return 0;
+}
int32_t
-quota_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+quota_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- QUOTA_STACK_UNWIND (mkdir, frame, op_ret, op_errno, inode,
- buf, preparent, postparent, xdata);
- return 0;
+ int32_t ret = 0;
+ uint64_t ctx_int = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+
+ local = frame->local;
+
+ if ((op_ret < 0) || (local == NULL) || (postbuf == NULL)) {
+ goto out;
+ }
+
+ ret = inode_ctx_get(local->loc.inode, this, &ctx_int);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_INODE_CTX_GET_FAILED,
+ "%s: failed to get the "
+ "context",
+ local->loc.path);
+ goto out;
+ }
+
+ ctx = (quota_inode_ctx_t *)(unsigned long)ctx_int;
+
+ if (ctx == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_INODE_CTX_GET_FAILED,
+ "quota context not set in %s (gfid:%s)", local->loc.path,
+ uuid_utoa(local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ ctx->buf = *postbuf;
+ }
+ UNLOCK(&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+
+ return 0;
}
+static int gf_quota_enforcer_log;
int32_t
-quota_mkdir_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, mode_t umask, dict_t *xdata)
+quota_writev_helper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
{
- quota_local_t *local = NULL;
- int32_t op_errno = EINVAL;
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+ struct iovec *new_vector = NULL;
+ int32_t new_count = 0;
- local = frame->local;
- if (local == NULL) {
- gf_log (this->name, GF_LOG_WARNING, "local is NULL");
- goto unwind;
- }
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO("quota", local, unwind);
+
+ if (local->op_ret == -1) {
op_errno = local->op_errno;
- if (local->op_ret == -1) {
+ if ((op_errno == EDQUOT) && (local->space_available > 0)) {
+ new_count = iov_subset(vector, count, 0, local->space_available,
+ &new_vector, 0);
+ if (new_count < 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
goto unwind;
+ }
+
+ vector = new_vector;
+ count = new_count;
+ } else if (op_errno == ENOENT || op_errno == ESTALE) {
+ /* We may get ENOENT/ESTALE in case of below scenario
+ * fd = open file.txt
+ * unlink file.txt
+ * write on fd
+ * Here build_ancestry can fail as the file is removed.
+ * For now ignore ENOENT/ESTALE with writes on active fd
+ * We need to re-visit this code once we understand
+ * how other file-system behave in this scenario
+ */
+ gf_msg_debug(this->name, 0,
+ "quota enforcer failed "
+ "with ENOENT/ESTALE on %s, cannot check "
+ "quota limits and allowing writes",
+ uuid_utoa(fd->inode->gfid));
+ } else if ((op_errno == EINVAL) &&
+ !inode_parent(local->loc.inode, 0, NULL)) {
+ /* We may get INVAL with parent == NULL,
+ * in case of below scenario
+ * 1. enable quota
+ * 2. glusterfsd stop/start
+ * 3. nameless lookup
+ * 4. write on fd
+ * Here build_ancestry can fail as the file's pgfid
+ * is't exist.
+ * For now ignore EINVAL with writes on active fd
+ * untils the pgfid is created at name lookup
+ */
+ GF_LOG_OCCASIONALLY(gf_quota_enforcer_log, this->name,
+ GF_LOG_CRITICAL,
+ "Quota cannot be enforced as "
+ "parent is not available and writes are being "
+ "allowed without checking whether they are "
+ "within quota limits. This can happen if Quota "
+ "crawl is not complete. If crawl has been "
+ "completed, please file a bug.");
+ } else {
+ goto unwind;
}
+ }
- STACK_WIND (frame, quota_mkdir_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
- return 0;
+ STACK_WIND(frame, quota_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, off, flags,
+ iobref, xdata);
+
+ if (new_vector != NULL)
+ GF_FREE(new_vector);
+
+ return 0;
unwind:
- QUOTA_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL);
- return 0;
+ QUOTA_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
}
-
int32_t
-quota_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
+quota_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
{
- int32_t ret = 0, op_errno = 0;
- quota_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- local = quota_local_new ();
- if (local == NULL) {
- op_errno = ENOMEM;
- goto err;
- }
-
- frame->local = local;
-
- local->link_count = 1;
-
- ret = loc_copy (&local->loc, loc);
- if (ret) {
- op_errno = ENOMEM;
- gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
- goto err;
- }
-
- stub = fop_mkdir_stub (frame, quota_mkdir_helper, loc, mode, umask,
- xdata);
- if (stub == NULL) {
- op_errno = ENOMEM;
- goto err;
- }
-
+ quota_priv_t *priv = NULL;
+ int32_t op_errno = EINVAL;
+ int32_t parents = 0;
+ int32_t fail_count = 0;
+ uint64_t size = 0;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL, *tmp = NULL;
+ call_stub_t *stub = NULL;
+ struct list_head head;
+ inode_t *par_inode = NULL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
+
+ INIT_LIST_HEAD(&head);
+
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO("quota", this, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, fd, unwind);
+
+ local = quota_local_new();
+ if (local == NULL) {
+ goto unwind;
+ }
+
+ frame->local = local;
+ local->loc.inode = inode_ref(fd->inode);
+
+ (void)quota_inode_ctx_get(fd->inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ gf_msg_debug(this->name, 0,
+ "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa(fd->inode->gfid));
+ }
+
+ stub = fop_writev_stub(frame, quota_writev_helper, fd, vector, count, off,
+ flags, iobref, xdata);
+ if (stub == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, unwind);
+
+ parents = quota_add_parents_from_ctx(ctx, &head);
+ if (parents == -1) {
+ op_errno = errno;
+ goto unwind;
+ }
+
+ size = iov_length(vector, count);
+
+ LOCK(&local->lock);
+ {
+ local->delta = size;
+ local->object_delta = 0;
+ local->link_count = (parents != 0) ? parents : 1;
local->stub = stub;
- local->delta = 0;
-
- quota_check_limit (frame, loc->parent, this, NULL, NULL);
-
- stub = NULL;
-
- LOCK (&local->lock);
+ }
+ UNLOCK(&local->lock);
+
+ if (parents == 0) {
+ /* nameless lookup on this inode, allow quota to reconstruct
+ * ancestry as part of check_limit.
+ */
+ quota_check_limit(frame, fd->inode, this);
+ } else {
+ list_for_each_entry_safe(dentry, tmp, &head, next)
{
- if (local->validate_count == 0) {
- stub = local->stub;
- local->stub = NULL;
+ par_inode = do_quota_check_limit(frame, fd->inode, this, dentry,
+ _gf_false);
+ if (par_inode == NULL) {
+ if (ctx) {
+ /* remove stale entry from inode ctx */
+ quota_dentry_del(ctx, dentry->name, dentry->par);
+ parents--;
+ fail_count++;
}
+ } else {
+ inode_unref(par_inode);
+ }
+ __quota_dentry_free(dentry);
+ }
- local->link_count = 0;
+ if (parents == 0) {
+ LOCK(&local->lock);
+ {
+ local->link_count++;
+ }
+ UNLOCK(&local->lock);
+ quota_check_limit(frame, fd->inode, this);
}
- UNLOCK (&local->lock);
- if (stub != NULL) {
- call_resume (stub);
+ while (fail_count != 0) {
+ quota_link_count_decrement(frame);
+ fail_count--;
}
+ }
- return 0;
-err:
- QUOTA_STACK_UNWIND (mkdir, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
+ return 0;
- return 0;
+unwind:
+ QUOTA_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
+ fd, vector, count, off, flags, iobref, xdata);
+ return 0;
}
+int32_t
+quota_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ QUOTA_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
int32_t
-quota_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+quota_mkdir_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- int32_t ret = -1;
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
- quota_dentry_t *dentry = NULL;
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- local = frame->local;
- if (op_ret < 0) {
- goto unwind;
- }
+ local = frame->local;
- ret = quota_inode_ctx_get (inode, -1, this, NULL, buf, &ctx, 1);
- if ((ret == -1) || (ctx == NULL)) {
- gf_log (this->name, GF_LOG_WARNING, "cannot create quota "
- "context in inode(gfid:%s)",
- uuid_utoa (inode->gfid));
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
+ GF_VALIDATE_OR_GOTO("quota", local, unwind);
- LOCK (&ctx->lock);
- {
- ctx->buf = *buf;
-
- dentry = __quota_dentry_new (ctx, (char *)local->loc.name,
- local->loc.parent->gfid);
- if (dentry == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot create a new dentry (name:%s) for "
- "inode(gfid:%s)", local->loc.name,
- uuid_utoa (local->loc.inode->gfid));
- op_ret = -1;
- op_errno = ENOMEM;
- goto unlock;
- }
- }
-unlock:
- UNLOCK (&ctx->lock);
+ op_errno = local->op_errno;
+
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
+
+ STACK_WIND(frame, quota_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+
+ return 0;
unwind:
- QUOTA_STACK_UNWIND (create, frame, op_ret, op_errno, fd, inode, buf,
- preparent, postparent, xdata);
- return 0;
+ QUOTA_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
-
int32_t
-quota_create_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
- dict_t *xdata)
+quota_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- quota_local_t *local = NULL;
- int32_t op_errno = EINVAL;
+ quota_priv_t *priv = NULL;
+ int32_t ret = 0, op_errno = 0;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
+
+ if (!should_quota_enforce(this, xdata, GF_FOP_MKDIR)) {
+ gf_msg(this->name, GF_LOG_DEBUG, 0, Q_MSG_ENFORCEMENT_SKIPPED,
+ "Enforcement has been skipped(internal fop).");
+ goto off;
+ }
+
+ local = quota_local_new();
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ frame->local = local;
+
+ ret = loc_copy(&local->loc, loc);
+ if (ret) {
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "loc_copy failed");
+ goto err;
+ }
+
+ stub = fop_mkdir_stub(frame, quota_mkdir_helper, loc, mode, umask, xdata);
+ if (stub == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ LOCK(&local->lock);
+ {
+ local->stub = stub;
+ local->delta = 0;
+ local->object_delta = 1;
+ local->link_count = 1;
+ }
+ UNLOCK(&local->lock);
- local = frame->local;
- if (local == NULL) {
- gf_log (this->name, GF_LOG_WARNING, "local is NULL");
- goto unwind;
- }
+ quota_check_limit(frame, loc->parent, this);
+ return 0;
- if (local->op_ret == -1) {
- op_errno = local->op_errno;
- goto unwind;
- }
+err:
+ QUOTA_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
- STACK_WIND (frame, quota_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, loc, flags, mode, umask,
- fd, xdata);
- return 0;
+ return 0;
-unwind:
- QUOTA_STACK_UNWIND (create, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL, NULL);
- return 0;
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+ loc, mode, umask, xdata);
+
+ return 0;
}
+int32_t
+quota_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL;
+
+ local = frame->local;
+ if (op_ret < 0) {
+ goto unwind;
+ }
+
+ ret = quota_inode_ctx_get(inode, this, &ctx, 1);
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_INODE_CTX_GET_FAILED,
+ "cannot create quota "
+ "context in inode(gfid:%s)",
+ uuid_utoa(inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ ctx->buf = *buf;
+
+ dentry = __quota_dentry_new(ctx, (char *)local->loc.name,
+ local->loc.parent->gfid);
+ if (dentry == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "cannot create a new dentry "
+ "(name:%s) for inode(gfid:%s)",
+ local->loc.name, uuid_utoa(local->loc.inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
+unlock:
+ UNLOCK(&ctx->lock);
+
+unwind:
+ QUOTA_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return 0;
+}
int32_t
-quota_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+quota_create_helper(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
{
- int32_t ret = -1;
- quota_local_t *local = NULL;
- call_stub_t *stub = NULL;
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- local = quota_local_new ();
- if (local == NULL) {
- goto err;
- }
+ local = frame->local;
- frame->local = local;
+ GF_VALIDATE_OR_GOTO("quota", local, unwind);
- ret = loc_copy (&local->loc, loc);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
- goto err;
- }
+ if (local->op_ret == -1) {
+ op_errno = local->op_errno;
+ goto unwind;
+ }
- stub = fop_create_stub (frame, quota_create_helper, loc, flags, mode,
- umask, fd, xdata);
- if (stub == NULL) {
- goto err;
- }
+ STACK_WIND(frame, quota_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+
+unwind:
+ QUOTA_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
+}
+int32_t
+quota_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ int32_t op_errno = 0;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
+ QUOTA_WIND_FOR_INTERNAL_FOP(xdata, off);
+
+ local = quota_local_new();
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ frame->local = local;
+
+ ret = loc_copy(&local->loc, loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "loc_copy failed");
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ stub = fop_create_stub(frame, quota_create_helper, loc, flags, mode, umask,
+ fd, xdata);
+ if (stub == NULL) {
+ goto err;
+ }
+
+ LOCK(&local->lock);
+ {
local->link_count = 1;
local->stub = stub;
local->delta = 0;
+ local->object_delta = 1;
+ }
+ UNLOCK(&local->lock);
- quota_check_limit (frame, loc->parent, this, NULL, NULL);
-
- stub = NULL;
-
- LOCK (&local->lock);
- {
- local->link_count = 0;
- if (local->validate_count == 0) {
- stub = local->stub;
- local->stub = NULL;
- }
- }
- UNLOCK (&local->lock);
-
- if (stub != NULL) {
- call_resume (stub);
- }
-
- return 0;
+ quota_check_limit(frame, loc->parent, this);
+ return 0;
err:
- QUOTA_STACK_UNWIND (create, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
- NULL, NULL);
+ QUOTA_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
- return 0;
-}
+ return 0;
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
+ loc, flags, mode, umask, fd, xdata);
+ return 0;
+}
int32_t
-quota_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+quota_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
- uint64_t value = 0;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ uint64_t value = 0;
- if (op_ret < 0) {
- goto out;
- }
+ if (op_ret < 0) {
+ goto out;
+ }
- local = (quota_local_t *) frame->local;
+ local = (quota_local_t *)frame->local;
- inode_ctx_get (local->loc.inode, this, &value);
- ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ inode_ctx_get(local->loc.inode, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
- goto out;
- }
+ if (ctx == NULL) {
+ gf_msg(this->name, GF_LOG_INFO, EINVAL, Q_MSG_INODE_CTX_GET_FAILED,
+ "quota context not set inode (gfid:%s)",
+ uuid_utoa(local->loc.gfid));
+ goto out;
+ }
- quota_update_size (this, local->loc.inode, (char *)local->loc.name,
- local->loc.parent->gfid,
- (-(ctx->buf.ia_blocks * 512)));
+ quota_dentry_del(ctx, local->loc.name, local->loc.parent->gfid);
out:
- QUOTA_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent,
- postparent, xdata);
- return 0;
+ QUOTA_STACK_UNWIND(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
}
-
int32_t
-quota_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
+quota_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- int32_t ret = 0;
- quota_local_t *local = NULL;
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
- local = quota_local_new ();
- if (local == NULL) {
- goto err;
- }
+ priv = this->private;
- frame->local = local;
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
- ret = loc_copy (&local->loc, loc);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
- goto err;
- }
+ local = quota_local_new();
+ if (local == NULL) {
+ goto err;
+ }
- STACK_WIND (frame, quota_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ frame->local = local;
- ret = 0;
+ ret = loc_copy(&local->loc, loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "loc_copy failed");
+ goto err;
+ }
+
+ STACK_WIND(frame, quota_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+
+ ret = 0;
err:
- if (ret == -1) {
- QUOTA_STACK_UNWIND (unlink, frame, -1, 0, NULL, NULL, NULL);
- }
+ if (ret == -1) {
+ QUOTA_STACK_UNWIND(unlink, frame, -1, 0, NULL, NULL, NULL);
+ }
- return 0;
-}
+ return 0;
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
+ loc, xflag, xdata);
+ return 0;
+}
int32_t
-quota_link_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+quota_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int32_t ret = -1;
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
- quota_dentry_t *dentry = NULL;
- char found = 0;
-
- if (op_ret < 0) {
- goto out;
- }
-
- local = (quota_local_t *) frame->local;
-
- quota_update_size (this, local->loc.parent, NULL, NULL,
- (buf->ia_blocks * 512));
-
- ret = quota_inode_ctx_get (inode, -1, this, NULL, NULL, &ctx, 0);
- if ((ret == -1) || (ctx == NULL)) {
- gf_log (this->name, GF_LOG_WARNING, "cannot find quota "
- "context in %s (gfid:%s)", local->loc.path,
- uuid_utoa (inode->gfid));
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL;
+ char found = 0;
+
+ if (op_ret < 0) {
+ goto out;
+ }
+
+ local = (quota_local_t *)frame->local;
+
+ ret = quota_inode_ctx_get(inode, this, &ctx, 0);
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_msg_debug(this->name, 0,
+ "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ list_for_each_entry(dentry, &ctx->parents, next)
+ {
+ if ((strcmp(dentry->name, local->loc.name) == 0) &&
+ (gf_uuid_compare(local->loc.parent->gfid, dentry->par) == 0)) {
+ found = 1;
+
+ gf_msg_debug(this->name, 0,
+ "new entry being"
+ " linked (name:%s) for inode "
+ "(gfid:%s) is already present "
+ "in inode-dentry-list",
+ dentry->name, uuid_utoa(local->loc.inode->gfid));
+ break;
+ }
+ }
+
+ if (!found) {
+ dentry = __quota_dentry_new(ctx, (char *)local->loc.name,
+ local->loc.parent->gfid);
+ if (dentry == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "cannot create a new dentry (name:%s)"
+ "for inode(gfid:%s)",
+ local->loc.name, uuid_utoa(local->loc.inode->gfid));
op_ret = -1;
- op_errno = EINVAL;
- goto out;
+ op_errno = ENOMEM;
+ goto unlock;
+ }
}
- LOCK (&ctx->lock);
- {
- list_for_each_entry (dentry, &ctx->parents, next) {
- if ((strcmp (dentry->name, local->loc.name) == 0) &&
- (uuid_compare (local->loc.parent->gfid,
- dentry->par) == 0)) {
- found = 1;
- gf_log (this->name, GF_LOG_WARNING,
- "new entry being linked (name:%s) for "
- "inode (gfid:%s) is already present "
- "in inode-dentry-list", dentry->name,
- uuid_utoa (local->loc.inode->gfid));
- break;
- }
- }
-
- if (!found) {
- dentry = __quota_dentry_new (ctx,
- (char *)local->loc.name,
- local->loc.parent->gfid);
- if (dentry == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot create a new dentry (name:%s) "
- "for inode(gfid:%s)", local->loc.name,
- uuid_utoa (local->loc.inode->gfid));
- op_ret = -1;
- op_errno = ENOMEM;
- goto unlock;
- }
- }
-
- ctx->buf = *buf;
- }
+ ctx->buf = *buf;
+ }
unlock:
- UNLOCK (&ctx->lock);
+ UNLOCK(&ctx->lock);
out:
- QUOTA_STACK_UNWIND (link, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
+ QUOTA_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
- return 0;
+ return 0;
}
-
int32_t
-quota_link_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
+quota_link_helper(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
- quota_local_t *local = NULL;
- int32_t op_errno = EINVAL;
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- local = frame->local;
- if (local == NULL) {
- gf_log (this->name, GF_LOG_WARNING, "local is NULL");
- goto unwind;
- }
+ local = frame->local;
- op_errno = local->op_errno;
+ GF_VALIDATE_OR_GOTO("quota", local, unwind);
- if (local->op_ret == -1) {
- goto unwind;
- }
+ op_errno = local->op_errno;
- STACK_WIND (frame, quota_link_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
- return 0;
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
+
+ STACK_WIND(frame, quota_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
unwind:
- QUOTA_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL);
- return 0;
+ QUOTA_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
}
-
-int32_t
-quota_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
- dict_t *xdata)
+void
+quota_link_continue(call_frame_t *frame)
{
- int32_t ret = -1, op_errno = ENOMEM;
- quota_local_t *local = NULL;
- call_stub_t *stub = NULL;
- quota_inode_ctx_t *ctx = NULL;
-
- local = quota_local_new ();
- if (local == NULL) {
- goto err;
- }
-
- frame->local = (void *) local;
-
- ret = loc_copy (&local->loc, newloc);
- if (ret == -1) {
- gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
- goto err;
- }
-
- stub = fop_link_stub (frame, quota_link_helper, oldloc, newloc, xdata);
- if (stub == NULL) {
- goto err;
- }
-
+ int32_t ret = -1;
+ int32_t op_errno = EIO;
+ quota_local_t *local = NULL;
+ uuid_t common_ancestor = {0};
+ xlator_t *this = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ inode_t *src_parent = NULL;
+ inode_t *dst_parent = NULL;
+
+ local = frame->local;
+ this = THIS;
+
+ if (local->op_ret < 0) {
+ op_errno = local->op_errno;
+ goto err;
+ }
+
+ if (local->xdata && dict_get(local->xdata, GLUSTERFS_INTERNAL_FOP_KEY)) {
+ /* Treat link as rename, crawl upwards only till common ancestor
+ */
+ ret = quota_find_common_ancestor(
+ local->oldloc.inode, local->newloc.parent, &common_ancestor);
+ if (ret < 0 || gf_uuid_is_null(common_ancestor)) {
+ gf_msg(this->name, GF_LOG_ERROR, ESTALE,
+ Q_MSG_ANCESTRY_BUILD_FAILED,
+ "failed to get "
+ "common_ancestor for %s and %s",
+ local->oldloc.path, local->newloc.path);
+ op_errno = ESTALE;
+ goto err;
+ }
+ } else {
+ /* Treat link as a new file.
+ * TODO: Currently marker accounts twice for the links created
+ * across directories.
+ * This needs re-visit if marker accounts only once
+ * for the links created across directories
+ */
+ if (local->oldloc.parent)
+ src_parent = inode_ref(local->oldloc.parent);
+ else
+ src_parent = inode_parent(local->oldloc.inode, 0, NULL);
+ dst_parent = local->newloc.parent;
+
+ /* No need to check quota limit if src and dst parents are same
+ */
+ if (src_parent == dst_parent ||
+ gf_uuid_compare(src_parent->gfid, dst_parent->gfid) == 0) {
+ inode_unref(src_parent);
+ goto wind;
+ }
+
+ inode_unref(src_parent);
+ }
+
+ quota_inode_ctx_get(local->oldloc.inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ gf_msg_debug(this->name, 0,
+ "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa(local->oldloc.inode->gfid));
+ }
+
+ LOCK(&local->lock);
+ {
local->link_count = 1;
- local->stub = stub;
+ local->delta = (ctx != NULL) ? ctx->buf.ia_blocks * 512 : 0;
+ local->object_delta = 1;
+ gf_uuid_copy(local->common_ancestor, common_ancestor);
+ }
+ UNLOCK(&local->lock);
- ret = quota_inode_ctx_get (oldloc->inode, -1, this, NULL, NULL, &ctx,
- 0);
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
- oldloc->inode ? uuid_utoa (oldloc->inode->gfid) : "0");
- op_errno = EINVAL;
- goto err;
- }
+ quota_check_limit(frame, local->newloc.parent, this);
+ return;
- local->delta = ctx->buf.ia_blocks * 512;
+err:
+ QUOTA_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return;
- quota_check_limit (frame, newloc->parent, this, NULL, NULL);
+wind:
+ STACK_WIND(frame, quota_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, &(local->oldloc),
+ &(local->newloc), local->xdata);
+ return;
+}
- stub = NULL;
+int32_t
+quota_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ int32_t op_errno = ENOMEM;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
+
+ local = quota_local_new();
+ if (local == NULL) {
+ goto err;
+ }
+
+ frame->local = (void *)local;
+
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ ret = loc_copy(&local->loc, newloc);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "loc_copy failed");
+ goto err;
+ }
+
+ ret = loc_copy(&local->oldloc, oldloc);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "loc_copy failed");
+ goto err;
+ }
+
+ ret = loc_copy(&local->newloc, newloc);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "loc_copy failed");
+ goto err;
+ }
+
+ /* No need to check quota limit if src and dst parents are same */
+ if (oldloc->parent && newloc->parent &&
+ !gf_uuid_compare(oldloc->parent->gfid, newloc->parent->gfid)) {
+ gf_msg_debug(this->name, GF_LOG_DEBUG,
+ "link %s -> %s are "
+ "in the same directory, so skip check limit",
+ oldloc->path, newloc->path);
+ goto wind;
+ }
+
+ stub = fop_link_stub(frame, quota_link_helper, oldloc, newloc, xdata);
+ if (stub == NULL) {
+ goto err;
+ }
+
+ LOCK(&local->lock);
+ {
+ local->link_count = 2;
+ local->fop_continue_cbk = quota_link_continue;
+ local->stub = stub;
+ }
+ UNLOCK(&local->lock);
- LOCK (&local->lock);
- {
- if (local->validate_count == 0) {
- stub = local->stub;
- local->stub = NULL;
- }
+ check_ancestory(frame, newloc->parent);
- local->link_count = 0;
- }
- UNLOCK (&local->lock);
+ /* source parent can be NULL, so do check_ancestry on a file */
+ if (oldloc->parent)
+ check_ancestory(frame, oldloc->parent);
+ else
+ check_ancestory(frame, oldloc->inode);
- if (stub != NULL) {
- call_resume (stub);
- }
+ return 0;
- ret = 0;
err:
- if (ret < 0) {
- QUOTA_STACK_UNWIND (link, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL);
- }
+ QUOTA_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL, NULL);
+ return 0;
- return 0;
-}
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+wind:
+ STACK_WIND(frame, quota_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+ return 0;
+}
int32_t
-quota_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent,
- dict_t *xdata)
-{
- int32_t ret = -1;
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
- quota_dentry_t *old_dentry = NULL, *dentry = NULL;
- char new_dentry_found = 0;
- int64_t size = 0;
+quota_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *old_dentry = NULL, *dentry = NULL;
+ char new_dentry_found = 0;
- if (op_ret < 0) {
- goto out;
- }
+ if (op_ret < 0) {
+ goto out;
+ }
- local = frame->local;
- if (local == NULL) {
- op_ret = -1;
- op_errno = EINVAL;
- gf_log (this->name, GF_LOG_WARNING, "local is NULL");
- goto out;
- }
+ local = frame->local;
- if (IA_ISREG (local->oldloc.inode->ia_type)
- || IA_ISLNK (local->oldloc.inode->ia_type)) {
- size = buf->ia_blocks * 512;
- }
+ GF_VALIDATE_OR_GOTO("quota", local, out);
- if (local->oldloc.parent != local->newloc.parent) {
- quota_update_size (this, local->oldloc.parent, NULL, NULL, (-size));
- quota_update_size (this, local->newloc.parent, NULL, NULL, size);
- }
+ if (!QUOTA_REG_OR_LNK_FILE(local->oldloc.inode->ia_type))
+ goto out;
- if (!(IA_ISREG (local->oldloc.inode->ia_type)
- || IA_ISLNK (local->oldloc.inode->ia_type))) {
- goto out;
- }
+ ret = quota_inode_ctx_get(local->oldloc.inode, this, &ctx, 0);
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_msg_debug(this->name, 0,
+ "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa(local->oldloc.inode->gfid));
- ret = quota_inode_ctx_get (local->oldloc.inode, -1, this, NULL, NULL,
- &ctx, 0);
- if ((ret == -1) || (ctx == NULL)) {
- gf_log (this->name, GF_LOG_WARNING, "quota context not"
- "set in inode(gfid:%s)",
- uuid_utoa (local->oldloc.inode->gfid));
- op_ret = -1;
- op_errno = EINVAL;
- goto out;
- }
+ goto out;
+ }
- LOCK (&ctx->lock);
+ LOCK(&ctx->lock);
+ {
+ list_for_each_entry(dentry, &ctx->parents, next)
{
- /* decision of whether to create a context in newloc->inode
- * depends on fuse_rename_cbk's choice of inode it retains
- * after rename. currently it just associates oldloc->inode
- * with new parent and name. If this changes, following code
- * should be changed to set a new context in newloc->inode.
- */
- list_for_each_entry (dentry, &ctx->parents, next) {
- if ((strcmp (dentry->name, local->oldloc.name) == 0) &&
- (uuid_compare (local->oldloc.parent->gfid,
- dentry->par) == 0)) {
- old_dentry = dentry;
- } else if ((strcmp (dentry->name,
- local->newloc.name) == 0) &&
- (uuid_compare (local->oldloc.parent->gfid,
- dentry->par) == 0)) {
- new_dentry_found = 1;
- gf_log (this->name, GF_LOG_WARNING,
- "new entry being linked (name:%s) for "
- "inode (gfid:%s) is already present "
- "in inode-dentry-list", dentry->name,
- uuid_utoa (local->newloc.inode->gfid));
- break;
- }
- }
-
- if (old_dentry != NULL) {
- __quota_dentry_free (old_dentry);
- } else {
- gf_log (this->name, GF_LOG_WARNING,
- "dentry corresponding to the path just renamed "
- "(name:%s) is not present", local->oldloc.name);
- }
-
- if (!new_dentry_found) {
- dentry = __quota_dentry_new (ctx,
- (char *)local->newloc.name,
- local->newloc.parent->gfid);
- if (dentry == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot create a new dentry (name:%s) "
- "for inode(gfid:%s)", local->newloc.name,
- uuid_utoa (local->newloc.inode->gfid));
- op_ret = -1;
- op_errno = ENOMEM;
- goto unlock;
- }
- }
-
- ctx->buf = *buf;
+ if ((strcmp(dentry->name, local->oldloc.name) == 0) &&
+ (gf_uuid_compare(local->oldloc.parent->gfid, dentry->par) ==
+ 0)) {
+ old_dentry = dentry;
+ } else if ((strcmp(dentry->name, local->newloc.name) == 0) &&
+ (gf_uuid_compare(local->newloc.parent->gfid,
+ dentry->par) == 0)) {
+ new_dentry_found = 1;
+ gf_msg_debug(this->name, 0,
+ "new entry being "
+ "linked (name:%s) for inode (gfid:%s) "
+ "is in inode-dentry-list",
+ dentry->name,
+ uuid_utoa(local->oldloc.inode->gfid));
+ }
+
+ if (old_dentry && new_dentry_found)
+ break;
+ }
+
+ if (old_dentry != NULL) {
+ __quota_dentry_free(old_dentry);
+ } else {
+ gf_msg_debug(this->name, 0,
+ "dentry corresponding"
+ "the path just renamed (name:%s) is not"
+ " present",
+ local->oldloc.name);
+ }
+
+ if (!new_dentry_found) {
+ dentry = __quota_dentry_new(ctx, (char *)local->newloc.name,
+ local->newloc.parent->gfid);
+ if (dentry == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "cannot create a new dentry (name:%s) "
+ "for inode(gfid:%s)",
+ local->newloc.name,
+ uuid_utoa(local->newloc.inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unlock;
+ }
}
+
+ ctx->buf = *buf;
+ }
unlock:
- UNLOCK (&ctx->lock);
+ UNLOCK(&ctx->lock);
out:
- QUOTA_STACK_UNWIND (rename, frame, op_ret, op_errno, buf, preoldparent,
- postoldparent, prenewparent, postnewparent, xdata);
+ QUOTA_STACK_UNWIND(rename, frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
- return 0;
+ return 0;
}
-
int32_t
-quota_rename_helper (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
+quota_rename_helper(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
{
- quota_local_t *local = NULL;
- int32_t op_errno = EINVAL;
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- local = frame->local;
- if (local == NULL) {
- gf_log (this->name, GF_LOG_WARNING, "local is NULL");
- goto unwind;
- }
+ local = frame->local;
- op_errno = local->op_errno;
+ GF_VALIDATE_OR_GOTO("quota", local, unwind);
- if (local->op_ret == -1) {
- goto unwind;
- }
+ op_errno = local->op_errno;
- STACK_WIND (frame, quota_rename_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
- return 0;
+ if (local->op_ret == -1) {
+ goto unwind;
+ }
+
+ STACK_WIND(frame, quota_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+
+ return 0;
unwind:
- QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL, NULL);
- return 0;
+ QUOTA_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
}
-
-int32_t
-quota_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc, dict_t *xdata)
+static int32_t
+quota_rename_get_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
{
- int32_t ret = -1, op_errno = ENOMEM;
- quota_local_t *local = NULL;
- call_stub_t *stub = NULL;
- quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+ int32_t ret = 0;
+ int64_t *size = 0;
+
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR("quota", this, out, op_errno, EINVAL);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, xdata, out, op_errno, EINVAL);
+ local = frame->local;
+ GF_ASSERT(local);
+ local->link_count = 1;
+
+ if (op_ret < 0)
+ goto out;
+
+ ret = dict_get_bin(xdata, QUOTA_SIZE_KEY, (void **)&size);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, Q_MSG_SIZE_KEY_MISSING,
+ "size key not present in dict");
+ op_errno = EINVAL;
+ goto out;
+ }
+ local->delta = ntoh64(*size);
+ local->object_delta = 1;
+ quota_check_limit(frame, local->newloc.parent, this);
+ return 0;
- local = quota_local_new ();
- if (local == NULL) {
- goto err;
- }
-
- frame->local = local;
-
- ret = loc_copy (&local->oldloc, oldloc);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
- goto err;
- }
+out:
+ quota_handle_validate_error(frame, -1, op_errno);
+ return 0;
+}
- ret = loc_copy (&local->newloc, newloc);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
- goto err;
- }
+void
+quota_rename_continue(call_frame_t *frame)
+{
+ int32_t ret = -1;
+ int32_t op_errno = EIO;
+ quota_local_t *local = NULL;
+ uuid_t common_ancestor = {0};
+ xlator_t *this = NULL;
+ quota_inode_ctx_t *ctx = NULL;
- stub = fop_rename_stub (frame, quota_rename_helper, oldloc, newloc,
- xdata);
- if (stub == NULL) {
- goto err;
- }
+ local = frame->local;
+ this = THIS;
+ if (local->op_ret < 0) {
+ op_errno = local->op_errno;
+ goto err;
+ }
+
+ ret = quota_find_common_ancestor(local->oldloc.parent, local->newloc.parent,
+ &common_ancestor);
+ if (ret < 0 || gf_uuid_is_null(common_ancestor)) {
+ gf_msg(this->name, GF_LOG_ERROR, ESTALE, Q_MSG_ANCESTRY_BUILD_FAILED,
+ "failed to get "
+ "common_ancestor for %s and %s",
+ local->oldloc.path, local->newloc.path);
+ op_errno = ESTALE;
+ goto err;
+ }
+
+ LOCK(&local->lock);
+ {
local->link_count = 1;
- local->stub = stub;
+ gf_uuid_copy(local->common_ancestor, common_ancestor);
+ }
+ UNLOCK(&local->lock);
- if (IA_ISREG (oldloc->inode->ia_type)
- || IA_ISLNK (oldloc->inode->ia_type)) {
- ret = quota_inode_ctx_get (oldloc->inode, -1, this, NULL, NULL,
- &ctx, 0);
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
- oldloc->inode ? uuid_utoa (oldloc->inode->gfid)
- : "0");
- op_errno = EINVAL;
- goto err;
- }
- local->delta = ctx->buf.ia_blocks * 512;
+ if (QUOTA_REG_OR_LNK_FILE(local->oldloc.inode->ia_type)) {
+ ret = quota_inode_ctx_get(local->oldloc.inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_INODE_CTX_GET_FAILED,
+ "quota context not set in inode (gfid:%s), "
+ "considering file size as zero while enforcing "
+ "quota on new ancestry",
+ uuid_utoa(local->oldloc.inode->gfid));
+
+ local->delta = 0;
+ local->object_delta = 1;
} else {
- local->delta = 0;
- }
-
- quota_check_limit (frame, newloc->parent, this, NULL, NULL);
+ /* FIXME: We need to account for the size occupied by
+ * this inode on the target directory. To avoid double
+ * accounting, we need to modify enforcer to perform
+ * quota_check_limit only up till the least common
+ * ancestor directory inode*/
- stub = NULL;
-
- LOCK (&local->lock);
- {
- if (local->validate_count == 0) {
- stub = local->stub;
- local->stub = NULL;
- }
+ /* FIXME: The following code assumes that regular files
+ * and link files are present, in their entirety, in a
+ * single brick. This *assumption is invalid in the
+ * case of stripe.*/
- local->link_count = 0;
+ local->delta = ctx->buf.ia_blocks * 512;
+ local->object_delta = 1;
}
- UNLOCK (&local->lock);
- if (stub != NULL) {
- call_resume (stub);
+ } else if (IA_ISDIR(local->oldloc.inode->ia_type)) {
+ ret = quota_validate(frame, local->oldloc.inode, this,
+ quota_rename_get_size_cbk);
+ if (ret) {
+ op_errno = -ret;
+ goto err;
}
- ret = 0;
-err:
- if (ret == -1) {
- QUOTA_STACK_UNWIND (rename, frame, -1, op_errno, NULL,
- NULL, NULL, NULL, NULL, NULL);
- }
+ return;
+ }
- return 0;
-}
+ quota_check_limit(frame, local->newloc.parent, this);
+ return;
+err:
+ QUOTA_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ return;
+}
int32_t
-quota_symlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+quota_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- int64_t size = 0;
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
- quota_dentry_t *dentry = NULL;
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ int32_t op_errno = ENOMEM;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
+
+ local = quota_local_new();
+ if (local == NULL) {
+ goto err;
+ }
+
+ frame->local = local;
+
+ ret = loc_copy(&local->oldloc, oldloc);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "loc_copy failed");
+ goto err;
+ }
+
+ ret = loc_copy(&local->newloc, newloc);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "loc_copy failed");
+ goto err;
+ }
+
+ /* No need to check quota limit if src and dst parents are same */
+ if (oldloc->parent && newloc->parent &&
+ !gf_uuid_compare(oldloc->parent->gfid, newloc->parent->gfid)) {
+ gf_msg_debug(this->name, 0,
+ "rename %s -> %s are "
+ "in the same directory, so skip check limit",
+ oldloc->path, newloc->path);
+ goto wind;
+ }
+
+ stub = fop_rename_stub(frame, quota_rename_helper, oldloc, newloc, xdata);
+ if (stub == NULL) {
+ goto err;
+ }
+
+ LOCK(&local->lock);
+ {
+ /* link_count here tell how many check_ancestry should be done
+ * before continuing the FOP
+ */
+ local->link_count = 2;
+ local->stub = stub;
+ local->fop_continue_cbk = quota_rename_continue;
+ }
+ UNLOCK(&local->lock);
- if (op_ret < 0) {
- goto out;
- }
+ check_ancestory(frame, newloc->parent);
+ check_ancestory(frame, oldloc->parent);
+ return 0;
- local = frame->local;
- size = buf->ia_blocks * 512;
+err:
+ QUOTA_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
- quota_update_size (this, local->loc.parent, NULL, NULL, size);
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename,
+ oldloc, newloc, xdata);
+ return 0;
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 1);
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
- goto out;
- }
+wind:
+ STACK_WIND(frame, quota_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ return 0;
+}
- LOCK (&ctx->lock);
- {
- ctx->buf = *buf;
-
- dentry = __quota_dentry_new (ctx, (char *)local->loc.name,
- local->loc.parent->gfid);
- if (dentry == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot create a new dentry (name:%s) for "
- "inode(gfid:%s)", local->loc.name,
- uuid_utoa (local->loc.inode->gfid));
- op_ret = -1;
- op_errno = ENOMEM;
- }
+int32_t
+quota_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL;
+ int32_t ret = -1;
+
+ if (op_ret < 0) {
+ goto out;
+ }
+
+ local = frame->local;
+
+ ret = quota_inode_ctx_get(local->loc.inode, this, &ctx, 1);
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_msg_debug(this->name, 0,
+ "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa(local->loc.inode->gfid));
+
+ goto out;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ ctx->buf = *buf;
+
+ dentry = __quota_dentry_new(ctx, (char *)local->loc.name,
+ local->loc.parent->gfid);
+ if (dentry == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "cannot create "
+ "a new dentry (name:%s) for inode(gfid:%s)",
+ local->loc.name, uuid_utoa(local->loc.inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
}
- UNLOCK (&ctx->lock);
+ }
+ UNLOCK(&ctx->lock);
out:
- QUOTA_STACK_UNWIND (symlink, frame, op_ret, op_errno, inode, buf,
- preparent, postparent, xdata);
+ QUOTA_STACK_UNWIND(symlink, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
- return 0;
+ return 0;
}
-
int
-quota_symlink_helper (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, mode_t umask, dict_t *xdata)
+quota_symlink_helper(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- quota_local_t *local = NULL;
- int32_t op_errno = EINVAL;
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- local = frame->local;
- if (local == NULL) {
- gf_log (this->name, GF_LOG_WARNING, "local is NULL");
- goto unwind;
- }
+ local = frame->local;
- if (local->op_ret == -1) {
- op_errno = local->op_errno;
- goto unwind;
- }
+ GF_VALIDATE_OR_GOTO("quota", local, unwind);
- STACK_WIND (frame, quota_symlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
- xdata);
- return 0;
+ if (local->op_ret == -1) {
+ op_errno = local->op_errno;
+ goto unwind;
+ }
+
+ STACK_WIND(frame, quota_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask, xdata);
+ return 0;
unwind:
- QUOTA_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL);
- return 0;
+ QUOTA_STACK_UNWIND(symlink, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
-
int
-quota_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, mode_t umask, dict_t *xdata)
+quota_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- int32_t ret = -1;
- int32_t op_errno = ENOMEM;
- quota_local_t *local = NULL;
- call_stub_t *stub = NULL;
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ int32_t op_errno = ENOMEM;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
- local = quota_local_new ();
- if (local == NULL) {
- goto err;
- }
+ priv = this->private;
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
- goto err;
- }
-
- local->link_count = 1;
-
- stub = fop_symlink_stub (frame, quota_symlink_helper, linkpath, loc,
- umask, xdata);
- if (stub == NULL) {
- goto err;
- }
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
- local->stub = stub;
- local->delta = strlen (linkpath);
+ local = quota_local_new();
+ if (local == NULL) {
+ goto err;
+ }
- quota_check_limit (frame, loc->parent, this, NULL, NULL);
+ frame->local = local;
- stub = NULL;
+ ret = loc_copy(&local->loc, loc);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "loc_copy failed");
+ goto err;
+ }
- LOCK (&local->lock);
- {
- if (local->validate_count == 0) {
- stub = local->stub;
- local->stub = NULL;
- }
-
- local->link_count = 0;
- }
- UNLOCK (&local->lock);
+ stub = fop_symlink_stub(frame, quota_symlink_helper, linkpath, loc, umask,
+ xdata);
+ if (stub == NULL) {
+ goto err;
+ }
- if (stub != NULL) {
- call_resume (stub);
- }
+ LOCK(&local->lock);
+ {
+ local->stub = stub;
+ local->delta = strlen(linkpath);
+ local->object_delta = 1;
+ local->link_count = 1;
+ }
+ UNLOCK(&local->lock);
- return 0;
+ quota_check_limit(frame, loc->parent, this);
+ return 0;
err:
- QUOTA_STACK_UNWIND (symlink, frame, -1, op_errno, NULL, NULL, NULL,
- NULL, NULL);
+ QUOTA_STACK_UNWIND(symlink, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
- return 0;
-}
+ return 0;
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->symlink,
+ linkpath, loc, umask, xdata);
+ return 0;
+}
int32_t
-quota_truncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+quota_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- quota_local_t *local = NULL;
- int64_t delta = 0;
- quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
- if (op_ret < 0) {
- goto out;
- }
+ if (op_ret < 0) {
+ goto out;
+ }
- local = frame->local;
- if (local == NULL) {
- gf_log (this->name, GF_LOG_WARNING, "local is NULL");
- goto out;
- }
+ local = frame->local;
- delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512;
+ GF_VALIDATE_OR_GOTO("quota", local, out);
- quota_update_size (this, local->loc.inode, NULL, NULL, delta);
+ quota_inode_ctx_get(local->loc.inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ gf_msg_debug(this->name, 0,
+ "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa(local->loc.inode->gfid));
+ goto out;
+ }
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
- goto out;
- }
-
- LOCK (&ctx->lock);
- {
- ctx->buf = *postbuf;
- }
- UNLOCK (&ctx->lock);
+ LOCK(&ctx->lock);
+ {
+ ctx->buf = *postbuf;
+ }
+ UNLOCK(&ctx->lock);
out:
- QUOTA_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf,
- postbuf, xdata);
- return 0;
+ QUOTA_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
-
int32_t
-quota_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
- dict_t *xdata)
+quota_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- int32_t ret = -1;
- quota_local_t *local = NULL;
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
- local = quota_local_new ();
- if (local == NULL) {
- goto err;
- }
+ priv = this->private;
- frame->local = local;
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
- ret = loc_copy (&local->loc, loc);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
- goto err;
- }
+ local = quota_local_new();
+ if (local == NULL) {
+ goto err;
+ }
- STACK_WIND (frame, quota_truncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ frame->local = local;
+
+ ret = loc_copy(&local->loc, loc);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "loc_copy failed");
+ goto err;
+ }
+
+ STACK_WIND(frame, quota_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+
+ return 0;
- return 0;
err:
- QUOTA_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ QUOTA_STACK_UNWIND(truncate, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
+ return 0;
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate,
+ loc, offset, xdata);
+ return 0;
}
-
int32_t
-quota_ftruncate_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+quota_ftruncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- quota_local_t *local = NULL;
- int64_t delta = 0;
- quota_inode_ctx_t *ctx = NULL;
-
- if (op_ret < 0) {
- goto out;
- }
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
- local = frame->local;
- if (local == NULL) {
- gf_log (this->name, GF_LOG_WARNING, "local is NULL");
- goto out;
- }
+ if (op_ret < 0) {
+ goto out;
+ }
- delta = (postbuf->ia_blocks - prebuf->ia_blocks) * 512;
+ local = frame->local;
- quota_update_size (this, local->loc.inode, NULL, NULL, delta);
+ GF_VALIDATE_OR_GOTO("quota", local, out);
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
- goto out;
- }
+ quota_inode_ctx_get(local->loc.inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ gf_msg_debug(this->name, 0,
+ "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa(local->loc.inode->gfid));
+ goto out;
+ }
- LOCK (&ctx->lock);
- {
- ctx->buf = *postbuf;
- }
- UNLOCK (&ctx->lock);
+ LOCK(&ctx->lock);
+ {
+ ctx->buf = *postbuf;
+ }
+ UNLOCK(&ctx->lock);
out:
- QUOTA_STACK_UNWIND (ftruncate, frame, op_ret, op_errno, prebuf,
- postbuf, xdata);
- return 0;
+ QUOTA_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
}
-
int32_t
-quota_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
- dict_t *xdata)
+quota_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- quota_local_t *local = NULL;
+ quota_priv_t *priv = NULL;
+ quota_local_t *local = NULL;
- local = quota_local_new ();
- if (local == NULL)
- goto err;
+ priv = this->private;
- frame->local = local;
-
- local->loc.inode = inode_ref (fd->inode);
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
- STACK_WIND (frame, quota_ftruncate_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
-
- return 0;
-err:
- QUOTA_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
+ local = quota_local_new();
+ if (local == NULL)
+ goto err;
- return 0;
-}
+ frame->local = local;
+ local->loc.inode = inode_ref(fd->inode);
-int32_t
-quota_send_dir_limit_to_cli (call_frame_t *frame, xlator_t *this,
- inode_t *inode, const char *name)
-{
- int32_t ret = 0;
- char dir_limit [1024] = {0, };
- dict_t *dict = NULL;
- quota_inode_ctx_t *ctx = NULL;
- uint64_t value = 0;
+ STACK_WIND(frame, quota_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
- ret = inode_ctx_get (inode, this, &value);
- if (ret < 0)
- goto out;
-
- ctx = (quota_inode_ctx_t *)(unsigned long)value;
- snprintf (dir_limit, 1024, "%"PRId64",%"PRId64, ctx->size, ctx->limit);
-
- dict = dict_new ();
- if (dict == NULL) {
- ret = -1;
- goto out;
- }
-
- ret = dict_set_str (dict, (char *) name, dir_limit);
- if (ret < 0)
- goto out;
+ return 0;
+err:
+ QUOTA_STACK_UNWIND(ftruncate, frame, -1, ENOMEM, NULL, NULL, NULL);
- gf_log (this->name, GF_LOG_INFO, "str = %s", dir_limit);
+ return 0;
- QUOTA_STACK_UNWIND (getxattr, frame, 0, 0, dict, NULL);
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+}
- ret = 0;
+static int32_t
+quota_send_dir_limit_to_cli(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ const char *name, const int namelen)
+{
+ int32_t ret = 0;
+ int dir_limit_len = 0;
+ char dir_limit[64] = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ uint64_t value = 0;
+ quota_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (!priv->is_quota_on) {
+ dir_limit_len = snprintf(dir_limit, sizeof(dir_limit),
+ "Quota is disabled please turn on");
+ goto dict_set;
+ }
+
+ ret = inode_ctx_get(inode, this, &value);
+ if (ret < 0)
+ goto out;
+
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ dir_limit_len = snprintf(dir_limit, sizeof(dir_limit),
+ "%" PRId64 ",%" PRId64, ctx->size, ctx->hard_lim);
+
+dict_set:
+ dict = dict_new();
+ if (dict == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_nstrn(dict, (char *)name, namelen, dir_limit, dir_limit_len);
+ if (ret < 0)
+ goto out;
+
+ gf_msg_debug(this->name, 0, "str = %s", dir_limit);
+
+ QUOTA_STACK_UNWIND(getxattr, frame, 0, 0, dict, NULL);
+
+ ret = 0;
out:
- return ret;
+ if (dict)
+ dict_unref(dict);
+ return ret;
}
-
int32_t
-quota_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- const char *name, dict_t *xdata)
+quota_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
{
- int32_t ret = 0;
+ int32_t ret = 0;
- if (name && strcasecmp (name, "trusted.limit.list") == 0) {
- ret = quota_send_dir_limit_to_cli (frame, this, fd->inode,
- name);
- if (ret == 0) {
- return 0;
- }
+ if (name && strcasecmp(name, "trusted.limit.list") == 0) {
+ ret = quota_send_dir_limit_to_cli(frame, this, fd->inode,
+ "trusted.limit.list",
+ SLEN("trusted.limit.list"));
+ if (ret == 0) {
+ return 0;
}
+ }
- STACK_WIND (frame, default_fgetxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
- return 0;
+ STACK_WIND(frame, default_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
}
-
int32_t
-quota_getxattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
+quota_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- int32_t ret = 0;
-
- if ((name != NULL) && strcasecmp (name, "trusted.limit.list") == 0) {
- ret = quota_send_dir_limit_to_cli (frame, this, loc->inode,
- name);
- if (ret == 0)
- return 0;
- }
-
- STACK_WIND (frame, default_getxattr_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
- return 0;
+ int32_t ret = 0;
+
+ if ((name != NULL) && strcasecmp(name, "trusted.limit.list") == 0) {
+ ret = quota_send_dir_limit_to_cli(frame, this, loc->inode,
+ "trusted.limit.list",
+ SLEN("trusted.limit.list"));
+ if (ret == 0)
+ return 0;
+ }
+
+ STACK_WIND(frame, default_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
}
-
int32_t
-quota_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf, dict_t *xdata)
+quota_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
- if (op_ret < 0) {
- goto out;
- }
+ if (op_ret < 0) {
+ goto out;
+ }
- local = frame->local;
- if (local == NULL) {
- gf_log (this->name, GF_LOG_WARNING, "local is NULL");
- goto out;
- }
+ local = frame->local;
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "quota context not set in inode (gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
- goto out;
- }
+ GF_VALIDATE_OR_GOTO("quota", local, out);
- LOCK (&ctx->lock);
- {
- if (buf)
- ctx->buf = *buf;
+ quota_inode_ctx_get(local->loc.inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ if (!IA_ISDIR(buf->ia_type)) {
+ gf_msg_debug(this->name, 0,
+ "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa(local->loc.inode->gfid));
}
- UNLOCK (&ctx->lock);
+
+ goto out;
+ }
+
+ if (buf) {
+ LOCK(&ctx->lock);
+ ctx->buf = *buf;
+ UNLOCK(&ctx->lock);
+ }
out:
- QUOTA_STACK_UNWIND (stat, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ QUOTA_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
-
int32_t
-quota_stat (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+quota_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- quota_local_t *local = NULL;
- int32_t ret = -1;
+ quota_priv_t *priv = NULL;
+ quota_local_t *local = NULL;
+ int32_t ret = -1;
- local = quota_local_new ();
- if (local == NULL) {
- goto unwind;
- }
+ priv = this->private;
- frame->local = local;
- ret = loc_copy (&local->loc, loc);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
- goto unwind;
- }
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
- STACK_WIND (frame, quota_stat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->stat, loc, xdata);
- return 0;
+ local = quota_local_new();
+ if (local == NULL) {
+ goto unwind;
+ }
+
+ frame->local = local;
+ ret = loc_copy(&local->loc, loc);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "loc_copy failed");
+ goto unwind;
+ }
+
+ STACK_WIND(frame, quota_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
unwind:
- QUOTA_STACK_UNWIND (stat, frame, -1, ENOMEM, NULL, NULL);
- return 0;
-}
+ QUOTA_STACK_UNWIND(stat, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->stat,
+ loc, xdata);
+ return 0;
+}
int32_t
-quota_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- dict_t *xdata)
+quota_fstat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
- if (op_ret < 0) {
- goto out;
- }
+ if (op_ret < 0) {
+ goto out;
+ }
- local = frame->local;
- if (local == NULL) {
- gf_log (this->name, GF_LOG_WARNING, "local is NULL");
- goto out;
- }
+ local = frame->local;
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
- goto out;
- }
+ GF_VALIDATE_OR_GOTO("quota", local, out);
- LOCK (&ctx->lock);
- {
- if (buf)
- ctx->buf = *buf;
+ quota_inode_ctx_get(local->loc.inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ if (!IA_ISDIR(buf->ia_type)) {
+ gf_msg_debug(this->name, 0,
+ "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa(local->loc.inode->gfid));
}
- UNLOCK (&ctx->lock);
+
+ goto out;
+ }
+
+ if (buf) {
+ LOCK(&ctx->lock);
+ ctx->buf = *buf;
+ UNLOCK(&ctx->lock);
+ }
out:
- QUOTA_STACK_UNWIND (fstat, frame, op_ret, op_errno, buf, xdata);
- return 0;
+ QUOTA_STACK_UNWIND(fstat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
}
-
int32_t
-quota_fstat (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+quota_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- quota_local_t *local = NULL;
+ quota_priv_t *priv = NULL;
+ quota_local_t *local = NULL;
- local = quota_local_new ();
- if (local == NULL) {
- goto unwind;
- }
+ priv = this->private;
- frame->local = local;
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
- local->loc.inode = inode_ref (fd->inode);
+ local = quota_local_new();
+ if (local == NULL) {
+ goto unwind;
+ }
- STACK_WIND (frame, quota_fstat_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fstat, fd, xdata);
- return 0;
+ frame->local = local;
+
+ local->loc.inode = inode_ref(fd->inode);
+
+ STACK_WIND(frame, quota_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
unwind:
- QUOTA_STACK_UNWIND (fstat, frame, -1, ENOMEM, NULL, NULL);
- return 0;
-}
+ QUOTA_STACK_UNWIND(fstat, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fstat,
+ fd, xdata);
+ return 0;
+}
int32_t
-quota_readlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, const char *path,
- struct iatt *buf, dict_t *xdata)
+quota_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, const char *path,
+ struct iatt *buf, dict_t *xdata)
{
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
- if (op_ret < 0) {
- goto out;
- }
+ if (op_ret < 0) {
+ goto out;
+ }
- local = frame->local;
- if (local == NULL) {
- gf_log (this->name, GF_LOG_WARNING, "local is NULL");
- goto out;
- }
+ local = frame->local;
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
- goto out;
- }
+ GF_VALIDATE_OR_GOTO("quota", local, out);
- LOCK (&ctx->lock);
- {
- ctx->buf = *buf;
- }
- UNLOCK (&ctx->lock);
+ quota_inode_ctx_get(local->loc.inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ gf_msg_debug(this->name, 0,
+ "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa(local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ ctx->buf = *buf;
+ }
+ UNLOCK(&ctx->lock);
out:
- QUOTA_STACK_UNWIND (readlink, frame, op_ret, op_errno, path, buf, xdata);
- return 0;
+ QUOTA_STACK_UNWIND(readlink, frame, op_ret, op_errno, path, buf, xdata);
+ return 0;
}
-
int32_t
-quota_readlink (call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
- dict_t *xdata)
+quota_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
- quota_local_t *local = NULL;
- int32_t ret = -1;
+ quota_priv_t *priv = NULL;
+ quota_local_t *local = NULL;
+ int32_t ret = -1;
- local = quota_local_new ();
- if (local == NULL) {
- goto unwind;
- }
+ priv = this->private;
- frame->local = local;
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
- ret = loc_copy (&local->loc, loc);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
- goto unwind;
- }
+ local = quota_local_new();
+ if (local == NULL) {
+ goto unwind;
+ }
- STACK_WIND (frame, quota_readlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
- return 0;
+ frame->local = local;
+
+ ret = loc_copy(&local->loc, loc);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "loc_copy failed");
+ goto unwind;
+ }
+
+ STACK_WIND(frame, quota_readlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
+ return 0;
unwind:
- QUOTA_STACK_UNWIND (readlink, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
-}
+ QUOTA_STACK_UNWIND(readlink, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readlink,
+ loc, size, xdata);
+ return 0;
+}
int32_t
-quota_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iovec *vector,
- int32_t count, struct iatt *buf, struct iobref *iobref,
- dict_t *xdata)
+quota_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *buf, struct iobref *iobref,
+ dict_t *xdata)
{
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
- if (op_ret < 0) {
- goto out;
- }
+ if (op_ret < 0) {
+ goto out;
+ }
- local = frame->local;
- if (local == NULL) {
- gf_log (this->name, GF_LOG_WARNING, "local is NULL");
- goto out;
- }
+ local = frame->local;
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
- goto out;
- }
+ GF_VALIDATE_OR_GOTO("quota", local, out);
- LOCK (&ctx->lock);
- {
- ctx->buf = *buf;
- }
- UNLOCK (&ctx->lock);
+ quota_inode_ctx_get(local->loc.inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ gf_msg_debug(this->name, 0,
+ "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa(local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ ctx->buf = *buf;
+ }
+ UNLOCK(&ctx->lock);
out:
- QUOTA_STACK_UNWIND (readv, frame, op_ret, op_errno, vector, count,
- buf, iobref, xdata);
- return 0;
+ QUOTA_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, buf,
+ iobref, xdata);
+ return 0;
}
-
int32_t
-quota_readv (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, uint32_t flags, dict_t *xdata)
+quota_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- quota_local_t *local = NULL;
+ quota_priv_t *priv = NULL;
+ quota_local_t *local = NULL;
- local = quota_local_new ();
- if (local == NULL) {
- goto unwind;
- }
+ priv = this->private;
- frame->local = local;
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
- local->loc.inode = inode_ref (fd->inode);
+ local = quota_local_new();
+ if (local == NULL) {
+ goto unwind;
+ }
- STACK_WIND (frame, quota_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
- xdata);
- return 0;
+ frame->local = local;
+
+ local->loc.inode = inode_ref(fd->inode);
+
+ STACK_WIND(frame, quota_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+ return 0;
unwind:
- QUOTA_STACK_UNWIND (readv, frame, -1, ENOMEM, NULL, -1, NULL, NULL, NULL);
- return 0;
-}
+ QUOTA_STACK_UNWIND(readv, frame, -1, ENOMEM, NULL, -1, NULL, NULL, NULL);
+ return 0;
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
+ fd, size, offset, flags, xdata);
+ return 0;
+}
int32_t
-quota_fsync_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
- struct iatt *postbuf, dict_t *xdata)
+quota_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
- if (op_ret < 0) {
- goto out;
- }
+ if (op_ret < 0) {
+ goto out;
+ }
- local = frame->local;
- if (local == NULL) {
- gf_log (this->name, GF_LOG_WARNING, "local is NULL");
- goto out;
- }
+ local = frame->local;
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
- goto out;
- }
+ GF_VALIDATE_OR_GOTO("quota", local, out);
- LOCK (&ctx->lock);
- {
- ctx->buf = *postbuf;
- }
- UNLOCK (&ctx->lock);
+ quota_inode_ctx_get(local->loc.inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ gf_msg_debug(this->name, 0,
+ "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa(local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ ctx->buf = *postbuf;
+ }
+ UNLOCK(&ctx->lock);
out:
- QUOTA_STACK_UNWIND (fsync, frame, op_ret, op_errno, prebuf, postbuf,
- xdata);
- return 0;
+ QUOTA_STACK_UNWIND(fsync, frame, op_ret, op_errno, prebuf, postbuf, xdata);
+ return 0;
}
-
int32_t
-quota_fsync (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
- dict_t *xdata)
+quota_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
{
- quota_local_t *local = NULL;
+ quota_priv_t *priv = NULL;
+ quota_local_t *local = NULL;
- local = quota_local_new ();
- if (local == NULL) {
- goto unwind;
- }
+ priv = this->private;
- local->loc.inode = inode_ref (fd->inode);
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
- frame->local = local;
+ local = quota_local_new();
+ if (local == NULL) {
+ goto unwind;
+ }
- STACK_WIND (frame, quota_fsync_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
- return 0;
+ local->loc.inode = inode_ref(fd->inode);
+
+ frame->local = local;
+
+ STACK_WIND(frame, quota_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, flags, xdata);
+ return 0;
unwind:
- QUOTA_STACK_UNWIND (fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
+ QUOTA_STACK_UNWIND(fsync, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
+ fd, flags, xdata);
+ return 0;
}
-
int32_t
-quota_setattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+quota_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
{
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
- if (op_ret < 0) {
- goto out;
- }
+ if (op_ret < 0) {
+ goto out;
+ }
- local = frame->local;
- if (local == NULL) {
- gf_log (this->name, GF_LOG_WARNING, "local is NULL");
- goto out;
- }
+ local = frame->local;
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_DEBUG,
- "quota context not set in inode (gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
- goto out;
- }
+ GF_VALIDATE_OR_GOTO("quota", local, out);
- LOCK (&ctx->lock);
- {
- if (statpost)
- ctx->buf = *statpost;
+ quota_inode_ctx_get(local->loc.inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ if (!IA_ISDIR(statpost->ia_type)) {
+ gf_msg_debug(this->name, 0,
+ "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa(local->loc.inode->gfid));
}
- UNLOCK (&ctx->lock);
+
+ goto out;
+ }
+
+ if (statpost) {
+ LOCK(&ctx->lock);
+ ctx->buf = *statpost;
+ UNLOCK(&ctx->lock);
+ }
out:
- QUOTA_STACK_UNWIND (setattr, frame, op_ret, op_errno, statpre,
- statpost, xdata);
- return 0;
+ QUOTA_STACK_UNWIND(setattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
+ return 0;
}
-
int32_t
-quota_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+quota_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- quota_local_t *local = NULL;
- int32_t ret = -1;
+ quota_priv_t *priv = NULL;
+ quota_local_t *local = NULL;
+ int32_t ret = -1;
- local = quota_local_new ();
- if (local == NULL) {
- goto unwind;
- }
+ priv = this->private;
- frame->local = local;
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
- ret = loc_copy (&local->loc, loc);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
- goto unwind;
- }
+ local = quota_local_new();
+ if (local == NULL) {
+ goto unwind;
+ }
- STACK_WIND (frame, quota_setattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->setattr, loc, stbuf, valid, xdata);
- return 0;
+ frame->local = local;
+
+ ret = loc_copy(&local->loc, loc);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "loc_copy failed");
+ goto unwind;
+ }
+
+ STACK_WIND(frame, quota_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
unwind:
- QUOTA_STACK_UNWIND (setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
-}
+ QUOTA_STACK_UNWIND(setattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setattr,
+ loc, stbuf, valid, xdata);
+ return 0;
+}
int32_t
-quota_fsetattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *statpre,
- struct iatt *statpost, dict_t *xdata)
+quota_fsetattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *statpre,
+ struct iatt *statpost, dict_t *xdata)
{
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
- if (op_ret < 0) {
- goto out;
- }
+ if (op_ret < 0) {
+ goto out;
+ }
- local = frame->local;
- if (local == NULL) {
- gf_log (this->name, GF_LOG_WARNING, "local is NULL");
- goto out;
- }
+ local = frame->local;
- quota_inode_ctx_get (local->loc.inode, -1, this, NULL, NULL,
- &ctx, 0);
- if (ctx == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "quota context not set in inode (gfid:%s)",
- uuid_utoa (local->loc.inode->gfid));
- goto out;
- }
+ GF_VALIDATE_OR_GOTO("quota", local, out);
- LOCK (&ctx->lock);
- {
- ctx->buf = *statpost;
+ quota_inode_ctx_get(local->loc.inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ if (!IA_ISDIR(statpost->ia_type)) {
+ gf_msg_debug(this->name, 0,
+ "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa(local->loc.inode->gfid));
}
- UNLOCK (&ctx->lock);
+
+ goto out;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ ctx->buf = *statpost;
+ }
+ UNLOCK(&ctx->lock);
out:
- QUOTA_STACK_UNWIND (fsetattr, frame, op_ret, op_errno, statpre,
- statpost, xdata);
- return 0;
+ QUOTA_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
+ return 0;
}
-
int32_t
-quota_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+quota_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
{
- quota_local_t *local = NULL;
+ quota_priv_t *priv = NULL;
+ quota_local_t *local = NULL;
- local = quota_local_new ();
- if (local == NULL) {
- goto unwind;
- }
+ priv = this->private;
- frame->local = local;
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
- local->loc.inode = inode_ref (fd->inode);
+ local = quota_local_new();
+ if (local == NULL) {
+ goto unwind;
+ }
- STACK_WIND (frame, quota_fsetattr_cbk, FIRST_CHILD (this),
- FIRST_CHILD (this)->fops->fsetattr, fd, stbuf, valid, xdata);
- return 0;
+ frame->local = local;
+
+ local->loc.inode = inode_ref(fd->inode);
+
+ STACK_WIND(frame, quota_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
unwind:
- QUOTA_STACK_UNWIND (fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
- return 0;
-}
+ QUOTA_STACK_UNWIND(fsetattr, frame, -1, ENOMEM, NULL, NULL, NULL);
+ return 0;
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsetattr,
+ fd, stbuf, valid, xdata);
+ return 0;
+}
int32_t
-quota_mknod_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, struct iatt *preparent,
- struct iatt *postparent, dict_t *xdata)
+quota_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- int32_t ret = -1;
- quota_local_t *local = NULL;
- quota_inode_ctx_t *ctx = NULL;
- quota_dentry_t *dentry = NULL;
-
- local = frame->local;
- if (op_ret < 0) {
- goto unwind;
- }
-
- ret = quota_inode_ctx_get (inode, -1, this, NULL, buf, &ctx, 1);
- if ((ret == -1) || (ctx == NULL)) {
- gf_log (this->name, GF_LOG_WARNING, "cannot create quota "
- "context in inode (gfid:%s)", uuid_utoa (inode->gfid));
- op_ret = -1;
- op_errno = ENOMEM;
- goto unwind;
- }
-
- LOCK (&ctx->lock);
- {
- ctx->buf = *buf;
-
- dentry = __quota_dentry_new (ctx, (char *)local->loc.name,
- local->loc.parent->gfid);
- if (dentry == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "cannot create a new dentry (name:%s) for "
- "inode(gfid:%s)", local->loc.name,
- uuid_utoa (local->loc.inode->gfid));
- op_ret = -1;
- op_errno = ENOMEM;
- goto unlock;
- }
- }
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL;
+
+ local = frame->local;
+ if (op_ret < 0) {
+ goto unwind;
+ }
+
+ ret = quota_inode_ctx_get(inode, this, &ctx, 1);
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_INODE_CTX_GET_FAILED,
+ "cannot create quota context in "
+ "inode(gfid:%s)",
+ uuid_utoa(inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ ctx->buf = *buf;
+
+ dentry = __quota_dentry_new(ctx, (char *)local->loc.name,
+ local->loc.parent->gfid);
+ if (dentry == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "cannot create a new dentry "
+ "(name:%s) for inode(gfid:%s)",
+ local->loc.name, uuid_utoa(local->loc.inode->gfid));
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto unlock;
+ }
+ }
unlock:
- UNLOCK (&ctx->lock);
+ UNLOCK(&ctx->lock);
unwind:
- QUOTA_STACK_UNWIND (mknod, frame, op_ret, op_errno, inode,
- buf, preparent, postparent, xdata);
- return 0;
+ QUOTA_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
}
-
int
-quota_mknod_helper (call_frame_t *frame, xlator_t *this, loc_t *loc,
- mode_t mode, dev_t rdev, mode_t umask, dict_t *xdata)
+quota_mknod_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- quota_local_t *local = NULL;
- int32_t op_errno = EINVAL;
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
- local = frame->local;
- if (local == NULL) {
- gf_log (this->name, GF_LOG_WARNING, "local is NULL");
- goto unwind;
- }
+ local = frame->local;
- if (local->op_ret == -1) {
- op_errno = local->op_errno;
- goto unwind;
- }
+ GF_VALIDATE_OR_GOTO("quota", local, unwind);
- STACK_WIND (frame, quota_mknod_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
- xdata);
+ if (local->op_ret == -1) {
+ op_errno = local->op_errno;
+ goto unwind;
+ }
- return 0;
+ STACK_WIND(frame, quota_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
+
+ return 0;
unwind:
- QUOTA_STACK_UNWIND (mknod, frame, -1, op_errno, NULL, NULL,
- NULL, NULL, NULL);
- return 0;
+ QUOTA_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
}
-
int
-quota_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata)
+quota_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- int32_t ret = -1;
- quota_local_t *local = NULL;
- call_stub_t *stub = NULL;
-
- local = quota_local_new ();
- if (local == NULL) {
- goto err;
- }
-
- frame->local = local;
-
- ret = loc_copy (&local->loc, loc);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING, "loc_copy failed");
- goto err;
- }
-
- stub = fop_mknod_stub (frame, quota_mknod_helper, loc, mode, rdev,
- umask, xdata);
- if (stub == NULL) {
- goto err;
- }
-
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
+ QUOTA_WIND_FOR_INTERNAL_FOP(xdata, off);
+
+ local = quota_local_new();
+ if (local == NULL) {
+ goto err;
+ }
+
+ frame->local = local;
+
+ ret = loc_copy(&local->loc, loc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "loc_copy failed");
+ goto err;
+ }
+
+ stub = fop_mknod_stub(frame, quota_mknod_helper, loc, mode, rdev, umask,
+ xdata);
+ if (stub == NULL) {
+ goto err;
+ }
+
+ LOCK(&local->lock);
+ {
local->link_count = 1;
local->stub = stub;
local->delta = 0;
+ local->object_delta = 1;
+ }
+ UNLOCK(&local->lock);
- quota_check_limit (frame, loc->parent, this, NULL, NULL);
-
- stub = NULL;
-
- LOCK (&local->lock);
- {
- local->link_count = 0;
- if (local->validate_count == 0) {
- stub = local->stub;
- local->stub = NULL;
- }
- }
- UNLOCK (&local->lock);
+ quota_check_limit(frame, loc->parent, this);
+ return 0;
- if (stub != NULL) {
- call_resume (stub);
- }
-
- return 0;
err:
- QUOTA_STACK_UNWIND (mknod, frame, -1, ENOMEM, NULL, NULL, NULL, NULL,
- NULL);
+ QUOTA_STACK_UNWIND(mknod, frame, -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
+ return 0;
- return 0;
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
+ loc, mode, rdev, umask, xdata);
+ return 0;
}
int
-quota_setxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
+quota_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- QUOTA_STACK_UNWIND (setxattr, frame, op_ret, op_errno, xdata);
- return 0;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ int ret = 0;
+
+ if (op_ret < 0) {
+ goto out;
+ }
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ ret = quota_inode_ctx_get(local->loc.inode, this, &ctx, 1);
+ if ((ret < 0) || (ctx == NULL)) {
+ op_errno = -1;
+ goto out;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ ctx->hard_lim = local->limit.hl;
+ ctx->soft_lim = local->limit.sl;
+ ctx->object_hard_lim = local->object_limit.hl;
+ ctx->object_soft_lim = local->object_limit.sl;
+ }
+ UNLOCK(&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
-quota_setxattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, dict_t *dict, int flags, dict_t *xdata)
+quota_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int flags, dict_t *xdata)
{
- data_pair_t *trav = NULL;
- int op_errno = EINVAL;
- int op_ret = -1;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
-
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.quota*", dict,
- trav, op_errno, err);
+ quota_priv_t *priv = NULL;
+ int op_errno = EINVAL;
+ int op_ret = -1;
+ int64_t hard_lim = -1;
+ int64_t soft_lim = -1;
+ int64_t object_hard_limit = -1;
+ int64_t object_soft_limit = -1;
+ quota_local_t *local = NULL;
+ gf_boolean_t internal_fop = _gf_false;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ if (xdata && dict_get_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY))
+ internal_fop = _gf_true;
+
+ if (frame->root->pid >= 0 && internal_fop == _gf_false) {
+ GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.quota*", dict, op_errno,
+ err);
+ GF_IF_INTERNAL_XATTR_GOTO("trusted.pgfid*", dict, op_errno, err);
+ }
+
+ quota_get_limits(this, dict, &hard_lim, &soft_lim, &object_hard_limit,
+ &object_soft_limit);
+
+ if (hard_lim > 0 || object_hard_limit > 0) {
+ local = quota_local_new();
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ loc_copy(&local->loc, loc);
+ }
+
+ if (hard_lim > 0) {
+ local->limit.hl = hard_lim;
+ local->limit.sl = soft_lim;
+ }
+
+ if (object_hard_limit > 0) {
+ local->object_limit.hl = object_hard_limit;
+ local->object_limit.sl = object_soft_limit;
+ }
+
+ STACK_WIND(frame, quota_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
+ return 0;
+err:
+ QUOTA_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL);
+ return 0;
- STACK_WIND (frame, quota_setxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr,
loc, dict, flags, xdata);
- return 0;
-err:
- QUOTA_STACK_UNWIND (setxattr, frame, op_ret, op_errno, NULL);
- return 0;
+ return 0;
}
int
-quota_fsetxattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int op_ret, int op_errno, dict_t *xdata)
+quota_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
{
- QUOTA_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, xdata);
- return 0;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+
+ if (op_ret < 0)
+ goto out;
+
+ local = frame->local;
+ if (!local)
+ goto out;
+
+ op_ret = quota_inode_ctx_get(local->loc.inode, this, &ctx, 1);
+ if ((op_ret < 0) || (ctx == NULL)) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ ctx->hard_lim = local->limit.hl;
+ ctx->soft_lim = local->limit.sl;
+ ctx->object_hard_lim = local->object_limit.hl;
+ ctx->object_soft_lim = local->object_limit.sl;
+ }
+ UNLOCK(&ctx->lock);
+
+out:
+ QUOTA_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
-quota_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- dict_t *dict, int flags, dict_t *xdata)
+quota_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int flags, dict_t *xdata)
{
- data_pair_t *trav = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- GF_IF_INTERNAL_XATTR_GOTO ("trusted.glusterfs.quota*", dict,
- trav, op_errno, err);
+ quota_priv_t *priv = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ quota_local_t *local = NULL;
+ int64_t hard_lim = -1;
+ int64_t soft_lim = -1;
+ int64_t object_hard_limit = -1;
+ int64_t object_soft_limit = -1;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ if (0 <= frame->root->pid) {
+ GF_IF_INTERNAL_XATTR_GOTO("trusted.glusterfs.quota*", dict, op_errno,
+ err);
+ GF_IF_INTERNAL_XATTR_GOTO("trusted.pgfid*", dict, op_errno, err);
+ }
+
+ quota_get_limits(this, dict, &hard_lim, &soft_lim, &object_hard_limit,
+ &object_soft_limit);
+
+ if (hard_lim > 0 || object_hard_limit > 0) {
+ local = quota_local_new();
+ if (local == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+ local->loc.inode = inode_ref(fd->inode);
+ }
+
+ if (hard_lim > 0) {
+ local->limit.hl = hard_lim;
+ local->limit.sl = soft_lim;
+ }
+
+ if (object_hard_limit > 0) {
+ local->object_limit.hl = object_hard_limit;
+ local->object_limit.sl = object_soft_limit;
+ }
+
+ STACK_WIND(frame, quota_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
+err:
+ QUOTA_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL);
+ return 0;
- STACK_WIND (frame, quota_fsetxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsetxattr,
- fd, dict, flags, xdata);
- return 0;
- err:
- QUOTA_STACK_UNWIND (fsetxattr, frame, op_ret, op_errno, NULL);
- return 0;
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+ return 0;
}
-
int
-quota_removexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+quota_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- QUOTA_STACK_UNWIND (removexattr, frame, op_ret, op_errno, xdata);
- return 0;
+ QUOTA_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
-quota_removexattr (call_frame_t *frame, xlator_t *this,
- loc_t *loc, const char *name, dict_t *xdata)
+quota_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- int32_t op_errno = EINVAL;
+ quota_priv_t *priv = NULL;
+ int32_t op_errno = EINVAL;
- VALIDATE_OR_GOTO (this, err);
+ priv = this->private;
- GF_IF_NATIVE_XATTR_GOTO ("trusted.quota*",
- name, op_errno, err);
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (loc, err);
+ VALIDATE_OR_GOTO(this, err);
+
+ /* all quota xattrs can be cleaned up by doing setxattr on special key.
+ * Hence its ok that we don't allow removexattr on quota keys here.
+ */
+ if (frame->root->pid >= 0) {
+ GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.quota*", name, op_errno,
+ err);
+ GF_IF_NATIVE_XATTR_GOTO("trusted.pgfid*", name, op_errno, err);
+ }
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(loc, err);
+
+ STACK_WIND(frame, quota_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ return 0;
- STACK_WIND (frame, quota_removexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- loc, name, xdata);
- return 0;
err:
- QUOTA_STACK_UNWIND (removexattr, frame, -1, op_errno, NULL);
- return 0;
-}
+ QUOTA_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
+ return 0;
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ return 0;
+}
int
-quota_fremovexattr_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, dict_t *xdata)
+quota_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
{
- QUOTA_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, xdata);
- return 0;
+ QUOTA_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
}
int
-quota_fremovexattr (call_frame_t *frame, xlator_t *this,
- fd_t *fd, const char *name, dict_t *xdata)
+quota_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
{
- int32_t op_ret = -1;
- int32_t op_errno = EINVAL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (fd, err);
-
- GF_IF_NATIVE_XATTR_GOTO ("trusted.quota*",
- name, op_errno, err);
+ quota_priv_t *priv = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+
+ priv = this->private;
+
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(fd, err);
+
+ if (frame->root->pid >= 0) {
+ GF_IF_NATIVE_XATTR_GOTO("trusted.glusterfs.quota*", name, op_errno,
+ err);
+ GF_IF_NATIVE_XATTR_GOTO("trusted.pgfid*", name, op_errno, err);
+ }
+ STACK_WIND(frame, quota_fremovexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ return 0;
+err:
+ QUOTA_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL);
+ return 0;
- STACK_WIND (frame, quota_fremovexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fremovexattr,
- fd, name, xdata);
- return 0;
- err:
- QUOTA_STACK_UNWIND (fremovexattr, frame, op_ret, op_errno, NULL);
- return 0;
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ return 0;
}
-
int32_t
-quota_statfs_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct statvfs *buf,
- dict_t *xdata)
-{
- inode_t *root_inode = NULL;
- quota_priv_t *priv = NULL;
- uint64_t value = 0;
- quota_inode_ctx_t *ctx = NULL;
- limits_t *limit_node = NULL;
- int64_t usage = -1;
- int64_t avail = -1;
- int64_t blocks = 0;
-
- root_inode = cookie;
-
- /* This fop will fail mostly in case of client disconnect's,
- * which is already logged. Hence, not logging here */
- if (op_ret == -1)
- goto unwind;
- /*
- * We should never get here unless quota_statfs (below) sent us a
- * cookie, and it would only do so if the value was non-NULL. This
- * check is therefore just routine defensive coding.
- */
- if (!root_inode) {
- gf_log(this->name,GF_LOG_WARNING,
- "null inode, cannot adjust for quota");
- goto unwind;
- }
- if (!root_inode->table || (root_inode != root_inode->table->root)) {
- gf_log(this->name,GF_LOG_WARNING,
- "non-root inode, cannot adjust for quota");
- goto unwind;
- }
-
- inode_ctx_get (root_inode, this, &value);
- if (!value) {
- goto unwind;
- }
- ctx = (quota_inode_ctx_t *)(unsigned long)value;
- usage = (ctx->size) / buf->f_bsize;
- priv = this->private;
-
- list_for_each_entry (limit_node, &priv->limit_head, limit_list) {
- /* Notice that this only works for volume-level quota. */
- if (strcmp (limit_node->path, "/") == 0) {
- blocks = limit_node->value / buf->f_bsize;
- if (usage > blocks) {
- break;
- }
-
- buf->f_blocks = blocks;
- avail = buf->f_blocks - usage;
- if (buf->f_bfree > avail) {
- buf->f_bfree = avail;
- }
- /*
- * We have to assume that the total assigned quota
- * won't cause us to dip into the reserved space,
- * because dealing with the overcommitted cases is
- * just too hairy (especially when different bricks
- * might be using different reserved percentages and
- * such).
- */
- buf->f_bavail = buf->f_bfree;
- break;
- }
- }
+quota_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct statvfs *buf,
+ dict_t *xdata)
+{
+ inode_t *inode = NULL;
+ uint64_t value = 0;
+ int64_t usage = -1;
+ int64_t avail = -1;
+ int64_t blocks = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ int ret = 0;
+
+ inode = cookie;
+
+ /* This fop will fail mostly in case of client disconnect,
+ * which is already logged. Hence, not logging here */
+ if (op_ret == -1)
+ goto unwind;
+ /*
+ * We should never get here unless quota_statfs (below) sent us a
+ * cookie, and it would only do so if the value was non-NULL. This
+ * check is therefore just routine defensive coding.
+ */
+
+ GF_VALIDATE_OR_GOTO("quota", inode, unwind);
+
+ inode_ctx_get(inode, this, &value);
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ if (!ctx || ctx->hard_lim <= 0)
+ goto unwind;
+
+ { /* statfs is adjusted in this code block */
+ usage = (ctx->size) / buf->f_bsize;
+
+ blocks = ctx->hard_lim / buf->f_bsize;
+ buf->f_blocks = blocks;
+
+ avail = buf->f_blocks - usage;
+ avail = max(avail, 0);
+
+ buf->f_bfree = avail;
+ /*
+ * We have to assume that the total assigned quota
+ * won't cause us to dip into the reserved space,
+ * because dealing with the overcommitted cases is
+ * just too hairy (especially when different bricks
+ * might be using different reserved percentages and
+ * such).
+ */
+ buf->f_bavail = buf->f_bfree;
+ }
+
+ xdata = xdata ? dict_ref(xdata) : dict_new();
+ if (!xdata)
+ goto unwind;
+
+ ret = dict_set_int8(xdata, "quota-deem-statfs", 1);
+ if (-1 == ret)
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, Q_MSG_ENOMEM,
+ "Dict set failed, deem-statfs option may "
+ "have no effect");
unwind:
- if (root_inode) {
- inode_unref(root_inode);
- }
- STACK_UNWIND_STRICT (statfs, frame, op_ret, op_errno, buf, xdata);
- return 0;
-}
+ QUOTA_STACK_UNWIND(statfs, frame, op_ret, op_errno, buf, xdata);
+ if (xdata)
+ dict_unref(xdata);
-int32_t
-quota_statfs (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
-{
- inode_t *root_inode = NULL;
-
- if (loc->inode) {
- root_inode = loc->inode->table->root;
- inode_ref(root_inode);
- STACK_WIND_COOKIE (frame, quota_statfs_cbk, root_inode,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs, loc, xdata);
- }
- else {
- /*
- * We have to make sure that we never get to quota_statfs_cbk
- * with a cookie that points to something other than an inode,
- * which is exactly what would happen with STACK_UNWIND using
- * that as a callback. Therefore, use default_statfs_cbk in
- * this case instead.
- */
- gf_log(this->name,GF_LOG_WARNING,
- "missing inode, cannot adjust for quota");
- STACK_WIND (frame, default_statfs_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs, loc, xdata);
- }
- return 0;
+ return 0;
}
-
-int
-quota_readdirp_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, gf_dirent_t *entries,
+int32_t
+quota_statfs_helper(call_frame_t *frame, xlator_t *this, loc_t *loc,
dict_t *xdata)
{
- gf_dirent_t *entry = NULL;
+ quota_local_t *local = frame->local;
+ int op_errno = EINVAL;
- if (op_ret <= 0)
- goto unwind;
+ GF_VALIDATE_OR_GOTO("quota", local, err);
- list_for_each_entry (entry, &entries->list, list) {
- /* TODO: fill things */
- }
+ if (-1 == local->op_ret) {
+ op_errno = local->op_errno;
+ goto err;
+ }
-unwind:
- STACK_UNWIND_STRICT (readdirp, frame, op_ret, op_errno, entries, xdata);
+ STACK_WIND_COOKIE(frame, quota_statfs_cbk, local->inode, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+ return 0;
+err:
+ QUOTA_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
- return 0;
+ return 0;
}
-int
-quota_readdirp (call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
- off_t offset, dict_t *dict)
+
+int32_t
+quota_statfs_validate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
{
- int ret = 0;
+ quota_local_t *local = NULL;
+ int32_t ret = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ uint64_t value = 0;
+ quota_meta_t size = {
+ 0,
+ };
+
+ local = frame->local;
+
+ if (op_ret < 0)
+ goto resume;
+
+ GF_ASSERT(local);
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR("quota", this, resume, op_errno, EINVAL);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, xdata, resume, op_errno, EINVAL);
+
+ ret = inode_ctx_get(local->validate_loc.inode, this, &value);
+
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ if ((ret == -1) || (ctx == NULL)) {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, Q_MSG_INODE_CTX_GET_FAILED,
+ "quota context is not present in inode (gfid:%s)",
+ uuid_utoa(local->validate_loc.inode->gfid));
+ op_errno = EINVAL;
+ goto resume;
+ }
+
+ ret = quota_dict_get_meta(xdata, QUOTA_SIZE_KEY, SLEN(QUOTA_SIZE_KEY),
+ &size);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, Q_MSG_SIZE_KEY_MISSING,
+ "size key not present in "
+ "dict");
+ op_errno = EINVAL;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ ctx->size = size.size;
+ ctx->validate_time = gf_time();
+ ctx->file_count = size.file_count;
+ ctx->dir_count = size.dir_count;
+ }
+ UNLOCK(&ctx->lock);
+
+resume:
+ local->op_errno = op_errno;
+ quota_link_count_decrement(frame);
+ return 0;
+}
- if (dict) {
- ret = dict_set_uint64 (dict, QUOTA_SIZE_KEY, 0);
- if (ret < 0) {
- goto err;
- }
+void
+quota_get_limit_dir_continuation(struct list_head *parents, inode_t *inode,
+ int32_t op_ret, int32_t op_errno, void *data)
+{
+ call_frame_t *frame = NULL;
+ xlator_t *this = NULL;
+ quota_dentry_t *entry = NULL;
+ inode_t *parent = NULL;
+
+ frame = data;
+ this = THIS;
+
+ if ((op_ret < 0) || list_empty(parents)) {
+ if (op_ret >= 0) {
+ gf_msg(this->name, GF_LOG_WARNING, EIO, Q_MSG_ANCESTRY_BUILD_FAILED,
+ "Couldn't build ancestry for inode (gfid:%s). "
+ "Without knowing ancestors till root, quota "
+ "cannot be enforced. "
+ "Hence, failing fop with EIO",
+ uuid_utoa(inode->gfid));
+ op_errno = EIO;
}
- STACK_WIND (frame, quota_readdirp_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
- fd, size, offset, dict);
- return 0;
-err:
- STACK_UNWIND_STRICT (readdirp, frame, -1, EINVAL, NULL, NULL);
- return 0;
+ quota_handle_validate_error(frame, -1, op_errno);
+ goto out;
+ }
+
+ entry = list_entry(parents, quota_dentry_t, next);
+ parent = inode_find(inode->table, entry->par);
+
+ quota_get_limit_dir(frame, parent, this);
+
+ inode_unref(parent);
+out:
+ return;
}
+void
+quota_statfs_continue(call_frame_t *frame, xlator_t *this, inode_t *inode)
+{
+ quota_local_t *local = frame->local;
+ int ret = -1;
+
+ LOCK(&local->lock);
+ {
+ local->inode = inode_ref(inode);
+ }
+ UNLOCK(&local->lock);
+
+ ret = quota_validate(frame, local->inode, this, quota_statfs_validate_cbk);
+ if (0 > ret)
+ quota_handle_validate_error(frame, -1, -ret);
+}
-int32_t
-mem_acct_init (xlator_t *this)
+void
+quota_get_limit_dir(call_frame_t *frame, inode_t *cur_inode, xlator_t *this)
{
- int ret = -1;
+ inode_t *inode = NULL;
+ inode_t *parent = NULL;
+ uint64_t value = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = frame->local;
- if (!this)
- return ret;
+ if (!cur_inode)
+ goto out;
- ret = xlator_mem_acct_init (this, gf_quota_mt_end + 1);
+ inode = inode_ref(cur_inode);
+ while (inode) {
+ value = 0;
+ inode_ctx_get(inode, this, &value);
- if (ret != 0) {
- gf_log (this->name, GF_LOG_WARNING, "Memory accounting"
- "init failed");
- return ret;
+ if (value) {
+ ctx = (quota_inode_ctx_t *)(unsigned long)value;
+ if (ctx->hard_lim > 0)
+ break;
}
- return ret;
-}
+ if (__is_root_gfid(inode->gfid))
+ goto off;
+
+ parent = inode_parent(inode, 0, NULL);
+ if (!parent) {
+ (void)quota_build_ancestry(inode, quota_get_limit_dir_continuation,
+ frame);
+ goto out;
+ }
+
+ inode_unref(inode);
+ inode = parent;
+ }
+ quota_statfs_continue(frame, this, inode);
+ inode_unref(inode);
+ return;
+
+off:
+ gf_msg_debug(this->name, 0, "No limit set on the inode or it's parents.");
+
+ QUOTA_STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, &local->loc,
+ local->xdata);
+out:
+ inode_unref(inode);
+
+ return;
+}
int32_t
-quota_forget (xlator_t *this, inode_t *inode)
+quota_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- int32_t ret = 0;
- uint64_t ctx_int = 0;
- quota_inode_ctx_t *ctx = NULL;
- quota_dentry_t *dentry = NULL, *tmp;
+ int op_errno = 0;
+ int ret = -1;
+ int8_t ignore_deem_statfs = 0;
+ quota_priv_t *priv = NULL;
+ quota_local_t *local = NULL;
+ call_stub_t *stub = NULL;
- ret = inode_ctx_del (inode, this, &ctx_int);
+ priv = this->private;
+ GF_ASSERT(loc);
- if (ret < 0) {
- return 0;
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
+
+ ret = dict_get_int8(xdata, GF_INTERNAL_IGNORE_DEEM_STATFS,
+ &ignore_deem_statfs);
+ ret = 0;
+
+ if (ignore_deem_statfs)
+ goto off;
+
+ if (priv->consider_statfs && loc->inode) {
+ local = quota_local_new();
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ frame->local = local;
+
+ ret = loc_copy(&local->loc, loc);
+ if (-1 == ret) {
+ op_errno = ENOMEM;
+ goto err;
}
- ctx = (quota_inode_ctx_t *) (long)ctx_int;
+ if (xdata)
+ local->xdata = dict_ref(xdata);
- LOCK (&ctx->lock);
- {
- list_for_each_entry_safe (dentry, tmp, &ctx->parents, next) {
- __quota_dentry_free (dentry);
- }
+ stub = fop_statfs_stub(frame, quota_statfs_helper, &local->loc,
+ local->xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto err;
}
- UNLOCK (&ctx->lock);
- LOCK_DESTROY (&ctx->lock);
+ LOCK(&local->lock);
+ {
+ local->link_count = 1;
+ local->stub = stub;
+ }
+ UNLOCK(&local->lock);
- GF_FREE (ctx);
+ quota_get_limit_dir(frame, loc->inode, this);
return 0;
-}
+ }
+
+ /*
+ * We have to make sure that we never get to quota_statfs_cbk
+ * with a cookie that points to something other than an inode,
+ * which is exactly what would happen with STACK_UNWIND using
+ * that as a callback. Therefore, use default_statfs_cbk in
+ * this case instead.
+ *
+ * Also if the option deem-statfs is not set to "on" don't
+ * bother calculating quota limit on / in statfs_cbk.
+ */
+ if (priv->consider_statfs)
+ gf_log(this->name, GF_LOG_ERROR,
+ "Missing inode, can't adjust for quota");
+
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->statfs,
+ loc, xdata);
+ return 0;
+err:
+ QUOTA_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
int
-quota_parse_limits (quota_priv_t *priv, xlator_t *this, dict_t *xl_options,
- struct list_head *old_list)
+quota_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
{
- int32_t ret = -1;
- char *str = NULL;
- char *str_val = NULL;
- char *path = NULL, *saveptr = NULL;
- uint64_t value = 0;
- limits_t *quota_lim = NULL, *old = NULL;
+ gf_dirent_t *entry = NULL;
+ quota_local_t *local = NULL;
+ loc_t loc = {
+ 0,
+ };
- ret = dict_get_str (xl_options, "limit-set", &str);
+ if (op_ret <= 0)
+ goto unwind;
- if (str) {
- path = strtok_r (str, ":", &saveptr);
+ local = frame->local;
- while (path) {
- str_val = strtok_r (NULL, ",", &saveptr);
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ if ((strcmp(entry->d_name, ".") == 0) ||
+ (strcmp(entry->d_name, "..") == 0) || entry->inode == NULL)
+ continue;
- ret = gf_string2bytesize (str_val, &value);
- if (ret != 0)
- goto err;
+ gf_uuid_copy(loc.gfid, entry->d_stat.ia_gfid);
+ loc.inode = inode_ref(entry->inode);
+ loc.parent = inode_ref(local->loc.inode);
+ gf_uuid_copy(loc.pargfid, loc.parent->gfid);
+ loc.name = entry->d_name;
- QUOTA_ALLOC_OR_GOTO (quota_lim, limits_t, err);
+ quota_fill_inodectx(this, entry->inode, entry->dict, &loc,
+ &entry->d_stat, &op_errno);
- quota_lim->path = path;
+ loc_wipe(&loc);
+ }
- quota_lim->value = value;
+unwind:
+ QUOTA_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata);
- gf_log (this->name, GF_LOG_INFO, "%s:%"PRId64,
- quota_lim->path, quota_lim->value);
+ return 0;
+}
- if (old_list != NULL) {
- list_for_each_entry (old, old_list,
- limit_list) {
- if (strcmp (old->path, quota_lim->path)
- == 0) {
- uuid_copy (quota_lim->gfid,
- old->gfid);
- break;
- }
- }
- }
+int
+quota_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *dict)
+{
+ quota_priv_t *priv = NULL;
+ int ret = 0;
+ gf_boolean_t new_dict = _gf_false;
+ quota_local_t *local = NULL;
- LOCK (&priv->lock);
- {
- list_add_tail (&quota_lim->limit_list,
- &priv->limit_head);
- }
- UNLOCK (&priv->lock);
+ priv = this->private;
- path = strtok_r (NULL, ":", &saveptr);
- }
- } else {
- gf_log (this->name, GF_LOG_INFO,
- "no \"limit-set\" option provided");
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
+
+ local = quota_local_new();
+
+ if (local == NULL) {
+ goto err;
+ }
+
+ frame->local = local;
+
+ local->loc.inode = inode_ref(fd->inode);
+
+ if (dict == NULL) {
+ dict = dict_new();
+ new_dict = _gf_true;
+ }
+
+ if (dict) {
+ ret = dict_set_int8(dict, QUOTA_LIMIT_KEY, 1);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "dict set of key for hard-limit");
+ goto err;
}
+ }
- LOCK (&priv->lock);
- {
- list_for_each_entry (quota_lim, &priv->limit_head, limit_list) {
- gf_log (this->name, GF_LOG_INFO, "%s:%"PRId64,
- quota_lim->path, quota_lim->value);
- }
+ if (dict) {
+ ret = dict_set_int8(dict, QUOTA_LIMIT_OBJECTS_KEY, 1);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "dict set of key for hard-limit "
+ "failed");
+ goto err;
}
- UNLOCK (&priv->lock);
+ }
- ret = 0;
+ STACK_WIND(frame, quota_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset, dict);
+
+ if (new_dict) {
+ dict_unref(dict);
+ }
+
+ return 0;
err:
- return ret;
-}
+ STACK_UNWIND_STRICT(readdirp, frame, -1, EINVAL, NULL, NULL);
+ if (new_dict) {
+ dict_unref(dict);
+ }
+
+ return 0;
+
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+ fd, size, offset, dict);
+ return 0;
+}
int32_t
-init (xlator_t *this)
+quota_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- int32_t ret = -1;
- quota_priv_t *priv = NULL;
+ int32_t ret = 0;
+ uint64_t ctx_int = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_local_t *local = NULL;
+
+ local = frame->local;
+
+ if ((op_ret < 0) || (local == NULL)) {
+ goto out;
+ }
+
+ ret = inode_ctx_get(local->loc.inode, this, &ctx_int);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_INODE_CTX_GET_FAILED,
+ "%s: failed to get the context", local->loc.path);
+ goto out;
+ }
+
+ ctx = (quota_inode_ctx_t *)(unsigned long)ctx_int;
+
+ if (ctx == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_INODE_CTX_GET_FAILED,
+ "quota context not set in %s (gfid:%s)", local->loc.path,
+ uuid_utoa(local->loc.inode->gfid));
+ goto out;
+ }
+
+ LOCK(&ctx->lock);
+ {
+ ctx->buf = *postbuf;
+ }
+ UNLOCK(&ctx->lock);
- if ((this->children == NULL)
- || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "FATAL: quota (%s) not configured with "
- "exactly one child", this->name);
- return -1;
- }
+out:
+ QUOTA_STACK_UNWIND(fallocate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
- if (this->parents == NULL) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile");
- }
+ return 0;
+}
- QUOTA_ALLOC_OR_GOTO (priv, quota_priv_t, err);
+int32_t
+quota_fallocate_helper(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t mode, off_t offset, size_t len, dict_t *xdata)
+{
+ quota_local_t *local = NULL;
+ int32_t op_errno = EINVAL;
+
+ local = frame->local;
- INIT_LIST_HEAD (&priv->limit_head);
+ GF_VALIDATE_OR_GOTO("quota", local, unwind);
- LOCK_INIT (&priv->lock);
+ if (local->op_ret == -1) {
+ op_errno = local->op_errno;
+ if (op_errno == ENOENT || op_errno == ESTALE) {
+ /* We may get ENOENT/ESTALE in case of below scenario
+ * fd = open file.txt
+ * unlink file.txt
+ * fallocate on fd
+ * Here build_ancestry can fail as the file is removed.
+ * For now ignore ENOENT/ESTALE on active fd
+ * We need to re-visit this code once we understand
+ * how other file-system behave in this scenario
+ */
+ gf_msg_debug(this->name, 0,
+ "quota enforcer failed "
+ "with ENOENT/ESTALE on %s, cannot check "
+ "quota limits and allowing fallocate",
+ uuid_utoa(fd->inode->gfid));
+ } else {
+ goto unwind;
+ }
+ }
- this->private = priv;
+ STACK_WIND(frame, quota_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+ return 0;
- ret = quota_parse_limits (priv, this, this->options, NULL);
+unwind:
+ QUOTA_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+}
- if (ret) {
- goto err;
+int32_t
+quota_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ int32_t parents = 0;
+ int32_t fail_count = 0;
+ quota_local_t *local = NULL;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_priv_t *priv = NULL;
+ quota_dentry_t *dentry = NULL;
+ quota_dentry_t *tmp = NULL;
+ call_stub_t *stub = NULL;
+ struct list_head head = {
+ 0,
+ };
+ inode_t *par_inode = NULL;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, unwind);
+
+ WIND_IF_QUOTAOFF(priv->is_quota_on, off);
+
+ INIT_LIST_HEAD(&head);
+
+ GF_ASSERT(frame);
+ GF_VALIDATE_OR_GOTO("quota", this, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, fd, unwind);
+
+ local = quota_local_new();
+ if (local == NULL) {
+ goto unwind;
+ }
+
+ frame->local = local;
+ local->loc.inode = inode_ref(fd->inode);
+
+ (void)quota_inode_ctx_get(fd->inode, this, &ctx, 0);
+ if (ctx == NULL) {
+ gf_msg_debug(this->name, 0,
+ "quota context is NULL on inode"
+ " (%s). If quota is not enabled recently and "
+ "crawler has finished crawling, its an error",
+ uuid_utoa(local->loc.inode->gfid));
+ }
+
+ stub = fop_fallocate_stub(frame, quota_fallocate_helper, fd, mode, offset,
+ len, xdata);
+ if (stub == NULL) {
+ op_errno = ENOMEM;
+ goto unwind;
+ }
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, unwind);
+
+ parents = quota_add_parents_from_ctx(ctx, &head);
+ if (parents == -1) {
+ op_errno = errno;
+ goto unwind;
+ }
+
+ /*
+ * Note that by using len as the delta we're assuming the range from
+ * offset to offset+len has not already been allocated. This can result
+ * in ENOSPC errors attempting to allocate an already allocated range.
+ */
+ local->delta = len;
+ local->object_delta = 0;
+ local->stub = stub;
+ local->link_count = parents;
+
+ if (parents == 0) {
+ local->link_count = 1;
+ quota_check_limit(frame, fd->inode, this);
+ } else {
+ list_for_each_entry_safe(dentry, tmp, &head, next)
+ {
+ par_inode = do_quota_check_limit(frame, fd->inode, this, dentry,
+ _gf_false);
+ if (par_inode == NULL) {
+ /* remove stale entry from inode_ctx */
+ quota_dentry_del(ctx, dentry->name, dentry->par);
+ parents--;
+ fail_count++;
+ } else {
+ inode_unref(par_inode);
+ }
+ __quota_dentry_free(dentry);
}
- GF_OPTION_INIT ("timeout", priv->timeout, int64, err);
+ if (parents == 0) {
+ LOCK(&local->lock);
+ {
+ local->link_count++;
+ }
+ UNLOCK(&local->lock);
+ quota_check_limit(frame, fd->inode, this);
+ }
- this->local_pool = mem_pool_new (quota_local_t, 64);
- if (!this->local_pool) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- goto err;
+ while (fail_count != 0) {
+ quota_link_count_decrement(frame);
+ fail_count--;
}
+ }
- ret = 0;
-err:
- return ret;
-}
+ return 0;
+unwind:
+ QUOTA_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+ return 0;
+
+off:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+ return 0;
+}
void
-__quota_reconfigure_inode_ctx (xlator_t *this, inode_t *inode, limits_t *limit)
+quota_log_helper(char **usage_str, int64_t cur_size, inode_t *inode,
+ char **path, time_t *cur_time)
{
- int ret = -1;
- quota_inode_ctx_t *ctx = NULL;
+ xlator_t *this = THIS;
- GF_VALIDATE_OR_GOTO ("quota", this, out);
- GF_VALIDATE_OR_GOTO (this->name, inode, out);
- GF_VALIDATE_OR_GOTO (this->name, limit, out);
+ if (!usage_str || !inode || !path || !cur_time) {
+ gf_log(this->name, GF_LOG_ERROR, "Received null argument");
+ return;
+ }
- ret = quota_inode_ctx_get (inode, limit->value, this, NULL, NULL, &ctx,
- 1);
- if ((ret == -1) || (ctx == NULL)) {
- gf_log (this->name, GF_LOG_WARNING, "cannot create quota "
- "context in inode(gfid:%s)",
- uuid_utoa (inode->gfid));
- goto out;
- }
+ *usage_str = gf_uint64_2human_readable(cur_size);
+ if (!(*usage_str))
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, Q_MSG_ENOMEM,
+ "integer to string conversion failed Reason"
+ ":\"Cannot allocate memory\"");
- LOCK (&ctx->lock);
- {
- ctx->limit = limit->value;
- }
- UNLOCK (&ctx->lock);
+ inode_path(inode, NULL, path);
+ if (!(*path))
+ *path = uuid_utoa(inode->gfid);
-out:
- return;
+ *cur_time = gf_time();
}
-
+/* Logs if
+ * i. Usage crossed soft limit
+ * ii. Usage above soft limit and alert-time elapsed
+ */
void
-__quota_reconfigure (xlator_t *this, inode_table_t *itable, limits_t *limit)
+quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
+ int64_t delta)
{
- inode_t *inode = NULL;
+ time_t cur_time = 0;
+ char *usage_str = NULL;
+ char *path = NULL;
+ int64_t cur_size = 0;
+ quota_priv_t *priv = NULL;
- if ((this == NULL) || (itable == NULL) || (limit == NULL)) {
- goto out;
- }
+ priv = this->private;
+ cur_size = ctx->size + delta;
- if (!uuid_is_null (limit->gfid)) {
- inode = inode_find (itable, limit->gfid);
- } else {
- inode = inode_resolve (itable, limit->path);
- }
+ if ((ctx->soft_lim <= 0) || cur_size < ctx->soft_lim)
+ return;
- if (inode != NULL) {
- __quota_reconfigure_inode_ctx (this, inode, limit);
- }
+ /* Usage crossed/reached soft limit */
+ if (DID_REACH_LIMIT(ctx->soft_lim, ctx->size, cur_size)) {
+ quota_log_helper(&usage_str, cur_size, inode, &path, &cur_time);
-out:
- return;
+ gf_msg(this->name, GF_LOG_ALERT, 0, Q_MSG_CROSSED_SOFT_LIMIT,
+ "Usage crossed soft limit: "
+ "%s used by %s",
+ usage_str, path);
+
+ gf_event(EVENT_QUOTA_CROSSED_SOFT_LIMIT,
+ "Usage=%s;volume=%s;"
+ "path=%s",
+ usage_str, priv->volume_uuid, path);
+
+ ctx->prev_log_time = cur_time;
+
+ }
+ /* Usage is above soft limit */
+ else if (cur_size > ctx->soft_lim &&
+ quota_timeout(ctx->prev_log_time, priv->log_timeout)) {
+ quota_log_helper(&usage_str, cur_size, inode, &path, &cur_time);
+
+ gf_msg(this->name, GF_LOG_ALERT, 0, Q_MSG_CROSSED_SOFT_LIMIT,
+ "Usage is above soft limit: %s used by %s", usage_str, path);
+
+ gf_event(EVENT_QUOTA_CROSSED_SOFT_LIMIT,
+ "Usage=%s;volume=%s;"
+ "path=%s",
+ usage_str, priv->volume_uuid, path);
+
+ ctx->prev_log_time = cur_time;
+ }
+
+ if (path)
+ GF_FREE(path);
+
+ if (usage_str)
+ GF_FREE(usage_str);
}
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
-int
-reconfigure (xlator_t *this, dict_t *options)
+ ret = xlator_mem_acct_init(this, gf_quota_mt_end + 1);
+
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "Memory accounting init failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int32_t
+quota_forget(xlator_t *this, inode_t *inode)
{
- int32_t ret = -1;
- quota_priv_t *priv = NULL;
- limits_t *limit = NULL, *next = NULL, *new = NULL;
- struct list_head head = {0, };
- xlator_t *top = NULL;
- char found = 0;
+ int32_t ret = 0;
+ uint64_t ctx_int = 0;
+ quota_inode_ctx_t *ctx = NULL;
+ quota_dentry_t *dentry = NULL, *tmp;
+
+ ret = inode_ctx_del(inode, this, &ctx_int);
- priv = this->private;
+ if (ret < 0) {
+ return 0;
+ }
- INIT_LIST_HEAD (&head);
+ ctx = (quota_inode_ctx_t *)(long)ctx_int;
- LOCK (&priv->lock);
+ LOCK(&ctx->lock);
+ {
+ list_for_each_entry_safe(dentry, tmp, &ctx->parents, next)
{
- list_splice_init (&priv->limit_head, &head);
+ __quota_dentry_free(dentry);
}
- UNLOCK (&priv->lock);
+ }
+ UNLOCK(&ctx->lock);
- ret = quota_parse_limits (priv, this, options, &head);
- if (ret == -1) {
- gf_log ("quota", GF_LOG_WARNING,
- "quota reconfigure failed, "
- "new changes will not take effect");
- goto out;
+ LOCK_DESTROY(&ctx->lock);
+
+ GF_FREE(ctx);
+
+ return 0;
+}
+
+int
+notify(xlator_t *this, int event, void *data, ...)
+{
+ quota_priv_t *priv = NULL;
+ int ret = 0;
+ rpc_clnt_t *rpc = NULL;
+ gf_boolean_t conn_status = _gf_true;
+ xlator_t *victim = data;
+
+ priv = this->private;
+ if (!priv || !priv->is_quota_on)
+ goto out;
+
+ if (event == GF_EVENT_PARENT_DOWN) {
+ rpc = priv->rpc_clnt;
+ if (rpc) {
+ rpc_clnt_disable(rpc);
+ pthread_mutex_lock(&priv->conn_mutex);
+ {
+ conn_status = priv->conn_status;
+ while (conn_status) {
+ (void)pthread_cond_wait(&priv->conn_cond,
+ &priv->conn_mutex);
+ conn_status = priv->conn_status;
+ }
+ }
+ pthread_mutex_unlock(&priv->conn_mutex);
+ gf_log(this->name, GF_LOG_INFO,
+ "Notify GF_EVENT_PARENT_DOWN for brick %s", victim->name);
}
+ }
+
+out:
+ ret = default_notify(this, event, data);
+ return ret;
+}
- LOCK (&priv->lock);
+int32_t
+init(xlator_t *this)
+{
+ int32_t ret = -1;
+ quota_priv_t *priv = NULL;
+ rpc_clnt_t *rpc = NULL;
+
+ if ((this->children == NULL) || this->children->next) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, Q_MSG_INVALID_VOLFILE,
+ "FATAL: quota (%s) not configured with "
+ "exactly one child",
+ this->name);
+ return -1;
+ }
+
+ if (this->parents == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_INVALID_VOLFILE,
+ "dangling volume. check volfile");
+ }
+
+ QUOTA_ALLOC_OR_GOTO(priv, quota_priv_t, err);
+
+ LOCK_INIT(&priv->lock);
+
+ this->private = priv;
+
+ GF_OPTION_INIT("deem-statfs", priv->consider_statfs, bool, err);
+ GF_OPTION_INIT("server-quota", priv->is_quota_on, bool, err);
+ GF_OPTION_INIT("default-soft-limit", priv->default_soft_lim, percent, err);
+ GF_OPTION_INIT("soft-timeout", priv->soft_timeout, time, err);
+ GF_OPTION_INIT("hard-timeout", priv->hard_timeout, time, err);
+ GF_OPTION_INIT("alert-time", priv->log_timeout, time, err);
+ GF_OPTION_INIT("volume-uuid", priv->volume_uuid, str, err);
+
+ this->local_pool = mem_pool_new(quota_local_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, Q_MSG_ENOMEM,
+ "failed to create local_t's memory pool");
+ goto err;
+ }
+
+ pthread_mutex_init(&priv->conn_mutex, NULL);
+ pthread_cond_init(&priv->conn_cond, NULL);
+ priv->conn_status = _gf_false;
+
+ if (priv->is_quota_on) {
+ rpc = quota_enforcer_init(this, this->options);
+ if (rpc == NULL) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ Q_MSG_QUOTA_ENFORCER_RPC_INIT_FAILED,
+ "quota enforcer rpc init failed");
+ goto err;
+ }
+
+ LOCK(&priv->lock);
{
- top = ((glusterfs_ctx_t *)this->ctx)->active->top;
- GF_ASSERT (top);
+ priv->rpc_clnt = rpc;
+ }
+ UNLOCK(&priv->lock);
+ }
- list_for_each_entry (limit, &priv->limit_head, limit_list) {
- __quota_reconfigure (this, top->itable, limit);
- }
+ ret = 0;
+err:
+ return ret;
+}
- list_for_each_entry_safe (limit, next, &head, limit_list) {
- found = 0;
- list_for_each_entry (new, &priv->limit_head,
- limit_list) {
- if (strcmp (new->path, limit->path) == 0) {
- found = 1;
- break;
- }
- }
-
- if (!found) {
- limit->value = -1;
- __quota_reconfigure (this, top->itable, limit);
- }
-
- list_del_init (&limit->limit_list);
- GF_FREE (limit);
- }
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ int32_t ret = -1;
+ quota_priv_t *priv = NULL;
+ gf_boolean_t quota_on = _gf_false;
+ rpc_clnt_t *rpc = NULL;
+
+ priv = this->private;
+
+ GF_OPTION_RECONF("deem-statfs", priv->consider_statfs, options, bool, out);
+ GF_OPTION_RECONF("server-quota", quota_on, options, bool, out);
+ GF_OPTION_RECONF("default-soft-limit", priv->default_soft_lim, options,
+ percent, out);
+ GF_OPTION_RECONF("alert-time", priv->log_timeout, options, time, out);
+ GF_OPTION_RECONF("soft-timeout", priv->soft_timeout, options, time, out);
+ GF_OPTION_RECONF("hard-timeout", priv->hard_timeout, options, time, out);
+
+ if (quota_on) {
+ priv->rpc_clnt = quota_enforcer_init(this, this->options);
+ if (priv->rpc_clnt == NULL) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ Q_MSG_QUOTA_ENFORCER_RPC_INIT_FAILED,
+ "quota enforcer rpc init failed");
+ goto out;
+ }
+
+ } else {
+ LOCK(&priv->lock);
+ {
+ rpc = priv->rpc_clnt;
+ priv->rpc_clnt = NULL;
}
- UNLOCK (&priv->lock);
+ UNLOCK(&priv->lock);
- GF_OPTION_RECONF ("timeout", priv->timeout, options, int64, out);
+ if (rpc != NULL) {
+ // Quotad is shutdown when there is no started volume
+ // which has quota enabled. So, we should disable the
+ // enforcer client when quota is disabled on a volume,
+ // to avoid spurious reconnect attempts to a service
+ // (quotad), that is known to be down.
+ rpc_clnt_unref(rpc);
+ }
+ }
- ret = 0;
+ priv->is_quota_on = quota_on;
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
+int32_t
+quota_priv_dump(xlator_t *this)
+{
+ quota_priv_t *priv = NULL;
+ int32_t ret = -1;
+
+ GF_ASSERT(this);
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+
+ gf_proc_dump_add_section("xlators.features.quota.priv");
+
+ ret = TRY_LOCK(&priv->lock);
+ if (ret)
+ goto out;
+ else {
+ gf_proc_dump_write("soft-timeout", "%u", priv->soft_timeout);
+ gf_proc_dump_write("hard-timeout", "%u", priv->hard_timeout);
+ gf_proc_dump_write("alert-time", "%u", priv->log_timeout);
+ gf_proc_dump_write("quota-on", "%d", priv->is_quota_on);
+ gf_proc_dump_write("statfs", "%d", priv->consider_statfs);
+ gf_proc_dump_write("volume-uuid", "%s", priv->volume_uuid);
+ gf_proc_dump_write("validation-count", "%" PRIu64,
+ priv->validation_count);
+ }
+ UNLOCK(&priv->lock);
+
+out:
+ return 0;
+}
void
-fini (xlator_t *this)
+fini(xlator_t *this)
{
+ quota_priv_t *priv = NULL;
+ rpc_clnt_t *rpc = NULL;
+
+ priv = this->private;
+ if (!priv)
return;
+ rpc = priv->rpc_clnt;
+ priv->rpc_clnt = NULL;
+ if (rpc) {
+ rpc_clnt_connection_cleanup(&rpc->conn);
+ rpc_clnt_unref(rpc);
+ }
+
+ this->private = NULL;
+ LOCK_DESTROY(&priv->lock);
+ pthread_mutex_destroy(&priv->conn_mutex);
+ pthread_cond_destroy(&priv->conn_cond);
+
+ GF_FREE(priv);
+ if (this->local_pool) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
+ return;
}
-
struct xlator_fops fops = {
- .statfs = quota_statfs,
- .lookup = quota_lookup,
- .writev = quota_writev,
- .create = quota_create,
- .mkdir = quota_mkdir,
- .truncate = quota_truncate,
- .ftruncate = quota_ftruncate,
- .unlink = quota_unlink,
- .symlink = quota_symlink,
- .link = quota_link,
- .rename = quota_rename,
- .getxattr = quota_getxattr,
- .fgetxattr = quota_fgetxattr,
- .stat = quota_stat,
- .fstat = quota_fstat,
- .readlink = quota_readlink,
- .readv = quota_readv,
- .fsync = quota_fsync,
- .setattr = quota_setattr,
- .fsetattr = quota_fsetattr,
- .mknod = quota_mknod,
- .setxattr = quota_setxattr,
- .fsetxattr = quota_fsetxattr,
- .removexattr = quota_removexattr,
- .fremovexattr = quota_fremovexattr,
- .readdirp = quota_readdirp,
+ .statfs = quota_statfs,
+ .lookup = quota_lookup,
+ .writev = quota_writev,
+ .create = quota_create,
+ .mkdir = quota_mkdir,
+ .truncate = quota_truncate,
+ .ftruncate = quota_ftruncate,
+ .unlink = quota_unlink,
+ .symlink = quota_symlink,
+ .link = quota_link,
+ .rename = quota_rename,
+ .getxattr = quota_getxattr,
+ .fgetxattr = quota_fgetxattr,
+ .stat = quota_stat,
+ .fstat = quota_fstat,
+ .readlink = quota_readlink,
+ .readv = quota_readv,
+ .fsync = quota_fsync,
+ .setattr = quota_setattr,
+ .fsetattr = quota_fsetattr,
+ .mknod = quota_mknod,
+ .setxattr = quota_setxattr,
+ .fsetxattr = quota_fsetxattr,
+ .removexattr = quota_removexattr,
+ .fremovexattr = quota_fremovexattr,
+ .readdirp = quota_readdirp,
+ .fallocate = quota_fallocate,
};
-struct xlator_cbks cbks = {
- .forget = quota_forget
-};
+struct xlator_cbks cbks = {.forget = quota_forget};
+struct xlator_dumpops dumpops = {
+ .priv = quota_priv_dump,
+};
struct volume_options options[] = {
- {.key = {"limit-set"}},
- {.key = {"timeout"},
- .type = GF_OPTION_TYPE_SIZET,
- .min = 0,
- .max = 60,
- .default_value = "0",
- .description = "quota caches the directory sizes on client. Timeout "
- "indicates the timeout for the cache to be revalidated."
- },
- {.key = {NULL}}
+ {
+ .key = {"enable"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable is the volume option that can be used "
+ "to turn on quota.",
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .level = OPT_STATUS_BASIC,
+ .tags = {},
+ },
+ {
+ .key = {"deem-statfs"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "If set to on, it takes quota limits into"
+ " consideration while estimating fs size. (df command)"
+ " (Default is on).",
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {},
+ },
+ {
+ .key = {"server-quota"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Skip the quota enforcement if the feature is"
+ " not turned on. This is not a user exposed option.",
+ .flags = OPT_FLAG_NONE,
+ },
+ {
+ .key = {"default-soft-limit"},
+ .type = GF_OPTION_TYPE_PERCENT,
+ .default_value = "80%",
+ .op_version = {3},
+ .description = "Soft limit is expressed as a proportion of hard limit."
+ " Default-soft-limit is the proportion used when the "
+ " user does not supply any soft limit value.",
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {},
+ },
+ {
+ .key = {"soft-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 0,
+ .max = 1800,
+ .default_value = "60",
+ .description = "quota caches the directory sizes on client. "
+ "soft-timeout indicates the timeout for the validity of"
+ " cache before soft-limit has been crossed.",
+ .op_version = {3},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {},
+ },
+ {
+ .key = {"hard-timeout"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 0,
+ .max = 60,
+ .default_value = "5",
+ .description = "quota caches the directory sizes on client. "
+ "hard-timeout indicates the timeout for the validity of"
+ " cache after soft-limit has been crossed.",
+ .op_version = {3},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {},
+ },
+ {.key = {"volume-uuid"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "{{ volume.id }}",
+ .description = "uuid of the volume this brick is part of."},
+ {
+ .key = {"alert-time"},
+ .type = GF_OPTION_TYPE_TIME,
+ .min = 0,
+ .max = 7 * 86400,
+ .default_value = "86400",
+ .op_version = {3},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .description = "Frequency of limit breach messages in log.",
+ .tags = {},
+ },
+ {.key = {NULL}}};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "quota",
+ .category = GF_MAINTAINED,
};
diff --git a/xlators/features/quota/src/quota.h b/xlators/features/quota/src/quota.h
index cf9c4f9b769..0395d78c9ef 100644
--- a/xlators/features/quota/src/quota.h
+++ b/xlators/features/quota/src/quota.h
@@ -1,171 +1,266 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#ifndef _QUOTA_H
+#define _QUOTA_H
+
+#include <glusterfs/call-stub.h>
+#include "quota-mem-types.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/gf-event.h>
+#include "rpcsvc.h"
+#include "rpc-clnt.h"
+#include <glusterfs/byte-order.h>
+#include "glusterfs3-xdr.h"
+#include "glusterfs3.h"
+#include "xdr-generic.h"
+#include <glusterfs/compat-errno.h>
+#include "protocol-common.h"
+#include <glusterfs/quota-common-utils.h>
+#include "quota-messages.h"
+
+#define DIRTY "dirty"
+#define SIZE "size"
+#define CONTRIBUTION "contri"
+#define VAL_LENGTH 8
+#define READDIR_BUF 4096
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
+#ifndef UUID_CANONICAL_FORM_LEN
+#define UUID_CANONICAL_FORM_LEN 36
#endif
-#include "xlator.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "byte-order.h"
-#include "common-utils.h"
-#include "quota-mem-types.h"
+#define WIND_IF_QUOTAOFF(is_quota_on, label) \
+ if (!is_quota_on) \
+ goto label;
+
+#define QUOTA_WIND_FOR_INTERNAL_FOP(xdata, label) \
+ do { \
+ if (xdata && dict_get_sizen(xdata, GLUSTERFS_INTERNAL_FOP_KEY)) \
+ goto label; \
+ } while (0)
+
+#define DID_REACH_LIMIT(lim, prev_size, cur_size) \
+ ((cur_size) >= (lim) && (prev_size) < (lim))
+
+#define QUOTA_SAFE_INCREMENT(lock, var) \
+ do { \
+ LOCK(lock); \
+ var++; \
+ UNLOCK(lock); \
+ } while (0)
+
+#define QUOTA_SAFE_DECREMENT(lock, var) \
+ do { \
+ LOCK(lock); \
+ var--; \
+ UNLOCK(lock); \
+ } while (0)
+
+#define QUOTA_ALLOC_OR_GOTO(var, type, label) \
+ do { \
+ var = GF_CALLOC(sizeof(type), 1, gf_quota_mt_##type); \
+ if (!var) { \
+ gf_msg("", GF_LOG_ERROR, ENOMEM, Q_MSG_ENOMEM, "out of memory"); \
+ ret = -1; \
+ goto label; \
+ } \
+ } while (0);
+
+#define QUOTA_STACK_WIND_TAIL(frame, params...) \
+ do { \
+ quota_local_t *_local = NULL; \
+ \
+ if (frame) { \
+ _local = frame->local; \
+ frame->local = NULL; \
+ } \
+ \
+ STACK_WIND_TAIL(frame, params); \
+ \
+ if (_local) \
+ quota_local_cleanup(_local); \
+ } while (0)
+
+#define QUOTA_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ quota_local_t *_local = NULL; \
+ if (frame) { \
+ _local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ quota_local_cleanup(_local); \
+ } while (0)
+
+#define QUOTA_FREE_CONTRIBUTION_NODE(_contribution) \
+ do { \
+ list_del(&_contribution->contri_list); \
+ GF_FREE(_contribution); \
+ } while (0)
+
+#define GET_CONTRI_KEY(var, _vol_name, _gfid, _ret) \
+ do { \
+ char _gfid_unparsed[40]; \
+ if (_gfid != NULL) { \
+ gf_uuid_unparse(_gfid, _gfid_unparsed); \
+ _ret = gf_asprintf(var, QUOTA_XATTR_PREFIX "%s.%s." CONTRIBUTION, \
+ _vol_name, _gfid_unparsed); \
+ } else { \
+ _ret = gf_asprintf(var, QUOTA_XATTR_PREFIX "%s.." CONTRIBUTION, \
+ _vol_name); \
+ } \
+ } while (0)
-#define QUOTA_XATTR_PREFIX "trusted."
-#define DIRTY "dirty"
-#define SIZE "size"
-#define CONTRIBUTION "contri"
-#define VAL_LENGTH 8
-#define READDIR_BUF 4096
-
-#define QUOTA_STACK_DESTROY(_frame, _this) \
- do { \
- quota_local_t *_local = NULL; \
- _local = _frame->local; \
- _frame->local = NULL; \
- STACK_DESTROY (_frame->root); \
- quota_local_cleanup (_this, _local); \
- GF_FREE (_local); \
- } while (0)
-
-#define QUOTA_SAFE_INCREMENT(lock, var) \
- do { \
- LOCK (lock); \
- var ++; \
- UNLOCK (lock); \
- } while (0)
-
-#define QUOTA_SAFE_DECREMENT(lock, var) \
- do { \
- LOCK (lock); \
- var --; \
- UNLOCK (lock); \
- } while (0)
-
-#define QUOTA_ALLOC_OR_GOTO(var, type, label) \
- do { \
- var = GF_CALLOC (sizeof (type), 1, \
- gf_quota_mt_##type); \
- if (!var) { \
- gf_log ("", GF_LOG_ERROR, \
- "out of memory :("); \
- ret = -1; \
- goto label; \
- } \
- } while (0);
-
-#define QUOTA_STACK_UNWIND(fop, frame, params...) \
- do { \
- quota_local_t *_local = NULL; \
- xlator_t *_this = NULL; \
- if (frame) { \
- _local = frame->local; \
- _this = frame->this; \
- frame->local = NULL; \
- } \
- STACK_UNWIND_STRICT (fop, frame, params); \
- quota_local_cleanup (_this, _local); \
- mem_put (_local); \
- } while (0)
-
-#define QUOTA_FREE_CONTRIBUTION_NODE(_contribution) \
- do { \
- list_del (&_contribution->contri_list); \
- GF_FREE (_contribution); \
- } while (0)
-
-#define GET_CONTRI_KEY(var, _vol_name, _gfid, _ret) \
- do { \
- char _gfid_unparsed[40]; \
- uuid_unparse (_gfid, _gfid_unparsed); \
- _ret = gf_asprintf (var, QUOTA_XATTR_PREFIX \
- "%s.%s." CONTRIBUTION, \
- _vol_name, _gfid_unparsed); \
- } while (0)
-
-
-#define GET_CONTRI_KEY_OR_GOTO(var, _vol_name, _gfid, label) \
- do { \
- GET_CONTRI_KEY(var, _vol_name, _gfid, ret); \
- if (ret == -1) \
- goto label; \
- } while (0)
-
-#define GET_DIRTY_KEY_OR_GOTO(var, _vol_name, label) \
- do { \
- ret = gf_asprintf (var, QUOTA_XATTR_PREFIX \
- "%s." DIRTY, _vol_name); \
- if (ret == -1) \
- goto label; \
- } while (0)
+#define GET_CONTRI_KEY_OR_GOTO(var, _vol_name, _gfid, label) \
+ do { \
+ GET_CONTRI_KEY(var, _vol_name, _gfid, ret); \
+ if (ret == -1) \
+ goto label; \
+ } while (0)
+
+#define GET_DIRTY_KEY_OR_GOTO(var, _vol_name, label) \
+ do { \
+ ret = gf_asprintf(var, QUOTA_XATTR_PREFIX "%s." DIRTY, _vol_name); \
+ if (ret == -1) \
+ goto label; \
+ } while (0)
+
+#define QUOTA_REG_OR_LNK_FILE(ia_type) (IA_ISREG(ia_type) || IA_ISLNK(ia_type))
struct quota_dentry {
- char *name;
- uuid_t par;
- struct list_head next;
+ char *name;
+ uuid_t par;
+ struct list_head next;
};
typedef struct quota_dentry quota_dentry_t;
struct quota_inode_ctx {
- int64_t size;
- int64_t limit;
- struct iatt buf;
- struct list_head parents;
- struct timeval tv;
- gf_lock_t lock;
+ int64_t size;
+ int64_t hard_lim;
+ int64_t soft_lim;
+ int64_t file_count;
+ int64_t dir_count;
+ int64_t object_hard_lim;
+ int64_t object_soft_lim;
+ struct iatt buf;
+ struct list_head parents;
+ time_t validate_time;
+ time_t prev_log_time;
+ gf_boolean_t ancestry_built;
+ gf_lock_t lock;
};
typedef struct quota_inode_ctx quota_inode_ctx_t;
+typedef void (*quota_ancestry_built_t)(struct list_head *parents,
+ inode_t *inode, int32_t op_ret,
+ int32_t op_errno, void *data);
+
+typedef void (*quota_fop_continue_t)(call_frame_t *frame);
+
struct quota_local {
- gf_lock_t lock;
- uint32_t validate_count;
- uint32_t link_count;
- loc_t loc;
- loc_t oldloc;
- loc_t newloc;
- loc_t validate_loc;
- int64_t delta;
- int32_t op_ret;
- int32_t op_errno;
- int64_t size;
- int64_t limit;
- char just_validated;
- inode_t *inode;
- call_stub_t *stub;
+ gf_lock_t lock;
+ uint32_t link_count;
+ loc_t loc;
+ loc_t oldloc;
+ loc_t newloc;
+ loc_t validate_loc;
+ int64_t delta;
+ int8_t object_delta;
+ int32_t op_ret;
+ int32_t op_errno;
+ int64_t size;
+ char just_validated;
+ fop_lookup_cbk_t validate_cbk;
+ quota_fop_continue_t fop_continue_cbk;
+ inode_t *inode;
+ uuid_t common_ancestor; /* Used by quota_rename */
+ call_stub_t *stub;
+ struct iobref *iobref;
+ quota_limits_t limit;
+ quota_limits_t object_limit;
+ int64_t space_available;
+ quota_ancestry_built_t ancestry_cbk;
+ void *ancestry_data;
+ dict_t *xdata;
+ dict_t *validate_xdata;
+ int32_t quotad_conn_retry;
+ xlator_t *this;
+ call_frame_t *par_frame;
};
typedef struct quota_local quota_local_t;
struct quota_priv {
- int64_t timeout;
- struct list_head limit_head;
- gf_lock_t lock;
+ /* FIXME: consider time_t for timeouts. */
+ uint32_t soft_timeout;
+ uint32_t hard_timeout;
+ uint32_t log_timeout;
+ double default_soft_lim;
+ gf_boolean_t is_quota_on;
+ gf_boolean_t consider_statfs;
+ gf_lock_t lock;
+ rpc_clnt_prog_t *quota_enforcer;
+ struct rpcsvc_program *quotad_aggregator;
+ struct rpc_clnt *rpc_clnt;
+ rpcsvc_t *rpcsvc;
+ inode_table_t *itable;
+ char *volume_uuid;
+ uint64_t validation_count;
+ int32_t quotad_conn_status;
+ pthread_mutex_t conn_mutex;
+ pthread_cond_t conn_cond;
+ gf_boolean_t conn_status;
};
typedef struct quota_priv quota_priv_t;
-struct limits {
- struct list_head limit_list;
- char *path;
- int64_t value;
- uuid_t gfid;
-};
-typedef struct limits limits_t;
+int
+quota_enforcer_lookup(call_frame_t *frame, xlator_t *this, dict_t *xdata,
+ fop_lookup_cbk_t cbk);
+
+void
+_quota_enforcer_lookup(void *data);
+
+struct rpc_clnt *
+quota_enforcer_init(xlator_t *this, dict_t *options);
-uint64_t cn = 1;
+void
+quota_log_usage(xlator_t *this, quota_inode_ctx_t *ctx, inode_t *inode,
+ int64_t delta);
+
+int
+quota_build_ancestry(inode_t *inode, quota_ancestry_built_t ancestry_cbk,
+ void *data);
+
+void
+quota_get_limit_dir(call_frame_t *frame, inode_t *cur_inode, xlator_t *this);
+
+int32_t
+quota_check_limit(call_frame_t *frame, inode_t *inode, xlator_t *this);
+
+inode_t *
+do_quota_check_limit(call_frame_t *frame, inode_t *inode, xlator_t *this,
+ quota_dentry_t *dentry, gf_boolean_t force);
+int
+quota_fill_inodectx(xlator_t *this, inode_t *inode, dict_t *dict, loc_t *loc,
+ struct iatt *buf, int32_t *op_errno);
+
+int32_t
+quota_check_size_limit(call_frame_t *frame, quota_inode_ctx_t *ctx,
+ quota_priv_t *priv, inode_t *_inode, xlator_t *this,
+ int32_t *op_errno, int just_validated, int64_t delta,
+ quota_local_t *local, gf_boolean_t *skip_check);
+
+int32_t
+quota_check_object_limit(call_frame_t *frame, quota_inode_ctx_t *ctx,
+ quota_priv_t *priv, inode_t *_inode, xlator_t *this,
+ int32_t *op_errno, int just_validated,
+ quota_local_t *local, gf_boolean_t *skip_check);
+#endif
diff --git a/xlators/features/quota/src/quotad-aggregator.c b/xlators/features/quota/src/quotad-aggregator.c
new file mode 100644
index 00000000000..75d47867b5b
--- /dev/null
+++ b/xlators/features/quota/src/quotad-aggregator.c
@@ -0,0 +1,494 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "cli1-xdr.h"
+#include "quota.h"
+#include "quotad-helpers.h"
+#include "quotad-aggregator.h"
+
+static char *qd_ext_xattrs[] = {
+ QUOTA_SIZE_KEY,
+ QUOTA_LIMIT_KEY,
+ QUOTA_LIMIT_OBJECTS_KEY,
+ NULL,
+};
+
+static struct rpcsvc_program quotad_aggregator_prog;
+
+struct iobuf *
+quotad_serialize_reply(rpcsvc_request_t *req, void *arg, struct iovec *outmsg,
+ xdrproc_t xdrproc)
+{
+ struct iobuf *iob = NULL;
+ ssize_t retlen = 0;
+ ssize_t xdr_size = 0;
+
+ GF_VALIDATE_OR_GOTO("server", req, ret);
+
+ /* First, get the io buffer into which the reply in arg will
+ * be serialized.
+ */
+ if (arg && xdrproc) {
+ xdr_size = xdr_sizeof(xdrproc, arg);
+ iob = iobuf_get2(req->svc->ctx->iobuf_pool, xdr_size);
+ if (!iob) {
+ gf_log_callingfn(THIS->name, GF_LOG_ERROR, "Failed to get iobuf");
+ goto ret;
+ };
+
+ iobuf_to_iovec(iob, outmsg);
+ /* Use the given serializer to translate the given C structure
+ * in arg to XDR format which will be written into the buffer
+ * in outmsg.
+ */
+ /* retlen is used to received the error since size_t is unsigned and we
+ * need -1 for error notification during encoding.
+ */
+
+ retlen = xdr_serialize_generic(*outmsg, arg, xdrproc);
+ if (retlen == -1) {
+ /* Failed to Encode 'GlusterFS' msg in RPC is not exactly
+ failure of RPC return values.. Client should get
+ notified about this, so there are no missing frames */
+ gf_log_callingfn("", GF_LOG_ERROR, "Failed to encode message");
+ req->rpc_err = GARBAGE_ARGS;
+ retlen = 0;
+ }
+ }
+ outmsg->iov_len = retlen;
+ret:
+ return iob;
+}
+
+int
+quotad_aggregator_submit_reply(call_frame_t *frame, rpcsvc_request_t *req,
+ void *arg, struct iovec *payload,
+ int payloadcount, struct iobref *iobref,
+ xdrproc_t xdrproc)
+{
+ struct iobuf *iob = NULL;
+ int ret = -1;
+ struct iovec rsp = {
+ 0,
+ };
+ quotad_aggregator_state_t *state = NULL;
+ char new_iobref = 0;
+
+ GF_VALIDATE_OR_GOTO("server", req, ret);
+
+ if (frame) {
+ state = frame->root->state;
+ frame->local = NULL;
+ }
+
+ if (!iobref) {
+ iobref = iobref_new();
+ if (!iobref) {
+ goto ret;
+ }
+
+ new_iobref = 1;
+ }
+
+ iob = quotad_serialize_reply(req, arg, &rsp, xdrproc);
+ if (!iob) {
+ gf_msg("", GF_LOG_ERROR, 0, Q_MSG_DICT_SERIALIZE_FAIL,
+ "Failed to serialize reply");
+ goto ret;
+ }
+
+ iobref_add(iobref, iob);
+
+ ret = rpcsvc_submit_generic(req, &rsp, 1, payload, payloadcount, iobref);
+
+ iobuf_unref(iob);
+
+ ret = 0;
+ret:
+ if (state) {
+ quotad_aggregator_free_state(state);
+ }
+
+ if (frame)
+ STACK_DESTROY(frame->root);
+
+ if (new_iobref) {
+ iobref_unref(iobref);
+ }
+
+ return ret;
+}
+
+int
+quotad_aggregator_getlimit_cbk(xlator_t *this, call_frame_t *frame,
+ void *lookup_rsp)
+{
+ gfs3_lookup_rsp *rsp = lookup_rsp;
+ gf_cli_rsp cli_rsp = {
+ 0,
+ };
+ dict_t *xdata = NULL;
+ quotad_aggregator_state_t *state = NULL;
+ int ret = -1;
+ int type = 0;
+
+ if (!rsp || (rsp->op_ret == -1))
+ goto reply;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE(frame->this, xdata, (rsp->xdata.xdata_val),
+ (rsp->xdata.xdata_len), rsp->op_ret,
+ rsp->op_errno, out);
+
+ if (xdata) {
+ state = frame->root->state;
+ ret = dict_get_int32n(state->req_xdata, "type", SLEN("type"), &type);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_set_int32_sizen(xdata, "type", type);
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = 0;
+out:
+ rsp->op_ret = ret;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, Q_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to unserialize "
+ "nameless lookup rsp");
+ goto reply;
+ }
+ cli_rsp.op_ret = rsp->op_ret;
+ cli_rsp.op_errno = rsp->op_errno;
+ cli_rsp.op_errstr = "";
+ if (xdata) {
+ GF_PROTOCOL_DICT_SERIALIZE(frame->this, xdata, (&cli_rsp.dict.dict_val),
+ (cli_rsp.dict.dict_len), cli_rsp.op_errno,
+ reply);
+ }
+
+reply:
+ quotad_aggregator_submit_reply(frame, (frame) ? frame->local : NULL,
+ (void *)&cli_rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp);
+
+ dict_unref(xdata);
+ GF_FREE(cli_rsp.dict.dict_val);
+ return 0;
+}
+
+int
+quotad_aggregator_getlimit(rpcsvc_request_t *req)
+{
+ call_frame_t *frame = NULL;
+ gf_cli_req cli_req = {
+ {0},
+ };
+ gf_cli_rsp cli_rsp = {0};
+ quotad_aggregator_state_t *state = NULL;
+ xlator_t *this = NULL;
+ dict_t *dict = NULL;
+ int ret = -1, op_errno = 0;
+ char *gfid_str = NULL;
+ uuid_t gfid = {0};
+ char *volume_uuid = NULL;
+
+ GF_VALIDATE_OR_GOTO("quotad-aggregator", req, err);
+
+ this = THIS;
+
+ cli_req.dict.dict_val = alloca(req->msg[0].iov_len);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ gf_msg("this->name", GF_LOG_ERROR, 0, Q_MSG_XDR_DECODE_ERROR,
+ "xdr decoding error");
+ req->rpc_err = GARBAGE_ARGS;
+ goto err;
+ }
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new();
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, Q_MSG_DICT_UNSERIALIZE_FAIL,
+ "Failed to unserialize req-buffer to "
+ "dictionary");
+ goto err;
+ }
+ }
+
+ ret = dict_get_strn(dict, "gfid", SLEN("gfid"), &gfid_str);
+ if (ret) {
+ goto err;
+ }
+
+ ret = dict_get_strn(dict, "volume-uuid", SLEN("volume-uuid"), &volume_uuid);
+ if (ret) {
+ goto err;
+ }
+
+ gf_uuid_parse((const char *)gfid_str, gfid);
+
+ frame = quotad_aggregator_get_frame_from_req(req);
+ if (frame == NULL) {
+ cli_rsp.op_errno = ENOMEM;
+ goto errx;
+ }
+ state = frame->root->state;
+ state->req_xdata = dict;
+ state->xdata = dict_new();
+ dict = NULL;
+
+ ret = dict_set_int32_sizen(state->xdata, QUOTA_LIMIT_KEY, 42);
+ if (ret)
+ goto err;
+
+ ret = dict_set_int32_sizen(state->xdata, QUOTA_LIMIT_OBJECTS_KEY, 42);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, Q_MSG_ENOMEM,
+ "Failed to set QUOTA_LIMIT_OBJECTS_KEY");
+ goto err;
+ }
+
+ ret = dict_set_int32_sizen(state->xdata, QUOTA_SIZE_KEY, 42);
+ if (ret)
+ goto err;
+
+ ret = dict_set_int32_sizen(state->xdata, GET_ANCESTRY_PATH_KEY, 42);
+ if (ret)
+ goto err;
+
+ ret = qd_nameless_lookup(this, frame, (char *)gfid, state->xdata,
+ volume_uuid, quotad_aggregator_getlimit_cbk);
+ if (ret) {
+ cli_rsp.op_errno = ret;
+ goto errx;
+ }
+
+ return ret;
+
+err:
+ cli_rsp.op_errno = op_errno;
+errx:
+ cli_rsp.op_ret = -1;
+ cli_rsp.op_errstr = "";
+
+ quotad_aggregator_getlimit_cbk(this, frame, &cli_rsp);
+ if (dict)
+ dict_unref(dict);
+ return ret;
+}
+
+int
+quotad_aggregator_lookup_cbk(xlator_t *this, call_frame_t *frame, void *rsp)
+{
+ quotad_aggregator_submit_reply(frame, frame ? frame->local : NULL, rsp,
+ NULL, 0, NULL,
+ (xdrproc_t)xdr_gfs3_lookup_rsp);
+
+ return 0;
+}
+
+int
+quotad_aggregator_lookup(rpcsvc_request_t *req)
+{
+ call_frame_t *frame = NULL;
+ gfs3_lookup_req args = {
+ {
+ 0,
+ },
+ };
+ int i = 0, ret = -1, op_errno = 0;
+ gfs3_lookup_rsp rsp = {
+ 0,
+ };
+ quotad_aggregator_state_t *state = NULL;
+ xlator_t *this = NULL;
+ dict_t *dict = NULL;
+ char *volume_uuid = NULL;
+
+ GF_VALIDATE_OR_GOTO("quotad-aggregator", req, err);
+
+ this = THIS;
+
+ args.bname = alloca(req->msg[0].iov_len);
+ args.xdata.xdata_val = alloca(req->msg[0].iov_len);
+
+ ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_gfs3_lookup_req);
+ if (ret < 0) {
+ rsp.op_errno = EINVAL;
+ goto err;
+ }
+
+ frame = quotad_aggregator_get_frame_from_req(req);
+ if (frame == NULL) {
+ rsp.op_errno = ENOMEM;
+ goto err;
+ }
+
+ state = frame->root->state;
+
+ GF_PROTOCOL_DICT_UNSERIALIZE(this, dict, (args.xdata.xdata_val),
+ (args.xdata.xdata_len), ret, op_errno, err);
+
+ ret = dict_get_str(dict, "volume-uuid", &volume_uuid);
+ if (ret) {
+ goto err;
+ }
+
+ state->xdata = dict_new();
+
+ for (i = 0; qd_ext_xattrs[i]; i++) {
+ if (dict_get(dict, qd_ext_xattrs[i])) {
+ ret = dict_set_uint32(state->xdata, qd_ext_xattrs[i], 1);
+ if (ret < 0)
+ goto err;
+ }
+ }
+
+ ret = qd_nameless_lookup(this, frame, args.gfid, state->xdata, volume_uuid,
+ quotad_aggregator_lookup_cbk);
+ if (ret) {
+ rsp.op_errno = ret;
+ goto err;
+ }
+
+ if (dict)
+ dict_unref(dict);
+
+ return ret;
+
+err:
+ rsp.op_ret = -1;
+ rsp.op_errno = op_errno;
+
+ quotad_aggregator_lookup_cbk(this, frame, &rsp);
+ if (dict)
+ dict_unref(dict);
+
+ return ret;
+}
+
+int
+quotad_aggregator_rpc_notify(rpcsvc_t *rpc, void *xl, rpcsvc_event_t event,
+ void *data)
+{
+ if (!xl || !data) {
+ gf_log_callingfn("server", GF_LOG_WARNING,
+ "Calling rpc_notify without initializing");
+ goto out;
+ }
+
+ switch (event) {
+ case RPCSVC_EVENT_ACCEPT:
+ break;
+
+ case RPCSVC_EVENT_DISCONNECT:
+ break;
+
+ default:
+ break;
+ }
+
+out:
+ return 0;
+}
+
+int
+quotad_aggregator_init(xlator_t *this)
+{
+ quota_priv_t *priv = NULL;
+ int ret = -1;
+
+ priv = this->private;
+
+ if (priv->rpcsvc) {
+ /* Listener already created */
+ return 0;
+ }
+
+ ret = dict_set_nstrn(this->options, "transport.address-family",
+ SLEN("transport.address-family"), "unix",
+ SLEN("unix"));
+ if (ret)
+ goto out;
+
+ ret = dict_set_nstrn(this->options, "transport-type",
+ SLEN("transport-type"), "socket", SLEN("socket"));
+ if (ret)
+ goto out;
+
+ ret = dict_set_nstrn(this->options, "transport.socket.listen-path",
+ SLEN("transport.socket.listen-path"),
+ "/var/run/gluster/quotad.socket",
+ SLEN("/var/run/gluster/quotad.socket"));
+ if (ret)
+ goto out;
+
+ /* RPC related */
+ priv->rpcsvc = rpcsvc_init(this, this->ctx, this->options, 0);
+ if (priv->rpcsvc == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_RPCSVC_INIT_FAILED,
+ "creation of rpcsvc failed");
+ ret = -1;
+ goto out;
+ }
+
+ ret = rpcsvc_create_listeners(priv->rpcsvc, this->options, this->name);
+ if (ret < 1) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ Q_MSG_RPCSVC_LISTENER_CREATION_FAILED,
+ "creation of listener failed");
+ ret = -1;
+ goto out;
+ }
+
+ priv->quotad_aggregator = &quotad_aggregator_prog;
+ quotad_aggregator_prog.options = this->options;
+
+ ret = rpcsvc_program_register(priv->rpcsvc, &quotad_aggregator_prog,
+ _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, Q_MSG_RPCSVC_REGISTER_FAILED,
+ "registration of program (name:%s, prognum:%d, "
+ "progver:%d) failed",
+ quotad_aggregator_prog.progname, quotad_aggregator_prog.prognum,
+ quotad_aggregator_prog.progver);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && priv->rpcsvc) {
+ GF_FREE(priv->rpcsvc);
+ priv->rpcsvc = NULL;
+ }
+
+ return ret;
+}
+
+static rpcsvc_actor_t quotad_aggregator_actors[GF_AGGREGATOR_MAXVALUE] = {
+ [GF_AGGREGATOR_NULL] = {"NULL", NULL, NULL, GF_AGGREGATOR_NULL, DRC_NA, 0},
+ [GF_AGGREGATOR_LOOKUP] = {"LOOKUP", quotad_aggregator_lookup, NULL,
+ GF_AGGREGATOR_NULL, DRC_NA, 0},
+ [GF_AGGREGATOR_GETLIMIT] = {"GETLIMIT", quotad_aggregator_getlimit, NULL,
+ GF_AGGREGATOR_GETLIMIT, DRC_NA, 0},
+};
+
+static struct rpcsvc_program quotad_aggregator_prog = {
+ .progname = "GlusterFS 3.3",
+ .prognum = GLUSTER_AGGREGATOR_PROGRAM,
+ .progver = GLUSTER_AGGREGATOR_VERSION,
+ .numactors = GF_AGGREGATOR_MAXVALUE,
+ .actors = quotad_aggregator_actors};
diff --git a/xlators/features/quota/src/quotad-aggregator.h b/xlators/features/quota/src/quotad-aggregator.h
new file mode 100644
index 00000000000..706592c7d50
--- /dev/null
+++ b/xlators/features/quota/src/quotad-aggregator.h
@@ -0,0 +1,38 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _QUOTAD_AGGREGATOR_H
+#define _QUOTAD_AGGREGATOR_H
+
+#include "quota.h"
+#include <glusterfs/stack.h>
+#include "glusterfs3-xdr.h"
+#include <glusterfs/inode.h>
+
+typedef struct {
+ void *pool;
+ xlator_t *this;
+ xlator_t *active_subvol;
+ inode_table_t *itable;
+ loc_t loc;
+ dict_t *xdata;
+ dict_t *req_xdata;
+} quotad_aggregator_state_t;
+
+typedef int (*quotad_aggregator_lookup_cbk_t)(xlator_t *this,
+ call_frame_t *frame, void *rsp);
+int
+qd_nameless_lookup(xlator_t *this, call_frame_t *frame, char *gfid,
+ dict_t *xdata, char *volume_uuid,
+ quotad_aggregator_lookup_cbk_t lookup_cbk);
+int
+quotad_aggregator_init(xlator_t *this);
+
+#endif
diff --git a/xlators/features/quota/src/quotad-helpers.c b/xlators/features/quota/src/quotad-helpers.c
new file mode 100644
index 00000000000..51ff1d7e98d
--- /dev/null
+++ b/xlators/features/quota/src/quotad-helpers.c
@@ -0,0 +1,107 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "quotad-helpers.h"
+
+quotad_aggregator_state_t *
+get_quotad_aggregator_state(xlator_t *this, rpcsvc_request_t *req)
+{
+ quotad_aggregator_state_t *state = NULL;
+ xlator_t *active_subvol = NULL;
+ quota_priv_t *priv = NULL;
+
+ state = (void *)GF_CALLOC(1, sizeof(*state),
+ gf_quota_mt_aggregator_state_t);
+ if (!state)
+ return NULL;
+
+ state->this = THIS;
+ priv = this->private;
+
+ LOCK(&priv->lock);
+ {
+ active_subvol = state->active_subvol = FIRST_CHILD(this);
+ }
+ UNLOCK(&priv->lock);
+
+ if (active_subvol->itable == NULL)
+ active_subvol->itable = inode_table_new(4096, active_subvol);
+
+ state->itable = active_subvol->itable;
+
+ state->pool = this->ctx->pool;
+
+ return state;
+}
+
+void
+quotad_aggregator_free_state(quotad_aggregator_state_t *state)
+{
+ if (state->xdata)
+ dict_unref(state->xdata);
+
+ if (state->req_xdata)
+ dict_unref(state->req_xdata);
+
+ GF_FREE(state);
+}
+
+call_frame_t *
+quotad_aggregator_alloc_frame(rpcsvc_request_t *req)
+{
+ call_frame_t *frame = NULL;
+ quotad_aggregator_state_t *state = NULL;
+ xlator_t *this = NULL;
+
+ GF_VALIDATE_OR_GOTO("server", req, out);
+ GF_VALIDATE_OR_GOTO("server", req->trans, out);
+ GF_VALIDATE_OR_GOTO("server", req->svc, out);
+ GF_VALIDATE_OR_GOTO("server", req->svc->ctx, out);
+
+ this = req->svc->xl;
+
+ frame = create_frame(this, req->svc->ctx->pool);
+ if (!frame)
+ goto out;
+
+ state = get_quotad_aggregator_state(this, req);
+ if (!state)
+ goto out;
+
+ frame->root->state = state;
+
+ frame->this = this;
+out:
+ return frame;
+}
+
+call_frame_t *
+quotad_aggregator_get_frame_from_req(rpcsvc_request_t *req)
+{
+ call_frame_t *frame = NULL;
+
+ GF_VALIDATE_OR_GOTO("server", req, out);
+
+ frame = quotad_aggregator_alloc_frame(req);
+ if (!frame)
+ goto out;
+
+ frame->root->op = req->procnum;
+
+ frame->root->uid = req->uid;
+ frame->root->gid = req->gid;
+ frame->root->pid = req->pid;
+
+ frame->root->lk_owner = req->lk_owner;
+
+ frame->local = req;
+out:
+ return frame;
+}
diff --git a/xlators/features/quota/src/quotad-helpers.h b/xlators/features/quota/src/quotad-helpers.h
new file mode 100644
index 00000000000..bcb39fe845e
--- /dev/null
+++ b/xlators/features/quota/src/quotad-helpers.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef QUOTAD_HELPERS_H
+#define QUOTAD_HELPERS_H
+
+#include "rpcsvc.h"
+#include "quota.h"
+#include "quotad-aggregator.h"
+
+void
+quotad_aggregator_free_state(quotad_aggregator_state_t *state);
+
+call_frame_t *
+quotad_aggregator_get_frame_from_req(rpcsvc_request_t *req);
+
+#endif
diff --git a/xlators/features/quota/src/quotad.c b/xlators/features/quota/src/quotad.c
new file mode 100644
index 00000000000..643f25c9c2a
--- /dev/null
+++ b/xlators/features/quota/src/quotad.c
@@ -0,0 +1,245 @@
+/*
+ Copyright (c) 2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "quota.h"
+#include "quotad-aggregator.h"
+
+int
+qd_notify(xlator_t *this, int32_t event, void *data, ...)
+{
+ switch (event) {
+ case GF_EVENT_PARENT_UP:
+ quotad_aggregator_init(this);
+ }
+
+ default_notify(this, event, data);
+ return 0;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, gf_quota_mt_end + 1);
+
+ if (0 != ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Memory accounting "
+ "init failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int32_t
+qd_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ quotad_aggregator_lookup_cbk_t lookup_cbk = NULL;
+ gfs3_lookup_rsp rsp = {
+ 0,
+ };
+
+ lookup_cbk = cookie;
+
+ rsp.op_ret = op_ret;
+ rsp.op_errno = op_errno;
+
+ gf_stat_from_iatt(&rsp.postparent, postparent);
+
+ GF_PROTOCOL_DICT_SERIALIZE(this, xdata, (&rsp.xdata.xdata_val),
+ rsp.xdata.xdata_len, rsp.op_errno, out);
+
+ gf_stat_from_iatt(&rsp.stat, buf);
+
+out:
+ lookup_cbk(this, frame, &rsp);
+
+ GF_FREE(rsp.xdata.xdata_val);
+
+ inode_unref(inode);
+
+ return 0;
+}
+
+xlator_t *
+qd_find_subvol(xlator_t *this, char *volume_uuid)
+{
+ xlator_list_t *child = NULL;
+ xlator_t *subvol = NULL;
+ char key[1024];
+ int keylen = 0;
+ char *optstr = NULL;
+
+ if (!this || !volume_uuid)
+ goto out;
+
+ for (child = this->children; child; child = child->next) {
+ keylen = snprintf(key, sizeof(key), "%s.volume-id",
+ child->xlator->name);
+ if (dict_get_strn(this->options, key, keylen, &optstr) < 0)
+ continue;
+
+ if (strcmp(optstr, volume_uuid) == 0) {
+ subvol = child->xlator;
+ break;
+ }
+ }
+
+out:
+ return subvol;
+}
+
+int
+qd_nameless_lookup(xlator_t *this, call_frame_t *frame, char *gfid,
+ dict_t *xdata, char *volume_uuid,
+ quotad_aggregator_lookup_cbk_t lookup_cbk)
+{
+ gfs3_lookup_rsp rsp = {
+ 0,
+ };
+ int op_errno = 0, ret = -1;
+ loc_t loc = {
+ 0,
+ };
+ quotad_aggregator_state_t *state = NULL;
+ xlator_t *subvol = NULL;
+
+ state = frame->root->state;
+
+ frame->root->op = GF_FOP_LOOKUP;
+
+ loc.inode = inode_new(state->itable);
+ if (loc.inode == NULL) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ memcpy(loc.gfid, gfid, 16);
+
+ ret = dict_set_int8(xdata, QUOTA_READ_ONLY_KEY, 1);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, Q_MSG_ENOMEM,
+ "dict set failed");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ subvol = qd_find_subvol(this, volume_uuid);
+ if (subvol == NULL) {
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ STACK_WIND_COOKIE(frame, qd_lookup_cbk, lookup_cbk, subvol,
+ subvol->fops->lookup, &loc, xdata);
+ return 0;
+
+out:
+ rsp.op_ret = -1;
+ rsp.op_errno = op_errno;
+
+ lookup_cbk(this, frame, &rsp);
+
+ inode_unref(loc.inode);
+ return 0;
+}
+
+int
+qd_reconfigure(xlator_t *this, dict_t *options)
+{
+ /* As of now quotad is restarted upon alteration of volfile */
+ return 0;
+}
+
+void
+qd_fini(xlator_t *this)
+{
+ quota_priv_t *priv = NULL;
+
+ if (this == NULL || this->private == NULL)
+ goto out;
+
+ priv = this->private;
+
+ if (priv->rpcsvc) {
+ GF_FREE(priv->rpcsvc);
+ priv->rpcsvc = NULL;
+ }
+
+ GF_FREE(priv);
+
+out:
+ return;
+}
+
+int32_t
+qd_init(xlator_t *this)
+{
+ int32_t ret = -1;
+ quota_priv_t *priv = NULL;
+
+ if (NULL == this->children) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "FATAL: quota (%s) not configured for min of 1 child",
+ this->name);
+ ret = -1;
+ goto err;
+ }
+
+ QUOTA_ALLOC_OR_GOTO(priv, quota_priv_t, err);
+ LOCK_INIT(&priv->lock);
+
+ this->private = priv;
+
+ ret = 0;
+err:
+ if (ret) {
+ GF_FREE(priv);
+ }
+ return ret;
+}
+
+struct xlator_fops fops = {};
+
+struct xlator_cbks cbks = {};
+
+struct volume_options options[] = {
+ {.key = {"transport-type"},
+ .value = {"rpc", "rpc-over-rdma", "tcp", "socket", "ib-verbs", "unix",
+ "ib-sdp", "tcp/server", "ib-verbs/server", "rdma",
+ "rdma*([ \t]),*([ \t])socket", "rdma*([ \t]),*([ \t])tcp",
+ "tcp*([ \t]),*([ \t])rdma", "socket*([ \t]),*([ \t])rdma"},
+ .type = GF_OPTION_TYPE_STR},
+ {
+ .key = {"transport.*"},
+ .type = GF_OPTION_TYPE_ANY,
+ },
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = qd_init,
+ .fini = qd_fini,
+ .reconfigure = qd_reconfigure,
+ .notify = qd_notify,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "quotad",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/read-only/src/Makefile.am b/xlators/features/read-only/src/Makefile.am
index 31ae4f34009..e4a2017ef0d 100644
--- a/xlators/features/read-only/src/Makefile.am
+++ b/xlators/features/read-only/src/Makefile.am
@@ -2,20 +2,22 @@ xlator_LTLIBRARIES = read-only.la worm.la
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-noinst_HEADERS = read-only-common.h
+noinst_HEADERS = read-only.h read-only-mem-types.h read-only-common.h worm-helper.h
-read_only_la_LDFLAGS = -module -avoidversion
+read_only_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
read_only_la_SOURCES = read-only.c read-only-common.c
read_only_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-worm_la_LDFLAGS = -module -avoidversion
+worm_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-worm_la_SOURCES = read-only-common.c worm.c
+worm_la_SOURCES = read-only-common.c worm-helper.c worm.c
worm_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS) \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/read-only/src/read-only-common.c b/xlators/features/read-only/src/read-only-common.c
index dbb529ce5cb..9640e7e3eee 100644
--- a/xlators/features/read-only/src/read-only-common.c
+++ b/xlators/features/read-only/src/read-only-common.c
@@ -1,249 +1,406 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#include "read-only.h"
+#include "read-only-mem-types.h"
+#include <glusterfs/defaults.h>
+
+gf_boolean_t
+is_readonly_or_worm_enabled(call_frame_t *frame, xlator_t *this)
+{
+ read_only_priv_t *priv = NULL;
+ gf_boolean_t readonly_or_worm_enabled = _gf_false;
+
+ priv = this->private;
+ GF_ASSERT(priv);
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ readonly_or_worm_enabled = priv->readonly_or_worm_enabled;
-#include "xlator.h"
-#include "defaults.h"
+ if (frame->root->pid < GF_CLIENT_PID_MAX)
+ readonly_or_worm_enabled = _gf_false;
+
+ return readonly_or_worm_enabled;
+}
+
+static int
+_check_key_is_zero_filled(dict_t *d, char *k, data_t *v, void *tmp)
+{
+ if (mem_0filled((const char *)v->data, v->len)) {
+ /* -1 means, no more iterations, treat as 'break' */
+ return -1;
+ }
+ return 0;
+}
int32_t
-ro_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+ro_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND_STRICT (xattrop, frame, -1, EROFS, NULL, xdata);
- return 0;
+ gf_boolean_t allzero = _gf_false;
+ int ret = 0;
+
+ ret = dict_foreach(dict, _check_key_is_zero_filled, NULL);
+ if (ret == 0)
+ allzero = _gf_true;
+
+ if (is_readonly_or_worm_enabled(frame, this) && !allzero)
+ STACK_UNWIND_STRICT(xattrop, frame, -1, EROFS, NULL, xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc, flags, dict,
+ xdata);
+ return 0;
}
int32_t
-ro_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+ro_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fxattrop, frame, -1, EROFS, NULL, xdata);
- return 0;
+ gf_boolean_t allzero = _gf_false;
+ int ret = 0;
+
+ ret = dict_foreach(dict, _check_key_is_zero_filled, NULL);
+ if (ret == 0)
+ allzero = _gf_true;
+
+ if (is_readonly_or_worm_enabled(frame, this) && !allzero)
+ STACK_UNWIND_STRICT(fxattrop, frame, -1, EROFS, NULL, xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fd, flags, dict,
+ xdata);
+
+ return 0;
}
int32_t
-ro_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, const char *basename, entrylk_cmd cmd,
- entrylk_type type, dict_t *xdata)
+ro_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (entrylk, frame, -1, EROFS, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->entrylk,
+ volume, loc, basename, cmd, type, xdata);
+
+ return 0;
}
int32_t
-ro_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type type, dict_t *xdata)
+ro_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (fentrylk, frame, -1, EROFS, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->fentrylk,
+ volume, fd, basename, cmd, type, xdata);
+
+ return 0;
}
int32_t
-ro_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+ro_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (inodelk, frame, -1, EROFS, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->inodelk,
+ volume, loc, cmd, lock, xdata);
+
+ return 0;
}
int32_t
-ro_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata)
+ro_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (finodelk, frame, -1, EROFS, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->finodelk,
+ volume, fd, cmd, lock, xdata);
+
+ return 0;
}
int32_t
-ro_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
- struct gf_flock *flock, dict_t *xdata)
+ro_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
+ struct gf_flock *flock, dict_t *xdata)
{
- STACK_UNWIND_STRICT (lk, frame, -1, EROFS, NULL, xdata);
- return 0;
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk, fd,
+ cmd, flock, xdata);
+
+ return 0;
}
int32_t
-ro_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+ro_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setattr, frame, -1, EROFS, NULL, NULL, xdata);
- return 0;
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(setattr, frame, -1, EROFS, NULL, NULL, xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid,
+ xdata);
+
+ return 0;
}
int32_t
-ro_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid, dict_t *xdata)
+ro_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsetattr, frame, -1, EROFS, NULL, NULL, xdata);
- return 0;
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(fsetattr, frame, -1, EROFS, NULL, NULL, xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid,
+ xdata);
+
+ return 0;
}
+int32_t
+ro_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(truncate, frame, -1, EROFS, NULL, NULL, xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+
+ return 0;
+}
int32_t
-ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata)
+ro_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (truncate, frame, -1, EROFS, NULL, NULL, xdata);
- return 0;
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(ftruncate, frame, -1, EROFS, NULL, NULL, xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+
+ return 0;
}
int32_t
-ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata)
+ro_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
{
- STACK_UNWIND_STRICT (ftruncate, frame, -1, EROFS, NULL, NULL, xdata);
- return 0;
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(fallocate, frame, -1, EROFS, NULL, NULL, xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset,
+ len, xdata);
+ return 0;
}
int
-ro_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata)
+ro_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
{
- STACK_UNWIND_STRICT (mknod, frame, -1, EROFS, NULL, NULL, NULL, NULL, xdata);
- return 0;
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(mknod, frame, -1, EROFS, NULL, NULL, NULL, NULL,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
+ xdata);
+
+ return 0;
}
-
int
-ro_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata)
+ro_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- STACK_UNWIND_STRICT (mkdir, frame, -1, EROFS, NULL, NULL, NULL, NULL, xdata);
- return 0;
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(mkdir, frame, -1, EROFS, NULL, NULL, NULL, NULL,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask,
+ xdata);
+
+ return 0;
}
int32_t
-ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata)
+ro_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (unlink, frame, -1, EROFS, NULL, NULL, xdata);
- return 0;
-}
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(unlink, frame, -1, EROFS, NULL, NULL, xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+}
int
-ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
- dict_t *xdata)
+ro_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (rmdir, frame, -1, EROFS, NULL, NULL, xdata);
- return 0;
-}
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(rmdir, frame, -1, EROFS, NULL, NULL, xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
+ return 0;
+}
int
-ro_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, mode_t umask, dict_t *xdata)
+ro_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
{
- STACK_UNWIND_STRICT (symlink, frame, -1, EROFS, NULL, NULL, NULL,
- NULL, xdata);
- return 0;
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(symlink, frame, -1, EROFS, NULL, NULL, NULL, NULL,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
+ xdata);
+
+ return 0;
}
-
-
int32_t
-ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+ro_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (rename, frame, -1, EROFS, NULL, NULL, NULL, NULL,
- NULL, xdata);
- return 0;
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(rename, frame, -1, EROFS, NULL, NULL, NULL, NULL,
+ NULL, xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+
+ return 0;
}
-
int32_t
-ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata)
+ro_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (link, frame, -1, EROFS, NULL, NULL, NULL, NULL, xdata);
- return 0;
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(link, frame, -1, EROFS, NULL, NULL, NULL, NULL,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+
+ return 0;
}
int32_t
-ro_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+ro_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (create, frame, -1, EROFS, NULL, NULL, NULL,
- NULL, NULL, xdata);
- return 0;
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(create, frame, -1, EROFS, NULL, NULL, NULL, NULL,
+ NULL, xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode,
+ umask, fd, xdata);
+
+ return 0;
}
-
static int32_t
-ro_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd, dict_t *xdata)
+ro_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
- return 0;
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
}
int32_t
-ro_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
+ro_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- if (((flags & O_ACCMODE) == O_WRONLY) ||
- ((flags & O_ACCMODE) == O_RDWR)) {
- STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL, xdata);
- return 0;
- }
+ if (is_readonly_or_worm_enabled(frame, this) &&
+ (((flags & O_ACCMODE) == O_WRONLY) ||
+ ((flags & O_ACCMODE) == O_RDWR))) {
+ STACK_UNWIND_STRICT(open, frame, -1, EROFS, NULL, xdata);
+ return 0;
+ }
- STACK_WIND (frame, ro_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
- return 0;
+ STACK_WIND(frame, ro_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
}
int32_t
-ro_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata)
+ro_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsetxattr, frame, -1, EROFS, xdata);
- return 0;
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(fsetxattr, frame, -1, EROFS, xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
+ xdata);
+
+ return 0;
}
int32_t
-ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata)
+ro_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (fsyncdir, frame, -1, EROFS, xdata);
- return 0;
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(fsyncdir, frame, -1, EROFS, xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsyncdir, fd, flags, xdata);
+
+ return 0;
}
int32_t
-ro_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata)
+ro_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t off, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- STACK_UNWIND_STRICT (writev, frame, -1, EROFS, NULL, NULL, xdata);
- return 0;
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(writev, frame, -1, EROFS, NULL, NULL, xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, off,
+ flags, iobref, xdata);
+
+ return 0;
}
-
int32_t
-ro_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata)
+ro_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
{
- STACK_UNWIND_STRICT (setxattr, frame, -1, EROFS, xdata);
- return 0;
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(setxattr, frame, -1, EROFS, xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+ xdata);
+
+ return 0;
}
int32_t
-ro_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata)
+ro_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
{
- STACK_UNWIND_STRICT (removexattr, frame, -1, EROFS, xdata);
- return 0;
+ if (is_readonly_or_worm_enabled(frame, this))
+ STACK_UNWIND_STRICT(removexattr, frame, -1, EROFS, xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+
+ return 0;
}
diff --git a/xlators/features/read-only/src/read-only-common.h b/xlators/features/read-only/src/read-only-common.h
index 0a3f7dceb41..5561961ffa2 100644
--- a/xlators/features/read-only/src/read-only-common.h
+++ b/xlators/features/read-only/src/read-only-common.h
@@ -1,125 +1,121 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
-#include "xlator.h"
-#include "defaults.h"
+gf_boolean_t
+is_readonly_or_worm_enabled(call_frame_t *frame, xlator_t *this);
int32_t
-ro_xattrop (call_frame_t *frame, xlator_t *this, loc_t *loc,
- gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+ro_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
int32_t
-ro_fxattrop (call_frame_t *frame, xlator_t *this,
- fd_t *fd, gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
+ro_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata);
int32_t
-ro_entrylk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, const char *basename, entrylk_cmd cmd,
- entrylk_type type, dict_t *xdata);
+ro_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata);
int32_t
-ro_fentrylk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, const char *basename, entrylk_cmd cmd, entrylk_type
- type, dict_t *xdata);
+ro_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata);
int32_t
-ro_inodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- loc_t *loc, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+ro_inodelk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata);
int32_t
-ro_finodelk (call_frame_t *frame, xlator_t *this, const char *volume,
- fd_t *fd, int32_t cmd, struct gf_flock *lock, dict_t *xdata);
+ro_finodelk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ int32_t cmd, struct gf_flock *lock, dict_t *xdata);
int32_t
-ro_lk (call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
- struct gf_flock *flock, dict_t *xdata);
+ro_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int cmd,
+ struct gf_flock *flock, dict_t *xdata);
int32_t
-ro_setattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- struct iatt *stbuf, int32_t valid, dict_t *xdata);
+ro_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
int32_t
-ro_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
- struct iatt *stbuf, int32_t valid, dict_t *xdata);
-
+ro_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata);
int32_t
-ro_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset, dict_t *xdata);
+ro_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata);
int32_t
-ro_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset, dict_t *xdata);
+ro_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata);
int
-ro_mknod (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- dev_t rdev, mode_t umask, dict_t *xdata);
+ro_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata);
int
-ro_mkdir (call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
- mode_t umask, dict_t *xdata);
+ro_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata);
int32_t
-ro_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
- dict_t *xdata);
-
-int
-ro_rmdir (call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ro_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
dict_t *xdata);
+int
+ro_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata);
int
-ro_symlink (call_frame_t *frame, xlator_t *this, const char *linkpath,
- loc_t *loc, mode_t umask, dict_t *xdata);
+ro_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata);
+
+int32_t
+ro_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
int32_t
-ro_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata);
+ro_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata);
int32_t
-ro_link (call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc, dict_t *xdata);
+ro_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
int32_t
-ro_create (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata);
+ro_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata);
int32_t
-ro_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata);
+ro_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata);
int32_t
-ro_fsetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
- int32_t flags, dict_t *xdata);
+ro_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata);
int32_t
-ro_fsyncdir (call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags, dict_t *xdata);
+ro_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t off, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata);
int32_t
-ro_writev (call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
- int32_t count, off_t off, uint32_t flags, struct iobref *iobref, dict_t *xdata);
+ro_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata);
int32_t
-ro_setxattr (call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
- int32_t flags, dict_t *xdata);
+ro_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata);
int32_t
-ro_removexattr (call_frame_t *frame, xlator_t *this, loc_t *loc,
- const char *name, dict_t *xdata);
+ro_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata);
diff --git a/xlators/features/read-only/src/read-only-mem-types.h b/xlators/features/read-only/src/read-only-mem-types.h
new file mode 100644
index 00000000000..c67d6c02cd0
--- /dev/null
+++ b/xlators/features/read-only/src/read-only-mem-types.h
@@ -0,0 +1,20 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __READONLY_MEM_TYPES_H__
+#define __READONLY_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_read_only_mem_types_ {
+ gf_read_only_mt_priv_t = gf_common_mt_end + 1,
+ gf_read_only_mt_end
+};
+#endif
diff --git a/xlators/features/read-only/src/read-only.c b/xlators/features/read-only/src/read-only.c
index b11e84f24e8..48654998e63 100644
--- a/xlators/features/read-only/src/read-only.c
+++ b/xlators/features/read-only/src/read-only.c
@@ -1,87 +1,144 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#include "read-only-common.h"
+#include "read-only-mem-types.h"
+#include "read-only.h"
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ ret = xlator_mem_acct_init(this, gf_read_only_mt_end + 1);
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR,
+ "Memory accounting "
+ "initialization failed.");
-#include "xlator.h"
-#include "defaults.h"
-#include "read-only-common.h"
+ return ret;
+}
int32_t
-init (xlator_t *this)
+init(xlator_t *this)
{
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "translator not configured with exactly one child");
- return -1;
- }
-
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
- }
-
- return 0;
+ int ret = -1;
+ read_only_priv_t *priv = NULL;
+
+ if (!this->children || this->children->next) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "translator not configured with exactly one child");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
+ }
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_read_only_mt_priv_t);
+ if (!priv)
+ goto out;
+
+ this->private = priv;
+
+ GF_OPTION_INIT("read-only", priv->readonly_or_worm_enabled, bool, out);
+
+ ret = 0;
+out:
+ return ret;
}
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ read_only_priv_t *priv = NULL;
+ int ret = -1;
+ gf_boolean_t readonly_or_worm_enabled = _gf_false;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GF_OPTION_RECONF("read-only", readonly_or_worm_enabled, options, bool, out);
+ priv->readonly_or_worm_enabled = readonly_or_worm_enabled;
+ ret = 0;
+out:
+ gf_log(this->name, GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
void
-fini (xlator_t *this)
+fini(xlator_t *this)
{
- return;
-}
+ read_only_priv_t *priv = NULL;
+ priv = this->private;
+ if (!priv)
+ return;
+
+ this->private = NULL;
+ GF_FREE(priv);
+
+ return;
+}
struct xlator_fops fops = {
- .mknod = ro_mknod,
- .mkdir = ro_mkdir,
- .unlink = ro_unlink,
- .rmdir = ro_rmdir,
- .symlink = ro_symlink,
- .rename = ro_rename,
- .link = ro_link,
- .truncate = ro_truncate,
- .open = ro_open,
- .writev = ro_writev,
- .setxattr = ro_setxattr,
- .fsetxattr = ro_fsetxattr,
- .removexattr = ro_removexattr,
- .fsyncdir = ro_fsyncdir,
- .ftruncate = ro_ftruncate,
- .create = ro_create,
- .setattr = ro_setattr,
- .fsetattr = ro_fsetattr,
- .xattrop = ro_xattrop,
- .fxattrop = ro_fxattrop,
- .inodelk = ro_inodelk,
- .finodelk = ro_finodelk,
- .entrylk = ro_entrylk,
- .fentrylk = ro_fentrylk,
- .lk = ro_lk,
+ .mknod = ro_mknod,
+ .mkdir = ro_mkdir,
+ .unlink = ro_unlink,
+ .rmdir = ro_rmdir,
+ .symlink = ro_symlink,
+ .rename = ro_rename,
+ .link = ro_link,
+ .truncate = ro_truncate,
+ .open = ro_open,
+ .writev = ro_writev,
+ .setxattr = ro_setxattr,
+ .fsetxattr = ro_fsetxattr,
+ .removexattr = ro_removexattr,
+ .fsyncdir = ro_fsyncdir,
+ .ftruncate = ro_ftruncate,
+ .create = ro_create,
+ .setattr = ro_setattr,
+ .fsetattr = ro_fsetattr,
+ .xattrop = ro_xattrop,
+ .fxattrop = ro_fxattrop,
+ .inodelk = ro_inodelk,
+ .finodelk = ro_finodelk,
+ .entrylk = ro_entrylk,
+ .fentrylk = ro_fentrylk,
+ .lk = ro_lk,
+ .fallocate = ro_fallocate,
};
-struct xlator_cbks cbks = {
-};
+struct xlator_cbks cbks = {};
struct volume_options options[] = {
- { .key = {NULL} },
+ {.key = {"read-only"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ /*.validate_fn = validate_boolean,*/
+ .op_version = {1},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "When \"on\", makes a volume read-only. It is turned "
+ "\"off\" by default."},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "read-only",
+ .category = GF_TECH_PREVIEW,
};
diff --git a/xlators/features/read-only/src/read-only.h b/xlators/features/read-only/src/read-only.h
new file mode 100644
index 00000000000..aced5d3c577
--- /dev/null
+++ b/xlators/features/read-only/src/read-only.h
@@ -0,0 +1,37 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __READONLY_H__
+#define __READONLY_H__
+
+#include <stdint.h> // for uint64_t, uint8_t
+#include <sys/time.h> // for time_t
+#include "glusterfs/glusterfs.h" // for gf_boolean_t
+
+typedef struct {
+ uint8_t worm : 1;
+ uint8_t retain : 1;
+ uint8_t legal_hold : 1;
+ uint8_t ret_mode : 1;
+ int64_t ret_period;
+ int64_t auto_commit_period;
+} worm_reten_state_t;
+
+typedef struct {
+ gf_boolean_t readonly_or_worm_enabled;
+ gf_boolean_t worm_file;
+ gf_boolean_t worm_files_deletable;
+ int64_t reten_period;
+ int64_t com_period;
+ int reten_mode;
+ time_t start_time;
+} read_only_priv_t;
+
+#endif
diff --git a/xlators/features/read-only/src/worm-helper.c b/xlators/features/read-only/src/worm-helper.c
new file mode 100644
index 00000000000..df45f2a940b
--- /dev/null
+++ b/xlators/features/read-only/src/worm-helper.c
@@ -0,0 +1,395 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "read-only-mem-types.h"
+#include "read-only.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/syncop.h>
+#include "worm-helper.h"
+
+/*Function to check whether file is read-only.
+ * The input *stbuf contains the attributes of the file, which is used to check
+ * the write protection bits for all the users of the file.
+ * Return true if all the write bits are disabled,false otherwise*/
+gf_boolean_t
+gf_worm_write_disabled(struct iatt *stbuf)
+{
+ gf_boolean_t ret = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("worm", stbuf, out);
+
+ if (stbuf->ia_prot.owner.write == 0 && stbuf->ia_prot.group.write == 0 &&
+ stbuf->ia_prot.other.write == 0)
+ ret = _gf_true;
+out:
+ return ret;
+}
+
+int32_t
+worm_init_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr)
+{
+ int ret = -1;
+ uint64_t start_time = 0;
+ dict_t *dict = NULL;
+
+ GF_VALIDATE_OR_GOTO("worm", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, file_ptr, out);
+
+ start_time = gf_time();
+ dict = dict_new();
+ if (!dict) {
+ gf_log(this->name, GF_LOG_ERROR, "Error creating the dict");
+ goto out;
+ }
+ ret = dict_set_uint64(dict, "trusted.start_time", start_time);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Error in setting the dict");
+ goto out;
+ }
+ if (fop_with_fd)
+ ret = syncop_fsetxattr(this, (fd_t *)file_ptr, dict, 0, NULL, NULL);
+ else
+ ret = syncop_setxattr(this, (loc_t *)file_ptr, dict, 0, NULL, NULL);
+out:
+ if (dict)
+ dict_unref(dict);
+ return ret;
+}
+
+/*Function to set the retention state for a file.
+ * It loads the WORM/Retention state into the retention_state pointer.*/
+int32_t
+worm_set_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr,
+ worm_reten_state_t *retention_state, struct iatt *stbuf)
+{
+ read_only_priv_t *priv = NULL;
+ struct iatt stpre = {
+ 0,
+ };
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("worm", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, file_ptr, out);
+ GF_VALIDATE_OR_GOTO(this->name, retention_state, out);
+ GF_VALIDATE_OR_GOTO(this->name, stbuf, out);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+ retention_state->worm = 1;
+ retention_state->retain = 1;
+ retention_state->legal_hold = 0;
+ retention_state->ret_mode = priv->reten_mode;
+ retention_state->ret_period = priv->reten_period;
+ retention_state->auto_commit_period = priv->com_period;
+ if (fop_with_fd)
+ ret = syncop_fstat(this, (fd_t *)file_ptr, &stpre, NULL, NULL);
+ else
+ ret = syncop_stat(this, (loc_t *)file_ptr, &stpre, NULL, NULL);
+ if (ret)
+ goto out;
+ stbuf->ia_mtime = stpre.ia_mtime;
+ stbuf->ia_atime = gf_time() + retention_state->ret_period;
+
+ if (fop_with_fd)
+ ret = syncop_fsetattr(this, (fd_t *)file_ptr, stbuf, GF_SET_ATTR_ATIME,
+ NULL, NULL, NULL, NULL);
+ else
+ ret = syncop_setattr(this, (loc_t *)file_ptr, stbuf, GF_SET_ATTR_ATIME,
+ NULL, NULL, NULL, NULL);
+ if (ret)
+ goto out;
+
+ ret = gf_worm_set_xattr(this, retention_state, fop_with_fd, file_ptr);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Error setting xattr");
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+/*This function gets the state of the WORM/Retention xattr and loads it in the
+ * dict pointer.*/
+int32_t
+worm_get_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr,
+ worm_reten_state_t *reten_state)
+{
+ dict_t *dict = NULL;
+ char *val = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("worm", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, file_ptr, out);
+ GF_VALIDATE_OR_GOTO(this->name, reten_state, out);
+
+ if (fop_with_fd)
+ ret = syncop_fgetxattr(this, (fd_t *)file_ptr, &dict,
+ "trusted.reten_state", NULL, NULL);
+ else
+ ret = syncop_getxattr(this, (loc_t *)file_ptr, &dict,
+ "trusted.reten_state", NULL, NULL);
+ if (ret < 0 || !dict) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_get_str(dict, "trusted.reten_state", &val);
+ if (ret) {
+ ret = -2;
+ gf_log(this->name, GF_LOG_ERROR, "Empty val");
+ }
+ gf_worm_deserialize_state(val, reten_state);
+out:
+ if (dict)
+ dict_unref(dict);
+ return ret;
+}
+
+/*Function to lookup the current state of the WORM/Retention profile.
+ * Based on the retain value and the access time of the file, the transition
+ * from WORM/Retention to WORM is made.*/
+void
+gf_worm_state_lookup(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr,
+ worm_reten_state_t *reten_state, struct iatt *stbuf)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("worm", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, file_ptr, out);
+ GF_VALIDATE_OR_GOTO(this->name, reten_state, out);
+ GF_VALIDATE_OR_GOTO(this->name, stbuf, out);
+
+ stbuf->ia_atime -= reten_state->ret_period;
+ reten_state->retain = 0;
+ reten_state->ret_period = 0;
+ reten_state->auto_commit_period = 0;
+ ret = gf_worm_set_xattr(this, reten_state, fop_with_fd, file_ptr);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Error setting xattr");
+ goto out;
+ }
+
+ if (fop_with_fd)
+ ret = syncop_fsetattr(this, (fd_t *)file_ptr, stbuf, GF_SET_ATTR_ATIME,
+ NULL, NULL, NULL, NULL);
+ else
+ ret = syncop_setattr(this, (loc_t *)file_ptr, stbuf, GF_SET_ATTR_ATIME,
+ NULL, NULL, NULL, NULL);
+ if (ret)
+ goto out;
+ gf_log(this->name, GF_LOG_INFO, "Retention state reset");
+out:
+ return;
+}
+
+/*This function serializes and stores the WORM/Retention state of a file in an
+ * uint64_t variable by setting the bits using the bitwise operations.*/
+void
+gf_worm_serialize_state(worm_reten_state_t *reten_state, char *val)
+{
+ uint32_t state = 0;
+
+ GF_VALIDATE_OR_GOTO("worm", reten_state, out);
+ GF_VALIDATE_OR_GOTO("worm", val, out);
+
+ state |= reten_state->worm << 0;
+ state |= reten_state->retain << 1;
+ state |= reten_state->legal_hold << 2;
+ state |= reten_state->ret_mode << 3;
+ sprintf(val, "%d/%" PRIu64 "/%" PRIu64, state, reten_state->ret_period,
+ reten_state->auto_commit_period);
+
+out:
+ return;
+}
+
+/*This function deserializes the data stored in the xattr of the file and loads
+ * the value to the reten_state structure.*/
+void
+gf_worm_deserialize_state(char *val, worm_reten_state_t *reten_state)
+{
+ char *token = NULL;
+ uint32_t state = 0;
+
+ GF_VALIDATE_OR_GOTO("worm", val, out);
+ GF_VALIDATE_OR_GOTO("worm", reten_state, out);
+
+ token = strtok(val, "/");
+ state = atoi(token);
+ reten_state->worm = (state >> 0) & 1;
+ reten_state->retain = (state >> 1) & 1;
+ reten_state->legal_hold = (state >> 2) & 1;
+ reten_state->ret_mode = (state >> 3) & 1;
+ token = strtok(NULL, "/");
+ reten_state->ret_period = atoi(token);
+ token = strtok(NULL, "/");
+ reten_state->auto_commit_period = atoi(token);
+
+out:
+ return;
+}
+
+/*Function to set the xattr for a file.
+ * If the xattr is already present then it will replace that.*/
+int32_t
+gf_worm_set_xattr(xlator_t *this, worm_reten_state_t *reten_state,
+ gf_boolean_t fop_with_fd, void *file_ptr)
+{
+ char val[100] = "";
+ int ret = -1;
+ dict_t *dict = NULL;
+
+ GF_VALIDATE_OR_GOTO("worm", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, reten_state, out);
+ GF_VALIDATE_OR_GOTO(this->name, file_ptr, out);
+
+ gf_worm_serialize_state(reten_state, val);
+ dict = dict_new();
+ if (!dict) {
+ gf_log(this->name, GF_LOG_ERROR, "Error creating the dict");
+ goto out;
+ }
+ ret = dict_set_str(dict, "trusted.reten_state", val);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Error in setting the dict");
+ goto out;
+ }
+ if (fop_with_fd)
+ ret = syncop_fsetxattr(this, (fd_t *)file_ptr, dict, 0, NULL, NULL);
+ else
+ ret = syncop_setxattr(this, (loc_t *)file_ptr, dict, 0, NULL, NULL);
+out:
+ if (dict)
+ dict_unref(dict);
+ return ret;
+}
+
+/*This function checks whether a file's timeout is happened for the state
+ * transition and if yes, then it will do the transition from the current state
+ * to the appropriate state. It also decides whether to continue or to block
+ * the FOP.
+ * Return:
+ * 0 : If the FOP should continue i.e., if the file is not in the WORM-Retained
+ * state or if the FOP is unlink and the file is not in the Retained state.
+ * 1: If the FOP sholud block i.e., if the file is in WORM-Retained/WORM state.
+ * 2: Blocks the FOP if any operation fails while doing the state transition or
+ * fails to get the state of the file.*/
+int
+gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd,
+ void *file_ptr, glusterfs_fop_t op)
+{
+ int op_errno = EROFS;
+ int ret = -1;
+ time_t now = 0;
+ uint64_t com_period = 0;
+ uint64_t start_time = 0;
+ dict_t *dict = NULL;
+ worm_reten_state_t reten_state = {
+ 0,
+ };
+ read_only_priv_t *priv = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ if (fop_with_fd)
+ ret = syncop_fgetxattr(this, (fd_t *)file_ptr, &dict,
+ "trusted.start_time", NULL, NULL);
+ else
+ ret = syncop_getxattr(this, (loc_t *)file_ptr, &dict,
+ "trusted.start_time", NULL, NULL);
+ if (ret < 0 || !dict) {
+ op_errno = ret;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, 0, "Error getting xattr");
+ goto out;
+ }
+ ret = dict_get_uint64(dict, "trusted.start_time", &start_time);
+ if (ret) {
+ op_errno = ret;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, 0, "Error getting start time");
+ goto out;
+ }
+
+ com_period = priv->com_period;
+ if (fop_with_fd)
+ ret = syncop_fstat(this, (fd_t *)file_ptr, &stbuf, NULL, NULL);
+ else
+ ret = syncop_stat(this, (loc_t *)file_ptr, &stbuf, NULL, NULL);
+ if (ret) {
+ op_errno = ret;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, 0, "Error getting file stat");
+ goto out;
+ }
+
+ ret = worm_get_state(this, fop_with_fd, file_ptr, &reten_state);
+ if (ret == -2) {
+ op_errno = ret;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
+ "Error getting worm/retention state");
+ goto out;
+ }
+
+ now = gf_time();
+
+ if (ret == -1 && (now - start_time) >= com_period) {
+ if ((now - stbuf.ia_mtime) >= com_period) {
+ ret = worm_set_state(this, fop_with_fd, file_ptr, &reten_state,
+ &stbuf);
+ if (ret) {
+ op_errno = ret;
+ gf_msg(this->name, GF_LOG_ERROR, -ret, 0,
+ "Error setting worm/retention state");
+ goto out;
+ }
+ goto out;
+ } else {
+ op_errno = 0;
+ goto out;
+ }
+ } else if (ret == -1 && (now - start_time) < com_period) {
+ op_errno = 0;
+ goto out;
+ } else if (reten_state.retain && ((now >= stbuf.ia_atime))) {
+ gf_worm_state_lookup(this, fop_with_fd, file_ptr, &reten_state, &stbuf);
+ }
+ if (reten_state.worm && !reten_state.retain && priv->worm_files_deletable &&
+ op == GF_FOP_UNLINK) {
+ op_errno = 0;
+ goto out;
+ }
+
+out:
+ if (dict)
+ dict_unref(dict);
+ return op_errno;
+}
+
+/*Function to check whether a file is independently WORMed (i.e., file level
+ * WORM is set on the file). */
+int32_t
+is_wormfile(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr)
+{
+ int ret = -1;
+ dict_t *dict = NULL;
+
+ if (fop_with_fd)
+ ret = syncop_fgetxattr(this, (fd_t *)file_ptr, &dict,
+ "trusted.worm_file", NULL, NULL);
+ else
+ ret = syncop_getxattr(this, (loc_t *)file_ptr, &dict,
+ "trusted.worm_file", NULL, NULL);
+ if (dict) {
+ ret = 0;
+ dict_unref(dict);
+ }
+ return ret;
+}
diff --git a/xlators/features/read-only/src/worm-helper.h b/xlators/features/read-only/src/worm-helper.h
new file mode 100644
index 00000000000..b42f8d2b40c
--- /dev/null
+++ b/xlators/features/read-only/src/worm-helper.h
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+gf_boolean_t
+gf_worm_write_disabled(struct iatt *stbuf);
+
+int32_t
+worm_init_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr);
+
+int32_t
+worm_set_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr,
+ worm_reten_state_t *retention_state, struct iatt *stbuf);
+
+int32_t
+worm_get_state(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr,
+ worm_reten_state_t *reten_state);
+
+void
+gf_worm_state_lookup(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr,
+ worm_reten_state_t *reten_state, struct iatt *stbuf);
+
+void
+gf_worm_serialize_state(worm_reten_state_t *reten_state, char *val);
+
+void
+gf_worm_deserialize_state(char *val, worm_reten_state_t *reten_state);
+
+int32_t
+gf_worm_set_xattr(xlator_t *this, worm_reten_state_t *reten_state,
+ gf_boolean_t fop_with_fd, void *file_ptr);
+
+int
+gf_worm_state_transition(xlator_t *this, gf_boolean_t fop_with_fd,
+ void *file_ptr, glusterfs_fop_t op);
+
+int32_t
+is_wormfile(xlator_t *this, gf_boolean_t fop_with_fd, void *file_ptr);
diff --git a/xlators/features/read-only/src/worm.c b/xlators/features/read-only/src/worm.c
index d6c1e1b7dd0..1cc5526d5cd 100644
--- a/xlators/features/read-only/src/worm.c
+++ b/xlators/features/read-only/src/worm.c
@@ -1,100 +1,722 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2008-2012, 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include "read-only-common.h"
+#include "read-only-mem-types.h"
+#include "read-only.h"
+#include <glusterfs/syncop.h>
+#include "worm-helper.h"
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ ret = xlator_mem_acct_init(this, gf_read_only_mt_end + 1);
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR,
+ "Memory accounting "
+ "initialization failed.");
-#include "xlator.h"
-#include "defaults.h"
-#include "read-only-common.h"
+ return ret;
+}
static int32_t
-worm_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
- int32_t op_errno, fd_t *fd, dict_t *xdata)
+worm_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND_STRICT (open, frame, op_ret, op_errno, fd, xdata);
+ if (is_readonly_or_worm_enabled(frame, this) &&
+ (flags & (O_WRONLY | O_RDWR | O_APPEND | O_TRUNC))) {
+ STACK_UNWIND_STRICT(open, frame, -1, EROFS, NULL, NULL);
return 0;
+ }
+
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
+ loc, flags, fd, xdata);
+ return 0;
}
-int32_t
-worm_open (call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
- fd_t *fd, dict_t *xdata)
+static int32_t
+worm_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
{
- if ((((flags & O_ACCMODE) == O_WRONLY) ||
- ((flags & O_ACCMODE) == O_RDWR)) &&
- !(flags & O_APPEND)) {
- STACK_UNWIND_STRICT (open, frame, -1, EROFS, NULL, NULL);
- return 0;
+ int op_errno = EROFS;
+ read_only_priv_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+ if (is_readonly_or_worm_enabled(frame, this))
+ goto out;
+ if (!priv->worm_file || (frame->root->pid < 0)) {
+ op_errno = 0;
+ goto out;
+ }
+
+ gf_uuid_copy(oldloc->gfid, oldloc->inode->gfid);
+ if (is_wormfile(this, _gf_false, oldloc)) {
+ op_errno = 0;
+ goto out;
+ }
+ op_errno = gf_worm_state_transition(this, _gf_false, oldloc, GF_FOP_LINK);
+
+out:
+ if (op_errno) {
+ if (op_errno < 0)
+ op_errno = EROFS;
+ STACK_UNWIND_STRICT(link, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ } else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+}
+
+static int32_t
+worm_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ dict_t *xdata)
+{
+ int op_errno = EROFS;
+ read_only_priv_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+ if (is_readonly_or_worm_enabled(frame, this)) {
+ goto out;
+ }
+ if (!priv->worm_file || (frame->root->pid < 0)) {
+ op_errno = 0;
+ goto out;
+ }
+
+ gf_uuid_copy(loc->gfid, loc->inode->gfid);
+ if (is_wormfile(this, _gf_false, loc)) {
+ op_errno = 0;
+ goto out;
+ }
+ op_errno = gf_worm_state_transition(this, _gf_false, loc, GF_FOP_UNLINK);
+out:
+ if (op_errno) {
+ if (op_errno < 0)
+ op_errno = EROFS;
+ STACK_UNWIND_STRICT(unlink, frame, -1, op_errno, NULL, NULL, NULL);
+ } else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, flags, xdata);
+ return 0;
+}
+
+static int32_t
+worm_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ int op_errno = EROFS;
+ read_only_priv_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+ if (is_readonly_or_worm_enabled(frame, this))
+ goto out;
+ if (!priv->worm_file || (frame->root->pid < 0)) {
+ op_errno = 0;
+ goto out;
+ }
+
+ gf_uuid_copy(oldloc->gfid, oldloc->inode->gfid);
+ if (is_wormfile(this, _gf_false, oldloc)) {
+ op_errno = 0;
+ goto check_newloc;
+ }
+ op_errno = gf_worm_state_transition(this, _gf_false, oldloc, GF_FOP_RENAME);
+
+ if (op_errno == 0) {
+ check_newloc:
+ if (newloc->inode != NULL) {
+ gf_uuid_copy(newloc->gfid, newloc->inode->gfid);
+ if (is_wormfile(this, _gf_false, newloc)) {
+ op_errno = 0;
+ goto out;
+ }
+ op_errno = gf_worm_state_transition(this, _gf_false, newloc,
+ GF_FOP_RENAME);
}
+ }
- STACK_WIND (frame, worm_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
- return 0;
+out:
+ if (op_errno) {
+ if (op_errno < 0)
+ op_errno = EROFS;
+ STACK_UNWIND_STRICT(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+ } else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ return 0;
}
-int32_t
-init (xlator_t *this)
+static int32_t
+worm_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "translator not configured with exactly one child");
- return -1;
+ int op_errno = EROFS;
+ read_only_priv_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+ if (is_readonly_or_worm_enabled(frame, this))
+ goto out;
+ if (!priv->worm_file || (frame->root->pid < 0)) {
+ op_errno = 0;
+ goto out;
+ }
+
+ if (is_wormfile(this, _gf_false, loc)) {
+ op_errno = 0;
+ goto out;
+ }
+ op_errno = gf_worm_state_transition(this, _gf_false, loc, GF_FOP_TRUNCATE);
+
+out:
+ if (op_errno) {
+ if (op_errno < 0)
+ op_errno = EROFS;
+ STACK_UNWIND_STRICT(truncate, frame, -1, op_errno, NULL, NULL, NULL);
+ } else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
+}
+
+static int32_t
+worm_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ int op_errno = EROFS;
+ read_only_priv_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+ if (is_readonly_or_worm_enabled(frame, this))
+ goto out;
+ if (!priv->worm_file || (frame->root->pid < 0)) {
+ op_errno = 0;
+ goto out;
+ }
+
+ if (is_wormfile(this, _gf_true, fd)) {
+ op_errno = 0;
+ goto out;
+ }
+ op_errno = gf_worm_state_transition(this, _gf_true, fd, GF_FOP_FTRUNCATE);
+
+out:
+ if (op_errno) {
+ if (op_errno < 0)
+ op_errno = EROFS;
+ STACK_UNWIND_STRICT(ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+ } else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+}
+
+static int32_t
+worm_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ gf_boolean_t rd_only = _gf_false;
+ worm_reten_state_t reten_state = {
+ 0,
+ };
+ struct iatt stpre = {
+ 0,
+ };
+ read_only_priv_t *priv = NULL;
+ int op_errno = EROFS;
+ int ret = -1;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+ if (!priv->worm_file) {
+ op_errno = 0;
+ goto out;
+ }
+
+ if (is_wormfile(this, _gf_false, loc)) {
+ op_errno = 0;
+ goto out;
+ }
+ if (valid & GF_SET_ATTR_MODE) {
+ rd_only = gf_worm_write_disabled(stbuf);
+ if (!rd_only) {
+ op_errno = 0;
+ goto out;
}
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
+ ret = worm_set_state(this, _gf_false, loc, &reten_state, stbuf);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Error setting worm state");
+ goto out;
+ }
+ } else if (valid & GF_SET_ATTR_ATIME) {
+ ret = worm_get_state(this, _gf_false, loc, &reten_state);
+ if (ret) {
+ op_errno = 0;
+ goto out;
}
+ if (reten_state.retain) {
+ ret = syncop_stat(this, loc, &stpre, NULL, NULL);
+ if (ret)
+ goto out;
+ if (reten_state.ret_mode == 0) {
+ if (stbuf->ia_atime < stpre.ia_mtime) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Cannot set atime less than "
+ "the mtime for a WORM-Retained "
+ "file");
+ goto out;
+ }
+ } else {
+ if (stbuf->ia_atime < stpre.ia_atime) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Cannot decrease the atime of a"
+ " WORM-Retained file in "
+ "Enterprise mode");
+ goto out;
+ }
+ }
+ reten_state.ret_period = reten_state.ret_period + stbuf->ia_atime -
+ stpre.ia_atime;
+ ret = gf_worm_set_xattr(this, &reten_state, _gf_false, loc);
+ if (ret) {
+ goto out;
+ }
+ stbuf->ia_mtime = stpre.ia_mtime;
+ }
+ }
+ op_errno = 0;
- return 0;
+out:
+ if (op_errno)
+ STACK_UNWIND_STRICT(setattr, frame, -1, EROFS, NULL, NULL, NULL);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid,
+ xdata);
+ return 0;
}
+static int32_t
+worm_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ gf_boolean_t rd_only = _gf_false;
+ worm_reten_state_t reten_state = {
+ 0,
+ };
+ struct iatt stpre = {
+ 0,
+ };
+ read_only_priv_t *priv = NULL;
+ int op_errno = EROFS;
+ int ret = -1;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+ if (!priv->worm_file) {
+ op_errno = 0;
+ goto out;
+ }
+
+ if (is_wormfile(this, _gf_true, fd)) {
+ op_errno = 0;
+ goto out;
+ }
+ if (valid & GF_SET_ATTR_MODE) {
+ rd_only = gf_worm_write_disabled(stbuf);
+ if (!rd_only) {
+ op_errno = 0;
+ goto out;
+ }
+
+ ret = worm_set_state(this, _gf_true, fd, &reten_state, stbuf);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Error setting worm state");
+ goto out;
+ }
+ } else if (valid & GF_SET_ATTR_ATIME) {
+ ret = worm_get_state(this, _gf_true, fd, &reten_state);
+ if (ret) {
+ op_errno = 0;
+ goto out;
+ }
+ if (reten_state.retain) {
+ ret = syncop_fstat(this, fd, &stpre, NULL, NULL);
+ if (ret)
+ goto out;
+ if (reten_state.ret_mode == 0) {
+ if (stbuf->ia_atime < stpre.ia_mtime) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Cannot set atime less than "
+ "the mtime for a WORM-Retained "
+ "file");
+ goto out;
+ }
+ } else {
+ if (stbuf->ia_atime < stpre.ia_atime) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Cannot decrease the atime of a"
+ " WORM-Retained file in "
+ "Enterprise mode");
+ goto out;
+ }
+ }
+ reten_state.ret_period = reten_state.ret_period + stbuf->ia_atime -
+ stpre.ia_atime;
+ ret = gf_worm_set_xattr(this, &reten_state, _gf_true, fd);
+ if (ret) {
+ goto out;
+ }
+
+ stbuf->ia_mtime = stpre.ia_mtime;
+ }
+ }
+ op_errno = 0;
+
+out:
+ if (op_errno)
+ STACK_UNWIND_STRICT(fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid,
+ xdata);
+ return 0;
+}
+
+static int32_t
+worm_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ read_only_priv_t *priv = NULL;
+ int op_errno = EROFS;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+ if (!priv->worm_file || (frame->root->pid < 0)) {
+ op_errno = 0;
+ goto out;
+ }
+ if (is_wormfile(this, _gf_true, fd)) {
+ op_errno = 0;
+ goto out;
+ }
+ op_errno = gf_worm_state_transition(this, _gf_true, fd, GF_FOP_WRITE);
+
+out:
+ if (op_errno) {
+ if (op_errno < 0)
+ op_errno = EROFS;
+ STACK_UNWIND_STRICT(writev, frame, -1, op_errno, NULL, NULL, NULL);
+ } else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count,
+ offset, flags, iobref, xdata);
+ return 0;
+}
+
+static int32_t
+worm_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int ret = 0;
+ read_only_priv_t *priv = NULL;
+ // In case of an error exit because fd can be NULL and this would
+ // cause an segfault when performing fsetxattr . We explicitly
+ // unwind to avoid future problems
+ if (op_ret < 0) {
+ goto out;
+ }
+
+ priv = this->private;
+ GF_ASSERT(priv);
+ if (priv->worm_file) {
+ ret = fd_ctx_set(fd, this, 1);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to set the fd ctx "
+ "for gfid:%s . Worm feature may not work for the gfid",
+ uuid_utoa(inode->gfid));
+ }
+ ret = worm_init_state(this, _gf_true, fd);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Error initializing state");
+ }
+ }
+
+out:
+ STACK_UNWIND_STRICT(create, frame, op_ret, op_errno, fd, inode, buf,
+ preparent, postparent, xdata);
+ return ret;
+}
+
+static int32_t
+worm_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND(frame, worm_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+}
+
+static void
+set_reten_mode(read_only_priv_t *priv, char *reten_mode)
+{
+ if (strcmp(reten_mode, "relax") == 0)
+ priv->reten_mode = 0;
+ else
+ priv->reten_mode = 1;
+}
+
+int32_t
+init(xlator_t *this)
+{
+ int ret = -1;
+ read_only_priv_t *priv = NULL;
+ char *reten_mode = NULL;
+
+ if (!this->children || this->children->next) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "translator not configured with exactly one child");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
+ }
+
+ this->local_pool = mem_pool_new(read_only_priv_t, 64);
+ if (!this->local_pool) {
+ ret = -1;
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to create read_only_priv_t's memory pool");
+ goto out;
+ }
+
+ priv = mem_get0(this->local_pool);
+ if (!priv) {
+ gf_log(this->name, GF_LOG_ERROR, "Error allocating priv");
+ goto out;
+ }
+
+ this->private = priv;
+
+ GF_OPTION_INIT("worm", priv->readonly_or_worm_enabled, bool, out);
+ GF_OPTION_INIT("worm-file-level", priv->worm_file, bool, out);
+ GF_OPTION_INIT("default-retention-period", priv->reten_period, int64, out);
+ GF_OPTION_INIT("auto-commit-period", priv->com_period, int64, out);
+ GF_OPTION_INIT("retention-mode", reten_mode, str, out);
+ set_reten_mode(priv, reten_mode);
+ GF_OPTION_INIT("worm-files-deletable", priv->worm_files_deletable, bool,
+ out);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ read_only_priv_t *priv = NULL;
+ char *reten_mode = NULL;
+ int ret = -1;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GF_OPTION_RECONF("worm", priv->readonly_or_worm_enabled, options, bool,
+ out);
+ GF_OPTION_RECONF("worm-file-level", priv->worm_file, options, bool, out);
+ GF_OPTION_RECONF("default-retention-period", priv->reten_period, options,
+ int64, out);
+ GF_OPTION_RECONF("retention-mode", reten_mode, options, str, out);
+ set_reten_mode(priv, reten_mode);
+ GF_OPTION_RECONF("auto-commit-period", priv->com_period, options, int64,
+ out);
+ GF_OPTION_RECONF("worm-files-deletable", priv->worm_files_deletable,
+ options, bool, out);
+ ret = 0;
+out:
+ gf_log(this->name, GF_LOG_DEBUG, "returning %d", ret);
+ return ret;
+}
void
-fini (xlator_t *this)
+fini(xlator_t *this)
{
- return;
+ read_only_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+ mem_put(priv);
+ this->private = NULL;
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+out:
+ return;
}
struct xlator_fops fops = {
- .open = worm_open,
-
- .unlink = ro_unlink,
- .rmdir = ro_rmdir,
- .rename = ro_rename,
- .truncate = ro_truncate,
- .removexattr = ro_removexattr,
- .fsyncdir = ro_fsyncdir,
- .xattrop = ro_xattrop,
- .inodelk = ro_inodelk,
- .finodelk = ro_finodelk,
- .entrylk = ro_entrylk,
- .fentrylk = ro_fentrylk,
- .lk = ro_lk,
+ .open = worm_open,
+ .writev = worm_writev,
+ .setattr = worm_setattr,
+ .fsetattr = worm_fsetattr,
+ .rename = worm_rename,
+ .link = worm_link,
+ .unlink = worm_unlink,
+ .truncate = worm_truncate,
+ .ftruncate = worm_ftruncate,
+ .create = worm_create,
+
+ .rmdir = ro_rmdir,
+ .removexattr = ro_removexattr,
+ .fsyncdir = ro_fsyncdir,
+ .xattrop = ro_xattrop,
+ .inodelk = ro_inodelk,
+ .finodelk = ro_finodelk,
+ .entrylk = ro_entrylk,
+ .fentrylk = ro_fentrylk,
+ .lk = ro_lk,
};
+int32_t
+worm_release(xlator_t *this, fd_t *fd)
+{
+ dict_t *dict = NULL;
+ int ret = -1;
+ dict = dict_new();
+ uint64_t value = 0;
+ loc_t loc = {
+ 0,
+ };
+ read_only_priv_t *priv = NULL;
+ priv = this->private;
+
+ if (priv->worm_file) {
+ if (!dict) {
+ gf_log(this->name, GF_LOG_ERROR, "Error creating the dict");
+ goto out;
+ }
+
+ ret = fd_ctx_get(fd, this, &value);
+ if (ret) {
+ gf_log(this->name, GF_LOG_DEBUG, "Failed to get the fd ctx");
+ }
+ if (!value) {
+ goto out;
+ }
+
+ ret = dict_set_int8(dict, "trusted.worm_file", 1);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Error in setting "
+ "the dict");
+ goto out;
+ }
+
+ loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(loc.gfid, fd->inode->gfid);
+ ret = syncop_setxattr(this, &loc, dict, 0, NULL, NULL);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Error setting xattr");
+ goto out;
+ }
+
+ gf_worm_state_transition(this, _gf_false, &loc, GF_FOP_WRITE);
+ }
+
+out:
+ loc_wipe(&loc);
+ if (dict)
+ dict_unref(dict);
+ return 0;
+}
+
struct xlator_cbks cbks = {
+ .release = worm_release,
};
struct volume_options options[] = {
- { .key = {NULL} },
+ {.key = {"worm"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ /*.validate_fn = validate_boolean,*/
+ .op_version = {2},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "When \"on\", makes a volume get write once read many "
+ " feature. It is turned \"off\" by default."},
+ {.key = {"worm-file-level"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ /*.validate_fn = validate_boolean,*/
+ .op_version = {GD_OP_VERSION_3_8_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "When \"on\", activates the file level worm. "
+ "It is turned \"off\" by default."},
+ {.key = {"worm-files-deletable"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ /*.validate_fn = validate_boolean,*/
+ .op_version = {GD_OP_VERSION_3_13_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "When \"off\", doesn't allow the Worm files"
+ "to be deleted. It is turned \"on\" by default."},
+ {.key = {"default-retention-period"},
+ .type = GF_OPTION_TYPE_TIME,
+ .default_value = "120",
+ /*.validate_fn = validate_worm_period,*/
+ .op_version = {GD_OP_VERSION_3_8_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "The default retention period for the files."},
+ {.key = {"retention-mode"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = "relax",
+ /*.validate_fn = validate_reten_mode,*/
+ .op_version = {GD_OP_VERSION_3_8_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "The mode of retention (relax/enterprise). "
+ "It is relax by default."},
+ {.key = {"auto-commit-period"},
+ .type = GF_OPTION_TYPE_TIME,
+ .default_value = "180",
+ /*.validate_fn = validate_worm_period,*/
+ .op_version = {GD_OP_VERSION_3_8_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .description = "Auto commit period for the files."},
+ {.key = {NULL}},
};
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "worm",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/sdfs/Makefile.am b/xlators/features/sdfs/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/sdfs/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/sdfs/src/Makefile.am b/xlators/features/sdfs/src/Makefile.am
new file mode 100644
index 00000000000..6118d46ad22
--- /dev/null
+++ b/xlators/features/sdfs/src/Makefile.am
@@ -0,0 +1,19 @@
+if WITH_SERVER
+xlator_LTLIBRARIES = sdfs.la
+endif
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+sdfs_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+sdfs_la_SOURCES = sdfs.c
+sdfs_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = sdfs.h sdfs-messages.h $(top_builddir)/xlators/lib/src/libxlator.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/xlators/lib/src \
+ -I$(top_srcdir)/rpc/xdr/src/ -I$(top_builddir)/rpc/xdr/src/
+
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/sdfs/src/sdfs-messages.h b/xlators/features/sdfs/src/sdfs-messages.h
new file mode 100644
index 00000000000..3053efa8935
--- /dev/null
+++ b/xlators/features/sdfs/src/sdfs-messages.h
@@ -0,0 +1,67 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _DFS_MESSAGES_H_
+#define _DFS_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* file bit-rot-bitd-messages.h
+ * brief SDFS log-message IDs and their descriptions
+ */
+
+/* NOTE: Rules for message additions
+ * 1) Each instance of a message is _better_ left with a unique message ID, even
+ * if the message format is the same. Reasoning is that, if the message
+ * format needs to change in one instance, the other instances are not
+ * impacted or the new change does not change the ID of the instance being
+ * modified.
+ * 2) Addition of a message,
+ * - Should increment the GLFS_NUM_MESSAGES
+ * - Append to the list of messages defined, towards the end
+ * - Retain macro naming as glfs_msg_X (for redability across developers)
+ * NOTE: Rules for message format modifications
+ * 3) Check acorss the code if the message ID macro in question is reused
+ * anywhere. If reused then then the modifications should ensure correctness
+ * everywhere, or needs a new message ID as (1) above was not adhered to. If
+ * not used anywhere, proceed with the required modification.
+ * NOTE: Rules for message deletion
+ * 4) Check (3) and if used anywhere else, then cannot be deleted. If not used
+ * anywhere, then can be deleted, but will leave a hole by design, as
+ * addition rules specify modification to the end of the list and not filling
+ * holes.
+ */
+
+#define GLFS_SDFS_BASE GLFS_MSGID_COMP_SDFS
+#define GLFS_SDFS_NUM_MESSAGES 2
+#define GLFS_MSGID_END (GLFS_SDFS_BASE + GLFS_SDFS_NUM_MESSAGES + 1)
+/* Messaged with message IDs */
+#define glfs_msg_start_x GLFS_DFS_BASE, "Invalid: Start of messages"
+/*------------*/
+
+#define SDFS_MSG_ENTRYLK_ERROR (GLFS_SDFS_BASE + 1)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+
+#define SDFS_MSG_MKDIR_ERROR (GLFS_SDFS_BASE + 2)
+/*!
+ * @messageid
+ * @diagnosis
+ * @recommendedaction
+ *
+ */
+/*------------*/
+
+#define glfs_msg_end_x GLFS_MSGID_END, "Invalid: End of messages"
+#endif /* !_SDFS_MESSAGES_H_ */
diff --git a/xlators/features/sdfs/src/sdfs.c b/xlators/features/sdfs/src/sdfs.c
new file mode 100644
index 00000000000..aaf13f0852e
--- /dev/null
+++ b/xlators/features/sdfs/src/sdfs.c
@@ -0,0 +1,1479 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <libgen.h>
+#include "sdfs.h"
+
+static int
+sdfs_frame_return(call_frame_t *frame)
+{
+ sdfs_local_t *local = NULL;
+
+ if (!frame)
+ return -1;
+
+ local = frame->local;
+
+ return GF_ATOMIC_DEC(local->call_cnt);
+}
+
+static void
+sdfs_lock_free(sdfs_entry_lock_t *entrylk)
+{
+ if (entrylk == NULL)
+ goto out;
+
+ loc_wipe(&entrylk->parent_loc);
+ GF_FREE(entrylk->basename);
+
+out:
+ return;
+}
+
+static void
+sdfs_lock_array_free(sdfs_lock_t *lock)
+{
+ sdfs_entry_lock_t *entrylk = NULL;
+ int i = 0;
+
+ if (lock == NULL)
+ goto out;
+
+ for (i = 0; i < lock->lock_count; i++) {
+ entrylk = &lock->entrylk[i];
+ sdfs_lock_free(entrylk);
+ }
+
+out:
+ return;
+}
+
+static void
+sdfs_local_cleanup(sdfs_local_t *local)
+{
+ if (!local)
+ return;
+
+ loc_wipe(&local->loc);
+ loc_wipe(&local->parent_loc);
+
+ if (local->stub) {
+ call_stub_destroy(local->stub);
+ local->stub = NULL;
+ }
+
+ sdfs_lock_array_free(local->lock);
+ GF_FREE(local->lock);
+
+ mem_put(local);
+}
+
+static int
+sdfs_build_parent_loc(loc_t *parent, loc_t *child)
+{
+ int ret = -1;
+ char *path = NULL;
+
+ if (!child->parent) {
+ goto out;
+ }
+ parent->inode = inode_ref(child->parent);
+ path = gf_strdup(child->path);
+ if (!path) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ parent->path = dirname(path);
+ if (!parent->path) {
+ goto out;
+ }
+
+ gf_uuid_copy(parent->gfid, child->pargfid);
+ return 0;
+
+out:
+ GF_FREE(path);
+ return ret;
+}
+
+static sdfs_local_t *
+sdfs_local_init(call_frame_t *frame, xlator_t *this)
+{
+ sdfs_local_t *local = NULL;
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto out;
+
+ frame->local = local;
+out:
+ return local;
+}
+
+static int
+sdfs_get_new_frame_common(call_frame_t *frame, call_frame_t **new_frame)
+{
+ int ret = -1;
+ sdfs_local_t *local = NULL;
+ client_t *client = NULL;
+
+ *new_frame = copy_frame(frame);
+ if (!*new_frame) {
+ goto err;
+ }
+
+ client = frame->root->client;
+ gf_client_ref(client);
+ (*new_frame)->root->client = client;
+
+ local = sdfs_local_init(*new_frame, THIS);
+ if (!local) {
+ goto err;
+ }
+
+ local->main_frame = frame;
+ /*Set unique lk-owner for the fop*/
+ set_lk_owner_from_ptr(&(*new_frame)->root->lk_owner, (*new_frame)->root);
+
+ ret = 0;
+err:
+ if ((ret == -1) && (*new_frame)) {
+ SDFS_STACK_DESTROY((*new_frame));
+ *new_frame = NULL;
+ }
+
+ return ret;
+}
+
+static int
+sdfs_get_new_frame(call_frame_t *frame, loc_t *loc, call_frame_t **new_frame)
+{
+ int ret = -1;
+ sdfs_local_t *local = NULL;
+
+ ret = sdfs_get_new_frame_common(frame, new_frame);
+ if (ret < 0) {
+ goto err;
+ }
+
+ local = (*new_frame)->local;
+
+ ret = sdfs_build_parent_loc(&local->parent_loc, loc);
+ if (ret) {
+ goto err;
+ }
+
+ ret = loc_copy(&local->loc, loc);
+ if (ret == -1) {
+ goto err;
+ }
+
+ ret = 0;
+err:
+ if (ret && (*new_frame)) {
+ SDFS_STACK_DESTROY((*new_frame));
+ *new_frame = NULL;
+ ret = -1;
+ }
+
+ return ret;
+}
+
+static int
+sdfs_get_new_frame_readdirp(call_frame_t *frame, fd_t *fd,
+ call_frame_t **new_frame)
+{
+ int ret = -1;
+ sdfs_local_t *local = NULL;
+
+ ret = sdfs_get_new_frame_common(frame, new_frame);
+ if (ret < 0) {
+ goto err;
+ }
+
+ local = (*new_frame)->local;
+ local->parent_loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->parent_loc.gfid, fd->inode->gfid);
+
+ ret = 0;
+err:
+ return ret;
+}
+
+int
+sdfs_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ call_stub_t *stub = NULL;
+
+ local = frame->local;
+
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+
+ if (local->stub) {
+ stub = local->stub;
+ local->stub = NULL;
+ call_resume(stub);
+ } else {
+ if (op_ret < 0)
+ gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR,
+ "Unlocking entry lock failed for %s", local->loc.name);
+
+ SDFS_STACK_DESTROY(frame);
+ }
+
+ return 0;
+}
+
+int
+sdfs_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+
+ local = frame->local;
+
+ STACK_UNWIND_STRICT(mkdir, local->main_frame, op_ret, op_errno, inode,
+ stbuf, preparent, postparent, xdata);
+
+ local->main_frame = NULL;
+ STACK_WIND(frame, sdfs_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc,
+ local->loc.name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata);
+ return 0;
+}
+
+int
+sdfs_mkdir_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+ int op_errno = -1;
+
+ local = frame->local;
+
+ gf_uuid_unparse(loc->pargfid, gfid);
+
+ if (local->op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR,
+ "Acquiring entry lock failed for directory %s "
+ "with parent gfid %s",
+ local->loc.name, gfid);
+ op_errno = local->op_errno;
+ goto err;
+ }
+
+ STACK_WIND(frame, sdfs_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(mkdir, local->main_frame, -1, op_errno, NULL, NULL,
+ NULL, NULL, NULL);
+
+ local->main_frame = NULL;
+ SDFS_STACK_DESTROY(frame);
+ return 0;
+}
+
+int
+sdfs_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ call_frame_t *new_frame = NULL;
+ call_stub_t *stub = NULL;
+ int op_errno = 0;
+
+ if (-1 == sdfs_get_new_frame(frame, loc, &new_frame)) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ stub = fop_mkdir_stub(new_frame, sdfs_mkdir_helper, loc, mode, umask,
+ xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local = new_frame->local;
+ local->stub = stub;
+
+ STACK_WIND(new_frame, sdfs_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc,
+ local->loc.name, ENTRYLK_LOCK, ENTRYLK_WRLCK, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+
+ if (new_frame)
+ SDFS_STACK_DESTROY(new_frame);
+
+ return 0;
+}
+
+int
+sdfs_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+
+ local = frame->local;
+
+ STACK_UNWIND_STRICT(rmdir, local->main_frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+
+ local->main_frame = NULL;
+ STACK_WIND(frame, sdfs_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc,
+ local->loc.name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata);
+ return 0;
+}
+
+int
+sdfs_rmdir_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+
+ gf_uuid_unparse(loc->pargfid, gfid);
+
+ if (local->op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR,
+ "Acquiring entry lock failed for directory %s "
+ "with parent gfid %s",
+ local->loc.name, gfid);
+ goto err;
+ }
+
+ STACK_WIND(frame, sdfs_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(rmdir, local->main_frame, -1, local->op_errno, NULL,
+ NULL, NULL);
+
+ local->main_frame = NULL;
+ SDFS_STACK_DESTROY(frame);
+ return 0;
+}
+
+int
+sdfs_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ call_frame_t *new_frame = NULL;
+ call_stub_t *stub = NULL;
+ int op_errno = 0;
+
+ if (-1 == sdfs_get_new_frame(frame, loc, &new_frame)) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ stub = fop_rmdir_stub(new_frame, sdfs_rmdir_helper, loc, flags, xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local = new_frame->local;
+ local->stub = stub;
+
+ STACK_WIND(new_frame, sdfs_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc,
+ local->loc.name, ENTRYLK_LOCK, ENTRYLK_WRLCK, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(rmdir, frame, -1, op_errno, NULL, NULL, NULL);
+
+ if (new_frame)
+ SDFS_STACK_DESTROY(new_frame);
+
+ return 0;
+}
+
+int
+sdfs_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+
+ local = frame->local;
+
+ STACK_UNWIND_STRICT(create, local->main_frame, op_ret, op_errno, fd, inode,
+ stbuf, preparent, postparent, xdata);
+
+ local->main_frame = NULL;
+ STACK_WIND(frame, sdfs_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc,
+ local->loc.name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata);
+ return 0;
+}
+
+int
+sdfs_create_helper(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+
+ gf_uuid_unparse(loc->pargfid, gfid);
+
+ if (local->op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR,
+ "Acquiring entry lock failed for directory %s "
+ "with parent gfid %s",
+ local->loc.name, gfid);
+ goto err;
+ }
+
+ STACK_WIND(frame, sdfs_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(create, local->main_frame, -1, local->op_errno, NULL,
+ NULL, NULL, NULL, NULL, NULL);
+
+ local->main_frame = NULL;
+ SDFS_STACK_DESTROY(frame);
+ return 0;
+}
+
+int
+sdfs_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ call_frame_t *new_frame = NULL;
+ call_stub_t *stub = NULL;
+ int op_errno = 0;
+
+ if (-1 == sdfs_get_new_frame(frame, loc, &new_frame)) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ stub = fop_create_stub(new_frame, sdfs_create_helper, loc, flags, mode,
+ umask, fd, xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local = new_frame->local;
+ local->stub = stub;
+
+ STACK_WIND(new_frame, sdfs_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc,
+ local->loc.name, ENTRYLK_LOCK, ENTRYLK_WRLCK, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+
+ if (new_frame)
+ SDFS_STACK_DESTROY(new_frame);
+
+ return 0;
+}
+
+int
+sdfs_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+
+ local = frame->local;
+
+ STACK_UNWIND_STRICT(unlink, local->main_frame, op_ret, op_errno, preparent,
+ postparent, xdata);
+
+ local->main_frame = NULL;
+ STACK_WIND(frame, sdfs_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc,
+ local->loc.name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata);
+ return 0;
+}
+
+int
+sdfs_unlink_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+
+ gf_uuid_unparse(loc->pargfid, gfid);
+
+ if (local->op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR,
+ "Acquiring entry lock failed for directory %s "
+ "with parent gfid %s",
+ local->loc.name, gfid);
+ goto err;
+ }
+
+ STACK_WIND(frame, sdfs_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, flags, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(unlink, local->main_frame, -1, local->op_errno, NULL,
+ NULL, NULL);
+
+ local->main_frame = NULL;
+ SDFS_STACK_DESTROY(frame);
+ return 0;
+}
+
+int
+sdfs_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ call_frame_t *new_frame = NULL;
+ call_stub_t *stub = NULL;
+ int op_errno = 0;
+
+ if (-1 == sdfs_get_new_frame(frame, loc, &new_frame)) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ stub = fop_unlink_stub(new_frame, sdfs_unlink_helper, loc, flags, xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local = new_frame->local;
+ local->stub = stub;
+
+ STACK_WIND(new_frame, sdfs_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc,
+ local->loc.name, ENTRYLK_LOCK, ENTRYLK_WRLCK, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(unlink, frame, -1, op_errno, NULL, NULL, NULL);
+
+ if (new_frame)
+ SDFS_STACK_DESTROY(new_frame);
+
+ return 0;
+}
+
+int
+sdfs_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+
+ local = frame->local;
+
+ STACK_UNWIND_STRICT(link, local->main_frame, op_ret, op_errno, inode, stbuf,
+ preparent, postparent, xdata);
+
+ local->main_frame = NULL;
+ STACK_WIND(frame, sdfs_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc,
+ local->loc.name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata);
+ return 0;
+}
+
+int
+sdfs_symlink_helper(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+
+ gf_uuid_unparse(loc->pargfid, gfid);
+
+ if (local->op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR,
+ "Acquiring entry lock failed for directory %s "
+ "with parent gfid %s",
+ local->loc.name, gfid);
+ goto err;
+ }
+
+ STACK_WIND(frame, sdfs_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkname, loc, umask, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(link, local->main_frame, -1, local->op_errno, NULL,
+ NULL, NULL, NULL, NULL);
+
+ local->main_frame = NULL;
+ SDFS_STACK_DESTROY(frame);
+ return 0;
+}
+
+int
+sdfs_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ call_frame_t *new_frame = NULL;
+ call_stub_t *stub = NULL;
+ int op_errno = 0;
+
+ if (-1 == sdfs_get_new_frame(frame, loc, &new_frame)) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ stub = fop_symlink_stub(new_frame, sdfs_symlink_helper, linkname, loc,
+ umask, xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local = new_frame->local;
+ local->stub = stub;
+
+ STACK_WIND(new_frame, sdfs_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc,
+ local->loc.name, ENTRYLK_LOCK, ENTRYLK_WRLCK, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(link, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+
+ if (new_frame)
+ SDFS_STACK_DESTROY(new_frame);
+
+ return 0;
+}
+
+int
+sdfs_common_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ int this_call_cnt = 0;
+ int lk_index = 0;
+ sdfs_lock_t *locks = NULL;
+ call_stub_t *stub = NULL;
+
+ local = frame->local;
+ locks = local->lock;
+ lk_index = (long)cookie;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ } else {
+ locks->entrylk->locked[lk_index] = _gf_true;
+ }
+
+ this_call_cnt = sdfs_frame_return(frame);
+ if (this_call_cnt > 0) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "As there are more callcnt (%d) returning without WIND",
+ this_call_cnt);
+ return 0;
+ }
+
+ if (local->stub) {
+ stub = local->stub;
+ local->stub = NULL;
+ call_resume(stub);
+ } else {
+ if (local->op_ret < 0)
+ gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR,
+ "unlocking entry lock failed ");
+ SDFS_STACK_DESTROY(frame);
+ }
+
+ return 0;
+}
+
+int
+sdfs_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ sdfs_lock_t *lock = NULL;
+ int i = 0;
+ int lock_count = 0;
+
+ local = frame->local;
+ lock = local->lock;
+
+ STACK_UNWIND_STRICT(link, local->main_frame, op_ret, op_errno, inode, stbuf,
+ preparent, postparent, xdata);
+
+ local->main_frame = NULL;
+ lock_count = lock->lock_count;
+ for (i = 0; i < lock_count; i++) {
+ STACK_WIND_COOKIE(frame, sdfs_common_entrylk_cbk, (void *)(long)i,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->entrylk,
+ this->name, &lock->entrylk[i].parent_loc,
+ lock->entrylk[i].basename, ENTRYLK_UNLOCK,
+ ENTRYLK_WRLCK, xdata);
+ }
+
+ return 0;
+}
+
+int
+sdfs_link_helper(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ sdfs_lock_t *locks = NULL;
+ gf_boolean_t stack_destroy = _gf_true;
+ int lock_count = 0;
+ int i = 0;
+
+ local = frame->local;
+ locks = local->lock;
+
+ if (local->op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR,
+ "Acquiring entry lock failed");
+ goto err;
+ }
+
+ STACK_WIND(frame, sdfs_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(link, local->main_frame, -1, local->op_errno, NULL,
+ NULL, NULL, NULL, NULL);
+
+ local->main_frame = NULL;
+ for (i = 0; i < locks->lock_count && locks->entrylk->locked[i]; i++) {
+ lock_count++;
+ }
+ GF_ATOMIC_INIT(local->call_cnt, lock_count);
+
+ for (i = 0; i < lock_count; i++) {
+ if (!locks->entrylk->locked[i]) {
+ lock_count++;
+ continue;
+ }
+
+ stack_destroy = _gf_false;
+ STACK_WIND(frame, sdfs_common_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name,
+ &locks->entrylk[i].parent_loc, locks->entrylk[i].basename,
+ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata);
+ }
+
+ if (stack_destroy)
+ SDFS_STACK_DESTROY(frame);
+
+ return 0;
+}
+
+static int
+sdfs_init_entry_lock(sdfs_entry_lock_t *lock, loc_t *loc)
+{
+ int ret = 0;
+
+ ret = sdfs_build_parent_loc(&lock->parent_loc, loc);
+ if (ret)
+ return -1;
+
+ lock->basename = gf_strdup(loc->name);
+ if (!lock->basename)
+ return -1;
+
+ return 0;
+}
+
+int
+sdfs_entry_lock_cmp(const void *l1, const void *l2)
+{
+ const sdfs_entry_lock_t *r1 = l1;
+ const sdfs_entry_lock_t *r2 = l2;
+ int ret = 0;
+ uuid_t gfid1 = {0};
+ uuid_t gfid2 = {0};
+
+ loc_gfid((loc_t *)&r1->parent_loc, gfid1);
+ loc_gfid((loc_t *)&r2->parent_loc, gfid2);
+ ret = gf_uuid_compare(gfid1, gfid2);
+ /*Entrylks with NULL basename are the 'smallest'*/
+ if (ret == 0) {
+ if (!r1->basename)
+ return -1;
+ if (!r2->basename)
+ return 1;
+ ret = strcmp(r1->basename, r2->basename);
+ }
+
+ if (ret <= 0)
+ return -1;
+ else
+ return 1;
+}
+
+int
+sdfs_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ call_frame_t *new_frame = NULL;
+ call_stub_t *stub = NULL;
+ sdfs_lock_t *lock = NULL;
+ client_t *client = NULL;
+ int ret = 0;
+ int op_errno = ENOMEM;
+
+ new_frame = copy_frame(frame);
+ if (!new_frame) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ /*Set unique lk-owner for the fop*/
+ set_lk_owner_from_ptr(&new_frame->root->lk_owner, new_frame->root);
+
+ gf_client_ref(client);
+ new_frame->root->client = client;
+ local = sdfs_local_init(new_frame, this);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->main_frame = frame;
+
+ lock = GF_CALLOC(1, sizeof(*lock), gf_common_mt_char);
+ if (!lock)
+ goto err;
+
+ local->lock = lock;
+
+ ret = sdfs_init_entry_lock(&lock->entrylk[0], newloc);
+ if (ret)
+ goto err;
+
+ ++lock->lock_count;
+
+ local->lock = lock;
+ GF_ATOMIC_INIT(local->call_cnt, lock->lock_count);
+
+ ret = loc_copy(&local->loc, newloc);
+ if (ret == -1) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ stub = fop_link_stub(new_frame, sdfs_link_helper, oldloc, newloc, xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->stub = stub;
+
+ STACK_WIND_COOKIE(new_frame, sdfs_common_entrylk_cbk, 0, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name,
+ &lock->entrylk[0].parent_loc, lock->entrylk[0].basename,
+ ENTRYLK_LOCK, ENTRYLK_WRLCK, xdata);
+
+ return 0;
+err:
+
+ STACK_UNWIND_STRICT(link, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+
+ if (new_frame)
+ SDFS_STACK_DESTROY(new_frame);
+
+ return 0;
+}
+
+int
+sdfs_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+
+ local = frame->local;
+
+ STACK_UNWIND_STRICT(mknod, local->main_frame, op_ret, op_errno, inode,
+ stbuf, preparent, postparent, xdata);
+
+ local->main_frame = NULL;
+ STACK_WIND(frame, sdfs_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc,
+ local->loc.name, ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata);
+ return 0;
+}
+
+int
+sdfs_mknod_helper(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+
+ gf_uuid_unparse(loc->pargfid, gfid);
+
+ if (local->op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR,
+ "Acquiring entry lock failed for directory %s "
+ "with parent gfid %s",
+ local->loc.name, gfid);
+ goto err;
+ }
+
+ STACK_WIND(frame, sdfs_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(mknod, local->main_frame, -1, local->op_errno, NULL,
+ NULL, NULL, NULL, NULL);
+
+ local->main_frame = NULL;
+ SDFS_STACK_DESTROY(frame);
+ return 0;
+}
+
+int
+sdfs_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ call_frame_t *new_frame = NULL;
+ call_stub_t *stub = NULL;
+ int op_errno = 0;
+
+ if (-1 == sdfs_get_new_frame(frame, loc, &new_frame)) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ stub = fop_mknod_stub(new_frame, sdfs_mknod_helper, loc, mode, rdev, umask,
+ xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local = new_frame->local;
+ local->stub = stub;
+
+ STACK_WIND(new_frame, sdfs_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc,
+ local->loc.name, ENTRYLK_LOCK, ENTRYLK_WRLCK, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+
+ if (new_frame)
+ SDFS_STACK_DESTROY(new_frame);
+
+ return 0;
+}
+
+int
+sdfs_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ sdfs_lock_t *lock = NULL;
+ int i = 0;
+ int call_cnt = 0;
+
+ local = frame->local;
+ lock = local->lock;
+ GF_ATOMIC_INIT(local->call_cnt, lock->lock_count);
+
+ STACK_UNWIND_STRICT(rename, local->main_frame, op_ret, op_errno, stbuf,
+ preoldparent, postoldparent, prenewparent,
+ postnewparent, xdata);
+
+ local->main_frame = NULL;
+ call_cnt = GF_ATOMIC_GET(local->call_cnt);
+
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(frame, sdfs_common_entrylk_cbk, (void *)(long)i,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->entrylk,
+ this->name, &lock->entrylk[i].parent_loc,
+ lock->entrylk[i].basename, ENTRYLK_UNLOCK,
+ ENTRYLK_WRLCK, xdata);
+ }
+
+ return 0;
+}
+
+int
+sdfs_rename_helper(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ sdfs_lock_t *lock = NULL;
+ gf_boolean_t stack_destroy = _gf_true;
+ int lock_count = 0;
+ int i = 0;
+
+ local = frame->local;
+ lock = local->lock;
+
+ if (local->op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR,
+ "Acquiring entry lock failed ");
+ goto err;
+ }
+
+ STACK_WIND(frame, sdfs_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(rename, local->main_frame, -1, local->op_errno, NULL,
+ NULL, NULL, NULL, NULL, NULL);
+
+ local->main_frame = NULL;
+ for (i = 0; i < lock->lock_count && lock->entrylk->locked[i]; i++) {
+ lock_count++;
+ }
+ GF_ATOMIC_INIT(local->call_cnt, lock_count);
+
+ for (i = 0; i < lock_count; i++) {
+ if (!lock->entrylk->locked[i]) {
+ lock_count++;
+ continue;
+ }
+ stack_destroy = _gf_false;
+ STACK_WIND(frame, sdfs_common_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name,
+ &lock->entrylk[i].parent_loc, lock->entrylk[i].basename,
+ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, xdata);
+ }
+
+ if (stack_destroy)
+ SDFS_STACK_DESTROY(frame);
+
+ return 0;
+}
+
+int
+sdfs_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ sdfs_lock_t *lock = NULL;
+ call_frame_t *new_frame = NULL;
+ call_stub_t *stub = NULL;
+ client_t *client = NULL;
+ int ret = 0;
+ int op_errno = ENOMEM;
+ int i = 0;
+ int call_cnt = 0;
+
+ new_frame = copy_frame(frame);
+ if (!new_frame) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ /*Set unique lk-owner for the fop*/
+ set_lk_owner_from_ptr(&new_frame->root->lk_owner, new_frame->root);
+
+ gf_client_ref(client);
+ new_frame->root->client = client;
+ local = sdfs_local_init(new_frame, this);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->main_frame = frame;
+
+ lock = GF_CALLOC(1, sizeof(*lock), gf_common_mt_char);
+ if (!lock)
+ goto err;
+
+ local->lock = lock;
+
+ ret = sdfs_init_entry_lock(&lock->entrylk[0], oldloc);
+ if (ret)
+ goto err;
+ lock->entrylk->locked[0] = _gf_false;
+
+ ++lock->lock_count;
+
+ ret = sdfs_init_entry_lock(&lock->entrylk[1], newloc);
+ if (ret)
+ goto err;
+ lock->entrylk->locked[1] = _gf_false;
+
+ ++lock->lock_count;
+
+ qsort(lock->entrylk, lock->lock_count, sizeof(*lock->entrylk),
+ sdfs_entry_lock_cmp);
+
+ local->lock = lock;
+ GF_ATOMIC_INIT(local->call_cnt, lock->lock_count);
+
+ stub = fop_rename_stub(new_frame, sdfs_rename_helper, oldloc, newloc,
+ xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local->stub = stub;
+ call_cnt = GF_ATOMIC_GET(local->call_cnt);
+ for (i = 0; i < call_cnt; i++) {
+ STACK_WIND_COOKIE(new_frame, sdfs_common_entrylk_cbk, (void *)(long)i,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->entrylk,
+ this->name, &lock->entrylk[i].parent_loc,
+ lock->entrylk[i].basename, ENTRYLK_LOCK,
+ ENTRYLK_WRLCK, xdata);
+ }
+
+ return 0;
+err:
+
+ STACK_UNWIND_STRICT(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+
+ if (new_frame)
+ SDFS_STACK_DESTROY(new_frame);
+
+ return 0;
+}
+
+int
+sdfs_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xdata, struct iatt *postparent)
+{
+ sdfs_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (!local->loc.parent) {
+ sdfs_local_cleanup(local);
+ frame->local = NULL;
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf,
+ xdata, postparent);
+ return 0;
+ }
+
+ STACK_UNWIND_STRICT(lookup, local->main_frame, op_ret, op_errno, inode,
+ stbuf, xdata, postparent);
+
+ local->main_frame = NULL;
+ STACK_WIND(frame, sdfs_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc,
+ local->loc.name, ENTRYLK_UNLOCK, ENTRYLK_RDLCK, xdata);
+ return 0;
+}
+
+int
+sdfs_lookup_helper(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+
+ gf_uuid_unparse(loc->pargfid, gfid);
+
+ if (local->op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR,
+ "Acquiring entry lock failed for directory %s "
+ "with parent gfid %s",
+ local->loc.name, gfid);
+ goto err;
+ }
+
+ STACK_WIND(frame, sdfs_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(lookup, local->main_frame, -1, local->op_errno, NULL,
+ NULL, NULL, NULL);
+ local->main_frame = NULL;
+
+ SDFS_STACK_DESTROY(frame);
+ return 0;
+}
+
+int
+sdfs_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ call_frame_t *new_frame = NULL;
+ call_stub_t *stub = NULL;
+ int op_errno = 0;
+
+ if (!loc->parent) {
+ local = sdfs_local_init(frame, this);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ return 0;
+ }
+
+ if (-1 == sdfs_get_new_frame(frame, loc, &new_frame)) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ stub = fop_lookup_stub(new_frame, sdfs_lookup_helper, loc, xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local = new_frame->local;
+ local->stub = stub;
+
+ STACK_WIND(new_frame, sdfs_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc,
+ local->loc.name, ENTRYLK_LOCK, ENTRYLK_RDLCK, xdata);
+
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+
+ if (new_frame)
+ SDFS_STACK_DESTROY(new_frame);
+
+ return 0;
+}
+
+int32_t
+sdfs_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+
+ local = frame->local;
+ STACK_UNWIND_STRICT(readdirp, local->main_frame, op_ret, op_errno, entries,
+ xdata);
+
+ local->main_frame = NULL;
+ STACK_WIND(frame, sdfs_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc,
+ NULL, ENTRYLK_UNLOCK, ENTRYLK_RDLCK, xdata);
+ return 0;
+}
+
+int32_t
+sdfs_readdirp_helper(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ char gfid[GF_UUID_BUF_SIZE] = {0};
+
+ local = frame->local;
+
+ gf_uuid_unparse(fd->inode->gfid, gfid);
+
+ if (local->op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SDFS_MSG_ENTRYLK_ERROR,
+ "Acquiring entry lock failed for directory %s "
+ "with parent gfid %s",
+ local->loc.name, gfid);
+ goto err;
+ }
+
+ STACK_WIND(frame, sdfs_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, off, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(readdirp, local->main_frame, -1, local->op_errno, NULL,
+ NULL);
+
+ local->main_frame = NULL;
+
+ SDFS_STACK_DESTROY(frame);
+ return 0;
+}
+
+int32_t
+sdfs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ sdfs_local_t *local = NULL;
+ call_frame_t *new_frame = NULL;
+ call_stub_t *stub = NULL;
+ int op_errno = 0;
+
+ if (-1 == sdfs_get_new_frame_readdirp(frame, fd, &new_frame)) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ stub = fop_readdirp_stub(new_frame, sdfs_readdirp_helper, fd, size, off,
+ xdata);
+ if (!stub) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ local = new_frame->local;
+ local->stub = stub;
+
+ STACK_WIND(new_frame, sdfs_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, &local->parent_loc,
+ NULL, ENTRYLK_LOCK, ENTRYLK_RDLCK, xdata);
+
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(readdirp, frame, -1, op_errno, NULL, NULL);
+
+ if (new_frame)
+ SDFS_STACK_DESTROY(new_frame);
+
+ return 0;
+}
+
+int
+init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this->children || this->children->next) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'dentry-fop-serializer' not configured with exactly one child");
+ goto out;
+ }
+
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile ");
+ }
+
+ this->local_pool = mem_pool_new(sdfs_local_t, 512);
+ if (!this->local_pool) {
+ goto out;
+ }
+
+ GF_OPTION_INIT("pass-through", this->pass_through, bool, out);
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ int ret = -1;
+
+ GF_OPTION_RECONF("pass-through", this->pass_through, options, bool, out);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+fini(xlator_t *this)
+{
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ return;
+}
+
+struct xlator_fops fops = {
+ .mkdir = sdfs_mkdir,
+ .rmdir = sdfs_rmdir,
+ .create = sdfs_create,
+ .unlink = sdfs_unlink,
+ .symlink = sdfs_symlink,
+ .link = sdfs_link,
+ .mknod = sdfs_mknod,
+ .rename = sdfs_rename,
+ .lookup = sdfs_lookup,
+ .readdirp = sdfs_readdirp,
+};
+
+struct xlator_cbks cbks;
+
+struct volume_options options[] = {
+ {.key = {"pass-through"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "true",
+ .op_version = {GD_OP_VERSION_4_1_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC | OPT_FLAG_CLIENT_OPT,
+ .tags = {"sdfs"},
+ .description = "Enable/Disable dentry serialize functionality"},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .op_version = {GD_OP_VERSION_4_0_0},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "sdfs",
+ .category = GF_TECH_PREVIEW,
+};
diff --git a/xlators/features/sdfs/src/sdfs.h b/xlators/features/sdfs/src/sdfs.h
new file mode 100644
index 00000000000..dded5a2d7fc
--- /dev/null
+++ b/xlators/features/sdfs/src/sdfs.h
@@ -0,0 +1,49 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/call-stub.h>
+#include "sdfs-messages.h"
+#include <glusterfs/atomic.h>
+
+#define SDFS_LOCK_COUNT_MAX 2
+
+typedef struct {
+ loc_t parent_loc;
+ char *basename;
+ int locked[SDFS_LOCK_COUNT_MAX];
+} sdfs_entry_lock_t;
+
+typedef struct {
+ sdfs_entry_lock_t entrylk[SDFS_LOCK_COUNT_MAX];
+ int lock_count;
+} sdfs_lock_t;
+
+struct sdfs_local {
+ call_frame_t *main_frame;
+ loc_t loc;
+ loc_t parent_loc;
+ call_stub_t *stub;
+ sdfs_lock_t *lock;
+ int op_ret;
+ int op_errno;
+ gf_atomic_t call_cnt;
+};
+typedef struct sdfs_local sdfs_local_t;
+
+#define SDFS_STACK_DESTROY(frame) \
+ do { \
+ sdfs_local_t *__local = NULL; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ gf_client_unref(frame->root->client); \
+ STACK_DESTROY(frame->root); \
+ sdfs_local_cleanup(__local); \
+ } while (0)
diff --git a/xlators/features/selinux/Makefile.am b/xlators/features/selinux/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/selinux/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/selinux/src/Makefile.am b/xlators/features/selinux/src/Makefile.am
new file mode 100644
index 00000000000..4f1e5e149b3
--- /dev/null
+++ b/xlators/features/selinux/src/Makefile.am
@@ -0,0 +1,20 @@
+if WITH_SERVER
+xlator_LTLIBRARIES = selinux.la
+endif
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+selinux_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+selinux_la_SOURCES = selinux.c
+
+selinux_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = selinux.h selinux-messages.h selinux-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
+
diff --git a/xlators/features/selinux/src/selinux-mem-types.h b/xlators/features/selinux/src/selinux-mem-types.h
new file mode 100644
index 00000000000..553e59e5a9d
--- /dev/null
+++ b/xlators/features/selinux/src/selinux-mem-types.h
@@ -0,0 +1,19 @@
+/*
+ Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __SELINUX_MEM_TYPES_H__
+#define __SELINUX_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_selinux_mem_types_ {
+ gf_selinux_mt_selinux_priv_t = gf_common_mt_end + 1,
+ gf_selinux_mt_end
+};
+#endif
diff --git a/xlators/features/selinux/src/selinux-messages.h b/xlators/features/selinux/src/selinux-messages.h
new file mode 100644
index 00000000000..f49a54f956c
--- /dev/null
+++ b/xlators/features/selinux/src/selinux-messages.h
@@ -0,0 +1,30 @@
+/*
+ Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _SELINUX_MESSAGES_H__
+#define _SELINUX_MESSAGES_H__
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(SL, SL_MSG_INVALID_VOLFILE, SL_MSG_ENOMEM,
+ SL_MSG_MEM_ACCT_INIT_FAILED, SL_MSG_SELINUX_GLUSTER_XATTR_MISSING,
+ SL_MSG_SELINUX_XATTR_MISSING);
+
+#endif /*_SELINUX_MESSAGES_H */
diff --git a/xlators/features/selinux/src/selinux.c b/xlators/features/selinux/src/selinux.c
new file mode 100644
index 00000000000..9b1b4b55e1a
--- /dev/null
+++ b/xlators/features/selinux/src/selinux.c
@@ -0,0 +1,323 @@
+/*
+ Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+
+#include "selinux.h"
+#include "selinux-messages.h"
+#include "selinux-mem-types.h"
+#include <glusterfs/compat-errno.h>
+
+static int
+selinux_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+{
+ int ret = 0;
+ char *name = cookie;
+
+ if (op_errno == 0 && dict && name &&
+ (!strcmp(name, SELINUX_GLUSTER_XATTR))) {
+ ret = dict_rename_key(dict, SELINUX_GLUSTER_XATTR, SELINUX_XATTR);
+ if (ret < 0)
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SL_MSG_SELINUX_GLUSTER_XATTR_MISSING,
+ "getxattr failed for %s", SELINUX_XATTR);
+ }
+
+ STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return ret;
+}
+
+static int
+selinux_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ selinux_priv_t *priv = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ char *xattr_name = (char *)name;
+
+ priv = this->private;
+
+ GF_VALIDATE_OR_GOTO("selinux", priv, err);
+
+ /* name can be NULL for listxattr calls */
+ if (!priv->selinux_enabled || !name)
+ goto off;
+
+ if (strcmp(name, SELINUX_XATTR) == 0)
+ xattr_name = SELINUX_GLUSTER_XATTR;
+
+off:
+ STACK_WIND_COOKIE(frame, selinux_fgetxattr_cbk, xattr_name,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fgetxattr, fd,
+ xattr_name, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, NULL, xdata);
+
+ return 0;
+}
+
+static int
+selinux_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+{
+ int ret = 0;
+ char *name = cookie;
+
+ if (op_errno == 0 && dict && name &&
+ (!strcmp(name, SELINUX_GLUSTER_XATTR))) {
+ ret = dict_rename_key(dict, SELINUX_GLUSTER_XATTR, SELINUX_XATTR);
+ if (ret < 0)
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SL_MSG_SELINUX_GLUSTER_XATTR_MISSING,
+ "getxattr failed for %s", SELINUX_XATTR);
+ }
+
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, xdata);
+
+ return 0;
+}
+
+static int
+selinux_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ selinux_priv_t *priv = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ char *xattr_name = (char *)name;
+
+ priv = this->private;
+
+ GF_VALIDATE_OR_GOTO("selinux", priv, err);
+
+ /* name can be NULL for listxattr calls */
+ if (!priv->selinux_enabled || !name)
+ goto off;
+
+ if (strcmp(name, SELINUX_XATTR) == 0)
+ xattr_name = SELINUX_GLUSTER_XATTR;
+
+off:
+ STACK_WIND_COOKIE(frame, selinux_getxattr_cbk, xattr_name,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->getxattr, loc,
+ xattr_name, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, NULL, xdata);
+ return 0;
+}
+
+static int
+selinux_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int
+selinux_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int flags, dict_t *xdata)
+{
+ selinux_priv_t *priv = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ int32_t ret = -1;
+
+ priv = this->private;
+
+ GF_VALIDATE_OR_GOTO("selinux", priv, err);
+
+ if (!priv->selinux_enabled && !dict)
+ goto off;
+
+ ret = dict_rename_key(dict, SELINUX_XATTR, SELINUX_GLUSTER_XATTR);
+ if (ret < 0 && ret != -ENODATA)
+ goto err;
+
+off:
+ STACK_WIND(frame, selinux_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+
+ return 0;
+err:
+ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int
+selinux_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int
+selinux_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int flags, dict_t *xdata)
+{
+ selinux_priv_t *priv = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ int32_t ret = -1;
+
+ priv = this->private;
+
+ GF_VALIDATE_OR_GOTO("selinux", priv, err);
+
+ if (!priv->selinux_enabled && !dict)
+ goto off;
+
+ ret = dict_rename_key(dict, SELINUX_XATTR, SELINUX_GLUSTER_XATTR);
+ if (ret < 0 && ret != -ENODATA)
+ goto err;
+
+off:
+ STACK_WIND(frame, selinux_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT(setxattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("selinux", this, out);
+
+ ret = xlator_mem_acct_init(this, gf_selinux_mt_end + 1);
+
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SL_MSG_MEM_ACCT_INIT_FAILED,
+ "Memory accounting init failed");
+ return ret;
+ }
+out:
+ return ret;
+}
+
+int32_t
+init(xlator_t *this)
+{
+ int32_t ret = -1;
+ selinux_priv_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("selinux", this, out);
+
+ if (!this->children || this->children->next) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, SL_MSG_INVALID_VOLFILE,
+ "Error: SELinux (%s) not configured with exactly one "
+ "child",
+ this->name);
+ return -1;
+ }
+
+ if (this->parents == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, SL_MSG_INVALID_VOLFILE,
+ "Dangling volume. Please check the volfile");
+ }
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_selinux_mt_selinux_priv_t);
+ if (!priv) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+ goto out;
+ }
+
+ GF_OPTION_INIT("selinux", priv->selinux_enabled, bool, out);
+
+ this->local_pool = mem_pool_new(selinux_priv_t, 64);
+ if (!this->local_pool) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SL_MSG_ENOMEM,
+ "Failed to create local_t's memory pool");
+ goto out;
+ }
+
+ this->private = (void *)priv;
+ ret = 0;
+out:
+ if (ret) {
+ GF_FREE(priv);
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
+ return ret;
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ int32_t ret = -1;
+ selinux_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ GF_OPTION_RECONF("selinux", priv->selinux_enabled, options, bool, out);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+void
+fini(xlator_t *this)
+{
+ selinux_priv_t *priv = NULL;
+
+ priv = this->private;
+ GF_FREE(priv);
+
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+
+ return;
+}
+
+struct xlator_fops fops = {
+ .getxattr = selinux_getxattr,
+ .fgetxattr = selinux_fgetxattr,
+ .setxattr = selinux_setxattr,
+ .fsetxattr = selinux_fsetxattr,
+};
+
+struct xlator_cbks cbks = {};
+
+struct volume_options options[] = {
+ {
+ .key = {"selinux"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .description = "Enable/disable selinux translator",
+ .op_version = {GD_OP_VERSION_3_11_0},
+ .flags = OPT_FLAG_SETTABLE,
+ .tags = {"security", "linux"},
+ },
+ {
+ .key = {NULL},
+ }};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "selinux",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/selinux/src/selinux.h b/xlators/features/selinux/src/selinux.h
new file mode 100644
index 00000000000..1bbdad3bb36
--- /dev/null
+++ b/xlators/features/selinux/src/selinux.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2017 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __SELINUX_H__
+#define __SELINUX_H__
+
+#include <glusterfs/common-utils.h>
+
+#define SELINUX_XATTR "security.selinux"
+#define SELINUX_GLUSTER_XATTR "trusted.glusterfs.selinux"
+
+struct selinux_priv {
+ gf_boolean_t selinux_enabled;
+};
+
+typedef struct selinux_priv selinux_priv_t;
+
+#endif
diff --git a/xlators/features/shard/Makefile.am b/xlators/features/shard/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/shard/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/shard/src/Makefile.am b/xlators/features/shard/src/Makefile.am
new file mode 100644
index 00000000000..bf5700d4bcc
--- /dev/null
+++ b/xlators/features/shard/src/Makefile.am
@@ -0,0 +1,17 @@
+xlator_LTLIBRARIES = shard.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+shard_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+shard_la_SOURCES = shard.c
+
+shard_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = shard.h shard-mem-types.h shard-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/shard/src/shard-mem-types.h b/xlators/features/shard/src/shard-mem-types.h
new file mode 100644
index 00000000000..1fe7e2e2798
--- /dev/null
+++ b/xlators/features/shard/src/shard-mem-types.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __SHARD_MEM_TYPES_H__
+#define __SHARD_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_shard_mem_types_ {
+ gf_shard_mt_priv_t = gf_common_mt_end + 1,
+ gf_shard_mt_inode_list,
+ gf_shard_mt_inode_ctx_t,
+ gf_shard_mt_iovec,
+ gf_shard_mt_int64_t,
+ gf_shard_mt_uint64_t,
+ gf_shard_mt_end
+};
+#endif
diff --git a/xlators/features/shard/src/shard-messages.h b/xlators/features/shard/src/shard-messages.h
new file mode 100644
index 00000000000..2d0867eb136
--- /dev/null
+++ b/xlators/features/shard/src/shard-messages.h
@@ -0,0 +1,39 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _SHARD_MESSAGES_H_
+#define _SHARD_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(SHARD, SHARD_MSG_BASE_FILE_LOOKUP_FAILED, SHARD_MSG_DICT_OP_FAILED,
+ SHARD_MSG_DOT_SHARD_NODIR, SHARD_MSG_FD_CTX_SET_FAILED,
+ SHARD_MSG_INODE_CTX_GET_FAILED, SHARD_MSG_INODE_CTX_SET_FAILED,
+ SHARD_MSG_INODE_PATH_FAILED, SHARD_MSG_INTERNAL_XATTR_MISSING,
+ SHARD_MSG_INVALID_VOLFILE, SHARD_MSG_LOOKUP_SHARD_FAILED,
+ SHARD_MSG_MEM_ACCT_INIT_FAILED, SHARD_MSG_NULL_THIS,
+ SHARD_MSG_SIZE_SET_FAILED, SHARD_MSG_STAT_FAILED,
+ SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED,
+ SHARD_MSG_UPDATE_FILE_SIZE_FAILED, SHARD_MSG_FOP_NOT_SUPPORTED,
+ SHARD_MSG_INVALID_FOP, SHARD_MSG_MEMALLOC_FAILED,
+ SHARD_MSG_FOP_FAILED, SHARD_MSG_SHARDS_DELETION_FAILED,
+ SHARD_MSG_SHARD_DELETION_COMPLETED);
+
+#endif /* !_SHARD_MESSAGES_H_ */
diff --git a/xlators/features/shard/src/shard.c b/xlators/features/shard/src/shard.c
new file mode 100644
index 00000000000..e5f93063943
--- /dev/null
+++ b/xlators/features/shard/src/shard.c
@@ -0,0 +1,7382 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+
+#include "shard.h"
+#include "shard-mem-types.h"
+#include <glusterfs/byte-order.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/statedump.h>
+
+static gf_boolean_t
+__is_shard_dir(uuid_t gfid)
+{
+ shard_priv_t *priv = THIS->private;
+
+ if (gf_uuid_compare(gfid, priv->dot_shard_gfid) == 0)
+ return _gf_true;
+
+ return _gf_false;
+}
+
+static gf_boolean_t
+__is_gsyncd_on_shard_dir(call_frame_t *frame, loc_t *loc)
+{
+ if (frame->root->pid == GF_CLIENT_PID_GSYNCD &&
+ (__is_shard_dir(loc->pargfid) ||
+ (loc->parent && __is_shard_dir(loc->parent->gfid))))
+ return _gf_true;
+
+ return _gf_false;
+}
+
+void
+shard_make_block_bname(int block_num, uuid_t gfid, char *buf, size_t len)
+{
+ char gfid_str[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+
+ gf_uuid_unparse(gfid, gfid_str);
+ snprintf(buf, len, "%s.%d", gfid_str, block_num);
+}
+
+void
+shard_make_block_abspath(int block_num, uuid_t gfid, char *filepath, size_t len)
+{
+ char gfid_str[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+
+ gf_uuid_unparse(gfid, gfid_str);
+ snprintf(filepath, len, "/%s/%s.%d", GF_SHARD_DIR, gfid_str, block_num);
+}
+
+int
+__shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
+{
+ int ret = -1;
+ uint64_t ctx_uint = 0;
+ shard_inode_ctx_t *ctx_p = NULL;
+
+ ret = __inode_ctx_get(inode, this, &ctx_uint);
+ if (ret == 0) {
+ *ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+ return ret;
+ }
+
+ ctx_p = GF_CALLOC(1, sizeof(*ctx_p), gf_shard_mt_inode_ctx_t);
+ if (!ctx_p)
+ return ret;
+
+ INIT_LIST_HEAD(&ctx_p->ilist);
+ INIT_LIST_HEAD(&ctx_p->to_fsync_list);
+
+ ctx_uint = (uint64_t)(uintptr_t)ctx_p;
+ ret = __inode_ctx_set(inode, this, &ctx_uint);
+ if (ret < 0) {
+ GF_FREE(ctx_p);
+ return ret;
+ }
+
+ *ctx = ctx_p;
+
+ return ret;
+}
+
+int
+shard_inode_ctx_get(inode_t *inode, xlator_t *this, shard_inode_ctx_t **ctx)
+{
+ int ret = 0;
+
+ LOCK(&inode->lock);
+ {
+ ret = __shard_inode_ctx_get(inode, this, ctx);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+int
+__shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf,
+ uint64_t block_size, int32_t valid)
+{
+ int ret = -1;
+ shard_inode_ctx_t *ctx = NULL;
+
+ ret = __shard_inode_ctx_get(inode, this, &ctx);
+ if (ret)
+ return ret;
+
+ if (valid & SHARD_MASK_BLOCK_SIZE)
+ ctx->block_size = block_size;
+
+ if (valid & SHARD_MASK_PROT)
+ ctx->stat.ia_prot = stbuf->ia_prot;
+
+ if (valid & SHARD_MASK_NLINK)
+ ctx->stat.ia_nlink = stbuf->ia_nlink;
+
+ if (valid & SHARD_MASK_UID)
+ ctx->stat.ia_uid = stbuf->ia_uid;
+
+ if (valid & SHARD_MASK_GID)
+ ctx->stat.ia_gid = stbuf->ia_gid;
+
+ if (valid & SHARD_MASK_SIZE)
+ ctx->stat.ia_size = stbuf->ia_size;
+
+ if (valid & SHARD_MASK_BLOCKS)
+ ctx->stat.ia_blocks = stbuf->ia_blocks;
+
+ if (valid & SHARD_MASK_TIMES) {
+ SHARD_TIME_UPDATE(ctx->stat.ia_mtime, ctx->stat.ia_mtime_nsec,
+ stbuf->ia_mtime, stbuf->ia_mtime_nsec);
+ SHARD_TIME_UPDATE(ctx->stat.ia_ctime, ctx->stat.ia_ctime_nsec,
+ stbuf->ia_ctime, stbuf->ia_ctime_nsec);
+ SHARD_TIME_UPDATE(ctx->stat.ia_atime, ctx->stat.ia_atime_nsec,
+ stbuf->ia_atime, stbuf->ia_atime_nsec);
+ }
+
+ if (valid & SHARD_MASK_OTHERS) {
+ ctx->stat.ia_ino = stbuf->ia_ino;
+ gf_uuid_copy(ctx->stat.ia_gfid, stbuf->ia_gfid);
+ ctx->stat.ia_dev = stbuf->ia_dev;
+ ctx->stat.ia_type = stbuf->ia_type;
+ ctx->stat.ia_rdev = stbuf->ia_rdev;
+ ctx->stat.ia_blksize = stbuf->ia_blksize;
+ }
+
+ if (valid & SHARD_MASK_REFRESH_RESET)
+ ctx->refresh = _gf_false;
+
+ return 0;
+}
+
+int
+shard_inode_ctx_set(inode_t *inode, xlator_t *this, struct iatt *stbuf,
+ uint64_t block_size, int32_t valid)
+{
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __shard_inode_ctx_set(inode, this, stbuf, block_size, valid);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+int
+__shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+ shard_inode_ctx_t *ctx = NULL;
+
+ ret = __shard_inode_ctx_get(inode, this, &ctx);
+ if (ret)
+ return ret;
+
+ ctx->refresh = _gf_true;
+
+ return 0;
+}
+int
+shard_inode_ctx_set_refresh_flag(inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __shard_inode_ctx_set_refresh_flag(inode, this);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+int
+__shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+ shard_inode_ctx_t *ctx = NULL;
+
+ ret = __shard_inode_ctx_get(inode, this, &ctx);
+ if (ret)
+ return ret;
+
+ ctx->refreshed = _gf_true;
+ return 0;
+}
+
+int
+shard_inode_ctx_mark_dir_refreshed(inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __shard_inode_ctx_mark_dir_refreshed(inode, this);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+int
+__shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
+ inode_t *shard_inode)
+{
+ int ret = -1;
+ shard_inode_ctx_t *base_ictx = NULL;
+ shard_inode_ctx_t *shard_ictx = NULL;
+
+ ret = __shard_inode_ctx_get(base_inode, this, &base_ictx);
+ if (ret)
+ return ret;
+
+ ret = __shard_inode_ctx_get(shard_inode, this, &shard_ictx);
+ if (ret)
+ return ret;
+
+ if (shard_ictx->fsync_needed) {
+ shard_ictx->fsync_needed++;
+ return 1;
+ }
+
+ list_add_tail(&shard_ictx->to_fsync_list, &base_ictx->to_fsync_list);
+ shard_ictx->inode = shard_inode;
+ shard_ictx->fsync_needed++;
+ base_ictx->fsync_count++;
+ shard_ictx->base_inode = base_inode;
+
+ return 0;
+}
+
+int
+shard_inode_ctx_add_to_fsync_list(inode_t *base_inode, xlator_t *this,
+ inode_t *shard_inode)
+{
+ int ret = -1;
+
+ /* This ref acts as a refkeepr on the base inode. We
+ * need to keep this inode alive as it holds the head
+ * of the to_fsync_list.
+ */
+ inode_ref(base_inode);
+ inode_ref(shard_inode);
+
+ LOCK(&base_inode->lock);
+ LOCK(&shard_inode->lock);
+ {
+ ret = __shard_inode_ctx_add_to_fsync_list(base_inode, this,
+ shard_inode);
+ }
+ UNLOCK(&shard_inode->lock);
+ UNLOCK(&base_inode->lock);
+
+ /* Unref the base inode corresponding to the ref above, if the shard is
+ * found to be already part of the fsync list.
+ */
+ if (ret != 0) {
+ inode_unref(base_inode);
+ inode_unref(shard_inode);
+ }
+ return ret;
+}
+
+gf_boolean_t
+__shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this)
+{
+ int ret = -1;
+ shard_inode_ctx_t *ctx = NULL;
+
+ ret = __shard_inode_ctx_get(inode, this, &ctx);
+ /* If inode ctx get fails, better to err on the side of caution and
+ * try again? Unless the failure is due to mem-allocation.
+ */
+ if (ret)
+ return _gf_true;
+
+ return !ctx->refreshed;
+}
+
+gf_boolean_t
+shard_inode_ctx_needs_lookup(inode_t *inode, xlator_t *this)
+{
+ gf_boolean_t flag = _gf_false;
+
+ LOCK(&inode->lock);
+ {
+ flag = __shard_inode_ctx_needs_lookup(inode, this);
+ }
+ UNLOCK(&inode->lock);
+
+ return flag;
+}
+int
+__shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf)
+{
+ int ret = -1;
+ shard_inode_ctx_t *ctx = NULL;
+
+ ret = __shard_inode_ctx_get(inode, this, &ctx);
+ if (ret)
+ return ret;
+
+ if ((stbuf->ia_size != ctx->stat.ia_size) ||
+ (stbuf->ia_blocks != ctx->stat.ia_blocks))
+ ctx->refresh = _gf_true;
+
+ return 0;
+}
+
+int
+shard_inode_ctx_invalidate(inode_t *inode, xlator_t *this, struct iatt *stbuf)
+{
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __shard_inode_ctx_invalidate(inode, this, stbuf);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+int
+__shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this,
+ uint64_t *block_size)
+{
+ int ret = -1;
+ uint64_t ctx_uint = 0;
+ shard_inode_ctx_t *ctx = NULL;
+
+ ret = __inode_ctx_get(inode, this, &ctx_uint);
+ if (ret < 0)
+ return ret;
+
+ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+ *block_size = ctx->block_size;
+
+ return 0;
+}
+
+int
+shard_inode_ctx_get_block_size(inode_t *inode, xlator_t *this,
+ uint64_t *block_size)
+{
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __shard_inode_ctx_get_block_size(inode, this, block_size);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+int
+__shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this,
+ int *fsync_count)
+{
+ int ret = -1;
+ uint64_t ctx_uint = 0;
+ shard_inode_ctx_t *ctx = NULL;
+
+ ret = __inode_ctx_get(inode, this, &ctx_uint);
+ if (ret < 0)
+ return ret;
+
+ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+ *fsync_count = ctx->fsync_needed;
+
+ return 0;
+}
+
+int
+shard_inode_ctx_get_fsync_count(inode_t *inode, xlator_t *this,
+ int *fsync_count)
+{
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __shard_inode_ctx_get_fsync_count(inode, this, fsync_count);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+int
+__shard_inode_ctx_get_all(inode_t *inode, xlator_t *this,
+ shard_inode_ctx_t *ctx_out)
+{
+ int ret = -1;
+ uint64_t ctx_uint = 0;
+ shard_inode_ctx_t *ctx = NULL;
+
+ ret = __inode_ctx_get(inode, this, &ctx_uint);
+ if (ret < 0)
+ return ret;
+
+ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+ memcpy(ctx_out, ctx, sizeof(shard_inode_ctx_t));
+ return 0;
+}
+
+int
+shard_inode_ctx_get_all(inode_t *inode, xlator_t *this,
+ shard_inode_ctx_t *ctx_out)
+{
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __shard_inode_ctx_get_all(inode, this, ctx_out);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+int
+__shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this,
+ struct iatt *buf,
+ gf_boolean_t *need_refresh)
+{
+ int ret = -1;
+ uint64_t ctx_uint = 0;
+ shard_inode_ctx_t *ctx = NULL;
+
+ ret = __inode_ctx_get(inode, this, &ctx_uint);
+ if (ret < 0)
+ return ret;
+
+ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+ if (ctx->refresh == _gf_false)
+ *buf = ctx->stat;
+ else
+ *need_refresh = _gf_true;
+
+ return 0;
+}
+
+int
+shard_inode_ctx_fill_iatt_from_cache(inode_t *inode, xlator_t *this,
+ struct iatt *buf,
+ gf_boolean_t *need_refresh)
+{
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __shard_inode_ctx_fill_iatt_from_cache(inode, this, buf,
+ need_refresh);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+void
+shard_local_wipe(shard_local_t *local)
+{
+ int i = 0;
+ int count = 0;
+
+ count = local->num_blocks;
+
+ syncbarrier_destroy(&local->barrier);
+ loc_wipe(&local->loc);
+ loc_wipe(&local->dot_shard_loc);
+ loc_wipe(&local->dot_shard_rm_loc);
+ loc_wipe(&local->loc2);
+ loc_wipe(&local->tmp_loc);
+ loc_wipe(&local->int_inodelk.loc);
+ loc_wipe(&local->int_entrylk.loc);
+ loc_wipe(&local->newloc);
+
+ if (local->name)
+ GF_FREE(local->name);
+
+ if (local->int_entrylk.basename)
+ GF_FREE(local->int_entrylk.basename);
+ if (local->fd)
+ fd_unref(local->fd);
+
+ if (local->xattr_req)
+ dict_unref(local->xattr_req);
+ if (local->xattr_rsp)
+ dict_unref(local->xattr_rsp);
+
+ for (i = 0; i < count; i++) {
+ if (!local->inode_list)
+ break;
+
+ if (local->inode_list[i])
+ inode_unref(local->inode_list[i]);
+ }
+
+ GF_FREE(local->inode_list);
+
+ GF_FREE(local->vector);
+ if (local->iobref)
+ iobref_unref(local->iobref);
+ if (local->list_inited)
+ gf_dirent_free(&local->entries_head);
+ if (local->inodelk_frame)
+ SHARD_STACK_DESTROY(local->inodelk_frame);
+ if (local->entrylk_frame)
+ SHARD_STACK_DESTROY(local->entrylk_frame);
+}
+
+int
+shard_modify_size_and_block_count(struct iatt *stbuf, dict_t *dict)
+{
+ int ret = -1;
+ void *size_attr = NULL;
+ uint64_t size_array[4];
+
+ ret = dict_get_ptr(dict, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
+ if (ret) {
+ gf_msg_callingfn(THIS->name, GF_LOG_ERROR, 0,
+ SHARD_MSG_INTERNAL_XATTR_MISSING,
+ "Failed to "
+ "get " GF_XATTR_SHARD_FILE_SIZE " for %s",
+ uuid_utoa(stbuf->ia_gfid));
+ return ret;
+ }
+
+ memcpy(size_array, size_attr, sizeof(size_array));
+
+ stbuf->ia_size = ntoh64(size_array[0]);
+ stbuf->ia_blocks = ntoh64(size_array[2]);
+
+ return 0;
+}
+
+int
+shard_call_count_return(call_frame_t *frame)
+{
+ int call_count = 0;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ LOCK(&frame->lock);
+ {
+ call_count = --local->call_count;
+ }
+ UNLOCK(&frame->lock);
+
+ return call_count;
+}
+
+static char *
+shard_internal_dir_string(shard_internal_dir_type_t type)
+{
+ char *str = NULL;
+
+ switch (type) {
+ case SHARD_INTERNAL_DIR_DOT_SHARD:
+ str = GF_SHARD_DIR;
+ break;
+ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+ str = GF_SHARD_REMOVE_ME_DIR;
+ break;
+ default:
+ break;
+ }
+ return str;
+}
+
+static int
+shard_init_internal_dir_loc(xlator_t *this, shard_local_t *local,
+ shard_internal_dir_type_t type)
+{
+ int ret = -1;
+ char *bname = NULL;
+ inode_t *parent = NULL;
+ loc_t *internal_dir_loc = NULL;
+ shard_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (!local)
+ return -1;
+
+ switch (type) {
+ case SHARD_INTERNAL_DIR_DOT_SHARD:
+ internal_dir_loc = &local->dot_shard_loc;
+ bname = GF_SHARD_DIR;
+ parent = inode_ref(this->itable->root);
+ break;
+ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+ internal_dir_loc = &local->dot_shard_rm_loc;
+ bname = GF_SHARD_REMOVE_ME_DIR;
+ parent = inode_ref(priv->dot_shard_inode);
+ break;
+ default:
+ break;
+ }
+
+ internal_dir_loc->inode = inode_new(this->itable);
+ internal_dir_loc->parent = parent;
+ ret = inode_path(internal_dir_loc->parent, bname,
+ (char **)&internal_dir_loc->path);
+ if (ret < 0 || !(internal_dir_loc->inode)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+ "Inode path failed on %s", bname);
+ goto out;
+ }
+
+ internal_dir_loc->name = strrchr(internal_dir_loc->path, '/');
+ if (internal_dir_loc->name)
+ internal_dir_loc->name++;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+inode_t *
+__shard_update_shards_inode_list(inode_t *linked_inode, xlator_t *this,
+ inode_t *base_inode, int block_num,
+ uuid_t gfid)
+{
+ char block_bname[256] = {
+ 0,
+ };
+ inode_t *lru_inode = NULL;
+ shard_priv_t *priv = NULL;
+ shard_inode_ctx_t *ctx = NULL;
+ shard_inode_ctx_t *lru_inode_ctx = NULL;
+ shard_inode_ctx_t *lru_base_inode_ctx = NULL;
+ inode_t *fsync_inode = NULL;
+ inode_t *lru_base_inode = NULL;
+ gf_boolean_t do_fsync = _gf_false;
+
+ priv = this->private;
+
+ shard_inode_ctx_get(linked_inode, this, &ctx);
+
+ if (list_empty(&ctx->ilist)) {
+ if (priv->inode_count + 1 <= priv->lru_limit) {
+ /* If this inode was linked here for the first time (indicated
+ * by empty list), and if there is still space in the priv list,
+ * add this ctx to the tail of the list.
+ */
+ /* For as long as an inode is in lru list, we try to
+ * keep it alive by holding a ref on it.
+ */
+ inode_ref(linked_inode);
+ if (base_inode)
+ gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
+ else
+ gf_uuid_copy(ctx->base_gfid, gfid);
+ ctx->block_num = block_num;
+ list_add_tail(&ctx->ilist, &priv->ilist_head);
+ priv->inode_count++;
+ ctx->base_inode = inode_ref(base_inode);
+ } else {
+ /*If on the other hand there is no available slot for this inode
+ * in the list, delete the lru inode from the head of the list,
+ * unlink it. And in its place add this new inode into the list.
+ */
+ lru_inode_ctx = list_first_entry(&priv->ilist_head,
+ shard_inode_ctx_t, ilist);
+ GF_ASSERT(lru_inode_ctx->block_num > 0);
+ lru_base_inode = lru_inode_ctx->base_inode;
+ list_del_init(&lru_inode_ctx->ilist);
+ lru_inode = inode_find(linked_inode->table,
+ lru_inode_ctx->stat.ia_gfid);
+ /* If the lru inode was part of the pending-fsync list,
+ * the base inode needs to be unref'd, the lru inode
+ * deleted from fsync list and fsync'd in a new frame,
+ * and then unlinked in memory and forgotten.
+ */
+ if (!lru_base_inode)
+ goto after_fsync_check;
+ LOCK(&lru_base_inode->lock);
+ LOCK(&lru_inode->lock);
+ {
+ if (!list_empty(&lru_inode_ctx->to_fsync_list)) {
+ list_del_init(&lru_inode_ctx->to_fsync_list);
+ lru_inode_ctx->fsync_needed = 0;
+ do_fsync = _gf_true;
+ __shard_inode_ctx_get(lru_base_inode, this,
+ &lru_base_inode_ctx);
+ lru_base_inode_ctx->fsync_count--;
+ }
+ }
+ UNLOCK(&lru_inode->lock);
+ UNLOCK(&lru_base_inode->lock);
+
+ after_fsync_check:
+ if (!do_fsync) {
+ shard_make_block_bname(lru_inode_ctx->block_num,
+ lru_inode_ctx->base_gfid, block_bname,
+ sizeof(block_bname));
+ /* The following unref corresponds to the ref held at
+ * the time the shard was added to the lru list.
+ */
+ inode_unref(lru_inode);
+ inode_unlink(lru_inode, priv->dot_shard_inode, block_bname);
+ inode_forget(lru_inode, 0);
+ } else {
+ /* The following unref corresponds to the ref
+ * held when the shard was added to fsync list.
+ */
+ inode_unref(lru_inode);
+ fsync_inode = lru_inode;
+ if (lru_base_inode)
+ inode_unref(lru_base_inode);
+ }
+ /* The following unref corresponds to the ref
+ * held by inode_find() above.
+ */
+ inode_unref(lru_inode);
+
+ /* The following unref corresponds to the ref held on the base shard
+ * at the time of adding shard inode to lru list
+ */
+ if (lru_base_inode)
+ inode_unref(lru_base_inode);
+
+ /* For as long as an inode is in lru list, we try to
+ * keep it alive by holding a ref on it.
+ */
+ inode_ref(linked_inode);
+ if (base_inode)
+ gf_uuid_copy(ctx->base_gfid, base_inode->gfid);
+ else
+ gf_uuid_copy(ctx->base_gfid, gfid);
+ ctx->block_num = block_num;
+ ctx->base_inode = inode_ref(base_inode);
+ list_add_tail(&ctx->ilist, &priv->ilist_head);
+ }
+ } else {
+ /* If this is not the first time this inode is being operated on, move
+ * it to the most recently used end of the list.
+ */
+ list_move_tail(&ctx->ilist, &priv->ilist_head);
+ }
+ return fsync_inode;
+}
+
+int
+shard_common_failure_unwind(glusterfs_fop_t fop, call_frame_t *frame,
+ int32_t op_ret, int32_t op_errno)
+{
+ switch (fop) {
+ case GF_FOP_LOOKUP:
+ SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL,
+ NULL, NULL);
+ break;
+ case GF_FOP_STAT:
+ SHARD_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL);
+ break;
+ case GF_FOP_FSTAT:
+ SHARD_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL);
+ break;
+ case GF_FOP_TRUNCATE:
+ SHARD_STACK_UNWIND(truncate, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
+ break;
+ case GF_FOP_FTRUNCATE:
+ SHARD_STACK_UNWIND(ftruncate, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
+ break;
+ case GF_FOP_MKNOD:
+ SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+ break;
+ case GF_FOP_LINK:
+ SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+ break;
+ case GF_FOP_CREATE:
+ SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL,
+ NULL, NULL, NULL, NULL);
+ break;
+ case GF_FOP_UNLINK:
+ SHARD_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
+ break;
+ case GF_FOP_RENAME:
+ SHARD_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL,
+ NULL, NULL, NULL, NULL);
+ break;
+ case GF_FOP_WRITE:
+ SHARD_STACK_UNWIND(writev, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
+ break;
+ case GF_FOP_FALLOCATE:
+ SHARD_STACK_UNWIND(fallocate, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
+ break;
+ case GF_FOP_ZEROFILL:
+ SHARD_STACK_UNWIND(zerofill, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
+ break;
+ case GF_FOP_DISCARD:
+ SHARD_STACK_UNWIND(discard, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
+ break;
+ case GF_FOP_READ:
+ SHARD_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, -1, NULL,
+ NULL, NULL);
+ break;
+ case GF_FOP_FSYNC:
+ SHARD_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
+ break;
+ case GF_FOP_REMOVEXATTR:
+ SHARD_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL);
+ break;
+ case GF_FOP_FREMOVEXATTR:
+ SHARD_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, NULL);
+ break;
+ case GF_FOP_FGETXATTR:
+ SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, NULL, NULL);
+ break;
+ case GF_FOP_GETXATTR:
+ SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, NULL, NULL);
+ break;
+ case GF_FOP_FSETXATTR:
+ SHARD_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, NULL);
+ break;
+ case GF_FOP_SETXATTR:
+ SHARD_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL);
+ break;
+ case GF_FOP_SETATTR:
+ SHARD_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
+ break;
+ case GF_FOP_FSETATTR:
+ SHARD_STACK_UNWIND(fsetattr, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
+ break;
+ case GF_FOP_SEEK:
+ SHARD_STACK_UNWIND(seek, frame, op_ret, op_errno, 0, NULL);
+ break;
+ default:
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+ "Invalid fop id = %d", fop);
+ break;
+ }
+ return 0;
+}
+
+int
+shard_common_inode_write_success_unwind(glusterfs_fop_t fop,
+ call_frame_t *frame, int32_t op_ret)
+{
+ shard_local_t *local = frame->local;
+
+ /* the below 3 variables are required because, in SHARD_STACK_UNWIND()
+ macro, there is a check for local being null. So many static analyzers
+ backtrace the code with assumption of possible (local == NULL) case,
+ and complains for below lines. By handling it like below, we overcome
+ the warnings */
+
+ struct iatt *prebuf = ((local) ? &local->prebuf : NULL);
+ struct iatt *postbuf = ((local) ? &local->postbuf : NULL);
+ dict_t *xattr_rsp = ((local) ? local->xattr_rsp : NULL);
+
+ switch (fop) {
+ case GF_FOP_WRITE:
+ SHARD_STACK_UNWIND(writev, frame, op_ret, 0, prebuf, postbuf,
+ xattr_rsp);
+ break;
+ case GF_FOP_FALLOCATE:
+ SHARD_STACK_UNWIND(fallocate, frame, op_ret, 0, prebuf, postbuf,
+ xattr_rsp);
+ break;
+ case GF_FOP_ZEROFILL:
+ SHARD_STACK_UNWIND(zerofill, frame, op_ret, 0, prebuf, postbuf,
+ xattr_rsp);
+ break;
+ case GF_FOP_DISCARD:
+ SHARD_STACK_UNWIND(discard, frame, op_ret, 0, prebuf, postbuf,
+ xattr_rsp);
+ break;
+ default:
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+ "Invalid fop id = %d", fop);
+ break;
+ }
+ return 0;
+}
+
+int
+shard_evicted_inode_fsync_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ char block_bname[256] = {
+ 0,
+ };
+ fd_t *anon_fd = cookie;
+ inode_t *shard_inode = NULL;
+ shard_inode_ctx_t *ctx = NULL;
+ shard_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ if (anon_fd == NULL || op_ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, op_errno, SHARD_MSG_MEMALLOC_FAILED,
+ "fsync failed on shard");
+ goto out;
+ }
+ shard_inode = anon_fd->inode;
+
+ LOCK(&priv->lock);
+ LOCK(&shard_inode->lock);
+ {
+ __shard_inode_ctx_get(shard_inode, this, &ctx);
+ if ((list_empty(&ctx->to_fsync_list)) && (list_empty(&ctx->ilist))) {
+ shard_make_block_bname(ctx->block_num, shard_inode->gfid,
+ block_bname, sizeof(block_bname));
+ inode_unlink(shard_inode, priv->dot_shard_inode, block_bname);
+ /* The following unref corresponds to the ref held by
+ * inode_link() at the time the shard was created or
+ * looked up
+ */
+ inode_unref(shard_inode);
+ inode_forget(shard_inode, 0);
+ }
+ }
+ UNLOCK(&shard_inode->lock);
+ UNLOCK(&priv->lock);
+
+out:
+ if (anon_fd)
+ fd_unref(anon_fd);
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+int
+shard_initiate_evicted_inode_fsync(xlator_t *this, inode_t *inode)
+{
+ fd_t *anon_fd = NULL;
+ call_frame_t *fsync_frame = NULL;
+
+ fsync_frame = create_frame(this, this->ctx->pool);
+ if (!fsync_frame) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+ "Failed to create new frame "
+ "to fsync shard");
+ return -1;
+ }
+
+ anon_fd = fd_anonymous(inode);
+ if (!anon_fd) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+ "Failed to create anon fd to"
+ " fsync shard");
+ STACK_DESTROY(fsync_frame->root);
+ return -1;
+ }
+
+ STACK_WIND_COOKIE(fsync_frame, shard_evicted_inode_fsync_cbk, anon_fd,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
+ anon_fd, 1, NULL);
+ return 0;
+}
+
+int
+shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
+ xlator_t *this);
+
+int
+shard_common_resolve_shards(call_frame_t *frame, xlator_t *this,
+ shard_post_resolve_fop_handler_t post_res_handler)
+{
+ int i = -1;
+ uint32_t shard_idx_iter = 0;
+ char path[PATH_MAX] = {
+ 0,
+ };
+ uuid_t gfid = {
+ 0,
+ };
+ inode_t *inode = NULL;
+ inode_t *res_inode = NULL;
+ inode_t *fsync_inode = NULL;
+ shard_priv_t *priv = NULL;
+ shard_local_t *local = NULL;
+ uint64_t resolve_count = 0;
+
+ priv = this->private;
+ local = frame->local;
+ local->call_count = 0;
+ shard_idx_iter = local->first_block;
+ res_inode = local->resolver_base_inode;
+
+ if ((local->op_ret < 0) || (local->resolve_not))
+ goto out;
+
+ /* If this prealloc FOP is for fresh file creation, then the size of the
+ * file will be 0. Then there will be no shards associated with this file.
+ * So we can skip the lookup process for the shards which do not exists
+ * and directly issue mknod to crete shards.
+ *
+ * In case the prealloc fop is to extend the preallocated file to bigger
+ * size then just lookup and populate inodes of existing shards and
+ * update the create count
+ */
+ if (local->fop == GF_FOP_FALLOCATE) {
+ if (!local->prebuf.ia_size) {
+ local->inode_list[0] = inode_ref(res_inode);
+ local->create_count = local->last_block;
+ shard_common_inode_write_post_lookup_shards_handler(frame, this);
+ return 0;
+ }
+ if (local->prebuf.ia_size < local->total_size)
+ local->create_count = local->last_block -
+ ((local->prebuf.ia_size - 1) /
+ local->block_size);
+ }
+
+ resolve_count = local->last_block - local->create_count;
+
+ if (res_inode)
+ gf_uuid_copy(gfid, res_inode->gfid);
+ else
+ gf_uuid_copy(gfid, local->base_gfid);
+
+ while (shard_idx_iter <= resolve_count) {
+ i++;
+ if (shard_idx_iter == 0) {
+ local->inode_list[i] = inode_ref(res_inode);
+ shard_idx_iter++;
+ continue;
+ }
+
+ shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
+
+ inode = NULL;
+ inode = inode_resolve(this->itable, path);
+ if (inode) {
+ gf_msg_debug(this->name, 0,
+ "Shard %d already "
+ "present. gfid=%s. Saving inode for future.",
+ shard_idx_iter, uuid_utoa(inode->gfid));
+ local->inode_list[i] = inode;
+ /* Let the ref on the inodes that are already present
+ * in inode table still be held so that they don't get
+ * forgotten by the time the fop reaches the actual
+ * write stage.
+ */
+ LOCK(&priv->lock);
+ {
+ fsync_inode = __shard_update_shards_inode_list(
+ inode, this, res_inode, shard_idx_iter, gfid);
+ }
+ UNLOCK(&priv->lock);
+ shard_idx_iter++;
+ if (fsync_inode)
+ shard_initiate_evicted_inode_fsync(this, fsync_inode);
+ continue;
+ } else {
+ local->call_count++;
+ shard_idx_iter++;
+ }
+ }
+out:
+ post_res_handler(frame, this);
+ return 0;
+}
+
+int
+shard_update_file_size_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ inode_t *inode = NULL;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if ((local->fd) && (local->fd->inode))
+ inode = local->fd->inode;
+ else if (local->loc.inode)
+ inode = local->loc.inode;
+
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SHARD_MSG_UPDATE_FILE_SIZE_FAILED,
+ "Update to file size"
+ " xattr failed on %s",
+ uuid_utoa(inode->gfid));
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ if (shard_modify_size_and_block_count(&local->postbuf, dict)) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+err:
+ local->post_update_size_handler(frame, this);
+ return 0;
+}
+
+int
+shard_set_size_attrs(int64_t size, int64_t block_count, int64_t **size_attr_p)
+{
+ int ret = -1;
+ int64_t *size_attr = NULL;
+
+ if (!size_attr_p)
+ goto out;
+
+ size_attr = GF_CALLOC(4, sizeof(int64_t), gf_shard_mt_int64_t);
+ if (!size_attr)
+ goto out;
+
+ size_attr[0] = hton64(size);
+ /* As sharding evolves, it _may_ be necessary to embed more pieces of
+ * information within the same xattr. So allocating slots for them in
+ * advance. For now, only bytes 0-63 and 128-191 which would make up the
+ * current size and block count respectively of the file are valid.
+ */
+ size_attr[2] = hton64(block_count);
+
+ *size_attr_p = size_attr;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+shard_update_file_size(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ loc_t *loc, shard_post_update_size_fop_handler_t handler)
+{
+ int ret = -1;
+ int64_t *size_attr = NULL;
+ int64_t delta_blocks = 0;
+ inode_t *inode = NULL;
+ shard_local_t *local = NULL;
+ dict_t *xattr_req = NULL;
+
+ local = frame->local;
+ local->post_update_size_handler = handler;
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (fd)
+ inode = fd->inode;
+ else
+ inode = loc->inode;
+
+ /* If both size and block count have not changed, then skip the xattrop.
+ */
+ delta_blocks = GF_ATOMIC_GET(local->delta_blocks);
+ if ((local->delta_size + local->hole_size == 0) && (delta_blocks == 0)) {
+ goto out;
+ }
+
+ ret = shard_set_size_attrs(local->delta_size + local->hole_size,
+ delta_blocks, &size_attr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SIZE_SET_FAILED,
+ "Failed to set size attrs for %s", uuid_utoa(inode->gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, size_attr, 8 * 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to set key %s into dict. gfid=%s",
+ GF_XATTR_SHARD_FILE_SIZE, uuid_utoa(inode->gfid));
+ GF_FREE(size_attr);
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto out;
+ }
+
+ if (fd)
+ STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fd,
+ GF_XATTROP_ADD_ARRAY64, xattr_req, NULL);
+ else
+ STACK_WIND(frame, shard_update_file_size_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc,
+ GF_XATTROP_ADD_ARRAY64, xattr_req, NULL);
+
+ dict_unref(xattr_req);
+ return 0;
+
+out:
+ if (xattr_req)
+ dict_unref(xattr_req);
+ handler(frame, this);
+ return 0;
+}
+
+static inode_t *
+shard_link_internal_dir_inode(shard_local_t *local, inode_t *inode,
+ struct iatt *buf, shard_internal_dir_type_t type)
+{
+ inode_t *linked_inode = NULL;
+ shard_priv_t *priv = NULL;
+ char *bname = NULL;
+ inode_t **priv_inode = NULL;
+ inode_t *parent = NULL;
+
+ priv = THIS->private;
+
+ switch (type) {
+ case SHARD_INTERNAL_DIR_DOT_SHARD:
+ bname = GF_SHARD_DIR;
+ priv_inode = &priv->dot_shard_inode;
+ parent = inode->table->root;
+ break;
+ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+ bname = GF_SHARD_REMOVE_ME_DIR;
+ priv_inode = &priv->dot_shard_rm_inode;
+ parent = priv->dot_shard_inode;
+ break;
+ default:
+ break;
+ }
+
+ linked_inode = inode_link(inode, parent, bname, buf);
+ inode_lookup(linked_inode);
+ *priv_inode = linked_inode;
+ return linked_inode;
+}
+
+int
+shard_refresh_internal_dir_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ shard_local_t *local = NULL;
+ inode_t *linked_inode = NULL;
+ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
+
+ local = frame->local;
+
+ if (op_ret) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto out;
+ }
+
+ /* To-Do: Fix refcount increment per call to
+ * shard_link_internal_dir_inode().
+ */
+ linked_inode = shard_link_internal_dir_inode(local, inode, buf, type);
+ shard_inode_ctx_mark_dir_refreshed(linked_inode, this);
+out:
+ shard_common_resolve_shards(frame, this, local->post_res_handler);
+ return 0;
+}
+
+int
+shard_refresh_internal_dir(call_frame_t *frame, xlator_t *this,
+ shard_internal_dir_type_t type)
+{
+ loc_t loc = {
+ 0,
+ };
+ inode_t *inode = NULL;
+ shard_priv_t *priv = NULL;
+ shard_local_t *local = NULL;
+ uuid_t gfid = {
+ 0,
+ };
+
+ local = frame->local;
+ priv = this->private;
+
+ switch (type) {
+ case SHARD_INTERNAL_DIR_DOT_SHARD:
+ gf_uuid_copy(gfid, priv->dot_shard_gfid);
+ break;
+ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+ gf_uuid_copy(gfid, priv->dot_shard_rm_gfid);
+ break;
+ default:
+ break;
+ }
+
+ inode = inode_find(this->itable, gfid);
+
+ if (!shard_inode_ctx_needs_lookup(inode, this)) {
+ local->op_ret = 0;
+ goto out;
+ }
+
+ /* Plain assignment because the ref is already taken above through
+ * call to inode_find()
+ */
+ loc.inode = inode;
+ gf_uuid_copy(loc.gfid, gfid);
+
+ STACK_WIND_COOKIE(frame, shard_refresh_internal_dir_cbk, (void *)(long)type,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, &loc,
+ NULL);
+ loc_wipe(&loc);
+
+ return 0;
+
+out:
+ shard_common_resolve_shards(frame, this, local->post_res_handler);
+ return 0;
+}
+
+int
+shard_lookup_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ inode_t *link_inode = NULL;
+ shard_local_t *local = NULL;
+ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
+
+ local = frame->local;
+
+ if (op_ret) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ }
+
+ if (!IA_ISDIR(buf->ia_type)) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, SHARD_MSG_DOT_SHARD_NODIR,
+ "%s already exists and "
+ "is not a directory. Please remove it from all bricks "
+ "and try again",
+ shard_internal_dir_string(type));
+ local->op_ret = -1;
+ local->op_errno = EIO;
+ goto unwind;
+ }
+
+ link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
+ if (link_inode != inode) {
+ shard_refresh_internal_dir(frame, this, type);
+ } else {
+ shard_inode_ctx_mark_dir_refreshed(link_inode, this);
+ shard_common_resolve_shards(frame, this, local->post_res_handler);
+ }
+ return 0;
+
+unwind:
+ local->post_res_handler(frame, this);
+ return 0;
+}
+
+int
+shard_lookup_internal_dir(call_frame_t *frame, xlator_t *this,
+ shard_post_resolve_fop_handler_t post_res_handler,
+ shard_internal_dir_type_t type)
+{
+ int ret = -1;
+ dict_t *xattr_req = NULL;
+ shard_priv_t *priv = NULL;
+ shard_local_t *local = NULL;
+ uuid_t *gfid = NULL;
+ loc_t *loc = NULL;
+ gf_boolean_t free_gfid = _gf_true;
+
+ local = frame->local;
+ priv = this->private;
+ local->post_res_handler = post_res_handler;
+
+ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!gfid)
+ goto err;
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+
+ switch (type) {
+ case SHARD_INTERNAL_DIR_DOT_SHARD:
+ gf_uuid_copy(*gfid, priv->dot_shard_gfid);
+ loc = &local->dot_shard_loc;
+ break;
+ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+ gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid);
+ loc = &local->dot_shard_rm_loc;
+ break;
+ default:
+ bzero(*gfid, sizeof(uuid_t));
+ break;
+ }
+
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to set gfid of %s into dict",
+ shard_internal_dir_string(type));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ } else {
+ free_gfid = _gf_false;
+ }
+
+ STACK_WIND_COOKIE(frame, shard_lookup_internal_dir_cbk, (void *)(long)type,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->lookup, loc,
+ xattr_req);
+
+ dict_unref(xattr_req);
+ return 0;
+
+err:
+ if (xattr_req)
+ dict_unref(xattr_req);
+ if (free_gfid)
+ GF_FREE(gfid);
+ post_res_handler(frame, this);
+ return 0;
+}
+
+static void
+shard_inode_ctx_update(inode_t *inode, xlator_t *this, dict_t *xdata,
+ struct iatt *buf)
+{
+ int ret = 0;
+ uint64_t size = 0;
+ void *bsize = NULL;
+
+ if (shard_inode_ctx_get_block_size(inode, this, &size)) {
+ /* Fresh lookup */
+ ret = dict_get_ptr(xdata, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
+ if (!ret)
+ size = ntoh64(*((uint64_t *)bsize));
+ /* If the file is sharded, set its block size, otherwise just
+ * set 0.
+ */
+
+ shard_inode_ctx_set(inode, this, buf, size, SHARD_MASK_BLOCK_SIZE);
+ }
+ /* If the file is sharded, also set the remaining attributes,
+ * except for ia_size and ia_blocks.
+ */
+ if (size) {
+ shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK);
+ (void)shard_inode_ctx_invalidate(inode, this, buf);
+ }
+}
+
+int
+shard_delete_shards(void *opaque);
+
+int
+shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data);
+
+int
+shard_start_background_deletion(xlator_t *this)
+{
+ int ret = 0;
+ gf_boolean_t i_cleanup = _gf_true;
+ shard_priv_t *priv = NULL;
+ call_frame_t *cleanup_frame = NULL;
+
+ priv = this->private;
+
+ LOCK(&priv->lock);
+ {
+ switch (priv->bg_del_state) {
+ case SHARD_BG_DELETION_NONE:
+ i_cleanup = _gf_true;
+ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
+ break;
+ case SHARD_BG_DELETION_LAUNCHING:
+ i_cleanup = _gf_false;
+ break;
+ case SHARD_BG_DELETION_IN_PROGRESS:
+ priv->bg_del_state = SHARD_BG_DELETION_LAUNCHING;
+ i_cleanup = _gf_false;
+ break;
+ default:
+ break;
+ }
+ }
+ UNLOCK(&priv->lock);
+ if (!i_cleanup)
+ return 0;
+
+ cleanup_frame = create_frame(this, this->ctx->pool);
+ if (!cleanup_frame) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+ "Failed to create "
+ "new frame to delete shards");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ set_lk_owner_from_ptr(&cleanup_frame->root->lk_owner, cleanup_frame->root);
+
+ ret = synctask_new(this->ctx->env, shard_delete_shards,
+ shard_delete_shards_cbk, cleanup_frame, cleanup_frame);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno,
+ SHARD_MSG_SHARDS_DELETION_FAILED,
+ "failed to create task to do background "
+ "cleanup of shards");
+ STACK_DESTROY(cleanup_frame->root);
+ goto err;
+ }
+ return 0;
+
+err:
+ LOCK(&priv->lock);
+ {
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
+ }
+ UNLOCK(&priv->lock);
+ return ret;
+}
+
+int
+shard_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ int ret = -1;
+ shard_priv_t *priv = NULL;
+ gf_boolean_t i_start_cleanup = _gf_false;
+
+ priv = this->private;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ if (IA_ISDIR(buf->ia_type))
+ goto unwind;
+
+ /* Also, if the file is sharded, get the file size and block cnt xattr,
+ * and store them in the stbuf appropriately.
+ */
+
+ if (dict_get(xdata, GF_XATTR_SHARD_FILE_SIZE) &&
+ frame->root->pid != GF_CLIENT_PID_GSYNCD)
+ shard_modify_size_and_block_count(buf, xdata);
+
+ /* If this was a fresh lookup, there are two possibilities:
+ * 1) If the file is sharded (indicated by the presence of block size
+ * xattr), store this block size, along with rdev and mode in its
+ * inode ctx.
+ * 2) If the file is not sharded, store size along with rdev and mode
+ * (which are anyway don't cares) in inode ctx. Since @ctx_tmp is
+ * already initialised to all zeroes, nothing more needs to be done.
+ */
+
+ (void)shard_inode_ctx_update(inode, this, xdata, buf);
+
+ LOCK(&priv->lock);
+ {
+ if (priv->first_lookup_done == _gf_false) {
+ priv->first_lookup_done = _gf_true;
+ i_start_cleanup = _gf_true;
+ }
+ }
+ UNLOCK(&priv->lock);
+
+ if (!i_start_cleanup)
+ goto unwind;
+
+ ret = shard_start_background_deletion(this);
+ if (ret < 0) {
+ LOCK(&priv->lock);
+ {
+ priv->first_lookup_done = _gf_false;
+ }
+ UNLOCK(&priv->lock);
+ }
+
+unwind:
+ SHARD_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ return 0;
+}
+
+int
+shard_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ int ret = -1;
+ int32_t op_errno = ENOMEM;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+
+ this->itable = loc->inode->table;
+ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) &&
+ (frame->root->pid != GF_CLIENT_PID_GLFS_HEAL)) {
+ SHARD_ENTRY_FOP_CHECK(loc, op_errno, err);
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+
+ loc_copy(&local->loc, loc);
+
+ local->xattr_req = xattr_req ? dict_ref(xattr_req) : dict_new();
+ if (!local->xattr_req)
+ goto err;
+
+ if (shard_inode_ctx_get_block_size(loc->inode, this, &block_size)) {
+ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to set dict"
+ " value: key:%s for path %s",
+ GF_XATTR_SHARD_BLOCK_SIZE, loc->path);
+ goto err;
+ }
+ }
+
+ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE,
+ 8 * 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to set dict value: key:%s for path %s.",
+ GF_XATTR_SHARD_FILE_SIZE, loc->path);
+ goto err;
+ }
+ }
+
+ if ((xattr_req) && (dict_get(xattr_req, GF_CONTENT_KEY)))
+ dict_del(xattr_req, GF_CONTENT_KEY);
+
+ STACK_WIND(frame, shard_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, local->xattr_req);
+ return 0;
+err:
+ shard_common_failure_unwind(GF_FOP_LOOKUP, frame, -1, op_errno);
+ return 0;
+}
+
+int
+shard_set_iattr_invoke_post_handler(call_frame_t *frame, xlator_t *this,
+ inode_t *inode, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ int ret = -1;
+ int32_t mask = SHARD_INODE_WRITE_MASK;
+ shard_local_t *local = frame->local;
+ shard_inode_ctx_t ctx = {
+ 0,
+ };
+
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SHARD_MSG_BASE_FILE_LOOKUP_FAILED,
+ "Lookup on base file"
+ " failed : %s",
+ uuid_utoa(inode->gfid));
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ }
+
+ local->prebuf = *buf;
+ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto unwind;
+ }
+
+ if (shard_inode_ctx_get_all(inode, this, &ctx))
+ mask = SHARD_ALL_MASK;
+
+ ret = shard_inode_ctx_set(inode, this, &local->prebuf, 0,
+ (mask | SHARD_MASK_REFRESH_RESET));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, SHARD_MSG_INODE_CTX_SET_FAILED, 0,
+ "Failed to set inode"
+ " write params into inode ctx for %s",
+ uuid_utoa(buf->ia_gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto unwind;
+ }
+
+unwind:
+ local->handler(frame, this);
+ return 0;
+}
+
+int
+shard_fstat_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ shard_local_t *local = frame->local;
+
+ shard_set_iattr_invoke_post_handler(frame, this, local->fd->inode, op_ret,
+ op_errno, buf, xdata);
+ return 0;
+}
+
+int
+shard_lookup_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ /* In case of op_ret < 0, inode passed to this function will be NULL
+ ex: in case of op_errno = ENOENT. So refer prefilled inode data
+ which is part of local.
+ Note: Reassigning/overriding the inode passed to this cbk with inode
+ which is part of *struct shard_local_t* won't cause any issue as
+ both inodes have same reference/address as of the inode passed */
+ inode = ((shard_local_t *)frame->local)->loc.inode;
+
+ shard_set_iattr_invoke_post_handler(frame, this, inode, op_ret, op_errno,
+ buf, xdata);
+ return 0;
+}
+
+/* This function decides whether to make file based lookup or
+ * fd based lookup (fstat) depending on the 3rd and 4th arg.
+ * If fd != NULL and loc == NULL then call is for fstat
+ * If fd == NULL and loc != NULL then call is for file based
+ * lookup. Please pass args based on the requirement.
+ */
+int
+shard_refresh_base_file(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ fd_t *fd, shard_post_fop_handler_t handler)
+{
+ int ret = -1;
+ inode_t *inode = NULL;
+ shard_local_t *local = NULL;
+ dict_t *xattr_req = NULL;
+ gf_boolean_t need_refresh = _gf_false;
+
+ local = frame->local;
+ local->handler = handler;
+ inode = fd ? fd->inode : loc->inode;
+
+ ret = shard_inode_ctx_fill_iatt_from_cache(inode, this, &local->prebuf,
+ &need_refresh);
+ /* By this time, inode ctx should have been created either in create,
+ * mknod, readdirp or lookup. If not it is a bug!
+ */
+ if ((ret == 0) && (need_refresh == _gf_false)) {
+ gf_msg_debug(this->name, 0,
+ "Skipping lookup on base file: %s"
+ "Serving prebuf off the inode ctx cache",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto out;
+ }
+
+ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, xattr_req, inode->gfid, local, out);
+
+ if (fd)
+ STACK_WIND(frame, shard_fstat_base_file_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xattr_req);
+ else
+ STACK_WIND(frame, shard_lookup_base_file_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+
+ dict_unref(xattr_req);
+ return 0;
+
+out:
+ if (xattr_req)
+ dict_unref(xattr_req);
+ handler(frame, this);
+ return 0;
+}
+
+int
+shard_post_fstat_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret >= 0)
+ shard_inode_ctx_set(local->fd->inode, this, &local->prebuf, 0,
+ SHARD_LOOKUP_MASK);
+
+ SHARD_STACK_UNWIND(fstat, frame, local->op_ret, local->op_errno,
+ &local->prebuf, local->xattr_rsp);
+ return 0;
+}
+
+int
+shard_post_stat_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret >= 0)
+ shard_inode_ctx_set(local->loc.inode, this, &local->prebuf, 0,
+ SHARD_LOOKUP_MASK);
+
+ SHARD_STACK_UNWIND(stat, frame, local->op_ret, local->op_errno,
+ &local->prebuf, local->xattr_rsp);
+ return 0;
+}
+
+int
+shard_common_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ inode_t *inode = NULL;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_STAT_FAILED,
+ "stat failed: %s",
+ local->fd ? uuid_utoa(local->fd->inode->gfid)
+ : uuid_utoa((local->loc.inode)->gfid));
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ }
+
+ local->prebuf = *buf;
+ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto unwind;
+ }
+ local->xattr_rsp = dict_ref(xdata);
+
+ if (local->loc.inode)
+ inode = local->loc.inode;
+ else
+ inode = local->fd->inode;
+
+ shard_inode_ctx_invalidate(inode, this, &local->prebuf);
+
+unwind:
+ local->handler(frame, this);
+ return 0;
+}
+
+int
+shard_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int ret = -1;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+
+ if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) {
+ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+ }
+
+ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block "
+ "size from inode ctx of %s",
+ uuid_utoa(loc->inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ STACK_WIND(frame, default_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+ return 0;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+
+ local->handler = shard_post_stat_handler;
+ loc_copy(&local->loc, loc);
+ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+ if (!local->xattr_req)
+ goto err;
+
+ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid,
+ local, err);
+
+ STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, local->xattr_req);
+ return 0;
+err:
+ shard_common_failure_unwind(GF_FOP_STAT, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int ret = -1;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+
+ if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) {
+ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
+ }
+
+ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block "
+ "size from inode ctx of %s",
+ uuid_utoa(fd->inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ STACK_WIND(frame, default_fstat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+ return 0;
+ }
+
+ if (!this->itable)
+ this->itable = fd->inode->table;
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+
+ local->handler = shard_post_fstat_handler;
+ local->fd = fd_ref(fd);
+ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+ if (!local->xattr_req)
+ goto err;
+
+ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
+ local, err);
+
+ STACK_WIND(frame, shard_common_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, local->xattr_req);
+ return 0;
+err:
+ shard_common_failure_unwind(GF_FOP_FSTAT, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_post_update_size_truncate_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->fop == GF_FOP_TRUNCATE)
+ SHARD_STACK_UNWIND(truncate, frame, local->op_ret, local->op_errno,
+ &local->prebuf, &local->postbuf, NULL);
+ else
+ SHARD_STACK_UNWIND(ftruncate, frame, local->op_ret, local->op_errno,
+ &local->prebuf, &local->postbuf, NULL);
+ return 0;
+}
+
+int
+shard_truncate_last_shard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ inode_t *inode = NULL;
+ int64_t delta_blocks = 0;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ SHARD_UNSET_ROOT_FS_ID(frame, local);
+
+ inode = (local->fop == GF_FOP_TRUNCATE) ? local->loc.inode
+ : local->fd->inode;
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SHARD_MSG_TRUNCATE_LAST_SHARD_FAILED,
+ "truncate on last"
+ " shard failed : %s",
+ uuid_utoa(inode->gfid));
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ local->postbuf.ia_size = local->offset;
+ /* Let the delta be negative. We want xattrop to do subtraction */
+ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
+ delta_blocks = GF_ATOMIC_ADD(local->delta_blocks,
+ postbuf->ia_blocks - prebuf->ia_blocks);
+ GF_ASSERT(delta_blocks <= 0);
+ local->postbuf.ia_blocks += delta_blocks;
+ local->hole_size = 0;
+
+ shard_inode_ctx_set(inode, this, &local->postbuf, 0, SHARD_MASK_TIMES);
+ shard_update_file_size(frame, this, NULL, &local->loc,
+ shard_post_update_size_truncate_handler);
+ return 0;
+err:
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+}
+
+int
+shard_truncate_last_shard(call_frame_t *frame, xlator_t *this, inode_t *inode)
+{
+ size_t last_shard_size_after = 0;
+ loc_t loc = {
+ 0,
+ };
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ /* A NULL inode could be due to the fact that the last shard which
+ * needs to be truncated does not exist due to it lying in a hole
+ * region. So the only thing left to do in that case would be an
+ * update to file size xattr.
+ */
+ if (!inode) {
+ gf_msg_debug(this->name, 0,
+ "Last shard to be truncated absent in backend: %" PRIu64
+ " of gfid %s. Directly proceeding to update file size",
+ local->first_block, uuid_utoa(local->loc.inode->gfid));
+ shard_update_file_size(frame, this, NULL, &local->loc,
+ shard_post_update_size_truncate_handler);
+ return 0;
+ }
+
+ SHARD_SET_ROOT_FS_ID(frame, local);
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, inode->gfid);
+
+ last_shard_size_after = (local->offset % local->block_size);
+
+ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, &loc, last_shard_size_after,
+ NULL);
+ loc_wipe(&loc);
+ return 0;
+}
+
+void
+shard_unlink_block_inode(shard_local_t *local, int shard_block_num);
+
+int
+shard_truncate_htol_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ int ret = 0;
+ int call_count = 0;
+ int shard_block_num = (long)cookie;
+ uint64_t block_count = 0;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto done;
+ }
+ ret = dict_get_uint64(xdata, GF_GET_FILE_BLOCK_COUNT, &block_count);
+ if (!ret) {
+ GF_ATOMIC_SUB(local->delta_blocks, block_count);
+ } else {
+ /* dict_get failed possibly due to a heterogeneous cluster? */
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to get key %s from dict during truncate of gfid %s",
+ GF_GET_FILE_BLOCK_COUNT,
+ uuid_utoa(local->resolver_base_inode->gfid));
+ }
+
+ shard_unlink_block_inode(local, shard_block_num);
+done:
+ call_count = shard_call_count_return(frame);
+ if (call_count == 0) {
+ SHARD_UNSET_ROOT_FS_ID(frame, local);
+ shard_truncate_last_shard(frame, this, local->inode_list[0]);
+ }
+ return 0;
+}
+
+int
+shard_truncate_htol(call_frame_t *frame, xlator_t *this, inode_t *inode)
+{
+ int i = 1;
+ int ret = -1;
+ int call_count = 0;
+ uint32_t cur_block = 0;
+ uint32_t last_block = 0;
+ char path[PATH_MAX] = {
+ 0,
+ };
+ char *bname = NULL;
+ loc_t loc = {
+ 0,
+ };
+ gf_boolean_t wind_failed = _gf_false;
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
+ dict_t *xdata_req = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ cur_block = local->first_block + 1;
+ last_block = local->last_block;
+
+ /* Determine call count */
+ for (i = 1; i < local->num_blocks; i++) {
+ if (!local->inode_list[i])
+ continue;
+ call_count++;
+ }
+
+ if (!call_count) {
+ /* Call count = 0 implies that all of the shards that need to be
+ * unlinked do not exist. So shard xlator would now proceed to
+ * do the final truncate + size updates.
+ */
+ gf_msg_debug(this->name, 0,
+ "Shards to be unlinked as part of "
+ "truncate absent in backend: %s. Directly "
+ "proceeding to update file size",
+ uuid_utoa(inode->gfid));
+ local->postbuf.ia_size = local->offset;
+ local->postbuf.ia_blocks = local->prebuf.ia_blocks;
+ local->delta_size = local->postbuf.ia_size - local->prebuf.ia_size;
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
+ local->hole_size = 0;
+ shard_update_file_size(frame, this, local->fd, &local->loc,
+ shard_post_update_size_truncate_handler);
+ return 0;
+ }
+
+ local->call_count = call_count;
+ i = 1;
+ xdata_req = dict_new();
+ if (!xdata_req) {
+ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+ return 0;
+ }
+ ret = dict_set_uint64(xdata_req, GF_GET_FILE_BLOCK_COUNT, 8 * 8);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to set key %s into dict during truncate of %s",
+ GF_GET_FILE_BLOCK_COUNT,
+ uuid_utoa(local->resolver_base_inode->gfid));
+ dict_unref(xdata_req);
+ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+ return 0;
+ }
+
+ SHARD_SET_ROOT_FS_ID(frame, local);
+ while (cur_block <= last_block) {
+ if (!local->inode_list[i]) {
+ cur_block++;
+ i++;
+ continue;
+ }
+ if (wind_failed) {
+ shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1,
+ ENOMEM, NULL, NULL, NULL);
+ goto next;
+ }
+
+ shard_make_block_abspath(cur_block, inode->gfid, path, sizeof(path));
+ bname = strrchr(path, '/') + 1;
+ loc.parent = inode_ref(priv->dot_shard_inode);
+ ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+ "Inode path failed"
+ " on %s. Base file gfid = %s",
+ bname, uuid_utoa(inode->gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ loc_wipe(&loc);
+ wind_failed = _gf_true;
+ shard_truncate_htol_cbk(frame, (void *)(long)cur_block, this, -1,
+ ENOMEM, NULL, NULL, NULL);
+ goto next;
+ }
+ loc.name = strrchr(loc.path, '/');
+ if (loc.name)
+ loc.name++;
+ loc.inode = inode_ref(local->inode_list[i]);
+
+ STACK_WIND_COOKIE(frame, shard_truncate_htol_cbk,
+ (void *)(long)cur_block, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &loc, 0, xdata_req);
+ loc_wipe(&loc);
+ next:
+ i++;
+ cur_block++;
+ if (!--call_count)
+ break;
+ }
+ dict_unref(xdata_req);
+ return 0;
+}
+
+int
+shard_truncate_do(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->num_blocks == 1) {
+ /* This means that there are no shards to be unlinked.
+ * The fop boils down to truncating the last shard, updating
+ * the size and unwinding.
+ */
+ shard_truncate_last_shard(frame, this, local->inode_list[0]);
+ return 0;
+ } else {
+ shard_truncate_htol(frame, this, local->loc.inode);
+ }
+ return 0;
+}
+
+int
+shard_post_lookup_shards_truncate_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ shard_truncate_do(frame, this);
+ return 0;
+}
+
+void
+shard_link_block_inode(shard_local_t *local, int block_num, inode_t *inode,
+ struct iatt *buf)
+{
+ int list_index = 0;
+ char block_bname[256] = {
+ 0,
+ };
+ uuid_t gfid = {
+ 0,
+ };
+ inode_t *linked_inode = NULL;
+ xlator_t *this = NULL;
+ inode_t *fsync_inode = NULL;
+ shard_priv_t *priv = NULL;
+ inode_t *base_inode = NULL;
+
+ this = THIS;
+ priv = this->private;
+ if (local->loc.inode) {
+ gf_uuid_copy(gfid, local->loc.inode->gfid);
+ base_inode = local->loc.inode;
+ } else if (local->resolver_base_inode) {
+ gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
+ base_inode = local->resolver_base_inode;
+ } else {
+ gf_uuid_copy(gfid, local->base_gfid);
+ }
+
+ shard_make_block_bname(block_num, gfid, block_bname, sizeof(block_bname));
+
+ shard_inode_ctx_set(inode, this, buf, 0, SHARD_LOOKUP_MASK);
+ linked_inode = inode_link(inode, priv->dot_shard_inode, block_bname, buf);
+ inode_lookup(linked_inode);
+ list_index = block_num - local->first_block;
+ local->inode_list[list_index] = linked_inode;
+
+ LOCK(&priv->lock);
+ {
+ fsync_inode = __shard_update_shards_inode_list(
+ linked_inode, this, base_inode, block_num, gfid);
+ }
+ UNLOCK(&priv->lock);
+ if (fsync_inode)
+ shard_initiate_evicted_inode_fsync(this, fsync_inode);
+}
+
+int
+shard_common_lookup_shards_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret, int32_t op_errno,
+ inode_t *inode, struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ int call_count = 0;
+ int shard_block_num = (long)cookie;
+ uuid_t gfid = {
+ 0,
+ };
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+ if (local->resolver_base_inode)
+ gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
+ else
+ gf_uuid_copy(gfid, local->base_gfid);
+
+ if (op_ret < 0) {
+ /* Ignore absence of shards in the backend in truncate fop. */
+ switch (local->fop) {
+ case GF_FOP_TRUNCATE:
+ case GF_FOP_FTRUNCATE:
+ case GF_FOP_RENAME:
+ case GF_FOP_UNLINK:
+ if (op_errno == ENOENT)
+ goto done;
+ break;
+ case GF_FOP_WRITE:
+ case GF_FOP_READ:
+ case GF_FOP_ZEROFILL:
+ case GF_FOP_DISCARD:
+ case GF_FOP_FALLOCATE:
+ if ((!local->first_lookup_done) && (op_errno == ENOENT)) {
+ LOCK(&frame->lock);
+ {
+ local->create_count++;
+ }
+ UNLOCK(&frame->lock);
+ goto done;
+ }
+ break;
+ default:
+ break;
+ }
+
+ /* else */
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SHARD_MSG_LOOKUP_SHARD_FAILED,
+ "Lookup on shard %d "
+ "failed. Base file gfid = %s",
+ shard_block_num, uuid_utoa(gfid));
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto done;
+ }
+
+ shard_link_block_inode(local, shard_block_num, inode, buf);
+
+done:
+ if (local->lookup_shards_barriered) {
+ syncbarrier_wake(&local->barrier);
+ return 0;
+ } else {
+ call_count = shard_call_count_return(frame);
+ if (call_count == 0) {
+ if (!local->first_lookup_done)
+ local->first_lookup_done = _gf_true;
+ local->pls_fop_handler(frame, this);
+ }
+ }
+ return 0;
+}
+
+dict_t *
+shard_create_gfid_dict(dict_t *dict)
+{
+ int ret = 0;
+ dict_t *new = NULL;
+ unsigned char *gfid = NULL;
+
+ new = dict_copy_with_ref(dict, NULL);
+ if (!new)
+ return NULL;
+
+ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_char);
+ if (!gfid) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_uuid_generate(gfid);
+
+ ret = dict_set_gfuuid(new, "gfid-req", gfid, false);
+
+out:
+ if (ret) {
+ dict_unref(new);
+ new = NULL;
+ GF_FREE(gfid);
+ }
+
+ return new;
+}
+
+int
+shard_common_lookup_shards(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ shard_post_lookup_shards_fop_handler_t handler)
+{
+ int i = 0;
+ int ret = 0;
+ int count = 0;
+ int call_count = 0;
+ int32_t shard_idx_iter = 0;
+ int lookup_count = 0;
+ char path[PATH_MAX] = {
+ 0,
+ };
+ char *bname = NULL;
+ uuid_t gfid = {
+ 0,
+ };
+ loc_t loc = {
+ 0,
+ };
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
+ gf_boolean_t wind_failed = _gf_false;
+ dict_t *xattr_req = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ count = call_count = local->call_count;
+ shard_idx_iter = local->first_block;
+ lookup_count = local->last_block - local->create_count;
+ local->pls_fop_handler = handler;
+ if (local->lookup_shards_barriered)
+ local->barrier.waitfor = local->call_count;
+
+ if (inode)
+ gf_uuid_copy(gfid, inode->gfid);
+ else
+ gf_uuid_copy(gfid, local->base_gfid);
+
+ while (shard_idx_iter <= lookup_count) {
+ if (local->inode_list[i]) {
+ i++;
+ shard_idx_iter++;
+ continue;
+ }
+
+ if (wind_failed) {
+ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter,
+ this, -1, ENOMEM, NULL, NULL, NULL,
+ NULL);
+ goto next;
+ }
+
+ shard_make_block_abspath(shard_idx_iter, gfid, path, sizeof(path));
+
+ bname = strrchr(path, '/') + 1;
+ loc.inode = inode_new(this->itable);
+ loc.parent = inode_ref(priv->dot_shard_inode);
+ gf_uuid_copy(loc.pargfid, priv->dot_shard_gfid);
+ ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+ if (ret < 0 || !(loc.inode)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+ "Inode path failed"
+ " on %s, base file gfid = %s",
+ bname, uuid_utoa(gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ loc_wipe(&loc);
+ wind_failed = _gf_true;
+ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter,
+ this, -1, ENOMEM, NULL, NULL, NULL,
+ NULL);
+ goto next;
+ }
+
+ loc.name = strrchr(loc.path, '/');
+ if (loc.name)
+ loc.name++;
+
+ xattr_req = shard_create_gfid_dict(local->xattr_req);
+ if (!xattr_req) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ wind_failed = _gf_true;
+ loc_wipe(&loc);
+ shard_common_lookup_shards_cbk(frame, (void *)(long)shard_idx_iter,
+ this, -1, ENOMEM, NULL, NULL, NULL,
+ NULL);
+ goto next;
+ }
+
+ STACK_WIND_COOKIE(frame, shard_common_lookup_shards_cbk,
+ (void *)(long)shard_idx_iter, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &loc, xattr_req);
+ loc_wipe(&loc);
+ dict_unref(xattr_req);
+ next:
+ shard_idx_iter++;
+ i++;
+
+ if (!--call_count)
+ break;
+ }
+ if (local->lookup_shards_barriered) {
+ syncbarrier_wait(&local->barrier, count);
+ local->pls_fop_handler(frame, this);
+ }
+ return 0;
+}
+
+int
+shard_post_resolve_truncate_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ if (local->op_errno == ENOENT) {
+ /* If lookup on /.shard fails with ENOENT, it means that
+ * the file was 0-byte in size but truncated sometime in
+ * the past to a higher size which is reflected in the
+ * size xattr, and now being truncated to a lower size.
+ * In this case, the only thing that needs to be done is
+ * to update the size xattr of the file and unwind.
+ */
+ local->first_block = local->last_block = 0;
+ local->num_blocks = 1;
+ local->call_count = 0;
+ local->op_ret = 0;
+ local->postbuf.ia_size = local->offset;
+ shard_update_file_size(frame, this, local->fd, &local->loc,
+ shard_post_update_size_truncate_handler);
+ return 0;
+ } else {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+ }
+
+ if (!local->call_count)
+ shard_truncate_do(frame, this);
+ else
+ shard_common_lookup_shards(frame, this, local->loc.inode,
+ shard_post_lookup_shards_truncate_handler);
+
+ return 0;
+}
+
+int
+shard_truncate_begin(call_frame_t *frame, xlator_t *this)
+{
+ int ret = 0;
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ /* First participant block here is the lowest numbered block that would
+ * hold the last byte of the file post successful truncation.
+ * Last participant block is the block that contains the last byte in
+ * the current state of the file.
+ * If (first block == last_block):
+ * then that means that the file only needs truncation of the
+ * first (or last since both are same) block.
+ * Else
+ * if (new_size % block_size == 0)
+ * then that means there is no truncate to be done with
+ * only shards from first_block + 1 through the last
+ * block needing to be unlinked.
+ * else
+ * both truncate of the first block and unlink of the
+ * remaining shards until end of file is required.
+ */
+ local->first_block = (local->offset == 0)
+ ? 0
+ : get_lowest_block(local->offset - 1,
+ local->block_size);
+ local->last_block = get_highest_block(0, local->prebuf.ia_size,
+ local->block_size);
+
+ local->num_blocks = local->last_block - local->first_block + 1;
+ GF_ASSERT(local->num_blocks > 0);
+ local->resolver_base_inode = (local->fop == GF_FOP_TRUNCATE)
+ ? local->loc.inode
+ : local->fd->inode;
+
+ if ((local->first_block == 0) && (local->num_blocks == 1)) {
+ if (local->fop == GF_FOP_TRUNCATE)
+ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, &local->loc,
+ local->offset, local->xattr_req);
+ else
+ STACK_WIND(frame, shard_truncate_last_shard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, local->fd,
+ local->offset, local->xattr_req);
+ return 0;
+ }
+
+ local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
+ gf_shard_mt_inode_list);
+ if (!local->inode_list)
+ goto err;
+
+ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
+ if (!local->dot_shard_loc.inode) {
+ ret = shard_init_internal_dir_loc(this, local,
+ SHARD_INTERNAL_DIR_DOT_SHARD);
+ if (ret)
+ goto err;
+ shard_lookup_internal_dir(frame, this,
+ shard_post_resolve_truncate_handler,
+ SHARD_INTERNAL_DIR_DOT_SHARD);
+ } else {
+ local->post_res_handler = shard_post_resolve_truncate_handler;
+ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
+ }
+ return 0;
+
+err:
+ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_post_lookup_truncate_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+ struct iatt tmp_stbuf = {
+ 0,
+ };
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ local->postbuf = tmp_stbuf = local->prebuf;
+
+ if (local->prebuf.ia_size == local->offset) {
+ /* If the file size is same as requested size, unwind the call
+ * immediately.
+ */
+ if (local->fop == GF_FOP_TRUNCATE)
+ SHARD_STACK_UNWIND(truncate, frame, 0, 0, &local->prebuf,
+ &local->postbuf, NULL);
+ else
+ SHARD_STACK_UNWIND(ftruncate, frame, 0, 0, &local->prebuf,
+ &local->postbuf, NULL);
+ } else if (local->offset > local->prebuf.ia_size) {
+ /* If the truncate is from a lower to a higher size, set the
+ * new size xattr and unwind.
+ */
+ local->hole_size = local->offset - local->prebuf.ia_size;
+ local->delta_size = 0;
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
+ local->postbuf.ia_size = local->offset;
+ tmp_stbuf.ia_size = local->offset;
+ shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
+ SHARD_INODE_WRITE_MASK);
+ shard_update_file_size(frame, this, NULL, &local->loc,
+ shard_post_update_size_truncate_handler);
+ } else {
+ /* ... else
+ * i. unlink all shards that need to be unlinked.
+ * ii. truncate the last of the shards.
+ * iii. update the new size using setxattr.
+ * and unwind the fop.
+ */
+ local->hole_size = 0;
+ local->delta_size = (local->offset - local->prebuf.ia_size);
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
+ tmp_stbuf.ia_size = local->offset;
+ shard_inode_ctx_set(local->loc.inode, this, &tmp_stbuf, 0,
+ SHARD_INODE_WRITE_MASK);
+ shard_truncate_begin(frame, this);
+ }
+ return 0;
+}
+
+/* TO-DO:
+ * Fix updates to size and block count with racing write(s) and truncate(s).
+ */
+
+int
+shard_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ int ret = -1;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+
+ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block "
+ "size from inode ctx of %s",
+ uuid_utoa(loc->inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ STACK_WIND(frame, default_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ return 0;
+ }
+
+ if (!this->itable)
+ this->itable = loc->inode->table;
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+
+ ret = syncbarrier_init(&local->barrier);
+ if (ret)
+ goto err;
+ loc_copy(&local->loc, loc);
+ local->offset = offset;
+ local->block_size = block_size;
+ local->fop = GF_FOP_TRUNCATE;
+ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+ if (!local->xattr_req)
+ goto err;
+ local->resolver_base_inode = loc->inode;
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
+
+ shard_refresh_base_file(frame, this, &local->loc, NULL,
+ shard_post_lookup_truncate_handler);
+ return 0;
+
+err:
+ shard_common_failure_unwind(GF_FOP_TRUNCATE, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ int ret = -1;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+
+ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block "
+ "size from inode ctx of %s",
+ uuid_utoa(fd->inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ STACK_WIND(frame, default_ftruncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ return 0;
+ }
+
+ if (!this->itable)
+ this->itable = fd->inode->table;
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+ ret = syncbarrier_init(&local->barrier);
+ if (ret)
+ goto err;
+ local->fd = fd_ref(fd);
+ local->offset = offset;
+ local->block_size = block_size;
+ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+ if (!local->xattr_req)
+ goto err;
+ local->fop = GF_FOP_FTRUNCATE;
+
+ local->loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+ local->resolver_base_inode = fd->inode;
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
+
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_truncate_handler);
+ return 0;
+err:
+ shard_common_failure_unwind(GF_FOP_FTRUNCATE, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int ret = -1;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1)
+ goto unwind;
+
+ ret = shard_inode_ctx_set(inode, this, buf, local->block_size,
+ SHARD_ALL_MASK);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED,
+ "Failed to set inode "
+ "ctx for %s",
+ uuid_utoa(inode->gfid));
+
+unwind:
+ SHARD_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+
+ return 0;
+}
+
+int
+shard_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ shard_priv_t *priv = NULL;
+ shard_local_t *local = NULL;
+
+ priv = this->private;
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+ local->block_size = priv->block_size;
+ if (!__is_gsyncd_on_shard_dir(frame, loc)) {
+ SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err);
+ }
+
+ STACK_WIND(frame, shard_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
+ return 0;
+err:
+ shard_common_failure_unwind(GF_FOP_MKNOD, frame, -1, ENOMEM);
+ return 0;
+}
+
+int32_t
+shard_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+ if (op_ret < 0)
+ goto err;
+
+ shard_inode_ctx_set(inode, this, buf, 0,
+ SHARD_MASK_NLINK | SHARD_MASK_TIMES);
+ buf->ia_size = local->prebuf.ia_size;
+ buf->ia_blocks = local->prebuf.ia_blocks;
+
+ SHARD_STACK_UNWIND(link, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+err:
+ shard_common_failure_unwind(GF_FOP_LINK, frame, op_ret, op_errno);
+ return 0;
+}
+
+int
+shard_post_lookup_link_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ SHARD_STACK_UNWIND(link, frame, local->op_ret, local->op_errno, NULL,
+ NULL, NULL, NULL, NULL);
+ return 0;
+ }
+
+ STACK_WIND(frame, shard_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, &local->loc, &local->loc2,
+ local->xattr_req);
+ return 0;
+}
+
+int32_t
+shard_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ int ret = -1;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+
+ ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block "
+ "size from inode ctx of %s",
+ uuid_utoa(oldloc->inode->gfid));
+ goto err;
+ }
+
+ if (!block_size) {
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+ return 0;
+ }
+
+ if (!this->itable)
+ this->itable = oldloc->inode->table;
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+
+ loc_copy(&local->loc, oldloc);
+ loc_copy(&local->loc2, newloc);
+ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+ if (!local->xattr_req)
+ goto err;
+
+ shard_refresh_base_file(frame, this, &local->loc, NULL,
+ shard_post_lookup_link_handler);
+ return 0;
+err:
+ shard_common_failure_unwind(GF_FOP_LINK, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode);
+
+int
+shard_post_lookup_shards_unlink_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+ uuid_t gfid = {
+ 0,
+ };
+
+ local = frame->local;
+
+ if (local->resolver_base_inode)
+ gf_uuid_copy(gfid, local->resolver_base_inode->gfid);
+ else
+ gf_uuid_copy(gfid, local->base_gfid);
+
+ if ((local->op_ret < 0) && (local->op_errno != ENOENT)) {
+ gf_msg(this->name, GF_LOG_ERROR, local->op_errno, SHARD_MSG_FOP_FAILED,
+ "failed to delete shards of %s", uuid_utoa(gfid));
+ return 0;
+ }
+ local->op_ret = 0;
+ local->op_errno = 0;
+
+ shard_unlink_shards_do(frame, this, local->resolver_base_inode);
+ return 0;
+}
+
+int
+shard_post_resolve_unlink_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+ local->lookup_shards_barriered = _gf_true;
+
+ if (!local->call_count)
+ shard_unlink_shards_do(frame, this, local->resolver_base_inode);
+ else
+ shard_common_lookup_shards(frame, this, local->resolver_base_inode,
+ shard_post_lookup_shards_unlink_handler);
+ return 0;
+}
+
+void
+shard_unlink_block_inode(shard_local_t *local, int shard_block_num)
+{
+ char block_bname[256] = {
+ 0,
+ };
+ uuid_t gfid = {
+ 0,
+ };
+ inode_t *inode = NULL;
+ inode_t *base_inode = NULL;
+ xlator_t *this = NULL;
+ shard_priv_t *priv = NULL;
+ shard_inode_ctx_t *ctx = NULL;
+ shard_inode_ctx_t *base_ictx = NULL;
+ int unref_base_inode = 0;
+ int unref_shard_inode = 0;
+
+ this = THIS;
+ priv = this->private;
+
+ inode = local->inode_list[shard_block_num - local->first_block];
+ shard_inode_ctx_get(inode, this, &ctx);
+ base_inode = ctx->base_inode;
+ if (base_inode)
+ gf_uuid_copy(gfid, base_inode->gfid);
+ else
+ gf_uuid_copy(gfid, ctx->base_gfid);
+ shard_make_block_bname(shard_block_num, gfid, block_bname,
+ sizeof(block_bname));
+
+ LOCK(&priv->lock);
+ if (base_inode)
+ LOCK(&base_inode->lock);
+ LOCK(&inode->lock);
+ {
+ __shard_inode_ctx_get(inode, this, &ctx);
+ if (!list_empty(&ctx->ilist)) {
+ list_del_init(&ctx->ilist);
+ priv->inode_count--;
+ unref_base_inode++;
+ unref_shard_inode++;
+ GF_ASSERT(priv->inode_count >= 0);
+ }
+ if (ctx->fsync_needed) {
+ unref_base_inode++;
+ unref_shard_inode++;
+ list_del_init(&ctx->to_fsync_list);
+ if (base_inode) {
+ __shard_inode_ctx_get(base_inode, this, &base_ictx);
+ base_ictx->fsync_count--;
+ }
+ }
+ }
+ UNLOCK(&inode->lock);
+ if (base_inode)
+ UNLOCK(&base_inode->lock);
+
+ inode_unlink(inode, priv->dot_shard_inode, block_bname);
+ inode_ref_reduce_by_n(inode, unref_shard_inode);
+ inode_forget(inode, 0);
+
+ if (base_inode && unref_base_inode)
+ inode_ref_reduce_by_n(base_inode, unref_base_inode);
+ UNLOCK(&priv->lock);
+}
+
+int
+shard_rename_cbk(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ SHARD_STACK_UNWIND(rename, frame, local->op_ret, local->op_errno,
+ &local->prebuf, &local->preoldparent,
+ &local->postoldparent, &local->prenewparent,
+ &local->postnewparent, local->xattr_rsp);
+ return 0;
+}
+
+int32_t
+shard_unlink_cbk(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = frame->local;
+
+ SHARD_STACK_UNWIND(unlink, frame, local->op_ret, local->op_errno,
+ &local->preoldparent, &local->postoldparent,
+ local->xattr_rsp);
+ return 0;
+}
+
+int
+shard_unlink_shards_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ int shard_block_num = (long)cookie;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto done;
+ }
+
+ shard_unlink_block_inode(local, shard_block_num);
+done:
+ syncbarrier_wake(&local->barrier);
+ return 0;
+}
+
+int
+shard_unlink_shards_do(call_frame_t *frame, xlator_t *this, inode_t *inode)
+{
+ int i = 0;
+ int ret = -1;
+ int count = 0;
+ uint32_t cur_block = 0;
+ uint32_t cur_block_idx = 0; /*this is idx into inode_list[] array */
+ char *bname = NULL;
+ char path[PATH_MAX] = {
+ 0,
+ };
+ uuid_t gfid = {
+ 0,
+ };
+ loc_t loc = {
+ 0,
+ };
+ gf_boolean_t wind_failed = _gf_false;
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (inode)
+ gf_uuid_copy(gfid, inode->gfid);
+ else
+ gf_uuid_copy(gfid, local->base_gfid);
+
+ for (i = 0; i < local->num_blocks; i++) {
+ if (!local->inode_list[i])
+ continue;
+ count++;
+ }
+
+ if (!count) {
+ /* callcount = 0 implies that all of the shards that need to be
+ * unlinked are non-existent (in other words the file is full of
+ * holes).
+ */
+ gf_msg_debug(this->name, 0,
+ "All shards that need to be "
+ "unlinked are non-existent: %s",
+ uuid_utoa(gfid));
+ return 0;
+ }
+
+ SHARD_SET_ROOT_FS_ID(frame, local);
+ local->barrier.waitfor = count;
+ cur_block = cur_block_idx + local->first_block;
+
+ while (cur_block_idx < local->num_blocks) {
+ if (!local->inode_list[cur_block_idx])
+ goto next;
+
+ if (wind_failed) {
+ shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1,
+ ENOMEM, NULL, NULL, NULL);
+ goto next;
+ }
+
+ shard_make_block_abspath(cur_block, gfid, path, sizeof(path));
+ bname = strrchr(path, '/') + 1;
+ loc.parent = inode_ref(priv->dot_shard_inode);
+ ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+ "Inode path failed"
+ " on %s, base file gfid = %s",
+ bname, uuid_utoa(gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ loc_wipe(&loc);
+ wind_failed = _gf_true;
+ shard_unlink_shards_do_cbk(frame, (void *)(long)cur_block, this, -1,
+ ENOMEM, NULL, NULL, NULL);
+ goto next;
+ }
+
+ loc.name = strrchr(loc.path, '/');
+ if (loc.name)
+ loc.name++;
+ loc.inode = inode_ref(local->inode_list[cur_block_idx]);
+
+ STACK_WIND_COOKIE(frame, shard_unlink_shards_do_cbk,
+ (void *)(long)cur_block, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &loc, local->xflag,
+ local->xattr_req);
+ loc_wipe(&loc);
+ next:
+ cur_block++;
+ cur_block_idx++;
+ }
+ syncbarrier_wait(&local->barrier, count);
+ SHARD_UNSET_ROOT_FS_ID(frame, local);
+ return 0;
+}
+
+int
+shard_regulated_shards_deletion(call_frame_t *cleanup_frame, xlator_t *this,
+ int now, int first_block, gf_dirent_t *entry)
+{
+ int i = 0;
+ int ret = 0;
+ shard_local_t *local = NULL;
+ uuid_t gfid = {
+ 0,
+ };
+
+ local = cleanup_frame->local;
+
+ local->inode_list = GF_CALLOC(now, sizeof(inode_t *),
+ gf_shard_mt_inode_list);
+ if (!local->inode_list)
+ return -ENOMEM;
+
+ local->first_block = first_block;
+ local->last_block = first_block + now - 1;
+ local->num_blocks = now;
+ gf_uuid_parse(entry->d_name, gfid);
+ gf_uuid_copy(local->base_gfid, gfid);
+ local->resolver_base_inode = inode_find(this->itable, gfid);
+ local->call_count = 0;
+ ret = syncbarrier_init(&local->barrier);
+ if (ret) {
+ GF_FREE(local->inode_list);
+ local->inode_list = NULL;
+ inode_unref(local->resolver_base_inode);
+ local->resolver_base_inode = NULL;
+ return -errno;
+ }
+ shard_common_resolve_shards(cleanup_frame, this,
+ shard_post_resolve_unlink_handler);
+
+ for (i = 0; i < local->num_blocks; i++) {
+ if (local->inode_list[i])
+ inode_unref(local->inode_list[i]);
+ }
+ GF_FREE(local->inode_list);
+ local->inode_list = NULL;
+ if (local->op_ret)
+ ret = -local->op_errno;
+ syncbarrier_destroy(&local->barrier);
+ inode_unref(local->resolver_base_inode);
+ local->resolver_base_inode = NULL;
+ STACK_RESET(cleanup_frame->root);
+ return ret;
+}
+
+int
+__shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this,
+ gf_dirent_t *entry, inode_t *inode)
+{
+ int ret = 0;
+ int shard_count = 0;
+ int first_block = 0;
+ int now = 0;
+ uint64_t size = 0;
+ uint64_t block_size = 0;
+ uint64_t size_array[4] = {
+ 0,
+ };
+ void *bsize = NULL;
+ void *size_attr = NULL;
+ dict_t *xattr_rsp = NULL;
+ loc_t loc = {
+ 0,
+ };
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
+
+ priv = this->private;
+ local = cleanup_frame->local;
+ ret = dict_reset(local->xattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to reset dict");
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to set dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_FILE_SIZE, 8 * 4);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to set dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ loc.inode = inode_ref(inode);
+ loc.parent = inode_ref(priv->dot_shard_rm_inode);
+ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+ "Inode path failed on %s", entry->d_name);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ loc.name = strrchr(loc.path, '/');
+ if (loc.name)
+ loc.name++;
+ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, local->xattr_req,
+ &xattr_rsp);
+ if (ret)
+ goto err;
+
+ ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_BLOCK_SIZE, &bsize);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to get dict value: key:%s", GF_XATTR_SHARD_BLOCK_SIZE);
+ goto err;
+ }
+ block_size = ntoh64(*((uint64_t *)bsize));
+
+ ret = dict_get_ptr(xattr_rsp, GF_XATTR_SHARD_FILE_SIZE, &size_attr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to get dict value: key:%s", GF_XATTR_SHARD_FILE_SIZE);
+ goto err;
+ }
+
+ memcpy(size_array, size_attr, sizeof(size_array));
+ size = ntoh64(size_array[0]);
+
+ shard_count = (size / block_size) - 1;
+ if (shard_count < 0) {
+ gf_msg_debug(this->name, 0,
+ "Size of %s hasn't grown beyond "
+ "its shard-block-size. Nothing to delete. "
+ "Returning",
+ entry->d_name);
+ /* File size < shard-block-size, so nothing to delete */
+ ret = 0;
+ goto delete_marker;
+ }
+ if ((size % block_size) > 0)
+ shard_count++;
+
+ if (shard_count == 0) {
+ gf_msg_debug(this->name, 0,
+ "Size of %s is exactly equal to "
+ "its shard-block-size. Nothing to delete. "
+ "Returning",
+ entry->d_name);
+ ret = 0;
+ goto delete_marker;
+ }
+ gf_msg_debug(this->name, 0,
+ "base file = %s, "
+ "shard-block-size=%" PRIu64 ", file-size=%" PRIu64
+ ", "
+ "shard_count=%d",
+ entry->d_name, block_size, size, shard_count);
+
+ /* Perform a gfid-based lookup to see if gfid corresponding to marker
+ * file's base name exists.
+ */
+ loc_wipe(&loc);
+ loc.inode = inode_new(this->itable);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ gf_uuid_parse(entry->d_name, loc.gfid);
+ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
+ if (!ret) {
+ gf_msg_debug(this->name, 0,
+ "Base shard corresponding to gfid "
+ "%s is present. Skipping shard deletion. "
+ "Returning",
+ entry->d_name);
+ ret = 0;
+ goto delete_marker;
+ }
+
+ first_block = 1;
+
+ while (shard_count) {
+ if (shard_count < local->deletion_rate) {
+ now = shard_count;
+ shard_count = 0;
+ } else {
+ now = local->deletion_rate;
+ shard_count -= local->deletion_rate;
+ }
+
+ gf_msg_debug(this->name, 0,
+ "deleting %d shards starting from "
+ "block %d of gfid %s",
+ now, first_block, entry->d_name);
+ ret = shard_regulated_shards_deletion(cleanup_frame, this, now,
+ first_block, entry);
+ if (ret)
+ goto err;
+ first_block += now;
+ }
+
+delete_marker:
+ loc_wipe(&loc);
+ loc.inode = inode_ref(inode);
+ loc.parent = inode_ref(priv->dot_shard_rm_inode);
+ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+ "Inode path failed on %s", entry->d_name);
+ ret = -ENOMEM;
+ goto err;
+ }
+ loc.name = strrchr(loc.path, '/');
+ if (loc.name)
+ loc.name++;
+ ret = syncop_unlink(FIRST_CHILD(this), &loc, NULL, NULL);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_SHARDS_DELETION_FAILED,
+ "Failed to delete %s "
+ "from /%s",
+ entry->d_name, GF_SHARD_REMOVE_ME_DIR);
+err:
+ if (xattr_rsp)
+ dict_unref(xattr_rsp);
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
+shard_delete_shards_of_entry(call_frame_t *cleanup_frame, xlator_t *this,
+ gf_dirent_t *entry, inode_t *inode)
+{
+ int ret = -1;
+ loc_t loc = {
+ 0,
+ };
+ shard_priv_t *priv = NULL;
+
+ priv = this->private;
+ loc.inode = inode_ref(priv->dot_shard_rm_inode);
+
+ ret = syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name,
+ ENTRYLK_LOCK_NB, ENTRYLK_WRLCK, NULL, NULL);
+ if (ret < 0) {
+ if (ret == -EAGAIN) {
+ ret = 0;
+ }
+ goto out;
+ }
+ {
+ ret = __shard_delete_shards_of_entry(cleanup_frame, this, entry, inode);
+ }
+ syncop_entrylk(FIRST_CHILD(this), this->name, &loc, entry->d_name,
+ ENTRYLK_UNLOCK, ENTRYLK_WRLCK, NULL, NULL);
+out:
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
+shard_delete_shards_cbk(int ret, call_frame_t *frame, void *data)
+{
+ SHARD_STACK_DESTROY(frame);
+ return 0;
+}
+
+int
+shard_resolve_internal_dir(xlator_t *this, shard_local_t *local,
+ shard_internal_dir_type_t type)
+{
+ int ret = 0;
+ char *bname = NULL;
+ loc_t *loc = NULL;
+ shard_priv_t *priv = NULL;
+ uuid_t gfid = {
+ 0,
+ };
+ struct iatt stbuf = {
+ 0,
+ };
+
+ priv = this->private;
+
+ switch (type) {
+ case SHARD_INTERNAL_DIR_DOT_SHARD:
+ loc = &local->dot_shard_loc;
+ gf_uuid_copy(gfid, priv->dot_shard_gfid);
+ bname = GF_SHARD_DIR;
+ break;
+ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+ loc = &local->dot_shard_rm_loc;
+ gf_uuid_copy(gfid, priv->dot_shard_rm_gfid);
+ bname = GF_SHARD_REMOVE_ME_DIR;
+ break;
+ default:
+ break;
+ }
+
+ loc->inode = inode_find(this->itable, gfid);
+ if (!loc->inode) {
+ ret = shard_init_internal_dir_loc(this, local, type);
+ if (ret)
+ goto err;
+ ret = dict_reset(local->xattr_req);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to reset "
+ "dict");
+ ret = -ENOMEM;
+ goto err;
+ }
+ ret = dict_set_gfuuid(local->xattr_req, "gfid-req", gfid, true);
+ ret = syncop_lookup(FIRST_CHILD(this), loc, &stbuf, NULL,
+ local->xattr_req, NULL);
+ if (ret < 0) {
+ if (ret != -ENOENT)
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ SHARD_MSG_SHARDS_DELETION_FAILED,
+ "Lookup on %s failed, exiting", bname);
+ goto err;
+ } else {
+ shard_link_internal_dir_inode(local, loc->inode, &stbuf, type);
+ }
+ }
+ ret = 0;
+err:
+ return ret;
+}
+
+int
+shard_lookup_marker_entry(xlator_t *this, shard_local_t *local,
+ gf_dirent_t *entry)
+{
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+
+ loc.inode = inode_new(this->itable);
+ if (!loc.inode) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ loc.parent = inode_ref(local->fd->inode);
+
+ ret = inode_path(loc.parent, entry->d_name, (char **)&(loc.path));
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+ "Inode path failed on %s", entry->d_name);
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ loc.name = strrchr(loc.path, '/');
+ if (loc.name)
+ loc.name++;
+
+ ret = syncop_lookup(FIRST_CHILD(this), &loc, NULL, NULL, NULL, NULL);
+ if (ret < 0) {
+ goto err;
+ }
+ entry->inode = inode_ref(loc.inode);
+ ret = 0;
+err:
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
+shard_delete_shards(void *opaque)
+{
+ int ret = 0;
+ off_t offset = 0;
+ loc_t loc = {
+ 0,
+ };
+ inode_t *link_inode = NULL;
+ xlator_t *this = NULL;
+ shard_priv_t *priv = NULL;
+ shard_local_t *local = NULL;
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ call_frame_t *cleanup_frame = NULL;
+ gf_boolean_t done = _gf_false;
+
+ this = THIS;
+ priv = this->private;
+ INIT_LIST_HEAD(&entries.list);
+
+ cleanup_frame = opaque;
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+ "Failed to create local to "
+ "delete shards");
+ ret = -ENOMEM;
+ goto err;
+ }
+ cleanup_frame->local = local;
+ local->fop = GF_FOP_UNLINK;
+
+ local->xattr_req = dict_new();
+ if (!local->xattr_req) {
+ ret = -ENOMEM;
+ goto err;
+ }
+ local->deletion_rate = priv->deletion_rate;
+
+ ret = shard_resolve_internal_dir(this, local, SHARD_INTERNAL_DIR_DOT_SHARD);
+ if (ret == -ENOENT) {
+ gf_msg_debug(this->name, 0,
+ ".shard absent. Nothing to"
+ " delete. Exiting");
+ ret = 0;
+ goto err;
+ } else if (ret < 0) {
+ goto err;
+ }
+
+ ret = shard_resolve_internal_dir(this, local,
+ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
+ if (ret == -ENOENT) {
+ gf_msg_debug(this->name, 0,
+ ".remove_me absent. "
+ "Nothing to delete. Exiting");
+ ret = 0;
+ goto err;
+ } else if (ret < 0) {
+ goto err;
+ }
+
+ local->fd = fd_anonymous(local->dot_shard_rm_loc.inode);
+ if (!local->fd) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ for (;;) {
+ offset = 0;
+ LOCK(&priv->lock);
+ {
+ if (priv->bg_del_state == SHARD_BG_DELETION_LAUNCHING) {
+ priv->bg_del_state = SHARD_BG_DELETION_IN_PROGRESS;
+ } else if (priv->bg_del_state == SHARD_BG_DELETION_IN_PROGRESS) {
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
+ done = _gf_true;
+ }
+ }
+ UNLOCK(&priv->lock);
+ if (done)
+ break;
+ while (
+ (ret = syncop_readdirp(FIRST_CHILD(this), local->fd, 131072, offset,
+ &entries, local->xattr_req, NULL))) {
+ if (ret > 0)
+ ret = 0;
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ offset = entry->d_off;
+
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ continue;
+
+ if (!entry->inode) {
+ ret = shard_lookup_marker_entry(this, local, entry);
+ if (ret < 0)
+ continue;
+ }
+ link_inode = inode_link(entry->inode, local->fd->inode,
+ entry->d_name, &entry->d_stat);
+
+ gf_msg_debug(this->name, 0,
+ "Initiating deletion of "
+ "shards of gfid %s",
+ entry->d_name);
+ ret = shard_delete_shards_of_entry(cleanup_frame, this, entry,
+ link_inode);
+ inode_unlink(link_inode, local->fd->inode, entry->d_name);
+ inode_unref(link_inode);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret,
+ SHARD_MSG_SHARDS_DELETION_FAILED,
+ "Failed to clean up shards of gfid %s",
+ entry->d_name);
+ continue;
+ }
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ SHARD_MSG_SHARD_DELETION_COMPLETED,
+ "Deleted "
+ "shards of gfid=%s from backend",
+ entry->d_name);
+ }
+ gf_dirent_free(&entries);
+ if (ret)
+ break;
+ }
+ }
+ ret = 0;
+ loc_wipe(&loc);
+ return ret;
+
+err:
+ LOCK(&priv->lock);
+ {
+ priv->bg_del_state = SHARD_BG_DELETION_NONE;
+ }
+ UNLOCK(&priv->lock);
+ loc_wipe(&loc);
+ return ret;
+}
+
+int
+shard_unlock_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ if (op_ret)
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+ "Unlock failed. Please check brick logs for "
+ "more details");
+ SHARD_STACK_DESTROY(frame);
+ return 0;
+}
+
+int
+shard_unlock_inodelk(call_frame_t *frame, xlator_t *this)
+{
+ loc_t *loc = NULL;
+ call_frame_t *lk_frame = NULL;
+ shard_local_t *local = NULL;
+ shard_local_t *lk_local = NULL;
+ shard_inodelk_t *lock = NULL;
+
+ local = frame->local;
+ lk_frame = local->inodelk_frame;
+ lk_local = lk_frame->local;
+ local->inodelk_frame = NULL;
+ loc = &local->int_inodelk.loc;
+ lock = &lk_local->int_inodelk;
+ lock->flock.l_type = F_UNLCK;
+
+ STACK_WIND(lk_frame, shard_unlock_inodelk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk, lock->domain, loc, F_SETLK,
+ &lock->flock, NULL);
+ local->int_inodelk.acquired_lock = _gf_false;
+ return 0;
+}
+
+int
+shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata);
+int
+shard_rename_src_base_file(call_frame_t *frame, xlator_t *this)
+{
+ int ret = 0;
+ loc_t *dst_loc = NULL;
+ loc_t tmp_loc = {
+ 0,
+ };
+ shard_local_t *local = frame->local;
+
+ if (local->dst_block_size) {
+ tmp_loc.parent = inode_ref(local->loc2.parent);
+ ret = inode_path(tmp_loc.parent, local->loc2.name,
+ (char **)&tmp_loc.path);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+ "Inode path failed"
+ " on pargfid=%s bname=%s",
+ uuid_utoa(tmp_loc.parent->gfid), local->loc2.name);
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+
+ tmp_loc.name = strrchr(tmp_loc.path, '/');
+ if (tmp_loc.name)
+ tmp_loc.name++;
+ dst_loc = &tmp_loc;
+ } else {
+ dst_loc = &local->loc2;
+ }
+
+ /* To-Do: Request open-fd count on dst base file */
+ STACK_WIND(frame, shard_rename_src_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, &local->loc, dst_loc,
+ local->xattr_req);
+ loc_wipe(&tmp_loc);
+ return 0;
+err:
+ loc_wipe(&tmp_loc);
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+}
+
+int
+shard_unlink_base_file(call_frame_t *frame, xlator_t *this);
+
+int
+shard_set_size_attrs_on_marker_file_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ shard_priv_t *priv = NULL;
+ shard_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+ "Xattrop on marker file failed "
+ "while performing %s; entry gfid=%s",
+ gf_fop_string(local->fop), local->newloc.name);
+ goto err;
+ }
+
+ inode_unlink(local->newloc.inode, priv->dot_shard_rm_inode,
+ local->newloc.name);
+
+ if (local->fop == GF_FOP_UNLINK)
+ shard_unlink_base_file(frame, this);
+ else if (local->fop == GF_FOP_RENAME)
+ shard_rename_src_base_file(frame, this);
+ return 0;
+err:
+ shard_common_failure_unwind(local->fop, frame, op_ret, op_errno);
+ return 0;
+}
+
+int
+shard_set_size_attrs_on_marker_file(call_frame_t *frame, xlator_t *this)
+{
+ int op_errno = ENOMEM;
+ uint64_t bs = 0;
+ dict_t *xdata = NULL;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+ xdata = dict_new();
+ if (!xdata)
+ goto err;
+
+ if (local->fop == GF_FOP_UNLINK)
+ bs = local->block_size;
+ else if (local->fop == GF_FOP_RENAME)
+ bs = local->dst_block_size;
+ SHARD_INODE_CREATE_INIT(this, bs, xdata, &local->newloc,
+ local->prebuf.ia_size, 0, err);
+ STACK_WIND(frame, shard_set_size_attrs_on_marker_file_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->xattrop,
+ &local->newloc, GF_XATTROP_GET_AND_SET, xdata, NULL);
+ dict_unref(xdata);
+ return 0;
+err:
+ if (xdata)
+ dict_unref(xdata);
+ shard_common_failure_unwind(local->fop, frame, -1, op_errno);
+ return 0;
+}
+
+int
+shard_lookup_marker_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ inode_t *linked_inode = NULL;
+ shard_priv_t *priv = NULL;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ if (op_ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+ "Lookup on marker file failed "
+ "while performing %s; entry gfid=%s",
+ gf_fop_string(local->fop), local->newloc.name);
+ goto err;
+ }
+
+ linked_inode = inode_link(inode, priv->dot_shard_rm_inode,
+ local->newloc.name, buf);
+ inode_unref(local->newloc.inode);
+ local->newloc.inode = linked_inode;
+ shard_set_size_attrs_on_marker_file(frame, this);
+ return 0;
+err:
+ shard_common_failure_unwind(local->fop, frame, op_ret, op_errno);
+ return 0;
+}
+
+int
+shard_lookup_marker_file(call_frame_t *frame, xlator_t *this)
+{
+ int op_errno = ENOMEM;
+ dict_t *xattr_req = NULL;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ xattr_req = shard_create_gfid_dict(local->xattr_req);
+ if (!xattr_req)
+ goto err;
+
+ STACK_WIND(frame, shard_lookup_marker_file_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &local->newloc, xattr_req);
+ dict_unref(xattr_req);
+ return 0;
+err:
+ shard_common_failure_unwind(local->fop, frame, -1, op_errno);
+ return 0;
+}
+
+int
+shard_create_marker_file_under_remove_me_cbk(
+ call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ inode_t *linked_inode = NULL;
+ shard_priv_t *priv = NULL;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+ priv = this->private;
+
+ SHARD_UNSET_ROOT_FS_ID(frame, local);
+ if (op_ret < 0) {
+ if ((op_errno != EEXIST) && (op_errno != ENODATA)) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+ "Marker file creation "
+ "failed while performing %s; entry gfid=%s",
+ gf_fop_string(local->fop), local->newloc.name);
+ goto err;
+ } else {
+ shard_lookup_marker_file(frame, this);
+ return 0;
+ }
+ }
+
+ linked_inode = inode_link(inode, priv->dot_shard_rm_inode,
+ local->newloc.name, buf);
+ inode_unref(local->newloc.inode);
+ local->newloc.inode = linked_inode;
+
+ if (local->fop == GF_FOP_UNLINK)
+ shard_unlink_base_file(frame, this);
+ else if (local->fop == GF_FOP_RENAME)
+ shard_rename_src_base_file(frame, this);
+ return 0;
+err:
+ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+ return 0;
+}
+
+int
+shard_create_marker_file_under_remove_me(call_frame_t *frame, xlator_t *this,
+ loc_t *loc)
+{
+ int ret = 0;
+ int op_errno = ENOMEM;
+ uint64_t bs = 0;
+ char g1[64] = {
+ 0,
+ };
+ char g2[64] = {
+ 0,
+ };
+ dict_t *xattr_req = NULL;
+ shard_priv_t *priv = NULL;
+ shard_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ SHARD_SET_ROOT_FS_ID(frame, local);
+
+ xattr_req = shard_create_gfid_dict(local->xattr_req);
+ if (!xattr_req)
+ goto err;
+
+ local->newloc.inode = inode_new(this->itable);
+ local->newloc.parent = inode_ref(priv->dot_shard_rm_inode);
+ ret = inode_path(local->newloc.parent, uuid_utoa(loc->inode->gfid),
+ (char **)&local->newloc.path);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+ "Inode path failed on "
+ "pargfid=%s bname=%s",
+ uuid_utoa_r(priv->dot_shard_rm_gfid, g1),
+ uuid_utoa_r(loc->inode->gfid, g2));
+ goto err;
+ }
+ local->newloc.name = strrchr(local->newloc.path, '/');
+ if (local->newloc.name)
+ local->newloc.name++;
+
+ if (local->fop == GF_FOP_UNLINK)
+ bs = local->block_size;
+ else if (local->fop == GF_FOP_RENAME)
+ bs = local->dst_block_size;
+
+ SHARD_INODE_CREATE_INIT(this, bs, xattr_req, &local->newloc,
+ local->prebuf.ia_size, 0, err);
+
+ STACK_WIND(frame, shard_create_marker_file_under_remove_me_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mknod,
+ &local->newloc, 0, 0, 0644, xattr_req);
+ dict_unref(xattr_req);
+ return 0;
+
+err:
+ if (xattr_req)
+ dict_unref(xattr_req);
+ shard_create_marker_file_under_remove_me_cbk(frame, 0, this, -1, op_errno,
+ NULL, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+shard_unlock_entrylk(call_frame_t *frame, xlator_t *this);
+
+int
+shard_unlink_base_file_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ int ret = 0;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ } else {
+ shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this);
+ local->preoldparent = *preparent;
+ local->postoldparent = *postparent;
+ if (xdata)
+ local->xattr_rsp = dict_ref(xdata);
+ if (local->cleanup_required)
+ shard_start_background_deletion(this);
+ }
+
+ if (local->entrylk_frame) {
+ ret = shard_unlock_entrylk(frame, this);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ }
+ }
+
+ ret = shard_unlock_inodelk(frame, this);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ }
+
+ shard_unlink_cbk(frame, this);
+ return 0;
+}
+
+int
+shard_unlink_base_file(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = frame->local;
+
+ /* To-Do: Request open-fd count on base file */
+ STACK_WIND(frame, shard_unlink_base_file_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &local->loc, local->xflag,
+ local->xattr_req);
+ return 0;
+}
+
+int
+shard_unlock_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ if (op_ret)
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SHARD_MSG_FOP_FAILED,
+ "Unlock failed. Please check brick logs for "
+ "more details");
+ SHARD_STACK_DESTROY(frame);
+ return 0;
+}
+
+int
+shard_unlock_entrylk(call_frame_t *frame, xlator_t *this)
+{
+ loc_t *loc = NULL;
+ call_frame_t *lk_frame = NULL;
+ shard_local_t *local = NULL;
+ shard_local_t *lk_local = NULL;
+ shard_entrylk_t *lock = NULL;
+
+ local = frame->local;
+ lk_frame = local->entrylk_frame;
+ lk_local = lk_frame->local;
+ local->entrylk_frame = NULL;
+ lock = &lk_local->int_entrylk;
+ loc = &lock->loc;
+
+ STACK_WIND(lk_frame, shard_unlock_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, loc,
+ lk_local->int_entrylk.basename, ENTRYLK_UNLOCK, ENTRYLK_WRLCK,
+ NULL);
+ local->int_entrylk.acquired_lock = _gf_false;
+ return 0;
+}
+
+int
+shard_post_entrylk_fop_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ switch (local->fop) {
+ case GF_FOP_UNLINK:
+ case GF_FOP_RENAME:
+ shard_create_marker_file_under_remove_me(frame, this,
+ &local->int_inodelk.loc);
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+ "post-entrylk handler not defined. This case should not"
+ " be hit");
+ break;
+ }
+ return 0;
+}
+
+int
+shard_acquire_entrylk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ call_frame_t *main_frame = NULL;
+ shard_local_t *local = NULL;
+ shard_local_t *main_local = NULL;
+
+ local = frame->local;
+ main_frame = local->main_frame;
+ main_local = main_frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(main_local->fop, main_frame, op_ret,
+ op_errno);
+ return 0;
+ }
+ main_local->int_entrylk.acquired_lock = _gf_true;
+ shard_post_entrylk_fop_handler(main_frame, this);
+ return 0;
+}
+
+int
+shard_acquire_entrylk(call_frame_t *frame, xlator_t *this, inode_t *inode,
+ uuid_t gfid)
+{
+ char gfid_str[GF_UUID_BUF_SIZE] = {
+ 0,
+ };
+ shard_local_t *local = NULL;
+ shard_local_t *entrylk_local = NULL;
+ shard_entrylk_t *int_entrylk = NULL;
+ call_frame_t *entrylk_frame = NULL;
+
+ local = frame->local;
+ entrylk_frame = create_frame(this, this->ctx->pool);
+ if (!entrylk_frame) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+ "Failed to create new frame "
+ "to lock marker file");
+ goto err;
+ }
+
+ entrylk_local = mem_get0(this->local_pool);
+ if (!entrylk_local) {
+ STACK_DESTROY(entrylk_frame->root);
+ goto err;
+ }
+
+ entrylk_frame->local = entrylk_local;
+ entrylk_local->main_frame = frame;
+ int_entrylk = &entrylk_local->int_entrylk;
+
+ int_entrylk->loc.inode = inode_ref(inode);
+ set_lk_owner_from_ptr(&entrylk_frame->root->lk_owner, entrylk_frame->root);
+ local->entrylk_frame = entrylk_frame;
+ gf_uuid_unparse(gfid, gfid_str);
+ int_entrylk->basename = gf_strdup(gfid_str);
+
+ STACK_WIND(entrylk_frame, shard_acquire_entrylk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->entrylk, this->name, &int_entrylk->loc,
+ int_entrylk->basename, ENTRYLK_LOCK, ENTRYLK_WRLCK, NULL);
+ return 0;
+err:
+ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_post_lookup_base_shard_rm_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+ return 0;
+ }
+
+ if (local->prebuf.ia_nlink > 1) {
+ gf_msg_debug(this->name, 0,
+ "link count on %s > 1:%d, "
+ "performing rename()/unlink()",
+ local->int_inodelk.loc.path, local->prebuf.ia_nlink);
+ if (local->fop == GF_FOP_RENAME)
+ shard_rename_src_base_file(frame, this);
+ else if (local->fop == GF_FOP_UNLINK)
+ shard_unlink_base_file(frame, this);
+ } else {
+ gf_msg_debug(this->name, 0,
+ "link count on %s = 1, creating "
+ "file under .remove_me",
+ local->int_inodelk.loc.path);
+ local->cleanup_required = _gf_true;
+ shard_acquire_entrylk(frame, this, priv->dot_shard_rm_inode,
+ local->prebuf.ia_gfid);
+ }
+ return 0;
+}
+
+int
+shard_post_inodelk_fop_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ switch (local->fop) {
+ case GF_FOP_UNLINK:
+ case GF_FOP_RENAME:
+ shard_refresh_base_file(frame, this, &local->int_inodelk.loc, NULL,
+ shard_post_lookup_base_shard_rm_handler);
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+ "post-inodelk handler not defined. This case should not"
+ " be hit");
+ break;
+ }
+ return 0;
+}
+
+int
+shard_acquire_inodelk_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ call_frame_t *main_frame = NULL;
+ shard_local_t *local = NULL;
+ shard_local_t *main_local = NULL;
+
+ local = frame->local;
+ main_frame = local->main_frame;
+ main_local = main_frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(main_local->fop, main_frame, op_ret,
+ op_errno);
+ return 0;
+ }
+ main_local->int_inodelk.acquired_lock = _gf_true;
+ shard_post_inodelk_fop_handler(main_frame, this);
+ return 0;
+}
+
+int
+shard_acquire_inodelk(call_frame_t *frame, xlator_t *this, loc_t *loc)
+{
+ call_frame_t *lk_frame = NULL;
+ shard_local_t *local = NULL;
+ shard_local_t *lk_local = NULL;
+ shard_inodelk_t *int_inodelk = NULL;
+
+ local = frame->local;
+ lk_frame = create_frame(this, this->ctx->pool);
+ if (!lk_frame) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, SHARD_MSG_MEMALLOC_FAILED,
+ "Failed to create new frame "
+ "to lock base shard");
+ goto err;
+ }
+ lk_local = mem_get0(this->local_pool);
+ if (!lk_local) {
+ STACK_DESTROY(lk_frame->root);
+ goto err;
+ }
+
+ lk_frame->local = lk_local;
+ lk_local->main_frame = frame;
+ int_inodelk = &lk_local->int_inodelk;
+
+ int_inodelk->flock.l_len = 0;
+ int_inodelk->flock.l_start = 0;
+ int_inodelk->domain = this->name;
+ int_inodelk->flock.l_type = F_WRLCK;
+ loc_copy(&local->int_inodelk.loc, loc);
+ set_lk_owner_from_ptr(&lk_frame->root->lk_owner, lk_frame->root);
+ local->inodelk_frame = lk_frame;
+
+ STACK_WIND(lk_frame, shard_acquire_inodelk_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->inodelk, int_inodelk->domain,
+ &local->int_inodelk.loc, F_SETLKW, &int_inodelk->flock, NULL);
+ return 0;
+err:
+ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_post_mkdir_rm_handler(call_frame_t *frame, xlator_t *this)
+{
+ loc_t *loc = NULL;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+ return 0;
+ }
+ if (local->fop == GF_FOP_UNLINK)
+ loc = &local->loc;
+ else if (local->fop == GF_FOP_RENAME)
+ loc = &local->loc2;
+ shard_acquire_inodelk(frame, this, loc);
+ return 0;
+}
+
+int
+shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
+ shard_post_resolve_fop_handler_t handler,
+ shard_internal_dir_type_t type);
+int
+shard_pre_mkdir_rm_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, -1, local->op_errno);
+ return 0;
+ }
+ shard_mkdir_internal_dir(frame, this, shard_post_mkdir_rm_handler,
+ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
+ return 0;
+}
+
+void
+shard_begin_rm_resolution(call_frame_t *frame, xlator_t *this)
+{
+ shard_priv_t *priv = NULL;
+ shard_local_t *local = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ local->dot_shard_rm_loc.inode = inode_find(this->itable,
+ priv->dot_shard_rm_gfid);
+ if (!local->dot_shard_rm_loc.inode) {
+ local->dot_shard_loc.inode = inode_find(this->itable,
+ priv->dot_shard_gfid);
+ if (!local->dot_shard_loc.inode) {
+ shard_mkdir_internal_dir(frame, this, shard_pre_mkdir_rm_handler,
+ SHARD_INTERNAL_DIR_DOT_SHARD);
+ } else {
+ local->post_res_handler = shard_pre_mkdir_rm_handler;
+ shard_refresh_internal_dir(frame, this,
+ SHARD_INTERNAL_DIR_DOT_SHARD);
+ }
+ } else {
+ local->post_res_handler = shard_post_mkdir_rm_handler;
+ shard_refresh_internal_dir(frame, this,
+ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME);
+ }
+}
+
+int
+shard_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ int ret = -1;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+
+ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
+ if ((ret) && (!IA_ISLNK(loc->inode->ia_type))) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block "
+ "size from inode ctx of %s",
+ uuid_utoa(loc->inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ STACK_WIND(frame, default_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+ return 0;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+
+ loc_copy(&local->loc, loc);
+ local->xflag = xflag;
+ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+ local->block_size = block_size;
+ local->resolver_base_inode = loc->inode;
+ local->fop = GF_FOP_UNLINK;
+ if (!this->itable)
+ this->itable = (local->loc.inode)->table;
+
+ local->resolve_not = _gf_true;
+ shard_begin_rm_resolution(frame, this);
+ return 0;
+err:
+ shard_common_failure_unwind(GF_FOP_UNLINK, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_post_rename_lookup_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_rename_cbk(frame, this);
+ return 0;
+}
+
+int
+shard_rename_src_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ int ret = 0;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto err;
+ }
+ /* Set ctx->refresh to TRUE to force a lookup on disk when
+ * shard_lookup_base_file() is called next to refresh the hard link
+ * count in ctx. Note that this is applicable only to the case where
+ * the rename dst is already existent and sharded.
+ */
+ if ((local->dst_block_size) && (!local->cleanup_required))
+ shard_inode_ctx_set_refresh_flag(local->int_inodelk.loc.inode, this);
+
+ local->prebuf = *buf;
+ local->preoldparent = *preoldparent;
+ local->postoldparent = *postoldparent;
+ local->prenewparent = *prenewparent;
+ local->postnewparent = *postnewparent;
+ if (xdata)
+ local->xattr_rsp = dict_ref(xdata);
+
+ if (local->dst_block_size) {
+ if (local->entrylk_frame) {
+ ret = shard_unlock_entrylk(frame, this);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ }
+ }
+
+ ret = shard_unlock_inodelk(frame, this);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = -ret;
+ goto err;
+ }
+ if (local->cleanup_required)
+ shard_start_background_deletion(this);
+ }
+
+ /* Now the base file of src, if sharded, is looked up to gather ia_size
+ * and ia_blocks.*/
+ if (local->block_size) {
+ local->tmp_loc.inode = inode_new(this->itable);
+ gf_uuid_copy(local->tmp_loc.gfid, (local->loc.inode)->gfid);
+ shard_refresh_base_file(frame, this, &local->tmp_loc, NULL,
+ shard_post_rename_lookup_handler);
+ } else {
+ shard_rename_cbk(frame, this);
+ }
+ return 0;
+err:
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+}
+
+int
+shard_post_lookup_dst_base_file_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ /* Save dst base file attributes into postbuf so the information is not
+ * lost when it is overwritten after lookup on base file of src in
+ * shard_lookup_base_file_cbk().
+ */
+ local->postbuf = local->prebuf;
+ shard_rename_src_base_file(frame, this);
+ return 0;
+}
+
+int
+shard_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ int ret = -1;
+ uint64_t block_size = 0;
+ uint64_t dst_block_size = 0;
+ shard_local_t *local = NULL;
+
+ if (IA_ISDIR(oldloc->inode->ia_type)) {
+ STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ return 0;
+ }
+
+ ret = shard_inode_ctx_get_block_size(oldloc->inode, this, &block_size);
+ if ((ret) && (!IA_ISLNK(oldloc->inode->ia_type))) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block "
+ "size from inode ctx of %s",
+ uuid_utoa(oldloc->inode->gfid));
+ goto err;
+ }
+
+ if (newloc->inode)
+ ret = shard_inode_ctx_get_block_size(newloc->inode, this,
+ &dst_block_size);
+
+ /* The following stack_wind covers the case where:
+ * a. the src file is not sharded and dst doesn't exist, OR
+ * b. the src and dst both exist but are not sharded.
+ */
+ if (((!block_size) && (!dst_block_size)) ||
+ frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ STACK_WIND(frame, default_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ return 0;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+ loc_copy(&local->loc, oldloc);
+ loc_copy(&local->loc2, newloc);
+ local->resolver_base_inode = newloc->inode;
+ local->fop = GF_FOP_RENAME;
+ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+ if (!local->xattr_req)
+ goto err;
+
+ local->block_size = block_size;
+ local->dst_block_size = dst_block_size;
+ if (!this->itable)
+ this->itable = (local->loc.inode)->table;
+ local->resolve_not = _gf_true;
+
+ /* The following if-block covers the case where the dst file exists
+ * and is sharded.
+ */
+ if (local->dst_block_size) {
+ shard_begin_rm_resolution(frame, this);
+ } else {
+ /* The following block covers the case where the dst either doesn't
+ * exist or is NOT sharded but the src is sharded. In this case, shard
+ * xlator would go ahead and rename src to dst. Once done, it would also
+ * lookup the base shard of src to get the ia_size and ia_blocks xattr
+ * values.
+ */
+ shard_rename_src_base_file(frame, this);
+ }
+ return 0;
+
+err:
+ shard_common_failure_unwind(GF_FOP_RENAME, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int ret = -1;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret == -1)
+ goto unwind;
+
+ ret = shard_inode_ctx_set(inode, this, stbuf, local->block_size,
+ SHARD_ALL_MASK);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INODE_CTX_SET_FAILED,
+ "Failed to set inode "
+ "ctx for %s",
+ uuid_utoa(inode->gfid));
+
+unwind:
+ SHARD_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
+ preparent, postparent, xdata);
+ return 0;
+}
+
+int
+shard_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ shard_priv_t *priv = NULL;
+ shard_local_t *local = NULL;
+
+ priv = this->private;
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+ local->block_size = priv->block_size;
+
+ if (!__is_gsyncd_on_shard_dir(frame, loc)) {
+ SHARD_INODE_CREATE_INIT(this, local->block_size, xdata, loc, 0, 0, err);
+ }
+
+ STACK_WIND(frame, shard_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ return 0;
+err:
+ shard_common_failure_unwind(GF_FOP_CREATE, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ /* To-Do: Handle open with O_TRUNC under locks */
+ SHARD_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata);
+ return 0;
+}
+
+int
+shard_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND(frame, shard_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+ return 0;
+}
+
+int
+shard_readv_do_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf, struct iobref *iobref,
+ dict_t *xdata)
+{
+ int i = 0;
+ int call_count = 0;
+ void *address = NULL;
+ uint64_t block_num = 0;
+ off_t off = 0;
+ struct iovec vec = {
+ 0,
+ };
+ shard_local_t *local = NULL;
+ fd_t *anon_fd = cookie;
+ shard_inode_ctx_t *ctx = NULL;
+
+ local = frame->local;
+
+ /* If shard has already seen a failure here before, there is no point
+ * in aggregating subsequent reads, so just go to out.
+ */
+ if (local->op_ret < 0)
+ goto out;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto out;
+ }
+
+ if (local->op_ret >= 0)
+ local->op_ret += op_ret;
+
+ shard_inode_ctx_get(anon_fd->inode, this, &ctx);
+ block_num = ctx->block_num;
+
+ if (block_num == local->first_block) {
+ address = local->iobuf->ptr;
+ } else {
+ /* else
+ * address to start writing to = beginning of buffer +
+ * number of bytes until end of first block +
+ * + block_size times number of blocks
+ * between the current block and the first
+ */
+ address = (char *)local->iobuf->ptr +
+ (local->block_size - (local->offset % local->block_size)) +
+ ((block_num - local->first_block - 1) * local->block_size);
+ }
+
+ for (i = 0; i < count; i++) {
+ address = (char *)address + off;
+ memcpy(address, vector[i].iov_base, vector[i].iov_len);
+ off += vector[i].iov_len;
+ }
+
+out:
+ if (anon_fd)
+ fd_unref(anon_fd);
+ call_count = shard_call_count_return(frame);
+ if (call_count == 0) {
+ SHARD_UNSET_ROOT_FS_ID(frame, local);
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+ local->op_errno);
+ } else {
+ if (xdata)
+ local->xattr_rsp = dict_ref(xdata);
+ vec.iov_base = local->iobuf->ptr;
+ if (local->offset + local->req_size > local->prebuf.ia_size)
+ local->total_size = local->prebuf.ia_size - local->offset;
+ vec.iov_len = local->total_size;
+ local->op_ret = local->total_size;
+ SHARD_STACK_UNWIND(readv, frame, local->op_ret, local->op_errno,
+ &vec, 1, &local->prebuf, local->iobref,
+ local->xattr_rsp);
+ return 0;
+ }
+ }
+
+ return 0;
+}
+
+int
+shard_readv_do(call_frame_t *frame, xlator_t *this)
+{
+ int i = 0;
+ int call_count = 0;
+ int last_block = 0;
+ int cur_block = 0;
+ off_t orig_offset = 0;
+ off_t shard_offset = 0;
+ size_t read_size = 0;
+ size_t remaining_size = 0;
+ fd_t *fd = NULL;
+ fd_t *anon_fd = NULL;
+ shard_local_t *local = NULL;
+ gf_boolean_t wind_failed = _gf_false;
+
+ local = frame->local;
+ fd = local->fd;
+
+ orig_offset = local->offset;
+ cur_block = local->first_block;
+ last_block = local->last_block;
+ remaining_size = local->total_size;
+ local->call_count = call_count = local->num_blocks;
+
+ SHARD_SET_ROOT_FS_ID(frame, local);
+
+ if (fd->flags & O_DIRECT)
+ local->flags = O_DIRECT;
+
+ while (cur_block <= last_block) {
+ if (wind_failed) {
+ shard_readv_do_cbk(frame, (void *)(long)0, this, -1, ENOMEM, NULL,
+ 0, NULL, NULL, NULL);
+ goto next;
+ }
+
+ shard_offset = orig_offset % local->block_size;
+ read_size = local->block_size - shard_offset;
+ if (read_size > remaining_size)
+ read_size = remaining_size;
+
+ remaining_size -= read_size;
+
+ if (cur_block == 0) {
+ anon_fd = fd_ref(fd);
+ } else {
+ anon_fd = fd_anonymous(local->inode_list[i]);
+ if (!anon_fd) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ wind_failed = _gf_true;
+ shard_readv_do_cbk(frame, (void *)(long)anon_fd, this, -1,
+ ENOMEM, NULL, 0, NULL, NULL, NULL);
+ goto next;
+ }
+ }
+
+ STACK_WIND_COOKIE(frame, shard_readv_do_cbk, anon_fd, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, anon_fd, read_size,
+ shard_offset, local->flags, local->xattr_req);
+
+ orig_offset += read_size;
+ next:
+ cur_block++;
+ i++;
+ call_count--;
+ }
+ return 0;
+}
+
+int
+shard_common_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int shard_block_num = (long)cookie;
+ int call_count = 0;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ if (op_errno == EEXIST) {
+ LOCK(&frame->lock);
+ {
+ local->eexist_count++;
+ }
+ UNLOCK(&frame->lock);
+ } else {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ }
+ gf_msg_debug(this->name, 0,
+ "mknod of shard %d "
+ "failed: %s",
+ shard_block_num, strerror(op_errno));
+ goto done;
+ }
+
+ shard_link_block_inode(local, shard_block_num, inode, buf);
+
+done:
+ call_count = shard_call_count_return(frame);
+ if (call_count == 0) {
+ SHARD_UNSET_ROOT_FS_ID(frame, local);
+ local->create_count = 0;
+ local->post_mknod_handler(frame, this);
+ }
+
+ return 0;
+}
+
+int
+shard_common_resume_mknod(call_frame_t *frame, xlator_t *this,
+ shard_post_mknod_fop_handler_t post_mknod_handler)
+{
+ int i = 0;
+ int shard_idx_iter = 0;
+ int last_block = 0;
+ int ret = 0;
+ int call_count = 0;
+ char path[PATH_MAX] = {
+ 0,
+ };
+ mode_t mode = 0;
+ char *bname = NULL;
+ shard_priv_t *priv = NULL;
+ shard_inode_ctx_t ctx_tmp = {
+ 0,
+ };
+ shard_local_t *local = NULL;
+ gf_boolean_t wind_failed = _gf_false;
+ fd_t *fd = NULL;
+ loc_t loc = {
+ 0,
+ };
+ dict_t *xattr_req = NULL;
+
+ local = frame->local;
+ priv = this->private;
+ fd = local->fd;
+ shard_idx_iter = local->first_block;
+ last_block = local->last_block;
+ call_count = local->call_count = local->create_count;
+ local->post_mknod_handler = post_mknod_handler;
+
+ SHARD_SET_ROOT_FS_ID(frame, local);
+
+ ret = shard_inode_ctx_get_all(fd->inode, this, &ctx_tmp);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get inode "
+ "ctx for %s",
+ uuid_utoa(fd->inode->gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+ mode = st_mode_from_ia(ctx_tmp.stat.ia_prot, ctx_tmp.stat.ia_type);
+
+ while (shard_idx_iter <= last_block) {
+ if (local->inode_list[i]) {
+ shard_idx_iter++;
+ i++;
+ continue;
+ }
+
+ if (wind_failed) {
+ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this,
+ -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
+ goto next;
+ }
+
+ shard_make_block_abspath(shard_idx_iter, fd->inode->gfid, path,
+ sizeof(path));
+
+ xattr_req = shard_create_gfid_dict(local->xattr_req);
+ if (!xattr_req) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ wind_failed = _gf_true;
+ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this,
+ -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
+ goto next;
+ }
+
+ bname = strrchr(path, '/') + 1;
+ loc.inode = inode_new(this->itable);
+ loc.parent = inode_ref(priv->dot_shard_inode);
+ ret = inode_path(loc.parent, bname, (char **)&(loc.path));
+ if (ret < 0 || !(loc.inode)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_PATH_FAILED,
+ "Inode path failed"
+ "on %s, base file gfid = %s",
+ bname, uuid_utoa(fd->inode->gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ wind_failed = _gf_true;
+ loc_wipe(&loc);
+ dict_unref(xattr_req);
+ shard_common_mknod_cbk(frame, (void *)(long)shard_idx_iter, this,
+ -1, ENOMEM, NULL, NULL, NULL, NULL, NULL);
+ goto next;
+ }
+
+ loc.name = strrchr(loc.path, '/');
+ if (loc.name)
+ loc.name++;
+
+ STACK_WIND_COOKIE(frame, shard_common_mknod_cbk,
+ (void *)(long)shard_idx_iter, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, &loc, mode,
+ ctx_tmp.stat.ia_rdev, 0, xattr_req);
+ loc_wipe(&loc);
+ dict_unref(xattr_req);
+
+ next:
+ shard_idx_iter++;
+ i++;
+ if (!--call_count)
+ break;
+ }
+
+ return 0;
+err:
+ /*
+ * This block is for handling failure in shard_inode_ctx_get_all().
+ * Failures in the while-loop are handled within the loop.
+ */
+ SHARD_UNSET_ROOT_FS_ID(frame, local);
+ post_mknod_handler(frame, this);
+ return 0;
+}
+
+int
+shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this);
+
+int
+shard_post_lookup_shards_readv_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ if (local->create_count) {
+ shard_common_resume_mknod(frame, this, shard_post_mknod_readv_handler);
+ } else {
+ shard_readv_do(frame, this);
+ }
+
+ return 0;
+}
+
+int
+shard_post_mknod_readv_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ if (!local->eexist_count) {
+ shard_readv_do(frame, this);
+ } else {
+ local->call_count = local->eexist_count;
+ shard_common_lookup_shards(frame, this, local->loc.inode,
+ shard_post_lookup_shards_readv_handler);
+ }
+ return 0;
+}
+
+int
+shard_post_resolve_readv_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ if (local->op_errno != ENOENT) {
+ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ } else {
+ struct iovec vec = {
+ 0,
+ };
+
+ vec.iov_base = local->iobuf->ptr;
+ vec.iov_len = local->total_size;
+ local->op_ret = local->total_size;
+ SHARD_STACK_UNWIND(readv, frame, local->op_ret, 0, &vec, 1,
+ &local->prebuf, local->iobref, NULL);
+ return 0;
+ }
+ }
+
+ if (local->call_count) {
+ shard_common_lookup_shards(frame, this, local->resolver_base_inode,
+ shard_post_lookup_shards_readv_handler);
+ } else {
+ shard_readv_do(frame, this);
+ }
+
+ return 0;
+}
+
+int
+shard_post_lookup_readv_handler(call_frame_t *frame, xlator_t *this)
+{
+ int ret = 0;
+ struct iobuf *iobuf = NULL;
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(GF_FOP_READ, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ if (local->offset >= local->prebuf.ia_size) {
+ /* If the read is being performed past the end of the file,
+ * unwind the FOP with 0 bytes read as status.
+ */
+ struct iovec vec = {
+ 0,
+ };
+
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, local->req_size);
+ if (!iobuf)
+ goto err;
+
+ vec.iov_base = iobuf->ptr;
+ vec.iov_len = 0;
+ local->iobref = iobref_new();
+ iobref_add(local->iobref, iobuf);
+ iobuf_unref(iobuf);
+
+ SHARD_STACK_UNWIND(readv, frame, 0, 0, &vec, 1, &local->prebuf,
+ local->iobref, NULL);
+ return 0;
+ }
+
+ local->first_block = get_lowest_block(local->offset, local->block_size);
+
+ local->total_size = local->req_size;
+
+ local->last_block = get_highest_block(local->offset, local->total_size,
+ local->block_size);
+
+ local->num_blocks = local->last_block - local->first_block + 1;
+ GF_ASSERT(local->num_blocks > 0);
+ local->resolver_base_inode = local->loc.inode;
+
+ local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
+ gf_shard_mt_inode_list);
+ if (!local->inode_list)
+ goto err;
+
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, local->total_size);
+ if (!iobuf)
+ goto err;
+
+ local->iobref = iobref_new();
+ if (!local->iobref) {
+ iobuf_unref(iobuf);
+ goto err;
+ }
+
+ if (iobref_add(local->iobref, iobuf) != 0) {
+ iobuf_unref(iobuf);
+ goto err;
+ }
+
+ memset(iobuf->ptr, 0, local->total_size);
+ iobuf_unref(iobuf);
+ local->iobuf = iobuf;
+
+ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
+ if (!local->dot_shard_loc.inode) {
+ ret = shard_init_internal_dir_loc(this, local,
+ SHARD_INTERNAL_DIR_DOT_SHARD);
+ if (ret)
+ goto err;
+ shard_lookup_internal_dir(frame, this, shard_post_resolve_readv_handler,
+ SHARD_INTERNAL_DIR_DOT_SHARD);
+ } else {
+ local->post_res_handler = shard_post_resolve_readv_handler;
+ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
+ }
+ return 0;
+err:
+ shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int ret = 0;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+
+ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block "
+ "size for %s from its inode ctx",
+ uuid_utoa(fd->inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ /* block_size = 0 means that the file was created before
+ * sharding was enabled on the volume.
+ */
+ STACK_WIND(frame, default_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags,
+ xdata);
+ return 0;
+ }
+
+ if (!this->itable)
+ this->itable = fd->inode->table;
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+
+ ret = syncbarrier_init(&local->barrier);
+ if (ret)
+ goto err;
+ local->fd = fd_ref(fd);
+ local->block_size = block_size;
+ local->offset = offset;
+ local->req_size = size;
+ local->flags = flags;
+ local->fop = GF_FOP_READ;
+ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+ if (!local->xattr_req)
+ goto err;
+
+ local->loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_readv_handler);
+ return 0;
+err:
+ shard_common_failure_unwind(GF_FOP_READ, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_common_inode_write_post_update_size_handler(call_frame_t *frame,
+ xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ } else {
+ shard_common_inode_write_success_unwind(local->fop, frame,
+ local->written_size);
+ }
+ return 0;
+}
+
+static gf_boolean_t
+shard_is_appending_write(shard_local_t *local)
+{
+ if (local->fop != GF_FOP_WRITE)
+ return _gf_false;
+ if (local->flags & O_APPEND)
+ return _gf_true;
+ if (local->fd->flags & O_APPEND)
+ return _gf_true;
+ return _gf_false;
+}
+
+int
+__shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode,
+ xlator_t *this)
+{
+ int ret = -1;
+ uint64_t ctx_uint = 0;
+ shard_inode_ctx_t *ctx = NULL;
+
+ ret = __inode_ctx_get(inode, this, &ctx_uint);
+ if (ret < 0)
+ return ret;
+
+ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+ if (shard_is_appending_write(local)) {
+ local->delta_size = local->total_size;
+ } else if (local->offset + local->total_size > ctx->stat.ia_size) {
+ local->delta_size = (local->offset + local->total_size) -
+ ctx->stat.ia_size;
+ } else {
+ local->delta_size = 0;
+ }
+ ctx->stat.ia_size += (local->delta_size);
+ local->postbuf = ctx->stat;
+
+ return 0;
+}
+
+int
+shard_get_delta_size_from_inode_ctx(shard_local_t *local, inode_t *inode,
+ xlator_t *this)
+{
+ int ret = -1;
+
+ LOCK(&inode->lock);
+ {
+ ret = __shard_get_delta_size_from_inode_ctx(local, inode, this);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+int
+shard_common_inode_write_do_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ int call_count = 0;
+ fd_t *anon_fd = cookie;
+ shard_local_t *local = NULL;
+ glusterfs_fop_t fop = 0;
+
+ local = frame->local;
+ fop = local->fop;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ } else {
+ local->written_size += op_ret;
+ GF_ATOMIC_ADD(local->delta_blocks,
+ post->ia_blocks - pre->ia_blocks);
+ local->delta_size += (post->ia_size - pre->ia_size);
+ shard_inode_ctx_set(local->fd->inode, this, post, 0,
+ SHARD_MASK_TIMES);
+ if (local->fd->inode != anon_fd->inode)
+ shard_inode_ctx_add_to_fsync_list(local->fd->inode, this,
+ anon_fd->inode);
+ }
+ }
+ UNLOCK(&frame->lock);
+
+ if (anon_fd)
+ fd_unref(anon_fd);
+
+ call_count = shard_call_count_return(frame);
+ if (call_count == 0) {
+ SHARD_UNSET_ROOT_FS_ID(frame, local);
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(fop, frame, local->op_ret,
+ local->op_errno);
+ } else {
+ shard_get_delta_size_from_inode_ctx(local, local->fd->inode, this);
+ local->hole_size = 0;
+ if (xdata)
+ local->xattr_rsp = dict_ref(xdata);
+ shard_update_file_size(
+ frame, this, local->fd, NULL,
+ shard_common_inode_write_post_update_size_handler);
+ }
+ }
+
+ return 0;
+}
+
+int
+shard_common_inode_write_wind(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vec, int count, off_t shard_offset,
+ size_t size)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ switch (local->fop) {
+ case GF_FOP_WRITE:
+ STACK_WIND_COOKIE(
+ frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vec, count, shard_offset,
+ local->flags, local->iobref, local->xattr_req);
+ break;
+ case GF_FOP_FALLOCATE:
+ STACK_WIND_COOKIE(
+ frame, shard_common_inode_write_do_cbk, fd, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, local->flags,
+ shard_offset, size, local->xattr_req);
+ break;
+ case GF_FOP_ZEROFILL:
+ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd,
+ shard_offset, size, local->xattr_req);
+ break;
+ case GF_FOP_DISCARD:
+ STACK_WIND_COOKIE(frame, shard_common_inode_write_do_cbk, fd,
+ FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd,
+ shard_offset, size, local->xattr_req);
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+ "Invalid fop id = %d", local->fop);
+ break;
+ }
+ return 0;
+}
+
+int
+shard_common_inode_write_do(call_frame_t *frame, xlator_t *this)
+{
+ int i = 0;
+ int count = 0;
+ int call_count = 0;
+ int last_block = 0;
+ uint32_t cur_block = 0;
+ fd_t *fd = NULL;
+ fd_t *anon_fd = NULL;
+ shard_local_t *local = NULL;
+ struct iovec *vec = NULL;
+ gf_boolean_t wind_failed = _gf_false;
+ gf_boolean_t odirect = _gf_false;
+ off_t orig_offset = 0;
+ off_t shard_offset = 0;
+ off_t vec_offset = 0;
+ size_t remaining_size = 0;
+ size_t shard_write_size = 0;
+
+ local = frame->local;
+ fd = local->fd;
+
+ orig_offset = local->offset;
+ remaining_size = local->total_size;
+ cur_block = local->first_block;
+ local->call_count = call_count = local->num_blocks;
+ last_block = local->last_block;
+
+ SHARD_SET_ROOT_FS_ID(frame, local);
+
+ if (dict_set_uint32(local->xattr_req, GLUSTERFS_WRITE_UPDATE_ATOMIC, 4)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to set " GLUSTERFS_WRITE_UPDATE_ATOMIC
+ " into "
+ "dict: %s",
+ uuid_utoa(fd->inode->gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ local->call_count = 1;
+ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1,
+ ENOMEM, NULL, NULL, NULL);
+ return 0;
+ }
+
+ if ((fd->flags & O_DIRECT) && (local->fop == GF_FOP_WRITE))
+ odirect = _gf_true;
+
+ while (cur_block <= last_block) {
+ if (wind_failed) {
+ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this, -1,
+ ENOMEM, NULL, NULL, NULL);
+ goto next;
+ }
+
+ shard_offset = orig_offset % local->block_size;
+ shard_write_size = local->block_size - shard_offset;
+ if (shard_write_size > remaining_size)
+ shard_write_size = remaining_size;
+
+ remaining_size -= shard_write_size;
+
+ if (local->fop == GF_FOP_WRITE) {
+ vec = NULL;
+ count = iov_subset(local->vector, local->count, vec_offset,
+ shard_write_size, &vec, 0);
+ if (count < 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ wind_failed = _gf_true;
+ shard_common_inode_write_do_cbk(frame, (void *)(long)0, this,
+ -1, ENOMEM, NULL, NULL, NULL);
+ goto next;
+ }
+ }
+
+ if (cur_block == 0) {
+ anon_fd = fd_ref(fd);
+ } else {
+ anon_fd = fd_anonymous(local->inode_list[i]);
+ if (!anon_fd) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ wind_failed = _gf_true;
+ GF_FREE(vec);
+ shard_common_inode_write_do_cbk(frame, (void *)(long)anon_fd,
+ this, -1, ENOMEM, NULL, NULL,
+ NULL);
+ goto next;
+ }
+
+ if (local->fop == GF_FOP_WRITE) {
+ if (odirect)
+ local->flags = O_DIRECT;
+ else
+ local->flags = GF_ANON_FD_FLAGS;
+ }
+ }
+
+ shard_common_inode_write_wind(frame, this, anon_fd, vec, count,
+ shard_offset, shard_write_size);
+ if (vec)
+ vec_offset += shard_write_size;
+ orig_offset += shard_write_size;
+ GF_FREE(vec);
+ vec = NULL;
+ next:
+ cur_block++;
+ i++;
+ call_count--;
+ }
+ return 0;
+}
+
+int
+shard_common_inode_write_post_mknod_handler(call_frame_t *frame,
+ xlator_t *this);
+
+int
+shard_common_inode_write_post_lookup_shards_handler(call_frame_t *frame,
+ xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ if (local->create_count) {
+ shard_common_resume_mknod(frame, this,
+ shard_common_inode_write_post_mknod_handler);
+ } else {
+ shard_common_inode_write_do(frame, this);
+ }
+
+ return 0;
+}
+
+int
+shard_common_inode_write_post_mknod_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ if (!local->eexist_count) {
+ shard_common_inode_write_do(frame, this);
+ } else {
+ local->call_count = local->eexist_count;
+ shard_common_lookup_shards(
+ frame, this, local->loc.inode,
+ shard_common_inode_write_post_lookup_shards_handler);
+ }
+
+ return 0;
+}
+
+int
+shard_common_inode_write_post_resolve_handler(call_frame_t *frame,
+ xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ if (local->call_count) {
+ shard_common_lookup_shards(
+ frame, this, local->resolver_base_inode,
+ shard_common_inode_write_post_lookup_shards_handler);
+ } else if (local->create_count) {
+ shard_common_inode_write_post_lookup_shards_handler(frame, this);
+ } else {
+ shard_common_inode_write_do(frame, this);
+ }
+
+ return 0;
+}
+
+int
+shard_common_inode_write_post_lookup_handler(call_frame_t *frame,
+ xlator_t *this)
+{
+ shard_local_t *local = frame->local;
+ shard_priv_t *priv = this->private;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ local->postbuf = local->prebuf;
+
+ /*Adjust offset to EOF so that correct shard is chosen for append*/
+ if (shard_is_appending_write(local))
+ local->offset = local->prebuf.ia_size;
+
+ local->first_block = get_lowest_block(local->offset, local->block_size);
+ local->last_block = get_highest_block(local->offset, local->total_size,
+ local->block_size);
+ local->num_blocks = local->last_block - local->first_block + 1;
+ GF_ASSERT(local->num_blocks > 0);
+ local->inode_list = GF_CALLOC(local->num_blocks, sizeof(inode_t *),
+ gf_shard_mt_inode_list);
+ if (!local->inode_list) {
+ shard_common_failure_unwind(local->fop, frame, -1, ENOMEM);
+ return 0;
+ }
+
+ gf_msg_trace(this->name, 0,
+ "%s: gfid=%s first_block=%" PRIu64
+ " "
+ "last_block=%" PRIu64 " num_blocks=%" PRIu64 " offset=%" PRId64
+ " total_size=%zu flags=%" PRId32 "",
+ gf_fop_list[local->fop],
+ uuid_utoa(local->resolver_base_inode->gfid),
+ local->first_block, local->last_block, local->num_blocks,
+ local->offset, local->total_size, local->flags);
+
+ local->dot_shard_loc.inode = inode_find(this->itable, priv->dot_shard_gfid);
+
+ if (!local->dot_shard_loc.inode) {
+ /*change handler*/
+ shard_mkdir_internal_dir(frame, this,
+ shard_common_inode_write_post_resolve_handler,
+ SHARD_INTERNAL_DIR_DOT_SHARD);
+ } else {
+ /*change handler*/
+ local->post_res_handler = shard_common_inode_write_post_resolve_handler;
+ shard_refresh_internal_dir(frame, this, SHARD_INTERNAL_DIR_DOT_SHARD);
+ }
+ return 0;
+}
+
+int
+shard_mkdir_internal_dir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ inode_t *link_inode = NULL;
+ shard_local_t *local = NULL;
+ shard_internal_dir_type_t type = (shard_internal_dir_type_t)cookie;
+
+ local = frame->local;
+
+ SHARD_UNSET_ROOT_FS_ID(frame, local);
+
+ if (op_ret == -1) {
+ if (op_errno != EEXIST) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ } else {
+ gf_msg_debug(this->name, 0,
+ "mkdir on %s failed "
+ "with EEXIST. Attempting lookup now",
+ shard_internal_dir_string(type));
+ shard_lookup_internal_dir(frame, this, local->post_res_handler,
+ type);
+ return 0;
+ }
+ }
+
+ link_inode = shard_link_internal_dir_inode(local, inode, buf, type);
+ if (link_inode != inode) {
+ shard_refresh_internal_dir(frame, this, type);
+ } else {
+ shard_inode_ctx_mark_dir_refreshed(link_inode, this);
+ shard_common_resolve_shards(frame, this, local->post_res_handler);
+ }
+ return 0;
+unwind:
+ shard_common_resolve_shards(frame, this, local->post_res_handler);
+ return 0;
+}
+
+int
+shard_mkdir_internal_dir(call_frame_t *frame, xlator_t *this,
+ shard_post_resolve_fop_handler_t handler,
+ shard_internal_dir_type_t type)
+{
+ int ret = -1;
+ shard_local_t *local = NULL;
+ shard_priv_t *priv = NULL;
+ dict_t *xattr_req = NULL;
+ uuid_t *gfid = NULL;
+ loc_t *loc = NULL;
+ gf_boolean_t free_gfid = _gf_true;
+
+ local = frame->local;
+ priv = this->private;
+
+ local->post_res_handler = handler;
+ gfid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!gfid)
+ goto err;
+
+ switch (type) {
+ case SHARD_INTERNAL_DIR_DOT_SHARD:
+ gf_uuid_copy(*gfid, priv->dot_shard_gfid);
+ loc = &local->dot_shard_loc;
+ break;
+ case SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME:
+ gf_uuid_copy(*gfid, priv->dot_shard_rm_gfid);
+ loc = &local->dot_shard_rm_loc;
+ break;
+ default:
+ bzero(*gfid, sizeof(uuid_t));
+ break;
+ }
+
+ xattr_req = dict_new();
+ if (!xattr_req)
+ goto err;
+
+ ret = shard_init_internal_dir_loc(this, local, type);
+ if (ret)
+ goto err;
+
+ ret = dict_set_gfuuid(xattr_req, "gfid-req", *gfid, false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_DICT_OP_FAILED,
+ "Failed to set gfid-req for %s",
+ shard_internal_dir_string(type));
+ goto err;
+ } else {
+ free_gfid = _gf_false;
+ }
+
+ SHARD_SET_ROOT_FS_ID(frame, local);
+
+ STACK_WIND_COOKIE(frame, shard_mkdir_internal_dir_cbk, (void *)(long)type,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir, loc,
+ 0755, 0, xattr_req);
+ dict_unref(xattr_req);
+ return 0;
+
+err:
+ if (xattr_req)
+ dict_unref(xattr_req);
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ if (free_gfid)
+ GF_FREE(gfid);
+ handler(frame, this);
+ return 0;
+}
+
+int
+shard_flush_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ /* To-Do: Wind flush on all shards of the file */
+ SHARD_STACK_UNWIND(flush, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+int
+shard_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ STACK_WIND(frame, shard_flush_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->flush, fd, xdata);
+ return 0;
+}
+
+int
+__shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
+ xlator_t *this)
+{
+ int ret = -1;
+ uint64_t ctx_uint = 0;
+ shard_inode_ctx_t *ctx = NULL;
+
+ ret = __inode_ctx_get(inode, this, &ctx_uint);
+ if (ret < 0)
+ return ret;
+
+ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+ local->postbuf.ia_ctime = ctx->stat.ia_ctime;
+ local->postbuf.ia_ctime_nsec = ctx->stat.ia_ctime_nsec;
+ local->postbuf.ia_atime = ctx->stat.ia_atime;
+ local->postbuf.ia_atime_nsec = ctx->stat.ia_atime_nsec;
+ local->postbuf.ia_mtime = ctx->stat.ia_mtime;
+ local->postbuf.ia_mtime_nsec = ctx->stat.ia_mtime_nsec;
+
+ return 0;
+}
+
+int
+shard_get_timestamps_from_inode_ctx(shard_local_t *local, inode_t *inode,
+ xlator_t *this)
+{
+ int ret = 0;
+
+ LOCK(&inode->lock);
+ {
+ ret = __shard_get_timestamps_from_inode_ctx(local, inode, this);
+ }
+ UNLOCK(&inode->lock);
+
+ return ret;
+}
+
+int
+shard_fsync_shards_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ int call_count = 0;
+ uint64_t fsync_count = 0;
+ fd_t *anon_fd = cookie;
+ shard_local_t *local = NULL;
+ shard_inode_ctx_t *ctx = NULL;
+ shard_inode_ctx_t *base_ictx = NULL;
+ inode_t *base_inode = NULL;
+ gf_boolean_t unref_shard_inode = _gf_false;
+
+ local = frame->local;
+ base_inode = local->fd->inode;
+
+ if (local->op_ret < 0)
+ goto out;
+
+ LOCK(&frame->lock);
+ {
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ UNLOCK(&frame->lock);
+ goto out;
+ }
+ shard_inode_ctx_set(local->fd->inode, this, postbuf, 0,
+ SHARD_MASK_TIMES);
+ }
+ UNLOCK(&frame->lock);
+ fd_ctx_get(anon_fd, this, &fsync_count);
+out:
+ if (anon_fd && (base_inode != anon_fd->inode)) {
+ LOCK(&base_inode->lock);
+ LOCK(&anon_fd->inode->lock);
+ {
+ __shard_inode_ctx_get(anon_fd->inode, this, &ctx);
+ __shard_inode_ctx_get(base_inode, this, &base_ictx);
+ if (op_ret == 0)
+ ctx->fsync_needed -= fsync_count;
+ GF_ASSERT(ctx->fsync_needed >= 0);
+ if (ctx->fsync_needed != 0) {
+ list_add_tail(&ctx->to_fsync_list, &base_ictx->to_fsync_list);
+ base_ictx->fsync_count++;
+ } else {
+ unref_shard_inode = _gf_true;
+ }
+ }
+ UNLOCK(&anon_fd->inode->lock);
+ UNLOCK(&base_inode->lock);
+ }
+
+ if (unref_shard_inode)
+ inode_unref(anon_fd->inode);
+ if (anon_fd)
+ fd_unref(anon_fd);
+
+ call_count = shard_call_count_return(frame);
+ if (call_count != 0)
+ return 0;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
+ local->op_errno);
+ } else {
+ shard_get_timestamps_from_inode_ctx(local, base_inode, this);
+ SHARD_STACK_UNWIND(fsync, frame, local->op_ret, local->op_errno,
+ &local->prebuf, &local->postbuf, local->xattr_rsp);
+ }
+ return 0;
+}
+
+int
+shard_post_lookup_fsync_handler(call_frame_t *frame, xlator_t *this)
+{
+ int ret = 0;
+ int call_count = 0;
+ int fsync_count = 0;
+ fd_t *anon_fd = NULL;
+ inode_t *base_inode = NULL;
+ shard_local_t *local = NULL;
+ shard_inode_ctx_t *ctx = NULL;
+ shard_inode_ctx_t *iter = NULL;
+ struct list_head copy = {
+ 0,
+ };
+ shard_inode_ctx_t *tmp = NULL;
+
+ local = frame->local;
+ base_inode = local->fd->inode;
+ local->postbuf = local->prebuf;
+ INIT_LIST_HEAD(&copy);
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(GF_FOP_FSYNC, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ LOCK(&base_inode->lock);
+ {
+ __shard_inode_ctx_get(base_inode, this, &ctx);
+ list_splice_init(&ctx->to_fsync_list, &copy);
+ call_count = ctx->fsync_count;
+ ctx->fsync_count = 0;
+ }
+ UNLOCK(&base_inode->lock);
+
+ local->call_count = ++call_count;
+
+ /* Send fsync() on the base shard first */
+ anon_fd = fd_ref(local->fd);
+ STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, anon_fd, local->datasync,
+ local->xattr_req);
+ call_count--;
+ anon_fd = NULL;
+
+ list_for_each_entry_safe(iter, tmp, &copy, to_fsync_list)
+ {
+ list_del_init(&iter->to_fsync_list);
+ fsync_count = 0;
+ shard_inode_ctx_get_fsync_count(iter->inode, this, &fsync_count);
+ GF_ASSERT(fsync_count > 0);
+ anon_fd = fd_anonymous(iter->inode);
+ if (!anon_fd) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+ SHARD_MSG_MEMALLOC_FAILED,
+ "Failed to create "
+ "anon fd to fsync shard");
+ shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1,
+ ENOMEM, NULL, NULL, NULL);
+ continue;
+ }
+
+ ret = fd_ctx_set(anon_fd, this, fsync_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_FD_CTX_SET_FAILED,
+ "Failed to set fd "
+ "ctx for shard inode gfid=%s",
+ uuid_utoa(iter->inode->gfid));
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ shard_fsync_shards_cbk(frame, (void *)(long)anon_fd, this, -1,
+ ENOMEM, NULL, NULL, NULL);
+ continue;
+ }
+ STACK_WIND_COOKIE(frame, shard_fsync_shards_cbk, anon_fd,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->fsync,
+ anon_fd, local->datasync, local->xattr_req);
+ call_count--;
+ }
+
+ return 0;
+}
+
+int
+shard_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ int ret = 0;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+
+ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block "
+ "size for %s from its inode ctx",
+ uuid_utoa(fd->inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ STACK_WIND(frame, default_fsync_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ return 0;
+ }
+
+ if (!this->itable)
+ this->itable = fd->inode->table;
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+
+ local->fd = fd_ref(fd);
+ local->fop = GF_FOP_FSYNC;
+ local->datasync = datasync;
+ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+ if (!local->xattr_req)
+ goto err;
+
+ local->loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_post_lookup_fsync_handler);
+ return 0;
+err:
+ shard_common_failure_unwind(GF_FOP_FSYNC, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_readdir_past_dot_shard_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, gf_dirent_t *orig_entries,
+ dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list)
+ {
+ list_del_init(&entry->list);
+ list_add_tail(&entry->list, &local->entries_head.list);
+
+ if (!entry->dict)
+ continue;
+
+ if (IA_ISDIR(entry->d_stat.ia_type))
+ continue;
+
+ if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE))
+ shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
+ if (!entry->inode)
+ continue;
+
+ shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
+ }
+ local->op_ret += op_ret;
+
+unwind:
+ if (local->fop == GF_FOP_READDIR)
+ SHARD_STACK_UNWIND(readdir, frame, local->op_ret, local->op_errno,
+ &local->entries_head, xdata);
+ else
+ SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno,
+ &local->entries_head, xdata);
+ return 0;
+}
+
+int32_t
+shard_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *orig_entries,
+ dict_t *xdata)
+{
+ fd_t *fd = NULL;
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmp = NULL;
+ shard_local_t *local = NULL;
+ gf_boolean_t last_entry = _gf_false;
+
+ local = frame->local;
+ fd = local->fd;
+
+ if (op_ret < 0)
+ goto unwind;
+
+ list_for_each_entry_safe(entry, tmp, (&orig_entries->list), list)
+ {
+ if (last_entry)
+ last_entry = _gf_false;
+
+ if (__is_root_gfid(fd->inode->gfid) &&
+ !(strcmp(entry->d_name, GF_SHARD_DIR))) {
+ local->offset = entry->d_off;
+ op_ret--;
+ last_entry = _gf_true;
+ continue;
+ }
+
+ list_del_init(&entry->list);
+ list_add_tail(&entry->list, &local->entries_head.list);
+
+ if (!entry->dict)
+ continue;
+
+ if (IA_ISDIR(entry->d_stat.ia_type))
+ continue;
+
+ if (dict_get(entry->dict, GF_XATTR_SHARD_FILE_SIZE) &&
+ frame->root->pid != GF_CLIENT_PID_GSYNCD)
+ shard_modify_size_and_block_count(&entry->d_stat, entry->dict);
+
+ if (!entry->inode)
+ continue;
+
+ shard_inode_ctx_update(entry->inode, this, entry->dict, &entry->d_stat);
+ }
+
+ local->op_ret = op_ret;
+
+ if (last_entry) {
+ if (local->fop == GF_FOP_READDIR)
+ STACK_WIND(frame, shard_readdir_past_dot_shard_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir,
+ local->fd, local->readdir_size, local->offset,
+ local->xattr_req);
+ else
+ STACK_WIND(frame, shard_readdir_past_dot_shard_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdirp,
+ local->fd, local->readdir_size, local->offset,
+ local->xattr_req);
+ return 0;
+ }
+
+unwind:
+ if (local->fop == GF_FOP_READDIR)
+ SHARD_STACK_UNWIND(readdir, frame, op_ret, op_errno,
+ &local->entries_head, xdata);
+ else
+ SHARD_STACK_UNWIND(readdirp, frame, op_ret, op_errno,
+ &local->entries_head, xdata);
+ return 0;
+}
+
+int
+shard_readdir_do(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, int whichop, dict_t *xdata)
+{
+ int ret = 0;
+ shard_local_t *local = NULL;
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ goto err;
+ }
+
+ frame->local = local;
+
+ local->fd = fd_ref(fd);
+ local->fop = whichop;
+ local->readdir_size = size;
+ INIT_LIST_HEAD(&local->entries_head.list);
+ local->list_inited = _gf_true;
+
+ if (whichop == GF_FOP_READDIR) {
+ STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, fd, size, offset, xdata);
+ } else {
+ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
+ local, err);
+ ret = dict_set_uint64(local->xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, 0);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to set "
+ "dict value: key:%s, directory gfid=%s",
+ GF_XATTR_SHARD_BLOCK_SIZE, uuid_utoa(fd->inode->gfid));
+ goto err;
+ }
+
+ STACK_WIND(frame, shard_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, offset,
+ local->xattr_req);
+ }
+
+ return 0;
+
+err:
+ STACK_UNWIND_STRICT(readdir, frame, -1, ENOMEM, NULL, NULL);
+ return 0;
+}
+
+int32_t
+shard_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIR, xdata);
+ return 0;
+}
+
+int32_t
+shard_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ shard_readdir_do(frame, this, fd, size, offset, GF_FOP_READDIRP, xdata);
+ return 0;
+}
+
+int32_t
+shard_modify_and_set_iatt_in_dict(dict_t *xdata, shard_local_t *local,
+ char *key)
+{
+ int ret = 0;
+ struct iatt *tmpbuf = NULL;
+ struct iatt *stbuf = NULL;
+ data_t *data = NULL;
+
+ if (!xdata)
+ return 0;
+
+ data = dict_get(xdata, key);
+ if (!data)
+ return 0;
+
+ tmpbuf = data_to_iatt(data, key);
+ stbuf = GF_MALLOC(sizeof(struct iatt), gf_common_mt_char);
+ if (stbuf == NULL) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+ *stbuf = *tmpbuf;
+ stbuf->ia_size = local->prebuf.ia_size;
+ stbuf->ia_blocks = local->prebuf.ia_blocks;
+ ret = dict_set_iatt(xdata, key, stbuf, false);
+ if (ret < 0) {
+ local->op_ret = -1;
+ local->op_errno = ENOMEM;
+ goto err;
+ }
+ return 0;
+
+err:
+ GF_FREE(stbuf);
+ return -1;
+}
+
+int32_t
+shard_common_remove_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int ret = -1;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT);
+ if (ret < 0)
+ goto err;
+
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT);
+ if (ret < 0)
+ goto err;
+
+ if (local->fd)
+ SHARD_STACK_UNWIND(fremovexattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ else
+ SHARD_STACK_UNWIND(removexattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ return 0;
+
+err:
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+}
+
+int32_t
+shard_post_lookup_remove_xattr_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ if (local->fd)
+ STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, local->fd,
+ local->name, local->xattr_req);
+ else
+ STACK_WIND(frame, shard_common_remove_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, &local->loc,
+ local->name, local->xattr_req);
+ return 0;
+}
+
+int32_t
+shard_common_remove_xattr(call_frame_t *frame, xlator_t *this,
+ glusterfs_fop_t fop, loc_t *loc, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int ret = -1;
+ int op_errno = ENOMEM;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+ inode_t *inode = loc ? loc->inode : fd->inode;
+
+ if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name,
+ xdata);
+ return 0;
+ }
+
+ /* If shard's special xattrs are attempted to be removed,
+ * fail the fop with EPERM (except if the client is gsyncd).
+ */
+ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+ GF_IF_NATIVE_XATTR_GOTO(SHARD_XATTR_PREFIX "*", name, op_errno, err);
+ }
+
+ /* Repeat the same check for bulk-removexattr */
+ if (xdata && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+ dict_del(xdata, GF_XATTR_SHARD_BLOCK_SIZE);
+ dict_del(xdata, GF_XATTR_SHARD_FILE_SIZE);
+ }
+
+ ret = shard_inode_ctx_get_block_size(inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block size from inode ctx of %s",
+ uuid_utoa(inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name,
+ xdata);
+ return 0;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+ local->fop = fop;
+ if (loc) {
+ if (loc_copy(&local->loc, loc) != 0)
+ goto err;
+ }
+
+ if (fd) {
+ local->fd = fd_ref(fd);
+ local->loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+ }
+
+ if (name) {
+ local->name = gf_strdup(name);
+ if (!local->name)
+ goto err;
+ }
+
+ if (xdata)
+ local->xattr_req = dict_ref(xdata);
+
+ shard_refresh_base_file(frame, this, loc, fd,
+ shard_post_lookup_remove_xattr_handler);
+ return 0;
+err:
+ shard_common_failure_unwind(fop, frame, -1, op_errno);
+ return 0;
+}
+
+int32_t
+shard_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ shard_common_remove_xattr(frame, this, GF_FOP_REMOVEXATTR, loc, NULL, name,
+ xdata);
+ return 0;
+}
+
+int32_t
+shard_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ shard_common_remove_xattr(frame, this, GF_FOP_FREMOVEXATTR, NULL, fd, name,
+ xdata);
+ return 0;
+}
+
+int32_t
+shard_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ if (op_ret < 0)
+ goto unwind;
+
+ if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+ dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
+ dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
+ }
+
+unwind:
+ SHARD_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int32_t
+shard_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ int op_errno = EINVAL;
+
+ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
+ (!strncmp(name, SHARD_XATTR_PREFIX, SLEN(SHARD_XATTR_PREFIX)))) {
+ op_errno = ENODATA;
+ goto out;
+ }
+
+ STACK_WIND(frame, shard_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
+out:
+ shard_common_failure_unwind(GF_FOP_FGETXATTR, frame, -1, op_errno);
+ return 0;
+}
+
+int32_t
+shard_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ if (op_ret < 0)
+ goto unwind;
+
+ if (dict && (frame->root->pid != GF_CLIENT_PID_GSYNCD)) {
+ dict_del(dict, GF_XATTR_SHARD_BLOCK_SIZE);
+ dict_del(dict, GF_XATTR_SHARD_FILE_SIZE);
+ }
+
+unwind:
+ SHARD_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+int32_t
+shard_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int op_errno = EINVAL;
+
+ if ((frame->root->pid != GF_CLIENT_PID_GSYNCD) && (name) &&
+ (!strncmp(name, SHARD_XATTR_PREFIX, sizeof(SHARD_XATTR_PREFIX) - 1))) {
+ op_errno = ENODATA;
+ goto out;
+ }
+
+ STACK_WIND(frame, shard_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
+out:
+ shard_common_failure_unwind(GF_FOP_GETXATTR, frame, -1, op_errno);
+ return 0;
+}
+
+int32_t
+shard_common_set_xattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ int ret = -1;
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto err;
+ }
+
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_PRESTAT);
+ if (ret < 0)
+ goto err;
+
+ ret = shard_modify_and_set_iatt_in_dict(xdata, local, GF_POSTSTAT);
+ if (ret < 0)
+ goto err;
+
+ if (local->fd)
+ SHARD_STACK_UNWIND(fsetxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ else
+ SHARD_STACK_UNWIND(setxattr, frame, local->op_ret, local->op_errno,
+ xdata);
+ return 0;
+
+err:
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+}
+
+int32_t
+shard_post_lookup_set_xattr_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->op_ret < 0) {
+ shard_common_failure_unwind(local->fop, frame, local->op_ret,
+ local->op_errno);
+ return 0;
+ }
+
+ if (local->fd)
+ STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, local->fd,
+ local->xattr_req, local->flags, local->xattr_rsp);
+ else
+ STACK_WIND(frame, shard_common_set_xattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, &local->loc,
+ local->xattr_req, local->flags, local->xattr_rsp);
+ return 0;
+}
+
+int32_t
+shard_common_set_xattr(call_frame_t *frame, xlator_t *this, glusterfs_fop_t fop,
+ loc_t *loc, fd_t *fd, dict_t *dict, int32_t flags,
+ dict_t *xdata)
+{
+ int ret = -1;
+ int op_errno = ENOMEM;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+ inode_t *inode = loc ? loc->inode : fd->inode;
+
+ if ((IA_ISDIR(inode->ia_type)) || (IA_ISLNK(inode->ia_type))) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
+ xdata);
+ return 0;
+ }
+
+ /* Sharded or not, if shard's special xattrs are attempted to be set,
+ * fail the fop with EPERM (except if the client is gsyncd.
+ */
+ if (frame->root->pid != GF_CLIENT_PID_GSYNCD) {
+ GF_IF_INTERNAL_XATTR_GOTO(SHARD_XATTR_PREFIX "*", dict, op_errno, err);
+ }
+
+ ret = shard_inode_ctx_get_block_size(inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block size from inode ctx of %s",
+ uuid_utoa(inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ if (loc)
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+ xdata);
+ else
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
+ xdata);
+ return 0;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+ local->fop = fop;
+ if (loc) {
+ if (loc_copy(&local->loc, loc) != 0)
+ goto err;
+ }
+
+ if (fd) {
+ local->fd = fd_ref(fd);
+ local->loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+ }
+ local->flags = flags;
+ /* Reusing local->xattr_req and local->xattr_rsp to store the setxattr dict
+ * and the xdata dict
+ */
+ if (dict)
+ local->xattr_req = dict_ref(dict);
+ if (xdata)
+ local->xattr_rsp = dict_ref(xdata);
+
+ shard_refresh_base_file(frame, this, loc, fd,
+ shard_post_lookup_set_xattr_handler);
+ return 0;
+err:
+ shard_common_failure_unwind(fop, frame, -1, op_errno);
+ return 0;
+}
+
+int32_t
+shard_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ shard_common_set_xattr(frame, this, GF_FOP_FSETXATTR, NULL, fd, dict, flags,
+ xdata);
+ return 0;
+}
+
+int32_t
+shard_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ shard_common_set_xattr(frame, this, GF_FOP_SETXATTR, loc, NULL, dict, flags,
+ xdata);
+ return 0;
+}
+
+int
+shard_post_setattr_handler(call_frame_t *frame, xlator_t *this)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (local->fop == GF_FOP_SETATTR) {
+ if (local->op_ret >= 0)
+ shard_inode_ctx_set(local->loc.inode, this, &local->postbuf, 0,
+ SHARD_LOOKUP_MASK);
+ SHARD_STACK_UNWIND(setattr, frame, local->op_ret, local->op_errno,
+ &local->prebuf, &local->postbuf, local->xattr_rsp);
+ } else if (local->fop == GF_FOP_FSETATTR) {
+ if (local->op_ret >= 0)
+ shard_inode_ctx_set(local->fd->inode, this, &local->postbuf, 0,
+ SHARD_LOOKUP_MASK);
+ SHARD_STACK_UNWIND(fsetattr, frame, local->op_ret, local->op_errno,
+ &local->prebuf, &local->postbuf, local->xattr_rsp);
+ }
+
+ return 0;
+}
+
+int
+shard_common_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
+{
+ shard_local_t *local = NULL;
+
+ local = frame->local;
+
+ if (op_ret < 0) {
+ local->op_ret = op_ret;
+ local->op_errno = op_errno;
+ goto unwind;
+ }
+
+ local->prebuf = *prebuf;
+ if (shard_modify_size_and_block_count(&local->prebuf, xdata)) {
+ local->op_ret = -1;
+ local->op_errno = EINVAL;
+ goto unwind;
+ }
+ if (xdata)
+ local->xattr_rsp = dict_ref(xdata);
+ local->postbuf = *postbuf;
+ local->postbuf.ia_size = local->prebuf.ia_size;
+ local->postbuf.ia_blocks = local->prebuf.ia_blocks;
+
+unwind:
+ local->handler(frame, this);
+ return 0;
+}
+
+int
+shard_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ int ret = -1;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+
+ if ((IA_ISDIR(loc->inode->ia_type)) || (IA_ISLNK(loc->inode->ia_type))) {
+ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
+ }
+
+ ret = shard_inode_ctx_get_block_size(loc->inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block size from inode ctx of %s",
+ uuid_utoa(loc->inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ STACK_WIND(frame, default_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+ return 0;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+
+ local->handler = shard_post_setattr_handler;
+ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+ if (!local->xattr_req)
+ goto err;
+ local->fop = GF_FOP_SETATTR;
+ loc_copy(&local->loc, loc);
+
+ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, local->loc.gfid,
+ local, err);
+
+ STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid,
+ local->xattr_req);
+ return 0;
+err:
+ shard_common_failure_unwind(GF_FOP_SETATTR, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ int ret = -1;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+
+ if ((IA_ISDIR(fd->inode->ia_type)) || (IA_ISLNK(fd->inode->ia_type))) {
+ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
+ }
+
+ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block size from inode ctx of %s",
+ uuid_utoa(fd->inode->gfid));
+ goto err;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ STACK_WIND(frame, default_fsetattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+ return 0;
+ }
+
+ if (!this->itable)
+ this->itable = fd->inode->table;
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto err;
+
+ frame->local = local;
+
+ local->handler = shard_post_setattr_handler;
+ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+ if (!local->xattr_req)
+ goto err;
+ local->fop = GF_FOP_FSETATTR;
+ local->fd = fd_ref(fd);
+
+ SHARD_MD_READ_FOP_INIT_REQ_DICT(this, local->xattr_req, fd->inode->gfid,
+ local, err);
+
+ STACK_WIND(frame, shard_common_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid,
+ local->xattr_req);
+ return 0;
+err:
+ shard_common_failure_unwind(GF_FOP_FSETATTR, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_common_inode_write_begin(call_frame_t *frame, xlator_t *this,
+ glusterfs_fop_t fop, fd_t *fd,
+ struct iovec *vector, int32_t count,
+ off_t offset, uint32_t flags, size_t len,
+ struct iobref *iobref, dict_t *xdata)
+{
+ int ret = 0;
+ int i = 0;
+ uint64_t block_size = 0;
+ shard_local_t *local = NULL;
+
+ ret = shard_inode_ctx_get_block_size(fd->inode, this, &block_size);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INODE_CTX_GET_FAILED,
+ "Failed to get block "
+ "size for %s from its inode ctx",
+ uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+ if (!block_size || frame->root->pid == GF_CLIENT_PID_GSYNCD) {
+ /* block_size = 0 means that the file was created before
+ * sharding was enabled on the volume.
+ */
+ switch (fop) {
+ case GF_FOP_WRITE:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector,
+ count, offset, flags, iobref, xdata);
+ break;
+ case GF_FOP_FALLOCATE:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, flags,
+ offset, len, xdata);
+ break;
+ case GF_FOP_ZEROFILL:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset,
+ len, xdata);
+ break;
+ case GF_FOP_DISCARD:
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset,
+ len, xdata);
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_INVALID_FOP,
+ "Invalid fop id = %d", fop);
+ break;
+ }
+ return 0;
+ }
+
+ if (!this->itable)
+ this->itable = fd->inode->table;
+
+ local = mem_get0(this->local_pool);
+ if (!local)
+ goto out;
+
+ frame->local = local;
+
+ ret = syncbarrier_init(&local->barrier);
+ if (ret)
+ goto out;
+ local->xattr_req = (xdata) ? dict_ref(xdata) : dict_new();
+ if (!local->xattr_req)
+ goto out;
+
+ if (vector) {
+ local->vector = iov_dup(vector, count);
+ if (!local->vector)
+ goto out;
+ for (i = 0; i < count; i++)
+ local->total_size += vector[i].iov_len;
+ local->count = count;
+ } else {
+ local->total_size = len;
+ }
+
+ local->fop = fop;
+ local->offset = offset;
+ local->flags = flags;
+ if (iobref)
+ local->iobref = iobref_ref(iobref);
+ local->fd = fd_ref(fd);
+ local->block_size = block_size;
+ local->resolver_base_inode = local->fd->inode;
+ GF_ATOMIC_INIT(local->delta_blocks, 0);
+
+ local->loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->loc.gfid, fd->inode->gfid);
+
+ shard_refresh_base_file(frame, this, NULL, fd,
+ shard_common_inode_write_post_lookup_handler);
+ return 0;
+out:
+ shard_common_failure_unwind(fop, frame, -1, ENOMEM);
+ return 0;
+}
+
+int
+shard_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t offset, uint32_t flags,
+ struct iobref *iobref, dict_t *xdata)
+{
+ shard_common_inode_write_begin(frame, this, GF_FOP_WRITE, fd, vector, count,
+ offset, flags, 0, iobref, xdata);
+ return 0;
+}
+
+int
+shard_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, off_t offset, size_t len, dict_t *xdata)
+{
+ if ((keep_size != 0) && (keep_size != FALLOC_FL_ZERO_RANGE) &&
+ (keep_size != (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)))
+ goto out;
+
+ shard_common_inode_write_begin(frame, this, GF_FOP_FALLOCATE, fd, NULL, 0,
+ offset, keep_size, len, NULL, xdata);
+ return 0;
+out:
+ shard_common_failure_unwind(GF_FOP_FALLOCATE, frame, -1, ENOTSUP);
+ return 0;
+}
+
+int
+shard_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ shard_common_inode_write_begin(frame, this, GF_FOP_ZEROFILL, fd, NULL, 0,
+ offset, 0, len, NULL, xdata);
+ return 0;
+}
+
+int
+shard_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ shard_common_inode_write_begin(frame, this, GF_FOP_DISCARD, fd, NULL, 0,
+ offset, 0, len, NULL, xdata);
+ return 0;
+}
+
+int32_t
+shard_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ /* TBD */
+ gf_msg(this->name, GF_LOG_INFO, ENOTSUP, SHARD_MSG_FOP_NOT_SUPPORTED,
+ "seek called on %s.", uuid_utoa(fd->inode->gfid));
+ shard_common_failure_unwind(GF_FOP_SEEK, frame, -1, ENOTSUP);
+ return 0;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, gf_shard_mt_end + 1);
+
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_MEM_ACCT_INIT_FAILED,
+ "Memory accounting init"
+ "failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int
+init(xlator_t *this)
+{
+ int ret = -1;
+ shard_priv_t *priv = NULL;
+
+ if (!this) {
+ gf_msg("shard", GF_LOG_ERROR, 0, SHARD_MSG_NULL_THIS,
+ "this is NULL. init() failed");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE,
+ "Dangling volume. Check volfile");
+ goto out;
+ }
+
+ if (!this->children || this->children->next) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SHARD_MSG_INVALID_VOLFILE,
+ "shard not configured with exactly one sub-volume. "
+ "Check volfile");
+ goto out;
+ }
+
+ priv = GF_CALLOC(1, sizeof(shard_priv_t), gf_shard_mt_priv_t);
+ if (!priv)
+ goto out;
+
+ GF_OPTION_INIT("shard-block-size", priv->block_size, size_uint64, out);
+
+ GF_OPTION_INIT("shard-deletion-rate", priv->deletion_rate, uint32, out);
+
+ GF_OPTION_INIT("shard-lru-limit", priv->lru_limit, uint64, out);
+
+ this->local_pool = mem_pool_new(shard_local_t, 128);
+ if (!this->local_pool) {
+ ret = -1;
+ goto out;
+ }
+ gf_uuid_parse(SHARD_ROOT_GFID, priv->dot_shard_gfid);
+ gf_uuid_parse(DOT_SHARD_REMOVE_ME_GFID, priv->dot_shard_rm_gfid);
+
+ this->private = priv;
+ LOCK_INIT(&priv->lock);
+ INIT_LIST_HEAD(&priv->ilist_head);
+ ret = 0;
+out:
+ if (ret) {
+ GF_FREE(priv);
+ mem_pool_destroy(this->local_pool);
+ }
+
+ return ret;
+}
+
+void
+fini(xlator_t *this)
+{
+ shard_priv_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("shard", this, out);
+
+ /*Itable was not created by shard, hence setting to NULL.*/
+ this->itable = NULL;
+
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+
+ priv = this->private;
+ if (!priv)
+ goto out;
+
+ this->private = NULL;
+ LOCK_DESTROY(&priv->lock);
+ GF_FREE(priv);
+
+out:
+ return;
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ int ret = -1;
+ shard_priv_t *priv = NULL;
+
+ priv = this->private;
+
+ GF_OPTION_RECONF("shard-block-size", priv->block_size, options, size, out);
+
+ GF_OPTION_RECONF("shard-deletion-rate", priv->deletion_rate, options,
+ uint32, out);
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int
+shard_forget(xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx_uint = 0;
+ shard_inode_ctx_t *ctx = NULL;
+ shard_priv_t *priv = NULL;
+
+ priv = this->private;
+ if (!priv)
+ return 0;
+
+ inode_ctx_del(inode, this, &ctx_uint);
+ if (!ctx_uint)
+ return 0;
+
+ ctx = (shard_inode_ctx_t *)(uintptr_t)ctx_uint;
+
+ /* When LRU limit reaches inode will be forcefully removed from the
+ * table, inode needs to be removed from LRU of shard as well.
+ */
+ if (!list_empty(&ctx->ilist)) {
+ LOCK(&priv->lock);
+ {
+ list_del_init(&ctx->ilist);
+ priv->inode_count--;
+ }
+ UNLOCK(&priv->lock);
+ }
+ GF_FREE(ctx);
+
+ return 0;
+}
+
+int
+shard_release(xlator_t *this, fd_t *fd)
+{
+ /* TBD */
+ return 0;
+}
+
+int
+shard_priv_dump(xlator_t *this)
+{
+ shard_priv_t *priv = NULL;
+ char key_prefix[GF_DUMP_MAX_BUF_LEN] = {
+ 0,
+ };
+ char *str = NULL;
+
+ priv = this->private;
+
+ snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.%s", this->type, this->name);
+ gf_proc_dump_add_section("%s", key_prefix);
+ str = gf_uint64_2human_readable(priv->block_size);
+ gf_proc_dump_write("shard-block-size", "%s", str);
+ gf_proc_dump_write("inode-count", "%d", priv->inode_count);
+ gf_proc_dump_write("ilist_head", "%p", &priv->ilist_head);
+ gf_proc_dump_write("lru-max-limit", "%" PRIu64, priv->lru_limit);
+
+ GF_FREE(str);
+
+ return 0;
+}
+
+int
+shard_releasedir(xlator_t *this, fd_t *fd)
+{
+ return 0;
+}
+
+struct xlator_fops fops = {
+ .lookup = shard_lookup,
+ .open = shard_open,
+ .flush = shard_flush,
+ .fsync = shard_fsync,
+ .stat = shard_stat,
+ .fstat = shard_fstat,
+ .getxattr = shard_getxattr,
+ .fgetxattr = shard_fgetxattr,
+ .readv = shard_readv,
+ .writev = shard_writev,
+ .truncate = shard_truncate,
+ .ftruncate = shard_ftruncate,
+ .setxattr = shard_setxattr,
+ .fsetxattr = shard_fsetxattr,
+ .setattr = shard_setattr,
+ .fsetattr = shard_fsetattr,
+ .removexattr = shard_removexattr,
+ .fremovexattr = shard_fremovexattr,
+ .fallocate = shard_fallocate,
+ .discard = shard_discard,
+ .zerofill = shard_zerofill,
+ .readdir = shard_readdir,
+ .readdirp = shard_readdirp,
+ .create = shard_create,
+ .mknod = shard_mknod,
+ .link = shard_link,
+ .unlink = shard_unlink,
+ .rename = shard_rename,
+ .seek = shard_seek,
+};
+
+struct xlator_cbks cbks = {
+ .forget = shard_forget,
+ .release = shard_release,
+ .releasedir = shard_releasedir,
+};
+
+struct xlator_dumpops dumpops = {
+ .priv = shard_priv_dump,
+};
+
+struct volume_options options[] = {
+ {
+ .key = {"shard"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "enable/disable shard",
+ .op_version = {GD_OP_VERSION_6_0},
+ .flags = OPT_FLAG_SETTABLE,
+ },
+ {
+ .key = {"shard-block-size"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"shard"},
+ .default_value = "64MB",
+ .min = SHARD_MIN_BLOCK_SIZE,
+ .max = SHARD_MAX_BLOCK_SIZE,
+ .description = "The size unit used to break a file into multiple "
+ "chunks",
+ },
+ {
+ .key = {"shard-deletion-rate"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {GD_OP_VERSION_5_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"shard"},
+ .default_value = "100",
+ .min = 100,
+ .max = INT_MAX,
+ .description = "The number of shards to send deletes on at a time",
+ },
+ {
+ .key = {"shard-lru-limit"},
+ .type = GF_OPTION_TYPE_INT,
+ .op_version = {GD_OP_VERSION_5_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT,
+ .tags = {"shard"},
+ .default_value = "16384",
+ .min = 20,
+ .max = INT_MAX,
+ .description = "The number of resolved shard inodes to keep in "
+ "memory. A higher number means shards that are "
+ "resolved will remain in memory longer, avoiding "
+ "frequent lookups on them when they participate in "
+ "file operations. The option also has a bearing on "
+ "amount of memory consumed by these inodes and their "
+ "internal metadata",
+ },
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "shard",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/shard/src/shard.h b/xlators/features/shard/src/shard.h
new file mode 100644
index 00000000000..4fe181b64d5
--- /dev/null
+++ b/xlators/features/shard/src/shard.h
@@ -0,0 +1,348 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __SHARD_H__
+#define __SHARD_H__
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/compat-errno.h>
+#include "shard-messages.h"
+#include <glusterfs/syncop.h>
+
+#define GF_SHARD_DIR ".shard"
+#define GF_SHARD_REMOVE_ME_DIR ".remove_me"
+#define SHARD_MIN_BLOCK_SIZE (4 * GF_UNIT_MB)
+#define SHARD_MAX_BLOCK_SIZE (4 * GF_UNIT_TB)
+#define SHARD_XATTR_PREFIX "trusted.glusterfs.shard."
+#define GF_XATTR_SHARD_BLOCK_SIZE "trusted.glusterfs.shard.block-size"
+/**
+ * Bit masks for the valid flag, which is used while updating ctx
+ **/
+#define SHARD_MASK_BLOCK_SIZE (1 << 0)
+#define SHARD_MASK_PROT (1 << 1)
+#define SHARD_MASK_NLINK (1 << 2)
+#define SHARD_MASK_UID (1 << 3)
+#define SHARD_MASK_GID (1 << 4)
+#define SHARD_MASK_SIZE (1 << 6)
+#define SHARD_MASK_BLOCKS (1 << 7)
+#define SHARD_MASK_TIMES (1 << 8)
+#define SHARD_MASK_OTHERS (1 << 9)
+#define SHARD_MASK_REFRESH_RESET (1 << 10)
+
+#define SHARD_INODE_WRITE_MASK \
+ (SHARD_MASK_SIZE | SHARD_MASK_BLOCKS | SHARD_MASK_TIMES)
+
+#define SHARD_LOOKUP_MASK \
+ (SHARD_MASK_PROT | SHARD_MASK_NLINK | SHARD_MASK_UID | SHARD_MASK_GID | \
+ SHARD_MASK_TIMES | SHARD_MASK_OTHERS)
+
+#define SHARD_ALL_MASK \
+ (SHARD_MASK_BLOCK_SIZE | SHARD_MASK_PROT | SHARD_MASK_NLINK | \
+ SHARD_MASK_UID | SHARD_MASK_GID | SHARD_MASK_SIZE | SHARD_MASK_BLOCKS | \
+ SHARD_MASK_TIMES | SHARD_MASK_OTHERS)
+
+#define get_lowest_block(off, shard_size) ((off) / (shard_size))
+#define get_highest_block(off, len, shard_size) \
+ (((((off) + (len)) == 0) ? 0 : ((off) + (len)-1)) / (shard_size))
+
+int
+shard_unlock_inodelk(call_frame_t *frame, xlator_t *this);
+
+int
+shard_unlock_entrylk(call_frame_t *frame, xlator_t *this);
+
+#define SHARD_ENTRY_FOP_CHECK(loc, op_errno, label) \
+ do { \
+ if ((loc->name && !strcmp(GF_SHARD_DIR, loc->name)) && \
+ (((loc->parent) && __is_root_gfid(loc->parent->gfid)) || \
+ __is_root_gfid(loc->pargfid))) { \
+ op_errno = EPERM; \
+ goto label; \
+ } \
+ \
+ if ((loc->parent && __is_shard_dir(loc->parent->gfid)) || \
+ __is_shard_dir(loc->pargfid)) { \
+ op_errno = EPERM; \
+ goto label; \
+ } \
+ } while (0)
+
+#define SHARD_INODE_OP_CHECK(gfid, err, label) \
+ do { \
+ if (__is_shard_dir(gfid)) { \
+ err = EPERM; \
+ goto label; \
+ } \
+ } while (0)
+
+#define SHARD_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ shard_local_t *__local = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ if (__local && __local->int_inodelk.acquired_lock) \
+ shard_unlock_inodelk(frame, frame->this); \
+ if (__local && __local->int_entrylk.acquired_lock) \
+ shard_unlock_entrylk(frame, frame->this); \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ if (__local) { \
+ shard_local_wipe(__local); \
+ mem_put(__local); \
+ } \
+ } while (0)
+
+#define SHARD_STACK_DESTROY(frame) \
+ do { \
+ shard_local_t *__local = NULL; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY(frame->root); \
+ if (__local) { \
+ shard_local_wipe(__local); \
+ mem_put(__local); \
+ } \
+ } while (0);
+
+#define SHARD_INODE_CREATE_INIT(this, block_size, xattr_req, loc, size, \
+ block_count, label) \
+ do { \
+ int __ret = -1; \
+ int64_t *__size_attr = NULL; \
+ uint64_t *__bs = 0; \
+ \
+ __bs = GF_MALLOC(sizeof(uint64_t), gf_shard_mt_uint64_t); \
+ if (!__bs) \
+ goto label; \
+ *__bs = hton64(block_size); \
+ __ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_BLOCK_SIZE, __bs, \
+ sizeof(*__bs)); \
+ if (__ret) { \
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, \
+ "Failed to set key: %s " \
+ "on path %s", \
+ GF_XATTR_SHARD_BLOCK_SIZE, (loc)->path); \
+ GF_FREE(__bs); \
+ goto label; \
+ } \
+ \
+ __ret = shard_set_size_attrs(size, block_count, &__size_attr); \
+ if (__ret) \
+ goto label; \
+ \
+ __ret = dict_set_bin(xattr_req, GF_XATTR_SHARD_FILE_SIZE, __size_attr, \
+ 8 * 4); \
+ if (__ret) { \
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, \
+ "Failed to set key: %s " \
+ "on path %s", \
+ GF_XATTR_SHARD_FILE_SIZE, (loc)->path); \
+ GF_FREE(__size_attr); \
+ goto label; \
+ } \
+ } while (0)
+
+#define SHARD_MD_READ_FOP_INIT_REQ_DICT(this, dict, gfid, local, label) \
+ do { \
+ int __ret = -1; \
+ \
+ __ret = dict_set_uint64(dict, GF_XATTR_SHARD_FILE_SIZE, 8 * 4); \
+ if (__ret) { \
+ local->op_ret = -1; \
+ local->op_errno = ENOMEM; \
+ gf_msg(this->name, GF_LOG_WARNING, 0, SHARD_MSG_DICT_OP_FAILED, \
+ "Failed to set dict value:" \
+ " key:%s for %s.", \
+ GF_XATTR_SHARD_FILE_SIZE, uuid_utoa(gfid)); \
+ goto label; \
+ } \
+ } while (0)
+
+#define SHARD_SET_ROOT_FS_ID(frame, local) \
+ do { \
+ if (!local->is_set_fsid) { \
+ local->uid = frame->root->uid; \
+ local->gid = frame->root->gid; \
+ frame->root->uid = 0; \
+ frame->root->gid = 0; \
+ local->is_set_fsid = _gf_true; \
+ } \
+ } while (0)
+
+#define SHARD_UNSET_ROOT_FS_ID(frame, local) \
+ do { \
+ if (local->is_set_fsid) { \
+ frame->root->uid = local->uid; \
+ frame->root->gid = local->gid; \
+ local->is_set_fsid = _gf_false; \
+ } \
+ } while (0)
+
+#define SHARD_TIME_UPDATE(ctx_sec, ctx_nsec, new_sec, new_nsec) \
+ do { \
+ if (ctx_sec == new_sec) \
+ ctx_nsec = new_nsec = max(new_nsec, ctx_nsec); \
+ else if (ctx_sec > new_sec) { \
+ new_sec = ctx_sec; \
+ new_nsec = ctx_nsec; \
+ } else { \
+ ctx_sec = new_sec; \
+ ctx_nsec = new_nsec; \
+ } \
+ } while (0)
+
+typedef enum {
+ SHARD_BG_DELETION_NONE = 0,
+ SHARD_BG_DELETION_LAUNCHING,
+ SHARD_BG_DELETION_IN_PROGRESS,
+} shard_bg_deletion_state_t;
+
+/* rm = "remove me" */
+
+typedef struct shard_priv {
+ uint64_t block_size;
+ uuid_t dot_shard_gfid;
+ uuid_t dot_shard_rm_gfid;
+ inode_t *dot_shard_inode;
+ inode_t *dot_shard_rm_inode;
+ gf_lock_t lock;
+ int inode_count;
+ struct list_head ilist_head;
+ uint32_t deletion_rate;
+ shard_bg_deletion_state_t bg_del_state;
+ gf_boolean_t first_lookup_done;
+ uint64_t lru_limit;
+} shard_priv_t;
+
+typedef struct {
+ loc_t loc;
+ char *domain;
+ struct gf_flock flock;
+ gf_boolean_t acquired_lock;
+} shard_inodelk_t;
+
+typedef struct {
+ loc_t loc;
+ char *domain;
+ char *basename;
+ entrylk_cmd cmd;
+ entrylk_type type;
+ gf_boolean_t acquired_lock;
+} shard_entrylk_t;
+
+typedef int32_t (*shard_post_fop_handler_t)(call_frame_t *frame,
+ xlator_t *this);
+typedef int32_t (*shard_post_resolve_fop_handler_t)(call_frame_t *frame,
+ xlator_t *this);
+typedef int32_t (*shard_post_lookup_shards_fop_handler_t)(call_frame_t *frame,
+ xlator_t *this);
+
+typedef int32_t (*shard_post_mknod_fop_handler_t)(call_frame_t *frame,
+ xlator_t *this);
+
+typedef int32_t (*shard_post_update_size_fop_handler_t)(call_frame_t *frame,
+ xlator_t *this);
+
+typedef struct shard_local {
+ int op_ret;
+ int op_errno;
+ uint64_t first_block;
+ uint64_t last_block;
+ uint64_t num_blocks;
+ int call_count;
+ int eexist_count;
+ int create_count;
+ int xflag;
+ int count;
+ uint32_t flags;
+ uint32_t uid;
+ uint32_t gid;
+ uint64_t block_size;
+ uint64_t dst_block_size;
+ int32_t datasync;
+ off_t offset;
+ size_t total_size;
+ size_t written_size;
+ size_t hole_size;
+ size_t req_size;
+ size_t readdir_size;
+ int64_t delta_size;
+ gf_atomic_t delta_blocks;
+ loc_t loc;
+ loc_t dot_shard_loc;
+ loc_t dot_shard_rm_loc;
+ loc_t loc2;
+ loc_t tmp_loc;
+ fd_t *fd;
+ dict_t *xattr_req;
+ dict_t *xattr_rsp;
+ inode_t **inode_list;
+ glusterfs_fop_t fop;
+ struct iatt prebuf;
+ struct iatt postbuf;
+ struct iatt preoldparent;
+ struct iatt postoldparent;
+ struct iatt prenewparent;
+ struct iatt postnewparent;
+ struct iovec *vector;
+ struct iobref *iobref;
+ struct iobuf *iobuf;
+ gf_dirent_t entries_head;
+ gf_boolean_t is_set_fsid;
+ gf_boolean_t list_inited;
+ shard_post_fop_handler_t handler;
+ shard_post_lookup_shards_fop_handler_t pls_fop_handler;
+ shard_post_resolve_fop_handler_t post_res_handler;
+ shard_post_mknod_fop_handler_t post_mknod_handler;
+ shard_post_update_size_fop_handler_t post_update_size_handler;
+ shard_inodelk_t int_inodelk;
+ shard_entrylk_t int_entrylk;
+ inode_t *resolver_base_inode;
+ gf_boolean_t first_lookup_done;
+ syncbarrier_t barrier;
+ gf_boolean_t lookup_shards_barriered;
+ gf_boolean_t unlink_shards_barriered;
+ gf_boolean_t resolve_not;
+ loc_t newloc;
+ call_frame_t *main_frame;
+ call_frame_t *inodelk_frame;
+ call_frame_t *entrylk_frame;
+ uint32_t deletion_rate;
+ gf_boolean_t cleanup_required;
+ uuid_t base_gfid;
+ char *name;
+} shard_local_t;
+
+typedef struct shard_inode_ctx {
+ uint64_t block_size; /* The block size with which this inode is
+ sharded */
+ struct iatt stat;
+ gf_boolean_t refresh;
+ /* The following members of inode ctx will be applicable only to the
+ * individual shards' ctx and never the base file ctx.
+ */
+ struct list_head ilist;
+ uuid_t base_gfid;
+ int block_num;
+ gf_boolean_t refreshed;
+ struct list_head to_fsync_list;
+ int fsync_needed;
+ inode_t *inode;
+ int fsync_count;
+ inode_t *base_inode;
+} shard_inode_ctx_t;
+
+typedef enum {
+ SHARD_INTERNAL_DIR_DOT_SHARD = 1,
+ SHARD_INTERNAL_DIR_DOT_SHARD_REMOVE_ME,
+} shard_internal_dir_type_t;
+
+#endif /* __SHARD_H__ */
diff --git a/xlators/features/snapview-client/Makefile.am b/xlators/features/snapview-client/Makefile.am
new file mode 100644
index 00000000000..af437a64d6d
--- /dev/null
+++ b/xlators/features/snapview-client/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
diff --git a/xlators/features/snapview-client/src/Makefile.am b/xlators/features/snapview-client/src/Makefile.am
new file mode 100644
index 00000000000..fa08656c537
--- /dev/null
+++ b/xlators/features/snapview-client/src/Makefile.am
@@ -0,0 +1,16 @@
+xlator_LTLIBRARIES = snapview-client.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+snapview_client_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+snapview_client_la_SOURCES = snapview-client.c
+snapview_client_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = snapview-client.h snapview-client-mem-types.h snapview-client-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/snapview-client/src/snapview-client-mem-types.h b/xlators/features/snapview-client/src/snapview-client-mem-types.h
new file mode 100644
index 00000000000..3c3ab555a55
--- /dev/null
+++ b/xlators/features/snapview-client/src/snapview-client-mem-types.h
@@ -0,0 +1,24 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _SVC_MEM_TYPES_H
+#define _SVC_MEM_TYPES_H
+
+#include <glusterfs/mem-types.h>
+
+enum svc_mem_types {
+ gf_svc_mt_svc_private_t = gf_common_mt_end + 1,
+ gf_svc_mt_svc_local_t,
+ gf_svc_mt_svc_inode_t,
+ gf_svc_mt_svc_fd_t,
+ gf_svc_mt_end
+};
+
+#endif
diff --git a/xlators/features/snapview-client/src/snapview-client-messages.h b/xlators/features/snapview-client/src/snapview-client-messages.h
new file mode 100644
index 00000000000..c02fb154930
--- /dev/null
+++ b/xlators/features/snapview-client/src/snapview-client-messages.h
@@ -0,0 +1,71 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _SNAPVIEW_CLIENT_MESSAGES_H_
+#define _SNAPVIEW_CLIENT_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(SNAPVIEW_CLIENT, SVC_MSG_NO_MEMORY, SVC_MSG_MEM_ACNT_FAILED,
+ SVC_MSG_SET_INODE_CONTEXT_FAILED, SVC_MSG_GET_INODE_CONTEXT_FAILED,
+ SVC_MSG_DELETE_INODE_CONTEXT_FAILED, SVC_MSG_SET_FD_CONTEXT_FAILED,
+ SVC_MSG_GET_FD_CONTEXT_FAILED, SVC_MSG_DICT_SET_FAILED,
+ SVC_MSG_SUBVOLUME_NULL, SVC_MSG_NO_CHILD_FOR_XLATOR,
+ SVC_MSG_XLATOR_CHILDREN_WRONG, SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL,
+ SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL, SVC_MSG_OPENDIR_SPECIAL_DIR,
+ SVC_MSG_RENAME_SNAPSHOT_ENTRY, SVC_MSG_LINK_SNAPSHOT_ENTRY,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, SVC_MSG_ENTRY_POINT_SPECIAL_DIR,
+ SVC_MSG_STR_LEN, SVC_MSG_INVALID_ENTRY_POINT, SVC_MSG_NULL_PRIV,
+ SVC_MSG_PRIV_DESTROY_FAILED, SVC_MSG_ALLOC_FD_FAILED,
+ SVC_MSG_ALLOC_INODE_FAILED, SVC_MSG_NULL_SPECIAL_DIR,
+ SVC_MSG_MEM_POOL_GET_FAILED);
+
+#define SVC_MSG_ALLOC_FD_FAILED_STR "failed to allocate new fd context"
+#define SVC_MSG_SET_FD_CONTEXT_FAILED_STR "failed to set fd context"
+#define SVC_MSG_STR_LEN_STR \
+ "destination buffer size is less than the length of entry point name"
+#define SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL_STR "lookup failed on normal graph"
+#define SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL_STR "lookup failed on snapview graph"
+#define SVC_MSG_SET_INODE_CONTEXT_FAILED_STR "failed to set inode context"
+#define SVC_MSG_NO_MEMORY_STR "failed to allocate memory"
+#define SVC_MSG_COPY_ENTRY_POINT_FAILED_STR \
+ "failed to copy the entry point string"
+#define SVC_MSG_GET_FD_CONTEXT_FAILED_STR "fd context not found"
+#define SVC_MSG_GET_INODE_CONTEXT_FAILED_STR "failed to get inode context"
+#define SVC_MSG_ALLOC_INODE_FAILED_STR "failed to allocate new inode"
+#define SVC_MSG_DICT_SET_FAILED_STR "failed to set dict"
+#define SVC_MSG_RENAME_SNAPSHOT_ENTRY_STR \
+ "rename happening on a entry residing in snapshot"
+#define SVC_MSG_DELETE_INODE_CONTEXT_FAILED_STR "failed to delete inode context"
+#define SVC_MSG_NULL_PRIV_STR "priv NULL"
+#define SVC_MSG_INVALID_ENTRY_POINT_STR "not a valid entry point"
+#define SVC_MSG_MEM_ACNT_FAILED_STR "Memory accouting init failed"
+#define SVC_MSG_NO_CHILD_FOR_XLATOR_STR "configured without any child"
+#define SVC_MSG_XLATOR_CHILDREN_WRONG_STR \
+ "snap-view-client has got wrong subvolumes. It can have only 2"
+#define SVC_MSG_ENTRY_POINT_SPECIAL_DIR_STR \
+ "entry point directory cannot be part of special directory"
+#define SVC_MSG_NULL_SPECIAL_DIR_STR "null special directory"
+#define SVC_MSG_MEM_POOL_GET_FAILED_STR \
+ "could not get mem pool for frame->local"
+#define SVC_MSG_PRIV_DESTROY_FAILED_STR "failed to destroy private"
+#define SVC_MSG_LINK_SNAPSHOT_ENTRY_STR \
+ "link happening on a entry residin gin snapshot"
+#endif /* !_SNAPVIEW_CLIENT_MESSAGES_H_ */
diff --git a/xlators/features/snapview-client/src/snapview-client.c b/xlators/features/snapview-client/src/snapview-client.c
new file mode 100644
index 00000000000..486c5179d5b
--- /dev/null
+++ b/xlators/features/snapview-client/src/snapview-client.c
@@ -0,0 +1,2791 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "snapview-client.h"
+#include <glusterfs/inode.h>
+#include <glusterfs/byte-order.h>
+
+static void
+svc_local_free(svc_local_t *local)
+{
+ if (local) {
+ loc_wipe(&local->loc);
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->xdata)
+ dict_unref(local->xdata);
+ mem_put(local);
+ }
+}
+
+static xlator_t *
+svc_get_subvolume(xlator_t *this, int inode_type)
+{
+ xlator_t *subvolume = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+
+ if (inode_type == VIRTUAL_INODE)
+ subvolume = SECOND_CHILD(this);
+ else
+ subvolume = FIRST_CHILD(this);
+
+out:
+ return subvolume;
+}
+
+static int32_t
+__svc_inode_ctx_set(xlator_t *this, inode_t *inode, int inode_type)
+{
+ uint64_t value = 0;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ value = inode_type;
+
+ ret = __inode_ctx_set(inode, this, &value);
+
+out:
+ return ret;
+}
+
+static int
+__svc_inode_ctx_get(xlator_t *this, inode_t *inode, int *inode_type)
+{
+ uint64_t value = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ ret = __inode_ctx_get(inode, this, &value);
+ if (ret < 0)
+ goto out;
+
+ *inode_type = (int)(value);
+
+out:
+ return ret;
+}
+
+static int
+svc_inode_ctx_get(xlator_t *this, inode_t *inode, int *inode_type)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ LOCK(&inode->lock);
+ {
+ ret = __svc_inode_ctx_get(this, inode, inode_type);
+ }
+ UNLOCK(&inode->lock);
+
+out:
+ return ret;
+}
+
+static int32_t
+svc_inode_ctx_set(xlator_t *this, inode_t *inode, int inode_type)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ LOCK(&inode->lock);
+ {
+ ret = __svc_inode_ctx_set(this, inode, inode_type);
+ }
+ UNLOCK(&inode->lock);
+
+out:
+ return ret;
+}
+
+static svc_fd_t *
+svc_fd_new(void)
+{
+ svc_fd_t *svc_fd = NULL;
+
+ svc_fd = GF_CALLOC(1, sizeof(*svc_fd), gf_svc_mt_svc_fd_t);
+
+ return svc_fd;
+}
+
+static svc_fd_t *
+__svc_fd_ctx_get(xlator_t *this, fd_t *fd)
+{
+ svc_fd_t *svc_fd = NULL;
+ uint64_t value = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ ret = __fd_ctx_get(fd, this, &value);
+ if (ret)
+ return NULL;
+
+ svc_fd = (svc_fd_t *)((long)value);
+
+out:
+ return svc_fd;
+}
+
+static svc_fd_t *
+svc_fd_ctx_get(xlator_t *this, fd_t *fd)
+{
+ svc_fd_t *svc_fd = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ LOCK(&fd->lock);
+ {
+ svc_fd = __svc_fd_ctx_get(this, fd);
+ }
+ UNLOCK(&fd->lock);
+
+out:
+ return svc_fd;
+}
+
+static int
+__svc_fd_ctx_set(xlator_t *this, fd_t *fd, svc_fd_t *svc_fd)
+{
+ uint64_t value = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, svc_fd, out);
+
+ value = (uint64_t)(long)svc_fd;
+
+ ret = __fd_ctx_set(fd, this, value);
+
+out:
+ return ret;
+}
+
+static svc_fd_t *
+__svc_fd_ctx_get_or_new(xlator_t *this, fd_t *fd)
+{
+ svc_fd_t *svc_fd = NULL;
+ int ret = -1;
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ inode = fd->inode;
+ svc_fd = __svc_fd_ctx_get(this, fd);
+ if (svc_fd) {
+ ret = 0;
+ goto out;
+ }
+
+ svc_fd = svc_fd_new();
+ if (!svc_fd) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_ALLOC_FD_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+ goto out;
+ }
+
+ ret = __svc_fd_ctx_set(this, fd, svc_fd);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+ ret = -1;
+ }
+
+out:
+ if (ret) {
+ GF_FREE(svc_fd);
+ svc_fd = NULL;
+ }
+
+ return svc_fd;
+}
+
+static svc_fd_t *
+svc_fd_ctx_get_or_new(xlator_t *this, fd_t *fd)
+{
+ svc_fd_t *svc_fd = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ LOCK(&fd->lock);
+ {
+ svc_fd = __svc_fd_ctx_get_or_new(this, fd);
+ }
+ UNLOCK(&fd->lock);
+
+out:
+ return svc_fd;
+}
+
+/**
+ * @this: xlator
+ * @entry_point: pointer to the buffer provided by consumer
+ *
+ * This function is mainly for copying the entry point name
+ * (stored as string in priv->path) to a buffer point to by
+ * @entry_point within the lock. It is for the consumer to
+ * allocate the memory for the buffer.
+ *
+ * This function is called by all the functions (or fops)
+ * who need to use priv->path for avoiding the race.
+ * For example, either in lookup or in any other fop,
+ * while priv->path is being accessed, a reconfigure can
+ * happen to change priv->path. This ensures that, a lock
+ * is taken before accessing priv->path.
+ **/
+int
+gf_svc_get_entry_point(xlator_t *this, char *entry_point, size_t dest_size)
+{
+ int ret = -1;
+ svc_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, entry_point, out);
+
+ priv = this->private;
+
+ LOCK(&priv->lock);
+ {
+ if (dest_size <= strlen(priv->path)) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_STR_LEN,
+ "dest-size=%zu", dest_size, "priv-path-len=%zu",
+ strlen(priv->path), "path=%s", priv->path, NULL);
+ } else {
+ snprintf(entry_point, dest_size, "%s", priv->path);
+ ret = 0;
+ }
+ }
+ UNLOCK(&priv->lock);
+
+out:
+ return ret;
+}
+
+static int32_t
+gf_svc_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
+{
+ svc_local_t *local = NULL;
+ xlator_t *subvolume = NULL;
+ gf_boolean_t do_unwind = _gf_true;
+ int inode_type = -1;
+ int ret = -1;
+
+ local = frame->local;
+ subvolume = local->subvolume;
+ if (!subvolume) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, 0, SVC_MSG_SUBVOLUME_NULL,
+ "path: %s gfid: %s ", local->loc.path,
+ inode ? uuid_utoa(inode->gfid) : "");
+ GF_ASSERT(0);
+ }
+
+ /* There is a possibility that, the client process just came online
+ and does not have the inode on which the lookup came. In that case,
+ the fresh inode created from fuse for the lookup fop, won't have
+ the inode context set without which svc cannot decide where to
+ STACK_WIND to. So by default it decides to send the fop to the
+ regular subvolume (i.e first child of the xlator). If lookup fails
+ on the regular volume, then there is a possibility that the lookup
+ is happening on a virtual inode (i.e history data residing in snaps).
+ So if lookup fails with ENOENT and the inode context is not there,
+ then send the lookup to the 2nd child of svc.
+
+ If there are any changes in volfile/client-restarted then inode-ctx
+ is lost. In this case if nameless lookup fails with ESTALE,
+ then send the lookup to the 2nd child of svc.
+ */
+ if (op_ret) {
+ if (subvolume == FIRST_CHILD(this)) {
+ gf_smsg(this->name,
+ (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG
+ : GF_LOG_ERROR,
+ op_errno, SVC_MSG_NORMAL_GRAPH_LOOKUP_FAIL, "error=%s",
+ strerror(op_errno), NULL);
+ } else {
+ gf_smsg(this->name,
+ (op_errno == ENOENT || op_errno == ESTALE) ? GF_LOG_DEBUG
+ : GF_LOG_ERROR,
+ op_errno, SVC_MSG_SNAPVIEW_GRAPH_LOOKUP_FAIL, "error=%s",
+ strerror(op_errno), NULL);
+ goto out;
+ }
+
+ if ((op_errno == ENOENT || op_errno == ESTALE) &&
+ !gf_uuid_is_null(local->loc.gfid)) {
+ if (inode != NULL)
+ ret = svc_inode_ctx_get(this, inode, &inode_type);
+
+ if (ret < 0 || inode == NULL) {
+ gf_msg_debug(this->name, 0,
+ "Lookup on normal graph failed. "
+ " Sending lookup to snapview-server");
+ subvolume = SECOND_CHILD(this);
+ local->subvolume = subvolume;
+ STACK_WIND(frame, gf_svc_lookup_cbk, subvolume,
+ subvolume->fops->lookup, &local->loc, xdata);
+ do_unwind = _gf_false;
+ }
+ }
+
+ goto out;
+ }
+
+ if (subvolume == FIRST_CHILD(this))
+ inode_type = NORMAL_INODE;
+ else
+ inode_type = VIRTUAL_INODE;
+
+ ret = svc_inode_ctx_set(this, inode, inode_type);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(inode->gfid), NULL);
+
+out:
+ if (do_unwind) {
+ SVC_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, buf, xdata,
+ postparent);
+ }
+
+ return 0;
+}
+
+static int32_t
+gf_svc_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int32_t ret = -1;
+ svc_local_t *local = NULL;
+ xlator_t *subvolume = NULL;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ inode_t *parent = NULL;
+ dict_t *new_xdata = NULL;
+ int inode_type = -1;
+ int parent_type = -1;
+ gf_boolean_t wind = _gf_false;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ ret = svc_inode_ctx_get(this, loc->inode, &inode_type);
+ if (!__is_root_gfid(loc->gfid)) {
+ if (loc->parent) {
+ parent = inode_ref(loc->parent);
+ ret = svc_inode_ctx_get(this, loc->parent, &parent_type);
+ } else {
+ parent = inode_parent(loc->inode, loc->pargfid, NULL);
+ if (parent)
+ ret = svc_inode_ctx_get(this, parent, &parent_type);
+ }
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ frame->local = local;
+ loc_copy(&local->loc, loc);
+
+ if (__is_root_gfid(loc->inode->gfid)) {
+ subvolume = FIRST_CHILD(this);
+ GF_ASSERT(subvolume);
+ local->subvolume = subvolume;
+ wind = _gf_true;
+ goto out;
+ }
+
+ /* nfs sends nameless lookups directly using the gfid. In that case
+ loc->name will be NULL. So check if loc->name is NULL. If so, then
+ try to get the subvolume using inode context. But if the inode has
+ not been looked up yet, then send the lookup call to the first
+ subvolume.
+ */
+
+ if (!loc->name) {
+ if (gf_uuid_is_null(loc->inode->gfid)) {
+ subvolume = FIRST_CHILD(this);
+ local->subvolume = subvolume;
+ wind = _gf_true;
+ goto out;
+ } else {
+ if (inode_type >= 0)
+ subvolume = svc_get_subvolume(this, inode_type);
+ else
+ subvolume = FIRST_CHILD(this);
+ local->subvolume = subvolume;
+ wind = _gf_true;
+ goto out;
+ }
+ }
+
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ if (strcmp(loc->name, entry_point)) {
+ if (parent_type == VIRTUAL_INODE) {
+ subvolume = SECOND_CHILD(this);
+ } else {
+ /*
+ * Either parent type is normal graph, or the parent
+ * type is uncertain.
+ */
+ subvolume = FIRST_CHILD(this);
+ }
+ local->subvolume = subvolume;
+ } else {
+ subvolume = SECOND_CHILD(this);
+ local->subvolume = subvolume;
+ if (parent_type == NORMAL_INODE) {
+ /* Indication of whether the lookup is happening on the
+ entry point or not, to the snapview-server.
+ */
+ SVC_ENTRY_POINT_SET(this, xdata, op_ret, op_errno, new_xdata, ret,
+ out);
+ }
+ }
+
+ wind = _gf_true;
+
+out:
+ if (wind)
+ STACK_WIND(frame, gf_svc_lookup_cbk, subvolume, subvolume->fops->lookup,
+ loc, xdata);
+ else
+ SVC_STACK_UNWIND(lookup, frame, op_ret, op_errno, NULL, NULL, NULL,
+ NULL);
+ if (new_xdata)
+ dict_unref(new_xdata);
+
+ if (parent)
+ inode_unref(parent);
+
+ return 0;
+}
+
+static int32_t
+gf_svc_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ xlator_t *subvolume = NULL;
+ int32_t ret = -1;
+ int inode_type = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+ svc_private_t *priv = NULL;
+ const char *path = NULL;
+ int path_len = -1;
+ int snap_len = -1;
+ loc_t root_loc = {
+ 0,
+ };
+ loc_t *temp_loc = NULL;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ priv = this->private;
+ SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, loc->inode,
+ subvolume, out);
+ path_len = strlen(loc->path);
+ snap_len = strlen(priv->path);
+ temp_loc = loc;
+
+ if (path_len >= snap_len && inode_type == VIRTUAL_INODE) {
+ path = &loc->path[path_len - snap_len];
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ if (!strcmp(path, entry_point)) {
+ /*
+ * statfs call for virtual snap directory.
+ * Sent the fops to parent volume by removing
+ * virtual directory from path
+ */
+ subvolume = FIRST_CHILD(this);
+ root_loc.path = gf_strdup("/");
+ gf_uuid_clear(root_loc.gfid);
+ root_loc.gfid[15] = 1;
+ root_loc.inode = inode_ref(loc->inode->table->root);
+ temp_loc = &root_loc;
+ }
+ }
+
+ STACK_WIND_TAIL(frame, subvolume, subvolume->fops->statfs, temp_loc, xdata);
+ if (temp_loc == &root_loc)
+ loc_wipe(temp_loc);
+
+ wind = _gf_true;
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(statfs, frame, op_ret, op_errno, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+gf_svc_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ /* TODO: FIX ME
+ * Consider a testcase:
+ * #mount -t nfs host1:/vol1 /mnt
+ * #ls /mnt
+ * #ls /mnt/.snaps (As expected this fails)
+ * #gluster volume set vol1 features.uss enable
+ * Now `ls /mnt/.snaps` should work, but fails with No such file or
+ * directory. This is because NFS client (gNFS) caches the list of files
+ * in a directory. This cache is updated if there are any changes in the
+ * directory attributes. So, one way to solve this problem is to change
+ * 'ctime' attribute when USS is enabled as below.
+ *
+ * if (op_ret == 0 && IA_ISDIR(buf->ia_type))
+ * buf->ia_ctime_nsec++;
+ *
+ * But this is not the ideal solution as applications see the unexpected
+ * ctime change causing failures.
+ */
+
+ SVC_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
+ return 0;
+}
+
+/* should all the fops be handled like lookup is supposed to be
+ handled? i.e just based on inode type decide where the call should
+ be sent and in the call back update the contexts.
+*/
+static int32_t
+gf_svc_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int32_t ret = -1;
+ int inode_type = -1;
+ xlator_t *subvolume = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, loc->inode,
+ subvolume, out);
+
+ STACK_WIND(frame, gf_svc_stat_cbk, subvolume, subvolume->fops->stat, loc,
+ xdata);
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(stat, frame, op_ret, op_errno, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+gf_svc_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int32_t ret = -1;
+ int inode_type = -1;
+ xlator_t *subvolume = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd->inode, out);
+
+ SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, fd->inode,
+ subvolume, out);
+
+ STACK_WIND_TAIL(frame, subvolume, subvolume->fops->fstat, fd, xdata);
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(fstat, frame, op_ret, op_errno, NULL, NULL);
+
+ return ret;
+}
+
+static int32_t
+gf_svc_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ svc_fd_t *svc_fd = NULL;
+ svc_local_t *local = NULL;
+ svc_private_t *priv = NULL;
+ gf_boolean_t special_dir = _gf_false;
+ char path[PATH_MAX] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ if (op_ret)
+ goto out;
+
+ priv = this->private;
+ local = frame->local;
+
+ if (local->subvolume == FIRST_CHILD(this) && priv->special_dir &&
+ strcmp(priv->special_dir, "")) {
+ if (!__is_root_gfid(fd->inode->gfid))
+ snprintf(path, sizeof(path), "%s/.", priv->special_dir);
+ else
+ snprintf(path, sizeof(path), "/.");
+
+ if (!strcmp(local->loc.path, priv->special_dir) ||
+ !strcmp(local->loc.path, path)) {
+ gf_msg_debug(this->name, 0,
+ "got opendir on special directory"
+ " %s (gfid: %s)",
+ path, uuid_utoa(fd->inode->gfid));
+ special_dir = _gf_true;
+ }
+ }
+
+ if (special_dir) {
+ svc_fd = svc_fd_ctx_get_or_new(this, fd);
+ if (!svc_fd) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto out;
+ }
+
+ svc_fd->last_offset = -1;
+ svc_fd->special_dir = special_dir;
+ }
+
+out:
+ STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, xdata);
+
+ return 0;
+}
+
+/* If the inode represents a directory which is actually
+ present in a snapshot, then opendir on that directory
+ should be sent to the snap-view-server which opens
+ the directory in the corresponding graph.
+ In fact any opendir call on a virtual directory
+ should be sent to svs. Because if it fakes success
+ here, then later when readdir on that fd comes, there
+ will not be any corresponding fd opened on svs and
+ svc has to do things that open-behind is doing.
+*/
+static int32_t
+gf_svc_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ int32_t ret = -1;
+ int inode_type = -1;
+ xlator_t *subvolume = NULL;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+ svc_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY,
+ "path=%s", loc->path, "gfid=%s", uuid_utoa(fd->inode->gfid),
+ NULL);
+ goto out;
+ }
+ loc_copy(&local->loc, loc);
+ frame->local = local;
+
+ SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, loc->inode,
+ subvolume, out);
+ local->subvolume = subvolume;
+
+ STACK_WIND(frame, gf_svc_opendir_cbk, subvolume, subvolume->fops->opendir,
+ loc, fd, xdata);
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(opendir, frame, op_ret, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+gf_svc_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ int32_t ret = -1;
+ int inode_type = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ ret = svc_inode_ctx_get(this, loc->inode, &inode_type);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "path=%s", loc->path,
+ "gfid= %s", uuid_utoa(loc->inode->gfid), NULL);
+ goto out;
+ }
+
+ if (inode_type == NORMAL_INODE) {
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid,
+ xdata);
+ } else {
+ op_ret = -1;
+ op_errno = EROFS;
+ goto out;
+ }
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(setattr, frame, op_ret, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+/* XXX: This function is currently not used. Remove "#if 0" when required */
+#if 0
+static int32_t
+gf_svc_fsetattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ int32_t ret = -1;
+ int inode_type = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO ("svc", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd->inode, out);
+
+ ret = svc_inode_ctx_get (this, fd->inode, &inode_type);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_msg (this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "failed to "
+ "get the inode context for %s",
+ uuid_utoa (fd->inode->gfid));
+ goto out;
+ }
+
+ if (inode_type == NORMAL_INODE) {
+ STACK_WIND_TAIL (frame, FIRST_CHILD (this),
+ FIRST_CHILD (this)->fops->fsetattr, fd, stbuf,
+ valid, xdata);
+ } else {
+ op_ret = -1;
+ op_errno = EROFS;
+ goto out;
+ }
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND (fsetattr, frame, op_ret, op_errno,
+ NULL, NULL, NULL);
+ return 0;
+}
+#endif /* gf_svc_fsetattr() is not used */
+
+static int32_t
+gf_svc_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int32_t ret = -1;
+ int inode_type = -1;
+ xlator_t *subvolume = NULL;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+ svc_private_t *priv = NULL;
+ char attrname[PATH_MAX] = "";
+ char attrval[64] = "";
+ dict_t *dict = NULL;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ /*
+ * Samba sends this special key for case insensitive
+ * filename check. This request comes with a parent
+ * path and with a special key GF_XATTR_GET_REAL_FILENAME_KEY.
+ * e.g. "glusterfs.get_real_filename:.snaps".
+ * If the name variable matches this key then we have
+ * to send back .snaps as the real filename.
+ */
+ if (!name)
+ goto stack_wind;
+
+ sscanf(name, "%[^:]:%[^@]", attrname, attrval);
+ strcat(attrname, ":");
+
+ if (!strcmp(attrname, GF_XATTR_GET_REAL_FILENAME_KEY)) {
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ if (!strcasecmp(attrval, entry_point)) {
+ dict = dict_new();
+ if (NULL == dict) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(dict, (char *)name, entry_point);
+
+ if (ret) {
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ op_errno = 0;
+ op_ret = strlen(entry_point) + 1;
+ /* We should return from here */
+ goto out;
+ }
+ }
+stack_wind:
+ SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, loc->inode,
+ subvolume, out);
+
+ STACK_WIND_TAIL(frame, subvolume, subvolume->fops->getxattr, loc, name,
+ xdata);
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, NULL);
+
+ if (dict)
+ dict_unref(dict);
+
+ return 0;
+}
+
+/* XXX: This function is currently not used. Mark it '#if 0' when required */
+#if 0
+static int32_t
+gf_svc_fgetxattr (call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ int32_t ret = -1;
+ int inode_type = -1;
+ xlator_t *subvolume = NULL;
+ gf_boolean_t wind = _gf_false;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+
+ GF_VALIDATE_OR_GOTO ("svc", this, out);
+ GF_VALIDATE_OR_GOTO (this->name, frame, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd, out);
+ GF_VALIDATE_OR_GOTO (this->name, fd->inode, out);
+
+ SVC_GET_SUBVOL_FROM_CTX (this, op_ret, op_errno, inode_type, ret,
+ fd->inode, subvolume, out);
+
+ STACK_WIND_TAIL (frame, subvolume,
+ subvolume->fops->fgetxattr, fd, name, xdata);
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND (fgetxattr, frame, op_ret, op_errno,
+ NULL, NULL);
+ return 0;
+}
+#endif /* gf_svc_fgetxattr() is not used */
+
+static int32_t
+gf_svc_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ int32_t ret = -1;
+ int inode_type = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ ret = svc_inode_ctx_get(this, loc->inode, &inode_type);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "name=%s", loc->name,
+ "gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
+ goto out;
+ }
+
+ if (inode_type == NORMAL_INODE) {
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags,
+ xdata);
+ } else {
+ op_ret = -1;
+ op_errno = EROFS;
+ goto out;
+ }
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(setxattr, frame, op_ret, op_errno, NULL);
+
+ return 0;
+}
+
+static int32_t
+gf_svc_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ int32_t ret = -1;
+ int inode_type = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd->inode, out);
+
+ ret = svc_inode_ctx_get(this, fd->inode, &inode_type);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
+ goto out;
+ }
+
+ if (inode_type == NORMAL_INODE) {
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags,
+ xdata);
+ } else {
+ op_ret = -1;
+ op_errno = EROFS;
+ goto out;
+ }
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ STACK_UNWIND_STRICT(fsetxattr, frame, op_ret, op_errno, NULL);
+
+ return 0;
+}
+
+static int32_t
+gf_svc_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ int inode_type = -1;
+ int ret = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ ret = svc_inode_ctx_get(this, loc->inode, &inode_type);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "name=%s", loc->name,
+ "gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
+ goto out;
+ }
+
+ if (inode_type == NORMAL_INODE) {
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
+ } else {
+ op_ret = -1;
+ op_errno = EROFS;
+ goto out;
+ }
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(rmdir, frame, op_ret, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+gf_svc_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int inode_type = -1;
+ int ret = -1;
+
+ if (op_ret < 0)
+ goto out;
+
+ inode_type = NORMAL_INODE;
+ ret = svc_inode_ctx_set(this, inode, inode_type);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ NULL);
+
+out:
+ SVC_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+static int32_t
+gf_svc_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ int parent_type = -1;
+ int ret = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ ret = svc_inode_ctx_get(this, loc->parent, &parent_type);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(loc->parent->gfid), NULL);
+ goto out;
+ }
+
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ if (strcmp(loc->name, entry_point) && parent_type == NORMAL_INODE) {
+ STACK_WIND(frame, gf_svc_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ } else {
+ op_ret = -1;
+ op_errno = EROFS;
+ goto out;
+ }
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(mkdir, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t
+gf_svc_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int inode_type = -1;
+ int ret = -1;
+
+ if (op_ret < 0)
+ goto out;
+
+ inode_type = NORMAL_INODE;
+ ret = svc_inode_ctx_set(this, inode, inode_type);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ NULL);
+
+out:
+ SVC_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
+
+static int32_t
+gf_svc_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ int parent_type = -1;
+ int ret = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ ret = svc_inode_ctx_get(this, loc->parent, &parent_type);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(loc->parent->gfid), NULL);
+ goto out;
+ }
+
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ if (strcmp(loc->name, entry_point) && parent_type == NORMAL_INODE) {
+ STACK_WIND(frame, gf_svc_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask,
+ xdata);
+ } else {
+ op_ret = -1;
+ op_errno = EROFS;
+ goto out;
+ }
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(mknod, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+/* If the flags of the open call contain O_WRONLY or O_RDWR and the inode is
+ a virtual inode, then unwind the call back with EROFS. Otherwise simply
+ STACK_WIND the call to the first child of svc xlator.
+*/
+static int32_t
+gf_svc_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ xlator_t *subvolume = NULL;
+ int inode_type = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ int ret = -1;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ /* Another way is to STACK_WIND to normal subvolume, if inode
+ type is not there in the context. If the file actually resides
+ in snapshots, then ENOENT would be returned. Needs more analysis.
+ */
+ SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, loc->inode,
+ subvolume, out);
+
+ if (((flags & O_ACCMODE) == O_WRONLY) || ((flags & O_ACCMODE) == O_RDWR)) {
+ if (subvolume != FIRST_CHILD(this)) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ }
+ }
+
+ STACK_WIND_TAIL(frame, subvolume, subvolume->fops->open, loc, flags, fd,
+ xdata);
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(open, frame, op_ret, op_errno, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+gf_svc_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int inode_type = -1;
+ int ret = -1;
+
+ if (op_ret < 0)
+ goto out;
+
+ inode_type = NORMAL_INODE;
+ ret = svc_inode_ctx_set(this, inode, inode_type);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ NULL);
+
+out:
+ SVC_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
+ preparent, postparent, xdata);
+
+ return 0;
+}
+
+static int32_t
+gf_svc_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *xdata)
+{
+ int parent_type = -1;
+ int ret = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ ret = svc_inode_ctx_get(this, loc->parent, &parent_type);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(loc->parent->gfid), NULL);
+ goto out;
+ }
+
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ if (strcmp(loc->name, entry_point) && parent_type == NORMAL_INODE) {
+ STACK_WIND(frame, gf_svc_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ xdata);
+ } else {
+ op_ret = -1;
+ op_errno = EROFS;
+ goto out;
+ }
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(create, frame, op_ret, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+gf_svc_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ int inode_type = -1;
+ int ret = -1;
+
+ if (op_ret < 0)
+ goto out;
+
+ inode_type = NORMAL_INODE;
+ ret = svc_inode_ctx_set(this, inode, inode_type);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ NULL);
+
+out:
+ SVC_STACK_UNWIND(symlink, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+
+ return 0;
+}
+
+static int32_t
+gf_svc_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ int parent_type = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ int ret = -1;
+ gf_boolean_t wind = _gf_false;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ ret = svc_inode_ctx_get(this, loc->parent, &parent_type);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(loc->parent->gfid), NULL);
+ goto out;
+ }
+
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ if (strcmp(loc->name, entry_point) && parent_type == NORMAL_INODE) {
+ STACK_WIND(frame, gf_svc_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask,
+ xdata);
+ } else {
+ op_ret = -1;
+ op_errno = EROFS;
+ goto out;
+ }
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(symlink, frame, op_ret, op_errno, NULL, NULL, NULL,
+ NULL, NULL);
+ return 0;
+}
+
+static int32_t
+gf_svc_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ int inode_type = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ int ret = -1;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ ret = svc_inode_ctx_get(this, loc->inode, &inode_type);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(loc->parent->gfid), NULL);
+ goto out;
+ }
+
+ if (inode_type == NORMAL_INODE) {
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, flags, xdata);
+ } else {
+ op_ret = -1;
+ op_errno = EROFS;
+ goto out;
+ }
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(unlink, frame, op_ret, op_errno, NULL, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+gf_svc_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int inode_type = -1;
+ xlator_t *subvolume = NULL;
+ int ret = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd->inode, out);
+
+ SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, fd->inode,
+ subvolume, out);
+
+ STACK_WIND_TAIL(frame, subvolume, subvolume->fops->readv, fd, size, offset,
+ flags, xdata);
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(readv, frame, op_ret, op_errno, NULL, 0, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t
+gf_svc_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ int inode_type = -1;
+ xlator_t *subvolume = NULL;
+ int ret = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, loc->inode,
+ subvolume, out);
+
+ STACK_WIND_TAIL(frame, subvolume, subvolume->fops->readlink, loc, size,
+ xdata);
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t
+gf_svc_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
+{
+ int ret = -1;
+ int inode_type = -1;
+ xlator_t *subvolume = NULL;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, loc->inode,
+ subvolume, out);
+
+ STACK_WIND_TAIL(frame, subvolume, subvolume->fops->access, loc, mask,
+ xdata);
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(access, frame, op_ret, op_errno, NULL);
+
+ return 0;
+}
+
+int32_t
+gf_svc_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmpentry = NULL;
+ svc_local_t *local = NULL;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto out;
+
+ local = frame->local;
+
+ /* If .snaps pre-exists, then it should not be listed
+ * in the NORMAL INODE directory when USS is enabled,
+ * so filter the .snaps entry if exists.
+ * However it is OK to list .snaps in VIRTUAL world
+ */
+ if (local->subvolume != FIRST_CHILD(this))
+ goto out;
+
+ /*
+ * Better to goto out if getting the entry point
+ * fails. We might end up sending the directory
+ * entry for the snapview entry point in the readdir
+ * response. But, the intention is to avoid the race
+ * condition where priv->path is being changed in
+ * reconfigure while this is accessing it.
+ */
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, op_errno,
+ SVC_MSG_COPY_ENTRY_POINT_FAILED, NULL);
+ goto out;
+ }
+
+ list_for_each_entry_safe(entry, tmpentry, &entries->list, list)
+ {
+ if (strcmp(entry_point, entry->d_name) == 0)
+ gf_dirent_entry_free(entry);
+ }
+
+out:
+ SVC_STACK_UNWIND(readdir, frame, op_ret, op_errno, entries, xdata);
+ return 0;
+}
+
+static int32_t
+gf_svc_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ int inode_type = -1;
+ xlator_t *subvolume = NULL;
+ svc_local_t *local = NULL;
+ int ret = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+ svc_fd_t *svc_fd = NULL;
+ gf_dirent_t entries;
+
+ INIT_LIST_HEAD(&entries);
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd->inode, out);
+
+ svc_fd = svc_fd_ctx_get_or_new(this, fd);
+ if (!svc_fd)
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ else {
+ if (svc_fd->entry_point_handled && off == svc_fd->last_offset) {
+ op_ret = 0;
+ op_errno = ENOENT;
+ goto out;
+ }
+ }
+
+ SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, fd->inode,
+ subvolume, out);
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY,
+ "inode-gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto out;
+ }
+ local->subvolume = subvolume;
+ frame->local = local;
+
+ STACK_WIND(frame, gf_svc_readdir_cbk, subvolume, subvolume->fops->readdir,
+ fd, size, off, xdata);
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(readdir, frame, op_ret, op_errno, &entries, NULL);
+
+ gf_dirent_free(&entries);
+
+ return 0;
+}
+
+/*
+ * This lookup if mainly for supporting USS for windows.
+ * Since the dentry for the entry-point directory is not sent in
+ * the readdir response, from windows explorer, there is no way
+ * to access the snapshots. If the explicit path of the entry-point
+ * directory is mentioned in the address bar, then windows sends
+ * readdir on the parent directory and compares if the entry point
+ * directory's name is there in readdir response. If it is not there
+ * then access to snapshot world is denied. And windows users cannot
+ * access snapshots via samba.
+ * So, to handle this a new option called special-directory is created,
+ * which if set, snapview-client will send the entry-point's dentry
+ * in readdirp o/p for the special directory, so that it will be
+ * visible from windows explorer.
+ * But to send that virtual entry, the following mechanism is used.
+ * 1) Check if readdir from posix is over.
+ * 2) If so, then send a lookup on entry point directory to snap daemon
+ * (this is needed because in readdirp inodes are linked, so we need to
+ * maintain 1:1 mapping between inodes (gfids) from snapview server to
+ * snapview client).
+ * 3) Once successful lookup response received, send a new entry to
+ * windows.
+ */
+
+static int32_t
+gf_svc_readdirp_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ svc_fd_t *svc_fd = NULL;
+ svc_local_t *local = NULL;
+ int inode_type = -1;
+ int ret = -1;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+
+ INIT_LIST_HEAD(&entries.list);
+
+ local = frame->local;
+
+ if (op_ret) {
+ if (op_errno == ESTALE && !local->revalidate) {
+ local->revalidate = 1;
+ ret = gf_svc_special_dir_revalidate_lookup(frame, this, xdata);
+
+ if (!ret)
+ return 0;
+ }
+ op_ret = 0;
+ op_errno = ENOENT;
+ goto out;
+ }
+
+ svc_fd = svc_fd_ctx_get(this, local->fd);
+ if (!svc_fd) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(local->fd->inode->gfid), NULL);
+ op_ret = 0;
+ op_errno = ENOENT;
+ goto out;
+ }
+
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED,
+ NULL);
+ op_ret = 0;
+ op_errno = ENOENT;
+ goto out;
+ }
+
+ entry = gf_dirent_for_name(entry_point);
+ if (!entry) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY,
+ "entry-point=%s", entry_point, NULL);
+ op_ret = 0;
+ op_errno = ENOMEM;
+ goto out;
+ }
+
+ entry->inode = inode_ref(inode);
+ entry->d_off = svc_fd->last_offset + 22;
+ entry->d_ino = buf->ia_ino;
+ entry->d_type = DT_DIR;
+ entry->d_stat = *buf;
+ inode_type = VIRTUAL_INODE;
+ ret = svc_inode_ctx_set(this, entry->inode, inode_type);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_SET_INODE_CONTEXT_FAILED,
+ "entry-name=%s", entry->d_name, NULL);
+
+ list_add_tail(&entry->list, &entries.list);
+ op_ret = 1;
+ svc_fd->last_offset = entry->d_off;
+ svc_fd->entry_point_handled = _gf_true;
+
+out:
+ SVC_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries,
+ local ? local->xdata : NULL);
+
+ gf_dirent_free(&entries);
+
+ return 0;
+}
+
+int
+gf_svc_special_dir_revalidate_lookup(call_frame_t *frame, xlator_t *this,
+ dict_t *xdata)
+{
+ svc_local_t *local = NULL;
+ loc_t *loc = NULL;
+ dict_t *tmp_xdata = NULL;
+ char *path = NULL;
+ int ret = -1;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+
+ local = frame->local;
+ loc = &local->loc;
+
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ inode_unref(loc->inode);
+ loc->inode = inode_new(loc->parent->table);
+ if (!loc->inode) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, SVC_MSG_ALLOC_INODE_FAILED,
+ NULL);
+ goto out;
+ }
+
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED,
+ NULL);
+ goto out;
+ }
+
+ gf_uuid_copy(local->loc.gfid, loc->inode->gfid);
+ ret = inode_path(loc->parent, entry_point, &path);
+ if (ret < 0)
+ goto out;
+
+ if (loc->path)
+ GF_FREE((char *)loc->path);
+
+ loc->path = gf_strdup(path);
+ if (loc->path) {
+ if (!loc->name || (loc->name && !strcmp(loc->name, ""))) {
+ loc->name = strrchr(loc->path, '/');
+ if (loc->name)
+ loc->name++;
+ }
+ } else
+ loc->path = NULL;
+
+ tmp_xdata = dict_new();
+ if (!tmp_xdata) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str(tmp_xdata, "entry-point", "true");
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DICT_SET_FAILED, NULL);
+ goto out;
+ }
+
+ STACK_WIND(frame, gf_svc_readdirp_lookup_cbk, SECOND_CHILD(this),
+ SECOND_CHILD(this)->fops->lookup, loc, tmp_xdata);
+out:
+ if (tmp_xdata)
+ dict_unref(tmp_xdata);
+
+ GF_FREE(path);
+ return ret;
+}
+
+static gf_boolean_t
+gf_svc_readdir_on_special_dir(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ gf_dirent_t *entries, dict_t *xdata)
+{
+ svc_local_t *local = NULL;
+ svc_private_t *private = NULL;
+ inode_t *inode = NULL;
+ fd_t *fd = NULL;
+ char *path = NULL;
+ loc_t *loc = NULL;
+ dict_t *tmp_xdata = NULL;
+ int ret = -1;
+ gf_boolean_t unwind = _gf_true;
+ svc_fd_t *svc_fd = NULL;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+
+ private
+ = this->private;
+ local = frame->local;
+
+ loc = &local->loc;
+ fd = local->fd;
+ svc_fd = svc_fd_ctx_get(this, fd);
+ if (!svc_fd) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ goto out;
+ }
+
+ /*
+ * check if its end of readdir operation from posix, if special_dir
+ * option is set, if readdir is done on special directory and if
+ * readdirp is from normal regular graph.
+ */
+
+ if (!private->show_entry_point)
+ goto out;
+
+ if (op_ret == 0 && op_errno == ENOENT && private->special_dir &&
+ strcmp(private->special_dir, "") && svc_fd->special_dir &&
+ local->subvolume == FIRST_CHILD(this)) {
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ SVC_MSG_GET_FD_CONTEXT_FAILED, NULL);
+ goto out;
+ }
+
+ inode = inode_grep(fd->inode->table, fd->inode, entry_point);
+ if (!inode) {
+ inode = inode_new(fd->inode->table);
+ if (!inode) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_ALLOC_INODE_FAILED,
+ NULL);
+ goto out;
+ }
+ }
+
+ gf_uuid_copy(local->loc.pargfid, fd->inode->gfid);
+ gf_uuid_copy(local->loc.gfid, inode->gfid);
+ if (gf_uuid_is_null(inode->gfid))
+ ret = inode_path(fd->inode, entry_point, &path);
+ else
+ ret = inode_path(inode, NULL, &path);
+
+ if (ret < 0)
+ goto out;
+ loc->path = gf_strdup(path);
+ if (loc->path) {
+ if (!loc->name || (loc->name && !strcmp(loc->name, ""))) {
+ loc->name = strrchr(loc->path, '/');
+ if (loc->name)
+ loc->name++;
+ }
+ }
+
+ loc->inode = inode;
+ loc->parent = inode_ref(fd->inode);
+ tmp_xdata = dict_new();
+ if (!tmp_xdata)
+ goto out;
+ ret = dict_set_str(tmp_xdata, "entry-point", "true");
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_DICT_SET_FAILED, NULL);
+ goto out;
+ }
+
+ local->cookie = cookie;
+ if (local->xdata) {
+ dict_unref(local->xdata);
+ local->xdata = NULL;
+ }
+ if (xdata)
+ local->xdata = dict_ref(xdata);
+
+ STACK_WIND(frame, gf_svc_readdirp_lookup_cbk, SECOND_CHILD(this),
+ SECOND_CHILD(this)->fops->lookup, loc, tmp_xdata);
+ unwind = _gf_false;
+ }
+
+out:
+ if (tmp_xdata)
+ dict_unref(tmp_xdata);
+
+ GF_FREE(path);
+ return unwind;
+}
+
+static int32_t
+gf_svc_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ gf_dirent_t *entry = NULL;
+ gf_dirent_t *tmpentry = NULL;
+ svc_local_t *local = NULL;
+ int inode_type = -1;
+ int ret = -1;
+ svc_fd_t *svc_fd = NULL;
+ gf_boolean_t unwind = _gf_true;
+ char entry_point[NAME_MAX + 1] = {
+ 0,
+ };
+
+ if (op_ret < 0)
+ goto out;
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+
+ local = frame->local;
+
+ svc_fd = svc_fd_ctx_get(this, local->fd);
+ if (!svc_fd) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(local->fd->inode->gfid), NULL);
+ }
+
+ if (local->subvolume == FIRST_CHILD(this))
+ inode_type = NORMAL_INODE;
+ else
+ inode_type = VIRTUAL_INODE;
+
+ /*
+ * Better to goto out and return whatever is there in the
+ * readdirp response (even if the readdir response contains
+ * a directory entry for the snapshot entry point). Otherwise
+ * if we ignore the error, then there is a chance of race
+ * condition where, priv->path is changed in reconfigure
+ */
+ if (gf_svc_get_entry_point(this, entry_point, sizeof(entry_point))) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_COPY_ENTRY_POINT_FAILED,
+ NULL);
+ goto out;
+ }
+
+ list_for_each_entry_safe(entry, tmpentry, &entries->list, list)
+ {
+ /* If .snaps pre-exists, then it should not be listed
+ * in the NORMAL INODE directory when USS is enabled,
+ * so filter the .snaps entry if exists.
+ * However it is OK to list .snaps in VIRTUAL world
+ */
+ if (inode_type == NORMAL_INODE && !strcmp(entry_point, entry->d_name)) {
+ gf_dirent_entry_free(entry);
+ continue;
+ }
+
+ if (!entry->inode)
+ continue;
+
+ ret = svc_inode_ctx_set(this, entry->inode, inode_type);
+ if (ret)
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ SVC_MSG_SET_INODE_CONTEXT_FAILED, NULL);
+ if (svc_fd)
+ svc_fd->last_offset = entry->d_off;
+ }
+
+ unwind = gf_svc_readdir_on_special_dir(frame, cookie, this, op_ret,
+ op_errno, entries, xdata);
+
+out:
+ if (unwind)
+ SVC_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata);
+
+ return 0;
+}
+
+static int32_t
+gf_svc_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ int inode_type = -1;
+ xlator_t *subvolume = NULL;
+ svc_local_t *local = NULL;
+ int ret = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+ svc_fd_t *svc_fd = NULL;
+ gf_dirent_t entries;
+
+ INIT_LIST_HEAD(&entries.list);
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd->inode, out);
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ op_errno = ENOMEM;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ /*
+ * This is mainly for samba shares (or windows clients). As part of
+ * readdirp on the directory used as samba share, the entry point
+ * directory would have been added at the end. So when a new readdirp
+ * request comes, we have to check if the entry point has been handled
+ * or not in readdirp. That information and the offset used for it
+ * is remembered in fd context. If it has been handled, then simply
+ * unwind indication end of readdir operation.
+ */
+ svc_fd = svc_fd_ctx_get_or_new(this, fd);
+ if (!svc_fd)
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_GET_FD_CONTEXT_FAILED,
+ "gfid=%s", uuid_utoa(fd->inode->gfid), NULL);
+ else {
+ if (svc_fd->entry_point_handled && off == svc_fd->last_offset) {
+ op_ret = 0;
+ op_errno = ENOENT;
+ goto out;
+ }
+ }
+
+ SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, fd->inode,
+ subvolume, out);
+
+ local->subvolume = subvolume;
+ local->fd = fd_ref(fd);
+ frame->local = local;
+
+ STACK_WIND(frame, gf_svc_readdirp_cbk, subvolume, subvolume->fops->readdirp,
+ fd, size, off, xdata);
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(readdirp, frame, op_ret, op_errno, &entries, NULL);
+
+ gf_dirent_free(&entries);
+
+ return 0;
+}
+
+/* Renaming the entries from or to snapshots is not allowed as the snapshots
+ are read-only.
+*/
+static int32_t
+gf_svc_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ int src_inode_type = -1;
+ int dst_inode_type = -1;
+ int dst_parent_type = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int32_t ret = -1;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, oldloc, out);
+ GF_VALIDATE_OR_GOTO(this->name, oldloc->inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, newloc, out);
+
+ ret = svc_inode_ctx_get(this, oldloc->inode, &src_inode_type);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(oldloc->inode->gfid), NULL);
+ goto out;
+ }
+
+ if (src_inode_type == VIRTUAL_INODE) {
+ op_ret = -1;
+ op_errno = EROFS;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_RENAME_SNAPSHOT_ENTRY, "name=%s", oldloc->name, NULL);
+ goto out;
+ }
+
+ if (newloc->inode) {
+ ret = svc_inode_ctx_get(this, newloc->inode, &dst_inode_type);
+ if (!ret && dst_inode_type == VIRTUAL_INODE) {
+ op_ret = -1;
+ op_errno = EROFS;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_RENAME_SNAPSHOT_ENTRY, "oldloc-name=%s",
+ oldloc->name, "newloc-name=%s", newloc->name, NULL);
+ goto out;
+ }
+ }
+
+ if (dst_inode_type < 0) {
+ ret = svc_inode_ctx_get(this, newloc->parent, &dst_parent_type);
+ if (!ret && dst_parent_type == VIRTUAL_INODE) {
+ op_ret = -1;
+ op_errno = EROFS;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_RENAME_SNAPSHOT_ENTRY, "oldloc-name=%s",
+ oldloc->name, "newloc-name=%s", newloc->name, NULL);
+ goto out;
+ }
+ }
+
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->rename,
+ oldloc, newloc, xdata);
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(rename, frame, op_ret, op_errno, NULL, NULL, NULL,
+ NULL, NULL, NULL);
+ return 0;
+}
+
+/* Creating hardlinks for the files from the snapshot is not allowed as it
+ will be equivalent of creating hardlinks across different filesystems.
+ And so is vice versa.
+*/
+static int32_t
+gf_svc_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ int src_inode_type = -1;
+ int dst_parent_type = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int32_t ret = -1;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, oldloc, out);
+ GF_VALIDATE_OR_GOTO(this->name, oldloc->inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, newloc, out);
+
+ ret = svc_inode_ctx_get(this, oldloc->inode, &src_inode_type);
+ if (!ret && src_inode_type == VIRTUAL_INODE) {
+ op_ret = -1;
+ op_errno = EROFS;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY,
+ "oldloc-name=%s", oldloc->name, NULL);
+ goto out;
+ }
+
+ ret = svc_inode_ctx_get(this, newloc->parent, &dst_parent_type);
+ if (!ret && dst_parent_type == VIRTUAL_INODE) {
+ op_ret = -1;
+ op_errno = EROFS;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno, SVC_MSG_LINK_SNAPSHOT_ENTRY,
+ "oldloc-name=%s", oldloc->name, "newloc-name=%s", newloc->name,
+ NULL);
+ goto out;
+ }
+
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this), FIRST_CHILD(this)->fops->link,
+ oldloc, newloc, xdata);
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(link, frame, op_ret, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+ return 0;
+}
+
+static int32_t
+gf_svc_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int ret = -1;
+ int inode_type = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ ret = svc_inode_ctx_get(this, loc->inode, &inode_type);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "path=%s", loc->path,
+ "gfid=%s", uuid_utoa(loc->inode->gfid), NULL);
+ goto out;
+ }
+
+ if (inode_type == NORMAL_INODE) {
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ } else {
+ op_ret = -1;
+ op_errno = EROFS;
+ goto out;
+ }
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(removexattr, frame, op_ret, op_errno, NULL);
+
+ return 0;
+}
+
+static int
+gf_svc_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int datasync,
+ dict_t *xdata)
+{
+ int inode_type = -1;
+ int ret = -1;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd->inode, out);
+
+ ret = svc_inode_ctx_get(this, fd->inode, &inode_type);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_smsg(this->name, GF_LOG_ERROR, op_errno,
+ SVC_MSG_GET_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(fd->inode->gfid), NULL);
+ goto out;
+ }
+
+ if (inode_type == NORMAL_INODE) {
+ STACK_WIND_TAIL(frame, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsync, fd, datasync, xdata);
+ } else {
+ op_ret = -1;
+ op_errno = EROFS;
+ goto out;
+ }
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(fsync, frame, op_ret, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+gf_svc_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int ret = -1;
+ int inode_type = -1;
+ xlator_t *subvolume = NULL;
+ gf_boolean_t wind = _gf_false;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd->inode, out);
+
+ SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, fd->inode,
+ subvolume, out);
+
+ STACK_WIND_TAIL(frame, subvolume, subvolume->fops->flush, fd, xdata);
+
+ wind = _gf_true;
+
+out:
+ if (!wind)
+ SVC_STACK_UNWIND(flush, frame, op_ret, op_errno, NULL);
+
+ return 0;
+}
+
+static int32_t
+gf_svc_releasedir(xlator_t *this, fd_t *fd)
+{
+ svc_fd_t *sfd = NULL;
+ uint64_t tmp_pfd = 0;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("snapview-client", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ ret = fd_ctx_del(fd, this, &tmp_pfd);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0, "pfd from fd=%p is NULL", fd);
+ goto out;
+ }
+
+ GF_FREE(sfd);
+
+out:
+ return 0;
+}
+
+static int32_t
+gf_svc_forget(xlator_t *this, inode_t *inode)
+{
+ int ret = -1;
+ uint64_t value = 0;
+
+ GF_VALIDATE_OR_GOTO("svc", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ ret = inode_ctx_del(inode, this, &value);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ SVC_MSG_DELETE_INODE_CONTEXT_FAILED, "gfid=%s",
+ uuid_utoa(inode->gfid), NULL);
+ goto out;
+ }
+
+out:
+ return 0;
+}
+
+static int
+gf_svc_priv_destroy(xlator_t *this, svc_private_t *priv)
+{
+ int ret = -1;
+
+ if (!priv) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_NULL_PRIV, NULL);
+ goto out;
+ }
+
+ GF_FREE(priv->path);
+ GF_FREE(priv->special_dir);
+
+ LOCK_DESTROY(&priv->lock);
+
+ GF_FREE(priv);
+
+ if (this->local_pool) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/**
+ * ** NOTE **:
+ * =============
+ * The option "snapdir-entry-path" is NOT reconfigurable.
+ * That option as of now is only for the consumption of
+ * samba, where, it needs to tell glusterfs about the
+ * directory that is shared with windows client for the
+ * access. Now, in windows-explorer (GUI) interface, for
+ * the directory shared, the entry point to the snapshot
+ * world (snapshot-directory option) should be visible,
+ * atleast as a hidden entry. For that to happen, glusterfs
+ * has to send that entry in the readdir response coming on
+ * the directory used as the smb share. Therefore, samba,
+ * while initializing the gluster volume (via gfapi) sets
+ * the xlator option "snapdir-entry-path" to the directory
+ * which is to be shared with windows (check the file
+ * vfs_glusterfs.c from samba source code). So to avoid
+ * problems with smb access, not allowing snapdir-entry-path
+ * option to be configurable. That option is for those
+ * consumers who know what they are doing.
+ **/
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ svc_private_t *priv = NULL;
+ char *path = NULL;
+ gf_boolean_t show_entry_point = _gf_false;
+ char *tmp = NULL;
+
+ priv = this->private;
+
+ GF_OPTION_RECONF("snapshot-directory", path, options, str, out);
+ if (!path || (strlen(path) > NAME_MAX) || path[0] != '.') {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_INVALID_ENTRY_POINT,
+ "path=%s", path, NULL);
+ goto out;
+ }
+
+ GF_OPTION_RECONF("show-snapshot-directory", show_entry_point, options, bool,
+ out);
+
+ /*
+ * The assumption now is that priv->path is an allocated memory (either
+ * in init or in a previous reconfigure).
+ * So, the intention here is to preserve the older contents of the option
+ * until the new option's value has been completely stored in the priv.
+ * So, do this.
+ * - Store the pointer of priv->path in a temporary pointer.
+ * - Allocate new memory for the new value of the option that is just
+ * obtained from the above call to GF_OPTION_RECONF.
+ * - If the above allocation fails, again set the pointer from priv
+ * to the address stored in tmp. i.e. the previous value.
+ * - If the allocation succeeds, then free the tmp pointer.
+ * WARNING: Before changing the allocation and freeing logic of
+ * priv->path, always check the init function to see how
+ * priv->path is set. Take decisions accordingly. As of now,
+ * the assumption is that, the string elements of private
+ * structure of snapview-client are allocated (either in
+ * init or here in reconfugure).
+ */
+ LOCK(&priv->lock);
+ {
+ tmp = priv->path;
+ priv->path = NULL;
+ priv->path = gf_strdup(path);
+ if (!priv->path) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to reconfigure snapshot-directory option to %s",
+ path);
+ priv->path = tmp;
+ } else {
+ GF_FREE(tmp);
+ tmp = NULL;
+ }
+
+ priv->show_entry_point = show_entry_point;
+ }
+ UNLOCK(&priv->lock);
+
+out:
+ return 0;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int32_t ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, gf_svc_mt_end + 1);
+
+ if (ret != 0) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_MEM_ACNT_FAILED, NULL);
+ }
+
+ return ret;
+}
+
+int32_t
+init(xlator_t *this)
+{
+ svc_private_t *private = NULL;
+ int ret = -1;
+ int children = 0;
+ xlator_list_t *xl = NULL;
+ char *path = NULL;
+ char *special_dir = NULL;
+
+ if (!this->children) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_CHILD_FOR_XLATOR, NULL);
+ goto out;
+ }
+
+ xl = this->children;
+ while (xl) {
+ children++;
+ xl = xl->next;
+ }
+
+ if (children != 2) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_XLATOR_CHILDREN_WRONG,
+ "subvol-num=%d", children, NULL);
+ goto out;
+ }
+
+ /* This can be the top of graph in certain cases */
+ if (!this->parents) {
+ gf_msg_debug(this->name, 0,
+ "dangling volume. Check "
+ "volfile");
+ }
+
+ private
+ = GF_CALLOC(1, sizeof(*private), gf_svc_mt_svc_private_t);
+ if (!private)
+ goto out;
+
+ LOCK_INIT(&private->lock);
+
+ GF_OPTION_INIT("snapshot-directory", path, str, out);
+ if (!path || (strlen(path) > NAME_MAX) || path[0] != '.') {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_INVALID_ENTRY_POINT,
+ "path=%s", path, NULL);
+ goto out;
+ }
+
+ private
+ ->path = gf_strdup(path);
+ if (!private->path) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY,
+ "entry-point-path=%s", path, NULL);
+ goto out;
+ }
+
+ GF_OPTION_INIT("snapdir-entry-path", special_dir, str, out);
+ if (!special_dir || strstr(special_dir, path)) {
+ if (special_dir)
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ SVC_MSG_ENTRY_POINT_SPECIAL_DIR, "path=%s", path,
+ "special-dir=%s", special_dir);
+ else
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NULL_SPECIAL_DIR,
+ NULL);
+ goto out;
+ }
+
+ private
+ ->special_dir = gf_strdup(special_dir);
+ if (!private->special_dir) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_NO_MEMORY,
+ "special-directory=%s", special_dir, NULL);
+ goto out;
+ }
+
+ GF_OPTION_INIT("show-snapshot-directory", private->show_entry_point, bool,
+ out);
+
+ this->local_pool = mem_pool_new(svc_local_t, 128);
+ if (!this->local_pool) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, SVC_MSG_MEM_POOL_GET_FAILED, NULL);
+ goto out;
+ }
+
+ this->private = private;
+
+ ret = 0;
+
+out:
+ if (ret)
+ (void)gf_svc_priv_destroy(this, private);
+
+ return ret;
+}
+
+void
+fini(xlator_t *this)
+{
+ svc_private_t *priv = NULL;
+
+ if (!this)
+ return;
+
+ priv = this->private;
+ if (!priv)
+ return;
+
+ /*
+ * Just log the failure and go ahead to
+ * set this->priv to NULL.
+ */
+ if (gf_svc_priv_destroy(this, priv))
+ gf_smsg(this->name, GF_LOG_WARNING, 0, SVC_MSG_PRIV_DESTROY_FAILED,
+ NULL);
+
+ this->private = NULL;
+
+ return;
+}
+
+int
+notify(xlator_t *this, int event, void *data, ...)
+{
+ xlator_t *subvol = NULL;
+ int ret = 0;
+
+ subvol = data;
+
+ /* As there are two subvolumes in snapview-client, there is
+ * a possibility that the regular subvolume is still down and
+ * snapd subvolume come up first. So if we don't handle this situation
+ * CHILD_UP event will be propagated upwards to fuse when
+ * regular subvolume is still down.
+ * This can cause data unavailable for the application.
+ * So for now send notifications up only for regular subvolume.
+ *
+ * TODO: In future if required we may need to handle
+ * notifications from virtual subvolume
+ */
+ if (subvol != SECOND_CHILD(this))
+ ret = default_notify(this, event, data);
+
+ return ret;
+}
+
+struct xlator_fops fops = {
+ .lookup = gf_svc_lookup,
+ .opendir = gf_svc_opendir,
+ .stat = gf_svc_stat,
+ .fstat = gf_svc_fstat,
+ .statfs = gf_svc_statfs,
+ .rmdir = gf_svc_rmdir,
+ .rename = gf_svc_rename,
+ .mkdir = gf_svc_mkdir,
+ .open = gf_svc_open,
+ .unlink = gf_svc_unlink,
+ .setattr = gf_svc_setattr,
+ .getxattr = gf_svc_getxattr,
+ .setxattr = gf_svc_setxattr,
+ .fsetxattr = gf_svc_fsetxattr,
+ .readv = gf_svc_readv,
+ .readdir = gf_svc_readdir,
+ .readdirp = gf_svc_readdirp,
+ .create = gf_svc_create,
+ .readlink = gf_svc_readlink,
+ .mknod = gf_svc_mknod,
+ .symlink = gf_svc_symlink,
+ .flush = gf_svc_flush,
+ .link = gf_svc_link,
+ .access = gf_svc_access,
+ .removexattr = gf_svc_removexattr,
+ .fsync = gf_svc_fsync,
+};
+
+struct xlator_cbks cbks = {
+ .forget = gf_svc_forget,
+ .releasedir = gf_svc_releasedir,
+};
+
+struct volume_options options[] = {
+ {
+ .key = {"snapshot-directory"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = ".snaps",
+ },
+ {
+ .key = {"snapdir-entry-path"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "An option to set the path of a directory on which "
+ "when readdir comes, dentry for the snapshot-directory"
+ " should be created and added in the readdir response",
+ .default_value = "",
+ },
+ {
+ .key = {"show-snapshot-directory"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .description = "If this option is set, and the option "
+ "\"snapdir-entry-path\" is set (which is set by samba "
+ "vfs plugin for glusterfs, then send the entry point "
+ "when readdir comes on the snapdir-entry-path",
+ .default_value = "off",
+ },
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "snapview-client",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/snapview-client/src/snapview-client.h b/xlators/features/snapview-client/src/snapview-client.h
new file mode 100644
index 00000000000..166116a439d
--- /dev/null
+++ b/xlators/features/snapview-client/src/snapview-client.h
@@ -0,0 +1,101 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __SNAP_VIEW_CLIENT_H__
+#define __SNAP_VIEW_CLIENT_H__
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include "snapview-client-mem-types.h"
+#include "snapview-client-messages.h"
+
+struct __svc_local {
+ loc_t loc;
+ xlator_t *subvolume;
+ fd_t *fd;
+ void *cookie;
+ dict_t *xdata;
+ uint16_t revalidate;
+};
+typedef struct __svc_local svc_local_t;
+
+#define SVC_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ svc_local_t *__local = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ svc_local_free(__local); \
+ } while (0)
+
+#define SVC_ENTRY_POINT_SET(this, xdata, op_ret, op_errno, new_xdata, ret, \
+ label) \
+ do { \
+ if (!xdata) { \
+ xdata = new_xdata = dict_new(); \
+ if (!new_xdata) { \
+ gf_log(this->name, GF_LOG_ERROR, \
+ "failed to allocate new dict"); \
+ op_ret = -1; \
+ op_errno = ENOMEM; \
+ goto label; \
+ } \
+ } \
+ ret = dict_set_str(xdata, "entry-point", "true"); \
+ if (ret) { \
+ gf_log(this->name, GF_LOG_ERROR, "failed to set dict"); \
+ op_ret = -1; \
+ op_errno = ENOMEM; \
+ goto label; \
+ } \
+ } while (0);
+
+#define SVC_GET_SUBVOL_FROM_CTX(this, op_ret, op_errno, inode_type, ret, \
+ inode, subvolume, label) \
+ do { \
+ ret = svc_inode_ctx_get(this, inode, &inode_type); \
+ if (ret < 0) { \
+ gf_log(this->name, GF_LOG_ERROR, \
+ "inode context not found for gfid %s", \
+ uuid_utoa(inode->gfid)); \
+ op_ret = -1; \
+ op_errno = EINVAL; \
+ goto label; \
+ } \
+ \
+ subvolume = svc_get_subvolume(this, inode_type); \
+ } while (0);
+
+struct svc_private {
+ char *path;
+ char *special_dir; /* needed for samba */
+ gf_boolean_t show_entry_point;
+ gf_lock_t lock; /* mainly to guard private->path */
+};
+typedef struct svc_private svc_private_t;
+
+struct svc_fd {
+ off_t last_offset;
+ gf_boolean_t entry_point_handled;
+ gf_boolean_t special_dir;
+};
+typedef struct svc_fd svc_fd_t;
+
+typedef enum { NORMAL_INODE = 1, VIRTUAL_INODE } inode_type_t;
+
+int
+gf_svc_special_dir_revalidate_lookup(call_frame_t *frame, xlator_t *this,
+ dict_t *xdata);
+
+#endif /* __SNAP_VIEW_CLIENT_H__ */
diff --git a/xlators/features/snapview-server/Makefile.am b/xlators/features/snapview-server/Makefile.am
new file mode 100644
index 00000000000..af437a64d6d
--- /dev/null
+++ b/xlators/features/snapview-server/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = src
diff --git a/xlators/features/snapview-server/src/Makefile.am b/xlators/features/snapview-server/src/Makefile.am
new file mode 100644
index 00000000000..2935f138a4c
--- /dev/null
+++ b/xlators/features/snapview-server/src/Makefile.am
@@ -0,0 +1,25 @@
+if WITH_SERVER
+xlator_LTLIBRARIES = snapview-server.la
+endif
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+snapview_server_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+snapview_server_la_SOURCES = snapview-server.c snapview-server-mgmt.c \
+ snapview-server-helpers.c
+
+snapview_server_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/api/src/libgfapi.la \
+ $(RLLIBS) $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
+
+noinst_HEADERS = snapview-server.h snapview-server-mem-types.h snapview-server-messages.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/api/src -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -DDATADIR=\"$(localstatedir)\"
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/snapview-server/src/snapview-server-helpers.c b/xlators/features/snapview-server/src/snapview-server-helpers.c
new file mode 100644
index 00000000000..62c1ddac49c
--- /dev/null
+++ b/xlators/features/snapview-server/src/snapview-server-helpers.c
@@ -0,0 +1,715 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "snapview-server.h"
+#include "snapview-server-mem-types.h"
+
+#include <glusterfs/xlator.h>
+#include "rpc-clnt.h"
+#include "xdr-generic.h"
+#include "protocol-common.h"
+#include <pthread.h>
+
+int
+__svs_inode_ctx_set(xlator_t *this, inode_t *inode, svs_inode_t *svs_inode)
+{
+ uint64_t value = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, svs_inode, out);
+
+ value = (uint64_t)(long)svs_inode;
+
+ ret = __inode_ctx_set(inode, this, &value);
+
+out:
+ return ret;
+}
+
+svs_inode_t *
+__svs_inode_ctx_get(xlator_t *this, inode_t *inode)
+{
+ svs_inode_t *svs_inode = NULL;
+ uint64_t value = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ ret = __inode_ctx_get(inode, this, &value);
+ if (ret)
+ goto out;
+
+ svs_inode = (svs_inode_t *)((long)value);
+
+out:
+ return svs_inode;
+}
+
+svs_inode_t *
+svs_inode_ctx_get(xlator_t *this, inode_t *inode)
+{
+ svs_inode_t *svs_inode = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ LOCK(&inode->lock);
+ {
+ svs_inode = __svs_inode_ctx_get(this, inode);
+ }
+ UNLOCK(&inode->lock);
+
+out:
+ return svs_inode;
+}
+
+int32_t
+svs_inode_ctx_set(xlator_t *this, inode_t *inode, svs_inode_t *svs_inode)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, svs_inode, out);
+
+ LOCK(&inode->lock);
+ {
+ ret = __svs_inode_ctx_set(this, inode, svs_inode);
+ }
+ UNLOCK(&inode->lock);
+
+out:
+ return ret;
+}
+
+svs_inode_t *
+svs_inode_new(void)
+{
+ svs_inode_t *svs_inode = NULL;
+
+ svs_inode = GF_CALLOC(1, sizeof(*svs_inode), gf_svs_mt_svs_inode_t);
+
+ return svs_inode;
+}
+
+svs_inode_t *
+svs_inode_ctx_get_or_new(xlator_t *this, inode_t *inode)
+{
+ svs_inode_t *svs_inode = NULL;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ LOCK(&inode->lock);
+ {
+ svs_inode = __svs_inode_ctx_get(this, inode);
+ if (!svs_inode) {
+ svs_inode = svs_inode_new();
+ if (svs_inode) {
+ ret = __svs_inode_ctx_set(this, inode, svs_inode);
+ if (ret) {
+ GF_FREE(svs_inode);
+ svs_inode = NULL;
+ }
+ }
+ }
+ }
+ UNLOCK(&inode->lock);
+
+out:
+ return svs_inode;
+}
+
+svs_fd_t *
+svs_fd_new(void)
+{
+ svs_fd_t *svs_fd = NULL;
+
+ svs_fd = GF_CALLOC(1, sizeof(*svs_fd), gf_svs_mt_svs_fd_t);
+
+ return svs_fd;
+}
+
+int
+__svs_fd_ctx_set(xlator_t *this, fd_t *fd, svs_fd_t *svs_fd)
+{
+ uint64_t value = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, svs_fd, out);
+
+ value = (uint64_t)(long)svs_fd;
+
+ ret = __fd_ctx_set(fd, this, value);
+
+out:
+ return ret;
+}
+
+svs_fd_t *
+__svs_fd_ctx_get(xlator_t *this, fd_t *fd)
+{
+ svs_fd_t *svs_fd = NULL;
+ uint64_t value = 0;
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ ret = __fd_ctx_get(fd, this, &value);
+ if (ret)
+ return NULL;
+
+ svs_fd = (svs_fd_t *)((long)value);
+
+out:
+ return svs_fd;
+}
+
+svs_fd_t *
+svs_fd_ctx_get(xlator_t *this, fd_t *fd)
+{
+ svs_fd_t *svs_fd = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ LOCK(&fd->lock);
+ {
+ svs_fd = __svs_fd_ctx_get(this, fd);
+ }
+ UNLOCK(&fd->lock);
+
+out:
+ return svs_fd;
+}
+
+int32_t
+svs_fd_ctx_set(xlator_t *this, fd_t *fd, svs_fd_t *svs_fd)
+{
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, svs_fd, out);
+
+ LOCK(&fd->lock);
+ {
+ ret = __svs_fd_ctx_set(this, fd, svs_fd);
+ }
+ UNLOCK(&fd->lock);
+
+out:
+ return ret;
+}
+
+svs_fd_t *
+__svs_fd_ctx_get_or_new(xlator_t *this, fd_t *fd)
+{
+ svs_fd_t *svs_fd = NULL;
+ int ret = -1;
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ svs_inode_t *inode_ctx = NULL;
+ glfs_fd_t *glfd = NULL;
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ inode = fd->inode;
+ svs_fd = __svs_fd_ctx_get(this, fd);
+ if (svs_fd) {
+ ret = 0;
+ goto out;
+ }
+
+ svs_fd = svs_fd_new();
+ if (!svs_fd) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_NEW_FD_CTX_FAILED,
+ "failed to allocate new fd "
+ "context for gfid %s",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+
+ if (fd_is_anonymous(fd)) {
+ inode_ctx = svs_inode_ctx_get(this, inode);
+ if (!inode_ctx) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ SVS_MSG_GET_INODE_CONTEXT_FAILED,
+ "failed to get inode "
+ "context for %s",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+
+ fs = inode_ctx->fs;
+ object = inode_ctx->object;
+
+ if (inode->ia_type == IA_IFDIR) {
+ glfd = glfs_h_opendir(fs, object);
+ if (!glfd) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, SVS_MSG_OPENDIR_FAILED,
+ "failed to "
+ "open the directory %s",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+ }
+
+ if (inode->ia_type == IA_IFREG) {
+ glfd = glfs_h_open(fs, object, O_RDONLY | O_LARGEFILE);
+ if (!glfd) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, SVS_MSG_OPEN_FAILED,
+ "failed to "
+ "open the file %s",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+ }
+
+ svs_fd->fd = glfd;
+ }
+
+ ret = __svs_fd_ctx_set(this, fd, svs_fd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_SET_FD_CONTEXT_FAILED,
+ "failed to set fd context "
+ "for gfid %s",
+ uuid_utoa(inode->gfid));
+ if (svs_fd->fd) {
+ if (inode->ia_type == IA_IFDIR) {
+ ret = glfs_closedir(svs_fd->fd);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ SVS_MSG_CLOSEDIR_FAILED,
+ "failed to close the fd for %s",
+ uuid_utoa(inode->gfid));
+ }
+ if (inode->ia_type == IA_IFREG) {
+ ret = glfs_close(svs_fd->fd);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_CLOSE_FAILED,
+ "failed to close the fd for %s",
+ uuid_utoa(inode->gfid));
+ }
+ }
+ ret = -1;
+ }
+
+out:
+ if (ret) {
+ GF_FREE(svs_fd);
+ svs_fd = NULL;
+ }
+
+ return svs_fd;
+}
+
+svs_fd_t *
+svs_fd_ctx_get_or_new(xlator_t *this, fd_t *fd)
+{
+ svs_fd_t *svs_fd = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ LOCK(&fd->lock);
+ {
+ svs_fd = __svs_fd_ctx_get_or_new(this, fd);
+ }
+ UNLOCK(&fd->lock);
+
+out:
+ return svs_fd;
+}
+
+int
+svs_uuid_generate(xlator_t *this, uuid_t gfid, char *snapname,
+ uuid_t origin_gfid)
+{
+ char ino_string[NAME_MAX + 32] = "";
+ uuid_t tmp = {
+ 0,
+ };
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, snapname, out);
+
+ (void)snprintf(ino_string, sizeof(ino_string), "%s%s", snapname,
+ uuid_utoa(origin_gfid));
+
+ if (gf_gfid_generate_from_xxh64(tmp, ino_string)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, SVS_MSG_GFID_GEN_FAILED,
+ "failed to generate "
+ "gfid for object with actual gfid of %s "
+ "(snapname: %s, key: %s)",
+ uuid_utoa(origin_gfid), snapname, ino_string);
+ goto out;
+ }
+
+ gf_uuid_copy(gfid, tmp);
+
+ ret = 0;
+
+ gf_msg_debug(this->name, 0, "gfid generated is %s ", uuid_utoa(gfid));
+
+out:
+ return ret;
+}
+
+void
+svs_fill_ino_from_gfid(struct iatt *buf)
+{
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, buf, out);
+
+ /* consider least significant 8 bytes of value out of gfid */
+ if (gf_uuid_is_null(buf->ia_gfid)) {
+ buf->ia_ino = -1;
+ goto out;
+ }
+
+ buf->ia_ino = gfid_to_ino(buf->ia_gfid);
+out:
+ return;
+}
+
+void
+svs_iatt_fill(uuid_t gfid, struct iatt *buf)
+{
+ struct timeval tv = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, buf, out);
+
+ buf->ia_type = IA_IFDIR;
+ buf->ia_uid = 0;
+ buf->ia_gid = 0;
+ buf->ia_size = 0;
+ buf->ia_nlink = 2;
+ buf->ia_blocks = 8;
+ buf->ia_size = 4096;
+
+ gf_uuid_copy(buf->ia_gfid, gfid);
+ svs_fill_ino_from_gfid(buf);
+
+ buf->ia_prot = ia_prot_from_st_mode(0755);
+
+ gettimeofday(&tv, 0);
+
+ buf->ia_mtime = buf->ia_atime = buf->ia_ctime = tv.tv_sec;
+ buf->ia_mtime_nsec = buf->ia_atime_nsec = buf->ia_ctime_nsec = (tv.tv_usec *
+ 1000);
+
+out:
+ return;
+}
+
+/* priv->snaplist_lock should be held before calling this function */
+snap_dirent_t *
+__svs_get_snap_dirent(xlator_t *this, const char *name)
+{
+ svs_private_t *private = NULL;
+ int i = 0;
+ snap_dirent_t *dirents = NULL;
+ snap_dirent_t *tmp_dirent = NULL;
+ snap_dirent_t *dirent = NULL;
+
+ private
+ = this->private;
+
+ dirents = private->dirents;
+ if (!dirents) {
+ goto out;
+ }
+
+ tmp_dirent = dirents;
+ for (i = 0; i < private->num_snaps; i++) {
+ if (!strcmp(tmp_dirent->name, name)) {
+ dirent = tmp_dirent;
+ break;
+ }
+ tmp_dirent++;
+ }
+
+out:
+ return dirent;
+}
+
+glfs_t *
+__svs_initialise_snapshot_volume(xlator_t *this, const char *name,
+ int32_t *op_errno)
+{
+ svs_private_t *priv = NULL;
+ int32_t ret = -1;
+ int32_t local_errno = ESTALE;
+ snap_dirent_t *dirent = NULL;
+ char volname[PATH_MAX] = {
+ 0,
+ };
+ glfs_t *fs = NULL;
+ int loglevel = GF_LOG_INFO;
+ char logfile[PATH_MAX] = {
+ 0,
+ };
+ char *volfile_server = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, name, out);
+
+ priv = this->private;
+
+ dirent = __svs_get_snap_dirent(this, name);
+ if (!dirent) {
+ gf_msg_debug(this->name, 0,
+ "snap entry for "
+ "name %s not found",
+ name);
+ local_errno = ENOENT;
+ goto out;
+ }
+
+ if (dirent->fs) {
+ ret = 0;
+ fs = dirent->fs;
+ goto out;
+ }
+
+ snprintf(volname, sizeof(volname), "/snaps/%s/%s/%s", dirent->name,
+ dirent->snap_volname, dirent->snap_volname);
+
+ fs = glfs_new(volname);
+ if (!fs) {
+ local_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, local_errno, SVS_MSG_GLFS_NEW_FAILED,
+ "glfs instance for snap volume %s "
+ "failed",
+ dirent->name);
+ goto out;
+ }
+
+ /*
+ * Before, localhost was used as the volfile server. But, with that
+ * method, accessing snapshots started giving ENOENT error if a
+ * specific bind address is mentioned in the glusterd volume file.
+ * Check the bug https://bugzilla.redhat.com/show_bug.cgi?id=1725211.
+ * So, the new method is tried below, where, snapview-server first
+ * uses the volfile server used by the snapd (obtained from the
+ * command line arguments saved in the global context of the process).
+ * If the volfile server in global context is NULL, then localhost
+ * is tried (like before).
+ */
+ if (this->ctx->cmd_args.volfile_server) {
+ volfile_server = gf_strdup(this->ctx->cmd_args.volfile_server);
+ if (!volfile_server) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+ SVS_MSG_VOLFILE_SERVER_GET_FAIL,
+ "failed to copy volfile server %s. ",
+ this->ctx->cmd_args.volfile_server);
+ ret = -1;
+ goto out;
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+ SVS_MSG_VOLFILE_SERVER_GET_FAIL,
+ "volfile server is NULL in cmd args. "
+ "Trying with localhost");
+ volfile_server = gf_strdup("localhost");
+ if (!volfile_server) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM,
+ SVS_MSG_VOLFILE_SERVER_GET_FAIL,
+ "failed to copy volfile server localhost.");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = glfs_set_volfile_server(fs, "tcp", volfile_server, 24007);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, local_errno,
+ SVS_MSG_SET_VOLFILE_SERVR_FAILED,
+ "setting the "
+ "volfile server %s for snap volume %s "
+ "failed",
+ volfile_server, dirent->name);
+ goto out;
+ }
+
+ snprintf(logfile, sizeof(logfile),
+ DEFAULT_SVD_LOG_FILE_DIRECTORY "/snaps/%s/%s-%s.log",
+ priv->volname, name, dirent->uuid);
+
+ ret = glfs_set_logging(fs, logfile, loglevel);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, local_errno,
+ SVS_MSG_SET_LOGGING_FAILED,
+ "failed to set the "
+ "log file path");
+ goto out;
+ }
+
+ ret = glfs_init(fs);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, local_errno, SVS_MSG_GLFS_INIT_FAILED,
+ "initing the "
+ "fs for %s failed",
+ dirent->name);
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (ret) {
+ if (op_errno)
+ *op_errno = local_errno;
+
+ if (fs)
+ glfs_fini(fs);
+ fs = NULL;
+ }
+
+ if (fs) {
+ dirent->fs = fs;
+ }
+
+ GF_FREE(volfile_server);
+ return fs;
+}
+
+glfs_t *
+svs_initialise_snapshot_volume(xlator_t *this, const char *name,
+ int32_t *op_errno)
+{
+ glfs_t *fs = NULL;
+ svs_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, name, out);
+
+ priv = this->private;
+
+ LOCK(&priv->snaplist_lock);
+ {
+ fs = __svs_initialise_snapshot_volume(this, name, op_errno);
+ }
+ UNLOCK(&priv->snaplist_lock);
+
+out:
+
+ return fs;
+}
+
+snap_dirent_t *
+svs_get_latest_snap_entry(xlator_t *this)
+{
+ svs_private_t *priv = NULL;
+ snap_dirent_t *dirents = NULL;
+ snap_dirent_t *dirent = NULL;
+
+ GF_VALIDATE_OR_GOTO("svs", this, out);
+
+ priv = this->private;
+
+ LOCK(&priv->snaplist_lock);
+ {
+ dirents = priv->dirents;
+ if (!dirents) {
+ goto unlock;
+ }
+ if (priv->num_snaps)
+ dirent = &dirents[priv->num_snaps - 1];
+ }
+unlock:
+ UNLOCK(&priv->snaplist_lock);
+
+out:
+ return dirent;
+}
+
+glfs_t *
+svs_get_latest_snapshot(xlator_t *this)
+{
+ glfs_t *fs = NULL;
+ snap_dirent_t *dirent = NULL;
+ svs_private_t *priv = NULL;
+
+ GF_VALIDATE_OR_GOTO("svs", this, out);
+ priv = this->private;
+
+ dirent = svs_get_latest_snap_entry(this);
+
+ if (dirent) {
+ LOCK(&priv->snaplist_lock);
+ {
+ fs = dirent->fs;
+ }
+ UNLOCK(&priv->snaplist_lock);
+ }
+
+out:
+ return fs;
+}
+
+glfs_t *
+svs_inode_ctx_glfs_mapping(xlator_t *this, svs_inode_t *inode_ctx)
+{
+ glfs_t *fs = NULL;
+
+ GF_VALIDATE_OR_GOTO("svs", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode_ctx, out);
+
+ fs = inode_ctx->fs;
+
+ SVS_CHECK_VALID_SNAPSHOT_HANDLE(fs, this);
+
+out:
+ return fs;
+}
+
+glfs_t *
+svs_inode_glfs_mapping(xlator_t *this, inode_t *inode)
+{
+ svs_inode_t *inode_ctx = NULL;
+ glfs_t *fs = NULL;
+
+ inode_ctx = svs_inode_ctx_get(this, inode);
+ if (!inode_ctx) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode context not found for"
+ " the inode %s",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+
+ fs = svs_inode_ctx_glfs_mapping(this, inode_ctx);
+
+out:
+ return fs;
+}
diff --git a/xlators/features/snapview-server/src/snapview-server-mem-types.h b/xlators/features/snapview-server/src/snapview-server-mem-types.h
new file mode 100644
index 00000000000..63456b85323
--- /dev/null
+++ b/xlators/features/snapview-server/src/snapview-server-mem-types.h
@@ -0,0 +1,25 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __SNAP_VIEW_MEM_TYPES_H
+#define __SNAP_VIEW_MEM_TYPES_H
+
+#include <glusterfs/mem-types.h>
+
+enum snapview_mem_types {
+ gf_svs_mt_priv_t = gf_common_mt_end + 1,
+ gf_svs_mt_svs_inode_t,
+ gf_svs_mt_dirents_t,
+ gf_svs_mt_svs_fd_t,
+ gf_svs_mt_snaplist_t,
+ gf_svs_mt_end
+};
+
+#endif
diff --git a/xlators/features/snapview-server/src/snapview-server-messages.h b/xlators/features/snapview-server/src/snapview-server-messages.h
new file mode 100644
index 00000000000..f634ab5d2b0
--- /dev/null
+++ b/xlators/features/snapview-server/src/snapview-server-messages.h
@@ -0,0 +1,54 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _SNAPVIEW_SERVER_MESSAGES_H_
+#define _SNAPVIEW_SERVER_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(SNAPVIEW_SERVER, SVS_MSG_NO_MEMORY, SVS_MSG_MEM_ACNT_FAILED,
+ SVS_MSG_NULL_GFID, SVS_MSG_GET_LATEST_SNAP_FAILED,
+ SVS_MSG_INVALID_GLFS_CTX, SVS_MSG_LOCK_DESTROY_FAILED,
+ SVS_MSG_SNAPSHOT_LIST_CHANGED, SVS_MSG_MGMT_INIT_FAILED,
+ SVS_MSG_GET_SNAPSHOT_LIST_FAILED, SVS_MSG_GET_GLFS_H_OBJECT_FAILED,
+ SVS_MSG_PARENT_CTX_OR_NAME_NULL, SVS_MSG_SET_INODE_CONTEXT_FAILED,
+ SVS_MSG_GET_INODE_CONTEXT_FAILED, SVS_MSG_NEW_INODE_CTX_FAILED,
+ SVS_MSG_DELETE_INODE_CONTEXT_FAILED, SVS_MSG_SET_FD_CONTEXT_FAILED,
+ SVS_MSG_NEW_FD_CTX_FAILED, SVS_MSG_DELETE_FD_CTX_FAILED,
+ SVS_MSG_GETXATTR_FAILED, SVS_MSG_LISTXATTR_FAILED,
+ SVS_MSG_RELEASEDIR_FAILED, SVS_MSG_RELEASE_FAILED,
+ SVS_MSG_TELLDIR_FAILED, SVS_MSG_STAT_FAILED, SVS_MSG_STATFS_FAILED,
+ SVS_MSG_OPEN_FAILED, SVS_MSG_READ_FAILED, SVS_MSG_READLINK_FAILED,
+ SVS_MSG_ACCESS_FAILED, SVS_MSG_GET_FD_CONTEXT_FAILED,
+ SVS_MSG_DICT_SET_FAILED, SVS_MSG_OPENDIR_FAILED,
+ SVS_MSG_FS_INSTANCE_INVALID, SVS_MSG_SETFSUID_FAIL,
+ SVS_MSG_SETFSGID_FAIL, SVS_MSG_SETFSGRPS_FAIL,
+ SVS_MSG_BUILD_TRNSPRT_OPT_FAILED, SVS_MSG_RPC_INIT_FAILED,
+ SVS_MSG_REG_NOTIFY_FAILED, SVS_MSG_REG_CBK_PRGM_FAILED,
+ SVS_MSG_RPC_CLNT_START_FAILED, SVS_MSG_XDR_PAYLOAD_FAILED,
+ SVS_MSG_NULL_CTX, SVS_MSG_RPC_CALL_FAILED, SVS_MSG_XDR_DECODE_FAILED,
+ SVS_MSG_RSP_DICT_EMPTY, SVS_MSG_DICT_GET_FAILED,
+ SVS_MSG_SNAP_LIST_REFRESH_FAILED, SVS_MSG_RPC_REQ_FAILED,
+ SVS_MSG_CLOSEDIR_FAILED, SVS_MSG_CLOSE_FAILED,
+ SVS_MSG_GFID_GEN_FAILED, SVS_MSG_GLFS_NEW_FAILED,
+ SVS_MSG_SET_VOLFILE_SERVR_FAILED, SVS_MSG_SET_LOGGING_FAILED,
+ SVS_MSG_VOLFILE_SERVER_GET_FAIL, SVS_MSG_GLFS_INIT_FAILED);
+
+#endif /* !_SNAPVIEW_CLIENT_MESSAGES_H_ */
diff --git a/xlators/features/snapview-server/src/snapview-server-mgmt.c b/xlators/features/snapview-server/src/snapview-server-mgmt.c
new file mode 100644
index 00000000000..ecf31c3b880
--- /dev/null
+++ b/xlators/features/snapview-server/src/snapview-server-mgmt.c
@@ -0,0 +1,524 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "snapview-server.h"
+#include "snapview-server-mem-types.h"
+#include <pthread.h>
+
+int
+mgmt_cbk_snap(struct rpc_clnt *rpc, void *mydata, void *data)
+{
+ xlator_t *this = NULL;
+
+ this = mydata;
+ GF_ASSERT(this);
+
+ gf_msg("mgmt", GF_LOG_INFO, 0, SVS_MSG_SNAPSHOT_LIST_CHANGED,
+ "list of snapshots changed");
+
+ svs_get_snapshot_list(this);
+ return 0;
+}
+
+static rpcclnt_cb_actor_t svs_cbk_actors[GF_CBK_MAXVALUE] = {
+ [GF_CBK_GET_SNAPS] = {"GETSNAPS", mgmt_cbk_snap, GF_CBK_GET_SNAPS},
+};
+
+static struct rpcclnt_cb_program svs_cbk_prog = {
+ .progname = "GlusterFS Callback",
+ .prognum = GLUSTER_CBK_PROGRAM,
+ .progver = GLUSTER_CBK_VERSION,
+ .actors = svs_cbk_actors,
+ .numactors = GF_CBK_MAXVALUE,
+};
+
+static char *clnt_handshake_procs[GF_HNDSK_MAXVALUE] = {
+ [GF_HNDSK_NULL] = "NULL",
+ [GF_HNDSK_EVENT_NOTIFY] = "EVENTNOTIFY",
+};
+
+static rpc_clnt_prog_t svs_clnt_handshake_prog = {
+ .progname = "GlusterFS Handshake",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
+ .procnames = clnt_handshake_procs,
+};
+
+static int
+svs_rpc_notify(struct rpc_clnt *rpc, void *mydata, rpc_clnt_event_t event,
+ void *data)
+{
+ xlator_t *this = NULL;
+ int ret = 0;
+
+ this = mydata;
+
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ ret = svs_get_snapshot_list(this);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ SVS_MSG_GET_SNAPSHOT_LIST_FAILED,
+ "Error in refreshing the snaplist "
+ "infrastructure");
+ ret = -1;
+ }
+ break;
+ default:
+ break;
+ }
+ return ret;
+}
+
+int
+svs_mgmt_init(xlator_t *this)
+{
+ int ret = -1;
+ svs_private_t *priv = NULL;
+ dict_t *options = NULL;
+ int port = GF_DEFAULT_BASE_PORT;
+ char *host = NULL;
+ cmd_args_t *cmd_args = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_cmdline_option_t *opt = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->ctx, out);
+
+ priv = this->private;
+
+ ctx = this->ctx;
+ cmd_args = &ctx->cmd_args;
+
+ host = "localhost";
+ if (cmd_args->volfile_server)
+ host = cmd_args->volfile_server;
+
+ options = dict_new();
+ if (!options)
+ goto out;
+
+ opt = find_xlator_option_in_cmd_args_t("address-family", cmd_args);
+ ret = rpc_transport_inet_options_build(options, host, port,
+ (opt != NULL ? opt->value : NULL));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_BUILD_TRNSPRT_OPT_FAILED,
+ "failed to build the "
+ "transport options");
+ goto out;
+ }
+
+ priv->rpc = rpc_clnt_new(options, this, this->name, 8);
+ if (!priv->rpc) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_RPC_INIT_FAILED,
+ "failed to initialize RPC");
+ goto out;
+ }
+
+ ret = rpc_clnt_register_notify(priv->rpc, svs_rpc_notify, this);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, SVS_MSG_REG_NOTIFY_FAILED,
+ "failed to register notify function");
+ goto out;
+ }
+
+ ret = rpcclnt_cbk_program_register(priv->rpc, &svs_cbk_prog, this);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_REG_CBK_PRGM_FAILED,
+ "failed to register callback program");
+ goto out;
+ }
+
+ ret = rpc_clnt_start(priv->rpc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_RPC_CLNT_START_FAILED,
+ "failed to start the rpc "
+ "client");
+ goto out;
+ }
+
+ ret = 0;
+
+ gf_msg_debug(this->name, 0, "svs mgmt init successful");
+
+out:
+ if (options)
+ dict_unref(options);
+ if (ret)
+ if (priv) {
+ rpc_clnt_connection_cleanup(&priv->rpc->conn);
+ rpc_clnt_unref(priv->rpc);
+ priv->rpc = NULL;
+ }
+
+ return ret;
+}
+
+int
+svs_mgmt_submit_request(void *req, call_frame_t *frame, glusterfs_ctx_t *ctx,
+ rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn,
+ xdrproc_t xdrproc)
+{
+ int ret = -1;
+ int count = 0;
+ struct iovec iov = {
+ 0,
+ };
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ ssize_t xdr_size = 0;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", frame, out);
+ GF_VALIDATE_OR_GOTO("snapview-server", req, out);
+ GF_VALIDATE_OR_GOTO("snapview-server", ctx, out);
+ GF_VALIDATE_OR_GOTO("snapview-server", prog, out);
+
+ GF_ASSERT(frame->this);
+
+ iobref = iobref_new();
+ if (!iobref) {
+ gf_msg(frame->this->name, GF_LOG_WARNING, ENOMEM, SVS_MSG_NO_MEMORY,
+ "failed to allocate "
+ "new iobref");
+ goto out;
+ }
+
+ if (req) {
+ xdr_size = xdr_sizeof(xdrproc, req);
+
+ iobuf = iobuf_get2(ctx->iobuf_pool, xdr_size);
+ if (!iobuf) {
+ goto out;
+ }
+
+ iobref_add(iobref, iobuf);
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize(iobuf);
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic(iov, req, xdrproc);
+ if (ret == -1) {
+ gf_msg(frame->this->name, GF_LOG_WARNING, 0,
+ SVS_MSG_XDR_PAYLOAD_FAILED, "Failed to create XDR payload");
+ goto out;
+ }
+ iov.iov_len = ret;
+ count = 1;
+ }
+
+ ret = rpc_clnt_submit(ctx->mgmt, prog, procnum, cbkfn, &iov, count, NULL, 0,
+ iobref, frame, NULL, 0, NULL, 0, NULL);
+
+out:
+ if (iobref)
+ iobref_unref(iobref);
+
+ if (iobuf)
+ iobuf_unref(iobuf);
+ return ret;
+}
+
+int
+mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gf_getsnap_name_uuid_rsp rsp = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = -1;
+ dict_t *dict = NULL;
+ char key[32] = {0};
+ int len;
+ int snapcount = 0;
+ svs_private_t *priv = NULL;
+ xlator_t *this = NULL;
+ int i = 0;
+ int j = 0;
+ char *value = NULL;
+ snap_dirent_t *dirents = NULL;
+ snap_dirent_t *old_dirents = NULL;
+ int oldcount = 0;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", req, error_out);
+ GF_VALIDATE_OR_GOTO("snapview-server", myframe, error_out);
+ GF_VALIDATE_OR_GOTO("snapview-server", iov, error_out);
+
+ frame = myframe;
+ this = frame->this;
+ ctx = frame->this->ctx;
+ priv = this->private;
+
+ if (!ctx) {
+ errno = EINVAL;
+ gf_msg(frame->this->name, GF_LOG_ERROR, errno, SVS_MSG_NULL_CTX,
+ "NULL context");
+ goto out;
+ }
+
+ if (-1 == req->rpc_status) {
+ errno = EINVAL;
+ gf_msg(frame->this->name, GF_LOG_ERROR, errno, SVS_MSG_RPC_CALL_FAILED,
+ "RPC call is not successful");
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getsnap_name_uuid_rsp);
+ if (ret < 0) {
+ gf_msg(frame->this->name, GF_LOG_ERROR, 0, SVS_MSG_XDR_DECODE_FAILED,
+ "Failed to decode xdr response, rsp.op_ret = %d", rsp.op_ret);
+ goto out;
+ }
+
+ if (rsp.op_ret == -1) {
+ errno = rsp.op_errno;
+ ret = -1;
+ goto out;
+ }
+
+ if (!rsp.dict.dict_len) {
+ ret = -1;
+ errno = EINVAL;
+ gf_msg(frame->this->name, GF_LOG_ERROR, errno, SVS_MSG_RSP_DICT_EMPTY,
+ "Response dict is not populated");
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ ret = -1;
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret) {
+ errno = EINVAL;
+ gf_msg(frame->this->name, GF_LOG_ERROR, errno,
+ LG_MSG_DICT_UNSERIAL_FAILED, "Failed to unserialize dictionary");
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, "snap-count", (int32_t *)&snapcount);
+ if (ret) {
+ errno = EINVAL;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, errno, SVS_MSG_DICT_GET_FAILED,
+ "Error retrieving snapcount");
+ goto out;
+ }
+
+ if (snapcount > 0) {
+ /* first time we are fetching snap list */
+ dirents = GF_CALLOC(snapcount, sizeof(snap_dirent_t),
+ gf_svs_mt_dirents_t);
+ if (!dirents) {
+ errno = ENOMEM;
+ ret = -1;
+ gf_msg(frame->this->name, GF_LOG_ERROR, errno, SVS_MSG_NO_MEMORY,
+ "Unable to allocate memory");
+ goto out;
+ }
+ }
+
+ for (i = 0; i < snapcount; i++) {
+ len = snprintf(key, sizeof(key), "snap-volname.%d", i + 1);
+ ret = dict_get_strn(dict, key, len, &value);
+ if (ret) {
+ errno = EINVAL;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, errno, SVS_MSG_DICT_GET_FAILED,
+ "Error retrieving snap volname %d", i + 1);
+ goto out;
+ }
+
+ strncpy(dirents[i].snap_volname, value,
+ sizeof(dirents[i].snap_volname));
+
+ len = snprintf(key, sizeof(key), "snap-id.%d", i + 1);
+ ret = dict_get_strn(dict, key, len, &value);
+ if (ret) {
+ errno = EINVAL;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, errno, SVS_MSG_DICT_GET_FAILED,
+ "Error retrieving snap uuid %d", i + 1);
+ goto out;
+ }
+ strncpy(dirents[i].uuid, value, sizeof(dirents[i].uuid));
+
+ len = snprintf(key, sizeof(key), "snapname.%d", i + 1);
+ ret = dict_get_strn(dict, key, len, &value);
+ if (ret) {
+ errno = EINVAL;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, errno, SVS_MSG_DICT_GET_FAILED,
+ "Error retrieving snap name %d", i + 1);
+ goto out;
+ }
+ strncpy(dirents[i].name, value, sizeof(dirents[i].name));
+ }
+
+ /*
+ * Got the new snap list populated in dirents
+ * The new snap list is either a subset or a superset of
+ * the existing snaplist old_dirents which has priv->num_snaps
+ * number of entries.
+ *
+ * If subset, then clean up the fs for entries which are
+ * no longer relevant.
+ *
+ * For other overlapping entries set the fs for new dirents
+ * entries which have a fs assigned already in old_dirents
+ *
+ * We do this as we don't want to do new glfs_init()s repeatedly
+ * as the dirents entries for snapshot volumes get repatedly
+ * cleaned up and allocated. And if we don't then that will lead
+ * to memleaks
+ */
+
+ LOCK(&priv->snaplist_lock);
+ {
+ oldcount = priv->num_snaps;
+ old_dirents = priv->dirents;
+ for (i = 0; i < priv->num_snaps; i++) {
+ for (j = 0; j < snapcount; j++) {
+ if ((!strcmp(old_dirents[i].name, dirents[j].name)) &&
+ (!strcmp(old_dirents[i].uuid, dirents[j].uuid))) {
+ dirents[j].fs = old_dirents[i].fs;
+ old_dirents[i].fs = NULL;
+ break;
+ }
+ }
+ }
+
+ priv->dirents = dirents;
+ priv->num_snaps = snapcount;
+ }
+ UNLOCK(&priv->snaplist_lock);
+
+ if (old_dirents) {
+ for (i = 0; i < oldcount; i++) {
+ if (old_dirents[i].fs)
+ gf_msg_debug(this->name, 0,
+ "calling glfs_fini on "
+ "name: %s, snap_volname: %s, uuid: %s",
+ old_dirents[i].name, old_dirents[i].snap_volname,
+ old_dirents[i].uuid);
+ glfs_fini(old_dirents[i].fs);
+ }
+ }
+
+ GF_FREE(old_dirents);
+
+ ret = 0;
+
+out:
+ if (dict) {
+ dict_unref(dict);
+ }
+ free(rsp.dict.dict_val);
+ free(rsp.op_errstr);
+
+ if (ret && dirents) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, SVS_MSG_SNAP_LIST_REFRESH_FAILED,
+ "Could not update dirents with refreshed snap list");
+ GF_FREE(dirents);
+ }
+
+ if (myframe)
+ SVS_STACK_DESTROY(myframe);
+
+error_out:
+ return ret;
+}
+
+int
+svs_get_snapshot_list(xlator_t *this)
+{
+ gf_getsnap_name_uuid_req req = {{
+ 0,
+ }};
+ int ret = -1;
+ dict_t *dict = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ call_frame_t *frame = NULL;
+ svs_private_t *priv = NULL;
+ gf_boolean_t frame_cleanup = _gf_true;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+
+ ctx = this->ctx;
+ if (!ctx) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_NULL_CTX, "ctx is NULL");
+ goto out;
+ }
+
+ frame = create_frame(this, ctx->pool);
+ if (!frame) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_FRAME_ERROR,
+ "Error allocating frame");
+ goto out;
+ }
+
+ priv = this->private;
+
+ dict = dict_new();
+ if (!dict) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SVS_MSG_NO_MEMORY,
+ "Error allocating dictionary");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "volname", priv->volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_DICT_SET_FAILED,
+ "Error setting volname in dict");
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, LG_MSG_DICT_UNSERIAL_FAILED,
+ "Failed to serialize dictionary");
+ ret = -1;
+ goto out;
+ }
+
+ ret = svs_mgmt_submit_request(
+ &req, frame, ctx, &svs_clnt_handshake_prog, GF_HNDSK_GET_SNAPSHOT_INFO,
+ mgmt_get_snapinfo_cbk, (xdrproc_t)xdr_gf_getsnap_name_uuid_req);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_RPC_REQ_FAILED,
+ "Error sending snapshot names RPC request");
+ }
+
+ frame_cleanup = _gf_false;
+
+out:
+ if (dict) {
+ dict_unref(dict);
+ }
+ GF_FREE(req.dict.dict_val);
+
+ if (frame_cleanup && frame) {
+ /*
+ * Destroy the frame if we encountered an error
+ * Else we need to clean it up in
+ * mgmt_get_snapinfo_cbk
+ */
+ SVS_STACK_DESTROY(frame);
+ }
+
+ return ret;
+}
diff --git a/xlators/features/snapview-server/src/snapview-server.c b/xlators/features/snapview-server/src/snapview-server.c
new file mode 100644
index 00000000000..76cccae5914
--- /dev/null
+++ b/xlators/features/snapview-server/src/snapview-server.c
@@ -0,0 +1,2720 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include "snapview-server.h"
+#include "snapview-server-mem-types.h"
+#include <glusterfs/compat-errno.h>
+
+#include <glusterfs/xlator.h>
+#include "rpc-clnt.h"
+#include "xdr-generic.h"
+#include "protocol-common.h"
+#include <glusterfs/syscall.h>
+#include <pthread.h>
+
+#include "glfs-internal.h"
+
+int
+gf_setcredentials(uid_t *uid, gid_t *gid, uint16_t ngrps, uint32_t *groups)
+{
+ int ret = 0;
+
+ if (uid) {
+ ret = glfs_setfsuid(*uid);
+ if (ret != 0) {
+ gf_msg("snapview-server", GF_LOG_ERROR, 0, SVS_MSG_SETFSUID_FAIL,
+ "failed to set uid "
+ "%u in thread context",
+ *uid);
+ return ret;
+ }
+ }
+ if (gid) {
+ ret = glfs_setfsgid(*gid);
+ if (ret != 0) {
+ gf_msg("snapview-server", GF_LOG_ERROR, 0, SVS_MSG_SETFSGID_FAIL,
+ "failed to set gid "
+ "%u in thread context",
+ *gid);
+ return ret;
+ }
+ }
+
+ if (ngrps != 0 && groups) {
+ ret = glfs_setfsgroups(ngrps, groups);
+ if (ret != 0) {
+ gf_msg("snapview-server", GF_LOG_ERROR, 0, SVS_MSG_SETFSGRPS_FAIL,
+ "failed to set "
+ "groups in thread context");
+ return ret;
+ }
+ }
+ return 0;
+}
+
+int32_t
+svs_lookup_entry_point(xlator_t *this, loc_t *loc, inode_t *parent,
+ struct iatt *buf, struct iatt *postparent,
+ int32_t *op_errno)
+{
+ uuid_t gfid;
+ svs_inode_t *inode_ctx = NULL;
+ int op_ret = -1;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, buf, out);
+ GF_VALIDATE_OR_GOTO(this->name, postparent, out);
+
+ if (gf_uuid_is_null(loc->inode->gfid)) {
+ gf_uuid_generate(gfid);
+ svs_iatt_fill(gfid, buf);
+
+ /* Here the inode context of the entry point directory
+ is filled with just the type of the inode and the gfid
+ of the parent from where the entry point was entered.
+ The glfs object and the fs instance will be NULL.
+ */
+ if (parent)
+ svs_iatt_fill(parent->gfid, postparent);
+ else {
+ svs_iatt_fill(buf->ia_gfid, postparent);
+ }
+
+ inode_ctx = svs_inode_ctx_get_or_new(this, loc->inode);
+ if (!inode_ctx) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno,
+ SVS_MSG_NEW_INODE_CTX_FAILED,
+ "failed to "
+ "allocate inode context for entry point "
+ "directory");
+ goto out;
+ }
+
+ gf_uuid_copy(inode_ctx->pargfid, loc->pargfid);
+ memcpy(&inode_ctx->buf, buf, sizeof(*buf));
+ inode_ctx->type = SNAP_VIEW_ENTRY_POINT_INODE;
+ } else {
+ inode_ctx = svs_inode_ctx_get(this, loc->inode);
+ if (inode_ctx) {
+ memcpy(buf, &inode_ctx->buf, sizeof(*buf));
+ svs_iatt_fill(inode_ctx->pargfid, postparent);
+ } else {
+ svs_iatt_fill(loc->inode->gfid, buf);
+ if (parent)
+ svs_iatt_fill(parent->gfid, postparent);
+ else {
+ svs_iatt_fill(loc->inode->gfid, postparent);
+ }
+ }
+ }
+
+ op_ret = 0;
+
+out:
+ return op_ret;
+}
+
+/* When lookup comes from client and the protocol/server tries to resolve
+ the pargfid via just sending the gfid as part of lookup, if the inode
+ for the parent gfid is not found. But since that gfid has not yet been
+ looked up yet, inode will not be having inode context and parent is not
+ there (as it is the parent of the entry that is being resolved). So
+ without parent and inode context, svs cannot know which snapshot
+ to look into. In such cases, the amguity is handled by looking
+ into the latest snapshot. If the directory is there in the latest
+ snapshot, lookup is successful, otherwise it is a failure. So for
+ any directory created after taking the latest snapshot, entry into
+ snapshot world is denied. i.e you have to be part of snapshot world
+ to enter it. If the gfid is not found there, then unwind with
+ ESTALE
+ This gets executed mainly in the situation where the snapshot entry
+ point is entered from a non-root directory and that non-root directory's
+ inode (or gfid) is not yet looked up. And in each case when a gfid has to
+ be looked up (without any inode contex and parent context present), last
+ snapshot is referred and a random gfid is not generated.
+*/
+int32_t
+svs_lookup_gfid(xlator_t *this, loc_t *loc, struct iatt *buf,
+ struct iatt *postparent, int32_t *op_errno)
+{
+ int32_t op_ret = -1;
+ unsigned char handle_obj[GFAPI_HANDLE_LENGTH] = {
+ 0,
+ };
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ struct stat statbuf = {
+ 0,
+ };
+ svs_inode_t *inode_ctx = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, buf, out);
+ GF_VALIDATE_OR_GOTO(this->name, postparent, out);
+
+ if (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_NULL_GFID, "gfid is NULL");
+ goto out;
+ }
+
+ if (!gf_uuid_is_null(loc->inode->gfid))
+ memcpy(handle_obj, loc->inode->gfid, GFAPI_HANDLE_LENGTH);
+ else
+ memcpy(handle_obj, loc->gfid, GFAPI_HANDLE_LENGTH);
+
+ fs = svs_get_latest_snapshot(this);
+ if (!fs) {
+ op_ret = -1;
+ *op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno,
+ SVS_MSG_GET_LATEST_SNAP_FAILED,
+ "failed to get the latest "
+ "snapshot");
+ goto out;
+ }
+
+ object = glfs_h_create_from_handle(fs, handle_obj, GFAPI_HANDLE_LENGTH,
+ &statbuf);
+ if (!object) {
+ op_ret = -1;
+ *op_errno = ESTALE;
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno,
+ SVS_MSG_GET_GLFS_H_OBJECT_FAILED,
+ "failed to do lookup and get "
+ "the handle on the snapshot %s (path: %s, gfid: %s)",
+ loc->name, loc->path, uuid_utoa(loc->gfid));
+ goto out;
+ }
+
+ inode_ctx = svs_inode_ctx_get_or_new(this, loc->inode);
+ if (!inode_ctx) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno,
+ SVS_MSG_NEW_INODE_CTX_FAILED,
+ "failed to allocate inode "
+ "context");
+ goto out;
+ }
+
+ iatt_from_stat(buf, &statbuf);
+ if (!gf_uuid_is_null(loc->gfid))
+ gf_uuid_copy(buf->ia_gfid, loc->gfid);
+ else
+ gf_uuid_copy(buf->ia_gfid, loc->inode->gfid);
+
+ inode_ctx->type = SNAP_VIEW_VIRTUAL_INODE;
+ inode_ctx->fs = fs;
+ inode_ctx->object = object;
+ memcpy(&inode_ctx->buf, buf, sizeof(*buf));
+ svs_iatt_fill(buf->ia_gfid, postparent);
+
+ op_ret = 0;
+
+out:
+ return op_ret;
+}
+
+/* If the parent is an entry point inode, then create the handle for the
+ snapshot on which lookup came. i.e in reality lookup came on
+ the directory from which the entry point directory was entered, but
+ lookup is into the past. So create the handle for it by doing
+ the name-less lookup on the gfid (which can be obtained from
+ parent's context
+*/
+int32_t
+svs_lookup_snapshot(xlator_t *this, loc_t *loc, struct iatt *buf,
+ struct iatt *postparent, inode_t *parent,
+ svs_inode_t *parent_ctx, int32_t *op_errno)
+{
+ int32_t op_ret = -1;
+ unsigned char handle_obj[GFAPI_HANDLE_LENGTH] = {
+ 0,
+ };
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ struct stat statbuf = {
+ 0,
+ };
+ svs_inode_t *inode_ctx = NULL;
+ uuid_t gfid;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, buf, out);
+ GF_VALIDATE_OR_GOTO(this->name, postparent, out);
+ GF_VALIDATE_OR_GOTO(this->name, parent_ctx, out);
+ GF_VALIDATE_OR_GOTO(this->name, parent, out);
+
+ fs = svs_initialise_snapshot_volume(this, loc->name, op_errno);
+ if (!fs) {
+ gf_msg_debug(this->name, 0,
+ "failed to create "
+ "the fs instance for snap %s",
+ loc->name);
+ *op_errno = ENOENT;
+ op_ret = -1;
+ goto out;
+ }
+
+ memcpy(handle_obj, parent_ctx->pargfid, GFAPI_HANDLE_LENGTH);
+ object = glfs_h_create_from_handle(fs, handle_obj, GFAPI_HANDLE_LENGTH,
+ &statbuf);
+ if (!object) {
+ op_ret = -1;
+ *op_errno = errno;
+ /* Should this be in warning or error mode? */
+ gf_msg_debug(this->name, 0,
+ "failed to do lookup and "
+ "get the handle on the snapshot %s",
+ loc->name);
+ goto out;
+ }
+
+ inode_ctx = svs_inode_ctx_get_or_new(this, loc->inode);
+ if (!inode_ctx) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno,
+ SVS_MSG_NEW_INODE_CTX_FAILED,
+ "failed to allocate "
+ "inode context");
+ goto out;
+ }
+
+ if (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid))
+ gf_uuid_generate(gfid);
+ else {
+ if (!gf_uuid_is_null(loc->inode->gfid))
+ gf_uuid_copy(gfid, loc->inode->gfid);
+ else
+ gf_uuid_copy(gfid, loc->gfid);
+ }
+ iatt_from_stat(buf, &statbuf);
+ gf_uuid_copy(buf->ia_gfid, gfid);
+ svs_fill_ino_from_gfid(buf);
+ inode_ctx->type = SNAP_VIEW_SNAPSHOT_INODE;
+ inode_ctx->fs = fs;
+ inode_ctx->object = object;
+ memcpy(&inode_ctx->buf, buf, sizeof(*buf));
+ svs_iatt_fill(parent->gfid, postparent);
+
+ SVS_STRDUP(inode_ctx->snapname, loc->name);
+ if (!inode_ctx->snapname) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ op_ret = 0;
+
+out:
+ if (op_ret) {
+ if (object)
+ glfs_h_close(object);
+
+ if (inode_ctx)
+ inode_ctx->object = NULL;
+ }
+
+ return op_ret;
+}
+
+/* Both parent and entry are from snapshot world */
+int32_t
+svs_lookup_entry(xlator_t *this, loc_t *loc, struct iatt *buf,
+ struct iatt *postparent, inode_t *parent,
+ svs_inode_t *parent_ctx, int32_t *op_errno)
+{
+ int32_t op_ret = -1;
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ struct stat statbuf = {
+ 0,
+ };
+ svs_inode_t *inode_ctx = NULL;
+ glfs_object_t *parent_object = NULL;
+ uuid_t gfid = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+ GF_VALIDATE_OR_GOTO(this->name, buf, out);
+ GF_VALIDATE_OR_GOTO(this->name, postparent, out);
+ GF_VALIDATE_OR_GOTO(this->name, parent_ctx, out);
+ GF_VALIDATE_OR_GOTO(this->name, parent, out);
+
+ parent_object = parent_ctx->object;
+ fs = parent_ctx->fs;
+
+ object = glfs_h_lookupat(fs, parent_object, loc->name, &statbuf, 0);
+ if (!object) {
+ /* should this be in WARNING or ERROR mode? */
+ gf_msg_debug(this->name, 0,
+ "failed to do lookup and "
+ "get the handle for entry %s (path: %s)",
+ loc->name, loc->path);
+ op_ret = -1;
+ *op_errno = errno;
+ goto out;
+ }
+
+ if (gf_uuid_is_null(object->gfid)) {
+ /* should this be in WARNING or ERROR mode? */
+ gf_msg_debug(this->name, 0,
+ "gfid from glfs handle is "
+ "NULL for entry %s (path: %s)",
+ loc->name, loc->path);
+ op_ret = -1;
+ *op_errno = errno;
+ goto out;
+ }
+
+ inode_ctx = svs_inode_ctx_get_or_new(this, loc->inode);
+ if (!inode_ctx) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno,
+ SVS_MSG_NEW_INODE_CTX_FAILED,
+ "failed to allocate "
+ "inode context");
+ goto out;
+ }
+
+ if (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid)) {
+ if (svs_uuid_generate(this, gfid, parent_ctx->snapname, object->gfid)) {
+ /*
+ * should op_errno be something else such as
+ * EINVAL or ESTALE?
+ */
+ op_ret = -1;
+ *op_errno = EIO;
+ goto out;
+ }
+ } else {
+ if (!gf_uuid_is_null(loc->inode->gfid))
+ gf_uuid_copy(gfid, loc->inode->gfid);
+ else
+ gf_uuid_copy(gfid, loc->gfid);
+ }
+
+ iatt_from_stat(buf, &statbuf);
+ gf_uuid_copy(buf->ia_gfid, gfid);
+ svs_fill_ino_from_gfid(buf);
+ inode_ctx->type = SNAP_VIEW_VIRTUAL_INODE;
+ inode_ctx->fs = fs;
+ inode_ctx->object = object;
+ memcpy(&inode_ctx->buf, buf, sizeof(*buf));
+ svs_iatt_fill(parent->gfid, postparent);
+
+ if (IA_ISDIR(buf->ia_type)) {
+ SVS_STRDUP(inode_ctx->snapname, parent_ctx->snapname);
+ if (!inode_ctx->snapname) {
+ op_ret = -1;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ }
+
+ op_ret = 0;
+
+out:
+ if (op_ret) {
+ if (object)
+ glfs_h_close(object);
+
+ if (inode_ctx)
+ inode_ctx->object = NULL;
+ }
+
+ return op_ret;
+}
+
+/* inode context is there means lookup has come on an object which was
+ built either as part of lookup or as part of readdirp. But in readdirp
+ we would not have got the handle to access the object in the gfapi
+ world.
+ So if inode context contains glfs_t instance for the right
+ gfapi world and glfs_object_t handle for accessing it in the gfapi
+ world, then unwind with success as the snapshots as of now are
+ read-only.
+ If the above condition is not met, then send lookup call again to
+ the gfapi world. It can happen only if both parent context and
+ the name of the entry are present.
+
+ If parent is an entry point to snapshot world:
+ * parent is needed for getting the gfid on which lookup has to be done
+ (the gfid present in the inode is a virtual gfid) in the snapshot
+ world.
+ * name is required to get the right glfs_t instance on which lookup
+ has to be done
+
+ If parent is a directory from snapshot world:
+ * parent context is needed to get the glfs_t instance and to get the
+ handle to parent directory in the snapshot world.
+ * name is needed to do the lookup on the right entry in the snapshot
+ world
+*/
+int32_t
+svs_revalidate(xlator_t *this, loc_t *loc, inode_t *parent,
+ svs_inode_t *inode_ctx, svs_inode_t *parent_ctx,
+ struct iatt *buf, struct iatt *postparent, int32_t *op_errno)
+{
+ int32_t op_ret = -1;
+ int ret = -1;
+ char tmp_uuid[64] = {
+ 0,
+ };
+ glfs_t *fs = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, buf, out);
+ GF_VALIDATE_OR_GOTO(this->name, postparent, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode_ctx, out);
+
+ if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+ svs_iatt_fill(loc->inode->gfid, buf);
+ if (parent)
+ svs_iatt_fill(parent->gfid, postparent);
+ else
+ svs_iatt_fill(loc->inode->gfid, postparent);
+ op_ret = 0;
+ goto out;
+ } else {
+ /* Though fs and object are present in the inode context, its
+ * better to check if fs is valid or not before doing anything.
+ * Its for the protection from the following operations.
+ * 1) Create a file on the glusterfs mount point
+ * 2) Create a snapshot (say "snap1")
+ * 3) Access the contents of the snapshot
+ * 4) Delete the file from the mount point
+ * 5) Delete the snapshot "snap1"
+ * 6) Create a new snapshot "snap1"
+ *
+ * Now accessing the new snapshot "snap1" gives problems.
+ * Because the inode and dentry created for snap1 would not be
+ * deleted upon the deletion of the snapshot (as deletion of
+ * snapshot is a gluster cli operation, not a fop). So next time
+ * upon creation of a new snap with same name, the previous
+ * inode and dentry itself will be used. But the inode context
+ * contains old information about the glfs_t instance and the
+ * handle in the gfapi world. Thus the glfs_t instance should
+ * be checked before accessing. If its wrong, then right
+ * instance should be obtained by doing the lookup.
+ */
+ if (inode_ctx->fs && inode_ctx->object) {
+ fs = inode_ctx->fs;
+ SVS_CHECK_VALID_SNAPSHOT_HANDLE(fs, this);
+ if (fs) {
+ memcpy(buf, &inode_ctx->buf, sizeof(*buf));
+ if (parent)
+ svs_iatt_fill(parent->gfid, postparent);
+ else
+ svs_iatt_fill(buf->ia_gfid, postparent);
+ op_ret = 0;
+ goto out;
+ } else {
+ inode_ctx->fs = NULL;
+ inode_ctx->object = NULL;
+ ret = svs_get_handle(this, loc, inode_ctx, op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno,
+ SVS_MSG_GET_GLFS_H_OBJECT_FAILED,
+ "failed to get the handle for "
+ "%s (gfid %s)",
+ loc->path, uuid_utoa_r(loc->inode->gfid, tmp_uuid));
+ op_ret = -1;
+ goto out;
+ }
+ }
+ }
+
+ /* To send the lookup to gfapi world, both the name of the
+ entry as well as the parent context is needed.
+ */
+ if (!loc->name || !parent_ctx) {
+ *op_errno = ESTALE;
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno,
+ SVS_MSG_PARENT_CTX_OR_NAME_NULL, "%s is NULL",
+ loc->name ? "parent context" : "loc->name");
+ goto out;
+ }
+
+ if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE)
+ op_ret = svs_lookup_snapshot(this, loc, buf, postparent, parent,
+ parent_ctx, op_errno);
+ else
+ op_ret = svs_lookup_entry(this, loc, buf, postparent, parent,
+ parent_ctx, op_errno);
+
+ goto out;
+ }
+
+out:
+ return op_ret;
+}
+
+int32_t
+svs_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ struct iatt buf = {
+ 0,
+ };
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ struct iatt postparent = {
+ 0,
+ };
+ svs_inode_t *inode_ctx = NULL;
+ svs_inode_t *parent_ctx = NULL;
+ int32_t ret = -1;
+ inode_t *parent = NULL;
+ gf_boolean_t entry_point_key = _gf_false;
+ gf_boolean_t entry_point = _gf_false;
+ call_stack_t *root = NULL;
+
+ GF_VALIDATE_OR_GOTO("svs", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ root = frame->root;
+ op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps,
+ root->groups);
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ /* For lookups sent on inodes (i.e not parent inode + basename, but
+ direct inode itself which usually is a nameless lookup or revalidate
+ on the inode), loc->name will not be there. Get it from path if
+ it is there.
+ This is the difference between nameless lookup and revalidate lookup
+ on an inode:
+ nameless lookup: loc->path contains gfid and strrchr on it fails
+ revalidate lookup: loc->path contains the entry name of the inode
+ and strrchr gives the name of the entry from path
+ */
+ if (loc->path) {
+ if (!loc->name || (loc->name && !strcmp(loc->name, ""))) {
+ loc->name = strrchr(loc->path, '/');
+ if (loc->name)
+ loc->name++;
+ }
+ }
+
+ if (loc->parent)
+ parent = inode_ref(loc->parent);
+ else {
+ parent = inode_find(loc->inode->table, loc->pargfid);
+ if (!parent)
+ parent = inode_parent(loc->inode, NULL, NULL);
+ }
+ if (parent)
+ parent_ctx = svs_inode_ctx_get(this, parent);
+
+ inode_ctx = svs_inode_ctx_get(this, loc->inode);
+
+ if (xdata && !inode_ctx) {
+ ret = dict_get_str_boolean(xdata, "entry-point", _gf_false);
+ if (ret == -1) {
+ gf_msg_debug(this->name, 0,
+ "failed to get the "
+ "entry point info");
+ entry_point_key = _gf_false;
+ } else {
+ entry_point_key = ret;
+ }
+
+ if (loc->name && strlen(loc->name)) {
+ /* lookup can come with the entry-point set in the dict
+ * for the parent directory of the entry-point as well.
+ * So consider entry_point only for named lookup
+ */
+ entry_point = entry_point_key;
+ }
+ }
+
+ if (inode_ctx && inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+ /* entry-point may not be set in the dictonary.
+ * This can happen if snap-view client is restarted where
+ * inode-ctx not available and a nameless lookup has come
+ */
+ entry_point = _gf_true;
+ }
+
+ /* lookup is on the entry point to the snapshot world */
+ if (entry_point) {
+ op_ret = svs_lookup_entry_point(this, loc, parent, &buf, &postparent,
+ &op_errno);
+ goto out;
+ }
+
+ /* revalidate */
+ if (inode_ctx) {
+ op_ret = svs_revalidate(this, loc, parent, inode_ctx, parent_ctx, &buf,
+ &postparent, &op_errno);
+ goto out;
+ }
+
+ /* This can happen when entry point directory is entered from non-root
+ directory. (ex: if /mnt/glusterfs is the mount point, then entry
+ point (say .snaps) is entered from /mnt/glusterfs/dir/.snaps). Also
+ it can happen when client sends a nameless lookup on just a gfid and
+ the server does not have the inode in the inode table.
+ */
+ if (!inode_ctx && !parent_ctx) {
+ if (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid)) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ gf_msg_debug(this->name, 0,
+ "gfid is NULL. Either the lookup "
+ "came on missing entry or the "
+ "entry is stale");
+ goto out;
+ }
+
+ if (!entry_point_key) {
+ /* This can happen when there is no inode_ctx available.
+ * snapview-server might have restarted or
+ * graph change might have happened
+ */
+ op_ret = -1;
+ op_errno = ESTALE;
+ goto out;
+ }
+
+ /* lookup is on the parent directory of entry-point.
+ * this would have already looked up by snap-view client
+ * so return success
+ */
+ if (!gf_uuid_is_null(loc->gfid))
+ gf_uuid_copy(buf.ia_gfid, loc->gfid);
+ else
+ gf_uuid_copy(buf.ia_gfid, loc->inode->gfid);
+
+ svs_iatt_fill(buf.ia_gfid, &buf);
+ svs_iatt_fill(buf.ia_gfid, &postparent);
+
+ op_ret = 0;
+ goto out;
+ }
+
+ if (parent_ctx) {
+ if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE)
+ op_ret = svs_lookup_snapshot(this, loc, &buf, &postparent, parent,
+ parent_ctx, &op_errno);
+ else
+ op_ret = svs_lookup_entry(this, loc, &buf, &postparent, parent,
+ parent_ctx, &op_errno);
+ goto out;
+ }
+
+out:
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno,
+ loc ? loc->inode : NULL, &buf, xdata, &postparent);
+
+ if (parent)
+ inode_unref(parent);
+
+ return 0;
+}
+
+int32_t
+svs_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ svs_inode_t *inode_ctx = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ svs_fd_t *svs_fd = NULL;
+ glfs_fd_t *glfd = NULL;
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ call_stack_t *root = NULL;
+
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ root = frame->root;
+ op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps,
+ root->groups);
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ inode_ctx = svs_inode_ctx_get(this, loc->inode);
+ if (!inode_ctx) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode context not found "
+ "for the inode %s",
+ uuid_utoa(loc->inode->gfid));
+ goto out;
+ }
+
+ /* Fake success is sent if the opendir is on the entry point directory
+ or the inode is SNAP_VIEW_ENTRY_POINT_INODE
+ */
+ if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+ op_ret = 0;
+ op_errno = 0;
+ goto out;
+ } else {
+ SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret,
+ op_errno, out);
+
+ glfd = glfs_h_opendir(fs, object);
+ if (!glfd) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_OPENDIR_FAILED,
+ "opendir on %s failed "
+ "(gfid: %s)",
+ loc->name, uuid_utoa(loc->inode->gfid));
+ goto out;
+ }
+ svs_fd = svs_fd_ctx_get_or_new(this, fd);
+ if (!svs_fd) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_NEW_FD_CTX_FAILED,
+ "failed to allocate fd context "
+ "for %s (gfid: %s)",
+ loc->name, uuid_utoa(fd->inode->gfid));
+ glfs_closedir(glfd);
+ goto out;
+ }
+ svs_fd->fd = glfd;
+
+ op_ret = 0;
+ op_errno = 0;
+ }
+
+out:
+ STACK_UNWIND_STRICT(opendir, frame, op_ret, op_errno, fd, NULL);
+
+ return 0;
+}
+
+/*
+ * This function adds the xattr keys present in the list (@list) to the dict.
+ * But the list contains only the names of the xattrs (and no value, as
+ * the gfapi functions for the listxattr operations would return only the
+ * names of the xattrs in the buffer provided by the caller, though they had
+ * got the values of those xattrs from posix) as described in the man page of
+ * listxattr. But before unwinding snapview-server has to put those names
+ * back into the dict. But to get the values for those xattrs it has to do the
+ * getxattr operation on each xattr which might turn out to be a costly
+ * operation. So for each of the xattrs present in the list, a 0 byte value
+ * ("") is set into the dict before unwinding. Since ("") is also a valid xattr
+ * value(in a file system) we use an extra key in the same dictionary as an
+ * indicator to other xlators which want to cache the xattrs (as of now,
+ * md-cache which caches acl and selinux related xattrs) to not to cache the
+ * values of the xattrs present in the dict.
+ */
+int32_t
+svs_add_xattrs_to_dict(xlator_t *this, dict_t *dict, char *list, ssize_t size)
+{
+ char keybuffer[4096] = {
+ 0,
+ };
+ size_t remaining_size = 0;
+ int32_t list_offset = 0;
+ int32_t ret = -1;
+
+ GF_VALIDATE_OR_GOTO("snapview-daemon", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+ GF_VALIDATE_OR_GOTO(this->name, list, out);
+
+ remaining_size = size;
+ list_offset = 0;
+ while (remaining_size > 0) {
+ strncpy(keybuffer, list + list_offset, sizeof(keybuffer) - 1);
+#ifdef GF_DARWIN_HOST_OS
+ /* The protocol expect namespace for now */
+ char *newkey = NULL;
+ gf_add_prefix(XATTR_USER_PREFIX, keybuffer, &newkey);
+ strcpy(keybuffer, newkey);
+ GF_FREE(newkey);
+#endif
+ ret = dict_set_str(dict, keybuffer, "");
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_DICT_SET_FAILED,
+ "dict set operation "
+ "for the key %s failed.",
+ keybuffer);
+ goto out;
+ }
+
+ remaining_size -= strlen(keybuffer) + 1;
+ list_offset += strlen(keybuffer) + 1;
+ } /* while (remaining_size > 0) */
+
+ /* Add an additional key to indicate that we don't need to cache these
+ * xattrs(with value "") */
+ ret = dict_set_str(dict, "glusterfs.skip-cache", "");
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_DICT_SET_FAILED,
+ "dict set operation for the key glusterfs.skip-cache failed.");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
+svs_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata)
+{
+ svs_inode_t *inode_ctx = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ char *value = 0;
+ ssize_t size = 0;
+ dict_t *dict = NULL;
+ call_stack_t *root = NULL;
+
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out);
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", frame, out);
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", loc, out);
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", loc->inode, out);
+
+ root = frame->root;
+ op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps,
+ root->groups);
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ inode_ctx = svs_inode_ctx_get(this, loc->inode);
+ if (!inode_ctx) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode context not found "
+ "for the inode %s",
+ uuid_utoa(loc->inode->gfid));
+ goto out;
+ }
+
+ /* ENODATA is sent if the getxattr is on entry point directory
+ or the inode is SNAP_VIEW_ENTRY_POINT_INODE. Entry point is
+ a virtual directory on which setxattr operations are not
+ allowed. If getxattr has to be faked as success, then a value
+ for the name of the xattr has to be sent which we don't have.
+ */
+ if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+ op_ret = -1;
+ op_errno = ENODATA;
+ goto out;
+ } else {
+ SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret,
+ op_errno, out);
+
+ dict = dict_new();
+ if (!dict) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY,
+ "failed to allocate dict");
+ goto out;
+ }
+
+ size = glfs_h_getxattrs(fs, object, name, NULL, 0);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ if (errno == ENODATA) {
+ gf_msg_debug(this->name, 0,
+ "getxattr on "
+ "%s failed (ket: %s) with %s",
+ loc->path, name, strerror(errno));
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GETXATTR_FAILED,
+ "getxattr on %s failed (key: %s) with %s", loc->path,
+ name, strerror(errno));
+ }
+ goto out;
+ }
+ value = GF_CALLOC(size + 1, sizeof(char), gf_common_mt_char);
+ if (!value) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY,
+ "failed to allocate memory for getxattr "
+ "on %s (key: %s)",
+ loc->name, name);
+ goto out;
+ }
+
+ size = glfs_h_getxattrs(fs, object, name, value, size);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_GETXATTR_FAILED,
+ "failed to get the xattr %s for "
+ "entry %s",
+ name, loc->name);
+ goto out;
+ }
+ value[size] = '\0';
+
+ if (name) {
+ op_ret = dict_set_dynptr(dict, (char *)name, value, size);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_DICT_SET_FAILED,
+ "dict set operation for %s for "
+ "the key %s failed.",
+ loc->path, name);
+ GF_FREE(value);
+ value = NULL;
+ goto out;
+ }
+ } else {
+ op_ret = svs_add_xattrs_to_dict(this, dict, value, size);
+ if (op_ret == -1) {
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY,
+ "failed to add xattrs from the list to "
+ "dict for %s (gfid: %s)",
+ loc->path, uuid_utoa(loc->inode->gfid));
+ goto out;
+ }
+ GF_FREE(value);
+ value = NULL;
+ }
+ }
+
+out:
+ if (op_ret && value)
+ GF_FREE(value);
+
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, NULL);
+
+ if (dict)
+ dict_unref(dict);
+
+ return 0;
+}
+
+int32_t
+svs_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ svs_inode_t *inode_ctx = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ char *value = 0;
+ ssize_t size = 0;
+ dict_t *dict = NULL;
+ svs_fd_t *sfd = NULL;
+ glfs_fd_t *glfd = NULL;
+
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out);
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", frame, out);
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", fd, out);
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", fd->inode, out);
+
+ inode_ctx = svs_inode_ctx_get(this, fd->inode);
+ if (!inode_ctx) {
+ op_ret = -1;
+ op_errno = ESTALE;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode context not found "
+ "for the inode %s",
+ uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+ if (!(svs_inode_ctx_glfs_mapping(this, inode_ctx))) {
+ op_ret = -1;
+ op_errno = EBADF;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_FS_INSTANCE_INVALID,
+ "glfs instance %p to which the inode %s "
+ "belongs to does not exist. The snapshot "
+ "corresponding to the instance might have"
+ "been deleted or deactivated",
+ inode_ctx->fs, uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+ sfd = svs_fd_ctx_get_or_new(this, fd);
+ if (!sfd) {
+ op_ret = -1;
+ op_errno = EBADFD;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GET_FD_CONTEXT_FAILED,
+ "failed to get the fd "
+ "context for %s",
+ uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+ glfd = sfd->fd;
+ /* EINVAL is sent if the getxattr is on entry point directory
+ or the inode is SNAP_VIEW_ENTRY_POINT_INODE. Entry point is
+ a virtual directory on which setxattr operations are not
+ allowed. If getxattr has to be faked as success, then a value
+ for the name of the xattr has to be sent which we don't have.
+ */
+ if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto out;
+ } else {
+ dict = dict_new();
+ if (!dict) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY,
+ "failed to allocate dict "
+ "(gfid: %s, key: %s)",
+ uuid_utoa(fd->inode->gfid), name);
+ goto out;
+ }
+
+ if (name) {
+ size = glfs_fgetxattr(glfd, name, NULL, 0);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GETXATTR_FAILED,
+ "getxattr on %s failed "
+ "(key: %s)",
+ uuid_utoa(fd->inode->gfid), name);
+ goto out;
+ }
+ value = GF_CALLOC(size + 1, sizeof(char), gf_common_mt_char);
+ if (!value) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY,
+ "failed to "
+ "allocate memory for getxattr on %s "
+ "(key: %s)",
+ uuid_utoa(fd->inode->gfid), name);
+ goto out;
+ }
+
+ size = glfs_fgetxattr(glfd, name, value, size);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GETXATTR_FAILED,
+ "failed to get the xattr %s "
+ "for inode %s",
+ name, uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+ value[size] = '\0';
+
+ op_ret = dict_set_dynptr(dict, (char *)name, value, size);
+ if (op_ret < 0) {
+ op_errno = -op_ret;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_DICT_SET_FAILED,
+ "dict set operation for gfid %s "
+ "for the key %s failed.",
+ uuid_utoa(fd->inode->gfid), name);
+ goto out;
+ }
+ } else {
+ size = glfs_flistxattr(glfd, NULL, 0);
+ if (size == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_LISTXATTR_FAILED, "listxattr on %s failed",
+ uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+ value = GF_CALLOC(size + 1, sizeof(char), gf_common_mt_char);
+ if (!value) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY,
+ "failed to "
+ "allocate buffer for xattr "
+ "list (%s)",
+ uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+ size = glfs_flistxattr(glfd, value, size);
+ if (size == -1) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_LISTXATTR_FAILED, "listxattr on %s failed",
+ uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+ op_ret = svs_add_xattrs_to_dict(this, dict, value, size);
+ if (op_ret == -1) {
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY,
+ "failed to add xattrs from the list "
+ "to dict (gfid: %s)",
+ uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+ GF_FREE(value);
+ }
+
+ op_ret = 0;
+ op_errno = 0;
+ }
+
+out:
+ if (op_ret)
+ GF_FREE(value);
+
+ STACK_UNWIND_STRICT(fgetxattr, frame, op_ret, op_errno, dict, NULL);
+
+ if (dict)
+ dict_unref(dict);
+
+ return 0;
+}
+
+int32_t
+svs_releasedir(xlator_t *this, fd_t *fd)
+{
+ svs_fd_t *sfd = NULL;
+ uint64_t tmp_pfd = 0;
+ int ret = 0;
+ svs_inode_t *svs_inode = NULL;
+ glfs_t *fs = NULL;
+ inode_t *inode = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ ret = fd_ctx_del(fd, this, &tmp_pfd);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0, "pfd from fd=%p is NULL", fd);
+ goto out;
+ }
+
+ inode = fd->inode;
+
+ svs_inode = svs_inode_ctx_get(this, inode);
+ if (svs_inode) {
+ fs = svs_inode->fs; /* should inode->lock be held for this? */
+ SVS_CHECK_VALID_SNAPSHOT_HANDLE(fs, this);
+ if (fs) {
+ sfd = (svs_fd_t *)(long)tmp_pfd;
+ if (sfd->fd) {
+ ret = glfs_closedir(sfd->fd);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, errno,
+ SVS_MSG_RELEASEDIR_FAILED,
+ "failed to close the glfd for "
+ "directory %s",
+ uuid_utoa(fd->inode->gfid));
+ }
+ }
+ }
+
+ GF_FREE(sfd);
+
+out:
+ return 0;
+}
+
+int32_t
+svs_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int ret = -1;
+ uint64_t value = 0;
+ svs_inode_t *inode_ctx = NULL;
+ call_stack_t *root = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ root = frame->root;
+ op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps,
+ root->groups);
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ inode_ctx = svs_inode_ctx_get(this, fd->inode);
+ if (!inode_ctx) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode context not found for"
+ " the inode %s",
+ uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+ ret = fd_ctx_get(fd, this, &value);
+ if (ret < 0 && inode_ctx->type != SNAP_VIEW_ENTRY_POINT_INODE) {
+ op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_WARNING, op_errno,
+ SVS_MSG_GET_FD_CONTEXT_FAILED, "pfd is NULL on fd=%p", fd);
+ goto out;
+ }
+
+ op_ret = 0;
+
+out:
+ STACK_UNWIND_STRICT(flush, frame, op_ret, op_errno, NULL);
+
+ return 0;
+}
+
+int32_t
+svs_release(xlator_t *this, fd_t *fd)
+{
+ svs_fd_t *sfd = NULL;
+ uint64_t tmp_pfd = 0;
+ int ret = 0;
+ inode_t *inode = NULL;
+ svs_inode_t *svs_inode = NULL;
+ glfs_t *fs = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+
+ ret = fd_ctx_del(fd, this, &tmp_pfd);
+ if (ret < 0) {
+ gf_msg_debug(this->name, 0, "pfd from fd=%p is NULL", fd);
+ goto out;
+ }
+
+ inode = fd->inode;
+
+ svs_inode = svs_inode_ctx_get(this, inode);
+ if (svs_inode) {
+ fs = svs_inode->fs; /* should inode->lock be held for this? */
+ SVS_CHECK_VALID_SNAPSHOT_HANDLE(fs, this);
+ if (fs) {
+ sfd = (svs_fd_t *)(long)tmp_pfd;
+ if (sfd->fd) {
+ ret = glfs_close(sfd->fd);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ SVS_MSG_RELEASE_FAILED,
+ "failed to close "
+ "the glfd for %s",
+ uuid_utoa(fd->inode->gfid));
+ }
+ }
+ }
+
+ GF_FREE(sfd);
+out:
+ return 0;
+}
+
+int32_t
+svs_forget(xlator_t *this, inode_t *inode)
+{
+ int ret = -1;
+ uint64_t value = 0;
+ svs_inode_t *inode_ctx = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ ret = inode_ctx_del(inode, this, &value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_DELETE_INODE_CONTEXT_FAILED,
+ "failed to delete the inode "
+ "context of %s",
+ uuid_utoa(inode->gfid));
+ goto out;
+ }
+
+ inode_ctx = (svs_inode_t *)(uintptr_t)value;
+ if (!inode_ctx)
+ goto out;
+
+ if (inode_ctx->snapname)
+ GF_FREE(inode_ctx->snapname);
+
+ /*
+ * glfs_h_close leads to unref and forgetting of the
+ * underlying inode in the gfapi world. i.e. the inode
+ * which inode_ctx->object points to.
+ * As of now the only possibility is, this forget came as a
+ * result of snapdaemon's inode table reaching the lru
+ * limit and receiving forget as a result of purging of
+ * extra inodes that exceeded the limit. But, care must
+ * be taken to ensure that, the gfapi instance to which
+ * the glfs_h_object belongs to is not deleted. Otherwise
+ * this might result in access of a freed pointer.
+ * This will still be helpful in reducing the memory
+ * footprint of snapdaemon when the fs instance itself is
+ * valid (i.e. present and not destroyed due to either snap
+ * deactivate or snap delete), but the lru limit is reached.
+ * The forget due to lru limit will make the underlying inode
+ * being unrefed and forgotten.
+ */
+ if (svs_inode_ctx_glfs_mapping(this, inode_ctx)) {
+ glfs_h_close(inode_ctx->object);
+ inode_ctx->object = NULL;
+ }
+ GF_FREE(inode_ctx);
+
+out:
+ return 0;
+}
+
+int
+svs_fill_readdir(xlator_t *this, gf_dirent_t *entries, size_t size, off_t off)
+{
+ gf_dirent_t *entry = NULL;
+ svs_private_t *priv = NULL;
+ int i = 0;
+ snap_dirent_t *dirents = NULL;
+ int this_size = 0;
+ int filled_size = 0;
+ int count = 0;
+
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out);
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", entries, out);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ /* create the dir entries */
+ LOCK(&priv->snaplist_lock);
+ {
+ dirents = priv->dirents;
+
+ for (i = off; i < priv->num_snaps;) {
+ this_size = sizeof(gf_dirent_t) + strlen(dirents[i].name) + 1;
+ if (this_size + filled_size > size)
+ goto unlock;
+
+ entry = gf_dirent_for_name(dirents[i].name);
+ if (!entry) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SVS_MSG_NO_MEMORY,
+ "failed to allocate dentry for %s", dirents[i].name);
+ goto unlock;
+ }
+
+ entry->d_off = i + 1;
+ /*
+ * readdir on the entry-point directory to the snapshot
+ * world, will return elements in the list of the
+ * snapshots as the directory entries. Since the entries
+ * returned are virtual entries which does not exist
+ * physically on the disk, pseudo inode numbers are
+ * generated.
+ */
+ entry->d_ino = i + 2 * 42;
+ entry->d_type = DT_DIR;
+ list_add_tail(&entry->list, &entries->list);
+ ++i;
+ count++;
+ filled_size += this_size;
+ }
+ }
+unlock:
+ UNLOCK(&priv->snaplist_lock);
+
+out:
+ return count;
+}
+
+int32_t
+svs_glfs_readdir(xlator_t *this, glfs_fd_t *glfd, gf_dirent_t *entries,
+ int32_t *op_errno, struct iatt *buf, gf_boolean_t readdirplus,
+ size_t size)
+{
+ int filled_size = 0;
+ int this_size = 0;
+ int32_t ret = -1;
+ int32_t count = 0;
+ gf_dirent_t *entry = NULL;
+ struct dirent *dirents = NULL;
+ struct dirent de = {
+ 0,
+ };
+ struct stat statbuf = {
+ 0,
+ };
+ off_t in_case = -1;
+
+ GF_VALIDATE_OR_GOTO("svs", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, glfd, out);
+ GF_VALIDATE_OR_GOTO(this->name, entries, out);
+
+ while (filled_size < size) {
+ in_case = glfs_telldir(glfd);
+ if (in_case == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, SVS_MSG_TELLDIR_FAILED,
+ "telldir failed");
+ break;
+ }
+
+ if (readdirplus)
+ ret = glfs_readdirplus_r(glfd, &statbuf, &de, &dirents);
+ else
+ ret = glfs_readdir_r(glfd, &de, &dirents);
+
+ if (ret == 0 && dirents != NULL) {
+ if (readdirplus)
+ this_size = max(sizeof(gf_dirent_t), sizeof(gfs3_dirplist)) +
+ strlen(de.d_name) + 1;
+ else
+ this_size = sizeof(gf_dirent_t) + strlen(de.d_name) + 1;
+
+ if (this_size + filled_size > size) {
+ glfs_seekdir(glfd, in_case);
+ break;
+ }
+
+ entry = gf_dirent_for_name(de.d_name);
+ if (!entry) {
+ /*
+ * Since gf_dirent_for_name can return
+ * NULL only when it fails to allocate
+ * memory for the directory entry,
+ * SVS_MSG_NO_MEMORY is used as the
+ * message-id.
+ */
+ gf_msg(this->name, GF_LOG_ERROR, errno, SVS_MSG_NO_MEMORY,
+ "could not create gf_dirent "
+ "for entry %s: (%s)",
+ entry->d_name, strerror(errno));
+ break;
+ }
+ entry->d_off = glfs_telldir(glfd);
+ entry->d_ino = de.d_ino;
+ entry->d_type = de.d_type;
+ if (readdirplus) {
+ iatt_from_stat(buf, &statbuf);
+ entry->d_stat = *buf;
+ }
+ list_add_tail(&entry->list, &entries->list);
+
+ filled_size += this_size;
+ count++;
+ } else if (ret == 0 && dirents == NULL) {
+ *op_errno = ENOENT;
+ break;
+ } else if (ret != 0) {
+ *op_errno = errno;
+ break;
+ }
+ dirents = NULL;
+ }
+
+out:
+ return count;
+}
+
+/* readdirp can be of 2 types.
+ 1) It can come on entry point directory where the list of snapshots
+ is sent as dirents. In this case, the iatt structure is filled
+ on the fly if the inode is not found for the entry or the inode
+ context is NULL. Other wise if inode is found and inode context
+ is there the iatt structure saved in the context is used.
+ 2) It can be on a directory in one of the snapshots. In this case,
+ the readdirp call would have sent us a iatt structure. So the same
+ structure is used with the exception that the gfid and the inode
+ numbers will be newly generated and filled in.
+*/
+void
+svs_readdirp_fill(xlator_t *this, inode_t *parent, svs_inode_t *parent_ctx,
+ gf_dirent_t *entry)
+{
+ inode_t *inode = NULL;
+ uuid_t random_gfid = {
+ 0,
+ };
+ struct iatt buf = {
+ 0,
+ };
+ svs_inode_t *inode_ctx = NULL;
+
+ GF_VALIDATE_OR_GOTO("snapview-server", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, parent, out);
+ GF_VALIDATE_OR_GOTO(this->name, parent_ctx, out);
+ GF_VALIDATE_OR_GOTO(this->name, entry, out);
+
+ if (!strcmp(entry->d_name, ".") || !strcmp(entry->d_name, ".."))
+ goto out;
+
+ inode = inode_grep(parent->table, parent, entry->d_name);
+ if (inode) {
+ entry->inode = inode;
+ inode_ctx = svs_inode_ctx_get(this, inode);
+ if (!inode_ctx) {
+ gf_uuid_copy(buf.ia_gfid, inode->gfid);
+ svs_iatt_fill(inode->gfid, &buf);
+ buf.ia_type = inode->ia_type;
+ } else {
+ buf = inode_ctx->buf;
+ }
+
+ entry->d_ino = buf.ia_ino;
+
+ if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE)
+ entry->d_stat = buf;
+ else {
+ entry->d_stat.ia_ino = buf.ia_ino;
+ gf_uuid_copy(entry->d_stat.ia_gfid, buf.ia_gfid);
+ }
+ } else {
+ if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+ inode = inode_new(parent->table);
+ entry->inode = inode;
+
+ /* If inode context allocation fails, then do not send
+ * the inode for that particular entry as part of
+ * readdirp response. Fuse and protocol/server will link
+ * the inodes in readdirp only if the entry contains
+ * inode in it.
+ */
+ inode_ctx = svs_inode_ctx_get_or_new(this, inode);
+ if (!inode_ctx) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SVS_MSG_NO_MEMORY,
+ "failed to allocate inode "
+ "context for %s",
+ entry->d_name);
+ inode_unref(entry->inode);
+ entry->inode = NULL;
+ goto out;
+ }
+
+ /* Generate virtual gfid for SNAPSHOT dir and
+ * update the statbuf
+ */
+ gf_uuid_generate(random_gfid);
+ gf_uuid_copy(buf.ia_gfid, random_gfid);
+ svs_fill_ino_from_gfid(&buf);
+ buf.ia_type = IA_IFDIR;
+ entry->d_ino = buf.ia_ino;
+ entry->d_stat = buf;
+ inode_ctx->buf = buf;
+ inode_ctx->type = SNAP_VIEW_SNAPSHOT_INODE;
+ } else {
+ /* For files under snapshot world do not set
+ * entry->inode and reset statbuf (except ia_ino),
+ * so that FUSE/Kernel will send an explicit lookup.
+ * entry->d_stat contains the statbuf information
+ * of original file, so for NFS not to cache this
+ * information and to send explicit lookup, it is
+ * required to reset the statbuf.
+ * Virtual gfid for these files will be generated in the
+ * first lookup.
+ */
+ buf.ia_ino = entry->d_ino;
+ entry->d_stat = buf;
+ }
+ }
+
+out:
+ return;
+}
+
+/* In readdirp, though new inode is created along with the generation of
+ new gfid, the inode context created will not contain the glfs_t instance
+ for the filesystem it belongs to and the handle for it in the gfapi
+ world. (handle is obtained only by doing the lookup call on the entry
+ and doing lookup on each entry received as part of readdir call is a
+ costly operation. So the fs and handle is NULL in the inode context
+ and is filled in when lookup comes on that object.
+*/
+int32_t
+svs_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict)
+{
+ gf_dirent_t entries;
+ gf_dirent_t *entry = NULL;
+ struct iatt buf = {
+ 0,
+ };
+ int count = 0;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ svs_inode_t *parent_ctx = NULL;
+ svs_fd_t *svs_fd = NULL;
+ call_stack_t *root = NULL;
+
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", this, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, frame, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, fd, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, fd->inode, unwind);
+
+ INIT_LIST_HEAD(&entries.list);
+
+ root = frame->root;
+ op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps,
+ root->groups);
+ if (op_ret != 0) {
+ goto unwind;
+ }
+
+ parent_ctx = svs_inode_ctx_get(this, fd->inode);
+ if (!parent_ctx) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GET_INODE_CONTEXT_FAILED,
+ "failed to get the inode "
+ "context for %s",
+ uuid_utoa(fd->inode->gfid));
+ goto unwind;
+ }
+
+ if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+ LOCK(&fd->lock);
+ {
+ count = svs_fill_readdir(this, &entries, size, off);
+ }
+ UNLOCK(&fd->lock);
+
+ op_ret = count;
+
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ svs_readdirp_fill(this, fd->inode, parent_ctx, entry);
+ }
+
+ goto unwind;
+ } else {
+ svs_fd = svs_fd_ctx_get_or_new(this, fd);
+ if (!svs_fd) {
+ op_ret = -1;
+ op_errno = EBADFD;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GET_FD_CONTEXT_FAILED,
+ "failed to get the fd context "
+ "for the inode %s",
+ uuid_utoa(fd->inode->gfid));
+ goto unwind;
+ }
+
+ glfs_seekdir(svs_fd->fd, off);
+
+ LOCK(&fd->lock);
+ {
+ count = svs_glfs_readdir(this, svs_fd->fd, &entries, &op_errno,
+ &buf, _gf_true, size);
+ }
+ UNLOCK(&fd->lock);
+
+ op_ret = count;
+
+ list_for_each_entry(entry, &entries.list, list)
+ {
+ svs_readdirp_fill(this, fd->inode, parent_ctx, entry);
+ }
+
+ goto unwind;
+ }
+
+unwind:
+ STACK_UNWIND_STRICT(readdirp, frame, op_ret, op_errno, &entries, dict);
+
+ gf_dirent_free(&entries);
+
+ return 0;
+}
+
+int32_t
+svs_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ gf_dirent_t entries = {
+ {
+ {
+ 0,
+ },
+ },
+ };
+ int count = 0;
+ svs_inode_t *inode_ctx = NULL;
+ int op_errno = EINVAL;
+ int op_ret = -1;
+ svs_fd_t *svs_fd = NULL;
+ glfs_fd_t *glfd = NULL;
+
+ INIT_LIST_HEAD(&entries.list);
+
+ GF_VALIDATE_OR_GOTO("snap-view-server", this, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, frame, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, fd, unwind);
+ GF_VALIDATE_OR_GOTO(this->name, fd->inode, unwind);
+
+ inode_ctx = svs_inode_ctx_get(this, fd->inode);
+ if (!inode_ctx) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode context not found in "
+ "the inode %s",
+ uuid_utoa(fd->inode->gfid));
+ goto unwind;
+ }
+
+ if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+ LOCK(&fd->lock);
+ {
+ count = svs_fill_readdir(this, &entries, size, off);
+ }
+ UNLOCK(&fd->lock);
+ } else {
+ svs_fd = svs_fd_ctx_get_or_new(this, fd);
+ if (!svs_fd) {
+ op_ret = -1;
+ op_errno = EBADFD;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GET_FD_CONTEXT_FAILED,
+ "failed to get the fd "
+ "context for %s",
+ uuid_utoa(fd->inode->gfid));
+ goto unwind;
+ }
+
+ glfd = svs_fd->fd;
+
+ LOCK(&fd->lock);
+ {
+ count = svs_glfs_readdir(this, glfd, &entries, &op_errno, NULL,
+ _gf_false, size);
+ }
+ UNLOCK(&fd->lock);
+ }
+
+ op_ret = count;
+
+unwind:
+ STACK_UNWIND_STRICT(readdir, frame, op_ret, op_errno, &entries, xdata);
+
+ gf_dirent_free(&entries);
+
+ return 0;
+}
+
+/*
+ * This function is mainly helpful for NFS. Till now NFS server was not linking
+ * the inodes in readdirp, which caused problems when below operations were
+ * performed.
+ *
+ * 1) ls -l in one of the snaopshots (snapview-server would generate gfids for
+ * each entry on the fly and link the inodes associated with those entries)
+ * 2) NFS server upon getting readdirp reply would not link the inodes of the
+ * entries. But it used to generate filehandles for each entry and associate
+ * the gfid of that entry with the filehandle and send it as part of the
+ * reply to nfs client.
+ * 3) NFS client would send the filehandle of one of those entries when some
+ * activity is done on it.
+ * 4) NFS server would not be able to find the inode for the gfid present in the
+ * filehandle (as the inode was not linked) and would go for hard resolution
+ * by sending a lookup on the gfid by creating a new inode.
+ * 5) snapview-client will not able to identify whether the inode is a real
+ * inode existing in the main volume or a virtual inode existing in the
+ * snapshots as there would not be any inode context.
+ * 6) Since the gfid upon which lookup is sent is a virtual gfid which is not
+ * present in the disk, lookup would fail and the application would get an
+ * error.
+ *
+ * The above problem is fixed by the below commit which makes snapview server
+ * more compatible with nfs server (1dea949cb60c3814c9206df6ba8dddec8d471a94).
+ * But now because NFS server does inode linking in readdirp has introduced
+ * the below issue.
+ * In readdirp though snapview-server allocates inode contexts it does not
+ * actually perform lookup on each entry it obtained in readdirp (as doing
+ * a lookup via gfapi over the network for each entry would be costly).
+ *
+ * Till now it was not a problem with NFS server, as NFS was sending a lookup on
+ * the gfid it got from NFS client, for which it was not able to find the right
+ * inode. So snapview-server was able to get the fs instance (glfs_t) of the
+ * snapshot volume to which the entry belongs to, and the handle for the entry
+ * from the corresponding snapshot volume and fill those information in the
+ * inode context.
+ *
+ * But now, since NFS server is able to find the inode from the inode table for
+ * the gfid it got from the NFS client, it won't send lookup. Rather it directly
+ * sends the fop it received from the client. Now this causes problems for
+ * snapview-server. Because for each fop snapview-server assumes that lookup has
+ * been performed on that entry and the entry's inode context contains the
+ * pointers for the fs instance and the handle to the entry in that fs. When NFS
+ * server sends the fop and snapview-server finds that the fs instance and the
+ * handle within the inode context are NULL it unwinds with EINVAL.
+ *
+ * So to handle this, if fs instance or handle within the inode context are
+ * NULL, then do a lookup based on parent inode context's fs instance. And
+ * unwind the results obtained as part of lookup
+ */
+
+int32_t
+svs_get_handle(xlator_t *this, loc_t *loc, svs_inode_t *inode_ctx,
+ int32_t *op_errno)
+{
+ svs_inode_t *parent_ctx = NULL;
+ int ret = -1;
+ inode_t *parent = NULL;
+ struct iatt postparent = {
+ 0,
+ };
+ struct iatt buf = {
+ 0,
+ };
+ char uuid1[64];
+
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ if (loc->path) {
+ if (!loc->name || (loc->name && !strcmp(loc->name, ""))) {
+ loc->name = strrchr(loc->path, '/');
+ if (loc->name)
+ loc->name++;
+ }
+ }
+
+ if (loc->parent)
+ parent = inode_ref(loc->parent);
+ else {
+ parent = inode_find(loc->inode->table, loc->pargfid);
+ if (!parent)
+ parent = inode_parent(loc->inode, NULL, NULL);
+ }
+
+ if (parent)
+ parent_ctx = svs_inode_ctx_get(this, parent);
+
+ if (!parent_ctx) {
+ *op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_WARNING, *op_errno,
+ SVS_MSG_GET_INODE_CONTEXT_FAILED,
+ "failed to get the parent "
+ "context for %s (%s)",
+ loc->path, uuid_utoa_r(loc->inode->gfid, uuid1));
+ goto out;
+ }
+
+ if (parent_ctx) {
+ if (parent_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE)
+ ret = svs_lookup_snapshot(this, loc, &buf, &postparent, parent,
+ parent_ctx, op_errno);
+ else
+ ret = svs_lookup_entry(this, loc, &buf, &postparent, parent,
+ parent_ctx, op_errno);
+ }
+
+out:
+ if (parent)
+ inode_unref(parent);
+
+ return ret;
+}
+
+int32_t
+svs_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ struct iatt buf = {
+ 0,
+ };
+ int32_t op_errno = EINVAL;
+ int32_t op_ret = -1;
+ svs_inode_t *inode_ctx = NULL;
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ struct stat stat = {
+ 0,
+ };
+ int ret = -1;
+ call_stack_t *root = NULL;
+
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ root = frame->root;
+ op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps,
+ root->groups);
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ /* Instead of doing the check of whether it is a entry point directory
+ or not by checking the name of the entry and then deciding what
+ to do, just check the inode context and decide what to be done.
+ */
+
+ inode_ctx = svs_inode_ctx_get(this, loc->inode);
+ if (!inode_ctx) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode context not found for %s", uuid_utoa(loc->inode->gfid));
+ goto out;
+ }
+
+ if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+ svs_iatt_fill(loc->inode->gfid, &buf);
+ op_ret = 0;
+ } else {
+ SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret,
+ op_errno, out);
+
+ ret = glfs_h_stat(fs, object, &stat);
+ if (ret) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_STAT_FAILED,
+ "glfs_h_stat on %s (gfid: %s) "
+ "failed",
+ loc->name, uuid_utoa(loc->inode->gfid));
+ goto out;
+ } else
+ gf_msg_debug(this->name, 0, "stat on %s (%s) successful", loc->path,
+ uuid_utoa(loc->inode->gfid));
+
+ iatt_from_stat(&buf, &stat);
+ gf_uuid_copy(buf.ia_gfid, loc->inode->gfid);
+ svs_fill_ino_from_gfid(&buf);
+ op_ret = ret;
+ }
+
+out:
+ STACK_UNWIND_STRICT(stat, frame, op_ret, op_errno, &buf, xdata);
+ return 0;
+}
+
+int32_t
+svs_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ struct iatt buf = {
+ 0,
+ };
+ int32_t op_errno = EINVAL;
+ int32_t op_ret = -1;
+ svs_inode_t *inode_ctx = NULL;
+ struct stat stat = {
+ 0,
+ };
+ int ret = -1;
+ glfs_fd_t *glfd = NULL;
+ svs_fd_t *sfd = NULL;
+ call_stack_t *root = NULL;
+
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd->inode, out);
+
+ /* Instead of doing the check of whether it is a entry point directory
+ or not by checking the name of the entry and then deciding what
+ to do, just check the inode context and decide what to be done.
+ */
+
+ root = frame->root;
+ op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps,
+ root->groups);
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ inode_ctx = svs_inode_ctx_get(this, fd->inode);
+ if (!inode_ctx) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode context not found for"
+ " the inode %s",
+ uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+ if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+ svs_iatt_fill(fd->inode->gfid, &buf);
+ op_ret = 0;
+ } else {
+ if (!(svs_inode_ctx_glfs_mapping(this, inode_ctx))) {
+ op_ret = -1;
+ op_errno = EBADF;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_FS_INSTANCE_INVALID,
+ "glfs instance %p to which the inode %s "
+ "belongs to does not exist. That snapshot "
+ "corresponding to the fs instance "
+ "might have been deleted or deactivated.",
+ inode_ctx->fs, uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+ sfd = svs_fd_ctx_get_or_new(this, fd);
+ if (!sfd) {
+ op_ret = -1;
+ op_errno = EBADFD;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GET_FD_CONTEXT_FAILED,
+ "failed to get the fd context "
+ "for %s",
+ uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+ glfd = sfd->fd;
+ ret = glfs_fstat(glfd, &stat);
+ if (ret) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_STAT_FAILED,
+ "glfs_fstat on gfid: %s failed", uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+ iatt_from_stat(&buf, &stat);
+ gf_uuid_copy(buf.ia_gfid, fd->inode->gfid);
+ svs_fill_ino_from_gfid(&buf);
+ op_ret = ret;
+ }
+
+out:
+ STACK_UNWIND_STRICT(fstat, frame, op_ret, op_errno, &buf, xdata);
+ return 0;
+}
+
+int32_t
+svs_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ struct statvfs buf = {
+ 0,
+ };
+ int32_t op_errno = EINVAL;
+ int32_t op_ret = -1;
+ svs_inode_t *inode_ctx = NULL;
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ int ret = -1;
+ call_stack_t *root = NULL;
+
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ root = frame->root;
+ op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps,
+ root->groups);
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ /* Instead of doing the check of whether it is a entry point directory
+ or not by checking the name of the entry and then deciding what
+ to do, just check the inode context and decide what to be done.
+ */
+ inode_ctx = svs_inode_ctx_get(this, loc->inode);
+ if (!inode_ctx) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode context not found for %s", uuid_utoa(loc->inode->gfid));
+ goto out;
+ }
+
+ SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, op_errno,
+ out);
+
+ ret = glfs_h_statfs(fs, object, &buf);
+ if (ret) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_STATFS_FAILED,
+ "glfs_h_statvfs on %s (gfid: %s) "
+ "failed",
+ loc->name, uuid_utoa(loc->inode->gfid));
+ goto out;
+ }
+ op_ret = ret;
+
+out:
+ STACK_UNWIND_STRICT(statfs, frame, op_ret, op_errno, &buf, xdata);
+ return 0;
+}
+
+int32_t
+svs_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ svs_inode_t *inode_ctx = NULL;
+ svs_fd_t *sfd = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ glfs_fd_t *glfd = NULL;
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ call_stack_t *root = NULL;
+
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ root = frame->root;
+
+ inode_ctx = svs_inode_ctx_get(this, loc->inode);
+ if (!inode_ctx) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode context for %s (gfid: %s) "
+ "not found",
+ loc->name, uuid_utoa(loc->inode->gfid));
+ goto out;
+ }
+
+ if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE)
+ GF_ASSERT(0); // on entry point it should always be opendir
+
+ SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, op_errno,
+ out);
+
+ op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps,
+ root->groups);
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ glfd = glfs_h_open(fs, object, flags);
+ if (!glfd) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_OPEN_FAILED,
+ "glfs_h_open on %s failed (gfid: %s)", loc->name,
+ uuid_utoa(loc->inode->gfid));
+ goto out;
+ }
+
+ sfd = svs_fd_ctx_get_or_new(this, fd);
+ if (!sfd) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY,
+ "failed to allocate fd context "
+ "for %s (gfid: %s)",
+ loc->name, uuid_utoa(loc->inode->gfid));
+ glfs_close(glfd);
+ goto out;
+ }
+ sfd->fd = glfd;
+
+ op_ret = 0;
+
+out:
+ STACK_UNWIND_STRICT(open, frame, op_ret, op_errno, fd, NULL);
+ return 0;
+}
+
+int32_t
+svs_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ svs_private_t *priv = NULL;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec vec = {
+ 0,
+ };
+ svs_fd_t *sfd = NULL;
+ int ret = -1;
+ struct glfs_stat fstatbuf = {
+ 0,
+ };
+ glfs_fd_t *glfd = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ call_stack_t *root = NULL;
+
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd, out);
+ GF_VALIDATE_OR_GOTO(this->name, fd->inode, out);
+
+ priv = this->private;
+ VALIDATE_OR_GOTO(priv, out);
+
+ root = frame->root;
+ op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps,
+ root->groups);
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ if (!svs_inode_glfs_mapping(this, fd->inode)) {
+ op_ret = -1;
+ op_errno = EBADF; /* should this be some other error? */
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_FS_INSTANCE_INVALID,
+ "glfs instance to which the inode "
+ "%s receiving read request belongs, "
+ "does not exist anymore",
+ uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+ sfd = svs_fd_ctx_get_or_new(this, fd);
+ if (!sfd) {
+ op_ret = -1;
+ op_errno = EBADFD;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GET_INODE_CONTEXT_FAILED,
+ "failed to get the fd "
+ "context for %s",
+ uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+ glfd = sfd->fd;
+
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, size);
+ if (!iobuf) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_NO_MEMORY,
+ "failed to "
+ "allocate iobuf while reading the "
+ "file with gfid %s",
+ uuid_utoa(fd->inode->gfid));
+ goto out;
+ }
+
+ ret = glfs_pread(glfd, iobuf->ptr, size, offset, 0, &fstatbuf);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_READ_FAILED,
+ "glfs_read failed on %s (%s)", uuid_utoa(fd->inode->gfid),
+ strerror(op_errno));
+ goto out;
+ }
+
+ vec.iov_base = iobuf->ptr;
+ vec.iov_len = ret;
+
+ iobref = iobref_new();
+
+ iobref_add(iobref, iobuf);
+ glfs_iatt_from_statx(&stbuf, &fstatbuf);
+ gf_uuid_copy(stbuf.ia_gfid, fd->inode->gfid);
+ svs_fill_ino_from_gfid(&stbuf);
+
+ /* Hack to notify higher layers of EOF. */
+ if (!stbuf.ia_size || (offset + vec.iov_len) >= stbuf.ia_size)
+ op_errno = ENOENT;
+
+ op_ret = vec.iov_len;
+
+out:
+
+ STACK_UNWIND_STRICT(readv, frame, op_ret, op_errno, &vec, 1, &stbuf, iobref,
+ NULL);
+
+ if (iobref)
+ iobref_unref(iobref);
+ if (iobuf)
+ iobuf_unref(iobuf);
+
+ return 0;
+}
+
+int32_t
+svs_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ svs_inode_t *inode_ctx = NULL;
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ int op_ret = -1;
+ int op_errno = EINVAL;
+ char *buf = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ int ret = -1;
+ struct stat stat = {
+ 0,
+ };
+ call_stack_t *root = NULL;
+
+ GF_VALIDATE_OR_GOTO("snap-view-daemon", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ root = frame->root;
+ op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps,
+ root->groups);
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ inode_ctx = svs_inode_ctx_get(this, loc->inode);
+ if (!inode_ctx) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GET_INODE_CONTEXT_FAILED,
+ "failed to get inode context "
+ "for %s (gfid: %s)",
+ loc->name, uuid_utoa(loc->inode->gfid));
+ goto out;
+ }
+
+ SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, op_errno,
+ out);
+
+ ret = glfs_h_stat(fs, object, &stat);
+ if (ret) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_STAT_FAILED,
+ "glfs_h_stat on %s (gfid: %s) "
+ "failed",
+ loc->name, uuid_utoa(loc->inode->gfid));
+ goto out;
+ }
+
+ iatt_from_stat(&stbuf, &stat);
+ gf_uuid_copy(stbuf.ia_gfid, loc->inode->gfid);
+ svs_fill_ino_from_gfid(&stbuf);
+
+ buf = alloca(size + 1);
+ op_ret = glfs_h_readlink(fs, object, buf, size);
+ if (op_ret == -1) {
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_READLINK_FAILED,
+ "readlink on %s failed (gfid: %s)", loc->name,
+ uuid_utoa(loc->inode->gfid));
+ goto out;
+ }
+
+ buf[op_ret] = 0;
+
+out:
+ STACK_UNWIND_STRICT(readlink, frame, op_ret, op_errno, buf, &stbuf, NULL);
+
+ return 0;
+}
+
+int32_t
+svs_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int mask,
+ dict_t *xdata)
+{
+ int ret = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ glfs_t *fs = NULL;
+ glfs_object_t *object = NULL;
+ svs_inode_t *inode_ctx = NULL;
+ gf_boolean_t is_fuse_call = 0;
+ int mode = 0;
+ call_stack_t *root = NULL;
+
+ GF_VALIDATE_OR_GOTO("svs", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, this->private, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc, out);
+ GF_VALIDATE_OR_GOTO(this->name, loc->inode, out);
+
+ root = frame->root;
+ op_ret = gf_setcredentials(&root->uid, &root->gid, root->ngrps,
+ root->groups);
+ if (op_ret != 0) {
+ goto out;
+ }
+
+ inode_ctx = svs_inode_ctx_get(this, loc->inode);
+ if (!inode_ctx) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ SVS_MSG_GET_INODE_CONTEXT_FAILED,
+ "inode context not found for %s", uuid_utoa(loc->inode->gfid));
+ goto out;
+ }
+
+ is_fuse_call = __is_fuse_call(frame);
+
+ /*
+ * For entry-point directory, set read and execute bits. But not write
+ * permissions.
+ */
+ if (inode_ctx->type == SNAP_VIEW_ENTRY_POINT_INODE) {
+ if (is_fuse_call) {
+ op_ret = 0;
+ op_errno = 0;
+ } else {
+ op_ret = 0;
+ mode |= POSIX_ACL_READ;
+ mode |= POSIX_ACL_EXECUTE;
+ op_errno = mode;
+ }
+ goto out;
+ }
+
+ SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, op_ret, op_errno,
+ out);
+
+ /* The actual posix_acl xlator does acl checks differently for
+ fuse and nfs. So set frame->root->pid as fspid of the syncop
+ if the call came from nfs
+ */
+ if (!is_fuse_call) {
+ syncopctx_setfspid(&frame->root->pid);
+ syncopctx_setfsuid(&frame->root->uid);
+ syncopctx_setfsgid(&frame->root->gid);
+ syncopctx_setfsgroups(frame->root->ngrps, frame->root->groups);
+ }
+
+ ret = glfs_h_access(fs, object, mask);
+ if (ret < 0) {
+ op_ret = -1;
+ op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, SVS_MSG_ACCESS_FAILED,
+ "failed to access %s (gfid: %s)", loc->path,
+ uuid_utoa(loc->inode->gfid));
+ goto out;
+ }
+
+ op_ret = 0;
+ op_errno = ret;
+
+out:
+
+ STACK_UNWIND_STRICT(access, frame, op_ret, op_errno, NULL);
+ return 0;
+}
+
+int32_t
+notify(xlator_t *this, int32_t event, void *data, ...)
+{
+ switch (event) {
+ case GF_EVENT_PARENT_UP: {
+ /* Tell the parent that snapview-server xlator is up */
+ default_notify(this, GF_EVENT_CHILD_UP, data);
+ } break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, gf_svs_mt_end + 1);
+
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, SVS_MSG_MEM_ACNT_FAILED,
+ "Memory accounting"
+ " init failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+int32_t
+init(xlator_t *this)
+{
+ svs_private_t *priv = NULL;
+ int ret = -1;
+
+ /* This can be the top of graph in certain cases */
+ if (!this->parents) {
+ gf_msg_debug(this->name, 0, "dangling volume. check volfile ");
+ }
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_svs_mt_priv_t);
+ if (!priv) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, SVS_MSG_NO_MEMORY,
+ "failed to "
+ "allocate memory for this->private ");
+ goto out;
+ }
+
+ this->private = priv;
+
+ GF_OPTION_INIT("volname", priv->volname, str, out);
+ LOCK_INIT(&priv->snaplist_lock);
+
+ LOCK(&priv->snaplist_lock);
+ {
+ priv->num_snaps = 0;
+ }
+ UNLOCK(&priv->snaplist_lock);
+
+ /* What to do here upon failure? should init be failed or succeed? */
+ /* If succeeded, then dynamic management of snapshots will not */
+ /* happen.*/
+ ret = svs_mgmt_init(this);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, SVS_MSG_MGMT_INIT_FAILED,
+ "failed to initiate the "
+ "mgmt rpc callback for svs. Dymamic management of the"
+ "snapshots will not happen");
+ goto out;
+ }
+
+ /* get the list of snaps first to return to client xlator */
+ ret = svs_get_snapshot_list(this);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ SVS_MSG_GET_SNAPSHOT_LIST_FAILED,
+ "Error initializing snaplist infrastructure");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (ret && priv) {
+ LOCK_DESTROY(&priv->snaplist_lock);
+ GF_FREE(priv->dirents);
+ GF_FREE(priv);
+ }
+
+ return ret;
+}
+
+void
+fini(xlator_t *this)
+{
+ svs_private_t *priv = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+ int ret = 0;
+
+ GF_ASSERT(this);
+ priv = this->private;
+ this->private = NULL;
+ ctx = this->ctx;
+ if (!ctx)
+ gf_msg(this->name, GF_LOG_ERROR, 0, SVS_MSG_INVALID_GLFS_CTX,
+ "Invalid ctx found");
+
+ if (priv) {
+ ret = LOCK_DESTROY(&priv->snaplist_lock);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno,
+ SVS_MSG_LOCK_DESTROY_FAILED,
+ "Could not destroy mutex snaplist_lock");
+ }
+
+ if (priv->dirents) {
+ GF_FREE(priv->dirents);
+ }
+
+ if (priv->rpc) {
+ /* cleanup the saved-frames before last unref */
+ rpc_clnt_connection_cleanup(&priv->rpc->conn);
+ rpc_clnt_unref(priv->rpc);
+ }
+
+ GF_FREE(priv);
+ }
+
+ return;
+}
+
+struct xlator_fops fops = {
+ .lookup = svs_lookup,
+ .stat = svs_stat,
+ .statfs = svs_statfs,
+ .opendir = svs_opendir,
+ .readdirp = svs_readdirp,
+ .readdir = svs_readdir,
+ .open = svs_open,
+ .readv = svs_readv,
+ .flush = svs_flush,
+ .fstat = svs_fstat,
+ .getxattr = svs_getxattr,
+ .access = svs_access,
+ .readlink = svs_readlink,
+ /* entry fops */
+};
+
+struct xlator_cbks cbks = {
+ .release = svs_release,
+ .releasedir = svs_releasedir,
+ .forget = svs_forget,
+};
+
+struct volume_options options[] = {
+ {
+ .key = {"volname"},
+ .type = GF_OPTION_TYPE_STR,
+ },
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "snapview-server",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/snapview-server/src/snapview-server.h b/xlators/features/snapview-server/src/snapview-server.h
new file mode 100644
index 00000000000..6472422e715
--- /dev/null
+++ b/xlators/features/snapview-server/src/snapview-server.h
@@ -0,0 +1,255 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __SNAP_VIEW_H__
+#define __SNAP_VIEW_H__
+
+#include <glusterfs/dict.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/mem-types.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/iatt.h>
+#include <ctype.h>
+#include <sys/uio.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include "glfs.h"
+#include "glfs-handles.h"
+#include "glfs-internal.h"
+#include "glusterfs3-xdr.h"
+#include <glusterfs/glusterfs-acl.h>
+#include <glusterfs/syncop.h>
+#include <glusterfs/list.h>
+#include <glusterfs/timer.h>
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "xdr-generic.h"
+#include "snapview-server-messages.h"
+
+#define DEFAULT_SVD_LOG_FILE_DIRECTORY DATADIR "/log/glusterfs"
+
+#define SNAP_VIEW_MAX_GLFS_T 256
+#define SNAP_VIEW_MAX_GLFS_FDS 1024
+#define SNAP_VIEW_MAX_GLFS_OBJ_HANDLES 1024
+
+#define SVS_STACK_DESTROY(_frame) \
+ do { \
+ ((call_frame_t *)_frame)->local = NULL; \
+ STACK_DESTROY(((call_frame_t *)_frame)->root); \
+ } while (0)
+
+#define SVS_CHECK_VALID_SNAPSHOT_HANDLE(fs, this) \
+ do { \
+ svs_private_t *_private = NULL; \
+ _private = this->private; \
+ int i = 0; \
+ gf_boolean_t found = _gf_false; \
+ glfs_t *tmp_fs = NULL; \
+ LOCK(&_private->snaplist_lock); \
+ { \
+ for (i = 0; i < _private->num_snaps; i++) { \
+ tmp_fs = _private->dirents[i].fs; \
+ gf_log(this->name, GF_LOG_DEBUG, \
+ "snap name: %s, snap volume: %s," \
+ "dirent->fs: %p", \
+ _private->dirents[i].name, \
+ _private->dirents[i].snap_volname, tmp_fs); \
+ if (tmp_fs && fs && (tmp_fs == fs)) { \
+ found = _gf_true; \
+ gf_msg_debug(this->name, 0, \
+ "found the fs " \
+ "instance"); \
+ break; \
+ } \
+ } \
+ } \
+ UNLOCK(&_private->snaplist_lock); \
+ \
+ if (!found) { \
+ gf_log(this->name, GF_LOG_WARNING, \
+ "failed to" \
+ " find the fs instance %p", \
+ fs); \
+ fs = NULL; \
+ } \
+ } while (0)
+
+#define SVS_GET_INODE_CTX_INFO(inode_ctx, fs, object, this, loc, ret, \
+ op_errno, label) \
+ do { \
+ fs = inode_ctx->fs; \
+ object = inode_ctx->object; \
+ SVS_CHECK_VALID_SNAPSHOT_HANDLE(fs, this); \
+ if (!fs) \
+ object = NULL; \
+ \
+ if (!fs || !object) { \
+ int32_t tmp = -1; \
+ char tmp_uuid[64]; \
+ \
+ tmp = svs_get_handle(this, loc, inode_ctx, &op_errno); \
+ if (tmp) { \
+ gf_log(this->name, GF_LOG_ERROR, \
+ "failed to get the handle for %s " \
+ "(gfid: %s)", \
+ loc->path, uuid_utoa_r(loc->inode->gfid, tmp_uuid)); \
+ ret = -1; \
+ goto label; \
+ } \
+ \
+ fs = inode_ctx->fs; \
+ object = inode_ctx->object; \
+ } \
+ } while (0);
+
+#define SVS_STRDUP(dst, src) \
+ do { \
+ if (dst && strcmp(src, dst)) { \
+ GF_FREE(dst); \
+ dst = NULL; \
+ } \
+ \
+ if (!dst) \
+ dst = gf_strdup(src); \
+ } while (0)
+
+int
+svs_mgmt_submit_request(void *req, call_frame_t *frame, glusterfs_ctx_t *ctx,
+ rpc_clnt_prog_t *prog, int procnum, fop_cbk_fn_t cbkfn,
+ xdrproc_t xdrproc);
+
+int
+svs_get_snapshot_list(xlator_t *this);
+
+int
+mgmt_get_snapinfo_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe);
+
+typedef enum {
+ SNAP_VIEW_ENTRY_POINT_INODE = 0,
+ SNAP_VIEW_SNAPSHOT_INODE,
+ SNAP_VIEW_VIRTUAL_INODE
+} inode_type_t;
+
+struct svs_inode {
+ glfs_t *fs;
+ glfs_object_t *object;
+ inode_type_t type;
+
+ /* used only for entry point directory where gfid of the directory
+ from where the entry point was entered is saved.
+ */
+ uuid_t pargfid;
+
+ /* This is used to generate gfid for all sub files/dirs under this
+ * snapshot
+ */
+ char *snapname;
+ struct iatt buf;
+};
+typedef struct svs_inode svs_inode_t;
+
+struct svs_fd {
+ glfs_fd_t *fd;
+};
+typedef struct svs_fd svs_fd_t;
+
+struct snap_dirent {
+ char name[NAME_MAX];
+ char uuid[UUID_CANONICAL_FORM_LEN + 1];
+ char snap_volname[NAME_MAX];
+ glfs_t *fs;
+};
+typedef struct snap_dirent snap_dirent_t;
+
+struct svs_private {
+ snap_dirent_t *dirents;
+ int num_snaps;
+ char *volname;
+ struct list_head snaplist;
+ gf_lock_t snaplist_lock;
+ struct rpc_clnt *rpc;
+};
+typedef struct svs_private svs_private_t;
+
+int
+__svs_inode_ctx_set(xlator_t *this, inode_t *inode, svs_inode_t *svs_inode);
+
+svs_inode_t *
+__svs_inode_ctx_get(xlator_t *this, inode_t *inode);
+
+svs_inode_t *
+svs_inode_ctx_get(xlator_t *this, inode_t *inode);
+
+int32_t
+svs_inode_ctx_set(xlator_t *this, inode_t *inode, svs_inode_t *svs_inode);
+
+svs_inode_t *
+svs_inode_ctx_get_or_new(xlator_t *this, inode_t *inode);
+
+int
+__svs_fd_ctx_set(xlator_t *this, fd_t *fd, svs_fd_t *svs_fd);
+
+svs_fd_t *
+__svs_fd_ctx_get(xlator_t *this, fd_t *fd);
+
+svs_fd_t *
+svs_fd_ctx_get(xlator_t *this, fd_t *fd);
+
+int32_t
+svs_fd_ctx_set(xlator_t *this, fd_t *fd, svs_fd_t *svs_fd);
+
+svs_fd_t *
+__svs_fd_ctx_get_or_new(xlator_t *this, fd_t *fd);
+
+svs_fd_t *
+svs_fd_ctx_get_or_new(xlator_t *this, fd_t *fd);
+
+int
+svs_uuid_generate(xlator_t *this, uuid_t gfid, char *snapname,
+ uuid_t origin_gfid);
+
+void
+svs_fill_ino_from_gfid(struct iatt *buf);
+
+void
+svs_iatt_fill(uuid_t gfid, struct iatt *buf);
+
+snap_dirent_t *
+svs_get_latest_snap_entry(xlator_t *this);
+
+glfs_t *
+svs_get_latest_snapshot(xlator_t *this);
+
+glfs_t *
+svs_initialise_snapshot_volume(xlator_t *this, const char *name,
+ int32_t *op_errno);
+
+glfs_t *
+__svs_initialise_snapshot_volume(xlator_t *this, const char *name,
+ int32_t *op_errno);
+
+snap_dirent_t *
+__svs_get_snap_dirent(xlator_t *this, const char *name);
+
+int
+svs_mgmt_init(xlator_t *this);
+
+int32_t
+svs_get_handle(xlator_t *this, loc_t *loc, svs_inode_t *inode_ctx,
+ int32_t *op_errno);
+
+glfs_t *
+svs_inode_glfs_mapping(xlator_t *this, inode_t *inode);
+
+glfs_t *
+svs_inode_ctx_glfs_mapping(xlator_t *this, svs_inode_t *inode_ctx);
+
+#endif /* __SNAP_VIEW_H__ */
diff --git a/xlators/features/thin-arbiter/Makefile.am b/xlators/features/thin-arbiter/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/thin-arbiter/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/thin-arbiter/src/Makefile.am b/xlators/features/thin-arbiter/src/Makefile.am
new file mode 100644
index 00000000000..a3c133e7798
--- /dev/null
+++ b/xlators/features/thin-arbiter/src/Makefile.am
@@ -0,0 +1,22 @@
+xlator_LTLIBRARIES = thin-arbiter.la
+
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+thin_arbiter_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+thin_arbiter_la_SOURCES = thin-arbiter.c \
+ $(top_builddir)/xlators/lib/src/libxlator.c
+
+thin_arbiter_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = thin-arbiter.h thin-arbiter-mem-types.h thin-arbiter-messages.h \
+ $(top_builddir)/xlators/lib/src/libxlator.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) \
+ -I$(top_srcdir)/libglusterfs/src -I$(top_srcdir)/xlators/lib/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h b/xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h
new file mode 100644
index 00000000000..69562d2febc
--- /dev/null
+++ b/xlators/features/thin-arbiter/src/thin-arbiter-mem-types.h
@@ -0,0 +1,19 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __THIN_ARBITER_MEM_TYPES_H__
+#define __THIN_ARBITER_MEM_TYPES_H__
+#include <glusterfs/mem-types.h>
+
+typedef enum gf_ta_mem_types_ {
+ gf_ta_mt_local_t = gf_common_mt_end + 1,
+ gf_ta_mt_char,
+ gf_ta_mt_end
+} gf_ta_mem_types_t;
+#endif
diff --git a/xlators/features/thin-arbiter/src/thin-arbiter-messages.h b/xlators/features/thin-arbiter/src/thin-arbiter-messages.h
new file mode 100644
index 00000000000..81d7491577a
--- /dev/null
+++ b/xlators/features/thin-arbiter/src/thin-arbiter-messages.h
@@ -0,0 +1,28 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _TA_MESSAGES_H_
+#define _TA_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(TA, TA_MSG_INVALID_FOP);
+
+#endif /* !_TA_MESSAGES_H_ */
diff --git a/xlators/features/thin-arbiter/src/thin-arbiter.c b/xlators/features/thin-arbiter/src/thin-arbiter.c
new file mode 100644
index 00000000000..ce3008636f1
--- /dev/null
+++ b/xlators/features/thin-arbiter/src/thin-arbiter.c
@@ -0,0 +1,661 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "thin-arbiter.h"
+#include "thin-arbiter-messages.h"
+#include "thin-arbiter-mem-types.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/common-utils.h>
+
+int
+ta_set_incoming_values(dict_t *dict, char *key, data_t *value, void *data)
+{
+ int32_t ret = 0;
+ ta_fop_t *fop = (ta_fop_t *)data;
+ int32_t *pending = NULL;
+
+ pending = GF_CALLOC(1, value->len, gf_ta_mt_char);
+ if (!pending) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = dict_set_bin(fop->brick_xattr, key, pending, value->len);
+out:
+ return ret;
+}
+
+int
+ta_get_incoming_and_brick_values(dict_t *dict, char *key, data_t *value,
+ void *data)
+{
+ ta_fop_t *fop = data;
+ char *source = NULL;
+ char *in_coming = NULL;
+ int32_t len = 0, ret = 0;
+
+ source = GF_CALLOC(1, value->len, gf_ta_mt_char);
+ if (!source) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = dict_get_ptr_and_len(fop->dict, key, (void **)&in_coming, &len);
+
+ if (!in_coming || value->len != len) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if (!memcmp(value->data, source, value->len) &&
+ (!memcmp(in_coming, source, len))) {
+ fop->on_disk[fop->idx] = 0;
+ } else {
+ fop->on_disk[fop->idx] = 1;
+ }
+
+ fop->idx++;
+out:
+ GF_FREE(source);
+ return ret;
+}
+
+void
+ta_release_fop(ta_fop_t *fop)
+{
+ if (!fop) {
+ return;
+ }
+ if (fop->fd) {
+ fd_unref(fop->fd);
+ }
+ loc_wipe(&fop->loc);
+ if (fop->dict) {
+ dict_unref(fop->dict);
+ }
+ if (fop->brick_xattr) {
+ dict_unref(fop->brick_xattr);
+ }
+
+ GF_FREE(fop);
+ return;
+}
+
+int32_t
+ta_set_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ TA_STACK_UNWIND(xattrop, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+/*
+case 1 - If brick value is 0 and incoming value is also 0, fine
+case 2 - If brick value is 0 and incoming value is non 0, fine
+case 3 - If brick value is non 0 and incoming value is also 0, fine
+case 4 - If brick value is non 0 and incoming value is non 0, fine
+case 5 - If incoming value is non zero on both brick, it is wrong
+case 6 - If incoming value is non zero but brick value for other
+brick is also non zero, wrong
+*/
+
+int32_t
+ta_verify_on_disk_source(ta_fop_t *fop, dict_t *dict)
+{
+ int ret = 0;
+
+ if (!fop) {
+ return -EINVAL;
+ }
+
+ ret = dict_foreach(dict, ta_get_incoming_and_brick_values, (void *)fop);
+ if (ret < 0) {
+ return ret;
+ }
+ if (fop->on_disk[0] && fop->on_disk[1]) {
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int32_t
+ta_get_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ ta_fop_t *fop = NULL;
+ int ret = 0;
+
+ fop = frame->local;
+ if (op_ret) {
+ goto unwind;
+ }
+
+ ret = ta_verify_on_disk_source(fop, dict);
+ if (ret < 0) {
+ op_errno = -ret;
+ goto unwind;
+ }
+
+ if (fop->fd) {
+ STACK_WIND(frame, ta_set_xattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fop->fd,
+ fop->xattrop_flags, fop->dict, NULL);
+ } else {
+ STACK_WIND(frame, ta_set_xattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, &fop->loc,
+ fop->xattrop_flags, fop->dict, NULL);
+ }
+ return 0;
+
+unwind:
+
+ TA_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL);
+ return -1;
+}
+
+ta_fop_t *
+ta_prepare_fop(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ ta_fop_t *fop = NULL;
+ int ret = 0;
+
+ fop = GF_CALLOC(1, sizeof(*fop), gf_ta_mt_local_t);
+ if (!fop) {
+ goto out;
+ }
+
+ if (loc) {
+ loc_copy(&fop->loc, loc);
+ }
+
+ if (fd) {
+ fop->fd = fd_ref(fd);
+ }
+
+ fop->xattrop_flags = flags;
+ fop->idx = 0;
+
+ if (dict != NULL) {
+ fop->dict = dict_ref(dict);
+ }
+ fop->brick_xattr = dict_new();
+ if (fop->brick_xattr == NULL) {
+ goto out;
+ }
+ ret = dict_foreach(dict, ta_set_incoming_values, (void *)fop);
+ if (ret < 0) {
+ goto out;
+ }
+ frame->local = fop;
+ return fop;
+
+out:
+ ta_release_fop(fop);
+ return NULL;
+}
+
+int32_t
+ta_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ int ret = 0;
+ ta_fop_t *fop = NULL;
+
+ fop = ta_prepare_fop(frame, this, NULL, fd, flags, dict, xdata);
+ if (!fop) {
+ ret = -ENOMEM;
+ goto unwind;
+ }
+
+ STACK_WIND(frame, ta_get_xattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fd, flags, fop->brick_xattr,
+ xdata);
+ return 0;
+
+unwind:
+
+ TA_STACK_UNWIND(xattrop, frame, -1, -ret, NULL, NULL);
+ return 0;
+}
+
+int32_t
+ta_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ int ret = 0;
+ ta_fop_t *fop = NULL;
+
+ fop = ta_prepare_fop(frame, this, loc, NULL, flags, dict, xdata);
+ if (!fop) {
+ ret = -ENOMEM;
+ goto unwind;
+ }
+
+ STACK_WIND(frame, ta_get_xattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc, flags, fop->brick_xattr,
+ xdata);
+ return 0;
+
+unwind:
+
+ TA_STACK_UNWIND(xattrop, frame, -1, -ret, NULL, NULL);
+ return 0;
+}
+
+int32_t
+ta_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int32_t count, off_t off, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ TA_FAILED_FOP(writev, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ TA_FAILED_FOP(fsetxattr, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ TA_FAILED_FOP(setxattr, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t keep_size,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ TA_FAILED_FOP(fallocate, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
+{
+ TA_FAILED_FOP(access, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ TA_FAILED_FOP(discard, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_entrylk(call_frame_t *frame, xlator_t *this, const char *volume, loc_t *loc,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ TA_FAILED_FOP(entrylk, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume, fd_t *fd,
+ const char *basename, entrylk_cmd cmd, entrylk_type type,
+ dict_t *xdata)
+{
+ TA_FAILED_FOP(fentrylk, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ TA_FAILED_FOP(flush, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ TA_FAILED_FOP(fsync, frame, EINVAL);
+ return 0;
+}
+int32_t
+ta_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
+ dict_t *xdata)
+{
+ TA_FAILED_FOP(fsyncdir, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata)
+{
+ TA_FAILED_FOP(getxattr, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ TA_FAILED_FOP(fgetxattr, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ TA_FAILED_FOP(link, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ TA_FAILED_FOP(lk, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ TA_FAILED_FOP(mkdir, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ TA_FAILED_FOP(mknod, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ TA_FAILED_FOP(open, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ TA_FAILED_FOP(opendir, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ TA_FAILED_FOP(readdir, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
+{
+ TA_FAILED_FOP(readdirp, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ TA_FAILED_FOP(readlink, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ TA_FAILED_FOP(readv, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ TA_FAILED_FOP(removexattr, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ TA_FAILED_FOP(fremovexattr, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ TA_FAILED_FOP(rename, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
+{
+ TA_FAILED_FOP(rmdir, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ TA_FAILED_FOP(setattr, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ TA_FAILED_FOP(fsetattr, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ TA_FAILED_FOP(stat, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ TA_FAILED_FOP(fstat, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ TA_FAILED_FOP(statfs, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_symlink(call_frame_t *frame, xlator_t *this, const char *linkname,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ TA_FAILED_FOP(symlink, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ TA_FAILED_FOP(truncate, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ TA_FAILED_FOP(ftruncate, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
+{
+ TA_FAILED_FOP(unlink, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ TA_FAILED_FOP(zerofill, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+ta_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ TA_FAILED_FOP(seek, frame, EINVAL);
+ return 0;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ ret = xlator_mem_acct_init(this, gf_ta_mt_end + 1);
+ if (ret)
+ gf_log(this->name, GF_LOG_ERROR,
+ "Memory accounting "
+ "initialization failed.");
+ return ret;
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ return 0;
+}
+
+int32_t
+init(xlator_t *this)
+{
+ if (!this->children || this->children->next) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "'thin_arbiter' not configured with exactly one child");
+ return -1;
+ }
+
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_ERROR, "dangling volume. check volfile ");
+ }
+ return 0;
+}
+
+void
+fini(xlator_t *this)
+{
+ return;
+}
+
+struct xlator_fops fops = {
+ /*Passed fop*/
+ .xattrop = ta_xattrop,
+ .fxattrop = ta_fxattrop,
+ /*Failed fop*/
+ .writev = ta_writev,
+ .stat = ta_stat,
+ .fstat = ta_fstat,
+ .truncate = ta_truncate,
+ .ftruncate = ta_ftruncate,
+ .access = ta_access,
+ .readlink = ta_readlink,
+ .mknod = ta_mknod,
+ .mkdir = ta_mkdir,
+ .unlink = ta_unlink,
+ .rmdir = ta_rmdir,
+ .symlink = ta_symlink,
+ .rename = ta_rename,
+ .link = ta_link,
+ .open = ta_open,
+ .readv = ta_readv,
+ .flush = ta_flush,
+ .fsync = ta_fsync,
+ .opendir = ta_opendir,
+ .readdir = ta_readdir,
+ .readdirp = ta_readdirp,
+ .fsyncdir = ta_fsyncdir,
+ .statfs = ta_statfs,
+ .setxattr = ta_setxattr,
+ .getxattr = ta_getxattr,
+ .fsetxattr = ta_fsetxattr,
+ .fgetxattr = ta_fgetxattr,
+ .removexattr = ta_removexattr,
+ .fremovexattr = ta_fremovexattr,
+ .lk = ta_lk,
+ .entrylk = ta_entrylk,
+ .fentrylk = ta_fentrylk,
+ .setattr = ta_setattr,
+ .fsetattr = ta_fsetattr,
+ .fallocate = ta_fallocate,
+ .discard = ta_discard,
+ .zerofill = ta_zerofill,
+ .seek = ta_seek,
+};
+
+struct xlator_cbks cbks = {};
+
+struct volume_options options[] = {
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {GD_OP_VERSION_6_0},
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "thin-arbiter",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/thin-arbiter/src/thin-arbiter.h b/xlators/features/thin-arbiter/src/thin-arbiter.h
new file mode 100644
index 00000000000..e5f914b84bf
--- /dev/null
+++ b/xlators/features/thin-arbiter/src/thin-arbiter.h
@@ -0,0 +1,59 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _THIN_ARBITER_H
+#define _THIN_ARBITER_H
+
+#include <glusterfs/locking.h>
+#include <glusterfs/common-utils.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/list.h>
+
+#define THIN_ARBITER_SOURCE_XATTR "trusted.ta.source"
+#define THIN_ARBITER_SOURCE_SIZE 2
+
+#define TA_FAILED_FOP(fop, frame, op_errno) \
+ do { \
+ default_##fop##_failure_cbk(frame, op_errno); \
+ } while (0)
+
+#define TA_STACK_UNWIND(fop, frame, op_ret, op_errno, params...) \
+ do { \
+ ta_fop_t *__local = NULL; \
+ int32_t __op_ret = 0; \
+ int32_t __op_errno = 0; \
+ \
+ __local = frame->local; \
+ __op_ret = op_ret; \
+ __op_errno = op_errno; \
+ if (__local) { \
+ ta_release_fop(__local); \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, __op_ret, __op_errno, params); \
+ \
+ } while (0)
+
+struct _ta_fop;
+typedef struct _ta_fop ta_fop_t;
+
+struct _ta_fop {
+ gf_xattrop_flags_t xattrop_flags;
+ loc_t loc;
+ fd_t *fd;
+ dict_t *dict;
+ dict_t *brick_xattr;
+ int32_t on_disk[2];
+ int32_t idx;
+};
+
+#endif /* _THIN_ARBITER_H */
diff --git a/xlators/features/trash/src/Makefile.am b/xlators/features/trash/src/Makefile.am
index 4671d06d309..8557e7171af 100644
--- a/xlators/features/trash/src/Makefile.am
+++ b/xlators/features/trash/src/Makefile.am
@@ -1,15 +1,19 @@
+if WITH_SERVER
xlator_LTLIBRARIES = trash.la
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/testing/features
+endif
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
-trash_la_LDFLAGS = -module -avoidversion
+trash_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
trash_la_SOURCES = trash.c
trash_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
noinst_HEADERS = trash.h trash-mem-types.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
CLEANFILES =
diff --git a/xlators/features/trash/src/trash-mem-types.h b/xlators/features/trash/src/trash-mem-types.h
index bb2cd33cfe0..43353c8f095 100644
--- a/xlators/features/trash/src/trash-mem-types.h
+++ b/xlators/features/trash/src/trash-mem-types.h
@@ -1,32 +1,22 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __TRASH_MEM_TYPES_H__
#define __TRASH_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_trash_mem_types_ {
- gf_trash_mt_trash_private_t = gf_common_mt_end + 1,
- gf_trash_mt_char,
- gf_trash_mt_trash_elim_pattern_t,
- gf_trash_mt_end
+ gf_trash_mt_trash_private_t = gf_common_mt_end + 1,
+ gf_trash_mt_char,
+ gf_trash_mt_uuid,
+ gf_trash_mt_trash_elim_path,
+ gf_trash_mt_end
};
#endif
-
diff --git a/xlators/features/trash/src/trash.c b/xlators/features/trash/src/trash.c
index 92ea19a55f3..7d09cba3e9c 100644
--- a/xlators/features/trash/src/trash.c
+++ b/xlators/features/trash/src/trash.c
@@ -1,1552 +1,2653 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "trash.h"
#include "trash-mem-types.h"
+#include <glusterfs/syscall.h>
-int32_t
-trash_ftruncate_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobuf);
+#define root_gfid \
+ (uuid_t) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }
+#define trash_gfid \
+ (uuid_t) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5 }
+#define internal_op_gfid \
+ (uuid_t) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6 }
int32_t
-trash_truncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf);
+trash_truncate_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata);
int32_t
-trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent);
+trash_truncate_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata);
int32_t
-trash_unlink_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent);
+trash_unlink_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata);
+/* Common routines used in this translator */
-void
-trash_local_wipe (trash_local_t *local)
+/**
+ * When a directory/file is created under trash directory, it should have
+ * the same permission as before. This function will fetch permission from
+ * the existing directory and returns the same
+ */
+mode_t
+get_permission(char *path)
{
- if (!local)
- goto out;
-
- loc_wipe (&local->loc);
- loc_wipe (&local->newloc);
-
- if (local->fd)
- fd_unref (local->fd);
-
- if (local->newfd)
- fd_unref (local->newfd);
+ mode_t mode = 0755;
+ struct stat sbuf = {
+ 0,
+ };
+ struct iatt ibuf = {
+ 0,
+ };
+ int ret = 0;
+
+ ret = sys_stat(path, &sbuf);
+ if (!ret) {
+ iatt_from_stat(&ibuf, &sbuf);
+ mode = st_mode_from_ia(ibuf.ia_prot, ibuf.ia_type);
+ } else
+ gf_log("trash", GF_LOG_DEBUG,
+ "stat on %s failed"
+ " using default",
+ path);
+ return mode;
+}
- mem_put (local);
+/**
+ * For normalization, trash directory name is stored inside priv structure as
+ * '/trash_directory/'. As a result the trailing and leading slashes are being
+ * striped out for additional usage.
+ */
+int
+extract_trash_directory(char *priv_value, const char **trash_directory)
+{
+ char *tmp = NULL;
+ int ret = 0;
+
+ GF_VALIDATE_OR_GOTO("trash", priv_value, out);
+
+ tmp = gf_strdup(priv_value + 1);
+ if (!tmp) {
+ ret = ENOMEM;
+ goto out;
+ }
+ if (tmp[strlen(tmp) - 1] == '/')
+ tmp[strlen(tmp) - 1] = '\0';
+ *trash_directory = gf_strdup(tmp);
+ if (!(*trash_directory)) {
+ ret = ENOMEM;
+ goto out;
+ }
out:
- return;
+ if (tmp)
+ GF_FREE(tmp);
+ return ret;
}
-int32_t
-trash_common_unwind_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+/**
+ * The trash directory path should be append at beginning of file path for
+ * delete or truncate operations. Normal trashing moves the contents to
+ * trash directory and trashing done by internal operations are moved to
+ * internal_op directory inside trash.
+ */
+void
+copy_trash_path(const char *priv_value, gf_boolean_t internal, char *path,
+ size_t path_size)
{
- TRASH_STACK_UNWIND (unlink, frame, op_ret, op_errno, preparent, postparent);
- return 0;
+ char trash_path[PATH_MAX] = {
+ 0,
+ };
+
+ strncpy(trash_path, priv_value, sizeof(trash_path));
+ trash_path[sizeof(trash_path) - 1] = 0;
+ if (internal)
+ strncat(trash_path, "internal_op/",
+ sizeof(trash_path) - strlen(trash_path) - 1);
+
+ strncpy(path, trash_path, path_size);
+ path[path_size - 1] = 0;
}
-int32_t
-trash_unlink_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+/**
+ * This function performs the reverse operation of copy_trash_path(). It gives
+ * out a pointer, whose starting value will be the path inside trash directory,
+ * similar to original path.
+ */
+void
+remove_trash_path(const char *path, gf_boolean_t internal, char **rem_path)
{
- trash_local_t *local = NULL;
- char *tmp_str = NULL;
- char *tmp_path = NULL;
- char *tmp_dirname = NULL;
- char *dir_name = NULL;
- int32_t count = 0;
- int32_t loop_count = 0;
- int i = 0;
- loc_t tmp_loc = {0,};
-
- local = frame->local;
- tmp_str = gf_strdup (local->newpath);
- if (!tmp_str) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
- loop_count = local->loop_count;
-
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- tmp_dirname = strchr (tmp_str, '/');
- while (tmp_dirname) {
- count = tmp_dirname - tmp_str;
- if (count == 0)
- count = 1;
- i++;
- if (i > loop_count)
- break;
- tmp_dirname = strchr (tmp_str + count + 1, '/');
- }
- tmp_path = memdup (local->newpath, count);
- if (!tmp_path) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
-
- tmp_loc.path = tmp_path;
-
- /* TODO:create the directory with proper permissions */
- STACK_WIND_COOKIE (frame, trash_unlink_mkdir_cbk, tmp_path,
- this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc, 0755, NULL);
-
- goto out;
- }
-
- if (op_ret == 0) {
- dir_name = dirname (tmp_str);
- if (strcmp((char*)cookie, dir_name) == 0) {
- tmp_loc.path = local->newpath;
- STACK_WIND (frame, trash_unlink_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- &local->loc, &tmp_loc);
- goto out;
- }
- }
-
- LOCK (&frame->lock);
- {
- loop_count = ++local->loop_count;
- }
- UNLOCK (&frame->lock);
- tmp_dirname = strchr (tmp_str, '/');
- while (tmp_dirname) {
- count = tmp_dirname - tmp_str;
- if (count == 0)
- count = 1;
- i++;
- if ((i > loop_count) || (count > PATH_MAX))
- break;
- tmp_dirname = strchr (tmp_str + count + 1, '/');
- }
- tmp_path = memdup (local->newpath, count);
- if (!tmp_path) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- goto out;
- }
- tmp_loc.path = tmp_path;
-
- STACK_WIND_COOKIE (frame, trash_unlink_mkdir_cbk, tmp_path,
- this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc, 0755, NULL);
-
-out:
- GF_FREE (cookie);
- if (tmp_str)
- GF_FREE (tmp_str);
+ if (rem_path == NULL) {
+ return;
+ }
- return 0;
+ *rem_path = strchr(path + 1, '/');
+ if (internal)
+ *rem_path = strchr(*rem_path + 1, '/');
}
-int32_t
-trash_rename_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent);
-
-int32_t
-trash_unlink_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+/**
+ * Checks whether the given path reside under the specified eliminate path
+ */
+int
+check_whether_eliminate_path(trash_elim_path *trav, const char *path)
{
- trash_local_t *local = NULL;
- char *tmp_str = NULL;
- char *dir_name = NULL;
- char *tmp_cookie = NULL;
- loc_t tmp_loc = {0,};
-
- local = frame->local;
-
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- tmp_str = gf_strdup (local->newpath);
- if (!tmp_str) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
- dir_name = dirname (tmp_str);
-
- tmp_loc.path = dir_name;
-
- tmp_cookie = gf_strdup (dir_name);
- if (!tmp_cookie) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
- /* TODO: create the directory with proper permissions */
- STACK_WIND_COOKIE (frame, trash_unlink_mkdir_cbk, tmp_cookie,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- &tmp_loc, 0755, NULL);
-
- GF_FREE (tmp_str);
-
- return 0;
- }
-
- if ((op_ret == -1) && (op_errno == ENOTDIR)) {
-
- gf_log (this->name, GF_LOG_DEBUG,
- "target(%s) exists, cannot keep the copy, deleting",
- local->newpath);
+ int match = 0;
- STACK_WIND (frame, trash_common_unwind_cbk,
- this->children->xlator,
- this->children->xlator->fops->unlink, &local->loc);
-
- return 0;
+ while (trav) {
+ if (strncmp(path, trav->path, strlen(trav->path)) == 0) {
+ match++;
+ break;
}
-
- if ((op_ret == -1) && (op_errno == EISDIR)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "target(%s) exists as directory, cannot keep copy, "
- "deleting", local->newpath);
-
- STACK_WIND (frame, trash_common_unwind_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, &local->loc);
- return 0;
- }
-
- /* All other cases, unlink should return success */
- TRASH_STACK_UNWIND (unlink, frame, 0, op_errno, &local->preparent,
- &local->postparent);
-
- return 0;
+ trav = trav->next;
+ }
+ return match;
}
-
-
-int32_t
-trash_common_unwind_buf_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+/**
+ * Stores the eliminate path into internal eliminate path structure
+ */
+int
+store_eliminate_path(char *str, trash_elim_path **eliminate)
{
- TRASH_STACK_UNWIND (truncate, frame, op_ret, op_errno, prebuf, postbuf);
- return 0;
+ trash_elim_path *trav = NULL;
+ char *component = NULL;
+ char elm_path[PATH_MAX] = {
+ 0,
+ };
+ int ret = 0;
+ char *strtokptr = NULL;
+
+ if ((str == NULL) || (eliminate == NULL)) {
+ ret = EINVAL;
+ goto out;
+ }
+
+ component = strtok_r(str, ",", &strtokptr);
+ while (component) {
+ trav = GF_CALLOC(1, sizeof(*trav), gf_trash_mt_trash_elim_path);
+ if (!trav) {
+ ret = ENOMEM;
+ goto out;
+ }
+ if (component[0] == '/')
+ sprintf(elm_path, "%s", component);
+ else
+ sprintf(elm_path, "/%s", component);
+
+ if (component[strlen(component) - 1] != '/')
+ strncat(elm_path, "/", sizeof(elm_path) - strlen(elm_path) - 1);
+
+ trav->path = gf_strdup(elm_path);
+ if (!trav->path) {
+ ret = ENOMEM;
+ gf_log("trash", GF_LOG_DEBUG, "out of memory");
+ GF_FREE(trav);
+ goto out;
+ }
+ trav->next = *eliminate;
+ *eliminate = trav;
+ component = strtok_r(NULL, ",", &strtokptr);
+ }
+out:
+ return ret;
}
-int
-trash_common_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *stbuf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+/**
+ * Appends time stamp to given string
+ */
+void
+append_time_stamp(char *name, size_t name_size)
{
- TRASH_STACK_UNWIND (rename, frame, op_ret, op_errno, stbuf, preoldparent,
- postoldparent, prenewparent, postnewparent);
- return 0;
+ int i;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+
+ gf_time_fmt(timestr, sizeof(timestr), gf_time(), gf_timefmt_F_HMS);
+
+ /* removing white spaces in timestamp */
+ for (i = 0; i < strlen(timestr); i++) {
+ if (timestr[i] == ' ')
+ timestr[i] = '_';
+ }
+ strncat(name, "_", name_size - strlen(name) - 1);
+ strncat(name, timestr, name_size - strlen(name) - 1);
}
+/* *
+ * Check whether delete/rename operation is permitted on
+ * trash directory
+ */
-int32_t
-trash_unlink_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+gf_boolean_t
+check_whether_op_permitted(trash_private_t *priv, loc_t *loc)
{
- trash_private_t *priv = NULL;
- trash_local_t *local = NULL;
- loc_t new_loc = {0,};
+ if ((priv->state && (gf_uuid_compare(loc->inode->gfid, trash_gfid) == 0)))
+ return _gf_false;
+ if (priv->internal &&
+ (gf_uuid_compare(loc->inode->gfid, internal_op_gfid) == 0))
+ return _gf_false;
- priv = this->private;
- local = frame->local;
-
- if (-1 == op_ret) {
- gf_log (this->name, GF_LOG_DEBUG, "%s: %s",
- local->loc.path, strerror (op_errno));
- goto fail;
- }
-
- if ((buf->ia_size == 0) ||
- (buf->ia_size > priv->max_trash_file_size)) {
- /* if the file is too big or zero, just unlink it */
-
- if (buf->ia_size > priv->max_trash_file_size) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: file size too big (%"PRId64") to "
- "move into trash directory",
- local->loc.path, buf->ia_size);
- }
-
- STACK_WIND (frame, trash_common_unwind_cbk,
- this->children->xlator,
- this->children->xlator->fops->unlink, &local->loc);
- return 0;
- }
-
- new_loc.path = local->newpath;
-
- STACK_WIND (frame, trash_unlink_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- &local->loc, &new_loc);
+ return _gf_true;
+}
- return 0;
+/**
+ * Wipe the memory used by trash location variable
+ */
+void
+trash_local_wipe(trash_local_t *local)
+{
+ if (!local)
+ goto out;
-fail:
- TRASH_STACK_UNWIND (unlink, frame, op_ret, op_errno, buf,
- NULL);
+ loc_wipe(&local->loc);
+ loc_wipe(&local->newloc);
- return 0;
+ if (local->fd)
+ fd_unref(local->fd);
+ if (local->newfd)
+ fd_unref(local->newfd);
+ mem_put(local);
+out:
+ return;
}
-int32_t
-trash_rename_rename_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf,
- struct iatt *preoldparent, struct iatt *postoldparent,
- struct iatt *prenewparent, struct iatt *postnewparent)
+/**
+ * Wipe the memory used by eliminate path through a
+ * recursive call
+ */
+void
+wipe_eliminate_path(trash_elim_path **trav)
{
- trash_local_t *local = NULL;
- char *tmp_str = NULL;
- char *dir_name = NULL;
- char *tmp_path = NULL;
- loc_t tmp_loc = {0,};
-
- local = frame->local;
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- tmp_str = gf_strdup (local->newpath);
- if (!tmp_str) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
- dir_name = dirname (tmp_str);
-
- /* check for the errno, if its ENOENT create directory and call
- * rename later
- */
- tmp_path = gf_strdup (dir_name);
- if (!tmp_path) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
- tmp_loc.path = tmp_path;
-
- /* TODO: create the directory with proper permissions */
- STACK_WIND_COOKIE (frame, trash_rename_mkdir_cbk, tmp_path,
- this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc, 0755, NULL);
-
- GF_FREE (tmp_str);
- return 0;
- }
-
- if ((op_ret == -1) && (op_errno == ENOTDIR)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "target(%s) exists, cannot keep the dest entry(%s): "
- "renaming", local->newpath, local->origpath);
- } else if ((op_ret == -1) && (op_errno == EISDIR)) {
- gf_log (this->name, GF_LOG_DEBUG,
- "target(%s) exists as a directory, cannot keep the "
- "copy (%s), renaming", local->newpath, local->origpath);
- }
+ if (trav == NULL) {
+ return;
+ }
- STACK_WIND (frame, trash_common_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename, &local->loc,
- &local->newloc);
+ if (*trav == NULL) {
+ return;
+ }
- return 0;
+ wipe_eliminate_path(&(*trav)->next);
+ GF_FREE((*trav)->path);
+ GF_FREE(*trav);
+ *trav = NULL;
}
-
+/**
+ * This is the call back of rename fop initated using STACK_WIND in
+ * reconfigure/notify function which is used to rename trash directory
+ * in the brick when it is required either in volume start or set.
+ * This frame must destroyed from this function itself since it was
+ * created by trash xlator
+ */
int32_t
-trash_rename_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+trash_dir_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- trash_local_t *local = NULL;
- char *tmp_str = NULL;
- char *tmp_path = NULL;
- char *tmp_dirname = NULL;
- char *dir_name = NULL;
- int32_t count = 0;
- loc_t tmp_loc = {0,};
-
- local = frame->local;
- tmp_str = gf_strdup (local->newpath);
- if (!tmp_str) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- goto out;
- }
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- tmp_dirname = strchr (tmp_str, '/');
- while (tmp_dirname) {
- count = tmp_dirname - tmp_str;
- if (count == 0)
- count = 1;
+ priv = this->private;
- tmp_dirname = strchr (tmp_str + count + 1, '/');
+ local = frame->local;
- tmp_path = memdup (local->newpath, count);
- if (!tmp_path) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "rename trash directory "
+ "failed: %s",
+ strerror(op_errno));
+ goto out;
+ }
- tmp_loc.path = tmp_path;
+ GF_FREE(priv->oldtrash_dir);
- /* TODO: create the directory with proper permissions */
- STACK_WIND_COOKIE (frame, trash_rename_mkdir_cbk,
- tmp_path, this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc, 0755, NULL);
- }
-
- goto out;
- }
+ priv->oldtrash_dir = gf_strdup(priv->newtrash_dir);
+ if (!priv->oldtrash_dir) {
+ op_ret = ENOMEM;
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ }
- dir_name = dirname (tmp_str);
- if (strcmp ((char*)cookie, dir_name) == 0) {
- tmp_loc.path = local->newpath;
+out:
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+ trash_local_wipe(local);
+ return op_ret;
+}
- STACK_WIND (frame, trash_rename_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- &local->newloc, &tmp_loc);
- }
+int
+rename_trash_directory(xlator_t *this)
+{
+ trash_private_t *priv = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ loc_t old_loc = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ trash_local_t *local = NULL;
+
+ priv = this->private;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (frame == NULL) {
+ gf_log(this->name, GF_LOG_ERROR, "failed to create frame");
+ ret = ENOMEM;
+ goto out;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ frame->local = local;
+
+ /* assign new location values to new_loc members */
+ gf_uuid_copy(loc.gfid, trash_gfid);
+ gf_uuid_copy(loc.pargfid, root_gfid);
+ ret = extract_trash_directory(priv->newtrash_dir, &loc.name);
+ if (ret) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+ loc.path = gf_strdup(priv->newtrash_dir);
+ if (!loc.path) {
+ ret = ENOMEM;
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+
+ /* assign old location values to old_loc members */
+ gf_uuid_copy(old_loc.gfid, trash_gfid);
+ gf_uuid_copy(old_loc.pargfid, root_gfid);
+ ret = extract_trash_directory(priv->oldtrash_dir, &old_loc.name);
+ if (ret) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+ old_loc.path = gf_strdup(priv->oldtrash_dir);
+ if (!old_loc.path) {
+ ret = ENOMEM;
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+
+ old_loc.inode = inode_ref(priv->trash_inode);
+ gf_uuid_copy(old_loc.inode->gfid, old_loc.gfid);
+
+ loc_copy(&local->loc, &old_loc);
+ loc_copy(&local->newloc, &loc);
+
+ STACK_WIND(frame, trash_dir_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, &old_loc, &loc, NULL);
+ return 0;
out:
- GF_FREE (cookie); /* strdup (dir_name) was sent here :) */
- if (tmp_str)
- GF_FREE (tmp_str);
+ if (frame) {
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+ }
- return 0;
+ trash_local_wipe(local);
+
+ return ret;
}
int32_t
-trash_rename_lookup_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *buf, dict_t *xattr,
- struct iatt *postparent)
+trash_internal_op_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- trash_private_t *priv = NULL;
- trash_local_t *local = NULL;
- loc_t tmp_loc = {0,};
-
- local = frame->local;
- priv = this->private;
-
- if (op_ret == -1) {
- STACK_WIND (frame, trash_common_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- &local->loc, &local->newloc);
- return 0;
- }
- if ((buf->ia_size == 0) ||
- (buf->ia_size > priv->max_trash_file_size)) {
- /* if the file is too big or zero, just unlink it */
-
- if (buf->ia_size > priv->max_trash_file_size) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: file size too big (%"PRId64") to "
- "move into trash directory",
- local->newloc.path, buf->ia_size);
- }
-
- STACK_WIND (frame, trash_common_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- &local->loc, &local->newloc);
- return 0;
- }
-
- tmp_loc.path = local->newpath;
-
- STACK_WIND (frame, trash_rename_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- &local->newloc, &tmp_loc);
-
- return 0;
+ trash_local_t *local = NULL;
+ local = frame->local;
+
+ if (op_ret != 0 && !(op_errno == EEXIST))
+ gf_log(this->name, GF_LOG_ERROR,
+ "mkdir failed for "
+ "internal op directory : %s",
+ strerror(op_errno));
+
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+ trash_local_wipe(local);
+ return op_ret;
}
+/**
+ * This is the call back of mkdir fop initated using STACK_WIND in
+ * notify/reconfigure function which is used to create trash directory
+ * in the brick when "trash" is on. The frame of the mkdir must
+ * destroyed from this function itself since it was created by trash xlator
+ */
int32_t
-trash_rename (call_frame_t *frame, xlator_t *this, loc_t *oldloc,
- loc_t *newloc)
+trash_dir_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- trash_elim_pattern_t *trav = NULL;
- trash_private_t *priv = NULL;
- trash_local_t *local = NULL;
- char timestr[64] = {0,};
- int32_t match = 0;
-
- priv = this->private;
- if (priv->eliminate) {
- trav = priv->eliminate;
- while (trav) {
- if (fnmatch(trav->pattern, newloc->name, 0) == 0) {
- match++;
- break;
- }
- trav = trav->next;
- }
- }
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;
- if ((strncmp (oldloc->path, priv->trash_dir,
- strlen (priv->trash_dir)) == 0) || match) {
- /* Trying to rename from the trash dir,
- do the actual rename */
- STACK_WIND (frame, trash_common_rename_cbk,
- this->children->xlator,
- this->children->xlator->fops->rename,
- oldloc, newloc);
+ priv = this->private;
- return 0;
- }
+ local = frame->local;
- local = mem_get0 (this->local_pool);
- if (!local) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- TRASH_STACK_UNWIND (rename, frame, -1, ENOMEM,
- NULL, NULL, NULL, NULL, NULL);
- return 0;
+ if (op_ret == 0) {
+ priv->oldtrash_dir = gf_strdup(priv->newtrash_dir);
+ if (!priv->oldtrash_dir) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+ op_ret = ENOMEM;
}
+ } else if (op_ret != 0 && errno != EEXIST)
+ gf_log(this->name, GF_LOG_ERROR,
+ "mkdir failed for trash"
+ " directory : %s",
+ strerror(op_errno));
- frame->local = local;
- loc_copy (&local->loc, oldloc);
-
- loc_copy (&local->newloc, newloc);
-
- strcpy (local->origpath, newloc->path);
- strcpy (local->newpath, priv->trash_dir);
- strcat (local->newpath, newloc->path);
-
- {
- /* append timestamp to file name */
- /* TODO: can we make it optional? */
- gf_time_ftm (timestr, sizeof timestr, time (NULL),
- gf_timefmt_F_HMS);
- strcat (local->newpath, timestr);
- }
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+ trash_local_wipe(local);
+ return op_ret;
+}
- /* Send a lookup call on newloc, to ensure we are not
- overwriting */
- STACK_WIND (frame, trash_rename_lookup_cbk,
- this->children->xlator,
- this->children->xlator->fops->lookup, newloc, 0);
+/**
+ * This getxattr calls returns existing trash directory path in
+ * the dictionary
+ */
+int32_t
+trash_dir_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict,
+ dict_t *xdata)
+{
+ data_t *data = NULL;
+ trash_private_t *priv = NULL;
+ int ret = 0;
+ trash_local_t *local = NULL;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
+
+ local = frame->local;
+
+ data = dict_get(dict, GET_ANCESTRY_PATH_KEY);
+ if (!data) {
+ goto out;
+ }
+ priv->oldtrash_dir = GF_MALLOC(PATH_MAX, gf_common_mt_char);
+ if (!priv->oldtrash_dir) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ /* appending '/' if it is not present */
+ sprintf(priv->oldtrash_dir, "%s%c", data->data,
+ data->data[strlen(data->data) - 1] != '/' ? '/' : '\0');
+ gf_log(this->name, GF_LOG_DEBUG,
+ "old trash directory path "
+ "is %s",
+ priv->oldtrash_dir);
+ if (strcmp(priv->newtrash_dir, priv->oldtrash_dir) != 0) {
+ /* When user set a new name for trash directory, trash
+ * xlator will perform a rename operation on old trash
+ * directory to the new one using a STACK_WIND from here.
+ * This option can be configured only when volume is in
+ * started state
+ */
+ ret = rename_trash_directory(this);
+ }
+out:
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+ trash_local_wipe(local);
+ return ret;
+}
+/**
+ * This is a nameless look up for internal op directory
+ * The lookup is based on gfid, because internal op directory
+ * has fixed gfid.
+ */
+int32_t
+trash_internalop_dir_lookup_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ trash_private_t *priv = NULL;
+ int ret = 0;
+ uuid_t *gfid_ptr = NULL;
+ loc_t loc = {
+ 0,
+ };
+ char internal_op_path[PATH_MAX] = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ trash_local_t *local = NULL;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
+
+ local = frame->local;
+ if (op_ret != 0 && op_errno == ENOENT) {
+ loc_wipe(&local->loc);
+ gfid_ptr = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!gfid_ptr) {
+ ret = ENOMEM;
+ goto out;
+ }
+
+ gf_uuid_copy(*gfid_ptr, internal_op_gfid);
+
+ dict = dict_new();
+ if (!dict) {
+ ret = ENOMEM;
+ goto out;
+ }
+ ret = dict_set_gfuuid(dict, "gfid-req", *gfid_ptr, false);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "setting key gfid-req failed");
+ goto out;
+ }
+ gf_uuid_copy(loc.gfid, internal_op_gfid);
+ gf_uuid_copy(loc.pargfid, trash_gfid);
+
+ loc.inode = inode_new(priv->trash_itable);
+
+ /* The mkdir call for creating internal op directory */
+ loc.name = gf_strdup("internal_op");
+ if (!loc.name) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ sprintf(internal_op_path, "%s%s/", priv->newtrash_dir, loc.name);
+
+ loc.path = gf_strdup(internal_op_path);
+ if (!loc.path) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+
+ loc_copy(&local->loc, &loc);
+ STACK_WIND(frame, trash_internal_op_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, &loc, 0755, 0022, dict);
return 0;
+ }
+
+out:
+ if (ret && gfid_ptr)
+ GF_FREE(gfid_ptr);
+ if (dict)
+ dict_unref(dict);
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+ trash_local_wipe(local);
+ return op_ret;
}
+/**
+ * This is a nameless look up for old trash directory
+ * The lookup is based on gfid, because trash directory
+ * has fixed gfid.
+ */
int32_t
-trash_unlink (call_frame_t *frame, xlator_t *this, loc_t *loc)
+trash_dir_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, dict_t *xdata, struct iatt *postparent)
{
- trash_elim_pattern_t *trav = NULL;
- trash_private_t *priv = NULL;
- trash_local_t *local = NULL;
- char timestr[64] = {0,};
- int32_t match = 0;
+ trash_private_t *priv = NULL;
+ loc_t loc = {
+ 0,
+ };
+ int ret = 0;
+ uuid_t *gfid_ptr = NULL;
+ dict_t *dict = NULL;
+ trash_local_t *local = NULL;
- priv = this->private;
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
- if (priv->eliminate) {
- trav = priv->eliminate;
- while (trav) {
- if (fnmatch(trav->pattern, loc->name, 0) == 0) {
- match++;
- break;
- }
- trav = trav->next;
- }
- }
+ local = frame->local;
- if ((strncmp (loc->path, priv->trash_dir,
- strlen (priv->trash_dir)) == 0) || (match)) {
- if (match) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: file matches eliminate pattern, "
- "not moved to trash", loc->name);
- } else {
- /* unlink from the trash-dir, not keeping any copy */
- ;
- }
-
- STACK_WIND (frame, trash_common_unwind_cbk,
- this->children->xlator,
- this->children->xlator->fops->unlink, loc);
- return 0;
- }
+ loc_wipe(&local->loc);
+ if (op_ret == 0) {
+ gf_log(this->name, GF_LOG_DEBUG, "inode found with gfid %s",
+ uuid_utoa(buf->ia_gfid));
- local = mem_get0 (this->local_pool);
- if (!local) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- TRASH_STACK_UNWIND (unlink, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
- frame->local = local;
- loc_copy (&local->loc, loc);
-
- strcpy (local->origpath, loc->path);
- strcpy (local->newpath, priv->trash_dir);
- strcat (local->newpath, loc->path);
-
- {
- /* append timestamp to file name */
- /* TODO: can we make it optional? */
- gf_time_fmt (timestr, sizeof timestr, time (NULL),
- gf_timefmt_F_HMS);
- strcat (local->newpath, timestr);
- }
+ gf_uuid_copy(loc.gfid, trash_gfid);
- LOCK_INIT (&frame->lock);
+ /* Find trash inode using available information */
+ priv->trash_inode = inode_link(inode, NULL, NULL, buf);
- STACK_WIND (frame, trash_unlink_stat_cbk,
- this->children->xlator,
- this->children->xlator->fops->stat, loc);
+ loc.inode = inode_ref(priv->trash_inode);
+ loc_copy(&local->loc, &loc);
+ /*Used to find path of old trash directory*/
+ STACK_WIND(frame, trash_dir_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, &loc,
+ GET_ANCESTRY_PATH_KEY, xdata);
+ return 0;
+ }
+
+ /* If there is no old trash directory we set its value to new one,
+ * which is the valid condition for trash directory creation
+ */
+ else {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Creating trash "
+ "directory %s ",
+ priv->newtrash_dir);
+
+ gfid_ptr = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!gfid_ptr) {
+ ret = ENOMEM;
+ goto out;
+ }
+ gf_uuid_copy(*gfid_ptr, trash_gfid);
+
+ gf_uuid_copy(loc.gfid, trash_gfid);
+ gf_uuid_copy(loc.pargfid, root_gfid);
+ ret = extract_trash_directory(priv->newtrash_dir, &loc.name);
+ if (ret) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+ loc.path = gf_strdup(priv->newtrash_dir);
+ if (!loc.path) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+
+ priv->trash_inode = inode_new(priv->trash_itable);
+ priv->trash_inode->ia_type = IA_IFDIR;
+ loc.inode = inode_ref(priv->trash_inode);
+ dict = dict_new();
+ if (!dict) {
+ ret = ENOMEM;
+ goto out;
+ }
+ /* Fixed gfid is set for trash directory with
+ * this function
+ */
+ ret = dict_set_gfuuid(dict, "gfid-req", *gfid_ptr, false);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "setting key gfid-req failed");
+ goto out;
+ }
+ loc_copy(&local->loc, &loc);
+
+ /* The mkdir call for creating trash directory */
+ STACK_WIND(frame, trash_dir_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, &loc, 0755, 0022, dict);
return 0;
+ }
+out:
+ if (ret && gfid_ptr)
+ GF_FREE(gfid_ptr);
+ if (dict)
+ dict_unref(dict);
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+ trash_local_wipe(local);
+ return ret;
}
-int32_t
-trash_truncate_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+int
+create_or_rename_trash_directory(xlator_t *this)
{
- /* use this Function when a failure occurs, and
- delete the newly created file. */
- trash_local_t *local = NULL;
-
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "deleting the newly created file: %s",
- strerror (op_errno));
- }
+ trash_private_t *priv = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ trash_local_t *local = NULL;
+
+ priv = this->private;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (frame == NULL) {
+ gf_log(this->name, GF_LOG_ERROR, "failed to create frame");
+ ret = ENOMEM;
+ goto out;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ frame->local = local;
+
+ loc.inode = inode_new(priv->trash_itable);
+ gf_uuid_copy(loc.gfid, trash_gfid);
+ loc_copy(&local->loc, &loc);
+ gf_log(this->name, GF_LOG_DEBUG,
+ "nameless lookup for"
+ "old trash directory");
+ STACK_WIND(frame, trash_dir_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &loc, NULL);
+out:
+ return ret;
+}
- STACK_WIND (frame, trash_common_unwind_buf_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->truncate,
- &local->loc, local->fop_offset);
+int
+create_internalop_directory(xlator_t *this)
+{
+ trash_private_t *priv = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ call_frame_t *frame = NULL;
+ trash_local_t *local = NULL;
+
+ priv = this->private;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (frame == NULL) {
+ gf_log(this->name, GF_LOG_ERROR, "failed to create frame");
+ ret = ENOMEM;
+ goto out;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ frame->local = local;
+
+ gf_uuid_copy(loc.gfid, internal_op_gfid);
+ gf_uuid_copy(loc.pargfid, trash_gfid);
+ loc.inode = inode_new(priv->trash_itable);
+ loc.inode->ia_type = IA_IFDIR;
+
+ loc_copy(&local->loc, &loc);
+ STACK_WIND(frame, trash_internalop_dir_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, &loc, NULL);
+out:
- return 0;
+ return ret;
}
int32_t
-trash_truncate_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobuf)
+trash_common_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- trash_local_t *local = NULL;
+ STACK_UNWIND_STRICT(mkdir, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+ return 0;
+}
- local = frame->local;
+int32_t
+trash_common_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, buf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
+ return 0;
+}
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "readv on the existing file failed: %s",
- strerror (op_errno));
+int32_t
+trash_common_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ STACK_UNWIND_STRICT(rmdir, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
- STACK_WIND (frame, trash_truncate_unlink_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
- &local->newloc);
- goto out;
- }
+/**
+ * move backs from trash translator to unlink call
+ */
+int32_t
+trash_common_unwind_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ TRASH_STACK_UNWIND(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+ return 0;
+}
- local->fsize = stbuf->ia_size;
- STACK_WIND (frame, trash_truncate_writev_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->writev,
- local->newfd, vector, count, local->cur_offset, 0, iobuf);
+/**
+ * If the path is not present in the trash directory,it will recursively
+ * call this call-back and one by one directories will be created from
+ * the starting
+ */
+int32_t
+trash_unlink_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ trash_local_t *local = NULL;
+ char *tmp_str = NULL;
+ char *tmp_path = NULL;
+ char *tmp_dirname = NULL;
+ char *tmp_stat = NULL;
+ char real_path[PATH_MAX] = {
+ 0,
+ };
+ char *dir_name = NULL;
+ size_t count = 0;
+ int32_t loop_count = 0;
+ int i = 0;
+ loc_t tmp_loc = {
+ 0,
+ };
+ trash_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
+
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO("trash", local, out);
+
+ TRASH_UNSET_PID(frame, local);
+
+ tmp_str = gf_strdup(local->newpath);
+ if (!tmp_str) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+ ret = -1;
+ goto out;
+ }
+ loop_count = local->loop_count;
+
+ /* The directory is not present , need to create it */
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ tmp_dirname = strchr(tmp_str, '/');
+ while (tmp_dirname) {
+ count = tmp_dirname - tmp_str;
+ if (count == 0)
+ count = 1;
+ i++;
+ if (i > loop_count)
+ break;
+ tmp_dirname = strchr(tmp_str + count + 1, '/');
+ }
+ tmp_path = gf_memdup(local->newpath, count + 1);
+ if (!tmp_path) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ tmp_path[count] = '\0';
+
+ loc_copy(&tmp_loc, &local->loc);
+ tmp_loc.path = gf_strdup(tmp_path);
+ if (!tmp_loc.path) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+
+ /* Stores the the name of directory to be created */
+ tmp_loc.name = gf_strdup(strrchr(tmp_path, '/') + 1);
+ if (!tmp_loc.name) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ strncpy(real_path, priv->brick_path, sizeof(real_path));
+ real_path[sizeof(real_path) - 1] = 0;
+
+ remove_trash_path(tmp_path, (frame->root->pid < 0), &tmp_stat);
+ if (tmp_stat)
+ strncat(real_path, tmp_stat,
+ sizeof(real_path) - strlen(real_path) - 1);
+
+ TRASH_SET_PID(frame, local);
+
+ STACK_WIND_COOKIE(frame, trash_unlink_mkdir_cbk, tmp_path,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+ &tmp_loc, get_permission(real_path), 0022, xdata);
+ loc_wipe(&tmp_loc);
+ goto out;
+ }
+
+ /* Given path is created , comparing to the required path */
+ if (op_ret == 0) {
+ dir_name = dirname(tmp_str);
+ if (strcmp((char *)cookie, dir_name) == 0) {
+ /* File path exists we can rename it*/
+ loc_copy(&tmp_loc, &local->loc);
+ tmp_loc.path = local->newpath;
+ STACK_WIND(frame, trash_unlink_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, &local->loc, &tmp_loc,
+ xdata);
+ goto out;
+ }
+ }
+
+ if ((op_ret == -1) && (op_errno != EEXIST)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Directory creation failed [%s]. "
+ "Therefore unlinking %s without moving to trash "
+ "directory",
+ strerror(op_errno), local->loc.name);
+ STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &local->loc, 0, xdata);
+ goto out;
+ }
+
+ LOCK(&frame->lock);
+ {
+ loop_count = ++local->loop_count;
+ }
+ UNLOCK(&frame->lock);
+
+ tmp_dirname = strchr(tmp_str, '/');
+
+ /* Path is not completed , need to create remaining path */
+ while (tmp_dirname) {
+ count = tmp_dirname - tmp_str;
+ if (count == 0)
+ count = 1;
+ i++;
+ if (i > loop_count)
+ break;
+ tmp_dirname = strchr(tmp_str + count + 1, '/');
+ }
+ tmp_path = gf_memdup(local->newpath, count + 1);
+ if (!tmp_path) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+ ret = -1;
+ goto out;
+ }
+ tmp_path[count] = '\0';
+
+ loc_copy(&tmp_loc, &local->loc);
+ tmp_loc.path = gf_strdup(tmp_path);
+ if (!tmp_loc.path) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ /* Stores the the name of directory to be created */
+ tmp_loc.name = gf_strdup(strrchr(tmp_path, '/') + 1);
+ if (!tmp_loc.name) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ strncpy(real_path, priv->brick_path, sizeof(real_path));
+ real_path[sizeof(real_path) - 1] = 0;
+
+ remove_trash_path(tmp_path, (frame->root->pid < 0), &tmp_stat);
+ if (tmp_stat)
+ strncat(real_path, tmp_stat, sizeof(real_path) - strlen(real_path) - 1);
+
+ TRASH_SET_PID(frame, local);
+
+ STACK_WIND_COOKIE(frame, trash_unlink_mkdir_cbk, tmp_path,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+ &tmp_loc, get_permission(real_path), 0022, xdata);
out:
- return 0;
-
+ if (tmp_path)
+ GF_FREE(tmp_path);
+ if (tmp_str)
+ GF_FREE(tmp_str);
+ return ret;
}
+/**
+ * The name of unlinking file should be renamed as starting
+ * from trash directory as mentioned in the mount point
+ */
int32_t
-trash_truncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+trash_unlink_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ struct iatt *preoldparent, struct iatt *postoldparent,
+ struct iatt *prenewparent, struct iatt *postnewparent,
+ dict_t *xdata)
{
- trash_local_t *local = NULL;
-
- local = frame->local;
-
- if (op_ret == -1) {
- /* Let truncate work, but previous copy is not preserved. */
- gf_log (this->name, GF_LOG_DEBUG,
- "writev on the existing file failed: %s",
- strerror (op_errno));
-
- STACK_WIND (frame, trash_truncate_unlink_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink, &local->newloc);
- goto out;
- }
-
- if (local->cur_offset < local->fsize) {
- local->cur_offset += GF_BLOCK_READV_SIZE;
- /* Loop back and Read the contents again. */
- STACK_WIND (frame, trash_truncate_readv_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
- local->fd, (size_t)GF_BLOCK_READV_SIZE,
- local->cur_offset, 0);
- goto out;
- }
-
-
- /* OOFH.....Finally calling Truncate. */
- STACK_WIND (frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, &local->loc,
- local->fop_offset);
-
+ trash_local_t *local = NULL;
+ trash_private_t *priv = NULL;
+ char *tmp_str = NULL;
+ char *dir_name = NULL;
+ char *tmp_cookie = NULL;
+ loc_t tmp_loc = {
+ 0,
+ };
+ dict_t *new_xdata = NULL;
+ char *tmp_stat = NULL;
+ char real_path[PATH_MAX] = {
+ 0,
+ };
+ int ret = 0;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
+
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO("trash", local, out);
+
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ /* the file path does not exist we want to create path
+ * for the file
+ */
+ tmp_str = gf_strdup(local->newpath);
+ if (!tmp_str) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ dir_name = dirname(tmp_str); /* stores directory name */
+
+ loc_copy(&tmp_loc, &local->loc);
+ tmp_loc.path = gf_strdup(dir_name);
+ if (!tmp_loc.path) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+
+ tmp_cookie = gf_strdup(dir_name);
+ if (!tmp_cookie) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ strncpy(real_path, priv->brick_path, sizeof(real_path));
+ real_path[sizeof(real_path) - 1] = 0;
+ remove_trash_path(tmp_str, (frame->root->pid < 0), &tmp_stat);
+ if (tmp_stat)
+ strncat(real_path, tmp_stat,
+ sizeof(real_path) - strlen(real_path) - 1);
+
+ TRASH_SET_PID(frame, local);
+
+ /* create the directory with proper permissions */
+ STACK_WIND_COOKIE(frame, trash_unlink_mkdir_cbk, tmp_cookie,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+ &tmp_loc, get_permission(real_path), 0022, xdata);
+ loc_wipe(&tmp_loc);
+ goto out;
+ }
+
+ if ((op_ret == -1) && (op_errno == ENOTDIR)) {
+ /* if entry is already present in trash directory,
+ * new one is not copied*/
+ gf_log(this->name, GF_LOG_DEBUG,
+ "target(%s) exists, cannot keep the copy, deleting",
+ local->newpath);
+
+ STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &local->loc, 0, xdata);
+
+ goto out;
+ }
+
+ if ((op_ret == -1) && (op_errno == EISDIR)) {
+ /* if entry is directory,we remove directly */
+ gf_log(this->name, GF_LOG_DEBUG,
+ "target(%s) exists as directory, cannot keep copy, "
+ "deleting",
+ local->newpath);
+
+ STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &local->loc, 0, xdata);
+ goto out;
+ }
+
+ /**********************************************************************
+ *
+ * CTR Xlator message handling done here!
+ *
+ **********************************************************************/
+ /**
+ * If unlink is handled by trash translator, it should inform the
+ * CTR Xlator. And trash translator only handles the unlink for
+ * the last hardlink.
+ *
+ * Check if there is a GF_REQUEST_LINK_COUNT_XDATA from CTR Xlator
+ *
+ */
+
+ if (local->ctr_link_count_req) {
+ /* Sending back inode link count to ctr_unlink
+ * (changetimerecoder xlator) via
+ * "GF_RESPONSE_LINK_COUNT_XDATA" key using xdata.
+ * */
+ if (xdata) {
+ ret = dict_set_uint32(xdata, GF_RESPONSE_LINK_COUNT_XDATA, 1);
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to set"
+ " GF_RESPONSE_LINK_COUNT_XDATA");
+ }
+ } else {
+ new_xdata = dict_new();
+ if (!new_xdata) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Memory allocation failure while "
+ "creating new_xdata");
+ goto ctr_out;
+ }
+ ret = dict_set_uint32(new_xdata, GF_RESPONSE_LINK_COUNT_XDATA, 1);
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to set"
+ " GF_RESPONSE_LINK_COUNT_XDATA");
+ }
+ ctr_out:
+ TRASH_STACK_UNWIND(unlink, frame, 0, op_errno, preoldparent,
+ postoldparent, new_xdata);
+ goto out;
+ }
+ }
+ /* All other cases, unlink should return success */
+ TRASH_STACK_UNWIND(unlink, frame, 0, op_errno, preoldparent, postoldparent,
+ xdata);
out:
- return 0;
-}
+ if (tmp_str)
+ GF_FREE(tmp_str);
+ if (tmp_cookie)
+ GF_FREE(tmp_cookie);
+ if (new_xdata)
+ dict_unref(new_xdata);
+ return ret;
+}
+/**
+ * move backs from trash translator to truncate call
+ */
int32_t
-trash_truncate_open_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd)
+trash_common_unwind_buf_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
{
- trash_local_t *local = NULL;
-
- local = frame->local;
-
- if (op_ret == -1) {
- //Let truncate work, but previous copy is not preserved.
- gf_log (this->name, GF_LOG_DEBUG,
- "open on the existing file failed: %s",
- strerror (op_errno));
+ TRASH_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+ return 0;
+}
- STACK_WIND (frame, trash_truncate_unlink_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
- &local->newloc);
- goto out;
- }
+int32_t
+trash_unlink_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
+{
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;
+ loc_t new_loc = {
+ 0,
+ };
+ int ret = 0;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
+
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO("trash", local, out);
+
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_DEBUG, "%s: %s", local->loc.path,
+ strerror(op_errno));
+ TRASH_STACK_UNWIND(unlink, frame, op_ret, op_errno, buf, NULL, xdata);
+ ret = -1;
+ goto out;
+ }
+
+ /* Only last hardlink will be moved to trash directory */
+ if (buf->ia_nlink > 1) {
+ STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &local->loc, 0, xdata);
+ goto out;
+ }
+
+ /* if the file is too big just unlink it */
+ if (buf->ia_size > (priv->max_trash_file_size)) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "%s: file size too big (%" PRId64
+ ") to "
+ "move into trash directory",
+ local->loc.path, buf->ia_size);
+
+ STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &local->loc, 0, xdata);
+ goto out;
+ }
+
+ /* Copies new path for renaming */
+ loc_copy(&new_loc, &local->loc);
+ new_loc.path = gf_strdup(local->newpath);
+ if (!new_loc.path) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+
+ STACK_WIND(frame, trash_unlink_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, &local->loc, &new_loc, xdata);
- local->cur_offset = local->fop_offset;
+out:
+ loc_wipe(&new_loc);
- STACK_WIND (frame, trash_truncate_readv_cbk,
- FIRST_CHILD (this), FIRST_CHILD (this)->fops->readv,
- local->fd, (size_t)GF_BLOCK_READV_SIZE, local->cur_offset, 0);
+ return ret;
+}
+/**
+ * Unlink is called internally by rm system call and also
+ * by internal operations of gluster such as self-heal
+ */
+int32_t
+trash_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflags,
+ dict_t *xdata)
+{
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL; /* files inside trash */
+ int32_t match = 0;
+ int32_t ctr_link_req = 0;
+ char *pathbuf = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
+
+ /* If trash is not active or not enabled through cli, then
+ * we bypass and wind back
+ */
+ if (!priv->state) {
+ STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, 0, xdata);
+ goto out;
+ }
+
+ /* The files removed by gluster internal operations such as self-heal,
+ * should moved to trash directory , but files by client should not
+ * moved
+ */
+ if ((frame->root->pid < 0) && !priv->internal) {
+ STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, 0, xdata);
+ goto out;
+ }
+ /* loc need some gfid which will be present in inode */
+ gf_uuid_copy(loc->gfid, loc->inode->gfid);
+
+ /* Checking for valid location */
+ if (gf_uuid_is_null(loc->gfid) && gf_uuid_is_null(loc->inode->gfid)) {
+ gf_log(this->name, GF_LOG_DEBUG, "Bad address");
+ STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, 0, xdata);
+ ret = EFAULT;
+ goto out;
+ }
+
+ /* This will be more accurate */
+ inode_path(loc->inode, NULL, &pathbuf);
+ /* Check whether the file is present under eliminate paths or
+ * inside trash directory. In both cases we don't need to move the
+ * file to trash directory. Instead delete it permanently
+ */
+ match = check_whether_eliminate_path(priv->eliminate, pathbuf);
+ if ((strncmp(pathbuf, priv->newtrash_dir, strlen(priv->newtrash_dir)) ==
+ 0) ||
+ (match)) {
+ if (match) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "%s is a file comes under an eliminate path, "
+ "so it is not moved to trash",
+ loc->name);
+ }
+
+ /* Trying to unlink from the trash-dir. So do the
+ * actual unlink without moving to trash-dir.
+ */
+ STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, 0, xdata);
+ goto out;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ TRASH_STACK_UNWIND(unlink, frame, -1, ENOMEM, NULL, NULL, xdata);
+ ret = ENOMEM;
+ goto out;
+ }
+ frame->local = local;
+ loc_copy(&local->loc, loc);
+
+ /* rename new location of file as starting from trash directory */
+ copy_trash_path(priv->newtrash_dir, (frame->root->pid < 0), local->newpath,
+ sizeof(local->newpath));
+ strncat(local->newpath, pathbuf,
+ sizeof(local->newpath) - strlen(local->newpath) - 1);
+
+ /* append timestamp to file name so that we can avoid
+ * name collisions inside trash
+ */
+ append_time_stamp(local->newpath, sizeof(local->newpath));
+ if (strlen(local->newpath) > PATH_MAX) {
+ STACK_WIND(frame, trash_common_unwind_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, 0, xdata);
+ goto out;
+ }
+
+ /* To know whether CTR xlator requested for the link count */
+ ret = dict_get_int32(xdata, GF_REQUEST_LINK_COUNT_XDATA, &ctr_link_req);
+ if (ret) {
+ local->ctr_link_count_req = _gf_false;
+ ret = 0;
+ } else
+ local->ctr_link_count_req = _gf_true;
+
+ LOCK_INIT(&frame->lock);
+
+ STACK_WIND(frame, trash_unlink_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
out:
- return 0;
+ return ret;
}
-
+/**
+ * Use this when a failure occurs, and delete the newly created file
+ */
int32_t
-trash_truncate_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+trash_truncate_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- trash_local_t *local = NULL;
- char *tmp_str = NULL;
- char *dir_name = NULL;
- char *tmp_path = NULL;
- int32_t flags = 0;
- loc_t tmp_loc = {0,};
-
- local = frame->local;
-
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- //Creating the directory structure here.
- tmp_str = gf_strdup (local->newpath);
- if (!tmp_str) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
- dir_name = dirname (tmp_str);
-
- tmp_path = gf_strdup (dir_name);
- if (!tmp_path) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
- tmp_loc.path = tmp_path;
-
- /* TODO: create the directory with proper permissions */
- STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk,
- tmp_path, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- &tmp_loc, 0755, NULL);
- GF_FREE (tmp_str);
- goto out;
- }
-
- if (op_ret == -1) {
- //Let truncate work, but previous copy is not preserved.
- //Deleting the newly created copy.
- gf_log (this->name, GF_LOG_DEBUG,
- "creation of new file in trash-dir failed, "
- "when truncate was called: %s", strerror (op_errno));
-
- STACK_WIND (frame, trash_common_unwind_buf_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, &local->loc,
- local->fop_offset);
- goto out;
- }
+ trash_local_t *local = NULL;
- flags = O_RDONLY;
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO("trash", local, out);
- local->fd = fd_create (local->loc.inode, frame->root->pid);
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_DEBUG, "deleting the newly created file: %s",
+ strerror(op_errno));
+ }
- STACK_WIND (frame, trash_truncate_open_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->open, &local->loc, flags,
- local->fd, 0);
+ STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, &local->loc,
+ local->fop_offset, xdata);
out:
- return 0;
+ return 0;
}
+/**
+ * Read from source file
+ */
int32_t
-trash_truncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+trash_truncate_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iovec *vector,
+ int32_t count, struct iatt *stbuf,
+ struct iobref *iobuf, dict_t *xdata)
{
- trash_local_t *local = NULL;
- char *tmp_str = NULL;
- char *tmp_path = NULL;
- char *tmp_dirname = NULL;
- char *dir_name = NULL;
- int32_t count = 0;
- int32_t flags = 0;
- int32_t loop_count = 0;
- int i = 0;
- loc_t tmp_loc = {0,};
-
- local = frame->local;
- if (!local)
- goto out;
-
- loop_count = local->loop_count;
-
- tmp_str = gf_strdup (local->newpath);
- if (!tmp_str) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- goto out;
- }
-
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- tmp_dirname = strchr (tmp_str, '/');
- while (tmp_dirname) {
- count = tmp_dirname - tmp_str;
- if (count == 0)
- count = 1;
- i++;
- if (i > loop_count)
- break;
- tmp_dirname = strchr (tmp_str + count + 1, '/');
- }
- tmp_path = memdup (local->newpath, count);
- if (!tmp_path) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
- tmp_loc.path = tmp_path;
- STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk,
- tmp_path, this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc, 0755, NULL);
-
- goto out;
- }
+ trash_local_t *local = NULL;
- if (op_ret == 0) {
- dir_name = dirname (tmp_str);
- if (strcmp ((char*)cookie, dir_name) == 0) {
- flags = O_CREAT|O_EXCL|O_WRONLY;
- ia_prot_t prot = {0, };
-
- //Call create again once directory structure is created.
- STACK_WIND (frame, trash_truncate_create_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
- &local->newloc, flags,
- st_mode_from_ia (prot, local->loc.inode->ia_type),
- local->newfd, NULL);
- goto out;
- }
- }
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO("trash", local, out);
- LOCK (&frame->lock);
- {
- loop_count = ++local->loop_count;
- }
- UNLOCK (&frame->lock);
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "readv on the existing file failed: %s", strerror(op_errno));
- tmp_dirname = strchr (tmp_str, '/');
- while (tmp_dirname) {
- count = tmp_dirname - tmp_str;
- if (count == 0)
- count = 1;
-
- i++;
- if ((i > loop_count) || (count > PATH_MAX))
- break;
- tmp_dirname = strchr (tmp_str + count + 1, '/');
- }
- tmp_path = memdup (local->newpath, count);
- if (!tmp_path) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
- tmp_loc.path = tmp_path;
+ STACK_WIND(frame, trash_truncate_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &local->newloc, 0, xdata);
+ goto out;
+ }
- STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk, tmp_path,
- this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc, 0755, NULL);
+ local->fsize = stbuf->ia_size;
+ STACK_WIND(frame, trash_truncate_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, local->newfd, vector, count,
+ local->cur_offset, 0, iobuf, xdata);
out:
- GF_FREE (cookie); /* strdup (dir_name) was sent here :) */
- if (tmp_str)
- GF_FREE (tmp_str);
-
- return 0;
+ return 0;
}
-
+/**
+ * Write to file created in trash directory
+ */
int32_t
-trash_truncate_stat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+trash_truncate_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *prebuf,
+ struct iatt *postbuf, dict_t *xdata)
{
- trash_private_t *priv = NULL;
- trash_local_t *local = NULL;
- char timestr[64] = {0,};
- char loc_newname[PATH_MAX] = {0,};
- int32_t flags = 0;
-
- priv = this->private;
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "fstat on the file failed: %s",
- strerror (op_errno));
-
- TRASH_STACK_UNWIND (truncate, frame, op_ret, op_errno, buf, NULL);
- return 0;
- }
+ trash_local_t *local = NULL;
+
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO("trash", local, out);
+
+ if (op_ret == -1) {
+ /* Let truncate work, but previous copy is not preserved. */
+ gf_log(this->name, GF_LOG_DEBUG,
+ "writev on the existing file failed: %s", strerror(op_errno));
+
+ STACK_WIND(frame, trash_truncate_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &local->newloc, 0, xdata);
+ goto out;
+ }
+
+ if (local->cur_offset < local->fsize) {
+ local->cur_offset += GF_BLOCK_READV_SIZE;
+ /* Loop back and Read the contents again. */
+ STACK_WIND(frame, trash_truncate_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, local->fd,
+ (size_t)GF_BLOCK_READV_SIZE, local->cur_offset, 0, xdata);
+ goto out;
+ }
+
+ /* OOFH.....Finally calling Truncate. */
+ STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, &local->loc,
+ local->fop_offset, xdata);
- if ((buf->ia_size == 0) || (buf->ia_size > priv->max_trash_file_size)) {
- // If the file is too big, just unlink it.
- if (buf->ia_size > priv->max_trash_file_size)
- gf_log (this->name, GF_LOG_DEBUG, "%s: file too big, "
- "not moving to trash", local->loc.path);
-
- STACK_WIND (frame, trash_common_unwind_buf_cbk,
- this->children->xlator,
- this->children->xlator->fops->truncate,
- &local->loc, local->fop_offset);
- return 0;
- }
-
- strcpy (local->newpath, priv->trash_dir);
- strcat (local->newpath, local->loc.path);
-
- {
- gf_time_fmt (timestr, sizeof timestr, time (NULL),
- gf_timefmt_F_HMS);
- strcat (local->newpath, timestr);
- }
- strcpy (loc_newname,local->loc.name);
- strcat (loc_newname,timestr);
-
- local->newloc.name = gf_strdup (loc_newname);
- local->newloc.path = gf_strdup (local->newpath);
- local->newloc.inode = inode_new (local->loc.inode->table);
- local->newfd = fd_create (local->newloc.inode, frame->root->pid);
-
- flags = O_CREAT|O_EXCL|O_WRONLY;
-
- STACK_WIND (frame, trash_truncate_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- &local->newloc, flags,
- st_mode_from_ia (buf->ia_prot, local->loc.inode->ia_type),
- local->newfd, NULL);
-
- return 0;
+out:
+ return 0;
}
+/**
+ * The source file is opened for reading and writing
+ */
int32_t
-trash_truncate (call_frame_t *frame, xlator_t *this, loc_t *loc,
- off_t offset)
+trash_truncate_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ dict_t *xdata)
{
- trash_elim_pattern_t *trav = NULL;
- trash_private_t *priv = NULL;
- trash_local_t *local = NULL;
- int32_t match = 0;
-
- priv = this->private;
- if (priv->eliminate) {
- trav = priv->eliminate;
- while (trav) {
- if (fnmatch(trav->pattern, loc->name, 0) == 0) {
- match++;
- break;
- }
- trav = trav->next;
- }
- }
+ trash_local_t *local = NULL;
- if ((strncmp (loc->path, priv->trash_dir,
- strlen (priv->trash_dir)) == 0) || (offset) || (match)) {
- if (match) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: file not moved to trash as per option "
- "'eliminate'", loc->path);
- }
-
- // Trying to truncate from the trash can dir,
- // do the actual truncate without moving to trash-dir.
- STACK_WIND (frame, trash_common_unwind_buf_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate, loc, offset);
- goto out;
- }
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO("trash", local, out);
- LOCK_INIT (&frame->lock);
+ if (op_ret == -1) {
+ /* Let truncate work, but previous copy is not preserved. */
+ gf_log(this->name, GF_LOG_DEBUG, "open on the existing file failed: %s",
+ strerror(op_errno));
- local = mem_get0 (this->local_pool);
- if (!local) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- TRASH_STACK_UNWIND (truncate, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
-
- loc_copy (&local->loc, loc);
+ STACK_WIND(frame, trash_truncate_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, &local->newloc, 0, xdata);
+ goto out;
+ }
- local->fop_offset = offset;
+ fd_bind(fd);
- frame->local = local;
+ local->cur_offset = 0;
- STACK_WIND (frame, trash_truncate_stat_cbk,
- this->children->xlator,
- this->children->xlator->fops->stat, loc);
+ STACK_WIND(frame, trash_truncate_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, local->fd,
+ (size_t)GF_BLOCK_READV_SIZE, local->cur_offset, 0, xdata);
out:
- return 0;
+ return 0;
}
+/**
+ * Creates new file descriptor for read and write operations,
+ * if the path is present in trash directory
+ */
int32_t
-trash_ftruncate_unlink_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *preparent, struct iatt *postparent)
+trash_truncate_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd,
+ inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
{
- trash_local_t *local = NULL;
-
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: failed to unlink new file: %s",
- local->newloc.path, strerror(op_errno));
-
+ trash_local_t *local = NULL;
+ char *tmp_str = NULL;
+ char *dir_name = NULL;
+ char *tmp_path = NULL;
+ int32_t flags = 0;
+ loc_t tmp_loc = {
+ 0,
+ };
+ char *tmp_stat = NULL;
+ char real_path[PATH_MAX] = {
+ 0,
+ };
+ trash_private_t *priv = NULL;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
+
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO("trash", local, out);
+
+ TRASH_UNSET_PID(frame, local);
+
+ /* Checks whether path is present in trash directory or not */
+
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ /* Creating the directory structure here. */
+ tmp_str = gf_strdup(local->newpath);
+ if (!tmp_str) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
}
+ dir_name = dirname(tmp_str);
- STACK_WIND (frame, trash_common_unwind_buf_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->ftruncate,
- local->fd, local->fop_offset);
+ tmp_path = gf_strdup(dir_name);
+ if (!tmp_path) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+ loc_copy(&tmp_loc, &local->newloc);
+ tmp_loc.path = gf_strdup(tmp_path);
+ if (!tmp_loc.path) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+ strncpy(real_path, priv->brick_path, sizeof(real_path));
+ real_path[sizeof(real_path) - 1] = 0;
+ remove_trash_path(tmp_path, (frame->root->pid < 0), &tmp_stat);
+ if (tmp_stat)
+ strncat(real_path, tmp_stat,
+ sizeof(real_path) - strlen(real_path) - 1);
+
+ TRASH_SET_PID(frame, local);
+
+ /* create the directory with proper permissions */
+ STACK_WIND_COOKIE(frame, trash_truncate_mkdir_cbk, tmp_path,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+ &tmp_loc, get_permission(real_path), 0022, xdata);
+ loc_wipe(&tmp_loc);
+ goto out;
+ }
+
+ if (op_ret == -1) {
+ /* Let truncate work, but previous copy is not preserved.
+ * Deleting the newly created copy.
+ */
+ gf_log(this->name, GF_LOG_DEBUG,
+ "creation of new file in trash-dir failed, "
+ "when truncate was called: %s",
+ strerror(op_errno));
+
+ STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, &local->loc,
+ local->fop_offset, xdata);
+ goto out;
+ }
+
+ fd_bind(fd);
+ flags = O_RDONLY;
+
+ /* fd which represents source file for reading and writing from it */
+
+ local->fd = fd_create(local->loc.inode, frame->root->pid);
+
+ STACK_WIND(frame, trash_truncate_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, &local->loc, flags, local->fd, 0);
+out:
+ if (tmp_str)
+ GF_FREE(tmp_str);
+ if (tmp_path)
+ GF_FREE(tmp_path);
- return 0;
+ return 0;
}
+/**
+ * If the path is not present in the trash directory,it will recursively call
+ * this call-back and one by one directories will be created from the
+ * beginning
+ */
int32_t
-trash_ftruncate_writev_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iatt *prebuf, struct iatt *postbuf)
+trash_truncate_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
{
- trash_local_t *local = NULL;
-
- local = frame->local;
-
- if (op_ret == -1) {
- STACK_WIND (frame, trash_ftruncate_unlink_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
- &local->newloc);
- return 0;
- }
-
- if (local->cur_offset < local->fsize) {
- local->cur_offset += GF_BLOCK_READV_SIZE;
- STACK_WIND (frame, trash_ftruncate_readv_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
- local->fd, (size_t)GF_BLOCK_READV_SIZE,
- local->cur_offset, 0);
- return 0;
- }
+ trash_local_t *local = NULL;
+ trash_private_t *priv = NULL;
+ char *tmp_str = NULL;
+ char *tmp_path = NULL;
+ char *tmp_dirname = NULL;
+ char *dir_name = NULL;
+ char *tmp_stat = NULL;
+ char real_path[PATH_MAX] = {
+ 0,
+ };
+ size_t count = 0;
+ int32_t flags = 0;
+ int32_t loop_count = 0;
+ int i = 0;
+ loc_t tmp_loc = {
+ 0,
+ };
+ int ret = 0;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
+
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO("trash", local, out);
+
+ loop_count = local->loop_count;
+
+ TRASH_UNSET_PID(frame, local);
+
+ tmp_str = gf_strdup(local->newpath);
+ if (!tmp_str) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+
+ if ((op_ret == -1) && (op_errno == ENOENT)) {
+ tmp_dirname = strchr(tmp_str, '/');
+ while (tmp_dirname) {
+ count = tmp_dirname - tmp_str;
+ if (count == 0)
+ count = 1;
+ i++;
+ if (i > loop_count)
+ break;
+ tmp_dirname = strchr(tmp_str + count + 1, '/');
+ }
+ tmp_path = gf_memdup(local->newpath, count + 1);
+ if (!tmp_path) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ tmp_path[count] = '\0';
+
+ loc_copy(&tmp_loc, &local->newloc);
+ tmp_loc.path = gf_strdup(tmp_path);
+ if (!tmp_loc.path) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+
+ /* Stores the the name of directory to be created */
+ tmp_loc.name = gf_strdup(strrchr(tmp_path, '/') + 1);
+ if (!tmp_loc.name) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ strncpy(real_path, priv->brick_path, sizeof(real_path));
+ real_path[sizeof(real_path) - 1] = 0;
+ remove_trash_path(tmp_path, (frame->root->pid < 0), &tmp_stat);
+ if (tmp_stat)
+ strncat(real_path, tmp_stat,
+ sizeof(real_path) - strlen(real_path) - 1);
+
+ TRASH_SET_PID(frame, local);
+
+ STACK_WIND_COOKIE(frame, trash_truncate_mkdir_cbk, tmp_path,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+ &tmp_loc, get_permission(real_path), 0022, xdata);
+ loc_wipe(&tmp_loc);
+ goto out;
+ }
+
+ if (op_ret == 0) {
+ dir_name = dirname(tmp_str);
+ if (strcmp((char *)cookie, dir_name) == 0) {
+ flags = O_CREAT | O_EXCL | O_WRONLY;
+ strncpy(real_path, priv->brick_path, sizeof(real_path));
+ real_path[sizeof(real_path) - 1] = 0;
+ strncat(real_path, local->origpath,
+ sizeof(real_path) - strlen(real_path) - 1);
+ /* Call create again once directory structure
+ is created. */
+
+ TRASH_SET_PID(frame, local);
+
+ STACK_WIND(frame, trash_truncate_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, &local->newloc, flags,
+ get_permission(real_path), 0022, local->newfd, xdata);
+ goto out;
+ }
+ }
+
+ if ((op_ret == -1) && (op_errno != EEXIST)) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Directory creation failed [%s]. "
+ "Therefore truncating %s without moving the "
+ "original copy to trash directory",
+ strerror(op_errno), local->loc.name);
+ STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, &local->loc,
+ local->fop_offset, xdata);
+ goto out;
+ }
+
+ LOCK(&frame->lock);
+ {
+ loop_count = ++local->loop_count;
+ }
+ UNLOCK(&frame->lock);
+
+ tmp_dirname = strchr(tmp_str, '/');
+ while (tmp_dirname) {
+ count = tmp_dirname - tmp_str;
+ if (count == 0)
+ count = 1;
+ i++;
+ if (i > loop_count)
+ break;
+ tmp_dirname = strchr(tmp_str + count + 1, '/');
+ }
+ tmp_path = gf_memdup(local->newpath, count + 1);
+ if (!tmp_path) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ tmp_path[count] = '\0';
+
+ loc_copy(&tmp_loc, &local->newloc);
+ tmp_loc.path = gf_strdup(tmp_path);
+ if (!tmp_loc.path) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+
+ /* Stores the the name of directory to be created */
+ tmp_loc.name = gf_strdup(strrchr(tmp_path, '/') + 1);
+ if (!tmp_loc.name) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+
+ strncpy(real_path, priv->brick_path, sizeof(real_path));
+ real_path[sizeof(real_path) - 1] = 0;
+ remove_trash_path(tmp_path, (frame->root->pid < 0), &tmp_stat);
+ if (tmp_stat)
+ strncat(real_path, tmp_stat, sizeof(real_path) - strlen(real_path) - 1);
+
+ TRASH_SET_PID(frame, local);
+
+ STACK_WIND_COOKIE(frame, trash_truncate_mkdir_cbk, tmp_path,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->mkdir,
+ &tmp_loc, get_permission(real_path), 0022, xdata);
- STACK_WIND (frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate, local->fd,
- local->fop_offset);
+out:
+ if (tmp_str)
+ GF_FREE(tmp_str);
+ if (tmp_path)
+ GF_FREE(tmp_path);
- return 0;
+ return ret;
}
-
int32_t
-trash_ftruncate_readv_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- struct iovec *vector, int32_t count,
- struct iatt *stbuf, struct iobref *iobuf)
+trash_truncate_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *buf,
+ dict_t *xdata)
{
- trash_local_t *local = NULL;
-
- local = frame->local;
- local->fsize = stbuf->ia_size;
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;
+ char loc_newname[PATH_MAX] = {
+ 0,
+ };
+ int32_t flags = 0;
+ dentry_t *dir_entry = NULL;
+ inode_table_t *table = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
+
+ local = frame->local;
+ GF_VALIDATE_OR_GOTO("trash", local, out);
+
+ table = local->loc.inode->table;
+
+ pthread_mutex_lock(&table->lock);
+ {
+ dir_entry = __dentry_search_arbit(local->loc.inode);
+ }
+ pthread_mutex_unlock(&table->lock);
+
+ if (op_ret == -1) {
+ gf_log(this->name, GF_LOG_DEBUG, "fstat on the file failed: %s",
+ strerror(op_errno));
+
+ TRASH_STACK_UNWIND(truncate, frame, op_ret, op_errno, buf, NULL, xdata);
+ goto out;
+ }
+
+ /* Only last hardlink will be moved to trash directory */
+ if (buf->ia_nlink > 1) {
+ STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, &local->loc,
+ local->fop_offset, xdata);
+ goto out;
+ }
+
+ /**
+ * If the file is too big or if it is extended truncate,
+ * just don't move it to trash directory.
+ */
+ if (buf->ia_size > (priv->max_trash_file_size) ||
+ buf->ia_size <= local->fop_offset) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "%s: file is too large to move to trash", local->loc.path);
+
+ STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, &local->loc,
+ local->fop_offset, xdata);
+ goto out;
+ }
+
+ /* Retrieves the name of file from path */
+ local->loc.name = gf_strdup(strrchr(local->loc.path, '/'));
+ if (!local->loc.name) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+
+ /* Stores new path for source file */
+ copy_trash_path(priv->newtrash_dir, (frame->root->pid < 0), local->newpath,
+ sizeof(local->newpath));
+ strncat(local->newpath, local->loc.path,
+ sizeof(local->newpath) - strlen(local->newpath) - 1);
+
+ /* append timestamp to file name so that we can avoid
+ name collisions inside trash */
+ append_time_stamp(local->newpath, sizeof(local->newpath));
+ if (strlen(local->newpath) > PATH_MAX) {
+ STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, &local->loc,
+ local->fop_offset, xdata);
+ goto out;
+ }
+
+ strncpy(loc_newname, local->loc.name, sizeof(loc_newname));
+ loc_newname[sizeof(loc_newname) - 1] = 0;
+ append_time_stamp(loc_newname, sizeof(loc_newname));
+ /* local->newloc represents old file(file inside trash),
+ where as local->loc represents truncated file. We need
+ to create new inode and fd for new file*/
+ local->newloc.name = gf_strdup(loc_newname);
+ if (!local->newloc.name) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ local->newloc.path = gf_strdup(local->newpath);
+ if (!local->newloc.path) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ local->newloc.inode = inode_new(local->loc.inode->table);
+ local->newfd = fd_create(local->newloc.inode, frame->root->pid);
+
+ /* Creating valid parent and pargfids for both files */
+
+ if (dir_entry == NULL) {
+ ret = EINVAL;
+ goto out;
+ }
+ local->loc.parent = inode_ref(dir_entry->parent);
+ gf_uuid_copy(local->loc.pargfid, dir_entry->parent->gfid);
+
+ local->newloc.parent = inode_ref(dir_entry->parent);
+ gf_uuid_copy(local->newloc.pargfid, dir_entry->parent->gfid);
+
+ flags = O_CREAT | O_EXCL | O_WRONLY;
+
+ TRASH_SET_PID(frame, local);
+
+ STACK_WIND(frame, trash_truncate_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, &local->newloc, flags,
+ st_mode_from_ia(buf->ia_prot, local->loc.inode->ia_type), 0022,
+ local->newfd, xdata);
- if (op_ret == -1) {
- STACK_WIND (frame, trash_ftruncate_unlink_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->unlink,
- &local->newloc);
- return 0;
- }
+out:
+ return ret;
+}
- STACK_WIND (frame, trash_ftruncate_writev_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
- local->newfd, vector, count, local->cur_offset, 0, NULL);
+/**
+ * Truncate can be explicitly called or implicitly by some other applications
+ * like text editors etc..
+ */
+int32_t
+trash_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL;
+ int32_t match = 0;
+ char *pathbuf = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
+ /* If trash is not active or not enabled through cli, then
+ * we bypass and wind back
+ */
+ if (!priv->state) {
+ STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ goto out;
+ }
+
+ /* The files removed by gluster operations such as self-heal,
+ should moved to trash directory, but files by client should
+ not moved */
+ if ((frame->root->pid < 0) && !priv->internal) {
+ STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ goto out;
+ }
+ /* This will be more accurate */
+ inode_path(loc->inode, NULL, &pathbuf);
+
+ /* Checks whether file is in trash directory or eliminate path.
+ * In all such cases it does not move to trash directory,
+ * truncate will be performed
+ */
+ match = check_whether_eliminate_path(priv->eliminate, pathbuf);
+
+ if ((strncmp(pathbuf, priv->newtrash_dir, strlen(priv->newtrash_dir)) ==
+ 0) ||
+ (match)) {
+ if (match) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "%s: file not moved to trash as per option "
+ "'eliminate path'",
+ loc->path);
+ }
+
+ /* Trying to truncate from the trash-dir. So do the
+ * actual truncate without moving to trash-dir.
+ */
+ STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+ goto out;
+ }
+
+ LOCK_INIT(&frame->lock);
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ TRASH_STACK_UNWIND(truncate, frame, -1, ENOMEM, NULL, NULL, xdata);
+ ret = ENOMEM;
+ goto out;
+ }
+
+ strncpy(local->origpath, pathbuf, sizeof(local->origpath));
+ local->origpath[sizeof(local->origpath) - 1] = 0;
+
+ loc_copy(&local->loc, loc);
+ local->loc.path = pathbuf;
+ local->fop_offset = offset;
+
+ frame->local = local;
+
+ STACK_WIND(frame, trash_truncate_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
- return 0;
+out:
+ return ret;
}
+/**
+ * When we call truncate from terminal it comes to ftruncate of trash-xlator.
+ * Since truncate internally calls ftruncate and we receive fd of the file,
+ * other than that it also called by Rebalance operation
+ */
+int32_t
+trash_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ trash_private_t *priv = NULL;
+ trash_local_t *local = NULL; /* file inside trash */
+ char *pathbuf = NULL; /* path of file from fd */
+ int32_t retval = 0;
+ int32_t match = 0;
+ int ret = 0;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
+ /* If trash is not active or not enabled through cli, then
+ * we bypass and wind back
+ */
+ if (!priv->state) {
+ STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ goto out;
+ }
+
+ /* The files removed by gluster operations such as self-heal,
+ * should moved to trash directory, but files by client
+ * should not moved
+ */
+ if ((frame->root->pid < 0) && !priv->internal) {
+ STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ goto out;
+ }
+ /* This will be more accurate */
+ retval = inode_path(fd->inode, NULL, &pathbuf);
+
+ /* Checking the eliminate path */
+
+ /* Checks whether file is trash directory or eliminate path or
+ * invalid fd. In all such cases it does not move to trash directory,
+ * ftruncate will be performed
+ */
+ match = check_whether_eliminate_path(priv->eliminate, pathbuf);
+ if ((strncmp(pathbuf, priv->newtrash_dir, strlen(priv->newtrash_dir)) ==
+ 0) ||
+ match || !retval) {
+ if (match) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "%s: file matches eliminate path, "
+ "not moved to trash",
+ pathbuf);
+ }
+
+ /* Trying to ftruncate from the trash-dir. So do the
+ * actual ftruncate without moving to trash-dir
+ */
+ STACK_WIND(frame, trash_common_unwind_buf_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+ goto out;
+ }
+
+ local = mem_get0(this->local_pool);
+ if (!local) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ TRASH_STACK_UNWIND(ftruncate, frame, -1, ENOMEM, NULL, NULL, xdata);
+ ret = -1;
+ goto out;
+ }
+
+ strncpy(local->origpath, pathbuf, sizeof(local->origpath));
+ local->origpath[sizeof(local->origpath) - 1] = 0;
+
+ /* To convert fd to location */
+ frame->local = local;
+
+ local->loc.path = pathbuf;
+ local->loc.inode = inode_ref(fd->inode);
+ gf_uuid_copy(local->loc.gfid, local->loc.inode->gfid);
+
+ local->fop_offset = offset;
+
+ /* Else remains same to truncate code, so from here flow goes
+ * to truncate_stat
+ */
+ STACK_WIND(frame, trash_truncate_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+out:
+ return ret;
+}
+/**
+ * The mkdir call is intercepted to avoid creation of
+ * trash directory in the mount by the user
+ */
int32_t
-trash_ftruncate_create_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, fd_t *fd,
- inode_t *inode, struct iatt *buf,
- struct iatt *preparent, struct iatt *postparent)
+trash_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
{
- trash_local_t *local = NULL;
- char *tmp_str = NULL;
- char *dir_name = NULL;
- char *tmp_path = NULL;
- loc_t tmp_loc = {0,};
-
- local = frame->local;
-
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- tmp_str = gf_strdup (local->newpath);
- if (!tmp_str) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
- dir_name = dirname (tmp_str);
-
- tmp_path = gf_strdup (dir_name);
- if (!tmp_path) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
- tmp_loc.path = tmp_path;
-
- /* TODO: create the directory with proper permissions */
- STACK_WIND_COOKIE (frame, trash_truncate_mkdir_cbk,
- tmp_path, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- &tmp_loc, 0755, NULL);
- GF_FREE (tmp_str);
- return 0;
- }
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ trash_private_t *priv = NULL;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
+
+ if (!check_whether_op_permitted(priv, loc)) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "mkdir issued on %s, which is not permitted",
+ priv->newtrash_dir);
+ op_errno = EPERM;
+ op_ret = -1;
+
+ STACK_UNWIND_STRICT(mkdir, frame, op_ret, op_errno, NULL, NULL, NULL,
+ NULL, xdata);
+ } else {
+ STACK_WIND(frame, trash_common_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, xdata);
+ }
- if (op_ret == -1) {
- STACK_WIND (frame, trash_common_unwind_buf_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- local->fd, local->fop_offset);
- return 0;
- }
+out:
+ return 0;
+}
- STACK_WIND (frame, trash_ftruncate_readv_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readv, local->fd,
- (size_t)GF_BLOCK_READV_SIZE, local->cur_offset, 0);
+/**
+ * The rename call is intercepted to avoid renaming
+ * of trash directory in the mount by the user
+ */
+int
+trash_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ trash_private_t *priv = NULL;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
+
+ if (!check_whether_op_permitted(priv, oldloc)) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "rename issued on %s, which is not permitted",
+ priv->newtrash_dir);
+ op_errno = EPERM;
+ op_ret = -1;
+
+ STACK_UNWIND_STRICT(rename, frame, op_ret, op_errno, NULL, NULL, NULL,
+ NULL, NULL, xdata);
+ } else {
+ STACK_WIND(frame, trash_common_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+ }
- return 0;
+out:
+ return 0;
}
-
+/**
+ * The rmdir call is intercepted to avoid deletion of
+ * trash directory in the mount by the user
+ */
int32_t
-trash_ftruncate_mkdir_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, inode_t *inode,
- struct iatt *stbuf, struct iatt *preparent,
- struct iatt *postparent)
+trash_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
{
- trash_local_t *local = NULL;
- char *tmp_str = NULL;
- char *tmp_path = NULL;
- char *tmp_dirname = NULL;
- char *dir_name = NULL;
- int32_t count = 0;
- int32_t flags = 0;
- int32_t loop_count = 0;
- int i = 0;
- loc_t tmp_loc = {0,};
-
- local = frame->local;
- if (!local)
- goto out;
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ trash_private_t *priv = NULL;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
+
+ if (!check_whether_op_permitted(priv, loc)) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "rmdir issued on %s, which is not permitted",
+ priv->newtrash_dir);
+ op_errno = EPERM;
+ op_ret = -1;
+
+ STACK_UNWIND_STRICT(rmdir, frame, op_ret, op_errno, NULL, NULL, xdata);
+ } else {
+ STACK_WIND(frame, trash_common_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
+ }
- loop_count = local->loop_count;
+out:
+ return 0;
+}
- tmp_str = gf_strdup (local->newpath);
- if (!tmp_str) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
+/**
+ * Volume set option is handled by the reconfigure function.
+ * Here we checks whether each option is set or not ,if it
+ * sets then corresponding modifciations will be made
+ */
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ uint64_t max_fsize = 0;
+ int ret = 0;
+ char *tmp = NULL;
+ char *tmp_str = NULL;
+ trash_private_t *priv = NULL;
+ char trash_dir[PATH_MAX] = {
+ 0,
+ };
+
+ priv = this->private;
+
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
+
+ GF_OPTION_RECONF("trash-internal-op", priv->internal, options, bool, out);
+ GF_OPTION_RECONF("trash-dir", tmp, options, str, out);
+
+ GF_OPTION_RECONF("trash", priv->state, options, bool, out);
+
+ if (priv->state) {
+ ret = create_or_rename_trash_directory(this);
+
+ if (tmp)
+ sprintf(trash_dir, "/%s/", tmp);
+ else
+ sprintf(trash_dir, "%s", priv->oldtrash_dir);
+
+ if (strcmp(priv->newtrash_dir, trash_dir) != 0) {
+ /* When user set a new name for trash directory, trash
+ * xlator will perform a rename operation on old trash
+ * directory to the new one using a STACK_WIND from here.
+ * This option can be configured only when volume is in
+ * started state
+ */
+
+ GF_FREE(priv->newtrash_dir);
+
+ priv->newtrash_dir = gf_strdup(trash_dir);
+ if (!priv->newtrash_dir) {
+ ret = ENOMEM;
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
goto out;
- }
-
- if ((op_ret == -1) && (op_errno == ENOENT)) {
- tmp_dirname = strchr (tmp_str, '/');
- while (tmp_dirname) {
- count = tmp_dirname - tmp_str;
- if (count == 0)
- count = 1;
- i++;
- if (i > loop_count)
- break;
- tmp_dirname = strchr (tmp_str + count + 1, '/');
- }
- tmp_path = memdup (local->newpath, count);
- if (!tmp_path) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
- tmp_loc.path = tmp_path;
- STACK_WIND_COOKIE (frame, trash_ftruncate_mkdir_cbk,
- tmp_path, this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc, 0755, NULL);
-
+ }
+ gf_log(this->name, GF_LOG_DEBUG,
+ "Renaming %s -> %s from reconfigure", priv->oldtrash_dir,
+ priv->newtrash_dir);
+
+ if (!priv->newtrash_dir) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
goto out;
+ }
+ ret = rename_trash_directory(this);
+ }
+
+ if (priv->internal) {
+ ret = create_internalop_directory(this);
+ }
+ }
+ tmp = NULL;
+
+ GF_OPTION_RECONF("trash-max-filesize", max_fsize, options, size_uint64,
+ out);
+ if (max_fsize) {
+ priv->max_trash_file_size = max_fsize;
+ gf_log(this->name, GF_LOG_DEBUG, "%" GF_PRI_SIZET " max-size",
+ priv->max_trash_file_size);
+ }
+ GF_OPTION_RECONF("trash-eliminate-path", tmp, options, str, out);
+ if (!tmp) {
+ gf_log(this->name, GF_LOG_DEBUG,
+ "no option specified for 'eliminate', using NULL");
+ } else {
+ if (priv->eliminate)
+ wipe_eliminate_path(&priv->eliminate);
+
+ tmp_str = gf_strdup(tmp);
+ if (!tmp_str) {
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ ret = ENOMEM;
+ goto out;
}
+ ret = store_eliminate_path(tmp_str, &priv->eliminate);
+ }
- if (op_ret == 0) {
- dir_name = dirname (tmp_str);
- if (strcmp ((char*)cookie, dir_name) == 0) {
- ia_prot_t prot = {0, };
- flags = O_CREAT|O_EXCL|O_WRONLY;
-
- //Call create again once directory structure is created.
- STACK_WIND (frame, trash_ftruncate_create_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create,
- &local->newloc, flags,
- st_mode_from_ia (prot, local->loc.inode->ia_type),
- local->newfd, NULL);
- goto out;
- }
- }
+out:
- LOCK (&frame->lock);
- {
- loop_count = ++local->loop_count;
- }
- UNLOCK (&frame->lock);
- tmp_dirname = strchr (tmp_str, '/');
- while (tmp_dirname) {
- count = tmp_dirname - tmp_str;
- if (count == 0)
- count = 1;
-
- i++;
- if ((i > loop_count) || (count > PATH_MAX))
- break;
- tmp_dirname = strchr (tmp_str + count + 1, '/');
- }
- tmp_path = memdup (local->newpath, count);
- if (!tmp_path) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
- tmp_loc.path = tmp_path;
+ return ret;
+}
- STACK_WIND_COOKIE (frame, trash_ftruncate_mkdir_cbk, tmp_path,
- this->children->xlator,
- this->children->xlator->fops->mkdir,
- &tmp_loc, 0755, NULL);
+/**
+ * Notify is used to create the trash directory with fixed gfid
+ * using STACK_WIND only when posix xlator is up
+ */
+int
+notify(xlator_t *this, int event, void *data, ...)
+{
+ trash_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("trash", priv, out);
+
+ /* Check whether posix is up not */
+ if (event == GF_EVENT_CHILD_UP) {
+ if (!priv->state) {
+ gf_log(this->name, GF_LOG_DEBUG, "trash xlator is off");
+ goto out;
+ }
+
+ /* Here there is two possibilities ,if trash directory already
+ * exist ,then we need to perform a rename operation on the
+ * old one. Otherwise, we need to create the trash directory
+ * For both, we need to pass location variable, gfid of parent
+ * and a frame for calling STACK_WIND.The location variable
+ * requires name,path,gfid and inode
+ */
+ if (!priv->oldtrash_dir)
+ ret = create_or_rename_trash_directory(this);
+ else if (strcmp(priv->newtrash_dir, priv->oldtrash_dir) != 0)
+ ret = rename_trash_directory(this);
+ if (ret)
+ goto out;
+
+ if (priv->internal)
+ (void)create_internalop_directory(this);
+ }
out:
- GF_FREE (cookie); /* strdup (dir_name) was sent here :) */
- if (tmp_str)
- GF_FREE (tmp_str);
-
- return 0;
+ ret = default_notify(this, event, data);
+ if (ret)
+ gf_log(this->name, GF_LOG_INFO, "default notify event failed");
+ return ret;
}
-
int32_t
-trash_ftruncate_fstat_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int32_t op_ret, int32_t op_errno, struct iatt *buf)
+mem_acct_init(xlator_t *this)
{
- trash_private_t *priv = NULL;
- trash_local_t *local = NULL;
-
- priv = this->private;
- local = frame->local;
-
- if (op_ret == -1) {
- gf_log (this->name, GF_LOG_DEBUG,
- "%s: %s",local->newloc.path, strerror(op_errno));
+ int ret = -1;
- TRASH_STACK_UNWIND (ftruncate, frame, -1, op_errno, buf, NULL);
- return 0;
- }
- if ((buf->ia_size == 0) || (buf->ia_size > priv->max_trash_file_size))
- {
- STACK_WIND (frame, trash_common_unwind_buf_cbk,
- this->children->xlator,
- this->children->xlator->fops->ftruncate,
- local->fd, local->fop_offset);
- return 0;
- }
-
-
- STACK_WIND (frame, trash_ftruncate_create_cbk, FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->create, &local->newloc,
- ( O_CREAT | O_EXCL | O_WRONLY ),
- st_mode_from_ia (buf->ia_prot, local->loc.inode->ia_type),
- local->newfd, NULL);
+ GF_VALIDATE_OR_GOTO("trash", this, out);
- return 0;
+ ret = xlator_mem_acct_init(this, gf_trash_mt_end + 1);
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Memory accounting init"
+ "failed");
+ return ret;
+ }
+out:
+ return ret;
}
+/**
+ * trash_init
+ */
int32_t
-trash_ftruncate (call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset)
+init(xlator_t *this)
{
- trash_elim_pattern_t *trav = NULL;
- trash_private_t *priv = NULL;
- trash_local_t *local = NULL;
- dentry_t *dir_entry = NULL;
- char *pathbuf = NULL;
- inode_t *newinode = NULL;
- char timestr[64];
- int32_t retval = 0;
- int32_t match = 0;
-
- priv = this->private;
-
- dir_entry = __dentry_search_arbit (fd->inode);
- retval = inode_path (fd->inode, NULL, &pathbuf);
-
- if (priv->eliminate) {
- trav = priv->eliminate;
- while (trav) {
- if (fnmatch(trav->pattern, dir_entry->name, 0) == 0) {
- match++;
- break;
- }
- trav = trav->next;
- }
- }
-
- if ((strncmp (pathbuf, priv->trash_dir,
- strlen (priv->trash_dir)) == 0) ||
- (offset >= priv->max_trash_file_size) ||
- (!retval) ||
- match) {
- STACK_WIND (frame, trash_common_unwind_buf_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd, offset);
- return 0;
- }
-
- local = mem_get0 (this->local_pool);
- if (!local) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- TRASH_STACK_UNWIND (ftruncate, frame, -1, ENOMEM, NULL, NULL);
- return 0;
- }
-
- gf_time_fmt (timestr, sizeof timestr, time (NULL), gf_timefmt_F_HMS);
- strcpy (local->newpath, priv->trash_dir);
- strcat (local->newpath, pathbuf);
- strcat (local->newpath, timestr);
-
- local->fd = fd_ref (fd);
- newinode = inode_new (fd->inode->table);
- local->newfd = fd_create (newinode, frame->root->pid);
- frame->local=local;
-
- local->newloc.inode = newinode;
- local->newloc.path = local->newpath;
-
- local->loc.inode = inode_ref (fd->inode);
- local->loc.path = pathbuf;
-
- local->fop_offset = offset;
- local->cur_offset = offset;
-
- STACK_WIND (frame, trash_ftruncate_fstat_cbk, this->children->xlator,
- this->children->xlator->fops->fstat, fd);
+ trash_private_t *priv = NULL;
+ int ret = -1;
+ char *tmp = NULL;
+ char *tmp_str = NULL;
+ char trash_dir[PATH_MAX] = {
+ 0,
+ };
+ uint64_t max_trash_file_size64 = 0;
+ data_t *data = NULL;
+
+ GF_VALIDATE_OR_GOTO("trash", this, out);
+
+ if (!this->children || this->children->next) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "not configured with exactly one child. exiting");
+ ret = -1;
+ goto out;
+ }
+
+ if (!this->parents) {
+ gf_log(this->name, GF_LOG_WARNING, "dangling volume. check volfile");
+ }
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_trash_mt_trash_private_t);
+ if (!priv) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+
+ /* Trash priv data members are initialized through the following
+ * set of statements
+ */
+ GF_OPTION_INIT("trash", priv->state, bool, out);
+
+ GF_OPTION_INIT("trash-dir", tmp, str, out);
+
+ /* We store trash dir value as path for easier manipulation*/
+ if (!tmp) {
+ gf_log(this->name, GF_LOG_INFO,
+ "no option specified for 'trash-dir', "
+ "using \"/.trashcan/\"");
+ priv->newtrash_dir = gf_strdup("/.trashcan/");
+ if (!priv->newtrash_dir) {
+ ret = ENOMEM;
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+ } else {
+ sprintf(trash_dir, "/%s/", tmp);
+ priv->newtrash_dir = gf_strdup(trash_dir);
+ if (!priv->newtrash_dir) {
+ ret = ENOMEM;
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+ }
+ tmp = NULL;
+
+ GF_OPTION_INIT("trash-eliminate-path", tmp, str, out);
+ if (!tmp) {
+ gf_log(this->name, GF_LOG_INFO,
+ "no option specified for 'eliminate', using NULL");
+ } else {
+ tmp_str = gf_strdup(tmp);
+ if (!tmp_str) {
+ gf_log(this->name, GF_LOG_ERROR, "out of memory");
+ ret = ENOMEM;
+ goto out;
+ }
+ ret = store_eliminate_path(tmp_str, &priv->eliminate);
+ }
+ tmp = NULL;
+
+ GF_OPTION_INIT("trash-max-filesize", max_trash_file_size64, size_uint64,
+ out);
+ if (!max_trash_file_size64) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "no option specified for 'max-trashable-file-size', "
+ "using default = %lld MB",
+ GF_DEFAULT_MAX_FILE_SIZE / GF_UNIT_MB);
+ priv->max_trash_file_size = GF_DEFAULT_MAX_FILE_SIZE;
+ } else {
+ priv->max_trash_file_size = max_trash_file_size64;
+ gf_log(this->name, GF_LOG_DEBUG, "%" GF_PRI_SIZET " max-size",
+ priv->max_trash_file_size);
+ }
+
+ GF_OPTION_INIT("trash-internal-op", priv->internal, bool, out);
+
+ this->local_pool = mem_pool_new(trash_local_t, 64);
+ if (!this->local_pool) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "failed to create local_t's memory pool");
+ ret = ENOMEM;
+ goto out;
+ }
+
+ /* For creating directories inside trash with proper permissions,
+ * we need to perform stat on that directories, for this we use
+ * brick path
+ */
+ data = dict_get(this->options, "brick-path");
+ if (!data) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "no option specified for 'brick-path'");
+ ret = ENOMEM;
+ goto out;
+ }
+ priv->brick_path = gf_strdup(data->data);
+ if (!priv->brick_path) {
+ ret = ENOMEM;
+ gf_log(this->name, GF_LOG_DEBUG, "out of memory");
+ goto out;
+ }
+
+ priv->trash_itable = inode_table_new(0, this);
+ gf_log(this->name, GF_LOG_DEBUG, "brick path is%s", priv->brick_path);
+
+ this->private = (void *)priv;
+ ret = 0;
- return 0;
+out:
+ if (tmp_str)
+ GF_FREE(tmp_str);
+ if (ret) {
+ if (priv) {
+ if (priv->newtrash_dir)
+ GF_FREE(priv->newtrash_dir);
+ if (priv->oldtrash_dir)
+ GF_FREE(priv->oldtrash_dir);
+ if (priv->brick_path)
+ GF_FREE(priv->brick_path);
+ if (priv->eliminate)
+ wipe_eliminate_path(&priv->eliminate);
+ GF_FREE(priv);
+ }
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
+ return ret;
}
/**
- * trash_init -
+ * trash_fini
*/
-int32_t
-init (xlator_t *this)
+void
+fini(xlator_t *this)
{
- data_t *data = NULL;
- trash_private_t *_priv = NULL;
- trash_elim_pattern_t *trav = NULL;
- char *tmp_str = NULL;
- char *strtokptr = NULL;
- char *component = NULL;
- char trash_dir[PATH_MAX] = {0,};
- uint64_t max_trash_file_size64 = 0;
-
- /* Create .trashcan directory in init */
- if (!this->children || this->children->next) {
- gf_log (this->name, GF_LOG_ERROR,
- "not configured with exactly one child. exiting");
- return -1;
- }
+ trash_private_t *priv = NULL;
+ inode_table_t *inode_table = NULL;
- if (!this->parents) {
- gf_log (this->name, GF_LOG_WARNING,
- "dangling volume. check volfile ");
+ GF_VALIDATE_OR_GOTO("trash", this, out);
+ priv = this->private;
+ if (priv) {
+ inode_table = priv->trash_itable;
+ if (priv->newtrash_dir) {
+ GF_FREE(priv->newtrash_dir);
+ priv->newtrash_dir = NULL;
}
-
- _priv = GF_CALLOC (1, sizeof (*_priv), gf_trash_mt_trash_private_t);
- if (!_priv) {
- gf_log (this->name, GF_LOG_ERROR, "out of memory");
- return -1;
+ if (priv->oldtrash_dir) {
+ GF_FREE(priv->oldtrash_dir);
+ priv->oldtrash_dir = NULL;
}
-
- data = dict_get (this->options, "trash-dir");
- if (!data) {
- gf_log (this->name, GF_LOG_INFO,
- "no option specified for 'trash-dir', "
- "using \"/.trashcan/\"");
- _priv->trash_dir = gf_strdup ("/.trashcan");
- } else {
- /* Need a path with '/' as the first char, if not
- given, append it */
- if (data->data[0] == '/') {
- _priv->trash_dir = gf_strdup (data->data);
- } else {
- /* TODO: Make sure there is no ".." in the path */
- strcpy (trash_dir, "/");
- strcat (trash_dir, data->data);
- _priv->trash_dir = gf_strdup (trash_dir);
- }
+ if (priv->brick_path) {
+ GF_FREE(priv->brick_path);
+ priv->brick_path = NULL;
}
-
- data = dict_get (this->options, "eliminate-pattern");
- if (!data) {
- gf_log (this->name, GF_LOG_TRACE,
- "no option specified for 'eliminate', using NULL");
- } else {
- tmp_str = gf_strdup (data->data);
- if (!tmp_str) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- }
-
- /* Match Filename to option specified in eliminate. */
- component = strtok_r (tmp_str, "|", &strtokptr);
- while (component) {
- trav = GF_CALLOC (1, sizeof (*trav),
- gf_trash_mt_trash_elim_pattern_t);
- if (!trav) {
- gf_log (this->name, GF_LOG_DEBUG, "out of memory");
- break;
- }
- trav->pattern = component;
- trav->next = _priv->eliminate;
- _priv->eliminate = trav;
-
- component = strtok_r (NULL, "|", &strtokptr);
- }
- }
-
- /* TODO: do gf_string2sizet () */
- data = dict_get (this->options, "max-trashable-file-size");
- if (!data) {
- gf_log (this->name, GF_LOG_DEBUG,
- "no option specified for 'max-trashable-file-size', "
- "using default = %lld MB",
- GF_DEFAULT_MAX_FILE_SIZE / GF_UNIT_MB);
- _priv->max_trash_file_size = GF_DEFAULT_MAX_FILE_SIZE;
- } else {
- (void)gf_string2bytesize (data->data,
- &max_trash_file_size64);
- if( max_trash_file_size64 > GF_ALLOWED_MAX_FILE_SIZE ) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Size specified for max-size(in MB) is too "
- "large so using 1GB as max-size (NOT IDEAL)");
- _priv->max_trash_file_size = GF_ALLOWED_MAX_FILE_SIZE;
- } else
- _priv->max_trash_file_size = max_trash_file_size64;
- gf_log (this->name, GF_LOG_DEBUG, "%"GF_PRI_SIZET" max-size",
- _priv->max_trash_file_size);
+ if (priv->eliminate) {
+ wipe_eliminate_path(&priv->eliminate);
+ priv->eliminate = NULL;
}
-
- this->local_pool = mem_pool_new (trash_local_t, 64);
- if (!this->local_pool) {
- gf_log (this->name, GF_LOG_ERROR,
- "failed to create local_t's memory pool");
- return -1;
+ if (inode_table) {
+ inode_table_destroy(inode_table);
+ priv->trash_itable = NULL;
}
+ GF_FREE(priv);
+ }
-
- this->private = (void *)_priv;
- return 0;
-}
-
-void
-fini (xlator_t *this)
-{
- trash_private_t *priv = NULL;
-
- priv = this->private;
- if (priv)
- GF_FREE (priv);
-
- return;
+ if (this->local_pool) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
+ this->private = NULL;
+out:
+ return;
}
struct xlator_fops fops = {
- .unlink = trash_unlink,
- .rename = trash_rename,
- .truncate = trash_truncate,
- .ftruncate = trash_ftruncate,
+ .unlink = trash_unlink,
+ .truncate = trash_truncate,
+ .ftruncate = trash_ftruncate,
+ .rmdir = trash_rmdir,
+ .mkdir = trash_mkdir,
+ .rename = trash_rename,
};
-struct xlator_cbks cbks = {
-};
+struct xlator_cbks cbks = {};
struct volume_options options[] = {
- { .key = { "trash-directory" },
- .type = GF_OPTION_TYPE_PATH,
- },
- { .key = { "eliminate-pattern" },
- .type = GF_OPTION_TYPE_STR,
- },
- { .key = { "max-trashable-file-size" },
- .type = GF_OPTION_TYPE_SIZET,
- },
- { .key = {NULL} },
+ {
+ .key = {"trash"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enable/disable trash translator",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"backup"},
+ },
+ {
+ .key = {"trash-dir"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = ".trashcan",
+ .description = "Directory for trash files",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"backup"},
+ },
+ {
+ .key = {"trash-eliminate-path"},
+ .type = GF_OPTION_TYPE_STR,
+ .description = "Eliminate paths to be excluded "
+ "from trashing",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"backup"},
+ },
+ {
+ .key = {"trash-max-filesize"},
+ .type = GF_OPTION_TYPE_SIZET,
+ .default_value = "5MB",
+ .description = "Maximum size of file that can be "
+ "moved to trash",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"backup"},
+ },
+ {
+ .key = {"trash-internal-op"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "Enable/disable trash translator for "
+ "internal operations",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"backup"},
+ },
+ {.key = {"brick-path"},
+ .type = GF_OPTION_TYPE_PATH,
+ .default_value = "{{ brick.path }}"},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "trash",
+ .category = GF_TECH_PREVIEW,
};
diff --git a/xlators/features/trash/src/trash.h b/xlators/features/trash/src/trash.h
index d385ee34657..6671617c2c6 100644
--- a/xlators/features/trash/src/trash.h
+++ b/xlators/features/trash/src/trash.h
@@ -1,89 +1,97 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __TRASH_H__
#define __TRASH_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "logging.h"
-#include "dict.h"
-#include "xlator.h"
-#include "defaults.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
#include "inode.c"
#include "fnmatch.h"
#include <libgen.h>
#ifndef GF_BLOCK_READV_SIZE
-#define GF_BLOCK_READV_SIZE (128 * GF_UNIT_KB)
+#define GF_BLOCK_READV_SIZE (128 * GF_UNIT_KB)
#endif
#ifndef GF_DEFAULT_MAX_FILE_SIZE
#define GF_DEFAULT_MAX_FILE_SIZE (200 * GF_UNIT_MB)
#endif
-#ifndef GF_ALLOWED_MAX_FILE_SIZE
-#define GF_ALLOWED_MAX_FILE_SIZE (1 * GF_UNIT_GB)
-#endif
-
-
struct trash_struct {
- fd_t *fd; /* for the fd of existing file */
- fd_t *newfd; /* for the newly created file */
- loc_t loc; /* to store the location of the existing file */
- loc_t newloc; /* to store the location for the new file */
- size_t fsize; /* for keeping the size of existing file */
- off_t cur_offset; /* current offset for read and write ops */
- off_t fop_offset;
- char origpath[PATH_MAX];
- char newpath[PATH_MAX];
- int32_t loop_count;
- struct iatt preparent;
- struct iatt postparent;
+ fd_t *fd; /* for the fd of existing file */
+ fd_t *newfd; /* for the newly created file */
+ loc_t loc; /* to store the location of the existing file */
+ loc_t newloc; /* to store the location for the new file */
+ size_t fsize; /* for keeping the size of existing file */
+ off_t cur_offset; /* current offset for read and write ops */
+ off_t fop_offset; /* original offset received with the fop */
+ pid_t pid;
+ char origpath[PATH_MAX];
+ char newpath[PATH_MAX];
+ int32_t loop_count;
+ gf_boolean_t is_set_pid;
+ struct iatt preparent;
+ struct iatt postparent;
+ gf_boolean_t ctr_link_count_req;
};
typedef struct trash_struct trash_local_t;
-struct _trash_elim_pattern;
-typedef struct _trash_elim_pattern {
- struct _trash_elim_pattern *next;
- char *pattern;
-} trash_elim_pattern_t;
+struct _trash_elim_path {
+ struct _trash_elim_path *next;
+ char *path;
+};
+typedef struct _trash_elim_path trash_elim_path;
struct trash_priv {
- char *trash_dir;
- trash_elim_pattern_t *eliminate;
- size_t max_trash_file_size;
+ char *oldtrash_dir;
+ char *newtrash_dir;
+ char *brick_path;
+ trash_elim_path *eliminate;
+ size_t max_trash_file_size;
+ gf_boolean_t state;
+ gf_boolean_t internal;
+ inode_t *trash_inode;
+ inode_table_t *trash_itable;
};
typedef struct trash_priv trash_private_t;
-#define TRASH_STACK_UNWIND(op, frame, params ...) do { \
- trash_local_t *__local = NULL; \
- __local = frame->local; \
- frame->local = NULL; \
- STACK_UNWIND_STRICT (op, frame, params); \
- trash_local_wipe (__local); \
- } while (0)
-
+#define TRASH_SET_PID(frame, local) \
+ do { \
+ GF_ASSERT(!local->is_set_pid); \
+ if (!local->is_set_pid) { \
+ local->pid = frame->root->pid; \
+ frame->root->pid = GF_SERVER_PID_TRASH; \
+ local->is_set_pid = _gf_true; \
+ } \
+ } while (0)
+
+#define TRASH_UNSET_PID(frame, local) \
+ do { \
+ GF_ASSERT(local->is_set_pid); \
+ if (local->is_set_pid) { \
+ frame->root->pid = local->pid; \
+ local->is_set_pid = _gf_false; \
+ } \
+ } while (0)
+
+#define TRASH_STACK_UNWIND(op, frame, params...) \
+ do { \
+ trash_local_t *__local = NULL; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_UNWIND_STRICT(op, frame, params); \
+ trash_local_wipe(__local); \
+ } while (0)
#endif /* __TRASH_H__ */
diff --git a/xlators/features/upcall/Makefile.am b/xlators/features/upcall/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/upcall/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/upcall/src/Makefile.am b/xlators/features/upcall/src/Makefile.am
new file mode 100644
index 00000000000..72b7f55ae0a
--- /dev/null
+++ b/xlators/features/upcall/src/Makefile.am
@@ -0,0 +1,23 @@
+if WITH_SERVER
+xlator_LTLIBRARIES = upcall.la
+endif
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+upcall_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+
+upcall_la_SOURCES = upcall.c upcall-internal.c
+
+upcall_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la
+
+noinst_HEADERS = upcall.h upcall-mem-types.h upcall-messages.h \
+ upcall-cache-invalidation.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall -fno-strict-aliasing $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/features/upcall/src/upcall-cache-invalidation.h b/xlators/features/upcall/src/upcall-cache-invalidation.h
new file mode 100644
index 00000000000..db649b2c9a6
--- /dev/null
+++ b/xlators/features/upcall/src/upcall-cache-invalidation.h
@@ -0,0 +1,18 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __UPCALL_CACHE_INVALIDATION_H__
+#define __UPCALL_CACHE_INVALIDATION_H__
+
+/* The time period for which a client will be notified of cache_invalidation
+ * events post its last access */
+#define CACHE_INVALIDATION_TIMEOUT "60"
+
+#endif /* __UPCALL_CACHE_INVALIDATION_H__ */
diff --git a/xlators/features/upcall/src/upcall-internal.c b/xlators/features/upcall/src/upcall-internal.c
new file mode 100644
index 00000000000..c641bd6f432
--- /dev/null
+++ b/xlators/features/upcall/src/upcall-internal.c
@@ -0,0 +1,689 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
+
+#include <glusterfs/statedump.h>
+#include <glusterfs/syncop.h>
+
+#include "upcall.h"
+#include "upcall-mem-types.h"
+#include "glusterfs3-xdr.h"
+#include "protocol-common.h"
+#include <glusterfs/defaults.h>
+
+/*
+ * Check if any of the upcall options are enabled:
+ * - cache_invalidation
+ */
+gf_boolean_t
+is_upcall_enabled(xlator_t *this)
+{
+ upcall_private_t *priv = NULL;
+
+ if (this->private) {
+ priv = (upcall_private_t *)this->private;
+ return priv->cache_invalidation_enabled;
+ }
+
+ return _gf_false;
+}
+
+/*
+ * Get the cache_invalidation_timeout
+ */
+static int32_t
+get_cache_invalidation_timeout(xlator_t *this)
+{
+ upcall_private_t *priv = NULL;
+
+ if (this->private) {
+ priv = (upcall_private_t *)this->private;
+ return priv->cache_invalidation_timeout;
+ }
+
+ return 0;
+}
+
+static upcall_client_t *
+__add_upcall_client(call_frame_t *frame, client_t *client,
+ upcall_inode_ctx_t *up_inode_ctx, time_t now)
+{
+ upcall_client_t *up_client_entry = GF_MALLOC(
+ sizeof(*up_client_entry), gf_upcall_mt_upcall_client_entry_t);
+ if (!up_client_entry) {
+ gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_NO_MEMORY,
+ "Memory allocation failed");
+ return NULL;
+ }
+ INIT_LIST_HEAD(&up_client_entry->client_list);
+ up_client_entry->client_uid = gf_strdup(client->client_uid);
+ up_client_entry->access_time = now;
+ up_client_entry->expire_time_attr = get_cache_invalidation_timeout(
+ frame->this);
+
+ list_add_tail(&up_client_entry->client_list, &up_inode_ctx->client_list);
+
+ gf_log(THIS->name, GF_LOG_DEBUG, "upcall_entry_t client added - %s",
+ up_client_entry->client_uid);
+
+ return up_client_entry;
+}
+
+static int
+__upcall_inode_ctx_set(inode_t *inode, xlator_t *this)
+{
+ upcall_inode_ctx_t *inode_ctx = NULL;
+ upcall_private_t *priv = NULL;
+ int ret = -1;
+ uint64_t ctx = 0;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = __inode_ctx_get(inode, this, &ctx);
+
+ if (!ret)
+ goto out;
+
+ inode_ctx = GF_MALLOC(sizeof(upcall_inode_ctx_t),
+ gf_upcall_mt_upcall_inode_ctx_t);
+
+ if (!inode_ctx) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ pthread_mutex_init(&inode_ctx->client_list_lock, NULL);
+ INIT_LIST_HEAD(&inode_ctx->inode_ctx_list);
+ INIT_LIST_HEAD(&inode_ctx->client_list);
+ inode_ctx->destroy = 0;
+ gf_uuid_copy(inode_ctx->gfid, inode->gfid);
+
+ ctx = (long)inode_ctx;
+ ret = __inode_ctx_set(inode, this, &ctx);
+ if (ret) {
+ gf_log(this->name, GF_LOG_DEBUG, "failed to set inode ctx (%p)", inode);
+ GF_FREE(inode_ctx);
+ goto out;
+ }
+
+ /* add this inode_ctx to the global list */
+ LOCK(&priv->inode_ctx_lk);
+ {
+ list_add_tail(&inode_ctx->inode_ctx_list, &priv->inode_ctx_list);
+ }
+ UNLOCK(&priv->inode_ctx_lk);
+out:
+ return ret;
+}
+
+static upcall_inode_ctx_t *
+__upcall_inode_ctx_get(inode_t *inode, xlator_t *this)
+{
+ upcall_inode_ctx_t *inode_ctx = NULL;
+ uint64_t ctx = 0;
+ int ret = 0;
+
+ ret = __inode_ctx_get(inode, this, &ctx);
+
+ if (ret < 0) {
+ ret = __upcall_inode_ctx_set(inode, this);
+ if (ret < 0)
+ goto out;
+
+ ret = __inode_ctx_get(inode, this, &ctx);
+ if (ret < 0)
+ goto out;
+ }
+
+ inode_ctx = (upcall_inode_ctx_t *)(long)(ctx);
+
+out:
+ return inode_ctx;
+}
+
+upcall_inode_ctx_t *
+upcall_inode_ctx_get(inode_t *inode, xlator_t *this)
+{
+ upcall_inode_ctx_t *inode_ctx = NULL;
+
+ LOCK(&inode->lock);
+ {
+ inode_ctx = __upcall_inode_ctx_get(inode, this);
+ }
+ UNLOCK(&inode->lock);
+
+ return inode_ctx;
+}
+
+static int
+__upcall_cleanup_client_entry(upcall_client_t *up_client)
+{
+ list_del_init(&up_client->client_list);
+
+ GF_FREE(up_client->client_uid);
+ GF_FREE(up_client);
+
+ return 0;
+}
+
+static int
+upcall_cleanup_expired_clients(xlator_t *this, upcall_inode_ctx_t *up_inode_ctx,
+ time_t now)
+{
+ upcall_client_t *up_client = NULL;
+ upcall_client_t *tmp = NULL;
+ int ret = -1;
+ time_t timeout = 0;
+ time_t t_expired = 0;
+
+ timeout = get_cache_invalidation_timeout(this);
+
+ pthread_mutex_lock(&up_inode_ctx->client_list_lock);
+ {
+ list_for_each_entry_safe(up_client, tmp, &up_inode_ctx->client_list,
+ client_list)
+ {
+ t_expired = now - up_client->access_time;
+
+ if (t_expired > (2 * timeout)) {
+ gf_log(THIS->name, GF_LOG_TRACE, "Cleaning up client_entry(%s)",
+ up_client->client_uid);
+
+ ret = __upcall_cleanup_client_entry(up_client);
+
+ if (ret) {
+ gf_msg("upcall", GF_LOG_WARNING, 0,
+ UPCALL_MSG_INTERNAL_ERROR,
+ "Client entry cleanup failed (%p)", up_client);
+ goto out;
+ }
+ }
+ }
+ }
+ pthread_mutex_unlock(&up_inode_ctx->client_list_lock);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/*
+ * Free Upcall inode_ctx client list
+ */
+int
+__upcall_cleanup_inode_ctx_client_list(upcall_inode_ctx_t *inode_ctx)
+{
+ upcall_client_t *up_client = NULL;
+ upcall_client_t *tmp = NULL;
+
+ list_for_each_entry_safe(up_client, tmp, &inode_ctx->client_list,
+ client_list)
+ {
+ __upcall_cleanup_client_entry(up_client);
+ }
+
+ return 0;
+}
+
+static void
+upcall_cache_forget(xlator_t *this, inode_t *inode,
+ upcall_inode_ctx_t *up_inode_ctx);
+
+/*
+ * Free upcall_inode_ctx
+ */
+int
+upcall_cleanup_inode_ctx(xlator_t *this, inode_t *inode)
+{
+ uint64_t ctx = 0;
+ upcall_inode_ctx_t *inode_ctx = NULL;
+ int ret = 0;
+ upcall_private_t *priv = NULL;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = inode_ctx_del(inode, this, &ctx);
+
+ if (ret < 0) {
+ gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_INTERNAL_ERROR,
+ "Failed to del upcall_inode_ctx (%p)", inode);
+ goto out;
+ }
+
+ inode_ctx = (upcall_inode_ctx_t *)(long)ctx;
+
+ if (inode_ctx) {
+ /* Invalidate all the upcall cache entries */
+ upcall_cache_forget(this, inode, inode_ctx);
+
+ /* do we really need lock? yes now reaper thread
+ * may also be trying to cleanup the client entries.
+ */
+ pthread_mutex_lock(&inode_ctx->client_list_lock);
+ {
+ if (!list_empty(&inode_ctx->client_list)) {
+ __upcall_cleanup_inode_ctx_client_list(inode_ctx);
+ }
+ }
+ pthread_mutex_unlock(&inode_ctx->client_list_lock);
+
+ /* Mark the inode_ctx to be destroyed */
+ inode_ctx->destroy = 1;
+ gf_msg_debug("upcall", 0, "set upcall_inode_ctx (%p) to destroy mode",
+ inode_ctx);
+ }
+
+out:
+ return ret;
+}
+
+/*
+ * Traverse through the list of upcall_inode_ctx(s),
+ * cleanup the expired client entries and destroy the ctx
+ * which is no longer valid and has destroy bit set.
+ */
+void *
+upcall_reaper_thread(void *data)
+{
+ upcall_private_t *priv = NULL;
+ upcall_inode_ctx_t *inode_ctx = NULL;
+ upcall_inode_ctx_t *tmp = NULL;
+ xlator_t *this = NULL;
+ time_t timeout = 0;
+ time_t time_now;
+
+ this = (xlator_t *)data;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ time_now = gf_time();
+ while (!priv->fini) {
+ list_for_each_entry_safe(inode_ctx, tmp, &priv->inode_ctx_list,
+ inode_ctx_list)
+ {
+ /* cleanup expired clients */
+ upcall_cleanup_expired_clients(this, inode_ctx, time_now);
+
+ if (!inode_ctx->destroy) {
+ continue;
+ }
+
+ /* client list would have been cleaned up*/
+ gf_msg_debug("upcall", 0, "Freeing upcall_inode_ctx (%p)",
+ inode_ctx);
+ LOCK(&priv->inode_ctx_lk);
+ {
+ list_del_init(&inode_ctx->inode_ctx_list);
+ pthread_mutex_destroy(&inode_ctx->client_list_lock);
+ }
+ UNLOCK(&priv->inode_ctx_lk);
+ GF_FREE(inode_ctx);
+ inode_ctx = NULL;
+ }
+
+ /* don't do a very busy loop */
+ timeout = get_cache_invalidation_timeout(this);
+ sleep(timeout / 2);
+ time_now = gf_time();
+ }
+
+ return NULL;
+}
+
+/*
+ * Initialize upcall reaper thread.
+ */
+int
+upcall_reaper_thread_init(xlator_t *this)
+{
+ upcall_private_t *priv = NULL;
+ int ret = -1;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = gf_thread_create(&priv->reaper_thr, NULL, upcall_reaper_thread, this,
+ "upreaper");
+
+ return ret;
+}
+
+int
+up_compare_afr_xattr(dict_t *d, char *k, data_t *v, void *tmp)
+{
+ dict_t *dict = tmp;
+
+ if (!strncmp(k, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX)) &&
+ (!is_data_equal(v, dict_get(dict, k))))
+ return -1;
+
+ return 0;
+}
+
+static void
+up_filter_afr_xattr(dict_t *xattrs, char *xattr, data_t *v)
+{
+ /* Filter the afr pending xattrs, with value 0. Ideally this should
+ * be executed only in case of xattrop and not in set and removexattr,
+ * butset and remove xattr fops do not come with keys AFR_XATTR_PREFIX
+ */
+ if (!strncmp(xattr, AFR_XATTR_PREFIX, SLEN(AFR_XATTR_PREFIX)) &&
+ (mem_0filled(v->data, v->len) == 0)) {
+ dict_del(xattrs, xattr);
+ }
+ return;
+}
+
+static gf_boolean_t
+up_key_is_regd_xattr(dict_t *regd_xattrs, char *regd_xattr, data_t *v,
+ void *xattr)
+{
+ int ret = _gf_false;
+ char *key = xattr;
+
+ if (fnmatch(regd_xattr, key, 0) == 0)
+ ret = _gf_true;
+
+ return ret;
+}
+
+int
+up_filter_unregd_xattr(dict_t *xattrs, char *xattr, data_t *v,
+ void *regd_xattrs)
+{
+ int ret = 0;
+
+ ret = dict_foreach_match(regd_xattrs, up_key_is_regd_xattr, xattr,
+ dict_null_foreach_fn, NULL);
+ if (ret == 0) {
+ /* xattr was not found in the registered xattr, hence do not
+ * send notification for its change
+ */
+ dict_del(xattrs, xattr);
+ goto out;
+ }
+ up_filter_afr_xattr(xattrs, xattr, v);
+out:
+ return 0;
+}
+
+int
+up_filter_xattr(dict_t *xattr, dict_t *regd_xattrs)
+{
+ int ret = 0;
+
+ ret = dict_foreach(xattr, up_filter_unregd_xattr, regd_xattrs);
+
+ return ret;
+}
+
+static void
+upcall_client_cache_invalidate(xlator_t *this, uuid_t gfid,
+ upcall_client_t *up_client_entry, uint32_t flags,
+ struct iatt *stbuf, struct iatt *p_stbuf,
+ struct iatt *oldp_stbuf, dict_t *xattr,
+ time_t now);
+
+gf_boolean_t
+up_invalidate_needed(dict_t *xattrs)
+{
+ if (dict_key_count(xattrs) == 0) {
+ gf_msg_trace("upcall", 0,
+ "None of xattrs requested for"
+ " invalidation, were changed. Nothing to "
+ "invalidate");
+ return _gf_false;
+ }
+
+ return _gf_true;
+}
+
+/*
+ * Given a client, first fetch upcall_entry_t from the inode_ctx client list.
+ * Later traverse through the client list of that upcall entry. If this client
+ * is not present in the list, create one client entry with this client info.
+ * Also check if there are other clients which need to be notified of this
+ * op. If yes send notify calls to them.
+ *
+ * Since sending notifications for cache_invalidation is a best effort,
+ * any errors during the process are logged and ignored.
+ */
+void
+upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
+ inode_t *inode, uint32_t flags, struct iatt *stbuf,
+ struct iatt *p_stbuf, struct iatt *oldp_stbuf,
+ dict_t *xattr)
+{
+ upcall_client_t *up_client_entry = NULL;
+ upcall_client_t *tmp = NULL;
+ upcall_inode_ctx_t *up_inode_ctx = NULL;
+ gf_boolean_t found = _gf_false;
+ time_t time_now;
+ inode_t *linked_inode = NULL;
+
+ if (!is_upcall_enabled(this))
+ return;
+
+ /* server-side generated fops like quota/marker will not have any
+ * client associated with them. Ignore such fops.
+ */
+ if (!client) {
+ gf_msg_debug("upcall", 0, "Internal fop - client NULL");
+ return;
+ }
+
+ /* For nameless LOOKUPs, inode created shall always be
+ * invalid. Hence check if there is any already linked inode.
+ * If yes, update the inode_ctx of that valid inode
+ */
+ if (inode && (inode->ia_type == IA_INVAL) && stbuf) {
+ linked_inode = inode_find(inode->table, stbuf->ia_gfid);
+ if (linked_inode) {
+ gf_log("upcall", GF_LOG_DEBUG,
+ "upcall_inode_ctx_get of linked inode (%p)", inode);
+ up_inode_ctx = upcall_inode_ctx_get(linked_inode, this);
+ }
+ }
+
+ if (inode && !up_inode_ctx)
+ up_inode_ctx = upcall_inode_ctx_get(inode, this);
+
+ if (!up_inode_ctx) {
+ gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_INTERNAL_ERROR,
+ "upcall_inode_ctx_get failed (%p)", inode);
+ return;
+ }
+
+ /* In case of LOOKUP, if first time, inode created shall be
+ * invalid till it gets linked to inode table. Read gfid from
+ * the stat returned in such cases.
+ */
+ if (gf_uuid_is_null(up_inode_ctx->gfid) && stbuf) {
+ /* That means inode must have been invalid when this inode_ctx
+ * is created. Copy the gfid value from stbuf instead.
+ */
+ gf_uuid_copy(up_inode_ctx->gfid, stbuf->ia_gfid);
+ }
+
+ if (gf_uuid_is_null(up_inode_ctx->gfid)) {
+ gf_msg_debug(this->name, 0,
+ "up_inode_ctx->gfid and "
+ "stbuf->ia_gfid is NULL, fop:%s",
+ gf_fop_list[frame->root->op]);
+ goto out;
+ }
+
+ time_now = gf_time();
+ pthread_mutex_lock(&up_inode_ctx->client_list_lock);
+ {
+ list_for_each_entry_safe(up_client_entry, tmp,
+ &up_inode_ctx->client_list, client_list)
+ {
+ /* Do not send UPCALL event if same client. */
+ if (!strcmp(client->client_uid, up_client_entry->client_uid)) {
+ up_client_entry->access_time = time_now;
+ found = _gf_true;
+ continue;
+ }
+
+ /*
+ * Ignore sending notifications in case of only UP_ATIME
+ */
+ if (!(flags & ~(UP_ATIME))) {
+ if (found)
+ break;
+ else /* we still need to find current client entry*/
+ continue;
+ }
+
+ /* any other client */
+
+ /* XXX: Send notifications asynchrounously
+ * instead of in the I/O path - BZ 1200264
+ * Also if the file is frequently accessed, set
+ * expire_time_attr to 0.
+ */
+ upcall_client_cache_invalidate(
+ this, up_inode_ctx->gfid, up_client_entry, flags, stbuf,
+ p_stbuf, oldp_stbuf, xattr, time_now);
+ }
+
+ if (!found) {
+ up_client_entry = __add_upcall_client(frame, client, up_inode_ctx,
+ time_now);
+ }
+ }
+ pthread_mutex_unlock(&up_inode_ctx->client_list_lock);
+out:
+ /* release the ref from inode_find */
+ if (linked_inode)
+ inode_unref(linked_inode);
+ return;
+}
+
+/*
+ * If the upcall_client_t has recently accessed the file (i.e, within
+ * priv->cache_invalidation_timeout), send a upcall notification.
+ */
+static void
+upcall_client_cache_invalidate(xlator_t *this, uuid_t gfid,
+ upcall_client_t *up_client_entry, uint32_t flags,
+ struct iatt *stbuf, struct iatt *p_stbuf,
+ struct iatt *oldp_stbuf, dict_t *xattr,
+ time_t now)
+{
+ struct gf_upcall up_req = {
+ 0,
+ };
+ struct gf_upcall_cache_invalidation ca_req = {
+ 0,
+ };
+ time_t timeout = 0;
+ int ret = -1;
+ time_t t_expired = now - up_client_entry->access_time;
+
+ GF_VALIDATE_OR_GOTO("upcall_client_cache_invalidate",
+ !(gf_uuid_is_null(gfid)), out);
+ timeout = get_cache_invalidation_timeout(this);
+
+ if (t_expired < timeout) {
+ /* Send notify call */
+ up_req.client_uid = up_client_entry->client_uid;
+ gf_uuid_copy(up_req.gfid, gfid);
+
+ ca_req.flags = flags;
+ ca_req.expire_time_attr = up_client_entry->expire_time_attr;
+ if (stbuf)
+ ca_req.stat = *stbuf;
+ if (p_stbuf)
+ ca_req.p_stat = *p_stbuf;
+ if (oldp_stbuf)
+ ca_req.oldp_stat = *oldp_stbuf;
+ ca_req.dict = xattr;
+
+ up_req.data = &ca_req;
+ up_req.event_type = GF_UPCALL_CACHE_INVALIDATION;
+
+ gf_log(THIS->name, GF_LOG_TRACE,
+ "Cache invalidation notification sent to %s",
+ up_client_entry->client_uid);
+
+ /* Need to send inode flags */
+ ret = this->notify(this, GF_EVENT_UPCALL, &up_req);
+
+ /*
+ * notify may fail as the client could have been
+ * dis(re)connected. Cleanup the client entry.
+ */
+ if (ret < 0)
+ __upcall_cleanup_client_entry(up_client_entry);
+
+ } else {
+ gf_log(THIS->name, GF_LOG_TRACE,
+ "Cache invalidation notification NOT sent to %s",
+ up_client_entry->client_uid);
+
+ if (t_expired > (2 * timeout)) {
+ /* Cleanup the entry */
+ __upcall_cleanup_client_entry(up_client_entry);
+ }
+ }
+out:
+ return;
+}
+
+/*
+ * This is called during upcall_inode_ctx cleanup in case of 'inode_forget'.
+ * Send "UP_FORGET" to all the clients so that they invalidate their cache
+ * entry and do a fresh lookup next time when any I/O comes in.
+ */
+static void
+upcall_cache_forget(xlator_t *this, inode_t *inode,
+ upcall_inode_ctx_t *up_inode_ctx)
+{
+ upcall_client_t *up_client_entry = NULL;
+ upcall_client_t *tmp = NULL;
+ uint32_t flags = UP_FORGET;
+ time_t time_now;
+
+ if (!up_inode_ctx) {
+ return;
+ }
+
+ time_now = gf_time();
+ pthread_mutex_lock(&up_inode_ctx->client_list_lock);
+ {
+ list_for_each_entry_safe(up_client_entry, tmp,
+ &up_inode_ctx->client_list, client_list)
+ {
+ /* Set the access time to gf_time()
+ * to send notify */
+ up_client_entry->access_time = time_now;
+
+ upcall_client_cache_invalidate(this, up_inode_ctx->gfid,
+ up_client_entry, flags, NULL, NULL,
+ NULL, NULL, time_now);
+ }
+ }
+ pthread_mutex_unlock(&up_inode_ctx->client_list_lock);
+}
diff --git a/xlators/features/upcall/src/upcall-mem-types.h b/xlators/features/upcall/src/upcall-mem-types.h
new file mode 100644
index 00000000000..f9883d9d72c
--- /dev/null
+++ b/xlators/features/upcall/src/upcall-mem-types.h
@@ -0,0 +1,23 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __UPCALL_MEM_TYPES_H__
+#define __UPCALL_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_upcall_mem_types_ {
+ gf_upcall_mt_conf_t = gf_common_mt_end + 1,
+ gf_upcall_mt_private_t,
+ gf_upcall_mt_upcall_inode_ctx_t,
+ gf_upcall_mt_upcall_client_entry_t,
+ gf_upcall_mt_end
+};
+#endif
diff --git a/xlators/features/upcall/src/upcall-messages.h b/xlators/features/upcall/src/upcall-messages.h
new file mode 100644
index 00000000000..4095a34c200
--- /dev/null
+++ b/xlators/features/upcall/src/upcall-messages.h
@@ -0,0 +1,29 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#ifndef _UPCALL_MESSAGES_H_
+#define _UPCALL_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(UPCALL, UPCALL_MSG_NO_MEMORY, UPCALL_MSG_INTERNAL_ERROR,
+ UPCALL_MSG_NOTIFY_FAILED);
+
+#endif /* !_UPCALL_MESSAGES_H_ */
diff --git a/xlators/features/upcall/src/upcall.c b/xlators/features/upcall/src/upcall.c
new file mode 100644
index 00000000000..0795f58059d
--- /dev/null
+++ b/xlators/features/upcall/src/upcall.c
@@ -0,0 +1,2505 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <unistd.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/common-utils.h>
+
+#include <glusterfs/statedump.h>
+
+#include "upcall.h"
+#include "upcall-mem-types.h"
+#include "glusterfs3-xdr.h"
+#include "protocol-common.h"
+#include <glusterfs/defaults.h>
+
+static int32_t
+up_open_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(open, frame, op_ret, op_errno, fd, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ fd_t *fd, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_open_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->open, loc, flags, fd, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(open, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_writev_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_WRITE_FLAGS;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, postbuf,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(writev, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+static int32_t
+up_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *vector,
+ int count, off_t off, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_writev_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->writev, fd, vector, count, off, flags,
+ iobref, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(writev, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_readv_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iovec *vector, int count, struct iatt *stbuf,
+ struct iobref *iobref, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, stbuf,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(readv, frame, op_ret, op_errno, vector, count, stbuf,
+ iobref, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_readv_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readv, fd, size, offset, flags, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(readv, frame, -1, op_errno, NULL, 0, NULL, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_lk_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct gf_flock *lock, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(lk, frame, op_ret, op_errno, lock, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *flock, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_lk_cbk, FIRST_CHILD(this), FIRST_CHILD(this)->fops->lk,
+ fd, cmd, flock, xdata);
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(lk, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_truncate_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *prebuf, struct iatt *postbuf,
+ dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_WRITE_FLAGS;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, postbuf,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(truncate, frame, op_ret, op_errno, prebuf, postbuf,
+ xdata);
+
+ return 0;
+}
+
+static int32_t
+up_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->truncate, loc, offset, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(truncate, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_setattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *statpre, struct iatt *statpost,
+ dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ /* XXX: setattr -> UP_SIZE or UP_OWN or UP_MODE or UP_TIMES
+ * or INODE_UPDATE (or UP_PERM esp in case of ACLs -> INODE_INVALIDATE)
+ * Need to check what attr is changed and accordingly pass UP_FLAGS.
+ * Bug1200271.
+ */
+ flags = UP_ATTR_FLAGS;
+ /* If mode bits have changed invalidate the xattrs, as posix-acl and
+ * others store permission related information in xattrs. With changing
+ * of permissions/mode, we need to make clients to forget all the
+ * xattrs related to permissions.
+ * TODO: Invalidate the xattr system.posix_acl_access alone.
+ */
+ if (is_same_mode(statpre->ia_prot, statpost->ia_prot) != 0)
+ flags |= UP_XATTR;
+
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, statpost,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(setattr, frame, op_ret, op_errno, statpre, statpost,
+ xdata);
+
+ return 0;
+}
+
+static int32_t
+up_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setattr, loc, stbuf, valid, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(setattr, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_rename_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *stbuf, struct iatt *preoldparent,
+ struct iatt *postoldparent, struct iatt *prenewparent,
+ struct iatt *postnewparent, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = (UP_RENAME_FLAGS | UP_PARENT_DENTRY_FLAGS);
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, stbuf,
+ postnewparent, postoldparent, NULL);
+
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->rename_oldloc.parent,
+ flags, postoldparent, NULL, NULL, NULL);
+
+ if (local->rename_oldloc.parent == local->loc.parent)
+ goto out;
+
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->loc.parent, flags,
+ postnewparent, NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(rename, frame, op_ret, op_errno, stbuf, preoldparent,
+ postoldparent, prenewparent, postnewparent, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+ /* copy oldloc */
+ loc_copy(&local->rename_oldloc, oldloc);
+out:
+ STACK_WIND(frame, up_rename_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rename, oldloc, newloc, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(rename, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_unlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = (UP_NLINK_FLAGS | UP_PARENT_DENTRY_FLAGS);
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL,
+ postparent, NULL, NULL);
+
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->loc.parent, flags,
+ postparent, NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(unlink, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+
+ return 0;
+}
+
+static int32_t
+up_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, loc, NULL, loc->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_unlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->unlink, loc, xflag, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(unlink, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_link_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = (UP_NLINK_FLAGS | UP_PARENT_DENTRY_FLAGS);
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, stbuf,
+ postparent, NULL, NULL);
+
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->loc.parent, flags,
+ postparent, NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(link, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc, loc_t *newloc,
+ dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, newloc, NULL, oldloc->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_link_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->link, oldloc, newloc, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(link, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
+}
+
+static int32_t
+up_rmdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, struct iatt *preparent, struct iatt *postparent,
+ dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+
+ flags = (UP_NLINK_FLAGS | UP_PARENT_DENTRY_FLAGS);
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL,
+ postparent, NULL, NULL);
+
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->loc.parent, flags,
+ postparent, NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(rmdir, frame, op_ret, op_errno, preparent, postparent,
+ xdata);
+
+ return 0;
+}
+
+static int32_t
+up_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, loc, NULL, loc->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_rmdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->rmdir, loc, flags, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(rmdir, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_mkdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+
+ /* invalidate parent's entry too */
+ flags = UP_TIMES;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags,
+ postparent, NULL, NULL, NULL);
+
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->loc.inode, flags, stbuf,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(mkdir, frame, op_ret, op_errno, inode, stbuf, preparent,
+ postparent, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *params)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_mkdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mkdir, loc, mode, umask, params);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(mkdir, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
+}
+
+static int32_t
+up_create_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, fd_t *fd, inode_t *inode, struct iatt *stbuf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+
+ /* As its a new file create, no need of sending notification
+ * However invalidate parent's entry and update that fact that the
+ * client has accessed the newly created entry */
+ flags = UP_TIMES;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags,
+ postparent, NULL, NULL, NULL);
+
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->loc.inode, flags, stbuf,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(create, frame, op_ret, op_errno, fd, inode, stbuf,
+ preparent, postparent, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_create(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
+ mode_t mode, mode_t umask, fd_t *fd, dict_t *params)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_create_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->create, loc, flags, mode, umask, fd,
+ params);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(create, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, inode_t *inode, struct iatt *stbuf, dict_t *xattr,
+ struct iatt *postparent)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, stbuf,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(lookup, frame, op_ret, op_errno, inode, stbuf, xattr,
+ postparent);
+
+ return 0;
+}
+
+static int32_t
+up_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xattr_req)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xattr_req);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_stat_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct iatt *buf, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, buf, NULL,
+ NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(stat, frame, op_ret, op_errno, buf, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->stat, loc, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(stat, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_stat_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fstat, fd, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(fstat, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_truncate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ftruncate, fd, offset, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(ftruncate, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_access_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(access, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_access(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t mask,
+ dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_access_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->access, loc, mask, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(access, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_readlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, const char *path, struct iatt *stbuf,
+ dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, stbuf,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(readlink, frame, op_ret, op_errno, path, stbuf, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_readlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readlink, loc, size, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(readlink, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_mknod_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, inode_t *inode, struct iatt *buf,
+ struct iatt *preparent, struct iatt *postparent, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+
+ /* invalidate parent's entry too */
+ flags = UP_TIMES;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags,
+ postparent, NULL, NULL, NULL);
+
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->loc.inode, flags, buf,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(mknod, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_mknod_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->mknod, loc, mode, rdev, umask, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(mknod, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
+}
+
+static int32_t
+up_symlink_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *buf, struct iatt *preparent,
+ struct iatt *postparent, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+
+ /* invalidate parent's entry too */
+ flags = UP_TIMES;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags,
+ postparent, NULL, NULL, NULL);
+
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->loc.inode, flags, buf,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(symlink, frame, op_ret, op_errno, inode, buf, preparent,
+ postparent, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, loc, NULL, loc->parent, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_symlink_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->symlink, linkpath, loc, umask, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(symlink, frame, -1, op_errno, NULL, NULL, NULL, NULL,
+ NULL);
+
+ return 0;
+}
+
+static int32_t
+up_opendir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, fd_t *fd, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(opendir, frame, op_ret, op_errno, fd, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_opendir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->opendir, loc, fd, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(opendir, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_statfs_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int32_t op_ret,
+ int32_t op_errno, struct statvfs *buf, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(statfs, frame, op_ret, op_errno, buf, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_statfs_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->statfs, loc, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(statfs, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_readdir_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(readdir, frame, op_ret, op_errno, entries, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_readdir_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdir, fd, size, off, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(readdir, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_readdirp_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, gf_dirent_t *entries,
+ dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+ gf_dirent_t *entry = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL,
+ NULL, NULL, NULL);
+
+ list_for_each_entry(entry, &entries->list, list)
+ {
+ if (entry->inode == NULL) {
+ continue;
+ }
+ upcall_cache_invalidate(frame, this, client, entry->inode, flags,
+ &entry->d_stat, NULL, NULL, NULL);
+ }
+
+out:
+ UPCALL_STACK_UNWIND(readdirp, frame, op_ret, op_errno, entries, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *dict)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_readdirp_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->readdirp, fd, size, off, dict);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(readdirp, frame, -1, op_errno, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iatt *stbuf,
+ int32_t valid, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_setattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetattr, fd, stbuf, valid, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(fsetattr, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_fallocate_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_WRITE_FLAGS;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, post,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(fallocate, frame, op_ret, op_errno, pre, post, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t mode,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_fallocate_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fallocate, fd, mode, offset, len,
+ xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(fallocate, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_discard_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_WRITE_FLAGS;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, post,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(discard, frame, op_ret, op_errno, pre, post, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_discard(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ size_t len, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_discard_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->discard, fd, offset, len, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(discard, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_zerofill_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, struct iatt *pre,
+ struct iatt *post, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_WRITE_FLAGS;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, post,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(zerofill, frame, op_ret, op_errno, pre, post, xdata);
+
+ return 0;
+}
+
+static int
+up_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ off_t len, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_zerofill_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->zerofill, fd, offset, len, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(zerofill, frame, -1, op_errno, NULL, NULL, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+ int op_errno, off_t offset, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(seek, frame, op_ret, op_errno, offset, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ gf_seek_what_t what, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_seek_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->seek, fd, offset, what, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(seek, frame, -1, op_errno, 0, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_setxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+ int ret = 0;
+ struct iatt stbuf = {
+ 0,
+ };
+ upcall_private_t *priv = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+
+ flags = UP_XATTR;
+
+ ret = up_filter_xattr(local->xattr, priv->xattrs);
+ if (ret < 0) {
+ op_ret = ret;
+ goto out;
+ }
+ if (!up_invalidate_needed(local->xattr))
+ goto out;
+
+ ret = dict_get_iatt(xdata, GF_POSTSTAT, &stbuf);
+ if (ret == 0)
+ flags |= UP_TIMES;
+
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, &stbuf,
+ NULL, NULL, local->xattr);
+
+out:
+ UPCALL_STACK_UNWIND(setxattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, loc, NULL, loc->inode, dict);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_setxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->setxattr, loc, dict, flags, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(setxattr, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_fsetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+ int ret = 0;
+ struct iatt stbuf = {
+ 0,
+ };
+ upcall_private_t *priv = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+
+ flags = UP_XATTR;
+
+ ret = up_filter_xattr(local->xattr, priv->xattrs);
+ if (ret < 0) {
+ op_ret = ret;
+ goto out;
+ }
+ if (!up_invalidate_needed(local->xattr))
+ goto out;
+
+ ret = dict_get_iatt(xdata, GF_POSTSTAT, &stbuf);
+ if (ret == 0)
+ flags |= UP_TIMES;
+
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, &stbuf,
+ NULL, NULL, local->xattr);
+
+out:
+ UPCALL_STACK_UNWIND(fsetxattr, frame, op_ret, op_errno, xdata);
+
+ return 0;
+}
+
+static int32_t
+up_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *dict,
+ int32_t flags, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, fd, fd->inode, dict);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_fsetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fsetxattr, fd, dict, flags, xdata);
+
+ return 0;
+
+err:
+ UPCALL_STACK_UNWIND(fsetxattr, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_fremovexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ int ret = 0;
+ upcall_private_t *priv = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_XATTR_RM;
+
+ ret = up_filter_xattr(local->xattr, priv->xattrs);
+ if (ret < 0) {
+ op_ret = ret;
+ goto out;
+ }
+ if (!up_invalidate_needed(local->xattr))
+ goto out;
+
+ ret = dict_get_iatt(xdata, GF_POSTSTAT, &stbuf);
+ if (ret == 0)
+ flags |= UP_TIMES;
+
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, &stbuf,
+ NULL, NULL, local->xattr);
+
+out:
+ UPCALL_STACK_UNWIND(fremovexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int32_t
+up_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+ dict_t *xattr = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ xattr = dict_for_key_value(name, "", 1, _gf_true);
+ if (!xattr) {
+ goto err;
+ }
+
+ local = upcall_local_init(frame, this, NULL, fd, fd->inode, xattr);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+
+ STACK_WIND(frame, up_fremovexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fremovexattr, fd, name, xdata);
+ return 0;
+
+err:
+ if (xattr)
+ dict_unref(xattr);
+
+ UPCALL_STACK_UNWIND(fremovexattr, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_removexattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+ struct iatt stbuf = {
+ 0,
+ };
+ int ret = 0;
+ upcall_private_t *priv = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+ flags = UP_XATTR_RM;
+
+ ret = up_filter_xattr(local->xattr, priv->xattrs);
+ if (ret < 0) {
+ op_ret = ret;
+ goto out;
+ }
+ if (!up_invalidate_needed(local->xattr))
+ goto out;
+
+ ret = dict_get_iatt(xdata, GF_POSTSTAT, &stbuf);
+ if (ret == 0)
+ flags |= UP_TIMES;
+
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, &stbuf,
+ NULL, NULL, local->xattr);
+
+out:
+ UPCALL_STACK_UNWIND(removexattr, frame, op_ret, op_errno, xdata);
+ return 0;
+}
+
+static int32_t
+up_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+ dict_t *xattr = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ xattr = dict_for_key_value(name, "", 1, _gf_true);
+ if (!xattr) {
+ goto err;
+ }
+
+ local = upcall_local_init(frame, this, loc, NULL, loc->inode, xattr);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ if (xattr)
+ dict_unref(xattr);
+
+ STACK_WIND(frame, up_removexattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->removexattr, loc, name, xdata);
+ return 0;
+
+err:
+ if (xattr)
+ dict_unref(xattr);
+
+ UPCALL_STACK_UNWIND(removexattr, frame, -1, op_errno, NULL);
+
+ return 0;
+}
+
+static int32_t
+up_fgetxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(fgetxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+static int32_t
+up_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd, const char *name,
+ dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, fd->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_fgetxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fgetxattr, fd, name, xdata);
+ return 0;
+err:
+ UPCALL_STACK_UNWIND(fgetxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+up_getxattr_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ client_t *client = NULL;
+ uint32_t flags = 0;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+
+ flags = UP_UPDATE_CLIENT;
+ upcall_cache_invalidate(frame, this, client, local->inode, flags, NULL,
+ NULL, NULL, NULL);
+
+out:
+ UPCALL_STACK_UNWIND(getxattr, frame, op_ret, op_errno, dict, xdata);
+ return 0;
+}
+
+static int32_t
+up_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc, const char *name,
+ dict_t *xdata)
+{
+ int32_t op_errno = ENOMEM;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ local = upcall_local_init(frame, this, NULL, NULL, loc->inode, NULL);
+ if (!local) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_getxattr_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->getxattr, loc, name, xdata);
+ return 0;
+err:
+ UPCALL_STACK_UNWIND(getxattr, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+/* The xattrops here mainly tracks changes in afr pending xattr.
+ * 1. xattrop doesn't carry info saying post op/pre op.
+ * 2. Pre xattrop will have 0 value for all pending xattrs,
+ * the cbk of pre xattrop carries the on-disk xattr value.
+ * Non zero on-disk xattr indicates pending healing.
+ * 3. Post xattrop will either have 0 or 1 as value of pending xattrs,
+ * 0 on success, 1 on failure. But the post xattrop cbk will have
+ * 0 or 1 or any higher value.
+ * 0 - if no healing required*
+ * 1 - if this is the first time pending xattr is being set.
+ * n - if there is already a pending xattr set, it will increment
+ * the on-disk value and send that in cbk.
+ * Our aim is to send an invalidation, only the first time a pending
+ * xattr was set on a file. Below are some of the exceptions in handling
+ * xattrop:
+ * - Do not filter unregistered xattrs in the cbk, but in the call path.
+ * Else, we will be invalidating on every preop, if the file already has
+ * pending xattr set. Filtering unregistered xattrs on the fop path
+ * ensures we invalidate only in postop, every time a postop comes with
+ * pending xattr value 1.
+ * - Consider a brick is down, and the postop sets pending xattrs as long
+ * as the other brick is down. But we do not want to invalidate every time
+ * a pending xattr is set, but we want to invalidate only the first time
+ * a pending xattr is set on any file. Hence, to identify if its the first
+ * time a pending xattr is set, we compare the value of pending xattrs that
+ * came in postop and postop cbk, if its same then its the first time.
+ */
+static int32_t
+up_xattrop_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, dict_t *dict, dict_t *xdata)
+{
+ client_t *client = NULL;
+ upcall_local_t *local = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ client = frame->root->client;
+ local = frame->local;
+
+ if ((op_ret < 0) || !local) {
+ goto out;
+ }
+
+ if (up_invalidate_needed(local->xattr)) {
+ if (dict_foreach(local->xattr, up_compare_afr_xattr, dict) < 0)
+ goto out;
+
+ upcall_cache_invalidate(frame, this, client, local->inode, UP_XATTR,
+ NULL, NULL, NULL, local->xattr);
+ }
+out:
+ if (frame->root->op == GF_FOP_FXATTROP) {
+ UPCALL_STACK_UNWIND(fxattrop, frame, op_ret, op_errno, dict, xdata);
+ } else {
+ UPCALL_STACK_UNWIND(xattrop, frame, op_ret, op_errno, dict, xdata);
+ }
+ return 0;
+}
+
+static int32_t
+up_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ upcall_local_t *local = NULL;
+ int ret = 0;
+ upcall_private_t *priv = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ local = upcall_local_init(frame, this, loc, NULL, loc->inode, xattr);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = up_filter_xattr(local->xattr, priv->xattrs);
+ if (ret < 0) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_xattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->xattrop, loc, optype, xattr, xdata);
+ return 0;
+err:
+ UPCALL_STACK_UNWIND(xattrop, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+static int32_t
+up_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t optype, dict_t *xattr, dict_t *xdata)
+{
+ int32_t op_errno = EINVAL;
+ upcall_local_t *local = NULL;
+ int ret = 0;
+ upcall_private_t *priv = NULL;
+
+ EXIT_IF_UPCALL_OFF(this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ local = upcall_local_init(frame, this, NULL, fd, fd->inode, xattr);
+ if (!local) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = up_filter_xattr(local->xattr, priv->xattrs);
+ if (ret < 0) {
+ goto err;
+ }
+
+out:
+ STACK_WIND(frame, up_xattrop_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->fxattrop, fd, optype, xattr, xdata);
+ return 0;
+err:
+ STACK_UNWIND_STRICT(fxattrop, frame, -1, op_errno, NULL, NULL);
+ return 0;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ int ret = -1;
+
+ if (!this)
+ return ret;
+
+ ret = xlator_mem_acct_init(this, gf_upcall_mt_end + 1);
+
+ if (ret != 0) {
+ gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_NO_MEMORY,
+ "Memory allocation failed");
+ return ret;
+ }
+
+ return ret;
+}
+
+void
+upcall_local_wipe(xlator_t *this, upcall_local_t *local)
+{
+ if (local) {
+ inode_unref(local->inode);
+ if (local->xattr)
+ dict_unref(local->xattr);
+ loc_wipe(&local->rename_oldloc);
+ loc_wipe(&local->loc);
+ if (local->fd)
+ fd_unref(local->fd);
+ mem_put(local);
+ }
+}
+
+upcall_local_t *
+upcall_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ inode_t *inode, dict_t *xattr)
+{
+ upcall_local_t *local = NULL;
+
+ GF_VALIDATE_OR_GOTO("upcall", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, frame, out);
+ GF_VALIDATE_OR_GOTO(this->name, inode, out);
+
+ local = mem_get0(THIS->local_pool);
+
+ if (!local)
+ goto out;
+
+ local->inode = inode_ref(inode);
+ if (xattr)
+ local->xattr = dict_copy_with_ref(xattr, NULL);
+
+ if (loc)
+ loc_copy(&local->loc, loc);
+ if (fd)
+ local->fd = fd_ref(fd);
+
+ frame->local = local;
+
+out:
+ return local;
+}
+
+static int32_t
+update_xattrs(dict_t *dict, char *key, data_t *value, void *data)
+{
+ dict_t *xattrs = data;
+ int ret = 0;
+
+ ret = dict_set_int8(xattrs, key, 0);
+ return ret;
+}
+
+int32_t
+up_ipc(call_frame_t *frame, xlator_t *this, int32_t op, dict_t *xdata)
+{
+ upcall_private_t *priv = NULL;
+ int ret = 0;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ if (op != GF_IPC_TARGET_UPCALL)
+ goto wind;
+
+ /* TODO: Bz-1371622 Along with the xattrs also store list of clients
+ * that are interested in notifications, so that the notification
+ * can be sent to the clients that have registered.
+ * Once this implemented there can be unregister of xattrs for
+ * notifications. Until then there is no unregister of xattrs*/
+ if (xdata && priv->xattrs) {
+ ret = dict_foreach(xdata, update_xattrs, priv->xattrs);
+ }
+
+out:
+ STACK_UNWIND_STRICT(ipc, frame, ret, 0, NULL);
+ return 0;
+
+wind:
+ STACK_WIND(frame, default_ipc_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->ipc, op, xdata);
+ return 0;
+}
+
+int
+reconfigure(xlator_t *this, dict_t *options)
+{
+ upcall_private_t *priv = NULL;
+ int ret = -1;
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ GF_OPTION_RECONF("cache-invalidation", priv->cache_invalidation_enabled,
+ options, bool, out);
+ GF_OPTION_RECONF("cache-invalidation-timeout",
+ priv->cache_invalidation_timeout, options, int32, out);
+
+ ret = 0;
+
+ if (priv->cache_invalidation_enabled && !priv->reaper_init_done) {
+ ret = upcall_reaper_thread_init(this);
+
+ if (ret) {
+ gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_INTERNAL_ERROR,
+ "reaper_thread creation failed (%s)."
+ " Disabling cache_invalidation",
+ strerror(errno));
+ }
+ priv->reaper_init_done = _gf_true;
+ }
+
+out:
+ return ret;
+}
+
+int
+init(xlator_t *this)
+{
+ int ret = -1;
+ upcall_private_t *priv = NULL;
+
+ priv = GF_CALLOC(1, sizeof(*priv), gf_upcall_mt_private_t);
+ if (!priv)
+ goto out;
+
+ priv->xattrs = dict_new();
+ if (!priv->xattrs)
+ goto out;
+
+ GF_OPTION_INIT("cache-invalidation", priv->cache_invalidation_enabled, bool,
+ out);
+ GF_OPTION_INIT("cache-invalidation-timeout",
+ priv->cache_invalidation_timeout, int32, out);
+
+ LOCK_INIT(&priv->inode_ctx_lk);
+ INIT_LIST_HEAD(&priv->inode_ctx_list);
+
+ priv->fini = 0;
+ priv->reaper_init_done = _gf_false;
+
+ this->private = priv;
+ this->local_pool = mem_pool_new(upcall_local_t, 512);
+ ret = 0;
+
+ if (priv->cache_invalidation_enabled) {
+ ret = upcall_reaper_thread_init(this);
+
+ if (ret) {
+ gf_msg("upcall", GF_LOG_WARNING, 0, UPCALL_MSG_INTERNAL_ERROR,
+ "reaper_thread creation failed (%s)."
+ " Disabling cache_invalidation",
+ strerror(errno));
+ }
+ priv->reaper_init_done = _gf_true;
+ }
+out:
+ if (ret && priv) {
+ if (priv->xattrs)
+ dict_unref(priv->xattrs);
+
+ GF_FREE(priv);
+ }
+
+ return ret;
+}
+
+void
+fini(xlator_t *this)
+{
+ upcall_private_t *priv = NULL;
+
+ priv = this->private;
+ if (!priv) {
+ return;
+ }
+ this->private = NULL;
+
+ priv->fini = 1;
+
+ if (priv->reaper_thr) {
+ gf_thread_cleanup_xint(priv->reaper_thr);
+ priv->reaper_thr = 0;
+ priv->reaper_init_done = _gf_false;
+ }
+
+ dict_unref(priv->xattrs);
+ LOCK_DESTROY(&priv->inode_ctx_lk);
+
+ /* Do we need to cleanup the inode_ctxs? IMO not required
+ * as inode_forget would have been done on all the inodes
+ * before calling xlator_fini */
+ GF_FREE(priv);
+
+ if (this->local_pool) {
+ mem_pool_destroy(this->local_pool);
+ this->local_pool = NULL;
+ }
+
+ return;
+}
+
+int
+upcall_forget(xlator_t *this, inode_t *inode)
+{
+ upcall_private_t *priv = this->private;
+
+ if (!priv)
+ goto out;
+
+ upcall_cleanup_inode_ctx(this, inode);
+out:
+ return 0;
+}
+
+int
+upcall_release(xlator_t *this, fd_t *fd)
+{
+ return 0;
+}
+
+int
+notify(xlator_t *this, int32_t event, void *data, ...)
+{
+ int ret = -1;
+ struct gf_upcall *up_req = NULL;
+
+ switch (event) {
+ case GF_EVENT_UPCALL: {
+ gf_log(this->name, GF_LOG_DEBUG, "Upcall Notify event = %d", event);
+
+ up_req = (struct gf_upcall *)data;
+
+ GF_VALIDATE_OR_GOTO(this->name, up_req, out);
+
+ ret = default_notify(this, event, up_req);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, UPCALL_MSG_NOTIFY_FAILED,
+ "Failed to notify cache invalidation"
+ " to client(%s)",
+ up_req->client_uid);
+ goto out;
+ }
+ } break;
+ default:
+ default_notify(this, event, data);
+ break;
+ }
+ ret = 0;
+
+out:
+ return ret;
+}
+
+struct xlator_fops fops = {
+ .ipc = up_ipc,
+ /* fops which change only "ATIME" do not result
+ * in any cache invalidation. Hence upcall
+ * notifications are not sent in this case.
+ * But however, we need to store/update the
+ * client info in the upcall state to be able
+ * to notify them in case of any changes done
+ * to the data.
+ *
+ * Below such fops do not trigger upcall
+ * notifications but will add/update
+ * clients info in the upcall inode ctx.*/
+ .lookup = up_lookup,
+ .open = up_open,
+ .statfs = up_statfs,
+ .opendir = up_opendir,
+ .readdir = up_readdir,
+ .readdirp = up_readdirp,
+ .stat = up_stat,
+ .fstat = up_fstat,
+ .access = up_access,
+ .readlink = up_readlink,
+ .readv = up_readv,
+ .lk = up_lk,
+ .seek = up_seek,
+
+ /* fops doing write */
+ .truncate = up_truncate,
+ .ftruncate = up_ftruncate,
+ .writev = up_writev,
+ .zerofill = up_zerofill,
+ .fallocate = up_fallocate,
+ .discard = up_discard,
+
+ /* fops changing attributes */
+ .fsetattr = up_fsetattr,
+ .setattr = up_setattr,
+
+ /* fops affecting parent dirent */
+ .mknod = up_mknod,
+ .create = up_create,
+ .symlink = up_symlink,
+ .mkdir = up_mkdir,
+
+ /* fops affecting both file and parent
+ * cache entries */
+ .unlink = up_unlink,
+ .link = up_link,
+ .rmdir = up_rmdir,
+ .rename = up_rename,
+
+ .setxattr = up_setxattr,
+ .fsetxattr = up_fsetxattr,
+ .getxattr = up_getxattr,
+ .fgetxattr = up_fgetxattr,
+ .fremovexattr = up_fremovexattr,
+ .removexattr = up_removexattr,
+ .xattrop = up_xattrop,
+ .fxattrop = up_fxattrop,
+
+#ifdef NOT_SUPPORTED
+ /* internal lk fops */
+ .inodelk = up_inodelk,
+ .finodelk = up_finodelk,
+ .entrylk = up_entrylk,
+ .fentrylk = up_fentrylk,
+
+ /* Below fops follow 'WRITE' which
+ * would have already sent upcall
+ * notifications */
+ .flush = up_flush,
+ .fsync = up_fsync,
+ .fsyncdir = up_fsyncdir,
+#endif
+};
+
+struct xlator_cbks cbks = {
+ .forget = upcall_forget,
+ .release = upcall_release,
+};
+
+struct volume_options options[] = {
+ {
+ .key = {"cache-invalidation"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "off",
+ .description = "When \"on\", sends cache-invalidation"
+ " notifications.",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"cache", "cacheconsistency", "upcall"},
+ },
+ {.key = {"cache-invalidation-timeout"},
+ .type = GF_OPTION_TYPE_INT,
+ .default_value = CACHE_INVALIDATION_TIMEOUT,
+ .description = "After 'timeout' seconds since the time"
+ " client accessed any file, cache-invalidation"
+ " notifications are no longer sent to that client.",
+ .op_version = {GD_OP_VERSION_3_7_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_DOC,
+ .tags = {"cache", "cachetimeout", "upcall"}},
+ {.key = {NULL}},
+};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "upcall",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/upcall/src/upcall.h b/xlators/features/upcall/src/upcall.h
new file mode 100644
index 00000000000..aa535088ad7
--- /dev/null
+++ b/xlators/features/upcall/src/upcall.h
@@ -0,0 +1,131 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef __UPCALL_H__
+#define __UPCALL_H__
+
+#include <glusterfs/compat-errno.h>
+#include "upcall-mem-types.h"
+#include <glusterfs/client_t.h>
+#include "upcall-messages.h"
+#include "upcall-cache-invalidation.h"
+#include <glusterfs/upcall-utils.h>
+
+#define EXIT_IF_UPCALL_OFF(this, label) \
+ do { \
+ if (!is_upcall_enabled(this)) \
+ goto label; \
+ } while (0)
+
+#define UPCALL_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ upcall_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ if (frame) { \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ upcall_local_wipe(__xl, __local); \
+ } while (0)
+
+#define UPCALL_STACK_DESTROY(frame) \
+ do { \
+ upcall_local_t *__local = NULL; \
+ xlator_t *__xl = NULL; \
+ __xl = frame->this; \
+ __local = frame->local; \
+ frame->local = NULL; \
+ STACK_DESTROY(frame->root); \
+ upcall_local_wipe(__xl, __local); \
+ } while (0)
+
+struct _upcall_private {
+ gf_boolean_t cache_invalidation_enabled;
+ int32_t cache_invalidation_timeout;
+ struct list_head inode_ctx_list;
+ gf_lock_t inode_ctx_lk;
+ gf_boolean_t reaper_init_done;
+ pthread_t reaper_thr;
+ int32_t fini;
+ dict_t *xattrs; /* list of xattrs registered by clients
+ for receiving invalidation */
+};
+typedef struct _upcall_private upcall_private_t;
+
+struct _upcall_client {
+ struct list_head client_list;
+ /* strdup to store client_uid, strdup. Free it explicitly */
+ char *client_uid;
+ time_t access_time; /* time last accessed */
+ /* the amount of time which client can cache this entry */
+ uint32_t expire_time_attr;
+};
+typedef struct _upcall_client upcall_client_t;
+
+/* Upcall entries are maintained in inode_ctx */
+struct _upcall_inode_ctx {
+ struct list_head inode_ctx_list;
+ struct list_head client_list;
+ pthread_mutex_t client_list_lock; /* mutex for clients list
+ of this upcall entry */
+ int destroy;
+ uuid_t gfid; /* gfid of the entry */
+};
+typedef struct _upcall_inode_ctx upcall_inode_ctx_t;
+
+struct upcall_local {
+ /* XXX: need to check if we can store
+ * pointers in 'local' which may get freed
+ * in future by other thread
+ */
+ inode_t *inode;
+ loc_t rename_oldloc;
+ loc_t loc; /* required for stat in *xattr_cbk */
+ fd_t *fd; /* required for fstat in *xattr_cbk */
+ dict_t *xattr;
+};
+typedef struct upcall_local upcall_local_t;
+
+void
+upcall_local_wipe(xlator_t *this, upcall_local_t *local);
+upcall_local_t *
+upcall_local_init(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ inode_t *inode, dict_t *xattr);
+
+upcall_inode_ctx_t *
+upcall_inode_ctx_get(inode_t *inode, xlator_t *this);
+int
+upcall_cleanup_inode_ctx(xlator_t *this, inode_t *inode);
+
+void *
+upcall_reaper_thread(void *data);
+int
+upcall_reaper_thread_init(xlator_t *this);
+
+/* Xlator options */
+gf_boolean_t
+is_upcall_enabled(xlator_t *this);
+
+/* Cache invalidation specific */
+void
+upcall_cache_invalidate(call_frame_t *frame, xlator_t *this, client_t *client,
+ inode_t *inode, uint32_t flags, struct iatt *stbuf,
+ struct iatt *p_stbuf, struct iatt *oldp_stbuf,
+ dict_t *xattr);
+int
+up_filter_xattr(dict_t *xattr, dict_t *regd_xattrs);
+
+int
+up_compare_afr_xattr(dict_t *d, char *k, data_t *v, void *tmp);
+
+gf_boolean_t
+up_invalidate_needed(dict_t *xattrs);
+#endif /* __UPCALL_H__ */
diff --git a/xlators/features/utime/Makefile.am b/xlators/features/utime/Makefile.am
new file mode 100644
index 00000000000..a985f42a877
--- /dev/null
+++ b/xlators/features/utime/Makefile.am
@@ -0,0 +1,3 @@
+SUBDIRS = src
+
+CLEANFILES =
diff --git a/xlators/features/utime/src/Makefile.am b/xlators/features/utime/src/Makefile.am
new file mode 100644
index 00000000000..7c3adbc2195
--- /dev/null
+++ b/xlators/features/utime/src/Makefile.am
@@ -0,0 +1,41 @@
+xlator_LTLIBRARIES = utime.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/features
+
+UTIME_SRC = $(top_srcdir)/xlators/features/utime/src
+
+utime_sources = $(UTIME_SRC)/utime-helpers.c
+utime_sources += $(UTIME_SRC)/utime.c
+
+utime_la_SOURCES = $(utime_sources)
+nodist_utime_la_SOURCES = utime-autogen-fops.c utime-autogen-fops.h
+BUILT_SOURCES = utime-autogen-fops.h
+
+utime_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
+utime_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS_utime = $(UTIME_SRC)/utime-helpers.h
+noinst_HEADERS_utime += $(UTIME_SRC)/utime.h
+noinst_HEADERS_utime += $(UTIME_SRC)/utime-messages.h
+noinst_HEADERS_utime += $(UTIME_SRC)/utime-mem-types.h
+noinst_HEADERS = $(top_srcdir)/xlators/lib/src/libxlator.h
+noinst_HEADERS += $(noinst_HEADERS_utime)
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(top_srcdir)/xlators/lib/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+noinst_PYTHON = utime-gen-fops-c.py utime-gen-fops-h.py
+EXTRA_DIST = utime-autogen-fops-tmpl.c utime-autogen-fops-tmpl.h
+
+utime-autogen-fops.c: utime-gen-fops-c.py utime-autogen-fops-tmpl.c
+ $(PYTHON) $(UTIME_SRC)/utime-gen-fops-c.py $(UTIME_SRC)/utime-autogen-fops-tmpl.c > $@
+
+utime-autogen-fops.h: utime-gen-fops-h.py utime-autogen-fops-tmpl.h
+ $(PYTHON) $(UTIME_SRC)/utime-gen-fops-h.py $(UTIME_SRC)/utime-autogen-fops-tmpl.h > $@
+
+CLEANFILES = $(nodist_utime_la_SOURCES)
+
+uninstall-local:
+ rm -f $(DESTDIR)$(xlatordir)/utime.so
diff --git a/xlators/features/utime/src/utime-autogen-fops-tmpl.c b/xlators/features/utime/src/utime-autogen-fops-tmpl.c
new file mode 100644
index 00000000000..f2f35322926
--- /dev/null
+++ b/xlators/features/utime/src/utime-autogen-fops-tmpl.c
@@ -0,0 +1,28 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* File: utime-autogen-fops-tmpl.c
+ * This file contains the utime autogenerated FOPs. This is run through
+ * the code generator, generator.py to generate the required FOPs.
+ */
+
+#ifndef _CONFIG_H
+#define _CONFIG_H
+#include "config.h"
+#endif
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/statedump.h>
+#include "utime-helpers.h"
+#include <glusterfs/timespec.h>
+
+#pragma generate
diff --git a/xlators/features/utime/src/utime-autogen-fops-tmpl.h b/xlators/features/utime/src/utime-autogen-fops-tmpl.h
new file mode 100644
index 00000000000..4e102ffed6c
--- /dev/null
+++ b/xlators/features/utime/src/utime-autogen-fops-tmpl.h
@@ -0,0 +1,22 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+/* File: utime-autogen-fops-tmpl.h
+ * This file contains the utime autogenerated FOPs declarations.
+ */
+
+#ifndef _UTIME_AUTOGEN_FOPS_H
+#define _UTIME_AUTOGEN_FOPS_H
+
+#include <glusterfs/xlator.h>
+
+#pragma generate
+
+#endif /* _UTIME_AUTOGEN_FOPS_H */
diff --git a/xlators/features/utime/src/utime-gen-fops-c.py b/xlators/features/utime/src/utime-gen-fops-c.py
new file mode 100755
index 00000000000..9fb3e1b8b1a
--- /dev/null
+++ b/xlators/features/utime/src/utime-gen-fops-c.py
@@ -0,0 +1,147 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+import os
+import sys
+
+curdir = os.path.dirname(sys.argv[0])
+gendir = os.path.join(curdir, '../../../../libglusterfs/src')
+sys.path.append(gendir)
+from generator import ops, fop_subs, cbk_subs, generate
+
+FOPS_COMMON_TEMPLATE = """
+int32_t
+gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ gl_timespec_get(&frame->root->ctime);
+
+ (void) utime_update_attribute_flags(frame, this, GF_FOP_@UPNAME@);
+ STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FOPS_CBK_COMMON_TEMPLATE = """
+int32_t
+gf_utime_@NAME@_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno,
+ @LONG_ARGS@)
+{
+ STACK_UNWIND_STRICT (@NAME@, frame, op_ret, op_errno, @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FOPS_READ_TEMPLATE = """
+int32_t
+gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ gl_timespec_get(&frame->root->ctime);
+
+ (void) utime_update_attribute_flags(frame, this, GF_FOP_READ);
+ STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FOPS_WRITE_TEMPLATE = """
+int32_t
+gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ gl_timespec_get(&frame->root->ctime);
+
+ (void) utime_update_attribute_flags(frame, this, GF_FOP_WRITE);
+ STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FOPS_COPY_FILE_RANGE_TEMPLATE = """
+int32_t
+gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ gl_timespec_get(&frame->root->ctime);
+
+ (void) utime_update_attribute_flags(frame, this, GF_FOP_COPY_FILE_RANGE);
+ STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+FOPS_SETATTR_TEMPLATE = """
+int32_t
+gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@)
+{
+ gl_timespec_get(&frame->root->ctime);
+
+ if (!valid) {
+ frame->root->flags |= MDATA_CTIME;
+ }
+
+ if (valid & (GF_SET_ATTR_UID | GF_SET_ATTR_GID)) {
+ frame->root->flags |= MDATA_CTIME;
+ }
+
+ if (valid & GF_SET_ATTR_MODE) {
+ frame->root->flags |= MDATA_CTIME;
+ }
+
+ if (valid & (GF_SET_ATTR_ATIME | GF_SET_ATTR_MTIME)) {
+ if (valid & GF_ATTR_ATIME_NOW) {
+ frame->root->ctime.tv_sec = stbuf->ia_atime;
+ frame->root->ctime.tv_nsec = stbuf->ia_atime_nsec;
+ } else if (valid & GF_ATTR_MTIME_NOW) {
+ frame->root->ctime.tv_sec = stbuf->ia_mtime;
+ frame->root->ctime.tv_nsec = stbuf->ia_mtime_nsec;
+ }
+ }
+
+ STACK_WIND (frame, gf_utime_@NAME@_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->@NAME@, @SHORT_ARGS@);
+ return 0;
+}
+"""
+
+utime_ops = ['fallocate', 'zerofill', 'opendir', 'mknod', 'mkdir',
+ 'unlink', 'rmdir', 'symlink', 'rename', 'link', 'truncate',
+ 'ftruncate', 'create', 'open', 'removexattr', 'fremovexattr']
+
+utime_read_op = ['readv']
+utime_write_op = ['writev']
+utime_setattr_ops = ['setattr', 'fsetattr']
+utime_copy_file_range_ops = ['copy_file_range']
+
+def gen_defaults():
+ for name in ops:
+ if name in utime_ops:
+ print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs))
+ print(generate(FOPS_COMMON_TEMPLATE, name, fop_subs))
+ if name in utime_read_op:
+ print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs))
+ print(generate(FOPS_READ_TEMPLATE, name, fop_subs))
+ if name in utime_write_op:
+ print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs))
+ print(generate(FOPS_WRITE_TEMPLATE, name, fop_subs))
+ if name in utime_setattr_ops:
+ print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs))
+ print(generate(FOPS_SETATTR_TEMPLATE, name, fop_subs))
+ if name in utime_copy_file_range_ops:
+ print(generate(FOPS_CBK_COMMON_TEMPLATE, name, cbk_subs))
+ print(generate(FOPS_COPY_FILE_RANGE_TEMPLATE, name, fop_subs))
+
+for l in open(sys.argv[1], 'r').readlines():
+ if l.find('#pragma generate') != -1:
+ print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
+ gen_defaults()
+ print("/* END GENERATED CODE */")
+ else:
+ print(l[:-1])
diff --git a/xlators/features/utime/src/utime-gen-fops-h.py b/xlators/features/utime/src/utime-gen-fops-h.py
new file mode 100755
index 00000000000..e96274c229a
--- /dev/null
+++ b/xlators/features/utime/src/utime-gen-fops-h.py
@@ -0,0 +1,35 @@
+#!/usr/bin/python3
+
+from __future__ import print_function
+import os
+import sys
+
+curdir = os.path.dirname(sys.argv[0])
+gendir = os.path.join(curdir, '../../../../libglusterfs/src')
+sys.path.append(gendir)
+from generator import ops, fop_subs, generate
+
+OP_FOP_TEMPLATE = """
+int32_t
+gf_utime_@NAME@ (call_frame_t *frame, xlator_t *this,
+ @LONG_ARGS@);
+"""
+
+utime_ops = ['fallocate', 'zerofill', 'opendir', 'mknod', 'mkdir',
+ 'unlink', 'rmdir', 'symlink', 'rename', 'link', 'truncate',
+ 'ftruncate', 'create', 'open', 'removexattr', 'fremovexattr',
+ 'readv', 'writev', 'setattr', 'fsetattr', 'copy_file_range']
+
+def gen_defaults():
+ for name, value in ops.items():
+ if name in utime_ops:
+ print(generate(OP_FOP_TEMPLATE, name, fop_subs))
+
+
+for l in open(sys.argv[1], 'r').readlines():
+ if l.find('#pragma generate') != -1:
+ print("/* BEGIN GENERATED CODE - DO NOT MODIFY */")
+ gen_defaults()
+ print("/* END GENERATED CODE */")
+ else:
+ print(l[:-1])
diff --git a/xlators/features/utime/src/utime-helpers.c b/xlators/features/utime/src/utime-helpers.c
new file mode 100644
index 00000000000..29d9ad93561
--- /dev/null
+++ b/xlators/features/utime/src/utime-helpers.c
@@ -0,0 +1,110 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "utime-helpers.h"
+#include "utime.h"
+
+void
+gl_timespec_get(struct timespec *ts)
+{
+#ifdef TIME_UTC
+ timespec_get(ts, TIME_UTC);
+#else
+ timespec_now_realtime(ts);
+#endif
+}
+
+void
+utime_update_attribute_flags(call_frame_t *frame, xlator_t *this,
+ glusterfs_fop_t fop)
+{
+ utime_priv_t *utime_priv = NULL;
+
+ if (!frame || !this) {
+ goto out;
+ }
+
+ utime_priv = this->private;
+
+ switch (fop) {
+ case GF_FOP_SETXATTR:
+ case GF_FOP_FSETXATTR:
+ frame->root->flags |= MDATA_CTIME;
+ break;
+
+ case GF_FOP_FALLOCATE:
+ case GF_FOP_ZEROFILL:
+ frame->root->flags |= MDATA_MTIME;
+ frame->root->flags |= MDATA_ATIME;
+ break;
+
+ case GF_FOP_OPENDIR:
+ case GF_FOP_OPEN:
+ case GF_FOP_READ:
+ if (!utime_priv->noatime) {
+ frame->root->flags |= MDATA_ATIME;
+ }
+ break;
+ case GF_FOP_MKNOD:
+ case GF_FOP_MKDIR:
+ case GF_FOP_SYMLINK:
+ case GF_FOP_CREATE:
+ frame->root->flags |= MDATA_ATIME;
+ frame->root->flags |= MDATA_CTIME;
+ frame->root->flags |= MDATA_MTIME;
+ frame->root->flags |= MDATA_PAR_CTIME;
+ frame->root->flags |= MDATA_PAR_MTIME;
+ break;
+
+ case GF_FOP_UNLINK:
+ case GF_FOP_RMDIR:
+ frame->root->flags |= MDATA_CTIME;
+ frame->root->flags |= MDATA_PAR_CTIME;
+ frame->root->flags |= MDATA_PAR_MTIME;
+ break;
+
+ case GF_FOP_WRITE:
+ frame->root->flags |= MDATA_MTIME;
+ frame->root->flags |= MDATA_CTIME;
+ break;
+
+ case GF_FOP_LINK:
+ case GF_FOP_RENAME:
+ frame->root->flags |= MDATA_CTIME;
+ frame->root->flags |= MDATA_PAR_CTIME;
+ frame->root->flags |= MDATA_PAR_MTIME;
+ break;
+
+ case GF_FOP_TRUNCATE:
+ case GF_FOP_FTRUNCATE:
+ frame->root->flags |= MDATA_CTIME;
+ frame->root->flags |= MDATA_MTIME;
+ break;
+
+ case GF_FOP_REMOVEXATTR:
+ case GF_FOP_FREMOVEXATTR:
+ frame->root->flags |= MDATA_CTIME;
+ break;
+
+ case GF_FOP_COPY_FILE_RANGE:
+ /* Below 2 are for destination fd */
+ frame->root->flags |= MDATA_CTIME;
+ frame->root->flags |= MDATA_MTIME;
+ /* Below flag is for the source fd */
+ if (!utime_priv->noatime) {
+ frame->root->flags |= MDATA_ATIME;
+ }
+ break;
+ default:
+ frame->root->flags = 0;
+ }
+out:
+ return;
+}
diff --git a/xlators/features/utime/src/utime-helpers.h b/xlators/features/utime/src/utime-helpers.h
new file mode 100644
index 00000000000..2e32d4bece6
--- /dev/null
+++ b/xlators/features/utime/src/utime-helpers.h
@@ -0,0 +1,25 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _UTIME_HELPERS_H
+#define _UTIME_HELPERS_H
+
+#include <glusterfs/stack.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/timespec.h>
+#include <time.h>
+
+void
+gl_timespec_get(struct timespec *ts);
+void
+utime_update_attribute_flags(call_frame_t *frame, xlator_t *this,
+ glusterfs_fop_t fop);
+
+#endif /* _UTIME_HELPERS_H */
diff --git a/xlators/features/utime/src/utime-mem-types.h b/xlators/features/utime/src/utime-mem-types.h
new file mode 100644
index 00000000000..ad1255f85f3
--- /dev/null
+++ b/xlators/features/utime/src/utime-mem-types.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __UTIME_MEM_TYPES_H__
+#define __UTIME_MEM_TYPES_H__
+
+#include <glusterfs/mem-types.h>
+
+enum gf_utime_mem_types_ {
+ utime_mt_utime_t = gf_common_mt_end + 1,
+ utime_mt_end
+};
+
+#endif /* __UTIME_MEM_TYPES_H__ */
diff --git a/xlators/features/utime/src/utime-messages.h b/xlators/features/utime/src/utime-messages.h
new file mode 100644
index 00000000000..bd40265abaf
--- /dev/null
+++ b/xlators/features/utime/src/utime-messages.h
@@ -0,0 +1,29 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __UTIME_MESSAGES_H__
+#define __UTIME_MESSAGES_H__
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(UTIME, UTIME_MSG_NO_MEMORY, UTIME_MSG_SET_MDATA_FAILED,
+ UTIME_MSG_DICT_SET_FAILED);
+
+#endif /* __UTIME_MESSAGES_H__ */
diff --git a/xlators/features/utime/src/utime.c b/xlators/features/utime/src/utime.c
new file mode 100644
index 00000000000..2acc63e6a05
--- /dev/null
+++ b/xlators/features/utime/src/utime.c
@@ -0,0 +1,392 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "utime.h"
+#include "utime-helpers.h"
+#include "utime-messages.h"
+#include "utime-mem-types.h"
+#include <glusterfs/call-stub.h>
+
+int32_t
+gf_utime_invalidate(xlator_t *this, inode_t *inode)
+{
+ return 0;
+}
+
+int32_t
+gf_utime_forget(xlator_t *this, inode_t *inode)
+{
+ return 0;
+}
+
+int32_t
+gf_utime_client_destroy(xlator_t *this, client_t *client)
+{
+ return 0;
+}
+
+void
+gf_utime_ictxmerge(xlator_t *this, fd_t *fd, inode_t *inode,
+ inode_t *linked_inode)
+{
+ return;
+}
+
+int32_t
+gf_utime_release(xlator_t *this, fd_t *fd)
+{
+ return 0;
+}
+
+int32_t
+gf_utime_releasedir(xlator_t *this, fd_t *fd)
+{
+ return 0;
+}
+
+int32_t
+gf_utime_client_disconnect(xlator_t *this, client_t *client)
+{
+ return 0;
+}
+
+int32_t
+gf_utime_fdctx_to_dict(xlator_t *this, fd_t *fd, dict_t *dict)
+{
+ return 0;
+}
+
+int32_t
+gf_utime_inode(xlator_t *this)
+{
+ return 0;
+}
+
+int32_t
+gf_utime_inode_to_dict(xlator_t *this, dict_t *dict)
+{
+ return 0;
+}
+
+int32_t
+gf_utime_history(xlator_t *this)
+{
+ return 0;
+}
+
+int32_t
+gf_utime_fd(xlator_t *this)
+{
+ return 0;
+}
+
+int32_t
+gf_utime_fd_to_dict(xlator_t *this, dict_t *dict)
+{
+ return 0;
+}
+
+int32_t
+gf_utime_fdctx(xlator_t *this, fd_t *fd)
+{
+ return 0;
+}
+
+int32_t
+gf_utime_inodectx(xlator_t *this, inode_t *ino)
+{
+ return 0;
+}
+
+int32_t
+gf_utime_inodectx_to_dict(xlator_t *this, inode_t *ino, dict_t *dict)
+{
+ return 0;
+}
+
+int32_t
+gf_utime_priv_to_dict(xlator_t *this, dict_t *dict, char *brickname)
+{
+ return 0;
+}
+
+int32_t
+gf_utime_priv(xlator_t *this)
+{
+ return 0;
+}
+
+int32_t
+mem_acct_init(xlator_t *this)
+{
+ if (xlator_mem_acct_init(this, utime_mt_end + 1) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, UTIME_MSG_NO_MEMORY,
+ "Memory accounting initialization failed.");
+ return -1;
+ }
+ return 0;
+}
+
+int32_t
+gf_utime_set_mdata_setxattr_cbk(call_frame_t *frame, void *cookie,
+ xlator_t *this, int op_ret, int op_errno,
+ dict_t *xdata)
+{
+ call_stub_t *stub = frame->local;
+ /* Don't fail lookup if mdata setxattr fails */
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, UTIME_MSG_SET_MDATA_FAILED,
+ "dict set of key for set-ctime-mdata failed");
+ }
+ frame->local = NULL;
+ call_resume(stub);
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+int32_t
+gf_utime_set_mdata_lookup_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int32_t op_ret, int32_t op_errno, inode_t *inode,
+ struct iatt *stbuf, dict_t *xdata,
+ struct iatt *postparent)
+{
+ dict_t *dict = NULL;
+ struct mdata_iatt *mdata = NULL;
+ int ret = 0;
+ loc_t loc = {
+ 0,
+ };
+ call_frame_t *new_frame = NULL;
+
+ if (!op_ret && dict_get(xdata, GF_XATTR_MDATA_KEY) == NULL) {
+ dict = dict_new();
+ if (!dict) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ mdata = GF_MALLOC(sizeof(struct mdata_iatt), gf_common_mt_char);
+ if (mdata == NULL) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+ iatt_to_mdata(mdata, stbuf);
+ ret = dict_set_mdata(dict, CTIME_MDATA_XDATA_KEY, mdata, _gf_false);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, UTIME_MSG_NO_MEMORY,
+ "dict set of key for set-ctime-mdata failed");
+ goto err;
+ }
+ new_frame = copy_frame(frame);
+ if (!new_frame) {
+ op_errno = ENOMEM;
+ goto stub_err;
+ }
+
+ new_frame->local = fop_lookup_cbk_stub(frame, default_lookup_cbk,
+ op_ret, op_errno, inode, stbuf,
+ xdata, postparent);
+ if (!new_frame->local) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, UTIME_MSG_NO_MEMORY,
+ "lookup_cbk stub allocation failed");
+ op_errno = ENOMEM;
+ STACK_DESTROY(new_frame->root);
+ goto stub_err;
+ }
+
+ loc.inode = inode_ref(inode);
+ gf_uuid_copy(loc.gfid, stbuf->ia_gfid);
+
+ new_frame->root->uid = 0;
+ new_frame->root->gid = 0;
+ new_frame->root->pid = GF_CLIENT_PID_SET_UTIME;
+ STACK_WIND(new_frame, gf_utime_set_mdata_setxattr_cbk,
+ FIRST_CHILD(this), FIRST_CHILD(this)->fops->setxattr, &loc,
+ dict, 0, NULL);
+
+ dict_unref(dict);
+ inode_unref(loc.inode);
+ return 0;
+ }
+
+ STACK_UNWIND_STRICT(lookup, frame, op_ret, op_errno, inode, stbuf, xdata,
+ postparent);
+ return 0;
+
+err:
+ if (mdata) {
+ GF_FREE(mdata);
+ }
+stub_err:
+ if (dict) {
+ dict_unref(dict);
+ }
+ STACK_UNWIND_STRICT(lookup, frame, -1, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int
+gf_utime_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
+{
+ int op_errno = EINVAL;
+ int ret = -1;
+
+ VALIDATE_OR_GOTO(frame, err);
+ VALIDATE_OR_GOTO(this, err);
+ VALIDATE_OR_GOTO(loc, err);
+ VALIDATE_OR_GOTO(loc->inode, err);
+
+ xdata = xdata ? dict_ref(xdata) : dict_new();
+ if (!xdata) {
+ op_errno = ENOMEM;
+ goto err;
+ }
+
+ ret = dict_set_int8(xdata, GF_XATTR_MDATA_KEY, 1);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, -ret, UTIME_MSG_DICT_SET_FAILED,
+ "%s: Unable to set dict value for %s", loc->path,
+ GF_XATTR_MDATA_KEY);
+ op_errno = -ret;
+ goto free_dict;
+ }
+
+ STACK_WIND(frame, gf_utime_set_mdata_lookup_cbk, FIRST_CHILD(this),
+ FIRST_CHILD(this)->fops->lookup, loc, xdata);
+ dict_unref(xdata);
+ return 0;
+
+free_dict:
+ dict_unref(xdata);
+err:
+ STACK_UNWIND_STRICT(lookup, frame, ret, op_errno, NULL, NULL, NULL, NULL);
+ return 0;
+}
+
+int32_t
+init(xlator_t *this)
+{
+ utime_priv_t *utime = NULL;
+
+ utime = GF_MALLOC(sizeof(*utime), utime_mt_utime_t);
+ if (utime == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, UTIME_MSG_NO_MEMORY,
+ "Failed to allocate private memory.");
+ return -1;
+ }
+ memset(utime, 0, sizeof(*utime));
+
+ this->private = utime;
+ GF_OPTION_INIT("noatime", utime->noatime, bool, err);
+
+ return 0;
+err:
+ return -1;
+}
+
+void
+fini(xlator_t *this)
+{
+ utime_priv_t *utime = NULL;
+
+ utime = this->private;
+ GF_FREE(utime);
+ return;
+}
+
+int32_t
+reconfigure(xlator_t *this, dict_t *options)
+{
+ utime_priv_t *utime = this->private;
+
+ GF_OPTION_RECONF("noatime", utime->noatime, options, bool, err);
+
+ return 0;
+err:
+ return -1;
+}
+
+int
+notify(xlator_t *this, int event, void *data, ...)
+{
+ return default_notify(this, event, data);
+}
+
+struct xlator_fops fops = {
+ .rename = gf_utime_rename,
+ .mknod = gf_utime_mknod,
+ .readv = gf_utime_readv,
+ .fremovexattr = gf_utime_fremovexattr,
+ .open = gf_utime_open,
+ .create = gf_utime_create,
+ .mkdir = gf_utime_mkdir,
+ .writev = gf_utime_writev,
+ .rmdir = gf_utime_rmdir,
+ .fallocate = gf_utime_fallocate,
+ .truncate = gf_utime_truncate,
+ .symlink = gf_utime_symlink,
+ .zerofill = gf_utime_zerofill,
+ .link = gf_utime_link,
+ .ftruncate = gf_utime_ftruncate,
+ .unlink = gf_utime_unlink,
+ .setattr = gf_utime_setattr,
+ .fsetattr = gf_utime_fsetattr,
+ .opendir = gf_utime_opendir,
+ .removexattr = gf_utime_removexattr,
+ .lookup = gf_utime_lookup,
+};
+struct xlator_cbks cbks = {
+ .invalidate = gf_utime_invalidate,
+ .forget = gf_utime_forget,
+ .client_destroy = gf_utime_client_destroy,
+ .ictxmerge = gf_utime_ictxmerge,
+ .release = gf_utime_release,
+ .releasedir = gf_utime_releasedir,
+ .client_disconnect = gf_utime_client_disconnect,
+};
+struct xlator_dumpops dumpops = {
+ .fdctx_to_dict = gf_utime_fdctx_to_dict,
+ .inode = gf_utime_inode,
+ .inode_to_dict = gf_utime_inode_to_dict,
+ .history = gf_utime_history,
+ .fd = gf_utime_fd,
+ .fd_to_dict = gf_utime_fd_to_dict,
+ .fdctx = gf_utime_fdctx,
+ .inodectx = gf_utime_inodectx,
+ .inodectx_to_dict = gf_utime_inodectx_to_dict,
+ .priv_to_dict = gf_utime_priv_to_dict,
+ .priv = gf_utime_priv,
+};
+
+struct volume_options options[] = {
+ {.key = {"noatime"},
+ .type = GF_OPTION_TYPE_BOOL,
+ .default_value = "on",
+ .op_version = {GD_OP_VERSION_5_0},
+ .flags = OPT_FLAG_SETTABLE | OPT_FLAG_CLIENT_OPT | OPT_FLAG_DOC,
+ .tags = {"ctime"},
+ .description = "Enable/Disable atime updation when ctime feature is "
+ "enabled. When noatime is on, atime is not updated with "
+ "ctime feature enabled and vice versa."},
+ {.key = {NULL}}};
+
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .notify = notify,
+ .reconfigure = reconfigure,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {GD_OP_VERSION_5_0},
+ .dumpops = &dumpops,
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "utime",
+ .category = GF_MAINTAINED,
+};
diff --git a/xlators/features/utime/src/utime.h b/xlators/features/utime/src/utime.h
new file mode 100644
index 00000000000..ba55eec00de
--- /dev/null
+++ b/xlators/features/utime/src/utime.h
@@ -0,0 +1,23 @@
+/*
+ Copyright (c) 2018 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __UTIME_H__
+#define __UTIME_H__
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include "utime-autogen-fops.h"
+
+typedef struct utime_priv {
+ gf_boolean_t noatime;
+} utime_priv_t;
+
+#endif /* __UTIME_H__ */
diff --git a/xlators/lib/src/libxlator.c b/xlators/lib/src/libxlator.c
index 195bd523663..8075fa0c29f 100644
--- a/xlators/lib/src/libxlator.c
+++ b/xlators/lib/src/libxlator.c
@@ -1,369 +1,490 @@
-/*Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#include "mem-types.h"
#include "libxlator.h"
+int marker_xtime_default_gauge[] = {
+ [MCNT_FOUND] = 1, [MCNT_NOTFOUND] = -1, [MCNT_ENODATA] = -1,
+ [MCNT_ENOTCONN] = -1, [MCNT_ENOENT] = -1, [MCNT_EOTHER] = -1,
+};
+
+int marker_uuid_default_gauge[] = {
+ [MCNT_FOUND] = 1, [MCNT_NOTFOUND] = 0, [MCNT_ENODATA] = 0,
+ [MCNT_ENOTCONN] = 0, [MCNT_ENOENT] = 0, [MCNT_EOTHER] = 0,
+};
+
+static int marker_idx_errno_map[] = {
+ [MCNT_FOUND] = EINVAL, [MCNT_NOTFOUND] = EINVAL,
+ [MCNT_ENOENT] = ENOENT, [MCNT_ENOTCONN] = ENOTCONN,
+ [MCNT_ENODATA] = ENODATA, [MCNT_EOTHER] = EINVAL,
+ [MCNT_MAX] = 0,
+};
/*Copy the contents of oldtimebuf to newtimbuf*/
static void
-update_timebuf (uint32_t *oldtimbuf, uint32_t *newtimebuf)
+update_timebuf(uint32_t *oldtimbuf, uint32_t *newtimebuf)
{
- newtimebuf[0] = (oldtimbuf[0]);
- newtimebuf[1] = (oldtimbuf[1]);
+ newtimebuf[0] = (oldtimbuf[0]);
+ newtimebuf[1] = (oldtimbuf[1]);
}
/* Convert Timebuf in network order to host order */
static void
-get_hosttime (uint32_t *oldtimbuf, uint32_t *newtimebuf)
+get_hosttime(uint32_t *oldtimbuf, uint32_t *newtimebuf)
{
- newtimebuf[0] = ntohl (oldtimbuf[0]);
- newtimebuf[1] = ntohl (oldtimbuf[1]);
+ newtimebuf[0] = ntohl(oldtimbuf[0]);
+ newtimebuf[1] = ntohl(oldtimbuf[1]);
}
-
-
/* Match the Incoming trusted.glusterfs.<uuid>.xtime against volume uuid */
int
-match_uuid_local (const char *name, char *uuid)
+match_uuid_local(const char *name, char *uuid)
{
- name = strtail ((char *)name, MARKER_XATTR_PREFIX);
- if (!name || name++[0] != '.')
- return -1;
+ if (!uuid || !*uuid)
+ return -1;
+
+ name = strtail((char *)name, MARKER_XATTR_PREFIX);
+ if (!name || name++ [0] != '.')
+ return -1;
- name = strtail ((char *)name, uuid);
- if (!name || strcmp (name, ".xtime") != 0)
- return -1;
+ name = strtail((char *)name, uuid);
+ if (!name || strcmp(name, ".xtime") != 0)
+ return -1;
- return 0;
+ return 0;
}
static void
-marker_local_incr_errcount (xl_marker_local_t *local, int op_errno)
+marker_local_incr_errcount(xl_marker_local_t *local, int op_errno)
{
- if (!local)
- return;
-
- switch (op_errno) {
- case ENODATA:
- local->enodata_count++;
- break;
- case ENOENT:
- local->enoent_count++;
- break;
- case ENOTCONN:
- local->enotconn_count++;
- break;
- default:
- break;
- }
+ marker_result_idx_t i = -1;
+
+ if (!local)
+ return;
+
+ switch (op_errno) {
+ case ENODATA:
+ i = MCNT_ENODATA;
+ break;
+ case ENOENT:
+ i = MCNT_ENOENT;
+ break;
+ case ENOTCONN:
+ i = MCNT_ENOTCONN;
+ break;
+ default:
+ i = MCNT_EOTHER;
+ break;
+ }
+
+ local->count[i]++;
}
-/* Aggregate all the <volid>.xtime attrs of the cluster and send the max*/
-int32_t
-cluster_markerxtime_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
-
+static int
+evaluate_marker_results(int *gauge, int *count)
{
-
- int32_t callcnt = 0;
- int ret = -1;
- uint32_t *net_timebuf = NULL;
- uint32_t host_timebuf[2] = {0,};
- char *marker_xattr = NULL;
- xl_marker_local_t *local = NULL;
- char *vol_uuid = NULL;
- char need_unwind = 0;
-
- if (!this || !frame || !frame->local || !cookie) {
- gf_log ("", GF_LOG_DEBUG, "possible NULL deref");
- need_unwind = 1;
- goto out;
+ int i = 0;
+ int op_errno = 0;
+ gf_boolean_t sane = _gf_true;
+
+ /* check if the policy of the gauge is violated;
+ * if yes, try to get the best errno, ie. look
+ * for the first position where there is a more
+ * specific kind of vioilation than the generic EINVAL
+ */
+ for (i = 0; i < MCNT_MAX; i++) {
+ if (sane) {
+ if ((gauge[i] > 0 && count[i] < gauge[i]) ||
+ (gauge[i] < 0 && count[i] >= -gauge[i])) {
+ sane = _gf_false;
+ /* generic action: adopt corresponding errno */
+ op_errno = marker_idx_errno_map[i];
+ }
+ } else {
+ /* already insane; trying to get a more informative
+ * errno by checking subsequent counters
+ */
+ if (count[i] > 0)
+ op_errno = marker_idx_errno_map[i];
}
+ if (op_errno && op_errno != EINVAL)
+ break;
+ }
- local = frame->local;
- if (!local || !local->vol_uuid) {
- gf_log (this->name, GF_LOG_DEBUG, "possible NULL deref");
- need_unwind = 1;
- goto out;
- }
-
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
-
- if (local->esomerr)
- goto unlock;
-
- vol_uuid = local->vol_uuid;
-
- if (op_ret) {
- marker_local_incr_errcount (local, op_errno);
- local->esomerr = op_errno;
- goto unlock;
- }
-
- if (!gf_asprintf (&marker_xattr, "%s.%s.%s",
- MARKER_XATTR_PREFIX, vol_uuid, XTIME)) {
- op_errno = ENOMEM;
- goto unlock;
- }
-
-
- if (dict_get_ptr (dict, marker_xattr, (void **)&net_timebuf)) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to get <uuid>.xtime attr");
- local->noxtime_count++;
- goto unlock;
- }
-
- if (local->has_xtime) {
- get_hosttime (net_timebuf, host_timebuf);
- if ( (host_timebuf[0]>local->host_timebuf[0]) ||
- (host_timebuf[0] == local->host_timebuf[0] &&
- host_timebuf[1] >= local->host_timebuf[1])) {
- update_timebuf (net_timebuf, local->net_timebuf);
- update_timebuf (host_timebuf, local->host_timebuf);
- }
-
- } else {
- get_hosttime (net_timebuf, local->host_timebuf);
- update_timebuf (net_timebuf, local->net_timebuf);
- local->has_xtime = _gf_true;
- }
+ return op_errno;
+}
- }
-unlock:
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- op_ret = 0;
- op_errno = 0;
- need_unwind = 1;
-
- if (local->has_xtime) {
- if (!dict)
- dict = dict_new();
-
- ret = dict_set_static_bin (dict, marker_xattr,
- (void *)local->net_timebuf, 8);
- if (ret) {
- op_ret = -1;
- op_errno = ENOMEM;
- goto out;
- }
- }
-
- if (local->noxtime_count)
- goto out;
-
- if (local->enodata_count || local->enotconn_count ||
- local->enoent_count) {
- op_ret = -1;
- op_errno = local->enodata_count? ENODATA:
- local->enotconn_count? ENOTCONN:
- local->enoent_count? ENOENT:
- local->esomerr;
- }
+static void
+cluster_marker_unwind(call_frame_t *frame, char *key, void *value, size_t size,
+ dict_t *dict)
+{
+ xl_marker_local_t *local = frame->local;
+ int ret = 0;
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ gf_boolean_t unref = _gf_false;
+
+ frame->local = local->xl_local;
+
+ if (local->count[MCNT_FOUND]) {
+ if (!dict) {
+ dict = dict_new();
+ if (dict) {
+ unref = _gf_true;
+ } else {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
+ }
}
-out:
- if (need_unwind && local && local->xl_specf_unwind) {
- frame->local = local->xl_local;
- local->xl_specf_unwind (frame, op_ret,
- op_errno, dict, xdata);
- } else if (need_unwind) {
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno,
- dict, xdata);
+ ret = dict_set_static_bin(dict, key, value, size);
+ if (ret) {
+ op_ret = -1;
+ op_errno = ENOMEM;
+ goto out;
}
+ }
- if (marker_xattr)
- GF_FREE (marker_xattr);
- return 0;
+ op_errno = evaluate_marker_results(local->gauge, local->count);
+ if (op_errno)
+ op_ret = -1;
+out:
+ if (local->xl_specf_unwind) {
+ local->xl_specf_unwind(frame, op_ret, op_errno, dict, NULL);
+ } else {
+ STACK_UNWIND_STRICT(getxattr, frame, op_ret, op_errno, dict, NULL);
+ }
+
+ GF_FREE(local);
+ if (unref)
+ dict_unref(dict);
}
+/* Aggregate all the <volid>.xtime attrs of the cluster and send the max*/
int32_t
-cluster_markeruuid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+cluster_markerxtime_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+
{
- int32_t callcnt = 0;
- struct volume_mark *volmark = NULL;
- xl_marker_local_t *local = NULL;
- int32_t ret = -1;
- char need_unwind = 0;
- char *vol_uuid = NULL;
-
- if (!this || !frame || !cookie) {
- gf_log ("", GF_LOG_DEBUG, "possible NULL deref");
- need_unwind = 1;
- goto out;
+ int32_t callcnt = 0;
+ uint32_t *net_timebuf = NULL;
+ uint32_t host_timebuf[2] = {
+ 0,
+ };
+ char marker_xattr[128] = {0};
+ xl_marker_local_t *local = NULL;
+
+ local = frame->local;
+
+ snprintf(marker_xattr, sizeof(marker_xattr), "%s.%s.%s",
+ MARKER_XATTR_PREFIX, local->vol_uuid, XTIME);
+
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+
+ if (op_ret) {
+ marker_local_incr_errcount(local, op_errno);
+ goto unlock;
}
- local = frame->local;
-
- if (!local) {
- gf_log (this->name, GF_LOG_DEBUG, "possible NULL deref");
- need_unwind = 1;
- goto out;
+ if (dict_get_ptr(dict, marker_xattr, (void **)&net_timebuf)) {
+ local->count[MCNT_NOTFOUND]++;
+ UNLOCK(&frame->lock);
+ gf_log(this->name, GF_LOG_WARNING,
+ "Unable to get <uuid>.xtime attr");
+ goto post_unlock;
}
- LOCK (&frame->lock);
- {
- callcnt = --local->call_count;
- vol_uuid = local->vol_uuid;
-
- if (op_ret) {
- marker_local_incr_errcount (local, op_errno);
- goto unlock;
- }
-
- ret = dict_get_bin (dict, GF_XATTR_MARKER_KEY,
- (void *)&volmark);
- if (ret)
- goto unlock;
-
- if (marker_has_volinfo (local)) {
- if ((local->volmark->major != volmark->major) ||
- (local->volmark->minor != volmark->minor)) {
- op_ret = -1;
- op_errno = EINVAL;
- goto unlock;
- }
-
- if (local->retval)
- goto unlock;
- else if (volmark->retval) {
- GF_FREE (local->volmark);
- local->volmark =
- memdup (volmark, sizeof (*volmark));
- local->retval = volmark->retval;
- } else if ((volmark->sec > local->volmark->sec) ||
- ((volmark->sec == local->volmark->sec) &&
- (volmark->usec >= local->volmark->usec))) {
- GF_FREE (local->volmark);
- local->volmark =
- memdup (volmark, sizeof (*volmark));
- }
-
- } else {
- local->volmark = memdup (volmark, sizeof (*volmark));
- VALIDATE_OR_GOTO (local->volmark, unlock);
- uuid_unparse (volmark->uuid, vol_uuid);
- if (volmark->retval)
- local->retval = volmark->retval;
- }
+ if (local->count[MCNT_FOUND]) {
+ get_hosttime(net_timebuf, host_timebuf);
+ if ((host_timebuf[0] > local->host_timebuf[0]) ||
+ (host_timebuf[0] == local->host_timebuf[0] &&
+ host_timebuf[1] >= local->host_timebuf[1])) {
+ update_timebuf(net_timebuf, local->net_timebuf);
+ update_timebuf(host_timebuf, local->host_timebuf);
+ }
+
+ } else {
+ get_hosttime(net_timebuf, local->host_timebuf);
+ update_timebuf(net_timebuf, local->net_timebuf);
+ local->count[MCNT_FOUND]++;
}
+ }
unlock:
- UNLOCK (&frame->lock);
-
- if (!callcnt) {
- op_ret = 0;
- op_errno = 0;
- need_unwind = 1;
-
- if (marker_has_volinfo (local)) {
- if (!dict)
- dict = dict_new();
-
- if (dict_set_bin (dict, GF_XATTR_MARKER_KEY,
- local->volmark,
- sizeof (struct volume_mark))) {
- op_ret = -1;
- op_errno = ENOMEM;
- }
- } else {
- op_ret = -1;
- op_errno = local->enotconn_count? ENOTCONN:
- local->enoent_count? ENOENT:EINVAL;
- }
+ UNLOCK(&frame->lock);
+post_unlock:
+ if (callcnt == 0)
+ cluster_marker_unwind(frame, marker_xattr, local->net_timebuf, 8, dict);
+
+ return 0;
+}
+
+int32_t
+cluster_markeruuid_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata)
+{
+ int32_t callcnt = 0;
+ struct volume_mark *volmark = NULL;
+ xl_marker_local_t *local = NULL;
+ int32_t ret = -1;
+ char *vol_uuid = NULL;
+
+ local = frame->local;
+
+ LOCK(&frame->lock);
+ {
+ callcnt = --local->call_count;
+ vol_uuid = local->vol_uuid;
+
+ if (op_ret) {
+ marker_local_incr_errcount(local, op_errno);
+ goto unlock;
}
- out:
- if (need_unwind && local && local->xl_specf_unwind) {
- frame->local = local->xl_local;
- local->xl_specf_unwind (frame, op_ret,
- op_errno, dict, xdata);
- return 0;
- } else if (need_unwind){
- STACK_UNWIND_STRICT (getxattr, frame, op_ret, op_errno,
- dict, xdata);
+ ret = dict_get_bin(dict, GF_XATTR_MARKER_KEY, (void *)&volmark);
+ if (ret)
+ goto unlock;
+
+ if (local->count[MCNT_FOUND]) {
+ if ((local->volmark->major != volmark->major) ||
+ (local->volmark->minor != volmark->minor)) {
+ op_ret = -1;
+ op_errno = EINVAL;
+ goto unlock;
+ }
+
+ if (local->retval) {
+ goto unlock;
+ } else if (volmark->retval) {
+ GF_FREE(local->volmark);
+ local->volmark = gf_memdup(volmark, sizeof(*volmark));
+ local->retval = volmark->retval;
+ } else if ((volmark->sec > local->volmark->sec) ||
+ ((volmark->sec == local->volmark->sec) &&
+ (volmark->usec >= local->volmark->usec))) {
+ GF_FREE(local->volmark);
+ local->volmark = gf_memdup(volmark, sizeof(*volmark));
+ }
+
+ } else {
+ local->volmark = gf_memdup(volmark, sizeof(*volmark));
+ VALIDATE_OR_GOTO(local->volmark, unlock);
+ gf_uuid_unparse(volmark->uuid, vol_uuid);
+ if (volmark->retval)
+ local->retval = volmark->retval;
+ local->count[MCNT_FOUND]++;
}
- return 0;
+ }
+unlock:
+ UNLOCK(&frame->lock);
+
+ if (callcnt == 0)
+ cluster_marker_unwind(frame, GF_XATTR_MARKER_KEY, local->volmark,
+ sizeof(*local->volmark), dict);
+
+ return 0;
}
+int
+gf_get_min_stime(xlator_t *this, dict_t *dst, char *key, data_t *value)
+{
+ int ret = -1;
+ uint32_t *net_timebuf = NULL;
+ uint32_t *value_timebuf = NULL;
+ uint32_t host_timebuf[2] = {
+ 0,
+ };
+ uint32_t host_value_timebuf[2] = {
+ 0,
+ };
+
+ /* stime should be minimum of all the other nodes */
+ ret = dict_get_bin(dst, key, (void **)&net_timebuf);
+ if (ret < 0) {
+ net_timebuf = GF_CALLOC(1, sizeof(int64_t), gf_common_mt_char);
+ if (!net_timebuf)
+ goto out;
+
+ ret = dict_set_bin(dst, key, net_timebuf, sizeof(int64_t));
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING, "key=%s: dict set failed", key);
+ goto error;
+ }
+ }
+
+ value_timebuf = data_to_bin(value);
+ if (!value_timebuf) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "key=%s: getting value of stime failed", key);
+ ret = -1;
+ goto out;
+ }
+
+ get_hosttime(value_timebuf, host_value_timebuf);
+ get_hosttime(net_timebuf, host_timebuf);
+
+ /* can't use 'min()' macro here as we need to compare two fields
+ in the array, selectively */
+ if ((host_value_timebuf[0] < host_timebuf[0]) ||
+ ((host_value_timebuf[0] == host_timebuf[0]) &&
+ (host_value_timebuf[1] < host_timebuf[1]))) {
+ update_timebuf(value_timebuf, net_timebuf);
+ }
+
+ ret = 0;
+out:
+ return ret;
+error:
+ /* To be used only when net_timebuf is not set in the dict */
+ if (net_timebuf)
+ GF_FREE(net_timebuf);
-int32_t
-cluster_getmarkerattr (call_frame_t *frame,xlator_t *this, loc_t *loc,
- const char *name, void *xl_local,
- xlator_specf_unwind_t xl_specf_getxattr_unwind,
- xlator_t **sub_volumes, int count, int type,
- char *vol_uuid)
+ return ret;
+}
+
+int
+gf_get_max_stime(xlator_t *this, dict_t *dst, char *key, data_t *value)
{
- int i = 0;
- xl_marker_local_t *local = NULL;
-
- VALIDATE_OR_GOTO (frame, err);
- VALIDATE_OR_GOTO (this, err);
- VALIDATE_OR_GOTO (loc, err);
- VALIDATE_OR_GOTO (loc->path, err);
- VALIDATE_OR_GOTO (loc->inode, err);
- VALIDATE_OR_GOTO (name, err);
- VALIDATE_OR_GOTO (xl_specf_getxattr_unwind, err);
-
- local = GF_CALLOC (sizeof (struct marker_str), 1,
- gf_common_mt_libxl_marker_local);
-
- if (!local)
- goto err;
-
- local->xl_local = xl_local;
- local->call_count = count;
- local->xl_specf_unwind = xl_specf_getxattr_unwind;
- local->vol_uuid = vol_uuid;
-
- frame->local = local;
-
- for (i=0; i < count; i++) {
- if (MARKER_UUID_TYPE == type)
- STACK_WIND (frame, cluster_markeruuid_cbk,
- *(sub_volumes + i),
- (*(sub_volumes + i))->fops->getxattr,
- loc, name, NULL);
- else if (MARKER_XTIME_TYPE == type)
- STACK_WIND (frame, cluster_markerxtime_cbk,
- *(sub_volumes + i),
- (*(sub_volumes + i))->fops->getxattr,
- loc, name, NULL);
- else {
- gf_log (this->name, GF_LOG_WARNING,
- "Unrecognized type (%d) of marker attr "
- "received", type);
- STACK_WIND (frame, default_getxattr_cbk,
- *(sub_volumes + i),
- (*(sub_volumes + i))->fops->getxattr,
- loc, name, NULL);
- break;
- }
+ int ret = -ENOMEM;
+ uint32_t *net_timebuf = NULL;
+ uint32_t *value_timebuf = NULL;
+ uint32_t host_timebuf[2] = {
+ 0,
+ };
+ uint32_t host_value_timebuf[2] = {
+ 0,
+ };
+
+ /* stime should be maximum of all the other nodes */
+ ret = dict_get_bin(dst, key, (void **)&net_timebuf);
+ if (ret < 0) {
+ net_timebuf = GF_CALLOC(1, sizeof(int64_t), gf_common_mt_char);
+ if (!net_timebuf)
+ goto out;
+
+ ret = dict_set_bin(dst, key, net_timebuf, sizeof(int64_t));
+ if (ret < 0) {
+ gf_log(this->name, GF_LOG_WARNING, "key=%s: dict set failed", key);
+ goto error;
}
+ }
+
+ value_timebuf = data_to_bin(value);
+ if (!value_timebuf) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "key=%s: getting value of stime failed", key);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ get_hosttime(value_timebuf, host_value_timebuf);
+ get_hosttime(net_timebuf, host_timebuf);
+
+ /* can't use 'max()' macro here as we need to compare two fields
+ in the array, selectively */
+ if ((host_value_timebuf[0] > host_timebuf[0]) ||
+ ((host_value_timebuf[0] == host_timebuf[0]) &&
+ (host_value_timebuf[1] > host_timebuf[1]))) {
+ update_timebuf(value_timebuf, net_timebuf);
+ }
+
+ ret = 0;
+out:
+ return ret;
+error:
+ /* To be used only when net_timebuf is not set in the dict */
+ if (net_timebuf)
+ GF_FREE(net_timebuf);
- return 0;
-err:
- return -1;
+ return ret;
+}
+static int
+_get_children_count(xlator_t *xl)
+{
+ int i = 0;
+ xlator_list_t *trav = NULL;
+ for (i = 0, trav = xl->children; trav; trav = trav->next, i++) {
+ /*'i' will have the value */
+ }
+
+ return i;
+}
+
+int
+cluster_handle_marker_getxattr(call_frame_t *frame, loc_t *loc,
+ const char *name, char *vol_uuid,
+ xlator_specf_unwind_t unwind,
+ int (*populate_args)(call_frame_t *frame,
+ int type, int *gauge,
+ xlator_t **subvols))
+{
+ xlator_t *this = frame->this;
+ xlator_t **subvols = NULL;
+ int num_subvols = 0;
+ int type = 0;
+ int i = 0;
+ int gauge[MCNT_MAX] = {0};
+ xl_marker_local_t *local = NULL;
+
+ if (GF_CLIENT_PID_GSYNCD != frame->root->pid)
+ return -EINVAL;
+
+ if (name == NULL)
+ return -EINVAL;
+
+ if (strcmp(GF_XATTR_MARKER_KEY, name) == 0) {
+ type = MARKER_UUID_TYPE;
+ memcpy(gauge, marker_uuid_default_gauge, sizeof(gauge));
+ } else if (match_uuid_local(name, vol_uuid) == 0) {
+ type = MARKER_XTIME_TYPE;
+ memcpy(gauge, marker_xtime_default_gauge, sizeof(gauge));
+ } else {
+ return -EINVAL;
+ }
+
+ num_subvols = _get_children_count(this);
+ subvols = alloca(num_subvols * sizeof(*subvols));
+ num_subvols = populate_args(frame, type, gauge, subvols);
+
+ local = GF_CALLOC(sizeof(struct marker_str), 1,
+ gf_common_mt_libxl_marker_local);
+
+ if (!local)
+ goto fail;
+
+ local->xl_local = frame->local;
+ local->call_count = num_subvols;
+ local->xl_specf_unwind = unwind;
+ local->vol_uuid = vol_uuid;
+ memcpy(local->gauge, gauge, sizeof(local->gauge));
+
+ frame->local = local;
+
+ for (i = 0; i < num_subvols; i++) {
+ if (MARKER_UUID_TYPE == type)
+ STACK_WIND(frame, cluster_markeruuid_cbk, subvols[i],
+ subvols[i]->fops->getxattr, loc, name, NULL);
+ else if (MARKER_XTIME_TYPE == type)
+ STACK_WIND(frame, cluster_markerxtime_cbk, subvols[i],
+ subvols[i]->fops->getxattr, loc, name, NULL);
+ }
+
+ return 0;
+fail:
+ if (unwind)
+ unwind(frame, -1, ENOMEM, NULL, NULL);
+ else
+ default_getxattr_failure_cbk(frame, ENOMEM);
+ return 0;
}
diff --git a/xlators/lib/src/libxlator.h b/xlators/lib/src/libxlator.h
index 560e388c75a..81da4060d55 100644
--- a/xlators/lib/src/libxlator.h
+++ b/xlators/lib/src/libxlator.h
@@ -1,113 +1,147 @@
-/*Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+/*
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _LIBXLATOR_H
#define _LIBXLATOR_H
+#include <glusterfs/defaults.h>
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "logging.h"
-#include "defaults.h"
-#include "common-utils.h"
-#include "compat.h"
-#include "compat-errno.h"
-
+#include <stdint.h> // for int32_t
+#include "glusterfs/dict.h" // for dict_t, data_t
+#include "glusterfs/globals.h" // for xlator_t, loc_t
+#include "glusterfs/stack.h" // for call_frame_t
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
#define MARKER_XATTR_PREFIX "trusted.glusterfs"
-#define XTIME "xtime"
-#define VOLUME_MARK "volume-mark"
+#define XTIME "xtime"
+#define VOLUME_MARK "volume-mark"
#define GF_XATTR_MARKER_KEY MARKER_XATTR_PREFIX "." VOLUME_MARK
#define UUID_SIZE 36
-#define MARKER_UUID_TYPE 1
-#define MARKER_XTIME_TYPE 2
-#define GF_XATTR_QUOTA_SIZE_KEY "trusted.glusterfs.quota.size"
-
-
-typedef int32_t (*xlator_specf_unwind_t) (call_frame_t *frame,
- int op_ret, int op_errno,
- dict_t *dict, dict_t *xdata);
+#define MARKER_UUID_TYPE 1
+#define MARKER_XTIME_TYPE 2
+typedef int32_t (*xlator_specf_unwind_t)(call_frame_t *frame, int op_ret,
+ int op_errno, dict_t *dict,
+ dict_t *xdata);
struct volume_mark {
- uint8_t major;
- uint8_t minor;
- uint8_t uuid[16];
- uint8_t retval;
- uint32_t sec;
- uint32_t usec;
-}__attribute__ ((__packed__));
+ uint8_t major;
+ uint8_t minor;
+ uint8_t uuid[16];
+ uint8_t retval;
+ uint32_t sec;
+ uint32_t usec;
+} __attribute__((__packed__));
+
+/*
+ * The enumerated type here
+ * is used to index two kind
+ * of integer arrays:
+ * - gauges
+ * - counters
+
+ * A counter is used internally,
+ * in getxattr callbacks, to count
+ * the results, categorized as
+ * the enum names suggest. So values
+ * in the counter are always non-negative.
+
+ * Gauges are part of the API.
+ * The caller passes one to the
+ * top-level aggregator function,
+ * cluster_getmarkerattr(). The gauge
+ * defines an evaluation policy for the
+ * counter. That is, at the
+ * end of the aggregation process
+ * the gauge is matched against the
+ * counter, and the policy
+ * represented by the gauge decides
+ * whether to return with success or failure,
+ * and in latter case, what particular failure
+ * case (errno).
+
+ * The rules are the following: for some index i,
+ * - if gauge[i] == 0, no requirement is set
+ * against counter[i];
+ * - if gauge[i] > 0, counter[i] >= gauge[i]
+ * is required;
+ * - if gauge[i] < 0, counter[i] < |gauge[i]|
+ * is required.
+
+ * If the requirement is not met, then i is mapped
+ * to the respective errno (MCNT_ENOENT -> ENOENT),
+ * or in lack of that, EINVAL.
+
+ * Cf. evaluate_marker_results() and marker_idx_errno_map[]
+ * in libxlator.c
+
+ * We provide two default gauges, one intended for xtime
+ * aggregation, other for volume mark aggregation. The
+ * policies they represent agree with the hard-coded
+ * one prior to gauges. Cf. marker_xtime_default_gauge
+ * and marker_uuid_default_gauge in libxlator.c
+ */
+
+typedef enum {
+ MCNT_FOUND,
+ MCNT_NOTFOUND,
+ MCNT_ENODATA,
+ MCNT_ENOTCONN,
+ MCNT_ENOENT,
+ MCNT_EOTHER,
+ MCNT_MAX
+} marker_result_idx_t;
+
+extern int marker_xtime_default_gauge[];
+extern int marker_uuid_default_gauge[];
struct marker_str {
- struct volume_mark *volmark;
- data_t *data;
-
- uint32_t host_timebuf[2];
- uint32_t net_timebuf[2];
- int32_t call_count;
- unsigned has_xtime:1;
- int32_t enoent_count;
- int32_t enotconn_count;
- int32_t enodata_count;
- int32_t noxtime_count;
-
- int esomerr;
-
- xlator_specf_unwind_t xl_specf_unwind;
- void *xl_local;
- char *vol_uuid;
- uint8_t retval;
+ struct volume_mark *volmark;
+ data_t *data;
+
+ uint32_t host_timebuf[2];
+ uint32_t net_timebuf[2];
+ int32_t call_count;
+ int gauge[MCNT_MAX];
+ int count[MCNT_MAX];
+
+ xlator_specf_unwind_t xl_specf_unwind;
+ void *xl_local;
+ char *vol_uuid;
+ uint8_t retval;
};
typedef struct marker_str xl_marker_local_t;
-static inline gf_boolean_t
-marker_has_volinfo (xl_marker_local_t *marker)
-{
- if (marker->volmark)
- return _gf_true;
- else
- return _gf_false;
-}
-
int32_t
-cluster_markerxtime_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
- int op_ret, int op_errno, dict_t *dict, dict_t *xdata);
-
-int32_t
-cluster_markeruuid_cbk (call_frame_t *frame, void *cookie, xlator_t *this,
+cluster_markerxtime_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
int op_ret, int op_errno, dict_t *dict, dict_t *xdata);
int32_t
-cluster_getmarkerattr (call_frame_t *frame,xlator_t *this, loc_t *loc,
- const char *name, void *xl_local,
- xlator_specf_unwind_t xl_specf_getxattr_unwind,
- xlator_t **sub_volumes, int count, int type,
- char *vol_uuid);
+cluster_markeruuid_cbk(call_frame_t *frame, void *cookie, xlator_t *this,
+ int op_ret, int op_errno, dict_t *dict, dict_t *xdata);
int
-match_uuid_local (const char *name, char *uuid);
-
+cluster_handle_marker_getxattr(call_frame_t *frame, loc_t *loc,
+ const char *name, char *vol_uuid,
+ xlator_specf_unwind_t unwind,
+ int (*populate_args)(call_frame_t *frame,
+ int type, int *gauge,
+ xlator_t **subvols));
+int
+match_uuid_local(const char *name, char *uuid);
+int
+gf_get_min_stime(xlator_t *this, dict_t *dst, char *key, data_t *value);
+int
+gf_get_max_stime(xlator_t *this, dict_t *dst, char *key, data_t *value);
#endif /* !_LIBXLATOR_H */
diff --git a/xlators/meta/Makefile.am b/xlators/meta/Makefile.am
index e1c45f3051c..af437a64d6d 100644
--- a/xlators/meta/Makefile.am
+++ b/xlators/meta/Makefile.am
@@ -1 +1 @@
-SUBDIRS=src \ No newline at end of file
+SUBDIRS = src
diff --git a/xlators/meta/src/Makefile.am b/xlators/meta/src/Makefile.am
index 385ff553f59..29b871d7984 100644
--- a/xlators/meta/src/Makefile.am
+++ b/xlators/meta/src/Makefile.am
@@ -1,10 +1,44 @@
-xlator_PROGRAMS = meta.so
-xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/
+xlator_LTLIBRARIES = meta.la
+xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator
-meta_so_SOURCES = meta.c tree.c misc.c view.c
-noinst_HEADERS = meta.h tree.h misc.h view.h
+meta_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall \
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles
+meta_la_SOURCES = meta.c meta-helpers.c meta-defaults.c \
+ root-dir.c \
+ graphs-dir.c \
+ frames-file.c \
+ graph-dir.c \
+ active-link.c \
+ xlator-dir.c \
+ top-link.c \
+ logging-dir.c \
+ logfile-link.c \
+ loglevel-file.c \
+ process_uuid-file.c \
+ volfile-file.c \
+ view-dir.c \
+ subvolumes-dir.c \
+ subvolume-link.c \
+ type-file.c \
+ version-file.c \
+ options-dir.c \
+ option-file.c \
+ cmdline-file.c \
+ name-file.c \
+ private-file.c \
+ history-file.c \
+ mallinfo-file.c \
+ meminfo-file.c \
+ measure-file.c \
+ profile-file.c
-CLEANFILES =
+meta_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la
+
+noinst_HEADERS = meta.h meta-hooks.h meta-mem-types.h
+
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src
+
+AM_CFLAGS = -Wall $(GF_CFLAGS)
+
+CLEANFILES =
diff --git a/xlators/meta/src/active-link.c b/xlators/meta/src/active-link.c
new file mode 100644
index 00000000000..7ee780d89e9
--- /dev/null
+++ b/xlators/meta/src/active-link.c
@@ -0,0 +1,34 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+
+static int
+active_link_fill(xlator_t *this, inode_t *inode, strfd_t *strfd)
+{
+ strprintf(strfd, "%s", this->ctx->active->graph_uuid);
+
+ return 0;
+}
+
+struct meta_ops active_link_ops = {.link_fill = active_link_fill};
+
+int
+meta_active_link_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &active_link_ops);
+
+ return 0;
+}
diff --git a/xlators/meta/src/cmdline-file.c b/xlators/meta/src/cmdline-file.c
new file mode 100644
index 00000000000..eb24e985af9
--- /dev/null
+++ b/xlators/meta/src/cmdline-file.c
@@ -0,0 +1,39 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
+
+static int
+cmdline_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+ if (this->ctx->cmdlinestr)
+ strprintf(strfd, "{ \n \"Cmdlinestr\": \"%s\"\n}",
+ this->ctx->cmdlinestr);
+ return strfd->size;
+}
+
+static struct meta_ops cmdline_file_ops = {
+ .file_fill = cmdline_file_fill,
+};
+
+int
+meta_cmdline_file_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &cmdline_file_ops);
+
+ return 0;
+}
diff --git a/xlators/meta/src/frames-file.c b/xlators/meta/src/frames-file.c
new file mode 100644
index 00000000000..9a13db9a934
--- /dev/null
+++ b/xlators/meta/src/frames-file.c
@@ -0,0 +1,107 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
+
+static int
+frames_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+ struct call_pool *pool = NULL;
+ call_stack_t *stack = NULL;
+ call_frame_t *frame = NULL;
+ int i = 0;
+ int j = 1;
+
+ if (!this || !file || !strfd)
+ return -1;
+
+ pool = this->ctx->pool;
+
+ strprintf(strfd, "{ \n\t\"Stack\": [\n");
+
+ LOCK(&pool->lock);
+ {
+ list_for_each_entry(stack, &pool->all_frames, all_frames)
+ {
+ strprintf(strfd, "\t {\n");
+ strprintf(strfd, "\t\t\"Number\": %d,\n", ++i);
+ strprintf(strfd, "\t\t\"Frame\": [\n");
+ j = 1;
+ list_for_each_entry(frame, &stack->myframes, frames)
+ {
+ strprintf(strfd, "\t\t {\n");
+ strprintf(strfd, "\t\t\t\"Number\": %d,\n", j++);
+ strprintf(strfd, "\t\t\t\"Xlator\": \"%s\",\n",
+ frame->this->name);
+ if (frame->begin.tv_sec)
+ strprintf(strfd, "\t\t\t\"Creation_time\": %d.%09d,\n",
+ (int)frame->begin.tv_sec,
+ (int)frame->begin.tv_nsec);
+ strprintf(strfd, " \t\t\t\"Refcount\": %d,\n",
+ frame->ref_count);
+ if (frame->parent)
+ strprintf(strfd, "\t\t\t\"Parent\": \"%s\",\n",
+ frame->parent->this->name);
+ if (frame->wind_from)
+ strprintf(strfd, "\t\t\t\"Wind_from\": \"%s\",\n",
+ frame->wind_from);
+ if (frame->wind_to)
+ strprintf(strfd, "\t\t\t\"Wind_to\": \"%s\",\n",
+ frame->wind_to);
+ if (frame->unwind_from)
+ strprintf(strfd, "\t\t\t\"Unwind_from\": \"%s\",\n",
+ frame->unwind_from);
+ if (frame->unwind_to)
+ strprintf(strfd, "\t\t\t\"Unwind_to\": \"%s\",\n",
+ frame->unwind_to);
+ strprintf(strfd, "\t\t\t\"Complete\": %d\n", frame->complete);
+ if (list_is_last(&frame->frames, &stack->myframes))
+ strprintf(strfd, "\t\t }\n");
+ else
+ strprintf(strfd, "\t\t },\n");
+ }
+ strprintf(strfd, "\t\t],\n");
+ strprintf(strfd, "\t\t\"Unique\": %" PRId64 ",\n", stack->unique);
+ strprintf(strfd, "\t\t\"Type\": \"%s\",\n", gf_fop_list[stack->op]);
+ strprintf(strfd, "\t\t\"UID\": %d,\n", stack->uid);
+ strprintf(strfd, "\t\t\"GID\": %d,\n", stack->gid);
+ strprintf(strfd, "\t\t\"LK_owner\": \"%s\"\n",
+ lkowner_utoa(&stack->lk_owner));
+ if (i == (int)pool->cnt)
+ strprintf(strfd, "\t }\n");
+ else
+ strprintf(strfd, "\t },\n");
+ }
+ strprintf(strfd, "\t],\n");
+ strprintf(strfd, "\t\"Call_Count\": %d\n", (int)pool->cnt);
+ strprintf(strfd, "}");
+ }
+ UNLOCK(&pool->lock);
+
+ return strfd->size;
+}
+
+static struct meta_ops frames_file_ops = {
+ .file_fill = frames_file_fill,
+};
+
+int
+meta_frames_file_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &frames_file_ops);
+ return 0;
+}
diff --git a/xlators/meta/src/graph-dir.c b/xlators/meta/src/graph-dir.c
new file mode 100644
index 00000000000..a8f4787880d
--- /dev/null
+++ b/xlators/meta/src/graph-dir.c
@@ -0,0 +1,98 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+static struct meta_dirent graph_dir_dirents[] = {
+ DOT_DOTDOT,
+
+ {
+ .name = "top",
+ .type = IA_IFLNK,
+ .hook = meta_top_link_hook,
+ },
+ {
+ .name = "volfile",
+ .type = IA_IFREG,
+ .hook = meta_volfile_file_hook,
+ },
+ {.name = NULL}};
+
+static int
+graph_dir_fill(xlator_t *this, inode_t *inode, struct meta_dirent **dp)
+{
+ struct meta_dirent *dirents = NULL;
+ glusterfs_graph_t *graph = NULL;
+ int i = 0;
+ int count = 0;
+ xlator_t *xl = NULL;
+
+ graph = meta_ctx_get(inode, this);
+
+ for (xl = graph->first; xl; xl = xl->next)
+ count++;
+
+ dirents = GF_MALLOC(sizeof(*dirents) * count, gf_meta_mt_dirents_t);
+ if (!dirents)
+ return -1;
+
+ i = 0;
+ for (xl = graph->first; xl; xl = xl->next) {
+ dirents[i].name = gf_strdup(xl->name);
+ dirents[i].type = IA_IFDIR;
+ dirents[i].hook = meta_xlator_dir_hook;
+ i++;
+ }
+
+ *dp = dirents;
+ return i;
+}
+
+struct meta_ops graph_dir_ops = {
+ .fixed_dirents = graph_dir_dirents,
+ .dir_fill = graph_dir_fill,
+};
+
+static glusterfs_graph_t *
+glusterfs_graph_lookup(xlator_t *this, const char *graph_uuid)
+{
+ glusterfs_graph_t *graph = NULL;
+ glusterfs_graph_t *tmp = NULL;
+
+ list_for_each_entry(tmp, &this->ctx->graphs, list)
+ {
+ if (strcmp(graph_uuid, tmp->graph_uuid) == 0) {
+ graph = tmp;
+ break;
+ }
+ }
+
+ return graph;
+}
+
+int
+meta_graph_dir_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ glusterfs_graph_t *graph = NULL;
+
+ graph = glusterfs_graph_lookup(this, loc->name);
+
+ meta_ops_set(loc->inode, this, &graph_dir_ops);
+
+ meta_ctx_set(loc->inode, this, (void *)graph);
+
+ return 0;
+}
diff --git a/xlators/meta/src/graphs-dir.c b/xlators/meta/src/graphs-dir.c
new file mode 100644
index 00000000000..a1ffbca7d5a
--- /dev/null
+++ b/xlators/meta/src/graphs-dir.c
@@ -0,0 +1,67 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+static struct meta_dirent graphs_dir_dirents[] = {
+ DOT_DOTDOT,
+
+ {
+ .name = "active",
+ .type = IA_IFLNK,
+ .hook = meta_active_link_hook,
+ },
+ {.name = NULL}};
+
+static int
+graphs_dir_fill(xlator_t *this, inode_t *dir, struct meta_dirent **dp)
+{
+ glusterfs_graph_t *graph = NULL;
+ int graphs_count = 0;
+ int i = 0;
+ struct meta_dirent *dirents = NULL;
+
+ list_for_each_entry(graph, &this->ctx->graphs, list) { graphs_count++; }
+
+ dirents = GF_CALLOC(sizeof(*dirents), graphs_count + 3,
+ gf_meta_mt_dirents_t);
+ if (!dirents)
+ return -1;
+
+ i = 0;
+ list_for_each_entry(graph, &this->ctx->graphs, list)
+ {
+ dirents[i].name = gf_strdup(graph->graph_uuid);
+ dirents[i].type = IA_IFDIR;
+ dirents[i].hook = meta_graph_dir_hook;
+ i++;
+ }
+
+ *dp = dirents;
+
+ return i;
+}
+
+struct meta_ops graphs_dir_ops = {.fixed_dirents = graphs_dir_dirents,
+ .dir_fill = graphs_dir_fill};
+
+int
+meta_graphs_dir_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &graphs_dir_ops);
+
+ return 0;
+}
diff --git a/xlators/meta/src/history-file.c b/xlators/meta/src/history-file.c
new file mode 100644
index 00000000000..7742a635fed
--- /dev/null
+++ b/xlators/meta/src/history-file.c
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/statedump.h>
+
+static int
+history_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+ xlator_t *xl = NULL;
+
+ xl = meta_ctx_get(file, this);
+
+ gf_proc_dump_xlator_history(xl, strfd);
+
+ return strfd->size;
+}
+
+static struct meta_ops history_file_ops = {
+ .file_fill = history_file_fill,
+};
+
+int
+meta_history_file_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &history_file_ops);
+
+ meta_ctx_set(loc->inode, this, meta_ctx_get(loc->parent, this));
+
+ return 0;
+}
diff --git a/xlators/meta/src/logfile-link.c b/xlators/meta/src/logfile-link.c
new file mode 100644
index 00000000000..616a54518c0
--- /dev/null
+++ b/xlators/meta/src/logfile-link.c
@@ -0,0 +1,34 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+
+static int
+logfile_link_fill(xlator_t *this, inode_t *inode, strfd_t *strfd)
+{
+ strprintf(strfd, "%s", this->ctx->log.filename);
+
+ return 0;
+}
+
+struct meta_ops logfile_link_ops = {.link_fill = logfile_link_fill};
+
+int
+meta_logfile_link_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &logfile_link_ops);
+
+ return 0;
+}
diff --git a/xlators/meta/src/logging-dir.c b/xlators/meta/src/logging-dir.c
new file mode 100644
index 00000000000..46e6f9e95dd
--- /dev/null
+++ b/xlators/meta/src/logging-dir.c
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+static struct meta_dirent logging_dir_dirents[] = {
+ DOT_DOTDOT,
+
+ {
+ .name = "logfile",
+ .type = IA_IFLNK,
+ .hook = meta_logfile_link_hook,
+ },
+ {
+ .name = "loglevel",
+ .type = IA_IFREG,
+ .hook = meta_loglevel_file_hook,
+ },
+ {.name = NULL}};
+
+struct meta_ops logging_dir_ops = {
+ .fixed_dirents = logging_dir_dirents,
+};
+
+int
+meta_logging_dir_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &logging_dir_ops);
+
+ return 0;
+}
diff --git a/xlators/meta/src/loglevel-file.c b/xlators/meta/src/loglevel-file.c
new file mode 100644
index 00000000000..eeeeeaa5907
--- /dev/null
+++ b/xlators/meta/src/loglevel-file.c
@@ -0,0 +1,50 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include <glusterfs/strfd.h>
+
+static int
+loglevel_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+ strprintf(strfd, "%d\n", this->ctx->log.loglevel);
+
+ return strfd->size;
+}
+
+static int
+loglevel_file_write(xlator_t *this, fd_t *fd, struct iovec *iov, int count)
+{
+ long int level = -1;
+
+ level = strtol(iov[0].iov_base, NULL, 0);
+ if (level >= GF_LOG_NONE && level <= GF_LOG_TRACE)
+ gf_log_set_loglevel(this->ctx, level);
+
+ return iov_length(iov, count);
+}
+
+static struct meta_ops loglevel_file_ops = {
+ .file_fill = loglevel_file_fill,
+ .file_write = loglevel_file_write,
+};
+
+int
+meta_loglevel_file_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &loglevel_file_ops);
+
+ return 0;
+}
diff --git a/xlators/meta/src/mallinfo-file.c b/xlators/meta/src/mallinfo-file.c
new file mode 100644
index 00000000000..b4396d72189
--- /dev/null
+++ b/xlators/meta/src/mallinfo-file.c
@@ -0,0 +1,36 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include <glusterfs/statedump.h>
+
+static int
+mallinfo_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+ gf_proc_dump_mallinfo(strfd);
+ return strfd->size;
+}
+
+static struct meta_ops mallinfo_file_ops = {
+ .file_fill = mallinfo_file_fill,
+};
+
+int
+meta_mallinfo_file_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &mallinfo_file_ops);
+
+ return 0;
+}
diff --git a/xlators/meta/src/measure-file.c b/xlators/meta/src/measure-file.c
new file mode 100644
index 00000000000..52e92e48590
--- /dev/null
+++ b/xlators/meta/src/measure-file.c
@@ -0,0 +1,49 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include <glusterfs/strfd.h>
+
+static int
+measure_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+ strprintf(strfd, "%d\n", this->ctx->measure_latency);
+
+ return strfd->size;
+}
+
+static int
+measure_file_write(xlator_t *this, fd_t *fd, struct iovec *iov, int count)
+{
+ long int num = -1;
+
+ num = strtol(iov[0].iov_base, NULL, 0);
+ this->ctx->measure_latency = !!num;
+
+ return iov_length(iov, count);
+}
+
+static struct meta_ops measure_file_ops = {
+ .file_fill = measure_file_fill,
+ .file_write = measure_file_write,
+};
+
+int
+meta_measure_file_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &measure_file_ops);
+
+ return 0;
+}
diff --git a/xlators/meta/src/meminfo-file.c b/xlators/meta/src/meminfo-file.c
new file mode 100644
index 00000000000..d889dfb2ae8
--- /dev/null
+++ b/xlators/meta/src/meminfo-file.c
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/statedump.h>
+
+static int
+meminfo_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+ xlator_t *xl = NULL;
+
+ xl = meta_ctx_get(file, this);
+
+ gf_proc_dump_xlator_meminfo(xl, strfd);
+
+ return strfd->size;
+}
+
+static struct meta_ops meminfo_file_ops = {
+ .file_fill = meminfo_file_fill,
+};
+
+int
+meta_meminfo_file_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &meminfo_file_ops);
+
+ meta_ctx_set(loc->inode, this, meta_ctx_get(loc->parent, this));
+
+ return 0;
+}
diff --git a/xlators/meta/src/meta-defaults.c b/xlators/meta/src/meta-defaults.c
new file mode 100644
index 00000000000..91c328473f8
--- /dev/null
+++ b/xlators/meta/src/meta-defaults.c
@@ -0,0 +1,655 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+
+#include <glusterfs/compat-errno.h>
+
+int
+meta_default_fgetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ return default_fgetxattr_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_fsetxattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ return default_fsetxattr_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_setxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *dict, int32_t flags, dict_t *xdata)
+{
+ return default_setxattr_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_statfs(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ return default_statfs_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t flags, dict_t *xdata)
+{
+ return default_fsyncdir_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
+{
+ META_STACK_UNWIND(opendir, frame, 0, 0, fd, xdata);
+ return 0;
+}
+
+int
+meta_default_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ struct iatt iatt = {};
+
+ meta_iatt_fill(&iatt, fd->inode, fd->inode->ia_type);
+
+ META_STACK_UNWIND(fstat, frame, 0, 0, &iatt, xdata);
+
+ return 0;
+}
+
+int
+meta_default_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
+{
+ return default_fsync_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
+{
+ META_STACK_UNWIND(flush, frame, 0, 0, xdata);
+ return 0;
+}
+
+int
+meta_default_writev(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iovec *vector, int32_t count, off_t off,
+ uint32_t flags, struct iobref *iobref, dict_t *xdata)
+{
+ struct meta_ops *ops = NULL;
+ int ret = 0;
+ struct iatt dummy = {};
+
+ ops = meta_ops_get(fd->inode, this);
+ if (!ops)
+ goto err;
+
+ if (!ops->file_write)
+ goto err;
+
+ ret = ops->file_write(this, fd, vector, count);
+
+ META_STACK_UNWIND(writev, frame, (ret >= 0 ? ret : -1),
+ (ret < 0 ? -ret : 0), &dummy, &dummy, xdata);
+ return 0;
+err:
+ return default_writev_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
+{
+ meta_fd_t *meta_fd = NULL;
+ struct iovec iov = {};
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ off_t copy_offset = 0;
+ int copy_size = 0;
+ struct iatt iatt = {};
+
+ meta_fd = meta_fd_get(fd, this);
+ if (!meta_fd)
+ return default_readv_failure_cbk(frame, ENODATA);
+
+ if (!meta_fd->size)
+ meta_file_fill(this, fd);
+
+ iobuf = iobuf_get2(this->ctx->iobuf_pool, size);
+ if (!iobuf)
+ return default_readv_failure_cbk(frame, ENOMEM);
+
+ iobref = iobref_new();
+ if (!iobref) {
+ iobuf_unref(iobuf);
+ return default_readv_failure_cbk(frame, ENOMEM);
+ }
+
+ if (iobref_add(iobref, iobuf) != 0) {
+ iobref_unref(iobref);
+ iobuf_unref(iobuf);
+ return default_readv_failure_cbk(frame, ENOMEM);
+ }
+
+ iov.iov_base = iobuf_ptr(iobuf);
+
+ /* iobref would have taken a ref */
+ iobuf_unref(iobuf);
+
+ copy_offset = min(meta_fd->size, offset);
+ copy_size = min(size, (meta_fd->size - copy_offset));
+
+ if (copy_size)
+ memcpy(iov.iov_base, meta_fd->data + copy_offset, copy_size);
+ iov.iov_len = copy_size;
+
+ META_STACK_UNWIND(readv, frame, copy_size, 0, &iov, 1, &iatt, iobref, 0);
+
+ iobref_unref(iobref);
+
+ return 0;
+}
+
+int
+meta_default_open(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, fd_t *fd, dict_t *xdata)
+{
+ dict_t *xdata_rsp = NULL;
+
+ xdata_rsp = meta_direct_io_mode(xdata, frame);
+
+ META_STACK_UNWIND(open, frame, 0, 0, fd, xdata_rsp);
+
+ return 0;
+}
+
+int
+meta_default_create(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t flags, mode_t mode, mode_t umask, fd_t *fd,
+ dict_t *xdata)
+{
+ return default_create_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_link(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ return default_link_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_rename(call_frame_t *frame, xlator_t *this, loc_t *oldloc,
+ loc_t *newloc, dict_t *xdata)
+{
+ return default_rename_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_symlink(call_frame_t *frame, xlator_t *this, const char *linkpath,
+ loc_t *loc, mode_t umask, dict_t *xdata)
+{
+ return default_symlink_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_rmdir(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags,
+ dict_t *xdata)
+{
+ return default_rmdir_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_unlink(call_frame_t *frame, xlator_t *this, loc_t *loc, int xflag,
+ dict_t *xdata)
+{
+ return default_unlink_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_mkdir(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ mode_t umask, dict_t *xdata)
+{
+ return default_mkdir_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_mknod(call_frame_t *frame, xlator_t *this, loc_t *loc, mode_t mode,
+ dev_t rdev, mode_t umask, dict_t *xdata)
+{
+ return default_mknod_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ size_t size, dict_t *xdata)
+{
+ struct meta_ops *ops = NULL;
+ strfd_t *strfd = NULL;
+ struct iatt iatt = {};
+ int len = -1;
+
+ ops = meta_ops_get(loc->inode, this);
+ if (!ops || !ops->link_fill) {
+ META_STACK_UNWIND(readlink, frame, -1, EPERM, 0, 0, 0);
+ return 0;
+ }
+
+ strfd = strfd_open();
+ if (!strfd) {
+ META_STACK_UNWIND(readlink, frame, -1, ENOMEM, 0, 0, 0);
+ return 0;
+ }
+
+ ops->link_fill(this, loc->inode, strfd);
+
+ meta_iatt_fill(&iatt, loc->inode, IA_IFLNK);
+
+ if (strfd->data) {
+ len = strlen(strfd->data);
+ META_STACK_UNWIND(readlink, frame, len, 0, strfd->data, &iatt, xdata);
+ } else
+ META_STACK_UNWIND(readlink, frame, -1, ENODATA, 0, 0, 0);
+
+ strfd_close(strfd);
+
+ return 0;
+}
+
+int
+meta_default_access(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ int32_t mask, dict_t *xdata)
+{
+ return default_access_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, dict_t *xdata)
+{
+ struct iatt iatt = {};
+
+ meta_iatt_fill(&iatt, fd->inode, IA_IFREG);
+
+ META_STACK_UNWIND(ftruncate, frame, 0, 0, &iatt, &iatt, xdata);
+
+ return 0;
+}
+
+int
+meta_default_getxattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ return default_getxattr_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_xattrop(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ return default_xattrop_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_fxattrop(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ gf_xattrop_flags_t flags, dict_t *dict, dict_t *xdata)
+{
+ return default_fxattrop_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_removexattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ const char *name, dict_t *xdata)
+{
+ return default_removexattr_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_fremovexattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ const char *name, dict_t *xdata)
+{
+ return default_fremovexattr_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_lk(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t cmd,
+ struct gf_flock *lock, dict_t *xdata)
+{
+ return default_lk_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_inodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, int32_t cmd, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ return default_inodelk_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_finodelk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, int32_t cmd, struct gf_flock *lock,
+ dict_t *xdata)
+{
+ return default_finodelk_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_entrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ loc_t *loc, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ return default_entrylk_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_fentrylk(call_frame_t *frame, xlator_t *this, const char *volume,
+ fd_t *fd, const char *basename, entrylk_cmd cmd,
+ entrylk_type type, dict_t *xdata)
+{
+ return default_fentrylk_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_rchecksum(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, int32_t len, dict_t *xdata)
+{
+ return default_rchecksum_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t off, dict_t *xdata)
+{
+ meta_fd_t *meta_fd = NULL;
+ int i = 0;
+ gf_dirent_t head;
+ gf_dirent_t *list = NULL;
+ int ret = 0;
+ int this_size = 0;
+ int filled_size = 0;
+ int fixed_size = 0;
+ int dyn_size = 0;
+ struct meta_dirent *fixed_dirents = NULL;
+ struct meta_dirent *dyn_dirents = NULL;
+ struct meta_dirent *dirents = NULL;
+ struct meta_dirent *end = NULL;
+ struct meta_ops *ops = NULL;
+
+ INIT_LIST_HEAD(&head.list);
+
+ ops = meta_ops_get(fd->inode, this);
+ if (!ops)
+ goto err;
+
+ meta_fd = meta_fd_get(fd, this);
+ if (!meta_fd)
+ goto err;
+
+ meta_dir_fill(this, fd);
+
+ fixed_dirents = ops->fixed_dirents;
+ fixed_size = fixed_dirents_len(fixed_dirents);
+
+ dyn_dirents = meta_fd->dirents;
+ dyn_size = meta_fd->size;
+
+ for (i = off; i < (fixed_size + dyn_size);) {
+ if (i >= fixed_size) {
+ dirents = dyn_dirents + (i - fixed_size);
+ end = dyn_dirents + dyn_size;
+ } else {
+ dirents = fixed_dirents + i;
+ end = fixed_dirents + fixed_size;
+ }
+
+ while (dirents < end) {
+ this_size = sizeof(gf_dirent_t) + strlen(dirents->name) + 1;
+ if (this_size + filled_size > size)
+ goto unwind;
+
+ list = gf_dirent_for_name(dirents->name);
+ if (!list)
+ break;
+
+ list->d_off = i + 1;
+ list->d_ino = i + 42;
+ switch (dirents->type) {
+ case IA_IFDIR:
+ list->d_type = DT_DIR;
+ break;
+ case IA_IFCHR:
+ list->d_type = DT_CHR;
+ break;
+ case IA_IFBLK:
+ list->d_type = DT_BLK;
+ break;
+ case IA_IFIFO:
+ list->d_type = DT_FIFO;
+ break;
+ case IA_IFLNK:
+ list->d_type = DT_LNK;
+ break;
+ case IA_IFREG:
+ list->d_type = DT_REG;
+ break;
+ case IA_IFSOCK:
+ list->d_type = DT_SOCK;
+ break;
+ case IA_INVAL:
+ list->d_type = DT_UNKNOWN;
+ break;
+ }
+
+ list_add_tail(&list->list, &head.list);
+ ret++;
+ i++;
+ dirents++;
+ filled_size += this_size;
+ }
+ }
+
+unwind:
+ META_STACK_UNWIND(readdir, frame, ret, 0, &head, xdata);
+
+ gf_dirent_free(&head);
+
+ return 0;
+err:
+ META_STACK_UNWIND(readdir, frame, -1, ENOMEM, 0, 0);
+ return 0;
+}
+
+int
+meta_default_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ size_t size, off_t off, dict_t *xdata)
+{
+ return meta_default_readdir(frame, this, fd, size, off, xdata);
+}
+
+int
+meta_default_setattr(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ return default_setattr_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ off_t offset, dict_t *xdata)
+{
+ struct iatt iatt = {};
+
+ meta_iatt_fill(&iatt, loc->inode, IA_IFREG);
+
+ META_STACK_UNWIND(truncate, frame, 0, 0, &iatt, &iatt, xdata);
+
+ return 0;
+}
+
+int
+meta_default_stat(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ struct iatt iatt = {};
+
+ meta_iatt_fill(&iatt, loc->inode, loc->inode->ia_type);
+
+ META_STACK_UNWIND(stat, frame, 0, 0, &iatt, xdata);
+
+ return 0;
+}
+
+int
+meta_default_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ struct meta_ops *ops = NULL;
+ struct meta_dirent *dirent = NULL;
+ struct meta_dirent *dp = NULL;
+ int i = 0;
+ int ret = 0;
+
+ if (!loc->name)
+ return meta_inode_discover(frame, this, loc, xdata);
+
+ ops = meta_ops_get(loc->parent, this);
+ if (!ops)
+ return default_lookup_failure_cbk(frame, EPERM);
+
+ for (dirent = ops->fixed_dirents; dirent && dirent->name; dirent++) {
+ if (strcmp(dirent->name, loc->name) == 0)
+ goto hook;
+ }
+
+ dirent = NULL;
+ if (ops->dir_fill)
+ ret = ops->dir_fill(this, loc->parent, &dp);
+
+ for (i = 0; i < ret; i++) {
+ if (strcmp(dp[i].name, loc->name) == 0) {
+ dirent = &dp[i];
+ goto hook;
+ }
+ }
+hook:
+ if (dirent && dirent->hook) {
+ struct iatt parent = {};
+ struct iatt iatt = {};
+
+ dirent->hook(frame, this, loc, xdata);
+
+ meta_iatt_fill(&iatt, loc->inode, dirent->type);
+
+ META_STACK_UNWIND(lookup, frame, 0, 0, loc->inode, &iatt, xdata,
+ &parent);
+ } else {
+ META_STACK_UNWIND(lookup, frame, -1, ENOENT, 0, 0, 0, 0);
+ }
+
+ for (i = 0; i < ret; i++)
+ GF_FREE((void *)dp[i].name);
+ GF_FREE(dp);
+
+ return 0;
+}
+
+int
+meta_default_fsetattr(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ struct iatt *stbuf, int32_t valid, dict_t *xdata)
+{
+ return default_fsetattr_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_fallocate(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ int32_t keep_size, off_t offset, size_t len,
+ dict_t *xdata)
+{
+ return default_fallocate_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_discard(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, size_t len, dict_t *xdata)
+{
+ return default_discard_failure_cbk(frame, EPERM);
+}
+
+int
+meta_default_zerofill(call_frame_t *frame, xlator_t *this, fd_t *fd,
+ off_t offset, off_t len, dict_t *xdata)
+{
+ return default_zerofill_failure_cbk(frame, EPERM);
+}
+
+#define SET_META_DEFAULT_FOP(f, name) \
+ do { \
+ if (!f->name) \
+ f->name = meta_default_##name; \
+ } while (0)
+
+struct xlator_fops *
+meta_defaults_init(struct xlator_fops *fops)
+{
+ SET_META_DEFAULT_FOP(fops, create);
+ SET_META_DEFAULT_FOP(fops, open);
+ SET_META_DEFAULT_FOP(fops, stat);
+ SET_META_DEFAULT_FOP(fops, readlink);
+ SET_META_DEFAULT_FOP(fops, mknod);
+ SET_META_DEFAULT_FOP(fops, mkdir);
+ SET_META_DEFAULT_FOP(fops, unlink);
+ SET_META_DEFAULT_FOP(fops, rmdir);
+ SET_META_DEFAULT_FOP(fops, symlink);
+ SET_META_DEFAULT_FOP(fops, rename);
+ SET_META_DEFAULT_FOP(fops, link);
+ SET_META_DEFAULT_FOP(fops, truncate);
+ SET_META_DEFAULT_FOP(fops, readv);
+ SET_META_DEFAULT_FOP(fops, writev);
+ SET_META_DEFAULT_FOP(fops, statfs);
+ SET_META_DEFAULT_FOP(fops, flush);
+ SET_META_DEFAULT_FOP(fops, fsync);
+ SET_META_DEFAULT_FOP(fops, setxattr);
+ SET_META_DEFAULT_FOP(fops, getxattr);
+ SET_META_DEFAULT_FOP(fops, fsetxattr);
+ SET_META_DEFAULT_FOP(fops, fgetxattr);
+ SET_META_DEFAULT_FOP(fops, removexattr);
+ SET_META_DEFAULT_FOP(fops, fremovexattr);
+ SET_META_DEFAULT_FOP(fops, opendir);
+ SET_META_DEFAULT_FOP(fops, readdir);
+ SET_META_DEFAULT_FOP(fops, readdirp);
+ SET_META_DEFAULT_FOP(fops, fsyncdir);
+ SET_META_DEFAULT_FOP(fops, access);
+ SET_META_DEFAULT_FOP(fops, ftruncate);
+ SET_META_DEFAULT_FOP(fops, fstat);
+ SET_META_DEFAULT_FOP(fops, lk);
+ SET_META_DEFAULT_FOP(fops, inodelk);
+ SET_META_DEFAULT_FOP(fops, finodelk);
+ SET_META_DEFAULT_FOP(fops, entrylk);
+ SET_META_DEFAULT_FOP(fops, fentrylk);
+ SET_META_DEFAULT_FOP(fops, lookup);
+ SET_META_DEFAULT_FOP(fops, rchecksum);
+ SET_META_DEFAULT_FOP(fops, xattrop);
+ SET_META_DEFAULT_FOP(fops, fxattrop);
+ SET_META_DEFAULT_FOP(fops, setattr);
+ SET_META_DEFAULT_FOP(fops, fsetattr);
+ SET_META_DEFAULT_FOP(fops, fallocate);
+ SET_META_DEFAULT_FOP(fops, discard);
+ SET_META_DEFAULT_FOP(fops, zerofill);
+
+ return fops;
+}
diff --git a/xlators/meta/src/meta-helpers.c b/xlators/meta/src/meta-helpers.c
new file mode 100644
index 00000000000..cb54f547468
--- /dev/null
+++ b/xlators/meta/src/meta-helpers.c
@@ -0,0 +1,332 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+
+meta_fd_t *
+meta_fd_get(fd_t *fd, xlator_t *this)
+{
+ uint64_t value = 0;
+ meta_fd_t *meta_fd = NULL;
+
+ LOCK(&fd->lock);
+ {
+ if (__fd_ctx_get(fd, this, &value) < 0) {
+ if (!value) {
+ meta_fd = GF_CALLOC(1, sizeof(*meta_fd), gf_meta_mt_fd_t);
+ if (!meta_fd)
+ goto unlock;
+ value = (long)meta_fd;
+ __fd_ctx_set(fd, this, value);
+ }
+ } else {
+ meta_fd = (void *)(uintptr_t)value;
+ }
+ }
+unlock:
+ UNLOCK(&fd->lock);
+
+ return meta_fd;
+}
+
+int
+meta_fd_release(fd_t *fd, xlator_t *this)
+{
+ uint64_t value = 0;
+ meta_fd_t *meta_fd = NULL;
+ int i = 0;
+
+ fd_ctx_get(fd, this, &value);
+ meta_fd = (void *)(uintptr_t)value;
+
+ if (meta_fd && meta_fd->dirents) {
+ for (i = 0; i < meta_fd->size; i++)
+ GF_FREE((void *)meta_fd->dirents[i].name);
+ GF_FREE(meta_fd->dirents);
+ }
+
+ if (meta_fd) {
+ GF_FREE(meta_fd->data);
+ GF_FREE(meta_fd);
+ }
+ return 0;
+}
+
+struct meta_ops *
+meta_ops_get(inode_t *inode, xlator_t *this)
+{
+ struct meta_ops *ops = NULL;
+ uint64_t value = 0;
+
+ inode_ctx_get2(inode, this, NULL, &value);
+
+ ops = (void *)(uintptr_t)value;
+
+ return ops;
+}
+
+struct xlator_fops *
+meta_fops_get(inode_t *inode, xlator_t *this)
+{
+ struct meta_ops *ops = NULL;
+
+ ops = meta_ops_get(inode, this);
+ if (!ops)
+ return default_fops;
+
+ return &ops->fops;
+}
+
+int
+meta_ops_set(inode_t *inode, xlator_t *this, struct meta_ops *ops)
+{
+ uint64_t value = 0;
+ int ret = 0;
+
+ meta_defaults_init(&ops->fops);
+
+ value = (long)ops;
+
+ ret = inode_ctx_set2(inode, this, NULL, &value);
+
+ return ret;
+}
+
+void *
+meta_ctx_get(inode_t *inode, xlator_t *this)
+{
+ void *ctx = NULL;
+ uint64_t value = 0;
+
+ inode_ctx_get2(inode, this, &value, 0);
+
+ ctx = (void *)(uintptr_t)value;
+
+ return ctx;
+}
+
+int
+meta_ctx_set(inode_t *inode, xlator_t *this, void *ctx)
+{
+ uint64_t value = 0;
+ int ret = 0;
+
+ value = (long)ctx;
+
+ ret = inode_ctx_set2(inode, this, &value, 0);
+
+ return ret;
+}
+
+void
+meta_local_cleanup(meta_local_t *local, xlator_t *this)
+{
+ if (!local)
+ return;
+
+ if (local->xdata)
+ dict_unref(local->xdata);
+
+ GF_FREE(local);
+ return;
+}
+
+meta_local_t *
+meta_local(call_frame_t *frame)
+{
+ meta_local_t *local = NULL;
+
+ local = frame->local;
+ if (!local)
+ local = frame->local = GF_CALLOC(1, sizeof(*local), gf_meta_mt_local_t);
+ return local;
+}
+
+dict_t *
+meta_direct_io_mode(dict_t *xdata, call_frame_t *frame)
+{
+ meta_local_t *local = NULL;
+
+ if (!xdata) {
+ local = meta_local(frame);
+ if (!local)
+ return NULL;
+ xdata = local->xdata = dict_new();
+ if (!xdata)
+ return NULL;
+ }
+
+ if (dict_set_int8(xdata, "direct-io-mode", 1) != 0)
+ return NULL;
+
+ return xdata;
+}
+
+static void
+meta_uuid_copy(uuid_t dst, uuid_t src)
+{
+ gf_uuid_copy(dst, src);
+ if (gf_uuid_is_null(dst))
+ gf_uuid_generate(dst);
+}
+
+static void
+default_meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type,
+ gf_boolean_t is_tunable)
+{
+ struct timeval tv = {};
+
+ iatt->ia_type = type;
+ switch (type) {
+ case IA_IFDIR:
+ iatt->ia_prot = ia_prot_from_st_mode(0555);
+ iatt->ia_nlink = 2;
+ break;
+ case IA_IFLNK:
+ iatt->ia_prot = ia_prot_from_st_mode(0777);
+ iatt->ia_nlink = 1;
+ break;
+ default:
+ iatt->ia_prot = ia_prot_from_st_mode(is_tunable ? 0644 : 0444);
+ iatt->ia_nlink = 1;
+ break;
+ }
+ iatt->ia_uid = 0;
+ iatt->ia_gid = 0;
+ iatt->ia_size = 0;
+
+ meta_uuid_copy(iatt->ia_gfid, inode->gfid);
+ iatt->ia_ino = gfid_to_ino(iatt->ia_gfid);
+
+ gettimeofday(&tv, 0);
+ iatt->ia_mtime = iatt->ia_ctime = iatt->ia_atime = tv.tv_sec;
+ iatt->ia_mtime_nsec = iatt->ia_ctime_nsec = iatt->ia_atime_nsec =
+ (tv.tv_usec * 1000);
+ return;
+}
+
+void
+meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type)
+{
+ struct meta_ops *ops = NULL;
+
+ ops = meta_ops_get(inode, THIS);
+ if (!ops)
+ return;
+
+ if (!ops->iatt_fill)
+ default_meta_iatt_fill(iatt, inode, type, !!ops->file_write);
+ else
+ ops->iatt_fill(THIS, inode, iatt);
+ return;
+}
+
+int
+meta_inode_discover(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ struct iatt iatt = {};
+ struct iatt postparent = {};
+
+ meta_iatt_fill(&iatt, loc->inode, loc->inode->ia_type);
+
+ META_STACK_UNWIND(lookup, frame, 0, 0, loc->inode, &iatt, xdata,
+ &postparent);
+ return 0;
+}
+
+int
+meta_file_fill(xlator_t *this, fd_t *fd)
+{
+ meta_fd_t *meta_fd = NULL;
+ strfd_t *strfd = NULL;
+ struct meta_ops *ops = NULL;
+ int ret = 0;
+
+ meta_fd = meta_fd_get(fd, this);
+ if (!meta_fd)
+ return -1;
+
+ if (meta_fd->data)
+ return meta_fd->size;
+
+ strfd = strfd_open();
+ if (!strfd)
+ return -1;
+
+ ops = meta_ops_get(fd->inode, this);
+ if (!ops) {
+ strfd_close(strfd);
+ return -1;
+ }
+
+ if (ops->file_fill)
+ ret = ops->file_fill(this, fd->inode, strfd);
+
+ if (ret >= 0) {
+ meta_fd->data = strfd->data;
+ meta_fd->size = strfd->size;
+
+ strfd->data = NULL;
+ }
+
+ strfd_close(strfd);
+
+ return meta_fd->size;
+}
+
+int
+meta_dir_fill(xlator_t *this, fd_t *fd)
+{
+ meta_fd_t *meta_fd = NULL;
+ struct meta_ops *ops = NULL;
+ struct meta_dirent *dp = NULL;
+ int ret = 0;
+
+ meta_fd = meta_fd_get(fd, this);
+ if (!meta_fd)
+ return -1;
+
+ if (meta_fd->dirents)
+ return meta_fd->size;
+
+ ops = meta_ops_get(fd->inode, this);
+ if (!ops)
+ return -1;
+
+ if (ops->dir_fill)
+ ret = ops->dir_fill(this, fd->inode, &dp);
+
+ if (dp) {
+ meta_fd->dirents = dp;
+ meta_fd->size = ret;
+ }
+
+ return meta_fd->size;
+}
+
+int
+fixed_dirents_len(struct meta_dirent *dirents)
+{
+ int i = 0;
+ struct meta_dirent *dirent = NULL;
+
+ if (!dirents)
+ return 0;
+
+ for (dirent = dirents; dirent->name; dirent++)
+ i++;
+
+ return i;
+}
diff --git a/xlators/meta/src/meta-hooks.h b/xlators/meta/src/meta-hooks.h
new file mode 100644
index 00000000000..7208641398a
--- /dev/null
+++ b/xlators/meta/src/meta-hooks.h
@@ -0,0 +1,48 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef __META_HOOKS_H
+#define __META_HOOKS_H
+#include <glusterfs/xlator.h>
+
+#define DECLARE_HOOK(name) \
+ int meta_##name##_hook(call_frame_t *frame, xlator_t *this, loc_t *loc, \
+ dict_t *xdata)
+
+DECLARE_HOOK(root_dir);
+DECLARE_HOOK(graphs_dir);
+DECLARE_HOOK(frames_file);
+DECLARE_HOOK(graph_dir);
+DECLARE_HOOK(active_link);
+DECLARE_HOOK(xlator_dir);
+DECLARE_HOOK(top_link);
+DECLARE_HOOK(logging_dir);
+DECLARE_HOOK(logfile_link);
+DECLARE_HOOK(loglevel_file);
+DECLARE_HOOK(process_uuid_file);
+DECLARE_HOOK(volfile_file);
+DECLARE_HOOK(view_dir);
+DECLARE_HOOK(subvolumes_dir);
+DECLARE_HOOK(subvolume_link);
+DECLARE_HOOK(type_file);
+DECLARE_HOOK(version_file);
+DECLARE_HOOK(options_dir);
+DECLARE_HOOK(option_file);
+DECLARE_HOOK(cmdline_file);
+DECLARE_HOOK(name_file);
+DECLARE_HOOK(private_file);
+DECLARE_HOOK(mallinfo_file);
+DECLARE_HOOK(history_file);
+DECLARE_HOOK(master_dir);
+DECLARE_HOOK(meminfo_file);
+DECLARE_HOOK(measure_file);
+DECLARE_HOOK(profile_file);
+
+#endif
diff --git a/xlators/meta/src/meta-mem-types.h b/xlators/meta/src/meta-mem-types.h
index 9585b783837..033c306682f 100644
--- a/xlators/meta/src/meta-mem-types.h
+++ b/xlators/meta/src/meta-mem-types.h
@@ -1,35 +1,25 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __META_MEM_TYPES_H__
#define __META_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
enum gf_meta_mem_types_ {
- gf_meta_mt__open_local = gf_common_mt_end + 1,
- gf_meta_mt_dir_entry_t,
- gf_meta_mt_meta_dirent_t,
- gf_meta_mt_meta_private_t,
- gf_meta_mt_stat,
- gf_meta_mt_end
+ gf_meta_mt_priv_t = gf_common_mt_end + 1,
+ gf_meta_mt_fd_t,
+ gf_meta_mt_fd_data_t,
+ gf_meta_mt_strfd_t,
+ gf_meta_mt_dirents_t,
+ gf_meta_mt_local_t,
+ gf_meta_mt_end
};
#endif
-
diff --git a/xlators/meta/src/meta.c b/xlators/meta/src/meta.c
index 412b4a2b584..e1b9a2b6581 100644
--- a/xlators/meta/src/meta.c
+++ b/xlators/meta/src/meta.c
@@ -1,1307 +1,276 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "dict.h"
-#include "xlator.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
-#include "meta.h"
-#include "view.h"
#include "meta-mem-types.h"
+#include "meta.h"
-int32_t
-meta_getattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-meta_getattr (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
-
- if (file) {
- if (file->fops && file->fops->getattr) {
- STACK_WIND (frame, meta_getattr_cbk,
- this, file->fops->getattr, path);
- return 0;
- }
- else {
- STACK_UNWIND (frame, 0, 0, file->stbuf);
- return 0;
- }
- }
- else {
- STACK_WIND (frame, meta_getattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getattr,
- path);
- return 0;
- }
-}
-
-int32_t
-meta_chmod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-
-int32_t
-meta_chmod (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- mode_t mode)
-{
- STACK_WIND (frame,
- meta_chmod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chmod,
- path,
- mode);
- return 0;
-}
-
-int32_t
-meta_chown_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-
-int32_t
-meta_chown (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- uid_t uid,
- gid_t gid)
-{
- STACK_WIND (frame,
- meta_chown_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->chown,
- path,
- uid,
- gid);
- return 0;
-}
-
-
-int32_t
-meta_truncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-
-int32_t
-meta_truncate (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- off_t offset)
-{
- STACK_WIND (frame,
- meta_truncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->truncate,
- path,
- offset);
- return 0;
-}
-
-
-int32_t
-meta_ftruncate_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-
-int32_t
-meta_ftruncate (call_frame_t *frame,
- xlator_t *this,
- dict_t *fd,
- off_t offset)
-{
- STACK_WIND (frame,
- meta_ftruncate_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->ftruncate,
- fd,
- offset);
- return 0;
-}
-
-
-int32_t
-meta_utimes_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-
-int32_t
-meta_utimes (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- struct timespec *buf)
-{
- STACK_WIND (frame,
- meta_utimes_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->utimes,
- path,
- buf);
- return 0;
-}
-
-
-int32_t
-meta_access_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
-
-int32_t
-meta_access (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- mode_t mode)
-{
- STACK_WIND (frame,
- meta_access_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->access,
- path,
- mode);
- return 0;
-}
-
-int32_t
-meta_readlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- char *dest)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- dest);
- return 0;
-}
-
-int32_t
-meta_readlink (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- size_t size)
-{
- STACK_WIND (frame,
- meta_readlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->readlink,
- path,
- size);
- return 0;
-}
-
-int32_t
-meta_mknod_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-
-int32_t
-meta_mknod (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- mode_t mode,
- dev_t dev)
-{
- STACK_WIND (frame,
- meta_mknod_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mknod,
- path,
- mode,
- dev);
- return 0;
-}
-
-int32_t
-meta_mkdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-
-int32_t
-meta_mkdir (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- mode_t mode)
-{
- STACK_WIND (frame,
- meta_mkdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->mkdir,
- path,
- mode);
- return 0;
-}
-
-int32_t
-meta_unlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
-
-int32_t
-meta_unlink (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- STACK_WIND (frame,
- meta_unlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->unlink,
- path);
- return 0;
-}
-
-int32_t
-meta_rmdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
-
-int32_t
-meta_rmdir (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- STACK_WIND (frame,
- meta_rmdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rmdir,
- path);
- return 0;
-}
-
-int32_t
-meta_symlink_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
-
-int32_t
-meta_symlink (call_frame_t *frame,
- xlator_t *this,
- const char *oldpath,
- const char *newpath)
-{
- STACK_WIND (frame,
- meta_symlink_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->symlink,
- oldpath,
- newpath);
- return 0;
-}
+#include "meta-hooks.h"
-int32_t
-meta_rename_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+meta_lookup(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
+ inode_t *inode = NULL;
-int32_t
-meta_rename (call_frame_t *frame,
- xlator_t *this,
- const char *oldpath,
- const char *newpath)
-{
- STACK_WIND (frame,
- meta_rename_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->rename,
- oldpath,
- newpath);
- return 0;
-}
+ if (META_HOOK(loc) || IS_META_ROOT_GFID(loc->gfid)) {
+ struct iatt iatt = {};
+ struct iatt parent = {};
-int32_t
-meta_link_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
+ meta_root_dir_hook(frame, this, loc, xdata);
-int32_t
-meta_link (call_frame_t *frame,
- xlator_t *this,
- const char *oldpath,
- const char *newpath)
-{
- STACK_WIND (frame,
- meta_link_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->link,
- oldpath,
- newpath);
- return 0;
-}
+ meta_iatt_fill(&iatt, loc->inode, IA_IFDIR);
+ gf_uuid_parse(META_ROOT_GFID, iatt.ia_gfid);
-struct _open_local {
- const char *path;
-};
+ META_STACK_UNWIND(lookup, frame, 0, 0, loc->inode, &iatt, xdata,
+ &parent);
+ return 0;
+ }
-int32_t
-meta_open_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dict_t *ctx, struct stat *buf)
-{
- struct _open_local *local = frame->local;
- if (local)
- dict_set (ctx, this->name, str_to_data (local->path));
- STACK_UNWIND (frame, op_ret, op_errno, ctx, buf);
- return 0;
-}
+ if (loc->parent)
+ inode = loc->parent;
+ else
+ inode = loc->inode;
-int32_t
-meta_open (call_frame_t *frame, xlator_t *this,
- const char *path, int32_t flags, mode_t mode)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
+ META_FOP(inode, lookup, frame, this, loc, xdata);
- if (file) {
- if (file->fops && file->fops->open) {
- struct _open_local *local = GF_CALLOC (1, sizeof (struct _open_local), gf_meta_mt__open_local);
- ERR_ABORT (local);
- local->path = gf_strdup (path);
- frame->local = local;
- STACK_WIND (frame, meta_open_cbk,
- this, file->fops->open,
- path, flags, mode);
- return 0;
- }
- else {
- dict_t *ctx = get_new_dict ();
- dict_ref (ctx);
- dict_set (ctx, this->name, str_to_data (gf_strdup (path)));
- STACK_UNWIND (frame, 0, 0, ctx, file->stbuf);
- return 0;
- }
- }
- else {
- STACK_WIND (frame, meta_open_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->open,
- path, flags, mode);
return 0;
- }
}
-int32_t
-meta_create (call_frame_t *frame, xlator_t *this,
- const char *path, int32_t flags, mode_t mode)
+int
+meta_opendir(call_frame_t *frame, xlator_t *this, loc_t *loc, fd_t *fd,
+ dict_t *xdata)
{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
+ META_FOP(fd->inode, opendir, frame, this, loc, fd, xdata);
- if (file) {
- if (file->fops && file->fops->create) {
- struct _open_local *local = GF_CALLOC (1, sizeof (struct _open_local), gf_meta_mt__open_local);
- ERR_ABORT (local);
- local->path = gf_strdup (path);
- frame->local = local;
- STACK_WIND (frame, meta_open_cbk,
- this, file->fops->create,
- path, flags, mode);
- return 0;
- }
- else {
- STACK_UNWIND (frame, -1, 0, NULL, NULL);
- return 0;
- }
- }
- else {
- STACK_WIND (frame, meta_open_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->create,
- path, flags, mode);
return 0;
- }
}
-int32_t
-meta_readv_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct iovec *vector,
- int32_t count)
+int
+meta_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int flags, fd_t *fd,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- vector,
- count);
- return 0;
-}
-
-int32_t
-meta_readv (call_frame_t *frame,
- xlator_t *this,
- dict_t *fd,
- size_t size,
- off_t offset)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- data_t *path_data = dict_get (fd, this->name);
+ META_FOP(fd->inode, open, frame, this, loc, flags, fd, xdata);
- if (path_data) {
- const char *path = data_to_str (path_data);
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
-
- if (file && file->fops && file->fops->readv) {
- STACK_WIND (frame, meta_readv_cbk,
- this, file->fops->readv,
- fd, size, offset);
- return 0;
- }
- }
- else {
- STACK_WIND (frame, meta_readv_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readv,
- fd, size, offset);
return 0;
- }
}
-int32_t
-meta_writev_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno)
+int
+meta_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, uint32_t flags, dict_t *xdata)
{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
+ META_FOP(fd->inode, readv, frame, this, fd, size, offset, flags, xdata);
-int32_t
-meta_writev (call_frame_t *frame, xlator_t *this,
- dict_t *fd,
- struct iovec *vector, int32_t count, off_t offset)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- data_t *path_data = dict_get (fd, this->name);
-
- if (path_data) {
- const char *path = data_to_str (path_data);
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
-
- if (file && file->fops && file->fops->writev) {
- STACK_WIND (frame, meta_writev_cbk,
- this, file->fops->writev,
- fd, vector, count, offset);
- return 0;
- }
- }
- else {
- STACK_WIND (frame, meta_readv_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->writev,
- fd, vector, count, offset);
return 0;
- }
}
-int32_t
-meta_flush_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+meta_flush(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
+ META_FOP(fd->inode, flush, frame, this, fd, xdata);
-int32_t
-meta_flush (call_frame_t *frame,
- xlator_t *this,
- dict_t *fd)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- data_t *path_data = dict_get (fd, this->name);
-
- if (path_data) {
- const char *path = data_to_str (path_data);
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
-
- if (file) {
- if (file->fops && file->fops->flush) {
- STACK_WIND (frame, meta_flush_cbk,
- this, file->fops->flush,
- fd);
- return 0;
- }
- else {
- STACK_UNWIND (frame, 0, 0);
- return 0;
- }
- }
- }
- else {
- STACK_WIND (frame, meta_flush_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->flush,
- fd);
return 0;
- }
-}
-
-int32_t
-meta_release_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
}
-int32_t
-meta_release (call_frame_t *frame,
- xlator_t *this,
- dict_t *fd)
+int
+meta_stat(call_frame_t *frame, xlator_t *this, loc_t *loc, dict_t *xdata)
{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- data_t *path_data = dict_get (fd, this->name);
+ META_FOP(loc->inode, stat, frame, this, loc, xdata);
- if (path_data) {
- const char *path = data_to_str (path_data);
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
-
- if (file) {
- dict_unref (fd);
- STACK_UNWIND (frame, 0, 0);
- return 0;
- }
- }
- else {
- STACK_WIND (frame, meta_release_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->release,
- fd);
return 0;
- }
}
-int32_t
-meta_fsync_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+meta_fstat(call_frame_t *frame, xlator_t *this, fd_t *fd, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
+ META_FOP(fd->inode, fstat, frame, this, fd, xdata);
-int32_t
-meta_fsync (call_frame_t *frame,
- xlator_t *this,
- dict_t *fd,
- int32_t flags)
-{
- STACK_WIND (frame,
- meta_fsync_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsync,
- fd,
- flags);
- return 0;
+ return 0;
}
-int32_t
-meta_fgetattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct stat *buf)
+int
+meta_readdir(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
+ META_FOP(fd->inode, readdir, frame, this, fd, size, offset, xdata);
-int32_t
-meta_fgetattr (call_frame_t *frame,
- xlator_t *this,
- dict_t *fd)
-{
- STACK_WIND (frame,
- meta_fgetattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fgetattr,
- fd);
- return 0;
+ return 0;
}
-int32_t
-meta_opendir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dict_t *fd)
+int
+meta_readdirp(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
+ off_t offset, dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- fd);
- return 0;
-}
+ META_FOP(fd->inode, readdirp, frame, this, fd, size, offset, xdata);
-int32_t
-meta_opendir (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- meta_dirent_t *dir = lookup_meta_entry (root, path, NULL);
-
- if (dir) {
- dict_t *ctx = get_new_dict ();
- dict_set (ctx, this->name, str_to_data (gf_strdup (path)));
- STACK_UNWIND (frame, 0, 0, ctx);
- return 0;
- }
- else {
- STACK_WIND (frame, meta_opendir_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->opendir,
- path);
return 0;
- }
}
-int32_t
-meta_readdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- dir_entry_t *entries,
- int32_t count)
+int
+meta_readlink(call_frame_t *frame, xlator_t *this, loc_t *loc, size_t size,
+ dict_t *xdata)
{
- meta_private_t *priv = (meta_private_t *)this->private;
-
- if ((int) cookie == 1) {
- dir_entry_t *dir = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_meta_mt_dir_entry_t);
- ERR_ABORT (dir);
+ META_FOP(loc->inode, readlink, frame, this, loc, size, xdata);
- dir->name = gf_strdup (".meta");
- memcpy (&dir->buf, priv->tree->stbuf, sizeof (struct stat));
- dir->next = entries->next;
- entries->next = dir;
-
- STACK_UNWIND (frame, op_ret, op_errno, entries, count+1);
return 0;
- }
-
- STACK_UNWIND (frame, op_ret, op_errno, entries, count);
- return 0;
}
-int32_t
-meta_readdir (call_frame_t *frame,
- xlator_t *this,
- const char *path)
+int
+meta_writev(call_frame_t *frame, xlator_t *this, fd_t *fd, struct iovec *iov,
+ int count, off_t offset, uint32_t flags, struct iobref *iobref,
+ dict_t *xdata)
{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
-
- meta_dirent_t *dir = lookup_meta_entry (root, path, NULL);
- if (dir) {
- if (dir->fops && dir->fops->readdir) {
- STACK_WIND (frame, meta_readdir_cbk,
- this, dir->fops->readdir, path);
- return 0;
- }
- else {
- int count = 0;
- dir = dir->children;
- dir_entry_t *entries = NULL;
-
- while (dir) {
- dir_entry_t *d = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_meta_mt_dir_entry_t);
- ERR_ABORT (d);
- d->name = dir->name;
- d->buf = *dir->stbuf;
- d->next = entries;
- entries = d;
- count++;
- dir = dir->next;
- }
-
- dir_entry_t *header = GF_CALLOC (1, sizeof (dir_entry_t),
- gf_meta_mt_dir_entry_t);
- ERR_ABORT (header);
- header->next = entries;
- STACK_UNWIND (frame, 0, 0, header, count);
- return 0;
- }
- }
- else {
- if (!strcmp (path, "/")) {
- STACK_WIND_COOKIE (frame, meta_readdir_cbk,
- (int) 1, /* cookie to tell _cbk to add .meta entry */
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir,
- path);
- }
- else {
- STACK_WIND (frame, meta_readdir_cbk,
- FIRST_CHILD(this), FIRST_CHILD(this)->fops->readdir,
- path);
- }
- }
- return 0;
+ META_FOP(fd->inode, writev, frame, this, fd, iov, count, offset, flags,
+ iobref, xdata);
+ return 0;
}
-int32_t
-meta_releasedir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+meta_truncate(call_frame_t *frame, xlator_t *this, loc_t *loc, off_t offset,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
+ META_FOP(loc->inode, truncate, frame, this, loc, offset, xdata);
-int32_t
-meta_releasedir (call_frame_t *frame,
- xlator_t *this,
- dict_t *fd)
-{
- STACK_WIND (frame,
- meta_releasedir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->releasedir,
- fd);
- return 0;
+ return 0;
}
-int32_t
-meta_fsyncdir_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+int
+meta_ftruncate(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
+ META_FOP(fd->inode, ftruncate, frame, this, fd, offset, xdata);
-int32_t
-meta_fsyncdir (call_frame_t *frame,
- xlator_t *this,
- dict_t *fd,
- int32_t flags)
-{
- STACK_WIND (frame,
- meta_fsyncdir_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->fsyncdir,
- fd,
- flags);
- return 0;
+ return 0;
}
int32_t
-meta_statfs_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct statvfs *buf)
+meta_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- buf);
- return 0;
-}
+ META_FOP(fd->inode, fsync, frame, this, fd, flags, xdata);
-int32_t
-meta_statfs (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- STACK_WIND (frame,
- meta_statfs_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->statfs,
- path);
- return 0;
+ return 0;
}
int32_t
-meta_setxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
+meta_fsyncdir(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t flags,
+ dict_t *xdata)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
+ META_FOP(fd->inode, fsyncdir, frame, this, fd, flags, xdata);
-int32_t
-meta_setxattr (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- const char *name,
- const char *value,
- size_t size,
- int32_t flags)
-{
- STACK_WIND (frame,
- meta_setxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->setxattr,
- path,
- name,
- value,
- size,
- flags);
- return 0;
+ return 0;
}
-int32_t
-meta_getxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- char *value)
+int
+meta_forget(xlator_t *this, inode_t *inode)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- value);
- return 0;
+ return 0;
}
-int32_t
-meta_getxattr (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- const char *name,
- size_t size)
+int
+meta_release(xlator_t *this, fd_t *fd)
{
- STACK_WIND (frame,
- meta_getxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->getxattr,
- path,
- name,
- size);
- return 0;
+ return meta_fd_release(fd, this);
}
-int32_t
-meta_listxattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- char *value)
+int
+meta_releasedir(xlator_t *this, fd_t *fd)
{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- value);
- return 0;
+ return meta_fd_release(fd, this);
}
-int32_t
-meta_listxattr (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- size_t size)
+int
+mem_acct_init(xlator_t *this)
{
- STACK_WIND (frame,
- meta_listxattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->listxattr,
- path,
- size);
- return 0;
-}
+ int ret = -1;
-int32_t
-meta_removexattr_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno);
- return 0;
-}
+ if (!this)
+ return ret;
-int32_t
-meta_removexattr (call_frame_t *frame,
- xlator_t *this,
- const char *path,
- const char *name)
-{
- STACK_WIND (frame,
- meta_removexattr_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->removexattr,
- path,
- name);
- return 0;
-}
+ ret = xlator_mem_acct_init(this, gf_meta_mt_end + 1);
-int32_t
-meta_lk_cbk (call_frame_t *frame,
- void *cookie,
- xlator_t *this,
- int32_t op_ret,
- int32_t op_errno,
- struct gf_flock *lock)
-{
- STACK_UNWIND (frame,
- op_ret,
- op_errno,
- lock);
- return 0;
-}
+ if (ret != 0) {
+ gf_log(this->name, GF_LOG_ERROR, "Memory accounting init failed");
+ return ret;
+ }
-int32_t
-meta_lk (call_frame_t *frame,
- xlator_t *this,
- dict_t *file,
- int32_t cmd,
- struct gf_flock *lock)
-{
- STACK_WIND (frame,
- meta_lk_cbk,
- FIRST_CHILD(this),
- FIRST_CHILD(this)->fops->lk,
- file,
- cmd,
- lock);
- return 0;
+ return ret;
}
-static void
-add_xlator_to_tree (meta_dirent_t *tree, xlator_t *this,
- const char *prefix)
+int
+init(xlator_t *this)
{
- char *dir;
- gf_asprintf (&dir, "%s/%s", prefix, this->name);
-
- char *children;
- gf_asprintf (&children, "%s/%s", dir, "subvolumes");
-
- char *type;
- gf_asprintf (&type, "%s/%s", dir, "type");
-
- char *view;
- gf_asprintf (&view, "%s/%s", dir, "view");
-
- insert_meta_entry (tree, dir, S_IFDIR, NULL, NULL);
- insert_meta_entry (tree, children, S_IFDIR, NULL, NULL);
- meta_dirent_t *v = insert_meta_entry (tree, view, S_IFDIR, NULL,
- &meta_xlator_view_fops);
- v->view_xlator = this;
- meta_dirent_t *t = insert_meta_entry (tree, type, S_IFREG, NULL,
- &meta_xlator_type_fops);
- t->view_xlator = this;
-
- xlator_list_t *trav = this->children;
- while (trav) {
- add_xlator_to_tree (tree, trav->xlator, children);
- trav = trav->next;
- }
-}
+ meta_priv_t *priv = NULL;
+ int ret = -1;
-static void
-build_meta_tree (xlator_t *this)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- priv->tree = GF_CALLOC (1, sizeof (meta_dirent_t),
- gf_meta_mt_meta_dirent_t);
- ERR_ABORT (priv->tree);
- priv->tree->name = gf_strdup (".meta");
- priv->tree->stbuf = new_stbuf ();
- priv->tree->stbuf->st_mode = S_IFDIR | S_IRUSR | S_IRGRP | S_IROTH |
- S_IXUSR | S_IXGRP | S_IXOTH;
+ priv = GF_CALLOC(sizeof(*priv), 1, gf_meta_mt_priv_t);
+ if (!priv)
+ return ret;
- insert_meta_entry (priv->tree, "/.meta/version",
- S_IFREG, NULL, &meta_version_fops);
+ GF_OPTION_INIT("meta-dir-name", priv->meta_dir_name, str, out);
- insert_meta_entry (priv->tree, "/.meta/xlators",
- S_IFDIR, NULL, NULL);
+ this->private = priv;
+ ret = 0;
+out:
+ if (ret)
+ GF_FREE(priv);
- xlator_list_t *trav = this->children;
- while (trav) {
- add_xlator_to_tree (priv->tree, trav->xlator, "/.meta/xlators");
- trav = trav->next;
- }
+ return ret;
}
-int32_t
-mem_acct_init (xlator_t *this)
+void
+fini(xlator_t *this)
{
- int ret = -1;
-
- if (!this)
- return ret;
-
- ret = xlator_mem_acct_init (this, gf_meta_mt_end + 1);
-
- if (ret != 0) {
- gf_log(this->name, GF_LOG_ERROR, "Memory accounting init"
- "failed");
- return ret;
- }
-
- return ret;
+ GF_FREE(this->private);
+ return;
}
-int32_t
-init (xlator_t *this)
-{
- if (this->parent != NULL) {
- gf_log ("meta", GF_LOG_ERROR, "FATAL: meta should be the root of the xlator tree");
- return -1;
- }
-
- meta_private_t *priv = GF_CALLOC (1, sizeof (meta_private_t),
- gf_meta_mt_meta_private_t);
- ERR_ABORT (priv);
-
- data_t *directory = dict_get (this->options, "directory");
- if (directory) {
- priv->directory = gf_strdup (data_to_str (directory));
- }
- else {
- priv->directory = ".meta";
- }
-
- this->private = priv;
- build_meta_tree (this);
+struct xlator_fops fops = {.lookup = meta_lookup,
+ .opendir = meta_opendir,
+ .open = meta_open,
+ .readv = meta_readv,
+ .flush = meta_flush,
+ .stat = meta_stat,
+ .fstat = meta_fstat,
+ .readdir = meta_readdir,
+ .readdirp = meta_readdirp,
+ .readlink = meta_readlink,
+ .writev = meta_writev,
+ .truncate = meta_truncate,
+ .ftruncate = meta_ftruncate,
+ .fsync = meta_fsync,
+ .fsyncdir = meta_fsyncdir};
- return 0;
-}
+struct xlator_cbks cbks = {
+ .forget = meta_forget,
+ .release = meta_release,
+ .releasedir = meta_releasedir,
+};
-int32_t
-fini (xlator_t *this)
-{
- return 0;
-}
+struct volume_options options[] = {
+ {.key = {"meta-dir-name"},
+ .type = GF_OPTION_TYPE_STR,
+ .default_value = DEFAULT_META_DIR_NAME,
+ .description = "Name of default meta directory."},
+ {.key = {NULL}},
+};
-struct xlator_fops fops = {
- .getattr = meta_getattr,
- .readlink = meta_readlink,
- .mknod = meta_mknod,
- .mkdir = meta_mkdir,
- .unlink = meta_unlink,
- .rmdir = meta_rmdir,
- .symlink = meta_symlink,
- .rename = meta_rename,
- .link = meta_link,
- .chmod = meta_chmod,
- .chown = meta_chown,
- .truncate = meta_truncate,
- .utimes = meta_utimes,
- .open = meta_open,
- .readv = meta_readv,
- .writev = meta_writev,
- .statfs = meta_statfs,
- .flush = meta_flush,
- .release = meta_release,
- .fsync = meta_fsync,
- .setxattr = meta_setxattr,
- .getxattr = meta_getxattr,
- .listxattr = meta_listxattr,
- .removexattr = meta_removexattr,
- .opendir = meta_opendir,
- .readdir = meta_readdir,
- .releasedir = meta_releasedir,
- .fsyncdir = meta_fsyncdir,
- .access = meta_access,
- .ftruncate = meta_ftruncate,
- .fgetattr = meta_fgetattr,
- .create = meta_create,
- .lk = meta_lk,
+xlator_api_t xlator_api = {
+ .init = init,
+ .fini = fini,
+ .mem_acct_init = mem_acct_init,
+ .op_version = {1}, /* Present from the initial version */
+ .fops = &fops,
+ .cbks = &cbks,
+ .options = options,
+ .identifier = "meta",
+ .category = GF_TECH_PREVIEW,
};
diff --git a/xlators/meta/src/meta.h b/xlators/meta/src/meta.h
index 5636487c985..7f0cf28808a 100644
--- a/xlators/meta/src/meta.h
+++ b/xlators/meta/src/meta.h
@@ -1,48 +1,140 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __META_H__
#define __META_H__
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-struct _meta_dirent {
- const char *name;
- int type;
- struct _meta_dirent *children;
- struct _meta_dirent *parent;
- struct _meta_dirent *next;
- struct stat *stbuf;
- xlator_t *view_xlator;
- struct xlator_fops *fops;
+#include <glusterfs/strfd.h>
+
+#define DEFAULT_META_DIR_NAME ".meta"
+
+#define META_ROOT_GFID "ba926388-bb9c-4eec-ad60-79dba4cc083a"
+
+#define IS_META_ROOT_GFID(g) (strcmp(uuid_utoa(g), META_ROOT_GFID) == 0)
+
+typedef int (*meta_hook_t)(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+typedef struct {
+ dict_t *xdata;
+} meta_local_t;
+
+typedef struct {
+ char *meta_dir_name;
+} meta_priv_t;
+
+struct meta_dirent {
+ const char *name;
+ ia_type_t type;
+ meta_hook_t hook;
+};
+
+#define DOT_DOTDOT \
+ {.name = ".", .type = IA_IFDIR}, { .name = "..", .type = IA_IFDIR }
+
+struct meta_ops {
+ struct meta_dirent *fixed_dirents;
+ int (*dir_fill)(xlator_t *this, inode_t *dir, struct meta_dirent **entries);
+ int (*file_fill)(xlator_t *this, inode_t *file, strfd_t *strfd);
+ int (*iatt_fill)(xlator_t *this, inode_t *inode, struct iatt *iatt);
+ int (*link_fill)(xlator_t *this, inode_t *inode, strfd_t *strfd);
+ int (*file_write)(xlator_t *this, fd_t *fd, struct iovec *iov, int count);
+ struct xlator_fops fops;
+ struct xlator_cbks cbks;
};
-typedef struct _meta_dirent meta_dirent_t;
typedef struct {
- const char *directory;
- meta_dirent_t *tree;
-} meta_private_t;
+ char *data;
+ struct meta_dirent *dirents;
+ size_t size;
+} meta_fd_t;
+
+#define COUNT(arr) (sizeof(arr) / sizeof(arr[0]))
+
+#define META_HOOK(loc) \
+ (__is_root_gfid(loc->pargfid) && \
+ !strcmp(loc->name, META_PRIV(THIS)->meta_dir_name))
+
+#define META_PRIV(t) ((meta_priv_t *)(t->private))
+
+#define META_STACK_UNWIND(fop, frame, params...) \
+ do { \
+ meta_local_t *__local = NULL; \
+ xlator_t *__this = NULL; \
+ if (frame) { \
+ __local = frame->local; \
+ __this = frame->this; \
+ frame->local = NULL; \
+ } \
+ STACK_UNWIND_STRICT(fop, frame, params); \
+ if (__local) { \
+ meta_local_cleanup(__local, __this); \
+ } \
+ } while (0)
+
+#define META_FOP(i, fop, fr, t, params...) \
+ { \
+ struct xlator_fops *_fops = NULL; \
+ \
+ _fops = meta_fops_get(i, t); \
+ \
+ _fops->fop(fr, t, params); \
+ } \
+ while (0)
+
+void
+meta_iatt_fill(struct iatt *iatt, inode_t *inode, ia_type_t type);
+
+int
+meta_inode_discover(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata);
+
+int
+meta_ops_set(inode_t *inode, xlator_t *this, struct meta_ops *ops);
+
+struct xlator_fops *
+meta_fops_get(inode_t *inode, xlator_t *this);
+struct xlator_cbks *
+meta_cbks_get(inode_t *inode, xlator_t *this);
+struct meta_ops *
+meta_ops_get(inode_t *inode, xlator_t *this);
+
+int
+meta_ctx_set(inode_t *inode, xlator_t *this, void *ctx);
+
+void *
+meta_ctx_get(inode_t *inode, xlator_t *this);
+
+void
+meta_local_cleanup(meta_local_t *local, xlator_t *this);
+
+struct xlator_fops *
+meta_defaults_init(struct xlator_fops *fops);
+
+meta_fd_t *
+meta_fd_get(fd_t *fd, xlator_t *this);
+
+int
+meta_fd_release(fd_t *fd, xlator_t *this);
+
+dict_t *
+meta_direct_io_mode(dict_t *xdata, call_frame_t *frame);
+
+meta_local_t *
+meta_local(call_frame_t *frame);
+
+int
+meta_file_fill(xlator_t *this, fd_t *fd);
-#include "tree.h"
-#include "misc.h"
+int
+meta_dir_fill(xlator_t *this, fd_t *fd);
+int
+fixed_dirents_len(struct meta_dirent *dirents);
#endif /* __META_H__ */
diff --git a/xlators/meta/src/misc.c b/xlators/meta/src/misc.c
deleted file mode 100644
index bea07e70c81..00000000000
--- a/xlators/meta/src/misc.c
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#include <unistd.h>
-#include <sys/uio.h>
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "xlator.h"
-#include "meta.h"
-
-#define min(x,y) ((x) < (y) ? (x) : (y))
-
-/* /.meta/version */
-static const char *version_str = PACKAGE_NAME " " PACKAGE_VERSION "\n";
-
-int32_t
-meta_version_readv (call_frame_t *frame, xlator_t *this,
- dict_t *fd, size_t size, off_t offset)
-{
- static int version_size;
- version_size = strlen (version_str);
-
- struct iovec vec;
- vec.iov_base = version_str + offset;
- vec.iov_len = min (version_size - offset, size);
-
- STACK_UNWIND (frame, vec.iov_len, 0, &vec, 1);
- return 0;
-}
-
-int32_t
-meta_version_getattr (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
- file->stbuf->st_size = strlen (version_str);
- STACK_UNWIND (frame, 0, 0, file->stbuf);
-}
-
-struct xlator_fops meta_version_fops = {
- .readv = meta_version_readv,
- .getattr = meta_version_getattr
-};
-
diff --git a/xlators/meta/src/misc.h b/xlators/meta/src/misc.h
deleted file mode 100644
index f934a6d8dc8..00000000000
--- a/xlators/meta/src/misc.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __MISC_H__
-#define __MISC_H__
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-struct xlator_fops meta_version_fops;
-
-#endif /* __MISC_H__ */
diff --git a/xlators/meta/src/name-file.c b/xlators/meta/src/name-file.c
new file mode 100644
index 00000000000..5874a24d78a
--- /dev/null
+++ b/xlators/meta/src/name-file.c
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
+
+static int
+name_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+ xlator_t *xl = NULL;
+
+ xl = meta_ctx_get(file, this);
+
+ strprintf(strfd, "%s\n", xl->name);
+
+ return strfd->size;
+}
+
+static struct meta_ops name_file_ops = {
+ .file_fill = name_file_fill,
+};
+
+int
+meta_name_file_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &name_file_ops);
+
+ meta_ctx_set(loc->inode, this, meta_ctx_get(loc->parent, this));
+
+ return 0;
+}
diff --git a/xlators/meta/src/option-file.c b/xlators/meta/src/option-file.c
new file mode 100644
index 00000000000..ff55eca592f
--- /dev/null
+++ b/xlators/meta/src/option-file.c
@@ -0,0 +1,45 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+static int
+option_file_fill(xlator_t *this, inode_t *inode, strfd_t *strfd)
+{
+ data_t *data = NULL;
+
+ data = meta_ctx_get(inode, this);
+
+ strprintf(strfd, "%s\n", data_to_str(data));
+
+ return strfd->size;
+}
+
+static struct meta_ops option_file_ops = {.file_fill = option_file_fill};
+
+int
+meta_option_file_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ xlator_t *xl = NULL;
+
+ xl = meta_ctx_get(loc->parent, this);
+
+ meta_ctx_set(loc->inode, this, dict_get(xl->options, (char *)loc->name));
+
+ meta_ops_set(loc->inode, this, &option_file_ops);
+
+ return 0;
+}
diff --git a/xlators/meta/src/options-dir.c b/xlators/meta/src/options-dir.c
new file mode 100644
index 00000000000..d68a7eeaffc
--- /dev/null
+++ b/xlators/meta/src/options-dir.c
@@ -0,0 +1,65 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+static int
+dict_key_add(dict_t *dict, char *key, data_t *value, void *data)
+{
+ struct meta_dirent **direntp = data;
+
+ (*direntp)->name = gf_strdup(key);
+ (*direntp)->type = IA_IFREG;
+ (*direntp)->hook = meta_option_file_hook;
+
+ (*direntp)++;
+ return 0;
+}
+
+static int
+options_dir_fill(xlator_t *this, inode_t *inode, struct meta_dirent **dp)
+{
+ struct meta_dirent *dirent = NULL;
+ struct meta_dirent *direntp = NULL;
+ xlator_t *xl = NULL;
+
+ xl = meta_ctx_get(inode, this);
+
+ dirent = GF_CALLOC(sizeof(*dirent), xl->options->count,
+ gf_meta_mt_dirents_t);
+ if (!dirent)
+ return -1;
+
+ direntp = dirent;
+
+ dict_foreach(xl->options, dict_key_add, &direntp);
+
+ *dp = dirent;
+
+ return xl->options->count;
+}
+
+static struct meta_ops options_dir_ops = {.dir_fill = options_dir_fill};
+
+int
+meta_options_dir_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ctx_set(loc->inode, this, meta_ctx_get(loc->parent, this));
+
+ meta_ops_set(loc->inode, this, &options_dir_ops);
+
+ return 0;
+}
diff --git a/xlators/meta/src/private-file.c b/xlators/meta/src/private-file.c
new file mode 100644
index 00000000000..23ec319456b
--- /dev/null
+++ b/xlators/meta/src/private-file.c
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/statedump.h>
+
+static int
+private_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+ xlator_t *xl = NULL;
+
+ xl = meta_ctx_get(file, this);
+
+ gf_proc_dump_xlator_private(xl, strfd);
+
+ return strfd->size;
+}
+
+static struct meta_ops private_file_ops = {
+ .file_fill = private_file_fill,
+};
+
+int
+meta_private_file_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &private_file_ops);
+
+ meta_ctx_set(loc->inode, this, meta_ctx_get(loc->parent, this));
+
+ return 0;
+}
diff --git a/xlators/meta/src/process_uuid-file.c b/xlators/meta/src/process_uuid-file.c
new file mode 100644
index 00000000000..a24c1b57ab3
--- /dev/null
+++ b/xlators/meta/src/process_uuid-file.c
@@ -0,0 +1,37 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
+
+static int
+process_uuid_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+ strprintf(strfd, "%s\n", this->ctx->process_uuid);
+ return strfd->size;
+}
+
+static struct meta_ops process_uuid_file_ops = {
+ .file_fill = process_uuid_file_fill,
+};
+
+int
+meta_process_uuid_file_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &process_uuid_file_ops);
+
+ return 0;
+}
diff --git a/xlators/meta/src/profile-file.c b/xlators/meta/src/profile-file.c
new file mode 100644
index 00000000000..829dcb77451
--- /dev/null
+++ b/xlators/meta/src/profile-file.c
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/statedump.h>
+
+static int
+profile_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+ xlator_t *xl = NULL;
+
+ xl = meta_ctx_get(file, this);
+
+ gf_proc_dump_xlator_profile(xl, strfd);
+
+ return strfd->size;
+}
+
+static struct meta_ops profile_file_ops = {
+ .file_fill = profile_file_fill,
+};
+
+int
+meta_profile_file_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &profile_file_ops);
+
+ meta_ctx_set(loc->inode, this, meta_ctx_get(loc->parent, this));
+
+ return 0;
+}
diff --git a/xlators/meta/src/root-dir.c b/xlators/meta/src/root-dir.c
new file mode 100644
index 00000000000..80292bd3dda
--- /dev/null
+++ b/xlators/meta/src/root-dir.c
@@ -0,0 +1,77 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+static struct meta_dirent root_dir_dirents[] = {
+ DOT_DOTDOT,
+
+ {
+ .name = "graphs",
+ .type = IA_IFDIR,
+ .hook = meta_graphs_dir_hook,
+ },
+ {
+ .name = "frames",
+ .type = IA_IFREG,
+ .hook = meta_frames_file_hook,
+ },
+ {
+ .name = "logging",
+ .type = IA_IFDIR,
+ .hook = meta_logging_dir_hook,
+ },
+ {
+ .name = "process_uuid",
+ .type = IA_IFREG,
+ .hook = meta_process_uuid_file_hook,
+ },
+ {
+ .name = "version",
+ .type = IA_IFREG,
+ .hook = meta_version_file_hook,
+ },
+ {
+ .name = "cmdline",
+ .type = IA_IFREG,
+ .hook = meta_cmdline_file_hook,
+ },
+ {
+ .name = "mallinfo",
+ .type = IA_IFREG,
+ .hook = meta_mallinfo_file_hook,
+ },
+ {
+ .name = "master",
+ .type = IA_IFDIR,
+ .hook = meta_master_dir_hook,
+ },
+ {
+ .name = "measure_latency",
+ .type = IA_IFREG,
+ .hook = meta_measure_file_hook,
+ },
+ {.name = NULL}};
+
+static struct meta_ops meta_root_dir_ops = {.fixed_dirents = root_dir_dirents};
+
+int
+meta_root_dir_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &meta_root_dir_ops);
+
+ return 0;
+}
diff --git a/xlators/meta/src/subvolume-link.c b/xlators/meta/src/subvolume-link.c
new file mode 100644
index 00000000000..5b1f752efd0
--- /dev/null
+++ b/xlators/meta/src/subvolume-link.c
@@ -0,0 +1,56 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+
+static int
+subvolume_link_fill(xlator_t *this, inode_t *inode, strfd_t *strfd)
+{
+ xlator_t *xl = NULL;
+
+ xl = meta_ctx_get(inode, this);
+
+ strprintf(strfd, "../../%s", xl->name);
+
+ return 0;
+}
+
+struct meta_ops subvolume_link_ops = {.link_fill = subvolume_link_fill};
+
+int
+meta_subvolume_link_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ int count = 0;
+ int i = 0;
+ xlator_t *xl = NULL;
+ xlator_list_t *subv = NULL;
+ xlator_t *subvol = NULL;
+
+ count = strtol(loc->name, 0, 0);
+ xl = meta_ctx_get(loc->parent, this);
+
+ for (subv = xl->children; subv; subv = subv->next) {
+ if (i == count) {
+ subvol = subv->xlator;
+ break;
+ }
+ i++;
+ }
+
+ meta_ctx_set(loc->inode, this, subvol);
+
+ meta_ops_set(loc->inode, this, &subvolume_link_ops);
+ return 0;
+}
diff --git a/xlators/meta/src/subvolumes-dir.c b/xlators/meta/src/subvolumes-dir.c
new file mode 100644
index 00000000000..3cb170ea1f4
--- /dev/null
+++ b/xlators/meta/src/subvolumes-dir.c
@@ -0,0 +1,62 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+static int
+subvolumes_dir_fill(xlator_t *this, inode_t *dir, struct meta_dirent **dp)
+{
+ struct meta_dirent *dirents = NULL;
+ xlator_t *xl = NULL;
+ xlator_list_t *subv = NULL;
+ int i = 0;
+ int count = 0;
+
+ xl = meta_ctx_get(dir, this);
+
+ for (subv = xl->children; subv; subv = subv->next)
+ count++;
+
+ dirents = GF_MALLOC(sizeof(*dirents) * count, gf_meta_mt_dirents_t);
+ if (!dirents)
+ return -1;
+
+ for (subv = xl->children; subv; subv = subv->next) {
+ char num[16] = {};
+ snprintf(num, 16, "%d", i);
+
+ dirents[i].name = gf_strdup(num);
+ dirents[i].type = IA_IFLNK;
+ dirents[i].hook = meta_subvolume_link_hook;
+ i++;
+ }
+
+ *dp = dirents;
+
+ return count;
+}
+
+static struct meta_ops subvolumes_dir_ops = {.dir_fill = subvolumes_dir_fill};
+
+int
+meta_subvolumes_dir_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ctx_set(loc->inode, this, meta_ctx_get(loc->parent, this));
+
+ meta_ops_set(loc->inode, this, &subvolumes_dir_ops);
+
+ return 0;
+}
diff --git a/xlators/meta/src/top-link.c b/xlators/meta/src/top-link.c
new file mode 100644
index 00000000000..33f0d407411
--- /dev/null
+++ b/xlators/meta/src/top-link.c
@@ -0,0 +1,40 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+
+static int
+top_link_fill(xlator_t *this, inode_t *inode, strfd_t *strfd)
+{
+ glusterfs_graph_t *graph = NULL;
+
+ graph = meta_ctx_get(inode, this);
+
+ strprintf(strfd, "%s", ((xlator_t *)graph->top)->name);
+
+ return 0;
+}
+
+struct meta_ops top_link_ops = {.link_fill = top_link_fill};
+
+int
+meta_top_link_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &top_link_ops);
+
+ meta_ctx_set(loc->inode, this, meta_ctx_get(loc->parent, this));
+
+ return 0;
+}
diff --git a/xlators/meta/src/tree.c b/xlators/meta/src/tree.c
deleted file mode 100644
index cad5cbd7172..00000000000
--- a/xlators/meta/src/tree.c
+++ /dev/null
@@ -1,179 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <string.h>
-
-#include "glusterfs.h"
-#include "xlator.h"
-
-#include "meta.h"
-#include "meta-mem-types.h"
-
-static int
-is_meta_path (const char *path)
-{
- while (*path == '/')
- path++;
- if (!strncmp (path, ".meta", strlen (".meta")))
- return 1;
- return 0;
-}
-
-struct stat *
-new_stbuf (void)
-{
- static int next_inode = 0;
- struct stat *stbuf = GF_CALLOC (1, sizeof (struct stat), gf_meta_mt_stat);
-
- ERR_ABORT (stbuf);
-
- stbuf->st_dev = 0;
- stbuf->st_ino = next_inode++;
- stbuf->st_mode = S_IRUSR | S_IRGRP | S_IROTH;
- stbuf->st_nlink = 1;
- stbuf->st_uid = 0;
- stbuf->st_gid = 0;
- stbuf->st_rdev = 0;
- stbuf->st_size = 0;
- stbuf->st_blksize = 0;
- stbuf->st_blocks = 0;
- stbuf->st_atime = time (NULL);
- stbuf->st_atim.tv_nsec = 0;
- stbuf->st_mtime = stbuf->st_atime;
- stbuf->st_mtim.tv_nsec = 0;
- stbuf->st_ctime = stbuf->st_ctime;
- stbuf->st_ctim.tv_nsec = 0;
-
- return stbuf;
-}
-
-/* find an entry among the siblings of an entry */
-static meta_dirent_t *
-find_entry (meta_dirent_t *node, const char *dir)
-{
- meta_dirent_t *trav = node;
- while (trav) {
- if (!strcmp (trav->name, dir))
- return trav;
- trav = trav->next;
- }
- return NULL;
-}
-
-/*
- * Return the meta_dirent_t corresponding to the pathname.
- *
- * If pathname does not exist in the meta tree, try to return
- * its highest parent that does exist. The part of the
- * pathname that is left over is returned in the value-result
- * variable {remain}.
- * For example, for "/.meta/xlators/brick1/view/foo/bar/baz",
- * return the entry for "/.meta/xlators/brick1/view"
- * and set remain to "/bar/baz"
- */
-
-meta_dirent_t *
-lookup_meta_entry (meta_dirent_t *root, const char *path,
- char **remain)
-{
- char *_path = gf_strdup (path);
-
- if (!is_meta_path (path))
- return NULL;
-
- meta_dirent_t *trav = root;
- char *dir = strtok (_path, "/");
- dir = strtok (NULL, "/");
-
- while (dir) {
- meta_dirent_t *ntrav;
- ntrav = find_entry (trav->children, dir);
- if (!ntrav) {
- /* we have reached bottom of the meta tree.
- Unknown dragons lie further below */
- if (remain) {
- char *piece = dir;
- while (piece) {
- char *tmp = *remain;
- if (*remain)
- gf_asprintf (remain, "/%s/%s", *remain, piece);
- else
- gf_asprintf (remain, "/%s", piece);
- if (tmp) GF_FREE (tmp);
- piece = strtok (NULL, "/");
- }
- }
- return trav;
- }
- dir = strtok (NULL, "/");
- trav = ntrav;
- }
-
- GF_FREE (_path);
- return trav;
-}
-
-meta_dirent_t *
-insert_meta_entry (meta_dirent_t *root, const char *path,
- int type, struct stat *stbuf, struct xlator_fops *fops)
-{
- if (!is_meta_path (path))
- return NULL;
- char *slashpos = strrchr (path, '/');
- char *dir = strndup (path, slashpos - path);
- meta_dirent_t *parent = lookup_meta_entry (root, dir, NULL);
- if (!dir)
- return NULL;
-
- meta_dirent_t *new = GF_CALLOC (1, sizeof (meta_dirent_t),
- gf_meta_mt_meta_dirent_t);
- ERR_ABORT (new);
- new->name = gf_strdup (slashpos+1);
- new->type = type;
- new->parent = parent;
- new->next = parent->children;
- parent->children = new;
- if (stbuf)
- new->stbuf = stbuf;
- else
- new->stbuf = new_stbuf ();
-
- new->stbuf->st_mode |= type;
- new->fops = fops;
- return new;
-}
-
-int main (void)
-{
- meta_dirent_t *root = GF_CALLOC (1, sizeof (meta_dirent_t),
- gf_meta_mt_meta_dirent_t);
- ERR_ABORT (root);
- root->name = gf_strdup (".meta");
-
- insert_meta_entry (root, "/.meta/version", S_IFREG, NULL, NULL);
- return 0;
-}
diff --git a/xlators/meta/src/tree.h b/xlators/meta/src/tree.h
deleted file mode 100644
index 8157148db5b..00000000000
--- a/xlators/meta/src/tree.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __TREE_H__
-#define __TREE_H__
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-meta_dirent_t *
-insert_meta_entry (meta_dirent_t *root, const char *path,
- int type, struct stat *stbuf, struct xlator_fops *fops);
-meta_dirent_t *
-lookup_meta_entry (meta_dirent_t *root, const char *path,
- char **remain);
-
-#endif /* __TREE_H__ */
diff --git a/xlators/meta/src/type-file.c b/xlators/meta/src/type-file.c
new file mode 100644
index 00000000000..ece342a0b2a
--- /dev/null
+++ b/xlators/meta/src/type-file.c
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
+
+static int
+type_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+ xlator_t *xl = NULL;
+
+ xl = meta_ctx_get(file, this);
+
+ strprintf(strfd, "%s\n", xl->type);
+
+ return strfd->size;
+}
+
+static struct meta_ops type_file_ops = {
+ .file_fill = type_file_fill,
+};
+
+int
+meta_type_file_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &type_file_ops);
+
+ meta_ctx_set(loc->inode, this, meta_ctx_get(loc->parent, this));
+
+ return 0;
+}
diff --git a/xlators/meta/src/version-file.c b/xlators/meta/src/version-file.c
new file mode 100644
index 00000000000..36276fb810a
--- /dev/null
+++ b/xlators/meta/src/version-file.c
@@ -0,0 +1,37 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include <glusterfs/strfd.h>
+#include <glusterfs/lkowner.h>
+
+static int
+version_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+ strprintf(strfd, "{ \n \"Package Version\": \"%s\"\n}", PACKAGE_VERSION);
+ return strfd->size;
+}
+
+static struct meta_ops version_file_ops = {
+ .file_fill = version_file_fill,
+};
+
+int
+meta_version_file_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &version_file_ops);
+
+ return 0;
+}
diff --git a/xlators/meta/src/view-dir.c b/xlators/meta/src/view-dir.c
new file mode 100644
index 00000000000..30931061567
--- /dev/null
+++ b/xlators/meta/src/view-dir.c
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+static struct meta_dirent view_dir_dirents[] = {DOT_DOTDOT,
+
+ {.name = NULL}};
+
+static struct meta_ops view_dir_ops = {.fixed_dirents = view_dir_dirents};
+
+int
+meta_view_dir_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ctx_set(loc->inode, this, meta_ctx_get(loc->parent, this));
+
+ meta_ops_set(loc->inode, this, &view_dir_ops);
+
+ return 0;
+}
diff --git a/xlators/meta/src/view.c b/xlators/meta/src/view.c
deleted file mode 100644
index ba3c30e0bc2..00000000000
--- a/xlators/meta/src/view.c
+++ /dev/null
@@ -1,258 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "glusterfs.h"
-#include "xlator.h"
-
-#include "meta.h"
-
-/*
- * This file contains fops for the files and directories in
- * an xlator directory
- */
-
-/* /.meta/xlators/.../type */
-
-int32_t
-meta_xlator_type_readv (call_frame_t *frame, xlator_t *this,
- dict_t *fd, size_t size, off_t offset)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- data_t *path_data = dict_get (fd, this->name);
-
- if (path_data) {
- const char *path = data_to_str (path_data);
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
- xlator_t *view_xlator = file->view_xlator;
-
- int type_size;
- type_size = strlen (view_xlator->type);
-
- struct iovec vec;
- vec.iov_base = view_xlator->type + offset;
- vec.iov_len = min (type_size - offset, size);
-
- STACK_UNWIND (frame, vec.iov_len, 0, &vec, 1);
- return 0;
- }
-}
-
-int32_t
-meta_xlator_type_getattr (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
-
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
- xlator_t *view_xlator = file->view_xlator;
- file->stbuf->st_size = strlen (view_xlator->type);
-
- STACK_UNWIND (frame, 0, 0, file->stbuf);
- return 0;
-}
-
-struct xlator_fops meta_xlator_type_fops = {
- .readv = meta_xlator_type_readv,
- .getattr = meta_xlator_type_getattr
-};
-
-/*
- * fops for the "view" directory
- * {xlator}/view shows the filesystem as it appears
- * to {xlator}
- */
-
-static int32_t
-meta_xlator_view_getattr_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, buf);
- return 0;
-}
-
-int32_t
-meta_xlator_view_getattr (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- char *op_path = NULL;
-
- meta_dirent_t *file = lookup_meta_entry (root, path, &op_path);
-
- if (op_path) {
- STACK_WIND (frame, meta_xlator_view_getattr_cbk, file->view_xlator,
- file->view_xlator->fops->getattr,
- op_path);
- }
- else {
- STACK_UNWIND (frame, 0, 0, file->stbuf);
- }
-
- return 0;
-}
-
-static int32_t
-meta_xlator_view_readdir_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret, int32_t op_errno,
- dir_entry_t *entries, int32_t count)
-{
- STACK_UNWIND (frame, op_ret, op_errno, entries, count);
- return 0;
-}
-
-int32_t
-meta_xlator_view_readdir (call_frame_t *frame,
- xlator_t *this,
- const char *path)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- char *op_path = NULL;
-
- meta_dirent_t *dir = lookup_meta_entry (root, path, &op_path);
-
- STACK_WIND (frame, meta_xlator_view_readdir_cbk,
- dir->view_xlator, dir->view_xlator->fops->readdir,
- op_path ? op_path : "/");
- return 0;
-}
-
-static int32_t
-meta_xlator_view_open_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this,
- int32_t op_ret, int32_t op_errno,
- dict_t *ctx, struct stat *buf)
-{
- STACK_UNWIND (frame, op_ret, op_errno, ctx, buf);
- return 0;
-}
-
-int32_t
-meta_xlator_view_open (call_frame_t *frame, xlator_t *this,
- const char *path, int32_t flags, mode_t mode)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- char *op_path = NULL;
-
- meta_dirent_t *file = lookup_meta_entry (root, path, &op_path);
- STACK_WIND (frame, meta_xlator_view_open_cbk,
- file->view_xlator, file->view_xlator->fops->open,
- op_path, flags, mode);
- return 0;
-}
-
-int32_t
-meta_xlator_view_create (call_frame_t *frame, xlator_t *this,
- const char *path, int32_t flags, mode_t mode)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- char *op_path = NULL;
-
- meta_dirent_t *file = lookup_meta_entry (root, path, &op_path);
- STACK_WIND (frame, meta_xlator_view_open_cbk,
- file->view_xlator, file->view_xlator->fops->create,
- op_path, flags, mode);
- return 0;
-}
-
-static int32_t
-meta_xlator_view_readv_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno, struct iovec *vector,
- int32_t count)
-{
- STACK_UNWIND (frame, op_ret, op_errno, vector, count);
- return 0;
-}
-
-int32_t
-meta_xlator_view_readv (call_frame_t *frame, xlator_t *this,
- dict_t *fd, size_t size, off_t offset)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- data_t *path_data = dict_get (fd, this->name);
-
- if (path_data) {
- const char *path = data_to_str (path_data);
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
-
- STACK_WIND (frame, meta_xlator_view_readv_cbk,
- file->view_xlator, file->view_xlator->fops->readv,
- fd, size, offset);
- return 0;
- }
-
- STACK_UNWIND (frame, -1, EBADFD, NULL, 0);
- return 0;
-}
-
-static int32_t
-meta_xlator_view_writev_cbk (call_frame_t *frame, void *cookie,
- xlator_t *this, int32_t op_ret,
- int32_t op_errno)
-{
- STACK_UNWIND (frame, op_ret, op_errno);
- return 0;
-}
-
-int32_t
-meta_xlator_view_writev (call_frame_t *frame, xlator_t *this,
- dict_t *fd,
- struct iovec *vector, int32_t count, off_t offset)
-{
- meta_private_t *priv = (meta_private_t *) this->private;
- meta_dirent_t *root = priv->tree;
- data_t *path_data = dict_get (fd, this->name);
-
- if (path_data) {
- const char *path = data_to_str (path_data);
- meta_dirent_t *file = lookup_meta_entry (root, path, NULL);
-
- STACK_WIND (frame, meta_xlator_view_writev_cbk,
- file->view_xlator, file->view_xlator->fops->writev,
- fd, vector, count, offset);
- return 0;
- }
-
- STACK_UNWIND (frame, -1, EBADFD, NULL, 0);
- return 0;
-}
-
-struct xlator_fops meta_xlator_view_fops = {
- .getattr = meta_xlator_view_getattr,
- .readdir = meta_xlator_view_readdir,
- .open = meta_xlator_view_open,
- .create = meta_xlator_view_create,
- .readv = meta_xlator_view_readv,
- .writev = meta_xlator_view_writev
-};
diff --git a/xlators/meta/src/view.h b/xlators/meta/src/view.h
deleted file mode 100644
index 440c0d34d9b..00000000000
--- a/xlators/meta/src/view.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef __VIEW_H__
-#define __VIEW_H__
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-
-struct xlator_fops meta_xlator_type_fops;
-struct xlator_fops meta_xlator_view_fops;
-
-#endif /* __VIEW_H__ */
diff --git a/xlators/meta/src/volfile-file.c b/xlators/meta/src/volfile-file.c
new file mode 100644
index 00000000000..b2e2562ab8b
--- /dev/null
+++ b/xlators/meta/src/volfile-file.c
@@ -0,0 +1,79 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include <glusterfs/strfd.h>
+
+static int
+xldump_options(dict_t *this, char *key, data_t *value, void *strfd)
+{
+ strprintf(strfd, " option %s %s\n", key, value->data);
+ return 0;
+}
+
+static void
+xldump_subvolumes(xlator_t *this, void *strfd)
+{
+ xlator_list_t *subv = NULL;
+
+ if (!this->children)
+ return;
+
+ strprintf(strfd, " subvolumes");
+
+ for (subv = this->children; subv; subv = subv->next)
+ strprintf(strfd, " %s", subv->xlator->name);
+
+ strprintf(strfd, "\n");
+}
+
+static void
+xldump(xlator_t *each, void *strfd)
+{
+ strprintf(strfd, "volume %s\n", each->name);
+ strprintf(strfd, " type %s\n", each->type);
+ dict_foreach(each->options, xldump_options, strfd);
+
+ xldump_subvolumes(each, strfd);
+
+ strprintf(strfd, "end-volume\n");
+ strprintf(strfd, "\n");
+}
+
+static int
+volfile_file_fill(xlator_t *this, inode_t *file, strfd_t *strfd)
+{
+ glusterfs_graph_t *graph = NULL;
+
+ graph = meta_ctx_get(file, this);
+
+ xlator_foreach_depth_first(graph->top, xldump, strfd);
+
+ return strfd->size;
+}
+
+static struct meta_ops volfile_file_ops = {
+ .file_fill = volfile_file_fill,
+};
+
+int
+meta_volfile_file_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ops_set(loc->inode, this, &volfile_file_ops);
+
+ meta_ctx_set(loc->inode, this, meta_ctx_get(loc->parent, this));
+
+ return 0;
+}
diff --git a/xlators/meta/src/xlator-dir.c b/xlators/meta/src/xlator-dir.c
new file mode 100644
index 00000000000..86189715790
--- /dev/null
+++ b/xlators/meta/src/xlator-dir.c
@@ -0,0 +1,97 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+
+#include "meta-mem-types.h"
+#include "meta.h"
+#include "meta-hooks.h"
+
+static struct meta_dirent xlator_dir_dirents[] = {
+ DOT_DOTDOT,
+
+ {
+ .name = "view",
+ .type = IA_IFDIR,
+ .hook = meta_view_dir_hook,
+ },
+ {
+ .name = "type",
+ .type = IA_IFREG,
+ .hook = meta_type_file_hook,
+ },
+ {
+ .name = "name",
+ .type = IA_IFREG,
+ .hook = meta_name_file_hook,
+ },
+ {
+ .name = "subvolumes",
+ .type = IA_IFDIR,
+ .hook = meta_subvolumes_dir_hook,
+ },
+ {
+ .name = "options",
+ .type = IA_IFDIR,
+ .hook = meta_options_dir_hook,
+ },
+ {
+ .name = "private",
+ .type = IA_IFREG,
+ .hook = meta_private_file_hook,
+ },
+ {
+ .name = "history",
+ .type = IA_IFREG,
+ .hook = meta_history_file_hook,
+ },
+ {
+ .name = "meminfo",
+ .type = IA_IFREG,
+ .hook = meta_meminfo_file_hook,
+ },
+ {
+ .name = "profile",
+ .type = IA_IFREG,
+ .hook = meta_profile_file_hook,
+ },
+ {.name = NULL}};
+
+static struct meta_ops xlator_dir_ops = {.fixed_dirents = xlator_dir_dirents};
+
+int
+meta_xlator_dir_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ glusterfs_graph_t *graph = NULL;
+ xlator_t *xl = NULL;
+
+ graph = meta_ctx_get(loc->parent, this);
+
+ xl = xlator_search_by_name(graph->first, loc->name);
+
+ meta_ctx_set(loc->inode, this, xl);
+
+ meta_ops_set(loc->inode, this, &xlator_dir_ops);
+
+ return 0;
+}
+
+int
+meta_master_dir_hook(call_frame_t *frame, xlator_t *this, loc_t *loc,
+ dict_t *xdata)
+{
+ meta_ctx_set(loc->inode, this, this->ctx->master);
+
+ meta_ops_set(loc->inode, this, &xlator_dir_ops);
+
+ return 0;
+}
diff --git a/xlators/mgmt/glusterd/src/Makefile.am b/xlators/mgmt/glusterd/src/Makefile.am
index 485350b3d1e..685beb42d27 100644
--- a/xlators/mgmt/glusterd/src/Makefile.am
+++ b/xlators/mgmt/glusterd/src/Makefile.am
@@ -1,40 +1,79 @@
+if WITH_SERVER
xlator_LTLIBRARIES = glusterd.la
+endif
+
xlatordir = $(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator/mgmt
-glusterd_la_CPPFLAGS = "-DFILTERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/filter\""
-glusterd_la_LDFLAGS = -module -avoidversion $(LIBXML2_LIBS) -lcrypto
+glusterd_la_CPPFLAGS = $(AM_CPPFLAGS) \
+ -DFILTERDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/filter\" \
+ -DXLATORDIR=\"$(libdir)/glusterfs/$(PACKAGE_VERSION)/xlator\" \
+ -I$(top_srcdir)/libglusterd/src/
+
+glusterd_la_LDFLAGS = -module $(GF_XLATOR_DEFAULT_LDFLAGS)
glusterd_la_SOURCES = glusterd.c glusterd-handler.c glusterd-sm.c \
glusterd-op-sm.c glusterd-utils.c glusterd-rpc-ops.c \
glusterd-store.c glusterd-handshake.c glusterd-pmap.c \
- glusterd-volgen.c glusterd-rebalance.c glusterd-quota.c \
- glusterd-geo-rep.c glusterd-replace-brick.c glusterd-log-ops.c \
+ glusterd-volgen.c glusterd-rebalance.c \
+ glusterd-quota.c glusterd-bitrot.c glusterd-geo-rep.c \
+ glusterd-replace-brick.c glusterd-log-ops.c \
glusterd-volume-ops.c glusterd-brick-ops.c glusterd-mountbroker.c \
- glusterd-syncop.c glusterd-hooks.c
+ glusterd-syncop.c glusterd-hooks.c glusterd-volume-set.c \
+ glusterd-locks.c glusterd-snapshot.c glusterd-mgmt-handler.c \
+ glusterd-mgmt.c glusterd-peer-utils.c glusterd-statedump.c \
+ glusterd-snapshot-utils.c glusterd-conn-mgmt.c \
+ glusterd-proc-mgmt.c glusterd-svc-mgmt.c \
+ glusterd-nfs-svc.c glusterd-quotad-svc.c glusterd-svc-helper.c \
+ glusterd-conn-helper.c glusterd-snapd-svc.c glusterd-snapd-svc-helper.c \
+ glusterd-bitd-svc.c glusterd-scrub-svc.c glusterd-server-quorum.c \
+ glusterd-reset-brick.c glusterd-shd-svc.c glusterd-shd-svc-helper.c \
+ glusterd-gfproxyd-svc.c glusterd-gfproxyd-svc-helper.c glusterd-ganesha.c \
+ $(CONTRIBDIR)/mount/mntent.c
glusterd_la_LIBADD = $(top_builddir)/libglusterfs/src/libglusterfs.la \
- $(top_builddir)/rpc/xdr/src/libgfxdr.la \
- $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la
+ $(top_builddir)/libglusterd/src/libglusterd.la \
+ $(top_builddir)/rpc/xdr/src/libgfxdr.la \
+ $(top_builddir)/rpc/rpc-lib/src/libgfrpc.la \
+ $(XML_LIBS) -lcrypto $(URCU_LIBS) $(URCU_CDS_LIBS) $(LIB_DL) $(GF_XLATOR_MGNT_LIBADD)
noinst_HEADERS = glusterd.h glusterd-utils.h glusterd-op-sm.h \
glusterd-sm.h glusterd-store.h glusterd-mem-types.h \
glusterd-pmap.h glusterd-volgen.h glusterd-mountbroker.h \
- glusterd-syncop.h glusterd-hooks.h
+ glusterd-syncop.h glusterd-hooks.h glusterd-locks.h glusterd-quota.h \
+ glusterd-mgmt.h glusterd-messages.h glusterd-peer-utils.h \
+ glusterd-statedump.h glusterd-snapshot-utils.h glusterd-geo-rep.h \
+ glusterd-conn-mgmt.h glusterd-conn-helper.h glusterd-proc-mgmt.h \
+ glusterd-svc-mgmt.h glusterd-nfs-svc.h \
+ glusterd-quotad-svc.h glusterd-svc-helper.h glusterd-snapd-svc.h \
+ glusterd-snapd-svc-helper.h glusterd-rcu.h glusterd-bitd-svc.h \
+ glusterd-scrub-svc.h glusterd-server-quorum.h glusterd-errno.h \
+ glusterd-shd-svc.h glusterd-shd-svc-helper.h \
+ glusterd-gfproxyd-svc.h glusterd-gfproxyd-svc-helper.h \
+ $(CONTRIBDIR)/userspace-rcu/rculist-extra.h \
+ $(CONTRIBDIR)/mount/mntent_compat.h
-AM_CFLAGS = -fPIC -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -Wall -D$(GF_HOST_OS)\
- -I$(top_srcdir)/libglusterfs/src -shared -nostartfiles $(GF_CFLAGS)\
- -I$(rpclibdir) -L$(xlatordir)/ -I$(CONTRIBDIR)/rbtree \
- -I$(top_srcdir)/rpc/xdr/src -I$(top_srcdir)/rpc/rpc-lib/src \
- -I$(CONTRIBDIR)/uuid \
+AM_CPPFLAGS = $(GF_CPPFLAGS) -I$(top_srcdir)/libglusterfs/src \
+ -I$(top_srcdir)/rpc/xdr/src -I$(top_builddir)/rpc/xdr/src \
+ -I$(CONTRIBDIR)/rbtree -I$(top_srcdir)/rpc/rpc-lib/src \
+ -I$(CONTRIBDIR)/mount -I$(CONTRIBDIR)/userspace-rcu \
-DSBIN_DIR=\"$(sbindir)\" -DDATADIR=\"$(localstatedir)\" \
- -DGSYNCD_PREFIX=\"$(libexecdir)/glusterfs\"\
- -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) $(LIBXML2_CFLAGS)
+ -DGSYNCD_PREFIX=\"$(GLUSTERFS_LIBEXECDIR)\" \
+ -DCONFDIR=\"$(localstatedir)/run/gluster/shared_storage/nfs-ganesha\" \
+ -DGANESHA_PREFIX=\"$(libexecdir)/ganesha\" \
+ -DSYNCDAEMON_COMPILE=$(SYNCDAEMON_COMPILE) \
+ -I$(top_srcdir)/libglusterd/src/
+
+AM_CFLAGS = -Wall $(GF_CFLAGS) $(URCU_CFLAGS) $(URCU_CDS_CFLAGS) $(XML_CFLAGS)
+
+AM_LDFLAGS = -L$(xlatordir) $(URCU_LIBS) $(URCU_CDS_LIBS)
CLEANFILES =
install-data-hook:
-
-if GF_INSTALL_VAR_LIB_GLUSTERD
- $(mkdir_p) /var/lib/
- (stat /etc/glusterd && mv /etc/glusterd /var/lib/) || true;
- (ln -sf /var/lib/glusterd /etc/glusterd) || true;
+if WITH_SERVER
+if GF_INSTALL_GLUSTERD_WORKDIR
+ $(mkdir_p) $(DESTDIR)$(GLUSTERD_WORKDIR)
+ (stat $(DESTDIR)$(sysconfdir)/glusterd && \
+ mv $(DESTDIR)$(sysconfdir)/glusterd $(DESTDIR)$(GLUSTERD_WORKDIR)) || true;
+ (ln -sf $(DESTDIR)$(GLUSTERD_WORKDIR) $(sysconfdir)/glusterd) || true;
+endif
endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-bitd-svc.c b/xlators/mgmt/glusterd/src/glusterd-bitd-svc.c
new file mode 100644
index 00000000000..6adb799b18f
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-bitd-svc.c
@@ -0,0 +1,206 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-bitd-svc.h"
+#include "glusterd-svc-helper.h"
+
+void
+glusterd_bitdsvc_build(glusterd_svc_t *svc)
+{
+ svc->manager = glusterd_bitdsvc_manager;
+ svc->start = glusterd_bitdsvc_start;
+ svc->stop = glusterd_bitdsvc_stop;
+}
+
+int
+glusterd_bitdsvc_init(glusterd_svc_t *svc)
+{
+ return glusterd_svc_init(svc, bitd_svc_name);
+}
+
+static int
+glusterd_bitdsvc_create_volfile()
+{
+ char filepath[PATH_MAX] = {
+ 0,
+ };
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ glusterd_svc_build_volfile_path(bitd_svc_name, conf->workdir, filepath,
+ sizeof(filepath));
+
+ ret = glusterd_create_global_volfile(build_bitd_graph, filepath, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Failed to create volfile");
+ goto out;
+ }
+
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_bitdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (!svc->inited) {
+ ret = glusterd_bitdsvc_init(svc);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BITD_INIT_FAIL,
+ "Failed to init "
+ "bitd service");
+ goto out;
+ } else {
+ svc->inited = _gf_true;
+ gf_msg_debug(this->name, 0,
+ "BitD service "
+ "initialized");
+ }
+ }
+
+ if (glusterd_should_i_stop_bitd()) {
+ ret = svc->stop(svc, SIGTERM);
+ } else {
+ ret = glusterd_bitdsvc_create_volfile();
+ if (ret)
+ goto out;
+
+ ret = svc->stop(svc, SIGKILL);
+ if (ret)
+ goto out;
+
+ ret = svc->start(svc, flags);
+ if (ret)
+ goto out;
+
+ ret = glusterd_conn_connect(&(svc->conn));
+ if (ret)
+ goto out;
+ }
+
+out:
+ if (ret)
+ gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name);
+
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_bitdsvc_start(glusterd_svc_t *svc, int flags)
+{
+ int ret = -1;
+ dict_t *cmdict = NULL;
+
+ cmdict = dict_new();
+ if (!cmdict)
+ goto error_return;
+
+ ret = dict_set_str(cmdict, "cmdarg0", "--global-timer-wheel");
+ if (ret)
+ goto dealloc_dict;
+
+ ret = glusterd_svc_start(svc, flags, cmdict);
+
+dealloc_dict:
+ dict_unref(cmdict);
+error_return:
+ return ret;
+}
+
+int
+glusterd_bitdsvc_stop(glusterd_svc_t *svc, int sig)
+{
+ return glusterd_svc_stop(svc, sig);
+}
+
+int
+glusterd_bitdsvc_reconfigure()
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ gf_boolean_t identical = _gf_false;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ if (glusterd_should_i_stop_bitd())
+ goto manager;
+ /*
+ * Check both OLD and NEW volfiles, if they are SAME by size
+ * and cksum i.e. "character-by-character". If YES, then
+ * NOTHING has been changed, just return.
+ */
+ ret = glusterd_svc_check_volfile_identical(priv->bitd_svc.name,
+ build_bitd_graph, &identical);
+ if (ret)
+ goto out;
+ if (identical) {
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * They are not identical. Find out if the topology is changed
+ * OR just the volume options. If just the options which got
+ * changed, then inform the xlator to reconfigure the options.
+ */
+ identical = _gf_false; /* RESET the FLAG */
+ ret = glusterd_svc_check_topology_identical(priv->bitd_svc.name,
+ build_bitd_graph, &identical);
+ if (ret)
+ goto out; /*not able to compare due to some corruption */
+
+ /* Topology is not changed, but just the options. But write the
+ * options to bitd volfile, so that bitd will be reconfigured.
+ */
+ if (identical) {
+ ret = glusterd_bitdsvc_create_volfile();
+ if (ret == 0) { /* Only if above PASSES */
+ ret = glusterd_fetchspec_notify(THIS);
+ }
+ goto out;
+ }
+
+manager:
+ /*
+ * bitd volfile's topology has been changed. bitd server needs
+ * to be RESTARTED to ACT on the changed volfile.
+ */
+ ret = priv->bitd_svc.manager(&(priv->bitd_svc), NULL, PROC_START_NO_WAIT);
+
+out:
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-bitd-svc.h b/xlators/mgmt/glusterd/src/glusterd-bitd-svc.h
new file mode 100644
index 00000000000..1bff084a9a8
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-bitd-svc.h
@@ -0,0 +1,40 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_BITD_SVC_H_
+#define _GLUSTERD_BITD_SVC_H_
+
+#include "glusterd-svc-mgmt.h"
+
+#define bitd_svc_name "bitd"
+
+void
+glusterd_bitdsvc_build(glusterd_svc_t *svc);
+
+int
+glusterd_bitdsvc_init(glusterd_svc_t *svc);
+
+int
+glusterd_bitdsvc_manager(glusterd_svc_t *svc, void *data, int flags);
+
+int
+glusterd_bitdsvc_start(glusterd_svc_t *svc, int flags);
+
+int
+glusterd_bitdsvc_stop(glusterd_svc_t *svc, int sig);
+
+int
+glusterd_bitdsvc_reconfigure();
+
+void
+glusterd_bitdsvc_build_volfile_path(char *server, char *workdir, char *volfile,
+ size_t len);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-bitrot.c b/xlators/mgmt/glusterd/src/glusterd-bitrot.c
new file mode 100644
index 00000000000..37429fe9214
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-bitrot.c
@@ -0,0 +1,822 @@
+/*
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+ */
+
+#include <glusterfs/common-utils.h>
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/compat-errno.h>
+#include "glusterd-scrub-svc.h"
+#include "glusterd-messages.h"
+
+#include <sys/wait.h>
+#include <dlfcn.h>
+
+const char *gd_bitrot_op_list[GF_BITROT_OPTION_TYPE_MAX] = {
+ [GF_BITROT_OPTION_TYPE_NONE] = "none",
+ [GF_BITROT_OPTION_TYPE_ENABLE] = "enable",
+ [GF_BITROT_OPTION_TYPE_DISABLE] = "disable",
+ [GF_BITROT_OPTION_TYPE_SCRUB_THROTTLE] = "scrub-throttle",
+ [GF_BITROT_OPTION_TYPE_SCRUB_FREQ] = "scrub-frequency",
+ [GF_BITROT_OPTION_TYPE_SCRUB] = "scrub",
+ [GF_BITROT_OPTION_TYPE_EXPIRY_TIME] = "expiry-time",
+ [GF_BITROT_OPTION_TYPE_SIGNER_THREADS] = "signer-threads",
+};
+
+int
+__glusterd_handle_bitrot(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_BITROT;
+ char *volname = NULL;
+ char *scrub = NULL;
+ int32_t type = 0;
+ char msg[256] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ GF_ASSERT(req);
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(msg, sizeof(msg),
+ "Unable to decode the "
+ "command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+ }
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Unable to get volume name");
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name, "
+ "while handling bitrot command");
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, "type", &type);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Unable to get type of command");
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get type of cmd, "
+ "while handling bitrot command");
+ goto out;
+ }
+
+ if (conf->op_version < GD_OP_VERSION_3_7_0) {
+ snprintf(msg, sizeof(msg),
+ "Cannot execute command. The "
+ "cluster is operating at version %d. Bitrot command "
+ "%s is unavailable in this version",
+ conf->op_version, gd_bitrot_op_list[type]);
+ ret = -1;
+ goto out;
+ }
+
+ if (type == GF_BITROT_CMD_SCRUB_STATUS) {
+ /* Backward compatibility handling for scrub status command*/
+ if (conf->op_version < GD_OP_VERSION_3_7_7) {
+ snprintf(msg, sizeof(msg),
+ "Cannot execute command. "
+ "The cluster is operating at version %d. "
+ "Bitrot scrub status command unavailable in "
+ "this version",
+ conf->op_version);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "scrub-value", &scrub);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get scrub value.");
+ ret = -1;
+ goto out;
+ }
+
+ if (!strncmp(scrub, "status", SLEN("status"))) {
+ ret = glusterd_op_begin_synctask(req, GD_OP_SCRUB_STATUS, dict);
+ goto out;
+ }
+ }
+
+ if (type == GF_BITROT_CMD_SCRUB_ONDEMAND) {
+ /* Backward compatibility handling for scrub status command*/
+ if (conf->op_version < GD_OP_VERSION_3_9_0) {
+ snprintf(msg, sizeof(msg),
+ "Cannot execute command. "
+ "The cluster is operating at version %d. "
+ "Bitrot scrub ondemand command unavailable in "
+ "this version",
+ conf->op_version);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "scrub-value", &scrub);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get scrub value.");
+ ret = -1;
+ goto out;
+ }
+
+ if (!strncmp(scrub, "ondemand", SLEN("ondemand"))) {
+ ret = glusterd_op_begin_synctask(req, GD_OP_SCRUB_ONDEMAND, dict);
+ goto out;
+ }
+ }
+
+ ret = glusterd_op_begin_synctask(req, GD_OP_BITROT, dict);
+
+out:
+ if (ret) {
+ if (msg[0] == '\0')
+ snprintf(msg, sizeof(msg), "Bitrot operation failed");
+ ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, msg);
+ }
+
+ return ret;
+}
+
+int
+glusterd_handle_bitrot(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __glusterd_handle_bitrot);
+}
+
+static int
+glusterd_bitrot_scrub_throttle(glusterd_volinfo_t *volinfo, dict_t *dict,
+ char *key, char **op_errstr)
+{
+ int32_t ret = -1;
+ char *scrub_throttle = NULL;
+ char *option = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = dict_get_str(dict, "scrub-throttle-value", &scrub_throttle);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch scrub-"
+ "throttle value");
+ goto out;
+ }
+
+ option = gf_strdup(scrub_throttle);
+ ret = dict_set_dynstr(volinfo->dict, key, option);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to set option %s", key);
+ goto out;
+ }
+
+ ret = glusterd_scrubsvc_reconfigure();
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SCRUBSVC_RECONF_FAIL,
+ "Failed to reconfigure scrub "
+ "services");
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+static int
+glusterd_bitrot_scrub_freq(glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+ char **op_errstr)
+{
+ int32_t ret = -1;
+ char *scrub_freq = NULL;
+ xlator_t *this = NULL;
+ char *option = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = dict_get_str(dict, "scrub-frequency-value", &scrub_freq);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch scrub-"
+ "freq value");
+ goto out;
+ }
+
+ option = gf_strdup(scrub_freq);
+ ret = dict_set_dynstr(volinfo->dict, key, option);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to set option %s", key);
+ goto out;
+ }
+
+ ret = glusterd_scrubsvc_reconfigure();
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SCRUBSVC_RECONF_FAIL,
+ "Failed to reconfigure scrub "
+ "services");
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+static int
+glusterd_bitrot_scrub(glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+ char **op_errstr)
+{
+ int32_t ret = -1;
+ char *scrub_value = NULL;
+ xlator_t *this = NULL;
+ char *option = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = dict_get_str(dict, "scrub-value", &scrub_value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch scrub"
+ "value");
+ goto out;
+ }
+
+ if (!strcmp(scrub_value, "resume")) {
+ option = gf_strdup("Active");
+ } else {
+ option = gf_strdup(scrub_value);
+ }
+
+ ret = dict_set_dynstr(volinfo->dict, key, option);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to set option %s", key);
+ goto out;
+ }
+
+ ret = glusterd_scrubsvc_reconfigure();
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SCRUBSVC_RECONF_FAIL,
+ "Failed to reconfigure scrub "
+ "services");
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+static int
+glusterd_bitrot_expiry_time(glusterd_volinfo_t *volinfo, dict_t *dict,
+ char *key, char **op_errstr)
+{
+ int32_t ret = -1;
+ uint32_t expiry_time = 0;
+ xlator_t *this = NULL;
+ char dkey[32] = {
+ 0,
+ };
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = dict_get_uint32(dict, "expiry-time", &expiry_time);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get bitrot expiry"
+ " timer value.");
+ goto out;
+ }
+
+ snprintf(dkey, sizeof(dkey), "%d", expiry_time);
+
+ ret = dict_set_dynstr_with_alloc(volinfo->dict, key, dkey);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to set option %s", key);
+ goto out;
+ }
+
+ ret = glusterd_bitdsvc_reconfigure();
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BITDSVC_RECONF_FAIL,
+ "Failed to reconfigure bitrot"
+ "services");
+ goto out;
+ }
+out:
+ return ret;
+}
+
+static gf_boolean_t
+is_bitd_configure_noop(xlator_t *this, glusterd_volinfo_t *volinfo)
+{
+ gf_boolean_t noop = _gf_true;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ if (!glusterd_is_bitrot_enabled(volinfo))
+ goto out;
+ else if (volinfo->status != GLUSTERD_STATUS_STARTED)
+ goto out;
+ else {
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (!glusterd_is_local_brick(this, volinfo, brickinfo))
+ continue;
+ noop = _gf_false;
+ return noop;
+ }
+ }
+out:
+ return noop;
+}
+
+static int
+glusterd_bitrot_signer_threads(glusterd_volinfo_t *volinfo, dict_t *dict,
+ char *key, char **op_errstr)
+{
+ int32_t ret = -1;
+ uint32_t signer_th_count = 0;
+ uint32_t existing_th_count = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char dkey[32] = {
+ 0,
+ };
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ ret = dict_get_uint32(dict, "signer-threads", &signer_th_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get bitrot signer thread count.");
+ goto out;
+ }
+
+ ret = dict_get_uint32(volinfo->dict, key, &existing_th_count);
+ if (ret == 0 && signer_th_count == existing_th_count) {
+ goto out;
+ }
+
+ snprintf(dkey, sizeof(dkey), "%d", signer_th_count);
+ ret = dict_set_dynstr_with_alloc(volinfo->dict, key, dkey);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to set option %s", key);
+ goto out;
+ }
+
+ if (!is_bitd_configure_noop(this, volinfo)) {
+ ret = priv->bitd_svc.manager(&(priv->bitd_svc), NULL,
+ PROC_START_NO_WAIT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BITDSVC_RECONF_FAIL,
+ "Failed to reconfigure bitrot services");
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+static int
+glusterd_bitrot_enable(glusterd_volinfo_t *volinfo, char **op_errstr)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
+ GF_VALIDATE_OR_GOTO(this->name, op_errstr, out);
+
+ if (glusterd_is_volume_started(volinfo) == 0) {
+ *op_errstr = gf_strdup(
+ "Volume is stopped, start volume "
+ "to enable bitrot.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_is_bitrot_enabled(volinfo);
+ if (ret) {
+ *op_errstr = gf_strdup("Bitrot is already enabled");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(volinfo->dict, VKEY_FEATURES_BITROT, "on");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "dict set failed");
+ goto out;
+ }
+
+ /*Once bitrot is enable scrubber should be in Active state*/
+ ret = dict_set_dynstr_with_alloc(volinfo->dict, "features.scrub", "Active");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to set option "
+ "features.scrub value");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && op_errstr && !*op_errstr)
+ gf_asprintf(op_errstr,
+ "Enabling bitrot on volume %s has been "
+ "unsuccessful",
+ volinfo->volname);
+ return ret;
+}
+
+static int
+glusterd_bitrot_disable(glusterd_volinfo_t *volinfo, char **op_errstr)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
+ GF_VALIDATE_OR_GOTO(this->name, op_errstr, out);
+
+ ret = dict_set_dynstr_with_alloc(volinfo->dict, VKEY_FEATURES_BITROT,
+ "off");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "dict set failed");
+ goto out;
+ }
+
+ /*Once bitrot disabled scrubber should be Inactive state*/
+ ret = dict_set_dynstr_with_alloc(volinfo->dict, "features.scrub",
+ "Inactive");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "features.scrub value");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret && op_errstr && !*op_errstr)
+ gf_asprintf(op_errstr,
+ "Disabling bitrot on volume %s has "
+ "been unsuccessful",
+ volinfo->volname);
+ return ret;
+}
+
+gf_boolean_t
+glusterd_should_i_stop_bitd()
+{
+ glusterd_conf_t *conf = THIS->private;
+ glusterd_volinfo_t *volinfo = NULL;
+ gf_boolean_t stopped = _gf_true;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ cds_list_for_each_entry(volinfo, &conf->volumes, vol_list)
+ {
+ if (!glusterd_is_bitrot_enabled(volinfo))
+ continue;
+ else if (volinfo->status != GLUSTERD_STATUS_STARTED)
+ continue;
+ else {
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (!glusterd_is_local_brick(this, volinfo, brickinfo))
+ continue;
+ stopped = _gf_false;
+ return stopped;
+ }
+
+ /* Before stopping bitrot/scrubber daemon check
+ * other volume also whether respective volume
+ * host a brick from this node or not.*/
+ continue;
+ }
+ }
+
+ return stopped;
+}
+
+static int
+glusterd_manage_bitrot(int opcode)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ switch (opcode) {
+ case GF_BITROT_OPTION_TYPE_ENABLE:
+ case GF_BITROT_OPTION_TYPE_DISABLE:
+ ret = priv->bitd_svc.manager(&(priv->bitd_svc), NULL,
+ PROC_START_NO_WAIT);
+ if (ret)
+ break;
+ ret = priv->scrub_svc.manager(&(priv->scrub_svc), NULL,
+ PROC_START_NO_WAIT);
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+
+ return ret;
+}
+
+int
+glusterd_op_bitrot(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ int32_t ret = -1;
+ char *volname = NULL;
+ int type = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_asprintf(op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, "type", &type);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get type from "
+ "dict");
+ goto out;
+ }
+
+ switch (type) {
+ case GF_BITROT_OPTION_TYPE_ENABLE:
+ ret = glusterd_bitrot_enable(volinfo, op_errstr);
+ if (ret < 0)
+ goto out;
+ break;
+
+ case GF_BITROT_OPTION_TYPE_DISABLE:
+ ret = glusterd_bitrot_disable(volinfo, op_errstr);
+ if (ret < 0)
+ goto out;
+
+ break;
+
+ case GF_BITROT_OPTION_TYPE_SCRUB_THROTTLE:
+ ret = glusterd_bitrot_scrub_throttle(
+ volinfo, dict, "features.scrub-throttle", op_errstr);
+ if (ret)
+ goto out;
+ break;
+
+ case GF_BITROT_OPTION_TYPE_SCRUB_FREQ:
+ ret = glusterd_bitrot_scrub_freq(volinfo, dict,
+ "features.scrub-freq", op_errstr);
+ if (ret)
+ goto out;
+ break;
+
+ case GF_BITROT_OPTION_TYPE_SCRUB:
+ ret = glusterd_bitrot_scrub(volinfo, dict, "features.scrub",
+ op_errstr);
+ if (ret)
+ goto out;
+ break;
+
+ case GF_BITROT_OPTION_TYPE_EXPIRY_TIME:
+ ret = glusterd_bitrot_expiry_time(
+ volinfo, dict, "features.expiry-time", op_errstr);
+ if (ret)
+ goto out;
+ break;
+
+ case GF_BITROT_OPTION_TYPE_SIGNER_THREADS:
+ ret = glusterd_bitrot_signer_threads(
+ volinfo, dict, "features.signer-threads", op_errstr);
+ if (ret)
+ goto out;
+ break;
+
+ case GF_BITROT_CMD_SCRUB_STATUS:
+ case GF_BITROT_CMD_SCRUB_ONDEMAND:
+ break;
+
+ default:
+ gf_asprintf(op_errstr,
+ "Bitrot command failed. Invalid "
+ "opcode");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_manage_bitrot(type);
+ if (ret)
+ goto out;
+
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Unable to re-create "
+ "volfiles");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Failed to store volinfo for "
+ "bitrot");
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int
+glusterd_op_stage_bitrot(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ int ret = 0;
+ char *volname = NULL;
+ char *scrub_cmd = NULL;
+ char *scrub_cmd_from_dict = NULL;
+ char msg[2048] = {
+ 0,
+ };
+ int type = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_asprintf(op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
+ }
+
+ if (!glusterd_is_volume_started(volinfo)) {
+ *op_errstr = gf_strdup(
+ "Volume is stopped, start volume "
+ "before executing bit rot command.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, "type", &type);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get type for "
+ "operation");
+
+ *op_errstr = gf_strdup(
+ "Staging stage failed for bitrot "
+ "operation.");
+ goto out;
+ }
+
+ if ((GF_BITROT_OPTION_TYPE_ENABLE != type) &&
+ (glusterd_is_bitrot_enabled(volinfo) == 0)) {
+ ret = -1;
+ gf_asprintf(op_errstr, "Bitrot is not enabled on volume %s", volname);
+ goto out;
+ }
+
+ if ((GF_BITROT_OPTION_TYPE_SCRUB == type)) {
+ ret = dict_get_str(volinfo->dict, "features.scrub",
+ &scrub_cmd_from_dict);
+ if (!ret) {
+ ret = dict_get_str(dict, "scrub-value", &scrub_cmd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to "
+ "get scrub-value");
+ *op_errstr = gf_strdup(
+ "Staging failed for "
+ "bitrot operation. "
+ "Please check log file"
+ " for more details.");
+ goto out;
+ }
+ /* If scrubber is resume then value of scrubber will be
+ * "Active" in the dictionary. */
+ if (!strcmp(scrub_cmd_from_dict, scrub_cmd) ||
+ (!strncmp("Active", scrub_cmd_from_dict, SLEN("Active")) &&
+ !strncmp("resume", scrub_cmd, SLEN("resume")))) {
+ snprintf(msg, sizeof(msg),
+ "Scrub is already"
+ " %sd for volume %s",
+ scrub_cmd, volinfo->volname);
+ *op_errstr = gf_strdup(msg);
+ ret = -1;
+ goto out;
+ }
+ }
+ ret = 0;
+ }
+
+out:
+ if (ret && op_errstr && *op_errstr)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_STAGE_BITROT_FAIL, "%s",
+ *op_errstr);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
index a0e24b434cd..e56cd0e6c74 100644
--- a/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-brick-ops.c
@@ -1,36 +1,27 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#include "common-utils.h"
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/common-utils.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
#include "glusterd.h"
#include "glusterd-op-sm.h"
+#include "glusterd-geo-rep.h"
#include "glusterd-store.h"
+#include "glusterd-mgmt.h"
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
-#include "run.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-messages.h"
+#include "glusterd-server-quorum.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
#include <sys/signal.h>
/* misc */
@@ -40,1286 +31,2064 @@
how many of the given bricks are added. other argument are self-
descriptive. */
int
-add_brick_at_right_order (glusterd_brickinfo_t *brickinfo,
- glusterd_volinfo_t *volinfo, int count,
- int32_t stripe_cnt, int32_t replica_cnt)
+add_brick_at_right_order(glusterd_brickinfo_t *brickinfo,
+ glusterd_volinfo_t *volinfo, int count,
+ int32_t stripe_cnt, int32_t replica_cnt)
{
- int idx = 0;
- int i = 0;
- int sub_cnt = 0;
- glusterd_brickinfo_t *brick = NULL;
-
- /* The complexity of the function is in deciding at which index
- to add new brick. Even though it can be defined with a complex
- single formula for all volume, it is seperated out to make it
- more readable */
- if (stripe_cnt) {
- /* common formula when 'stripe_count' is set */
- /* idx = ((count / ((stripe_cnt * volinfo->replica_count) -
- volinfo->dist_leaf_count)) * volinfo->dist_leaf_count) +
- (count + volinfo->dist_leaf_count);
- */
-
- sub_cnt = volinfo->dist_leaf_count;
-
- idx = ((count / ((stripe_cnt * volinfo->replica_count) -
- sub_cnt)) * sub_cnt) +
- (count + sub_cnt);
-
- goto insert_brick;
- }
-
- /* replica count is set */
- /* common formula when 'replica_count' is set */
- /* idx = ((count / (replica_cnt - existing_replica_count)) *
- existing_replica_count) +
- (count + existing_replica_count);
+ int idx = 0;
+ int i = 0;
+ int sub_cnt = 0;
+ glusterd_brickinfo_t *brick = NULL;
+
+ /* The complexity of the function is in deciding at which index
+ to add new brick. Even though it can be defined with a complex
+ single formula for all volume, it is separated out to make it
+ more readable */
+ if (stripe_cnt) {
+ /* common formula when 'stripe_count' is set */
+ /* idx = ((count / ((stripe_cnt * volinfo->replica_count) -
+ volinfo->dist_leaf_count)) * volinfo->dist_leaf_count) +
+ (count + volinfo->dist_leaf_count);
*/
- sub_cnt = volinfo->replica_count;
- idx = (count / (replica_cnt - sub_cnt) * sub_cnt) +
- (count + sub_cnt);
+ sub_cnt = volinfo->dist_leaf_count;
-insert_brick:
- i = 0;
- list_for_each_entry (brick, &volinfo->bricks, brick_list) {
- i++;
- if (i < idx)
- continue;
- gf_log (THIS->name, GF_LOG_DEBUG, "brick:%s index=%d, count=%d",
- brick->path, idx, count);
-
- list_add (&brickinfo->brick_list, &brick->brick_list);
- break;
- }
+ idx = ((count / ((stripe_cnt * volinfo->replica_count) - sub_cnt)) *
+ sub_cnt) +
+ (count + sub_cnt);
- return 0;
-}
+ goto insert_brick;
+ }
+ /* replica count is set */
+ /* common formula when 'replica_count' is set */
+ /* idx = ((count / (replica_cnt - existing_replica_count)) *
+ existing_replica_count) +
+ (count + existing_replica_count);
+ */
-static int
-gd_addbr_validate_stripe_count (glusterd_volinfo_t *volinfo, int stripe_count,
- int total_bricks, int *type, char *err_str,
- size_t err_len)
-{
- int ret = -1;
+ sub_cnt = volinfo->replica_count;
+ idx = (count / (replica_cnt - sub_cnt) * sub_cnt) + (count + sub_cnt);
- switch (volinfo->type) {
- case GF_CLUSTER_TYPE_NONE:
- if ((volinfo->brick_count * stripe_count) == total_bricks) {
- /* Change the volume type */
- *type = GF_CLUSTER_TYPE_STRIPE;
- gf_log (THIS->name, GF_LOG_INFO,
- "Changing the type of volume %s from "
- "'distribute' to 'stripe'", volinfo->volname);
- ret = 0;
- goto out;
- } else {
- snprintf (err_str, err_len, "Incorrect number of "
- "bricks (%d) supplied for stripe count (%d).",
- (total_bricks - volinfo->brick_count),
- stripe_count);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
- goto out;
- }
- break;
- case GF_CLUSTER_TYPE_REPLICATE:
- if (!(total_bricks % (volinfo->replica_count * stripe_count))) {
- /* Change the volume type */
- *type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
- gf_log (THIS->name, GF_LOG_INFO,
- "Changing the type of volume %s from "
- "'replicate' to 'replicate-stripe'",
- volinfo->volname);
- ret = 0;
- goto out;
- } else {
- snprintf (err_str, err_len, "Incorrect number of "
- "bricks (%d) supplied for changing volume's "
- "stripe count to %d, need at least %d bricks",
- (total_bricks - volinfo->brick_count),
- stripe_count,
- (volinfo->replica_count * stripe_count));
- gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
- goto out;
- }
- break;
- case GF_CLUSTER_TYPE_STRIPE:
- case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
- if (stripe_count < volinfo->stripe_count) {
- snprintf (err_str, err_len,
- "Incorrect stripe count (%d) supplied. "
- "Volume already has stripe count (%d)",
- stripe_count, volinfo->stripe_count);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
- goto out;
- }
- if (stripe_count == volinfo->stripe_count) {
- if (!(total_bricks % volinfo->dist_leaf_count)) {
- /* its same as the one which exists */
- ret = 1;
- goto out;
- }
- }
- if (stripe_count > volinfo->stripe_count) {
- /* We have to make sure before and after 'add-brick',
- the number or subvolumes for distribute will remain
- same, when stripe count is given */
- if ((volinfo->brick_count * (stripe_count *
- volinfo->replica_count)) ==
- (total_bricks * volinfo->dist_leaf_count)) {
- /* Change the dist_leaf_count */
- gf_log (THIS->name, GF_LOG_INFO,
- "Changing the stripe count of "
- "volume %s from %d to %d",
- volinfo->volname,
- volinfo->stripe_count, stripe_count);
- ret = 0;
- goto out;
- }
- }
- break;
- }
-
-out:
- return ret;
+insert_brick:
+ i = 0;
+ cds_list_for_each_entry(brick, &volinfo->bricks, brick_list)
+ {
+ i++;
+ if (i < idx)
+ continue;
+ gf_msg_debug(THIS->name, 0, "brick:%s index=%d, count=%d", brick->path,
+ idx, count);
+
+ cds_list_add(&brickinfo->brick_list, &brick->brick_list);
+ break;
+ }
+
+ return 0;
}
static int
-gd_addbr_validate_replica_count (glusterd_volinfo_t *volinfo, int replica_count,
- int total_bricks, int *type, char *err_str,
- int err_len)
+gd_addbr_validate_replica_count(glusterd_volinfo_t *volinfo, int replica_count,
+ int arbiter_count, int total_bricks, int *type,
+ char *err_str, int err_len)
{
- int ret = -1;
+ int ret = -1;
- /* replica count is set */
- switch (volinfo->type) {
+ /* replica count is set */
+ switch (volinfo->type) {
case GF_CLUSTER_TYPE_NONE:
- if ((volinfo->brick_count * replica_count) == total_bricks) {
- /* Change the volume type */
- *type = GF_CLUSTER_TYPE_REPLICATE;
- gf_log (THIS->name, GF_LOG_INFO,
- "Changing the type of volume %s from "
- "'distribute' to 'replica'", volinfo->volname);
- ret = 0;
- goto out;
-
- } else {
- snprintf (err_str, err_len, "Incorrect number of "
- "bricks (%d) supplied for replica count (%d).",
- (total_bricks - volinfo->brick_count),
- replica_count);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
- goto out;
- }
- break;
- case GF_CLUSTER_TYPE_STRIPE:
- if (!(total_bricks % (volinfo->dist_leaf_count * replica_count))) {
- /* Change the volume type */
- *type = GF_CLUSTER_TYPE_STRIPE_REPLICATE;
- gf_log (THIS->name, GF_LOG_INFO,
- "Changing the type of volume %s from "
- "'stripe' to 'replicate-stripe'",
- volinfo->volname);
- ret = 0;
- goto out;
- } else {
- snprintf (err_str, err_len, "Incorrect number of "
- "bricks (%d) supplied for changing volume's "
- "replica count to %d, need at least %d "
- "bricks",
- (total_bricks - volinfo->brick_count),
- replica_count, (volinfo->dist_leaf_count *
- replica_count));
- gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
- goto out;
- }
- break;
+ if ((volinfo->brick_count * replica_count) == total_bricks) {
+ /* Change the volume type */
+ *type = GF_CLUSTER_TYPE_REPLICATE;
+ gf_msg(THIS->name, GF_LOG_INFO, 0,
+ GD_MSG_VOL_TYPE_CHANGING_INFO,
+ "Changing the type of volume %s from "
+ "'distribute' to 'replica'",
+ volinfo->volname);
+ ret = 0;
+ goto out;
+
+ } else {
+ snprintf(err_str, err_len,
+ "Incorrect number of "
+ "bricks (%d) supplied for replica count (%d).",
+ (total_bricks - volinfo->brick_count), replica_count);
+ gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "%s", err_str);
+ goto out;
+ }
+ break;
case GF_CLUSTER_TYPE_REPLICATE:
- case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
- if (replica_count < volinfo->replica_count) {
- snprintf (err_str, err_len,
- "Incorrect replica count (%d) supplied. "
- "Volume already has (%d)",
- replica_count, volinfo->replica_count);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
- goto out;
+ if (replica_count < volinfo->replica_count) {
+ snprintf(err_str, err_len,
+ "Incorrect replica count (%d) supplied. "
+ "Volume already has (%d)",
+ replica_count, volinfo->replica_count);
+ gf_msg(THIS->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "%s", err_str);
+ goto out;
+ }
+ if (replica_count == volinfo->replica_count) {
+ if (arbiter_count && !volinfo->arbiter_count) {
+ snprintf(err_str, err_len,
+ "Cannot convert replica 3 volume "
+ "to arbiter volume.");
+ gf_msg(THIS->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INVALID_ENTRY, "%s", err_str);
+ goto out;
}
- if (replica_count == volinfo->replica_count) {
- if (!(total_bricks % volinfo->dist_leaf_count)) {
- ret = 1;
- goto out;
- }
+ if (!(total_bricks % volinfo->dist_leaf_count)) {
+ ret = 1;
+ goto out;
}
- if (replica_count > volinfo->replica_count) {
- /* We have to make sure before and after 'add-brick',
- the number or subvolumes for distribute will remain
- same, when replica count is given */
- if ((total_bricks * volinfo->dist_leaf_count) ==
- (volinfo->brick_count * (replica_count *
- volinfo->stripe_count))) {
- /* Change the dist_leaf_count */
- gf_log (THIS->name, GF_LOG_INFO,
- "Changing the replica count of "
- "volume %s from %d to %d",
- volinfo->volname, volinfo->replica_count,
- replica_count);
- ret = 0;
- goto out;
- }
+ }
+ if (replica_count > volinfo->replica_count) {
+ /* We have to make sure before and after 'add-brick',
+ the number or subvolumes for distribute will remain
+ same, when replica count is given */
+ if ((total_bricks * volinfo->dist_leaf_count) ==
+ (volinfo->brick_count *
+ (replica_count * volinfo->stripe_count))) {
+ /* Change the dist_leaf_count */
+ gf_msg(THIS->name, GF_LOG_INFO, 0,
+ GD_MSG_REPLICA_COUNT_CHANGE_INFO,
+ "Changing the replica count of "
+ "volume %s from %d to %d",
+ volinfo->volname, volinfo->replica_count,
+ replica_count);
+ ret = 0;
+ goto out;
}
- break;
- }
+ }
+ break;
+ case GF_CLUSTER_TYPE_DISPERSE:
+ snprintf(err_str, err_len,
+ "Volume %s cannot be converted "
+ "from dispersed to replicated-"
+ "dispersed",
+ volinfo->volname);
+ gf_msg(THIS->name, GF_LOG_ERROR, EPERM, GD_MSG_OP_NOT_PERMITTED,
+ "%s", err_str);
+ goto out;
+ }
out:
- return ret;
+ return ret;
}
static int
-gd_rmbr_validate_replica_count (glusterd_volinfo_t *volinfo,
- int32_t replica_count,
- int32_t brick_count, char *err_str,
- size_t err_len)
+gd_rmbr_validate_replica_count(glusterd_volinfo_t *volinfo,
+ int32_t replica_count, int32_t brick_count,
+ char *err_str, size_t err_len)
{
- int ret = -1;
- int replica_nodes = 0;
+ int ret = -1;
+ int replica_nodes = 0;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
- switch (volinfo->type) {
+ switch (volinfo->type) {
case GF_CLUSTER_TYPE_NONE:
- case GF_CLUSTER_TYPE_STRIPE:
- snprintf (err_str, err_len,
- "replica count (%d) option given for non replicate "
- "volume %s", replica_count, volinfo->volname);
- gf_log (THIS->name, GF_LOG_WARNING, "%s", err_str);
- goto out;
+ case GF_CLUSTER_TYPE_DISPERSE:
+ snprintf(err_str, err_len,
+ "replica count (%d) option given for non replicate "
+ "volume %s",
+ replica_count, volinfo->volname);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ARGUMENT,
+ err_str, NULL);
+ goto out;
case GF_CLUSTER_TYPE_REPLICATE:
- case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
- /* in remove brick, you can only reduce the replica count */
- if (replica_count > volinfo->replica_count) {
- snprintf (err_str, err_len,
- "given replica count (%d) option is more "
- "than volume %s's replica count (%d)",
- replica_count, volinfo->volname,
- volinfo->replica_count);
- gf_log (THIS->name, GF_LOG_WARNING, "%s", err_str);
- goto out;
- }
- if (replica_count == volinfo->replica_count) {
- /* This means the 'replica N' option on CLI was
- redundant. Check if the total number of bricks given
- for removal is same as 'dist_leaf_count' */
- if (brick_count % volinfo->dist_leaf_count) {
- snprintf (err_str, err_len,
- "number of bricks provided (%d) is "
- "not valid. need at least %d "
- "(or %dxN)", brick_count,
- volinfo->dist_leaf_count,
- volinfo->dist_leaf_count);
- gf_log (THIS->name, GF_LOG_WARNING, "%s",
- err_str);
- goto out;
- }
- ret = 1;
- goto out;
- }
-
- replica_nodes = ((volinfo->brick_count /
- volinfo->replica_count) *
- (volinfo->replica_count - replica_count));
-
- if (brick_count % replica_nodes) {
- snprintf (err_str, err_len,
- "need %d(xN) bricks for reducing replica "
- "count of the volume from %d to %d",
- replica_nodes, volinfo->replica_count,
- replica_count);
- goto out;
+ /* in remove brick, you can only reduce the replica count */
+ if (replica_count > volinfo->replica_count) {
+ snprintf(err_str, err_len,
+ "given replica count (%d) option is more "
+ "than volume %s's replica count (%d)",
+ replica_count, volinfo->volname,
+ volinfo->replica_count);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL,
+ GD_MSG_INVALID_ARGUMENT, err_str, NULL);
+ goto out;
+ }
+ if (replica_count == volinfo->replica_count) {
+ /* This means the 'replica N' option on CLI was
+ redundant. Check if the total number of bricks given
+ for removal is same as 'dist_leaf_count' */
+ if (brick_count % volinfo->dist_leaf_count) {
+ snprintf(err_str, err_len,
+ "number of bricks provided (%d) is "
+ "not valid. need at least %d "
+ "(or %dxN)",
+ brick_count, volinfo->dist_leaf_count,
+ volinfo->dist_leaf_count);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL,
+ GD_MSG_INVALID_ARGUMENT, err_str, NULL);
+ goto out;
}
- break;
- }
+ ret = 1;
+ goto out;
+ }
+
+ replica_nodes = ((volinfo->brick_count / volinfo->replica_count) *
+ (volinfo->replica_count - replica_count));
+
+ if (brick_count % replica_nodes) {
+ snprintf(err_str, err_len,
+ "need %d(xN) bricks for reducing replica "
+ "count of the volume from %d to %d",
+ replica_nodes, volinfo->replica_count, replica_count);
+ gf_smsg(this->name, GF_LOG_WARNING, EINVAL,
+ GD_MSG_INVALID_ARGUMENT, err_str, NULL);
+ goto out;
+ }
+ break;
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
/* Handler functions */
int
-glusterd_handle_add_brick (rpcsvc_request_t *req)
+__glusterd_handle_add_brick(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- gf_cli_req cli_req = {{0,}};
- dict_t *dict = NULL;
- char *bricks = NULL;
- char *volname = NULL;
- int brick_count = 0;
- char *brick_list = NULL;
- void *cli_rsp = NULL;
- char err_str[2048] = {0,};
- gf_cli_rsp rsp = {0,};
- glusterd_volinfo_t *volinfo = NULL;
- xlator_t *this = NULL;
- int total_bricks = 0;
- int32_t replica_count = 0;
- int32_t stripe_count = 0;
- int type = 0;
-
- this = THIS;
- GF_ASSERT(this);
-
- GF_ASSERT (req);
-
- if (!xdr_to_generic (req->msg[0], &cli_req,
- (xdrproc_t)xdr_gf_cli_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- snprintf (err_str, sizeof (err_str), "Garbage args received");
- goto out;
- }
-
- gf_log ("glusterd", GF_LOG_INFO, "Received add brick req");
-
- if (cli_req.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
-
- ret = dict_unserialize (cli_req.dict.dict_val,
- cli_req.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- snprintf (err_str, sizeof (err_str), "Unable to decode "
- "the buffer");
- goto out;
- }
- }
-
- ret = dict_get_str (dict, "volname", &volname);
-
- gf_cmd_log ("Volume add-brick", "on volname: %s attempted",
- volname);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
- snprintf (err_str, sizeof (err_str), "Unable to get volume "
- "name");
- goto out;
- }
-
- if (!(ret = glusterd_check_volume_exists (volname))) {
- ret = -1;
- snprintf(err_str, 2048, "Volume %s does not exist", volname);
- gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
- goto out;
- }
-
- ret = dict_get_int32 (dict, "count", &brick_count);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get count");
- snprintf (err_str, sizeof (err_str), "Unable to get volume "
- "brick count");
- goto out;
- }
-
- ret = dict_get_int32 (dict, "replica-count", &replica_count);
- if (!ret) {
- gf_log (THIS->name, GF_LOG_INFO, "replica-count is %d",
- replica_count);
- }
-
- ret = dict_get_int32 (dict, "stripe-count", &stripe_count);
- if (!ret) {
- gf_log (THIS->name, GF_LOG_INFO, "stripe-count is %d",
- stripe_count);
- }
-
-
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- snprintf (err_str, sizeof (err_str), "Unable to get volinfo "
- "for volume name %s", volname);
- gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
- goto out;
-
- }
-
- total_bricks = volinfo->brick_count + brick_count;
-
- if (!stripe_count && !replica_count) {
- if (volinfo->type == GF_CLUSTER_TYPE_NONE)
- goto brick_val;
-
- if ((volinfo->brick_count < volinfo->dist_leaf_count) &&
- (total_bricks <= volinfo->dist_leaf_count))
- goto brick_val;
-
- if ((brick_count % volinfo->dist_leaf_count) != 0) {
- snprintf(err_str, 2048, "Incorrect number of bricks"
- " supplied %d with count %d",
- brick_count, volinfo->dist_leaf_count);
- gf_log("glusterd", GF_LOG_ERROR, "%s", err_str);
- ret = -1;
- goto out;
- }
- goto brick_val;
- /* done with validation.. below section is if stripe|replica
- count is given */
- }
-
- /* These bricks needs to be added one per a replica or stripe volume */
- if (stripe_count) {
- ret = gd_addbr_validate_stripe_count (volinfo, stripe_count,
- total_bricks, &type,
- err_str,
- sizeof (err_str));
- if (ret == -1) {
- gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
- goto out;
- }
-
- /* if stripe count is same as earlier, set it back to 0 */
- if (ret == 1)
- stripe_count = 0;
-
- ret = dict_set_int32 (dict, "stripe-count", stripe_count);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to set the stripe-count in dict");
- goto out;
- }
- goto brick_val;
- }
-
- ret = gd_addbr_validate_replica_count (volinfo, replica_count,
- total_bricks,
- &type, err_str,
- sizeof (err_str));
- if (ret == -1) {
- gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
- goto out;
- }
-
- /* if replica count is same as earlier, set it back to 0 */
- if (ret == 1)
- replica_count = 0;
-
- ret = dict_set_int32 (dict, "replica-count", replica_count);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to set the replica-count in dict");
- goto out;
- }
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+ char *bricks = NULL;
+ char *volname = NULL;
+ int brick_count = 0;
+ void *cli_rsp = NULL;
+ char err_str[2048] = "";
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ int total_bricks = 0;
+ int32_t replica_count = 0;
+ int32_t arbiter_count = 0;
+ int32_t stripe_count = 0;
+ int type = 0;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(req);
+
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ snprintf(err_str, sizeof(err_str), "Garbage args received");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto out;
+ }
+
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_ADD_BRICK_REQ_RECVD,
+ "Received add brick req");
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(err_str, sizeof(err_str),
+ "Unable to decode "
+ "the command");
+ goto out;
+ }
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Unable to get volume "
+ "name");
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s",
+ err_str);
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Unable to get volinfo "
+ "for volume name %s",
+ volname);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL, "%s",
+ err_str);
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Unable to get volume "
+ "brick count");
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s",
+ err_str);
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
+ &replica_count);
+ if (!ret) {
+ gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_SUCCESS,
+ "replica-count is %d", replica_count);
+ }
+
+ ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"),
+ &arbiter_count);
+ if (!ret) {
+ gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_SUCCESS,
+ "arbiter-count is %d", arbiter_count);
+ }
+
+ ret = dict_get_int32n(dict, "stripe-count", SLEN("stripe-count"),
+ &stripe_count);
+ if (!ret) {
+ gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_SUCCESS,
+ "stripe-count is %d", stripe_count);
+ }
+
+ if (!dict_getn(dict, "force", SLEN("force"))) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Failed to get flag");
+ goto out;
+ }
+
+ total_bricks = volinfo->brick_count + brick_count;
+
+ if (!stripe_count && !replica_count) {
+ if (volinfo->type == GF_CLUSTER_TYPE_NONE)
+ goto brick_val;
+
+ if ((volinfo->brick_count < volinfo->dist_leaf_count) &&
+ (total_bricks <= volinfo->dist_leaf_count))
+ goto brick_val;
+
+ if ((brick_count % volinfo->dist_leaf_count) != 0) {
+ snprintf(err_str, sizeof(err_str),
+ "Incorrect number "
+ "of bricks supplied %d with count %d",
+ brick_count, volinfo->dist_leaf_count);
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_REPLICA,
+ "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+ goto brick_val;
+ /* done with validation.. below section is if stripe|replica
+ count is given */
+ }
+
+ ret = gd_addbr_validate_replica_count(volinfo, replica_count, arbiter_count,
+ total_bricks, &type, err_str,
+ sizeof(err_str));
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COUNT_VALIDATE_FAILED, "%s",
+ err_str);
+ goto out;
+ }
+
+ /* if replica count is same as earlier, set it back to 0 */
+ if (ret == 1)
+ replica_count = 0;
+
+ ret = dict_set_int32n(dict, "replica-count", SLEN("replica-count"),
+ replica_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "failed to set the replica-count in dict");
+ goto out;
+ }
brick_val:
- ret = dict_get_str (dict, "bricks", &bricks);
+ ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Unable to get volume "
+ "bricks");
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s",
+ err_str);
+ goto out;
+ }
+
+ if (type != volinfo->type) {
+ ret = dict_set_int32n(dict, "type", SLEN("type"), type);
if (ret) {
- snprintf (err_str, sizeof (err_str), "Unable to get volume "
- "bricks");
- gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
- goto out;
- }
-
- gf_cmd_log ("Volume add-brick", "volname: %s type %d count:%d bricks:%s"
- ,volname, volinfo->type, brick_count, brick_list);
-
- if (type != volinfo->type) {
- ret = dict_set_int32 (dict, "type", type);
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to set the new type in dict");
- }
-
- ret = glusterd_op_begin (req, GD_OP_ADD_BRICK, dict);
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "failed to set the new type in dict");
+ goto out;
+ }
+ }
+
+ if (conf->op_version <= GD_OP_VERSION_3_7_5) {
+ gf_msg_debug(this->name, 0,
+ "The cluster is operating at "
+ "version less than or equal to %d. Falling back "
+ "to syncop framework.",
+ GD_OP_VERSION_3_7_5);
+ ret = glusterd_op_begin_synctask(req, GD_OP_ADD_BRICK, dict);
+ } else {
+ ret = glusterd_mgmt_v3_initiate_all_phases(req, GD_OP_ADD_BRICK, dict);
+ }
out:
- gf_cmd_log ("Volume add-brick","on volname: %s %s", volname,
- (ret != 0)? "FAILED" : "SUCCESS");
- if (ret) {
- if (dict)
- dict_unref (dict);
- rsp.op_ret = -1;
- rsp.op_errno = 0;
- if (err_str[0] == '\0')
- snprintf (err_str, sizeof (err_str), "Operation failed");
- rsp.op_errstr = err_str;
- cli_rsp = &rsp;
- glusterd_submit_reply(req, cli_rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gf_cli_rsp);
- ret = 0; //sent error to cli, prevent second reply
- }
-
- glusterd_friend_sm ();
- glusterd_op_sm ();
-
- if (cli_req.dict.dict_val)
- free (cli_req.dict.dict_val); //its malloced by xdr
-
- return ret;
+ if (ret) {
+ rsp.op_ret = -1;
+ rsp.op_errno = 0;
+ if (err_str[0] == '\0')
+ snprintf(err_str, sizeof(err_str), "Operation failed");
+ rsp.op_errstr = err_str;
+ cli_rsp = &rsp;
+ glusterd_to_cli(req, cli_rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp,
+ dict);
+ ret = 0; // sent error to cli, prevent second reply
+ }
+
+ free(cli_req.dict.dict_val); // its malloced by xdr
+
+ return ret;
}
-
int
-glusterd_handle_remove_brick (rpcsvc_request_t *req)
+glusterd_handle_add_brick(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- gf_cli_req cli_req = {{0,}};
- dict_t *dict = NULL;
- int32_t count = 0;
- char *brick = NULL;
- char key[256] = {0,};
- char *brick_list = NULL;
- int i = 1;
- glusterd_volinfo_t *volinfo = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
- int32_t pos = 0;
- int32_t sub_volume = 0;
- int32_t sub_volume_start = 0;
- int32_t sub_volume_end = 0;
- glusterd_brickinfo_t *tmp = NULL;
- char err_str[2048] = {0};
- gf_cli_rsp rsp = {0,};
- void *cli_rsp = NULL;
- char vol_type[256] = {0,};
- int32_t replica_count = 0;
- int32_t brick_index = 0;
- int32_t tmp_brick_idx = 0;
- int found = 0;
- int diff_count = 0;
- char *volname = 0;
-
- GF_ASSERT (req);
-
- if (!xdr_to_generic (req->msg[0], &cli_req,
- (xdrproc_t)xdr_gf_cli_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
+ return glusterd_big_locked_handler(req, __glusterd_handle_add_brick);
+}
- gf_log ("glusterd", GF_LOG_INFO, "Received rem brick req");
+static int
+subvol_matcher_init(int **subvols, int count)
+{
+ int ret = -1;
- if (cli_req.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
+ *subvols = GF_CALLOC(count, sizeof(int), gf_gld_mt_int);
+ if (*subvols)
+ ret = 0;
- ret = dict_unserialize (cli_req.dict.dict_val,
- cli_req.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- }
- }
+ return ret;
+}
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Unable to get volname");
- goto out;
- }
+static void
+subvol_matcher_update(int *subvols, glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ glusterd_brickinfo_t *tmp = NULL;
+ int32_t sub_volume = 0;
+ int pos = 0;
+ if (subvols) {
+ cds_list_for_each_entry(tmp, &volinfo->bricks, brick_list)
+ {
+ if (strcmp(tmp->hostname, brickinfo->hostname) ||
+ strcmp(tmp->path, brickinfo->path)) {
+ pos++;
+ continue;
+ }
+ gf_msg_debug(THIS->name, 0, LOGSTR_FOUND_BRICK, brickinfo->hostname,
+ brickinfo->path, volinfo->volname);
+ sub_volume = (pos / volinfo->dist_leaf_count);
+ subvols[sub_volume]++;
+ break;
+ }
+ }
+}
- gf_cmd_log ("Volume remove-brick","on volname: %s attempted", volname);
- ret = dict_get_int32 (dict, "count", &count);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get count");
- goto out;
+static int
+subvol_matcher_verify(int *subvols, glusterd_volinfo_t *volinfo, char *err_str,
+ size_t err_len, char *vol_type, int replica_count)
+{
+ int i = 0;
+ int ret = 0;
+ int count = volinfo->replica_count - replica_count;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ if (replica_count && subvols) {
+ for (i = 0; i < volinfo->subvol_count; i++) {
+ if (subvols[i] != count) {
+ ret = -1;
+ snprintf(err_str, err_len,
+ "Remove exactly %d"
+ " brick(s) from each subvolume.",
+ count);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_SUBVOL_VERIFY_FAIL, err_str, NULL);
+ break;
+ }
}
+ return ret;
+ }
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- snprintf (err_str, 2048, "Volume %s does not exist",
- volname);
- gf_log ("", GF_LOG_ERROR, "%s", err_str);
- goto out;
+ do {
+ if (subvols && (subvols[i] % volinfo->dist_leaf_count == 0)) {
+ continue;
+ } else {
+ ret = -1;
+ snprintf(err_str, err_len, "Bricks not from same subvol for %s",
+ vol_type);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_SUBVOL_VERIFY_FAIL, err_str, NULL);
+ break;
}
+ } while (++i < volinfo->subvol_count);
- ret = dict_get_int32 (dict, "replica-count", &replica_count);
- if (!ret) {
- gf_log (THIS->name, GF_LOG_INFO,
- "request to change replica-count to %d", replica_count);
- ret = gd_rmbr_validate_replica_count (volinfo, replica_count,
- count, err_str,
- sizeof (err_str));
- if (ret < 0) {
- /* logging and error msg are done in above function
- itself */
- goto out;
- }
- dict_del (dict, "replica-count");
- if (ret) {
- replica_count = 0;
- } else {
- ret = dict_set_int32 (dict, "replica-count",
- replica_count);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to set the replica_count "
- "in dict");
- goto out;
- }
- }
- }
+ return ret;
+}
- /* 'vol_type' is used for giving the meaning full error msg for user */
- if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
- strcpy (vol_type, "replica");
- } else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE) {
- strcpy (vol_type, "stripe");
- } else if (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) {
- strcpy (vol_type, "stripe-replicate");
- } else {
- strcpy (vol_type, "distribute");
- }
+static void
+subvol_matcher_destroy(int *subvols)
+{
+ GF_FREE(subvols);
+}
- /* Do not allow remove-brick if the volume is plain stripe */
- if ((volinfo->type == GF_CLUSTER_TYPE_STRIPE) &&
- (volinfo->brick_count == volinfo->stripe_count)) {
- snprintf (err_str, 2048,
- "Removing brick from a plain stripe is not allowed");
- gf_log ("glusterd", GF_LOG_ERROR, "%s", err_str);
+static int
+glusterd_remove_brick_validate_arbiters(glusterd_volinfo_t *volinfo,
+ int32_t count, int32_t replica_count,
+ glusterd_brickinfo_t **brickinfo_list,
+ char *err_str, size_t err_len)
+{
+ int i = 0;
+ int ret = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *last = NULL;
+ char *arbiter_array = NULL;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (volinfo->type != GF_CLUSTER_TYPE_REPLICATE)
+ goto out;
+
+ if (!replica_count || !volinfo->arbiter_count)
+ goto out;
+
+ if (replica_count == 2) {
+ /* If it is an arbiter to replica 2 conversion, only permit
+ * removal of the arbiter brick.*/
+ for (i = 0; i < count; i++) {
+ brickinfo = brickinfo_list[i];
+ last = get_last_brick_of_brick_group(volinfo, brickinfo);
+ if (last != brickinfo) {
+ snprintf(err_str, err_len,
+ "Remove arbiter "
+ "brick(s) only when converting from "
+ "arbiter to replica 2 subvolume.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_REMOVE_ARBITER_BRICK, err_str, NULL);
ret = -1;
goto out;
- }
-
- if (!replica_count &&
- (volinfo->type == GF_CLUSTER_TYPE_STRIPE_REPLICATE) &&
- (volinfo->brick_count == volinfo->dist_leaf_count)) {
- snprintf (err_str, 2048,
- "Removing bricks from stripe-replicate"
- " configuration is not allowed without reducing "
- "replica or stripe count explicitly.");
- gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
+ }
+ }
+ } else if (replica_count == 1) {
+ /* If it is an arbiter to plain distribute conversion, in every
+ * replica subvol, the arbiter has to be one of the bricks that
+ * are removed. */
+ arbiter_array = GF_CALLOC(volinfo->subvol_count, sizeof(*arbiter_array),
+ gf_common_mt_char);
+ if (!arbiter_array)
+ return -1;
+ for (i = 0; i < count; i++) {
+ brickinfo = brickinfo_list[i];
+ last = get_last_brick_of_brick_group(volinfo, brickinfo);
+ if (last == brickinfo)
+ arbiter_array[brickinfo->group] = 1;
+ }
+ for (i = 0; i < volinfo->subvol_count; i++)
+ if (!arbiter_array[i]) {
+ snprintf(err_str, err_len,
+ "Removed bricks "
+ "must contain arbiter when converting"
+ " to plain distribute.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_REMOVE_ARBITER_BRICK, err_str, NULL);
ret = -1;
- goto out;
- }
+ break;
+ }
+ GF_FREE(arbiter_array);
+ }
- if (!replica_count &&
- (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) &&
- (volinfo->brick_count == volinfo->dist_leaf_count)) {
- snprintf (err_str, 2048,
- "Removing bricks from replicate configuration "
- "is not allowed without reducing replica count "
- "explicitly.");
- gf_log (THIS->name, GF_LOG_ERROR, "%s", err_str);
- ret = -1;
- goto out;
- }
+out:
+ return ret;
+}
- /* Do not allow remove-brick if the bricks given is less than
- the replica count or stripe count */
- if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) {
- if (volinfo->dist_leaf_count &&
- (count % volinfo->dist_leaf_count)) {
- snprintf (err_str, 2048, "Remove brick incorrect"
- " brick count of %d for %s %d",
- count, vol_type, volinfo->dist_leaf_count);
- gf_log ("", GF_LOG_ERROR, "%s", err_str);
- ret = -1;
- goto out;
- }
- }
+int
+__glusterd_handle_remove_brick(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+ int32_t count = 0;
+ char *brick = NULL;
+ char key[64] = "";
+ int keylen;
+ int i = 1;
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t **brickinfo_list = NULL;
+ int *subvols = NULL;
+ char err_str[2048] = "";
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ void *cli_rsp = NULL;
+ char vol_type[256] = "";
+ int32_t replica_count = 0;
+ char *volname = 0;
+ xlator_t *this = NULL;
+ int cmd = -1;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ snprintf(err_str, sizeof(err_str), "Received garbage args");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto out;
+ }
+
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REM_BRICK_REQ_RECVD,
+ "Received rem brick req");
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(err_str, sizeof(err_str),
+ "Unable to decode "
+ "the command");
+ goto out;
+ }
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Unable to get volume "
+ "name");
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s",
+ err_str);
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Unable to get brick "
+ "count");
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s",
+ err_str);
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str), "Volume %s does not exist", volname);
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, "%s",
+ err_str);
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "command", SLEN("command"), &cmd);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Unable to get cmd "
+ "ccommand");
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s",
+ err_str);
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
+ &replica_count);
+ if (!ret) {
+ gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED,
+ "request to change replica-count to %d", replica_count);
+ ret = gd_rmbr_validate_replica_count(volinfo, replica_count, count,
+ err_str, sizeof(err_str));
+ if (ret < 0) {
+ /* logging and error msg are done in above function
+ itself */
+ goto out;
+ }
+ dict_deln(dict, "replica-count", SLEN("replica-count"));
+ if (ret) {
+ replica_count = 0;
+ } else {
+ ret = dict_set_int32n(dict, "replica-count", SLEN("replica-count"),
+ replica_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno,
+ GD_MSG_DICT_SET_FAILED,
+ "failed to set the replica_count "
+ "in dict");
+ goto out;
+ }
+ }
+ }
+
+ /* 'vol_type' is used for giving the meaning full error msg for user */
+ if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
+ strcpy(vol_type, "replica");
+ } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
+ strcpy(vol_type, "disperse");
+ } else {
+ strcpy(vol_type, "distribute");
+ }
+
+ if (!replica_count && (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) &&
+ (volinfo->brick_count == volinfo->dist_leaf_count)) {
+ snprintf(err_str, sizeof(err_str),
+ "Removing bricks from replicate configuration "
+ "is not allowed without reducing replica count "
+ "explicitly.");
+ gf_msg(this->name, GF_LOG_ERROR, EPERM, GD_MSG_OP_NOT_PERMITTED_AC_REQD,
+ "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+
+ /* Do not allow remove-brick if the bricks given is less than
+ the replica count or stripe count */
+ if (!replica_count && (volinfo->type != GF_CLUSTER_TYPE_NONE)) {
+ if (volinfo->dist_leaf_count && (count % volinfo->dist_leaf_count)) {
+ snprintf(err_str, sizeof(err_str),
+ "Remove brick "
+ "incorrect brick count of %d for %s %d",
+ count, vol_type, volinfo->dist_leaf_count);
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s",
+ err_str);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
+ (volinfo->subvol_count > 1)) {
+ ret = subvol_matcher_init(&subvols, volinfo->subvol_count);
+ if (ret)
+ goto out;
+ }
- brick_list = GF_MALLOC (120000 * sizeof(*brick_list),gf_common_mt_char);
+ brickinfo_list = GF_CALLOC(count, sizeof(*brickinfo_list),
+ gf_common_mt_pointer);
+ if (!brickinfo_list) {
+ ret = -1;
+ goto out;
+ }
- if (!brick_list) {
- ret = -1;
- goto out;
+ while (i <= count) {
+ keylen = snprintf(key, sizeof(key), "brick%d", i);
+ ret = dict_get_strn(dict, key, keylen, &brick);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str), "Unable to get %s", key);
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "%s", err_str);
+ goto out;
}
+ gf_msg_debug(this->name, 0,
+ "Remove brick count %d brick:"
+ " %s",
+ i, brick);
- strcpy (brick_list, " ");
- while ( i <= count) {
- snprintf (key, 256, "brick%d", i);
- ret = dict_get_str (dict, key, &brick);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get %s", key);
- goto out;
- }
- gf_log ("", GF_LOG_DEBUG, "Remove brick count %d brick: %s",
- i, brick);
+ ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo,
+ _gf_false);
- ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo,
- &brickinfo,
- GF_PATH_COMPLETE);
- if (ret) {
- snprintf(err_str, 2048,"Incorrect brick %s for volume"
- " %s", brick, volname);
- gf_log ("", GF_LOG_ERROR, "%s", err_str);
- goto out;
- }
- strcat(brick_list, brick);
- strcat(brick_list, " ");
-
- i++;
- if ((volinfo->type == GF_CLUSTER_TYPE_NONE) ||
- (volinfo->brick_count <= volinfo->dist_leaf_count))
- continue;
-
- if (replica_count) {
- /* do the validation of bricks here */
- /* -2 because i++ is already done, and i starts with 1,
- instead of 0 */
- diff_count = (volinfo->replica_count - replica_count);
- brick_index = (((i -2) / diff_count) * volinfo->replica_count);
- tmp_brick_idx = 0;
- found = 0;
- list_for_each_entry (tmp, &volinfo->bricks, brick_list) {
- tmp_brick_idx++;
- gf_log (THIS->name, GF_LOG_TRACE,
- "validate brick %s:%s (%d %d %d)",
- tmp->hostname, tmp->path, tmp_brick_idx,
- brick_index, volinfo->replica_count);
- if (tmp_brick_idx <= brick_index)
- continue;
- if (tmp_brick_idx >
- (brick_index + volinfo->replica_count))
- break;
- if ((!strcmp (tmp->hostname,brickinfo->hostname)) &&
- !strcmp (tmp->path, brickinfo->path)) {
- found = 1;
- break;
- }
- }
- if (found)
- continue;
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Incorrect brick "
+ "%s for volume %s",
+ brick, volname);
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_BRICK_NOT_FOUND,
+ "%s", err_str);
+ goto out;
+ }
+ brickinfo_list[i - 1] = brickinfo;
+
+ i++;
+ if ((volinfo->type == GF_CLUSTER_TYPE_NONE) ||
+ (volinfo->brick_count <= volinfo->dist_leaf_count))
+ continue;
+
+ subvol_matcher_update(subvols, volinfo, brickinfo);
+ }
+
+ if ((volinfo->type != GF_CLUSTER_TYPE_NONE) &&
+ (volinfo->subvol_count > 1)) {
+ ret = subvol_matcher_verify(subvols, volinfo, err_str, sizeof(err_str),
+ vol_type, replica_count);
+ if (ret)
+ goto out;
+ }
+
+ ret = glusterd_remove_brick_validate_arbiters(volinfo, count, replica_count,
+ brickinfo_list, err_str,
+ sizeof(err_str));
+ if (ret)
+ goto out;
+
+ if (conf->op_version < GD_OP_VERSION_8_0) {
+ gf_msg_debug(this->name, 0,
+ "The cluster is operating at "
+ "version less than %d. remove-brick operation"
+ "falling back to syncop framework.",
+ GD_OP_VERSION_8_0);
+ ret = glusterd_op_begin_synctask(req, GD_OP_REMOVE_BRICK, dict);
+ } else {
+ ret = glusterd_mgmt_v3_initiate_all_phases(req, GD_OP_REMOVE_BRICK,
+ dict);
+ }
- snprintf(err_str, 2048,"Bricks are from same subvol");
- gf_log (THIS->name, GF_LOG_INFO,
- "failed to validate brick %s:%s (%d %d %d)",
- tmp->hostname, tmp->path, tmp_brick_idx,
- brick_index, volinfo->replica_count);
- ret = -1;
- /* brick order is not valid */
- goto out;
- }
+out:
+ if (ret) {
+ rsp.op_ret = -1;
+ rsp.op_errno = 0;
+ if (err_str[0] == '\0')
+ snprintf(err_str, sizeof(err_str), "Operation failed");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_OP_FAILED, "%s",
+ err_str);
+ rsp.op_errstr = err_str;
+ cli_rsp = &rsp;
+ glusterd_to_cli(req, cli_rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp,
+ dict);
+
+ ret = 0; // sent error to cli, prevent second reply
+ }
+
+ if (brickinfo_list)
+ GF_FREE(brickinfo_list);
+ subvol_matcher_destroy(subvols);
+ free(cli_req.dict.dict_val); // its malloced by xdr
+
+ return ret;
+}
- pos = 0;
- list_for_each_entry (tmp, &volinfo->bricks, brick_list) {
+int
+glusterd_handle_remove_brick(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __glusterd_handle_remove_brick);
+}
- if (strcmp (tmp->hostname,brickinfo->hostname) ||
- strcmp (tmp->path, brickinfo->path)) {
- pos++;
- continue;
- }
+static int
+_glusterd_restart_gsync_session(dict_t *this, char *key, data_t *value,
+ void *data)
+{
+ char *slave = NULL;
+ char *slave_buf = NULL;
+ char *path_list = NULL;
+ char *slave_vol = NULL;
+ char *slave_host = NULL;
+ char *slave_url = NULL;
+ char *conf_path = NULL;
+ char **errmsg = NULL;
+ int ret = -1;
+ glusterd_gsync_status_temp_t *param = NULL;
+ gf_boolean_t is_running = _gf_false;
+
+ param = (glusterd_gsync_status_temp_t *)data;
+
+ GF_ASSERT(param);
+ GF_ASSERT(param->volinfo);
+
+ slave = strchr(value->data, ':');
+ if (slave) {
+ slave++;
+ slave_buf = gf_strdup(slave);
+ if (!slave_buf) {
+ gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Failed to gf_strdup");
+ ret = -1;
+ goto out;
+ }
+ } else
+ return 0;
- gf_log ("", GF_LOG_INFO, "Found brick");
- if (!sub_volume && (volinfo->dist_leaf_count > 1)) {
- sub_volume = (pos / volinfo->dist_leaf_count) + 1;
- sub_volume_start = (volinfo->dist_leaf_count *
- (sub_volume - 1));
- sub_volume_end = (volinfo->dist_leaf_count *
- sub_volume) - 1;
- } else {
- if (pos < sub_volume_start ||
- pos >sub_volume_end) {
- ret = -1;
- snprintf(err_str, 2048,"Bricks not from"
- " same subvol for %s",
- vol_type);
- gf_log ("", GF_LOG_ERROR,
- "%s", err_str);
- goto out;
- }
- }
- break;
- }
- }
- gf_cmd_log ("Volume remove-brick","volname: %s count:%d bricks:%s",
- volname, count, brick_list);
+ ret = dict_set_dynstrn(param->rsp_dict, "slave", SLEN("slave"), slave_buf);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Unable to store slave");
+ if (slave_buf)
+ GF_FREE(slave_buf);
+ goto out;
+ }
+
+ ret = glusterd_get_slave_details_confpath(param->volinfo, param->rsp_dict,
+ &slave_url, &slave_host,
+ &slave_vol, &conf_path, errmsg);
+ if (ret) {
+ if (errmsg && *errmsg)
+ gf_msg("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_SLAVE_CONFPATH_DETAILS_FETCH_FAIL, "%s", *errmsg);
+ else
+ gf_msg("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_SLAVE_CONFPATH_DETAILS_FETCH_FAIL,
+ "Unable to fetch slave or confpath details.");
+ goto out;
+ }
+
+ /* In cases that gsyncd is not running, we will not invoke it
+ * because of add-brick. */
+ ret = glusterd_check_gsync_running_local(param->volinfo->volname, slave,
+ conf_path, &is_running);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_GSYNC_VALIDATION_FAIL,
+ "gsync running validation failed.");
+ goto out;
+ }
+ if (_gf_false == is_running) {
+ gf_msg_debug("glusterd", 0,
+ "gsync session for %s and %s is"
+ " not running on this node. Hence not restarting.",
+ param->volinfo->volname, slave);
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_get_local_brickpaths(param->volinfo, &path_list);
+ if (!path_list) {
+ gf_msg_debug("glusterd", 0,
+ "This node not being part of"
+ " volume should not be running gsyncd. Hence"
+ " no gsyncd process to restart.");
+ ret = 0;
+ goto out;
+ }
- ret = glusterd_op_begin (req, GD_OP_REMOVE_BRICK, dict);
- gf_cmd_log ("Volume remove-brick","on volname: %s %s", volname,
- (ret) ? "FAILED" : "SUCCESS");
+ ret = glusterd_check_restart_gsync_session(
+ param->volinfo, slave, param->rsp_dict, path_list, conf_path, 0);
+ if (ret)
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_GSYNC_RESTART_FAIL,
+ "Unable to restart gsync session.");
out:
- if (ret) {
- if (dict)
- dict_unref (dict);
- rsp.op_ret = -1;
- rsp.op_errno = 0;
- if (err_str[0] == '\0')
- snprintf (err_str, sizeof (err_str), "Operation failed");
- gf_log ("", GF_LOG_ERROR, "%s", err_str);
- rsp.op_errstr = err_str;
- cli_rsp = &rsp;
- glusterd_submit_reply(req, cli_rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gf_cli_rsp);
-
- ret = 0; //sent error to cli, prevent second reply
-
- }
- if (brick_list)
- GF_FREE (brick_list);
- if (cli_req.dict.dict_val)
- free (cli_req.dict.dict_val); //its malloced by xdr
-
- glusterd_friend_sm ();
- glusterd_op_sm ();
-
- return ret;
+ gf_msg_debug("glusterd", 0, "Returning %d.", ret);
+ return ret;
}
-
/* op-sm */
int
-glusterd_op_perform_add_bricks (glusterd_volinfo_t *volinfo, int32_t count,
- char *bricks, dict_t *dict)
+glusterd_op_perform_add_bricks(glusterd_volinfo_t *volinfo, int32_t count,
+ char *bricks, dict_t *dict)
{
- glusterd_brickinfo_t *brickinfo = NULL;
- char *brick = NULL;
- int32_t i = 1;
- char *brick_list = NULL;
- char *free_ptr1 = NULL;
- char *free_ptr2 = NULL;
- char *saveptr = NULL;
- int32_t ret = -1;
- int32_t stripe_count = 0;
- int32_t replica_count = 0;
- int32_t type = 0;
-
- GF_ASSERT (volinfo);
-
- if (bricks) {
- brick_list = gf_strdup (bricks);
- free_ptr1 = brick_list;
- }
+ char *brick = NULL;
+ int32_t i = 1;
+ char *brick_list = NULL;
+ char *free_ptr1 = NULL;
+ char *free_ptr2 = NULL;
+ char *saveptr = NULL;
+ int32_t ret = -1;
+ int32_t stripe_count = 0;
+ int32_t replica_count = 0;
+ int32_t arbiter_count = 0;
+ int32_t type = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_gsync_status_temp_t param = {
+ 0,
+ };
+ gf_boolean_t restart_needed = 0;
+ int brickid = 0;
+ char key[64] = "";
+ char *brick_mount_dir = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ gf_boolean_t is_valid_add_brick = _gf_false;
+ gf_boolean_t restart_shd = _gf_false;
+ struct statvfs brickstat = {
+ 0,
+ };
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(volinfo);
+
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ if (bricks) {
+ brick_list = gf_strdup(bricks);
+ free_ptr1 = brick_list;
+ }
+
+ if (count)
+ brick = strtok_r(brick_list + 1, " \n", &saveptr);
+
+ if (dict) {
+ ret = dict_get_int32n(dict, "stripe-count", SLEN("stripe-count"),
+ &stripe_count);
+ if (!ret)
+ gf_msg(THIS->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_SUCCESS,
+ "stripe-count is set %d", stripe_count);
+
+ ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
+ &replica_count);
+ if (!ret)
+ gf_msg(THIS->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_SUCCESS,
+ "replica-count is set %d", replica_count);
+ ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"),
+ &arbiter_count);
+ if (!ret)
+ gf_msg(THIS->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_SUCCESS,
+ "arbiter-count is set %d", arbiter_count);
+ ret = dict_get_int32n(dict, "type", SLEN("type"), &type);
+ if (!ret)
+ gf_msg(THIS->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_SUCCESS,
+ "type is set %d, need to change it", type);
+ }
+
+ brickid = glusterd_get_next_available_brickid(volinfo);
+ if (brickid < 0)
+ goto out;
+ while (i <= count) {
+ ret = glusterd_brickinfo_new_from_brick(brick, &brickinfo, _gf_true,
+ NULL);
+ if (ret)
+ goto out;
- if (count)
- brick = strtok_r (brick_list+1, " \n", &saveptr);
-
- if (dict) {
- ret = dict_get_int32 (dict, "stripe-count", &stripe_count);
- if (!ret)
- gf_log (THIS->name, GF_LOG_INFO,
- "stripe-count is set %d", stripe_count);
-
- ret = dict_get_int32 (dict, "replica-count", &replica_count);
- if (!ret)
- gf_log (THIS->name, GF_LOG_INFO,
- "replica-count is set %d", replica_count);
- ret = dict_get_int32 (dict, "type", &type);
- if (!ret)
- gf_log (THIS->name, GF_LOG_INFO,
- "type is set %d, need to change it", type);
- }
+ GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO(brickinfo, volinfo, brickid++);
- while ( i <= count) {
- ret = glusterd_brickinfo_from_brick (brick, &brickinfo);
- if (ret)
- goto out;
-
- ret = glusterd_resolve_brick (brickinfo);
- if (ret)
- goto out;
- if (stripe_count || replica_count) {
- add_brick_at_right_order (brickinfo, volinfo, (i - 1),
- stripe_count, replica_count);
- } else {
- list_add_tail (&brickinfo->brick_list, &volinfo->bricks);
- }
- brick = strtok_r (NULL, " \n", &saveptr);
- i++;
- volinfo->brick_count++;
+ /* A bricks mount dir is required only by snapshots which were
+ * introduced in gluster-3.6.0
+ */
+ if (conf->op_version >= GD_OP_VERSION_3_6_0) {
+ brick_mount_dir = NULL;
+ snprintf(key, sizeof(key), "brick%d.mount_dir", i);
+ ret = dict_get_str(dict, key, &brick_mount_dir);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "%s not present", key);
+ goto out;
+ }
+ strncpy(brickinfo->mount_dir, brick_mount_dir,
+ SLEN(brickinfo->mount_dir));
}
+ ret = glusterd_resolve_brick(brickinfo);
+ if (ret)
+ goto out;
+
+ if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
+ ret = sys_statvfs(brickinfo->path, &brickstat);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_STATVFS_FAILED,
+ "Failed to fetch disk utilization "
+ "from the brick (%s:%s). Please check the health of "
+ "the brick. Error code was %s",
+ brickinfo->hostname, brickinfo->path, strerror(errno));
- /* Gets changed only if the options are given in add-brick cli */
- if (type)
- volinfo->type = type;
- if (replica_count) {
- volinfo->replica_count = replica_count;
+ goto out;
+ }
+ brickinfo->statfs_fsid = brickstat.f_fsid;
}
- if (stripe_count) {
- volinfo->stripe_count = stripe_count;
+ if (stripe_count || replica_count) {
+ add_brick_at_right_order(brickinfo, volinfo, (i - 1), stripe_count,
+ replica_count);
+ } else {
+ cds_list_add_tail(&brickinfo->brick_list, &volinfo->bricks);
+ }
+ brick = strtok_r(NULL, " \n", &saveptr);
+ i++;
+ volinfo->brick_count++;
+ }
+
+ /* Gets changed only if the options are given in add-brick cli */
+ if (type)
+ volinfo->type = type;
+ /* performance.client-io-threads is turned on by default,
+ * however this has adverse effects on replicate volumes due to
+ * replication design issues, till that get addressed
+ * performance.client-io-threads option is turned off for all
+ * replicate volumes if not already explicitly enabled.
+ */
+ if (type && glusterd_is_volume_replicate(volinfo) &&
+ conf->op_version >= GD_OP_VERSION_3_12_2) {
+ ret = dict_set_nstrn(volinfo->dict, "performance.client-io-threads",
+ SLEN("performance.client-io-threads"), "off",
+ SLEN("off"));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "performance.client-io-threads to off");
+ goto out;
+ }
+ }
+
+ if (replica_count) {
+ volinfo->replica_count = replica_count;
+ }
+ if (arbiter_count) {
+ volinfo->arbiter_count = arbiter_count;
+ }
+ if (stripe_count) {
+ volinfo->stripe_count = stripe_count;
+ }
+ volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo);
+
+ /* backward compatibility */
+ volinfo->sub_count = ((volinfo->dist_leaf_count == 1)
+ ? 0
+ : volinfo->dist_leaf_count);
+
+ volinfo->subvol_count = (volinfo->brick_count / volinfo->dist_leaf_count);
+
+ ret = 0;
+ if (GLUSTERD_STATUS_STARTED != volinfo->status)
+ goto generate_volfiles;
+
+ ret = generate_brick_volfiles(volinfo);
+ if (ret)
+ goto out;
+
+ brick_list = gf_strdup(bricks);
+ free_ptr2 = brick_list;
+ i = 1;
+
+ if (count)
+ brick = strtok_r(brick_list + 1, " \n", &saveptr);
+
+ if (glusterd_is_volume_replicate(volinfo)) {
+ if (replica_count && conf->op_version >= GD_OP_VERSION_3_7_10) {
+ is_valid_add_brick = _gf_true;
+ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+ ret = volinfo->shd.svc.stop(&(volinfo->shd.svc), SIGTERM);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_GLUSTER_SERVICES_STOP_FAIL,
+ "Failed to stop shd for %s.", volinfo->volname);
+ }
+ restart_shd = _gf_true;
+ }
+ ret = generate_dummy_client_volfiles(volinfo);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Failed to create volfile.");
+ goto out;
+ }
}
- volinfo->dist_leaf_count = (volinfo->stripe_count *
- volinfo->replica_count);
+ }
- /* backward compatibility */
- volinfo->sub_count = ((volinfo->dist_leaf_count == 1) ? 0:
- volinfo->dist_leaf_count);
-
- brick_list = gf_strdup (bricks);
- free_ptr2 = brick_list;
- i = 1;
-
- if (count)
- brick = strtok_r (brick_list+1, " \n", &saveptr);
-
- ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ while (i <= count) {
+ ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo,
+ _gf_true);
if (ret)
+ goto out;
+
+ if (gf_uuid_is_null(brickinfo->uuid)) {
+ ret = glusterd_resolve_brick(brickinfo);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_RESOLVE_BRICK_FAIL,
+ FMTSTR_RESOLVE_BRICK, brickinfo->hostname,
+ brickinfo->path);
goto out;
+ }
+ }
- while (i <= count) {
-
- ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
- &brickinfo,
- GF_PATH_PARTIAL);
- if (ret)
- goto out;
-
- if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_brick_start (volinfo, brickinfo);
- if (ret)
- goto out;
- }
- i++;
- brick = strtok_r (NULL, " \n", &saveptr);
+ /* if the volume is a replicate volume, do: */
+ if (is_valid_add_brick) {
+ if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
+ ret = glusterd_handle_replicate_brick_ops(volinfo, brickinfo,
+ GD_OP_ADD_BRICK);
+ if (ret < 0)
+ goto out;
+ }
}
+ ret = glusterd_brick_start(volinfo, brickinfo, _gf_true, _gf_false);
+ if (ret)
+ goto out;
+ i++;
+ brick = strtok_r(NULL, " \n", &saveptr);
+
+ /* Check if the brick is added in this node, and set
+ * the restart_needed flag. */
+ if ((!gf_uuid_compare(brickinfo->uuid, MY_UUID)) && !restart_needed) {
+ restart_needed = 1;
+ gf_msg_debug("glusterd", 0,
+ "Restart gsyncd session, if it's already "
+ "running.");
+ }
+ }
+
+ /* If the restart_needed flag is set, restart gsyncd sessions for that
+ * particular master with all the slaves. */
+ if (restart_needed) {
+ param.rsp_dict = dict;
+ param.volinfo = volinfo;
+ dict_foreach(volinfo->gsync_slaves, _glusterd_restart_gsync_session,
+ &param);
+ }
+
+generate_volfiles:
+ if (conf->op_version <= GD_OP_VERSION_3_7_5) {
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ } else {
+ /*
+ * The cluster is operating at version greater than
+ * gluster-3.7.5. So no need to sent volfile fetch
+ * request in commit phase, the same will be done
+ * in post validate phase with v3 framework.
+ */
+ }
out:
- if (free_ptr1)
- GF_FREE (free_ptr1);
- if (free_ptr2)
- GF_FREE (free_ptr2);
-
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ GF_FREE(free_ptr1);
+ GF_FREE(free_ptr2);
+ if (restart_shd) {
+ if (volinfo->shd.svc.manager(&(volinfo->shd.svc), volinfo,
+ PROC_START_NO_WAIT)) {
+ gf_msg("glusterd", GF_LOG_CRITICAL, 0,
+ GD_MSG_GLUSTER_SERVICE_START_FAIL,
+ "Failed to start shd for %s.", volinfo->volname);
+ }
+ }
+
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
}
-
int
-glusterd_op_perform_remove_brick (glusterd_volinfo_t *volinfo, char *brick,
- int force, int *need_migrate)
+glusterd_op_perform_remove_brick(glusterd_volinfo_t *volinfo, char *brick,
+ int force, int *need_migrate)
{
- glusterd_brickinfo_t *brickinfo = NULL;
- char *dup_brick = NULL;
- int32_t ret = -1;
- glusterd_conf_t *priv = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
- GF_ASSERT (volinfo);
- GF_ASSERT (brick);
+ GF_ASSERT(volinfo);
+ GF_ASSERT(brick);
- priv = THIS->private;
- GF_ASSERT (priv);
-
- dup_brick = gf_strdup (brick);
- if (!dup_brick)
- goto out;
+ priv = THIS->private;
+ GF_ASSERT(priv);
- ret = glusterd_volume_brickinfo_get_by_brick (dup_brick, volinfo,
- &brickinfo, GF_PATH_COMPLETE);
- if (ret)
- goto out;
+ ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo,
+ _gf_false);
+ if (ret)
+ goto out;
- ret = glusterd_resolve_brick (brickinfo);
- if (ret)
- goto out;
+ ret = glusterd_resolve_brick(brickinfo);
+ if (ret)
+ goto out;
- glusterd_volinfo_reset_defrag_stats (volinfo);
+ glusterd_volinfo_reset_defrag_stats(volinfo);
- if (!uuid_compare (brickinfo->uuid, MY_UUID)) {
- /* Only if the brick is in this glusterd, do the rebalance */
- if (need_migrate)
- *need_migrate = 1;
- }
+ if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
+ /* Only if the brick is in this glusterd, do the rebalance */
+ if (need_migrate)
+ *need_migrate = 1;
+ }
- if (force) {
- if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_brick_stop (volinfo, brickinfo);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Unable to stop "
- "glusterfs, ret: %d", ret);
- goto out;
- }
- }
- glusterd_delete_brick (volinfo, brickinfo);
- goto out;
+ if (force) {
+ ret = glusterd_brick_stop(volinfo, brickinfo, _gf_true);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_STOP_FAIL,
+ "Unable to stop "
+ "glusterfs, ret: %d",
+ ret);
}
+ goto out;
+ }
- brickinfo->decommissioned = 1;
+ brickinfo->decommissioned = 1;
+ ret = 0;
out:
- if (dup_brick)
- GF_FREE (dup_brick);
-
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
}
int
-glusterd_op_stage_add_brick (dict_t *dict, char **op_errstr)
+glusterd_op_stage_add_brick(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
{
- int ret = 0;
- char *volname = NULL;
- int count = 0;
- int i = 0;
- char *bricks = NULL;
- char *brick_list = NULL;
- char *saveptr = NULL;
- char *free_ptr = NULL;
- char *brick = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- glusterd_conf_t *priv = NULL;
- char msg[2048] = {0,};
- gf_boolean_t brick_alloc = _gf_false;
- char *all_bricks = NULL;
- char *str_ret = NULL;
-
- priv = THIS->private;
- if (!priv)
- goto out;
-
- ret = dict_get_str (dict, "volname", &volname);
+ int ret = 0;
+ char *volname = NULL;
+ int count = 0;
+ int replica_count = 0;
+ int arbiter_count = 0;
+ int i = 0;
+ int32_t local_brick_count = 0;
+ char *bricks = NULL;
+ char *brick_list = NULL;
+ char *saveptr = NULL;
+ char *free_ptr = NULL;
+ char *brick = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ char msg[4096] = "";
+ char key[64] = "";
+ gf_boolean_t brick_alloc = _gf_false;
+ char *all_bricks = NULL;
+ char *str_ret = NULL;
+ gf_boolean_t is_force = _gf_false;
+ glusterd_conf_t *conf = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "Unable to find volume: %s", volname);
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id(dict, volinfo);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
+ &replica_count);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Unable to get replica count");
+ }
+
+ if (replica_count > 0) {
+ ret = op_version_check(this, GD_OP_VER_PERSISTENT_AFR_XATTRS, msg,
+ sizeof(msg));
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Unable to get volume name");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERSION_MISMATCH,
+ "%s", msg);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
+ }
+
+ glusterd_add_peers_to_auth_list(volname);
+
+ if (replica_count && glusterd_is_volume_replicate(volinfo)) {
+ /* Do not allow add-brick for stopped volumes when replica-count
+ * is being increased.
+ */
+ if (GLUSTERD_STATUS_STOPPED == volinfo->status &&
+ conf->op_version >= GD_OP_VERSION_3_7_10) {
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ " Volume must not be in"
+ " stopped state when replica-count needs to "
+ " be increased.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ msg);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
+ /* op-version check for replica 2 to arbiter conversion. If we
+ * don't have this check, an older peer added as arbiter brick
+ * will not have the arbiter xlator in its volfile. */
+ if ((replica_count == 3) && (conf->op_version < GD_OP_VERSION_3_8_0)) {
+ ret = dict_get_int32n(dict, "arbiter-count", SLEN("arbiter-count"),
+ &arbiter_count);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "No arbiter count present in the dict");
+ } else if (arbiter_count == 1) {
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "Cluster op-version must "
+ "be >= 30800 to add arbiter brick to a "
+ "replica 2 volume.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ msg);
+ *op_errstr = gf_strdup(msg);
goto out;
+ }
}
-
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Unable to find volume: %s", volname);
- goto out;
+ /* Do not allow increasing replica count for arbiter volumes. */
+ if (volinfo->arbiter_count) {
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "Increasing replica count "
+ "for arbiter volumes is not supported.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ msg);
+ *op_errstr = gf_strdup(msg);
+ goto out;
}
+ }
- ret = glusterd_validate_volume_id (dict, volinfo);
- if (ret)
- goto out;
+ is_force = dict_get_str_boolean(dict, "force", _gf_false);
- if (glusterd_is_rb_ongoing (volinfo)) {
- snprintf (msg, sizeof (msg), "Replace brick is in progress on "
- "volume %s. Please retry after replace-brick "
- "operation is committed or aborted", volname);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
- *op_errstr = gf_strdup (msg);
- ret = -1;
- goto out;
+ /* Check brick order if the volume type is replicate or disperse. If
+ * force at the end of command not given then check brick order.
+ * doing this check at the originator node is sufficient.
+ */
+
+ if (!is_force && is_origin_glusterd(dict)) {
+ ret = 0;
+ if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
+ gf_msg_debug(this->name, 0,
+ "Replicate cluster type "
+ "found. Checking brick order.");
+ if (replica_count)
+ ret = glusterd_check_brick_order(dict, msg, volinfo->type,
+ &volname, &bricks, &count,
+ replica_count);
+ else
+ ret = glusterd_check_brick_order(dict, msg, volinfo->type,
+ &volname, &bricks, &count,
+ volinfo->replica_count);
+ } else if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
+ gf_msg_debug(this->name, 0,
+ "Disperse cluster type"
+ " found. Checking brick order.");
+ ret = glusterd_check_brick_order(dict, msg, volinfo->type, &volname,
+ &bricks, &count,
+ volinfo->disperse_count);
+ }
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BAD_BRKORDER,
+ "Not adding brick because of "
+ "bad brick order. %s",
+ msg);
+ *op_errstr = gf_strdup(msg);
+ goto out;
}
+ }
- if (glusterd_is_defrag_on(volinfo)) {
- snprintf (msg, sizeof(msg), "Volume name %s rebalance is in "
- "progress. Please retry after completion", volname);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
- *op_errstr = gf_strdup (msg);
+ if (volinfo->replica_count < replica_count && !is_force) {
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID))
+ continue;
+ if (brickinfo->status == GF_BRICK_STOPPED) {
ret = -1;
+ len = snprintf(msg, sizeof(msg),
+ "Brick %s "
+ "is down, changing replica "
+ "count needs all the bricks "
+ "to be up to avoid data loss",
+ brickinfo->path);
+ if (len < 0) {
+ strcpy(msg, "<error>");
+ }
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL, "%s",
+ msg);
+ *op_errstr = gf_strdup(msg);
goto out;
+ }
}
- ret = dict_get_int32 (dict, "count", &count);
+ }
+
+ if (conf->op_version > GD_OP_VERSION_3_7_5 && is_origin_glusterd(dict)) {
+ ret = glusterd_validate_quorum(this, GD_OP_ADD_BRICK, dict, op_errstr);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get count");
- goto out;
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_SERVER_QUORUM_NOT_MET,
+ "Server quorum not met. Rejecting operation.");
+ goto out;
+ }
+ } else {
+ /* Case 1: conf->op_version <= GD_OP_VERSION_3_7_5
+ * in this case the add-brick is running
+ * syncop framework that will do a quorum
+ * check by default
+ * Case 2: We don't need to do quorum check on every
+ * node, only originator glusterd need to
+ * check for quorum
+ * So nothing need to be done in else
+ */
+ }
+
+ if (glusterd_is_defrag_on(volinfo)) {
+ snprintf(msg, sizeof(msg),
+ "Volume name %s rebalance is in "
+ "progress. Please retry after completion",
+ volname);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OIP_RETRY_LATER, "%s", msg);
+ *op_errstr = gf_strdup(msg);
+ ret = -1;
+ goto out;
+ }
+
+ if (volinfo->snap_count > 0 || !cds_list_empty(&volinfo->snap_volumes)) {
+ snprintf(msg, sizeof(msg),
+ "Volume %s has %" PRIu64
+ " snapshots. "
+ "Changing the volume configuration will not effect snapshots."
+ "But the snapshot brick mount should be intact to "
+ "make them function.",
+ volname, volinfo->snap_count);
+ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SNAP_WARN, "%s", msg);
+ msg[0] = '\0';
+ }
+
+ if (!count) {
+ ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get count");
+ goto out;
}
+ }
- ret = dict_get_str (dict, "bricks", &bricks);
+ if (!bricks) {
+ ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Unable to get bricks");
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get bricks");
+ goto out;
+ }
+ }
+
+ if (bricks) {
+ brick_list = gf_strdup(bricks);
+ all_bricks = gf_strdup(bricks);
+ free_ptr = brick_list;
+ }
+
+ if (count)
+ brick = strtok_r(brick_list + 1, " \n", &saveptr);
+
+ while (i < count) {
+ if (!glusterd_store_is_valid_brickpath(volname, brick) ||
+ !glusterd_is_valid_volfpath(volname, brick)) {
+ snprintf(msg, sizeof(msg),
+ "brick path %s is "
+ "too long",
+ brick);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRKPATH_TOO_LONG, "%s",
+ msg);
+ *op_errstr = gf_strdup(msg);
+
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_brickinfo_new_from_brick(brick, &brickinfo, _gf_true,
+ NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND,
+ "Add-brick: Unable"
+ " to get brickinfo");
+ goto out;
}
+ brick_alloc = _gf_true;
- if (bricks) {
- brick_list = gf_strdup (bricks);
- all_bricks = gf_strdup (bricks);
- free_ptr = brick_list;
+ ret = glusterd_new_brick_validate(brick, brickinfo, msg, sizeof(msg),
+ NULL);
+ if (ret) {
+ *op_errstr = gf_strdup(msg);
+ ret = -1;
+ goto out;
}
- if (count)
- brick = strtok_r (brick_list+1, " \n", &saveptr);
-
-
- while ( i < count) {
- if (!glusterd_store_is_valid_brickpath (volname, brick) ||
- !glusterd_is_valid_volfpath (volname, brick)) {
- snprintf (msg, sizeof (msg), "brick path %s is "
- "too long", brick);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
- *op_errstr = gf_strdup (msg);
-
- ret = -1;
- goto out;
-
- }
-
- ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
- &brickinfo,
- GF_PATH_PARTIAL);
- if (!ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Adding duplicate brick: %s", brick);
- snprintf (msg, sizeof (msg), "Brick %s is already a "
- "part of the volume", brick);
- *op_errstr = gf_strdup (msg);
- ret = -1;
- goto out;
- } else {
- ret = glusterd_brickinfo_from_brick (brick, &brickinfo);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Add-brick: Unable"
- " to get brickinfo");
- goto out;
- }
- brick_alloc = _gf_true;
- }
+ if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
+ ret = glusterd_validate_and_create_brickpath(
+ brickinfo, volinfo->volume_id, volinfo->volname, op_errstr,
+ is_force, _gf_false);
+ if (ret)
+ goto out;
- ret = glusterd_new_brick_validate (brick, brickinfo, msg,
- sizeof (msg));
+ /* A bricks mount dir is required only by snapshots which were
+ * introduced in gluster-3.6.0
+ */
+ if (conf->op_version >= GD_OP_VERSION_3_6_0) {
+ ret = glusterd_get_brick_mount_dir(
+ brickinfo->path, brickinfo->hostname, brickinfo->mount_dir);
if (ret) {
- *op_errstr = gf_strdup (msg);
- ret = -1;
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
+ "Failed to get brick mount_dir");
+ goto out;
}
- if (!uuid_compare (brickinfo->uuid, MY_UUID)) {
- ret = glusterd_brick_create_path (brickinfo->hostname,
- brickinfo->path,
- volinfo->volume_id,
- op_errstr);
- if (ret)
- goto out;
+ snprintf(key, sizeof(key), "brick%d.mount_dir", i + 1);
+ ret = dict_set_dynstr_with_alloc(rsp_dict, key,
+ brickinfo->mount_dir);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_SET_FAILED, "Failed to set %s", key);
+ goto out;
}
+ }
- glusterd_brickinfo_delete (brickinfo);
- brick_alloc = _gf_false;
- brickinfo = NULL;
- brick = strtok_r (NULL, " \n", &saveptr);
- i++;
+ local_brick_count = i + 1;
}
+ glusterd_brickinfo_delete(brickinfo);
+ brick_alloc = _gf_false;
+ brickinfo = NULL;
+ brick = strtok_r(NULL, " \n", &saveptr);
+ i++;
+ }
+
+ ret = dict_set_int32n(rsp_dict, "brick_count", SLEN("brick_count"),
+ local_brick_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to set local_brick_count");
+ goto out;
+ }
+
out:
- if (free_ptr)
- GF_FREE (free_ptr);
- if (brick_alloc && brickinfo)
- glusterd_brickinfo_delete (brickinfo);
- if (str_ret)
- GF_FREE (str_ret);
- if (all_bricks)
- GF_FREE (all_bricks);
+ GF_FREE(free_ptr);
+ if (brick_alloc && brickinfo)
+ glusterd_brickinfo_delete(brickinfo);
+ GF_FREE(str_ret);
+ GF_FREE(all_bricks);
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
- return ret;
+ return ret;
}
int
-glusterd_op_stage_remove_brick (dict_t *dict, char **op_errstr)
+glusterd_remove_brick_validate_bricks(gf1_op_commands cmd, int32_t brick_count,
+ dict_t *dict, glusterd_volinfo_t *volinfo,
+ char **errstr,
+ gf_cli_defrag_type cmd_defrag)
{
- int ret = -1;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- char *errstr = NULL;
- int32_t brick_count = 0;
- char msg[2048] = {0,};
- int32_t flag = 0;
- gf1_op_commands cmd = GF_OP_CMD_NONE;
-
- ret = dict_get_str (dict, "volname", &volname);
+ char *brick = NULL;
+ char msg[2048] = "";
+ char key[64] = "";
+ int keylen;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ int i = 0;
+ int ret = -1;
+ char pidfile[PATH_MAX + 1] = {
+ 0,
+ };
+ glusterd_conf_t *priv = THIS->private;
+ int pid = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ /* Check whether all the nodes of the bricks to be removed are
+ * up, if not fail the operation */
+ for (i = 1; i <= brick_count; i++) {
+ keylen = snprintf(key, sizeof(key), "brick%d", i);
+ ret = dict_get_strn(dict, key, keylen, &brick);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
- goto out;
+ snprintf(msg, sizeof(msg), "Unable to get %s", key);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "key=%s", key, NULL);
+ *errstr = gf_strdup(msg);
+ goto out;
}
- ret = glusterd_volinfo_find (volname, &volinfo);
-
+ ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo,
+ _gf_false);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Volume %s does not exist", volname);
- goto out;
- }
-
- ret = glusterd_validate_volume_id (dict, volinfo);
- if (ret)
+ snprintf(msg, sizeof(msg),
+ "Incorrect brick "
+ "%s for volume %s",
+ brick, volinfo->volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INCORRECT_BRICK,
+ "Brick=%s, Volume=%s", brick, volinfo->volname, NULL);
+ *errstr = gf_strdup(msg);
+ goto out;
+ }
+ /* Do not allow commit if the bricks are not decommissioned
+ * if its a remove brick commit
+ */
+ if (!brickinfo->decommissioned && cmd == GF_OP_CMD_COMMIT) {
+ snprintf(msg, sizeof(msg),
+ "Brick %s "
+ "is not decommissioned. "
+ "Use start or force option",
+ brick);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_NOT_DECOM,
+ "Use 'start' or 'force' option, Brick=%s", brick, NULL);
+ *errstr = gf_strdup(msg);
+ ret = -1;
+ goto out;
+ }
+
+ if (glusterd_is_local_brick(THIS, volinfo, brickinfo)) {
+ switch (cmd) {
+ case GF_OP_CMD_START:
+ goto check;
+ case GF_OP_CMD_NONE:
+ default:
+ break;
+ }
+
+ switch (cmd_defrag) {
+ case GF_DEFRAG_CMD_NONE:
+ default:
+ continue;
+ }
+ check:
+ if (brickinfo->status != GF_BRICK_STARTED) {
+ snprintf(msg, sizeof(msg),
+ "Found stopped "
+ "brick %s. Use force option to "
+ "remove the offline brick",
+ brick);
+ gf_smsg(
+ this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_STOPPED,
+ "Use 'force' option to remove the offline brick, Brick=%s",
+ brick, NULL);
+ *errstr = gf_strdup(msg);
+ ret = -1;
goto out;
-
- if (glusterd_is_rb_ongoing (volinfo)) {
- snprintf (msg, sizeof (msg), "Replace brick is in progress on "
- "volume %s. Please retry after replace-brick "
- "operation is committed or aborted", volname);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
- *op_errstr = gf_strdup (msg);
+ }
+ GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, priv);
+ if (!gf_is_service_running(pidfile, &pid)) {
+ snprintf(msg, sizeof(msg),
+ "Found dead "
+ "brick %s",
+ brick);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_DEAD,
+ "Brick=%s", brick, NULL);
+ *errstr = gf_strdup(msg);
ret = -1;
goto out;
- }
+ } else {
+ ret = 0;
+ }
+ continue;
+ }
+
+ RCU_READ_LOCK;
+ peerinfo = glusterd_peerinfo_find_by_uuid(brickinfo->uuid);
+ if (!peerinfo) {
+ RCU_READ_UNLOCK;
+ snprintf(msg, sizeof(msg),
+ "Host node of the "
+ "brick %s is not in cluster",
+ brick);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_HOST_NOT_FOUND, "Brick=%s", brick, NULL);
+ *errstr = gf_strdup(msg);
+ ret = -1;
+ goto out;
+ }
+ if (!peerinfo->connected) {
+ RCU_READ_UNLOCK;
+ snprintf(msg, sizeof(msg),
+ "Host node of the "
+ "brick %s is down",
+ brick);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_HOST_DOWN,
+ "Brick=%s", brick, NULL);
+ *errstr = gf_strdup(msg);
+ ret = -1;
+ goto out;
+ }
+ RCU_READ_UNLOCK;
+ }
- ret = dict_get_int32 (dict, "command", &flag);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get brick count");
- goto out;
- }
- cmd = flag;
+out:
+ return ret;
+}
+int
+glusterd_op_stage_remove_brick(dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *errstr = NULL;
+ int32_t brick_count = 0;
+ char msg[2048] = "";
+ int32_t flag = 0;
+ gf1_op_commands cmd = GF_OP_CMD_NONE;
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+ gsync_status_param_t param = {
+ 0,
+ };
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = op_version_check(this, GD_OP_VER_PERSISTENT_AFR_XATTRS, msg,
+ sizeof(msg));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERSION_MISMATCH, "%s",
+ msg);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "Volume %s does not exist", volname);
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id(dict, volinfo);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32n(dict, "command", SLEN("command"), &flag);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get brick command");
+ goto out;
+ }
+ cmd = flag;
+
+ ret = dict_get_int32n(dict, "count", SLEN("count"), &brick_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get brick count");
+ goto out;
+ }
+
+ ret = 0;
+ if (volinfo->brick_count == brick_count) {
+ errstr = gf_strdup(
+ "Deleting all the bricks of the "
+ "volume is not allowed");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_DELETE, NULL);
ret = -1;
- switch (cmd) {
+ goto out;
+ }
+
+ ret = -1;
+ switch (cmd) {
case GF_OP_CMD_NONE:
- errstr = gf_strdup ("no remove-brick command issued");
- goto out;
+ errstr = gf_strdup("no remove-brick command issued");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NO_REMOVE_CMD,
+ NULL);
+ goto out;
case GF_OP_CMD_STATUS:
- ret = 0;
+ ret = 0;
+ goto out;
+ case GF_OP_CMD_START: {
+ if ((volinfo->type == GF_CLUSTER_TYPE_REPLICATE) &&
+ dict_getn(dict, "replica-count", SLEN("replica-count"))) {
+ snprintf(msg, sizeof(msg),
+ "Migration of data is not "
+ "needed when reducing replica count. Use the"
+ " 'force' option");
+ errstr = gf_strdup(msg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_USE_THE_FORCE, "%s",
+ errstr);
+ goto out;
+ }
+
+ if (GLUSTERD_STATUS_STARTED != volinfo->status) {
+ snprintf(msg, sizeof(msg),
+ "Volume %s needs "
+ "to be started before remove-brick "
+ "(you can use 'force' or 'commit' "
+ "to override this behavior)",
+ volinfo->volname);
+ errstr = gf_strdup(msg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_STARTED,
+ "%s", errstr);
+ goto out;
+ }
+ if (!gd_is_remove_brick_committed(volinfo)) {
+ snprintf(msg, sizeof(msg),
+ "An earlier remove-brick "
+ "task exists for volume %s. Either commit it"
+ " or stop it before starting a new task.",
+ volinfo->volname);
+ errstr = gf_strdup(msg);
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_OLD_REMOVE_BRICK_EXISTS,
+ "Earlier remove-brick"
+ " task exists for volume %s.",
+ volinfo->volname);
+ goto out;
+ }
+ if (glusterd_is_defrag_on(volinfo)) {
+ errstr = gf_strdup(
+ "Rebalance is in progress. Please "
+ "retry after completion");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OIP_RETRY_LATER,
+ "%s", errstr);
+ goto out;
+ }
+
+ /* Check if the connected clients are all of version
+ * glusterfs-3.6 and higher. This is needed to prevent some data
+ * loss issues that could occur when older clients are connected
+ * when rebalance is run.
+ */
+ ret = glusterd_check_client_op_version_support(
+ volname, GD_OP_VERSION_3_6_0, NULL);
+ if (ret) {
+ ret = gf_asprintf(op_errstr,
+ "Volume %s has one or "
+ "more connected clients of a version"
+ " lower than GlusterFS-v3.6.0. "
+ "Starting remove-brick in this state "
+ "could lead to data loss.\nPlease "
+ "disconnect those clients before "
+ "attempting this command again.",
+ volname);
+ goto out;
+ }
+
+ if (volinfo->snap_count > 0 ||
+ !cds_list_empty(&volinfo->snap_volumes)) {
+ snprintf(msg, sizeof(msg),
+ "Volume %s has %" PRIu64
+ " snapshots. "
+ "Changing the volume configuration will not effect "
+ "snapshots."
+ "But the snapshot brick mount should be intact to "
+ "make them function.",
+ volname, volinfo->snap_count);
+ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SNAP_WARN, "%s",
+ msg);
+ msg[0] = '\0';
+ }
+
+ ret = glusterd_remove_brick_validate_bricks(
+ cmd, brick_count, dict, volinfo, &errstr, GF_DEFRAG_CMD_NONE);
+ if (ret)
goto out;
- case GF_OP_CMD_START:
- {
- if (GLUSTERD_STATUS_STARTED != volinfo->status) {
- snprintf (msg, sizeof (msg), "Volume %s needs to be started "
- "before remove-brick (you can use 'force' or "
- "'commit' to override this behavior)",
- volinfo->volname);
- errstr = gf_strdup (msg);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", errstr);
- goto out;
+ if (is_origin_glusterd(dict)) {
+ ret = glusterd_generate_and_set_task_id(
+ dict, GF_REMOVE_BRICK_TID_KEY,
+ SLEN(GF_REMOVE_BRICK_TID_KEY));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL,
+ "Failed to generate task-id");
+ goto out;
}
- if (glusterd_is_defrag_on(volinfo)) {
- errstr = gf_strdup("Rebalance is in progress. Please retry"
- " after completion");
- gf_log ("glusterd", GF_LOG_ERROR, "%s", errstr);
- goto out;
+ } else {
+ ret = dict_get_strn(dict, GF_REMOVE_BRICK_TID_KEY,
+ SLEN(GF_REMOVE_BRICK_TID_KEY),
+ &task_id_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno,
+ GD_MSG_DICT_GET_FAILED, "Missing remove-brick-id");
+ ret = 0;
}
- break;
+ }
+ break;
}
case GF_OP_CMD_STOP:
- ret = 0;
- break;
+ ret = 0;
+ break;
case GF_OP_CMD_COMMIT:
- if (volinfo->decommission_in_progress) {
- errstr = gf_strdup ("use 'force' option as migration "
- "is in progress");
- goto out;
+ if (volinfo->decommission_in_progress) {
+ errstr = gf_strdup(
+ "use 'force' option as migration "
+ "is in progress");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_MIGRATION_PROG,
+ "Use 'force' option", NULL);
+ goto out;
+ }
+
+ if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_FAILED) {
+ errstr = gf_strdup(
+ "use 'force' option as migration "
+ "has failed");
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_MIGRATION_FAIL,
+ "Use 'force' option", NULL);
+ goto out;
+ }
+
+ if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_COMPLETE) {
+ if (volinfo->rebal.rebalance_failures > 0 ||
+ volinfo->rebal.skipped_files > 0) {
+ errstr = gf_strdup(
+ "use 'force' option as migration "
+ "of some files might have been skipped or "
+ "has failed");
+ gf_smsg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_MIGRATION_FAIL,
+ "Use 'force' option, some files might have been "
+ "skipped",
+ NULL);
+ goto out;
}
- break;
-
- case GF_OP_CMD_COMMIT_FORCE:
- break;
- }
+ }
- ret = dict_get_int32 (dict, "count", &brick_count);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get brick count");
+ ret = glusterd_remove_brick_validate_bricks(
+ cmd, brick_count, dict, volinfo, &errstr, GF_DEFRAG_CMD_NONE);
+ if (ret)
goto out;
- }
- ret = 0;
- if (volinfo->brick_count == brick_count) {
- errstr = gf_strdup ("Deleting all the bricks of the "
- "volume is not allowed");
+ /* If geo-rep is configured, for this volume, it should be
+ * stopped.
+ */
+ param.volinfo = volinfo;
+ ret = glusterd_check_geo_rep_running(&param, op_errstr);
+ if (ret || param.is_active) {
ret = -1;
goto out;
- }
+ }
-out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- if (ret && errstr) {
- if (op_errstr)
- *op_errstr = errstr;
- }
+ break;
- return ret;
+ case GF_OP_CMD_COMMIT_FORCE:
+ case GF_OP_CMD_DETACH_START:
+ case GF_OP_CMD_DETACH_COMMIT:
+ case GF_OP_CMD_DETACH_COMMIT_FORCE:
+ case GF_OP_CMD_STOP_DETACH_TIER:
+ break;
+ }
+ ret = 0;
+
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ if (ret && errstr) {
+ if (op_errstr)
+ *op_errstr = errstr;
+ }
+ if (!op_errstr && errstr)
+ GF_FREE(errstr);
+ return ret;
}
int
-glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo,
- gf_defrag_status_t status)
+glusterd_remove_brick_migrate_cbk(glusterd_volinfo_t *volinfo,
+ gf_defrag_status_t status)
{
- int ret = 0;
+ int ret = 0;
-#if 0 /* TODO: enable this behavior once cluster-wide awareness comes for
- defrag cbk function */
+#if 0 /* TODO: enable this behavior once cluster-wide awareness comes for \
+ defrag cbk function */
glusterd_brickinfo_t *brickinfo = NULL;
glusterd_brickinfo_t *tmp = NULL;
@@ -1331,7 +2100,8 @@ glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo,
break;
case GF_DEFRAG_STATUS_STOPPED:
/* Fall back to the old volume file */
- list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) {
+ cds_list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
+ brick_list) {
if (!brickinfo->decommissioned)
continue;
brickinfo->decommissioned = 0;
@@ -1341,13 +2111,15 @@ glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo,
case GF_DEFRAG_STATUS_COMPLETE:
/* Done with the task, you can remove the brick from the
volume file */
- list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks, brick_list) {
+ cds_list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
+ brick_list) {
if (!brickinfo->decommissioned)
continue;
gf_log (THIS->name, GF_LOG_INFO, "removing the brick %s",
brickinfo->path);
brickinfo->decommissioned = 0;
if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ /*TODO: use the 'atomic' flavour of brick_stop*/
ret = glusterd_brick_stop (volinfo, brickinfo);
if (ret) {
gf_log (THIS->name, GF_LOG_ERROR,
@@ -1383,269 +2155,642 @@ glusterd_remove_brick_migrate_cbk (glusterd_volinfo_t *volinfo,
#endif
- volinfo->decommission_in_progress = 0;
- return ret;
+ volinfo->decommission_in_progress = 0;
+ return ret;
}
-
int
-glusterd_op_add_brick (dict_t *dict, char **op_errstr)
+glusterd_op_add_brick(dict_t *dict, char **op_errstr)
{
- int ret = 0;
- char *volname = NULL;
- glusterd_conf_t *priv = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- xlator_t *this = NULL;
- char *bricks = NULL;
- int32_t count = 0;
-
- this = THIS;
- GF_ASSERT (this);
+ int ret = 0;
+ char *volname = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ char *bricks = NULL;
+ int32_t count = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Unable to allocate memory");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get count");
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "bricks", SLEN("bricks"), &bricks);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get bricks");
+ goto out;
+ }
+
+ ret = glusterd_op_perform_add_bricks(volinfo, count, bricks, dict);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL,
+ "Unable to add bricks");
+ goto out;
+ }
+ if (priv->op_version <= GD_OP_VERSION_3_7_5) {
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
+ } else {
+ /*
+ * The cluster is operating at version greater than
+ * gluster-3.7.5. So no need to store volfiles
+ * in commit phase, the same will be done
+ * in post validate phase with v3 framework.
+ */
+ }
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status)
+ ret = glusterd_svcs_manager(volinfo);
- priv = this->private;
- GF_ASSERT (priv);
+out:
+ return ret;
+}
- ret = dict_get_str (dict, "volname", &volname);
+int
+glusterd_post_commit_add_brick(dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
- goto out;
- }
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- ret = glusterd_volinfo_find (volname, &volinfo);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+ ret = glusterd_replace_old_auth_allow_list(volname);
+out:
+ return ret;
+}
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
- goto out;
- }
+int
+glusterd_post_commit_replace_brick(dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
- ret = dict_get_int32 (dict, "count", &count);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get count");
- goto out;
- }
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+ ret = glusterd_replace_old_auth_allow_list(volname);
+out:
+ return ret;
+}
- ret = dict_get_str (dict, "bricks", &bricks);
+int
+glusterd_set_rebalance_id_for_remove_brick(dict_t *req_dict, dict_t *rsp_dict)
+{
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char msg[2048] = {0};
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+ int32_t cmd = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(req_dict);
+
+ ret = dict_get_strn(rsp_dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "volname not found");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Unable to allocate memory");
+ goto out;
+ }
+
+ ret = dict_get_int32n(rsp_dict, "command", SLEN("command"), &cmd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get command");
+ goto out;
+ }
+
+ /* remove brick task id is generted in glusterd_op_stage_remove_brick(),
+ * but rsp_dict is unavailable there. So copying it to rsp_dict from
+ * req_dict here. */
+
+ if (is_origin_glusterd(rsp_dict)) {
+ ret = dict_get_strn(req_dict, GF_REMOVE_BRICK_TID_KEY,
+ SLEN(GF_REMOVE_BRICK_TID_KEY), &task_id_str);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get bricks");
+ snprintf(msg, sizeof(msg), "Missing rebalance id for remove-brick");
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_REBALANCE_ID_MISSING,
+ "%s", msg);
+ ret = 0;
+ } else {
+ gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+
+ ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id,
+ rsp_dict, GF_REMOVE_BRICK_TID_KEY,
+ SLEN(GF_REMOVE_BRICK_TID_KEY));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_REMOVE_BRICK_ID_SET_FAIL,
+ "Failed to set remove-brick-id");
goto out;
- }
-
- ret = glusterd_op_perform_add_bricks (volinfo, count, bricks, dict);
+ }
+ }
+ }
+ if (!gf_uuid_is_null(volinfo->rebal.rebalance_id) &&
+ GD_OP_REMOVE_BRICK == volinfo->rebal.op) {
+ ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, rsp_dict,
+ GF_REMOVE_BRICK_TID_KEY,
+ SLEN(GF_REMOVE_BRICK_TID_KEY));
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to add bricks");
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set task-id for volume %s", volname);
+ goto out;
}
-
- /* Need to reset the defrag/rebalance status accordingly */
- switch (volinfo->defrag_status) {
- case GF_DEFRAG_STATUS_FAILED:
- case GF_DEFRAG_STATUS_COMPLETE:
- volinfo->defrag_status = 0;
- default:
- break;
- }
-
- ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret)
- goto out;
-
- if (GLUSTERD_STATUS_STARTED == volinfo->status)
- ret = glusterd_nodesvcs_handle_graph_change (volinfo);
-
+ }
out:
- return ret;
+ return ret;
}
-
int
-glusterd_op_remove_brick (dict_t *dict, char **op_errstr)
+glusterd_op_remove_brick(dict_t *dict, char **op_errstr)
{
- int ret = -1;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- char *brick = NULL;
- int32_t count = 0;
- int32_t i = 1;
- char key[256] = {0,};
- int32_t flag = 0;
- char err_str[4096] = {0,};
- int need_rebalance = 0;
- int force = 0;
- gf1_op_commands cmd = 0;
- int32_t replica_count = 0;
- glusterd_brickinfo_t *brickinfo = NULL;
- glusterd_brickinfo_t *tmp = NULL;
-
- ret = dict_get_str (dict, "volname", &volname);
-
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *brick = NULL;
+ int32_t count = 0;
+ int32_t i = 1;
+ char key[64] = "";
+ int keylen;
+ int32_t flag = 0;
+ int need_rebalance = 0;
+ int force = 0;
+ gf1_op_commands cmd = 0;
+ int32_t replica_count = 0;
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+ dict_t *bricks_dict = NULL;
+ char *brick_tmpstr = NULL;
+ int start_remove = 0;
+ uint32_t commit_hash = 0;
+ int defrag_cmd = 0;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Unable to allocate memory");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "command", SLEN("command"), &flag);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get command");
+ goto out;
+ }
+ cmd = flag;
+
+ if (GF_OP_CMD_START == cmd)
+ start_remove = 1;
+
+ /* Set task-id, if available, in ctx dict for operations other than
+ * start
+ */
+
+ if (is_origin_glusterd(dict) && (!start_remove)) {
+ if (!gf_uuid_is_null(volinfo->rebal.rebalance_id)) {
+ ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id, dict,
+ GF_REMOVE_BRICK_TID_KEY,
+ SLEN(GF_REMOVE_BRICK_TID_KEY));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_REMOVE_BRICK_ID_SET_FAIL,
+ "Failed to set remove-brick-id");
goto out;
+ }
}
+ }
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
- goto out;
- }
+ /* Clear task-id, rebal.op and stored bricks on commmitting/stopping
+ * remove-brick */
+ if ((!start_remove) && (cmd != GF_OP_CMD_STATUS)) {
+ gf_uuid_clear(volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_NONE;
+ dict_unref(volinfo->rebal.dict);
+ volinfo->rebal.dict = NULL;
+ }
- ret = dict_get_int32 (dict, "command", &flag);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get brick count");
- goto out;
- }
- cmd = flag;
-
- ret = -1;
- switch (cmd) {
+ ret = -1;
+ switch (cmd) {
case GF_OP_CMD_NONE:
- goto out;
+ goto out;
case GF_OP_CMD_STATUS:
- ret = 0;
- goto out;
+ ret = 0;
+ goto out;
case GF_OP_CMD_STOP:
- {
- /* Fall back to the old volume file */
- list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
- brick_list) {
- if (!brickinfo->decommissioned)
- continue;
- brickinfo->decommissioned = 0;
- }
- ret = glusterd_create_volfiles_and_notify_services (volinfo);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to create volfiles");
- goto out;
- }
-
- ret = glusterd_store_volinfo (volinfo,
- GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to store volinfo");
- goto out;
- }
-
- ret = 0;
- goto out;
- }
-
case GF_OP_CMD_START:
- force = 0;
- break;
+ /* Reset defrag status to 'NOT STARTED' whenever a
+ * remove-brick/rebalance command is issued to remove
+ * stale information from previous run.
+ * Update defrag_cmd as well or it will only be done
+ * for nodes on which the brick to be removed exists.
+ */
+ /* coverity[MIXED_ENUMS] */
+ volinfo->rebal.defrag_cmd = cmd;
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED;
+ ret = dict_get_strn(dict, GF_REMOVE_BRICK_TID_KEY,
+ SLEN(GF_REMOVE_BRICK_TID_KEY), &task_id_str);
+ if (ret) {
+ gf_msg_debug(this->name, errno, "Missing remove-brick-id");
+ ret = 0;
+ } else {
+ gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_REMOVE_BRICK;
+ }
+ force = 0;
+ break;
case GF_OP_CMD_COMMIT:
- force = 1;
- break;
+ force = 1;
+ break;
case GF_OP_CMD_COMMIT_FORCE:
- if (volinfo->decommission_in_progress) {
- if (volinfo->defrag) {
- LOCK (&volinfo->defrag->lock);
- /* Fake 'rebalance-complete' so the graph change
- happens right away */
- volinfo->defrag_status = GF_DEFRAG_STATUS_COMPLETE;
+ if (volinfo->decommission_in_progress) {
+ if (volinfo->rebal.defrag) {
+ LOCK(&volinfo->rebal.defrag->lock);
+ /* Fake 'rebalance-complete' so the graph change
+ happens right away */
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_COMPLETE;
- UNLOCK (&volinfo->defrag->lock);
- }
- /* Graph change happens in rebalance _cbk function,
- no need to do anything here */
- /* TODO: '_cbk' function is not doing anything for now */
+ UNLOCK(&volinfo->rebal.defrag->lock);
}
-
- ret = 0;
- force = 1;
- break;
- }
-
- ret = dict_get_int32 (dict, "count", &count);
+ /* Graph change happens in rebalance _cbk function,
+ no need to do anything here */
+ /* TODO: '_cbk' function is not doing anything for now */
+ }
+
+ ret = 0;
+ force = 1;
+ break;
+ case GF_OP_CMD_DETACH_START:
+ case GF_OP_CMD_DETACH_COMMIT_FORCE:
+ case GF_OP_CMD_DETACH_COMMIT:
+ case GF_OP_CMD_STOP_DETACH_TIER:
+ break;
+ }
+
+ ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get count");
+ goto out;
+ }
+ /* Save the list of bricks for later usage only on starting a
+ * remove-brick. Right now this is required for displaying the task
+ * parameters with task status in volume status.
+ */
+
+ if (start_remove) {
+ bricks_dict = dict_new();
+ if (!bricks_dict) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_int32n(bricks_dict, "count", SLEN("count"), count);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get count");
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to save remove-brick count");
+ goto out;
}
+ }
-
- while ( i <= count) {
- snprintf (key, 256, "brick%d", i);
- ret = dict_get_str (dict, key, &brick);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get %s", key);
- goto out;
- }
-
- ret = glusterd_op_perform_remove_brick (volinfo, brick, force,
- &need_rebalance);
- if (ret)
- goto out;
- i++;
- }
- ret = dict_get_int32 (dict, "replica-count", &replica_count);
- if (!ret) {
- gf_log (THIS->name, GF_LOG_INFO,
- "changing replica count %d to %d on volume %s",
- volinfo->replica_count, replica_count,
- volinfo->volname);
- volinfo->replica_count = replica_count;
- volinfo->dist_leaf_count = (volinfo->stripe_count *
- replica_count);
- if (replica_count == 1) {
- if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
- volinfo->type = GF_CLUSTER_TYPE_NONE;
- /* backward compatibility */
- volinfo->sub_count = 0;
- } else {
- volinfo->type = GF_CLUSTER_TYPE_STRIPE;
- /* backward compatibility */
- volinfo->sub_count = volinfo->dist_leaf_count;
- }
- }
+ while (i <= count) {
+ keylen = snprintf(key, sizeof(key), "brick%d", i);
+ ret = dict_get_strn(dict, key, keylen, &brick);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get %s", key);
+ goto out;
}
- ret = glusterd_create_volfiles_and_notify_services (volinfo);
- if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "failed to create volfiles");
+ if (start_remove) {
+ brick_tmpstr = gf_strdup(brick);
+ if (!brick_tmpstr) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Failed to duplicate brick name");
goto out;
+ }
+ ret = dict_set_dynstrn(bricks_dict, key, keylen, brick_tmpstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to add brick to dict");
+ goto out;
+ }
+ brick_tmpstr = NULL;
}
- ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ ret = glusterd_op_perform_remove_brick(volinfo, brick, force,
+ &need_rebalance);
+ if (ret)
+ goto out;
+ i++;
+ }
+
+ if (start_remove)
+ volinfo->rebal.dict = dict_ref(bricks_dict);
+
+ ret = dict_get_int32n(dict, "replica-count", SLEN("replica-count"),
+ &replica_count);
+ if (!ret) {
+ gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED,
+ "changing replica count %d to %d on volume %s",
+ volinfo->replica_count, replica_count, volinfo->volname);
+ volinfo->replica_count = replica_count;
+ /* A reduction in replica count implies an arbiter volume
+ * earlier is now no longer one. */
+ if (volinfo->arbiter_count)
+ volinfo->arbiter_count = 0;
+ volinfo->sub_count = replica_count;
+ volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo);
+
+ /*
+ * volinfo->type and sub_count have already been set for
+ * volumes undergoing a detach operation, they should not
+ * be modified here.
+ */
+ if (replica_count == 1) {
+ if (volinfo->type == GF_CLUSTER_TYPE_REPLICATE) {
+ volinfo->type = GF_CLUSTER_TYPE_NONE;
+ /* backward compatibility */
+ volinfo->sub_count = 0;
+ }
+ }
+ }
+ volinfo->subvol_count = (volinfo->brick_count / volinfo->dist_leaf_count);
+
+ if (!glusterd_is_volume_replicate(volinfo) &&
+ conf->op_version >= GD_OP_VERSION_3_12_2) {
+ ret = dict_set_nstrn(volinfo->dict, "performance.client-io-threads",
+ SLEN("performance.client-io-threads"), "on",
+ SLEN("on"));
if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING, "failed to store volinfo");
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "performance.client-io-threads to on");
+ goto out;
+ }
+ }
+
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "failed to create volfiles");
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_STORE_FAIL,
+ "failed to store volinfo");
+ goto out;
+ }
+
+ if (start_remove && volinfo->status == GLUSTERD_STATUS_STARTED) {
+ ret = glusterd_svcs_reconfigure(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_RECONF_FAIL,
+ "Unable to reconfigure NFS-Server");
+ goto out;
}
+ }
- /* Need to reset the defrag/rebalance status accordingly */
- switch (volinfo->defrag_status) {
+ /* Need to reset the defrag/rebalance status accordingly */
+ switch (volinfo->rebal.defrag_status) {
case GF_DEFRAG_STATUS_FAILED:
case GF_DEFRAG_STATUS_COMPLETE:
- volinfo->defrag_status = 0;
+ volinfo->rebal.defrag_status = 0;
+ /* FALLTHROUGH */
default:
- break;
- }
- if (!force && need_rebalance) {
- /* perform the rebalance operations */
- ret = glusterd_handle_defrag_start (volinfo, err_str, 4096,
- GF_DEFRAG_CMD_START_FORCE,
- glusterd_remove_brick_migrate_cbk);
- if (!ret)
- volinfo->decommission_in_progress = 1;
+ break;
+ }
+ if (!force && need_rebalance) {
+ if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) {
+ volinfo->rebal.commit_hash = commit_hash;
+ }
+ /* perform the rebalance operations */
+ defrag_cmd = GF_DEFRAG_CMD_START_FORCE;
+ /*
+ * We need to set this *before* we issue commands to the
+ * bricks, or else we might end up setting it after the bricks
+ * have responded. If we fail to send the request(s) we'll
+ * clear it ourselves because nobody else will.
+ */
+ volinfo->decommission_in_progress = 1;
+ char err_str[4096] = "";
+ ret = glusterd_handle_defrag_start(
+ volinfo, err_str, sizeof(err_str), defrag_cmd,
+ glusterd_remove_brick_migrate_cbk, GD_OP_REMOVE_BRICK);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to start the rebalance");
- }
- } else {
- if (GLUSTERD_STATUS_STARTED == volinfo->status)
- ret = glusterd_nodesvcs_handle_graph_change (volinfo);
- }
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REBALANCE_START_FAIL,
+ "failed to start the rebalance");
+ /* TBD: shouldn't we do more than print a message? */
+ volinfo->decommission_in_progress = 0;
+ if (op_errstr)
+ *op_errstr = gf_strdup(err_str);
+ }
+ } else {
+ if (GLUSTERD_STATUS_STARTED == volinfo->status)
+ ret = glusterd_svcs_manager(volinfo);
+ }
+out:
+ GF_FREE(brick_tmpstr);
+ if (bricks_dict)
+ dict_unref(bricks_dict);
+ gf_msg_debug(this->name, 0, "returning %d ", ret);
+ return ret;
+}
+int
+glusterd_op_stage_barrier(dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ char *volname = NULL;
+ glusterd_volinfo_t *vol = NULL;
+ char *barrier_op = NULL;
+
+ GF_ASSERT(dict);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Volname not present in "
+ "dict");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &vol);
+ if (ret) {
+ gf_asprintf(op_errstr, "Volume %s does not exist", volname);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s",
+ *op_errstr);
+ goto out;
+ }
+
+ if (!glusterd_is_volume_started(vol)) {
+ gf_asprintf(op_errstr, "Volume %s is not started", volname);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "barrier", SLEN("barrier"), &barrier_op);
+ if (ret == -1) {
+ gf_asprintf(op_errstr,
+ "Barrier op for volume %s not present "
+ "in dict",
+ volname);
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s",
+ *op_errstr);
+ goto out;
+ }
+ ret = 0;
out:
- if (ret && err_str[0] && op_errstr)
- *op_errstr = gf_strdup (err_str);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
- return ret;
+int
+glusterd_op_barrier(dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ char *volname = NULL;
+ glusterd_volinfo_t *vol = NULL;
+ char *barrier_op = NULL;
+
+ GF_ASSERT(dict);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Volname not present in "
+ "dict");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &vol);
+ if (ret) {
+ gf_asprintf(op_errstr, "Volume %s does not exist", volname);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s",
+ *op_errstr);
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "barrier", SLEN("barrier"), &barrier_op);
+ if (ret) {
+ gf_asprintf(op_errstr,
+ "Barrier op for volume %s not present "
+ "in dict",
+ volname);
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED, "%s",
+ *op_errstr);
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(vol->dict, "features.barrier", barrier_op);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to set barrier op in"
+ " volume option dict");
+ goto out;
+ }
+
+ gd_update_volume_op_versions(vol);
+ ret = glusterd_create_volfiles(vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Failed to create volfiles");
+ goto out;
+ }
+ ret = glusterd_store_volinfo(vol, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_handle_add_tier_brick(rpcsvc_request_t *req)
+{
+ return 0;
+}
+
+int
+glusterd_handle_attach_tier(rpcsvc_request_t *req)
+{
+ return 0;
+}
+
+int
+glusterd_handle_detach_tier(rpcsvc_request_t *req)
+{
+ return 0;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-helper.c b/xlators/mgmt/glusterd/src/glusterd-conn-helper.c
new file mode 100644
index 00000000000..a7f54ec24b7
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-conn-helper.c
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterd-conn-mgmt.h"
+#include "glusterd-svc-mgmt.h"
+
+#define _LGPL_SOURCE
+#include <urcu/rculist.h>
+
+glusterd_svc_t *
+glusterd_conn_get_svc_object(glusterd_conn_t *conn)
+{
+ return cds_list_entry(conn, glusterd_svc_t, conn);
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-helper.h b/xlators/mgmt/glusterd/src/glusterd-conn-helper.h
new file mode 100644
index 00000000000..6f500309175
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-conn-helper.h
@@ -0,0 +1,21 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_CONN_HELPER_H_
+#define _GLUSTERD_CONN_HELPER_H_
+
+#include "rpc-clnt.h"
+
+#include "glusterd-conn-mgmt.h"
+
+glusterd_svc_t *
+glusterd_conn_get_svc_object(glusterd_conn_t *conn);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
new file mode 100644
index 00000000000..5c01f0c70b6
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.c
@@ -0,0 +1,191 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/xlator.h>
+#include "rpc-clnt.h"
+#include "glusterd.h"
+#include "glusterd-conn-mgmt.h"
+#include "glusterd-conn-helper.h"
+#include "glusterd-utils.h"
+#include "glusterd-messages.h"
+
+int
+glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout,
+ glusterd_conn_notify_t notify)
+{
+ int ret = -1;
+ dict_t *options = NULL;
+ struct rpc_clnt *rpc = NULL;
+ xlator_t *this = THIS;
+ glusterd_svc_t *svc = NULL;
+
+ if (!this) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_XLATOR_NOT_DEFINED,
+ NULL);
+ goto out;
+ }
+
+ options = dict_new();
+ if (!options) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ svc = glusterd_conn_get_svc_object(conn);
+ if (!svc) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL,
+ "Failed to get the service");
+ goto out;
+ }
+
+ ret = rpc_transport_unix_options_build(options, sockpath, frame_timeout);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32n(options, "transport.socket.ignore-enoent",
+ SLEN("transport.socket.ignore-enoent"), 1);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=transport.socket.ignore-enoent", NULL);
+ goto out;
+ }
+
+ /* @options is free'd by rpc_transport when destroyed */
+ rpc = rpc_clnt_new(options, this, (char *)svc->name, 16);
+ if (!rpc) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = rpc_clnt_register_notify(rpc, glusterd_conn_common_notify, conn);
+ if (ret)
+ goto out;
+
+ ret = snprintf(conn->sockpath, sizeof(conn->sockpath), "%s", sockpath);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ } else
+ ret = 0;
+
+ conn->frame_timeout = frame_timeout;
+ conn->rpc = rpc;
+ conn->notify = notify;
+out:
+ if (options)
+ dict_unref(options);
+ if (ret) {
+ if (rpc) {
+ rpc_clnt_unref(rpc);
+ rpc = NULL;
+ }
+ }
+ return ret;
+}
+
+int
+glusterd_conn_term(glusterd_conn_t *conn)
+{
+ rpc_clnt_unref(conn->rpc);
+ return 0;
+}
+
+int
+glusterd_conn_connect(glusterd_conn_t *conn)
+{
+ return rpc_clnt_start(conn->rpc);
+}
+
+int
+glusterd_conn_disconnect(glusterd_conn_t *conn)
+{
+ rpc_clnt_disable(conn->rpc);
+
+ return 0;
+}
+
+int
+__glusterd_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ glusterd_conn_t *conn = mydata;
+
+ /* Silently ignoring this error, exactly like the current
+ * implementation */
+ if (!conn)
+ return 0;
+
+ return conn->notify(conn, event);
+}
+
+int
+glusterd_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ return glusterd_big_locked_notify(rpc, mydata, event, data,
+ __glusterd_conn_common_notify);
+}
+
+int32_t
+glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath,
+ int len)
+{
+ char sockfilepath[PATH_MAX] = {
+ 0,
+ };
+
+ snprintf(sockfilepath, sizeof(sockfilepath), "%s/run-%s", rundir,
+ uuid_utoa(uuid));
+
+ glusterd_set_socket_filepath(sockfilepath, socketpath, len);
+ return 0;
+}
+
+int
+__glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ glusterd_conf_t *conf = THIS->private;
+ glusterd_svc_proc_t *mux_proc = mydata;
+ int ret = -1;
+
+ /* Silently ignoring this error, exactly like the current
+ * implementation */
+ if (!mux_proc)
+ return 0;
+
+ if (event == RPC_CLNT_DESTROY) {
+ /*RPC_CLNT_DESTROY will only called after mux_proc detached from the
+ * list. So it is safe to call without lock. Processing
+ * RPC_CLNT_DESTROY under a lock will lead to deadlock.
+ */
+ if (mux_proc->data) {
+ glusterd_volinfo_unref(mux_proc->data);
+ mux_proc->data = NULL;
+ }
+ GF_FREE(mux_proc);
+ ret = 0;
+ } else {
+ pthread_mutex_lock(&conf->attach_lock);
+ {
+ ret = mux_proc->notify(mux_proc, event);
+ }
+ pthread_mutex_unlock(&conf->attach_lock);
+ }
+ return ret;
+}
+
+int
+glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
+{
+ return glusterd_big_locked_notify(rpc, mydata, event, data,
+ __glusterd_muxsvc_conn_common_notify);
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
new file mode 100644
index 00000000000..1b225621ab1
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-conn-mgmt.h
@@ -0,0 +1,53 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_CONN_MGMT_H_
+#define _GLUSTERD_CONN_MGMT_H_
+
+#include "rpc-clnt.h"
+
+typedef struct glusterd_conn_ glusterd_conn_t;
+
+typedef int (*glusterd_conn_notify_t)(glusterd_conn_t *conn,
+ rpc_clnt_event_t event);
+
+struct glusterd_conn_ {
+ struct rpc_clnt *rpc;
+ /* Existing daemons tend to specialize their respective
+ * notify implementations, so ... */
+ glusterd_conn_notify_t notify;
+ int frame_timeout;
+ char sockpath[PATH_MAX];
+};
+
+int
+glusterd_conn_init(glusterd_conn_t *conn, char *sockpath, int frame_timeout,
+ glusterd_conn_notify_t notify);
+
+int
+glusterd_conn_term(glusterd_conn_t *conn);
+
+int
+glusterd_conn_connect(glusterd_conn_t *conn);
+
+int
+glusterd_conn_disconnect(glusterd_conn_t *conn);
+
+int
+glusterd_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data);
+int
+glusterd_muxsvc_conn_common_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data);
+
+int32_t
+glusterd_conn_build_socket_filepath(char *rundir, uuid_t uuid, char *socketpath,
+ int len);
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-errno.h b/xlators/mgmt/glusterd/src/glusterd-errno.h
new file mode 100644
index 00000000000..c74070e0e8d
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-errno.h
@@ -0,0 +1,33 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_ERRNO_H
+#define _GLUSTERD_ERRNO_H
+
+enum glusterd_op_errno {
+ EG_INTRNL = 30800, /* Internal Error */
+ EG_OPNOTSUP = 30801, /* Gluster Op Not Supported */
+ EG_ANOTRANS = 30802, /* Another Transaction in Progress */
+ EG_BRCKDWN = 30803, /* One or more brick is down */
+ EG_NODEDWN = 30804, /* One or more node is down */
+ EG_HRDLMT = 30805, /* Hard Limit is reached */
+ EG_NOVOL = 30806, /* Volume does not exist */
+ EG_NOSNAP = 30807, /* Snap does not exist */
+ EG_RBALRUN = 30808, /* Rebalance is running */
+ EG_VOLRUN = 30809, /* Volume is running */
+ EG_VOLSTP = 30810, /* Volume is not running */
+ EG_VOLEXST = 30811, /* Volume exists */
+ EG_SNAPEXST = 30812, /* Snapshot exists */
+ EG_ISSNAP = 30813, /* Volume is a snap volume */
+ EG_GEOREPRUN = 30814, /* Geo-Replication is running */
+ EG_NOTTHINP = 30815, /* Bricks are not thinly provisioned */
+ EG_NOGANESHA = 30816, /* Global ganesha is not enabled */
+};
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-ganesha.c b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
new file mode 100644
index 00000000000..f08bd6cebee
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-ganesha.c
@@ -0,0 +1,927 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/common-utils.h>
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-messages.h"
+#include <glusterfs/syscall.h>
+
+#include <ctype.h>
+
+int
+start_ganesha(char **op_errstr);
+
+typedef struct service_command {
+ char *binary;
+ char *service;
+ int (*action)(struct service_command *, char *);
+} service_command;
+
+/* parsing_ganesha_ha_conf will allocate the returned string
+ * to be freed (GF_FREE) by the caller
+ * return NULL if error or not found */
+static char *
+parsing_ganesha_ha_conf(const char *key)
+{
+#define MAX_LINE 1024
+ char scratch[MAX_LINE * 2] = {
+ 0,
+ };
+ char *value = NULL, *pointer = NULL, *end_pointer = NULL;
+ FILE *fp;
+
+ fp = fopen(GANESHA_HA_CONF, "r");
+ if (fp == NULL) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "couldn't open the file %s", GANESHA_HA_CONF);
+ goto end_ret;
+ }
+ while ((pointer = fgets(scratch, MAX_LINE, fp)) != NULL) {
+ /* Read config file until we get matching "^[[:space:]]*key" */
+ if (*pointer == '#') {
+ continue;
+ }
+ while (isblank(*pointer)) {
+ pointer++;
+ }
+ if (strncmp(pointer, key, strlen(key))) {
+ continue;
+ }
+ pointer += strlen(key);
+ /* key found : if we fail to parse, we'll return an error
+ * rather than trying next one
+ * - supposition : conf file is bash compatible : no space
+ * around the '=' */
+ if (*pointer != '=') {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno,
+ GD_MSG_GET_CONFIG_INFO_FAILED, "Parsing %s failed at key %s",
+ GANESHA_HA_CONF, key);
+ goto end_close;
+ }
+ pointer++; /* jump the '=' */
+
+ if (*pointer == '"' || *pointer == '\'') {
+ /* dont get the quote */
+ pointer++;
+ }
+ end_pointer = pointer;
+ /* stop at the next closing quote or blank/newline */
+ do {
+ end_pointer++;
+ } while (!(*end_pointer == '\'' || *end_pointer == '"' ||
+ isspace(*end_pointer) || *end_pointer == '\0'));
+ *end_pointer = '\0';
+
+ /* got it. copy it and return */
+ value = gf_strdup(pointer);
+ break;
+ }
+
+end_close:
+ fclose(fp);
+end_ret:
+ return value;
+}
+
+static int
+sc_systemctl_action(struct service_command *sc, char *command)
+{
+ runner_t runner = {
+ 0,
+ };
+
+ runinit(&runner);
+ runner_add_args(&runner, sc->binary, command, sc->service, NULL);
+ return runner_run(&runner);
+}
+
+static int
+sc_service_action(struct service_command *sc, char *command)
+{
+ runner_t runner = {
+ 0,
+ };
+
+ runinit(&runner);
+ runner_add_args(&runner, sc->binary, sc->service, command, NULL);
+ return runner_run(&runner);
+}
+
+static int
+manage_service(char *action)
+{
+ int i = 0;
+ int ret = 0;
+ struct service_command sc_list[] = {{.binary = "/bin/systemctl",
+ .service = "nfs-ganesha",
+ .action = sc_systemctl_action},
+ {.binary = "/sbin/invoke-rc.d",
+ .service = "nfs-ganesha",
+ .action = sc_service_action},
+ {.binary = "/sbin/service",
+ .service = "nfs-ganesha",
+ .action = sc_service_action},
+ {.binary = NULL}};
+
+ while (sc_list[i].binary != NULL) {
+ ret = sys_access(sc_list[i].binary, X_OK);
+ if (ret == 0) {
+ gf_msg_debug(THIS->name, 0, "%s found.", sc_list[i].binary);
+ return sc_list[i].action(&sc_list[i], action);
+ }
+ i++;
+ }
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_UNRECOGNIZED_SVC_MNGR,
+ "Could not %s NFS-Ganesha.Service manager for distro"
+ " not recognized.",
+ action);
+ return ret;
+}
+
+/*
+ * Check if the cluster is a ganesha cluster or not *
+ */
+gf_boolean_t
+glusterd_is_ganesha_cluster()
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t ret_bool = _gf_false;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("ganesha", this, out);
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ ret = dict_get_str_boolean(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL,
+ _gf_false);
+ if (ret == _gf_true) {
+ ret_bool = _gf_true;
+ gf_msg_debug(this->name, 0, "nfs-ganesha is enabled for the cluster");
+ } else
+ gf_msg_debug(this->name, 0, "nfs-ganesha is disabled for the cluster");
+
+out:
+ return ret_bool;
+}
+
+/* Check if ganesha.enable is set to 'on', that checks if
+ * a particular volume is exported via NFS-Ganesha */
+gf_boolean_t
+glusterd_check_ganesha_export(glusterd_volinfo_t *volinfo)
+{
+ char *value = NULL;
+ gf_boolean_t is_exported = _gf_false;
+ int ret = 0;
+
+ ret = glusterd_volinfo_get(volinfo, "ganesha.enable", &value);
+ if ((ret == 0) && value) {
+ if (strcmp(value, "on") == 0) {
+ gf_msg_debug(THIS->name, 0,
+ "ganesha.enable set"
+ " to %s",
+ value);
+ is_exported = _gf_true;
+ }
+ }
+ return is_exported;
+}
+
+/* *
+ * The below function is called as part of commit phase for volume set option
+ * "ganesha.enable". If the value is "on", it creates export configuration file
+ * and then export the volume via dbus command. Incase of "off", the volume
+ * will be already unexported during stage phase, so it will remove the conf
+ * file from shared storage
+ */
+int
+glusterd_check_ganesha_cmd(char *key, char *value, char **errstr, dict_t *dict)
+{
+ int ret = 0;
+ char *volname = NULL;
+
+ GF_ASSERT(key);
+ GF_ASSERT(value);
+ GF_ASSERT(dict);
+
+ if ((strcmp(key, "ganesha.enable") == 0)) {
+ if ((strcmp(value, "on")) && (strcmp(value, "off"))) {
+ gf_asprintf(errstr,
+ "Invalid value"
+ " for volume set command. Use on/off only.");
+ ret = -1;
+ goto out;
+ }
+ if (strcmp(value, "on") == 0) {
+ ret = glusterd_handle_ganesha_op(dict, errstr, key, value);
+
+ } else if (is_origin_glusterd(dict)) {
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ gf_msg("glusterd-ganesha", GF_LOG_ERROR, errno,
+ GD_MSG_DICT_GET_FAILED, "Unable to get volume name");
+ goto out;
+ }
+ ret = manage_export_config(volname, "off", errstr);
+ }
+ }
+out:
+ if (ret) {
+ gf_msg("glusterd-ganesha", GF_LOG_ERROR, 0,
+ GD_MSG_NFS_GNS_OP_HANDLE_FAIL,
+ "Handling NFS-Ganesha"
+ " op failed.");
+ }
+ return ret;
+}
+
+int
+glusterd_op_stage_set_ganesha(dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *value = NULL;
+ char *str = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(dict);
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_str(dict, "value", &value);
+ if (value == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "value not present.");
+ goto out;
+ }
+ /* This dict_get will fail if the user had never set the key before */
+ /*Ignoring the ret value and proceeding */
+ ret = dict_get_str(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL, &str);
+ if (str ? strcmp(value, str) == 0 : strcmp(value, "disable") == 0) {
+ gf_asprintf(op_errstr, "nfs-ganesha is already %sd.", value);
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(value, "enable") == 0) {
+ ret = start_ganesha(op_errstr);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_START_FAIL,
+ "Could not start NFS-Ganesha");
+ }
+ } else {
+ ret = stop_ganesha(op_errstr);
+ if (ret)
+ gf_msg_debug(THIS->name, 0,
+ "Could not stop "
+ "NFS-Ganesha.");
+ }
+
+out:
+
+ if (ret) {
+ if (!(*op_errstr)) {
+ *op_errstr = gf_strdup("Error, Validation Failed");
+ gf_msg_debug(this->name, 0, "Error, Cannot Validate option :%s",
+ GLUSTERD_STORE_KEY_GANESHA_GLOBAL);
+ } else {
+ gf_msg_debug(this->name, 0, "Error, Cannot Validate option");
+ }
+ }
+ return ret;
+}
+
+int
+glusterd_op_set_ganesha(dict_t *dict, char **errstr)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char *next_version = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_str(dict, "key", &key);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Couldn't get key in global option set");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "value", &value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Couldn't get value in global option set");
+ goto out;
+ }
+
+ ret = glusterd_handle_ganesha_op(dict, errstr, key, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NFS_GNS_SETUP_FAIL,
+ "Initial NFS-Ganesha set up failed");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr_with_alloc(priv->opts,
+ GLUSTERD_STORE_KEY_GANESHA_GLOBAL, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to set"
+ " nfs-ganesha in dict.");
+ goto out;
+ }
+ ret = glusterd_get_next_global_opt_version_str(priv->opts, &next_version);
+ if (ret) {
+ gf_msg_debug(THIS->name, 0,
+ "Could not fetch "
+ " global op version");
+ goto out;
+ }
+ ret = dict_set_str(priv->opts, GLUSTERD_GLOBAL_OPT_VERSION, next_version);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_options(this, priv->opts);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_FAIL,
+ "Failed to store options");
+ goto out;
+ }
+
+out:
+ gf_msg_debug(this->name, 0, "returning %d", ret);
+ return ret;
+}
+
+/* Following function parse GANESHA_HA_CONF
+ * The sample file looks like below,
+ * HA_NAME="ganesha-ha-360"
+ * HA_VOL_NAME="ha-state"
+ * HA_CLUSTER_NODES="server1,server2"
+ * VIP_rhs_1="10.x.x.x"
+ * VIP_rhs_2="10.x.x.x." */
+
+/* Check if the localhost is listed as one of nfs-ganesha nodes */
+gf_boolean_t
+check_host_list(void)
+{
+ glusterd_conf_t *priv = NULL;
+ char *hostname, *hostlist;
+ gf_boolean_t ret = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ priv = THIS->private;
+ GF_ASSERT(priv);
+
+ hostlist = parsing_ganesha_ha_conf("HA_CLUSTER_NODES");
+ if (hostlist == NULL) {
+ gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_GET_CONFIG_INFO_FAILED,
+ "couldn't get HA_CLUSTER_NODES from file %s", GANESHA_HA_CONF);
+ return _gf_false;
+ }
+
+ /* Hostlist is a comma separated list now */
+ hostname = strtok(hostlist, ",");
+ while (hostname != NULL) {
+ ret = gf_is_local_addr(hostname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NFS_GNS_HOST_FOUND,
+ "ganesha host found "
+ "Hostname is %s",
+ hostname);
+ break;
+ }
+ hostname = strtok(NULL, ",");
+ }
+
+ GF_FREE(hostlist);
+ return ret;
+}
+
+int
+gd_ganesha_send_dbus(char *volname, char *value)
+{
+ runner_t runner = {
+ 0,
+ };
+ int ret = -1;
+ runinit(&runner);
+
+ GF_VALIDATE_OR_GOTO("glusterd-ganesha", volname, out);
+ GF_VALIDATE_OR_GOTO("glusterd-ganesha", value, out);
+
+ ret = 0;
+ if (check_host_list()) {
+ /* Check whether ganesha is running on this node */
+ if (manage_service("status")) {
+ gf_msg("glusterd-ganesha", GF_LOG_WARNING, 0,
+ GD_MSG_GANESHA_NOT_RUNNING,
+ "Export failed, NFS-Ganesha is not running");
+ } else {
+ runner_add_args(&runner, GANESHA_PREFIX "/dbus-send.sh", CONFDIR,
+ value, volname, NULL);
+ ret = runner_run(&runner);
+ }
+ }
+out:
+ return ret;
+}
+
+int
+manage_export_config(char *volname, char *value, char **op_errstr)
+{
+ runner_t runner = {
+ 0,
+ };
+ int ret = -1;
+
+ GF_ASSERT(volname);
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/create-export-ganesha.sh",
+ CONFDIR, value, volname, NULL);
+ ret = runner_run(&runner);
+
+ if (ret && op_errstr)
+ gf_asprintf(op_errstr,
+ "Failed to create"
+ " NFS-Ganesha export config file.");
+
+ return ret;
+}
+
+/* Exports and unexports a particular volume via NFS-Ganesha */
+int
+ganesha_manage_export(dict_t *dict, char *value,
+ gf_boolean_t update_cache_invalidation, char **op_errstr)
+{
+ int ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ dict_t *vol_opts = NULL;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ gf_boolean_t option = _gf_false;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+
+ GF_ASSERT(value);
+ GF_ASSERT(dict);
+ GF_ASSERT(priv);
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+ ret = gf_string2boolean(value, &option);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "invalid value.");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
+ }
+
+ ret = glusterd_check_ganesha_export(volinfo);
+ if (ret && option) {
+ gf_asprintf(op_errstr,
+ "ganesha.enable "
+ "is already 'on'.");
+ ret = -1;
+ goto out;
+
+ } else if (!option && !ret) {
+ gf_asprintf(op_errstr,
+ "ganesha.enable "
+ "is already 'off'.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Check if global option is enabled, proceed only then */
+ ret = dict_get_str_boolean(priv->opts, GLUSTERD_STORE_KEY_GANESHA_GLOBAL,
+ _gf_false);
+ if (ret == -1) {
+ gf_msg_debug(this->name, 0,
+ "Failed to get "
+ "global option dict.");
+ gf_asprintf(op_errstr,
+ "The option "
+ "nfs-ganesha should be "
+ "enabled before setting ganesha.enable.");
+ goto out;
+ }
+ if (!ret) {
+ gf_asprintf(op_errstr,
+ "The option "
+ "nfs-ganesha should be "
+ "enabled before setting ganesha.enable.");
+ ret = -1;
+ goto out;
+ }
+
+ /* *
+ * Create the export file from the node where ganesha.enable "on"
+ * is executed
+ * */
+ if (option && is_origin_glusterd(dict)) {
+ ret = manage_export_config(volname, "on", op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Failed to create"
+ "export file for NFS-Ganesha\n");
+ goto out;
+ }
+ }
+ ret = gd_ganesha_send_dbus(volname, value);
+ if (ret) {
+ gf_asprintf(op_errstr,
+ "Dynamic export addition/deletion failed."
+ " Please see log file for details");
+ goto out;
+ }
+ if (update_cache_invalidation) {
+ vol_opts = volinfo->dict;
+ ret = dict_set_dynstr_with_alloc(vol_opts,
+ "features.cache-invalidation", value);
+ if (ret)
+ gf_asprintf(op_errstr,
+ "Cache-invalidation could not"
+ " be set to %s.",
+ value);
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ gf_asprintf(op_errstr, "failed to store volinfo for %s",
+ volinfo->volname);
+ }
+out:
+ return ret;
+}
+
+int
+tear_down_cluster(gf_boolean_t run_teardown)
+{
+ int ret = 0;
+ runner_t runner = {
+ 0,
+ };
+ struct stat st = {
+ 0,
+ };
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char path[PATH_MAX] = {
+ 0,
+ };
+
+ if (run_teardown) {
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "teardown",
+ CONFDIR, NULL);
+ ret = runner_run(&runner);
+ /* *
+ * Remove all the entries in CONFDIR expect ganesha.conf and
+ * ganesha-ha.conf
+ */
+ dir = sys_opendir(CONFDIR);
+ if (!dir) {
+ gf_msg_debug(THIS->name, 0,
+ "Failed to open directory %s. "
+ "Reason : %s",
+ CONFDIR, strerror(errno));
+ ret = 0;
+ goto out;
+ }
+
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
+ snprintf(path, PATH_MAX, "%s/%s", CONFDIR, entry->d_name);
+ ret = sys_lstat(path, &st);
+ if (ret == -1) {
+ gf_msg_debug(THIS->name, 0,
+ "Failed to stat entry %s :"
+ " %s",
+ path, strerror(errno));
+ goto out;
+ }
+
+ if (strcmp(entry->d_name, "ganesha.conf") == 0 ||
+ strcmp(entry->d_name, "ganesha-ha.conf") == 0)
+ gf_msg_debug(THIS->name, 0,
+ " %s is not required"
+ " to remove",
+ path);
+ else if (S_ISDIR(st.st_mode))
+ ret = recursive_rmdir(path);
+ else
+ ret = sys_unlink(path);
+
+ if (ret) {
+ gf_msg_debug(THIS->name, 0,
+ " Failed to remove %s. "
+ "Reason : %s",
+ path, strerror(errno));
+ }
+
+ gf_msg_debug(THIS->name, 0, "%s %s",
+ ret ? "Failed to remove" : "Removed", entry->d_name);
+ }
+
+ ret = sys_closedir(dir);
+ if (ret) {
+ gf_msg_debug(THIS->name, 0,
+ "Failed to close dir %s. Reason :"
+ " %s",
+ CONFDIR, strerror(errno));
+ }
+ goto exit;
+ }
+
+out:
+ if (dir && sys_closedir(dir)) {
+ gf_msg_debug(THIS->name, 0,
+ "Failed to close dir %s. Reason :"
+ " %s",
+ CONFDIR, strerror(errno));
+ }
+exit:
+ return ret;
+}
+
+int
+setup_cluster(gf_boolean_t run_setup)
+{
+ int ret = 0;
+ runner_t runner = {
+ 0,
+ };
+
+ if (run_setup) {
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "setup",
+ CONFDIR, NULL);
+ ret = runner_run(&runner);
+ }
+ return ret;
+}
+
+static int
+teardown(gf_boolean_t run_teardown, char **op_errstr)
+{
+ runner_t runner = {
+ 0,
+ };
+ int ret = 1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *vol_opts = NULL;
+
+ priv = THIS->private;
+
+ ret = tear_down_cluster(run_teardown);
+ if (ret == -1) {
+ gf_asprintf(op_errstr,
+ "Cleanup of NFS-Ganesha"
+ " HA config failed.");
+ goto out;
+ }
+
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh", "cleanup",
+ CONFDIR, NULL);
+ ret = runner_run(&runner);
+ if (ret)
+ gf_msg_debug(THIS->name, 0,
+ "Could not clean up"
+ " NFS-Ganesha related config");
+
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ vol_opts = volinfo->dict;
+ /* All the volumes exported via NFS-Ganesha will be
+ unexported, hence setting the appropriate keys */
+ ret = dict_set_str(vol_opts, "features.cache-invalidation", "off");
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED,
+ "Could not set features.cache-invalidation "
+ "to off for %s",
+ volinfo->volname);
+
+ ret = dict_set_str(vol_opts, "ganesha.enable", "off");
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_DICT_SET_FAILED,
+ "Could not set ganesha.enable to off for %s",
+ volinfo->volname);
+
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL,
+ "failed to store volinfo for %s", volinfo->volname);
+ }
+out:
+ return ret;
+}
+
+int
+stop_ganesha(char **op_errstr)
+{
+ int ret = 0;
+ runner_t runner = {
+ 0,
+ };
+
+ if (check_host_list()) {
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh",
+ "--setup-ganesha-conf-files", CONFDIR, "no", NULL);
+ ret = runner_run(&runner);
+ if (ret) {
+ gf_asprintf(op_errstr,
+ "removal of symlink ganesha.conf "
+ "in /etc/ganesha failed");
+ }
+ ret = manage_service("stop");
+ if (ret)
+ gf_asprintf(op_errstr,
+ "NFS-Ganesha service could not"
+ "be stopped.");
+ }
+ return ret;
+}
+
+int
+start_ganesha(char **op_errstr)
+{
+ int ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ runner_t runner = {
+ 0,
+ };
+
+ priv = THIS->private;
+ GF_ASSERT(priv);
+
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+#ifdef BUILD_GNFS
+ /* Gluster-nfs has to be disabled across the trusted pool */
+ /* before attempting to start nfs-ganesha */
+ ret = dict_set_str_sizen(volinfo->dict, NFS_DISABLE_MAP_KEY, "on");
+ if (ret)
+ goto out;
+#endif
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ *op_errstr = gf_strdup(
+ "Failed to store the "
+ "Volume information");
+ goto out;
+ }
+ }
+
+ /* If the nfs svc is not initialized it means that the service is not
+ * running, hence we can skip the process of stopping gluster-nfs
+ * service
+ */
+#ifdef BUILD_GNFS
+ if (priv->nfs_svc.inited) {
+ ret = priv->nfs_svc.stop(&(priv->nfs_svc), SIGKILL);
+ if (ret) {
+ ret = -1;
+ gf_asprintf(op_errstr,
+ "Gluster-NFS service could"
+ "not be stopped, exiting.");
+ goto out;
+ }
+ }
+#endif
+
+ if (check_host_list()) {
+ runinit(&runner);
+ runner_add_args(&runner, GANESHA_PREFIX "/ganesha-ha.sh",
+ "--setup-ganesha-conf-files", CONFDIR, "yes", NULL);
+ ret = runner_run(&runner);
+ if (ret) {
+ gf_asprintf(op_errstr,
+ "creation of symlink ganesha.conf "
+ "in /etc/ganesha failed");
+ goto out;
+ }
+ ret = manage_service("start");
+ if (ret)
+ gf_asprintf(op_errstr,
+ "NFS-Ganesha failed to start."
+ "Please see log file for details");
+ }
+
+out:
+ return ret;
+}
+
+static int
+pre_setup(gf_boolean_t run_setup, char **op_errstr)
+{
+ int ret = 0;
+ if (run_setup) {
+ if (!check_host_list()) {
+ gf_asprintf(op_errstr,
+ "Running nfs-ganesha setup command "
+ "from node which is not part of ganesha cluster");
+ return -1;
+ }
+ }
+ ret = setup_cluster(run_setup);
+ if (ret == -1)
+ gf_asprintf(op_errstr,
+ "Failed to set up HA "
+ "config for NFS-Ganesha. "
+ "Please check the log file for details");
+ return ret;
+}
+
+int
+glusterd_handle_ganesha_op(dict_t *dict, char **op_errstr, char *key,
+ char *value)
+{
+ int32_t ret = -1;
+ gf_boolean_t option = _gf_false;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(key);
+ GF_ASSERT(value);
+
+ if (strcmp(key, "ganesha.enable") == 0) {
+ ret = ganesha_manage_export(dict, value, _gf_true, op_errstr);
+ if (ret < 0)
+ goto out;
+ }
+
+ /* It is possible that the key might not be set */
+ ret = gf_string2boolean(value, &option);
+ if (ret == -1) {
+ gf_asprintf(op_errstr, "Invalid value in key-value pair.");
+ goto out;
+ }
+
+ if (strcmp(key, GLUSTERD_STORE_KEY_GANESHA_GLOBAL) == 0) {
+ /* *
+ * The set up/teardown of pcs cluster should be performed only
+ * once. This will done on the node in which the cli command
+ * 'gluster nfs-ganesha <enable/disable>' got executed. So that
+ * node should part of ganesha HA cluster
+ */
+ if (option) {
+ ret = pre_setup(is_origin_glusterd(dict), op_errstr);
+ if (ret < 0)
+ goto out;
+ } else {
+ ret = teardown(is_origin_glusterd(dict), op_errstr);
+ if (ret < 0)
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
index c66b2db578e..bf062c87060 100644
--- a/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
+++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.c
@@ -1,165 +1,414 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#include "common-utils.h"
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/common-utils.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
#include "glusterd.h"
#include "glusterd-op-sm.h"
+#include "glusterd-geo-rep.h"
#include "glusterd-store.h"
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
-#include "run.h"
-#include "syscall.h"
+#include "glusterd-svc-helper.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
+#include "glusterd-messages.h"
#include <signal.h>
-static char *gsync_reserved_opts[] = {
- "gluster-command-dir",
- "pid-file",
- "state-file",
- "session-owner",
- NULL
+static int
+dict_get_param(dict_t *dict, char *key, char **param);
+
+struct gsync_config_opt_vals_ gsync_confopt_vals[] = {
+ {
+ .op_name = "change_detector",
+ .no_of_pos_vals = 2,
+ .case_sensitive = _gf_true,
+ .values = {"xsync", "changelog"},
+ },
+ {.op_name = "special_sync_mode",
+ .no_of_pos_vals = 2,
+ .case_sensitive = _gf_true,
+ .values = {"partial", "recover"}},
+ {.op_name = "log-level",
+ .no_of_pos_vals = 5,
+ .case_sensitive = _gf_false,
+ .values = {"critical", "error", "warning", "info", "debug"}},
+ {.op_name = "use-tarssh",
+ .no_of_pos_vals = 6,
+ .case_sensitive = _gf_false,
+ .values = {"true", "false", "0", "1", "yes", "no"}},
+ {.op_name = "ignore_deletes",
+ .no_of_pos_vals = 6,
+ .case_sensitive = _gf_false,
+ .values = {"true", "false", "0", "1", "yes", "no"}},
+ {.op_name = "use_meta_volume",
+ .no_of_pos_vals = 6,
+ .case_sensitive = _gf_false,
+ .values = {"true", "false", "0", "1", "yes", "no"}},
+ {.op_name = "use-meta-volume",
+ .no_of_pos_vals = 6,
+ .case_sensitive = _gf_false,
+ .values = {"true", "false", "0", "1", "yes", "no"}},
+ {
+ .op_name = NULL,
+ },
};
+static char *gsync_reserved_opts[] = {"gluster-command",
+ "pid-file",
+ "state-file",
+ "session-owner",
+ "state-socket-unencoded",
+ "socketdir",
+ "local-id",
+ "local-path",
+ "slave-id",
+ NULL};
+
+static char *gsync_no_restart_opts[] = {"checkpoint", "log_rsync_performance",
+ "log-rsync-performance", NULL};
+
+void
+set_gsyncd_inet6_arg(runner_t *runner)
+{
+ xlator_t *this = NULL;
+ char *af;
+ int ret;
+
+ this = THIS;
+ ret = dict_get_str(this->options, "transport.address-family", &af);
+ if (ret == 0)
+ runner_argprintf(runner, "--%s", af);
+}
+
int
-glusterd_handle_gsync_set (rpcsvc_request_t *req)
-{
- int32_t ret = 0;
- dict_t *dict = NULL;
- gf_cli_req cli_req = {{0},};
- glusterd_op_t cli_op = GD_OP_GSYNC_SET;
- char *master = NULL;
- char *slave = NULL;
- char operation[256] = {0,};
- int type = 0;
- glusterd_conf_t *priv = NULL;
- char *host_uuid = NULL;
-
- GF_ASSERT (req);
- GF_ASSERT (THIS);
- GF_ASSERT (THIS->private);
+__glusterd_handle_sys_exec(rpcsvc_request_t *req)
+{
+ int32_t ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req cli_req = {
+ {0},
+ };
+ glusterd_op_t cli_op = GD_OP_SYS_EXEC;
+ glusterd_conf_t *priv = NULL;
+ char *host_uuid = NULL;
+ char err_str[64] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ GF_ASSERT(req);
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ snprintf(err_str, sizeof(err_str), "Garbage args received");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ goto out;
+ }
- priv = THIS->private;
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(err_str, sizeof(err_str),
+ "Unable to decode "
+ "the command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
- if (!xdr_to_generic (req->msg[0], &cli_req,
- (xdrproc_t)xdr_gf_cli_req)) {
- req->rpc_err = GARBAGE_ARGS;
- goto out;
+ host_uuid = gf_strdup(uuid_utoa(MY_UUID));
+ if (host_uuid == NULL) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to get "
+ "the uuid of local glusterd");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL,
+ NULL);
+ ret = -1;
+ goto out;
}
- if (cli_req.dict.dict_len) {
- dict = dict_new ();
- if (!dict)
- goto out;
+ ret = dict_set_dynstr(dict, "host-uuid", host_uuid);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=host-uuid", NULL);
+ goto out;
+ }
+ }
- ret = dict_unserialize (cli_req.dict.dict_val,
- cli_req.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR, "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- } else {
- dict->extra_stdfree = cli_req.dict.dict_val;
- }
+ ret = glusterd_op_begin_synctask(req, cli_op, dict);
- host_uuid = gf_strdup (uuid_utoa(MY_UUID));
- if (host_uuid == NULL) {
- gf_log ("glusterd", GF_LOG_ERROR, "failed to get"
- "the uuid of the host machine");
- ret = -1;
- goto out;
- }
- ret = dict_set_dynstr (dict, "host-uuid", host_uuid);
- if (ret)
- goto out;
+out:
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf(err_str, sizeof(err_str), "Operation failed");
+ ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str);
+ }
+ return ret;
+}
+int
+__glusterd_handle_copy_file(rpcsvc_request_t *req)
+{
+ int32_t ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req cli_req = {
+ {0},
+ };
+ glusterd_op_t cli_op = GD_OP_COPY_FILE;
+ glusterd_conf_t *priv = NULL;
+ char *host_uuid = NULL;
+ char err_str[64] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ GF_ASSERT(req);
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ snprintf(err_str, sizeof(err_str), "Garbage args received");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ goto out;
}
- ret = dict_get_str (dict, "master", &master);
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
if (ret < 0) {
- gf_log ("", GF_LOG_INFO, "master not found, while handling"
- GEOREP" options");
- master = "(No Master)";
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to"
+ "unserialize req-buffer to dictionary");
+ snprintf(err_str, sizeof(err_str),
+ "Unable to decode "
+ "the command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
}
- ret = dict_get_str (dict, "slave", &slave);
- if (ret < 0) {
- gf_log ("", GF_LOG_INFO, "slave not not found, while"
- "handling "GEOREP" options");
- slave = "(No Slave)";
+ host_uuid = gf_strdup(uuid_utoa(MY_UUID));
+ if (host_uuid == NULL) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to get "
+ "the uuid of local glusterd");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL,
+ NULL);
+ ret = -1;
+ goto out;
}
- ret = dict_get_int32 (dict, "type", &type);
+ ret = dict_set_dynstr(dict, "host-uuid", host_uuid);
+ if (ret)
+ goto out;
+ }
+
+ ret = glusterd_op_begin_synctask(req, cli_op, dict);
+
+out:
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf(err_str, sizeof(err_str), "Operation failed");
+ ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str);
+ }
+ return ret;
+}
+
+int
+__glusterd_handle_gsync_set(rpcsvc_request_t *req)
+{
+ int32_t ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req cli_req = {
+ {0},
+ };
+ glusterd_op_t cli_op = GD_OP_GSYNC_SET;
+ char *master = NULL;
+ char *slave = NULL;
+ char operation[64] = {
+ 0,
+ };
+ int type = 0;
+ glusterd_conf_t *priv = NULL;
+ char *host_uuid = NULL;
+ char err_str[64] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ GF_ASSERT(req);
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ snprintf(err_str, sizeof(err_str), "Garbage args received");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ goto out;
+ }
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
if (ret < 0) {
- gf_log ("", GF_LOG_WARNING, "command type not found, while"
- "handling "GEOREP" options");
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(err_str, sizeof(err_str),
+ "Unable to decode "
+ "the command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
}
- switch (type) {
+ host_uuid = gf_strdup(uuid_utoa(MY_UUID));
+ if (host_uuid == NULL) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to get "
+ "the uuid of local glusterd");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_UUID_GET_FAIL,
+ NULL);
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr(dict, "host-uuid", host_uuid);
+ if (ret)
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "master", &master);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED,
+ "master not found, while handling " GEOREP " options");
+ master = "(No Master)";
+ }
+
+ ret = dict_get_str(dict, "slave", &slave);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED,
+ "slave not found, while handling " GEOREP " options");
+ slave = "(No Slave)";
+ }
+
+ ret = dict_get_int32(dict, "type", &type);
+ if (ret < 0) {
+ snprintf(err_str, sizeof(err_str),
+ "Command type not found "
+ "while handling " GEOREP " options");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ err_str);
+ goto out;
+ }
+
+ switch (type) {
+ case GF_GSYNC_OPTION_TYPE_CREATE:
+ snprintf(operation, sizeof(operation), "create");
+ cli_op = GD_OP_GSYNC_CREATE;
+ break;
case GF_GSYNC_OPTION_TYPE_START:
- strncpy (operation, "start", sizeof (operation));
- break;
+ snprintf(operation, sizeof(operation), "start");
+ break;
case GF_GSYNC_OPTION_TYPE_STOP:
- strncpy (operation, "stop", sizeof (operation));
- break;
+ snprintf(operation, sizeof(operation), "stop");
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_PAUSE:
+ snprintf(operation, sizeof(operation), "pause");
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_RESUME:
+ snprintf(operation, sizeof(operation), "resume");
+ break;
case GF_GSYNC_OPTION_TYPE_CONFIG:
- strncpy (operation, "config", sizeof (operation));
- break;
+ snprintf(operation, sizeof(operation), "config");
+ break;
case GF_GSYNC_OPTION_TYPE_STATUS:
- strncpy (operation, "status", sizeof (operation));
- break;
- case GF_GSYNC_OPTION_TYPE_ROTATE:
- strncpy (operation, "rotate", sizeof(operation));
- break;
- }
+ snprintf(operation, sizeof(operation), "status");
+ break;
+ }
- gf_cmd_log ("volume "GEOREP, " %s command on %s,%s", operation, master,
- slave);
- ret = glusterd_op_begin (req, GD_OP_GSYNC_SET, dict);
- gf_cmd_log ("volume "GEOREP, " %s command on %s,%s %s ", operation,
- master, slave, (ret != 0)? "FAILED" : "SUCCEEDED");
+ ret = glusterd_op_begin_synctask(req, cli_op, dict);
out:
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf(err_str, sizeof(err_str), "Operation failed");
+ ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str);
+ }
+ return ret;
+}
- if (ret) {
- if (dict)
- dict_unref (dict);
- ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
- NULL, "operation failed");
- }
- return ret;
+int
+glusterd_handle_sys_exec(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __glusterd_handle_sys_exec);
+}
+
+int
+glusterd_handle_copy_file(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __glusterd_handle_copy_file);
}
+int
+glusterd_handle_gsync_set(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __glusterd_handle_gsync_set);
+}
/*****
*
@@ -168,2045 +417,6366 @@ out:
*****/
static void
-glusterd_urltransform_init (runner_t *runner, const char *transname)
+glusterd_urltransform_init(runner_t *runner, const char *transname)
{
- runinit (runner);
- runner_add_arg (runner, GSYNCD_PREFIX"/gsyncd");
- runner_argprintf (runner, "--%s-url", transname);
+ runinit(runner);
+ runner_add_arg(runner, GSYNCD_PREFIX "/gsyncd");
+ set_gsyncd_inet6_arg(runner);
+ runner_argprintf(runner, "--%s-url", transname);
}
static void
-glusterd_urltransform_add (runner_t *runner, const char *url)
+glusterd_urltransform_add(runner_t *runner, const char *url)
{
- runner_add_arg (runner, url);
+ runner_add_arg(runner, url);
}
-static void
-_glusterd_urltransform_add_iter (dict_t *dict, char *key, data_t *value, void *data)
+/* Helper routine to terminate just before slave_voluuid */
+static int32_t
+parse_slave_url(char *slv_url, char **slave)
{
- runner_t *runner = (runner_t *)data;
- char *slave = NULL;
+ char *tmp = NULL;
+ xlator_t *this = NULL;
+ int32_t ret = -1;
+
+ this = THIS;
+
+ /* slave format:
+ * master_node_uuid:ssh://slave_host::slave_vol:slave_voluuid */
+ *slave = strchr(slv_url, ':');
+ if (!(*slave)) {
+ goto out;
+ }
+ (*slave)++;
+
+ /* To terminate at : before slave volume uuid */
+ tmp = strstr(*slave, "::");
+ if (!tmp) {
+ goto out;
+ }
+ tmp += 2;
+ tmp = strchr(tmp, ':');
+ if (!tmp)
+ gf_msg_debug(this->name, 0, "old slave: %s!", *slave);
+ else
+ *tmp = '\0';
+
+ ret = 0;
+ gf_msg_debug(this->name, 0, "parsed slave: %s!", *slave);
+out:
+ return ret;
+}
- slave = strchr (value->data, ':');
- GF_ASSERT (slave);
- slave++;
- runner_add_arg (runner, slave);
+static int
+_glusterd_urltransform_add_iter(dict_t *dict, char *key, data_t *value,
+ void *data)
+{
+ runner_t *runner = (runner_t *)data;
+ char slv_url[VOLINFO_SLAVE_URL_MAX] = {0};
+ char *slave = NULL;
+ xlator_t *this = NULL;
+ int32_t ret = -1;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ gf_msg_debug(this->name, 0, "value->data %s", value->data);
+
+ if (snprintf(slv_url, sizeof(slv_url), "%s", value->data) >=
+ sizeof(slv_url)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL,
+ "Error in copying slave: %s!", value->data);
+ goto out;
+ }
+
+ ret = parse_slave_url(slv_url, &slave);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL,
+ "Error in parsing slave: %s!", value->data);
+ goto out;
+ }
+
+ runner_add_arg(runner, slave);
+ ret = 0;
+out:
+ return ret;
}
static void
-glusterd_urltransform_free (char **linearr, unsigned n)
+glusterd_urltransform_free(char **linearr, unsigned n)
{
- int i = 0;
+ int i = 0;
- for (; i < n; i++)
- GF_FREE (linearr[i]);
+ for (; i < n; i++)
+ GF_FREE(linearr[i]);
- GF_FREE (linearr);
+ GF_FREE(linearr);
}
static int
-glusterd_urltransform (runner_t *runner, char ***linearrp)
+glusterd_urltransform(runner_t *runner, char ***linearrp)
{
- char **linearr = NULL;
- char *line = NULL;
- unsigned arr_len = 32;
- unsigned arr_idx = 0;
- gf_boolean_t error = _gf_false;
-
- linearr = GF_CALLOC (arr_len, sizeof (char *), gf_gld_mt_linearr);
- if (!linearr) {
- error = _gf_true;
- goto out;
+ char **linearr = NULL;
+ char *line = NULL;
+ unsigned arr_len = 32;
+ unsigned arr_idx = 0;
+ gf_boolean_t error = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ linearr = GF_CALLOC(arr_len, sizeof(char *), gf_gld_mt_linearr);
+ if (!linearr) {
+ error = _gf_true;
+ goto out;
+ }
+
+ runner_redir(runner, STDOUT_FILENO, RUN_PIPE);
+ if (runner_start(runner) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SPAWNING_CHILD_FAILED,
+ "spawning child failed");
+
+ error = _gf_true;
+ goto out;
+ }
+
+ arr_idx = 0;
+ for (;;) {
+ size_t len;
+ line = GF_MALLOC(1024, gf_gld_mt_linebuf);
+ if (!line) {
+ error = _gf_true;
+ goto out;
}
- runner_redir (runner, STDOUT_FILENO, RUN_PIPE);
- if (runner_start (runner) != 0) {
- gf_log ("", GF_LOG_ERROR, "spawning child failed");
+ if (fgets(line, 1024, runner_chio(runner, STDOUT_FILENO)) == NULL) {
+ GF_FREE(line);
+ break;
+ }
+ len = strlen(line);
+ if (len == 0 || line[len - 1] != '\n') {
+ GF_FREE(line);
+ error = _gf_true;
+ goto out;
+ }
+ line[len - 1] = '\0';
+
+ if (arr_idx == arr_len) {
+ void *p = linearr;
+ arr_len <<= 1;
+ p = GF_REALLOC(linearr, arr_len);
+ if (!p) {
+ GF_FREE(line);
error = _gf_true;
goto out;
+ }
+ linearr = p;
}
+ linearr[arr_idx] = line;
- arr_idx = 0;
- for (;;) {
- line = GF_MALLOC (1024, gf_gld_mt_linebuf);
- if (!line) {
- error = _gf_true;
- goto out;
- }
-
- if (fgets (line, 1024, runner_chio (runner, STDOUT_FILENO)) ==
- NULL)
- break;
-
- if (line[strlen (line) - 1] != '\n') {
- GF_FREE (line);
- error = _gf_true;
- goto out;
- }
- line[strlen (line) - 1] = '\0';
-
- if (arr_idx == arr_len) {
- arr_len <<= 1;
- linearr = GF_REALLOC (linearr, arr_len);
- if (!linearr) {
- GF_FREE (line);
- error = _gf_true;
- goto out;
- }
- }
- linearr[arr_idx] = line;
-
- arr_idx++;
- }
-
- out:
+ arr_idx++;
+ }
- /* XXX chpid field is not exported by run API
- * but runner_end() does not abort the invoked
- * process (ie. it might block in waitpid(2))
- * so we resort to a manual kill a the private field
- */
- if (error && runner->chpid > 0)
- kill (runner->chpid, SIGKILL);
-
- if (runner_end (runner) != 0)
- error = _gf_true;
+out:
- if (error) {
- gf_log ("", GF_LOG_ERROR, "reading data from child failed");
- glusterd_urltransform_free (linearr, arr_idx);
- return -1;
- }
+ /* XXX chpid field is not exported by run API
+ * but runner_end() does not abort the invoked
+ * process (ie. it might block in waitpid(2))
+ * so we resort to a manual kill a the private field
+ */
+ if (error && runner->chpid > 0)
+ kill(runner->chpid, SIGKILL);
+
+ if (runner_end(runner) != 0)
+ error = _gf_true;
+
+ if (error) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_READ_CHILD_DATA_FAILED,
+ "reading data from child failed");
+ glusterd_urltransform_free(linearr, arr_idx);
+ return -1;
+ }
- *linearrp = linearr;
- return arr_idx;
+ *linearrp = linearr;
+ return arr_idx;
}
static int
-glusterd_urltransform_single (const char *url, const char *transname,
- char ***linearrp)
+glusterd_urltransform_single(const char *url, const char *transname,
+ char ***linearrp)
{
- runner_t runner = {0,};
+ runner_t runner = {
+ 0,
+ };
- glusterd_urltransform_init (&runner, transname);
- glusterd_urltransform_add (&runner, url);
- return glusterd_urltransform (&runner, linearrp);
+ glusterd_urltransform_init(&runner, transname);
+ glusterd_urltransform_add(&runner, url);
+ return glusterd_urltransform(&runner, linearrp);
}
-
struct dictidxmark {
- unsigned isrch;
- unsigned ithis;
- char *ikey;
+ unsigned isrch;
+ unsigned ithis;
+ char *ikey;
};
-static void
-_dict_mark_atindex (dict_t *dict, char *key, data_t *value, void *data)
+struct slave_vol_config {
+ char old_slvhost[_POSIX_HOST_NAME_MAX + 1];
+ char old_slvuser[LOGIN_NAME_MAX];
+ unsigned old_slvidx;
+ char slave_voluuid[UUID_CANONICAL_FORM_LEN + 1];
+};
+
+static int
+_dict_mark_atindex(dict_t *dict, char *key, data_t *value, void *data)
{
- struct dictidxmark *dim = data;
+ struct dictidxmark *dim = data;
- if (dim->isrch == dim->ithis)
- dim->ikey = key;
+ if (dim->isrch == dim->ithis)
+ dim->ikey = key;
- dim->ithis++;
+ dim->ithis++;
+ return 0;
}
static char *
-dict_get_by_index (dict_t *dict, unsigned i)
+dict_get_by_index(dict_t *dict, unsigned i)
{
- struct dictidxmark dim = {0,};
+ struct dictidxmark dim = {
+ 0,
+ };
- dim.isrch = i;
- dict_foreach (dict, _dict_mark_atindex, &dim);
+ dim.isrch = i;
+ dict_foreach(dict, _dict_mark_atindex, &dim);
- return dim.ikey;
+ return dim.ikey;
}
static int
-glusterd_get_slave (glusterd_volinfo_t *vol, const char *slaveurl, char **slavekey)
+glusterd_get_slave(glusterd_volinfo_t *vol, const char *slaveurl,
+ char **slavekey)
{
- runner_t runner = {0,};
- int n = 0;
- int i = 0;
- char **linearr = NULL;
+ runner_t runner = {
+ 0,
+ };
+ int n = 0;
+ int i = 0;
+ char **linearr = NULL;
+ int32_t ret = 0;
+
+ glusterd_urltransform_init(&runner, "canonicalize");
+ ret = dict_foreach(vol->gsync_slaves, _glusterd_urltransform_add_iter,
+ &runner);
+ if (ret < 0)
+ return -2;
+
+ glusterd_urltransform_add(&runner, slaveurl);
+
+ n = glusterd_urltransform(&runner, &linearr);
+ if (n == -1)
+ return -2;
+
+ for (i = 0; i < n - 1; i++) {
+ if (strcmp(linearr[i], linearr[n - 1]) == 0)
+ break;
+ }
+ glusterd_urltransform_free(linearr, n);
+
+ if (i < n - 1)
+ *slavekey = dict_get_by_index(vol->gsync_slaves, i);
+ else
+ i = -1;
+
+ return i;
+}
- glusterd_urltransform_init (&runner, "canonicalize");
- dict_foreach (vol->gsync_slaves, _glusterd_urltransform_add_iter, &runner);
- glusterd_urltransform_add (&runner, slaveurl);
+static int
+glusterd_query_extutil_generic(char *resbuf, size_t blen, runner_t *runner,
+ void *data,
+ int (*fcbk)(char *resbuf, size_t blen, FILE *fp,
+ void *data))
+{
+ int ret = 0;
+ xlator_t *this = NULL;
- n = glusterd_urltransform (&runner, &linearr);
- if (n == -1)
- return -2;
+ this = THIS;
+ GF_ASSERT(this);
- for (i = 0; i < n - 1; i++) {
- if (strcmp (linearr[i], linearr[n - 1]) == 0)
- break;
- }
- glusterd_urltransform_free (linearr, i);
+ runner_redir(runner, STDOUT_FILENO, RUN_PIPE);
+ if (runner_start(runner) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SPAWNING_CHILD_FAILED,
+ "spawning child failed");
- if (i < n - 1)
- *slavekey = dict_get_by_index (vol->gsync_slaves, i);
- else
- i = -1;
+ return -1;
+ }
+
+ ret = fcbk(resbuf, blen, runner_chio(runner, STDOUT_FILENO), data);
- return i;
+ ret |= runner_end(runner);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_READ_CHILD_DATA_FAILED,
+ "reading data from child failed");
+
+ return ret ? -1 : 0;
}
+static int
+_fcbk_singleline(char *resbuf, size_t blen, FILE *fp, void *data)
+{
+ char *ptr = NULL;
+
+ errno = 0;
+ ptr = fgets(resbuf, blen, fp);
+ if (ptr) {
+ size_t len = strlen(resbuf);
+ if (len && resbuf[len - 1] == '\n')
+ resbuf[len - 1] = '\0'; // strip off \n
+ }
+
+ return errno ? -1 : 0;
+}
+
+static int
+glusterd_query_extutil(char *resbuf, runner_t *runner)
+{
+ return glusterd_query_extutil_generic(resbuf, PATH_MAX, runner, NULL,
+ _fcbk_singleline);
+}
static int
-glusterd_query_extutil_generic (char *resbuf, size_t blen, runner_t *runner, void *data,
- int (*fcbk)(char *resbuf, size_t blen, FILE *fp, void *data))
+glusterd_get_slave_voluuid(char *slave_host, char *slave_vol, char *vol_uuid)
{
- int ret = 0;
+ runner_t runner = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
- runner_redir (runner, STDOUT_FILENO, RUN_PIPE);
- if (runner_start (runner) != 0) {
- gf_log ("", GF_LOG_ERROR, "spawning child failed");
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
- return -1;
- }
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
- ret = fcbk (resbuf, blen, runner_chio (runner, STDOUT_FILENO), data);
+ runinit(&runner);
+ runner_add_arg(&runner, GSYNCD_PREFIX "/gsyncd");
+ set_gsyncd_inet6_arg(&runner);
+ runner_add_arg(&runner, "--slavevoluuid-get");
+ runner_argprintf(&runner, "%s::%s", slave_host, slave_vol);
- ret |= runner_end (runner);
- if (ret)
- gf_log ("", GF_LOG_ERROR, "reading data from child failed");
+ synclock_unlock(&priv->big_lock);
+ ret = glusterd_query_extutil(vol_uuid, &runner);
+ synclock_lock(&priv->big_lock);
- return ret ? -1 : 0;
+out:
+ return ret;
}
static int
-_fcbk_singleline(char *resbuf, size_t blen, FILE *fp, void *data)
+_fcbk_conftodict(char *resbuf, size_t blen, FILE *fp, void *data)
{
- char *ptr = NULL;
+ char *ptr = NULL;
+ dict_t *dict = data;
+ char *v = NULL;
+ for (;;) {
errno = 0;
- ptr = fgets (resbuf, blen, fp);
- if (ptr)
- resbuf[strlen(resbuf)-1] = '\0'; //strip off \n
+ ptr = fgets(resbuf, blen - 2, fp);
+ if (!ptr)
+ break;
+ v = resbuf + strlen(resbuf) - 1;
+ while (isspace(*v))
+ /* strip trailing space */
+ *v-- = '\0';
+ if (v == resbuf)
+ /* skip empty line */
+ continue;
+ v = strchr(resbuf, ':');
+ if (!v)
+ return -1;
+ *v++ = '\0';
+ while (isspace(*v))
+ v++;
+ v = gf_strdup(v);
+ if (!v)
+ return -1;
+ if (dict_set_dynstr(dict, resbuf, v) != 0) {
+ GF_FREE(v);
+ return -1;
+ }
+ }
- return errno ? -1 : 0;
+ return errno ? -1 : 0;
}
static int
-glusterd_query_extutil (char *resbuf, runner_t *runner)
+glusterd_gsync_get_config(char *master, char *slave, char *conf_path,
+ dict_t *dict)
{
- return glusterd_query_extutil_generic (resbuf, PATH_MAX, runner, NULL,
- _fcbk_singleline);
+ /* key + value, where value must be able to accommodate a path */
+ char resbuf[256 + PATH_MAX] = {
+ 0,
+ };
+ runner_t runner = {
+ 0,
+ };
+
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
+ runner_argprintf(&runner, "%s", conf_path);
+ set_gsyncd_inet6_arg(&runner);
+ runner_argprintf(&runner, "--iprefix=%s", DATADIR);
+ runner_argprintf(&runner, ":%s", master);
+ runner_add_args(&runner, slave, "--config-get-all", NULL);
+
+ return glusterd_query_extutil_generic(resbuf, sizeof(resbuf), &runner, dict,
+ _fcbk_conftodict);
}
static int
-_fcbk_conftodict (char *resbuf, size_t blen, FILE *fp, void *data)
-{
- char *ptr = NULL;
- dict_t *dict = data;
- char *v = NULL;
-
- for (;;) {
- errno = 0;
- ptr = fgets (resbuf, blen, fp);
- if (!ptr)
- break;
- v = resbuf + strlen(resbuf) - 1;
- while (isspace (*v))
- /* strip trailing space */
- *v-- = '\0';
- if (v == resbuf)
- /* skip empty line */
- continue;
- v = strchr (resbuf, ':');
- if (!v)
- return -1;
- *v++ = '\0';
- while (isspace (*v))
- v++;
- v = gf_strdup (v);
- if (!v)
- return -1;
- if (dict_set_dynstr (dict, resbuf, v) != 0) {
- GF_FREE (v);
- return -1;
- }
+_fcbk_statustostruct(char *resbuf, size_t blen, FILE *fp, void *data)
+{
+ char *ptr = NULL;
+ char *v = NULL;
+ char *k = NULL;
+ gf_gsync_status_t *sts_val = NULL;
+ size_t len = 0;
+
+ sts_val = (gf_gsync_status_t *)data;
+
+ for (;;) {
+ errno = 0;
+ ptr = fgets(resbuf, blen - 2, fp);
+ if (!ptr)
+ break;
+
+ v = resbuf + strlen(resbuf) - 1;
+ while (isspace(*v))
+ /* strip trailing space */
+ *v-- = '\0';
+ if (v == resbuf)
+ /* skip empty line */
+ continue;
+ v = strchr(resbuf, ':');
+ if (!v)
+ return -1;
+ *v++ = '\0';
+ while (isspace(*v))
+ v++;
+ v = gf_strdup(v);
+ if (!v)
+ return -1;
+
+ k = gf_strdup(resbuf);
+ if (!k) {
+ GF_FREE(v);
+ return -1;
+ }
+
+ if (strcmp(k, "worker_status") == 0) {
+ len = min(strlen(v), (sizeof(sts_val->worker_status) - 1));
+ memcpy(sts_val->worker_status, v, len);
+ sts_val->worker_status[len] = '\0';
+ } else if (strcmp(k, "slave_node") == 0) {
+ len = min(strlen(v), (sizeof(sts_val->slave_node) - 1));
+ memcpy(sts_val->slave_node, v, len);
+ sts_val->slave_node[len] = '\0';
+ } else if (strcmp(k, "crawl_status") == 0) {
+ len = min(strlen(v), (sizeof(sts_val->crawl_status) - 1));
+ memcpy(sts_val->crawl_status, v, len);
+ sts_val->crawl_status[len] = '\0';
+ } else if (strcmp(k, "last_synced") == 0) {
+ len = min(strlen(v), (sizeof(sts_val->last_synced) - 1));
+ memcpy(sts_val->last_synced, v, len);
+ sts_val->last_synced[len] = '\0';
+ } else if (strcmp(k, "last_synced_utc") == 0) {
+ len = min(strlen(v), (sizeof(sts_val->last_synced_utc) - 1));
+ memcpy(sts_val->last_synced_utc, v, len);
+ sts_val->last_synced_utc[len] = '\0';
+ } else if (strcmp(k, "entry") == 0) {
+ len = min(strlen(v), (sizeof(sts_val->entry) - 1));
+ memcpy(sts_val->entry, v, len);
+ sts_val->entry[len] = '\0';
+ } else if (strcmp(k, "data") == 0) {
+ len = min(strlen(v), (sizeof(sts_val->data) - 1));
+ memcpy(sts_val->data, v, len);
+ sts_val->data[len] = '\0';
+ } else if (strcmp(k, "meta") == 0) {
+ len = min(strlen(v), (sizeof(sts_val->meta) - 1));
+ memcpy(sts_val->meta, v, len);
+ sts_val->meta[len] = '\0';
+ } else if (strcmp(k, "failures") == 0) {
+ len = min(strlen(v), (sizeof(sts_val->failures) - 1));
+ memcpy(sts_val->failures, v, len);
+ sts_val->failures[len] = '\0';
+ } else if (strcmp(k, "checkpoint_time") == 0) {
+ len = min(strlen(v), (sizeof(sts_val->checkpoint_time) - 1));
+ memcpy(sts_val->checkpoint_time, v, len);
+ sts_val->checkpoint_time[len] = '\0';
+ } else if (strcmp(k, "checkpoint_time_utc") == 0) {
+ len = min(strlen(v), (sizeof(sts_val->checkpoint_time_utc) - 1));
+ memcpy(sts_val->checkpoint_time_utc, v, len);
+ sts_val->checkpoint_time_utc[len] = '\0';
+ } else if (strcmp(k, "checkpoint_completed") == 0) {
+ len = min(strlen(v), (sizeof(sts_val->checkpoint_completed) - 1));
+ memcpy(sts_val->checkpoint_completed, v, len);
+ sts_val->checkpoint_completed[len] = '\0';
+ } else if (strcmp(k, "checkpoint_completion_time") == 0) {
+ len = min(strlen(v),
+ (sizeof(sts_val->checkpoint_completion_time) - 1));
+ memcpy(sts_val->checkpoint_completion_time, v, len);
+ sts_val->checkpoint_completion_time[len] = '\0';
+ } else if (strcmp(k, "checkpoint_completion_time_utc") == 0) {
+ len = min(strlen(v),
+ (sizeof(sts_val->checkpoint_completion_time_utc) - 1));
+ memcpy(sts_val->checkpoint_completion_time_utc, v, len);
+ sts_val->checkpoint_completion_time_utc[len] = '\0';
}
+ GF_FREE(v);
+ GF_FREE(k);
+ }
- return errno ? -1 : 0;
+ return errno ? -1 : 0;
}
static int
-glusterd_gsync_get_config (char *master, char *slave, char *gl_workdir, dict_t *dict)
+glusterd_gsync_get_status(char *master, char *slave, char *conf_path,
+ char *brick_path, gf_gsync_status_t *sts_val)
{
- /* key + value, where value must be able to accommodate a path */
- char resbuf[256 + PATH_MAX] = {0,};
- runner_t runner = {0,};
-
- runinit (&runner);
- runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
- runner_argprintf (&runner, "%s/"GSYNC_CONF, gl_workdir);
- runner_argprintf (&runner, ":%s", master);
- runner_add_args (&runner, slave, "--config-get-all", NULL);
+ /* key + value, where value must be able to accommodate a path */
+ char resbuf[256 + PATH_MAX] = {
+ 0,
+ };
+ runner_t runner = {
+ 0,
+ };
+
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
+ runner_argprintf(&runner, "%s", conf_path);
+ set_gsyncd_inet6_arg(&runner);
+ runner_argprintf(&runner, "--iprefix=%s", DATADIR);
+ runner_argprintf(&runner, ":%s", master);
+ runner_add_args(&runner, slave, "--status-get", NULL);
+ runner_add_args(&runner, "--path", brick_path, NULL);
+
+ return glusterd_query_extutil_generic(resbuf, sizeof(resbuf), &runner,
+ sts_val, _fcbk_statustostruct);
+}
- return glusterd_query_extutil_generic (resbuf, sizeof (resbuf),
- &runner, dict, _fcbk_conftodict);
+static int
+glusterd_gsync_get_param_file(char *prmfile, const char *param, char *master,
+ char *slave, char *conf_path)
+{
+ runner_t runner = {
+ 0,
+ };
+
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
+ runner_argprintf(&runner, "%s", conf_path);
+ set_gsyncd_inet6_arg(&runner);
+ runner_argprintf(&runner, "--iprefix=%s", DATADIR);
+ runner_argprintf(&runner, ":%s", master);
+ runner_add_args(&runner, slave, "--config-get", NULL);
+ runner_argprintf(&runner, "%s-file", param);
+
+ return glusterd_query_extutil(prmfile, &runner);
}
static int
-glusterd_gsync_get_param_file (char *prmfile, const char *param, char *master,
- char *slave, char *gl_workdir)
+gsyncd_getpidfile(char *master, char *slave, char *pidfile, char *conf_path,
+ gf_boolean_t *is_template_in_use)
{
- runner_t runner = {0,};
+ char temp_conf_path[PATH_MAX] = "";
+ char *working_conf_path = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+ struct stat stbuf = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(this->private);
+ GF_ASSERT(conf_path);
+
+ priv = this->private;
+
+ GF_VALIDATE_OR_GOTO("gsync", master, out);
+ GF_VALIDATE_OR_GOTO("gsync", slave, out);
+
+ len = snprintf(temp_conf_path, sizeof(temp_conf_path),
+ "%s/" GSYNC_CONF_TEMPLATE, priv->workdir);
+ if ((len < 0) || (len >= sizeof(temp_conf_path))) {
+ goto out;
+ }
+
+ ret = sys_lstat(conf_path, &stbuf);
+ if (!ret) {
+ gf_msg_debug(this->name, 0, "Using passed config template(%s).",
+ conf_path);
+ working_conf_path = conf_path;
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, ENOENT, GD_MSG_FILE_OP_FAILED,
+ "Config file (%s) missing. Looking for template "
+ "config file (%s)",
+ conf_path, temp_conf_path);
+ ret = sys_lstat(temp_conf_path, &stbuf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED,
+ "Template config file (%s) missing.", temp_conf_path);
+ goto out;
+ }
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DEFAULT_TEMP_CONFIG,
+ "Using default config template(%s).", temp_conf_path);
+ working_conf_path = temp_conf_path;
+ *is_template_in_use = _gf_true;
+ }
+
+fetch_data:
+
+ ret = glusterd_gsync_get_param_file(pidfile, "pid", master, slave,
+ working_conf_path);
+ if ((ret == -1) || strlen(pidfile) == 0) {
+ if (*is_template_in_use == _gf_false) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PIDFILE_CREATE_FAILED,
+ "failed to create the pidfile string. "
+ "Trying default config template");
+ working_conf_path = temp_conf_path;
+ *is_template_in_use = _gf_true;
+ goto fetch_data;
+ } else {
+ ret = -2;
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PIDFILE_CREATE_FAILED,
+ "failed to "
+ "create the pidfile string from template "
+ "config");
+ goto out;
+ }
+ }
- runinit (&runner);
- runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
- runner_argprintf (&runner, "%s/"GSYNC_CONF, gl_workdir);
- runner_argprintf (&runner, ":%s", master);
- runner_add_args (&runner, slave, "--config-get", NULL);
- runner_argprintf (&runner, "%s-file", param);
+ gf_msg_debug(this->name, 0, "pidfile = %s", pidfile);
- return glusterd_query_extutil (prmfile, &runner);
+ ret = open(pidfile, O_RDWR);
+out:
+ return ret;
}
static int
-glusterd_gsync_get_session_owner (char *master, char *slave, char *session_owner,
- char *gl_workdir)
+gsync_status_byfd(int fd)
{
- runner_t runner = {0,};
+ GF_ASSERT(fd >= -1);
- runinit(&runner);
- runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
- runner_argprintf (&runner, "%s/"GSYNC_CONF, gl_workdir);
- runner_argprintf (&runner, ":%s", master);
- runner_add_args (&runner, slave, "--config-get", "session-owner",
- NULL);
+ if (lockf(fd, F_TEST, 0) == -1 && (errno == EAGAIN || errno == EACCES))
+ /* gsyncd keeps the pidfile locked */
+ return 0;
- return glusterd_query_extutil (session_owner, &runner);
+ return -1;
}
-/* check whether @slave is local or remote. normalized
- * urls starting with ssh are considered to be remote
- * @returns
- * 1 if slave is remote
- * 0 is slave is local
+/* status: return 0 when gsync is running
+ * return -1 when not running
*/
-static int
-glusterd_gsync_slave_is_remote (char *slave)
+int
+gsync_status(char *master, char *slave, char *conf_path, int *status,
+ gf_boolean_t *is_template_in_use)
{
- int ret = 0;
- char *ssh_pos = NULL;
+ char pidfile[PATH_MAX] = {
+ 0,
+ };
+ int fd = -1;
+
+ fd = gsyncd_getpidfile(master, slave, pidfile, conf_path,
+ is_template_in_use);
+ if (fd == -2)
+ return -1;
+
+ *status = gsync_status_byfd(fd);
- ssh_pos = strstr(slave, "ssh://");
- if ( ssh_pos && ((ssh_pos - slave) == 0) )
- ret = 1;
+ sys_close(fd);
+
+ return 0;
+}
- return ret;
+static int32_t
+glusterd_gsync_volinfo_dict_set(glusterd_volinfo_t *volinfo, char *key,
+ char *value)
+{
+ int32_t ret = -1;
+ char *gsync_status = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ gsync_status = gf_strdup(value);
+ if (!gsync_status) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Unable to allocate memory");
+ goto out;
+ }
+
+ ret = dict_set_dynstr(volinfo->dict, key, gsync_status);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set dict");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
}
static int
-glusterd_gsync_get_slave_log_file (char *master, char *slave, char *log_file)
+glusterd_verify_gsyncd_spawn(char *master, char *slave)
{
- int ret = -1;
- runner_t runner = {0,};
- char uuid_str[64] = {0,};
- glusterd_conf_t *priv = NULL;
- char *gl_workdir = NULL;
+ int ret = 0;
+ runner_t runner = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "--verify", "spawning",
+ NULL);
+ runner_argprintf(&runner, ":%s", master);
+ runner_add_args(&runner, slave, NULL);
+ runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
+ ret = runner_start(&runner);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SPAWNING_CHILD_FAILED,
+ "spawning child failed");
+ ret = -1;
+ goto out;
+ }
+
+ if (runner_end(&runner) != 0)
+ ret = -1;
- GF_ASSERT(THIS);
- GF_ASSERT(THIS->private);
+out:
+ gf_msg_debug(this->name, 0, "returning %d", ret);
+ return ret;
+}
- priv = THIS->private;
+static int
+gsync_verify_config_options(dict_t *dict, char **op_errstr, char *volname)
+{
+ char **resopt = NULL;
+ int i = 0;
+ int ret = -1;
+ char *subop = NULL;
+ char *slave = NULL;
+ char *op_name = NULL;
+ char *op_value = NULL;
+ char *t = NULL;
+ char errmsg[PATH_MAX] = "";
+ gf_boolean_t banned = _gf_true;
+ gf_boolean_t op_match = _gf_true;
+ gf_boolean_t val_match = _gf_true;
+ struct gsync_config_opt_vals_ *conf_vals = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (dict_get_str(dict, "subop", &subop) != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "missing subop");
+ *op_errstr = gf_strdup("Invalid config request");
+ return -1;
+ }
- GF_VALIDATE_OR_GOTO("gsyncd", master, out);
- GF_VALIDATE_OR_GOTO("gsyncd", slave, out);
+ if (dict_get_str(dict, "slave", &slave) != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ GEOREP " CONFIG: no slave given");
+ *op_errstr = gf_strdup("Slave required");
+ return -1;
+ }
- gl_workdir = priv->workdir;
+ if (strcmp(subop, "get-all") == 0)
+ return 0;
- /* get the session owner for the master-slave session */
- ret = glusterd_gsync_get_session_owner (master, slave, uuid_str,
- gl_workdir);
- if (ret)
- goto out;
+ if (dict_get_str(dict, "op_name", &op_name) != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "option name missing");
+ *op_errstr = gf_strdup("Option name missing");
+ return -1;
+ }
- /* get the log file for the slave */
- runinit(&runner);
- runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
- runner_argprintf (&runner, "%s/"GSYNC_CONF, gl_workdir);
- runner_argprintf (&runner, "--session-owner=%s", uuid_str);
- runner_add_args (&runner, slave, "--config-get", "log-file", NULL);
+ if (runcmd(GSYNCD_PREFIX "/gsyncd", "--config-check", op_name, NULL)) {
+ ret = glusterd_verify_gsyncd_spawn(volname, slave);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_SPAWN_FAILED,
+ "Unable to spawn "
+ "gsyncd");
+ return 0;
+ }
- ret = glusterd_query_extutil (log_file, &runner);
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid option %s", op_name);
+ *op_errstr = gf_strdup("Invalid option");
- out:
- return ret;
-}
+ return -1;
+ }
-static int
-gsyncd_getpidfile (char *master, char *slave, char *pidfile)
-{
- int ret = -1;
- glusterd_conf_t *priv = NULL;
+ if (strcmp(subop, "get") == 0)
+ return 0;
+
+ t = strtail(subop, "set");
+ if (!t)
+ t = strtail(subop, "del");
+ if (!t || (t[0] && strcmp(t, "-glob") != 0)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SUBOP_NOT_FOUND,
+ "unknown subop %s", subop);
+ *op_errstr = gf_strdup("Invalid config request");
+ return -1;
+ }
+
+ if (strtail(subop, "set") &&
+ dict_get_str(dict, "op_value", &op_value) != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "missing value for set");
+ *op_errstr = gf_strdup("missing value");
+ }
+
+ /* match option name against reserved options, modulo -/_
+ * difference
+ */
+ for (resopt = gsync_reserved_opts; *resopt; resopt++) {
+ banned = _gf_true;
+ for (i = 0; (*resopt)[i] && op_name[i]; i++) {
+ if ((*resopt)[i] == op_name[i] ||
+ ((*resopt)[i] == '-' && op_name[i] == '_'))
+ continue;
+ banned = _gf_false;
+ }
- GF_ASSERT (THIS);
- GF_ASSERT (THIS->private);
+ if (op_name[i] != '\0')
+ banned = _gf_false;
- priv = THIS->private;
+ if (banned) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_RESERVED_OPTION,
+ "Reserved option %s", op_name);
+ *op_errstr = gf_strdup("Reserved option");
- GF_VALIDATE_OR_GOTO ("gsync", master, out);
- GF_VALIDATE_OR_GOTO ("gsync", slave, out);
+ return -1;
+ break;
+ }
+ }
+
+ /* Check options in gsync_confopt_vals for invalid values */
+ for (conf_vals = gsync_confopt_vals; conf_vals->op_name; conf_vals++) {
+ op_match = _gf_true;
+ for (i = 0; conf_vals->op_name[i] && op_name[i]; i++) {
+ if (conf_vals->op_name[i] == op_name[i] ||
+ (conf_vals->op_name[i] == '_' && op_name[i] == '-'))
+ continue;
+ op_match = _gf_false;
+ }
- ret = glusterd_gsync_get_param_file (pidfile, "pid", master,
- slave, priv->workdir);
- if (ret == -1) {
- ret = -2;
- gf_log ("", GF_LOG_WARNING, "failed to create the pidfile string");
+ if (op_match) {
+ if (!op_value)
goto out;
+ val_match = _gf_false;
+ for (i = 0; i < conf_vals->no_of_pos_vals; i++) {
+ if (conf_vals->case_sensitive) {
+ if (!strcmp(conf_vals->values[i], op_value))
+ val_match = _gf_true;
+ } else {
+ if (!strcasecmp(conf_vals->values[i], op_value))
+ val_match = _gf_true;
+ }
+ }
+
+ if (!val_match) {
+ ret = snprintf(errmsg, sizeof(errmsg) - 1,
+ "Invalid value(%s) for"
+ " option %s",
+ op_value, op_name);
+ errmsg[ret] = '\0';
+
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "%s", errmsg);
+ *op_errstr = gf_strdup(errmsg);
+ return -1;
+ }
}
-
- ret = open (pidfile, O_RDWR);
-
- out:
- return ret;
+ }
+out:
+ return 0;
}
static int
-glusterd_gsyncd_getlogfile (char *master, char *slave, char *log_file)
+glusterd_get_gsync_status_mst_slv(glusterd_volinfo_t *volinfo, char *slave,
+ char *conf_path, dict_t *rsp_dict,
+ char *node);
+
+static int
+_get_status_mst_slv(dict_t *dict, char *key, data_t *value, void *data)
{
- int ret = -1;
- glusterd_conf_t *priv = NULL;
+ glusterd_gsync_status_temp_t *param = NULL;
+ char *slave = NULL;
+ char *slave_buf = NULL;
+ char *slave_url = NULL;
+ char *slave_vol = NULL;
+ char *slave_host = NULL;
+ char *errmsg = NULL;
+ char conf_path[PATH_MAX] = "";
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ char slv_url[VOLINFO_SLAVE_URL_MAX] = {0};
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ param = (glusterd_gsync_status_temp_t *)data;
+
+ GF_VALIDATE_OR_GOTO(this->name, param, out);
+ GF_VALIDATE_OR_GOTO(this->name, param->volinfo, out);
+
+ if (this)
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ if (snprintf(slv_url, sizeof(slv_url), "%s", value->data) >=
+ sizeof(slv_url)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL,
+ "Error in copying slave: %s!", value->data);
+ goto out;
+ }
+
+ ret = parse_slave_url(slv_url, &slave);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL,
+ "Error in parsing slave: %s!", value->data);
+ goto out;
+ }
+
+ ret = glusterd_get_slave_info(slave, &slave_url, &slave_host, &slave_vol,
+ &errmsg);
+ if (ret) {
+ if (errmsg)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR,
+ "Unable to fetch slave details. Error: %s", errmsg);
+ else
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR,
+ "Unable to fetch slave details.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = snprintf(conf_path, sizeof(conf_path) - 1,
+ "%s/" GEOREP "/%s_%s_%s/gsyncd.conf", priv->workdir,
+ param->volinfo->volname, slave_host, slave_vol);
+ conf_path[ret] = '\0';
+
+ ret = glusterd_get_gsync_status_mst_slv(param->volinfo, slave, conf_path,
+ param->rsp_dict, param->node);
+out:
- GF_ASSERT (THIS);
- GF_ASSERT (THIS->private);
+ if (errmsg)
+ GF_FREE(errmsg);
- priv = THIS->private;
+ if (slave_buf)
+ GF_FREE(slave_buf);
- GF_VALIDATE_OR_GOTO ("gsync", master, out);
- GF_VALIDATE_OR_GOTO ("gsync", slave, out);
+ if (slave_vol)
+ GF_FREE(slave_vol);
- ret = glusterd_gsync_get_param_file (log_file, "log", master,
- slave, priv->workdir);
- if (ret == -1) {
- ret = -2;
- gf_log ("", GF_LOG_WARNING, "failed to gsyncd logfile");
- goto out;
- }
+ if (slave_url)
+ GF_FREE(slave_url);
+
+ if (slave_host)
+ GF_FREE(slave_host);
- out:
- return ret;
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d.", ret);
+ return ret;
}
static int
-gsync_status_byfd (int fd)
+_get_max_gsync_slave_num(dict_t *dict, char *key, data_t *value, void *data)
{
- GF_ASSERT (fd >= -1);
+ int tmp_slvnum = 0;
+ int *slvnum = (int *)data;
- if (lockf (fd, F_TEST, 0) == -1 &&
- (errno == EAGAIN || errno == EACCES))
- /* gsyncd keeps the pidfile locked */
- return 0;
+ sscanf(key, "slave%d", &tmp_slvnum);
+ if (tmp_slvnum > *slvnum)
+ *slvnum = tmp_slvnum;
- return -1;
+ return 0;
}
-/* status: return 0 when gsync is running
- * return -1 when not running
- */
-int
-gsync_status (char *master, char *slave, int *status)
+static int
+_get_slave_idx_slave_voluuid(dict_t *dict, char *key, data_t *value, void *data)
{
- char pidfile[PATH_MAX] = {0,};
- int fd = -1;
+ char *slave_info = NULL;
+ xlator_t *this = NULL;
+ struct slave_vol_config *slave_cfg = NULL;
+ int i = 0;
+ int ret = -1;
+ unsigned tmp_slvnum = 0;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ slave_cfg = data;
+
+ if (value)
+ slave_info = value->data;
+
+ if (!(slave_info) || strlen(slave_info) == 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_SLAVE,
+ "Invalid slave in dict");
+ ret = -2;
+ goto out;
+ }
+
+ /* slave format:
+ * master_node_uuid:ssh://slave_host::slave_vol:slave_voluuid */
+ while (i++ < 5) {
+ slave_info = strchr(slave_info, ':');
+ if (slave_info)
+ slave_info++;
+ else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL,
+ "slave_info becomes NULL!");
+ ret = -2;
+ goto out;
+ }
+ }
+ if (strcmp(slave_info, slave_cfg->slave_voluuid) == 0) {
+ gf_msg_debug(this->name, 0,
+ "Same slave volume "
+ "already present %s",
+ slave_cfg->slave_voluuid);
+ ret = -1;
+
+ sscanf(key, "slave%d", &tmp_slvnum);
+ slave_cfg->old_slvidx = tmp_slvnum;
+
+ gf_msg_debug(this->name, 0,
+ "and "
+ "its index is: %d",
+ tmp_slvnum);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
- fd = gsyncd_getpidfile (master, slave, pidfile);
- if (fd == -2)
- return -1;
+static int
+glusterd_remove_slave_in_info(glusterd_volinfo_t *volinfo, char *slave,
+ char **op_errstr)
+{
+ int zero_slave_entries = _gf_true;
+ int ret = 0;
+ char *slavekey = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(volinfo);
+ GF_ASSERT(slave);
+
+ do {
+ ret = glusterd_get_slave(volinfo, slave, &slavekey);
+ if (ret < 0 && zero_slave_entries) {
+ ret++;
+ goto out;
+ }
+ zero_slave_entries = _gf_false;
+ dict_del(volinfo->gsync_slaves, slavekey);
+ } while (ret >= 0);
+
+ ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ *op_errstr = gf_strdup(
+ "Failed to store the Volume"
+ "information");
+ goto out;
+ }
+out:
+ gf_msg_debug(this->name, 0, "returning %d", ret);
+ return ret;
+}
- *status = gsync_status_byfd (fd);
+static int
+glusterd_gsync_get_uuid(char *slave, glusterd_volinfo_t *vol, uuid_t uuid)
+{
+ int ret = 0;
+ char *slavekey = NULL;
+ char *slaveentry = NULL;
+ char *t = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(vol);
+ GF_ASSERT(slave);
+
+ ret = glusterd_get_slave(vol, slave, &slavekey);
+ if (ret < 0) {
+ /* XXX colliding cases of failure and non-extant
+ * slave... now just doing this as callers of this
+ * function can make sense only of -1 and 0 as retvals;
+ * getting at the proper semanticals will involve
+ * fixing callers as well.
+ */
+ ret = -1;
+ goto out;
+ }
- sys_close (fd);
+ ret = dict_get_str(vol->gsync_slaves, slavekey, &slaveentry);
+ GF_ASSERT(ret == 0);
- return 0;
-}
+ t = strchr(slaveentry, ':');
+ GF_ASSERT(t);
+ *t = '\0';
+ ret = gf_uuid_parse(slaveentry, uuid);
+ *t = ':';
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
-static int32_t
-glusterd_gsync_volinfo_dict_set (glusterd_volinfo_t *volinfo,
- char *key, char *value)
+static int
+update_slave_voluuid(dict_t *dict, char *key, data_t *value, void *data)
{
- int32_t ret = -1;
- char *gsync_status = NULL;
+ char *slave = NULL;
+ char *slave_url = NULL;
+ char *slave_vol = NULL;
+ char *slave_host = NULL;
+ char *errmsg = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ char slv_url[VOLINFO_SLAVE_URL_MAX] = {0};
+ char slave_voluuid[GF_UUID_BUF_SIZE] = {0};
+ char *slave_info = NULL;
+ char *new_value = NULL;
+ char *same_key = NULL;
+ int cnt = 0;
+ gf_boolean_t *voluuid_updated = NULL;
+
+ this = THIS;
+
+ voluuid_updated = data;
+ slave_info = value->data;
+ gf_msg_debug(this->name, 0, "slave_info: %s!", slave_info);
+
+ /* old slave format:
+ * master_node_uuid:ssh://slave_host::slave_vol
+ * New slave format:
+ * master_node_uuid:ssh://slave_host::slave_vol:slave_voluuid */
+ while (slave_info) {
+ slave_info = strchr(slave_info, ':');
+ if (slave_info)
+ cnt++;
+ else
+ break;
+
+ slave_info++;
+ }
+
+ gf_msg_debug(this->name, 0, "cnt: %d", cnt);
+ /* check whether old slave format and update vol uuid if old format.
+ * With volume uuid, number of ':' is 5 and is 4 without.
+ */
+ if (cnt == 4) {
+ if (snprintf(slv_url, sizeof(slv_url), "%s", value->data) >=
+ sizeof(slv_url)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL,
+ "Error in copying slave: %s!", value->data);
+ goto out;
+ }
- gsync_status = gf_strdup (value);
- if (!gsync_status) {
- gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
- goto out;
+ ret = parse_slave_url(slv_url, &slave);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL,
+ "Error in parsing slave: %s!", value->data);
+ goto out;
}
- ret = dict_set_dynstr (volinfo->dict, key, gsync_status);
+ ret = glusterd_get_slave_info(slave, &slave_url, &slave_host,
+ &slave_vol, &errmsg);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to set dict");
- goto out;
+ if (errmsg)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SLAVEINFO_FETCH_ERROR,
+ "Unable to fetch slave details. Error: %s", errmsg);
+ else
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SLAVEINFO_FETCH_ERROR,
+ "Unable to fetch slave details.");
+ ret = -1;
+ goto out;
}
- ret = 0;
-out:
- return 0;
-}
-
-static int
-gsync_verify_config_options (dict_t *dict, char **op_errstr)
-{
- char **resopt = NULL;
- int i = 0;
- char *subop = NULL;
- char *slave = NULL;
- char *op_name = NULL;
- char *op_value = NULL;
- char *t = NULL;
- gf_boolean_t banned = _gf_true;
-
- if (dict_get_str (dict, "subop", &subop) != 0) {
- gf_log ("", GF_LOG_WARNING, "missing subop");
- *op_errstr = gf_strdup ("Invalid config request");
- return -1;
+ ret = glusterd_get_slave_voluuid(slave_host, slave_vol, slave_voluuid);
+ if ((ret) || (strlen(slave_voluuid) == 0)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REMOTE_VOL_UUID_FAIL,
+ "Unable to get remote volume uuid"
+ "slavehost:%s slavevol:%s",
+ slave_host, slave_vol);
+ /* Avoiding failure due to remote vol uuid fetch */
+ ret = 0;
+ goto out;
}
+ ret = gf_asprintf(&new_value, "%s:%s", value->data, slave_voluuid);
+ ret = gf_asprintf(&same_key, "%s", key);
- if (dict_get_str (dict, "slave", &slave) != 0) {
- gf_log ("", GF_LOG_WARNING, GEOREP" CONFIG: no slave given");
- *op_errstr = gf_strdup ("Slave required");
- return -1;
+ /* delete old key and add new value */
+ dict_del(dict, key);
+
+ /* set new value for the same key*/
+ ret = dict_set_dynstr(dict, same_key, new_value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REMOTE_VOL_UUID_FAIL,
+ "Error in setting dict value"
+ "new_value :%s",
+ new_value);
+ goto out;
}
+ *voluuid_updated = _gf_true;
+ }
+
+ ret = 0;
+out:
+ if (errmsg)
+ GF_FREE(errmsg);
- if (strcmp (subop, "get-all") == 0)
- return 0;
+ if (slave_url)
+ GF_FREE(slave_url);
- if (dict_get_str (dict, "op_name", &op_name) != 0) {
- gf_log ("", GF_LOG_WARNING, "option name missing");
- *op_errstr = gf_strdup ("Option name missing");
- return -1;
+ if (slave_vol)
+ GF_FREE(slave_vol);
+
+ if (slave_host)
+ GF_FREE(slave_host);
+
+ gf_msg_debug(this->name, 0, "Returning %d.", ret);
+ return ret;
+}
+
+static int
+glusterd_update_slave_voluuid_slaveinfo(glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ gf_boolean_t voluuid_updated = _gf_false;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
+
+ ret = dict_foreach(volinfo->gsync_slaves, update_slave_voluuid,
+ &voluuid_updated);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REMOTE_VOL_UUID_FAIL,
+ "Error in updating"
+ "volinfo");
+ goto out;
+ }
+
+ if (_gf_true == voluuid_updated) {
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
+ "Error in storing"
+ "volinfo");
+ goto out;
}
+ }
- if (runcmd (GSYNCD_PREFIX"/gsyncd", "--config-check", op_name, NULL)) {
- gf_log ("", GF_LOG_WARNING, "Invalid option %s", op_name);
- *op_errstr = gf_strdup ("Invalid option");
+ ret = 0;
+out:
+ gf_msg_debug((this ? this->name : "glusterd"), 0, "Returning %d", ret);
+ return ret;
+}
- return -1;
+int
+glusterd_check_gsync_running_local(char *master, char *slave, char *conf_path,
+ gf_boolean_t *is_run)
+{
+ int ret = -1;
+ int ret_status = 0;
+ gf_boolean_t is_template_in_use = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(master);
+ GF_ASSERT(slave);
+ GF_ASSERT(is_run);
+
+ *is_run = _gf_false;
+ ret = gsync_status(master, slave, conf_path, &ret_status,
+ &is_template_in_use);
+ if (ret == 0 && ret_status == 0)
+ *is_run = _gf_true;
+ else if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VALIDATE_FAILED,
+ GEOREP " validation failed");
+ goto out;
+ }
+ ret = 0;
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_store_slave_in_info(glusterd_volinfo_t *volinfo, char *slave,
+ char *host_uuid, char *slave_voluuid,
+ char **op_errstr, gf_boolean_t is_force)
+{
+ int ret = 0;
+ int maxslv = 0;
+ char **linearr = NULL;
+ char *value = NULL;
+ char *slavekey = NULL;
+ char *slaveentry = NULL;
+ char key[32] = {
+ 0,
+ };
+ int keylen;
+ char *t = NULL;
+ xlator_t *this = NULL;
+ struct slave_vol_config slave1 = {
+ {0},
+ };
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(volinfo);
+ GF_ASSERT(slave);
+ GF_ASSERT(host_uuid);
+ GF_VALIDATE_OR_GOTO(this->name, slave_voluuid, out);
+
+ ret = glusterd_get_slave(volinfo, slave, &slavekey);
+ switch (ret) {
+ case -2:
+ ret = -1;
+ goto out;
+ case -1:
+ break;
+ default:
+ if (!is_force)
+ GF_ASSERT(ret > 0);
+ ret = dict_get_str(volinfo->gsync_slaves, slavekey, &slaveentry);
+ GF_ASSERT(ret == 0);
+
+ /* same-name + same-uuid slave entries should have been filtered
+ * out in glusterd_op_verify_gsync_start_options(), so we can
+ * assert an uuid mismatch
+ */
+ t = strtail(slaveentry, host_uuid);
+ if (!is_force)
+ GF_ASSERT(!t || *t != ':');
+
+ if (is_force) {
+ gf_msg_debug(this->name, 0,
+ GEOREP
+ " has already "
+ "been invoked for the %s (master) and "
+ "%s (slave). Allowing without saving "
+ "info again due to force command.",
+ volinfo->volname, slave);
+ ret = 0;
+ goto out;
+ }
+
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVOKE_ERROR,
+ GEOREP
+ " has already been invoked for "
+ "the %s (master) and %s (slave) from a different "
+ "machine",
+ volinfo->volname, slave);
+ *op_errstr = gf_strdup(GEOREP
+ " already running in "
+ "another machine");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_urltransform_single(slave, "normalize", &linearr);
+ if (ret == -1)
+ goto out;
+
+ ret = gf_asprintf(&value, "%s:%s:%s", host_uuid, linearr[0], slave_voluuid);
+
+ glusterd_urltransform_free(linearr, 1);
+ if (ret == -1)
+ goto out;
+
+ /* Given the slave volume uuid, check and get any existing slave */
+ memcpy(slave1.slave_voluuid, slave_voluuid, UUID_CANONICAL_FORM_LEN);
+ ret = dict_foreach(volinfo->gsync_slaves, _get_slave_idx_slave_voluuid,
+ &slave1);
+
+ if (ret == 0) { /* New slave */
+ dict_foreach(volinfo->gsync_slaves, _get_max_gsync_slave_num, &maxslv);
+ keylen = snprintf(key, sizeof(key), "slave%d", maxslv + 1);
+
+ ret = dict_set_dynstrn(volinfo->gsync_slaves, key, keylen, value);
+ if (ret) {
+ GF_FREE(value);
+ goto out;
}
+ } else if (ret == -1) { /* Existing slave */
+ keylen = snprintf(key, sizeof(key), "slave%d", slave1.old_slvidx);
- if (strcmp (subop, "get") == 0)
- return 0;
+ gf_msg_debug(this->name, 0,
+ "Replacing key:%s with new value"
+ ":%s",
+ key, value);
- t = strtail (subop, "set");
- if (!t)
- t = strtail (subop, "del");
- if (!t || (t[0] && strcmp (t, "-glob") != 0)) {
- gf_log ("", GF_LOG_WARNING, "unknown subop %s", subop);
- *op_errstr = gf_strdup ("Invalid config request");
- return -1;
+ /* Add new slave's value, with the same slave index */
+ ret = dict_set_dynstrn(volinfo->gsync_slaves, key, keylen, value);
+ if (ret) {
+ GF_FREE(value);
+ goto out;
}
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REMOTE_VOL_UUID_FAIL,
+ "_get_slave_idx_slave_voluuid failed!");
+ GF_FREE(value);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ *op_errstr = gf_strdup(
+ "Failed to store the Volume "
+ "information");
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
- if (strtail (subop, "set") &&
- dict_get_str (dict, "op_value", &op_value) != 0) {
- gf_log ("", GF_LOG_WARNING, "missing value for set");
- *op_errstr = gf_strdup ("missing value");
+static int
+glusterd_op_verify_gsync_start_options(glusterd_volinfo_t *volinfo, char *slave,
+ char *conf_path, char *statefile,
+ char **op_errstr, gf_boolean_t is_force)
+{
+ int ret = -1;
+ int ret_status = 0;
+ gf_boolean_t is_template_in_use = _gf_false;
+ char msg[2048] = {0};
+ uuid_t uuid = {0};
+ xlator_t *this = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ char statefiledir[PATH_MAX] = {
+ 0,
+ };
+ char *statedir = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(volinfo);
+ GF_ASSERT(slave);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(conf_path);
+ GF_ASSERT(this && this->private);
+
+ if (GLUSTERD_STATUS_STARTED != volinfo->status) {
+ snprintf(msg, sizeof(msg),
+ "Volume %s needs to be started "
+ "before " GEOREP " start",
+ volinfo->volname);
+ goto out;
+ }
+
+ /* check session directory as statefile may not present
+ * during upgrade */
+ if (snprintf(statefiledir, sizeof(statefiledir), "%s", statefile) >=
+ sizeof(statefiledir)) {
+ snprintf(msg, sizeof(msg), "statefiledir truncated");
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, "%s",
+ msg);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
+ statedir = dirname(statefiledir);
+
+ ret = sys_lstat(statedir, &stbuf);
+ if (ret) {
+ snprintf(msg, sizeof(msg),
+ "Session between %s and %s has"
+ " not been created. Please create session and retry.",
+ volinfo->volname, slave);
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "%s statefile: %s", msg, statefile);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
+
+ /* Check if the gsync slave info is stored. If not
+ * session has not been created */
+ ret = glusterd_gsync_get_uuid(slave, volinfo, uuid);
+ if (ret) {
+ snprintf(msg, sizeof(msg),
+ "Session between %s and %s has"
+ " not been created. Please create session and retry.",
+ volinfo->volname, slave);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SESSION_CREATE_ERROR, "%s",
+ msg);
+ goto out;
+ }
+
+ /*Check if the gsync is already started in cmd. inited host
+ * If so initiate add it into the glusterd's priv*/
+ ret = gsync_status(volinfo->volname, slave, conf_path, &ret_status,
+ &is_template_in_use);
+ if (ret == 0) {
+ if ((ret_status == 0) && !is_force) {
+ snprintf(msg, sizeof(msg),
+ GEOREP
+ " session between"
+ " %s & %s already started",
+ volinfo->volname, slave);
+ ret = -1;
+ goto out;
}
+ } else if (ret == -1) {
+ snprintf(msg, sizeof(msg),
+ GEOREP
+ " start option "
+ "validation failed ");
+ goto out;
+ }
+
+ if (is_template_in_use == _gf_true) {
+ snprintf(msg, sizeof(msg),
+ GEOREP
+ " start "
+ "failed : pid-file entry missing "
+ "in config file.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_verify_gsyncd_spawn(volinfo->volname, slave);
+ if (ret && !is_force) {
+ snprintf(msg, sizeof(msg), "Unable to spawn gsyncd");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_SPAWN_FAILED, "%s",
+ msg);
+ }
+out:
+ if (ret && (msg[0] != '\0')) {
+ *op_errstr = gf_strdup(msg);
+ }
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
- /* match option name against reserved options, modulo -/_
- * difference
- */
- for (resopt = gsync_reserved_opts; *resopt; resopt++) {
- banned = _gf_true;
- for (i = 0; (*resopt)[i] && op_name[i]; i++) {
- if ((*resopt)[i] == op_name[i] ||
- ((*resopt)[i] == '-' && op_name[i] == '_'))
- continue;
- banned = _gf_false;
- }
- if (banned) {
- gf_log ("", GF_LOG_WARNING, "Reserved option %s", op_name);
- *op_errstr = gf_strdup ("Reserved option");
+void
+glusterd_check_geo_rep_configured(glusterd_volinfo_t *volinfo,
+ gf_boolean_t *flag)
+{
+ GF_ASSERT(volinfo);
+ GF_ASSERT(flag);
- return -1;
- break;
- }
- }
+ if (volinfo->gsync_slaves->count)
+ *flag = _gf_true;
+ else
+ *flag = _gf_false;
- return 0;
+ return;
}
+/*
+ * is_geo_rep_active:
+ * This function reads the state_file and sets is_active to 1 if the
+ * monitor status is neither "Stopped" or "Created"
+ *
+ * RETURN VALUE:
+ * 0: On successful read of state_file.
+ * -1: error.
+ */
+
static int
-glusterd_get_gsync_status_mst_slv (glusterd_volinfo_t *volinfo,
- char *slave, dict_t *rsp_dict);
+is_geo_rep_active(glusterd_volinfo_t *volinfo, char *slave, char *conf_path,
+ int *is_active)
+{
+ dict_t *confd = NULL;
+ char *statefile = NULL;
+ char *master = NULL;
+ char monitor_status[PATH_MAX] = "";
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ master = volinfo->volname;
+
+ confd = dict_new();
+ if (!confd) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Not able to create dict.");
+ goto out;
+ }
+
+ ret = glusterd_gsync_get_config(master, slave, conf_path, confd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GET_CONFIG_INFO_FAILED,
+ "Unable to get configuration data "
+ "for %s(master), %s(slave)",
+ master, slave);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_param(confd, "state_file", &statefile);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get state_file's name "
+ "for %s(master), %s(slave). Please check gsync "
+ "config file.",
+ master, slave);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_gsync_read_frm_status(statefile, monitor_status,
+ sizeof(monitor_status));
+ if (ret <= 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STAT_FILE_READ_FAILED,
+ "Unable to read the status file for %s(master), "
+ "%s(slave)",
+ master, slave);
+ snprintf(monitor_status, sizeof(monitor_status), "defunct");
+ }
+
+ if ((!strcmp(monitor_status, "Stopped")) ||
+ (!strcmp(monitor_status, "Created"))) {
+ *is_active = 0;
+ } else {
+ *is_active = 1;
+ }
+ ret = 0;
+out:
+ if (confd)
+ dict_unref(confd);
+ return ret;
+}
-static void
-_get_status_mst_slv (dict_t *this, char *key, data_t *value, void *data)
+/*
+ * _get_slave_status:
+ * Called for each slave in the volume from dict_foreach.
+ * It calls is_geo_rep_active to get the monitor status.
+ *
+ * RETURN VALUE:
+ * 0: On successful read of state_file from is_geo_rep_active.
+ * When it is found geo-rep is already active from previous calls.
+ * When there is no slave.
+ * -1: On error.
+ */
+
+int
+_get_slave_status(dict_t *dict, char *key, data_t *value, void *data)
{
- glusterd_gsync_status_temp_t *param = NULL;
- char *slave = NULL;
- int ret = 0;
+ gsync_status_param_t *param = NULL;
+ char *slave = NULL;
+ char *slave_url = NULL;
+ char *slave_vol = NULL;
+ char *slave_host = NULL;
+ char *errmsg = NULL;
+ char conf_path[PATH_MAX] = "";
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ param = (gsync_status_param_t *)data;
+
+ GF_ASSERT(param);
+ GF_ASSERT(param->volinfo);
+ if (param->is_active) {
+ ret = 0;
+ goto out;
+ }
- param = (glusterd_gsync_status_temp_t *)data;
+ this = THIS;
+ GF_ASSERT(this);
- GF_ASSERT (param);
- GF_ASSERT (param->volinfo);
+ priv = this->private;
+ if (priv == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_PRIV_NOT_FOUND,
+ "priv of glusterd not present");
+ goto out;
+ }
- slave = strchr(value->data, ':');
- if (slave)
- slave ++;
+ slave = strchr(value->data, ':');
+ if (!slave) {
+ ret = 0;
+ goto out;
+ }
+ slave++;
+
+ ret = glusterd_get_slave_info(slave, &slave_url, &slave_host, &slave_vol,
+ &errmsg);
+ if (ret) {
+ if (errmsg)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR,
+ "Unable to fetch"
+ " slave details. Error: %s",
+ errmsg);
else
- return;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR,
+ "Unable to fetch slave details.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = snprintf(conf_path, sizeof(conf_path) - 1,
+ "%s/" GEOREP "/%s_%s_%s/gsyncd.conf", priv->workdir,
+ param->volinfo->volname, slave_host, slave_vol);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CONF_PATH_ASSIGN_FAILED,
+ "Unable to assign conf_path.");
+ ret = -1;
+ goto out;
+ }
+ conf_path[ret] = '\0';
+
+ ret = is_geo_rep_active(param->volinfo, slave, conf_path,
+ &param->is_active);
+out:
+ if (errmsg)
+ GF_FREE(errmsg);
- ret = glusterd_get_gsync_status_mst_slv(param->volinfo,
- slave, param->rsp_dict);
+ if (slave_vol)
+ GF_FREE(slave_vol);
+ if (slave_url)
+ GF_FREE(slave_url);
+ if (slave_host)
+ GF_FREE(slave_host);
+
+ return ret;
}
+/* glusterd_check_geo_rep_running:
+ * Checks if any geo-rep session is running for the volume.
+ *
+ * RETURN VALUE:
+ * Sets param.active to true if any geo-rep session is active.
+ * This function sets op_errstr during some error and when any geo-rep
+ * session is active. It is caller's responsibility to free op_errstr
+ * in above cases.
+ */
-static void
-_get_max_gsync_slave_num (dict_t *this, char *key, data_t *value, void *data)
+int
+glusterd_check_geo_rep_running(gsync_status_param_t *param, char **op_errstr)
{
- int tmp_slvnum = 0;
- int *slvnum = (int *)data;
+ char msg[2048] = {
+ 0,
+ };
+ gf_boolean_t enabled = _gf_false;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(param);
+ GF_ASSERT(param->volinfo);
+ GF_ASSERT(op_errstr);
+
+ glusterd_check_geo_rep_configured(param->volinfo, &enabled);
+
+ if (enabled) {
+ ret = dict_foreach(param->volinfo->gsync_slaves, _get_slave_status,
+ param);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR,
+ "_get_slave_satus failed");
+ snprintf(msg, sizeof(msg),
+ GEOREP
+ " Unable to"
+ " get the status of active " GEOREP
+ ""
+ " session for the volume '%s'.\n"
+ " Please check the log file for"
+ " more info.",
+ param->volinfo->volname);
+ *op_errstr = gf_strdup(msg);
+ ret = -1;
+ goto out;
+ }
- sscanf (key, "slave%d", &tmp_slvnum);
- if (tmp_slvnum > *slvnum)
- *slvnum = tmp_slvnum;
+ if (param->is_active) {
+ snprintf(msg, sizeof(msg),
+ GEOREP
+ " sessions"
+ " are active for the volume %s.\nStop"
+ " " GEOREP
+ " sessions involved in this"
+ " volume. Use 'volume " GEOREP
+ " status' command for more info.",
+ param->volinfo->volname);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
+ }
+out:
+ return ret;
}
static int
-glusterd_remove_slave_in_info (glusterd_volinfo_t *volinfo, char *slave,
- char **op_errstr)
+glusterd_op_verify_gsync_running(glusterd_volinfo_t *volinfo, char *slave,
+ char *conf_path, char **op_errstr)
{
- int ret = 0;
- char *slavekey = NULL;
-
- GF_ASSERT (volinfo);
- GF_ASSERT (slave);
-
- ret = glusterd_get_slave (volinfo, slave, &slavekey);
- if (ret < 0) {
- ret++;
- goto out;
- }
-
- dict_del (volinfo->gsync_slaves, slavekey);
+ int pfd = -1;
+ int ret = -1;
+ char msg[2048] = {0};
+ char pidfile[PATH_MAX] = {
+ 0,
+ };
+ gf_boolean_t is_template_in_use = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(THIS && THIS->private);
+ GF_ASSERT(volinfo);
+ GF_ASSERT(slave);
+ GF_ASSERT(conf_path);
+ GF_ASSERT(op_errstr);
+
+ if (GLUSTERD_STATUS_STARTED != volinfo->status) {
+ snprintf(msg, sizeof(msg),
+ "Volume %s needs to be started "
+ "before " GEOREP " start",
+ volinfo->volname);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_GEO_REP_START_FAILED,
+ "Volume is not in a started state, Volname=%s",
+ volinfo->volname, NULL);
+
+ goto out;
+ }
+
+ pfd = gsyncd_getpidfile(volinfo->volname, slave, pidfile, conf_path,
+ &is_template_in_use);
+ if (pfd == -2) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VALIDATE_FAILED,
+ GEOREP " stop validation failed for %s & %s", volinfo->volname,
+ slave);
+ ret = -1;
+ goto out;
+ }
+ if (gsync_status_byfd(pfd) == -1) {
+ snprintf(msg, sizeof(msg),
+ GEOREP
+ " session b/w %s & %s is "
+ "not running on this node.",
+ volinfo->volname, slave);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SESSION_INACTIVE, "%s", msg);
+ ret = -1;
+ /* monitor gsyncd already dead */
+ goto out;
+ }
+
+ if (is_template_in_use) {
+ snprintf(msg, sizeof(msg),
+ "pid-file entry missing in "
+ "the config file(%s).",
+ conf_path);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PIDFILE_NOT_FOUND, "%s",
+ msg);
+ ret = -1;
+ goto out;
+ }
+
+ if (pfd < 0)
+ goto out;
+
+ ret = 0;
+out:
+ if (ret && (msg[0] != '\0')) {
+ *op_errstr = gf_strdup(msg);
+ }
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
- ret = glusterd_store_volinfo (volinfo,
- GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret) {
- *op_errstr = gf_strdup ("Failed to store the Volume"
- "information");
- goto out;
- }
- out:
- gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
- return ret;
+static int
+glusterd_verify_gsync_status_opts(dict_t *dict, char **op_errstr)
+{
+ char *slave = NULL;
+ char *volname = NULL;
+ char errmsg[PATH_MAX] = {
+ 0,
+ };
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+ char *conf_path = NULL;
+ char *slave_url = NULL;
+ char *slave_host = NULL;
+ char *slave_vol = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_PRIV_NOT_FOUND,
+ "priv of glusterd not present");
+ *op_errstr = gf_strdup("glusterd defunct");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "master", &volname);
+ if (ret < 0) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND,
+ "volume name does not exist");
+ snprintf(errmsg, sizeof(errmsg),
+ "Volume name %s does not"
+ " exist",
+ volname);
+ *op_errstr = gf_strdup(errmsg);
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "slave", &slave);
+ if (ret < 0) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_get_slave_details_confpath(volinfo, dict, &slave_url,
+ &slave_host, &slave_vol,
+ &conf_path, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR,
+ "Unable to fetch slave or confpath details.");
+ ret = -1;
+ goto out;
+ }
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
-static int
-glusterd_gsync_get_uuid (char *slave, glusterd_volinfo_t *vol,
- uuid_t uuid)
+int
+glusterd_op_gsync_args_get(dict_t *dict, char **op_errstr, char **master,
+ char **slave, char **host_uuid)
{
- int ret = 0;
- char *slavekey = NULL;
- char *slaveentry = NULL;
- char *t = NULL;
+ int ret = -1;
+ xlator_t *this = NULL;
- GF_ASSERT (vol);
- GF_ASSERT (slave);
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
- ret = glusterd_get_slave (vol, slave, &slavekey);
+ if (master) {
+ ret = dict_get_str(dict, "master", master);
if (ret < 0) {
- /* XXX colliding cases of failure and non-extant
- * slave... now just doing this as callers of this
- * function can make sense only of -1 and 0 as retvals;
- * getting at the proper semanticals will involve
- * fixing callers as well.
- */
- ret = -1;
- goto out;
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "master not found");
+ *op_errstr = gf_strdup("master not found");
+ goto out;
}
+ }
- ret = dict_get_str (vol->gsync_slaves, slavekey, &slaveentry);
- GF_ASSERT (ret == 0);
+ if (slave) {
+ ret = dict_get_str(dict, "slave", slave);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "slave not found");
+ *op_errstr = gf_strdup("slave not found");
+ goto out;
+ }
+ }
- t = strchr (slaveentry, ':');
- GF_ASSERT (t);
- *t = '\0';
- ret = uuid_parse (slaveentry, uuid);
- *t = ':';
+ if (host_uuid) {
+ ret = dict_get_str(dict, "host-uuid", host_uuid);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "host_uuid not found");
+ *op_errstr = gf_strdup("host_uuid not found");
+ goto out;
+ }
+ }
- out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ ret = 0;
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
-static int
-glusterd_check_gsync_running_local (char *master, char *slave,
- gf_boolean_t *is_run)
-{
- int ret = -1;
- int ret_status = 0;
-
- GF_ASSERT (master);
- GF_ASSERT (slave);
- GF_ASSERT (is_run);
-
- *is_run = _gf_false;
- ret = gsync_status (master, slave, &ret_status);
- if (ret == 0 && ret_status == 0) {
- *is_run = _gf_true;
- } else if (ret == -1) {
- gf_log ("", GF_LOG_WARNING, GEOREP" validation "
- " failed");
- goto out;
+int
+glusterd_op_stage_sys_exec(dict_t *dict, char **op_errstr)
+{
+ char errmsg[PATH_MAX] = "";
+ char *command = NULL;
+ char command_path[PATH_MAX] = "";
+ struct stat st = {
+ 0,
+ };
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ if (conf->op_version < 2) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION,
+ "Op Version not supported.");
+ snprintf(errmsg, sizeof(errmsg),
+ "One or more nodes do not"
+ " support the required op version.");
+ *op_errstr = gf_strdup(errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "command", &command);
+ if (ret) {
+ strcpy(errmsg, "internal error");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get command from dict");
+ goto out;
+ }
+
+ /* enforce local occurrence of the command */
+ if (strchr(command, '/')) {
+ strcpy(errmsg, "invalid command name");
+ ret = -1;
+ goto out;
+ }
+
+ sprintf(command_path, GSYNCD_PREFIX "/peer_%s", command);
+ /* check if it's executable */
+ ret = sys_access(command_path, X_OK);
+ if (!ret)
+ /* check if it's a regular file */
+ ret = sys_stat(command_path, &st);
+ if (!ret && !S_ISREG(st.st_mode))
+ ret = -1;
+
+out:
+ if (ret) {
+ if (errmsg[0] == '\0') {
+ if (command)
+ snprintf(errmsg, sizeof(errmsg),
+ "gsync peer_%s command not found.", command);
+ else
+ snprintf(errmsg, sizeof(errmsg), "%s",
+ "gsync peer command was not "
+ "specified");
}
- ret = 0;
- out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_CMD_ERROR, "%s",
+ errmsg);
+ }
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
-static int
-glusterd_store_slave_in_info (glusterd_volinfo_t *volinfo, char *slave,
- char *host_uuid, char **op_errstr)
-{
- int ret = 0;
- int maxslv = 0;
- char **linearr = NULL;
- char *value = NULL;
- char *slavekey = NULL;
- char *slaveentry = NULL;
- char key[512] = {0, };
- char *t = NULL;
-
- GF_ASSERT (volinfo);
- GF_ASSERT (slave);
- GF_ASSERT (host_uuid);
-
- ret = glusterd_get_slave (volinfo, slave, &slavekey);
- switch (ret) {
- case -2:
- ret = -1;
- goto out;
- case -1:
- break;
- default:
- GF_ASSERT (ret > 0);
- ret = dict_get_str (volinfo->gsync_slaves, slavekey, &slaveentry);
- GF_ASSERT (ret == 0);
+int
+glusterd_op_stage_copy_file(dict_t *dict, char **op_errstr)
+{
+ char abs_filename[PATH_MAX] = "";
+ char errmsg[PATH_MAX] = "";
+ char *filename = NULL;
+ char *host_uuid = NULL;
+ char uuid_str[64] = {0};
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ char workdir[PATH_MAX] = {
+ 0,
+ };
+ char realpath_filename[PATH_MAX] = {
+ 0,
+ };
+ char realpath_workdir[PATH_MAX] = {
+ 0,
+ };
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_PRIV_NOT_FOUND,
+ "priv of glusterd not present");
+ *op_errstr = gf_strdup("glusterd defunct");
+ goto out;
+ }
+
+ if (priv->op_version < 2) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION,
+ "Op Version not supported.");
+ snprintf(errmsg, sizeof(errmsg),
+ "One or more nodes do not"
+ " support the required op version.");
+ *op_errstr = gf_strdup(errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "host-uuid", &host_uuid);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch host-uuid from dict.");
+ goto out;
+ }
+
+ uuid_utoa_r(MY_UUID, uuid_str);
+ if (!strcmp(uuid_str, host_uuid)) {
+ ret = dict_get_str(dict, "source", &filename);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch filename from dict.");
+ *op_errstr = gf_strdup("command unsuccessful");
+ goto out;
+ }
+ len = snprintf(abs_filename, sizeof(abs_filename), "%s/%s",
+ priv->workdir, filename);
+ if ((len < 0) || (len >= sizeof(abs_filename))) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
- /* same-name + same-uuid slave entries should have been filtered
- * out in glusterd_op_verify_gsync_start_options(), so we can
- * assert an uuid mismatch
- */
- t = strtail (slaveentry, host_uuid);
- GF_ASSERT (!t || *t != ':');
-
- gf_log ("", GF_LOG_ERROR, GEOREP" has already been invoked for "
- "the %s (master) and %s (slave) "
- "from a different machine",
- volinfo->volname, slave);
- *op_errstr = gf_strdup (GEOREP" already running in an an"
- "another machine");
- ret = -1;
- goto out;
+ if (!realpath(priv->workdir, realpath_workdir)) {
+ len = snprintf(errmsg, sizeof(errmsg),
+ "Failed to "
+ "get realpath of %s: %s",
+ priv->workdir, strerror(errno));
+ if (len < 0) {
+ strcpy(errmsg, "<error>");
+ }
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_REALPATH_GET_FAIL,
+ "Realpath=%s, Reason=%s", priv->workdir, strerror(errno),
+ NULL);
+ *op_errstr = gf_strdup(errmsg);
+ ret = -1;
+ goto out;
}
- ret = glusterd_urltransform_single (slave, "normalize", &linearr);
- if (ret == -1)
- goto out;
+ if (!realpath(abs_filename, realpath_filename)) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Failed to get "
+ "realpath of %s: %s",
+ filename, strerror(errno));
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_REALPATH_GET_FAIL,
+ "Filename=%s, Reason=%s", filename, strerror(errno), NULL);
+ *op_errstr = gf_strdup(errmsg);
+ ret = -1;
+ goto out;
+ }
- ret = gf_asprintf (&value, "%s:%s", host_uuid, linearr[0]);
- glusterd_urltransform_free (linearr, 1);
- if (ret == -1)
- goto out;
+ /* Add Trailing slash to workdir, without slash strncmp
+ will succeed for /var/lib/glusterd_bad */
+ len = snprintf(workdir, sizeof(workdir), "%s/", realpath_workdir);
+ if ((len < 0) || (len >= sizeof(workdir))) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
- dict_foreach (volinfo->gsync_slaves, _get_max_gsync_slave_num, &maxslv);
- snprintf (key, 512, "slave%d", maxslv + 1);
- ret = dict_set_dynstr (volinfo->gsync_slaves, key, value);
- if (ret)
- goto out;
+ /* Protect against file copy outside $workdir */
+ if (strncmp(workdir, realpath_filename, strlen(workdir))) {
+ len = snprintf(errmsg, sizeof(errmsg),
+ "Source file"
+ " is outside of %s directory",
+ priv->workdir);
+ if (len < 0) {
+ strcpy(errmsg, "<error>");
+ }
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg,
+ NULL);
+ *op_errstr = gf_strdup(errmsg);
+ ret = -1;
+ goto out;
+ }
- ret = glusterd_store_volinfo (volinfo,
- GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ ret = sys_lstat(abs_filename, &stbuf);
if (ret) {
- *op_errstr = gf_strdup ("Failed to store the Volume "
- "information");
- goto out;
+ len = snprintf(errmsg, sizeof(errmsg),
+ "Source file"
+ " does not exist in %s",
+ priv->workdir);
+ if (len < 0) {
+ strcpy(errmsg, "<error>");
+ }
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg,
+ NULL);
+ *op_errstr = gf_strdup(errmsg);
+ goto out;
}
- ret = 0;
- out:
- return ret;
-}
-
-static int
-glusterd_op_verify_gsync_start_options (glusterd_volinfo_t *volinfo,
- char *slave, char **op_errstr)
-{
- int ret = -1;
- gf_boolean_t is_running = _gf_false;
- char msg[2048] = {0};
- uuid_t uuid = {0};
- glusterd_conf_t *priv = NULL;
- xlator_t *this = NULL;
-
- this = THIS;
-
- GF_ASSERT (volinfo);
- GF_ASSERT (slave);
- GF_ASSERT (op_errstr);
- GF_ASSERT (this && this->private);
+ if (!S_ISREG(stbuf.st_mode)) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Source file"
+ " is not a regular file.");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SRC_FILE_ERROR, errmsg,
+ NULL);
+ *op_errstr = gf_strdup(errmsg);
+ ret = -1;
+ goto out;
+ }
+ }
- priv = this->private;
+ ret = 0;
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
- if (GLUSTERD_STATUS_STARTED != volinfo->status) {
- snprintf (msg, sizeof (msg), "Volume %s needs to be started "
- "before "GEOREP" start", volinfo->volname);
- goto out;
+int
+glusterd_get_statefile_name(glusterd_volinfo_t *volinfo, char *slave,
+ char *conf_path, char **statefile,
+ gf_boolean_t *is_template_in_use)
+{
+ char *master = NULL;
+ char *buf = NULL;
+ char *working_conf_path = NULL;
+ char temp_conf_path[PATH_MAX] = "";
+ dict_t *confd = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+ struct stat stbuf = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(this->private);
+ GF_ASSERT(volinfo);
+ GF_ASSERT(conf_path);
+ GF_ASSERT(is_template_in_use);
+
+ master = volinfo->volname;
+
+ confd = dict_new();
+ if (!confd) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Unable to create new dict");
+ goto out;
+ }
+
+ priv = THIS->private;
+
+ len = snprintf(temp_conf_path, sizeof(temp_conf_path),
+ "%s/" GSYNC_CONF_TEMPLATE, priv->workdir);
+ if ((len < 0) || (len >= sizeof(temp_conf_path))) {
+ goto out;
+ }
+
+ ret = sys_lstat(conf_path, &stbuf);
+ if (!ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CONFIG_INFO,
+ "Using passed config template(%s).", conf_path);
+ working_conf_path = conf_path;
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, ENOENT, GD_MSG_FILE_OP_FAILED,
+ "Config file (%s) missing. Looking for template config"
+ " file (%s)",
+ conf_path, temp_conf_path);
+ ret = sys_lstat(temp_conf_path, &stbuf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED,
+ "Template "
+ "config file (%s) missing.",
+ temp_conf_path);
+ goto out;
}
- /*Check if the gsync is already started in cmd. inited host
- * If so initiate add it into the glusterd's priv*/
- ret = glusterd_gsync_get_uuid (slave, volinfo, uuid);
- if ((ret == 0) && (uuid_compare (MY_UUID, uuid) == 0)) {
- ret = glusterd_check_gsync_running_local (volinfo->volname,
- slave, &is_running);
- if (ret) {
- snprintf (msg, sizeof (msg), GEOREP" start option "
- "validation failed ");
- goto out;
- }
- if (_gf_true == is_running) {
- snprintf (msg, sizeof (msg), GEOREP " session between"
- " %s & %s already started", volinfo->volname,
- slave);
- ret = -1;
- goto out;
- }
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DEFAULT_TEMP_CONFIG,
+ "Using default config template(%s).", temp_conf_path);
+ working_conf_path = temp_conf_path;
+ *is_template_in_use = _gf_true;
+ }
+
+fetch_data:
+ ret = glusterd_gsync_get_config(master, slave, working_conf_path, confd);
+ if (ret) {
+ if (*is_template_in_use == _gf_false) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GET_CONFIG_INFO_FAILED,
+ "Unable to get configuration data "
+ "for %s(master), %s(slave). "
+ "Trying template config.",
+ master, slave);
+ working_conf_path = temp_conf_path;
+ *is_template_in_use = _gf_true;
+ goto fetch_data;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GET_CONFIG_INFO_FAILED,
+ "Unable to get configuration data "
+ "for %s(master), %s(slave) from "
+ "template config",
+ master, slave);
+ goto out;
}
- ret = 0;
-out:
- if (ret && (msg[0] != '\0')) {
- *op_errstr = gf_strdup (msg);
+ }
+
+ ret = dict_get_param(confd, "state_file", &buf);
+ if (ret) {
+ if (*is_template_in_use == _gf_false) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get state_file's name. "
+ "Trying template config.");
+ working_conf_path = temp_conf_path;
+ *is_template_in_use = _gf_true;
+ goto fetch_data;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_GET_STATEFILE_NAME_FAILED,
+ "Unable to get state_file's "
+ "name from template.");
+ goto out;
}
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ }
+
+ ret = 0;
+out:
+ if (buf) {
+ *statefile = gf_strdup(buf);
+ if (!*statefile)
+ ret = -1;
+ }
+
+ if (confd)
+ dict_unref(confd);
+
+ gf_msg_debug(this->name, 0, "Returning %d ", ret);
+ return ret;
}
int
-glusterd_check_gsync_running (glusterd_volinfo_t *volinfo, gf_boolean_t *flag)
+glusterd_create_status_file(char *master, char *slave, char *slave_host,
+ char *slave_vol, char *status)
{
+ int ret = -1;
+ runner_t runner = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
- GF_ASSERT (volinfo);
- GF_ASSERT (flag);
+ this = THIS;
+ GF_ASSERT(this);
- if (volinfo->gsync_slaves->count)
- *flag = _gf_true;
- else
- *flag = _gf_false;
-
- return 0;
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_PRIV_NOT_FOUND,
+ "priv of glusterd not present");
+ goto out;
+ }
+
+ if (!status) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATUS_NULL, "Status Empty");
+ goto out;
+ }
+ gf_msg_debug(this->name, 0, "slave = %s", slave);
+
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "--create", status, "-c",
+ NULL);
+ runner_argprintf(&runner, "%s/" GEOREP "/%s_%s_%s/gsyncd.conf",
+ priv->workdir, master, slave_host, slave_vol);
+ runner_argprintf(&runner, "--iprefix=%s", DATADIR);
+ runner_argprintf(&runner, ":%s", master);
+ runner_add_args(&runner, slave, NULL);
+ synclock_unlock(&priv->big_lock);
+ ret = runner_run(&runner);
+ synclock_lock(&priv->big_lock);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATUSFILE_CREATE_FAILED,
+ "Creating status file failed.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ gf_msg_debug(this->name, 0, "returning %d", ret);
+ return ret;
}
static int
-glusterd_op_verify_gsync_running (glusterd_volinfo_t *volinfo,
- char *slave, char **op_errstr)
+glusterd_verify_slave(char *volname, char *slave_url, char *slave_vol,
+ int ssh_port, char **op_errstr,
+ gf_boolean_t *is_force_blocker)
{
- int ret = -1;
- char msg[2048] = {0};
- uuid_t uuid = {0};
-
- GF_ASSERT (THIS && THIS->private);
- GF_ASSERT (volinfo);
- GF_ASSERT (slave);
- GF_ASSERT (op_errstr);
+ int32_t ret = -1;
+ runner_t runner = {
+ 0,
+ };
+ char log_file_path[PATH_MAX] = "";
+ char buf[PATH_MAX] = "";
+ char *tmp = NULL;
+ char *slave_url_buf = NULL;
+ char *save_ptr = NULL;
+ char *slave_user = NULL;
+ char *slave_ip = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ char *af = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(volname);
+ GF_ASSERT(slave_url);
+ GF_ASSERT(slave_vol);
+
+ /* Fetch the slave_user and slave_ip from the slave_url.
+ * If the slave_user is not present. Use "root"
+ */
+ if (strstr(slave_url, "@")) {
+ slave_url_buf = gf_strdup(slave_url);
+ if (!slave_url_buf) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_STRDUP_FAILED,
+ "Slave_url=%s", slave_url, NULL);
+ goto out;
+ }
- if (GLUSTERD_STATUS_STARTED != volinfo->status) {
- snprintf (msg, sizeof (msg), "Volume %s needs to be started "
- "before "GEOREP" start", volinfo->volname);
+ slave_user = strtok_r(slave_url_buf, "@", &save_ptr);
+ slave_ip = strtok_r(NULL, "@", &save_ptr);
+ } else {
+ slave_user = "root";
+ slave_ip = slave_url;
+ }
+
+ if (!slave_user || !slave_ip) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_URL_INVALID,
+ "Invalid slave url.");
+ goto out;
+ }
+
+ snprintf(log_file_path, sizeof(log_file_path), "%s/create_verify_log",
+ priv->logdir);
+
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gverify.sh", NULL);
+ runner_argprintf(&runner, "%s", volname);
+ runner_argprintf(&runner, "%s", slave_user);
+ runner_argprintf(&runner, "%s", slave_ip);
+ runner_argprintf(&runner, "%s", slave_vol);
+ runner_argprintf(&runner, "%d", ssh_port);
+ runner_argprintf(&runner, "%s", log_file_path);
+ ret = dict_get_str(this->options, "transport.address-family", &af);
+ if (ret)
+ af = "-";
+
+ runner_argprintf(&runner, "%s", af);
+
+ gf_msg_debug(this->name, 0, "gverify Args = %s %s %s %s %s %s %s %s",
+ runner.argv[0], runner.argv[1], runner.argv[2], runner.argv[3],
+ runner.argv[4], runner.argv[5], runner.argv[6],
+ runner.argv[7]);
+ runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
+ synclock_unlock(&priv->big_lock);
+ ret = runner_run(&runner);
+ synclock_lock(&priv->big_lock);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_SLAVE,
+ "Not a valid slave");
+ ret = glusterd_gsync_read_frm_status(log_file_path, buf, sizeof(buf));
+ if (ret <= 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_READ_ERROR,
+ "Unable to read from %s", log_file_path);
+ goto out;
+ }
- goto out;
+ /* Tokenize the error message from gverify.sh to figure out
+ * if the error is a force blocker or not. */
+ tmp = strtok_r(buf, "|", &save_ptr);
+ if (!tmp) {
+ ret = -1;
+ goto out;
}
- ret = glusterd_gsync_get_uuid (slave, volinfo, uuid);
- if (ret == -1) {
- snprintf (msg, sizeof (msg), GEOREP" session between %s & %s"
- " not active", volinfo->volname, slave);
- goto out;
+ if (!strcmp(tmp, "FORCE_BLOCKER"))
+ *is_force_blocker = 1;
+ else {
+ /* No FORCE_BLOCKER flag present so all that is
+ * present is the error message. */
+ *is_force_blocker = 0;
+ *op_errstr = gf_strdup(tmp);
+ ret = -1;
+ goto out;
}
- ret = 0;
+ /* Copy rest of the error message to op_errstr */
+ tmp = strtok_r(NULL, "|", &save_ptr);
+ if (tmp)
+ *op_errstr = gf_strdup(tmp);
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
out:
- if (ret && (msg[0] != '\0')) {
- *op_errstr = gf_strdup (msg);
- }
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ GF_FREE(slave_url_buf);
+ sys_unlink(log_file_path);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
-static int
-glusterd_verify_gsync_status_opts (dict_t *dict, char **op_errstr)
+/** @slave_ip remains unmodified */
+int
+glusterd_geo_rep_parse_slave(char *slave_url, char **hostname, char **op_errstr)
{
- char *slave = NULL;
- char *volname = NULL;
- char errmsg[PATH_MAX] = {0, };
- gf_boolean_t exists = _gf_false;
- glusterd_volinfo_t *volinfo = NULL;
- int ret = 0;
-
- ret = dict_get_str (dict, "master", &volname);
- if (ret < 0) {
- ret = 0;
+ int ret = -1;
+ char *tmp = NULL;
+ char *save_ptr = NULL;
+ char *host = NULL;
+ char errmsg[PATH_MAX] = "";
+ char *saved_url = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(slave_url);
+ GF_ASSERT(*slave_url);
+
+ saved_url = gf_strdup(slave_url);
+ if (!saved_url)
+ goto out;
+
+ /* Checking if hostname has user specified */
+ host = strstr(saved_url, "@");
+ if (!host) { /* no user specified */
+ if (hostname) {
+ *hostname = gf_strdup(saved_url);
+ if (!*hostname)
goto out;
}
- exists = glusterd_check_volume_exists (volname);
- ret = glusterd_volinfo_find (volname, &volinfo);
- if ((ret) || (!exists)) {
- gf_log ("", GF_LOG_WARNING, "volume name does not exist");
- snprintf (errmsg, sizeof(errmsg), "Volume name %s does not"
- " exist", volname);
- *op_errstr = gf_strdup (errmsg);
- ret = -1;
- goto out;
+ ret = 0;
+ goto out;
+ } else {
+ /* Moving the host past the '@' and checking if the
+ * actual hostname also has '@' */
+ host++;
+ if (strstr(host, "@")) {
+ gf_msg_debug(this->name, 0, "host = %s", host);
+ ret = snprintf(errmsg, sizeof(errmsg) - 1, "Invalid Hostname (%s).",
+ host);
+ errmsg[ret] = '\0';
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s",
+ errmsg);
+ ret = -1;
+ if (op_errstr)
+ *op_errstr = gf_strdup(errmsg);
+ goto out;
}
- ret = dict_get_str (dict, "slave", &slave);
- if (ret < 0) {
- ret = 0;
+ ret = -1;
+
+ /**
+ * preliminary check for valid slave format.
+ */
+ tmp = strtok_r(saved_url, "@", &save_ptr);
+ tmp = strtok_r(NULL, "@", &save_ptr);
+ if (!tmp)
+ goto out;
+ if (hostname) {
+ *hostname = gf_strdup(tmp);
+ if (!*hostname)
goto out;
}
+ }
- out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
-
+ ret = 0;
+out:
+ GF_FREE(saved_url);
+ if (ret)
+ if (hostname)
+ GF_FREE(*hostname);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
-
-int
-glusterd_op_gsync_args_get (dict_t *dict, char **op_errstr,
- char **master, char **slave)
+/* Return -1 only if there is a match in volume uuid */
+static int
+get_slavehost_from_voluuid(dict_t *dict, char *key, data_t *value, void *data)
{
+ char *slave_info = NULL;
+ char *tmp = NULL;
+ char *slave_host = NULL;
+ xlator_t *this = NULL;
+ struct slave_vol_config *slave_vol = NULL;
+ int i = 0;
+ int ret = -1;
- int ret = -1;
- GF_ASSERT (dict);
- GF_ASSERT (op_errstr);
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
- if (master) {
- ret = dict_get_str (dict, "master", master);
- if (ret < 0) {
- gf_log ("", GF_LOG_WARNING, "master not found");
- *op_errstr = gf_strdup ("master not found");
- goto out;
- }
- }
+ slave_vol = data;
+ slave_info = value->data;
- if (slave) {
- ret = dict_get_str (dict, "slave", slave);
- if (ret < 0) {
- gf_log ("", GF_LOG_WARNING, "slave not found");
- *op_errstr = gf_strdup ("slave not found");
- goto out;
+ gf_msg_debug(this->name, 0, "slave_info:%s !", slave_info);
+
+ if (!(slave_info) || strlen(slave_info) == 0) {
+ /* no slaves present, peace */
+ ret = 0;
+ goto out;
+ }
+
+ /* slave format:
+ * master_node_uuid:ssh://slave_host::slave_vol:slave_voluuid */
+ while (i++ < 5) {
+ slave_info = strchr(slave_info, ':');
+ if (slave_info)
+ slave_info++;
+ else
+ break;
+ }
+
+ if (!(slave_info) || strlen(slave_info) == 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL,
+ "slave_info format is wrong!");
+ ret = -2;
+ goto out;
+ } else {
+ if (strcmp(slave_info, slave_vol->slave_voluuid) == 0) {
+ ret = -1;
+
+ /* get corresponding slave host for reference*/
+ slave_host = value->data;
+ slave_host = strstr(slave_host, "://");
+ if (slave_host) {
+ slave_host += 3;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL,
+ "Invalid slave_host format!");
+ ret = -2;
+ goto out;
+ }
+ /* To go past username in non-root geo-rep session */
+ tmp = strchr(slave_host, '@');
+ if (tmp) {
+ if ((tmp - slave_host) >= LOGIN_NAME_MAX) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SLAVE_VOL_PARSE_FAIL,
+ "Invalid slave user length in %s", slave_host);
+ ret = -2;
+ goto out;
}
+ strncpy(slave_vol->old_slvuser, slave_host, (tmp - slave_host));
+ slave_vol->old_slvuser[(tmp - slave_host) + 1] = '\0';
+ slave_host = tmp + 1;
+ } else
+ strcpy(slave_vol->old_slvuser, "root");
+
+ tmp = strchr(slave_host, ':');
+ if (!tmp) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_VOL_PARSE_FAIL,
+ "Invalid slave_host!");
+ ret = -2;
+ goto out;
+ }
+
+ strncpy(slave_vol->old_slvhost, slave_host, (tmp - slave_host));
+ slave_vol->old_slvhost[(tmp - slave_host) + 1] = '\0';
+
+ goto out;
}
+ }
+ ret = 0;
+out:
+ return ret;
+}
- ret = 0;
+/* Given slave host and slave volume, check whether slave volume uuid
+ * already present.
+ * If slave volume uuid is present, get corresponding slave host
+ * for reference */
+static int
+glusterd_get_slavehost_from_voluuid(glusterd_volinfo_t *volinfo,
+ char *slave_host, char *slave_vol,
+ struct slave_vol_config *slave1)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
+
+ ret = dict_foreach(volinfo->gsync_slaves, get_slavehost_from_voluuid,
+ slave1);
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
}
int
-glusterd_op_stage_gsync_set (dict_t *dict, char **op_errstr)
+glusterd_op_stage_gsync_create(dict_t *dict, char **op_errstr)
{
- int ret = 0;
- int type = 0;
- char *volname = NULL;
- char *slave = NULL;
- gf_boolean_t exists = _gf_false;
- glusterd_volinfo_t *volinfo = NULL;
- char errmsg[PATH_MAX] = {0,};
- dict_t *ctx = NULL;
-
+ char *down_peerstr = NULL;
+ char *slave = NULL;
+ char *volname = NULL;
+ char *host_uuid = NULL;
+ char *statefile = NULL;
+ char *slave_url = NULL;
+ char *slave_host = NULL;
+ char *slave_vol = NULL;
+ char *conf_path = NULL;
+ char errmsg[PATH_MAX] = "";
+ char common_pem_file[PATH_MAX] = "";
+ char hook_script[PATH_MAX] = "";
+ char uuid_str[64] = "";
+ int ret = -1;
+ int is_pem_push = -1;
+ int ssh_port = 22;
+ gf_boolean_t is_force = -1;
+ gf_boolean_t is_no_verify = -1;
+ gf_boolean_t is_force_blocker = -1;
+ gf_boolean_t is_template_in_use = _gf_false;
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ struct slave_vol_config slave1 = {
+ {0},
+ };
+ char old_slave_url[SLAVE_URL_INFO_MAX] = {0};
+ char old_confpath[PATH_MAX] = {0};
+ gf_boolean_t is_running = _gf_false;
+ char *statedir = NULL;
+ char statefiledir[PATH_MAX] = {
+ 0,
+ };
+ gf_boolean_t is_different_slavehost = _gf_false;
+ gf_boolean_t is_different_username = _gf_false;
+ char *slave_user = NULL;
+ char *save_ptr = NULL;
+ char *slave_url_buf = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ ret = glusterd_op_gsync_args_get(dict, op_errstr, &volname, &slave,
+ &host_uuid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_ARG_FETCH_ERROR,
+ "Unable to fetch arguments");
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return -1;
+ }
+
+ if (conf->op_version < 2) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION,
+ "Op Version not supported.");
+ snprintf(errmsg, sizeof(errmsg),
+ "One or more nodes do not"
+ " support the required op version.");
+ *op_errstr = gf_strdup(errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND,
+ "volume name does not exist");
+ snprintf(errmsg, sizeof(errmsg),
+ "Volume name %s does not"
+ " exist",
+ volname);
+ goto out;
+ }
+
+ ret = glusterd_get_slave_details_confpath(volinfo, dict, &slave_url,
+ &slave_host, &slave_vol,
+ &conf_path, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR,
+ "Unable to fetch slave or confpath details.");
+ ret = -1;
+ goto out;
+ }
+
+ is_force = dict_get_str_boolean(dict, "force", _gf_false);
+
+ uuid_utoa_r(MY_UUID, uuid_str);
+ if (!strcmp(uuid_str, host_uuid)) {
+ ret = glusterd_are_vol_all_peers_up(volinfo, &conf->peers,
+ &down_peerstr);
+ if ((ret == _gf_false) && !is_force) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Peer %s,"
+ " which is a part of %s volume, is"
+ " down. Please bring up the peer and"
+ " retry.",
+ down_peerstr, volinfo->volname);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_DISCONNECTED, "%s",
+ errmsg);
+ *op_errstr = gf_strdup(errmsg);
+ GF_FREE(down_peerstr);
+ down_peerstr = NULL;
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return -1;
+ } else if (ret == _gf_false) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_PEER_DISCONNECTED,
+ "Peer %s, which is a part of %s volume, is"
+ " down. Force creating geo-rep session."
+ " On bringing up the peer, re-run"
+ " \"gluster system:: execute"
+ " gsec_create\" and \"gluster volume"
+ " geo-replication %s %s create push-pem"
+ " force\"",
+ down_peerstr, volinfo->volname, volinfo->volname, slave);
+ GF_FREE(down_peerstr);
+ down_peerstr = NULL;
+ }
- ret = dict_get_int32 (dict, "type", &type);
- if (ret < 0) {
- gf_log ("", GF_LOG_WARNING, "command type not found");
- *op_errstr = gf_strdup ("command unsuccessful");
- goto out;
+ ret = dict_get_int32(dict, "ssh_port", &ssh_port);
+ if (ret < 0 && ret != -ENOENT) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Fetching ssh_port failed while "
+ "handling " GEOREP " options");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ errmsg);
+ goto out;
}
- switch (type) {
- case GF_GSYNC_OPTION_TYPE_STATUS:
- ret = glusterd_verify_gsync_status_opts (dict, op_errstr);
+ is_no_verify = dict_get_str_boolean(dict, "no_verify", _gf_false);
+
+ if (!is_no_verify) {
+ /* Checking if slave host is pingable, has proper passwordless
+ * ssh login setup, slave volume is created, slave vol is empty,
+ * and if it has enough memory and bypass in case of force if
+ * the error is not a force blocker */
+ ret = glusterd_verify_slave(volname, slave_url, slave_vol, ssh_port,
+ op_errstr, &is_force_blocker);
+ if (ret) {
+ if (is_force && !is_force_blocker) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_INVALID_SLAVE,
+ "%s is not a valid slave "
+ "volume. Error: %s. Force "
+ "creating geo-rep"
+ " session.",
+ slave, *op_errstr);
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_SLAVE,
+ "%s is not a valid slave "
+ "volume. Error: %s",
+ slave, *op_errstr);
+ ret = -1;
- goto out;
- case GF_GSYNC_OPTION_TYPE_CONFIG:
- ret = gsync_verify_config_options (dict, op_errstr);
+ goto out;
+ }
+ }
+ }
+ ret = dict_get_int32(dict, "push_pem", &is_pem_push);
+ if (!ret && is_pem_push) {
+ ret = snprintf(common_pem_file, sizeof(common_pem_file),
+ "%s" GLUSTERD_COMMON_PEM_PUB_FILE, conf->workdir);
+ if ((ret < 0) || (ret >= sizeof(common_pem_file))) {
+ ret = -1;
goto out;
+ }
- case GF_GSYNC_OPTION_TYPE_ROTATE:
- /* checks same as status mode */
- ret = glusterd_verify_gsync_status_opts(dict, op_errstr);
+ ret = snprintf(hook_script, sizeof(hook_script),
+ "%s" GLUSTERD_CREATE_HOOK_SCRIPT, conf->workdir);
+ if ((ret < 0) || (ret >= sizeof(hook_script))) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = sys_lstat(common_pem_file, &stbuf);
+ if (ret) {
+ len = snprintf(errmsg, sizeof(errmsg),
+ "%s"
+ " required for push-pem is"
+ " not present. Please run"
+ " \"gluster system:: execute"
+ " gsec_create\"",
+ common_pem_file);
+ if (len < 0) {
+ strcpy(errmsg, "<error>");
+ }
+ gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED,
+ "%s", errmsg);
+ *op_errstr = gf_strdup(errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = sys_lstat(hook_script, &stbuf);
+ if (ret) {
+ len = snprintf(errmsg, sizeof(errmsg),
+ "The hook-script (%s) "
+ "required for push-pem is not "
+ "present. Please install the "
+ "hook-script and retry",
+ hook_script);
+ if (len < 0) {
+ strcpy(errmsg, "<error>");
+ }
+ gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED,
+ "%s", errmsg);
+ *op_errstr = gf_strdup(errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ if (!S_ISREG(stbuf.st_mode)) {
+ len = snprintf(errmsg, sizeof(errmsg),
+ "%s"
+ " required for push-pem is"
+ " not a regular file. Please"
+ " run \"gluster system:: "
+ "execute gsec_create\"",
+ common_pem_file);
+ if (len < 0) {
+ strcpy(errmsg, "<error>");
+ }
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REG_FILE_MISSING,
+ "%s", errmsg);
+ ret = -1;
goto out;
+ }
+ }
+ }
+
+ ret = glusterd_get_statefile_name(volinfo, slave, conf_path, &statefile,
+ &is_template_in_use);
+ if (ret) {
+ if (!strstr(slave, "::"))
+ snprintf(errmsg, sizeof(errmsg), "%s is not a valid slave url.",
+ slave);
+ else
+ snprintf(errmsg, sizeof(errmsg),
+ "Please check gsync "
+ "config file. Unable to get statefile's name");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATEFILE_NAME_NOT_FOUND,
+ "%s", errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "statefile", statefile);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to store statefile path");
+ goto out;
+ }
+
+ if (snprintf(statefiledir, sizeof(statefiledir), "%s", statefile) >=
+ sizeof(statefiledir)) {
+ snprintf(errmsg, sizeof(errmsg), "Failed copying statefiledir");
+ goto out;
+ }
+ statedir = dirname(statefiledir);
+
+ ret = sys_lstat(statedir, &stbuf);
+ if (!ret && !is_force) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Session between %s"
+ " and %s is already created.",
+ volinfo->volname, slave);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SESSION_ALREADY_EXIST, "%s",
+ errmsg);
+ ret = -1;
+ goto out;
+ } else if (!ret)
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_FORCE_CREATE_SESSION,
+ "Session between %s and %s is already created. Force"
+ " creating again.",
+ volinfo->volname, slave);
+
+ ret = glusterd_get_slave_voluuid(slave_host, slave_vol,
+ slave1.slave_voluuid);
+ if ((ret) || (strlen(slave1.slave_voluuid) == 0)) {
+ snprintf(errmsg, sizeof(errmsg), "Unable to get remote volume uuid.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REMOTE_VOL_UUID_FAIL, "%s",
+ errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(dict, "slave_voluuid",
+ slave1.slave_voluuid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set slave volume uuid in the dict");
+ goto out;
+ }
+
+ /* Check whether session is already created using slave volume uuid */
+ ret = glusterd_get_slavehost_from_voluuid(volinfo, slave_host, slave_vol,
+ &slave1);
+ if (ret == -1) {
+ if (!is_force) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Session between %s"
+ " and %s:%s is already created! Cannot create "
+ "with new slave:%s again!",
+ volinfo->volname, slave1.old_slvhost, slave_vol,
+ slave_host);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FORCE_CREATE_SESSION,
+ "Session between"
+ " %s and %s:%s is already created! "
+ "Cannot create with new slave:%s again!",
+ volinfo->volname, slave1.old_slvhost, slave_vol, slave_host);
+ goto out;
}
- ret = glusterd_op_gsync_args_get (dict, op_errstr, &volname, &slave);
- if (ret)
+ /* There is a remote possibility that slave_host can be NULL when
+ control reaches here. Add a check so we wouldn't crash in next
+ line */
+ if (!slave_host)
+ goto out;
+
+ /* Now, check whether session is already started.If so, warn!*/
+ is_different_slavehost = (strcmp(slave_host, slave1.old_slvhost) != 0)
+ ? _gf_true
+ : _gf_false;
+
+ if (strstr(slave_url, "@")) {
+ slave_url_buf = gf_strdup(slave_url);
+ if (!slave_url_buf) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Unable to allocate memory");
+ ret = -1;
+ goto out;
+ }
+ slave_user = strtok_r(slave_url_buf, "@", &save_ptr);
+ } else
+ slave_user = "root";
+ is_different_username = (strcmp(slave_user, slave1.old_slvuser) != 0)
+ ? _gf_true
+ : _gf_false;
+
+ /* Do the check, only if different slave host/slave user */
+ if (is_different_slavehost || is_different_username) {
+ len = snprintf(old_confpath, sizeof(old_confpath),
+ "%s/" GEOREP "/%s_%s_%s/gsyncd.conf", conf->workdir,
+ volinfo->volname, slave1.old_slvhost, slave_vol);
+ if ((len < 0) || (len >= sizeof(old_confpath))) {
+ ret = -1;
goto out;
+ }
- exists = glusterd_check_volume_exists (volname);
- ret = glusterd_volinfo_find (volname, &volinfo);
- if ((ret) || (!exists)) {
- gf_log ("", GF_LOG_WARNING, "volume name does not exist");
- snprintf (errmsg, sizeof(errmsg), "Volume name %s does not"
- " exist", volname);
- *op_errstr = gf_strdup (errmsg);
+ /* construct old slave url with (old) slave host */
+ len = snprintf(old_slave_url, sizeof(old_slave_url), "%s::%s",
+ slave1.old_slvhost, slave_vol);
+ if ((len < 0) || (len >= sizeof(old_slave_url))) {
ret = -1;
goto out;
+ }
+
+ ret = glusterd_check_gsync_running_local(
+ volinfo->volname, old_slave_url, old_confpath, &is_running);
+ if (_gf_true == is_running) {
+ (void)snprintf(errmsg, sizeof(errmsg),
+ "Geo"
+ "-replication session between %s and %s"
+ " is still active. Please stop the "
+ "session and retry.",
+ volinfo->volname, old_slave_url);
+ ret = -1;
+ goto out;
+ }
}
- switch (type) {
- case GF_GSYNC_OPTION_TYPE_START:
- ret = glusterd_op_verify_gsync_start_options (volinfo, slave,
- op_errstr);
- if (ret)
- goto out;
- ctx = glusterd_op_get_ctx();
- if (ctx) {
- /*gsyncd does a fuse mount to start the geo-rep session*/
- if (!glusterd_is_fuse_available ()) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to open"
- " /dev/fuse (%s), geo-replication start"
- " failed", strerror (errno));
- snprintf (errmsg, sizeof(errmsg),
- "fuse unvailable");
- *op_errstr = gf_strdup (errmsg);
- ret = -1;
- goto out;
- }
- }
- break;
-
- case GF_GSYNC_OPTION_TYPE_STOP:
- ret = glusterd_op_verify_gsync_running (volinfo, slave,
- op_errstr);
- break;
+ ret = dict_set_dynstr_with_alloc(dict, "old_slavehost",
+ slave1.old_slvhost);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set old_slavehost in the dict");
+ goto out;
}
+ ret = dict_set_int32(dict, "existing_session", _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set existing_session in the dict");
+ goto out;
+ }
+ } else if (ret == -2) {
+ snprintf(errmsg, sizeof(errmsg),
+ "get_slavehost_from_voluuid"
+ " failed for %s::%s. Please check the glusterd logs.",
+ slave_host, slave_vol);
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_FORCE_CREATE_SESSION,
+ "get_slavehost_from_voluuid failed %s %s!!", slave_host,
+ slave_vol);
+ goto out;
+ }
+
+ ret = glusterd_verify_gsyncd_spawn(volinfo->volname, slave);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg), "Unable to spawn gsyncd.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_SPAWN_FAILED, "%s",
+ errmsg);
+ goto out;
+ }
+
+ ret = 0;
out:
- return ret;
+
+ if (ret && errmsg[0] != '\0')
+ *op_errstr = gf_strdup(errmsg);
+
+ if (slave_url_buf)
+ GF_FREE(slave_url_buf);
+
+ return ret;
}
+/* pre-condition check for geo-rep pause/resume.
+ * Return: 0 on success
+ * -1 on any check failed.
+ */
static int
-stop_gsync (char *master, char *slave, char **msg)
-{
- int32_t ret = 0;
- int pfd = -1;
- pid_t pid = 0;
- char pidfile[PATH_MAX] = {0,};
- char buf [1024] = {0,};
- int i = 0;
-
- GF_ASSERT (THIS);
- GF_ASSERT (THIS->private);
-
- pfd = gsyncd_getpidfile (master, slave, pidfile);
- if (pfd == -2) {
- gf_log ("", GF_LOG_ERROR, GEOREP" stop validation "
- " failed for %s & %s", master, slave);
- ret = -1;
+gd_pause_resume_validation(int type, glusterd_volinfo_t *volinfo, char *slave,
+ char *statefile, char **op_errstr)
+{
+ int ret = 0;
+ char errmsg[PATH_MAX] = {
+ 0,
+ };
+ char monitor_status[NAME_MAX] = {
+ 0,
+ };
+
+ GF_ASSERT(volinfo);
+ GF_ASSERT(slave);
+ GF_ASSERT(statefile);
+ GF_ASSERT(op_errstr);
+
+ ret = glusterd_gsync_read_frm_status(statefile, monitor_status,
+ sizeof(monitor_status));
+ if (ret <= 0) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Pause check Failed:"
+ " Geo-rep session is not setup");
+ ret = -1;
+ goto out;
+ }
+
+ if (type == GF_GSYNC_OPTION_TYPE_PAUSE &&
+ strstr(monitor_status, "Paused")) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Geo-replication"
+ " session between %s and %s already Paused.",
+ volinfo->volname, slave);
+ ret = -1;
+ goto out;
+ }
+ if (type == GF_GSYNC_OPTION_TYPE_RESUME &&
+ !strstr(monitor_status, "Paused")) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Geo-replication"
+ " session between %s and %s is not Paused.",
+ volinfo->volname, slave);
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
+out:
+ if (ret && (errmsg[0] != '\0')) {
+ *op_errstr = gf_strdup(errmsg);
+ }
+ return ret;
+}
+
+int
+glusterd_op_stage_gsync_set(dict_t *dict, char **op_errstr)
+{
+ int ret = 0;
+ int type = 0;
+ char *volname = NULL;
+ char *slave = NULL;
+ char *slave_url = NULL;
+ char *slave_host = NULL;
+ char *slave_vol = NULL;
+ char *down_peerstr = NULL;
+ char *statefile = NULL;
+ char statefiledir[PATH_MAX] = {
+ 0,
+ };
+ char *statedir = NULL;
+ char *path_list = NULL;
+ char *conf_path = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char errmsg[PATH_MAX] = {
+ 0,
+ };
+ dict_t *ctx = NULL;
+ gf_boolean_t is_force = 0;
+ gf_boolean_t is_running = _gf_false;
+ gf_boolean_t is_template_in_use = _gf_false;
+ uuid_t uuid = {0};
+ char uuid_str[64] = {0};
+ char *host_uuid = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ ret = dict_get_int32(dict, "type", &type);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "command type not found");
+ *op_errstr = gf_strdup("command unsuccessful");
+ goto out;
+ }
+
+ if (type == GF_GSYNC_OPTION_TYPE_STATUS) {
+ ret = glusterd_verify_gsync_status_opts(dict, op_errstr);
+ goto out;
+ }
+
+ ret = glusterd_op_gsync_args_get(dict, op_errstr, &volname, &slave,
+ &host_uuid);
+ if (ret)
+ goto out;
+
+ uuid_utoa_r(MY_UUID, uuid_str);
+
+ if (conf->op_version < 2) {
+ snprintf(errmsg, sizeof(errmsg),
+ "One or more nodes do not"
+ " support the required op version.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Volume name %s does not"
+ " exist",
+ volname);
+ goto out;
+ }
+
+ ret = glusterd_get_slave_details_confpath(volinfo, dict, &slave_url,
+ &slave_host, &slave_vol,
+ &conf_path, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR,
+ "Unable to fetch slave or confpath details.");
+ ret = -1;
+ goto out;
+ }
+
+ is_force = dict_get_str_boolean(dict, "force", _gf_false);
+
+ ret = glusterd_get_statefile_name(volinfo, slave, conf_path, &statefile,
+ &is_template_in_use);
+ if (ret) {
+ if (!strstr(slave, "::")) {
+ snprintf(errmsg, sizeof(errmsg), "%s is not a valid slave url.",
+ slave);
+ ret = -1;
+ goto out;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_URL_INVALID,
+ "state_file entry missing in config file (%s)", conf_path);
+
+ if ((type == GF_GSYNC_OPTION_TYPE_STOP) && is_force) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_STOP_FORCE,
+ "Allowing stop "
+ "force to bypass missing statefile "
+ "entry in config file (%s), and "
+ "template file",
+ conf_path);
+ ret = 0;
+ } else
goto out;
}
- if (gsync_status_byfd (pfd) == -1) {
- gf_log ("", GF_LOG_ERROR, "gsyncd b/w %s & %s is not"
- " running", master, slave);
- if (msg)
- *msg = gf_strdup ("Warning: "GEOREP" session was "
- "defunct at stop time");
- /* monitor gsyncd already dead */
+ } else {
+ ret = dict_set_str(dict, "statefile", statefile);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to store statefile path");
+ goto out;
+ }
+ }
+
+ /* Allowing stop force to bypass the statefile check
+ * as this command acts as a fail safe method to stop geo-rep
+ * session. */
+ if (!((type == GF_GSYNC_OPTION_TYPE_STOP) && is_force)) {
+ /* check session directory as statefile may not present
+ * during upgrade */
+ if (snprintf(statefiledir, sizeof(statefiledir), "%s", statefile) >=
+ sizeof(statefiledir)) {
+ snprintf(errmsg, sizeof(errmsg), "Failed copying statefiledir");
+ ret = -1;
+ goto out;
+ }
+ statedir = dirname(statefiledir);
+
+ ret = sys_lstat(statedir, &stbuf);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Geo-replication"
+ " session between %s and %s does not exist.",
+ volinfo->volname, slave);
+ gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED,
+ "%s. statefile = %s", errmsg, statefile);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* Check if all peers that are a part of the volume are up or not */
+ if ((type == GF_GSYNC_OPTION_TYPE_DELETE) ||
+ ((type == GF_GSYNC_OPTION_TYPE_STOP) && !is_force) ||
+ (type == GF_GSYNC_OPTION_TYPE_PAUSE) ||
+ (type == GF_GSYNC_OPTION_TYPE_RESUME)) {
+ if (!strcmp(uuid_str, host_uuid)) {
+ ret = glusterd_are_vol_all_peers_up(volinfo, &conf->peers,
+ &down_peerstr);
+ if (ret == _gf_false) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Peer %s,"
+ " which is a part of %s volume, is"
+ " down. Please bring up the peer and"
+ " retry.",
+ down_peerstr, volinfo->volname);
+ ret = -1;
+ GF_FREE(down_peerstr);
+ down_peerstr = NULL;
goto out;
+ }
}
+ }
- if (pfd < 0)
+ switch (type) {
+ case GF_GSYNC_OPTION_TYPE_START:
+ if (is_template_in_use) {
+ snprintf(errmsg, sizeof(errmsg),
+ "state-file entry "
+ "missing in the config file(%s).",
+ conf_path);
+ ret = -1;
goto out;
+ }
- ret = read (pfd, buf, 1024);
- if (ret > 0) {
- pid = strtol (buf, NULL, 10);
- ret = kill (-pid, SIGTERM);
- if (ret) {
- gf_log ("", GF_LOG_WARNING,
- "failed to kill gsyncd");
- goto out;
- }
- for (i = 0; i < 20; i++) {
- if (gsync_status_byfd (pfd) == -1) {
- /* monitor gsyncd is dead but worker may
- * still be alive, give some more time
- * before SIGKILL (hack)
- */
- usleep (50000);
- break;
- }
- usleep (50000);
+ ret = glusterd_op_verify_gsync_start_options(
+ volinfo, slave, conf_path, statefile, op_errstr, is_force);
+ if (ret)
+ goto out;
+ ctx = glusterd_op_get_ctx();
+ if (ctx) {
+ /* gsyncd does a fuse mount to start
+ * the geo-rep session */
+ if (!glusterd_is_fuse_available()) {
+ gf_msg("glusterd", GF_LOG_ERROR, errno,
+ GD_MSG_GEO_REP_START_FAILED,
+ "Unable "
+ "to open /dev/fuse (%s), "
+ "geo-replication start failed",
+ strerror(errno));
+ snprintf(errmsg, sizeof(errmsg), "fuse unavailable");
+ ret = -1;
+ goto out;
}
- kill (-pid, SIGKILL);
- unlink (pidfile);
- }
- ret = 0;
+ }
+ break;
-out:
- sys_close (pfd);
- return ret;
-}
+ case GF_GSYNC_OPTION_TYPE_STOP:
+ if (!is_force) {
+ if (is_template_in_use) {
+ snprintf(errmsg, sizeof(errmsg),
+ "state-file entry missing in "
+ "the config file(%s).",
+ conf_path);
+ ret = -1;
+ goto out;
+ }
-static int
-glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave,
- dict_t *resp_dict);
+ ret = glusterd_op_verify_gsync_running(volinfo, slave,
+ conf_path, op_errstr);
+ if (ret) {
+ ret = glusterd_get_local_brickpaths(volinfo, &path_list);
+ if (!path_list && ret == -1)
+ goto out;
+ }
-static int
-glusterd_gsync_configure (glusterd_volinfo_t *volinfo, char *slave,
- dict_t *dict, dict_t *resp_dict, char **op_errstr)
-{
- int32_t ret = -1;
- char *op_name = NULL;
- char *op_value = NULL;
- runner_t runner = {0,};
- glusterd_conf_t *priv = NULL;
- char *subop = NULL;
- char *master = NULL;
-
- GF_ASSERT (slave);
- GF_ASSERT (op_errstr);
- GF_ASSERT (dict);
- GF_ASSERT (resp_dict);
-
- ret = dict_get_str (dict, "subop", &subop);
- if (ret != 0)
+ /* Check for geo-rep session is active or not for
+ * configured user.*/
+ ret = glusterd_gsync_get_uuid(slave, volinfo, uuid);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Geo-replication session between %s "
+ "and %s does not exist.",
+ volinfo->volname, slave);
+ ret = -1;
+ goto out;
+ }
+ }
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_PAUSE:
+ case GF_GSYNC_OPTION_TYPE_RESUME:
+ if (is_template_in_use) {
+ snprintf(errmsg, sizeof(errmsg),
+ "state-file entry missing in "
+ "the config file(%s).",
+ conf_path);
+ ret = -1;
goto out;
-
- if (strcmp (subop, "get") == 0 || strcmp (subop, "get-all") == 0) {
- /* deferred to cli */
- gf_log ("", GF_LOG_DEBUG, "Returning 0");
- return 0;
- }
-
- ret = dict_get_str (dict, "op_name", &op_name);
- if (ret != 0)
+ }
+
+ ret = glusterd_op_verify_gsync_running(volinfo, slave, conf_path,
+ op_errstr);
+ if (ret) {
+ ret = glusterd_get_local_brickpaths(volinfo, &path_list);
+ if (!path_list && ret == -1)
+ goto out;
+ }
+
+ /* Check for geo-rep session is active or not
+ * for configured user.*/
+ ret = glusterd_gsync_get_uuid(slave, volinfo, uuid);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Geo-replication"
+ " session between %s and %s does not exist.",
+ volinfo->volname, slave);
+ ret = -1;
goto out;
+ }
- if (strtail (subop, "set")) {
- ret = dict_get_str (dict, "op_value", &op_value);
- if (ret != 0)
+ if (!is_force) {
+ ret = gd_pause_resume_validation(type, volinfo, slave,
+ statefile, op_errstr);
+ if (ret) {
+ ret = glusterd_get_local_brickpaths(volinfo, &path_list);
+ if (!path_list && ret == -1)
goto out;
- }
+ }
+ }
+ break;
- if (THIS)
- priv = THIS->private;
- if (priv == NULL) {
- gf_log ("", GF_LOG_ERROR, "priv of glusterd not present");
- *op_errstr = gf_strdup ("glusterd defunct");
+ case GF_GSYNC_OPTION_TYPE_CONFIG:
+ if (is_template_in_use) {
+ snprintf(errmsg, sizeof(errmsg),
+ "state-file entry "
+ "missing in the config file(%s).",
+ conf_path);
+ ret = -1;
goto out;
- }
+ }
+
+ ret = gsync_verify_config_options(dict, op_errstr, volname);
+ goto out;
+ break;
+
+ case GF_GSYNC_OPTION_TYPE_DELETE:
+ /* Check if the gsync session is still running
+ * If so ask the user to stop geo-replication first.*/
+ if (is_template_in_use) {
+ snprintf(errmsg, sizeof(errmsg),
+ "state-file entry "
+ "missing in the config file(%s).",
+ conf_path);
+ ret = -1;
+ goto out;
+ }
- master = "";
- runinit (&runner);
- runner_add_args (&runner, GSYNCD_PREFIX"/gsyncd", "-c", NULL);
- runner_argprintf (&runner, "%s/"GSYNC_CONF, priv->workdir);
- if (volinfo) {
- master = volinfo->volname;
- runner_argprintf (&runner, ":%s", master);
- }
- runner_add_arg (&runner, slave);
- runner_argprintf (&runner, "--config-%s", subop);
- runner_add_arg (&runner, op_name);
- if (op_value)
- runner_add_arg (&runner, op_value);
- ret = runner_run (&runner);
- if (ret) {
- gf_log ("", GF_LOG_WARNING, "gsyncd failed to "
- "%s %s option for %s %s peers",
- subop, op_name, master, slave);
+ ret = glusterd_gsync_get_uuid(slave, volinfo, uuid);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Geo-replication"
+ " session between %s and %s does not exist.",
+ volinfo->volname, slave);
+ ret = -1;
+ goto out;
+ } else {
+ ret = glusterd_check_gsync_running_local(
+ volinfo->volname, slave, conf_path, &is_running);
+ if (_gf_true == is_running) {
+ snprintf(errmsg, sizeof(errmsg),
+ GEOREP
+ " session between %s & %s is "
+ "still active. Please stop the "
+ "session and retry.",
+ volinfo->volname, slave);
+ ret = -1;
+ goto out;
+ }
+ }
- gf_asprintf (op_errstr, GEOREP" config-%s failed for %s %s",
- subop, master, slave);
+ ret = glusterd_verify_gsyncd_spawn(volinfo->volname, slave);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg), "Unable to spawn gsyncd");
+ }
- goto out;
- }
- ret = 0;
- gf_asprintf (op_errstr, "config-%s successful", subop);
+ break;
+ }
out:
- if (!ret && volinfo) {
- ret = glusterd_check_restart_gsync_session (volinfo, slave,
- resp_dict);
- if (ret)
- *op_errstr = gf_strdup ("internal error");
- }
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
-}
+ if (path_list)
+ GF_FREE(path_list);
-static int
-glusterd_gsync_read_frm_status (char *path, char *buf, size_t blen)
-{
- int ret = 0;
- int status_fd = -1;
- char *p = NULL;
-
- GF_ASSERT (path);
- GF_ASSERT (buf);
- status_fd = open (path, O_RDONLY);
- if (status_fd == -1) {
- gf_log ("", GF_LOG_ERROR, "Unable to read gsyncd status"
- " file");
- return -1;
- }
- ret = read (status_fd, buf, blen - 1);
- if (ret > 0) {
- p = buf + strlen (buf) - 1;
- while (isspace (*p))
- *p-- = '\0';
- ret = 0;
- } else if (ret < 0)
- gf_log ("", GF_LOG_ERROR, "Status file of gsyncd is corrupt");
+ if (ret && errmsg[0] != '\0') {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR, "%s", errmsg);
+ *op_errstr = gf_strdup(errmsg);
+ }
- close (status_fd);
- return ret;
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
static int
-glusterd_gsync_fetch_status_extra (char *path, char *buf, size_t blen)
+gd_pause_or_resume_gsync(dict_t *dict, char *master, char *slave,
+ char *slave_host, char *slave_vol, char *conf_path,
+ char **op_errstr, gf_boolean_t is_pause)
{
- char sockpath[PATH_MAX] = {0,};
- struct sockaddr_un sa = {0,};
- size_t l = 0;
- int s = -1;
- struct pollfd pfd = {0,};
- int ret = 0;
-
- l = strlen (buf);
- /* seek to end of data in buf */
- buf += l;
- blen -= l;
-
- glusterd_set_socket_filepath (path, sockpath, sizeof (sockpath));
-
- strncpy(sa.sun_path, sockpath, sizeof(sa.sun_path));
- if (sa.sun_path[sizeof (sa.sun_path) - 1])
- return -1;
- sa.sun_family = AF_UNIX;
-
- s = socket(AF_UNIX, SOCK_STREAM, 0);
- if (s == -1)
- return -1;
+ int32_t ret = 0;
+ int pfd = -1;
+ long pid = 0;
+ char pidfile[PATH_MAX] = {
+ 0,
+ };
+ char errmsg[PATH_MAX] = "";
+ char buf[4096] = {
+ 0,
+ };
+ gf_boolean_t is_template_in_use = _gf_false;
+ char monitor_status[NAME_MAX] = {
+ 0,
+ };
+ char *statefile = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(master);
+ GF_ASSERT(slave);
+ GF_ASSERT(slave_host);
+ GF_ASSERT(slave_vol);
+ GF_ASSERT(conf_path);
+
+ pfd = gsyncd_getpidfile(master, slave, pidfile, conf_path,
+ &is_template_in_use);
+ if (pfd == -2) {
+ snprintf(errmsg, sizeof(errmsg),
+ "pid-file entry mising in config file and "
+ "template config file.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PIDFILE_NOT_FOUND, "%s",
+ errmsg);
+ *op_errstr = gf_strdup(errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ if (gsync_status_byfd(pfd) == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR,
+ "gsyncd b/w %s & %s is not running", master, slave);
+ /* monitor gsyncd already dead */
+ goto out;
+ }
+
+ if (pfd < 0)
+ goto out;
+
+ /* Prepare to update status file*/
+ ret = dict_get_str(dict, "statefile", &statefile);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Pause/Resume Failed: Unable to fetch statefile path");
+ goto out;
+ }
+ ret = glusterd_gsync_read_frm_status(statefile, monitor_status,
+ sizeof(monitor_status));
+ if (ret <= 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STAT_FILE_READ_FAILED,
+ "Pause/Resume Failed: "
+ "Unable to read status file for %s(master)"
+ " %s(slave)",
+ master, slave);
+ goto out;
+ }
+
+ ret = sys_read(pfd, buf, sizeof(buf) - 1);
+ if (ret > 0) {
+ buf[ret] = '\0';
+ pid = strtol(buf, NULL, 10);
+ if (is_pause) {
+ ret = kill(-pid, SIGSTOP);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_PID_KILL_FAIL,
+ "Failed"
+ " to pause gsyncd. Error: %s",
+ strerror(errno));
+ goto out;
+ }
+ /*On pause force, if status is already paused
+ do not update status again*/
+ if (strstr(monitor_status, "Paused"))
+ goto out;
- ret = connect (s, (struct sockaddr *)&sa, sizeof (sa));
- if (ret == -1)
+ ret = glusterd_create_status_file(master, slave, slave_host,
+ slave_vol, "Paused");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_UPDATE_STATEFILE_FAILED,
+ "Unable to update state_file."
+ " Error : %s",
+ strerror(errno));
+ /* If status cannot be updated resume back */
+ if (kill(-pid, SIGCONT)) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Pause successful but could "
+ "not update status file. "
+ "Please use 'resume force' to"
+ " resume back and retry pause"
+ " to reflect in status");
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_PID_KILL_FAIL,
+ "Resume back Failed. Error:"
+ "%s",
+ strerror(errno));
+ *op_errstr = gf_strdup(errmsg);
+ }
goto out;
- pfd.fd = s;
- pfd.events = POLLIN;
- /* we don't want to hang on gsyncd */
- if (poll (&pfd, 1, 5000) < 1 ||
- !(pfd.revents & POLLIN)) {
- ret = -1;
+ }
+ } else {
+ ret = glusterd_create_status_file(master, slave, slave_host,
+ slave_vol, "Started");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_UPDATE_STATEFILE_FAILED,
+ "Resume Failed: Unable to update "
+ "state_file. Error : %s",
+ strerror(errno));
+ goto out;
+ }
+ ret = kill(-pid, SIGCONT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_PID_KILL_FAIL,
+ "Resumed Failed: Unable to send"
+ " SIGCONT. Error: %s",
+ strerror(errno));
+ /* Process can't be resumed, update status
+ * back to paused. */
+ ret = glusterd_create_status_file(master, slave, slave_host,
+ slave_vol, monitor_status);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Resume failed!!! Status "
+ "inconsistent. Please use "
+ "'resume force' to resume and"
+ " reach consistent state");
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_STATUS_UPDATE_FAILED,
+ "Updating status back to paused"
+ " Failed. Error: %s",
+ strerror(errno));
+ *op_errstr = gf_strdup(errmsg);
+ }
goto out;
+ }
}
- ret = read(s, buf, blen);
- /* we expect a terminating 0 byte */
- if (ret == 0 || (ret > 0 && buf[ret - 1]))
- ret = -1;
- if (ret > 0)
- ret = 0;
+ }
+ ret = 0;
- out:
- close (s);
- return ret;
+out:
+ sys_close(pfd);
+ /* coverity[INTEGER_OVERFLOW] */
+ return ret;
}
static int
-dict_get_param (dict_t *dict, char *key, char **param)
+stop_gsync(char *master, char *slave, char **msg, char *conf_path,
+ char **op_errstr, gf_boolean_t is_force)
{
- char *dk = NULL;
- char *s = NULL;
- char x = '\0';
- int ret = 0;
+ int32_t ret = 0;
+ int pfd = -1;
+ long pid = 0;
+ char pidfile[PATH_MAX] = {
+ 0,
+ };
+ char errmsg[PATH_MAX] = "";
+ char buf[4096] = {
+ 0,
+ };
+ int i = 0;
+ gf_boolean_t is_template_in_use = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(this->private);
+
+ pfd = gsyncd_getpidfile(master, slave, pidfile, conf_path,
+ &is_template_in_use);
+ if (pfd == -2) {
+ snprintf(errmsg, sizeof(errmsg) - 1,
+ "pid-file entry mising in config file and "
+ "template config file.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PIDFILE_NOT_FOUND, "%s",
+ errmsg);
+ *op_errstr = gf_strdup(errmsg);
+ ret = -1;
+ goto out;
+ }
+ if (gsync_status_byfd(pfd) == -1 && !is_force) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR,
+ "gsyncd b/w %s & %s is not running", master, slave);
+ /* monitor gsyncd already dead */
+ goto out;
+ }
+
+ if (pfd < 0)
+ goto out;
+
+ ret = sys_read(pfd, buf, sizeof(buf) - 1);
+ if (ret > 0) {
+ buf[ret] = '\0';
+ pid = strtol(buf, NULL, 10);
+ ret = kill(-pid, SIGTERM);
+ if (ret && !is_force) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_PID_KILL_FAIL,
+ "failed to kill gsyncd");
+ goto out;
+ }
+ for (i = 0; i < 20; i++) {
+ if (gsync_status_byfd(pfd) == -1) {
+ /* monitor gsyncd is dead but worker may
+ * still be alive, give some more time
+ * before SIGKILL (hack)
+ */
+ gf_nanosleep(50000 * GF_US_IN_NS);
+ break;
+ }
+ gf_nanosleep(50000 * GF_US_IN_NS);
+ }
+ kill(-pid, SIGKILL);
+ sys_unlink(pidfile);
+ }
+ ret = 0;
- if (dict_get_str (dict, key, param) == 0)
- return 0;
+out:
+ sys_close(pfd);
+ /* coverity[INTEGER_OVERFLOW] */
+ return ret;
+}
- dk = gf_strdup (key);
- if (!key)
- return -1;
+/*
+ * glusterd_gsync_op_already_set:
+ * This function checks whether the op_value is same as in the
+ * gsyncd.conf file.
+ *
+ * RETURN VALUE:
+ * 0 : op_value matches the conf file.
+ * 1 : op_value does not matches the conf file or op_param not
+ * found in conf file.
+ * -1 : error
+ */
- s = strpbrk (dk, "-_");
- if (!s)
- return -1;
- x = (*s == '-') ? '_' : '-';
- *s++ = x;
- while ((s = strpbrk (s, "-_")))
- *s++ = x;
+int
+glusterd_gsync_op_already_set(char *master, char *slave, char *conf_path,
+ char *op_name, char *op_value)
+{
+ dict_t *confd = NULL;
+ char *op_val_buf = NULL;
+ int32_t op_val_conf = 0;
+ int32_t op_val_cli = 0;
+ int32_t ret = -1;
+ gf_boolean_t is_bool = _gf_true;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ confd = dict_new();
+ if (!confd) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Not able to create dict.");
+ return -1;
+ }
+
+ ret = glusterd_gsync_get_config(master, slave, conf_path, confd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GET_CONFIG_INFO_FAILED,
+ "Unable to get configuration data for %s(master), "
+ "%s(slave)",
+ master, slave);
+ goto out;
+ }
+
+ ret = dict_get_param(confd, op_name, &op_val_buf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get op_value for %s(master), %s(slave). "
+ "Please check gsync config file.",
+ master, slave);
+ ret = 1;
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0, "val_cli:%s val_conf:%s", op_value,
+ op_val_buf);
+
+ if (!strcmp(op_val_buf, "true") || !strcmp(op_val_buf, "1") ||
+ !strcmp(op_val_buf, "yes")) {
+ op_val_conf = 1;
+ } else if (!strcmp(op_val_buf, "false") || !strcmp(op_val_buf, "0") ||
+ !strcmp(op_val_buf, "no")) {
+ op_val_conf = 0;
+ } else {
+ is_bool = _gf_false;
+ }
+
+ if (is_bool) {
+ if (op_value && (!strcmp(op_value, "true") || !strcmp(op_value, "1") ||
+ !strcmp(op_value, "yes"))) {
+ op_val_cli = 1;
+ } else {
+ op_val_cli = 0;
+ }
+
+ if (op_val_cli == op_val_conf) {
+ ret = 0;
+ goto out;
+ }
+ } else {
+ if (op_value && !strcmp(op_val_buf, op_value)) {
+ ret = 0;
+ goto out;
+ }
+ }
- ret = dict_get_str (dict, dk, param);
+ ret = 1;
- GF_FREE (dk);
- return ret;
+out:
+ dict_unref(confd);
+ return ret;
}
static int
-glusterd_read_status_file (char *master, char *slave,
- dict_t *dict)
-{
- glusterd_conf_t *priv = NULL;
- int ret = 0;
- char *statefile = NULL;
- char buf[1024] = {0, };
- char mst[1024] = {0, };
- char slv[1024] = {0, };
- char sts[1024] = {0, };
- char *bufp = NULL;
- dict_t *confd = NULL;
- int gsync_count = 0;
- int status = 0;
-
- GF_ASSERT (THIS);
- GF_ASSERT (THIS->private);
-
- confd = dict_new ();
- if (!dict)
- return -1;
+glusterd_gsync_configure(glusterd_volinfo_t *volinfo, char *slave,
+ char *path_list, dict_t *dict, dict_t *resp_dict,
+ char **op_errstr)
+{
+ int32_t ret = -1;
+ char *op_name = NULL;
+ char *op_value = NULL;
+ runner_t runner = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ char *subop = NULL;
+ char *master = NULL;
+ char *conf_path = NULL;
+ char *slave_host = NULL;
+ char *slave_vol = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ gf_boolean_t restart_required = _gf_true;
+ char **resopt = NULL;
+ gf_boolean_t op_already_set = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(slave);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(dict);
+ GF_ASSERT(resp_dict);
+
+ ret = dict_get_str(dict, "subop", &subop);
+ if (ret != 0)
+ goto out;
+
+ if (strcmp(subop, "get") == 0 || strcmp(subop, "get-all") == 0) {
+ /* deferred to cli */
+ gf_msg_debug(this->name, 0, "Returning 0");
+ return 0;
+ }
- priv = THIS->private;
- ret = glusterd_gsync_get_config (master, slave, priv->workdir,
- confd);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get configuration data"
- "for %s(master), %s(slave)", master, slave);
- goto out;
+ ret = dict_get_str(dict, "op_name", &op_name);
+ if (ret != 0)
+ goto out;
+ if (strtail(subop, "set")) {
+ ret = dict_get_str(dict, "op_value", &op_value);
+ if (ret != 0)
+ goto out;
+ }
+
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_PRIV_NOT_FOUND,
+ "priv of glusterd not present");
+ *op_errstr = gf_strdup("glusterd defunct");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "conf_path", &conf_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch conf file path.");
+ goto out;
+ }
+
+ master = "";
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
+ runner_argprintf(&runner, "%s", conf_path);
+ runner_argprintf(&runner, "--iprefix=%s", DATADIR);
+ if (volinfo) {
+ master = volinfo->volname;
+ runner_argprintf(&runner, ":%s", master);
+ }
+ runner_add_arg(&runner, slave);
+ runner_argprintf(&runner, "--config-%s", subop);
+ runner_add_arg(&runner, op_name);
+ if (op_value) {
+ runner_argprintf(&runner, "--value=%s", op_value);
+ }
+
+ if (strcmp(op_name, "checkpoint") != 0 && strtail(subop, "set")) {
+ ret = glusterd_gsync_op_already_set(master, slave, conf_path, op_name,
+ op_value);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GSYNCD_OP_SET_FAILED,
+ "glusterd_gsync_op_already_set failed.");
+ gf_asprintf(op_errstr,
+ GEOREP
+ " config-%s failed for "
+ "%s %s",
+ subop, master, slave);
+ goto out;
}
-
- ret = gsync_status (master, slave, &status);
- if (ret == 0 && status == -1) {
- strncpy (buf, "defunct", sizeof (buf));
- goto done;
- } else if (ret == -1)
- goto out;
-
- ret = dict_get_param (confd, "state_file", &statefile);
- if (ret)
- goto out;
- ret = glusterd_gsync_read_frm_status (statefile, buf, sizeof (buf));
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to read the status"
- "file for %s(master), %s(slave)", master, slave);
- strncpy (buf, "defunct", sizeof (buf));
- goto done;
+ if (ret == 0) {
+ gf_msg_debug(this->name, 0, "op_value is already set");
+ op_already_set = _gf_true;
+ goto out;
}
- if (strcmp (buf, "OK") != 0)
- goto done;
+ }
- ret = dict_get_param (confd, "state_socket_unencoded", &statefile);
- if (ret)
- goto out;
- ret = glusterd_gsync_fetch_status_extra (statefile, buf, sizeof (buf));
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to fetch extra status"
- "for %s(master), %s(slave)", master, slave);
- /* there is a slight chance that this occurs due to race
- * -- in that case, the following options all seem bad:
- *
- * - suppress irregurlar behavior by just leaving status
- * on "OK"
- * - freak out users with a misleading "defunct"
- * - overload the meaning of the regular error signal
- * mechanism of gsyncd, that is, when status is "faulty"
- *
- * -- so we just come up with something new...
- */
- strncpy (buf, "N/A", sizeof (buf));
- goto done;
- }
+ synclock_unlock(&priv->big_lock);
+ ret = runner_run(&runner);
+ synclock_lock(&priv->big_lock);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GSYNCD_ERROR,
+ "gsyncd failed to %s %s option for "
+ "%s %s peers",
+ subop, op_name, master, slave);
- done:
- ret = dict_get_int32 (dict, "gsync-count", &gsync_count);
+ gf_asprintf(op_errstr, GEOREP " config-%s failed for %s %s", subop,
+ master, slave);
- if (ret)
- gsync_count = 1;
- else
- gsync_count++;
+ goto out;
+ }
- snprintf (mst, sizeof (mst), "master%d", gsync_count);
- master = gf_strdup (master);
- if (!master)
- goto out;
- ret = dict_set_dynstr (dict, mst, master);
+ if ((!strcmp(op_name, "state_file")) && (op_value)) {
+ ret = sys_lstat(op_value, &stbuf);
if (ret) {
- GF_FREE (master);
+ ret = dict_get_str(dict, "slave_host", &slave_host);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch slave host.");
goto out;
- }
+ }
- snprintf (slv, sizeof (slv), "slave%d", gsync_count);
- slave = gf_strdup (slave);
- if (!slave)
+ ret = dict_get_str(dict, "slave_vol", &slave_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch slave volume name.");
goto out;
- ret = dict_set_dynstr (dict, slv, slave);
- if (ret) {
- GF_FREE (slave);
+ }
+
+ ret = glusterd_create_status_file(volinfo->volname, slave,
+ slave_host, slave_vol,
+ "Switching Status "
+ "File");
+ if (ret || sys_lstat(op_value, &stbuf)) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Unable to "
+ "create %s. Error : %s",
+ op_value, strerror(errno));
+ ret = -1;
goto out;
+ }
}
+ }
- snprintf (sts, sizeof (slv), "status%d", gsync_count);
- bufp = gf_strdup (buf);
- if (!bufp)
- goto out;
- ret = dict_set_dynstr (dict, sts, bufp);
- if (ret) {
- GF_FREE (bufp);
- goto out;
+ ret = 0;
+ gf_asprintf(op_errstr, "config-%s successful", subop);
+
+out:
+ if (!ret && volinfo && !op_already_set) {
+ for (resopt = gsync_no_restart_opts; *resopt; resopt++) {
+ restart_required = _gf_true;
+ if (!strcmp((*resopt), op_name)) {
+ restart_required = _gf_false;
+ break;
+ }
}
- ret = dict_set_int32 (dict, "gsync-count", gsync_count);
- if (ret)
- goto out;
- ret = 0;
- out:
- dict_destroy (confd);
+ if (restart_required) {
+ ret = glusterd_check_restart_gsync_session(
+ volinfo, slave, resp_dict, path_list, conf_path, 0);
+ if (ret)
+ *op_errstr = gf_strdup("internal error");
+ }
+ }
- gf_log ("", GF_LOG_DEBUG, "Returning %d ", ret);
- return ret;
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
-static int
-glusterd_check_restart_gsync_session (glusterd_volinfo_t *volinfo, char *slave,
- dict_t *resp_dict)
+int
+glusterd_gsync_read_frm_status(char *path, char *buf, size_t blen)
{
+ int ret = 0;
+ int status_fd = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(path);
+ GF_ASSERT(buf);
+ status_fd = open(path, O_RDONLY);
+ if (status_fd == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED,
+ "Unable to read gsyncd status file %s", path);
+ return -1;
+ }
+ ret = sys_read(status_fd, buf, blen - 1);
+ if (ret > 0) {
+ size_t len = strnlen(buf, ret);
+ /* Ensure there is a NUL byte and that it's not the first. */
+ if (len == 0 || len == blen - 1) {
+ ret = -1;
+ } else {
+ char *p = buf + len - 1;
+ while (isspace(*p))
+ *p-- = '\0';
+ }
+ } else if (ret == 0)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR,
+ "Status file of gsyncd is empty");
+ else /* ret < 0 */
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR,
+ "Status file of gsyncd is corrupt");
+
+ sys_close(status_fd);
+ return ret;
+}
- int ret = 0;
- uuid_t uuid = {0, };
- glusterd_conf_t *priv = NULL;
- char *status_msg = NULL;
+static int
+dict_get_param(dict_t *dict, char *key, char **param)
+{
+ char *dk = NULL;
+ char *s = NULL;
+ char x = '\0';
+ int ret = 0;
- GF_ASSERT (volinfo);
- GF_ASSERT (slave);
- GF_ASSERT (THIS);
- GF_ASSERT (THIS->private);
+ if (dict_get_str(dict, key, param) == 0)
+ return 0;
- priv = THIS->private;
+ dk = gf_strdup(key);
+ if (!dk)
+ return -1;
- if (glusterd_gsync_get_uuid (slave, volinfo, uuid))
- /* session does not exist, nothing to do */
- goto out;
- if (uuid_compare (MY_UUID, uuid) == 0) {
- ret = stop_gsync (volinfo->volname, slave, &status_msg);
- if (ret == 0 && status_msg)
- ret = dict_set_str (resp_dict, "gsync-status",
- status_msg);
- if (ret == 0)
- ret = glusterd_start_gsync (volinfo, slave,
- uuid_utoa(MY_UUID), NULL);
- }
+ s = strpbrk(dk, "-_");
+ if (!s) {
+ ret = -1;
+ goto out;
+ }
+ x = (*s == '-') ? '_' : '-';
+ *s++ = x;
+ while ((s = strpbrk(s, "-_")))
+ *s++ = x;
- out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ ret = dict_get_str(dict, dk, param);
+out:
+ GF_FREE(dk);
+ return ret;
}
-static int32_t
-glusterd_marker_create_volfile (glusterd_volinfo_t *volinfo)
+int
+glusterd_fetch_values_from_config(char *master, char *slave, char *confpath,
+ dict_t *confd, char **statefile,
+ char **georep_session_wrkng_dir,
+ char **socketfile)
{
- int32_t ret = 0;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = glusterd_gsync_get_config(master, slave, confpath, confd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GET_CONFIG_INFO_FAILED,
+ "Unable to get configuration data for %s(master), "
+ "%s(slave)",
+ master, slave);
+ goto out;
+ }
+
+ if (statefile) {
+ ret = dict_get_param(confd, "state_file", statefile);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get state_file's name "
+ "for %s(master), %s(slave). "
+ "Please check gsync config file.",
+ master, slave);
+ goto out;
+ }
+ }
- ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ if (georep_session_wrkng_dir) {
+ ret = dict_get_param(confd, "georep_session_working_dir",
+ georep_session_wrkng_dir);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to create volfile"
- " for setting of marker while '"GEOREP" start'");
- ret = -1;
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get geo-rep session's "
+ "working directory name for %s(master), "
+ "%s(slave). Please check gsync config file.",
+ master, slave);
+ goto out;
}
+ }
- ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret)
- goto out;
+ if (socketfile) {
+ ret = dict_get_param(confd, "state_socket_unencoded", socketfile);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get socket file's name "
+ "for %s(master), %s(slave). "
+ "Please check gsync config file.",
+ master, slave);
+ goto out;
+ }
+ }
- if (GLUSTERD_STATUS_STARTED == volinfo->status)
- ret = glusterd_nodesvcs_handle_graph_change (volinfo);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
-static int
-glusterd_set_marker_gsync (glusterd_volinfo_t *volinfo)
+int
+glusterd_read_status_file(glusterd_volinfo_t *volinfo, char *slave,
+ char *conf_path, dict_t *dict, char *node)
{
- int ret = -1;
- int marker_set = _gf_false;
- char *gsync_status = NULL;
+ char temp_conf_path[PATH_MAX] = "";
+ char *working_conf_path = NULL;
+ char *georep_session_wrkng_dir = NULL;
+ char *master = NULL;
+ char sts_val_name[1024] = "";
+ char monitor_status[NAME_MAX] = "";
+ char *statefile = NULL;
+ char *socketfile = NULL;
+ dict_t *confd = NULL;
+ char *slavekey = NULL;
+ char *slaveentry = NULL;
+ char *slaveuser = NULL;
+ char *saveptr = NULL;
+ char *temp = NULL;
+ char *temp_inp = NULL;
+ char *brick_host_uuid = NULL;
+ int brick_host_uuid_length = 0;
+ int gsync_count = 0;
+ int ret = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ gf_gsync_status_t *sts_val = NULL;
+ gf_boolean_t is_template_in_use = _gf_false;
+ glusterd_conf_t *priv = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(this->private);
+ GF_ASSERT(volinfo);
+ GF_ASSERT(conf_path);
+
+ master = volinfo->volname;
+
+ confd = dict_new();
+ if (!confd) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Not able to create dict.");
+ return -1;
+ }
- GF_ASSERT (THIS);
- GF_ASSERT (THIS->private);
+ priv = THIS->private;
- marker_set = glusterd_volinfo_get_boolean (volinfo, VKEY_MARKER_XTIME);
- if (marker_set == -1) {
- gf_log ("", GF_LOG_ERROR, "failed to get the marker status");
- ret = -1;
- goto out;
+ len = snprintf(temp_conf_path, sizeof(temp_conf_path),
+ "%s/" GSYNC_CONF_TEMPLATE, priv->workdir);
+ if ((len < 0) || (len >= sizeof(temp_conf_path))) {
+ return -1;
+ }
+
+ ret = sys_lstat(conf_path, &stbuf);
+ if (!ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CONFIG_INFO,
+ "Using passed config template(%s).", conf_path);
+ working_conf_path = conf_path;
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, ENOENT, GD_MSG_FILE_OP_FAILED,
+ "Config file (%s) missing. Looking for template "
+ "config file (%s)",
+ conf_path, temp_conf_path);
+ ret = sys_lstat(temp_conf_path, &stbuf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED,
+ "Template "
+ "config file (%s) missing.",
+ temp_conf_path);
+ goto out;
+ }
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DEFAULT_TEMP_CONFIG,
+ "Using default config template(%s).", temp_conf_path);
+ working_conf_path = temp_conf_path;
+ is_template_in_use = _gf_true;
+ }
+
+fetch_data:
+ ret = glusterd_fetch_values_from_config(
+ master, slave, working_conf_path, confd, &statefile,
+ &georep_session_wrkng_dir, &socketfile);
+ if (ret) {
+ if (is_template_in_use == _gf_false) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FETCH_CONFIG_VAL_FAILED,
+ "Unable to fetch config values "
+ "for %s(master), %s(slave). "
+ "Trying default config template",
+ master, slave);
+ working_conf_path = temp_conf_path;
+ is_template_in_use = _gf_true;
+ goto fetch_data;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FETCH_CONFIG_VAL_FAILED,
+ "Unable to "
+ "fetch config values for %s(master), "
+ "%s(slave)",
+ master, slave);
+ goto out;
+ }
+ }
+
+ ret = glusterd_gsync_read_frm_status(statefile, monitor_status,
+ sizeof(monitor_status));
+ if (ret <= 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STAT_FILE_READ_FAILED,
+ "Unable to read the status file for %s(master), "
+ "%s(slave) statefile: %s",
+ master, slave, statefile);
+ snprintf(monitor_status, sizeof(monitor_status), "defunct");
+ }
+
+ ret = dict_get_int32(dict, "gsync-count", &gsync_count);
+ if (ret)
+ gsync_count = 0;
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID))
+ continue;
+
+ sts_val = GF_CALLOC(1, sizeof(gf_gsync_status_t),
+ gf_common_mt_gsync_status_t);
+ if (!sts_val) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Out Of Memory");
+ goto out;
}
- if (marker_set == _gf_false) {
- gsync_status = gf_strdup ("on");
- if (gsync_status == NULL) {
- ret = -1;
- goto out;
- }
+ /* Slave Key */
+ ret = glusterd_get_slave(volinfo, slave, &slavekey);
+ if (ret < 0) {
+ GF_FREE(sts_val);
+ goto out;
+ }
+ memcpy(sts_val->slavekey, slavekey, strlen(slavekey));
+ sts_val->slavekey[strlen(slavekey)] = '\0';
- ret = glusterd_gsync_volinfo_dict_set (volinfo,
- VKEY_MARKER_XTIME, gsync_status);
- if (ret < 0)
- goto out;
+ /* Master Volume */
+ memcpy(sts_val->master, master, strlen(master));
+ sts_val->master[strlen(master)] = '\0';
- ret = glusterd_marker_create_volfile (volinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Setting dict failed");
- goto out;
- }
- }
- ret = 0;
+ /* Master Brick Node */
+ memcpy(sts_val->node, brickinfo->hostname, strlen(brickinfo->hostname));
+ sts_val->node[strlen(brickinfo->hostname)] = '\0';
-out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
-}
+ /* Master Brick Path */
+ memcpy(sts_val->brick, brickinfo->path, strlen(brickinfo->path));
+ sts_val->brick[strlen(brickinfo->path)] = '\0';
+ /* Brick Host UUID */
+ brick_host_uuid = uuid_utoa(brickinfo->uuid);
+ brick_host_uuid_length = strlen(brick_host_uuid);
+ memcpy(sts_val->brick_host_uuid, brick_host_uuid,
+ brick_host_uuid_length);
+ sts_val->brick_host_uuid[brick_host_uuid_length] = '\0';
+ /* Slave */
+ memcpy(sts_val->slave, slave, strlen(slave));
+ sts_val->slave[strlen(slave)] = '\0';
+ snprintf(sts_val->slave_node, sizeof(sts_val->slave_node), "N/A");
-static int
-glusterd_get_gsync_status_mst_slv (glusterd_volinfo_t *volinfo,
- char *slave, dict_t *rsp_dict)
-{
- uuid_t uuid = {0, };
- glusterd_conf_t *priv = NULL;
- int ret = 0;
+ snprintf(sts_val->worker_status, sizeof(sts_val->worker_status), "N/A");
- GF_ASSERT (volinfo);
- GF_ASSERT (slave);
- GF_ASSERT (THIS);
- GF_ASSERT (THIS->private);
+ snprintf(sts_val->crawl_status, sizeof(sts_val->crawl_status), "N/A");
- priv = THIS->private;
+ snprintf(sts_val->last_synced, sizeof(sts_val->last_synced), "N/A");
- ret = glusterd_gsync_get_uuid (slave, volinfo, uuid);
- if ((ret == 0) && (uuid_compare (MY_UUID, uuid) != 0))
- goto out;
+ snprintf(sts_val->last_synced_utc, sizeof(sts_val->last_synced_utc),
+ "N/A");
- if (ret) {
- ret = 0;
- gf_log ("", GF_LOG_INFO, "geo-replication status %s %s :"
- "session is not active", volinfo->volname, slave);
- goto out;
- }
+ snprintf(sts_val->entry, sizeof(sts_val->entry), "N/A");
- ret = glusterd_read_status_file (volinfo->volname, slave, rsp_dict);
- out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
- return ret;
-}
+ snprintf(sts_val->data, sizeof(sts_val->data), "N/A");
-static int
-glusterd_get_gsync_status_mst (glusterd_volinfo_t *volinfo, dict_t *rsp_dict)
-{
- glusterd_gsync_status_temp_t param = {0, };
+ snprintf(sts_val->meta, sizeof(sts_val->meta), "N/A");
- GF_ASSERT (volinfo);
+ snprintf(sts_val->failures, sizeof(sts_val->failures), "N/A");
- param.rsp_dict = rsp_dict;
- param.volinfo = volinfo;
- dict_foreach (volinfo->gsync_slaves, _get_status_mst_slv, &param);
+ snprintf(sts_val->checkpoint_time, sizeof(sts_val->checkpoint_time),
+ "N/A");
- return 0;
-}
+ snprintf(sts_val->checkpoint_time_utc,
+ sizeof(sts_val->checkpoint_time_utc), "N/A");
-static int
-glusterd_get_gsync_status_all ( dict_t *rsp_dict)
-{
+ snprintf(sts_val->checkpoint_completed,
+ sizeof(sts_val->checkpoint_completed), "N/A");
- int32_t ret = 0;
- glusterd_conf_t *priv = NULL;
- glusterd_volinfo_t *volinfo = NULL;
+ snprintf(sts_val->checkpoint_completion_time,
+ sizeof(sts_val->checkpoint_completion_time), "N/A");
- GF_ASSERT (THIS);
- priv = THIS->private;
+ snprintf(sts_val->checkpoint_completion_time_utc,
+ sizeof(sts_val->checkpoint_completion_time_utc), "N/A");
- GF_ASSERT (priv);
+ /* Get all the other values from Gsyncd */
+ ret = glusterd_gsync_get_status(master, slave, conf_path,
+ brickinfo->path, sts_val);
- list_for_each_entry (volinfo, &priv->volumes, vol_list) {
- ret = glusterd_get_gsync_status_mst (volinfo, rsp_dict);
- if (ret)
- goto out;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GET_STATUS_DATA_FAIL,
+ "Unable to get status data "
+ "for %s(master), %s(slave), %s(brick)",
+ master, slave, brickinfo->path);
+ ret = -1;
+ goto out;
}
-out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
- return ret;
+ if (is_template_in_use) {
+ snprintf(sts_val->worker_status, sizeof(sts_val->worker_status),
+ "Config Corrupted");
+ }
-}
+ ret = dict_get_str(volinfo->gsync_slaves, slavekey, &slaveentry);
+ if (ret < 0) {
+ GF_FREE(sts_val);
+ goto out;
+ }
-static int
-glusterd_get_gsync_status (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
-{
- char *slave = NULL;
- char *volname = NULL;
- char errmsg[PATH_MAX] = {0, };
- gf_boolean_t exists = _gf_false;
- glusterd_volinfo_t *volinfo = NULL;
- int ret = 0;
+ memcpy(sts_val->session_slave, slaveentry, strlen(slaveentry));
+ sts_val->session_slave[strlen(slaveentry)] = '\0';
+ temp_inp = gf_strdup(slaveentry);
+ if (!temp_inp)
+ goto out;
- ret = dict_get_str (dict, "master", &volname);
- if (ret < 0){
- ret = glusterd_get_gsync_status_all (rsp_dict);
- goto out;
+ if (strstr(temp_inp, "@") == NULL) {
+ slaveuser = "root";
+ } else {
+ temp = strtok_r(temp_inp, "//", &saveptr);
+ temp = strtok_r(NULL, "/", &saveptr);
+ slaveuser = strtok_r(temp, "@", &saveptr);
}
+ memcpy(sts_val->slave_user, slaveuser, strlen(slaveuser));
+ sts_val->slave_user[strlen(slaveuser)] = '\0';
- exists = glusterd_check_volume_exists (volname);
- ret = glusterd_volinfo_find (volname, &volinfo);
- if ((ret) || (!exists)) {
- gf_log ("", GF_LOG_WARNING, "volume name does not exist");
- snprintf (errmsg, sizeof(errmsg), "Volume name %s does not"
- " exist", volname);
- *op_errstr = gf_strdup (errmsg);
- ret = -1;
- goto out;
+ snprintf(sts_val_name, sizeof(sts_val_name), "status_value%d",
+ gsync_count);
+ ret = dict_set_bin(dict, sts_val_name, sts_val,
+ sizeof(gf_gsync_status_t));
+ if (ret) {
+ GF_FREE(sts_val);
+ goto out;
}
+ gsync_count++;
+ sts_val = NULL;
+ }
- ret = dict_get_str (dict, "slave", &slave);
- if (ret < 0) {
- ret = glusterd_get_gsync_status_mst (volinfo, rsp_dict);
- goto out;
- }
+ ret = dict_set_int32(dict, "gsync-count", gsync_count);
+ if (ret)
+ goto out;
- ret = glusterd_get_gsync_status_mst_slv (volinfo, slave, rsp_dict);
+out:
+ GF_FREE(temp_inp);
+ dict_unref(confd);
- out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return 0;
}
-static int
-glusterd_send_sigstop (pid_t pid)
+int
+glusterd_check_restart_gsync_session(glusterd_volinfo_t *volinfo, char *slave,
+ dict_t *resp_dict, char *path_list,
+ char *conf_path, gf_boolean_t is_force)
{
- int ret = 0;
- ret = kill (pid, SIGSTOP);
- if (ret)
- gf_log ("", GF_LOG_ERROR, GEOREP"failed to send SIGSTOP signal");
- return ret;
+ int ret = 0;
+ glusterd_conf_t *priv = NULL;
+ char *status_msg = NULL;
+ gf_boolean_t is_running = _gf_false;
+ char *op_errstr = NULL;
+ char *key = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(volinfo);
+ GF_ASSERT(slave);
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ key = slave;
+
+ ret = glusterd_check_gsync_running_local(volinfo->volname, slave, conf_path,
+ &is_running);
+ if (!ret && (_gf_true != is_running))
+ /* gsynd not running, nothing to do */
+ goto out;
+
+ ret = stop_gsync(volinfo->volname, slave, &status_msg, conf_path,
+ &op_errstr, is_force);
+ if (ret == 0 && status_msg)
+ ret = dict_set_str(resp_dict, "gsync-status", status_msg);
+ if (ret == 0) {
+ dict_del(volinfo->gsync_active_slaves, key);
+ ret = glusterd_start_gsync(volinfo, slave, path_list, conf_path,
+ uuid_utoa(MY_UUID), NULL, _gf_false);
+ if (!ret) {
+ /* Add slave to the dict indicating geo-rep session is
+ * running.*/
+ ret = dict_set_dynstr_with_alloc(volinfo->gsync_active_slaves, key,
+ "running");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set"
+ " key:%s value:running in dict. But "
+ "the config succeeded.",
+ key);
+ goto out;
+ }
+ }
+ }
+
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ if (op_errstr)
+ GF_FREE(op_errstr);
+ return ret;
}
-static int
-glusterd_send_sigcont (pid_t pid)
+static int32_t
+glusterd_marker_changelog_create_volfile(glusterd_volinfo_t *volinfo)
{
- int ret = 0;
- ret = kill (pid, SIGCONT);
- if (ret)
- gf_log ("", GF_LOG_ERROR, GEOREP"failed to send SIGCONT signal");
- return ret;
+ int32_t ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Unable to create volfile for setting of marker "
+ "while '" GEOREP " start'");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_svcs_manager(volinfo);
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
}
-/*
- * Log rotations flow is something like this:
- * - Send SIGSTOP to process group (this will stop monitor/worker process
- * and also the slave if it's local)
- * - Rotate log file for monitor/worker
- * - Rotate log file for slave if it's local
- * - Send SIGCONT to the process group. Monitor wakes up, kills the worker
- * (this is done in the SIGCONT handler), which results in the termination
- * of the slave (local/remote). After returning from signal handler,
- * monitor detects absence of worker and starts it again, which in-turn
- * starts the slave.
- */
static int
-glusterd_send_log_rotate_signal (pid_t pid, char *logfile1, char *logfile2)
+glusterd_set_gsync_knob(glusterd_volinfo_t *volinfo, char *key, int *vc)
{
- int ret = 0;
- char rlogfile[PATH_MAX] = {0,};
- time_t rottime = 0;
+ int ret = -1;
+ int conf_enabled = _gf_false;
+ xlator_t *this = NULL;
- ret = glusterd_send_sigstop (-pid);
- rottime = time (NULL);
+ this = THIS;
+ GF_ASSERT(this);
- snprintf (rlogfile, sizeof (rlogfile), "%s.%"PRIu64, logfile1,
- (uint64_t) rottime);
- ret = rename (logfile1, rlogfile);
- if (ret)
- gf_log ("", GF_LOG_ERROR, "rename failed for geo-rep log file");
+ GF_ASSERT(this->private);
- if (!*logfile2) {
- gf_log ("", GF_LOG_DEBUG, "Slave is not local,"
- " skipping rotation");
- ret = 0;
- goto out;
- }
+ conf_enabled = glusterd_volinfo_get_boolean(volinfo, key);
+ if (conf_enabled == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GET_KEY_FAILED,
+ "failed to get key %s from volinfo", key);
+ goto out;
+ }
- (void) snprintf (rlogfile, sizeof (rlogfile), "%s.%"PRIu64, logfile2,
- (uint64_t) rottime);
- ret = rename (logfile2, rlogfile);
- if (ret)
- gf_log ("", GF_LOG_ERROR, "rename failed for geo-rep slave"
- " log file");
+ ret = 0;
+ if (conf_enabled == _gf_false) {
+ *vc = 1;
+ ret = glusterd_gsync_volinfo_dict_set(volinfo, key, "on");
+ }
- out:
- ret = glusterd_send_sigcont (-pid);
-
- return ret;
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
static int
-glusterd_get_pid_from_file (char *master, char *slave, pid_t *pid)
+glusterd_set_gsync_confs(glusterd_volinfo_t *volinfo)
{
- int ret = -1;
- int pfd = 0;
- char pidfile[PATH_MAX] = {0,};
- char buff[1024] = {0,};
+ int ret = -1;
+ int volfile_changed = 0;
- pfd = gsyncd_getpidfile (master, slave, pidfile);
- if (pfd == -2) {
- gf_log ("", GF_LOG_ERROR, GEOREP" log-rotate validation "
- " failed for %s & %s", master, slave);
- goto out;
- }
- if (gsync_status_byfd (pfd) == -1) {
- gf_log ("", GF_LOG_ERROR, "gsyncd b/w %s & %s is not"
- " running", master, slave);
- goto out;
- }
+ ret = glusterd_set_gsync_knob(volinfo, VKEY_MARKER_XTIME, &volfile_changed);
+ if (ret)
+ goto out;
- if (pfd < 0)
- goto out;
-
- ret = read (pfd, buff, 1024);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, GEOREP" cannot read pid from pid-file");
- goto out;
- }
+ /**
+ * enable ignore-pid-check blindly as it could be needed for
+ * cascading setups.
+ */
+ ret = glusterd_set_gsync_knob(volinfo, VKEY_MARKER_XTIME_FORCE,
+ &volfile_changed);
+ if (ret)
+ goto out;
+ ret = glusterd_set_gsync_knob(volinfo, VKEY_CHANGELOG, &volfile_changed);
+ if (ret)
+ goto out;
- *pid = strtol (buff, NULL, 10);
- ret = 0;
+ if (volfile_changed)
+ ret = glusterd_marker_changelog_create_volfile(volinfo);
out:
- sys_close(pfd);
- return ret;
+ return ret;
}
static int
-glusterd_do_gsync_log_rotate (char *master, char *slave, uuid_t *uuid, char **op_errstr)
+glusterd_get_gsync_status_mst_slv(glusterd_volinfo_t *volinfo, char *slave,
+ char *conf_path, dict_t *rsp_dict, char *node)
{
- int ret = 0;
- glusterd_conf_t *priv = NULL;
- pid_t pid = 0;
- char log_file1[PATH_MAX] = {0,};
- char log_file2[PATH_MAX] = {0,};
-
- GF_ASSERT (THIS);
- GF_ASSERT (THIS->private);
+ char *statefile = NULL;
+ uuid_t uuid = {
+ 0,
+ };
+ int ret = 0;
+ gf_boolean_t is_template_in_use = _gf_false;
+ struct stat stbuf = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(volinfo);
+ GF_ASSERT(slave);
+ GF_ASSERT(this->private);
+
+ ret = glusterd_gsync_get_uuid(slave, volinfo, uuid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SESSION_INACTIVE,
+ "geo-replication status %s %s : session is not "
+ "active",
+ volinfo->volname, slave);
+
+ ret = glusterd_get_statefile_name(volinfo, slave, conf_path, &statefile,
+ &is_template_in_use);
+ if (ret) {
+ if (!strstr(slave, "::"))
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SLAVE_URL_INVALID,
+ "%s is not a valid slave url.", slave);
+ else
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ GD_MSG_GET_STATEFILE_NAME_FAILED,
+ "Unable to get statefile's name");
+ ret = 0;
+ goto out;
+ }
- priv = THIS->private;
+ ret = sys_lstat(statefile, &stbuf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, ENOENT, GD_MSG_FILE_OP_FAILED,
+ "%s statefile not present.", statefile);
+ ret = 0;
+ goto out;
+ }
+ }
- ret = glusterd_get_pid_from_file (master, slave, &pid);
- if (ret)
- goto out;
+ ret = glusterd_read_status_file(volinfo, slave, conf_path, rsp_dict, node);
+out:
+ if (statefile)
+ GF_FREE(statefile);
- /* log file */
- ret = glusterd_gsyncd_getlogfile (master, slave, log_file1);
- if (ret)
- goto out;
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
+ return ret;
+}
- /* check if slave is local or remote */
- ret = glusterd_gsync_slave_is_remote (slave);
- if (ret)
- goto do_rotate;
+int
+glusterd_get_gsync_status_mst(glusterd_volinfo_t *volinfo, dict_t *rsp_dict,
+ char *node)
+{
+ glusterd_gsync_status_temp_t param = {
+ 0,
+ };
- /* slave log file - slave is local and it's log can be rotated */
- ret = glusterd_gsync_get_slave_log_file (master, slave, log_file2);
- if (ret)
- goto out;
+ GF_ASSERT(volinfo);
- do_rotate:
- ret = glusterd_send_log_rotate_signal (pid, log_file1, log_file2);
+ param.rsp_dict = rsp_dict;
+ param.volinfo = volinfo;
+ param.node = node;
+ dict_foreach(volinfo->gsync_slaves, _get_status_mst_slv, &param);
- out:
- if (ret && op_errstr)
- *op_errstr = gf_strdup("Error rotating log file");
- return ret;
+ return 0;
}
static int
-glusterd_do_gsync_log_rotation_mst_slv (glusterd_volinfo_t *volinfo, char *slave,
- char **op_errstr)
-{
- uuid_t uuid = {0, };
- glusterd_conf_t *priv = NULL;
- int ret = 0;
- char errmsg[1024] = {0,};
- xlator_t *this = NULL;
-
- GF_ASSERT (volinfo);
- GF_ASSERT (slave);
- GF_ASSERT (THIS);
- this = THIS;
- GF_ASSERT (this->private);
- priv = this->private;
+glusterd_get_gsync_status_all(dict_t *rsp_dict, char *node)
+{
+ int32_t ret = 0;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
- ret = glusterd_gsync_get_uuid (slave, volinfo, uuid);
- if ((ret == 0) && (uuid_compare (MY_UUID, uuid) != 0))
- goto out;
+ this = THIS;
+ GF_ASSERT(this);
- if (ret) {
- snprintf(errmsg, sizeof(errmsg), "geo-replication session b/w %s %s not active",
- volinfo->volname, slave);
- gf_log (this->name, GF_LOG_WARNING, "%s", errmsg);
- if (op_errstr)
- *op_errstr = gf_strdup(errmsg);
- goto out;
- }
+ priv = this->private;
- ret = glusterd_do_gsync_log_rotate (volinfo->volname, slave, &uuid, op_errstr);
+ GF_ASSERT(priv);
- out:
- gf_log (this->name, GF_LOG_DEBUG, "Returning with %d", ret);
- return ret;
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ ret = glusterd_get_gsync_status_mst(volinfo, rsp_dict, node);
+ if (ret)
+ goto out;
+ }
+
+out:
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
+ return ret;
}
-static void
-_iterate_log_rotate_mst_slv (dict_t *this, char *key, data_t *value, void *data)
+static int
+glusterd_get_gsync_status(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
{
- glusterd_gsync_status_temp_t *param = NULL;
- char *slave = NULL;
+ char *slave = NULL;
+ char *volname = NULL;
+ char *conf_path = NULL;
+ char errmsg[PATH_MAX] = {
+ 0,
+ };
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+ char my_hostname[256] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = gethostname(my_hostname, 256);
+ if (ret) {
+ /* stick to N/A */
+ (void)strcpy(my_hostname, "N/A");
+ }
+
+ ret = dict_get_str(dict, "master", &volname);
+ if (ret < 0) {
+ ret = glusterd_get_gsync_status_all(rsp_dict, my_hostname);
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND,
+ "volume name does not exist");
+ snprintf(errmsg, sizeof(errmsg),
+ "Volume name %s does not"
+ " exist",
+ volname);
+ *op_errstr = gf_strdup(errmsg);
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "slave", &slave);
+ if (ret < 0) {
+ ret = glusterd_get_gsync_status_mst(volinfo, rsp_dict, my_hostname);
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "conf_path", &conf_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch conf file path.");
+ goto out;
+ }
+
+ ret = glusterd_get_gsync_status_mst_slv(volinfo, slave, conf_path, rsp_dict,
+ my_hostname);
- param = (glusterd_gsync_status_temp_t *) data;
-
- GF_ASSERT (param);
- GF_ASSERT (param->volinfo);
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
- slave = strchr (value->data, ':');
- if (slave)
- slave++;
+static int
+glusterd_gsync_delete(glusterd_volinfo_t *volinfo, char *slave,
+ char *slave_host, char *slave_vol, char *path_list,
+ dict_t *dict, dict_t *resp_dict, char **op_errstr)
+{
+ int32_t ret = -1;
+ runner_t runner = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ char *master = NULL;
+ char *gl_workdir = NULL;
+ char geo_rep_dir[PATH_MAX] = "";
+ char *conf_path = NULL;
+ xlator_t *this = NULL;
+ uint32_t reset_sync_time = _gf_false;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(slave);
+ GF_ASSERT(slave_host);
+ GF_ASSERT(slave_vol);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(dict);
+ GF_ASSERT(resp_dict);
+
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_PRIV_NOT_FOUND,
+ "priv of glusterd not present");
+ *op_errstr = gf_strdup("glusterd defunct");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "conf_path", &conf_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch conf file path.");
+ goto out;
+ }
+
+ gl_workdir = priv->workdir;
+ master = "";
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "--delete", "-c", NULL);
+ runner_argprintf(&runner, "%s", conf_path);
+ runner_argprintf(&runner, "--iprefix=%s", DATADIR);
+
+ runner_argprintf(&runner, "--path-list=%s", path_list);
+
+ ret = dict_get_uint32(dict, "reset-sync-time", &reset_sync_time);
+ if (!ret && reset_sync_time) {
+ runner_add_args(&runner, "--reset-sync-time", NULL);
+ }
+
+ if (volinfo) {
+ master = volinfo->volname;
+ runner_argprintf(&runner, ":%s", master);
+ }
+ runner_add_arg(&runner, slave);
+ runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
+ synclock_unlock(&priv->big_lock);
+ ret = runner_run(&runner);
+ synclock_lock(&priv->big_lock);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SESSION_DEL_FAILED,
+ "gsyncd failed to delete session info for %s and "
+ "%s peers",
+ master, slave);
+
+ gf_asprintf(op_errstr,
+ "gsyncd failed to "
+ "delete session info for %s and %s peers",
+ master, slave);
+
+ goto out;
+ }
+
+ ret = snprintf(geo_rep_dir, sizeof(geo_rep_dir) - 1,
+ "%s/" GEOREP "/%s_%s_%s", gl_workdir, volinfo->volname,
+ slave_host, slave_vol);
+ geo_rep_dir[ret] = '\0';
+
+ ret = sys_rmdir(geo_rep_dir);
+ if (ret) {
+ if (errno == ENOENT)
+ gf_msg_debug(this->name, 0, "Geo Rep Dir(%s) Not Present.",
+ geo_rep_dir);
else {
- gf_log ("", GF_LOG_ERROR, "geo-replication log-rotate: slave (%s) "
- "not conforming to format", slave);
- return;
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Unable to delete Geo Rep Dir(%s). Error: %s", geo_rep_dir,
+ strerror(errno));
+ goto out;
}
+ }
+
+ ret = 0;
- (void) glusterd_do_gsync_log_rotation_mst_slv (param->volinfo, slave, NULL);
+ gf_asprintf(op_errstr, "delete successful");
+
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
-static int
-glusterd_do_gsync_log_rotation_mst (glusterd_volinfo_t *volinfo)
+int
+glusterd_op_sys_exec(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
{
- glusterd_gsync_status_temp_t param = {0, };
+ char buf[PATH_MAX] = "";
+ char cmd_arg_name[PATH_MAX] = "";
+ char output_name[PATH_MAX] = "";
+ char errmsg[PATH_MAX] = "";
+ char *ptr = NULL;
+ char *bufp = NULL;
+ char *command = NULL;
+ char **cmd_args = NULL;
+ int ret = -1;
+ int i = -1;
+ int cmd_args_count = 0;
+ int output_count = 0;
+ glusterd_conf_t *priv = NULL;
+ runner_t runner = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(rsp_dict);
+
+ if (THIS)
+ priv = THIS->private;
+ if (priv == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_PRIV_NOT_FOUND,
+ "priv of glusterd not present");
+ *op_errstr = gf_strdup("glusterd defunct");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "command", &command);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get command from dict");
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, "cmd_args_count", &cmd_args_count);
+ if (ret)
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED,
+ "No cmd_args_count");
+
+ if (cmd_args_count) {
+ cmd_args = GF_CALLOC(cmd_args_count, sizeof(char *), gf_common_mt_char);
+ if (!cmd_args) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Unable to calloc. Errno = %s", strerror(errno));
+ goto out;
+ }
- GF_ASSERT (volinfo);
+ for (i = 1; i <= cmd_args_count; i++) {
+ snprintf(cmd_arg_name, sizeof(cmd_arg_name), "cmd_arg_%d", i);
+ ret = dict_get_str(dict, cmd_arg_name, &cmd_args[i - 1]);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get"
+ " %s in dict",
+ cmd_arg_name);
+ goto out;
+ }
+ }
+ }
+
+ runinit(&runner);
+ runner_argprintf(&runner, GSYNCD_PREFIX "/peer_%s", command);
+ for (i = 0; i < cmd_args_count; i++)
+ runner_add_arg(&runner, cmd_args[i]);
+ runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
+ synclock_unlock(&priv->big_lock);
+ ret = runner_start(&runner);
+ if (ret == -1) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Unable to "
+ "execute command. Error : %s",
+ strerror(errno));
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CMD_EXEC_FAIL, "%s", errmsg);
+ ret = -1;
+ synclock_lock(&priv->big_lock);
+ goto out;
+ }
+
+ do {
+ ptr = fgets(buf, sizeof(buf), runner_chio(&runner, STDOUT_FILENO));
+ if (ptr) {
+ ret = dict_get_int32(rsp_dict, "output_count", &output_count);
+ if (ret)
+ output_count = 1;
+ else
+ output_count++;
+ snprintf(output_name, sizeof(output_name), "output_%d",
+ output_count);
+ if (buf[strlen(buf) - 1] == '\n')
+ buf[strlen(buf) - 1] = '\0';
+ bufp = gf_strdup(buf);
+ if (!bufp)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STRDUP_FAILED,
+ "gf_strdup failed.");
+ ret = dict_set_dynstr(rsp_dict, output_name, bufp);
+ if (ret) {
+ GF_FREE(bufp);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "output set "
+ "failed.");
+ }
+ ret = dict_set_int32(rsp_dict, "output_count", output_count);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "output_count "
+ "set failed.");
+ }
+ } while (ptr);
+
+ ret = runner_end(&runner);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Unable to "
+ "end. Error : %s",
+ strerror(errno));
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNABLE_TO_END, "%s", errmsg);
+ ret = -1;
+ synclock_lock(&priv->big_lock);
+ goto out;
+ }
+ synclock_lock(&priv->big_lock);
+
+ ret = 0;
+out:
+ if (cmd_args) {
+ GF_FREE(cmd_args);
+ cmd_args = NULL;
+ }
- param.volinfo = volinfo;
- dict_foreach (volinfo->gsync_slaves, _iterate_log_rotate_mst_slv, &param);
- return 0;
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
-static int
-glusterd_rotate_gsync_all ()
+int
+glusterd_op_copy_file(dict_t *dict, char **op_errstr)
{
- int32_t ret = 0;
- glusterd_conf_t *priv = NULL;
- glusterd_volinfo_t *volinfo = NULL;
-
- GF_ASSERT (THIS);
+ char abs_filename[PATH_MAX] = "";
+ char errmsg[PATH_MAX] = "";
+ char *filename = NULL;
+ char *host_uuid = NULL;
+ char uuid_str[64] = {0};
+ char *contents = NULL;
+ char buf[4096] = "";
+ int ret = -1;
+ int fd = -1;
+ int bytes_writen = 0;
+ int bytes_read = 0;
+ int contents_size = -1;
+ int file_mode = -1;
+ glusterd_conf_t *priv = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ gf_boolean_t free_contents = _gf_true;
+ xlator_t *this = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (THIS)
priv = THIS->private;
+ if (priv == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_PRIV_NOT_FOUND,
+ "priv of glusterd not present");
+ *op_errstr = gf_strdup("glusterd defunct");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "host-uuid", &host_uuid);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, "source", &filename);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch filename from dict.");
+ *op_errstr = gf_strdup("command unsuccessful");
+ goto out;
+ }
+ len = snprintf(abs_filename, sizeof(abs_filename), "%s/%s", priv->workdir,
+ filename);
+ if ((len < 0) || (len >= sizeof(abs_filename))) {
+ ret = -1;
+ goto out;
+ }
+
+ uuid_utoa_r(MY_UUID, uuid_str);
+ if (!strcmp(uuid_str, host_uuid)) {
+ ret = sys_lstat(abs_filename, &stbuf);
+ if (ret) {
+ len = snprintf(errmsg, sizeof(errmsg),
+ "Source file "
+ "does not exist in %s",
+ priv->workdir);
+ if (len < 0) {
+ strcpy(errmsg, "<error>");
+ }
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED,
+ "%s", errmsg);
+ goto out;
+ }
- GF_ASSERT (priv);
+ contents = GF_CALLOC(1, stbuf.st_size + 1, gf_common_mt_char);
+ if (!contents) {
+ snprintf(errmsg, sizeof(errmsg), "Unable to allocate memory");
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY, "%s",
+ errmsg);
+ ret = -1;
+ goto out;
+ }
- list_for_each_entry (volinfo, &priv->volumes, vol_list) {
- ret = glusterd_do_gsync_log_rotation_mst (volinfo);
- if (ret)
- goto out;
+ fd = open(abs_filename, O_RDONLY);
+ if (fd < 0) {
+ len = snprintf(errmsg, sizeof(errmsg), "Unable to open %s",
+ abs_filename);
+ if (len < 0) {
+ strcpy(errmsg, "<error>");
+ }
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, "%s",
+ errmsg);
+ ret = -1;
+ goto out;
}
- out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
- return ret;
-}
+ do {
+ ret = sys_read(fd, buf, sizeof(buf) - 1);
+ if (ret > 0) {
+ buf[ret] = '\0';
+ memcpy(contents + bytes_read, buf, ret);
+ bytes_read += ret;
+ }
+ } while (ret > 0);
+
+ if (bytes_read != stbuf.st_size) {
+ len = snprintf(errmsg, sizeof(errmsg),
+ "Unable to read all the data from %s", abs_filename);
+ if (len < 0) {
+ strcpy(errmsg, "<error>");
+ }
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_READ_ERROR, "%s",
+ errmsg);
+ ret = -1;
+ goto out;
+ }
-static int
-glusterd_rotate_gsync_logs (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
-{
- char *slave = NULL;
- char *volname = NULL;
- char errmsg[1024] = {0,};
- gf_boolean_t exists = _gf_false;
- glusterd_volinfo_t *volinfo = NULL;
- char **linearr = NULL;
- int ret = 0;
+ ret = dict_set_int32(dict, "contents_size", stbuf.st_size);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Unable to set"
+ " contents size in dict.");
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "%s",
+ errmsg);
+ goto out;
+ }
- ret = dict_get_str (dict, "master", &volname);
- if (ret < 0) {
- ret = glusterd_rotate_gsync_all ();
- goto out;
+ ret = dict_set_int32(dict, "file_mode", (int32_t)stbuf.st_mode);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Unable to set"
+ " file mode in dict.");
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "%s",
+ errmsg);
+ goto out;
}
- exists = glusterd_check_volume_exists (volname);
- ret = glusterd_volinfo_find (volname, &volinfo);
- if ((ret) || (!exists)) {
- snprintf (errmsg, sizeof(errmsg), "Volume %s does not"
- " exist", volname);
- gf_log ("", GF_LOG_WARNING, "%s", errmsg);
- *op_errstr = gf_strdup (errmsg);
- ret = -1;
- goto out;
+ ret = dict_set_bin(dict, "common_pem_contents", contents,
+ stbuf.st_size);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Unable to set"
+ " pem contents in dict.");
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "%s",
+ errmsg);
+ goto out;
+ }
+ free_contents = _gf_false;
+ } else {
+ free_contents = _gf_false;
+ ret = dict_get_bin(dict, "common_pem_contents", (void **)&contents);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Unable to get"
+ " pem contents in dict.");
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ errmsg);
+ goto out;
+ }
+ ret = dict_get_int32(dict, "contents_size", &contents_size);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Unable to set"
+ " contents size in dict.");
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ errmsg);
+ goto out;
}
- ret = dict_get_str (dict, "slave", &slave);
- if (ret < 0) {
- ret = glusterd_do_gsync_log_rotation_mst (volinfo);
- goto out;
+ ret = dict_get_int32(dict, "file_mode", &file_mode);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Unable to get"
+ " file mode in dict.");
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ errmsg);
+ goto out;
}
- /* for the given slave use the normalized url */
- ret = glusterd_urltransform_single (slave, "normalize", &linearr);
- if (ret == -1)
- goto out;
+ fd = open(abs_filename, O_WRONLY | O_TRUNC | O_CREAT, 0600);
+ if (fd < 0) {
+ len = snprintf(errmsg, sizeof(errmsg), "Unable to open %s",
+ abs_filename);
+ if (len < 0) {
+ strcpy(errmsg, "<error>");
+ }
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, "%s",
+ errmsg);
+ ret = -1;
+ goto out;
+ }
- ret = glusterd_do_gsync_log_rotation_mst_slv (volinfo, linearr[0],
- op_errstr);
- if (ret)
- gf_log ("gsyncd", GF_LOG_ERROR, "gsyncd log-rotate failed for"
- " %s & %s", volname, slave);
+ bytes_writen = sys_write(fd, contents, contents_size);
+
+ if (bytes_writen != contents_size) {
+ len = snprintf(errmsg, sizeof(errmsg), "Failed to write to %s",
+ abs_filename);
+ if (len < 0) {
+ strcpy(errmsg, "<error>");
+ }
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, "%s",
+ errmsg);
+ ret = -1;
+ goto out;
+ }
- glusterd_urltransform_free (linearr, 1);
- out:
- return ret;
-}
+ sys_fchmod(fd, file_mode);
+ }
+ ret = 0;
+out:
+ if (fd != -1)
+ sys_close(fd);
-int
-glusterd_op_gsync_set (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
-{
- int32_t ret = -1;
- int32_t type = -1;
- dict_t *ctx = NULL;
- dict_t *resp_dict = NULL;
- char *host_uuid = NULL;
- char *slave = NULL;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- glusterd_conf_t *priv = NULL;
- char *status_msg = NULL;
- uuid_t uuid = {0, };
-
- GF_ASSERT (THIS);
- GF_ASSERT (THIS->private);
- GF_ASSERT (dict);
- GF_ASSERT (op_errstr);
+ if (free_contents)
+ GF_FREE(contents);
- priv = THIS->private;
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
- ret = dict_get_int32 (dict, "type", &type);
- if (ret < 0)
- goto out;
+int
+glusterd_op_gsync_set(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ int32_t type = -1;
+ char *host_uuid = NULL;
+ char *slave = NULL;
+ char *slave_url = NULL;
+ char *slave_vol = NULL;
+ char *slave_host = NULL;
+ char *volname = NULL;
+ char *path_list = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ gf_boolean_t is_force = _gf_false;
+ char *status_msg = NULL;
+ gf_boolean_t is_running = _gf_false;
+ char *conf_path = NULL;
+ char *key = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(rsp_dict);
+
+ ret = dict_get_int32(dict, "type", &type);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_get_str(dict, "host-uuid", &host_uuid);
+ if (ret < 0)
+ goto out;
+
+ if (type == GF_GSYNC_OPTION_TYPE_STATUS) {
+ ret = glusterd_get_gsync_status(dict, op_errstr, rsp_dict);
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "slave", &slave);
+ if (ret < 0)
+ goto out;
+
+ key = slave;
+
+ ret = dict_get_str(dict, "slave_url", &slave_url);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch slave url.");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "slave_host", &slave_host);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch slave hostname.");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "slave_vol", &slave_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch slave volume name.");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "conf_path", &conf_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch conf file path.");
+ goto out;
+ }
+
+ if (dict_get_str(dict, "master", &volname) == 0) {
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "Volinfo for"
+ " %s (master) not found",
+ volname);
+ goto out;
+ }
- ret = dict_get_str (dict, "host-uuid", &host_uuid);
- if (ret < 0)
+ ret = glusterd_get_local_brickpaths(volinfo, &path_list);
+ if (!path_list && ret == -1)
+ goto out;
+ }
+
+ if (type == GF_GSYNC_OPTION_TYPE_CONFIG) {
+ ret = glusterd_gsync_configure(volinfo, slave, path_list, dict,
+ rsp_dict, op_errstr);
+ if (!ret) {
+ ret = dict_set_str(rsp_dict, "conf_path", conf_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to store conf_file_path.");
goto out;
+ }
+ }
+ goto out;
+ }
+
+ if (type == GF_GSYNC_OPTION_TYPE_DELETE) {
+ ret = glusterd_remove_slave_in_info(volinfo, slave, op_errstr);
+ if (ret && !is_force && path_list)
+ goto out;
+
+ ret = glusterd_gsync_delete(volinfo, slave, slave_host, slave_vol,
+ path_list, dict, rsp_dict, op_errstr);
+ goto out;
+ }
+
+ if (!volinfo) {
+ ret = -1;
+ goto out;
+ }
+
+ is_force = dict_get_str_boolean(dict, "force", _gf_false);
+
+ if (type == GF_GSYNC_OPTION_TYPE_START) {
+ /* Add slave to the dict indicating geo-rep session is running*/
+ ret = dict_set_dynstr_with_alloc(volinfo->gsync_active_slaves, key,
+ "running");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set key:%s"
+ " value:running in the dict",
+ key);
+ goto out;
+ }
- ctx = glusterd_op_get_ctx ();
- resp_dict = ctx ? ctx : rsp_dict;
- GF_ASSERT (resp_dict);
-
- if (type == GF_GSYNC_OPTION_TYPE_STATUS) {
- ret = glusterd_get_gsync_status (dict, op_errstr, resp_dict);
- goto out;
+ /* If slave volume uuid is not present in gsync_slaves
+ * update it*/
+ ret = glusterd_update_slave_voluuid_slaveinfo(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REMOTE_VOL_UUID_FAIL,
+ "Error in updating"
+ " slave volume uuid for old slave info");
+ goto out;
}
- if (type == GF_GSYNC_OPTION_TYPE_ROTATE) {
- ret = glusterd_rotate_gsync_logs (dict, op_errstr, resp_dict);
- goto out;
+ ret = glusterd_start_gsync(volinfo, slave, path_list, conf_path,
+ host_uuid, op_errstr, _gf_false);
+ /* Delete added slave in the dict if start fails*/
+ if (ret)
+ dict_del(volinfo->gsync_active_slaves, key);
+ }
+
+ if (type == GF_GSYNC_OPTION_TYPE_STOP ||
+ type == GF_GSYNC_OPTION_TYPE_PAUSE ||
+ type == GF_GSYNC_OPTION_TYPE_RESUME) {
+ ret = glusterd_check_gsync_running_local(volinfo->volname, slave,
+ conf_path, &is_running);
+ if (!ret && !is_force && path_list && (_gf_true != is_running)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_GSYNCD_OP_SET_FAILED,
+ GEOREP
+ " is not "
+ "set up for %s(master) and %s(slave)",
+ volname, slave);
+ *op_errstr = gf_strdup(GEOREP " is not set up");
+ goto out;
}
- ret = dict_get_str (dict, "slave", &slave);
- if (ret < 0)
+ if (type == GF_GSYNC_OPTION_TYPE_PAUSE) {
+ ret = gd_pause_or_resume_gsync(dict, volname, slave, slave_host,
+ slave_vol, conf_path, op_errstr,
+ _gf_true);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PAUSE_FAILED,
+ GEOREP " Pause Failed");
+ else
+ dict_del(volinfo->gsync_active_slaves, key);
+
+ } else if (type == GF_GSYNC_OPTION_TYPE_RESUME) {
+ /* Add slave to the dict indicating geo-rep session is
+ * running*/
+ ret = dict_set_dynstr_with_alloc(volinfo->gsync_active_slaves, key,
+ "running");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set "
+ "key:%s value:running in dict",
+ key);
goto out;
-
- if (dict_get_str (dict, "master", &volname) == 0) {
- ret = glusterd_volinfo_find (volname, &volinfo);
+ }
+
+ ret = gd_pause_or_resume_gsync(dict, volname, slave, slave_host,
+ slave_vol, conf_path, op_errstr,
+ _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESUME_FAILED,
+ GEOREP " Resume Failed");
+ dict_del(volinfo->gsync_active_slaves, key);
+ }
+ } else {
+ ret = stop_gsync(volname, slave, &status_msg, conf_path, op_errstr,
+ is_force);
+
+ if (ret == 0 && status_msg)
+ ret = dict_set_str(rsp_dict, "gsync-status", status_msg);
+ if (!ret) {
+ ret = glusterd_create_status_file(
+ volinfo->volname, slave, slave_host, slave_vol, "Stopped");
if (ret) {
- gf_log ("", GF_LOG_WARNING, "Volinfo for %s (master) not found",
- volname);
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_UPDATE_STATEFILE_FAILED,
+ "Unable to update state_file. "
+ "Error : %s",
+ strerror(errno));
}
+ dict_del(volinfo->gsync_active_slaves, key);
+ }
}
+ }
- if (type == GF_GSYNC_OPTION_TYPE_CONFIG) {
- ret = glusterd_gsync_configure (volinfo, slave, dict, resp_dict,
- op_errstr);
- goto out;
- }
+out:
+ if (path_list) {
+ GF_FREE(path_list);
+ path_list = NULL;
+ }
- if (!volinfo) {
- ret = -1;
- goto out;
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_get_slave_details_confpath(glusterd_volinfo_t *volinfo, dict_t *dict,
+ char **slave_url, char **slave_host,
+ char **slave_vol, char **conf_path,
+ char **op_errstr)
+{
+ int ret = -1;
+ char confpath[PATH_MAX] = "";
+ glusterd_conf_t *priv = NULL;
+ char *slave = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_str(dict, "slave", &slave);
+ if (ret || !slave) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch slave from dict");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_get_slave_info(slave, slave_url, slave_host, slave_vol,
+ op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_FETCH_ERROR,
+ "Unable to fetch slave details.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "slave_url", *slave_url);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to store slave IP.");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "slave_host", *slave_host);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to store slave hostname");
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "slave_vol", *slave_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to store slave volume name.");
+ goto out;
+ }
+
+ ret = snprintf(confpath, sizeof(confpath) - 1,
+ "%s/" GEOREP "/%s_%s_%s/gsyncd.conf", priv->workdir,
+ volinfo->volname, *slave_host, *slave_vol);
+ confpath[ret] = '\0';
+ *conf_path = gf_strdup(confpath);
+ if (!(*conf_path)) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "Unable to gf_strdup. Error: %s", strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "conf_path", *conf_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to store conf_path");
+ goto out;
+ }
+
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_get_slave_info(char *slave, char **slave_url, char **hostname,
+ char **slave_vol, char **op_errstr)
+{
+ char *tmp = NULL;
+ char *save_ptr = NULL;
+ char **linearr = NULL;
+ int32_t ret = -1;
+ char errmsg[PATH_MAX] = "";
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = glusterd_urltransform_single(slave, "normalize", &linearr);
+ if ((ret == -1) || (linearr[0] == NULL)) {
+ ret = snprintf(errmsg, sizeof(errmsg) - 1, "Invalid Url: %s", slave);
+ errmsg[ret] = '\0';
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NORMALIZE_URL_FAIL,
+ "Failed to normalize url");
+ goto out;
+ }
+
+ tmp = strtok_r(linearr[0], "/", &save_ptr);
+ tmp = strtok_r(NULL, "/", &save_ptr);
+ slave = NULL;
+ if (tmp != NULL) {
+ slave = strtok_r(tmp, ":", &save_ptr);
+ }
+ if (slave) {
+ ret = glusterd_geo_rep_parse_slave(slave, hostname, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_URL_INVALID,
+ "Invalid slave url: %s", *op_errstr);
+ goto out;
+ }
+ gf_msg_debug(this->name, 0, "Hostname : %s", *hostname);
+
+ *slave_url = gf_strdup(slave);
+ if (!*slave_url) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STRDUP_FAILED,
+ "Failed to gf_strdup");
+ ret = -1;
+ goto out;
}
+ gf_msg_debug(this->name, 0, "Slave URL : %s", *slave_url);
+ ret = 0;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid slave name");
+ goto out;
+ }
+
+ slave = strtok_r(NULL, ":", &save_ptr);
+ if (slave) {
+ *slave_vol = gf_strdup(slave);
+ if (!*slave_vol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STRDUP_FAILED,
+ "Failed to gf_strdup");
+ ret = -1;
+ GF_FREE(*slave_url);
+ goto out;
+ }
+ gf_msg_debug(this->name, 0, "Slave Vol : %s", *slave_vol);
+ ret = 0;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid slave name");
+ goto out;
+ }
- if (type == GF_GSYNC_OPTION_TYPE_START) {
+out:
+ if (linearr)
+ glusterd_urltransform_free(linearr, 1);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
- ret = glusterd_set_marker_gsync (volinfo);
- if (ret != 0) {
- gf_log ("", GF_LOG_WARNING, "marker start failed");
- *op_errstr = gf_strdup ("failed to initialize indexing");
- ret = -1;
- goto out;
- }
- ret = glusterd_store_slave_in_info(volinfo, slave,
- host_uuid, op_errstr);
- if (ret)
- goto out;
+static void
+runinit_gsyncd_setrx(runner_t *runner, char *conf_path)
+{
+ runinit(runner);
+ runner_add_args(runner, GSYNCD_PREFIX "/gsyncd", "-c", NULL);
+ runner_argprintf(runner, "%s", conf_path);
+ runner_add_arg(runner, "--config-set-rx");
+}
- ret = glusterd_start_gsync (volinfo, slave, host_uuid,
- op_errstr);
+static int
+glusterd_check_gsync_present(int *valid_state)
+{
+ char buff[PATH_MAX] = {
+ 0,
+ };
+ runner_t runner = {
+ 0,
+ };
+ char *ptr = NULL;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ runinit(&runner);
+ runner_add_args(&runner, GSYNCD_PREFIX "/gsyncd", "--version", NULL);
+ runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
+ ret = runner_start(&runner);
+ if (ret == -1) {
+ if (errno == ENOENT) {
+ gf_msg("glusterd", GF_LOG_INFO, ENOENT, GD_MSG_MODULE_NOT_INSTALLED,
+ GEOREP
+ " module "
+ "not installed in the system");
+ *valid_state = 0;
+ } else {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_MODULE_ERROR,
+ GEOREP " module not working as desired");
+ *valid_state = -1;
+ }
+ goto out;
+ }
+
+ ptr = fgets(buff, sizeof(buff), runner_chio(&runner, STDOUT_FILENO));
+ if (ptr) {
+ if (!strstr(buff, "gsyncd")) {
+ ret = -1;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_MODULE_ERROR,
+ GEOREP " module not working as desired");
+ *valid_state = -1;
+ goto out;
}
+ } else {
+ ret = -1;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_MODULE_ERROR,
+ GEOREP " module not working as desired");
+ *valid_state = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
- if (type == GF_GSYNC_OPTION_TYPE_STOP) {
+ runner_end(&runner);
- ret = glusterd_gsync_get_uuid (slave, volinfo, uuid);
- if (ret) {
- gf_log ("", GF_LOG_WARNING, GEOREP" is not set up for"
- "%s(master) and %s(slave)", volname, slave);
- *op_errstr = strdup (GEOREP" is not set up");
- goto out;
- }
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
- ret = glusterd_remove_slave_in_info(volinfo, slave, op_errstr);
- if (ret)
- goto out;
+static int
+create_conf_file(glusterd_conf_t *conf, char *conf_path)
+#define RUN_GSYNCD_CMD \
+ do { \
+ ret = runner_run_reuse(&runner); \
+ if (ret == -1) { \
+ runner_log(&runner, "glusterd", GF_LOG_ERROR, "command failed"); \
+ runner_end(&runner); \
+ goto out; \
+ } \
+ runner_end(&runner); \
+ } while (0)
+{
+ int ret = 0;
+ runner_t runner = {
+ 0,
+ };
+ char georepdir[PATH_MAX] = {
+ 0,
+ };
+ int valid_state = 0;
+
+ valid_state = -1;
+ ret = glusterd_check_gsync_present(&valid_state);
+ if (-1 == ret) {
+ ret = valid_state;
+ goto out;
+ }
+
+ ret = snprintf(georepdir, sizeof(georepdir) - 1, "%s/" GEOREP,
+ conf->workdir);
+ georepdir[ret] = '\0';
+
+ /************
+ * master pre-configuration
+ ************/
+
+ /* remote-gsyncd */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_args(&runner, "remote-gsyncd", GSYNCD_PREFIX "/gsyncd", ".", ".",
+ NULL);
+ RUN_GSYNCD_CMD;
+
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_args(&runner, "remote-gsyncd", "/nonexistent/gsyncd", ".",
+ "^ssh:", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-command-dir */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_args(&runner, "gluster-command-dir", SBIN_DIR "/", ".", ".",
+ NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-params */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_args(&runner, "gluster-params", "aux-gfid-mount acl", ".", ".",
+ NULL);
+ RUN_GSYNCD_CMD;
+
+ /* ssh-command */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_arg(&runner, "ssh-command");
+ runner_argprintf(&runner,
+ "ssh -oPasswordAuthentication=no "
+ "-oStrictHostKeyChecking=no "
+ "-i %s/secret.pem",
+ georepdir);
+ runner_add_args(&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* ssh-command tar */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_arg(&runner, "ssh-command-tar");
+ runner_argprintf(&runner,
+ "ssh -oPasswordAuthentication=no "
+ "-oStrictHostKeyChecking=no "
+ "-i %s/tar_ssh.pem",
+ georepdir);
+ runner_add_args(&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* pid-file */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_arg(&runner, "pid-file");
+ runner_argprintf(&runner,
+ "%s/${mastervol}_${remotehost}_${slavevol}/monitor.pid",
+ georepdir);
+ runner_add_args(&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* geo-rep-working-dir */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_arg(&runner, "georep-session-working-dir");
+ runner_argprintf(&runner, "%s/${mastervol}_${remotehost}_${slavevol}/",
+ georepdir);
+ runner_add_args(&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* state-file */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_arg(&runner, "state-file");
+ runner_argprintf(&runner,
+ "%s/${mastervol}_${remotehost}_${slavevol}/monitor.status",
+ georepdir);
+ runner_add_args(&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* state-detail-file */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_arg(&runner, "state-detail-file");
+ runner_argprintf(
+ &runner,
+ "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}-detail.status",
+ georepdir);
+ runner_add_args(&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* state-socket */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_arg(&runner, "state-socket-unencoded");
+ runner_argprintf(
+ &runner, "%s/${mastervol}_${remotehost}_${slavevol}/${eSlave}.socket",
+ georepdir);
+ runner_add_args(&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* socketdir */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_args(&runner, "socketdir", GLUSTERD_SOCK_DIR, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* log-file */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_arg(&runner, "log-file");
+ runner_argprintf(&runner, "%s/%s/${mastervol}/${eSlave}.log", conf->logdir,
+ GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* changelog-log-file */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_arg(&runner, "changelog-log-file");
+ runner_argprintf(&runner,
+ "%s/%s/${mastervol}/${eSlave}${local_id}-changes.log",
+ conf->logdir, GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-log-file */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_arg(&runner, "gluster-log-file");
+ runner_argprintf(&runner,
+ "%s/%s/${mastervol}/${eSlave}${local_id}.gluster.log",
+ conf->logdir, GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* ignore-deletes */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_args(&runner, "ignore-deletes", "false", ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* special-sync-mode */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_args(&runner, "special-sync-mode", "partial", ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* change-detector == changelog */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_args(&runner, "change-detector", "changelog", ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_arg(&runner, "working-dir");
+ runner_argprintf(&runner, "%s/${mastervol}/${eSlave}",
+ DEFAULT_GLUSTERFSD_MISC_DIRETORY);
+ runner_add_args(&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /************
+ * slave pre-configuration
+ ************/
+
+ /* slave-gluster-command-dir */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_args(&runner, "slave-gluster-command-dir", SBIN_DIR "/", ".",
+ NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-params */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_args(&runner, "gluster-params", "aux-gfid-mount acl", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* log-file */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_arg(&runner, "log-file");
+ runner_argprintf(&runner,
+ "%s/%s-slaves/"
+ "${session_owner}:${local_node}${local_id}.${slavevol}."
+ "log",
+ conf->logdir, GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* MountBroker log-file */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_arg(&runner, "log-file-mbr");
+ runner_argprintf(&runner,
+ "%s/%s-slaves/mbr/"
+ "${session_owner}:${local_node}${local_id}.${slavevol}."
+ "log",
+ conf->logdir, GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
+
+ /* gluster-log-file */
+ runinit_gsyncd_setrx(&runner, conf_path);
+ runner_add_arg(&runner, "gluster-log-file");
+ runner_argprintf(&runner,
+ "%s/%s-slaves/"
+ "${session_owner}:${local_node}${local_id}.${slavevol}."
+ "gluster.log",
+ conf->logdir, GEOREP);
+ runner_add_args(&runner, ".", ".", NULL);
+ RUN_GSYNCD_CMD;
- if (uuid_compare (MY_UUID, uuid) != 0) {
- goto out;
+out:
+ return ret ? -1 : 0;
+}
+
+static int
+glusterd_create_essential_dir_files(glusterd_volinfo_t *volinfo, dict_t *dict,
+ char *slave, char *slave_host,
+ char *slave_vol, char **op_errstr)
+{
+ int ret = -1;
+ char *conf_path = NULL;
+ char *statefile = NULL;
+ char buf[PATH_MAX] = "";
+ char errmsg[PATH_MAX] = "";
+ glusterd_conf_t *conf = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ conf = this->private;
+
+ ret = dict_get_str(dict, "conf_path", &conf_path);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg), "Unable to fetch conf file path.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ errmsg);
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "statefile", &statefile);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg), "Unable to fetch statefile path.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ errmsg);
+ goto out;
+ }
+
+ ret = snprintf(buf, sizeof(buf), "%s/" GEOREP "/%s_%s_%s", conf->workdir,
+ volinfo->volname, slave_host, slave_vol);
+ if ((ret < 0) || (ret >= sizeof(buf))) {
+ ret = -1;
+ goto out;
+ }
+ ret = mkdir_p(buf, 0755, _gf_true);
+ if (ret) {
+ len = snprintf(errmsg, sizeof(errmsg),
+ "Unable to create %s"
+ ". Error : %s",
+ buf, strerror(errno));
+ if (len < 0) {
+ strcpy(errmsg, "<error>");
+ }
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, "%s",
+ errmsg);
+ goto out;
+ }
+
+ ret = snprintf(buf, PATH_MAX, "%s/" GEOREP "/%s", conf->logdir,
+ volinfo->volname);
+ if ((ret < 0) || (ret >= PATH_MAX)) {
+ ret = -1;
+ goto out;
+ }
+ ret = mkdir_p(buf, 0755, _gf_true);
+ if (ret) {
+ len = snprintf(errmsg, sizeof(errmsg),
+ "Unable to create %s"
+ ". Error : %s",
+ buf, strerror(errno));
+ if (len < 0) {
+ strcpy(errmsg, "<error>");
+ }
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, "%s",
+ errmsg);
+ goto out;
+ }
+
+ ret = sys_lstat(conf_path, &stbuf);
+ if (!ret) {
+ gf_msg_debug(this->name, 0,
+ "Session already running."
+ " Not creating config file again.");
+ } else {
+ ret = create_conf_file(conf, conf_path);
+ if (ret || sys_lstat(conf_path, &stbuf)) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Failed to create"
+ " config file(%s).",
+ conf_path);
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, "%s",
+ errmsg);
+ goto out;
+ }
+ }
+
+ ret = sys_lstat(statefile, &stbuf);
+ if (!ret) {
+ gf_msg_debug(this->name, 0,
+ "Session already running."
+ " Not creating status file again.");
+ goto out;
+ } else {
+ ret = glusterd_create_status_file(volinfo->volname, slave, slave_host,
+ slave_vol, "Created");
+ if (ret || sys_lstat(statefile, &stbuf)) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Unable to create %s"
+ ". Error : %s",
+ statefile, strerror(errno));
+ *op_errstr = gf_strdup(errmsg);
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED, "%s",
+ errmsg);
+ ret = -1;
+ goto out;
+ }
+ }
+
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_op_gsync_create(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ char common_pem_file[PATH_MAX] = "";
+ char errmsg[PATH_MAX] = {
+ 0,
+ };
+ char hooks_args[PATH_MAX] = "";
+ char uuid_str[64] = "";
+ char *host_uuid = NULL;
+ char *slave_url = NULL;
+ char *slave_url_buf = NULL;
+ char *slave_user = NULL;
+ char *slave_ip = NULL;
+ char *save_ptr = NULL;
+ char *slave_host = NULL;
+ char *slave_vol = NULL;
+ char *arg_buf = NULL;
+ char *volname = NULL;
+ char *slave = NULL;
+ int32_t ret = -1;
+ int32_t is_pem_push = -1;
+ int32_t ssh_port = 22;
+ gf_boolean_t is_force = -1;
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ char old_working_dir[PATH_MAX] = {0};
+ char new_working_dir[PATH_MAX] = {0};
+ char *slave_voluuid = NULL;
+ char *old_slavehost = NULL;
+ gf_boolean_t is_existing_session = _gf_false;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+
+ ret = glusterd_op_gsync_args_get(dict, op_errstr, &volname, &slave,
+ &host_uuid);
+ if (ret)
+ goto out;
+
+ len = snprintf(common_pem_file, sizeof(common_pem_file),
+ "%s" GLUSTERD_COMMON_PEM_PUB_FILE, conf->workdir);
+ if ((len < 0) || (len >= sizeof(common_pem_file))) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "Volinfo for %s (master) not found", volname);
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "slave_vol", &slave_vol);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg), "Unable to fetch slave volume name.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ errmsg);
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "slave_url", &slave_url);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg), "Unable to fetch slave IP.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ /* Fetch the slave_user and slave_ip from the slave_url.
+ * If the slave_user is not present. Use "root"
+ */
+ if (strstr(slave_url, "@")) {
+ slave_url_buf = gf_strdup(slave_url);
+ if (!slave_url_buf) {
+ ret = -1;
+ goto out;
+ }
+ slave_user = strtok_r(slave_url, "@", &save_ptr);
+ slave_ip = strtok_r(NULL, "@", &save_ptr);
+ } else {
+ slave_user = "root";
+ slave_ip = slave_url;
+ }
+
+ if (!slave_user || !slave_ip) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVE_URL_INVALID,
+ "Invalid slave url.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "slave_host", &slave_host);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg), "Unable to fetch slave host");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, "ssh_port", &ssh_port);
+ if (ret < 0 && ret != -ENOENT) {
+ snprintf(errmsg, sizeof(errmsg), "Fetching ssh_port failed");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ is_force = dict_get_str_boolean(dict, "force", _gf_false);
+
+ uuid_utoa_r(MY_UUID, uuid_str);
+ if (!strcmp(uuid_str, host_uuid)) {
+ ret = dict_get_int32(dict, "push_pem", &is_pem_push);
+ if (!ret && is_pem_push) {
+ gf_msg_debug(this->name, 0,
+ "Trying to setup"
+ " pem files in slave");
+ is_pem_push = 1;
+ } else
+ is_pem_push = 0;
+
+ len = snprintf(hooks_args, sizeof(hooks_args),
+ "is_push_pem=%d,pub_file=%s,slave_user=%s,"
+ "slave_ip=%s,slave_vol=%s,ssh_port=%d",
+ is_pem_push, common_pem_file, slave_user, slave_ip,
+ slave_vol, ssh_port);
+ if ((len < 0) || (len >= sizeof(hooks_args))) {
+ ret = -1;
+ goto out;
+ }
+ } else
+ snprintf(hooks_args, sizeof(hooks_args),
+ "This argument will stop the hooks script");
+
+ arg_buf = gf_strdup(hooks_args);
+ if (!arg_buf) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STRDUP_FAILED,
+ "Failed to gf_strdup");
+ if (is_force) {
+ ret = 0;
+ goto create_essentials;
+ }
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_str(dict, "hooks_args", arg_buf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set hooks_args in dict.");
+ if (is_force) {
+ ret = 0;
+ goto create_essentials;
+ }
+ goto out;
+ }
+
+create_essentials:
+ /* Fetch slave volume uuid, to get stored in volume info. */
+ ret = dict_get_str(dict, "slave_voluuid", &slave_voluuid);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Unable to fetch slave volume uuid from dict");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ is_existing_session = dict_get_str_boolean(dict, "existing_session",
+ _gf_false);
+ if (is_existing_session) {
+ ret = dict_get_str(dict, "old_slavehost", &old_slavehost);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg), "Unable to fetch old_slavehost");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ errmsg);
+ ret = -1;
+ goto out;
+ }
+
+ /* Rename existing geo-rep session with new Slave Host */
+ ret = snprintf(old_working_dir, sizeof(old_working_dir) - 1,
+ "%s/" GEOREP "/%s_%s_%s", conf->workdir,
+ volinfo->volname, old_slavehost, slave_vol);
+
+ ret = snprintf(new_working_dir, sizeof(new_working_dir) - 1,
+ "%s/" GEOREP "/%s_%s_%s", conf->workdir,
+ volinfo->volname, slave_host, slave_vol);
+
+ ret = sys_rename(old_working_dir, new_working_dir);
+ if (!ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_FORCE_CREATE_SESSION,
+ "rename of old working dir %s to "
+ "new working dir %s is done! ",
+ old_working_dir, new_working_dir);
+ } else {
+ if (errno == ENOENT) {
+ /* log error, but proceed with directory
+ * creation below */
+ gf_msg_debug(this->name, 0,
+ "old_working_dir(%s) "
+ "not present.",
+ old_working_dir);
+ } else {
+ len = snprintf(errmsg, sizeof(errmsg),
+ "rename of old working dir %s "
+ "to new working dir %s "
+ "failed! Error: %s",
+ old_working_dir, new_working_dir,
+ strerror(errno));
+ if (len < 0) {
+ strcpy(errmsg, "<error>");
}
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_FORCE_CREATE_SESSION,
+ "rename of old working dir %s to "
+ "new working dir %s failed! Error: %s!",
+ old_working_dir, new_working_dir, strerror(errno));
- ret = stop_gsync (volname, slave, &status_msg);
- if (ret == 0 && status_msg)
- ret = dict_set_str (resp_dict, "gsync-status",
- status_msg);
- if (ret != 0)
- *op_errstr = gf_strdup ("internal error");
+ ret = -1;
+ goto out;
+ }
}
+ }
+
+ ret = glusterd_create_essential_dir_files(volinfo, dict, slave, slave_host,
+ slave_vol, op_errstr);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_slave_in_info(volinfo, slave, host_uuid, slave_voluuid,
+ op_errstr, is_force);
+ if (ret) {
+ snprintf(errmsg, sizeof(errmsg),
+ "Unable to store"
+ " slave info.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SLAVEINFO_STORE_ERROR, "%s",
+ errmsg);
+ goto out;
+ }
+
+ /* Enable marker and changelog */
+ ret = glusterd_set_gsync_confs(volinfo);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_MARKER_START_FAIL,
+ "marker/changelog"
+ " start failed");
+ snprintf(errmsg, sizeof(errmsg), "Index initialization failed");
+
+ ret = -1;
+ goto out;
+ }
out:
- gf_log ("", GF_LOG_DEBUG,"Returning %d", ret);
- return ret;
+ if (ret && errmsg[0] != '\0') {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GSYNCD_ERROR, "%s", errmsg);
+ *op_errstr = gf_strdup(errmsg);
+ }
+
+ GF_FREE(slave_url_buf);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-geo-rep.h b/xlators/mgmt/glusterd/src/glusterd-geo-rep.h
new file mode 100644
index 00000000000..7d1318f522c
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-geo-rep.h
@@ -0,0 +1,52 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_GEO_REP_H_
+#define _GLUSTERD_GEO_REP_H_
+
+#ifndef GSYNC_CONF_TEMPLATE
+#define GSYNC_CONF_TEMPLATE GEOREP "/gsyncd_template.conf"
+#endif
+
+/* <slave host>::<slave volume> */
+#define SLAVE_URL_INFO_MAX (_POSIX_HOST_NAME_MAX + GD_VOLUME_NAME_MAX + 3)
+
+/* slave info format:
+ * <master host uuid>:ssh://{<slave_user>@}<slave host>::<slave volume> \
+ * :<slave volume uuid> */
+#define VOLINFO_SLAVE_URL_MAX \
+ (LOGIN_NAME_MAX + (2 * GF_UUID_BUF_SIZE) + SLAVE_URL_INFO_MAX + 10)
+
+typedef struct glusterd_gsync_status_temp {
+ dict_t *rsp_dict;
+ glusterd_volinfo_t *volinfo;
+ char *node;
+} glusterd_gsync_status_temp_t;
+
+typedef struct gsync_status_param {
+ glusterd_volinfo_t *volinfo;
+ int is_active;
+} gsync_status_param_t;
+
+int
+gsync_status(char *master, char *slave, char *conf_path, int *status,
+ gf_boolean_t *is_template_in_use);
+
+void
+glusterd_check_geo_rep_configured(glusterd_volinfo_t *volinfo,
+ gf_boolean_t *flag);
+int
+_get_slave_status(dict_t *dict, char *key, data_t *value, void *data);
+int
+glusterd_check_geo_rep_running(gsync_status_param_t *param, char **op_errstr);
+
+int
+glusterd_get_gsync_status_mst(glusterd_volinfo_t *volinfo, dict_t *rsp_dict,
+ char *node);
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c
new file mode 100644
index 00000000000..319bfa140f3
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.c
@@ -0,0 +1,235 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-gfproxyd-svc-helper.h"
+#include "glusterd-messages.h"
+#include <glusterfs/syscall.h>
+#include "glusterd-volgen.h"
+
+void
+glusterd_svc_build_gfproxyd_rundir(glusterd_volinfo_t *volinfo, char *path,
+ int path_len)
+{
+ char workdir[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = THIS->private;
+
+ GLUSTERD_GET_VOLUME_PID_DIR(workdir, volinfo, priv);
+
+ snprintf(path, path_len, "%s", workdir);
+}
+
+void
+glusterd_svc_build_gfproxyd_socket_filepath(glusterd_volinfo_t *volinfo,
+ char *path, int path_len)
+{
+ char sockfilepath[PATH_MAX] = {
+ 0,
+ };
+ char rundir[PATH_MAX] = {
+ 0,
+ };
+ int32_t len = 0;
+
+ glusterd_svc_build_gfproxyd_rundir(volinfo, rundir, sizeof(rundir));
+ len = snprintf(sockfilepath, sizeof(sockfilepath), "%s/run-%s", rundir,
+ uuid_utoa(MY_UUID));
+ if ((len < 0) || (len >= sizeof(sockfilepath))) {
+ sockfilepath[0] = 0;
+ }
+
+ glusterd_set_socket_filepath(sockfilepath, path, path_len);
+}
+
+void
+glusterd_svc_build_gfproxyd_pidfile(glusterd_volinfo_t *volinfo, char *path,
+ int path_len)
+{
+ char rundir[PATH_MAX] = {
+ 0,
+ };
+
+ glusterd_svc_build_gfproxyd_rundir(volinfo, rundir, sizeof(rundir));
+
+ snprintf(path, path_len, "%s/%s.gfproxyd.pid", rundir, volinfo->volname);
+}
+
+void
+glusterd_svc_build_gfproxyd_volfile_path(glusterd_volinfo_t *volinfo,
+ char *path, int path_len)
+{
+ char workdir[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = THIS->private;
+
+ GLUSTERD_GET_VOLUME_DIR(workdir, volinfo, priv);
+
+ snprintf(path, path_len, "%s/%s.gfproxyd.vol", workdir, volinfo->volname);
+}
+
+void
+glusterd_svc_build_gfproxyd_logdir(char *logdir, char *volname, size_t len)
+{
+ glusterd_conf_t *conf = THIS->private;
+ snprintf(logdir, len, "%s/gfproxy/%s", conf->logdir, volname);
+}
+
+void
+glusterd_svc_build_gfproxyd_logfile(char *logfile, char *logdir, size_t len)
+{
+ snprintf(logfile, len, "%s/gfproxyd.log", logdir);
+}
+
+int
+glusterd_is_gfproxyd_enabled(glusterd_volinfo_t *volinfo)
+{
+ return glusterd_volinfo_get_boolean(volinfo, VKEY_CONFIG_GFPROXY);
+}
+
+static int
+glusterd_svc_get_gfproxyd_volfile(glusterd_volinfo_t *volinfo, char *svc_name,
+ char *orgvol, char **tmpvol, int path_len)
+{
+ int tmp_fd = -1;
+ int ret = -1;
+ int need_unlink = 0;
+
+ glusterd_svc_build_gfproxyd_volfile_path(volinfo, orgvol, path_len);
+
+ ret = gf_asprintf(tmpvol, "/tmp/g%s-XXXXXX", svc_name);
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */
+ tmp_fd = mkstemp(*tmpvol);
+ if (tmp_fd < 0) {
+ gf_msg("glusterd", GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+ "Unable to create temp file"
+ " %s:(%s)",
+ *tmpvol, strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ need_unlink = 1;
+ ret = glusterd_build_gfproxyd_volfile(volinfo, *tmpvol);
+out:
+ if (need_unlink && ret < 0)
+ sys_unlink(*tmpvol);
+
+ if ((ret < 0) && (*tmpvol != NULL)) {
+ GF_FREE(*tmpvol);
+ *tmpvol = NULL;
+ }
+
+ if (tmp_fd >= 0)
+ sys_close(tmp_fd);
+
+ return ret;
+}
+
+int
+glusterd_svc_check_gfproxyd_volfile_identical(char *svc_name,
+ glusterd_volinfo_t *volinfo,
+ gf_boolean_t *identical)
+{
+ char orgvol[PATH_MAX] = {
+ 0,
+ };
+ char *tmpvol = NULL;
+ int ret = -1;
+ int need_unlink = 0;
+
+ GF_VALIDATE_OR_GOTO("glusterd", identical, out);
+
+ ret = glusterd_svc_get_gfproxyd_volfile(volinfo, svc_name, orgvol, &tmpvol,
+ PATH_MAX);
+ if (ret)
+ goto out;
+
+ need_unlink = 1;
+ ret = glusterd_check_files_identical(orgvol, tmpvol, identical);
+ if (ret)
+ goto out;
+
+out:
+ if (need_unlink)
+ sys_unlink(tmpvol);
+
+ if (tmpvol != NULL)
+ GF_FREE(tmpvol);
+
+ return ret;
+}
+
+int
+glusterd_svc_check_gfproxyd_topology_identical(char *svc_name,
+ glusterd_volinfo_t *volinfo,
+ gf_boolean_t *identical)
+{
+ char orgvol[PATH_MAX] = {
+ 0,
+ };
+ char *tmpvol = NULL;
+ int ret = -1;
+ int tmpclean = 0;
+
+ GF_VALIDATE_OR_GOTO("glusterd", identical, out);
+
+ ret = glusterd_svc_get_gfproxyd_volfile(volinfo, svc_name, orgvol, &tmpvol,
+ PATH_MAX);
+ if (ret)
+ goto out;
+
+ tmpclean = 1; /* SET the flag to unlink() tmpfile */
+
+ /* Compare the topology of volfiles */
+ ret = glusterd_check_topology_identical(orgvol, tmpvol, identical);
+out:
+ if (tmpclean)
+ sys_unlink(tmpvol);
+
+ if (tmpvol != NULL)
+ GF_FREE(tmpvol);
+
+ return ret;
+}
+
+glusterd_volinfo_t *
+glusterd_gfproxyd_volinfo_from_svc(glusterd_svc_t *svc)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_gfproxydsvc_t *gfproxyd = NULL;
+
+ /* Get volinfo->gfproxyd from svc object */
+ gfproxyd = cds_list_entry(svc, glusterd_gfproxydsvc_t, svc);
+ if (!gfproxyd) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SNAPD_OBJ_GET_FAIL,
+ "Failed to get gfproxyd "
+ "object from gfproxyd service");
+ goto out;
+ }
+
+ /* Get volinfo from gfproxyd */
+ volinfo = cds_list_entry(gfproxyd, glusterd_volinfo_t, gfproxyd);
+ if (!volinfo) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to get volinfo from "
+ "from gfproxyd");
+ goto out;
+ }
+out:
+ return volinfo;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.h
new file mode 100644
index 00000000000..3aca218a65d
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc-helper.h
@@ -0,0 +1,51 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_GFPROXYD_SVC_HELPER_H_
+#define _GLUSTERD_GFPROXYD_SVC_HELPER_H_
+
+#include "glusterd.h"
+
+void
+glusterd_svc_build_gfproxyd_rundir(glusterd_volinfo_t *volinfo, char *path,
+ int path_len);
+
+void
+glusterd_svc_build_gfproxyd_socket_filepath(glusterd_volinfo_t *volinfo,
+ char *path, int path_len);
+
+void
+glusterd_svc_build_gfproxyd_pidfile(glusterd_volinfo_t *volinfo, char *path,
+ int path_len);
+
+void
+glusterd_svc_build_gfproxyd_volfile_path(glusterd_volinfo_t *volinfo,
+ char *path, int path_len);
+
+void
+glusterd_svc_build_gfproxyd_logdir(char *logdir, char *volname, size_t len);
+
+void
+glusterd_svc_build_gfproxyd_logfile(char *logfile, char *logdir, size_t len);
+
+int
+glusterd_svc_check_gfproxyd_volfile_identical(char *svc_name,
+ glusterd_volinfo_t *volinfo,
+ gf_boolean_t *identical);
+int
+glusterd_svc_check_gfproxyd_topology_identical(char *svc_name,
+ glusterd_volinfo_t *volinfo,
+ gf_boolean_t *identical);
+int
+glusterd_is_gfproxyd_enabled(glusterd_volinfo_t *volinfo);
+
+glusterd_volinfo_t *
+glusterd_gfproxyd_volinfo_from_svc(glusterd_svc_t *svc);
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
new file mode 100644
index 00000000000..a0bfea41f0f
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.c
@@ -0,0 +1,478 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-gfproxyd-svc.h"
+#include "glusterd-messages.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-gfproxyd-svc-helper.h"
+#include <glusterfs/syscall.h>
+
+void
+glusterd_gfproxydsvc_build(glusterd_svc_t *svc)
+{
+ svc->manager = glusterd_gfproxydsvc_manager;
+ svc->start = glusterd_gfproxydsvc_start;
+ svc->stop = glusterd_gfproxydsvc_stop;
+ svc->reconfigure = glusterd_gfproxydsvc_reconfigure;
+}
+
+int
+glusterd_gfproxydsvc_stop(glusterd_svc_t *svc, int sig)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+
+ ret = glusterd_svc_stop(svc, sig);
+ if (ret)
+ goto out;
+
+ volinfo = glusterd_gfproxyd_volinfo_from_svc(svc);
+ volinfo->gfproxyd.port = 0;
+
+out:
+ return ret;
+}
+
+int
+glusterd_gfproxydsvc_init(glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ char rundir[PATH_MAX] = {
+ 0,
+ };
+ char sockpath[PATH_MAX] = {
+ 0,
+ };
+ char pidfile[PATH_MAX] = {
+ 0,
+ };
+ char volfile[PATH_MAX] = {
+ 0,
+ };
+ char logdir[PATH_MAX] = {
+ 0,
+ };
+ char logfile[PATH_MAX] = {
+ 0,
+ };
+ char volfileid[256] = {0};
+ glusterd_svc_t *svc = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_conn_notify_t notify = NULL;
+ xlator_t *this = NULL;
+ char *volfileserver = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ svc = &(volinfo->gfproxyd.svc);
+
+ ret = snprintf(svc->name, sizeof(svc->name), "%s", gfproxyd_svc_name);
+ if (ret < 0)
+ goto out;
+
+ notify = glusterd_svc_common_rpc_notify;
+
+ glusterd_svc_build_gfproxyd_rundir(volinfo, rundir, sizeof(rundir));
+ glusterd_svc_create_rundir(rundir);
+
+ /* Initialize the connection mgmt */
+ glusterd_svc_build_gfproxyd_socket_filepath(volinfo, sockpath,
+ sizeof(sockpath));
+ ret = glusterd_conn_init(&(svc->conn), sockpath, 600, notify);
+ if (ret)
+ goto out;
+
+ /* Initialize the process mgmt */
+ glusterd_svc_build_gfproxyd_pidfile(volinfo, pidfile, sizeof(pidfile));
+ glusterd_svc_build_gfproxyd_volfile_path(volinfo, volfile, sizeof(volfile));
+ glusterd_svc_build_gfproxyd_logdir(logdir, volinfo->volname,
+ sizeof(logdir));
+ ret = mkdir_p(logdir, 0755, _gf_true);
+ if ((ret == -1) && (EEXIST != errno)) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
+ "Unable to create logdir %s", logdir);
+ goto out;
+ }
+ glusterd_svc_build_gfproxyd_logfile(logfile, logdir, sizeof(logfile));
+ len = snprintf(volfileid, sizeof(volfileid), "gfproxyd/%s",
+ volinfo->volname);
+ if ((len < 0) || (len >= sizeof(volfileid))) {
+ ret = -1;
+ goto out;
+ }
+
+ if (dict_get_strn(this->options, "transport.socket.bind-address",
+ SLEN("transport.socket.bind-address"),
+ &volfileserver) != 0) {
+ volfileserver = "localhost";
+ }
+ ret = glusterd_proc_init(&(svc->proc), gfproxyd_svc_name, pidfile, logdir,
+ logfile, volfile, volfileid, volfileserver);
+ if (ret)
+ goto out;
+
+out:
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_gfproxydsvc_create_volfile(glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ ret = glusterd_generate_gfproxyd_volfile(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Failed to create volfile");
+ goto out;
+ }
+
+out:
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_gfproxydsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+{
+ int ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ volinfo = data;
+ GF_VALIDATE_OR_GOTO(this->name, data, out);
+
+ if (!svc->inited) {
+ ret = glusterd_gfproxydsvc_init(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_QUOTASVC,
+ "Failed to init "
+ "gfproxyd service");
+ goto out;
+ } else {
+ svc->inited = _gf_true;
+ gf_msg_debug(this->name, 0,
+ "gfproxyd service "
+ "initialized");
+ }
+ }
+
+ ret = glusterd_is_gfproxyd_enabled(volinfo);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to read volume "
+ "options");
+ goto out;
+ }
+
+ if (ret) {
+ if (!glusterd_is_volume_started(volinfo)) {
+ if (glusterd_proc_is_running(&svc->proc)) {
+ ret = svc->stop(svc, SIGTERM);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_STOP_FAIL,
+ "Couldn't stop gfproxyd for "
+ "volume: %s",
+ volinfo->volname);
+ } else {
+ /* Since gfproxyd is not running set ret to 0 */
+ ret = 0;
+ }
+ goto out;
+ }
+
+ ret = glusterd_gfproxydsvc_create_volfile(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_CREATE_FAIL,
+ "Couldn't create "
+ "gfroxyd volfile for volume: %s",
+ volinfo->volname);
+ goto out;
+ }
+ ret = svc->stop(svc, SIGTERM);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_START_FAIL,
+ "Couldn't stop "
+ "gfproxyd for volume: %s",
+ volinfo->volname);
+ goto out;
+ }
+
+ ret = svc->start(svc, flags);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_START_FAIL,
+ "Couldn't start "
+ "gfproxyd for volume: %s",
+ volinfo->volname);
+ goto out;
+ }
+
+ glusterd_volinfo_ref(volinfo);
+ ret = glusterd_conn_connect(&(svc->conn));
+ if (ret) {
+ glusterd_volinfo_unref(volinfo);
+ volinfo = NULL;
+ goto out;
+ }
+
+ } else if (glusterd_proc_is_running(&svc->proc)) {
+ ret = svc->stop(svc, SIGTERM);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_STOP_FAIL,
+ "Couldn't stop gfproxyd for volume: %s", volinfo->volname);
+ goto out;
+ }
+ }
+
+out:
+ if (ret) {
+ if (volinfo) {
+ gf_event(EVENT_SVC_MANAGER_FAILED, "volume=%s;svc_name=%s",
+ volinfo->volname, svc->name);
+ }
+ }
+
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_gfproxydsvc_start(glusterd_svc_t *svc, int flags)
+{
+ int ret = -1;
+ runner_t runner = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ char valgrind_logfile[PATH_MAX] = {0};
+ int gfproxyd_port = 0;
+ char msg[1024] = {
+ 0,
+ };
+ char gfproxyd_id[PATH_MAX] = {
+ 0,
+ };
+ glusterd_volinfo_t *volinfo = NULL;
+ char *localtime_logging = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ volinfo = glusterd_gfproxyd_volinfo_from_svc(svc);
+ if (!volinfo)
+ goto out;
+
+ ret = sys_access(svc->proc.volfile, F_OK);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_DEBUG, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "gfproxyd Volfile %s is not present", svc->proc.volfile);
+ ret = glusterd_gfproxydsvc_create_volfile(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Couldn't create "
+ "gfproxyd volfile for volume: %s",
+ volinfo->volname);
+ goto out;
+ }
+ }
+ runinit(&runner);
+
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
+ len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s",
+ svc->proc.logdir, svc->proc.logfile);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ ret = -1;
+ goto out;
+ }
+
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
+ runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
+ }
+
+ snprintf(gfproxyd_id, sizeof(gfproxyd_id), "gfproxyd-%s", volinfo->volname);
+ runner_add_args(&runner, SBIN_DIR "/glusterfsd", "-s",
+ svc->proc.volfileserver, "--volfile-id",
+ svc->proc.volfileid, "-p", svc->proc.pidfile, "-l",
+ svc->proc.logfile, "--brick-name", gfproxyd_id, "-S",
+ svc->conn.sockpath, NULL);
+
+ if (volinfo->memory_accounting)
+ runner_add_arg(&runner, "--mem-accounting");
+ if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY,
+ SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY),
+ &localtime_logging) == 0) {
+ if (strcmp(localtime_logging, "enable") == 0)
+ runner_add_arg(&runner, "--localtime-logging");
+ }
+
+ gfproxyd_port = pmap_assign_port(this, volinfo->gfproxyd.port, gfproxyd_id);
+ volinfo->gfproxyd.port = gfproxyd_port;
+
+ runner_add_arg(&runner, "--brick-port");
+ runner_argprintf(&runner, "%d", gfproxyd_port);
+ runner_add_arg(&runner, "--xlator-option");
+ runner_argprintf(&runner, "%s-server.listen-port=%d", volinfo->volname,
+ gfproxyd_port);
+
+ snprintf(msg, sizeof(msg), "Starting the gfproxyd service for volume %s",
+ volinfo->volname);
+ runner_log(&runner, this->name, GF_LOG_DEBUG, msg);
+
+ if (flags == PROC_START_NO_WAIT) {
+ ret = runner_run_nowait(&runner);
+ } else {
+ synclock_unlock(&priv->big_lock);
+ {
+ ret = runner_run(&runner);
+ }
+ synclock_lock(&priv->big_lock);
+ }
+
+out:
+ return ret;
+}
+
+int
+glusterd_gfproxydsvc_restart()
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *tmp = NULL;
+ int ret = -1;
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = NULL;
+ glusterd_svc_t *svc = NULL;
+
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list)
+ {
+ /* Start per volume gfproxyd svc */
+ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+ svc = &(volinfo->gfproxyd.svc);
+ ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_START_FAIL,
+ "Couldn't resolve gfproxyd for "
+ "vol: %s on restart",
+ volinfo->volname);
+ gf_event(EVENT_SVC_MANAGER_FAILED, "volume=%s;svc_name=%s",
+ volinfo->volname, svc->name);
+ goto out;
+ }
+ }
+ }
+out:
+ return ret;
+}
+
+int
+glusterd_gfproxydsvc_reconfigure(void *data)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ gf_boolean_t identical = _gf_false;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ volinfo = data;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ if (!volinfo->gfproxyd.svc.inited)
+ goto manager;
+
+ if (!glusterd_is_gfproxyd_enabled(volinfo))
+ goto manager;
+ else if (!glusterd_proc_is_running(&volinfo->gfproxyd.svc.proc))
+ goto manager;
+
+ /*
+ * Check both OLD and NEW volfiles, if they are SAME by size
+ * and cksum i.e. "character-by-character". If YES, then
+ * NOTHING has been changed, just return.
+ */
+ ret = glusterd_svc_check_gfproxyd_volfile_identical(
+ volinfo->gfproxyd.svc.name, volinfo, &identical);
+ if (ret)
+ goto out;
+
+ if (identical) {
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * They are not identical. Find out if the topology is changed
+ * OR just the volume options. If just the options which got
+ * changed, then inform the xlator to reconfigure the options.
+ */
+ identical = _gf_false; /* RESET the FLAG */
+ ret = glusterd_svc_check_gfproxyd_topology_identical(
+ volinfo->gfproxyd.svc.name, volinfo, &identical);
+ if (ret)
+ goto out;
+
+ /* Topology is not changed, but just the options. But write the
+ * options to gfproxyd volfile, so that gfproxyd will be reconfigured.
+ */
+ if (identical) {
+ ret = glusterd_gfproxydsvc_create_volfile(volinfo);
+ if (ret == 0) { /* Only if above PASSES */
+ ret = glusterd_fetchspec_notify(this);
+ }
+ goto out;
+ }
+manager:
+ /*
+ * gfproxyd volfile's topology has been changed. gfproxyd server needs
+ * to be RESTARTED to ACT on the changed volfile.
+ */
+ ret = volinfo->gfproxyd.svc.manager(&(volinfo->gfproxyd.svc), volinfo,
+ PROC_START_NO_WAIT);
+
+out:
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h
new file mode 100644
index 00000000000..d396b4015f3
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-gfproxyd-svc.h
@@ -0,0 +1,47 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_GFPROXYD_SVC_H_
+#define _GLUSTERD_GFPROXYD_SVC_H_
+
+#include "glusterd-svc-mgmt.h"
+
+#define gfproxyd_svc_name "gfproxyd"
+
+struct glusterd_gfproxydsvc_ {
+ glusterd_svc_t svc;
+ gf_store_handle_t *handle;
+ int port;
+};
+
+typedef struct glusterd_gfproxydsvc_ glusterd_gfproxydsvc_t;
+
+void
+glusterd_gfproxydsvc_build(glusterd_svc_t *svc);
+
+int
+glusterd_gfproxydsvc_manager(glusterd_svc_t *svc, void *data, int flags);
+
+int
+glusterd_gfproxydsvc_start(glusterd_svc_t *svc, int flags);
+
+int
+glusterd_gfproxydsvc_stop(glusterd_svc_t *svc, int sig);
+
+int
+glusterd_gfproxydsvc_reconfigure();
+
+void
+glusterd_gfproxydsvc_build_volfile_path(char *server, char *workdir,
+ char *volfile, size_t len);
+
+int
+glusterd_gfproxydsvc_restart();
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-handler.c b/xlators/mgmt/glusterd/src/glusterd-handler.c
index 51b859857b2..1b21c40596d 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handler.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handler.c
@@ -1,48 +1,38 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <inttypes.h>
-
-#include "globals.h"
-#include "glusterfs.h"
-#include "compat.h"
-#include "dict.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/dict.h>
#include "protocol-common.h"
-#include "xlator.h"
-#include "logging.h"
-#include "timer.h"
-#include "defaults.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "statedump.h"
-#include "run.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/timer.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/statedump.h>
+#include <glusterfs/run.h>
#include "glusterd-mem-types.h"
#include "glusterd.h"
#include "glusterd-sm.h"
#include "glusterd-op-sm.h"
#include "glusterd-utils.h"
+#include "glusterd-mgmt.h"
+#include "glusterd-server-quorum.h"
#include "glusterd-store.h"
+#include "glusterd-locks.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-geo-rep.h"
#include "glusterd1-xdr.h"
#include "cli1-xdr.h"
@@ -50,2967 +40,6674 @@
#include "rpc-clnt.h"
#include "glusterd-volgen.h"
#include "glusterd-mountbroker.h"
+#include "glusterd-messages.h"
+#include "glusterd-errno.h"
#include <sys/resource.h>
#include <inttypes.h>
-#include "defaults.c"
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
-#include "globals.h"
#include "glusterd-syncop.h"
+#include "glusterd-messages.h"
-static int
-glusterd_handle_friend_req (rpcsvc_request_t *req, uuid_t uuid,
- char *hostname, int port,
- gd1_mgmt_friend_req *friend_req)
+extern glusterd_op_info_t opinfo;
+static int volcount;
+
+int
+glusterd_big_locked_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data,
+ rpc_clnt_notify_t notify_fn)
{
- int ret = -1;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_friend_sm_event_t *event = NULL;
- glusterd_friend_req_ctx_t *ctx = NULL;
- char rhost[UNIX_PATH_MAX + 1] = {0};
- uuid_t friend_uuid = {0};
- dict_t *dict = NULL;
+ glusterd_conf_t *priv = THIS->private;
+ int ret = -1;
- uuid_parse (uuid_utoa (uuid), friend_uuid);
- if (!port)
- port = GF_DEFAULT_BASE_PORT;
+ synclock_lock(&priv->big_lock);
+ ret = notify_fn(rpc, mydata, event, data);
+ synclock_unlock(&priv->big_lock);
- ret = glusterd_remote_hostname_get (req, rhost, sizeof (rhost));
- ret = glusterd_friend_find (uuid, rhost, &peerinfo);
+ return ret;
+}
- if (ret) {
- ret = glusterd_xfer_friend_add_resp (req, rhost, port, -1,
- GF_PROBE_UNKNOWN_PEER);
- if (friend_req->vols.vols_val) {
- free (friend_req->vols.vols_val);
- friend_req->vols.vols_val = NULL;
- }
- goto out;
- }
+int
+glusterd_big_locked_handler(rpcsvc_request_t *req, rpcsvc_actor actor_fn)
+{
+ glusterd_conf_t *priv = THIS->private;
+ int ret = -1;
- ret = glusterd_friend_sm_new_event
- (GD_FRIEND_EVENT_RCVD_FRIEND_REQ, &event);
+ synclock_lock(&priv->big_lock);
+ ret = actor_fn(req);
+ synclock_unlock(&priv->big_lock);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "event generation failed: %d", ret);
- return ret;
- }
+ return ret;
+}
- event->peerinfo = peerinfo;
+static int
+glusterd_handle_friend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
+ int port, gd1_mgmt_friend_req *friend_req)
+{
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_friend_req_ctx_t *ctx = NULL;
+ char rhost[UNIX_PATH_MAX + 1] = {0};
+ dict_t *dict = NULL;
- ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_friend_req_ctx_t);
+ if (!port)
+ port = GF_DEFAULT_BASE_PORT;
- if (!ctx) {
- gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
- ret = -1;
- goto out;
- }
+ ret = glusterd_remote_hostname_get(req, rhost, sizeof(rhost));
- uuid_copy (ctx->uuid, uuid);
- if (hostname)
- ctx->hostname = gf_strdup (hostname);
- ctx->req = req;
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t);
+ dict = dict_new();
- dict = dict_new ();
- if (!dict) {
- ret = -1;
- goto out;
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find(uuid, rhost);
+
+ if (peerinfo == NULL) {
+ gf_event(EVENT_PEER_REJECT, "peer=%s", hostname);
+ ret = glusterd_xfer_friend_add_resp(req, hostname, rhost, port, -1,
+ GF_PROBE_UNKNOWN_PEER);
+ if (friend_req->vols.vols_val) {
+ free(friend_req->vols.vols_val);
+ friend_req->vols.vols_val = NULL;
}
+ goto out;
+ }
- ret = dict_unserialize (friend_req->vols.vols_val,
- friend_req->vols.vols_len,
- &dict);
+ ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_RCVD_FRIEND_REQ, &event);
- if (ret)
- goto out;
- else
- dict->extra_stdfree = friend_req->vols.vols_val;
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL,
+ "event generation failed: %d", ret);
+ goto out;
+ }
- ctx->vols = dict;
- event->ctx = ctx;
+ event->peername = gf_strdup(peerinfo->hostname);
+ gf_uuid_copy(event->peerid, peerinfo->uuid);
- ret = glusterd_friend_sm_inject_event (event);
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject event %d, "
- "ret = %d", event->event, ret);
- goto out;
- }
+ if (!ctx) {
+ gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Unable to allocate memory");
+ ret = -1;
+ goto out;
+ }
- ret = 0;
+ gf_uuid_copy(ctx->uuid, uuid);
+ if (hostname)
+ ctx->hostname = gf_strdup(hostname);
+ ctx->req = req;
+
+ if (!dict) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(friend_req->vols.vols_val, friend_req->vols.vols_len,
+ &dict);
+
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
+ goto out;
+ } else
+ dict->extra_stdfree = friend_req->vols.vols_val;
+
+ ctx->vols = dict;
+ event->ctx = ctx;
+
+ ret = glusterd_friend_sm_inject_event(event);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL,
+ "Unable to inject event %d, "
+ "ret = %d",
+ event->event, ret);
+ goto out;
+ }
+
+ ret = 0;
+ if (peerinfo && (0 == peerinfo->connected))
+ ret = GLUSTERD_CONNECTION_AWAITED;
out:
- if (0 != ret) {
- if (ctx && ctx->hostname)
- GF_FREE (ctx->hostname);
- if (ctx)
- GF_FREE (ctx);
- if (dict) {
- if ((!dict->extra_stdfree) &&
- friend_req->vols.vols_val)
- free (friend_req->vols.vols_val);
- dict_unref (dict);
- } else {
- if (friend_req->vols.vols_val)
- free (friend_req->vols.vols_val);
- }
- if (event)
- GF_FREE (event);
+ RCU_READ_UNLOCK;
+
+ if (ret && (ret != GLUSTERD_CONNECTION_AWAITED)) {
+ if (ctx && ctx->hostname)
+ GF_FREE(ctx->hostname);
+ GF_FREE(ctx);
+ if (dict) {
+ if ((!dict->extra_stdfree) && friend_req->vols.vols_val)
+ free(friend_req->vols.vols_val);
+ dict_unref(dict);
} else {
- if (peerinfo && (0 == peerinfo->connected))
- ret = GLUSTERD_CONNECTION_AWAITED;
+ free(friend_req->vols.vols_val);
}
- return ret;
+ if (event)
+ GF_FREE(event->peername);
+ GF_FREE(event);
+ }
+
+ return ret;
}
static int
-glusterd_handle_unfriend_req (rpcsvc_request_t *req, uuid_t uuid,
- char *hostname, int port)
+glusterd_handle_unfriend_req(rpcsvc_request_t *req, uuid_t uuid, char *hostname,
+ int port)
{
- int ret = -1;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_friend_sm_event_t *event = NULL;
- glusterd_friend_req_ctx_t *ctx = NULL;
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_friend_req_ctx_t *ctx = NULL;
- if (!port)
- port = GF_DEFAULT_BASE_PORT;
+ if (!port)
+ port = GF_DEFAULT_BASE_PORT;
- ret = glusterd_friend_find (uuid, hostname, &peerinfo);
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_friend_req_ctx_t);
- if (ret) {
- gf_log ("glusterd", GF_LOG_CRITICAL,
- "Received remove-friend from unknown peer %s",
- hostname);
- ret = glusterd_xfer_friend_remove_resp (req, hostname,
- port);
- goto out;
- }
+ RCU_READ_LOCK;
- ret = glusterd_friend_sm_new_event
- (GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND, &event);
+ peerinfo = glusterd_peerinfo_find(uuid, hostname);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "event generation failed: %d", ret);
- return ret;
- }
+ if (peerinfo == NULL) {
+ RCU_READ_UNLOCK;
+ gf_msg("glusterd", GF_LOG_CRITICAL, 0, GD_MSG_REQ_FROM_UNKNOWN_PEER,
+ "Received remove-friend from unknown peer %s", hostname);
+ ret = glusterd_xfer_friend_remove_resp(req, hostname, port);
+ goto out;
+ }
- event->peerinfo = peerinfo;
+ ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND,
+ &event);
- ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_friend_req_ctx_t);
+ if (ret) {
+ RCU_READ_UNLOCK;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL,
+ "event generation failed: %d", ret);
+ goto out;
+ }
- if (!ctx) {
- gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
- ret = -1;
- goto out;
- }
+ if (hostname)
+ event->peername = gf_strdup(hostname);
- uuid_copy (ctx->uuid, uuid);
- if (hostname)
- ctx->hostname = gf_strdup (hostname);
- ctx->req = req;
+ gf_uuid_copy(event->peerid, uuid);
- event->ctx = ctx;
+ if (!ctx) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Unable to allocate memory");
+ goto out;
+ }
- ret = glusterd_friend_sm_inject_event (event);
+ gf_uuid_copy(ctx->uuid, uuid);
+ if (hostname)
+ ctx->hostname = gf_strdup(hostname);
+ ctx->req = req;
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject event %d, "
- "ret = %d", event->event, ret);
- goto out;
- }
+ event->ctx = ctx;
- ret = 0;
+ ret = glusterd_friend_sm_inject_event(event);
+
+ if (ret) {
+ RCU_READ_UNLOCK;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL,
+ "Unable to inject event %d, "
+ "ret = %d",
+ event->event, ret);
+ goto out;
+ }
+
+ RCU_READ_UNLOCK;
+
+ return 0;
out:
- if (0 != ret) {
- if (ctx && ctx->hostname)
- GF_FREE (ctx->hostname);
- if (ctx)
- GF_FREE (ctx);
- }
- return ret;
+ if (0 != ret) {
+ if (ctx && ctx->hostname)
+ GF_FREE(ctx->hostname);
+ GF_FREE(ctx);
+ if (event)
+ GF_FREE(event->peername);
+ GF_FREE(event);
+ }
+
+ return ret;
}
+struct args_pack {
+ dict_t *dict;
+ int vol_count;
+ int opt_count;
+};
+
static int
-glusterd_add_peer_detail_to_dict (glusterd_peerinfo_t *peerinfo,
- dict_t *friends, int count)
+_build_option_key(dict_t *d, char *k, data_t *v, void *tmp)
{
+ char reconfig_key[256] = {
+ 0,
+ };
+ int keylen;
+ struct args_pack *pack = NULL;
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ pack = tmp;
+ if (strcmp(k, GLUSTERD_GLOBAL_OPT_VERSION) == 0)
+ return 0;
- int ret = -1;
- char key[256] = {0, };
+ if (priv->op_version > GD_OP_VERSION_MIN) {
+ if ((strcmp(k, "features.limit-usage") == 0) ||
+ (strcmp(k, "features.soft-limit") == 0))
+ return 0;
+ }
+
+ /* snap-max-hard-limit and snap-max-soft-limit are system *
+ * options set and managed by snapshot config option. Hence *
+ * they should not be displayed in gluster volume info. *
+ */
+ if ((strcmp(k, "snap-max-hard-limit") == 0) ||
+ (strcmp(k, "snap-max-soft-limit") == 0))
+ return 0;
- GF_ASSERT (peerinfo);
- GF_ASSERT (friends);
+ keylen = snprintf(reconfig_key, sizeof(reconfig_key), "volume%d.option.%s",
+ pack->vol_count, k);
+ ret = dict_set_strn(pack->dict, reconfig_key, keylen, v->data);
+ if (0 == ret)
+ pack->opt_count++;
- snprintf (key, 256, "friend%d.uuid", count);
- uuid_utoa_r (peerinfo->uuid, peerinfo->uuid_str);
- ret = dict_set_str (friends, key, peerinfo->uuid_str);
- if (ret)
- goto out;
+ return 0;
+}
- snprintf (key, 256, "friend%d.hostname", count);
- ret = dict_set_str (friends, key, peerinfo->hostname);
+int
+glusterd_add_arbiter_info_to_bricks(glusterd_volinfo_t *volinfo,
+ dict_t *volumes, int count)
+{
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+ int i = 0;
+ int ret = 0;
+
+ if (volinfo->replica_count == 1 || volinfo->arbiter_count != 1)
+ return 0;
+ for (i = 1; i <= volinfo->brick_count; i++) {
+ if (i % volinfo->replica_count != 0)
+ continue;
+ keylen = snprintf(key, sizeof(key), "volume%d.brick%d.isArbiter", count,
+ i);
+ ret = dict_set_int32n(volumes, key, keylen, 1);
if (ret)
- goto out;
+ return ret;
+ }
+ return 0;
+}
- snprintf (key, 256, "friend%d.port", count);
- ret = dict_set_int32 (friends, key, peerinfo->port);
- if (ret)
- goto out;
+int
+glusterd_add_volume_detail_to_dict(glusterd_volinfo_t *volinfo, dict_t *volumes,
+ int count)
+{
+ int ret = -1;
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *ta_brickinfo = NULL;
+ char *buf = NULL;
+ int i = 1;
+ dict_t *dict = NULL;
+ glusterd_conf_t *priv = NULL;
+ char *volume_id_str = NULL;
+ struct args_pack pack = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ int32_t len = 0;
+
+ char ta_brick[4096] = {
+ 0,
+ };
+
+ GF_ASSERT(volinfo);
+ GF_ASSERT(volumes);
+
+ this = THIS;
+ priv = this->private;
+
+ GF_ASSERT(priv);
+
+ keylen = snprintf(key, sizeof(key), "volume%d.name", count);
+ ret = dict_set_strn(volumes, key, keylen, volinfo->volname);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "volume%d.type", count);
+ ret = dict_set_int32n(volumes, key, keylen, volinfo->type);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "volume%d.status", count);
+ ret = dict_set_int32n(volumes, key, keylen, volinfo->status);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "volume%d.brick_count", count);
+ ret = dict_set_int32n(volumes, key, keylen, volinfo->brick_count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "volume%d.dist_count", count);
+ ret = dict_set_int32n(volumes, key, keylen, volinfo->dist_leaf_count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "volume%d.stripe_count", count);
+ ret = dict_set_int32n(volumes, key, keylen, volinfo->stripe_count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "volume%d.replica_count", count);
+ ret = dict_set_int32n(volumes, key, keylen, volinfo->replica_count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "volume%d.disperse_count", count);
+ ret = dict_set_int32n(volumes, key, keylen, volinfo->disperse_count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "volume%d.redundancy_count", count);
+ ret = dict_set_int32n(volumes, key, keylen, volinfo->redundancy_count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "volume%d.arbiter_count", count);
+ ret = dict_set_int32n(volumes, key, keylen, volinfo->arbiter_count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "volume%d.transport", count);
+ ret = dict_set_int32n(volumes, key, keylen, volinfo->transport_type);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_count", count);
+ ret = dict_set_int32n(volumes, key, keylen, volinfo->thin_arbiter_count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ volume_id_str = gf_strdup(uuid_utoa(volinfo->volume_id));
+ if (!volume_id_str) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "volume%d.volume_id", count);
+ ret = dict_set_dynstrn(volumes, key, keylen, volume_id_str);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "volume%d.rebalance", count);
+ ret = dict_set_int32n(volumes, key, keylen, volinfo->rebal.defrag_cmd);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "volume%d.snap_count", count);
+ ret = dict_set_int32n(volumes, key, keylen, volinfo->snap_count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ char brick[1024] = {
+ 0,
+ };
+ char brick_uuid[64] = {
+ 0,
+ };
+ len = snprintf(brick, sizeof(brick), "%s:%s", brickinfo->hostname,
+ brickinfo->path);
+ if ((len < 0) || (len >= sizeof(brick))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+ buf = gf_strdup(brick);
+ keylen = snprintf(key, sizeof(key), "volume%d.brick%d", count, i);
+ ret = dict_set_dynstrn(volumes, key, keylen, buf);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+ keylen = snprintf(key, sizeof(key), "volume%d.brick%d.uuid", count, i);
+ snprintf(brick_uuid, sizeof(brick_uuid), "%s",
+ uuid_utoa(brickinfo->uuid));
+ buf = gf_strdup(brick_uuid);
+ if (!buf) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "brick_uuid=%s", brick_uuid, NULL);
+ goto out;
+ }
+ ret = dict_set_dynstrn(volumes, key, keylen, buf);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
- snprintf (key, 256, "friend%d.state", count);
- ret = dict_set_str (friends, key,
- glusterd_friend_sm_state_name_get(peerinfo->state.state));
- if (ret)
- goto out;
+ i++;
+ }
+ if (volinfo->thin_arbiter_count == 1) {
+ ta_brickinfo = list_first_entry(&volinfo->ta_bricks,
+ glusterd_brickinfo_t, brick_list);
+ len = snprintf(ta_brick, sizeof(ta_brick), "%s:%s",
+ ta_brickinfo->hostname, ta_brickinfo->path);
+ if ((len < 0) || (len >= sizeof(ta_brick))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+ buf = gf_strdup(ta_brick);
+ keylen = snprintf(key, sizeof(key), "volume%d.thin_arbiter_brick",
+ count);
+ ret = dict_set_dynstrn(volumes, key, keylen, buf);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+ }
+
+ ret = glusterd_add_arbiter_info_to_bricks(volinfo, volumes, count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ARBITER_BRICK_SET_INFO_FAIL, NULL);
+ goto out;
+ }
+
+ dict = volinfo->dict;
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ ret = 0;
+ goto out;
+ }
- snprintf (key, 256, "friend%d.connected", count);
- ret = dict_set_int32 (friends, key, (int32_t)peerinfo->connected);
- if (ret)
- goto out;
+ pack.dict = volumes;
+ pack.vol_count = count;
+ pack.opt_count = 0;
+ dict_foreach(dict, _build_option_key, (void *)&pack);
+ dict_foreach(priv->opts, _build_option_key, &pack);
+ keylen = snprintf(key, sizeof(key), "volume%d.opt_count", pack.vol_count);
+ ret = dict_set_int32n(volumes, key, keylen, pack.opt_count);
out:
- return ret;
+ return ret;
}
-
-int
-glusterd_add_volume_detail_to_dict (glusterd_volinfo_t *volinfo,
- dict_t *volumes, int count)
+int32_t
+glusterd_op_txn_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx,
+ char *err_str, size_t err_len)
{
+ int32_t ret = -1;
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int32_t locked = 0;
+ char *tmp = NULL;
+ char *volname = NULL;
+ uuid_t *txn_id = NULL;
+ glusterd_op_info_t txn_op_info = {
+ {0},
+ };
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ uint32_t op_errno = 0;
+ uint32_t timeout = 0;
+
+ GF_ASSERT(req);
+ GF_ASSERT((op > GD_OP_NONE) && (op < GD_OP_MAX));
+ GF_ASSERT(NULL != ctx);
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ dict = ctx;
+
+ /* Generate a transaction-id for this operation and
+ * save it in the dict. This transaction id distinguishes
+ * each transaction, and helps separate opinfos in the
+ * op state machine. */
+ ret = glusterd_generate_txn_id(dict, &txn_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_IDGEN_FAIL,
+ "Failed to generate transaction id");
+ goto out;
+ }
+
+ /* Save the MY_UUID as the originator_uuid. This originator_uuid
+ * will be used by is_origin_glusterd() to determine if a node
+ * is the originator node for a command. */
+ ret = glusterd_set_originator_uuid(dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UUID_SET_FAIL,
+ "Failed to set originator_uuid.");
+ goto out;
+ }
+
+ /* Based on the op_version, acquire a cluster or mgmt_v3 lock */
+ if (priv->op_version < GD_OP_VERSION_3_6_0) {
+ ret = glusterd_lock(MY_UUID);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_LOCK_FAIL,
+ "Unable to acquire lock on localhost, ret: %d", ret);
+ snprintf(err_str, err_len,
+ "Another transaction is in progress. "
+ "Please try again after some time.");
+ goto out;
+ }
+ } else {
+ /* If no volname is given as a part of the command, locks will
+ * not be held */
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &tmp);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DICT_GET_FAILED,
+ "No Volume name present. "
+ "Locks not being held.");
+ goto local_locking_done;
+ } else {
+ /* Use a copy of volname, as cli response will be
+ * sent before the unlock, and the volname in the
+ * dict, might be removed */
+ volname = gf_strdup(tmp);
+ if (!volname)
+ goto out;
+ }
- int ret = -1;
- char key[256] = {0, };
- glusterd_brickinfo_t *brickinfo = NULL;
- char *buf = NULL;
- int i = 1;
- data_pair_t *pairs = NULL;
- char reconfig_key[256] = {0, };
- dict_t *dict = NULL;
- data_t *value = NULL;
- int opt_count = 0;
- glusterd_conf_t *priv = NULL;
- char *volume_id_str = NULL;
-
+ /* Cli will add timeout key to dict if the default timeout is
+ * other than 2 minutes. Here we use this value to check whether
+ * mgmt_v3_lock_timeout should be set to default value or we
+ * need to change the value according to timeout value
+ * i.e, timeout + 120 seconds. */
+ ret = dict_get_uint32(dict, "timeout", &timeout);
+ if (!ret)
+ priv->mgmt_v3_lock_timeout = timeout + 120;
- GF_ASSERT (volinfo);
- GF_ASSERT (volumes);
+ ret = glusterd_mgmt_v3_lock(volname, MY_UUID, &op_errno, "vol");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL,
+ "Unable to acquire lock for %s", volname);
+ snprintf(err_str, err_len,
+ "Another transaction is in progress for %s. "
+ "Please try again after some time.",
+ volname);
+ goto out;
+ }
+ }
+
+ locked = 1;
+ gf_msg_debug(this->name, 0, "Acquired lock on localhost");
+
+local_locking_done:
+ /* If no volname is given as a part of the command, locks will
+ * not be held, hence sending stage event. */
+ if (volname || (priv->op_version < GD_OP_VERSION_3_6_0))
+ event_type = GD_OP_EVENT_START_LOCK;
+ else {
+ txn_op_info.state.state = GD_OP_STATE_LOCK_SENT;
+ event_type = GD_OP_EVENT_ALL_ACC;
+ }
+
+ /* Save opinfo for this transaction with the transaction id */
+ glusterd_txn_opinfo_init(&txn_op_info, NULL, &op, ctx, req);
+
+ ret = glusterd_set_txn_opinfo(txn_id, &txn_op_info);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set transaction's opinfo");
+ if (ctx)
+ dict_unref(ctx);
+ goto out;
+ }
+
+ ret = glusterd_op_sm_inject_event(event_type, txn_id, ctx);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL,
+ "Failed to acquire cluster"
+ " lock.");
+ goto out;
+ }
- priv = THIS->private;
+out:
+ if (locked && ret) {
+ /* Based on the op-version, we release the
+ * cluster or mgmt_v3 lock */
+ if (priv->op_version < GD_OP_VERSION_3_6_0)
+ glusterd_unlock(MY_UUID);
+ else {
+ ret = glusterd_mgmt_v3_unlock(volname, MY_UUID, "vol");
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Unable to release lock for %s", volname);
+ ret = -1;
+ }
+ }
- GF_ASSERT (priv);
+ if (volname)
+ GF_FREE(volname);
- snprintf (key, 256, "volume%d.name", count);
- ret = dict_set_str (volumes, key, volinfo->volname);
- if (ret)
- goto out;
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
- snprintf (key, 256, "volume%d.type", count);
- ret = dict_set_int32 (volumes, key, volinfo->type);
- if (ret)
- goto out;
+int
+__glusterd_handle_cluster_lock(rpcsvc_request_t *req)
+{
+ dict_t *op_ctx = NULL;
+ int32_t ret = -1;
+ gd1_mgmt_cluster_lock_req lock_req = {
+ {0},
+ };
+ glusterd_op_lock_ctx_t *ctx = NULL;
+ glusterd_op_sm_event_type_t op = GD_OP_EVENT_LOCK;
+ glusterd_op_info_t txn_op_info = {
+ {0},
+ };
+ glusterd_conf_t *priv = NULL;
+ uuid_t *txn_id = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(req);
+
+ txn_id = &priv->global_txn_id;
+
+ ret = xdr_to_generic(req->msg[0], &lock_req,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_lock_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode lock "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0, "Received LOCK from uuid: %s",
+ uuid_utoa(lock_req.uuid));
+
+ RCU_READ_LOCK;
+ ret = (glusterd_peerinfo_find_by_uuid(lock_req.uuid) == NULL);
+ RCU_READ_UNLOCK;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
+ "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa(lock_req.uuid));
+ ret = -1;
+ goto out;
+ }
- snprintf (key, 256, "volume%d.status", count);
- ret = dict_set_int32 (volumes, key, volinfo->status);
- if (ret)
- goto out;
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_op_lock_ctx_t);
- snprintf (key, 256, "volume%d.brick_count", count);
- ret = dict_set_int32 (volumes, key, volinfo->brick_count);
- if (ret)
- goto out;
+ if (!ctx) {
+ // respond here
+ return -1;
+ }
- snprintf (key, 256, "volume%d.dist_count", count);
- ret = dict_set_int32 (volumes, key, volinfo->dist_leaf_count);
- if (ret)
- goto out;
+ gf_uuid_copy(ctx->uuid, lock_req.uuid);
+ ctx->req = req;
+ ctx->dict = NULL;
- snprintf (key, 256, "volume%d.stripe_count", count);
- ret = dict_set_int32 (volumes, key, volinfo->stripe_count);
- if (ret)
- goto out;
+ op_ctx = dict_new();
+ if (!op_ctx) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ "Unable to set new dict");
+ goto out;
+ }
+
+ glusterd_txn_opinfo_init(&txn_op_info, NULL, &op, op_ctx, req);
+
+ ret = glusterd_set_txn_opinfo(txn_id, &txn_op_info);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set transaction's opinfo");
+ dict_unref(txn_op_info.op_ctx);
+ goto out;
+ }
+
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_LOCK, txn_id, ctx);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL,
+ "Failed to inject event GD_OP_EVENT_LOCK");
- snprintf (key, 256, "volume%d.replica_count", count);
- ret = dict_set_int32 (volumes, key, volinfo->replica_count);
- if (ret)
- goto out;
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
- snprintf (key, 256, "volume%d.transport", count);
- ret = dict_set_int32 (volumes, key, volinfo->transport_type);
- if (ret)
- goto out;
+ glusterd_friend_sm();
+ glusterd_op_sm();
- volume_id_str = gf_strdup (uuid_utoa (volinfo->volume_id));
- if (!volume_id_str)
- goto out;
+ if (ret)
+ GF_FREE(ctx);
- snprintf (key, sizeof (key), "volume%d.volume_id", count);
- ret = dict_set_dynstr (volumes, key, volume_id_str);
- if (ret)
- goto out;
+ return ret;
+}
- snprintf (key, 256, "volume%d.rebalance", count);
- ret = dict_set_int32 (volumes, key, volinfo->defrag_cmd);
- if (ret)
- goto out;
+int
+glusterd_handle_cluster_lock(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __glusterd_handle_cluster_lock);
+}
- list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
- char brick[1024] = {0,};
- snprintf (key, 256, "volume%d.brick%d", count, i);
- snprintf (brick, 1024, "%s:%s", brickinfo->hostname,
- brickinfo->path);
- buf = gf_strdup (brick);
- ret = dict_set_dynstr (volumes, key, buf);
- if (ret)
- goto out;
- i++;
- }
+static int
+glusterd_req_ctx_create(rpcsvc_request_t *rpc_req, int op, uuid_t uuid,
+ char *buf_val, size_t buf_len,
+ gf_gld_mem_types_t mem_type,
+ glusterd_req_ctx_t **req_ctx_out)
+{
+ int ret = -1;
+ char str[50] = {
+ 0,
+ };
+ glusterd_req_ctx_t *req_ctx = NULL;
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ gf_uuid_unparse(uuid, str);
+ gf_msg_debug(this->name, 0, "Received op from uuid %s", str);
+
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ req_ctx = GF_CALLOC(1, sizeof(*req_ctx), mem_type);
+ if (!req_ctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ gf_uuid_copy(req_ctx->uuid, uuid);
+ req_ctx->op = op;
+ ret = dict_unserialize(buf_val, buf_len, &dict);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
+ goto out;
+ }
+
+ req_ctx->dict = dict;
+ req_ctx->req = rpc_req;
+ *req_ctx_out = req_ctx;
+ ret = 0;
+out:
+ if (ret) {
+ if (dict)
+ dict_unref(dict);
+ GF_FREE(req_ctx);
+ }
+ return ret;
+}
- dict = volinfo->dict;
- if (!dict) {
- ret = 0;
- goto out;
+int
+__glusterd_handle_stage_op(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ gd1_mgmt_stage_op_req op_req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ uuid_t *txn_id = NULL;
+ glusterd_op_info_t txn_op_info = {
+ {0},
+ };
+ glusterd_op_sm_state_info_t state = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(req);
+
+ txn_id = &priv->global_txn_id;
+
+ ret = xdr_to_generic(req->msg[0], &op_req,
+ (xdrproc_t)xdr_gd1_mgmt_stage_op_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode stage "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ ret = glusterd_req_ctx_create(req, op_req.op, op_req.uuid,
+ op_req.buf.buf_val, op_req.buf.buf_len,
+ gf_gld_mt_op_stage_ctx_t, &req_ctx);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_CTX_CREATE_FAIL,
+ "Failed to create req_ctx");
+ goto out;
+ }
+
+ ret = dict_get_bin(req_ctx->dict, "transaction_id", (void **)&txn_id);
+ gf_msg_debug(this->name, 0, "transaction ID = %s", uuid_utoa(*txn_id));
+
+ RCU_READ_LOCK;
+ ret = (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL);
+ RCU_READ_UNLOCK;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
+ "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa(op_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ /* In cases where there is no volname, the receivers won't have a
+ * transaction opinfo created, as for those operations, the locking
+ * phase where the transaction opinfos are created, won't be called.
+ * skip_locking will be true for all such transaction and we clear
+ * the txn_opinfo after the staging phase, except for geo-replication
+ * operations where we need to access txn_opinfo in the later phases also.
+ */
+ ret = glusterd_get_txn_opinfo(txn_id, &txn_op_info);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "No transaction's opinfo set");
+
+ state.state = GD_OP_STATE_LOCKED;
+ glusterd_txn_opinfo_init(&txn_op_info, &state, &op_req.op,
+ req_ctx->dict, req);
+
+ if (req_ctx->op != GD_OP_GSYNC_SET)
+ txn_op_info.skip_locking = _gf_true;
+ ret = glusterd_set_txn_opinfo(txn_id, &txn_op_info);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set transaction's opinfo");
+ dict_unref(req_ctx->dict);
+ goto out;
}
+ }
- pairs = dict->members_list;
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_STAGE_OP, txn_id, req_ctx);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL,
+ "Failed to inject event GD_OP_EVENT_STAGE_OP");
- while (pairs) {
- if (1 == glusterd_check_option_exists (pairs->key, NULL)) {
- value = pairs->value;
- if (!value)
- continue;
-
- snprintf (reconfig_key, 256, "volume%d.option.%s", count,
- pairs->key);
- ret = dict_set_str (volumes, reconfig_key, value->data);
- if (!ret)
- opt_count++;
- }
- pairs = pairs->next;
- }
-
- snprintf (key, 256, "volume%d.opt_count", count);
- ret = dict_set_int32 (volumes, key, opt_count);
out:
- return ret;
+ free(op_req.buf.buf_val); // malloced by xdr
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ return ret;
}
int
-glusterd_friend_find (uuid_t uuid, char *hostname,
- glusterd_peerinfo_t **peerinfo)
+glusterd_handle_stage_op(rpcsvc_request_t *req)
{
- int ret = -1;
+ return glusterd_big_locked_handler(req, __glusterd_handle_stage_op);
+}
- if (uuid) {
- ret = glusterd_friend_find_by_uuid (uuid, peerinfo);
+int
+__glusterd_handle_commit_op(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ gd1_mgmt_commit_op_req op_req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ uuid_t *txn_id = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(req);
+
+ txn_id = &priv->global_txn_id;
+
+ ret = xdr_to_generic(req->msg[0], &op_req,
+ (xdrproc_t)xdr_gd1_mgmt_commit_op_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode commit "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ RCU_READ_LOCK;
+ ret = (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL);
+ RCU_READ_UNLOCK;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
+ "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa(op_req.uuid));
+ ret = -1;
+ goto out;
+ }
- if (ret) {
- gf_log ("glusterd", GF_LOG_INFO,
- "Unable to find peer by uuid");
- } else {
- goto out;
- }
+ // the structures should always be equal
+ GF_ASSERT(sizeof(gd1_mgmt_commit_op_req) == sizeof(gd1_mgmt_stage_op_req));
+ ret = glusterd_req_ctx_create(req, op_req.op, op_req.uuid,
+ op_req.buf.buf_val, op_req.buf.buf_len,
+ gf_gld_mt_op_commit_ctx_t, &req_ctx);
+ if (ret)
+ goto out;
- }
+ ret = dict_get_bin(req_ctx->dict, "transaction_id", (void **)&txn_id);
+ gf_msg_debug(this->name, 0, "transaction ID = %s", uuid_utoa(*txn_id));
- if (hostname) {
- ret = glusterd_friend_find_by_hostname (hostname, peerinfo);
-
- if (ret) {
- gf_log ("glusterd", GF_LOG_INFO,
- "Unable to find hostname: %s", hostname);
- } else {
- goto out;
- }
- }
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_COMMIT_OP, txn_id, req_ctx);
out:
- return ret;
+ free(op_req.buf.buf_val); // malloced by xdr
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ return ret;
}
-int32_t
-glusterd_op_txn_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx)
+int
+glusterd_handle_commit_op(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- xlator_t *this = NULL;
- glusterd_conf_t *priv = NULL;
- int32_t locked = 0;
+ return glusterd_big_locked_handler(req, __glusterd_handle_commit_op);
+}
- GF_ASSERT (req);
- GF_ASSERT ((op > GD_OP_NONE) && (op < GD_OP_MAX));
- GF_ASSERT (NULL != ctx);
+int
+__glusterd_handle_cli_probe(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {
+ {
+ 0,
+ },
+ };
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gf_boolean_t run_fsm = _gf_true;
+ xlator_t *this = NULL;
+ char *bind_name = NULL;
+ dict_t *dict = NULL;
+ char *hostname = NULL;
+ int port = 0;
+ int op_errno = 0;
+
+ GF_ASSERT(req);
+ this = THIS;
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "xdr decoding error");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new();
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "Failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ }
+ }
+
+ ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_NOTFOUND_IN_DICT,
+ "Failed to get hostname");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "port", SLEN("port"), &port);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PORT_NOTFOUND_IN_DICT,
+ "Failed to get port");
+ goto out;
+ }
+
+ if (glusterd_is_any_volume_in_server_quorum(this) &&
+ !does_gd_meet_server_quorum(this)) {
+ glusterd_xfer_cli_probe_resp(req, -1, GF_PROBE_QUORUM_NOT_MET, NULL,
+ hostname, port, dict);
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_SERVER_QUORUM_NOT_MET,
+ "Server quorum not met. Rejecting operation.");
+ ret = 0;
+ goto out;
+ }
+
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_CLI_REQ_RECVD,
+ "Received CLI probe req %s %d", hostname, port);
+
+ if (dict_get_strn(this->options, "transport.socket.bind-address",
+ SLEN("transport.socket.bind-address"), &bind_name) == 0) {
+ gf_msg_debug("glusterd", 0,
+ "only checking probe address vs. bind address");
+ ret = gf_is_same_address(bind_name, hostname);
+ } else {
+ ret = gf_is_local_addr(hostname);
+ }
+ if (ret) {
+ glusterd_xfer_cli_probe_resp(req, 0, GF_PROBE_LOCALHOST, NULL, hostname,
+ port, dict);
+ ret = 0;
+ goto out;
+ }
- this = THIS;
- priv = this->private;
- GF_ASSERT (priv);
+ RCU_READ_LOCK;
- ret = glusterd_lock (MY_UUID);
+ peerinfo = glusterd_peerinfo_find_by_hostname(hostname);
+ ret = (peerinfo && gd_peer_has_address(peerinfo, hostname));
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to acquire local lock, ret: %d", ret);
- goto out;
- }
+ RCU_READ_UNLOCK;
- locked = 1;
- gf_log (this->name, GF_LOG_INFO, "Acquired local lock");
+ if (ret) {
+ gf_msg_debug("glusterd", 0,
+ "Probe host %s port %d "
+ "already a peer",
+ hostname, port);
+ glusterd_xfer_cli_probe_resp(req, 0, GF_PROBE_FRIEND, NULL, hostname,
+ port, dict);
+ ret = 0;
+ goto out;
+ }
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_START_LOCK, NULL);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to acquire cluster"
- " lock.");
- goto out;
- }
+ ret = glusterd_probe_begin(req, hostname, port, dict, &op_errno);
- glusterd_op_set_op (op);
- glusterd_op_set_ctx (ctx);
- glusterd_op_set_req (req);
+ if (ret == GLUSTERD_CONNECTION_AWAITED) {
+ // fsm should be run after connection establishes
+ run_fsm = _gf_false;
+ ret = 0;
+ } else if (ret == -1) {
+ glusterd_xfer_cli_probe_resp(req, -1, op_errno, NULL, hostname, port,
+ dict);
+ goto out;
+ }
out:
- if (locked && ret)
- glusterd_unlock (MY_UUID);
+ free(cli_req.dict.dict_val);
- gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (run_fsm) {
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ }
+
+ return ret;
}
int
-glusterd_handle_cluster_lock (rpcsvc_request_t *req)
+glusterd_handle_cli_probe(rpcsvc_request_t *req)
{
- gd1_mgmt_cluster_lock_req lock_req = {{0},};
- int32_t ret = -1;
- glusterd_op_lock_ctx_t *ctx = NULL;
- glusterd_peerinfo_t *peerinfo = NULL;
+ return glusterd_big_locked_handler(req, __glusterd_handle_cli_probe);
+}
- GF_ASSERT (req);
+int
+__glusterd_handle_cli_deprobe(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {
+ {
+ 0,
+ },
+ };
+ uuid_t uuid = {0};
+ int op_errno = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *dict = NULL;
+ char *hostname = NULL;
+ int port = 0;
+ int flags = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *tmp = NULL;
+ glusterd_snap_t *snapinfo = NULL;
+ glusterd_snap_t *tmpsnap = NULL;
+ gf_boolean_t need_free = _gf_false;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode "
+ "request received from cli");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new();
- if (!xdr_to_generic (req->msg[0], &lock_req, (xdrproc_t)xdr_gd1_mgmt_cluster_lock_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
+ if (dict) {
+ need_free = _gf_true;
+ } else {
+ ret = -1;
+ goto out;
}
- gf_log ("glusterd", GF_LOG_INFO,
- "Received LOCK from uuid: %s", uuid_utoa (lock_req.uuid));
-
- if (glusterd_friend_find_by_uuid (lock_req.uuid, &peerinfo)) {
- gf_log (THIS->name, GF_LOG_WARNING, "%s doesn't "
- "belong to the cluster. Ignoring request.",
- uuid_utoa (lock_req.uuid));
- ret = -1;
- goto out;
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "Failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
}
-
- ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t);
-
- if (!ctx) {
- //respond here
- return -1;
+ }
+
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_CLI_REQ_RECVD,
+ "Received CLI deprobe req");
+
+ ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_NOTFOUND_IN_DICT,
+ "Failed to get hostname");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "port", SLEN("port"), &port);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PORT_NOTFOUND_IN_DICT,
+ "Failed to get port");
+ goto out;
+ }
+ ret = dict_get_int32n(dict, "flags", SLEN("flags"), &flags);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FLAGS_NOTFOUND_IN_DICT,
+ "Failed to get flags");
+ goto out;
+ }
+
+ ret = glusterd_hostname_to_uuid(hostname, uuid);
+ if (ret) {
+ op_errno = GF_DEPROBE_NOT_FRIEND;
+ goto out;
+ }
+
+ if (!gf_uuid_compare(uuid, MY_UUID)) {
+ op_errno = GF_DEPROBE_LOCALHOST;
+ ret = -1;
+ goto out;
+ }
+
+ if (!(flags & GF_CLI_FLAG_OP_FORCE)) {
+ /* Check if peers are connected, except peer being
+ * detached*/
+ if (!glusterd_chk_peers_connected_befriended(uuid)) {
+ ret = -1;
+ op_errno = GF_DEPROBE_FRIEND_DOWN;
+ goto out;
+ }
+ }
+
+ /* Check for if volumes exist with some bricks on the peer being
+ * detached. It's not a problem if a volume contains none or all
+ * of its bricks on the peer being detached
+ */
+ cds_list_for_each_entry_safe(volinfo, tmp, &priv->volumes, vol_list)
+ {
+ ret = glusterd_friend_contains_vol_bricks(volinfo, uuid);
+ if (ret == 1) {
+ op_errno = GF_DEPROBE_BRICK_EXIST;
+ goto out;
}
+ }
+
+ cds_list_for_each_entry_safe(snapinfo, tmpsnap, &priv->snapshots, snap_list)
+ {
+ ret = glusterd_friend_contains_snap_bricks(snapinfo, uuid);
+ if (ret == 1) {
+ op_errno = GF_DEPROBE_SNAP_BRICK_EXIST;
+ goto out;
+ }
+ }
+ if (!(flags & GF_CLI_FLAG_OP_FORCE)) {
+ if (glusterd_is_any_volume_in_server_quorum(this) &&
+ !does_gd_meet_server_quorum(this)) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_SERVER_QUORUM_NOT_MET,
+ "Server quorum not met. Rejecting operation.");
+ ret = -1;
+ op_errno = GF_DEPROBE_QUORUM_NOT_MET;
+ goto out;
+ }
+ }
- uuid_copy (ctx->uuid, lock_req.uuid);
- ctx->req = req;
+ if (!gf_uuid_is_null(uuid)) {
+ ret = glusterd_deprobe_begin(req, hostname, port, uuid, dict,
+ &op_errno);
+ } else {
+ ret = glusterd_deprobe_begin(req, hostname, port, NULL, dict,
+ &op_errno);
+ }
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCK, ctx);
+ need_free = _gf_false;
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ free(cli_req.dict.dict_val);
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ if (ret) {
+ ret = glusterd_xfer_cli_deprobe_resp(req, ret, op_errno, NULL, hostname,
+ dict);
+ if (need_free) {
+ dict_unref(dict);
+ }
+ }
- return ret;
+ glusterd_friend_sm();
+ glusterd_op_sm();
+
+ return ret;
}
int
-glusterd_req_ctx_create (rpcsvc_request_t *rpc_req,
- glusterd_op_t op, uuid_t uuid,
- char *buf_val, size_t buf_len,
- gf_gld_mem_types_t mem_type,
- glusterd_req_ctx_t **req_ctx_out)
+glusterd_handle_cli_deprobe(rpcsvc_request_t *req)
{
- int ret = -1;
- char str[50] = {0,};
- glusterd_req_ctx_t *req_ctx = NULL;
- dict_t *dict = NULL;
-
- uuid_unparse (uuid, str);
- gf_log ("glusterd", GF_LOG_INFO,
- "Received op from uuid: %s", str);
-
- dict = dict_new ();
- if (!dict)
- goto out;
+ return glusterd_big_locked_handler(req, __glusterd_handle_cli_deprobe);
+}
- req_ctx = GF_CALLOC (1, sizeof (*req_ctx), mem_type);
- if (!req_ctx) {
- goto out;
+int
+__glusterd_handle_cli_list_friends(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf1_cli_peer_list_req cli_req = {
+ 0,
+ };
+ dict_t *dict = NULL;
+
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf1_cli_peer_list_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode "
+ "request received from cli");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_CLI_REQ_RECVD,
+ "Received cli list req");
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
}
+ }
- uuid_copy (req_ctx->uuid, uuid);
- req_ctx->op = op;
- ret = dict_unserialize (buf_val, buf_len, &dict);
- if (ret) {
- gf_log ("", GF_LOG_WARNING,
- "failed to unserialize the dictionary");
- goto out;
- }
+ ret = glusterd_list_friends(req, dict, cli_req.flags);
- req_ctx->dict = dict;
- req_ctx->req = rpc_req;
- *req_ctx_out = req_ctx;
- ret = 0;
out:
- if (ret) {
- if (dict)
- dict_unref (dict);
- if (req_ctx)
- GF_FREE (req_ctx);
- }
- return ret;
+ if (dict)
+ dict_unref(dict);
+
+ glusterd_friend_sm();
+ glusterd_op_sm();
+
+ return ret;
}
int
-glusterd_handle_stage_op (rpcsvc_request_t *req)
+glusterd_handle_cli_list_friends(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- glusterd_req_ctx_t *req_ctx = NULL;
- gd1_mgmt_stage_op_req op_req = {{0},};
- glusterd_peerinfo_t *peerinfo = NULL;
+ return glusterd_big_locked_handler(req, __glusterd_handle_cli_list_friends);
+}
- GF_ASSERT (req);
- if (!xdr_to_generic (req->msg[0], &op_req, (xdrproc_t)xdr_gd1_mgmt_stage_op_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
+static int
+__glusterd_handle_cli_get_volume(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ int32_t flags = 0;
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode "
+ "request received from cli");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_msg(this->name, GF_LOG_DEBUG, 0, GD_MSG_GET_VOL_REQ_RCVD,
+ "Received get vol req");
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
}
+ }
- if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) {
- gf_log (THIS->name, GF_LOG_WARNING, "%s doesn't "
- "belong to the cluster. Ignoring request.",
- uuid_utoa (op_req.uuid));
- ret = -1;
- goto out;
- }
+ ret = dict_get_int32n(dict, "flags", SLEN("flags"), &flags);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FLAGS_NOTFOUND_IN_DICT,
+ "failed to get flags");
+ goto out;
+ }
+ ret = glusterd_get_volumes(req, dict, flags);
- ret = glusterd_req_ctx_create (req, op_req.op, op_req.uuid,
- op_req.buf.buf_val, op_req.buf.buf_len,
- gf_gld_mt_op_stage_ctx_t, &req_ctx);
- if (ret)
- goto out;
+out:
+ if (dict)
+ dict_unref(dict);
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_OP, req_ctx);
+ glusterd_friend_sm();
+ glusterd_op_sm();
- out:
- if (op_req.buf.buf_val)
- free (op_req.buf.buf_val);//malloced by xdr
- glusterd_friend_sm ();
- glusterd_op_sm ();
- return ret;
+ return ret;
}
int
-glusterd_handle_commit_op (rpcsvc_request_t *req)
+glusterd_handle_cli_get_volume(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- glusterd_req_ctx_t *req_ctx = NULL;
- gd1_mgmt_commit_op_req op_req = {{0},};
- glusterd_peerinfo_t *peerinfo = NULL;
-
- GF_ASSERT (req);
-
- if (!xdr_to_generic (req->msg[0], &op_req, (xdrproc_t)xdr_gd1_mgmt_commit_op_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
+ return glusterd_big_locked_handler(req, __glusterd_handle_cli_get_volume);
+}
- if (glusterd_friend_find_by_uuid (op_req.uuid, &peerinfo)) {
- gf_log (THIS->name, GF_LOG_WARNING, "%s doesn't "
- "belong to the cluster. Ignoring request.",
- uuid_utoa (op_req.uuid));
- ret = -1;
- goto out;
+int
+__glusterd_handle_cli_uuid_reset(rpcsvc_request_t *req)
+{
+ int ret = -1;
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ uuid_t uuid = {0};
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ char msg_str[128] = {
+ 0,
+ };
+
+ GF_ASSERT(req);
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode "
+ "request received from cli");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_msg_debug("glusterd", 0, "Received uuid reset req");
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(msg_str, sizeof(msg_str),
+ "Unable to decode "
+ "the buffer");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
}
+ }
+
+ /* In the above section if dict_unserialize is successful, ret is set
+ * to zero.
+ */
+ ret = -1;
+ // Do not allow peer reset if there are any volumes in the cluster
+ if (!cds_list_empty(&priv->volumes)) {
+ snprintf(msg_str, sizeof(msg_str),
+ "volumes are already "
+ "present in the cluster. Resetting uuid is not "
+ "allowed");
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLS_ALREADY_PRESENT, "%s",
+ msg_str);
+ goto out;
+ }
+
+ // Do not allow peer reset if trusted storage pool is already formed
+ if (!cds_list_empty(&priv->peers)) {
+ snprintf(msg_str, sizeof(msg_str),
+ "trusted storage pool "
+ "has been already formed. Please detach this peer "
+ "from the pool and reset its uuid.");
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_TSP_ALREADY_FORMED, "%s",
+ msg_str);
+ goto out;
+ }
+
+ gf_uuid_copy(uuid, priv->uuid);
+ ret = glusterd_uuid_generate_save();
+
+ if (!gf_uuid_compare(uuid, MY_UUID)) {
+ snprintf(msg_str, sizeof(msg_str),
+ "old uuid and the new uuid"
+ " are same. Try gluster peer reset again");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UUIDS_SAME_RETRY, "%s",
+ msg_str);
+ ret = -1;
+ goto out;
+ }
- //the structures should always be equal
- GF_ASSERT (sizeof (gd1_mgmt_commit_op_req) == sizeof (gd1_mgmt_stage_op_req));
- ret = glusterd_req_ctx_create (req, op_req.op, op_req.uuid,
- op_req.buf.buf_val, op_req.buf.buf_len,
- gf_gld_mt_op_commit_ctx_t, &req_ctx);
- if (ret)
- goto out;
+out:
+ if (ret) {
+ rsp.op_ret = -1;
+ if (msg_str[0] == '\0')
+ snprintf(msg_str, sizeof(msg_str),
+ "Operation "
+ "failed");
+ rsp.op_errstr = msg_str;
+ ret = 0;
+ } else {
+ rsp.op_errstr = "";
+ }
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_OP, req_ctx);
- if (ret)
- goto out;
- ret = glusterd_op_init_ctx (op_req.op);
+ glusterd_to_cli(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp, dict);
-out:
- if (op_req.buf.buf_val)
- free (op_req.buf.buf_val);//malloced by xdr
- glusterd_friend_sm ();
- glusterd_op_sm ();
- return ret;
+ return ret;
}
int
-glusterd_handle_cli_probe (rpcsvc_request_t *req)
+glusterd_handle_cli_uuid_reset(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- gf1_cli_probe_req cli_req = {0,};
- glusterd_peerinfo_t *peerinfo = NULL;
- gf_boolean_t run_fsm = _gf_true;
- GF_ASSERT (req);
-
- if (!xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf1_cli_probe_req)) {
- //failed to decode msg;
- gf_log ("", GF_LOG_ERROR, "xdr decoding error");
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- gf_cmd_log ("peer probe", " on host %s:%d", cli_req.hostname,
- cli_req.port);
- gf_log ("glusterd", GF_LOG_INFO, "Received CLI probe req %s %d",
- cli_req.hostname, cli_req.port);
+ return glusterd_big_locked_handler(req, __glusterd_handle_cli_uuid_reset);
+}
- if (!(ret = glusterd_is_local_addr(cli_req.hostname))) {
- glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_LOCALHOST, NULL,
- cli_req.hostname, cli_req.port);
- goto out;
+int
+__glusterd_handle_cli_uuid_get(rpcsvc_request_t *req)
+{
+ int ret = -1;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ char err_str[64] = {
+ 0,
+ };
+ char uuid_str[64] = {
+ 0,
+ };
+
+ GF_ASSERT(req);
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode "
+ "request received from cli");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_msg_debug("glusterd", 0, "Received uuid get req");
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ ret = -1;
+ goto out;
}
- if (!(ret = glusterd_friend_find_by_hostname(cli_req.hostname,
- &peerinfo))) {
- if (strcmp (peerinfo->hostname, cli_req.hostname) == 0) {
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(err_str, sizeof(err_str),
+ "Unable to decode "
+ "the buffer");
+ goto out;
- gf_log ("glusterd", GF_LOG_DEBUG, "Probe host %s port %d"
- " already a peer", cli_req.hostname, cli_req.port);
- glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_FRIEND,
- NULL, cli_req.hostname,
- cli_req.port);
- goto out;
- }
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
}
- ret = glusterd_probe_begin (req, cli_req.hostname, cli_req.port);
-
- gf_cmd_log ("peer probe","on host %s:%d %s",cli_req.hostname, cli_req.port,
- (ret) ? "FAILED" : "SUCCESS");
+ }
- if (ret == GLUSTERD_CONNECTION_AWAITED) {
- //fsm should be run after connection establishes
- run_fsm = _gf_false;
- ret = 0;
- }
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ uuid_utoa_r(MY_UUID, uuid_str);
+ ret = dict_set_strn(rsp_dict, "uuid", SLEN("uuid"), uuid_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set uuid in "
+ "dictionary.");
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+ ret = 0;
out:
- if (cli_req.hostname)
- free (cli_req.hostname);//its malloced by xdr
+ if (ret) {
+ rsp.op_ret = -1;
+ if (err_str[0] == '\0')
+ snprintf(err_str, sizeof(err_str),
+ "Operation "
+ "failed");
+ rsp.op_errstr = err_str;
+
+ } else {
+ rsp.op_errstr = "";
+ }
- if (run_fsm) {
- glusterd_friend_sm ();
- glusterd_op_sm ();
- }
+ glusterd_to_cli(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp, dict);
- return ret;
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+ GF_FREE(rsp.dict.dict_val);
+
+ return 0;
+}
+int
+glusterd_handle_cli_uuid_get(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __glusterd_handle_cli_uuid_get);
}
int
-glusterd_handle_cli_deprobe (rpcsvc_request_t *req)
+__glusterd_handle_cli_list_volume(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- gf1_cli_deprobe_req cli_req = {0,};
- uuid_t uuid = {0};
- int op_errno = 0;
- xlator_t *this = NULL;
- glusterd_conf_t *priv = NULL;
+ int ret = -1;
+ dict_t *dict = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int count = 0;
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+
+ GF_ASSERT(req);
+
+ priv = THIS->private;
+ GF_ASSERT(priv);
+
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ keylen = snprintf(key, sizeof(key), "volume%d", count);
+ ret = dict_set_strn(dict, key, keylen, volinfo->volname);
+ if (ret)
+ goto out;
+ count++;
+ }
- this = THIS;
- GF_ASSERT (this);
- priv = this->private;
- GF_ASSERT (priv);
- GF_ASSERT (req);
+ ret = dict_set_int32n(dict, "count", SLEN("count"), count);
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
+ goto out;
+ }
- if (!xdr_to_generic (req->msg[0], &cli_req,
- (xdrproc_t)xdr_gf1_cli_deprobe_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
+ ret = dict_allocate_and_serialize(dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret)
+ goto out;
- gf_log ("glusterd", GF_LOG_INFO, "Received CLI deprobe req");
+ ret = 0;
- ret = glusterd_hostname_to_uuid (cli_req.hostname, uuid);
- if (ret) {
- op_errno = GF_DEPROBE_NOT_FRIEND;
- goto out;
- }
-
- if (!uuid_compare (uuid, MY_UUID)) {
- op_errno = GF_DEPROBE_LOCALHOST;
- ret = -1;
- goto out;
- }
-
- if (!uuid_is_null (uuid) && !(cli_req.flags & GF_CLI_FLAG_OP_FORCE)) {
- /* Check if peers are connected, except peer being detached*/
- if (!glusterd_chk_peers_connected_befriended (uuid)) {
- ret = -1;
- op_errno = GF_DEPROBE_FRIEND_DOWN;
- goto out;
- }
- ret = glusterd_all_volume_cond_check (
- glusterd_friend_brick_belongs,
- -1, &uuid);
- if (ret) {
- op_errno = GF_DEPROBE_BRICK_EXIST;
- goto out;
- }
- }
+out:
+ rsp.op_ret = ret;
+ if (ret)
+ rsp.op_errstr = "Error listing volumes";
+ else
+ rsp.op_errstr = "";
- if (!uuid_is_null (uuid)) {
- ret = glusterd_deprobe_begin (req, cli_req.hostname,
- cli_req.port, uuid);
- } else {
- ret = glusterd_deprobe_begin (req, cli_req.hostname,
- cli_req.port, NULL);
- }
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp);
+ ret = 0;
- gf_cmd_log ("peer deprobe", "on host %s:%d %s", cli_req.hostname,
- cli_req.port, (ret) ? "FAILED" : "SUCCESS");
-out:
- if (ret) {
- ret = glusterd_xfer_cli_deprobe_resp (req, ret, op_errno, NULL,
- cli_req.hostname);
- }
+ if (dict)
+ dict_unref(dict);
- if (cli_req.hostname)
- free (cli_req.hostname);//malloced by xdr
+ GF_FREE(rsp.dict.dict_val);
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ glusterd_friend_sm();
+ glusterd_op_sm();
- return ret;
+ return ret;
}
int
-glusterd_handle_cli_list_friends (rpcsvc_request_t *req)
+glusterd_handle_cli_list_volume(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- gf1_cli_peer_list_req cli_req = {0,};
- dict_t *dict = NULL;
+ return glusterd_big_locked_handler(req, __glusterd_handle_cli_list_volume);
+}
- GF_ASSERT (req);
+int32_t
+glusterd_op_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx,
+ char *err_str, size_t err_len)
+{
+ int ret = -1;
- if (!xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf1_cli_peer_list_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
+ ret = glusterd_op_txn_begin(req, op, ctx, err_str, err_len);
- gf_log ("glusterd", GF_LOG_INFO, "Received cli list req");
+ return ret;
+}
- if (cli_req.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
+int
+__glusterd_handle_ganesha_cmd(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_GANESHA;
+ char *op_errstr = NULL;
+ char err_str[2048] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to decode "
+ "request received from cli");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s",
+ err_str);
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ ret = -1;
+ goto out;
+ }
- ret = dict_unserialize (cli_req.dict.dict_val,
- cli_req.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- } else {
- dict->extra_stdfree = cli_req.dict.dict_val;
- }
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(err_str, sizeof(err_str),
+ "Unable to decode "
+ "the command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
}
+ }
- ret = glusterd_list_friends (req, dict, cli_req.flags);
+ gf_msg_trace(this->name, 0, "Received global option request");
+ ret = glusterd_op_begin_synctask(req, GD_OP_GANESHA, dict);
out:
- if (dict)
- dict_unref (dict);
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf(err_str, sizeof(err_str), "Operation failed");
+ ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str);
+ }
+ if (op_errstr)
+ GF_FREE(op_errstr);
+ if (dict)
+ dict_unref(dict);
+
+ return ret;
+}
- glusterd_friend_sm ();
- glusterd_op_sm ();
+int
+glusterd_handle_ganesha_cmd(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __glusterd_handle_ganesha_cmd);
+}
- return ret;
+static int
+__glusterd_handle_reset_volume(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_RESET_VOLUME;
+ char *volname = NULL;
+ char err_str[64] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, 0, "Received reset vol req");
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to decode request "
+ "received from cli");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s",
+ err_str);
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(err_str, sizeof(err_str),
+ "Unable to decode "
+ "the command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
+ }
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to get volume "
+ "name");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLNAME_NOTFOUND_IN_DICT,
+ "%s", err_str);
+ goto out;
+ }
+ gf_msg_debug(this->name, 0,
+ "Received volume reset request for "
+ "volume %s",
+ volname);
+
+ ret = glusterd_op_begin_synctask(req, GD_OP_RESET_VOLUME, dict);
+
+out:
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf(err_str, sizeof(err_str), "Operation failed");
+ ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str);
+ }
+
+ return ret;
}
int
-glusterd_handle_cli_get_volume (rpcsvc_request_t *req)
+glusterd_handle_reset_volume(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- gf_cli_req cli_req = {{0,}};
- dict_t *dict = NULL;
- int32_t flags = 0;
-
- GF_ASSERT (req);
+ return glusterd_big_locked_handler(req, __glusterd_handle_reset_volume);
+}
- if (!xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
+int
+__glusterd_handle_set_volume(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_SET_VOLUME;
+ char *key = NULL;
+ char *value = NULL;
+ char *volname = NULL;
+ char *op_errstr = NULL;
+ gf_boolean_t help = _gf_false;
+ char err_str[2048] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to decode "
+ "request received from cli");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s",
+ err_str);
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(err_str, sizeof(err_str),
+ "Unable to decode "
+ "the command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
}
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to get volume "
+ "name while handling volume set command");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ err_str);
+ goto out;
+ }
+
+ if (strcmp(volname, "help") == 0 || strcmp(volname, "help-xml") == 0) {
+ ret = glusterd_volset_help(dict, &op_errstr);
+ help = _gf_true;
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "key1", SLEN("key1"), &key);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to get key while"
+ " handling volume set for %s",
+ volname);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ err_str);
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "value1", SLEN("value1"), &value);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to get value while"
+ " handling volume set for %s",
+ volname);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ err_str);
+ goto out;
+ }
+ gf_msg_debug(this->name, 0,
+ "Received volume set request for "
+ "volume %s",
+ volname);
+
+ ret = glusterd_op_begin_synctask(req, GD_OP_SET_VOLUME, dict);
- gf_log ("glusterd", GF_LOG_INFO, "Received get vol req");
+out:
+ if (help)
+ ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict,
+ (op_errstr) ? op_errstr : "");
+ else if (ret) {
+ if (err_str[0] == '\0')
+ snprintf(err_str, sizeof(err_str), "Operation failed");
+ ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str);
+ }
+ if (op_errstr)
+ GF_FREE(op_errstr);
+
+ return ret;
+}
- if (cli_req.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
+int
+glusterd_handle_set_volume(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __glusterd_handle_set_volume);
+}
- ret = dict_unserialize (cli_req.dict.dict_val,
- cli_req.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- } else {
- dict->extra_stdfree = cli_req.dict.dict_val;
- }
+int
+__glusterd_handle_sync_volume(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+ gf_cli_rsp cli_rsp = {0.};
+ char msg[2048] = {
+ 0,
+ };
+ char *volname = NULL;
+ gf1_cli_sync_volume flags = 0;
+ char *hostname = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s",
+ "Failed to decode "
+ "request received from cli");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(msg, sizeof(msg),
+ "Unable to decode the "
+ "command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
}
-
- ret = dict_get_int32 (dict, "flags", &flags);
+ }
+
+ ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Failed to get hostname");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_NOTFOUND_IN_DICT,
+ "%s", msg);
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ ret = dict_get_int32n(dict, "flags", SLEN("flags"), (int32_t *)&flags);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to get flags");
- goto out;
+ snprintf(msg, sizeof(msg), "Failed to get volume name or flags");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FLAGS_NOTFOUND_IN_DICT,
+ "%s", msg);
+ goto out;
}
+ }
- ret = glusterd_get_volumes (req, dict, flags);
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_VOL_SYNC_REQ_RCVD,
+ "Received volume sync req "
+ "for volume %s",
+ (flags & GF_CLI_SYNC_ALL) ? "all" : volname);
-out:
- if (dict)
- dict_unref (dict);
+ if (gf_is_local_addr(hostname)) {
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "sync from localhost"
+ " not allowed");
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SYNC_FROM_LOCALHOST_UNALLOWED, "%s", msg);
+ goto out;
+ }
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ ret = glusterd_op_begin_synctask(req, GD_OP_SYNC_VOLUME, dict);
- return ret;
+out:
+ if (ret) {
+ cli_rsp.op_ret = -1;
+ cli_rsp.op_errstr = msg;
+ if (msg[0] == '\0')
+ snprintf(msg, sizeof(msg), "Operation failed");
+ glusterd_to_cli(req, &cli_rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp,
+ dict);
+
+ ret = 0; // sent error to cli, prevent second reply
+ }
+
+ return ret;
}
int
-glusterd_handle_cli_list_volume (rpcsvc_request_t *req)
+glusterd_handle_sync_volume(rpcsvc_request_t *req)
{
- int ret = -1;
- dict_t *dict = NULL;
- glusterd_conf_t *priv = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- int count = 0;
- char key[1024] = {0,};
- gf_cli_rsp rsp = {0,};
-
- GF_ASSERT (req);
+ return glusterd_big_locked_handler(req, __glusterd_handle_sync_volume);
+}
- priv = THIS->private;
- GF_ASSERT (priv);
+int
+glusterd_fsm_log_send_resp(rpcsvc_request_t *req, int op_ret, char *op_errstr,
+ dict_t *dict)
+{
+ int ret = -1;
+ gf1_cli_fsm_log_rsp rsp = {0};
- dict = dict_new ();
- if (!dict)
- goto out;
+ GF_ASSERT(req);
+ GF_ASSERT(op_errstr);
- list_for_each_entry (volinfo, &priv->volumes, vol_list) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), "volume%d", count);
- ret = dict_set_str (dict, key, volinfo->volname);
- if (ret)
- goto out;
- count++;
+ rsp.op_ret = op_ret;
+ rsp.op_errstr = op_errstr;
+ if (rsp.op_ret == 0) {
+ ret = dict_allocate_and_serialize(dict, &rsp.fsm_log.fsm_log_val,
+ &rsp.fsm_log.fsm_log_len);
+ if (ret < 0) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ return ret;
}
+ }
- ret = dict_set_int32 (dict, "count", count);
- if (ret)
- goto out;
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf1_cli_fsm_log_rsp);
+ GF_FREE(rsp.fsm_log.fsm_log_val);
- ret = dict_allocate_and_serialize (dict, &rsp.dict.dict_val,
- (size_t *)&rsp.dict.dict_len);
- if (ret)
- goto out;
+ gf_msg_debug("glusterd", 0, "Responded, ret: %d", ret);
- ret = 0;
+ return 0;
+}
-out:
- rsp.op_ret = ret;
- if (ret)
- rsp.op_errstr = "Error listing volumes";
- else
- rsp.op_errstr = "";
+int
+__glusterd_handle_fsm_log(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf1_cli_fsm_log_req cli_req = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char msg[2048] = {0};
+ glusterd_peerinfo_t *peerinfo = NULL;
+
+ GF_ASSERT(req);
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("xlator", (this != NULL), out);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req,
+ (xdrproc_t)xdr_gf1_cli_fsm_log_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode "
+ "request received from client.");
+ req->rpc_err = GARBAGE_ARGS;
+ snprintf(msg, sizeof(msg), "Garbage request");
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
- ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gf_cli_rsp);
+ if (strcmp("", cli_req.name) == 0) {
+ conf = this->private;
+ ret = glusterd_sm_tr_log_add_to_dict(dict, &conf->op_sm_log);
+ } else {
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find_by_hostname(cli_req.name);
+ if (!peerinfo) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ snprintf(msg, sizeof(msg), "%s is not a peer", cli_req.name);
+ } else {
+ ret = glusterd_sm_tr_log_add_to_dict(dict, &peerinfo->sm_log);
+ RCU_READ_UNLOCK;
+ }
+ }
- if (dict)
- dict_unref (dict);
+out:
+ (void)glusterd_fsm_log_send_resp(req, ret, msg, dict);
+ free(cli_req.name); // malloced by xdr
+ if (dict)
+ dict_unref(dict);
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ glusterd_friend_sm();
+ glusterd_op_sm();
- return ret;
+ return 0; // send 0 to avoid double reply
}
-int32_t
-glusterd_op_begin (rpcsvc_request_t *req, glusterd_op_t op, void *ctx)
+int
+glusterd_handle_fsm_log(rpcsvc_request_t *req)
{
- int ret = -1;
+ return glusterd_big_locked_handler(req, __glusterd_handle_fsm_log);
+}
- ret = glusterd_op_txn_begin (req, op, ctx);
+int
+glusterd_op_lock_send_resp(rpcsvc_request_t *req, int32_t status)
+{
+ gd1_mgmt_cluster_lock_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
- return ret;
+ GF_ASSERT(req);
+ glusterd_get_uuid(&rsp.uuid);
+ rsp.op_ret = status;
+
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_lock_rsp);
+
+ gf_msg_debug(THIS->name, 0, "Responded to lock, ret: %d", ret);
+
+ return 0;
}
int
-glusterd_handle_reset_volume (rpcsvc_request_t *req)
+glusterd_op_unlock_send_resp(rpcsvc_request_t *req, int32_t status)
{
- int32_t ret = -1;
- gf_cli_req cli_req = {{0,}};
- dict_t *dict = NULL;
- glusterd_op_t cli_op = GD_OP_RESET_VOLUME;
- char *volname = NULL;
+ gd1_mgmt_cluster_unlock_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
- GF_ASSERT (req);
+ GF_ASSERT(req);
+ rsp.op_ret = status;
+ glusterd_get_uuid(&rsp.uuid);
- if (!xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_rsp);
- if (cli_req.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
+ gf_msg_debug(THIS->name, 0, "Responded to unlock, ret: %d", ret);
- ret = dict_unserialize (cli_req.dict.dict_val,
- cli_req.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR, "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- } else {
- dict->extra_stdfree = cli_req.dict.dict_val;
- }
- }
+ return ret;
+}
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to get volname");
- goto out;
- }
+int
+glusterd_op_mgmt_v3_lock_send_resp(rpcsvc_request_t *req, uuid_t *txn_id,
+ int32_t status)
+{
+ gd1_mgmt_v3_lock_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
- gf_cmd_log ("Volume reset", "volume : %s", volname);
- ret = glusterd_op_begin (req, GD_OP_RESET_VOLUME, dict);
- gf_cmd_log ("Volume reset", " on volume %s %s ", volname,
- ((ret == 0)? " SUCCEDED":" FAILED"));
+ GF_ASSERT(req);
+ GF_ASSERT(txn_id);
+ glusterd_get_uuid(&rsp.uuid);
+ rsp.op_ret = status;
+ if (rsp.op_ret)
+ rsp.op_errno = errno;
+ gf_uuid_copy(rsp.txn_id, *txn_id);
-out:
- glusterd_friend_sm ();
- glusterd_op_sm ();
- if (ret) {
- if (dict)
- dict_unref (dict);
- ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
- NULL, "operation failed");
- }
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp);
- return ret;
-}
+ gf_msg_debug(THIS->name, 0, "Responded to mgmt_v3 lock, ret: %d", ret);
+ return ret;
+}
int
-glusterd_handle_set_volume (rpcsvc_request_t *req)
+glusterd_op_mgmt_v3_unlock_send_resp(rpcsvc_request_t *req, uuid_t *txn_id,
+ int32_t status)
{
- int32_t ret = -1;
- gf_cli_req cli_req = {{0,}};
- dict_t *dict = NULL;
- glusterd_op_t cli_op = GD_OP_SET_VOLUME;
- char *key = NULL;
- char *value = NULL;
- char *volname = NULL;
+ gd1_mgmt_v3_unlock_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
- GF_ASSERT (req);
+ GF_ASSERT(req);
+ GF_ASSERT(txn_id);
+ rsp.op_ret = status;
+ if (rsp.op_ret)
+ rsp.op_errno = errno;
+ glusterd_get_uuid(&rsp.uuid);
+ gf_uuid_copy(rsp.txn_id, *txn_id);
- if (!xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp);
- if (cli_req.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
+ gf_msg_debug(THIS->name, 0, "Responded to mgmt_v3 unlock, ret: %d", ret);
- ret = dict_unserialize (cli_req.dict.dict_val,
- cli_req.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- } else {
- dict->extra_stdfree = cli_req.dict.dict_val;
- }
- }
+ return ret;
+}
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log ("", GF_LOG_WARNING, "Unable to get volume name, while"
- "handling volume set command");
- goto out;
- }
+int
+__glusterd_handle_cluster_unlock(rpcsvc_request_t *req)
+{
+ gd1_mgmt_cluster_unlock_req unlock_req = {
+ {0},
+ };
+ int32_t ret = -1;
+ glusterd_op_lock_ctx_t *ctx = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(req);
+
+ txn_id = &priv->global_txn_id;
+
+ ret = xdr_to_generic(req->msg[0], &unlock_req,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode unlock "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0, "Received UNLOCK from uuid: %s",
+ uuid_utoa(unlock_req.uuid));
+
+ RCU_READ_LOCK;
+ ret = (glusterd_peerinfo_find_by_uuid(unlock_req.uuid) == NULL);
+ RCU_READ_LOCK;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
+ "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa(unlock_req.uuid));
+ ret = -1;
+ goto out;
+ }
- ret = dict_get_str (dict, "key1", &key);
- if (ret) {
- if (strcmp (volname, "help-xml") && strcmp (volname, "help")) {
- gf_log ("", GF_LOG_WARNING, "Unable to get key, while "
- "handling volume set for %s",volname);
- goto out;
- }
- }
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_op_lock_ctx_t);
- ret = dict_get_str (dict, "value1", &value);
- if (ret) {
- if (strcmp (volname, "help-xml") && strcmp (volname, "help")) {
- gf_log ("", GF_LOG_WARNING, "Unable to get value, while"
- "handling volume set for %s",volname);
- goto out;
- }
- }
+ if (!ctx) {
+ // respond here
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "No memory.");
+ return -1;
+ }
+ gf_uuid_copy(ctx->uuid, unlock_req.uuid);
+ ctx->req = req;
+ ctx->dict = NULL;
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_UNLOCK, txn_id, ctx);
- gf_cmd_log ("volume set", "volume-name:%s: key:%s, value:%s",volname,
- key, value);
- ret = glusterd_op_begin (req, GD_OP_SET_VOLUME, dict);
- gf_cmd_log ("volume set", "volume-name:%s: key:%s, value:%s %s",
- volname, key, value, (ret == 0)? "SUCCEDED" : "FAILED" );
out:
+ glusterd_friend_sm();
+ glusterd_op_sm();
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ return ret;
+}
- if (ret) {
- if (dict)
- dict_unref (dict);
- ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
- NULL, "operation failed");
- }
- return ret;
+int
+glusterd_handle_cluster_unlock(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __glusterd_handle_cluster_unlock);
}
int
-glusterd_handle_sync_volume (rpcsvc_request_t *req)
+glusterd_op_stage_send_resp(rpcsvc_request_t *req, int32_t op, int32_t status,
+ char *op_errstr, dict_t *rsp_dict)
{
- int32_t ret = -1;
- gf_cli_req cli_req = {{0,}};
- dict_t *dict = NULL;
- gf_cli_rsp cli_rsp = {0.};
- char msg[2048] = {0,};
- glusterd_volinfo_t *volinfo = NULL;
- char *volname = NULL;
- gf1_cli_sync_volume flags = 0;
- char *hostname = NULL;
+ gd1_mgmt_stage_op_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ rsp.op_ret = status;
+ glusterd_get_uuid(&rsp.uuid);
+ rsp.op = op;
+ if (op_errstr)
+ rsp.op_errstr = op_errstr;
+ else
+ rsp.op_errstr = "";
- GF_ASSERT (req);
+ ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ return ret;
+ }
- if (!xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_stage_op_rsp);
- if (cli_req.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
+ gf_msg_debug(this->name, 0, "Responded to stage, ret: %d", ret);
+ GF_FREE(rsp.dict.dict_val);
- ret = dict_unserialize (cli_req.dict.dict_val,
- cli_req.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- } else {
- dict->extra_stdfree = cli_req.dict.dict_val;
- }
- }
+ return ret;
+}
- ret = dict_get_str (dict, "hostname", &hostname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to get hostname");
- goto out;
- }
+int
+glusterd_op_commit_send_resp(rpcsvc_request_t *req, int32_t op, int32_t status,
+ char *op_errstr, dict_t *rsp_dict)
+{
+ gd1_mgmt_commit_op_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ rsp.op_ret = status;
+ glusterd_get_uuid(&rsp.uuid);
+ rsp.op = op;
+
+ if (op_errstr)
+ rsp.op_errstr = op_errstr;
+ else
+ rsp.op_errstr = "";
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- ret = dict_get_int32 (dict, "flags", (int32_t*)&flags);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Unable to get volume"
- "name, or flags");
- goto out;
- }
+ if (rsp_dict) {
+ ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
}
+ }
- gf_log ("glusterd", GF_LOG_INFO, "Received volume sync req "
- "for volume %s",
- (flags & GF_CLI_SYNC_ALL) ? "all" : volname);
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_commit_op_rsp);
- if (!glusterd_is_local_addr (hostname)) {
- ret = -1;
- snprintf (msg, sizeof (msg), "sync from localhost"
- " not allowed");
- goto out;
- }
+ gf_msg_debug(this->name, 0, "Responded to commit, ret: %d", ret);
- if (!flags) {
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (!ret) {
- snprintf (msg, sizeof (msg), "please delete the "
- "volume: %s before sync", volname);
- ret = -1;
- goto out;
- }
-
- ret = dict_set_dynmstr (dict, "volname", volname);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "volume name set failed");
- snprintf (msg, sizeof (msg), "volume name set failed");
- goto out;
- }
- } else {
- if (glusterd_volume_count_get ()) {
- snprintf (msg, sizeof (msg), "please delete all the "
- "volumes before full sync");
- ret = -1;
- goto out;
- }
- }
+out:
+ GF_FREE(rsp.dict.dict_val);
+ return ret;
+}
- ret = glusterd_op_begin (req, GD_OP_SYNC_VOLUME, dict);
+int
+__glusterd_handle_incoming_friend_req(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_friend_req friend_req = {
+ {0},
+ };
+ gf_boolean_t run_fsm = _gf_true;
+
+ GF_ASSERT(req);
+ ret = xdr_to_generic(req->msg[0], &friend_req,
+ (xdrproc_t)xdr_gd1_mgmt_friend_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode "
+ "request received from friend");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_PROBE_RCVD,
+ "Received probe from uuid: %s", uuid_utoa(friend_req.uuid));
+ ret = glusterd_handle_friend_req(req, friend_req.uuid, friend_req.hostname,
+ friend_req.port, &friend_req);
+
+ if (ret == GLUSTERD_CONNECTION_AWAITED) {
+ // fsm should be run after connection establishes
+ run_fsm = _gf_false;
+ ret = 0;
+ }
out:
- if (ret) {
- cli_rsp.op_ret = -1;
- cli_rsp.op_errstr = msg;
- if (msg[0] == '\0')
- snprintf (msg, sizeof (msg), "Operation failed");
- glusterd_submit_reply(req, &cli_rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gf_cli_rsp);
- if (dict)
- dict_unref (dict);
+ free(friend_req.hostname); // malloced by xdr
+ if (run_fsm) {
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ }
- ret = 0; //sent error to cli, prevent second reply
- }
-
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ return ret;
+}
- return ret;
+int
+glusterd_handle_incoming_friend_req(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req,
+ __glusterd_handle_incoming_friend_req);
}
int
-glusterd_fsm_log_send_resp (rpcsvc_request_t *req, int op_ret,
- char *op_errstr, dict_t *dict)
+__glusterd_handle_incoming_unfriend_req(rpcsvc_request_t *req)
{
+ int32_t ret = -1;
+ gd1_mgmt_friend_req friend_req = {
+ {0},
+ };
+ char remote_hostname[UNIX_PATH_MAX + 1] = {
+ 0,
+ };
+
+ GF_ASSERT(req);
+ ret = xdr_to_generic(req->msg[0], &friend_req,
+ (xdrproc_t)xdr_gd1_mgmt_friend_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode "
+ "request received.");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_UNFRIEND_REQ_RCVD,
+ "Received unfriend from uuid: %s", uuid_utoa(friend_req.uuid));
+
+ ret = glusterd_remote_hostname_get(req, remote_hostname,
+ sizeof(remote_hostname));
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL,
+ "Unable to get the remote hostname");
+ goto out;
+ }
+ ret = glusterd_handle_unfriend_req(req, friend_req.uuid, remote_hostname,
+ friend_req.port);
- int ret = -1;
- gf1_cli_fsm_log_rsp rsp = {0};
+out:
+ free(friend_req.hostname); // malloced by xdr
+ free(friend_req.vols.vols_val); // malloced by xdr
- GF_ASSERT (req);
- GF_ASSERT (op_errstr);
+ glusterd_friend_sm();
+ glusterd_op_sm();
- rsp.op_ret = op_ret;
- rsp.op_errstr = op_errstr;
- if (rsp.op_ret == 0)
- ret = dict_allocate_and_serialize (dict, &rsp.fsm_log.fsm_log_val,
- (size_t *)&rsp.fsm_log.fsm_log_len);
+ return ret;
+}
+
+int
+glusterd_handle_incoming_unfriend_req(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req,
+ __glusterd_handle_incoming_unfriend_req);
+}
- ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gf1_cli_fsm_log_rsp);
- if (rsp.fsm_log.fsm_log_val)
- GF_FREE (rsp.fsm_log.fsm_log_val);
+int
+glusterd_handle_friend_update_delete(dict_t *dict)
+{
+ char *hostname = NULL;
+ int32_t ret = -1;
- gf_log ("glusterd", GF_LOG_DEBUG, "Responded, ret: %d", ret);
+ GF_ASSERT(dict);
- return 0;
+ ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname);
+ if (ret)
+ goto out;
+
+ ret = glusterd_friend_remove(NULL, hostname);
+
+out:
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
}
int
-glusterd_handle_fsm_log (rpcsvc_request_t *req)
+glusterd_peer_hostname_update(glusterd_peerinfo_t *peerinfo,
+ const char *hostname, gf_boolean_t store_update)
{
- int32_t ret = -1;
- gf1_cli_fsm_log_req cli_req = {0,};
- dict_t *dict = NULL;
- glusterd_sm_tr_log_t *log = NULL;
- xlator_t *this = NULL;
- glusterd_conf_t *conf = NULL;
- char msg[2048] = {0};
- glusterd_peerinfo_t *peerinfo = NULL;
+ int ret = 0;
- GF_ASSERT (req);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(hostname);
- if (!xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf1_cli_fsm_log_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- snprintf (msg, sizeof (msg), "Garbage request");
- goto out;
- }
+ ret = gd_add_address_to_peer(peerinfo, hostname);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0,
+ GD_MSG_HOSTNAME_ADD_TO_PEERLIST_FAIL,
+ "Couldn't add address to the peer info");
+ goto out;
+ }
+
+ if (store_update)
+ ret = glusterd_store_peerinfo(peerinfo);
+out:
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ return ret;
+}
- if (strcmp ("", cli_req.name) == 0) {
- this = THIS;
- conf = this->private;
- log = &conf->op_sm_log;
+int
+__glusterd_handle_friend_update(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_friend_update friend_req = {
+ {0},
+ };
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ gd1_mgmt_friend_update_rsp rsp = {
+ {0},
+ };
+ dict_t *dict = NULL;
+ char key[32] = {
+ 0,
+ };
+ int keylen;
+ char *uuid_buf = NULL;
+ int i = 1;
+ int count = 0;
+ uuid_t uuid = {
+ 0,
+ };
+ glusterd_peerctx_args_t args = {0};
+ int32_t op = 0;
+
+ GF_ASSERT(req);
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = xdr_to_generic(req->msg[0], &friend_req,
+ (xdrproc_t)xdr_gd1_mgmt_friend_update);
+ if (ret < 0) {
+ // failed to decode msg;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode "
+ "request received");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ ret = 0;
+ RCU_READ_LOCK;
+ if (glusterd_peerinfo_find(friend_req.uuid, NULL) == NULL) {
+ ret = -1;
+ }
+ RCU_READ_UNLOCK;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_REQ_FROM_UNKNOWN_PEER,
+ "Received friend update request "
+ "from unknown peer %s",
+ uuid_utoa(friend_req.uuid));
+ gf_event(EVENT_UNKNOWN_PEER, "peer=%s", uuid_utoa(friend_req.uuid));
+ goto out;
+ }
+
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_FRIEND_UPDATE_RCVD,
+ "Received friend update from uuid: %s", uuid_utoa(friend_req.uuid));
+
+ if (friend_req.friends.friends_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(friend_req.friends.friends_val,
+ friend_req.friends.friends_len, &dict);
+ if (ret < 0) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ goto out;
} else {
- ret = glusterd_friend_find_by_hostname (cli_req.name,
- &peerinfo);
- if (ret) {
- snprintf (msg, sizeof (msg), "%s is not a peer",
- cli_req.name);
- goto out;
- }
- log = &peerinfo->sm_log;
+ dict->extra_stdfree = friend_req.friends.friends_val;
+ }
+ }
+
+ ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=count", NULL);
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "op", SLEN("op"), &op);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=op", NULL);
+ goto out;
+ }
+
+ if (GD_FRIEND_UPDATE_DEL == op) {
+ (void)glusterd_handle_friend_update_delete(dict);
+ goto out;
+ }
+
+ args.mode = GD_MODE_ON;
+ while (i <= count) {
+ keylen = snprintf(key, sizeof(key), "friend%d.uuid", i);
+ ret = dict_get_strn(dict, key, keylen, &uuid_buf);
+ if (ret)
+ goto out;
+ gf_uuid_parse(uuid_buf, uuid);
+
+ if (!gf_uuid_compare(uuid, MY_UUID)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_UUID_RECEIVED,
+ "Received my uuid as Friend");
+ i++;
+ continue;
}
- dict = dict_new ();
- if (!dict) {
+ snprintf(key, sizeof(key), "friend%d", i);
+
+ RCU_READ_LOCK;
+ peerinfo = glusterd_peerinfo_find(uuid, NULL);
+ if (peerinfo == NULL) {
+ /* Create a new peer and add it to the list as there is
+ * no existing peer with the uuid
+ */
+ peerinfo = gd_peerinfo_from_dict(dict, key);
+ if (peerinfo == NULL) {
ret = -1;
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEERINFO_CREATE_FAIL,
+ "Could not create peerinfo from dict "
+ "for prefix %s",
+ key);
+ goto unlock;
+ }
+
+ /* As this is a new peer, it should be added as a
+ * friend. The friend state machine will take care of
+ * correcting the state as required
+ */
+ peerinfo->state.state = GD_FRIEND_STATE_BEFRIENDED;
+
+ ret = glusterd_friend_add_from_peerinfo(peerinfo, 0, &args);
+ } else {
+ /* As an existing peer was found, update it with the new
+ * information
+ */
+ ret = gd_update_peerinfo_from_dict(peerinfo, dict, key);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_PEER_INFO_UPDATE_FAIL,
+ "Failed to "
+ "update peer %s",
+ peerinfo->hostname);
+ goto unlock;
+ }
+ ret = glusterd_store_peerinfo(peerinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEERINFO_CREATE_FAIL,
+ "Failed to store peerinfo");
+ gf_event(EVENT_PEER_STORE_FAILURE, "peer=%s",
+ peerinfo->hostname);
+ }
}
+ unlock:
+ RCU_READ_UNLOCK;
+ if (ret)
+ break;
- ret = glusterd_sm_tr_log_add_to_dict (dict, log);
-out:
- (void)glusterd_fsm_log_send_resp (req, ret, msg, dict);
- if (cli_req.name)
- free (cli_req.name);//malloced by xdr
- if (dict)
- dict_unref (dict);
+ peerinfo = NULL;
+ i++;
+ }
- glusterd_friend_sm ();
- glusterd_op_sm ();
+out:
+ gf_uuid_copy(rsp.uuid, MY_UUID);
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_friend_update_rsp);
+ if (dict) {
+ if (!dict->extra_stdfree && friend_req.friends.friends_val)
+ free(friend_req.friends.friends_val); // malloced by xdr
+ dict_unref(dict);
+ } else {
+ free(friend_req.friends.friends_val); // malloced by xdr
+ }
+
+ if (peerinfo)
+ glusterd_peerinfo_cleanup(peerinfo);
+
+ glusterd_friend_sm();
+ glusterd_op_sm();
+
+ return ret;
+}
- return 0;//send 0 to avoid double reply
+int
+glusterd_handle_friend_update(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __glusterd_handle_friend_update);
}
int
-glusterd_op_lock_send_resp (rpcsvc_request_t *req, int32_t status)
+__glusterd_handle_probe_query(rpcsvc_request_t *req)
{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ gd1_mgmt_probe_req probe_req = {
+ {0},
+ };
+ gd1_mgmt_probe_rsp rsp = {
+ {0},
+ };
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peerctx_args_t args = {0};
+ int port = 0;
+ char remote_hostname[UNIX_PATH_MAX + 1] = {
+ 0,
+ };
+
+ GF_ASSERT(req);
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("xlator", (this != NULL), out);
+
+ ret = xdr_to_generic(req->msg[0], &probe_req,
+ (xdrproc_t)xdr_gd1_mgmt_probe_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode probe "
+ "request");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ conf = this->private;
+ if (probe_req.port)
+ port = probe_req.port;
+ else
+ port = GF_DEFAULT_BASE_PORT;
+
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_PROBE_RCVD,
+ "Received probe from uuid: %s", uuid_utoa(probe_req.uuid));
+
+ /* Check for uuid collision and handle it in a user friendly way by
+ * sending the error.
+ */
+ if (!gf_uuid_compare(probe_req.uuid, MY_UUID)) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_UUIDS_SAME_RETRY,
+ "Peer uuid %s is same as "
+ "local uuid. Please check the uuid of both the peers "
+ "from %s/%s",
+ uuid_utoa(probe_req.uuid), GLUSTERD_DEFAULT_WORKDIR,
+ GLUSTERD_INFO_FILE);
+ rsp.op_ret = -1;
+ rsp.op_errno = GF_PROBE_SAME_UUID;
+ rsp.port = port;
+ goto respond;
+ }
+
+ ret = glusterd_remote_hostname_get(req, remote_hostname,
+ sizeof(remote_hostname));
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_HOSTNAME_RESOLVE_FAIL,
+ "Unable to get the remote hostname");
+ goto out;
+ }
+
+ RCU_READ_LOCK;
+ peerinfo = glusterd_peerinfo_find(probe_req.uuid, remote_hostname);
+ if ((peerinfo == NULL) && (!cds_list_empty(&conf->peers))) {
+ rsp.op_ret = -1;
+ rsp.op_errno = GF_PROBE_ANOTHER_CLUSTER;
+ } else if (peerinfo == NULL) {
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_PEER_NOT_FOUND,
+ "Unable to find peerinfo"
+ " for host: %s (%d)",
+ remote_hostname, port);
+ args.mode = GD_MODE_ON;
+ ret = glusterd_friend_add(remote_hostname, port,
+ GD_FRIEND_STATE_PROBE_RCVD, NULL, &peerinfo,
+ 0, &args);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PEER_ADD_FAIL,
+ "Failed to add peer %s", remote_hostname);
+ rsp.op_errno = GF_PROBE_ADD_FAILED;
+ }
+ }
+ RCU_READ_UNLOCK;
- gd1_mgmt_cluster_lock_rsp rsp = {{0},};
- int ret = -1;
+respond:
+ gf_uuid_copy(rsp.uuid, MY_UUID);
- GF_ASSERT (req);
- glusterd_get_uuid (&rsp.uuid);
- rsp.op_ret = status;
+ rsp.hostname = probe_req.hostname;
+ rsp.op_errstr = "";
- ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_cluster_lock_rsp);
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_probe_rsp);
+ ret = 0;
- gf_log ("glusterd", GF_LOG_INFO,
- "Responded, ret: %d", ret);
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_RESPONSE_INFO,
+ "Responded to %s, op_ret: %d, "
+ "op_errno: %d, ret: %d",
+ remote_hostname, rsp.op_ret, rsp.op_errno, ret);
- return 0;
+out:
+ free(probe_req.hostname); // malloced by xdr
+
+ glusterd_friend_sm();
+ glusterd_op_sm();
+
+ return ret;
}
int
-glusterd_op_unlock_send_resp (rpcsvc_request_t *req, int32_t status)
+glusterd_handle_probe_query(rpcsvc_request_t *req)
{
+ return glusterd_big_locked_handler(req, __glusterd_handle_probe_query);
+}
- gd1_mgmt_cluster_unlock_rsp rsp = {{0},};
- int ret = -1;
-
- GF_ASSERT (req);
- rsp.op_ret = status;
- glusterd_get_uuid (&rsp.uuid);
+int
+__glusterd_handle_cli_profile_volume(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_PROFILE_VOLUME;
+ char *volname = NULL;
+ int32_t op = 0;
+ char err_str[64] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode "
+ "request received from cli");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len > 0) {
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ goto out;
+ }
+ dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, &dict);
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Unable to get volume "
+ "name");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLNAME_NOTFOUND_IN_DICT,
+ "%s", err_str);
+ goto out;
+ }
+
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_VOL_PROFILE_REQ_RCVD,
+ "Received volume profile req "
+ "for volume %s",
+ volname);
+ ret = dict_get_int32n(dict, "op", SLEN("op"), &op);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str), "Unable to get operation");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ err_str);
+ goto out;
+ }
+
+ if (conf->op_version < GD_OP_VERSION_6_0) {
+ gf_msg_debug(this->name, 0,
+ "The cluster is operating at "
+ "version less than %d. Falling back "
+ "to op-sm framework.",
+ GD_OP_VERSION_6_0);
+ ret = glusterd_op_begin(req, cli_op, dict, err_str, sizeof(err_str));
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ } else {
+ ret = glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase(
+ req, cli_op, dict);
+ }
- ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_rsp);
+out:
+ free(cli_req.dict.dict_val);
- gf_log ("glusterd", GF_LOG_INFO,
- "Responded to unlock, ret: %d", ret);
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf(err_str, sizeof(err_str), "Operation failed");
+ ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str);
+ }
- return ret;
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
int
-glusterd_handle_cluster_unlock (rpcsvc_request_t *req)
+glusterd_handle_cli_profile_volume(rpcsvc_request_t *req)
{
- gd1_mgmt_cluster_unlock_req unlock_req = {{0}, };
- int32_t ret = -1;
- glusterd_op_lock_ctx_t *ctx = NULL;
- glusterd_peerinfo_t *peerinfo = NULL;
-
- GF_ASSERT (req);
+ return glusterd_big_locked_handler(req,
+ __glusterd_handle_cli_profile_volume);
+}
- if (!xdr_to_generic (req->msg[0], &unlock_req,
- (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
+int
+__glusterd_handle_getwd(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf1_cli_getwd_rsp rsp = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ GF_ASSERT(req);
- gf_log ("glusterd", GF_LOG_INFO,
- "Received UNLOCK from uuid: %s", uuid_utoa (unlock_req.uuid));
+ priv = THIS->private;
+ GF_ASSERT(priv);
- if (glusterd_friend_find_by_uuid (unlock_req.uuid, &peerinfo)) {
- gf_log (THIS->name, GF_LOG_WARNING, "%s doesn't "
- "belong to the cluster. Ignoring request.",
- uuid_utoa (unlock_req.uuid));
- ret = -1;
- goto out;
- }
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_GETWD_REQ_RCVD,
+ "Received getwd req");
- ctx = GF_CALLOC (1, sizeof (*ctx), gf_gld_mt_op_lock_ctx_t);
+ rsp.wd = priv->workdir;
- if (!ctx) {
- //respond here
- return -1;
- }
- uuid_copy (ctx->uuid, unlock_req.uuid);
- ctx->req = req;
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf1_cli_getwd_rsp);
+ ret = 0;
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_UNLOCK, ctx);
+ glusterd_friend_sm();
+ glusterd_op_sm();
-out:
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ return ret;
+}
- return ret;
+int
+glusterd_handle_getwd(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __glusterd_handle_getwd);
}
int
-glusterd_op_stage_send_resp (rpcsvc_request_t *req,
- int32_t op, int32_t status,
- char *op_errstr, dict_t *rsp_dict)
+__glusterd_handle_mount(rpcsvc_request_t *req)
{
- gd1_mgmt_stage_op_rsp rsp = {{0},};
- int ret = -1;
-
- GF_ASSERT (req);
- rsp.op_ret = status;
- glusterd_get_uuid (&rsp.uuid);
- rsp.op = op;
- if (op_errstr)
- rsp.op_errstr = op_errstr;
- else
- rsp.op_errstr = "";
-
- ret = dict_allocate_and_serialize (rsp_dict,
- &rsp.dict.dict_val,
- (size_t *)&rsp.dict.dict_len);
+ gf1_cli_mount_req mnt_req = {
+ 0,
+ };
+ gf1_cli_mount_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ int ret = 0;
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT(req);
+ priv = THIS->private;
+
+ ret = xdr_to_generic(req->msg[0], &mnt_req,
+ (xdrproc_t)xdr_gf1_cli_mount_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode mount "
+ "request received");
+ req->rpc_err = GARBAGE_ARGS;
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_MOUNT_REQ_RCVD,
+ "Received mount req");
+
+ if (mnt_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(mnt_req.dict.dict_val, mnt_req.dict.dict_len,
+ &dict);
if (ret < 0) {
- gf_log ("", GF_LOG_DEBUG,
- "failed to get serialized length of dict");
- return ret;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ rsp.op_ret = -1;
+ rsp.op_errno = -EINVAL;
+ goto out;
+ } else {
+ dict->extra_stdfree = mnt_req.dict.dict_val;
}
+ }
- ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_stage_op_rsp);
+ synclock_unlock(&priv->big_lock);
+ rsp.op_ret = glusterd_do_mount(mnt_req.label, dict, &rsp.path,
+ &rsp.op_errno);
+ synclock_lock(&priv->big_lock);
- gf_log ("glusterd", GF_LOG_INFO,
- "Responded to stage, ret: %d", ret);
- if (rsp.dict.dict_val)
- GF_FREE (rsp.dict.dict_val);
+out:
+ if (!rsp.path)
+ rsp.path = gf_strdup("");
- return ret;
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf1_cli_mount_rsp);
+ ret = 0;
+
+ if (dict)
+ dict_unref(dict);
+
+ GF_FREE(rsp.path);
+
+ glusterd_friend_sm();
+ glusterd_op_sm();
+
+ return ret;
}
int
-glusterd_op_commit_send_resp (rpcsvc_request_t *req,
- int32_t op, int32_t status, char *op_errstr,
- dict_t *rsp_dict)
+glusterd_handle_mount(rpcsvc_request_t *req)
{
- gd1_mgmt_commit_op_rsp rsp = {{0}, };
- int ret = -1;
-
- GF_ASSERT (req);
- rsp.op_ret = status;
- glusterd_get_uuid (&rsp.uuid);
- rsp.op = op;
-
- if (op_errstr)
- rsp.op_errstr = op_errstr;
- else
- rsp.op_errstr = "";
+ return glusterd_big_locked_handler(req, __glusterd_handle_mount);
+}
- if (rsp_dict) {
- ret = dict_allocate_and_serialize (rsp_dict,
- &rsp.dict.dict_val,
- (size_t *)&rsp.dict.dict_len);
- if (ret < 0) {
- gf_log ("", GF_LOG_DEBUG,
- "failed to get serialized length of dict");
- goto out;
- }
+int
+__glusterd_handle_umount(rpcsvc_request_t *req)
+{
+ gf1_cli_umount_req umnt_req = {
+ 0,
+ };
+ gf1_cli_umount_rsp rsp = {
+ 0,
+ };
+ char *mountbroker_root = NULL;
+ char mntp[PATH_MAX] = {
+ 0,
+ };
+ char *path = NULL;
+ runner_t runner = {
+ 0,
+ };
+ int ret = 0;
+ xlator_t *this = THIS;
+ gf_boolean_t dir_ok = _gf_false;
+ char *pdir = NULL;
+ char *t = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT(req);
+ GF_ASSERT(this);
+ priv = this->private;
+
+ ret = xdr_to_generic(req->msg[0], &umnt_req,
+ (xdrproc_t)xdr_gf1_cli_umount_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode umount"
+ "request");
+ req->rpc_err = GARBAGE_ARGS;
+ rsp.op_ret = -1;
+ goto out;
+ }
+
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_UMOUNT_REQ_RCVD,
+ "Received umount req");
+
+ if (dict_get_strn(this->options, "mountbroker-root",
+ SLEN("mountbroker-root"), &mountbroker_root) != 0) {
+ rsp.op_errno = ENOENT;
+ goto out;
+ }
+
+ /* check if it is allowed to umount path */
+ path = gf_strdup(umnt_req.path);
+ if (!path) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED, NULL);
+ rsp.op_errno = ENOMEM;
+ goto out;
+ }
+ dir_ok = _gf_false;
+ pdir = dirname(path);
+ t = strtail(pdir, mountbroker_root);
+ if (t && *t == '/') {
+ t = strtail(++t, MB_HIVE);
+ if (t && !*t)
+ dir_ok = _gf_true;
+ }
+ GF_FREE(path);
+ if (!dir_ok) {
+ rsp.op_errno = EACCES;
+ goto out;
+ }
+
+ synclock_unlock(&priv->big_lock);
+
+ if (umnt_req.lazy) {
+ rsp.op_ret = gf_umount_lazy(this->name, umnt_req.path, 0);
+ } else {
+ runinit(&runner);
+ runner_add_args(&runner, _PATH_UMOUNT, umnt_req.path, NULL);
+ rsp.op_ret = runner_run(&runner);
+ }
+
+ synclock_lock(&priv->big_lock);
+ if (rsp.op_ret == 0) {
+ if (realpath(umnt_req.path, mntp))
+ sys_rmdir(mntp);
+ else {
+ rsp.op_ret = -1;
+ rsp.op_errno = errno;
}
+ if (sys_unlink(umnt_req.path) != 0) {
+ rsp.op_ret = -1;
+ rsp.op_errno = errno;
+ }
+ }
+out:
+ if (rsp.op_errno)
+ rsp.op_ret = -1;
- ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_commit_op_rsp);
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf1_cli_umount_rsp);
+ ret = 0;
- gf_log ("glusterd", GF_LOG_INFO,
- "Responded to commit, ret: %d", ret);
+ glusterd_friend_sm();
+ glusterd_op_sm();
-out:
- if (rsp.dict.dict_val)
- GF_FREE (rsp.dict.dict_val);
- return ret;
+ return ret;
}
int
-glusterd_handle_incoming_friend_req (rpcsvc_request_t *req)
+glusterd_handle_umount(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- gd1_mgmt_friend_req friend_req = {{0},};
- gf_boolean_t run_fsm = _gf_true;
-
- GF_ASSERT (req);
- if (!xdr_to_generic (req->msg[0], &friend_req, (xdrproc_t)xdr_gd1_mgmt_friend_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- gf_log ("glusterd", GF_LOG_INFO,
- "Received probe from uuid: %s", uuid_utoa (friend_req.uuid));
- ret = glusterd_handle_friend_req (req, friend_req.uuid,
- friend_req.hostname, friend_req.port,
- &friend_req);
-
- if (ret == GLUSTERD_CONNECTION_AWAITED) {
- //fsm should be run after connection establishes
- run_fsm = _gf_false;
- ret = 0;
- }
+ return glusterd_big_locked_handler(req, __glusterd_handle_umount);
+}
+int
+glusterd_friend_remove(uuid_t uuid, char *hostname)
+{
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find(uuid, hostname);
+ if (peerinfo == NULL) {
+ RCU_READ_UNLOCK;
+ goto out;
+ }
+
+ ret = glusterd_friend_remove_cleanup_vols(peerinfo->uuid);
+ RCU_READ_UNLOCK;
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_CLEANUP_FAIL,
+ "Volumes cleanup failed");
+ /* Giving up the critical section here as glusterd_peerinfo_cleanup must
+ * be called from outside a critical section
+ */
+ ret = glusterd_peerinfo_cleanup(peerinfo);
out:
- if (friend_req.hostname)
- free (friend_req.hostname);//malloced by xdr
+ gf_msg_debug(THIS->name, 0, "returning %d", ret);
+ /* coverity[LOCK] */
+ return ret;
+}
- if (run_fsm) {
- glusterd_friend_sm ();
- glusterd_op_sm ();
+int
+glusterd_rpc_create(struct rpc_clnt **rpc, dict_t *options,
+ rpc_clnt_notify_t notify_fn, void *notify_data,
+ gf_boolean_t force)
+{
+ struct rpc_clnt *new_rpc = NULL;
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(options);
+ GF_VALIDATE_OR_GOTO(this->name, rpc, out);
+
+ if (force && rpc && *rpc) {
+ (void)rpc_clnt_unref(*rpc);
+ *rpc = NULL;
+ }
+
+ /* TODO: is 32 enough? or more ? */
+ new_rpc = rpc_clnt_new(options, this, this->name, 16);
+ if (!new_rpc)
+ goto out;
+
+ ret = rpc_clnt_register_notify(new_rpc, notify_fn, notify_data);
+ if (ret)
+ goto out;
+ ret = rpc_clnt_start(new_rpc);
+out:
+ if (ret) {
+ if (new_rpc) {
+ (void)rpc_clnt_unref(new_rpc);
}
+ } else {
+ *rpc = new_rpc;
+ }
- return ret;
+ gf_msg_debug(this->name, 0, "returning %d", ret);
+ return ret;
}
int
-glusterd_handle_incoming_unfriend_req (rpcsvc_request_t *req)
+glusterd_transport_inet_options_build(dict_t *dict, const char *hostname,
+ int port, char *af)
{
- int32_t ret = -1;
- gd1_mgmt_friend_req friend_req = {{0},};
- char remote_hostname[UNIX_PATH_MAX + 1] = {0,};
+ xlator_t *this = NULL;
+ int32_t interval = -1;
+ int32_t time = -1;
+ int32_t timeout = -1;
+ int ret = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(hostname);
+
+ if (!port)
+ port = GLUSTERD_DEFAULT_PORT;
+
+ /* Build default transport options */
+ ret = rpc_transport_inet_options_build(dict, hostname, port, af);
+ if (ret)
+ goto out;
+
+ /* Set frame-timeout to 10mins. Default timeout of 30 mins is too long
+ * when compared to 2 mins for cli timeout. This ensures users don't
+ * wait too long after cli timesout before being able to resume normal
+ * operations
+ */
+ ret = dict_set_int32n(dict, "frame-timeout", SLEN("frame-timeout"), 600);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set frame-timeout");
+ goto out;
+ }
+
+ /* Set keepalive options */
+ ret = dict_get_int32n(this->options, "transport.socket.keepalive-interval",
+ SLEN("transport.socket.keepalive-interval"),
+ &interval);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get socket keepalive-interval");
+ }
+ ret = dict_get_int32n(this->options, "transport.socket.keepalive-time",
+ SLEN("transport.socket.keepalive-time"), &time);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get socket keepalive-time");
+ }
+ ret = dict_get_int32n(this->options, "transport.tcp-user-timeout",
+ SLEN("transport.tcp-user-timeout"), &timeout);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get tcp-user-timeout");
+ }
+
+ if ((interval > 0) || (time > 0))
+ ret = rpc_transport_keepalive_options_set(dict, interval, time,
+ timeout);
+out:
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
- GF_ASSERT (req);
- if (!xdr_to_generic (req->msg[0], &friend_req, (xdrproc_t)xdr_gd1_mgmt_friend_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
+int
+glusterd_friend_rpc_create(xlator_t *this, glusterd_peerinfo_t *peerinfo,
+ glusterd_peerctx_args_t *args)
+{
+ dict_t *options = NULL;
+ int ret = -1;
+ glusterd_peerctx_t *peerctx = NULL;
+ data_t *data = NULL;
+ char *af = NULL;
+
+ peerctx = GF_CALLOC(1, sizeof(*peerctx), gf_gld_mt_peerctx_t);
+ if (!peerctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ options = dict_new();
+ if (!options) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ if (args)
+ peerctx->args = *args;
+
+ gf_uuid_copy(peerctx->peerid, peerinfo->uuid);
+ peerctx->peername = gf_strdup(peerinfo->hostname);
+ peerctx->peerinfo_gen = peerinfo->generation; /* A peerinfos generation
+ number can be used to
+ uniquely identify a
+ peerinfo */
+
+ ret = dict_get_str(this->options, "transport.address-family", &af);
+ if (ret)
+ gf_log(this->name, GF_LOG_TRACE,
+ "option transport.address-family is not set in xlator options");
+ ret = glusterd_transport_inet_options_build(options, peerinfo->hostname,
+ peerinfo->port, af);
+ if (ret)
+ goto out;
+
+ /*
+ * For simulated multi-node testing, we need to make sure that we
+ * create our RPC endpoint with the same address that the peer would
+ * use to reach us.
+ */
+
+ if (this->options) {
+ data = dict_getn(this->options, "transport.socket.bind-address",
+ SLEN("transport.socket.bind-address"));
+ if (data) {
+ ret = dict_set_sizen(options, "transport.socket.source-addr", data);
}
-
- gf_log ("glusterd", GF_LOG_INFO,
- "Received unfriend from uuid: %s", uuid_utoa (friend_req.uuid));
-
- ret = glusterd_remote_hostname_get (req, remote_hostname,
- sizeof (remote_hostname));
+ data = dict_getn(this->options, "ping-timeout", SLEN("ping-timeout"));
+ if (data) {
+ ret = dict_set_sizen(options, "ping-timeout", data);
+ }
+ }
+
+ /* Enable encryption for the client connection if management encryption
+ * is enabled
+ */
+ if (this->ctx->secure_mgmt) {
+ ret = dict_set_nstrn(options, "transport.socket.ssl-enabled",
+ SLEN("transport.socket.ssl-enabled"), "on",
+ SLEN("on"));
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get the remote hostname");
- goto out;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to set ssl-enabled in dict");
+ goto out;
}
- ret = glusterd_handle_unfriend_req (req, friend_req.uuid,
- remote_hostname, friend_req.port);
+ this->ctx->ssl_cert_depth = glusterfs_read_secure_access_file();
+ }
+
+ ret = glusterd_rpc_create(&peerinfo->rpc, options, glusterd_peer_rpc_notify,
+ peerctx, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL,
+ "failed to create rpc for"
+ " peer %s",
+ peerinfo->hostname);
+ gf_event(EVENT_PEER_RPC_CREATE_FAILED, "peer=%s", peerinfo->hostname);
+ goto out;
+ }
+ peerctx = NULL;
+ ret = 0;
out:
- if (friend_req.hostname)
- free (friend_req.hostname);//malloced by xdr
- if (friend_req.vols.vols_val)
- free (friend_req.vols.vols_val);//malloced by xdr
-
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ if (options)
+ dict_unref(options);
- return ret;
+ GF_FREE(peerctx);
+ return ret;
}
int
-glusterd_handle_friend_update_delete (dict_t *dict)
+glusterd_friend_add(const char *hoststr, int port,
+ glusterd_friend_sm_state_t state, uuid_t *uuid,
+ glusterd_peerinfo_t **friend, gf_boolean_t restore,
+ glusterd_peerctx_args_t *args)
{
- char *hostname = NULL;
- int32_t ret = -1;
-
- GF_ASSERT (dict);
-
- ret = dict_get_str (dict, "hostname", &hostname);
- if (ret)
- goto out;
+ int ret = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ conf = this->private;
+ GF_ASSERT(conf);
+ GF_ASSERT(hoststr);
+ GF_ASSERT(friend);
+
+ *friend = glusterd_peerinfo_new(state, uuid, hoststr, port);
+ if (*friend == NULL) {
+ ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_ADD_FAIL, NULL);
+ goto out;
+ }
+
+ /*
+ * We can't add to the list after calling glusterd_friend_rpc_create,
+ * even if it succeeds, because by then the callback to take it back
+ * off and free might have happened already (notably in the case of an
+ * invalid peer name). That would mean we're adding something that had
+ * just been free, and we're likely to crash later.
+ */
+ cds_list_add_tail_rcu(&(*friend)->uuid_list, &conf->peers);
+
+ // restore needs to first create the list of peers, then create rpcs
+ // to keep track of quorum in race-free manner. In restore for each peer
+ // rpc-create calls rpc_notify when the friend-list is partially
+ // constructed, leading to wrong quorum calculations.
+ if (!restore) {
+ ret = glusterd_store_peerinfo(*friend);
+ if (ret == 0) {
+ ret = glusterd_friend_rpc_create(this, *friend, args);
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEERINFO_CREATE_FAIL,
+ "Failed to store peerinfo");
+ gf_event(EVENT_PEER_STORE_FAILURE, "peer=%s", (*friend)->hostname);
+ }
+ }
- ret = glusterd_friend_remove (NULL, hostname);
+ if (ret) {
+ (void)glusterd_peerinfo_cleanup(*friend);
+ *friend = NULL;
+ }
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CONNECT_RETURNED,
+ "connect returned %d", ret);
+ return ret;
}
+/* glusterd_friend_add_from_peerinfo() adds a new peer into the local friends
+ * list from a pre created @peerinfo object. It otherwise works similarly to
+ * glusterd_friend_add()
+ */
int
-glusterd_friend_hostname_update (glusterd_peerinfo_t *peerinfo,
- char *hostname,
- gf_boolean_t store_update)
+glusterd_friend_add_from_peerinfo(glusterd_peerinfo_t *friend,
+ gf_boolean_t restore,
+ glusterd_peerctx_args_t *args)
{
- char *new_hostname = NULL;
- int ret = 0;
-
- GF_ASSERT (peerinfo);
- GF_ASSERT (hostname);
-
- new_hostname = gf_strdup (hostname);
- if (!new_hostname) {
- ret = -1;
- goto out;
+ int ret = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ GF_VALIDATE_OR_GOTO(this->name, (friend != NULL), out);
+
+ /*
+ * We can't add to the list after calling glusterd_friend_rpc_create,
+ * even if it succeeds, because by then the callback to take it back
+ * off and free might have happened already (notably in the case of an
+ * invalid peer name). That would mean we're adding something that had
+ * just been free, and we're likely to crash later.
+ */
+ cds_list_add_tail_rcu(&friend->uuid_list, &conf->peers);
+
+ // restore needs to first create the list of peers, then create rpcs
+ // to keep track of quorum in race-free manner. In restore for each peer
+ // rpc-create calls rpc_notify when the friend-list is partially
+ // constructed, leading to wrong quorum calculations.
+ if (!restore) {
+ ret = glusterd_store_peerinfo(friend);
+ if (ret == 0) {
+ ret = glusterd_friend_rpc_create(this, friend, args);
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEERINFO_CREATE_FAIL,
+ "Failed to store peerinfo");
+ gf_event(EVENT_PEER_STORE_FAILURE, "peer=%s", friend->hostname);
}
+ }
- GF_FREE (peerinfo->hostname);
- peerinfo->hostname = new_hostname;
- if (store_update)
- ret = glusterd_store_peerinfo (peerinfo);
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CONNECT_RETURNED,
+ "connect returned %d", ret);
+ return ret;
}
int
-glusterd_handle_friend_update (rpcsvc_request_t *req)
-{
- int32_t ret = -1;
- gd1_mgmt_friend_update friend_req = {{0},};
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_conf_t *priv = NULL;
- xlator_t *this = NULL;
- glusterd_peerinfo_t *tmp = NULL;
- gd1_mgmt_friend_update_rsp rsp = {{0},};
- dict_t *dict = NULL;
- char key[100] = {0,};
- char *uuid_buf = NULL;
- char *hostname = NULL;
- int i = 1;
- int count = 0;
- uuid_t uuid = {0,};
- glusterd_peerctx_args_t args = {0};
- int32_t op = 0;
-
- GF_ASSERT (req);
-
- this = THIS;
- GF_ASSERT (this);
- priv = this->private;
- GF_ASSERT (priv);
-
- if (!xdr_to_generic (req->msg[0], &friend_req, (xdrproc_t)xdr_gd1_mgmt_friend_update)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
+glusterd_probe_begin(rpcsvc_request_t *req, const char *hoststr, int port,
+ dict_t *dict, int *op_errno)
+{
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peerctx_args_t args = {0};
+ glusterd_friend_sm_event_t *event = NULL;
- ret = glusterd_friend_find (friend_req.uuid, NULL, &tmp);
- if (ret) {
- gf_log ("", GF_LOG_CRITICAL, "Received friend update request "
- "from unknown peer %s", uuid_utoa (friend_req.uuid));
- goto out;
- }
- gf_log ("glusterd", GF_LOG_INFO,
- "Received friend update from uuid: %s", uuid_utoa (friend_req.uuid));
+ GF_ASSERT(hoststr);
- if (friend_req.friends.friends_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
+ RCU_READ_LOCK;
+ peerinfo = glusterd_peerinfo_find(NULL, hoststr);
- ret = dict_unserialize (friend_req.friends.friends_val,
- friend_req.friends.friends_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- } else {
- dict->extra_stdfree = friend_req.friends.friends_val;
- }
+ if (peerinfo == NULL) {
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_PEER_NOT_FOUND,
+ "Unable to find peerinfo"
+ " for host: %s (%d)",
+ hoststr, port);
+ args.mode = GD_MODE_ON;
+ args.req = req;
+ args.dict = dict;
+ ret = glusterd_friend_add(hoststr, port, GD_FRIEND_STATE_DEFAULT, NULL,
+ &peerinfo, 0, &args);
+ if ((!ret) && (!peerinfo->connected)) {
+ ret = GLUSTERD_CONNECTION_AWAITED;
}
- ret = dict_get_int32 (dict, "count", &count);
- if (ret)
- goto out;
-
- ret = dict_get_int32 (dict, "op", &op);
+ } else if (peerinfo->connected &&
+ (GD_FRIEND_STATE_BEFRIENDED == peerinfo->state.state)) {
+ if (peerinfo->detaching) {
+ ret = -1;
+ if (op_errno)
+ *op_errno = GF_PROBE_FRIEND_DETACHING;
+ goto out;
+ }
+ ret = glusterd_peer_hostname_update(peerinfo, hoststr, _gf_false);
if (ret)
- goto out;
+ goto out;
+ // Injecting a NEW_NAME event to update cluster
+ ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_NEW_NAME, &event);
+ if (!ret) {
+ event->peername = gf_strdup(peerinfo->hostname);
+ gf_uuid_copy(event->peerid, peerinfo->uuid);
- if (GD_FRIEND_UPDATE_DEL == op) {
- ret = glusterd_handle_friend_update_delete (dict);
- goto out;
+ ret = glusterd_friend_sm_inject_event(event);
+ glusterd_xfer_cli_probe_resp(req, 0, GF_PROBE_SUCCESS, NULL,
+ (char *)hoststr, port, dict);
}
+ } else {
+ glusterd_xfer_cli_probe_resp(req, 0, GF_PROBE_FRIEND, NULL,
+ (char *)hoststr, port, dict);
+ ret = 0;
+ }
- args.mode = GD_MODE_ON;
- while ( i <= count) {
- snprintf (key, sizeof (key), "friend%d.uuid", i);
- ret = dict_get_str (dict, key, &uuid_buf);
- if (ret)
- goto out;
- uuid_parse (uuid_buf, uuid);
- snprintf (key, sizeof (key), "friend%d.hostname", i);
- ret = dict_get_str (dict, key, &hostname);
- if (ret)
- goto out;
+out:
+ RCU_READ_UNLOCK;
+ gf_msg_debug("glusterd", 0, "returning %d", ret);
+ return ret;
+}
- gf_log ("", GF_LOG_INFO, "Received uuid: %s, hostname:%s",
- uuid_buf, hostname);
+int
+glusterd_deprobe_begin(rpcsvc_request_t *req, const char *hoststr, int port,
+ uuid_t uuid, dict_t *dict, int *op_errno)
+{
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_probe_ctx_t *ctx = NULL;
- if (uuid_is_null (uuid)) {
- gf_log (this->name, GF_LOG_WARNING, "Updates mustn't "
- "contain peer with 'null' uuid");
- continue;
- }
+ GF_ASSERT(hoststr);
+ GF_ASSERT(req);
- if (!uuid_compare (uuid, MY_UUID)) {
- gf_log ("", GF_LOG_INFO, "Received my uuid as Friend");
- i++;
- continue;
- }
+ RCU_READ_LOCK;
- ret = glusterd_friend_find (uuid, hostname, &tmp);
+ peerinfo = glusterd_peerinfo_find(uuid, hoststr);
+ if (peerinfo == NULL) {
+ ret = -1;
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_PEER_NOT_FOUND,
+ "Unable to find peerinfo"
+ " for host: %s %d",
+ hoststr, port);
+ goto out;
+ }
+
+ if (!peerinfo->rpc) {
+ // handle this case
+ goto out;
+ }
+
+ if (peerinfo->detaching) {
+ ret = -1;
+ if (op_errno)
+ *op_errno = GF_DEPROBE_FRIEND_DETACHING;
+ goto out;
+ }
- if (!ret) {
- if (strcmp (hostname, tmp->hostname) != 0) {
- glusterd_friend_hostname_update (tmp, hostname,
- _gf_true);
- }
- i++;
- continue;
- }
+ ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_INIT_REMOVE_FRIEND,
+ &event);
- ret = glusterd_friend_add (hostname, friend_req.port,
- GD_FRIEND_STATE_BEFRIENDED,
- &uuid, &peerinfo, 0, &args);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL,
+ "Unable to get new event");
+ goto out;
+ }
- i++;
- }
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_probe_ctx_t);
-out:
- uuid_copy (rsp.uuid, MY_UUID);
- ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_friend_update_rsp);
- if (dict) {
- if (!dict->extra_stdfree && friend_req.friends.friends_val)
- free (friend_req.friends.friends_val);//malloced by xdr
- dict_unref (dict);
- } else {
- if (friend_req.friends.friends_val)
- free (friend_req.friends.friends_val);//malloced by xdr
- }
+ if (!ctx) {
+ goto out;
+ }
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ ctx->hostname = gf_strdup(hoststr);
+ ctx->port = port;
+ ctx->req = req;
+ ctx->dict = dict;
- return ret;
+ event->ctx = ctx;
+
+ event->peername = gf_strdup(hoststr);
+ gf_uuid_copy(event->peerid, uuid);
+
+ ret = glusterd_friend_sm_inject_event(event);
+
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL,
+ "Unable to inject event %d, "
+ "ret = %d",
+ event->event, ret);
+ goto out;
+ }
+ peerinfo->detaching = _gf_true;
+
+out:
+ RCU_READ_UNLOCK;
+ return ret;
}
int
-glusterd_handle_probe_query (rpcsvc_request_t *req)
+glusterd_xfer_friend_remove_resp(rpcsvc_request_t *req, char *hostname,
+ int port)
{
- int32_t ret = -1;
- xlator_t *this = NULL;
- glusterd_conf_t *conf = NULL;
- gd1_mgmt_probe_req probe_req = {{0},};
- gd1_mgmt_probe_rsp rsp = {{0},};
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_peerctx_args_t args = {0};
- int port = 0;
- char remote_hostname[UNIX_PATH_MAX + 1] = {0,};
-
- GF_ASSERT (req);
-
- if (!xdr_to_generic (req->msg[0], &probe_req, (xdrproc_t)xdr_gd1_mgmt_probe_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
+ gd1_mgmt_friend_rsp rsp = {
+ {0},
+ };
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(hostname);
+
+ rsp.op_ret = 0;
+ this = THIS;
+ GF_ASSERT(this);
+
+ gf_uuid_copy(rsp.uuid, MY_UUID);
+ rsp.hostname = hostname;
+ rsp.port = port;
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_friend_rsp);
+
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_RESPONSE_INFO,
+ "Responded to %s (%d), ret: %d", hostname, port, ret);
+ return ret;
+}
- this = THIS;
+int
+glusterd_xfer_friend_add_resp(rpcsvc_request_t *req, char *myhostname,
+ char *remote_hostname, int port, int32_t op_ret,
+ int32_t op_errno)
+{
+ gd1_mgmt_friend_rsp rsp = {
+ {0},
+ };
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(myhostname);
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ gf_uuid_copy(rsp.uuid, MY_UUID);
+ rsp.op_ret = op_ret;
+ rsp.op_errno = op_errno;
+ rsp.hostname = gf_strdup(myhostname);
+ rsp.port = port;
+
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_friend_rsp);
+
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_RESPONSE_INFO,
+ "Responded to %s (%d), ret: %d, op_ret: %d", remote_hostname, port,
+ ret, op_ret);
+ GF_FREE(rsp.hostname);
+ return ret;
+}
- conf = this->private;
- if (probe_req.port)
- port = probe_req.port;
- else
- port = GF_DEFAULT_BASE_PORT;
+static void
+set_probe_error_str(int op_ret, int op_errno, char *op_errstr, char *errstr,
+ size_t len, char *hostname, int port)
+{
+ if ((op_errstr) && (strcmp(op_errstr, ""))) {
+ snprintf(errstr, len, "%s", op_errstr);
+ return;
+ }
+
+ if (!op_ret) {
+ switch (op_errno) {
+ case GF_PROBE_LOCALHOST:
+ snprintf(errstr, len,
+ "Probe on localhost not "
+ "needed");
+ break;
- gf_log ("glusterd", GF_LOG_INFO,
- "Received probe from uuid: %s", uuid_utoa (probe_req.uuid));
+ case GF_PROBE_FRIEND:
+ snprintf(errstr, len,
+ "Host %s port %d already"
+ " in peer list",
+ hostname, port);
+ break;
- ret = glusterd_remote_hostname_get (req, remote_hostname,
- sizeof (remote_hostname));
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get the remote hostname");
- goto out;
- }
- ret = glusterd_friend_find (probe_req.uuid, remote_hostname, &peerinfo);
- if ((ret != 0 ) && (!list_empty (&conf->peers))) {
- rsp.op_ret = -1;
- rsp.op_errno = GF_PROBE_ANOTHER_CLUSTER;
- } else if (ret) {
- gf_log ("glusterd", GF_LOG_INFO, "Unable to find peerinfo"
- " for host: %s (%d)", remote_hostname, port);
- args.mode = GD_MODE_ON;
- ret = glusterd_friend_add (remote_hostname, port,
- GD_FRIEND_STATE_PROBE_RCVD,
- NULL, &peerinfo, 0, &args);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Failed to add peer %s",
- remote_hostname);
- rsp.op_errno = GF_PROBE_ADD_FAILED;
- }
+ case GF_PROBE_FRIEND_DETACHING:
+ snprintf(errstr, len,
+ "Peer is already being "
+ "detached from cluster.\n"
+ "Check peer status by running "
+ "gluster peer status");
+ break;
+ default:
+ if (op_errno != 0)
+ snprintf(errstr, len,
+ "Probe returned "
+ "with %s",
+ strerror(op_errno));
+ break;
}
+ } else {
+ switch (op_errno) {
+ case GF_PROBE_ANOTHER_CLUSTER:
+ snprintf(errstr, len,
+ "%s is either already "
+ "part of another cluster or having "
+ "volumes configured",
+ hostname);
+ break;
- uuid_copy (rsp.uuid, conf->uuid);
+ case GF_PROBE_VOLUME_CONFLICT:
+ snprintf(errstr, len,
+ "At least one volume on "
+ "%s conflicts with existing volumes "
+ "in the cluster",
+ hostname);
+ break;
- rsp.hostname = probe_req.hostname;
+ case GF_PROBE_UNKNOWN_PEER:
+ snprintf(errstr, len,
+ "%s responded with "
+ "'unknown peer' error, this could "
+ "happen if %s doesn't have localhost "
+ "in its peer database",
+ hostname, hostname);
+ break;
- ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_probe_rsp);
+ case GF_PROBE_ADD_FAILED:
+ snprintf(errstr, len,
+ "Failed to add peer "
+ "information on %s",
+ hostname);
+ break;
- gf_log ("glusterd", GF_LOG_INFO, "Responded to %s, op_ret: %d, "
- "op_errno: %d, ret: %d", remote_hostname,
- rsp.op_ret, rsp.op_errno, ret);
+ case GF_PROBE_SAME_UUID:
+ snprintf(errstr, len,
+ "Peer uuid (host %s) is "
+ "same as local uuid",
+ hostname);
+ break;
-out:
- if (probe_req.hostname)
- free (probe_req.hostname);//malloced by xdr
+ case GF_PROBE_QUORUM_NOT_MET:
+ snprintf(errstr, len,
+ "Cluster quorum is not "
+ "met. Changing peers is not allowed "
+ "in this state");
+ break;
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ case GF_PROBE_MISSED_SNAP_CONFLICT:
+ snprintf(errstr, len,
+ "Failed to update "
+ "list of missed snapshots from "
+ "peer %s",
+ hostname);
+ break;
- return ret;
+ case GF_PROBE_SNAP_CONFLICT:
+ snprintf(errstr, len,
+ "Conflict in comparing "
+ "list of snapshots from "
+ "peer %s",
+ hostname);
+ break;
+
+ default:
+ snprintf(errstr, len,
+ "Probe returned with "
+ "%s",
+ strerror(op_errno));
+ break;
+ }
+ }
}
int
-glusterd_handle_cli_profile_volume (rpcsvc_request_t *req)
+glusterd_xfer_cli_probe_resp(rpcsvc_request_t *req, int32_t op_ret,
+ int32_t op_errno, char *op_errstr, char *hostname,
+ int port, dict_t *dict)
{
- int32_t ret = -1;
- gf_cli_req cli_req = {{0,}};
- dict_t *dict = NULL;
- glusterd_op_t cli_op = GD_OP_PROFILE_VOLUME;
- char *volname = NULL;
- int32_t op = 0;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int32_t ret = -1;
+ char errstr[2048] = {
+ 0,
+ };
+ char *cmd_str = NULL;
+ xlator_t *this = THIS;
+
+ GF_ASSERT(req);
+ GF_ASSERT(this);
+
+ (void)set_probe_error_str(op_ret, op_errno, op_errstr, errstr,
+ sizeof(errstr), hostname, port);
+
+ if (dict) {
+ ret = dict_get_strn(dict, "cmd-str", SLEN("cmd-str"), &cmd_str);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CMDSTR_NOTFOUND_IN_DICT,
+ "Failed to get "
+ "command string");
+ }
- GF_ASSERT (req);
+ rsp.op_ret = op_ret;
+ rsp.op_errno = op_errno;
+ rsp.op_errstr = (errstr[0] != '\0') ? errstr : "";
- if (!xdr_to_generic (req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
+ gf_cmd_log("", "%s : %s %s %s", cmd_str, (op_ret) ? "FAILED" : "SUCCESS",
+ (errstr[0] != '\0') ? ":" : " ",
+ (errstr[0] != '\0') ? errstr : " ");
- if (cli_req.dict.dict_len > 0) {
- dict = dict_new();
- if (!dict)
- goto out;
- dict_unserialize (cli_req.dict.dict_val,
- cli_req.dict.dict_len, &dict);
- }
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp);
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to get volname");
- goto out;
- }
+ if (dict)
+ dict_unref(dict);
+ gf_msg_debug(this->name, 0, "Responded to CLI, ret: %d", ret);
- gf_log (THIS->name, GF_LOG_INFO, "Received volume profile req "
- "for volume %s", volname);
- ret = dict_get_int32 (dict, "op", &op);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to get op");
- goto out;
- }
+ return ret;
+}
- gf_cmd_log ("Volume stats", "volume : %s, op: %d", volname, op);
- ret = glusterd_op_begin (req, cli_op, dict);
- gf_cmd_log ("Volume stats", " on volume %s, op: %d %s ",
- volname, op,
- ((ret == 0)? " SUCCEDED":" FAILED"));
+static void
+set_deprobe_error_str(int op_ret, int op_errno, char *op_errstr, char *errstr,
+ size_t len, char *hostname)
+{
+ if ((op_errstr) && (strcmp(op_errstr, ""))) {
+ snprintf(errstr, len, "%s", op_errstr);
+ return;
+ }
+
+ if (op_ret) {
+ switch (op_errno) {
+ case GF_DEPROBE_LOCALHOST:
+ snprintf(errstr, len, "%s is localhost", hostname);
+ break;
-out:
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ case GF_DEPROBE_NOT_FRIEND:
+ snprintf(errstr, len,
+ "%s is not part of "
+ "cluster",
+ hostname);
+ break;
- if (ret && dict)
- dict_unref (dict);
- if (cli_req.dict.dict_val)
- free (cli_req.dict.dict_val);
- if (ret)
- ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
- NULL, "operation failed");
+ case GF_DEPROBE_BRICK_EXIST:
+ snprintf(errstr, len,
+ "Peer %s hosts one or more bricks. If the peer is in "
+ "not recoverable state then use either replace-brick "
+ "or remove-brick command with force to remove all "
+ "bricks from the peer and attempt the peer detach "
+ "again.",
+ hostname);
+ break;
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ case GF_DEPROBE_SNAP_BRICK_EXIST:
+ snprintf(errstr, len,
+ "%s is part of existing "
+ "snapshot. Remove those snapshots "
+ "before proceeding ",
+ hostname);
+ break;
+
+ case GF_DEPROBE_FRIEND_DOWN:
+ snprintf(errstr, len,
+ "One of the peers is "
+ "probably down. Check with "
+ "'peer status'");
+ break;
+
+ case GF_DEPROBE_QUORUM_NOT_MET:
+ snprintf(errstr, len,
+ "Cluster quorum is not "
+ "met. Changing peers is not allowed "
+ "in this state");
+ break;
+
+ case GF_DEPROBE_FRIEND_DETACHING:
+ snprintf(errstr, len,
+ "Peer is already being "
+ "detached from cluster.\n"
+ "Check peer status by running "
+ "gluster peer status");
+ break;
+ default:
+ snprintf(errstr, len,
+ "Detach returned with "
+ "%s",
+ strerror(op_errno));
+ break;
+ }
+ }
}
int
-glusterd_handle_getwd (rpcsvc_request_t *req)
+glusterd_xfer_cli_deprobe_resp(rpcsvc_request_t *req, int32_t op_ret,
+ int32_t op_errno, char *op_errstr,
+ char *hostname, dict_t *dict)
{
- int32_t ret = -1;
- gf1_cli_getwd_rsp rsp = {0,};
- glusterd_conf_t *priv = NULL;
-
- GF_ASSERT (req);
-
- priv = THIS->private;
- GF_ASSERT (priv);
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ int32_t ret = -1;
+ char *cmd_str = NULL;
+ char errstr[2048] = {
+ 0,
+ };
+
+ GF_ASSERT(req);
+
+ (void)set_deprobe_error_str(op_ret, op_errno, op_errstr, errstr,
+ sizeof(errstr), hostname);
+
+ if (dict) {
+ ret = dict_get_strn(dict, "cmd-str", SLEN("cmd-str"), &cmd_str);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_CMDSTR_NOTFOUND_IN_DICT,
+ "Failed to get "
+ "command string");
+ }
- gf_log ("glusterd", GF_LOG_INFO, "Received getwd req");
+ rsp.op_ret = op_ret;
+ rsp.op_errno = op_errno;
+ rsp.op_errstr = (errstr[0] != '\0') ? errstr : "";
- rsp.wd = priv->workdir;
+ gf_cmd_log("", "%s : %s %s %s", cmd_str, (op_ret) ? "FAILED" : "SUCCESS",
+ (errstr[0] != '\0') ? ":" : " ",
+ (errstr[0] != '\0') ? errstr : " ");
- ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gf1_cli_getwd_rsp);
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_cli_rsp);
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ gf_msg_debug(THIS->name, 0, "Responded to CLI, ret: %d", ret);
- return ret;
+ return ret;
}
-
-int
-glusterd_handle_mount (rpcsvc_request_t *req)
+int32_t
+glusterd_list_friends(rpcsvc_request_t *req, dict_t *dict, int32_t flags)
{
- gf1_cli_mount_req mnt_req = {0,};
- gf1_cli_mount_rsp rsp = {0,};
- dict_t *dict = NULL;
- int ret = 0;
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ glusterd_peerinfo_t *entry = NULL;
+ int32_t count = 0;
+ dict_t *friends = NULL;
+ gf1_cli_peer_list_rsp rsp = {
+ 0,
+ };
+ char my_uuid_str[64] = {
+ 0,
+ };
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ friends = dict_new();
+ if (!friends) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ /* Reset ret to 0, needed to prevent failure in case no peers exist */
+ ret = 0;
+ RCU_READ_LOCK;
+ if (!cds_list_empty(&priv->peers)) {
+ cds_list_for_each_entry_rcu(entry, &priv->peers, uuid_list)
+ {
+ count++;
+ ret = gd_add_peer_detail_to_dict(entry, friends, count);
+ if (ret)
+ goto unlock;
+ }
+ }
+unlock:
+ RCU_READ_UNLOCK;
+ if (ret)
+ goto out;
+
+ if (flags == GF_CLI_LIST_POOL_NODES) {
+ count++;
+ keylen = snprintf(key, sizeof(key), "friend%d.uuid", count);
+ uuid_utoa_r(MY_UUID, my_uuid_str);
+ ret = dict_set_strn(friends, key, keylen, my_uuid_str);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
- GF_ASSERT (req);
+ keylen = snprintf(key, sizeof(key), "friend%d.hostname", count);
+ ret = dict_set_nstrn(friends, key, keylen, "localhost",
+ SLEN("localhost"));
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
- if (!xdr_to_generic (req->msg[0], &mnt_req, (xdrproc_t)xdr_gf1_cli_mount_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
+ keylen = snprintf(key, sizeof(key), "friend%d.connected", count);
+ ret = dict_set_int32n(friends, key, keylen, 1);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
}
+ }
- gf_log ("glusterd", GF_LOG_INFO, "Received mount req");
+ ret = dict_set_int32n(friends, "count", SLEN("count"), count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
+ goto out;
+ }
- if (mnt_req.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
+ ret = dict_allocate_and_serialize(friends, &rsp.friends.friends_val,
+ &rsp.friends.friends_len);
- ret = dict_unserialize (mnt_req.dict.dict_val,
- mnt_req.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- rsp.op_ret = -1;
- rsp.op_errno = -EINVAL;
- goto out;
- } else {
- dict->extra_stdfree = mnt_req.dict.dict_val;
- }
- }
+ if (ret)
+ goto out;
- rsp.op_ret = glusterd_do_mount (mnt_req.label, dict,
- &rsp.path, &rsp.op_errno);
+ ret = 0;
+out:
- out:
- if (!rsp.path)
- rsp.path = "";
+ if (friends)
+ dict_unref(friends);
- ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gf1_cli_mount_rsp);
+ rsp.op_ret = ret;
- if (dict)
- dict_unref (dict);
- if (*rsp.path)
- GF_FREE (rsp.path);
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf1_cli_peer_list_rsp);
+ ret = 0;
+ GF_FREE(rsp.friends.friends_val);
- glusterd_friend_sm ();
- glusterd_op_sm ();
-
- return ret;
+ return ret;
}
-int
-glusterd_handle_umount (rpcsvc_request_t *req)
+int32_t
+glusterd_get_volumes(rpcsvc_request_t *req, dict_t *dict, int32_t flags)
{
- gf1_cli_umount_req umnt_req = {0,};
- gf1_cli_umount_rsp rsp = {0,};
- char *mountbroker_root = NULL;
- char mntp[PATH_MAX] = {0,};
- char *path = NULL;
- runner_t runner = {0,};
- int ret = 0;
- xlator_t *this = THIS;
- gf_boolean_t dir_ok = _gf_false;
- char *pdir = NULL;
- char *t = NULL;
-
- GF_ASSERT (req);
- GF_ASSERT (this);
+ int32_t ret = -1;
+ int32_t ret_bkp = 0;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *entry = NULL;
+ int32_t count = 0;
+ dict_t *volumes = NULL;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ char *volname = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT(priv);
+ volumes = dict_new();
+ if (!volumes) {
+ gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Out of Memory");
+ goto out;
+ }
+
+ if (cds_list_empty(&priv->volumes)) {
+ if (flags == GF_CLI_GET_VOLUME)
+ ret_bkp = -1;
+ ret = 0;
+ goto respond;
+ }
+ if (flags == GF_CLI_GET_VOLUME_ALL) {
+ cds_list_for_each_entry(entry, &priv->volumes, vol_list)
+ {
+ ret = glusterd_add_volume_detail_to_dict(entry, volumes, count);
+ if (ret)
+ goto respond;
- if (!xdr_to_generic (req->msg[0], &umnt_req, (xdrproc_t)xdr_gf1_cli_umount_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- rsp.op_ret = -1;
- goto out;
+ count++;
}
- gf_log ("glusterd", GF_LOG_INFO, "Received umount req");
+ } else if (flags == GF_CLI_GET_NEXT_VOLUME) {
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- if (dict_get_str (this->options, "mountbroker-root",
- &mountbroker_root) != 0) {
- rsp.op_errno = ENOENT;
- goto out;
+ if (ret) {
+ if (priv->volumes.next) {
+ entry = cds_list_entry(priv->volumes.next, typeof(*entry),
+ vol_list);
+ }
+ } else {
+ ret = glusterd_volinfo_find(volname, &entry);
+ if (ret)
+ goto respond;
+ entry = cds_list_entry(entry->vol_list.next, typeof(*entry),
+ vol_list);
}
- /* check if it is allowed to umount path */
- path = gf_strdup (umnt_req.path);
- if (!path) {
- rsp.op_errno = ENOMEM;
- goto out;
- }
- dir_ok = _gf_false;
- pdir = dirname (path);
- t = strtail (pdir, mountbroker_root);
- if (t && *t == '/') {
- t = strtail(++t, MB_HIVE);
- if (t && !*t)
- dir_ok = _gf_true;
- }
- GF_FREE (path);
- if (!dir_ok) {
- rsp.op_errno = EACCES;
- goto out;
- }
+ if (&entry->vol_list == &priv->volumes) {
+ goto respond;
+ } else {
+ ret = glusterd_add_volume_detail_to_dict(entry, volumes, count);
+ if (ret)
+ goto respond;
- runinit (&runner);
- runner_add_args (&runner, "umount", umnt_req.path, NULL);
- if (umnt_req.lazy)
- runner_add_arg (&runner, "-l");
- rsp.op_ret = runner_run (&runner);
- if (rsp.op_ret == 0) {
- if (realpath (umnt_req.path, mntp))
- rmdir (mntp);
- else {
- rsp.op_ret = -1;
- rsp.op_errno = errno;
- }
- if (unlink (umnt_req.path) != 0) {
- rsp.op_ret = -1;
- rsp.op_errno = errno;
- }
+ count++;
}
+ } else if (flags == GF_CLI_GET_VOLUME) {
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
- out:
- if (rsp.op_errno)
- rsp.op_ret = -1;
+ if (ret)
+ goto respond;
- ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gf1_cli_umount_rsp);
+ ret = glusterd_volinfo_find(volname, &entry);
+ if (ret) {
+ ret_bkp = ret;
+ goto respond;
+ }
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ ret = glusterd_add_volume_detail_to_dict(entry, volumes, count);
+ if (ret)
+ goto respond;
- return ret;
-}
+ count++;
+ }
-int
-glusterd_friend_remove (uuid_t uuid, char *hostname)
-{
- int ret = 0;
- glusterd_peerinfo_t *peerinfo = NULL;
+respond:
+ ret = dict_set_int32n(volumes, "count", SLEN("count"), count);
+ if (ret)
+ goto out;
+ ret = dict_allocate_and_serialize(volumes, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
- ret = glusterd_friend_find (uuid, hostname, &peerinfo);
- if (ret)
- goto out;
+ if (ret)
+ goto out;
- ret = glusterd_friend_remove_cleanup_vols (peerinfo->uuid);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING, "Volumes cleanup failed");
- ret = glusterd_friend_cleanup (peerinfo);
+ ret = 0;
out:
- gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
- return ret;
+ if (ret_bkp == -1) {
+ rsp.op_ret = ret_bkp;
+ rsp.op_errstr = "Volume does not exist";
+ rsp.op_errno = EG_NOVOL;
+ } else {
+ rsp.op_ret = ret;
+ rsp.op_errstr = "";
+ }
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp);
+ ret = 0;
+
+ if (volumes)
+ dict_unref(volumes);
+
+ GF_FREE(rsp.dict.dict_val);
+ return ret;
}
int
-glusterd_rpc_create (struct rpc_clnt **rpc,
- dict_t *options,
- rpc_clnt_notify_t notify_fn,
- void *notify_data)
+__glusterd_handle_status_volume(rpcsvc_request_t *req)
{
- struct rpc_clnt *new_rpc = NULL;
- int ret = -1;
- xlator_t *this = NULL;
+ int32_t ret = -1;
+ uint32_t cmd = 0;
+ dict_t *dict = NULL;
+ char *volname = 0;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ glusterd_op_t cli_op = GD_OP_STATUS_VOLUME;
+ char err_str[256] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode "
+ "request received from cli");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len > 0) {
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ goto out;
+ }
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize buffer");
+ snprintf(err_str, sizeof(err_str),
+ "Unable to decode "
+ "the command");
+ goto out;
+ }
+ }
- this = THIS;
- GF_ASSERT (this);
+ ret = dict_get_uint32(dict, "cmd", &cmd);
+ if (ret)
+ goto out;
- GF_ASSERT (options);
+ if (!(cmd & GF_CLI_STATUS_ALL)) {
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Unable to get "
+ "volume name");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s",
+ err_str);
+ goto out;
+ }
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_STATUS_VOL_REQ_RCVD,
+ "Received status volume req for volume %s", volname);
+ }
+ if ((cmd & GF_CLI_STATUS_CLIENT_LIST) &&
+ (conf->op_version < GD_OP_VERSION_3_13_0)) {
+ snprintf(err_str, sizeof(err_str),
+ "The cluster is operating "
+ "at version less than %d. Getting the client-list "
+ "is not allowed in this state.",
+ GD_OP_VERSION_3_13_0);
+ ret = -1;
+ goto out;
+ }
+
+ if ((cmd & GF_CLI_STATUS_QUOTAD) &&
+ (conf->op_version == GD_OP_VERSION_MIN)) {
+ snprintf(err_str, sizeof(err_str),
+ "The cluster is operating "
+ "at version 1. Getting the status of quotad is not "
+ "allowed in this state.");
+ ret = -1;
+ goto out;
+ }
+
+ if ((cmd & GF_CLI_STATUS_SNAPD) &&
+ (conf->op_version < GD_OP_VERSION_3_6_0)) {
+ snprintf(err_str, sizeof(err_str),
+ "The cluster is operating "
+ "at a lesser version than %d. Getting the status of "
+ "snapd is not allowed in this state",
+ GD_OP_VERSION_3_6_0);
+ ret = -1;
+ goto out;
+ }
+
+ if ((cmd & GF_CLI_STATUS_BITD) &&
+ (conf->op_version < GD_OP_VERSION_3_7_0)) {
+ snprintf(err_str, sizeof(err_str),
+ "The cluster is operating "
+ "at a lesser version than %d. Getting the status of "
+ "bitd is not allowed in this state",
+ GD_OP_VERSION_3_7_0);
+ ret = -1;
+ goto out;
+ }
+
+ if ((cmd & GF_CLI_STATUS_SCRUB) &&
+ (conf->op_version < GD_OP_VERSION_3_7_0)) {
+ snprintf(err_str, sizeof(err_str),
+ "The cluster is operating "
+ "at a lesser version than %d. Getting the status of "
+ "scrub is not allowed in this state",
+ GD_OP_VERSION_3_7_0);
+ ret = -1;
+ goto out;
+ }
- /* TODO: is 32 enough? or more ? */
- new_rpc = rpc_clnt_new (options, this->ctx, this->name, 16);
- if (!new_rpc)
- goto out;
+ ret = glusterd_op_begin_synctask(req, GD_OP_STATUS_VOLUME, dict);
- ret = rpc_clnt_register_notify (new_rpc, notify_fn, notify_data);
- *rpc = new_rpc;
- if (ret)
- goto out;
- ret = rpc_clnt_start (new_rpc);
out:
- if (ret) {
- if (new_rpc) {
- (void) rpc_clnt_unref (new_rpc);
- }
- }
- gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
- return ret;
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf(err_str, sizeof(err_str), "Operation failed");
+ ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str);
+ }
+ free(cli_req.dict.dict_val);
+
+ return ret;
}
int
-glusterd_transport_keepalive_options_get (int *interval, int *time)
+glusterd_handle_status_volume(rpcsvc_request_t *req)
{
- int ret = 0;
- xlator_t *this = NULL;
+ return glusterd_big_locked_handler(req, __glusterd_handle_status_volume);
+}
- this = THIS;
- GF_ASSERT (this);
+int
+__glusterd_handle_cli_clearlocks_volume(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ glusterd_op_t cli_op = GD_OP_CLEARLOCKS_VOLUME;
+ char *volname = NULL;
+ dict_t *dict = NULL;
+ char err_str[64] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = -1;
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode "
+ "request received from cli");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ dict = dict_new();
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to unserialize req-buffer to"
+ " dictionary");
+ snprintf(err_str, sizeof(err_str),
+ "unable to decode "
+ "the command");
+ goto out;
+ }
- ret = dict_get_int32 (this->options,
- "transport.socket.keepalive-interval",
- interval);
- ret = dict_get_int32 (this->options,
- "transport.socket.keepalive-time",
- time);
- return 0;
+ } else {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CLI_REQ_EMPTY,
+ "Empty cli request.");
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Unable to get volume "
+ "name");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLNAME_NOTFOUND_IN_DICT,
+ "%s", err_str);
+ goto out;
+ }
+
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CLRCLK_VOL_REQ_RCVD,
+ "Received clear-locks volume req "
+ "for volume %s",
+ volname);
+
+ ret = glusterd_op_begin_synctask(req, GD_OP_CLEARLOCKS_VOLUME, dict);
+
+out:
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf(err_str, sizeof(err_str), "Operation failed");
+ ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, err_str);
+ }
+ free(cli_req.dict.dict_val);
+
+ return ret;
}
int
-glusterd_transport_inet_keepalive_options_build (dict_t **options,
- const char *hostname, int port)
+glusterd_handle_cli_clearlocks_volume(rpcsvc_request_t *req)
{
- dict_t *dict = NULL;
- int32_t interval = -1;
- int32_t time = -1;
- int ret = 0;
+ return glusterd_big_locked_handler(req,
+ __glusterd_handle_cli_clearlocks_volume);
+}
- GF_ASSERT (options);
- GF_ASSERT (hostname);
+static int
+get_volinfo_from_brickid(char *brickid, glusterd_volinfo_t **volinfo)
+{
+ int ret = -1;
+ char *volid_str = NULL;
+ char *brick = NULL;
+ char *brickid_dup = NULL;
+ uuid_t volid = {0};
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(brickid);
+
+ brickid_dup = gf_strdup(brickid);
+ if (!brickid_dup)
+ goto out;
+
+ volid_str = brickid_dup;
+ brick = strchr(brickid_dup, ':');
+ if (!brick) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND,
+ "Invalid brickid");
+ goto out;
+ }
+
+ *brick = '\0';
+ brick++;
+ gf_uuid_parse(volid_str, volid);
+ ret = glusterd_volinfo_find_by_volume_id(volid, volinfo);
+ if (ret) {
+ /* Check if it is a snapshot volume */
+ ret = glusterd_snap_volinfo_find_by_volume_id(volid, volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to find volinfo");
+ goto out;
+ }
+ }
- if (!port)
- port = GLUSTERD_DEFAULT_PORT;
- ret = rpc_transport_inet_options_build (&dict, hostname, port);
- if (ret)
- goto out;
+ ret = 0;
+out:
+ GF_FREE(brickid_dup);
+ return ret;
+}
- glusterd_transport_keepalive_options_get (&interval, &time);
+static int
+__glusterd_handle_barrier(rpcsvc_request_t *req)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+ char *volname = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode "
+ "request received from cli");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (!cli_req.dict.dict_len) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len, &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "Failed to unserialize "
+ "request dictionary.");
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLNAME_NOTFOUND_IN_DICT,
+ "Volname not present in "
+ "dict");
+ goto out;
+ }
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BARRIER_VOL_REQ_RCVD,
+ "Received barrier volume request for "
+ "volume %s",
+ volname);
+
+ ret = glusterd_op_begin_synctask(req, GD_OP_BARRIER, dict);
- if ((interval > 0) || (time > 0))
- ret = rpc_transport_keepalive_options_set (dict, interval, time);
- *options = dict;
out:
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (ret) {
+ ret = glusterd_op_send_cli_response(GD_OP_BARRIER, ret, 0, req, dict,
+ "Operation failed");
+ }
+ free(cli_req.dict.dict_val);
+ return ret;
}
int
-glusterd_friend_add (const char *hoststr, int port,
- glusterd_friend_sm_state_t state,
- uuid_t *uuid,
- glusterd_peerinfo_t **friend,
- gf_boolean_t restore,
- glusterd_peerctx_args_t *args)
+glusterd_handle_barrier(rpcsvc_request_t *req)
{
- int ret = 0;
- xlator_t *this = NULL;
- glusterd_conf_t *conf = NULL;
- glusterd_peerctx_t *peerctx = NULL;
- dict_t *options = NULL;
- gf_boolean_t handover = _gf_false;
+ return glusterd_big_locked_handler(req, __glusterd_handle_barrier);
+}
- this = THIS;
- conf = this->private;
- GF_ASSERT (conf);
- GF_ASSERT (hoststr);
+static gf_boolean_t
+gd_is_global_option(char *opt_key)
+{
+ GF_VALIDATE_OR_GOTO(THIS->name, opt_key, out);
- peerctx = GF_CALLOC (1, sizeof (*peerctx), gf_gld_mt_peerctx_t);
- if (!peerctx) {
- ret = -1;
- goto out;
- }
+ return (strcmp(opt_key, GLUSTERD_SHARED_STORAGE_KEY) == 0 ||
+ strcmp(opt_key, GLUSTERD_QUORUM_RATIO_KEY) == 0 ||
+ strcmp(opt_key, GLUSTERD_GLOBAL_OP_VERSION_KEY) == 0 ||
+ strcmp(opt_key, GLUSTERD_BRICK_MULTIPLEX_KEY) == 0 ||
+ strcmp(opt_key, GLUSTERD_LOCALTIME_LOGGING_KEY) == 0 ||
+ strcmp(opt_key, GLUSTERD_DAEMON_LOG_LEVEL_KEY) == 0 ||
+ strcmp(opt_key, GLUSTERD_MAX_OP_VERSION_KEY) == 0);
- if (args)
- peerctx->args = *args;
+out:
+ return _gf_false;
+}
- ret = glusterd_peerinfo_new (friend, state, uuid, hoststr);
- if (ret)
+int32_t
+glusterd_get_volume_opts(rpcsvc_request_t *req, dict_t *dict)
+{
+ int32_t ret = -1;
+ int32_t count = 1;
+ int exists = 0;
+ char *key = NULL;
+ char *orig_key = NULL;
+ char *key_fixed = NULL;
+ char *volname = NULL;
+ char *value = NULL;
+ char err_str[2048] = {
+ 0,
+ };
+ char dict_key[50] = {
+ 0,
+ };
+ int keylen;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ char op_version_buff[10] = {
+ 0,
+ };
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GF_ASSERT(req);
+ GF_ASSERT(dict);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to get volume "
+ "name while handling get volume option command");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLNAME_NOTFOUND_IN_DICT,
+ "%s", err_str);
+ goto out;
+ }
+
+ if (strcasecmp(volname, "all") == 0) {
+ ret = glusterd_get_global_options_for_all_vols(req, dict,
+ &rsp.op_errstr);
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "key", SLEN("key"), &key);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to get key "
+ "while handling get volume option for %s",
+ volname);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ err_str);
+ goto out;
+ }
+ gf_msg_debug(this->name, 0,
+ "Received get volume opt request for "
+ "volume %s",
+ volname);
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str), FMTSTR_CHECK_VOL_EXISTS, volname);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
+ }
+ if (strcmp(key, "all")) {
+ if (fnmatch(GD_HOOKS_SPECIFIC_KEY, key, FNM_NOESCAPE) == 0) {
+ keylen = sprintf(dict_key, "key%d", count);
+ ret = dict_set_strn(dict, dict_key, keylen, key);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to "
+ "set %s in dictionary",
+ key);
goto out;
-
- peerctx->peerinfo = *friend;
-
- ret = glusterd_transport_inet_keepalive_options_build (&options,
- hoststr, port);
- if (ret)
+ }
+ ret = dict_get_str(volinfo->dict, key, &value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to "
+ "get %s in dictionary",
+ key);
+ goto out;
+ }
+ keylen = sprintf(dict_key, "value%d", count);
+ ret = dict_set_strn(dict, dict_key, keylen, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to "
+ "set %s in dictionary",
+ key);
goto out;
+ }
+ } else {
+ exists = glusterd_check_option_exists(key, &key_fixed);
+ if (!exists) {
+ snprintf(err_str, sizeof(err_str),
+ "Option "
+ "with name: %s does not exist",
+ key);
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_UNKNOWN_KEY,
+ "%s", err_str);
+ if (key_fixed)
+ snprintf(err_str + ret, sizeof(err_str) - ret,
+ "Did you mean %s?", key_fixed);
+ ret = -1;
+ goto out;
+ }
+ if (key_fixed) {
+ orig_key = key;
+ key = key_fixed;
+ }
+
+ if (gd_is_global_option(key)) {
+ char warn_str[] =
+ "Warning: support to get \
+ global option value using volume get \
+ <volname>` will be deprecated from \
+ next release. Consider using `volume \
+ get all` instead for global options";
+
+ ret = dict_set_strn(dict, "warning", SLEN("warning"), warn_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set warning "
+ "message in dictionary");
+ goto out;
+ }
+ }
- if (!restore) {
- ret = glusterd_store_peerinfo (*friend);
+ if (strcmp(key, GLUSTERD_MAX_OP_VERSION_KEY) == 0) {
+ ret = glusterd_get_global_max_op_version(req, dict, 1);
+ if (ret)
+ goto out;
+ } else if (strcmp(key, GLUSTERD_GLOBAL_OP_VERSION_KEY) == 0) {
+ keylen = sprintf(dict_key, "key%d", count);
+ ret = dict_set_strn(dict, dict_key, keylen, key);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed"
+ "to set %s in dictionary",
+ key);
+ goto out;
+ }
+ keylen = sprintf(dict_key, "value%d", count);
+ sprintf(op_version_buff, "%d", priv->op_version);
+ ret = dict_set_strn(dict, dict_key, keylen, op_version_buff);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed"
+ " to set value for key %s in "
+ "dictionary",
+ key);
+ goto out;
+ }
+ } else if (strcmp(key, "config.memory-accounting") == 0) {
+ keylen = sprintf(dict_key, "key%d", count);
+ ret = dict_set_strn(dict, dict_key, keylen, key);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed"
+ " to set %s in dictionary",
+ key);
+ goto out;
+ }
+ keylen = sprintf(dict_key, "value%d", count);
+
+ if (volinfo->memory_accounting)
+ ret = dict_set_nstrn(dict, dict_key, keylen, "Enabled",
+ SLEN("Enabled"));
+ else
+ ret = dict_set_nstrn(dict, dict_key, keylen, "Disabled",
+ SLEN("Disabled"));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed"
+ " to set value for key %s in "
+ "dictionary",
+ key);
+ goto out;
+ }
+ } else if (strcmp(key, "config.transport") == 0) {
+ keylen = sprintf(dict_key, "key%d", count);
+ ret = dict_set_strn(dict, dict_key, keylen, key);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to store "
- "peerinfo");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s in "
+ "dictionary",
+ key);
+ goto out;
+ }
+ keylen = sprintf(dict_key, "value%d", count);
+
+ if (volinfo->transport_type == GF_TRANSPORT_RDMA)
+ ret = dict_set_nstrn(dict, dict_key, keylen, "rdma",
+ SLEN("rdma"));
+ else if (volinfo->transport_type == GF_TRANSPORT_TCP)
+ ret = dict_set_nstrn(dict, dict_key, keylen, "tcp",
+ SLEN("tcp"));
+ else if (volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA)
+ ret = dict_set_nstrn(dict, dict_key, keylen, "tcp,rdma",
+ SLEN("tcp,rdma"));
+ else
+ ret = dict_set_nstrn(dict, dict_key, keylen, "none",
+ SLEN("none"));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set value for key "
+ "%s in dictionary",
+ key);
+ goto out;
+ }
+ } else {
+ keylen = sprintf(dict_key, "key%d", count);
+ ret = dict_set_strn(dict, dict_key, keylen, key);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s in "
+ "dictionary",
+ key);
+ goto out;
+ }
+ keylen = sprintf(dict_key, "value%d", count);
+ ret = dict_get_str(priv->opts, key, &value);
+ if (!ret) {
+ ret = dict_set_strn(dict, dict_key, keylen, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s in "
+ " dictionary",
+ key);
goto out;
+ }
+ } else {
+ ret = glusterd_get_default_val_for_volopt(
+ dict, _gf_false, key, orig_key, volinfo,
+ &rsp.op_errstr);
+ if (ret && !rsp.op_errstr) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to fetch the "
+ "value of %s, check "
+ "log file for more"
+ " details",
+ key);
+ }
}
+ }
}
- list_add_tail (&(*friend)->uuid_list, &conf->peers);
- ret = glusterd_rpc_create (&(*friend)->rpc, options,
- glusterd_peer_rpc_notify,
- peerctx);
+ /* Request is for a single option, explicitly set count to 1
+ * in the dictionary.
+ */
+ ret = dict_set_int32n(dict, "count", SLEN("count"), 1);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "failed to create rpc for"
- " peer %s", (char*)hoststr);
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to set count "
+ "value in the dictionary");
+ goto out;
}
- handover = _gf_true;
+ } else {
+ /* Handle the "all" volume option request */
+ ret = glusterd_get_default_val_for_volopt(dict, _gf_true, NULL, NULL,
+ volinfo, &rsp.op_errstr);
+ if (ret && !rsp.op_errstr) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to fetch the value of all volume "
+ "options, check log file for more details");
+ }
+ }
out:
- if (ret && !handover) {
- (void) glusterd_friend_cleanup (*friend);
- *friend = NULL;
- }
+ if (ret) {
+ if (!rsp.op_errstr)
+ rsp.op_errstr = err_str;
+ rsp.op_ret = ret;
+ } else {
+ rsp.op_errstr = "";
+ rsp.op_ret = 0;
+ }
- gf_log (this->name, GF_LOG_INFO, "connect returned %d", ret);
- return ret;
+ ret = dict_allocate_and_serialize(dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp);
+ GF_FREE(rsp.dict.dict_val);
+ GF_FREE(key_fixed);
+ return ret;
}
int
-glusterd_probe_begin (rpcsvc_request_t *req, const char *hoststr, int port)
+__glusterd_handle_get_vol_opt(rpcsvc_request_t *req)
{
- int ret = -1;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_peerctx_args_t args = {0};
- glusterd_friend_sm_event_t *event = NULL;
-
- GF_ASSERT (hoststr);
-
- ret = glusterd_friend_find (NULL, (char *)hoststr, &peerinfo);
-
- if (ret) {
- gf_log ("glusterd", GF_LOG_INFO, "Unable to find peerinfo"
- " for host: %s (%d)", hoststr, port);
- args.mode = GD_MODE_ON;
- args.req = req;
- ret = glusterd_friend_add ((char *)hoststr, port,
- GD_FRIEND_STATE_DEFAULT,
- NULL, &peerinfo, 0, &args);
- if ((!ret) && (!peerinfo->connected)) {
- ret = GLUSTERD_CONNECTION_AWAITED;
- }
-
- } else if (peerinfo->connected &&
- (GD_FRIEND_STATE_BEFRIENDED == peerinfo->state.state)) {
- ret = glusterd_friend_hostname_update (peerinfo, (char*)hoststr,
- _gf_false);
- if (ret)
- goto out;
- //this is just to rename so inject local acc for cluster update
- ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_LOCAL_ACC,
- &event);
- if (!ret) {
- event->peerinfo = peerinfo;
- ret = glusterd_friend_sm_inject_event (event);
- glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_SUCCESS,
- NULL, (char*)hoststr,
- port);
- }
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+ char err_str[64] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to decode "
+ "request received from cli");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s",
+ err_str);
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(err_str, sizeof(err_str),
+ "Unable to decode "
+ "the command");
+ goto out;
} else {
- glusterd_xfer_cli_probe_resp (req, 0, GF_PROBE_FRIEND, NULL,
- (char*)hoststr, port);
+ dict->extra_stdfree = cli_req.dict.dict_val;
}
+ }
+ ret = glusterd_get_volume_opts(req, dict);
out:
- gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
- return ret;
+ if (dict)
+ dict_unref(dict);
+
+ return ret;
}
int
-glusterd_deprobe_begin (rpcsvc_request_t *req, const char *hoststr, int port,
- uuid_t uuid)
+glusterd_handle_get_vol_opt(rpcsvc_request_t *req)
{
- int ret = -1;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_friend_sm_event_t *event = NULL;
- glusterd_probe_ctx_t *ctx = NULL;
-
- GF_ASSERT (hoststr);
- GF_ASSERT (req);
-
- ret = glusterd_friend_find (uuid, (char *)hoststr, &peerinfo);
-
- if (ret) {
- gf_log ("glusterd", GF_LOG_INFO, "Unable to find peerinfo"
- " for host: %s %d", hoststr, port);
- goto out;
- }
-
- if (!peerinfo->rpc) {
- //handle this case
- goto out;
- }
-
- ret = glusterd_friend_sm_new_event
- (GD_FRIEND_EVENT_INIT_REMOVE_FRIEND, &event);
-
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "Unable to get new event");
- return ret;
- }
-
- ctx = GF_CALLOC (1, sizeof(*ctx), gf_gld_mt_probe_ctx_t);
-
- if (!ctx) {
- goto out;
- }
+ return glusterd_big_locked_handler(req, __glusterd_handle_get_vol_opt);
+}
- ctx->hostname = gf_strdup (hoststr);
- ctx->port = port;
- ctx->req = req;
+extern struct rpc_clnt_program gd_brick_prog;
- event->ctx = ctx;
+static int
+glusterd_print_global_options(dict_t *opts, char *key, data_t *val, void *data)
+{
+ FILE *fp = NULL;
- event->peerinfo = peerinfo;
+ GF_VALIDATE_OR_GOTO(THIS->name, key, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, val, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, data, out);
- ret = glusterd_friend_sm_inject_event (event);
-
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject event %d, "
- "ret = %d", event->event, ret);
- goto out;
- }
+ if (strcmp(key, GLUSTERD_GLOBAL_OPT_VERSION) == 0)
+ goto out;
+ fp = (FILE *)data;
+ fprintf(fp, "%s: %s\n", key, val->data);
out:
- return ret;
+ return 0;
}
-
-int
-glusterd_xfer_friend_remove_resp (rpcsvc_request_t *req, char *hostname, int port)
+static int
+glusterd_print_volume_options(dict_t *opts, char *key, data_t *val, void *data)
{
- gd1_mgmt_friend_rsp rsp = {{0}, };
- int32_t ret = -1;
- xlator_t *this = NULL;
- glusterd_conf_t *conf = NULL;
-
- GF_ASSERT (hostname);
-
- rsp.op_ret = 0;
- this = THIS;
- GF_ASSERT (this);
+ FILE *fp = NULL;
- conf = this->private;
-
- uuid_copy (rsp.uuid, conf->uuid);
- rsp.hostname = hostname;
- rsp.port = port;
- ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_friend_rsp);
+ GF_VALIDATE_OR_GOTO(THIS->name, key, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, val, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, data, out);
- gf_log ("glusterd", GF_LOG_INFO,
- "Responded to %s (%d), ret: %d", hostname, port, ret);
- return ret;
+ fp = (FILE *)data;
+ fprintf(fp, "Volume%d.options.%s: %s\n", volcount, key, val->data);
+out:
+ return 0;
}
-
-int
-glusterd_xfer_friend_add_resp (rpcsvc_request_t *req, char *hostname, int port,
- int32_t op_ret, int32_t op_errno)
+static int
+glusterd_print_gsync_status(FILE *fp, dict_t *gsync_dict)
{
- gd1_mgmt_friend_rsp rsp = {{0}, };
- int32_t ret = -1;
- xlator_t *this = NULL;
- glusterd_conf_t *conf = NULL;
+ int ret = -1;
+ int gsync_count = 0;
+ int i = 0;
+ gf_gsync_status_t *status_vals = NULL;
+ char status_val_name[PATH_MAX] = {
+ 0,
+ };
- GF_ASSERT (hostname);
+ GF_VALIDATE_OR_GOTO(THIS->name, fp, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, gsync_dict, out);
- this = THIS;
- GF_ASSERT (this);
+ ret = dict_get_int32n(gsync_dict, "gsync-count", SLEN("gsync-count"),
+ &gsync_count);
- conf = this->private;
+ fprintf(fp, "Volume%d.gsync_count: %d\n", volcount, gsync_count);
- uuid_copy (rsp.uuid, conf->uuid);
- rsp.op_ret = op_ret;
- rsp.op_errno = op_errno;
- rsp.hostname = gf_strdup (hostname);
- rsp.port = port;
+ if (gsync_count == 0) {
+ ret = 0;
+ goto out;
+ }
- ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gd1_mgmt_friend_rsp);
+ for (i = 0; i < gsync_count; i++) {
+ snprintf(status_val_name, sizeof(status_val_name), "status_value%d", i);
- gf_log ("glusterd", GF_LOG_INFO,
- "Responded to %s (%d), ret: %d", hostname, port, ret);
- if (rsp.hostname)
- GF_FREE (rsp.hostname);
- return ret;
+ ret = dict_get_bin(gsync_dict, status_val_name,
+ (void **)&(status_vals));
+ if (ret)
+ goto out;
+
+ fprintf(fp, "Volume%d.pair%d.session_slave: %s\n", volcount, i + 1,
+ get_struct_variable(21, status_vals));
+ fprintf(fp, "Volume%d.pair%d.master_node: %s\n", volcount, i + 1,
+ get_struct_variable(0, status_vals));
+ fprintf(fp, "Volume%d.pair%d.master_volume: %s\n", volcount, i + 1,
+ get_struct_variable(1, status_vals));
+ fprintf(fp, "Volume%d.pair%d.master_brick: %s\n", volcount, i + 1,
+ get_struct_variable(2, status_vals));
+ fprintf(fp, "Volume%d.pair%d.slave_user: %s\n", volcount, i + 1,
+ get_struct_variable(3, status_vals));
+ fprintf(fp, "Volume%d.pair%d.slave: %s\n", volcount, i + 1,
+ get_struct_variable(4, status_vals));
+ fprintf(fp, "Volume%d.pair%d.slave_node: %s\n", volcount, i + 1,
+ get_struct_variable(5, status_vals));
+ fprintf(fp, "Volume%d.pair%d.status: %s\n", volcount, i + 1,
+ get_struct_variable(6, status_vals));
+ fprintf(fp, "Volume%d.pair%d.crawl_status: %s\n", volcount, i + 1,
+ get_struct_variable(7, status_vals));
+ fprintf(fp, "Volume%d.pair%d.last_synced: %s\n", volcount, i + 1,
+ get_struct_variable(8, status_vals));
+ fprintf(fp, "Volume%d.pair%d.entry: %s\n", volcount, i + 1,
+ get_struct_variable(9, status_vals));
+ fprintf(fp, "Volume%d.pair%d.data: %s\n", volcount, i + 1,
+ get_struct_variable(10, status_vals));
+ fprintf(fp, "Volume%d.pair%d.meta: %s\n", volcount, i + 1,
+ get_struct_variable(11, status_vals));
+ fprintf(fp, "Volume%d.pair%d.failures: %s\n", volcount, i + 1,
+ get_struct_variable(12, status_vals));
+ fprintf(fp, "Volume%d.pair%d.checkpoint_time: %s\n", volcount, i + 1,
+ get_struct_variable(13, status_vals));
+ fprintf(fp, "Volume%d.pair%d.checkpoint_completed: %s\n", volcount,
+ i + 1, get_struct_variable(14, status_vals));
+ fprintf(fp, "Volume%d.pair%d.checkpoint_completion_time: %s\n",
+ volcount, i + 1, get_struct_variable(15, status_vals));
+ }
+out:
+ return ret;
}
-int
-glusterd_xfer_cli_probe_resp (rpcsvc_request_t *req, int32_t op_ret,
- int32_t op_errno, char *op_errstr, char *hostname,
- int port)
+static int
+glusterd_print_gsync_status_by_vol(FILE *fp, glusterd_volinfo_t *volinfo)
{
- gf1_cli_probe_rsp rsp = {0, };
- int32_t ret = -1;
-
- GF_ASSERT (req);
-
- rsp.op_ret = op_ret;
- rsp.op_errno = op_errno;
- rsp.op_errstr = op_errstr ? op_errstr : "";
- rsp.hostname = hostname;
- rsp.port = port;
-
- ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gf1_cli_probe_rsp);
+ int ret = -1;
+ dict_t *gsync_rsp_dict = NULL;
+ char my_hostname[256] = {
+ 0,
+ };
+
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, fp, out);
+
+ gsync_rsp_dict = dict_new();
+ if (!gsync_rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ ret = gethostname(my_hostname, sizeof(my_hostname));
+ if (ret) {
+ /* stick to N/A */
+ (void)strcpy(my_hostname, "N/A");
+ }
+
+ ret = glusterd_get_gsync_status_mst(volinfo, gsync_rsp_dict, my_hostname);
+ /* Ignoring ret as above function always returns ret = 0 */
+
+ ret = glusterd_print_gsync_status(fp, gsync_rsp_dict);
+out:
+ if (gsync_rsp_dict)
+ dict_unref(gsync_rsp_dict);
+ return ret;
+}
- gf_log ("glusterd", GF_LOG_INFO, "Responded to CLI, ret: %d",ret);
+static int
+glusterd_print_snapinfo_by_vol(FILE *fp, glusterd_volinfo_t *volinfo,
+ int volcount)
+{
+ int ret = -1;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_volinfo_t *tmp_vol = NULL;
+ glusterd_snap_t *snapinfo = NULL;
+ int snapcount = 0;
+ char timestr[GF_TIMESTR_SIZE] = {
+ 0,
+ };
+ char snap_status_str[STATUS_STRLEN] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, fp, out);
+
+ cds_list_for_each_entry_safe(snap_vol, tmp_vol, &volinfo->snap_volumes,
+ snapvol_list)
+ {
+ snapcount++;
+ snapinfo = snap_vol->snapshot;
+
+ ret = glusterd_get_snap_status_str(snapinfo, snap_status_str);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_STATE_STR_GET_FAILED,
+ "Failed to get status for snapshot: %s", snapinfo->snapname);
- return ret;
+ goto out;
+ }
+ gf_time_fmt(timestr, sizeof timestr, snapinfo->time_stamp,
+ gf_timefmt_FT);
+
+ fprintf(fp, "Volume%d.snapshot%d.name: %s\n", volcount, snapcount,
+ snapinfo->snapname);
+ fprintf(fp, "Volume%d.snapshot%d.id: %s\n", volcount, snapcount,
+ uuid_utoa(snapinfo->snap_id));
+ fprintf(fp, "Volume%d.snapshot%d.time: %s\n", volcount, snapcount,
+ timestr);
+
+ if (snapinfo->description)
+ fprintf(fp, "Volume%d.snapshot%d.description: %s\n", volcount,
+ snapcount, snapinfo->description);
+ fprintf(fp, "Volume%d.snapshot%d.status: %s\n", volcount, snapcount,
+ snap_status_str);
+ }
+
+ ret = 0;
+out:
+ return ret;
}
-int
-glusterd_xfer_cli_deprobe_resp (rpcsvc_request_t *req, int32_t op_ret,
- int32_t op_errno, char *op_errstr,
- char *hostname)
+static int
+glusterd_print_client_details(FILE *fp, dict_t *dict,
+ glusterd_volinfo_t *volinfo, int volcount,
+ glusterd_brickinfo_t *brickinfo, int brickcount)
{
- gf1_cli_deprobe_rsp rsp = {0, };
- int32_t ret = -1;
-
- GF_ASSERT (req);
+ int ret = -1;
+ xlator_t *this = NULL;
+ int brick_index = -1;
+ int client_count = 0;
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+ char *clientname = NULL;
+ uint64_t bytesread = 0;
+ uint64_t byteswrite = 0;
+ uint32_t opversion = 0;
+
+ glusterd_pending_node_t *pending_node = NULL;
+ rpc_clnt_t *rpc = NULL;
+ struct syncargs args = {
+ 0,
+ };
+ gd1_mgmt_brick_op_req *brick_req = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID) ||
+ !glusterd_is_brick_started(brickinfo)) {
+ ret = 0;
+ goto out;
+ }
- rsp.op_ret = op_ret;
- rsp.op_errno = op_errno;
- rsp.op_errstr = op_errstr ? op_errstr : "";
- rsp.hostname = hostname;
+ brick_index++;
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Unable to allocate memory");
+ goto out;
+ }
- ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gf1_cli_deprobe_rsp);
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ pending_node->index = brick_index;
- gf_log ("glusterd", GF_LOG_INFO, "Responded to CLI, ret: %d",ret);
+ rpc = glusterd_pending_node_get_rpc(pending_node);
+ if (!rpc) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
+ "Failed to retrieve rpc object");
+ goto out;
+ }
- return ret;
-}
+ brick_req = GF_CALLOC(1, sizeof(*brick_req), gf_gld_mt_mop_brick_req_t);
+ if (!brick_req) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Unable to allocate memory");
+ goto out;
+ }
+
+ brick_req->op = GLUSTERD_BRICK_STATUS;
+ brick_req->name = "";
+ brick_req->dict.dict_val = NULL;
+ brick_req->dict.dict_len = 0;
+
+ ret = dict_set_strn(dict, "brick-name", SLEN("brick-name"),
+ brickinfo->path);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=brick-name", NULL);
+ goto out;
+ }
+
+ ret = dict_set_int32n(dict, "cmd", SLEN("cmd"), GF_CLI_STATUS_CLIENTS);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cmd", NULL);
+ goto out;
+ }
+
+ ret = dict_set_strn(dict, "volname", SLEN("volname"), volinfo->volname);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=volname", NULL);
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val,
+ &brick_req->input.input_len);
+ if (ret)
+ goto out;
+
+ GD_SYNCOP(rpc, (&args), NULL, gd_syncop_brick_op_cbk, brick_req,
+ &gd_brick_prog, brick_req->op, xdr_gd1_mgmt_brick_op_req);
+
+ if (args.op_ret)
+ goto out;
+
+ ret = dict_get_int32n(args.dict, "clientcount", SLEN("clientcount"),
+ &client_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Couldn't get client count");
+ goto out;
+ }
+
+ fprintf(fp, "Volume%d.Brick%d.client_count: %d\n", volcount, brickcount,
+ client_count);
+
+ if (client_count == 0) {
+ ret = 0;
+ goto out;
+ }
-int32_t
-glusterd_list_friends (rpcsvc_request_t *req, dict_t *dict, int32_t flags)
-{
- int32_t ret = -1;
- glusterd_conf_t *priv = NULL;
- glusterd_peerinfo_t *entry = NULL;
- int32_t count = 0;
- dict_t *friends = NULL;
- gf1_cli_peer_list_rsp rsp = {0,};
-
- priv = THIS->private;
- GF_ASSERT (priv);
-
- if (!list_empty (&priv->peers)) {
- friends = dict_new ();
- if (!friends) {
- gf_log ("", GF_LOG_WARNING, "Out of Memory");
- goto out;
- }
- } else {
- ret = 0;
- goto out;
+ int i;
+ for (i = 1; i <= client_count; i++) {
+ keylen = snprintf(key, sizeof(key), "client%d.hostname", i - 1);
+ ret = dict_get_strn(args.dict, key, keylen, &clientname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get client hostname");
+ goto out;
}
- if (flags == GF_CLI_LIST_ALL) {
- list_for_each_entry (entry, &priv->peers, uuid_list) {
- count++;
- ret = glusterd_add_peer_detail_to_dict (entry,
- friends, count);
- if (ret)
- goto out;
-
- }
-
- ret = dict_set_int32 (friends, "count", count);
+ snprintf(key, sizeof(key), "Client%d.hostname", i);
+ fprintf(fp, "Volume%d.Brick%d.%s: %s\n", volcount, brickcount, key,
+ clientname);
- if (ret)
- goto out;
+ snprintf(key, sizeof(key), "client%d.bytesread", i - 1);
+ ret = dict_get_uint64(args.dict, key, &bytesread);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get bytesread from client");
+ goto out;
}
- ret = dict_allocate_and_serialize (friends, &rsp.friends.friends_val,
- (size_t *)&rsp.friends.friends_len);
+ snprintf(key, sizeof(key), "Client%d.bytesread", i);
+ fprintf(fp, "Volume%d.Brick%d.%s: %" PRIu64 "\n", volcount, brickcount,
+ key, bytesread);
- if (ret)
- goto out;
-
- ret = 0;
-out:
+ snprintf(key, sizeof(key), "client%d.byteswrite", i - 1);
+ ret = dict_get_uint64(args.dict, key, &byteswrite);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get byteswrite from client");
+ goto out;
+ }
- if (friends)
- dict_unref (friends);
+ snprintf(key, sizeof(key), "Client%d.byteswrite", i);
+ fprintf(fp, "Volume%d.Brick%d.%s: %" PRIu64 "\n", volcount, brickcount,
+ key, byteswrite);
- rsp.op_ret = ret;
+ snprintf(key, sizeof(key), "client%d.opversion", i - 1);
+ ret = dict_get_uint32(args.dict, key, &opversion);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get client opversion");
+ goto out;
+ }
- ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gf1_cli_peer_list_rsp);
- if (rsp.friends.friends_val)
- GF_FREE (rsp.friends.friends_val);
+ snprintf(key, sizeof(key), "Client%d.opversion", i);
+ fprintf(fp, "Volume%d.Brick%d.%s: %" PRIu32 "\n", volcount, brickcount,
+ key, opversion);
+ }
- return ret;
+out:
+ if (pending_node)
+ GF_FREE(pending_node);
+
+ if (brick_req) {
+ if (brick_req->input.input_val)
+ GF_FREE(brick_req->input.input_val);
+ GF_FREE(brick_req);
+ }
+ if (args.dict)
+ dict_unref(args.dict);
+ if (args.errstr)
+ GF_FREE(args.errstr);
+
+ return ret;
}
-int32_t
-glusterd_get_volumes (rpcsvc_request_t *req, dict_t *dict, int32_t flags)
-{
- int32_t ret = -1;
- glusterd_conf_t *priv = NULL;
- glusterd_volinfo_t *entry = NULL;
- int32_t count = 0;
- dict_t *volumes = NULL;
- gf_cli_rsp rsp = {0,};
- char *volname = NULL;
-
- priv = THIS->private;
- GF_ASSERT (priv);
-
- volumes = dict_new ();
- if (!volumes) {
- gf_log ("", GF_LOG_WARNING, "Out of Memory");
- goto out;
+static int
+glusterd_get_state(rpcsvc_request_t *req, dict_t *dict)
+{
+ int32_t ret = -1;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ FILE *fp = NULL;
+ DIR *dp = NULL;
+ char err_str[2048] = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peer_hostname_t *peer_hostname_info = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = NULL;
+ dict_t *vol_all_opts = NULL;
+ struct statvfs brickstat = {0};
+ char *odir = NULL;
+ char *filename = NULL;
+ char *ofilepath = NULL;
+ char *tmp_str = NULL;
+ int count = 0;
+ int count_bkp = 0;
+ int odirlen = 0;
+ time_t now = 0;
+ char timestamp[16] = {
+ 0,
+ };
+ uint32_t get_state_cmd = 0;
+ uint64_t memtotal = 0;
+ uint64_t memfree = 0;
+ char id_str[64] = {
+ 0,
+ };
+
+ char *vol_type_str = NULL;
+
+ char transport_type_str[STATUS_STRLEN] = {
+ 0,
+ };
+ char quorum_status_str[STATUS_STRLEN] = {
+ 0,
+ };
+ char rebal_status_str[STATUS_STRLEN] = {
+ 0,
+ };
+ char vol_status_str[STATUS_STRLEN] = {
+ 0,
+ };
+ char brick_status_str[STATUS_STRLEN] = {
+ 0,
+ };
+ this = THIS;
+ GF_VALIDATE_OR_GOTO(THIS->name, this, out);
+
+ priv = THIS->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+ ret = dict_get_strn(dict, "odir", SLEN("odir"), &tmp_str);
+ if (ret) {
+ odirlen = gf_asprintf(&odir, "%s", "/var/run/gluster/");
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED,
+ "Default output directory: %s", odir);
+ } else {
+ odirlen = gf_asprintf(&odir, "%s", tmp_str);
+ }
+
+ dp = sys_opendir(odir);
+ if (dp) {
+ sys_closedir(dp);
+ } else {
+ if (errno == ENOENT) {
+ snprintf(err_str, sizeof(err_str),
+ "Output directory %s does not exist.", odir);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ err_str);
+ } else if (errno == ENOTDIR) {
+ snprintf(err_str, sizeof(err_str),
+ "Output directory "
+ "does not exist. %s points to a file.",
+ odir);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ err_str);
}
- if (list_empty (&priv->volumes)) {
- ret = 0;
- goto respond;
+ GF_FREE(odir);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "filename", SLEN("filename"), &tmp_str);
+ if (ret) {
+ now = gf_time();
+ strftime(timestamp, sizeof(timestamp), "%Y%m%d_%H%M%S",
+ localtime(&now));
+ gf_asprintf(&filename, "%s_%s", "glusterd_state", timestamp);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED,
+ "Default filename: %s", filename);
+ } else {
+ gf_asprintf(&filename, "%s", tmp_str);
+ }
+
+ ret = gf_asprintf(&ofilepath, "%s%s%s", odir,
+ ((odir[odirlen - 1] != '/') ? "/" : ""), filename);
+
+ if (ret < 0) {
+ GF_FREE(odir);
+ GF_FREE(filename);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to get the output path");
+ ret = -1;
+ goto out;
+ }
+ GF_FREE(odir);
+ GF_FREE(filename);
+
+ ret = dict_set_dynstrn(dict, "ofilepath", SLEN("ofilepath"), ofilepath);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set output path");
+ goto out;
+ }
+
+ fp = fopen(ofilepath, "w");
+ if (!fp) {
+ snprintf(err_str, sizeof(err_str), "Failed to open file at %s",
+ ofilepath);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ err_str);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_uint32(dict, "getstate-cmd", &get_state_cmd);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "get-state command type not set");
+ ret = 0;
+ }
+
+ if (get_state_cmd == GF_CLI_GET_STATE_VOLOPTS) {
+ fprintf(fp, "[Volume Options]\n");
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ fprintf(fp, "Volume%d.name: %s\n", ++count, volinfo->volname);
+
+ volcount = count;
+ vol_all_opts = dict_new();
+
+ ret = glusterd_get_default_val_for_volopt(
+ vol_all_opts, _gf_true, NULL, NULL, volinfo, &rsp.op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OPTS_IMPORT_FAIL,
+ "Failed to "
+ "fetch the value of all volume options "
+ "for volume %s",
+ volinfo->volname);
+ if (vol_all_opts)
+ dict_unref(vol_all_opts);
+ continue;
+ }
+
+ dict_foreach(vol_all_opts, glusterd_print_volume_options, fp);
+
+ if (vol_all_opts)
+ dict_unref(vol_all_opts);
}
+ ret = 0;
+ goto out;
+ }
- if (flags == GF_CLI_GET_VOLUME_ALL) {
- list_for_each_entry (entry, &priv->volumes, vol_list) {
- ret = glusterd_add_volume_detail_to_dict (entry,
- volumes, count);
- if (ret)
- goto respond;
+ fprintf(fp, "[Global]\n");
- count++;
+ uuid_utoa_r(priv->uuid, id_str);
+ fprintf(fp, "MYUUID: %s\n", id_str);
- }
+ fprintf(fp, "op-version: %d\n", priv->op_version);
- } else if (flags == GF_CLI_GET_NEXT_VOLUME) {
- ret = dict_get_str (dict, "volname", &volname);
+ fprintf(fp, "\n[Global options]\n");
- if (ret) {
- if (priv->volumes.next) {
- entry = list_entry (priv->volumes.next,
- typeof (*entry),
- vol_list);
- }
- } else {
- ret = glusterd_volinfo_find (volname, &entry);
- if (ret)
- goto respond;
- entry = list_entry (entry->vol_list.next,
- typeof (*entry),
- vol_list);
- }
+ if (priv->opts)
+ dict_foreach(priv->opts, glusterd_print_global_options, fp);
- if (&entry->vol_list == &priv->volumes) {
- goto respond;
- } else {
- ret = glusterd_add_volume_detail_to_dict (entry,
- volumes, count);
- if (ret)
- goto respond;
+ fprintf(fp, "\n[Peers]\n");
+ RCU_READ_LOCK;
- count++;
- }
- } else if (flags == GF_CLI_GET_VOLUME) {
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto respond;
+ cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
+ {
+ fprintf(fp, "Peer%d.primary_hostname: %s\n", ++count,
+ peerinfo->hostname);
+ fprintf(fp, "Peer%d.uuid: %s\n", count, gd_peer_uuid_str(peerinfo));
+ fprintf(fp, "Peer%d.state: %s\n", count,
+ glusterd_friend_sm_state_name_get(peerinfo->state.state));
+ fprintf(fp, "Peer%d.connected: %s\n", count,
+ peerinfo->connected ? "Connected" : "Disconnected");
- ret = glusterd_volinfo_find (volname, &entry);
- if (ret)
- goto respond;
+ fprintf(fp, "Peer%d.othernames: ", count);
+ count_bkp = 0;
+ cds_list_for_each_entry(peer_hostname_info, &peerinfo->hostnames,
+ hostname_list)
+ {
+ if (strcmp(peerinfo->hostname, peer_hostname_info->hostname) == 0)
+ continue;
- ret = glusterd_add_volume_detail_to_dict (entry,
- volumes, count);
- if (ret)
- goto respond;
+ if (count_bkp > 0)
+ fprintf(fp, ",");
- count++;
+ fprintf(fp, "%s", peer_hostname_info->hostname);
+ count_bkp++;
}
+ count_bkp = 0;
+ fprintf(fp, "\n");
+ }
+ RCU_READ_UNLOCK;
-respond:
- ret = dict_set_int32 (volumes, "count", count);
- if (ret)
- goto out;
- ret = dict_allocate_and_serialize (volumes, &rsp.dict.dict_val,
- (size_t *)&rsp.dict.dict_len);
+ count = 0;
+ fprintf(fp, "\n[Volumes]\n");
- if (ret)
- goto out;
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ ret = glusterd_volume_get_type_str(volinfo, &vol_type_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATE_STR_GET_FAILED,
+ "Failed to get type for volume: %s", volinfo->volname);
+ goto out;
+ }
- ret = 0;
-out:
- rsp.op_ret = ret;
+ ret = glusterd_volume_get_status_str(volinfo, vol_status_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATE_STR_GET_FAILED,
+ "Failed to get status for volume: %s", volinfo->volname);
+ goto out;
+ }
- rsp.op_errstr = "";
- ret = glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gf_cli_rsp);
+ ret = glusterd_volume_get_transport_type_str(volinfo,
+ transport_type_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATE_STR_GET_FAILED,
+ "Failed to get transport type for volume: %s",
+ volinfo->volname);
+ goto out;
+ }
- if (volumes)
- dict_unref (volumes);
+ ret = glusterd_volume_get_quorum_status_str(volinfo, quorum_status_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATE_STR_GET_FAILED,
+ "Failed to get quorum status for volume: %s",
+ volinfo->volname);
+ goto out;
+ }
- if (rsp.dict.dict_val)
- GF_FREE (rsp.dict.dict_val);
- return ret;
-}
+ ret = glusterd_volume_get_rebalance_status_str(volinfo,
+ rebal_status_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STATE_STR_GET_FAILED,
+ "Failed to get rebalance status for volume: %s",
+ volinfo->volname);
+ goto out;
+ }
-int
-glusterd_handle_status_volume (rpcsvc_request_t *req)
-{
- int32_t ret = -1;
- uint32_t cmd = 0;
- dict_t *dict = NULL;
- char *volname = 0;
- gf_cli_req cli_req = {{0,}};
- glusterd_op_t cli_op = GD_OP_STATUS_VOLUME;
+ fprintf(fp, "Volume%d.name: %s\n", ++count, volinfo->volname);
+
+ uuid_utoa_r(volinfo->volume_id, id_str);
+ fprintf(fp, "Volume%d.id: %s\n", count, id_str);
- GF_ASSERT (req);
+ fprintf(fp, "Volume%d.type: %s\n", count, vol_type_str);
+ fprintf(fp, "Volume%d.transport_type: %s\n", count, transport_type_str);
+ fprintf(fp, "Volume%d.status: %s\n", count, vol_status_str);
+ fprintf(fp, "Volume%d.profile_enabled: %d\n", count,
+ glusterd_is_profile_on(volinfo));
+ fprintf(fp, "Volume%d.brickcount: %d\n", count, volinfo->brick_count);
- if (!xdr_to_generic (req->msg[0], &cli_req,
- (xdrproc_t)xdr_gf_cli_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
+ count_bkp = count;
+ count = 0;
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ fprintf(fp, "Volume%d.Brick%d.path: %s:%s\n", count_bkp, ++count,
+ brickinfo->hostname, brickinfo->path);
+ fprintf(fp, "Volume%d.Brick%d.hostname: %s\n", count_bkp, count,
+ brickinfo->hostname);
+ /* Determine which one is the arbiter brick */
+ if (volinfo->arbiter_count == 1) {
+ if (count % volinfo->replica_count == 0) {
+ fprintf(fp,
+ "Volume%d.Brick%d."
+ "is_arbiter: 1\n",
+ count_bkp, count);
+ }
+ }
+ /* Add following information only for bricks
+ * local to current node */
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID))
+ continue;
+ fprintf(fp, "Volume%d.Brick%d.port: %d\n", count_bkp, count,
+ brickinfo->port);
+ fprintf(fp, "Volume%d.Brick%d.rdma_port: %d\n", count_bkp, count,
+ brickinfo->rdma_port);
+ fprintf(fp, "Volume%d.Brick%d.port_registered: %d\n", count_bkp,
+ count, brickinfo->port_registered);
+ glusterd_brick_get_status_str(brickinfo, brick_status_str);
+ fprintf(fp, "Volume%d.Brick%d.status: %s\n", count_bkp, count,
+ brick_status_str);
+
+ ret = sys_statvfs(brickinfo->path, &brickstat);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "statfs error: %s ", strerror(errno));
+ memfree = 0;
+ memtotal = 0;
+ } else {
+ memfree = brickstat.f_bfree * brickstat.f_bsize;
+ memtotal = brickstat.f_blocks * brickstat.f_bsize;
+ }
+
+ fprintf(fp, "Volume%d.Brick%d.spacefree: %" PRIu64 "Bytes\n",
+ count_bkp, count, memfree);
+ fprintf(fp, "Volume%d.Brick%d.spacetotal: %" PRIu64 "Bytes\n",
+ count_bkp, count, memtotal);
+
+ if (get_state_cmd != GF_CLI_GET_STATE_DETAIL)
+ continue;
+
+ ret = glusterd_print_client_details(fp, dict, volinfo, count_bkp,
+ brickinfo, count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_CLIENTS_GET_STATE_FAILED,
+ "Failed to get client details");
goto out;
+ }
}
- if (cli_req.dict.dict_len > 0) {
- dict = dict_new();
- if (!dict)
- goto out;
- ret = dict_unserialize (cli_req.dict.dict_val,
- cli_req.dict.dict_len, &dict);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to "
- "unserialize buffer");
- goto out;
- }
+ count = count_bkp;
+ ret = glusterd_print_snapinfo_by_vol(fp, volinfo, count);
+ if (ret)
+ goto out;
+
+ fprintf(fp, "Volume%d.snap_count: %" PRIu64 "\n", count,
+ volinfo->snap_count);
+ fprintf(fp, "Volume%d.stripe_count: %d\n", count,
+ volinfo->stripe_count);
+ fprintf(fp, "Volume%d.replica_count: %d\n", count,
+ volinfo->replica_count);
+ fprintf(fp, "Volume%d.subvol_count: %d\n", count,
+ volinfo->subvol_count);
+ fprintf(fp, "Volume%d.arbiter_count: %d\n", count,
+ volinfo->arbiter_count);
+ fprintf(fp, "Volume%d.disperse_count: %d\n", count,
+ volinfo->disperse_count);
+ fprintf(fp, "Volume%d.redundancy_count: %d\n", count,
+ volinfo->redundancy_count);
+ fprintf(fp, "Volume%d.quorum_status: %s\n", count, quorum_status_str);
+
+ fprintf(fp, "Volume%d.snapd_svc.online_status: %s\n", count,
+ volinfo->snapd.svc.online ? "Online" : "Offline");
+ fprintf(fp, "Volume%d.snapd_svc.inited: %s\n", count,
+ volinfo->snapd.svc.inited ? "True" : "False");
+
+ uuid_utoa_r(volinfo->rebal.rebalance_id, id_str);
+ char *rebal_data = gf_uint64_2human_readable(
+ volinfo->rebal.rebalance_data);
+
+ fprintf(fp, "Volume%d.rebalance.id: %s\n", count, id_str);
+ fprintf(fp, "Volume%d.rebalance.status: %s\n", count, rebal_status_str);
+ fprintf(fp, "Volume%d.rebalance.failures: %" PRIu64 "\n", count,
+ volinfo->rebal.rebalance_failures);
+ fprintf(fp, "Volume%d.rebalance.skipped: %" PRIu64 "\n", count,
+ volinfo->rebal.skipped_files);
+ fprintf(fp, "Volume%d.rebalance.lookedup: %" PRIu64 "\n", count,
+ volinfo->rebal.lookedup_files);
+ fprintf(fp, "Volume%d.rebalance.files: %" PRIu64 "\n", count,
+ volinfo->rebal.rebalance_files);
+ fprintf(fp, "Volume%d.rebalance.data: %s\n", count, rebal_data);
+ fprintf(fp, "Volume%d.time_left: %" PRIu64 "\n", count,
+ volinfo->rebal.time_left);
+
+ GF_FREE(rebal_data);
+
+ fprintf(fp, "Volume%d.shd_svc.online_status: %s\n", count,
+ volinfo->shd.svc.online ? "Online" : "Offline");
+ fprintf(fp, "Volume%d.shd_svc.inited: %s\n", count,
+ volinfo->shd.svc.inited ? "True" : "False");
+
+ if (volinfo->rep_brick.src_brick && volinfo->rep_brick.dst_brick) {
+ fprintf(fp, "Volume%d.replace_brick.src: %s:%s\n", count,
+ volinfo->rep_brick.src_brick->hostname,
+ volinfo->rep_brick.src_brick->path);
+ fprintf(fp, "Volume%d.replace_brick.dest: %s:%s\n", count,
+ volinfo->rep_brick.dst_brick->hostname,
+ volinfo->rep_brick.dst_brick->path);
}
- ret = dict_get_uint32 (dict, "cmd", &cmd);
+ volcount = count;
+ ret = glusterd_print_gsync_status_by_vol(fp, volinfo);
if (ret)
- goto out;
+ goto out;
- if (!(cmd & GF_CLI_STATUS_ALL)) {
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to get volname");
- goto out;
- }
- gf_log (THIS->name, GF_LOG_INFO,
- "Received status volume req "
- "for volume %s", volname);
+ if (volinfo->dict)
+ dict_foreach(volinfo->dict, glusterd_print_volume_options, fp);
- }
+ fprintf(fp, "\n");
+ }
- ret = glusterd_op_begin (req, GD_OP_STATUS_VOLUME, dict);
+ count = 0;
+ fprintf(fp, "\n[Services]\n");
+#ifdef BUILD_GNFS
+ if (priv->nfs_svc.inited) {
+ fprintf(fp, "svc%d.name: %s\n", ++count, priv->nfs_svc.name);
+ fprintf(fp, "svc%d.online_status: %s\n\n", count,
+ priv->nfs_svc.online ? "Online" : "Offline");
+ }
+#endif
+ if (priv->bitd_svc.inited) {
+ fprintf(fp, "svc%d.name: %s\n", ++count, priv->bitd_svc.name);
+ fprintf(fp, "svc%d.online_status: %s\n\n", count,
+ priv->bitd_svc.online ? "Online" : "Offline");
+ }
+
+ if (priv->scrub_svc.inited) {
+ fprintf(fp, "svc%d.name: %s\n", ++count, priv->scrub_svc.name);
+ fprintf(fp, "svc%d.online_status: %s\n\n", count,
+ priv->scrub_svc.online ? "Online" : "Offline");
+ }
+
+ if (priv->quotad_svc.inited) {
+ fprintf(fp, "svc%d.name: %s\n", ++count, priv->quotad_svc.name);
+ fprintf(fp, "svc%d.online_status: %s\n\n", count,
+ priv->quotad_svc.online ? "Online" : "Offline");
+ }
+
+ fprintf(fp, "\n[Misc]\n");
+ if (priv->pmap) {
+ fprintf(fp, "Base port: %d\n", priv->pmap->base_port);
+ fprintf(fp, "Last allocated port: %d\n", priv->pmap->last_alloc);
+ }
out:
- if (ret && dict)
- dict_unref (dict);
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ if (fp)
+ fclose(fp);
- if (ret)
- ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
- NULL, "operation failed");
- if (cli_req.dict.dict_val)
- free (cli_req.dict.dict_val);
+ rsp.op_ret = ret;
+ if (rsp.op_errstr == NULL)
+ rsp.op_errstr = err_str;
- return ret;
+ ret = dict_allocate_and_serialize(dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ glusterd_to_cli(req, &rsp, NULL, 0, NULL, (xdrproc_t)xdr_gf_cli_rsp, dict);
+ GF_FREE(rsp.dict.dict_val);
+
+ return ret;
}
-int
-glusterd_handle_cli_clearlocks_volume (rpcsvc_request_t *req)
+static int
+__glusterd_handle_get_state(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- gf_cli_req cli_req = {{0,}};
- glusterd_op_t cli_op = GD_OP_CLEARLOCKS_VOLUME;
- char *volname = NULL;
- dict_t *dict = NULL;
-
- GF_ASSERT (req);
-
- ret = -1;
- if (!xdr_to_generic (req->msg[0], &cli_req,
- (xdrproc_t)xdr_gf_cli_req)) {
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- if (cli_req.dict.dict_len) {
- dict = dict_new ();
-
- ret = dict_unserialize (cli_req.dict.dict_val,
- cli_req.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to unserialize req-buffer to"
- " dictionary");
- goto out;
- }
-
+ int32_t ret = -1;
+ gf_cli_req cli_req = {
+ {
+ 0,
+ },
+ };
+ dict_t *dict = NULL;
+ char err_str[64] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO(THIS->name, this, out);
+ GF_VALIDATE_OR_GOTO(this->name, req, out);
+
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DAEMON_STATE_REQ_RCVD,
+ "Received request to get state for glusterd");
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to decode "
+ "request received from cli");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s",
+ err_str);
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(err_str, sizeof(err_str),
+ "Unable to decode"
+ " the command");
+ goto out;
} else {
- ret = -1;
- gf_log (THIS->name, GF_LOG_ERROR, "Empty cli request.");
- goto out;
- }
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to get volname");
- goto out;
+ dict->extra_stdfree = cli_req.dict.dict_val;
}
+ }
- gf_log (THIS->name, GF_LOG_INFO, "Received clear-locks volume req "
- "for volume %s", volname);
-
- ret = glusterd_op_begin (req, cli_op, dict);
-
- gf_cmd_log ("clear-locks", "on volume %s %s", volname,
- ((0 == ret) ? "SUCCEEDED" : "FAILED"));
+ ret = glusterd_get_state(req, dict);
out:
- if (ret && dict)
- dict_unref (dict);
+ if (dict && ret) {
+ /*
+ * When glusterd_to_cli (called from glusterd_get_state)
+ * succeeds, it frees the dict for us, so this would be a
+ * double free, but in other cases it's our responsibility.
+ */
+ dict_unref(dict);
+ }
+ return ret;
+}
- glusterd_friend_sm ();
- glusterd_op_sm ();
+int
+glusterd_handle_get_state(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __glusterd_handle_get_state);
+}
+static int
+get_brickinfo_from_brickid(char *brickid, glusterd_brickinfo_t **brickinfo)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volid_str = NULL;
+ char *brick = NULL;
+ char *brickid_dup = NULL;
+ uuid_t volid = {0};
+ int ret = -1;
+
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ brickid_dup = gf_strdup(brickid);
+ if (!brickid_dup) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "brick_id=%s", brickid, NULL);
+ goto out;
+ }
+
+ volid_str = brickid_dup;
+ brick = strchr(brickid_dup, ':');
+ if (!volid_str) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
+ goto out;
+ }
+
+ if (!brick) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
+ goto out;
+ }
+
+ *brick = '\0';
+ brick++;
+ gf_uuid_parse(volid_str, volid);
+ ret = glusterd_volinfo_find_by_volume_id(volid, &volinfo);
+ if (ret) {
+ /* Check if it a snapshot volume */
+ ret = glusterd_snap_volinfo_find_by_volume_id(volid, &volinfo);
if (ret)
- ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
- NULL, "operation failed");
- if (cli_req.dict.dict_val)
- free (cli_req.dict.dict_val);
+ goto out;
+ }
- return ret;
+ ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, brickinfo,
+ _gf_false);
+ if (ret)
+ goto out;
+
+ ret = 0;
+out:
+ GF_FREE(brickid_dup);
+ return ret;
}
+static int gd_stale_rpc_disconnect_log;
+
int
-glusterd_brick_rpc_notify (struct rpc_clnt *rpc, void *mydata,
- rpc_clnt_event_t event,
- void *data)
+__glusterd_brick_rpc_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
{
- xlator_t *this = NULL;
- glusterd_conf_t *conf = NULL;
- int ret = 0;
- glusterd_brickinfo_t *brickinfo = NULL;
+ char *brickid = NULL;
+ int ret = 0;
+ glusterd_conf_t *conf = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ int32_t pid = -1;
+ glusterd_brickinfo_t *brickinfo_tmp = NULL;
+ glusterd_brick_proc_t *brick_proc = NULL;
+ char pidfile[PATH_MAX] = {0};
+ char *brickpath = NULL;
+ gf_boolean_t is_service_running = _gf_true;
+
+ brickid = mydata;
+ if (!brickid)
+ return 0;
- brickinfo = mydata;
- if (!brickinfo)
- return 0;
+ ret = get_brickinfo_from_brickid(brickid, &brickinfo);
+ if (ret)
+ return 0;
- this = THIS;
- GF_ASSERT (this);
- conf = this->private;
- GF_ASSERT (conf);
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
- switch (event) {
+ switch (event) {
case RPC_CLNT_CONNECT:
- gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT");
- glusterd_set_brick_status (brickinfo, GF_BRICK_STARTED);
- ret = default_notify (this, GF_EVENT_CHILD_UP, NULL);
-
- break;
-
- case RPC_CLNT_DISCONNECT:
- gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT");
- glusterd_set_brick_status (brickinfo, GF_BRICK_STOPPED);
- break;
+ ret = get_volinfo_from_brickid(brickid, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to get volinfo from "
+ "brickid(%s)",
+ brickid);
+ goto out;
+ }
+ /* If a node on coming back up, already starts a brick
+ * before the handshake, and the notification comes after
+ * the handshake is done, then we need to check if this
+ * is a restored brick with a snapshot pending. If so, we
+ * need to stop the brick
+ */
+ if (brickinfo->snap_status == -1) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SNAPSHOT_PENDING,
+ "Snapshot is pending on %s:%s. "
+ "Hence not starting the brick",
+ brickinfo->hostname, brickinfo->path);
+ ret = glusterd_brick_stop(volinfo, brickinfo, _gf_false);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_STOP_FAIL,
+ "Unable to stop %s:%s", brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
- default:
- gf_log (this->name, GF_LOG_TRACE,
- "got some other RPC event %d", event);
break;
- }
-
- return ret;
-}
-
-int
-glusterd_nodesvc_rpc_notify (struct rpc_clnt *rpc, void *mydata,
- rpc_clnt_event_t event,
- void *data)
-{
- xlator_t *this = NULL;
- glusterd_conf_t *conf = NULL;
- char *server = NULL;
- int ret = 0;
+ }
+ gf_msg_debug(this->name, 0, "Connected to %s:%s",
+ brickinfo->hostname, brickinfo->path);
- this = THIS;
- GF_ASSERT (this);
- conf = this->private;
- GF_ASSERT (conf);
+ glusterd_set_brick_status(brickinfo, GF_BRICK_STARTED);
- server = mydata;
- if (!server)
- return 0;
+ gf_event(EVENT_BRICK_CONNECTED, "peer=%s;volume=%s;brick=%s",
+ brickinfo->hostname, volinfo->volname, brickinfo->path);
- switch (event) {
- case RPC_CLNT_CONNECT:
- gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT");
- (void) glusterd_nodesvc_set_running (server, _gf_true);
- ret = default_notify (this, GF_EVENT_CHILD_UP, NULL);
+ ret = default_notify(this, GF_EVENT_CHILD_UP, NULL);
- break;
+ break;
case RPC_CLNT_DISCONNECT:
- gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT");
- (void) glusterd_nodesvc_set_running (server, _gf_false);
+ if (rpc != brickinfo->rpc) {
+ /*
+ * There used to be a bunch of races in the volume
+ * start/stop code that could result in us getting here
+ * and setting the brick status incorrectly. Many of
+ * those have been fixed or avoided, but just in case
+ * any are still left it doesn't hurt to keep the extra
+ * check and avoid further damage.
+ */
+ GF_LOG_OCCASIONALLY(gd_stale_rpc_disconnect_log, this->name,
+ GF_LOG_WARNING,
+ "got disconnect from stale rpc on "
+ "%s",
+ brickinfo->path);
break;
+ }
+ if (glusterd_is_brick_started(brickinfo)) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_BRICK_DISCONNECTED,
+ "Brick %s:%s has disconnected from glusterd.",
+ brickinfo->hostname, brickinfo->path);
+ ret = get_volinfo_from_brickid(brickid, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to get volinfo from "
+ "brickid(%s)",
+ brickid);
+ goto out;
+ }
+ gf_event(EVENT_BRICK_DISCONNECTED, "peer=%s;volume=%s;brick=%s",
+ brickinfo->hostname, volinfo->volname,
+ brickinfo->path);
+ /* In case of an abrupt shutdown of a brick PMAP_SIGNOUT
+ * event is not received by glusterd which can lead to a
+ * stale port entry in glusterd, so forcibly clean up
+ * the same if the process is not running sometime
+ * gf_is_service_running true so to ensure about brick instance
+ * call search_brick_path_from_proc
+ */
+ GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, conf);
+ is_service_running = gf_is_service_running(pidfile, &pid);
+ if (pid > 0)
+ brickpath = search_brick_path_from_proc(pid,
+ brickinfo->path);
+ if (!is_service_running || !brickpath) {
+ ret = pmap_registry_remove(
+ THIS, brickinfo->port, brickinfo->path,
+ GF_PMAP_PORT_BRICKSERVER, NULL, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING,
+ GD_MSG_PMAP_REGISTRY_REMOVE_FAIL, 0,
+ "Failed to remove pmap "
+ "registry for port %d for "
+ "brick %s",
+ brickinfo->port, brickinfo->path);
+ ret = 0;
+ }
+ }
+ }
+
+ if (brickpath)
+ GF_FREE(brickpath);
+
+ if (is_brick_mx_enabled() && glusterd_is_brick_started(brickinfo)) {
+ brick_proc = brickinfo->brick_proc;
+ if (!brick_proc)
+ break;
+ cds_list_for_each_entry(brickinfo_tmp, &brick_proc->bricks,
+ mux_bricks)
+ {
+ glusterd_set_brick_status(brickinfo_tmp, GF_BRICK_STOPPED);
+ brickinfo_tmp->start_triggered = _gf_false;
+ /* When bricks are stopped, ports also need to
+ * be cleaned up
+ */
+ pmap_registry_remove(
+ THIS, brickinfo_tmp->port, brickinfo_tmp->path,
+ GF_PMAP_PORT_BRICKSERVER, NULL, _gf_true);
+ }
+ } else {
+ glusterd_set_brick_status(brickinfo, GF_BRICK_STOPPED);
+ brickinfo->start_triggered = _gf_false;
+ }
+ break;
+
+ case RPC_CLNT_DESTROY:
+ GF_FREE(mydata);
+ mydata = NULL;
+ break;
default:
- gf_log (this->name, GF_LOG_TRACE,
- "got some other RPC event %d", event);
- break;
- }
+ gf_msg_trace(this->name, 0, "got some other RPC event %d", event);
+ break;
+ }
- return ret;
+out:
+ return ret;
}
int
-glusterd_friend_remove_notify (glusterd_peerctx_t *peerctx)
+glusterd_brick_rpc_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
{
- int ret = -1;
- glusterd_friend_sm_event_t *new_event = NULL;
- glusterd_peerinfo_t *peerinfo = peerctx->peerinfo;
- rpcsvc_request_t *req = peerctx->args.req;
- char *errstr = peerctx->errstr;
-
- GF_ASSERT (peerctx);
-
- peerinfo = peerctx->peerinfo;
- req = peerctx->args.req;
- errstr = peerctx->errstr;
+ return glusterd_big_locked_notify(rpc, mydata, event, data,
+ __glusterd_brick_rpc_notify);
+}
- ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_REMOVE_FRIEND,
- &new_event);
- if (!ret) {
- if (!req) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "Unable to find the request for responding "
- "to User (%s)", peerinfo->hostname);
- goto out;
- }
+int
+glusterd_friend_remove_notify(glusterd_peerctx_t *peerctx, int32_t op_errno)
+{
+ int ret = -1;
+ glusterd_friend_sm_event_t *new_event = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ rpcsvc_request_t *req = NULL;
+ char *errstr = NULL;
+ dict_t *dict = NULL;
+
+ GF_ASSERT(peerctx);
+
+ RCU_READ_LOCK;
+ peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
+ if (!peerinfo) {
+ gf_msg_debug(THIS->name, 0,
+ "Could not find peer %s(%s). "
+ "Peer could have been deleted.",
+ peerctx->peername, uuid_utoa(peerctx->peerid));
+ ret = 0;
+ goto out;
+ }
+
+ req = peerctx->args.req;
+ dict = peerctx->args.dict;
+ errstr = peerctx->errstr;
+
+ ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_REMOVE_FRIEND,
+ &new_event);
+ if (!ret) {
+ if (!req) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_EVENT_NEW_GET_FAIL,
+ "Unable to find the request for responding "
+ "to User (%s)",
+ peerinfo->hostname);
+ goto out;
+ }
- glusterd_xfer_cli_probe_resp (req, -1, ENOTCONN, errstr,
- peerinfo->hostname, peerinfo->port);
+ glusterd_xfer_cli_probe_resp(req, -1, op_errno, errstr,
+ peerinfo->hostname, peerinfo->port, dict);
- new_event->peerinfo = peerinfo;
- ret = glusterd_friend_sm_inject_event (new_event);
- glusterd_friend_sm ();
+ new_event->peername = gf_strdup(peerinfo->hostname);
+ gf_uuid_copy(new_event->peerid, peerinfo->uuid);
+ ret = glusterd_friend_sm_inject_event(new_event);
- } else {
- gf_log ("glusterd", GF_LOG_ERROR,
- "Unable to create event for removing peer %s",
- peerinfo->hostname);
- }
+ } else {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL,
+ "Unable to create event for removing peer %s",
+ peerinfo->hostname);
+ }
out:
- return ret;
+ RCU_READ_UNLOCK;
+ return ret;
}
int
-glusterd_peer_rpc_notify (struct rpc_clnt *rpc, void *mydata,
- rpc_clnt_event_t event,
- void *data)
+__glusterd_peer_rpc_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
{
- xlator_t *this = NULL;
- glusterd_conf_t *conf = NULL;
- int ret = 0;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_peerctx_t *peerctx = NULL;
- uuid_t owner = {0,};
- uuid_t *peer_uuid = NULL;
-
- peerctx = mydata;
- if (!peerctx)
- return 0;
-
- peerinfo = peerctx->peerinfo;
- this = THIS;
- conf = this->private;
-
- switch (event) {
- case RPC_CLNT_CONNECT:
- {
- gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_CONNECT");
- peerinfo->connected = 1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ int ret = 0;
+ int32_t op_errno = ENOTCONN;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peerctx_t *peerctx = NULL;
+ gf_boolean_t quorum_action = _gf_false;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterfs_ctx_t *ctx = NULL;
+
+ uuid_t uuid;
+
+ peerctx = mydata;
+ if (!peerctx)
+ return 0;
- ret = glusterd_peer_handshake (this, rpc, peerctx);
- if (ret)
- gf_log ("", GF_LOG_ERROR, "glusterd handshake failed");
- break;
+ this = THIS;
+ conf = this->private;
+
+ switch (event) {
+ case RPC_CLNT_DESTROY:
+ GF_FREE(peerctx->errstr);
+ GF_FREE(peerctx->peername);
+ GF_FREE(peerctx);
+ return 0;
+ case RPC_CLNT_PING:
+ return 0;
+ default:
+ break;
+ }
+ ctx = this->ctx;
+ GF_VALIDATE_OR_GOTO(this->name, ctx, out);
+ if (ctx->cleanup_started) {
+ gf_log(this->name, GF_LOG_INFO,
+ "glusterd already received a SIGTERM, "
+ "dropping the event %d for peer %s",
+ event, peerctx->peername);
+ return 0;
+ }
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
+ if (!peerinfo) {
+ /* Peerinfo should be available at this point if its a connect
+ * event. Not finding it means that something terrible has
+ * happened. For non-connect event we might end up having a null
+ * peerinfo, so log at debug level.
+ */
+ gf_msg(THIS->name,
+ (RPC_CLNT_CONNECT == event) ? GF_LOG_CRITICAL : GF_LOG_DEBUG,
+ ENOENT, GD_MSG_PEER_NOT_FOUND,
+ "Could not find peer "
+ "%s(%s)",
+ peerctx->peername, uuid_utoa(peerctx->peerid));
+
+ if (RPC_CLNT_CONNECT == event) {
+ gf_event(EVENT_PEER_NOT_FOUND, "peer=%s;uuid=%s", peerctx->peername,
+ uuid_utoa(peerctx->peerid));
+ }
+ ret = -1;
+ goto out;
+ }
+
+ switch (event) {
+ case RPC_CLNT_CONNECT: {
+ gf_msg_debug(this->name, 0, "got RPC_CLNT_CONNECT");
+ peerinfo->connected = 1;
+ peerinfo->quorum_action = _gf_true;
+ peerinfo->generation = uatomic_add_return(&conf->generation, 1);
+ peerctx->peerinfo_gen = peerinfo->generation;
+ /* EVENT_PEER_CONNECT will only be sent if peerctx->uuid is not
+ * NULL, otherwise it indicates this RPC_CLNT_CONNECT is from a
+ * peer probe trigger and given we already generate an event for
+ * peer probe this would be unnecessary.
+ */
+ if (!gf_uuid_is_null(peerinfo->uuid)) {
+ gf_event(EVENT_PEER_CONNECT, "host=%s;uuid=%s",
+ peerinfo->hostname, uuid_utoa(peerinfo->uuid));
+ }
+ ret = glusterd_peer_dump_version(this, rpc, peerctx);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HANDSHAKE_FAILED,
+ "glusterd handshake failed");
+ break;
}
- case RPC_CLNT_DISCONNECT:
- {
- gf_log (this->name, GF_LOG_DEBUG, "got RPC_CLNT_DISCONNECT %d",
- peerinfo->state.state);
-
- peerinfo->connected = 0;
+ case RPC_CLNT_DISCONNECT: {
+ /* If DISCONNECT event is already processed, skip the further
+ * ones
+ */
+ if (is_rpc_clnt_disconnected(&rpc->conn))
+ break;
- /*
- local glusterd (thinks that it) is the owner of the cluster
- lock and 'fails' the operation on the first disconnect from
- a peer.
- */
- glusterd_get_lock_owner (&owner);
- if (!uuid_compare (conf->uuid, owner)) {
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_START_UNLOCK,
- NULL);
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_PEER_DISCONNECTED,
+ "Peer <%s> (<%s>), in state <%s>, has disconnected "
+ "from glusterd.",
+ peerinfo->hostname, uuid_utoa(peerinfo->uuid),
+ glusterd_friend_sm_state_name_get(peerinfo->state.state));
+ gf_event(EVENT_PEER_DISCONNECT, "peer=%s;uuid=%s;state=%s",
+ peerinfo->hostname, uuid_utoa(peerinfo->uuid),
+ glusterd_friend_sm_state_name_get(peerinfo->state.state));
+
+ if (peerinfo->connected) {
+ if (conf->op_version < GD_OP_VERSION_3_6_0) {
+ glusterd_get_lock_owner(&uuid);
+ if (!gf_uuid_is_null(uuid) &&
+ !gf_uuid_compare(peerinfo->uuid, uuid))
+ glusterd_unlock(peerinfo->uuid);
+ } else {
+ cds_list_for_each_entry(volinfo, &conf->volumes, vol_list)
+ {
+ ret = glusterd_mgmt_v3_unlock(volinfo->volname,
+ peerinfo->uuid, "vol");
if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Unable"
- " to enqueue cluster unlock event");
- break;
- }
-
- peer_uuid = GF_CALLOC (1, sizeof (*peer_uuid), gf_common_mt_char);
- if (!peer_uuid) {
- ret = -1;
- break;
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Lock not released "
+ "for %s",
+ volinfo->volname);
+ }
}
- uuid_copy (*peer_uuid, peerinfo->uuid);
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP,
- peer_uuid);
- if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Unable"
- " to enque local lock flush event.");
-
- //Inject friend disconnected here
- if (peerinfo->state.state == GD_FRIEND_STATE_DEFAULT) {
- glusterd_friend_remove_notify (peerctx);
- }
+ op_errno = GF_PROBE_ANOTHER_CLUSTER;
+ ret = 0;
+ }
+
+ if ((peerinfo->quorum_contrib != QUORUM_DOWN) &&
+ (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED)) {
+ peerinfo->quorum_contrib = QUORUM_DOWN;
+ quorum_action = _gf_true;
+ peerinfo->quorum_action = _gf_false;
+ }
+
+ /* Remove peer if it is not a friend and connection/handshake
+ * fails, and notify cli. Happens only during probe.
+ */
+ if (peerinfo->state.state == GD_FRIEND_STATE_DEFAULT) {
+ glusterd_friend_remove_notify(peerctx, op_errno);
+ goto out;
+ }
- //default_notify (this, GF_EVENT_CHILD_DOWN, NULL);
- break;
+ peerinfo->connected = 0;
+ break;
}
+
default:
- gf_log (this->name, GF_LOG_TRACE,
- "got some other RPC event %d", event);
- ret = 0;
- break;
- }
+ gf_msg_trace(this->name, 0, "got some other RPC event %d", event);
+ ret = 0;
+ break;
+ }
- glusterd_friend_sm ();
- glusterd_op_sm ();
- return ret;
+out:
+ RCU_READ_UNLOCK;
+
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ if (quorum_action)
+ glusterd_do_quorum_action();
+ return ret;
}
int
-glusterd_null (rpcsvc_request_t *req)
+glusterd_peer_rpc_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
{
+ return glusterd_big_locked_notify(rpc, mydata, event, data,
+ __glusterd_peer_rpc_notify);
+}
- return 0;
+int
+glusterd_null(rpcsvc_request_t *req)
+{
+ return 0;
}
-rpcsvc_actor_t gd_svc_mgmt_actors[] = {
- [GLUSTERD_MGMT_NULL] = { "NULL", GLUSTERD_MGMT_NULL, glusterd_null, NULL, NULL, 0},
- [GLUSTERD_MGMT_CLUSTER_LOCK] = { "CLUSTER_LOCK", GLUSTERD_MGMT_CLUSTER_LOCK, glusterd_handle_cluster_lock, NULL, NULL, 0},
- [GLUSTERD_MGMT_CLUSTER_UNLOCK] = { "CLUSTER_UNLOCK", GLUSTERD_MGMT_CLUSTER_UNLOCK, glusterd_handle_cluster_unlock, NULL, NULL, 0},
- [GLUSTERD_MGMT_STAGE_OP] = { "STAGE_OP", GLUSTERD_MGMT_STAGE_OP, glusterd_handle_stage_op, NULL, NULL, 0},
- [GLUSTERD_MGMT_COMMIT_OP] = { "COMMIT_OP", GLUSTERD_MGMT_COMMIT_OP, glusterd_handle_commit_op, NULL, NULL, 0},
+static rpcsvc_actor_t gd_svc_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = {
+ [GLUSTERD_MGMT_NULL] = {"NULL", glusterd_null, NULL, GLUSTERD_MGMT_NULL,
+ DRC_NA, 0},
+ [GLUSTERD_MGMT_CLUSTER_LOCK] = {"CLUSTER_LOCK",
+ glusterd_handle_cluster_lock, NULL,
+ GLUSTERD_MGMT_CLUSTER_LOCK, DRC_NA, 0},
+ [GLUSTERD_MGMT_CLUSTER_UNLOCK] = {"CLUSTER_UNLOCK",
+ glusterd_handle_cluster_unlock, NULL,
+ GLUSTERD_MGMT_CLUSTER_UNLOCK, DRC_NA, 0},
+ [GLUSTERD_MGMT_STAGE_OP] = {"STAGE_OP", glusterd_handle_stage_op, NULL,
+ GLUSTERD_MGMT_STAGE_OP, DRC_NA, 0},
+ [GLUSTERD_MGMT_COMMIT_OP] =
+ {
+ "COMMIT_OP",
+ glusterd_handle_commit_op,
+ NULL,
+ GLUSTERD_MGMT_COMMIT_OP,
+ DRC_NA,
+ 0,
+ },
};
struct rpcsvc_program gd_svc_mgmt_prog = {
- .progname = "GlusterD svc mgmt",
- .prognum = GD_MGMT_PROGRAM,
- .progver = GD_MGMT_VERSION,
- .numactors = GLUSTERD_MGMT_MAXVALUE,
- .actors = gd_svc_mgmt_actors,
+ .progname = "GlusterD svc mgmt",
+ .prognum = GD_MGMT_PROGRAM,
+ .progver = GD_MGMT_VERSION,
+ .numactors = GLUSTERD_MGMT_MAXVALUE,
+ .actors = gd_svc_mgmt_actors,
+ .synctask = _gf_true,
};
-rpcsvc_actor_t gd_svc_peer_actors[] = {
- [GLUSTERD_FRIEND_NULL] = { "NULL", GLUSTERD_MGMT_NULL, glusterd_null, NULL, NULL, 0},
- [GLUSTERD_PROBE_QUERY] = { "PROBE_QUERY", GLUSTERD_PROBE_QUERY, glusterd_handle_probe_query, NULL, NULL, 0},
- [GLUSTERD_FRIEND_ADD] = { "FRIEND_ADD", GLUSTERD_FRIEND_ADD, glusterd_handle_incoming_friend_req, NULL, NULL, 0},
- [GLUSTERD_FRIEND_REMOVE] = { "FRIEND_REMOVE", GLUSTERD_FRIEND_REMOVE, glusterd_handle_incoming_unfriend_req, NULL, NULL, 0},
- [GLUSTERD_FRIEND_UPDATE] = { "FRIEND_UPDATE", GLUSTERD_FRIEND_UPDATE, glusterd_handle_friend_update, NULL, NULL, 0},
+static rpcsvc_actor_t gd_svc_peer_actors[GLUSTERD_FRIEND_MAXVALUE] = {
+ [GLUSTERD_FRIEND_NULL] = {"NULL", glusterd_null, NULL, GLUSTERD_MGMT_NULL,
+ DRC_NA, 0},
+ [GLUSTERD_PROBE_QUERY] = {"PROBE_QUERY", glusterd_handle_probe_query, NULL,
+ GLUSTERD_PROBE_QUERY, DRC_NA, 0},
+ [GLUSTERD_FRIEND_ADD] = {"FRIEND_ADD", glusterd_handle_incoming_friend_req,
+ NULL, GLUSTERD_FRIEND_ADD, DRC_NA, 0},
+ [GLUSTERD_FRIEND_REMOVE] = {"FRIEND_REMOVE",
+ glusterd_handle_incoming_unfriend_req, NULL,
+ GLUSTERD_FRIEND_REMOVE, DRC_NA, 0},
+ [GLUSTERD_FRIEND_UPDATE] = {"FRIEND_UPDATE", glusterd_handle_friend_update,
+ NULL, GLUSTERD_FRIEND_UPDATE, DRC_NA, 0},
};
struct rpcsvc_program gd_svc_peer_prog = {
- .progname = "GlusterD svc peer",
- .prognum = GD_FRIEND_PROGRAM,
- .progver = GD_FRIEND_VERSION,
- .numactors = GLUSTERD_FRIEND_MAXVALUE,
- .actors = gd_svc_peer_actors,
+ .progname = "GlusterD svc peer",
+ .prognum = GD_FRIEND_PROGRAM,
+ .progver = GD_FRIEND_VERSION,
+ .numactors = GLUSTERD_FRIEND_MAXVALUE,
+ .actors = gd_svc_peer_actors,
+ .synctask = _gf_false,
};
+static rpcsvc_actor_t gd_svc_cli_actors[GLUSTER_CLI_MAXVALUE] = {
+ [GLUSTER_CLI_PROBE] = {"CLI_PROBE", glusterd_handle_cli_probe, NULL,
+ GLUSTER_CLI_PROBE, DRC_NA, 0},
+ [GLUSTER_CLI_CREATE_VOLUME] = {"CLI_CREATE_VOLUME",
+ glusterd_handle_create_volume, NULL,
+ GLUSTER_CLI_CREATE_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_DEFRAG_VOLUME] = {"CLI_DEFRAG_VOLUME",
+ glusterd_handle_defrag_volume, NULL,
+ GLUSTER_CLI_DEFRAG_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_DEPROBE] = {"FRIEND_REMOVE", glusterd_handle_cli_deprobe, NULL,
+ GLUSTER_CLI_DEPROBE, DRC_NA, 0},
+ [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS",
+ glusterd_handle_cli_list_friends, NULL,
+ GLUSTER_CLI_LIST_FRIENDS, DRC_NA, 0},
+ [GLUSTER_CLI_UUID_RESET] = {"UUID_RESET", glusterd_handle_cli_uuid_reset,
+ NULL, GLUSTER_CLI_UUID_RESET, DRC_NA, 0},
+ [GLUSTER_CLI_UUID_GET] = {"UUID_GET", glusterd_handle_cli_uuid_get, NULL,
+ GLUSTER_CLI_UUID_GET, DRC_NA, 0},
+ [GLUSTER_CLI_START_VOLUME] = {"START_VOLUME",
+ glusterd_handle_cli_start_volume, NULL,
+ GLUSTER_CLI_START_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_STOP_VOLUME] = {"STOP_VOLUME", glusterd_handle_cli_stop_volume,
+ NULL, GLUSTER_CLI_STOP_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_DELETE_VOLUME] = {"DELETE_VOLUME",
+ glusterd_handle_cli_delete_volume, NULL,
+ GLUSTER_CLI_DELETE_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", glusterd_handle_cli_get_volume,
+ NULL, GLUSTER_CLI_GET_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_ADD_BRICK] = {"ADD_BRICK", glusterd_handle_add_brick, NULL,
+ GLUSTER_CLI_ADD_BRICK, DRC_NA, 0},
+ [GLUSTER_CLI_ATTACH_TIER] = {"ATTACH_TIER", glusterd_handle_attach_tier,
+ NULL, GLUSTER_CLI_ATTACH_TIER, DRC_NA, 0},
+ [GLUSTER_CLI_REPLACE_BRICK] = {"REPLACE_BRICK",
+ glusterd_handle_replace_brick, NULL,
+ GLUSTER_CLI_REPLACE_BRICK, DRC_NA, 0},
+ [GLUSTER_CLI_REMOVE_BRICK] = {"REMOVE_BRICK", glusterd_handle_remove_brick,
+ NULL, GLUSTER_CLI_REMOVE_BRICK, DRC_NA, 0},
+ [GLUSTER_CLI_LOG_ROTATE] = {"LOG FILENAME", glusterd_handle_log_rotate,
+ NULL, GLUSTER_CLI_LOG_ROTATE, DRC_NA, 0},
+ [GLUSTER_CLI_SET_VOLUME] = {"SET_VOLUME", glusterd_handle_set_volume, NULL,
+ GLUSTER_CLI_SET_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_SYNC_VOLUME] = {"SYNC_VOLUME", glusterd_handle_sync_volume,
+ NULL, GLUSTER_CLI_SYNC_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_RESET_VOLUME] = {"RESET_VOLUME", glusterd_handle_reset_volume,
+ NULL, GLUSTER_CLI_RESET_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_FSM_LOG] = {"FSM_LOG", glusterd_handle_fsm_log, NULL,
+ GLUSTER_CLI_FSM_LOG, DRC_NA, 0},
+ [GLUSTER_CLI_GSYNC_SET] = {"GSYNC_SET", glusterd_handle_gsync_set, NULL,
+ GLUSTER_CLI_GSYNC_SET, DRC_NA, 0},
+ [GLUSTER_CLI_PROFILE_VOLUME] = {"STATS_VOLUME",
+ glusterd_handle_cli_profile_volume, NULL,
+ GLUSTER_CLI_PROFILE_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_QUOTA] = {"QUOTA", glusterd_handle_quota, NULL,
+ GLUSTER_CLI_QUOTA, DRC_NA, 0},
+ [GLUSTER_CLI_GETWD] = {"GETWD", glusterd_handle_getwd, NULL,
+ GLUSTER_CLI_GETWD, DRC_NA, 1},
+ [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME",
+ glusterd_handle_status_volume, NULL,
+ GLUSTER_CLI_STATUS_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_MOUNT] = {"MOUNT", glusterd_handle_mount, NULL,
+ GLUSTER_CLI_MOUNT, DRC_NA, 1},
+ [GLUSTER_CLI_UMOUNT] = {"UMOUNT", glusterd_handle_umount, NULL,
+ GLUSTER_CLI_UMOUNT, DRC_NA, 1},
+ [GLUSTER_CLI_HEAL_VOLUME] = {"HEAL_VOLUME", glusterd_handle_cli_heal_volume,
+ NULL, GLUSTER_CLI_HEAL_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME",
+ glusterd_handle_cli_statedump_volume,
+ NULL, GLUSTER_CLI_STATEDUMP_VOLUME,
+ DRC_NA, 0},
+ [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", glusterd_handle_cli_list_volume,
+ NULL, GLUSTER_CLI_LIST_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME",
+ glusterd_handle_cli_clearlocks_volume,
+ NULL, GLUSTER_CLI_CLRLOCKS_VOLUME, DRC_NA,
+ 0},
+ [GLUSTER_CLI_COPY_FILE] = {"COPY_FILE", glusterd_handle_copy_file, NULL,
+ GLUSTER_CLI_COPY_FILE, DRC_NA, 0},
+ [GLUSTER_CLI_SYS_EXEC] = {"SYS_EXEC", glusterd_handle_sys_exec, NULL,
+ GLUSTER_CLI_SYS_EXEC, DRC_NA, 0},
+ [GLUSTER_CLI_SNAP] = {"SNAP", glusterd_handle_snapshot, NULL,
+ GLUSTER_CLI_SNAP, DRC_NA, 0},
+ [GLUSTER_CLI_BARRIER_VOLUME] = {"BARRIER_VOLUME", glusterd_handle_barrier,
+ NULL, GLUSTER_CLI_BARRIER_VOLUME, DRC_NA,
+ 0},
+ [GLUSTER_CLI_GANESHA] = {"GANESHA", glusterd_handle_ganesha_cmd, NULL,
+ GLUSTER_CLI_GANESHA, DRC_NA, 0},
+ [GLUSTER_CLI_GET_VOL_OPT] = {"GET_VOL_OPT", glusterd_handle_get_vol_opt,
+ NULL, DRC_NA, 0},
+ [GLUSTER_CLI_BITROT] = {"BITROT", glusterd_handle_bitrot, NULL,
+ GLUSTER_CLI_BITROT, DRC_NA, 0},
+ [GLUSTER_CLI_GET_STATE] = {"GET_STATE", glusterd_handle_get_state, NULL,
+ GLUSTER_CLI_GET_STATE, DRC_NA, 0},
+ [GLUSTER_CLI_RESET_BRICK] = {"RESET_BRICK", glusterd_handle_reset_brick,
+ NULL, GLUSTER_CLI_RESET_BRICK, DRC_NA, 0},
+ [GLUSTER_CLI_TIER] = {"TIER", glusterd_handle_tier, NULL, GLUSTER_CLI_TIER,
+ DRC_NA, 0},
+ [GLUSTER_CLI_REMOVE_TIER_BRICK] = {"REMOVE_TIER_BRICK",
+ glusterd_handle_tier, NULL,
+ GLUSTER_CLI_REMOVE_TIER_BRICK, DRC_NA,
+ 0},
+ [GLUSTER_CLI_ADD_TIER_BRICK] = {"ADD_TIER_BRICK",
+ glusterd_handle_add_tier_brick, NULL,
+ GLUSTER_CLI_ADD_TIER_BRICK, DRC_NA, 0},
+};
+struct rpcsvc_program gd_svc_cli_prog = {
+ .progname = "GlusterD svc cli",
+ .prognum = GLUSTER_CLI_PROGRAM,
+ .progver = GLUSTER_CLI_VERSION,
+ .numactors = GLUSTER_CLI_MAXVALUE,
+ .actors = gd_svc_cli_actors,
+ .synctask = _gf_true,
+};
-rpcsvc_actor_t gd_svc_cli_actors[] = {
- [GLUSTER_CLI_PROBE] = { "CLI_PROBE", GLUSTER_CLI_PROBE, glusterd_handle_cli_probe, NULL, NULL, 0},
- [GLUSTER_CLI_CREATE_VOLUME] = { "CLI_CREATE_VOLUME", GLUSTER_CLI_CREATE_VOLUME, glusterd_handle_create_volume, NULL,NULL, 0},
- [GLUSTER_CLI_DEFRAG_VOLUME] = { "CLI_DEFRAG_VOLUME", GLUSTER_CLI_DEFRAG_VOLUME, glusterd_handle_defrag_volume, NULL,NULL, 0},
- [GLUSTER_CLI_DEPROBE] = { "FRIEND_REMOVE", GLUSTER_CLI_DEPROBE, glusterd_handle_cli_deprobe, NULL, NULL, 0},
- [GLUSTER_CLI_LIST_FRIENDS] = { "LIST_FRIENDS", GLUSTER_CLI_LIST_FRIENDS, glusterd_handle_cli_list_friends, NULL, NULL, 0},
- [GLUSTER_CLI_START_VOLUME] = { "START_VOLUME", GLUSTER_CLI_START_VOLUME, glusterd_handle_cli_start_volume, NULL, NULL, 0},
- [GLUSTER_CLI_STOP_VOLUME] = { "STOP_VOLUME", GLUSTER_CLI_STOP_VOLUME, glusterd_handle_cli_stop_volume, NULL, NULL, 0},
- [GLUSTER_CLI_DELETE_VOLUME] = { "DELETE_VOLUME", GLUSTER_CLI_DELETE_VOLUME, glusterd_handle_cli_delete_volume, NULL, NULL, 0},
- [GLUSTER_CLI_GET_VOLUME] = { "GET_VOLUME", GLUSTER_CLI_GET_VOLUME, glusterd_handle_cli_get_volume, NULL, NULL, 0},
- [GLUSTER_CLI_ADD_BRICK] = { "ADD_BRICK", GLUSTER_CLI_ADD_BRICK, glusterd_handle_add_brick, NULL, NULL, 0},
- [GLUSTER_CLI_REPLACE_BRICK] = { "REPLACE_BRICK", GLUSTER_CLI_REPLACE_BRICK, glusterd_handle_replace_brick, NULL, NULL, 0},
- [GLUSTER_CLI_REMOVE_BRICK] = { "REMOVE_BRICK", GLUSTER_CLI_REMOVE_BRICK, glusterd_handle_remove_brick, NULL, NULL, 0},
- [GLUSTER_CLI_LOG_ROTATE] = { "LOG FILENAME", GLUSTER_CLI_LOG_ROTATE, glusterd_handle_log_rotate, NULL, NULL, 0},
- [GLUSTER_CLI_SET_VOLUME] = { "SET_VOLUME", GLUSTER_CLI_SET_VOLUME, glusterd_handle_set_volume, NULL, NULL, 0},
- [GLUSTER_CLI_SYNC_VOLUME] = { "SYNC_VOLUME", GLUSTER_CLI_SYNC_VOLUME, glusterd_handle_sync_volume, NULL, NULL, 0},
- [GLUSTER_CLI_RESET_VOLUME] = { "RESET_VOLUME", GLUSTER_CLI_RESET_VOLUME, glusterd_handle_reset_volume, NULL, NULL, 0},
- [GLUSTER_CLI_FSM_LOG] = { "FSM_LOG", GLUSTER_CLI_FSM_LOG, glusterd_handle_fsm_log, NULL, NULL, 0},
- [GLUSTER_CLI_GSYNC_SET] = { "GSYNC_SET", GLUSTER_CLI_GSYNC_SET, glusterd_handle_gsync_set, NULL, NULL, 0},
- [GLUSTER_CLI_PROFILE_VOLUME] = { "STATS_VOLUME", GLUSTER_CLI_PROFILE_VOLUME, glusterd_handle_cli_profile_volume, NULL, NULL, 0},
- [GLUSTER_CLI_QUOTA] = { "QUOTA", GLUSTER_CLI_QUOTA, glusterd_handle_quota, NULL, NULL, 0},
- [GLUSTER_CLI_GETWD] = { "GETWD", GLUSTER_CLI_GETWD, glusterd_handle_getwd, NULL, NULL, 1},
- [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME", GLUSTER_CLI_STATUS_VOLUME, glusterd_handle_status_volume, NULL, NULL, 0},
- [GLUSTER_CLI_MOUNT] = { "MOUNT", GLUSTER_CLI_MOUNT, glusterd_handle_mount, NULL, NULL, 1},
- [GLUSTER_CLI_UMOUNT] = { "UMOUNT", GLUSTER_CLI_UMOUNT, glusterd_handle_umount, NULL, NULL, 1},
- [GLUSTER_CLI_HEAL_VOLUME] = { "HEAL_VOLUME", GLUSTER_CLI_HEAL_VOLUME, glusterd_handle_cli_heal_volume, NULL, NULL, 0},
- [GLUSTER_CLI_STATEDUMP_VOLUME] = {"STATEDUMP_VOLUME", GLUSTER_CLI_STATEDUMP_VOLUME, glusterd_handle_cli_statedump_volume, NULL, NULL, 0},
- [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", GLUSTER_CLI_LIST_VOLUME, glusterd_handle_cli_list_volume, NULL, NULL, 0},
- [GLUSTER_CLI_CLRLOCKS_VOLUME] = {"CLEARLOCKS_VOLUME", GLUSTER_CLI_CLRLOCKS_VOLUME, glusterd_handle_cli_clearlocks_volume, NULL, NULL, 0},
+/**
+ * This set of RPC progs are deemed to be trusted. Most of the actors support
+ * read only queries, the only exception being MOUNT/UMOUNT which is required
+ * by geo-replication to support unprivileged master -> slave sessions.
+ */
+static rpcsvc_actor_t gd_svc_cli_trusted_actors[GLUSTER_CLI_MAXVALUE] = {
+ [GLUSTER_CLI_LIST_FRIENDS] = {"LIST_FRIENDS",
+ glusterd_handle_cli_list_friends, NULL,
+ GLUSTER_CLI_LIST_FRIENDS, DRC_NA, 0},
+ [GLUSTER_CLI_UUID_GET] = {"UUID_GET", glusterd_handle_cli_uuid_get, NULL,
+ GLUSTER_CLI_UUID_GET, DRC_NA, 0},
+ [GLUSTER_CLI_GET_VOLUME] = {"GET_VOLUME", glusterd_handle_cli_get_volume,
+ NULL, GLUSTER_CLI_GET_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_GETWD] = {"GETWD", glusterd_handle_getwd, NULL,
+ GLUSTER_CLI_GETWD, DRC_NA, 1},
+ [GLUSTER_CLI_STATUS_VOLUME] = {"STATUS_VOLUME",
+ glusterd_handle_status_volume, NULL,
+ GLUSTER_CLI_STATUS_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_LIST_VOLUME] = {"LIST_VOLUME", glusterd_handle_cli_list_volume,
+ NULL, GLUSTER_CLI_LIST_VOLUME, DRC_NA, 0},
+ [GLUSTER_CLI_MOUNT] = {"MOUNT", glusterd_handle_mount, NULL,
+ GLUSTER_CLI_MOUNT, DRC_NA, 1},
+ [GLUSTER_CLI_UMOUNT] = {"UMOUNT", glusterd_handle_umount, NULL,
+ GLUSTER_CLI_UMOUNT, DRC_NA, 1},
};
-struct rpcsvc_program gd_svc_cli_prog = {
- .progname = "GlusterD svc cli",
- .prognum = GLUSTER_CLI_PROGRAM,
- .progver = GLUSTER_CLI_VERSION,
- .numactors = GLUSTER_CLI_MAXVALUE,
- .actors = gd_svc_cli_actors,
+struct rpcsvc_program gd_svc_cli_trusted_progs = {
+ .progname = "GlusterD svc cli read-only",
+ .prognum = GLUSTER_CLI_PROGRAM,
+ .progver = GLUSTER_CLI_VERSION,
+ .numactors = GLUSTER_CLI_MAXVALUE,
+ .actors = gd_svc_cli_trusted_actors,
+ .synctask = _gf_true,
};
+
+/* As we cant remove the handlers, I'm moving the tier based
+ * handlers to this file as we no longer have gluster-tier.c
+ * and other tier.c files
+ */
+
+int
+glusterd_handle_tier(rpcsvc_request_t *req)
+{
+ return 0;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-handshake.c b/xlators/mgmt/glusterd/src/glusterd-handshake.c
index 2d9dd8ada7e..d96e35503dd 100644
--- a/xlators/mgmt/glusterd/src/glusterd-handshake.c
+++ b/xlators/mgmt/glusterd/src/glusterd-handshake.c
@@ -1,521 +1,2580 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
-#include "xlator.h"
-#include "defaults.h"
-#include "glusterfs.h"
-#include "compat-errno.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/compat-errno.h>
#include "glusterd.h"
#include "glusterd-utils.h"
#include "glusterd-op-sm.h"
-
+#include "glusterd-store.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-snapd-svc-helper.h"
+#include "glusterd-volgen.h"
+#include "glusterd-quotad-svc.h"
+#include "glusterd-messages.h"
#include "glusterfs3.h"
#include "protocol-common.h"
#include "rpcsvc.h"
#include "rpc-common-xdr.h"
+#include "glusterd-gfproxyd-svc-helper.h"
+#include "glusterd-shd-svc-helper.h"
extern struct rpc_clnt_program gd_peer_prog;
extern struct rpc_clnt_program gd_mgmt_prog;
+extern struct rpc_clnt_program gd_mgmt_v3_prog;
-#define TRUSTED_PREFIX "trusted-"
+#define TRUSTED_PREFIX "trusted-"
+#define GD_PEER_ID_KEY "peer-id"
-typedef ssize_t (*gfs_serialize_t) (struct iovec outmsg, void *data);
+typedef ssize_t (*gfs_serialize_t)(struct iovec outmsg, void *data);
-static size_t
-build_volfile_path (const char *volname, char *path,
- size_t path_len, char *trusted_str)
+static int
+get_snap_volname_and_volinfo(const char *volpath, char **volname,
+ glusterd_volinfo_t **volinfo)
{
- struct stat stbuf = {0,};
- int32_t ret = -1;
- glusterd_conf_t *priv = NULL;
- char *vol = NULL;
- char *dup_volname = NULL;
- char *free_ptr = NULL;
- char *tmp = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- char *server = NULL;
-
- priv = THIS->private;
+ int ret = -1;
+ char *save_ptr = NULL;
+ char *str_token = NULL;
+ char *snapname = NULL;
+ char *volname_token = NULL;
+ char *vol = NULL;
+ glusterd_snap_t *snap = NULL;
+ xlator_t *this = NULL;
+ char *tmp_str_token = NULL;
+ char *volfile_token = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(volpath);
+ GF_ASSERT(volinfo);
+
+ str_token = gf_strdup(volpath);
+ if (NULL == str_token) {
+ goto out;
+ }
+
+ tmp_str_token = str_token;
+
+ /* Input volname will have below formats:
+ * /snaps/<snapname>/<volname>.<hostname>
+ * or
+ * /snaps/<snapname>/<parent-volname>
+ * We need to extract snapname and parent_volname */
+
+ /*split string by "/" */
+ strtok_r(str_token, "/", &save_ptr);
+ snapname = strtok_r(NULL, "/", &save_ptr);
+ if (!snapname) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid path: %s", volpath);
+ goto out;
+ }
+
+ volname_token = strtok_r(NULL, "/", &save_ptr);
+ if (!volname_token) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid path: %s", volpath);
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name(snapname);
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND,
+ "Failed to "
+ "fetch snap %s",
+ snapname);
+ goto out;
+ }
+
+ /* Find if its a parent volume name or snap volume
+ * name. This function will succeed if volname_token
+ * is a parent volname
+ */
+ ret = glusterd_volinfo_find(volname_token, volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "failed to get the volinfo for the volume %s", volname_token);
+
+ /* Get the actual volfile name. */
+ volfile_token = strtok_r(NULL, "/", &save_ptr);
+ *volname = gf_strdup(volfile_token);
+ if (NULL == *volname) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "Volname=%s", volfile_token, NULL);
+ ret = -1;
+ goto out;
+ }
- if (strstr (volname, "gluster/")) {
- server = strchr (volname, '/') + 1;
- glusterd_get_nodesvc_volfile (server, priv->workdir,
- path, path_len);
- ret = 1;
+ /*
+ * Ideally, this should succeed as volname_token now contains
+ * the name of the snap volume (i.e. name of the volume that
+ * represents the snapshot). But, if for some reason, volinfo
+ * for the snap volume is not found, then try to get from the
+ * name of the volfile. Name of the volfile is like this.
+ * <snap volume name>.<hostname>.<brick path>.vol
+ */
+ ret = glusterd_snap_volinfo_find(volname_token, snap, volinfo);
+ if (ret) {
+ /* Split the volume name */
+ vol = strtok_r(volfile_token, ".", &save_ptr);
+ if (!vol) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid "
+ "volname (%s)",
+ volfile_token);
goto out;
- } else if (volname[0] != '/') {
- /* Normal behavior */
- dup_volname = gf_strdup (volname);
- } else {
- /* Bringing in NFS like behavior for mount command, */
- /* With this, one can mount a volume with below cmd */
- /* bash# mount -t glusterfs server:/volume /mnt/pnt */
- dup_volname = gf_strdup (&volname[1]);
+ }
+
+ ret = glusterd_snap_volinfo_find(vol, snap, volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_INFO_FAIL,
+ "Failed to "
+ "fetch snap volume from volname (%s)",
+ vol);
+ goto out;
+ }
}
-
- free_ptr = dup_volname;
-
- ret = glusterd_volinfo_find (dup_volname, &volinfo);
+ } else {
+ /*volname_token is parent volname*/
+ ret = glusterd_snap_volinfo_find_from_parent_volname(volname_token,
+ snap, volinfo);
if (ret) {
- /* Split the volume name */
- vol = strtok_r (dup_volname, ".", &tmp);
- if (!vol)
- goto out;
- ret = glusterd_volinfo_find (vol, &volinfo);
- if (ret)
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_INFO_FAIL,
+ "Failed to "
+ "fetch snap volume from parent "
+ "volname (%s)",
+ volname_token);
+ goto out;
}
- if (!glusterd_auth_get_username (volinfo))
- trusted_str = NULL;
+ /* Since volname_token is a parent volname we should
+ * get the snap volname here*/
+ *volname = gf_strdup((*volinfo)->volname);
+ if (NULL == *volname) {
+ ret = -1;
+ goto out;
+ }
+ }
- ret = snprintf (path, path_len, "%s/vols/%s/%s.vol",
- priv->workdir, volinfo->volname, volname);
- if (ret == -1)
- goto out;
+out:
+ if (ret && NULL != *volname) {
+ GF_FREE(*volname);
+ *volname = NULL;
+ }
+
+ if (tmp_str_token)
+ GF_FREE(tmp_str_token);
+ return ret;
+}
- ret = stat (path, &stbuf);
+int32_t
+glusterd_get_client_per_brick_volfile(glusterd_volinfo_t *volinfo,
+ char *filename, char *path, int path_len)
+{
+ char workdir[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ int32_t ret = -1;
- if ((ret == -1) && (errno == ENOENT)) {
- snprintf (path, path_len, "%s/vols/%s/%s%s-fuse.vol",
- priv->workdir, volinfo->volname,
- (trusted_str ? trusted_str : ""), dup_volname);
- ret = stat (path, &stbuf);
- }
+ GF_VALIDATE_OR_GOTO("glusterd", THIS, out);
+ priv = THIS->private;
+ GF_VALIDATE_OR_GOTO(THIS->name, priv, out);
- if ((ret == -1) && (errno == ENOENT)) {
- snprintf (path, path_len, "%s/vols/%s/%s-tcp.vol",
- priv->workdir, volinfo->volname, volname);
- }
+ GLUSTERD_GET_VOLUME_DIR(workdir, volinfo, priv);
- ret = 1;
+ snprintf(path, path_len, "%s/%s", workdir, filename);
+
+ ret = 0;
out:
- if (free_ptr)
- GF_FREE (free_ptr);
- return ret;
+ return ret;
}
-int
-server_getspec (rpcsvc_request_t *req)
-{
- int32_t ret = -1;
- int32_t op_errno = 0;
- int32_t spec_fd = -1;
- size_t file_len = 0;
- char filename[PATH_MAX] = {0,};
- struct stat stbuf = {0,};
- char *volume = NULL;
- char *tmp = NULL;
- int cookie = 0;
- rpc_transport_t *trans = NULL;
- gf_getspec_req args = {0,};
- gf_getspec_rsp rsp = {0,};
- char addrstr[RPCSVC_PEER_STRLEN] = {0};
-
-
- if (!xdr_to_generic (req->msg[0], &args,
- (xdrproc_t)xdr_gf_getspec_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto fail;
+size_t
+build_volfile_path(char *volume_id, char *path, size_t path_len,
+ char *trusted_str, dict_t *dict)
+{
+ struct stat stbuf = {
+ 0,
+ };
+ int32_t ret = -1;
+ char *vol = NULL;
+ char *dup_volname = NULL;
+ char *save_ptr = NULL;
+ char *free_ptr = NULL;
+ char *volname = NULL;
+ char *volid_ptr = NULL;
+ char dup_volid[PATH_MAX] = {
+ 0,
+ };
+ char path_prefix[PATH_MAX] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(volume_id);
+ GF_ASSERT(path);
+
+ volid_ptr = strstr(volume_id, "snapd/");
+ if (volid_ptr) {
+ volid_ptr = strchr(volid_ptr, '/');
+ if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
+ ret = -1;
+ goto out;
}
+ volid_ptr++;
- volume = args.key;
+ ret = glusterd_volinfo_find(volid_ptr, &volinfo);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Couldn't find volinfo");
+ goto out;
+ }
+ glusterd_svc_build_snapd_volfile(volinfo, path, path_len);
+ ret = 0;
+ goto out;
+ }
+
+ volid_ptr = strstr(volume_id, "gluster/");
+ if (volid_ptr) {
+ volid_ptr = strchr(volid_ptr, '/');
+ if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+ volid_ptr++;
- trans = req->trans;
- ret = rpcsvc_transport_peername (trans, (char *)&addrstr,
- sizeof (addrstr));
- if (ret)
- goto fail;
+ glusterd_svc_build_volfile_path(volid_ptr, priv->workdir, path,
+ path_len);
+ ret = 0;
+ goto out;
+ }
+
+ volid_ptr = strstr(volume_id, "gfproxy-client/");
+ if (volid_ptr) {
+ volid_ptr = strchr(volid_ptr, '/');
+ if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+ volid_ptr++;
- tmp = strrchr (addrstr, ':');
- *tmp = '\0';
+ ret = glusterd_volinfo_find(volid_ptr, &volinfo);
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_ERROR, "Couldn't find volinfo");
+ goto out;
+ }
- /* we trust the local admin */
- if (!glusterd_is_local_addr (addrstr)) {
+ glusterd_get_gfproxy_client_volfile(volinfo, path, path_len);
- ret = build_volfile_path (volume, filename,
- sizeof (filename),
- TRUSTED_PREFIX);
- } else {
- ret = build_volfile_path (volume, filename,
- sizeof (filename), NULL);
- }
-
- if (ret > 0) {
- /* to allocate the proper buffer to hold the file data */
- ret = stat (filename, &stbuf);
- if (ret < 0){
- gf_log ("glusterd", GF_LOG_ERROR,
- "Unable to stat %s (%s)",
- filename, strerror (errno));
- goto fail;
- }
+ ret = 0;
+ goto out;
+ }
+
+ volid_ptr = strstr(volume_id, "gfproxyd/");
+ if (volid_ptr) {
+ volid_ptr = strchr(volid_ptr, '/');
+ if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+ volid_ptr++;
- spec_fd = open (filename, O_RDONLY);
- if (spec_fd < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "Unable to open %s (%s)",
- filename, strerror (errno));
- goto fail;
- }
- ret = file_len = stbuf.st_size;
- } else {
- op_errno = ENOENT;
+ ret = glusterd_volinfo_find(volid_ptr, &volinfo);
+ if (ret == -1) {
+ gf_log(this->name, GF_LOG_ERROR, "Couldn't find volinfo");
+ goto out;
}
- if (file_len) {
- rsp.spec = CALLOC (file_len+1, sizeof (char));
- if (!rsp.spec) {
- ret = -1;
- op_errno = ENOMEM;
- goto fail;
- }
- ret = read (spec_fd, rsp.spec, file_len);
+ glusterd_svc_build_gfproxyd_volfile_path(volinfo, path, path_len);
+ ret = 0;
+ goto out;
+ }
+
+ volid_ptr = strstr(volume_id, "shd/");
+ if (volid_ptr) {
+ volid_ptr = strchr(volid_ptr, '/');
+ if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+ volid_ptr++;
- close (spec_fd);
+ ret = glusterd_volinfo_find(volid_ptr, &volinfo);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Couldn't find volinfo for volid=%s", volid_ptr);
+ goto out;
}
- /* convert to XDR */
-fail:
- rsp.op_ret = ret;
+ glusterd_svc_build_shd_volfile_path(volinfo, path, path_len);
+
+ ret = glusterd_svc_set_shd_pidfile(volinfo, dict);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Couldn't set pidfile in dict for volid=%s", volid_ptr);
+ goto out;
+ }
+ ret = 0;
+ goto out;
+ }
+
+ volid_ptr = strstr(volume_id, "/snaps/");
+ if (volid_ptr) {
+ ret = get_snap_volname_and_volinfo(volid_ptr, &volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_INFO_FAIL,
+ "Failed to get snap"
+ " volinfo from path (%s)",
+ volume_id);
+ ret = -1;
+ goto out;
+ }
+
+ len = snprintf(path_prefix, sizeof(path_prefix), "%s/snaps/%s",
+ priv->workdir, volinfo->snapshot->snapname);
+ volid_ptr = volname;
+ /* this is to ensure that volname recvd from
+ get_snap_volname_and_volinfo is free'd */
+ free_ptr = volname;
+ if ((len < 0) || (len >= sizeof(path_prefix))) {
+ ret = -1;
+ goto out;
+ }
- if (op_errno)
- rsp.op_errno = gf_errno_to_error (op_errno);
- if (cookie)
- rsp.op_errno = cookie;
+ goto gotvolinfo;
+ }
- if (!rsp.spec)
- rsp.spec = strdup ("");
+ volid_ptr = strstr(volume_id, "rebalance/");
+ if (volid_ptr) {
+ volid_ptr = strchr(volid_ptr, '/');
+ if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+ volid_ptr++;
- glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gf_getspec_rsp);
- if (args.key)
- free (args.key);//malloced by xdr
- if (rsp.spec)
- free (rsp.spec);
+ ret = glusterd_volinfo_find(volid_ptr, &volinfo);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Couldn't find volinfo");
+ goto out;
+ }
+ glusterd_get_rebalance_volfile(volinfo, path, path_len);
+ ret = 0;
+ goto out;
+ }
+
+ volid_ptr = strstr(volume_id, "client_per_brick/");
+ if (volid_ptr) {
+ volid_ptr = strchr(volid_ptr, '/');
+ if (!volid_ptr) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+ volid_ptr++;
+
+ dup_volname = gf_strdup(volid_ptr);
+ if (!dup_volname) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "strdup failed");
+ ret = -1;
+ goto out;
+ }
- return 0;
+ /* Split the volume name */
+ vol = strtok_r(dup_volname, ".", &save_ptr);
+ if (!vol) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SPLIT_FAIL,
+ "Volume name=%s", dup_volname, NULL);
+ ret = -1;
+ goto out;
+ }
+ ret = glusterd_volinfo_find(vol, &volinfo);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Couldn't find volinfo");
+ goto out;
+ }
+ ret = glusterd_get_client_per_brick_volfile(volinfo, volid_ptr, path,
+ path_len);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY,
+ "failed to get volinfo path");
+ goto out;
+ }
+
+ ret = sys_access(path, F_OK);
+ goto out;
+ }
+
+ if (volume_id[0] == '/') {
+ /* Normal behavior */
+ volid_ptr = volume_id;
+ volid_ptr++;
+
+ } else {
+ /* Bringing in NFS like behavior for mount command, */
+ /* With this, one can mount a volume with below cmd */
+ /* bash# mount -t glusterfs server:/volume /mnt/pnt */
+ volid_ptr = volume_id;
+ }
+
+ len = snprintf(path_prefix, sizeof(path_prefix), "%s/vols", priv->workdir);
+ if ((len < 0) || (len >= sizeof(path_prefix))) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volid_ptr, &volinfo);
+
+ if (ret) {
+ dup_volname = gf_strdup(volid_ptr);
+ if (!dup_volname) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "Volume name=%s", volid_ptr, NULL);
+ ret = -1;
+ goto out;
+ }
+ /* Split the volume name */
+ vol = strtok_r(dup_volname, ".", &save_ptr);
+ if (!vol) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SPLIT_FAIL,
+ "Volume name=%s", dup_volname, NULL);
+ ret = -1;
+ goto out;
+ }
+ ret = glusterd_volinfo_find(vol, &volinfo);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL,
+ NULL);
+ goto out;
+ }
+ }
+
+gotvolinfo:
+ if (!glusterd_auth_get_username(volinfo))
+ trusted_str = NULL;
+
+ ret = snprintf(path, path_len, "%s/%s/%s.vol", path_prefix,
+ volinfo->volname, volid_ptr);
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ ret = sys_stat(path, &stbuf);
+
+ if ((ret == -1) && (errno == ENOENT)) {
+ if (snprintf(dup_volid, PATH_MAX, "%s", volid_ptr) >= PATH_MAX)
+ goto out;
+ if (!strchr(dup_volid, '.')) {
+ switch (volinfo->transport_type) {
+ case GF_TRANSPORT_TCP:
+ strcat(dup_volid, ".tcp");
+ break;
+ case GF_TRANSPORT_RDMA:
+ strcat(dup_volid, ".rdma");
+ break;
+ case GF_TRANSPORT_BOTH_TCP_RDMA:
+ strcat(dup_volid, ".tcp");
+ break;
+ default:
+ break;
+ }
+ }
+ snprintf(path, path_len, "%s/%s/%s%s-fuse.vol", path_prefix,
+ volinfo->volname, (trusted_str ? trusted_str : ""), dup_volid);
+ ret = sys_stat(path, &stbuf);
+ }
+out:
+ if (dup_volname)
+ GF_FREE(dup_volname);
+ if (free_ptr)
+ GF_FREE(free_ptr);
+ return ret;
}
+/* Get and store op-versions of the clients sending the getspec request
+ * Clients of versions <= 3.3, don't send op-versions, their op-versions are
+ * defaulted to 1. Also fetch brick_name.
+ */
int32_t
-server_event_notify (rpcsvc_request_t *req)
-{
- int32_t ret = -1;
- int32_t op_errno = 0;
- gf_event_notify_req args = {0,};
- gf_event_notify_rsp rsp = {0,};
- dict_t *dict = NULL;
- gf_boolean_t need_rsp = _gf_true;
-
- if (!xdr_to_generic (req->msg[0], &args,
- (xdrproc_t)xdr_gf_event_notify_req)) {
- req->rpc_err = GARBAGE_ARGS;
- goto fail;
- }
+glusterd_get_args_from_dict(gf_getspec_req *args, peer_info_t *peerinfo,
+ char **brick_name)
+{
+ dict_t *dict = NULL;
+ int client_max_op_version = 1;
+ int client_min_op_version = 1;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ char *name = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(args);
+ GF_ASSERT(peerinfo);
+
+ if (!args->xdata.xdata_len) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
+ ret = 0;
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(args->xdata.xdata_val, args->xdata.xdata_len, &dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "Failed to unserialize request dictionary");
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, "min-op-version", &client_min_op_version);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get client-min-op-version");
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, "max-op-version", &client_max_op_version);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get client-max-op-version");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "brick_name", &name);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "No brick name present");
+ ret = 0;
+ goto out;
+ }
+ *brick_name = gf_strdup(name);
+ if (*brick_name == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "Brick_name=%s", name, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0, "brick_name = %s", *brick_name);
+out:
+ peerinfo->max_op_version = client_max_op_version;
+ peerinfo->min_op_version = client_min_op_version;
- if (args.dict.dict_len) {
- dict = dict_new ();
- if (!dict)
- return ret;
- ret = dict_unserialize (args.dict.dict_val,
- args.dict.dict_len, &dict);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Failed to unserialize req");
- goto fail;
- }
+ if (dict)
+ dict_unref(dict);
+
+ return ret;
+}
+
+/* Given the missed_snapinfo and snap_opinfo take the
+ * missed lvm snapshot
+ */
+int32_t
+glusterd_create_missed_snap(glusterd_missed_snap_info *missed_snapinfo,
+ glusterd_snap_op_t *snap_opinfo)
+{
+ char *device = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int32_t ret = -1;
+ int32_t i = 0;
+ uuid_t snap_uuid = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ char *mnt_device = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(missed_snapinfo);
+ GF_ASSERT(snap_opinfo);
+
+ gf_uuid_parse(missed_snapinfo->snap_uuid, snap_uuid);
+
+ /* Find the snap-object */
+ snap = glusterd_find_snap_by_id(snap_uuid);
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND,
+ "Unable to find the snap with snap_uuid %s",
+ missed_snapinfo->snap_uuid);
+ ret = -1;
+ goto out;
+ }
+
+ /* Find the snap_vol */
+ cds_list_for_each_entry(volinfo, &snap->volumes, vol_list)
+ {
+ if (!strcmp(volinfo->volname, snap_opinfo->snap_vol_id)) {
+ snap_vol = volinfo;
+ break;
}
+ }
+
+ if (!snap_vol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "Unable to find the snap_vol(%s) "
+ "for snap(%s)",
+ snap_opinfo->snap_vol_id, snap->snapname);
+ ret = -1;
+ goto out;
+ }
+
+ /* Find the missed brick in the snap volume */
+ cds_list_for_each_entry(brickinfo, &snap_vol->bricks, brick_list)
+ {
+ i++;
+ if (i == snap_opinfo->brick_num)
+ break;
+ }
+
+ if (brickinfo->snap_status != -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_STATUS_NOT_PENDING,
+ "The snap status of the missed "
+ "brick(%s) is not pending",
+ brickinfo->path);
+ goto out;
+ }
+
+ /* Fetch the device path */
+ mnt_device = glusterd_get_brick_mount_device(snap_opinfo->brick_path);
+ if (!mnt_device) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_GET_INFO_FAIL,
+ "Getting device name for the"
+ "brick %s:%s failed",
+ brickinfo->hostname, snap_opinfo->brick_path);
+ ret = -1;
+ goto out;
+ }
+
+ device = glusterd_build_snap_device_path(mnt_device, snap_vol->volname,
+ snap_opinfo->brick_num - 1);
+ if (!device) {
+ gf_msg(this->name, GF_LOG_ERROR, ENXIO,
+ GD_MSG_SNAP_DEVICE_NAME_GET_FAIL,
+ "cannot copy the snapshot "
+ "device name (volname: %s, snapname: %s)",
+ snap_vol->volname, snap->snapname);
+ ret = -1;
+ goto out;
+ }
+ if (snprintf(brickinfo->device_path, sizeof(brickinfo->device_path), "%s",
+ device) >= sizeof(brickinfo->device_path)) {
+ gf_msg(this->name, GF_LOG_ERROR, ENXIO,
+ GD_MSG_SNAP_DEVICE_NAME_GET_FAIL,
+ "cannot copy the device_path "
+ "(device_path: %s)",
+ brickinfo->device_path);
+ ret = -1;
+ goto out;
+ }
+
+ /* Update the backend file-system type of snap brick in
+ * snap volinfo. */
+ ret = glusterd_update_mntopts(snap_opinfo->brick_path, brickinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MOUNTOPTS_FAIL,
+ "Failed to update "
+ "mount options for %s brick",
+ brickinfo->path);
+ /* We should not fail snapshot operation if we fail to get
+ * the file-system type */
+ }
+
+ ret = glusterd_take_lvm_snapshot(brickinfo, snap_opinfo->brick_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_OP_FAILED,
+ "Failed to take snapshot of %s", snap_opinfo->brick_path);
+ goto out;
+ }
+
+ /* After the snapshot both the origin brick (LVM brick) and
+ * the snapshot brick will have the same file-system label. This
+ * will cause lot of problems at mount time. Therefore we must
+ * generate a new label for the snapshot brick
+ */
+ ret = glusterd_update_fs_label(brickinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_SET_INFO_FAIL,
+ "Failed to update "
+ "file-system label for %s brick",
+ brickinfo->path);
+ /* Failing to update label should not cause snapshot failure.
+ * Currently label is updated only for XFS and ext2/ext3/ext4
+ * file-system.
+ */
+ }
+
+ /* Create and mount the snap brick */
+ ret = glusterd_snap_brick_create(snap_vol, brickinfo,
+ snap_opinfo->brick_num - 1, 0);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_CREATION_FAIL,
+ "Failed to "
+ " create and mount the brick(%s) for the snap %s",
+ snap_opinfo->brick_path, snap_vol->snapshot->snapname);
+ goto out;
+ }
+
+ brickinfo->snap_status = 0;
+ ret = glusterd_brick_start(snap_vol, brickinfo, _gf_false, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_DISCONNECTED,
+ "starting the "
+ "brick %s:%s for the snap %s failed",
+ brickinfo->hostname, brickinfo->path, snap->snapname);
+ goto out;
+ }
+ ret = glusterd_store_volinfo(snap_vol, GLUSTERD_VOLINFO_VER_AC_NONE);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
+ "Failed to store snapshot "
+ "volinfo (%s) for snap %s",
+ snap_vol->volname, snap->snapname);
+ goto out;
+ }
- switch (args.op) {
- case GF_EN_DEFRAG_STATUS:
- gf_log ("", GF_LOG_INFO,
- "recieved defrag status updated");
- if (dict) {
- glusterd_defrag_event_notify_handle (dict);
- need_rsp = _gf_false;
+out:
+ if (mnt_device)
+ GF_FREE(mnt_device);
+ if (device)
+ GF_FREE(device);
+
+ return ret;
+}
+
+/* Look into missed_snap_list, to see it the given brick_name,
+ * has any missed snap creates for the local node */
+int32_t
+glusterd_take_missing_brick_snapshots(char *brick_name)
+{
+ char *my_node_uuid = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_missed_snap_info *missed_snapinfo = NULL;
+ glusterd_snap_op_t *snap_opinfo = NULL;
+ int32_t ret = -1;
+ gf_boolean_t update_list = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(brick_name);
+
+ my_node_uuid = uuid_utoa(MY_UUID);
+
+ cds_list_for_each_entry(missed_snapinfo, &priv->missed_snaps_list,
+ missed_snaps)
+ {
+ /* If the missed snap op is not for the local node
+ * then continue
+ */
+ if (strcmp(my_node_uuid, missed_snapinfo->node_uuid))
+ continue;
+
+ cds_list_for_each_entry(snap_opinfo, &missed_snapinfo->snap_ops,
+ snap_ops_list)
+ {
+ /* Check if the missed snap's op is a create for
+ * the brick name in question
+ */
+ if ((snap_opinfo->op == GF_SNAP_OPTION_TYPE_CREATE) &&
+ (!strcmp(brick_name, snap_opinfo->brick_path))) {
+ /* Perform a snap create if the
+ * op is still pending
+ */
+ if (snap_opinfo->status == GD_MISSED_SNAP_PENDING) {
+ ret = glusterd_create_missed_snap(missed_snapinfo,
+ snap_opinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MISSED_SNAP_CREATE_FAIL,
+ "Failed to create "
+ "missed snap for %s",
+ brick_name);
+ /* At this stage, we will mark
+ * the entry as done. Because
+ * of the failure other
+ * snapshots will not be
+ * affected, and neither the
+ * brick. Only the current snap
+ * brick will always remain as
+ * pending.
+ */
+ }
+ snap_opinfo->status = GD_MISSED_SNAP_DONE;
+ update_list = _gf_true;
}
+ /* One snap-id won't have more than one missed
+ * create for the same brick path. Hence
+ * breaking in search of another missed create
+ * for the same brick path in the local node
+ */
break;
- default:
- gf_log ("", GF_LOG_ERROR, "Unkown op recieved in in event "
- "notify");
- ret = -1;
- break;
+ }
+ }
+ }
+
+ if (update_list == _gf_true) {
+ ret = glusterd_store_update_missed_snaps();
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+ "Failed to update missed_snaps_list");
+ goto out;
}
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+/* Checks if the client supports the volume, ie. client can understand all the
+ * options in the volfile
+ */
+static gf_boolean_t
+_client_supports_volume(peer_info_t *peerinfo, int32_t *op_errno)
+{
+ gf_boolean_t ret = _gf_true;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(op_errno);
+
+ /* Only check when the volfile being requested is a volume. Not finding
+ * a volinfo implies that the volfile requested for is not of a gluster
+ * volume. A non volume volfile is requested by the local gluster
+ * services like shd and nfs-server. These need not be checked as they
+ * will be running at the same op-version as glusterd and will be able
+ * to support all the features
+ */
+ if ((glusterd_volinfo_find(peerinfo->volname, &volinfo) == 0) &&
+ ((peerinfo->min_op_version > volinfo->client_op_version) ||
+ (peerinfo->max_op_version < volinfo->client_op_version))) {
+ ret = _gf_false;
+ *op_errno = ENOTSUP;
+ gf_msg("glusterd", GF_LOG_INFO, ENOTSUP, GD_MSG_UNSUPPORTED_VERSION,
+ "Client %s (%d -> %d) doesn't support required "
+ "op-version (%d). Rejecting volfile request.",
+ peerinfo->identifier, peerinfo->min_op_version,
+ peerinfo->max_op_version, volinfo->client_op_version);
+ }
+
+ return ret;
+}
+
+int
+__server_getspec(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ int32_t spec_fd = -1;
+ size_t file_len = 0;
+ char filename[PATH_MAX] = {
+ 0,
+ };
+ struct stat stbuf = {
+ 0,
+ };
+ char *brick_name = NULL;
+ char *volume = NULL;
+ char *tmp = NULL;
+ rpc_transport_t *trans = NULL;
+ gf_getspec_req args = {
+ 0,
+ };
+ gf_getspec_rsp rsp = {
+ 0,
+ };
+ char addrstr[RPCSVC_PEER_STRLEN] = {0};
+ peer_info_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ dict_t *dict = NULL;
+ glusterd_peerinfo_t *peer = NULL;
+ glusterd_conf_t *conf = NULL;
+ int peer_cnt = 0;
+ char *peer_hosts = NULL;
+ char *tmp_str = NULL;
+ char portstr[10] = {
+ 0,
+ };
+ int len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ conf = this->private;
+ ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_gf_getspec_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode the message");
+ goto fail;
+ }
+
+ peerinfo = &req->trans->peerinfo;
+
+ volume = args.key;
+
+ if (strlen(volume) >= (NAME_MAX)) {
+ op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_NAME_TOO_LONG,
+ "volume name too long (%s)", volume);
+ goto fail;
+ }
+
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_MOUNT_REQ_RCVD,
+ "Received mount request for volume %s", volume);
+
+ /* Need to strip leading '/' from volnames. This was introduced to
+ * support nfs style mount parameters for native gluster mount
+ */
+ if (volume[0] == '/')
+ ret = snprintf(peerinfo->volname, sizeof(peerinfo->volname), "%s",
+ &volume[1]);
+ else
+ ret = snprintf(peerinfo->volname, sizeof(peerinfo->volname), "%s",
+ volume);
+ if (ret < 0 || ret >= sizeof(peerinfo->volname)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "peerinfo->volname %s truncated or error occurred: "
+ "(ret: %d)",
+ peerinfo->volname, ret);
+ ret = -1;
+ goto fail;
+ }
+
+ ret = glusterd_get_args_from_dict(&args, peerinfo, &brick_name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get args from dict");
+ goto fail;
+ }
+
+ if (!_client_supports_volume(peerinfo, &op_errno)) {
+ ret = -1;
+ goto fail;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ trans = req->trans;
+ /* addrstr will be empty for cli socket connections */
+ ret = rpcsvc_transport_peername(trans, (char *)&addrstr, sizeof(addrstr));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_RPC_TRANSPORT_GET_PEERNAME_FAIL,
+ "Failed to get the peername");
+ goto fail;
+ }
+
+ tmp = strrchr(addrstr, ':');
+ if (tmp)
+ *tmp = '\0';
+
+ /* The trusted volfiles are given to the glusterd owned process like NFS
+ * server, self-heal daemon etc., so that they are not inadvertently
+ * blocked by a auth.{allow,reject} setting. The trusted volfile is not
+ * meant for external users.
+ * For unix domain socket, address will be empty.
+ */
+ if (strlen(addrstr) == 0 || gf_is_local_addr(addrstr)) {
+ ret = build_volfile_path(volume, filename, sizeof(filename),
+ TRUSTED_PREFIX, dict);
+ } else {
+ ret = build_volfile_path(volume, filename, sizeof(filename), NULL,
+ dict);
+ }
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peer, &conf->peers, uuid_list)
+ {
+ if (!peer->connected)
+ continue;
+ if (!peer_hosts) {
+ if (peer->port) {
+ snprintf(portstr, sizeof(portstr), "%d", peer->port);
+ } else {
+ snprintf(portstr, sizeof(portstr), "%d", GLUSTERD_DEFAULT_PORT);
+ }
+ len = strlen(peer->hostname) + strlen(portstr) + 3;
+ tmp_str = GF_CALLOC(1, len, gf_gld_mt_char);
+ snprintf(tmp_str, len, "%s%s%s%s", peer->hostname, ":", portstr,
+ " ");
+ peer_hosts = tmp_str;
+ } else {
+ len = strlen(peer_hosts) + strlen(peer->hostname) +
+ strlen(portstr) + 3;
+ tmp_str = GF_CALLOC(1, len, gf_gld_mt_char);
+ snprintf(tmp_str, len, "%s%s%s%s%s", peer_hosts, peer->hostname,
+ ":", portstr, " ");
+ GF_FREE(peer_hosts);
+ peer_hosts = tmp_str;
+ }
+ peer_cnt++;
+ }
+ RCU_READ_UNLOCK;
+ if (peer_cnt) {
+ op_ret = dict_set_str(dict, GLUSTERD_BRICK_SERVERS, peer_hosts);
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to set peer_host in dict");
+ ret = op_ret;
+ goto fail;
+ }
+ }
+
+ if (ret == 0) {
+ if (dict->count > 0) {
+ ret = dict_allocate_and_serialize(dict, &rsp.xdata.xdata_val,
+ &rsp.xdata.xdata_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto fail;
+ }
+ }
+
+ /* to allocate the proper buffer to hold the file data */
+ ret = sys_stat(filename, &stbuf);
+ if (ret < 0) {
+ gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Unable to stat %s (%s)", filename, strerror(errno));
+ goto fail;
+ }
+
+ spec_fd = open(filename, O_RDONLY);
+ if (spec_fd < 0) {
+ gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Unable to open %s (%s)", filename, strerror(errno));
+ goto fail;
+ }
+ ret = file_len = stbuf.st_size;
+ } else {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND, NULL);
+ op_errno = ENOENT;
+ goto fail;
+ }
+
+ if (file_len) {
+ rsp.spec = CALLOC(file_len + 1, sizeof(char));
+ if (!rsp.spec) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ ret = -1;
+ op_errno = ENOMEM;
+ goto fail;
+ }
+ ret = sys_read(spec_fd, rsp.spec, file_len);
+ }
+
+ if (brick_name) {
+ gf_msg_debug(this->name, 0, "Look for missing snap creates for %s",
+ brick_name);
+ op_ret = glusterd_take_missing_brick_snapshots(brick_name);
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_CREATE_FAIL,
+ "Failed to take missing brick snapshots");
+ ret = -1;
+ goto fail;
+ }
+ }
+ /* convert to XDR */
fail:
- rsp.op_ret = ret;
-
- if (op_errno)
- rsp.op_errno = gf_errno_to_error (op_errno);
-
- if (need_rsp)
- glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_gf_event_notify_rsp);
- if (dict)
- dict_unref (dict);
- if (args.dict.dict_val)
- free (args.dict.dict_val);//malloced by xdr
-
- return 0;
-}
-rpcsvc_actor_t gluster_handshake_actors[] = {
- [GF_HNDSK_NULL] = {"NULL", GF_HNDSK_NULL, NULL, NULL, NULL, 0},
- [GF_HNDSK_GETSPEC] = {"GETSPEC", GF_HNDSK_GETSPEC, server_getspec, NULL, NULL, 0},
- [GF_HNDSK_EVENT_NOTIFY] = {"EVENTNOTIFY", GF_HNDSK_EVENT_NOTIFY, server_event_notify,
- NULL, NULL, 0},
-};
+ if (spec_fd >= 0)
+ sys_close(spec_fd);
+ GF_FREE(brick_name);
-struct rpcsvc_program gluster_handshake_prog = {
- .progname = "GlusterFS Handshake",
- .prognum = GLUSTER_HNDSK_PROGRAM,
- .progver = GLUSTER_HNDSK_VERSION,
- .actors = gluster_handshake_actors,
- .numactors = GF_HNDSK_MAXVALUE,
-};
+ rsp.op_ret = ret;
+ if (rsp.op_ret < 0)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MOUNT_REQ_FAIL,
+ "Failed to mount the volume");
-char *glusterd_dump_proc[GF_DUMP_MAXVALUE] = {
- [GF_DUMP_NULL] = "NULL",
- [GF_DUMP_DUMP] = "DUMP",
-};
+ if (op_errno)
+ rsp.op_errno = gf_errno_to_error(op_errno);
-rpc_clnt_prog_t glusterd_dump_prog = {
- .progname = "GLUSTERD-DUMP",
- .prognum = GLUSTER_DUMP_PROGRAM,
- .progver = GLUSTER_DUMP_VERSION,
- .procnames = glusterd_dump_proc,
-};
+ if (!rsp.spec)
+ rsp.spec = strdup("");
-static int
-glusterd_event_connected_inject (glusterd_peerctx_t *peerctx)
-{
- GF_ASSERT (peerctx);
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_getspec_rsp);
+ free(args.key); // malloced by xdr
+ free(rsp.spec);
+
+ if (peer_hosts)
+ GF_FREE(peer_hosts);
+ if (dict)
+ dict_unref(dict);
- glusterd_friend_sm_event_t *event = NULL;
- glusterd_probe_ctx_t *ctx = NULL;
- int ret = -1;
- glusterd_peerinfo_t *peerinfo = NULL;
+ if (args.xdata.xdata_val)
+ free(args.xdata.xdata_val);
+ if (rsp.xdata.xdata_val)
+ GF_FREE(rsp.xdata.xdata_val);
+
+ return 0;
+}
- ret = glusterd_friend_sm_new_event
- (GD_FRIEND_EVENT_CONNECTED, &event);
+int
+server_getspec(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __server_getspec);
+}
+int32_t
+__server_event_notify(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_event_notify_req args = {
+ 0,
+ };
+ gf_event_notify_rsp rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ gf_boolean_t need_rsp = _gf_true;
+
+ ret = xdr_to_generic(req->msg[0], &args,
+ (xdrproc_t)xdr_gf_event_notify_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto fail;
+ }
+
+ if (args.dict.dict_len) {
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ return ret;
+ }
+ ret = dict_unserialize(args.dict.dict_val, args.dict.dict_len, &dict);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get new event");
- goto out;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "Failed to unserialize req");
+ goto fail;
}
+ }
- ctx = GF_CALLOC (1, sizeof(*ctx), gf_gld_mt_probe_ctx_t);
+ switch (args.op) {
+ case GF_EN_DEFRAG_STATUS:
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_DEFRAG_STATUS_UPDATED,
+ "received defrag status updated");
+ if (dict) {
+ glusterd_defrag_event_notify_handle(dict);
+ need_rsp = _gf_false;
+ }
+ break;
+ default:
+ gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_OP_UNSUPPORTED,
+ "Unknown op received in event "
+ "notify");
+ gf_event(EVENT_NOTIFY_UNKNOWN_OP, "op=%d", args.op);
+ ret = -1;
+ break;
+ }
- if (!ctx) {
- ret = -1;
- gf_log ("", GF_LOG_ERROR, "Memory not available");
- goto out;
- }
+fail:
+ rsp.op_ret = ret;
- peerinfo = peerctx->peerinfo;
- ctx->hostname = gf_strdup (peerinfo->hostname);
- ctx->port = peerinfo->port;
- ctx->req = peerctx->args.req;
+ if (need_rsp)
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_event_notify_rsp);
+ if (dict)
+ dict_unref(dict);
+ free(args.dict.dict_val); // malloced by xdr
- event->peerinfo = peerinfo;
- event->ctx = ctx;
+ return 0;
+}
- ret = glusterd_friend_sm_inject_event (event);
+int32_t
+server_event_notify(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __server_event_notify);
+}
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject "
- "EVENT_CONNECTED ret = %d", ret);
- goto out;
+int
+gd_validate_cluster_op_version(xlator_t *this, int cluster_op_version,
+ char *peerid)
+{
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+
+ conf = this->private;
+
+ if (cluster_op_version > GD_OP_VERSION_MAX) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERSION_MISMATCH,
+ "operating version %d is more than the maximum "
+ "supported (%d) on the machine (as per peer request "
+ "from %s)",
+ cluster_op_version, GD_OP_VERSION_MAX, peerid);
+ goto out;
+ }
+
+ /* The peer can only reduce its op-version when it doesn't have any
+ * volumes. Reducing op-version when it already contains volumes can
+ * lead to inconsistencies in the cluster
+ */
+ if ((cluster_op_version < conf->op_version) &&
+ !cds_list_empty(&conf->volumes)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERS_ADJUST_FAIL,
+ "cannot reduce operating version to %d from current "
+ "version %d as volumes exist (as per peer request from "
+ "%s)",
+ cluster_op_version, conf->op_version, peerid);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* Validate if glusterd can serve the management handshake request
+ *
+ * Requests are allowed if,
+ * - glusterd has no peers & no volumes, or
+ * - the request came from a known peer
+ * A known peer is identified using the following steps
+ * - the dict is checked for a peer uuid, which if present is matched with the
+ * peer list, else
+ * - the incoming request address is matched with the peer list
+ */
+gf_boolean_t
+gd_validate_mgmt_hndsk_req(rpcsvc_request_t *req, dict_t *dict)
+{
+ int ret = -1;
+ char hostname[UNIX_PATH_MAX + 1] = {
+ 0,
+ };
+ glusterd_peerinfo_t *peer = NULL;
+ xlator_t *this = NULL;
+ char *uuid_str = NULL;
+ uuid_t peer_uuid = {
+ 0,
+ };
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (!glusterd_have_peers() && !glusterd_have_volumes())
+ return _gf_true;
+
+ ret = dict_get_str(dict, GD_PEER_ID_KEY, &uuid_str);
+ /* Try to match uuid only if available, don't fail as older peers will
+ * not send a uuid
+ */
+ if (!ret) {
+ gf_uuid_parse(uuid_str, peer_uuid);
+ RCU_READ_LOCK;
+ ret = (glusterd_peerinfo_find(peer_uuid, NULL) != NULL);
+ RCU_READ_UNLOCK;
+ if (ret)
+ return _gf_true;
+ }
+
+ /* If you cannot get the hostname, you cannot authenticate */
+ ret = glusterd_remote_hostname_get(req, hostname, sizeof(hostname));
+ if (ret)
+ return _gf_false;
+
+ /* If peer object is not found it indicates that request is from an
+ * unknown peer, if its found, validate whether its uuid is also
+ * available in the peerinfo list. There could be a case where hostname
+ * is available in the peerinfo list but the uuid has changed of the
+ * node due to a reinstall, in that case the validation should fail!
+ */
+ RCU_READ_LOCK;
+ if (!uuid_str) {
+ ret = (glusterd_peerinfo_find(NULL, hostname) == NULL);
+ } else {
+ peer = glusterd_peerinfo_find(NULL, hostname);
+ if (!peer) {
+ ret = -1;
+ } else if (peer && glusterd_peerinfo_find(peer_uuid, NULL) != NULL) {
+ ret = 0;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HANDSHAKE_REQ_REJECTED,
+ "Request from "
+ "peer %s has an entry in peerinfo, but uuid "
+ "does not match",
+ req->trans->peerinfo.identifier);
+ ret = -1;
}
+ }
+ RCU_READ_UNLOCK;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HANDSHAKE_REQ_REJECTED,
+ "Rejecting management "
+ "handshake request from unknown peer %s",
+ req->trans->peerinfo.identifier);
+ gf_event(EVENT_PEER_REJECT, "peer=%s", req->trans->peerinfo.identifier);
+ return _gf_false;
+ }
+
+ return _gf_true;
+}
+int
+__glusterd_mgmt_hndsk_versions(rpcsvc_request_t *req)
+{
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+ gf_mgmt_hndsk_req args = {
+ {
+ 0,
+ },
+ };
+ gf_mgmt_hndsk_rsp rsp = {
+ 0,
+ };
+ dict_t *args_dict = NULL;
+
+ this = THIS;
+ conf = this->private;
+
+ ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_gf_mgmt_hndsk_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE(this, args_dict, args.hndsk.hndsk_val,
+ (args.hndsk.hndsk_len), ret, op_errno, out);
+
+ /* Check if we can service the request */
+ if (!gd_validate_mgmt_hndsk_req(req, args_dict)) {
+ ret = -1;
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, GD_OP_VERSION_KEY, conf->op_version);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to set operating version");
+ rsp.op_ret = ret;
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, GD_MIN_OP_VERSION_KEY, GD_OP_VERSION_MIN);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to set %s", GD_MIN_OP_VERSION_KEY);
+ rsp.op_ret = ret;
+ goto out;
+ }
+
+ ret = dict_set_int32(dict, GD_MAX_OP_VERSION_KEY, GD_OP_VERSION_MAX);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to set %s", GD_MAX_OP_VERSION_KEY);
+ rsp.op_ret = ret;
+ goto out;
+ }
+
+ ret = 0;
+
+ GF_PROTOCOL_DICT_SERIALIZE(this, dict, (&rsp.hndsk.hndsk_val),
+ rsp.hndsk.hndsk_len, op_errno, out);
out:
- gf_log ("", GF_LOG_DEBUG, "returning %d", ret);
- return ret;
+
+ rsp.op_ret = ret;
+ rsp.op_errno = op_errno;
+
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_mgmt_hndsk_rsp);
+
+ ret = 0;
+
+ if (dict)
+ dict_unref(dict);
+
+ if (args.hndsk.hndsk_val)
+ free(args.hndsk.hndsk_val);
+
+ if (rsp.hndsk.hndsk_val)
+ GF_FREE(rsp.hndsk.hndsk_val);
+
+ if (args_dict)
+ dict_unref(args_dict);
+
+ return ret;
}
int
-glusterd_set_clnt_mgmt_program (glusterd_peerinfo_t *peerinfo,
- gf_prog_detail *prog)
+glusterd_mgmt_hndsk_versions(rpcsvc_request_t *req)
{
- gf_prog_detail *trav = NULL;
- int ret = -1;
+ return glusterd_big_locked_handler(req, __glusterd_mgmt_hndsk_versions);
+}
- if (!peerinfo || !prog)
- goto out;
+int
+__glusterd_mgmt_hndsk_versions_ack(rpcsvc_request_t *req)
+{
+ dict_t *clnt_dict = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ int ret = -1;
+ int op_errno = EINVAL;
+ int peer_op_version = 0;
+ gf_mgmt_hndsk_req args = {
+ {
+ 0,
+ },
+ };
+ gf_mgmt_hndsk_rsp rsp = {
+ 0,
+ };
+
+ this = THIS;
+ conf = this->private;
+
+ ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_gf_mgmt_hndsk_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE(this, clnt_dict, args.hndsk.hndsk_val,
+ (args.hndsk.hndsk_len), ret, op_errno, out);
+
+ ret = dict_get_int32(clnt_dict, GD_OP_VERSION_KEY, &peer_op_version);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to get the op-version key peer=%s",
+ req->trans->peerinfo.identifier);
+ goto out;
+ }
+
+ ret = gd_validate_cluster_op_version(this, peer_op_version,
+ req->trans->peerinfo.identifier);
+ if (ret)
+ goto out;
+
+ /* As this is ACK from the Cluster for the versions supported,
+ can set the op-version of 'this' glusterd to the one
+ received. */
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_VERS_INFO,
+ "using the op-version %d", peer_op_version);
+ conf->op_version = peer_op_version;
+ ret = glusterd_store_global_info(this);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLOBAL_OP_VERSION_SET_FAIL,
+ "Failed to store op-version");
- trav = prog;
+out:
+ rsp.op_ret = ret;
+ rsp.op_errno = op_errno;
- while (trav) {
- /* Select 'programs' */
- if ((gd_mgmt_prog.prognum == trav->prognum) &&
- (gd_mgmt_prog.progver == trav->progver)) {
- peerinfo->mgmt = &gd_mgmt_prog;
- ret = 0;
- }
- if ((gd_peer_prog.prognum == trav->prognum) &&
- (gd_peer_prog.progver == trav->progver)) {
- peerinfo->peer = &gd_peer_prog;
- ret = 0;
- }
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "%s (%"PRId64":%"PRId64") not supported",
- trav->progname, trav->prognum,
- trav->progver);
- }
- trav = trav->next;
- }
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_mgmt_hndsk_rsp);
- if (peerinfo->mgmt) {
- gf_log ("", GF_LOG_INFO,
- "Using Program %s, Num (%d), Version (%d)",
- peerinfo->mgmt->progname, peerinfo->mgmt->prognum,
- peerinfo->mgmt->progver);
- }
- if (peerinfo->peer) {
- gf_log ("", GF_LOG_INFO,
- "Using Program %s, Num (%d), Version (%d)",
- peerinfo->peer->progname, peerinfo->peer->prognum,
- peerinfo->peer->progver);
- }
+ ret = 0;
-out:
- return ret;
+ if (clnt_dict)
+ dict_unref(clnt_dict);
+
+ if (args.hndsk.hndsk_val)
+ free(args.hndsk.hndsk_val);
+
+ return ret;
}
int
-glusterd_peer_dump_version_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+glusterd_mgmt_hndsk_versions_ack(rpcsvc_request_t *req)
{
- int ret = -1;
- gf_dump_rsp rsp = {0,};
- xlator_t *this = NULL;
- gf_prog_detail *trav = NULL;
- gf_prog_detail *next = NULL;
- call_frame_t *frame = NULL;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_peerctx_t *peerctx = NULL;
- char msg[1024] = {0,};
-
- this = THIS;
- frame = myframe;
- peerctx = frame->local;
- peerinfo = peerctx->peerinfo;
-
- if (-1 == req->rpc_status) {
- snprintf (msg, sizeof (msg),
- "Error through RPC layer, retry again later");
- gf_log ("", GF_LOG_ERROR, "%s", msg);
- peerctx->errstr = gf_strdup (msg);
- goto out;
+ return glusterd_big_locked_handler(req, __glusterd_mgmt_hndsk_versions_ack);
+}
+
+int
+__server_get_volume_info(rpcsvc_request_t *req)
+{
+ int ret = -1;
+ int32_t op_errno = ENOENT;
+ gf_get_volume_info_req vol_info_req = {{
+ 0,
+ }};
+ gf_get_volume_info_rsp vol_info_rsp = {
+ 0,
+ };
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ dict_t *dict = NULL;
+ dict_t *dict_rsp = NULL;
+ char *volume_id_str = NULL;
+ int32_t flags = 0;
+
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &vol_info_req,
+ (xdrproc_t)xdr_gf_get_volume_info_req);
+ if (ret < 0) {
+ /* failed to decode msg */
+ req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto out;
+ }
+ gf_smsg(this->name, GF_LOG_INFO, 0, GD_MSG_VOL_INFO_REQ_RECVD, NULL);
+
+ if (vol_info_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ op_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gf_dump_rsp);
+ ret = dict_unserialize(vol_info_req.dict.dict_val,
+ vol_info_req.dict.dict_len, &dict);
if (ret < 0) {
- snprintf (msg, sizeof (msg), "Failed to decode XDR");
- gf_log ("", GF_LOG_ERROR, "%s", msg);
- peerctx->errstr = gf_strdup (msg);
- goto out;
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ } else {
+ dict->extra_stdfree = vol_info_req.dict.dict_val;
}
- if (-1 == rsp.op_ret) {
- snprintf (msg, sizeof (msg),
- "Failed to get the 'versions' from remote server");
- gf_log (frame->this->name, GF_LOG_ERROR, "%s", msg);
- peerctx->errstr = gf_strdup (msg);
- goto out;
+ }
+
+ ret = dict_get_int32(dict, "flags", &flags);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=flags", NULL);
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ if (!flags) {
+ /* Nothing to query about. Just return success */
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_FLAG_SET, NULL);
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
+ op_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL,
+ "Volname=%s", volname, NULL);
+ op_errno = EINVAL;
+ ret = -1;
+ goto out;
+ }
+
+ if (flags & (int32_t)GF_GET_VOLUME_UUID) {
+ volume_id_str = gf_strdup(uuid_utoa(volinfo->volume_id));
+ if (!volume_id_str) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ NULL);
+ op_errno = ENOMEM;
+ ret = -1;
+ goto out;
}
- /* Make sure we assign the proper program to peer */
- ret = glusterd_set_clnt_mgmt_program (peerinfo, rsp.prog);
+ dict_rsp = dict_new();
+ if (!dict_rsp) {
+ gf_smsg(this->name, GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ op_errno = ENOMEM;
+ GF_FREE(volume_id_str);
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstr(dict_rsp, "volume_id", volume_id_str);
if (ret) {
- gf_log ("", GF_LOG_WARNING, "failed to set the mgmt program");
- goto out;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=volume_id", NULL);
+ op_errno = -ret;
+ ret = -1;
+ goto out;
}
+ }
+ ret = dict_allocate_and_serialize(dict_rsp, &vol_info_rsp.dict.dict_val,
+ &vol_info_rsp.dict.dict_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
- ret = default_notify (this, GF_EVENT_CHILD_UP, NULL);
+out:
+ vol_info_rsp.op_ret = ret;
+ vol_info_rsp.op_errno = op_errno;
+ vol_info_rsp.op_errstr = "";
+ glusterd_submit_reply(req, &vol_info_rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_get_volume_info_rsp);
+ ret = 0;
+
+ if (dict) {
+ dict_unref(dict);
+ }
+
+ if (dict_rsp) {
+ dict_unref(dict_rsp);
+ }
+
+ if (vol_info_rsp.dict.dict_val) {
+ GF_FREE(vol_info_rsp.dict.dict_val);
+ }
+ return ret;
+}
+
+int
+server_get_volume_info(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __server_get_volume_info);
+}
- if (GD_MODE_ON == peerctx->args.mode) {
- ret = glusterd_event_connected_inject (peerctx);
- peerctx->args.req = NULL;
- } else if (GD_MODE_SWITCH_ON == peerctx->args.mode) {
- peerctx->args.mode = GD_MODE_ON;
+/*
+ * glusterd function to get the list of snapshot names and uuids
+ */
+int
+__server_get_snap_info(rpcsvc_request_t *req)
+{
+ int ret = -1;
+ int op_errno = ENOENT;
+ gf_getsnap_name_uuid_req snap_info_req = {{
+ 0,
+ }};
+ gf_getsnap_name_uuid_rsp snap_info_rsp = {
+ 0,
+ };
+ dict_t *dict = NULL;
+ dict_t *dict_rsp = NULL;
+ char *volname = NULL;
+
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &snap_info_req,
+ (xdrproc_t)xdr_gf_getsnap_name_uuid_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode management handshake response");
+ goto out;
+ }
+
+ if (snap_info_req.dict.dict_len) {
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ op_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(snap_info_req.dict.dict_val,
+ snap_info_req.dict.dict_len, &dict);
+ if (ret < 0) {
+ gf_msg("glusterd", GF_LOG_ERROR, EINVAL,
+ GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "Failed to unserialize dictionary");
+ op_errno = EINVAL;
+ ret = -1;
+ goto out;
} else {
- gf_log ("", GF_LOG_WARNING, "unknown mode %d",
- peerctx->args.mode);
+ dict->extra_stdfree = snap_info_req.dict.dict_val;
}
+ }
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ op_errno = EINVAL;
+ gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_DICT_GET_FAILED,
+ "Failed to retrieve volname");
+ ret = -1;
+ goto out;
+ }
+
+ dict_rsp = dict_new();
+ if (!dict_rsp) {
+ gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ op_errno = ENOMEM;
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_snapshot_get_volnames_uuids(dict_rsp, volname,
+ &snap_info_rsp);
+
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Error getting snapshot volume names and uuids : %s", volname);
+ op_errno = EINVAL;
+ }
- glusterd_friend_sm ();
- glusterd_op_sm ();
+out:
+ snap_info_rsp.op_ret = ret;
+ snap_info_rsp.op_errno = op_errno;
+ snap_info_rsp.op_errstr = "";
+ glusterd_submit_reply(req, &snap_info_rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gf_getsnap_name_uuid_rsp);
+
+ if (dict) {
+ dict_unref(dict);
+ }
+
+ if (dict_rsp) {
+ dict_unref(dict_rsp);
+ }
+
+ if (snap_info_rsp.dict.dict_val) {
+ GF_FREE(snap_info_rsp.dict.dict_val);
+ }
+
+ return 0;
+}
+
+int
+server_get_snap_info(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __server_get_snap_info);
+}
+
+static rpcsvc_actor_t gluster_handshake_actors[GF_HNDSK_MAXVALUE] = {
+ [GF_HNDSK_NULL] = {"NULL", NULL, NULL, GF_HNDSK_NULL, DRC_NA, 0},
+ [GF_HNDSK_GETSPEC] = {"GETSPEC", server_getspec, NULL, GF_HNDSK_GETSPEC,
+ DRC_NA, 0},
+ [GF_HNDSK_EVENT_NOTIFY] = {"EVENTNOTIFY", server_event_notify, NULL,
+ GF_HNDSK_EVENT_NOTIFY, DRC_NA, 0},
+ [GF_HNDSK_GET_VOLUME_INFO] = {"GETVOLUMEINFO", server_get_volume_info, NULL,
+ GF_HNDSK_GET_VOLUME_INFO, DRC_NA, 0},
+ [GF_HNDSK_GET_SNAPSHOT_INFO] = {"GETSNAPINFO", server_get_snap_info, NULL,
+ GF_HNDSK_GET_SNAPSHOT_INFO, DRC_NA, 0},
+};
+
+struct rpcsvc_program gluster_handshake_prog = {
+ .progname = "Gluster Handshake",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
+ .actors = gluster_handshake_actors,
+ .numactors = GF_HNDSK_MAXVALUE,
+};
+
+/* A minimal RPC program just for the cli getspec command */
+static rpcsvc_actor_t gluster_cli_getspec_actors[GF_HNDSK_MAXVALUE] = {
+ [GF_HNDSK_GETSPEC] = {"GETSPEC", server_getspec, NULL, GF_HNDSK_GETSPEC,
+ DRC_NA, 0},
+};
+
+struct rpcsvc_program gluster_cli_getspec_prog = {
+ .progname = "Gluster Handshake (CLI Getspec)",
+ .prognum = GLUSTER_HNDSK_PROGRAM,
+ .progver = GLUSTER_HNDSK_VERSION,
+ .actors = gluster_cli_getspec_actors,
+ .numactors = GF_HNDSK_MAXVALUE,
+};
+
+static char *glusterd_dump_proc[GF_DUMP_MAXVALUE] = {
+ [GF_DUMP_NULL] = "NULL",
+ [GF_DUMP_DUMP] = "DUMP",
+ [GF_DUMP_PING] = "PING",
+};
+
+static rpc_clnt_prog_t glusterd_dump_prog = {
+ .progname = "GLUSTERD-DUMP",
+ .prognum = GLUSTER_DUMP_PROGRAM,
+ .progver = GLUSTER_DUMP_VERSION,
+ .procnames = glusterd_dump_proc,
+};
+
+static rpcsvc_actor_t glusterd_mgmt_hndsk_actors[GD_MGMT_HNDSK_MAXVALUE] = {
+ [GD_MGMT_HNDSK_NULL] = {"NULL", NULL, NULL, GD_MGMT_HNDSK_NULL, DRC_NA, 0},
+ [GD_MGMT_HNDSK_VERSIONS] = {"MGMT-VERS", glusterd_mgmt_hndsk_versions, NULL,
+ GD_MGMT_HNDSK_VERSIONS, DRC_NA, 0},
+ [GD_MGMT_HNDSK_VERSIONS_ACK] = {"MGMT-VERS-ACK",
+ glusterd_mgmt_hndsk_versions_ack, NULL,
+ GD_MGMT_HNDSK_VERSIONS_ACK, DRC_NA, 0},
+};
+
+struct rpcsvc_program glusterd_mgmt_hndsk_prog = {
+ .progname = "Gluster MGMT Handshake",
+ .prognum = GD_MGMT_HNDSK_PROGRAM,
+ .progver = GD_MGMT_HNDSK_VERSION,
+ .actors = glusterd_mgmt_hndsk_actors,
+ .numactors = GD_MGMT_HNDSK_MAXVALUE,
+};
+
+static char *glusterd_mgmt_hndsk_proc[GD_MGMT_HNDSK_MAXVALUE] = {
+ [GD_MGMT_HNDSK_NULL] = "NULL",
+ [GD_MGMT_HNDSK_VERSIONS] = "MGMT-VERS",
+ [GD_MGMT_HNDSK_VERSIONS_ACK] = "MGMT-VERS-ACK",
+};
+
+static rpc_clnt_prog_t gd_clnt_mgmt_hndsk_prog = {
+ .progname = "Gluster MGMT Handshake",
+ .prognum = GD_MGMT_HNDSK_PROGRAM,
+ .progver = GD_MGMT_HNDSK_VERSION,
+ .procnames = glusterd_mgmt_hndsk_proc,
+};
+
+static int
+glusterd_event_connected_inject(glusterd_peerctx_t *peerctx)
+{
+ GF_ASSERT(peerctx);
+
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_probe_ctx_t *ctx = NULL;
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+
+ ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_CONNECTED, &event);
+
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL,
+ "Unable to get new event");
+ goto out;
+ }
+
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_probe_ctx_t);
+
+ if (!ctx) {
+ ret = -1;
+ gf_msg("glusterd", GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Memory not available");
+ goto out;
+ }
+
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
+ if (!peerinfo) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
+ "Could not find peer %s(%s)", peerctx->peername,
+ uuid_utoa(peerctx->peerid));
+ GF_FREE(ctx);
+ goto out;
+ }
+ ctx->hostname = gf_strdup(peerinfo->hostname);
+ ctx->port = peerinfo->port;
+ ctx->req = peerctx->args.req;
+ ctx->dict = peerctx->args.dict;
+
+ event->peername = gf_strdup(peerinfo->hostname);
+ gf_uuid_copy(event->peerid, peerinfo->uuid);
+ event->ctx = ctx;
+
+ ret = glusterd_friend_sm_inject_event(event);
+
+ RCU_READ_UNLOCK;
+
+ if (ret)
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL,
+ "Unable to inject "
+ "EVENT_CONNECTED ret = %d",
+ ret);
- ret = 0;
out:
+ gf_msg_debug("glusterd", 0, "returning %d", ret);
+ return ret;
+}
- /* don't use GF_FREE, buffer was allocated by libc */
- if (rsp.prog) {
- trav = rsp.prog;
- while (trav) {
- next = trav->next;
- free (trav->progname);
- free (trav);
- trav = next;
- }
- }
+int
+gd_validate_peer_op_version(xlator_t *this, glusterd_peerinfo_t *peerinfo,
+ dict_t *dict, char **errstr)
+{
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ int32_t peer_op_version = 0;
+ int32_t peer_min_op_version = 0;
+ int32_t peer_max_op_version = 0;
+
+ if (!dict) {
+ gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ goto out;
+ }
+
+ if (!this) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_XLATOR_NOT_DEFINED,
+ NULL);
+ goto out;
+ }
+
+ if (!peerinfo) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
+ goto out;
+ }
+
+ conf = this->private;
+
+ ret = dict_get_int32(dict, GD_OP_VERSION_KEY, &peer_op_version);
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", GD_OP_VERSION_KEY, NULL);
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, GD_MAX_OP_VERSION_KEY, &peer_max_op_version);
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", GD_MAX_OP_VERSION_KEY, NULL);
+ goto out;
+ }
+
+ ret = dict_get_int32(dict, GD_MIN_OP_VERSION_KEY, &peer_min_op_version);
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", GD_MIN_OP_VERSION_KEY, NULL);
+ goto out;
+ }
+
+ ret = -1;
+ /* Check if peer can support our op_version */
+ if ((peer_max_op_version < conf->op_version) ||
+ (peer_min_op_version > conf->op_version)) {
+ ret = gf_asprintf(errstr,
+ "Peer %s does not support required "
+ "op-version",
+ peerinfo->hostname);
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (peerinfo)
+ gf_msg_debug((this ? this->name : "glusterd"), 0, "Peer %s %s",
+ peerinfo->hostname, ((ret < 0) ? "rejected" : "accepted"));
+ return ret;
+}
+
+int
+__glusterd_mgmt_hndsk_version_ack_cbk(struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ gf_mgmt_hndsk_rsp rsp = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peerctx_t *peerctx = NULL;
+ char msg[64] = {
+ 0,
+ };
+
+ this = THIS;
+ frame = myframe;
+ peerctx = frame->local;
+
+ RCU_READ_LOCK;
+ peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
+ if (!peerinfo) {
+ gf_msg_debug(this->name, 0, "Could not find peer %s(%s)",
+ peerctx->peername, uuid_utoa(peerctx->peerid));
+ ret = -1;
+ goto out;
+ }
+
+ if (-1 == req->rpc_status) {
+ snprintf(msg, sizeof(msg),
+ "Error through RPC layer, retry again later");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_LAYER_ERROR, "%s", msg);
+ peerctx->errstr = gf_strdup(msg);
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_mgmt_hndsk_rsp);
+ if (ret < 0) {
+ snprintf(msg, sizeof(msg), "Failed to decode XDR");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s", msg);
+ peerctx->errstr = gf_strdup(msg);
+ goto out;
+ }
+
+ if (-1 == rsp.op_ret) {
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "Failed to get handshake ack from remote server");
+ gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_NO_HANDSHAKE_ACK,
+ "%s", msg);
+ peerctx->errstr = gf_strdup(msg);
+ goto out;
+ }
+
+ /* TODO: this is hardcoded as of now, but I don't forsee any problems
+ * with this as long as we are properly handshaking operating versions
+ */
+ peerinfo->mgmt = &gd_mgmt_prog;
+ peerinfo->peer = &gd_peer_prog;
+ peerinfo->mgmt_v3 = &gd_mgmt_v3_prog;
+
+ ret = default_notify(this, GF_EVENT_CHILD_UP, NULL);
+
+ if (GD_MODE_ON == peerctx->args.mode) {
+ (void)glusterd_event_connected_inject(peerctx);
+ peerctx->args.req = NULL;
+ } else if (GD_MODE_SWITCH_ON == peerctx->args.mode) {
+ peerctx->args.mode = GD_MODE_ON;
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_UNKNOWN_MODE,
+ "unknown mode %d", peerctx->args.mode);
+ }
+
+ ret = 0;
+out:
+
+ if (ret != 0 && peerinfo)
+ rpc_transport_disconnect(peerinfo->rpc->conn.trans, _gf_false);
+
+ RCU_READ_UNLOCK;
+
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+
+ if (rsp.hndsk.hndsk_val)
+ free(rsp.hndsk.hndsk_val);
+
+ glusterd_friend_sm();
+
+ return 0;
+}
+
+int
+glusterd_mgmt_hndsk_version_ack_cbk(struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ __glusterd_mgmt_hndsk_version_ack_cbk);
+}
+int
+__glusterd_mgmt_hndsk_version_cbk(struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ int op_errno = EINVAL;
+ gf_mgmt_hndsk_rsp rsp = {
+ 0,
+ };
+ gf_mgmt_hndsk_req arg = {{
+ 0,
+ }};
+ xlator_t *this = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peerctx_t *peerctx = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+ glusterd_conf_t *conf = NULL;
+ char msg[64] = {
+ 0,
+ };
+
+ this = THIS;
+ conf = this->private;
+ frame = myframe;
+ peerctx = frame->local;
+
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
+ if (!peerinfo) {
+ ret = -1;
+ gf_msg_debug(this->name, 0, "Could not find peer %s(%s)",
+ peerctx->peername, uuid_utoa(peerctx->peerid));
+ goto out;
+ }
+
+ if (-1 == req->rpc_status) {
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "Error through RPC layer, retry again later");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_LAYER_ERROR, "%s", msg);
+ peerctx->errstr = gf_strdup(msg);
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_mgmt_hndsk_rsp);
+ if (ret < 0) {
+ snprintf(msg, sizeof(msg),
+ "Failed to decode management "
+ "handshake response");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s", msg);
+ peerctx->errstr = gf_strdup(msg);
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_UNSERIALIZE(this, dict, rsp.hndsk.hndsk_val,
+ rsp.hndsk.hndsk_len, ret, op_errno, out);
+
+ op_errno = rsp.op_errno;
+ if (-1 == rsp.op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno, GD_MSG_VERS_GET_FAIL,
+ "failed to get the 'versions' from peer (%s)",
+ req->conn->trans->peerinfo.identifier);
+ goto out;
+ }
+
+ /* Check if peer can be part of cluster */
+ ret = gd_validate_peer_op_version(this, peerinfo, dict, &peerctx->errstr);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERSION_MISMATCH,
+ "failed to validate the operating version of peer (%s)",
+ peerinfo->hostname);
+ goto out;
+ }
+
+ rsp_dict = dict_new();
+ if (!rsp_dict)
+ goto out;
+
+ ret = dict_set_int32(rsp_dict, GD_OP_VERSION_KEY, conf->op_version);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to set operating version in dict");
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_SERIALIZE(this, rsp_dict, (&arg.hndsk.hndsk_val),
+ arg.hndsk.hndsk_len, op_errno, out);
+
+ ret = glusterd_submit_request(
+ peerinfo->rpc, &arg, frame, &gd_clnt_mgmt_hndsk_prog,
+ GD_MGMT_HNDSK_VERSIONS_ACK, NULL, this,
+ glusterd_mgmt_hndsk_version_ack_cbk, (xdrproc_t)xdr_gf_mgmt_hndsk_req);
+
+out:
+ if (ret) {
frame->local = NULL;
- STACK_DESTROY (frame->root);
+ STACK_DESTROY(frame->root);
+ if (peerinfo)
+ rpc_transport_disconnect(peerinfo->rpc->conn.trans, _gf_false);
+ }
+
+ RCU_READ_UNLOCK;
- if (ret != 0)
- rpc_transport_disconnect (peerinfo->rpc->conn.trans);
+ if (rsp.hndsk.hndsk_val)
+ free(rsp.hndsk.hndsk_val);
- return 0;
+ if (arg.hndsk.hndsk_val)
+ GF_FREE(arg.hndsk.hndsk_val);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ return 0;
}
+int
+glusterd_mgmt_hndsk_version_cbk(struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ __glusterd_mgmt_hndsk_version_cbk);
+}
int
-glusterd_peer_handshake (xlator_t *this, struct rpc_clnt *rpc,
- glusterd_peerctx_t *peerctx)
+glusterd_mgmt_handshake(xlator_t *this, glusterd_peerctx_t *peerctx)
{
- call_frame_t *frame = NULL;
- gf_dump_req req = {0,};
- int ret = -1;
+ call_frame_t *frame = NULL;
+ gf_mgmt_hndsk_req req = {
+ {
+ 0,
+ },
+ };
+ glusterd_peerinfo_t *peerinfo = NULL;
+ dict_t *req_dict = NULL;
+ int ret = -1;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ gf_smsg("glusterd", GF_LOG_WARNING, errno, GD_MSG_FRAME_CREATE_FAIL,
+ NULL);
+ goto out;
+ }
+
+ frame->local = peerctx;
+
+ req_dict = dict_new();
+ if (!req_dict) {
+ gf_smsg("glusterd", GF_LOG_WARNING, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ goto out;
+ }
+
+ ret = dict_set_dynstr(req_dict, GD_PEER_ID_KEY,
+ gf_strdup(uuid_utoa(MY_UUID)));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "failed to set peer ID in dict");
+ goto out;
+ }
+
+ GF_PROTOCOL_DICT_SERIALIZE(this, req_dict, (&req.hndsk.hndsk_val),
+ req.hndsk.hndsk_len, ret, out);
+
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
+ if (!peerinfo) {
+ RCU_READ_UNLOCK;
+ gf_msg_debug(THIS->name, 0, "Could not find peer %s(%s)",
+ peerctx->peername, uuid_utoa(peerctx->peerid));
+ goto out;
+ }
+
+ ret = glusterd_submit_request(
+ peerinfo->rpc, &req, frame, &gd_clnt_mgmt_hndsk_prog,
+ GD_MGMT_HNDSK_VERSIONS, NULL, this, glusterd_mgmt_hndsk_version_cbk,
+ (xdrproc_t)xdr_gf_mgmt_hndsk_req);
+
+ RCU_READ_UNLOCK;
+
+ ret = 0;
- frame = create_frame (this, this->ctx->pool);
- if (!frame)
- goto out;
+out:
+ if (req_dict)
+ dict_unref(req_dict);
+
+ if (ret && frame)
+ STACK_DESTROY(frame->root);
+
+ return ret;
+}
+
+int
+glusterd_set_clnt_mgmt_program(glusterd_peerinfo_t *peerinfo,
+ gf_prog_detail *prog)
+{
+ gf_prog_detail *trav = NULL;
+ int ret = -1;
+
+ if (!peerinfo || !prog)
+ goto out;
+
+ trav = prog;
+
+ while (trav) {
+ ret = -1;
+ if ((gd_mgmt_prog.prognum == trav->prognum) &&
+ (gd_mgmt_prog.progver == trav->progver)) {
+ peerinfo->mgmt = &gd_mgmt_prog;
+ ret = 0;
+ }
+
+ if ((gd_peer_prog.prognum == trav->prognum) &&
+ (gd_peer_prog.progver == trav->progver)) {
+ peerinfo->peer = &gd_peer_prog;
+ ret = 0;
+ }
+
+ if (ret) {
+ gf_msg_debug("glusterd", 0,
+ "%s (%" PRId64 ":%" PRId64 ") not supported",
+ trav->progname, trav->prognum, trav->progver);
+ }
- frame->local = peerctx;
+ trav = trav->next;
+ }
+
+ if (peerinfo->mgmt) {
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_VERS_INFO,
+ "Using Program %s, Num (%d), Version (%d)",
+ peerinfo->mgmt->progname, peerinfo->mgmt->prognum,
+ peerinfo->mgmt->progver);
+ }
+
+ if (peerinfo->peer) {
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_VERS_INFO,
+ "Using Program %s, Num (%d), Version (%d)",
+ peerinfo->peer->progname, peerinfo->peer->prognum,
+ peerinfo->peer->progver);
+ }
+
+ if (peerinfo->mgmt_v3) {
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_VERS_INFO,
+ "Using Program %s, Num (%d), Version (%d)",
+ peerinfo->mgmt_v3->progname, peerinfo->mgmt_v3->prognum,
+ peerinfo->mgmt_v3->progver);
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static gf_boolean_t
+_mgmt_hndsk_prog_present(gf_prog_detail *prog)
+{
+ gf_boolean_t ret = _gf_false;
+ gf_prog_detail *trav = NULL;
+
+ GF_ASSERT(prog);
- req.gfs_id = 0xcafe;
+ trav = prog;
- ret = glusterd_submit_request (peerctx->peerinfo->rpc, &req, frame,
- &glusterd_dump_prog, GF_DUMP_DUMP,
- NULL, this,
- glusterd_peer_dump_version_cbk,
- (xdrproc_t)xdr_gf_dump_req);
+ while (trav) {
+ if ((trav->prognum == GD_MGMT_HNDSK_PROGRAM) &&
+ (trav->progver == GD_MGMT_HNDSK_VERSION)) {
+ ret = _gf_true;
+ goto out;
+ }
+ trav = trav->next;
+ }
out:
- return ret;
+ return ret;
+}
+
+int
+__glusterd_peer_dump_version_cbk(struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int ret = -1;
+ gf_dump_rsp rsp = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ gf_prog_detail *trav = NULL;
+ gf_prog_detail *next = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peerctx_t *peerctx = NULL;
+ glusterd_conf_t *conf = NULL;
+ char msg[1024] = {
+ 0,
+ };
+
+ this = THIS;
+ conf = this->private;
+ frame = myframe;
+ peerctx = frame->local;
+
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
+ if (!peerinfo) {
+ gf_msg_debug(this->name, 0, "Couldn't find peer %s(%s)",
+ peerctx->peername, uuid_utoa(peerctx->peerid));
+ goto out;
+ }
+
+ if (-1 == req->rpc_status) {
+ snprintf(msg, sizeof(msg),
+ "Error through RPC layer, retry again later");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_LAYER_ERROR, "%s", msg);
+ peerctx->errstr = gf_strdup(msg);
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_dump_rsp);
+ if (ret < 0) {
+ snprintf(msg, sizeof(msg), "Failed to decode XDR");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL, "%s", msg);
+ peerctx->errstr = gf_strdup(msg);
+ goto out;
+ }
+ if (-1 == rsp.op_ret) {
+ snprintf(msg, sizeof(msg),
+ "Failed to get the 'versions' from remote server");
+ gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_VERS_GET_FAIL, "%s",
+ msg);
+ peerctx->errstr = gf_strdup(msg);
+ goto out;
+ }
+
+ if (_mgmt_hndsk_prog_present(rsp.prog)) {
+ gf_msg_debug(this->name, 0,
+ "Proceeding to op-version handshake with peer %s",
+ peerinfo->hostname);
+ ret = glusterd_mgmt_handshake(this, peerctx);
+ goto out;
+ } else if (conf->op_version > 1) {
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "Peer %s does not support required op-version",
+ peerinfo->hostname);
+ peerctx->errstr = gf_strdup(msg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VERSION_UNSUPPORTED, "%s",
+ msg);
+ goto out;
+ }
+
+ /* Make sure we assign the proper program to peer */
+ ret = glusterd_set_clnt_mgmt_program(peerinfo, rsp.prog);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_MGMT_PGM_SET_FAIL,
+ "failed to set the mgmt program");
+ goto out;
+ }
+
+ ret = default_notify(this, GF_EVENT_CHILD_UP, NULL);
+
+ if (GD_MODE_ON == peerctx->args.mode) {
+ (void)glusterd_event_connected_inject(peerctx);
+ peerctx->args.req = NULL;
+ } else if (GD_MODE_SWITCH_ON == peerctx->args.mode) {
+ peerctx->args.mode = GD_MODE_ON;
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_UNKNOWN_MODE,
+ "unknown mode %d", peerctx->args.mode);
+ }
+
+ ret = 0;
+
+out:
+ if (ret != 0 && peerinfo)
+ rpc_transport_disconnect(peerinfo->rpc->conn.trans, _gf_false);
+
+ RCU_READ_UNLOCK;
+
+ glusterd_friend_sm();
+ glusterd_op_sm();
+
+ /* don't use GF_FREE, buffer was allocated by libc */
+ if (rsp.prog) {
+ trav = rsp.prog;
+ while (trav) {
+ next = trav->next;
+ free(trav->progname);
+ free(trav);
+ trav = next;
+ }
+ }
+
+ frame->local = NULL;
+ STACK_DESTROY(frame->root);
+
+ return 0;
+}
+
+int
+glusterd_peer_dump_version_cbk(struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ __glusterd_peer_dump_version_cbk);
+}
+
+int
+glusterd_peer_dump_version(xlator_t *this, struct rpc_clnt *rpc,
+ glusterd_peerctx_t *peerctx)
+{
+ call_frame_t *frame = NULL;
+ gf_dump_req req = {
+ 0,
+ };
+ glusterd_peerinfo_t *peerinfo = NULL;
+ int ret = -1;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ gf_smsg(this->name, GF_LOG_WARNING, errno, GD_MSG_FRAME_CREATE_FAIL,
+ NULL);
+ goto out;
+ }
+
+ frame->local = peerctx;
+ if (!peerctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
+ goto out;
+ }
+
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find_by_generation(peerctx->peerinfo_gen);
+ if (!peerinfo) {
+ RCU_READ_UNLOCK;
+ gf_msg_debug(this->name, 0, "Couldn't find peer %s(%s)",
+ peerctx->peername, uuid_utoa(peerctx->peerid));
+ goto out;
+ }
+
+ req.gfs_id = 0xcafe;
+
+ ret = glusterd_submit_request(
+ peerinfo->rpc, &req, frame, &glusterd_dump_prog, GF_DUMP_DUMP, NULL,
+ this, glusterd_peer_dump_version_cbk, (xdrproc_t)xdr_gf_dump_req);
+
+ RCU_READ_UNLOCK;
+out:
+ if (ret && frame)
+ STACK_DESTROY(frame->root);
+
+ return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.c b/xlators/mgmt/glusterd/src/glusterd-hooks.c
index ab8e0700524..61c0f1c946f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-hooks.c
+++ b/xlators/mgmt/glusterd/src/glusterd-hooks.c
@@ -1,528 +1,641 @@
/*
- Copyright (c) 2007-2012 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2007-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#include "globals.h"
-#include "glusterfs.h"
-#include "dict.h"
-#include "xlator.h"
-#include "logging.h"
-#include "run.h"
-#include "defaults.h"
-#include "compat.h"
-#include "compat-errno.h"
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/run.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
#include "glusterd.h"
#include "glusterd-sm.h"
#include "glusterd-op-sm.h"
#include "glusterd-utils.h"
#include "glusterd-store.h"
#include "glusterd-hooks.h"
+#include "glusterd-messages.h"
#include <fnmatch.h>
#define EMPTY ""
-char glusterd_hook_dirnames[GD_OP_MAX][256] =
-{
- [GD_OP_NONE] = EMPTY,
- [GD_OP_CREATE_VOLUME] = "create",
- [GD_OP_START_BRICK] = EMPTY,
- [GD_OP_STOP_BRICK] = EMPTY,
- [GD_OP_DELETE_VOLUME] = "delete",
- [GD_OP_START_VOLUME] = "start",
- [GD_OP_STOP_VOLUME] = "stop",
- [GD_OP_DEFRAG_VOLUME] = EMPTY,
- [GD_OP_ADD_BRICK] = "add-brick",
- [GD_OP_REMOVE_BRICK] = "remove-brick",
- [GD_OP_REPLACE_BRICK] = EMPTY,
- [GD_OP_SET_VOLUME] = "set",
- [GD_OP_RESET_VOLUME] = EMPTY,
- [GD_OP_SYNC_VOLUME] = EMPTY,
- [GD_OP_LOG_ROTATE] = EMPTY,
- [GD_OP_GSYNC_SET] = EMPTY,
- [GD_OP_PROFILE_VOLUME] = EMPTY,
- [GD_OP_QUOTA] = EMPTY,
- [GD_OP_STATUS_VOLUME] = EMPTY,
- [GD_OP_REBALANCE] = EMPTY,
- [GD_OP_HEAL_VOLUME] = EMPTY,
- [GD_OP_STATEDUMP_VOLUME] = EMPTY,
- [GD_OP_LIST_VOLUME] = EMPTY,
- [GD_OP_CLEARLOCKS_VOLUME] = EMPTY,
- [GD_OP_DEFRAG_BRICK_VOLUME] = EMPTY,
+char glusterd_hook_dirnames[GD_OP_MAX][256] = {
+ [GD_OP_NONE] = EMPTY,
+ [GD_OP_CREATE_VOLUME] = "create",
+ [GD_OP_START_BRICK] = EMPTY,
+ [GD_OP_STOP_BRICK] = EMPTY,
+ [GD_OP_DELETE_VOLUME] = "delete",
+ [GD_OP_START_VOLUME] = "start",
+ [GD_OP_STOP_VOLUME] = "stop",
+ [GD_OP_DEFRAG_VOLUME] = EMPTY,
+ [GD_OP_ADD_BRICK] = "add-brick",
+ [GD_OP_REMOVE_BRICK] = "remove-brick",
+ [GD_OP_REPLACE_BRICK] = EMPTY,
+ [GD_OP_SET_VOLUME] = "set",
+ [GD_OP_RESET_VOLUME] = "reset",
+ [GD_OP_SYNC_VOLUME] = EMPTY,
+ [GD_OP_LOG_ROTATE] = EMPTY,
+ [GD_OP_GSYNC_CREATE] = "gsync-create",
+ [GD_OP_GSYNC_SET] = EMPTY,
+ [GD_OP_PROFILE_VOLUME] = EMPTY,
+ [GD_OP_QUOTA] = EMPTY,
+ [GD_OP_STATUS_VOLUME] = EMPTY,
+ [GD_OP_REBALANCE] = EMPTY,
+ [GD_OP_HEAL_VOLUME] = EMPTY,
+ [GD_OP_STATEDUMP_VOLUME] = EMPTY,
+ [GD_OP_LIST_VOLUME] = EMPTY,
+ [GD_OP_CLEARLOCKS_VOLUME] = EMPTY,
+ [GD_OP_DEFRAG_BRICK_VOLUME] = EMPTY,
+ [GD_OP_RESET_BRICK] = EMPTY,
};
#undef EMPTY
-static inline gf_boolean_t
-glusterd_is_hook_enabled (char *script)
+static gf_boolean_t
+glusterd_is_hook_enabled(char *script)
{
- return (script[0] == 'S');
+ return (script[0] == 'S' && (fnmatch("*.rpmsave", script, 0) != 0) &&
+ (fnmatch("*.rpmnew", script, 0) != 0));
}
int
-glusterd_hooks_create_hooks_directory (char *basedir)
+glusterd_hooks_create_hooks_directory(char *basedir)
{
- int ret = -1;
- int op = GD_OP_NONE;
- int type = GD_COMMIT_HOOK_NONE;
- char version_dir[PATH_MAX] = {0, };
- char path[PATH_MAX] = {0, };
- char *cmd_subdir = NULL;
- char type_subdir[GD_COMMIT_HOOK_MAX][256] = {{0, },
- "pre",
- "post"};
- glusterd_conf_t *priv = NULL;
-
- priv = THIS->private;
-
- snprintf (path, sizeof (path), "%s/hooks", basedir);
- ret = mkdir_p (path, 0777, _gf_true);
- if (ret) {
- gf_log (THIS->name, GF_LOG_CRITICAL, "Unable to create %s due"
- "to %s", path, strerror (errno));
- goto out;
+ int ret = -1;
+ int op = GD_OP_NONE;
+ int type = GD_COMMIT_HOOK_NONE;
+ char version_dir[PATH_MAX] = {
+ 0,
+ };
+ char path[PATH_MAX] = {
+ 0,
+ };
+ char *cmd_subdir = NULL;
+ char type_subdir[GD_COMMIT_HOOK_MAX][256] = {{
+ 0,
+ },
+ "pre",
+ "post"};
+ glusterd_conf_t *priv = NULL;
+ int32_t len = 0;
+
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+
+ snprintf(path, sizeof(path), "%s/hooks", basedir);
+ ret = mkdir_p(path, 0755, _gf_true);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED,
+ "Path=%s", path, NULL);
+ goto out;
+ }
+
+ GLUSTERD_GET_HOOKS_DIR(version_dir, GLUSTERD_HOOK_VER, priv);
+ ret = mkdir_p(version_dir, 0755, _gf_true);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_CREATE_DIR_FAILED,
+ "Directory=%s", version_dir, NULL);
+ goto out;
+ }
+
+ for (op = GD_OP_NONE + 1; op < GD_OP_MAX; op++) {
+ cmd_subdir = glusterd_hooks_get_hooks_cmd_subdir(op);
+ if (strlen(cmd_subdir) == 0)
+ continue;
+
+ len = snprintf(path, sizeof(path), "%s/%s", version_dir, cmd_subdir);
+ if ((len < 0) || (len >= sizeof(path))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ ret = -1;
+ goto out;
}
-
- GLUSTERD_GET_HOOKS_DIR (version_dir, GLUSTERD_HOOK_VER, priv);
- ret = mkdir_p (version_dir, 0777, _gf_true);
+ ret = mkdir_p(path, 0755, _gf_true);
if (ret) {
- gf_log (THIS->name, GF_LOG_CRITICAL, "Unable to create %s due "
- "to %s", version_dir, strerror (errno));
- goto out;
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno,
+ GD_MSG_CREATE_DIR_FAILED, "Path=%s", path, NULL);
+ goto out;
}
- for (op = GD_OP_NONE+1; op < GD_OP_MAX; op++) {
- cmd_subdir = glusterd_hooks_get_hooks_cmd_subdir (op);
- if (strlen (cmd_subdir) == 0)
- continue;
-
- snprintf (path, sizeof (path), "%s/%s", version_dir,
- cmd_subdir);
- ret = mkdir_p (path, 0777, _gf_true);
- if (ret) {
- gf_log (THIS->name, GF_LOG_CRITICAL,
- "Unable to create %s due to %s",
- path, strerror (errno));
- goto out;
- }
-
- for (type = GD_COMMIT_HOOK_PRE; type < GD_COMMIT_HOOK_MAX;
- type++) {
- snprintf (path, sizeof (path), "%s/%s/%s",
- version_dir, cmd_subdir, type_subdir[type]);
- ret = mkdir_p (path, 0777, _gf_true);
- if (ret) {
- gf_log (THIS->name, GF_LOG_CRITICAL,
- "Unable to create %s due to %s",
- path, strerror (errno));
- goto out;
- }
- }
+ for (type = GD_COMMIT_HOOK_PRE; type < GD_COMMIT_HOOK_MAX; type++) {
+ len = snprintf(path, sizeof(path), "%s/%s/%s", version_dir,
+ cmd_subdir, type_subdir[type]);
+ if ((len < 0) || (len >= sizeof(path))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL,
+ NULL);
+ ret = -1;
+ goto out;
+ }
+ ret = mkdir_p(path, 0755, _gf_true);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_CRITICAL, errno,
+ GD_MSG_CREATE_DIR_FAILED, "Path=%s", path, NULL);
+ goto out;
+ }
}
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-char*
-glusterd_hooks_get_hooks_cmd_subdir (glusterd_op_t op)
+char *
+glusterd_hooks_get_hooks_cmd_subdir(glusterd_op_t op)
{
- GF_ASSERT ((op > GD_OP_NONE) && (op < GD_OP_MAX));
+ GF_ASSERT((op > GD_OP_NONE) && (op < GD_OP_MAX));
- return glusterd_hook_dirnames[op];
+ return glusterd_hook_dirnames[op];
}
-int
-glusterd_hooks_set_volume_args (dict_t *dict, runner_t *runner)
+void
+glusterd_hooks_add_working_dir(runner_t *runner, glusterd_conf_t *priv)
{
- int i = 0;
- int count = 0;
- int ret = -1;
- char query[1024] = {0,};
- char *key = NULL;
- char *value = NULL;
-
- ret = dict_get_int32 (dict, "count", &count);
- if (ret)
- goto out;
-
- /* This will not happen unless op_ctx
- * is corrupted*/
- if (!count)
- goto out;
-
- runner_add_arg (runner, "-o");
- for (i = 1; (ret == 0); i++) {
- snprintf (query, sizeof (query), "key%d", i);
- ret = dict_get_str (dict, query, &key);
- if (ret)
- continue;
-
- snprintf (query, sizeof (query), "value%d", i);
- ret = dict_get_str (dict, query, &value);
- if (ret)
- continue;
-
- runner_argprintf (runner, "%s=%s", key, value);
- }
-
- ret = 0;
-out:
- return ret;
+ runner_argprintf(runner, "--gd-workdir=%s", priv->workdir);
}
-static int
-glusterd_hooks_add_op_args (runner_t *runner, glusterd_op_t op,
- dict_t *op_ctx, glusterd_commit_hook_type_t type)
+void
+glusterd_hooks_add_op(runner_t *runner, char *op)
{
- int vol_count = 0;
- gf_boolean_t truth = _gf_false;
- glusterd_volinfo_t *voliter = NULL;
- glusterd_conf_t *priv = NULL;
- int ret = -1;
-
- priv = THIS->private;
- list_for_each_entry (voliter, &priv->volumes,
- vol_list) {
- if (glusterd_is_volume_started (voliter))
- vol_count++;
- }
-
- ret = 0;
- switch (op) {
- case GD_OP_START_VOLUME:
- if (type == GD_COMMIT_HOOK_PRE &&
- vol_count == 0)
- truth = _gf_true;
-
- else if (type == GD_COMMIT_HOOK_POST &&
- vol_count == 1)
- truth = _gf_true;
-
- else
- truth = _gf_false;
+ runner_argprintf(runner, "--volume-op=%s", op);
+}
- runner_argprintf (runner, "--first=%s",
- truth? "yes":"no");
- break;
+void
+glusterd_hooks_add_hooks_version(runner_t *runner)
+{
+ runner_argprintf(runner, "--version=%d", GLUSTERD_HOOK_VER);
+}
- case GD_OP_STOP_VOLUME:
- if (type == GD_COMMIT_HOOK_PRE &&
- vol_count == 1)
- truth = _gf_true;
+static void
+glusterd_hooks_add_custom_args(dict_t *dict, runner_t *runner)
+{
+ char *hooks_args = NULL;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
- else if (type == GD_COMMIT_HOOK_POST &&
- vol_count == 0)
- truth = _gf_true;
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+ GF_VALIDATE_OR_GOTO(this->name, runner, out);
- else
- truth = _gf_false;
+ ret = dict_get_str(dict, "hooks_args", &hooks_args);
+ if (ret)
+ gf_msg_debug(this->name, 0, "No Hooks Arguments.");
+ else
+ gf_msg_debug(this->name, 0, "Hooks Args = %s", hooks_args);
- runner_argprintf (runner, "--last=%s",
- truth? "yes":"no");
- break;
+ if (hooks_args)
+ runner_argprintf(runner, "%s", hooks_args);
- case GD_OP_SET_VOLUME:
- ret = glusterd_hooks_set_volume_args (op_ctx, runner);
- break;
+out:
+ return;
+}
- default:
- break;
+int
+glusterd_hooks_set_volume_args(dict_t *dict, runner_t *runner)
+{
+ int i = 0;
+ int count = 0;
+ int ret = -1;
+ int flag = 0;
+ char query[1024] = {
+ 0,
+ };
+ char *key = NULL;
+ char *value = NULL;
+ char *inet_family = NULL;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = dict_get_int32(dict, "count", &count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=count", NULL);
+ goto out;
+ }
+
+ /* This will not happen unless op_ctx
+ * is corrupted*/
+ if (!count) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ENTRY, "count",
+ NULL);
+ goto out;
+ }
+
+ runner_add_arg(runner, "-o");
+ for (i = 1; ret == 0; i++) {
+ snprintf(query, sizeof(query), "key%d", i);
+ ret = dict_get_str(dict, query, &key);
+ if (ret)
+ continue;
+ snprintf(query, sizeof(query), "value%d", i);
+ ret = dict_get_str(dict, query, &value);
+ if (ret)
+ continue;
+
+ runner_argprintf(runner, "%s=%s", key, value);
+ if ((strncmp(key, "cluster.enable-shared-storage",
+ SLEN("cluster.enable-shared-storage")) == 0 ||
+ strncmp(key, "enable-shared-storage",
+ SLEN("enable-shared-storage")) == 0) &&
+ strncmp(value, "enable", SLEN("enable")) == 0)
+ flag = 1;
+ }
+
+ glusterd_hooks_add_custom_args(dict, runner);
+ if (flag == 1) {
+ ret = dict_get_str_sizen(this->options, "transport.address-family",
+ &inet_family);
+ if (!ret) {
+ runner_argprintf(runner, "transport.address-family=%s",
+ inet_family);
}
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
- return ret;
+static int
+glusterd_hooks_add_op_args(runner_t *runner, glusterd_op_t op, dict_t *op_ctx,
+ glusterd_commit_hook_type_t type)
+{
+ int vol_count = 0;
+ gf_boolean_t truth = _gf_false;
+ glusterd_volinfo_t *voliter = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+
+ priv = THIS->private;
+ cds_list_for_each_entry(voliter, &priv->volumes, vol_list)
+ {
+ if (glusterd_is_volume_started(voliter))
+ vol_count++;
+ }
+
+ ret = 0;
+ switch (op) {
+ case GD_OP_START_VOLUME:
+ if (type == GD_COMMIT_HOOK_PRE && vol_count == 0)
+ truth = _gf_true;
+
+ else if (type == GD_COMMIT_HOOK_POST && vol_count == 1)
+ truth = _gf_true;
+
+ else
+ truth = _gf_false;
+
+ runner_argprintf(runner, "--first=%s", truth ? "yes" : "no");
+
+ glusterd_hooks_add_hooks_version(runner);
+ glusterd_hooks_add_op(runner, "start");
+ glusterd_hooks_add_working_dir(runner, priv);
+
+ break;
+
+ case GD_OP_STOP_VOLUME:
+ if (type == GD_COMMIT_HOOK_PRE && vol_count == 1)
+ truth = _gf_true;
+
+ else if (type == GD_COMMIT_HOOK_POST && vol_count == 0)
+ truth = _gf_true;
+
+ else
+ truth = _gf_false;
+
+ runner_argprintf(runner, "--last=%s", truth ? "yes" : "no");
+ break;
+
+ case GD_OP_SET_VOLUME:
+ ret = glusterd_hooks_set_volume_args(op_ctx, runner);
+ glusterd_hooks_add_working_dir(runner, priv);
+ break;
+
+ case GD_OP_GSYNC_CREATE:
+ glusterd_hooks_add_custom_args(op_ctx, runner);
+ break;
+
+ case GD_OP_ADD_BRICK:
+ glusterd_hooks_add_hooks_version(runner);
+ glusterd_hooks_add_op(runner, "add-brick");
+ glusterd_hooks_add_working_dir(runner, priv);
+ break;
+
+ case GD_OP_RESET_VOLUME:
+ glusterd_hooks_add_hooks_version(runner);
+ glusterd_hooks_add_op(runner, "reset");
+ glusterd_hooks_add_working_dir(runner, priv);
+ break;
+
+ default:
+ break;
+ }
+
+ return ret;
}
int
-glusterd_hooks_run_hooks (char *hooks_path, glusterd_op_t op, dict_t *op_ctx,
- glusterd_commit_hook_type_t type)
+glusterd_hooks_run_hooks(char *hooks_path, glusterd_op_t op, dict_t *op_ctx,
+ glusterd_commit_hook_type_t type)
{
- xlator_t *this = NULL;
- glusterd_conf_t *priv = NULL;
- runner_t runner = {0, };
- struct dirent *entry = NULL;
- DIR *hookdir = NULL;
- char *volname = NULL;
- char **lines = NULL;
- int N = 8; /*arbitrary*/
- int lineno = 0;
- int line_count = 0;
- int ret = -1;
-
- this = THIS;
- priv = this->private;
-
- ret = dict_get_str (op_ctx, "volname", &volname);
- if (ret) {
- gf_log (this->name, GF_LOG_CRITICAL, "Failed to get volname "
- "from operation context");
+ xlator_t *this = NULL;
+ runner_t runner = {
+ 0,
+ };
+ DIR *hookdir = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char *volname = NULL;
+ char **lines = NULL;
+ int N = 8; /*arbitrary*/
+ int lineno = 0;
+ int line_count = 0;
+ int ret = -1;
+
+ this = THIS;
+
+ ret = dict_get_str(op_ctx, "volname", &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_DICT_GET_FAILED,
+ "Failed to get volname "
+ "from operation context");
+ goto out;
+ }
+
+ hookdir = sys_opendir(hooks_path);
+ if (!hookdir) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Failed to open dir %s", hooks_path);
+ goto out;
+ }
+
+ lines = GF_CALLOC(1, N * sizeof(*lines), gf_gld_mt_charptr);
+ if (!lines) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ ret = -1;
+ line_count = 0;
+
+ while ((entry = sys_readdir(hookdir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
+ if (line_count == N - 1) {
+ N *= 2;
+ lines = GF_REALLOC(lines, N * sizeof(char *));
+ if (!lines) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
}
- hookdir = opendir (hooks_path);
- if (!hookdir) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR, "Failed to open dir %s, due "
- "to %s", hooks_path, strerror (errno));
- goto out;
+ if (glusterd_is_hook_enabled(entry->d_name)) {
+ lines[line_count] = gf_strdup(entry->d_name);
+ line_count++;
}
+ }
- lines = GF_CALLOC (1, N * sizeof (*lines), gf_gld_mt_charptr);
- if (!lines) {
- ret = -1;
- goto out;
- }
+ lines[line_count] = NULL;
+ lines = GF_REALLOC(lines, (line_count + 1) * sizeof(char *));
+ if (!lines)
+ goto out;
- ret = -1;
- line_count = 0;
- glusterd_for_each_entry (entry, hookdir);
- while (entry) {
- if (line_count == N-1) {
- N *= 2;
- lines = GF_REALLOC (lines, N * sizeof (char *));
- if (!lines)
- goto out;
- }
-
- if (glusterd_is_hook_enabled (entry->d_name)) {
- lines[line_count] = gf_strdup (entry->d_name);
- line_count++;
- }
-
- glusterd_for_each_entry (entry, hookdir);
- }
+ qsort(lines, line_count, sizeof(*lines), glusterd_compare_lines);
- lines[line_count] = NULL;
- lines = GF_REALLOC (lines, (line_count + 1) * sizeof (char *));
- if (!lines)
- goto out;
+ for (lineno = 0; lineno < line_count; lineno++) {
+ runinit(&runner);
+ runner_argprintf(&runner, "%s/%s", hooks_path, lines[lineno]);
+ /*Add future command line arguments to hook scripts below*/
+ runner_argprintf(&runner, "--volname=%s", volname);
+ ret = glusterd_hooks_add_op_args(&runner, op, op_ctx, type);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_ADD_OP_ARGS_FAIL,
+ "Failed to add "
+ "command specific arguments");
+ goto out;
+ }
- qsort (lines, line_count, sizeof (*lines), glusterd_compare_lines);
-
- for (lineno = 0; lineno < line_count; lineno++) {
-
- runinit (&runner);
- runner_argprintf (&runner, "%s/%s", hooks_path, lines[lineno]);
- /*Add future command line arguments to hook scripts below*/
- runner_argprintf (&runner, "--volname=%s", volname);
- ret = glusterd_hooks_add_op_args (&runner, op, op_ctx, type);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to add "
- "command specific arguments");
- goto out;
- }
-
- ret = runner_run_reuse (&runner);
- if (ret) {
- runner_log (&runner, this->name, GF_LOG_ERROR,
- "Failed to execute script");
- } else {
- runner_log (&runner, this->name, GF_LOG_INFO,
- "Ran script");
- }
- runner_end (&runner);
+ ret = runner_run_reuse(&runner);
+ if (ret) {
+ runner_log(&runner, this->name, GF_LOG_ERROR,
+ "Failed to execute script");
+ } else {
+ runner_log(&runner, this->name, GF_LOG_INFO, "Ran script");
}
+ runner_end(&runner);
+ }
- ret = 0;
+ ret = 0;
out:
- if (lines) {
- for (lineno = 0; lineno < line_count+1; lineno++)
- if (lines[lineno])
- GF_FREE (lines[lineno]);
+ if (lines) {
+ for (lineno = 0; lineno < line_count + 1; lineno++)
+ GF_FREE(lines[lineno]);
- GF_FREE (lines);
- }
+ GF_FREE(lines);
+ }
- if (hookdir)
- closedir (hookdir);
+ if (hookdir)
+ sys_closedir(hookdir);
- return ret;
+ return ret;
}
int
-glusterd_hooks_post_stub_enqueue (char *scriptdir, glusterd_op_t op,
- dict_t *op_ctx)
+glusterd_hooks_post_stub_enqueue(char *scriptdir, glusterd_op_t op,
+ dict_t *op_ctx)
{
- int ret = -1;
- glusterd_hooks_stub_t *stub = NULL;
- glusterd_hooks_private_t *hooks_priv = NULL;
- glusterd_conf_t *conf = NULL;
-
- conf = THIS->private;
- hooks_priv = conf->hooks_priv;
-
- ret = glusterd_hooks_stub_init (&stub, scriptdir, op, op_ctx);
- if (ret)
- goto out;
-
- pthread_mutex_lock (&hooks_priv->mutex);
- {
- hooks_priv->waitcount++;
- list_add_tail (&stub->all_hooks, &hooks_priv->list);
- pthread_cond_signal (&hooks_priv->cond);
- }
- pthread_mutex_unlock (&hooks_priv->mutex);
-
- ret = 0;
+ int ret = -1;
+ glusterd_hooks_stub_t *stub = NULL;
+ glusterd_hooks_private_t *hooks_priv = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ conf = THIS->private;
+ hooks_priv = conf->hooks_priv;
+
+ ret = glusterd_hooks_stub_init(&stub, scriptdir, op, op_ctx);
+ if (ret)
+ goto out;
+
+ pthread_mutex_lock(&hooks_priv->mutex);
+ {
+ hooks_priv->waitcount++;
+ cds_list_add_tail(&stub->all_hooks, &hooks_priv->list);
+ pthread_cond_signal(&hooks_priv->cond);
+ }
+ pthread_mutex_unlock(&hooks_priv->mutex);
+
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-glusterd_hooks_stub_init (glusterd_hooks_stub_t **stub, char *scriptdir,
- glusterd_op_t op, dict_t *op_ctx)
+glusterd_hooks_stub_init(glusterd_hooks_stub_t **stub, char *scriptdir,
+ glusterd_op_t op, dict_t *op_ctx)
{
- int ret = -1;
- glusterd_hooks_stub_t *hooks_stub = NULL;
-
- GF_ASSERT (stub);
- if (!stub)
- goto out;
-
- hooks_stub = GF_CALLOC (1, sizeof (*hooks_stub),
- gf_gld_mt_hooks_stub_t);
- if (!hooks_stub)
- goto out;
-
- INIT_LIST_HEAD (&hooks_stub->all_hooks);
- hooks_stub->op = op;
- hooks_stub->scriptdir = gf_strdup (scriptdir);
- if (!hooks_stub->scriptdir)
- goto out;
-
- hooks_stub->op_ctx = dict_copy_with_ref (op_ctx, hooks_stub->op_ctx);
- if (!hooks_stub->op_ctx)
- goto out;
-
- *stub = hooks_stub;
- ret = 0;
+ int ret = -1;
+ glusterd_hooks_stub_t *hooks_stub = NULL;
+
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(stub);
+ if (!stub)
+ goto out;
+
+ hooks_stub = GF_CALLOC(1, sizeof(*hooks_stub), gf_gld_mt_hooks_stub_t);
+ if (!hooks_stub) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ CDS_INIT_LIST_HEAD(&hooks_stub->all_hooks);
+ hooks_stub->op = op;
+ hooks_stub->scriptdir = gf_strdup(scriptdir);
+ if (!hooks_stub->scriptdir) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "scriptdir=%s", scriptdir, NULL);
+ goto out;
+ }
+
+ hooks_stub->op_ctx = dict_copy_with_ref(op_ctx, hooks_stub->op_ctx);
+ if (!hooks_stub->op_ctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ *stub = hooks_stub;
+ ret = 0;
out:
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Failed to initialize "
- "post hooks stub");
- glusterd_hooks_stub_cleanup (hooks_stub);
- }
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_HOOK_STUB_INIT_FAIL,
+ NULL);
+ glusterd_hooks_stub_cleanup(hooks_stub);
+ }
- return ret;
+ return ret;
}
void
-glusterd_hooks_stub_cleanup (glusterd_hooks_stub_t *stub)
+glusterd_hooks_stub_cleanup(glusterd_hooks_stub_t *stub)
{
- if (!stub) {
- gf_log_callingfn (THIS->name, GF_LOG_WARNING,
- "hooks_stub is NULL");
- return;
- }
+ if (!stub) {
+ gf_msg_callingfn(THIS->name, GF_LOG_WARNING, 0, GD_MSG_HOOK_STUB_NULL,
+ "hooks_stub is NULL");
+ return;
+ }
- if (stub->op_ctx)
- dict_unref (stub->op_ctx);
+ if (stub->op_ctx)
+ dict_unref(stub->op_ctx);
- if (stub->scriptdir)
- GF_FREE (stub->scriptdir);
+ GF_FREE(stub->scriptdir);
- if (stub)
- GF_FREE (stub);
+ GF_FREE(stub);
}
-static void*
-hooks_worker (void *args)
+static void *
+hooks_worker(void *args)
{
- glusterd_conf_t *conf = NULL;
- glusterd_hooks_private_t *hooks_priv = NULL;
- glusterd_hooks_stub_t *stub = NULL;
-
- THIS = args;
- conf = THIS->private;
- hooks_priv = conf->hooks_priv;
-
- for (;;) {
- pthread_mutex_lock (&hooks_priv->mutex);
- {
- while (list_empty (&hooks_priv->list)) {
- pthread_cond_wait (&hooks_priv->cond,
- &hooks_priv->mutex);
- }
- stub = list_entry (hooks_priv->list.next,
- glusterd_hooks_stub_t,
- all_hooks);
- list_del_init (&stub->all_hooks);
- hooks_priv->waitcount--;
-
- }
- pthread_mutex_unlock (&hooks_priv->mutex);
-
- glusterd_hooks_run_hooks (stub->scriptdir, stub->op,
- stub->op_ctx, GD_COMMIT_HOOK_POST);
- glusterd_hooks_stub_cleanup (stub);
+ glusterd_conf_t *conf = NULL;
+ glusterd_hooks_private_t *hooks_priv = NULL;
+ glusterd_hooks_stub_t *stub = NULL;
+
+ THIS = args;
+ conf = THIS->private;
+ hooks_priv = conf->hooks_priv;
+
+ for (;;) {
+ pthread_mutex_lock(&hooks_priv->mutex);
+ {
+ while (cds_list_empty(&hooks_priv->list)) {
+ pthread_cond_wait(&hooks_priv->cond, &hooks_priv->mutex);
+ }
+ stub = cds_list_entry(hooks_priv->list.next, glusterd_hooks_stub_t,
+ all_hooks);
+ cds_list_del_init(&stub->all_hooks);
+ hooks_priv->waitcount--;
}
+ pthread_mutex_unlock(&hooks_priv->mutex);
+
+ glusterd_hooks_run_hooks(stub->scriptdir, stub->op, stub->op_ctx,
+ GD_COMMIT_HOOK_POST);
+ glusterd_hooks_stub_cleanup(stub);
+ }
- return NULL;
+ return NULL;
}
int
-glusterd_hooks_priv_init (glusterd_hooks_private_t **new)
+glusterd_hooks_priv_init(glusterd_hooks_private_t **new)
{
- int ret = -1;
- glusterd_hooks_private_t *hooks_priv = NULL;
-
- if (!new)
- goto out;
-
- hooks_priv = GF_CALLOC (1, sizeof (*hooks_priv),
- gf_gld_mt_hooks_priv_t);
- if (!hooks_priv)
- goto out;
-
- pthread_mutex_init (&hooks_priv->mutex, NULL);
- pthread_cond_init (&hooks_priv->cond, NULL);
- INIT_LIST_HEAD (&hooks_priv->list);
- hooks_priv->waitcount = 0;
-
- *new = hooks_priv;
- ret = 0;
+ int ret = -1;
+ glusterd_hooks_private_t *hooks_priv = NULL;
+
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (!new) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
+ goto out;
+ }
+
+ hooks_priv = GF_CALLOC(1, sizeof(*hooks_priv), gf_gld_mt_hooks_priv_t);
+ if (!hooks_priv) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ pthread_mutex_init(&hooks_priv->mutex, NULL);
+ pthread_cond_init(&hooks_priv->cond, NULL);
+ CDS_INIT_LIST_HEAD(&hooks_priv->list);
+ hooks_priv->waitcount = 0;
+
+ *new = hooks_priv;
+ ret = 0;
out:
- return ret;
+ return ret;
}
int
-glusterd_hooks_spawn_worker (xlator_t *this)
+glusterd_hooks_spawn_worker(xlator_t *this)
{
- int ret = -1;
- glusterd_conf_t *conf = NULL;
- glusterd_hooks_private_t *hooks_priv = NULL;
-
-
- ret = glusterd_hooks_priv_init (&hooks_priv);
- if (ret)
- goto out;
-
- conf = this->private;
- conf->hooks_priv = hooks_priv;
- ret = pthread_create (&hooks_priv->worker, NULL, hooks_worker,
- (void *)this);
- if (ret)
- gf_log (this->name, GF_LOG_CRITICAL, "Failed to spawn post "
- "hooks worker thread");
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ glusterd_hooks_private_t *hooks_priv = NULL;
+
+ ret = glusterd_hooks_priv_init(&hooks_priv);
+ if (ret)
+ goto out;
+
+ conf = this->private;
+ conf->hooks_priv = hooks_priv;
+ ret = gf_thread_create(&hooks_priv->worker, NULL, hooks_worker,
+ (void *)this, "gdhooks");
+ if (ret)
+ gf_msg(this->name, GF_LOG_CRITICAL, errno, GD_MSG_SPAWN_THREADS_FAIL,
+ "Failed to spawn post "
+ "hooks worker thread");
out:
- return ret;
+ return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-hooks.h b/xlators/mgmt/glusterd/src/glusterd-hooks.h
index e717395be40..f8b887b9bd7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-hooks.h
+++ b/xlators/mgmt/glusterd/src/glusterd-hooks.h
@@ -1,101 +1,88 @@
/*
- Copyright (c) 2006-2012 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _GLUSTERD_HOOKS_H_
#define _GLUSTERD_HOOKS_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <fnmatch.h>
-#define GLUSTERD_GET_HOOKS_DIR(path, version, priv) \
- snprintf (path, PATH_MAX, "%s/hooks/%d", priv->workdir,\
- version);
+#define GLUSTERD_GET_HOOKS_DIR(path, version, priv) \
+ do { \
+ int32_t len; \
+ len = snprintf(path, PATH_MAX, "%s/hooks/%d", priv->workdir, version); \
+ if (len < 0) { \
+ path[0] = 0; \
+ } \
+ } while (0)
-#define GLUSTERD_HOOK_VER 1
+#define GLUSTERD_HOOK_VER 1
-#define GD_HOOKS_SPECIFIC_KEY "user.*"
+#define GD_HOOKS_SPECIFIC_KEY "user.*"
typedef enum glusterd_commit_hook_type {
- GD_COMMIT_HOOK_NONE = 0,
- GD_COMMIT_HOOK_PRE,
- GD_COMMIT_HOOK_POST,
- GD_COMMIT_HOOK_MAX
+ GD_COMMIT_HOOK_NONE = 0,
+ GD_COMMIT_HOOK_PRE,
+ GD_COMMIT_HOOK_POST,
+ GD_COMMIT_HOOK_MAX
} glusterd_commit_hook_type_t;
typedef struct hooks_private {
- struct list_head list;
- int waitcount; //debug purposes
- pthread_mutex_t mutex;
- pthread_cond_t cond;
- pthread_t worker;
+ struct cds_list_head list;
+ pthread_mutex_t mutex;
+ pthread_cond_t cond;
+ pthread_t worker;
+ int waitcount; // debug purposes
} glusterd_hooks_private_t;
typedef struct hooks_stub {
- struct list_head all_hooks;
- char *scriptdir;
- glusterd_op_t op;
- dict_t *op_ctx;
+ struct cds_list_head all_hooks;
+ char *scriptdir;
+ dict_t *op_ctx;
+ glusterd_op_t op;
} glusterd_hooks_stub_t;
-
static inline gf_boolean_t
-is_key_glusterd_hooks_friendly (xlator_t *this, char *volname, char *key)
+is_key_glusterd_hooks_friendly(char *key)
{
- gf_boolean_t is_friendly = _gf_false;
+ gf_boolean_t is_friendly = _gf_false;
- /* This is very specific to hooks friendly behavior */
- if (fnmatch (GD_HOOKS_SPECIFIC_KEY, key, FNM_NOESCAPE) == 0) {
- gf_log (this->name, GF_LOG_DEBUG,
- "user configured key (%s) sent on volume %s",
- key, volname);
- is_friendly = _gf_true;
- }
+ /* This is very specific to hooks friendly behavior */
+ if (fnmatch(GD_HOOKS_SPECIFIC_KEY, key, FNM_NOESCAPE) == 0) {
+ gf_msg_debug(THIS->name, 0, "user namespace key %s", key);
+ is_friendly = _gf_true;
+ }
- return is_friendly;
+ return is_friendly;
}
int
-glusterd_hooks_create_hooks_directory (char *basedir);
+glusterd_hooks_create_hooks_directory(char *basedir);
char *
-glusterd_hooks_get_hooks_cmd_subdir (glusterd_op_t op);
+glusterd_hooks_get_hooks_cmd_subdir(glusterd_op_t op);
int
-glusterd_hooks_run_hooks (char *hooks_path, glusterd_op_t op, dict_t *op_ctx,
- glusterd_commit_hook_type_t type);
+glusterd_hooks_run_hooks(char *hooks_path, glusterd_op_t op, dict_t *op_ctx,
+ glusterd_commit_hook_type_t type);
int
-glusterd_hooks_spawn_worker (xlator_t *this);
+glusterd_hooks_spawn_worker(xlator_t *this);
int
-glusterd_hooks_stub_init (glusterd_hooks_stub_t **stub, char *scriptdir,
- glusterd_op_t op, dict_t *op_ctx);
+glusterd_hooks_stub_init(glusterd_hooks_stub_t **stub, char *scriptdir,
+ glusterd_op_t op, dict_t *op_ctx);
void
-glusterd_hooks_stub_cleanup (glusterd_hooks_stub_t *stub);
+glusterd_hooks_stub_cleanup(glusterd_hooks_stub_t *stub);
int
-glusterd_hooks_post_stub_enqueue (char *scriptdir, glusterd_op_t op,
- dict_t *op_ctx);
+glusterd_hooks_post_stub_enqueue(char *scriptdir, glusterd_op_t op,
+ dict_t *op_ctx);
int
-glusterd_hooks_priv_init (glusterd_hooks_private_t **new);
+glusterd_hooks_priv_init(glusterd_hooks_private_t **new);
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.c b/xlators/mgmt/glusterd/src/glusterd-locks.c
new file mode 100644
index 00000000000..11523f2854b
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-locks.c
@@ -0,0 +1,870 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/common-utils.h>
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-locks.h"
+#include "glusterd-errno.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
+#include "glusterd-messages.h"
+
+#include <signal.h>
+
+#define GF_MAX_LOCKING_ENTITIES 3
+
+/* Valid entities that the mgmt_v3 lock can hold locks upon *
+ * To add newer entities to be locked, we can just add more *
+ * entries to this table along with the type and default value */
+glusterd_valid_entities valid_types[] = {
+ {"vol", _gf_true},
+ {"snap", _gf_false},
+ {"global", _gf_false},
+ {NULL},
+};
+
+/* Checks if the lock request is for a valid entity */
+static gf_boolean_t
+glusterd_mgmt_v3_is_type_valid(char *type)
+{
+ int i = 0;
+
+ GF_ASSERT(type);
+
+ for (i = 0; valid_types[i].type; i++) {
+ if (!strcmp(type, valid_types[i].type)) {
+ return _gf_true;
+ }
+ }
+
+ return _gf_false;
+}
+
+/* Initialize the global mgmt_v3 lock list(dict) when
+ * glusterd is spawned */
+int32_t
+glusterd_mgmt_v3_lock_init()
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ priv->mgmt_v3_lock = dict_new();
+ if (!priv->mgmt_v3_lock)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* Destroy the global mgmt_v3 lock list(dict) when
+ * glusterd cleanup is performed */
+void
+glusterd_mgmt_v3_lock_fini()
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ if (priv->mgmt_v3_lock)
+ dict_unref(priv->mgmt_v3_lock);
+}
+
+/* Initialize the global mgmt_v3_timer lock list(dict) when
+ * glusterd is spawned */
+int32_t
+glusterd_mgmt_v3_lock_timer_init()
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ priv->mgmt_v3_lock_timer = dict_new();
+ if (!priv->mgmt_v3_lock_timer)
+ goto out;
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* Destroy the global mgmt_v3_timer lock list(dict) when
+ * glusterd cleanup is performed */
+void
+glusterd_mgmt_v3_lock_timer_fini()
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ if (priv->mgmt_v3_lock_timer)
+ dict_unref(priv->mgmt_v3_lock_timer);
+out:
+ return;
+}
+
+static int32_t
+glusterd_get_mgmt_v3_lock_owner(char *key, uuid_t *uuid)
+{
+ int32_t ret = -1;
+ glusterd_mgmt_v3_lock_obj *lock_obj = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ if (!key || !uuid) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "key or uuid is null.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_bin(priv->mgmt_v3_lock, key, (void **)&lock_obj);
+ if (!ret)
+ gf_uuid_copy(*uuid, lock_obj->lock_owner);
+
+ ret = 0;
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* This function is called with the locked_count and type, to *
+ * release all the acquired locks. */
+static int32_t
+glusterd_release_multiple_locks_per_entity(dict_t *dict, uuid_t uuid,
+ int32_t locked_count, char *type)
+{
+ char name_buf[PATH_MAX] = "";
+ char *name = NULL;
+ int32_t i = -1;
+ int32_t op_ret = 0;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(type);
+
+ if (locked_count == 0) {
+ gf_msg_debug(this->name, 0, "No %s locked as part of this transaction",
+ type);
+ goto out;
+ }
+
+ /* Release all the locks held */
+ for (i = 0; i < locked_count; i++) {
+ ret = snprintf(name_buf, sizeof(name_buf), "%sname%d", type, i + 1);
+
+ /* Looking for volname1, volname2 or snapname1, *
+ * as key in the dict snapname2 */
+ ret = dict_get_strn(dict, name_buf, ret, &name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get %s locked_count = %d", name_buf,
+ locked_count);
+ op_ret = ret;
+ continue;
+ }
+
+ ret = glusterd_mgmt_v3_unlock(name, uuid, type);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Failed to release lock for %s.", name);
+ op_ret = ret;
+ }
+ }
+
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", op_ret);
+ return op_ret;
+}
+
+/* Given the count and type of the entity this function acquires *
+ * locks on multiple elements of the same entity. For example: *
+ * If type is "vol" this function tries to acquire locks on multiple *
+ * volumes */
+static int32_t
+glusterd_acquire_multiple_locks_per_entity(dict_t *dict, uuid_t uuid,
+ uint32_t *op_errno, int32_t count,
+ char *type)
+{
+ char name_buf[PATH_MAX] = "";
+ char *name = NULL;
+ int32_t i = -1;
+ int32_t ret = -1;
+ int32_t locked_count = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(type);
+
+ /* Locking one element after other */
+ for (i = 0; i < count; i++) {
+ ret = snprintf(name_buf, sizeof(name_buf), "%sname%d", type, i + 1);
+
+ /* Looking for volname1, volname2 or snapname1, *
+ * as key in the dict snapname2 */
+ ret = dict_get_strn(dict, name_buf, ret, &name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get %s count = %d", name_buf, count);
+ break;
+ }
+
+ ret = glusterd_mgmt_v3_lock(name, uuid, op_errno, type);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL,
+ "Failed to acquire lock for %s %s "
+ "on behalf of %s. Reversing "
+ "this transaction",
+ type, name, uuid_utoa(uuid));
+ break;
+ }
+ locked_count++;
+ }
+
+ if (count == locked_count) {
+ /* If all locking ops went successfully, return as success */
+ ret = 0;
+ goto out;
+ }
+
+ /* If we failed to lock one element, unlock others and return failure */
+ ret = glusterd_release_multiple_locks_per_entity(dict, uuid, locked_count,
+ type);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MULTIPLE_LOCK_RELEASE_FAIL,
+ "Failed to release multiple %s locks", type);
+ }
+ ret = -1;
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* Given the type of entity, this function figures out if it should unlock a *
+ * single element of multiple elements of the said entity. For example: *
+ * if the type is "vol", this function will accordingly unlock a single volume *
+ * or multiple volumes */
+static int32_t
+glusterd_mgmt_v3_unlock_entity(dict_t *dict, uuid_t uuid, char *type,
+ gf_boolean_t default_value)
+{
+ char name_buf[PATH_MAX] = "";
+ char *name = NULL;
+ int32_t count = -1;
+ int32_t ret = -1;
+ gf_boolean_t hold_locks = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(type);
+
+ snprintf(name_buf, sizeof(name_buf), "hold_%s_locks", type);
+ hold_locks = dict_get_str_boolean(dict, name_buf, default_value);
+
+ if (hold_locks == _gf_false) {
+ /* Locks were not held for this particular entity *
+ * Hence nothing to release */
+ ret = 0;
+ goto out;
+ }
+
+ /* Looking for volcount or snapcount in the dict */
+ ret = snprintf(name_buf, sizeof(name_buf), "%scount", type);
+ ret = dict_get_int32n(dict, name_buf, ret, &count);
+ if (ret) {
+ /* count is not present. Only one *
+ * element name needs to be unlocked */
+ ret = snprintf(name_buf, sizeof(name_buf), "%sname", type);
+ ret = dict_get_strn(dict, name_buf, ret, &name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch %sname", type);
+ goto out;
+ }
+
+ ret = glusterd_mgmt_v3_unlock(name, uuid, type);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Failed to release lock for %s %s "
+ "on behalf of %s.",
+ type, name, uuid_utoa(uuid));
+ goto out;
+ }
+ } else {
+ /* Unlocking one element name after another */
+ ret = glusterd_release_multiple_locks_per_entity(dict, uuid, count,
+ type);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MULTIPLE_LOCK_RELEASE_FAIL,
+ "Failed to release all %s locks", type);
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* Given the type of entity, this function figures out if it should lock a *
+ * single element or multiple elements of the said entity. For example: *
+ * if the type is "vol", this function will accordingly lock a single volume *
+ * or multiple volumes */
+static int32_t
+glusterd_mgmt_v3_lock_entity(dict_t *dict, uuid_t uuid, uint32_t *op_errno,
+ char *type, gf_boolean_t default_value)
+{
+ char name_buf[PATH_MAX] = "";
+ char *name = NULL;
+ int32_t count = -1;
+ int32_t ret = -1;
+ gf_boolean_t hold_locks = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(type);
+
+ snprintf(name_buf, sizeof(name_buf), "hold_%s_locks", type);
+ hold_locks = dict_get_str_boolean(dict, name_buf, default_value);
+
+ if (hold_locks == _gf_false) {
+ /* Not holding locks for this particular entity */
+ ret = 0;
+ goto out;
+ }
+
+ /* Looking for volcount or snapcount in the dict */
+ ret = snprintf(name_buf, sizeof(name_buf), "%scount", type);
+ ret = dict_get_int32n(dict, name_buf, ret, &count);
+ if (ret) {
+ /* count is not present. Only one *
+ * element name needs to be locked */
+ ret = snprintf(name_buf, sizeof(name_buf), "%sname", type);
+ ret = dict_get_strn(dict, name_buf, ret, &name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch %sname", type);
+ goto out;
+ }
+
+ ret = glusterd_mgmt_v3_lock(name, uuid, op_errno, type);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL,
+ "Failed to acquire lock for %s %s "
+ "on behalf of %s.",
+ type, name, uuid_utoa(uuid));
+ goto out;
+ }
+ } else {
+ /* Locking one element name after another */
+ ret = glusterd_acquire_multiple_locks_per_entity(dict, uuid, op_errno,
+ count, type);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MULTIPLE_LOCK_ACQUIRE_FAIL,
+ "Failed to acquire all %s locks", type);
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* Try to release locks of multiple entities like *
+ * volume, snaps etc. */
+int32_t
+glusterd_multiple_mgmt_v3_unlock(dict_t *dict, uuid_t uuid)
+{
+ int32_t i = -1;
+ int32_t ret = -1;
+ int32_t op_ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (!dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY, "dict is null.");
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 0; valid_types[i].type; i++) {
+ ret = glusterd_mgmt_v3_unlock_entity(dict, uuid, valid_types[i].type,
+ valid_types[i].default_value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MULTIPLE_LOCK_RELEASE_FAIL, "Unable to unlock all %s",
+ valid_types[i].type);
+ op_ret = ret;
+ }
+ }
+
+ ret = op_ret;
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* Try to acquire locks on multiple entities like *
+ * volume, snaps etc. */
+int32_t
+glusterd_multiple_mgmt_v3_lock(dict_t *dict, uuid_t uuid, uint32_t *op_errno)
+{
+ int32_t i = -1;
+ int32_t ret = -1;
+ int32_t locked_count = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (!dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY, "dict is null.");
+ ret = -1;
+ goto out;
+ }
+
+ /* Locking one entity after other */
+ for (i = 0; valid_types[i].type; i++) {
+ ret = glusterd_mgmt_v3_lock_entity(dict, uuid, op_errno,
+ valid_types[i].type,
+ valid_types[i].default_value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MULTIPLE_LOCK_ACQUIRE_FAIL, "Unable to lock all %s",
+ valid_types[i].type);
+ break;
+ }
+ locked_count++;
+ }
+
+ if (locked_count == GF_MAX_LOCKING_ENTITIES) {
+ /* If all locking ops went successfully, return as success */
+ ret = 0;
+ goto out;
+ }
+
+ /* If we failed to lock one entity, unlock others and return failure */
+ for (i = 0; i < locked_count; i++) {
+ ret = glusterd_mgmt_v3_unlock_entity(dict, uuid, valid_types[i].type,
+ valid_types[i].default_value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MULTIPLE_LOCK_RELEASE_FAIL, "Unable to unlock all %s",
+ valid_types[i].type);
+ }
+ }
+ ret = -1;
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_mgmt_v3_lock(const char *name, uuid_t uuid, uint32_t *op_errno,
+ char *type)
+{
+ char key[PATH_MAX] = "";
+ int32_t ret = -1;
+ glusterd_mgmt_v3_lock_obj *lock_obj = NULL;
+ glusterd_mgmt_v3_lock_timer *mgmt_lock_timer = NULL;
+ glusterd_conf_t *priv = NULL;
+ gf_boolean_t is_valid = _gf_true;
+ uuid_t owner = {0};
+ xlator_t *this = NULL;
+ char *bt = NULL;
+ struct timespec delay = {0};
+ char *key_dup = NULL;
+ glusterfs_ctx_t *mgmt_lock_timer_ctx = NULL;
+ xlator_t *mgmt_lock_timer_xl = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ if (!name || !type) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "name or type is null.");
+ ret = -1;
+ goto out;
+ }
+
+ is_valid = glusterd_mgmt_v3_is_type_valid(type);
+ if (is_valid != _gf_true) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid entity. Cannot perform locking "
+ "operation on %s types",
+ type);
+ ret = -1;
+ goto out;
+ }
+
+ ret = snprintf(key, sizeof(key), "%s_%s", name, type);
+ if (ret != strlen(name) + 1 + strlen(type)) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CREATE_KEY_FAIL,
+ "Unable to create key");
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0, "Trying to acquire lock of %s for %s", key,
+ uuid_utoa(uuid));
+
+ ret = glusterd_get_mgmt_v3_lock_owner(key, &owner);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Unable to get mgmt_v3 lock owner");
+ goto out;
+ }
+
+ /* If the lock has already been held for the given volume
+ * we fail */
+ if (!gf_uuid_is_null(owner)) {
+ gf_msg_callingfn(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_LOCK_ALREADY_HELD, "Lock for %s held by %s",
+ name, uuid_utoa(owner));
+ ret = -1;
+ *op_errno = EG_ANOTRANS;
+ goto out;
+ }
+
+ lock_obj = GF_MALLOC(sizeof(glusterd_mgmt_v3_lock_obj),
+ gf_common_mt_mgmt_v3_lock_obj_t);
+ if (!lock_obj) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_uuid_copy(lock_obj->lock_owner, uuid);
+
+ ret = dict_set_bin(priv->mgmt_v3_lock, key, lock_obj,
+ sizeof(glusterd_mgmt_v3_lock_obj));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set lock owner in mgmt_v3 lock");
+ GF_FREE(lock_obj);
+ goto out;
+ }
+
+ mgmt_lock_timer = GF_CALLOC(1, sizeof(glusterd_mgmt_v3_lock_timer),
+ gf_common_mt_mgmt_v3_lock_timer_t);
+
+ if (!mgmt_lock_timer) {
+ ret = -1;
+ goto out;
+ }
+
+ mgmt_lock_timer->xl = THIS;
+ /*changing to default timeout value*/
+ priv->mgmt_v3_lock_timeout = GF_LOCK_TIMER;
+
+ ret = -1;
+ mgmt_lock_timer_xl = mgmt_lock_timer->xl;
+ if (!mgmt_lock_timer_xl) {
+ GF_FREE(mgmt_lock_timer);
+ goto out;
+ }
+
+ mgmt_lock_timer_ctx = mgmt_lock_timer_xl->ctx;
+ if (!mgmt_lock_timer_ctx) {
+ GF_FREE(mgmt_lock_timer);
+ goto out;
+ }
+
+ key_dup = gf_strdup(key);
+ delay.tv_sec = priv->mgmt_v3_lock_timeout;
+ delay.tv_nsec = 0;
+
+ mgmt_lock_timer->timer = gf_timer_call_after(
+ mgmt_lock_timer_ctx, delay, gd_mgmt_v3_unlock_timer_cbk, key_dup);
+
+ ret = dict_set_bin(priv->mgmt_v3_lock_timer, key, mgmt_lock_timer,
+ sizeof(glusterd_mgmt_v3_lock_timer));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set timer in mgmt_v3 lock");
+ GF_FREE(key_dup);
+ GF_FREE(mgmt_lock_timer);
+ goto out;
+ }
+
+ /* Saving the backtrace into the pre-allocated buffer, ctx->btbuf*/
+ if ((bt = gf_backtrace_save(NULL))) {
+ snprintf(key, sizeof(key), "debug.last-success-bt-%s", key_dup);
+ ret = dict_set_dynstr_with_alloc(priv->mgmt_v3_lock, key, bt);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to save "
+ "the back trace for lock %s granted to %s",
+ key_dup, uuid_utoa(uuid));
+ ret = 0;
+ }
+
+ gf_msg_debug(this->name, 0, "Lock for %s successfully held by %s", key_dup,
+ uuid_utoa(uuid));
+
+ ret = 0;
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/*
+ * This call back will ensure to unlock the lock_obj, in case we hit a situation
+ * where unlocking failed and stale lock exist*/
+void
+gd_mgmt_v3_unlock_timer_cbk(void *data)
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_mgmt_v3_lock_timer *mgmt_lock_timer = NULL;
+ char *key = NULL;
+ int keylen;
+ char bt_key[PATH_MAX] = "";
+ int bt_key_len = 0;
+ int32_t ret = -1;
+ glusterfs_ctx_t *mgmt_lock_timer_ctx = NULL;
+ xlator_t *mgmt_lock_timer_xl = NULL;
+ gf_timer_t *timer = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ GF_ASSERT(NULL != data);
+ key = (char *)data;
+
+ keylen = strlen(key);
+ dict_deln(conf->mgmt_v3_lock, key, keylen);
+
+ bt_key_len = snprintf(bt_key, PATH_MAX, "debug.last-success-bt-%s", key);
+ if (bt_key_len != SLEN("debug.last-success-bt-") + keylen) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CREATE_KEY_FAIL,
+ "Unable to create backtrace "
+ "key");
+ goto out;
+ }
+
+ dict_deln(conf->mgmt_v3_lock, bt_key, bt_key_len);
+
+ ret = dict_get_bin(conf->mgmt_v3_lock_timer, key,
+ (void **)&mgmt_lock_timer);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to get lock owner in mgmt_v3 lock");
+ }
+
+out:
+ if (mgmt_lock_timer && mgmt_lock_timer->timer) {
+ mgmt_lock_timer_xl = mgmt_lock_timer->xl;
+ GF_VALIDATE_OR_GOTO(this->name, mgmt_lock_timer_xl, ret_function);
+
+ mgmt_lock_timer_ctx = mgmt_lock_timer_xl->ctx;
+ GF_VALIDATE_OR_GOTO(this->name, mgmt_lock_timer_ctx, ret_function);
+
+ timer = mgmt_lock_timer->timer;
+ GF_FREE(timer->data);
+ gf_timer_call_cancel(mgmt_lock_timer_ctx, mgmt_lock_timer->timer);
+ dict_deln(conf->mgmt_v3_lock_timer, bt_key, bt_key_len);
+ mgmt_lock_timer->timer = NULL;
+ gf_log(this->name, GF_LOG_INFO,
+ "unlock timer is cancelled for volume_type"
+ " %s",
+ key);
+ }
+
+ret_function:
+
+ return;
+}
+
+int32_t
+glusterd_mgmt_v3_unlock(const char *name, uuid_t uuid, char *type)
+{
+ char key[PATH_MAX] = "";
+ char key_dup[PATH_MAX] = "";
+ int keylen;
+ int32_t ret = -1;
+ gf_boolean_t is_valid = _gf_true;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_mgmt_v3_lock_timer *mgmt_lock_timer = NULL;
+ uuid_t owner = {0};
+ xlator_t *this = NULL;
+ glusterfs_ctx_t *mgmt_lock_timer_ctx = NULL;
+ xlator_t *mgmt_lock_timer_xl = NULL;
+ gf_timer_t *timer = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ if (!name || !type) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "name is null.");
+ ret = -1;
+ goto out;
+ }
+
+ is_valid = glusterd_mgmt_v3_is_type_valid(type);
+ if (is_valid != _gf_true) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid entity. Cannot perform unlocking "
+ "operation on %s types",
+ type);
+ ret = -1;
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "%s_%s", name, type);
+ if (keylen != strlen(name) + 1 + strlen(type)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CREATE_KEY_FAIL,
+ "Unable to create key");
+ ret = -1;
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0, "Trying to release lock of %s %s for %s as %s",
+ type, name, uuid_utoa(uuid), key);
+
+ ret = glusterd_get_mgmt_v3_lock_owner(key, &owner);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Unable to get mgmt_v3 lock owner");
+ goto out;
+ }
+
+ if (gf_uuid_is_null(owner)) {
+ gf_msg_callingfn(this->name, GF_LOG_WARNING, 0, GD_MSG_LOCK_NOT_HELD,
+ "Lock for %s %s not held", type, name);
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_uuid_compare(uuid, owner);
+ if (ret) {
+ gf_msg_callingfn(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_LOCK_OWNER_MISMATCH,
+ "Lock owner mismatch. "
+ "Lock for %s %s held by %s",
+ type, name, uuid_utoa(owner));
+ goto out;
+ }
+
+ /* Removing the mgmt_v3 lock from the global list */
+ dict_deln(priv->mgmt_v3_lock, key, keylen);
+
+ ret = dict_get_bin(priv->mgmt_v3_lock_timer, key,
+ (void **)&mgmt_lock_timer);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to get mgmt lock key in mgmt_v3 lock");
+ goto out;
+ }
+
+ (void)snprintf(key_dup, sizeof(key_dup), "%s", key);
+
+ /* Remove the backtrace key as well */
+ ret = snprintf(key, sizeof(key), "debug.last-success-bt-%s", key_dup);
+ if (ret != SLEN("debug.last-success-bt-") + keylen) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CREATE_KEY_FAIL,
+ "Unable to create backtrace "
+ "key");
+ ret = -1;
+ goto out;
+ }
+ dict_deln(priv->mgmt_v3_lock, key, ret);
+
+ gf_msg_debug(this->name, 0, "Lock for %s %s successfully released", type,
+ name);
+
+ /* Release owner reference which was held during lock */
+ if (mgmt_lock_timer && mgmt_lock_timer->timer) {
+ ret = -1;
+ mgmt_lock_timer_xl = mgmt_lock_timer->xl;
+ GF_VALIDATE_OR_GOTO(this->name, mgmt_lock_timer_xl, out);
+
+ mgmt_lock_timer_ctx = mgmt_lock_timer_xl->ctx;
+ GF_VALIDATE_OR_GOTO(this->name, mgmt_lock_timer_ctx, out);
+ ret = 0;
+
+ timer = mgmt_lock_timer->timer;
+ GF_FREE(timer->data);
+ gf_timer_call_cancel(mgmt_lock_timer_ctx, mgmt_lock_timer->timer);
+ dict_deln(priv->mgmt_v3_lock_timer, key_dup, keylen);
+ }
+ ret = glusterd_volinfo_find(name, &volinfo);
+ if (volinfo && volinfo->stage_deleted) {
+ /* this indicates a volume still exists and the volume delete
+ * operation has failed in some of the phases, need to ensure
+ * stage_deleted flag is set back to false
+ */
+ volinfo->stage_deleted = _gf_false;
+ gf_log(this->name, GF_LOG_INFO,
+ "Volume %s still exist, setting "
+ "stage deleted flag to false for the volume",
+ volinfo->volname);
+ }
+ ret = 0;
+out:
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-locks.h b/xlators/mgmt/glusterd/src/glusterd-locks.h
new file mode 100644
index 00000000000..44667cebd3d
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-locks.h
@@ -0,0 +1,57 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_LOCKS_H_
+#define _GLUSTERD_LOCKS_H_
+
+typedef struct glusterd_mgmt_v3_lock_object_ {
+ uuid_t lock_owner;
+} glusterd_mgmt_v3_lock_obj;
+
+typedef struct glusterd_mgmt_v3_lock_timer_ {
+ gf_timer_t *timer;
+ xlator_t *xl;
+} glusterd_mgmt_v3_lock_timer;
+
+typedef struct glusterd_mgmt_v3_lock_valid_entities {
+ char *type; /* Entity type like vol, snap */
+ gf_boolean_t default_value; /* The default value that *
+ * determines if the locks *
+ * should be held for that *
+ * entity */
+} glusterd_valid_entities;
+
+int32_t
+glusterd_mgmt_v3_lock_init();
+
+void
+glusterd_mgmt_v3_lock_fini();
+
+int32_t
+glusterd_mgmt_v3_lock_timer_init();
+
+void
+glusterd_mgmt_v3_lock_timer_fini();
+
+int32_t
+glusterd_mgmt_v3_lock(const char *key, uuid_t uuid, uint32_t *op_errno,
+ char *type);
+
+int32_t
+glusterd_mgmt_v3_unlock(const char *key, uuid_t uuid, char *type);
+
+int32_t
+glusterd_multiple_mgmt_v3_lock(dict_t *dict, uuid_t uuid, uint32_t *op_errno);
+
+int32_t
+glusterd_multiple_mgmt_v3_unlock(dict_t *dict, uuid_t uuid);
+
+void
+gd_mgmt_v3_unlock_timer_cbk(void *data);
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-log-ops.c b/xlators/mgmt/glusterd/src/glusterd-log-ops.c
index 2056c84b53a..34abf35cb00 100644
--- a/xlators/mgmt/glusterd/src/glusterd-log-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-log-ops.c
@@ -1,28 +1,13 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#include "common-utils.h"
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/common-utils.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
#include "glusterd.h"
@@ -30,253 +15,276 @@
#include "glusterd-store.h"
#include "glusterd-utils.h"
#include "glusterd-volgen.h"
+#include "glusterd-messages.h"
+#include <glusterfs/syscall.h>
#include <signal.h>
int
-glusterd_handle_log_rotate (rpcsvc_request_t *req)
+__glusterd_handle_log_rotate(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- gf_cli_req cli_req = {{0,}};
- dict_t *dict = NULL;
- glusterd_op_t cli_op = GD_OP_LOG_ROTATE;
- char *volname = NULL;
-
- GF_ASSERT (req);
-
- if (!xdr_to_generic (req->msg[0], &cli_req,
- (xdrproc_t)xdr_gf_cli_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- if (cli_req.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
-
- ret = dict_unserialize (cli_req.dict.dict_val,
- cli_req.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- }
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_LOG_ROTATE;
+ char *volname = NULL;
+ char msg[64] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(msg, sizeof(msg),
+ "Unable to decode the "
+ "command");
+ goto out;
}
+ }
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "failed to get volname");
- goto out;
- }
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Failed to get volume name");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg);
+ goto out;
+ }
- gf_log ("glusterd", GF_LOG_INFO, "Received log rotate req "
- "for volume %s", volname);
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_LOG_ROTATE_REQ_RECVD,
+ "Received log rotate req "
+ "for volume %s",
+ volname);
- ret = dict_set_uint64 (dict, "rotate-key", (uint64_t)time (NULL));
- if (ret)
- goto out;
+ ret = dict_set_uint64(dict, "rotate-key", (uint64_t)gf_time());
+ if (ret)
+ goto out;
- ret = glusterd_op_begin (req, GD_OP_LOG_ROTATE, dict);
+ ret = glusterd_op_begin_synctask(req, GD_OP_LOG_ROTATE, dict);
out:
- if (ret && dict)
- dict_unref (dict);
-
- glusterd_friend_sm ();
- glusterd_op_sm ();
-
- if (ret)
- ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
- NULL, "operation failed");
+ if (ret) {
+ if (msg[0] == '\0')
+ snprintf(msg, sizeof(msg), "Operation failed");
+ ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, msg);
+ }
+
+ free(cli_req.dict.dict_val);
+ return ret;
+}
- if (cli_req.dict.dict_val)
- free (cli_req.dict.dict_val);
- return ret;
+int
+glusterd_handle_log_rotate(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __glusterd_handle_log_rotate);
}
/* op-sm */
int
-glusterd_op_stage_log_rotate (dict_t *dict, char **op_errstr)
+glusterd_op_stage_log_rotate(dict_t *dict, char **op_errstr)
{
- int ret = -1;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- gf_boolean_t exists = _gf_false;
- char msg[2048] = {0};
- char *brick = NULL;
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
- goto out;
- }
-
- exists = glusterd_check_volume_exists (volname);
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (!exists) {
- snprintf (msg, sizeof (msg), "Volume %s does not exist",
- volname);
- gf_log ("", GF_LOG_ERROR, "%s", msg);
- *op_errstr = gf_strdup (msg);
- ret = -1;
- goto out;
- }
-
- if (_gf_false == glusterd_is_volume_started (volinfo)) {
- snprintf (msg, sizeof (msg), "Volume %s needs to be started before"
- " log rotate.", volname);
- gf_log ("", GF_LOG_ERROR, "%s", msg);
- *op_errstr = gf_strdup (msg);
- ret = -1;
- goto out;
- }
-
- ret = dict_get_str (dict, "brick", &brick);
- /* If no brick is specified, do log-rotate for
- all the bricks in the volume */
- if (ret) {
- ret = 0;
- goto out;
- }
-
- if (strchr (brick, ':')) {
- ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo, NULL,
- GF_PATH_COMPLETE);
- if (ret) {
- snprintf (msg, sizeof (msg), "Incorrect brick %s "
- "for volume %s", brick, volname);
- gf_log ("", GF_LOG_ERROR, "%s", msg);
- *op_errstr = gf_strdup (msg);
- goto out;
- }
- }
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char msg[2048] = {0};
+ char *brick = NULL;
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Volume %s does not exist", volname);
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", msg);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
+
+ if (_gf_false == glusterd_is_volume_started(volinfo)) {
+ snprintf(msg, sizeof(msg),
+ "Volume %s needs to be started before"
+ " log rotate.",
+ volname);
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_STARTED, "%s", msg);
+ *op_errstr = gf_strdup(msg);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "brick", &brick);
+ /* If no brick is specified, do log-rotate for
+ all the bricks in the volume */
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=brick", NULL);
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, NULL,
+ _gf_false);
+ if (ret) {
+ snprintf(msg, sizeof(msg),
+ "Incorrect brick %s "
+ "for volume %s",
+ brick, volname);
+ gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s",
+ msg);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
- return ret;
+ return ret;
}
-
int
-glusterd_op_log_rotate (dict_t *dict)
+glusterd_op_log_rotate(dict_t *dict)
{
- int ret = -1;
- glusterd_conf_t *priv = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
- xlator_t *this = NULL;
- char *volname = NULL;
- char *brick = NULL;
- char path[PATH_MAX] = {0,};
- char logfile[PATH_MAX] = {0,};
- char pidfile[PATH_MAX] = {0,};
- FILE *file = NULL;
- pid_t pid = 0;
- uint64_t key = 0;
- int valid_brick = 0;
- glusterd_brickinfo_t *tmpbrkinfo = NULL;
-
- this = THIS;
- GF_ASSERT (this);
- priv = this->private;
- GF_ASSERT (priv);
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "volname not found");
- goto out;
- }
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = NULL;
+ char *volname = NULL;
+ char *brick = NULL;
+ char logfile[PATH_MAX] = {
+ 0,
+ };
+ char pidfile[PATH_MAX] = {
+ 0,
+ };
+ FILE *file = NULL;
+ pid_t pid = 0;
+ uint64_t key = 0;
+ int valid_brick = 0;
+ glusterd_brickinfo_t *tmpbrkinfo = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "volname not found");
+ goto out;
+ }
+
+ ret = dict_get_uint64(dict, "rotate-key", &key);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "rotate key not found");
+ goto out;
+ }
+
+ ret = dict_get_str(dict, "brick", &brick);
+ /* If no brick is specified, do log-rotate for
+ all the bricks in the volume */
+ if (ret) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=brick", NULL);
+ goto cont;
+ }
+
+ ret = glusterd_brickinfo_new_from_brick(brick, &tmpbrkinfo, _gf_false,
+ NULL);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_BRICK_NOT_FOUND,
+ "cannot get brickinfo from brick");
+ goto out;
+ }
- ret = dict_get_uint64 (dict, "rotate-key", &key);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "rotate key not found");
- goto out;
+cont:
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret)
+ goto out;
+
+ ret = -1;
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID))
+ continue;
+
+ if (tmpbrkinfo && brick &&
+ (strcmp(tmpbrkinfo->hostname, brickinfo->hostname) ||
+ strcmp(tmpbrkinfo->path, brickinfo->path)))
+ continue;
+
+ valid_brick = 1;
+
+ GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, priv);
+ file = fopen(pidfile, "r+");
+ if (!file) {
+ gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Unable to open pidfile: %s", pidfile);
+ ret = -1;
+ goto out;
}
- ret = dict_get_str (dict, "brick", &brick);
- /* If no brick is specified, do log-rotate for
- all the bricks in the volume */
- if (ret)
- goto cont;
-
- if (!strchr (brick, ':'))
- brick = NULL;
- else {
- ret = glusterd_brickinfo_from_brick (brick, &tmpbrkinfo);
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "cannot get brickinfo from brick");
- goto out;
- }
+ ret = fscanf(file, "%d", &pid);
+ if (ret <= 0) {
+ fclose(file);
+ gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Unable to read pidfile: %s", pidfile);
+ ret = -1;
+ goto out;
}
+ fclose(file);
+ file = NULL;
-cont:
- ret = glusterd_volinfo_find (volname, &volinfo);
+ snprintf(logfile, PATH_MAX, "%s.%" PRIu64, brickinfo->logfile, key);
+
+ ret = sys_rename(brickinfo->logfile, logfile);
if (ret)
- goto out;
+ gf_msg("glusterd", GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+ "rename failed");
- ret = -1;
- list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
- if (uuid_compare (brickinfo->uuid, MY_UUID))
- continue;
-
- if (brick &&
- (strcmp (tmpbrkinfo->hostname, brickinfo->hostname) ||
- strcmp (tmpbrkinfo->path,brickinfo->path)))
- continue;
-
- valid_brick = 1;
-
- GLUSTERD_GET_VOLUME_DIR (path, volinfo, priv);
- GLUSTERD_GET_BRICK_PIDFILE (pidfile, path, brickinfo->hostname,
- brickinfo->path);
-
- file = fopen (pidfile, "r+");
- if (!file) {
- gf_log ("", GF_LOG_ERROR, "Unable to open pidfile: %s",
- pidfile);
- ret = -1;
- goto out;
- }
-
- ret = fscanf (file, "%d", &pid);
- if (ret <= 0) {
- gf_log ("", GF_LOG_ERROR, "Unable to read pidfile: %s",
- pidfile);
- ret = -1;
- goto out;
- }
- fclose (file);
- file = NULL;
-
- snprintf (logfile, PATH_MAX, "%s.%"PRIu64,
- brickinfo->logfile, key);
-
- ret = rename (brickinfo->logfile, logfile);
- if (ret)
- gf_log ("", GF_LOG_WARNING, "rename failed");
-
- ret = kill (pid, SIGHUP);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to SIGHUP to %d", pid);
- goto out;
- }
- ret = 0;
-
- /* If request was for brick, only one iteration is enough */
- if (brick)
- break;
+ ret = kill(pid, SIGHUP);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_PID_KILL_FAIL,
+ "Unable to SIGHUP to %d", pid);
+ goto out;
}
+ ret = 0;
+
+ /* If request was for brick, only one iteration is enough */
+ if (brick)
+ break;
+ }
- if (ret && !valid_brick)
- ret = 0;
+ if (ret && !valid_brick)
+ ret = 0;
out:
- if (tmpbrkinfo)
- glusterd_brickinfo_delete (tmpbrkinfo);
+ if (tmpbrkinfo)
+ glusterd_brickinfo_delete(tmpbrkinfo);
- return ret;
+ return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-mem-types.h b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
index 9031f83a5b0..d7257e1a7b5 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mem-types.h
+++ b/xlators/mgmt/glusterd/src/glusterd-mem-types.h
@@ -1,81 +1,58 @@
/*
- Copyright (c) 2008-2011 Gluster, Inc. <http://www.gluster.com>
+ Copyright (c) 2008-2012 Red Hat, Inc. <http://www.redhat.com>
This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef __GLUSTERD_MEM_TYPES_H__
#define __GLUSTERD_MEM_TYPES_H__
-#include "mem-types.h"
+#include <glusterfs/mem-types.h>
typedef enum gf_gld_mem_types_ {
- gf_gld_mt_dir_entry_t = gf_common_mt_end + 1,
- gf_gld_mt_volfile_ctx = gf_common_mt_end + 2,
- gf_gld_mt_glusterd_state_t = gf_common_mt_end + 3,
- gf_gld_mt_glusterd_conf_t = gf_common_mt_end + 4,
- gf_gld_mt_locker = gf_common_mt_end + 5,
- gf_gld_mt_string = gf_common_mt_end + 6,
- gf_gld_mt_lock_table = gf_common_mt_end + 7,
- gf_gld_mt_char = gf_common_mt_end + 8,
- gf_gld_mt_glusterd_connection_t = gf_common_mt_end + 9,
- gf_gld_mt_resolve_comp = gf_common_mt_end + 10,
- gf_gld_mt_peerinfo_t = gf_common_mt_end + 11,
- gf_gld_mt_friend_sm_event_t = gf_common_mt_end + 12,
- gf_gld_mt_friend_req_ctx_t = gf_common_mt_end + 13,
- gf_gld_mt_friend_update_ctx_t = gf_common_mt_end + 14,
- gf_gld_mt_op_sm_event_t = gf_common_mt_end + 15,
- gf_gld_mt_op_lock_ctx_t = gf_common_mt_end + 16,
- gf_gld_mt_op_stage_ctx_t = gf_common_mt_end + 17,
- gf_gld_mt_op_commit_ctx_t = gf_common_mt_end + 18,
- gf_gld_mt_mop_stage_req_t = gf_common_mt_end + 19,
- gf_gld_mt_probe_ctx_t = gf_common_mt_end + 20,
- gf_gld_mt_create_volume_ctx_t = gf_common_mt_end + 21,
- gf_gld_mt_start_volume_ctx_t = gf_common_mt_end + 22,
- gf_gld_mt_stop_volume_ctx_t = gf_common_mt_end + 23,
- gf_gld_mt_delete_volume_ctx_t = gf_common_mt_end + 24,
- gf_gld_mt_glusterd_volinfo_t = gf_common_mt_end + 25,
- gf_gld_mt_glusterd_brickinfo_t = gf_common_mt_end + 26,
- gf_gld_mt_peer_hostname_t = gf_common_mt_end + 27,
- gf_gld_mt_ifreq = gf_common_mt_end + 28,
- gf_gld_mt_store_handle_t = gf_common_mt_end + 29,
- gf_gld_mt_store_iter_t = gf_common_mt_end + 30,
- gf_gld_mt_defrag_info = gf_common_mt_end + 31,
- gf_gld_mt_log_filename_ctx_t = gf_common_mt_end + 32,
- gf_gld_mt_log_locate_ctx_t = gf_common_mt_end + 33,
- gf_gld_mt_log_rotate_ctx_t = gf_common_mt_end + 34,
- gf_gld_mt_peerctx_t = gf_common_mt_end + 35,
- gf_gld_mt_sm_tr_log_t = gf_common_mt_end + 36,
- gf_gld_mt_pending_node_t = gf_common_mt_end + 37,
- gf_gld_mt_brick_rsp_ctx_t = gf_common_mt_end + 38,
- gf_gld_mt_mop_brick_req_t = gf_common_mt_end + 39,
- gf_gld_mt_op_allack_ctx_t = gf_common_mt_end + 40,
- gf_gld_mt_linearr = gf_common_mt_end + 41,
- gf_gld_mt_linebuf = gf_common_mt_end + 42,
- gf_gld_mt_mount_pattern = gf_common_mt_end + 43,
- gf_gld_mt_mount_comp_container = gf_common_mt_end + 44,
- gf_gld_mt_mount_component = gf_common_mt_end + 45,
- gf_gld_mt_mount_spec = gf_common_mt_end + 46,
- gf_gld_mt_georep_meet_spec = gf_common_mt_end + 47,
- gf_gld_mt_nodesrv_t = gf_common_mt_end + 48,
- gf_gld_mt_charptr = gf_common_mt_end + 49,
- gf_gld_mt_hooks_stub_t = gf_common_mt_end + 50,
- gf_gld_mt_hooks_priv_t = gf_common_mt_end + 51,
- gf_gld_mt_end = gf_common_mt_end + 52,
+ gf_gld_mt_glusterd_conf_t = gf_common_mt_end + 1,
+ gf_gld_mt_char,
+ gf_gld_mt_peerinfo_t,
+ gf_gld_mt_friend_sm_event_t,
+ gf_gld_mt_friend_req_ctx_t,
+ gf_gld_mt_friend_update_ctx_t,
+ gf_gld_mt_op_sm_event_t,
+ gf_gld_mt_op_lock_ctx_t,
+ gf_gld_mt_op_stage_ctx_t,
+ gf_gld_mt_op_commit_ctx_t,
+ gf_gld_mt_mop_stage_req_t,
+ gf_gld_mt_probe_ctx_t,
+ gf_gld_mt_glusterd_volinfo_t,
+ gf_gld_mt_volinfo_dict_data_t,
+ gf_gld_mt_glusterd_brickinfo_t,
+ gf_gld_mt_peer_hostname_t,
+ gf_gld_mt_defrag_info,
+ gf_gld_mt_peerctx_t,
+ gf_gld_mt_sm_tr_log_t,
+ gf_gld_mt_pending_node_t,
+ gf_gld_mt_brick_rsp_ctx_t,
+ gf_gld_mt_mop_brick_req_t,
+ gf_gld_mt_op_allack_ctx_t,
+ gf_gld_mt_linearr,
+ gf_gld_mt_linebuf,
+ gf_gld_mt_mount_pattern,
+ gf_gld_mt_mount_comp_container,
+ gf_gld_mt_mount_spec,
+ gf_gld_mt_georep_meet_spec,
+ gf_gld_mt_charptr,
+ gf_gld_mt_hooks_stub_t,
+ gf_gld_mt_hooks_priv_t,
+ gf_gld_mt_mop_commit_req_t,
+ gf_gld_mt_int,
+ gf_gld_mt_snap_t,
+ gf_gld_mt_missed_snapinfo_t,
+ gf_gld_mt_snap_create_args_t,
+ gf_gld_mt_glusterd_brick_proc_t,
+ gf_gld_mt_glusterd_svc_proc_t,
+ gf_gld_mt_end,
} gf_gld_mem_types_t;
#endif
-
diff --git a/xlators/mgmt/glusterd/src/glusterd-messages.h b/xlators/mgmt/glusterd/src/glusterd-messages.h
new file mode 100644
index 00000000000..3a1e600fb03
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-messages.h
@@ -0,0 +1,451 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_MESSAGES_H_
+#define _GLUSTERD_MESSAGES_H_
+
+#include <glusterfs/glfs-message-id.h>
+
+/* To add new message IDs, append new identifiers at the end of the list.
+ *
+ * Never remove a message ID. If it's not used anymore, you can rename it or
+ * leave it as it is, but not delete it. This is to prevent reutilization of
+ * IDs by other messages.
+ *
+ * The component name must match one of the entries defined in
+ * glfs-message-id.h.
+ */
+
+GLFS_MSGID(
+ GLUSTERD, GD_MSG_SERVER_QUORUM_NOT_MET,
+ GD_MSG_SERVER_QUORUM_LOST_STOPPING_BRICKS,
+ GD_MSG_SERVER_QUORUM_MET_STARTING_BRICKS, GD_MSG_PEER_DISCONNECTED,
+ GD_MSG_BRICK_DISCONNECTED, GD_MSG_NODE_DISCONNECTED,
+ GD_MSG_REBALANCE_DISCONNECTED, GD_MSG_VOL_CLEANUP_FAIL,
+ GD_MSG_VOL_VERS_MISMATCH, GD_MSG_CKSUM_VERS_MISMATCH,
+ GD_MSG_QUOTA_CONFIG_VERS_MISMATCH, GD_MSG_QUOTA_CONFIG_CKSUM_MISMATCH,
+ GD_MSG_BRICK_STOP_FAIL, GD_MSG_SVC_KILL_FAIL, GD_MSG_PID_KILL_FAIL,
+ GD_MSG_REBAL_NO_SOCK_FILE, GD_MSG_UNIX_OP_BUILD_FAIL,
+ GD_MSG_RPC_CREATE_FAIL, GD_MSG_FAIL_DEFAULT_OPT_SET,
+ GD_MSG_CLUSTER_UNLOCK_FAILED, GD_MSG_NO_MEMORY, GD_MSG_UNSUPPORTED_VERSION,
+ GD_MSG_COMMAND_NOT_FOUND, GD_MSG_SNAPSHOT_OP_FAILED, GD_MSG_INVALID_ENTRY,
+ GD_MSG_VOL_NOT_FOUND, GD_MSG_REG_COMPILE_FAILED, GD_MSG_FILE_OP_FAILED,
+ GD_MSG_SNAP_CREATION_FAIL, GD_MSG_VOL_OP_FAILED, GD_MSG_CREATE_DIR_FAILED,
+ GD_MSG_DIR_OP_FAILED, GD_MSG_VOL_STOP_FAILED, GD_MSG_NO_CLI_RESP,
+ GD_MSG_LOCK_INIT_FAILED, GD_MSG_SNAP_LIST_GET_FAIL, GD_MSG_UNOUNT_FAILED,
+ GD_MSG_LOCK_DESTROY_FAILED, GD_MSG_SNAP_CLEANUP_FAIL,
+ GD_MSG_SNAP_ACTIVATE_FAIL, GD_MSG_SNAP_DEACTIVATE_FAIL,
+ GD_MSG_SNAP_RESTORE_FAIL, GD_MSG_SNAP_REMOVE_FAIL, GD_MSG_SNAP_CONFIG_FAIL,
+ GD_MSG_SNAP_STATUS_FAIL, GD_MSG_SNAP_INIT_FAIL, GD_MSG_VOLINFO_SET_FAIL,
+ GD_MSG_VOLINFO_GET_FAIL, GD_MSG_BRICK_CREATION_FAIL,
+ GD_MSG_BRICK_GET_INFO_FAIL, GD_MSG_BRICK_NEW_INFO_FAIL, GD_MSG_LVS_FAIL,
+ GD_MSG_SET_XATTR_FAIL, GD_MSG_UMOUNTING_SNAP_BRICK, GD_MSG_OP_UNSUPPORTED,
+ GD_MSG_SNAP_NOT_FOUND, GD_MSG_FS_LABEL_UPDATE_FAIL, GD_MSG_LVM_MOUNT_FAILED,
+ GD_MSG_DICT_SET_FAILED, GD_MSG_CANONICALIZE_FAIL, GD_MSG_DICT_GET_FAILED,
+ GD_MSG_SNAP_INFO_FAIL, GD_MSG_SNAP_VOL_CONFIG_FAIL,
+ GD_MSG_SNAP_OBJECT_STORE_FAIL, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ GD_MSG_SNAP_RESTORE_REVERT_FAIL, GD_MSG_SNAP_LIST_SET_FAIL,
+ GD_MSG_VOLFILE_CREATE_FAIL, GD_MSG_VOLINFO_REMOVE_FAIL,
+ GD_MSG_VOL_DELETE_FAIL, GD_MSG_SNAPSHOT_PENDING,
+ GD_MSG_BRICK_PATH_UNMOUNTED, GD_MSG_BRICK_ADD_FAIL,
+ GD_MSG_BRICK_SET_INFO_FAIL, GD_MSG_LVCREATE_FAIL, GD_MSG_VG_GET_FAIL,
+ GD_MSG_TPOOL_GET_FAIL, GD_MSG_LVM_REMOVE_FAILED,
+ GD_MSG_MISSEDSNAP_INFO_SET_FAIL, GD_MSG_BRK_MOUNTOPTS_FAIL,
+ GD_MSG_MISSED_SNAP_LIST_STORE_FAIL, GD_MSG_INVALID_MISSED_SNAP_ENTRY,
+ GD_MSG_MISSED_SNAP_GET_FAIL, GD_MSG_MISSED_SNAP_CREATE_FAIL,
+ GD_MSG_DUP_ENTRY, GD_MSG_MISSED_SNAP_STATUS_DONE, GD_MSG_NO_EXEC_PERMS,
+ GD_MSG_GLOBAL_OP_VERSION_SET_FAIL, GD_MSG_HARD_LIMIT_SET_FAIL,
+ GD_MSG_OP_SUCCESS, GD_MSG_STORE_FAIL, GD_MSG_GLOBAL_OP_VERSION_GET_FAIL,
+ GD_MSG_GEOREP_GET_FAILED, GD_MSG_GLUSTERD_UMOUNT_FAIL,
+ GD_MSG_QUORUM_CHECK_FAIL, GD_MSG_QUORUM_COUNT_IGNORED,
+ GD_MSG_SNAP_MOUNT_FAIL, GD_MSG_RSP_DICT_USE_FAIL, GD_MSG_SNAP_IMPORT_FAIL,
+ GD_MSG_SNAP_CONFLICT, GD_MSG_MISSED_SNAP_DELETE,
+ GD_MSG_QUOTA_CONFIG_IMPORT_FAIL, GD_MSG_SNAPDIR_CREATE_FAIL,
+ GD_MSG_MISSED_SNAP_PRESENT, GD_MSG_UUID_NULL, GD_MSG_TSTAMP_SET_FAIL,
+ GD_MSG_RESP_AGGR_FAIL, GD_MSG_DICT_EMPTY, GD_MSG_DICT_CREATE_FAIL,
+ GD_MSG_SNAPD_STOP_FAIL, GD_MSG_SOFT_LIMIT_REACHED, GD_MSG_SNAPD_START_FAIL,
+ GD_MSG_SNAPD_CREATE_FAIL, GD_MSG_SNAPD_INIT_FAIL, GD_MSG_MGMTV3_OP_FAIL,
+ GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL, GD_MSG_MGMTV3_UNLOCK_FAIL,
+ GD_MSG_MGMTV3_LOCK_GET_FAIL, GD_MSG_MGMTV3_LOCKDOWN_FAIL,
+ GD_MSG_POST_VALIDATION_FAIL, GD_MSG_PRE_VALIDATION_FAIL,
+ GD_MSG_COMMIT_OP_FAIL, GD_MSG_PEER_LIST_CREATE_FAIL, GD_MSG_BRICK_OP_FAIL,
+ GD_MSG_OPINFO_SET_FAIL, GD_MSG_OP_EVENT_UNLOCK_FAIL,
+ GD_MSG_MGMTV3_OP_RESP_FAIL, GD_MSG_PEER_NOT_FOUND, GD_MSG_REQ_DECODE_FAIL,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, GD_MSG_ALREADY_STOPPED,
+ GD_MSG_PRE_VALD_RESP_FAIL, GD_MSG_SVC_GET_FAIL, GD_MSG_VOLFILE_NOT_FOUND,
+ GD_MSG_OP_EVENT_LOCK_FAIL, GD_MSG_NON_STRIPE_VOL, GD_MSG_SNAPD_OBJ_GET_FAIL,
+ GD_MSG_QUOTA_DISABLED, GD_MSG_CACHE_MINMAX_SIZE_INVALID,
+ GD_MSG_QUOTA_GET_STAT_FAIL, GD_MSG_SUBVOLUMES_EXCEED, GD_MSG_BRICK_ADD,
+ GD_MSG_BRICK_REMOVE, GD_MSG_CREATE_KEY_FAIL,
+ GD_MSG_MULTIPLE_LOCK_ACQUIRE_FAIL, GD_MSG_MULTIPLE_LOCK_RELEASE_FAIL,
+ GD_MSG_RESP_FROM_UNKNOWN_PEER, GD_MSG_BRICK_MOUNDIRS_AGGR_FAIL,
+ GD_MSG_GFID_VALIDATE_SET_FAIL, GD_MSG_PEER_LOCK_FAIL,
+ GD_MSG_PEER_UNLOCK_FAIL, GD_MSG_MGMT_OP_FAIL,
+ GD_MSG_TRANS_OPINFO_CLEAR_FAIL, GD_MSG_GLUSTERD_LOCK_FAIL,
+ GD_MSG_TRANS_OPINFO_SET_FAIL, GD_MSG_TRANS_IDGEN_FAIL, GD_MSG_RPC_FAILURE,
+ GD_MSG_OP_VERS_ADJUST_FAIL, GD_MSG_SNAP_DEVICE_NAME_GET_FAIL,
+ GD_MSG_SNAP_STATUS_NOT_PENDING, GD_MSG_MGMT_PGM_SET_FAIL,
+ GD_MSG_EVENT_INJECT_FAIL, GD_MSG_VERS_INFO, GD_MSG_VOL_INFO_REQ_RECVD,
+ GD_MSG_VERS_GET_FAIL, GD_MSG_EVENT_NEW_GET_FAIL, GD_MSG_RPC_LAYER_ERROR,
+ GD_MSG_NO_HANDSHAKE_ACK, GD_MSG_OP_VERSION_MISMATCH,
+ GD_MSG_HANDSHAKE_REQ_REJECTED, GD_MSG_UNKNOWN_MODE,
+ GD_MSG_DEFRAG_STATUS_UPDATED, GD_MSG_NO_FLAG_SET,
+ GD_MSG_VERSION_UNSUPPORTED, GD_MSG_UUID_SET_FAIL, GD_MSG_MOUNT_REQ_FAIL,
+ GD_MSG_GLUSTERD_GLOBAL_INFO_STORE_FAIL, GD_MSG_OP_VERS_STORE_FAIL,
+ GD_MSG_SNAP_AUTOMIC_UPDATE_FAIL, GD_MSG_SNAPINFO_WRITE_FAIL,
+ GD_MSG_SNAPINFO_CREATE_FAIL, GD_MSG_SNAPD_INFO_STORE_FAIL,
+ GD_MSG_BRK_MNTPATH_MOUNT_FAIL, GD_MSG_BRK_MNTPATH_GET_FAIL,
+ GD_MSG_SNAP_BRK_MNT_RECREATE_FAIL, GD_MSG_SNAP_RESOLVE_BRICK_FAIL,
+ GD_MSG_RESOLVE_BRICK_FAIL, GD_MSG_BRK_MNT_RECREATE_FAIL,
+ GD_MSG_TMP_FILE_UNLINK_FAIL, GD_MSG_VOL_VALS_WRITE_FAIL,
+ GD_MSG_STORE_HANDLE_GET_FAIL, GD_MSG_STORE_HANDLE_WRITE_FAIL,
+ GD_MSG_MISSED_SNAP_LIST_STORE_HANDLE_GET_FAIL,
+ GD_MSG_MISSED_SNAP_LIST_EMPTY, GD_MSG_SNAP_VOL_RETRIEVE_FAIL,
+ GD_MSG_SNAPSHOT_UPDATE_FAIL, GD_MSG_SNAPD_PORT_STORE_FAIL,
+ GD_MSG_CKSUM_STORE_FAIL, GD_MSG_STORE_HANDLE_CREATE_FAIL,
+ GD_MSG_HANDLE_NULL, GD_MSG_VOL_RESTORE_FAIL, GD_MSG_NAME_TOO_LONG,
+ GD_MSG_UUID_PARSE_FAIL, GD_MSG_UNKNOWN_KEY, GD_MSG_STORE_ITER_DESTROY_FAIL,
+ GD_MSG_STORE_ITER_GET_FAIL, GD_MSG_VOLINFO_UPDATE_FAIL,
+ GD_MSG_PARSE_BRICKINFO_FAIL, GD_MSG_VERS_STORE_FAIL, GD_MSG_HEADER_ADD_FAIL,
+ GD_MSG_QUOTA_CONF_WRITE_FAIL, GD_MSG_QUOTA_CONF_CORRUPT, GD_MSG_FORK_FAIL,
+ GD_MSG_CKSUM_COMPUTE_FAIL, GD_MSG_VERS_CKSUM_STORE_FAIL,
+ GD_MSG_GET_XATTR_FAIL, GD_MSG_CONVERSION_FAILED, GD_MSG_VOL_NOT_DISTRIBUTE,
+ GD_MSG_VOL_STOPPED, GD_MSG_OPCTX_GET_FAIL, GD_MSG_TASKID_GEN_FAIL,
+ GD_MSG_REBALANCE_ID_MISSING, GD_MSG_NO_REBALANCE_PFX_IN_VOLNAME,
+ GD_MSG_DEFRAG_STATUS_UPDATE_FAIL, GD_MSG_UUID_GEN_STORE_FAIL,
+ GD_MSG_UUID_STORE_FAIL, GD_MSG_NO_INIT, GD_MSG_MODULE_NOT_INSTALLED,
+ GD_MSG_MODULE_NOT_WORKING, GD_MSG_WRITE_ACCESS_GRANT_FAIL,
+ GD_MSG_DIRPATH_TOO_LONG, GD_MSG_LOGGROUP_INVALID, GD_MSG_DIR_PERM_LIBERAL,
+ GD_MSG_DIR_PERM_STRICT, GD_MSG_MOUNT_SPEC_INSTALL_FAIL,
+ GD_MSG_GLUSTERD_SOCK_LISTENER_START_FAIL, GD_MSG_DIR_NOT_FOUND,
+ GD_MSG_FAILED_INIT_SHDSVC, GD_MSG_FAILED_INIT_NFSSVC,
+ GD_MSG_FAILED_INIT_QUOTASVC, GD_MSG_RPC_INIT_FAIL,
+ GD_MSG_RPCSVC_REG_NOTIFY_RETURNED, GD_MSG_RPC_TRANSPORT_COUNT_GET_FAIL,
+ GD_MSG_RPC_LISTENER_CREATE_FAIL, GD_MSG_OP_VERS_RESTORE_FAIL,
+ GD_MSG_SELF_HEALD_DISABLED, GD_MSG_PRIV_NULL, GD_MSG_GSYNC_VALIDATION_FAIL,
+ GD_MSG_SLAVE_CONFPATH_DETAILS_FETCH_FAIL, GD_MSG_OP_NOT_PERMITTED_AC_REQD,
+ GD_MSG_OP_NOT_PERMITTED, GD_MSG_REBALANCE_START_FAIL,
+ GD_MSG_NFS_RECONF_FAIL, GD_MSG_REMOVE_BRICK_ID_SET_FAIL,
+ GD_MSG_BRICK_MOUNTDIR_GET_FAIL, GD_MSG_BRICK_NOT_FOUND,
+ GD_MSG_BRKPATH_TOO_LONG, GD_MSG_CLRLOCKS_CLNT_UMOUNT_FAIL,
+ GD_MSG_CLRLOCKS_CLNT_MOUNT_FAIL, GD_MSG_CLRLOCKS_MOUNTDIR_CREATE_FAIL,
+ GD_MSG_BRK_PORT_NUM_GET_FAIL, GD_MSG_BRK_STATEDUMP_FAIL,
+ GD_MSG_VOL_GRAPH_CHANGE_NOTIFY_FAIL, GD_MSG_INVALID_VG,
+ GD_MSG_GLUSTERD_OP_FAILED, GD_MSG_HOSTNAME_ADD_TO_PEERLIST_FAIL,
+ GD_MSG_STALE_PEERINFO_REMOVE_FAIL, GD_MSG_TRANS_ID_GET_FAIL,
+ GD_MSG_RES_DECODE_FAIL, GD_MSG_VOL_ALREADY_EXIST, GD_MSG_BAD_BRKORDER,
+ GD_MSG_BAD_BRKORDER_CHECK_FAIL, GD_MSG_BRICK_SELECT_FAIL,
+ GD_MSG_NO_LOCK_RESP_FROM_PEER, GD_MSG_MGMTV3_LOCK_FROM_UUID_REJCT,
+ GD_MSG_STAGE_FROM_UUID_REJCT, GD_MSG_UNLOCK_FROM_UUID_REJCT,
+ GD_MSG_MGMTV3_UNLOCK_FROM_UUID_REJCT, GD_MSG_COMMIT_FROM_UUID_REJCT,
+ GD_MSG_VOL_NOT_STARTED, GD_MSG_VOL_NOT_REPLICA, GD_MSG_VOL_NOT_DISPERSE,
+ GD_MSG_OLD_REMOVE_BRICK_EXISTS, GD_MSG_USE_THE_FORCE, GD_MSG_OIP,
+ GD_MSG_OIP_RETRY_LATER, GD_MSG_GSYNC_RESTART_FAIL,
+ GD_MSG_LOCK_FROM_UUID_REJCT, GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL,
+ GD_MSG_HOSTNAME_RESOLVE_FAIL, GD_MSG_COUNT_VALIDATE_FAILED,
+ GD_MSG_SPAWNING_CHILD_FAILED, GD_MSG_READ_CHILD_DATA_FAILED,
+ GD_MSG_DEFAULT_TEMP_CONFIG, GD_MSG_PIDFILE_CREATE_FAILED,
+ GD_MSG_GSYNCD_SPAWN_FAILED, GD_MSG_SUBOP_NOT_FOUND, GD_MSG_RESERVED_OPTION,
+ GD_MSG_GLUSTERD_PRIV_NOT_FOUND, GD_MSG_SLAVEINFO_FETCH_ERROR,
+ GD_MSG_VALIDATE_FAILED, GD_MSG_INVOKE_ERROR, GD_MSG_SESSION_CREATE_ERROR,
+ GD_MSG_STOP_FORCE, GD_MSG_GET_CONFIG_INFO_FAILED,
+ GD_MSG_STAT_FILE_READ_FAILED, GD_MSG_CONF_PATH_ASSIGN_FAILED,
+ GD_MSG_SESSION_INACTIVE, GD_MSG_PIDFILE_NOT_FOUND, GD_MSG_PEER_CMD_ERROR,
+ GD_MSG_SRC_FILE_ERROR, GD_MSG_GET_STATEFILE_NAME_FAILED, GD_MSG_STATUS_NULL,
+ GD_MSG_STATUSFILE_CREATE_FAILED, GD_MSG_SLAVE_URL_INVALID,
+ GD_MSG_INVALID_SLAVE, GD_MSG_READ_ERROR, GD_MSG_ARG_FETCH_ERROR,
+ GD_MSG_REG_FILE_MISSING, GD_MSG_STATEFILE_NAME_NOT_FOUND,
+ GD_MSG_GEO_REP_START_FAILED, GD_MSG_GSYNCD_ERROR,
+ GD_MSG_UPDATE_STATEFILE_FAILED, GD_MSG_STATUS_UPDATE_FAILED,
+ GD_MSG_GSYNCD_OP_SET_FAILED, GD_MSG_BUFFER_EMPTY, GD_MSG_CONFIG_INFO,
+ GD_MSG_FETCH_CONFIG_VAL_FAILED, GD_MSG_GSYNCD_PARSE_ERROR,
+ GD_MSG_SESSION_ALREADY_EXIST, GD_MSG_FORCE_CREATE_SESSION,
+ GD_MSG_GET_KEY_FAILED, GD_MSG_SESSION_DEL_FAILED, GD_MSG_CMD_EXEC_FAIL,
+ GD_MSG_STRDUP_FAILED, GD_MSG_UNABLE_TO_END, GD_MSG_PAUSE_FAILED,
+ GD_MSG_NORMALIZE_URL_FAIL, GD_MSG_MODULE_ERROR,
+ GD_MSG_SLAVEINFO_STORE_ERROR, GD_MSG_MARKER_START_FAIL,
+ GD_MSG_RESUME_FAILED, GD_MSG_GLUSTERFS_START_FAIL,
+ GD_MSG_GLUSTERFS_STOP_FAIL, GD_MSG_RBOP_STATE_STORE_FAIL,
+ GD_MSG_PUMP_XLATOR_DISABLED, GD_MSG_ABORT_OP_FAIL, GD_MSG_PAUSE_OP_FAIL,
+ GD_MSG_GLUSTER_SERVICE_START_FAIL, GD_MSG_HANDSHAKE_FAILED,
+ GD_MSG_CLI_REQ_EMPTY, GD_MSG_PEER_ADD_FAIL,
+ GD_MSG_SYNC_FROM_LOCALHOST_UNALLOWED, GD_MSG_UUIDS_SAME_RETRY,
+ GD_MSG_TSP_ALREADY_FORMED, GD_MSG_VOLS_ALREADY_PRESENT,
+ GD_MSG_REQ_CTX_CREATE_FAIL, GD_MSG_PEER_INFO_UPDATE_FAIL,
+ GD_MSG_PEERINFO_CREATE_FAIL, GD_MSG_REQ_FROM_UNKNOWN_PEER,
+ GD_MSG_STATUS_REPLY_STRING_CREATE_FAIL, GD_MSG_TOKENIZE_FAIL,
+ GD_MSG_LAZY_UMOUNT_FAIL, GD_MSG_NFS_SERVER_START_FAIL,
+ GD_MSG_GLUSTER_SERVICES_STOP_FAIL, GD_MSG_BRK_CLEANUP_FAIL,
+ GD_MSG_RB_ALREADY_STARTED, GD_MSG_RB_BRICKINFO_GET_FAIL, GD_MSG_BAD_FORMAT,
+ GD_MSG_RB_CMD_FAIL, GD_MSG_RB_NOT_STARTED_OR_PAUSED, GD_MSG_RB_NOT_STARTED,
+ GD_MSG_RB_PAUSED_ALREADY, GD_MSG_NO_FREE_PORTS,
+ GD_MSG_EVENT_STATE_TRANSITION_FAIL, GD_MSG_HANDLER_RETURNED,
+ GD_MSG_SNAP_COMPARE_CONFLICT, GD_MSG_PEER_DETACH_CLEANUP_FAIL,
+ GD_MSG_STALE_VOL_REMOVE_FAIL, GD_MSG_AC_ERROR, GD_MSG_LOCK_FAIL,
+ GD_MSG_MGMTV3_LOCK_REQ_SEND_FAIL, GD_MSG_GLUSTERD_UNLOCK_FAIL,
+ GD_MSG_RBOP_START_FAIL, GD_MSG_UNKNOWN_RESPONSE,
+ GD_MSG_COMMIT_REQ_SEND_FAIL, GD_MSG_OPCTX_UPDATE_FAIL, GD_MSG_OPCTX_NULL,
+ GD_MSG_DICT_COPY_FAIL, GD_MSG_SHD_STATUS_SET_FAIL,
+ GD_MSG_REPLICA_INDEX_GET_FAIL, GD_MSG_NFS_SERVER_NOT_RUNNING,
+ GD_MSG_STAGE_REQ_SEND_FAIL, GD_MSG_LOCK_REQ_SEND_FAIL,
+ GD_MSG_VOLNAMES_GET_FAIL, GD_MSG_NO_TASK_ID, GD_MSG_ADD_REMOVE_BRICK_FAIL,
+ GD_MSG_SVC_RESTART_FAIL, GD_MSG_VOL_SET_FAIL, GD_MSG_QUOTAD_NOT_RUNNING,
+ GD_MSG_XLATOR_COUNT_GET_FAIL, GD_MSG_TRANS_OPINFO_GET_FAIL,
+ GD_MSG_TRANS_ID_INVALID, GD_MSG_NO_OPTIONS_GIVEN, GD_MSG_SNAPD_NOT_RUNNING,
+ GD_MSG_ADD_ADDRESS_TO_PEER_FAIL, GD_MSG_PEER_ADDRESS_GET_FAIL,
+ GD_MSG_GETADDRINFO_FAIL, GD_MSG_PEERINFO_DELETE_FAIL, GD_MSG_KEY_NULL,
+ GD_MSG_SPAWN_SVCS_FAIL, GD_MSG_DICT_ITER_FAIL,
+ GD_MSG_TASK_STATUS_UPDATE_FAIL, GD_MSG_VOL_ID_MISMATCH,
+ GD_MSG_STR_TO_BOOL_FAIL, GD_MSG_RB_MNT_BRICKS_MISMATCH,
+ GD_MSG_RB_SRC_BRICKS_MISMATCH, GD_MSG_MNTENTRY_GET_FAIL,
+ GD_MSG_INODE_SIZE_GET_FAIL, GD_MSG_NO_STATEFILE_ENTRY,
+ GD_MSG_PMAP_UNSET_FAIL, GD_MSG_GLOBAL_OPT_IMPORT_FAIL,
+ GD_MSD_BRICK_DISCONNECT_FAIL, GD_MSG_SNAP_DETAILS_IMPORT_FAIL,
+ GD_MSG_BRICKINFO_CREATE_FAIL, GD_MSG_QUOTA_CKSUM_VER_STORE_FAIL,
+ GD_MSG_CKSUM_GET_FAIL, GD_MSG_BRICKPATH_ROOT_GET_FAIL,
+ GD_MSG_HOSTNAME_TO_UUID_FAIL, GD_MSG_REPLY_SUBMIT_FAIL,
+ GD_MSG_SERIALIZE_MSG_FAIL, GD_MSG_ENCODE_FAIL,
+ GD_MSG_RB_DST_BRICKS_MISMATCH, GD_MSG_XLATOR_VOLOPT_DYNLOAD_ERROR,
+ GD_MSG_VOLNAME_NOTFOUND_IN_DICT, GD_MSG_FLAGS_NOTFOUND_IN_DICT,
+ GD_MSG_HOSTNAME_NOTFOUND_IN_DICT, GD_MSG_PORT_NOTFOUND_IN_DICT,
+ GD_MSG_CMDSTR_NOTFOUND_IN_DICT, GD_MSG_SNAP_OBJ_NEW_FAIL,
+ GD_MSG_SNAP_BACKEND_MAKE_FAIL, GD_MSG_SNAP_CLONE_FAILED,
+ GD_MSG_SNAP_CLONE_PREVAL_FAILED, GD_MSG_SNAP_CLONE_POSTVAL_FAILED,
+ GD_MSG_VOLINFO_STORE_FAIL, GD_MSG_NEW_FRIEND_SM_EVENT_GET_FAIL,
+ GD_MSG_VOL_TYPE_CHANGING_INFO, GD_MSG_BRKPATH_MNTPNT_MISMATCH,
+ GD_MSG_TASKS_COUNT_MISMATCH, GD_MSG_WRONG_OPTS_SETTING,
+ GD_MSG_PATH_ALREADY_PART_OF_VOL, GD_MSG_BRICK_VALIDATE_FAIL,
+ GD_MSG_READIN_FILE_FAILED, GD_MSG_IMPORT_PRDICT_DICT,
+ GD_MSG_VOL_OPTS_IMPORT_FAIL, GD_MSG_BRICK_IMPORT_FAIL,
+ GD_MSG_VOLINFO_IMPORT_FAIL, GD_MSG_BRICK_ID_GEN_FAILED,
+ GD_MSG_GET_STATUS_DATA_FAIL, GD_MSG_BITROT_NOT_RUNNING,
+ GD_MSG_SCRUBBER_NOT_RUNNING, GD_MSG_SRC_BRICK_PORT_UNAVAIL,
+ GD_MSG_BITD_INIT_FAIL, GD_MSG_SCRUB_INIT_FAIL, GD_MSG_VAR_RUN_DIR_INIT_FAIL,
+ GD_MSG_VAR_RUN_DIR_FIND_FAIL, GD_MSG_SCRUBSVC_RECONF_FAIL,
+ GD_MSG_BITDSVC_RECONF_FAIL, GD_MSG_NFS_GNS_START_FAIL,
+ GD_MSG_NFS_GNS_SETUP_FAIL, GD_MSG_UNRECOGNIZED_SVC_MNGR,
+ GD_MSG_NFS_GNS_OP_HANDLE_FAIL, GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ GD_MSG_NFS_GNS_HOST_FOUND, GD_MSG_REBALANCE_CMD_IN_TIER_VOL,
+ GD_MSG_INCOMPATIBLE_VALUE, GD_MSG_GENERATED_UUID,
+ GD_MSG_FILE_DESC_LIMIT_SET, GD_MSG_CURR_WORK_DIR_INFO,
+ GD_MSG_STRIPE_COUNT_CHANGE_INFO, GD_MSG_REPLICA_COUNT_CHANGE_INFO,
+ GD_MSG_ADD_BRICK_REQ_RECVD, GD_MSG_VOL_ALREADY_TIER,
+ GD_MSG_REM_BRICK_REQ_RECVD, GD_MSG_VOL_NOT_TIER,
+ GD_MSG_LOG_ROTATE_REQ_RECVD, GD_MSG_CLI_REQ_RECVD, GD_MSG_GET_VOL_REQ_RCVD,
+ GD_MSG_VOL_SYNC_REQ_RCVD, GD_MSG_PROBE_RCVD, GD_MSG_UNFRIEND_REQ_RCVD,
+ GD_MSG_FRIEND_UPDATE_RCVD, GD_MSG_RESPONSE_INFO,
+ GD_MSG_VOL_PROFILE_REQ_RCVD, GD_MSG_GETWD_REQ_RCVD, GD_MSG_MOUNT_REQ_RCVD,
+ GD_MSG_UMOUNT_REQ_RCVD, GD_MSG_CONNECT_RETURNED, GD_MSG_STATUS_VOL_REQ_RCVD,
+ GD_MSG_CLRCLK_VOL_REQ_RCVD, GD_MSG_BARRIER_VOL_REQ_RCVD,
+ GD_MSG_UUID_RECEIVED, GD_MSG_REPLACE_BRK_COMMIT_FORCE_REQ_RCVD,
+ GD_MSG_BRK_PORT_NO_ADD_INDO, GD_MSG_REPLACE_BRK_REQ_RCVD,
+ GD_MSG_ADD_OP_ARGS_FAIL, GD_MSG_POST_HOOK_STUB_INIT_FAIL,
+ GD_MSG_HOOK_STUB_NULL, GD_MSG_SPAWN_THREADS_FAIL,
+ GD_MSG_STALE_VOL_DELETE_INFO, GD_MSG_PROBE_REQ_RESP_RCVD,
+ GD_MSG_HOST_PRESENT_ALREADY, GD_MSG_OP_VERS_INFO, GD_MSG_OP_VERS_SET_INFO,
+ GD_MSG_NEW_NODE_STATE_CREATION, GD_MSG_ALREADY_MOUNTED,
+ GD_MSG_SHARED_STRG_VOL_OPT_VALIDATE_FAIL, GD_MSG_NFS_GNS_STOP_FAIL,
+ GD_MSG_NFS_GNS_RESET_FAIL, GD_MSG_SHARED_STRG_SET_FAIL,
+ GD_MSG_VOL_TRANSPORT_TYPE_CHANGE, GD_MSG_PEER_COUNT_GET_FAIL,
+ GD_MSG_INSUFFICIENT_UP_NODES, GD_MSG_OP_STAGE_STATS_VOL_FAIL,
+ GD_MSG_VOL_ID_SET_FAIL, GD_MSG_OP_STAGE_RESET_VOL_FAIL,
+ GD_MSG_OP_STAGE_BITROT_FAIL, GD_MSG_OP_STAGE_QUOTA_FAIL,
+ GD_MSG_OP_STAGE_DELETE_VOL_FAIL, GD_MSG_HANDLE_HEAL_CMD_FAIL,
+ GD_MSG_CLRCLK_SND_CMD_FAIL, GD_MSG_DISPERSE_CLUSTER_FOUND,
+ GD_MSG_HEAL_VOL_REQ_RCVD, GD_MSG_STATEDUMP_VOL_REQ_RCVD,
+ GD_MSG_THINPOOLS_FOR_THINLVS, GD_MSG_OP_STAGE_CREATE_VOL_FAIL,
+ GD_MSG_OP_STAGE_START_VOL_FAIL, GD_MSG_NFS_GNS_UNEXPRT_VOL_FAIL,
+ GD_MSG_TASK_ID_INFO, GD_MSG_DEREGISTER_SUCCESS, GD_MSG_STATEDUMP_OPTS_RCVD,
+ GD_MSG_STATEDUMP_INFO, GD_MSG_RECOVERING_CORRUPT_CONF,
+ GD_MSG_RETRIEVED_UUID, GD_MSG_XLATOR_CREATE_FAIL,
+ GD_MSG_GRAPH_ENTRY_ADD_FAIL, GD_MSG_ERROR_ENCOUNTERED,
+ GD_MSG_FILTER_RUN_FAILED, GD_MSG_DEFAULT_OPT_INFO,
+ GD_MSG_MARKER_STATUS_GET_FAIL, GD_MSG_MARKER_DISABLE_FAIL,
+ GD_MSG_GRAPH_FEATURE_ADD_FAIL, GD_MSG_XLATOR_SET_OPT_FAIL,
+ GD_MSG_BUILD_GRAPH_FAILED, GD_MSG_XML_TEXT_WRITE_FAIL,
+ GD_MSG_XML_DOC_START_FAIL, GD_MSG_XML_ELE_CREATE_FAIL,
+ GD_MSG_VOLUME_INCONSISTENCY, GD_MSG_XLATOR_LINK_FAIL,
+ GD_MSG_REMOTE_HOST_GET_FAIL, GD_MSG_GRAPH_SET_OPT_FAIL,
+ GD_MSG_ROOT_SQUASH_ENABLED, GD_MSG_ROOT_SQUASH_FAILED,
+ GD_MSG_LOCK_OWNER_MISMATCH, GD_MSG_LOCK_NOT_HELD, GD_MSG_LOCK_ALREADY_HELD,
+ GD_MSG_SVC_START_SUCCESS, GD_MSG_SVC_STOP_SUCCESS, GD_MSG_PARAM_NULL,
+ GD_MSG_SVC_STOP_FAIL, GD_MSG_SHARED_STORAGE_DOES_NOT_EXIST,
+ GD_MSG_SNAP_PAUSE_TIER_FAIL, GD_MSG_SNAP_RESUME_TIER_FAIL,
+ GD_MSG_FILE_NOT_FOUND, GD_MSG_RETRY_WITH_NEW_PORT,
+ GD_MSG_REMOTE_VOL_UUID_FAIL, GD_MSG_SLAVE_VOL_PARSE_FAIL,
+ GD_MSG_DICT_GET_SUCCESS, GD_MSG_PMAP_REGISTRY_REMOVE_FAIL,
+ GD_MSG_MNTBROKER_LABEL_NULL, GD_MSG_MNTBROKER_LABEL_MISS,
+ GD_MSG_MNTBROKER_SPEC_MISMATCH, GD_MSG_SYSCALL_FAIL,
+ GD_MSG_DAEMON_STATE_REQ_RCVD, GD_MSG_BRICK_CLEANUP_SUCCESS,
+ GD_MSG_STATE_STR_GET_FAILED, GD_MSG_RESET_BRICK_COMMIT_FORCE_REQ_RCVD,
+ GD_MSG_RESET_BRICK_CMD_FAIL, GD_MSG_TIERD_STOP_FAIL,
+ GD_MSG_TIERD_CREATE_FAIL, GD_MSG_TIERD_START_FAIL,
+ GD_MSG_TIERD_OBJ_GET_FAIL, GD_MSG_TIERD_NOT_RUNNING, GD_MSG_TIERD_INIT_FAIL,
+ GD_MSG_BRICK_MX_SET_FAIL, GD_MSG_NO_SIG_TO_PID_ZERO,
+ GD_MSG_TIER_WATERMARK_RESET_FAIL, GD_MSG_CLIENTS_GET_STATE_FAILED,
+ GD_MSG_GNFS_XLATOR_NOT_INSTALLED, GD_MSG_PIDFILE_UNLINKING,
+ GD_MSG_VOL_SET_VALIDATION_INFO, GD_MSG_NO_MUX_LIMIT,
+ GD_MSG_BRICKPROC_REM_BRICK_FAILED, GD_MSG_BRICKPROC_ADD_BRICK_FAILED,
+ GD_MSG_BRICKPROC_NEW_FAILED, GD_MSG_STATVFS_FAILED, GD_MSG_GARBAGE_ARGS,
+ GD_MSG_LOCALTIME_LOGGING_VOL_OPT_VALIDATE_FAIL,
+ GD_MSG_LOCALTIME_LOGGING_ENABLE, GD_MSG_LOCALTIME_LOGGING_DISABLE,
+ GD_MSG_PORTS_EXHAUSTED, GD_MSG_CHANGELOG_GET_FAIL,
+ GD_MSG_MANAGER_FUNCTION_FAILED,
+ GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL, GD_MSG_SHD_START_FAIL,
+ GD_MSG_SHD_OBJ_GET_FAIL, GD_MSG_SVC_ATTACH_FAIL, GD_MSG_ATTACH_INFO,
+ GD_MSG_DETACH_INFO, GD_MSG_SVC_DETACH_FAIL,
+ GD_MSG_RPC_TRANSPORT_GET_PEERNAME_FAIL, GD_MSG_CLUSTER_RC_ENABLE,
+ GD_MSG_NFS_GANESHA_DISABLED, GD_MSG_GANESHA_NOT_RUNNING, GD_MSG_SNAP_WARN,
+ GD_MSG_BRICK_SUBVOL_VERIFY_FAIL, GD_MSG_REMOVE_ARBITER_BRICK,
+ GD_MSG_BRICK_NOT_DECOM, GD_MSG_BRICK_STOPPED, GD_MSG_BRICK_DEAD,
+ GD_MSG_BRICK_HOST_NOT_FOUND, GD_MSG_BRICK_HOST_DOWN, GD_MSG_BRICK_DELETE,
+ GD_MSG_BRICK_NO_REMOVE_CMD, GD_MSG_MIGRATION_PROG, GD_MSG_MIGRATION_FAIL,
+ GD_MSG_COPY_FAIL, GD_MSG_REALPATH_GET_FAIL,
+ GD_MSG_ARBITER_BRICK_SET_INFO_FAIL, GD_MSG_STRCHR_FAIL, GD_MSG_SPLIT_FAIL,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, GD_MSG_VOL_SHD_NOT_COMP,
+ GD_MSG_BITROT_NOT_ENABLED, GD_MSG_CREATE_BRICK_DIR_FAILED,
+ GD_MSG_CREATE_GLUSTER_DIR_FAILED, GD_MSG_BRICK_CREATE_MNTPNT,
+ GD_MSG_BRICK_CREATE_ROOT, GD_MSG_SET_XATTR_BRICK_FAIL,
+ GD_MSG_REMOVE_XATTR_FAIL, GD_MSG_XLATOR_NOT_DEFINED,
+ GD_MSG_BRICK_NOT_RUNNING, GD_MSG_INCORRECT_BRICK, GD_MSG_UUID_GET_FAIL,
+ GD_MSG_INVALID_ARGUMENT, GD_MSG_FRAME_CREATE_FAIL,
+ GD_MSG_SNAPSHOT_NOT_THIN_PROVISIONED, GD_MSG_VOL_STOP_ARGS_GET_FAILED,
+ GD_MSG_LSTAT_FAIL, GD_MSG_VOLUME_NOT_IMPORTED,
+ GD_MSG_ADD_BRICK_MNT_INFO_FAIL, GD_MSG_GET_MNT_ENTRY_INFO_FAIL,
+ GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL, GD_MSG_POST_COMMIT_OP_FAIL,
+ GD_MSG_POST_COMMIT_FROM_UUID_REJCT, GD_MSG_POST_COMMIT_REQ_SEND_FAIL);
+
+#define GD_MSG_INVALID_ENTRY_STR "Invalid data entry"
+#define GD_MSG_INVALID_ARGUMENT_STR \
+ "Invalid arguments have been given to function"
+#define GD_MSG_GARBAGE_ARGS_STR "Garbage args received"
+#define GD_MSG_BRICK_SUBVOL_VERIFY_FAIL_STR "Brick's subvol verification fail"
+#define GD_MSG_REMOVE_ARBITER_BRICK_STR "Failed to remove arbiter bricks"
+#define GD_MSG_DICT_GET_FAILED_STR "Dict get failed"
+#define GD_MSG_DICT_SET_FAILED_STR "Dict set failed"
+#define GD_MSG_BRICK_NOT_FOUND_STR "Brick not found in volume"
+#define GD_MSG_BRICK_NOT_DECOM_STR "Brick is not decommissoned"
+#define GD_MSG_BRICK_STOPPED_STR "Found stopped brick"
+#define GD_MSG_BRICK_DEAD_STR "Found dead brick"
+#define GD_MSG_BRICK_HOST_NOT_FOUND_STR \
+ "Host node of the brick is not a part of cluster"
+#define GD_MSG_BRICK_HOST_DOWN_STR "Host node of the brick is down"
+#define GD_MSG_BRICK_DELETE_STR \
+ "Deleting all the bricks of the volume is not allowed"
+#define GD_MSG_BRICK_NO_REMOVE_CMD_STR "No remove-brick command issued"
+#define GD_MSG_INCORRECT_BRICK_STR "Incorrect brick for volume"
+#define GD_MSG_MIGRATION_PROG_STR "Migration is in progress"
+#define GD_MSG_MIGRATION_FAIL_STR "Migration has failed"
+#define GD_MSG_XLATOR_NOT_DEFINED_STR "Xlator not defined"
+#define GD_MSG_DICT_CREATE_FAIL_STR "Failed to create dictionary"
+#define GD_MSG_COPY_FAIL_STR "Failed to copy"
+#define GD_MSG_UUID_GET_FAIL_STR "Failed to get the uuid of local glusterd"
+#define GD_MSG_GEO_REP_START_FAILED_STR "Georep start failed for volume"
+#define GD_MSG_REALPATH_GET_FAIL_STR "Failed to get realpath"
+#define GD_MSG_FILE_NOT_FOUND_STR "File not found in directory"
+#define GD_MSG_SRC_FILE_ERROR_STR "Error in source file"
+#define GD_MSG_DICT_UNSERIALIZE_FAIL_STR "Failed to unserialize dict"
+#define GD_MSG_VOL_ID_SET_FAIL_STR "Failed to set volume id"
+#define GD_MSG_ARBITER_BRICK_SET_INFO_FAIL_STR \
+ "Failed to add arbiter info to brick"
+#define GD_MSG_NO_MEMORY_STR "Out of memory"
+#define GD_MSG_GLUSTERD_UMOUNT_FAIL_STR "Failed to unmount path"
+#define GD_MSG_PEER_ADD_FAIL_STR "Failed to add new peer"
+#define GD_MSG_BRICK_GET_INFO_FAIL_STR "Failed to get brick info"
+#define GD_MSG_STRCHR_FAIL_STR "Failed to get the character"
+#define GD_MSG_SPLIT_FAIL_STR "Failed to split"
+#define GD_MSG_VOLINFO_GET_FAIL_STR "Failed to get volinfo"
+#define GD_MSG_PEER_NOT_FOUND_STR "Failed to find peer info"
+#define GD_MSG_DICT_COPY_FAIL_STR "Failed to copy values from dictionary"
+#define GD_MSG_ALLOC_AND_COPY_UUID_FAIL_STR \
+ "Failed to allocate memory or copy uuid"
+#define GD_MSG_VOL_NOT_FOUND_STR "Volume not found"
+#define GD_MSG_PEER_DISCONNECTED_STR "Peer is disconnected"
+#define GD_MSG_QUOTA_GET_STAT_FAIL_STR "Failed to get quota status"
+#define GD_MSG_SNAP_STATUS_FAIL_STR "Failed to get status of snapd"
+#define GD_MSG_VALIDATE_FAILED_STR "Failed to validate volume"
+#define GD_MSG_VOL_NOT_STARTED_STR "Volume is not started"
+#define GD_MSG_VOL_SHD_NOT_COMP_STR "Volume is not Self-heal compatible"
+#define GD_MSG_SELF_HEALD_DISABLED_STR "Self-heal daemon is disabled"
+#define GD_MSG_NFS_GANESHA_DISABLED_STR "NFS server is disabled"
+#define GD_MSG_QUOTA_DISABLED_STR "Quota is disabled"
+#define GD_MSG_BITROT_NOT_RUNNING_STR "Bitrot is not enabled"
+#define GD_MSG_BITROT_NOT_ENABLED_STR "Volume does not have bitrot enabled"
+#define GD_MSG_SNAPD_NOT_RUNNING_STR "Snapd is not enabled"
+#define GD_MSG_STRDUP_FAILED_STR "Strdup operation failed"
+#define GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL_STR \
+ "Failed to get quorum cluster counts"
+#define GD_MSG_GLUSTER_SERVICE_START_FAIL_STR "Failed to start glusterd service"
+#define GD_MSG_PEER_ADDRESS_GET_FAIL_STR "Failed to get the address of peer"
+#define GD_MSG_INVALID_SLAVE_STR "Volume is not a slave volume"
+#define GD_MSG_BRICK_NOT_RUNNING_STR "One or more bricks are not running"
+#define GD_MSG_BRK_MNTPATH_GET_FAIL_STR "Failed to get brick mount device"
+#define GD_MSG_SNAPSHOT_NOT_THIN_PROVISIONED_STR \
+ "Snapshot is supported only for thin provisioned LV."
+#define GD_MSG_SNAP_DEVICE_NAME_GET_FAIL_STR \
+ "Failed to copy snapshot device name"
+#define GD_MSG_SNAP_NOT_FOUND_STR "Snapshot does not exist"
+#define GD_MSG_CREATE_BRICK_DIR_FAILED_STR "Failed to create brick directory"
+#define GD_MSG_LSTAT_FAIL_STR "Lstat operation failed"
+#define GD_MSG_DIR_OP_FAILED_STR \
+ "The provided path is already present. It is not a directory"
+#define GD_MSG_BRICK_CREATION_FAIL_STR \
+ "Brick isn't allowed to be created inside glusterd's working directory."
+#define GD_MSG_BRICK_CREATE_ROOT_STR \
+ "The brick is being created in the root partition. It is recommended " \
+ "that you don't use the system's root partition for storage backend."
+#define GD_MSG_BRICK_CREATE_MNTPNT_STR \
+ "The brick is a mount point. Please create a sub-directory under the " \
+ "mount point and use that as the brick directory."
+#define GD_MSG_CREATE_GLUSTER_DIR_FAILED_STR \
+ "Failed to create glusterfs directory"
+#define GD_MSG_VOLINFO_IMPORT_FAIL_STR "Volume is not yet imported"
+#define GD_MSG_BRICK_SET_INFO_FAIL_STR \
+ "Failed to add brick mount details to dict"
+#define GD_MSG_SET_XATTR_BRICK_FAIL_STR \
+ "Glusterfs is not supported on brick. Setting extended attribute failed"
+#define GD_MSG_SET_XATTR_FAIL_STR "Failed to set extended attribute"
+#define GD_MSG_REMOVE_XATTR_FAIL_STR "Failed to remove extended attribute"
+#define GD_MSG_XLATOR_SET_OPT_FAIL_STR "Failed to set xlator type"
+#define GD_MSG_XLATOR_LINK_FAIL_STR \
+ "Failed to do the link of xlator with children"
+#define GD_MSG_READ_ERROR_STR "Failed to read directory"
+#define GD_MSG_INCOMPATIBLE_VALUE_STR "Incompatible transport type"
+#define GD_MSG_VOL_STOP_ARGS_GET_FAILED_STR "Failed to get volume stop args"
+#define GD_MSG_FRAME_CREATE_FAIL_STR "Failed to create frame"
+#define GD_MSG_VOLUME_NOT_IMPORTED_STR "Volume has not been imported"
+#define GD_MSG_ADD_BRICK_MNT_INFO_FAIL_STR \
+ "Failed to add brick mount details to dict"
+#define GD_MSG_GET_MNT_ENTRY_INFO_FAIL_STR "Failed to get mount entry details"
+#define GD_MSG_BRICKPATH_ROOT_GET_FAIL_STR "failed to get brick root details"
+#define GD_MSG_VOL_INFO_REQ_RECVD_STR "Received get volume info req"
+#define GD_MSG_NO_FLAG_SET_STR "No flags set"
+#define GD_MSG_CREATE_DIR_FAILED_STR "Failed to create directory"
+#define GD_MSG_POST_HOOK_STUB_INIT_FAIL_STR \
+ "Failed to initialize post hooks stub"
+#define GD_MSG_FILE_OP_FAILED_STR "File operation failed"
+#define GD_MSG_INODE_SIZE_GET_FAIL_STR "Failed to get inode size"
+#define GD_MSG_CMD_EXEC_FAIL_STR "Command execution failed"
+#define GD_MSG_XLATOR_CREATE_FAIL_STR "Failed to create xlator"
+#define GD_MSG_CLRCLK_VOL_REQ_RCVD_STR "Received clear-locks request for volume"
+#define GD_MSG_BRK_PORT_NUM_GET_FAIL_STR \
+ "Couldn't get port number of local bricks"
+#define GD_MSG_CLRLOCKS_MOUNTDIR_CREATE_FAIL_STR \
+ "Creating mount directory for clear-locks failed"
+#define GD_MSG_CLRLOCKS_CLNT_MOUNT_FAIL_STR \
+ "Failed to mount clear-locks maintenance client"
+#define GD_MSG_CLRLOCKS_CLNT_UMOUNT_FAIL_STR \
+ "Failed to unmount clear-locks mount point"
+#define GD_MSG_CLRCLK_SND_CMD_FAIL_STR "Failed to send command for clear-locks"
+#define GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL_STR \
+ "Failed to allocate memory or get serialized length of dict"
+#define GD_MSG_GET_XATTR_FAIL_STR "Failed to get extended attribute"
+
+#endif /* !_GLUSTERD_MESSAGES_H_ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c
new file mode 100644
index 00000000000..1069688a89d
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt-handler.c
@@ -0,0 +1,1144 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+/* rpc related syncops */
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "xdr-generic.h"
+#include "glusterd1-xdr.h"
+#include "glusterd-syncop.h"
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-locks.h"
+#include "glusterd-mgmt.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-messages.h"
+
+static int
+glusterd_mgmt_v3_null(rpcsvc_request_t *req)
+{
+ return 0;
+}
+
+static int
+glusterd_mgmt_v3_lock_send_resp(rpcsvc_request_t *req, int32_t status,
+ uint32_t op_errno)
+{
+ gd1_mgmt_v3_lock_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ rsp.op_ret = status;
+ if (rsp.op_ret)
+ rsp.op_errno = op_errno;
+
+ glusterd_get_uuid(&rsp.uuid);
+
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp);
+
+ gf_msg_debug(this->name, 0, "Responded to mgmt_v3 lock, ret: %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_synctasked_mgmt_v3_lock(rpcsvc_request_t *req,
+ gd1_mgmt_v3_lock_req *lock_req,
+ glusterd_op_lock_ctx_t *ctx)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ uint32_t op_errno = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(ctx);
+ GF_ASSERT(ctx->dict);
+
+ /* Trying to acquire multiple mgmt_v3 locks */
+ ret = glusterd_multiple_mgmt_v3_lock(ctx->dict, ctx->uuid, &op_errno);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL,
+ "Failed to acquire mgmt_v3 locks for %s", uuid_utoa(ctx->uuid));
+
+ ret = glusterd_mgmt_v3_lock_send_resp(req, ret, op_errno);
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_op_state_machine_mgmt_v3_lock(rpcsvc_request_t *req,
+ gd1_mgmt_v3_lock_req *lock_req,
+ glusterd_op_lock_ctx_t *ctx)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_op_info_t txn_op_info = {
+ {0},
+ };
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ glusterd_txn_opinfo_init(&txn_op_info, NULL, &lock_req->op, ctx->dict, req);
+
+ ret = glusterd_set_txn_opinfo(&lock_req->txn_id, &txn_op_info);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPINFO_SET_FAIL,
+ "Unable to set transaction's opinfo");
+ goto out;
+ }
+
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_LOCK, &lock_req->txn_id, ctx);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_EVENT_LOCK_FAIL,
+ "Failed to inject event GD_OP_EVENT_LOCK");
+
+out:
+ glusterd_friend_sm();
+ glusterd_op_sm();
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_handle_mgmt_v3_lock_fn(rpcsvc_request_t *req)
+{
+ gd1_mgmt_v3_lock_req lock_req = {
+ {0},
+ };
+ int32_t ret = -1;
+ glusterd_op_lock_ctx_t *ctx = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t is_synctasked = _gf_false;
+ gf_boolean_t free_ctx = _gf_false;
+ glusterd_conf_t *conf = NULL;
+ uint32_t timeout = 0;
+
+ this = THIS;
+ conf = this->private;
+ GF_ASSERT(conf);
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &lock_req,
+ (xdrproc_t)xdr_gd1_mgmt_v3_lock_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode lock "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0,
+ "Received mgmt_v3 lock req "
+ "from uuid: %s",
+ uuid_utoa(lock_req.uuid));
+
+ if (glusterd_peerinfo_find_by_uuid(lock_req.uuid) == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
+ "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa(lock_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_op_lock_ctx_t);
+ if (!ctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ gf_uuid_copy(ctx->uuid, lock_req.uuid);
+ ctx->req = req;
+
+ ctx->dict = dict_new();
+ if (!ctx->dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(lock_req.dict.dict_val, lock_req.dict.dict_len,
+ &ctx->dict);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
+ goto out;
+ }
+
+ /* Cli will add timeout key to dict if the default timeout is
+ * other than 2 minutes. Here we use this value to check whether
+ * mgmt_v3_lock_timeout should be set to default value or we
+ * need to change the value according to timeout value
+ * i.e, timeout + 120 seconds. */
+ ret = dict_get_uint32(ctx->dict, "timeout", &timeout);
+ if (!ret)
+ conf->mgmt_v3_lock_timeout = timeout + 120;
+
+ is_synctasked = dict_get_str_boolean(ctx->dict, "is_synctasked", _gf_false);
+ if (is_synctasked) {
+ ret = glusterd_synctasked_mgmt_v3_lock(req, &lock_req, ctx);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL,
+ "Failed to acquire mgmt_v3_locks");
+ /* Ignore the return code, as it shouldn't be propagated
+ * from the handler function so as to avoid double
+ * deletion of the req
+ */
+ ret = 0;
+ }
+
+ /* The above function does not take ownership of ctx.
+ * Therefore we need to free the ctx explicitly. */
+ free_ctx = _gf_true;
+ } else {
+ /* Shouldn't ignore the return code here, and it should
+ * be propagated from the handler function as in failure
+ * case it doesn't delete the req object
+ */
+ ret = glusterd_op_state_machine_mgmt_v3_lock(req, &lock_req, ctx);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL,
+ "Failed to acquire mgmt_v3_locks");
+ }
+
+out:
+
+ if (ctx && (ret || free_ctx)) {
+ if (ctx->dict)
+ dict_unref(ctx->dict);
+
+ GF_FREE(ctx);
+ }
+
+ free(lock_req.dict.dict_val);
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_mgmt_v3_pre_validate_send_resp(rpcsvc_request_t *req, int32_t op,
+ int32_t status, char *op_errstr,
+ dict_t *rsp_dict, uint32_t op_errno)
+{
+ gd1_mgmt_v3_pre_val_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ rsp.op_ret = status;
+ glusterd_get_uuid(&rsp.uuid);
+ rsp.op = op;
+ rsp.op_errno = op_errno;
+ if (op_errstr)
+ rsp.op_errstr = op_errstr;
+ else
+ rsp.op_errstr = "";
+
+ ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp);
+
+ GF_FREE(rsp.dict.dict_val);
+out:
+ gf_msg_debug(this->name, 0, "Responded to pre validation, ret: %d", ret);
+ return ret;
+}
+
+static int
+glusterd_handle_pre_validate_fn(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_pre_val_req op_req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ char *op_errstr = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+ uint32_t op_errno = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &op_req,
+ (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode pre validation "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
+ "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa(op_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
+ goto out;
+ }
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
+ return -1;
+ }
+
+ ret = gd_mgmt_v3_pre_validate_fn(op_req.op, dict, &op_errstr, rsp_dict,
+ &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL,
+ "Pre Validation failed on operation %s", gd_op_list[op_req.op]);
+ }
+
+ ret = glusterd_mgmt_v3_pre_validate_send_resp(
+ req, op_req.op, ret, op_errstr, rsp_dict, op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_OP_RESP_FAIL,
+ "Failed to send Pre Validation "
+ "response for operation %s",
+ gd_op_list[op_req.op]);
+ goto out;
+ }
+
+out:
+ if (op_errstr && (strcmp(op_errstr, "")))
+ GF_FREE(op_errstr);
+
+ free(op_req.dict.dict_val);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ /* Return 0 from handler to avoid double deletion of req obj */
+ return 0;
+}
+
+static int
+glusterd_mgmt_v3_brick_op_send_resp(rpcsvc_request_t *req, int32_t op,
+ int32_t status, char *op_errstr,
+ dict_t *rsp_dict)
+{
+ gd1_mgmt_v3_brick_op_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ rsp.op_ret = status;
+ glusterd_get_uuid(&rsp.uuid);
+ rsp.op = op;
+ if (op_errstr)
+ rsp.op_errstr = op_errstr;
+ else
+ rsp.op_errstr = "";
+
+ ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp);
+
+ GF_FREE(rsp.dict.dict_val);
+out:
+ gf_msg_debug(this->name, 0, "Responded to brick op, ret: %d", ret);
+ return ret;
+}
+
+static int
+glusterd_handle_brick_op_fn(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_brick_op_req op_req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ char *op_errstr = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &op_req,
+ (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode brick op "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
+ "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa(op_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
+ goto out;
+ }
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
+ return -1;
+ }
+
+ ret = gd_mgmt_v3_brick_op_fn(op_req.op, dict, &op_errstr, rsp_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_FAIL,
+ "Brick Op failed on operation %s", gd_op_list[op_req.op]);
+ }
+
+ ret = glusterd_mgmt_v3_brick_op_send_resp(req, op_req.op, ret, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALD_RESP_FAIL,
+ "Failed to send brick op "
+ "response for operation %s",
+ gd_op_list[op_req.op]);
+ goto out;
+ }
+
+out:
+ if (op_errstr && (strcmp(op_errstr, "")))
+ GF_FREE(op_errstr);
+
+ free(op_req.dict.dict_val);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ /* Return 0 from handler to avoid double deletion of req obj */
+ return 0;
+}
+
+static int
+glusterd_mgmt_v3_commit_send_resp(rpcsvc_request_t *req, int32_t op,
+ int32_t status, char *op_errstr,
+ uint32_t op_errno, dict_t *rsp_dict)
+{
+ gd1_mgmt_v3_commit_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ rsp.op_ret = status;
+ glusterd_get_uuid(&rsp.uuid);
+ rsp.op = op;
+ rsp.op_errno = op_errno;
+ if (op_errstr)
+ rsp.op_errstr = op_errstr;
+ else
+ rsp.op_errstr = "";
+
+ ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp);
+
+ GF_FREE(rsp.dict.dict_val);
+out:
+ gf_msg_debug(this->name, 0, "Responded to commit, ret: %d", ret);
+ return ret;
+}
+
+static int
+glusterd_handle_commit_fn(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_commit_req op_req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ char *op_errstr = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+ uint32_t op_errno = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &op_req,
+ (xdrproc_t)xdr_gd1_mgmt_v3_commit_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode commit "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
+ "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa(op_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
+ goto out;
+ }
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
+ return -1;
+ }
+
+ ret = gd_mgmt_v3_commit_fn(op_req.op, dict, &op_errstr, &op_errno,
+ rsp_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "commit failed on operation %s", gd_op_list[op_req.op]);
+ }
+
+ ret = glusterd_mgmt_v3_commit_send_resp(req, op_req.op, ret, op_errstr,
+ op_errno, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_OP_RESP_FAIL,
+ "Failed to send commit "
+ "response for operation %s",
+ gd_op_list[op_req.op]);
+ goto out;
+ }
+
+out:
+ if (op_errstr && (strcmp(op_errstr, "")))
+ GF_FREE(op_errstr);
+
+ free(op_req.dict.dict_val);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ /* Return 0 from handler to avoid double deletion of req obj */
+ return 0;
+}
+
+static int
+glusterd_mgmt_v3_post_commit_send_resp(rpcsvc_request_t *req, int32_t op,
+ int32_t status, char *op_errstr,
+ uint32_t op_errno, dict_t *rsp_dict)
+{
+ gd1_mgmt_v3_post_commit_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ rsp.op_ret = status;
+ glusterd_get_uuid(&rsp.uuid);
+ rsp.op = op;
+ rsp.op_errno = op_errno;
+ if (op_errstr)
+ rsp.op_errstr = op_errstr;
+ else
+ rsp.op_errstr = "";
+
+ ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_rsp);
+
+ GF_FREE(rsp.dict.dict_val);
+out:
+ gf_msg_debug(this->name, 0, "Responded to post commit, ret: %d", ret);
+ return ret;
+}
+
+static int
+glusterd_handle_post_commit_fn(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_post_commit_req op_req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ char *op_errstr = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+ uint32_t op_errno = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &op_req,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode post commit "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
+ "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa(op_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
+ goto out;
+ }
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
+ return -1;
+ }
+
+ ret = gd_mgmt_v3_post_commit_fn(op_req.op, dict, &op_errstr, &op_errno,
+ rsp_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "post commit failed on operation %s", gd_op_list[op_req.op]);
+ }
+
+ ret = glusterd_mgmt_v3_post_commit_send_resp(req, op_req.op, ret, op_errstr,
+ op_errno, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_OP_RESP_FAIL,
+ "Failed to send post commit "
+ "response for operation %s",
+ gd_op_list[op_req.op]);
+ goto out;
+ }
+
+out:
+ if (op_errstr && (strcmp(op_errstr, "")))
+ GF_FREE(op_errstr);
+
+ free(op_req.dict.dict_val);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ /* Return 0 from handler to avoid double deletion of req obj */
+ return 0;
+}
+
+static int
+glusterd_mgmt_v3_post_validate_send_resp(rpcsvc_request_t *req, int32_t op,
+ int32_t status, char *op_errstr,
+ dict_t *rsp_dict)
+{
+ gd1_mgmt_v3_post_val_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ rsp.op_ret = status;
+ glusterd_get_uuid(&rsp.uuid);
+ rsp.op = op;
+ if (op_errstr)
+ rsp.op_errstr = op_errstr;
+ else
+ rsp.op_errstr = "";
+
+ ret = dict_allocate_and_serialize(rsp_dict, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp);
+
+ GF_FREE(rsp.dict.dict_val);
+out:
+ gf_msg_debug(this->name, 0, "Responded to post validation, ret: %d", ret);
+ return ret;
+}
+
+static int
+glusterd_handle_post_validate_fn(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_post_val_req op_req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ char *op_errstr = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &op_req,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_val_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode post validation "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (glusterd_peerinfo_find_by_uuid(op_req.uuid) == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
+ "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa(op_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ ret = dict_unserialize(op_req.dict.dict_val, op_req.dict.dict_len, &dict);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
+ goto out;
+ }
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
+ return -1;
+ }
+
+ ret = gd_mgmt_v3_post_validate_fn(op_req.op, op_req.op_ret, dict,
+ &op_errstr, rsp_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_VALIDATION_FAIL,
+ "Post Validation failed on operation %s", gd_op_list[op_req.op]);
+ }
+
+ ret = glusterd_mgmt_v3_post_validate_send_resp(req, op_req.op, ret,
+ op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_OP_RESP_FAIL,
+ "Failed to send Post Validation "
+ "response for operation %s",
+ gd_op_list[op_req.op]);
+ goto out;
+ }
+
+out:
+ if (op_errstr && (strcmp(op_errstr, "")))
+ GF_FREE(op_errstr);
+
+ free(op_req.dict.dict_val);
+
+ if (dict)
+ dict_unref(dict);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ /* Return 0 from handler to avoid double deletion of req obj */
+ return 0;
+}
+
+static int
+glusterd_mgmt_v3_unlock_send_resp(rpcsvc_request_t *req, int32_t status)
+{
+ gd1_mgmt_v3_unlock_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ rsp.op_ret = status;
+ if (rsp.op_ret)
+ rsp.op_errno = errno;
+
+ glusterd_get_uuid(&rsp.uuid);
+
+ ret = glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp);
+
+ gf_msg_debug(this->name, 0, "Responded to mgmt_v3 unlock, ret: %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_syctasked_mgmt_v3_unlock(rpcsvc_request_t *req,
+ gd1_mgmt_v3_unlock_req *unlock_req,
+ glusterd_op_lock_ctx_t *ctx)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(ctx);
+
+ /* Trying to release multiple mgmt_v3 locks */
+ ret = glusterd_multiple_mgmt_v3_unlock(ctx->dict, ctx->uuid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Failed to release mgmt_v3 locks for %s", uuid_utoa(ctx->uuid));
+ }
+
+ ret = glusterd_mgmt_v3_unlock_send_resp(req, ret);
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_op_state_machine_mgmt_v3_unlock(rpcsvc_request_t *req,
+ gd1_mgmt_v3_unlock_req *lock_req,
+ glusterd_op_lock_ctx_t *ctx)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_UNLOCK, &lock_req->txn_id,
+ ctx);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_EVENT_UNLOCK_FAIL,
+ "Failed to inject event GD_OP_EVENT_UNLOCK");
+
+ glusterd_friend_sm();
+ glusterd_op_sm();
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_handle_mgmt_v3_unlock_fn(rpcsvc_request_t *req)
+{
+ gd1_mgmt_v3_unlock_req lock_req = {
+ {0},
+ };
+ int32_t ret = -1;
+ glusterd_op_lock_ctx_t *ctx = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t is_synctasked = _gf_false;
+ gf_boolean_t free_ctx = _gf_false;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ ret = xdr_to_generic(req->msg[0], &lock_req,
+ (xdrproc_t)xdr_gd1_mgmt_v3_unlock_req);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode unlock "
+ "request received from peer");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0,
+ "Received volume unlock req "
+ "from uuid: %s",
+ uuid_utoa(lock_req.uuid));
+
+ if (glusterd_peerinfo_find_by_uuid(lock_req.uuid) == NULL) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_PEER_NOT_FOUND,
+ "%s doesn't "
+ "belong to the cluster. Ignoring request.",
+ uuid_utoa(lock_req.uuid));
+ ret = -1;
+ goto out;
+ }
+
+ ctx = GF_CALLOC(1, sizeof(*ctx), gf_gld_mt_op_lock_ctx_t);
+ if (!ctx) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ gf_uuid_copy(ctx->uuid, lock_req.uuid);
+ ctx->req = req;
+
+ ctx->dict = dict_new();
+ if (!ctx->dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_unserialize(lock_req.dict.dict_val, lock_req.dict.dict_len,
+ &ctx->dict);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ NULL);
+ goto out;
+ }
+
+ is_synctasked = dict_get_str_boolean(ctx->dict, "is_synctasked", _gf_false);
+ if (is_synctasked) {
+ ret = glusterd_syctasked_mgmt_v3_unlock(req, &lock_req, ctx);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Failed to release mgmt_v3_locks");
+ /* Ignore the return code, as it shouldn't be propagated
+ * from the handler function so as to avoid double
+ * deletion of the req
+ */
+ ret = 0;
+ }
+
+ /* The above function does not take ownership of ctx.
+ * Therefore we need to free the ctx explicitly. */
+ free_ctx = _gf_true;
+ } else {
+ /* Shouldn't ignore the return code here, and it should
+ * be propagated from the handler function as in failure
+ * case it doesn't delete the req object
+ */
+ ret = glusterd_op_state_machine_mgmt_v3_unlock(req, &lock_req, ctx);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Failed to release mgmt_v3_locks");
+ }
+
+out:
+
+ if (ctx && (ret || free_ctx)) {
+ if (ctx->dict)
+ dict_unref(ctx->dict);
+
+ GF_FREE(ctx);
+ }
+
+ free(lock_req.dict.dict_val);
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_handle_mgmt_v3_lock(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, glusterd_handle_mgmt_v3_lock_fn);
+}
+
+static int
+glusterd_handle_pre_validate(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, glusterd_handle_pre_validate_fn);
+}
+
+static int
+glusterd_handle_brick_op(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, glusterd_handle_brick_op_fn);
+}
+
+static int
+glusterd_handle_commit(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, glusterd_handle_commit_fn);
+}
+
+static int
+glusterd_handle_post_commit(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, glusterd_handle_post_commit_fn);
+}
+
+static int
+glusterd_handle_post_validate(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, glusterd_handle_post_validate_fn);
+}
+
+int
+glusterd_handle_mgmt_v3_unlock(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, glusterd_handle_mgmt_v3_unlock_fn);
+}
+
+static rpcsvc_actor_t gd_svc_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = {
+ [GLUSTERD_MGMT_V3_NULL] = {"NULL", glusterd_mgmt_v3_null, NULL,
+ GLUSTERD_MGMT_V3_NULL, DRC_NA, 0},
+ [GLUSTERD_MGMT_V3_LOCK] = {"MGMT_V3_LOCK", glusterd_handle_mgmt_v3_lock,
+ NULL, GLUSTERD_MGMT_V3_LOCK, DRC_NA, 0},
+ [GLUSTERD_MGMT_V3_PRE_VALIDATE] = {"PRE_VAL", glusterd_handle_pre_validate,
+ NULL, GLUSTERD_MGMT_V3_PRE_VALIDATE,
+ DRC_NA, 0},
+ [GLUSTERD_MGMT_V3_BRICK_OP] = {"BRCK_OP", glusterd_handle_brick_op, NULL,
+ GLUSTERD_MGMT_V3_BRICK_OP, DRC_NA, 0},
+ [GLUSTERD_MGMT_V3_COMMIT] = {"COMMIT", glusterd_handle_commit, NULL,
+ GLUSTERD_MGMT_V3_COMMIT, DRC_NA, 0},
+ [GLUSTERD_MGMT_V3_POST_COMMIT] = {"POST_COMMIT",
+ glusterd_handle_post_commit, NULL,
+ GLUSTERD_MGMT_V3_POST_COMMIT, DRC_NA, 0},
+ [GLUSTERD_MGMT_V3_POST_VALIDATE] = {"POST_VAL",
+ glusterd_handle_post_validate, NULL,
+ GLUSTERD_MGMT_V3_POST_VALIDATE, DRC_NA,
+ 0},
+ [GLUSTERD_MGMT_V3_UNLOCK] = {"MGMT_V3_UNLOCK",
+ glusterd_handle_mgmt_v3_unlock, NULL,
+ GLUSTERD_MGMT_V3_UNLOCK, DRC_NA, 0},
+};
+
+struct rpcsvc_program gd_svc_mgmt_v3_prog = {
+ .progname = "GlusterD svc mgmt v3",
+ .prognum = GD_MGMT_PROGRAM,
+ .progver = GD_MGMT_V3_VERSION,
+ .numactors = GLUSTERD_MGMT_V3_MAXVALUE,
+ .actors = gd_svc_mgmt_v3_actors,
+ .synctask = _gf_true,
+};
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
new file mode 100644
index 00000000000..bca7221062b
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.c
@@ -0,0 +1,3114 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+/* rpc related syncops */
+#include "rpc-clnt.h"
+#include "protocol-common.h"
+#include "xdr-generic.h"
+#include "glusterd1-xdr.h"
+#include "glusterd-syncop.h"
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-locks.h"
+#include "glusterd-mgmt.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-server-quorum.h"
+#include "glusterd-volgen.h"
+#include "glusterd-store.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-messages.h"
+#include "glusterd-errno.h"
+#include "glusterd-hooks.h"
+
+extern struct rpc_clnt_program gd_mgmt_v3_prog;
+
+void
+gd_mgmt_v3_collate_errors(struct syncargs *args, int op_ret, int op_errno,
+ char *op_errstr, int op_code, uuid_t peerid,
+ u_char *uuid)
+{
+ char *peer_str = NULL;
+ char err_str[PATH_MAX] = "Please check log file for details.";
+ char op_err[PATH_MAX] = "";
+ xlator_t *this = NULL;
+ int is_operrstr_blk = 0;
+ char *err_string = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(args);
+ GF_ASSERT(uuid);
+
+ if (op_ret) {
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ RCU_READ_LOCK;
+ peerinfo = glusterd_peerinfo_find(peerid, NULL);
+ if (peerinfo)
+ peer_str = gf_strdup(peerinfo->hostname);
+ else
+ peer_str = gf_strdup(uuid_utoa(uuid));
+
+ RCU_READ_UNLOCK;
+
+ is_operrstr_blk = (op_errstr && strcmp(op_errstr, ""));
+ err_string = (is_operrstr_blk) ? op_errstr : err_str;
+
+ switch (op_code) {
+ case GLUSTERD_MGMT_V3_LOCK: {
+ snprintf(op_err, sizeof(op_err), "Locking failed on %s. %s",
+ peer_str, err_string);
+ break;
+ }
+ case GLUSTERD_MGMT_V3_PRE_VALIDATE: {
+ snprintf(op_err, sizeof(op_err),
+ "Pre Validation failed on %s. %s", peer_str,
+ err_string);
+ break;
+ }
+ case GLUSTERD_MGMT_V3_BRICK_OP: {
+ snprintf(op_err, sizeof(op_err), "Brick ops failed on %s. %s",
+ peer_str, err_string);
+ break;
+ }
+ case GLUSTERD_MGMT_V3_COMMIT: {
+ snprintf(op_err, sizeof(op_err), "Commit failed on %s. %s",
+ peer_str, err_string);
+ break;
+ }
+ case GLUSTERD_MGMT_V3_POST_COMMIT: {
+ snprintf(op_err, sizeof(op_err), "Post commit failed on %s. %s",
+ peer_str, err_string);
+ break;
+ }
+ case GLUSTERD_MGMT_V3_POST_VALIDATE: {
+ snprintf(op_err, sizeof(op_err),
+ "Post Validation failed on %s. %s", peer_str,
+ err_string);
+ break;
+ }
+ case GLUSTERD_MGMT_V3_UNLOCK: {
+ snprintf(op_err, sizeof(op_err), "Unlocking failed on %s. %s",
+ peer_str, err_string);
+ break;
+ }
+ default:
+ snprintf(op_err, sizeof(op_err), "Unknown error! on %s. %s",
+ peer_str, err_string);
+ }
+
+ if (args->errstr) {
+ len = snprintf(err_str, sizeof(err_str), "%s\n%s", args->errstr,
+ op_err);
+ if (len < 0) {
+ strcpy(err_str, "<error>");
+ }
+ GF_FREE(args->errstr);
+ args->errstr = NULL;
+ } else
+ snprintf(err_str, sizeof(err_str), "%s", op_err);
+
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_OP_FAIL, "%s",
+ op_err);
+ args->errstr = gf_strdup(err_str);
+ }
+
+ GF_FREE(peer_str);
+
+ return;
+}
+
+int32_t
+gd_mgmt_v3_pre_validate_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict, uint32_t *op_errno)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(rsp_dict);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ switch (op) {
+ case GD_OP_SNAP:
+ ret = glusterd_snapshot_prevalidate(dict, op_errstr, rsp_dict,
+ op_errno);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_PRE_VALIDATION_FAIL,
+ "Snapshot Prevalidate Failed");
+ goto out;
+ }
+
+ break;
+
+ case GD_OP_REPLACE_BRICK:
+ ret = glusterd_op_stage_replace_brick(dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_PRE_VALIDATION_FAIL,
+ "Replace-brick prevalidation failed.");
+ goto out;
+ }
+ break;
+ case GD_OP_ADD_BRICK:
+ ret = glusterd_op_stage_add_brick(dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_PRE_VALIDATION_FAIL,
+ "ADD-brick prevalidation failed.");
+ goto out;
+ }
+ break;
+ case GD_OP_START_VOLUME:
+ ret = glusterd_op_stage_start_volume(dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_PRE_VALIDATION_FAIL,
+ "Volume start prevalidation failed.");
+ goto out;
+ }
+ break;
+ case GD_OP_STOP_VOLUME:
+ ret = glusterd_op_stage_stop_volume(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_PRE_VALIDATION_FAIL,
+ "Volume stop prevalidation failed.");
+ goto out;
+ }
+ break;
+ case GD_OP_REMOVE_BRICK:
+ ret = glusterd_op_stage_remove_brick(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_PRE_VALIDATION_FAIL,
+ "Remove brick prevalidation failed.");
+ goto out;
+ }
+ break;
+
+ case GD_OP_RESET_BRICK:
+ ret = glusterd_reset_brick_prevalidate(dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_PRE_VALIDATION_FAIL,
+ "Reset brick prevalidation failed.");
+ goto out;
+ }
+ break;
+
+ case GD_OP_PROFILE_VOLUME:
+ ret = glusterd_op_stage_stats_volume(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_PRE_VALIDATION_FAIL,
+ "prevalidation failed for profile operation.");
+ goto out;
+ }
+ break;
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ ret = glusterd_mgmt_v3_op_stage_rebalance(dict, op_errstr);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Rebalance Prevalidate Failed");
+ goto out;
+ }
+ break;
+
+ case GD_OP_MAX_OPVERSION:
+ ret = 0;
+ break;
+
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ gf_msg_debug(this->name, 0, "OP = %d. Returning %d", op, ret);
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_brick_op_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(rsp_dict);
+
+ switch (op) {
+ case GD_OP_SNAP: {
+ ret = glusterd_snapshot_brickop(dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_OP_FAIL,
+ "snapshot brickop failed");
+ goto out;
+ }
+ break;
+ }
+ case GD_OP_PROFILE_VOLUME:
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME: {
+ ret = gd_brick_op_phase(op, rsp_dict, dict, op_errstr);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "%s brickop "
+ "failed",
+ gd_op_list[op]);
+ goto out;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ gf_msg_trace(this->name, 0, "OP = %d. Returning %d", op, ret);
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ uint32_t *op_errno, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+ GF_ASSERT(rsp_dict);
+
+ glusterd_op_commit_hook(op, dict, GD_COMMIT_HOOK_PRE);
+ switch (op) {
+ case GD_OP_SNAP: {
+ ret = glusterd_snapshot(dict, op_errstr, op_errno, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Snapshot Commit Failed");
+ goto out;
+ }
+ break;
+ }
+ case GD_OP_REPLACE_BRICK: {
+ ret = glusterd_op_replace_brick(dict, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Replace-brick commit failed.");
+ goto out;
+ }
+ break;
+ }
+ case GD_OP_ADD_BRICK: {
+ ret = glusterd_op_add_brick(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Add-brick commit failed.");
+ goto out;
+ }
+ break;
+ }
+ case GD_OP_START_VOLUME: {
+ ret = glusterd_op_start_volume(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Volume start commit failed.");
+ goto out;
+ }
+ break;
+ }
+ case GD_OP_STOP_VOLUME: {
+ ret = glusterd_op_stop_volume(dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Volume stop commit failed.");
+ goto out;
+ }
+ break;
+ }
+ case GD_OP_REMOVE_BRICK: {
+ ret = glusterd_op_remove_brick(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Remove-brick commit failed.");
+ goto out;
+ }
+ break;
+ }
+ case GD_OP_RESET_BRICK: {
+ ret = glusterd_op_reset_brick(dict, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Reset-brick commit failed.");
+ goto out;
+ }
+ break;
+ }
+ case GD_OP_MAX_OPVERSION: {
+ ret = glusterd_op_get_max_opversion(op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Commit failed.");
+ goto out;
+ }
+ break;
+ }
+ case GD_OP_PROFILE_VOLUME: {
+ ret = glusterd_op_stats_volume(dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "commit failed for volume profile operation.");
+ goto out;
+ }
+ break;
+ }
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME: {
+ ret = glusterd_mgmt_v3_op_rebalance(dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Rebalance Commit Failed");
+ goto out;
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ gf_msg_debug(this->name, 0, "OP = %d. Returning %d", op, ret);
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_post_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ uint32_t *op_errno, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+ GF_ASSERT(rsp_dict);
+
+ switch (op) {
+ case GD_OP_ADD_BRICK:
+ ret = glusterd_post_commit_add_brick(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Add-brick post commit failed.");
+ goto out;
+ }
+ break;
+ case GD_OP_REPLACE_BRICK:
+ ret = glusterd_post_commit_replace_brick(dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Replace-brick post commit failed.");
+ goto out;
+ }
+ break;
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ gf_msg_debug(this->name, 0, "OP = %d. Returning %d", op, ret);
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_post_validate_fn(glusterd_op_t op, int32_t op_ret, dict_t *dict,
+ char **op_errstr, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(rsp_dict);
+
+ if (op_ret == 0)
+ glusterd_op_commit_hook(op, dict, GD_COMMIT_HOOK_POST);
+
+ switch (op) {
+ case GD_OP_SNAP: {
+ ret = glusterd_snapshot_postvalidate(dict, op_ret, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_POST_VALIDATION_FAIL,
+ "postvalidate operation failed");
+ goto out;
+ }
+ break;
+ }
+ case GD_OP_ADD_BRICK: {
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get"
+ " volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Unable to "
+ "allocate memory");
+ goto out;
+ }
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret)
+ goto out;
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
+ break;
+ }
+ case GD_OP_START_VOLUME: {
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get"
+ " volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Unable to "
+ "allocate memory");
+ goto out;
+ }
+
+ break;
+ }
+ case GD_OP_STOP_VOLUME: {
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get"
+ " volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Unable to "
+ "allocate memory");
+ goto out;
+ }
+ break;
+ }
+
+ default:
+ break;
+ }
+
+ ret = 0;
+
+out:
+ gf_msg_trace(this->name, 0, "OP = %d. Returning %d", op, ret);
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_lock_cbk_fn(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ gd1_mgmt_v3_lock_rsp rsp = {
+ {0},
+ };
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(myframe);
+
+ /* Even though the lock command has failed, while collating the errors
+ (gd_mgmt_v3_collate_errors), args->op_ret and args->op_errno will be
+ used. @args is obtained from frame->local. So before checking the
+ status of the request and going out if its a failure, args should be
+ set to frame->local. Otherwise, while collating args will be NULL.
+ This applies to other phases such as prevalidate, brickop, commit and
+ postvalidate also.
+ */
+ frame = myframe;
+ args = frame->local;
+ peerid = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL);
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp);
+ if (ret < 0)
+ goto out;
+
+ gf_uuid_copy(args->uuid, rsp.uuid);
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+
+out:
+ gd_mgmt_v3_collate_errors(args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_V3_LOCK, *peerid, rsp.uuid);
+ GF_FREE(peerid);
+
+ if (rsp.dict.dict_val)
+ free(rsp.dict.dict_val);
+ /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+ * the caller function.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY(frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_mgmt_v3_lock_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ gd_mgmt_v3_lock_cbk_fn);
+}
+
+int
+gd_mgmt_v3_lock(glusterd_op_t op, dict_t *op_ctx, glusterd_peerinfo_t *peerinfo,
+ struct syncargs *args, uuid_t my_uuid, uuid_t recv_uuid)
+{
+ gd1_mgmt_v3_lock_req req = {
+ {0},
+ };
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(args);
+
+ ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ gf_uuid_copy(req.uuid, my_uuid);
+ req.op = op;
+
+ GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
+ goto out;
+ }
+
+ ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid,
+ &gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_LOCK,
+ gd_mgmt_v3_lock_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_v3_lock_req);
+out:
+ GF_FREE(req.dict.dict_val);
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_initiate_lockdown(glusterd_op_t op, dict_t *dict,
+ char **op_errstr, uint32_t *op_errno,
+ gf_boolean_t *is_acquired,
+ uint32_t txn_generation)
+{
+ glusterd_peerinfo_t *peerinfo = NULL;
+ int32_t ret = -1;
+ int32_t peer_cnt = 0;
+ struct syncargs args = {0};
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ uint32_t timeout = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(is_acquired);
+
+ /* Cli will add timeout key to dict if the default timeout is
+ * other than 2 minutes. Here we use this value to check whether
+ * mgmt_v3_lock_timeout should be set to default value or we
+ * need to change the value according to timeout value
+ * i.e, timeout + 120 seconds. */
+ ret = dict_get_uint32(dict, "timeout", &timeout);
+ if (!ret)
+ conf->mgmt_v3_lock_timeout = timeout + 120;
+
+ /* Trying to acquire multiple mgmt_v3 locks on local node */
+ ret = glusterd_multiple_mgmt_v3_lock(dict, MY_UUID, op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL,
+ "Failed to acquire mgmt_v3 locks on localhost");
+ goto out;
+ }
+
+ *is_acquired = _gf_true;
+
+ /* Sending mgmt_v3 lock req to other nodes in the cluster */
+ gd_syncargs_init(&args, NULL);
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+
+ peer_cnt = 0;
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+ {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > txn_generation)
+ continue;
+
+ if (!peerinfo->connected)
+ continue;
+ if (op != GD_OP_SYNC_VOLUME &&
+ peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+ continue;
+
+ gd_mgmt_v3_lock(op, dict, peerinfo, &args, MY_UUID, peer_uuid);
+ peer_cnt++;
+ }
+ RCU_READ_UNLOCK;
+
+ if (0 == peer_cnt) {
+ ret = 0;
+ goto out;
+ }
+
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.errstr)
+ *op_errstr = gf_strdup(args.errstr);
+
+ ret = args.op_ret;
+ *op_errno = args.op_errno;
+
+ gf_msg_debug(this->name, 0,
+ "Sent lock op req for %s "
+ "to %d peers. Returning %d",
+ gd_op_list[op], peer_cnt, ret);
+out:
+ if (ret) {
+ if (*op_errstr)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL,
+ "%s", *op_errstr);
+
+ ret = gf_asprintf(op_errstr,
+ "Another transaction is in progress. "
+ "Please try again after some time.");
+
+ if (ret == -1)
+ *op_errstr = NULL;
+
+ ret = -1;
+ }
+
+ return ret;
+}
+
+int
+glusterd_pre_validate_aggr_rsp_dict(glusterd_op_t op, dict_t *aggr, dict_t *rsp)
+{
+ int32_t ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(aggr);
+ GF_ASSERT(rsp);
+
+ switch (op) {
+ case GD_OP_SNAP:
+ ret = glusterd_snap_pre_validate_use_rsp_dict(aggr, rsp);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL,
+ "Failed to aggregate prevalidate "
+ "response dictionaries.");
+ goto out;
+ }
+ break;
+ case GD_OP_REPLACE_BRICK:
+ ret = glusterd_rb_use_rsp_dict(aggr, rsp);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL,
+ "Failed to aggregate prevalidate "
+ "response dictionaries.");
+ goto out;
+ }
+ break;
+ case GD_OP_START_VOLUME:
+ case GD_OP_ADD_BRICK:
+ ret = glusterd_aggr_brick_mount_dirs(aggr, rsp);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_MOUNDIRS_AGGR_FAIL,
+ "Failed to "
+ "aggregate brick mount dirs");
+ goto out;
+ }
+ break;
+ case GD_OP_RESET_BRICK:
+ ret = glusterd_rb_use_rsp_dict(aggr, rsp);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL,
+ "Failed to aggregate prevalidate "
+ "response dictionaries.");
+ goto out;
+ }
+ case GD_OP_STOP_VOLUME:
+ case GD_OP_REMOVE_BRICK:
+ case GD_OP_PROFILE_VOLUME:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ case GD_OP_REBALANCE:
+ break;
+ case GD_OP_MAX_OPVERSION:
+ break;
+ default:
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid op (%s)", gd_op_list[op]);
+
+ break;
+ }
+out:
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_pre_validate_cbk_fn(struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ gd1_mgmt_v3_pre_val_rsp rsp = {
+ {0},
+ };
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ dict_t *rsp_dict = NULL;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerid = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL);
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_rsp);
+ if (ret < 0)
+ goto out;
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
+ if (ret < 0) {
+ free(rsp.dict.dict_val);
+ goto out;
+ } else {
+ rsp_dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ }
+
+ gf_uuid_copy(args->uuid, rsp.uuid);
+ pthread_mutex_lock(&args->lock_dict);
+ {
+ ret = glusterd_pre_validate_aggr_rsp_dict(rsp.op, args->dict, rsp_dict);
+ }
+ pthread_mutex_unlock(&args->lock_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ if (!rsp.op_ret)
+ op_ret = ret;
+ else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+ } else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+
+out:
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ gd_mgmt_v3_collate_errors(args, op_ret, op_errno, rsp.op_errstr,
+ GLUSTERD_MGMT_V3_PRE_VALIDATE, *peerid, rsp.uuid);
+
+ if (rsp.op_errstr)
+ free(rsp.op_errstr);
+ GF_FREE(peerid);
+ /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+ * the caller function.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY(frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_mgmt_v3_pre_validate_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ gd_mgmt_v3_pre_validate_cbk_fn);
+}
+
+int
+gd_mgmt_v3_pre_validate_req(glusterd_op_t op, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo,
+ struct syncargs *args, uuid_t my_uuid,
+ uuid_t recv_uuid)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_pre_val_req req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(args);
+
+ ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ gf_uuid_copy(req.uuid, my_uuid);
+ req.op = op;
+
+ GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
+ goto out;
+ }
+
+ ret = gd_syncop_submit_request(
+ peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog,
+ GLUSTERD_MGMT_V3_PRE_VALIDATE, gd_mgmt_v3_pre_validate_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_v3_pre_val_req);
+out:
+ GF_FREE(req.dict.dict_val);
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict,
+ char **op_errstr, uint32_t *op_errno,
+ uint32_t txn_generation)
+{
+ int32_t ret = -1;
+ int32_t peer_cnt = 0;
+ dict_t *rsp_dict = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ struct syncargs args = {0};
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ GF_ASSERT(req_dict);
+ GF_ASSERT(op_errstr);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Failed to create response dictionary");
+ goto out;
+ }
+
+ if (op == GD_OP_PROFILE_VOLUME || op == GD_OP_STOP_VOLUME ||
+ op == GD_OP_REBALANCE || op == GD_OP_REMOVE_BRICK) {
+ ret = glusterd_validate_quorum(this, op, req_dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SERVER_QUORUM_NOT_MET,
+ "Server quorum not met. Rejecting operation.");
+ goto out;
+ }
+ }
+
+ /* Pre Validation on local node */
+ ret = gd_mgmt_v3_pre_validate_fn(op, req_dict, op_errstr, rsp_dict,
+ op_errno);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL,
+ "Pre Validation failed for "
+ "operation %s on local node",
+ gd_op_list[op]);
+
+ if (*op_errstr == NULL) {
+ ret = gf_asprintf(op_errstr,
+ "Pre-validation failed "
+ "on localhost. Please "
+ "check log file for details");
+ if (ret == -1)
+ *op_errstr = NULL;
+
+ ret = -1;
+ }
+ goto out;
+ }
+
+ if (op != GD_OP_MAX_OPVERSION) {
+ ret = glusterd_pre_validate_aggr_rsp_dict(op, req_dict, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL,
+ "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ goto out;
+ }
+
+ dict_unref(rsp_dict);
+ rsp_dict = NULL;
+ }
+
+ /* Sending Pre Validation req to other nodes in the cluster */
+ gd_syncargs_init(&args, req_dict);
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+
+ peer_cnt = 0;
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+ {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > txn_generation)
+ continue;
+
+ if (!peerinfo->connected)
+ continue;
+ if (op != GD_OP_SYNC_VOLUME &&
+ peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+ continue;
+
+ gd_mgmt_v3_pre_validate_req(op, req_dict, peerinfo, &args, MY_UUID,
+ peer_uuid);
+ peer_cnt++;
+ }
+ RCU_READ_UNLOCK;
+
+ if (0 == peer_cnt) {
+ ret = 0;
+ goto out;
+ }
+
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL,
+ "Pre Validation failed on peers");
+
+ if (args.errstr)
+ *op_errstr = gf_strdup(args.errstr);
+ }
+
+ ret = args.op_ret;
+ *op_errno = args.op_errno;
+
+ gf_msg_debug(this->name, 0,
+ "Sent pre valaidation req for %s "
+ "to %d peers. Returning %d",
+ gd_op_list[op], peer_cnt, ret);
+out:
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_build_payload(dict_t **req, char **op_errstr, dict_t *dict,
+ glusterd_op_t op)
+{
+ int32_t ret = -1;
+ dict_t *req_dict = NULL;
+ xlator_t *this = NULL;
+ char *volname = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(dict);
+
+ req_dict = dict_new();
+ if (!req_dict)
+ goto out;
+
+ switch (op) {
+ case GD_OP_MAX_OPVERSION:
+ case GD_OP_SNAP:
+ dict_copy(dict, req_dict);
+ break;
+ case GD_OP_START_VOLUME:
+ case GD_OP_STOP_VOLUME:
+ case GD_OP_ADD_BRICK:
+ case GD_OP_REMOVE_BRICK:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ case GD_OP_REPLACE_BRICK:
+ case GD_OP_RESET_BRICK:
+ case GD_OP_PROFILE_VOLUME: {
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, errno,
+ GD_MSG_DICT_GET_FAILED,
+ "volname is not present in "
+ "operation ctx");
+ goto out;
+ }
+
+ if (strcasecmp(volname, "all")) {
+ ret = glusterd_dict_set_volid(dict, volname, op_errstr);
+ if (ret)
+ goto out;
+ }
+ dict_copy(dict, req_dict);
+ } break;
+
+ case GD_OP_REBALANCE: {
+ if (gd_set_commit_hash(dict) != 0) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, errno,
+ GD_MSG_DICT_GET_FAILED,
+ "volname is not present in "
+ "operation ctx");
+ goto out;
+ }
+
+ if (strcasecmp(volname, "all")) {
+ ret = glusterd_dict_set_volid(dict, volname, op_errstr);
+ if (ret)
+ goto out;
+ }
+ dict_copy(dict, req_dict);
+ } break;
+
+ default:
+ break;
+ }
+
+ *req = req_dict;
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_brick_op_cbk_fn(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ gd1_mgmt_v3_brick_op_rsp rsp = {
+ {0},
+ };
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ dict_t *rsp_dict = NULL;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerid = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ /* If the operation failed, then iov can be NULL. So better check the
+ status of the operation and then worry about iov (if the status of
+ the command is success)
+ */
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL);
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_rsp);
+ if (ret < 0)
+ goto out;
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
+ if (ret < 0) {
+ goto out;
+ } else {
+ rsp_dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ }
+
+ gf_uuid_copy(args->uuid, rsp.uuid);
+ pthread_mutex_lock(&args->lock_dict);
+ {
+ if (rsp.op == GD_OP_DEFRAG_BRICK_VOLUME ||
+ rsp.op == GD_OP_PROFILE_VOLUME)
+ ret = glusterd_syncop_aggr_rsp_dict(rsp.op, args->dict, rsp_dict);
+ }
+ pthread_mutex_unlock(&args->lock_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ if (!rsp.op_ret)
+ op_ret = ret;
+ else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+ } else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+
+out:
+
+ gd_mgmt_v3_collate_errors(args, op_ret, op_errno, rsp.op_errstr,
+ GLUSTERD_MGMT_V3_BRICK_OP, *peerid, rsp.uuid);
+
+ if (rsp.op_errstr)
+ free(rsp.op_errstr);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ GF_FREE(peerid);
+ /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+ * the caller function.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY(frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_mgmt_v3_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ gd_mgmt_v3_brick_op_cbk_fn);
+}
+
+int
+gd_mgmt_v3_brick_op_req(glusterd_op_t op, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo, struct syncargs *args,
+ uuid_t my_uuid, uuid_t recv_uuid)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_brick_op_req req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ uuid_t *peerid = {
+ 0,
+ };
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(args);
+
+ ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ gf_uuid_copy(req.uuid, my_uuid);
+ req.op = op;
+
+ GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
+ goto out;
+ }
+
+ ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid,
+ &gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_BRICK_OP,
+ gd_mgmt_v3_brick_op_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_v3_brick_op_req);
+out:
+ GF_FREE(req.dict.dict_val);
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_brick_op(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ char **op_errstr, uint32_t txn_generation)
+{
+ int32_t ret = -1;
+ int32_t peer_cnt = 0;
+ dict_t *rsp_dict = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ struct syncargs args = {0};
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ GF_ASSERT(req_dict);
+ GF_ASSERT(op_errstr);
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Failed to create response dictionary");
+ goto out;
+ }
+
+ /* Perform brick op on local node */
+ ret = gd_mgmt_v3_brick_op_fn(op, req_dict, op_errstr, rsp_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_FAIL,
+ "Brick ops failed for "
+ "operation %s on local node",
+ gd_op_list[op]);
+
+ if (*op_errstr == NULL) {
+ ret = gf_asprintf(op_errstr,
+ "Brick ops failed "
+ "on localhost. Please "
+ "check log file for details");
+ if (ret == -1)
+ *op_errstr = NULL;
+
+ ret = -1;
+ }
+ goto out;
+ }
+ if (op == GD_OP_DEFRAG_BRICK_VOLUME || op == GD_OP_PROFILE_VOLUME) {
+ ret = glusterd_syncop_aggr_rsp_dict(op, op_ctx, rsp_dict);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ goto out;
+ }
+ }
+
+ dict_unref(rsp_dict);
+ rsp_dict = NULL;
+
+ /* Sending brick op req to other nodes in the cluster */
+ gd_syncargs_init(&args, op_ctx);
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+
+ peer_cnt = 0;
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+ {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > txn_generation)
+ continue;
+
+ if (!peerinfo->connected)
+ continue;
+ if (op != GD_OP_SYNC_VOLUME &&
+ peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+ continue;
+
+ gd_mgmt_v3_brick_op_req(op, req_dict, peerinfo, &args, MY_UUID,
+ peer_uuid);
+ peer_cnt++;
+ }
+ RCU_READ_UNLOCK;
+
+ if (0 == peer_cnt) {
+ ret = 0;
+ goto out;
+ }
+
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_FAIL,
+ "Brick ops failed on peers");
+
+ if (args.errstr)
+ *op_errstr = gf_strdup(args.errstr);
+ }
+
+ ret = args.op_ret;
+
+ gf_msg_debug(this->name, 0,
+ "Sent brick op req for %s "
+ "to %d peers. Returning %d",
+ gd_op_list[op], peer_cnt, ret);
+out:
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_commit_cbk_fn(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ gd1_mgmt_v3_commit_rsp rsp = {
+ {0},
+ };
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ dict_t *rsp_dict = NULL;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerid = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL);
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_commit_rsp);
+ if (ret < 0)
+ goto out;
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
+ if (ret < 0) {
+ free(rsp.dict.dict_val);
+ goto out;
+ } else {
+ rsp_dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ }
+
+ gf_uuid_copy(args->uuid, rsp.uuid);
+ pthread_mutex_lock(&args->lock_dict);
+ {
+ ret = glusterd_syncop_aggr_rsp_dict(rsp.op, args->dict, rsp_dict);
+ }
+ pthread_mutex_unlock(&args->lock_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ if (!rsp.op_ret)
+ op_ret = ret;
+ else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+ } else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+
+out:
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ gd_mgmt_v3_collate_errors(args, op_ret, op_errno, rsp.op_errstr,
+ GLUSTERD_MGMT_V3_COMMIT, *peerid, rsp.uuid);
+ GF_FREE(peerid);
+
+ if (rsp.op_errstr)
+ free(rsp.op_errstr);
+
+ /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+ * the caller function.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY(frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_mgmt_v3_commit_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ gd_mgmt_v3_commit_cbk_fn);
+}
+
+int
+gd_mgmt_v3_commit_req(glusterd_op_t op, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo, struct syncargs *args,
+ uuid_t my_uuid, uuid_t recv_uuid)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_commit_req req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(args);
+
+ ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ gf_uuid_copy(req.uuid, my_uuid);
+ req.op = op;
+
+ GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
+ goto out;
+ }
+
+ ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid,
+ &gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_COMMIT,
+ gd_mgmt_v3_commit_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_v3_commit_req);
+out:
+ GF_FREE(req.dict.dict_val);
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ char **op_errstr, uint32_t *op_errno,
+ uint32_t txn_generation)
+{
+ int32_t ret = -1;
+ int32_t peer_cnt = 0;
+ dict_t *rsp_dict = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ struct syncargs args = {0};
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(req_dict);
+ GF_ASSERT(op_errstr);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ switch (op) {
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+
+ ret = glusterd_set_rebalance_id_in_rsp_dict(req_dict, op_ctx);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to set rebalance id in dict.");
+ }
+ break;
+ case GD_OP_REMOVE_BRICK:
+ ret = glusterd_set_rebalance_id_for_remove_brick(req_dict, op_ctx);
+ if (ret) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "Failed to set rebalance id for remove-brick in dict.");
+ }
+ break;
+ default:
+ break;
+ }
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Failed to create response dictionary");
+ goto out;
+ }
+
+ /* Commit on local node */
+ ret = gd_mgmt_v3_commit_fn(op, req_dict, op_errstr, op_errno, rsp_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Commit failed for "
+ "operation %s on local node",
+ gd_op_list[op]);
+
+ if (*op_errstr == NULL) {
+ ret = gf_asprintf(op_errstr,
+ "Commit failed "
+ "on localhost. Please "
+ "check log file for details.");
+ if (ret == -1)
+ *op_errstr = NULL;
+
+ ret = -1;
+ }
+ goto out;
+ }
+
+ ret = glusterd_syncop_aggr_rsp_dict(op, op_ctx, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ goto out;
+ }
+
+ dict_unref(rsp_dict);
+ rsp_dict = NULL;
+
+ /* Sending commit req to other nodes in the cluster */
+ gd_syncargs_init(&args, op_ctx);
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+ peer_cnt = 0;
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+ {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > txn_generation)
+ continue;
+ if (!peerinfo->connected)
+ continue;
+
+ if (op != GD_OP_SYNC_VOLUME &&
+ peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+ continue;
+
+ gd_mgmt_v3_commit_req(op, req_dict, peerinfo, &args, MY_UUID,
+ peer_uuid);
+ peer_cnt++;
+ }
+ RCU_READ_UNLOCK;
+
+ if (0 == peer_cnt) {
+ ret = 0;
+ goto out;
+ }
+
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Commit failed on peers");
+
+ if (args.errstr)
+ *op_errstr = gf_strdup(args.errstr);
+ }
+
+ ret = args.op_ret;
+ *op_errno = args.op_errno;
+
+ gf_msg_debug(this->name, 0,
+ "Sent commit req for %s to %d "
+ "peers. Returning %d",
+ gd_op_list[op], peer_cnt, ret);
+out:
+ glusterd_op_modify_op_ctx(op, op_ctx);
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_post_commit_cbk_fn(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ gd1_mgmt_v3_post_commit_rsp rsp = {
+ {0},
+ };
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ dict_t *rsp_dict = NULL;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerid = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL);
+
+ ret = xdr_to_generic(*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_rsp);
+ if (ret < 0)
+ goto out;
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ rsp_dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &rsp_dict);
+ if (ret < 0) {
+ free(rsp.dict.dict_val);
+ goto out;
+ } else {
+ rsp_dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ }
+
+ gf_uuid_copy(args->uuid, rsp.uuid);
+ pthread_mutex_lock(&args->lock_dict);
+ {
+ ret = glusterd_syncop_aggr_rsp_dict(rsp.op, args->dict, rsp_dict);
+ }
+ pthread_mutex_unlock(&args->lock_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ if (!rsp.op_ret)
+ op_ret = ret;
+ else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+ } else {
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+ }
+
+out:
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+
+ gd_mgmt_v3_collate_errors(args, op_ret, op_errno, rsp.op_errstr,
+ GLUSTERD_MGMT_V3_POST_COMMIT, *peerid, rsp.uuid);
+ GF_FREE(peerid);
+
+ if (rsp.op_errstr)
+ free(rsp.op_errstr);
+
+ /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+ * the caller function.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY(frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_mgmt_v3_post_commit_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ gd_mgmt_v3_post_commit_cbk_fn);
+}
+
+int
+gd_mgmt_v3_post_commit_req(glusterd_op_t op, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo, struct syncargs *args,
+ uuid_t my_uuid, uuid_t recv_uuid)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_post_commit_req req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(args);
+
+ ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ gf_uuid_copy(req.uuid, my_uuid);
+ req.op = op;
+
+ GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
+ goto out;
+ }
+
+ ret = gd_syncop_submit_request(
+ peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog,
+ GLUSTERD_MGMT_V3_POST_COMMIT, gd_mgmt_v3_post_commit_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_commit_req);
+out:
+ GF_FREE(req.dict.dict_val);
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_post_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ char **op_errstr, uint32_t *op_errno,
+ uint32_t txn_generation)
+{
+ int32_t ret = -1;
+ int32_t peer_cnt = 0;
+ dict_t *rsp_dict = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ struct syncargs args = {0};
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(req_dict);
+ GF_ASSERT(op_errstr);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Failed to create response dictionary");
+ goto out;
+ }
+
+ /* Post commit on local node */
+ ret = gd_mgmt_v3_post_commit_fn(op, req_dict, op_errstr, op_errno,
+ rsp_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Post commit failed for "
+ "operation %s on local node",
+ gd_op_list[op]);
+
+ if (*op_errstr == NULL) {
+ ret = gf_asprintf(op_errstr,
+ "Post commit failed "
+ "on localhost. Please "
+ "check log file for details.");
+ if (ret == -1)
+ *op_errstr = NULL;
+
+ ret = -1;
+ }
+ goto out;
+ }
+
+ ret = glusterd_syncop_aggr_rsp_dict(op, op_ctx, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESP_AGGR_FAIL, "%s",
+ "Failed to aggregate response from "
+ " node/brick");
+ goto out;
+ }
+
+ dict_unref(rsp_dict);
+ rsp_dict = NULL;
+
+ /* Sending post commit req to other nodes in the cluster */
+ gd_syncargs_init(&args, op_ctx);
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+ peer_cnt = 0;
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+ {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > txn_generation)
+ continue;
+ if (!peerinfo->connected)
+ continue;
+
+ if (op != GD_OP_SYNC_VOLUME &&
+ peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+ continue;
+
+ gd_mgmt_v3_post_commit_req(op, req_dict, peerinfo, &args, MY_UUID,
+ peer_uuid);
+ peer_cnt++;
+ }
+ RCU_READ_UNLOCK;
+
+ if (0 == peer_cnt) {
+ ret = 0;
+ goto out;
+ }
+
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Post commit failed on peers");
+
+ if (args.errstr)
+ *op_errstr = gf_strdup(args.errstr);
+ }
+
+ ret = args.op_ret;
+ *op_errno = args.op_errno;
+
+ gf_msg_debug(this->name, 0,
+ "Sent post commit req for %s to %d "
+ "peers. Returning %d",
+ gd_op_list[op], peer_cnt, ret);
+out:
+ glusterd_op_modify_op_ctx(op, op_ctx);
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_post_validate_cbk_fn(struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ gd1_mgmt_v3_post_val_rsp rsp = {
+ {0},
+ };
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerid = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL);
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_post_val_rsp);
+ if (ret < 0)
+ goto out;
+
+ gf_uuid_copy(args->uuid, rsp.uuid);
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+
+out:
+ gd_mgmt_v3_collate_errors(args, op_ret, op_errno, rsp.op_errstr,
+ GLUSTERD_MGMT_V3_POST_VALIDATE, *peerid,
+ rsp.uuid);
+ if (rsp.op_errstr)
+ free(rsp.op_errstr);
+
+ if (rsp.dict.dict_val)
+ free(rsp.dict.dict_val);
+ GF_FREE(peerid);
+ /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+ * the caller function.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY(frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_mgmt_v3_post_validate_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ gd_mgmt_v3_post_validate_cbk_fn);
+}
+
+int
+gd_mgmt_v3_post_validate_req(glusterd_op_t op, int32_t op_ret, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo,
+ struct syncargs *args, uuid_t my_uuid,
+ uuid_t recv_uuid)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_post_val_req req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(args);
+
+ ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ gf_uuid_copy(req.uuid, my_uuid);
+ req.op = op;
+ req.op_ret = op_ret;
+
+ GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
+ goto out;
+ }
+
+ ret = gd_syncop_submit_request(
+ peerinfo->rpc, &req, args, peerid, &gd_mgmt_v3_prog,
+ GLUSTERD_MGMT_V3_POST_VALIDATE, gd_mgmt_v3_post_validate_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_v3_post_val_req);
+out:
+ GF_FREE(req.dict.dict_val);
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_post_validate(glusterd_op_t op, int32_t op_ret, dict_t *dict,
+ dict_t *req_dict, char **op_errstr,
+ uint32_t txn_generation)
+{
+ int32_t ret = -1;
+ int32_t peer_cnt = 0;
+ dict_t *rsp_dict = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ struct syncargs args = {0};
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ GF_ASSERT(dict);
+ GF_VALIDATE_OR_GOTO(this->name, req_dict, out);
+ GF_ASSERT(op_errstr);
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Failed to create response dictionary");
+ goto out;
+ }
+
+ /* Post Validation on local node */
+ ret = gd_mgmt_v3_post_validate_fn(op, op_ret, req_dict, op_errstr,
+ rsp_dict);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_VALIDATION_FAIL,
+ "Post Validation failed for "
+ "operation %s on local node",
+ gd_op_list[op]);
+
+ if (*op_errstr == NULL) {
+ ret = gf_asprintf(op_errstr,
+ "Post-validation failed "
+ "on localhost. Please check "
+ "log file for details");
+ if (ret == -1)
+ *op_errstr = NULL;
+
+ ret = -1;
+ }
+ goto out;
+ }
+
+ dict_unref(rsp_dict);
+ rsp_dict = NULL;
+
+ /* Sending Post Validation req to other nodes in the cluster */
+ gd_syncargs_init(&args, req_dict);
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+
+ peer_cnt = 0;
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+ {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > txn_generation)
+ continue;
+
+ if (!peerinfo->connected)
+ continue;
+ if (op != GD_OP_SYNC_VOLUME &&
+ peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+ continue;
+
+ gd_mgmt_v3_post_validate_req(op, op_ret, req_dict, peerinfo, &args,
+ MY_UUID, peer_uuid);
+ peer_cnt++;
+ }
+ RCU_READ_UNLOCK;
+
+ if (0 == peer_cnt) {
+ ret = 0;
+ goto out;
+ }
+
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_VALIDATION_FAIL,
+ "Post Validation failed on peers");
+
+ if (args.errstr)
+ *op_errstr = gf_strdup(args.errstr);
+ }
+
+ ret = args.op_ret;
+
+ gf_msg_debug(this->name, 0,
+ "Sent post valaidation req for %s "
+ "to %d peers. Returning %d",
+ gd_op_list[op], peer_cnt, ret);
+out:
+ return ret;
+}
+
+int32_t
+gd_mgmt_v3_unlock_cbk_fn(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int32_t ret = -1;
+ struct syncargs *args = NULL;
+ gd1_mgmt_v3_unlock_rsp rsp = {
+ {0},
+ };
+ call_frame_t *frame = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = -1;
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+ args = frame->local;
+ peerid = frame->cookie;
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ op_errno = ENOTCONN;
+ goto out;
+ }
+
+ GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, iov, out, op_errno, EINVAL);
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp);
+ if (ret < 0)
+ goto out;
+
+ gf_uuid_copy(args->uuid, rsp.uuid);
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+
+out:
+ gd_mgmt_v3_collate_errors(args, op_ret, op_errno, NULL,
+ GLUSTERD_MGMT_V3_UNLOCK, *peerid, rsp.uuid);
+ if (rsp.dict.dict_val)
+ free(rsp.dict.dict_val);
+ GF_FREE(peerid);
+ /* req->rpc_status set to -1 means, STACK_DESTROY will be called from
+ * the caller function.
+ */
+ if (req->rpc_status != -1)
+ STACK_DESTROY(frame->root);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+gd_mgmt_v3_unlock_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ gd_mgmt_v3_unlock_cbk_fn);
+}
+
+int
+gd_mgmt_v3_unlock(glusterd_op_t op, dict_t *op_ctx,
+ glusterd_peerinfo_t *peerinfo, struct syncargs *args,
+ uuid_t my_uuid, uuid_t recv_uuid)
+{
+ int32_t ret = -1;
+ gd1_mgmt_v3_unlock_req req = {
+ {0},
+ };
+ xlator_t *this = NULL;
+ uuid_t *peerid = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(op_ctx);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(args);
+
+ ret = dict_allocate_and_serialize(op_ctx, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ gf_uuid_copy(req.uuid, my_uuid);
+ req.op = op;
+
+ GD_ALLOC_COPY_UUID(peerid, peerinfo->uuid, ret);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_ALLOC_AND_COPY_UUID_FAIL, NULL);
+ goto out;
+ }
+
+ ret = gd_syncop_submit_request(peerinfo->rpc, &req, args, peerid,
+ &gd_mgmt_v3_prog, GLUSTERD_MGMT_V3_UNLOCK,
+ gd_mgmt_v3_unlock_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_v3_unlock_req);
+out:
+ GF_FREE(req.dict.dict_val);
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_mgmt_v3_release_peer_locks(glusterd_op_t op, dict_t *dict,
+ int32_t op_ret, char **op_errstr,
+ gf_boolean_t is_acquired,
+ uint32_t txn_generation)
+{
+ int32_t ret = -1;
+ int32_t peer_cnt = 0;
+ uuid_t peer_uuid = {0};
+ xlator_t *this = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ struct syncargs args = {0};
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+
+ /* If the lock has not been held during this
+ * transaction, do not send unlock requests */
+ if (!is_acquired)
+ goto out;
+
+ /* Sending mgmt_v3 unlock req to other nodes in the cluster */
+ gd_syncargs_init(&args, NULL);
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+ peer_cnt = 0;
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+ {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > txn_generation)
+ continue;
+
+ if (!peerinfo->connected)
+ continue;
+ if (op != GD_OP_SYNC_VOLUME &&
+ peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)
+ continue;
+
+ gd_mgmt_v3_unlock(op, dict, peerinfo, &args, MY_UUID, peer_uuid);
+ peer_cnt++;
+ }
+ RCU_READ_UNLOCK;
+
+ if (0 == peer_cnt) {
+ ret = 0;
+ goto out;
+ }
+
+ gd_synctask_barrier_wait((&args), peer_cnt);
+
+ if (args.op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Unlock failed on peers");
+
+ if (!op_ret && args.errstr)
+ *op_errstr = gf_strdup(args.errstr);
+ }
+
+ ret = args.op_ret;
+
+ gf_msg_debug(this->name, 0,
+ "Sent unlock op req for %s "
+ "to %d peers. Returning %d",
+ gd_op_list[op], peer_cnt, ret);
+
+out:
+ return ret;
+}
+
+int32_t
+glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase(rpcsvc_request_t *req,
+ glusterd_op_t op,
+ dict_t *dict)
+{
+ int32_t ret = -1;
+ int32_t op_ret = -1;
+ dict_t *req_dict = NULL;
+ dict_t *tmp_dict = NULL;
+ glusterd_conf_t *conf = NULL;
+ char *op_errstr = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t is_acquired = _gf_false;
+ uuid_t *originator_uuid = NULL;
+ uint32_t txn_generation = 0;
+ uint32_t op_errno = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(dict);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ /* Save the peer list generation */
+ txn_generation = conf->generation;
+ cmm_smp_rmb();
+ /* This read memory barrier makes sure that this assignment happens here
+ * only and is not reordered and optimized by either the compiler or the
+ * processor.
+ */
+
+ /* Save the MY_UUID as the originator_uuid. This originator_uuid
+ * will be used by is_origin_glusterd() to determine if a node
+ * is the originator node for a command. */
+ originator_uuid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!originator_uuid) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_uuid_copy(*originator_uuid, MY_UUID);
+ ret = dict_set_bin(dict, "originator_uuid", originator_uuid,
+ sizeof(uuid_t));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set originator_uuid.");
+ GF_FREE(originator_uuid);
+ goto out;
+ }
+
+ /* Marking the operation as complete synctasked */
+ ret = dict_set_int32(dict, "is_synctasked", _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set synctasked flag.");
+ goto out;
+ }
+
+ /* Use a copy at local unlock as cli response will be sent before
+ * the unlock and the volname in the dict might be removed */
+ tmp_dict = dict_new();
+ if (!tmp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Unable to create dict");
+ goto out;
+ }
+ dict_copy(dict, tmp_dict);
+
+ /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */
+ ret = glusterd_mgmt_v3_initiate_lockdown(op, dict, &op_errstr, &op_errno,
+ &is_acquired, txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCKDOWN_FAIL,
+ "mgmt_v3 lockdown failed.");
+ goto out;
+ }
+
+ /* BUILD PAYLOAD */
+ ret = glusterd_mgmt_v3_build_payload(&req_dict, &op_errstr, dict, op);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL,
+ LOGSTR_BUILD_PAYLOAD, gd_op_list[op]);
+ if (op_errstr == NULL)
+ gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+ goto out;
+ }
+
+ /* PRE-COMMIT VALIDATE PHASE */
+ ret = glusterd_mgmt_v3_pre_validate(op, req_dict, &op_errstr, &op_errno,
+ txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL,
+ "Pre Validation Failed");
+ goto out;
+ }
+
+ /* BRICK-OPS */
+ ret = glusterd_mgmt_v3_brick_op(op, dict, req_dict, &op_errstr,
+ txn_generation);
+ if (ret) {
+ gf_log(this->name, GF_LOG_ERROR, "Brick Op Failed");
+ goto out;
+ }
+
+ /* COMMIT OP PHASE */
+ ret = glusterd_mgmt_v3_commit(op, dict, req_dict, &op_errstr, &op_errno,
+ txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Commit Op Failed");
+ goto out;
+ }
+
+ /* POST-COMMIT VALIDATE PHASE */
+ /* As of now, post_validate is not trying to cleanup any failed
+ commands. So as of now, I am sending 0 (op_ret as 0).
+ */
+ ret = glusterd_mgmt_v3_post_validate(op, 0, dict, req_dict, &op_errstr,
+ txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_VALIDATION_FAIL,
+ "Post Validation Failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ op_ret = ret;
+ /* UNLOCK PHASE FOR PEERS*/
+ (void)glusterd_mgmt_v3_release_peer_locks(op, dict, op_ret, &op_errstr,
+ is_acquired, txn_generation);
+
+ /* LOCAL VOLUME(S) UNLOCK */
+ if (is_acquired) {
+ /* Trying to release multiple mgmt_v3 locks */
+ ret = glusterd_multiple_mgmt_v3_unlock(tmp_dict, MY_UUID);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Failed to release mgmt_v3 locks on localhost");
+ op_ret = ret;
+ }
+ }
+
+ if (op_ret && (op_errno == 0))
+ op_errno = EG_INTRNL;
+
+ if (op != GD_OP_MAX_OPVERSION) {
+ /* SEND CLI RESPONSE */
+ glusterd_op_send_cli_response(op, op_ret, op_errno, req, dict,
+ op_errstr);
+ }
+
+ if (req_dict)
+ dict_unref(req_dict);
+
+ if (tmp_dict)
+ dict_unref(tmp_dict);
+
+ if (op_errstr) {
+ GF_FREE(op_errstr);
+ op_errstr = NULL;
+ }
+
+ return 0;
+}
+
+int32_t
+glusterd_mgmt_v3_initiate_all_phases(rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict)
+{
+ int32_t ret = -1;
+ int32_t op_ret = -1;
+ dict_t *req_dict = NULL;
+ dict_t *tmp_dict = NULL;
+ glusterd_conf_t *conf = NULL;
+ char *op_errstr = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t is_acquired = _gf_false;
+ uuid_t *originator_uuid = NULL;
+ uint32_t txn_generation = 0;
+ uint32_t op_errno = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(dict);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ /* Save the peer list generation */
+ txn_generation = conf->generation;
+ cmm_smp_rmb();
+ /* This read memory barrier makes sure that this assignment happens here
+ * only and is not reordered and optimized by either the compiler or the
+ * processor.
+ */
+
+ /* Save the MY_UUID as the originator_uuid. This originator_uuid
+ * will be used by is_origin_glusterd() to determine if a node
+ * is the originator node for a command. */
+ originator_uuid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!originator_uuid) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_uuid_copy(*originator_uuid, MY_UUID);
+ ret = dict_set_bin(dict, "originator_uuid", originator_uuid,
+ sizeof(uuid_t));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set originator_uuid.");
+ GF_FREE(originator_uuid);
+ goto out;
+ }
+
+ /* Marking the operation as complete synctasked */
+ ret = dict_set_int32(dict, "is_synctasked", _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set synctasked flag.");
+ goto out;
+ }
+
+ /* Use a copy at local unlock as cli response will be sent before
+ * the unlock and the volname in the dict might be removed */
+ tmp_dict = dict_new();
+ if (!tmp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Unable to create dict");
+ goto out;
+ }
+ dict_copy(dict, tmp_dict);
+
+ /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */
+ ret = glusterd_mgmt_v3_initiate_lockdown(op, dict, &op_errstr, &op_errno,
+ &is_acquired, txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCKDOWN_FAIL,
+ "mgmt_v3 lockdown failed.");
+ goto out;
+ }
+
+ /* BUILD PAYLOAD */
+ ret = glusterd_mgmt_v3_build_payload(&req_dict, &op_errstr, dict, op);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL,
+ LOGSTR_BUILD_PAYLOAD, gd_op_list[op]);
+ if (op_errstr == NULL)
+ gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+ goto out;
+ }
+
+ /* PRE-COMMIT VALIDATE PHASE */
+ ret = glusterd_mgmt_v3_pre_validate(op, req_dict, &op_errstr, &op_errno,
+ txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL,
+ "Pre Validation Failed");
+ goto out;
+ }
+
+ /* COMMIT OP PHASE */
+ ret = glusterd_mgmt_v3_commit(op, dict, req_dict, &op_errstr, &op_errno,
+ txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Commit Op Failed");
+ goto out;
+ }
+
+ /* POST COMMIT OP PHASE */
+ ret = glusterd_mgmt_v3_post_commit(op, dict, req_dict, &op_errstr,
+ &op_errno, txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_COMMIT_OP_FAIL,
+ "Post commit Op Failed");
+ goto out;
+ }
+
+ /* POST-COMMIT VALIDATE PHASE */
+ /* As of now, post_validate is not trying to cleanup any failed
+ commands. So as of now, I am sending 0 (op_ret as 0).
+ */
+ ret = glusterd_mgmt_v3_post_validate(op, 0, dict, req_dict, &op_errstr,
+ txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_VALIDATION_FAIL,
+ "Post Validation Failed");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ op_ret = ret;
+ /* UNLOCK PHASE FOR PEERS*/
+ (void)glusterd_mgmt_v3_release_peer_locks(op, dict, op_ret, &op_errstr,
+ is_acquired, txn_generation);
+
+ /* LOCAL VOLUME(S) UNLOCK */
+ if (is_acquired) {
+ /* Trying to release multiple mgmt_v3 locks */
+ ret = glusterd_multiple_mgmt_v3_unlock(tmp_dict, MY_UUID);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Failed to release mgmt_v3 locks on localhost");
+ op_ret = ret;
+ }
+ }
+
+ if (op_ret && (op_errno == 0))
+ op_errno = EG_INTRNL;
+
+ if (op != GD_OP_MAX_OPVERSION) {
+ /* SEND CLI RESPONSE */
+ glusterd_op_send_cli_response(op, op_ret, op_errno, req, dict,
+ op_errstr);
+ }
+
+ if (req_dict)
+ dict_unref(req_dict);
+
+ if (tmp_dict)
+ dict_unref(tmp_dict);
+
+ if (op_errstr) {
+ GF_FREE(op_errstr);
+ op_errstr = NULL;
+ }
+
+ return 0;
+}
+
+int32_t
+glusterd_set_barrier_value(dict_t *dict, char *option)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *vol = NULL;
+ char *volname = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(dict);
+ GF_ASSERT(option);
+
+ /* TODO : Change this when we support multiple volume.
+ * As of now only snapshot of single volume is supported,
+ * Hence volname1 is directly fetched
+ */
+ ret = dict_get_strn(dict, "volname1", SLEN("volname1"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Volname not present in "
+ "dict");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "Volume %s not found ", volname);
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(dict, "barrier", option);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set barrier op "
+ "in request dictionary");
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(vol->dict, "features.barrier", option);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set barrier op "
+ "in volume option dict");
+ goto out;
+ }
+
+ gd_update_volume_op_versions(vol);
+
+ ret = glusterd_create_volfiles(vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Failed to create volfiles");
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo(vol, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_mgmt_v3_initiate_snap_phases(rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict)
+{
+ int32_t ret = -1;
+ int32_t op_ret = -1;
+ dict_t *req_dict = NULL;
+ dict_t *tmp_dict = NULL;
+ glusterd_conf_t *conf = NULL;
+ char *op_errstr = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t is_acquired = _gf_false;
+ uuid_t *originator_uuid = NULL;
+ gf_boolean_t success = _gf_false;
+ char *cli_errstr = NULL;
+ uint32_t txn_generation = 0;
+ uint32_t op_errno = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(dict);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ /* Save the peer list generation */
+ txn_generation = conf->generation;
+ cmm_smp_rmb();
+ /* This read memory barrier makes sure that this assignment happens here
+ * only and is not reordered and optimized by either the compiler or the
+ * processor.
+ */
+
+ /* Save the MY_UUID as the originator_uuid. This originator_uuid
+ * will be used by is_origin_glusterd() to determine if a node
+ * is the originator node for a command. */
+ originator_uuid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!originator_uuid) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_uuid_copy(*originator_uuid, MY_UUID);
+ ret = dict_set_bin(dict, "originator_uuid", originator_uuid,
+ sizeof(uuid_t));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set originator_uuid.");
+ GF_FREE(originator_uuid);
+ goto out;
+ }
+
+ /* Marking the operation as complete synctasked */
+ ret = dict_set_int32n(dict, "is_synctasked", SLEN("is_synctasked"),
+ _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set synctasked flag.");
+ goto out;
+ }
+
+ /* Use a copy at local unlock as cli response will be sent before
+ * the unlock and the volname in the dict might be removed */
+ tmp_dict = dict_new();
+ if (!tmp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Unable to create dict");
+ goto out;
+ }
+ dict_copy(dict, tmp_dict);
+
+ /* LOCKDOWN PHASE - Acquire mgmt_v3 locks */
+ ret = glusterd_mgmt_v3_initiate_lockdown(op, dict, &op_errstr, &op_errno,
+ &is_acquired, txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCKDOWN_FAIL,
+ "mgmt_v3 lockdown failed.");
+ goto out;
+ }
+
+ /* BUILD PAYLOAD */
+ ret = glusterd_mgmt_v3_build_payload(&req_dict, &op_errstr, dict, op);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL,
+ LOGSTR_BUILD_PAYLOAD, gd_op_list[op]);
+ if (op_errstr == NULL)
+ gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+ goto out;
+ }
+
+ /* PRE-COMMIT VALIDATE PHASE */
+ ret = glusterd_mgmt_v3_pre_validate(op, req_dict, &op_errstr, &op_errno,
+ txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL,
+ "Pre Validation Failed");
+ goto out;
+ }
+
+ /* quorum check of the volume is done here */
+ ret = glusterd_snap_quorum_check(req_dict, _gf_false, &op_errstr,
+ &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_QUORUM_CHECK_FAIL,
+ "Volume quorum check failed");
+ goto out;
+ }
+
+ /* Set the operation type as pre, so that differentiation can be
+ * made whether the brickop is sent during pre-commit or post-commit
+ */
+ ret = dict_set_dynstr_with_alloc(req_dict, "operation-type", "pre");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "operation-type in dictionary");
+ goto out;
+ }
+
+ ret = glusterd_mgmt_v3_brick_op(op, dict, req_dict, &op_errstr,
+ txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_FAIL,
+ "Brick Ops Failed");
+ goto unbarrier;
+ }
+
+ /* COMMIT OP PHASE */
+ /* TODO: As of now, the plan is to do quorum check before sending the
+ commit fop and if the quorum succeeds, then commit is sent to all
+ the other glusterds.
+ snap create functionality now creates the in memory and on disk
+ objects for the snapshot (marking them as incomplete), takes the lvm
+ snapshot and then updates the status of the in memory and on disk
+ snap objects as complete. Suppose one of the glusterds goes down
+ after taking the lvm snapshot, but before updating the snap object,
+ then treat it as a snapshot create failure and trigger cleanup.
+ i.e the number of commit responses received by the originator
+ glusterd shold be the same as the number of peers it has sent the
+ request to (i.e npeers variable). If not, then originator glusterd
+ will initiate cleanup in post-validate fop.
+ Question: What if one of the other glusterds goes down as explained
+ above and along with it the originator glusterd also goes down?
+ Who will initiate the cleanup?
+ */
+ ret = dict_set_int32n(req_dict, "cleanup", SLEN("cleanup"), 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to set dict");
+ goto unbarrier;
+ }
+
+ ret = glusterd_mgmt_v3_commit(op, dict, req_dict, &op_errstr, &op_errno,
+ txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Commit Op Failed");
+ /* If the main op fails, we should save the error string.
+ Because, op_errstr will be used for unbarrier and
+ unlock ops also. We might lose the actual error that
+ caused the failure.
+ */
+ cli_errstr = op_errstr;
+ op_errstr = NULL;
+ goto unbarrier;
+ }
+
+ success = _gf_true;
+unbarrier:
+ /* Set the operation type as post, so that differentiation can be
+ * made whether the brickop is sent during pre-commit or post-commit
+ */
+ ret = dict_set_dynstr_with_alloc(req_dict, "operation-type", "post");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "operation-type in dictionary");
+ goto out;
+ }
+
+ ret = glusterd_mgmt_v3_brick_op(op, dict, req_dict, &op_errstr,
+ txn_generation);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_FAIL,
+ "Brick Ops Failed");
+ goto out;
+ }
+
+ /*Do a quorum check if the commit phase is successful*/
+ if (success) {
+ // quorum check of the snapshot volume
+ ret = glusterd_snap_quorum_check(dict, _gf_true, &op_errstr, &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_QUORUM_CHECK_FAIL,
+ "Snapshot Volume quorum check failed");
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+out:
+ op_ret = ret;
+
+ if (success == _gf_false)
+ op_ret = -1;
+
+ /* POST-COMMIT VALIDATE PHASE */
+ ret = glusterd_mgmt_v3_post_validate(op, op_ret, dict, req_dict, &op_errstr,
+ txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_POST_VALIDATION_FAIL,
+ "Post Validation Failed");
+ op_ret = -1;
+ }
+
+ /* UNLOCK PHASE FOR PEERS*/
+ (void)glusterd_mgmt_v3_release_peer_locks(op, dict, op_ret, &op_errstr,
+ is_acquired, txn_generation);
+
+ /* If the commit op (snapshot taking) failed, then the error is stored
+ in cli_errstr and unbarrier is called. Suppose, if unbarrier also
+ fails, then the error happened in unbarrier is logged and freed.
+ The error happened in commit op, which is stored in cli_errstr
+ is sent to cli.
+ */
+ if (cli_errstr) {
+ GF_FREE(op_errstr);
+ op_errstr = NULL;
+ op_errstr = cli_errstr;
+ }
+
+ /* LOCAL VOLUME(S) UNLOCK */
+ if (is_acquired) {
+ /* Trying to release multiple mgmt_v3 locks */
+ ret = glusterd_multiple_mgmt_v3_unlock(tmp_dict, MY_UUID);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Failed to release mgmt_v3 locks on localhost");
+ op_ret = ret;
+ }
+ }
+
+ if (op_ret && (op_errno == 0))
+ op_errno = EG_INTRNL;
+
+ /* SEND CLI RESPONSE */
+ glusterd_op_send_cli_response(op, op_ret, op_errno, req, dict, op_errstr);
+
+ if (req_dict)
+ dict_unref(req_dict);
+
+ if (tmp_dict)
+ dict_unref(tmp_dict);
+
+ if (op_errstr) {
+ GF_FREE(op_errstr);
+ op_errstr = NULL;
+ }
+
+ return 0;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-mgmt.h
new file mode 100644
index 00000000000..27dd1849519
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-mgmt.h
@@ -0,0 +1,97 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_MGMT_H_
+#define _GLUSTERD_MGMT_H_
+
+void
+gd_mgmt_v3_collate_errors(struct syncargs *args, int op_ret, int op_errno,
+ char *op_errstr, int op_code, uuid_t peerid,
+ u_char *uuid);
+
+int32_t
+gd_mgmt_v3_pre_validate_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict, uint32_t *op_errno);
+
+int32_t
+gd_mgmt_v3_brick_op_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict);
+
+int32_t
+gd_mgmt_v3_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ uint32_t *op_errno, dict_t *rsp_dict);
+
+int32_t
+gd_mgmt_v3_post_commit_fn(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ uint32_t *op_errno, dict_t *rsp_dict);
+
+int32_t
+gd_mgmt_v3_post_validate_fn(glusterd_op_t op, int32_t op_ret, dict_t *dict,
+ char **op_errstr, dict_t *rsp_dict);
+
+int32_t
+glusterd_mgmt_v3_initiate_all_phases(rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict);
+
+int32_t
+glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase(rpcsvc_request_t *req,
+ glusterd_op_t op,
+ dict_t *dict);
+
+int32_t
+glusterd_mgmt_v3_initiate_snap_phases(rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict);
+
+int
+glusterd_snap_pre_validate_use_rsp_dict(dict_t *dst, dict_t *src);
+
+int32_t
+glusterd_set_barrier_value(dict_t *dict, char *option);
+int
+
+glusterd_mgmt_v3_initiate_lockdown(glusterd_op_t op, dict_t *dict,
+ char **op_errstr, uint32_t *op_errno,
+ gf_boolean_t *is_acquired,
+ uint32_t txn_generation);
+
+int
+glusterd_mgmt_v3_build_payload(dict_t **req, char **op_errstr, dict_t *dict,
+ glusterd_op_t op);
+
+int
+glusterd_mgmt_v3_pre_validate(glusterd_op_t op, dict_t *req_dict,
+ char **op_errstr, uint32_t *op_errno,
+ uint32_t txn_generation);
+
+int
+glusterd_mgmt_v3_commit(glusterd_op_t op, dict_t *op_ctx, dict_t *req_dict,
+ char **op_errstr, uint32_t *op_errno,
+ uint32_t txn_generation);
+
+int
+glusterd_mgmt_v3_release_peer_locks(glusterd_op_t op, dict_t *dict,
+ int32_t op_ret, char **op_errstr,
+ gf_boolean_t is_acquired,
+ uint32_t txn_generation);
+
+int32_t
+glusterd_multiple_mgmt_v3_unlock(dict_t *dict, uuid_t uuid);
+
+int
+glusterd_reset_brick_prevalidate(dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict);
+int
+glusterd_op_reset_brick(dict_t *dict, dict_t *rsp_dict);
+
+int
+glusterd_post_commit_add_brick(dict_t *dict, char **op_errstr);
+
+int
+glusterd_post_commit_replace_brick(dict_t *dict, char **op_errstr);
+#endif /* _GLUSTERD_MGMT_H_ */
diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
index 50ce1484e62..645d845ee76 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
+++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.c
@@ -1,704 +1,721 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <inttypes.h>
#include <fnmatch.h>
#include <pwd.h>
-#include "globals.h"
-#include "glusterfs.h"
-#include "compat.h"
-#include "dict.h"
-#include "list.h"
-#include "logging.h"
-#include "defaults.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "run.h"
+#include <glusterfs/globals.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/list.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/run.h>
#include "glusterd-mem-types.h"
#include "glusterd.h"
#include "glusterd-utils.h"
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
#include "glusterd-mountbroker.h"
#include "glusterd-op-sm.h"
+#include "glusterd-messages.h"
static int
-seq_dict_foreach (dict_t *dict,
- int (*fn)(char *str, void *data),
- void *data)
+seq_dict_foreach(dict_t *dict, int (*fn)(char *str, void *data), void *data)
{
- char index[] = "4294967296"; // 1<<32
- int i = 0;
- char *val = NULL;
- int ret = 0;
-
- for (;;i++) {
- snprintf(index, sizeof(index), "%d", i);
- ret = dict_get_str (dict, index, &val);
- if (ret != 0)
- return ret == -ENOENT ? 0 : ret;
- ret = fn (val, data);
- if (ret != 0)
- return ret;
- }
+ char index[] = "4294967296"; // 1<<32
+ int i = 0;
+ char *val = NULL;
+ int ret = 0;
+
+ for (;; i++) {
+ snprintf(index, sizeof(index), "%d", i);
+ ret = dict_get_str(dict, index, &val);
+ if (ret != 0)
+ return ret == -ENOENT ? 0 : ret;
+ ret = fn(val, data);
+ if (ret != 0)
+ return ret;
+ }
}
int
-parse_mount_pattern_desc (gf_mount_spec_t *mspec, char *pdesc)
+parse_mount_pattern_desc(gf_mount_spec_t *mspec, char *pdesc)
#define SYNTAX_ERR -2
{
- char *curs = NULL;
- char *c2 = NULL;
- char sc = '\0';
- char **cc = NULL;
- gf_mount_pattern_t *pat = NULL;
- int pnum = 0;
- int ret = 0;
- int lastsup = -1;
- int incl = -1;
- char **pcc = NULL;
- int pnc = 0;
-
- skipwhite (&pdesc);
-
- /* a bow to theory */
- if (!*pdesc)
- return 0;
-
- /* count number of components, separated by '&' */
- mspec->len = 0;
- for (curs = pdesc; *curs; curs++) {
- if (*curs == ')')
- mspec->len++;
+ char *curs = NULL;
+ char *c2 = NULL;
+ char sc = '\0';
+ char **cc = NULL;
+ gf_mount_pattern_t *pat = NULL;
+ int pnum = 0;
+ int ret = 0;
+ int lastsup = -1;
+ int incl = -1;
+ char **pcc = NULL;
+ int pnc = 0;
+
+ skipwhite(&pdesc);
+
+ /* a bow to theory */
+ if (!*pdesc)
+ return 0;
+
+ /* count number of components, separated by '&' */
+ mspec->len = 0;
+ for (curs = pdesc; *curs; curs++) {
+ if (*curs == ')')
+ mspec->len++;
+ }
+
+ mspec->patterns = GF_CALLOC(mspec->len, sizeof(*mspec->patterns),
+ gf_gld_mt_mount_pattern);
+ if (!mspec->patterns) {
+ gf_smsg("glusterd", GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ pat = mspec->patterns;
+ curs = pdesc;
+ skipwhite(&curs);
+ for (;;) {
+ incl = -1;
+
+ /* check for pattern signedness modifier */
+ if (*curs == '-') {
+ pat->negative = _gf_true;
+ curs++;
}
- mspec->patterns = GF_CALLOC (mspec->len, sizeof (*mspec->patterns),
- gf_gld_mt_mount_pattern);
- if (!mspec->patterns) {
- ret = -1;
- goto out;
+ /* now should come condition specifier,
+ * then opening paren
+ */
+ c2 = nwstrtail(curs, "SUB(");
+ if (c2) {
+ pat->condition = SET_SUB;
+ goto got_cond;
+ }
+ c2 = nwstrtail(curs, "SUP(");
+ if (c2) {
+ pat->condition = SET_SUPER;
+ lastsup = pat - mspec->patterns;
+ goto got_cond;
+ }
+ c2 = nwstrtail(curs, "EQL(");
+ if (c2) {
+ pat->condition = SET_EQUAL;
+ goto got_cond;
+ }
+ c2 = nwstrtail(curs, "MEET(");
+ if (c2) {
+ pat->condition = SET_INTERSECT;
+ goto got_cond;
+ }
+ c2 = nwstrtail(curs, "SUB+(");
+ if (c2) {
+ pat->condition = SET_SUB;
+ incl = lastsup;
+ goto got_cond;
}
- pat = mspec->patterns;
- curs = pdesc;
- skipwhite (&curs);
- for (;;) {
- incl = -1;
-
- /* check for pattern signedness modifier */
- if (*curs == '-') {
- pat->negative = _gf_true;
- curs++;
- }
-
- /* now should come condition specifier,
- * then opening paren
- */
- c2 = nwstrtail (curs, "SUB(");
- if (c2) {
- pat->condition = SET_SUB;
- goto got_cond;
- }
- c2 = nwstrtail (curs, "SUP(");
- if (c2) {
- pat->condition = SET_SUPER;
- lastsup = pat - mspec->patterns;
- goto got_cond;
- }
- c2 = nwstrtail (curs, "EQL(");
- if (c2) {
- pat->condition = SET_EQUAL;
- goto got_cond;
- }
- c2 = nwstrtail (curs, "MEET(");
- if (c2) {
- pat->condition = SET_INTERSECT;
- goto got_cond;
- }
- c2 = nwstrtail (curs, "SUB+(");
- if (c2) {
- pat->condition = SET_SUB;
- incl = lastsup;
- goto got_cond;
- }
+ ret = SYNTAX_ERR;
+ goto out;
+ got_cond:
+ curs = c2;
+ skipwhite(&curs);
+ /* count the number of components for pattern */
+ pnum = *curs == ')' ? 0 : 1;
+ for (c2 = curs; *c2 != ')';) {
+ if (strchr("&|", *c2)) {
ret = SYNTAX_ERR;
goto out;
-
- got_cond:
- curs = c2;
- skipwhite (&curs);
- /* count the number of components for pattern */
- pnum = *curs == ')' ? 0 : 1;
- for (c2 = curs ;*c2 != ')';) {
- if (strchr ("&|", *c2)) {
- ret = SYNTAX_ERR;
- goto out;
- }
- while (!strchr ("|&)", *c2) && !isspace (*c2))
- c2++;
- skipwhite (&c2);
- switch (*c2) {
- case ')':
- break;
- case '\0':
- case '&':
- ret = SYNTAX_ERR;
- goto out;
- case '|':
- *c2 = ' ';
- skipwhite (&c2);
- /* fall through */
- default:
- pnum++;
- }
- }
- if (incl >= 0) {
- pnc = 0;
- for (pcc = mspec->patterns[incl].components; *pcc; pcc++)
- pnc++;
- pnum += pnc;
- }
- pat->components = GF_CALLOC (pnum + 1, sizeof (*pat->components),
- gf_gld_mt_mount_comp_container);
- if (!pat->components) {
- ret = -1;
- goto out;
- }
-
- cc = pat->components;
- /* copy over included component set */
- if (incl >= 0) {
- memcpy (pat->components,
- mspec->patterns[incl].components,
- pnc * sizeof (*pat->components));
- cc += pnc;
- }
- /* parse and add components */
- c2 = ""; /* reset c2 */
- while (*c2 != ')') {
- c2 = curs;
- while (!isspace (*c2) && *c2 != ')')
- c2++;
- sc = *c2;
- *c2 = '\0';;
- *cc = gf_strdup (curs);
- if (!*cc) {
- ret = -1;
- goto out;
- }
- *c2 = sc;
- skipwhite (&c2);
- curs = c2;
- cc++;
- }
-
- curs++;
- skipwhite (&curs);
- if (*curs == '&') {
- curs++;
- skipwhite (&curs);
- }
-
- if (!*curs)
- break;
- pat++;
+ }
+ while (!strchr("|&)", *c2) && !isspace(*c2))
+ c2++;
+ skipwhite(&c2);
+ switch (*c2) {
+ case ')':
+ break;
+ case '\0':
+ case '&':
+ ret = SYNTAX_ERR;
+ goto out;
+ case '|':
+ *c2 = ' ';
+ skipwhite(&c2);
+ /* fall through */
+ default:
+ pnum++;
+ }
}
-
- out:
- if (ret == SYNTAX_ERR) {
- gf_log ("", GF_LOG_ERROR, "cannot parse mount patterns %s",
- pdesc);
+ if (incl >= 0) {
+ pnc = 0;
+ for (pcc = mspec->patterns[incl].components; *pcc; pcc++)
+ pnc++;
+ pnum += pnc;
}
-
- /* We've allocted a lotta stuff here but don't bother with freeing
- * on error, in that case we'll terminate anyway
- */
- return ret ? -1 : 0;
-}
-#undef SYNTAX_ERR
-
-
-const char *georep_mnt_desc_template =
- "SUP("
- "xlator-option=\\*-dht.assert-no-child-down=true "
- "volfile-server=localhost "
- "client-pid=%d "
- "user-map-root=%s "
- ")"
- "SUB+("
- "log-file="DEFAULT_LOG_FILE_DIRECTORY"/"GEOREP"*/* "
- "log-level=* "
- "volfile-id=* "
- ")"
- "MEET("
- "%s"
- ")";
-
-const char *hadoop_mnt_desc_template =
- "SUP("
- "volfile-server=%s "
- "client-pid=%d "
- "volfile-id=%s "
- "user-map-root=%s "
- ")"
- "SUB+("
- "log-file="DEFAULT_LOG_FILE_DIRECTORY"/"GHADOOP"*/* "
- "log-level=* "
- ")";
-
-int
-make_georep_mountspec (gf_mount_spec_t *mspec, const char *volnames,
- char *user)
-{
- char *georep_mnt_desc = NULL;
- char *meetspec = NULL;
- char *vols = NULL;
- char *vol = NULL;
- char *p = NULL;
- char *savetok = NULL;
- char *fa[3] = {0,};
- size_t siz = 0;
- int vc = 0;
- int i = 0;
- int ret = 0;
-
- vols = gf_strdup ((char *)volnames);
- if (!vols)
- goto out;
-
- for (vc = 1, p = vols; *p; p++) {
- if (*p == ',')
- vc++;
+ pat->components = GF_CALLOC(pnum + 1, sizeof(*pat->components),
+ gf_gld_mt_mount_comp_container);
+ if (!pat->components) {
+ ret = -1;
+ goto out;
}
- siz = strlen (volnames) + vc * strlen("volfile-id=");
- meetspec = GF_CALLOC (1, siz + 1, gf_gld_mt_georep_meet_spec);
- if (!meetspec)
- goto out;
- for (p = vols;;) {
- vol = strtok_r (p, ",", &savetok);
- if (!vol) {
- GF_ASSERT (vc == 0);
- break;
- }
- p = NULL;
- strcat (meetspec, "volfile-id=");
- strcat (meetspec, vol);
- if (--vc > 0)
- strcat (meetspec, " ");
+ cc = pat->components;
+ /* copy over included component set */
+ if (incl >= 0) {
+ memcpy(pat->components, mspec->patterns[incl].components,
+ pnc * sizeof(*pat->components));
+ cc += pnc;
}
-
- ret = gf_asprintf (&georep_mnt_desc, georep_mnt_desc_template,
- GF_CLIENT_PID_GSYNCD, user, meetspec);
- if (ret == -1) {
- georep_mnt_desc = NULL;
+ /* parse and add components */
+ c2 = ""; /* reset c2 */
+ while (*c2 != ')') {
+ c2 = curs;
+ while (!isspace(*c2) && *c2 != ')')
+ c2++;
+ sc = *c2;
+ *c2 = '\0';
+ ;
+ *cc = gf_strdup(curs);
+ if (!*cc) {
+ ret = -1;
goto out;
+ }
+ *c2 = sc;
+ skipwhite(&c2);
+ curs = c2;
+ cc++;
}
- ret = parse_mount_pattern_desc (mspec, georep_mnt_desc);
-
- out:
- fa[0] = meetspec;
- fa[1] = vols;
- fa[2] = georep_mnt_desc;
-
- for (i = 0; i < 3; i++) {
- if (fa[i])
- GF_FREE (fa[i]);
- else
- ret = -1;
+ curs++;
+ skipwhite(&curs);
+ if (*curs == '&') {
+ curs++;
+ skipwhite(&curs);
}
- return ret;
+ if (!*curs)
+ break;
+ pat++;
+ }
+
+out:
+ if (ret == SYNTAX_ERR) {
+ gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "cannot parse mount patterns %s", pdesc);
+ }
+
+ /* We've allocted a lotta stuff here but don't bother with freeing
+ * on error, in that case we'll terminate anyway
+ */
+ return ret ? -1 : 0;
}
+#undef SYNTAX_ERR
+
+const char *georep_mnt_desc_template =
+ "SUP("
+ "aux-gfid-mount "
+ "acl "
+ "volfile-server=localhost "
+ "client-pid=%d "
+ "user-map-root=%s "
+ ")"
+ "SUB+("
+ "log-file=%s/" GEOREP
+ "*/* "
+ "log-level=* "
+ "volfile-id=* "
+ ")"
+ "MEET("
+ "%s"
+ ")";
int
-make_ghadoop_mountspec (gf_mount_spec_t *mspec, const char *volname,
- char *user, char *server)
+make_georep_mountspec(gf_mount_spec_t *mspec, const char *volnames, char *user,
+ char *logdir)
{
- char *hadoop_mnt_desc = NULL;
- int ret = 0;
-
- ret = gf_asprintf (&hadoop_mnt_desc, hadoop_mnt_desc_template,
- server, GF_CLIENT_PID_HADOOP, volname, user);
- if (ret == -1)
- return ret;
-
- return parse_mount_pattern_desc (mspec, hadoop_mnt_desc);
+ char *georep_mnt_desc = NULL;
+ char *meetspec = NULL;
+ char *vols = NULL;
+ char *vol = NULL;
+ char *p = NULL;
+ char *savetok = NULL;
+ char *fa[3] = {
+ 0,
+ };
+ size_t siz = 0;
+ int vc = 0;
+ int i = 0;
+ int ret = 0;
+
+ vols = gf_strdup((char *)volnames);
+ if (!vols) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "Volume name=%s", volnames, NULL);
+ goto out;
+ }
+
+ for (vc = 1, p = vols; *p; p++) {
+ if (*p == ',')
+ vc++;
+ }
+ siz = strlen(volnames) + vc * SLEN("volfile-id=");
+ meetspec = GF_CALLOC(1, siz + 1, gf_gld_mt_georep_meet_spec);
+ if (!meetspec) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ for (p = vols;;) {
+ vol = strtok_r(p, ",", &savetok);
+ if (!vol) {
+ GF_ASSERT(vc == 0);
+ break;
+ }
+ p = NULL;
+ strcat(meetspec, "volfile-id=");
+ strcat(meetspec, vol);
+ if (--vc > 0)
+ strcat(meetspec, " ");
+ }
+
+ ret = gf_asprintf(&georep_mnt_desc, georep_mnt_desc_template,
+ GF_CLIENT_PID_GSYNCD, user, logdir, meetspec);
+ if (ret == -1) {
+ georep_mnt_desc = NULL;
+ goto out;
+ }
+
+ ret = parse_mount_pattern_desc(mspec, georep_mnt_desc);
+
+out:
+ fa[0] = meetspec;
+ fa[1] = vols;
+ fa[2] = georep_mnt_desc;
+
+ for (i = 0; i < 3; i++) {
+ if (fa[i] == NULL)
+ ret = -1;
+ else
+ GF_FREE(fa[i]);
+ }
+
+ return ret;
}
static gf_boolean_t
-match_comp (char *str, char *patcomp)
+match_comp(char *str, char *patcomp)
{
- char *c1 = patcomp;
- char *c2 = str;
-
- GF_ASSERT (c1);
- GF_ASSERT (c2);
-
- while (*c1 == *c2) {
- if (!*c1)
- return _gf_true;
- c1++;
- c2++;
- if (c1[-1] == '=')
- break;
- }
-
- return fnmatch (c1, c2, 0) == 0 ? _gf_true : _gf_false;
+ char *c1 = patcomp;
+ char *c2 = str;
+
+ GF_ASSERT(c1);
+ GF_ASSERT(c2);
+
+ while (*c1 == *c2) {
+ if (!*c1)
+ return _gf_true;
+ c1++;
+ c2++;
+ if (c1[-1] == '=')
+ break;
+ }
+
+ return fnmatch(c1, c2, 0) == 0 ? _gf_true : _gf_false;
}
struct gf_set_descriptor {
- gf_boolean_t priv[2];
- gf_boolean_t common;
+ gf_boolean_t priv[2];
+ gf_boolean_t common;
};
static int
-_gf_set_dict_iter1 (char *val, void *data)
+_gf_set_dict_iter1(char *val, void *data)
{
- void **dataa = data;
- struct gf_set_descriptor *sd = dataa[0];
- char **curs = dataa[1];
- gf_boolean_t priv = _gf_true;
-
- while (*curs) {
- if (match_comp (val, *curs)) {
- priv = _gf_false;
- sd->common = _gf_true;
- }
- curs++;
+ void **dataa = data;
+ struct gf_set_descriptor *sd = dataa[0];
+ char **curs = dataa[1];
+ gf_boolean_t priv = _gf_true;
+
+ while (*curs) {
+ if (match_comp(val, *curs)) {
+ priv = _gf_false;
+ sd->common = _gf_true;
}
+ curs++;
+ }
- if (priv)
- sd->priv[0] = _gf_true;
+ if (priv)
+ sd->priv[0] = _gf_true;
- return 0;
+ return 0;
}
static int
-_gf_set_dict_iter2 (char *val, void *data)
+_gf_set_dict_iter2(char *val, void *data)
{
- void **dataa = data;
- gf_boolean_t *boo = dataa[0];
- char *comp = dataa[1];
+ void **dataa = data;
+ gf_boolean_t *boo = dataa[0];
+ char *comp = dataa[1];
- if (match_comp (val, comp))
- *boo = _gf_true;
+ if (match_comp(val, comp))
+ *boo = _gf_true;
- return 0;
+ return 0;
}
static void
-relate_sets (struct gf_set_descriptor *sd, dict_t *argdict, char **complist)
+relate_sets(struct gf_set_descriptor *sd, dict_t *argdict, char **complist)
{
- void *dataa[] = {NULL, NULL};
- gf_boolean_t boo = _gf_false;
+ void *dataa[] = {NULL, NULL};
+ gf_boolean_t boo = _gf_false;
- memset (sd, 0, sizeof (*sd));
+ memset(sd, 0, sizeof(*sd));
- dataa[0] = sd;
- dataa[1] = complist;
- seq_dict_foreach (argdict, _gf_set_dict_iter1, dataa);
+ dataa[0] = sd;
+ dataa[1] = complist;
+ seq_dict_foreach(argdict, _gf_set_dict_iter1, dataa);
- while (*complist) {
- boo = _gf_false;
- dataa[0] = &boo;
- dataa[1] = *complist;
- seq_dict_foreach (argdict, _gf_set_dict_iter2, dataa);
+ while (*complist) {
+ boo = _gf_false;
+ dataa[0] = &boo;
+ dataa[1] = *complist;
+ seq_dict_foreach(argdict, _gf_set_dict_iter2, dataa);
- if (boo)
- sd->common = _gf_true;
- else
- sd->priv[1] = _gf_true;
+ if (boo)
+ sd->common = _gf_true;
+ else
+ sd->priv[1] = _gf_true;
- complist++;
- }
+ complist++;
+ }
}
static int
-_arg_parse_uid (char *val, void *data)
+_arg_parse_uid(char *val, void *data)
{
- char *user = strtail (val, "user-map-root=");
- struct passwd *pw = NULL;
+ char *user = strtail(val, "user-map-root=");
+ struct passwd *pw = NULL;
- if (!user)
- return 0;
- pw = getpwnam (user);
- if (!pw)
- return -EINVAL;
+ if (!user)
+ return 0;
+ pw = getpwnam(user);
+ if (!pw)
+ return -EINVAL;
- if (*(int *)data >= 0)
- /* uid ambiguity, already found */
- return -EINVAL;
+ if (*(int *)data >= 0)
+ /* uid ambiguity, already found */
+ return -EINVAL;
- *(int *)data = pw->pw_uid;
- return 0;
+ *(int *)data = pw->pw_uid;
+ return 0;
}
static int
-evaluate_mount_request (gf_mount_spec_t *mspec, dict_t *argdict)
+evaluate_mount_request(xlator_t *this, gf_mount_spec_t *mspec, dict_t *argdict)
{
- struct gf_set_descriptor sd = {{0,},};
- int i = 0;
- int uid = -1;
- int ret = 0;
- gf_boolean_t match = _gf_false;
-
- for (i = 0; i < mspec->len; i++) {
- relate_sets (&sd, argdict, mspec->patterns[i].components);
- switch (mspec->patterns[i].condition) {
- case SET_SUB:
- match = !sd.priv[0];
- break;
- case SET_SUPER:
- match = !sd.priv[1];
- break;
- case SET_EQUAL:
- match = (!sd.priv[0] && !sd.priv[1]);
- break;
- case SET_INTERSECT:
- match = sd.common;
- break;
- default:
- GF_ASSERT(!"unreached");
- }
- if (mspec->patterns[i].negative)
- match = !match;
-
- if (!match)
- return -EPERM;
+ struct gf_set_descriptor sd = {
+ {
+ 0,
+ },
+ };
+ int i = 0;
+ int uid = -1;
+ int ret = 0;
+ gf_boolean_t match = _gf_false;
+
+ for (i = 0; i < mspec->len; i++) {
+ relate_sets(&sd, argdict, mspec->patterns[i].components);
+ switch (mspec->patterns[i].condition) {
+ case SET_SUB:
+ match = !sd.priv[0];
+ break;
+ case SET_SUPER:
+ match = !sd.priv[1];
+ break;
+ case SET_EQUAL:
+ match = (!sd.priv[0] && !sd.priv[1]);
+ break;
+ case SET_INTERSECT:
+ match = sd.common;
+ break;
+ default:
+ GF_ASSERT(!"unreached");
+ }
+ if (mspec->patterns[i].negative)
+ match = !match;
+
+ if (!match) {
+ gf_msg(this->name, GF_LOG_ERROR, EPERM,
+ GD_MSG_MNTBROKER_SPEC_MISMATCH,
+ "Mountbroker spec mismatch!!! SET: %d "
+ "COMPONENT: %d. Review the mount args passed",
+ mspec->patterns[i].condition, i);
+ return -EPERM;
}
+ }
- ret = seq_dict_foreach (argdict, _arg_parse_uid, &uid);
- if (ret != 0)
- return ret;
+ ret = seq_dict_foreach(argdict, _arg_parse_uid, &uid);
+ if (ret != 0)
+ return ret;
- return uid;
+ return uid;
}
static int
-_volname_get (char *val, void *data)
+_volname_get(char *val, void *data)
{
- char **volname = data;
+ char **volname = data;
- *volname = strtail (val, "volfile-id=");
+ *volname = strtail(val, "volfile-id=");
- return *volname ? 1 : 0;
+ return *volname ? 1 : 0;
}
static int
-_runner_add (char *val, void *data)
+_runner_add(char *val, void *data)
{
- runner_t *runner = data;
+ runner_t *runner = data;
- runner_argprintf (runner, "--%s", val);
+ runner_argprintf(runner, "--%s", val);
- return 0;
+ return 0;
}
int
-glusterd_do_mount (char *label, dict_t *argdict, char **path, int *op_errno)
+glusterd_do_mount(char *label, dict_t *argdict, char **path, int *op_errno)
{
- glusterd_conf_t *priv = NULL;
- char *mountbroker_root = NULL;
- gf_mount_spec_t *mspec = NULL;
- int uid = -ENOENT;
- char *volname = NULL;
- glusterd_volinfo_t *vol = NULL;
- char *mtptemp = NULL;
- char *mntlink = NULL;
- char *cookieswitch = NULL;
- char *cookie = NULL;
- char *sla = NULL;
- struct stat st = {0,};
- runner_t runner = {0,};
- int ret = 0;
- xlator_t *this = THIS;
-
- priv = this->private;
- GF_ASSERT (priv);
-
- GF_ASSERT (op_errno);
- *op_errno = 0;
-
- if (dict_get_str (this->options, "mountbroker-root",
- &mountbroker_root) != 0) {
- *op_errno = ENOENT;
- goto out;
+ glusterd_conf_t *priv = NULL;
+ char *mountbroker_root = NULL;
+ gf_mount_spec_t *mspec = NULL;
+ int uid = -ENOENT;
+ char *volname = NULL;
+ glusterd_volinfo_t *vol = NULL;
+ char *mtptemp = NULL;
+ char *mntlink = NULL;
+ char *cookieswitch = NULL;
+ char *cookie = NULL;
+ char *sla = NULL;
+ struct stat st = {
+ 0,
+ };
+ runner_t runner = {
+ 0,
+ };
+ int ret = 0;
+ xlator_t *this = THIS;
+ mode_t orig_umask = 0;
+ gf_boolean_t found_label = _gf_false;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GF_ASSERT(op_errno);
+ *op_errno = 0;
+
+ if (dict_get_strn(this->options, "mountbroker-root",
+ SLEN("mountbroker-root"), &mountbroker_root) != 0) {
+ *op_errno = ENOENT;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "'option mountbroker-root' "
+ "missing in glusterd vol file");
+ goto out;
+ }
+
+ GF_ASSERT(label);
+ if (!*label) {
+ *op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno, GD_MSG_MNTBROKER_LABEL_NULL,
+ "label is NULL (%s)", strerror(*op_errno));
+ goto out;
+ }
+
+ /* look up spec for label */
+ cds_list_for_each_entry(mspec, &priv->mount_specs, speclist)
+ {
+ if (strcmp(mspec->label, label) != 0)
+ continue;
+
+ found_label = _gf_true;
+ uid = evaluate_mount_request(this, mspec, argdict);
+ break;
+ }
+ if (uid < 0) {
+ *op_errno = -uid;
+ if (!found_label) {
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno,
+ GD_MSG_MNTBROKER_LABEL_MISS,
+ "Missing mspec: Check the corresponding option "
+ "in glusterd vol file for mountbroker user: %s",
+ label);
}
-
- GF_ASSERT (label);
- if (!*label) {
- *op_errno = EINVAL;
- goto out;
+ goto out;
+ }
+
+ /* some sanity check on arguments */
+ seq_dict_foreach(argdict, _volname_get, &volname);
+ if (!volname) {
+ *op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_DICT_GET_FAILED,
+ "Dict get failed for the key 'volname'");
+ goto out;
+ }
+ if (glusterd_volinfo_find(volname, &vol) != 0 ||
+ !glusterd_is_volume_started(vol)) {
+ *op_errno = ENOENT;
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno, GD_MSG_MOUNT_REQ_FAIL,
+ "Either volume is not started or volinfo not found");
+ goto out;
+ }
+
+ /* go do mount */
+
+ /** create actual mount dir */
+
+ /*** "overload" string name to be possible to used for cookie
+ creation, see below */
+ ret = gf_asprintf(&mtptemp, "%s/user%d/mtpt-%s-XXXXXX/cookie",
+ mountbroker_root, uid, label);
+ if (ret == -1) {
+ mtptemp = NULL;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ /*** hide cookie part */
+ cookieswitch = strrchr(mtptemp, '/');
+ *cookieswitch = '\0';
+
+ sla = strrchr(mtptemp, '/');
+ *sla = '\0';
+ ret = sys_mkdir(mtptemp, 0700);
+ if (ret == 0)
+ ret = sys_chown(mtptemp, uid, 0);
+ else if (errno == EEXIST)
+ ret = 0;
+ if (ret == -1) {
+ *op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno, GD_MSG_SYSCALL_FAIL,
+ "Mountbroker User directory creation failed");
+ goto out;
+ }
+ ret = sys_lstat(mtptemp, &st);
+ if (ret == -1) {
+ *op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno, GD_MSG_SYSCALL_FAIL,
+ "stat on mountbroker user directory failed");
+ goto out;
+ }
+ if (!(S_ISDIR(st.st_mode) && (st.st_mode & ~S_IFMT) == 0700 &&
+ st.st_uid == uid && st.st_gid == 0)) {
+ *op_errno = EACCES;
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno, GD_MSG_MOUNT_REQ_FAIL,
+ "Incorrect mountbroker user directory attributes");
+ goto out;
+ }
+ *sla = '/';
+
+ if (!mkdtemp(mtptemp)) {
+ *op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno, GD_MSG_SYSCALL_FAIL,
+ "Mountbroker mount directory creation failed");
+ goto out;
+ }
+
+ /** create private "cookie" symlink */
+
+ /*** occupy an entry in the hive dir via mkstemp */
+ ret = gf_asprintf(&cookie, "%s/" MB_HIVE "/mntXXXXXX", mountbroker_root);
+ if (ret == -1) {
+ cookie = NULL;
+ *op_errno = ENOMEM;
+ goto out;
+ }
+ orig_umask = umask(S_IRWXG | S_IRWXO);
+ ret = mkstemp(cookie);
+ umask(orig_umask);
+ if (ret == -1) {
+ *op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno, GD_MSG_SYSCALL_FAIL,
+ "Mountbroker cookie file creation failed");
+ goto out;
+ }
+ sys_close(ret);
+
+ /*** assembly the path from cookie to mountpoint */
+ sla = strchr(sla - 1, '/');
+ GF_ASSERT(sla);
+ ret = gf_asprintf(&mntlink, "../user%d%s", uid, sla);
+ if (ret == -1) {
+ *op_errno = ENOMEM;
+ goto out;
+ }
+
+ /*** create cookie link in (to-be) mountpoint,
+ move it over to the final place */
+ *cookieswitch = '/';
+ ret = sys_symlink(mntlink, mtptemp);
+ if (ret != -1)
+ ret = sys_rename(mtptemp, cookie);
+ *cookieswitch = '\0';
+ if (ret == -1) {
+ *op_errno = errno;
+ gf_msg(this->name, GF_LOG_ERROR, *op_errno, GD_MSG_SYSCALL_FAIL,
+ "symlink or rename failed");
+ goto out;
+ }
+
+ /** invoke glusterfs on the mountpoint */
+
+ runinit(&runner);
+ runner_add_arg(&runner, SBIN_DIR "/glusterfs");
+ seq_dict_foreach(argdict, _runner_add, &runner);
+ runner_add_arg(&runner, mtptemp);
+ ret = runner_run_reuse(&runner);
+ if (ret == -1) {
+ *op_errno = EIO; /* XXX hacky fake */
+ runner_log(&runner, "", GF_LOG_ERROR, "command failed");
+ }
+ runner_end(&runner);
+
+out:
+
+ if (*op_errno) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, *op_errno, GD_MSG_MOUNT_REQ_FAIL,
+ "unsuccessful mount request");
+ if (mtptemp) {
+ *cookieswitch = '/';
+ sys_unlink(mtptemp);
+ *cookieswitch = '\0';
+ sys_rmdir(mtptemp);
}
-
- /* look up spec for label */
- list_for_each_entry (mspec, &priv->mount_specs,
- speclist) {
- if (strcmp (mspec->label, label) != 0)
- continue;
- uid = evaluate_mount_request (mspec, argdict);
- break;
- }
- if (uid < 0) {
- *op_errno = -uid;
- goto out;
- }
-
- /* some sanity check on arguments */
- seq_dict_foreach (argdict, _volname_get, &volname);
- if (!volname) {
- *op_errno = EINVAL;
- goto out;
- }
- if (glusterd_volinfo_find (volname, &vol) != 0 ||
- !glusterd_is_volume_started (vol)) {
- *op_errno = ENOENT;
- goto out;
+ if (cookie) {
+ sys_unlink(cookie);
+ GF_FREE(cookie);
}
- /* go do mount */
+ } else {
+ ret = 0;
+ *path = cookie;
+ }
- /** create actual mount dir */
+ if (mtptemp)
+ GF_FREE(mtptemp);
+ if (mntlink)
+ GF_FREE(mntlink);
- /*** "overload" string name to be possible to used for cookie
- creation, see below */
- ret = gf_asprintf (&mtptemp, "%s/user%d/mtpt-%s-XXXXXX/cookie",
- mountbroker_root, uid, label);
- if (ret == -1) {
- mtptemp = NULL;
- *op_errno = ENOMEM;
- goto out;
- }
- /*** hide cookie part */
- cookieswitch = strrchr (mtptemp, '/');
- *cookieswitch = '\0';
-
- sla = strrchr (mtptemp, '/');
- *sla = '\0';
- ret = mkdir (mtptemp, 0700);
- if (ret == 0)
- ret = chown (mtptemp, uid, 0);
- else if (errno == EEXIST)
- ret = 0;
- if (ret == -1) {
- *op_errno = errno;
- goto out;
- }
- ret = lstat (mtptemp, &st);
- if (ret == -1) {
- *op_errno = errno;
- goto out;
- }
- if (!(S_ISDIR (st.st_mode) && (st.st_mode & ~S_IFMT) == 0700 &&
- st.st_uid == uid && st.st_gid == 0)) {
- *op_errno = EACCES;
- goto out;
- }
- *sla = '/';
-
- if (!mkdtemp (mtptemp)) {
- *op_errno = errno;
- goto out;
- }
-
- /** create private "cookie" symlink */
-
- /*** occupy an entry in the hive dir via mkstemp */
- ret = gf_asprintf (&cookie, "%s/"MB_HIVE"/mntXXXXXX",
- mountbroker_root);
- if (ret == -1) {
- cookie = NULL;
- *op_errno = ENOMEM;
- goto out;
- }
- ret = mkstemp (cookie);
- if (ret == -1) {
- *op_errno = errno;
- goto out;
- }
- close (ret);
-
- /*** assembly the path from cookie to mountpoint */
- sla = strchr (sla - 1, '/');
- GF_ASSERT (sla);
- ret = gf_asprintf (&mntlink, "../user%d%s", uid, sla);
- if (ret == -1) {
- *op_errno = ENOMEM;
- goto out;
- }
-
- /*** create cookie link in (to-be) mountpoint,
- move it over to the final place */
- *cookieswitch = '/';
- ret = symlink (mntlink, mtptemp);
- if (ret != -1)
- ret = rename (mtptemp, cookie);
- *cookieswitch = '\0';
- if (ret == -1) {
- *op_errno = errno;
- goto out;
- }
-
- /** invoke glusterfs on the mountpoint */
-
- runinit (&runner);
- runner_add_arg (&runner, SBIN_DIR"/glusterfs");
- seq_dict_foreach (argdict, _runner_add, &runner);
- runner_add_arg (&runner, mtptemp);
- ret = runner_run_reuse (&runner);
- if (ret == -1) {
- *op_errno = EIO; /* XXX hacky fake */
- runner_log (&runner, "", GF_LOG_ERROR, "command failed");
- }
- runner_end (&runner);
-
- out:
-
- if (*op_errno) {
- ret = -1;
- gf_log ("", GF_LOG_WARNING, "unsuccessful mount request (%s)",
- strerror (*op_errno));
- if (mtptemp) {
- *cookieswitch = '/';
- unlink (mtptemp);
- *cookieswitch = '\0';
- rmdir (mtptemp);
- }
- if (cookie) {
- unlink (cookie);
- GF_FREE (cookie);
- }
-
- } else {
- ret = 0;
- *path = cookie;
- }
-
- if (mtptemp)
- GF_FREE (mtptemp);
-
- return ret;
+ return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-mountbroker.h b/xlators/mgmt/glusterd/src/glusterd-mountbroker.h
index 729cf3fbc77..20c1347f52f 100644
--- a/xlators/mgmt/glusterd/src/glusterd-mountbroker.h
+++ b/xlators/mgmt/glusterd/src/glusterd-mountbroker.h
@@ -1,52 +1,37 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#define MB_HIVE "mb_hive"
-typedef enum {
- SET_SUB = 1,
- SET_SUPER,
- SET_EQUAL,
- SET_INTERSECT
-} gf_setrel_t;
+typedef enum { SET_SUB = 1, SET_SUPER, SET_EQUAL, SET_INTERSECT } gf_setrel_t;
struct gf_mount_pattern {
- char **components;
- gf_setrel_t condition;
- gf_boolean_t negative;
+ char **components;
+ gf_setrel_t condition;
+ gf_boolean_t negative;
};
typedef struct gf_mount_pattern gf_mount_pattern_t;
struct gf_mount_spec {
- struct list_head speclist;
- char *label;
- gf_mount_pattern_t *patterns;
- size_t len;
+ struct cds_list_head speclist;
+ char *label;
+ gf_mount_pattern_t *patterns;
+ size_t len;
};
typedef struct gf_mount_spec gf_mount_spec_t;
+int
+parse_mount_pattern_desc(gf_mount_spec_t *mspec, char *pdesc);
-int parse_mount_pattern_desc (gf_mount_spec_t *mspec, char *pdesc);
-
-int make_georep_mountspec (gf_mount_spec_t *mspec, const char *volname,
- char *user);
-int make_ghadoop_mountspec (gf_mount_spec_t *mspec, const char *volname,
- char *user, char *server);
+int
+make_georep_mountspec(gf_mount_spec_t *mspec, const char *volname, char *user,
+ char *logdir);
-int glusterd_do_mount (char *label, dict_t *argdict, char **path, int *op_errno);
+int
+glusterd_do_mount(char *label, dict_t *argdict, char **path, int *op_errno);
diff --git a/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c
new file mode 100644
index 00000000000..4908dbbc213
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.c
@@ -0,0 +1,228 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifdef BUILD_GNFS
+
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-nfs-svc.h"
+#include "glusterd-messages.h"
+#include "glusterd-svc-helper.h"
+
+static gf_boolean_t
+glusterd_nfssvc_need_start()
+{
+ glusterd_conf_t *priv = NULL;
+ gf_boolean_t start = _gf_false;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ priv = THIS->private;
+
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ if (!glusterd_is_volume_started(volinfo))
+ continue;
+
+ if (dict_get_str_boolean(volinfo->dict, NFS_DISABLE_MAP_KEY, 1))
+ continue;
+ start = _gf_true;
+ break;
+ }
+
+ return start;
+}
+
+static int
+glusterd_nfssvc_create_volfile()
+{
+ char filepath[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *conf = THIS->private;
+
+ glusterd_svc_build_volfile_path(conf->nfs_svc.name, conf->workdir, filepath,
+ sizeof(filepath));
+ return glusterd_create_global_volfile(build_nfs_graph, filepath, NULL);
+}
+
+static int
+glusterd_nfssvc_manager(glusterd_svc_t *svc, void *data, int flags)
+{
+ int ret = -1;
+
+ if (!svc->inited) {
+ ret = glusterd_svc_init(svc, "nfs");
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_NFSSVC,
+ "Failed to init nfs service");
+ goto out;
+ } else {
+ svc->inited = _gf_true;
+ gf_msg_debug(THIS->name, 0, "nfs service initialized");
+ }
+ }
+
+ ret = svc->stop(svc, SIGKILL);
+ if (ret)
+ goto out;
+
+ /* not an error, or a (very) soft error at best */
+ if (sys_access(XLATORDIR "/nfs/server.so", R_OK) != 0) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_GNFS_XLATOR_NOT_INSTALLED,
+ "nfs/server.so xlator is not installed");
+ goto out;
+ }
+
+ ret = glusterd_nfssvc_create_volfile();
+ if (ret)
+ goto out;
+
+ if (glusterd_nfssvc_need_start()) {
+ ret = svc->start(svc, flags);
+ if (ret)
+ goto out;
+
+ ret = glusterd_conn_connect(&(svc->conn));
+ if (ret)
+ goto out;
+ }
+out:
+ if (ret)
+ gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name);
+
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_nfssvc_start(glusterd_svc_t *svc, int flags)
+{
+ return glusterd_svc_start(svc, flags, NULL);
+}
+
+static int
+glusterd_nfssvc_stop(glusterd_svc_t *svc, int sig)
+{
+ int ret = -1;
+ gf_boolean_t deregister = _gf_false;
+
+ if (glusterd_proc_is_running(&(svc->proc)))
+ deregister = _gf_true;
+
+ ret = glusterd_svc_stop(svc, sig);
+ if (ret)
+ goto out;
+ if (deregister)
+ glusterd_nfs_pmap_deregister();
+
+out:
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+void
+glusterd_nfssvc_build(glusterd_svc_t *svc)
+{
+ svc->manager = glusterd_nfssvc_manager;
+ svc->start = glusterd_nfssvc_start;
+ svc->stop = glusterd_nfssvc_stop;
+}
+
+int
+glusterd_nfssvc_reconfigure()
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ gf_boolean_t identical = _gf_false;
+ gf_boolean_t vol_started = _gf_false;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ /* not an error, or a (very) soft error at best */
+ if (sys_access(XLATORDIR "/nfs/server.so", R_OK) != 0) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_GNFS_XLATOR_NOT_INSTALLED,
+ "nfs/server.so xlator is not installed");
+ ret = 0;
+ goto out;
+ }
+
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ vol_started = _gf_true;
+ break;
+ }
+ }
+ if (!vol_started) {
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * Check both OLD and NEW volfiles, if they are SAME by size
+ * and cksum i.e. "character-by-character". If YES, then
+ * NOTHING has been changed, just return.
+ */
+
+ ret = glusterd_svc_check_volfile_identical(priv->nfs_svc.name,
+ build_nfs_graph, &identical);
+ if (ret)
+ goto out;
+
+ if (identical) {
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * They are not identical. Find out if the topology is changed
+ * OR just the volume options. If just the options which got
+ * changed, then inform the xlator to reconfigure the options.
+ */
+ identical = _gf_false; /* RESET the FLAG */
+ ret = glusterd_svc_check_topology_identical(priv->nfs_svc.name,
+ build_nfs_graph, &identical);
+ if (ret)
+ goto out;
+
+ /* Topology is not changed, but just the options. But write the
+ * options to NFS volfile, so that NFS will be reconfigured.
+ */
+ if (identical) {
+ ret = glusterd_nfssvc_create_volfile();
+ if (ret == 0) { /* Only if above PASSES */
+ ret = glusterd_fetchspec_notify(THIS);
+ }
+ goto out;
+ }
+
+ /*
+ * NFS volfile's topology has been changed. NFS server needs
+ * to be RESTARTED to ACT on the changed volfile.
+ */
+ ret = priv->nfs_svc.manager(&(priv->nfs_svc), NULL, PROC_START_NO_WAIT);
+
+out:
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-nfs-svc.h b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.h
new file mode 100644
index 00000000000..6bfdde95749
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-nfs-svc.h
@@ -0,0 +1,27 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_NFS_SVC_H_
+#define _GLUSTERD_NFS_SVC_H_
+
+#include "glusterd-svc-mgmt.h"
+
+#ifdef BUILD_GNFS
+void
+glusterd_nfssvc_build(glusterd_svc_t *svc);
+
+int
+glusterd_nfssvc_init(glusterd_svc_t *svc);
+
+int
+glusterd_nfssvc_reconfigure();
+
+#endif /* BUILD_GNFS */
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.c b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
index d0e8882fb7a..c537fc33a85 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.c
@@ -1,2175 +1,4832 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
#include <time.h>
#include <sys/uio.h>
#include <sys/resource.h>
#include <sys/mount.h>
#include <libgen.h>
-#include "uuid.h"
+#include <glusterfs/compat-uuid.h>
#include "fnmatch.h"
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "protocol-common.h"
#include "glusterd.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "list.h"
-#include "dict.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "statedump.h"
-#include "glusterd-sm.h"
+#include <glusterfs/call-stub.h>
+#include <glusterfs/list.h>
+#include <glusterfs/dict.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/statedump.h>
#include "glusterd-op-sm.h"
#include "glusterd-utils.h"
#include "glusterd-store.h"
-#include "glusterd-hooks.h"
-#include "glusterd-volgen.h"
-#include "syscall.h"
+#include "glusterd-locks.h"
+#include "glusterd-quota.h"
+#include <glusterfs/syscall.h>
#include "cli1-xdr.h"
-#include "common-utils.h"
-#include "run.h"
-
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-shd-svc-helper.h"
+#include "glusterd-shd-svc.h"
+#include "glusterd-quotad-svc.h"
+#include "glusterd-server-quorum.h"
#include <sys/types.h>
#include <signal.h>
#include <sys/wait.h>
+#include "glusterd-gfproxyd-svc-helper.h"
-static struct list_head gd_op_sm_queue;
-pthread_mutex_t gd_op_sm_lock;
-glusterd_op_info_t opinfo = {{0},};
-static int glusterfs_port = GLUSTERD_DEFAULT_PORT;
-static char *glusterd_op_sm_state_names[] = {
- "Default",
- "Lock sent",
- "Locked",
- "Stage op sent",
- "Staged",
- "Commit op sent",
- "Committed",
- "Unlock sent",
- "Stage op failed",
- "Commit op failed",
- "Brick op sent",
- "Brick op failed",
- "Brick op Committed",
- "Brick op Commit failed",
- "Ack drain",
- "Invalid",
+#define len_strcmp(key, len, str) \
+ ((len == SLEN(str)) && (strcmp(key, str) == 0))
+
+extern char local_node_hostname[PATH_MAX];
+static int
+glusterd_set_shared_storage(dict_t *dict, char *key, char *value,
+ char **op_errstr);
+
+/*
+ * Valid options for all volumes to be listed in the valid_all_vol_opts table.
+ * To add newer options to all volumes, we can just add more entries to this
+ * table.
+ *
+ * It's important that every value have a default, or have a special handler
+ * in glusterd_get_global_options_for_all_vols, or else we might crash there.
+ */
+const glusterd_all_vol_opts valid_all_vol_opts[] = {
+ {GLUSTERD_QUORUM_RATIO_KEY, "51"},
+ {GLUSTERD_SHARED_STORAGE_KEY, "disable"},
+ /* This one actually gets filled in dynamically. */
+ {GLUSTERD_GLOBAL_OP_VERSION_KEY, "BUG_NO_OP_VERSION"},
+ /*
+ * This one should be filled in dynamically, but it didn't used to be
+ * (before the defaults were added here) so the value is unclear.
+ *
+ * TBD: add a dynamic handler to set the appropriate value
+ */
+ {GLUSTERD_MAX_OP_VERSION_KEY, "BUG_NO_MAX_OP_VERSION"},
+ {GLUSTERD_BRICK_MULTIPLEX_KEY, "disable"},
+ /* Set this value to 0 by default implying brick-multiplexing
+ * behaviour with no limit set on the number of brick instances that
+ * can be attached per process.
+ * TBD: Discuss the default value for this. Maybe this should be a
+ * dynamic value depending on the memory specifications per node */
+ {GLUSTERD_BRICKMUX_LIMIT_KEY, GLUSTERD_BRICKMUX_LIMIT_DFLT_VALUE},
+ {GLUSTERD_VOL_CNT_PER_THRD, GLUSTERD_VOL_CNT_PER_THRD_DEFAULT_VALUE},
+ {GLUSTERD_LOCALTIME_LOGGING_KEY, "disable"},
+ {GLUSTERD_DAEMON_LOG_LEVEL_KEY, "INFO"},
+ {NULL},
};
-static char *glusterd_op_sm_event_names[] = {
- "GD_OP_EVENT_NONE",
- "GD_OP_EVENT_START_LOCK",
- "GD_OP_EVENT_LOCK",
- "GD_OP_EVENT_RCVD_ACC",
- "GD_OP_EVENT_ALL_ACC",
- "GD_OP_EVENT_STAGE_ACC",
- "GD_OP_EVENT_COMMIT_ACC",
- "GD_OP_EVENT_RCVD_RJT",
- "GD_OP_EVENT_STAGE_OP",
- "GD_OP_EVENT_COMMIT_OP",
- "GD_OP_EVENT_UNLOCK",
- "GD_OP_EVENT_START_UNLOCK",
- "GD_OP_EVENT_ALL_ACK",
- "GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP",
- "GD_OP_EVENT_INVALID"
+static struct cds_list_head gd_op_sm_queue;
+synclock_t gd_op_sm_lock;
+glusterd_op_info_t opinfo = {
+ {0},
};
-char*
-glusterd_op_sm_state_name_get (int state)
+int32_t
+glusterd_txn_opinfo_dict_init()
{
- if (state < 0 || state >= GD_OP_STATE_MAX)
- return glusterd_op_sm_state_names[GD_OP_STATE_MAX];
- return glusterd_op_sm_state_names[state];
-}
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
-char*
-glusterd_op_sm_event_name_get (int event)
-{
- if (event < 0 || event >= GD_OP_EVENT_MAX)
- return glusterd_op_sm_event_names[GD_OP_EVENT_MAX];
- return glusterd_op_sm_event_names[event];
-}
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
-void
-glusterd_destroy_lock_ctx (glusterd_op_lock_ctx_t *ctx)
-{
- if (!ctx)
- return;
- GF_FREE (ctx);
+ priv->glusterd_txn_opinfo = dict_new();
+ if (!priv->glusterd_txn_opinfo) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ memset(priv->global_txn_id, '\0', sizeof(uuid_t));
+
+ ret = 0;
+out:
+ return ret;
}
void
-glusterd_set_volume_status (glusterd_volinfo_t *volinfo,
- glusterd_volume_status status)
+glusterd_txn_opinfo_dict_fini()
{
- GF_ASSERT (volinfo);
- volinfo->status = status;
-}
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
-gf_boolean_t
-glusterd_is_volume_started (glusterd_volinfo_t *volinfo)
-{
- GF_ASSERT (volinfo);
- return (volinfo->status == GLUSTERD_STATUS_STARTED);
-}
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
-static int
-glusterd_op_sm_inject_all_acc ()
-{
- int32_t ret = -1;
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL);
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (priv->glusterd_txn_opinfo)
+ dict_unref(priv->glusterd_txn_opinfo);
}
-int
-glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickinfo,
- gd1_mgmt_brick_op_req **req, dict_t *dict)
+void
+glusterd_txn_opinfo_init(glusterd_op_info_t *opinfo,
+ glusterd_op_sm_state_info_t *state, int *op,
+ dict_t *op_ctx, rpcsvc_request_t *req)
{
- int ret = -1;
- gd1_mgmt_brick_op_req *brick_req = NULL;
- char *volname = NULL;
- char name[1024] = {0,};
- gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID;
+ glusterd_conf_t *conf = NULL;
- GF_ASSERT (op < GD_OP_MAX);
- GF_ASSERT (op > GD_OP_NONE);
- GF_ASSERT (req);
+ GF_ASSERT(opinfo);
+ conf = THIS->private;
+ GF_ASSERT(conf);
- switch (op) {
- case GD_OP_REMOVE_BRICK:
- case GD_OP_STOP_VOLUME:
- brick_req = GF_CALLOC (1, sizeof (*brick_req),
- gf_gld_mt_mop_brick_req_t);
- if (!brick_req) {
- gf_log ("", GF_LOG_ERROR, "Out of Memory");
- goto out;
- }
- brick_req->op = GLUSTERD_BRICK_TERMINATE;
- brick_req->name = "";
- break;
- case GD_OP_PROFILE_VOLUME:
- brick_req = GF_CALLOC (1, sizeof (*brick_req),
- gf_gld_mt_mop_brick_req_t);
+ if (state)
+ opinfo->state = *state;
- if (!brick_req) {
- gf_log ("", GF_LOG_ERROR, "Out of Memory");
- goto out;
- }
+ if (op)
+ opinfo->op = *op;
- brick_req->op = GLUSTERD_BRICK_XLATOR_INFO;
- brick_req->name = brickinfo->path;
+ if (op_ctx)
+ opinfo->op_ctx = dict_ref(op_ctx);
+ else
+ opinfo->op_ctx = NULL;
- break;
- case GD_OP_HEAL_VOLUME:
- {
- brick_req = GF_CALLOC (1, sizeof (*brick_req),
- gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
- goto out;
+ if (req)
+ opinfo->req = req;
- brick_req->op = GLUSTERD_BRICK_XLATOR_OP;
- brick_req->name = "";
- ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op);
- if (ret)
- goto out;
- ret = dict_set_int32 (dict, "xl-op", heal_op);
- }
- break;
- case GD_OP_STATUS_VOLUME:
- {
- brick_req = GF_CALLOC (1, sizeof (*brick_req),
- gf_gld_mt_mop_brick_req_t);
- if (!brick_req) {
- gf_log (THIS->name, GF_LOG_ERROR, "Out of memory");
- goto out;
- }
- brick_req->op = GLUSTERD_BRICK_STATUS;
- brick_req->name = "";
- }
- break;
- case GD_OP_REBALANCE:
- case GD_OP_DEFRAG_BRICK_VOLUME:
- brick_req = GF_CALLOC (1, sizeof (*brick_req),
- gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
- goto out;
+ opinfo->txn_generation = conf->generation;
+ cmm_smp_rmb();
- brick_req->op = GLUSTERD_BRICK_XLATOR_DEFRAG;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret)
- goto out;
- snprintf (name, 1024, "%s-dht",volname);
- brick_req->name = gf_strdup (name);
+ return;
+}
- break;
- default:
- goto out;
- break;
- }
+int32_t
+glusterd_generate_txn_id(dict_t *dict, uuid_t **txn_id)
+{
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(dict);
+
+ *txn_id = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!*txn_id) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ if (priv->op_version < GD_OP_VERSION_3_6_0)
+ gf_uuid_copy(**txn_id, priv->global_txn_id);
+ else
+ gf_uuid_generate(**txn_id);
+
+ ret = dict_set_bin(dict, "transaction_id", *txn_id, sizeof(**txn_id));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set transaction id.");
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0, "Transaction_id = %s", uuid_utoa(**txn_id));
+out:
+ if (ret && *txn_id) {
+ GF_FREE(*txn_id);
+ *txn_id = NULL;
+ }
- ret = dict_allocate_and_serialize (dict, &brick_req->input.input_val,
- (size_t*)&brick_req->input.input_len);
- if (ret)
- goto out;
- *req = brick_req;
- ret = 0;
+ return ret;
+}
+int32_t
+glusterd_get_txn_opinfo(uuid_t *txn_id, glusterd_op_info_t *opinfo)
+{
+ int32_t ret = -1;
+ glusterd_txn_opinfo_obj *opinfo_obj = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ if (!txn_id || !opinfo) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_ID_GET_FAIL,
+ "Empty transaction id or opinfo received.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_bin(priv->glusterd_txn_opinfo, uuid_utoa(*txn_id),
+ (void **)&opinfo_obj);
+ if (ret)
+ goto out;
+
+ (*opinfo) = opinfo_obj->opinfo;
+
+ gf_msg_debug(this->name, 0,
+ "Successfully got opinfo for transaction ID : %s",
+ uuid_utoa(*txn_id));
+
+ ret = 0;
out:
- if (ret && brick_req)
- GF_FREE (brick_req);
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
-int
-glusterd_node_op_build_payload (glusterd_op_t op, gd1_mgmt_brick_op_req **req,
- dict_t *dict)
+int32_t
+glusterd_set_txn_opinfo(uuid_t *txn_id, glusterd_op_info_t *opinfo)
{
- int ret = -1;
- gd1_mgmt_brick_op_req *brick_req = NULL;
+ int32_t ret = -1;
+ glusterd_txn_opinfo_obj *opinfo_obj = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ if (!txn_id) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_ID_GET_FAIL,
+ "Empty transaction id received.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_bin(priv->glusterd_txn_opinfo, uuid_utoa(*txn_id),
+ (void **)&opinfo_obj);
+ if (ret) {
+ opinfo_obj = GF_CALLOC(1, sizeof(glusterd_txn_opinfo_obj),
+ gf_common_mt_txn_opinfo_obj_t);
+ if (!opinfo_obj) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_bin(priv->glusterd_txn_opinfo, uuid_utoa(*txn_id),
+ opinfo_obj, sizeof(glusterd_txn_opinfo_obj));
+ if (ret) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_SET_FAILED,
+ "Unable to set opinfo for transaction"
+ " ID : %s",
+ uuid_utoa(*txn_id));
+ goto out;
+ }
+ }
- GF_ASSERT (op < GD_OP_MAX);
- GF_ASSERT (op > GD_OP_NONE);
- GF_ASSERT (req);
+ opinfo_obj->opinfo = (*opinfo);
- switch (op) {
- case GD_OP_PROFILE_VOLUME:
- brick_req = GF_CALLOC (1, sizeof (*brick_req),
- gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
- goto out;
-
- brick_req->op = GLUSTERD_NODE_PROFILE;
- brick_req->name = "";
+ gf_msg_debug(this->name, 0,
+ "Successfully set opinfo for transaction ID : %s",
+ uuid_utoa(*txn_id));
+ ret = 0;
+out:
+ if (ret)
+ if (opinfo_obj)
+ GF_FREE(opinfo_obj);
- break;
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
- case GD_OP_STATUS_VOLUME:
- brick_req = GF_CALLOC (1, sizeof (*brick_req),
- gf_gld_mt_mop_brick_req_t);
- if (!brick_req)
- goto out;
+int32_t
+glusterd_clear_txn_opinfo(uuid_t *txn_id)
+{
+ int32_t ret = -1;
+ glusterd_op_info_t txn_op_info = {
+ {0},
+ };
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ if (!txn_id) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_ID_GET_FAIL,
+ "Empty transaction id received.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_get_txn_opinfo(txn_id, &txn_op_info);
+ if (ret) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_TRANS_OPINFO_GET_FAIL,
+ "Unable to get transaction opinfo "
+ "for transaction ID : %s",
+ uuid_utoa(*txn_id));
+ goto out;
+ }
+
+ if (txn_op_info.op_ctx)
+ dict_unref(txn_op_info.op_ctx);
+
+ dict_del(priv->glusterd_txn_opinfo, uuid_utoa(*txn_id));
+
+ gf_msg_debug(this->name, 0,
+ "Successfully cleared opinfo for transaction ID : %s",
+ uuid_utoa(*txn_id));
+
+ ret = 0;
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
- brick_req->op = GLUSTERD_NODE_STATUS;
- brick_req->name = "";
+static int glusterfs_port = GLUSTERD_DEFAULT_PORT;
+static char *glusterd_op_sm_state_names[] = {
+ "Default",
+ "Lock sent",
+ "Locked",
+ "Stage op sent",
+ "Staged",
+ "Commit op sent",
+ "Committed",
+ "Unlock sent",
+ "Stage op failed",
+ "Commit op failed",
+ "Brick op sent",
+ "Brick op failed",
+ "Brick op Committed",
+ "Brick op Commit failed",
+ "Ack drain",
+ "Invalid",
+};
- break;
+static char *glusterd_op_sm_event_names[] = {
+ "GD_OP_EVENT_NONE", "GD_OP_EVENT_START_LOCK",
+ "GD_OP_EVENT_LOCK", "GD_OP_EVENT_RCVD_ACC",
+ "GD_OP_EVENT_ALL_ACC", "GD_OP_EVENT_STAGE_ACC",
+ "GD_OP_EVENT_COMMIT_ACC", "GD_OP_EVENT_RCVD_RJT",
+ "GD_OP_EVENT_STAGE_OP", "GD_OP_EVENT_COMMIT_OP",
+ "GD_OP_EVENT_UNLOCK", "GD_OP_EVENT_START_UNLOCK",
+ "GD_OP_EVENT_ALL_ACK", "GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP",
+ "GD_OP_EVENT_INVALID"};
+
+char *
+glusterd_op_sm_state_name_get(int state)
+{
+ if (state < 0 || state >= GD_OP_STATE_MAX)
+ return glusterd_op_sm_state_names[GD_OP_STATE_MAX];
+ return glusterd_op_sm_state_names[state];
+}
- default:
- goto out;
- }
+char *
+glusterd_op_sm_event_name_get(int event)
+{
+ if (event < 0 || event >= GD_OP_EVENT_MAX)
+ return glusterd_op_sm_event_names[GD_OP_EVENT_MAX];
+ return glusterd_op_sm_event_names[event];
+}
- ret = dict_allocate_and_serialize (dict, &brick_req->input.input_val,
- (size_t*)&brick_req->input.input_len);
+static void
+glusterd_destroy_lock_ctx(glusterd_op_lock_ctx_t *ctx)
+{
+ if (!ctx)
+ return;
+ GF_FREE(ctx);
+}
- if (ret)
- goto out;
+void
+glusterd_set_volume_status(glusterd_volinfo_t *volinfo,
+ glusterd_volume_status status)
+{
+ GF_ASSERT(volinfo);
+ volinfo->status = status;
+}
- *req = brick_req;
- ret = 0;
+static int
+glusterd_op_sm_inject_all_acc(uuid_t *txn_id)
+{
+ int ret = -1;
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACC, txn_id, NULL);
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+static int
+glusterd_check_bitrot_cmd(char *key, const int keylen, char *errstr,
+ const size_t size)
+{
+ int ret = -1;
+
+ if (len_strcmp(key, keylen, "bitrot") ||
+ len_strcmp(key, keylen, "features.bitrot")) {
+ snprintf(errstr, size,
+ " 'gluster volume set <VOLNAME> %s' is invalid command."
+ " Use 'gluster volume bitrot <VOLNAME> {enable|disable}'"
+ " instead.",
+ key);
+ goto out;
+ } else if (len_strcmp(key, keylen, "scrub-freq") ||
+ len_strcmp(key, keylen, "features.scrub-freq")) {
+ snprintf(errstr, size,
+ " 'gluster volume set <VOLNAME> %s' is invalid command."
+ " Use 'gluster volume bitrot <VOLNAME> scrub-frequency"
+ " {hourly|daily|weekly|biweekly|monthly}' instead.",
+ key);
+ goto out;
+ } else if (len_strcmp(key, keylen, "scrub") ||
+ len_strcmp(key, keylen, "features.scrub")) {
+ snprintf(errstr, size,
+ " 'gluster volume set <VOLNAME> %s' is invalid command."
+ " Use 'gluster volume bitrot <VOLNAME> scrub {pause|resume}'"
+ " instead.",
+ key);
+ goto out;
+ } else if (len_strcmp(key, keylen, "scrub-throttle") ||
+ len_strcmp(key, keylen, "features.scrub-throttle")) {
+ snprintf(errstr, size,
+ " 'gluster volume set <VOLNAME> %s' is invalid command."
+ " Use 'gluster volume bitrot <VOLNAME> scrub-throttle "
+ " {lazy|normal|aggressive}' instead.",
+ key);
+ goto out;
+ }
+
+ ret = 0;
out:
- if (ret && brick_req)
- GF_FREE (brick_req);
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
}
static int
-glusterd_op_stage_set_volume (dict_t *dict, char **op_errstr)
+glusterd_check_quota_cmd(char *key, const int keylen, char *value, char *errstr,
+ size_t size)
{
- int ret = 0;
- char *volname = NULL;
- int exists = 0;
- char *key = NULL;
- char *key_fixed = NULL;
- char *value = NULL;
- char str[100] = {0, };
- int count = 0;
- int dict_count = 0;
- char errstr[2048] = {0, };
- glusterd_volinfo_t *volinfo = NULL;
- dict_t *val_dict = NULL;
- gf_boolean_t global_opt = _gf_false;
- glusterd_volinfo_t *voliter = NULL;
- glusterd_conf_t *priv = NULL;
- xlator_t *this = NULL;
-
- GF_ASSERT (dict);
- this = THIS;
- GF_ASSERT (this);
- priv = this->private;
- GF_ASSERT (priv);
-
- val_dict = dict_new();
- if (!val_dict)
+ int ret = -1;
+ gf_boolean_t b = _gf_false;
+
+ if (len_strcmp(key, keylen, "quota") ||
+ len_strcmp(key, keylen, "features.quota")) {
+ ret = gf_string2boolean(value, &b);
+ if (ret)
+ goto out;
+ ret = -1;
+ if (b) {
+ snprintf(errstr, size,
+ " 'gluster volume set <VOLNAME> %s %s' is deprecated."
+ " Use 'gluster volume quota <VOLNAME> enable' instead.",
+ key, value);
+ } else {
+ snprintf(errstr, size,
+ " 'gluster volume set <VOLNAME> %s %s' is deprecated."
+ " Use 'gluster volume quota <VOLNAME> disable' instead.",
+ key, value);
+ }
+ goto out;
+ } else if (len_strcmp(key, keylen, "inode-quota") ||
+ len_strcmp(key, keylen, "features.inode-quota")) {
+ ret = gf_string2boolean(value, &b);
+ if (ret)
+ goto out;
+ ret = -1;
+ if (b) {
+ snprintf(
+ errstr, size,
+ " 'gluster volume set <VOLNAME> %s %s' is deprecated."
+ " Use 'gluster volume inode-quota <VOLNAME> enable' instead.",
+ key, value);
+ } else {
+ /* inode-quota disable not supported,
+ * use quota disable
+ */
+ snprintf(errstr, size,
+ " 'gluster volume set <VOLNAME> %s %s' is deprecated."
+ " Use 'gluster volume quota <VOLNAME> disable' instead.",
+ key, value);
+ }
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_brick_op_build_payload(glusterd_op_t op,
+ glusterd_brickinfo_t *brickinfo,
+ gd1_mgmt_brick_op_req **req, dict_t *dict)
+{
+ int ret = -1;
+ gd1_mgmt_brick_op_req *brick_req = NULL;
+ char *volname = NULL;
+ char name[1024] = {
+ 0,
+ };
+ gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(op < GD_OP_MAX);
+ GF_ASSERT(op > GD_OP_NONE);
+ GF_ASSERT(req);
+
+ switch (op) {
+ case GD_OP_REMOVE_BRICK:
+ case GD_OP_STOP_VOLUME:
+ brick_req = GF_CALLOC(1, sizeof(*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
+ }
+ brick_req->op = GLUSTERD_BRICK_TERMINATE;
+ brick_req->name = brickinfo->path;
+ glusterd_set_brick_status(brickinfo, GF_BRICK_STOPPING);
+ break;
+ case GD_OP_PROFILE_VOLUME:
+ brick_req = GF_CALLOC(1, sizeof(*brick_req),
+ gf_gld_mt_mop_brick_req_t);
- ret = dict_get_int32 (dict, "count", &dict_count);
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
+ goto out;
+ }
+
+ brick_req->op = GLUSTERD_BRICK_XLATOR_INFO;
+ brick_req->name = brickinfo->path;
+
+ break;
+ case GD_OP_HEAL_VOLUME: {
+ brick_req = GF_CALLOC(1, sizeof(*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
+ goto out;
+ }
+
+ brick_req->op = GLUSTERD_BRICK_XLATOR_OP;
+ brick_req->name = "";
+ ret = dict_get_int32n(dict, "heal-op", SLEN("heal-op"),
+ (int32_t *)&heal_op);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=heal-op", NULL);
+ goto out;
+ }
+ ret = dict_set_int32n(dict, "xl-op", SLEN("xl-op"), heal_op);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=xl-op", NULL);
+ goto out;
+ }
+ } break;
+ case GD_OP_STATUS_VOLUME: {
+ brick_req = GF_CALLOC(1, sizeof(*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
+ goto out;
+ }
+ brick_req->op = GLUSTERD_BRICK_STATUS;
+ brick_req->name = "";
+ ret = dict_set_strn(dict, "brick-name", SLEN("brick-name"),
+ brickinfo->path);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=brick-name", NULL);
+ goto out;
+ }
+ } break;
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ brick_req = GF_CALLOC(1, sizeof(*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
+ goto out;
+ }
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Count(dict),not set in Volume-Set");
+ brick_req->op = GLUSTERD_BRICK_XLATOR_DEFRAG;
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
goto out;
- }
+ }
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_VOLINFO_GET_FAIL, "Volume=%s", volname, NULL);
+ goto out;
+ }
+ snprintf(name, sizeof(name), "%s-dht", volname);
+ brick_req->name = gf_strdup(name);
+
+ break;
+ case GD_OP_SNAP:
+ case GD_OP_BARRIER:
+ brick_req = GF_CALLOC(1, sizeof(*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
+ goto out;
+ }
+ brick_req->op = GLUSTERD_BRICK_BARRIER;
+ brick_req->name = brickinfo->path;
+ break;
- if ( dict_count == 0 ) {
- /*No options would be specified of volume set help */
- if (dict_get (dict, "help" )) {
- ret = 0;
- goto out;
- }
+ default:
+ goto out;
+ break;
+ }
+
+ brick_req->dict.dict_len = 0;
+ brick_req->dict.dict_val = NULL;
+ ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val,
+ &brick_req->input.input_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+ *req = brick_req;
+ ret = 0;
- if (dict_get (dict, "help-xml" )) {
+out:
+ if (ret && brick_req)
+ GF_FREE(brick_req);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
-#if (HAVE_LIB_XML)
- ret = 0;
- goto out;
-#else
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "libxml not present in the system");
- *op_errstr = gf_strdup ("Error: xml libraries not "
- "present to produce xml-output");
- goto out;
-#endif
- }
- gf_log (this->name, GF_LOG_ERROR, "No options received ");
- *op_errstr = gf_strdup ("Options not specified");
- ret = -1;
+int
+glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req,
+ dict_t *dict)
+{
+ int ret = -1;
+ gd1_mgmt_brick_op_req *brick_req = NULL;
+ char *volname = NULL;
+
+ GF_ASSERT(op < GD_OP_MAX);
+ GF_ASSERT(op > GD_OP_NONE);
+ GF_ASSERT(req);
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
+
+ switch (op) {
+ case GD_OP_PROFILE_VOLUME:
+ brick_req = GF_CALLOC(1, sizeof(*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
- }
+ }
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
- goto out;
- }
+ brick_req->op = GLUSTERD_NODE_PROFILE;
+ brick_req->name = "";
- exists = glusterd_check_volume_exists (volname);
- if (!exists) {
- snprintf (errstr, sizeof (errstr), "Volume %s does not exist",
- volname);
- gf_log (this->name, GF_LOG_ERROR, "%s", errstr);
- *op_errstr = gf_strdup (errstr);
- ret = -1;
- goto out;
- }
+ break;
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to allocate memory");
+ case GD_OP_STATUS_VOLUME:
+ brick_req = GF_CALLOC(1, sizeof(*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
goto out;
- }
+ }
- ret = glusterd_validate_volume_id (dict, volinfo);
- if (ret)
- goto out;
+ brick_req->op = GLUSTERD_NODE_STATUS;
+ brick_req->name = "";
- for ( count = 1; ret != 1 ; count++ ) {
- global_opt = _gf_false;
- sprintf (str, "key%d", count);
- ret = dict_get_str (dict, str, &key);
- if (ret)
- break;
+ break;
- sprintf (str, "value%d", count);
- ret = dict_get_str (dict, str, &value);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid key,value pair in 'volume set'");
- ret = -1;
- goto out;
- }
+ case GD_OP_SCRUB_STATUS:
+ case GD_OP_SCRUB_ONDEMAND:
+ brick_req = GF_CALLOC(1, sizeof(*brick_req),
+ gf_gld_mt_mop_brick_req_t);
+ if (!brick_req) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY,
+ NULL);
+ goto out;
+ }
- if (strcmp (key, "memory-accounting") == 0) {
- gf_log (this->name, GF_LOG_INFO,
- "enabling memory accounting for volume %s",
- volname);
- ret = 0;
- goto out;
- }
+ brick_req->op = GLUSTERD_NODE_BITROT;
- if (is_key_glusterd_hooks_friendly (this, volname, key)) {
- ret = 0;
- goto out;
- }
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
+ goto out;
+ }
- exists = glusterd_check_option_exists (key, &key_fixed);
- if (exists == -1) {
- ret = -1;
- goto out;
- }
- if (!exists) {
- gf_log (this->name, GF_LOG_ERROR,
- "Option with name: %s "
- "does not exist", key);
- ret = snprintf (errstr, 2048,
- "option : %s does not exist",
- key);
- if (key_fixed)
- snprintf (errstr + ret, 2048 - ret,
- "\nDid you mean %s?", key_fixed);
- *op_errstr = gf_strdup (errstr);
- ret = -1;
- goto out;
- }
+ brick_req->name = gf_strdup(volname);
+ break;
+ default:
+ goto out;
+ }
- if (key_fixed)
- key = key_fixed;
+ brick_req->dict.dict_len = 0;
+ brick_req->dict.dict_val = NULL;
+ ret = dict_allocate_and_serialize(dict, &brick_req->input.input_val,
+ &brick_req->input.input_len);
- ret = glusterd_check_globaloption (key);
- if (ret)
- global_opt = _gf_true;
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
- ret = dict_set_str (val_dict, key, value);
+ *req = brick_req;
+ ret = 0;
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to set the options in 'volume set'");
- ret = -1;
- goto out;
- }
+out:
+ if (ret && brick_req)
+ GF_FREE(brick_req);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
- *op_errstr = NULL;
- if (!global_opt)
- ret = glusterd_validate_reconfopts (volinfo, val_dict, op_errstr);
- else {
- voliter = NULL;
- list_for_each_entry (voliter, &priv->volumes, vol_list) {
- ret = glusterd_validate_globalopts (voliter, val_dict, op_errstr);
- if (ret)
- break;
- }
- }
+static int
+glusterd_validate_quorum_options(xlator_t *this, char *fullkey, char *value,
+ char **op_errstr)
+{
+ int ret = 0;
+ char *key = NULL;
+ volume_option_t *opt = NULL;
+
+ if (!glusterd_is_quorum_option(fullkey))
+ goto out;
+ key = strchr(fullkey, '.');
+ if (key == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRCHR_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+ key++;
+ opt = xlator_volume_option_get(this, key);
+ if (!opt) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+ ret = xlator_option_validate(this, key, value, opt, op_errstr);
+out:
+ return ret;
+}
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG, "Could not create temp "
- "volfile, some option failed: %s", *op_errstr);
- goto out;
- }
- dict_del (val_dict, key);
+static int
+glusterd_validate_brick_mx_options(xlator_t *this, char *fullkey, char *value,
+ char **op_errstr)
+{
+ int ret = 0;
- if (key_fixed) {
- GF_FREE (key_fixed);
- key_fixed = NULL;
- }
- }
+ // Placeholder function for now
+ return ret;
+}
- ret = 0;
+static int
+glusterd_validate_shared_storage(char *value, char *errstr)
+{
+ int32_t ret = -1;
+ int32_t count = -1;
+ char *op = NULL;
+ char hook_script[PATH_MAX] = "";
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ int32_t len = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ GF_VALIDATE_OR_GOTO(this->name, value, out);
+ GF_VALIDATE_OR_GOTO(this->name, errstr, out);
+
+ if ((strcmp(value, "enable")) && (strcmp(value, "disable"))) {
+ snprintf(errstr, PATH_MAX,
+ "Invalid option(%s). Valid options "
+ "are 'enable' and 'disable'",
+ value);
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s",
+ errstr);
+ ret = -1;
+ goto out;
+ }
+
+ len = snprintf(hook_script, sizeof(hook_script),
+ "%s" GLUSTERD_SHRD_STRG_HOOK_SCRIPT, conf->workdir);
+ if ((len < 0) || (len >= sizeof(hook_script))) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = sys_access(hook_script, R_OK | X_OK);
+ if (ret) {
+ len = snprintf(errstr, PATH_MAX,
+ "The hook-script (%s) required "
+ "for this operation is not present. "
+ "Please install the hook-script "
+ "and retry",
+ hook_script);
+ if (len < 0) {
+ strncpy(errstr, "<error>", PATH_MAX);
+ }
+ gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED, "%s",
+ errstr);
+ goto out;
+ }
+
+ if (!strncmp(value, "disable", SLEN("disable"))) {
+ ret = dict_get_strn(conf->opts, GLUSTERD_SHARED_STORAGE_KEY,
+ SLEN(GLUSTERD_SHARED_STORAGE_KEY), &op);
+ if (ret || !strncmp(op, "disable", SLEN("disable"))) {
+ snprintf(errstr, PATH_MAX,
+ "Shared storage volume "
+ "does not exist. Please enable shared storage"
+ " for creating shared storage volume.");
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SHARED_STORAGE_DOES_NOT_EXIST, "%s", errstr);
+ ret = -1;
+ goto out;
+ }
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(GLUSTER_SHARED_STORAGE, &volinfo);
+ if (!ret) {
+ snprintf(errstr, PATH_MAX,
+ "Shared storage volume(" GLUSTER_SHARED_STORAGE
+ ") already exists.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_ALREADY_EXIST, "%s",
+ errstr);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_count_connected_peers(&count);
+ if (ret) {
+ snprintf(errstr, PATH_MAX,
+ "Failed to calculate number of connected peers.");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_COUNT_GET_FAIL, "%s",
+ errstr);
+ goto out;
+ }
+
+ if (count <= 1) {
+ snprintf(errstr, PATH_MAX,
+ "More than one node should "
+ "be up/present in the cluster to enable this option");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INSUFFICIENT_UP_NODES, "%s",
+ errstr);
+ ret = -1;
+ goto out;
+ }
out:
- if (val_dict)
- dict_unref (val_dict);
+ return ret;
+}
- if (key_fixed)
- GF_FREE (key_fixed);
+static int
+glusterd_validate_localtime_logging(char *value, char *errstr)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ int already_enabled = 0;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+ GF_VALIDATE_OR_GOTO(this->name, value, out);
+
+ already_enabled = gf_log_get_localtime();
+
+ ret = 0;
+ if (strcmp(value, "enable") == 0) {
+ gf_log_set_localtime(1);
+ if (!already_enabled)
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_LOCALTIME_LOGGING_ENABLE,
+ "localtime logging enable");
+ } else if (strcmp(value, "disable") == 0) {
+ gf_log_set_localtime(0);
+ if (already_enabled)
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_LOCALTIME_LOGGING_DISABLE,
+ "localtime logging disable");
+ } else {
+ ret = -1;
+ GF_VALIDATE_OR_GOTO(this->name, errstr, out);
+ snprintf(errstr, PATH_MAX,
+ "Invalid option(%s). Valid options "
+ "are 'enable' and 'disable'",
+ value);
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s",
+ errstr);
+ }
- if (ret) {
- if (!(*op_errstr)) {
- *op_errstr = gf_strdup ("Error, Validation Failed");
- gf_log (this->name, GF_LOG_DEBUG,
- "Error, Cannot Validate option :%s",
- *op_errstr);
- } else {
- gf_log (this->name, GF_LOG_DEBUG,
- "Error, Cannot Validate option");
- }
- }
- return ret;
+out:
+ return ret;
}
static int
-glusterd_op_stage_reset_volume (dict_t *dict, char **op_errstr)
+glusterd_validate_daemon_log_level(char *value, char *errstr)
{
- int ret = 0;
- char *volname = NULL;
- gf_boolean_t exists = _gf_false;
- char msg[2048] = {0};
- char *key = NULL;
- char *key_fixed = NULL;
- glusterd_volinfo_t *volinfo = NULL;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ GF_VALIDATE_OR_GOTO(this->name, value, out);
+
+ ret = 0;
+
+ if ((strcmp(value, "INFO")) && (strcmp(value, "WARNING")) &&
+ (strcmp(value, "DEBUG")) && (strcmp(value, "TRACE")) &&
+ (strcmp(value, "ERROR"))) {
+ ret = -1;
+ GF_VALIDATE_OR_GOTO(this->name, errstr, out);
+ snprintf(errstr, PATH_MAX,
+ "Invalid option(%s). Valid options "
+ "are 'INFO' or 'WARNING' or 'ERROR' or 'DEBUG' or "
+ " 'TRACE'",
+ value);
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s",
+ errstr);
+ }
- ret = dict_get_str (dict, "volname", &volname);
+out:
+ return ret;
+}
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
+static int
+glusterd_op_stage_set_volume(dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *volname = NULL;
+ int exists = 0;
+ char *key = NULL;
+ char *key_fixed = NULL;
+ char *value = NULL;
+ char *val_dup = NULL;
+ char keystr[100] = {
+ 0,
+ };
+ int keystr_len;
+ int keylen;
+ char *trash_path = NULL;
+ int trash_path_len = 0;
+ int count = 0;
+ int dict_count = 0;
+ char errstr[PATH_MAX] = {
+ 0,
+ };
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ dict_t *val_dict = NULL;
+ gf_boolean_t global_opt = _gf_false;
+ gf_boolean_t key_matched = _gf_false; /* if a key was processed or not*/
+ glusterd_volinfo_t *voliter = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ uint32_t new_op_version = GD_OP_VERSION_MIN;
+ uint32_t local_new_op_version = GD_OP_VERSION_MIN;
+ uint32_t local_new_client_op_version = GD_OP_VERSION_MIN;
+ uint32_t key_op_version = GD_OP_VERSION_MIN;
+ uint32_t local_key_op_version = GD_OP_VERSION_MIN;
+ gf_boolean_t origin_glusterd = _gf_true;
+ gf_boolean_t check_op_version = _gf_true;
+ gf_boolean_t trash_enabled = _gf_false;
+ gf_boolean_t all_vol = _gf_false;
+ struct volopt_map_entry *vmep = NULL;
+
+ GF_ASSERT(dict);
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ /* Check if we can support the required op-version
+ * This check is not done on the originator glusterd. The originator
+ * glusterd sets this value.
+ */
+ origin_glusterd = is_origin_glusterd(dict);
+
+ if (!origin_glusterd) {
+ /* Check for v3.3.x origin glusterd */
+ check_op_version = dict_get_str_boolean(dict, "check-op-version",
+ _gf_false);
+
+ if (check_op_version) {
+ ret = dict_get_uint32(dict, "new-op-version", &new_op_version);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=new-op-version", NULL);
goto out;
- }
+ }
- exists = glusterd_check_volume_exists (volname);
-
- if (!exists) {
- snprintf (msg, sizeof (msg), "Volume %s does not "
- "exist", volname);
- gf_log ("", GF_LOG_ERROR, "%s", msg);
- *op_errstr = gf_strdup (msg);
+ if ((new_op_version > GD_OP_VERSION_MAX) ||
+ (new_op_version < GD_OP_VERSION_MIN)) {
ret = -1;
+ snprintf(errstr, sizeof(errstr),
+ "Required op_version (%d) is not supported."
+ " Max supported op version is %d",
+ new_op_version, priv->op_version);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION,
+ "%s", errstr);
goto out;
+ }
}
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret)
- goto out;
+ }
- ret = glusterd_validate_volume_id (dict, volinfo);
- if (ret)
- goto out;
+ ret = dict_get_int32_sizen(dict, "count", &dict_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Count(dict),not set in Volume-Set");
+ goto out;
+ }
- ret = dict_get_str (dict, "key", &key);
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to get option key");
- goto out;
+ if (dict_count == 0) {
+ /*No options would be specified of volume set help */
+ if (dict_get_sizen(dict, "help")) {
+ ret = 0;
+ goto out;
}
- if (strcmp(key, "all")) {
- exists = glusterd_check_option_exists (key, &key_fixed);
- if (exists == -1) {
- ret = -1;
- goto out;
- }
- if (!exists) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "Option %s does not exist", key);
- ret = snprintf (msg, 2048,
- "Option %s does not exist", key);
- if (key_fixed)
- snprintf (msg + ret, 2048 - ret,
- "\nDid you mean %s?", key_fixed);
- *op_errstr = gf_strdup (msg);
- ret = -1;
- goto out;
- }
+
+ if (dict_get_sizen(dict, "help-xml")) {
+#if (HAVE_LIB_XML)
+ ret = 0;
+ goto out;
+#else
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MODULE_NOT_INSTALLED,
+ "libxml not present in the system");
+ *op_errstr = gf_strdup(
+ "Error: xml libraries not present to produce xml-output");
+ goto out;
+#endif
+ }
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_OPTIONS_GIVEN,
+ "No options received ");
+ *op_errstr = gf_strdup("Options not specified");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_str_sizen(dict, "volname", &volname);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
+ goto out;
+ }
+
+ if (strcasecmp(volname, "all") != 0) {
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(errstr, sizeof(errstr), FMTSTR_CHECK_VOL_EXISTS, volname);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
}
-out:
- if (key_fixed)
- GF_FREE (key_fixed);
+ ret = glusterd_validate_volume_id(dict, volinfo);
+ if (ret)
+ goto out;
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ local_new_op_version = volinfo->op_version;
+ local_new_client_op_version = volinfo->client_op_version;
- return ret;
-}
+ } else {
+ all_vol = _gf_true;
+ }
+ val_dict = dict_new();
+ if (!val_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+ for (count = 1; ret != 1; count++) {
+ keystr_len = sprintf(keystr, "key%d", count);
+ ret = dict_get_strn(dict, keystr, keystr_len, &key);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", keystr, NULL);
+ break;
+ }
-static int
-glusterd_op_stage_sync_volume (dict_t *dict, char **op_errstr)
-{
- int ret = -1;
- char *volname = NULL;
- char *hostname = NULL;
- gf_boolean_t exists = _gf_false;
- glusterd_peerinfo_t *peerinfo = NULL;
- char msg[2048] = {0,};
- glusterd_volinfo_t *volinfo = NULL;
-
- ret = dict_get_str (dict, "hostname", &hostname);
+ keystr_len = sprintf(keystr, "value%d", count);
+ ret = dict_get_strn(dict, keystr, keystr_len, &value);
if (ret) {
- snprintf (msg, sizeof (msg), "hostname couldn't be "
- "retrieved from msg");
- *op_errstr = gf_strdup (msg);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "invalid key,value pair in 'volume set'");
+ ret = -1;
+ goto out;
+ }
+
+ key_matched = _gf_false;
+ keylen = strlen(key);
+ if (len_strcmp(key, keylen, "config.memory-accounting")) {
+ key_matched = _gf_true;
+ gf_msg_debug(this->name, 0,
+ "enabling memory accounting for volume %s", volname);
+ ret = 0;
+ } else if (len_strcmp(key, keylen, "config.transport")) {
+ key_matched = _gf_true;
+ gf_msg_debug(this->name, 0, "changing transport-type for volume %s",
+ volname);
+ ret = 0;
+ /* if value is none of 'tcp/rdma/tcp,rdma' error out */
+ if (!((strcasecmp(value, "rdma") == 0) ||
+ (strcasecmp(value, "tcp") == 0) ||
+ (strcasecmp(value, "tcp,rdma") == 0) ||
+ (strcasecmp(value, "rdma,tcp") == 0))) {
+ ret = snprintf(errstr, sizeof(errstr),
+ "transport-type %s does not exist", value);
+ /* lets not bother about above return value,
+ its a failure anyways */
+ ret = -1;
goto out;
+ }
+ } else if (len_strcmp(key, keylen, "ganesha.enable")) {
+ key_matched = _gf_true;
+ if (!strcmp(value, "off") == 0) {
+ ret = ganesha_manage_export(dict, "off", _gf_true, op_errstr);
+ if (ret)
+ goto out;
+ }
}
- ret = glusterd_is_local_addr (hostname);
- if (ret) {
- ret = glusterd_friend_find (NULL, hostname, &peerinfo);
- if (ret) {
- snprintf (msg, sizeof (msg), "%s, is not a friend",
- hostname);
- *op_errstr = gf_strdup (msg);
- goto out;
- }
+ if (!key_matched) {
+ ret = glusterd_check_bitrot_cmd(key, keylen, errstr,
+ sizeof(errstr));
+ if (ret)
+ goto out;
+ ret = glusterd_check_quota_cmd(key, keylen, value, errstr,
+ sizeof(errstr));
+ if (ret)
+ goto out;
+ }
- if (!peerinfo->connected) {
- snprintf (msg, sizeof (msg), "%s, is not connected at "
- "the moment", hostname);
- *op_errstr = gf_strdup (msg);
- ret = -1;
- goto out;
- }
- } else {
+ if (is_key_glusterd_hooks_friendly(key))
+ continue;
- //volname is not present in case of sync all
- ret = dict_get_str (dict, "volname", &volname);
- if (!ret) {
- exists = glusterd_check_volume_exists (volname);
- if (!exists) {
- snprintf (msg, sizeof (msg), "Volume %s "
- "does not exist", volname);
- *op_errstr = gf_strdup (msg);
- ret = -1;
- goto out;
- }
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret)
- goto out;
+ ret = glusterd_volopt_validate(volinfo, dict, key, value, op_errstr);
+ if (ret)
+ goto out;
- ret = glusterd_validate_volume_id (dict, volinfo);
- if (ret)
- goto out;
- } else {
- ret = 0;
- }
+ exists = glusterd_check_option_exists(key, &key_fixed);
+ if (exists == -1) {
+ ret = -1;
+ goto out;
}
-out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
-
- return ret;
-}
-
-static int
-glusterd_op_stage_status_volume (dict_t *dict, char **op_errstr)
-{
- int ret = -1;
- uint32_t cmd = 0;
- char msg[2048] = {0,};
- char *volname = NULL;
- char *brick = NULL;
- xlator_t *this = NULL;
- glusterd_conf_t *priv = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
- glusterd_brickinfo_t *tmpbrickinfo = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- dict_t *vol_opts = NULL;
- gf_boolean_t nfs_disabled = _gf_false;
- gf_boolean_t shd_enabled = _gf_true;
-
- GF_ASSERT (dict);
- this = THIS;
- GF_ASSERT (this);
- priv = this->private;
- GF_ASSERT(priv);
-
- ret = dict_get_uint32 (dict, "cmd", &cmd);
- if (ret)
+ if (!exists) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "Option with name: %s does not exist", key);
+ ret = snprintf(errstr, sizeof(errstr), "option : %s does not exist",
+ key);
+ if (key_fixed)
+ snprintf(errstr + ret, sizeof(errstr) - ret,
+ "\nDid you mean %s?", key_fixed);
+ ret = -1;
+ goto out;
+ }
+
+ if (key_fixed) {
+ key = key_fixed;
+ keylen = strlen(key_fixed);
+ }
+
+ if (len_strcmp(key, keylen, "cluster.granular-entry-heal")) {
+ /* For granular entry-heal, if the set command was
+ * invoked through volume-set CLI, then allow the
+ * command only if the volume is still in 'Created'
+ * state
+ */
+ if (volinfo && volinfo->status != GLUSTERD_STATUS_NONE &&
+ (dict_get_sizen(dict, "is-special-key") == NULL)) {
+ snprintf(errstr, sizeof(errstr),
+ " 'gluster volume set <VOLNAME> %s {enable, disable}'"
+ " is not supported."
+ " Use 'gluster volume heal <VOLNAME> "
+ "granular-entry-heal {enable, disable}' instead.",
+ key);
+ ret = -1;
goto out;
-
- if (cmd & GF_CLI_STATUS_ALL)
+ }
+ } else if (len_strcmp(key, keylen, GLUSTERD_GLOBAL_OP_VERSION_KEY)) {
+ /* Check if the key is cluster.op-version and set
+ * local_new_op_version to the value given if possible.
+ */
+ if (!all_vol) {
+ ret = -1;
+ snprintf(errstr, sizeof(errstr),
+ "Option \"%s\" is not valid for a single volume", key);
goto out;
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Unable to get volume name");
+ }
+ /* Check if cluster.op-version is the only option being
+ * set
+ */
+ if (count != 1) {
+ ret = -1;
+ snprintf(errstr, sizeof(errstr),
+ "Option \"%s\" cannot be set along with other options",
+ key);
goto out;
- }
+ }
+ /* Just reusing the variable, but I'm using it for
+ * storing the op-version from value
+ */
+ ret = gf_string2uint(value, &local_key_op_version);
+ if (ret) {
+ snprintf(errstr, sizeof(errstr),
+ "invalid number format \"%s\" in option \"%s\"", value,
+ key);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY, "%s",
+ errstr);
+ goto out;
+ }
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- snprintf (msg, sizeof(msg), "Volume %s does not exist",
- volname);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ if (local_key_op_version > GD_OP_VERSION_MAX ||
+ local_key_op_version < GD_OP_VERSION_MIN) {
ret = -1;
+ snprintf(errstr, sizeof(errstr),
+ "Required op_version (%d) is not supported."
+ " Max supported op version is %d",
+ local_key_op_version, priv->op_version);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VERSION_UNSUPPORTED,
+ "%s", errstr);
goto out;
+ }
+ if (local_key_op_version > priv->op_version) {
+ local_new_op_version = local_key_op_version;
+ } else {
+ ret = -1;
+ snprintf(errstr, sizeof(errstr),
+ "Required op-version (%d) should"
+ " not be equal or lower than current"
+ " cluster op-version (%d).",
+ local_key_op_version, priv->op_version);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VERSION_UNSUPPORTED,
+ "%s", errstr);
+ goto out;
+ }
+
+ goto cont;
}
- ret = glusterd_validate_volume_id (dict, volinfo);
+ ALL_VOLUME_OPTION_CHECK(volname, _gf_false, key, ret, op_errstr, out);
+ ret = glusterd_validate_quorum_options(this, key, value, op_errstr);
if (ret)
- goto out;
+ goto out;
- ret = glusterd_is_volume_started (volinfo);
- if (!ret) {
- snprintf (msg, sizeof (msg), "Volume %s is not started",
- volname);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ ret = glusterd_validate_brick_mx_options(this, key, value, op_errstr);
+ if (ret)
+ goto out;
+
+ vmep = gd_get_vmep(key);
+ local_key_op_version = glusterd_get_op_version_from_vmep(vmep);
+ if (local_key_op_version > local_new_op_version)
+ local_new_op_version = local_key_op_version;
+ if (gd_is_client_option(vmep) &&
+ (local_key_op_version > local_new_client_op_version))
+ local_new_client_op_version = local_key_op_version;
+
+ sprintf(keystr, "op-version%d", count);
+ if (origin_glusterd) {
+ ret = dict_set_uint32(dict, keystr, local_key_op_version);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set key-op-version in dict");
+ goto out;
+ }
+ } else if (check_op_version) {
+ ret = dict_get_uint32(dict, keystr, &key_op_version);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get key-op-version from dict");
+ goto out;
+ }
+ if (local_key_op_version != key_op_version) {
ret = -1;
+ snprintf(errstr, sizeof(errstr),
+ "option: %s op-version mismatch", key);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERSION_MISMATCH,
+ "%s, required op-version = %" PRIu32
+ ", available op-version = %" PRIu32,
+ errstr, key_op_version, local_key_op_version);
goto out;
+ }
}
- vol_opts = volinfo->dict;
+ global_opt = glusterd_check_globaloption(key);
- if ((cmd & GF_CLI_STATUS_NFS) != 0) {
- nfs_disabled = dict_get_str_boolean (vol_opts, "nfs.disable",
- _gf_false);
- if (nfs_disabled) {
- ret = -1;
- snprintf (msg, sizeof (msg),
- "NFS server is disabled for volume %s",
- volname);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ if (len_strcmp(key, keylen, GLUSTERD_SHARED_STORAGE_KEY)) {
+ ret = glusterd_validate_shared_storage(value, errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SHARED_STRG_VOL_OPT_VALIDATE_FAIL,
+ "Failed to validate shared storage volume options");
+ goto out;
+ }
+ } else if (len_strcmp(key, keylen, GLUSTERD_LOCALTIME_LOGGING_KEY)) {
+ ret = glusterd_validate_localtime_logging(value, errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_LOCALTIME_LOGGING_VOL_OPT_VALIDATE_FAIL,
+ "Failed to validate localtime logging volume options");
+ goto out;
+ }
+ } else if (len_strcmp(key, keylen, GLUSTERD_DAEMON_LOG_LEVEL_KEY)) {
+ ret = glusterd_validate_daemon_log_level(value, errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DAEMON_LOG_LEVEL_VOL_OPT_VALIDATE_FAIL,
+ "Failed to validate daemon-log-level volume options");
+ goto out;
+ }
+ } else if (len_strcmp(key, keylen, "features.trash-dir")) {
+ if (volinfo) {
+ ret = glusterd_volinfo_get(volinfo, VKEY_FEATURES_TRASH,
+ &val_dup);
+ if (!ret && val_dup) {
+ ret = gf_string2boolean(val_dup, &trash_enabled);
+ if (ret)
goto out;
}
- } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
- if (!glusterd_is_volume_replicate (volinfo)) {
+ }
+ if (!trash_enabled) {
+ snprintf(errstr, sizeof(errstr),
+ "Trash translator is not enabled. "
+ "Use volume set %s trash on",
+ volname);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_SET_FAIL,
+ "Unable to set the options in 'volume set': %s", errstr);
+ ret = -1;
+ goto out;
+ }
+ if (strchr(value, '/')) {
+ snprintf(errstr, sizeof(errstr),
+ "Path is not allowed as option");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_SET_FAIL,
+ "Unable to set the options in 'volume set': %s", errstr);
+ ret = -1;
+ goto out;
+ }
+
+ list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ /* Check for local brick */
+ if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
+ trash_path_len = strlen(value) + strlen(brickinfo->path) +
+ 2;
+ trash_path = GF_MALLOC(trash_path_len, gf_common_mt_char);
+ snprintf(trash_path, trash_path_len, "%s/%s",
+ brickinfo->path, value);
+
+ /* Checks whether a directory with
+ given option exists or not */
+ if (!sys_access(trash_path, R_OK)) {
+ snprintf(errstr, sizeof(errstr), "Path %s exists",
+ value);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_SET_FAIL,
+ "Unable to set the options in 'volume set': %s",
+ errstr);
ret = -1;
- snprintf (msg, sizeof (msg),
- "Volume %s is not of type replicate",
- volname);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
goto out;
- }
-
- shd_enabled = dict_get_str_boolean (vol_opts,
- "cluster.self-heal-daemon",
- _gf_true);
- if (!shd_enabled) {
+ } else {
+ gf_msg_debug(this->name, 0,
+ "Directory with given name does not exist,"
+ " continuing");
+ }
+
+ if (volinfo->status == GLUSTERD_STATUS_STARTED &&
+ brickinfo->status != GF_BRICK_STARTED) {
+ /* If volume is in started state , checks
+ whether bricks are online */
+ snprintf(errstr, sizeof(errstr),
+ "One or more bricks are down");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_SET_FAIL,
+ "Unable to set the options in 'volume set': %s",
+ errstr);
ret = -1;
- snprintf (msg, sizeof (msg),
- "Self-heal Daemon is disabled for volume %s",
- volname);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
goto out;
+ }
}
-
- } else if ((cmd & GF_CLI_STATUS_BRICK) != 0) {
- ret = dict_get_str (dict, "brick", &brick);
- if (ret)
- goto out;
-
- ret = glusterd_brickinfo_from_brick (brick, &brickinfo);
- if (ret) {
- snprintf (msg, sizeof (msg), "%s is not a brick",
- brick);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
- goto out;
+ if (trash_path) {
+ GF_FREE(trash_path);
+ trash_path = NULL;
}
+ }
+ }
- ret = glusterd_volume_brickinfo_get (NULL,
- brickinfo->hostname,
- brickinfo->path,
- volinfo,
- &tmpbrickinfo,
- GF_PATH_COMPLETE);
-
- if (ret) {
- snprintf (msg, sizeof(msg), "No brick %s in"
- " volume %s", brick, volname);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
+ ret = dict_set_strn(val_dict, key, keylen, value);
- ret = -1;
- goto out;
- }
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set the options in 'volume set'");
+ ret = -1;
+ goto out;
+ }
+
+ *op_errstr = NULL;
+ if (!global_opt && !all_vol)
+ ret = glusterd_validate_reconfopts(volinfo, val_dict, op_errstr);
+ else if (!all_vol) {
+ voliter = NULL;
+ cds_list_for_each_entry(voliter, &priv->volumes, vol_list)
+ {
+ ret = glusterd_validate_globalopts(voliter, val_dict,
+ op_errstr);
+ if (ret)
+ break;
+ }
}
- ret = 0;
-
- out:
if (ret) {
- if (msg[0] != '\0')
- *op_errstr = gf_strdup (msg);
- else
- *op_errstr = gf_strdup ("Validation Failed for Status");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Could not create temp volfile, some option failed: %s",
+ *op_errstr);
+ goto out;
+ }
+ dict_deln(val_dict, key, keylen);
+
+ if (key_fixed) {
+ GF_FREE(key_fixed);
+ key_fixed = NULL;
+ }
+ }
+
+ /* Check if all the connected clients support the new client-op-version
+ */
+ ret = glusterd_check_client_op_version_support(
+ volname, local_new_client_op_version, op_errstr);
+ if (ret)
+ goto out;
+cont:
+ if (origin_glusterd) {
+ ret = dict_set_uint32(dict, "new-op-version", local_new_op_version);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set new-op-version in dict");
+ goto out;
+ }
+ /* Set this value in dict so other peers know to check for
+ * op-version. This is a hack for 3.3.x compatibility
+ *
+ * TODO: Remove this and the other places this is referred once
+ * 3.3.x compatibility is not required
+ */
+ ret = dict_set_int32_sizen(dict, "check-op-version", 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set check-op-version in dict");
+ goto out;
}
+ }
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning: %d", ret);
- return ret;
-}
+ ret = 0;
+out:
+ if (val_dict)
+ dict_unref(val_dict);
-static gf_boolean_t
-glusterd_is_profile_on (glusterd_volinfo_t *volinfo)
-{
- int ret = -1;
- gf_boolean_t is_latency_on = _gf_false;
- gf_boolean_t is_fd_stats_on = _gf_false;
-
- GF_ASSERT (volinfo);
-
- ret = glusterd_volinfo_get_boolean (volinfo, VKEY_DIAG_CNT_FOP_HITS);
- if (ret != -1)
- is_fd_stats_on = ret;
- ret = glusterd_volinfo_get_boolean (volinfo, VKEY_DIAG_LAT_MEASUREMENT);
- if (ret != -1)
- is_latency_on = ret;
- if ((_gf_true == is_latency_on) &&
- (_gf_true == is_fd_stats_on))
- return _gf_true;
- return _gf_false;
+ if (trash_path)
+ GF_FREE(trash_path);
+
+ GF_FREE(key_fixed);
+ if (errstr[0] != '\0')
+ *op_errstr = gf_strdup(errstr);
+
+ if (ret) {
+ if (!(*op_errstr)) {
+ *op_errstr = gf_strdup("Error, Validation Failed");
+ gf_msg_debug(this->name, 0, "Error, Cannot Validate option :%s",
+ *op_errstr);
+ } else {
+ gf_msg_debug(this->name, 0, "Error, Cannot Validate option");
+ }
+ }
+ return ret;
}
static int
-glusterd_op_stage_stats_volume (dict_t *dict, char **op_errstr)
+glusterd_op_stage_reset_volume(dict_t *dict, char **op_errstr)
{
- int ret = -1;
- char *volname = NULL;
- gf_boolean_t exists = _gf_false;
- char msg[2048] = {0,};
- int32_t stats_op = GF_CLI_STATS_NONE;
- glusterd_volinfo_t *volinfo = NULL;
-
- ret = dict_get_str (dict, "volname", &volname);
+ int ret = 0;
+ char *volname = NULL;
+ int exists = 0;
+ char msg[2048] = {0};
+ char *key = NULL;
+ char *key_fixed = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ if (strcasecmp(volname, "all") != 0) {
+ ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
- snprintf (msg, sizeof (msg), "Volume name get failed");
- goto out;
+ snprintf(msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
}
- exists = glusterd_check_volume_exists (volname);
- ret = glusterd_volinfo_find (volname, &volinfo);
- if ((!exists) || (ret < 0)) {
- snprintf (msg, sizeof (msg), "Volume %s, "
- "doesn't exist", volname);
- ret = -1;
- goto out;
- }
-
- ret = glusterd_validate_volume_id (dict, volinfo);
+ ret = glusterd_validate_volume_id(dict, volinfo);
if (ret)
- goto out;
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "key", SLEN("key"), &key);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get option key");
+ goto out;
+ }
+
+ /* *
+ * If key ganesha.enable is set, then volume should be unexported from
+ * ganesha server. Also it is a volume-level option, perform only when
+ * volume name not equal to "all"(in other words if volinfo != NULL)
+ */
+ if (volinfo && (!strcmp(key, "all") || !strcmp(key, "ganesha.enable"))) {
+ if (glusterd_check_ganesha_export(volinfo)) {
+ ret = ganesha_manage_export(dict, "off", _gf_true, op_errstr);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL,
+ "Could not reset ganesha.enable key");
+ }
+ }
+
+ if (strcmp(key, "all")) {
+ exists = glusterd_check_option_exists(key, &key_fixed);
+ if (exists == -1) {
+ ret = -1;
+ goto out;
+ }
- ret = dict_get_int32 (dict, "op", &stats_op);
- if (ret) {
- snprintf (msg, sizeof (msg), "Volume profile op get failed");
+ if (!exists) {
+ ret = snprintf(msg, sizeof(msg), "Option %s does not exist", key);
+ if (key_fixed)
+ snprintf(msg + ret, sizeof(msg) - ret, "\nDid you mean %s?",
+ key_fixed);
+ ret = -1;
+ goto out;
+ } else if (exists > 0) {
+ if (key_fixed)
+ key = key_fixed;
+
+ /* 'gluster volume set/reset <VOLNAME>
+ * features.quota/features.inode-quota' should
+ * not be allowed as it is deprecated.
+ * Setting and resetting quota/inode-quota features
+ * should be allowed only through 'gluster volume quota
+ * <VOLNAME> enable/disable'.
+ * But, 'gluster volume set features.quota-deem-statfs'
+ * can be turned on/off when quota is enabled.
+ */
+
+ if (strcmp(VKEY_FEATURES_INODE_QUOTA, key) == 0 ||
+ strcmp(VKEY_FEATURES_QUOTA, key) == 0) {
+ snprintf(msg, sizeof(msg),
+ "'gluster volume "
+ "reset <VOLNAME> %s' is deprecated. "
+ "Use 'gluster volume quota <VOLNAME> "
+ "disable' instead.",
+ key);
+ ret = -1;
goto out;
+ }
+ ALL_VOLUME_OPTION_CHECK(volname, _gf_false, key, ret, op_errstr,
+ out);
}
+ }
- if (GF_CLI_STATS_START == stats_op) {
- if (_gf_true == glusterd_is_profile_on (volinfo)) {
- snprintf (msg, sizeof (msg), "Profile on Volume %s is"
- " already started", volinfo->volname);
- ret = -1;
- goto out;
- }
+out:
+ GF_FREE(key_fixed);
- }
- if ((GF_CLI_STATS_STOP == stats_op) ||
- (GF_CLI_STATS_INFO == stats_op)) {
- if (_gf_false == glusterd_is_profile_on (volinfo)) {
- snprintf (msg, sizeof (msg), "Profile on Volume %s is"
- " not started", volinfo->volname);
- ret = -1;
+ if (msg[0] != '\0') {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_STAGE_RESET_VOL_FAIL,
+ "%s", msg);
+ *op_errstr = gf_strdup(msg);
+ }
- goto out;
- }
- }
- if ((GF_CLI_STATS_TOP == stats_op) ||
- (GF_CLI_STATS_INFO == stats_op)) {
- if (_gf_false == glusterd_is_volume_started (volinfo)) {
- snprintf (msg, sizeof (msg), "Volume %s is not started.",
- volinfo->volname);
- gf_log ("glusterd", GF_LOG_ERROR, "%s", msg);
- ret = -1;
- goto out;
- }
- }
- ret = 0;
-out:
- if (msg[0] != '\0') {
- gf_log ("glusterd", GF_LOG_ERROR, "%s", msg);
- *op_errstr = gf_strdup (msg);
- }
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
-}
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
-static void
-_delete_reconfig_opt (dict_t *this, char *key, data_t *value, void *data)
+static int
+glusterd_op_stage_sync_volume(dict_t *dict, char **op_errstr)
{
- int32_t *is_force = 0;
-
- GF_ASSERT (data);
- is_force = (int32_t*)data;
-
- if (*is_force != 1 &&
- (_gf_true == glusterd_check_voloption_flags (key,
- OPT_FLAG_FORCE))) {
- /* indicate to caller that we don't set the option
- * due to being protected
- */
- *is_force = -1;
+ int ret = -1;
+ char *volname = NULL;
+ char *hostname = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ char msg[2048] = {
+ 0,
+ };
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname);
+ if (ret) {
+ snprintf(msg, sizeof(msg),
+ "hostname couldn't be "
+ "retrieved from msg");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=hostname", NULL);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
+
+ if (gf_is_local_addr(hostname)) {
+ // volname is not present in case of sync all
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (!ret) {
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(msg, sizeof(msg),
+ "Volume %s "
+ "does not exist",
+ volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOL_NOT_FOUND,
+ "Volume=%s", volname, NULL);
+ *op_errstr = gf_strdup(msg);
goto out;
- }
+ }
+ }
+ } else {
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find(NULL, hostname);
+ if (peerinfo == NULL) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ snprintf(msg, sizeof(msg), "%s, is not a friend", hostname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND,
+ "Peer_name=%s", hostname, NULL);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+
+ } else if (!peerinfo->connected) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "%s, is not connected at "
+ "the moment",
+ hostname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_DISCONNECTED,
+ "Peer_name=%s", hostname, NULL);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
+
+ RCU_READ_UNLOCK;
+ }
- gf_log ("", GF_LOG_DEBUG, "deleting dict with key=%s,value=%s",
- key, value->data);
- dict_del (this, key);
out:
- return;
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+
+ return ret;
}
static int
-glusterd_options_reset (glusterd_volinfo_t *volinfo, char *key,
- int32_t *is_force)
+glusterd_op_stage_status_volume(dict_t *dict, char **op_errstr)
{
- int ret = 0;
- data_t *value = NULL;
- char *key_fixed = NULL;
-
- gf_log ("", GF_LOG_DEBUG, "Received volume set reset command");
-
- GF_ASSERT (volinfo->dict);
- GF_ASSERT (key);
-
- if (!strncmp(key, "all", 3))
- dict_foreach (volinfo->dict, _delete_reconfig_opt, is_force);
- else {
- if (glusterd_check_option_exists (key, &key_fixed) != 1) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "volinfo dict inconsistency: option %s not found",
- key);
- ret = -1;
- goto out;
- }
- value = dict_get (volinfo->dict, key_fixed);
- if (!value) {
- gf_log ("glusterd", GF_LOG_DEBUG,
- "no value set for option %s", key_fixed);
- goto out;
- }
- _delete_reconfig_opt (volinfo->dict, key_fixed, value, is_force);
+ int ret = -1;
+ uint32_t cmd = 0;
+ char msg[2048] = {
+ 0,
+ };
+ char *volname = NULL;
+ char *brick = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ dict_t *vol_opts = NULL;
+#ifdef BUILD_GNFS
+ gf_boolean_t nfs_disabled = _gf_false;
+#endif
+ gf_boolean_t shd_enabled = _gf_false;
+
+ GF_ASSERT(dict);
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_uint32(dict, "cmd", &cmd);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=cmd", NULL);
+ goto out;
+ }
+
+ if (cmd & GF_CLI_STATUS_ALL)
+ goto out;
+
+ if ((cmd & GF_CLI_STATUS_QUOTAD) &&
+ (priv->op_version == GD_OP_VERSION_MIN)) {
+ snprintf(msg, sizeof(msg),
+ "The cluster is operating at "
+ "version 1. Getting the status of quotad is not "
+ "allowed in this state.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_QUOTA_GET_STAT_FAIL,
+ msg, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ if ((cmd & GF_CLI_STATUS_SNAPD) &&
+ (priv->op_version < GD_OP_VERSION_3_6_0)) {
+ snprintf(msg, sizeof(msg),
+ "The cluster is operating at "
+ "version less than %d. Getting the "
+ "status of snapd is not allowed in this state.",
+ GD_OP_VERSION_3_6_0);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SNAP_STATUS_FAIL, msg,
+ NULL);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL,
+ "Volume=%s", volname, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id(dict, volinfo);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VALIDATE_FAILED, NULL);
+ goto out;
+ }
+
+ ret = glusterd_is_volume_started(volinfo);
+ if (!ret) {
+ snprintf(msg, sizeof(msg), "Volume %s is not started", volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOL_NOT_STARTED,
+ "Volume=%s", volname, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ vol_opts = volinfo->dict;
+
+ if ((cmd & GF_CLI_STATUS_SHD) != 0) {
+ if (glusterd_is_shd_compatible_volume(volinfo)) {
+ shd_enabled = gd_is_self_heal_enabled(volinfo, vol_opts);
+ } else {
+ ret = -1;
+ snprintf(msg, sizeof(msg), "Volume %s is not Self-heal compatible",
+ volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOL_SHD_NOT_COMP,
+ "Volume=%s", volname, NULL);
+ goto out;
+ }
+ if (!shd_enabled) {
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "Self-heal Daemon is disabled for volume %s", volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SELF_HEALD_DISABLED,
+ "Volume=%s", volname, NULL);
+ goto out;
+ }
+#ifdef BUILD_GNFS
+ } else if ((cmd & GF_CLI_STATUS_NFS) != 0) {
+ nfs_disabled = dict_get_str_boolean(vol_opts, NFS_DISABLE_MAP_KEY,
+ _gf_false);
+ if (nfs_disabled) {
+ ret = -1;
+ snprintf(msg, sizeof(msg), "NFS server is disabled for volume %s",
+ volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_NFS_GANESHA_DISABLED, "Volume=%s", volname, NULL);
+ goto out;
+ }
+#endif
+ } else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0) {
+ if (!glusterd_is_volume_quota_enabled(volinfo)) {
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "Volume %s does not have "
+ "quota enabled",
+ volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_QUOTA_DISABLED,
+ "Volume=%s", volname, NULL);
+ goto out;
+ }
+ } else if ((cmd & GF_CLI_STATUS_BITD) != 0) {
+ if (!glusterd_is_bitrot_enabled(volinfo)) {
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "Volume %s does not have "
+ "bitrot enabled",
+ volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BITROT_NOT_ENABLED,
+ "Volume=%s", volname, NULL);
+ goto out;
+ }
+ } else if ((cmd & GF_CLI_STATUS_SCRUB) != 0) {
+ if (!glusterd_is_bitrot_enabled(volinfo)) {
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "Volume %s does not have "
+ "bitrot enabled. Scrubber will be enabled "
+ "automatically if bitrot is enabled",
+ volname);
+ gf_smsg(
+ this->name, GF_LOG_ERROR, errno, GD_MSG_BITROT_NOT_ENABLED,
+ "Scrubber will be enabled automatically if bitrot is enabled",
+ "Volume=%s", volname, NULL);
+ goto out;
+ }
+ } else if ((cmd & GF_CLI_STATUS_SNAPD) != 0) {
+ if (!glusterd_is_snapd_enabled(volinfo)) {
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "Volume %s does not have "
+ "uss enabled",
+ volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_SNAPD_NOT_RUNNING,
+ "Volume=%s", volname, NULL);
+ goto out;
+ }
+ } else if ((cmd & GF_CLI_STATUS_BRICK) != 0) {
+ ret = dict_get_strn(dict, "brick", SLEN("brick"), &brick);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=brick", NULL);
+ goto out;
}
- ret = glusterd_create_volfiles_and_notify_services (volinfo);
-
+ ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo,
+ _gf_false);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to create volfile for"
- " 'volume set'");
- ret = -1;
- goto out;
+ snprintf(msg, sizeof(msg),
+ "No brick %s in"
+ " volume %s",
+ brick, volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_BRICK_NOT_FOUND,
+ "Brick=%s, Volume=%s", brick, volname, NULL);
+ ret = -1;
+ goto out;
}
+ }
- ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret)
- goto out;
+ ret = 0;
- if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_nodesvcs_handle_reconfigure (volinfo);
- if (ret)
- goto out;
- }
+out:
+ if (ret) {
+ if (msg[0] != '\0')
+ *op_errstr = gf_strdup(msg);
+ else
+ *op_errstr = gf_strdup("Validation Failed for Status");
+ }
- ret = 0;
+ gf_msg_debug(this->name, 0, "Returning: %d", ret);
+ return ret;
+}
+int
+glusterd_op_stage_stats_volume(dict_t *dict, char **op_errstr)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char msg[2048] = {
+ 0,
+ };
+ int32_t stats_op = GF_CLI_STATS_NONE;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Volume name get failed");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(msg, sizeof(msg),
+ "Volume %s, "
+ "doesn't exist",
+ volname);
+ goto out;
+ }
+
+ ret = glusterd_validate_volume_id(dict, volinfo);
+ if (ret)
+ goto out;
+
+ ret = dict_get_int32n(dict, "op", SLEN("op"), &stats_op);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Volume profile op get failed");
+ goto out;
+ }
+
+ if (GF_CLI_STATS_START == stats_op) {
+ if (_gf_true == glusterd_is_profile_on(volinfo)) {
+ snprintf(msg, sizeof(msg),
+ "Profile on Volume %s is"
+ " already started",
+ volinfo->volname);
+ ret = -1;
+ goto out;
+ }
+ } else if ((GF_CLI_STATS_STOP == stats_op) ||
+ (GF_CLI_STATS_INFO == stats_op)) {
+ if (_gf_false == glusterd_is_profile_on(volinfo)) {
+ snprintf(msg, sizeof(msg),
+ "Profile on Volume %s is"
+ " not started",
+ volinfo->volname);
+ ret = -1;
+
+ goto out;
+ }
+ }
+ if ((GF_CLI_STATS_TOP == stats_op) || (GF_CLI_STATS_INFO == stats_op)) {
+ if (_gf_false == glusterd_is_volume_started(volinfo)) {
+ snprintf(msg, sizeof(msg), "Volume %s is not started.",
+ volinfo->volname);
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_STARTED, "%s",
+ msg);
+ ret = -1;
+ goto out;
+ }
+ }
+ ret = 0;
out:
- if (key_fixed)
- GF_FREE (key_fixed);
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (msg[0] != '\0') {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_OP_STAGE_STATS_VOL_FAIL,
+ "%s", msg);
+ *op_errstr = gf_strdup(msg);
+ }
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
}
+static int
+_delete_reconfig_opt(dict_t *this, char *key, data_t *value, void *data)
+{
+ int32_t *is_force = 0;
+
+ GF_ASSERT(data);
+ is_force = (int32_t *)data;
+
+ /* Keys which has the flag VOLOPT_FLAG_NEVER_RESET
+ * should not be deleted
+ */
+
+ if (_gf_true ==
+ glusterd_check_voloption_flags(key, VOLOPT_FLAG_NEVER_RESET)) {
+ if (*is_force != 1)
+ *is_force = *is_force | GD_OP_PROTECTED;
+ goto out;
+ }
+
+ if (*is_force != 1) {
+ if (_gf_true ==
+ glusterd_check_voloption_flags(key, VOLOPT_FLAG_FORCE)) {
+ /* indicate to caller that we don't set the option
+ * due to being protected
+ */
+ *is_force = *is_force | GD_OP_PROTECTED;
+ goto out;
+ } else {
+ *is_force = *is_force | GD_OP_UNPROTECTED;
+ }
+ }
+
+ gf_msg_debug("glusterd", 0, "deleting dict with key=%s,value=%s", key,
+ value->data);
+ dict_del(this, key);
+ /**Delete scrubber (pause/resume) option from the dictionary if bitrot
+ * option is going to be reset
+ * */
+ if (!strncmp(key, VKEY_FEATURES_BITROT, strlen(VKEY_FEATURES_BITROT))) {
+ dict_del_sizen(this, VKEY_FEATURES_SCRUB);
+ }
+out:
+ return 0;
+}
static int
-glusterd_op_reset_volume (dict_t *dict, char **op_errstr)
+_delete_reconfig_global_opt(dict_t *this, char *key, data_t *value, void *data)
{
- glusterd_volinfo_t *volinfo = NULL;
- int ret = -1;
- char *volname = NULL;
- char *key = NULL;
- int32_t is_force = 0;
+ GF_ASSERT(data);
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name " );
- goto out;
- }
+ if (strcmp(GLUSTERD_GLOBAL_OPT_VERSION, key) == 0)
+ goto out;
- ret = dict_get_int32 (dict, "force", &is_force);
- if (ret)
- is_force = 0;
+ _delete_reconfig_opt(this, key, value, data);
+out:
+ return 0;
+}
- ret = dict_get_str (dict, "key", &key);
+static int
+glusterd_options_reset(glusterd_volinfo_t *volinfo, char *key,
+ int32_t *is_force)
+{
+ int ret = 0;
+ data_t *value = NULL;
+ char *key_fixed = NULL;
+ xlator_t *this = NULL;
+ glusterd_svc_t *svc = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(volinfo->dict);
+ GF_ASSERT(key);
+
+ if (!strncmp(key, "all", 3)) {
+ dict_foreach(volinfo->dict, _delete_reconfig_opt, is_force);
+ ret = glusterd_enable_default_options(volinfo, NULL);
if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to get option key");
- goto out;
- }
-
- ret = glusterd_volinfo_find (volname, &volinfo);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FAIL_DEFAULT_OPT_SET,
+ "Failed to set "
+ "default options on reset for volume %s",
+ volinfo->volname);
+ goto out;
+ }
+ } else {
+ value = dict_get(volinfo->dict, key);
+ if (!value) {
+ gf_msg_debug(this->name, 0, "no value set for option %s", key);
+ goto out;
+ }
+ _delete_reconfig_opt(volinfo->dict, key, value, is_force);
+ ret = glusterd_enable_default_options(volinfo, key);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FAIL_DEFAULT_OPT_SET,
+ "Failed to set "
+ "default value for option '%s' on reset for "
+ "volume %s",
+ key, volinfo->volname);
+ goto out;
+ }
+ }
+
+ gd_update_volume_op_versions(volinfo);
+ if (!volinfo->is_snap_volume) {
+ svc = &(volinfo->snapd.svc);
+ ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT);
+ if (ret)
+ goto out;
+ }
+ svc = &(volinfo->gfproxyd.svc);
+ ret = svc->reconfigure(volinfo);
+ if (ret)
+ goto out;
+
+ svc = &(volinfo->shd.svc);
+ ret = svc->reconfigure(volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Unable to create volfile for"
+ " 'volume reset'");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_svcs_reconfigure(volinfo);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ GF_FREE(key_fixed);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_op_reset_all_volume_options(xlator_t *this, dict_t *dict)
+{
+ char *key = NULL;
+ char *key_fixed = NULL;
+ int ret = -1;
+ int32_t is_force = 0;
+ glusterd_conf_t *conf = NULL;
+ dict_t *dup_opt = NULL;
+ gf_boolean_t all = _gf_false;
+ char *next_version = NULL;
+ gf_boolean_t quorum_action = _gf_false;
+
+ conf = this->private;
+ ret = dict_get_strn(dict, "key", SLEN("key"), &key);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get key");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "force", SLEN("force"), &is_force);
+ if (ret)
+ is_force = 0;
+
+ if (strcmp(key, "all")) {
+ ret = glusterd_check_option_exists(key, &key_fixed);
+ if (ret <= 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "Option %s does not "
+ "exist",
+ key);
+ ret = -1;
+ goto out;
+ }
+ } else {
+ all = _gf_true;
+ }
+
+ if (key_fixed)
+ key = key_fixed;
+
+ ret = -1;
+ dup_opt = dict_new();
+ if (!dup_opt) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+ if (!all) {
+ dict_copy(conf->opts, dup_opt);
+ dict_del(dup_opt, key);
+ }
+ ret = glusterd_get_next_global_opt_version_str(conf->opts, &next_version);
+ if (ret)
+ goto out;
+
+ ret = dict_set_strn(dup_opt, GLUSTERD_GLOBAL_OPT_VERSION,
+ SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL);
+ goto out;
+ }
+
+ ret = glusterd_store_options(this, dup_opt);
+ if (ret)
+ goto out;
+
+ if (glusterd_is_quorum_changed(conf->opts, key, NULL))
+ quorum_action = _gf_true;
+
+ ret = dict_set_dynstrn(conf->opts, GLUSTERD_GLOBAL_OPT_VERSION,
+ SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL);
+ goto out;
+ } else
+ next_version = NULL;
+
+ if (!all) {
+ dict_del(conf->opts, key);
+ } else {
+ dict_foreach(conf->opts, _delete_reconfig_global_opt, &is_force);
+ }
+out:
+ GF_FREE(key_fixed);
+ if (dup_opt)
+ dict_unref(dup_opt);
+
+ gf_msg_debug(this->name, 0, "returning %d", ret);
+ if (quorum_action)
+ glusterd_do_quorum_action();
+ GF_FREE(next_version);
+ return ret;
+}
+
+static int
+glusterd_op_reset_volume(dict_t *dict, char **op_rspstr)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = -1;
+ char *volname = NULL;
+ char *key = NULL;
+ char *key_fixed = NULL;
+ int32_t is_force = 0;
+ gf_boolean_t quorum_action = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ if (strcasecmp(volname, "all") == 0) {
+ ret = glusterd_op_reset_all_volume_options(this, dict);
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "force", SLEN("force"), &is_force);
+ if (ret)
+ is_force = 0;
+
+ ret = dict_get_strn(dict, "key", SLEN("key"), &key);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get option key");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
+ }
+
+ if (strcmp(key, "all") &&
+ glusterd_check_option_exists(key, &key_fixed) != 1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "volinfo dict inconsistency: option %s not found", key);
+ ret = -1;
+ goto out;
+ }
+ if (key_fixed)
+ key = key_fixed;
+
+ if (glusterd_is_quorum_changed(volinfo->dict, key, NULL))
+ quorum_action = _gf_true;
+
+ ret = glusterd_options_reset(volinfo, key, &is_force);
+ if (ret == -1) {
+ gf_asprintf(op_rspstr, "Volume reset : failed");
+ } else if (is_force & GD_OP_PROTECTED) {
+ if (is_force & GD_OP_UNPROTECTED) {
+ gf_asprintf(op_rspstr,
+ "All unprotected fields were"
+ " reset. To reset the protected fields,"
+ " use 'force'.");
+ } else {
+ ret = -1;
+ gf_asprintf(op_rspstr,
+ "'%s' is protected. To reset"
+ " use 'force'.",
+ key);
}
+ }
- ret = glusterd_options_reset (volinfo, key, &is_force);
- if (is_force == -1) {
- ret = -1;
- gf_asprintf(op_errstr, "'%s' is protected. To reset use 'force'.",
- key);
+ if (!strcmp(key, "ganesha.enable") || !strcmp(key, "all")) {
+ if (glusterd_check_ganesha_export(volinfo) &&
+ is_origin_glusterd(dict)) {
+ ret = manage_export_config(volname, "off", op_rspstr);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NFS_GNS_RESET_FAIL,
+ "Could not reset ganesha.enable key");
}
+ }
out:
- gf_log ("", GF_LOG_DEBUG, "'volume reset' returning %d", ret);
- return ret;
+ GF_FREE(key_fixed);
+ if (quorum_action)
+ glusterd_do_quorum_action();
+ gf_msg_debug(this->name, 0, "'volume reset' returning %d", ret);
+ return ret;
}
-
int
-glusterd_stop_bricks (glusterd_volinfo_t *volinfo)
+glusterd_stop_bricks(glusterd_volinfo_t *volinfo)
{
- glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
- list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
- if (glusterd_brick_stop (volinfo, brickinfo))
- return -1;
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ /*TODO: Need to change @del_brick in brick_stop to _gf_true
+ * once we enable synctask in peer rpc prog */
+ if (glusterd_brick_stop(volinfo, brickinfo, _gf_false)) {
+ gf_event(EVENT_BRICK_STOP_FAILED, "peer=%s;volume=%s;brick=%s",
+ brickinfo->hostname, volinfo->volname, brickinfo->path);
+ return -1;
}
+ }
- return 0;
+ return 0;
}
int
-glusterd_start_bricks (glusterd_volinfo_t *volinfo)
+glusterd_start_bricks(glusterd_volinfo_t *volinfo)
+
{
- glusterd_brickinfo_t *brickinfo = NULL;
+ int ret = -1;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ GF_ASSERT(volinfo);
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (!brickinfo->start_triggered) {
+ pthread_mutex_lock(&brickinfo->restart_mutex);
+ {
+ /* coverity[SLEEP] */
+ ret = glusterd_brick_start(volinfo, brickinfo, _gf_false,
+ _gf_false);
+ }
+ pthread_mutex_unlock(&brickinfo->restart_mutex);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_DISCONNECTED,
+ "Failed to start %s:%s for %s", brickinfo->hostname,
+ brickinfo->path, volinfo->volname);
+ gf_event(EVENT_BRICK_START_FAILED, "peer=%s;volume=%s;brick=%s",
+ brickinfo->hostname, volinfo->volname,
+ brickinfo->path);
+ goto out;
+ }
+ }
+ }
+ ret = 0;
+out:
+ return ret;
+}
- list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
- if (glusterd_brick_start (volinfo, brickinfo))
- return -1;
+static int
+glusterd_update_volumes_dict(glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char *address_family_str = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ /* 3.9.0 onwards gNFS will be disabled by default. In case of an upgrade
+ * from anything below than 3.9.0 to 3.9.x the volume's dictionary will
+ * not have 'nfs.disable' key set which means the same will not be set
+ * to on until explicitly done. setnfs.disable to 'on' at op-version
+ * bump up flow is the ideal way here. The same is also applicable for
+ * transport.address-family where if the transport type is set to tcp
+ * then transport.address-family is defaulted to 'inet'.
+ */
+ if (conf->op_version >= GD_OP_VERSION_3_9_0) {
+ if (dict_get_str_boolean(volinfo->dict, NFS_DISABLE_MAP_KEY, 1)) {
+ ret = dict_set_dynstr_with_alloc(volinfo->dict, NFS_DISABLE_MAP_KEY,
+ "on");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "option ' NFS_DISABLE_MAP_KEY ' on "
+ "volume %s",
+ volinfo->volname);
+ goto out;
+ }
}
+ ret = dict_get_strn(volinfo->dict, "transport.address-family",
+ SLEN("transport.address-family"),
+ &address_family_str);
+ if (ret) {
+ if (volinfo->transport_type == GF_TRANSPORT_TCP) {
+ ret = dict_set_dynstr_with_alloc(
+ volinfo->dict, "transport.address-family", "inet");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_SET_FAILED,
+ "failed to set transport."
+ "address-family on %s",
+ volinfo->volname);
+ goto out;
+ }
+ }
+ }
+ }
+ ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- return 0;
+out:
+ return ret;
}
static int
-glusterd_volset_help (dict_t *dict)
+glusterd_set_brick_mx_opts(dict_t *dict, char *key, char *value,
+ char **op_errstr)
{
- int ret = -1;
- gf_boolean_t xml_out = _gf_false;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
- if (dict_get (dict, "help" ))
- xml_out = _gf_false;
- else if (dict_get (dict, "help-xml" ))
- xml_out = _gf_true;
- else
- goto out;
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+ GF_VALIDATE_OR_GOTO(this->name, key, out);
+ GF_VALIDATE_OR_GOTO(this->name, value, out);
+ GF_VALIDATE_OR_GOTO(this->name, op_errstr, out);
- ret = glusterd_get_volopt_content (xml_out);
- out:
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ ret = 0;
+
+ priv = this->private;
+
+ if (!strcmp(key, GLUSTERD_BRICK_MULTIPLEX_KEY)) {
+ ret = dict_set_dynstrn(priv->opts, GLUSTERD_BRICK_MULTIPLEX_KEY,
+ SLEN(GLUSTERD_BRICK_MULTIPLEX_KEY),
+ gf_strdup(value));
+ }
+
+out:
+ return ret;
}
+/* This is a hack to prevent client-io-threads from being loaded in the graph
+ * when the cluster-op-version is bumped up from 3.8.x to 3.13.x. The key is
+ * deleted subsequently in glusterd_create_volfiles(). */
static int
-glusterd_op_set_volume (dict_t *dict)
+glusterd_dict_set_skip_cliot_key(glusterd_volinfo_t *volinfo)
{
- int ret = 0;
- glusterd_volinfo_t *volinfo = NULL;
- char *volname = NULL;
- xlator_t *this = NULL;
- glusterd_conf_t *priv = NULL;
- int count = 1;
- char *key = NULL;
- char *key_fixed = NULL;
- char *value = NULL;
- char str[50] = {0, };
- gf_boolean_t global_opt = _gf_false;
- glusterd_volinfo_t *voliter = NULL;
- int32_t dict_count = 0;
-
- this = THIS;
- GF_ASSERT (this);
-
- priv = this->private;
- GF_ASSERT (priv);
-
- ret = dict_get_int32 (dict, "count", &dict_count);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Count(dict),not set in Volume-Set");
- goto out;
- }
+ return dict_set_int32n(volinfo->dict, "skip-CLIOT", SLEN("skip-CLIOT"), 1);
+}
- if (dict_count == 0) {
- ret = glusterd_volset_help (dict);
+static int
+glusterd_op_set_all_volume_options(xlator_t *this, dict_t *dict,
+ char **op_errstr)
+{
+ char *key = NULL;
+ char *key_fixed = NULL;
+ char *value = NULL;
+ char *dup_value = NULL;
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ dict_t *dup_opt = NULL;
+ char *next_version = NULL;
+ gf_boolean_t quorum_action = _gf_false;
+ uint32_t op_version = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_svc_t *svc = NULL;
+ gf_boolean_t svcs_reconfigure = _gf_false;
+
+ conf = this->private;
+ ret = dict_get_strn(dict, "key1", SLEN("key1"), &key);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=key1", NULL);
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "value1", SLEN("value1"), &value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "invalid key,value pair in 'volume set'");
+ goto out;
+ }
+
+ ret = glusterd_check_option_exists(key, &key_fixed);
+ if (ret <= 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNKNOWN_KEY,
+ "Invalid key %s", key);
+ ret = -1;
+ goto out;
+ }
+
+ if (key_fixed)
+ key = key_fixed;
+
+ ret = glusterd_set_shared_storage(dict, key, value, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SHARED_STRG_SET_FAIL,
+ "Failed to set shared storage option");
+ goto out;
+ }
+
+ ret = glusterd_set_brick_mx_opts(dict, key, value, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_MX_SET_FAIL,
+ "Failed to set brick multiplexing option");
+ goto out;
+ }
+
+ /* If the key is cluster.op-version, set conf->op_version to the value
+ * if needed and save it.
+ */
+ if (strcmp(key, GLUSTERD_GLOBAL_OP_VERSION_KEY) == 0) {
+ ret = 0;
+
+ ret = gf_string2uint(value, &op_version);
+ if (ret)
+ goto out;
+
+ if (op_version >= conf->op_version) {
+ conf->op_version = op_version;
+
+ /* When a bump up happens, update the quota.conf file
+ * as well. This is because, till 3.7 we had a quota
+ * conf version v1.1 in quota.conf. When inode-quota
+ * feature is introduced, this needs to be changed to
+ * v1.2 in quota.conf and 16 bytes uuid in quota.conf
+ * needs to be changed to 17 bytes. Look
+ * glusterd_store_quota_config for more details.
+ */
+ cds_list_for_each_entry(volinfo, &conf->volumes, vol_list)
+ {
+ ret = glusterd_store_quota_config(
+ volinfo, NULL, NULL, GF_QUOTA_OPTION_TYPE_UPGRADE, NULL);
if (ret)
- gf_log (this->name, GF_LOG_ERROR, "Volume set"
- " help internal error");
- goto out;
- }
+ goto out;
+ ret = glusterd_update_volumes_dict(volinfo);
+ if (ret)
+ goto out;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Unable to get volume name");
- goto out;
- }
+ if (glusterd_dict_set_skip_cliot_key(volinfo))
+ goto out;
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Unable to allocate memory");
- goto out;
- }
+ if (!volinfo->is_snap_volume) {
+ svc = &(volinfo->snapd.svc);
+ ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT);
+ if (ret)
+ goto out;
+ }
- for (count = 1; ret != -1 ; count++) {
+ svc = &(volinfo->gfproxyd.svc);
+ ret = svc->reconfigure(volinfo);
+ if (ret)
+ goto out;
- global_opt = _gf_false;
- sprintf (str, "key%d", count);
- ret = dict_get_str (dict, str, &key);
+ svc = &(volinfo->shd.svc);
+ ret = svc->reconfigure(volinfo);
if (ret)
- break;
+ goto out;
- sprintf (str, "value%d", count);
- ret = dict_get_str (dict, str, &value);
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "invalid key,value pair in 'volume set'");
- ret = -1;
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VOLFILE_CREATE_FAIL,
+ "Unable to create volfile for"
+ " 'volume set'");
+ goto out;
}
-
- if (strcmp (key, "memory-accounting") == 0) {
- ret = gf_string2boolean (value,
- &volinfo->memory_accounting);
- goto out;
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ svcs_reconfigure = _gf_true;
}
-
- if (is_key_glusterd_hooks_friendly (this, volname, key)) {
- ret = 0;
- goto out;
+ }
+ if (svcs_reconfigure) {
+ ret = glusterd_svcs_reconfigure(NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL,
+ "Unable to restart "
+ "services");
+ goto out;
}
+ }
- ret = glusterd_check_option_exists (key, &key_fixed);
- GF_ASSERT (ret);
- if (ret == -1) {
- key_fixed = NULL;
- goto out;
- }
+ ret = glusterd_store_global_info(this);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERS_STORE_FAIL,
+ "Failed to store op-version.");
+ }
+ }
+ /* No need to save cluster.op-version in conf->opts
+ */
+ goto out;
+ }
+ ret = -1;
+ dup_opt = dict_new();
+ if (!dup_opt) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+ dict_copy(conf->opts, dup_opt);
+ ret = dict_set_str(dup_opt, key, value);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ ret = glusterd_get_next_global_opt_version_str(conf->opts, &next_version);
+ if (ret)
+ goto out;
+
+ ret = dict_set_strn(dup_opt, GLUSTERD_GLOBAL_OPT_VERSION,
+ SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL);
+ goto out;
+ }
+
+ ret = glusterd_store_options(this, dup_opt);
+ if (ret)
+ goto out;
+
+ if (glusterd_is_quorum_changed(conf->opts, key, value))
+ quorum_action = _gf_true;
+
+ ret = dict_set_dynstrn(conf->opts, GLUSTERD_GLOBAL_OPT_VERSION,
+ SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", GLUSTERD_GLOBAL_OPT_VERSION, NULL);
+ goto out;
+ } else
+ next_version = NULL;
+
+ dup_value = gf_strdup(value);
+ if (!dup_value)
+ goto out;
+
+ ret = dict_set_dynstr(conf->opts, key, dup_value);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ } else
+ dup_value = NULL; /* Protect the allocation from GF_FREE */
- ret = glusterd_check_globaloption (key);
- if (ret)
- global_opt = _gf_true;
+out:
+ GF_FREE(dup_value);
+ GF_FREE(key_fixed);
+ if (dup_opt)
+ dict_unref(dup_opt);
+
+ gf_msg_debug(this->name, 0, "returning %d", ret);
+ if (quorum_action)
+ glusterd_do_quorum_action();
+ GF_FREE(next_version);
+ return ret;
+}
- if (!global_opt)
- value = gf_strdup (value);
+int
+glusterd_op_get_max_opversion(char **op_errstr, dict_t *rsp_dict)
+{
+ int ret = -1;
- if (!value) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to set the options in 'volume set'");
- ret = -1;
- goto out;
- }
+ GF_VALIDATE_OR_GOTO(THIS->name, rsp_dict, out);
- if (key_fixed)
- key = key_fixed;
+ ret = dict_set_int32n(rsp_dict, "max-opversion", SLEN("max-opversion"),
+ GD_OP_VERSION_MAX);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Setting value for max-opversion to dict failed");
+ goto out;
+ }
- if (global_opt) {
- list_for_each_entry (voliter, &priv->volumes, vol_list) {
- value = gf_strdup (value);
- ret = dict_set_dynstr (voliter->dict, key, value);
- if (ret)
- goto out;
- }
- } else {
- ret = dict_set_dynstr (volinfo->dict, key, value);
- if (ret)
- goto out;
- }
+out:
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ return ret;
+}
- if (key_fixed) {
- GF_FREE (key_fixed);
- key_fixed = NULL;
- }
+static int
+glusterd_set_shared_storage(dict_t *dict, char *key, char *value,
+ char **op_errstr)
+{
+ int32_t ret = -1;
+ char hooks_args[PATH_MAX] = {
+ 0,
+ };
+ char errstr[PATH_MAX] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+ GF_VALIDATE_OR_GOTO(this->name, key, out);
+ GF_VALIDATE_OR_GOTO(this->name, value, out);
+ GF_VALIDATE_OR_GOTO(this->name, op_errstr, out);
+
+ ret = 0;
+
+ if (strcmp(key, GLUSTERD_SHARED_STORAGE_KEY)) {
+ goto out;
+ }
+
+ /* Re-create the brick path so as to be *
+ * able to re-use it *
+ */
+ ret = recursive_rmdir(GLUSTER_SHARED_STORAGE_BRICK_DIR);
+ if (ret) {
+ snprintf(errstr, PATH_MAX,
+ "Failed to remove shared "
+ "storage brick(%s). "
+ "Reason: %s",
+ GLUSTER_SHARED_STORAGE_BRICK_DIR, strerror(errno));
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED, "%s",
+ errstr);
+ ret = -1;
+ goto out;
+ }
+
+ ret = mkdir_p(GLUSTER_SHARED_STORAGE_BRICK_DIR, 0755, _gf_true);
+ if (-1 == ret) {
+ snprintf(errstr, PATH_MAX,
+ "Failed to create shared "
+ "storage brick(%s). "
+ "Reason: %s",
+ GLUSTER_SHARED_STORAGE_BRICK_DIR, strerror(errno));
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED, "%s",
+ errstr);
+ goto out;
+ }
+
+ if (is_origin_glusterd(dict)) {
+ len = snprintf(hooks_args, sizeof(hooks_args),
+ "is_originator=1,local_node_hostname=%s",
+ local_node_hostname);
+ } else {
+ len = snprintf(hooks_args, sizeof(hooks_args),
+ "is_originator=0,local_node_hostname=%s",
+ local_node_hostname);
+ }
+ if ((len < 0) || (len >= sizeof(hooks_args))) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(dict, "hooks_args", hooks_args);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Failed to set"
+ " hooks_args in dict.");
+ goto out;
+ }
+
+out:
+ if (ret && strlen(errstr)) {
+ *op_errstr = gf_strdup(errstr);
+ }
+
+ return ret;
+}
+
+static int
+glusterd_op_set_volume(dict_t *dict, char **errstr)
+{
+ int ret = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int count = 1;
+ char *key = NULL;
+ char *key_fixed = NULL;
+ char *value = NULL;
+ char keystr[50] = {
+ 0,
+ };
+ int keylen;
+ gf_boolean_t global_opt = _gf_false;
+ gf_boolean_t global_opts_set = _gf_false;
+ glusterd_volinfo_t *voliter = NULL;
+ int32_t dict_count = 0;
+ gf_boolean_t check_op_version = _gf_false;
+ uint32_t new_op_version = 0;
+ gf_boolean_t quorum_action = _gf_false;
+ glusterd_svc_t *svc = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_int32n(dict, "count", SLEN("count"), &dict_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Count(dict),not set in Volume-Set");
+ goto out;
+ }
+
+ if (dict_count == 0) {
+ ret = glusterd_volset_help(NULL, errstr);
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ if (strcasecmp(volname, "all") == 0) {
+ ret = glusterd_op_set_all_volume_options(this, dict, errstr);
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
+ }
+
+ /* TODO: Remove this once v3.3 compatibility is not required */
+ check_op_version = dict_get_str_boolean(dict, "check-op-version",
+ _gf_false);
+
+ if (check_op_version) {
+ ret = dict_get_uint32(dict, "new-op-version", &new_op_version);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get new op-version from dict");
+ goto out;
}
+ }
+
+ for (count = 1; ret != -1; count++) {
+ keylen = snprintf(keystr, sizeof(keystr), "key%d", count);
+ ret = dict_get_strn(dict, keystr, keylen, &key);
+ if (ret)
+ break;
- if (count == 1) {
- gf_log (this->name, GF_LOG_ERROR, "No options received ");
+ keylen = snprintf(keystr, sizeof(keystr), "value%d", count);
+ ret = dict_get_strn(dict, keystr, keylen, &value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "invalid key,value pair in 'volume set'");
+ ret = -1;
+ goto out;
+ }
+
+ if (strcmp(key, "config.memory-accounting") == 0) {
+ ret = gf_string2boolean(value, &volinfo->memory_accounting);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid value in key-value pair.");
+ goto out;
+ }
+ }
+
+ if (strcmp(key, "config.transport") == 0) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_VOL_TRANSPORT_TYPE_CHANGE,
+ "changing transport-type for volume %s to %s", volname,
+ value);
+ ret = 0;
+ if (strcasecmp(value, "rdma") == 0) {
+ volinfo->transport_type = GF_TRANSPORT_RDMA;
+ } else if (strcasecmp(value, "tcp") == 0) {
+ volinfo->transport_type = GF_TRANSPORT_TCP;
+ } else if ((strcasecmp(value, "tcp,rdma") == 0) ||
+ (strcasecmp(value, "rdma,tcp") == 0)) {
+ volinfo->transport_type = GF_TRANSPORT_BOTH_TCP_RDMA;
+ } else {
ret = -1;
goto out;
+ }
}
- if (!global_opt) {
- ret = glusterd_create_volfiles_and_notify_services (volinfo);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to create volfile for"
- " 'volume set'");
- ret = -1;
- goto out;
- }
+ ret = glusterd_check_ganesha_cmd(key, value, errstr, dict);
+ if (ret == -1)
+ goto out;
- ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret)
- goto out;
+ if (!is_key_glusterd_hooks_friendly(key)) {
+ ret = glusterd_check_option_exists(key, &key_fixed);
+ GF_ASSERT(ret);
+ if (ret <= 0) {
+ key_fixed = NULL;
+ goto out;
+ }
+ }
- if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_nodesvcs_handle_reconfigure (volinfo);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to restart NFS-Server");
- goto out;
- }
- }
+ global_opt = _gf_false;
+ if (glusterd_check_globaloption(key)) {
+ global_opt = _gf_true;
+ global_opts_set = _gf_true;
+ }
- } else {
- list_for_each_entry (voliter, &priv->volumes, vol_list) {
- volinfo = voliter;
- ret = glusterd_create_volfiles_and_notify_services (volinfo);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to create volfile for"
- " 'volume set'");
- ret = -1;
- goto out;
- }
+ if (!global_opt)
+ value = gf_strdup(value);
- ret = glusterd_store_volinfo (volinfo,
- GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret)
- goto out;
-
- if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_nodesvcs_handle_reconfigure (volinfo);
- if (ret) {
- gf_log (this->name, GF_LOG_WARNING,
- "Unable to restart NFS-Server");
- goto out;
- }
- }
- }
+ if (!value) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_SET_FAIL,
+ "Unable to set the options in 'volume set'");
+ ret = -1;
+ goto out;
}
- ret = 0;
- out:
if (key_fixed)
- GF_FREE (key_fixed);
- gf_log (this->name, GF_LOG_DEBUG, "returning %d", ret);
- return ret;
-}
+ key = key_fixed;
+ if (glusterd_is_quorum_changed(volinfo->dict, key, value))
+ quorum_action = _gf_true;
-static int
-glusterd_op_sync_volume (dict_t *dict, char **op_errstr,
- dict_t *rsp_dict)
-{
- int ret = -1;
- char *volname = NULL;
- char *hostname = NULL;
- char msg[2048] = {0,};
- int count = 1;
- int vol_count = 0;
- glusterd_conf_t *priv = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- xlator_t *this = NULL;
-
- this = THIS;
- GF_ASSERT (this);
- priv = this->private;
- GF_ASSERT (priv);
-
- ret = dict_get_str (dict, "hostname", &hostname);
- if (ret) {
- snprintf (msg, sizeof (msg), "hostname couldn't be "
- "retrieved from msg");
- *op_errstr = gf_strdup (msg);
+ if (global_opt) {
+ cds_list_for_each_entry(voliter, &priv->volumes, vol_list)
+ {
+ value = gf_strdup(value);
+ ret = dict_set_dynstr(voliter->dict, key, value);
+ if (ret)
+ goto out;
+ }
+ } else {
+ ret = dict_set_dynstr(volinfo->dict, key, value);
+ if (ret)
goto out;
}
- if (glusterd_is_local_addr (hostname)) {
- ret = 0;
+ if (key_fixed) {
+ GF_FREE(key_fixed);
+ key_fixed = NULL;
+ }
+ }
+
+ if (count == 1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_OPTIONS_GIVEN,
+ "No options received ");
+ ret = -1;
+ goto out;
+ }
+
+ /* Update the cluster op-version before regenerating volfiles so that
+ * correct volfiles are generated
+ */
+ if (new_op_version > priv->op_version) {
+ priv->op_version = new_op_version;
+ ret = glusterd_store_global_info(this);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERS_STORE_FAIL,
+ "Failed to store op-version");
+ goto out;
+ }
+ }
+ if (!global_opts_set) {
+ gd_update_volume_op_versions(volinfo);
+
+ if (!volinfo->is_snap_volume) {
+ svc = &(volinfo->snapd.svc);
+ ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT);
+ if (ret)
goto out;
}
+ svc = &(volinfo->gfproxyd.svc);
+ ret = svc->reconfigure(volinfo);
+ if (ret)
+ goto out;
- //volname is not present in case of sync all
- ret = dict_get_str (dict, "volname", &volname);
- if (!ret) {
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Volume with name: %s "
- "not exists", volname);
- goto out;
- }
+ svc = &(volinfo->shd.svc);
+ ret = svc->reconfigure(volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Unable to create volfile for"
+ " 'volume set'");
+ ret = -1;
+ goto out;
}
- if (!rsp_dict) {
- //this should happen only on source
- ret = 0;
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_svcs_reconfigure(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL,
+ "Unable to restart services");
goto out;
+ }
}
- if (volname) {
- ret = glusterd_add_volume_to_dict (volinfo, rsp_dict,
- 1);
- vol_count = 1;
- } else {
- list_for_each_entry (volinfo, &priv->volumes, vol_list) {
- ret = glusterd_add_volume_to_dict (volinfo,
- rsp_dict, count);
- if (ret)
- goto out;
+ } else {
+ cds_list_for_each_entry(voliter, &priv->volumes, vol_list)
+ {
+ volinfo = voliter;
+ gd_update_volume_op_versions(volinfo);
+
+ if (!volinfo->is_snap_volume) {
+ svc = &(volinfo->snapd.svc);
+ ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT);
+ if (ret)
+ goto out;
+ }
+
+ svc = &(volinfo->gfproxyd.svc);
+ ret = svc->reconfigure(volinfo);
+ if (ret)
+ goto out;
+
+ svc = &(volinfo->shd.svc);
+ ret = svc->reconfigure(volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Unable to create volfile for"
+ " 'volume set'");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
- vol_count = count++;
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_svcs_reconfigure(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_RESTART_FAIL,
+ "Unable to restart services");
+ goto out;
}
+ }
}
- ret = dict_set_int32 (rsp_dict, "count", vol_count);
+ }
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
-
- return ret;
+ GF_FREE(key_fixed);
+ gf_msg_debug(this->name, 0, "returning %d", ret);
+ if (quorum_action)
+ glusterd_do_quorum_action();
+ return ret;
}
static int
-glusterd_add_profile_volume_options (glusterd_volinfo_t *volinfo)
+glusterd_op_sync_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
{
- int ret = -1;
- char *latency_key = NULL;
- char *fd_stats_key = NULL;
-
- GF_ASSERT (volinfo);
-
- latency_key = VKEY_DIAG_LAT_MEASUREMENT;
- fd_stats_key = VKEY_DIAG_CNT_FOP_HITS;
+ int ret = -1;
+ char *volname = NULL;
+ char *hostname = NULL;
+ char msg[2048] = {
+ 0,
+ };
+ int count = 1;
+ int vol_count = 0;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname);
+ if (ret) {
+ snprintf(msg, sizeof(msg),
+ "hostname couldn't be "
+ "retrieved from msg");
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=hostname", NULL);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
+
+ if (!gf_is_local_addr(hostname)) {
+ ret = 0;
+ goto out;
+ }
- ret = dict_set_str (volinfo->dict, latency_key, "on");
+ // volname is not present in case of sync all
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (!ret) {
+ ret = glusterd_volinfo_find(volname, &volinfo);
if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "failed to set the volume %s "
- "option %s value %s",
- volinfo->volname, latency_key, "on");
- goto out;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "Volume with name: %s "
+ "not exists",
+ volname);
+ goto out;
}
+ }
- ret = dict_set_str (volinfo->dict, fd_stats_key, "on");
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "failed to set the volume %s "
- "option %s value %s",
- volinfo->volname, fd_stats_key, "on");
+ if (!rsp_dict) {
+ // this should happen only on source
+ gf_smsg(this->name, GF_LOG_INFO, errno, GD_MSG_INVALID_ARGUMENT, NULL);
+ ret = 0;
+ goto out;
+ }
+
+ if (volname) {
+ ret = glusterd_add_volume_to_dict(volinfo, rsp_dict, 1, "volume");
+ if (ret)
+ goto out;
+ vol_count = 1;
+ } else {
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ ret = glusterd_add_volume_to_dict(volinfo, rsp_dict, count,
+ "volume");
+ if (ret)
goto out;
+
+ vol_count = count++;
}
+ }
+ ret = dict_set_int32n(rsp_dict, "count", SLEN("count"), vol_count);
+
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+
+ return ret;
+}
+
+static int
+glusterd_add_profile_volume_options(glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+
+ GF_ASSERT(volinfo);
+
+ ret = dict_set_nstrn(volinfo->dict, VKEY_DIAG_LAT_MEASUREMENT,
+ SLEN(VKEY_DIAG_LAT_MEASUREMENT), "on", SLEN("on"));
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to set the volume %s "
+ "option %s value %s",
+ volinfo->volname, VKEY_DIAG_LAT_MEASUREMENT, "on");
+ goto out;
+ }
+
+ ret = dict_set_nstrn(volinfo->dict, VKEY_DIAG_CNT_FOP_HITS,
+ SLEN(VKEY_DIAG_CNT_FOP_HITS), "on", SLEN("on"));
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to set the volume %s "
+ "option %s value %s",
+ volinfo->volname, VKEY_DIAG_CNT_FOP_HITS, "on");
+ goto out;
+ }
+out:
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
}
static void
-glusterd_remove_profile_volume_options (glusterd_volinfo_t *volinfo)
+glusterd_remove_profile_volume_options(glusterd_volinfo_t *volinfo)
{
- char *latency_key = NULL;
- char *fd_stats_key = NULL;
+ GF_ASSERT(volinfo);
+
+ dict_del_sizen(volinfo->dict, VKEY_DIAG_LAT_MEASUREMENT);
+ dict_del_sizen(volinfo->dict, VKEY_DIAG_CNT_FOP_HITS);
+}
- GF_ASSERT (volinfo);
+int
+glusterd_op_stats_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char msg[2048] = {
+ 0,
+ };
+ glusterd_volinfo_t *volinfo = NULL;
+ int32_t stats_op = GF_CLI_STATS_NONE;
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "volume name get failed");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Volume %s does not exists", volname);
+
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", msg);
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "op", SLEN("op"), &stats_op);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "volume profile op get failed");
+ goto out;
+ }
+
+ switch (stats_op) {
+ case GF_CLI_STATS_START:
+ ret = glusterd_add_profile_volume_options(volinfo);
+ if (ret)
+ goto out;
+ break;
+ case GF_CLI_STATS_STOP:
+ glusterd_remove_profile_volume_options(volinfo);
+ break;
+ case GF_CLI_STATS_INFO:
+ case GF_CLI_STATS_TOP:
+ // info is already collected in brick op.
+ // just goto out;
+ ret = 0;
+ goto out;
+ break;
+ default:
+ GF_ASSERT(0);
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "Invalid profile op: %d", stats_op);
+ ret = -1;
+ goto out;
+ break;
+ }
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Unable to create volfile for"
+ " 'volume set'");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_svcs_reconfigure(volinfo);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
- latency_key = VKEY_DIAG_LAT_MEASUREMENT;
- fd_stats_key = VKEY_DIAG_CNT_FOP_HITS;
- dict_del (volinfo->dict, latency_key);
- dict_del (volinfo->dict, fd_stats_key);
+ return ret;
}
static int
-glusterd_op_stats_volume (dict_t *dict, char **op_errstr,
- dict_t *rsp_dict)
+_add_remove_bricks_to_dict(dict_t *dict, glusterd_volinfo_t *volinfo,
+ char *prefix)
{
- int ret = -1;
- char *volname = NULL;
- char msg[2048] = {0,};
- glusterd_volinfo_t *volinfo = NULL;
- int32_t stats_op = GF_CLI_STATS_NONE;
-
- ret = dict_get_str (dict, "volname", &volname);
+ int ret = -1;
+ int count = 0;
+ int i = 0;
+ char brick_key[16] = {
+ 0,
+ };
+ char dict_key[64] = {
+ /* dict_key is small as prefix is up to 32 chars */
+ 0,
+ };
+ int keylen;
+ char *brick = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(volinfo);
+ GF_ASSERT(prefix);
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = dict_get_int32n(volinfo->rebal.dict, "count", SLEN("count"), &count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get brick count");
+ goto out;
+ }
+
+ keylen = snprintf(dict_key, sizeof(dict_key), "%s.count", prefix);
+ ret = dict_set_int32n(dict, dict_key, keylen, count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set brick count in dict");
+ goto out;
+ }
+
+ for (i = 1; i <= count; i++) {
+ keylen = snprintf(brick_key, sizeof(brick_key), "brick%d", i);
+
+ ret = dict_get_strn(volinfo->rebal.dict, brick_key, keylen, &brick);
if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed");
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get %s", brick_key);
+ goto out;
}
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- snprintf (msg, sizeof (msg), "Volume %s does not exists",
- volname);
-
- gf_log ("", GF_LOG_ERROR, "%s", msg);
- goto out;
+ keylen = snprintf(dict_key, sizeof(dict_key), "%s.%s", prefix,
+ brick_key);
+ if ((keylen < 0) || (keylen >= sizeof(dict_key))) {
+ ret = -1;
+ goto out;
}
-
- ret = dict_get_int32 (dict, "op", &stats_op);
+ ret = dict_set_strn(dict, dict_key, keylen, brick);
if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "volume profile op get failed");
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to add brick to dict");
+ goto out;
}
+ brick = NULL;
+ }
- switch (stats_op) {
- case GF_CLI_STATS_START:
- ret = glusterd_add_profile_volume_options (volinfo);
- if (ret)
- goto out;
- break;
- case GF_CLI_STATS_STOP:
- glusterd_remove_profile_volume_options (volinfo);
- break;
- case GF_CLI_STATS_INFO:
- case GF_CLI_STATS_TOP:
- //info is already collected in brick op.
- //just goto out;
- ret = 0;
+out:
+ return ret;
+}
+
+/* This adds the respective task-id and all available parameters of a task into
+ * a dictionary
+ */
+static int
+_add_task_to_dict(dict_t *dict, glusterd_volinfo_t *volinfo, int op, int index)
+{
+ int ret = -1;
+ char key[32] = {
+ 0,
+ };
+ int keylen;
+ char *uuid_str = NULL;
+ int status = 0;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(volinfo);
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ switch (op) {
+ case GD_OP_REMOVE_BRICK:
+ snprintf(key, sizeof(key), "task%d", index);
+ ret = _add_remove_bricks_to_dict(dict, volinfo, key);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_ADD_REMOVE_BRICK_FAIL,
+ "Failed to add remove bricks to dict");
goto out;
- break;
+ }
+ case GD_OP_REBALANCE:
+ uuid_str = gf_strdup(uuid_utoa(volinfo->rebal.rebalance_id));
+ status = volinfo->rebal.defrag_status;
+ break;
+
default:
- GF_ASSERT (0);
- gf_log ("glusterd", GF_LOG_ERROR, "Invalid profile op: %d",
- stats_op);
- ret = -1;
- goto out;
- break;
- }
- ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_TASK_ID,
+ "%s operation doesn't have a"
+ " task_id",
+ gd_op_list[op]);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "task%d.type", index);
+ ret = dict_set_strn(dict, key, keylen, (char *)gd_op_list[op]);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Error setting task type in dict");
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "task%d.id", index);
+
+ if (!uuid_str)
+ goto out;
+ ret = dict_set_dynstrn(dict, key, keylen, uuid_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Error setting task id in dict");
+ goto out;
+ }
+ uuid_str = NULL;
+
+ keylen = snprintf(key, sizeof(key), "task%d.status", index);
+ ret = dict_set_int32n(dict, key, keylen, status);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Error setting task status in dict");
+ goto out;
+ }
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to create volfile for"
- " 'volume set'");
- ret = -1;
- goto out;
- }
+out:
+ if (uuid_str)
+ GF_FREE(uuid_str);
+ return ret;
+}
- ret = glusterd_store_volinfo (volinfo,
- GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret)
- goto out;
+static int
+glusterd_aggregate_task_status(dict_t *rsp_dict, glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ int tasks = 0;
+ xlator_t *this = NULL;
- if (GLUSTERD_STATUS_STARTED == volinfo->status)
- ret = glusterd_nodesvcs_handle_reconfigure (volinfo);
+ this = THIS;
+ GF_ASSERT(this);
- ret = 0;
+ if (!gf_uuid_is_null(volinfo->rebal.rebalance_id)) {
+ ret = _add_task_to_dict(rsp_dict, volinfo, volinfo->rebal.op, tasks);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to add task details to dict");
+ goto out;
+ }
+ tasks++;
+ }
+ ret = dict_set_int32n(rsp_dict, "tasks", SLEN("tasks"), tasks);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Error setting tasks count in dict");
+ goto out;
+ }
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
-
- return ret;
+ return ret;
}
static int
-glusterd_op_status_volume (dict_t *dict, char **op_errstr,
- dict_t *rsp_dict)
+glusterd_op_status_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
{
- int ret = -1;
- int node_count = 0;
- int brick_index = -1;
- int other_count = 0;
- int other_index = 0;
- uint32_t cmd = 0;
- char *volname = NULL;
- char *brick = NULL;
- xlator_t *this = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
- glusterd_conf_t *priv = NULL;
- dict_t *vol_opts = NULL;
- gf_boolean_t nfs_disabled = _gf_false;
- gf_boolean_t shd_enabled = _gf_true;
-
- this = THIS;
- GF_ASSERT (this);
- priv = this->private;
-
- GF_ASSERT (priv);
-
- GF_ASSERT (dict);
-
- ret = dict_get_uint32 (dict, "cmd", &cmd);
- if (ret)
- goto out;
+ int ret = -1;
+ int node_count = 0;
+ int brick_index = -1;
+ int other_count = 0;
+ int other_index = 0;
+ uint32_t cmd = 0;
+ char *volname = NULL;
+ char *brick = NULL;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *vol_opts = NULL;
+#ifdef BUILD_GNFS
+ gf_boolean_t nfs_disabled = _gf_false;
+#endif
+ gf_boolean_t shd_enabled = _gf_false;
+ gf_boolean_t origin_glusterd = _gf_false;
+ int snapd_enabled, bitrot_enabled, volume_quota_enabled;
- if (!rsp_dict) {
- //this should happen only on source
- ret = 0;
- rsp_dict = glusterd_op_get_ctx ();
-
- if ((cmd & GF_CLI_STATUS_ALL)) {
- ret = glusterd_get_all_volnames (rsp_dict);
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to get all volume "
- "names for status");
- }
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
- }
+ GF_ASSERT(priv);
- ret = dict_set_uint32 (rsp_dict, "cmd", cmd);
- if (ret)
- goto out;
+ GF_ASSERT(dict);
- if (cmd & GF_CLI_STATUS_ALL)
- goto out;
+ origin_glusterd = is_origin_glusterd(dict);
- ret = dict_get_str (dict, "volname", &volname);
+ ret = dict_get_uint32(dict, "cmd", &cmd);
+ if (ret)
+ goto out;
+
+ if (origin_glusterd) {
+ ret = 0;
+ if ((cmd & GF_CLI_STATUS_ALL)) {
+ ret = glusterd_get_all_volnames(rsp_dict);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLNAMES_GET_FAIL,
+ "failed to get all volume "
+ "names for status");
+ }
+ }
+
+ ret = dict_set_uint32(rsp_dict, "cmd", cmd);
+ if (ret)
+ goto out;
+
+ if (cmd & GF_CLI_STATUS_ALL)
+ goto out;
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret)
+ goto out;
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "Volume with name: %s "
+ "does not exist",
+ volname);
+ goto out;
+ }
+ vol_opts = volinfo->dict;
+
+ if ((cmd & GF_CLI_STATUS_QUOTAD) != 0) {
+ ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict, 0,
+ vol_opts);
if (ret)
- goto out;
+ goto out;
+ other_count++;
+ node_count++;
+#ifdef BUILD_GNFS
+ } else if ((cmd & GF_CLI_STATUS_NFS) != 0) {
+ ret = glusterd_add_node_to_dict(priv->nfs_svc.name, rsp_dict, 0,
+ vol_opts);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+#endif
+ } else if ((cmd & GF_CLI_STATUS_BITD) != 0) {
+ ret = glusterd_add_node_to_dict(priv->bitd_svc.name, rsp_dict, 0,
+ vol_opts);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+ } else if ((cmd & GF_CLI_STATUS_SCRUB) != 0) {
+ ret = glusterd_add_node_to_dict(priv->scrub_svc.name, rsp_dict, 0,
+ vol_opts);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+ } else if ((cmd & GF_CLI_STATUS_SNAPD) != 0) {
+ ret = glusterd_add_snapd_to_dict(volinfo, rsp_dict, other_index);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+ } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
+ ret = glusterd_add_shd_to_dict(volinfo, rsp_dict, other_index);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+ } else if ((cmd & GF_CLI_STATUS_BRICK) != 0) {
+ ret = dict_get_strn(dict, "brick", SLEN("brick"), &brick);
+ if (ret)
+ goto out;
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Volume with name: %s "
- "does not exist", volname);
- goto out;
+ ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo,
+ _gf_false);
+ if (ret)
+ goto out;
+
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID))
+ goto out;
+
+ glusterd_add_brick_to_dict(volinfo, brickinfo, rsp_dict, ++brick_index);
+ if (cmd & GF_CLI_STATUS_DETAIL)
+ glusterd_add_brick_detail_to_dict(volinfo, brickinfo, rsp_dict,
+ brick_index);
+ node_count++;
+
+ } else if ((cmd & GF_CLI_STATUS_TASKS) != 0) {
+ ret = glusterd_aggregate_task_status(rsp_dict, volinfo);
+ goto out;
+
+ } else {
+ snapd_enabled = glusterd_is_snapd_enabled(volinfo);
+ shd_enabled = gd_is_self_heal_enabled(volinfo, vol_opts);
+#ifdef BUILD_GNFS
+ nfs_disabled = dict_get_str_boolean(vol_opts, NFS_DISABLE_MAP_KEY,
+ _gf_false);
+#endif
+ volume_quota_enabled = glusterd_is_volume_quota_enabled(volinfo);
+ bitrot_enabled = glusterd_is_bitrot_enabled(volinfo);
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ brick_index++;
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID))
+ continue;
+
+ glusterd_add_brick_to_dict(volinfo, brickinfo, rsp_dict,
+ brick_index);
+
+ if (cmd & GF_CLI_STATUS_DETAIL) {
+ glusterd_add_brick_detail_to_dict(volinfo, brickinfo, rsp_dict,
+ brick_index);
+ }
+ node_count++;
}
- vol_opts = volinfo->dict;
- if ((cmd & GF_CLI_STATUS_NFS) != 0) {
- ret = glusterd_add_node_to_dict ("nfs", rsp_dict, 0, vol_opts);
+ if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) {
+ other_index = brick_index + 1;
+ if (snapd_enabled) {
+ ret = glusterd_add_snapd_to_dict(volinfo, rsp_dict,
+ other_index);
if (ret)
- goto out;
+ goto out;
other_count++;
+ other_index++;
node_count++;
+ }
- } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
- ret = glusterd_add_node_to_dict ("glustershd", rsp_dict, 0,
- vol_opts);
- if (ret)
+ if (glusterd_is_shd_compatible_volume(volinfo)) {
+ if (shd_enabled) {
+ ret = glusterd_add_shd_to_dict(volinfo, rsp_dict,
+ other_index);
+ if (ret)
goto out;
+ other_count++;
+ other_index++;
+ node_count++;
+ }
+ }
+#ifdef BUILD_GNFS
+ if (!nfs_disabled) {
+ ret = glusterd_add_node_to_dict(priv->nfs_svc.name, rsp_dict,
+ other_index, vol_opts);
+ if (ret)
+ goto out;
+ other_index++;
other_count++;
node_count++;
-
- } else if ((cmd & GF_CLI_STATUS_BRICK) != 0) {
- ret = dict_get_str (dict, "brick", &brick);
+ }
+#endif
+ if (volume_quota_enabled) {
+ ret = glusterd_add_node_to_dict(priv->quotad_svc.name, rsp_dict,
+ other_index, vol_opts);
if (ret)
- goto out;
+ goto out;
+ other_count++;
+ node_count++;
+ other_index++;
+ }
- ret = glusterd_volume_brickinfo_get_by_brick (brick,
- volinfo,
- &brickinfo,
- GF_PATH_COMPLETE);
+ if (bitrot_enabled) {
+ ret = glusterd_add_node_to_dict(priv->bitd_svc.name, rsp_dict,
+ other_index, vol_opts);
if (ret)
- goto out;
-
- if (uuid_compare (brickinfo->uuid, MY_UUID))
- goto out;
-
- glusterd_add_brick_to_dict (volinfo, brickinfo, rsp_dict,
- ++brick_index);
- if (cmd & GF_CLI_STATUS_DETAIL)
- glusterd_add_brick_detail_to_dict (volinfo, brickinfo,
- rsp_dict,
- brick_index);
+ goto out;
+ other_count++;
node_count++;
-
- } else {
- list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
- brick_index++;
- if (uuid_compare (brickinfo->uuid, MY_UUID))
- continue;
-
- glusterd_add_brick_to_dict (volinfo, brickinfo,
- rsp_dict, brick_index);
-
- if (cmd & GF_CLI_STATUS_DETAIL) {
- glusterd_add_brick_detail_to_dict (volinfo,
- brickinfo,
- rsp_dict,
- brick_index);
- }
- node_count++;
- }
-
- if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) {
- other_index = brick_index + 1;
-
- nfs_disabled = dict_get_str_boolean (vol_opts,
- "nfs.disable",
- _gf_false);
- if (!nfs_disabled) {
- ret = glusterd_add_node_to_dict ("nfs",
- rsp_dict,
- other_index,
- vol_opts);
- if (ret)
- goto out;
- other_index++;
- other_count++;
- node_count++;
- }
-
- shd_enabled = dict_get_str_boolean
- (vol_opts, "cluster.self-heal-daemon",
- _gf_true);
- if (glusterd_is_volume_replicate (volinfo)
- && shd_enabled) {
- ret = glusterd_add_node_to_dict ("glustershd",
- rsp_dict,
- other_index,
- vol_opts);
- if (ret)
- goto out;
- other_count++;
- node_count++;
- }
- }
- }
-
- ret = dict_set_int32 (rsp_dict, "brick-index-max", brick_index);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Error setting brick-index-max to dict");
- goto out;
- }
- ret = dict_set_int32 (rsp_dict, "other-count", other_count);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Error setting other-count to dict");
- goto out;
- }
- ret = dict_set_int32 (rsp_dict, "count", node_count);
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR,
- "Error setting node count to dict");
+ other_index++;
+ /* For handling scrub status. Scrub daemon will be
+ * running automatically when bitrot is enable */
+ ret = glusterd_add_node_to_dict(priv->scrub_svc.name, rsp_dict,
+ other_index, vol_opts);
+ if (ret)
+ goto out;
+ other_count++;
+ node_count++;
+ }
+ }
+ }
+
+ ret = dict_set_int32n(rsp_dict, "type", SLEN("type"), volinfo->type);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=type", NULL);
+ goto out;
+ }
+
+ ret = dict_set_int32n(rsp_dict, "brick-index-max", SLEN("brick-index-max"),
+ brick_index);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=brick-index-max", NULL);
+ goto out;
+ }
+ ret = dict_set_int32n(rsp_dict, "other-count", SLEN("other-count"),
+ other_count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=other-count", NULL);
+ goto out;
+ }
+ ret = dict_set_int32n(rsp_dict, "count", SLEN("count"), node_count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
+ goto out;
+ }
+
+ /* Active tasks */
+ /* Tasks are added only for normal volume status request for either a
+ * single volume or all volumes
+ */
+ if (!glusterd_status_has_tasks(cmd))
+ goto out;
+
+ ret = glusterd_aggregate_task_status(rsp_dict, volinfo);
+ if (ret)
+ goto out;
+ ret = 0;
out:
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
- return ret;
+ return ret;
}
static int
-glusterd_op_ac_none (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_none(glusterd_op_sm_event_t *event, void *ctx)
{
- int ret = 0;
+ int ret = 0;
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ gf_msg_debug(THIS->name, 0, "Returning with %d", ret);
- return ret;
+ return ret;
}
static int
-glusterd_op_ac_send_lock (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_sm_locking_failed(uuid_t *txn_id)
{
- int ret = 0;
- rpc_clnt_procedure_t *proc = NULL;
- glusterd_conf_t *priv = NULL;
- xlator_t *this = NULL;
- glusterd_peerinfo_t *peerinfo = NULL;
- uint32_t pending_count = 0;
+ int ret = -1;
- this = THIS;
- priv = this->private;
- GF_ASSERT (priv);
+ opinfo.op_ret = -1;
+ opinfo.op_errstr = gf_strdup("locking failed for one of the peer.");
- list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
- GF_ASSERT (peerinfo);
+ ret = glusterd_set_txn_opinfo(txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
+ /* Inject a reject event such that unlocking gets triggered right away*/
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_RCVD_RJT, txn_id, NULL);
- if (!peerinfo->connected || !peerinfo->mgmt)
- continue;
- if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) &&
- (glusterd_op_get_op() != GD_OP_SYNC_VOLUME))
- continue;
+ return ret;
+}
- proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_LOCK];
- if (proc->fn) {
- ret = proc->fn (NULL, this, peerinfo);
- if (ret)
- continue;
- pending_count++;
+static int
+glusterd_op_ac_send_lock(glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ uint32_t pending_count = 0;
+ dict_t *dict = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
+ {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > opinfo.txn_generation)
+ continue;
+
+ if (!peerinfo->connected || !peerinfo->mgmt)
+ continue;
+ if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) &&
+ (glusterd_op_get_op() != GD_OP_SYNC_VOLUME))
+ continue;
+
+ /* Based on the op_version, acquire a cluster or mgmt_v3 lock */
+ if (priv->op_version < GD_OP_VERSION_3_6_0) {
+ proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_LOCK];
+ if (proc->fn) {
+ ret = proc->fn(NULL, this, peerinfo);
+ if (ret) {
+ RCU_READ_UNLOCK;
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_LOCK_REQ_SEND_FAIL,
+ "Failed to send lock request "
+ "for operation 'Volume %s' to "
+ "peer %s",
+ gd_op_list[opinfo.op], peerinfo->hostname);
+ goto out;
}
- }
+ /* Mark the peer as locked*/
+ peerinfo->locked = _gf_true;
+ pending_count++;
+ }
+ } else {
+ dict = glusterd_op_get_ctx();
+ dict_ref(dict);
- opinfo.pending_count = pending_count;
- if (!opinfo.pending_count)
- ret = glusterd_op_sm_inject_all_acc ();
+ proc = &peerinfo->mgmt_v3->proctable[GLUSTERD_MGMT_V3_LOCK];
+ if (proc->fn) {
+ ret = dict_set_static_ptr(dict, "peerinfo", peerinfo);
+ if (ret) {
+ RCU_READ_UNLOCK;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to set peerinfo");
+ dict_unref(dict);
+ goto out;
+ }
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ ret = proc->fn(NULL, this, dict);
+ if (ret) {
+ RCU_READ_UNLOCK;
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_MGMTV3_LOCK_REQ_SEND_FAIL,
+ "Failed to send mgmt_v3 lock "
+ "request for operation "
+ "'Volume %s' to peer %s",
+ gd_op_list[opinfo.op], peerinfo->hostname);
+ dict_unref(dict);
+ goto out;
+ }
+ /* Mark the peer as locked*/
+ peerinfo->locked = _gf_true;
+ pending_count++;
+ }
+ }
+ }
+ RCU_READ_UNLOCK;
- return ret;
-}
+ opinfo.pending_count = pending_count;
-static int
-glusterd_op_ac_send_unlock (glusterd_op_sm_event_t *event, void *ctx)
-{
- int ret = 0;
- rpc_clnt_procedure_t *proc = NULL;
- glusterd_conf_t *priv = NULL;
- xlator_t *this = NULL;
- glusterd_peerinfo_t *peerinfo = NULL;
- uint32_t pending_count = 0;
+ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
- this = THIS;
- priv = this->private;
- GF_ASSERT (priv);
+ if (!opinfo.pending_count)
+ ret = glusterd_op_sm_inject_all_acc(&event->txn_id);
- /*ret = glusterd_unlock (MY_UUID);
+out:
+ if (ret)
+ ret = glusterd_op_sm_locking_failed(&event->txn_id);
- if (ret)
- goto out;
- */
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
+ return ret;
+}
- list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
- GF_ASSERT (peerinfo);
+static int
+glusterd_op_ac_send_unlock(glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ uint32_t pending_count = 0;
+ dict_t *dict = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
+ {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > opinfo.txn_generation)
+ continue;
+
+ if (!peerinfo->connected || !peerinfo->mgmt || !peerinfo->locked)
+ continue;
+ if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) &&
+ (glusterd_op_get_op() != GD_OP_SYNC_VOLUME))
+ continue;
+ /* Based on the op_version,
+ * release the cluster or mgmt_v3 lock */
+ if (priv->op_version < GD_OP_VERSION_3_6_0) {
+ proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_UNLOCK];
+ if (proc->fn) {
+ ret = proc->fn(NULL, this, peerinfo);
+ if (ret) {
+ opinfo.op_errstr = gf_strdup(
+ "Unlocking failed for one of "
+ "the peer.");
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_CLUSTER_UNLOCK_FAILED,
+ "Unlocking failed for operation"
+ " volume %s on peer %s",
+ gd_op_list[opinfo.op], peerinfo->hostname);
+ continue;
+ }
+ pending_count++;
+ peerinfo->locked = _gf_false;
+ }
+ } else {
+ dict = glusterd_op_get_ctx();
+ dict_ref(dict);
- if (!peerinfo->connected || !peerinfo->mgmt)
- continue;
- if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) &&
- (glusterd_op_get_op() != GD_OP_SYNC_VOLUME))
- continue;
+ proc = &peerinfo->mgmt_v3->proctable[GLUSTERD_MGMT_V3_UNLOCK];
+ if (proc->fn) {
+ ret = dict_set_static_ptr(dict, "peerinfo", peerinfo);
+ if (ret) {
+ opinfo.op_errstr = gf_strdup(
+ "Unlocking failed for one of the "
+ "peer.");
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_CLUSTER_UNLOCK_FAILED,
+ "Unlocking failed for operation"
+ " volume %s on peer %s",
+ gd_op_list[opinfo.op], peerinfo->hostname);
+ dict_unref(dict);
+ continue;
+ }
- proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_CLUSTER_UNLOCK];
- if (proc->fn) {
- ret = proc->fn (NULL, this, peerinfo);
- if (ret)
- continue;
- pending_count++;
+ ret = proc->fn(NULL, this, dict);
+ if (ret) {
+ opinfo.op_errstr = gf_strdup(
+ "Unlocking failed for one of the "
+ "peer.");
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_CLUSTER_UNLOCK_FAILED,
+ "Unlocking failed for operation"
+ " volume %s on peer %s",
+ gd_op_list[opinfo.op], peerinfo->hostname);
+ dict_unref(dict);
+ continue;
}
+ pending_count++;
+ peerinfo->locked = _gf_false;
+ }
}
+ }
+ RCU_READ_UNLOCK;
- opinfo.pending_count = pending_count;
- if (!opinfo.pending_count)
- ret = glusterd_op_sm_inject_all_acc ();
+ opinfo.pending_count = pending_count;
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
- return ret;
+ if (!opinfo.pending_count)
+ ret = glusterd_op_sm_inject_all_acc(&event->txn_id);
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
+ return ret;
}
static int
-glusterd_op_ac_ack_drain (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_ack_drain(glusterd_op_sm_event_t *event, void *ctx)
{
- int ret = 0;
+ int ret = 0;
- if (opinfo.pending_count > 0)
- opinfo.pending_count--;
+ if (opinfo.pending_count > 0)
+ opinfo.pending_count--;
- if (!opinfo.pending_count)
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, NULL);
+ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ if (!opinfo.pending_count)
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACK, &event->txn_id,
+ NULL);
- return ret;
+ gf_msg_debug(THIS->name, 0, "Returning with %d", ret);
+
+ return ret;
}
static int
-glusterd_op_ac_send_unlock_drain (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_send_unlock_drain(glusterd_op_sm_event_t *event, void *ctx)
{
- return glusterd_op_ac_ack_drain (event, ctx);
+ return glusterd_op_ac_ack_drain(event, ctx);
}
static int
-glusterd_op_ac_lock (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_lock(glusterd_op_sm_event_t *event, void *ctx)
{
- glusterd_op_lock_ctx_t *lock_ctx = NULL;
- int32_t ret = 0;
-
-
- GF_ASSERT (event);
- GF_ASSERT (ctx);
-
- lock_ctx = (glusterd_op_lock_ctx_t *)ctx;
-
- ret = glusterd_lock (lock_ctx->uuid);
-
- gf_log ("", GF_LOG_DEBUG, "Lock Returned %d", ret);
+ int32_t ret = 0;
+ char *volname = NULL;
+ char *globalname = NULL;
+ glusterd_op_lock_ctx_t *lock_ctx = NULL;
+ xlator_t *this = NULL;
+ uint32_t op_errno = 0;
+ glusterd_conf_t *conf = NULL;
+ uint32_t timeout = 0;
+
+ GF_ASSERT(event);
+ GF_ASSERT(ctx);
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ lock_ctx = (glusterd_op_lock_ctx_t *)ctx;
+
+ /* If the req came from a node running on older op_version
+ * the dict won't be present. Based on it acquiring a cluster
+ * or mgmt_v3 lock */
+ if (lock_ctx->dict == NULL) {
+ ret = glusterd_lock(lock_ctx->uuid);
+ glusterd_op_lock_send_resp(lock_ctx->req, ret);
+ } else {
+ /* Cli will add timeout key to dict if the default timeout is
+ * other than 2 minutes. Here we use this value to check whether
+ * mgmt_v3_lock_timeout should be set to default value or we
+ * need to change the value according to timeout value
+ * i.e, timeout + 120 seconds. */
+ ret = dict_get_uint32(lock_ctx->dict, "timeout", &timeout);
+ if (!ret)
+ conf->mgmt_v3_lock_timeout = timeout + 120;
+
+ ret = dict_get_strn(lock_ctx->dict, "volname", SLEN("volname"),
+ &volname);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to acquire volname");
+ else {
+ ret = glusterd_mgmt_v3_lock(volname, lock_ctx->uuid, &op_errno,
+ "vol");
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL,
+ "Unable to acquire lock for %s", volname);
+ goto out;
+ }
+ ret = dict_get_strn(lock_ctx->dict, "globalname", SLEN("globalname"),
+ &globalname);
+ if (!ret) {
+ ret = glusterd_mgmt_v3_lock(globalname, lock_ctx->uuid, &op_errno,
+ "global");
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_GET_FAIL,
+ "Unable to acquire lock for %s", globalname);
+ }
+ out:
+ glusterd_op_mgmt_v3_lock_send_resp(lock_ctx->req, &event->txn_id, ret);
- glusterd_op_lock_send_resp (lock_ctx->req, ret);
+ dict_unref(lock_ctx->dict);
+ }
- return ret;
+ gf_msg_debug(THIS->name, 0, "Lock Returned %d", ret);
+ return ret;
}
static int
-glusterd_op_ac_unlock (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_unlock(glusterd_op_sm_event_t *event, void *ctx)
{
- int ret = 0;
- glusterd_op_lock_ctx_t *lock_ctx = NULL;
-
- GF_ASSERT (event);
- GF_ASSERT (ctx);
-
- lock_ctx = (glusterd_op_lock_ctx_t *)ctx;
+ int32_t ret = 0;
+ char *volname = NULL;
+ char *globalname = NULL;
+ glusterd_op_lock_ctx_t *lock_ctx = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(event);
+ GF_ASSERT(ctx);
+
+ this = THIS;
+ priv = this->private;
+
+ lock_ctx = (glusterd_op_lock_ctx_t *)ctx;
+
+ /* If the req came from a node running on older op_version
+ * the dict won't be present. Based on it releasing the cluster
+ * or mgmt_v3 lock */
+ if (lock_ctx->dict == NULL) {
+ ret = glusterd_unlock(lock_ctx->uuid);
+ glusterd_op_unlock_send_resp(lock_ctx->req, ret);
+ } else {
+ ret = dict_get_strn(lock_ctx->dict, "volname", SLEN("volname"),
+ &volname);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to acquire volname");
+ else {
+ ret = glusterd_mgmt_v3_unlock(volname, lock_ctx->uuid, "vol");
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Unable to release lock for %s", volname);
+ goto out;
+ }
- ret = glusterd_unlock (lock_ctx->uuid);
+ ret = dict_get_strn(lock_ctx->dict, "globalname", SLEN("globalname"),
+ &globalname);
+ if (!ret) {
+ ret = glusterd_mgmt_v3_unlock(globalname, lock_ctx->uuid, "global");
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Unable to release lock for %s", globalname);
+ }
+ out:
+ glusterd_op_mgmt_v3_unlock_send_resp(lock_ctx->req, &event->txn_id,
+ ret);
- gf_log ("", GF_LOG_DEBUG, "Unlock Returned %d", ret);
+ dict_unref(lock_ctx->dict);
+ }
- glusterd_op_unlock_send_resp (lock_ctx->req, ret);
+ gf_msg_debug(this->name, 0, "Unlock Returned %d", ret);
- return ret;
+ if (priv->pending_quorum_action)
+ glusterd_do_quorum_action();
+ return ret;
}
static int
-glusterd_op_ac_local_unlock (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_local_unlock(glusterd_op_sm_event_t *event, void *ctx)
{
- int ret = 0;
- uuid_t *originator = NULL;
+ int ret = 0;
+ uuid_t *originator = NULL;
- GF_ASSERT (event);
- GF_ASSERT (ctx);
+ GF_ASSERT(event);
+ GF_ASSERT(ctx);
- originator = (uuid_t *) ctx;
+ originator = (uuid_t *)ctx;
- ret = glusterd_unlock (*originator);
+ ret = glusterd_unlock(*originator);
- gf_log ("", GF_LOG_DEBUG, "Unlock Returned %d", ret);
+ gf_msg_debug(THIS->name, 0, "Unlock Returned %d", ret);
- return ret;
+ return ret;
}
static int
-glusterd_op_ac_rcvd_lock_acc (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_rcvd_lock_acc(glusterd_op_sm_event_t *event, void *ctx)
{
- int ret = 0;
+ int ret = 0;
- GF_ASSERT (event);
+ GF_ASSERT(event);
- if (opinfo.pending_count > 0)
- opinfo.pending_count--;
+ if (opinfo.pending_count > 0)
+ opinfo.pending_count--;
- if (opinfo.pending_count > 0)
- goto out;
+ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
+
+ if (opinfo.pending_count > 0)
+ goto out;
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL);
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACC, &event->txn_id,
+ NULL);
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
out:
- return ret;
+ return ret;
}
-static int
-glusterd_dict_set_volid (dict_t *dict, char *volname, char **op_errstr)
+int
+glusterd_dict_set_volid(dict_t *dict, char *volname, char **op_errstr)
{
- int ret = -1;
- glusterd_volinfo_t *volinfo = NULL;
- char *volid = NULL;
- char msg[1024] = {0,};
-
- if (!dict || !volname)
- goto out;
-
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- snprintf (msg, sizeof (msg), "Volume %s does not exist",
- volname);
- gf_log (THIS->name, GF_LOG_ERROR, "%s", msg);
- *op_errstr = gf_strdup (msg);
- goto out;
- }
- volid = gf_strdup (uuid_utoa (volinfo->volume_id));
- if (!volid) {
- ret = -1;
- goto out;
- }
- ret = dict_set_dynstr (dict, "vol-id", volid);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to set volume id in dictionary");
- goto out;
- }
+ int ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volid = NULL;
+ char msg[1024] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (!dict || !volname) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(msg, sizeof(msg), FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
+ }
+ volid = gf_strdup(uuid_utoa(volinfo->volume_id));
+ if (!volid) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstrn(dict, "vol-id", SLEN("vol-id"), volid);
+ if (ret) {
+ snprintf(msg, sizeof(msg),
+ "Failed to set volume id of volume"
+ " %s",
+ volname);
+ GF_FREE(volid);
+ goto out;
+ }
out:
- return ret;
+ if (msg[0] != '\0') {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_ID_SET_FAIL, "%s", msg);
+ *op_errstr = gf_strdup(msg);
+ }
+ return ret;
}
int
-glusterd_op_build_payload (dict_t **req, char **op_errstr)
+gd_set_commit_hash(dict_t *dict)
{
- int ret = -1;
- void *ctx = NULL;
- dict_t *req_dict = NULL;
- glusterd_op_t op = GD_OP_NONE;
- char *volname = NULL;
- uint32_t status_cmd = GF_CLI_STATUS_NONE;
- char *errstr = NULL;
-
- GF_ASSERT (req);
-
- req_dict = dict_new ();
- if (!req_dict)
- goto out;
+ struct timeval tv;
+ uint32_t hash;
+
+ /*
+ * We need a commit hash that won't conflict with others we might have
+ * set, or zero which is the implicit value if we never have. Using
+ * seconds<<3 like this ensures that we'll only get a collision if two
+ * consecutive rebalances are separated by exactly 2^29 seconds - about
+ * 17 years - and even then there's only a 1/8 chance of a collision in
+ * the low order bits. It's far more likely that this code will have
+ * changed completely by then. If not, call me in 2031.
+ *
+ * P.S. Time zone changes? Yeah, right.
+ */
+ gettimeofday(&tv, NULL);
+ hash = tv.tv_sec << 3;
+
+ /*
+ * Make sure at least one of those low-order bits is set. The extra
+ * shifting is because not all machines have sub-millisecond time
+ * resolution.
+ */
+ hash |= 1 << ((tv.tv_usec >> 10) % 3);
+
+ return dict_set_uint32(dict, "commit-hash", hash);
+}
- op = glusterd_op_get_op ();
- ctx = (void*)glusterd_op_get_ctx ();
+int
+glusterd_op_build_payload(dict_t **req, char **op_errstr, dict_t *op_ctx)
+{
+ int ret = -1;
+ void *ctx = NULL;
+ dict_t *dict = NULL;
+ dict_t *req_dict = NULL;
+ glusterd_op_t op = GD_OP_NONE;
+ char *volname = NULL;
+ uint32_t status_cmd = GF_CLI_STATUS_NONE;
+ xlator_t *this = NULL;
+ gf_boolean_t do_common = _gf_false;
+
+ GF_ASSERT(req);
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ req_dict = dict_new();
+ if (!req_dict)
+ goto out;
+
+ if (!op_ctx) {
+ op = glusterd_op_get_op();
+ ctx = (void *)glusterd_op_get_ctx();
if (!ctx) {
- gf_log ("", GF_LOG_ERROR, "Null Context for "
- "op %d", op);
- ret = -1;
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_OPTIONS_GIVEN,
+ "Null Context for "
+ "op %d",
+ op);
+ ret = -1;
+ goto out;
}
- switch (op) {
- case GD_OP_CREATE_VOLUME:
- {
- dict_t *dict = ctx;
- ++glusterfs_port;
- ret = dict_set_int32 (dict, "port",
- glusterfs_port);
- if (ret)
- goto out;
- dict_copy (dict, req_dict);
- }
- break;
-
- case GD_OP_GSYNC_SET:
- {
- dict_t *dict = ctx;
- ret = glusterd_op_gsync_args_get (dict,
- &errstr,
- &volname,
- NULL);
- if (ret == 0) {
- ret = glusterd_dict_set_volid
- (dict, volname, op_errstr);
- if (ret)
- goto out;
- }
- dict_copy (dict, req_dict);
- }
- break;
-
- case GD_OP_SET_VOLUME:
- {
- dict_t *dict = ctx;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_CRITICAL,
- "volname is not present in "
- "operation ctx");
- goto out;
- }
- if (strcmp (volname, "help") &&
- strcmp (volname, "help-xml")) {
- ret = glusterd_dict_set_volid
- (dict, volname, op_errstr);
- if (ret)
- goto out;
- }
- dict_copy (dict, req_dict);
- }
- break;
-
- case GD_OP_STATUS_VOLUME:
- {
- dict_t *dict = ctx;
- ret = dict_get_uint32 (dict, "cmd",
- &status_cmd);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Status command not present "
- "in op ctx");
- goto out;
- }
- if (GF_CLI_STATUS_ALL & status_cmd) {
- dict_copy (dict, req_dict);
- break;
- }
- }
+ } else {
+#define GD_SYNC_OPCODE_KEY "sync-mgmt-operation"
+ ret = dict_get_int32(op_ctx, GD_SYNC_OPCODE_KEY, (int32_t *)&op);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get volume"
+ " operation");
+ goto out;
+ }
+ ctx = op_ctx;
+#undef GD_SYNC_OPCODE_KEY
+ }
+
+ dict = ctx;
+ switch (op) {
+ case GD_OP_CREATE_VOLUME: {
+ ++glusterfs_port;
+ ret = dict_set_int32n(dict, "port", SLEN("port"), glusterfs_port);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set port in "
+ "dictionary");
+ goto out;
+ }
+ dict_copy(dict, req_dict);
+ } break;
+
+ case GD_OP_GSYNC_CREATE:
+ case GD_OP_GSYNC_SET: {
+ ret = glusterd_op_gsync_args_get(dict, op_errstr, &volname, NULL,
+ NULL);
+ if (ret == 0) {
+ ret = glusterd_dict_set_volid(dict, volname, op_errstr);
+ if (ret)
+ goto out;
+ }
+ dict_copy(dict, req_dict);
+ } break;
+
+ case GD_OP_SET_VOLUME: {
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_DICT_GET_FAILED,
+ "volname is not present in "
+ "operation ctx");
+ goto out;
+ }
+ if (strcmp(volname, "help") && strcmp(volname, "help-xml") &&
+ strcasecmp(volname, "all")) {
+ ret = glusterd_dict_set_volid(dict, volname, op_errstr);
+ if (ret)
+ goto out;
+ }
+ dict_unref(req_dict);
+ req_dict = dict_ref(dict);
+ } break;
+
+ case GD_OP_REMOVE_BRICK: {
+ dict_t *dict = ctx;
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_DICT_GET_FAILED,
+ "volname is not present in "
+ "operation ctx");
+ goto out;
+ }
- case GD_OP_DELETE_VOLUME:
- case GD_OP_START_VOLUME:
- case GD_OP_STOP_VOLUME:
- case GD_OP_ADD_BRICK:
- case GD_OP_REPLACE_BRICK:
- case GD_OP_RESET_VOLUME:
- case GD_OP_REMOVE_BRICK:
- case GD_OP_LOG_ROTATE:
- case GD_OP_SYNC_VOLUME:
- case GD_OP_QUOTA:
- case GD_OP_PROFILE_VOLUME:
- case GD_OP_REBALANCE:
- case GD_OP_HEAL_VOLUME:
- case GD_OP_STATEDUMP_VOLUME:
- case GD_OP_CLEARLOCKS_VOLUME:
- case GD_OP_DEFRAG_BRICK_VOLUME:
- {
- dict_t *dict = ctx;
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_CRITICAL,
- "volname is not present in "
- "operation ctx");
- goto out;
- }
-
- ret = glusterd_dict_set_volid (dict, volname,
- op_errstr);
- if (ret)
- goto out;
- dict_copy (dict, req_dict);
- }
- break;
+ ret = glusterd_dict_set_volid(dict, volname, op_errstr);
+ if (ret)
+ goto out;
- default:
- break;
- }
+ if (gd_set_commit_hash(dict) != 0) {
+ goto out;
+ }
+
+ dict_unref(req_dict);
+ req_dict = dict_ref(dict);
+ } break;
+
+ case GD_OP_STATUS_VOLUME: {
+ ret = dict_get_uint32(dict, "cmd", &status_cmd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Status command not present "
+ "in op ctx");
+ goto out;
+ }
+ if (GF_CLI_STATUS_ALL & status_cmd) {
+ dict_copy(dict, req_dict);
+ break;
+ }
+ do_common = _gf_true;
+ } break;
- *req = req_dict;
- ret = 0;
+ case GD_OP_DELETE_VOLUME:
+ case GD_OP_START_VOLUME:
+ case GD_OP_STOP_VOLUME:
+ case GD_OP_ADD_BRICK:
+ case GD_OP_REPLACE_BRICK:
+ case GD_OP_RESET_VOLUME:
+ case GD_OP_LOG_ROTATE:
+ case GD_OP_QUOTA:
+ case GD_OP_PROFILE_VOLUME:
+ case GD_OP_HEAL_VOLUME:
+ case GD_OP_STATEDUMP_VOLUME:
+ case GD_OP_CLEARLOCKS_VOLUME:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ case GD_OP_BARRIER:
+ case GD_OP_BITROT:
+ case GD_OP_SCRUB_STATUS:
+ case GD_OP_SCRUB_ONDEMAND:
+ case GD_OP_RESET_BRICK: {
+ do_common = _gf_true;
+ } break;
+
+ case GD_OP_REBALANCE: {
+ if (gd_set_commit_hash(dict) != 0) {
+ goto out;
+ }
+ do_common = _gf_true;
+ } break;
-out:
- return ret;
-}
+ case GD_OP_SYNC_VOLUME:
+ case GD_OP_COPY_FILE:
+ case GD_OP_SYS_EXEC:
+ case GD_OP_GANESHA: {
+ dict_copy(dict, req_dict);
+ } break;
-static int
-glusterd_op_ac_send_stage_op (glusterd_op_sm_event_t *event, void *ctx)
-{
- int ret = 0;
- rpc_clnt_procedure_t *proc = NULL;
- glusterd_conf_t *priv = NULL;
- xlator_t *this = NULL;
- glusterd_peerinfo_t *peerinfo = NULL;
- dict_t *dict = NULL;
- char *op_errstr = NULL;
- glusterd_op_t op = GD_OP_NONE;
- uint32_t pending_count = 0;
-
- this = THIS;
- GF_ASSERT (this);
- priv = this->private;
- GF_ASSERT (priv);
-
- op = glusterd_op_get_op ();
-
- ret = glusterd_op_build_payload (&dict, &op_errstr);
+ default:
+ break;
+ }
+
+ /*
+ * This has been moved out of the switch so that multiple ops with
+ * other special needs can all "fall through" to it.
+ */
+ if (do_common) {
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Building payload failed");
- opinfo.op_errstr = op_errstr;
- goto out;
+ gf_msg(this->name, GF_LOG_CRITICAL, -ret, GD_MSG_DICT_GET_FAILED,
+ "volname is not present in "
+ "operation ctx");
+ goto out;
}
- /* rsp_dict NULL from source */
- ret = glusterd_op_stage_validate (op, dict, &op_errstr, NULL);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Staging failed");
- opinfo.op_errstr = op_errstr;
+ if (strcasecmp(volname, "all")) {
+ ret = glusterd_dict_set_volid(dict, volname, op_errstr);
+ if (ret)
goto out;
}
+ dict_copy(dict, req_dict);
+ }
- list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
- GF_ASSERT (peerinfo);
+ *req = req_dict;
+ ret = 0;
- if (!peerinfo->connected || !peerinfo->mgmt)
- continue;
- if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) &&
- (glusterd_op_get_op() != GD_OP_SYNC_VOLUME))
- continue;
+out:
+ return ret;
+}
- proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_STAGE_OP];
- GF_ASSERT (proc);
- if (proc->fn) {
- ret = dict_set_static_ptr (dict, "peerinfo", peerinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "failed to set peerinfo");
- goto out;
- }
+static int
+glusterd_op_ac_send_stage_op(glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = 0;
+ int ret1 = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+ char *op_errstr = NULL;
+ glusterd_op_t op = GD_OP_NONE;
+ uint32_t pending_count = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ op = glusterd_op_get_op();
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ "Failed to create rsp_dict");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_op_build_payload(&dict, &op_errstr, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL,
+ LOGSTR_BUILD_PAYLOAD, gd_op_list[op]);
+ if (op_errstr == NULL)
+ gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+
+ ret = glusterd_validate_quorum(this, op, dict, &op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_SERVER_QUORUM_NOT_MET,
+ "Server quorum not met. Rejecting operation.");
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+
+ ret = glusterd_op_stage_validate(op, dict, &op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VALIDATE_FAILED,
+ LOGSTR_STAGE_FAIL, gd_op_list[op], "localhost",
+ (op_errstr) ? ":" : " ", (op_errstr) ? op_errstr : " ");
+ if (op_errstr == NULL)
+ gf_asprintf(&op_errstr, OPERRSTR_STAGE_FAIL, "localhost");
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
+ {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > opinfo.txn_generation)
+ continue;
- ret = proc->fn (NULL, this, dict);
- if (ret)
- continue;
- pending_count++;
- }
- }
+ if (!peerinfo->connected || !peerinfo->mgmt)
+ continue;
+ if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) &&
+ (glusterd_op_get_op() != GD_OP_SYNC_VOLUME))
+ continue;
- opinfo.pending_count = pending_count;
+ proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_STAGE_OP];
+ GF_ASSERT(proc);
+ if (proc->fn) {
+ ret = dict_set_static_ptr(dict, "peerinfo", peerinfo);
+ if (ret) {
+ RCU_READ_UNLOCK;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to "
+ "set peerinfo");
+ goto out;
+ }
+
+ ret = proc->fn(NULL, this, dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_STAGE_REQ_SEND_FAIL,
+ "Failed to "
+ "send stage request for operation "
+ "'Volume %s' to peer %s",
+ gd_op_list[op], peerinfo->hostname);
+ continue;
+ }
+ pending_count++;
+ }
+ }
+ RCU_READ_UNLOCK;
+
+ opinfo.pending_count = pending_count;
out:
- if (dict)
- dict_unref (dict);
- if (ret) {
- glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL);
- opinfo.op_ret = ret;
- }
+ if (ret)
+ opinfo.op_ret = ret;
- gf_log ("glusterd", GF_LOG_INFO, "Sent op req to %d peers",
- opinfo.pending_count);
+ ret1 = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+ if (ret1)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
- if (!opinfo.pending_count)
- ret = glusterd_op_sm_inject_all_acc ();
+ if (rsp_dict)
+ dict_unref(rsp_dict);
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ if (dict)
+ dict_unref(dict);
+ if (ret) {
+ glusterd_op_sm_inject_event(GD_OP_EVENT_RCVD_RJT, &event->txn_id, NULL);
+ opinfo.op_ret = ret;
+ }
- return ret;
+ gf_msg_debug(this->name, 0,
+ "Sent stage op request for "
+ "'Volume %s' to %d peers",
+ gd_op_list[op], opinfo.pending_count);
+
+ if (!opinfo.pending_count)
+ ret = glusterd_op_sm_inject_all_acc(&event->txn_id);
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
+
+ return ret;
}
-static int32_t
-glusterd_op_start_rb_timer (dict_t *dict)
+/* This function takes a dict and converts the uuid values of key specified
+ * into hostnames
+ */
+static int
+glusterd_op_volume_dict_uuid_to_hostname(dict_t *dict, const char *key_fmt,
+ int idx_min, int idx_max)
{
- int32_t op = 0;
- struct timeval timeout = {0, };
- glusterd_conf_t *priv = NULL;
- int32_t ret = -1;
- dict_t *rb_ctx = NULL;
+ int ret = -1;
+ int i = 0;
+ char key[128];
+ int keylen;
+ char *uuid_str = NULL;
+ uuid_t uuid = {
+ 0,
+ };
+ char *hostname = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(dict);
+ GF_ASSERT(key_fmt);
+
+ for (i = idx_min; i < idx_max; i++) {
+ keylen = snprintf(key, sizeof(key), key_fmt, i);
+ ret = dict_get_strn(dict, key, keylen, &uuid_str);
+ if (ret) {
+ ret = 0;
+ continue;
+ }
- GF_ASSERT (dict);
- priv = THIS->private;
+ gf_msg_debug(this->name, 0, "Got uuid %s", uuid_str);
- ret = dict_get_int32 (dict, "operation", &op);
+ ret = gf_uuid_parse(uuid_str, uuid);
+ /* if parsing fails don't error out
+ * let the original value be retained
+ */
if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "dict_get on operation failed");
+ ret = 0;
+ continue;
+ }
+
+ hostname = glusterd_uuid_to_hostname(uuid);
+ if (hostname) {
+ gf_msg_debug(this->name, 0, "%s -> %s", uuid_str, hostname);
+ ret = dict_set_dynstrn(dict, key, keylen, hostname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Error setting hostname %s to dict", hostname);
+ GF_FREE(hostname);
goto out;
+ }
}
+ }
- if (op != GF_REPLACE_OP_START) {
- ret = glusterd_op_sm_inject_all_acc ();
- goto out;
- }
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
- timeout.tv_sec = 5;
- timeout.tv_usec = 0;
+static int
+reassign_defrag_status(dict_t *dict, char *key, int keylen,
+ gf_defrag_status_t *status)
+{
+ int ret = 0;
+ if (!*status)
+ return ret;
- rb_ctx = dict_copy (dict, rb_ctx);
- if (!rb_ctx) {
- gf_log (THIS->name, GF_LOG_ERROR, "Couldn't copy "
- "replace brick context. Can't start replace brick");
- ret = -1;
- goto out;
- }
- priv->timer = gf_timer_call_after (THIS->ctx, timeout,
- glusterd_do_replace_brick,
- (void *) rb_ctx);
+ switch (*status) {
+ case GF_DEFRAG_STATUS_STARTED:
+ *status = GF_DEFRAG_STATUS_LAYOUT_FIX_STARTED;
+ break;
- ret = 0;
+ case GF_DEFRAG_STATUS_STOPPED:
+ *status = GF_DEFRAG_STATUS_LAYOUT_FIX_STOPPED;
+ break;
-out:
- return ret;
+ case GF_DEFRAG_STATUS_COMPLETE:
+ *status = GF_DEFRAG_STATUS_LAYOUT_FIX_COMPLETE;
+ break;
+
+ case GF_DEFRAG_STATUS_FAILED:
+ *status = GF_DEFRAG_STATUS_LAYOUT_FIX_FAILED;
+ break;
+ default:
+ break;
+ }
+
+ ret = dict_set_int32n(dict, key, keylen, *status);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to reset defrag %s in dict", key);
+
+ return ret;
}
-/* This function takes a dict and converts the uuid values of key specified
- * into hostnames
+/* Check and reassign the defrag_status enum got from the rebalance process
+ * of all peers so that the rebalance-status CLI command can display if a
+ * full-rebalance or just a fix-layout was carried out.
*/
static int
-glusterd_op_volume_dict_uuid_to_hostname (dict_t *dict, const char *key_fmt,
- int idx_min, int idx_max)
+glusterd_op_check_peer_defrag_status(dict_t *dict, int count)
{
- int ret = -1;
- int i = 0;
- char key[1024];
- char *uuid_str = NULL;
- uuid_t uuid = {0,};
- char *hostname = NULL;
-
- GF_ASSERT (dict);
- GF_ASSERT (key_fmt);
-
- for (i = idx_min; i < idx_max; i++) {
- memset (key, 0, sizeof (key));
- snprintf (key, sizeof (key), key_fmt, i);
- ret = dict_get_str (dict, key, &uuid_str);
- if (ret)
- continue;
+ glusterd_volinfo_t *volinfo = NULL;
+ gf_defrag_status_t status = GF_DEFRAG_STATUS_NOT_STARTED;
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+ char *volname = NULL;
+ int ret = -1;
+ int i = 1;
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND,
+ FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
+ }
+
+ if (volinfo->rebal.defrag_cmd != GF_DEFRAG_CMD_START_LAYOUT_FIX) {
+ /* Fix layout was not issued; we don't need to reassign
+ the status */
+ ret = 0;
+ goto out;
+ }
- gf_log (THIS->name, GF_LOG_DEBUG, "Got uuid %s",
- uuid_str);
+ do {
+ keylen = snprintf(key, sizeof(key), "status-%d", i);
+ ret = dict_get_int32n(dict, key, keylen, (int32_t *)&status);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to get defrag %s", key);
+ goto out;
+ }
+ ret = reassign_defrag_status(dict, key, keylen, &status);
+ if (ret)
+ goto out;
+ i++;
+ } while (i <= count);
- ret = uuid_parse (uuid_str, uuid);
- /* if parsing fails don't error out
- * let the original value be retained
- */
- if (ret)
- continue;
+ ret = 0;
+out:
+ return ret;
+}
- hostname = glusterd_uuid_to_hostname (uuid);
- if (hostname) {
- gf_log (THIS->name, GF_LOG_DEBUG, "%s -> %s",
- uuid_str, hostname);
- ret = dict_set_dynstr (dict, key, hostname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Error setting hostname to dict");
- GF_FREE (hostname);
- goto out;
- }
- }
- }
+/* This function is used to verify if op_ctx indeed
+ requires modification. This is necessary since the
+ dictionary for certain commands might not have the
+ necessary keys required for the op_ctx modification
+ to succeed.
+
+ Special Cases:
+ - volume status all
+ - volume status
+
+ Regular Cases:
+ - volume status <volname> <brick>
+ - volume status <volname> mem
+ - volume status <volname> clients
+ - volume status <volname> inode
+ - volume status <volname> fd
+ - volume status <volname> callpool
+ - volume status <volname> tasks
+*/
+static gf_boolean_t
+glusterd_is_volume_status_modify_op_ctx(uint32_t cmd)
+{
+ if ((cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE) {
+ if (cmd & GF_CLI_STATUS_BRICK)
+ return _gf_false;
+ if (cmd & GF_CLI_STATUS_ALL)
+ return _gf_false;
+ return _gf_true;
+ }
+ return _gf_false;
+}
+
+int
+glusterd_op_modify_port_key(dict_t *op_ctx, int brick_index_max)
+{
+ char *port = NULL;
+ int i = 0;
+ int ret = -1;
+ char key[64] = {0};
+ int keylen;
+ char old_key[64] = {0};
+ int old_keylen;
+
+ for (i = 0; i <= brick_index_max; i++) {
+ keylen = snprintf(key, sizeof(key), "brick%d.rdma_port", i);
+ ret = dict_get_strn(op_ctx, key, keylen, &port);
+
+ if (ret) {
+ old_keylen = snprintf(old_key, sizeof(old_key), "brick%d.port", i);
+ ret = dict_get_strn(op_ctx, old_key, old_keylen, &port);
+ if (ret)
+ goto out;
+
+ ret = dict_set_strn(op_ctx, key, keylen, port);
+ if (ret)
+ goto out;
+ ret = dict_set_nstrn(op_ctx, old_key, old_keylen, "\0", SLEN("\0"));
+ if (ret)
+ goto out;
+ }
+ }
out:
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
}
/* This function is used to modify the op_ctx dict before sending it back
@@ -2177,2651 +4834,3331 @@ out:
* hostnames etc.
*/
void
-glusterd_op_modify_op_ctx (glusterd_op_t op)
+glusterd_op_modify_op_ctx(glusterd_op_t op, void *ctx)
{
- int ret = -1;
- dict_t *op_ctx = NULL;
- int brick_index_max = -1;
- int other_count = 0;
- int count = 0;
- uint32_t cmd = GF_CLI_STATUS_NONE;
-
+ int ret = -1;
+ dict_t *op_ctx = NULL;
+ int brick_index_max = -1;
+ int other_count = 0;
+ int count = 0;
+ uint32_t cmd = GF_CLI_STATUS_NONE;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *port = 0;
+ int i = 0;
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+
+ if (ctx)
+ op_ctx = ctx;
+ else
op_ctx = glusterd_op_get_ctx();
- if (!op_ctx) {
- gf_log (THIS->name, GF_LOG_CRITICAL,
- "Operation context is not present.");
- goto out;
- }
- switch (op) {
+ if (!op_ctx) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_OPCTX_NULL,
+ "Operation context is not present.");
+ goto out;
+ }
+
+ switch (op) {
case GD_OP_STATUS_VOLUME:
- ret = dict_get_uint32 (op_ctx, "cmd", &cmd);
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "Failed to get status cmd");
- goto out;
- }
- if (!(cmd & GF_CLI_STATUS_NFS || cmd & GF_CLI_STATUS_SHD ||
- (cmd & GF_CLI_STATUS_MASK) == GF_CLI_STATUS_NONE)) {
- gf_log (THIS->name, GF_LOG_INFO,
- "op_ctx modification not required for status "
- "operation being performed");
- goto out;
- }
+ ret = dict_get_uint32(op_ctx, "cmd", &cmd);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to get status cmd");
+ goto out;
+ }
- ret = dict_get_int32 (op_ctx, "brick-index-max",
- &brick_index_max);
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "Failed to get brick-index-max");
- goto out;
- }
+ if (!glusterd_is_volume_status_modify_op_ctx(cmd)) {
+ gf_msg_debug(this->name, 0,
+ "op_ctx modification not required for status "
+ "operation being performed");
+ goto out;
+ }
+
+ ret = dict_get_int32n(op_ctx, "brick-index-max",
+ SLEN("brick-index-max"), &brick_index_max);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to get brick-index-max");
+ goto out;
+ }
+
+ ret = dict_get_int32n(op_ctx, "other-count", SLEN("other-count"),
+ &other_count);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to get other-count");
+ goto out;
+ }
+
+ count = brick_index_max + other_count + 1;
- ret = dict_get_int32 (op_ctx, "other-count", &other_count);
+ /*
+ * a glusterd lesser than version 3.7 will be sending the
+ * rdma port in older key. Changing that value from here
+ * to support backward compatibility
+ */
+ ret = dict_get_strn(op_ctx, "volname", SLEN("volname"), &volname);
+ if (ret)
+ goto out;
+
+ for (i = 0; i <= brick_index_max; i++) {
+ keylen = snprintf(key, sizeof(key), "brick%d.rdma_port", i);
+ ret = dict_get_strn(op_ctx, key, keylen, &port);
if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "Failed to get other-count");
+ ret = dict_set_nstrn(op_ctx, key, keylen, "\0", SLEN("\0"));
+ if (ret)
goto out;
}
-
- count = brick_index_max + other_count + 1;
-
- ret = glusterd_op_volume_dict_uuid_to_hostname (op_ctx,
- "brick%d.path",
- 0, count);
+ }
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret)
+ goto out;
+ if (conf->op_version < GD_OP_VERSION_3_7_0 &&
+ volinfo->transport_type == GF_TRANSPORT_RDMA) {
+ ret = glusterd_op_modify_port_key(op_ctx, brick_index_max);
if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "Failed uuid to hostname conversion");
+ goto out;
+ }
+ /* add 'brick%d.peerid' into op_ctx with value of 'brick%d.path'.
+ nfs/sshd like services have this additional uuid */
+ {
+ char *uuid_str = NULL;
+ char *uuid = NULL;
+ int i;
+
+ for (i = brick_index_max + 1; i < count; i++) {
+ keylen = snprintf(key, sizeof(key), "brick%d.path", i);
+ ret = dict_get_strn(op_ctx, key, keylen, &uuid_str);
+ if (!ret) {
+ keylen = snprintf(key, sizeof(key), "brick%d.peerid",
+ i);
+ uuid = gf_strdup(uuid_str);
+ if (!uuid) {
+ gf_msg_debug(this->name, 0,
+ "unable to create dup of"
+ " uuid_str");
+ continue;
+ }
+ ret = dict_set_dynstrn(op_ctx, key, keylen, uuid);
+ if (ret != 0) {
+ GF_FREE(uuid);
+ }
+ }
+ }
+ }
- break;
+ ret = glusterd_op_volume_dict_uuid_to_hostname(
+ op_ctx, "brick%d.path", 0, count);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_CONVERSION_FAILED,
+ "Failed uuid to hostname conversion");
+
+ break;
case GD_OP_PROFILE_VOLUME:
- ret = dict_get_str_boolean (op_ctx, "nfs", _gf_false);
- if (!ret)
- goto out;
+ ret = dict_get_str_boolean(op_ctx, "nfs", _gf_false);
+ if (!ret)
+ goto out;
- ret = dict_get_int32 (op_ctx, "count", &count);
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "Failed to get brick count");
- goto out;
- }
+ ret = dict_get_int32n(op_ctx, "count", SLEN("count"), &count);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to get brick count");
+ goto out;
+ }
- ret = glusterd_op_volume_dict_uuid_to_hostname (op_ctx,
- "%d-brick",
- 1, (count + 1));
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "Failed uuid to hostname conversion");
+ ret = glusterd_op_volume_dict_uuid_to_hostname(op_ctx, "%d-brick",
+ 1, (count + 1));
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_CONVERSION_FAILED,
+ "Failed uuid to hostname conversion");
- break;
+ break;
/* For both rebalance and remove-brick status, the glusterd op is the
* same
*/
case GD_OP_DEFRAG_BRICK_VOLUME:
- ret = dict_get_int32 (op_ctx, "count", &count);
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "Failed to get count");
- goto out;
+ case GD_OP_SCRUB_STATUS:
+ case GD_OP_SCRUB_ONDEMAND:
+ ret = dict_get_int32n(op_ctx, "count", SLEN("count"), &count);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to get count");
+ goto out;
+ }
+
+ /* add 'node-name-%d' into op_ctx with value uuid_str.
+ this will be used to convert to hostname later */
+ {
+ char *uuid_str = NULL;
+ char *uuid = NULL;
+ int i;
+
+ for (i = 1; i <= count; i++) {
+ keylen = snprintf(key, sizeof(key), "node-uuid-%d", i);
+ ret = dict_get_strn(op_ctx, key, keylen, &uuid_str);
+ if (!ret) {
+ keylen = snprintf(key, sizeof(key), "node-name-%d", i);
+ uuid = gf_strdup(uuid_str);
+ if (!uuid) {
+ gf_msg_debug(this->name, 0,
+ "unable to create dup of"
+ " uuid_str");
+ continue;
+ }
+ ret = dict_set_dynstrn(op_ctx, key, keylen, uuid);
+ if (ret != 0) {
+ GF_FREE(uuid);
+ }
+ }
}
-
- ret = glusterd_op_volume_dict_uuid_to_hostname (op_ctx,
- "node-uuid-%d",
- 1, (count + 1));
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "Failed uuid to hostname conversion");
+ }
+
+ ret = glusterd_op_volume_dict_uuid_to_hostname(
+ op_ctx, "node-name-%d", 1, (count + 1));
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_CONVERSION_FAILED,
+ "Failed uuid to hostname conversion");
+
+ /* Since Both rebalance and bitrot scrub status/ondemand
+ * are going to use same code path till here, we should
+ * break in case of scrub status.
+ */
+ if (op == GD_OP_SCRUB_STATUS || op == GD_OP_SCRUB_ONDEMAND) {
break;
+ }
- default:
- ret = 0;
- gf_log (THIS->name, GF_LOG_INFO,
- "op_ctx modification not required");
- break;
+ ret = glusterd_op_check_peer_defrag_status(op_ctx, count);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DEFRAG_STATUS_UPDATE_FAIL,
+ "Failed to reset defrag status for fix-layout");
+ break;
- }
+ default:
+ ret = 0;
+ gf_msg_debug(this->name, 0, "op_ctx modification not required");
+ break;
+ }
out:
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING,
- "op_ctx modification failed");
- return;
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_OPCTX_UPDATE_FAIL,
+ "op_ctx modification failed");
+ return;
}
-static int
-glusterd_op_commit_hook (glusterd_op_t op, dict_t *op_ctx, glusterd_commit_hook_type_t type)
+int
+glusterd_op_commit_hook(glusterd_op_t op, dict_t *op_ctx,
+ glusterd_commit_hook_type_t type)
{
- glusterd_conf_t *priv = NULL;
- char hookdir[PATH_MAX] = {0, };
- char scriptdir[PATH_MAX] = {0, };
- char type_subdir[256] = {0, };
- char *cmd_subdir = NULL;
- int ret = -1;
-
- priv = THIS->private;
- switch (type) {
- case GD_COMMIT_HOOK_NONE:
- case GD_COMMIT_HOOK_MAX:
- /*Won't be called*/
- break;
-
- case GD_COMMIT_HOOK_PRE:
- strcpy (type_subdir, "pre");
- break;
- case GD_COMMIT_HOOK_POST:
- strcpy (type_subdir, "post");
- break;
- }
-
- cmd_subdir = glusterd_hooks_get_hooks_cmd_subdir (op);
- if (strlen (cmd_subdir) == 0)
- return -1;
-
- GLUSTERD_GET_HOOKS_DIR (hookdir, GLUSTERD_HOOK_VER, priv);
- snprintf (scriptdir, sizeof (scriptdir), "%s/%s/%s",
- hookdir, cmd_subdir, type_subdir);
-
- switch (type) {
- case GD_COMMIT_HOOK_NONE:
- case GD_COMMIT_HOOK_MAX:
- /*Won't be called*/
- break;
-
- case GD_COMMIT_HOOK_PRE:
- ret = glusterd_hooks_run_hooks (scriptdir, op, op_ctx,
- type);
- break;
- case GD_COMMIT_HOOK_POST:
- ret = glusterd_hooks_post_stub_enqueue (scriptdir, op,
- op_ctx);
- break;
- }
-
- return ret;
+ glusterd_conf_t *priv = NULL;
+ char hookdir[PATH_MAX] = {
+ 0,
+ };
+ char scriptdir[PATH_MAX] = {
+ 0,
+ };
+ char *type_subdir = "";
+ char *cmd_subdir = NULL;
+ int ret = -1;
+ int32_t len = 0;
+
+ priv = THIS->private;
+ switch (type) {
+ case GD_COMMIT_HOOK_NONE:
+ case GD_COMMIT_HOOK_MAX:
+ /*Won't be called*/
+ break;
+
+ case GD_COMMIT_HOOK_PRE:
+ type_subdir = "pre";
+ break;
+ case GD_COMMIT_HOOK_POST:
+ type_subdir = "post";
+ break;
+ }
+
+ cmd_subdir = glusterd_hooks_get_hooks_cmd_subdir(op);
+ if (strlen(cmd_subdir) == 0)
+ return -1;
+
+ GLUSTERD_GET_HOOKS_DIR(hookdir, GLUSTERD_HOOK_VER, priv);
+ len = snprintf(scriptdir, sizeof(scriptdir), "%s/%s/%s", hookdir,
+ cmd_subdir, type_subdir);
+ if ((len < 0) || (len >= sizeof(scriptdir))) {
+ return -1;
+ }
+
+ switch (type) {
+ case GD_COMMIT_HOOK_NONE:
+ case GD_COMMIT_HOOK_MAX:
+ /*Won't be called*/
+ break;
+
+ case GD_COMMIT_HOOK_PRE:
+ ret = glusterd_hooks_run_hooks(scriptdir, op, op_ctx, type);
+ break;
+ case GD_COMMIT_HOOK_POST:
+ ret = glusterd_hooks_post_stub_enqueue(scriptdir, op, op_ctx);
+ break;
+ }
+
+ return ret;
}
static int
-glusterd_op_ac_send_commit_op (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_send_commit_op(glusterd_op_sm_event_t *event, void *ctx)
{
- int ret = 0;
- rpc_clnt_procedure_t *proc = NULL;
- glusterd_conf_t *priv = NULL;
- xlator_t *this = NULL;
- dict_t *dict = NULL;
- dict_t *op_dict = NULL;
- glusterd_peerinfo_t *peerinfo = NULL;
- char *op_errstr = NULL;
- glusterd_op_t op = GD_OP_NONE;
- uint32_t pending_count = 0;
-
- this = THIS;
- GF_ASSERT (this);
- priv = this->private;
- GF_ASSERT (priv);
-
- op = glusterd_op_get_op ();
- op_dict = glusterd_op_get_ctx ();
-
- ret = glusterd_op_build_payload (&dict, &op_errstr);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Building payload failed");
- opinfo.op_errstr = op_errstr;
- goto out;
- }
-
- glusterd_op_commit_hook (op, op_dict, GD_COMMIT_HOOK_PRE);
- ret = glusterd_op_commit_perform (op, dict, &op_errstr, NULL); //rsp_dict invalid for source
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Commit failed");
- opinfo.op_errstr = op_errstr;
- goto out;
- }
+ int ret = 0;
+ int ret1 = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ dict_t *dict = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ char *op_errstr = NULL;
+ glusterd_op_t op = GD_OP_NONE;
+ uint32_t pending_count = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ op = glusterd_op_get_op();
+
+ ret = glusterd_op_build_payload(&dict, &op_errstr, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL,
+ LOGSTR_BUILD_PAYLOAD, gd_op_list[op]);
+ if (op_errstr == NULL)
+ gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+
+ ret = glusterd_op_commit_perform(op, dict, &op_errstr,
+ NULL); // rsp_dict invalid for source
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ LOGSTR_COMMIT_FAIL, gd_op_list[op], "localhost",
+ (op_errstr) ? ":" : " ", (op_errstr) ? op_errstr : " ");
+ if (op_errstr == NULL)
+ gf_asprintf(&op_errstr, OPERRSTR_COMMIT_FAIL, "localhost");
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
+ {
+ /* Only send requests to peers who were available before the
+ * transaction started
+ */
+ if (peerinfo->generation > opinfo.txn_generation)
+ continue;
- glusterd_op_commit_hook (op, op_dict, GD_COMMIT_HOOK_POST);
+ if (!peerinfo->connected || !peerinfo->mgmt)
+ continue;
+ if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) &&
+ (glusterd_op_get_op() != GD_OP_SYNC_VOLUME))
+ continue;
- list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
- GF_ASSERT (peerinfo);
+ proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_COMMIT_OP];
+ GF_ASSERT(proc);
+ if (proc->fn) {
+ ret = dict_set_static_ptr(dict, "peerinfo", peerinfo);
+ if (ret) {
+ RCU_READ_UNLOCK;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to set peerinfo");
+ goto out;
+ }
+ ret = proc->fn(NULL, this, dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_COMMIT_REQ_SEND_FAIL,
+ "Failed to "
+ "send commit request for operation "
+ "'Volume %s' to peer %s",
+ gd_op_list[op], peerinfo->hostname);
+ continue;
+ }
+ pending_count++;
+ }
+ }
+ RCU_READ_UNLOCK;
+
+ opinfo.pending_count = pending_count;
+ gf_msg_debug(this->name, 0,
+ "Sent commit op req for 'Volume %s' "
+ "to %d peers",
+ gd_op_list[op], opinfo.pending_count);
+out:
+ if (dict)
+ dict_unref(dict);
- if (!peerinfo->connected || !peerinfo->mgmt)
- continue;
- if ((peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED) &&
- (glusterd_op_get_op() != GD_OP_SYNC_VOLUME))
- continue;
+ if (ret)
+ opinfo.op_ret = ret;
- proc = &peerinfo->mgmt->proctable[GLUSTERD_MGMT_COMMIT_OP];
- GF_ASSERT (proc);
- if (proc->fn) {
- ret = dict_set_static_ptr (dict, "peerinfo", peerinfo);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to set peerinfo");
- goto out;
- }
- ret = proc->fn (NULL, this, dict);
- if (ret)
- continue;
- pending_count++;
- }
- }
-
- opinfo.pending_count = pending_count;
- gf_log (THIS->name, GF_LOG_INFO, "Sent op req to %d peers",
- opinfo.pending_count);
-out:
- if (dict)
- dict_unref (dict);
- if (ret) {
- glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL);
- opinfo.op_ret = ret;
- }
+ ret1 = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+ if (ret1)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
- if (!opinfo.pending_count) {
- if (op == GD_OP_REPLACE_BRICK) {
- ret = glusterd_op_start_rb_timer (op_dict);
+ if (ret) {
+ glusterd_op_sm_inject_event(GD_OP_EVENT_RCVD_RJT, &event->txn_id, NULL);
+ opinfo.op_ret = ret;
+ }
- } else {
- glusterd_op_modify_op_ctx (op);
- ret = glusterd_op_sm_inject_all_acc ();
- }
- goto err;
+ if (!opinfo.pending_count) {
+ if (op == GD_OP_REPLACE_BRICK) {
+ ret = glusterd_op_sm_inject_all_acc(&event->txn_id);
+ } else {
+ glusterd_op_modify_op_ctx(op, NULL);
+ ret = glusterd_op_sm_inject_all_acc(&event->txn_id);
}
+ goto err;
+ }
err:
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret);
-
- return ret;
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
+ return ret;
}
static int
-glusterd_op_ac_rcvd_stage_op_acc (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_rcvd_stage_op_acc(glusterd_op_sm_event_t *event, void *ctx)
{
- int ret = 0;
+ int ret = 0;
- GF_ASSERT (event);
+ GF_ASSERT(event);
- if (opinfo.pending_count > 0)
- opinfo.pending_count--;
+ if (opinfo.pending_count > 0)
+ opinfo.pending_count--;
- if (opinfo.pending_count > 0)
- goto out;
+ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_STAGE_ACC, NULL);
+ if (opinfo.pending_count > 0)
+ goto out;
+
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_STAGE_ACC, &event->txn_id,
+ NULL);
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
- return ret;
+ return ret;
}
static int
-glusterd_op_ac_stage_op_failed (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_stage_op_failed(glusterd_op_sm_event_t *event, void *ctx)
{
- int ret = 0;
+ int ret = 0;
- GF_ASSERT (event);
+ GF_ASSERT(event);
- if (opinfo.pending_count > 0)
- opinfo.pending_count--;
+ if (opinfo.pending_count > 0)
+ opinfo.pending_count--;
- if (opinfo.pending_count > 0)
- goto out;
+ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, NULL);
+ if (opinfo.pending_count > 0)
+ goto out;
+
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACK, &event->txn_id,
+ NULL);
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
- return ret;
+ return ret;
}
static int
-glusterd_op_ac_commit_op_failed (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_commit_op_failed(glusterd_op_sm_event_t *event, void *ctx)
{
- int ret = 0;
+ int ret = 0;
- GF_ASSERT (event);
+ GF_ASSERT(event);
- if (opinfo.pending_count > 0)
- opinfo.pending_count--;
+ if (opinfo.pending_count > 0)
+ opinfo.pending_count--;
- if (opinfo.pending_count > 0)
- goto out;
+ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, NULL);
+ if (opinfo.pending_count > 0)
+ goto out;
+
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACK, &event->txn_id,
+ NULL);
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
- return ret;
+ return ret;
}
static int
-glusterd_op_ac_brick_op_failed (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_brick_op_failed(glusterd_op_sm_event_t *event, void *ctx)
{
- int ret = 0;
- glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL;
- gf_boolean_t free_errstr = _gf_false;
-
- GF_ASSERT (event);
- GF_ASSERT (ctx);
- ev_ctx = ctx;
-
- ret = glusterd_remove_pending_entry (&opinfo.pending_bricks, ev_ctx->pending_node->node);
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "unknown response received ");
- ret = -1;
- free_errstr = _gf_true;
- goto out;
- }
- if (opinfo.brick_pending_count > 0)
- opinfo.brick_pending_count--;
- if (opinfo.op_ret == 0)
- opinfo.op_ret = ev_ctx->op_ret;
-
- if (opinfo.op_errstr == NULL)
- opinfo.op_errstr = ev_ctx->op_errstr;
- else
- free_errstr = _gf_true;
-
- if (opinfo.brick_pending_count > 0)
- goto out;
-
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, ev_ctx->commit_ctx);
+ int ret = 0;
+ glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL;
+ gf_boolean_t free_errstr = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(event);
+ GF_ASSERT(ctx);
+ ev_ctx = ctx;
+
+ ret = glusterd_remove_pending_entry(&opinfo.pending_bricks,
+ ev_ctx->pending_node->node);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNKNOWN_RESPONSE,
+ "unknown response received ");
+ ret = -1;
+ free_errstr = _gf_true;
+ goto out;
+ }
+ if (opinfo.brick_pending_count > 0)
+ opinfo.brick_pending_count--;
+ if (opinfo.op_ret == 0)
+ opinfo.op_ret = ev_ctx->op_ret;
+
+ if (opinfo.op_errstr == NULL)
+ opinfo.op_errstr = ev_ctx->op_errstr;
+ else
+ free_errstr = _gf_true;
+
+ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
+
+ if (opinfo.brick_pending_count > 0)
+ goto out;
+
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACK, &event->txn_id,
+ ev_ctx->commit_ctx);
out:
- if (ev_ctx->rsp_dict)
- dict_unref (ev_ctx->rsp_dict);
- if (free_errstr && ev_ctx->op_errstr)
- GF_FREE (ev_ctx->op_errstr);
- GF_FREE (ctx);
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
-
- return ret;
+ if (ev_ctx->rsp_dict)
+ dict_unref(ev_ctx->rsp_dict);
+ if (free_errstr && ev_ctx->op_errstr)
+ GF_FREE(ev_ctx->op_errstr);
+ GF_FREE(ctx);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+
+ return ret;
}
static int
-glusterd_op_ac_rcvd_commit_op_acc (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_rcvd_commit_op_acc(glusterd_op_sm_event_t *event, void *ctx)
{
- dict_t *op_ctx = NULL;
- int ret = 0;
- gf_boolean_t commit_ack_inject = _gf_true;
- glusterd_op_t op = GD_OP_NONE;
-
- op = glusterd_op_get_op ();
- GF_ASSERT (event);
-
- if (opinfo.pending_count > 0)
- opinfo.pending_count--;
-
- if (opinfo.pending_count > 0)
- goto out;
-
- if (op == GD_OP_REPLACE_BRICK) {
- op_ctx = glusterd_op_get_ctx ();
- if (!op_ctx) {
- gf_log (THIS->name, GF_LOG_CRITICAL, "Operation "
- "context is not present.");
- ret = -1;
- goto out;
- }
-
- ret = glusterd_op_start_rb_timer (op_ctx);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Couldn't start "
- "replace-brick operation.");
- goto out;
- }
-
- commit_ack_inject = _gf_false;
- goto out;
+ int ret = 0;
+ gf_boolean_t commit_ack_inject = _gf_true;
+ glusterd_op_t op = GD_OP_NONE;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ op = glusterd_op_get_op();
+ GF_ASSERT(event);
+
+ if (opinfo.pending_count > 0)
+ opinfo.pending_count--;
+
+ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
+
+ if (opinfo.pending_count > 0)
+ goto out;
+
+ if (op == GD_OP_REPLACE_BRICK) {
+ ret = glusterd_op_sm_inject_all_acc(&event->txn_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RBOP_START_FAIL,
+ "Couldn't start "
+ "replace-brick operation.");
+ goto out;
}
+ commit_ack_inject = _gf_false;
+ goto out;
+ }
out:
- if (commit_ack_inject) {
- if (ret)
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL);
- else if (!opinfo.pending_count) {
- glusterd_op_modify_op_ctx (op);
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, NULL);
- }
- /*else do nothing*/
+ if (commit_ack_inject) {
+ if (ret)
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_RCVD_RJT,
+ &event->txn_id, NULL);
+ else if (!opinfo.pending_count) {
+ glusterd_op_modify_op_ctx(op, NULL);
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_COMMIT_ACC,
+ &event->txn_id, NULL);
}
+ /*else do nothing*/
+ }
- return ret;
+ return ret;
}
static int
-glusterd_op_ac_rcvd_unlock_acc (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_rcvd_unlock_acc(glusterd_op_sm_event_t *event, void *ctx)
{
- int ret = 0;
+ int ret = 0;
- GF_ASSERT (event);
+ GF_ASSERT(event);
- if (opinfo.pending_count > 0)
- opinfo.pending_count--;
+ if (opinfo.pending_count > 0)
+ opinfo.pending_count--;
- if (opinfo.pending_count > 0)
- goto out;
+ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
+
+ if (opinfo.pending_count > 0)
+ goto out;
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACC, NULL);
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACC, &event->txn_id,
+ NULL);
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
out:
- return ret;
+ return ret;
}
int32_t
-glusterd_op_clear_errstr() {
- opinfo.op_errstr = NULL;
- return 0;
+glusterd_op_clear_errstr()
+{
+ opinfo.op_errstr = NULL;
+ return 0;
}
int32_t
-glusterd_op_set_ctx (void *ctx)
+glusterd_op_set_ctx(void *ctx)
{
+ opinfo.op_ctx = ctx;
- opinfo.op_ctx = ctx;
-
- return 0;
-
+ return 0;
}
int32_t
-glusterd_op_reset_ctx ()
+glusterd_op_reset_ctx()
{
+ glusterd_op_set_ctx(NULL);
- glusterd_op_set_ctx (NULL);
-
- return 0;
+ return 0;
}
int32_t
-glusterd_op_txn_complete ()
+glusterd_op_txn_complete(uuid_t *txn_id)
{
- int32_t ret = -1;
- glusterd_conf_t *priv = NULL;
- int32_t op = -1;
- int32_t op_ret = 0;
- int32_t op_errno = 0;
- rpcsvc_request_t *req = NULL;
- void *ctx = NULL;
- char *op_errstr = NULL;
-
-
- priv = THIS->private;
- GF_ASSERT (priv);
-
- op = glusterd_op_get_op ();
- ctx = glusterd_op_get_ctx ();
- op_ret = opinfo.op_ret;
- op_errno = opinfo.op_errno;
- req = opinfo.req;
- if (opinfo.op_errstr)
- op_errstr = opinfo.op_errstr;
-
- opinfo.op_ret = 0;
- opinfo.op_errno = 0;
- glusterd_op_clear_op ();
- glusterd_op_reset_ctx ();
- glusterd_op_clear_errstr ();
-
- ret = glusterd_unlock (MY_UUID);
-
- /* unlock cant/shouldnt fail here!! */
- if (ret) {
- gf_log ("glusterd", GF_LOG_CRITICAL,
- "Unable to clear local lock, ret: %d", ret);
- } else {
- gf_log ("glusterd", GF_LOG_INFO, "Cleared local lock");
- }
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ int32_t op = -1;
+ int32_t op_ret = 0;
+ int32_t op_errno = 0;
+ rpcsvc_request_t *req = NULL;
+ void *ctx = NULL;
+ char *op_errstr = NULL;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ op = glusterd_op_get_op();
+ ctx = glusterd_op_get_ctx();
+ op_ret = opinfo.op_ret;
+ op_errno = opinfo.op_errno;
+ req = opinfo.req;
+ if (opinfo.op_errstr)
+ op_errstr = opinfo.op_errstr;
+
+ opinfo.op_ret = 0;
+ opinfo.op_errno = 0;
+ glusterd_op_clear_op();
+ glusterd_op_reset_ctx();
+ glusterd_op_clear_errstr();
+
+ /* Based on the op-version, we release the cluster or mgmt_v3 lock */
+ if (priv->op_version < GD_OP_VERSION_3_6_0) {
+ ret = glusterd_unlock(MY_UUID);
+ /* unlock can't/shouldn't fail here!! */
+ if (ret)
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_GLUSTERD_UNLOCK_FAIL,
+ "Unable to clear local lock, ret: %d", ret);
+ else
+ gf_msg_debug(this->name, 0, "Cleared local lock");
+ } else {
+ ret = dict_get_strn(ctx, "volname", SLEN("volname"), &volname);
+ if (ret)
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED,
+ "No Volume name present. "
+ "Locks have not been held.");
- ret = glusterd_op_send_cli_response (op, op_ret,
- op_errno, req, ctx, op_errstr);
+ if (volname) {
+ ret = glusterd_mgmt_v3_unlock(volname, MY_UUID, "vol");
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Unable to release lock for %s", volname);
+ }
+ }
+
+ ret = glusterd_op_send_cli_response(op, op_ret, op_errno, req, ctx,
+ op_errstr);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_CLI_RESP,
+ "Responding to cli failed, "
+ "ret: %d",
+ ret);
+ // Ignore this error, else state machine blocks
+ ret = 0;
+ }
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Responding to cli failed, ret: %d",
- ret);
- //Ignore this error, else state machine blocks
- ret = 0;
- }
+ if (op_errstr && (strcmp(op_errstr, "")))
+ GF_FREE(op_errstr);
- glusterd_op_free_ctx (op, ctx);
- if (op_errstr && (strcmp (op_errstr, "")))
- GF_FREE (op_errstr);
+ if (priv->pending_quorum_action)
+ glusterd_do_quorum_action();
+ /* Clearing the transaction opinfo */
+ ret = glusterd_clear_txn_opinfo(txn_id);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_CLEAR_FAIL,
+ "Unable to clear transaction's opinfo");
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
static int
-glusterd_op_ac_unlocked_all (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_unlocked_all(glusterd_op_sm_event_t *event, void *ctx)
{
- int ret = 0;
+ int ret = 0;
- GF_ASSERT (event);
+ GF_ASSERT(event);
- ret = glusterd_op_txn_complete ();
+ ret = glusterd_op_txn_complete(&event->txn_id);
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
- return ret;
+ return ret;
}
static int
-glusterd_op_ac_stage_op (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_stage_op(glusterd_op_sm_event_t *event, void *ctx)
{
- int ret = -1;
- glusterd_req_ctx_t *req_ctx = NULL;
- int32_t status = 0;
- dict_t *rsp_dict = NULL;
- char *op_errstr = NULL;
- dict_t *dict = NULL;
-
- GF_ASSERT (ctx);
+ int ret = -1;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ int32_t status = 0;
+ dict_t *rsp_dict = NULL;
+ char *op_errstr = NULL;
+ dict_t *dict = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = NULL;
+ glusterd_op_info_t txn_op_info = {
+ {0},
+ };
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GF_ASSERT(ctx);
+
+ req_ctx = ctx;
+
+ dict = req_ctx->dict;
+
+ rsp_dict = dict_new();
+ if (!rsp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ "Failed to get new dictionary");
+ return -1;
+ }
+
+ status = glusterd_op_stage_validate(req_ctx->op, dict, &op_errstr,
+ rsp_dict);
+
+ if (status) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VALIDATE_FAILED,
+ "Stage failed on operation"
+ " 'Volume %s', Status : %d",
+ gd_op_list[req_ctx->op], status);
+ }
+
+ txn_id = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+
+ if (txn_id)
+ gf_uuid_copy(*txn_id, event->txn_id);
+ else {
+ ret = -1;
+ goto out;
+ }
+ ret = glusterd_get_txn_opinfo(&event->txn_id, &txn_op_info);
+
+ ret = dict_set_bin(rsp_dict, "transaction_id", txn_id, sizeof(*txn_id));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set transaction id.");
+ GF_FREE(txn_id);
+ txn_id = NULL;
+ goto out;
+ }
+
+ ret = glusterd_op_stage_send_resp(req_ctx->req, req_ctx->op, status,
+ op_errstr, rsp_dict);
- req_ctx = ctx;
-
- dict = req_ctx->dict;
-
- rsp_dict = dict_new ();
- if (!rsp_dict) {
- gf_log ("", GF_LOG_DEBUG,
- "Out of memory");
- return -1;
- }
-
- status = glusterd_op_stage_validate (req_ctx->op, dict, &op_errstr,
- rsp_dict);
-
- if (status) {
- gf_log ("", GF_LOG_ERROR, "Validate failed: %d", status);
- }
-
- ret = glusterd_op_stage_send_resp (req_ctx->req, req_ctx->op,
- status, op_errstr, rsp_dict);
+out:
+ if (op_errstr && (strcmp(op_errstr, "")))
+ GF_FREE(op_errstr);
- if (op_errstr && (strcmp (op_errstr, "")))
- GF_FREE (op_errstr);
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ /* for no volname transactions, the txn_opinfo needs to be cleaned up
+ * as there's no unlock event triggered. However if the originator node of
+ * this transaction is still running with a version lower than 60000,
+ * txn_opinfo can't be cleared as that'll lead to a race of referring op_ctx
+ * after it's being freed.
+ */
+ if (txn_op_info.skip_locking && priv->op_version >= GD_OP_VERSION_6_0 &&
+ txn_id)
+ ret = glusterd_clear_txn_opinfo(txn_id);
- if (rsp_dict)
- dict_unref (rsp_dict);
+ if (rsp_dict)
+ dict_unref(rsp_dict);
- return ret;
+ return ret;
}
static gf_boolean_t
-glusterd_need_brick_op (glusterd_op_t op)
+glusterd_need_brick_op(glusterd_op_t op)
{
- gf_boolean_t ret = _gf_false;
+ gf_boolean_t ret = _gf_false;
- GF_ASSERT (GD_OP_NONE < op && op < GD_OP_MAX);
+ GF_ASSERT(GD_OP_NONE < op && op < GD_OP_MAX);
- switch (op) {
+ switch (op) {
case GD_OP_PROFILE_VOLUME:
case GD_OP_STATUS_VOLUME:
case GD_OP_DEFRAG_BRICK_VOLUME:
case GD_OP_HEAL_VOLUME:
- ret = _gf_true;
- break;
+ case GD_OP_SCRUB_STATUS:
+ case GD_OP_SCRUB_ONDEMAND:
+ ret = _gf_true;
+ break;
default:
- ret = _gf_false;
- }
+ ret = _gf_false;
+ }
- return ret;
+ return ret;
}
-static dict_t*
-glusterd_op_init_commit_rsp_dict (glusterd_op_t op)
+dict_t *
+glusterd_op_init_commit_rsp_dict(glusterd_op_t op)
{
- dict_t *rsp_dict = NULL;
- dict_t *op_ctx = NULL;
+ dict_t *rsp_dict = NULL;
+ dict_t *op_ctx = NULL;
- GF_ASSERT (GD_OP_NONE < op && op < GD_OP_MAX);
+ GF_ASSERT(GD_OP_NONE < op && op < GD_OP_MAX);
- if (glusterd_need_brick_op (op)) {
- op_ctx = glusterd_op_get_ctx ();
- GF_ASSERT (op_ctx);
- rsp_dict = dict_ref (op_ctx);
- } else {
- rsp_dict = dict_new ();
- }
+ if (glusterd_need_brick_op(op)) {
+ op_ctx = glusterd_op_get_ctx();
+ GF_ASSERT(op_ctx);
+ rsp_dict = dict_ref(op_ctx);
+ } else {
+ rsp_dict = dict_new();
+ }
- return rsp_dict;
+ return rsp_dict;
}
static int
-glusterd_op_ac_commit_op (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_commit_op(glusterd_op_sm_event_t *event, void *ctx)
{
- int ret = 0;
- glusterd_req_ctx_t *req_ctx = NULL;
- int32_t status = 0;
- char *op_errstr = NULL;
- dict_t *dict = NULL;
- dict_t *rsp_dict = NULL;
-
- GF_ASSERT (ctx);
-
- req_ctx = ctx;
-
- dict = req_ctx->dict;
-
- rsp_dict = glusterd_op_init_commit_rsp_dict (req_ctx->op);
- if (NULL == rsp_dict)
- return -1;
-
- glusterd_op_commit_hook (req_ctx->op, dict, GD_COMMIT_HOOK_PRE);
-
- if (GD_OP_CLEARLOCKS_VOLUME == req_ctx->op) {
- /*clear locks should be run only on
- * originator glusterd*/
- status = 0;
-
- } else {
- status = glusterd_op_commit_perform (req_ctx->op, dict,
- &op_errstr, rsp_dict);
- }
-
- if (status) {
- gf_log (THIS->name, GF_LOG_ERROR, "Commit failed: %d", status);
- } else {
- /* On successful commit */
- glusterd_op_commit_hook (req_ctx->op, dict,
- GD_COMMIT_HOOK_POST);
- }
-
- ret = glusterd_op_commit_send_resp (req_ctx->req, req_ctx->op,
- status, op_errstr, rsp_dict);
-
- glusterd_op_fini_ctx ();
- if (op_errstr && (strcmp (op_errstr, "")))
- GF_FREE (op_errstr);
+ int ret = 0;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ int32_t status = 0;
+ char *op_errstr = NULL;
+ dict_t *dict = NULL;
+ dict_t *rsp_dict = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = NULL;
+ glusterd_op_info_t txn_op_info = {
+ {0},
+ };
+ gf_boolean_t need_cleanup = _gf_true;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(ctx);
+
+ req_ctx = ctx;
+
+ dict = req_ctx->dict;
+
+ rsp_dict = glusterd_op_init_commit_rsp_dict(req_ctx->op);
+ if (NULL == rsp_dict)
+ return -1;
+
+ if (GD_OP_CLEARLOCKS_VOLUME == req_ctx->op) {
+ /*clear locks should be run only on
+ * originator glusterd*/
+ status = 0;
+
+ } else {
+ status = glusterd_op_commit_perform(req_ctx->op, dict, &op_errstr,
+ rsp_dict);
+ }
+
+ if (status)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Commit of operation "
+ "'Volume %s' failed: %d",
+ gd_op_list[req_ctx->op], status);
+
+ txn_id = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+
+ if (txn_id)
+ gf_uuid_copy(*txn_id, event->txn_id);
+ else {
+ ret = -1;
+ goto out;
+ }
+ ret = glusterd_get_txn_opinfo(&event->txn_id, &txn_op_info);
+ if (ret) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_TRANS_OPINFO_GET_FAIL,
+ "Unable to get transaction opinfo "
+ "for transaction ID : %s",
+ uuid_utoa(event->txn_id));
+ goto out;
+ }
+
+ ret = dict_set_bin(rsp_dict, "transaction_id", txn_id, sizeof(*txn_id));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set transaction id.");
+ if (txn_op_info.skip_locking)
+ ret = glusterd_clear_txn_opinfo(txn_id);
+ need_cleanup = _gf_false;
+ GF_FREE(txn_id);
+ goto out;
+ }
+
+ ret = glusterd_op_commit_send_resp(req_ctx->req, req_ctx->op, status,
+ op_errstr, rsp_dict);
- if (rsp_dict)
- dict_unref (rsp_dict);
-
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret);
-
- return ret;
+out:
+ if (op_errstr && (strcmp(op_errstr, "")))
+ GF_FREE(op_errstr);
+
+ if (rsp_dict)
+ dict_unref(rsp_dict);
+ /* for no volname transactions, the txn_opinfo needs to be cleaned up
+ * as there's no unlock event triggered
+ */
+ if (need_cleanup && txn_id && txn_op_info.skip_locking)
+ ret = glusterd_clear_txn_opinfo(txn_id);
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
+
+ return ret;
}
static int
-glusterd_op_ac_send_commit_failed (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_send_commit_failed(glusterd_op_sm_event_t *event, void *ctx)
{
- int ret = 0;
- glusterd_req_ctx_t *req_ctx = NULL;
- dict_t *op_ctx = NULL;
+ int ret = 0;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ dict_t *op_ctx = NULL;
- GF_ASSERT (ctx);
+ GF_ASSERT(ctx);
- req_ctx = ctx;
+ req_ctx = ctx;
- op_ctx = glusterd_op_get_ctx ();
+ op_ctx = glusterd_op_get_ctx();
- ret = glusterd_op_commit_send_resp (req_ctx->req, req_ctx->op,
- opinfo.op_ret, opinfo.op_errstr,
- op_ctx);
+ ret = glusterd_op_commit_send_resp(req_ctx->req, req_ctx->op, opinfo.op_ret,
+ opinfo.op_errstr, op_ctx);
- glusterd_op_fini_ctx ();
- if (opinfo.op_errstr && (strcmp (opinfo.op_errstr, ""))) {
- GF_FREE (opinfo.op_errstr);
- opinfo.op_errstr = NULL;
- }
+ if (opinfo.op_errstr && (strcmp(opinfo.op_errstr, ""))) {
+ GF_FREE(opinfo.op_errstr);
+ opinfo.op_errstr = NULL;
+ }
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
- return ret;
+ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
+
+ gf_msg_debug(THIS->name, 0, "Returning with %d", ret);
+ return ret;
}
static int
-glusterd_op_sm_transition_state (glusterd_op_info_t *opinfo,
- glusterd_op_sm_t *state,
- glusterd_op_sm_event_type_t event_type)
+glusterd_op_sm_transition_state(glusterd_op_info_t *opinfo,
+ glusterd_op_sm_t *state,
+ glusterd_op_sm_event_type_t event_type)
{
- glusterd_conf_t *conf = NULL;
+ glusterd_conf_t *conf = NULL;
- GF_ASSERT (state);
- GF_ASSERT (opinfo);
+ GF_ASSERT(state);
+ GF_ASSERT(opinfo);
- conf = THIS->private;
- GF_ASSERT (conf);
+ conf = THIS->private;
+ GF_ASSERT(conf);
- (void) glusterd_sm_tr_log_transition_add (&conf->op_sm_log,
- opinfo->state.state,
- state[event_type].next_state,
- event_type);
+ (void)glusterd_sm_tr_log_transition_add(
+ &conf->op_sm_log, opinfo->state.state, state[event_type].next_state,
+ event_type);
- opinfo->state.state = state[event_type].next_state;
- return 0;
+ opinfo->state.state = state[event_type].next_state;
+ return 0;
}
int32_t
-glusterd_op_stage_validate (glusterd_op_t op, dict_t *dict, char **op_errstr,
- dict_t *rsp_dict)
+glusterd_op_stage_validate(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
{
- int ret = -1;
+ int ret = -1;
+ xlator_t *this = THIS;
- switch (op) {
- case GD_OP_CREATE_VOLUME:
- ret = glusterd_op_stage_create_volume (dict, op_errstr);
- break;
-
- case GD_OP_START_VOLUME:
- ret = glusterd_op_stage_start_volume (dict, op_errstr);
- break;
+ switch (op) {
+ case GD_OP_CREATE_VOLUME:
+ ret = glusterd_op_stage_create_volume(dict, op_errstr, rsp_dict);
+ break;
- case GD_OP_STOP_VOLUME:
- ret = glusterd_op_stage_stop_volume (dict, op_errstr);
- break;
+ case GD_OP_START_VOLUME:
+ ret = glusterd_op_stage_start_volume(dict, op_errstr, rsp_dict);
+ break;
- case GD_OP_DELETE_VOLUME:
- ret = glusterd_op_stage_delete_volume (dict, op_errstr);
- break;
+ case GD_OP_STOP_VOLUME:
+ ret = glusterd_op_stage_stop_volume(dict, op_errstr);
+ break;
- case GD_OP_ADD_BRICK:
- ret = glusterd_op_stage_add_brick (dict, op_errstr);
- break;
+ case GD_OP_DELETE_VOLUME:
+ ret = glusterd_op_stage_delete_volume(dict, op_errstr);
+ break;
- case GD_OP_REPLACE_BRICK:
- ret = glusterd_op_stage_replace_brick (dict, op_errstr,
- rsp_dict);
- break;
+ case GD_OP_ADD_BRICK:
+ ret = glusterd_op_stage_add_brick(dict, op_errstr, rsp_dict);
+ break;
- case GD_OP_SET_VOLUME:
- ret = glusterd_op_stage_set_volume (dict, op_errstr);
- break;
+ case GD_OP_REPLACE_BRICK:
+ ret = glusterd_op_stage_replace_brick(dict, op_errstr, rsp_dict);
+ break;
- case GD_OP_RESET_VOLUME:
- ret = glusterd_op_stage_reset_volume (dict, op_errstr);
- break;
+ case GD_OP_SET_VOLUME:
+ ret = glusterd_op_stage_set_volume(dict, op_errstr);
+ break;
- case GD_OP_REMOVE_BRICK:
- ret = glusterd_op_stage_remove_brick (dict, op_errstr);
- break;
+ case GD_OP_GANESHA:
+ ret = glusterd_op_stage_set_ganesha(dict, op_errstr);
+ break;
- case GD_OP_LOG_ROTATE:
- ret = glusterd_op_stage_log_rotate (dict, op_errstr);
- break;
+ case GD_OP_RESET_VOLUME:
+ ret = glusterd_op_stage_reset_volume(dict, op_errstr);
+ break;
+ case GD_OP_REMOVE_BRICK:
+ ret = glusterd_op_stage_remove_brick(dict, op_errstr);
+ break;
- case GD_OP_SYNC_VOLUME:
- ret = glusterd_op_stage_sync_volume (dict, op_errstr);
- break;
+ case GD_OP_LOG_ROTATE:
+ ret = glusterd_op_stage_log_rotate(dict, op_errstr);
+ break;
- case GD_OP_GSYNC_SET:
- ret = glusterd_op_stage_gsync_set (dict, op_errstr);
- break;
+ case GD_OP_SYNC_VOLUME:
+ ret = glusterd_op_stage_sync_volume(dict, op_errstr);
+ break;
- case GD_OP_PROFILE_VOLUME:
- ret = glusterd_op_stage_stats_volume (dict, op_errstr);
- break;
+ case GD_OP_GSYNC_CREATE:
+ ret = glusterd_op_stage_gsync_create(dict, op_errstr);
+ break;
- case GD_OP_QUOTA:
- ret = glusterd_op_stage_quota (dict, op_errstr);
- break;
+ case GD_OP_GSYNC_SET:
+ ret = glusterd_op_stage_gsync_set(dict, op_errstr);
+ break;
- case GD_OP_STATUS_VOLUME:
- ret = glusterd_op_stage_status_volume (dict, op_errstr);
- break;
+ case GD_OP_PROFILE_VOLUME:
+ ret = glusterd_op_stage_stats_volume(dict, op_errstr);
+ break;
- case GD_OP_REBALANCE:
- case GD_OP_DEFRAG_BRICK_VOLUME:
- ret = glusterd_op_stage_rebalance (dict, op_errstr);
- break;
+ case GD_OP_QUOTA:
+ ret = glusterd_op_stage_quota(dict, op_errstr, rsp_dict);
+ break;
- case GD_OP_HEAL_VOLUME:
- ret = glusterd_op_stage_heal_volume (dict, op_errstr);
- break;
+ case GD_OP_STATUS_VOLUME:
+ ret = glusterd_op_stage_status_volume(dict, op_errstr);
+ break;
- case GD_OP_STATEDUMP_VOLUME:
- ret = glusterd_op_stage_statedump_volume (dict,
- op_errstr);
- break;
- case GD_OP_CLEARLOCKS_VOLUME:
- ret = glusterd_op_stage_clearlocks_volume (dict,
- op_errstr);
- break;
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ ret = glusterd_op_stage_rebalance(dict, op_errstr);
+ break;
- default:
- gf_log ("", GF_LOG_ERROR, "Unknown op %d",
- op);
- }
+ case GD_OP_HEAL_VOLUME:
+ ret = glusterd_op_stage_heal_volume(dict, op_errstr);
+ break;
+
+ case GD_OP_STATEDUMP_VOLUME:
+ ret = glusterd_op_stage_statedump_volume(dict, op_errstr);
+ break;
+ case GD_OP_CLEARLOCKS_VOLUME:
+ ret = glusterd_op_stage_clearlocks_volume(dict, op_errstr);
+ break;
+
+ case GD_OP_COPY_FILE:
+ ret = glusterd_op_stage_copy_file(dict, op_errstr);
+ break;
+
+ case GD_OP_SYS_EXEC:
+ ret = glusterd_op_stage_sys_exec(dict, op_errstr);
+ break;
+
+ case GD_OP_BARRIER:
+ ret = glusterd_op_stage_barrier(dict, op_errstr);
+ break;
+
+ case GD_OP_BITROT:
+ case GD_OP_SCRUB_STATUS:
+ case GD_OP_SCRUB_ONDEMAND:
+ ret = glusterd_op_stage_bitrot(dict, op_errstr, rsp_dict);
+ break;
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ default:
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "Unknown op %s", gd_op_list[op]);
+ }
- return ret;
+ gf_msg_debug(this->name, 0, "OP = %d. Returning %d", op, ret);
+ return ret;
}
+static void
+glusterd_wait_for_blockers(glusterd_conf_t *priv)
+{
+ while (GF_ATOMIC_GET(priv->blockers)) {
+ synccond_wait(&priv->cond_blockers, &priv->big_lock);
+ }
+}
int32_t
-glusterd_op_commit_perform (glusterd_op_t op, dict_t *dict, char **op_errstr,
- dict_t *rsp_dict)
+glusterd_op_commit_perform(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
{
- int ret = -1;
+ int ret = -1;
+ xlator_t *this = THIS;
- switch (op) {
- case GD_OP_CREATE_VOLUME:
- ret = glusterd_op_create_volume (dict, op_errstr);
- break;
-
- case GD_OP_START_VOLUME:
- ret = glusterd_op_start_volume (dict, op_errstr);
- break;
-
- case GD_OP_STOP_VOLUME:
- ret = glusterd_op_stop_volume (dict);
- break;
+ glusterd_op_commit_hook(op, dict, GD_COMMIT_HOOK_PRE);
+ switch (op) {
+ case GD_OP_CREATE_VOLUME:
+ ret = glusterd_op_create_volume(dict, op_errstr);
+ break;
- case GD_OP_DELETE_VOLUME:
- ret = glusterd_op_delete_volume (dict);
- break;
+ case GD_OP_START_VOLUME:
+ ret = glusterd_op_start_volume(dict, op_errstr);
+ break;
- case GD_OP_ADD_BRICK:
- ret = glusterd_op_add_brick (dict, op_errstr);
- break;
+ case GD_OP_STOP_VOLUME:
+ ret = glusterd_op_stop_volume(dict);
+ break;
+
+ case GD_OP_DELETE_VOLUME:
+ glusterd_wait_for_blockers(this->private);
+ ret = glusterd_op_delete_volume(dict);
+ break;
+
+ case GD_OP_ADD_BRICK:
+ glusterd_wait_for_blockers(this->private);
+ ret = glusterd_op_add_brick(dict, op_errstr);
+ break;
+
+ case GD_OP_REPLACE_BRICK:
+ glusterd_wait_for_blockers(this->private);
+ ret = glusterd_op_replace_brick(dict, rsp_dict);
+ break;
+
+ case GD_OP_SET_VOLUME:
+ ret = glusterd_op_set_volume(dict, op_errstr);
+ break;
+ case GD_OP_GANESHA:
+ ret = glusterd_op_set_ganesha(dict, op_errstr);
+ break;
+ case GD_OP_RESET_VOLUME:
+ ret = glusterd_op_reset_volume(dict, op_errstr);
+ break;
- case GD_OP_REPLACE_BRICK:
- ret = glusterd_op_replace_brick (dict, rsp_dict);
- break;
+ case GD_OP_REMOVE_BRICK:
+ glusterd_wait_for_blockers(this->private);
+ ret = glusterd_op_remove_brick(dict, op_errstr);
+ break;
- case GD_OP_SET_VOLUME:
- ret = glusterd_op_set_volume (dict);
- break;
+ case GD_OP_LOG_ROTATE:
+ ret = glusterd_op_log_rotate(dict);
+ break;
- case GD_OP_RESET_VOLUME:
- ret = glusterd_op_reset_volume (dict, op_errstr);
- break;
+ case GD_OP_SYNC_VOLUME:
+ ret = glusterd_op_sync_volume(dict, op_errstr, rsp_dict);
+ break;
- case GD_OP_REMOVE_BRICK:
- ret = glusterd_op_remove_brick (dict, op_errstr);
- break;
+ case GD_OP_GSYNC_CREATE:
+ ret = glusterd_op_gsync_create(dict, op_errstr, rsp_dict);
+ break;
- case GD_OP_LOG_ROTATE:
- ret = glusterd_op_log_rotate (dict);
- break;
+ case GD_OP_GSYNC_SET:
+ ret = glusterd_op_gsync_set(dict, op_errstr, rsp_dict);
+ break;
- case GD_OP_SYNC_VOLUME:
- ret = glusterd_op_sync_volume (dict, op_errstr, rsp_dict);
- break;
+ case GD_OP_PROFILE_VOLUME:
+ ret = glusterd_op_stats_volume(dict, op_errstr, rsp_dict);
+ break;
- case GD_OP_GSYNC_SET:
- ret = glusterd_op_gsync_set (dict, op_errstr, rsp_dict);
- break;
+ case GD_OP_QUOTA:
+ ret = glusterd_op_quota(dict, op_errstr, rsp_dict);
+ break;
- case GD_OP_PROFILE_VOLUME:
- ret = glusterd_op_stats_volume (dict, op_errstr,
- rsp_dict);
- break;
+ case GD_OP_STATUS_VOLUME:
+ ret = glusterd_op_status_volume(dict, op_errstr, rsp_dict);
+ break;
- case GD_OP_QUOTA:
- ret = glusterd_op_quota (dict, op_errstr);
- break;
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ ret = glusterd_op_rebalance(dict, op_errstr, rsp_dict);
+ break;
- case GD_OP_STATUS_VOLUME:
- ret = glusterd_op_status_volume (dict, op_errstr, rsp_dict);
- break;
+ case GD_OP_HEAL_VOLUME:
+ ret = glusterd_op_heal_volume(dict, op_errstr);
+ break;
- case GD_OP_REBALANCE:
- case GD_OP_DEFRAG_BRICK_VOLUME:
- ret = glusterd_op_rebalance (dict, op_errstr, rsp_dict);
- break;
+ case GD_OP_STATEDUMP_VOLUME:
+ ret = glusterd_op_statedump_volume(dict, op_errstr);
+ break;
- case GD_OP_HEAL_VOLUME:
- ret = glusterd_op_heal_volume (dict, op_errstr);
- break;
+ case GD_OP_CLEARLOCKS_VOLUME:
+ ret = glusterd_op_clearlocks_volume(dict, op_errstr, rsp_dict);
+ break;
- case GD_OP_STATEDUMP_VOLUME:
- ret = glusterd_op_statedump_volume (dict, op_errstr);
- break;
+ case GD_OP_COPY_FILE:
+ ret = glusterd_op_copy_file(dict, op_errstr);
+ break;
- case GD_OP_CLEARLOCKS_VOLUME:
- ret = glusterd_op_clearlocks_volume (dict, op_errstr);
- break;
+ case GD_OP_SYS_EXEC:
+ ret = glusterd_op_sys_exec(dict, op_errstr, rsp_dict);
+ break;
- default:
- gf_log ("", GF_LOG_ERROR, "Unknown op %d",
- op);
- break;
- }
+ case GD_OP_BARRIER:
+ ret = glusterd_op_barrier(dict, op_errstr);
+ break;
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ case GD_OP_BITROT:
+ case GD_OP_SCRUB_STATUS:
+ case GD_OP_SCRUB_ONDEMAND:
+ ret = glusterd_op_bitrot(dict, op_errstr, rsp_dict);
+ break;
- return ret;
-}
+ default:
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "Unknown op %s", gd_op_list[op]);
+ break;
+ }
-void
-_profile_volume_add_brick_rsp (dict_t *this, char *key, data_t *value,
- void *data)
-{
- char new_key[256] = {0};
- glusterd_pr_brick_rsp_conv_t *rsp_ctx = NULL;
- data_t *new_value = NULL;
-
- rsp_ctx = data;
- new_value = data_copy (value);
- GF_ASSERT (new_value);
- snprintf (new_key, sizeof (new_key), "%d-%s", rsp_ctx->count, key);
- dict_set (rsp_ctx->dict, new_key, new_value);
-}
+ if (ret == 0)
+ glusterd_op_commit_hook(op, dict, GD_COMMIT_HOOK_POST);
-int
-glusterd_profile_volume_brick_rsp (void *pending_entry,
- dict_t *rsp_dict, dict_t *op_ctx,
- char **op_errstr, gd_node_type type)
-{
- int ret = 0;
- glusterd_pr_brick_rsp_conv_t rsp_ctx = {0};
- int32_t count = 0;
- char brick[PATH_MAX+1024] = {0};
- char key[256] = {0};
- char *full_brick = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
- xlator_t *this = NULL;
- glusterd_conf_t *priv = NULL;
-
- GF_ASSERT (rsp_dict);
- GF_ASSERT (op_ctx);
- GF_ASSERT (op_errstr);
- GF_ASSERT (pending_entry);
-
- this = THIS;
- GF_ASSERT (this);
- priv = this->private;
- GF_ASSERT (priv);
-
- ret = dict_get_int32 (op_ctx, "count", &count);
- if (ret) {
- count = 1;
- } else {
- count++;
- }
- snprintf (key, sizeof (key), "%d-brick", count);
- if (type == GD_NODE_BRICK) {
- brickinfo = pending_entry;
- snprintf (brick, sizeof (brick), "%s:%s", brickinfo->hostname,
- brickinfo->path);
- } else if (type == GD_NODE_NFS) {
- snprintf (brick, sizeof (brick), "%s", uuid_utoa (MY_UUID));
- }
- full_brick = gf_strdup (brick);
- GF_ASSERT (full_brick);
- ret = dict_set_dynstr (op_ctx, key, full_brick);
-
- rsp_ctx.count = count;
- rsp_ctx.dict = op_ctx;
- dict_foreach (rsp_dict, _profile_volume_add_brick_rsp, &rsp_ctx);
- dict_del (op_ctx, "count");
- ret = dict_set_int32 (op_ctx, "count", count);
- return ret;
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
-//input-key: <replica-id>:<child-id>-*
-//output-key: <brick-id>-*
-void
-_heal_volume_add_shd_rsp (dict_t *this, char *key, data_t *value, void *data)
+static int
+glusterd_bricks_select_stop_volume(dict_t *dict, char **op_errstr,
+ struct cds_list_head *selected)
{
- char new_key[256] = {0,};
- char int_str[16] = {0};
- data_t *new_value = NULL;
- char *rxl_end = NULL;
- char *rxl_child_end = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- int rxl_id = 0;
- int rxl_child_id = 0;
- int brick_id = 0;
- int int_len = 0;
- int ret = 0;
- glusterd_heal_rsp_conv_t *rsp_ctx = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
-
- rsp_ctx = data;
- rxl_end = strchr (key, '-');
- if (!rxl_end)
- goto out;
-
- int_len = strlen (key) - strlen (rxl_end);
- strncpy (int_str, key, int_len);
- int_str[int_len] = '\0';
- ret = gf_string2int (int_str, &rxl_id);
- if (ret)
- goto out;
-
- rxl_child_end = strchr (rxl_end + 1, '-');
- if (!rxl_child_end)
- goto out;
-
- int_len = strlen (rxl_end) - strlen (rxl_child_end) - 1;
- strncpy (int_str, rxl_end + 1, int_len);
- int_str[int_len] = '\0';
- ret = gf_string2int (int_str, &rxl_child_id);
- if (ret)
+ int ret = 0;
+ int flags = 0;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_pending_node_t *pending_node = NULL;
+
+ ret = glusterd_op_stop_volume_args_get(dict, &volname, &flags);
+ if (ret)
+ goto out;
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ FMTSTR_CHECK_VOL_EXISTS, volname);
+ gf_asprintf(op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
+ }
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (glusterd_is_brick_started(brickinfo)) {
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
goto out;
-
- volinfo = rsp_ctx->volinfo;
- brick_id = rxl_id * volinfo->replica_count + rxl_child_id;
-
- if (!strcmp (rxl_child_end, "-status")) {
- brickinfo = glusterd_get_brickinfo_by_position (volinfo,
- brick_id);
- if (!brickinfo)
- goto out;
- if (!glusterd_is_local_brick (rsp_ctx->this, volinfo,
- brickinfo))
- goto out;
+ } else {
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ cds_list_add_tail(&pending_node->list, selected);
+ pending_node = NULL;
+ }
+ /*
+ * This is not really the right place to do it, but
+ * it's the most convenient.
+ * TBD: move this to *after* the RPC
+ */
+ brickinfo->status = GF_BRICK_STOPPED;
}
- new_value = data_copy (value);
- snprintf (new_key, sizeof (new_key), "%d%s", brick_id, rxl_child_end);
- dict_set (rsp_ctx->dict, new_key, new_value);
+ }
out:
- return;
+ return ret;
}
-int
-glusterd_heal_volume_brick_rsp (dict_t *req_dict, dict_t *rsp_dict,
- dict_t *op_ctx, char **op_errstr)
+static int
+glusterd_bricks_select_remove_brick(dict_t *dict, char **op_errstr,
+ struct cds_list_head *selected)
{
- int ret = 0;
- glusterd_heal_rsp_conv_t rsp_ctx = {0};
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ char *brick = NULL;
+ int32_t count = 0;
+ int32_t i = 1;
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+ glusterd_pending_node_t *pending_node = NULL;
+ int32_t command = 0;
+ int32_t force = 0;
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "Unable to allocate memory");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "count", SLEN("count"), &count);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, -ret, GD_MSG_DICT_GET_FAILED,
+ "Unable to get count");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "command", SLEN("command"), &command);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, -ret, GD_MSG_DICT_GET_FAILED,
+ "Unable to get command");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "force", SLEN("force"), &force);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DICT_GET_FAILED,
+ "force flag is not set");
+ ret = 0;
+ goto out;
+ }
- GF_ASSERT (rsp_dict);
- GF_ASSERT (op_ctx);
- GF_ASSERT (op_errstr);
+ while (i <= count) {
+ keylen = snprintf(key, sizeof(key), "brick%d", i);
- ret = dict_get_str (req_dict, "volname", &volname);
+ ret = dict_get_strn(dict, key, keylen, &brick);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
- goto out;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get brick");
+ goto out;
}
- ret = glusterd_volinfo_find (volname, &volinfo);
+ ret = glusterd_volume_brickinfo_get_by_brick(brick, volinfo, &brickinfo,
+ _gf_false);
if (ret)
- goto out;
-
- rsp_ctx.dict = op_ctx;
- rsp_ctx.volinfo = volinfo;
- rsp_ctx.this = THIS;
- dict_foreach (rsp_dict, _heal_volume_add_shd_rsp, &rsp_ctx);
-
-out:
- return ret;
-}
-
-void
-_status_volume_add_brick_rsp (dict_t *this, char *key, data_t *value,
- void *data)
-{
- char new_key[256] = {0,};
- data_t *new_value = 0;
- glusterd_pr_brick_rsp_conv_t *rsp_ctx = NULL;
-
- rsp_ctx = data;
- new_value = data_copy (value);
- snprintf (new_key, sizeof (new_key), "brick%d.%s", rsp_ctx->count, key);
- dict_set (rsp_ctx->dict, new_key, new_value);
-
- return;
-}
-
-int
-glusterd_status_volume_brick_rsp (dict_t *rsp_dict, dict_t *op_ctx,
- char **op_errstr)
-{
- int ret = 0;
- glusterd_pr_brick_rsp_conv_t rsp_ctx = {0};
- int32_t count = 0;
- int index = 0;
-
- GF_ASSERT (rsp_dict);
- GF_ASSERT (op_ctx);
- GF_ASSERT (op_errstr);
+ goto out;
- ret = dict_get_int32 (op_ctx, "count", &count);
- if (ret) {
- count = 0;
- } else {
- count++;
- }
- ret = dict_get_int32 (rsp_dict, "index", &index);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get node index");
+ if (glusterd_is_brick_started(brickinfo)) {
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
goto out;
+ } else {
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ cds_list_add_tail(&pending_node->list, selected);
+ pending_node = NULL;
+ }
+ /*
+ * This is not really the right place to do it, but
+ * it's the most convenient.
+ * TBD: move this to *after* the RPC
+ */
+ brickinfo->status = GF_BRICK_STOPPED;
}
- dict_del (rsp_dict, "index");
-
- rsp_ctx.count = index;
- rsp_ctx.dict = op_ctx;
- dict_foreach (rsp_dict, _status_volume_add_brick_rsp, &rsp_ctx);
- ret = dict_set_int32 (op_ctx, "count", count);
+ i++;
+ }
out:
- return ret;
+ return ret;
}
-int
-glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict,
- dict_t *op_ctx)
+static int
+glusterd_bricks_select_profile_volume(dict_t *dict, char **op_errstr,
+ struct cds_list_head *selected)
{
- int ret = 0;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- char key[256] = {0,};
- int32_t i = 0;
- char buf[1024] = {0,};
- char *node_str = NULL;
- glusterd_conf_t *priv = NULL;
-
- priv = THIS->private;
- GF_ASSERT (req_dict);
-
- ret = dict_get_str (req_dict, "volname", &volname);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
- goto out;
- }
-
- ret = glusterd_volinfo_find (volname, &volinfo);
+ int ret = -1;
+ char *volname = NULL;
+ char msg[2048] = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ int32_t stats_op = GF_CLI_STATS_NONE;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_pending_node_t *pending_node = NULL;
+ char *brick = NULL;
+ int32_t pid = -1;
+ char pidfile[PATH_MAX] = {0};
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "volume name get failed");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Volume %s does not exists", volname);
+
+ *op_errstr = gf_strdup(msg);
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", msg);
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "op", SLEN("op"), &stats_op);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "volume profile op get failed");
+ goto out;
+ }
+
+ switch (stats_op) {
+ case GF_CLI_STATS_START:
+ case GF_CLI_STATS_STOP:
+ goto out;
+ break;
+ case GF_CLI_STATS_INFO:
+#ifdef BUILD_GNFS
+ ret = dict_get_str_boolean(dict, "nfs", _gf_false);
+ if (ret) {
+ if (!priv->nfs_svc.online) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_NFS_SERVER_NOT_RUNNING,
+ "NFS server"
+ " is not running");
+ goto out;
+ }
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ }
+ pending_node->node = &(priv->nfs_svc);
+ pending_node->type = GD_NODE_NFS;
+ cds_list_add_tail(&pending_node->list, selected);
+ pending_node = NULL;
- if (ret)
+ ret = 0;
goto out;
+ }
+#endif
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (glusterd_is_brick_started(brickinfo)) {
+ /*
+ * In normal use, glusterd_is_brick_started
+ * will give us the answer we need. However,
+ * in our tests the brick gets detached behind
+ * our back, so we need to double-check this
+ * way.
+ */
+ GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo,
+ priv);
+ if (!gf_is_service_running(pidfile, &pid)) {
+ continue;
+ }
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ } else {
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ cds_list_add_tail(&pending_node->list, selected);
+ pending_node = NULL;
+ }
+ }
+ }
+ break;
- if (rsp_dict) {
- ret = glusterd_defrag_volume_status_update (volinfo,
- rsp_dict);
- }
+ case GF_CLI_STATS_TOP:
+#ifdef BUILD_GNFS
+ ret = dict_get_str_boolean(dict, "nfs", _gf_false);
+ if (ret) {
+ if (!priv->nfs_svc.online) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_NFS_SERVER_NOT_RUNNING,
+ "NFS server"
+ " is not running");
+ goto out;
+ }
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ }
+ pending_node->node = &(priv->nfs_svc);
+ pending_node->type = GD_NODE_NFS;
+ cds_list_add_tail(&pending_node->list, selected);
+ pending_node = NULL;
- if (!op_ctx) {
- dict_copy (rsp_dict, op_ctx);
+ ret = 0;
goto out;
- }
-
- ret = dict_get_int32 (op_ctx, "count", &i);
- i++;
-
- ret = dict_set_int32 (op_ctx, "count", i);
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR, "Failed to set count");
-
- snprintf (buf, 1024, "%s", uuid_utoa (MY_UUID));
- node_str = gf_strdup (buf);
-
- snprintf (key, 256, "node-uuid-%d",i);
- ret = dict_set_dynstr (op_ctx, key, node_str);
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to set node-uuid");
+ }
+#endif
+ ret = dict_get_strn(dict, "brick", SLEN("brick"), &brick);
+ if (!ret) {
+ ret = glusterd_volume_brickinfo_get_by_brick(
+ brick, volinfo, &brickinfo, _gf_true);
+ if (ret)
+ goto out;
- memset (key, 0 , 256);
- snprintf (key, 256, "files-%d", i);
- ret = dict_set_uint64 (op_ctx, key, volinfo->rebalance_files);
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to set file count");
+ if (!glusterd_is_brick_started(brickinfo))
+ goto out;
- memset (key, 0 , 256);
- snprintf (key, 256, "size-%d", i);
- ret = dict_set_uint64 (op_ctx, key, volinfo->rebalance_data);
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to set size of xfer");
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ } else {
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ cds_list_add_tail(&pending_node->list, selected);
+ pending_node = NULL;
+ goto out;
+ }
+ }
+ ret = 0;
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (glusterd_is_brick_started(brickinfo)) {
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ } else {
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ cds_list_add_tail(&pending_node->list, selected);
+ pending_node = NULL;
+ }
+ }
+ }
+ break;
- memset (key, 0 , 256);
- snprintf (key, 256, "lookups-%d", i);
- ret = dict_set_uint64 (op_ctx, key, volinfo->lookedup_files);
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to set lookedup file count");
+ default:
+ GF_ASSERT(0);
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "Invalid profile op: %d", stats_op);
+ ret = -1;
+ goto out;
+ break;
+ }
- memset (key, 0 , 256);
- snprintf (key, 256, "status-%d", i);
- ret = dict_set_int32 (op_ctx, key, volinfo->defrag_status);
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to set status");
+out:
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
- memset (key, 0 , 256);
- snprintf (key, 256, "failures-%d", i);
- ret = dict_set_uint64 (op_ctx, key, volinfo->rebalance_failures);
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to set failure count");
+ return ret;
+}
- memset (key, 0, 256);
- snprintf (key, 256, "run-time-%d", i);
- ret = dict_set_double (op_ctx, key, volinfo->rebalance_time);
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to set run-time");
+int
+_get_hxl_children_count(glusterd_volinfo_t *volinfo)
+{
+ if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
+ return volinfo->disperse_count;
+ } else {
+ return volinfo->replica_count;
+ }
+}
+static int
+_add_hxlator_to_dict(dict_t *dict, glusterd_volinfo_t *volinfo, int index,
+ int count)
+{
+ int ret = -1;
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+ char *xname = NULL;
+ char *xl_type = 0;
+
+ if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
+ xl_type = "disperse";
+ } else {
+ xl_type = "replicate";
+ }
+ keylen = snprintf(key, sizeof(key), "xl-%d", count);
+ ret = gf_asprintf(&xname, "%s-%s-%d", volinfo->volname, xl_type, index);
+ if (ret == -1)
+ goto out;
+
+ ret = dict_set_dynstrn(dict, key, keylen, xname);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32(dict, xname, index);
out:
- return ret;
+ return ret;
}
-int32_t
-glusterd_handle_node_rsp (glusterd_req_ctx_t *req_ctx, void *pending_entry,
- glusterd_op_t op, dict_t *rsp_dict, dict_t *op_ctx,
- char **op_errstr, gd_node_type type)
+int
+get_replica_index_for_per_replica_cmd(glusterd_volinfo_t *volinfo, dict_t *dict)
{
- int ret = 0;
+ int ret = 0;
+ char *hostname = NULL;
+ char *path = NULL;
+ int index = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int cmd_replica_index = -1;
+ int replica_count = -1;
+
+ if (!dict) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "per-replica-cmd-hostname",
+ SLEN("per-replica-cmd-hostname"), &hostname);
+ if (ret)
+ goto out;
+ ret = dict_get_strn(dict, "per-replica-cmd-path",
+ SLEN("per-replica-cmd-path"), &path);
+ if (ret)
+ goto out;
+
+ replica_count = volinfo->replica_count;
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (gf_uuid_is_null(brickinfo->uuid))
+ (void)glusterd_resolve_brick(brickinfo);
+ if (!strcmp(brickinfo->path, path) &&
+ !strcmp(brickinfo->hostname, hostname)) {
+ cmd_replica_index = index / (replica_count);
+ goto out;
+ }
+ index++;
+ }
- GF_ASSERT (op_errstr);
-
- switch (op) {
- case GD_OP_PROFILE_VOLUME:
- ret = glusterd_profile_volume_brick_rsp (pending_entry,
- rsp_dict, op_ctx,
- op_errstr, type);
- break;
- case GD_OP_STATUS_VOLUME:
- ret = glusterd_status_volume_brick_rsp (rsp_dict, op_ctx,
- op_errstr);
- break;
-
- case GD_OP_DEFRAG_BRICK_VOLUME:
- glusterd_defrag_volume_node_rsp (req_ctx->dict,
- rsp_dict, op_ctx);
- break;
-
- case GD_OP_HEAL_VOLUME:
- ret = glusterd_heal_volume_brick_rsp (req_ctx->dict, rsp_dict,
- op_ctx, op_errstr);
- break;
- default:
- break;
- }
+out:
+ if (ret)
+ cmd_replica_index = -1;
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return cmd_replica_index;
}
-static int
-glusterd_bricks_select_stop_volume (dict_t *dict, char **op_errstr)
+int
+_select_hxlator_with_matching_brick(xlator_t *this, glusterd_volinfo_t *volinfo,
+ dict_t *dict, int *index)
{
- int ret = 0;
- int flags = 0;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
- glusterd_pending_node_t *pending_node = NULL;
-
+ char *path = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int hxl_children = 0;
- ret = glusterd_op_stop_volume_args_get (dict, &volname, &flags);
- if (ret)
- goto out;
+ if (!dict || dict_get_strn(dict, "per-replica-cmd-path",
+ SLEN("per-replica-cmd-path"), &path))
+ return -1;
- ret = glusterd_volinfo_find (volname, &volinfo);
+ hxl_children = _get_hxl_children_count(volinfo);
+ if ((*index) == 0)
+ (*index)++;
- if (ret)
- goto out;
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (gf_uuid_is_null(brickinfo->uuid))
+ (void)glusterd_resolve_brick(brickinfo);
- list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
- if (glusterd_is_brick_started (brickinfo)) {
- pending_node = GF_CALLOC (1, sizeof (*pending_node),
- gf_gld_mt_pending_node_t);
- if (!pending_node) {
- ret = -1;
- goto out;
- } else {
- pending_node->node = brickinfo;
- pending_node->type = GD_NODE_BRICK;
- list_add_tail (&pending_node->list, &opinfo.pending_bricks);
- pending_node = NULL;
- }
- }
+ if ((!gf_uuid_compare(MY_UUID, brickinfo->uuid)) &&
+ (!strncmp(brickinfo->path, path, strlen(path)))) {
+ _add_hxlator_to_dict(dict, volinfo, ((*index) - 1) / hxl_children,
+ 0);
+ return 1;
}
+ (*index)++;
+ }
-out:
- return ret;
+ return 0;
}
-
-static int
-glusterd_bricks_select_remove_brick (dict_t *dict, char **op_errstr)
+void
+_select_hxlators_with_local_bricks(xlator_t *this, glusterd_volinfo_t *volinfo,
+ dict_t *dict, int *index, int *hxlator_count)
{
- int ret = -1;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
- char *brick = NULL;
- int32_t count = 0;
- int32_t i = 1;
- char key[256] = {0,};
- glusterd_pending_node_t *pending_node = NULL;
- int32_t force = 0;
-
- ret = dict_get_str (dict, "volname", &volname);
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int hxl_children = 0;
+ gf_boolean_t add = _gf_false;
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
- goto out;
- }
+ hxl_children = _get_hxl_children_count(volinfo);
- ret = glusterd_volinfo_find (volname, &volinfo);
+ if ((*index) == 0)
+ (*index)++;
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
- goto out;
- }
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (gf_uuid_is_null(brickinfo->uuid))
+ (void)glusterd_resolve_brick(brickinfo);
- ret = dict_get_int32 (dict, "count", &count);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get count");
- goto out;
- }
+ if (!gf_uuid_compare(MY_UUID, brickinfo->uuid))
+ add = _gf_true;
- ret = dict_get_int32 (dict, "force", &force);
- if (ret) {
- gf_log (THIS->name, GF_LOG_INFO, "force flag is not set");
- ret = 0;
- goto out;
+ if ((*index) % hxl_children == 0) {
+ if (add) {
+ _add_hxlator_to_dict(dict, volinfo,
+ ((*index) - 1) / hxl_children,
+ (*hxlator_count));
+ (*hxlator_count)++;
+ }
+ add = _gf_false;
}
- while ( i <= count) {
- snprintf (key, 256, "brick%d", i);
- ret = dict_get_str (dict, key, &brick);
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to get brick");
- goto out;
- }
+ (*index)++;
+ }
+}
- ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
- &brickinfo,
- GF_PATH_COMPLETE);
- if (ret)
- goto out;
- if (glusterd_is_brick_started (brickinfo)) {
- pending_node = GF_CALLOC (1, sizeof (*pending_node),
- gf_gld_mt_pending_node_t);
- if (!pending_node) {
- ret = -1;
- goto out;
- } else {
- pending_node->node = brickinfo;
- pending_node->type = GD_NODE_BRICK;
- list_add_tail (&pending_node->list, &opinfo.pending_bricks);
- pending_node = NULL;
- }
+int
+_select_hxlators_for_full_self_heal(xlator_t *this, glusterd_volinfo_t *volinfo,
+ dict_t *dict, int *index,
+ int *hxlator_count)
+{
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int hxl_children = 0;
+ uuid_t candidate = {0};
+ int brick_index = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ int delta = 0;
+ uuid_t candidate_max = {0};
+
+ if ((*index) == 0)
+ (*index)++;
+ if (volinfo->type == GF_CLUSTER_TYPE_DISPERSE) {
+ hxl_children = volinfo->disperse_count;
+ } else {
+ hxl_children = volinfo->replica_count;
+ }
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (gf_uuid_compare(brickinfo->uuid, candidate_max) > 0) {
+ if (!gf_uuid_compare(MY_UUID, brickinfo->uuid)) {
+ gf_uuid_copy(candidate_max, brickinfo->uuid);
+ } else {
+ peerinfo = glusterd_peerinfo_find(brickinfo->uuid, NULL);
+ if (peerinfo && peerinfo->connected) {
+ gf_uuid_copy(candidate_max, brickinfo->uuid);
}
- i++;
- }
-
-out:
- return ret;
+ }
+ }
+ }
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (gf_uuid_is_null(brickinfo->uuid))
+ (void)glusterd_resolve_brick(brickinfo);
+
+ delta %= hxl_children;
+ if ((*index + delta) == (brick_index + hxl_children)) {
+ if (!gf_uuid_compare(MY_UUID, brickinfo->uuid)) {
+ gf_uuid_copy(candidate, brickinfo->uuid);
+ } else {
+ peerinfo = glusterd_peerinfo_find(brickinfo->uuid, NULL);
+ if (peerinfo && peerinfo->connected) {
+ gf_uuid_copy(candidate, brickinfo->uuid);
+ } else if (peerinfo &&
+ (!gf_uuid_compare(candidate_max, MY_UUID))) {
+ _add_hxlator_to_dict(dict, volinfo,
+ ((*index) - 1) / hxl_children,
+ (*hxlator_count));
+ (*hxlator_count)++;
+ }
+ }
+
+ if (!gf_uuid_compare(MY_UUID, candidate)) {
+ _add_hxlator_to_dict(dict, volinfo,
+ ((*index) - 1) / hxl_children,
+ (*hxlator_count));
+ (*hxlator_count)++;
+ }
+ gf_uuid_clear(candidate);
+ brick_index += hxl_children;
+ delta++;
+ }
+
+ (*index)++;
+ }
+ return *hxlator_count;
}
static int
-glusterd_bricks_select_profile_volume (dict_t *dict, char **op_errstr)
+glusterd_bricks_select_snap(dict_t *dict, char **op_errstr,
+ struct cds_list_head *selected)
{
- int ret = -1;
- char *volname = NULL;
- char msg[2048] = {0,};
- glusterd_conf_t *priv = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- xlator_t *this = NULL;
- int32_t stats_op = GF_CLI_STATS_NONE;
- glusterd_brickinfo_t *brickinfo = NULL;
- glusterd_pending_node_t *pending_node = NULL;
- char *brick = NULL;
-
- this = THIS;
- GF_ASSERT (this);
- priv = this->private;
- GF_ASSERT (priv);
-
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed");
- goto out;
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ glusterd_pending_node_t *pending_node = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volname = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int brick_index = -1;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get"
+ " volname");
+ goto out;
+ }
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret)
+ goto out;
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ brick_index++;
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID) ||
+ !glusterd_is_brick_started(brickinfo)) {
+ continue;
+ }
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
}
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ pending_node->index = brick_index;
+ cds_list_add_tail(&pending_node->list, selected);
+ pending_node = NULL;
+ }
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- snprintf (msg, sizeof (msg), "Volume %s does not exists",
- volname);
+ ret = 0;
- *op_errstr = gf_strdup (msg);
- gf_log ("", GF_LOG_ERROR, "%s", msg);
- goto out;
- }
+out:
+ gf_msg_debug(THIS->name, 0, "Returning ret %d", ret);
+ return ret;
+}
- ret = dict_get_int32 (dict, "op", &stats_op);
+static int
+fill_shd_status_for_local_bricks(dict_t *dict, glusterd_volinfo_t *volinfo,
+ cli_cmd_type type, int *index,
+ dict_t *req_dict)
+{
+ glusterd_brickinfo_t *brickinfo = NULL;
+ static char *msg = "self-heal-daemon is not running on";
+ char key[32] = {
+ 0,
+ };
+ int keylen;
+ char value[128] = {
+ 0,
+ };
+ int ret = 0;
+ xlator_t *this = NULL;
+ int cmd_replica_index = -1;
+
+ this = THIS;
+
+ if (type == PER_HEAL_XL) {
+ cmd_replica_index = get_replica_index_for_per_replica_cmd(volinfo,
+ req_dict);
+ if (cmd_replica_index == -1) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_REPLICA_INDEX_GET_FAIL,
+ "Could not find the "
+ "replica index for per replica type command");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (gf_uuid_is_null(brickinfo->uuid))
+ (void)glusterd_resolve_brick(brickinfo);
+
+ if (gf_uuid_compare(MY_UUID, brickinfo->uuid)) {
+ (*index)++;
+ continue;
+ }
+
+ if (type == PER_HEAL_XL) {
+ if (cmd_replica_index != ((*index) / volinfo->replica_count)) {
+ (*index)++;
+ continue;
+ }
+ }
+ keylen = snprintf(key, sizeof(key), "%d-status", (*index));
+ snprintf(value, sizeof(value), "%s %s", msg, uuid_utoa(MY_UUID));
+ ret = dict_set_dynstrn(dict, key, keylen, gf_strdup(value));
if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "volume profile op get failed");
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to"
+ "set the dictionary for shd status msg");
+ goto out;
}
-
- switch (stats_op) {
- case GF_CLI_STATS_START:
- case GF_CLI_STATS_STOP:
- goto out;
- break;
- case GF_CLI_STATS_INFO:
- ret = dict_get_str_boolean (dict, "nfs", _gf_false);
- if (ret) {
- if (!glusterd_nodesvc_is_running ("nfs")) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR, "NFS server"
- " is not running");
- goto out;
- }
- pending_node = GF_CALLOC (1, sizeof (*pending_node),
- gf_gld_mt_pending_node_t);
- if (!pending_node) {
- ret = -1;
- goto out;
- }
- pending_node->node = priv->nfs;
- pending_node->type = GD_NODE_NFS;
- list_add_tail (&pending_node->list,
- &opinfo.pending_bricks);
- pending_node = NULL;
-
- ret = 0;
- goto out;
-
- }
- list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
- if (glusterd_is_brick_started (brickinfo)) {
- pending_node = GF_CALLOC (1, sizeof (*pending_node),
- gf_gld_mt_pending_node_t);
- if (!pending_node) {
- ret = -1;
- goto out;
- } else {
- pending_node->node = brickinfo;
- pending_node->type = GD_NODE_BRICK;
- list_add_tail (&pending_node->list,
- &opinfo.pending_bricks);
- pending_node = NULL;
- }
- }
- }
- break;
-
- case GF_CLI_STATS_TOP:
- ret = dict_get_str_boolean (dict, "nfs", _gf_false);
- if (ret) {
- if (!glusterd_nodesvc_is_running ("nfs")) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR, "NFS server"
- " is not running");
- goto out;
- }
- pending_node = GF_CALLOC (1, sizeof (*pending_node),
- gf_gld_mt_pending_node_t);
- if (!pending_node) {
- ret = -1;
- goto out;
- }
- pending_node->node = priv->nfs;
- pending_node->type = GD_NODE_NFS;
- list_add_tail (&pending_node->list,
- &opinfo.pending_bricks);
- pending_node = NULL;
-
- ret = 0;
- goto out;
-
- }
- ret = dict_get_str (dict, "brick", &brick);
- if (!ret) {
- ret = glusterd_volume_brickinfo_get_by_brick (brick, volinfo,
- &brickinfo,
- GF_PATH_COMPLETE);
- if (ret)
- goto out;
-
- if (!glusterd_is_brick_started (brickinfo))
- goto out;
-
- pending_node = GF_CALLOC (1, sizeof (*pending_node),
- gf_gld_mt_pending_node_t);
- if (!pending_node) {
- ret = -1;
- goto out;
- } else {
- pending_node->node = brickinfo;
- pending_node->type = GD_NODE_BRICK;
- list_add_tail (&pending_node->list,
- &opinfo.pending_bricks);
- pending_node = NULL;
- goto out;
- }
- }
- ret = 0;
- list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
- if (glusterd_is_brick_started (brickinfo)) {
- pending_node = GF_CALLOC (1, sizeof (*pending_node),
- gf_gld_mt_pending_node_t);
- if (!pending_node) {
- ret = -1;
- goto out;
- } else {
- pending_node->node = brickinfo;
- pending_node->type = GD_NODE_BRICK;
- list_add_tail (&pending_node->list,
- &opinfo.pending_bricks);
- pending_node = NULL;
- }
- }
- }
- break;
-
- default:
- GF_ASSERT (0);
- gf_log ("glusterd", GF_LOG_ERROR, "Invalid profile op: %d",
- stats_op);
- ret = -1;
- goto out;
- break;
+ keylen = snprintf(key, sizeof(key), "%d-shd-status", (*index));
+ ret = dict_set_nstrn(dict, key, keylen, "off", SLEN("off"));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to"
+ " set dictionary for shd status msg");
+ goto out;
}
+ (*index)++;
+ }
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
-
- return ret;
+ return ret;
}
-
-static int
-_add_rxlator_to_dict (dict_t *dict, char *volname, int index, int count)
+int
+glusterd_shd_select_brick_xlator(dict_t *dict, gf_xl_afr_op_t heal_op,
+ glusterd_volinfo_t *volinfo, int *index,
+ int *hxlator_count, dict_t *rsp_dict)
{
- int ret = -1;
- char key[128] = {0,};
- char *xname = NULL;
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ glusterd_svc_t *svc = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ svc = &(volinfo->shd.svc);
+
+ switch (heal_op) {
+ case GF_SHD_OP_INDEX_SUMMARY:
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT:
+ if (!svc->online) {
+ if (!rsp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL,
+ "Received "
+ "empty ctx.");
+ goto out;
+ }
- snprintf (key, sizeof (key), "xl-%d", count);
- ret = gf_asprintf (&xname, "%s-replicate-%d", volname, index);
- if (ret == -1)
+ ret = fill_shd_status_for_local_bricks(
+ rsp_dict, volinfo, ALL_HEAL_XL, index, dict);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SHD_STATUS_SET_FAIL,
+ "Unable to "
+ "fill the shd status for the local "
+ "bricks");
goto out;
-
- ret = dict_set_dynstr (dict, key, xname);
- if (ret)
+ }
+ break;
+
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ if (!svc->online) {
+ if (!rsp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_NULL,
+ "Received "
+ "empty ctx.");
+ goto out;
+ }
+ ret = fill_shd_status_for_local_bricks(
+ rsp_dict, volinfo, PER_HEAL_XL, index, dict);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SHD_STATUS_SET_FAIL,
+ "Unable to "
+ "fill the shd status for the local"
+ " bricks.");
goto out;
+ }
+ break;
- ret = dict_set_int32 (dict, xname, index);
+ default:
+ break;
+ }
+
+ switch (heal_op) {
+ case GF_SHD_OP_HEAL_FULL:
+ _select_hxlators_for_full_self_heal(this, volinfo, dict, index,
+ hxlator_count);
+ break;
+ case GF_SHD_OP_STATISTICS_HEAL_COUNT_PER_REPLICA:
+ (*hxlator_count) += _select_hxlator_with_matching_brick(
+ this, volinfo, dict, index);
+ break;
+ default:
+ _select_hxlators_with_local_bricks(this, volinfo, dict, index,
+ hxlator_count);
+ break;
+ }
+ ret = (*hxlator_count);
out:
- return ret;
+ return ret;
}
-int
-_select_rxlators_with_local_bricks (xlator_t *this, glusterd_volinfo_t *volinfo,
- dict_t *dict)
+static int
+glusterd_bricks_select_heal_volume(dict_t *dict, char **op_errstr,
+ struct cds_list_head *selected,
+ dict_t *rsp_dict)
{
- glusterd_brickinfo_t *brickinfo = NULL;
- glusterd_conf_t *priv = NULL;
- int index = 1;
- int rxlator_count = 0;
- int replica_count = 0;
- gf_boolean_t add = _gf_false;
-
- priv = this->private;
- replica_count = volinfo->replica_count;
- list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
- if (uuid_is_null (brickinfo->uuid))
- (void)glusterd_resolve_brick (brickinfo);
-
- if (!uuid_compare (MY_UUID, brickinfo->uuid))
- add = _gf_true;
- if (index % replica_count == 0) {
- if (add) {
- _add_rxlator_to_dict (dict, volinfo->volname,
- (index-1)/replica_count,
- rxlator_count);
- rxlator_count++;
- }
- add = _gf_false;
- }
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ char msg[2048] = {
+ 0,
+ };
+ glusterd_pending_node_t *pending_node = NULL;
+ gf_xl_afr_op_t heal_op = GF_SHD_OP_INVALID;
+ int hxlator_count = 0;
+ int index = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "volume name get failed");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Volume %s does not exist", volname);
+
+ *op_errstr = gf_strdup(msg);
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", msg);
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "heal-op", SLEN("heal-op"),
+ (int32_t *)&heal_op);
+ if (ret || (heal_op == GF_SHD_OP_INVALID)) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "heal op invalid");
+ goto out;
+ }
+ ret = glusterd_shd_select_brick_xlator(dict, heal_op, volinfo, &index,
+ &hxlator_count, rsp_dict);
+ if (ret < 0) {
+ goto out;
+ }
+
+ if (!hxlator_count)
+ goto out;
+ if (hxlator_count == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_XLATOR_COUNT_GET_FAIL,
+ "Could not determine the"
+ "translator count");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32n(dict, "count", SLEN("count"), hxlator_count);
+ if (ret)
+ goto out;
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ } else {
+ pending_node->node = &(volinfo->shd.svc);
+ pending_node->type = GD_NODE_SHD;
+ cds_list_add_tail(&pending_node->list, selected);
+ pending_node = NULL;
+ }
- index++;
- }
- return rxlator_count;
+out:
+ gf_msg_debug(THIS->name, 0, "Returning ret %d", ret);
+ return ret;
}
-int
-_select_rxlators_for_full_self_heal (xlator_t *this,
- glusterd_volinfo_t *volinfo,
- dict_t *dict)
+static int
+glusterd_bricks_select_rebalance_volume(dict_t *dict, char **op_errstr,
+ struct cds_list_head *selected)
{
- glusterd_brickinfo_t *brickinfo = NULL;
- glusterd_conf_t *priv = NULL;
- int index = 1;
- int rxlator_count = 0;
- int replica_count = 0;
- uuid_t candidate = {0};
-
- priv = this->private;
- replica_count = volinfo->replica_count;
-
- list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
- if (uuid_is_null (brickinfo->uuid))
- (void)glusterd_resolve_brick (brickinfo);
-
- if (uuid_compare (brickinfo->uuid, candidate) > 0)
- uuid_copy (candidate, brickinfo->uuid);
-
- if (index % replica_count == 0) {
- if (!uuid_compare (MY_UUID, candidate)) {
- _add_rxlator_to_dict (dict, volinfo->volname,
- (index-1)/replica_count,
- rxlator_count);
- rxlator_count++;
- }
- uuid_clear (candidate);
- }
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ char msg[2048] = {
+ 0,
+ };
+ glusterd_pending_node_t *pending_node = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "volume name get failed");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Volume %s does not exist", volname);
+
+ *op_errstr = gf_strdup(msg);
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND, "%s", msg);
+ goto out;
+ }
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ } else {
+ pending_node->node = volinfo;
+ pending_node->type = GD_NODE_REBALANCE;
+ cds_list_add_tail(&pending_node->list, selected);
+ pending_node = NULL;
+ }
- index++;
- }
- return rxlator_count;
+out:
+ return ret;
}
static int
-glusterd_bricks_select_heal_volume (dict_t *dict, char **op_errstr)
+glusterd_bricks_select_status_volume(dict_t *dict, char **op_errstr,
+ struct cds_list_head *selected)
{
- int ret = -1;
- char *volname = NULL;
- glusterd_conf_t *priv = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- xlator_t *this = NULL;
- char msg[2048] = {0,};
- glusterd_pending_node_t *pending_node = NULL;
- gf_xl_afr_op_t heal_op = GF_AFR_OP_INVALID;
- int rxlator_count = 0;
-
- this = THIS;
- GF_ASSERT (this);
- priv = this->private;
- GF_ASSERT (priv);
-
- ret = dict_get_str (dict, "volname", &volname);
+ int ret = -1;
+ int cmd = 0;
+ int brick_index = -1;
+ char *volname = NULL;
+ char *brickname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_pending_node_t *pending_node = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_svc_t *svc = NULL;
+
+ GF_ASSERT(dict);
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_int32n(dict, "cmd", SLEN("cmd"), &cmd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get status type");
+ goto out;
+ }
+
+ if (cmd & GF_CLI_STATUS_ALL)
+ goto out;
+
+ switch (cmd & GF_CLI_STATUS_MASK) {
+ case GF_CLI_STATUS_MEM:
+ case GF_CLI_STATUS_CLIENTS:
+ case GF_CLI_STATUS_INODE:
+ case GF_CLI_STATUS_FD:
+ case GF_CLI_STATUS_CALLPOOL:
+ case GF_CLI_STATUS_NFS:
+ case GF_CLI_STATUS_SHD:
+ case GF_CLI_STATUS_QUOTAD:
+ case GF_CLI_STATUS_SNAPD:
+ case GF_CLI_STATUS_BITD:
+ case GF_CLI_STATUS_SCRUB:
+ case GF_CLI_STATUS_CLIENT_LIST:
+ break;
+ default:
+ goto out;
+ }
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volname");
+ goto out;
+ }
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ goto out;
+ }
+
+ if ((cmd & GF_CLI_STATUS_BRICK) != 0) {
+ ret = dict_get_strn(dict, "brick", SLEN("brick"), &brickname);
if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed");
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get brick");
+ goto out;
}
+ ret = glusterd_volume_brickinfo_get_by_brick(brickname, volinfo,
+ &brickinfo, _gf_false);
+ if (ret)
+ goto out;
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- snprintf (msg, sizeof (msg), "Volume %s does not exist",
- volname);
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID) ||
+ !glusterd_is_brick_started(brickinfo))
+ goto out;
- *op_errstr = gf_strdup (msg);
- gf_log ("", GF_LOG_ERROR, "%s", msg);
- goto out;
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
}
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ pending_node->index = 0;
+ cds_list_add_tail(&pending_node->list, selected);
- ret = dict_get_int32 (dict, "heal-op", (int32_t*)&heal_op);
- if (ret || (heal_op == GF_AFR_OP_INVALID)) {
- gf_log ("glusterd", GF_LOG_ERROR, "heal op invalid");
- goto out;
+ ret = 0;
+#ifdef BUILD_GNFS
+ } else if ((cmd & GF_CLI_STATUS_NFS) != 0) {
+ if (!priv->nfs_svc.online) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NFS_SERVER_NOT_RUNNING,
+ "NFS server is not running");
+ goto out;
+ }
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
}
+ pending_node->node = &(priv->nfs_svc);
+ pending_node->type = GD_NODE_NFS;
+ pending_node->index = 0;
+ cds_list_add_tail(&pending_node->list, selected);
- switch (heal_op) {
- case GF_AFR_OP_HEAL_FULL:
- rxlator_count = _select_rxlators_for_full_self_heal (this,
- volinfo,
- dict);
- break;
- default:
- rxlator_count = _select_rxlators_with_local_bricks (this,
- volinfo,
- dict);
- break;
+ ret = 0;
+#endif
+ } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
+ svc = &(volinfo->shd.svc);
+ if (!svc->online) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SELF_HEALD_DISABLED,
+ "Self-heal daemon is not running");
+ goto out;
+ }
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
}
- if (!rxlator_count)
- goto out;
- ret = dict_set_int32 (dict, "count", rxlator_count);
- if (ret)
- goto out;
+ pending_node->node = svc;
+ pending_node->type = GD_NODE_SHD;
+ pending_node->index = 0;
+ cds_list_add_tail(&pending_node->list, selected);
- pending_node = GF_CALLOC (1, sizeof (*pending_node),
- gf_gld_mt_pending_node_t);
+ ret = 0;
+ } else if ((cmd & GF_CLI_STATUS_QUOTAD) != 0) {
+ if (!priv->quotad_svc.online) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_QUOTAD_NOT_RUNNING,
+ "Quotad is not "
+ "running");
+ ret = -1;
+ goto out;
+ }
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
if (!pending_node) {
- ret = -1;
- goto out;
- } else {
- pending_node->node = priv->shd;
- pending_node->type = GD_NODE_SHD;
- list_add_tail (&pending_node->list,
- &opinfo.pending_bricks);
- pending_node = NULL;
+ ret = -1;
+ goto out;
}
+ pending_node->node = &(priv->quotad_svc);
+ pending_node->type = GD_NODE_QUOTAD;
+ pending_node->index = 0;
+ cds_list_add_tail(&pending_node->list, selected);
-out:
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning ret %d", ret);
- return ret;
-
-}
-
-
-static int
-glusterd_bricks_select_rebalance_volume (dict_t *dict, char **op_errstr)
-{
- int ret = -1;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- xlator_t *this = NULL;
- char msg[2048] = {0,};
- glusterd_pending_node_t *pending_node = NULL;
-
- this = THIS;
- GF_ASSERT (this);
+ ret = 0;
+ } else if ((cmd & GF_CLI_STATUS_BITD) != 0) {
+ if (!priv->bitd_svc.online) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BITROT_NOT_RUNNING,
+ "Bitrot is not "
+ "running");
+ ret = -1;
+ goto out;
+ }
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ }
+ pending_node->node = &(priv->bitd_svc);
+ pending_node->type = GD_NODE_BITD;
+ pending_node->index = 0;
+ cds_list_add_tail(&pending_node->list, selected);
+ ret = 0;
+ } else if ((cmd & GF_CLI_STATUS_SCRUB) != 0) {
+ if (!priv->scrub_svc.online) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SCRUBBER_NOT_RUNNING,
+ "Scrubber is not "
+ "running");
+ ret = -1;
+ goto out;
+ }
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ }
+ pending_node->node = &(priv->scrub_svc);
+ pending_node->type = GD_NODE_SCRUB;
+ pending_node->index = 0;
+ cds_list_add_tail(&pending_node->list, selected);
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "volume name get failed");
- goto out;
+ ret = 0;
+ } else if ((cmd & GF_CLI_STATUS_SNAPD) != 0) {
+ if (!volinfo->snapd.svc.online) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_NOT_RUNNING,
+ "snapd is not "
+ "running");
+ ret = -1;
+ goto out;
+ }
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "failed to allocate "
+ "memory for pending node");
+ ret = -1;
+ goto out;
}
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- snprintf (msg, sizeof (msg), "Volume %s does not exist",
- volname);
+ pending_node->node = (void *)(&volinfo->snapd);
+ pending_node->type = GD_NODE_SNAPD;
+ pending_node->index = 0;
+ cds_list_add_tail(&pending_node->list, selected);
- *op_errstr = gf_strdup (msg);
- gf_log ("", GF_LOG_ERROR, "%s", msg);
- goto out;
- }
- pending_node = GF_CALLOC (1, sizeof (*pending_node),
- gf_gld_mt_pending_node_t);
- if (!pending_node) {
+ ret = 0;
+ } else {
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ brick_index++;
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID) ||
+ !glusterd_is_brick_started(brickinfo)) {
+ continue;
+ }
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
ret = -1;
+ gf_msg(THIS->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Unable to allocate memory");
goto out;
- } else {
- pending_node->node = volinfo;
- pending_node->type = GD_NODE_REBALANCE;
- list_add_tail (&pending_node->list,
- &opinfo.pending_bricks);
- pending_node = NULL;
- }
-
+ }
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ pending_node->index = brick_index;
+ cds_list_add_tail(&pending_node->list, selected);
+ pending_node = NULL;
+ }
+ }
out:
- return ret;
+ return ret;
}
-
-
-
static int
-glusterd_bricks_select_status_volume (dict_t *dict, char **op_errstr)
+glusterd_bricks_select_scrub(dict_t *dict, char **op_errstr,
+ struct cds_list_head *selected)
{
- int ret = -1;
- int cmd = 0;
- int brick_index = -1;
- char *volname = NULL;
- char *brickname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
- glusterd_pending_node_t *pending_node = NULL;
- xlator_t *this = NULL;
- glusterd_conf_t *priv = NULL;
-
- GF_ASSERT (dict);
-
- this = THIS;
- GF_ASSERT (this);
- priv = this->private;
- GF_ASSERT (priv);
-
- ret = dict_get_int32 (dict, "cmd", &cmd);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Unable to get status type");
- goto out;
- }
-
- if (cmd & GF_CLI_STATUS_ALL)
- goto out;
-
- switch (cmd & GF_CLI_STATUS_MASK) {
- case GF_CLI_STATUS_MEM:
- case GF_CLI_STATUS_CLIENTS:
- case GF_CLI_STATUS_INODE:
- case GF_CLI_STATUS_FD:
- case GF_CLI_STATUS_CALLPOOL:
- case GF_CLI_STATUS_NFS:
- case GF_CLI_STATUS_SHD:
- break;
- default:
- goto out;
- }
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "Unable to get volname");
- goto out;
- }
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- goto out;
- }
-
- if ( (cmd & GF_CLI_STATUS_BRICK) != 0) {
- ret = dict_get_str (dict, "brick", &brickname);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to get brick");
- goto out;
- }
- ret = glusterd_volume_brickinfo_get_by_brick (brickname,
- volinfo,
- &brickinfo,
- GF_PATH_COMPLETE);
- if (ret)
- goto out;
-
- if (uuid_compare (brickinfo->uuid, MY_UUID)||
- !glusterd_is_brick_started (brickinfo))
- goto out;
-
- pending_node = GF_CALLOC (1, sizeof (*pending_node),
- gf_gld_mt_pending_node_t);
- if (!pending_node) {
- ret = -1;
- goto out;
- }
- pending_node->node = brickinfo;
- pending_node->type = GD_NODE_BRICK;
- pending_node->index = 0;
- list_add_tail (&pending_node->list, &opinfo.pending_bricks);
-
- ret = 0;
- } else if ((cmd & GF_CLI_STATUS_NFS) != 0) {
- if (!glusterd_nodesvc_is_running ("nfs")) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "NFS server is not running");
- goto out;
- }
- pending_node = GF_CALLOC (1, sizeof (*pending_node),
- gf_gld_mt_pending_node_t);
- if (!pending_node) {
- ret = -1;
- goto out;
- }
- pending_node->node = priv->nfs;
- pending_node->type = GD_NODE_NFS;
- pending_node->index = 0;
- list_add_tail (&pending_node->list, &opinfo.pending_bricks);
-
- ret = 0;
- } else if ((cmd & GF_CLI_STATUS_SHD) != 0) {
- if (!glusterd_nodesvc_is_running ("glustershd")) {
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR,
- "Self-heal daemon is not running");
- goto out;
- }
- pending_node = GF_CALLOC (1, sizeof (*pending_node),
- gf_gld_mt_pending_node_t);
- if (!pending_node) {
- ret = -1;
- goto out;
- }
- pending_node->node = priv->shd;
- pending_node->type = GD_NODE_SHD;
- pending_node->index = 0;
- list_add_tail (&pending_node->list, &opinfo.pending_bricks);
-
- ret = 0;
- } else {
- list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
- brick_index++;
- if (uuid_compare (brickinfo->uuid, MY_UUID) ||
- !glusterd_is_brick_started (brickinfo)) {
- continue;
- }
- pending_node = GF_CALLOC (1, sizeof (*pending_node),
- gf_gld_mt_pending_node_t);
- if (!pending_node) {
- ret = -1;
- gf_log (THIS->name ,GF_LOG_ERROR,
- "Unable to allocate memory");
- goto out;
- }
- pending_node->node = brickinfo;
- pending_node->type = GD_NODE_BRICK;
- pending_node->index = brick_index;
- list_add_tail (&pending_node->list,
- &opinfo.pending_bricks);
- pending_node = NULL;
- }
- }
+ int ret = -1;
+ char *volname = NULL;
+ char msg[2048] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_pending_node_t *pending_node = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT(this);
+ GF_ASSERT(priv);
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get"
+ " volname");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Volume %s does not exist", volname);
+
+ *op_errstr = gf_strdup(msg);
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, "%s",
+ msg);
+ goto out;
+ }
+
+ if (!priv->scrub_svc.online) {
+ ret = 0;
+ snprintf(msg, sizeof(msg), "Scrubber daemon is not running");
+
+ gf_msg_debug(this->name, 0, "%s", msg);
+ goto out;
+ }
+
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
+ }
+
+ pending_node->node = &(priv->scrub_svc);
+ pending_node->type = GD_NODE_SCRUB;
+ cds_list_add_tail(&pending_node->list, selected);
+ pending_node = NULL;
out:
- return ret;
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
-
+/* Select the bricks to send the barrier request to.
+ * This selects the bricks of the given volume which are present on this peer
+ * and are running
+ */
static int
-glusterd_op_ac_send_brick_op (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_bricks_select_barrier(dict_t *dict, struct cds_list_head *selected)
{
- int ret = 0;
- rpc_clnt_procedure_t *proc = NULL;
- glusterd_conf_t *priv = NULL;
- xlator_t *this = NULL;
- glusterd_op_t op = GD_OP_NONE;
- glusterd_req_ctx_t *req_ctx = NULL;
- char *op_errstr = NULL;
-
- this = THIS;
- priv = this->private;
-
- if (ctx) {
- req_ctx = ctx;
- } else {
- req_ctx = GF_CALLOC (1, sizeof (*req_ctx),
- gf_gld_mt_op_allack_ctx_t);
- op = glusterd_op_get_op ();
- req_ctx->op = op;
- uuid_copy (req_ctx->uuid, MY_UUID);
- ret = glusterd_op_build_payload (&req_ctx->dict, &op_errstr);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Building payload failed");
- opinfo.op_errstr = op_errstr;
- goto out;
- }
- }
-
- proc = &priv->gfs_mgmt->proctable[GLUSTERD_BRICK_OP];
- if (proc->fn) {
- ret = proc->fn (NULL, this, req_ctx);
- if (ret)
- goto out;
- }
-
- if (!opinfo.pending_count && !opinfo.brick_pending_count) {
- glusterd_clear_pending_nodes (&opinfo.pending_bricks);
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, req_ctx);
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_pending_node_t *pending_node = NULL;
+
+ GF_ASSERT(dict);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get volname");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "Failed to find volume %s", volname);
+ goto out;
+ }
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID) ||
+ !glusterd_is_brick_started(brickinfo)) {
+ continue;
+ }
+ pending_node = GF_CALLOC(1, sizeof(*pending_node),
+ gf_gld_mt_pending_node_t);
+ if (!pending_node) {
+ ret = -1;
+ goto out;
}
+ pending_node->node = brickinfo;
+ pending_node->type = GD_NODE_BRICK;
+ cds_list_add_tail(&pending_node->list, selected);
+ pending_node = NULL;
+ }
out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
-
- return ret;
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ return ret;
}
-
static int
-glusterd_op_ac_rcvd_brick_op_acc (glusterd_op_sm_event_t *event, void *ctx)
+glusterd_op_ac_send_brick_op(glusterd_op_sm_event_t *event, void *ctx)
{
- int ret = 0;
- glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL;
- char *op_errstr = NULL;
- glusterd_op_t op = GD_OP_NONE;
- gd_node_type type = GD_NODE_NONE;
- dict_t *op_ctx = NULL;
- glusterd_req_ctx_t *req_ctx = NULL;
- void *pending_entry = NULL;
-
- GF_ASSERT (event);
- GF_ASSERT (ctx);
- ev_ctx = ctx;
-
- req_ctx = ev_ctx->commit_ctx;
- GF_ASSERT (req_ctx);
-
- op = req_ctx->op;
- op_ctx = glusterd_op_get_ctx ();
- pending_entry = ev_ctx->pending_node->node;
- type = ev_ctx->pending_node->type;
-
- ret = glusterd_remove_pending_entry (&opinfo.pending_bricks,
- pending_entry);
+ int ret = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ glusterd_op_t op = GD_OP_NONE;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ char *op_errstr = NULL;
+ gf_boolean_t free_req_ctx = _gf_false;
+
+ this = THIS;
+ priv = this->private;
+
+ if (ctx) {
+ req_ctx = ctx;
+ } else {
+ req_ctx = GF_CALLOC(1, sizeof(*req_ctx), gf_gld_mt_op_allack_ctx_t);
+ if (!req_ctx)
+ goto out;
+ free_req_ctx = _gf_true;
+ op = glusterd_op_get_op();
+ req_ctx->op = op;
+ gf_uuid_copy(req_ctx->uuid, MY_UUID);
+ ret = glusterd_op_build_payload(&req_ctx->dict, &op_errstr, NULL);
if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "unknown response received ");
- ret = -1;
- goto out;
- }
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL, LOGSTR_BUILD_PAYLOAD,
+ gd_op_list[op]);
+ if (op_errstr == NULL)
+ gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+ }
+
+ proc = &priv->gfs_mgmt->proctable[GLUSTERD_BRICK_OP];
+ if (proc->fn) {
+ ret = proc->fn(NULL, this, req_ctx);
+ if (ret)
+ goto out;
+ }
- if (opinfo.brick_pending_count > 0)
- opinfo.brick_pending_count--;
+ if (!opinfo.pending_count && !opinfo.brick_pending_count) {
+ glusterd_clear_pending_nodes(&opinfo.pending_bricks);
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACK, &event->txn_id,
+ req_ctx);
+ }
- glusterd_handle_node_rsp (req_ctx, pending_entry, op, ev_ctx->rsp_dict,
- op_ctx, &op_errstr, type);
+out:
+ if (ret && free_req_ctx)
+ GF_FREE(req_ctx);
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
- if (opinfo.brick_pending_count > 0)
- goto out;
+ return ret;
+}
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_ALL_ACK, ev_ctx->commit_ctx);
+static int
+glusterd_op_ac_rcvd_brick_op_acc(glusterd_op_sm_event_t *event, void *ctx)
+{
+ int ret = -1;
+ glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL;
+ char *op_errstr = NULL;
+ glusterd_op_t op = GD_OP_NONE;
+ gd_node_type type = GD_NODE_NONE;
+ dict_t *op_ctx = NULL;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ void *pending_entry = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, event, out);
+ GF_VALIDATE_OR_GOTO(this->name, ctx, out);
+ ev_ctx = ctx;
+ GF_VALIDATE_OR_GOTO(this->name, ev_ctx, out);
+
+ req_ctx = ev_ctx->commit_ctx;
+ GF_VALIDATE_OR_GOTO(this->name, req_ctx, out);
+
+ op = req_ctx->op;
+ op_ctx = glusterd_op_get_ctx();
+ pending_entry = ev_ctx->pending_node->node;
+ type = ev_ctx->pending_node->type;
+
+ ret = glusterd_remove_pending_entry(&opinfo.pending_bricks, pending_entry);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNKNOWN_RESPONSE,
+ "unknown response received ");
+ ret = -1;
+ goto out;
+ }
+
+ if (opinfo.brick_pending_count > 0)
+ opinfo.brick_pending_count--;
+
+ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
+
+ glusterd_handle_node_rsp(req_ctx->dict, pending_entry, op, ev_ctx->rsp_dict,
+ op_ctx, &op_errstr, type);
+
+ if (opinfo.brick_pending_count > 0)
+ goto out;
+
+ ret = glusterd_op_sm_inject_event(GD_OP_EVENT_ALL_ACK, &event->txn_id,
+ ev_ctx->commit_ctx);
out:
- if (ev_ctx->rsp_dict)
- dict_unref (ev_ctx->rsp_dict);
- GF_FREE (ev_ctx);
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
-
- return ret;
+ if (ev_ctx && ev_ctx->rsp_dict)
+ dict_unref(ev_ctx->rsp_dict);
+ GF_FREE(ev_ctx);
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
}
int32_t
-glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr)
+glusterd_op_bricks_select(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ struct cds_list_head *selected, dict_t *rsp_dict)
{
- int ret = 0;
+ int ret = 0;
- GF_ASSERT (dict);
- GF_ASSERT (op_errstr);
- GF_ASSERT (op > GD_OP_NONE);
- GF_ASSERT (op < GD_OP_MAX);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(op > GD_OP_NONE);
+ GF_ASSERT(op < GD_OP_MAX);
- switch (op) {
+ switch (op) {
case GD_OP_STOP_VOLUME:
- ret = glusterd_bricks_select_stop_volume (dict, op_errstr);
- break;
-
+ ret = glusterd_bricks_select_stop_volume(dict, op_errstr, selected);
+ break;
case GD_OP_REMOVE_BRICK:
- ret = glusterd_bricks_select_remove_brick (dict, op_errstr);
- break;
+ ret = glusterd_bricks_select_remove_brick(dict, op_errstr,
+ selected);
+ break;
case GD_OP_PROFILE_VOLUME:
- ret = glusterd_bricks_select_profile_volume (dict, op_errstr);
- break;
+ ret = glusterd_bricks_select_profile_volume(dict, op_errstr,
+ selected);
+ break;
case GD_OP_HEAL_VOLUME:
- ret = glusterd_bricks_select_heal_volume (dict, op_errstr);
- break;
+ ret = glusterd_bricks_select_heal_volume(dict, op_errstr, selected,
+ rsp_dict);
+ break;
case GD_OP_STATUS_VOLUME:
- ret = glusterd_bricks_select_status_volume (dict, op_errstr);
- break;
-
+ ret = glusterd_bricks_select_status_volume(dict, op_errstr,
+ selected);
+ break;
case GD_OP_DEFRAG_BRICK_VOLUME:
- ret = glusterd_bricks_select_rebalance_volume (dict, op_errstr);
- break;
+ ret = glusterd_bricks_select_rebalance_volume(dict, op_errstr,
+ selected);
+ break;
+
+ case GD_OP_BARRIER:
+ ret = glusterd_bricks_select_barrier(dict, selected);
+ break;
+ case GD_OP_SNAP:
+ ret = glusterd_bricks_select_snap(dict, op_errstr, selected);
+ break;
+ case GD_OP_SCRUB_STATUS:
+ case GD_OP_SCRUB_ONDEMAND:
+ ret = glusterd_bricks_select_scrub(dict, op_errstr, selected);
+ break;
default:
- break;
- }
+ break;
+ }
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
- return ret;
+ return ret;
}
-glusterd_op_sm_t glusterd_op_state_default [] = {
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_NONE
- {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_send_lock},//EVENT_START_LOCK
- {GD_OP_STATE_LOCKED, glusterd_op_ac_lock}, //EVENT_LOCK
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_RCVD_ACC
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_ALL_ACC
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_STAGE_ACC
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_RCVD_RJT
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_STAGE_OP
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_COMMIT_OP
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_START_UNLOCK
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_ALL_ACK
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, //EVENT_MAX
+glusterd_op_sm_t glusterd_op_state_default[] = {
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_NONE
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_send_lock}, // EVENT_START_LOCK
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_lock}, // EVENT_LOCK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_RCVD_ACC
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_ALL_ACC
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_STAGE_ACC
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_COMMIT_ACC
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_RCVD_RJT
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_STAGE_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_START_UNLOCK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_none}, // EVENT_MAX
};
-glusterd_op_sm_t glusterd_op_state_lock_sent [] = {
- {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_NONE
- {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none},//EVENT_START_LOCK
- {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_lock}, //EVENT_LOCK
- {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_rcvd_lock_acc}, //EVENT_RCVD_ACC
- {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_send_stage_op}, //EVENT_ALL_ACC
- {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC
- {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_send_unlock_drain}, //EVENT_RCVD_RJT
- {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP
- {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
- {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK
- {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
- {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, //EVENT_MAX
+glusterd_op_sm_t glusterd_op_state_lock_sent[] = {
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // EVENT_NONE
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // EVENT_START_LOCK
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_lock}, // EVENT_LOCK
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_rcvd_lock_acc}, // EVENT_RCVD_ACC
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_send_stage_op}, // EVENT_ALL_ACC
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // EVENT_STAGE_ACC
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // EVENT_COMMIT_ACC
+ {GD_OP_STATE_ACK_DRAIN,
+ glusterd_op_ac_send_unlock_drain}, // EVENT_RCVD_RJT
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // EVENT_STAGE_OP
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // EVENT_ALL_ACK
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_LOCK_SENT, glusterd_op_ac_none}, // EVENT_MAX
};
-glusterd_op_sm_t glusterd_op_state_locked [] = {
- {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_NONE
- {GD_OP_STATE_LOCKED, glusterd_op_ac_none},//EVENT_START_LOCK
- {GD_OP_STATE_LOCKED, glusterd_op_ac_lock}, //EVENT_LOCK
- {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_RCVD_ACC
- {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_ALL_ACC
- {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
- {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
- {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_RCVD_RJT
- {GD_OP_STATE_STAGED, glusterd_op_ac_stage_op}, //EVENT_STAGE_OP
- {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
- {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_START_UNLOCK
- {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_ALL_ACK
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP
- {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, //EVENT_MAX
+glusterd_op_sm_t glusterd_op_state_locked[] = {
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_NONE
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_START_LOCK
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_lock}, // EVENT_LOCK
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_RCVD_ACC
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_ALL_ACC
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_STAGE_ACC
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_COMMIT_ACC
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_RCVD_RJT
+ {GD_OP_STATE_STAGED, glusterd_op_ac_stage_op}, // EVENT_STAGE_OP
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_START_UNLOCK
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT,
+ glusterd_op_ac_local_unlock}, // EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_LOCKED, glusterd_op_ac_none}, // EVENT_MAX
};
-glusterd_op_sm_t glusterd_op_state_stage_op_sent [] = {
- {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_NONE
- {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none},//EVENT_START_LOCK
- {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_lock}, //EVENT_LOCK
- {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_rcvd_stage_op_acc}, //EVENT_RCVD_ACC
- {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_send_brick_op}, //EVENT_ALL_ACC
- {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_send_brick_op}, //EVENT_STAGE_ACC
- {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
- {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_stage_op_failed}, //EVENT_RCVD_RJT
- {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP
- {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
- {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK
- {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
- {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, //EVENT_MAX
+glusterd_op_sm_t glusterd_op_state_stage_op_sent[] = {
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, // EVENT_NONE
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, // EVENT_START_LOCK
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_lock}, // EVENT_LOCK
+ {GD_OP_STATE_STAGE_OP_SENT,
+ glusterd_op_ac_rcvd_stage_op_acc}, // EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_send_brick_op}, // EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_OP_SENT,
+ glusterd_op_ac_send_brick_op}, // EVENT_STAGE_ACC
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, // EVENT_COMMIT_ACC
+ {GD_OP_STATE_STAGE_OP_FAILED,
+ glusterd_op_ac_stage_op_failed}, // EVENT_RCVD_RJT
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, // EVENT_STAGE_OP
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, // EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, // EVENT_ALL_ACK
+ {GD_OP_STATE_STAGE_OP_SENT,
+ glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_STAGE_OP_SENT, glusterd_op_ac_none}, // EVENT_MAX
};
-glusterd_op_sm_t glusterd_op_state_stage_op_failed [] = {
- {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_NONE
- {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK
- {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_lock}, //EVENT_LOCK
- {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_stage_op_failed}, //EVENT_RCVD_ACC
- {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC
- {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
- {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
- {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_stage_op_failed}, //EVENT_RCVD_RJT
- {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_OP
- {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK
- {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
- {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, //EVENT_MAX
+glusterd_op_sm_t glusterd_op_state_stage_op_failed[] = {
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, // EVENT_NONE
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, // EVENT_START_LOCK
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_lock}, // EVENT_LOCK
+ {GD_OP_STATE_STAGE_OP_FAILED,
+ glusterd_op_ac_stage_op_failed}, // EVENT_RCVD_ACC
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, // EVENT_ALL_ACC
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, // EVENT_STAGE_ACC
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, // EVENT_COMMIT_ACC
+ {GD_OP_STATE_STAGE_OP_FAILED,
+ glusterd_op_ac_stage_op_failed}, // EVENT_RCVD_RJT
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, // EVENT_STAGE_OP
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, // EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, // EVENT_ALL_ACK
+ {GD_OP_STATE_STAGE_OP_FAILED,
+ glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_STAGE_OP_FAILED, glusterd_op_ac_none}, // EVENT_MAX
};
-glusterd_op_sm_t glusterd_op_state_staged [] = {
- {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_NONE
- {GD_OP_STATE_STAGED, glusterd_op_ac_none},//EVENT_START_LOCK
- {GD_OP_STATE_STAGED, glusterd_op_ac_lock}, //EVENT_LOCK
- {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_RCVD_ACC
- {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_ALL_ACC
- {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
- {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
- {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_RCVD_RJT
- {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_STAGE_OP
- {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_send_brick_op}, //EVENT_COMMIT_OP
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
- {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_START_UNLOCK
- {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_ALL_ACK
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP
- {GD_OP_STATE_STAGED, glusterd_op_ac_none}, //EVENT_MAX
+glusterd_op_sm_t glusterd_op_state_staged[] = {
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_NONE
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_START_LOCK
+ {GD_OP_STATE_STAGED, glusterd_op_ac_lock}, // EVENT_LOCK
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_RCVD_ACC
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_ALL_ACC
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_STAGE_ACC
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_COMMIT_ACC
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_RCVD_RJT
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_STAGE_OP
+ {GD_OP_STATE_BRICK_COMMITTED,
+ glusterd_op_ac_send_brick_op}, // EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_START_UNLOCK
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT,
+ glusterd_op_ac_local_unlock}, // EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_STAGED, glusterd_op_ac_none}, // EVENT_MAX
};
-glusterd_op_sm_t glusterd_op_state_brick_op_sent [] = {
- {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_NONE
- {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none},//EVENT_START_LOCK
- {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_lock}, //EVENT_LOCK
- {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_rcvd_brick_op_acc}, //EVENT_RCVD_ACC
- {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACC
- {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC
- {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
- {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT
- {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_BRICK_OP
- {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
- {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_send_commit_op}, //EVENT_ALL_ACK
- {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
- {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, //EVENT_MAX
+glusterd_op_sm_t glusterd_op_state_brick_op_sent[] = {
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, // EVENT_NONE
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, // EVENT_START_LOCK
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_lock}, // EVENT_LOCK
+ {GD_OP_STATE_BRICK_OP_SENT,
+ glusterd_op_ac_rcvd_brick_op_acc}, // EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, // EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, // EVENT_STAGE_ACC
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, // EVENT_COMMIT_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED,
+ glusterd_op_ac_brick_op_failed}, // EVENT_RCVD_RJT
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, // EVENT_BRICK_OP
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, // EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK
+ {GD_OP_STATE_COMMIT_OP_SENT,
+ glusterd_op_ac_send_commit_op}, // EVENT_ALL_ACK
+ {GD_OP_STATE_BRICK_OP_SENT,
+ glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_BRICK_OP_SENT, glusterd_op_ac_none}, // EVENT_MAX
};
-glusterd_op_sm_t glusterd_op_state_brick_op_failed [] = {
- {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_NONE
- {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK
- {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_lock}, //EVENT_LOCK
- {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_ACC
- {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC
- {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
- {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
- {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT
- {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_BRICK_OP
- {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK
- {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
- {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, //EVENT_MAX
+glusterd_op_sm_t glusterd_op_state_brick_op_failed[] = {
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, // EVENT_NONE
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, // EVENT_START_LOCK
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_lock}, // EVENT_LOCK
+ {GD_OP_STATE_BRICK_OP_FAILED,
+ glusterd_op_ac_brick_op_failed}, // EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, // EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, // EVENT_STAGE_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, // EVENT_COMMIT_ACC
+ {GD_OP_STATE_BRICK_OP_FAILED,
+ glusterd_op_ac_brick_op_failed}, // EVENT_RCVD_RJT
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, // EVENT_BRICK_OP
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, // EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, // EVENT_ALL_ACK
+ {GD_OP_STATE_BRICK_OP_FAILED,
+ glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_BRICK_OP_FAILED, glusterd_op_ac_none}, // EVENT_MAX
};
-glusterd_op_sm_t glusterd_op_state_brick_committed [] = {
- {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_NONE
- {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none},//EVENT_START_LOCK
- {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_lock}, //EVENT_LOCK
- {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_rcvd_brick_op_acc}, //EVENT_RCVD_ACC
- {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_ALL_ACC
- {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
- {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
- {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT
- {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_STAGE_OP
- {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
- {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_START_UNLOCK
- {GD_OP_STATE_COMMITED, glusterd_op_ac_commit_op}, //EVENT_ALL_ACK
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP
- {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, //EVENT_MAX
+glusterd_op_sm_t glusterd_op_state_brick_committed[] = {
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_NONE
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_START_LOCK
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_lock}, // EVENT_LOCK
+ {GD_OP_STATE_BRICK_COMMITTED,
+ glusterd_op_ac_rcvd_brick_op_acc}, // EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_STAGE_ACC
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_COMMIT_ACC
+ {GD_OP_STATE_BRICK_COMMIT_FAILED,
+ glusterd_op_ac_brick_op_failed}, // EVENT_RCVD_RJT
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_STAGE_OP
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_START_UNLOCK
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_commit_op}, // EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT,
+ glusterd_op_ac_local_unlock}, // EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_BRICK_COMMITTED, glusterd_op_ac_none}, // EVENT_MAX
};
-glusterd_op_sm_t glusterd_op_state_brick_commit_failed [] = {
- {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_NONE
- {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK
- {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_lock}, //EVENT_LOCK
- {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_ACC
- {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC
- {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
- {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
- {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_brick_op_failed}, //EVENT_RCVD_RJT
- {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_OP
- {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
- {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_START_UNLOCK
- {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_send_commit_failed}, //EVENT_ALL_ACK
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP
- {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, //EVENT_MAX
+glusterd_op_sm_t glusterd_op_state_brick_commit_failed[] = {
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, // EVENT_NONE
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, // EVENT_START_LOCK
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_lock}, // EVENT_LOCK
+ {GD_OP_STATE_BRICK_COMMIT_FAILED,
+ glusterd_op_ac_brick_op_failed}, // EVENT_RCVD_ACC
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, // EVENT_ALL_ACC
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, // EVENT_STAGE_ACC
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, // EVENT_COMMIT_ACC
+ {GD_OP_STATE_BRICK_COMMIT_FAILED,
+ glusterd_op_ac_brick_op_failed}, // EVENT_RCVD_RJT
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, // EVENT_STAGE_OP
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, // EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK
+ {GD_OP_STATE_BRICK_COMMIT_FAILED,
+ glusterd_op_ac_none}, // EVENT_START_UNLOCK
+ {GD_OP_STATE_BRICK_COMMIT_FAILED,
+ glusterd_op_ac_send_commit_failed}, // EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT,
+ glusterd_op_ac_local_unlock}, // EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_BRICK_COMMIT_FAILED, glusterd_op_ac_none}, // EVENT_MAX
};
-glusterd_op_sm_t glusterd_op_state_commit_op_failed [] = {
- {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_NONE
- {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none},//EVENT_START_LOCK
- {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_lock}, //EVENT_LOCK
- {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_commit_op_failed}, //EVENT_RCVD_ACC
- {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_ALL_ACC
- {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
- {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
- {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_commit_op_failed}, //EVENT_RCVD_RJT
- {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_STAGE_OP
- {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK
- {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
- {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, //EVENT_MAX
+glusterd_op_sm_t glusterd_op_state_commit_op_failed[] = {
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, // EVENT_NONE
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, // EVENT_START_LOCK
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_lock}, // EVENT_LOCK
+ {GD_OP_STATE_COMMIT_OP_FAILED,
+ glusterd_op_ac_commit_op_failed}, // EVENT_RCVD_ACC
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, // EVENT_ALL_ACC
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, // EVENT_STAGE_ACC
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, // EVENT_COMMIT_ACC
+ {GD_OP_STATE_COMMIT_OP_FAILED,
+ glusterd_op_ac_commit_op_failed}, // EVENT_RCVD_RJT
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, // EVENT_STAGE_OP
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, // EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, // EVENT_ALL_ACK
+ {GD_OP_STATE_COMMIT_OP_FAILED,
+ glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_none}, // EVENT_MAX
};
-glusterd_op_sm_t glusterd_op_state_commit_op_sent [] = {
- {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_NONE
- {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none},//EVENT_START_LOCK
- {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_lock}, //EVENT_LOCK
- {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_rcvd_commit_op_acc}, //EVENT_RCVD_ACC
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACC
- {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_COMMIT_ACC
- {GD_OP_STATE_COMMIT_OP_FAILED, glusterd_op_ac_commit_op_failed}, //EVENT_RCVD_RJT
- {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP
- {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
- {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK
- {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
- {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, //EVENT_MAX
+glusterd_op_sm_t glusterd_op_state_commit_op_sent[] = {
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, // EVENT_NONE
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, // EVENT_START_LOCK
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_lock}, // EVENT_LOCK
+ {GD_OP_STATE_COMMIT_OP_SENT,
+ glusterd_op_ac_rcvd_commit_op_acc}, // EVENT_RCVD_ACC
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, // EVENT_ALL_ACC
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, // EVENT_STAGE_ACC
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, // EVENT_COMMIT_ACC
+ {GD_OP_STATE_COMMIT_OP_FAILED,
+ glusterd_op_ac_commit_op_failed}, // EVENT_RCVD_RJT
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, // EVENT_STAGE_OP
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, // EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, // EVENT_ALL_ACK
+ {GD_OP_STATE_COMMIT_OP_SENT,
+ glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_COMMIT_OP_SENT, glusterd_op_ac_none}, // EVENT_MAX
};
-glusterd_op_sm_t glusterd_op_state_committed [] = {
- {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_NONE
- {GD_OP_STATE_COMMITED, glusterd_op_ac_none},//EVENT_START_LOCK
- {GD_OP_STATE_COMMITED, glusterd_op_ac_lock}, //EVENT_LOCK
- {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_RCVD_ACC
- {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_ALL_ACC
- {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_STAGE_ACC
- {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
- {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_RCVD_RJT
- {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_STAGE_OP
- {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_COMMIT_OP
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
- {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_START_UNLOCK
- {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_ALL_ACK
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_local_unlock}, //EVENT_LOCAL_UNLOCK_NO_RESP
- {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, //EVENT_MAX
+glusterd_op_sm_t glusterd_op_state_committed[] = {
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_NONE
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_START_LOCK
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_lock}, // EVENT_LOCK
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_RCVD_ACC
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_ALL_ACC
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_STAGE_ACC
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_COMMIT_ACC
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_RCVD_RJT
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_STAGE_OP
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_START_UNLOCK
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_ALL_ACK
+ {GD_OP_STATE_DEFAULT,
+ glusterd_op_ac_local_unlock}, // EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_COMMITED, glusterd_op_ac_none}, // EVENT_MAX
};
-glusterd_op_sm_t glusterd_op_state_unlock_sent [] = {
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_NONE
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none},//EVENT_START_LOCK
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_lock}, //EVENT_LOCK
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_rcvd_unlock_acc}, //EVENT_RCVD_ACC
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlocked_all}, //EVENT_ALL_ACC
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_ACC
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_rcvd_unlock_acc}, //EVENT_RCVD_RJT
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_STAGE_OP
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_COMMIT_OP
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_ALL_ACK
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, //EVENT_MAX
+glusterd_op_sm_t glusterd_op_state_unlock_sent[] = {
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, // EVENT_NONE
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, // EVENT_START_LOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_lock}, // EVENT_LOCK
+ {GD_OP_STATE_UNLOCK_SENT,
+ glusterd_op_ac_rcvd_unlock_acc}, // EVENT_RCVD_ACC
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlocked_all}, // EVENT_ALL_ACC
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, // EVENT_STAGE_ACC
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, // EVENT_COMMIT_ACC
+ {GD_OP_STATE_UNLOCK_SENT,
+ glusterd_op_ac_rcvd_unlock_acc}, // EVENT_RCVD_RJT
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, // EVENT_STAGE_OP
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, // EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, // EVENT_ALL_ACK
+ {GD_OP_STATE_UNLOCK_SENT,
+ glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_none}, // EVENT_MAX
};
-glusterd_op_sm_t glusterd_op_state_ack_drain [] = {
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_NONE
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none},//EVENT_START_LOCK
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_lock}, //EVENT_LOCK
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_send_unlock_drain}, //EVENT_RCVD_ACC
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_ALL_ACC
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_STAGE_ACC
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_COMMIT_ACC
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_send_unlock_drain}, //EVENT_RCVD_RJT
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_STAGE_OP
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_COMMIT_OP
- {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, //EVENT_UNLOCK
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_START_UNLOCK
- {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, //EVENT_ALL_ACK
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_LOCAL_UNLOCK_NO_RESP
- {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, //EVENT_MAX
+glusterd_op_sm_t glusterd_op_state_ack_drain[] = {
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_NONE
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_LOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_lock}, // EVENT_LOCK
+ {GD_OP_STATE_ACK_DRAIN,
+ glusterd_op_ac_send_unlock_drain}, // EVENT_RCVD_ACC
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_ALL_ACC
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_STAGE_ACC
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_COMMIT_ACC
+ {GD_OP_STATE_ACK_DRAIN,
+ glusterd_op_ac_send_unlock_drain}, // EVENT_RCVD_RJT
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_STAGE_OP
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_COMMIT_OP
+ {GD_OP_STATE_DEFAULT, glusterd_op_ac_unlock}, // EVENT_UNLOCK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_START_UNLOCK
+ {GD_OP_STATE_UNLOCK_SENT, glusterd_op_ac_send_unlock}, // EVENT_ALL_ACK
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_LOCAL_UNLOCK_NO_RESP
+ {GD_OP_STATE_ACK_DRAIN, glusterd_op_ac_none}, // EVENT_MAX
};
-glusterd_op_sm_t *glusterd_op_state_table [] = {
- glusterd_op_state_default,
- glusterd_op_state_lock_sent,
- glusterd_op_state_locked,
- glusterd_op_state_stage_op_sent,
- glusterd_op_state_staged,
- glusterd_op_state_commit_op_sent,
- glusterd_op_state_committed,
- glusterd_op_state_unlock_sent,
- glusterd_op_state_stage_op_failed,
- glusterd_op_state_commit_op_failed,
- glusterd_op_state_brick_op_sent,
- glusterd_op_state_brick_op_failed,
- glusterd_op_state_brick_committed,
- glusterd_op_state_brick_commit_failed,
- glusterd_op_state_ack_drain
-};
+glusterd_op_sm_t *glusterd_op_state_table[] = {
+ glusterd_op_state_default, glusterd_op_state_lock_sent,
+ glusterd_op_state_locked, glusterd_op_state_stage_op_sent,
+ glusterd_op_state_staged, glusterd_op_state_commit_op_sent,
+ glusterd_op_state_committed, glusterd_op_state_unlock_sent,
+ glusterd_op_state_stage_op_failed, glusterd_op_state_commit_op_failed,
+ glusterd_op_state_brick_op_sent, glusterd_op_state_brick_op_failed,
+ glusterd_op_state_brick_committed, glusterd_op_state_brick_commit_failed,
+ glusterd_op_state_ack_drain};
int
-glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type,
- glusterd_op_sm_event_t **new_event)
+glusterd_op_sm_new_event(glusterd_op_sm_event_type_t event_type,
+ glusterd_op_sm_event_t **new_event)
{
- glusterd_op_sm_event_t *event = NULL;
+ glusterd_op_sm_event_t *event = NULL;
- GF_ASSERT (new_event);
- GF_ASSERT (GD_OP_EVENT_NONE <= event_type &&
- GD_OP_EVENT_MAX > event_type);
+ GF_ASSERT(new_event);
+ GF_ASSERT(GD_OP_EVENT_NONE <= event_type && GD_OP_EVENT_MAX > event_type);
- event = GF_CALLOC (1, sizeof (*event), gf_gld_mt_op_sm_event_t);
+ event = GF_CALLOC(1, sizeof(*event), gf_gld_mt_op_sm_event_t);
- if (!event)
- return -1;
+ if (!event)
+ return -1;
- *new_event = event;
- event->event = event_type;
- INIT_LIST_HEAD (&event->list);
+ *new_event = event;
+ event->event = event_type;
+ CDS_INIT_LIST_HEAD(&event->list);
- return 0;
+ return 0;
}
int
-glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type,
- void *ctx)
+glusterd_op_sm_inject_event(glusterd_op_sm_event_type_t event_type,
+ uuid_t *txn_id, void *ctx)
{
- int32_t ret = -1;
- glusterd_op_sm_event_t *event = NULL;
+ int32_t ret = -1;
+ glusterd_op_sm_event_t *event = NULL;
- GF_ASSERT (event_type < GD_OP_EVENT_MAX &&
- event_type >= GD_OP_EVENT_NONE);
+ GF_ASSERT(event_type < GD_OP_EVENT_MAX && event_type >= GD_OP_EVENT_NONE);
- ret = glusterd_op_sm_new_event (event_type, &event);
+ ret = glusterd_op_sm_new_event(event_type, &event);
- if (ret)
- goto out;
+ if (ret)
+ goto out;
+
+ event->ctx = ctx;
- event->ctx = ctx;
+ if (txn_id)
+ gf_uuid_copy(event->txn_id, *txn_id);
- gf_log ("glusterd", GF_LOG_DEBUG, "Enqueue event: '%s'",
- glusterd_op_sm_event_name_get (event->event));
- list_add_tail (&event->list, &gd_op_sm_queue);
+ gf_msg_debug(THIS->name, 0, "Enqueue event: '%s'",
+ glusterd_op_sm_event_name_get(event->event));
+ cds_list_add_tail(&event->list, &gd_op_sm_queue);
out:
- return ret;
+ return ret;
}
void
-glusterd_destroy_req_ctx (glusterd_req_ctx_t *ctx)
+glusterd_destroy_req_ctx(glusterd_req_ctx_t *ctx)
{
- if (!ctx)
- return;
- if (ctx->dict)
- dict_unref (ctx->dict);
- GF_FREE (ctx);
+ if (!ctx)
+ return;
+ if (ctx->dict)
+ dict_unref(ctx->dict);
+ GF_FREE(ctx);
}
void
-glusterd_destroy_local_unlock_ctx (uuid_t *ctx)
+glusterd_destroy_local_unlock_ctx(uuid_t *ctx)
{
- if (!ctx)
- return;
- GF_FREE (ctx);
+ if (!ctx)
+ return;
+ GF_FREE(ctx);
}
void
-glusterd_destroy_op_event_ctx (glusterd_op_sm_event_t *event)
+glusterd_destroy_op_event_ctx(glusterd_op_sm_event_t *event)
{
- if (!event)
- return;
+ if (!event)
+ return;
- switch (event->event) {
+ switch (event->event) {
case GD_OP_EVENT_LOCK:
case GD_OP_EVENT_UNLOCK:
- glusterd_destroy_lock_ctx (event->ctx);
- break;
+ glusterd_destroy_lock_ctx(event->ctx);
+ break;
case GD_OP_EVENT_STAGE_OP:
case GD_OP_EVENT_ALL_ACK:
- glusterd_destroy_req_ctx (event->ctx);
- break;
+ glusterd_destroy_req_ctx(event->ctx);
+ break;
case GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP:
- glusterd_destroy_local_unlock_ctx (event->ctx);
- break;
+ glusterd_destroy_local_unlock_ctx(event->ctx);
+ break;
default:
- break;
- }
+ break;
+ }
}
int
-glusterd_op_sm ()
+glusterd_op_sm()
{
- glusterd_op_sm_event_t *event = NULL;
- glusterd_op_sm_event_t *tmp = NULL;
- int ret = -1;
- int lock_err = 0;
- glusterd_op_sm_ac_fn handler = NULL;
- glusterd_op_sm_t *state = NULL;
- glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
-
- if ((lock_err = pthread_mutex_trylock (&gd_op_sm_lock))) {
- gf_log (THIS->name, GF_LOG_DEBUG, "lock failed due to %s",
- strerror (lock_err));
- goto lock_failed;
- }
-
- while (!list_empty (&gd_op_sm_queue)) {
-
- list_for_each_entry_safe (event, tmp, &gd_op_sm_queue, list) {
-
- list_del_init (&event->list);
- event_type = event->event;
- gf_log ("", GF_LOG_DEBUG, "Dequeued event of type: '%s'",
- glusterd_op_sm_event_name_get(event_type));
-
- state = glusterd_op_state_table[opinfo.state.state];
-
- GF_ASSERT (state);
-
- handler = state[event_type].handler;
- GF_ASSERT (handler);
-
- ret = handler (event, event->ctx);
-
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "handler returned: %d", ret);
- glusterd_destroy_op_event_ctx (event);
- GF_FREE (event);
- continue;
- }
-
- ret = glusterd_op_sm_transition_state (&opinfo, state,
- event_type);
-
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "Unable to transition"
- "state from '%s' to '%s'",
- glusterd_op_sm_state_name_get(opinfo.state.state),
- glusterd_op_sm_state_name_get(state[event_type].next_state));
- (void ) pthread_mutex_unlock (&gd_op_sm_lock);
- return ret;
- }
-
- glusterd_destroy_op_event_ctx (event);
- GF_FREE (event);
+ glusterd_op_sm_event_t *event = NULL;
+ glusterd_op_sm_event_t *tmp = NULL;
+ int ret = -1;
+ int lock_err = 0;
+ glusterd_op_sm_ac_fn handler = NULL;
+ glusterd_op_sm_t *state = NULL;
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ xlator_t *this = NULL;
+ glusterd_op_info_t txn_op_info;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = synclock_trylock(&gd_op_sm_lock);
+ if (ret) {
+ lock_err = errno;
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_LOCK_FAIL,
+ "lock failed due to %s", strerror(lock_err));
+ goto lock_failed;
+ }
+
+ while (!cds_list_empty(&gd_op_sm_queue)) {
+ cds_list_for_each_entry_safe(event, tmp, &gd_op_sm_queue, list)
+ {
+ cds_list_del_init(&event->list);
+ event_type = event->event;
+ gf_msg_debug(this->name, 0,
+ "Dequeued event of "
+ "type: '%s'",
+ glusterd_op_sm_event_name_get(event_type));
+
+ gf_msg_debug(this->name, 0, "transaction ID = %s",
+ uuid_utoa(event->txn_id));
+
+ ret = glusterd_get_txn_opinfo(&event->txn_id, &txn_op_info);
+ if (ret) {
+ gf_msg_callingfn(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_TRANS_OPINFO_GET_FAIL,
+ "Unable to get transaction "
+ "opinfo for transaction ID :"
+ "%s",
+ uuid_utoa(event->txn_id));
+ glusterd_destroy_op_event_ctx(event);
+ GF_FREE(event);
+ continue;
+ } else
+ opinfo = txn_op_info;
+
+ state = glusterd_op_state_table[opinfo.state.state];
+
+ GF_ASSERT(state);
+
+ handler = state[event_type].handler;
+ GF_ASSERT(handler);
+
+ ret = handler(event, event->ctx);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HANDLER_RETURNED,
+ "handler returned: %d", ret);
+ glusterd_destroy_op_event_ctx(event);
+ GF_FREE(event);
+ continue;
+ }
+
+ ret = glusterd_op_sm_transition_state(&opinfo, state, event_type);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_EVENT_STATE_TRANSITION_FAIL,
+ "Unable to transition"
+ "state from '%s' to '%s'",
+ glusterd_op_sm_state_name_get(opinfo.state.state),
+ glusterd_op_sm_state_name_get(
+ state[event_type].next_state));
+ (void)synclock_unlock(&gd_op_sm_lock);
+ return ret;
+ }
+
+ if ((state[event_type].next_state == GD_OP_STATE_DEFAULT) &&
+ (event_type == GD_OP_EVENT_UNLOCK)) {
+ /* Clearing the transaction opinfo */
+ ret = glusterd_clear_txn_opinfo(&event->txn_id);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_TRANS_OPINFO_CLEAR_FAIL,
+ "Unable to clear "
+ "transaction's opinfo");
+ } else {
+ if ((priv->op_version < GD_OP_VERSION_6_0) ||
+ !(event_type == GD_OP_EVENT_STAGE_OP &&
+ opinfo.state.state == GD_OP_STATE_STAGED &&
+ opinfo.skip_locking)) {
+ ret = glusterd_set_txn_opinfo(&event->txn_id, &opinfo);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
}
- }
+ }
+ glusterd_destroy_op_event_ctx(event);
+ GF_FREE(event);
+ }
+ }
- (void ) pthread_mutex_unlock (&gd_op_sm_lock);
- ret = 0;
+ (void)synclock_unlock(&gd_op_sm_lock);
+ ret = 0;
lock_failed:
- return ret;
+ return ret;
}
int32_t
-glusterd_op_set_op (glusterd_op_t op)
+glusterd_op_set_op(glusterd_op_t op)
{
+ GF_ASSERT(op < GD_OP_MAX);
+ GF_ASSERT(op > GD_OP_NONE);
- GF_ASSERT (op < GD_OP_MAX);
- GF_ASSERT (op > GD_OP_NONE);
-
- opinfo.op = op;
-
- return 0;
+ opinfo.op = op;
+ return 0;
}
int32_t
-glusterd_op_get_op ()
+glusterd_op_get_op()
{
-
- return opinfo.op;
-
+ return opinfo.op;
}
int32_t
-glusterd_op_set_req (rpcsvc_request_t *req)
+glusterd_op_set_req(rpcsvc_request_t *req)
{
-
- GF_ASSERT (req);
- opinfo.req = req;
- return 0;
+ GF_ASSERT(req);
+ opinfo.req = req;
+ return 0;
}
int32_t
-glusterd_op_clear_op (glusterd_op_t op)
+glusterd_op_clear_op(glusterd_op_t op)
{
+ opinfo.op = GD_OP_NONE;
- opinfo.op = GD_OP_NONE;
-
- return 0;
-
+ return 0;
}
int32_t
-glusterd_op_init_ctx (glusterd_op_t op)
+glusterd_op_free_ctx(glusterd_op_t op, void *ctx)
{
- int ret = 0;
- dict_t *dict = NULL;
-
- GF_ASSERT (GD_OP_NONE < op && op < GD_OP_MAX);
-
- if (_gf_false == glusterd_need_brick_op (op)) {
- gf_log ("", GF_LOG_DEBUG, "Received op: %d, returning", op);
- goto out;
- }
- dict = dict_new ();
- if (dict == NULL) {
- ret = -1;
- goto out;
- }
- ret = glusterd_op_set_ctx (dict);
- if (ret)
- goto out;
-out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
-}
-
-
-
-int32_t
-glusterd_op_fini_ctx ()
-{
- dict_t *dict = NULL;
-
- dict = glusterd_op_get_ctx ();
- if (dict)
- dict_unref (dict);
-
- glusterd_op_reset_ctx ();
- return 0;
-}
-
-
-
-int32_t
-glusterd_op_free_ctx (glusterd_op_t op, void *ctx)
-{
-
- if (ctx) {
- switch (op) {
- case GD_OP_CREATE_VOLUME:
- case GD_OP_DELETE_VOLUME:
- case GD_OP_STOP_VOLUME:
- case GD_OP_ADD_BRICK:
- case GD_OP_REMOVE_BRICK:
- case GD_OP_REPLACE_BRICK:
- case GD_OP_LOG_ROTATE:
- case GD_OP_SYNC_VOLUME:
- case GD_OP_SET_VOLUME:
- case GD_OP_START_VOLUME:
- case GD_OP_RESET_VOLUME:
- case GD_OP_GSYNC_SET:
- case GD_OP_QUOTA:
- case GD_OP_PROFILE_VOLUME:
- case GD_OP_STATUS_VOLUME:
- case GD_OP_REBALANCE:
- case GD_OP_HEAL_VOLUME:
- case GD_OP_STATEDUMP_VOLUME:
- case GD_OP_CLEARLOCKS_VOLUME:
- case GD_OP_DEFRAG_BRICK_VOLUME:
- dict_unref (ctx);
- break;
- default:
- GF_ASSERT (0);
- break;
- }
+ if (ctx) {
+ switch (op) {
+ case GD_OP_CREATE_VOLUME:
+ case GD_OP_DELETE_VOLUME:
+ case GD_OP_STOP_VOLUME:
+ case GD_OP_ADD_BRICK:
+ case GD_OP_REMOVE_BRICK:
+ case GD_OP_REPLACE_BRICK:
+ case GD_OP_LOG_ROTATE:
+ case GD_OP_SYNC_VOLUME:
+ case GD_OP_SET_VOLUME:
+ case GD_OP_START_VOLUME:
+ case GD_OP_RESET_VOLUME:
+ case GD_OP_GSYNC_SET:
+ case GD_OP_QUOTA:
+ case GD_OP_PROFILE_VOLUME:
+ case GD_OP_STATUS_VOLUME:
+ case GD_OP_REBALANCE:
+ case GD_OP_HEAL_VOLUME:
+ case GD_OP_STATEDUMP_VOLUME:
+ case GD_OP_CLEARLOCKS_VOLUME:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ case GD_OP_MAX_OPVERSION:
+ dict_unref(ctx);
+ break;
+ default:
+ GF_ASSERT(0);
+ break;
}
+ }
- glusterd_op_reset_ctx ();
- return 0;
-
+ glusterd_op_reset_ctx();
+ return 0;
}
void *
-glusterd_op_get_ctx ()
+glusterd_op_get_ctx()
{
-
- return opinfo.op_ctx;
-
+ return opinfo.op_ctx;
}
int
-glusterd_op_sm_init ()
+glusterd_op_sm_init()
{
- INIT_LIST_HEAD (&gd_op_sm_queue);
- pthread_mutex_init (&gd_op_sm_lock, NULL);
- return 0;
+ CDS_INIT_LIST_HEAD(&gd_op_sm_queue);
+ synclock_init(&gd_op_sm_lock, SYNC_LOCK_DEFAULT);
+ return 0;
}
-
diff --git a/xlators/mgmt/glusterd/src/glusterd-op-sm.h b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
index 7316b6fbbb7..8a24b16612a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-op-sm.h
+++ b/xlators/mgmt/glusterd/src/glusterd-op-sm.h
@@ -1,276 +1,313 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _GLUSTERD_OP_SM_H_
#define _GLUSTERD_OP_SM_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
-#ifndef GSYNC_CONF
-#define GSYNC_CONF GEOREP"/gsyncd.conf"
-#endif
-
#include <pthread.h>
-#include "uuid.h"
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "call-stub.h"
-#include "fd.h"
-#include "byte-order.h"
+#include <glusterfs/compat-uuid.h>
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/byte-order.h>
#include "glusterd.h"
#include "protocol-common.h"
+#include "glusterd-hooks.h"
-#define GD_VOLUME_NAME_MAX 256
+#define GD_OP_PROTECTED (0x02)
+#define GD_OP_UNPROTECTED (0x04)
typedef enum glusterd_op_sm_state_ {
- GD_OP_STATE_DEFAULT = 0,
- GD_OP_STATE_LOCK_SENT,
- GD_OP_STATE_LOCKED,
- GD_OP_STATE_STAGE_OP_SENT,
- GD_OP_STATE_STAGED,
- GD_OP_STATE_COMMIT_OP_SENT,
- GD_OP_STATE_COMMITED,
- GD_OP_STATE_UNLOCK_SENT,
- GD_OP_STATE_STAGE_OP_FAILED,
- GD_OP_STATE_COMMIT_OP_FAILED,
- GD_OP_STATE_BRICK_OP_SENT,
- GD_OP_STATE_BRICK_OP_FAILED,
- GD_OP_STATE_BRICK_COMMITTED,
- GD_OP_STATE_BRICK_COMMIT_FAILED,
- GD_OP_STATE_ACK_DRAIN,
- GD_OP_STATE_MAX,
+ GD_OP_STATE_DEFAULT = 0,
+ GD_OP_STATE_LOCK_SENT,
+ GD_OP_STATE_LOCKED,
+ GD_OP_STATE_STAGE_OP_SENT,
+ GD_OP_STATE_STAGED,
+ GD_OP_STATE_COMMIT_OP_SENT,
+ GD_OP_STATE_COMMITED,
+ GD_OP_STATE_UNLOCK_SENT,
+ GD_OP_STATE_STAGE_OP_FAILED,
+ GD_OP_STATE_COMMIT_OP_FAILED,
+ GD_OP_STATE_BRICK_OP_SENT,
+ GD_OP_STATE_BRICK_OP_FAILED,
+ GD_OP_STATE_BRICK_COMMITTED,
+ GD_OP_STATE_BRICK_COMMIT_FAILED,
+ GD_OP_STATE_ACK_DRAIN,
+ GD_OP_STATE_MAX,
} glusterd_op_sm_state_t;
typedef enum glusterd_op_sm_event_type_ {
- GD_OP_EVENT_NONE = 0,
- GD_OP_EVENT_START_LOCK,
- GD_OP_EVENT_LOCK,
- GD_OP_EVENT_RCVD_ACC,
- GD_OP_EVENT_ALL_ACC,
- GD_OP_EVENT_STAGE_ACC,
- GD_OP_EVENT_COMMIT_ACC,
- GD_OP_EVENT_RCVD_RJT,
- GD_OP_EVENT_STAGE_OP,
- GD_OP_EVENT_COMMIT_OP,
- GD_OP_EVENT_UNLOCK,
- GD_OP_EVENT_START_UNLOCK,
- GD_OP_EVENT_ALL_ACK,
- GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP,
- GD_OP_EVENT_MAX
+ GD_OP_EVENT_NONE = 0,
+ GD_OP_EVENT_START_LOCK,
+ GD_OP_EVENT_LOCK,
+ GD_OP_EVENT_RCVD_ACC,
+ GD_OP_EVENT_ALL_ACC,
+ GD_OP_EVENT_STAGE_ACC,
+ GD_OP_EVENT_COMMIT_ACC,
+ GD_OP_EVENT_RCVD_RJT,
+ GD_OP_EVENT_STAGE_OP,
+ GD_OP_EVENT_COMMIT_OP,
+ GD_OP_EVENT_UNLOCK,
+ GD_OP_EVENT_START_UNLOCK,
+ GD_OP_EVENT_ALL_ACK,
+ GD_OP_EVENT_LOCAL_UNLOCK_NO_RESP,
+ GD_OP_EVENT_MAX
} glusterd_op_sm_event_type_t;
-
struct glusterd_op_sm_event_ {
- struct list_head list;
- void *ctx;
- glusterd_op_sm_event_type_t event;
+ struct cds_list_head list;
+ void *ctx;
+ glusterd_op_sm_event_type_t event;
+ uuid_t txn_id;
};
typedef struct glusterd_op_sm_event_ glusterd_op_sm_event_t;
-typedef int (*glusterd_op_sm_ac_fn) (glusterd_op_sm_event_t *, void *);
+typedef int (*glusterd_op_sm_ac_fn)(glusterd_op_sm_event_t *, void *);
typedef struct glusterd_op_sm_ {
- glusterd_op_sm_state_t next_state;
- glusterd_op_sm_ac_fn handler;
+ glusterd_op_sm_state_t next_state;
+ glusterd_op_sm_ac_fn handler;
} glusterd_op_sm_t;
typedef struct glusterd_op_sm_state_info_ {
- glusterd_op_sm_state_t state;
- struct timeval time;
+ glusterd_op_sm_state_t state;
+ struct timeval time;
} glusterd_op_sm_state_info_t;
struct glusterd_op_info_ {
- glusterd_op_sm_state_info_t state;
- int32_t pending_count;
- int32_t brick_pending_count;
- int32_t op_count;
- glusterd_op_t op;
- struct list_head op_peers;
- void *op_ctx;
- rpcsvc_request_t *req;
- int32_t op_ret;
- int32_t op_errno;
- char *op_errstr;
- struct list_head pending_bricks;
+ glusterd_op_sm_state_info_t state;
+ int32_t pending_count;
+ int32_t brick_pending_count;
+ int32_t op_count;
+ /* op is an enum, glusterd_op_t or glusterd_op_sm_state_info_t */
+ int op;
+ struct cds_list_head op_peers;
+ void *op_ctx;
+ rpcsvc_request_t *req;
+ int32_t op_ret;
+ int32_t op_errno;
+ char *op_errstr;
+ struct cds_list_head pending_bricks;
+ uint32_t txn_generation;
+ gf_boolean_t skip_locking;
};
typedef struct glusterd_op_info_ glusterd_op_info_t;
struct glusterd_op_log_filename_ctx_ {
- char volume_name[GD_VOLUME_NAME_MAX];
- char brick[GD_VOLUME_NAME_MAX];
- char path[PATH_MAX];
+ char volume_name[GD_VOLUME_NAME_MAX];
+ char brick[GD_VOLUME_NAME_MAX];
+ char path[PATH_MAX];
};
typedef struct glusterd_op_log_filename_ctx_ glusterd_op_log_filename_ctx_t;
struct glusterd_op_lock_ctx_ {
- uuid_t uuid;
- rpcsvc_request_t *req;
+ uuid_t uuid;
+ dict_t *dict;
+ rpcsvc_request_t *req;
};
typedef struct glusterd_op_lock_ctx_ glusterd_op_lock_ctx_t;
struct glusterd_req_ctx_ {
- rpcsvc_request_t *req;
- u_char uuid[16];
- int op;
- dict_t *dict;
+ rpcsvc_request_t *req;
+ u_char uuid[16];
+ int op;
+ dict_t *dict;
};
typedef struct glusterd_req_ctx_ glusterd_req_ctx_t;
typedef struct glusterd_op_brick_rsp_ctx_ {
- int op_ret;
- char *op_errstr;
- dict_t *rsp_dict;
- glusterd_req_ctx_t *commit_ctx;
- glusterd_pending_node_t *pending_node;
+ int op_ret;
+ char *op_errstr;
+ dict_t *rsp_dict;
+ glusterd_req_ctx_t *commit_ctx;
+ glusterd_pending_node_t *pending_node;
} glusterd_op_brick_rsp_ctx_t;
typedef struct glusterd_pr_brick_rsp_conv_t {
- int count;
- dict_t *dict;
+ int count;
+ dict_t *dict;
} glusterd_pr_brick_rsp_conv_t;
typedef struct glusterd_heal_rsp_conv_ {
- dict_t *dict;
- glusterd_volinfo_t *volinfo;
- xlator_t *this;
+ dict_t *dict;
+ glusterd_volinfo_t *volinfo;
+ xlator_t *this;
} glusterd_heal_rsp_conv_t;
typedef struct glusterd_status_rsp_conv_ {
- int count;
- int brick_index_max;
- int other_count;
- dict_t *dict;
+ int count;
+ int brick_index_max;
+ int other_count;
+ dict_t *dict;
} glusterd_status_rsp_conv_t;
-typedef struct glusterd_gsync_status_temp {
- dict_t *rsp_dict;
- glusterd_volinfo_t *volinfo;
-}glusterd_gsync_status_temp_t;
+typedef struct glusterd_txn_opinfo_object_ {
+ glusterd_op_info_t opinfo;
+} glusterd_txn_opinfo_obj;
+
+typedef enum cli_cmd_type_ {
+ PER_HEAL_XL,
+ ALL_HEAL_XL,
+} cli_cmd_type;
+
+typedef struct glusterd_all_volume_options {
+ char *option;
+ char *dflt_val;
+} glusterd_all_vol_opts;
+
int
-glusterd_op_sm_new_event (glusterd_op_sm_event_type_t event_type,
- glusterd_op_sm_event_t **new_event);
+glusterd_op_commit_hook(glusterd_op_t op, dict_t *op_ctx,
+ glusterd_commit_hook_type_t type);
+
+int
+glusterd_op_sm_new_event(glusterd_op_sm_event_type_t event_type,
+ glusterd_op_sm_event_t **new_event);
int
-glusterd_op_sm_inject_event (glusterd_op_sm_event_type_t event_type,
- void *ctx);
+glusterd_op_sm_inject_event(glusterd_op_sm_event_type_t event_type,
+ uuid_t *txn_id, void *ctx);
int
-glusterd_op_sm_init ();
+glusterd_op_sm_init();
int
-glusterd_op_sm ();
+glusterd_op_sm();
int32_t
-glusterd_op_set_ctx (void *ctx);
+glusterd_op_set_ctx(void *ctx);
int32_t
-glusterd_op_set_op (glusterd_op_t op);
+glusterd_op_set_op(glusterd_op_t op);
int
-glusterd_op_build_payload (dict_t **req, char **op_errstr);
+glusterd_op_build_payload(dict_t **req, char **op_errstr, dict_t *op_ctx);
+
+int32_t
+glusterd_op_stage_validate(glusterd_op_t op, dict_t *req, char **op_errstr,
+ dict_t *rsp_dict);
+
+int32_t
+glusterd_op_commit_perform(glusterd_op_t op, dict_t *req, char **op_errstr,
+ dict_t *dict);
int32_t
-glusterd_op_stage_validate (glusterd_op_t op, dict_t *req, char **op_errstr,
- dict_t *rsp_dict);
+glusterd_op_txn_begin(rpcsvc_request_t *req, glusterd_op_t op, void *ctx,
+ char *err_str, size_t err_len);
int32_t
-glusterd_op_commit_perform (glusterd_op_t op, dict_t *req, char **op_errstr,
- dict_t* dict);
+glusterd_op_txn_complete();
void *
-glusterd_op_get_ctx ();
+glusterd_op_get_ctx();
int32_t
-glusterd_op_set_req (rpcsvc_request_t *req);
+glusterd_op_set_req(rpcsvc_request_t *req);
int32_t
-glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,
- int32_t op_errno, rpcsvc_request_t *req,
- void *ctx, char *op_errstr);
+glusterd_op_send_cli_response(glusterd_op_t op, int32_t op_ret,
+ int32_t op_errno, rpcsvc_request_t *req,
+ void *ctx, char *op_errstr);
int32_t
-glusterd_op_get_op ();
+glusterd_op_get_op();
int32_t
-glusterd_op_clear_op ();
+glusterd_op_clear_op();
int32_t
-glusterd_op_free_ctx (glusterd_op_t op, void *ctx);
+glusterd_op_free_ctx(glusterd_op_t op, void *ctx);
int
glusterd_check_option_exists(char *optstring, char **completion);
int
-set_xlator_option (dict_t *dict, char *key, char *value);
+set_xlator_option(dict_t *dict, char *key, char *value);
-void
-glusterd_do_replace_brick (void *data);
-
-char*
-glusterd_op_sm_state_name_get (int state);
+char *
+glusterd_op_sm_state_name_get(int state);
-char*
-glusterd_op_sm_event_name_get (int event);
+char *
+glusterd_op_sm_event_name_get(int event);
int32_t
-glusterd_op_bricks_select (glusterd_op_t op, dict_t *dict, char **op_errstr);
+glusterd_op_bricks_select(glusterd_op_t op, dict_t *dict, char **op_errstr,
+ struct cds_list_head *selected, dict_t *rsp_dict);
int
-glusterd_brick_op_build_payload (glusterd_op_t op, glusterd_brickinfo_t *brickinfo,
- gd1_mgmt_brick_op_req **req, dict_t *dict);
+glusterd_brick_op_build_payload(glusterd_op_t op,
+ glusterd_brickinfo_t *brickinfo,
+ gd1_mgmt_brick_op_req **req, dict_t *dict);
int
-glusterd_node_op_build_payload (glusterd_op_t op, gd1_mgmt_brick_op_req **req,
+glusterd_node_op_build_payload(glusterd_op_t op, gd1_mgmt_brick_op_req **req,
dict_t *dict);
int32_t
-glusterd_handle_brick_rsp (void *pending_entry, glusterd_op_t op,
- dict_t *rsp_dict, dict_t *ctx_dict, char **op_errstr,
- gd_node_type type);
-int32_t
-glusterd_op_init_ctx (glusterd_op_t op);
-int32_t
-glusterd_op_fini_ctx ();
+glusterd_handle_brick_rsp(void *pending_entry, glusterd_op_t op,
+ dict_t *rsp_dict, dict_t *ctx_dict, char **op_errstr,
+ gd_node_type type);
+
+dict_t *
+glusterd_op_init_commit_rsp_dict(glusterd_op_t op);
+
+void
+glusterd_op_modify_op_ctx(glusterd_op_t op, void *op_ctx);
+
+int
+glusterd_set_detach_bricks(dict_t *dict, glusterd_volinfo_t *volinfo);
+
int32_t
-glusterd_volume_stats_read_perf (char *brick_path, int32_t blk_size,
- int32_t blk_count, double *throughput, double *time);
+glusterd_volume_stats_read_perf(char *brick_path, int32_t blk_size,
+ int32_t blk_count, double *throughput,
+ double *time);
int32_t
-glusterd_volume_stats_write_perf (char *brick_path, int32_t blk_size,
- int32_t blk_count, double *throughput, double *time);
+glusterd_volume_stats_write_perf(char *brick_path, int32_t blk_size,
+ int32_t blk_count, double *throughput,
+ double *time);
gf_boolean_t
-glusterd_is_volume_started (glusterd_volinfo_t *volinfo);
+glusterd_is_volume_started(glusterd_volinfo_t *volinfo);
+
int
-glusterd_start_bricks (glusterd_volinfo_t *volinfo);
+glusterd_start_bricks(glusterd_volinfo_t *volinfo);
+
gf_boolean_t
-glusterd_are_all_volumes_stopped ();
+glusterd_are_all_volumes_stopped();
+int
+glusterd_stop_bricks(glusterd_volinfo_t *volinfo);
int
-glusterd_stop_bricks (glusterd_volinfo_t *volinfo);
+glusterd_defrag_volume_node_rsp(dict_t *req_dict, dict_t *rsp_dict,
+ dict_t *op_ctx);
+
+int32_t
+glusterd_get_txn_opinfo(uuid_t *txn_id, glusterd_op_info_t *opinfo);
+
+int32_t
+glusterd_set_txn_opinfo(uuid_t *txn_id, glusterd_op_info_t *opinfo);
+
+int32_t
+glusterd_clear_txn_opinfo(uuid_t *txn_id);
+
+int32_t
+glusterd_generate_txn_id(dict_t *dict, uuid_t **txn_id);
+
+void
+glusterd_set_opinfo(char *errstr, int32_t op_errno, int32_t op_ret);
+
+int
+glusterd_dict_set_volid(dict_t *dict, char *volname, char **op_errstr);
+
int
-gsync_status (char *master, char *slave, int *status);
+glusterd_op_stats_volume(dict_t *dict, char **op_errstr, dict_t *rsp_dict);
int
-glusterd_check_gsync_running (glusterd_volinfo_t *volinfo, gf_boolean_t *flag);
+glusterd_op_stage_stats_volume(dict_t *dict, char **op_errstr);
int
-glusterd_defrag_volume_node_rsp (dict_t *req_dict, dict_t *rsp_dict,
- dict_t *op_ctx);
+gd_set_commit_hash(dict_t *dict);
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.c b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
new file mode 100644
index 00000000000..18d355cb186
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.c
@@ -0,0 +1,1058 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterd-peer-utils.h"
+#include "glusterd-store.h"
+#include "glusterd-server-quorum.h"
+#include "glusterd-messages.h"
+#include <glusterfs/common-utils.h>
+#include "glusterd-utils.h"
+
+void
+glusterd_peerinfo_destroy(struct rcu_head *head)
+{
+ int32_t ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_peer_hostname_t *hostname = NULL;
+ glusterd_peer_hostname_t *tmp = NULL;
+
+ /* This works as rcu_head is the first member of gd_rcu_head */
+ peerinfo = caa_container_of((gd_rcu_head *)head, glusterd_peerinfo_t,
+ rcu_head);
+
+ /* Set THIS to the saved this. Needed by some functions below */
+ THIS = peerinfo->rcu_head.this;
+
+ CDS_INIT_LIST_HEAD(&peerinfo->uuid_list);
+
+ ret = glusterd_store_delete_peerinfo(peerinfo);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_PEERINFO_DELETE_FAIL,
+ "Deleting peer info failed");
+ }
+
+ GF_FREE(peerinfo->hostname);
+ peerinfo->hostname = NULL;
+
+ cds_list_for_each_entry_safe(hostname, tmp, &peerinfo->hostnames,
+ hostname_list)
+ {
+ glusterd_peer_hostname_free(hostname);
+ }
+
+ glusterd_sm_tr_log_delete(&peerinfo->sm_log);
+ pthread_mutex_unlock(&peerinfo->delete_lock);
+ pthread_mutex_destroy(&peerinfo->delete_lock);
+ GF_FREE(peerinfo);
+
+ peerinfo = NULL;
+
+ return;
+}
+
+int32_t
+glusterd_peerinfo_cleanup(glusterd_peerinfo_t *peerinfo)
+{
+ GF_ASSERT(peerinfo);
+ gf_boolean_t quorum_action = _gf_false;
+ glusterd_conf_t *priv = THIS->private;
+
+ if (pthread_mutex_trylock(&peerinfo->delete_lock)) {
+ /* Someone else is already deleting the peer, so give up */
+ return 0;
+ }
+
+ if (peerinfo->quorum_contrib != QUORUM_NONE)
+ quorum_action = _gf_true;
+ if (peerinfo->rpc) {
+ peerinfo->rpc = glusterd_rpc_clnt_unref(priv, peerinfo->rpc);
+ peerinfo->rpc = NULL;
+ }
+
+ cds_list_del_rcu(&peerinfo->uuid_list);
+ /* Saving THIS, as it is needed by the callback function */
+ peerinfo->rcu_head.this = THIS;
+ call_rcu(&peerinfo->rcu_head.head, glusterd_peerinfo_destroy);
+
+ if (quorum_action)
+ /* coverity[SLEEP] */
+ glusterd_do_quorum_action();
+ return 0;
+}
+
+/* gd_peerinfo_find_from_hostname iterates over all the addresses saved for each
+ * peer and matches it to @hoststr.
+ * Returns the matched peer if found else returns NULL
+ */
+static glusterd_peerinfo_t *
+gd_peerinfo_find_from_hostname(const char *hoststr)
+{
+ xlator_t *this = THIS;
+ glusterd_conf_t *priv = NULL;
+ glusterd_peerinfo_t *peer = NULL;
+ glusterd_peerinfo_t *found = NULL;
+ glusterd_peer_hostname_t *tmphost = NULL;
+
+ GF_ASSERT(this != NULL);
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, (priv != NULL), out);
+
+ GF_VALIDATE_OR_GOTO(this->name, (hoststr != NULL), out);
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peer, &priv->peers, uuid_list)
+ {
+ cds_list_for_each_entry_rcu(tmphost, &peer->hostnames, hostname_list)
+ {
+ if (!strncasecmp(tmphost->hostname, hoststr, 1024)) {
+ gf_msg_debug(this->name, 0, "Friend %s found.. state: %d",
+ tmphost->hostname, peer->state.state);
+ found = peer; /* Probably needs to be
+ dereferenced*/
+ goto unlock;
+ }
+ }
+ }
+unlock:
+ RCU_READ_UNLOCK;
+out:
+ return found;
+}
+
+/* gd_peerinfo_find_from_addrinfo iterates over all the addresses saved for each
+ * peer, resolves them and compares them to @addr.
+ *
+ *
+ * NOTE: As getaddrinfo is a blocking call and is being performed multiple times
+ * in this function, it could lead to the calling thread to be blocked for
+ * significant amounts of time.
+ *
+ * Returns the matched peer if found else returns NULL
+ */
+static glusterd_peerinfo_t *
+gd_peerinfo_find_from_addrinfo(const struct addrinfo *addr)
+{
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = NULL;
+ glusterd_peerinfo_t *peer = NULL;
+ glusterd_peerinfo_t *found = NULL;
+ glusterd_peer_hostname_t *address = NULL;
+ int ret = 0;
+ struct addrinfo *paddr = NULL;
+ struct addrinfo *tmp = NULL;
+
+ GF_ASSERT(this != NULL);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out);
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peer, &conf->peers, uuid_list)
+ {
+ cds_list_for_each_entry_rcu(address, &peer->hostnames, hostname_list)
+ {
+ /* TODO: Cache the resolved addrinfos to improve
+ * performance
+ */
+ ret = getaddrinfo(address->hostname, NULL, NULL, &paddr);
+ if (ret) {
+ /* Don't fail if getaddrinfo fails, continue
+ * onto the next address
+ */
+ gf_msg_trace(this->name, 0, "getaddrinfo for %s failed (%s)",
+ address->hostname, gai_strerror(ret));
+ continue;
+ }
+
+ for (tmp = paddr; tmp != NULL; tmp = tmp->ai_next) {
+ if (gf_compare_sockaddr(addr->ai_addr, tmp->ai_addr)) {
+ found = peer; /* (de)referenced? */
+ break;
+ }
+ }
+
+ freeaddrinfo(paddr);
+ if (found)
+ goto unlock;
+ }
+ }
+unlock:
+ RCU_READ_UNLOCK;
+out:
+ return found;
+}
+
+/* glusterd_peerinfo_find_by_hostname searches for a peer which matches the
+ * hostname @hoststr and if found returns the pointer to peerinfo object.
+ * Returns NULL otherwise.
+ *
+ * It first attempts a quick search by string matching @hoststr. If that fails,
+ * it'll attempt a more thorough match by resolving the addresses and matching
+ * the resolved addrinfos.
+ */
+glusterd_peerinfo_t *
+glusterd_peerinfo_find_by_hostname(const char *hoststr)
+{
+ int ret = -1;
+ struct addrinfo *addr = NULL;
+ struct addrinfo *p = NULL;
+ xlator_t *this = THIS;
+ glusterd_peerinfo_t *peerinfo = NULL;
+
+ GF_ASSERT(hoststr);
+
+ peerinfo = gd_peerinfo_find_from_hostname(hoststr);
+ if (peerinfo)
+ return peerinfo;
+
+ ret = getaddrinfo(hoststr, NULL, NULL, &addr);
+ if (ret != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, ret, GD_MSG_GETADDRINFO_FAIL,
+ "error in getaddrinfo: %s\n", gai_strerror(ret));
+ goto out;
+ }
+
+ for (p = addr; p != NULL; p = p->ai_next) {
+ peerinfo = gd_peerinfo_find_from_addrinfo(p);
+ if (peerinfo) {
+ freeaddrinfo(addr);
+ return peerinfo;
+ }
+ }
+
+out:
+ gf_msg_debug(this->name, 0, "Unable to find friend: %s", hoststr);
+ if (addr)
+ freeaddrinfo(addr);
+ return NULL;
+}
+
+int
+glusterd_hostname_to_uuid(char *hostname, uuid_t uuid)
+{
+ GF_ASSERT(hostname);
+ GF_ASSERT(uuid);
+
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ peerinfo = glusterd_peerinfo_find_by_hostname(hostname);
+ if (peerinfo) {
+ ret = 0;
+ gf_uuid_copy(uuid, peerinfo->uuid);
+ } else {
+ if (gf_is_local_addr(hostname)) {
+ gf_uuid_copy(uuid, MY_UUID);
+ ret = 0;
+ } else {
+ ret = -1;
+ }
+ }
+
+ gf_msg_debug(this->name, 0, "returning %d", ret);
+ return ret;
+}
+
+/* glusterd_peerinfo_find_by_uuid searches for a peer which matches the
+ * uuid @uuid and if found returns the pointer to peerinfo object.
+ * Returns NULL otherwise.
+ */
+glusterd_peerinfo_t *
+glusterd_peerinfo_find_by_uuid(uuid_t uuid)
+{
+ glusterd_conf_t *priv = NULL;
+ glusterd_peerinfo_t *entry = NULL;
+ glusterd_peerinfo_t *found = NULL;
+ xlator_t *this = THIS;
+ glusterd_friend_sm_state_t state;
+
+ GF_ASSERT(this);
+
+ if (gf_uuid_is_null(uuid))
+ return NULL;
+
+ priv = this->private;
+
+ GF_ASSERT(priv);
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(entry, &priv->peers, uuid_list)
+ {
+ if (!gf_uuid_compare(entry->uuid, uuid)) {
+ found = entry; /* Probably should be rcu_dereferenced */
+ state = found->state.state;
+ break;
+ }
+ }
+ RCU_READ_UNLOCK;
+
+ if (found)
+ gf_msg_debug(this->name, 0, "Friend found... state: %s",
+ glusterd_friend_sm_state_name_get(state));
+ else
+ gf_msg_debug(this->name, 0, "Friend with uuid: %s, not found",
+ uuid_utoa(uuid));
+ return found;
+}
+
+/* glusterd_peerinfo_find will search for a peer matching either @uuid or
+ * @hostname and return a pointer to the peerinfo object
+ * Returns NULL otherwise.
+ */
+glusterd_peerinfo_t *
+glusterd_peerinfo_find(uuid_t uuid, const char *hostname)
+{
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = THIS;
+
+ GF_ASSERT(this);
+
+ if (uuid) {
+ peerinfo = glusterd_peerinfo_find_by_uuid(uuid);
+
+ if (peerinfo) {
+ return peerinfo;
+ } else {
+ gf_msg_debug(this->name, 0, "Unable to find peer by uuid: %s",
+ uuid_utoa(uuid));
+ }
+ }
+
+ if (hostname) {
+ peerinfo = glusterd_peerinfo_find_by_hostname(hostname);
+
+ if (peerinfo) {
+ return peerinfo;
+ } else {
+ gf_msg_debug(this->name, 0, "Unable to find hostname: %s",
+ hostname);
+ }
+ }
+ return NULL;
+}
+
+/* glusterd_peerinfo_new will create a new peerinfo object and set it's members
+ * values using the passed parameters.
+ * @hostname is added as the first entry in peerinfo->hostnames list and also
+ * set to peerinfo->hostname.
+ * It returns a pointer to peerinfo object if successful and returns NULL
+ * otherwise. The caller should take care of freeing the created peerinfo
+ * object.
+ */
+glusterd_peerinfo_t *
+glusterd_peerinfo_new(glusterd_friend_sm_state_t state, uuid_t *uuid,
+ const char *hostname, int port)
+{
+ glusterd_peerinfo_t *new_peer = NULL;
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ new_peer = GF_CALLOC(1, sizeof(*new_peer), gf_gld_mt_peerinfo_t);
+ if (!new_peer) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ CDS_INIT_LIST_HEAD(&new_peer->uuid_list);
+
+ new_peer->state.state = state;
+
+ CDS_INIT_LIST_HEAD(&new_peer->hostnames);
+ if (hostname) {
+ ret = gd_add_address_to_peer(new_peer, hostname);
+ if (ret)
+ goto out;
+ /* Also set it to peerinfo->hostname. Doing this as we use
+ * peerinfo->hostname in a lot of places and is really hard to
+ * get everything right
+ */
+ new_peer->hostname = gf_strdup(hostname);
+ }
+
+ if (uuid) {
+ gf_uuid_copy(new_peer->uuid, *uuid);
+ }
+
+ ret = glusterd_sm_tr_log_init(
+ &new_peer->sm_log, glusterd_friend_sm_state_name_get,
+ glusterd_friend_sm_event_name_get, GLUSTERD_TR_LOG_SIZE);
+ if (ret)
+ goto out;
+
+ if (new_peer->state.state == GD_FRIEND_STATE_BEFRIENDED)
+ new_peer->quorum_contrib = QUORUM_WAITING;
+ new_peer->port = port;
+
+ pthread_mutex_init(&new_peer->delete_lock, NULL);
+
+ new_peer->generation = uatomic_add_return(&conf->generation, 1);
+out:
+ if (ret && new_peer) {
+ glusterd_peerinfo_cleanup(new_peer);
+ new_peer = NULL;
+ }
+ return new_peer;
+}
+
+/* Check if the all peers are connected and befriended, except the peer
+ * specified (the peer being detached)
+ */
+gf_boolean_t
+glusterd_chk_peers_connected_befriended(uuid_t skip_uuid)
+{
+ gf_boolean_t ret = _gf_true;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT(priv);
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
+ {
+ if (!gf_uuid_is_null(skip_uuid) &&
+ !gf_uuid_compare(skip_uuid, peerinfo->uuid))
+ continue;
+
+ if ((GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state) ||
+ !(peerinfo->connected)) {
+ ret = _gf_false;
+ break;
+ }
+ }
+ RCU_READ_UNLOCK;
+
+ gf_msg_debug(THIS->name, 0, "Returning %s", (ret ? "TRUE" : "FALSE"));
+ return ret;
+}
+
+/* Return hostname for given uuid if it exists
+ * else return NULL
+ */
+char *
+glusterd_uuid_to_hostname(uuid_t uuid)
+{
+ char *hostname = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_peerinfo_t *entry = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT(priv);
+
+ if (!gf_uuid_compare(MY_UUID, uuid)) {
+ hostname = gf_strdup("localhost");
+ return hostname;
+ }
+ RCU_READ_LOCK;
+ if (!cds_list_empty(&priv->peers)) {
+ cds_list_for_each_entry_rcu(entry, &priv->peers, uuid_list)
+ {
+ if (!gf_uuid_compare(entry->uuid, uuid)) {
+ hostname = gf_strdup(entry->hostname);
+ break;
+ }
+ }
+ }
+ RCU_READ_UNLOCK;
+
+ return hostname;
+}
+
+char *
+gd_peer_uuid_str(glusterd_peerinfo_t *peerinfo)
+{
+ if ((peerinfo == NULL) || gf_uuid_is_null(peerinfo->uuid))
+ return NULL;
+
+ if (peerinfo->uuid_str[0] == '\0')
+ uuid_utoa_r(peerinfo->uuid, peerinfo->uuid_str);
+
+ return peerinfo->uuid_str;
+}
+
+gf_boolean_t
+glusterd_are_all_peers_up()
+{
+ glusterd_peerinfo_t *peerinfo = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ gf_boolean_t peers_up = _gf_false;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+ {
+ if (!peerinfo->connected) {
+ RCU_READ_UNLOCK;
+ goto out;
+ }
+ }
+ RCU_READ_UNLOCK;
+
+ peers_up = _gf_true;
+
+out:
+ return peers_up;
+}
+
+gf_boolean_t
+glusterd_are_vol_all_peers_up(glusterd_volinfo_t *volinfo,
+ struct cds_list_head *peers, char **down_peerstr)
+{
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ gf_boolean_t ret = _gf_false;
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (!gf_uuid_compare(brickinfo->uuid, MY_UUID))
+ continue;
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, peers, uuid_list)
+ {
+ if (gf_uuid_compare(peerinfo->uuid, brickinfo->uuid))
+ continue;
+
+ /*Found peer who owns the brick, return false
+ * if peer is not connected or not friend */
+ if (!(peerinfo->connected) ||
+ (peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)) {
+ *down_peerstr = gf_strdup(peerinfo->hostname);
+ RCU_READ_UNLOCK;
+ gf_msg_debug(THIS->name, 0, "Peer %s is down. ", *down_peerstr);
+ goto out;
+ }
+ }
+ RCU_READ_UNLOCK;
+ }
+
+ ret = _gf_true;
+out:
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_peer_hostname_new(const char *hostname,
+ glusterd_peer_hostname_t **name)
+{
+ glusterd_peer_hostname_t *peer_hostname = NULL;
+ int32_t ret = -1;
+
+ GF_ASSERT(hostname);
+ GF_ASSERT(name);
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ peer_hostname = GF_CALLOC(1, sizeof(*peer_hostname),
+ gf_gld_mt_peer_hostname_t);
+
+ if (!peer_hostname) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ peer_hostname->hostname = gf_strdup(hostname);
+ CDS_INIT_LIST_HEAD(&peer_hostname->hostname_list);
+
+ *name = peer_hostname;
+ ret = 0;
+
+out:
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+
+void
+glusterd_peer_hostname_free(glusterd_peer_hostname_t *name)
+{
+ if (!name)
+ return;
+
+ cds_list_del_init(&name->hostname_list);
+
+ GF_FREE(name->hostname);
+ name->hostname = NULL;
+
+ GF_FREE(name);
+
+ return;
+}
+
+gf_boolean_t
+gd_peer_has_address(glusterd_peerinfo_t *peerinfo, const char *address)
+{
+ glusterd_peer_hostname_t *hostname = NULL;
+
+ GF_VALIDATE_OR_GOTO("glusterd", (peerinfo != NULL), out);
+ GF_VALIDATE_OR_GOTO("glusterd", (address != NULL), out);
+
+ cds_list_for_each_entry(hostname, &peerinfo->hostnames, hostname_list)
+ {
+ if (strcmp(hostname->hostname, address) == 0) {
+ return _gf_true;
+ }
+ }
+
+out:
+ return _gf_false;
+}
+
+int
+gd_add_address_to_peer(glusterd_peerinfo_t *peerinfo, const char *address)
+{
+ int ret = -1;
+ glusterd_peer_hostname_t *hostname = NULL;
+
+ GF_VALIDATE_OR_GOTO("glusterd", (peerinfo != NULL), out);
+ GF_VALIDATE_OR_GOTO("glusterd", (address != NULL), out);
+
+ if (gd_peer_has_address(peerinfo, address)) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_peer_hostname_new(address, &hostname);
+ if (ret)
+ goto out;
+
+ cds_list_add_tail_rcu(&hostname->hostname_list, &peerinfo->hostnames);
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* gd_add_friend_to_dict() adds details of @friend into @dict with the given
+ * @prefix. All the parameters are compulsory.
+ *
+ * The complete address list is added to the dict only if the cluster op-version
+ * is >= GD_OP_VERSION_3_6_0
+ */
+int
+gd_add_friend_to_dict(glusterd_peerinfo_t *friend, dict_t *dict,
+ const char *prefix)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char key[100] = {
+ 0,
+ };
+ glusterd_peer_hostname_t *address = NULL;
+ int count = 0;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", (this != NULL), out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out);
+
+ GF_VALIDATE_OR_GOTO(this->name, (friend != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (dict != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out);
+
+ snprintf(key, sizeof(key), "%s.uuid", prefix);
+ ret = dict_set_dynstr_with_alloc(dict, key, uuid_utoa(friend->uuid));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set key %s in dict", key);
+ goto out;
+ }
+
+ /* Setting the first hostname from the list with this key for backward
+ * compatibility
+ */
+ snprintf(key, sizeof(key), "%s.hostname", prefix);
+ address = cds_list_entry(&friend->hostnames, glusterd_peer_hostname_t,
+ hostname_list);
+ ret = dict_set_dynstr_with_alloc(dict, key, address->hostname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set key %s in dict", key);
+ goto out;
+ }
+
+ if (conf->op_version < GD_OP_VERSION_3_6_0) {
+ ret = 0;
+ goto out;
+ }
+
+ address = NULL;
+ count = 0;
+ cds_list_for_each_entry(address, &friend->hostnames, hostname_list)
+ {
+ GF_VALIDATE_OR_GOTO(this->name, (address != NULL), out);
+
+ snprintf(key, sizeof(key), "%s.hostname%d", prefix, count);
+ ret = dict_set_dynstr_with_alloc(dict, key, address->hostname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set key %s in dict", key);
+ goto out;
+ }
+ count++;
+ }
+ ret = snprintf(key, sizeof(key), "%s.address-count", prefix);
+ ret = dict_set_int32n(dict, key, ret, count);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set key %s in dict", key);
+
+out:
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+
+/* gd_update_peerinfo_from_dict will update the hostnames for @peerinfo from
+ * peer details with @prefix in @dict.
+ * Returns 0 on success and -1 on failure.
+ */
+int
+gd_update_peerinfo_from_dict(glusterd_peerinfo_t *peerinfo, dict_t *dict,
+ const char *prefix)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char key[100] = {
+ 0,
+ };
+ char *hostname = NULL;
+ int count = 0;
+ int i = 0;
+
+ this = THIS;
+ GF_ASSERT(this != NULL);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out);
+
+ GF_VALIDATE_OR_GOTO(this->name, (peerinfo != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (dict != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out);
+
+ ret = snprintf(key, sizeof(key), "%s.hostname", prefix);
+ ret = dict_get_strn(dict, key, ret, &hostname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key %s not present in "
+ "dictionary",
+ key);
+ goto out;
+ }
+ ret = gd_add_address_to_peer(peerinfo, hostname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_ADD_ADDRESS_TO_PEER_FAIL,
+ "Could not add address to peer");
+ goto out;
+ }
+ /* Also set peerinfo->hostname to the first address */
+ if (peerinfo->hostname != NULL)
+ GF_FREE(peerinfo->hostname);
+ peerinfo->hostname = gf_strdup(hostname);
+
+ if (conf->op_version < GD_OP_VERSION_3_6_0) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = snprintf(key, sizeof(key), "%s.address-count", prefix);
+ ret = dict_get_int32n(dict, key, ret, &count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key %s not present in "
+ "dictionary",
+ key);
+ goto out;
+ }
+ hostname = NULL;
+ for (i = 0; i < count; i++) {
+ ret = snprintf(key, sizeof(key), "%s.hostname%d", prefix, i);
+ ret = dict_get_strn(dict, key, ret, &hostname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key %s not present "
+ "in dictionary",
+ key);
+ goto out;
+ }
+ ret = gd_add_address_to_peer(peerinfo, hostname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_ADD_ADDRESS_TO_PEER_FAIL,
+ "Could not add address to peer");
+ goto out;
+ }
+
+ hostname = NULL;
+ }
+
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* gd_peerinfo_from_dict creates a peerinfo object from details of peer with
+ * @prefix in @dict.
+ * Returns a pointer to the created peerinfo object on success, and NULL on
+ * failure.
+ */
+glusterd_peerinfo_t *
+gd_peerinfo_from_dict(dict_t *dict, const char *prefix)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_peerinfo_t *new_peer = NULL;
+ char key[64] = {
+ 0,
+ };
+ char *uuid_str = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", (this != NULL), out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out);
+
+ GF_VALIDATE_OR_GOTO(this->name, (dict != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out);
+
+ new_peer = glusterd_peerinfo_new(GD_FRIEND_STATE_DEFAULT, NULL, NULL, 0);
+ if (new_peer == NULL) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEERINFO_CREATE_FAIL,
+ "Could not create peerinfo "
+ "object");
+ goto out;
+ }
+
+ ret = snprintf(key, sizeof(key), "%s.uuid", prefix);
+ ret = dict_get_strn(dict, key, ret, &uuid_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key %s not present in "
+ "dictionary",
+ key);
+ goto out;
+ }
+ gf_uuid_parse(uuid_str, new_peer->uuid);
+
+ ret = gd_update_peerinfo_from_dict(new_peer, dict, prefix);
+
+out:
+ if ((ret != 0) && (new_peer != NULL)) {
+ glusterd_peerinfo_cleanup(new_peer);
+ new_peer = NULL;
+ }
+
+ return new_peer;
+}
+
+static int
+gd_add_peer_hostnames_to_dict(glusterd_peerinfo_t *peerinfo, dict_t *dict,
+ const char *prefix)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char key[64] = {
+ 0,
+ };
+ glusterd_peer_hostname_t *addr = NULL;
+ int count = 0;
+
+ this = THIS;
+ GF_ASSERT(this != NULL);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out);
+
+ if (conf->op_version < GD_OP_VERSION_3_6_0) {
+ ret = 0;
+ goto out;
+ }
+
+ GF_VALIDATE_OR_GOTO(this->name, (peerinfo != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (dict != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out);
+
+ cds_list_for_each_entry(addr, &peerinfo->hostnames, hostname_list)
+ {
+ snprintf(key, sizeof(key), "%s.hostname%d", prefix, count);
+ ret = dict_set_dynstr_with_alloc(dict, key, addr->hostname);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+ count++;
+ }
+
+ ret = snprintf(key, sizeof(key), "%s.hostname_count", prefix);
+ ret = dict_set_int32n(dict, key, ret, count);
+
+out:
+ return ret;
+}
+
+int
+gd_add_peer_detail_to_dict(glusterd_peerinfo_t *peerinfo, dict_t *friends,
+ int count)
+{
+ int ret = -1;
+ char key[32] = {
+ 0,
+ };
+ int keylen;
+ char *peer_uuid_str = NULL;
+
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(friends);
+
+ peer_uuid_str = gd_peer_uuid_str(peerinfo);
+ keylen = snprintf(key, sizeof(key), "friend%d.uuid", count);
+ ret = dict_set_strn(friends, key, keylen, peer_uuid_str);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "friend%d.hostname", count);
+ ret = dict_set_strn(friends, key, keylen, peerinfo->hostname);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "friend%d.port", count);
+ ret = dict_set_int32n(friends, key, keylen, peerinfo->port);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "friend%d.stateId", count);
+ ret = dict_set_int32n(friends, key, keylen, peerinfo->state.state);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Key=%s in dict", key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "friend%d.state", count);
+ ret = dict_set_strn(
+ friends, key, keylen,
+ glusterd_friend_sm_state_name_get(peerinfo->state.state));
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "key=%s",
+ key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "friend%d.connected", count);
+ ret = dict_set_int32n(friends, key, keylen, (int32_t)peerinfo->connected);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "friend%d", count);
+ ret = gd_add_peer_hostnames_to_dict(peerinfo, friends, key);
+
+out:
+ return ret;
+}
+
+/* glusterd_peerinfo_find_by_generation searches for a peer which has the
+ * generation number @generation and if found returns the pointer to peerinfo
+ * object. Returns NULL otherwise.
+ */
+glusterd_peerinfo_t *
+glusterd_peerinfo_find_by_generation(uint32_t generation)
+{
+ glusterd_conf_t *priv = NULL;
+ glusterd_peerinfo_t *entry = NULL;
+ glusterd_peerinfo_t *found = NULL;
+ xlator_t *this = THIS;
+ glusterd_friend_sm_state_t state;
+
+ GF_ASSERT(this);
+
+ priv = this->private;
+
+ GF_ASSERT(priv);
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(entry, &priv->peers, uuid_list)
+ {
+ if (entry->generation == generation) {
+ found = entry; /* Probably should be rcu_dereferenced */
+ state = found->state.state;
+ break;
+ }
+ }
+ RCU_READ_UNLOCK;
+
+ if (found)
+ gf_msg_debug(this->name, 0, "Friend found... state: %s",
+ glusterd_friend_sm_state_name_get(state));
+ else
+ gf_msg_debug(this->name, 0,
+ "Friend with generation: %" PRIu32 ", not found",
+ generation);
+ return found;
+}
+
+int
+glusterd_get_peers_count()
+{
+ int count = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_peerinfo_t *peer = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peer, &conf->peers, uuid_list) count++;
+ RCU_READ_UNLOCK;
+
+out:
+ return count;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-peer-utils.h b/xlators/mgmt/glusterd/src/glusterd-peer-utils.h
new file mode 100644
index 00000000000..fd254d57391
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-peer-utils.h
@@ -0,0 +1,82 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_PEER_UTILS_H
+#define _GLUSTERD_PEER_UTILS_H
+
+#include "glusterd.h"
+
+int32_t
+glusterd_peerinfo_cleanup(glusterd_peerinfo_t *peerinfo);
+
+glusterd_peerinfo_t *
+glusterd_peerinfo_find_by_hostname(const char *hoststr);
+
+int
+glusterd_hostname_to_uuid(char *hostname, uuid_t uuid);
+
+glusterd_peerinfo_t *
+glusterd_peerinfo_find_by_uuid(uuid_t uuid);
+
+glusterd_peerinfo_t *
+glusterd_peerinfo_find(uuid_t uuid, const char *hostname);
+
+glusterd_peerinfo_t *
+glusterd_peerinfo_new(glusterd_friend_sm_state_t state, uuid_t *uuid,
+ const char *hostname, int port);
+
+gf_boolean_t
+glusterd_chk_peers_connected_befriended(uuid_t skip_uuid);
+
+char *
+glusterd_uuid_to_hostname(uuid_t uuid);
+
+char *
+gd_peer_uuid_str(glusterd_peerinfo_t *peerinfo);
+
+gf_boolean_t
+glusterd_are_all_peers_up();
+
+gf_boolean_t
+glusterd_are_vol_all_peers_up(glusterd_volinfo_t *volinfo,
+ struct cds_list_head *peers, char **down_peerstr);
+
+int32_t
+glusterd_peer_hostname_new(const char *hostname,
+ glusterd_peer_hostname_t **name);
+void
+glusterd_peer_hostname_free(glusterd_peer_hostname_t *name);
+
+gf_boolean_t
+gd_peer_has_address(glusterd_peerinfo_t *peerinfo, const char *address);
+
+int
+gd_add_address_to_peer(glusterd_peerinfo_t *peerinfo, const char *address);
+
+int
+gd_add_friend_to_dict(glusterd_peerinfo_t *friend, dict_t *dict,
+ const char *prefix);
+
+int
+gd_update_peerinfo_from_dict(glusterd_peerinfo_t *peerinfo, dict_t *dict,
+ const char *prefix);
+
+glusterd_peerinfo_t *
+gd_peerinfo_from_dict(dict_t *dict, const char *prefix);
+
+int
+gd_add_peer_detail_to_dict(glusterd_peerinfo_t *peerinfo, dict_t *friends,
+ int count);
+glusterd_peerinfo_t *
+glusterd_peerinfo_find_by_generation(uint32_t generation);
+
+int
+glusterd_get_peers_count();
+#endif /* _GLUSTERD_PEER_UTILS_H */
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.c b/xlators/mgmt/glusterd/src/glusterd-pmap.c
index cef89b49a63..16ac628ab82 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.c
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.c
@@ -1,31 +1,17 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
+ Copyright (c) 2010-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
-#include "xlator.h"
-#include "glusterfs.h"
-#include "compat-errno.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/compat-errno.h>
#include "glusterd.h"
#include "glusterd-utils.h"
@@ -33,440 +19,648 @@
#include "portmap-xdr.h"
#include "xdr-generic.h"
#include "protocol-common.h"
+#include "glusterd-messages.h"
#include "rpcsvc.h"
#include <sys/socket.h>
#include <sys/types.h>
#include <netinet/in.h>
-
-int
-pmap_port_isfree (int port)
+static int
+pmap_port_isfree(int port)
{
- struct sockaddr_in sin;
- int sock = -1;
- int ret = 0;
+ struct sockaddr_in sin;
+ int sock = -1;
+ int ret = 0;
- memset (&sin, 0, sizeof (sin));
- sin.sin_family = PF_INET;
- sin.sin_port = hton16 (port);
+ memset(&sin, 0, sizeof(sin));
+ sin.sin_family = PF_INET;
+ sin.sin_port = hton16(port);
- sock = socket (PF_INET, SOCK_STREAM, 0);
- if (sock == -1)
- return -1;
+ sock = socket(PF_INET, SOCK_STREAM, 0);
+ if (sock == -1)
+ return -1;
- ret = bind (sock, (struct sockaddr *)&sin, sizeof (sin));
- close (sock);
+ ret = bind(sock, (struct sockaddr *)&sin, sizeof(sin));
+ sys_close(sock);
- return (ret == 0) ? 1 : 0;
+ return (ret == 0) ? 1 : 0;
}
-
-struct pmap_registry *
-pmap_registry_new (void)
+static struct pmap_registry *
+pmap_registry_new(xlator_t *this)
{
- struct pmap_registry *pmap = NULL;
- int i = 0;
-
- pmap = CALLOC (sizeof (*pmap), 1);
- if (!pmap)
- return NULL;
-
- for (i = 0; i < 65536; i++) {
- if (pmap_port_isfree (i))
- pmap->ports[i].type = GF_PMAP_PORT_FREE;
- else
- pmap->ports[i].type = GF_PMAP_PORT_FOREIGN;
- }
-
- pmap->base_port = GF_IANA_PRIV_PORTS_START;
- pmap->last_alloc = GF_IANA_PRIV_PORTS_START;
-
- return pmap;
+ struct pmap_registry *pmap = NULL;
+ int i = 0;
+
+ pmap = CALLOC(sizeof(*pmap), 1);
+ if (!pmap)
+ return NULL;
+
+ pmap->base_port = pmap->last_alloc = ((glusterd_conf_t *)(this->private))
+ ->base_port;
+ pmap->max_port = ((glusterd_conf_t *)(this->private))->max_port;
+ for (i = pmap->base_port; i <= pmap->max_port; i++) {
+ if (pmap_port_isfree(i))
+ pmap->ports[i].type = GF_PMAP_PORT_FREE;
+ else
+ pmap->ports[i].type = GF_PMAP_PORT_FOREIGN;
+ }
+
+ return pmap;
}
-
struct pmap_registry *
-pmap_registry_get (xlator_t *this)
+pmap_registry_get(xlator_t *this)
{
- glusterd_conf_t *priv = NULL;
- struct pmap_registry *pmap = NULL;
+ glusterd_conf_t *priv = NULL;
+ struct pmap_registry *pmap = NULL;
- priv = this->private;
+ priv = this->private;
- pmap = priv->pmap;
- if (!pmap) {
- pmap = pmap_registry_new ();
- if (!pmap)
- return NULL;
- priv->pmap = pmap;
- }
-
- return pmap;
-}
-
-
-static char*
-nextword (char *str)
-{
- while (*str && !isspace (*str))
- str++;
- while (*str && isspace (*str))
- str++;
+ pmap = priv->pmap;
+ if (!pmap) {
+ pmap = pmap_registry_new(this);
+ if (!pmap)
+ return NULL;
+ priv->pmap = pmap;
+ }
- return str;
+ return pmap;
}
+/*
+ * The "destroy" argument avoids a double search in pmap_registry_remove - one
+ * to find the entry in the table, and the other to find the particular
+ * brickname within that entry (which might cover multiple bricks). We do the
+ * actual deletion here by "whiting out" the brick name with spaces. It's up
+ * to pmap_registry_remove to figure out what to do from there.
+ */
int
-pmap_registry_search (xlator_t *this, const char *brickname,
- gf_pmap_port_type_t type)
+pmap_registry_search(xlator_t *this, const char *brickname,
+ gf_pmap_port_type_t type, gf_boolean_t destroy)
{
- struct pmap_registry *pmap = NULL;
- int p = 0;
- char *brck = NULL;
- char *nbrck = NULL;
-
- pmap = pmap_registry_get (this);
-
- for (p = pmap->base_port; p <= pmap->last_alloc; p++) {
- if (!pmap->ports[p].brickname || pmap->ports[p].type != type)
- continue;
-
- for (brck = pmap->ports[p].brickname;;) {
- nbrck = strtail (brck, brickname);
- if (nbrck && (!*nbrck || isspace (*nbrck)))
- return p;
- brck = nextword (brck);
- if (!*brck)
- break;
+ struct pmap_registry *pmap = NULL;
+ int p = 0;
+ char *brck = NULL;
+ size_t i;
+
+ pmap = pmap_registry_get(this);
+
+ for (p = pmap->last_alloc; p >= pmap->base_port; p--) {
+ if (!pmap->ports[p].brickname || pmap->ports[p].type != type)
+ continue;
+
+ brck = pmap->ports[p].brickname;
+ for (;;) {
+ for (i = 0; brck[i] && !isspace(brck[i]); ++i)
+ ;
+ if (i == 0 && brck[i] == '\0')
+ break;
+
+ if (strncmp(brck, brickname, i) == 0) {
+ /*
+ * Without this check, we'd break when brck
+ * is merely a substring of brickname.
+ */
+ if (brickname[i] == '\0') {
+ if (destroy)
+ do {
+ *(brck++) = ' ';
+ } while (--i);
+ return p;
}
+ }
+
+ brck += i;
+
+ /*
+ * Skip over *any* amount of whitespace, including
+ * none (if we're already at the end of the string).
+ */
+ while (isspace(*brck))
+ ++brck;
+ /*
+ * We're either at the end of the string (which will be
+ * handled above strncmp on the next iteration) or at
+ * the next non-whitespace substring (which will be
+ * handled by strncmp itself).
+ */
}
+ }
- return 0;
+ return 0;
}
-int
-pmap_registry_search_by_xprt (xlator_t *this, void *xprt,
- gf_pmap_port_type_t type)
+static int
+pmap_registry_search_by_xprt(xlator_t *this, void *xprt,
+ gf_pmap_port_type_t type)
{
- struct pmap_registry *pmap = NULL;
- int p = 0;
- int port = 0;
-
- pmap = pmap_registry_get (this);
-
- for (p = pmap->base_port; p <= pmap->last_alloc; p++) {
- if (!pmap->ports[p].xprt)
- continue;
- if (pmap->ports[p].xprt == xprt &&
- pmap->ports[p].type == type) {
- port = p;
- break;
- }
+ struct pmap_registry *pmap = NULL;
+ int p = 0;
+ int port = 0;
+
+ pmap = pmap_registry_get(this);
+
+ for (p = pmap->last_alloc; p >= pmap->base_port; p--) {
+ if (!pmap->ports[p].xprt)
+ continue;
+ if (pmap->ports[p].xprt == xprt) {
+ if (pmap->ports[p].type == type || type == GF_PMAP_PORT_ANY) {
+ port = p;
+ break;
+ }
}
+ }
- return port;
+ return port;
}
-
-char *
-pmap_registry_search_by_port (xlator_t *this, int port)
+static char *
+pmap_registry_search_by_port(xlator_t *this, int port)
{
- struct pmap_registry *pmap = NULL;
- char *brickname = NULL;
+ struct pmap_registry *pmap = NULL;
+ char *brickname = NULL;
+ int max_port = 0;
- if (port > 65535)
- goto out;
+ max_port = ((glusterd_conf_t *)(this->private))->max_port;
+ if (port > max_port)
+ goto out;
- pmap = pmap_registry_get (this);
+ pmap = pmap_registry_get(this);
- if (pmap->ports[port].type == GF_PMAP_PORT_BRICKSERVER)
- brickname = pmap->ports[port].brickname;
+ if (pmap->ports[port].type == GF_PMAP_PORT_BRICKSERVER)
+ brickname = pmap->ports[port].brickname;
out:
- return brickname;
+ return brickname;
}
-
int
-pmap_registry_alloc (xlator_t *this)
+pmap_registry_alloc(xlator_t *this)
{
- struct pmap_registry *pmap = NULL;
- int p = 0;
- int port = 0;
-
- pmap = pmap_registry_get (this);
-
- for (p = pmap->last_alloc; p < 65535; p++) {
- if (pmap->ports[p].type != GF_PMAP_PORT_FREE)
- continue;
-
- if (pmap_port_isfree (p)) {
- pmap->ports[p].type = GF_PMAP_PORT_LEASED;
- port = p;
- break;
- }
+ struct pmap_registry *pmap = NULL;
+ int p = 0;
+ int port = 0;
+
+ pmap = pmap_registry_get(this);
+
+ for (p = pmap->base_port; p <= pmap->max_port; p++) {
+ /* GF_PMAP_PORT_FOREIGN may be freed up ? */
+ if ((pmap->ports[p].type == GF_PMAP_PORT_FREE) ||
+ (pmap->ports[p].type == GF_PMAP_PORT_FOREIGN)) {
+ if (pmap_port_isfree(p)) {
+ pmap->ports[p].type = GF_PMAP_PORT_LEASED;
+ port = p;
+ break;
+ }
}
+ }
- if (port)
- pmap->last_alloc = port;
+ if (port > pmap->last_alloc)
+ pmap->last_alloc = port;
- return port;
+ return port;
}
+/* pmap_assign_port does a pmap_registry_remove followed by pmap_registry_alloc,
+ * the reason for the former is to ensure we don't end up with stale ports
+ */
int
-pmap_registry_bind (xlator_t *this, int port, const char *brickname,
- gf_pmap_port_type_t type, void *xprt)
+pmap_assign_port(xlator_t *this, int old_port, const char *path)
{
- struct pmap_registry *pmap = NULL;
- int p = 0;
-
- pmap = pmap_registry_get (this);
+ int ret = -1;
+ int new_port = 0;
+
+ if (old_port) {
+ ret = pmap_registry_remove(this, 0, path, GF_PMAP_PORT_BRICKSERVER,
+ NULL, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, GD_MSG_PMAP_REGISTRY_REMOVE_FAIL,
+ 0,
+ "Failed to"
+ "remove pmap registry for older signin for path"
+ " %s",
+ path);
+ }
+ }
+ new_port = pmap_registry_alloc(this);
+ return new_port;
+}
- if (port > 65535)
- goto out;
+int
+pmap_registry_bind(xlator_t *this, int port, const char *brickname,
+ gf_pmap_port_type_t type, void *xprt)
+{
+ struct pmap_registry *pmap = NULL;
+ int p = 0;
- p = port;
- pmap->ports[p].type = type;
- if (pmap->ports[p].brickname)
- free (pmap->ports[p].brickname);
- pmap->ports[p].brickname = strdup (brickname);
- pmap->ports[p].type = type;
- pmap->ports[p].xprt = xprt;
+ pmap = pmap_registry_get(this);
- gf_log ("pmap", GF_LOG_INFO, "adding brick %s on port %d",
- brickname, port);
+ if (port > pmap->max_port)
+ goto out;
- if (pmap->last_alloc < p)
- pmap->last_alloc = p;
+ p = port;
+ if (pmap->ports[p].type == GF_PMAP_PORT_FREE) {
+ /* Because of some crazy race in volume start code path because
+ * of friend handshaking with volumes with quorum enabled we
+ * might end up into a situation where glusterd would start a
+ * brick and get a disconnect and then immediately try to start
+ * the same brick instance based on another friend update
+ * request. And then if for the very first brick even if the
+ * process doesn't come up at the end sign in event gets sent
+ * and we end up having two duplicate portmap entries for the
+ * same brick. Since in brick start we mark the previous port as
+ * free, its better to consider a sign in request as no op if
+ * the corresponding port type is marked as free
+ */
+ goto out;
+ }
+ if (pmap->ports[p].brickname) {
+ char *tmp = pmap->ports[p].brickname;
+ asprintf(&pmap->ports[p].brickname, "%s %s", tmp, brickname);
+ free(tmp);
+ } else {
+ pmap->ports[p].brickname = strdup(brickname);
+ }
+ pmap->ports[p].type = type;
+ pmap->ports[p].xprt = xprt;
+
+ gf_msg("pmap", GF_LOG_INFO, 0, GD_MSG_BRICK_ADD,
+ "adding brick %s on port %d", brickname, port);
+
+ if (pmap->last_alloc < p)
+ pmap->last_alloc = p;
out:
- return 0;
+ return 0;
}
int
-pmap_registry_remove (xlator_t *this, int port, const char *brickname,
- gf_pmap_port_type_t type, void *xprt)
+pmap_registry_extend(xlator_t *this, int port, const char *brickname)
{
- struct pmap_registry *pmap = NULL;
- int p = 0;
- glusterd_conf_t *priv = NULL;
+ struct pmap_registry *pmap = NULL;
+ char *old_bn;
+ char *new_bn;
+ size_t bn_len;
+ char *entry;
+ int found = 0;
+
+ pmap = pmap_registry_get(this);
+
+ if (port > pmap->max_port) {
+ return -1;
+ }
+
+ switch (pmap->ports[port].type) {
+ case GF_PMAP_PORT_LEASED:
+ case GF_PMAP_PORT_BRICKSERVER:
+ break;
+ default:
+ return -1;
+ }
+
+ old_bn = pmap->ports[port].brickname;
+ if (old_bn) {
+ bn_len = strlen(brickname);
+ entry = strstr(old_bn, brickname);
+ while (entry) {
+ found = 1;
+ if ((entry != old_bn) && (entry[-1] != ' ')) {
+ found = 0;
+ }
+ if ((entry[bn_len] != ' ') && (entry[bn_len] != '\0')) {
+ found = 0;
+ }
+ if (found) {
+ return 0;
+ }
+ entry = strstr(entry + bn_len, brickname);
+ }
+ asprintf(&new_bn, "%s %s", old_bn, brickname);
+ } else {
+ new_bn = strdup(brickname);
+ }
- priv = this->private;
- pmap = priv->pmap;
- if (!pmap)
- goto out;
+ if (!new_bn) {
+ return -1;
+ }
- if (port) {
- if (port > 65535)
- goto out;
+ pmap->ports[port].brickname = new_bn;
+ free(old_bn);
- p = port;
- goto remove;
- }
+ return 0;
+}
- if (brickname && strchr (brickname, '/')) {
- p = pmap_registry_search (this, brickname, type);
- if (p)
- goto remove;
- }
+int
+pmap_registry_remove(xlator_t *this, int port, const char *brickname,
+ gf_pmap_port_type_t type, void *xprt,
+ gf_boolean_t brick_disconnect)
+{
+ struct pmap_registry *pmap = NULL;
+ int p = 0;
+ glusterd_conf_t *priv = NULL;
+ char *brick_str;
+
+ priv = this->private;
+ pmap = priv->pmap;
+ if (!pmap)
+ goto out;
- if (xprt) {
- p = pmap_registry_search_by_xprt (this, xprt, type);
- if (p)
- goto remove;
- }
+ if (port) {
+ if (port > pmap->max_port)
+ goto out;
+ }
- goto out;
-remove:
- gf_log ("pmap", GF_LOG_INFO, "removing brick %s on port %d",
- pmap->ports[p].brickname, p);
+ if (brickname) {
+ p = pmap_registry_search(this, brickname, type, _gf_true);
+ if (p)
+ goto remove;
+ }
- if (pmap->ports[p].brickname)
- free (pmap->ports[p].brickname);
+ if (xprt) {
+ p = pmap_registry_search_by_xprt(this, xprt, type);
+ if (p)
+ goto remove;
+ }
- pmap->ports[p].brickname = NULL;
+ goto out;
+remove:
+ gf_msg("pmap", GF_LOG_INFO, 0, GD_MSG_BRICK_REMOVE,
+ "removing brick %s on port %d", brickname, p);
+
+ if (xprt && (xprt == pmap->ports[p].xprt)) {
pmap->ports[p].xprt = NULL;
+ }
+
+ /*
+ * This is where we garbage-collect. If all of the brick names have
+ * been "whited out" by pmap_registry_search(...,destroy=_gf_true) and
+ * there's no xprt either, then we have nothing left worth saving and
+ * can delete the entire entry.
+ */
+ if (brick_disconnect || !pmap->ports[p].xprt) {
+ /* If the signout call is being triggered by brick disconnect
+ * then clean up all the bricks (in case of brick mux)
+ */
+ if (!brick_disconnect) {
+ brick_str = pmap->ports[p].brickname;
+ if (brick_str) {
+ while (*brick_str != '\0') {
+ if (*(brick_str++) != ' ') {
+ goto out;
+ }
+ }
+ }
+ }
+ free(pmap->ports[p].brickname);
+ pmap->ports[p].brickname = NULL;
+ pmap->ports[p].type = GF_PMAP_PORT_FREE;
+ }
out:
- return 0;
+ return 0;
}
int
-gluster_pmap_portbybrick (rpcsvc_request_t *req)
+__gluster_pmap_portbybrick(rpcsvc_request_t *req)
{
- pmap_port_by_brick_req args = {0,};
- pmap_port_by_brick_rsp rsp = {0,};
- char *brick = NULL;
- int port = 0;
-
- if (!xdr_to_generic (req->msg[0], &args,
- (xdrproc_t)xdr_pmap_port_by_brick_req)) {
- req->rpc_err = GARBAGE_ARGS;
- goto fail;
- }
-
- brick = args.brick;
-
- port = pmap_registry_search (THIS, brick, GF_PMAP_PORT_BRICKSERVER);
-
- if (!port)
- rsp.op_ret = -1;
-
- rsp.port = port;
+ pmap_port_by_brick_req args = {
+ 0,
+ };
+ pmap_port_by_brick_rsp rsp = {
+ 0,
+ };
+ char *brick = NULL;
+ int port = 0;
+ int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &args,
+ (xdrproc_t)xdr_pmap_port_by_brick_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto fail;
+ }
+
+ brick = args.brick;
+
+ port = pmap_registry_search(this, brick, GF_PMAP_PORT_BRICKSERVER,
+ _gf_false);
+
+ if (!port)
+ rsp.op_ret = -1;
+
+ rsp.port = port;
fail:
- glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_pmap_port_by_brick_rsp);
- if (args.brick)
- free (args.brick);//malloced by xdr
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_pmap_port_by_brick_rsp);
+ free(args.brick); // malloced by xdr
- return 0;
+ return 0;
}
-
int
-gluster_pmap_brickbyport (rpcsvc_request_t *req)
+gluster_pmap_portbybrick(rpcsvc_request_t *req)
{
- pmap_brick_by_port_req args = {0,};
- pmap_brick_by_port_rsp rsp = {0,};
-
- if (!xdr_to_generic (req->msg[0], &args,
- (xdrproc_t)xdr_pmap_brick_by_port_req)) {
- req->rpc_err = GARBAGE_ARGS;
- goto fail;
- }
+ return glusterd_big_locked_handler(req, __gluster_pmap_portbybrick);
+}
- rsp.brick = pmap_registry_search_by_port (THIS, args.port);
- if (!rsp.brick) {
- rsp.op_ret = -1;
- rsp.brick = "";
- }
+int
+__gluster_pmap_brickbyport(rpcsvc_request_t *req)
+{
+ pmap_brick_by_port_req args = {
+ 0,
+ };
+ pmap_brick_by_port_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &args,
+ (xdrproc_t)xdr_pmap_brick_by_port_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto fail;
+ }
+
+ rsp.brick = pmap_registry_search_by_port(THIS, args.port);
+ if (!rsp.brick) {
+ rsp.op_ret = -1;
+ rsp.brick = "";
+ }
fail:
- glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_pmap_brick_by_port_rsp);
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_pmap_brick_by_port_rsp);
- return 0;
+ return 0;
}
-static int
-glusterd_brick_update_signin (glusterd_brickinfo_t *brickinfo,
- gf_boolean_t value)
+int
+gluster_pmap_brickbyport(rpcsvc_request_t *req)
{
- brickinfo->signed_in = value;
-
- return 0;
+ return glusterd_big_locked_handler(req, __gluster_pmap_brickbyport);
}
int
-gluster_pmap_signup (rpcsvc_request_t *req)
+__gluster_pmap_signin(rpcsvc_request_t *req)
{
- pmap_signup_req args = {0,};
- pmap_signup_rsp rsp = {0,};
-
-
- if (!xdr_to_generic (req->msg[0], &args,
- (xdrproc_t)xdr_pmap_signup_req)) {
- req->rpc_err = GARBAGE_ARGS;
- goto fail;
- }
-
- rsp.op_ret = pmap_registry_bind (THIS, args.port, args.brick,
- GF_PMAP_PORT_BRICKSERVER, req->trans);
+ pmap_signin_req args = {
+ 0,
+ };
+ pmap_signin_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_pmap_signin_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto fail;
+ }
+
+ rsp.op_ret = pmap_registry_bind(THIS, args.port, args.brick,
+ GF_PMAP_PORT_BRICKSERVER, req->trans);
+
+ ret = glusterd_get_brickinfo(THIS, args.brick, args.port, &brickinfo);
+ /* Update portmap status in brickinfo */
+ if (brickinfo)
+ brickinfo->port_registered = _gf_true;
fail:
- glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_pmap_signup_rsp);
- if (args.brick)
- free (args.brick);//malloced by xdr
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_pmap_signin_rsp);
+ free(args.brick); // malloced by xdr
- return 0;
+ return 0;
}
int
-gluster_pmap_signin (rpcsvc_request_t *req)
+gluster_pmap_signin(rpcsvc_request_t *req)
{
- pmap_signin_req args = {0,};
- pmap_signin_rsp rsp = {0,};
- glusterd_brickinfo_t *brickinfo = NULL;
- int ret = -1;
-
- if (!xdr_to_generic (req->msg[0], &args,
- (xdrproc_t)xdr_pmap_signin_req)) {
- req->rpc_err = GARBAGE_ARGS;
- goto fail;
- }
-
- rsp.op_ret = pmap_registry_bind (THIS, args.port, args.brick,
- GF_PMAP_PORT_BRICKSERVER, req->trans);
-
- ret = glusterd_get_brickinfo (THIS, args.brick, args.port, _gf_true,
- &brickinfo);
-fail:
- glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_pmap_signin_rsp);
- if (args.brick)
- free (args.brick);//malloced by xdr
-
- if (!ret)
- glusterd_brick_update_signin (brickinfo, _gf_true);
-
- return 0;
+ return glusterd_big_locked_handler(req, __gluster_pmap_signin);
}
-
-
int
-gluster_pmap_signout (rpcsvc_request_t *req)
+__gluster_pmap_signout(rpcsvc_request_t *req)
{
- pmap_signout_req args = {0,};
- pmap_signout_rsp rsp = {0,};
- int ret = -1;
- glusterd_brickinfo_t *brickinfo = NULL;
-
- if (!xdr_to_generic (req->msg[0], &args,
- (xdrproc_t)xdr_pmap_signout_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto fail;
+ pmap_signout_req args = {
+ 0,
+ };
+ pmap_signout_rsp rsp = {
+ 0,
+ };
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ char pidfile[PATH_MAX] = {0};
+ char brick_path[PATH_MAX] = {
+ 0,
+ };
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, fail);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, fail);
+
+ ret = xdr_to_generic(req->msg[0], &args, (xdrproc_t)xdr_pmap_signout_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto fail;
+ }
+ rsp.op_ret = pmap_registry_remove(THIS, args.port, args.brick,
+ GF_PMAP_PORT_BRICKSERVER, req->trans,
+ _gf_false);
+
+ ret = glusterd_get_brickinfo(THIS, args.brick, args.port, &brickinfo);
+ if (args.rdma_port) {
+ snprintf(brick_path, PATH_MAX, "%s.rdma", args.brick);
+ rsp.op_ret = pmap_registry_remove(THIS, args.rdma_port, brick_path,
+ GF_PMAP_PORT_BRICKSERVER, req->trans,
+ _gf_false);
+ }
+ /* Update portmap status on brickinfo */
+ if (brickinfo)
+ brickinfo->port_registered = _gf_false;
+
+ /* Clean up the pidfile for this brick given glusterfsd doesn't clean it
+ * any more. This is required to ensure we don't end up with having
+ * stale pid files in case a brick is killed from the backend
+ */
+ ret = glusterd_get_volinfo_from_brick(args.brick, &volinfo);
+ if (!ret) {
+ if (volinfo && brickinfo) {
+ GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, conf);
+ sys_unlink(pidfile);
+
+ /* Setting the brick status to GF_BRICK_STOPPED to
+ * ensure correct brick status is maintained on the
+ * glusterd end when a brick is killed from the
+ * backend */
+ brickinfo->status = GF_BRICK_STOPPED;
+
+ /* Remove brick from brick process if not already
+ * removed in the brick op phase. This situation would
+ * arise when the brick is killed explicitly from the
+ * backend */
+ ret = glusterd_brick_process_remove_brick(brickinfo, NULL);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Couldn't remove "
+ "brick %s:%s from brick process",
+ brickinfo->hostname, brickinfo->path);
+ /* Ignore 'ret' here since the brick might
+ * have already been deleted in brick op phase
+ */
+ ret = 0;
+ }
}
+ }
- rsp.op_ret = pmap_registry_remove (THIS, args.port, args.brick,
- GF_PMAP_PORT_BRICKSERVER, req->trans);
-
- ret = glusterd_get_brickinfo (THIS, args.brick, args.port, _gf_true,
- &brickinfo);
fail:
- glusterd_submit_reply (req, &rsp, NULL, 0, NULL,
- (xdrproc_t)xdr_pmap_signout_rsp);
- if (args.brick)
- free (args.brick);//malloced by xdr
+ glusterd_submit_reply(req, &rsp, NULL, 0, NULL,
+ (xdrproc_t)xdr_pmap_signout_rsp);
+ free(args.brick); // malloced by xdr
- if (!ret)
- glusterd_brick_update_signin (brickinfo, _gf_false);
+ return 0;
+}
- return 0;
+int
+gluster_pmap_signout(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __gluster_pmap_signout);
}
-rpcsvc_actor_t gluster_pmap_actors[] = {
- [GF_PMAP_NULL] = {"NULL", GF_PMAP_NULL, NULL, NULL, NULL, 0},
- [GF_PMAP_PORTBYBRICK] = {"PORTBYBRICK", GF_PMAP_PORTBYBRICK,
- gluster_pmap_portbybrick, NULL, NULL, 0},
- [GF_PMAP_BRICKBYPORT] = {"BRICKBYPORT", GF_PMAP_BRICKBYPORT,
- gluster_pmap_brickbyport, NULL, NULL, 0},
- [GF_PMAP_SIGNIN] = {"SIGNIN", GF_PMAP_SIGNIN,
- gluster_pmap_signin, NULL, NULL, 0},
- [GF_PMAP_SIGNOUT] = {"SIGNOUT", GF_PMAP_SIGNOUT,
- gluster_pmap_signout, NULL, NULL, 0},
- [GF_PMAP_SIGNUP] = {"SIGNUP", GF_PMAP_SIGNUP,
- gluster_pmap_signup, NULL, NULL, 0},
+static rpcsvc_actor_t gluster_pmap_actors[GF_PMAP_MAXVALUE] = {
+ [GF_PMAP_NULL] = {"NULL", NULL, NULL, GF_PMAP_NULL, DRC_NA, 0},
+ [GF_PMAP_PORTBYBRICK] = {"PORTBYBRICK", gluster_pmap_portbybrick, NULL,
+ GF_PMAP_PORTBYBRICK, DRC_NA, 0},
+ [GF_PMAP_BRICKBYPORT] = {"BRICKBYPORT", gluster_pmap_brickbyport, NULL,
+ GF_PMAP_BRICKBYPORT, DRC_NA, 0},
+ [GF_PMAP_SIGNIN] = {"SIGNIN", gluster_pmap_signin, NULL, GF_PMAP_SIGNIN,
+ DRC_NA, 0},
+ [GF_PMAP_SIGNOUT] = {"SIGNOUT", gluster_pmap_signout, NULL, GF_PMAP_SIGNOUT,
+ DRC_NA, 0},
};
-
struct rpcsvc_program gluster_pmap_prog = {
- .progname = "Gluster Portmap",
- .prognum = GLUSTER_PMAP_PROGRAM,
- .progver = GLUSTER_PMAP_VERSION,
- .actors = gluster_pmap_actors,
- .numactors = GF_PMAP_MAXVALUE,
+ .progname = "Gluster Portmap",
+ .prognum = GLUSTER_PMAP_PROGRAM,
+ .progver = GLUSTER_PMAP_VERSION,
+ .actors = gluster_pmap_actors,
+ .numactors = GF_PMAP_MAXVALUE,
};
diff --git a/xlators/mgmt/glusterd/src/glusterd-pmap.h b/xlators/mgmt/glusterd/src/glusterd-pmap.h
index bb2bbc86e1e..51d75361431 100644
--- a/xlators/mgmt/glusterd/src/glusterd-pmap.h
+++ b/xlators/mgmt/glusterd/src/glusterd-pmap.h
@@ -1,64 +1,57 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-
#ifndef _GLUSTERD_PMAP_H_
#define _GLUSTERD_PMAP_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <pthread.h>
-#include "uuid.h"
+#include <glusterfs/compat-uuid.h>
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "call-stub.h"
-#include "fd.h"
-#include "byte-order.h"
-#include "glusterd.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/byte-order.h>
#include "rpcsvc.h"
-
-#define GF_IANA_PRIV_PORTS_START 49152 /* RFC 6335 */
-
struct pmap_port_status {
- gf_pmap_port_type_t type;
- char *brickname;
- void *xprt;
+ char *brickname;
+ void *xprt;
+ gf_pmap_port_type_t type;
};
struct pmap_registry {
- int base_port;
- int last_alloc;
- struct pmap_port_status ports[65536];
+ struct pmap_port_status ports[GF_PORT_MAX + 1];
+ int base_port;
+ int max_port;
+ int last_alloc;
};
-int pmap_registry_alloc (xlator_t *this);
-int pmap_registry_bind (xlator_t *this, int port, const char *brickname,
- gf_pmap_port_type_t type, void *xprt);
-int pmap_registry_remove (xlator_t *this, int port, const char *brickname,
- gf_pmap_port_type_t type, void *xprt);
-int pmap_registry_search (xlator_t *this, const char *brickname,
- gf_pmap_port_type_t type);
-struct pmap_registry *pmap_registry_get (xlator_t *this);
+int
+pmap_assign_port(xlator_t *this, int port, const char *path);
+int
+pmap_mark_port_leased(xlator_t *this, int port);
+int
+pmap_registry_alloc(xlator_t *this);
+int
+pmap_registry_bind(xlator_t *this, int port, const char *brickname,
+ gf_pmap_port_type_t type, void *xprt);
+int
+pmap_registry_extend(xlator_t *this, int port, const char *brickname);
+int
+pmap_registry_remove(xlator_t *this, int port, const char *brickname,
+ gf_pmap_port_type_t type, void *xprt,
+ gf_boolean_t brick_disconnect);
+int
+pmap_registry_search(xlator_t *this, const char *brickname,
+ gf_pmap_port_type_t type, gf_boolean_t destroy);
+struct pmap_registry *
+pmap_registry_get(xlator_t *this);
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
new file mode 100644
index 00000000000..a05c90d7b10
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.c
@@ -0,0 +1,152 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <stdio.h>
+#include <limits.h>
+#include <signal.h>
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include "glusterd-messages.h"
+#include "glusterd-proc-mgmt.h"
+
+int
+glusterd_proc_init(glusterd_proc_t *proc, char *name, char *pidfile,
+ char *logdir, char *logfile, char *volfile, char *volfileid,
+ char *volfileserver)
+{
+ int ret = -1;
+
+ ret = snprintf(proc->name, sizeof(proc->name), "%s", name);
+ if (ret < 0)
+ goto out;
+
+ ret = snprintf(proc->pidfile, sizeof(proc->pidfile), "%s", pidfile);
+ if (ret < 0)
+ goto out;
+
+ ret = snprintf(proc->logdir, sizeof(proc->logdir), "%s", logdir);
+ if (ret < 0)
+ goto out;
+
+ ret = snprintf(proc->logfile, sizeof(proc->logfile), "%s", logfile);
+ if (ret < 0)
+ goto out;
+
+ ret = snprintf(proc->volfile, sizeof(proc->volfile), "%s", volfile);
+ if (ret < 0)
+ goto out;
+
+ ret = snprintf(proc->volfileid, sizeof(proc->volfileid), "%s", volfileid);
+ if (ret < 0)
+ goto out;
+
+ ret = snprintf(proc->volfileserver, sizeof(proc->volfileserver), "%s",
+ volfileserver);
+ if (ret < 0)
+ goto out;
+
+out:
+ if (ret > 0)
+ ret = 0;
+
+ return ret;
+}
+
+int
+glusterd_proc_stop(glusterd_proc_t *proc, int sig, int flags)
+{
+ /* NB: Copy-paste code from glusterd_service_stop, the source may be
+ * removed once all daemon management use proc */
+
+ int32_t ret = -1;
+ pid_t pid = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ if (!gf_is_service_running(proc->pidfile, &pid)) {
+ ret = 0;
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_ALREADY_STOPPED,
+ "%s already stopped", proc->name);
+ goto out;
+ }
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_STOP_SUCCESS,
+ "Stopping %s daemon running in pid: "
+ "%d",
+ proc->name, pid);
+
+ ret = kill(pid, sig);
+ if (ret) {
+ switch (errno) {
+ case ESRCH:
+ gf_msg_debug(this->name, 0,
+ "%s is already "
+ "stopped",
+ proc->name);
+ ret = 0;
+ goto out;
+ default:
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SVC_KILL_FAIL,
+ "Unable to kill %s "
+ "service, reason:%s",
+ proc->name, strerror(errno));
+ }
+ } else {
+ (void)glusterd_unlink_file(proc->pidfile);
+ }
+ if (flags != PROC_STOP_FORCE)
+ goto out;
+
+ synclock_unlock(&conf->big_lock);
+ synctask_sleep(1);
+ synclock_lock(&conf->big_lock);
+ if (gf_is_service_running(proc->pidfile, &pid)) {
+ ret = kill(pid, SIGKILL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_PID_KILL_FAIL,
+ "Unable to kill pid:%d, "
+ "reason:%s",
+ pid, strerror(errno));
+ goto out;
+ }
+ ret = glusterd_unlink_file(proc->pidfile);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_proc_get_pid(glusterd_proc_t *proc)
+{
+ int pid = -1;
+ (void)gf_is_service_running(proc->pidfile, &pid);
+ return pid;
+}
+
+int
+glusterd_proc_is_running(glusterd_proc_t *proc)
+{
+ int pid = -1;
+
+ return gf_is_service_running(proc->pidfile, &pid);
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.h
new file mode 100644
index 00000000000..e8e9ffc5082
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-proc-mgmt.h
@@ -0,0 +1,44 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_PROC_MGMT_H_
+#define _GLUSTERD_PROC_MGMT_H_
+
+typedef struct glusterd_proc_ glusterd_proc_t;
+
+enum proc_flags {
+ PROC_NONE = 0,
+ PROC_START,
+ PROC_START_NO_WAIT,
+ PROC_STOP,
+ PROC_STOP_FORCE
+};
+
+struct glusterd_proc_ {
+ char name[NAME_MAX];
+ char pidfile[PATH_MAX];
+ char logdir[PATH_MAX];
+ char logfile[PATH_MAX];
+ char volfile[PATH_MAX];
+ char volfileserver[PATH_MAX];
+ char volfileid[256];
+};
+
+int
+glusterd_proc_init(glusterd_proc_t *proc, char *name, char *pidfile,
+ char *logdir, char *logfile, char *volfile, char *volfileid,
+ char *volfileserver);
+
+int
+glusterd_proc_stop(glusterd_proc_t *proc, int sig, int flags);
+
+int
+glusterd_proc_is_running(glusterd_proc_t *proc);
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.c b/xlators/mgmt/glusterd/src/glusterd-quota.c
index b063421ec1a..8370c174ce3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-quota.c
+++ b/xlators/mgmt/glusterd/src/glusterd-quota.c
@@ -1,843 +1,2259 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#include "common-utils.h"
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/common-utils.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
#include "glusterd.h"
#include "glusterd-op-sm.h"
#include "glusterd-store.h"
#include "glusterd-utils.h"
+#include "glusterd-quotad-svc.h"
#include "glusterd-volgen.h"
-#include "run.h"
+#include "glusterd-messages.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/byte-order.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/quota-common-utils.h>
+#include "glusterd-quota.h"
#include <sys/wait.h>
+#include <dlfcn.h>
-int
-glusterd_handle_quota (rpcsvc_request_t *req)
-{
- int32_t ret = -1;
- gf_cli_req cli_req = {{0,}};
- dict_t *dict = NULL;
- glusterd_op_t cli_op = GD_OP_QUOTA;
- char operation[256] = {0, };
- char *volname = NULL;
- int32_t type = 0;
-
- GF_ASSERT (req);
-
- if (!xdr_to_generic (req->msg[0], &cli_req,
- (xdrproc_t)xdr_gf_cli_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- if (cli_req.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
-
- ret = dict_unserialize (cli_req.dict.dict_val,
- cli_req.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR, "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- } else {
- dict->extra_stdfree = cli_req.dict.dict_val;
- }
- }
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log ("", GF_LOG_WARNING, "Unable to get volume name, while"
- "handling quota command");
- goto out;
- }
-
- ret = dict_get_int32 (dict, "type", &type);
- if (ret) {
- gf_log ("", GF_LOG_WARNING, "Unable to get type of cmd. , while"
- "handling quota command");
- goto out;
- }
-
- switch (type) {
- case GF_QUOTA_OPTION_TYPE_ENABLE:
- strncpy (operation, "enable", sizeof (operation));
- break;
+#ifndef _PATH_SETFATTR
+#ifdef GF_LINUX_HOST_OS
+#define _PATH_SETFATTR "setfattr"
+#endif
+#ifdef __NetBSD__
+#define _PATH_SETFATTR "/usr/pkg/bin/setfattr"
+#endif
+#endif
- case GF_QUOTA_OPTION_TYPE_DISABLE:
- strncpy (operation, "disable", sizeof (operation));
- break;
+/* Any negative pid to make it special client */
+#define QUOTA_CRAWL_PID "-100"
+
+#define GLUSTERFS_GET_QUOTA_LIMIT_MOUNT_PIDFILE(pidfile, volname) \
+ { \
+ snprintf(pidfile, PATH_MAX - 1, \
+ DEFAULT_VAR_RUN_DIRECTORY "/%s_quota_limit.pid", volname); \
+ }
+
+#define GLUSTERFS_GET_QUOTA_LIST_MOUNT_PIDFILE(pidfile, volname) \
+ { \
+ snprintf(pidfile, PATH_MAX - 1, \
+ DEFAULT_VAR_RUN_DIRECTORY "/%s_quota_list.pid", volname); \
+ }
+
+#define GLUSTERD_GET_QUOTA_CRAWL_PIDDIR(piddir, volinfo, type) \
+ do { \
+ char _volpath[PATH_MAX] = { \
+ 0, \
+ }; \
+ int32_t _crawl_pid_len; \
+ GLUSTERD_GET_VOLUME_DIR(_volpath, volinfo, priv); \
+ if (type == GF_QUOTA_OPTION_TYPE_ENABLE || \
+ type == GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS) \
+ _crawl_pid_len = snprintf(piddir, PATH_MAX, "%s/run/quota/enable", \
+ _volpath); \
+ else \
+ _crawl_pid_len = snprintf(piddir, PATH_MAX, \
+ "%s/run/quota/disable", _volpath); \
+ if ((_crawl_pid_len < 0) || (_crawl_pid_len >= PATH_MAX)) { \
+ piddir[0] = 0; \
+ } \
+ } while (0)
+
+#define GLUSTERD_GET_TMP_PATH(abspath, path) \
+ do { \
+ snprintf(abspath, sizeof(abspath) - 1, \
+ DEFAULT_VAR_RUN_DIRECTORY "/tmp%s", path); \
+ } while (0)
+
+#define GLUSTERD_GET_QUOTA_LIST_MOUNT_PATH(abspath, volname, path) \
+ do { \
+ snprintf(abspath, sizeof(abspath) - 1, \
+ DEFAULT_VAR_RUN_DIRECTORY "/%s_quota_list%s", volname, path); \
+ } while (0)
+
+const char *gd_quota_op_list[GF_QUOTA_OPTION_TYPE_MAX + 1] = {
+ [GF_QUOTA_OPTION_TYPE_NONE] = "none",
+ [GF_QUOTA_OPTION_TYPE_ENABLE] = "enable",
+ [GF_QUOTA_OPTION_TYPE_DISABLE] = "disable",
+ [GF_QUOTA_OPTION_TYPE_LIMIT_USAGE] = "limit-usage",
+ [GF_QUOTA_OPTION_TYPE_REMOVE] = "remove",
+ [GF_QUOTA_OPTION_TYPE_LIST] = "list",
+ [GF_QUOTA_OPTION_TYPE_VERSION] = "version",
+ [GF_QUOTA_OPTION_TYPE_ALERT_TIME] = "alert-time",
+ [GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT] = "soft-timeout",
+ [GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT] = "hard-timeout",
+ [GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT] = "default-soft-limit",
+ [GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS] = "limit-objects",
+ [GF_QUOTA_OPTION_TYPE_LIST_OBJECTS] = "list-objects",
+ [GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS] = "remove-objects",
+ [GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS] = "enable-objects",
+ [GF_QUOTA_OPTION_TYPE_UPGRADE] = "upgrade",
+ [GF_QUOTA_OPTION_TYPE_MAX] = NULL};
+
+gf_boolean_t
+glusterd_is_quota_supported(int32_t type, char **op_errstr)
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ gf_boolean_t supported = _gf_false;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ if ((conf->op_version == GD_OP_VERSION_MIN) &&
+ (type > GF_QUOTA_OPTION_TYPE_VERSION))
+ goto out;
+
+ if ((conf->op_version < GD_OP_VERSION_3_7_0) &&
+ (type > GF_QUOTA_OPTION_TYPE_VERSION_OBJECTS))
+ goto out;
+
+ /* Quota Operations that change quota.conf shouldn't
+ * be allowed as the quota.conf format changes in 3.7
+ */
+ if ((conf->op_version < GD_OP_VERSION_3_7_0) &&
+ (type == GF_QUOTA_OPTION_TYPE_ENABLE ||
+ type == GF_QUOTA_OPTION_TYPE_LIMIT_USAGE ||
+ type == GF_QUOTA_OPTION_TYPE_REMOVE))
+ goto out;
+
+ /* Quota xattr version implemented in 3.7.6
+ * quota-version is incremented when quota is enabled
+ * Quota enable and disable performance enhancement has been done
+ * in version 3.7.12.
+ * so don't allow enabling/disabling quota in heterogeneous
+ * cluster during upgrade
+ */
+ if (type == GF_QUOTA_OPTION_TYPE_ENABLE ||
+ type == GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS ||
+ type == GF_QUOTA_OPTION_TYPE_DISABLE) {
+ if (conf->op_version < GD_OP_VERSION_3_7_12)
+ goto out;
+ }
+
+ supported = _gf_true;
- case GF_QUOTA_OPTION_TYPE_LIMIT_USAGE:
- strncpy (operation, "limit-usage", sizeof (operation));
- break;
+out:
+ if (!supported && op_errstr != NULL && conf)
+ gf_asprintf(op_errstr,
+ "Volume quota failed. The cluster is "
+ "operating at version %d. Quota command"
+ " %s is unavailable in this version.",
+ conf->op_version, gd_quota_op_list[type]);
+
+ return supported;
+}
- case GF_QUOTA_OPTION_TYPE_REMOVE:
- strncpy (operation, "remove", sizeof (operation));
- break;
+int
+__glusterd_handle_quota(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+ glusterd_op_t cli_op = GD_OP_QUOTA;
+ char *volname = NULL;
+ int32_t type = 0;
+ char msg[2048] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(msg, sizeof(msg),
+ "Unable to decode the "
+ "command");
+ goto out;
+ } else {
+ dict->extra_stdfree = cli_req.dict.dict_val;
}
- gf_cmd_log ("volume quota", " %s command on %s", operation, volname);
- ret = glusterd_op_begin (req, GD_OP_QUOTA, dict);
- gf_cmd_log ("volume quota", " %s command on %s %s", operation,volname,
- (ret != 0)? "FAILED" : "SUCCEEDED");
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Unable to get volume name");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name, "
+ "while handling quota command");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "type", SLEN("type"), &type);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Unable to get type of command");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get type of cmd, "
+ "while handling quota command");
+ goto out;
+ }
+
+ if (!glusterd_is_quota_supported(type, NULL)) {
+ snprintf(msg, sizeof(msg),
+ "Volume quota failed. The cluster "
+ "is operating at version %d. Quota command"
+ " %s is unavailable in this version.",
+ conf->op_version, gd_quota_op_list[type]);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_op_begin_synctask(req, GD_OP_QUOTA, dict);
out:
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ if (ret) {
+ if (msg[0] == '\0')
+ snprintf(msg, sizeof(msg), "Operation failed");
+ ret = glusterd_op_send_cli_response(cli_op, ret, 0, req, dict, msg);
+ }
- if (ret) {
- if (dict)
- dict_unref (dict);
- ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
- NULL, "operation failed");
- }
+ return ret;
+}
- return ret;
+int
+glusterd_handle_quota(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, __glusterd_handle_quota);
}
int32_t
-glusterd_check_if_quota_trans_enabled (glusterd_volinfo_t *volinfo)
+glusterd_check_if_quota_trans_enabled(glusterd_volinfo_t *volinfo)
{
- int32_t ret = 0;
- int flag = _gf_false;
-
- flag = glusterd_volinfo_get_boolean (volinfo, VKEY_FEATURES_QUOTA);
- if (flag == -1) {
- gf_log ("", GF_LOG_ERROR, "failed to get the quota status");
- ret = -1;
- goto out;
- }
-
- if (flag == _gf_false) {
- gf_log ("", GF_LOG_ERROR, "first enable the quota translator");
- ret = -1;
- goto out;
- }
- ret = 0;
+ int32_t ret = 0;
+ int flag = _gf_false;
+
+ flag = glusterd_volinfo_get_boolean(volinfo, VKEY_FEATURES_QUOTA);
+ if (flag == -1) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_QUOTA_GET_STAT_FAIL,
+ "failed to get the quota status");
+ ret = -1;
+ goto out;
+ }
+
+ if (flag == _gf_false) {
+ ret = -1;
+ goto out;
+ }
+ ret = 0;
out:
- return ret;
+ return ret;
}
-/* At the end of the function, the variable found will be set
- * to true if the path to be removed was present in the limit-list,
- * else will be false.
- */
int32_t
-_glusterd_quota_remove_limits (char **quota_limits, char *path,
- gf_boolean_t *found)
+_glusterd_quota_initiate_fs_crawl(glusterd_conf_t *priv,
+ glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brick, int type,
+ char *pid_dir)
{
- int ret = 0;
- int i = 0;
- int size = 0;
- int len = 0;
- int pathlen = 0;
- int skiplen = 0;
- int flag = 0;
- char *limits = NULL;
- char *qlimits = NULL;
-
- if (found != NULL)
- *found = _gf_false;
-
- if (*quota_limits == NULL)
- return -1;
-
- qlimits = *quota_limits;
-
- pathlen = strlen (path);
-
- len = strlen (qlimits);
-
- limits = GF_CALLOC (len + 1, sizeof (char), gf_gld_mt_char);
- if (!limits)
- return -1;
-
- while (i < len) {
- if (!memcmp ((void *) &qlimits [i], (void *)path, pathlen))
- if (qlimits [i + pathlen] == ':') {
- flag = 1;
- if (found != NULL)
- *found = _gf_true;
- }
-
- while (qlimits [i + size] != ',' &&
- qlimits [i + size] != '\0')
- size++;
-
- if (!flag) {
- memcpy ((void *) &limits [i], (void *) &qlimits [i], size + 1);
- } else {
- skiplen = size + 1;
- size = len - i - size;
- memcpy ((void *) &limits [i], (void *) &qlimits [i + skiplen], size);
- break;
- }
-
- i += size + 1;
- size = 0;
+ pid_t pid;
+ int32_t ret = -1;
+ int status = 0;
+ char mountdir[PATH_MAX] = {
+ 0,
+ };
+ char logfile[PATH_MAX] = {
+ 0,
+ };
+ char brickpath[PATH_MAX] = {
+ 0,
+ };
+ char vol_id[PATH_MAX] = {
+ 0,
+ };
+ char pidfile[PATH_MAX] = {
+ 0,
+ };
+ runner_t runner = {0};
+ char *volfileserver = NULL;
+ FILE *pidfp = NULL;
+ int32_t len = 0;
+
+ GF_VALIDATE_OR_GOTO("glusterd", THIS, out);
+
+ GLUSTERD_GET_TMP_PATH(mountdir, "/");
+ ret = sys_mkdir(mountdir, 0755);
+ if (ret && errno != EEXIST) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_MOUNT_REQ_FAIL,
+ "failed to create temporary "
+ "directory %s",
+ mountdir);
+ ret = -1;
+ goto out;
+ }
+
+ strcat(mountdir, "mntXXXXXX");
+ if (mkdtemp(mountdir) == NULL) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_MOUNT_REQ_FAIL,
+ "failed to create a temporary "
+ "mount directory: %s",
+ mountdir);
+ ret = -1;
+ goto out;
+ }
+
+ GLUSTERD_REMOVE_SLASH_FROM_PATH(brick->path, brickpath);
+ len = snprintf(logfile, sizeof(logfile),
+ DEFAULT_QUOTA_CRAWL_LOG_DIRECTORY "/%s.log", brickpath);
+ if ((len < 0) || (len >= sizeof(vol_id))) {
+ ret = -1;
+ goto out;
+ }
+
+ if (dict_get_strn(THIS->options, "transport.socket.bind-address",
+ SLEN("transport.socket.bind-address"),
+ &volfileserver) != 0)
+ volfileserver = "localhost";
+
+ len = snprintf(vol_id, sizeof(vol_id), "client_per_brick/%s.%s.%s.%s.vol",
+ volinfo->volname, "client", brick->hostname, brickpath);
+ if ((len < 0) || (len >= sizeof(vol_id))) {
+ ret = -1;
+ goto out;
+ }
+
+ runinit(&runner);
+
+ if (type == GF_QUOTA_OPTION_TYPE_ENABLE ||
+ type == GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS)
+ runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s", volfileserver,
+ "--volfile-id", vol_id, "--use-readdirp=yes",
+ "--client-pid", QUOTA_CRAWL_PID, "-l", logfile,
+ mountdir, NULL);
+ else
+ runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s", volfileserver,
+ "--volfile-id", vol_id, "--use-readdirp=no",
+ "--client-pid", QUOTA_CRAWL_PID, "-l", logfile,
+ mountdir, NULL);
+
+ synclock_unlock(&priv->big_lock);
+ ret = runner_run_reuse(&runner);
+ synclock_lock(&priv->big_lock);
+ if (ret == -1) {
+ runner_log(&runner, "glusterd", GF_LOG_DEBUG, "command failed");
+ runner_end(&runner);
+ goto out;
+ }
+ runner_end(&runner);
+
+ if ((pid = fork()) < 0) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_FORK_FAIL,
+ "fork from parent failed");
+ gf_umount_lazy("glusterd", mountdir, 1);
+ ret = -1;
+ goto out;
+ } else if (pid == 0) { // first child
+ /* fork one more to not hold back main process on
+ * blocking call below
+ */
+ pid = fork();
+ if (pid < 0) {
+ gf_umount_lazy("glusterd", mountdir, 1);
+ _exit(EXIT_FAILURE);
+ } else if (pid > 0) {
+ _exit(EXIT_SUCCESS);
}
- if (!flag) {
- ret = 1;
- } else {
- len = strlen (limits);
+ ret = chdir(mountdir);
+ if (ret == -1) {
+ gf_msg(THIS->name, GF_LOG_WARNING, errno, GD_MSG_DIR_OP_FAILED,
+ "chdir %s failed", mountdir);
+ gf_umount_lazy("glusterd", mountdir, 1);
+ exit(EXIT_FAILURE);
+ }
+ runinit(&runner);
+
+ if (type == GF_QUOTA_OPTION_TYPE_ENABLE ||
+ type == GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS)
+ runner_add_args(&runner, "/usr/bin/find", ".", "-exec",
+ "/usr/bin/stat", "{}", "\\", ";", NULL);
+
+ else if (type == GF_QUOTA_OPTION_TYPE_DISABLE) {
+#if defined(GF_DARWIN_HOST_OS)
+ runner_add_args(
+ &runner, "/usr/bin/find", ".", "-exec", "/usr/bin/xattr", "-w",
+ VIRTUAL_QUOTA_XATTR_CLEANUP_KEY, "1", "{}", "\\", ";", NULL);
+#elif defined(__FreeBSD__)
+ runner_add_args(&runner, "/usr/bin/find", ".", "-exec",
+ "/usr/sbin/setextattr", EXTATTR_NAMESPACE_USER,
+ VIRTUAL_QUOTA_XATTR_CLEANUP_KEY, "1", "{}", "\\",
+ ";", NULL);
+#else
+ runner_add_args(&runner, "find", ".", "-exec", _PATH_SETFATTR, "-n",
+ VIRTUAL_QUOTA_XATTR_CLEANUP_KEY, "-v", "1", "{}",
+ "\\", ";", NULL);
+#endif
+ }
- if (len == 0) {
- GF_FREE (qlimits);
+ if (runner_start(&runner) == -1) {
+ gf_umount_lazy("glusterd", mountdir, 1);
+ _exit(EXIT_FAILURE);
+ }
- *quota_limits = NULL;
+ len = snprintf(pidfile, sizeof(pidfile), "%s/%s.pid", pid_dir,
+ brickpath);
+ if ((len >= 0) && (len < sizeof(pidfile))) {
+ pidfp = fopen(pidfile, "w");
+ if (pidfp != NULL) {
+ fprintf(pidfp, "%d\n", runner.chpid);
+ fflush(pidfp);
+ fclose(pidfp);
+ }
+ }
- goto out;
- }
+#ifndef GF_LINUX_HOST_OS
+ runner_end(&runner); /* blocks in waitpid */
+#endif
+ gf_umount_lazy("glusterd", mountdir, 1);
- if (limits[len - 1] == ',') {
- limits[len - 1] = '\0';
- len --;
- }
+ _exit(EXIT_SUCCESS);
+ }
+ ret = (waitpid(pid, &status, 0) == pid && WIFEXITED(status) &&
+ WEXITSTATUS(status) == EXIT_SUCCESS)
+ ? 0
+ : -1;
- GF_FREE (qlimits);
+out:
+ return ret;
+}
- qlimits = GF_CALLOC (len + 1, sizeof (char), gf_gld_mt_char);
+void
+glusterd_stop_all_quota_crawl_service(glusterd_conf_t *priv,
+ glusterd_volinfo_t *volinfo, int type)
+{
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char pid_dir[PATH_MAX] = {
+ 0,
+ };
+ char pidfile[PATH_MAX] = {
+ 0,
+ };
+ int32_t len = 0;
+
+ GLUSTERD_GET_QUOTA_CRAWL_PIDDIR(pid_dir, volinfo, type);
+
+ dir = sys_opendir(pid_dir);
+ if (dir == NULL)
+ return;
+
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
+ len = snprintf(pidfile, sizeof(pidfile), "%s/%s", pid_dir,
+ entry->d_name);
+ if ((len >= 0) && (len < sizeof(pidfile))) {
+ glusterd_service_stop_nolock("quota_crawl", pidfile, SIGKILL,
+ _gf_true);
+ sys_unlink(pidfile);
+ }
+ }
+ sys_closedir(dir);
+}
- if (!qlimits) {
- ret = -1;
- goto out;
- }
+int32_t
+glusterd_quota_initiate_fs_crawl(glusterd_conf_t *priv,
+ glusterd_volinfo_t *volinfo, int type)
+{
+ int32_t ret = -1;
+ glusterd_brickinfo_t *brick = NULL;
+ char pid_dir[PATH_MAX] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("glusterd", THIS, out);
+
+ ret = glusterd_generate_client_per_brick_volfile(volinfo);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_OP_FAILED,
+ "failed to generate client volume file");
+ goto out;
+ }
+
+ ret = mkdir_p(DEFAULT_QUOTA_CRAWL_LOG_DIRECTORY, 0755, _gf_true);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_GLUSTERD_OP_FAILED,
+ "failed to create dir %s: %s", DEFAULT_QUOTA_CRAWL_LOG_DIRECTORY,
+ strerror(errno));
+ goto out;
+ }
+
+ GLUSTERD_GET_QUOTA_CRAWL_PIDDIR(pid_dir, volinfo, type);
+ ret = mkdir_p(pid_dir, 0755, _gf_true);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_GLUSTERD_OP_FAILED,
+ "failed to create dir %s: %s", pid_dir, strerror(errno));
+ goto out;
+ }
+
+ /* When quota enable is performed, stop alreday running enable crawl
+ * process and start fresh crawl process. let disable process continue
+ * if running to cleanup the older xattrs
+ * When quota disable is performed, stop both enable/disable crawl
+ * process and start fresh crawl process to cleanup the xattrs
+ */
+ glusterd_stop_all_quota_crawl_service(priv, volinfo,
+ GF_QUOTA_OPTION_TYPE_ENABLE);
+ if (type == GF_QUOTA_OPTION_TYPE_DISABLE)
+ glusterd_stop_all_quota_crawl_service(priv, volinfo,
+ GF_QUOTA_OPTION_TYPE_DISABLE);
+
+ cds_list_for_each_entry(brick, &volinfo->bricks, brick_list)
+ {
+ if (gf_uuid_compare(brick->uuid, MY_UUID))
+ continue;
+
+ ret = _glusterd_quota_initiate_fs_crawl(priv, volinfo, brick, type,
+ pid_dir);
- memcpy ((void *) qlimits, (void *) limits, len + 1);
+ if (ret)
+ goto out;
+ }
- *quota_limits = qlimits;
+ ret = 0;
+out:
+ return ret;
+}
- ret = 0;
- }
+int32_t
+glusterd_quota_get_default_soft_limit(glusterd_volinfo_t *volinfo,
+ dict_t *rsp_dict)
+{
+ int32_t ret = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char *default_limit = NULL;
+ char *val = NULL;
+
+ if (rsp_dict == NULL)
+ return -1;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ ret = glusterd_volinfo_get(volinfo, "features.default-soft-limit",
+ &default_limit);
+ if (default_limit)
+ val = gf_strdup(default_limit);
+ else
+ val = gf_strdup("80%");
+
+ ret = dict_set_dynstr_sizen(rsp_dict, "default-soft-limit", val);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set default "
+ "soft-limit into dict");
+ goto out;
+ }
+ ret = 0;
out:
- if (limits)
- GF_FREE (limits);
-
- return ret;
+ return ret;
}
int32_t
-glusterd_quota_initiate_fs_crawl (glusterd_conf_t *priv, char *volname)
+glusterd_inode_quota_enable(glusterd_volinfo_t *volinfo, char **op_errstr,
+ gf_boolean_t *crawl)
{
- pid_t pid;
- int32_t ret = 0;
- int status = 0;
- char mountdir[] = "/tmp/mntXXXXXX";
- runner_t runner = {0};
-
- if (mkdtemp (mountdir) == NULL) {
- gf_log ("glusterd", GF_LOG_DEBUG,
- "failed to create a temporary mount directory");
- ret = -1;
- goto out;
- }
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
+ GF_VALIDATE_OR_GOTO(this->name, crawl, out);
+ GF_VALIDATE_OR_GOTO(this->name, op_errstr, out);
+
+ if (glusterd_is_volume_started(volinfo) == 0) {
+ *op_errstr = gf_strdup(
+ "Volume is stopped, start volume "
+ "to enable inode quota.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_check_if_quota_trans_enabled(volinfo);
+ if (ret != 0) {
+ *op_errstr = gf_strdup(
+ "Quota is disabled. Enabling quota "
+ "will enable inode quota");
+ ret = -1;
+ goto out;
+ }
+
+ if (glusterd_is_volume_inode_quota_enabled(volinfo)) {
+ *op_errstr = gf_strdup("Inode Quota is already enabled");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(volinfo->dict, VKEY_FEATURES_INODE_QUOTA,
+ "on");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "dict set failed");
+ goto out;
+ }
+
+ *crawl = _gf_true;
+
+ ret = glusterd_store_quota_config(
+ volinfo, NULL, NULL, GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS, op_errstr);
+
+ ret = 0;
+out:
+ if (ret && op_errstr && !*op_errstr)
+ gf_asprintf(op_errstr,
+ "Enabling inode quota on volume %s has "
+ "been unsuccessful",
+ volinfo->volname);
+ return ret;
+}
- runinit (&runner);
- runner_add_args (&runner, SBIN_DIR"/glusterfs",
- "-s", "localhost",
- "--volfile-id", volname,
- "-l", DEFAULT_LOG_FILE_DIRECTORY"/quota-crawl.log",
- mountdir, NULL);
+int32_t
+glusterd_quota_enable(glusterd_volinfo_t *volinfo, char **op_errstr,
+ gf_boolean_t *crawl)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
+ GF_VALIDATE_OR_GOTO(this->name, crawl, out);
+ GF_VALIDATE_OR_GOTO(this->name, op_errstr, out);
+
+ if (glusterd_is_volume_started(volinfo) == 0) {
+ *op_errstr = gf_strdup(
+ "Volume is stopped, start volume "
+ "to enable quota.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_check_if_quota_trans_enabled(volinfo);
+ if (ret == 0) {
+ *op_errstr = gf_strdup("Quota is already enabled");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(volinfo->dict, VKEY_FEATURES_QUOTA, "on");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "dict set failed");
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(volinfo->dict, VKEY_FEATURES_INODE_QUOTA,
+ "on");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "dict set failed");
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(volinfo->dict,
+ "features.quota-deem-statfs", "on");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "setting quota-deem-statfs"
+ "in volinfo failed");
+ goto out;
+ }
+
+ *crawl = _gf_true;
+
+ ret = glusterd_store_quota_config(volinfo, NULL, NULL,
+ GF_QUOTA_OPTION_TYPE_ENABLE, op_errstr);
+
+ ret = 0;
+out:
+ if (ret && op_errstr && !*op_errstr)
+ gf_asprintf(op_errstr,
+ "Enabling quota on volume %s has been "
+ "unsuccessful",
+ volinfo->volname);
+ return ret;
+}
- ret = runner_run_reuse (&runner);
- if (ret == -1) {
- runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed");
- runner_end (&runner);
- goto out;
+int32_t
+glusterd_quota_disable(glusterd_volinfo_t *volinfo, char **op_errstr,
+ gf_boolean_t *crawl)
+{
+ int32_t ret = -1;
+ int i = 0;
+ char *value = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char *quota_options[] = {"features.soft-timeout",
+ "features.hard-timeout",
+ "features.alert-time",
+ "features.default-soft-limit",
+ "features.quota-deem-statfs",
+ "features.quota-timeout",
+ NULL};
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
+ GF_VALIDATE_OR_GOTO(this->name, op_errstr, out);
+
+ ret = glusterd_check_if_quota_trans_enabled(volinfo);
+ if (ret == -1) {
+ *op_errstr = gf_strdup("Quota is already disabled");
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(volinfo->dict, VKEY_FEATURES_QUOTA, "off");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "dict set failed");
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(volinfo->dict, VKEY_FEATURES_INODE_QUOTA,
+ "off");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "dict set failed");
+ goto out;
+ }
+
+ for (i = 0; quota_options[i]; i++) {
+ ret = glusterd_volinfo_get(volinfo, quota_options[i], &value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "failed to get option"
+ " %s",
+ quota_options[i]);
+ } else {
+ dict_del(volinfo->dict, quota_options[i]);
}
- runner_end (&runner);
+ }
- if ((pid = fork ()) < 0) {
- gf_log ("glusterd", GF_LOG_WARNING, "fork from parent failed");
- ret = -1;
- goto out;
- } else if (pid == 0) {//first child
- /* fork one more to not hold back main process on
- * blocking call below
- */
- pid = fork ();
- if (pid)
- _exit (pid > 0 ? EXIT_SUCCESS : EXIT_FAILURE);
-
- ret = chdir (mountdir);
- if (ret == -1) {
- gf_log ("glusterd", GF_LOG_WARNING, "chdir %s failed, "
- "reason: %s", mountdir, strerror (errno));
- exit (EXIT_FAILURE);
- }
- runinit (&runner);
- runner_add_args (&runner, "/usr/bin/find", "find", ".", NULL);
- if (runner_start (&runner) == -1)
- _exit (EXIT_FAILURE);
+ *crawl = _gf_true;
-#ifndef GF_LINUX_HOST_OS
- runner_end (&runner); /* blocks in waitpid */
- runcmd ("umount", mountdir, NULL);
-#else
- runcmd ("umount", "-l", mountdir, NULL);
-#endif
- rmdir (mountdir);
- _exit (EXIT_SUCCESS);
- }
- ret = (waitpid (pid, &status, 0) == pid &&
- WIFEXITED (status) && WEXITSTATUS (status) == EXIT_SUCCESS) ? 0 : -1;
+ (void)glusterd_clean_up_quota_store(volinfo);
+ ret = 0;
out:
- return ret;
+ if (ret && op_errstr && !*op_errstr)
+ gf_asprintf(op_errstr,
+ "Disabling quota on volume %s has been "
+ "unsuccessful",
+ volinfo->volname);
+ return ret;
}
-char *
-glusterd_quota_get_limit_value (char *quota_limits, char *path)
+static int
+glusterd_set_quota_limit(char *volname, char *path, char *hard_limit,
+ char *soft_limit, char *key, char **op_errstr)
{
- int32_t i, j, k, l, len;
- int32_t pat_len, diff;
- char *ret_str = NULL;
-
- len = strlen (quota_limits);
- pat_len = strlen (path);
- i = 0;
- j = 0;
-
- while (i < len) {
- j = i;
- k = 0;
- while (path [k] == quota_limits [j]) {
- j++;
- k++;
- }
+ int ret = -1;
+ xlator_t *this = NULL;
+ char abspath[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ quota_limits_t existing_limit = {
+ 0,
+ };
+ quota_limits_t new_limit = {
+ 0,
+ };
+ double soft_limit_double = 0;
+ int64_t local_hl = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GLUSTERD_GET_QUOTA_LIMIT_MOUNT_PATH(abspath, volname, path);
+ ret = gf_lstat_dir(abspath, NULL);
+ if (ret) {
+ gf_asprintf(op_errstr,
+ "Failed to find the directory %s. "
+ "Reason : %s",
+ abspath, strerror(errno));
+ goto out;
+ }
+
+ if (!soft_limit) {
+ ret = sys_lgetxattr(abspath, key, (void *)&existing_limit,
+ sizeof(existing_limit));
+ if (ret < 0) {
+ switch (errno) {
+#if defined(ENOATTR) && (ENOATTR != ENODATA)
+ case ENODATA: /* FALLTHROUGH */
+#endif
+ case ENOATTR:
+ existing_limit.sl = -1;
+ break;
+ default:
+ gf_asprintf(op_errstr,
+ "Failed to get the "
+ "xattr %s from %s. Reason : %s",
+ key, abspath, strerror(errno));
+ goto out;
+ }
+ } else {
+ existing_limit.hl = ntoh64(existing_limit.hl);
+ existing_limit.sl = ntoh64(existing_limit.sl);
+ }
+ new_limit.sl = existing_limit.sl;
- l = j;
+ } else {
+ ret = gf_string2percent(soft_limit, &soft_limit_double);
+ if (ret)
+ goto out;
+ new_limit.sl = soft_limit_double;
+ }
- while (quota_limits [j] != ',' &&
- quota_limits [j] != '\0')
- j++;
+ new_limit.sl = hton64(new_limit.sl);
- if (quota_limits [l] == ':' && pat_len == (l - i)) {
- diff = j - i;
- ret_str = GF_CALLOC (diff + 1, sizeof (char),
- gf_gld_mt_char);
+ ret = gf_string2bytesize_int64(hard_limit, &local_hl);
+ if (ret)
+ goto out;
- strncpy (ret_str, &quota_limits [i], diff);
+ new_limit.hl = hton64(local_hl);
- break;
- }
- i = ++j; //skip ','
- }
+ ret = sys_lsetxattr(abspath, key, (char *)(void *)&new_limit,
+ sizeof(new_limit), 0);
+ if (ret == -1) {
+ gf_asprintf(op_errstr,
+ "setxattr of %s failed on %s."
+ " Reason : %s",
+ key, abspath, strerror(errno));
+ goto out;
+ }
+ ret = 0;
- return ret_str;
+out:
+ return ret;
}
-char*
-_glusterd_quota_get_limit_usages (glusterd_volinfo_t *volinfo,
- char *path, char **op_errstr)
+static int
+glusterd_update_quota_conf_version(glusterd_volinfo_t *volinfo)
{
- int32_t ret = 0;
- char *quota_limits = NULL;
- char *ret_str = NULL;
-
- if (volinfo == NULL)
- return NULL;
-
- ret = glusterd_volinfo_get (volinfo, VKEY_FEATURES_LIMIT_USAGE,
- &quota_limits);
- if (ret)
- return NULL;
- if (quota_limits == NULL) {
- ret_str = NULL;
- *op_errstr = gf_strdup ("Limit not set on any directory");
- } else if (path == NULL)
- ret_str = gf_strdup (quota_limits);
- else
- ret_str = glusterd_quota_get_limit_value (quota_limits, path);
-
- return ret_str;
+ volinfo->quota_conf_version++;
+ return 0;
}
-int32_t
-glusterd_quota_get_limit_usages (glusterd_conf_t *priv,
- glusterd_volinfo_t *volinfo,
- char *volname,
- dict_t *dict,
- char **op_errstr)
+/*The function glusterd_find_gfid_match () does the following:
+ * Given a buffer of gfids, the number of bytes read and the key gfid that needs
+ * to be found, the function compares 16 bytes at a time from @buf against
+ * @gfid.
+ *
+ * What happens when the match is found:
+ * i. If the function was called as part of 'limit-usage' operation, the call
+ * returns with write_byte_count = bytes_read
+ *ii. If the function as called as part of 'quota remove' operation, @buf
+ * is modified in memory such that the match is deleted from the buffer, and
+ * also @write_byte_count is set to original buf size minus the sixteen bytes
+ * that was deleted as part of 'remove'.
+ *
+ * What happens when the match is not found in the current buffer:
+ * The function returns with write_byte_count = bytes_read, which means to say
+ * that the caller of this function must write the entire buffer to the tmp file
+ * and continue the search.
+ */
+static gf_boolean_t
+glusterd_find_gfid_match_3_6(uuid_t gfid, unsigned char *buf, size_t bytes_read,
+ int opcode, size_t *write_byte_count)
{
- int32_t i = 0;
- int32_t ret = 0;
- int32_t count = 0;
- char *path = NULL;
- dict_t *ctx = NULL;
- char cmd_str [1024] = {0, };
- char *ret_str = NULL;
-
- ctx = glusterd_op_get_ctx ();
- if (ctx == NULL)
- return 0;
-
- ret = dict_get_int32 (dict, "count", &count);
- if (ret < 0)
- goto out;
-
- if (count == 0) {
- ret_str = _glusterd_quota_get_limit_usages (volinfo, NULL,
- op_errstr);
+ int gfid_index = 0;
+ int shift_count = 0;
+ unsigned char tmp_buf[17] = {
+ 0,
+ };
+
+ /* This function if for backward compatibility */
+
+ while (gfid_index != bytes_read) {
+ memcpy((void *)tmp_buf, (void *)&buf[gfid_index], 16);
+ if (!gf_uuid_compare(gfid, tmp_buf)) {
+ if (opcode == GF_QUOTA_OPTION_TYPE_REMOVE) {
+ shift_count = bytes_read - (gfid_index + 16);
+ memmove((void *)&buf[gfid_index], (void *)&buf[gfid_index + 16],
+ shift_count);
+ *write_byte_count = bytes_read - 16;
+ } else {
+ *write_byte_count = bytes_read;
+ }
+ return _gf_true;
} else {
- i = 0;
- while (count--) {
- snprintf (cmd_str, 1024, "path%d", i++);
-
- ret = dict_get_str (dict, cmd_str, &path);
- if (ret < 0)
- goto out;
-
- ret_str = _glusterd_quota_get_limit_usages (volinfo, path, op_errstr);
- }
+ gfid_index += 16;
}
+ }
+ if (gfid_index == bytes_read)
+ *write_byte_count = bytes_read;
- if (ret_str) {
- ret = dict_set_dynstr (ctx, "limit_list", ret_str);
- }
-out:
- return ret;
+ return _gf_false;
}
-int32_t
-glusterd_quota_enable (glusterd_volinfo_t *volinfo, char **op_errstr,
- gf_boolean_t *crawl)
+static gf_boolean_t
+glusterd_find_gfid_match(uuid_t gfid, char gfid_type, unsigned char *buf,
+ size_t bytes_read, int opcode,
+ size_t *write_byte_count)
{
- int32_t ret = -1;
- char *quota_status = NULL;
+ int gfid_index = 0;
+ int shift_count = 0;
+ unsigned char tmp_buf[17] = {
+ 0,
+ };
+ char type = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ if (conf->op_version < GD_OP_VERSION_3_7_0)
+ return glusterd_find_gfid_match_3_6(gfid, buf, bytes_read, opcode,
+ write_byte_count);
+
+ while (gfid_index != bytes_read) {
+ memcpy((void *)tmp_buf, (void *)&buf[gfid_index], 16);
+ type = buf[gfid_index + 16];
+
+ if (!gf_uuid_compare(gfid, tmp_buf) && type == gfid_type) {
+ if (opcode == GF_QUOTA_OPTION_TYPE_REMOVE ||
+ opcode == GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS) {
+ shift_count = bytes_read - (gfid_index + 17);
+ memmove((void *)&buf[gfid_index], (void *)&buf[gfid_index + 17],
+ shift_count);
+ *write_byte_count = bytes_read - 17;
+ } else {
+ *write_byte_count = bytes_read;
+ }
+ return _gf_true;
+ } else {
+ gfid_index += 17;
+ }
+ }
+ if (gfid_index == bytes_read)
+ *write_byte_count = bytes_read;
- GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out);
- GF_VALIDATE_OR_GOTO ("glusterd", crawl, out);
- GF_VALIDATE_OR_GOTO ("glusterd", op_errstr, out);
+out:
- if (glusterd_is_volume_started (volinfo) == 0) {
- *op_errstr = gf_strdup ("Volume is stopped, start volume "
- "to enable quota.");
- goto out;
- }
+ return _gf_false;
+}
- ret = glusterd_check_if_quota_trans_enabled (volinfo);
- if (ret == 0) {
- *op_errstr = gf_strdup ("Quota is already enabled");
- goto out;
- }
+/* The function glusterd_copy_to_tmp_file() reads the "remaining" bytes from
+ * the source fd and writes them to destination fd, at the rate of 1000 entries
+ * a time (qconf_line_sz is the size of an entry)
+ */
- quota_status = gf_strdup ("on");
- if (!quota_status) {
- gf_log ("", GF_LOG_ERROR, "memory allocation failed");
- *op_errstr = gf_strdup ("Enabling quota has been unsuccessful");
- goto out;
+static int
+glusterd_copy_to_tmp_file(int src_fd, int dst_fd, int qconf_line_sz)
+{
+ int ret = 0;
+ ssize_t bytes_read = 0;
+ xlator_t *this = NULL;
+ unsigned char *buf = 0;
+ int buf_sz = qconf_line_sz * 1000;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(buf_sz > 0);
+
+ buf = GF_CALLOC(buf_sz, 1, gf_common_mt_char);
+ if (!buf) {
+ ret = -1;
+ goto out;
+ }
+
+ while ((bytes_read = sys_read(src_fd, buf, buf_sz)) > 0) {
+ if (bytes_read % qconf_line_sz != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_QUOTA_CONF_CORRUPT,
+ "quota.conf "
+ "corrupted");
+ ret = -1;
+ goto out;
}
-
- ret = dict_set_dynstr (volinfo->dict, VKEY_FEATURES_QUOTA, quota_status);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "dict set failed");
- *op_errstr = gf_strdup ("Enabling quota has been unsuccessful");
- goto out;
+ ret = sys_write(dst_fd, (void *)buf, bytes_read);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_QUOTA_CONF_WRITE_FAIL,
+ "write into quota.conf failed.");
+ goto out;
}
+ }
+ ret = 0;
- *op_errstr = gf_strdup ("Enabling quota has been successful");
-
- *crawl = _gf_true;
-
- ret = 0;
out:
- return ret;
+ if (buf)
+ GF_FREE(buf);
+ return ret;
}
-int32_t
-glusterd_quota_disable (glusterd_volinfo_t *volinfo, char **op_errstr)
+int
+glusterd_store_quota_conf_upgrade(glusterd_volinfo_t *volinfo)
{
- int32_t ret = -1;
- char *quota_status = NULL, *quota_limits = NULL;
-
- GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out);
- GF_VALIDATE_OR_GOTO ("glusterd", op_errstr, out);
-
- ret = glusterd_check_if_quota_trans_enabled (volinfo);
- if (ret == -1) {
- *op_errstr = gf_strdup ("Quota is already disabled");
- goto out;
- }
+ int ret = -1;
+ int fd = -1;
+ int conf_fd = -1;
+ unsigned char gfid[17] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ char type = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ fd = gf_store_mkstemp(volinfo->quota_conf_shandle);
+ if (fd < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ conf_fd = open(volinfo->quota_conf_shandle->path, O_RDONLY);
+ if (conf_fd == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = quota_conf_skip_header(conf_fd);
+ if (ret)
+ goto out;
+
+ ret = glusterd_quota_conf_write_header(fd);
+ if (ret)
+ goto out;
+
+ while (1) {
+ ret = quota_conf_read_gfid(conf_fd, gfid, &type, 1.1);
+ if (ret == 0)
+ break;
+ else if (ret < 0)
+ goto out;
+
+ ret = glusterd_quota_conf_write_gfid(fd, gfid,
+ GF_QUOTA_CONF_TYPE_USAGE);
+ if (ret < 0)
+ goto out;
+ }
- quota_status = gf_strdup ("off");
- if (!quota_status) {
- gf_log ("", GF_LOG_ERROR, "memory allocation failed");
- *op_errstr = gf_strdup ("Disabling quota has been unsuccessful");
- goto out;
- }
+out:
+ if (conf_fd != -1)
+ sys_close(conf_fd);
- ret = dict_set_dynstr (volinfo->dict, VKEY_FEATURES_QUOTA, quota_status);
+ if (ret && (fd > 0)) {
+ gf_store_unlink_tmppath(volinfo->quota_conf_shandle);
+ } else if (!ret) {
+ ret = gf_store_rename_tmppath(volinfo->quota_conf_shandle);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "dict set failed");
- *op_errstr = gf_strdup ("Disabling quota has been unsuccessful");
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Failed to rename "
+ "quota conf file");
+ return ret;
}
- *op_errstr = gf_strdup ("Disabling quota has been successful");
-
- ret = glusterd_volinfo_get (volinfo, VKEY_FEATURES_LIMIT_USAGE,
- &quota_limits);
+ ret = glusterd_compute_cksum(volinfo, _gf_true);
if (ret) {
- gf_log ("", GF_LOG_WARNING, "failed to get the quota limits");
- } else {
- GF_FREE (quota_limits);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_COMPUTE_FAIL,
+ "Failed to "
+ "compute cksum for quota conf file");
+ return ret;
}
- dict_del (volinfo->dict, VKEY_FEATURES_LIMIT_USAGE);
+ ret = glusterd_store_save_quota_version_and_cksum(volinfo);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_QUOTA_CKSUM_VER_STORE_FAIL,
+ "Failed to "
+ "store quota version and cksum");
+ }
-out:
- return ret;
+ return ret;
}
-int32_t
-glusterd_quota_limit_usage (glusterd_volinfo_t *volinfo, dict_t *dict, char **op_errstr)
+int
+glusterd_store_quota_config(glusterd_volinfo_t *volinfo, char *path,
+ char *gfid_str, int opcode, char **op_errstr)
{
- int32_t ret = -1;
- char *path = NULL;
- char *limit = NULL;
- char *value = NULL;
- char msg [1024] = {0,};
- char *quota_limits = NULL;
-
- GF_VALIDATE_OR_GOTO ("glusterd", dict, out);
- GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out);
- GF_VALIDATE_OR_GOTO ("glusterd", op_errstr, out);
-
- ret = glusterd_check_if_quota_trans_enabled (volinfo);
- if (ret == -1) {
- *op_errstr = gf_strdup ("Quota is disabled, please enable "
- "quota");
- goto out;
+ int ret = -1;
+ int fd = -1;
+ int conf_fd = -1;
+ ssize_t bytes_read = 0;
+ size_t bytes_to_write = 0;
+ uuid_t gfid = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ gf_boolean_t found = _gf_false;
+ gf_boolean_t modified = _gf_false;
+ gf_boolean_t is_file_empty = _gf_false;
+ gf_boolean_t is_first_read = _gf_true;
+ glusterd_conf_t *conf = NULL;
+ float version = 0.0f;
+ char type = 0;
+ int quota_conf_line_sz = 16;
+ unsigned char *buf = 0;
+ int buf_sz = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ glusterd_store_create_quota_conf_sh_on_absence(volinfo);
+
+ conf_fd = open(volinfo->quota_conf_shandle->path, O_RDONLY);
+ if (conf_fd == -1) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = quota_conf_read_version(conf_fd, &version);
+ if (ret)
+ goto out;
+
+ if (version < 1.2f && conf->op_version >= GD_OP_VERSION_3_7_0) {
+ /* Upgrade quota.conf file to newer format */
+ sys_close(conf_fd);
+ conf_fd = -1;
+
+ ret = glusterd_store_quota_conf_upgrade(volinfo);
+ if (ret)
+ goto out;
+
+ if (GF_QUOTA_OPTION_TYPE_UPGRADE == opcode) {
+ /* Nothing more to be done here */
+ goto out;
}
- ret = glusterd_volinfo_get (volinfo, VKEY_FEATURES_LIMIT_USAGE,
- &quota_limits);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "failed to get the quota limits");
- *op_errstr = gf_strdup ("failed to set limit");
- goto out;
+ conf_fd = open(volinfo->quota_conf_shandle->path, O_RDONLY);
+ if (conf_fd == -1) {
+ ret = -1;
+ goto out;
}
- ret = dict_get_str (dict, "path", &path);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to fetch quota limits" );
- *op_errstr = gf_strdup ("failed to set limit");
- goto out;
+ ret = quota_conf_skip_header(conf_fd);
+ if (ret)
+ goto out;
+ } else if (GF_QUOTA_OPTION_TYPE_UPGRADE == opcode) {
+ /* No change to be done in quota_conf*/
+ goto out;
+ }
+
+ /* If op-ver is gt 3.7, then quota.conf will be upgraded, and 17 bytes
+ * storted in the new format. 16 bytes uuid and
+ * 1 byte type (usage/object)
+ */
+ if (conf->op_version >= GD_OP_VERSION_3_7_0)
+ quota_conf_line_sz++;
+
+ buf_sz = quota_conf_line_sz * 1000;
+
+ buf = GF_CALLOC(buf_sz, 1, gf_common_mt_char);
+ if (!buf) {
+ ret = -1;
+ goto out;
+ }
+
+ fd = gf_store_mkstemp(volinfo->quota_conf_shandle);
+ if (fd < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_quota_conf_write_header(fd);
+ if (ret)
+ goto out;
+
+ /* Just create empty quota.conf file if create */
+ if (GF_QUOTA_OPTION_TYPE_ENABLE == opcode ||
+ GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS == opcode) {
+ modified = _gf_true;
+ goto out;
+ }
+
+ /* Check if gfid_str is given for opts other than ENABLE */
+ if (!gfid_str) {
+ ret = -1;
+ goto out;
+ }
+ gf_uuid_parse(gfid_str, gfid);
+
+ if (opcode > GF_QUOTA_OPTION_TYPE_VERSION_OBJECTS)
+ type = GF_QUOTA_CONF_TYPE_OBJECTS;
+ else
+ type = GF_QUOTA_CONF_TYPE_USAGE;
+
+ for (;;) {
+ bytes_read = sys_read(conf_fd, buf, buf_sz);
+ if (bytes_read <= 0) {
+ /*The flag @is_first_read is TRUE when the loop is
+ * entered, and is set to false if the first read
+ * reads non-zero bytes of data. The flag is used to
+ * detect if quota.conf is an empty file, but for the
+ * header. This is done to log appropriate error message
+ * when 'quota remove' is attempted when there are no
+ * limits set on the given volume.
+ */
+ if (is_first_read)
+ is_file_empty = _gf_true;
+ break;
+ }
+ if ((bytes_read % quota_conf_line_sz) != 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_QUOTA_CONF_CORRUPT,
+ "quota.conf "
+ "corrupted");
+ ret = -1;
+ goto out;
}
+ found = glusterd_find_gfid_match(gfid, type, buf, bytes_read, opcode,
+ &bytes_to_write);
- ret = dict_get_str (dict, "limit", &limit);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to fetch quota limits" );
- *op_errstr = gf_strdup ("failed to set limit");
- goto out;
+ ret = sys_write(fd, (void *)buf, bytes_to_write);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_QUOTA_CONF_WRITE_FAIL,
+ "write into quota.conf failed.");
+ goto out;
}
- if (quota_limits) {
- ret = _glusterd_quota_remove_limits (&quota_limits, path, NULL);
- if (ret == -1) {
- gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
- *op_errstr = gf_strdup ("failed to set limit");
- goto out;
- }
+ /*If the match is found in this iteration, copy the rest of
+ * quota.conf into quota.conf.tmp and break.
+ * Else continue with the search.
+ */
+ if (found) {
+ ret = glusterd_copy_to_tmp_file(conf_fd, fd, quota_conf_line_sz);
+ if (ret)
+ goto out;
+ break;
}
+ is_first_read = _gf_false;
+ }
- if (quota_limits == NULL) {
- ret = gf_asprintf (&value, "%s:%s", path, limit);
+ switch (opcode) {
+ case GF_QUOTA_OPTION_TYPE_LIMIT_USAGE:
+ if (!found) {
+ ret = glusterd_quota_conf_write_gfid(fd, gfid,
+ GF_QUOTA_CONF_TYPE_USAGE);
if (ret == -1) {
- gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
- *op_errstr = gf_strdup ("failed to set limit");
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_QUOTA_CONF_WRITE_FAIL,
+ "write into quota.conf failed. ");
+ goto out;
}
- } else {
- ret = gf_asprintf (&value, "%s,%s:%s",
- quota_limits, path, limit);
+ modified = _gf_true;
+ }
+ break;
+ case GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS:
+ if (!found) {
+ ret = glusterd_quota_conf_write_gfid(
+ fd, gfid, GF_QUOTA_CONF_TYPE_OBJECTS);
if (ret == -1) {
- gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
- *op_errstr = gf_strdup ("failed to set limit");
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_QUOTA_CONF_WRITE_FAIL,
+ "write into quota.conf failed. ");
+ goto out;
}
+ modified = _gf_true;
+ }
+ break;
- GF_FREE (quota_limits);
- }
+ case GF_QUOTA_OPTION_TYPE_REMOVE:
+ case GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS:
+ if (is_file_empty) {
+ gf_asprintf(op_errstr,
+ "Cannot remove limit on"
+ " %s. The quota configuration file"
+ " for volume %s is empty.",
+ path, volinfo->volname);
+ ret = -1;
+ goto out;
+ } else {
+ if (!found) {
+ gf_asprintf(op_errstr,
+ "Error. gfid %s"
+ " for path %s not found in"
+ " store",
+ gfid_str, path);
+ ret = -1;
+ goto out;
+ } else {
+ modified = _gf_true;
+ }
+ }
+ break;
- quota_limits = value;
+ default:
+ ret = 0;
+ break;
+ }
- ret = dict_set_str (volinfo->dict, VKEY_FEATURES_LIMIT_USAGE,
- quota_limits);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to set quota limits" );
- *op_errstr = gf_strdup ("failed to set limit");
- goto out;
- }
- snprintf (msg, 1024, "limit set on %s", path);
- *op_errstr = gf_strdup (msg);
+ if (modified)
+ glusterd_update_quota_conf_version(volinfo);
- ret = 0;
+ ret = 0;
out:
- return ret;
+ if (conf_fd != -1) {
+ sys_close(conf_fd);
+ }
+
+ if (buf)
+ GF_FREE(buf);
+
+ if (ret && (fd > 0)) {
+ gf_store_unlink_tmppath(volinfo->quota_conf_shandle);
+ } else if (!ret && GF_QUOTA_OPTION_TYPE_UPGRADE != opcode) {
+ ret = gf_store_rename_tmppath(volinfo->quota_conf_shandle);
+ if (modified) {
+ ret = glusterd_compute_cksum(volinfo, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_COMPUTE_FAIL,
+ "Failed to "
+ "compute cksum for quota conf file");
+ return ret;
+ }
+
+ ret = glusterd_store_save_quota_version_and_cksum(volinfo);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_VERS_CKSUM_STORE_FAIL,
+ "Failed to "
+ "store quota version and cksum");
+ }
+ }
+ return ret;
}
int32_t
-glusterd_quota_remove_limits (glusterd_volinfo_t *volinfo, dict_t *dict, char **op_errstr)
+glusterd_quota_limit_usage(glusterd_volinfo_t *volinfo, dict_t *dict,
+ int opcode, char **op_errstr)
{
- int32_t ret = -1;
- char str [PATH_MAX + 1024] = {0,};
- char *quota_limits = NULL;
- char *path = NULL;
- gf_boolean_t flag = _gf_false;
-
- GF_VALIDATE_OR_GOTO ("glusterd", dict, out);
- GF_VALIDATE_OR_GOTO ("glusterd", volinfo, out);
- GF_VALIDATE_OR_GOTO ("glusterd", op_errstr, out);
+ int32_t ret = -1;
+ char *path = NULL;
+ char *hard_limit = NULL;
+ char *soft_limit = NULL;
+ char *gfid_str = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+ GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
+ GF_VALIDATE_OR_GOTO(this->name, op_errstr, out);
+
+ ret = glusterd_check_if_quota_trans_enabled(volinfo);
+ if (ret == -1) {
+ *op_errstr = gf_strdup(
+ "Quota is disabled, please enable "
+ "quota");
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "path", SLEN("path"), &path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch path");
+ goto out;
+ }
+ ret = gf_canonicalize_path(path);
+ if (ret)
+ goto out;
+
+ ret = dict_get_strn(dict, "hard-limit", SLEN("hard-limit"), &hard_limit);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch hard limit");
+ goto out;
+ }
+
+ if (dict_getn(dict, "soft-limit", SLEN("soft-limit"))) {
+ ret = dict_get_strn(dict, "soft-limit", SLEN("soft-limit"),
+ &soft_limit);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch "
+ "soft limit");
+ goto out;
+ }
+ }
- ret = glusterd_check_if_quota_trans_enabled (volinfo);
- if (ret == -1) {
- *op_errstr = gf_strdup ("Quota is disabled, please enable quota");
- goto out;
+ if (is_origin_glusterd(dict)) {
+ if (opcode == GF_QUOTA_OPTION_TYPE_LIMIT_USAGE) {
+ ret = glusterd_set_quota_limit(volinfo->volname, path, hard_limit,
+ soft_limit, QUOTA_LIMIT_KEY,
+ op_errstr);
+ } else {
+ ret = glusterd_set_quota_limit(volinfo->volname, path, hard_limit,
+ soft_limit, QUOTA_LIMIT_OBJECTS_KEY,
+ op_errstr);
}
+ if (ret)
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "gfid", SLEN("gfid"), &gfid_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get gfid of path "
+ "%s",
+ path);
+ goto out;
+ }
+
+ ret = glusterd_store_quota_config(volinfo, path, gfid_str, opcode,
+ op_errstr);
+ if (ret)
+ goto out;
+
+ ret = 0;
+out:
- ret = glusterd_volinfo_get (volinfo, VKEY_FEATURES_LIMIT_USAGE,
- &quota_limits);
+ if (ret && op_errstr && !*op_errstr)
+ gf_asprintf(op_errstr,
+ "Failed to set hard limit on path %s "
+ "for volume %s",
+ path, volinfo->volname);
+ return ret;
+}
+
+static int
+glusterd_remove_quota_limit(char *volname, char *path, char **op_errstr,
+ int type)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ char abspath[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GLUSTERD_GET_QUOTA_LIMIT_MOUNT_PATH(abspath, volname, path);
+ ret = gf_lstat_dir(abspath, NULL);
+ if (ret) {
+ gf_asprintf(op_errstr,
+ "Failed to find the directory %s. "
+ "Reason : %s",
+ abspath, strerror(errno));
+ goto out;
+ }
+
+ if (type == GF_QUOTA_OPTION_TYPE_REMOVE) {
+ ret = sys_lremovexattr(abspath, QUOTA_LIMIT_KEY);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "failed to get the quota limits");
- goto out;
+ gf_asprintf(op_errstr,
+ "removexattr failed on %s. "
+ "Reason : %s",
+ abspath, strerror(errno));
+ goto out;
}
+ }
- ret = dict_get_str (dict, "path", &path);
+ if (type == GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS) {
+ ret = sys_lremovexattr(abspath, QUOTA_LIMIT_OBJECTS_KEY);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to fetch quota limits" );
- goto out;
+ gf_asprintf(op_errstr,
+ "removexattr failed on %s. "
+ "Reason : %s",
+ abspath, strerror(errno));
+ goto out;
}
+ }
+ ret = 0;
- ret = _glusterd_quota_remove_limits (&quota_limits, path, &flag);
- if (ret == -1) {
- if (flag == _gf_true)
- snprintf (str, sizeof (str), "Removing limit on %s has "
- "been unsuccessful", path);
- else
- snprintf (str, sizeof (str), "%s has no limit set", path);
- *op_errstr = gf_strdup (str);
- goto out;
- } else {
- if (flag == _gf_true)
- snprintf (str, sizeof (str), "Removed quota limit on "
- "%s", path);
- else
- snprintf (str, sizeof (str), "no limit set on %s",
- path);
- *op_errstr = gf_strdup (str);
- }
+out:
+ return ret;
+}
- if (quota_limits) {
- ret = dict_set_str (volinfo->dict, VKEY_FEATURES_LIMIT_USAGE,
- quota_limits);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to set quota limits" );
- goto out;
- }
- } else {
- dict_del (volinfo->dict, VKEY_FEATURES_LIMIT_USAGE);
- }
+int32_t
+glusterd_quota_remove_limits(glusterd_volinfo_t *volinfo, dict_t *dict,
+ int opcode, char **op_errstr, int type)
+{
+ int32_t ret = -1;
+ char *path = NULL;
+ char *gfid_str = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+ GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
+ GF_VALIDATE_OR_GOTO(this->name, op_errstr, out);
+
+ ret = glusterd_check_if_quota_trans_enabled(volinfo);
+ if (ret == -1) {
+ *op_errstr = gf_strdup(
+ "Quota is disabled, please enable "
+ "quota");
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "path", SLEN("path"), &path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch path");
+ goto out;
+ }
+
+ ret = gf_canonicalize_path(path);
+ if (ret)
+ goto out;
+
+ if (is_origin_glusterd(dict)) {
+ ret = glusterd_remove_quota_limit(volinfo->volname, path, op_errstr,
+ type);
+ if (ret)
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "gfid", SLEN("gfid"), &gfid_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get gfid of path "
+ "%s",
+ path);
+ goto out;
+ }
+
+ ret = glusterd_store_quota_config(volinfo, path, gfid_str, opcode,
+ op_errstr);
+ if (ret)
+ goto out;
- ret = 0;
+ ret = 0;
out:
- return ret;
+ return ret;
}
-
int
-glusterd_op_quota (dict_t *dict, char **op_errstr)
+glusterd_set_quota_option(glusterd_volinfo_t *volinfo, dict_t *dict, char *key,
+ char **op_errstr)
{
- glusterd_volinfo_t *volinfo = NULL;
- int32_t ret = -1;
- char *volname = NULL;
- dict_t *ctx = NULL;
- int type = -1;
- gf_boolean_t start_crawl = _gf_false;
- glusterd_conf_t *priv = NULL;
+ int ret = 0;
+ char *value = NULL;
+ xlator_t *this = NULL;
+ char *option = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = glusterd_check_if_quota_trans_enabled(volinfo);
+ if (ret == -1) {
+ gf_asprintf(op_errstr,
+ "Cannot set %s. Quota on volume %s is "
+ "disabled",
+ key, volinfo->volname);
+ return -1;
+ }
+
+ ret = dict_get_strn(dict, "value", SLEN("value"), &value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Option value absent.");
+ return -1;
+ }
+
+ option = gf_strdup(value);
+ ret = dict_set_dynstr(volinfo->dict, key, option);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to set option %s", key);
+ return -1;
+ }
+
+ return 0;
+}
- GF_ASSERT (dict);
- GF_ASSERT (op_errstr);
+static int
+glusterd_quotad_op(int opcode)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
- priv = THIS->private;
+ this = THIS;
+ GF_ASSERT(this);
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name " );
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ switch (opcode) {
+ case GF_QUOTA_OPTION_TYPE_ENABLE:
+ case GF_QUOTA_OPTION_TYPE_DISABLE:
+
+ if (glusterd_all_volumes_with_quota_stopped())
+ ret = glusterd_svc_stop(&(priv->quotad_svc), SIGTERM);
+ else
+ ret = priv->quotad_svc.manager(&(priv->quotad_svc), NULL,
+ PROC_START);
+ break;
+
+ default:
+ ret = 0;
+ break;
+ }
+ return ret;
+}
+
+int
+glusterd_op_quota(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ int32_t ret = -1;
+ char *volname = NULL;
+ int type = -1;
+ gf_boolean_t start_crawl = _gf_false;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_asprintf(op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "type", SLEN("type"), &type);
+
+ if (!glusterd_is_quota_supported(type, op_errstr)) {
+ ret = -1;
+ goto out;
+ }
+
+ switch (type) {
+ case GF_QUOTA_OPTION_TYPE_ENABLE:
+ ret = glusterd_quota_enable(volinfo, op_errstr, &start_crawl);
+ if (ret < 0)
goto out;
- }
+ break;
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
+ case GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS:
+ ret = glusterd_inode_quota_enable(volinfo, op_errstr, &start_crawl);
+ if (ret < 0)
goto out;
- }
+ break;
- ret = dict_get_int32 (dict, "type", &type);
+ case GF_QUOTA_OPTION_TYPE_DISABLE:
+ ret = glusterd_quota_disable(volinfo, op_errstr, &start_crawl);
+ if (ret < 0)
+ goto out;
- if (type == GF_QUOTA_OPTION_TYPE_ENABLE) {
- ret = glusterd_quota_enable (volinfo, op_errstr, &start_crawl);
- if (ret < 0)
- goto out;
+ break;
- goto create_vol;
- }
+ case GF_QUOTA_OPTION_TYPE_LIMIT_USAGE:
+ case GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS:
+ ret = glusterd_quota_limit_usage(volinfo, dict, type, op_errstr);
+ goto out;
- if (type == GF_QUOTA_OPTION_TYPE_DISABLE) {
- ret = glusterd_quota_disable (volinfo, op_errstr);
- if (ret < 0)
- goto out;
+ case GF_QUOTA_OPTION_TYPE_REMOVE:
+ case GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS:
+ ret = glusterd_quota_remove_limits(volinfo, dict, type, op_errstr,
+ type);
+ goto out;
+
+ case GF_QUOTA_OPTION_TYPE_LIST:
+ case GF_QUOTA_OPTION_TYPE_LIST_OBJECTS:
+ ret = glusterd_check_if_quota_trans_enabled(volinfo);
+ if (ret == -1) {
+ *op_errstr = gf_strdup(
+ "Cannot list limits, "
+ "quota is disabled");
+ goto out;
+ }
+ ret = glusterd_quota_get_default_soft_limit(volinfo, rsp_dict);
+ goto out;
+
+ case GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT:
+ ret = glusterd_set_quota_option(volinfo, dict,
+ "features.soft-timeout", op_errstr);
+ if (ret)
+ goto out;
+ break;
- goto create_vol;
- }
+ case GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT:
+ ret = glusterd_set_quota_option(volinfo, dict,
+ "features.hard-timeout", op_errstr);
+ if (ret)
+ goto out;
+ break;
- if (type == GF_QUOTA_OPTION_TYPE_LIMIT_USAGE) {
- ret = glusterd_quota_limit_usage (volinfo, dict, op_errstr);
- if (ret < 0)
- goto out;
+ case GF_QUOTA_OPTION_TYPE_ALERT_TIME:
+ ret = glusterd_set_quota_option(volinfo, dict,
+ "features.alert-time", op_errstr);
+ if (ret)
+ goto out;
+ break;
- goto create_vol;
+ case GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT:
+ ret = glusterd_set_quota_option(
+ volinfo, dict, "features.default-soft-limit", op_errstr);
+ if (ret)
+ goto out;
+ break;
+
+ default:
+ gf_asprintf(op_errstr,
+ "Quota command failed. Invalid "
+ "opcode");
+ ret = -1;
+ goto out;
+ }
+
+ if (priv->op_version > GD_OP_VERSION_MIN) {
+ ret = glusterd_quotad_op(type);
+ if (ret)
+ goto out;
+ }
+
+ if (GF_QUOTA_OPTION_TYPE_ENABLE == type)
+ volinfo->quota_xattr_version++;
+ ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ if (GF_QUOTA_OPTION_TYPE_ENABLE == type)
+ volinfo->quota_xattr_version--;
+ goto out;
+ }
+
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Unable to re-create "
+ "volfiles");
+ if (GF_QUOTA_OPTION_TYPE_ENABLE == type) {
+ /* rollback volinfo */
+ volinfo->quota_xattr_version--;
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL,
+ "Failed to store volinfo for volume %s",
+ volinfo->volname);
+ }
}
- if (type == GF_QUOTA_OPTION_TYPE_REMOVE) {
- ret = glusterd_quota_remove_limits (volinfo, dict, op_errstr);
- if (ret < 0)
- goto out;
+ ret = -1;
+ goto out;
+ }
- goto create_vol;
- }
+#if BUILD_GNFS
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ if (priv->op_version == GD_OP_VERSION_MIN)
+ (void)priv->nfs_svc.manager(&(priv->nfs_svc), NULL, 0);
+ }
+#endif
- if (type == GF_QUOTA_OPTION_TYPE_LIST) {
- ret = glusterd_check_if_quota_trans_enabled (volinfo);
- if (ret == -1) {
- *op_errstr = gf_strdup ("cannot list the limits, "
- "quota is disabled");
- goto out;
- }
+ if (rsp_dict && start_crawl == _gf_true)
+ glusterd_quota_initiate_fs_crawl(priv, volinfo, type);
- ret = glusterd_quota_get_limit_usages (priv, volinfo, volname,
- dict, op_errstr);
+ ret = 0;
+out:
+ if (type == GF_QUOTA_OPTION_TYPE_LIMIT_USAGE ||
+ type == GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS ||
+ type == GF_QUOTA_OPTION_TYPE_REMOVE ||
+ type == GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS) {
+ /* During a list operation we need the aux mount to be
+ * accessible until the listing is done at the cli
+ */
+ glusterd_remove_auxiliary_mount(volinfo->volname);
+ }
+
+ return ret;
+}
- goto out;
+/*
+ * glusterd_get_gfid_from_brick() fetches the 'trusted.gfid' attribute of @path
+ * from each brick in the backend and places the same in the rsp_dict with the
+ * keys being gfid0, gfid1, gfid2 and so on. The absence of @path in the backend
+ * is not treated as error.
+ */
+static int
+glusterd_get_gfid_from_brick(dict_t *dict, glusterd_volinfo_t *volinfo,
+ dict_t *rsp_dict, char **op_errstr)
+{
+ int ret = -1;
+ int count = 0;
+ char *path = NULL;
+ char backend_path[PATH_MAX] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+ char *gfid_str = NULL;
+ uuid_t gfid;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_strn(dict, "path", SLEN("path"), &path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get path");
+ goto out;
+ }
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ ret = glusterd_resolve_brick(brickinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESOLVE_BRICK_FAIL,
+ FMTSTR_RESOLVE_BRICK, brickinfo->hostname, brickinfo->path);
+ goto out;
}
-create_vol:
- ret = glusterd_create_volfiles_and_notify_services (volinfo);
+
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID))
+ continue;
+
+ if (brickinfo->vg[0])
+ continue;
+
+ snprintf(backend_path, sizeof(backend_path), "%s%s", brickinfo->path,
+ path);
+
+ ret = gf_lstat_dir(backend_path, NULL);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to re-create volfile for"
- " 'quota'");
- ret = -1;
- goto out;
+ gf_msg(this->name, GF_LOG_INFO, errno, GD_MSG_DIR_OP_FAILED,
+ "Failed to find "
+ "directory %s.",
+ backend_path);
+ ret = 0;
+ continue;
}
+ ret = sys_lgetxattr(backend_path, GFID_XATTR_KEY, gfid, 16);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_INFO, errno, GD_MSG_GET_XATTR_FAIL,
+ "Attribute=%s, Directory=%s", GFID_XATTR_KEY, backend_path,
+ NULL);
+ ret = 0;
+ continue;
+ }
+ keylen = snprintf(key, sizeof(key), "gfid%d", count);
- ret = glusterd_store_volinfo (volinfo, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret)
- goto out;
+ gfid_str = gf_strdup(uuid_utoa(gfid));
+ if (!gfid_str) {
+ ret = -1;
+ goto out;
+ }
- if (GLUSTERD_STATUS_STARTED == volinfo->status)
- ret = glusterd_check_generate_start_nfs ();
+ ret = dict_set_dynstrn(rsp_dict, key, keylen, gfid_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to place "
+ "gfid of %s in dict",
+ backend_path);
+ GF_FREE(gfid_str);
+ goto out;
+ }
+ count++;
+ }
- ret = 0;
+ ret = dict_set_int32n(rsp_dict, "count", SLEN("count"), count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set count");
+ goto out;
+ }
+ ret = 0;
out:
- ctx = glusterd_op_get_ctx ();
- if (ctx && start_crawl == _gf_true)
- glusterd_quota_initiate_fs_crawl (priv, volname);
+ return ret;
+}
- if (ctx && *op_errstr) {
- ret = dict_set_dynstr (ctx, "errstr", *op_errstr);
- if (ret) {
- GF_FREE (*op_errstr);
- gf_log ("", GF_LOG_DEBUG,
- "failed to set error message in ctx");
- }
- *op_errstr = NULL;
- }
+static int
+_glusterd_validate_quota_opts(dict_t *dict, int type, char **errstr)
+{
+ int ret = -1;
+ xlator_t *this = THIS;
+ void *quota_xl = NULL;
+ volume_opt_list_t opt_list = {
+ {0},
+ };
+ volume_option_t *opt = NULL;
+ char *key = NULL;
+ char *value = NULL;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(this);
+
+ ret = xlator_volopt_dynload("features/quota", &quota_xl, &opt_list);
+ if (ret)
+ goto out;
+
+ switch (type) {
+ case GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT:
+ case GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT:
+ case GF_QUOTA_OPTION_TYPE_ALERT_TIME:
+ case GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT:
+ key = (char *)gd_quota_op_list[type];
+ break;
+ default:
+ ret = -1;
+ goto out;
+ }
+
+ opt = xlator_volume_option_get_list(&opt_list, key);
+ if (!opt) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_UNKNOWN_KEY,
+ "Unknown option: %s", key);
+ goto out;
+ }
+ ret = dict_get_strn(dict, "value", SLEN("value"), &value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Value not found for key %s", key);
+ goto out;
+ }
+
+ ret = xlator_option_validate(this, key, value, opt, errstr);
- return ret;
+out:
+ if (quota_xl) {
+ dlclose(quota_xl);
+ quota_xl = NULL;
+ }
+ return ret;
}
-int
-glusterd_op_stage_quota (dict_t *dict, char **op_errstr)
+static int
+glusterd_create_quota_auxiliary_mount(xlator_t *this, char *volname, int type)
{
- int ret = 0;
- char *volname = NULL;
- gf_boolean_t exists = _gf_false;
- int type = 0;
- dict_t *ctx = NULL;
+ int ret = -1;
+ char mountdir[PATH_MAX] = {
+ 0,
+ };
+ char pidfile_path[PATH_MAX] = {
+ 0,
+ };
+ char logfile[PATH_MAX] = {
+ 0,
+ };
+ char qpid[16] = {
+ 0,
+ };
+ char *volfileserver = NULL;
+ glusterd_conf_t *priv = NULL;
+ struct stat buf = {
+ 0,
+ };
+ FILE *file = NULL;
+
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ if (type == GF_QUOTA_OPTION_TYPE_LIST ||
+ type == GF_QUOTA_OPTION_TYPE_LIST_OBJECTS) {
+ GLUSTERFS_GET_QUOTA_LIST_MOUNT_PIDFILE(pidfile_path, volname);
+ GLUSTERD_GET_QUOTA_LIST_MOUNT_PATH(mountdir, volname, "/");
+ } else {
+ GLUSTERFS_GET_QUOTA_LIMIT_MOUNT_PIDFILE(pidfile_path, volname);
+ GLUSTERD_GET_QUOTA_LIMIT_MOUNT_PATH(mountdir, volname, "/");
+ }
+
+ file = fopen(pidfile_path, "r");
+ if (file) {
+ /* Previous command did not clean up pid file.
+ * remove aux mount if it exists*/
+ gf_umount_lazy(this->name, mountdir, 1);
+ fclose(file);
+ }
+
+ ret = sys_mkdir(mountdir, 0755);
+ if (ret && errno != EEXIST) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_MOUNT_REQ_FAIL,
+ "Failed to create auxiliary "
+ "mount directory %s",
+ mountdir);
+ goto out;
+ }
+ snprintf(logfile, PATH_MAX - 1, "%s/quota-mount-%s.log", priv->logdir,
+ volname);
+ snprintf(qpid, 15, "%d", GF_CLIENT_PID_QUOTA_MOUNT);
+
+ if (dict_get_strn(this->options, "transport.socket.bind-address",
+ SLEN("transport.socket.bind-address"),
+ &volfileserver) != 0)
+ volfileserver = "localhost";
+
+ synclock_unlock(&priv->big_lock);
+ ret = runcmd(SBIN_DIR "/glusterfs", "--volfile-server", volfileserver,
+ "--volfile-id", volname, "-l", logfile, "-p", pidfile_path,
+ "--client-pid", qpid, mountdir, NULL);
+ if (ret == 0) {
+ /* Block here till mount process is ready to accept FOPs.
+ * Else, if glusterd acquires biglock below before
+ * mount process is ready, then glusterd and mount process
+ * can get into a deadlock situation.
+ */
+ ret = sys_stat(mountdir, &buf);
+ if (ret < 0)
+ ret = -errno;
+ } else {
+ ret = -errno;
+ }
- GF_ASSERT (dict);
- GF_ASSERT (op_errstr);
+ synclock_lock(&priv->big_lock);
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
- goto out;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, -ret, GD_MSG_MOUNT_REQ_FAIL,
+ "Failed to mount glusterfs "
+ "client. Please check the log file %s for more details",
+ logfile);
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int
+glusterd_op_stage_quota(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ int ret = 0;
+ char *volname = NULL;
+ int type = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *hard_limit_str = NULL;
+ int64_t hard_limit = 0;
+ gf_boolean_t get_gfid = _gf_false;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_asprintf(op_errstr, FMTSTR_CHECK_VOL_EXISTS, volname);
+ goto out;
+ }
+
+ if (!glusterd_is_volume_started(volinfo)) {
+ *op_errstr = gf_strdup(
+ "Volume is stopped, start volume "
+ "before executing quota command.");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "type", SLEN("type"), &type);
+ if (ret) {
+ *op_errstr = gf_strdup(
+ "Volume quota failed, internal error, "
+ "unable to get type of operation");
+ goto out;
+ }
+
+ if ((!glusterd_is_volume_quota_enabled(volinfo)) &&
+ (type != GF_QUOTA_OPTION_TYPE_ENABLE)) {
+ *op_errstr = gf_strdup(
+ "Quota is disabled, please enable "
+ "quota");
+ ret = -1;
+ goto out;
+ }
+
+ if (type > GF_QUOTA_OPTION_TYPE_VERSION_OBJECTS) {
+ if (!glusterd_is_volume_inode_quota_enabled(volinfo) &&
+ type != GF_QUOTA_OPTION_TYPE_ENABLE_OBJECTS) {
+ *op_errstr = gf_strdup(
+ "Inode Quota is disabled, "
+ "please enable inode quota");
+ ret = -1;
+ goto out;
}
+ }
+
+ if (!glusterd_is_quota_supported(type, op_errstr)) {
+ ret = -1;
+ goto out;
+ }
+
+ if ((GF_QUOTA_OPTION_TYPE_ENABLE != type) &&
+ (glusterd_check_if_quota_trans_enabled(volinfo) != 0)) {
+ ret = -1;
+ gf_asprintf(op_errstr, "Quota is not enabled on volume %s", volname);
+ goto out;
+ }
+
+ switch (type) {
+ case GF_QUOTA_OPTION_TYPE_LIST:
+ case GF_QUOTA_OPTION_TYPE_LIST_OBJECTS:
+ case GF_QUOTA_OPTION_TYPE_LIMIT_USAGE:
+ case GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS:
+ case GF_QUOTA_OPTION_TYPE_REMOVE:
+ case GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS:
+ /* Quota auxiliary mount is needed by CLI
+ * for list command and need by glusterd for
+ * setting/removing limit
+ */
+ if (is_origin_glusterd(dict)) {
+ ret = glusterd_create_quota_auxiliary_mount(this, volname,
+ type);
+ if (ret) {
+ *op_errstr = gf_strdup(
+ "Failed to start aux "
+ "mount");
+ goto out;
+ }
+ }
+ break;
+ }
- exists = glusterd_check_volume_exists (volname);
- if (!exists) {
- gf_log ("", GF_LOG_ERROR, "Volume with name: %s "
- "does not exist",
- volname);
- *op_errstr = gf_strdup ("Invalid volume name");
- ret = -1;
+ switch (type) {
+ case GF_QUOTA_OPTION_TYPE_LIMIT_USAGE:
+ ret = dict_get_strn(dict, "hard-limit", SLEN("hard-limit"),
+ &hard_limit_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get hard-limit from dict");
goto out;
- }
+ }
+ ret = gf_string2bytesize_int64(hard_limit_str, &hard_limit);
+ if (ret) {
+ if (errno == ERANGE || hard_limit < 0)
+ gf_asprintf(op_errstr,
+ "Hard-limit "
+ "value out of range (0 - %" PRId64 "): %s",
+ hard_limit, hard_limit_str);
+ else
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_CONVERSION_FAILED,
+ "Failed to convert hard-limit "
+ "string to value");
+ goto out;
+ }
+ get_gfid = _gf_true;
+ break;
+ case GF_QUOTA_OPTION_TYPE_LIMIT_OBJECTS:
+ get_gfid = _gf_true;
+ break;
- ret = dict_get_int32 (dict, "type", &type);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get 'type' for quota op");
- *op_errstr = gf_strdup ("Volume quota failed, internal error "
- ", unable to get type of operation");
+ case GF_QUOTA_OPTION_TYPE_REMOVE:
+ case GF_QUOTA_OPTION_TYPE_REMOVE_OBJECTS:
+ get_gfid = _gf_true;
+ break;
+
+ case GF_QUOTA_OPTION_TYPE_SOFT_TIMEOUT:
+ case GF_QUOTA_OPTION_TYPE_HARD_TIMEOUT:
+ case GF_QUOTA_OPTION_TYPE_ALERT_TIME:
+ case GF_QUOTA_OPTION_TYPE_DEFAULT_SOFT_LIMIT:
+ ret = _glusterd_validate_quota_opts(dict, type, op_errstr);
+ if (ret)
goto out;
- }
+ break;
+ default:
+ break;
+ }
- ctx = glusterd_op_get_ctx();
- if (ctx && (type == GF_QUOTA_OPTION_TYPE_ENABLE
- || type == GF_QUOTA_OPTION_TYPE_LIST)) {
- /* Fuse mount req. only for enable & list-usage options*/
- if (!glusterd_is_fuse_available ()) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to open /dev/"
- "fuse (%s), quota command failed",
- strerror (errno));
- *op_errstr = gf_strdup ("Fuse unavailable");
- ret = -1;
- goto out;
- }
- }
+ if (get_gfid == _gf_true) {
+ ret = glusterd_get_gfid_from_brick(dict, volinfo, rsp_dict, op_errstr);
+ if (ret)
+ goto out;
+ }
+
+ ret = 0;
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ if (ret && op_errstr && *op_errstr)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_STAGE_QUOTA_FAIL, "%s",
+ *op_errstr);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
- return ret;
+ return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-quota.h b/xlators/mgmt/glusterd/src/glusterd-quota.h
new file mode 100644
index 00000000000..ab2092a9c6a
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-quota.h
@@ -0,0 +1,17 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_QUOTA_
+#define _GLUSTERD_QUOTA_
+
+int
+glusterd_store_quota_config(glusterd_volinfo_t *volinfo, char *path,
+ char *gfid_str, int opcode, char **op_errstr);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c
new file mode 100644
index 00000000000..f26d832a06d
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.c
@@ -0,0 +1,217 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-quotad-svc.h"
+#include "glusterd-messages.h"
+#include "glusterd-svc-helper.h"
+
+char *quotad_svc_name = "quotad";
+
+void
+glusterd_quotadsvc_build(glusterd_svc_t *svc)
+{
+ svc->manager = glusterd_quotadsvc_manager;
+ svc->start = glusterd_quotadsvc_start;
+ svc->stop = glusterd_svc_stop;
+}
+
+int
+glusterd_quotadsvc_init(glusterd_svc_t *svc)
+{
+ int ret = -1;
+
+ ret = glusterd_svc_init(svc, quotad_svc_name);
+ if (ret)
+ goto out;
+
+out:
+ return ret;
+}
+
+static int
+glusterd_quotadsvc_create_volfile()
+{
+ char filepath[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *conf = THIS->private;
+
+ glusterd_svc_build_volfile_path(quotad_svc_name, conf->workdir, filepath,
+ sizeof(filepath));
+ return glusterd_create_global_volfile(build_quotad_graph, filepath, NULL);
+}
+
+int
+glusterd_quotadsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+{
+ int ret = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ if (!svc->inited) {
+ ret = glusterd_quotadsvc_init(svc);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_QUOTASVC,
+ "Failed to init "
+ "quotad service");
+ goto out;
+ } else {
+ svc->inited = _gf_true;
+ gf_msg_debug(THIS->name, 0,
+ "quotad service "
+ "initialized");
+ }
+ }
+
+ volinfo = data;
+
+ /* If all the volumes are stopped or all shd compatible volumes
+ * are stopped then stop the service if:
+ * - volinfo is NULL or
+ * - volinfo is present and volume is shd compatible
+ * Otherwise create volfile and restart service if:
+ * - volinfo is NULL or
+ * - volinfo is present and volume is shd compatible
+ */
+ if (glusterd_are_all_volumes_stopped() ||
+ glusterd_all_volumes_with_quota_stopped()) {
+ if (!(volinfo && !glusterd_is_volume_quota_enabled(volinfo))) {
+ ret = svc->stop(svc, SIGTERM);
+ }
+ } else {
+ if (!(volinfo && !glusterd_is_volume_quota_enabled(volinfo))) {
+ ret = glusterd_quotadsvc_create_volfile();
+ if (ret)
+ goto out;
+
+ ret = svc->stop(svc, SIGTERM);
+ if (ret)
+ goto out;
+
+ ret = svc->start(svc, flags);
+ if (ret)
+ goto out;
+
+ ret = glusterd_conn_connect(&(svc->conn));
+ if (ret)
+ goto out;
+ }
+ }
+out:
+ if (ret)
+ gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name);
+
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_quotadsvc_start(glusterd_svc_t *svc, int flags)
+{
+ int i = 0;
+ int ret = -1;
+ dict_t *cmdline = NULL;
+ char key[16] = {0};
+ char *options[] = {svc->name, "--process-name", NULL};
+
+ cmdline = dict_new();
+ if (!cmdline) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ for (i = 0; options[i]; i++) {
+ ret = snprintf(key, sizeof(key), "arg%d", i);
+ ret = dict_set_strn(cmdline, key, ret, options[i]);
+ if (ret)
+ goto out;
+ }
+
+ ret = glusterd_svc_start(svc, flags, cmdline);
+
+out:
+ if (cmdline)
+ dict_unref(cmdline);
+
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_quotadsvc_reconfigure()
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ gf_boolean_t identical = _gf_false;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ if (glusterd_all_volumes_with_quota_stopped())
+ goto manager;
+
+ /*
+ * Check both OLD and NEW volfiles, if they are SAME by size
+ * and cksum i.e. "character-by-character". If YES, then
+ * NOTHING has been changed, just return.
+ */
+ ret = glusterd_svc_check_volfile_identical(priv->quotad_svc.name,
+ build_quotad_graph, &identical);
+ if (ret)
+ goto out;
+
+ if (identical) {
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * They are not identical. Find out if the topology is changed
+ * OR just the volume options. If just the options which got
+ * changed, then inform the xlator to reconfigure the options.
+ */
+ identical = _gf_false; /* RESET the FLAG */
+ ret = glusterd_svc_check_topology_identical(priv->quotad_svc.name,
+ build_quotad_graph, &identical);
+ if (ret)
+ goto out;
+
+ /* Topology is not changed, but just the options. But write the
+ * options to quotad volfile, so that quotad will be reconfigured.
+ */
+ if (identical) {
+ ret = glusterd_quotadsvc_create_volfile();
+ if (ret == 0) { /* Only if above PASSES */
+ ret = glusterd_fetchspec_notify(THIS);
+ }
+ goto out;
+ }
+manager:
+ /*
+ * quotad volfile's topology has been changed. quotad server needs
+ * to be RESTARTED to ACT on the changed volfile.
+ */
+ ret = priv->quotad_svc.manager(&(priv->quotad_svc), NULL,
+ PROC_START_NO_WAIT);
+
+out:
+ gf_msg_debug(this ? this->name : "Quotad", 0, "Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-quotad-svc.h b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.h
new file mode 100644
index 00000000000..e8d9bbee964
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-quotad-svc.h
@@ -0,0 +1,31 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_QUOTAD_SVC_H_
+#define _GLUSTERD_QUOTAD_SVC_H_
+
+#include "glusterd-svc-mgmt.h"
+
+void
+glusterd_quotadsvc_build(glusterd_svc_t *svc);
+
+int
+glusterd_quotadsvc_init(glusterd_svc_t *svc);
+
+int
+glusterd_quotadsvc_start(glusterd_svc_t *svc, int flags);
+
+int
+glusterd_quotadsvc_manager(glusterd_svc_t *svc, void *data, int flags);
+
+int
+glusterd_quotadsvc_reconfigure();
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-rcu.h b/xlators/mgmt/glusterd/src/glusterd-rcu.h
new file mode 100644
index 00000000000..c85f9bea8f8
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-rcu.h
@@ -0,0 +1,36 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_RCU_H
+#define _GLUSTERD_RCU_H
+
+#include <urcu-bp.h>
+#include <urcu/rculist.h>
+#include <urcu/compiler.h>
+#include <urcu/uatomic.h>
+#include <urcu-call-rcu.h>
+
+#ifdef URCU_OLD
+#include "rculist-extra.h"
+#endif
+
+#include <glusterfs/xlator.h>
+
+/* gd_rcu_head is a composite struct, composed of struct rcu_head and a this
+ * pointer, which is used to pass the THIS pointer to call_rcu callbacks.
+ *
+ * Use this in place of struct rcu_head when embedding into another struct
+ */
+typedef struct glusterd_rcu_head_ {
+ struct rcu_head head;
+ xlator_t *this;
+} gd_rcu_head;
+
+#endif /* _GLUSTERD_RCU_H */
diff --git a/xlators/mgmt/glusterd/src/glusterd-rebalance.c b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
index 67c9679772b..458bf168ede 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rebalance.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rebalance.c
@@ -1,665 +1,1422 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include <inttypes.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/resource.h>
#include <sys/statvfs.h>
-#include "globals.h"
-#include "compat.h"
+#include <glusterfs/compat.h>
#include "protocol-common.h"
-#include "xlator.h"
-#include "logging.h"
-#include "timer.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/timer.h>
#include "glusterd-mem-types.h"
#include "glusterd.h"
#include "glusterd-sm.h"
#include "glusterd-op-sm.h"
#include "glusterd-utils.h"
+#include "glusterd-mgmt.h"
+#include "glusterd-messages.h"
#include "glusterd-store.h"
-#include "run.h"
+#include <glusterfs/run.h>
#include "glusterd-volgen.h"
+#include "glusterd-messages.h"
-#include "syscall.h"
+#include <glusterfs/syscall.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
-int32_t
-glusterd3_1_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe);
-
-void
-glusterd_rebalance_cmd_attempted_log (int cmd, char *volname)
-{
- switch (cmd) {
- case GF_DEFRAG_CMD_START_LAYOUT_FIX:
- gf_cmd_log ("Volume rebalance"," on volname: %s "
- "cmd: start fix layout , attempted",
- volname);
- gf_log ("glusterd", GF_LOG_INFO, "Received rebalance "
- "volume start layout fix on %s", volname);
- break;
- case GF_DEFRAG_CMD_START_FORCE:
- gf_cmd_log ("Volume rebalance"," on volname: %s "
- "cmd: start data force attempted",
- volname);
- gf_log ("glusterd", GF_LOG_INFO, "Received rebalance "
- "volume start migrate data on %s", volname);
- break;
- case GF_DEFRAG_CMD_START:
- gf_cmd_log ("Volume rebalance"," on volname: %s "
- "cmd: start, attempted", volname);
- gf_log ("glusterd", GF_LOG_INFO, "Received rebalance "
- "volume start on %s", volname);
- break;
- case GF_DEFRAG_CMD_STOP:
- gf_cmd_log ("Volume rebalance"," on volname: %s "
- "cmd: stop, attempted", volname);
- gf_log ("glusterd", GF_LOG_INFO, "Received rebalance "
- "volume stop on %s", volname);
- break;
- default:
- break;
- }
-}
-
-void
-glusterd_rebalance_cmd_log (int cmd, char *volname, int status)
-{
- if (cmd != GF_DEFRAG_CMD_STATUS) {
- gf_cmd_log ("volume rebalance"," on volname: %s %d %s",
- volname, cmd, ((status)?"FAILED":"SUCCESS"));
- }
-}
+#define GLUSTERD_GET_DEFRAG_SOCK_FILE(path, volinfo) \
+ do { \
+ int32_t _defrag_sockfile_len; \
+ char tmppath[PATH_MAX] = { \
+ 0, \
+ }; \
+ _defrag_sockfile_len = snprintf( \
+ tmppath, PATH_MAX, \
+ DEFAULT_VAR_RUN_DIRECTORY "/gluster-%s-%s-%s.sock", "rebalance", \
+ volinfo->volname, uuid_utoa(MY_UUID)); \
+ if ((_defrag_sockfile_len < 0) || \
+ (_defrag_sockfile_len >= PATH_MAX)) { \
+ path[0] = 0; \
+ } else { \
+ glusterd_set_socket_filepath(tmppath, path, sizeof(path)); \
+ } \
+ } while (0)
+int32_t
+glusterd_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe);
int
-glusterd_defrag_start_validate (glusterd_volinfo_t *volinfo, char *op_errstr,
- size_t len)
+glusterd_defrag_start_validate(glusterd_volinfo_t *volinfo, char *op_errstr,
+ size_t len, glusterd_op_t op)
{
- int ret = -1;
-
- if (glusterd_is_defrag_on (volinfo)) {
- gf_log ("glusterd", GF_LOG_DEBUG,
- "rebalance on volume %s already started",
- volinfo->volname);
- snprintf (op_errstr, len, "Rebalance on %s is already started",
- volinfo->volname);
- goto out;
- }
-
- if (glusterd_is_rb_started (volinfo) ||
- glusterd_is_rb_paused (volinfo)) {
- gf_log ("glusterd", GF_LOG_DEBUG,
- "Rebalance failed as replace brick is in progress on volume %s",
- volinfo->volname);
- snprintf (op_errstr, len, "Rebalance failed as replace brick is in progress on "
- "volume %s", volinfo->volname);
- goto out;
- }
- ret = 0;
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ /* Check only if operation is not remove-brick */
+ if ((GD_OP_REMOVE_BRICK != op) && !gd_is_remove_brick_committed(volinfo)) {
+ gf_msg_debug(this->name, 0,
+ "A remove-brick task on "
+ "volume %s is not yet committed",
+ volinfo->volname);
+ snprintf(op_errstr, len,
+ "A remove-brick task on volume %s is"
+ " not yet committed. Either commit or stop the "
+ "remove-brick task.",
+ volinfo->volname);
+ goto out;
+ }
+
+ if (glusterd_is_defrag_on(volinfo)) {
+ gf_msg_debug(this->name, 0, "rebalance on volume %s already started",
+ volinfo->volname);
+ snprintf(op_errstr, len, "Rebalance on %s is already started",
+ volinfo->volname);
+ goto out;
+ }
+
+ ret = 0;
out:
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
int32_t
-glusterd_defrag_notify (struct rpc_clnt *rpc, void *mydata,
- rpc_clnt_event_t event, void *data)
+__glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
{
- glusterd_volinfo_t *volinfo = NULL;
- glusterd_defrag_info_t *defrag = NULL;
- int ret = 0;
- char pidfile[PATH_MAX];
- glusterd_conf_t *priv = NULL;
-
- priv = THIS->private;
- if (!priv)
- return 0;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_defrag_info_t *defrag = NULL;
+ int ret = 0;
+ char pidfile[PATH_MAX];
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ int pid = -1;
+
+ this = THIS;
+ if (!this)
+ return 0;
- volinfo = mydata;
- if (!volinfo)
- return 0;
+ priv = this->private;
+ if (!priv)
+ return 0;
- defrag = volinfo->defrag;
- if (!defrag)
- return 0;
+ volinfo = mydata;
+ if (!volinfo)
+ return 0;
- if ((event == RPC_CLNT_DISCONNECT) && defrag->connected)
- volinfo->defrag = NULL;
+ defrag = volinfo->rebal.defrag;
+ if (!defrag)
+ return 0;
- GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv);
+ if ((event == RPC_CLNT_DISCONNECT) && defrag->connected)
+ volinfo->rebal.defrag = NULL;
- switch (event) {
- case RPC_CLNT_CONNECT:
- {
- if (defrag->connected)
- return 0;
+ GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv);
- LOCK (&defrag->lock);
- {
- defrag->connected = 1;
- }
- UNLOCK (&defrag->lock);
+ switch (event) {
+ case RPC_CLNT_CONNECT: {
+ if (defrag->connected)
+ return 0;
- gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_CONNECT",
- rpc->conn.trans->name);
- break;
+ LOCK(&defrag->lock);
+ {
+ defrag->connected = 1;
+ }
+ UNLOCK(&defrag->lock);
+
+ gf_msg_debug(this->name, 0, "%s got RPC_CLNT_CONNECT",
+ rpc->conn.name);
+ break;
}
- case RPC_CLNT_DISCONNECT:
- {
- if (!defrag->connected)
- return 0;
+ case RPC_CLNT_DISCONNECT: {
+ if (!defrag->connected)
+ return 0;
- LOCK (&defrag->lock);
- {
- defrag->connected = 0;
- }
- UNLOCK (&defrag->lock);
-
- if (!glusterd_is_service_running (pidfile, NULL)) {
- if (volinfo->defrag_status ==
- GF_DEFRAG_STATUS_STARTED) {
- volinfo->defrag_status =
- GF_DEFRAG_STATUS_FAILED;
- } else {
- volinfo->defrag_cmd = 0;
- }
- }
-
- glusterd_store_perform_node_state_store (volinfo);
-
- if (defrag->rpc) {
- rpc_clnt_unref (defrag->rpc);
- defrag->rpc = NULL;
+ LOCK(&defrag->lock);
+ {
+ defrag->connected = 0;
+ }
+ UNLOCK(&defrag->lock);
+
+ if (!gf_is_service_running(pidfile, &pid)) {
+ if (volinfo->rebal.defrag_status == GF_DEFRAG_STATUS_STARTED) {
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_FAILED;
}
- if (defrag->cbk_fn)
- defrag->cbk_fn (volinfo, volinfo->defrag_status);
+ }
- if (defrag)
- GF_FREE (defrag);
- gf_log ("", GF_LOG_DEBUG, "%s got RPC_CLNT_DISCONNECT",
- rpc->conn.trans->name);
- break;
+ glusterd_store_perform_node_state_store(volinfo);
+
+ rpc_clnt_disable(defrag->rpc);
+ glusterd_defrag_rpc_put(defrag);
+ if (defrag->cbk_fn)
+ defrag->cbk_fn(volinfo, volinfo->rebal.defrag_status);
+
+ GF_FREE(defrag);
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REBALANCE_DISCONNECTED,
+ "Rebalance process for volume %s has disconnected.",
+ volinfo->volname);
+ break;
}
+ case RPC_CLNT_DESTROY:
+ glusterd_volinfo_unref(volinfo);
+ break;
default:
- gf_log ("", GF_LOG_TRACE,
- "got some other RPC event %d", event);
- ret = 0;
- break;
- }
+ gf_msg_trace(this->name, 0, "got some other RPC event %d", event);
+ ret = 0;
+ break;
+ }
- return ret;
+ return ret;
}
-int
-glusterd_handle_defrag_start (glusterd_volinfo_t *volinfo, char *op_errstr,
- size_t len, int cmd, defrag_cbk_fn_t cbk)
+int32_t
+glusterd_defrag_notify(struct rpc_clnt *rpc, void *mydata,
+ rpc_clnt_event_t event, void *data)
{
- int ret = -1;
- glusterd_defrag_info_t *defrag = NULL;
- runner_t runner = {0,};
- glusterd_conf_t *priv = NULL;
- char defrag_path[PATH_MAX];
- char sockfile[PATH_MAX] = {0,};
- char pidfile[PATH_MAX] = {0,};
- char logfile[PATH_MAX] = {0,};
- dict_t *options = NULL;
-#ifdef DEBUG
- char valgrind_logfile[PATH_MAX] = {0,};
-#endif
- priv = THIS->private;
-
- GF_ASSERT (volinfo);
- GF_ASSERT (op_errstr);
-
- ret = glusterd_defrag_start_validate (volinfo, op_errstr, len);
- if (ret)
- goto out;
- if (!volinfo->defrag)
- volinfo->defrag = GF_CALLOC (1, sizeof (glusterd_defrag_info_t),
- gf_gld_mt_defrag_info);
- if (!volinfo->defrag)
- goto out;
-
- defrag = volinfo->defrag;
+ return glusterd_big_locked_notify(rpc, mydata, event, data,
+ __glusterd_defrag_notify);
+}
- defrag->cmd = cmd;
+int
+glusterd_handle_defrag_start(glusterd_volinfo_t *volinfo, char *op_errstr,
+ size_t len, int cmd, defrag_cbk_fn_t cbk,
+ glusterd_op_t op)
+{
+ xlator_t *this = NULL;
+ int ret = -1;
+ glusterd_defrag_info_t *defrag = NULL;
+ runner_t runner = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ char defrag_path[PATH_MAX];
+ char sockfile[PATH_MAX] = {
+ 0,
+ };
+ char pidfile[PATH_MAX] = {
+ 0,
+ };
+ char logfile[PATH_MAX] = {
+ 0,
+ };
+ char volname[PATH_MAX] = {
+ 0,
+ };
+ char valgrind_logfile[PATH_MAX] = {
+ 0,
+ };
+ char msg[1024] = {
+ 0,
+ };
+ char *volfileserver = NULL;
+ char *localtime_logging = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("glusterd", priv, out);
+
+ GF_ASSERT(volinfo);
+ GF_ASSERT(op_errstr);
+
+ ret = glusterd_defrag_start_validate(volinfo, op_errstr, len, op);
+ if (ret)
+ goto out;
+ if (!volinfo->rebal.defrag)
+ volinfo->rebal.defrag = GF_CALLOC(1, sizeof(*volinfo->rebal.defrag),
+ gf_gld_mt_defrag_info);
+ if (!volinfo->rebal.defrag)
+ goto out;
+
+ defrag = volinfo->rebal.defrag;
+
+ defrag->cmd = cmd;
+
+ volinfo->rebal.defrag_cmd = cmd;
+ volinfo->rebal.op = op;
+
+ LOCK_INIT(&defrag->lock);
+
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED;
+
+ glusterd_volinfo_reset_defrag_stats(volinfo);
+ glusterd_store_perform_node_state_store(volinfo);
+
+ GLUSTERD_GET_DEFRAG_DIR(defrag_path, volinfo, priv);
+ ret = mkdir_p(defrag_path, 0755, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
+ "Failed to create "
+ "directory %s",
+ defrag_path);
+ goto out;
+ }
+
+ GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo);
+ GLUSTERD_GET_DEFRAG_PID_FILE(pidfile, volinfo, priv);
+ snprintf(logfile, PATH_MAX, "%s/%s-%s.log", priv->logdir, volinfo->volname,
+ "rebalance");
+ runinit(&runner);
+
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
+ snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s-rebalance.log",
+ priv->logdir, volinfo->volname);
+
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
+ runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
+ }
+
+ snprintf(volname, sizeof(volname), "rebalance/%s", volinfo->volname);
+
+ if (dict_get_strn(this->options, "transport.socket.bind-address",
+ SLEN("transport.socket.bind-address"),
+ &volfileserver) != 0) {
+ volfileserver = "localhost";
+ }
+
+ runner_add_args(
+ &runner, SBIN_DIR "/glusterfs", "-s", volfileserver, "--volfile-id",
+ volname, "--xlator-option", "*dht.use-readdirp=yes", "--xlator-option",
+ "*dht.lookup-unhashed=yes", "--xlator-option",
+ "*dht.assert-no-child-down=yes", "--xlator-option",
+ "*dht.readdir-optimize=on", "--process-name", "rebalance", NULL);
+
+ runner_add_arg(&runner, "--xlator-option");
+ runner_argprintf(&runner, "*dht.rebalance-cmd=%d", cmd);
+ runner_add_arg(&runner, "--xlator-option");
+ runner_argprintf(&runner, "*dht.node-uuid=%s", uuid_utoa(MY_UUID));
+ runner_add_arg(&runner, "--xlator-option");
+ runner_argprintf(&runner, "*dht.commit-hash=%u",
+ volinfo->rebal.commit_hash);
+ runner_add_arg(&runner, "--socket-file");
+ runner_argprintf(&runner, "%s", sockfile);
+ runner_add_arg(&runner, "--pid-file");
+ runner_argprintf(&runner, "%s", pidfile);
+ runner_add_arg(&runner, "-l");
+ runner_argprintf(&runner, "%s", logfile);
+ if (volinfo->memory_accounting)
+ runner_add_arg(&runner, "--mem-accounting");
+ if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY,
+ SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY),
+ &localtime_logging) == 0) {
+ if (strcmp(localtime_logging, "enable") == 0)
+ runner_add_arg(&runner, "--localtime-logging");
+ }
+
+ snprintf(msg, sizeof(msg), "Starting the rebalance service for volume %s",
+ volinfo->volname);
+ runner_log(&runner, this->name, GF_LOG_DEBUG, msg);
+
+ ret = runner_run_nowait(&runner);
+ if (ret) {
+ gf_msg_debug("glusterd", 0, "rebalance command failed");
+ goto out;
+ }
+
+ sleep(5);
+
+ ret = glusterd_rebalance_rpc_create(volinfo);
+
+ // FIXME: this cbk is passed as NULL in all occurrences. May be
+ // we never needed it.
+ if (cbk)
+ defrag->cbk_fn = cbk;
- LOCK_INIT (&defrag->lock);
+out:
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
- volinfo->defrag_status = GF_DEFRAG_STATUS_STARTED;
+int
+glusterd_rebalance_defrag_init(glusterd_volinfo_t *volinfo, defrag_cbk_fn_t cbk)
- glusterd_volinfo_reset_defrag_stats (volinfo);
- volinfo->defrag_cmd = cmd;
- glusterd_store_perform_node_state_store (volinfo);
+{
+ glusterd_defrag_info_t *defrag = NULL;
+ int ret = -1;
+
+ if (!volinfo->rebal.defrag) {
+ volinfo->rebal.defrag = GF_CALLOC(1, sizeof(*volinfo->rebal.defrag),
+ gf_gld_mt_defrag_info);
+ } else {
+ /*
+ * if defrag variable is already initialized,
+ * we skip the initialization.
+ */
+ ret = 0;
+ goto out;
+ }
+
+ if (!volinfo->rebal.defrag)
+ goto out;
+ defrag = volinfo->rebal.defrag;
+
+ defrag->cmd = volinfo->rebal.defrag_cmd;
+ LOCK_INIT(&defrag->lock);
+ if (cbk)
+ defrag->cbk_fn = cbk;
+ ret = 0;
+out:
+ return ret;
+}
- GLUSTERD_GET_DEFRAG_DIR (defrag_path, volinfo, priv);
- ret = mkdir_p (defrag_path, 0777, _gf_true);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Failed to create "
- "directory %s", defrag_path);
- goto out;
- }
+int
+glusterd_rebalance_rpc_create(glusterd_volinfo_t *volinfo)
+{
+ dict_t *options = NULL;
+ char sockfile[PATH_MAX] = {
+ 0,
+ };
+ int ret = -1;
+ glusterd_defrag_info_t *defrag = volinfo->rebal.defrag;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ // rebalance process is not started
+ if (!defrag)
+ goto out;
+
+ options = dict_new();
+ if (!options) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ GLUSTERD_GET_DEFRAG_SOCK_FILE(sockfile, volinfo);
+
+ /* Setting frame-timeout to 10mins (600seconds).
+ * Unix domain sockets ensures that the connection is reliable. The
+ * default timeout of 30mins used for unreliable network connections is
+ * too long for unix domain socket connections.
+ */
+ ret = rpc_transport_unix_options_build(options, sockfile, 600);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_UNIX_OP_BUILD_FAIL,
+ "Unix options build failed");
+ goto out;
+ }
+
+ glusterd_volinfo_ref(volinfo);
+ ret = glusterd_rpc_create(&defrag->rpc, options, glusterd_defrag_notify,
+ volinfo, _gf_true);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_RPC_CREATE_FAIL,
+ "Glusterd RPC creation failed");
+ goto out;
+ }
+ ret = 0;
+out:
+ if (options)
+ dict_unref(options);
+ return ret;
+}
- GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv);
- GLUSTERD_GET_DEFRAG_PID_FILE (pidfile, volinfo, priv);
- snprintf (logfile, PATH_MAX, "%s/%s-rebalance.log",
- DEFAULT_LOG_FILE_DIRECTORY, volinfo->volname);
- runinit (&runner);
-#ifdef DEBUG
- if (priv->valgrind) {
- snprintf (valgrind_logfile, PATH_MAX,
- "%s/valgrind-%s-rebalance.log",
- DEFAULT_LOG_FILE_DIRECTORY,
- volinfo->volname);
-
- runner_add_args (&runner, "valgrind", "--leak-check=full",
- "--trace-children=yes", NULL);
- runner_argprintf (&runner, "--log-file=%s", valgrind_logfile);
- }
-#endif
-
- runner_add_args (&runner, SBIN_DIR"/glusterfs",
- "-s", "localhost", "--volfile-id", volinfo->volname,
- "--xlator-option", "*dht.use-readdirp=yes",
- "--xlator-option", "*dht.lookup-unhashed=yes",
- "--xlator-option", "*dht.assert-no-child-down=yes",
- "--xlator-option", "*replicate*.data-self-heal=off",
- "--xlator-option",
- "*replicate*.metadata-self-heal=off",
- "--xlator-option", "*replicate*.entry-self-heal=off",
- NULL);
- runner_add_arg (&runner, "--xlator-option");
- runner_argprintf ( &runner, "*dht.rebalance-cmd=%d",cmd);
- runner_add_arg (&runner, "--xlator-option");
- runner_argprintf (&runner, "*dht.node-uuid=%s", uuid_utoa(MY_UUID));
- runner_add_arg (&runner, "--socket-file");
- runner_argprintf (&runner, "%s",sockfile);
- runner_add_arg (&runner, "--pid-file");
- runner_argprintf (&runner, "%s",pidfile);
- runner_add_arg (&runner, "-l");
- runner_argprintf (&runner, logfile);
- if (volinfo->memory_accounting)
- runner_add_arg (&runner, "--mem-accounting");
-
- ret = runner_run_reuse (&runner);
- if (ret) {
- runner_log (&runner, "glusterd", GF_LOG_DEBUG, "command failed");
- runner_end (&runner);
- goto out;
- }
+int
+glusterd_rebalance_cmd_validate(int cmd, char *volname,
+ glusterd_volinfo_t **volinfo, char *op_errstr,
+ size_t len)
+{
+ int ret = -1;
+
+ if (glusterd_volinfo_find(volname, volinfo)) {
+ gf_msg("glusterd", GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Received rebalance on invalid"
+ " volname %s",
+ volname);
+ snprintf(op_errstr, len, "Volume %s does not exist", volname);
+ goto out;
+ }
+ if ((*volinfo)->brick_count <= (*volinfo)->dist_leaf_count) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_DISTRIBUTE,
+ "Volume %s is not a "
+ "distribute type or contains only 1 brick",
+ volname);
+ snprintf(op_errstr, len,
+ "Volume %s is not a distribute "
+ "volume or contains only 1 brick.\n"
+ "Not performing rebalance",
+ volname);
+ goto out;
+ }
+
+ if ((*volinfo)->status != GLUSTERD_STATUS_STARTED) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOL_STOPPED,
+ "Received rebalance on stopped"
+ " volname %s",
+ volname);
+ snprintf(op_errstr, len,
+ "Volume %s needs to "
+ "be started to perform rebalance",
+ volname);
+ goto out;
+ }
+
+ ret = 0;
- sleep (5);
- ret = rpc_clnt_transport_unix_options_build (&options, sockfile);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed");
- goto out;
- }
+out:
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
- ret = glusterd_rpc_create (&defrag->rpc, options,
- glusterd_defrag_notify, volinfo);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed");
- goto out;
+int
+__glusterd_handle_defrag_volume(rpcsvc_request_t *req)
+{
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ glusterd_conf_t *priv = NULL;
+ int32_t op = GD_OP_NONE;
+ dict_t *dict = NULL;
+ char *volname = NULL;
+ gf_cli_defrag_type cmd = 0;
+ char msg[2048] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(msg, sizeof(msg),
+ "Unable to decode the "
+ "command");
+ goto out;
}
-
- if (cbk)
- defrag->cbk_fn = cbk;
-
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Failed to get volume name");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg);
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"),
+ (int32_t *)&cmd);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Failed to get command");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg);
+ goto out;
+ }
+
+ ret = dict_set_static_bin(dict, "node-uuid", MY_UUID, 16);
+ if (ret)
+ goto out;
+
+ if ((cmd == GF_DEFRAG_CMD_STATUS) || (cmd == GF_DEFRAG_CMD_STOP)) {
+ op = GD_OP_DEFRAG_BRICK_VOLUME;
+ } else
+ op = GD_OP_REBALANCE;
+
+ if (priv->op_version < GD_OP_VERSION_6_0) {
+ gf_msg_debug(this->name, 0,
+ "The cluster is operating at "
+ "version less than %d. Falling back "
+ "to op-sm framework.",
+ GD_OP_VERSION_6_0);
+ ret = glusterd_op_begin(req, op, dict, msg, sizeof(msg));
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ } else {
+ ret = glusterd_mgmt_v3_initiate_all_phases_with_brickop_phase(req, op,
+ dict);
+ }
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (ret) {
+ if (msg[0] == '\0')
+ snprintf(msg, sizeof(msg), "Operation failed");
+ ret = glusterd_op_send_cli_response(GD_OP_REBALANCE, ret, 0, req, dict,
+ msg);
+ }
+
+ free(cli_req.dict.dict_val); // malloced by xdr
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
-
int
-glusterd_rebalance_rpc_create (glusterd_volinfo_t *volinfo,
- glusterd_conf_t *priv, int cmd)
+glusterd_handle_defrag_volume(rpcsvc_request_t *req)
{
- dict_t *options = NULL;
- char sockfile[PATH_MAX] = {0,};
- int ret = -1;
- glusterd_defrag_info_t *defrag = NULL;
-
- if (!volinfo->defrag)
- volinfo->defrag = GF_CALLOC (1, sizeof (glusterd_defrag_info_t),
- gf_gld_mt_defrag_info);
- if (!volinfo->defrag)
- goto out;
-
- defrag = volinfo->defrag;
+ return glusterd_big_locked_handler(req, __glusterd_handle_defrag_volume);
+}
- defrag->cmd = cmd;
+static int
+glusterd_brick_validation(dict_t *dict, char *key, data_t *value, void *data)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *volinfo = data;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = glusterd_volume_brickinfo_get_by_brick(value->data, volinfo,
+ &brickinfo, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_BRICK_NOT_FOUND,
+ "Incorrect brick %s for "
+ "volume %s",
+ value->data, volinfo->volname);
+ return ret;
+ }
+
+ if (!brickinfo->decommissioned) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_BRICK_NOT_FOUND,
+ "Incorrect brick %s for "
+ "volume %s",
+ value->data, volinfo->volname);
+ ret = -1;
+ return ret;
+ }
- LOCK_INIT (&defrag->lock);
+ return ret;
+}
- GLUSTERD_GET_DEFRAG_SOCK_FILE (sockfile, volinfo, priv);
- ret = rpc_clnt_transport_unix_options_build (&options, sockfile);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Unix options build failed");
- goto out;
+int
+glusterd_set_rebalance_id_in_rsp_dict(dict_t *req_dict, dict_t *rsp_dict)
+{
+ int ret = -1;
+ int32_t cmd = 0;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char msg[2048] = {0};
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(req_dict);
+
+ ret = dict_get_strn(rsp_dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "volname not found");
+ goto out;
+ }
+
+ ret = dict_get_int32n(rsp_dict, "rebalance-command",
+ SLEN("rebalance-command"), &cmd);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "cmd not found");
+ goto out;
+ }
+
+ ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg,
+ sizeof(msg));
+ if (ret) {
+ gf_msg_debug(this->name, 0, "failed to validate");
+ goto out;
+ }
+
+ /* reblance id is generted in glusterd_mgmt_v3_op_stage_rebalance(), but
+ * rsp_dict is unavailable there. So copying it to rsp_dict from req_dict
+ * here. So that cli can display the rebalance id.*/
+ if ((cmd == GF_DEFRAG_CMD_START) ||
+ (cmd == GF_DEFRAG_CMD_START_LAYOUT_FIX) ||
+ (cmd == GF_DEFRAG_CMD_START_FORCE)) {
+ if (is_origin_glusterd(rsp_dict)) {
+ ret = dict_get_strn(req_dict, GF_REBALANCE_TID_KEY,
+ SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Missing rebalance-id");
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_REBALANCE_ID_MISSING, "%s", msg);
+ ret = 0;
+ } else {
+ gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+ ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id,
+ rsp_dict, GF_REBALANCE_TID_KEY,
+ SLEN(GF_REBALANCE_TID_KEY));
+ if (ret) {
+ snprintf(msg, sizeof(msg),
+ "Failed to set rebalance id for volume %s",
+ volname);
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_DICT_SET_FAILED, "%s", msg);
+ }
+ }
}
-
- ret = glusterd_rpc_create (&defrag->rpc, options,
- glusterd_defrag_notify, volinfo);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "RPC create failed");
+ }
+
+ /* Set task-id, if available, in rsp_dict for operations other than
+ * start. This is needed when we want rebalance id in xml output
+ */
+ if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) {
+ if (!gf_uuid_is_null(volinfo->rebal.rebalance_id)) {
+ if (GD_OP_REMOVE_BRICK == volinfo->rebal.op)
+ ret = glusterd_copy_uuid_to_dict(
+ volinfo->rebal.rebalance_id, rsp_dict,
+ GF_REMOVE_BRICK_TID_KEY, SLEN(GF_REMOVE_BRICK_TID_KEY));
+ else
+ ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id,
+ rsp_dict, GF_REBALANCE_TID_KEY,
+ SLEN(GF_REBALANCE_TID_KEY));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set task-id for volume %s", volname);
goto out;
+ }
}
- ret = 0;
+ }
out:
- return ret;
+ return ret;
}
int
-glusterd_rebalance_cmd_validate (int cmd, char *volname,
- glusterd_volinfo_t **volinfo,
- char *op_errstr, size_t len)
+glusterd_mgmt_v3_op_stage_rebalance(dict_t *dict, char **op_errstr)
{
- int ret = -1;
-
- if (glusterd_volinfo_find(volname, volinfo)) {
- gf_log ("glusterd", GF_LOG_ERROR, "Received rebalance on invalid"
- " volname %s", volname);
- snprintf (op_errstr, len, "Volume %s does not exist",
- volname);
+ char *volname = NULL;
+ char *cmd_str = NULL;
+ int ret = 0;
+ int32_t cmd = 0;
+ char msg[2048] = {0};
+ glusterd_volinfo_t *volinfo = NULL;
+ char *task_id_str = NULL;
+ xlator_t *this = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "volname not found");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"),
+ &cmd);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "cmd not found");
+ goto out;
+ }
+
+ ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg,
+ sizeof(msg));
+ if (ret) {
+ gf_msg_debug(this->name, 0, "failed to validate");
+ goto out;
+ }
+ switch (cmd) {
+ case GF_DEFRAG_CMD_START:
+ case GF_DEFRAG_CMD_START_LAYOUT_FIX:
+ /* Check if the connected clients are all of version
+ * glusterfs-3.6 and higher. This is needed to prevent some data
+ * loss issues that could occur when older clients are connected
+ * when rebalance is run. This check can be bypassed by using
+ * 'force'
+ */
+ ret = glusterd_check_client_op_version_support(
+ volname, GD_OP_VERSION_3_6_0, NULL);
+ if (ret) {
+ ret = gf_asprintf(op_errstr,
+ "Volume %s has one or "
+ "more connected clients of a version"
+ " lower than GlusterFS-v3.6.0. "
+ "Starting rebalance in this state "
+ "could lead to data loss.\nPlease "
+ "disconnect those clients before "
+ "attempting this command again.",
+ volname);
goto out;
- }
- if ((*volinfo)->brick_count <= (*volinfo)->dist_leaf_count) {
- gf_log ("glusterd", GF_LOG_ERROR, "Volume %s is not a "
- "distribute type or contains only 1 brick", volname);
- snprintf (op_errstr, len, "Volume %s is not a distribute "
- "volume or contains only 1 brick.\n"
- "Not performing rebalance", volname);
+ }
+ /* Fall through */
+ case GF_DEFRAG_CMD_START_FORCE:
+ if (is_origin_glusterd(dict)) {
+ ret = glusterd_generate_and_set_task_id(
+ dict, GF_REBALANCE_TID_KEY, SLEN(GF_REBALANCE_TID_KEY));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL,
+ "Failed to generate task-id");
+ goto out;
+ }
+ } else {
+ ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY,
+ SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Missing rebalance-id");
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_REBALANCE_ID_MISSING, "%s", msg);
+ ret = 0;
+ }
+ }
+ ret = glusterd_defrag_start_validate(volinfo, msg, sizeof(msg),
+ GD_OP_REBALANCE);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "defrag start validate "
+ "failed for volume %s.",
+ volinfo->volname);
goto out;
- }
+ }
+ break;
+ case GF_DEFRAG_CMD_STATUS:
+ case GF_DEFRAG_CMD_STOP:
- if ((*volinfo)->status != GLUSTERD_STATUS_STARTED) {
- gf_log ("glusterd", GF_LOG_ERROR, "Received rebalance on stopped"
- " volname %s", volname);
- snprintf (op_errstr, len, "Volume %s needs to "
- "be started to perform rebalance", volname);
+ ret = dict_get_strn(dict, "cmd-str", SLEN("cmd-str"), &cmd_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get "
+ "command string");
+ ret = -1;
goto out;
- }
+ }
+ if ((strstr(cmd_str, "rebalance") != NULL) &&
+ (volinfo->rebal.op != GD_OP_REBALANCE)) {
+ snprintf(msg, sizeof(msg),
+ "Rebalance not started "
+ "for volume %s.",
+ volinfo->volname);
+ ret = -1;
+ goto out;
+ }
+
+ if (strstr(cmd_str, "remove-brick") != NULL) {
+ if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) {
+ snprintf(msg, sizeof(msg),
+ "remove-brick not "
+ "started for volume %s.",
+ volinfo->volname);
+ ret = -1;
+ goto out;
+ }
- ret = 0;
+ /* For remove-brick status/stop command check whether
+ * given input brick is part of volume or not.*/
+
+ ret = dict_foreach_fnmatch(dict, "brick*",
+ glusterd_brick_validation, volinfo);
+ if (ret == -1) {
+ snprintf(msg, sizeof(msg),
+ "Incorrect brick"
+ " for volume %s",
+ volinfo->volname);
+ goto out;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+ ret = 0;
out:
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (ret && op_errstr && msg[0])
+ *op_errstr = gf_strdup(msg);
+
+ return ret;
}
int
-glusterd_handle_defrag_volume (rpcsvc_request_t *req)
+glusterd_mgmt_v3_op_rebalance(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
{
- int32_t ret = -1;
- gf_cli_req cli_req = {{0,}};
- glusterd_conf_t *priv = NULL;
- dict_t *dict = NULL;
- char *volname = NULL;
- gf_cli_defrag_type cmd = 0;
-
- GF_ASSERT (req);
-
- priv = THIS->private;
+ char *volname = NULL;
+ int ret = 0;
+ int32_t cmd = 0;
+ char msg[2048] = {0};
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *tmp = NULL;
+ gf_boolean_t volfile_update = _gf_false;
+ char *task_id_str = NULL;
+ xlator_t *this = NULL;
+ uint32_t commit_hash;
+ int32_t is_force = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "volname not given");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"),
+ &cmd);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "command not given");
+ goto out;
+ }
+
+ ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg,
+ sizeof(msg));
+ if (ret) {
+ gf_msg_debug(this->name, 0, "cmd validate failed");
+ goto out;
+ }
+
+ switch (cmd) {
+ case GF_DEFRAG_CMD_START:
+ case GF_DEFRAG_CMD_START_LAYOUT_FIX:
+ case GF_DEFRAG_CMD_START_FORCE:
- if (!xdr_to_generic (req->msg[0], &cli_req,
- (xdrproc_t)xdr_gf_cli_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
- if (cli_req.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
-
- ret = dict_unserialize (cli_req.dict.dict_val,
- cli_req.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
+ ret = dict_get_int32n(dict, "force", SLEN("force"), &is_force);
+ if (ret)
+ is_force = 0;
+ if (!is_force) {
+ /* Reset defrag status to 'NOT STARTED' whenever a
+ * remove-brick/rebalance command is issued to remove
+ * stale information from previous run.
+ */
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED;
+
+ ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY,
+ SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Missing rebalance"
+ " id");
+ ret = 0;
+ } else {
+ gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_REBALANCE;
}
- }
+ if (!gd_should_i_start_rebalance(volinfo)) {
+ /* Store the rebalance-id and rebalance command
+ * even if the peer isn't starting a rebalance
+ * process. On peers where a rebalance process
+ * is started, glusterd_handle_defrag_start
+ * performs the storing.
+ * Storing this is needed for having
+ * 'volume status' work correctly.
+ */
+ glusterd_store_perform_node_state_store(volinfo);
+ break;
+ }
+ if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) {
+ volinfo->rebal.commit_hash = commit_hash;
+ }
+ ret = glusterd_handle_defrag_start(volinfo, msg, sizeof(msg),
+ cmd, NULL, GD_OP_REBALANCE);
+ break;
+ } else {
+ /* Reset defrag status to 'STARTED' so that the
+ * pid is checked and restarted accordingly.
+ * If the pid is not running it executes the
+ * "NOT_STARTED" case and restarts the process
+ */
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED;
+ volinfo->rebal.defrag_cmd = cmd;
+ volinfo->rebal.op = GD_OP_REBALANCE;
+
+ ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY,
+ SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Missing rebalance"
+ " id");
+ ret = 0;
+ } else {
+ gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_REBALANCE;
+ }
+ if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) {
+ volinfo->rebal.commit_hash = commit_hash;
+ }
+ ret = glusterd_restart_rebalance_for_volume(volinfo);
+ break;
+ }
+ case GF_DEFRAG_CMD_STOP:
+ /* Clear task-id only on explicitly stopping rebalance.
+ * Also clear the stored operation, so it doesn't cause trouble
+ * with future rebalance/remove-brick starts
+ */
+ gf_uuid_clear(volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_NONE;
+
+ /* Fall back to the old volume file in case of decommission*/
+ cds_list_for_each_entry_safe(brickinfo, tmp, &volinfo->bricks,
+ brick_list)
+ {
+ if (!brickinfo->decommissioned)
+ continue;
+ brickinfo->decommissioned = 0;
+ volfile_update = _gf_true;
+ }
+
+ if (volfile_update == _gf_false) {
+ ret = 0;
+ break;
+ }
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to get volname");
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_VOLFILE_CREATE_FAIL, "failed to create volfiles");
goto out;
- }
+ }
- ret = dict_get_int32 (dict, "rebalance-command", (int32_t*)&cmd);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to get command");
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL,
+ "failed to store volinfo");
goto out;
- }
+ }
- glusterd_rebalance_cmd_attempted_log (cmd, volname);
+ ret = 0;
+ break;
- ret = dict_set_static_bin (dict, "node-uuid", MY_UUID, 16);
- if (ret)
- goto out;
-
- if ((cmd == GF_DEFRAG_CMD_STATUS) ||
- (cmd == GF_DEFRAG_CMD_STOP)) {
- ret = glusterd_op_begin (req, GD_OP_DEFRAG_BRICK_VOLUME,
- dict);
- } else
- ret = glusterd_op_begin (req, GD_OP_REBALANCE, dict);
+ case GF_DEFRAG_CMD_STATUS:
+ break;
+ default:
+ break;
+ }
out:
+ if (ret && op_errstr && msg[0])
+ *op_errstr = gf_strdup(msg);
- glusterd_friend_sm ();
- glusterd_op_sm ();
-
- if (ret) {
- if (dict)
- dict_unref (dict);
- ret = glusterd_op_send_cli_response (GD_OP_REBALANCE, ret, 0, req,
- NULL, "operation failed");
- }
-
- if (cli_req.dict.dict_val)
- free (cli_req.dict.dict_val);//malloced by xdr
-
- return 0;
+ return ret;
}
-
int
-glusterd_op_stage_rebalance (dict_t *dict, char **op_errstr)
+glusterd_op_stage_rebalance(dict_t *dict, char **op_errstr)
{
- char *volname = NULL;
- int ret = 0;
- int32_t cmd = 0;
- char msg[2048] = {0};
- glusterd_volinfo_t *volinfo = NULL;
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG, "volname not found");
- goto out;
- }
- ret = dict_get_int32 (dict, "rebalance-command", &cmd);
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG, "cmd not found");
- goto out;
- }
-
- ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo,
- msg, sizeof (msg));
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG, "failed to validate");
- goto out;
- }
- switch (cmd) {
+ char *volname = NULL;
+ char *cmd_str = NULL;
+ int ret = 0;
+ int32_t cmd = 0;
+ char msg[2048] = {0};
+ glusterd_volinfo_t *volinfo = NULL;
+ char *task_id_str = NULL;
+ dict_t *op_ctx = NULL;
+ xlator_t *this = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "volname not found");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"),
+ &cmd);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "cmd not found");
+ goto out;
+ }
+
+ ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg,
+ sizeof(msg));
+ if (ret) {
+ gf_msg_debug(this->name, 0, "failed to validate");
+ goto out;
+ }
+ switch (cmd) {
case GF_DEFRAG_CMD_START:
case GF_DEFRAG_CMD_START_LAYOUT_FIX:
+ /* Check if the connected clients are all of version
+ * glusterfs-3.6 and higher. This is needed to prevent some data
+ * loss issues that could occur when older clients are connected
+ * when rebalance is run. This check can be bypassed by using
+ * 'force'
+ */
+ ret = glusterd_check_client_op_version_support(
+ volname, GD_OP_VERSION_3_6_0, NULL);
+ if (ret) {
+ ret = gf_asprintf(op_errstr,
+ "Volume %s has one or "
+ "more connected clients of a version"
+ " lower than GlusterFS-v3.6.0. "
+ "Starting rebalance in this state "
+ "could lead to data loss.\nPlease "
+ "disconnect those clients before "
+ "attempting this command again.",
+ volname);
+ goto out;
+ }
+ /* Fall through */
case GF_DEFRAG_CMD_START_FORCE:
- ret = glusterd_defrag_start_validate (volinfo,
- msg, sizeof (msg));
+ if (is_origin_glusterd(dict)) {
+ op_ctx = glusterd_op_get_ctx();
+ if (!op_ctx) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_GET_FAIL,
+ "Failed to get op_ctx");
+ goto out;
+ }
+
+ ret = glusterd_generate_and_set_task_id(
+ op_ctx, GF_REBALANCE_TID_KEY, SLEN(GF_REBALANCE_TID_KEY));
if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "start validate failed");
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL,
+ "Failed to generate task-id");
+ goto out;
}
- break;
+ } else {
+ ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY,
+ SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Missing rebalance-id");
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_REBALANCE_ID_MISSING, "%s", msg);
+ ret = 0;
+ }
+ }
+ ret = glusterd_defrag_start_validate(volinfo, msg, sizeof(msg),
+ GD_OP_REBALANCE);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "defrag start validate "
+ "failed for volume %s.",
+ volinfo->volname);
+ goto out;
+ }
+ break;
case GF_DEFRAG_CMD_STATUS:
case GF_DEFRAG_CMD_STOP:
- break;
+
+ ret = dict_get_strn(dict, "cmd-str", SLEN("cmd-str"), &cmd_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get "
+ "command string");
+ ret = -1;
+ goto out;
+ }
+ if ((strstr(cmd_str, "rebalance") != NULL) &&
+ (volinfo->rebal.op != GD_OP_REBALANCE)) {
+ snprintf(msg, sizeof(msg),
+ "Rebalance not started "
+ "for volume %s.",
+ volinfo->volname);
+ ret = -1;
+ goto out;
+ }
+
+ if (strstr(cmd_str, "remove-brick") != NULL) {
+ if (volinfo->rebal.op != GD_OP_REMOVE_BRICK) {
+ snprintf(msg, sizeof(msg),
+ "remove-brick not "
+ "started for volume %s.",
+ volinfo->volname);
+ ret = -1;
+ goto out;
+ }
+
+ /* For remove-brick status/stop command check whether
+ * given input brick is part of volume or not.*/
+
+ ret = dict_foreach_fnmatch(dict, "brick*",
+ glusterd_brick_validation, volinfo);
+ if (ret == -1) {
+ snprintf(msg, sizeof(msg),
+ "Incorrect brick"
+ " for volume %s",
+ volinfo->volname);
+ goto out;
+ }
+ }
+ break;
+
default:
- break;
- }
+ break;
+ }
- ret = 0;
+ ret = 0;
out:
- if (ret && op_errstr && msg[0])
- *op_errstr = gf_strdup (msg);
+ if (ret && op_errstr && msg[0])
+ *op_errstr = gf_strdup(msg);
- return ret;
+ return ret;
}
-
int
-glusterd_op_rebalance (dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+glusterd_op_rebalance(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
{
- char *volname = NULL;
- int ret = 0;
- int32_t cmd = 0;
- char msg[2048] = {0};
- glusterd_volinfo_t *volinfo = NULL;
- glusterd_conf_t *priv = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
- glusterd_brickinfo_t *tmp = NULL;
- gf_boolean_t volfile_update = _gf_false;
-
- priv = THIS->private;
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG, "volname not given");
+ char *volname = NULL;
+ int ret = 0;
+ int32_t cmd = 0;
+ char msg[2048] = {0};
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *tmp = NULL;
+ gf_boolean_t volfile_update = _gf_false;
+ char *task_id_str = NULL;
+ dict_t *ctx = NULL;
+ xlator_t *this = NULL;
+ uint32_t commit_hash;
+ int32_t is_force = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "volname not given");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "rebalance-command", SLEN("rebalance-command"),
+ &cmd);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "command not given");
+ goto out;
+ }
+
+ ret = glusterd_rebalance_cmd_validate(cmd, volname, &volinfo, msg,
+ sizeof(msg));
+ if (ret) {
+ gf_msg_debug(this->name, 0, "cmd validate failed");
+ goto out;
+ }
+
+ /* Set task-id, if available, in op_ctx dict for operations other than
+ * start
+ */
+ if (cmd == GF_DEFRAG_CMD_STATUS || cmd == GF_DEFRAG_CMD_STOP) {
+ if (!gf_uuid_is_null(volinfo->rebal.rebalance_id)) {
+ ctx = glusterd_op_get_ctx();
+ if (!ctx) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OPCTX_GET_FAIL,
+ "Failed to get op_ctx");
+ ret = -1;
goto out;
- }
-
- ret = dict_get_int32 (dict, "rebalance-command", &cmd);
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG, "command not given");
+ }
+
+ if (GD_OP_REMOVE_BRICK == volinfo->rebal.op)
+ ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id,
+ ctx, GF_REMOVE_BRICK_TID_KEY,
+ SLEN(GF_REMOVE_BRICK_TID_KEY));
+ else
+ ret = glusterd_copy_uuid_to_dict(volinfo->rebal.rebalance_id,
+ ctx, GF_REBALANCE_TID_KEY,
+ SLEN(GF_REBALANCE_TID_KEY));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TASKID_GEN_FAIL,
+ "Failed to set task-id");
goto out;
+ }
}
+ }
- ret = glusterd_rebalance_cmd_validate (cmd, volname, &volinfo,
- msg, sizeof (msg));
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG, "cmd validate failed");
- goto out;
- }
-
- switch (cmd) {
+ switch (cmd) {
case GF_DEFRAG_CMD_START:
case GF_DEFRAG_CMD_START_LAYOUT_FIX:
case GF_DEFRAG_CMD_START_FORCE:
- ret = glusterd_handle_defrag_start (volinfo, msg, sizeof (msg),
- cmd, NULL);
- break;
- case GF_DEFRAG_CMD_STOP:
- /* Fall back to the old volume file in case of decommission*/
- list_for_each_entry_safe (brickinfo, tmp, &volinfo->bricks,
- brick_list) {
- if (!brickinfo->decommissioned)
- continue;
- brickinfo->decommissioned = 0;
- volfile_update = _gf_true;
- }
- if (volfile_update == _gf_false) {
- ret = 0;
- break;
- }
-
- ret = glusterd_create_volfiles_and_notify_services (volinfo);
+ ret = dict_get_int32n(dict, "force", SLEN("force"), &is_force);
+ if (ret)
+ is_force = 0;
+ if (!is_force) {
+ /* Reset defrag status to 'NOT STARTED' whenever a
+ * remove-brick/rebalance command is issued to remove
+ * stale information from previous run.
+ */
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_NOT_STARTED;
+
+ ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY,
+ SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to create volfiles");
- goto out;
+ gf_msg_debug(this->name, 0,
+ "Missing rebalance"
+ " id");
+ ret = 0;
+ } else {
+ gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_REBALANCE;
}
-
- ret = glusterd_store_volinfo (volinfo,
- GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (!gd_should_i_start_rebalance(volinfo)) {
+ /* Store the rebalance-id and rebalance command
+ * even if the peer isn't starting a rebalance
+ * process. On peers where a rebalance process
+ * is started, glusterd_handle_defrag_start
+ * performs the storing.
+ * Storing this is needed for having
+ * 'volume status' work correctly.
+ */
+ glusterd_store_perform_node_state_store(volinfo);
+ break;
+ }
+ if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) {
+ volinfo->rebal.commit_hash = commit_hash;
+ }
+ ret = glusterd_handle_defrag_start(volinfo, msg, sizeof(msg),
+ cmd, NULL, GD_OP_REBALANCE);
+ break;
+ } else {
+ /* Reset defrag status to 'STARTED' so that the
+ * pid is checked and restarted accordingly.
+ * If the pid is not running it executes the
+ * "NOT_STARTED" case and restarts the process
+ */
+ volinfo->rebal.defrag_status = GF_DEFRAG_STATUS_STARTED;
+ volinfo->rebal.defrag_cmd = cmd;
+ volinfo->rebal.op = GD_OP_REBALANCE;
+
+ ret = dict_get_strn(dict, GF_REBALANCE_TID_KEY,
+ SLEN(GF_REBALANCE_TID_KEY), &task_id_str);
if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "failed to store volinfo");
- goto out;
+ gf_msg_debug(this->name, 0,
+ "Missing rebalance"
+ " id");
+ ret = 0;
+ } else {
+ gf_uuid_parse(task_id_str, volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_REBALANCE;
}
-
+ if (dict_get_uint32(dict, "commit-hash", &commit_hash) == 0) {
+ volinfo->rebal.commit_hash = commit_hash;
+ }
+ ret = glusterd_restart_rebalance_for_volume(volinfo);
+ break;
+ }
+ case GF_DEFRAG_CMD_STOP:
+ /* Clear task-id only on explicitly stopping rebalance.
+ * Also clear the stored operation, so it doesn't cause trouble
+ * with future rebalance/remove-brick starts
+ */
+ gf_uuid_clear(volinfo->rebal.rebalance_id);
+ volinfo->rebal.op = GD_OP_NONE;
+
+ /* Fall back to the old volume file in case of decommission*/
+ cds_list_for_each_entry_safe(brickinfo, tmp, &volinfo->bricks,
+ brick_list)
+ {
+ if (!brickinfo->decommissioned)
+ continue;
+ brickinfo->decommissioned = 0;
+ volfile_update = _gf_true;
+ }
+
+ if (volfile_update == _gf_false) {
ret = 0;
break;
+ }
+
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_VOLFILE_CREATE_FAIL, "failed to create volfiles");
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLINFO_SET_FAIL,
+ "failed to store volinfo");
+ goto out;
+ }
+
+ ret = 0;
+ break;
case GF_DEFRAG_CMD_STATUS:
- break;
+ break;
default:
- break;
- }
-
- glusterd_rebalance_cmd_log (cmd, volname, ret);
+ break;
+ }
out:
- if (ret && op_errstr && msg[0])
- *op_errstr = gf_strdup (msg);
+ if (ret && op_errstr && msg[0])
+ *op_errstr = gf_strdup(msg);
- return ret;
+ return ret;
}
int32_t
-glusterd_defrag_event_notify_handle (dict_t *dict)
+glusterd_defrag_event_notify_handle(dict_t *dict)
{
- glusterd_volinfo_t *volinfo = NULL;
- char *volname = NULL;
- int32_t ret = -1;
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Failed to get volname");
- return ret;
- }
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volname = NULL;
+ char *volname_ptr = NULL;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get volname");
+ return ret;
+ }
+
+ volname_ptr = strstr(volname, "rebalance/");
+ if (volname_ptr) {
+ volname_ptr = strchr(volname_ptr, '/');
+ volname = volname_ptr + 1;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_REBALANCE_PFX_IN_VOLNAME,
+ "volname received (%s) is not prefixed with rebalance.",
+ volname);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to get volinfo for %s", volname);
+ return ret;
+ }
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Failed to get volinfo for %s"
- , volname);
- return ret;
- }
+ ret = glusterd_defrag_volume_status_update(volinfo, dict, 0);
- ret = glusterd_defrag_volume_status_update (volinfo, dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DEFRAG_STATUS_UPDATE_FAIL,
+ "Failed to update status");
+ gf_event(EVENT_REBALANCE_STATUS_UPDATE_FAILED, "volume=%s",
+ volinfo->volname);
+ }
- if (ret)
- gf_log ("", GF_LOG_ERROR, "Failed to update status");
- return ret;
+out:
+ return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
index 067196952ce..43c2f4373e0 100644
--- a/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
+++ b/xlators/mgmt/glusterd/src/glusterd-replace-brick.c
@@ -1,1813 +1,716 @@
/*
- Copyright (c) 2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ Copyright (c) 2011-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#include "common-utils.h"
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/common-utils.h>
#include "cli1-xdr.h"
#include "xdr-generic.h"
-#include "glusterfs.h"
+#include <glusterfs/glusterfs.h>
#include "glusterd.h"
#include "glusterd-op-sm.h"
+#include "glusterd-geo-rep.h"
#include "glusterd-store.h"
#include "glusterd-utils.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-svc-helper.h"
#include "glusterd-volgen.h"
-#include "run.h"
-#include "syscall.h"
+#include "glusterd-messages.h"
+#include "glusterd-server-quorum.h"
+#include "glusterd-mgmt.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
#include <signal.h>
int
-glusterd_get_replace_op_str (gf1_cli_replace_op op, char *op_str)
-{
- int ret = -1;
-
- if (!op_str)
- goto out;
-
- switch (op) {
- case GF_REPLACE_OP_START:
- strcpy (op_str, "start");
- break;
- case GF_REPLACE_OP_COMMIT:
- strcpy (op_str, "commit");
- break;
- case GF_REPLACE_OP_PAUSE:
- strcpy (op_str, "pause");
- break;
- case GF_REPLACE_OP_ABORT:
- strcpy (op_str, "abort");
- break;
- case GF_REPLACE_OP_STATUS:
- strcpy (op_str, "status");
- break;
- case GF_REPLACE_OP_COMMIT_FORCE:
- strcpy (op_str, "commit-force");
- break;
- default:
- strcpy (op_str, "unknown");
- break;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
+glusterd_mgmt_v3_initiate_replace_brick_cmd_phases(rpcsvc_request_t *req,
+ glusterd_op_t op,
+ dict_t *dict);
int
-glusterd_handle_replace_brick (rpcsvc_request_t *req)
+__glusterd_handle_replace_brick(rpcsvc_request_t *req)
{
- int32_t ret = -1;
- gf_cli_req cli_req = {{0,}};
- dict_t *dict = NULL;
- char *src_brick = NULL;
- char *dst_brick = NULL;
- int32_t op = 0;
- char operation[256];
- glusterd_op_t cli_op = GD_OP_REPLACE_BRICK;
- char *volname = NULL;
-
- GF_ASSERT (req);
-
- if (!xdr_to_generic (req->msg[0], &cli_req,
- (xdrproc_t)xdr_gf_cli_req)) {
- //failed to decode msg;
- req->rpc_err = GARBAGE_ARGS;
- goto out;
- }
-
- gf_log ("glusterd", GF_LOG_INFO, "Received replace brick req");
-
- if (cli_req.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
-
- ret = dict_unserialize (cli_req.dict.dict_val,
- cli_req.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize req-buffer to dictionary");
- goto out;
- }
- }
-
- ret = dict_get_str (dict, "volname", &volname);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "could not get volname");
- goto out;
- }
-
- ret = dict_get_int32 (dict, "operation", &op);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "dict_get on operation failed");
- goto out;
- }
-
- ret = dict_get_str (dict, "src-brick", &src_brick);
-
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get src brick");
- goto out;
- }
- gf_log ("", GF_LOG_DEBUG,
- "src brick=%s", src_brick);
-
- ret = dict_get_str (dict, "dst-brick", &dst_brick);
-
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get dest brick");
- goto out;
- }
-
- (void) glusterd_get_replace_op_str (op, operation);
- gf_log ("", GF_LOG_DEBUG, "dst brick=%s", dst_brick);
- gf_log ("glusterd", GF_LOG_INFO, "Received replace brick %s request",
- operation);
- gf_cmd_log ("Volume replace-brick","volname: %s src_brick:%s"
- " dst_brick:%s op:%s", volname, src_brick, dst_brick,
- operation);
-
- ret = glusterd_op_begin (req, GD_OP_REPLACE_BRICK, dict);
- gf_cmd_log ("Volume replace-brick","on volname: %s %s", volname,
- (ret) ? "FAILED" : "SUCCESS");
+ int32_t ret = -1;
+ gf_cli_req cli_req = {{
+ 0,
+ }};
+ dict_t *dict = NULL;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+ char *cli_op = NULL;
+ glusterd_op_t op = -1;
+ char *volname = NULL;
+ char msg[256] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ // failed to decode msg;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "Failed to decode "
+ "request received from cli");
+ req->rpc_err = GARBAGE_ARGS;
+ goto out;
+ }
+
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_REPLACE_BRK_REQ_RCVD,
+ "Received replace brick req");
+
+ if (cli_req.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(msg, sizeof(msg),
+ "Unable to decode the "
+ "command");
+ goto out;
+ }
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Could not get volume name");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg);
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "operation", SLEN("operation"), &cli_op);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "dict_get on operation failed");
+ snprintf(msg, sizeof(msg), "Could not get operation");
+ goto out;
+ }
+
+ op = gd_cli_to_gd_op(cli_op);
+
+ if (conf->op_version < GD_OP_VERSION_3_9_0 &&
+ strcmp(cli_op, "GF_REPLACE_OP_COMMIT_FORCE")) {
+ snprintf(msg, sizeof(msg),
+ "Cannot execute command. The "
+ "cluster is operating at version %d. reset-brick "
+ "command %s is unavailable in this version.",
+ conf->op_version, gd_rb_op_to_str(cli_op));
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "src-brick", SLEN("src-brick"), &src_brick);
+
+ if (ret) {
+ snprintf(msg, sizeof(msg), "Failed to get src brick");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s", msg);
+ goto out;
+ }
+ gf_msg_debug(this->name, 0, "src brick=%s", src_brick);
+
+ if (!strcmp(cli_op, "GF_RESET_OP_COMMIT") ||
+ !strcmp(cli_op, "GF_RESET_OP_COMMIT_FORCE") ||
+ !strcmp(cli_op, "GF_REPLACE_OP_COMMIT_FORCE")) {
+ ret = dict_get_strn(dict, "dst-brick", SLEN("dst-brick"), &dst_brick);
+
+ if (ret) {
+ snprintf(msg, sizeof(msg),
+ "Failed to get"
+ "dest brick");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED, "%s",
+ msg);
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0, "dst brick=%s", dst_brick);
+ }
+
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ (op == GD_OP_REPLACE_BRICK)
+ ? GD_MSG_REPLACE_BRK_COMMIT_FORCE_REQ_RCVD
+ : GD_MSG_RESET_BRICK_COMMIT_FORCE_REQ_RCVD,
+ "Received %s request.", gd_rb_op_to_str(cli_op));
+
+ ret = glusterd_mgmt_v3_initiate_replace_brick_cmd_phases(req, op, dict);
out:
- if (ret && dict)
- dict_unref (dict);
- if (cli_req.dict.dict_val)
- free (cli_req.dict.dict_val);//malloced by xdr
+ if (ret) {
+ glusterd_op_send_cli_response(op, ret, 0, req, dict, msg);
+ }
+ ret = 0;
+ free(cli_req.dict.dict_val); // malloced by xdr
- glusterd_friend_sm ();
- glusterd_op_sm ();
-
- if (ret)
- ret = glusterd_op_send_cli_response (cli_op, ret, 0, req,
- NULL, "operation failed");
-
- return ret;
-}
-
-static int
-glusterd_get_rb_dst_brickinfo (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t **brickinfo)
-{
- int32_t ret = -1;
-
- if (!volinfo || !brickinfo)
- goto out;
-
- *brickinfo = volinfo->dst_brick;
-
- ret = 0;
-
-out:
- return ret;
+ return ret;
}
int
-glusterd_op_stage_replace_brick (dict_t *dict, char **op_errstr,
- dict_t *rsp_dict)
+glusterd_handle_reset_brick(rpcsvc_request_t *req)
{
- int ret = 0;
- char *src_brick = NULL;
- char *dst_brick = NULL;
- char *volname = NULL;
- int replace_op = 0;
- glusterd_volinfo_t *volinfo = NULL;
- glusterd_brickinfo_t *src_brickinfo = NULL;
- char *host = NULL;
- char *path = NULL;
- char msg[2048] = {0};
- char *dup_dstbrick = NULL;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_brickinfo_t *dst_brickinfo = NULL;
- gf_boolean_t is_run = _gf_false;
- dict_t *ctx = NULL;
- glusterd_conf_t *priv = NULL;
- char *savetok = NULL;
- char voldir[PATH_MAX] = {0};
- char pidfile[PATH_MAX] = {0};
-
- priv = THIS->private;
- GF_ASSERT (priv);
-
- ret = dict_get_str (dict, "src-brick", &src_brick);
-
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get src brick");
- goto out;
- }
-
- gf_log ("", GF_LOG_DEBUG, "src brick=%s", src_brick);
-
- ret = dict_get_str (dict, "dst-brick", &dst_brick);
-
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get dest brick");
- goto out;
- }
-
- gf_log ("", GF_LOG_DEBUG, "dst brick=%s", dst_brick);
-
- ret = dict_get_str (dict, "volname", &volname);
-
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
- goto out;
- }
-
- ret = dict_get_int32 (dict, "operation", (int32_t *)&replace_op);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "dict get on replace-brick operation failed");
- goto out;
- }
-
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- snprintf (msg, sizeof (msg), "volume: %s does not exist",
- volname);
- *op_errstr = gf_strdup (msg);
- goto out;
- }
-
- if (GLUSTERD_STATUS_STARTED != volinfo->status) {
- ret = -1;
- snprintf (msg, sizeof (msg), "volume: %s is not started",
- volname);
- *op_errstr = gf_strdup (msg);
- goto out;
- }
-
- if (!glusterd_store_is_valid_brickpath (volname, dst_brick) ||
- !glusterd_is_valid_volfpath (volname, dst_brick)) {
- snprintf (msg, sizeof (msg), "brick path %s is too "
- "long.", dst_brick);
- gf_log ("", GF_LOG_ERROR, "%s", msg);
- *op_errstr = gf_strdup (msg);
-
- ret = -1;
- goto out;
- }
-
- ret = glusterd_check_gsync_running (volinfo, &is_run);
- if (ret && (is_run == _gf_false))
- gf_log ("", GF_LOG_WARNING, "Unable to get the status"
- " of active "GEOREP" session");
- if (is_run) {
- gf_log ("", GF_LOG_WARNING, GEOREP" sessions active"
- "for the volume %s ", volname);
- snprintf (msg, sizeof(msg), GEOREP" sessions are active "
- "for the volume %s.\nStop "GEOREP " sessions "
- "involved in this volume. Use 'volume "GEOREP
- " status' command for more info.",
- volname);
- *op_errstr = gf_strdup (msg);
- ret = -1;
- goto out;
- }
-
- if (glusterd_is_defrag_on(volinfo)) {
- snprintf (msg, sizeof(msg), "Volume name %s rebalance is in "
- "progress. Please retry after completion", volname);
- gf_log ("glusterd", GF_LOG_ERROR, "%s", msg);
- *op_errstr = gf_strdup (msg);
- ret = -1;
- goto out;
- }
-
- switch (replace_op) {
- case GF_REPLACE_OP_START:
- if (glusterd_is_rb_started (volinfo)) {
- gf_log ("", GF_LOG_ERROR, "Replace brick is already "
- "started for volume ");
- ret = -1;
- goto out;
- }
- break;
- case GF_REPLACE_OP_PAUSE:
- if (glusterd_is_rb_paused (volinfo)) {
- gf_log ("", GF_LOG_ERROR, "Replace brick is already "
- "paused for volume ");
- ret = -1;
- goto out;
- } else if (!glusterd_is_rb_started(volinfo)) {
- gf_log ("", GF_LOG_ERROR, "Replace brick is not"
- " started for volume ");
- ret = -1;
- goto out;
- }
- break;
-
- case GF_REPLACE_OP_ABORT:
- if (!glusterd_is_rb_ongoing (volinfo)) {
- gf_log ("", GF_LOG_ERROR, "Replace brick is not"
- " started or paused for volume ");
- ret = -1;
- goto out;
- }
- break;
-
- case GF_REPLACE_OP_COMMIT:
- if (!glusterd_is_rb_ongoing (volinfo)) {
- gf_log ("", GF_LOG_ERROR, "Replace brick is not "
- "started for volume ");
- ret = -1;
- goto out;
- }
- break;
-
- case GF_REPLACE_OP_COMMIT_FORCE: break;
-
- case GF_REPLACE_OP_STATUS:
-
- if (glusterd_is_rb_ongoing (volinfo) == _gf_false) {
- ret = gf_asprintf (op_errstr, "replace-brick not"
- " started on volume %s",
- volinfo->volname);
- if (ret < 0) {
- *op_errstr = NULL;
- goto out;
- }
-
- gf_log (THIS->name, GF_LOG_ERROR, "%s", *op_errstr);
- ret = -1;
- goto out;
- }
- break;
-
- default:
- ret = -1;
- goto out;
- }
-
- ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo,
- &src_brickinfo,
- GF_PATH_COMPLETE);
- if (ret) {
- snprintf (msg, sizeof (msg), "brick: %s does not exist in "
- "volume: %s", src_brick, volname);
- *op_errstr = gf_strdup (msg);
- goto out;
- }
-
- ctx = glusterd_op_get_ctx();
- if (ctx) {
- if (!glusterd_is_fuse_available ()) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to open /dev/"
- "fuse (%s), replace-brick command failed",
- strerror (errno));
- snprintf (msg, sizeof(msg), "Fuse unavailable\n "
- "Replace-brick failed");
- *op_errstr = gf_strdup (msg);
- ret = -1;
- goto out;
- }
- }
-
- if (!glusterd_is_local_addr (src_brickinfo->hostname)) {
- gf_log ("", GF_LOG_DEBUG,
- "I AM THE SOURCE HOST");
- if (src_brickinfo->port && rsp_dict) {
- ret = dict_set_int32 (rsp_dict, "src-brick-port",
- src_brickinfo->port);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "Could not set src-brick-port=%d",
- src_brickinfo->port);
- }
- }
-
- GLUSTERD_GET_VOLUME_DIR (voldir, volinfo, priv);
- GLUSTERD_GET_BRICK_PIDFILE (pidfile, voldir,
- src_brickinfo->hostname,
- src_brickinfo->path);
- if ((replace_op != GF_REPLACE_OP_COMMIT_FORCE) &&
- !glusterd_is_service_running (pidfile, NULL)) {
- snprintf(msg, sizeof(msg), "Source brick %s:%s "
- "is not online.", src_brickinfo->hostname,
- src_brickinfo->path);
- *op_errstr = gf_strdup (msg);
- ret = -1;
- goto out;
- }
-
-
- }
-
- dup_dstbrick = gf_strdup (dst_brick);
- if (!dup_dstbrick) {
- ret = -1;
- gf_log ("", GF_LOG_ERROR, "Memory allocation failed");
- goto out;
- }
- host = strtok_r (dup_dstbrick, ":", &savetok);
- path = strtok_r (NULL, ":", &savetok);
-
- if (!host || !path) {
- gf_log ("", GF_LOG_ERROR,
- "dst brick %s is not of form <HOSTNAME>:<export-dir>",
- dst_brick);
- ret = -1;
- goto out;
- }
- if (!glusterd_brickinfo_get (NULL, host, path, NULL)) {
- snprintf(msg, sizeof(msg), "Brick: %s:%s already in use",
- host, path);
- *op_errstr = gf_strdup (msg);
- ret = -1;
- goto out;
- }
-
- ret = glusterd_brickinfo_from_brick (dst_brick, &dst_brickinfo);
- if (ret)
- goto out;
-
- if (!glusterd_is_rb_ongoing (volinfo) &&
- (replace_op == GF_REPLACE_OP_START ||
- replace_op == GF_REPLACE_OP_COMMIT_FORCE)) {
-
- volinfo->src_brick = src_brickinfo;
- volinfo->dst_brick = dst_brickinfo;
- }
-
- if (glusterd_rb_check_bricks (volinfo, src_brickinfo, dst_brickinfo)) {
-
- ret = -1;
- *op_errstr = gf_strdup ("Incorrect source or "
- "destination brick");
- if (*op_errstr)
- gf_log (THIS->name, GF_LOG_ERROR, "%s", *op_errstr);
- goto out;
- }
-
- if (!glusterd_is_rb_ongoing (volinfo) &&
- !glusterd_is_local_addr (host)) {
- ret = glusterd_brick_create_path (host, path,
- volinfo->volume_id,
- op_errstr);
- if (ret)
- goto out;
- }
-
- if (glusterd_is_local_addr (host)) {
- ret = glusterd_friend_find (NULL, host, &peerinfo);
- if (ret) {
- snprintf (msg, sizeof (msg), "%s, is not a friend",
- host);
- *op_errstr = gf_strdup (msg);
- goto out;
- }
-
- if (!peerinfo->connected) {
- snprintf (msg, sizeof (msg), "%s, is not connected at "
- "the moment", host);
- *op_errstr = gf_strdup (msg);
- ret = -1;
- goto out;
- }
-
- if (GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state) {
- snprintf (msg, sizeof (msg), "%s, is not befriended "
- "at the moment", host);
- *op_errstr = gf_strdup (msg);
- ret = -1;
- goto out;
- }
- }
- ret = 0;
-
-out:
- if (dup_dstbrick)
- GF_FREE (dup_dstbrick);
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
-
- return ret;
+ return glusterd_big_locked_handler(req, __glusterd_handle_replace_brick);
}
-static int
-rb_regenerate_volfiles (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *brickinfo,
- int32_t pump_needed)
+int
+glusterd_handle_replace_brick(rpcsvc_request_t *req)
{
- dict_t *dict = NULL;
- int ret = 0;
-
- dict = volinfo->dict;
-
- gf_log ("", GF_LOG_DEBUG,
- "attempting to set pump value=%d", pump_needed);
-
- ret = dict_set_int32 (dict, "enable-pump", pump_needed);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "could not dict_set enable-pump");
- goto out;
- }
-
- ret = glusterd_create_rb_volfiles (volinfo, brickinfo);
-
-out:
- return ret;
+ return glusterd_big_locked_handler(req, __glusterd_handle_replace_brick);
}
-static int
-rb_src_brick_restart (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *src_brickinfo,
- int activate_pump)
-{
- int ret = 0;
-
- gf_log ("", GF_LOG_DEBUG,
- "Attempting to kill src");
-
- ret = glusterd_nfs_server_stop (volinfo);
-
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to stop nfs, ret: %d",
- ret);
- }
-
- ret = glusterd_volume_stop_glusterfs (volinfo, src_brickinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to stop "
- "glusterfs, ret: %d", ret);
- goto out;
- }
-
- glusterd_delete_volfile (volinfo, src_brickinfo);
-
- if (activate_pump) {
- ret = rb_regenerate_volfiles (volinfo, src_brickinfo, 1);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "Could not regenerate volfiles with pump");
- goto out;
- }
- } else {
- ret = rb_regenerate_volfiles (volinfo, src_brickinfo, 0);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "Could not regenerate volfiles without pump");
- goto out;
- }
-
- }
-
- sleep (2);
- ret = glusterd_volume_start_glusterfs (volinfo, src_brickinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to start "
- "glusterfs, ret: %d", ret);
- goto out;
- }
-
-out:
- ret = glusterd_nfs_server_start (volinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to start nfs, ret: %d",
- ret);
- }
- return ret;
-}
-
-static int
-rb_send_xattr_command (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *src_brickinfo,
- glusterd_brickinfo_t *dst_brickinfo,
- const char *xattr_key, const char *value)
+int
+glusterd_op_stage_replace_brick(dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
{
- glusterd_conf_t *priv = NULL;
- xlator_t *this = NULL;
- char mount_point_path[PATH_MAX] = {0,};
- int ret = -1;
-
- this = THIS;
- priv = this->private;
-
- snprintf (mount_point_path, PATH_MAX, "%s/vols/%s/%s",
- priv->workdir, volinfo->volname, RB_CLIENT_MOUNTPOINT);
-
- ret = sys_lsetxattr (mount_point_path, xattr_key, value,
- strlen (value) + 1, 0);
+ int ret = 0;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+ char *volname = NULL;
+ char *op = NULL;
+ glusterd_op_t gd_op = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *src_brickinfo = NULL;
+ char *host = NULL;
+ char msg[2048] = {0};
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_brickinfo_t *dst_brickinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ char pidfile[PATH_MAX] = {0};
+ xlator_t *this = NULL;
+ gf_boolean_t is_force = _gf_false;
+ char *dup_dstbrick = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = glusterd_brick_op_prerequisites(dict, &op, &gd_op, &volname, &volinfo,
+ &src_brick, &src_brickinfo, pidfile,
+ op_errstr, rsp_dict);
+ if (ret)
+ goto out;
+
+ if (volinfo->type == GF_CLUSTER_TYPE_NONE) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_NOT_PERMITTED,
+ "replace-brick is not permitted on distribute only "
+ "volumes");
+ gf_asprintf(op_errstr,
+ "replace-brick is not permitted on "
+ "distribute only volumes. Please use add-brick "
+ "and remove-brick operations instead.");
+ ret = -1;
+ goto out;
+ }
+ ret = glusterd_validate_quorum(this, gd_op, dict, op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SERVER_QUORUM_NOT_MET,
+ "Server quorum not met. Rejecting operation.");
+ goto out;
+ }
+
+ if (strcmp(op, "GF_REPLACE_OP_COMMIT_FORCE")) {
+ ret = -1;
+ goto out;
+ } else {
+ is_force = _gf_true;
+ }
+
+ if (volinfo->snap_count > 0 || !cds_list_empty(&volinfo->snap_volumes)) {
+ snprintf(msg, sizeof(msg),
+ "Volume %s has %" PRIu64
+ " snapshots. "
+ "Changing the volume configuration will not effect snapshots."
+ "But the snapshot brick mount should be intact to "
+ "make them function.",
+ volname, volinfo->snap_count);
+ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SNAP_WARN, "%s", msg);
+ msg[0] = '\0';
+ }
+
+ glusterd_add_peers_to_auth_list(volname);
+
+ ret = glusterd_get_dst_brick_info(&dst_brick, volname, op_errstr,
+ &dst_brickinfo, &host, dict,
+ &dup_dstbrick);
+ if (ret)
+ goto out;
+
+ ret = glusterd_new_brick_validate(dst_brick, dst_brickinfo, msg,
+ sizeof(msg), op);
+ /* fail if brick being replaced with itself */
+ if (ret) {
+ *op_errstr = gf_strdup(msg);
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_VALIDATE_FAIL, "%s",
+ *op_errstr);
+ goto out;
+ }
+
+ volinfo->rep_brick.src_brick = src_brickinfo;
+ volinfo->rep_brick.dst_brick = dst_brickinfo;
+
+ if (glusterd_rb_check_bricks(volinfo, src_brickinfo, dst_brickinfo)) {
+ ret = -1;
+ *op_errstr = gf_strdup(
+ "Incorrect source or "
+ "destination brick");
+ if (*op_errstr)
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_BRICK_NOT_FOUND,
+ "%s", *op_errstr);
+ goto out;
+ }
+
+ if (gf_is_local_addr(host)) {
+ ret = glusterd_validate_and_create_brickpath(
+ dst_brickinfo, volinfo->volume_id, volinfo->volname, op_errstr,
+ is_force, _gf_false);
if (ret)
- gf_log (this->name, GF_LOG_DEBUG, "setxattr on key: "
- "%s failed", xattr_key);
- return ret;
-}
-
-static int
-rb_spawn_dst_brick (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *brickinfo)
-{
- glusterd_conf_t *priv = NULL;
- runner_t runner = {0,};
- int ret = -1;
- int32_t port = 0;
-
- priv = THIS->private;
-
- port = pmap_registry_alloc (THIS);
- brickinfo->port = port;
-
- GF_ASSERT (port);
-
- runinit (&runner);
- runner_add_arg (&runner, SBIN_DIR"/glusterfs");
- runner_argprintf (&runner, "-f" "%s/vols/%s/"RB_DSTBRICKVOL_FILENAME,
- priv->workdir, volinfo->volname);
- runner_argprintf (&runner, "-p" "%s/vols/%s/"RB_DSTBRICK_PIDFILE,
- priv->workdir, volinfo->volname);
- runner_add_arg (&runner, "--xlator-option");
- runner_argprintf (&runner, "src-server.listen-port=%d", port);
- if (volinfo->memory_accounting)
- runner_add_arg (&runner, "--mem-accounting");
-
- ret = runner_run (&runner);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "Could not start glusterfs");
- goto out;
- }
-
- gf_log ("", GF_LOG_DEBUG,
- "Successfully started glusterfs: brick=%s:%s",
- brickinfo->hostname, brickinfo->path);
-
- ret = 0;
-
-out:
- return ret;
-}
-
-static int
-rb_spawn_glusterfs_client (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *brickinfo)
-{
- glusterd_conf_t *priv = NULL;
- char cmd_str[8192] = {0,};
- runner_t runner = {0,};
- struct stat buf;
- int ret = -1;
-
- priv = THIS->private;
-
- runinit (&runner);
- runner_add_arg (&runner, SBIN_DIR"/glusterfs");
- runner_argprintf (&runner, "-f" "%s/vols/%s/"RB_CLIENTVOL_FILENAME,
- priv->workdir, volinfo->volname);
- runner_argprintf (&runner, "%s/vols/%s/"RB_CLIENT_MOUNTPOINT,
- priv->workdir, volinfo->volname);
- if (volinfo->memory_accounting)
- runner_add_arg (&runner, "--mem-accounting");
-
- ret = runner_run (&runner);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "Could not start glusterfs");
- goto out;
- }
-
- gf_log ("", GF_LOG_DEBUG,
- "Successfully started glusterfs: brick=%s:%s",
- brickinfo->hostname, brickinfo->path);
-
- memset (cmd_str, 0, sizeof (cmd_str));
-
- snprintf (cmd_str, 4096, "%s/vols/%s/%s",
- priv->workdir, volinfo->volname,
- RB_CLIENT_MOUNTPOINT);
-
- ret = stat (cmd_str, &buf);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "stat on mountpoint failed");
- goto out;
- }
-
- gf_log ("", GF_LOG_DEBUG,
- "stat on mountpoint succeeded");
-
- ret = 0;
+ goto out;
+ }
+
+ if (!gf_is_local_addr(host)) {
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find(NULL, host);
+ if (peerinfo == NULL) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ snprintf(msg, sizeof(msg), "%s, is not a friend", host);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+
+ } else if (!peerinfo->connected) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "%s, is not connected at "
+ "the moment",
+ host);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+
+ } else if (GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "%s, is not befriended "
+ "at the moment",
+ host);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
+ RCU_READ_UNLOCK;
+
+ } else if (priv->op_version >= GD_OP_VERSION_3_6_0) {
+ /* A bricks mount dir is required only by snapshots which were
+ * introduced in gluster-3.6.0
+ */
+
+ if (!(gf_uuid_compare(dst_brickinfo->uuid, MY_UUID))) {
+ ret = glusterd_get_brick_mount_dir(dst_brickinfo->path,
+ dst_brickinfo->hostname,
+ dst_brickinfo->mount_dir);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
+ "Failed to get brick mount_dir");
+ goto out;
+ }
+ ret = dict_set_dynstr_with_alloc(rsp_dict, "brick1.mount_dir",
+ dst_brickinfo->mount_dir);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set brick.mount_dir");
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32n(rsp_dict, "brick_count", SLEN("brick_count"), 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set local_brick_count");
+ goto out;
+ }
+ }
+
+ ret = 0;
out:
- return ret;
-}
+ GF_FREE(dup_dstbrick);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
-static const char *client_volfile_str = "volume mnt-client\n"
- " type protocol/client\n"
- " option remote-host %s\n"
- " option remote-subvolume %s\n"
- " option remote-port %d\n"
- " option transport-type %s\n"
- " option username %s\n"
- " option password %s\n"
- "end-volume\n"
- "volume mnt-wb\n"
- " type performance/write-behind\n"
- " subvolumes mnt-client\n"
- "end-volume\n";
-
-static int
-rb_generate_client_volfile (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *src_brickinfo)
-{
- glusterd_conf_t *priv = NULL;
- xlator_t *this = NULL;
- FILE *file = NULL;
- char filename[PATH_MAX] = {0, };
- int ret = -1;
- int fd = -1;
- char *ttype = NULL;
-
- this = THIS;
- priv = this->private;
-
- gf_log (this->name, GF_LOG_DEBUG, "Creating volfile");
-
- snprintf (filename, PATH_MAX, "%s/vols/%s/%s",
- priv->workdir, volinfo->volname,
- RB_CLIENTVOL_FILENAME);
-
- fd = open (filename, O_CREAT | O_RDONLY, S_IRUSR | S_IWUSR);
- if (fd < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s", strerror (errno));
- goto out;
- }
- close (fd);
-
- file = fopen (filename, "w+");
- if (!file) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Open of volfile failed");
- ret = -1;
- goto out;
- }
-
- GF_ASSERT (src_brickinfo->port);
-
- ttype = glusterd_get_trans_type_rb (volinfo->transport_type);
- if (NULL == ttype){
- ret = -1;
- goto out;
- }
-
- fprintf (file, client_volfile_str, src_brickinfo->hostname,
- src_brickinfo->path,
- src_brickinfo->port, ttype,
- glusterd_auth_get_username (volinfo),
- glusterd_auth_get_password (volinfo));
-
- fclose (file);
- GF_FREE (ttype);
-
- ret = 0;
-
-out:
- return ret;
+ return ret;
}
-static const char *dst_brick_volfile_str = "volume src-posix\n"
- " type storage/posix\n"
- " option directory %s\n"
- " option volume-id %s\n"
- "end-volume\n"
- "volume %s\n"
- " type features/locks\n"
- " subvolumes src-posix\n"
- "end-volume\n"
- "volume src-server\n"
- " type protocol/server\n"
- " option auth.login.%s.allow %s\n"
- " option auth.login.%s.password %s\n"
- " option auth.addr.%s.allow *\n"
- " option transport-type %s\n"
- " subvolumes %s\n"
- "end-volume\n";
-
-static int
-rb_generate_dst_brick_volfile (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *dst_brickinfo)
-{
- glusterd_conf_t *priv = NULL;
- xlator_t *this = NULL;
- FILE *file = NULL;
- char filename[PATH_MAX] = {0, };
- int ret = -1;
- int fd = -1;
- char *trans_type = NULL;
-
- this = THIS;
- priv = this->private;
-
- gf_log (this->name, GF_LOG_DEBUG,
- "Creating volfile");
-
- snprintf (filename, PATH_MAX, "%s/vols/%s/%s",
- priv->workdir, volinfo->volname,
- RB_DSTBRICKVOL_FILENAME);
-
- fd = creat (filename, S_IRUSR | S_IWUSR);
- if (fd < 0) {
- gf_log (this->name, GF_LOG_ERROR,
- "%s", strerror (errno));
- goto out;
- }
- close (fd);
-
- file = fopen (filename, "w+");
- if (!file) {
- gf_log (this->name, GF_LOG_DEBUG,
- "Open of volfile failed");
- ret = -1;
- goto out;
- }
-
- trans_type = glusterd_get_trans_type_rb (volinfo->transport_type);
- if (NULL == trans_type){
- ret = -1;
- goto out;
- }
-
- fprintf (file, dst_brick_volfile_str,
- dst_brickinfo->path,
- uuid_utoa (volinfo->volume_id),
- dst_brickinfo->path,
- dst_brickinfo->path,
- glusterd_auth_get_username (volinfo),
- glusterd_auth_get_username (volinfo),
- glusterd_auth_get_password (volinfo),
- dst_brickinfo->path,
- trans_type,
- dst_brickinfo->path);
-
- GF_FREE (trans_type);
-
- fclose (file);
-
- ret = 0;
-
-out:
- return ret;
-}
-
-
-static int
-rb_mountpoint_mkdir (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *src_brickinfo)
-{
- glusterd_conf_t *priv = NULL;
- char mount_point_path[PATH_MAX] = {0,};
- int ret = -1;
-
- priv = THIS->private;
-
- snprintf (mount_point_path, PATH_MAX, "%s/vols/%s/%s",
- priv->workdir, volinfo->volname,
- RB_CLIENT_MOUNTPOINT);
-
- ret = mkdir (mount_point_path, 0777);
- if (ret && (errno != EEXIST)) {
- gf_log ("", GF_LOG_DEBUG, "mkdir failed, errno: %d",
- errno);
- goto out;
- }
-
- ret = 0;
-
-out:
- return ret;
-}
-
-static int
-rb_mountpoint_rmdir (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *src_brickinfo)
-{
- glusterd_conf_t *priv = NULL;
- char mount_point_path[PATH_MAX] = {0,};
- int ret = -1;
-
- priv = THIS->private;
-
- snprintf (mount_point_path, PATH_MAX, "%s/vols/%s/%s",
- priv->workdir, volinfo->volname,
- RB_CLIENT_MOUNTPOINT);
-
- ret = rmdir (mount_point_path);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG, "rmdir failed");
- goto out;
- }
-
- ret = 0;
-
-out:
- return ret;
-}
-
-static int
-rb_destroy_maintenance_client (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *src_brickinfo)
-{
- glusterd_conf_t *priv = NULL;
- runner_t runner = {0,};
- char filename[PATH_MAX] = {0,};
- int ret = -1;
-
- priv = THIS->private;
-
- runinit (&runner);
- runner_add_args (&runner, "/bin/umount", "-f", NULL);
- runner_argprintf (&runner, "%s/vols/%s/"RB_CLIENT_MOUNTPOINT,
- priv->workdir, volinfo->volname);
-
- ret = runner_run (&runner);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "umount failed on maintenance client");
- goto out;
- }
-
- ret = rb_mountpoint_rmdir (volinfo, src_brickinfo);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "rmdir of mountpoint failed");
- goto out;
- }
-
- snprintf (filename, PATH_MAX, "%s/vols/%s/%s",
- priv->workdir, volinfo->volname,
- RB_CLIENTVOL_FILENAME);
-
- ret = unlink (filename);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "unlink failed");
- goto out;
- }
-
- ret = 0;
-
-out:
- return ret;
-}
-
-static int
-rb_spawn_maintenance_client (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *src_brickinfo)
-{
- int ret = -1;
-
- ret = rb_generate_client_volfile (volinfo, src_brickinfo);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG, "Unable to generate client "
- "volfile");
- goto out;
- }
-
- ret = rb_mountpoint_mkdir (volinfo, src_brickinfo);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG, "Unable to mkdir "
- "mountpoint");
- goto out;
- }
-
- ret = rb_spawn_glusterfs_client (volinfo, src_brickinfo);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG, "Unable to start glusterfs");
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-static int
-rb_spawn_destination_brick (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *dst_brickinfo)
-
-{
- int ret = -1;
-
- ret = rb_generate_dst_brick_volfile (volinfo, dst_brickinfo);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG, "Unable to generate client "
- "volfile");
- goto out;
- }
-
- ret = rb_spawn_dst_brick (volinfo, dst_brickinfo);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG, "Unable to start glusterfs");
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-static int
-rb_kill_destination_brick (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *dst_brickinfo)
-{
- glusterd_conf_t *priv = NULL;
- char pidfile[PATH_MAX] = {0,};
-
- priv = THIS->private;
-
- snprintf (pidfile, PATH_MAX, "%s/vols/%s/%s",
- priv->workdir, volinfo->volname,
- RB_DSTBRICK_PIDFILE);
-
- return glusterd_service_stop ("brick", pidfile, SIGTERM, _gf_true);
-}
-
-static int
-rb_get_xattr_command (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *src_brickinfo,
- glusterd_brickinfo_t *dst_brickinfo,
- const char *xattr_key,
- char *value)
-{
- glusterd_conf_t *priv = NULL;
- xlator_t *this = NULL;
- char mount_point_path[PATH_MAX] = {0,};
- int ret = -1;
-
- this = THIS;
- priv = THIS->private;
-
- snprintf (mount_point_path, PATH_MAX, "%s/vols/%s/%s",
- priv->workdir, volinfo->volname,
- RB_CLIENT_MOUNTPOINT);
-
- ret = sys_lgetxattr (mount_point_path, xattr_key, value, 8192);
-
- if (ret < 0) {
- gf_log (this->name, GF_LOG_DEBUG, "getxattr on key: %s "
- "failed", xattr_key);
- goto out;
- }
-
- ret = 0;
-out:
- return ret;
-}
-
-static int
-rb_send_cmd (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *src,
- glusterd_brickinfo_t *dst,
- gf1_cli_replace_op op)
-{
- char start_value[8192] = {0,};
- char status[8192] = {0,};
- char *status_reply = NULL;
- dict_t *ctx = NULL;
- int ret = 0;
-
- GF_ASSERT (volinfo);
- GF_ASSERT (src);
- GF_ASSERT (dst);
- GF_ASSERT ((op > GF_REPLACE_OP_NONE)
- && (op <= GF_REPLACE_OP_COMMIT_FORCE));
-
- switch (op) {
- case GF_REPLACE_OP_START:
- {
- snprintf (start_value, sizeof (start_value),
- "%s:%s:%d", dst->hostname, dst->path,
- dst->port);
- ret = rb_send_xattr_command (volinfo, src, dst,
- RB_PUMP_CMD_START,
- start_value);
- }
- break;
- case GF_REPLACE_OP_PAUSE:
- {
- ret = rb_send_xattr_command (volinfo, src, dst,
- RB_PUMP_CMD_PAUSE,
- RB_PUMP_DEF_ARG);
- }
- break;
- case GF_REPLACE_OP_ABORT:
- {
- ret = rb_send_xattr_command (volinfo, src, dst,
- RB_PUMP_CMD_ABORT,
- RB_PUMP_DEF_ARG);
- }
- break;
- case GF_REPLACE_OP_COMMIT:
- {
- ret = rb_send_xattr_command (volinfo, src, dst,
- RB_PUMP_CMD_COMMIT,
- RB_PUMP_DEF_ARG);
- }
- break;
- case GF_REPLACE_OP_STATUS:
- {
- ret = rb_get_xattr_command (volinfo, src, dst,
- RB_PUMP_CMD_STATUS,
- status);
- if (ret)
- goto out;
-
- ctx = glusterd_op_get_ctx ();
- GF_ASSERT (ctx);
- if (!ctx) {
- ret = -1;
- gf_log (THIS->name, GF_LOG_CRITICAL,
- "ctx is not present.");
- goto out;
- }
-
- status_reply = gf_strdup (status);
- ret = dict_set_dynstr (ctx, "status-reply",
- status_reply);
- if (ret) {
- GF_FREE (status_reply);
- gf_log (THIS->name, GF_LOG_ERROR, "Couldn't "
- "set rb status response in context.");
- goto out;
- }
- }
- break;
- default:
- {
- GF_ASSERT (0);
- ret = -1;
- gf_log (THIS->name, GF_LOG_CRITICAL, "Invalid replace"
- " brick subcommand.");
- }
- break;
- }
-out:
- return ret;
-}
-
-static int
-rb_do_operation (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *src_brickinfo,
- glusterd_brickinfo_t *dst_brickinfo,
- gf1_cli_replace_op op)
-{
-
- int ret = -1;
- char op_str[256] = {0, };
- xlator_t *this = NULL;
-
- this = THIS;
-
- ret = rb_spawn_maintenance_client (volinfo, src_brickinfo);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG, "Could not spawn "
- "maintenance client");
- goto umount;
- }
-
- ret = rb_send_cmd (volinfo, src_brickinfo, dst_brickinfo, op);
- if (ret) {
- (void) glusterd_get_replace_op_str (op, op_str);
- gf_log (this->name, GF_LOG_DEBUG, "Sending replace-brick "
- "sub-command %s failed.", op_str);
- }
-
-umount:
- if (rb_destroy_maintenance_client (volinfo, src_brickinfo))
- gf_log (this->name, GF_LOG_DEBUG, "Failed to destroy "
- "maintenance client");
-
- return ret;
-}
-
-/* Set src-brick's port number to be used in the maintenance mount
- * after all commit acks are received.
- */
-static int
-rb_update_srcbrick_port (glusterd_brickinfo_t *src_brickinfo, dict_t *rsp_dict,
- dict_t *req_dict, int32_t replace_op)
+int
+glusterd_op_perform_replace_brick(glusterd_volinfo_t *volinfo, char *old_brick,
+ char *new_brick, dict_t *dict)
{
- xlator_t *this = NULL;
- dict_t *ctx = NULL;
- int ret = 0;
- int dict_ret = 0;
- int src_port = 0;
-
- this = THIS;
-
- ctx = glusterd_op_get_ctx ();
- if (ctx) {
- dict_ret = dict_get_int32 (req_dict, "src-brick-port", &src_port);
- if (src_port)
- src_brickinfo->port = src_port;
- }
-
- if (!glusterd_is_local_addr (src_brickinfo->hostname)) {
- gf_log ("", GF_LOG_INFO,
- "adding src-brick port no");
-
- src_brickinfo->port = pmap_registry_search (this,
- src_brickinfo->path, GF_PMAP_PORT_BRICKSERVER);
- if (!src_brickinfo->port &&
- replace_op != GF_REPLACE_OP_COMMIT_FORCE ) {
- gf_log ("", GF_LOG_ERROR,
- "Src brick port not available");
- ret = -1;
- goto out;
- }
-
- if (rsp_dict) {
- ret = dict_set_int32 (rsp_dict, "src-brick-port", src_brickinfo->port);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "Could not set src-brick port no");
- goto out;
- }
- }
-
- ctx = glusterd_op_get_ctx ();
- if (ctx) {
- ret = dict_set_int32 (ctx, "src-brick-port", src_brickinfo->port);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "Could not set src-brick port no");
- goto out;
- }
- }
-
- }
+ char *brick_mount_dir = NULL;
+ glusterd_brickinfo_t *old_brickinfo = NULL;
+ glusterd_brickinfo_t *new_brickinfo = NULL;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ struct statvfs brickstat = {
+ 0,
+ };
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(volinfo);
+
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ ret = glusterd_brickinfo_new_from_brick(new_brick, &new_brickinfo, _gf_true,
+ NULL);
+ if (ret)
+ goto out;
+
+ ret = glusterd_resolve_brick(new_brickinfo);
+ if (ret)
+ goto out;
+
+ if (!gf_uuid_compare(new_brickinfo->uuid, MY_UUID)) {
+ ret = sys_statvfs(new_brickinfo->path, &brickstat);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_STATVFS_FAILED,
+ "Failed to fetch disk utilization "
+ "from the brick (%s:%s). Please check the health of "
+ "the brick. Error code was %s",
+ new_brickinfo->hostname, new_brickinfo->path,
+ strerror(errno));
+
+ goto out;
+ }
+ new_brickinfo->statfs_fsid = brickstat.f_fsid;
+ }
+
+ ret = glusterd_volume_brickinfo_get_by_brick(old_brick, volinfo,
+ &old_brickinfo, _gf_false);
+ if (ret)
+ goto out;
+
+ (void)snprintf(new_brickinfo->brick_id, sizeof(new_brickinfo->brick_id),
+ "%s", old_brickinfo->brick_id);
+ new_brickinfo->port = old_brickinfo->port;
+
+ /* A bricks mount dir is required only by snapshots which were
+ * introduced in gluster-3.6.0
+ */
+ if (conf->op_version >= GD_OP_VERSION_3_6_0) {
+ ret = dict_get_strn(dict, "brick1.mount_dir", SLEN("brick1.mount_dir"),
+ &brick_mount_dir);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
+ "brick1.mount_dir not present");
+ goto out;
+ }
+ (void)snprintf(new_brickinfo->mount_dir,
+ sizeof(new_brickinfo->mount_dir), "%s", brick_mount_dir);
+ }
+
+ cds_list_add(&new_brickinfo->brick_list, &old_brickinfo->brick_list);
+
+ volinfo->brick_count++;
+
+ ret = glusterd_op_perform_remove_brick(volinfo, old_brick, 1, NULL);
+ if (ret)
+ goto out;
+
+ /* if the volume is a replicate volume, do: */
+ if (glusterd_is_volume_replicate(volinfo)) {
+ if (!gf_uuid_compare(new_brickinfo->uuid, MY_UUID)) {
+ ret = glusterd_handle_replicate_brick_ops(volinfo, new_brickinfo,
+ GD_OP_REPLACE_BRICK);
+ if (ret < 0)
+ goto out;
+ }
+ }
+
+ ret = glusterd_create_volfiles_and_notify_services(volinfo);
+ if (ret)
+ goto out;
+
+ if (GLUSTERD_STATUS_STARTED == volinfo->status) {
+ ret = glusterd_brick_start(volinfo, new_brickinfo, _gf_false,
+ _gf_false);
+ if (ret)
+ goto out;
+ }
out:
- return ret;
-
-}
-
-static int
-rb_update_dstbrick_port (glusterd_brickinfo_t *dst_brickinfo, dict_t *rsp_dict,
- dict_t *req_dict, int32_t replace_op)
-{
- dict_t *ctx = NULL;
- int ret = 0;
- int dict_ret = 0;
- int dst_port = 0;
-
- ctx = glusterd_op_get_ctx ();
- if (ctx) {
- dict_ret = dict_get_int32 (req_dict, "dst-brick-port", &dst_port);
- if (dst_port)
- dst_brickinfo->port = dst_port;
- }
-
- if (!glusterd_is_local_addr (dst_brickinfo->hostname)) {
- gf_log ("", GF_LOG_INFO,
- "adding dst-brick port no");
-
- if (rsp_dict) {
- ret = dict_set_int32 (rsp_dict, "dst-brick-port",
- dst_brickinfo->port);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "Could not set dst-brick port no in rsp dict");
- goto out;
- }
- }
-
- ctx = glusterd_op_get_ctx ();
- if (ctx) {
- ret = dict_set_int32 (ctx, "dst-brick-port",
- dst_brickinfo->port);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "Could not set dst-brick port no");
- goto out;
- }
- }
- }
-out:
- return ret;
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
}
-static int
-glusterd_op_perform_replace_brick (glusterd_volinfo_t *volinfo,
- char *old_brick, char *new_brick)
+int
+glusterd_op_replace_brick(dict_t *dict, dict_t *rsp_dict)
{
- glusterd_brickinfo_t *old_brickinfo = NULL;
- glusterd_brickinfo_t *new_brickinfo = NULL;
- int32_t ret = -1;
-
- GF_ASSERT (volinfo);
-
- ret = glusterd_brickinfo_from_brick (new_brick,
- &new_brickinfo);
- if (ret)
- goto out;
-
- ret = glusterd_volume_brickinfo_get_by_brick (old_brick, volinfo,
- &old_brickinfo,
- GF_PATH_COMPLETE);
- if (ret)
- goto out;
-
- ret = glusterd_resolve_brick (new_brickinfo);
- if (ret)
- goto out;
-
- list_add_tail (&new_brickinfo->brick_list,
- &old_brickinfo->brick_list);
-
- volinfo->brick_count++;
-
- ret = glusterd_op_perform_remove_brick (volinfo, old_brick, 1, NULL);
- if (ret)
- goto out;
-
- ret = glusterd_create_volfiles_and_notify_services (volinfo);
- if (ret)
- goto out;
-
- if (GLUSTERD_STATUS_STARTED == volinfo->status) {
- ret = glusterd_brick_start (volinfo, new_brickinfo);
- if (ret)
- goto out;
- }
-
+ int ret = 0;
+ char *replace_op = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+ glusterd_brickinfo_t *src_brickinfo = NULL;
+ glusterd_brickinfo_t *dst_brickinfo = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_strn(dict, "src-brick", SLEN("src-brick"), &src_brick);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get src brick");
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0, "src brick=%s", src_brick);
+
+ ret = dict_get_strn(dict, "dst-brick", SLEN("dst-brick"), &dst_brick);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get dst brick");
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0, "dst brick=%s", dst_brick);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "operation", SLEN("operation"), &replace_op);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "dict_get on operation failed");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Unable to allocate memory");
+ goto out;
+ }
+
+ ret = glusterd_volume_brickinfo_get_by_brick(src_brick, volinfo,
+ &src_brickinfo, _gf_false);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Unable to get src-brickinfo");
+ goto out;
+ }
+
+ ret = glusterd_get_rb_dst_brickinfo(volinfo, &dst_brickinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RB_BRICKINFO_GET_FAIL,
+ "Unable to get "
+ "replace brick destination brickinfo");
+ goto out;
+ }
+
+ ret = glusterd_resolve_brick(dst_brickinfo);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Unable to resolve dst-brickinfo");
+ goto out;
+ }
+
+ ret = rb_update_dstbrick_port(dst_brickinfo, rsp_dict, dict);
+ if (ret)
+ goto out;
+
+ if (strcmp(replace_op, "GF_REPLACE_OP_COMMIT_FORCE")) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_svcs_stop(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTER_SERVICES_STOP_FAIL,
+ "Unable to stop gluster services, ret: %d", ret);
+ }
+
+ ret = glusterd_op_perform_replace_brick(volinfo, src_brick, dst_brick,
+ dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_BRICK_ADD_FAIL,
+ "Unable to add dst-brick: "
+ "%s to volume: %s",
+ dst_brick, volinfo->volname);
+ (void)glusterd_svcs_manager(volinfo);
+ goto out;
+ }
+
+ volinfo->rebal.defrag_status = 0;
+
+ ret = glusterd_svcs_manager(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0,
+ GD_MSG_GLUSTER_SERVICE_START_FAIL,
+ "Failed to start one or more gluster services.");
+ }
+
+ ret = glusterd_fetchspec_notify(THIS);
+ glusterd_brickinfo_delete(volinfo->rep_brick.dst_brick);
+ volinfo->rep_brick.src_brick = NULL;
+ volinfo->rep_brick.dst_brick = NULL;
+
+ if (!ret)
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RBOP_STATE_STORE_FAIL,
+ "Couldn't store"
+ " replace brick operation's state");
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
}
int
-glusterd_op_replace_brick (dict_t *dict, dict_t *rsp_dict)
+glusterd_mgmt_v3_initiate_replace_brick_cmd_phases(rpcsvc_request_t *req,
+ glusterd_op_t op,
+ dict_t *dict)
{
- int ret = 0;
- dict_t *ctx = NULL;
- int replace_op = 0;
- glusterd_volinfo_t *volinfo = NULL;
- char *volname = NULL;
- xlator_t *this = NULL;
- glusterd_conf_t *priv = NULL;
- char *src_brick = NULL;
- char *dst_brick = NULL;
- glusterd_brickinfo_t *src_brickinfo = NULL;
- glusterd_brickinfo_t *dst_brickinfo = NULL;
-
- this = THIS;
- GF_ASSERT (this);
-
- priv = this->private;
- GF_ASSERT (priv);
-
- ret = dict_get_str (dict, "src-brick", &src_brick);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get src brick");
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "src brick=%s", src_brick);
-
- ret = dict_get_str (dict, "dst-brick", &dst_brick);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get dst brick");
- goto out;
- }
-
- gf_log (this->name, GF_LOG_DEBUG,
- "dst brick=%s", dst_brick);
-
- ret = dict_get_str (dict, "volname", &volname);
-
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
- goto out;
- }
-
- ret = dict_get_int32 (dict, "operation", (int32_t *)&replace_op);
- if (ret) {
- gf_log (this->name, GF_LOG_DEBUG,
- "dict_get on operation failed");
- goto out;
- }
-
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
- goto out;
- }
-
- ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo,
- &src_brickinfo,
- GF_PATH_COMPLETE);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG, "Unable to get src-brickinfo");
- goto out;
- }
-
-
- ret = glusterd_get_rb_dst_brickinfo (volinfo, &dst_brickinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get "
- "replace brick destination brickinfo");
- goto out;
- }
-
- ret = glusterd_resolve_brick (dst_brickinfo);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG, "Unable to resolve dst-brickinfo");
- goto out;
- }
-
- ret = rb_update_srcbrick_port (src_brickinfo, rsp_dict,
- dict, replace_op);
- if (ret)
- goto out;
-
- if ((GF_REPLACE_OP_START != replace_op)) {
- ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict,
- dict, replace_op);
- if (ret)
- goto out;
- }
-
- switch (replace_op) {
- case GF_REPLACE_OP_START:
- {
- if (!glusterd_is_local_addr (dst_brickinfo->hostname)) {
- gf_log ("", GF_LOG_INFO,
- "I AM THE DESTINATION HOST");
- if (!glusterd_is_rb_paused (volinfo)) {
- ret = rb_spawn_destination_brick (volinfo, dst_brickinfo);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "Failed to spawn destination brick");
- goto out;
- }
- } else {
- gf_log ("", GF_LOG_ERROR, "Replace brick is already "
- "started=> no need to restart dst brick ");
- }
- }
-
-
- if (!glusterd_is_local_addr (src_brickinfo->hostname)) {
- ret = rb_src_brick_restart (volinfo, src_brickinfo,
- 1);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "Could not restart src-brick");
- goto out;
- }
- }
-
- if (!glusterd_is_local_addr (dst_brickinfo->hostname)) {
- gf_log ("", GF_LOG_INFO,
- "adding dst-brick port no");
-
- ret = rb_update_dstbrick_port (dst_brickinfo, rsp_dict,
- dict, replace_op);
- if (ret)
- goto out;
- }
-
- glusterd_set_rb_status (volinfo, GF_RB_STATUS_STARTED);
- break;
- }
-
- case GF_REPLACE_OP_COMMIT:
- {
- ctx = glusterd_op_get_ctx ();
- if (ctx) {
- ret = rb_do_operation (volinfo, src_brickinfo,
- dst_brickinfo,
- GF_REPLACE_OP_COMMIT);
- if (ret) {
- gf_log ("", GF_LOG_ERROR,
- "Commit operation failed");
- goto out;
- }
- }
- }
- /* fall through */
- case GF_REPLACE_OP_COMMIT_FORCE:
- {
- ret = dict_set_int32 (volinfo->dict, "enable-pump", 0);
- gf_log (THIS->name, GF_LOG_DEBUG,
- "Received commit - will be adding dst brick and "
- "removing src brick");
-
- if (!glusterd_is_local_addr (dst_brickinfo->hostname)) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "I AM THE DESTINATION HOST");
- ret = rb_kill_destination_brick (volinfo, dst_brickinfo);
- if (ret) {
- gf_log (THIS->name, GF_LOG_CRITICAL,
- "Unable to cleanup dst brick");
- goto out;
- }
- }
-
- ret = glusterd_nodesvcs_stop (volinfo);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Unable to stop nfs server, ret: %d", ret);
- }
-
- ret = glusterd_op_perform_replace_brick (volinfo, src_brick,
- dst_brick);
- if (ret) {
- gf_log (THIS->name, GF_LOG_CRITICAL, "Unable to add "
- "dst-brick: %s to volume: %s",
- dst_brick, volinfo->volname);
- (void) glusterd_nodesvcs_handle_graph_change (volinfo);
- goto out;
- }
-
- volinfo->defrag_status = 0;
-
- ret = glusterd_nodesvcs_handle_graph_change (volinfo);
- if (ret) {
- gf_log (THIS->name, GF_LOG_CRITICAL,
- "Failed to generate nfs volume file");
- }
-
-
- ret = glusterd_fetchspec_notify (THIS);
- glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE);
- glusterd_brickinfo_delete (volinfo->dst_brick);
- volinfo->src_brick = volinfo->dst_brick = NULL;
- }
- break;
-
- case GF_REPLACE_OP_PAUSE:
- {
- gf_log ("", GF_LOG_DEBUG,
- "Received pause - doing nothing");
- ctx = glusterd_op_get_ctx ();
- if (ctx) {
- ret = rb_do_operation (volinfo, src_brickinfo,
- dst_brickinfo,
- GF_REPLACE_OP_PAUSE);
- if (ret) {
- gf_log ("", GF_LOG_ERROR,
- "Pause operation failed");
- goto out;
- }
- }
-
- glusterd_set_rb_status (volinfo, GF_RB_STATUS_PAUSED);
- }
- break;
-
- case GF_REPLACE_OP_ABORT:
- {
-
- ctx = glusterd_op_get_ctx ();
- if (ctx) {
- ret = rb_do_operation (volinfo, src_brickinfo,
- dst_brickinfo,
- GF_REPLACE_OP_ABORT);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Abort operation failed");
- goto out;
- }
- }
-
- ret = dict_set_int32 (volinfo->dict, "enable-pump", 0);
- if (ret) {
- gf_log (THIS->name, GF_LOG_CRITICAL, "Unable to disable pump");
- }
-
- if (!glusterd_is_local_addr (src_brickinfo->hostname)) {
- ret = rb_src_brick_restart (volinfo, src_brickinfo,
- 0);
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Couldn't restart src brick "
- "with pump xlator disabled.");
- goto out;
- }
- }
-
- if (!glusterd_is_local_addr (dst_brickinfo->hostname)) {
- gf_log (THIS->name, GF_LOG_INFO,
- "I AM THE DESTINATION HOST");
- ret = rb_kill_destination_brick (volinfo, dst_brickinfo);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "Failed to kill destination brick");
- goto out;
- }
- }
- glusterd_set_rb_status (volinfo, GF_RB_STATUS_NONE);
- glusterd_brickinfo_delete (volinfo->dst_brick);
- volinfo->src_brick = volinfo->dst_brick = NULL;
- }
- break;
-
- case GF_REPLACE_OP_STATUS:
- {
- gf_log ("", GF_LOG_DEBUG,
- "received status - doing nothing");
- ctx = glusterd_op_get_ctx ();
- if (ctx) {
- if (glusterd_is_rb_paused (volinfo)) {
- ret = dict_set_str (ctx, "status-reply",
- "replace brick has been paused");
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to set pump status"
- " in ctx");
- goto out;
- }
-
- ret = rb_do_operation (volinfo, src_brickinfo,
- dst_brickinfo,
- GF_REPLACE_OP_STATUS);
- if (ret)
- goto out;
- }
-
- }
- break;
-
- default:
- ret = -1;
- goto out;
- }
- if (!ret && replace_op != GF_REPLACE_OP_STATUS)
- ret = glusterd_store_volinfo (volinfo,
- GLUSTERD_VOLINFO_VER_AC_INCREMENT);
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR, "Couldn't store"
- " replace brick operation's state");
+ int32_t ret = -1;
+ int32_t op_ret = -1;
+ uint32_t txn_generation = 0;
+ uint32_t op_errno = 0;
+ char *op_errstr = NULL;
+ dict_t *req_dict = NULL;
+ dict_t *tmp_dict = NULL;
+ uuid_t *originator_uuid = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ gf_boolean_t is_acquired = _gf_false;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(dict);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ txn_generation = conf->generation;
+ originator_uuid = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!originator_uuid) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_uuid_copy(*originator_uuid, MY_UUID);
+ ret = dict_set_bin(dict, "originator_uuid", originator_uuid,
+ sizeof(uuid_t));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set originator_uuid.");
+ GF_FREE(originator_uuid);
+ goto out;
+ }
+
+ ret = dict_set_int32n(dict, "is_synctasked", SLEN("is_synctasked"),
+ _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set synctasked flag to true.");
+ goto out;
+ }
+
+ tmp_dict = dict_new();
+ if (!tmp_dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Unable to create dict");
+ goto out;
+ }
+ dict_copy(dict, tmp_dict);
+
+ ret = glusterd_mgmt_v3_initiate_lockdown(op, dict, &op_errstr, &op_errno,
+ &is_acquired, txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCKDOWN_FAIL,
+ "mgmt_v3 lockdown failed.");
+ goto out;
+ }
+
+ ret = glusterd_mgmt_v3_build_payload(&req_dict, &op_errstr, dict, op);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_PAYLOAD_BUILD_FAIL,
+ LOGSTR_BUILD_PAYLOAD, gd_op_list[op]);
+ if (op_errstr == NULL)
+ gf_asprintf(&op_errstr, OPERRSTR_BUILD_PAYLOAD);
+ goto out;
+ }
+
+ ret = glusterd_mgmt_v3_pre_validate(op, req_dict, &op_errstr, &op_errno,
+ txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL,
+ "Pre Validation Failed");
+ goto out;
+ }
+
+ ret = glusterd_mgmt_v3_commit(op, dict, req_dict, &op_errstr, &op_errno,
+ txn_generation);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_OP_FAIL,
+ "Commit Op Failed");
+ goto out;
+ }
+
+ ret = 0;
out:
- return ret;
-}
-
-void
-glusterd_do_replace_brick (void *data)
-{
- glusterd_volinfo_t *volinfo = NULL;
- int32_t op = 0;
- int32_t src_port = 0;
- int32_t dst_port = 0;
- dict_t *dict = NULL;
- char *src_brick = NULL;
- char *dst_brick = NULL;
- char *volname = NULL;
- glusterd_brickinfo_t *src_brickinfo = NULL;
- glusterd_brickinfo_t *dst_brickinfo = NULL;
- glusterd_conf_t *priv = NULL;
-
- int ret = 0;
-
- dict = data;
-
- GF_ASSERT (THIS);
-
- priv = THIS->private;
+ op_ret = ret;
- if (priv->timer) {
- gf_timer_call_cancel (THIS->ctx, priv->timer);
- priv->timer = NULL;
- gf_log ("", GF_LOG_DEBUG,
- "Cancelling timer thread");
- }
+ (void)glusterd_mgmt_v3_release_peer_locks(op, dict, op_ret, &op_errstr,
+ is_acquired, txn_generation);
- gf_log ("", GF_LOG_DEBUG,
- "Replace brick operation detected");
-
- ret = dict_get_int32 (dict, "operation", &op);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "dict_get on operation failed");
- goto out;
- }
- ret = dict_get_str (dict, "src-brick", &src_brick);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get src brick");
- goto out;
- }
-
- gf_log ("", GF_LOG_DEBUG,
- "src brick=%s", src_brick);
-
- ret = dict_get_str (dict, "dst-brick", &dst_brick);
+ if (is_acquired) {
+ ret = glusterd_multiple_mgmt_v3_unlock(tmp_dict, MY_UUID);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get dst brick");
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FAIL,
+ "Failed to release mgmt_v3 locks on "
+ "localhost.");
+ op_ret = ret;
}
+ }
+ /* SEND CLI RESPONSE */
+ glusterd_op_send_cli_response(op, op_ret, op_errno, req, dict, op_errstr);
- gf_log ("", GF_LOG_DEBUG,
- "dst brick=%s", dst_brick);
-
- ret = dict_get_str (dict, "volname", &volname);
+ if (req_dict)
+ dict_unref(req_dict);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
- goto out;
- }
-
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to allocate memory");
- goto out;
- }
-
- ret = glusterd_volume_brickinfo_get_by_brick (src_brick, volinfo,
- &src_brickinfo,
- GF_PATH_COMPLETE);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG, "Unable to get src-brickinfo");
- goto out;
- }
-
- ret = glusterd_get_rb_dst_brickinfo (volinfo, &dst_brickinfo);
- if (!dst_brickinfo) {
- gf_log ("", GF_LOG_DEBUG, "Unable to get dst-brickinfo");
- goto out;
- }
-
- ret = glusterd_resolve_brick (dst_brickinfo);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG, "Unable to resolve dst-brickinfo");
- goto out;
- }
-
- ret = dict_get_int32 (dict, "src-brick-port", &src_port);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get src-brick port");
- goto out;
- }
-
- ret = dict_get_int32 (dict, "dst-brick-port", &dst_port);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get dst-brick port");
- }
-
- dst_brickinfo->port = dst_port;
- src_brickinfo->port = src_port;
-
- switch (op) {
- case GF_REPLACE_OP_START:
- if (!dst_port) {
- ret = -1;
- goto out;
- }
-
- ret = rb_do_operation (volinfo, src_brickinfo, dst_brickinfo,
- GF_REPLACE_OP_START);
- if (ret)
- goto out;
- break;
- case GF_REPLACE_OP_PAUSE:
- case GF_REPLACE_OP_ABORT:
- case GF_REPLACE_OP_COMMIT:
- case GF_REPLACE_OP_COMMIT_FORCE:
- case GF_REPLACE_OP_STATUS:
- break;
- default:
- ret = -1;
- goto out;
- }
-
-out:
- if (ret)
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, NULL);
- else
- ret = glusterd_op_sm_inject_event (GD_OP_EVENT_COMMIT_ACC, NULL);
+ if (tmp_dict)
+ dict_unref(tmp_dict);
- if (dict)
- dict_unref (dict);
+ if (op_errstr) {
+ GF_FREE(op_errstr);
+ op_errstr = NULL;
+ }
- glusterd_op_sm ();
+ return 0;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-reset-brick.c b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
new file mode 100644
index 00000000000..e4d247a1d6c
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-reset-brick.c
@@ -0,0 +1,376 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/common-utils.h>
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+#include <glusterfs/glusterfs.h>
+#include "glusterd.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-geo-rep.h"
+#include "glusterd-store.h"
+#include "glusterd-utils.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-volgen.h"
+#include "glusterd-messages.h"
+#include "glusterd-mgmt.h"
+#include <glusterfs/run.h>
+#include <glusterfs/syscall.h>
+
+#include <signal.h>
+
+int
+glusterd_reset_brick_prevalidate(dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int ret = 0;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+ char *volname = NULL;
+ char *op = NULL;
+ glusterd_op_t gd_op = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *src_brickinfo = NULL;
+ char *host = NULL;
+ char msg[2048] = {0};
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_brickinfo_t *dst_brickinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ char pidfile[PATH_MAX] = {0};
+ xlator_t *this = NULL;
+ gf_boolean_t is_force = _gf_false;
+ int32_t ignore_partition = 0;
+ pid_t pid = -1;
+ uuid_t volume_id = {
+ 0,
+ };
+ char *dup_dstbrick = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = glusterd_brick_op_prerequisites(dict, &op, &gd_op, &volname, &volinfo,
+ &src_brick, &src_brickinfo, pidfile,
+ op_errstr, rsp_dict);
+ if (ret)
+ goto out;
+
+ if (!strcmp(op, "GF_RESET_OP_START"))
+ goto done;
+
+ if (!strcmp(op, "GF_RESET_OP_COMMIT_FORCE"))
+ is_force = _gf_true;
+
+ ret = glusterd_get_dst_brick_info(&dst_brick, volname, op_errstr,
+ &dst_brickinfo, &host, dict,
+ &dup_dstbrick);
+ if (ret)
+ goto out;
+
+ ret = glusterd_new_brick_validate(dst_brick, dst_brickinfo, msg,
+ sizeof(msg), op);
+ /* if bricks are not same and reset brick was used, fail command.
+ * Only replace brick should be used to replace with new bricks
+ * to the volume.
+ */
+ if (ret == 0) {
+ if (!gf_uuid_compare(MY_UUID, dst_brickinfo->uuid)) {
+ ret = -1;
+ *op_errstr = gf_strdup(
+ "When destination brick is new,"
+ " please use"
+ " gluster volume "
+ "replace-brick <volname> "
+ "<src-brick> <dst-brick> "
+ "commit force");
+ if (*op_errstr)
+ gf_msg(this->name, GF_LOG_ERROR, EPERM,
+ GD_MSG_BRICK_VALIDATE_FAIL, "%s", *op_errstr);
+ goto out;
+ }
+ } else if (ret == 1) {
+ if (gf_is_service_running(pidfile, &pid)) {
+ ret = -1;
+ *op_errstr = gf_strdup(
+ "Source brick"
+ " must be stopped."
+ " Please use "
+ "gluster volume "
+ "reset-brick <volname> "
+ "<dst-brick> start.");
+ if (*op_errstr)
+ gf_msg(this->name, GF_LOG_ERROR, EPERM,
+ GD_MSG_BRICK_VALIDATE_FAIL, "%s", *op_errstr);
+ goto out;
+ }
+ ret = sys_lgetxattr(dst_brickinfo->path, GF_XATTR_VOL_ID_KEY, volume_id,
+ 16);
+ if (gf_uuid_compare(dst_brickinfo->uuid, src_brickinfo->uuid) ||
+ (ret >= 0 && is_force == _gf_false)) {
+ ret = -1;
+ *op_errstr = gf_strdup(
+ "Brick not available."
+ "It may be containing "
+ "or be contained "
+ "by an existing brick."
+ "Use 'force' option to "
+ "override this.");
+ if (*op_errstr)
+ gf_msg(this->name, GF_LOG_ERROR, EPERM,
+ GD_MSG_BRICK_VALIDATE_FAIL, "%s", *op_errstr);
+ goto out;
+ }
+ ret = 0;
+ } else {
+ *op_errstr = gf_strdup(msg);
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_VALIDATE_FAIL, "%s",
+ *op_errstr);
+ goto out;
+ }
+
+ volinfo->rep_brick.src_brick = src_brickinfo;
+ volinfo->rep_brick.dst_brick = dst_brickinfo;
+
+ ret = dict_get_int32n(dict, "ignore-partition", SLEN("ignore-partition"),
+ &ignore_partition);
+ ret = 0;
+ if (gf_is_local_addr(host)) {
+ ret = glusterd_validate_and_create_brickpath(
+ dst_brickinfo, volinfo->volume_id, volinfo->volname, op_errstr,
+ is_force, ignore_partition);
+ if (ret)
+ goto out;
+ } else {
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find(NULL, host);
+ if (peerinfo == NULL) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ snprintf(msg, sizeof(msg), "%s, is not a friend.", host);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+
+ } else if (!peerinfo->connected) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "%s,"
+ "is not connected at "
+ "the moment.",
+ host);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+
+ } else if (GD_FRIEND_STATE_BEFRIENDED != peerinfo->state.state) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ snprintf(msg, sizeof(msg),
+ "%s, is not befriended "
+ "at the moment.",
+ host);
+ *op_errstr = gf_strdup(msg);
+ goto out;
+ }
+ RCU_READ_UNLOCK;
+ }
+
+ if (!(gf_uuid_compare(dst_brickinfo->uuid, MY_UUID))) {
+ ret = glusterd_get_brick_mount_dir(dst_brickinfo->path,
+ dst_brickinfo->hostname,
+ dst_brickinfo->mount_dir);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_MOUNTDIR_GET_FAIL,
+ "Failed to get brick mount_dir");
+ goto out;
+ }
+ ret = dict_set_dynstr_with_alloc(rsp_dict, "brick1.mount_dir",
+ dst_brickinfo->mount_dir);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set brick.mount_dir");
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32n(rsp_dict, "brick_count", SLEN("brick_count"), 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set local_brick_count.");
+ goto out;
+ }
+
+done:
+ ret = 0;
+out:
+ GF_FREE(dup_dstbrick);
+ gf_msg_debug(this->name, 0, "Returning %d.", ret);
+
+ return ret;
+}
+
+int
+glusterd_op_reset_brick(dict_t *dict, dict_t *rsp_dict)
+{
+ int ret = 0;
+ char *op = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char *src_brick = NULL;
+ char *dst_brick = NULL;
+ glusterd_brickinfo_t *src_brickinfo = NULL;
+ glusterd_brickinfo_t *dst_brickinfo = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_strn(dict, "operation", SLEN("operation"), &op);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "dict_get on operation failed");
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret)
+ goto out;
+
+ ret = dict_get_strn(dict, "src-brick", SLEN("src-brick"), &src_brick);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get src brick");
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0, "src brick=%s", src_brick);
+
+ ret = glusterd_volume_brickinfo_get_by_brick(src_brick, volinfo,
+ &src_brickinfo, _gf_false);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Unable to get src-brickinfo");
+ goto out;
+ }
+
+ if (!strcmp(op, "GF_RESET_OP_START")) {
+ ret = glusterd_volume_stop_glusterfs(volinfo, src_brickinfo, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_BRICK_STOP_FAIL,
+ "Unable to stop"
+ " brick: %s:%s",
+ src_brickinfo->hostname, src_brickinfo->path);
+ }
+
+ goto out;
+
+ } else if (!strcmp(op, "GF_RESET_OP_COMMIT") ||
+ !strcmp(op, "GF_RESET_OP_COMMIT_FORCE")) {
+ ret = dict_get_strn(dict, "dst-brick", SLEN("dst-brick"), &dst_brick);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get dst brick");
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0, "dst brick=%s", dst_brick);
+
+ ret = glusterd_get_rb_dst_brickinfo(volinfo, &dst_brickinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RB_BRICKINFO_GET_FAIL,
+ "Unable to get "
+ "reset brick "
+ "destination brickinfo");
+ goto out;
+ }
+
+ ret = glusterd_resolve_brick(dst_brickinfo);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Unable to resolve dst-brickinfo");
+ goto out;
+ }
+
+ ret = rb_update_dstbrick_port(dst_brickinfo, rsp_dict, dict);
+ if (ret)
+ goto out;
+
+ if (gf_uuid_compare(dst_brickinfo->uuid, MY_UUID)) {
+ gf_msg_debug(this->name, 0, "I AM THE DESTINATION HOST");
+ ret = glusterd_volume_stop_glusterfs(volinfo, src_brickinfo,
+ _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_BRICK_STOP_FAIL,
+ "Unable to stop brick: %s:%s", src_brickinfo->hostname,
+ src_brickinfo->path);
+ goto out;
+ }
+ }
+
+ ret = glusterd_svcs_stop(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_GLUSTER_SERVICES_STOP_FAIL,
+ "Unable to stop gluster services, ret: %d", ret);
+ goto out;
+ }
+ ret = glusterd_op_perform_replace_brick(volinfo, src_brick, dst_brick,
+ dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_BRICK_ADD_FAIL,
+ "Unable to add dst-brick: "
+ "%s to volume: %s",
+ dst_brick, volinfo->volname);
+ (void)glusterd_svcs_manager(volinfo);
+ goto out;
+ }
+
+ volinfo->rebal.defrag_status = 0;
+
+ ret = glusterd_svcs_manager(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0,
+ GD_MSG_GLUSTER_SERVICE_START_FAIL,
+ "Failed to start one or more gluster services.");
+ }
+
+ ret = glusterd_fetchspec_notify(THIS);
+ glusterd_brickinfo_delete(volinfo->rep_brick.dst_brick);
+ volinfo->rep_brick.src_brick = NULL;
+ volinfo->rep_brick.dst_brick = NULL;
+
+ if (!ret)
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RBOP_STATE_STORE_FAIL,
+ "Couldn't store"
+ " reset brick operation's state.");
+ }
+ } else {
+ ret = -1;
+ goto out;
+ }
+
+out:
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
index 240f808fe25..88662e3bbae 100644
--- a/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
+++ b/xlators/mgmt/glusterd/src/glusterd-rpc-ops.c
@@ -1,27 +1,12 @@
/*
- Copyright (c) 2010-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
-
+ Copyright (c) 2010-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#include "rpc-clnt.h"
#include "glusterd1-xdr.h"
@@ -29,110 +14,107 @@
#include "xdr-generic.h"
-#include "compat-errno.h"
+#include <glusterfs/compat-errno.h>
#include "glusterd-op-sm.h"
#include "glusterd-sm.h"
#include "glusterd.h"
#include "protocol-common.h"
#include "glusterd-utils.h"
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
+#include "glusterd-messages.h"
+#include "glusterd-snapshot-utils.h"
#include <sys/uio.h>
+#define SERVER_PATH_MAX (16 * 1024)
-#define SERVER_PATH_MAX (16 * 1024)
-
+#define GLUSTERD_STACK_DESTROY(frame) \
+ do { \
+ frame->local = NULL; \
+ STACK_DESTROY(frame->root); \
+ } while (0)
extern glusterd_op_info_t opinfo;
+extern uuid_t global_txn_id;
int32_t
-glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,
- int32_t op_errno, rpcsvc_request_t *req,
- void *op_ctx, char *op_errstr)
+glusterd_op_send_cli_response(glusterd_op_t op, int32_t op_ret,
+ int32_t op_errno, rpcsvc_request_t *req,
+ void *op_ctx, char *op_errstr)
{
- int32_t ret = -1;
- void *cli_rsp = NULL;
- dict_t *ctx = NULL;
- char *free_ptr = NULL;
- glusterd_conf_t *conf = NULL;
- xdrproc_t xdrproc = NULL;
- char *errstr = NULL;
- int32_t status = 0;
- int32_t count = 0;
- gf_cli_rsp rsp = {0,};
-
- GF_ASSERT (THIS);
-
- conf = THIS->private;
-
- GF_ASSERT (conf);
-
- ctx = op_ctx;
-
- switch (op) {
- case GD_OP_REMOVE_BRICK:
- {
- if (ctx)
- ret = dict_get_str (ctx, "errstr", &errstr);
- break;
- }
- case GD_OP_RESET_VOLUME:
- {
- if (op_ret && !op_errstr)
- errstr = "Error while resetting options";
- break;
+ int32_t ret = -1;
+ void *cli_rsp = NULL;
+ dict_t *ctx = NULL;
+ char *free_ptr = NULL;
+ glusterd_conf_t *conf = NULL;
+ xdrproc_t xdrproc = NULL;
+ char *errstr = NULL;
+ int32_t status = 0;
+ int32_t count = 0;
+ gf_cli_rsp rsp = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+
+ GF_ASSERT(conf);
+
+ ctx = op_ctx;
+
+ switch (op) {
+ case GD_OP_REMOVE_BRICK: {
+ if (ctx)
+ ret = dict_get_strn(ctx, "errstr", SLEN("errstr"), &errstr);
+ break;
+ }
+ case GD_OP_RESET_VOLUME: {
+ if (op_ret && !op_errstr)
+ errstr = "Error while resetting options";
+ break;
}
case GD_OP_REBALANCE:
- case GD_OP_DEFRAG_BRICK_VOLUME:
- {
- if (ctx) {
- ret = dict_get_int32 (ctx, "status", &status);
- if (ret) {
- gf_log (THIS->name, GF_LOG_TRACE,
- "failed to get status");
- }
- }
- break;
- }
- case GD_OP_GSYNC_SET:
- {
- if (ctx) {
- ret = dict_get_str (ctx, "errstr", &errstr);
- ret = dict_set_str (ctx, "glusterd_workdir", conf->workdir);
- /* swallow error here, that will be re-triggered in cli */
-
- }
- break;
-
- }
- case GD_OP_QUOTA:
- {
- if (ctx && !op_errstr) {
- ret = dict_get_str (ctx, "errstr", &errstr);
+ case GD_OP_DEFRAG_BRICK_VOLUME: {
+ if (ctx) {
+ ret = dict_get_int32n(ctx, "status", SLEN("status"), &status);
+ if (ret) {
+ gf_msg_trace(this->name, 0, "failed to get status");
}
- break;
- }
- case GD_OP_PROFILE_VOLUME:
- {
- if (ctx && dict_get_int32 (ctx, "count", &count)) {
- ret = dict_set_int32 (ctx, "count", 0);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "failed to set count in dictionary");
- }
+ }
+ break;
+ }
+ case GD_OP_GSYNC_CREATE:
+ case GD_OP_GSYNC_SET: {
+ if (ctx) {
+ ret = dict_get_strn(ctx, "errstr", SLEN("errstr"), &errstr);
+ ret = dict_set_strn(ctx, "glusterd_workdir",
+ SLEN("glusterd_workdir"), conf->workdir);
+ /* swallow error here, that will be re-triggered in cli */
+ }
+ break;
+ }
+ case GD_OP_PROFILE_VOLUME: {
+ if (ctx && dict_get_int32n(ctx, "count", SLEN("count"), &count)) {
+ ret = dict_set_int32n(ctx, "count", SLEN("count"), 0);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to set count in dictionary");
}
- break;
+ }
+ break;
}
case GD_OP_START_BRICK:
- case GD_OP_STOP_BRICK:
- {
- gf_log ("", GF_LOG_DEBUG, "not supported op %d", op);
- break;
+ case GD_OP_STOP_BRICK: {
+ gf_msg_debug(this->name, 0, "op '%s' not supported",
+ gd_op_list[op]);
+ break;
}
case GD_OP_NONE:
- case GD_OP_MAX:
- {
- gf_log ("", GF_LOG_ERROR, "invalid operation %d", op);
- break;
+ case GD_OP_MAX: {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_OP_UNSUPPORTED,
+ "invalid operation");
+ break;
}
case GD_OP_CREATE_VOLUME:
case GD_OP_START_VOLUME:
@@ -149,1902 +131,2318 @@ glusterd_op_send_cli_response (glusterd_op_t op, int32_t op_ret,
case GD_OP_LIST_VOLUME:
case GD_OP_CLEARLOCKS_VOLUME:
case GD_OP_HEAL_VOLUME:
- {
- /*nothing specific to be done*/
- break;
- }
- }
+ case GD_OP_QUOTA:
+ case GD_OP_SNAP:
+ case GD_OP_BARRIER:
+ case GD_OP_BITROT:
+ case GD_OP_SCRUB_STATUS:
+ case GD_OP_SCRUB_ONDEMAND:
+ case GD_OP_RESET_BRICK:
+ case GD_OP_MAX_OPVERSION:
+ case GD_OP_DETACH_NOT_STARTED:
+ case GD_OP_GANESHA:
+ case GD_OP_DETACH_TIER:
+ case GD_OP_TIER_MIGRATE:
+ case GD_OP_TIER_START_STOP:
+ case GD_OP_TIER_STATUS:
+ case GD_OP_DETACH_TIER_STATUS:
+ case GD_OP_REMOVE_TIER_BRICK:
+ case GD_OP_ADD_TIER_BRICK:
- rsp.op_ret = op_ret;
- rsp.op_errno = errno;
- if (errstr)
- rsp.op_errstr = errstr;
- else if (op_errstr)
- rsp.op_errstr = op_errstr;
-
- if (!rsp.op_errstr)
- rsp.op_errstr = "";
-
- if (ctx) {
- ret = dict_allocate_and_serialize (ctx, &rsp.dict.dict_val,
- (size_t*)&rsp.dict.dict_len);
- if (ret < 0 )
- gf_log (THIS->name, GF_LOG_ERROR, "failed to "
- "serialize buffer");
- else
- free_ptr = rsp.dict.dict_val;
- }
+ {
+ /*nothing specific to be done*/
+ break;
+ }
+ case GD_OP_COPY_FILE: {
+ if (ctx)
+ ret = dict_get_strn(ctx, "errstr", SLEN("errstr"), &errstr);
+ break;
+ }
+ case GD_OP_SYS_EXEC: {
+ if (ctx) {
+ ret = dict_get_strn(ctx, "errstr", SLEN("errstr"), &errstr);
+ ret = dict_set_strn(ctx, "glusterd_workdir",
+ SLEN("glusterd_workdir"), conf->workdir);
+ }
+ break;
+ }
+ }
+
+ rsp.op_ret = op_ret;
+ rsp.op_errno = op_errno;
+
+ if (errstr)
+ rsp.op_errstr = errstr;
+ else if (op_errstr)
+ rsp.op_errstr = op_errstr;
+
+ if (!rsp.op_errstr)
+ rsp.op_errstr = "";
+
+ if (ctx) {
+ ret = dict_allocate_and_serialize(ctx, &rsp.dict.dict_val,
+ &rsp.dict.dict_len);
+ if (ret < 0)
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ else
+ free_ptr = rsp.dict.dict_val;
+ }
- /* needed by 'rebalance status' */
- if (status)
- rsp.op_errno = status;
+ /* needed by 'rebalance status' */
+ if (status)
+ rsp.op_errno = status;
- cli_rsp = &rsp;
- xdrproc = (xdrproc_t) xdr_gf_cli_rsp;
+ cli_rsp = &rsp;
+ xdrproc = (xdrproc_t)xdr_gf_cli_rsp;
- ret = glusterd_submit_reply (req, cli_rsp, NULL, 0, NULL,
- xdrproc);
+ glusterd_to_cli(req, cli_rsp, NULL, 0, NULL, xdrproc, ctx);
+ ret = 0;
- if (free_ptr)
- GF_FREE (free_ptr);
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ GF_FREE(free_ptr);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
int
-glusterd3_1_probe_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+glusterd_big_locked_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe, fop_cbk_fn_t fn)
{
- gd1_mgmt_probe_rsp rsp = {{0},};
- int ret = 0;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_friend_sm_event_t *event = NULL;
- glusterd_probe_ctx_t *ctx = NULL;
-
- if (-1 == req->rpc_status) {
- goto out;
- }
-
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_probe_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- //rsp.op_ret = -1;
- //rsp.op_errno = EINVAL;
- goto out;
- }
-
- gf_log ("glusterd", GF_LOG_INFO,
- "Received probe resp from uuid: %s, host: %s",
- uuid_utoa (rsp.uuid), rsp.hostname);
- if (rsp.op_ret != 0) {
- ctx = ((call_frame_t *)myframe)->local;
- ((call_frame_t *)myframe)->local = NULL;
-
- GF_ASSERT (ctx);
-
- if (ctx->req) {
- glusterd_xfer_cli_probe_resp (ctx->req, rsp.op_ret,
- rsp.op_errno,
- rsp.op_errstr,
- ctx->hostname, ctx->port);
- }
-
- glusterd_destroy_probe_ctx (ctx);
- (void) glusterd_friend_remove (rsp.uuid, rsp.hostname);
- ret = rsp.op_ret;
- goto out;
- }
- ret = glusterd_friend_find (rsp.uuid, rsp.hostname, &peerinfo);
- if (ret) {
- GF_ASSERT (0);
- }
-
- if (strncasecmp (rsp.hostname, peerinfo->hostname, 1024)) {
- gf_log (THIS->name, GF_LOG_INFO, "Host: %s with uuid: %s "
- "already present in cluster with alias hostname: %s",
- rsp.hostname, uuid_utoa (rsp.uuid), peerinfo->hostname);
-
- ctx = ((call_frame_t *)myframe)->local;
- ((call_frame_t *)myframe)->local = NULL;
-
- GF_ASSERT (ctx);
-
- rsp.op_errno = GF_PROBE_FRIEND;
- if (ctx->req) {
- glusterd_xfer_cli_probe_resp (ctx->req, rsp.op_ret,
- rsp.op_errno,
- rsp.op_errstr,
- ctx->hostname, ctx->port);
- }
-
- glusterd_destroy_probe_ctx (ctx);
- (void) glusterd_friend_remove (NULL, rsp.hostname);
- ret = rsp.op_ret;
- goto out;
- }
-
- uuid_copy (peerinfo->uuid, rsp.uuid);
-
- ret = glusterd_friend_sm_new_event
- (GD_FRIEND_EVENT_INIT_FRIEND_REQ, &event);
-
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "Unable to get event");
- goto out;
- }
-
- event->peerinfo = peerinfo;
- event->ctx = ((call_frame_t *)myframe)->local;
- ((call_frame_t *)myframe)->local = NULL;
- ret = glusterd_friend_sm_inject_event (event);
-
-
- if (!ret) {
- glusterd_friend_sm ();
- glusterd_op_sm ();
- }
+ glusterd_conf_t *priv = THIS->private;
+ int ret = -1;
- gf_log ("glusterd", GF_LOG_INFO, "Received resp to probe req");
+ synclock_lock(&priv->big_lock);
+ ret = fn(req, iov, count, myframe);
+ synclock_unlock(&priv->big_lock);
-out:
- if (rsp.hostname)
- free (rsp.hostname);//malloced by xdr
- GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
- return ret;
+ return ret;
}
int
-glusterd3_1_friend_add_cbk (struct rpc_req * req, struct iovec *iov,
- int count, void *myframe)
+__glusterd_probe_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gd1_mgmt_friend_rsp rsp = {{0},};
- int ret = -1;
- glusterd_friend_sm_event_t *event = NULL;
- glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE;
- glusterd_peerinfo_t *peerinfo = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = -1;
- glusterd_probe_ctx_t *ctx = NULL;
- glusterd_friend_update_ctx_t *ev_ctx = NULL;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_friend_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- op_ret = rsp.op_ret;
- op_errno = rsp.op_errno;
-
- gf_log ("glusterd", GF_LOG_INFO,
- "Received %s from uuid: %s, host: %s, port: %d",
- (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid), rsp.hostname, rsp.port);
-
- ret = glusterd_friend_find (rsp.uuid, rsp.hostname, &peerinfo);
+ gd1_mgmt_probe_rsp rsp = {
+ {0},
+ };
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_probe_ctx_t *ctx = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ if (-1 == req->rpc_status) {
+ goto out;
+ }
+
+ this = THIS;
+ GF_ASSERT(this != NULL);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out);
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_probe_rsp);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RES_DECODE_FAIL, "error");
+ // rsp.op_ret = -1;
+ // rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_PROBE_REQ_RESP_RCVD,
+ "Received probe resp from uuid: %s, host: %s", uuid_utoa(rsp.uuid),
+ rsp.hostname);
+ if (rsp.op_ret != 0) {
+ ctx = ((call_frame_t *)myframe)->local;
+ ((call_frame_t *)myframe)->local = NULL;
+ GF_ASSERT(ctx);
+
+ if (ctx->req) {
+ glusterd_xfer_cli_probe_resp(ctx->req, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr, ctx->hostname,
+ ctx->port, ctx->dict);
+ }
+
+ glusterd_destroy_probe_ctx(ctx);
+ (void)glusterd_friend_remove(rsp.uuid, rsp.hostname);
+ ret = rsp.op_ret;
+ goto out;
+ }
+
+ RCU_READ_LOCK;
+ peerinfo = glusterd_peerinfo_find(rsp.uuid, rsp.hostname);
+ if (peerinfo == NULL) {
+ RCU_READ_UNLOCK
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
+ "Could not find peerd %s(%s)", rsp.hostname,
+ uuid_utoa(rsp.uuid));
+ goto out;
+ }
+
+ /*
+ * In the case of a fresh probe rsp.uuid and peerinfo.uuid will not
+ * match, as peerinfo->uuid will be NULL.
+ *
+ * In the case of a peer probe being done to add a new network to a
+ * peer, rsp.uuid will match an existing peerinfo.uuid. If we have this
+ * stage it means that the current address/hostname being used isn't
+ * present in the found peerinfo. If it were, we would have found out
+ * earlier in the probe process and wouldn't even reach till here. So,
+ * we need to add the new hostname to the peer.
+ *
+ * This addition should only be done for cluster op-version >=
+ * GD_OP_VERSION_3_6_0 as address lists are only supported from then on.
+ * Also, this update should only be done when an explicit CLI probe
+ * command was used to begin the probe process.
+ */
+ if ((conf->op_version >= GD_OP_VERSION_3_6_0) &&
+ (gf_uuid_compare(rsp.uuid, peerinfo->uuid) == 0)) {
+ ctx = ((call_frame_t *)myframe)->local;
+ /* Presence of ctx->req implies this probe was started by a cli
+ * probe command
+ */
+ if (ctx->req == NULL)
+ goto cont;
+
+ gf_msg_debug(this->name, 0,
+ "Adding address '%s' to "
+ "existing peer %s",
+ rsp.hostname, uuid_utoa(rsp.uuid));
+
+ ret = glusterd_friend_remove(NULL, rsp.hostname);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "received friend add response from"
- " unknown peer uuid: %s", uuid_utoa (rsp.uuid));
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_STALE_PEERINFO_REMOVE_FAIL,
+ "Could not remove "
+ "stale peerinfo with name %s",
+ rsp.hostname);
+ goto reply;
}
- if (op_ret)
- event_type = GD_FRIEND_EVENT_RCVD_RJT;
- else
- event_type = GD_FRIEND_EVENT_RCVD_ACC;
-
- ret = glusterd_friend_sm_new_event (event_type, &event);
-
+ ret = gd_add_address_to_peer(peerinfo, rsp.hostname);
if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "Unable to get event");
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_HOSTNAME_ADD_TO_PEERLIST_FAIL,
+ "Couldn't add hostname to peer list");
+ goto reply;
}
- event->peerinfo = peerinfo;
- ev_ctx = GF_CALLOC (1, sizeof (*ev_ctx),
- gf_gld_mt_friend_update_ctx_t);
- if (!ev_ctx) {
- ret = -1;
- goto out;
- }
-
- uuid_copy (ev_ctx->uuid, rsp.uuid);
- ev_ctx->hostname = gf_strdup (rsp.hostname);
-
- event->ctx = ev_ctx;
- ret = glusterd_friend_sm_inject_event (event);
- if (ret)
- goto out;
-
-out:
- ctx = ((call_frame_t *)myframe)->local;
- ((call_frame_t *)myframe)->local = NULL;
-
- GF_ASSERT (ctx);
-
- if (ctx->req)//reverse probe doesn't have req
- ret = glusterd_xfer_cli_probe_resp (ctx->req, op_ret, op_errno,
- NULL, ctx->hostname,
- ctx->port);
+ /* Injecting EVENT_NEW_NAME to send update */
+ ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_NEW_NAME, &event);
if (!ret) {
- glusterd_friend_sm ();
- glusterd_op_sm ();
- }
- if (ctx)
- glusterd_destroy_probe_ctx (ctx);
- if (rsp.hostname)
- free (rsp.hostname);//malloced by xdr
- GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
- return ret;
-}
+ event->peername = gf_strdup(peerinfo->hostname);
+ gf_uuid_copy(event->peerid, peerinfo->uuid);
-int
-glusterd3_1_friend_remove_cbk (struct rpc_req * req, struct iovec *iov,
- int count, void *myframe)
-{
- gd1_mgmt_friend_rsp rsp = {{0},};
- glusterd_conf_t *conf = NULL;
- int ret = -1;
- glusterd_friend_sm_event_t *event = NULL;
- glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE;
- glusterd_peerinfo_t *peerinfo = NULL;
- int32_t op_ret = -1;
- int32_t op_errno = -1;
- glusterd_probe_ctx_t *ctx = NULL;
- gf_boolean_t move_sm_now = _gf_true;
-
- conf = THIS->private;
- GF_ASSERT (conf);
+ ret = glusterd_friend_sm_inject_event(event);
+ }
+ rsp.op_errno = GF_PROBE_FRIEND;
+ reply:
ctx = ((call_frame_t *)myframe)->local;
((call_frame_t *)myframe)->local = NULL;
- GF_ASSERT (ctx);
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- move_sm_now = _gf_false;
- goto inject;
+ if (!ctx) {
+ ret = -1;
+ goto unlock;
}
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_friend_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto respond;
+ if (ctx->req) {
+ glusterd_xfer_cli_probe_resp(ctx->req, ret, rsp.op_errno,
+ rsp.op_errstr, ctx->hostname,
+ ctx->port, ctx->dict);
}
- op_ret = rsp.op_ret;
- op_errno = rsp.op_errno;
-
- gf_log ("glusterd", GF_LOG_INFO,
- "Received %s from uuid: %s, host: %s, port: %d",
- (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid), rsp.hostname, rsp.port);
-
-inject:
- ret = glusterd_friend_find (rsp.uuid, ctx->hostname, &peerinfo);
+ glusterd_destroy_probe_ctx(ctx);
- if (ret) {
- //can happen as part of rpc clnt connection cleanup
- //when the frame timeout happens after 30 minutes
- goto respond;
- }
+ goto unlock;
- event_type = GD_FRIEND_EVENT_REMOVE_FRIEND;
+ } else if (strncasecmp(rsp.hostname, peerinfo->hostname, 1024)) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_HOST_PRESENT_ALREADY,
+ "Host: %s with uuid: %s "
+ "already present in cluster with alias hostname: %s",
+ rsp.hostname, uuid_utoa(rsp.uuid), peerinfo->hostname);
- ret = glusterd_friend_sm_new_event (event_type, &event);
+ ctx = ((call_frame_t *)myframe)->local;
+ ((call_frame_t *)myframe)->local = NULL;
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "Unable to get event");
- goto respond;
+ if (!ctx) {
+ ret = -1;
+ goto unlock;
}
- event->peerinfo = peerinfo;
- ret = glusterd_friend_sm_inject_event (event);
+ rsp.op_errno = GF_PROBE_FRIEND;
+ if (ctx->req) {
+ glusterd_xfer_cli_probe_resp(ctx->req, rsp.op_ret, rsp.op_errno,
+ rsp.op_errstr, ctx->hostname,
+ ctx->port, ctx->dict);
+ }
- if (ret)
- goto respond;
+ glusterd_destroy_probe_ctx(ctx);
+ (void)glusterd_friend_remove(NULL, rsp.hostname);
+ ret = rsp.op_ret;
- /*friend_sm would be moved on CLNT_DISCONNECT, consequently
- cleaning up peerinfo. Else, we run the risk of triggering
- a clnt_destroy within saved_frames_unwind.
- */
- op_ret = 0;
+ goto unlock;
+ }
+cont:
+ gf_uuid_copy(peerinfo->uuid, rsp.uuid);
-respond:
- ret = glusterd_xfer_cli_deprobe_resp (ctx->req, op_ret, op_errno, NULL,
- ctx->hostname);
- if (!ret && move_sm_now) {
- glusterd_friend_sm ();
- glusterd_op_sm ();
- }
+ ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_INIT_FRIEND_REQ, &event);
- if (ctx) {
- glusterd_broadcast_friend_delete (ctx->hostname, NULL);
- glusterd_destroy_probe_ctx (ctx);
- }
+ if (ret) {
+ RCU_READ_UNLOCK;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_NEW_FRIEND_SM_EVENT_GET_FAIL,
+ "Unable to get event");
+ goto out;
+ }
- if (rsp.hostname)
- free (rsp.hostname);//malloced by xdr
- GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
- return ret;
-}
+ event->peername = gf_strdup(peerinfo->hostname);
+ gf_uuid_copy(event->peerid, peerinfo->uuid);
-int32_t
-glusterd3_1_friend_update_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
-{
- int ret = -1;
- gd1_mgmt_friend_update_rsp rsp = {{0}, };
- xlator_t *this = NULL;
+ event->ctx = ((call_frame_t *)myframe)->local;
+ ((call_frame_t *)myframe)->local = NULL;
+ ret = glusterd_friend_sm_inject_event(event);
- GF_ASSERT (req);
- this = THIS;
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_PROBE_REQ_RESP_RCVD,
+ "Received resp to probe req");
- if (-1 == req->rpc_status) {
- gf_log (this->name, GF_LOG_ERROR, "RPC Error");
- goto out;
- }
-
- ret = xdr_to_generic (*iov, &rsp,
- (xdrproc_t)xdr_gd1_mgmt_friend_update_rsp);
- if (ret < 0) {
- gf_log (this->name, GF_LOG_ERROR, "Failed to serialize friend"
- " update repsonse");
- goto out;
- }
+unlock:
+ RCU_READ_UNLOCK;
- ret = 0;
out:
- gf_log (this->name, GF_LOG_INFO, "Received %s from uuid: %s",
- (ret)?"RJT":"ACC", uuid_utoa (rsp.uuid));
-
- GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
- return ret;
+ free(rsp.hostname); // malloced by xdr
+ GLUSTERD_STACK_DESTROY(((call_frame_t *)myframe));
+
+ /* Attempt to start the state machine. Needed as no state machine could
+ * be running at time this RPC reply was received
+ */
+ if (!ret) {
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ }
+
+ return ret;
}
-int32_t
-glusterd3_1_cluster_lock_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+int
+glusterd_probe_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gd1_mgmt_cluster_lock_rsp rsp = {{0},};
- int ret = -1;
- int32_t op_ret = -1;
- glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
- glusterd_peerinfo_t *peerinfo = NULL;
-
- GF_ASSERT (req);
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_cluster_lock_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
-out:
- op_ret = rsp.op_ret;
-
- gf_log ("glusterd", GF_LOG_INFO,
- "Received %s from uuid: %s",
- (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid));
-
- ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo);
-
- if (ret) {
- gf_log ("", GF_LOG_CRITICAL, "Lock response received from "
- "unknown peer: %s", uuid_utoa (rsp.uuid));
- }
-
- if (op_ret) {
- event_type = GD_OP_EVENT_RCVD_RJT;
- opinfo.op_ret = op_ret;
- } else {
- event_type = GD_OP_EVENT_RCVD_ACC;
- }
-
- ret = glusterd_op_sm_inject_event (event_type, NULL);
-
- if (!ret) {
- glusterd_friend_sm ();
- glusterd_op_sm ();
- }
-
- GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
- return ret;
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ __glusterd_probe_cbk);
}
-int32_t
-glusterd3_1_cluster_unlock_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+int
+__glusterd_friend_add_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gd1_mgmt_cluster_lock_rsp rsp = {{0},};
- int ret = -1;
- int32_t op_ret = -1;
- glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
- glusterd_peerinfo_t *peerinfo = NULL;
-
-
- GF_ASSERT (req);
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- goto out;
- }
-
+ gd1_mgmt_friend_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = EINVAL;
+ glusterd_probe_ctx_t *ctx = NULL;
+ glusterd_friend_update_ctx_t *ev_ctx = NULL;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_friend_rsp);
+ if (ret < 0) {
+ gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_RES_DECODE_FAIL,
+ "error");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto out;
+ }
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_RESPONSE_INFO,
+ "Received %s from uuid: %s, host: %s, port: %d",
+ (op_ret) ? "RJT" : "ACC", uuid_utoa(rsp.uuid), rsp.hostname,
+ rsp.port);
+
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find(rsp.uuid, rsp.hostname);
+ if (peerinfo == NULL) {
+ RCU_READ_UNLOCK
+ ret = -1;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER,
+ "received friend add response from"
+ " unknown peer uuid: %s",
+ uuid_utoa(rsp.uuid));
+ goto out;
+ }
+
+ if (op_ret)
+ event_type = GD_FRIEND_EVENT_RCVD_RJT;
+ else
+ event_type = GD_FRIEND_EVENT_RCVD_ACC;
+
+ ret = glusterd_friend_sm_new_event(event_type, &event);
+
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL,
+ "Unable to get event");
+ goto unlock;
+ }
+
+ ev_ctx = GF_CALLOC(1, sizeof(*ev_ctx), gf_gld_mt_friend_update_ctx_t);
+ if (!ev_ctx) {
+ ret = -1;
+ goto unlock;
+ }
+
+ gf_uuid_copy(ev_ctx->uuid, rsp.uuid);
+ ev_ctx->hostname = gf_strdup(rsp.hostname);
+
+ event->peername = gf_strdup(peerinfo->hostname);
+ gf_uuid_copy(event->peerid, peerinfo->uuid);
+ event->ctx = ev_ctx;
+ ret = glusterd_friend_sm_inject_event(event);
+
+unlock:
+ RCU_READ_UNLOCK;
out:
- op_ret = rsp.op_ret;
-
- gf_log ("glusterd", GF_LOG_INFO,
- "Received %s from uuid: %s",
- (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid));
-
- ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo);
-
- if (ret) {
- gf_log ("", GF_LOG_CRITICAL, "Unlock response received from "
- "unknown peer %s", uuid_utoa (rsp.uuid));
- }
-
- if (op_ret) {
- event_type = GD_OP_EVENT_RCVD_RJT;
- opinfo.op_ret = op_ret;
- } else {
- event_type = GD_OP_EVENT_RCVD_ACC;
- }
-
- ret = glusterd_op_sm_inject_event (event_type, NULL);
-
- if (!ret) {
- glusterd_friend_sm ();
- glusterd_op_sm ();
- }
-
- GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
- return ret;
+ ctx = ((call_frame_t *)myframe)->local;
+ ((call_frame_t *)myframe)->local = NULL;
+
+ if (ctx && ctx->req) {
+ /*reverse probe doesn't have req*/
+ ret = glusterd_xfer_cli_probe_resp(ctx->req, op_ret, op_errno, NULL,
+ ctx->hostname, ctx->port, ctx->dict);
+ }
+ if (!ret) {
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ }
+
+ if (ctx)
+ glusterd_destroy_probe_ctx(ctx);
+ free(rsp.hostname); // malloced by xdr
+ GLUSTERD_STACK_DESTROY(((call_frame_t *)myframe));
+ return ret;
}
-static int32_t
-glusterd_append_gsync_status (dict_t *dst, dict_t *src)
+int
+glusterd_friend_add_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- int ret = 0;
- char *stop_msg = NULL;
-
- ret = dict_get_str (src, "gsync-status", &stop_msg);
- if (ret) {
- ret = 0;
- goto out;
- }
-
- ret = dict_set_dynstr (dst, "gsync-status", gf_strdup (stop_msg));
- if (ret) {
- gf_log ("glusterd", GF_LOG_WARNING, "Unable to set the stop"
- "message in the ctx dictionary");
- goto out;
- }
-
- ret = 0;
- out:
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
-
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ __glusterd_friend_add_cbk);
}
-static int32_t
-glusterd_append_status_dicts (dict_t *dst, dict_t *src)
+int
+__glusterd_friend_remove_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- int dst_count = 0;
- int src_count = 0;
- int i = 0;
- int ret = 0;
- char mst[PATH_MAX] = {0,};
- char slv[PATH_MAX] = {0, };
- char sts[PATH_MAX] = {0, };
- char *mst_val = NULL;
- char *slv_val = NULL;
- char *sts_val = NULL;
-
- GF_ASSERT (dst);
-
- if (src == NULL)
- goto out;
+ gd1_mgmt_friend_rsp rsp = {
+ {0},
+ };
+ glusterd_conf_t *conf = NULL;
+ int ret = -1;
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ glusterd_probe_ctx_t *ctx = NULL;
+ gf_boolean_t move_sm_now = _gf_true;
+
+ conf = THIS->private;
+ GF_ASSERT(conf);
+
+ ctx = ((call_frame_t *)myframe)->local;
+ ((call_frame_t *)myframe)->local = NULL;
+ if (!ctx) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL,
+ "Unable to get glusterd probe context");
+ goto out;
+ }
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ move_sm_now = _gf_false;
+ goto inject;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_friend_rsp);
+ if (ret < 0) {
+ gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_RES_DECODE_FAIL,
+ "error");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ goto respond;
+ }
+
+ op_ret = rsp.op_ret;
+ op_errno = rsp.op_errno;
+
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_RESPONSE_INFO,
+ "Received %s from uuid: %s, host: %s, port: %d",
+ (op_ret) ? "RJT" : "ACC", uuid_utoa(rsp.uuid), rsp.hostname,
+ rsp.port);
- ret = dict_get_int32 (dst, "gsync-count", &dst_count);
- if (ret)
- dst_count = 0;
-
- ret = dict_get_int32 (src, "gsync-count", &src_count);
- if (ret || !src_count) {
- gf_log ("", GF_LOG_DEBUG, "Source brick empty");
- ret = 0;
- goto out;
- }
+inject:
+ RCU_READ_LOCK;
- for (i = 1; i <= src_count; i++) {
- snprintf (mst, sizeof(mst), "master%d", i);
- snprintf (slv, sizeof(slv), "slave%d", i);
- snprintf (sts, sizeof(sts), "status%d", i);
+ peerinfo = glusterd_peerinfo_find(rsp.uuid, ctx->hostname);
+ if (peerinfo == NULL) {
+ // can happen as part of rpc clnt connection cleanup
+ // when the frame timeout happens after 30 minutes
+ goto unlock;
+ }
- ret = dict_get_str (src, mst, &mst_val);
- if (ret)
- goto out;
+ event_type = GD_FRIEND_EVENT_REMOVE_FRIEND;
- ret = dict_get_str (src, slv, &slv_val);
- if (ret)
- goto out;
+ ret = glusterd_friend_sm_new_event(event_type, &event);
- ret = dict_get_str (src, sts, &sts_val);
- if (ret)
- goto out;
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL,
+ "Unable to get event");
+ goto unlock;
+ }
+ event->peername = gf_strdup(peerinfo->hostname);
+ gf_uuid_copy(event->peerid, peerinfo->uuid);
- snprintf (mst, sizeof(mst), "master%d", i+dst_count);
- snprintf (slv, sizeof(slv), "slave%d", i+dst_count);
- snprintf (sts, sizeof(sts), "status%d", i+dst_count);
+ ret = glusterd_friend_sm_inject_event(event);
- ret = dict_set_dynstr (dst, mst, gf_strdup (mst_val));
- if (ret)
- goto out;
+ if (ret)
+ goto unlock;
- ret = dict_set_dynstr (dst, slv, gf_strdup (slv_val));
- if (ret)
- goto out;
+ /*friend_sm would be moved on CLNT_DISCONNECT, consequently
+ cleaning up peerinfo. Else, we run the risk of triggering
+ a clnt_destroy within saved_frames_unwind.
+ */
+ op_ret = 0;
- ret = dict_set_dynstr (dst, sts, gf_strdup (sts_val));
- if (ret)
- goto out;
-
- }
-
- ret = dict_set_int32 (dst, "gsync-count", dst_count+src_count);
-
- out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+unlock:
+ RCU_READ_UNLOCK;
+respond:
+ ret = glusterd_xfer_cli_deprobe_resp(ctx->req, op_ret, op_errno, NULL,
+ ctx->hostname, ctx->dict);
+ if (!ret && move_sm_now) {
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ }
+
+ glusterd_broadcast_friend_delete(ctx->hostname, NULL);
+ glusterd_destroy_probe_ctx(ctx);
+out:
+ free(rsp.hostname); // malloced by xdr
+ GLUSTERD_STACK_DESTROY(((call_frame_t *)myframe));
+ return ret;
}
-static int32_t
-glusterd_gsync_use_rsp_dict (dict_t *rsp_dict, char *op_errstr)
+int
+glusterd_friend_remove_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- dict_t *ctx = NULL;
- int ret = 0;
-
- ctx = glusterd_op_get_ctx ();
- if (!ctx) {
- gf_log ("", GF_LOG_ERROR,
- "Operation Context is not present");
- GF_ASSERT (0);
- }
-
- if (rsp_dict) {
- ret = glusterd_append_status_dicts (ctx, rsp_dict);
- if (ret)
- goto out;
-
- ret = glusterd_append_gsync_status (ctx, rsp_dict);
- if (ret)
- goto out;
- }
- if (strcmp ("", op_errstr)) {
- ret = dict_set_dynstr (ctx, "errstr", gf_strdup(op_errstr));
- if (ret)
- goto out;
- }
-
- ret = 0;
- out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d ", ret);
- return ret;
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ __glusterd_friend_remove_cbk);
}
-static int32_t
-glusterd_rb_use_rsp_dict (dict_t *rsp_dict)
-{
- int32_t src_port = 0;
- int32_t dst_port = 0;
- int ret = 0;
- dict_t *ctx = NULL;
-
-
- ctx = glusterd_op_get_ctx ();
- if (!ctx) {
- gf_log ("", GF_LOG_ERROR,
- "Operation Context is not present");
- GF_ASSERT (0);
- }
-
- if (rsp_dict) {
- ret = dict_get_int32 (rsp_dict, "src-brick-port", &src_port);
- if (ret == 0) {
- gf_log ("", GF_LOG_DEBUG,
- "src-brick-port=%d found", src_port);
- }
-
- ret = dict_get_int32 (rsp_dict, "dst-brick-port", &dst_port);
- if (ret == 0) {
- gf_log ("", GF_LOG_DEBUG,
- "dst-brick-port=%d found", dst_port);
- }
-
- }
-
- if (src_port) {
- ret = dict_set_int32 (ctx, "src-brick-port",
- src_port);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "Could not set src-brick");
- goto out;
- }
- }
-
- if (dst_port) {
- ret = dict_set_int32 (ctx, "dst-brick-port",
- dst_port);
- if (ret) {
- gf_log ("", GF_LOG_DEBUG,
- "Could not set dst-brick");
- goto out;
- }
-
- }
+int32_t
+__glusterd_friend_update_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ int ret = -1;
+ gd1_mgmt_friend_update_rsp rsp = {
+ {0},
+ };
+ xlator_t *this = NULL;
+
+ GF_ASSERT(req);
+ this = THIS;
+
+ if (-1 == req->rpc_status) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE, "RPC Error");
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_friend_update_rsp);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RES_DECODE_FAIL,
+ "Failed to serialize friend"
+ " update response");
+ goto out;
+ }
+
+ ret = 0;
out:
- return ret;
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_RESPONSE_INFO,
+ "Received %s from uuid: %s", (ret) ? "RJT" : "ACC",
+ uuid_utoa(rsp.uuid));
+ GLUSTERD_STACK_DESTROY(((call_frame_t *)myframe));
+ return ret;
}
-int32_t
-glusterd3_1_stage_op_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+int
+glusterd_friend_update_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gd1_mgmt_stage_op_rsp rsp = {{0},};
- int ret = -1;
- int32_t op_ret = -1;
- glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
- glusterd_peerinfo_t *peerinfo = NULL;
- dict_t *dict = NULL;
- char err_str[2048] = {0};
- char *peer_str = NULL;
-
- GF_ASSERT (req);
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- /* use standard allocation because to keep uniformity
- in freeing it */
- rsp.op_errstr = strdup ("error");
- goto out;
- }
-
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_stage_op_rsp);
- if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- /* use standard allocation because to keep uniformity
- in freeing it */
- rsp.op_errstr = strdup ("xdr decoding failed");
- goto out;
- }
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ __glusterd_friend_update_cbk);
+}
- if (rsp.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
-
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize rsp-buffer to dictionary");
- event_type = GD_OP_EVENT_RCVD_RJT;
- goto out;
- } else {
- dict->extra_stdfree = rsp.dict.dict_val;
- }
- }
+int32_t
+__glusterd_cluster_lock_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gd1_mgmt_cluster_lock_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ int32_t op_ret = -1;
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = NULL;
+ glusterd_conf_t *priv = NULL;
+ char *err_str = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(req);
+
+ txn_id = &priv->global_txn_id;
+
+ if (-1 == req->rpc_status) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_LOCK_RESP_FROM_PEER,
+ "Lock response is not "
+ "received from one of the peer");
+ err_str = "Lock response is not received from one of the peer";
+ glusterd_set_opinfo(err_str, ENETRESET, -1);
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_cluster_lock_rsp);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RES_DECODE_FAIL,
+ "Failed to decode "
+ "cluster lock response received from peer");
+ err_str =
+ "Failed to decode cluster lock response received from"
+ " peer";
+ glusterd_set_opinfo(err_str, EINVAL, -1);
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ }
+
+ op_ret = rsp.op_ret;
+
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_LOCK_FROM_UUID_REJCT,
+ "Received lock RJT from uuid: %s", uuid_utoa(rsp.uuid));
+ } else {
+ gf_msg_debug(this->name, 0, "Received lock ACC from uuid: %s",
+ uuid_utoa(rsp.uuid));
+ }
+
+ RCU_READ_LOCK;
+ ret = (glusterd_peerinfo_find(rsp.uuid, NULL) == NULL);
+ RCU_READ_UNLOCK;
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER,
+ "cluster lock response received from unknown peer: %s."
+ "Ignoring response",
+ uuid_utoa(rsp.uuid));
+ err_str = "cluster lock response received from unknown peer";
+ goto out;
+ }
+
+ if (op_ret) {
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ opinfo.op_ret = op_ret;
+ opinfo.op_errstr = gf_strdup(
+ "Another transaction could be in "
+ "progress. Please try again after"
+ " some time.");
+ } else {
+ event_type = GD_OP_EVENT_RCVD_ACC;
+ }
out:
- op_ret = rsp.op_ret;
-
- gf_log ("glusterd", GF_LOG_INFO,
- "Received %s from uuid: %s",
- (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid));
-
- ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo);
-
- if (ret) {
- gf_log ("", GF_LOG_CRITICAL, "Stage response received from "
- "unknown peer: %s", uuid_utoa (rsp.uuid));
- }
- if (op_ret) {
- event_type = GD_OP_EVENT_RCVD_RJT;
- opinfo.op_ret = op_ret;
- if (strcmp ("", rsp.op_errstr)) {
- opinfo.op_errstr = gf_strdup (rsp.op_errstr);
- } else {
- if (peerinfo)
- peer_str = peerinfo->hostname;
- else
- peer_str = uuid_utoa (rsp.uuid);
- snprintf (err_str, sizeof (err_str), "Operation failed "
- "on %s", peer_str);
- opinfo.op_errstr = gf_strdup (err_str);
- }
- if (!opinfo.op_errstr) {
- gf_log ("", GF_LOG_ERROR, "memory allocation failed");
- ret = -1;
- goto out;
- }
- } else {
- event_type = GD_OP_EVENT_RCVD_ACC;
- }
-
- switch (rsp.op) {
- case GD_OP_REPLACE_BRICK:
- glusterd_rb_use_rsp_dict (dict);
- break;
- }
+ ret = glusterd_set_txn_opinfo(txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
- ret = glusterd_op_sm_inject_event (event_type, NULL);
+ ret = glusterd_op_sm_inject_event(event_type, txn_id, NULL);
- if (!ret) {
- glusterd_friend_sm ();
- glusterd_op_sm ();
- }
+ if (!ret) {
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ }
- if (rsp.op_errstr)
- free (rsp.op_errstr); //malloced by xdr
- if (dict) {
- if (!dict->extra_stdfree && rsp.dict.dict_val)
- free (rsp.dict.dict_val); //malloced by xdr
- dict_unref (dict);
- } else {
- if (rsp.dict.dict_val)
- free (rsp.dict.dict_val); //malloced by xdr
- }
- GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
- return ret;
+ GLUSTERD_STACK_DESTROY(((call_frame_t *)myframe));
+ return ret;
}
-static int32_t
-glusterd_sync_use_rsp_dict (dict_t *rsp_dict)
+int32_t
+glusterd_cluster_lock_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- int ret = 0;
-
- GF_ASSERT (rsp_dict);
-
- if (!rsp_dict) {
- goto out;
- }
-
- ret = glusterd_import_friend_volumes (rsp_dict);
-out:
- return ret;
-
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ __glusterd_cluster_lock_cbk);
}
void
-_profile_volume_add_friend_rsp (dict_t *this, char *key, data_t *value,
- void *data)
+glusterd_set_opinfo(char *errstr, int32_t op_errno, int32_t op_ret)
{
- char new_key[256] = {0};
- glusterd_pr_brick_rsp_conv_t *rsp_ctx = NULL;
- data_t *new_value = NULL;
- int brick_count = 0;
- char brick_key[256];
-
- if (strcmp (key, "count") == 0)
- return;
- sscanf (key, "%d%s", &brick_count, brick_key);
- rsp_ctx = data;
- new_value = data_copy (value);
- GF_ASSERT (new_value);
- snprintf (new_key, sizeof (new_key), "%d%s",
- rsp_ctx->count + brick_count, brick_key);
- dict_set (rsp_ctx->dict, new_key, new_value);
+ opinfo.op_errstr = gf_strdup(errstr);
+ opinfo.op_errno = op_errno;
+ opinfo.op_ret = op_ret;
}
-int
-glusterd_profile_volume_use_rsp_dict (dict_t *rsp_dict)
+static int32_t
+glusterd_mgmt_v3_lock_peers_cbk_fn(struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
{
- int ret = 0;
- glusterd_pr_brick_rsp_conv_t rsp_ctx = {0};
- int32_t brick_count = 0;
- int32_t count = 0;
- dict_t *ctx_dict = NULL;
- glusterd_op_t op = GD_OP_NONE;
+ gd1_mgmt_v3_lock_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ int32_t op_ret = -1;
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ xlator_t *this = NULL;
+ call_frame_t *frame = NULL;
+ uuid_t *txn_id = NULL;
+ char *err_str = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ frame = myframe;
+ txn_id = frame->cookie;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_LOCK_RESP_FROM_PEER,
+ "Lock response is not "
+ "received from one of the peer");
+ err_str = "Lock response is not received from one of the peer";
+ glusterd_set_opinfo(err_str, ENETRESET, -1);
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_lock_rsp);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RES_DECODE_FAIL,
+ "Failed to decode "
+ "mgmt_v3 lock response received from peer");
+ err_str =
+ "Failed to decode mgmt_v3 lock response received from"
+ " peer";
+ glusterd_set_opinfo(err_str, EINVAL, -1);
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ }
+
+ op_ret = rsp.op_ret;
+
+ txn_id = &rsp.txn_id;
+
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_LOCK_FROM_UUID_REJCT,
+ "Received mgmt_v3 lock RJT from uuid: %s", uuid_utoa(rsp.uuid));
+ } else {
+ gf_msg_debug(this->name, 0, "Received mgmt_v3 lock ACC from uuid: %s",
+ uuid_utoa(rsp.uuid));
+ }
+
+ RCU_READ_LOCK;
+ ret = (glusterd_peerinfo_find(rsp.uuid, NULL) == NULL);
+ RCU_READ_UNLOCK;
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER,
+ "mgmt_v3 lock response received "
+ "from unknown peer: %s. Ignoring response",
+ uuid_utoa(rsp.uuid));
+ goto out;
+ }
+
+ if (op_ret) {
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ opinfo.op_ret = op_ret;
+ opinfo.op_errstr = gf_strdup(
+ "Another transaction could be in "
+ "progress. Please try again after"
+ " some time.");
+ } else {
+ event_type = GD_OP_EVENT_RCVD_ACC;
+ }
- GF_ASSERT (rsp_dict);
-
- ret = dict_get_int32 (rsp_dict, "count", &brick_count);
- if (ret) {
- ret = 0; //no bricks in the rsp
- goto out;
- }
-
- op = glusterd_op_get_op ();
- GF_ASSERT (GD_OP_PROFILE_VOLUME == op);
- ctx_dict = glusterd_op_get_ctx ();
-
- ret = dict_get_int32 (ctx_dict, "count", &count);
- rsp_ctx.count = count;
- rsp_ctx.dict = ctx_dict;
- dict_foreach (rsp_dict, _profile_volume_add_friend_rsp, &rsp_ctx);
- dict_del (ctx_dict, "count");
- ret = dict_set_int32 (ctx_dict, "count", count + brick_count);
out:
- return ret;
+
+ ret = glusterd_set_txn_opinfo(txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
+
+ ret = glusterd_op_sm_inject_event(event_type, txn_id, NULL);
+ if (!ret) {
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ }
+
+ GF_FREE(frame->cookie);
+ GLUSTERD_STACK_DESTROY(frame);
+ return ret;
}
-void
-glusterd_volume_status_add_peer_rsp (dict_t *this, char *key, data_t *value,
- void *data)
+int32_t
+glusterd_mgmt_v3_lock_peers_cbk(struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
{
- glusterd_status_rsp_conv_t *rsp_ctx = NULL;
- data_t *new_value = NULL;
- char brick_key[1024] = {0,};
- char new_key[1024] = {0,};
- int32_t index = 0;
- int32_t ret = 0;
-
- if (!strcmp (key, "count") || !strcmp (key, "cmd") ||
- !strcmp (key, "brick-index-max") || !strcmp (key, "other-count"))
- return;
-
- rsp_ctx = data;
- new_value = data_copy (value);
- GF_ASSERT (new_value);
-
- sscanf (key, "brick%d.%s", &index, brick_key);
-
- if (index > rsp_ctx->brick_index_max) {
- snprintf (new_key, sizeof (new_key), "brick%d.%s",
- index + rsp_ctx->other_count, brick_key);
- } else
- strncpy (new_key, key, sizeof (new_key));
-
- ret = dict_set (rsp_ctx->dict, new_key, new_value);
- if (ret)
- gf_log ("", GF_LOG_ERROR, "Unable to set key: %s in dict",
- key);
-
- return;
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ glusterd_mgmt_v3_lock_peers_cbk_fn);
}
-int
-glusterd_volume_status_use_rsp_dict (dict_t *rsp_dict)
+static int32_t
+glusterd_mgmt_v3_unlock_peers_cbk_fn(struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
{
- int ret = 0;
- glusterd_status_rsp_conv_t rsp_ctx = {0};
- int32_t node_count = 0;
- int32_t rsp_node_count = 0;
- int32_t brick_index_max = -1;
- int32_t other_count = 0;
- int32_t rsp_other_count = 0;
- dict_t *ctx_dict = NULL;
- glusterd_op_t op = GD_OP_NONE;
-
- GF_ASSERT (rsp_dict);
-
- ret = dict_get_int32 (rsp_dict, "count", &rsp_node_count);
- if (ret) {
- ret = 0; //no bricks in the rsp
- goto out;
- }
+ gd1_mgmt_v3_unlock_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ int32_t op_ret = -1;
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ xlator_t *this = NULL;
+ call_frame_t *frame = NULL;
+ uuid_t *txn_id = NULL;
+ char *err_str = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+
+ frame = myframe;
+ txn_id = frame->cookie;
+ frame->cookie = NULL;
+
+ if (-1 == req->rpc_status) {
+ err_str = "Unlock response not received from one of the peer.";
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CLUSTER_UNLOCK_FAILED,
+ "UnLock response is not received from one of the peer");
+ glusterd_set_opinfo(err_str, 0, 0);
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_v3_unlock_rsp);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CLUSTER_UNLOCK_FAILED,
+ "Failed to decode mgmt_v3 unlock response received from"
+ "peer");
+ err_str =
+ "Failed to decode mgmt_v3 unlock response received "
+ "from peer";
+ glusterd_set_opinfo(err_str, 0, 0);
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ }
+
+ op_ret = rsp.op_ret;
+
+ txn_id = &rsp.txn_id;
+
+ if (op_ret) {
+ gf_msg(
+ this->name, GF_LOG_ERROR, 0, GD_MSG_MGMTV3_UNLOCK_FROM_UUID_REJCT,
+ "Received mgmt_v3 unlock RJT from uuid: %s", uuid_utoa(rsp.uuid));
+ } else {
+ gf_msg_debug(this->name, 0, "Received mgmt_v3 unlock ACC from uuid: %s",
+ uuid_utoa(rsp.uuid));
+ }
+
+ RCU_READ_LOCK;
+ ret = (glusterd_peerinfo_find(rsp.uuid, NULL) == NULL);
+ RCU_READ_UNLOCK;
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CLUSTER_UNLOCK_FAILED,
+ "mgmt_v3 unlock response received "
+ "from unknown peer: %s. Ignoring response",
+ uuid_utoa(rsp.uuid));
+ goto out;
+ }
+
+ if (op_ret) {
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ opinfo.op_ret = op_ret;
+ opinfo.op_errstr = gf_strdup(
+ "Another transaction could be in "
+ "progress. Please try again after"
+ " some time.");
+ } else {
+ event_type = GD_OP_EVENT_RCVD_ACC;
+ }
- ret = dict_get_int32 (rsp_dict, "other-count", &rsp_other_count);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to get other count from rsp_dict");
- goto out;
- }
-
- op = glusterd_op_get_op ();
- GF_ASSERT (GD_OP_STATUS_VOLUME == op);
- ctx_dict = glusterd_op_get_ctx (op);
-
- ret = dict_get_int32 (ctx_dict, "count", &node_count);
- ret = dict_get_int32 (ctx_dict, "brick-index-max", &brick_index_max);
- ret = dict_get_int32 (ctx_dict, "other-count", &other_count);
+out:
- rsp_ctx.count = node_count;
- rsp_ctx.brick_index_max = brick_index_max;
- rsp_ctx.other_count = other_count;
- rsp_ctx.dict = ctx_dict;
+ ret = glusterd_set_txn_opinfo(txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
- dict_foreach (rsp_dict, glusterd_volume_status_add_peer_rsp, &rsp_ctx);
+ ret = glusterd_op_sm_inject_event(event_type, txn_id, NULL);
- ret = dict_set_int32 (ctx_dict, "count", node_count + rsp_node_count);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to update node count");
- goto out;
- }
+ if (!ret) {
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ }
- ret = dict_set_int32 (ctx_dict, "other-count",
- (other_count + rsp_other_count));
- if (ret)
- gf_log (THIS->name, GF_LOG_ERROR,
- "Failed to update other-count");
-out:
- return ret;
+ GF_FREE(frame->cookie);
+ GLUSTERD_STACK_DESTROY(frame);
+ return ret;
}
-int
-glusterd_volume_rebalance_use_rsp_dict (dict_t *rsp_dict)
+int32_t
+glusterd_mgmt_v3_unlock_peers_cbk(struct rpc_req *req, struct iovec *iov,
+ int count, void *myframe)
{
- int ret = 0;
- dict_t *ctx_dict = NULL;
- glusterd_op_t op = GD_OP_NONE;
- uint64_t value = 0;
- int32_t value32 = 0;
- char *volname = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- char key[256] = {0,};
- int32_t index = 0;
- int32_t i = 0;
- char *node_uuid = NULL;
- char *node_uuid_str = NULL;
- double elapsed_time = 0;
-
- GF_ASSERT (rsp_dict);
-
- op = glusterd_op_get_op ();
- GF_ASSERT ((GD_OP_REBALANCE == op) ||
- (GD_OP_DEFRAG_BRICK_VOLUME == op));
-
- ctx_dict = glusterd_op_get_ctx (op);
-
- if (!ctx_dict)
- goto out;
-
- ret = dict_get_int32 (ctx_dict, "count", &i);
- i++;
- ret = dict_set_int32 (ctx_dict, "count", i);
- if (ret)
- gf_log ("", GF_LOG_ERROR, "Failed to set index");
-
- ret = dict_get_str (ctx_dict, "volname", &volname);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get volume name");
- goto out;
- }
-
- ret = glusterd_volinfo_find (volname, &volinfo);
-
- if (ret)
- goto out;
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ glusterd_mgmt_v3_unlock_peers_cbk_fn);
+}
- ret = dict_get_int32 (rsp_dict, "count", &index);
- if (ret)
- gf_log ("", GF_LOG_ERROR, "failed to get index");
+int32_t
+__glusterd_cluster_unlock_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gd1_mgmt_cluster_lock_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ int32_t op_ret = -1;
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = NULL;
+ glusterd_conf_t *priv = NULL;
+ char *err_str = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(req);
+
+ txn_id = &priv->global_txn_id;
+
+ if (-1 == req->rpc_status) {
+ err_str = "Unlock response not received from one of the peer.";
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CLUSTER_UNLOCK_FAILED,
+ "UnLock response is not received from one of the peer");
+ glusterd_set_opinfo(err_str, 0, 0);
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_rsp);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CLUSTER_UNLOCK_FAILED,
+ "Failed to decode unlock response received from peer");
+ err_str =
+ "Failed to decode cluster unlock response received "
+ "from peer";
+ glusterd_set_opinfo(err_str, 0, 0);
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ }
+
+ op_ret = rsp.op_ret;
+
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNLOCK_FROM_UUID_REJCT,
+ "Received unlock RJT from uuid: %s", uuid_utoa(rsp.uuid));
+ } else {
+ gf_msg_debug(this->name, 0, "Received unlock ACC from uuid: %s",
+ uuid_utoa(rsp.uuid));
+ }
+
+ RCU_READ_LOCK;
+ ret = (glusterd_peerinfo_find(rsp.uuid, NULL) == NULL);
+ RCU_READ_UNLOCK;
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_CLUSTER_UNLOCK_FAILED,
+ "Unlock response received from unknown peer %s",
+ uuid_utoa(rsp.uuid));
+ goto out;
+ }
+
+ if (op_ret) {
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ opinfo.op_ret = op_ret;
+ } else {
+ event_type = GD_OP_EVENT_RCVD_ACC;
+ }
- snprintf (key, 256, "files-%d", index);
- ret = dict_get_uint64 (rsp_dict, key, &value);
- if (!ret) {
- memset (key, 0, 256);
- snprintf (key, 256, "files-%d", i);
- ret = dict_set_uint64 (ctx_dict, key, value);
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "failed to set the file count");
- }
- }
+out:
- memset (key, 0, 256);
- snprintf (key, 256, "size-%d", index);
- ret = dict_get_uint64 (rsp_dict, key, &value);
- if (!ret) {
- memset (key, 0, 256);
- snprintf (key, 256, "size-%d", i);
- ret = dict_set_uint64 (ctx_dict, key, value);
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "failed to set the size of migration");
- }
- }
+ ret = glusterd_set_txn_opinfo(txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
- memset (key, 0, 256);
- snprintf (key, 256, "lookups-%d", index);
- ret = dict_get_uint64 (rsp_dict, key, &value);
- if (!ret) {
- memset (key, 0, 256);
- snprintf (key, 256, "lookups-%d", i);
- ret = dict_set_uint64 (ctx_dict, key, value);
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "failed to set lookuped file count");
- }
- }
+ ret = glusterd_op_sm_inject_event(event_type, txn_id, NULL);
- memset (key, 0, 256);
- snprintf (key, 256, "status-%d", index);
- ret = dict_get_int32 (rsp_dict, key, &value32);
- if (!ret) {
- memset (key, 0, 256);
- snprintf (key, 256, "status-%d", i);
- ret = dict_set_int32 (ctx_dict, key, value32);
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "failed to set status");
- }
- }
+ if (!ret) {
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ }
- memset (key, 0, 256);
- snprintf (key, 256, "node-uuid-%d", index);
- ret = dict_get_str (rsp_dict, key, &node_uuid);
- if (!ret) {
- memset (key, 0, 256);
- snprintf (key, 256, "node-uuid-%d", i);
- node_uuid_str = gf_strdup (node_uuid);
- ret = dict_set_dynstr (ctx_dict, key, node_uuid_str);
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "failed to set node-uuid");
- }
- }
+ GLUSTERD_STACK_DESTROY(((call_frame_t *)myframe));
+ return ret;
+}
- memset (key, 0, 256);
- snprintf (key, 256, "failures-%d", index);
- ret = dict_get_uint64 (rsp_dict, key, &value);
- if (!ret) {
- memset (key, 0, 256);
- snprintf (key, 256, "failures-%d", i);
- ret = dict_set_uint64 (ctx_dict, key, value);
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "failed to set failure count");
- }
- }
+int32_t
+glusterd_cluster_unlock_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ __glusterd_cluster_unlock_cbk);
+}
- memset (key, 0, 256);
- snprintf (key, 256, "run-time-%d", index);
- ret = dict_get_double (rsp_dict, key, &elapsed_time);
- if (!ret) {
- memset (key, 0, 256);
- snprintf (key, 256, "run-time-%d", i);
- ret = dict_set_double (ctx_dict, key, elapsed_time);
- if (ret) {
- gf_log (THIS->name, GF_LOG_DEBUG,
- "failed to set run-time");
- }
+int32_t
+__glusterd_stage_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ gd1_mgmt_stage_op_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ int32_t op_ret = -1;
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ dict_t *dict = NULL;
+ char *peer_str = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ uuid_t *txn_id = NULL;
+ call_frame_t *frame = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+ txn_id = frame->cookie;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ /* use standard allocation because to keep uniformity
+ in freeing it */
+ rsp.op_errstr = strdup("error");
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_stage_op_rsp);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RES_DECODE_FAIL,
+ "Failed to decode stage "
+ "response received from peer");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ /* use standard allocation because to keep uniformity
+ in freeing it */
+ rsp.op_errstr = strdup(
+ "Failed to decode stage response "
+ "received from peer.");
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize rsp-buffer to dictionary");
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ } else {
+ dict->extra_stdfree = rsp.dict.dict_val;
}
-
- ret = 0;
+ }
out:
- return ret;
+ op_ret = rsp.op_ret;
+
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STAGE_FROM_UUID_REJCT,
+ "Received stage RJT from uuid: %s", uuid_utoa(rsp.uuid));
+ } else {
+ gf_msg_debug(this->name, 0, "Received stage ACC from uuid: %s",
+ uuid_utoa(rsp.uuid));
+ }
+
+ RCU_READ_LOCK;
+ peerinfo = glusterd_peerinfo_find(rsp.uuid, NULL);
+ if (peerinfo == NULL) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER,
+ "Stage response received "
+ "from unknown peer: %s. Ignoring response.",
+ uuid_utoa(rsp.uuid));
+ }
+
+ if (op_ret) {
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ opinfo.op_ret = op_ret;
+ if (strcmp("", rsp.op_errstr)) {
+ opinfo.op_errstr = gf_strdup(rsp.op_errstr);
+ } else {
+ if (peerinfo)
+ peer_str = peerinfo->hostname;
+ else
+ peer_str = uuid_utoa(rsp.uuid);
+ char err_str[2048];
+ snprintf(err_str, sizeof(err_str), OPERRSTR_STAGE_FAIL, peer_str);
+ opinfo.op_errstr = gf_strdup(err_str);
+ }
+ } else {
+ event_type = GD_OP_EVENT_RCVD_ACC;
+ }
+
+ RCU_READ_UNLOCK;
+
+ ret = glusterd_set_txn_opinfo(txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
+
+ ret = glusterd_op_sm_inject_event(event_type, txn_id, NULL);
+
+ if (!ret) {
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ }
+
+ free(rsp.op_errstr); // malloced by xdr
+ if (dict) {
+ if (!dict->extra_stdfree && rsp.dict.dict_val)
+ free(rsp.dict.dict_val); // malloced by xdr
+ dict_unref(dict);
+ } else {
+ free(rsp.dict.dict_val); // malloced by xdr
+ }
+ GF_FREE(frame->cookie);
+ GLUSTERD_STACK_DESTROY(((call_frame_t *)myframe));
+ return ret;
}
-int
-glusterd_volume_heal_use_rsp_dict (dict_t *rsp_dict)
+int32_t
+glusterd_stage_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- int ret = 0;
- dict_t *ctx_dict = NULL;
- glusterd_op_t op = GD_OP_NONE;
-
- GF_ASSERT (rsp_dict);
-
- op = glusterd_op_get_op ();
- GF_ASSERT (GD_OP_HEAL_VOLUME == op);
-
- ctx_dict = glusterd_op_get_ctx (op);
-
- if (!ctx_dict)
- goto out;
- dict_copy (rsp_dict, ctx_dict);
-out:
- return ret;
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ __glusterd_stage_op_cbk);
}
int32_t
-glusterd3_1_commit_op_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+__glusterd_commit_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gd1_mgmt_commit_op_rsp rsp = {{0},};
- int ret = -1;
- int32_t op_ret = -1;
- glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
- glusterd_peerinfo_t *peerinfo = NULL;
- dict_t *dict = NULL;
- char err_str[2048] = {0};
- char *peer_str = NULL;
-
-
- GF_ASSERT (req);
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- /* use standard allocation because to keep uniformity
- in freeing it */
- rsp.op_errstr = strdup ("error");
- event_type = GD_OP_EVENT_RCVD_RJT;
- goto out;
- }
-
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_commit_op_rsp);
+ gd1_mgmt_commit_op_rsp rsp = {
+ {0},
+ };
+ int ret = -1;
+ int32_t op_ret = -1;
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ dict_t *dict = NULL;
+ char *peer_str = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ uuid_t *txn_id = NULL;
+ glusterd_op_info_t txn_op_info = {
+ {0},
+ };
+ call_frame_t *frame = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(myframe);
+
+ frame = myframe;
+ txn_id = frame->cookie;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ /* use standard allocation because to keep uniformity
+ in freeing it */
+ rsp.op_errstr = strdup("error");
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_commit_op_rsp);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RES_DECODE_FAIL,
+ "Failed to decode commit "
+ "response received from peer");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ /* use standard allocation because to keep uniformity
+ in freeing it */
+ rsp.op_errstr = strdup(
+ "Failed to decode commit response "
+ "received from peer.");
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ }
+
+ if (rsp.dict.dict_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(rsp.dict.dict_val, rsp.dict.dict_len, &dict);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "xdr decoding error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- /* use standard allocation because to keep uniformity
- in freeing it */
- rsp.op_errstr = strdup ("xdr decoding error");
- event_type = GD_OP_EVENT_RCVD_RJT;
- goto out;
- }
-
- if (rsp.dict.dict_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
-
- ret = dict_unserialize (rsp.dict.dict_val,
- rsp.dict.dict_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize rsp-buffer to dictionary");
- event_type = GD_OP_EVENT_RCVD_RJT;
- goto out;
- } else {
- dict->extra_stdfree = rsp.dict.dict_val;
- }
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize rsp-buffer to dictionary");
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ } else {
+ dict->extra_stdfree = rsp.dict.dict_val;
+ }
+ }
+
+ op_ret = rsp.op_ret;
+
+ if (op_ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMIT_FROM_UUID_REJCT,
+ "Received commit RJT from uuid: %s", uuid_utoa(rsp.uuid));
+ } else {
+ gf_msg_debug(this->name, 0, "Received commit ACC from uuid: %s",
+ uuid_utoa(rsp.uuid));
+ }
+
+ ret = glusterd_get_txn_opinfo(txn_id, &txn_op_info);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_GET_FAIL,
+ "Failed to get txn_op_info "
+ "for txn_id = %s",
+ uuid_utoa(*txn_id));
+ }
+
+ RCU_READ_LOCK;
+ peerinfo = glusterd_peerinfo_find(rsp.uuid, NULL);
+ if (peerinfo == NULL) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_RESP_FROM_UNKNOWN_PEER,
+ "Commit response for "
+ "'Volume %s' received from unknown peer: %s",
+ gd_op_list[opinfo.op], uuid_utoa(rsp.uuid));
+ }
+
+ if (op_ret) {
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ opinfo.op_ret = op_ret;
+ if (strcmp("", rsp.op_errstr)) {
+ opinfo.op_errstr = gf_strdup(rsp.op_errstr);
+ } else {
+ if (peerinfo)
+ peer_str = peerinfo->hostname;
+ else
+ peer_str = uuid_utoa(rsp.uuid);
+ char err_str[2048];
+ snprintf(err_str, sizeof(err_str), OPERRSTR_COMMIT_FAIL, peer_str);
+ opinfo.op_errstr = gf_strdup(err_str);
}
-
- op_ret = rsp.op_ret;
-
- gf_log ("glusterd", GF_LOG_INFO,
- "Received %s from uuid: %s",
- (op_ret)?"RJT":"ACC", uuid_utoa (rsp.uuid));
-
- ret = glusterd_friend_find (rsp.uuid, NULL, &peerinfo);
-
- if (ret) {
- gf_log ("", GF_LOG_CRITICAL, "Commit response received from "
- "unknown peer: %s", uuid_utoa (rsp.uuid));
+ if (!opinfo.op_errstr) {
+ goto unlock;
}
+ } else {
+ event_type = GD_OP_EVENT_RCVD_ACC;
+ GF_ASSERT(rsp.op == txn_op_info.op);
- if (op_ret) {
- event_type = GD_OP_EVENT_RCVD_RJT;
- opinfo.op_ret = op_ret;
- if (strcmp ("", rsp.op_errstr)) {
- opinfo.op_errstr = gf_strdup(rsp.op_errstr);
- } else {
- if (peerinfo)
- peer_str = peerinfo->hostname;
- else
- peer_str = uuid_utoa (rsp.uuid);
- snprintf (err_str, sizeof (err_str), "Operation failed "
- "on %s", peer_str);
- opinfo.op_errstr = gf_strdup (err_str);
- }
- if (!opinfo.op_errstr) {
- gf_log ("", GF_LOG_ERROR, "memory allocation failed");
- ret = -1;
- goto out;
- }
- } else {
- event_type = GD_OP_EVENT_RCVD_ACC;
- switch (rsp.op) {
- case GD_OP_REPLACE_BRICK:
- ret = glusterd_rb_use_rsp_dict (dict);
- if (ret)
- goto out;
- break;
-
- case GD_OP_SYNC_VOLUME:
- ret = glusterd_sync_use_rsp_dict (dict);
- if (ret)
- goto out;
- break;
-
- case GD_OP_PROFILE_VOLUME:
- ret = glusterd_profile_volume_use_rsp_dict (dict);
- if (ret)
- goto out;
- break;
-
- case GD_OP_GSYNC_SET:
- ret = glusterd_gsync_use_rsp_dict (dict, rsp.op_errstr);
- if (ret)
- goto out;
- break;
-
- case GD_OP_STATUS_VOLUME:
- ret = glusterd_volume_status_use_rsp_dict (dict);
- if (ret)
- goto out;
- break;
-
- case GD_OP_REBALANCE:
- case GD_OP_DEFRAG_BRICK_VOLUME:
- ret = glusterd_volume_rebalance_use_rsp_dict (dict);
- if (ret)
- goto out;
+ switch (rsp.op) {
+ case GD_OP_PROFILE_VOLUME:
+ ret = glusterd_profile_volume_use_rsp_dict(txn_op_info.op_ctx,
+ dict);
+ if (ret)
+ goto unlock;
break;
- case GD_OP_HEAL_VOLUME:
- ret = glusterd_volume_heal_use_rsp_dict (dict);
- if (ret)
- goto out;
-
+ case GD_OP_REBALANCE:
+ case GD_OP_DEFRAG_BRICK_VOLUME:
+ ret = glusterd_volume_rebalance_use_rsp_dict(txn_op_info.op_ctx,
+ dict);
+ if (ret)
+ goto unlock;
break;
- default:
+ default:
break;
- }
}
+ }
+unlock:
+ RCU_READ_UNLOCK;
out:
- ret = glusterd_op_sm_inject_event (event_type, NULL);
-
- if (!ret) {
- glusterd_friend_sm ();
- glusterd_op_sm ();
- }
- if (dict)
- dict_unref (dict);
- if (rsp.op_errstr)
- free (rsp.op_errstr); //malloced by xdr
- GLUSTERD_STACK_DESTROY (((call_frame_t *)myframe));
- return ret;
+ ret = glusterd_set_txn_opinfo(txn_id, &opinfo);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
+
+ ret = glusterd_op_sm_inject_event(event_type, txn_id, NULL);
+
+ if (!ret) {
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ }
+
+ if (dict)
+ dict_unref(dict);
+ free(rsp.op_errstr); // malloced by xdr
+ GF_FREE(frame->cookie);
+ GLUSTERD_STACK_DESTROY(((call_frame_t *)myframe));
+ return ret;
}
-
-
int32_t
-glusterd3_1_probe (call_frame_t *frame, xlator_t *this,
- void *data)
+glusterd_commit_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gd1_mgmt_probe_req req = {{0},};
- int ret = 0;
- int port = 0;
- char *hostname = NULL;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_conf_t *priv = NULL;
- dict_t *dict = NULL;
-
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
-
- dict = data;
- priv = this->private;
-
- GF_ASSERT (priv);
- ret = dict_get_str (dict, "hostname", &hostname);
- if (ret)
- goto out;
- ret = dict_get_int32 (dict, "port", &port);
- if (ret)
- port = GF_DEFAULT_BASE_PORT;
-
- ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo));
- if (ret)
- goto out;
-
- uuid_copy (req.uuid, MY_UUID);
- req.hostname = gf_strdup (hostname);
- req.port = port;
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ __glusterd_commit_op_cbk);
+}
- ret = glusterd_submit_request (peerinfo->rpc, &req, frame, peerinfo->peer,
- GLUSTERD_PROBE_QUERY,
- NULL, this, glusterd3_1_probe_cbk,
- (xdrproc_t)xdr_gd1_mgmt_probe_req);
+int32_t
+glusterd_rpc_probe(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gd1_mgmt_probe_req req = {
+ {0},
+ };
+ int ret = 0;
+ int port = 0;
+ char *hostname = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *dict = NULL;
+
+ if (!frame || !this || !data) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ dict = data;
+ priv = this->private;
+
+ GF_ASSERT(priv);
+ ret = dict_get_strn(dict, "hostname", SLEN("hostname"), &hostname);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=hostname", NULL);
+ goto out;
+ }
+ ret = dict_get_int32n(dict, "port", SLEN("port"), &port);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_DEBUG, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=port", NULL);
+ port = GF_DEFAULT_BASE_PORT;
+ }
+
+ ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo));
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=peerinfo", NULL);
+ goto out;
+ }
+
+ gf_uuid_copy(req.uuid, MY_UUID);
+ req.hostname = gf_strdup(hostname);
+ req.port = port;
+
+ ret = glusterd_submit_request(
+ peerinfo->rpc, &req, frame, peerinfo->peer, GLUSTERD_PROBE_QUERY, NULL,
+ this, glusterd_probe_cbk, (xdrproc_t)xdr_gd1_mgmt_probe_req);
out:
- if (req.hostname)
- GF_FREE (req.hostname);
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ GF_FREE(req.hostname);
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
}
-
int32_t
-glusterd3_1_friend_add (call_frame_t *frame, xlator_t *this,
- void *data)
+glusterd_rpc_friend_add(call_frame_t *frame, xlator_t *this, void *data)
{
- gd1_mgmt_friend_req req = {{0},};
- int ret = 0;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_conf_t *priv = NULL;
- glusterd_friend_sm_event_t *event = NULL;
- dict_t *vols = NULL;
-
-
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
+ gd1_mgmt_friend_req req = {
+ {0},
+ };
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_friend_sm_event_t *event = NULL;
+ dict_t *peer_data = NULL;
+
+ if (!frame || !this || !data) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ event = data;
+ priv = this->private;
+
+ GF_ASSERT(priv);
+
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
+ if (!peerinfo) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
+ "Could not find peer %s(%s)", event->peername,
+ uuid_utoa(event->peerid));
+ goto out;
+ }
+
+ req.hostname = gf_strdup(peerinfo->hostname);
+ req.port = peerinfo->port;
+
+ RCU_READ_UNLOCK;
+
+ gf_uuid_copy(req.uuid, MY_UUID);
+
+ peer_data = dict_new();
+ if (!peer_data) {
+ gf_smsg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ errno = ENOMEM;
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(peer_data, "hostname_in_cluster",
+ peerinfo->hostname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Unable to add hostname of the peer");
+ goto out;
+ }
+
+ if (priv->op_version >= GD_OP_VERSION_3_6_0) {
+ ret = glusterd_add_missed_snaps_to_export_dict(peer_data);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+ "Unable to add list of missed snapshots "
+ "in the peer_data dict for handshake");
+ goto out;
}
- event = data;
- priv = this->private;
-
- GF_ASSERT (priv);
-
- peerinfo = event->peerinfo;
-
- ret = glusterd_build_volume_dict (&vols);
- if (ret)
- goto out;
-
- uuid_copy (req.uuid, MY_UUID);
- req.hostname = peerinfo->hostname;
- req.port = peerinfo->port;
-
- ret = dict_allocate_and_serialize (vols, &req.vols.vols_val,
- (size_t *)&req.vols.vols_len);
- if (ret)
- goto out;
-
- ret = glusterd_submit_request (peerinfo->rpc, &req, frame, peerinfo->peer,
- GLUSTERD_FRIEND_ADD,
- NULL, this, glusterd3_1_friend_add_cbk,
- (xdrproc_t)xdr_gd1_mgmt_friend_req);
+ ret = glusterd_add_snapshots_to_export_dict(peer_data);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_LIST_SET_FAIL,
+ "Unable to add list of snapshots "
+ "in the peer_data dict for handshake");
+ goto out;
+ }
+ }
+
+ /* Don't add any key-value in peer_data dictionary after call this function
+ */
+ ret = glusterd_add_volumes_to_export_dict(peer_data, &req.vols.vols_val,
+ &req.vols.vols_len);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to add list of volumes "
+ "in the peer_data dict for handshake");
+ goto out;
+ }
+
+ if (!req.vols.vols_len) {
+ ret = dict_allocate_and_serialize(peer_data, &req.vols.vols_val,
+ &req.vols.vols_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+ }
+ ret = glusterd_submit_request(
+ peerinfo->rpc, &req, frame, peerinfo->peer, GLUSTERD_FRIEND_ADD, NULL,
+ this, glusterd_friend_add_cbk, (xdrproc_t)xdr_gd1_mgmt_friend_req);
out:
- if (req.vols.vols_val)
- GF_FREE (req.vols.vols_val);
+ GF_FREE(req.vols.vols_val);
+ GF_FREE(req.hostname);
- if (vols)
- dict_unref (vols);
+ if (peer_data)
+ dict_unref(peer_data);
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
}
int32_t
-glusterd3_1_friend_remove (call_frame_t *frame, xlator_t *this,
- void *data)
+glusterd_rpc_friend_remove(call_frame_t *frame, xlator_t *this, void *data)
{
- gd1_mgmt_friend_req req = {{0},};
- int ret = 0;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_conf_t *priv = NULL;
- glusterd_friend_sm_event_t *event = NULL;
-
- if (!frame || !this || !data) {
- ret = -1;
- goto out;
- }
+ gd1_mgmt_friend_req req = {
+ {0},
+ };
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_friend_sm_event_t *event = NULL;
+
+ if (!frame || !this || !data) {
+ ret = -1;
+ goto out;
+ }
+
+ event = data;
+ priv = this->private;
+
+ GF_ASSERT(priv);
+
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
+ if (!peerinfo) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
+ "Could not find peer %s(%s)", event->peername,
+ uuid_utoa(event->peerid));
+ goto out;
+ }
+
+ gf_uuid_copy(req.uuid, MY_UUID);
+ req.hostname = gf_strdup(peerinfo->hostname);
+ req.port = peerinfo->port;
+
+ ret = glusterd_submit_request(peerinfo->rpc, &req, frame, peerinfo->peer,
+ GLUSTERD_FRIEND_REMOVE, NULL, this,
+ glusterd_friend_remove_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_friend_req);
+
+ RCU_READ_UNLOCK;
+out:
+ GF_FREE(req.hostname);
- event = data;
- priv = this->private;
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
+}
- GF_ASSERT (priv);
+int32_t
+glusterd_rpc_friend_update(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gd1_mgmt_friend_update req = {
+ {0},
+ };
+ int ret = 0;
+ glusterd_conf_t *priv = NULL;
+ dict_t *friends = NULL;
+ call_frame_t *dummy_frame = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ friends = data;
+ if (!friends)
+ goto out;
+
+ ret = dict_get_ptr(friends, "peerinfo", VOID(&peerinfo));
+ if (ret)
+ goto out;
+ /* Don't want to send the pointer over */
+ dict_deln(friends, "peerinfo", SLEN("peerinfo"));
+
+ ret = dict_allocate_and_serialize(friends, &req.friends.friends_val,
+ &req.friends.friends_len);
+ if (ret)
+ goto out;
+
+ gf_uuid_copy(req.uuid, MY_UUID);
+
+ dummy_frame = create_frame(this, this->ctx->pool);
+ ret = glusterd_submit_request(peerinfo->rpc, &req, dummy_frame,
+ peerinfo->peer, GLUSTERD_FRIEND_UPDATE, NULL,
+ this, glusterd_friend_update_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_friend_update);
- peerinfo = event->peerinfo;
+out:
+ GF_FREE(req.friends.friends_val);
- uuid_copy (req.uuid, MY_UUID);
- req.hostname = peerinfo->hostname;
- req.port = peerinfo->port;
- ret = glusterd_submit_request (peerinfo->rpc, &req, frame, peerinfo->peer,
- GLUSTERD_FRIEND_REMOVE, NULL,
- this, glusterd3_1_friend_remove_cbk,
- (xdrproc_t)xdr_gd1_mgmt_friend_req);
+ if (ret && dummy_frame)
+ STACK_DESTROY(dummy_frame->root);
-out:
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
}
-
int32_t
-glusterd3_1_friend_update (call_frame_t *frame, xlator_t *this,
- void *data)
+glusterd_cluster_lock(call_frame_t *frame, xlator_t *this, void *data)
{
- gd1_mgmt_friend_update req = {{0},};
- int ret = 0;
- glusterd_conf_t *priv = NULL;
- dict_t *friends = NULL;
- char *dict_buf = NULL;
- size_t len = -1;
- call_frame_t *dummy_frame = NULL;
- glusterd_peerinfo_t *peerinfo = NULL;
-
- priv = this->private;
- GF_ASSERT (priv);
-
- friends = data;
- if (!friends)
- goto out;
+ gd1_mgmt_cluster_lock_req req = {
+ {0},
+ };
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ call_frame_t *dummy_frame = NULL;
- ret = dict_get_ptr (friends, "peerinfo", VOID(&peerinfo));
- if (ret)
- goto out;
+ if (!this)
+ goto out;
- ret = dict_allocate_and_serialize (friends, &dict_buf, (size_t *)&len);
- if (ret)
- goto out;
+ peerinfo = data;
- req.friends.friends_val = dict_buf;
- req.friends.friends_len = len;
+ priv = this->private;
+ GF_ASSERT(priv);
- uuid_copy (req.uuid, MY_UUID);
+ glusterd_get_uuid(&req.uuid);
- dummy_frame = create_frame (this, this->ctx->pool);
- ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame,
- peerinfo->peer,
- GLUSTERD_FRIEND_UPDATE, NULL,
- this, glusterd3_1_friend_update_cbk,
- (xdrproc_t)xdr_gd1_mgmt_friend_update);
+ dummy_frame = create_frame(this, this->ctx->pool);
+ if (!dummy_frame)
+ goto out;
+ ret = glusterd_submit_request(peerinfo->rpc, &req, dummy_frame,
+ peerinfo->mgmt, GLUSTERD_MGMT_CLUSTER_LOCK,
+ NULL, this, glusterd_cluster_lock_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_lock_req);
out:
- if (req.friends.friends_val)
- GF_FREE (req.friends.friends_val);
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (ret && dummy_frame)
+ STACK_DESTROY(dummy_frame->root);
+ return ret;
}
int32_t
-glusterd3_1_cluster_lock (call_frame_t *frame, xlator_t *this,
- void *data)
+glusterd_mgmt_v3_lock_peers(call_frame_t *frame, xlator_t *this, void *data)
{
- gd1_mgmt_cluster_lock_req req = {{0},};
- int ret = -1;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_conf_t *priv = NULL;
- call_frame_t *dummy_frame = NULL;
-
- if (!this)
- goto out;
-
- peerinfo = data;
-
- priv = this->private;
- GF_ASSERT (priv);
-
- glusterd_get_uuid (&req.uuid);
-
- dummy_frame = create_frame (this, this->ctx->pool);
- if (!dummy_frame)
- goto out;
-
- ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame,
- peerinfo->mgmt, GLUSTERD_MGMT_CLUSTER_LOCK,
- NULL,
- this, glusterd3_1_cluster_lock_cbk,
- (xdrproc_t)xdr_gd1_mgmt_cluster_lock_req);
+ gd1_mgmt_v3_lock_req req = {
+ {0},
+ };
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *dict = NULL;
+ uuid_t *txn_id = NULL;
+
+ if (!this)
+ goto out;
+
+ dict = data;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo));
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=peerinfo", NULL);
+ goto out;
+ }
+
+ // peerinfo should not be in payload
+ dict_deln(dict, "peerinfo", SLEN("peerinfo"));
+
+ glusterd_get_uuid(&req.uuid);
+
+ ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ /* Sending valid transaction ID to peers */
+ ret = dict_get_bin(dict, "transaction_id", (void **)&txn_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_ID_GET_FAIL,
+ "Failed to get transaction id.");
+ goto out;
+ } else {
+ gf_msg_debug(this->name, 0, "Transaction_id = %s", uuid_utoa(*txn_id));
+ gf_uuid_copy(req.txn_id, *txn_id);
+ }
+
+ if (!frame)
+ frame = create_frame(this, this->ctx->pool);
+
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+ frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!frame->cookie) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ ret = -1;
+ goto out;
+ }
+ gf_uuid_copy(frame->cookie, req.txn_id);
+
+ ret = glusterd_submit_request(peerinfo->rpc, &req, frame, peerinfo->mgmt_v3,
+ GLUSTERD_MGMT_V3_LOCK, NULL, this,
+ glusterd_mgmt_v3_lock_peers_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_v3_lock_req);
out:
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ if (dict)
+ dict_unref(dict);
+ if (req.dict.dict_val)
+ GF_FREE(req.dict.dict_val);
+ return ret;
}
int32_t
-glusterd3_1_cluster_unlock (call_frame_t *frame, xlator_t *this,
- void *data)
+glusterd_mgmt_v3_unlock_peers(call_frame_t *frame, xlator_t *this, void *data)
{
- gd1_mgmt_cluster_lock_req req = {{0},};
- int ret = -1;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_conf_t *priv = NULL;
- call_frame_t *dummy_frame = NULL;
-
- if (!this ) {
- ret = -1;
- goto out;
- }
- peerinfo = data;
- priv = this->private;
- GF_ASSERT (priv);
-
- glusterd_get_uuid (&req.uuid);
-
- dummy_frame = create_frame (this, this->ctx->pool);
- if (!dummy_frame)
- goto out;
-
- ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame,
- peerinfo->mgmt, GLUSTERD_MGMT_CLUSTER_UNLOCK,
- NULL,
- this, glusterd3_1_cluster_unlock_cbk,
- (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_req);
+ gd1_mgmt_v3_unlock_req req = {
+ {0},
+ };
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *dict = NULL;
+ uuid_t *txn_id = NULL;
+
+ if (!this)
+ goto out;
+
+ dict = data;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo));
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=peerinfo", NULL);
+ goto out;
+ }
+
+ // peerinfo should not be in payload
+ dict_deln(dict, "peerinfo", SLEN("peerinfo"));
+
+ glusterd_get_uuid(&req.uuid);
+
+ ret = dict_allocate_and_serialize(dict, &req.dict.dict_val,
+ &req.dict.dict_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+
+ /* Sending valid transaction ID to peers */
+ ret = dict_get_bin(dict, "transaction_id", (void **)&txn_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_ID_GET_FAIL,
+ "Failed to get transaction id.");
+ goto out;
+ } else {
+ gf_msg_debug(this->name, 0, "Transaction_id = %s", uuid_utoa(*txn_id));
+ gf_uuid_copy(req.txn_id, *txn_id);
+ }
+
+ if (!frame)
+ frame = create_frame(this, this->ctx->pool);
+
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+ frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!frame->cookie) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ ret = -1;
+ goto out;
+ }
+ gf_uuid_copy(frame->cookie, req.txn_id);
+
+ ret = glusterd_submit_request(peerinfo->rpc, &req, frame, peerinfo->mgmt_v3,
+ GLUSTERD_MGMT_V3_UNLOCK, NULL, this,
+ glusterd_mgmt_v3_unlock_peers_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_v3_unlock_req);
out:
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ if (dict)
+ dict_unref(dict);
+
+ if (req.dict.dict_val)
+ GF_FREE(req.dict.dict_val);
+ return ret;
}
int32_t
-glusterd3_1_stage_op (call_frame_t *frame, xlator_t *this,
- void *data)
+glusterd_cluster_unlock(call_frame_t *frame, xlator_t *this, void *data)
{
- gd1_mgmt_stage_op_req req = {{0,},};
- int ret = -1;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_conf_t *priv = NULL;
- call_frame_t *dummy_frame = NULL;
- dict_t *dict = NULL;
- gf_boolean_t is_alloc = _gf_true;
-
- if (!this) {
- goto out;
- }
-
- dict = data;
-
- priv = this->private;
- GF_ASSERT (priv);
-
- ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo));
- if (ret)
- goto out;
-
- //peerinfo should not be in payload
- dict_del (dict, "peerinfo");
-
- glusterd_get_uuid (&req.uuid);
- req.op = glusterd_op_get_op ();
-
- ret = dict_allocate_and_serialize (dict, &req.buf.buf_val,
- (size_t *)&req.buf.buf_len);
- if (ret)
- goto out;
+ gd1_mgmt_cluster_lock_req req = {
+ {0},
+ };
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ call_frame_t *dummy_frame = NULL;
+
+ if (!this) {
+ ret = -1;
+ goto out;
+ }
+ peerinfo = data;
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ glusterd_get_uuid(&req.uuid);
+
+ dummy_frame = create_frame(this, this->ctx->pool);
+ if (!dummy_frame)
+ goto out;
+
+ ret = glusterd_submit_request(peerinfo->rpc, &req, dummy_frame,
+ peerinfo->mgmt, GLUSTERD_MGMT_CLUSTER_UNLOCK,
+ NULL, this, glusterd_cluster_unlock_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_cluster_unlock_req);
+out:
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ if (ret && dummy_frame)
+ STACK_DESTROY(dummy_frame->root);
- dummy_frame = create_frame (this, this->ctx->pool);
- if (!dummy_frame)
- goto out;
+ return ret;
+}
- ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame,
- peerinfo->mgmt, GLUSTERD_MGMT_STAGE_OP,
- NULL,
- this, glusterd3_1_stage_op_cbk,
- (xdrproc_t)xdr_gd1_mgmt_stage_op_req);
+int32_t
+glusterd_stage_op(call_frame_t *frame, xlator_t *this, void *data)
+{
+ gd1_mgmt_stage_op_req req = {
+ {
+ 0,
+ },
+ };
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *dict = NULL;
+ uuid_t *txn_id = NULL;
+
+ if (!this) {
+ goto out;
+ }
+
+ dict = data;
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo));
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=peerinfo", NULL);
+ goto out;
+ }
+
+ // peerinfo should not be in payload
+ dict_deln(dict, "peerinfo", SLEN("peerinfo"));
+
+ glusterd_get_uuid(&req.uuid);
+ req.op = glusterd_op_get_op();
+
+ ret = dict_allocate_and_serialize(dict, &req.buf.buf_val, &req.buf.buf_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+ /* Sending valid transaction ID to peers */
+ ret = dict_get_bin(dict, "transaction_id", (void **)&txn_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_ID_GET_FAIL,
+ "Failed to get transaction id.");
+ goto out;
+ } else {
+ gf_msg_debug(this->name, 0, "Transaction_id = %s", uuid_utoa(*txn_id));
+ }
+
+ if (!frame)
+ frame = create_frame(this, this->ctx->pool);
+
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+ frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!frame->cookie) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ ret = -1;
+ goto out;
+ }
+ gf_uuid_copy(frame->cookie, *txn_id);
+
+ ret = glusterd_submit_request(peerinfo->rpc, &req, frame, peerinfo->mgmt,
+ GLUSTERD_MGMT_STAGE_OP, NULL, this,
+ glusterd_stage_op_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_stage_op_req);
out:
- if ((_gf_true == is_alloc) && req.buf.buf_val)
- GF_FREE (req.buf.buf_val);
+ if (req.buf.buf_val)
+ GF_FREE(req.buf.buf_val);
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
}
int32_t
-glusterd3_1_commit_op (call_frame_t *frame, xlator_t *this,
- void *data)
+glusterd_commit_op(call_frame_t *frame, xlator_t *this, void *data)
{
- gd1_mgmt_commit_op_req req = {{0,},};
- int ret = -1;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_conf_t *priv = NULL;
- call_frame_t *dummy_frame = NULL;
- dict_t *dict = NULL;
- gf_boolean_t is_alloc = _gf_true;
-
- if (!this) {
- goto out;
- }
-
- dict = data;
- priv = this->private;
- GF_ASSERT (priv);
-
- ret = dict_get_ptr (dict, "peerinfo", VOID (&peerinfo));
- if (ret)
- goto out;
-
- //peerinfo should not be in payload
- dict_del (dict, "peerinfo");
-
- glusterd_get_uuid (&req.uuid);
- req.op = glusterd_op_get_op ();
-
- ret = dict_allocate_and_serialize (dict, &req.buf.buf_val,
- (size_t *)&req.buf.buf_len);
- if (ret)
- goto out;
-
- dummy_frame = create_frame (this, this->ctx->pool);
- if (!dummy_frame)
- goto out;
-
- ret = glusterd_submit_request (peerinfo->rpc, &req, dummy_frame,
- peerinfo->mgmt, GLUSTERD_MGMT_COMMIT_OP,
- NULL,
- this, glusterd3_1_commit_op_cbk,
- (xdrproc_t)xdr_gd1_mgmt_commit_op_req);
+ gd1_mgmt_commit_op_req req = {
+ {
+ 0,
+ },
+ };
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *dict = NULL;
+ uuid_t *txn_id = NULL;
+
+ if (!this) {
+ goto out;
+ }
+
+ dict = data;
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_ptr(dict, "peerinfo", VOID(&peerinfo));
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=peerinfo", NULL);
+ goto out;
+ }
+
+ // peerinfo should not be in payload
+ dict_deln(dict, "peerinfo", SLEN("peerinfo"));
+
+ glusterd_get_uuid(&req.uuid);
+ req.op = glusterd_op_get_op();
+
+ ret = dict_allocate_and_serialize(dict, &req.buf.buf_val, &req.buf.buf_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto out;
+ }
+ /* Sending valid transaction ID to peers */
+ ret = dict_get_bin(dict, "transaction_id", (void **)&txn_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_ID_GET_FAIL,
+ "Failed to get transaction id.");
+ goto out;
+ } else {
+ gf_msg_debug(this->name, 0, "Transaction_id = %s", uuid_utoa(*txn_id));
+ }
+
+ if (!frame)
+ frame = create_frame(this, this->ctx->pool);
+
+ if (!frame) {
+ ret = -1;
+ goto out;
+ }
+ frame->cookie = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!frame->cookie) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ ret = -1;
+ goto out;
+ }
+ gf_uuid_copy(frame->cookie, *txn_id);
+
+ ret = glusterd_submit_request(peerinfo->rpc, &req, frame, peerinfo->mgmt,
+ GLUSTERD_MGMT_COMMIT_OP, NULL, this,
+ glusterd_commit_op_cbk,
+ (xdrproc_t)xdr_gd1_mgmt_commit_op_req);
out:
- if ((_gf_true == is_alloc) && req.buf.buf_val)
- GF_FREE (req.buf.buf_val);
+ if (req.buf.buf_val)
+ GF_FREE(req.buf.buf_val);
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
}
int32_t
-glusterd3_1_brick_op_cbk (struct rpc_req *req, struct iovec *iov,
- int count, void *myframe)
+__glusterd_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
{
- gd1_mgmt_brick_op_rsp rsp = {0};
- int ret = -1;
- int32_t op_ret = -1;
- glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
- call_frame_t *frame = NULL;
- glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL;
- dict_t *dict = NULL;
- int index = 0;
- glusterd_req_ctx_t *req_ctx = NULL;
- glusterd_pending_node_t *node = NULL;
-
- GF_ASSERT (req);
- frame = myframe;
- req_ctx = frame->local;
-
- if (-1 == req->rpc_status) {
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- /* use standard allocation because to keep uniformity
- in freeing it */
- rsp.op_errstr = strdup ("error");
- event_type = GD_OP_EVENT_RCVD_RJT;
- goto out;
- }
-
- ret = xdr_to_generic (*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ gd1_mgmt_brick_op_rsp rsp = {0};
+ int ret = -1;
+ int32_t op_ret = -1;
+ glusterd_op_sm_event_type_t event_type = GD_OP_EVENT_NONE;
+ call_frame_t *frame = NULL;
+ glusterd_op_brick_rsp_ctx_t *ev_ctx = NULL;
+ dict_t *dict = NULL;
+ int index = 0;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ glusterd_pending_node_t *node = NULL;
+ xlator_t *this = NULL;
+ uuid_t *txn_id = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(req);
+
+ txn_id = &priv->global_txn_id;
+ frame = myframe;
+ req_ctx = frame->local;
+
+ if (-1 == req->rpc_status) {
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ /* use standard allocation because to keep uniformity
+ in freeing it */
+ rsp.op_errstr = strdup("error");
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gd1_mgmt_brick_op_rsp);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RES_DECODE_FAIL,
+ "Failed to decode brick op "
+ "response received");
+ rsp.op_ret = -1;
+ rsp.op_errno = EINVAL;
+ rsp.op_errstr = strdup("Unable to decode brick op response");
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ }
+
+ if (rsp.output.output_len) {
+ /* Unserialize the dictionary */
+ dict = dict_new();
+
+ ret = dict_unserialize(rsp.output.output_val, rsp.output.output_len,
+ &dict);
if (ret < 0) {
- gf_log ("", GF_LOG_ERROR, "error");
- rsp.op_ret = -1;
- rsp.op_errno = EINVAL;
- rsp.op_errstr = strdup ("Unable to decode brick op response");
- event_type = GD_OP_EVENT_RCVD_RJT;
- goto out;
- }
-
- if (rsp.output.output_len) {
- /* Unserialize the dictionary */
- dict = dict_new ();
-
- ret = dict_unserialize (rsp.output.output_val,
- rsp.output.output_len,
- &dict);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "failed to "
- "unserialize rsp-buffer to dictionary");
- event_type = GD_OP_EVENT_RCVD_RJT;
- goto out;
- } else {
- dict->extra_stdfree = rsp.output.output_val;
- }
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "Failed to "
+ "unserialize rsp-buffer to dictionary");
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
+ } else {
+ dict->extra_stdfree = rsp.output.output_val;
}
+ }
- op_ret = rsp.op_ret;
+ op_ret = rsp.op_ret;
- /* Add index to rsp_dict for GD_OP_STATUS_VOLUME */
- if (GD_OP_STATUS_VOLUME == req_ctx->op) {
- node = frame->cookie;
- index = node->index;
- ret = dict_set_int32 (dict, "index", index);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Error setting index on brick status rsp dict");
- rsp.op_ret = -1;
- event_type = GD_OP_EVENT_RCVD_RJT;
- goto out;
- }
+ /* Add index to rsp_dict for GD_OP_STATUS_VOLUME */
+ if (GD_OP_STATUS_VOLUME == req_ctx->op) {
+ node = frame->cookie;
+ index = node->index;
+ ret = dict_set_int32n(dict, "index", SLEN("index"), index);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Error setting index on brick status rsp dict");
+ rsp.op_ret = -1;
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ goto out;
}
+ }
out:
- ev_ctx = GF_CALLOC (1, sizeof (*ev_ctx), gf_gld_mt_brick_rsp_ctx_t);
- GF_ASSERT (ev_ctx);
+
+ if (req_ctx && req_ctx->dict) {
+ ret = dict_get_bin(req_ctx->dict, "transaction_id", (void **)&txn_id);
+ gf_msg_debug(this->name, -ret, "transaction ID = %s",
+ uuid_utoa(*txn_id));
+ }
+
+ ev_ctx = GF_CALLOC(1, sizeof(*ev_ctx), gf_gld_mt_brick_rsp_ctx_t);
+ if (ev_ctx) {
if (op_ret) {
- event_type = GD_OP_EVENT_RCVD_RJT;
- ev_ctx->op_ret = op_ret;
- ev_ctx->op_errstr = gf_strdup(rsp.op_errstr);
+ event_type = GD_OP_EVENT_RCVD_RJT;
+ ev_ctx->op_ret = op_ret;
+ ev_ctx->op_errstr = gf_strdup(rsp.op_errstr);
} else {
- event_type = GD_OP_EVENT_RCVD_ACC;
+ event_type = GD_OP_EVENT_RCVD_ACC;
}
ev_ctx->pending_node = frame->cookie;
- ev_ctx->rsp_dict = dict;
+ ev_ctx->rsp_dict = dict;
ev_ctx->commit_ctx = frame->local;
- ret = glusterd_op_sm_inject_event (event_type, ev_ctx);
- if (!ret) {
- glusterd_friend_sm ();
- glusterd_op_sm ();
+ ret = glusterd_op_sm_inject_event(event_type, txn_id, ev_ctx);
+ }
+ if (!ret) {
+ glusterd_friend_sm();
+ glusterd_op_sm();
+ }
+
+ if (ret) {
+ if (dict) {
+ dict_unref(dict);
+ }
+ if (ev_ctx) {
+ GF_FREE(ev_ctx->op_errstr);
+ GF_FREE(ev_ctx);
}
+ }
+ free(rsp.op_errstr); // malloced by xdr
+ GLUSTERD_STACK_DESTROY(frame);
+ return ret;
+}
- if (ret && dict)
- dict_unref (dict);
- if (rsp.op_errstr)
- free (rsp.op_errstr); //malloced by xdr
- GLUSTERD_STACK_DESTROY (frame);
- return ret;
+int32_t
+glusterd_brick_op_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *myframe)
+{
+ return glusterd_big_locked_cbk(req, iov, count, myframe,
+ __glusterd_brick_op_cbk);
}
int32_t
-glusterd3_1_brick_op (call_frame_t *frame, xlator_t *this,
- void *data)
+glusterd_brick_op(call_frame_t *frame, xlator_t *this, void *data)
{
- gd1_mgmt_brick_op_req *req = NULL;
- int ret = 0;
- glusterd_conf_t *priv = NULL;
- call_frame_t *dummy_frame = NULL;
- char *op_errstr = NULL;
- int pending_bricks = 0;
- glusterd_pending_node_t *pending_node;
- glusterd_req_ctx_t *req_ctx = NULL;
- struct rpc_clnt *rpc = NULL;
- dict_t *op_ctx = NULL;
-
- if (!this) {
- ret = -1;
- goto out;
- }
- priv = this->private;
- GF_ASSERT (priv);
+ gd1_mgmt_brick_op_req *req = NULL;
+ int ret = 0;
+ int ret1 = 0;
+ glusterd_conf_t *priv = NULL;
+ call_frame_t *dummy_frame = NULL;
+ char *op_errstr = NULL;
+ int pending_bricks = 0;
+ glusterd_pending_node_t *pending_node;
+ glusterd_req_ctx_t *req_ctx = NULL;
+ struct rpc_clnt *rpc = NULL;
+ dict_t *op_ctx = NULL;
+ uuid_t *txn_id = NULL;
+
+ if (!this) {
+ ret = -1;
+ goto out;
+ }
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ txn_id = &priv->global_txn_id;
+
+ req_ctx = data;
+ GF_ASSERT(req_ctx);
+ CDS_INIT_LIST_HEAD(&opinfo.pending_bricks);
+
+ ret = dict_get_bin(req_ctx->dict, "transaction_id", (void **)&txn_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_SELECT_FAIL,
+ "Could not get transaction ID from dict, global"
+ "transaction ID = %s",
+ uuid_utoa(*txn_id));
+ } else {
+ gf_msg_debug(this->name, 0, "transaction ID = %s", uuid_utoa(*txn_id));
+ }
+ ret = glusterd_op_bricks_select(req_ctx->op, req_ctx->dict, &op_errstr,
+ &opinfo.pending_bricks, NULL);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_SELECT_FAIL,
+ "Failed to select bricks "
+ "while performing brick op during 'Volume %s'",
+ gd_op_list[opinfo.op]);
+ opinfo.op_errstr = op_errstr;
+ goto out;
+ }
+
+ cds_list_for_each_entry(pending_node, &opinfo.pending_bricks, list)
+ {
+ dummy_frame = create_frame(this, this->ctx->pool);
+ if (!dummy_frame)
+ continue;
+
+ if ((pending_node->type == GD_NODE_NFS) ||
+ (pending_node->type == GD_NODE_QUOTAD) ||
+ (pending_node->type == GD_NODE_SNAPD) ||
+ (pending_node->type == GD_NODE_SCRUB) ||
+ ((pending_node->type == GD_NODE_SHD) &&
+ (req_ctx->op == GD_OP_STATUS_VOLUME))) {
+ ret = glusterd_node_op_build_payload(
+ req_ctx->op, (gd1_mgmt_brick_op_req **)&req, req_ctx->dict);
+ } else {
+ ret = glusterd_brick_op_build_payload(
+ req_ctx->op, pending_node->node, (gd1_mgmt_brick_op_req **)&req,
+ req_ctx->dict);
+ }
+ if (ret || !req) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_OP_PAYLOAD_BUILD_FAIL,
+ "Failed to "
+ "build op payload during "
+ "'Volume %s'",
+ gd_op_list[req_ctx->op]);
+ goto out;
+ }
+
+ dummy_frame->local = data;
+ dummy_frame->cookie = pending_node;
+
+ rpc = glusterd_pending_node_get_rpc(pending_node);
+ if (!rpc) {
+ if (pending_node->type == GD_NODE_REBALANCE) {
+ opinfo.brick_pending_count = 0;
+ ret = 0;
+ GF_FREE(req->input.input_val);
+ GF_FREE(req);
+ req = NULL;
+ GLUSTERD_STACK_DESTROY(dummy_frame);
- req_ctx = data;
- GF_ASSERT (req_ctx);
- INIT_LIST_HEAD (&opinfo.pending_bricks);
- ret = glusterd_op_bricks_select (req_ctx->op, req_ctx->dict, &op_errstr);
+ op_ctx = glusterd_op_get_ctx();
+ if (!op_ctx)
+ goto out;
+ glusterd_defrag_volume_node_rsp(req_ctx->dict, NULL, op_ctx);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Brick Op failed");
- opinfo.op_errstr = op_errstr;
goto out;
+ }
+
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RPC_FAILURE,
+ "Brick Op failed "
+ "due to rpc failure.");
+ goto out;
}
- list_for_each_entry (pending_node, &opinfo.pending_bricks, list) {
- dummy_frame = create_frame (this, this->ctx->pool);
- if (!dummy_frame)
- continue;
-
- if ((pending_node->type == GD_NODE_NFS) ||
- ((pending_node->type == GD_NODE_SHD) &&
- (req_ctx->op == GD_OP_STATUS_VOLUME)))
- ret = glusterd_node_op_build_payload
- (req_ctx->op,
- (gd1_mgmt_brick_op_req **)&req,
- req_ctx->dict);
- else
- ret = glusterd_brick_op_build_payload
- (req_ctx->op, pending_node->node,
- (gd1_mgmt_brick_op_req **)&req,
- req_ctx->dict);
+ ret = glusterd_submit_request(
+ rpc, req, dummy_frame, priv->gfs_mgmt, req->op, NULL, this,
+ glusterd_brick_op_cbk, (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ GF_FREE(req->input.input_val);
+ GF_FREE(req);
+ req = NULL;
- if (ret)
- goto out;
-
- dummy_frame->local = data;
- dummy_frame->cookie = pending_node;
-
- rpc = glusterd_pending_node_get_rpc (pending_node);
- if (!rpc) {
- if (pending_node->type == GD_NODE_REBALANCE) {
- opinfo.brick_pending_count = 0;
- ret = 0;
- if (req) {
- if (req->input.input_val)
- GF_FREE (req->input.input_val);
- GF_FREE (req);
- req = NULL;
- }
- GLUSTERD_STACK_DESTROY (dummy_frame);
-
- op_ctx = glusterd_op_get_ctx ();
- if (!op_ctx)
- goto out;
- glusterd_defrag_volume_node_rsp (req_ctx->dict,
- NULL, op_ctx);
-
- goto out;
- }
-
- ret = -1;
- gf_log (this->name, GF_LOG_ERROR, "Brick Op failed "
- "due to rpc failure.");
- goto out;
- }
+ if (!ret)
+ pending_bricks++;
- ret = glusterd_submit_request (rpc, req, dummy_frame,
- priv->gfs_mgmt,
- req->op, NULL,
- this, glusterd3_1_brick_op_cbk,
- (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
- if (req) {
- if (req->input.input_val)
- GF_FREE (req->input.input_val);
- GF_FREE (req);
- req = NULL;
- }
- if (!ret)
- pending_bricks++;
- }
+ glusterd_pending_node_put_rpc(pending_node);
+ }
- gf_log ("glusterd", GF_LOG_DEBUG, "Sent op req to %d bricks",
- pending_bricks);
- opinfo.brick_pending_count = pending_bricks;
+ gf_msg_trace(this->name, 0,
+ "Sent brick op req for operation "
+ "'Volume %s' to %d bricks",
+ gd_op_list[req_ctx->op], pending_bricks);
+ opinfo.brick_pending_count = pending_bricks;
out:
- if (ret) {
- glusterd_op_sm_inject_event (GD_OP_EVENT_RCVD_RJT, data);
- opinfo.op_ret = ret;
- }
- gf_log ("glusterd", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+
+ if (ret)
+ opinfo.op_ret = ret;
+
+ ret1 = glusterd_set_txn_opinfo(txn_id, &opinfo);
+ if (ret1)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_TRANS_OPINFO_SET_FAIL,
+ "Unable to set "
+ "transaction's opinfo");
+
+ if (ret) {
+ glusterd_op_sm_inject_event(GD_OP_EVENT_RCVD_RJT, txn_id, data);
+ opinfo.op_ret = ret;
+ }
+
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
}
struct rpc_clnt_procedure gd_brick_actors[GLUSTERD_BRICK_MAXVALUE] = {
- [GLUSTERD_BRICK_NULL] = {"NULL", NULL },
- [GLUSTERD_BRICK_OP] = {"BRICK_OP", glusterd3_1_brick_op},
+ [GLUSTERD_BRICK_NULL] = {"NULL", NULL},
+ [GLUSTERD_BRICK_OP] = {"BRICK_OP", glusterd_brick_op},
};
struct rpc_clnt_procedure gd_peer_actors[GLUSTERD_FRIEND_MAXVALUE] = {
- [GLUSTERD_FRIEND_NULL] = {"NULL", NULL },
- [GLUSTERD_PROBE_QUERY] = {"PROBE_QUERY", glusterd3_1_probe},
- [GLUSTERD_FRIEND_ADD] = {"FRIEND_ADD", glusterd3_1_friend_add},
- [GLUSTERD_FRIEND_REMOVE] = {"FRIEND_REMOVE", glusterd3_1_friend_remove},
- [GLUSTERD_FRIEND_UPDATE] = {"FRIEND_UPDATE", glusterd3_1_friend_update},
+ [GLUSTERD_FRIEND_NULL] = {"NULL", NULL},
+ [GLUSTERD_PROBE_QUERY] = {"PROBE_QUERY", glusterd_rpc_probe},
+ [GLUSTERD_FRIEND_ADD] = {"FRIEND_ADD", glusterd_rpc_friend_add},
+ [GLUSTERD_FRIEND_REMOVE] = {"FRIEND_REMOVE", glusterd_rpc_friend_remove},
+ [GLUSTERD_FRIEND_UPDATE] = {"FRIEND_UPDATE", glusterd_rpc_friend_update},
};
struct rpc_clnt_procedure gd_mgmt_actors[GLUSTERD_MGMT_MAXVALUE] = {
- [GLUSTERD_MGMT_NULL] = {"NULL", NULL },
- [GLUSTERD_MGMT_CLUSTER_LOCK] = {"CLUSTER_LOCK", glusterd3_1_cluster_lock},
- [GLUSTERD_MGMT_CLUSTER_UNLOCK] = {"CLUSTER_UNLOCK", glusterd3_1_cluster_unlock},
- [GLUSTERD_MGMT_STAGE_OP] = {"STAGE_OP", glusterd3_1_stage_op},
- [GLUSTERD_MGMT_COMMIT_OP] = {"COMMIT_OP", glusterd3_1_commit_op},
+ [GLUSTERD_MGMT_NULL] = {"NULL", NULL},
+ [GLUSTERD_MGMT_CLUSTER_LOCK] = {"CLUSTER_LOCK", glusterd_cluster_lock},
+ [GLUSTERD_MGMT_CLUSTER_UNLOCK] = {"CLUSTER_UNLOCK",
+ glusterd_cluster_unlock},
+ [GLUSTERD_MGMT_STAGE_OP] = {"STAGE_OP", glusterd_stage_op},
+ [GLUSTERD_MGMT_COMMIT_OP] = {"COMMIT_OP", glusterd_commit_op},
+};
+
+struct rpc_clnt_procedure gd_mgmt_v3_actors[GLUSTERD_MGMT_V3_MAXVALUE] = {
+ [GLUSTERD_MGMT_V3_NULL] = {"NULL", NULL},
+ [GLUSTERD_MGMT_V3_LOCK] = {"MGMT_V3_LOCK", glusterd_mgmt_v3_lock_peers},
+ [GLUSTERD_MGMT_V3_UNLOCK] = {"MGMT_V3_UNLOCK",
+ glusterd_mgmt_v3_unlock_peers},
};
struct rpc_clnt_program gd_mgmt_prog = {
- .progname = "glusterd mgmt",
- .prognum = GD_MGMT_PROGRAM,
- .progver = GD_MGMT_VERSION,
- .proctable = gd_mgmt_actors,
- .numproc = GLUSTERD_MGMT_MAXVALUE,
+ .progname = "glusterd mgmt",
+ .prognum = GD_MGMT_PROGRAM,
+ .progver = GD_MGMT_VERSION,
+ .proctable = gd_mgmt_actors,
+ .numproc = GLUSTERD_MGMT_MAXVALUE,
};
struct rpc_clnt_program gd_brick_prog = {
- .progname = "brick operations",
- .prognum = GD_BRICK_PROGRAM,
- .progver = GD_BRICK_VERSION,
- .proctable = gd_brick_actors,
- .numproc = GLUSTERD_BRICK_MAXVALUE,
+ .progname = "brick operations",
+ .prognum = GD_BRICK_PROGRAM,
+ .progver = GD_BRICK_VERSION,
+ .proctable = gd_brick_actors,
+ .numproc = GLUSTERD_BRICK_MAXVALUE,
};
struct rpc_clnt_program gd_peer_prog = {
- .progname = "Peer mgmt",
- .prognum = GD_FRIEND_PROGRAM,
- .progver = GD_FRIEND_VERSION,
- .proctable = gd_peer_actors,
- .numproc = GLUSTERD_FRIEND_MAXVALUE,
+ .progname = "Peer mgmt",
+ .prognum = GD_FRIEND_PROGRAM,
+ .progver = GD_FRIEND_VERSION,
+ .proctable = gd_peer_actors,
+ .numproc = GLUSTERD_FRIEND_MAXVALUE,
};
+struct rpc_clnt_program gd_mgmt_v3_prog = {
+ .progname = "glusterd mgmt v3",
+ .prognum = GD_MGMT_PROGRAM,
+ .progver = GD_MGMT_V3_VERSION,
+ .proctable = gd_mgmt_v3_actors,
+ .numproc = GLUSTERD_MGMT_V3_MAXVALUE,
+};
diff --git a/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c
new file mode 100644
index 00000000000..c49a0eefba5
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.c
@@ -0,0 +1,207 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-scrub-svc.h"
+#include "glusterd-svc-helper.h"
+
+char *scrub_svc_name = "scrub";
+
+void
+glusterd_scrubsvc_build(glusterd_svc_t *svc)
+{
+ svc->manager = glusterd_scrubsvc_manager;
+ svc->start = glusterd_scrubsvc_start;
+ svc->stop = glusterd_scrubsvc_stop;
+}
+
+int
+glusterd_scrubsvc_init(glusterd_svc_t *svc)
+{
+ return glusterd_svc_init(svc, scrub_svc_name);
+}
+
+static int
+glusterd_scrubsvc_create_volfile()
+{
+ char filepath[PATH_MAX] = {
+ 0,
+ };
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ glusterd_svc_build_volfile_path(scrub_svc_name, conf->workdir, filepath,
+ sizeof(filepath));
+
+ ret = glusterd_create_global_volfile(build_scrub_graph, filepath, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Failed to create volfile");
+ goto out;
+ }
+
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_scrubsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+{
+ int ret = -EINVAL;
+
+ if (!svc->inited) {
+ ret = glusterd_scrubsvc_init(svc);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SCRUB_INIT_FAIL,
+ "Failed to init "
+ "scrub service");
+ goto out;
+ } else {
+ svc->inited = _gf_true;
+ gf_msg_debug(THIS->name, 0,
+ "scrub service "
+ "initialized");
+ }
+ }
+
+ if (glusterd_should_i_stop_bitd()) {
+ ret = svc->stop(svc, SIGTERM);
+ } else {
+ ret = glusterd_scrubsvc_create_volfile();
+ if (ret)
+ goto out;
+
+ ret = svc->stop(svc, SIGKILL);
+ if (ret)
+ goto out;
+
+ ret = svc->start(svc, flags);
+ if (ret)
+ goto out;
+
+ ret = glusterd_conn_connect(&(svc->conn));
+ if (ret)
+ goto out;
+ }
+
+out:
+ if (ret)
+ gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_scrubsvc_start(glusterd_svc_t *svc, int flags)
+{
+ int ret = -1;
+ dict_t *cmdict = NULL;
+
+ cmdict = dict_new();
+ if (!cmdict) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto error_return;
+ }
+
+ ret = dict_set_str(cmdict, "cmdarg0", "--global-timer-wheel");
+ if (ret)
+ goto dealloc_dict;
+
+ ret = glusterd_svc_start(svc, flags, cmdict);
+
+dealloc_dict:
+ dict_unref(cmdict);
+error_return:
+ return ret;
+}
+
+int
+glusterd_scrubsvc_stop(glusterd_svc_t *svc, int sig)
+{
+ return glusterd_svc_stop(svc, sig);
+}
+
+int
+glusterd_scrubsvc_reconfigure()
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ gf_boolean_t identical = _gf_false;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ if (glusterd_should_i_stop_bitd())
+ goto manager;
+
+ /*
+ * Check both OLD and NEW volfiles, if they are SAME by size
+ * and cksum i.e. "character-by-character". If YES, then
+ * NOTHING has been changed, just return.
+ */
+ ret = glusterd_svc_check_volfile_identical(priv->scrub_svc.name,
+ build_scrub_graph, &identical);
+ if (ret)
+ goto out;
+
+ if (identical) {
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * They are not identical. Find out if the topology is changed
+ * OR just the volume options. If just the options which got
+ * changed, then inform the xlator to reconfigure the options.
+ */
+ identical = _gf_false; /* RESET the FLAG */
+ ret = glusterd_svc_check_topology_identical(priv->scrub_svc.name,
+ build_scrub_graph, &identical);
+ if (ret)
+ goto out;
+
+ /* Topology is not changed, but just the options. But write the
+ * options to scrub volfile, so that scrub will be reconfigured.
+ */
+ if (identical) {
+ ret = glusterd_scrubsvc_create_volfile();
+ if (ret == 0) { /* Only if above PASSES */
+ ret = glusterd_fetchspec_notify(THIS);
+ }
+ goto out;
+ }
+
+manager:
+ /*
+ * scrub volfile's topology has been changed. scrub server needs
+ * to be RESTARTED to ACT on the changed volfile.
+ */
+ ret = priv->scrub_svc.manager(&(priv->scrub_svc), NULL, PROC_START_NO_WAIT);
+
+out:
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-scrub-svc.h b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.h
new file mode 100644
index 00000000000..514b1de96a0
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-scrub-svc.h
@@ -0,0 +1,45 @@
+/*
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_SCRUB_SVC_H_
+#define _GLUSTERD_SCRUB_SVC_H_
+
+#include "glusterd-svc-mgmt.h"
+
+typedef struct glusterd_scrubsvc_ glusterd_scrubsvc_t;
+
+struct glusterd_scrubsvc_ {
+ glusterd_svc_t svc;
+ gf_store_handle_t *handle;
+};
+
+void
+glusterd_scrubsvc_build(glusterd_svc_t *svc);
+
+int
+glusterd_scrubsvc_init(glusterd_svc_t *svc);
+
+int
+glusterd_scrubsvc_manager(glusterd_svc_t *svc, void *data, int flags);
+
+int
+glusterd_scrubsvc_start(glusterd_svc_t *svc, int flags);
+
+int
+glusterd_scrubsvc_stop(glusterd_svc_t *svc, int sig);
+
+int
+glusterd_scrubsvc_reconfigure();
+
+void
+glusterd_scrubsvc_build_volfile_path(char *server, char *workdir, char *volfile,
+ size_t len);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.c b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
new file mode 100644
index 00000000000..b0b8a2e4018
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.c
@@ -0,0 +1,486 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <glusterfs/common-utils.h>
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-messages.h"
+#include "glusterd-server-quorum.h"
+#include "glusterd-store.h"
+#include "glusterd-syncop.h"
+#include "glusterd-op-sm.h"
+
+#define CEILING_POS(X) (((X) - (int)(X)) > 0 ? (int)((X) + 1) : (int)(X))
+
+static gf_boolean_t
+glusterd_is_get_op(xlator_t *this, glusterd_op_t op, dict_t *dict)
+{
+ char *key = NULL;
+ char *volname = NULL;
+ int ret = 0;
+
+ if (op == GD_OP_STATUS_VOLUME)
+ return _gf_true;
+
+ if (op == GD_OP_SET_VOLUME) {
+ /*check for set volume help*/
+ ret = dict_get_str(dict, "volname", &volname);
+ if (volname && ((strcmp(volname, "help") == 0) ||
+ (strcmp(volname, "help-xml") == 0))) {
+ ret = dict_get_str(dict, "key1", &key);
+ if (ret < 0)
+ return _gf_true;
+ }
+ }
+ return _gf_false;
+}
+
+gf_boolean_t
+glusterd_is_quorum_validation_required(xlator_t *this, glusterd_op_t op,
+ dict_t *dict)
+{
+ gf_boolean_t required = _gf_true;
+ char *key = NULL;
+ char *key_fixed = NULL;
+ int ret = -1;
+
+ if (glusterd_is_get_op(this, op, dict)) {
+ required = _gf_false;
+ goto out;
+ }
+ if ((op != GD_OP_SET_VOLUME) && (op != GD_OP_RESET_VOLUME))
+ goto out;
+ if (op == GD_OP_SET_VOLUME)
+ ret = dict_get_str(dict, "key1", &key);
+ else if (op == GD_OP_RESET_VOLUME)
+ ret = dict_get_str(dict, "key", &key);
+ if (ret)
+ goto out;
+ ret = glusterd_check_option_exists(key, &key_fixed);
+ if (ret <= 0)
+ goto out;
+ if (key_fixed)
+ key = key_fixed;
+ if (glusterd_is_quorum_option(key))
+ required = _gf_false;
+out:
+ GF_FREE(key_fixed);
+ return required;
+}
+
+int
+glusterd_validate_quorum(xlator_t *this, glusterd_op_t op, dict_t *dict,
+ char **op_errstr)
+{
+ int ret = 0;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *errstr = NULL;
+
+ errstr = "Quorum not met. Volume operation not allowed.";
+ if (!glusterd_is_quorum_validation_required(this, op, dict))
+ goto out;
+
+ ret = dict_get_str(dict, "volname", &volname);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=volname", NULL);
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_VOLINFO_GET_FAIL, NULL);
+ ret = 0;
+ goto out;
+ }
+
+ if (!glusterd_is_volume_in_server_quorum(volinfo)) {
+ ret = 0;
+ goto out;
+ }
+
+ if (does_gd_meet_server_quorum(this)) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = -1;
+ *op_errstr = gf_strdup(errstr);
+
+out:
+ return ret;
+}
+
+gf_boolean_t
+glusterd_is_quorum_option(char *option)
+{
+ gf_boolean_t res = _gf_false;
+ int i = 0;
+ static const char *const keys[] = {GLUSTERD_QUORUM_TYPE_KEY,
+ GLUSTERD_QUORUM_RATIO_KEY, NULL};
+
+ for (i = 0; keys[i]; i++) {
+ if (strcmp(option, keys[i]) == 0) {
+ res = _gf_true;
+ break;
+ }
+ }
+ return res;
+}
+
+gf_boolean_t
+glusterd_is_quorum_changed(dict_t *options, char *option, char *value)
+{
+ int ret = 0;
+ gf_boolean_t reconfigured = _gf_false;
+ gf_boolean_t all = _gf_false;
+ char *oldquorum = NULL;
+ char *newquorum = NULL;
+ char *oldratio = NULL;
+ char *newratio = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ if ((strcmp("all", option) != 0) && !glusterd_is_quorum_option(option))
+ goto out;
+
+ if (strcmp("all", option) == 0)
+ all = _gf_true;
+
+ if (all || (strcmp(GLUSTERD_QUORUM_TYPE_KEY, option) == 0)) {
+ newquorum = value;
+ ret = dict_get_str(options, GLUSTERD_QUORUM_TYPE_KEY, &oldquorum);
+ if (ret)
+ gf_msg(this->name, GF_LOG_DEBUG, 0, GD_MSG_DICT_GET_FAILED,
+ "dict_get_str failed on %s", GLUSTERD_QUORUM_TYPE_KEY);
+ }
+
+ if (all || (strcmp(GLUSTERD_QUORUM_RATIO_KEY, option) == 0)) {
+ newratio = value;
+ ret = dict_get_str(options, GLUSTERD_QUORUM_RATIO_KEY, &oldratio);
+ if (ret)
+ gf_msg(this->name, GF_LOG_DEBUG, 0, GD_MSG_DICT_GET_FAILED,
+ "dict_get_str failed on %s", GLUSTERD_QUORUM_RATIO_KEY);
+ }
+
+ reconfigured = _gf_true;
+
+ if (oldquorum && newquorum && (strcmp(oldquorum, newquorum) == 0))
+ reconfigured = _gf_false;
+ if (oldratio && newratio && (strcmp(oldratio, newratio) == 0))
+ reconfigured = _gf_false;
+
+ if ((oldratio == NULL) && (newratio == NULL) && (oldquorum == NULL) &&
+ (newquorum == NULL))
+ reconfigured = _gf_false;
+out:
+ return reconfigured;
+}
+
+static gf_boolean_t
+_is_contributing_to_quorum(gd_quorum_contrib_t contrib)
+{
+ if ((contrib == QUORUM_UP) || (contrib == QUORUM_DOWN))
+ return _gf_true;
+ return _gf_false;
+}
+
+gf_boolean_t
+does_quorum_meet(int active_count, int quorum_count)
+{
+ return (active_count >= quorum_count);
+}
+
+int
+glusterd_get_quorum_cluster_counts(xlator_t *this, int *active_count,
+ int *quorum_count)
+{
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *conf = NULL;
+ int ret = -1;
+ int inquorum_count = 0;
+ char *val = NULL;
+ double quorum_percentage = 0.0;
+ gf_boolean_t ratio = _gf_false;
+ int count = 0;
+
+ conf = this->private;
+
+ /* Start with counting self */
+ inquorum_count = 1;
+ if (active_count)
+ *active_count = 1;
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &conf->peers, uuid_list)
+ {
+ if (_is_contributing_to_quorum(peerinfo->quorum_contrib))
+ inquorum_count = inquorum_count + 1;
+ if (active_count && (peerinfo->quorum_contrib == QUORUM_UP))
+ *active_count = *active_count + 1;
+ }
+ RCU_READ_UNLOCK;
+
+ ret = dict_get_str(conf->opts, GLUSTERD_QUORUM_RATIO_KEY, &val);
+ if (ret == 0) {
+ ret = gf_string2percent(val, &quorum_percentage);
+ if (ret == 0)
+ ratio = _gf_true;
+ }
+ if (ratio)
+ count = CEILING_POS(inquorum_count * quorum_percentage / 100.0);
+ else
+ count = (inquorum_count * 50 / 100) + 1;
+
+ *quorum_count = count;
+ ret = 0;
+
+ return ret;
+}
+
+gf_boolean_t
+glusterd_is_volume_in_server_quorum(glusterd_volinfo_t *volinfo)
+{
+ gf_boolean_t res = _gf_false;
+ char *quorum_type = NULL;
+ int ret = 0;
+
+ ret = dict_get_str(volinfo->dict, GLUSTERD_QUORUM_TYPE_KEY, &quorum_type);
+ if (ret) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_DICT_GET_FAILED,
+ "Key=%s", GLUSTERD_QUORUM_TYPE_KEY, NULL);
+ goto out;
+ }
+
+ if (strcmp(quorum_type, GLUSTERD_SERVER_QUORUM) == 0)
+ res = _gf_true;
+out:
+ return res;
+}
+
+gf_boolean_t
+glusterd_is_any_volume_in_server_quorum(xlator_t *this)
+{
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ conf = this->private;
+ list_for_each_entry(volinfo, &conf->volumes, vol_list)
+ {
+ if (glusterd_is_volume_in_server_quorum(volinfo)) {
+ return _gf_true;
+ }
+ }
+ return _gf_false;
+}
+
+gf_boolean_t
+does_gd_meet_server_quorum(xlator_t *this)
+{
+ int quorum_count = 0;
+ int active_count = 0;
+ gf_boolean_t in = _gf_false;
+ int ret = -1;
+
+ ret = glusterd_get_quorum_cluster_counts(this, &active_count,
+ &quorum_count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_QUORUM_CLUSTER_COUNT_GET_FAIL, NULL);
+ goto out;
+ }
+
+ if (!does_quorum_meet(active_count, quorum_count)) {
+ goto out;
+ }
+
+ in = _gf_true;
+out:
+ return in;
+}
+
+void
+glusterd_do_volume_quorum_action(xlator_t *this, glusterd_volinfo_t *volinfo,
+ gf_boolean_t meets_quorum)
+{
+ int ret = -1;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ gd_quorum_status_t quorum_status = NOT_APPLICABLE_QUORUM;
+ gf_boolean_t follows_quorum = _gf_false;
+ gf_boolean_t quorum_status_unchanged = _gf_false;
+
+ if (volinfo->status != GLUSTERD_STATUS_STARTED) {
+ volinfo->quorum_status = NOT_APPLICABLE_QUORUM;
+ goto out;
+ }
+
+ follows_quorum = glusterd_is_volume_in_server_quorum(volinfo);
+ if (follows_quorum) {
+ if (meets_quorum)
+ quorum_status = MEETS_QUORUM;
+ else
+ quorum_status = DOESNT_MEET_QUORUM;
+ } else {
+ quorum_status = NOT_APPLICABLE_QUORUM;
+ }
+
+ /*
+ * The following check is added to prevent spurious brick starts when
+ * events occur that affect quorum.
+ * Example:
+ * There is a cluster of 10 peers. Volume is in quorum. User
+ * takes down one brick from the volume to perform maintenance.
+ * Suddenly one of the peers go down. Cluster is still in quorum. But
+ * because of this 'peer going down' event, quorum is calculated and
+ * the bricks that are down are brought up again. In this process it
+ * also brings up the brick that is purposefully taken down.
+ */
+ if (volinfo->quorum_status == quorum_status) {
+ quorum_status_unchanged = _gf_true;
+ goto out;
+ }
+
+ if (quorum_status == MEETS_QUORUM) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0,
+ GD_MSG_SERVER_QUORUM_MET_STARTING_BRICKS,
+ "Server quorum regained for volume %s. Starting local "
+ "bricks.",
+ volinfo->volname);
+ gf_event(EVENT_QUORUM_REGAINED, "volume=%s", volinfo->volname);
+ } else if (quorum_status == DOESNT_MEET_QUORUM) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0,
+ GD_MSG_SERVER_QUORUM_LOST_STOPPING_BRICKS,
+ "Server quorum lost for volume %s. Stopping local "
+ "bricks.",
+ volinfo->volname);
+ gf_event(EVENT_QUORUM_LOST, "volume=%s", volinfo->volname);
+ }
+
+ list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (!glusterd_is_local_brick(this, volinfo, brickinfo))
+ continue;
+ if (quorum_status == DOESNT_MEET_QUORUM) {
+ ret = glusterd_brick_stop(volinfo, brickinfo, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_STOP_FAIL,
+ "Failed to "
+ "stop brick %s:%s",
+ brickinfo->hostname, brickinfo->path);
+ }
+ } else {
+ if (!brickinfo->start_triggered) {
+ pthread_mutex_lock(&brickinfo->restart_mutex);
+ {
+ /* coverity[SLEEP] */
+ ret = glusterd_brick_start(volinfo, brickinfo, _gf_false,
+ _gf_false);
+ }
+ pthread_mutex_unlock(&brickinfo->restart_mutex);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRICK_DISCONNECTED, "Failed to start %s:%s",
+ brickinfo->hostname, brickinfo->path);
+ }
+ }
+ }
+ }
+ volinfo->quorum_status = quorum_status;
+ if (quorum_status == MEETS_QUORUM) {
+ /* bricks might have been restarted and so as the port change
+ * might have happened
+ */
+ ret = glusterd_store_volinfo(volinfo, GLUSTERD_VOLINFO_VER_AC_NONE);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
+ "Failed to write volinfo for volume %s", volinfo->volname);
+ goto out;
+ }
+ }
+out:
+ if (quorum_status_unchanged) {
+ list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (!glusterd_is_local_brick(this, volinfo, brickinfo))
+ continue;
+ ret = glusterd_brick_start(volinfo, brickinfo, _gf_false, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_DISCONNECTED,
+ "Failed to "
+ "connect to %s:%s",
+ brickinfo->hostname, brickinfo->path);
+ }
+ }
+ }
+ return;
+}
+
+int
+glusterd_do_quorum_action()
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = 0;
+ int active_count = 0;
+ int quorum_count = 0;
+ gf_boolean_t meets = _gf_false;
+
+ this = THIS;
+ conf = this->private;
+
+ conf->pending_quorum_action = _gf_true;
+ ret = glusterd_lock(conf->uuid);
+ if (ret)
+ goto out;
+
+ {
+ ret = glusterd_get_quorum_cluster_counts(this, &active_count,
+ &quorum_count);
+ if (ret)
+ goto unlock;
+
+ if (does_quorum_meet(active_count, quorum_count))
+ meets = _gf_true;
+ list_for_each_entry(volinfo, &conf->volumes, vol_list)
+ {
+ glusterd_do_volume_quorum_action(this, volinfo, meets);
+ }
+ }
+unlock:
+ (void)glusterd_unlock(conf->uuid);
+ conf->pending_quorum_action = _gf_false;
+out:
+ return ret;
+}
+
+/* ret = 0 represents quorum is not met
+ * ret = 1 represents quorum is met
+ * ret = 2 represents quorum not applicable
+ */
+
+int
+check_quorum_for_brick_start(glusterd_volinfo_t *volinfo,
+ gf_boolean_t node_quorum)
+{
+ gf_boolean_t volume_quorum = _gf_false;
+ int ret = 0;
+
+ volume_quorum = glusterd_is_volume_in_server_quorum(volinfo);
+ if (volume_quorum) {
+ if (node_quorum)
+ ret = 1;
+ } else {
+ ret = 2;
+ }
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-server-quorum.h b/xlators/mgmt/glusterd/src/glusterd-server-quorum.h
new file mode 100644
index 00000000000..e11bf1a9206
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-server-quorum.h
@@ -0,0 +1,46 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_SERVER_QUORUM_H
+#define _GLUSTERD_SERVER_QUORUM_H
+
+int
+glusterd_validate_quorum(xlator_t *this, glusterd_op_t op, dict_t *dict,
+ char **op_errstr);
+
+gf_boolean_t
+glusterd_is_quorum_changed(dict_t *options, char *option, char *value);
+
+int
+glusterd_do_quorum_action();
+
+int
+glusterd_get_quorum_cluster_counts(xlator_t *this, int *active_count,
+ int *quorum_count);
+
+gf_boolean_t
+glusterd_is_quorum_option(char *option);
+
+gf_boolean_t
+glusterd_is_volume_in_server_quorum(glusterd_volinfo_t *volinfo);
+
+gf_boolean_t
+glusterd_is_any_volume_in_server_quorum(xlator_t *this);
+
+gf_boolean_t
+does_gd_meet_server_quorum(xlator_t *this);
+
+int
+check_quorum_for_brick_start(glusterd_volinfo_t *volinfo,
+ gf_boolean_t node_quorum);
+
+gf_boolean_t
+does_quorum_meet(int active_count, int quorum_count);
+
+#endif /* _GLUSTERD_SERVER_QUORUM_H */
diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
new file mode 100644
index 00000000000..5661e391a9c
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.c
@@ -0,0 +1,153 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-shd-svc-helper.h"
+#include "glusterd-messages.h"
+#include "glusterd-volgen.h"
+
+void
+glusterd_svc_build_shd_socket_filepath(glusterd_volinfo_t *volinfo, char *path,
+ int path_len)
+{
+ char sockfilepath[PATH_MAX] = {
+ 0,
+ };
+ char rundir[PATH_MAX] = {
+ 0,
+ };
+ int32_t len = 0;
+ glusterd_conf_t *priv = THIS->private;
+
+ if (!priv)
+ return;
+
+ GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv);
+ len = snprintf(sockfilepath, sizeof(sockfilepath), "%s/run-%s", rundir,
+ uuid_utoa(MY_UUID));
+ if ((len < 0) || (len >= sizeof(sockfilepath))) {
+ sockfilepath[0] = 0;
+ }
+
+ glusterd_set_socket_filepath(sockfilepath, path, path_len);
+}
+
+void
+glusterd_svc_build_shd_pidfile(glusterd_volinfo_t *volinfo, char *path,
+ int path_len)
+{
+ char rundir[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = THIS->private;
+
+ if (!priv)
+ return;
+
+ GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv);
+
+ snprintf(path, path_len, "%s/%s-shd.pid", rundir, volinfo->volname);
+}
+
+void
+glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path,
+ int path_len)
+{
+ char workdir[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = THIS->private;
+
+ if (!priv)
+ return;
+
+ GLUSTERD_GET_VOLUME_DIR(workdir, volinfo, priv);
+
+ snprintf(path, path_len, "%s/%s-shd.vol", workdir, volinfo->volname);
+}
+
+void
+glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd)
+{
+ glusterd_svc_proc_t *svc_proc = NULL;
+ glusterd_svc_t *svc = NULL;
+ glusterd_conf_t *conf = NULL;
+ gf_boolean_t need_unref = _gf_false;
+ rpc_clnt_t *rpc = NULL;
+
+ conf = THIS->private;
+ if (!conf)
+ return;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, conf, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, shd, out);
+
+ svc = &shd->svc;
+ shd->attached = _gf_false;
+
+ if (svc->conn.rpc) {
+ rpc_clnt_unref(svc->conn.rpc);
+ svc->conn.rpc = NULL;
+ }
+
+ pthread_mutex_lock(&conf->attach_lock);
+ {
+ svc_proc = svc->svc_proc;
+ svc->svc_proc = NULL;
+ svc->inited = _gf_false;
+ cds_list_del_init(&svc->mux_svc);
+ glusterd_unlink_file(svc->proc.pidfile);
+
+ if (svc_proc && cds_list_empty(&svc_proc->svcs)) {
+ cds_list_del_init(&svc_proc->svc_proc_list);
+ /* We cannot free svc_proc list from here. Because
+ * if there are pending events on the rpc, it will
+ * try to access the corresponding svc_proc, so unrefing
+ * rpc request and then cleaning up the memory is carried
+ * from the notify function upon RPC_CLNT_DESTROY destroy.
+ */
+ need_unref = _gf_true;
+ rpc = svc_proc->rpc;
+ svc_proc->rpc = NULL;
+ }
+ }
+ pthread_mutex_unlock(&conf->attach_lock);
+ /*rpc unref has to be performed outside the lock*/
+ if (need_unref && rpc)
+ rpc_clnt_unref(rpc);
+out:
+ return;
+}
+
+int
+glusterd_svc_set_shd_pidfile(glusterd_volinfo_t *volinfo, dict_t *dict)
+{
+ int ret = -1;
+ glusterd_svc_t *svc = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+ svc = &(volinfo->shd.svc);
+
+ ret = dict_set_dynstr_with_alloc(dict, "pidfile", svc->proc.pidfile);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set pidfile %s in dict", svc->proc.pidfile);
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
new file mode 100644
index 00000000000..1f0984ba857
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc-helper.h
@@ -0,0 +1,42 @@
+/*
+ Copyright (c) 2016 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_SHD_SVC_HELPER_H_
+#define _GLUSTERD_SHD_SVC_HELPER_H_
+
+#include "glusterd.h"
+#include "glusterd-svc-mgmt.h"
+
+void
+glusterd_svc_build_shd_socket_filepath(glusterd_volinfo_t *volinfo, char *path,
+ int path_len);
+
+void
+glusterd_svc_build_shd_pidfile(glusterd_volinfo_t *volinfo, char *path,
+ int path_len);
+
+void
+glusterd_svc_build_shd_volfile_path(glusterd_volinfo_t *volinfo, char *path,
+ int path_len);
+
+void
+glusterd_shd_svcproc_cleanup(glusterd_shdsvc_t *shd);
+
+int
+glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo,
+ glusterd_svc_t *svc, int flags);
+
+int
+glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo);
+
+int
+glusterd_svc_set_shd_pidfile(glusterd_volinfo_t *volinfo, dict_t *dict);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.c b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
new file mode 100644
index 00000000000..1c56384a14b
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.c
@@ -0,0 +1,796 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-shd-svc.h"
+#include "glusterd-shd-svc-helper.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-store.h"
+
+#define GD_SHD_PROCESS_NAME "--process-name"
+char *shd_svc_name = "glustershd";
+
+void
+glusterd_shdsvc_build(glusterd_svc_t *svc)
+{
+ int ret = -1;
+ ret = snprintf(svc->name, sizeof(svc->name), "%s", shd_svc_name);
+ if (ret < 0)
+ return;
+
+ CDS_INIT_LIST_HEAD(&svc->mux_svc);
+ svc->manager = glusterd_shdsvc_manager;
+ svc->start = glusterd_shdsvc_start;
+ svc->stop = glusterd_shdsvc_stop;
+ svc->reconfigure = glusterd_shdsvc_reconfigure;
+}
+
+int
+glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn,
+ glusterd_svc_proc_t *mux_svc)
+{
+ int ret = -1;
+ char rundir[PATH_MAX] = {
+ 0,
+ };
+ char sockpath[PATH_MAX] = {
+ 0,
+ };
+ char pidfile[PATH_MAX] = {
+ 0,
+ };
+ char volfile[PATH_MAX] = {
+ 0,
+ };
+ char logdir[PATH_MAX] = {
+ 0,
+ };
+ char logfile[PATH_MAX] = {
+ 0,
+ };
+ char volfileid[256] = {0};
+ glusterd_svc_t *svc = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_muxsvc_conn_notify_t notify = NULL;
+ xlator_t *this = NULL;
+ char *volfileserver = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO(THIS->name, this, out);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ volinfo = data;
+ GF_VALIDATE_OR_GOTO(this->name, data, out);
+ GF_VALIDATE_OR_GOTO(this->name, mux_svc, out);
+
+ svc = &(volinfo->shd.svc);
+
+ ret = snprintf(svc->name, sizeof(svc->name), "%s", shd_svc_name);
+ if (ret < 0)
+ goto out;
+
+ notify = glusterd_muxsvc_common_rpc_notify;
+ glusterd_store_perform_node_state_store(volinfo);
+
+ GLUSTERD_GET_SHD_RUNDIR(rundir, volinfo, priv);
+ glusterd_svc_create_rundir(rundir);
+
+ glusterd_svc_build_logfile_path(shd_svc_name, priv->logdir, logfile,
+ sizeof(logfile));
+
+ /* Initialize the connection mgmt */
+ if (mux_conn && mux_svc->rpc) {
+ /* multiplexed svc */
+ svc->conn.frame_timeout = mux_conn->frame_timeout;
+ /* This will be unrefed from glusterd_shd_svcproc_cleanup*/
+ svc->conn.rpc = rpc_clnt_ref(mux_svc->rpc);
+ ret = snprintf(svc->conn.sockpath, sizeof(svc->conn.sockpath), "%s",
+ mux_conn->sockpath);
+ if (ret < 0)
+ goto out;
+ } else {
+ ret = mkdir_p(priv->logdir, 0755, _gf_true);
+ if ((ret == -1) && (EEXIST != errno)) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
+ "Unable to create logdir %s", logdir);
+ goto out;
+ }
+
+ glusterd_svc_build_shd_socket_filepath(volinfo, sockpath,
+ sizeof(sockpath));
+ ret = glusterd_muxsvc_conn_init(&(svc->conn), mux_svc, sockpath, 600,
+ notify);
+ if (ret)
+ goto out;
+ /* This will be unrefed when the last svcs is detached from the list */
+ if (!mux_svc->rpc)
+ mux_svc->rpc = rpc_clnt_ref(svc->conn.rpc);
+ }
+
+ /* Initialize the process mgmt */
+ glusterd_svc_build_shd_pidfile(volinfo, pidfile, sizeof(pidfile));
+ glusterd_svc_build_shd_volfile_path(volinfo, volfile, PATH_MAX);
+ len = snprintf(volfileid, sizeof(volfileid), "shd/%s", volinfo->volname);
+ if ((len < 0) || (len >= sizeof(volfileid))) {
+ ret = -1;
+ goto out;
+ }
+
+ if (dict_get_strn(this->options, "transport.socket.bind-address",
+ SLEN("transport.socket.bind-address"),
+ &volfileserver) != 0) {
+ volfileserver = "localhost";
+ }
+ ret = glusterd_proc_init(&(svc->proc), shd_svc_name, pidfile, logdir,
+ logfile, volfile, volfileid, volfileserver);
+ if (ret)
+ goto out;
+
+out:
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_shdsvc_create_volfile(glusterd_volinfo_t *volinfo)
+{
+ char filepath[PATH_MAX] = {
+ 0,
+ };
+
+ int ret = -1;
+ dict_t *mod_dict = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ glusterd_svc_build_shd_volfile_path(volinfo, filepath, PATH_MAX);
+ if (!glusterd_is_shd_compatible_volume(volinfo)) {
+ /* If volfile exist, delete it. This case happens when we
+ * change from replica/ec to distribute.
+ */
+ (void)glusterd_unlink_file(filepath);
+ ret = 0;
+ goto out;
+ }
+ mod_dict = dict_new();
+ if (!mod_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ ret = dict_set_uint32(mod_dict, "cluster.background-self-heal-count", 0);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.background-self-heal-count", NULL);
+ goto out;
+ }
+
+ ret = dict_set_str(mod_dict, "cluster.data-self-heal", "on");
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.data-self-heal", NULL);
+ goto out;
+ }
+
+ ret = dict_set_str(mod_dict, "cluster.metadata-self-heal", "on");
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.metadata-self-heal", NULL);
+ goto out;
+ }
+
+ ret = dict_set_str(mod_dict, "cluster.entry-self-heal", "on");
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.entry-self-heal", NULL);
+ goto out;
+ }
+
+ ret = glusterd_shdsvc_generate_volfile(volinfo, filepath, mod_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Failed to create volfile");
+ goto out;
+ }
+
+out:
+ if (mod_dict)
+ dict_unref(mod_dict);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+gf_boolean_t
+glusterd_svcs_shd_compatible_volumes_stopped(glusterd_svc_t *svc)
+{
+ glusterd_svc_proc_t *svc_proc = NULL;
+ glusterd_shdsvc_t *shd = NULL;
+ glusterd_svc_t *temp_svc = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ gf_boolean_t comp = _gf_false;
+ glusterd_conf_t *conf = THIS->private;
+
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+ pthread_mutex_lock(&conf->attach_lock);
+ {
+ svc_proc = svc->svc_proc;
+ if (!svc_proc)
+ goto unlock;
+ cds_list_for_each_entry(temp_svc, &svc_proc->svcs, mux_svc)
+ {
+ /* Get volinfo->shd from svc object */
+ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc);
+ if (!shd) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL,
+ "Failed to get shd object "
+ "from shd service");
+ goto unlock;
+ }
+
+ /* Get volinfo from shd */
+ volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd);
+ if (!volinfo) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to get volinfo from "
+ "from shd");
+ goto unlock;
+ }
+ if (!glusterd_is_shd_compatible_volume(volinfo))
+ continue;
+ if (volinfo->status == GLUSTERD_STATUS_STARTED)
+ goto unlock;
+ }
+ comp = _gf_true;
+ }
+unlock:
+ pthread_mutex_unlock(&conf->attach_lock);
+out:
+ return comp;
+}
+
+int
+glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+{
+ int ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *conf = NULL;
+ gf_boolean_t shd_restart = _gf_false;
+
+ conf = THIS->private;
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+ volinfo = data;
+ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
+
+ if (volinfo->is_snap_volume) {
+ /* healing of a snap volume is not supported yet*/
+ ret = 0;
+ goto out;
+ }
+
+ while (conf->restart_shd) {
+ synccond_wait(&conf->cond_restart_shd, &conf->big_lock);
+ }
+ conf->restart_shd = _gf_true;
+ shd_restart = _gf_true;
+
+ if (volinfo)
+ glusterd_volinfo_ref(volinfo);
+
+ if (!glusterd_is_shd_compatible_volume(volinfo)) {
+ ret = 0;
+ if (svc->inited) {
+ /* This means glusterd was running for this volume and now
+ * it was converted to a non-shd volume. So just stop the shd
+ */
+ ret = svc->stop(svc, SIGTERM);
+ }
+ goto out;
+ }
+ ret = glusterd_shdsvc_create_volfile(volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_shd_svc_mux_init(volinfo, svc);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC,
+ "Failed to init shd service");
+ goto out;
+ }
+
+ /* If all the volumes are stopped or all shd compatible volumes
+ * are stopped then stop the service if:
+ * - volinfo is NULL or
+ * - volinfo is present and volume is shd compatible
+ * Otherwise create volfile and restart service if:
+ * - volinfo is NULL or
+ * - volinfo is present and volume is shd compatible
+ */
+ if (glusterd_svcs_shd_compatible_volumes_stopped(svc)) {
+ /* TODO
+ * Take a lock and detach all svc's to stop the process
+ * also reset the init flag
+ */
+ ret = svc->stop(svc, SIGTERM);
+ } else if (volinfo) {
+ if (volinfo->status != GLUSTERD_STATUS_STARTED) {
+ ret = svc->stop(svc, SIGTERM);
+ if (ret)
+ goto out;
+ }
+ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+ ret = svc->start(svc, flags);
+ if (ret)
+ goto out;
+ }
+ }
+out:
+ if (shd_restart) {
+ conf->restart_shd = _gf_false;
+ synccond_broadcast(&conf->cond_restart_shd);
+ }
+ if (volinfo)
+ glusterd_volinfo_unref(volinfo);
+ if (ret)
+ gf_event(EVENT_SVC_MANAGER_FAILED, "svc_name=%s", svc->name);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_new_shd_svc_start(glusterd_svc_t *svc, int flags)
+{
+ int ret = -1;
+ char glusterd_uuid_option[PATH_MAX] = {0};
+ char client_pid[32] = {0};
+ dict_t *cmdline = NULL;
+ xlator_t *this = THIS;
+ GF_ASSERT(this);
+
+ cmdline = dict_new();
+ if (!cmdline)
+ goto out;
+
+ ret = snprintf(glusterd_uuid_option, sizeof(glusterd_uuid_option),
+ "*replicate*.node-uuid=%s", uuid_utoa(MY_UUID));
+ if (ret < 0)
+ goto out;
+
+ ret = snprintf(client_pid, sizeof(client_pid), "--client-pid=%d",
+ GF_CLIENT_PID_SELF_HEALD);
+ if (ret < 0)
+ goto out;
+
+ ret = dict_set_str(cmdline, "arg", client_pid);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=arg", NULL);
+ goto out;
+ }
+
+ /* Pass cmdline arguments as key-value pair. The key is merely
+ * a carrier and is not used. Since dictionary follows LIFO the value
+ * should be put in reverse order*/
+ ret = dict_set_str(cmdline, "arg4", svc->name);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=arg4", NULL);
+ goto out;
+ }
+
+ ret = dict_set_str(cmdline, "arg3", GD_SHD_PROCESS_NAME);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=arg3", NULL);
+ goto out;
+ }
+
+ ret = dict_set_str(cmdline, "arg2", glusterd_uuid_option);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=arg2", NULL);
+ goto out;
+ }
+
+ ret = dict_set_str(cmdline, "arg1", "--xlator-option");
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=arg1", NULL);
+ goto out;
+ }
+
+ ret = glusterd_svc_start(svc, flags, cmdline);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_GLUSTER_SERVICE_START_FAIL, NULL);
+ goto out;
+ }
+
+ ret = glusterd_conn_connect(&(svc->conn));
+out:
+ if (cmdline)
+ dict_unref(cmdline);
+ return ret;
+}
+
+int
+glusterd_recover_shd_attach_failure(glusterd_volinfo_t *volinfo,
+ glusterd_svc_t *svc, int flags)
+{
+ int ret = -1;
+ glusterd_svc_proc_t *mux_proc = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ conf = THIS->private;
+
+ if (!conf || !volinfo || !svc)
+ return -1;
+ glusterd_shd_svcproc_cleanup(&volinfo->shd);
+ mux_proc = glusterd_svcprocess_new();
+ if (!mux_proc) {
+ return -1;
+ }
+ ret = glusterd_shdsvc_init(volinfo, NULL, mux_proc);
+ if (ret)
+ return -1;
+ pthread_mutex_lock(&conf->attach_lock);
+ {
+ cds_list_add_tail(&mux_proc->svc_proc_list, &conf->shd_procs);
+ svc->svc_proc = mux_proc;
+ cds_list_del_init(&svc->mux_svc);
+ cds_list_add_tail(&svc->mux_svc, &mux_proc->svcs);
+ }
+ pthread_mutex_unlock(&conf->attach_lock);
+
+ ret = glusterd_new_shd_svc_start(svc, flags);
+ if (!ret) {
+ volinfo->shd.attached = _gf_true;
+ }
+ return ret;
+}
+
+int
+glusterd_shdsvc_start(glusterd_svc_t *svc, int flags)
+{
+ int ret = -1;
+ glusterd_shdsvc_t *shd = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+ conf = THIS->private;
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+
+ /* Get volinfo->shd from svc object */
+ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc);
+ if (!shd) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL,
+ "Failed to get shd object "
+ "from shd service");
+ return -1;
+ }
+
+ /* Get volinfo from shd */
+ volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd);
+ if (!volinfo) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to get volinfo from "
+ "from shd");
+ return -1;
+ }
+
+ if (volinfo->status != GLUSTERD_STATUS_STARTED)
+ return -1;
+
+ glusterd_volinfo_ref(volinfo);
+
+ if (!svc->inited) {
+ ret = glusterd_shd_svc_mux_init(volinfo, svc);
+ if (ret)
+ goto out;
+ }
+
+ if (shd->attached) {
+ glusterd_volinfo_ref(volinfo);
+ /* Unref will happen from glusterd_svc_attach_cbk */
+ ret = glusterd_attach_svc(svc, volinfo, flags);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to attach shd svc(volume=%s) to pid=%d",
+ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+ glusterd_shd_svcproc_cleanup(&volinfo->shd);
+ glusterd_volinfo_unref(volinfo);
+ goto out1;
+ }
+ goto out;
+ }
+ ret = glusterd_new_shd_svc_start(svc, flags);
+ if (!ret) {
+ shd->attached = _gf_true;
+ }
+out:
+ if (ret && volinfo)
+ glusterd_shd_svcproc_cleanup(&volinfo->shd);
+ if (volinfo)
+ glusterd_volinfo_unref(volinfo);
+out1:
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_shdsvc_reconfigure(glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ gf_boolean_t identical = _gf_false;
+ dict_t *mod_dict = NULL;
+ glusterd_svc_t *svc = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ if (!volinfo) {
+ /* reconfigure will be called separately*/
+ ret = 0;
+ goto out;
+ }
+
+ glusterd_volinfo_ref(volinfo);
+ svc = &(volinfo->shd.svc);
+ if (glusterd_svcs_shd_compatible_volumes_stopped(svc))
+ goto manager;
+
+ /*
+ * Check both OLD and NEW volfiles, if they are SAME by size
+ * and cksum i.e. "character-by-character". If YES, then
+ * NOTHING has been changed, just return.
+ */
+
+ if (!glusterd_is_shd_compatible_volume(volinfo)) {
+ if (svc->inited)
+ goto manager;
+
+ /* Nothing to do if not shd compatible */
+ ret = 0;
+ goto out;
+ }
+ mod_dict = dict_new();
+ if (!mod_dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ ret = dict_set_uint32(mod_dict, "cluster.background-self-heal-count", 0);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.background-self-heal-count", NULL);
+ goto out;
+ }
+
+ ret = dict_set_str(mod_dict, "cluster.data-self-heal", "on");
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.data-self-heal", NULL);
+ goto out;
+ }
+
+ ret = dict_set_str(mod_dict, "cluster.metadata-self-heal", "on");
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.metadata-self-heal", NULL);
+ goto out;
+ }
+
+ ret = dict_set_int32(mod_dict, "graph-check", 1);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=graph-check", NULL);
+ goto out;
+ }
+
+ ret = dict_set_str(mod_dict, "cluster.entry-self-heal", "on");
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=cluster.entry-self-heal", NULL);
+ goto out;
+ }
+
+ ret = glusterd_volume_svc_check_volfile_identical(
+ "glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile,
+ &identical);
+ if (ret)
+ goto out;
+
+ if (identical) {
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * They are not identical. Find out if the topology is changed
+ * OR just the volume options. If just the options which got
+ * changed, then inform the xlator to reconfigure the options.
+ */
+ identical = _gf_false; /* RESET the FLAG */
+ ret = glusterd_volume_svc_check_topology_identical(
+ "glustershd", mod_dict, volinfo, glusterd_shdsvc_generate_volfile,
+ &identical);
+ if (ret)
+ goto out;
+
+ /* Topology is not changed, but just the options. But write the
+ * options to shd volfile, so that shd will be reconfigured.
+ */
+ if (identical) {
+ ret = glusterd_shdsvc_create_volfile(volinfo);
+ if (ret == 0) { /* Only if above PASSES */
+ ret = glusterd_fetchspec_notify(THIS);
+ }
+ goto out;
+ }
+manager:
+ /*
+ * shd volfile's topology has been changed. volfile needs
+ * to be RECONFIGURED to ACT on the changed volfile.
+ */
+ ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT);
+
+out:
+ if (volinfo)
+ glusterd_volinfo_unref(volinfo);
+ if (mod_dict)
+ dict_unref(mod_dict);
+ gf_msg_debug(this ? this->name : "glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_shdsvc_restart()
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *tmp = NULL;
+ int ret = -1;
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = NULL;
+ glusterd_svc_t *svc = NULL;
+
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ pthread_mutex_lock(&conf->volume_lock);
+ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list)
+ {
+ glusterd_volinfo_ref(volinfo);
+ pthread_mutex_unlock(&conf->volume_lock);
+ /* Start per volume shd svc */
+ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+ svc = &(volinfo->shd.svc);
+ ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SHD_START_FAIL,
+ "Couldn't start shd for "
+ "vol: %s on restart",
+ volinfo->volname);
+ gf_event(EVENT_SVC_MANAGER_FAILED, "volume=%s;svc_name=%s",
+ volinfo->volname, svc->name);
+ glusterd_volinfo_unref(volinfo);
+ goto out;
+ }
+ }
+ glusterd_volinfo_unref(volinfo);
+ pthread_mutex_lock(&conf->volume_lock);
+ }
+ pthread_mutex_unlock(&conf->volume_lock);
+out:
+ return ret;
+}
+
+int
+glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig)
+{
+ int ret = -1;
+ glusterd_svc_proc_t *svc_proc = NULL;
+ glusterd_shdsvc_t *shd = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ gf_boolean_t empty = _gf_false;
+ glusterd_conf_t *conf = NULL;
+ int pid = -1;
+
+ conf = THIS->private;
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+ svc_proc = svc->svc_proc;
+ if (!svc_proc) {
+ /*
+ * This can happen when stop was called on a volume that is not shd
+ * compatible.
+ */
+ gf_msg_debug("glusterd", 0, "svc_proc is null, ie shd already stopped");
+ ret = 0;
+ goto out;
+ }
+
+ /* Get volinfo->shd from svc object */
+ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc);
+ if (!shd) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL,
+ "Failed to get shd object "
+ "from shd service");
+ return -1;
+ }
+
+ /* Get volinfo from shd */
+ volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd);
+ if (!volinfo) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to get volinfo from "
+ "from shd");
+ return -1;
+ }
+
+ glusterd_volinfo_ref(volinfo);
+ pthread_mutex_lock(&conf->attach_lock);
+ {
+ if (!gf_is_service_running(svc->proc.pidfile, &pid)) {
+ gf_msg_debug(THIS->name, 0, "shd isn't running");
+ }
+ cds_list_del_init(&svc->mux_svc);
+ empty = cds_list_empty(&svc_proc->svcs);
+ if (empty) {
+ svc_proc->status = GF_SVC_STOPPING;
+ cds_list_del_init(&svc_proc->svc_proc_list);
+ }
+ }
+ pthread_mutex_unlock(&conf->attach_lock);
+ if (empty) {
+ /* Unref will happen when destroying the connection */
+ glusterd_volinfo_ref(volinfo);
+ svc_proc->data = volinfo;
+ ret = glusterd_svc_stop(svc, sig);
+ if (ret) {
+ glusterd_volinfo_unref(volinfo);
+ goto out;
+ }
+ }
+ if (!empty && pid != -1) {
+ ret = glusterd_detach_svc(svc, volinfo, sig);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
+ "shd service is failed to detach volume %s from pid %d",
+ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+ else
+ gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_SVC_STOP_SUCCESS,
+ "Shd service is detached for volume %s from pid %d",
+ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+ }
+ svc->online = _gf_false;
+ (void)glusterd_unlink_file((char *)svc->proc.pidfile);
+ glusterd_shd_svcproc_cleanup(shd);
+ ret = 0;
+ glusterd_volinfo_unref(volinfo);
+out:
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-shd-svc.h b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h
new file mode 100644
index 00000000000..55b409f4b69
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-shd-svc.h
@@ -0,0 +1,45 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_SHD_SVC_H_
+#define _GLUSTERD_SHD_SVC_H_
+
+#include "glusterd-svc-mgmt.h"
+#include "glusterd.h"
+
+typedef struct glusterd_shdsvc_ glusterd_shdsvc_t;
+struct glusterd_shdsvc_ {
+ glusterd_svc_t svc;
+ gf_boolean_t attached;
+};
+
+void
+glusterd_shdsvc_build(glusterd_svc_t *svc);
+
+int
+glusterd_shdsvc_init(void *data, glusterd_conn_t *mux_conn,
+ glusterd_svc_proc_t *svc_proc);
+
+int
+glusterd_shdsvc_manager(glusterd_svc_t *svc, void *data, int flags);
+
+int
+glusterd_shdsvc_start(glusterd_svc_t *svc, int flags);
+
+int
+glusterd_shdsvc_reconfigure();
+
+int
+glusterd_shdsvc_restart();
+
+int
+glusterd_shdsvc_stop(glusterd_svc_t *svc, int sig);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.c b/xlators/mgmt/glusterd/src/glusterd-sm.c
index 1fe78a14995..bf2d81b644a 100644
--- a/xlators/mgmt/glusterd/src/glusterd-sm.c
+++ b/xlators/mgmt/glusterd/src/glusterd-sm.c
@@ -1,637 +1,933 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
#include <time.h>
#include <sys/uio.h>
#include <sys/resource.h>
#include <libgen.h>
-#include "uuid.h"
+#include <glusterfs/compat-uuid.h>
#include "fnmatch.h"
-#include "xlator.h"
+#include <glusterfs/xlator.h>
#include "protocol-common.h"
#include "glusterd.h"
-#include "call-stub.h"
-#include "defaults.h"
-#include "list.h"
-#include "dict.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "statedump.h"
+#include <glusterfs/call-stub.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/list.h>
+#include "glusterd-messages.h"
+#include <glusterfs/dict.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/statedump.h>
#include "glusterd-sm.h"
#include "glusterd-op-sm.h"
#include "glusterd-utils.h"
#include "glusterd-store.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-server-quorum.h"
+#include "glusterd-gfproxyd-svc-helper.h"
-static struct list_head gd_friend_sm_queue;
-
-static char *glusterd_friend_sm_state_names[] = {
- "Establishing Connection",
- "Probe Sent to Peer",
- "Probe Received from Peer",
- "Peer in Cluster",
- "Accepted peer request",
- "Sent and Received peer request",
- "Peer Rejected",
- "Peer detach in progress",
- "Probe Received from peer",
- "Connected to Peer",
- "Peer is connected and Accepted",
- "Invalid State"
+char local_node_hostname[PATH_MAX] = {
+ 0,
};
-static char *glusterd_friend_sm_event_names[] = {
- "GD_FRIEND_EVENT_NONE",
- "GD_FRIEND_EVENT_PROBE",
- "GD_FRIEND_EVENT_INIT_FRIEND_REQ",
- "GD_FRIEND_EVENT_RCVD_ACC",
- "GD_FRIEND_EVENT_LOCAL_ACC",
- "GD_FRIEND_EVENT_RCVD_RJT",
- "GD_FRIEND_EVENT_LOCAL_RJT",
- "GD_FRIEND_EVENT_RCVD_FRIEND_REQ",
- "GD_FRIEND_EVENT_INIT_REMOVE_FRIEND",
- "GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND",
- "GD_FRIEND_EVENT_REMOVE_FRIEND",
- "GD_FRIEND_EVENT_CONNECTED",
- "GD_FRIEND_EVENT_MAX"
-};
+static struct cds_list_head gd_friend_sm_queue;
+
+static char *glusterd_friend_sm_state_names[] = {
+ "Establishing Connection",
+ "Probe Sent to Peer",
+ "Probe Received from Peer",
+ "Peer in Cluster",
+ "Accepted peer request",
+ "Sent and Received peer request",
+ "Peer Rejected",
+ "Peer detach in progress",
+ "Probe Received from peer",
+ "Connected to Peer",
+ "Peer is connected and Accepted",
+ "Invalid State"};
-char*
-glusterd_friend_sm_state_name_get (int state)
+static char *glusterd_friend_sm_event_names[] = {
+ "GD_FRIEND_EVENT_NONE",
+ "GD_FRIEND_EVENT_PROBE",
+ "GD_FRIEND_EVENT_INIT_FRIEND_REQ",
+ "GD_FRIEND_EVENT_RCVD_ACC",
+ "GD_FRIEND_EVENT_LOCAL_ACC",
+ "GD_FRIEND_EVENT_RCVD_RJT",
+ "GD_FRIEND_EVENT_LOCAL_RJT",
+ "GD_FRIEND_EVENT_RCVD_FRIEND_REQ",
+ "GD_FRIEND_EVENT_INIT_REMOVE_FRIEND",
+ "GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND",
+ "GD_FRIEND_EVENT_REMOVE_FRIEND",
+ "GD_FRIEND_EVENT_CONNECTED",
+ "GD_FRIEND_EVENT_NEW_NAME",
+ "GD_FRIEND_EVENT_MAX"};
+
+char *
+glusterd_friend_sm_state_name_get(int state)
{
- if (state < 0 || state >= GD_FRIEND_STATE_MAX)
- return glusterd_friend_sm_state_names[GD_FRIEND_STATE_MAX];
- return glusterd_friend_sm_state_names[state];
+ if (state < 0 || state >= GD_FRIEND_STATE_MAX)
+ return glusterd_friend_sm_state_names[GD_FRIEND_STATE_MAX];
+ return glusterd_friend_sm_state_names[state];
}
-char*
-glusterd_friend_sm_event_name_get (int event)
+char *
+glusterd_friend_sm_event_name_get(int event)
{
- if (event < 0 || event >= GD_FRIEND_EVENT_MAX)
- return glusterd_friend_sm_event_names[GD_FRIEND_EVENT_MAX];
- return glusterd_friend_sm_event_names[event];
+ if (event < 0 || event >= GD_FRIEND_EVENT_MAX)
+ return glusterd_friend_sm_event_names[GD_FRIEND_EVENT_MAX];
+ return glusterd_friend_sm_event_names[event];
}
void
-glusterd_destroy_probe_ctx (glusterd_probe_ctx_t *ctx)
+glusterd_destroy_probe_ctx(glusterd_probe_ctx_t *ctx)
{
- if (!ctx)
- return;
+ if (!ctx)
+ return;
- if (ctx->hostname)
- GF_FREE (ctx->hostname);
- GF_FREE (ctx);
+ GF_FREE(ctx->hostname);
+ GF_FREE(ctx);
}
void
-glusterd_destroy_friend_req_ctx (glusterd_friend_req_ctx_t *ctx)
+glusterd_destroy_friend_req_ctx(glusterd_friend_req_ctx_t *ctx)
{
- if (!ctx)
- return;
-
- if (ctx->vols)
- dict_unref (ctx->vols);
- if (ctx->hostname)
- GF_FREE (ctx->hostname);
- GF_FREE (ctx);
+ if (!ctx)
+ return;
+
+ if (ctx->vols)
+ dict_unref(ctx->vols);
+ GF_FREE(ctx->hostname);
+ GF_FREE(ctx);
}
void
-glusterd_destroy_friend_update_ctx (glusterd_friend_update_ctx_t *ctx)
+glusterd_destroy_friend_update_ctx(glusterd_friend_update_ctx_t *ctx)
{
- if (!ctx)
- return;
- if (ctx->hostname)
- GF_FREE (ctx->hostname);
- GF_FREE (ctx);
+ if (!ctx)
+ return;
+ GF_FREE(ctx->hostname);
+ GF_FREE(ctx);
}
int
-glusterd_broadcast_friend_delete (char *hostname, uuid_t uuid)
+glusterd_broadcast_friend_delete(char *hostname, uuid_t uuid)
{
- int ret = 0;
- rpc_clnt_procedure_t *proc = NULL;
- xlator_t *this = NULL;
- glusterd_friend_update_ctx_t ctx = {{0},};
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_conf_t *priv = NULL;
- dict_t *friends = NULL;
- char key[100] = {0,};
- int32_t count = 0;
-
- this = THIS;
- priv = this->private;
-
- GF_ASSERT (priv);
-
- ctx.hostname = hostname;
- ctx.op = GD_FRIEND_UPDATE_DEL;
-
- friends = dict_new ();
- if (!friends)
- goto out;
-
- snprintf (key, sizeof (key), "op");
- ret = dict_set_int32 (friends, key, ctx.op);
- if (ret)
- goto out;
-
- snprintf (key, sizeof (key), "hostname");
- ret = dict_set_str (friends, key, hostname);
- if (ret)
- goto out;
-
- ret = dict_set_int32 (friends, "count", count);
- if (ret)
- goto out;
-
- list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
- if (!peerinfo->connected || !peerinfo->peer)
- continue;
-
- ret = dict_set_static_ptr (friends, "peerinfo", peerinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "failed to set peerinfo");
- goto out;
- }
-
- proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE];
- if (proc->fn) {
- ret = proc->fn (NULL, this, friends);
- }
+ int ret = 0;
+ rpc_clnt_procedure_t *proc = NULL;
+ xlator_t *this = NULL;
+ glusterd_friend_update_ctx_t ctx = {
+ {0},
+ };
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ dict_t *friends = NULL;
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+ int32_t count = 0;
+
+ this = THIS;
+ priv = this->private;
+
+ GF_ASSERT(priv);
+
+ ctx.hostname = hostname;
+ ctx.op = GD_FRIEND_UPDATE_DEL;
+
+ friends = dict_new();
+ if (!friends) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "op");
+ ret = dict_set_int32n(friends, key, keylen, ctx.op);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "hostname");
+ ret = dict_set_strn(friends, key, keylen, hostname);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ ret = dict_set_int32n(friends, "count", SLEN("count"), count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto out;
+ }
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
+ {
+ if (!peerinfo->connected || !peerinfo->peer)
+ continue;
+
+ /* Setting a direct reference to peerinfo in the dict is okay as
+ * it is only going to be used within this read critical section
+ * (in glusterd_rpc_friend_update)
+ */
+ ret = dict_set_static_ptr(friends, "peerinfo", peerinfo);
+ if (ret) {
+ RCU_READ_UNLOCK;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to set peerinfo");
+ goto out;
}
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE];
+ if (proc->fn) {
+ ret = proc->fn(NULL, this, friends);
+ }
+ }
+ RCU_READ_UNLOCK;
out:
- if (friends)
- dict_unref (friends);
+ if (friends)
+ dict_unref(friends);
- return ret;
+ gf_msg_debug("glusterd", 0, "Returning with %d", ret);
+ return ret;
}
-
static int
-glusterd_ac_none (glusterd_friend_sm_event_t *event, void *ctx)
+glusterd_ac_none(glusterd_friend_sm_event_t *event, void *ctx)
{
- int ret = 0;
+ int ret = 0;
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ gf_msg_debug("glusterd", 0, "Returning with %d", ret);
- return ret;
+ return ret;
}
static int
-glusterd_ac_error (glusterd_friend_sm_event_t *event, void *ctx)
+glusterd_ac_error(glusterd_friend_sm_event_t *event, void *ctx)
{
- int ret = 0;
+ int ret = 0;
- gf_log ("", GF_LOG_ERROR, "Received event %d ", event->event);
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_AC_ERROR, "Received event %d ",
+ event->event);
- return ret;
+ return ret;
}
static int
-glusterd_ac_reverse_probe_begin (glusterd_friend_sm_event_t *event, void *ctx)
+glusterd_ac_reverse_probe_begin(glusterd_friend_sm_event_t *event, void *ctx)
{
- int ret = 0;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_friend_sm_event_t *new_event = NULL;
- glusterd_probe_ctx_t *new_ev_ctx = NULL;
-
- GF_ASSERT (event);
- GF_ASSERT (ctx);
-
- peerinfo = event->peerinfo;
- ret = glusterd_friend_sm_new_event
- (GD_FRIEND_EVENT_PROBE, &new_event);
-
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to get new new_event");
- ret = -1;
- goto out;
- }
-
- new_ev_ctx = GF_CALLOC (1, sizeof(*new_ev_ctx), gf_gld_mt_probe_ctx_t);
-
- if (!new_ev_ctx) {
- ret = -1;
- goto out;
- }
-
- new_ev_ctx->hostname = gf_strdup (peerinfo->hostname);
- new_ev_ctx->port = peerinfo->port;
- new_ev_ctx->req = NULL;
-
- new_event->peerinfo = peerinfo;
- new_event->ctx = new_ev_ctx;
-
- ret = glusterd_friend_sm_inject_event (new_event);
-
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to inject new_event %d, "
- "ret = %d", new_event->event, ret);
- }
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_sm_event_t *new_event = NULL;
+ glusterd_probe_ctx_t *new_ev_ctx = NULL;
+
+ GF_ASSERT(event);
+ GF_ASSERT(ctx);
+
+ new_ev_ctx = GF_CALLOC(1, sizeof(*new_ev_ctx), gf_gld_mt_probe_ctx_t);
+
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
+ if (!peerinfo) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
+ "Could not find peer %s(%s)", event->peername,
+ uuid_utoa(event->peerid));
+ goto out;
+ }
+
+ ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_PROBE, &new_event);
+
+ if (ret) {
+ RCU_READ_UNLOCK;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL,
+ "Unable to get new new_event");
+ ret = -1;
+ goto out;
+ }
+
+ if (!new_ev_ctx) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ goto out;
+ }
+
+ new_ev_ctx->hostname = gf_strdup(peerinfo->hostname);
+ new_ev_ctx->port = peerinfo->port;
+ new_ev_ctx->req = NULL;
+
+ new_event->peername = gf_strdup(peerinfo->hostname);
+ gf_uuid_copy(new_event->peerid, peerinfo->uuid);
+ new_event->ctx = new_ev_ctx;
+
+ ret = glusterd_friend_sm_inject_event(new_event);
+
+ RCU_READ_UNLOCK;
+
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_INJECT_FAIL,
+ "Unable to inject new_event %d, "
+ "ret = %d",
+ new_event->event, ret);
+ }
out:
- if (ret) {
- if (new_event)
- GF_FREE (new_event);
- if (new_ev_ctx->hostname)
- GF_FREE (new_ev_ctx->hostname);
- if (new_ev_ctx)
- GF_FREE (new_ev_ctx);
- }
- gf_log ("", GF_LOG_DEBUG, "returning with %d", ret);
- return ret;
+ if (ret) {
+ if (new_event)
+ GF_FREE(new_event->peername);
+ GF_FREE(new_event);
+ if (new_ev_ctx)
+ GF_FREE(new_ev_ctx->hostname);
+ GF_FREE(new_ev_ctx);
+ }
+ gf_msg_debug("glusterd", 0, "returning with %d", ret);
+ return ret;
}
static int
-glusterd_ac_friend_add (glusterd_friend_sm_event_t *event, void *ctx)
+glusterd_ac_friend_add(glusterd_friend_sm_event_t *event, void *ctx)
{
- int ret = 0;
- glusterd_peerinfo_t *peerinfo = NULL;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- glusterd_conf_t *conf = NULL;
- xlator_t *this = NULL;
-
- GF_ASSERT (event);
- peerinfo = event->peerinfo;
-
- this = THIS;
- conf = this->private;
-
- GF_ASSERT (conf);
-
- if (!peerinfo->peer)
- goto out;
- proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_ADD];
- if (proc->fn) {
- frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- goto out;
- }
- frame->local = ctx;
- ret = proc->fn (frame, this, event);
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(event);
+
+ this = THIS;
+ conf = this->private;
+
+ GF_ASSERT(conf);
+
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
+ if (!peerinfo) {
+ RCU_READ_UNLOCK;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
+ "Could not find peer %s(%s)", event->peername,
+ uuid_utoa(event->peerid));
+ goto out;
+ }
+
+ if (!peerinfo->peer) {
+ RCU_READ_UNLOCK;
+ goto out;
+ }
+ proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_ADD];
+ if (proc->fn) {
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ RCU_READ_UNLOCK;
+ goto out;
}
+ frame->local = ctx;
+ ret = proc->fn(frame, this, event);
+ }
+ RCU_READ_UNLOCK;
out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ if (ret && frame)
+ STACK_DESTROY(frame->root);
- return ret;
+ gf_msg_debug("glusterd", 0, "Returning with %d", ret);
+ return ret;
}
static int
-glusterd_ac_friend_probe (glusterd_friend_sm_event_t *event, void *ctx)
+glusterd_ac_friend_probe(glusterd_friend_sm_event_t *event, void *ctx)
{
- int ret = -1;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- glusterd_conf_t *conf = NULL;
- xlator_t *this = NULL;
- glusterd_probe_ctx_t *probe_ctx = NULL;
- glusterd_peerinfo_t *peerinfo = NULL;
- dict_t *dict = NULL;
-
- GF_ASSERT (ctx);
-
- probe_ctx = ctx;
-
- this = THIS;
-
- GF_ASSERT (this);
-
- conf = this->private;
-
- GF_ASSERT (conf);
-
- ret = glusterd_friend_find (NULL, probe_ctx->hostname, &peerinfo);
+ int ret = -1;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ glusterd_probe_ctx_t *probe_ctx = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ dict_t *dict = NULL;
+
+ GF_ASSERT(ctx);
+
+ probe_ctx = ctx;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+
+ conf = this->private;
+
+ GF_ASSERT(conf);
+
+ RCU_READ_LOCK;
+ peerinfo = glusterd_peerinfo_find(NULL, probe_ctx->hostname);
+ if (peerinfo == NULL) {
+ // We should not reach this state ideally
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND, NULL);
+ ret = -1;
+ goto unlock;
+ }
+
+ if (!peerinfo->peer) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_ADDRESS_GET_FAIL,
+ NULL);
+ goto unlock;
+ }
+ proc = &peerinfo->peer->proctable[GLUSTERD_PROBE_QUERY];
+ if (proc->fn) {
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL,
+ NULL);
+ goto unlock;
+ }
+ frame->local = ctx;
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ goto unlock;
+ }
+ ret = dict_set_strn(dict, "hostname", SLEN("hostname"),
+ probe_ctx->hostname);
if (ret) {
- //We should not reach this state ideally
- GF_ASSERT (0);
- goto out;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=hostname", NULL);
+ goto unlock;
}
- if (!peerinfo->peer)
- goto out;
- proc = &peerinfo->peer->proctable[GLUSTERD_PROBE_QUERY];
- if (proc->fn) {
- frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- goto out;
- }
- frame->local = ctx;
- dict = dict_new ();
- if (!dict)
- goto out;
- ret = dict_set_str (dict, "hostname", probe_ctx->hostname);
- if (ret)
- goto out;
-
- ret = dict_set_int32 (dict, "port", probe_ctx->port);
- if (ret)
- goto out;
-
- ret = dict_set_static_ptr (dict, "peerinfo", peerinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "failed to set peerinfo");
- goto out;
- }
-
- ret = proc->fn (frame, this, dict);
- if (ret)
- goto out;
-
+ ret = dict_set_int32n(dict, "port", SLEN("port"), probe_ctx->port);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=port", NULL);
+ goto unlock;
}
+ /* The peerinfo reference being set here is going to be used
+ * only within this critical section, in glusterd_rpc_probe
+ * (ie. proc->fn).
+ */
+ ret = dict_set_static_ptr(dict, "peerinfo", peerinfo);
+ if (ret) {
+ RCU_READ_UNLOCK;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to set peerinfo");
+ goto out;
+ }
+ ret = proc->fn(frame, this, dict);
+ if (ret)
+ goto unlock;
+ }
+unlock:
+ RCU_READ_UNLOCK;
out:
- if (dict)
- dict_unref (dict);
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
- return ret;
+ if (dict)
+ dict_unref(dict);
+ gf_msg_debug("glusterd", 0, "Returning with %d", ret);
+
+ if (ret && frame)
+ STACK_DESTROY(frame->root);
+
+ return ret;
}
static int
-glusterd_ac_send_friend_remove_req (glusterd_friend_sm_event_t *event,
- void *data)
+glusterd_ac_send_friend_remove_req(glusterd_friend_sm_event_t *event,
+ void *data)
{
- int ret = 0;
- glusterd_peerinfo_t *peerinfo = NULL;
- rpc_clnt_procedure_t *proc = NULL;
- call_frame_t *frame = NULL;
- glusterd_conf_t *conf = NULL;
- xlator_t *this = NULL;
- glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE;
- glusterd_probe_ctx_t *ctx = NULL;
- glusterd_friend_sm_event_t *new_event = NULL;
-
- GF_ASSERT (event);
- peerinfo = event->peerinfo;
-
- this = THIS;
- conf = this->private;
-
- GF_ASSERT (conf);
-
- ctx = event->ctx;
-
- if (!peerinfo->connected) {
- event_type = GD_FRIEND_EVENT_REMOVE_FRIEND;
-
- ret = glusterd_friend_sm_new_event (event_type, &new_event);
-
- if (!ret) {
- new_event->peerinfo = peerinfo;
- ret = glusterd_friend_sm_inject_event (new_event);
- } else {
- gf_log ("glusterd", GF_LOG_ERROR,
- "Unable to get event");
- }
-
- if (ctx)
- ret = glusterd_xfer_cli_deprobe_resp (ctx->req, ret, 0,
- NULL,
- ctx->hostname);
- glusterd_friend_sm ();
- glusterd_op_sm ();
-
- if (ctx) {
- glusterd_broadcast_friend_delete (ctx->hostname, NULL);
- glusterd_destroy_probe_ctx (ctx);
- }
- goto out;
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ call_frame_t *frame = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE;
+ glusterd_probe_ctx_t *ctx = NULL;
+ glusterd_friend_sm_event_t *new_event = NULL;
+
+ GF_ASSERT(event);
+
+ this = THIS;
+ conf = this->private;
+
+ GF_ASSERT(conf);
+
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
+ if (!peerinfo) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
+ "Could not find peer %s(%s)", event->peername,
+ uuid_utoa(event->peerid));
+ goto out;
+ }
+ ctx = event->ctx;
+
+ if (!peerinfo->connected) {
+ event_type = GD_FRIEND_EVENT_REMOVE_FRIEND;
+
+ ret = glusterd_friend_sm_new_event(event_type, &new_event);
+
+ if (!ret) {
+ new_event->peername = peerinfo->hostname;
+ gf_uuid_copy(new_event->peerid, peerinfo->uuid);
+ ret = glusterd_friend_sm_inject_event(new_event);
+ } else {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_EVENT_NEW_GET_FAIL,
+ "Unable to get event");
}
- if (!peerinfo->peer)
- goto out;
- proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_REMOVE];
- if (proc->fn) {
- frame = create_frame (this, this->ctx->pool);
- if (!frame) {
- goto out;
- }
- frame->local = data;
- ret = proc->fn (frame, this, event);
+ if (ctx) {
+ ret = glusterd_xfer_cli_deprobe_resp(ctx->req, ret, 0, NULL,
+ ctx->hostname, ctx->dict);
+ glusterd_broadcast_friend_delete(ctx->hostname, NULL);
+ glusterd_destroy_probe_ctx(ctx);
}
+ goto unlock;
+ }
+
+ if (!peerinfo->peer) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_PEER_ADDRESS_GET_FAIL,
+ NULL);
+ goto unlock;
+ }
+ proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_REMOVE];
+ if (proc->fn) {
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL,
+ NULL);
+ goto unlock;
+ }
+ frame->local = data;
+ ret = proc->fn(frame, this, event);
+ }
+unlock:
+ RCU_READ_UNLOCK;
out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
- return ret;
+ gf_msg_debug("glusterd", 0, "Returning with %d", ret);
+
+ if (ret && frame)
+ STACK_DESTROY(frame->root);
+
+ return ret;
}
static gf_boolean_t
-glusterd_should_update_peer (glusterd_peerinfo_t *peerinfo,
- glusterd_peerinfo_t *cur_peerinfo)
+glusterd_should_update_peer(glusterd_peerinfo_t *peerinfo,
+ glusterd_peerinfo_t *cur_peerinfo)
{
- gf_boolean_t is_valid = _gf_false;
+ if ((peerinfo == cur_peerinfo) ||
+ (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED))
+ return _gf_true;
- if ((peerinfo == cur_peerinfo) ||
- (peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED))
- is_valid = _gf_true;
-
- return is_valid;
+ return _gf_false;
}
static int
-glusterd_ac_send_friend_update (glusterd_friend_sm_event_t *event, void *ctx)
+glusterd_ac_send_friend_update(glusterd_friend_sm_event_t *event, void *ctx)
{
- int ret = 0;
- glusterd_peerinfo_t *cur_peerinfo = NULL;
- glusterd_peerinfo_t *peerinfo = NULL;
- rpc_clnt_procedure_t *proc = NULL;
- xlator_t *this = NULL;
- glusterd_friend_update_ctx_t ev_ctx = {{0}};
- glusterd_conf_t *priv = NULL;
- dict_t *friends = NULL;
- char key[100] = {0,};
- char *dup_buf = NULL;
- int32_t count = 0;
-
- GF_ASSERT (event);
- cur_peerinfo = event->peerinfo;
-
- this = THIS;
- priv = this->private;
-
- GF_ASSERT (priv);
-
- ev_ctx.op = GD_FRIEND_UPDATE_ADD;
-
- friends = dict_new ();
- if (!friends)
- goto out;
-
- snprintf (key, sizeof (key), "op");
- ret = dict_set_int32 (friends, key, ev_ctx.op);
- if (ret)
- goto out;
-
- list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
- if (!glusterd_should_update_peer (peerinfo, cur_peerinfo))
- continue;
-
- count++;
- snprintf (key, sizeof (key), "friend%d.uuid", count);
- dup_buf = gf_strdup (uuid_utoa (peerinfo->uuid));
- ret = dict_set_dynstr (friends, key, dup_buf);
- if (ret)
- goto out;
- snprintf (key, sizeof (key), "friend%d.hostname", count);
- ret = dict_set_str (friends, key, peerinfo->hostname);
- if (ret)
- goto out;
- gf_log ("", GF_LOG_INFO, "Added uuid: %s, host: %s",
- dup_buf, peerinfo->hostname);
- }
-
- ret = dict_set_int32 (friends, "count", count);
+ int ret = 0;
+ glusterd_peerinfo_t *cur_peerinfo = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ xlator_t *this = NULL;
+ glusterd_friend_update_ctx_t ev_ctx = {{0}};
+ glusterd_conf_t *priv = NULL;
+ dict_t *friends = NULL;
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+ int32_t count = 0;
+
+ GF_ASSERT(event);
+
+ this = THIS;
+ priv = this->private;
+
+ GF_ASSERT(priv);
+
+ keylen = snprintf(key, sizeof(key), "op");
+ friends = dict_new();
+
+ RCU_READ_LOCK;
+
+ cur_peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
+ if (!cur_peerinfo) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
+ "Could not find peer %s(%s)", event->peername,
+ uuid_utoa(event->peerid));
+ goto out;
+ }
+
+ if (!friends) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto unlock;
+ }
+
+ ev_ctx.op = GD_FRIEND_UPDATE_ADD;
+ ret = dict_set_int32n(friends, key, keylen, ev_ctx.op);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto unlock;
+ }
+
+ cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
+ {
+ if (!glusterd_should_update_peer(peerinfo, cur_peerinfo))
+ continue;
+
+ count++;
+
+ snprintf(key, sizeof(key), "friend%d", count);
+ ret = gd_add_friend_to_dict(peerinfo, friends, key);
if (ret)
- goto out;
+ goto unlock;
+ }
- list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
- if (!peerinfo->connected || !peerinfo->peer)
- continue;
+ ret = dict_set_int32n(friends, "count", SLEN("count"), count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
+ goto unlock;
+ }
- if (!glusterd_should_update_peer (peerinfo, cur_peerinfo))
- continue;
+ cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
+ {
+ if (!peerinfo->connected || !peerinfo->peer)
+ continue;
- ret = dict_set_static_ptr (friends, "peerinfo", peerinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "failed to set peerinfo");
- goto out;
- }
+ if (!glusterd_should_update_peer(peerinfo, cur_peerinfo))
+ continue;
- proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE];
- if (proc->fn) {
- ret = proc->fn (NULL, this, friends);
- }
+ ret = dict_set_static_ptr(friends, "peerinfo", peerinfo);
+ if (ret) {
+ RCU_READ_UNLOCK;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to set peerinfo");
+ goto out;
}
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ proc = &peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE];
+ if (proc->fn) {
+ ret = proc->fn(NULL, this, friends);
+ }
+ }
+unlock:
+ RCU_READ_UNLOCK;
out:
- if (friends)
- dict_unref (friends);
- return ret;
+ if (friends)
+ dict_unref(friends);
+
+ gf_msg_debug("glusterd", 0, "Returning with %d", ret);
+ return ret;
}
+/* ac_update_friend only sends friend update to the friend that caused this
+ * event to happen
+ */
static int
-glusterd_peer_detach_cleanup (glusterd_conf_t *priv)
+glusterd_ac_update_friend(glusterd_friend_sm_event_t *event, void *ctx)
{
- int ret = -1;
- glusterd_volinfo_t *volinfo = NULL;
- glusterd_volinfo_t *tmp_volinfo = NULL;
-
- GF_ASSERT (priv);
-
- list_for_each_entry_safe (volinfo,tmp_volinfo,
- &priv->volumes, vol_list) {
- if (!glusterd_friend_contains_vol_bricks (volinfo,
- MY_UUID)) {
- gf_log (THIS->name, GF_LOG_INFO,
- "Deleting stale volume %s", volinfo->volname);
- ret = glusterd_delete_volume (volinfo);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR,
- "Error deleting stale volume");
- goto out;
- }
- }
- }
+ int ret = 0;
+ glusterd_peerinfo_t *cur_peerinfo = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ rpc_clnt_procedure_t *proc = NULL;
+ xlator_t *this = NULL;
+ glusterd_friend_update_ctx_t ev_ctx = {{0}};
+ glusterd_conf_t *priv = NULL;
+ dict_t *friends = NULL;
+ char key[64] = {
+ 0,
+ };
+ int keylen;
+ int32_t count = 0;
+
+ GF_ASSERT(event);
+
+ this = THIS;
+ priv = this->private;
+
+ GF_ASSERT(priv);
+
+ friends = dict_new();
+ keylen = snprintf(key, sizeof(key), "op");
+
+ RCU_READ_LOCK;
+
+ cur_peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
+ if (!cur_peerinfo) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
+ "Could not find peer %s(%s)", event->peername,
+ uuid_utoa(event->peerid));
+ goto out;
+ }
+
+ /* Bail out early if peer is not connected.
+ * We cannot send requests to the peer until we have established our
+ * client connection to it.
+ */
+ if (!cur_peerinfo->connected || !cur_peerinfo->peer) {
ret = 0;
+ goto unlock;
+ }
+
+ if (!friends) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL, NULL);
+ goto out;
+ }
+
+ ev_ctx.op = GD_FRIEND_UPDATE_ADD;
+ ret = dict_set_int32n(friends, key, keylen, ev_ctx.op);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=%s", key, NULL);
+ goto unlock;
+ }
+
+ cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
+ {
+ if (!glusterd_should_update_peer(peerinfo, cur_peerinfo))
+ continue;
+
+ count++;
+
+ snprintf(key, sizeof(key), "friend%d", count);
+ ret = gd_add_friend_to_dict(peerinfo, friends, key);
+ if (ret)
+ goto unlock;
+ }
+
+ ret = dict_set_int32n(friends, "count", SLEN("count"), count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "Key=count", NULL);
+ goto unlock;
+ }
+
+ ret = dict_set_static_ptr(friends, "peerinfo", cur_peerinfo);
+ if (ret) {
+ RCU_READ_UNLOCK;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to set peerinfo");
+ goto out;
+ }
+
+ proc = &cur_peerinfo->peer->proctable[GLUSTERD_FRIEND_UPDATE];
+ if (proc->fn)
+ ret = proc->fn(NULL, this, friends);
+
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
+
+unlock:
+ RCU_READ_UNLOCK;
out:
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
-}
-static int
-glusterd_ac_handle_friend_remove_req (glusterd_friend_sm_event_t *event,
- void *ctx)
-{
- int ret = 0;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_friend_req_ctx_t *ev_ctx = NULL;
- glusterd_friend_sm_event_t *new_event = NULL;
- glusterd_conf_t *priv = NULL;
+ if (friends)
+ dict_unref(friends);
- GF_ASSERT (ctx);
- ev_ctx = ctx;
- peerinfo = event->peerinfo;
- GF_ASSERT (peerinfo);
+ return ret;
+}
- priv = THIS->private;
- GF_ASSERT (priv);
+/* Clean up stale volumes on the peer being detached. The volumes which have
+ * bricks on other peers are stale with respect to the detached peer.
+ */
+static void
+glusterd_peer_detach_cleanup(glusterd_conf_t *priv)
+{
+ int ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *tmp_volinfo = NULL;
+ glusterd_svc_t *svc = NULL;
+
+ GF_ASSERT(priv);
+
+ cds_list_for_each_entry_safe(volinfo, tmp_volinfo, &priv->volumes, vol_list)
+ {
+ /* The peer detach checks make sure that, at this point in the
+ * detach process, there are only volumes contained completely
+ * within or completely outside the detached peer.
+ * The only stale volumes at this point are the ones
+ * completely outside the peer and can be safely deleted.
+ */
+ if (!glusterd_friend_contains_vol_bricks(volinfo, MY_UUID)) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_STALE_VOL_DELETE_INFO,
+ "Deleting stale volume %s", volinfo->volname);
+
+ /*Stop snapd daemon service if snapd daemon is running*/
+ if (!volinfo->is_snap_volume) {
+ svc = &(volinfo->snapd.svc);
+ ret = svc->stop(svc, SIGTERM);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
+ "Failed "
+ "to stop snapd daemon service");
+ }
+ }
- ret = glusterd_xfer_friend_remove_resp (ev_ctx->req, ev_ctx->hostname,
- ev_ctx->port);
+ if (glusterd_is_shd_compatible_volume(volinfo)) {
+ svc = &(volinfo->shd.svc);
+ ret = svc->stop(svc, SIGTERM);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
+ "Failed "
+ "to stop shd daemon service");
+ }
+ }
- list_for_each_entry (peerinfo, &priv->peers, uuid_list) {
+ if (glusterd_is_gfproxyd_enabled(volinfo)) {
+ svc = &(volinfo->gfproxyd.svc);
+ ret = svc->stop(svc, SIGTERM);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
+ "Failed "
+ "to stop gfproxyd daemon service");
+ }
+ }
+
+ ret = glusterd_cleanup_snaps_for_volume(volinfo);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_VOL_DELETE_FAIL,
+ "Error deleting snapshots for volume %s",
+ volinfo->volname);
+ }
+
+ ret = glusterd_delete_volume(volinfo);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0,
+ GD_MSG_STALE_VOL_REMOVE_FAIL,
+ "Error deleting stale volume");
+ }
+ }
+ }
+
+ /*Reconfigure all daemon services upon peer detach*/
+ ret = glusterd_svcs_reconfigure(NULL);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SVC_STOP_FAIL,
+ "Failed to reconfigure all daemon services.");
+ }
+}
- ret = glusterd_friend_sm_new_event (GD_FRIEND_EVENT_REMOVE_FRIEND,
- &new_event);
- if (ret)
- goto out;
+static int
+glusterd_ac_handle_friend_remove_req(glusterd_friend_sm_event_t *event,
+ void *ctx)
+{
+ int ret = 0;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_req_ctx_t *ev_ctx = NULL;
+ glusterd_friend_sm_event_t *new_event = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT(ctx);
+ ev_ctx = ctx;
+
+ priv = THIS->private;
+ GF_ASSERT(priv);
+
+ ret = glusterd_xfer_friend_remove_resp(ev_ctx->req, ev_ctx->hostname,
+ ev_ctx->port);
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
+ {
+ ret = glusterd_friend_sm_new_event(GD_FRIEND_EVENT_REMOVE_FRIEND,
+ &new_event);
+ if (ret) {
+ RCU_READ_UNLOCK;
+ goto out;
+ }
- new_event->peerinfo = peerinfo;
+ new_event->peername = gf_strdup(peerinfo->hostname);
+ gf_uuid_copy(new_event->peerid, peerinfo->uuid);
- ret = glusterd_friend_sm_inject_event (new_event);
- if (ret)
- goto out;
- }
- ret = glusterd_peer_detach_cleanup (priv);
+ ret = glusterd_friend_sm_inject_event(new_event);
if (ret) {
- gf_log (THIS->name, GF_LOG_WARNING,
- "Peer detach cleanup was not successful");
- ret = 0;
+ RCU_READ_UNLOCK;
+ goto out;
}
+
+ new_event = NULL;
+ }
+ RCU_READ_UNLOCK;
+
+ glusterd_peer_detach_cleanup(priv);
out:
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning with %d", ret);
+ if (new_event)
+ GF_FREE(new_event->peername);
+ GF_FREE(new_event);
- return ret;
+ gf_msg_debug(THIS->name, 0, "Returning with %d", ret);
+ return ret;
}
static int
-glusterd_ac_friend_remove (glusterd_friend_sm_event_t *event, void *ctx)
+glusterd_ac_friend_remove(glusterd_friend_sm_event_t *event, void *ctx)
{
- int ret = -1;
-
- ret = glusterd_friend_remove_cleanup_vols (event->peerinfo->uuid);
- if (ret)
- gf_log (THIS->name, GF_LOG_WARNING, "Volumes cleanup failed");
-
- ret = glusterd_friend_cleanup (event->peerinfo);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Cleanup returned: %d", ret);
- }
-
- return 0;
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
+
+ GF_ASSERT(event);
+
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
+ if (!peerinfo) {
+ RCU_READ_UNLOCK;
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
+ "Could not find peer %s(%s)", event->peername,
+ uuid_utoa(event->peerid));
+ goto out;
+ }
+ ret = glusterd_friend_remove_cleanup_vols(peerinfo->uuid);
+ RCU_READ_UNLOCK;
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_VOL_CLEANUP_FAIL,
+ "Volumes cleanup failed");
+
+ /* Exiting read critical section as glusterd_peerinfo_cleanup calls
+ * synchronize_rcu before freeing the peerinfo
+ */
+
+ ret = glusterd_peerinfo_cleanup(peerinfo);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_PEER_DETACH_CLEANUP_FAIL,
+ "Cleanup returned: %d", ret);
+ }
+out:
+ return 0;
}
/*static int
@@ -645,426 +941,682 @@ glusterd_ac_none (void *ctx)
}*/
static int
-glusterd_ac_handle_friend_add_req (glusterd_friend_sm_event_t *event, void *ctx)
+glusterd_ac_handle_friend_add_req(glusterd_friend_sm_event_t *event, void *ctx)
{
- int ret = 0;
- uuid_t uuid;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_friend_req_ctx_t *ev_ctx = NULL;
- glusterd_friend_update_ctx_t *new_ev_ctx = NULL;
- glusterd_friend_sm_event_t *new_event = NULL;
- glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE;
- int status = 0;
- int32_t op_ret = -1;
- int32_t op_errno = 0;
-
- GF_ASSERT (ctx);
- ev_ctx = ctx;
- uuid_copy (uuid, ev_ctx->uuid);
- peerinfo = event->peerinfo;
- GF_ASSERT (peerinfo);
- uuid_copy (peerinfo->uuid, ev_ctx->uuid);
-
- //Build comparison logic here.
- ret = glusterd_compare_friend_data (ev_ctx->vols, &status);
- if (ret)
- goto out;
+ int ret = 0;
+ uuid_t uuid;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_req_ctx_t *ev_ctx = NULL;
+ glusterd_friend_update_ctx_t *new_ev_ctx = NULL;
+ glusterd_friend_sm_event_t *new_event = NULL;
+ glusterd_friend_sm_event_type_t event_type = GD_FRIEND_EVENT_NONE;
+ glusterd_conf_t *conf = NULL;
+ int status = 0;
+ int32_t op_ret = -1;
+ int32_t op_errno = 0;
+ xlator_t *this = NULL;
+ char *hostname = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(ctx);
+ ev_ctx = ctx;
+ gf_uuid_copy(uuid, ev_ctx->uuid);
+
+ RCU_READ_LOCK;
+ peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
+ if (!peerinfo) {
+ RCU_READ_UNLOCK;
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
+ "Could not find peer %s(%s)", event->peername,
+ uuid_utoa(event->peerid));
+ goto out;
+ }
+
+ /* TODO: How do you do an atomic copy of uuid_t */
+ /* TODO: Updating within a read-critical section is also invalid
+ * Update properly with updater synchronization
+ */
+ gf_uuid_copy(peerinfo->uuid, ev_ctx->uuid);
+
+ RCU_READ_UNLOCK;
+
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ /* Passing the peername from the event. glusterd_compare_friend_data
+ * updates volumes and will use synchronize_rcu. If we were to pass
+ * peerinfo->hostname, we would have to do it under a read critical
+ * section which would lead to a deadlock
+ */
+
+ // Build comparison logic here.
+ pthread_mutex_lock(&conf->import_volumes);
+ {
+ ret = glusterd_compare_friend_data(ev_ctx->vols, &status,
+ event->peername);
+ if (ret) {
+ pthread_mutex_unlock(&conf->import_volumes);
+ goto out;
+ }
if (GLUSTERD_VOL_COMP_RJT != status) {
- event_type = GD_FRIEND_EVENT_LOCAL_ACC;
- op_ret = 0;
+ event_type = GD_FRIEND_EVENT_LOCAL_ACC;
+ op_ret = 0;
} else {
- event_type = GD_FRIEND_EVENT_LOCAL_RJT;
- op_errno = GF_PROBE_VOLUME_CONFLICT;
- op_ret = -1;
+ event_type = GD_FRIEND_EVENT_LOCAL_RJT;
+ op_errno = GF_PROBE_VOLUME_CONFLICT;
+ op_ret = -1;
}
- ret = glusterd_friend_sm_new_event (event_type, &new_event);
-
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Out of Memory");
- }
-
- new_event->peerinfo = peerinfo;
-
- new_ev_ctx = GF_CALLOC (1, sizeof (*new_ev_ctx),
- gf_gld_mt_friend_update_ctx_t);
- if (!new_ev_ctx) {
- ret = -1;
- goto out;
+ /* Compare missed_snapshot list with the peer *
+ * if volume comparison is successful */
+ if ((op_ret == 0) && (conf->op_version >= GD_OP_VERSION_3_6_0)) {
+ ret = glusterd_import_friend_missed_snap_list(ev_ctx->vols);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+ "Failed to import peer's "
+ "missed_snaps_list.");
+ event_type = GD_FRIEND_EVENT_LOCAL_RJT;
+ op_errno = GF_PROBE_MISSED_SNAP_CONFLICT;
+ op_ret = -1;
+ }
+
+ /* glusterd_compare_friend_snapshots and functions only require
+ * a peers hostname and uuid. It also does updates, which
+ * require use of synchronize_rcu. So we pass the hostname and
+ * id from the event instead of the peerinfo object to prevent
+ * deadlocks as above.
+ */
+ ret = glusterd_compare_friend_snapshots(
+ ev_ctx->vols, event->peername, event->peerid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAP_COMPARE_CONFLICT,
+ "Conflict in comparing peer's snapshots");
+ event_type = GD_FRIEND_EVENT_LOCAL_RJT;
+ op_errno = GF_PROBE_SNAP_CONFLICT;
+ op_ret = -1;
+ }
}
-
- uuid_copy (new_ev_ctx->uuid, ev_ctx->uuid);
- new_ev_ctx->hostname = gf_strdup (ev_ctx->hostname);
- new_ev_ctx->op = GD_FRIEND_UPDATE_ADD;
-
- new_event->ctx = new_ev_ctx;
-
- glusterd_friend_sm_inject_event (new_event);
-
- ret = glusterd_xfer_friend_add_resp (ev_ctx->req, ev_ctx->hostname,
- ev_ctx->port, op_ret, op_errno);
+ }
+ pthread_mutex_unlock(&conf->import_volumes);
+ ret = glusterd_friend_sm_new_event(event_type, &new_event);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Out of Memory");
+ goto out;
+ }
+
+ new_event->peername = gf_strdup(event->peername);
+ gf_uuid_copy(new_event->peerid, event->peerid);
+
+ new_ev_ctx = GF_CALLOC(1, sizeof(*new_ev_ctx),
+ gf_gld_mt_friend_update_ctx_t);
+ if (!new_ev_ctx) {
+ ret = -1;
+ goto out;
+ }
+
+ gf_uuid_copy(new_ev_ctx->uuid, ev_ctx->uuid);
+ new_ev_ctx->hostname = gf_strdup(ev_ctx->hostname);
+ new_ev_ctx->op = GD_FRIEND_UPDATE_ADD;
+
+ new_event->ctx = new_ev_ctx;
+
+ ret = dict_get_strn(ev_ctx->vols, "hostname_in_cluster",
+ SLEN("hostname_in_cluster"), &hostname);
+ if (ret || !hostname) {
+ gf_msg_debug(this->name, 0, "Unable to fetch local hostname from peer");
+ } else if (snprintf(local_node_hostname, sizeof(local_node_hostname), "%s",
+ hostname) >= sizeof(local_node_hostname)) {
+ gf_msg_debug(this->name, 0, "local_node_hostname truncated");
+ ret = -1;
+ goto out;
+ }
+
+ glusterd_friend_sm_inject_event(new_event);
+ new_event = NULL;
+
+ ret = glusterd_xfer_friend_add_resp(ev_ctx->req, ev_ctx->hostname,
+ event->peername, ev_ctx->port, op_ret,
+ op_errno);
out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ if (new_event)
+ GF_FREE(new_event->peername);
+ GF_FREE(new_event);
- return ret;
+ gf_msg_debug("glusterd", 0, "Returning with %d", ret);
+ return ret;
}
static int
-glusterd_friend_sm_transition_state (glusterd_peerinfo_t *peerinfo,
- glusterd_sm_t *state,
- glusterd_friend_sm_event_type_t event_type)
+glusterd_friend_sm_transition_state(uuid_t peerid, char *peername,
+ glusterd_sm_t *state,
+ glusterd_friend_sm_event_type_t event_type)
{
+ int ret = -1;
+ glusterd_peerinfo_t *peerinfo = NULL;
- GF_ASSERT (state);
- GF_ASSERT (peerinfo);
+ GF_ASSERT(state);
+ GF_ASSERT(peername);
- (void) glusterd_sm_tr_log_transition_add (&peerinfo->sm_log,
- peerinfo->state.state,
- state[event_type].next_state,
- event_type);
+ RCU_READ_LOCK;
+ peerinfo = glusterd_peerinfo_find(peerid, peername);
+ if (!peerinfo) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_PEER_NOT_FOUND, NULL);
+ goto out;
+ }
- peerinfo->state.state = state[event_type].next_state;
- return 0;
-}
+ (void)glusterd_sm_tr_log_transition_add(
+ &peerinfo->sm_log, peerinfo->state.state, state[event_type].next_state,
+ event_type);
+ uatomic_set(&peerinfo->state.state, state[event_type].next_state);
-glusterd_sm_t glusterd_state_default [] = {
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none},
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_probe},//EV_PROBE
- {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_friend_add}, //EV_INIT_FRIEND_REQ
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_ACC
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_RJT
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
- {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_probe}, //EVENT_CONNECTED
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_MAX
-};
+ ret = 0;
+out:
+ RCU_READ_UNLOCK;
+ return ret;
+}
-glusterd_sm_t glusterd_state_probe_rcvd [] = {
- {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none},
- {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EV_PROBE
- {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EV_INIT_FRIEND_REQ
- {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_ACC
- {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
- {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_RJT
- {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
- {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
- {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_CONNECTED
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_MAX
+glusterd_sm_t glusterd_state_default[] = {
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none},
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_probe}, // EV_PROBE
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_friend_add}, // EV_INIT_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_RCVD,
+ glusterd_ac_handle_friend_add_req}, // EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_send_friend_remove_req}, // EV_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_probe}, // EVENT_CONNECTED
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_NEW_NAME
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_MAX
};
-glusterd_sm_t glusterd_state_connected_rcvd [] = {
- {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none},
- {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EV_PROBE
- {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EV_INIT_FRIEND_REQ
- {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_RCVD_ACC
- {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_reverse_probe_begin}, //EVENT_RCVD_LOCAL_ACC
- {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_RCVD_RJT
- {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
- {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
- {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_CONNECTED
- {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, //EVENT_MAX
+glusterd_sm_t glusterd_state_probe_rcvd[] = {
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none},
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, // EV_PROBE
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, // EV_INIT_FRIEND_REQ
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, // EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, // EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, // EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_PROBE_RCVD, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_RCVD,
+ glusterd_ac_handle_friend_add_req}, // EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_send_friend_remove_req}, // EV_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EVENT_CONNECTED
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_NEW_NAME
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_MAX
};
-glusterd_sm_t glusterd_state_connected_accepted [] = {
- {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none},
- {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_friend_probe}, //EV_PROBE
- {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_friend_add}, //EV_INIT_FRIEND_REQ
- {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_ACC
- {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
- {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_RJT
- {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
- {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_RCV_FRIEND_REQ
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EV_INIT_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
- {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_CONNECTED
- {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, //EVENT_MAX
+glusterd_sm_t glusterd_state_connected_rcvd[] = {
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none},
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EV_PROBE
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EV_INIT_FRIEND_REQ
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED,
+ glusterd_ac_reverse_probe_begin}, // EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_CONNECTED_RCVD,
+ glusterd_ac_handle_friend_add_req}, // EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_send_friend_remove_req}, // EV_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EVENT_CONNECTED
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EVENT_NEW_NAME
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EVENT_MAX
};
-glusterd_sm_t glusterd_state_req_sent [] = {
- {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_NONE,
- {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_PROBE,
- {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
- {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_ACC
- {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
- {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT
- {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
- {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
- {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND,
- {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
- {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none},//EVENT_CONNECTED
- {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none},//EVENT_MAX
+glusterd_sm_t glusterd_state_connected_accepted[] = {
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none},
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_friend_probe}, // EV_PROBE
+ {GD_FRIEND_STATE_REQ_SENT_RCVD,
+ glusterd_ac_friend_add}, // EV_INIT_FRIEND_REQ
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, // EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED,
+ glusterd_ac_none}, // EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, // EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED,
+ glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED,
+ glusterd_ac_none}, // EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_send_friend_remove_req}, // EV_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT, glusterd_ac_none}, // EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, // EVENT_CONNECTED
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, // EVENT_NEW_NAME
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_none}, // EVENT_MAX
};
-glusterd_sm_t glusterd_state_req_rcvd [] = {
- {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_NONE,
- {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_PROBE,
- {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
- {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_RCVD_ACC
- {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
- {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_RCVD_RJT
- {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
- {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, //EVENT_RCV_FRIEND_REQ
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND,
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
- {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none},//EVENT_CONNECTED
- {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none},//EVENT_MAX
+glusterd_sm_t glusterd_state_req_sent[] = {
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_NONE,
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_PROBE,
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, // EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_SENT_RCVD,
+ glusterd_ac_handle_friend_add_req}, // EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_UNFRIEND_SENT,
+ glusterd_ac_send_friend_remove_req}, // EVENT_INIT_REMOVE_FRIEND,
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_CONNECTED
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_NEW_NAME
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_MAX
};
-glusterd_sm_t glusterd_state_befriended [] = {
- {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_NONE,
- {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_PROBE,
- {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
- {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_RCVD_ACC
- {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_send_friend_update}, //EVENT_RCVD_LOCAL_ACC
- {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT
- {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
- {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
- {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND,
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
- {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_friend_add},//EVENT_CONNECTED
- {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none},//EVENT_MAX
+glusterd_sm_t glusterd_state_req_rcvd[] = {
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, // EVENT_NONE,
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, // EVENT_PROBE,
+ {GD_FRIEND_STATE_REQ_SENT_RCVD,
+ glusterd_ac_none}, // EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, // EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, // EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, // EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, // EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_send_friend_remove_req}, // EVENT_INIT_REMOVE_FRIEND,
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_handle_friend_remove_req}, // EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EVENT_CONNECTED
+ {GD_FRIEND_STATE_CONNECTED_RCVD, glusterd_ac_none}, // EVENT_NEW_NAME
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, // EVENT_MAX
};
-glusterd_sm_t glusterd_state_req_sent_rcvd [] = {
- {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_NONE,
- {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_PROBE,
- {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
- {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_send_friend_update}, //EVENT_RCVD_ACC
- {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
- {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT
- {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
- {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, //EVENT_RCV_FRIEND_REQ
- {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND,
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
- {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none},//EVENT_CONNECTED
- {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none},//EVENT_MAX
+glusterd_sm_t glusterd_state_befriended[] = {
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, // EVENT_NONE,
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, // EVENT_PROBE,
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, // EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_update_friend}, // EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_BEFRIENDED,
+ glusterd_ac_update_friend}, // EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_BEFRIENDED,
+ glusterd_ac_handle_friend_add_req}, // EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_UNFRIEND_SENT,
+ glusterd_ac_send_friend_remove_req}, // EVENT_INIT_REMOVE_FRIEND,
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_handle_friend_remove_req}, // EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_friend_add}, // EVENT_CONNECTED
+ {GD_FRIEND_STATE_BEFRIENDED,
+ glusterd_ac_send_friend_update}, // EVENT_NEW_NAME
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, // EVENT_MAX
};
-glusterd_sm_t glusterd_state_rejected [] = {
- {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_NONE,
- {GD_FRIEND_STATE_REJECTED, glusterd_ac_friend_probe}, //EVENT_PROBE,
- {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_friend_add}, //EVENT_INIT_FRIEND_REQ,
- {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_RCVD_ACC
- {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
- {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT
- {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
- {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
- {GD_FRIEND_STATE_REJECTED, glusterd_ac_friend_add},//EVENT_CONNECTED
- {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none},//EVENT_MAX
+glusterd_sm_t glusterd_state_req_sent_rcvd[] = {
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, // EVENT_NONE,
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, // EVENT_PROBE,
+ {GD_FRIEND_STATE_REQ_SENT_RCVD,
+ glusterd_ac_none}, // EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_BEFRIENDED,
+ glusterd_ac_send_friend_update}, // EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, // EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, // EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_UNFRIEND_SENT,
+ glusterd_ac_send_friend_remove_req}, // EVENT_INIT_REMOVE_FRIEND,
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_handle_friend_remove_req}, // EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, // EVENT_CONNECTED
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, // EVENT_NEW_NAME
+ {GD_FRIEND_STATE_REQ_SENT_RCVD, glusterd_ac_none}, // EVENT_MAX
};
-glusterd_sm_t glusterd_state_req_accepted [] = {
- {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_NONE,
- {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_PROBE,
- {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
- {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_send_friend_update}, //EVENT_RCVD_ACC
- {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_send_friend_update}, //EVENT_RCVD_LOCAL_ACC
- {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_RJT
- {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, //EVENT_RCVD_LOCAL_RJT
- {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_handle_friend_add_req}, //EVENT_RCV_FRIEND_REQ
- {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_send_friend_remove_req}, //EVENT_INIT_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_handle_friend_remove_req}, //EVENT_RCVD_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
- {GD_FRIEND_STATE_CONNECTED_ACCEPTED, glusterd_ac_reverse_probe_begin},//EVENT_CONNECTED
- {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none},//EVENT_MAX
+glusterd_sm_t glusterd_state_rejected[] = {
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_NONE,
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_friend_probe}, // EVENT_PROBE,
+ {GD_FRIEND_STATE_REQ_SENT,
+ glusterd_ac_friend_add}, // EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, // EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_BEFRIENDED, glusterd_ac_none}, // EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_RCVD,
+ glusterd_ac_handle_friend_add_req}, // EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_send_friend_remove_req}, // EVENT_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_handle_friend_remove_req}, // EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_friend_add}, // EVENT_CONNECTED
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_NEW_NAME
+ {GD_FRIEND_STATE_REQ_RCVD, glusterd_ac_none}, // EVENT_MAX
};
-glusterd_sm_t glusterd_state_unfriend_sent [] = {
- {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_NONE,
- {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, //EVENT_PROBE,
- {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_INIT_FRIEND_REQ,
- {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_RCVD_ACC
- {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_RCVD_LOCAL_ACC
- {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, //EVENT_RCVD_RJT
- {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, //EVENT_RCVD_LOCAL_RJT
- {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, //EVENT_RCV_FRIEND_REQ
- {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_INIT_REMOVE_FRIEND
- {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, //EVENT_RCVD_REMOVE_FRIEND
- {GD_FRIEND_STATE_DEFAULT, glusterd_ac_friend_remove}, //EVENT_REMOVE_FRIEND
- {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none},//EVENT_CONNECTED
- {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none},//EVENT_MAX
+glusterd_sm_t glusterd_state_req_accepted[] = {
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, // EVENT_NONE,
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, // EVENT_PROBE,
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, // EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_BEFRIENDED,
+ glusterd_ac_send_friend_update}, // EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_BEFRIENDED,
+ glusterd_ac_send_friend_update}, // EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_REJECTED, glusterd_ac_none}, // EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_REQ_ACCEPTED,
+ glusterd_ac_handle_friend_add_req}, // EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_REQ_ACCEPTED,
+ glusterd_ac_send_friend_remove_req}, // EVENT_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_handle_friend_remove_req}, // EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_CONNECTED_ACCEPTED,
+ glusterd_ac_reverse_probe_begin}, // EVENT_CONNECTED
+ {GD_FRIEND_STATE_REQ_ACCEPTED, glusterd_ac_none}, // EVENT_NEW_NAME
+ {GD_FRIEND_STATE_REQ_SENT, glusterd_ac_none}, // EVENT_MAX
};
-glusterd_sm_t *glusterd_friend_state_table [] = {
- glusterd_state_default,
- glusterd_state_req_sent,
- glusterd_state_req_rcvd,
- glusterd_state_befriended,
- glusterd_state_req_accepted,
- glusterd_state_req_sent_rcvd,
- glusterd_state_rejected,
- glusterd_state_unfriend_sent,
- glusterd_state_probe_rcvd,
- glusterd_state_connected_rcvd,
- glusterd_state_connected_accepted
+glusterd_sm_t glusterd_state_unfriend_sent[] = {
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, // EVENT_NONE,
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, // EVENT_PROBE,
+ {GD_FRIEND_STATE_UNFRIEND_SENT,
+ glusterd_ac_none}, // EVENT_INIT_FRIEND_REQ,
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, // EVENT_RCVD_ACC
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, // EVENT_RCVD_LOCAL_ACC
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, // EVENT_RCVD_RJT
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, // EVENT_RCVD_LOCAL_RJT
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_error}, // EVENT_RCV_FRIEND_REQ
+ {GD_FRIEND_STATE_UNFRIEND_SENT,
+ glusterd_ac_none}, // EVENT_INIT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_UNFRIEND_SENT,
+ glusterd_ac_none}, // EVENT_RCVD_REMOVE_FRIEND
+ {GD_FRIEND_STATE_DEFAULT,
+ glusterd_ac_friend_remove}, // EVENT_REMOVE_FRIEND
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, // EVENT_CONNECTED
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, // EVENT_NEW_NAME
+ {GD_FRIEND_STATE_UNFRIEND_SENT, glusterd_ac_none}, // EVENT_MAX
};
+glusterd_sm_t *glusterd_friend_state_table[] = {
+ glusterd_state_default, glusterd_state_req_sent,
+ glusterd_state_req_rcvd, glusterd_state_befriended,
+ glusterd_state_req_accepted, glusterd_state_req_sent_rcvd,
+ glusterd_state_rejected, glusterd_state_unfriend_sent,
+ glusterd_state_probe_rcvd, glusterd_state_connected_rcvd,
+ glusterd_state_connected_accepted};
+
int
-glusterd_friend_sm_new_event (glusterd_friend_sm_event_type_t event_type,
- glusterd_friend_sm_event_t **new_event)
+glusterd_friend_sm_new_event(glusterd_friend_sm_event_type_t event_type,
+ glusterd_friend_sm_event_t **new_event)
{
- glusterd_friend_sm_event_t *event = NULL;
+ glusterd_friend_sm_event_t *event = NULL;
- GF_ASSERT (new_event);
- GF_ASSERT (GD_FRIEND_EVENT_NONE <= event_type &&
- GD_FRIEND_EVENT_MAX > event_type);
+ GF_ASSERT(new_event);
+ GF_ASSERT(GD_FRIEND_EVENT_NONE <= event_type &&
+ GD_FRIEND_EVENT_MAX > event_type);
- event = GF_CALLOC (1, sizeof (*event), gf_gld_mt_friend_sm_event_t);
+ event = GF_CALLOC(1, sizeof(*event), gf_gld_mt_friend_sm_event_t);
- if (!event)
- return -1;
+ if (!event)
+ return -1;
- *new_event = event;
- event->event = event_type;
- INIT_LIST_HEAD (&event->list);
+ *new_event = event;
+ event->event = event_type;
+ CDS_INIT_LIST_HEAD(&event->list);
- return 0;
+ return 0;
}
int
-glusterd_friend_sm_inject_event (glusterd_friend_sm_event_t *event)
+glusterd_friend_sm_inject_event(glusterd_friend_sm_event_t *event)
{
- GF_ASSERT (event);
- gf_log ("glusterd", GF_LOG_DEBUG, "Enqueue event: '%s'",
- glusterd_friend_sm_event_name_get (event->event));
- list_add_tail (&event->list, &gd_friend_sm_queue);
+ GF_ASSERT(event);
+ gf_msg_debug("glusterd", 0, "Enqueue event: '%s'",
+ glusterd_friend_sm_event_name_get(event->event));
+ cds_list_add_tail(&event->list, &gd_friend_sm_queue);
- return 0;
+ return 0;
}
void
-glusterd_destroy_friend_event_context (glusterd_friend_sm_event_t *event)
+glusterd_destroy_friend_event_context(glusterd_friend_sm_event_t *event)
{
- if (!event)
- return;
+ if (!event)
+ return;
- switch (event->event) {
+ switch (event->event) {
case GD_FRIEND_EVENT_RCVD_FRIEND_REQ:
case GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND:
- glusterd_destroy_friend_req_ctx (event->ctx);
- break;
+ glusterd_destroy_friend_req_ctx(event->ctx);
+ break;
case GD_FRIEND_EVENT_LOCAL_ACC:
case GD_FRIEND_EVENT_LOCAL_RJT:
case GD_FRIEND_EVENT_RCVD_ACC:
case GD_FRIEND_EVENT_RCVD_RJT:
- glusterd_destroy_friend_update_ctx (event->ctx);
- break;
+ glusterd_destroy_friend_update_ctx(event->ctx);
+ break;
default:
- break;
- }
+ break;
+ }
+}
+
+gf_boolean_t
+gd_does_peer_affect_quorum(glusterd_friend_sm_state_t old_state,
+ glusterd_friend_sm_event_type_t event_type,
+ glusterd_peerinfo_t *peerinfo)
+{
+ gf_boolean_t affects = _gf_false;
+
+ // When glusterd comes up with friends in BEFRIENDED state in store,
+ // wait until compare-data happens.
+ if ((old_state == GD_FRIEND_STATE_BEFRIENDED) &&
+ (event_type != GD_FRIEND_EVENT_RCVD_ACC) &&
+ (event_type != GD_FRIEND_EVENT_LOCAL_ACC))
+ goto out;
+ if ((peerinfo->state.state == GD_FRIEND_STATE_BEFRIENDED) &&
+ peerinfo->connected) {
+ affects = _gf_true;
+ }
+out:
+ return affects;
}
int
-glusterd_friend_sm ()
+glusterd_friend_sm()
{
- glusterd_friend_sm_event_t *event = NULL;
- glusterd_friend_sm_event_t *tmp = NULL;
- int ret = -1;
- glusterd_friend_sm_ac_fn handler = NULL;
- glusterd_sm_t *state = NULL;
- glusterd_peerinfo_t *peerinfo = NULL;
- glusterd_friend_sm_event_type_t event_type = 0;
- gf_boolean_t is_await_conn = _gf_false;
-
- while (!list_empty (&gd_friend_sm_queue)) {
- list_for_each_entry_safe (event, tmp, &gd_friend_sm_queue, list) {
-
- list_del_init (&event->list);
- event_type = event->event;
- peerinfo = event->peerinfo;
- if (!peerinfo) {
- gf_log ("glusterd", GF_LOG_CRITICAL, "Received"
- " event %s with empty peer info",
- glusterd_friend_sm_event_name_get (event_type));
-
- GF_FREE (event);
- continue;
- }
- gf_log ("", GF_LOG_DEBUG, "Dequeued event of type: '%s'",
- glusterd_friend_sm_event_name_get (event_type));
-
-
- state = glusterd_friend_state_table[peerinfo->state.state];
-
- GF_ASSERT (state);
-
- handler = state[event_type].handler;
- GF_ASSERT (handler);
-
- ret = handler (event, event->ctx);
- if (ret == GLUSTERD_CONNECTION_AWAITED) {
- is_await_conn = _gf_true;
- ret = 0;
- }
-
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "handler returned: "
- "%d", ret);
- glusterd_destroy_friend_event_context (event);
- GF_FREE (event);
- continue;
- }
-
- if ((GD_FRIEND_EVENT_REMOVE_FRIEND == event_type) ||
- (GD_FRIEND_EVENT_INIT_REMOVE_FRIEND == event_type)){
- glusterd_destroy_friend_event_context (event);
- GF_FREE (event);
- continue;
- }
-
- ret = glusterd_friend_sm_transition_state (peerinfo,
- state, event_type);
-
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to transition"
- " state from '%s' to '%s' for event '%s'",
- glusterd_friend_sm_state_name_get(peerinfo->state.state),
- glusterd_friend_sm_state_name_get(state[event_type].next_state),
- glusterd_friend_sm_event_name_get(event_type));
- goto out;
- }
-
- ret = glusterd_store_peerinfo (peerinfo);
-
- glusterd_destroy_friend_event_context (event);
- GF_FREE (event);
- if (is_await_conn)
- break;
+ glusterd_friend_sm_event_t *event = NULL;
+ glusterd_friend_sm_event_t *tmp = NULL;
+ int ret = -1;
+ glusterd_friend_sm_ac_fn handler = NULL;
+ glusterd_sm_t *state = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_friend_sm_event_type_t event_type = 0;
+ gf_boolean_t is_await_conn = _gf_false;
+ gf_boolean_t quorum_action = _gf_false;
+ glusterd_friend_sm_state_t old_state = GD_FRIEND_STATE_DEFAULT;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ while (!cds_list_empty(&gd_friend_sm_queue)) {
+ cds_list_for_each_entry_safe(event, tmp, &gd_friend_sm_queue, list)
+ {
+ cds_list_del_init(&event->list);
+ event_type = event->event;
+
+ RCU_READ_LOCK;
+
+ peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
+ if (!peerinfo) {
+ RCU_READ_UNLOCK;
+ gf_msg("glusterd", GF_LOG_CRITICAL, 0, GD_MSG_PEER_NOT_FOUND,
+ "Received"
+ " event %s with empty peer info",
+ glusterd_friend_sm_event_name_get(event_type));
+
+ GF_FREE(event);
+ continue;
+ }
+ old_state = peerinfo->state.state;
+ RCU_READ_UNLOCK;
+ gf_msg_debug("glusterd", 0, "Dequeued event of type: '%s'",
+ glusterd_friend_sm_event_name_get(event_type));
+
+ /* Giving up read-critical section here as we only need
+ * the current state to call the handler.
+ *
+ * We cannot continue into the handler in a read
+ * critical section as there are handlers who do
+ * updates, and could cause deadlocks.
+ */
+
+ state = glusterd_friend_state_table[old_state];
+
+ GF_ASSERT(state);
+
+ handler = state[event_type].handler;
+ GF_ASSERT(handler);
+
+ ret = handler(event, event->ctx);
+ if (ret == GLUSTERD_CONNECTION_AWAITED) {
+ is_await_conn = _gf_true;
+ ret = 0;
+ }
+
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_HANDLER_RETURNED,
+ "handler returned: "
+ "%d",
+ ret);
+ glusterd_destroy_friend_event_context(event);
+ GF_FREE(event);
+ continue;
+ }
+
+ if ((GD_FRIEND_EVENT_REMOVE_FRIEND == event_type) ||
+ (GD_FRIEND_EVENT_INIT_REMOVE_FRIEND == event_type)) {
+ glusterd_destroy_friend_event_context(event);
+ GF_FREE(event);
+ continue;
+ }
+
+ ret = glusterd_friend_sm_transition_state(
+ event->peerid, event->peername, state, event_type);
+
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0,
+ GD_MSG_EVENT_STATE_TRANSITION_FAIL,
+ "Unable to transition"
+ " state from '%s' to '%s' for event '%s'",
+ glusterd_friend_sm_state_name_get(old_state),
+ glusterd_friend_sm_state_name_get(
+ state[event_type].next_state),
+ glusterd_friend_sm_event_name_get(event_type));
+ goto out;
+ }
+
+ peerinfo = NULL;
+ /* We need to obtain peerinfo reference once again as we
+ * had exited the read critical section above.
+ */
+ RCU_READ_LOCK;
+ peerinfo = glusterd_peerinfo_find(event->peerid, event->peername);
+ if (!peerinfo) {
+ RCU_READ_UNLOCK;
+ /* A peer can only be deleted as a effect of
+ * this state machine, and two such state
+ * machines can never run at the same time.
+ * So if we cannot find the peerinfo here,
+ * something has gone terribly wrong.
+ */
+ ret = -1;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PEER_NOT_FOUND,
+ "Cannot find peer %s(%s)", event->peername,
+ uuid_utoa(event->peerid));
+ goto out;
+ }
+ if (gd_does_peer_affect_quorum(old_state, event_type, peerinfo)) {
+ peerinfo->quorum_contrib = QUORUM_UP;
+ if (peerinfo->quorum_action) {
+ peerinfo->quorum_action = _gf_false;
+ quorum_action = _gf_true;
}
- if (is_await_conn)
- break;
+ }
+
+ ret = glusterd_store_peerinfo(peerinfo);
+ RCU_READ_UNLOCK;
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PEERINFO_CREATE_FAIL,
+ "Failed to store peerinfo");
+ }
+
+ glusterd_destroy_friend_event_context(event);
+ GF_FREE(event);
+ if (is_await_conn)
+ break;
}
+ if (is_await_conn)
+ break;
+ }
- ret = 0;
+ ret = 0;
out:
- return ret;
+ if (quorum_action) {
+ /* When glusterd is restarted, it needs to wait until the 'friends' view
+ * of the volumes settle, before it starts any of the internal daemons.
+ *
+ * Every friend that was part of the cluster, would send its
+ * cluster-view, 'our' way. For every friend, who belongs to
+ * a partition which has a different cluster-view from our
+ * partition, we may update our cluster-view. For subsequent
+ * friends from that partition would agree with us, if the first
+ * friend wasn't rejected. For every first friend, whom we agreed with,
+ * we would need to start internal daemons/bricks belonging to the
+ * new volumes.
+ * glusterd_spawn_daemons calls functions that are idempotent. ie,
+ * the functions spawn process(es) only if they are not started yet.
+ *
+ * */
+ synclock_unlock(&priv->big_lock);
+ glusterd_launch_synctask(glusterd_spawn_daemons, NULL);
+ synclock_lock(&priv->big_lock);
+ glusterd_do_quorum_action();
+ }
+ return ret;
}
-
int
-glusterd_friend_sm_init ()
+glusterd_friend_sm_init()
{
- INIT_LIST_HEAD (&gd_friend_sm_queue);
- return 0;
+ CDS_INIT_LIST_HEAD(&gd_friend_sm_queue);
+ return 0;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-sm.h b/xlators/mgmt/glusterd/src/glusterd-sm.h
index e8a545c27b2..11cbd85b3e3 100644
--- a/xlators/mgmt/glusterd/src/glusterd-sm.h
+++ b/xlators/mgmt/glusterd/src/glusterd-sm.h
@@ -1,212 +1,216 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _GLUSTERD_SM_H_
#define _GLUSTERD_SM_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <pthread.h>
-#include "uuid.h"
+#include <glusterfs/compat-uuid.h>
#include "rpc-clnt.h"
-#include "glusterfs.h"
-#include "xlator.h"
-#include "logging.h"
-#include "call-stub.h"
-#include "fd.h"
-#include "byte-order.h"
-//#include "glusterd.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/byte-order.h>
#include "rpcsvc.h"
+#include <glusterfs/store.h>
-struct glusterd_store_handle_ {
- char *path;
- int fd;
- FILE *read;
-};
+#include "glusterd-rcu.h"
-typedef struct glusterd_store_handle_ glusterd_store_handle_t;
+typedef enum gd_quorum_contribution_ {
+ QUORUM_NONE,
+ QUORUM_WAITING,
+ QUORUM_DOWN,
+ QUORUM_UP
+} gd_quorum_contrib_t;
typedef enum glusterd_friend_sm_state_ {
- GD_FRIEND_STATE_DEFAULT = 0,
- GD_FRIEND_STATE_REQ_SENT,
- GD_FRIEND_STATE_REQ_RCVD,
- GD_FRIEND_STATE_BEFRIENDED,
- GD_FRIEND_STATE_REQ_ACCEPTED,
- GD_FRIEND_STATE_REQ_SENT_RCVD,
- GD_FRIEND_STATE_REJECTED,
- GD_FRIEND_STATE_UNFRIEND_SENT,
- GD_FRIEND_STATE_PROBE_RCVD,
- GD_FRIEND_STATE_CONNECTED_RCVD,
- GD_FRIEND_STATE_CONNECTED_ACCEPTED,
- GD_FRIEND_STATE_MAX
+ GD_FRIEND_STATE_DEFAULT = 0,
+ GD_FRIEND_STATE_REQ_SENT,
+ GD_FRIEND_STATE_REQ_RCVD,
+ GD_FRIEND_STATE_BEFRIENDED,
+ GD_FRIEND_STATE_REQ_ACCEPTED,
+ GD_FRIEND_STATE_REQ_SENT_RCVD,
+ GD_FRIEND_STATE_REJECTED,
+ GD_FRIEND_STATE_UNFRIEND_SENT,
+ GD_FRIEND_STATE_PROBE_RCVD,
+ GD_FRIEND_STATE_CONNECTED_RCVD,
+ GD_FRIEND_STATE_CONNECTED_ACCEPTED,
+ GD_FRIEND_STATE_MAX
} glusterd_friend_sm_state_t;
typedef struct glusterd_peer_state_info_ {
- glusterd_friend_sm_state_t state;
- struct timeval transition_time;
-}glusterd_peer_state_info_t;
+ glusterd_friend_sm_state_t state;
+ struct timeval transition_time;
+} glusterd_peer_state_info_t;
typedef struct glusterd_peer_hostname_ {
- char *hostname;
- struct list_head hostname_list;
-}glusterd_peer_hostname_t;
+ char *hostname;
+ struct cds_list_head hostname_list;
+} glusterd_peer_hostname_t;
typedef struct glusterd_sm_transition_ {
- int old_state;
- int event;
- int new_state;
- time_t time;
+ int old_state;
+ int event;
+ int new_state;
+ time_t time;
} glusterd_sm_transition_t;
typedef struct glusterd_sm_tr_log_ {
- glusterd_sm_transition_t *transitions;
- size_t current;
- size_t size;
- size_t count;
- char* (*state_name_get) (int);
- char* (*event_name_get) (int);
+ glusterd_sm_transition_t *transitions;
+ size_t current;
+ size_t size;
+ size_t count;
+ char *(*state_name_get)(int);
+ char *(*event_name_get)(int);
} glusterd_sm_tr_log_t;
struct glusterd_peerinfo_ {
- uuid_t uuid;
- char uuid_str[50];
- glusterd_peer_state_info_t state;
- char *hostname;
- int port;
- struct list_head uuid_list;
- struct list_head op_peers_list;
- struct rpc_clnt *rpc;
- rpc_clnt_prog_t *mgmt;
- rpc_clnt_prog_t *peer;
- int connected;
- glusterd_store_handle_t *shandle;
- glusterd_sm_tr_log_t sm_log;
+ uuid_t uuid;
+ char uuid_str[50]; /* Retrieve this using
+ * gd_peer_uuid_str ()
+ */
+ glusterd_peer_state_info_t state;
+ char *hostname;
+ struct cds_list_head hostnames;
+ int port;
+ struct cds_list_head uuid_list;
+ struct cds_list_head op_peers_list;
+ struct rpc_clnt *rpc;
+ rpc_clnt_prog_t *mgmt;
+ rpc_clnt_prog_t *peer;
+ rpc_clnt_prog_t *mgmt_v3;
+ int connected;
+ gf_store_handle_t *shandle;
+ glusterd_sm_tr_log_t sm_log;
+ gf_boolean_t quorum_action;
+ gd_quorum_contrib_t quorum_contrib;
+ gf_boolean_t locked;
+ gf_boolean_t detaching;
+ /* Members required for proper cleanup using RCU */
+ gd_rcu_head rcu_head;
+ pthread_mutex_t delete_lock;
+ uint32_t generation;
};
typedef struct glusterd_peerinfo_ glusterd_peerinfo_t;
+typedef struct glusterd_local_peers_ {
+ glusterd_peerinfo_t *peerinfo;
+ struct cds_list_head op_peers_list;
+} glusterd_local_peers_t;
+
typedef enum glusterd_ev_gen_mode_ {
- GD_MODE_OFF,
- GD_MODE_ON,
- GD_MODE_SWITCH_ON
+ GD_MODE_OFF,
+ GD_MODE_ON,
+ GD_MODE_SWITCH_ON
} glusterd_ev_gen_mode_t;
typedef struct glusterd_peer_ctx_args_ {
- rpcsvc_request_t *req;
- glusterd_ev_gen_mode_t mode;
+ rpcsvc_request_t *req;
+ glusterd_ev_gen_mode_t mode;
+ dict_t *dict;
} glusterd_peerctx_args_t;
typedef struct glusterd_peer_ctx_ {
- glusterd_peerctx_args_t args;
- glusterd_peerinfo_t *peerinfo;
- char *errstr;
+ glusterd_peerctx_args_t args;
+ uuid_t peerid;
+ char *peername;
+ uint32_t peerinfo_gen;
+ char *errstr;
} glusterd_peerctx_t;
typedef enum glusterd_friend_sm_event_type_ {
- GD_FRIEND_EVENT_NONE = 0,
- GD_FRIEND_EVENT_PROBE,
- GD_FRIEND_EVENT_INIT_FRIEND_REQ,
- GD_FRIEND_EVENT_RCVD_ACC,
- GD_FRIEND_EVENT_LOCAL_ACC,
- GD_FRIEND_EVENT_RCVD_RJT,
- GD_FRIEND_EVENT_LOCAL_RJT,
- GD_FRIEND_EVENT_RCVD_FRIEND_REQ,
- GD_FRIEND_EVENT_INIT_REMOVE_FRIEND,
- GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND,
- GD_FRIEND_EVENT_REMOVE_FRIEND,
- GD_FRIEND_EVENT_CONNECTED,
- GD_FRIEND_EVENT_MAX
+ GD_FRIEND_EVENT_NONE = 0,
+ GD_FRIEND_EVENT_PROBE,
+ GD_FRIEND_EVENT_INIT_FRIEND_REQ,
+ GD_FRIEND_EVENT_RCVD_ACC,
+ GD_FRIEND_EVENT_LOCAL_ACC,
+ GD_FRIEND_EVENT_RCVD_RJT,
+ GD_FRIEND_EVENT_LOCAL_RJT,
+ GD_FRIEND_EVENT_RCVD_FRIEND_REQ,
+ GD_FRIEND_EVENT_INIT_REMOVE_FRIEND,
+ GD_FRIEND_EVENT_RCVD_REMOVE_FRIEND,
+ GD_FRIEND_EVENT_REMOVE_FRIEND,
+ GD_FRIEND_EVENT_CONNECTED,
+ GD_FRIEND_EVENT_NEW_NAME,
+ GD_FRIEND_EVENT_MAX
} glusterd_friend_sm_event_type_t;
-
typedef enum glusterd_friend_update_op_ {
- GD_FRIEND_UPDATE_NONE = 0,
- GD_FRIEND_UPDATE_ADD,
- GD_FRIEND_UPDATE_DEL,
+ GD_FRIEND_UPDATE_NONE = 0,
+ GD_FRIEND_UPDATE_ADD,
+ GD_FRIEND_UPDATE_DEL,
} glusterd_friend_update_op_t;
-
struct glusterd_friend_sm_event_ {
- struct list_head list;
- glusterd_peerinfo_t *peerinfo;
- void *ctx;
- glusterd_friend_sm_event_type_t event;
+ struct cds_list_head list;
+ uuid_t peerid;
+ char *peername;
+ void *ctx;
+ glusterd_friend_sm_event_type_t event;
};
typedef struct glusterd_friend_sm_event_ glusterd_friend_sm_event_t;
-typedef int (*glusterd_friend_sm_ac_fn) (glusterd_friend_sm_event_t *, void *);
+typedef int (*glusterd_friend_sm_ac_fn)(glusterd_friend_sm_event_t *, void *);
typedef struct glusterd_sm_ {
- glusterd_friend_sm_state_t next_state;
- glusterd_friend_sm_ac_fn handler;
+ glusterd_friend_sm_state_t next_state;
+ glusterd_friend_sm_ac_fn handler;
} glusterd_sm_t;
typedef struct glusterd_friend_req_ctx_ {
- uuid_t uuid;
- char *hostname;
- rpcsvc_request_t *req;
- int port;
- dict_t *vols;
+ uuid_t uuid;
+ char *hostname;
+ rpcsvc_request_t *req;
+ int port;
+ dict_t *vols;
} glusterd_friend_req_ctx_t;
typedef struct glusterd_friend_update_ctx_ {
- uuid_t uuid;
- char *hostname;
- int op;
+ uuid_t uuid;
+ char *hostname;
+ int op;
} glusterd_friend_update_ctx_t;
typedef struct glusterd_probe_ctx_ {
- char *hostname;
- rpcsvc_request_t *req;
- int port;
+ char *hostname;
+ rpcsvc_request_t *req;
+ int port;
+ dict_t *dict;
} glusterd_probe_ctx_t;
int
-glusterd_friend_sm_new_event (glusterd_friend_sm_event_type_t event_type,
- glusterd_friend_sm_event_t **new_event);
+glusterd_friend_sm_new_event(glusterd_friend_sm_event_type_t event_type,
+ glusterd_friend_sm_event_t **new_event);
int
-glusterd_friend_sm_inject_event (glusterd_friend_sm_event_t *event);
+glusterd_friend_sm_inject_event(glusterd_friend_sm_event_t *event);
int
-glusterd_friend_sm_init ();
+glusterd_friend_sm_init();
int
-glusterd_friend_sm ();
+glusterd_friend_sm();
void
-glusterd_destroy_probe_ctx (glusterd_probe_ctx_t *ctx);
+glusterd_destroy_probe_ctx(glusterd_probe_ctx_t *ctx);
void
-glusterd_destroy_friend_req_ctx (glusterd_friend_req_ctx_t *ctx);
+glusterd_destroy_friend_req_ctx(glusterd_friend_req_ctx_t *ctx);
-char*
-glusterd_friend_sm_state_name_get (int state);
+char *
+glusterd_friend_sm_state_name_get(int state);
-char*
-glusterd_friend_sm_event_name_get (int event);
+char *
+glusterd_friend_sm_event_name_get(int event);
int
-glusterd_broadcast_friend_delete (char *hostname, uuid_t uuid);
+glusterd_broadcast_friend_delete(char *hostname, uuid_t uuid);
void
-glusterd_destroy_friend_update_ctx (glusterd_friend_update_ctx_t *ctx);
+glusterd_destroy_friend_update_ctx(glusterd_friend_update_ctx_t *ctx);
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.c
new file mode 100644
index 00000000000..42ef51b01b4
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.c
@@ -0,0 +1,75 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include "glusterd.h"
+#include "glusterd-utils.h"
+#include "glusterd-snapd-svc-helper.h"
+
+void
+glusterd_svc_build_snapd_rundir(glusterd_volinfo_t *volinfo, char *path,
+ int path_len)
+{
+ char workdir[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = THIS->private;
+
+ GLUSTERD_GET_VOLUME_PID_DIR(workdir, volinfo, priv);
+ snprintf(path, path_len, "%s", workdir);
+}
+
+void
+glusterd_svc_build_snapd_socket_filepath(glusterd_volinfo_t *volinfo,
+ char *path, int path_len)
+{
+ char sockfilepath[PATH_MAX] = {
+ 0,
+ };
+ char rundir[PATH_MAX] = {
+ 0,
+ };
+ int32_t len = 0;
+
+ glusterd_svc_build_snapd_rundir(volinfo, rundir, sizeof(rundir));
+ len = snprintf(sockfilepath, sizeof(sockfilepath), "%s/run-%s", rundir,
+ uuid_utoa(MY_UUID));
+ if ((len < 0) || (len >= sizeof(sockfilepath))) {
+ sockfilepath[0] = 0;
+ }
+
+ glusterd_set_socket_filepath(sockfilepath, path, path_len);
+}
+
+void
+glusterd_svc_build_snapd_pidfile(glusterd_volinfo_t *volinfo, char *path,
+ int path_len)
+{
+ char rundir[PATH_MAX] = {
+ 0,
+ };
+
+ glusterd_svc_build_snapd_rundir(volinfo, rundir, sizeof(rundir));
+
+ snprintf(path, path_len, "%s/%s-snapd.pid", rundir, volinfo->volname);
+}
+
+void
+glusterd_svc_build_snapd_volfile(glusterd_volinfo_t *volinfo, char *path,
+ int path_len)
+{
+ char workdir[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = THIS->private;
+
+ GLUSTERD_GET_VOLUME_DIR(workdir, volinfo, priv);
+
+ snprintf(path, path_len, "%s/%s-snapd.vol", workdir, volinfo->volname);
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.h
new file mode 100644
index 00000000000..3e23c2ce942
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc-helper.h
@@ -0,0 +1,32 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_SNAPD_SVC_HELPER_H_
+#define _GLUSTERD_SNAPD_SVC_HELPER_H_
+
+#include "glusterd.h"
+
+void
+glusterd_svc_build_snapd_rundir(glusterd_volinfo_t *volinfo, char *path,
+ int path_len);
+
+void
+glusterd_svc_build_snapd_socket_filepath(glusterd_volinfo_t *volinfo,
+ char *path, int path_len);
+
+void
+glusterd_svc_build_snapd_pidfile(glusterd_volinfo_t *volinfo, char *path,
+ int path_len);
+
+void
+glusterd_svc_build_snapd_volfile(glusterd_volinfo_t *volinfo, char *path,
+ int path_len);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
new file mode 100644
index 00000000000..d75f249b29e
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.c
@@ -0,0 +1,478 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
+#include "glusterd-utils.h"
+#include "glusterd-volgen.h"
+#include "glusterd-messages.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-conn-mgmt.h"
+#include "glusterd-proc-mgmt.h"
+#include "glusterd-snapd-svc.h"
+#include "glusterd-snapd-svc-helper.h"
+#include "glusterd-snapshot-utils.h"
+#include <glusterfs/syscall.h>
+
+char *snapd_svc_name = "snapd";
+
+static void
+glusterd_svc_build_snapd_logdir(char *logdir, char *volname, size_t len)
+{
+ glusterd_conf_t *priv = THIS->private;
+ snprintf(logdir, len, "%s/snaps/%s", priv->logdir, volname);
+}
+
+static void
+glusterd_svc_build_snapd_logfile(char *logfile, char *logdir, size_t len)
+{
+ snprintf(logfile, len, "%s/snapd.log", logdir);
+}
+
+void
+glusterd_snapdsvc_build(glusterd_svc_t *svc)
+{
+ svc->manager = glusterd_snapdsvc_manager;
+ svc->start = glusterd_snapdsvc_start;
+ svc->stop = glusterd_svc_stop;
+}
+
+int
+glusterd_snapdsvc_init(void *data)
+{
+ int ret = -1;
+ char rundir[PATH_MAX] = {
+ 0,
+ };
+ char sockpath[PATH_MAX] = {
+ 0,
+ };
+ char pidfile[PATH_MAX] = {
+ 0,
+ };
+ char volfile[PATH_MAX] = {
+ 0,
+ };
+ char logdir[PATH_MAX] = {
+ 0,
+ };
+ char logfile[PATH_MAX] = {
+ 0,
+ };
+ char volfileid[256] = {0};
+ glusterd_svc_t *svc = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_conn_notify_t notify = NULL;
+ xlator_t *this = NULL;
+ char *volfileserver = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ volinfo = data;
+
+ svc = &(volinfo->snapd.svc);
+
+ ret = snprintf(svc->name, sizeof(svc->name), "%s", snapd_svc_name);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ notify = glusterd_snapdsvc_rpc_notify;
+
+ glusterd_svc_build_snapd_rundir(volinfo, rundir, sizeof(rundir));
+ glusterd_svc_create_rundir(rundir);
+
+ /* Initialize the connection mgmt */
+ glusterd_svc_build_snapd_socket_filepath(volinfo, sockpath,
+ sizeof(sockpath));
+ ret = glusterd_conn_init(&(svc->conn), sockpath, 600, notify);
+ if (ret)
+ goto out;
+
+ /* Initialize the process mgmt */
+ glusterd_svc_build_snapd_pidfile(volinfo, pidfile, sizeof(pidfile));
+ glusterd_svc_build_snapd_volfile(volinfo, volfile, sizeof(volfile));
+ glusterd_svc_build_snapd_logdir(logdir, volinfo->volname, sizeof(logdir));
+ ret = mkdir_p(logdir, 0755, _gf_true);
+ if ((ret == -1) && (EEXIST != errno)) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
+ "Unable to create logdir %s", logdir);
+ goto out;
+ }
+ glusterd_svc_build_snapd_logfile(logfile, logdir, sizeof(logfile));
+ len = snprintf(volfileid, sizeof(volfileid), "snapd/%s", volinfo->volname);
+ if ((len < 0) || (len >= sizeof(volfileid))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ if (dict_get_str(this->options, "transport.socket.bind-address",
+ &volfileserver) != 0) {
+ volfileserver = "localhost";
+ }
+ ret = glusterd_proc_init(&(svc->proc), snapd_svc_name, pidfile, logdir,
+ logfile, volfile, volfileid, volfileserver);
+ if (ret)
+ goto out;
+
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_snapdsvc_manager(glusterd_svc_t *svc, void *data, int flags)
+{
+ int ret = 0;
+ xlator_t *this = THIS;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ volinfo = data;
+
+ if (!svc->inited) {
+ ret = glusterd_snapdsvc_init(volinfo);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_INIT_FAIL,
+ "Failed to initialize "
+ "snapd service for volume %s",
+ volinfo->volname);
+ goto out;
+ } else {
+ svc->inited = _gf_true;
+ gf_msg_debug(THIS->name, 0,
+ "snapd service "
+ "initialized");
+ }
+ }
+
+ ret = glusterd_is_snapd_enabled(volinfo);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to read volume "
+ "options");
+ goto out;
+ }
+
+ if (ret) {
+ if (!glusterd_is_volume_started(volinfo)) {
+ if (glusterd_proc_is_running(&svc->proc)) {
+ ret = svc->stop(svc, SIGTERM);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_STOP_FAIL,
+ "Couldn't stop snapd for "
+ "volume: %s",
+ volinfo->volname);
+ } else {
+ /* Since snapd is not running set ret to 0 */
+ ret = 0;
+ }
+ goto out;
+ }
+
+ ret = glusterd_snapdsvc_create_volfile(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_CREATE_FAIL,
+ "Couldn't create "
+ "snapd volfile for volume: %s",
+ volinfo->volname);
+ goto out;
+ }
+
+ ret = svc->start(svc, flags);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_START_FAIL,
+ "Couldn't start "
+ "snapd for volume: %s",
+ volinfo->volname);
+ goto out;
+ }
+
+ glusterd_volinfo_ref(volinfo);
+ ret = glusterd_conn_connect(&(svc->conn));
+ if (ret) {
+ glusterd_volinfo_unref(volinfo);
+ goto out;
+ }
+
+ } else if (glusterd_proc_is_running(&svc->proc)) {
+ ret = svc->stop(svc, SIGTERM);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_STOP_FAIL,
+ "Couldn't stop snapd for volume: %s", volinfo->volname);
+ goto out;
+ }
+ volinfo->snapd.port = 0;
+ }
+
+out:
+ if (ret) {
+ gf_event(EVENT_SVC_MANAGER_FAILED, "volume=%s;svc_name=%s",
+ volinfo->volname, svc->name);
+ }
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+int32_t
+glusterd_snapdsvc_start(glusterd_svc_t *svc, int flags)
+{
+ int ret = -1;
+ runner_t runner = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ char valgrind_logfile[PATH_MAX] = {0};
+ int snapd_port = 0;
+ char msg[1024] = {
+ 0,
+ };
+ char snapd_id[PATH_MAX] = {
+ 0,
+ };
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_snapdsvc_t *snapd = NULL;
+ char *localtime_logging = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ if (glusterd_proc_is_running(&svc->proc)) {
+ ret = 0;
+ goto out;
+ }
+
+ /* Get volinfo->snapd from svc object */
+ snapd = cds_list_entry(svc, glusterd_snapdsvc_t, svc);
+ if (!snapd) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_OBJ_GET_FAIL,
+ "Failed to get snapd object "
+ "from snapd service");
+ goto out;
+ }
+
+ /* Get volinfo from snapd */
+ volinfo = cds_list_entry(snapd, glusterd_volinfo_t, snapd);
+ if (!volinfo) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to get volinfo from "
+ "from snapd");
+ goto out;
+ }
+
+ ret = sys_access(svc->proc.volfile, F_OK);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_DEBUG, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "snapd Volfile %s is not present", svc->proc.volfile);
+ /* If glusterd is down on one of the nodes and during
+ * that time "USS is enabled" for the first time. After some
+ * time when the glusterd which was down comes back it tries
+ * to look for the snapd volfile and it does not find snapd
+ * volfile and because of this starting of snapd fails.
+ * Therefore, if volfile is not present then create a fresh
+ * volfile.
+ */
+ ret = glusterd_snapdsvc_create_volfile(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Couldn't create "
+ "snapd volfile for volume: %s",
+ volinfo->volname);
+ goto out;
+ }
+ }
+ runinit(&runner);
+
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
+ len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-snapd.log",
+ svc->proc.logdir);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
+ runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
+ }
+
+ snprintf(snapd_id, sizeof(snapd_id), "snapd-%s", volinfo->volname);
+ runner_add_args(&runner, SBIN_DIR "/glusterfsd", "-s",
+ svc->proc.volfileserver, "--volfile-id",
+ svc->proc.volfileid, "-p", svc->proc.pidfile, "-l",
+ svc->proc.logfile, "--brick-name", snapd_id, "-S",
+ svc->conn.sockpath, "--process-name", svc->name, NULL);
+ if (dict_get_str(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY,
+ &localtime_logging) == 0) {
+ if (strcmp(localtime_logging, "enable") == 0)
+ runner_add_arg(&runner, "--localtime-logging");
+ }
+
+ snapd_port = pmap_assign_port(THIS, volinfo->snapd.port, snapd_id);
+ if (!snapd_port) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PORTS_EXHAUSTED,
+ "All the ports in the range are exhausted, can't start "
+ "snapd for volume %s",
+ volinfo->volname);
+ ret = -1;
+ goto out;
+ }
+
+ volinfo->snapd.port = snapd_port;
+
+ runner_add_arg(&runner, "--brick-port");
+ runner_argprintf(&runner, "%d", snapd_port);
+ runner_add_arg(&runner, "--xlator-option");
+ runner_argprintf(&runner, "%s-server.listen-port=%d", volinfo->volname,
+ snapd_port);
+ runner_add_arg(&runner, "--no-mem-accounting");
+
+ snprintf(msg, sizeof(msg), "Starting the snapd service for volume %s",
+ volinfo->volname);
+ runner_log(&runner, this->name, GF_LOG_DEBUG, msg);
+
+ if (flags == PROC_START_NO_WAIT) {
+ ret = runner_run_nowait(&runner);
+ } else {
+ synclock_unlock(&priv->big_lock);
+ {
+ ret = runner_run(&runner);
+ }
+ synclock_lock(&priv->big_lock);
+ }
+
+out:
+ return ret;
+}
+
+int
+glusterd_snapdsvc_restart()
+{
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *tmp = NULL;
+ int ret = 0;
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = NULL;
+ glusterd_svc_t *svc = NULL;
+
+ GF_ASSERT(this);
+
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ cds_list_for_each_entry_safe(volinfo, tmp, &conf->volumes, vol_list)
+ {
+ /* Start per volume snapd svc */
+ if (volinfo->status == GLUSTERD_STATUS_STARTED) {
+ svc = &(volinfo->snapd.svc);
+ ret = svc->manager(svc, volinfo, PROC_START_NO_WAIT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_START_FAIL,
+ "Couldn't resolve snapd for "
+ "vol: %s on restart",
+ volinfo->volname);
+ gf_event(EVENT_SVC_MANAGER_FAILED, "volume=%s;svc_name=%s",
+ volinfo->volname, svc->name);
+ goto out;
+ }
+ }
+ }
+out:
+ return ret;
+}
+
+int
+glusterd_snapdsvc_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event)
+{
+ int ret = 0;
+ glusterd_svc_t *svc = NULL;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_snapdsvc_t *snapd = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ svc = cds_list_entry(conn, glusterd_svc_t, conn);
+ if (!svc) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL,
+ "Failed to get the service");
+ return -1;
+ }
+ snapd = cds_list_entry(svc, glusterd_snapdsvc_t, svc);
+ if (!snapd) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_OBJ_GET_FAIL,
+ "Failed to get the "
+ "snapd object");
+ return -1;
+ }
+
+ volinfo = cds_list_entry(snapd, glusterd_volinfo_t, snapd);
+ if (!volinfo) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to get the "
+ "volinfo object");
+ return -1;
+ }
+
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ gf_msg_debug(this->name, 0,
+ "%s has connected with "
+ "glusterd.",
+ svc->name);
+ gf_event(EVENT_SVC_CONNECTED, "volume=%s;svc_name=%s",
+ volinfo->volname, svc->name);
+ svc->online = _gf_true;
+ break;
+
+ case RPC_CLNT_DISCONNECT:
+ if (svc->online) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NODE_DISCONNECTED,
+ "%s has disconnected "
+ "from glusterd.",
+ svc->name);
+ gf_event(EVENT_SVC_DISCONNECTED, "volume=%s;svc_name=%s",
+ volinfo->volname, svc->name);
+ svc->online = _gf_false;
+ }
+ break;
+
+ case RPC_CLNT_DESTROY:
+ glusterd_volinfo_unref(volinfo);
+ break;
+
+ default:
+ gf_msg_trace(this->name, 0, "got some other RPC event %d", event);
+ break;
+ }
+
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapd-svc.h b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.h
new file mode 100644
index 00000000000..e15dbf54315
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-snapd-svc.h
@@ -0,0 +1,42 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_SNAPD_SVC_H_
+#define _GLUSTERD_SNAPD_SVC_H_
+
+#include "glusterd-svc-mgmt.h"
+
+typedef struct glusterd_snapdsvc_ glusterd_snapdsvc_t;
+
+struct glusterd_snapdsvc_ {
+ glusterd_svc_t svc;
+ gf_store_handle_t *handle;
+ int port;
+};
+
+void
+glusterd_snapdsvc_build(glusterd_svc_t *svc);
+
+int
+glusterd_snapdsvc_init(void *data);
+
+int
+glusterd_snapdsvc_manager(glusterd_svc_t *svc, void *data, int flags);
+
+int
+glusterd_snapdsvc_start(glusterd_svc_t *svc, int flags);
+
+int
+glusterd_snapdsvc_restart();
+
+int
+glusterd_snapdsvc_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
new file mode 100644
index 00000000000..995268b796d
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.c
@@ -0,0 +1,4290 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <inttypes.h>
+
+#if defined(GF_LINUX_HOST_OS)
+#include <mntent.h>
+#else
+#include "mntent_compat.h"
+#endif
+#include <dlfcn.h>
+
+#include <glusterfs/dict.h>
+#include <glusterfs/syscall.h>
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-messages.h"
+#include "glusterd-store.h"
+#include "glusterd-volgen.h"
+#include "glusterd-snapd-svc.h"
+#include "glusterd-svc-helper.h"
+#include "glusterd-snapd-svc-helper.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-server-quorum.h"
+#include "glusterd-messages.h"
+#include "glusterd-errno.h"
+
+/*
+ * glusterd_snap_geo_rep_restore:
+ * This function restores the atime and mtime of marker.tstamp
+ * if present from snapped marker.tstamp file.
+ */
+
+int32_t
+glusterd_snapobject_delete(glusterd_snap_t *snap)
+{
+ if (snap == NULL) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_PARAM_NULL,
+ "snap is NULL");
+ return -1;
+ }
+
+ cds_list_del_init(&snap->snap_list);
+ cds_list_del_init(&snap->volumes);
+ if (LOCK_DESTROY(&snap->lock))
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_LOCK_DESTROY_FAILED,
+ "Failed destroying lock"
+ "of snap %s",
+ snap->snapname);
+
+ GF_FREE(snap->description);
+ GF_FREE(snap);
+
+ return 0;
+}
+
+/*
+ * This function is to be called only from glusterd_peer_detach_cleanup()
+ * as this continues to delete snaps in spite of faiure while deleting
+ * one, as we don't want to fail peer_detach in such a case.
+ */
+int
+glusterd_cleanup_snaps_for_volume(glusterd_volinfo_t *volinfo)
+{
+ int32_t op_ret = 0;
+ int32_t ret = 0;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_volinfo_t *dummy_snap_vol = NULL;
+ glusterd_snap_t *snap = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ cds_list_for_each_entry_safe(snap_vol, dummy_snap_vol,
+ &volinfo->snap_volumes, snapvol_list)
+ {
+ snap = snap_vol->snapshot;
+ ret = glusterd_store_delete_snap(snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_DELETE_FAIL,
+ "Failed to remove "
+ "snap %s from store",
+ snap->snapname);
+ op_ret = ret;
+ continue;
+ }
+
+ ret = glusterd_snapobject_delete(snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_DELETE_FAIL,
+ "Failed to delete "
+ "snap object %s",
+ snap->snapname);
+ op_ret = ret;
+ continue;
+ }
+
+ ret = glusterd_store_delete_volume(snap_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_DELETE_FAIL,
+ "Failed to remove "
+ "volume %s from store",
+ snap_vol->volname);
+ op_ret = ret;
+ continue;
+ }
+
+ ret = glusterd_volinfo_delete(snap_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_DELETE_FAIL,
+ "Failed to remove "
+ "volinfo %s ",
+ snap_vol->volname);
+ op_ret = ret;
+ continue;
+ }
+ }
+
+ return op_ret;
+}
+
+int
+glusterd_snap_geo_rep_restore(glusterd_volinfo_t *snap_volinfo,
+ glusterd_volinfo_t *new_volinfo)
+{
+ char vol_tstamp_file[PATH_MAX] = {
+ 0,
+ };
+ char snap_tstamp_file[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ int geo_rep_indexing_on = 0;
+ int ret = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(snap_volinfo);
+ GF_ASSERT(new_volinfo);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ /* Check if geo-rep indexing is enabled, if yes, we need restore
+ * back the mtime of 'marker.tstamp' file.
+ */
+ geo_rep_indexing_on = glusterd_volinfo_get_boolean(new_volinfo,
+ VKEY_MARKER_XTIME);
+ if (geo_rep_indexing_on == -1) {
+ gf_msg_debug(this->name, 0,
+ "Failed"
+ " to check whether geo-rep-indexing enabled or not");
+ ret = 0;
+ goto out;
+ }
+
+ if (geo_rep_indexing_on == 1) {
+ GLUSTERD_GET_VOLUME_DIR(vol_tstamp_file, new_volinfo, priv);
+ strncat(vol_tstamp_file, "/marker.tstamp",
+ PATH_MAX - strlen(vol_tstamp_file) - 1);
+ GLUSTERD_GET_VOLUME_DIR(snap_tstamp_file, snap_volinfo, priv);
+ strncat(snap_tstamp_file, "/marker.tstamp",
+ PATH_MAX - strlen(snap_tstamp_file) - 1);
+ ret = gf_set_timestamp(snap_tstamp_file, vol_tstamp_file);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TSTAMP_SET_FAIL,
+ "Unable to set atime and mtime of %s as of %s",
+ vol_tstamp_file, snap_tstamp_file);
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+
+/* This function will copy snap volinfo to the new
+ * passed volinfo and regenerate backend store files
+ * for the restored snap.
+ *
+ * @param new_volinfo new volinfo
+ * @param snap_volinfo volinfo of snap volume
+ *
+ * @return 0 on success and -1 on failure
+ *
+ * TODO: Duplicate all members of volinfo, e.g. geo-rep sync slaves
+ */
+int32_t
+glusterd_snap_volinfo_restore(dict_t *dict, dict_t *rsp_dict,
+ glusterd_volinfo_t *new_volinfo,
+ glusterd_volinfo_t *snap_volinfo,
+ int32_t volcount)
+{
+ char *value = NULL;
+ char key[64] = "";
+ int32_t brick_count = -1;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *new_brickinfo = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp_dict);
+
+ GF_VALIDATE_OR_GOTO(this->name, new_volinfo, out);
+ GF_VALIDATE_OR_GOTO(this->name, snap_volinfo, out);
+
+ brick_count = 0;
+ cds_list_for_each_entry(brickinfo, &snap_volinfo->bricks, brick_list)
+ {
+ brick_count++;
+ ret = glusterd_brickinfo_new(&new_brickinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NEW_INFO_FAIL,
+ "Failed to create "
+ "new brickinfo");
+ goto out;
+ }
+
+ /* Duplicate brickinfo */
+ ret = glusterd_brickinfo_dup(brickinfo, new_brickinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_SET_INFO_FAIL,
+ "Failed to dup "
+ "brickinfo");
+ goto out;
+ }
+
+ /* Fetch values if present in dict These values won't
+ * be present in case of a missed restore. In that case
+ * it's fine to use the local node's value
+ */
+ snprintf(key, sizeof(key), "snap%d.brick%d.path", volcount,
+ brick_count);
+ ret = dict_get_str(dict, key, &value);
+ if (!ret)
+ gf_strncpy(new_brickinfo->path, value, sizeof(new_brickinfo->path));
+
+ snprintf(key, sizeof(key), "snap%d.brick%d.snap_status", volcount,
+ brick_count);
+ ret = dict_get_int32(dict, key, &new_brickinfo->snap_status);
+
+ snprintf(key, sizeof(key), "snap%d.brick%d.device_path", volcount,
+ brick_count);
+ ret = dict_get_str(dict, key, &value);
+ if (!ret)
+ gf_strncpy(new_brickinfo->device_path, value,
+ sizeof(new_brickinfo->device_path));
+
+ snprintf(key, sizeof(key), "snap%d.brick%d.fs_type", volcount,
+ brick_count);
+ ret = dict_get_str(dict, key, &value);
+ if (!ret)
+ gf_strncpy(new_brickinfo->fstype, value,
+ sizeof(new_brickinfo->fstype));
+
+ snprintf(key, sizeof(key), "snap%d.brick%d.mnt_opts", volcount,
+ brick_count);
+ ret = dict_get_str(dict, key, &value);
+ if (!ret)
+ gf_strncpy(new_brickinfo->mnt_opts, value,
+ sizeof(new_brickinfo->mnt_opts));
+
+ /* If the brick is not of this peer, or snapshot is missed *
+ * for the brick do not replace the xattr for it */
+ if ((!gf_uuid_compare(brickinfo->uuid, MY_UUID)) &&
+ (brickinfo->snap_status != -1)) {
+ /* We need to replace the volume id of all the bricks
+ * to the volume id of the origin volume. new_volinfo
+ * has the origin volume's volume id*/
+ ret = sys_lsetxattr(new_brickinfo->path, GF_XATTR_VOL_ID_KEY,
+ new_volinfo->volume_id,
+ sizeof(new_volinfo->volume_id), XATTR_REPLACE);
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SET_XATTR_FAIL,
+ "Attribute=%s, Path=%s, Reason=%s, Snap=%s",
+ GF_XATTR_VOL_ID_KEY, new_brickinfo->path,
+ strerror(errno), new_volinfo->volname, NULL);
+ goto out;
+ }
+ }
+
+ /* If a snapshot is pending for this brick then
+ * restore should also be pending
+ */
+ if (brickinfo->snap_status == -1) {
+ /* Adding missed delete to the dict */
+ ret = glusterd_add_missed_snaps_to_dict(
+ rsp_dict, snap_volinfo, brickinfo, brick_count,
+ GF_SNAP_OPTION_TYPE_RESTORE);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MISSEDSNAP_INFO_SET_FAIL,
+ "Failed to add missed snapshot info "
+ "for %s:%s in the rsp_dict",
+ brickinfo->hostname, brickinfo->path);
+ goto out;
+ }
+ }
+
+ cds_list_add_tail(&new_brickinfo->brick_list, &new_volinfo->bricks);
+ /* ownership of new_brickinfo is passed to new_volinfo */
+ new_brickinfo = NULL;
+ }
+
+ /* Regenerate all volfiles */
+ ret = glusterd_create_volfiles_and_notify_services(new_volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Failed to regenerate volfiles");
+ goto out;
+ }
+
+ /* Restore geo-rep marker.tstamp's timestamp */
+ ret = glusterd_snap_geo_rep_restore(snap_volinfo, new_volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TSTAMP_SET_FAIL,
+ "Geo-rep: marker.tstamp's timestamp restoration failed");
+ goto out;
+ }
+
+out:
+ if (ret && (NULL != new_brickinfo)) {
+ (void)glusterd_brickinfo_delete(new_brickinfo);
+ }
+
+ return ret;
+}
+
+int
+glusterd_snap_volinfo_find_by_volume_id(uuid_t volume_id,
+ glusterd_volinfo_t **volinfo)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *voliter = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(volinfo);
+
+ if (gf_uuid_is_null(volume_id)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_UUID_NULL,
+ "Volume UUID is NULL");
+ goto out;
+ }
+
+ cds_list_for_each_entry(snap, &priv->snapshots, snap_list)
+ {
+ cds_list_for_each_entry(voliter, &snap->volumes, vol_list)
+ {
+ if (gf_uuid_compare(volume_id, voliter->volume_id))
+ continue;
+ *volinfo = voliter;
+ ret = 0;
+ goto out;
+ }
+ }
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_NOT_FOUND,
+ "Snap volume not found");
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_snap_volinfo_find(char *snap_volname, glusterd_snap_t *snap,
+ glusterd_volinfo_t **volinfo)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(snap);
+ GF_ASSERT(snap_volname);
+
+ cds_list_for_each_entry(snap_vol, &snap->volumes, vol_list)
+ {
+ if (!strcmp(snap_vol->volname, snap_volname)) {
+ ret = 0;
+ *volinfo = snap_vol;
+ goto out;
+ }
+ }
+
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_SNAP_NOT_FOUND,
+ "Snap volume %s not found", snap_volname);
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_snap_volinfo_find_from_parent_volname(char *origin_volname,
+ glusterd_snap_t *snap,
+ glusterd_volinfo_t **volinfo)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(snap);
+ GF_ASSERT(origin_volname);
+
+ cds_list_for_each_entry(snap_vol, &snap->volumes, vol_list)
+ {
+ if (!strcmp(snap_vol->parent_volname, origin_volname)) {
+ ret = 0;
+ *volinfo = snap_vol;
+ goto out;
+ }
+ }
+
+ gf_msg_debug(this->name, 0,
+ "Snap volume not found(snap: %s, "
+ "origin-volume: %s",
+ snap->snapname, origin_volname);
+
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* Exports a bricks snapshot details only if required
+ *
+ * The details will be exported only if the cluster op-version is greater than
+ * 4, ie. snapshot is supported in the cluster
+ */
+int
+gd_add_brick_snap_details_to_dict(dict_t *dict, char *prefix,
+ glusterd_brickinfo_t *brickinfo)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char key[256] = {
+ 0,
+ };
+
+ this = THIS;
+ GF_ASSERT(this != NULL);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out);
+
+ GF_VALIDATE_OR_GOTO(this->name, (dict != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (brickinfo != NULL), out);
+
+ if (conf->op_version < GD_OP_VERSION_3_6_0) {
+ ret = 0;
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%s.snap_status", prefix);
+ ret = dict_set_int32(dict, key, brickinfo->snap_status);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_STATUS_FAIL,
+ "Failed to set snap_status for %s:%s", brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%s.device_path", prefix);
+ ret = dict_set_str(dict, key, brickinfo->device_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snap_device for %s:%s", brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%s.fs_type", prefix);
+ ret = dict_set_str(dict, key, brickinfo->fstype);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set fstype for %s:%s", brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%s.mnt_opts", prefix);
+ ret = dict_set_str(dict, key, brickinfo->mnt_opts);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MOUNTOPTS_FAIL,
+ "Failed to set mnt_opts for %s:%s", brickinfo->hostname,
+ brickinfo->path);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%s.mount_dir", prefix);
+ ret = dict_set_str(dict, key, brickinfo->mount_dir);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to set mount_dir for %s:%s", brickinfo->hostname,
+ brickinfo->path);
+
+out:
+ return ret;
+}
+
+/* Exports a volumes snapshot details only if required.
+ *
+ * The snapshot details will only be exported if the cluster op-version is
+ * greater than 4, ie. snapshot is supported in the cluster
+ */
+int
+gd_add_vol_snap_details_to_dict(dict_t *dict, char *prefix,
+ glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char key[256] = {
+ 0,
+ };
+
+ this = THIS;
+ GF_ASSERT(this != NULL);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out);
+
+ GF_VALIDATE_OR_GOTO(this->name, (dict != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (volinfo != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out);
+
+ if (conf->op_version < GD_OP_VERSION_3_6_0) {
+ ret = 0;
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%s.restored_from_snap", prefix);
+ ret = dict_set_dynstr_with_alloc(dict, key,
+ uuid_utoa(volinfo->restored_from_snap));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set %s for volume"
+ "%s",
+ key, volinfo->volname);
+ goto out;
+ }
+
+ if (strlen(volinfo->parent_volname) > 0) {
+ snprintf(key, sizeof(key), "%s.parent_volname", prefix);
+ ret = dict_set_dynstr_with_alloc(dict, key, volinfo->parent_volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set %s "
+ "for volume %s",
+ key, volinfo->volname);
+ goto out;
+ }
+ }
+
+ snprintf(key, sizeof(key), "%s.is_snap_volume", prefix);
+ ret = dict_set_uint32(dict, key, volinfo->is_snap_volume);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set %s for volume"
+ "%s",
+ key, volinfo->volname);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%s.snap-max-hard-limit", prefix);
+ ret = dict_set_uint64(dict, key, volinfo->snap_max_hard_limit);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set %s for volume"
+ "%s",
+ key, volinfo->volname);
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+glusterd_add_missed_snaps_to_export_dict(dict_t *peer_data)
+{
+ char name_buf[PATH_MAX] = "";
+ char value[PATH_MAX] = "";
+ int32_t missed_snap_count = 0;
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ glusterd_missed_snap_info *missed_snapinfo = NULL;
+ glusterd_snap_op_t *snap_opinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(peer_data);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ /* Add the missed_entries in the dict */
+ cds_list_for_each_entry(missed_snapinfo, &priv->missed_snaps_list,
+ missed_snaps)
+ {
+ cds_list_for_each_entry(snap_opinfo, &missed_snapinfo->snap_ops,
+ snap_ops_list)
+ {
+ snprintf(name_buf, sizeof(name_buf), "missed_snaps_%d",
+ missed_snap_count);
+ snprintf(value, sizeof(value), "%s:%s=%s:%d:%s:%d:%d",
+ missed_snapinfo->node_uuid, missed_snapinfo->snap_uuid,
+ snap_opinfo->snap_vol_id, snap_opinfo->brick_num,
+ snap_opinfo->brick_path, snap_opinfo->op,
+ snap_opinfo->status);
+
+ ret = dict_set_dynstr_with_alloc(peer_data, name_buf, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set %s", name_buf);
+ goto out;
+ }
+ missed_snap_count++;
+ }
+ }
+
+ ret = dict_set_int32(peer_data, "missed_snap_count", missed_snap_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set missed_snap_count");
+ goto out;
+ }
+
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_add_snap_to_dict(glusterd_snap_t *snap, dict_t *peer_data,
+ int32_t snap_count)
+{
+ char buf[64] = "";
+ char prefix[32] = "";
+ int32_t ret = -1;
+ int32_t volcount = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ gf_boolean_t host_bricks = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(snap);
+ GF_ASSERT(peer_data);
+
+ snprintf(prefix, sizeof(prefix), "snap%d", snap_count);
+
+ cds_list_for_each_entry(volinfo, &snap->volumes, vol_list)
+ {
+ volcount++;
+ ret = glusterd_add_volume_to_dict(volinfo, peer_data, volcount, prefix);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to add snap:%s volume:%s "
+ "to peer_data dict for handshake",
+ snap->snapname, volinfo->volname);
+ goto out;
+ }
+
+ if (glusterd_is_volume_quota_enabled(volinfo)) {
+ ret = glusterd_vol_add_quota_conf_to_dict(volinfo, peer_data,
+ volcount, prefix);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to add quota conf for "
+ "snap:%s volume:%s to peer_data "
+ "dict for handshake",
+ snap->snapname, volinfo->volname);
+ goto out;
+ }
+ }
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
+ host_bricks = _gf_true;
+ break;
+ }
+ }
+ }
+
+ snprintf(buf, sizeof(buf), "%s.host_bricks", prefix);
+ ret = dict_set_int8(peer_data, buf, (int8_t)host_bricks);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set host_bricks for snap %s", snap->snapname);
+ goto out;
+ }
+
+ snprintf(buf, sizeof(buf), "%s.volcount", prefix);
+ ret = dict_set_int32(peer_data, buf, volcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set volcount for snap %s", snap->snapname);
+ goto out;
+ }
+
+ snprintf(buf, sizeof(buf), "%s.snapname", prefix);
+ ret = dict_set_dynstr_with_alloc(peer_data, buf, snap->snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set snapname for snap %s", snap->snapname);
+ goto out;
+ }
+
+ snprintf(buf, sizeof(buf), "%s.snap_id", prefix);
+ ret = dict_set_dynstr_with_alloc(peer_data, buf, uuid_utoa(snap->snap_id));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set snap_id for snap %s", snap->snapname);
+ goto out;
+ }
+
+ if (snap->description) {
+ snprintf(buf, sizeof(buf), "%s.description", prefix);
+ ret = dict_set_dynstr_with_alloc(peer_data, buf, snap->description);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set description for snap %s", snap->snapname);
+ goto out;
+ }
+ }
+
+ snprintf(buf, sizeof(buf), "%s.time_stamp", prefix);
+ ret = dict_set_int64(peer_data, buf, (int64_t)snap->time_stamp);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set time_stamp for snap %s", snap->snapname);
+ goto out;
+ }
+
+ snprintf(buf, sizeof(buf), "%s.snap_restored", prefix);
+ ret = dict_set_int8(peer_data, buf, snap->snap_restored);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set snap_restored for snap %s", snap->snapname);
+ goto out;
+ }
+
+ snprintf(buf, sizeof(buf), "%s.snap_status", prefix);
+ ret = dict_set_int32(peer_data, buf, snap->snap_status);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set snap_status for snap %s", snap->snapname);
+ goto out;
+ }
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_add_snapshots_to_export_dict(dict_t *peer_data)
+{
+ int32_t snap_count = 0;
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ glusterd_snap_t *snap = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(peer_data);
+
+ cds_list_for_each_entry(snap, &priv->snapshots, snap_list)
+ {
+ snap_count++;
+ ret = glusterd_add_snap_to_dict(snap, peer_data, snap_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to add snap(%s) to the "
+ " peer_data dict for handshake",
+ snap->snapname);
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32(peer_data, "snap_count", snap_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snap_count");
+ goto out;
+ }
+
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* Imports the snapshot details of a brick if required and available
+ *
+ * Snapshot details will be imported only if the cluster op-version is >= 4
+ */
+int
+gd_import_new_brick_snap_details(dict_t *dict, char *prefix,
+ glusterd_brickinfo_t *brickinfo)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char key[512] = {
+ 0,
+ };
+ char *snap_device = NULL;
+ char *fs_type = NULL;
+ char *mnt_opts = NULL;
+ char *mount_dir = NULL;
+
+ this = THIS;
+ GF_ASSERT(this != NULL);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out);
+
+ GF_VALIDATE_OR_GOTO(this->name, (dict != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (brickinfo != NULL), out);
+
+ if (conf->op_version < GD_OP_VERSION_3_6_0) {
+ ret = 0;
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%s.snap_status", prefix);
+ ret = dict_get_int32(dict, key, &brickinfo->snap_status);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "%s missing in payload", key);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%s.device_path", prefix);
+ ret = dict_get_str(dict, key, &snap_device);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "%s missing in payload", key);
+ goto out;
+ }
+ gf_strncpy(brickinfo->device_path, snap_device,
+ sizeof(brickinfo->device_path));
+ snprintf(key, sizeof(key), "%s.fs_type", prefix);
+ ret = dict_get_str(dict, key, &fs_type);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "%s missing in payload", key);
+ goto out;
+ }
+ gf_strncpy(brickinfo->fstype, fs_type, sizeof(brickinfo->fstype));
+
+ snprintf(key, sizeof(key), "%s.mnt_opts", prefix);
+ ret = dict_get_str(dict, key, &mnt_opts);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "%s missing in payload", key);
+ goto out;
+ }
+ gf_strncpy(brickinfo->mnt_opts, mnt_opts, sizeof(brickinfo->mnt_opts));
+
+ snprintf(key, sizeof(key), "%s.mount_dir", prefix);
+ ret = dict_get_str(dict, key, &mount_dir);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "%s missing in payload", key);
+ goto out;
+ }
+ gf_strncpy(brickinfo->mount_dir, mount_dir, sizeof(brickinfo->mount_dir));
+
+out:
+ return ret;
+}
+
+/*
+ * Imports the snapshot details of a volume if required and available
+ *
+ * Snapshot details will be imported only if cluster.op_version is greater than
+ * or equal to GD_OP_VERSION_3_6_0, the op-version from which volume snapshot is
+ * supported.
+ */
+int
+gd_import_volume_snap_details(dict_t *dict, glusterd_volinfo_t *volinfo,
+ char *prefix, char *volname)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char key[256] = {
+ 0,
+ };
+ char *restored_snap = NULL;
+
+ this = THIS;
+ GF_ASSERT(this != NULL);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out);
+
+ GF_VALIDATE_OR_GOTO(this->name, (dict != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (volinfo != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (prefix != NULL), out);
+ GF_VALIDATE_OR_GOTO(this->name, (volname != NULL), out);
+
+ if (conf->op_version < GD_OP_VERSION_3_6_0) {
+ ret = 0;
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%s.is_snap_volume", prefix);
+ uint32_t is_snap_int;
+ ret = dict_get_uint32(dict, key, &is_snap_int);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "%s missing in payload "
+ "for %s",
+ key, volname);
+ goto out;
+ }
+ volinfo->is_snap_volume = (is_snap_int != 0);
+
+ snprintf(key, sizeof(key), "%s.restored_from_snap", prefix);
+ ret = dict_get_str(dict, key, &restored_snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "%s missing in payload "
+ "for %s",
+ key, volname);
+ goto out;
+ }
+
+ gf_uuid_parse(restored_snap, volinfo->restored_from_snap);
+
+ snprintf(key, sizeof(key), "%s.snap-max-hard-limit", prefix);
+ ret = dict_get_uint64(dict, key, &volinfo->snap_max_hard_limit);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "%s missing in payload "
+ "for %s",
+ key, volname);
+out:
+ return ret;
+}
+
+int32_t
+glusterd_perform_missed_op(glusterd_snap_t *snap, int32_t op)
+{
+ dict_t *dict = NULL;
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *snap_volinfo = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *tmp = NULL;
+ xlator_t *this = NULL;
+ uuid_t null_uuid = {0};
+ char *parent_volname = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(snap);
+
+ dict = dict_new();
+ if (!dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Unable to create dict");
+ ret = -1;
+ goto out;
+ }
+
+ switch (op) {
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ ret = glusterd_snap_remove(dict, snap, _gf_true, _gf_false,
+ _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "Failed to remove snap");
+ goto out;
+ }
+
+ break;
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ cds_list_for_each_entry_safe(snap_volinfo, tmp, &snap->volumes,
+ vol_list)
+ {
+ parent_volname = gf_strdup(snap_volinfo->parent_volname);
+ if (!parent_volname)
+ goto out;
+
+ ret = glusterd_volinfo_find(parent_volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Could not get volinfo of %s", parent_volname);
+ goto out;
+ }
+
+ volinfo->version--;
+ gf_uuid_copy(volinfo->restored_from_snap, null_uuid);
+
+ /* gd_restore_snap_volume() uses the dict and volcount
+ * to fetch snap brick info from other nodes, which were
+ * collected during prevalidation. As this is an ad-hoc
+ * op and only local node's data matter, hence sending
+ * volcount as 0 and re-using the same dict because we
+ * need not record any missed creates in the rsp_dict.
+ */
+ ret = gd_restore_snap_volume(dict, dict, volinfo, snap_volinfo,
+ 0);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAP_RESTORE_FAIL,
+ "Failed to restore snap for %s", snap->snapname);
+ volinfo->version++;
+ goto out;
+ }
+
+ /* Restore is successful therefore delete the original
+ * volume's volinfo. If the volinfo is already restored
+ * then we should delete the backend LVMs */
+ if (!gf_uuid_is_null(volinfo->restored_from_snap)) {
+ ret = glusterd_lvm_snapshot_remove(dict, volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAP_REMOVE_FAIL,
+ "Failed to remove LVM backend");
+ goto out;
+ }
+ }
+
+ /* Detach the volinfo from priv->volumes, so that no new
+ * command can ref it any more and then unref it.
+ */
+ cds_list_del_init(&volinfo->vol_list);
+ glusterd_volinfo_unref(volinfo);
+
+ ret = glusterd_snapshot_restore_cleanup(dict, parent_volname,
+ snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAP_CLEANUP_FAIL,
+ "Failed to perform snapshot restore "
+ "cleanup for %s volume",
+ parent_volname);
+ goto out;
+ }
+
+ GF_FREE(parent_volname);
+ parent_volname = NULL;
+ }
+
+ break;
+ default:
+ /* The entry must be a create, delete, or
+ * restore entry
+ */
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid missed snap entry");
+ ret = -1;
+ goto out;
+ }
+
+out:
+ dict_unref(dict);
+ if (parent_volname) {
+ GF_FREE(parent_volname);
+ parent_volname = NULL;
+ }
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* Perform missed deletes and restores on this node */
+int32_t
+glusterd_perform_missed_snap_ops()
+{
+ int32_t ret = -1;
+ int32_t op_status = -1;
+ glusterd_conf_t *priv = NULL;
+ glusterd_missed_snap_info *missed_snapinfo = NULL;
+ glusterd_snap_op_t *snap_opinfo = NULL;
+ glusterd_snap_t *snap = NULL;
+ uuid_t snap_uuid = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ cds_list_for_each_entry(missed_snapinfo, &priv->missed_snaps_list,
+ missed_snaps)
+ {
+ /* If the pending snap_op is not for this node then continue */
+ if (strcmp(missed_snapinfo->node_uuid, uuid_utoa(MY_UUID)))
+ continue;
+
+ /* Find the snap id */
+ gf_uuid_parse(missed_snapinfo->snap_uuid, snap_uuid);
+ snap = NULL;
+ snap = glusterd_find_snap_by_id(snap_uuid);
+ if (!snap) {
+ /* If the snap is not found, then a delete or a
+ * restore can't be pending on that snap_uuid.
+ */
+ gf_msg_debug(this->name, 0, "Not a pending delete or restore op");
+ continue;
+ }
+
+ op_status = GD_MISSED_SNAP_PENDING;
+ cds_list_for_each_entry(snap_opinfo, &missed_snapinfo->snap_ops,
+ snap_ops_list)
+ {
+ /* If the snap_op is create or its status is
+ * GD_MISSED_SNAP_DONE then continue
+ */
+ if ((snap_opinfo->status == GD_MISSED_SNAP_DONE) ||
+ (snap_opinfo->op == GF_SNAP_OPTION_TYPE_CREATE))
+ continue;
+
+ /* Perform the actual op for the first time for
+ * this snap, and mark the snap_status as
+ * GD_MISSED_SNAP_DONE. For other entries for the same
+ * snap, just mark the entry as done.
+ */
+ if (op_status == GD_MISSED_SNAP_PENDING) {
+ ret = glusterd_perform_missed_op(snap, snap_opinfo->op);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAPSHOT_OP_FAILED,
+ "Failed to perform missed snap op");
+ goto out;
+ }
+ op_status = GD_MISSED_SNAP_DONE;
+ }
+
+ snap_opinfo->status = GD_MISSED_SNAP_DONE;
+ }
+ }
+
+ ret = 0;
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* Import friend volumes missed_snap_list and update *
+ * missed_snap_list if need be */
+int32_t
+glusterd_import_friend_missed_snap_list(dict_t *peer_data)
+{
+ int32_t missed_snap_count = -1;
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(peer_data);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ /* Add the friends missed_snaps entries to the in-memory list */
+ ret = dict_get_int32(peer_data, "missed_snap_count", &missed_snap_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_MISSED_SNAP_GET_FAIL,
+ "No missed snaps");
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_add_missed_snaps_to_list(peer_data, missed_snap_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+ "Failed to add missed snaps to list");
+ goto out;
+ }
+
+ ret = glusterd_perform_missed_snap_ops();
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_OP_FAILED,
+ "Failed to perform snap operations");
+ /* Not going to out at this point coz some *
+ * missed ops might have been performed. We *
+ * need to persist the current list *
+ */
+ }
+
+ ret = glusterd_store_update_missed_snaps();
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+ "Failed to update missed_snaps_list");
+ goto out;
+ }
+
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/*
+ * This function will set boolean "conflict" to true if peer snap
+ * has a version greater than snap version of local node. Otherwise
+ * boolean "conflict" will be set to false.
+ */
+int
+glusterd_check_peer_has_higher_snap_version(dict_t *peer_data,
+ char *peer_snap_name, int volcount,
+ gf_boolean_t *conflict,
+ char *prefix, glusterd_snap_t *snap,
+ char *hostname)
+{
+ glusterd_volinfo_t *snap_volinfo = NULL;
+ char key[256] = {0};
+ int version = 0, i = 0;
+ int ret = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(snap);
+ GF_ASSERT(peer_data);
+
+ for (i = 1; i <= volcount; i++) {
+ snprintf(key, sizeof(key), "%s%d.version", prefix, i);
+ ret = dict_get_int32(peer_data, key, &version);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to get "
+ "version of snap volume = %s",
+ peer_snap_name);
+ return -1;
+ }
+
+ /* TODO : As of now there is only one volume in snapshot.
+ * Change this when multiple volume snapshot is introduced
+ */
+ snap_volinfo = cds_list_entry(snap->volumes.next, glusterd_volinfo_t,
+ vol_list);
+ if (!snap_volinfo) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to get snap "
+ "volinfo %s",
+ snap->snapname);
+ return -1;
+ }
+
+ if (version > snap_volinfo->version) {
+ /* Mismatch detected */
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_VOL_VERS_MISMATCH,
+ "Version of volume %s differ. "
+ "local version = %d, remote version = %d "
+ "on peer %s",
+ snap_volinfo->volname, snap_volinfo->version, version,
+ hostname);
+ *conflict = _gf_true;
+ break;
+ } else {
+ *conflict = _gf_false;
+ }
+ }
+ return 0;
+}
+
+/* Check for the peer_snap_name in the list of existing snapshots.
+ * If a snap exists with the same name and a different snap_id, then
+ * there is a conflict. Set conflict as _gf_true, and snap to the
+ * conflicting snap object. If a snap exists with the same name, and the
+ * same snap_id, then there is no conflict. Set conflict as _gf_false
+ * and snap to the existing snap object. If no snap exists with the
+ * peer_snap_name, then there is no conflict. Set conflict as _gf_false
+ * and snap to NULL.
+ */
+void
+glusterd_is_peer_snap_conflicting(char *peer_snap_name, char *peer_snap_id,
+ gf_boolean_t *conflict,
+ glusterd_snap_t **snap, char *hostname)
+{
+ uuid_t peer_snap_uuid = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(peer_snap_name);
+ GF_ASSERT(peer_snap_id);
+ GF_ASSERT(conflict);
+ GF_ASSERT(snap);
+ GF_ASSERT(hostname);
+
+ *snap = glusterd_find_snap_by_name(peer_snap_name);
+ if (*snap) {
+ gf_uuid_parse(peer_snap_id, peer_snap_uuid);
+ if (!gf_uuid_compare(peer_snap_uuid, (*snap)->snap_id)) {
+ /* Current node contains the same snap having
+ * the same snapname and snap_id
+ */
+ gf_msg_debug(this->name, 0,
+ "Snapshot %s from peer %s present in "
+ "localhost",
+ peer_snap_name, hostname);
+ *conflict = _gf_false;
+ } else {
+ /* Current node contains the same snap having
+ * the same snapname but different snap_id
+ */
+ gf_msg_debug(this->name, 0,
+ "Snapshot %s from peer %s conflicts with "
+ "snapshot in localhost",
+ peer_snap_name, hostname);
+ *conflict = _gf_true;
+ }
+ } else {
+ /* Peer contains snapshots missing on the current node */
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_MISSED_SNAP_PRESENT,
+ "Snapshot %s from peer %s missing on localhost", peer_snap_name,
+ hostname);
+ *conflict = _gf_false;
+ }
+}
+
+/* Check if the local node is hosting any bricks for the given snapshot */
+gf_boolean_t
+glusterd_are_snap_bricks_local(glusterd_snap_t *snap)
+{
+ gf_boolean_t is_local = _gf_false;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(snap);
+
+ cds_list_for_each_entry(volinfo, &snap->volumes, vol_list)
+ {
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
+ is_local = _gf_true;
+ goto out;
+ }
+ }
+ }
+
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", is_local);
+ return is_local;
+}
+
+/* Check if the peer has missed any snap delete
+ * or restore for the given snap_id
+ */
+gf_boolean_t
+glusterd_peer_has_missed_snap_delete(uuid_t peerid, char *peer_snap_id)
+{
+ char *peer_uuid = NULL;
+ gf_boolean_t missed_delete = _gf_false;
+ glusterd_conf_t *priv = NULL;
+ glusterd_missed_snap_info *missed_snapinfo = NULL;
+ glusterd_snap_op_t *snap_opinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(peer_snap_id);
+
+ peer_uuid = uuid_utoa(peerid);
+
+ cds_list_for_each_entry(missed_snapinfo, &priv->missed_snaps_list,
+ missed_snaps)
+ {
+ /* Look for missed snap for the same peer, and
+ * the same snap_id
+ */
+ if ((!strcmp(peer_uuid, missed_snapinfo->node_uuid)) &&
+ (!strcmp(peer_snap_id, missed_snapinfo->snap_uuid))) {
+ /* Check if the missed snap's op is delete and the
+ * status is pending
+ */
+ cds_list_for_each_entry(snap_opinfo, &missed_snapinfo->snap_ops,
+ snap_ops_list)
+ {
+ if (((snap_opinfo->op == GF_SNAP_OPTION_TYPE_DELETE) ||
+ (snap_opinfo->op == GF_SNAP_OPTION_TYPE_RESTORE)) &&
+ (snap_opinfo->status == GD_MISSED_SNAP_PENDING)) {
+ missed_delete = _gf_true;
+ goto out;
+ }
+ }
+ }
+ }
+
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", missed_delete);
+ return missed_delete;
+}
+
+/* Generate and store snap volfiles for imported snap object */
+int32_t
+glusterd_gen_snap_volfiles(glusterd_volinfo_t *snap_vol, char *peer_snap_name)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *parent_volinfo = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(snap_vol);
+ GF_ASSERT(peer_snap_name);
+
+ ret = glusterd_store_volinfo(snap_vol, GLUSTERD_VOLINFO_VER_AC_NONE);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL,
+ "Failed to store snapshot "
+ "volinfo (%s) for snap %s",
+ snap_vol->volname, peer_snap_name);
+ goto out;
+ }
+
+ ret = generate_brick_volfiles(snap_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "generating the brick volfiles for the "
+ "snap %s failed",
+ peer_snap_name);
+ goto out;
+ }
+
+ ret = generate_client_volfiles(snap_vol, GF_CLIENT_TRUSTED);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "generating the trusted client volfiles for "
+ "the snap %s failed",
+ peer_snap_name);
+ goto out;
+ }
+
+ ret = generate_client_volfiles(snap_vol, GF_CLIENT_OTHER);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "generating the client volfiles for the "
+ "snap %s failed",
+ peer_snap_name);
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(snap_vol->parent_volname, &parent_volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Parent volinfo "
+ "not found for %s volume of snap %s",
+ snap_vol->volname, peer_snap_name);
+ goto out;
+ }
+
+ glusterd_list_add_snapvol(parent_volinfo, snap_vol);
+
+ ret = glusterd_store_volinfo(snap_vol, GLUSTERD_VOLINFO_VER_AC_NONE);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL,
+ "Failed to store snap volinfo");
+ goto out;
+ }
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* Import snapshot info from peer_data and add it to priv */
+int32_t
+glusterd_import_friend_snap(dict_t *peer_data, int32_t snap_count,
+ char *peer_snap_name, char *peer_snap_id)
+{
+ char buf[64] = "";
+ char prefix[32] = "";
+ char *description = NULL;
+ dict_t *dict = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_conf_t *priv = NULL;
+ int32_t ret = -1;
+ int32_t volcount = -1;
+ int32_t i = -1;
+ xlator_t *this = NULL;
+ int64_t time_stamp;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(peer_data);
+ GF_ASSERT(peer_snap_name);
+ GF_ASSERT(peer_snap_id);
+
+ snprintf(prefix, sizeof(prefix), "snap%d", snap_count);
+
+ snap = glusterd_new_snap_object();
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "Could not create "
+ "the snap object for snap %s",
+ peer_snap_name);
+ goto out;
+ }
+
+ dict = dict_new();
+ if (!dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Failed to create dict");
+ ret = -1;
+ goto out;
+ }
+
+ gf_strncpy(snap->snapname, peer_snap_name, sizeof(snap->snapname));
+ gf_uuid_parse(peer_snap_id, snap->snap_id);
+
+ snprintf(buf, sizeof(buf), "%s.description", prefix);
+ ret = dict_get_str(peer_data, buf, &description);
+ if (ret == 0 && description) {
+ snap->description = gf_strdup(description);
+ if (snap->description == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "Saving the Snapshot Description Failed");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ snprintf(buf, sizeof(buf), "%s.time_stamp", prefix);
+ ret = dict_get_int64(peer_data, buf, &time_stamp);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get time_stamp for snap %s", peer_snap_name);
+ goto out;
+ }
+ snap->time_stamp = (time_t)time_stamp;
+
+ snprintf(buf, sizeof(buf), "%s.snap_restored", prefix);
+ ret = dict_get_int8(peer_data, buf, (int8_t *)&snap->snap_restored);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get snap_restored for snap %s", peer_snap_name);
+ goto out;
+ }
+
+ snprintf(buf, sizeof(buf), "%s.snap_status", prefix);
+ ret = dict_get_int32(peer_data, buf, (int32_t *)&snap->snap_status);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get snap_status for snap %s", peer_snap_name);
+ goto out;
+ }
+
+ /* If the snap is scheduled to be decommissioned, then
+ * don't accept the snap */
+ if (snap->snap_status == GD_SNAP_STATUS_DECOMMISSION) {
+ gf_msg_debug(this->name, 0,
+ "The snap(%s) is scheduled to be decommissioned "
+ "Not accepting the snap.",
+ peer_snap_name);
+ glusterd_snap_remove(dict, snap, _gf_true, _gf_true, _gf_false);
+ ret = 0;
+ goto out;
+ }
+
+ snprintf(buf, sizeof(buf), "%s.volcount", prefix);
+ ret = dict_get_int32(peer_data, buf, &volcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volcount for snap %s", peer_snap_name);
+ goto out;
+ }
+
+ ret = glusterd_store_create_snap_dir(snap);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SNAPDIR_CREATE_FAIL,
+ "Failed to create snap dir");
+ goto out;
+ }
+
+ glusterd_list_add_order(&snap->snap_list, &priv->snapshots,
+ glusterd_compare_snap_time);
+
+ for (i = 1; i <= volcount; i++) {
+ ret = glusterd_import_volinfo(peer_data, i, &snap_vol, prefix);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL,
+ "Failed to import snap volinfo for "
+ "snap %s",
+ peer_snap_name);
+ goto out;
+ }
+
+ snap_vol->snapshot = snap;
+
+ ret = glusterd_gen_snap_volfiles(snap_vol, peer_snap_name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Failed to generate snap vol files "
+ "for snap %s",
+ peer_snap_name);
+ goto out;
+ }
+ /* During handshake, after getting updates from friend mount
+ * point for activated snapshot should exist and should not
+ * for deactivated snapshot.
+ */
+ if (glusterd_is_volume_started(snap_vol)) {
+ ret = glusterd_recreate_vol_brick_mounts(this, snap_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRK_MNT_RECREATE_FAIL,
+ "Failed to recreate brick mounts"
+ " for %s",
+ snap->snapname);
+ goto out;
+ }
+
+ (void)glusterd_start_bricks(snap_vol);
+ ret = glusterd_store_volinfo(snap_vol,
+ GLUSTERD_VOLINFO_VER_AC_NONE);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_STORE_FAIL,
+ "Failed to "
+ "write volinfo for volume %s",
+ snap_vol->volname);
+ goto out;
+ }
+ } else {
+ (void)glusterd_stop_bricks(snap_vol);
+ ret = glusterd_snap_unmount(this, snap_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_UMOUNT_FAIL,
+ "Failed to unmounts for %s", snap->snapname);
+ }
+ }
+
+ ret = glusterd_import_quota_conf(peer_data, i, snap_vol, prefix);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_QUOTA_CONFIG_IMPORT_FAIL,
+ "Failed to import quota conf "
+ "for snap %s",
+ peer_snap_name);
+ goto out;
+ }
+
+ snap_vol = NULL;
+ }
+
+ ret = glusterd_store_snap(snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "Could not store snap"
+ "object %s",
+ peer_snap_name);
+ goto out;
+ }
+ glusterd_fetchsnap_notify(this);
+
+out:
+ if (ret)
+ glusterd_snap_remove(dict, snap, _gf_true, _gf_true, _gf_false);
+
+ if (dict)
+ dict_unref(dict);
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* During a peer-handshake, after the volumes have synced, and the list of
+ * missed snapshots have synced, the node will perform the pending deletes
+ * and restores on this list. At this point, the current snapshot list in
+ * the node will be updated, and hence in case of conflicts arising during
+ * snapshot handshake, the peer hosting the bricks will be given precedence
+ * Likewise, if there will be a conflict, and both peers will be in the same
+ * state, i.e either both would be hosting bricks or both would not be hosting
+ * bricks, then a decision can't be taken and a peer-reject will happen.
+ *
+ * glusterd_compare_snap() & glusterd_update_snaps () implement the following
+ * algorithm to perform the above task. Please note the former function tries to
+ * iterate over the snaps one at a time and updating the relevant fields in the
+ * dictionary and then glusterd_update_snaps () go over all the snaps and update
+ * them at one go as part of a synctask.
+ * Step 1: Start.
+ * Step 2: Check if the peer is missing a delete or restore on the said snap.
+ * If yes, goto step 6.
+ * Step 3: Check if there is a conflict between the peer's data and the
+ * local snap. If no, goto step 5.
+ * Step 4: As there is a conflict, check if both the peer and the local nodes
+ * are hosting bricks. Based on the results perform the following:
+ * Peer Hosts Bricks Local Node Hosts Bricks Action
+ * Yes Yes Goto Step 8
+ * No No Goto Step 8
+ * Yes No Goto Step 9
+ * No Yes Goto Step 7
+ * Step 5: Check if the local node is missing the peer's data.
+ * If yes, goto step 10.
+ * Step 6: Check if the snap volume version is lesser than peer_data
+ * if yes goto step 9
+ * Step 7: It's a no-op. Goto step 11
+ * Step 8: Peer Reject. Goto step 11
+ * Step 9: Delete local node's data.
+ * Step 10: Accept Peer Data.
+ * Step 11: Stop
+ *
+ */
+int32_t
+glusterd_compare_snap(dict_t *peer_data, int32_t snap_count, char *peername,
+ uuid_t peerid)
+{
+ char buf[64] = "";
+ char prefix[32] = "";
+ char *peer_snap_name = NULL;
+ char *peer_snap_id = NULL;
+ glusterd_snap_t *snap = NULL;
+ gf_boolean_t conflict = _gf_false;
+ gf_boolean_t is_local = _gf_false;
+ gf_boolean_t is_hosted = _gf_false;
+ gf_boolean_t missed_delete = _gf_false;
+ int32_t ret = -1;
+ int32_t volcount = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(peer_data);
+ GF_ASSERT(peername);
+
+ snprintf(prefix, sizeof(prefix), "snap%d", snap_count);
+
+ ret = dict_set_uint32(peer_data, buf, 0);
+ snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix);
+ ret = dict_set_uint32(peer_data, buf, 0);
+ snprintf(buf, sizeof(buf), "%s.remove_lvm", prefix);
+ ret = dict_set_uint32(peer_data, buf, 0);
+ snprintf(buf, sizeof(buf), "%s.remove_my_data", prefix);
+ ret = dict_set_uint32(peer_data, buf, 0);
+
+ /* Fetch the peer's snapname */
+ snprintf(buf, sizeof(buf), "%s.snapname", prefix);
+ ret = dict_get_str(peer_data, buf, &peer_snap_name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch snapname from peer: %s", peername);
+ goto out;
+ }
+
+ /* Fetch the peer's snap_id */
+ snprintf(buf, sizeof(buf), "%s.snap_id", prefix);
+ ret = dict_get_str(peer_data, buf, &peer_snap_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch snap_id from peer: %s", peername);
+ goto out;
+ }
+
+ snprintf(buf, sizeof(buf), "%s.volcount", prefix);
+ ret = dict_get_int32(peer_data, buf, &volcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volcount for snap %s", peer_snap_name);
+ goto out;
+ }
+
+ /* Check if the peer has missed a snap delete or restore
+ * resulting in stale data for the snap in question
+ */
+ missed_delete = glusterd_peer_has_missed_snap_delete(peerid, peer_snap_id);
+ if (missed_delete == _gf_true) {
+ /* Peer has missed delete on the missing/conflicting snap_id */
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_MISSED_SNAP_DELETE,
+ "Peer %s has missed a delete "
+ "on snap %s",
+ peername, peer_snap_name);
+ ret = 0;
+ goto out;
+ }
+
+ /* Check if there is a conflict, and if the
+ * peer data is already present
+ */
+ glusterd_is_peer_snap_conflicting(peer_snap_name, peer_snap_id, &conflict,
+ &snap, peername);
+ if (conflict == _gf_false) {
+ if (!snap) {
+ /* Peer has snap with the same snapname
+ * and snap_id, which local node doesn't have.
+ */
+ snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix);
+ ret = dict_set_uint32(peer_data, buf, 1);
+ goto out;
+ }
+ /* Peer has snap with the same snapname
+ * and snap_id. Now check if peer has a
+ * snap with higher snap version than local
+ * node has.
+ */
+ ret = glusterd_check_peer_has_higher_snap_version(
+ peer_data, peer_snap_name, volcount, &conflict, prefix, snap,
+ peername);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_VERS_MISMATCH,
+ "Failed "
+ "to check version of snap volume");
+ goto out;
+ }
+ if (conflict == _gf_true) {
+ /*
+ * Snap version of peer is higher than snap
+ * version of local node.
+ *
+ * Remove data in local node and accept peer data.
+ * We just need to heal snap info of local node, So
+ * When removing data from local node, make sure
+ * we are not removing backend lvm of the snap.
+ */
+ snprintf(buf, sizeof(buf), "%s.remove_lvm", prefix);
+ ret = dict_set_uint32(peer_data, buf, 0);
+ snprintf(buf, sizeof(buf), "%s.remove_my_data", prefix);
+ ret = dict_set_uint32(peer_data, buf, 1);
+ snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix);
+ ret = dict_set_uint32(peer_data, buf, 1);
+
+ } else {
+ ret = 0;
+ }
+ goto out;
+ }
+
+ /* There is a conflict. Check if the current node is
+ * hosting bricks for the conflicted snap.
+ */
+ is_local = glusterd_are_snap_bricks_local(snap);
+
+ /* Check if the peer is hosting any bricks for the
+ * conflicting snap
+ */
+ snprintf(buf, sizeof(buf), "%s.host_bricks", prefix);
+ ret = dict_get_int8(peer_data, buf, (int8_t *)&is_hosted);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch host_bricks from peer: %s "
+ "for %s",
+ peername, peer_snap_name);
+ goto out;
+ }
+
+ /* As there is a conflict at this point of time, the data of the
+ * node that hosts a brick takes precedence. If both the local
+ * node and the peer are in the same state, i.e if both of them
+ * are either hosting or not hosting the bricks, for the snap,
+ * then it's a peer reject
+ */
+ if (is_hosted == is_local) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CONFLICT,
+ "Conflict in snapshot %s with peer %s", peer_snap_name,
+ peername);
+ ret = -1;
+ goto out;
+ }
+
+ if (is_hosted == _gf_false) {
+ /* If there was a conflict, and the peer is not hosting
+ * any brick, then don't accept peer data
+ */
+ gf_msg_debug(this->name, 0,
+ "Peer doesn't hosts bricks for conflicting "
+ "snap(%s). Not accepting peer data.",
+ peer_snap_name);
+ ret = 0;
+ goto out;
+ }
+
+ /* The peer is hosting a brick in case of conflict
+ * And local node isn't. Hence remove local node's
+ * data and accept peer data
+ */
+ gf_msg_debug(this->name, 0,
+ "Peer hosts bricks for conflicting "
+ "snap(%s). Removing local data. Accepting peer data.",
+ peer_snap_name);
+ snprintf(buf, sizeof(buf), "%s.remove_lvm", prefix);
+ ret = dict_set_uint32(peer_data, buf, 1);
+ snprintf(buf, sizeof(buf), "%s.remove_my_data", prefix);
+ ret = dict_set_uint32(peer_data, buf, 1);
+ snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix);
+ ret = dict_set_uint32(peer_data, buf, 1);
+
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_update_snaps_synctask(void *opaque)
+{
+ int32_t ret = -1;
+ int32_t snap_count = 0;
+ int i = 1;
+ xlator_t *this = NULL;
+ dict_t *peer_data = NULL;
+ char buf[64] = "";
+ char prefix[32] = "";
+ char *peer_snap_name = NULL;
+ char *peer_snap_id = NULL;
+ char *peername = NULL;
+ gf_boolean_t remove_lvm = _gf_false;
+ gf_boolean_t remove_my_data = _gf_false;
+ gf_boolean_t accept_peer_data = _gf_false;
+ int32_t val = 0;
+ glusterd_snap_t *snap = NULL;
+ dict_t *dict = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ peer_data = (dict_t *)opaque;
+ GF_ASSERT(peer_data);
+
+ synclock_lock(&conf->big_lock);
+
+ while (conf->restart_bricks) {
+ synccond_wait(&conf->cond_restart_bricks, &conf->big_lock);
+ }
+ conf->restart_bricks = _gf_true;
+
+ ret = dict_get_int32(peer_data, "snap_count", &snap_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to fetch snap_count");
+ goto out;
+ }
+ ret = dict_get_str(peer_data, "peername", &peername);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to fetch peername");
+ goto out;
+ }
+
+ for (i = 1; i <= snap_count; i++) {
+ snprintf(prefix, sizeof(prefix), "snap%d", i);
+
+ /* Fetch the peer's snapname */
+ snprintf(buf, sizeof(buf), "%s.snapname", prefix);
+ ret = dict_get_str(peer_data, buf, &peer_snap_name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch snapname from peer: %s", peername);
+ goto out;
+ }
+
+ /* Fetch the peer's snap_id */
+ snprintf(buf, sizeof(buf), "%s.snap_id", prefix);
+ ret = dict_get_str(peer_data, buf, &peer_snap_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch snap_id from peer: %s", peername);
+ goto out;
+ }
+
+ /* remove_my_data */
+ snprintf(buf, sizeof(buf), "%s.remove_my_data", prefix);
+ ret = dict_get_int32(peer_data, buf, &val);
+ if (val)
+ remove_my_data = _gf_true;
+ else
+ remove_my_data = _gf_false;
+
+ if (remove_my_data) {
+ snprintf(buf, sizeof(buf), "%s.remove_lvm", prefix);
+ ret = dict_get_int32(peer_data, buf, &val);
+ if (val)
+ remove_lvm = _gf_true;
+ else
+ remove_lvm = _gf_false;
+
+ dict = dict_new();
+ if (!dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Unable to create dict");
+ ret = -1;
+ goto out;
+ }
+ snap = glusterd_find_snap_by_name(peer_snap_name);
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_PRESENT,
+ "Snapshot %s from peer %s missing on "
+ "localhost",
+ peer_snap_name, peername);
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_snap_remove(dict, snap, remove_lvm, _gf_false,
+ _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "Failed to remove snap %s", snap->snapname);
+ goto out;
+ }
+
+ dict_unref(dict);
+ dict = NULL;
+ }
+ snprintf(buf, sizeof(buf), "%s.accept_peer_data", prefix);
+ ret = dict_get_int32(peer_data, buf, &val);
+ if (val)
+ accept_peer_data = _gf_true;
+ else
+ accept_peer_data = _gf_false;
+
+ if (accept_peer_data) {
+ /* Accept Peer Data */
+ ret = glusterd_import_friend_snap(peer_data, i, peer_snap_name,
+ peer_snap_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_IMPORT_FAIL,
+ "Failed to import snap %s from peer %s", peer_snap_name,
+ peername);
+ goto out;
+ }
+ }
+ }
+
+out:
+ if (peer_data)
+ dict_unref(peer_data);
+ if (dict)
+ dict_unref(dict);
+ conf->restart_bricks = _gf_false;
+ synccond_broadcast(&conf->cond_restart_bricks);
+
+ return ret;
+}
+
+/* Compare snapshots present in peer_data, with the snapshots in
+ * the current node
+ */
+int32_t
+glusterd_compare_friend_snapshots(dict_t *peer_data, char *peername,
+ uuid_t peerid)
+{
+ int32_t ret = -1;
+ int32_t snap_count = 0;
+ int i = 1;
+ xlator_t *this = NULL;
+ dict_t *peer_data_copy = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(peer_data);
+ GF_ASSERT(peername);
+
+ ret = dict_get_int32(peer_data, "snap_count", &snap_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to fetch snap_count");
+ goto out;
+ }
+
+ if (!snap_count)
+ goto out;
+
+ for (i = 1; i <= snap_count; i++) {
+ /* Compare one snapshot from peer_data at a time */
+ ret = glusterd_compare_snap(peer_data, i, peername, peerid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_OP_FAILED,
+ "Failed to compare snapshots with peer %s", peername);
+ goto out;
+ }
+ }
+ /* Update the snaps at one go */
+ peer_data_copy = dict_copy_with_ref(peer_data, NULL);
+ ret = dict_set_str(peer_data_copy, "peername", peername);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set peername into the dict");
+ if (peer_data_copy)
+ dict_unref(peer_data_copy);
+ goto out;
+ }
+ glusterd_launch_synctask(glusterd_update_snaps_synctask, peer_data_copy);
+
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_add_snapd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
+ int32_t count)
+{
+ int ret = -1;
+ int32_t pid = -1;
+ int32_t brick_online = -1;
+ char key[64] = {0};
+ char base_key[32] = {0};
+ char pidfile[PATH_MAX] = {0};
+ xlator_t *this = NULL;
+
+ GF_ASSERT(volinfo);
+ GF_ASSERT(dict);
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ snprintf(base_key, sizeof(base_key), "brick%d", count);
+ snprintf(key, sizeof(key), "%s.hostname", base_key);
+ ret = dict_set_str(dict, key, "Snapshot Daemon");
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%s.path", base_key);
+ ret = dict_set_dynstr(dict, key, gf_strdup(uuid_utoa(MY_UUID)));
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%s.port", base_key);
+ ret = dict_set_int32(dict, key, volinfo->snapd.port);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
+ goto out;
+ }
+
+ glusterd_svc_build_snapd_pidfile(volinfo, pidfile, sizeof(pidfile));
+
+ brick_online = gf_is_service_running(pidfile, &pid);
+ if (brick_online == _gf_false)
+ pid = -1;
+
+ snprintf(key, sizeof(key), "%s.pid", base_key);
+ ret = dict_set_int32(dict, key, pid);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "Key=%s",
+ key, NULL);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%s.status", base_key);
+ ret = dict_set_int32(dict, key, brick_online);
+
+out:
+ if (ret)
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_snap_config_use_rsp_dict(dict_t *dst, dict_t *src)
+{
+ char buf[PATH_MAX] = "";
+ char *volname = NULL;
+ int ret = -1;
+ int config_command = 0;
+ uint64_t i = 0;
+ uint64_t hard_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+ uint64_t soft_limit = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT;
+ uint64_t value = 0;
+ uint64_t voldisplaycount = 0;
+
+ if (!dst || !src) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY,
+ "Source or Destination "
+ "dict is empty.");
+ goto out;
+ }
+
+ ret = dict_get_int32(dst, "config-command", &config_command);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to get config-command type");
+ goto out;
+ }
+
+ switch (config_command) {
+ case GF_SNAP_CONFIG_DISPLAY:
+ ret = dict_get_uint64(src, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+ &hard_limit);
+ if (!ret) {
+ ret = dict_set_uint64(
+ dst, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, hard_limit);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set snap_max_hard_limit");
+ goto out;
+ }
+ } else {
+ /* Received dummy response from other nodes */
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_uint64(src, GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT,
+ &soft_limit);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get snap_max_soft_limit");
+ goto out;
+ }
+
+ ret = dict_set_uint64(dst, GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT,
+ soft_limit);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set snap_max_soft_limit");
+ goto out;
+ }
+
+ ret = dict_get_uint64(src, "voldisplaycount", &voldisplaycount);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get voldisplaycount");
+ goto out;
+ }
+
+ ret = dict_set_uint64(dst, "voldisplaycount", voldisplaycount);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set voldisplaycount");
+ goto out;
+ }
+
+ for (i = 0; i < voldisplaycount; i++) {
+ snprintf(buf, sizeof(buf), "volume%" PRIu64 "-volname", i);
+ ret = dict_get_str(src, buf, &volname);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get %s", buf);
+ goto out;
+ }
+ ret = dict_set_str(dst, buf, volname);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set %s", buf);
+ goto out;
+ }
+
+ snprintf(buf, sizeof(buf),
+ "volume%" PRIu64 "-snap-max-hard-limit", i);
+ ret = dict_get_uint64(src, buf, &value);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get %s", buf);
+ goto out;
+ }
+ ret = dict_set_uint64(dst, buf, value);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set %s", buf);
+ goto out;
+ }
+
+ snprintf(buf, sizeof(buf),
+ "volume%" PRIu64 "-active-hard-limit", i);
+ ret = dict_get_uint64(src, buf, &value);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get %s", buf);
+ goto out;
+ }
+ ret = dict_set_uint64(dst, buf, value);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set %s", buf);
+ goto out;
+ }
+
+ snprintf(buf, sizeof(buf),
+ "volume%" PRIu64 "-snap-max-soft-limit", i);
+ ret = dict_get_uint64(src, buf, &value);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get %s", buf);
+ goto out;
+ }
+ ret = dict_set_uint64(dst, buf, value);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set %s", buf);
+ goto out;
+ }
+ }
+
+ break;
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_merge_brick_status(dict_t *dst, dict_t *src)
+{
+ int64_t volume_count = 0;
+ int64_t index = 0;
+ int64_t j = 0;
+ int64_t brick_count = 0;
+ int64_t brick_order = 0;
+ char key[64] = {
+ 0,
+ };
+ char key_prefix[16] = {
+ 0,
+ };
+ char snapbrckcnt[PATH_MAX] = {
+ 0,
+ };
+ char snapbrckord[PATH_MAX] = {
+ 0,
+ };
+ char *clonename = NULL;
+ int ret = -1;
+ int32_t brick_online = 0;
+ xlator_t *this = NULL;
+ int32_t snap_command = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (!dst || !src) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY,
+ "Source or Destination "
+ "dict is empty.");
+ goto out;
+ }
+
+ ret = dict_get_int32(dst, "type", &snap_command);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "unable to get the type of "
+ "the snapshot command");
+ goto out;
+ }
+
+ if (snap_command == GF_SNAP_OPTION_TYPE_DELETE) {
+ gf_msg_debug(this->name, 0,
+ "snapshot delete command."
+ " Need not merge the status of the bricks");
+ ret = 0;
+ goto out;
+ }
+
+ /* Try and fetch clonename. If present set status with clonename *
+ * else do so as snap-vol */
+ ret = dict_get_str(dst, "clonename", &clonename);
+ if (ret) {
+ snprintf(key_prefix, sizeof(key_prefix), "snap-vol");
+ } else
+ snprintf(key_prefix, sizeof(key_prefix), "clone");
+
+ ret = dict_get_int64(src, "volcount", &volume_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to "
+ "get the volume count");
+ goto out;
+ }
+
+ for (index = 0; index < volume_count; index++) {
+ ret = snprintf(snapbrckcnt, sizeof(snapbrckcnt) - 1,
+ "snap-vol%" PRId64 "_brickcount", index + 1);
+ ret = dict_get_int64(src, snapbrckcnt, &brick_count);
+ if (ret) {
+ gf_msg_trace(this->name, 0,
+ "No bricks for this volume in this dict (%s)",
+ snapbrckcnt);
+ continue;
+ }
+
+ for (j = 0; j < brick_count; j++) {
+ /* Fetching data from source dict */
+ snprintf(snapbrckord, sizeof(snapbrckord) - 1,
+ "snap-vol%" PRId64 ".brick%" PRId64 ".order", index + 1,
+ j);
+
+ ret = dict_get_int64(src, snapbrckord, &brick_order);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get brick order (%s)", snapbrckord);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "%s%" PRId64 ".brick%" PRId64 ".status",
+ key_prefix, index + 1, brick_order);
+ ret = dict_get_int32(src, key, &brick_online);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to "
+ "get the brick status (%s)",
+ key);
+ goto out;
+ }
+
+ ret = dict_set_int32(dst, key, brick_online);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to "
+ "set the brick status (%s)",
+ key);
+ goto out;
+ }
+ brick_online = 0;
+ }
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/* Aggregate missed_snap_counts from different nodes and save it *
+ * in the req_dict of the originator node */
+int
+glusterd_snap_create_use_rsp_dict(dict_t *dst, dict_t *src)
+{
+ char *buf = NULL;
+ char *tmp_str = NULL;
+ char name_buf[PATH_MAX] = "";
+ int32_t i = -1;
+ int32_t ret = -1;
+ int32_t src_missed_snap_count = -1;
+ int32_t dst_missed_snap_count = -1;
+ xlator_t *this = NULL;
+ int8_t soft_limit_flag = -1;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (!dst || !src) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY,
+ "Source or Destination "
+ "dict is empty.");
+ goto out;
+ }
+
+ ret = glusterd_merge_brick_status(dst, src);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_SET_INFO_FAIL,
+ "failed to merge brick "
+ "status");
+ goto out;
+ }
+
+ ret = dict_get_str(src, "snapuuid", &buf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to get snap UUID");
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(dst, "snapuuid", buf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snap uuid in dict");
+ goto out;
+ }
+
+ /* set in dst dictionary soft-limit-reach only if soft-limit-reach
+ * is present src dictionary */
+ ret = dict_get_int8(src, "soft-limit-reach", &soft_limit_flag);
+ if (!ret) {
+ ret = dict_set_int8(dst, "soft-limit-reach", soft_limit_flag);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "soft_limit_flag");
+ goto out;
+ }
+ }
+
+ ret = dict_get_int32(src, "missed_snap_count", &src_missed_snap_count);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "No missed snaps");
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_int32(dst, "missed_snap_count", &dst_missed_snap_count);
+ if (ret) {
+ /* Initialize dst_missed_count for the first time */
+ dst_missed_snap_count = 0;
+ }
+
+ for (i = 0; i < src_missed_snap_count; i++) {
+ snprintf(name_buf, sizeof(name_buf), "missed_snaps_%d", i);
+ ret = dict_get_str(src, name_buf, &buf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch %s", name_buf);
+ goto out;
+ }
+
+ snprintf(name_buf, sizeof(name_buf), "missed_snaps_%d",
+ dst_missed_snap_count);
+
+ tmp_str = gf_strdup(buf);
+ if (!tmp_str) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr(dst, name_buf, tmp_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set %s", name_buf);
+ goto out;
+ }
+
+ tmp_str = NULL;
+ dst_missed_snap_count++;
+ }
+
+ ret = dict_set_int32(dst, "missed_snap_count", dst_missed_snap_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set dst_missed_snap_count");
+ goto out;
+ }
+
+out:
+ if (ret && tmp_str)
+ GF_FREE(tmp_str);
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_snap_use_rsp_dict(dict_t *dst, dict_t *src)
+{
+ int ret = -1;
+ int32_t snap_command = 0;
+
+ if (!dst || !src) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY,
+ "Source or Destination "
+ "dict is empty.");
+ goto out;
+ }
+
+ ret = dict_get_int32(dst, "type", &snap_command);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "unable to get the type of "
+ "the snapshot command");
+ goto out;
+ }
+
+ switch (snap_command) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ case GF_SNAP_OPTION_TYPE_CLONE:
+ ret = glusterd_snap_create_use_rsp_dict(dst, src);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_RSP_DICT_USE_FAIL,
+ "Unable to use rsp dict");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ ret = glusterd_snap_config_use_rsp_dict(dst, src);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_RSP_DICT_USE_FAIL,
+ "Unable to use rsp dict");
+ goto out;
+ }
+ break;
+ default:
+ /* copy the response dictinary's contents to the dict to be
+ * sent back to the cli */
+ dict_copy(src, dst);
+ break;
+ }
+
+ ret = 0;
+out:
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_compare_snap_time(struct cds_list_head *list1,
+ struct cds_list_head *list2)
+{
+ glusterd_snap_t *snap1 = NULL;
+ glusterd_snap_t *snap2 = NULL;
+ double diff_time = 0;
+
+ GF_ASSERT(list1);
+ GF_ASSERT(list2);
+
+ snap1 = cds_list_entry(list1, glusterd_snap_t, snap_list);
+ snap2 = cds_list_entry(list2, glusterd_snap_t, snap_list);
+ diff_time = difftime(snap1->time_stamp, snap2->time_stamp);
+
+ return (int)diff_time;
+}
+
+int
+glusterd_compare_snap_vol_time(struct cds_list_head *list1,
+ struct cds_list_head *list2)
+{
+ glusterd_volinfo_t *snapvol1 = NULL;
+ glusterd_volinfo_t *snapvol2 = NULL;
+ double diff_time = 0;
+
+ GF_ASSERT(list1);
+ GF_ASSERT(list2);
+
+ snapvol1 = cds_list_entry(list1, glusterd_volinfo_t, snapvol_list);
+ snapvol2 = cds_list_entry(list2, glusterd_volinfo_t, snapvol_list);
+ diff_time = difftime(snapvol1->snapshot->time_stamp,
+ snapvol2->snapshot->time_stamp);
+
+ return (int)diff_time;
+}
+
+int32_t
+glusterd_missed_snapinfo_new(glusterd_missed_snap_info **missed_snapinfo)
+{
+ glusterd_missed_snap_info *new_missed_snapinfo = NULL;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(missed_snapinfo);
+
+ new_missed_snapinfo = GF_CALLOC(1, sizeof(*new_missed_snapinfo),
+ gf_gld_mt_missed_snapinfo_t);
+
+ if (!new_missed_snapinfo) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ CDS_INIT_LIST_HEAD(&new_missed_snapinfo->missed_snaps);
+ CDS_INIT_LIST_HEAD(&new_missed_snapinfo->snap_ops);
+
+ *missed_snapinfo = new_missed_snapinfo;
+
+ ret = 0;
+
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_missed_snap_op_new(glusterd_snap_op_t **snap_op)
+{
+ glusterd_snap_op_t *new_snap_op = NULL;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(snap_op);
+
+ new_snap_op = GF_CALLOC(1, sizeof(*new_snap_op),
+ gf_gld_mt_missed_snapinfo_t);
+
+ if (!new_snap_op) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ goto out;
+ }
+
+ new_snap_op->brick_num = -1;
+ new_snap_op->op = -1;
+ new_snap_op->status = -1;
+ CDS_INIT_LIST_HEAD(&new_snap_op->snap_ops_list);
+
+ *snap_op = new_snap_op;
+
+ ret = 0;
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+gf_boolean_t
+mntopts_exists(const char *str, const char *opts)
+{
+ char *dup_val = NULL;
+ char *savetok = NULL;
+ char *token = NULL;
+ gf_boolean_t exists = _gf_false;
+
+ GF_ASSERT(opts);
+
+ if (!str || !strlen(str))
+ goto out;
+
+ dup_val = gf_strdup(str);
+ if (!dup_val)
+ goto out;
+
+ token = strtok_r(dup_val, ",", &savetok);
+ while (token) {
+ if (!strcmp(token, opts)) {
+ exists = _gf_true;
+ goto out;
+ }
+ token = strtok_r(NULL, ",", &savetok);
+ }
+
+out:
+ GF_FREE(dup_val);
+ return exists;
+}
+
+int32_t
+glusterd_mount_lvm_snapshot(glusterd_brickinfo_t *brickinfo,
+ char *brick_mount_path)
+{
+ char msg[NAME_MAX] = "";
+ char mnt_opts[1024] = "";
+ int32_t ret = -1;
+ runner_t runner = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(brick_mount_path);
+ GF_ASSERT(brickinfo);
+
+ runinit(&runner);
+ len = snprintf(msg, sizeof(msg), "mount %s %s", brickinfo->device_path,
+ brick_mount_path);
+ if (len < 0) {
+ strcpy(msg, "<error>");
+ }
+
+ gf_strncpy(mnt_opts, brickinfo->mnt_opts, sizeof(mnt_opts));
+
+ /* XFS file-system does not allow to mount file-system with duplicate
+ * UUID. File-system UUID of snapshot and its origin volume is same.
+ * Therefore to mount such a snapshot in XFS we need to pass nouuid
+ * option
+ */
+ if (!strcmp(brickinfo->fstype, "xfs") &&
+ !mntopts_exists(mnt_opts, "nouuid")) {
+ if (strlen(mnt_opts) > 0)
+ strcat(mnt_opts, ",");
+ strcat(mnt_opts, "nouuid");
+ }
+
+ if (strlen(mnt_opts) > 0) {
+ runner_add_args(&runner, "mount", "-o", mnt_opts,
+ brickinfo->device_path, brick_mount_path, NULL);
+ } else {
+ runner_add_args(&runner, "mount", brickinfo->device_path,
+ brick_mount_path, NULL);
+ }
+
+ runner_log(&runner, this->name, GF_LOG_DEBUG, msg);
+ ret = runner_run(&runner);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_MOUNT_FAIL,
+ "mounting the snapshot "
+ "logical device %s failed (error: %s)",
+ brickinfo->device_path, strerror(errno));
+ goto out;
+ } else
+ gf_msg_debug(this->name, 0,
+ "mounting the snapshot "
+ "logical device %s successful",
+ brickinfo->device_path);
+
+out:
+ gf_msg_trace(this->name, 0, "Returning with %d", ret);
+ return ret;
+}
+
+gf_boolean_t
+glusterd_volume_quorum_calculate(glusterd_volinfo_t *volinfo, dict_t *dict,
+ int down_count, gf_boolean_t first_brick_on,
+ int8_t snap_force, int quorum_count,
+ char *quorum_type, char **op_errstr,
+ uint32_t *op_errno)
+{
+ gf_boolean_t quorum_met = _gf_false;
+ const char err_str[] = "One or more bricks may be down.";
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ if (!volinfo || !dict) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_INVALID_ENTRY,
+ "input parameters NULL");
+ goto out;
+ }
+
+ /* In a n-way replication where n >= 3 we should not take a snapshot
+ * if even one brick is down, irrespective of the quorum being met.
+ * TODO: Remove this restriction once n-way replication is
+ * supported with snapshot.
+ */
+ if (down_count) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_DISCONNECTED, "%s",
+ err_str);
+ *op_errstr = gf_strdup(err_str);
+ *op_errno = EG_BRCKDWN;
+ } else {
+ quorum_met = _gf_true;
+ }
+
+ /* TODO : Support for n-way relication in snapshot*/
+out:
+ return quorum_met;
+}
+
+static int32_t
+glusterd_volume_quorum_check(glusterd_volinfo_t *volinfo, int64_t index,
+ dict_t *dict, const char *key_prefix,
+ int8_t snap_force, int quorum_count,
+ char *quorum_type, char **op_errstr,
+ uint32_t *op_errno)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ int64_t i = 0;
+ int64_t j = 0;
+ char key[128] = {
+ 0,
+ }; /* key_prefix is passed from above, but is really quite small */
+ int keylen;
+ int down_count = 0;
+ gf_boolean_t first_brick_on = _gf_true;
+ glusterd_conf_t *priv = NULL;
+ gf_boolean_t quorum_met = _gf_false;
+ int distribute_subvols = 0;
+ int32_t brick_online = 0;
+ const char err_str[] = "quorum is not met";
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ if (!volinfo || !dict) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_INVALID_ENTRY,
+ "input parameters NULL");
+ goto out;
+ }
+
+ if ((!glusterd_is_volume_replicate(volinfo) ||
+ volinfo->replica_count < 3) &&
+ (GF_CLUSTER_TYPE_DISPERSE != volinfo->type)) {
+ for (i = 0; i < volinfo->brick_count; i++) {
+ /* for a pure distribute volume, and replica volume
+ with replica count 2, quorum is not met if even
+ one of its subvolumes is down
+ */
+ keylen = snprintf(key, sizeof(key),
+ "%s%" PRId64 ".brick%" PRId64 ".status",
+ key_prefix, index, i);
+ ret = dict_get_int32n(dict, key, keylen, &brick_online);
+ if (ret || !brick_online) {
+ ret = 1;
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SERVER_QUORUM_NOT_MET, "%s", err_str);
+ *op_errstr = gf_strdup(err_str);
+ *op_errno = EG_BRCKDWN;
+ goto out;
+ }
+ }
+ ret = 0;
+ quorum_met = _gf_true;
+ } else {
+ distribute_subvols = volinfo->brick_count / volinfo->dist_leaf_count;
+ for (j = 0; j < distribute_subvols; j++) {
+ /* by default assume quorum is not met
+ TODO: Handle distributed striped replicate volumes
+ Currently only distributed replicate volumes are
+ handled.
+ */
+ ret = 1;
+ quorum_met = _gf_false;
+ for (i = 0; i < volinfo->dist_leaf_count; i++) {
+ keylen = snprintf(
+ key, sizeof(key), "%s%" PRId64 ".brick%" PRId64 ".status",
+ key_prefix, index, (j * volinfo->dist_leaf_count) + i);
+ ret = dict_get_int32n(dict, key, keylen, &brick_online);
+ if (ret || !brick_online) {
+ if (i == 0)
+ first_brick_on = _gf_false;
+ down_count++;
+ }
+ }
+
+ quorum_met = glusterd_volume_quorum_calculate(
+ volinfo, dict, down_count, first_brick_on, snap_force,
+ quorum_count, quorum_type, op_errstr, op_errno);
+ /* goto out if quorum is not met */
+ if (!quorum_met) {
+ ret = -1;
+ goto out;
+ }
+
+ down_count = 0;
+ first_brick_on = _gf_true;
+ }
+ }
+
+ if (quorum_met) {
+ gf_msg_debug(this->name, 0, "volume %s is in quorum", volinfo->volname);
+ ret = 0;
+ }
+
+out:
+ return ret;
+}
+
+static int32_t
+glusterd_snap_common_quorum_calculate(glusterd_volinfo_t *volinfo, dict_t *dict,
+ int64_t index, const char *key_prefix,
+ int8_t snap_force,
+ gf_boolean_t snap_volume,
+ char **op_errstr, uint32_t *op_errno)
+{
+ int quorum_count = 0;
+ char *quorum_type = NULL;
+ int32_t tmp = 0;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+ GF_VALIDATE_OR_GOTO(this->name, volinfo, out);
+
+ /* for replicate volumes with replica count equal to or
+ greater than 3, do quorum check by getting what type
+ of quorum rule has been set by getting the volume
+ option set. If getting the option fails, then assume
+ default.
+ AFR does this:
+ if quorum type is "auto":
+ - for odd number of bricks (n), n/2 + 1
+ bricks should be present
+ - for even number of bricks n, n/2 bricks
+ should be present along with the 1st
+ subvolume
+ if quorum type is not "auto":
+ - get the quorum count from dict with the
+ help of the option "cluster.quorum-count"
+ if the option is not there in the dict,
+ then assume quorum type is auto and follow
+ the above method.
+ For non replicate volumes quorum is met only if all
+ the bricks of the volume are online
+ */
+
+ if (GF_CLUSTER_TYPE_REPLICATE == volinfo->type) {
+ if (volinfo->replica_count % 2 == 0)
+ quorum_count = volinfo->replica_count / 2;
+ else
+ quorum_count = volinfo->replica_count / 2 + 1;
+ } else if (GF_CLUSTER_TYPE_DISPERSE == volinfo->type) {
+ quorum_count = volinfo->disperse_count - volinfo->redundancy_count;
+ } else {
+ quorum_count = volinfo->brick_count;
+ }
+
+ ret = dict_get_str_sizen(volinfo->dict, "cluster.quorum-type",
+ &quorum_type);
+ if (!ret && !strcmp(quorum_type, "fixed")) {
+ ret = dict_get_int32_sizen(volinfo->dict, "cluster.quorum-count", &tmp);
+ /* if quorum-type option is not found in the
+ dict assume auto quorum type. i.e n/2 + 1.
+ The same assumption is made when quorum-count
+ option cannot be obtained from the dict (even
+ if the quorum-type option is not set to auto,
+ the behavior is set to the default behavior)
+ */
+ if (!ret) {
+ /* for dispersed volumes, only allow quorums
+ equal or larger than minimum functional
+ value.
+ */
+ if ((GF_CLUSTER_TYPE_DISPERSE != volinfo->type) ||
+ (tmp >= quorum_count)) {
+ quorum_count = tmp;
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_QUORUM_COUNT_IGNORED,
+ "Ignoring small quorum-count "
+ "(%d) on dispersed volume",
+ tmp);
+ quorum_type = NULL;
+ }
+ } else
+ quorum_type = NULL;
+ }
+
+ ret = glusterd_volume_quorum_check(volinfo, index, dict, key_prefix,
+ snap_force, quorum_count, quorum_type,
+ op_errstr, op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND,
+ "volume %s "
+ "is not in quorum",
+ volinfo->volname);
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+static int32_t
+glusterd_snap_quorum_check_for_clone(dict_t *dict, gf_boolean_t snap_volume,
+ char **op_errstr, uint32_t *op_errno)
+{
+ const char err_str[] = "glusterds are not in quorum";
+ char key_prefix[16] = {
+ 0,
+ };
+ char *snapname = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *tmp_volinfo = NULL;
+ char *volname = NULL;
+ int64_t volcount = 0;
+ int64_t i = 0;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ if (!dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY, "dict is NULL");
+ goto out;
+ }
+
+ if (snap_volume) {
+ ret = dict_get_str_sizen(dict, "snapname", &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to "
+ "get snapname");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name(snapname);
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND,
+ "failed to "
+ "get the snapshot %s",
+ snapname);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ /* Do a quorum check of glusterds also. Because, the missed snapshot
+ * information will be saved by glusterd and if glusterds are not in
+ * quorum, then better fail the snapshot
+ */
+ if (!does_gd_meet_server_quorum(this)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SERVER_QUORUM_NOT_MET,
+ "%s", err_str);
+ *op_errstr = gf_strdup(err_str);
+ *op_errno = EG_NODEDWN;
+ ret = -1;
+ goto out;
+ } else
+ gf_msg_debug(this->name, 0, "glusterds are in quorum");
+
+ ret = dict_get_int64(dict, "volcount", &volcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to get "
+ "volcount");
+ goto out;
+ }
+
+ for (i = 1; i <= volcount; i++) {
+ ret = dict_get_str_sizen(dict, "clonename", &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to "
+ "get clonename");
+ goto out;
+ }
+
+ if (snap_volume && snap) {
+ cds_list_for_each_entry(tmp_volinfo, &snap->volumes, vol_list)
+ {
+ if (!tmp_volinfo) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND,
+ "failed to get snap volume "
+ "for snap %s",
+ snapname);
+ ret = -1;
+ goto out;
+ }
+ volinfo = tmp_volinfo;
+ }
+ } else {
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "failed to find the volume %s", volname);
+ goto out;
+ }
+ }
+
+ snprintf(key_prefix, sizeof(key_prefix), "%s",
+ snap_volume ? "vol" : "clone");
+
+ ret = glusterd_snap_common_quorum_calculate(
+ volinfo, dict, i, key_prefix, 0, snap_volume, op_errstr, op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND,
+ "volume %s "
+ "is not in quorum",
+ volname);
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+static int32_t
+glusterd_snap_quorum_check_for_create(dict_t *dict, gf_boolean_t snap_volume,
+ char **op_errstr, uint32_t *op_errno)
+{
+ int8_t snap_force = 0;
+ int32_t force = 0;
+ const char err_str[] = "glusterds are not in quorum";
+ char key_prefix[16] = {
+ 0,
+ };
+ char *snapname = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volname = NULL;
+ int64_t volcount = 0;
+ char key[32] = {
+ 0,
+ };
+ int64_t i = 0;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ if (!dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY, "dict is NULL");
+ goto out;
+ }
+
+ if (snap_volume) {
+ ret = dict_get_str(dict, "snapname", &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to "
+ "get snapname");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name(snapname);
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND,
+ "failed to "
+ "get the snapshot %s",
+ snapname);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = dict_get_int32(dict, "flags", &force);
+ if (!ret && (force & GF_CLI_FLAG_OP_FORCE))
+ snap_force = 1;
+
+ /* Do a quorum check of glusterds also. Because, the missed snapshot
+ * information will be saved by glusterd and if glusterds are not in
+ * quorum, then better fail the snapshot
+ */
+ if (!does_gd_meet_server_quorum(this)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SERVER_QUORUM_NOT_MET,
+ "%s", err_str);
+ *op_errstr = gf_strdup(err_str);
+ *op_errno = EG_NODEDWN;
+ ret = -1;
+ goto out;
+ } else
+ gf_msg_debug(this->name, 0, "glusterds are in quorum");
+
+ ret = dict_get_int64(dict, "volcount", &volcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to get "
+ "volcount");
+ goto out;
+ }
+
+ for (i = 1; i <= volcount; i++) {
+ snprintf(key, sizeof(key), "%s%" PRId64,
+ snap_volume ? "snap-volname" : "volname", i);
+ ret = dict_get_str(dict, key, &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to "
+ "get volname");
+ goto out;
+ }
+
+ if (snap_volume) {
+ ret = glusterd_snap_volinfo_find(volname, snap, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND,
+ "failed to get snap volume %s "
+ "for snap %s",
+ volname, snapname);
+ goto out;
+ }
+ } else {
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "failed to find the volume %s", volname);
+ goto out;
+ }
+ }
+
+ snprintf(key_prefix, sizeof(key_prefix), "%s",
+ snap_volume ? "snap-vol" : "vol");
+
+ ret = glusterd_snap_common_quorum_calculate(
+ volinfo, dict, i, key_prefix, snap_force, snap_volume, op_errstr,
+ op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_NOT_FOUND,
+ "volume %s "
+ "is not in quorum",
+ volinfo->volname);
+ goto out;
+ }
+ }
+out:
+ return ret;
+}
+
+int32_t
+glusterd_snap_quorum_check(dict_t *dict, gf_boolean_t snap_volume,
+ char **op_errstr, uint32_t *op_errno)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ int32_t snap_command = 0;
+ const char err_str[] = "glusterds are not in quorum";
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ if (!dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_EMPTY, "dict is NULL");
+ goto out;
+ }
+
+ ret = dict_get_int32_sizen(dict, "type", &snap_command);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "unable to get the type of "
+ "the snapshot command");
+ goto out;
+ }
+
+ switch (snap_command) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ ret = glusterd_snap_quorum_check_for_create(dict, snap_volume,
+ op_errstr, op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_QUORUM_CHECK_FAIL,
+ "Quorum check"
+ "failed during snapshot create command");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_CLONE:
+ ret = glusterd_snap_quorum_check_for_clone(dict, !snap_volume,
+ op_errstr, op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_QUORUM_CHECK_FAIL,
+ "Quorum check"
+ "failed during snapshot clone command");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ if (!does_gd_meet_server_quorum(this)) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_SERVER_QUORUM_NOT_MET, "%s", err_str);
+ *op_errstr = gf_strdup(err_str);
+ *op_errno = EG_NODEDWN;
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0,
+ "glusterds are in "
+ "quorum");
+ break;
+ default:
+ break;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int
+glusterd_is_path_mounted(const char *path)
+{
+ FILE *mtab = NULL;
+ struct mntent *part = NULL;
+ int is_mounted = 0;
+
+ if ((mtab = setmntent("/etc/mtab", "r")) != NULL) {
+ while ((part = getmntent(mtab)) != NULL) {
+ if ((part->mnt_fsname != NULL) &&
+ (strcmp(part->mnt_dir, path)) == 0) {
+ is_mounted = 1;
+ break;
+ }
+ }
+ endmntent(mtab);
+ }
+ return is_mounted;
+}
+/* This function will do unmount for snaps.
+ */
+int32_t
+glusterd_snap_unmount(xlator_t *this, glusterd_volinfo_t *volinfo)
+{
+ char *brick_mount_path = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int32_t ret = -1;
+ int retry_count = 0;
+
+ GF_ASSERT(this);
+ GF_ASSERT(volinfo);
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ /* If the brick is not of this node, we continue */
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
+ continue;
+ }
+ /* If snapshot is pending, we continue */
+ if (brickinfo->snap_status == -1) {
+ continue;
+ }
+
+ ret = glusterd_find_brick_mount_path(brickinfo->path,
+ &brick_mount_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MNTPATH_GET_FAIL,
+ "Failed to find brick_mount_path for %s", brickinfo->path);
+ goto out;
+ }
+ /* unmount cannot be done when the brick process is still in
+ * the process of shutdown, so give three re-tries
+ */
+ retry_count = 0;
+ while (retry_count <= 2) {
+ retry_count++;
+ /* umount2 system call doesn't cleanup mtab entry
+ * after un-mount, using external umount command.
+ */
+ ret = glusterd_umount(brick_mount_path);
+ if (!ret)
+ break;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_UMOUNT_FAIL,
+ "umount failed "
+ "for path %s (brick: %s): %s. Retry(%d)",
+ brick_mount_path, brickinfo->path, strerror(errno),
+ retry_count);
+ sleep(3);
+ }
+ }
+
+out:
+ if (brick_mount_path)
+ GF_FREE(brick_mount_path);
+
+ return ret;
+}
+
+int32_t
+glusterd_umount(const char *path)
+{
+ char msg[NAME_MAX] = "";
+ int32_t ret = -1;
+ runner_t runner = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(path);
+
+ if (!glusterd_is_path_mounted(path)) {
+ return 0;
+ }
+
+ runinit(&runner);
+ snprintf(msg, sizeof(msg), "umount path %s", path);
+ runner_add_args(&runner, _PATH_UMOUNT, "-f", path, NULL);
+ runner_log(&runner, this->name, GF_LOG_DEBUG, msg);
+ ret = runner_run(&runner);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_GLUSTERD_UMOUNT_FAIL,
+ "umounting %s failed (%s)", path, strerror(errno));
+
+ gf_msg_trace(this->name, 0, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_copy_file(const char *source, const char *destination)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ char buffer[1024] = "";
+ int src_fd = -1;
+ int dest_fd = -1;
+ int read_len = -1;
+ struct stat stbuf = {
+ 0,
+ };
+ mode_t dest_mode = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(source);
+ GF_ASSERT(destination);
+
+ /* Here is stat is made to get the file permission of source file*/
+ ret = sys_lstat(source, &stbuf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "%s not found", source);
+ goto out;
+ }
+
+ dest_mode = stbuf.st_mode & 0777;
+
+ src_fd = open(source, O_RDONLY);
+ if (src_fd == -1) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Unable to open file %s", source);
+ goto out;
+ }
+
+ dest_fd = sys_creat(destination, dest_mode);
+ if (dest_fd < 0) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED,
+ "Unble to open a file %s", destination);
+ goto out;
+ }
+
+ do {
+ ret = sys_read(src_fd, buffer, sizeof(buffer));
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Error reading file "
+ "%s",
+ source);
+ goto out;
+ }
+ read_len = ret;
+ if (read_len == 0)
+ break;
+
+ ret = sys_write(dest_fd, buffer, read_len);
+ if (ret != read_len) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED,
+ "Writing in "
+ "file %s failed with error %s",
+ destination, strerror(errno));
+ goto out;
+ }
+ } while (ret > 0);
+out:
+ if (src_fd != -1)
+ sys_close(src_fd);
+
+ if (dest_fd > 0)
+ sys_close(dest_fd);
+ return ret;
+}
+
+int32_t
+glusterd_copy_folder(const char *source, const char *destination)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ DIR *dir_ptr = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char src_path[PATH_MAX] = {
+ 0,
+ };
+ char dest_path[PATH_MAX] = {
+ 0,
+ };
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(source);
+ GF_ASSERT(destination);
+
+ dir_ptr = sys_opendir(source);
+ if (!dir_ptr) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Unable to open %s", source);
+ goto out;
+ }
+
+ for (;;) {
+ errno = 0;
+ entry = sys_readdir(dir_ptr, scratch);
+ if (!entry || errno != 0)
+ break;
+
+ if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0)
+ continue;
+ ret = snprintf(src_path, sizeof(src_path), "%s/%s", source,
+ entry->d_name);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ ret = snprintf(dest_path, sizeof(dest_path), "%s/%s", destination,
+ entry->d_name);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ ret = glusterd_copy_file(src_path, dest_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Could not copy "
+ "%s to %s",
+ src_path, dest_path);
+ goto out;
+ }
+ }
+out:
+ if (dir_ptr)
+ (void)sys_closedir(dir_ptr);
+
+ return ret;
+}
+
+int32_t
+glusterd_get_geo_rep_session(char *slave_key, char *origin_volname,
+ dict_t *gsync_slaves_dict, char *session,
+ char *slave)
+{
+ int32_t ret = -1;
+ int32_t len = 0;
+ char *token = NULL;
+ char *tok = NULL;
+ char *temp = NULL;
+ char *ip = NULL;
+ char *ip_i = NULL;
+ char *ip_temp = NULL;
+ char *buffer = NULL;
+ xlator_t *this = NULL;
+ char *slave_temp = NULL;
+ char *save_ptr = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(slave_key);
+ GF_ASSERT(origin_volname);
+ GF_ASSERT(gsync_slaves_dict);
+
+ ret = dict_get_str(gsync_slaves_dict, slave_key, &buffer);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to "
+ "get value for key %s",
+ slave_key);
+ goto out;
+ }
+
+ temp = gf_strdup(buffer);
+ if (!temp) {
+ ret = -1;
+ goto out;
+ }
+
+ /* geo-rep session string format being parsed:
+ * "master_node_uuid:ssh://slave_host::slave_vol:slave_voluuid"
+ */
+ token = strtok_r(temp, "/", &save_ptr);
+
+ token = strtok_r(NULL, ":", &save_ptr);
+ if (!token) {
+ ret = -1;
+ goto out;
+ }
+ token++;
+
+ ip = gf_strdup(token);
+ if (!ip) {
+ ret = -1;
+ goto out;
+ }
+ ip_i = ip;
+
+ token = strtok_r(NULL, ":", &save_ptr);
+ if (!token) {
+ ret = -1;
+ goto out;
+ }
+
+ slave_temp = gf_strdup(token);
+ if (!slave) {
+ ret = -1;
+ goto out;
+ }
+
+ /* If 'ip' has 'root@slavehost', point to 'slavehost' as
+ * working directory for root users are created without
+ * 'root@' */
+ ip_temp = gf_strdup(ip);
+ tok = strtok_r(ip_temp, "@", &save_ptr);
+ len = strlen(tok);
+ tok = strtok_r(NULL, "@", &save_ptr);
+ if (tok != NULL)
+ ip_i = ip + len + 1;
+
+ ret = snprintf(session, PATH_MAX, "%s_%s_%s", origin_volname, ip_i,
+ slave_temp);
+ if (ret < 0) /* Negative value is an error */
+ goto out;
+
+ ret = snprintf(slave, PATH_MAX, "%s::%s", ip, slave_temp);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = 0; /* Success */
+
+out:
+ if (temp)
+ GF_FREE(temp);
+
+ if (ip)
+ GF_FREE(ip);
+
+ if (ip_temp)
+ GF_FREE(ip_temp);
+
+ if (slave_temp)
+ GF_FREE(slave_temp);
+
+ return ret;
+}
+
+int32_t
+glusterd_copy_quota_files(glusterd_volinfo_t *src_vol,
+ glusterd_volinfo_t *dest_vol,
+ gf_boolean_t *conf_present)
+{
+ int32_t ret = -1;
+ char src_dir[PATH_MAX] = "";
+ char dest_dir[PATH_MAX] = "";
+ char src_path[PATH_MAX] = "";
+ char dest_path[PATH_MAX] = "";
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GF_ASSERT(src_vol);
+ GF_ASSERT(dest_vol);
+
+ GLUSTERD_GET_VOLUME_DIR(src_dir, src_vol, priv);
+
+ GLUSTERD_GET_VOLUME_DIR(dest_dir, dest_vol, priv);
+
+ ret = snprintf(src_path, sizeof(src_path), "%s/quota.conf", src_dir);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ /* quota.conf is not present if quota is not enabled, Hence ignoring
+ * the absence of this file
+ */
+ ret = sys_lstat(src_path, &stbuf);
+ if (ret) {
+ ret = 0;
+ gf_msg_debug(this->name, 0, "%s not found", src_path);
+ goto out;
+ }
+
+ ret = snprintf(dest_path, sizeof(dest_path), "%s/quota.conf", dest_dir);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ ret = glusterd_copy_file(src_path, dest_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Failed to copy %s in %s", src_path, dest_path);
+ goto out;
+ }
+
+ ret = snprintf(src_path, sizeof(src_path), "%s/quota.cksum", src_dir);
+ if (ret < 0)
+ goto out;
+
+ /* if quota.conf is present, quota.cksum has to be present. *
+ * Fail snapshot operation if file is absent *
+ */
+ ret = sys_lstat(src_path, &stbuf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_NOT_FOUND,
+ "%s not found", src_path);
+ goto out;
+ }
+
+ ret = snprintf(dest_path, sizeof(dest_path), "%s/quota.cksum", dest_dir);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ ret = glusterd_copy_file(src_path, dest_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Failed to copy %s in %s", src_path, dest_path);
+ goto out;
+ }
+
+ *conf_present = _gf_true;
+out:
+ return ret;
+}
+
+/* *
+ * Here there are two possibilities, either destination is snaphot or
+ * clone. In the case of snapshot nfs_ganesha export file will be copied
+ * to snapdir. If it is clone , then new export file will be created for
+ * the clone in the GANESHA_EXPORT_DIRECTORY, replacing occurences of
+ * volname with clonename
+ */
+int
+glusterd_copy_nfs_ganesha_file(glusterd_volinfo_t *src_vol,
+ glusterd_volinfo_t *dest_vol)
+{
+ int32_t ret = -1;
+ char snap_dir[PATH_MAX] = {
+ 0,
+ };
+ char src_path[PATH_MAX] = {
+ 0,
+ };
+ char dest_path[PATH_MAX] = {
+ 0,
+ };
+ char buffer[BUFSIZ] = {
+ 0,
+ };
+ char *find_ptr = NULL;
+ char *buff_ptr = NULL;
+ char *tmp_ptr = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ FILE *src = NULL;
+ FILE *dest = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("snapshot", this, out);
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ GF_VALIDATE_OR_GOTO(this->name, src_vol, out);
+ GF_VALIDATE_OR_GOTO(this->name, dest_vol, out);
+
+ if (glusterd_check_ganesha_export(src_vol) == _gf_false) {
+ gf_msg_debug(this->name, 0,
+ "%s is not exported via "
+ "NFS-Ganesha. Skipping copy of export conf.",
+ src_vol->volname);
+ ret = 0;
+ goto out;
+ }
+
+ if (src_vol->is_snap_volume) {
+ GLUSTERD_GET_SNAP_DIR(snap_dir, src_vol->snapshot, priv);
+ ret = snprintf(src_path, PATH_MAX, "%s/export.%s.conf", snap_dir,
+ src_vol->snapshot->snapname);
+ } else {
+ ret = snprintf(src_path, PATH_MAX, "%s/export.%s.conf",
+ GANESHA_EXPORT_DIRECTORY, src_vol->volname);
+ }
+ if (ret < 0 || ret >= PATH_MAX)
+ goto out;
+
+ ret = sys_lstat(src_path, &stbuf);
+ if (ret) {
+ /*
+ * This code path is hit, only when the src_vol is being *
+ * exported via NFS-Ganesha. So if the conf file is not *
+ * available, we fail the snapshot operation. *
+ */
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Stat on %s failed with %s", src_path, strerror(errno));
+ goto out;
+ }
+
+ if (dest_vol->is_snap_volume) {
+ memset(snap_dir, 0, PATH_MAX);
+ GLUSTERD_GET_SNAP_DIR(snap_dir, dest_vol->snapshot, priv);
+ ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf",
+ snap_dir, dest_vol->snapshot->snapname);
+ if (ret < 0)
+ goto out;
+
+ ret = glusterd_copy_file(src_path, dest_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Failed to copy %s in %s", src_path, dest_path);
+ goto out;
+ }
+
+ } else {
+ ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf",
+ GANESHA_EXPORT_DIRECTORY, dest_vol->volname);
+ if (ret < 0)
+ goto out;
+
+ src = fopen(src_path, "r");
+ dest = fopen(dest_path, "w");
+
+ if (!src || !dest) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED,
+ "Failed to open %s", dest ? src_path : dest_path);
+ ret = -1;
+ goto out;
+ }
+
+ /* *
+ * if the source volume is snapshot, the export conf file
+ * consists of orginal volname
+ */
+ if (src_vol->is_snap_volume)
+ find_ptr = gf_strdup(src_vol->parent_volname);
+ else
+ find_ptr = gf_strdup(src_vol->volname);
+
+ if (!find_ptr)
+ goto out;
+
+ /* Replacing volname with clonename */
+ while (fgets(buffer, BUFSIZ, src)) {
+ buff_ptr = buffer;
+ while ((tmp_ptr = strstr(buff_ptr, find_ptr))) {
+ while (buff_ptr < tmp_ptr)
+ fputc((int)*buff_ptr++, dest);
+ fputs(dest_vol->volname, dest);
+ buff_ptr += strlen(find_ptr);
+ }
+ fputs(buff_ptr, dest);
+ memset(buffer, 0, BUFSIZ);
+ }
+ }
+out:
+ if (src)
+ fclose(src);
+ if (dest)
+ fclose(dest);
+ if (find_ptr)
+ GF_FREE(find_ptr);
+
+ return ret;
+}
+
+int32_t
+glusterd_restore_geo_rep_files(glusterd_volinfo_t *snap_vol)
+{
+ int32_t ret = -1;
+ char src_path[PATH_MAX] = "";
+ char dest_path[PATH_MAX] = "";
+ xlator_t *this = NULL;
+ char *origin_volname = NULL;
+ glusterd_volinfo_t *origin_vol = NULL;
+ int i = 0;
+ char key[32] = "";
+ char session[PATH_MAX] = "";
+ char slave[PATH_MAX] = "";
+ char snapgeo_dir[PATH_MAX] = "";
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GF_ASSERT(snap_vol);
+
+ origin_volname = gf_strdup(snap_vol->parent_volname);
+ if (!origin_volname) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(origin_volname, &origin_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "Unable to fetch "
+ "volinfo for volname %s",
+ origin_volname);
+ goto out;
+ }
+
+ for (i = 1; i <= snap_vol->gsync_slaves->count; i++) {
+ ret = snprintf(key, sizeof(key), "slave%d", i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* "origin_vol" is used here because geo-replication saves
+ * the session in the form of master_ip_slave.
+ * As we need the master volume to be same even after
+ * restore, we are passing the origin volume name.
+ *
+ * "snap_vol->gsync_slaves" contain the slave information
+ * when the snapshot was taken, hence we have to restore all
+ * those slaves information when we do snapshot restore.
+ */
+ ret = glusterd_get_geo_rep_session(
+ key, origin_vol->volname, snap_vol->gsync_slaves, session, slave);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GEOREP_GET_FAILED,
+ "Failed to get geo-rep session");
+ goto out;
+ }
+
+ GLUSTERD_GET_SNAP_GEO_REP_DIR(snapgeo_dir, snap_vol->snapshot, priv);
+ ret = snprintf(src_path, sizeof(src_path), "%s/%s", snapgeo_dir,
+ session);
+ if (ret < 0)
+ goto out;
+
+ ret = snprintf(dest_path, sizeof(dest_path), "%s/%s/%s", priv->workdir,
+ GEOREP, session);
+ if (ret < 0)
+ goto out;
+
+ ret = glusterd_copy_folder(src_path, dest_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DIR_OP_FAILED,
+ "Could not copy "
+ "%s to %s",
+ src_path, dest_path);
+ goto out;
+ }
+ }
+out:
+ if (origin_volname)
+ GF_FREE(origin_volname);
+
+ return ret;
+}
+
+int
+glusterd_restore_nfs_ganesha_file(glusterd_volinfo_t *src_vol,
+ glusterd_snap_t *snap)
+{
+ int32_t ret = -1;
+ char snap_dir[PATH_MAX] = "";
+ char src_path[PATH_MAX] = "";
+ char dest_path[PATH_MAX] = "";
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("snapshot", this, out);
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, priv, out);
+
+ GF_VALIDATE_OR_GOTO(this->name, src_vol, out);
+ GF_VALIDATE_OR_GOTO(this->name, snap, out);
+
+ GLUSTERD_GET_SNAP_DIR(snap_dir, snap, priv);
+
+ ret = snprintf(src_path, sizeof(src_path), "%s/export.%s.conf", snap_dir,
+ snap->snapname);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ ret = sys_lstat(src_path, &stbuf);
+ if (ret) {
+ if (errno == ENOENT) {
+ ret = 0;
+ gf_msg_debug(this->name, 0, "%s not found", src_path);
+ } else
+ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+ "Stat on %s failed with %s", src_path, strerror(errno));
+ goto out;
+ }
+
+ ret = snprintf(dest_path, sizeof(dest_path), "%s/export.%s.conf",
+ GANESHA_EXPORT_DIRECTORY, src_vol->volname);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ ret = glusterd_copy_file(src_path, dest_path);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Failed to copy %s in %s", src_path, dest_path);
+
+out:
+ return ret;
+}
+
+/* Snapd functions */
+int
+glusterd_is_snapd_enabled(glusterd_volinfo_t *volinfo)
+{
+ int ret = 0;
+ xlator_t *this = THIS;
+
+ ret = dict_get_str_boolean(volinfo->dict, "features.uss", -2);
+ if (ret == -2) {
+ gf_msg_debug(this->name, 0,
+ "Key features.uss not "
+ "present in the dict for volume %s",
+ volinfo->volname);
+ ret = 0;
+
+ } else if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get 'features.uss'"
+ " from dict for volume %s",
+ volinfo->volname);
+ }
+
+ return ret;
+}
+
+int32_t
+glusterd_is_snap_soft_limit_reached(glusterd_volinfo_t *volinfo, dict_t *dict)
+{
+ int32_t ret = -1;
+ uint64_t opt_max_hard = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+ uint64_t opt_max_soft = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT;
+ uint64_t limit = 0;
+ int auto_delete = 0;
+ uint64_t effective_max_limit = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT(volinfo);
+ GF_ASSERT(dict);
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ /* config values snap-max-hard-limit and snap-max-soft-limit are
+ * optional and hence we are not erroring out if values are not
+ * present
+ */
+ gd_get_snap_conf_values_if_present(priv->opts, &opt_max_hard,
+ &opt_max_soft);
+
+ /* "auto-delete" might not be set by user explicitly,
+ * in that case it's better to consider the default value.
+ * Hence not erroring out if Key is not found.
+ */
+ auto_delete = dict_get_str_boolean(
+ priv->opts, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE, _gf_false);
+
+ if (volinfo->snap_max_hard_limit < opt_max_hard)
+ effective_max_limit = volinfo->snap_max_hard_limit;
+ else
+ effective_max_limit = opt_max_hard;
+
+ limit = (opt_max_soft * effective_max_limit) / 100;
+
+ if (volinfo->snap_count >= limit && auto_delete != _gf_true) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SOFT_LIMIT_REACHED,
+ "Soft-limit "
+ "(value = %" PRIu64
+ ") of volume %s is reached. "
+ "Snapshot creation is not possible once effective "
+ "hard-limit (value = %" PRIu64 ") is reached.",
+ limit, volinfo->volname, effective_max_limit);
+
+ ret = dict_set_int8(dict, "soft-limit-reach", _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to "
+ "set soft limit exceed flag in "
+ "response dictionary");
+ }
+
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+/* This function initializes the parameter sys_hard_limit,
+ * sys_soft_limit and auto_delete value to the value set
+ * in dictionary, If value is not present then it is
+ * initialized to default values. Hence this function does not
+ * return any values.
+ */
+void
+gd_get_snap_conf_values_if_present(dict_t *dict, uint64_t *sys_hard_limit,
+ uint64_t *sys_soft_limit)
+{
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(dict);
+
+ /* "snap-max-hard-limit" might not be set by user explicitly,
+ * in that case it's better to consider the default value.
+ * Hence not erroring out if Key is not found.
+ */
+ if (dict_get_uint64(dict, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+ sys_hard_limit)) {
+ gf_msg_debug(this->name, 0,
+ "%s is not present in"
+ "dictionary",
+ GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT);
+ }
+
+ /* "snap-max-soft-limit" might not be set by user explicitly,
+ * in that case it's better to consider the default value.
+ * Hence not erroring out if Key is not found.
+ */
+ if (dict_get_uint64(dict, GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT,
+ sys_soft_limit)) {
+ gf_msg_debug(this->name, 0,
+ "%s is not present in"
+ "dictionary",
+ GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT);
+ }
+}
+
+int
+glusterd_get_snap_status_str(glusterd_snap_t *snapinfo, char *snap_status_str)
+{
+ int ret = -1;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, snapinfo, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, snap_status_str, out);
+
+ switch (snapinfo->snap_status) {
+ case GD_SNAP_STATUS_NONE:
+ sprintf(snap_status_str, "%s", "none");
+ break;
+ case GD_SNAP_STATUS_INIT:
+ sprintf(snap_status_str, "%s", "init");
+ break;
+ case GD_SNAP_STATUS_IN_USE:
+ sprintf(snap_status_str, "%s", "in_use");
+ break;
+ case GD_SNAP_STATUS_DECOMMISSION:
+ sprintf(snap_status_str, "%s", "decommissioned");
+ break;
+ case GD_SNAP_STATUS_UNDER_RESTORE:
+ sprintf(snap_status_str, "%s", "under_restore");
+ break;
+ case GD_SNAP_STATUS_RESTORED:
+ sprintf(snap_status_str, "%s", "restored");
+ break;
+ default:
+ goto out;
+ }
+ ret = 0;
+out:
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h
new file mode 100644
index 00000000000..5762999bba7
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot-utils.h
@@ -0,0 +1,169 @@
+/*
+ Copyright (c) 2015 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#ifndef _GLUSTERD_SNAP_UTILS_H
+#define _GLUSTERD_SNAP_UTILS_H
+
+#define GLUSTERD_GET_SNAP_DIR(path, snap, priv) \
+ do { \
+ int32_t _snap_dir_len; \
+ _snap_dir_len = snprintf(path, PATH_MAX, "%s/snaps/%s", priv->workdir, \
+ snap->snapname); \
+ if ((_snap_dir_len < 0) || (_snap_dir_len >= PATH_MAX)) { \
+ path[0] = 0; \
+ } \
+ } while (0)
+
+int32_t
+glusterd_snap_volinfo_find(char *volname, glusterd_snap_t *snap,
+ glusterd_volinfo_t **volinfo);
+
+int32_t
+glusterd_snap_volinfo_find_from_parent_volname(char *origin_volname,
+ glusterd_snap_t *snap,
+ glusterd_volinfo_t **volinfo);
+
+int
+glusterd_snap_volinfo_find_by_volume_id(uuid_t volume_id,
+ glusterd_volinfo_t **volinfo);
+
+int32_t
+glusterd_add_snapd_to_dict(glusterd_volinfo_t *volinfo, dict_t *dict,
+ int32_t count);
+
+int
+glusterd_compare_snap_time(struct cds_list_head *, struct cds_list_head *);
+
+int
+glusterd_compare_snap_vol_time(struct cds_list_head *, struct cds_list_head *);
+
+int32_t
+glusterd_snap_volinfo_restore(dict_t *dict, dict_t *rsp_dict,
+ glusterd_volinfo_t *new_volinfo,
+ glusterd_volinfo_t *snap_volinfo,
+ int32_t volcount);
+int32_t
+glusterd_snapobject_delete(glusterd_snap_t *snap);
+
+int32_t
+glusterd_cleanup_snaps_for_volume(glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_missed_snapinfo_new(glusterd_missed_snap_info **missed_snapinfo);
+
+int32_t
+glusterd_missed_snap_op_new(glusterd_snap_op_t **snap_op);
+
+int32_t
+glusterd_add_missed_snaps_to_dict(dict_t *rsp_dict,
+ glusterd_volinfo_t *snap_vol,
+ glusterd_brickinfo_t *brickinfo,
+ int32_t brick_number, int32_t op);
+
+int32_t
+glusterd_add_missed_snaps_to_export_dict(dict_t *peer_data);
+
+int32_t
+glusterd_import_friend_missed_snap_list(dict_t *peer_data);
+
+int
+gd_restore_snap_volume(dict_t *dict, dict_t *rsp_dict,
+ glusterd_volinfo_t *orig_vol,
+ glusterd_volinfo_t *snap_vol, int32_t volcount);
+
+int32_t
+glusterd_mount_lvm_snapshot(glusterd_brickinfo_t *brickinfo,
+ char *brick_mount_path);
+
+int32_t
+glusterd_umount(const char *path);
+
+int32_t
+glusterd_snap_unmount(xlator_t *this, glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_add_snapshots_to_export_dict(dict_t *peer_data);
+
+int32_t
+glusterd_compare_friend_snapshots(dict_t *peer_data, char *peername,
+ uuid_t peerid);
+
+int32_t
+glusterd_store_create_snap_dir(glusterd_snap_t *snap);
+
+int32_t
+glusterd_copy_file(const char *source, const char *destination);
+
+int32_t
+glusterd_copy_folder(const char *source, const char *destination);
+
+int32_t
+glusterd_get_geo_rep_session(char *slave_key, char *origin_volname,
+ dict_t *gsync_slaves_dict, char *session,
+ char *slave);
+
+int32_t
+glusterd_restore_geo_rep_files(glusterd_volinfo_t *snap_vol);
+
+int32_t
+glusterd_copy_quota_files(glusterd_volinfo_t *src_vol,
+ glusterd_volinfo_t *dest_vol,
+ gf_boolean_t *conf_present);
+
+int
+glusterd_snap_use_rsp_dict(dict_t *aggr, dict_t *rsp_dict);
+
+int
+gd_add_vol_snap_details_to_dict(dict_t *dict, char *prefix,
+ glusterd_volinfo_t *volinfo);
+
+int
+gd_add_brick_snap_details_to_dict(dict_t *dict, char *prefix,
+ glusterd_brickinfo_t *brickinfo);
+
+int
+gd_import_new_brick_snap_details(dict_t *dict, char *prefix,
+ glusterd_brickinfo_t *brickinfo);
+
+int
+gd_import_volume_snap_details(dict_t *dict, glusterd_volinfo_t *volinfo,
+ char *prefix, char *volname);
+
+int32_t
+glusterd_snap_quorum_check(dict_t *dict, gf_boolean_t snap_volume,
+ char **op_errstr, uint32_t *op_errno);
+
+int32_t
+glusterd_snap_brick_create(glusterd_volinfo_t *snap_volinfo,
+ glusterd_brickinfo_t *brickinfo, int32_t brick_count,
+ int32_t clone);
+
+int
+glusterd_snapshot_restore_cleanup(dict_t *rsp_dict, char *volname,
+ glusterd_snap_t *snap);
+
+void
+glusterd_get_snapd_dir(glusterd_volinfo_t *volinfo, char *path, int path_len);
+
+int
+glusterd_is_snapd_enabled(glusterd_volinfo_t *volinfo);
+
+int32_t
+glusterd_check_and_set_config_limit(glusterd_conf_t *priv);
+
+int32_t
+glusterd_is_snap_soft_limit_reached(glusterd_volinfo_t *volinfo, dict_t *dict);
+
+void
+gd_get_snap_conf_values_if_present(dict_t *opts, uint64_t *sys_hard_limit,
+ uint64_t *sys_soft_limit);
+int
+glusterd_get_snap_status_str(glusterd_snap_t *snapinfo, char *snap_status_str);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-snapshot.c b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
new file mode 100644
index 00000000000..aeaa8d15214
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-snapshot.c
@@ -0,0 +1,10087 @@
+/*
+ Copyright (c) 2013-2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <inttypes.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/resource.h>
+#include <sys/statvfs.h>
+#include <sys/mount.h>
+#include <signal.h>
+#include "glusterd-messages.h"
+#include "glusterd-errno.h"
+
+#if defined(GF_LINUX_HOST_OS)
+#include <mntent.h>
+#else
+#include "mntent_compat.h"
+#endif
+
+#ifdef __NetBSD__
+#define umount2(dir, flags) unmount(dir, ((flags) != 0) ? MNT_FORCE : 0)
+#endif
+
+#if defined(GF_DARWIN_HOST_OS) || defined(__FreeBSD__)
+#include <sys/param.h>
+#include <sys/mount.h>
+#define umount2(dir, flags) unmount(dir, ((flags) != 0) ? MNT_FORCE : 0)
+#endif
+
+#include <regex.h>
+
+#include <glusterfs/compat.h>
+#include "protocol-common.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/timer.h>
+#include "glusterd-mem-types.h"
+#include "glusterd.h"
+#include "glusterd-sm.h"
+#include "glusterd-op-sm.h"
+#include "glusterd-utils.h"
+#include "glusterd-store.h"
+#include <glusterfs/run.h>
+#include "glusterd-volgen.h"
+#include "glusterd-mgmt.h"
+#include "glusterd-syncop.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-snapd-svc.h"
+
+#include "glusterfs3.h"
+
+#include <glusterfs/syscall.h>
+#include "cli1-xdr.h"
+#include "xdr-generic.h"
+
+#include <glusterfs/lvm-defaults.h>
+#include <glusterfs/events.h>
+
+#define GLUSTERD_GET_UUID_NOHYPHEN(ret_string, uuid) \
+ do { \
+ char *snap_volname_ptr = ret_string; \
+ char tmp_uuid[64]; \
+ char *snap_volid_ptr = uuid_utoa_r(uuid, tmp_uuid); \
+ while (*snap_volid_ptr) { \
+ if (*snap_volid_ptr == '-') { \
+ snap_volid_ptr++; \
+ } else { \
+ (*snap_volname_ptr++) = (*snap_volid_ptr++); \
+ } \
+ } \
+ *snap_volname_ptr = '\0'; \
+ } while (0)
+
+char snap_mount_dir[VALID_GLUSTERD_PATHMAX];
+struct snap_create_args_ {
+ xlator_t *this;
+ dict_t *dict;
+ dict_t *rsp_dict;
+ glusterd_volinfo_t *snap_vol;
+ glusterd_brickinfo_t *brickinfo;
+ struct syncargs *args;
+ int32_t volcount;
+ int32_t brickcount;
+ int32_t brickorder;
+};
+
+/* This structure is used to store unsupported options and their values
+ * for snapshotted volume.
+ */
+struct gd_snap_unsupported_opt_t {
+ char *key;
+ char *value;
+};
+
+typedef struct snap_create_args_ snap_create_args_t;
+
+/* This function is called to get the device path of the snap lvm. Usually
+ if /dev/mapper/<group-name>-<lvm-name> is the device for the lvm,
+ then the snap device will be /dev/<group-name>/<snapname>.
+ This function takes care of building the path for the snap device.
+*/
+
+char *
+glusterd_build_snap_device_path(char *device, char *snapname,
+ int32_t brickcount)
+{
+ char snap[PATH_MAX] = "";
+ char msg[1024] = "";
+ char volgroup[PATH_MAX] = "";
+ char *snap_device = NULL;
+ xlator_t *this = NULL;
+ runner_t runner = {
+ 0,
+ };
+ char *ptr = NULL;
+ int ret = -1;
+
+ this = THIS;
+ GF_ASSERT(this);
+ if (!device) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "device is NULL");
+ goto out;
+ }
+ if (!snapname) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "snapname is NULL");
+ goto out;
+ }
+
+ runinit(&runner);
+ runner_add_args(&runner, "lvs", "--noheadings", "-o", "vg_name", device,
+ NULL);
+ runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
+ snprintf(msg, sizeof(msg), "Get volume group for device %s", device);
+ runner_log(&runner, this->name, GF_LOG_DEBUG, msg);
+ ret = runner_start(&runner);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_VG_GET_FAIL,
+ "Failed to get volume group "
+ "for device %s",
+ device);
+ runner_end(&runner);
+ goto out;
+ }
+ ptr = fgets(volgroup, sizeof(volgroup),
+ runner_chio(&runner, STDOUT_FILENO));
+ if (!ptr || !strlen(volgroup)) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_VG_GET_FAIL,
+ "Failed to get volume group "
+ "for snap %s",
+ snapname);
+ runner_end(&runner);
+ ret = -1;
+ goto out;
+ }
+ runner_end(&runner);
+
+ snprintf(snap, sizeof(snap), "/dev/%s/%s_%d", gf_trim(volgroup), snapname,
+ brickcount);
+ snap_device = gf_strdup(snap);
+ if (!snap_device) {
+ gf_msg(this->name, GF_LOG_WARNING, ENOMEM, GD_MSG_NO_MEMORY,
+ "Cannot copy the snapshot device name for snapname: %s",
+ snapname);
+ }
+
+out:
+ return snap_device;
+}
+
+/* Look for disconnected peers, for missed snap creates or deletes */
+static int32_t
+glusterd_find_missed_snap(dict_t *rsp_dict, glusterd_volinfo_t *vol,
+ struct cds_list_head *peers, int32_t op)
+{
+ int32_t brick_count = -1;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(peers);
+ GF_ASSERT(vol);
+
+ brick_count = 0;
+ cds_list_for_each_entry(brickinfo, &vol->bricks, brick_list)
+ {
+ if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
+ /* If the brick belongs to the same node */
+ brick_count++;
+ continue;
+ }
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, peers, uuid_list)
+ {
+ if (gf_uuid_compare(peerinfo->uuid, brickinfo->uuid)) {
+ /* If the brick doesn't belong to this peer */
+ continue;
+ }
+
+ /* Found peer who owns the brick, *
+ * if peer is not connected or not *
+ * friend add it to missed snap list */
+ if (!(peerinfo->connected) ||
+ (peerinfo->state.state != GD_FRIEND_STATE_BEFRIENDED)) {
+ ret = glusterd_add_missed_snaps_to_dict(
+ rsp_dict, vol, brickinfo, brick_count + 1, op);
+ if (ret) {
+ RCU_READ_UNLOCK;
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MISSED_SNAP_CREATE_FAIL,
+ "Failed to add missed snapshot "
+ "info for %s:%s in the "
+ "rsp_dict",
+ brickinfo->hostname, brickinfo->path);
+ goto out;
+ }
+ }
+ }
+ RCU_READ_UNLOCK;
+ brick_count++;
+ }
+
+ ret = 0;
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+snap_max_limits_display_commit(dict_t *rsp_dict, char *volname, char *op_errstr,
+ int len)
+{
+ char err_str[PATH_MAX] = "";
+ char key[64] = "";
+ int keylen;
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = -1;
+ uint64_t active_hard_limit = 0;
+ uint64_t snap_max_limit = 0;
+ uint64_t soft_limit_value = -1;
+ uint64_t count = 0;
+ xlator_t *this = NULL;
+ uint64_t opt_hard_max = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+ uint64_t opt_soft_max = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT;
+ char *auto_delete = "disable";
+ char *snap_activate = "disable";
+
+ this = THIS;
+
+ GF_ASSERT(this);
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(op_errstr);
+
+ conf = this->private;
+
+ GF_ASSERT(conf);
+
+ /* config values snap-max-hard-limit and snap-max-soft-limit are
+ * optional and hence we are not erroring out if values are not
+ * present
+ */
+ gd_get_snap_conf_values_if_present(conf->opts, &opt_hard_max,
+ &opt_soft_max);
+
+ if (!volname) {
+ /* For system limit */
+ cds_list_for_each_entry(volinfo, &conf->volumes, vol_list)
+ {
+ if (volinfo->is_snap_volume == _gf_true)
+ continue;
+
+ snap_max_limit = volinfo->snap_max_hard_limit;
+ if (snap_max_limit > opt_hard_max)
+ active_hard_limit = opt_hard_max;
+ else
+ active_hard_limit = snap_max_limit;
+
+ soft_limit_value = (opt_soft_max * active_hard_limit) / 100;
+
+ keylen = snprintf(key, sizeof(key), "volume%" PRId64 "-volname",
+ count);
+ ret = dict_set_strn(rsp_dict, key, keylen, volinfo->volname);
+ if (ret) {
+ len = snprintf(err_str, PATH_MAX, "Failed to set %s", key);
+ if (len < 0) {
+ strcpy(err_str, "<error>");
+ }
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "volume%" PRId64 "-snap-max-hard-limit",
+ count);
+ ret = dict_set_uint64(rsp_dict, key, snap_max_limit);
+ if (ret) {
+ len = snprintf(err_str, PATH_MAX, "Failed to set %s", key);
+ if (len < 0) {
+ strcpy(err_str, "<error>");
+ }
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "volume%" PRId64 "-active-hard-limit",
+ count);
+ ret = dict_set_uint64(rsp_dict, key, active_hard_limit);
+ if (ret) {
+ len = snprintf(err_str, PATH_MAX, "Failed to set %s", key);
+ if (len < 0) {
+ strcpy(err_str, "<error>");
+ }
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "volume%" PRId64 "-snap-max-soft-limit",
+ count);
+ ret = dict_set_uint64(rsp_dict, key, soft_limit_value);
+ if (ret) {
+ len = snprintf(err_str, PATH_MAX, "Failed to set %s", key);
+ if (len < 0) {
+ strcpy(err_str, "<error>");
+ }
+ goto out;
+ }
+ count++;
+ }
+
+ ret = dict_set_uint64(rsp_dict, "voldisplaycount", count);
+ if (ret) {
+ snprintf(err_str, PATH_MAX, "Failed to set voldisplaycount");
+ goto out;
+ }
+ } else {
+ /* For one volume */
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(err_str, PATH_MAX,
+ "Volume (%s) does not "
+ "exist",
+ volname);
+ goto out;
+ }
+
+ snap_max_limit = volinfo->snap_max_hard_limit;
+ if (snap_max_limit > opt_hard_max)
+ active_hard_limit = opt_hard_max;
+ else
+ active_hard_limit = snap_max_limit;
+
+ soft_limit_value = (opt_soft_max * active_hard_limit) / 100;
+
+ keylen = snprintf(key, sizeof(key), "volume%" PRId64 "-volname", count);
+ ret = dict_set_strn(rsp_dict, key, keylen, volinfo->volname);
+ if (ret) {
+ len = snprintf(err_str, PATH_MAX, "Failed to set %s", key);
+ if (len < 0) {
+ strcpy(err_str, "<error>");
+ }
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "volume%" PRId64 "-snap-max-hard-limit",
+ count);
+ ret = dict_set_uint64(rsp_dict, key, snap_max_limit);
+ if (ret) {
+ len = snprintf(err_str, PATH_MAX, "Failed to set %s", key);
+ if (len < 0) {
+ strcpy(err_str, "<error>");
+ }
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "volume%" PRId64 "-active-hard-limit",
+ count);
+ ret = dict_set_uint64(rsp_dict, key, active_hard_limit);
+ if (ret) {
+ len = snprintf(err_str, PATH_MAX, "Failed to set %s", key);
+ if (len < 0) {
+ strcpy(err_str, "<error>");
+ }
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "volume%" PRId64 "-snap-max-soft-limit",
+ count);
+ ret = dict_set_uint64(rsp_dict, key, soft_limit_value);
+ if (ret) {
+ len = snprintf(err_str, PATH_MAX, "Failed to set %s", key);
+ if (len < 0) {
+ strcpy(err_str, "<error>");
+ }
+ goto out;
+ }
+
+ count++;
+
+ ret = dict_set_uint64(rsp_dict, "voldisplaycount", count);
+ if (ret) {
+ snprintf(err_str, PATH_MAX, "Failed to set voldisplaycount");
+ goto out;
+ }
+ }
+
+ ret = dict_set_uint64(rsp_dict, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+ opt_hard_max);
+ if (ret) {
+ snprintf(err_str, PATH_MAX, "Failed to set %s in response dictionary",
+ GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT);
+ goto out;
+ }
+
+ ret = dict_set_uint64(rsp_dict, GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT,
+ opt_soft_max);
+ if (ret) {
+ snprintf(err_str, PATH_MAX, "Failed to set %s in response dictionary",
+ GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT);
+ goto out;
+ }
+
+ /* "auto-delete" might not be set by user explicitly,
+ * in that case it's better to consider the default value.
+ * Hence not erroring out if Key is not found.
+ */
+ ret = dict_get_strn(conf->opts, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE,
+ SLEN(GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE),
+ &auto_delete);
+
+ ret = dict_set_dynstr_with_alloc(
+ rsp_dict, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE, auto_delete);
+ if (ret) {
+ snprintf(err_str, PATH_MAX, "Failed to set %s in response dictionary",
+ GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE);
+ goto out;
+ }
+
+ /* "snap-activate-on-create" might not be set by user explicitly,
+ * in that case it's better to consider the default value.
+ * Hence not erroring out if Key is not found.
+ */
+ ret = dict_get_strn(conf->opts, GLUSTERD_STORE_KEY_SNAP_ACTIVATE,
+ SLEN(GLUSTERD_STORE_KEY_SNAP_ACTIVATE), &snap_activate);
+
+ ret = dict_set_dynstr_with_alloc(rsp_dict, GLUSTERD_STORE_KEY_SNAP_ACTIVATE,
+ snap_activate);
+ if (ret) {
+ snprintf(err_str, PATH_MAX, "Failed to set %s in response dictionary",
+ GLUSTERD_STORE_KEY_SNAP_ACTIVATE);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ strncpy(op_errstr, err_str, len);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED, "%s",
+ err_str);
+ }
+ return ret;
+}
+
+/* Third argument of scandir(used in glusterd_copy_geo_rep_session_files)
+ * is filter function. As we don't want "." and ".." files present in the
+ * directory, we are excliding these 2 files.
+ * "file_select" function here does the job of filtering.
+ */
+int
+file_select(const struct dirent *entry)
+{
+ if (entry == NULL)
+ return (FALSE);
+
+ if ((strcmp(entry->d_name, ".") == 0) || (strcmp(entry->d_name, "..") == 0))
+ return (FALSE);
+ else
+ return (TRUE);
+}
+
+int32_t
+glusterd_copy_geo_rep_session_files(char *session, glusterd_volinfo_t *snap_vol)
+{
+ int32_t ret = -1;
+ char snap_session_dir[PATH_MAX] = "";
+ char georep_session_dir[PATH_MAX] = "";
+ regex_t *reg_exp = NULL;
+ int file_count = -1;
+ struct dirent **files = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ int i = 0;
+ char src_path[PATH_MAX] = "";
+ char dest_path[PATH_MAX] = "";
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GF_ASSERT(session);
+ GF_ASSERT(snap_vol);
+
+ ret = snprintf(georep_session_dir, sizeof(georep_session_dir), "%s/%s/%s",
+ priv->workdir, GEOREP, session);
+ if (ret < 0) { /* Negative value is an error */
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ ret = snprintf(snap_session_dir, sizeof(snap_session_dir), "%s/%s/%s/%s/%s",
+ priv->workdir, GLUSTERD_VOL_SNAP_DIR_PREFIX,
+ snap_vol->snapshot->snapname, GEOREP, session);
+ if (ret < 0) { /* Negative value is an error */
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ ret = mkdir_p(snap_session_dir, 0755, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Creating directory %s failed", snap_session_dir);
+ goto out;
+ }
+
+ /* TODO : good to have - Allocate in stack instead of heap */
+ reg_exp = GF_CALLOC(1, sizeof(regex_t), gf_common_mt_regex_t);
+ if (!reg_exp) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Failed to allocate memory for regular expression");
+ goto out;
+ }
+
+ ret = regcomp(reg_exp, "(.*status$)|(.*conf$)\0", REG_EXTENDED);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_REG_COMPILE_FAILED,
+ "Failed to compile the regular expression");
+ goto out;
+ }
+
+ /* If there are no files in a particular session then fail it*/
+ file_count = scandir(georep_session_dir, &files, file_select, alphasort);
+ if (file_count <= 0) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOENT, GD_MSG_FILE_OP_FAILED,
+ "Session files not present "
+ "in %s",
+ georep_session_dir);
+ goto out;
+ }
+
+ /* Now compare the file name with regular expression to see if
+ * there is a match
+ */
+ for (i = 0; i < file_count; i++) {
+ if (regexec(reg_exp, files[i]->d_name, 0, NULL, 0))
+ continue;
+
+ ret = snprintf(src_path, sizeof(src_path), "%s/%s", georep_session_dir,
+ files[i]->d_name);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ ret = snprintf(dest_path, sizeof(dest_path), "%s/%s", snap_session_dir,
+ files[i]->d_name);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ ret = glusterd_copy_file(src_path, dest_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Could not copy file %s of session %s", files[i]->d_name,
+ session);
+ goto out;
+ }
+ }
+out:
+ /* files are malloc'd by scandir, free them */
+ if (file_count > 0) {
+ while (file_count--) {
+ free(files[file_count]);
+ }
+ free(files);
+ }
+
+ if (reg_exp)
+ GF_FREE(reg_exp);
+
+ return ret;
+}
+
+/* This function will take backup of the volume store
+ * of the to-be restored volume. This will help us to
+ * revert the operation if it fails.
+ *
+ * @param volinfo volinfo of the origin volume
+ *
+ * @return 0 on success and -1 on failure
+ */
+int
+glusterd_snapshot_backup_vol(glusterd_volinfo_t *volinfo)
+{
+ char pathname[PATH_MAX] = "";
+ int ret = -1;
+ int op_ret = 0;
+ char delete_path[PATH_MAX] = "";
+ char trashdir[PATH_MAX] = "";
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(volinfo);
+
+ GLUSTERD_GET_VOLUME_DIR(pathname, volinfo, priv);
+
+ len = snprintf(delete_path, sizeof(delete_path),
+ "%s/" GLUSTERD_TRASH "/vols-%s.deleted", priv->workdir,
+ volinfo->volname);
+ if ((len < 0) || (len >= sizeof(delete_path))) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ len = snprintf(trashdir, sizeof(trashdir), "%s/" GLUSTERD_TRASH,
+ priv->workdir);
+ if ((len < 0) || (len >= sizeof(trashdir))) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ /* Create trash folder if it is not there */
+ ret = sys_mkdir(trashdir, 0755);
+ if (ret && errno != EEXIST) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Failed to create trash directory, reason : %s",
+ strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ /* Move the origin volume volder to the backup location */
+ ret = sys_rename(pathname, delete_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Failed to rename snap "
+ "directory %s to %s",
+ pathname, delete_path);
+ goto out;
+ }
+
+ /* Re-create an empty origin volume folder so that restore can
+ * happen. */
+ ret = sys_mkdir(pathname, 0755);
+ if (ret && errno != EEXIST) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Failed to create origin "
+ "volume directory (%s), reason : %s",
+ pathname, strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ /* Save the actual return value */
+ op_ret = ret;
+ if (ret) {
+ /* Revert the changes in case of failure */
+ ret = sys_rmdir(pathname);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to rmdir: %s,err: %s", pathname,
+ strerror(errno));
+ }
+
+ ret = sys_rename(delete_path, pathname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Failed to rename directory %s to %s", delete_path,
+ pathname);
+ }
+
+ ret = sys_rmdir(trashdir);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to rmdir: %s, Reason: %s",
+ trashdir, strerror(errno));
+ }
+ }
+
+ gf_msg_trace(this->name, 0, "Returning %d", op_ret);
+
+ return op_ret;
+}
+
+static int32_t
+glusterd_copy_geo_rep_files(glusterd_volinfo_t *origin_vol,
+ glusterd_volinfo_t *snap_vol, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ int i = 0;
+ xlator_t *this = NULL;
+ char key[32] = "";
+ char session[PATH_MAX] = "";
+ char slave[PATH_MAX] = "";
+ char snapgeo_dir[PATH_MAX] = "";
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GF_ASSERT(origin_vol);
+ GF_ASSERT(snap_vol);
+ GF_ASSERT(rsp_dict);
+
+ /* This condition is not satisfied if the volume
+ * is slave volume.
+ */
+ if (!origin_vol->gsync_slaves) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_SLAVE, NULL);
+ ret = 0;
+ goto out;
+ }
+
+ GLUSTERD_GET_SNAP_GEO_REP_DIR(snapgeo_dir, snap_vol->snapshot, priv);
+
+ ret = sys_mkdir(snapgeo_dir, 0755);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Creating directory %s failed", snapgeo_dir);
+ goto out;
+ }
+
+ for (i = 1; i <= origin_vol->gsync_slaves->count; i++) {
+ ret = snprintf(key, sizeof(key), "slave%d", i);
+ if (ret < 0) /* Negative value is an error */
+ goto out;
+
+ ret = glusterd_get_geo_rep_session(
+ key, origin_vol->volname, origin_vol->gsync_slaves, session, slave);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GEOREP_GET_FAILED,
+ "Failed to get geo-rep session");
+ goto out;
+ }
+
+ ret = glusterd_copy_geo_rep_session_files(session, snap_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED,
+ "Failed to copy files"
+ " related to session %s",
+ session);
+ goto out;
+ }
+ }
+
+out:
+ return ret;
+}
+
+/* This function will restore a snapshot volumes
+ *
+ * @param dict dictionary containing snapshot restore request
+ * @param op_errstr In case of any failure error message will be returned
+ * in this variable
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_snapshot_restore(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ int ret = -1;
+ int32_t volcount = -1;
+ char *snapname = NULL;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *snap_volinfo = NULL;
+ glusterd_volinfo_t *tmp = NULL;
+ glusterd_volinfo_t *parent_volinfo = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(rsp_dict);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get snap name");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name(snapname);
+ if (NULL == snap) {
+ ret = gf_asprintf(op_errstr, "Snapshot (%s) does not exist", snapname);
+ if (ret < 0) {
+ goto out;
+ }
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND, "%s",
+ *op_errstr);
+ ret = -1;
+ goto out;
+ }
+
+ volcount = 0;
+ cds_list_for_each_entry_safe(snap_volinfo, tmp, &snap->volumes, vol_list)
+ {
+ volcount++;
+ ret = glusterd_volinfo_find(snap_volinfo->parent_volname,
+ &parent_volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Could not get volinfo of %s", snap_volinfo->parent_volname);
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(rsp_dict, "snapuuid",
+ uuid_utoa(snap->snap_id));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snap "
+ "uuid in response dictionary for %s snapshot",
+ snap->snapname);
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(rsp_dict, "volname",
+ snap_volinfo->parent_volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snap "
+ "uuid in response dictionary for %s snapshot",
+ snap->snapname);
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(rsp_dict, "volid",
+ uuid_utoa(parent_volinfo->volume_id));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snap "
+ "uuid in response dictionary for %s snapshot",
+ snap->snapname);
+ goto out;
+ }
+
+ if (is_origin_glusterd(dict) == _gf_true) {
+ /* From origin glusterd check if *
+ * any peers with snap bricks is down */
+ ret = glusterd_find_missed_snap(rsp_dict, snap_volinfo,
+ &priv->peers,
+ GF_SNAP_OPTION_TYPE_RESTORE);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_GET_FAIL,
+ "Failed to find missed snap restores");
+ goto out;
+ }
+ }
+ /* During snapshot restore, mount point for stopped snap
+ * should exist as it is required to set extended attribute.
+ */
+ ret = glusterd_recreate_vol_brick_mounts(this, snap_volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MNT_RECREATE_FAIL,
+ "Failed to recreate brick mounts for %s", snap->snapname);
+ goto out;
+ }
+
+ ret = gd_restore_snap_volume(dict, rsp_dict, parent_volinfo,
+ snap_volinfo, volcount);
+ if (ret) {
+ /* No need to update op_errstr because it is assumed
+ * that the called function will do that in case of
+ * failure.
+ */
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESTORE_FAIL,
+ "Failed to restore "
+ "snap for %s",
+ snapname);
+ goto out;
+ }
+
+ /* Detach the volinfo from priv->volumes, so that no new
+ * command can ref it any more and then unref it.
+ */
+ cds_list_del_init(&parent_volinfo->vol_list);
+ glusterd_volinfo_unref(parent_volinfo);
+ }
+
+ ret = 0;
+
+ /* TODO: Need to check if we need to delete the snap after the
+ * operation is successful or not. Also need to persist the state
+ * of restore operation in the store.
+ */
+out:
+ return ret;
+}
+
+/* This function is called before actual restore is taken place. This function
+ * will validate whether the snapshot volumes are ready to be restored or not.
+ *
+ * @param dict dictionary containing snapshot restore request
+ * @param op_errstr In case of any failure error message will be returned
+ * in this variable
+ * @param rsp_dict response dictionary
+ * @return Negative value on Failure and 0 in success
+ */
+int32_t
+glusterd_snapshot_restore_prevalidate(dict_t *dict, char **op_errstr,
+ uint32_t *op_errno, dict_t *rsp_dict)
+{
+ int ret = -1;
+ int32_t i = 0;
+ int32_t volcount = 0;
+ int32_t brick_count = 0;
+ gf_boolean_t snap_restored = _gf_false;
+ char key[64] = "";
+ int keylen;
+ char *volname = NULL;
+ char *snapname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_snap_t *snap = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+ GF_ASSERT(rsp_dict);
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get "
+ "snap name");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name(snapname);
+ if (NULL == snap) {
+ ret = gf_asprintf(op_errstr, "Snapshot (%s) does not exist", snapname);
+ *op_errno = EG_SNAPEXST;
+ if (ret < 0) {
+ goto out;
+ }
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND, "%s",
+ *op_errstr);
+ ret = -1;
+ goto out;
+ }
+
+ snap_restored = snap->snap_restored;
+
+ if (snap_restored) {
+ ret = gf_asprintf(op_errstr,
+ "Snapshot (%s) is already "
+ "restored",
+ snapname);
+ if (ret < 0) {
+ goto out;
+ }
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_OP_FAILED, "%s",
+ *op_errstr);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_strn(rsp_dict, "snapname", SLEN("snapname"), snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "snap name(%s)",
+ snapname);
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "volcount", SLEN("volcount"), &volcount);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get volume count");
+ goto out;
+ }
+
+ /* Snapshot restore will only work if all the volumes,
+ that are part of the snapshot, are stopped. */
+ for (i = 1; i <= volcount; ++i) {
+ keylen = snprintf(key, sizeof(key), "volname%d", i);
+ ret = dict_get_strn(dict, key, keylen, &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to "
+ "get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ ret = gf_asprintf(op_errstr,
+ "Volume (%s) "
+ "does not exist",
+ volname);
+ *op_errno = EG_NOVOL;
+ if (ret < 0) {
+ goto out;
+ }
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, "%s",
+ *op_errstr);
+ ret = -1;
+ goto out;
+ }
+
+ if (glusterd_is_volume_started(volinfo)) {
+ ret = gf_asprintf(
+ op_errstr,
+ "Volume (%s) has been "
+ "started. Volume needs to be stopped before restoring "
+ "a snapshot.",
+ volname);
+ *op_errno = EG_VOLRUN;
+ if (ret < 0) {
+ goto out;
+ }
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_OP_FAILED, "%s",
+ *op_errstr);
+ ret = -1;
+ goto out;
+ }
+
+ /* Take backup of the volinfo folder */
+ ret = glusterd_snapshot_backup_vol(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED,
+ "Failed to backup "
+ "volume backend files for %s volume",
+ volinfo->volname);
+ goto out;
+ }
+ }
+
+ /* Get brickinfo for snap_volumes */
+ volcount = 0;
+ cds_list_for_each_entry(volinfo, &snap->volumes, vol_list)
+ {
+ volcount++;
+ brick_count = 0;
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ brick_count++;
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID))
+ continue;
+
+ keylen = snprintf(key, sizeof(key), "snap%d.brick%d.path", volcount,
+ brick_count);
+ ret = dict_set_strn(rsp_dict, key, keylen, brickinfo->path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "snap%d.brick%d.snap_status",
+ volcount, brick_count);
+ ret = dict_set_int32n(rsp_dict, key, keylen,
+ brickinfo->snap_status);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "snap%d.brick%d.device_path",
+ volcount, brick_count);
+ ret = dict_set_strn(rsp_dict, key, keylen, brickinfo->device_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "snap%d.brick%d.fs_type",
+ volcount, brick_count);
+ ret = dict_set_strn(rsp_dict, key, keylen, brickinfo->fstype);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "snap%d.brick%d.mnt_opts",
+ volcount, brick_count);
+ ret = dict_set_strn(rsp_dict, key, keylen, brickinfo->mnt_opts);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+ }
+
+ keylen = snprintf(key, sizeof(key), "snap%d.brick_count", volcount);
+ ret = dict_set_int32n(rsp_dict, key, keylen, brick_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32n(rsp_dict, "volcount", SLEN("volcount"), volcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int
+snap_max_hard_limits_validate(dict_t *dict, char *volname, uint64_t value,
+ char **op_errstr)
+{
+ char err_str[PATH_MAX] = "";
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = -1;
+ uint64_t max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+ xlator_t *this = NULL;
+ uint64_t opt_hard_max = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+
+ conf = this->private;
+
+ GF_ASSERT(conf);
+
+ if (volname) {
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (!ret) {
+ if (volinfo->is_snap_volume) {
+ ret = -1;
+ snprintf(err_str, PATH_MAX,
+ "%s is a snap volume. Configuring "
+ "snap-max-hard-limit for a snap "
+ "volume is prohibited.",
+ volname);
+ goto out;
+ }
+ }
+ }
+
+ /* "snap-max-hard-limit" might not be set by user explicitly,
+ * in that case it's better to use the default value.
+ * Hence not erroring out if Key is not found.
+ */
+ ret = dict_get_uint64(conf->opts, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+ &opt_hard_max);
+ if (ret) {
+ ret = 0;
+ gf_msg_debug(this->name, 0,
+ "%s is not present in "
+ "opts dictionary",
+ GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT);
+ }
+
+ /* volume snap-max-hard-limit cannot exceed system snap-max-hard-limit.
+ * Hence during prevalidate following checks are made to ensure the
+ * snap-max-hard-limit set on one particular volume does not
+ * exceed snap-max-hard-limit set globally (system limit).
+ */
+ if (value && volname) {
+ max_limit = opt_hard_max;
+ }
+
+ if (value > max_limit) {
+ ret = -1;
+ snprintf(err_str, PATH_MAX,
+ "Invalid snap-max-hard-limit "
+ "%" PRIu64 ". Expected range 1 - %" PRIu64,
+ value, max_limit);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ *op_errstr = gf_strdup(err_str);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_OP_FAILED, "%s",
+ err_str);
+ }
+ return ret;
+}
+
+int
+glusterd_snapshot_config_prevalidate(dict_t *dict, char **op_errstr,
+ uint32_t *op_errno)
+{
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ int config_command = 0;
+ char err_str[PATH_MAX] = "";
+ glusterd_conf_t *conf = NULL;
+ uint64_t hard_limit = 0;
+ uint64_t soft_limit = 0;
+ gf_loglevel_t loglevel = GF_LOG_ERROR;
+ uint64_t max_limit = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+ int32_t cur_auto_delete = 0;
+ int32_t req_auto_delete = 0;
+ int32_t cur_snap_activate = 0;
+ int32_t req_snap_activate = 0;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ conf = this->private;
+
+ GF_ASSERT(conf);
+
+ ret = dict_get_int32n(dict, "config-command", SLEN("config-command"),
+ &config_command);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str), "failed to get config-command type");
+ goto out;
+ }
+
+ if (config_command != GF_SNAP_CONFIG_TYPE_SET) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (volname) {
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str), "Volume (%s) does not exist.",
+ volname);
+ *op_errno = EG_NOVOL;
+ goto out;
+ }
+ }
+
+ /* config values snap-max-hard-limit and snap-max-soft-limit are
+ * optional and hence we are not erroring out if values are not
+ * present
+ */
+ gd_get_snap_conf_values_if_present(dict, &hard_limit, &soft_limit);
+
+ if (hard_limit) {
+ /* Validations for snap-max-hard-limits */
+ ret = snap_max_hard_limits_validate(dict, volname, hard_limit,
+ op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HARD_LIMIT_SET_FAIL,
+ "snap-max-hard-limit validation failed.");
+ *op_errno = EINVAL;
+ goto out;
+ }
+ }
+
+ if (soft_limit) {
+ max_limit = GLUSTERD_SNAPS_MAX_SOFT_LIMIT_PERCENT;
+ if (soft_limit > max_limit) {
+ ret = -1;
+ snprintf(err_str, PATH_MAX,
+ "Invalid "
+ "snap-max-soft-limit "
+ "%" PRIu64 ". Expected range 1 - %" PRIu64,
+ soft_limit, max_limit);
+ *op_errno = EINVAL;
+ goto out;
+ }
+ }
+
+ if (hard_limit || soft_limit) {
+ ret = 0;
+ goto out;
+ }
+
+ if (dict_getn(dict, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE,
+ SLEN(GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE))) {
+ req_auto_delete = dict_get_str_boolean(
+ dict, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE, _gf_false);
+ if (req_auto_delete < 0) {
+ ret = -1;
+ snprintf(err_str, sizeof(err_str),
+ "Please enter a "
+ "valid boolean value for auto-delete");
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ /* Ignoring the error as the auto-delete is optional and
+ might not be present in the options dictionary.*/
+ cur_auto_delete = dict_get_str_boolean(
+ conf->opts, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE, _gf_false);
+
+ if (cur_auto_delete == req_auto_delete) {
+ ret = -1;
+ if (cur_auto_delete == _gf_true)
+ snprintf(err_str, sizeof(err_str),
+ "auto-delete is already enabled");
+ else
+ snprintf(err_str, sizeof(err_str),
+ "auto-delete is already disabled");
+ *op_errno = EINVAL;
+ goto out;
+ }
+ } else if (dict_getn(dict, GLUSTERD_STORE_KEY_SNAP_ACTIVATE,
+ SLEN(GLUSTERD_STORE_KEY_SNAP_ACTIVATE))) {
+ req_snap_activate = dict_get_str_boolean(
+ dict, GLUSTERD_STORE_KEY_SNAP_ACTIVATE, _gf_false);
+ if (req_snap_activate < 0) {
+ ret = -1;
+ snprintf(err_str, sizeof(err_str),
+ "Please enter a "
+ "valid boolean value for activate-on-create");
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ /* Ignoring the error as the activate-on-create is optional and
+ might not be present in the options dictionary.*/
+ cur_snap_activate = dict_get_str_boolean(
+ conf->opts, GLUSTERD_STORE_KEY_SNAP_ACTIVATE, _gf_false);
+
+ if (cur_snap_activate == req_snap_activate) {
+ ret = -1;
+ if (cur_snap_activate == _gf_true)
+ snprintf(err_str, sizeof(err_str),
+ "activate-on-create is already enabled");
+ else
+ snprintf(err_str, sizeof(err_str),
+ "activate-on-create is already disabled");
+ *op_errno = EINVAL;
+ goto out;
+ }
+ } else {
+ ret = -1;
+ snprintf(err_str, sizeof(err_str), "Invalid option");
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = 0;
+out:
+
+ if (ret && err_str[0] != '\0') {
+ gf_msg(this->name, loglevel, 0, GD_MSG_SNAPSHOT_OP_FAILED, "%s",
+ err_str);
+ *op_errstr = gf_strdup(err_str);
+ }
+
+ return ret;
+}
+
+/* This function will be called from RPC handler routine.
+ * This function is responsible for getting the requested
+ * snapshot config into the dictionary.
+ *
+ * @param req RPC request object. Required for sending a response back.
+ * @param op glusterd operation. Required for sending a response back.
+ * @param dict pointer to dictionary which will contain both
+ * request and response key-pair values.
+ * @return -1 on error and 0 on success
+ */
+int
+glusterd_handle_snapshot_config(rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict, char *err_str, size_t len)
+{
+ int32_t ret = -1;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ int config_command = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_VALIDATE_OR_GOTO(this->name, req, out);
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+ /* TODO : Type of lock to be taken when we are setting
+ * limits system wide
+ */
+ ret = dict_get_int32n(dict, "config-command", SLEN("config-command"),
+ &config_command);
+ if (ret) {
+ snprintf(err_str, len, "Failed to get config-command type");
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=config-command", NULL);
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+
+ switch (config_command) {
+ case GF_SNAP_CONFIG_TYPE_SET:
+ if (!volname) {
+ ret = dict_set_int32n(dict, "hold_vol_locks",
+ SLEN("hold_vol_locks"), _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set hold_vol_locks value "
+ "as _gf_false");
+ goto out;
+ }
+ }
+ ret = glusterd_mgmt_v3_initiate_all_phases(req, op, dict);
+ break;
+ case GF_SNAP_CONFIG_DISPLAY:
+ /* Reading data from local node only */
+ ret = snap_max_limits_display_commit(dict, volname, err_str, len);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HARD_LIMIT_SET_FAIL,
+ "snap-max-limit "
+ "display commit failed.");
+ goto out;
+ }
+
+ /* If everything is successful then send the response
+ * back to cli
+ */
+ ret = glusterd_op_send_cli_response(op, 0, 0, req, dict, err_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_CLI_RESP,
+ "Failed to send cli "
+ "response");
+ goto out;
+ }
+
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_COMMAND_NOT_FOUND,
+ "Unknown config type");
+ ret = -1;
+ break;
+ }
+out:
+ return ret;
+}
+int
+glusterd_snap_create_clone_pre_val_use_rsp_dict(dict_t *dst, dict_t *src)
+{
+ char *snap_brick_dir = NULL;
+ char *snap_device = NULL;
+ char key[64] = "";
+ int keylen;
+ char *value = "";
+ char snapbrckcnt[PATH_MAX] = "";
+ char snapbrckord[PATH_MAX] = "";
+ int ret = -1;
+ int64_t i = -1;
+ int64_t j = -1;
+ int64_t volume_count = 0;
+ int64_t brick_count = 0;
+ int64_t brick_order = 0;
+ xlator_t *this = NULL;
+ int32_t brick_online = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dst);
+ GF_ASSERT(src);
+
+ ret = dict_get_int64(src, "volcount", &volume_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to "
+ "get the volume count");
+ goto out;
+ }
+
+ for (i = 0; i < volume_count; i++) {
+ ret = snprintf(snapbrckcnt, sizeof(snapbrckcnt) - 1,
+ "vol%" PRId64 "_brickcount", i + 1);
+ ret = dict_get_int64(src, snapbrckcnt, &brick_count);
+ if (ret) {
+ gf_msg_trace(this->name, 0,
+ "No bricks for this volume in this dict");
+ continue;
+ }
+
+ for (j = 0; j < brick_count; j++) {
+ /* Fetching data from source dict */
+ snprintf(key, sizeof(key), "vol%" PRId64 ".brickdir%" PRId64, i + 1,
+ j);
+ ret = dict_get_ptr(src, key, (void **)&snap_brick_dir);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch %s", key);
+ continue;
+ }
+
+ /* Fetching brick order from source dict */
+ snprintf(snapbrckord, sizeof(snapbrckord) - 1,
+ "vol%" PRId64 ".brick%" PRId64 ".order", i + 1, j);
+ ret = dict_get_int64(src, snapbrckord, &brick_order);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get brick order");
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "vol%" PRId64 ".brickdir%" PRId64, i + 1,
+ brick_order);
+ ret = dict_set_dynstr_with_alloc(dst, key, snap_brick_dir);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "vol%" PRId64 ".fstype%" PRId64,
+ i + 1, j);
+ ret = dict_get_strn(src, key, keylen, &value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch %s", key);
+ continue;
+ }
+
+ snprintf(key, sizeof(key), "vol%" PRId64 ".fstype%" PRId64, i + 1,
+ brick_order);
+ ret = dict_set_dynstr_with_alloc(dst, key, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key),
+ "vol%" PRId64 ".mnt_opts%" PRId64, i + 1, j);
+ ret = dict_get_strn(src, key, keylen, &value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch %s", key);
+ continue;
+ }
+
+ snprintf(key, sizeof(key), "vol%" PRId64 ".mnt_opts%" PRId64, i + 1,
+ brick_order);
+ ret = dict_set_dynstr_with_alloc(dst, key, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key),
+ "vol%" PRId64 ".brick_snapdevice%" PRId64, i + 1, j);
+ ret = dict_get_ptr(src, key, (void **)&snap_device);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch snap_device");
+ goto out;
+ }
+
+ snprintf(key, sizeof(key),
+ "vol%" PRId64 ".brick_snapdevice%" PRId64, i + 1,
+ brick_order);
+ ret = dict_set_dynstr_with_alloc(dst, key, snap_device);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key),
+ "vol%" PRId64 ".brick%" PRId64 ".status", i + 1,
+ brick_order);
+ ret = dict_get_int32n(src, key, keylen, &brick_online);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to "
+ "get the brick status");
+ goto out;
+ }
+
+ ret = dict_set_int32n(dst, key, keylen, brick_online);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to "
+ "set the brick status");
+ goto out;
+ }
+ brick_online = 0;
+ }
+ }
+ ret = 0;
+out:
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* Aggregate brickinfo's of the snap volumes to be restored from */
+int32_t
+glusterd_snap_restore_use_rsp_dict(dict_t *dst, dict_t *src)
+{
+ char key[64] = "";
+ int keylen;
+ char *strvalue = NULL;
+ int32_t value = -1;
+ int32_t i = -1;
+ int32_t j = -1;
+ int32_t vol_count = -1;
+ int32_t brickcount = -1;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (!dst || !src) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Source or Destination "
+ "dict is empty.");
+ goto out;
+ }
+
+ ret = dict_get_int32(src, "volcount", &vol_count);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "No volumes");
+ ret = 0;
+ goto out;
+ }
+
+ for (i = 1; i <= vol_count; i++) {
+ keylen = snprintf(key, sizeof(key), "snap%d.brick_count", i);
+ ret = dict_get_int32n(src, key, keylen, &brickcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get %s", key);
+ goto out;
+ }
+
+ for (j = 1; j <= brickcount; j++) {
+ keylen = snprintf(key, sizeof(key), "snap%d.brick%d.path", i, j);
+ ret = dict_get_strn(src, key, keylen, &strvalue);
+ if (ret) {
+ /* The brickinfo will be present in
+ * another rsp_dict */
+ gf_msg_debug(this->name, 0, "%s not present", key);
+ ret = 0;
+ continue;
+ }
+ ret = dict_set_dynstr_with_alloc(dst, key, strvalue);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to set %s", key);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "snap%d.brick%d.snap_status", i,
+ j);
+ ret = dict_get_int32n(src, key, keylen, &value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get %s", key);
+ goto out;
+ }
+ ret = dict_set_int32n(dst, key, keylen, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "snap%d.brick%d.device_path", i,
+ j);
+ ret = dict_get_strn(src, key, keylen, &strvalue);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get %s", key);
+ goto out;
+ }
+ ret = dict_set_dynstr_with_alloc(dst, key, strvalue);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to set %s", key);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "snap%d.brick%d.fs_type", i, j);
+ ret = dict_get_strn(src, key, keylen, &strvalue);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get %s", key);
+ goto out;
+ }
+ ret = dict_set_dynstr_with_alloc(dst, key, strvalue);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to set %s", key);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "snap%d.brick%d.mnt_opts", i,
+ j);
+ ret = dict_get_strn(src, key, keylen, &strvalue);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get %s", key);
+ goto out;
+ }
+ ret = dict_set_dynstr_with_alloc(dst, key, strvalue);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to set %s", key);
+ goto out;
+ }
+ }
+ }
+
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_snap_pre_validate_use_rsp_dict(dict_t *dst, dict_t *src)
+{
+ int ret = -1;
+ int32_t snap_command = 0;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (!dst || !src) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "Source or Destination "
+ "dict is empty.");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dst, "type", SLEN("type"), &snap_command);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "unable to get the type of "
+ "the snapshot command");
+ goto out;
+ }
+
+ switch (snap_command) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ case GF_SNAP_OPTION_TYPE_CLONE:
+ ret = glusterd_snap_create_clone_pre_val_use_rsp_dict(dst, src);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to use "
+ "rsp dict");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ ret = glusterd_snap_restore_use_rsp_dict(dst, src);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RSP_DICT_USE_FAIL,
+ "Unable to use "
+ "rsp dict");
+ goto out;
+ }
+ break;
+ default:
+ break;
+ }
+
+ ret = 0;
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_add_brick_status_to_dict(dict_t *dict, glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ char *key_prefix)
+{
+ char pidfile[PATH_MAX] = "";
+ int32_t brick_online = 0;
+ pid_t pid = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ int ret = -1;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(volinfo);
+ GF_ASSERT(brickinfo);
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ if (!key_prefix) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "key prefix is NULL");
+ goto out;
+ }
+
+ GLUSTERD_GET_BRICK_PIDFILE(pidfile, volinfo, brickinfo, conf);
+
+ brick_online = gf_is_service_running(pidfile, &pid);
+
+ ret = dict_set_int32(dict, key_prefix, brick_online);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key_prefix);
+ goto out;
+ }
+ brick_online = 0;
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/* This function will check whether the given device
+ * is a thinly provisioned LV or not.
+ *
+ * @param device LV device path
+ *
+ * @return _gf_true if LV is thin else _gf_false
+ */
+gf_boolean_t
+glusterd_is_thinp_brick(char *device, uint32_t *op_errno)
+{
+ int ret = -1;
+ char msg[1024] = "";
+ char pool_name[PATH_MAX] = "";
+ char *ptr = NULL;
+ xlator_t *this = NULL;
+ runner_t runner = {
+ 0,
+ };
+ gf_boolean_t is_thin = _gf_false;
+
+ this = THIS;
+
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, device, out);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ snprintf(msg, sizeof(msg), "Get thin pool name for device %s", device);
+
+ runinit(&runner);
+
+ runner_add_args(&runner, "lvs", "--noheadings", "-o", "pool_lv", device,
+ NULL);
+ runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
+ runner_log(&runner, this->name, GF_LOG_DEBUG, msg);
+
+ ret = runner_start(&runner);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_TPOOL_GET_FAIL,
+ "Failed to get thin pool "
+ "name for device %s",
+ device);
+ runner_end(&runner);
+ goto out;
+ }
+
+ ptr = fgets(pool_name, sizeof(pool_name),
+ runner_chio(&runner, STDOUT_FILENO));
+ if (!ptr || !strlen(pool_name)) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_TPOOL_GET_FAIL,
+ "Failed to get pool name "
+ "for device %s",
+ device);
+ runner_end(&runner);
+ ret = -1;
+ goto out;
+ }
+
+ runner_end(&runner);
+
+ /* Trim all the whitespaces. */
+ ptr = gf_trim(pool_name);
+
+ /* If the LV has thin pool associated with this
+ * then it is a thinly provisioned LV else it is
+ * regular LV */
+ if (0 != ptr[0]) {
+ is_thin = _gf_true;
+ }
+
+out:
+ if (!is_thin)
+ *op_errno = EG_NOTTHINP;
+
+ return is_thin;
+}
+
+int
+glusterd_snap_create_clone_common_prevalidate(
+ dict_t *rsp_dict, int flags, char *snapname, char *err_str,
+ char *snap_volname, int64_t volcount, glusterd_volinfo_t *volinfo,
+ gf_loglevel_t *loglevel, int clone, uint32_t *op_errno)
+{
+ char *device = NULL;
+ char *orig_device = NULL;
+ char key[128] = "";
+ int ret = -1;
+ int64_t i = 1;
+ int64_t brick_order = 0;
+ int64_t brick_count = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ conf = this->private;
+ GF_ASSERT(conf);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ if (!snapname || !volinfo) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Failed to validate "
+ "snapname or volume information");
+ ret = -1;
+ goto out;
+ }
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
+ brick_order++;
+ continue;
+ }
+
+ if (!glusterd_is_brick_started(brickinfo)) {
+ if (!clone && (flags & GF_CLI_FLAG_OP_FORCE)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_DISCONNECTED,
+ "brick %s:%s is not started", brickinfo->hostname,
+ brickinfo->path);
+ brick_order++;
+ brick_count++;
+ continue;
+ }
+ if (!clone) {
+ snprintf(err_str, PATH_MAX,
+ "One or more bricks are not running. "
+ "Please run volume status command to see "
+ "brick status.\n"
+ "Please start the stopped brick "
+ "and then issue snapshot create "
+ "command or use [force] option in "
+ "snapshot create to override this "
+ "behavior.");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_NOT_RUNNING,
+ "Please run volume status command to see brick "
+ "status.Please start the stopped brick and then issue "
+ "snapshot create command or use 'force' option in "
+ "snapshot create to override this behavior.",
+ NULL);
+ } else {
+ snprintf(err_str, PATH_MAX,
+ "One or more bricks are not running. "
+ "Please run snapshot status command to see "
+ "brick status.\n"
+ "Please start the stopped brick "
+ "and then issue snapshot clone "
+ "command ");
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRICK_NOT_RUNNING,
+ "Please run snapshot status command to see brick "
+ "status. Please start the stopped brick and then issue "
+ "snapshot clone command.",
+ NULL);
+ }
+ *op_errno = EG_BRCKDWN;
+ ret = -1;
+ goto out;
+ }
+
+ orig_device = glusterd_get_brick_mount_device(brickinfo->path);
+ if (!orig_device) {
+ len = snprintf(err_str, PATH_MAX,
+ "getting device name for the brick "
+ "%s:%s failed",
+ brickinfo->hostname, brickinfo->path);
+ if (len < 0) {
+ strcpy(err_str, "<error>");
+ }
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_BRK_MNTPATH_GET_FAIL,
+ "Brick_hostname=%s, Brick_path=%s", brickinfo->hostname,
+ brickinfo->path, NULL);
+ ret = -1;
+ goto out;
+ }
+ if (!clone) {
+ if (!glusterd_is_thinp_brick(orig_device, op_errno)) {
+ snprintf(err_str, PATH_MAX,
+ "Snapshot is supported only for "
+ "thin provisioned LV. Ensure that "
+ "all bricks of %s are thinly "
+ "provisioned LV.",
+ volinfo->volname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_SNAPSHOT_NOT_THIN_PROVISIONED,
+ "Ensure that all bricks of volume are thinly "
+ "provisioned LV, Volume=%s",
+ volinfo->volname, NULL);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ device = glusterd_build_snap_device_path(orig_device, snap_volname,
+ brick_count);
+ if (!device) {
+ snprintf(err_str, PATH_MAX,
+ "cannot copy the snapshot device "
+ "name (volname: %s, snapname: %s)",
+ volinfo->volname, snapname);
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_SNAP_DEVICE_NAME_GET_FAIL, "Volname=%s, Snapname=%s",
+ volinfo->volname, snapname, NULL);
+ *loglevel = GF_LOG_WARNING;
+ ret = -1;
+ goto out;
+ }
+
+ GF_FREE(orig_device);
+ orig_device = NULL;
+
+ snprintf(key, sizeof(key), "vol%" PRId64 ".brick_snapdevice%" PRId64, i,
+ brick_count);
+ ret = dict_set_dynstr_with_alloc(rsp_dict, key, device);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ ret = glusterd_update_mntopts(brickinfo->path, brickinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MOUNTOPTS_FAIL,
+ "Failed to "
+ "update mount options for %s brick",
+ brickinfo->path);
+ }
+
+ snprintf(key, sizeof(key), "vol%" PRId64 ".fstype%" PRId64, i,
+ brick_count);
+ ret = dict_set_dynstr_with_alloc(rsp_dict, key, brickinfo->fstype);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "vol%" PRId64 ".mnt_opts%" PRId64, i,
+ brick_count);
+ ret = dict_set_dynstr_with_alloc(rsp_dict, key, brickinfo->mnt_opts);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "vol%" PRId64 ".brickdir%" PRId64, i,
+ brick_count);
+ ret = dict_set_dynstr_with_alloc(rsp_dict, key, brickinfo->mount_dir);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key) - 1, "vol%" PRId64 ".brick%" PRId64 ".order",
+ i, brick_count);
+ ret = dict_set_int64(rsp_dict, key, brick_order);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "vol%" PRId64 ".brick%" PRId64 ".status", i,
+ brick_order);
+
+ ret = glusterd_add_brick_status_to_dict(rsp_dict, volinfo, brickinfo,
+ key);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to "
+ "add brick status to dict");
+ goto out;
+ }
+ brick_count++;
+ brick_order++;
+ if (device) {
+ GF_FREE(device);
+ device = NULL;
+ }
+ }
+ snprintf(key, sizeof(key) - 1, "vol%" PRId64 "_brickcount", volcount);
+ ret = dict_set_int64(rsp_dict, key, brick_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+ ret = 0;
+out:
+ if (orig_device)
+ GF_FREE(orig_device);
+
+ if (device)
+ GF_FREE(device);
+
+ return ret;
+}
+
+int
+glusterd_snapshot_clone_prevalidate(dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict, uint32_t *op_errno)
+{
+ char *clonename = NULL;
+ char *snapname = NULL;
+ char device_name[64] = "";
+ glusterd_snap_t *snap = NULL;
+ char err_str[PATH_MAX] = "";
+ int ret = -1;
+ int64_t volcount = 1;
+ glusterd_volinfo_t *snap_vol = NULL;
+ xlator_t *this = NULL;
+ uuid_t *snap_volid = NULL;
+ gf_loglevel_t loglevel = GF_LOG_ERROR;
+ glusterd_volinfo_t *volinfo = NULL;
+
+ this = THIS;
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(dict);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ ret = dict_get_strn(dict, "clonename", SLEN("clonename"), &clonename);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to "
+ "get the clone name");
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str), "Failed to get snapname");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(clonename, &volinfo);
+ if (!ret) {
+ ret = -1;
+ snprintf(err_str, sizeof(err_str),
+ "Volume with name:%s "
+ "already exists",
+ clonename);
+ *op_errno = EG_VOLEXST;
+ goto out;
+ }
+ /* need to find snap volinfo*/
+ snap = glusterd_find_snap_by_name(snapname);
+ if (!snap) {
+ ret = -1;
+ snprintf(err_str, sizeof(err_str),
+ "Failed to find :%s "
+ "snap",
+ snapname);
+ goto out;
+ }
+
+ /* TODO : As of now there is only one volume in snapshot.
+ * Change this when multiple volume snapshot is introduced
+ */
+ snap_vol = list_entry(snap->volumes.next, glusterd_volinfo_t, vol_list);
+ if (!snap_vol) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to get snap "
+ "volinfo %s",
+ snap->snapname);
+ goto out;
+ }
+
+ if (!glusterd_is_volume_started(snap_vol)) {
+ snprintf(err_str, sizeof(err_str),
+ "Snapshot %s is "
+ "not activated",
+ snap->snapname);
+ loglevel = GF_LOG_WARNING;
+ *op_errno = EG_VOLSTP;
+ goto out;
+ }
+
+ ret = dict_get_bin(dict, "vol1_volid", (void **)&snap_volid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch snap_volid");
+ goto out;
+ }
+
+ GLUSTERD_GET_UUID_NOHYPHEN(device_name, *snap_volid);
+
+ /* Adding snap bricks mount paths to the dict */
+ ret = glusterd_snap_create_clone_common_prevalidate(
+ rsp_dict, 0, snapname, err_str, device_name, 1, snap_vol, &loglevel, 1,
+ op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL,
+ "Failed to pre validate");
+ goto out;
+ }
+
+ ret = dict_set_int64(rsp_dict, "volcount", volcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set volcount");
+ goto out;
+ }
+
+out:
+
+ if (ret && err_str[0] != '\0') {
+ gf_msg(this->name, loglevel, 0, GD_MSG_SNAP_CLONE_PREVAL_FAILED, "%s",
+ err_str);
+ *op_errstr = gf_strdup(err_str);
+ }
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_snapshot_create_prevalidate(dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict, uint32_t *op_errno)
+{
+ char *volname = NULL;
+ char *snapname = NULL;
+ char key[64] = "";
+ int keylen;
+ char snap_volname[64] = "";
+ char err_str[PATH_MAX] = "";
+ int ret = -1;
+ int64_t i = 0;
+ int64_t volcount = 0;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ uuid_t *snap_volid = NULL;
+ gf_loglevel_t loglevel = GF_LOG_ERROR;
+ glusterd_conf_t *conf = NULL;
+ int64_t effective_max_limit = 0;
+ int flags = 0;
+ uint64_t opt_hard_max = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+ char *description = NULL;
+
+ this = THIS;
+ GF_ASSERT(op_errstr);
+ conf = this->private;
+ GF_ASSERT(conf);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ ret = dict_get_int64(dict, "volcount", &volcount);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to "
+ "get the volume count");
+ goto out;
+ }
+ if (volcount <= 0) {
+ snprintf(err_str, sizeof(err_str),
+ "Invalid volume count %" PRId64 " supplied", volcount);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str), "Failed to get snapname");
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "description", SLEN("description"), &description);
+ if (description && !(*description)) {
+ /* description should have a non-null value */
+ ret = -1;
+ snprintf(err_str, sizeof(err_str),
+ "Snapshot cannot be "
+ "created with empty description");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "flags", SLEN("flags"), &flags);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get flags");
+ goto out;
+ }
+
+ if (glusterd_find_snap_by_name(snapname)) {
+ ret = -1;
+ snprintf(err_str, sizeof(err_str),
+ "Snapshot %s already "
+ "exists",
+ snapname);
+ *op_errno = EG_SNAPEXST;
+ goto out;
+ }
+
+ for (i = 1; i <= volcount; i++) {
+ keylen = snprintf(key, sizeof(key), "volname%" PRId64, i);
+ ret = dict_get_strn(dict, key, keylen, &volname);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str), "failed to get volume name");
+ goto out;
+ }
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(err_str, sizeof(err_str), "Volume (%s) does not exist ",
+ volname);
+ *op_errno = EG_NOVOL;
+ goto out;
+ }
+
+ ret = -1;
+ if (!glusterd_is_volume_started(volinfo)) {
+ snprintf(err_str, sizeof(err_str),
+ "volume %s is "
+ "not started",
+ volinfo->volname);
+ loglevel = GF_LOG_WARNING;
+ *op_errno = EG_VOLSTP;
+ goto out;
+ }
+
+ if (glusterd_is_defrag_on(volinfo)) {
+ snprintf(err_str, sizeof(err_str),
+ "rebalance process is running for the "
+ "volume %s",
+ volname);
+ loglevel = GF_LOG_WARNING;
+ *op_errno = EG_RBALRUN;
+ goto out;
+ }
+
+ if (gd_vol_is_geo_rep_active(volinfo)) {
+ snprintf(err_str, sizeof(err_str),
+ "geo-replication session is running for "
+ "the volume %s. Session needs to be "
+ "stopped before taking a snapshot.",
+ volname);
+ loglevel = GF_LOG_WARNING;
+ *op_errno = EG_GEOREPRUN;
+ goto out;
+ }
+
+ if (volinfo->is_snap_volume == _gf_true) {
+ snprintf(err_str, sizeof(err_str), "Volume %s is a snap volume",
+ volname);
+ loglevel = GF_LOG_WARNING;
+ *op_errno = EG_ISSNAP;
+ goto out;
+ }
+
+ /* "snap-max-hard-limit" might not be set by user explicitly,
+ * in that case it's better to consider the default value.
+ * Hence not erroring out if Key is not found.
+ */
+ ret = dict_get_uint64(
+ conf->opts, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, &opt_hard_max);
+ if (ret) {
+ ret = 0;
+ gf_msg_debug(this->name, 0,
+ "%s is not present "
+ "in opts dictionary",
+ GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT);
+ }
+
+ if (volinfo->snap_max_hard_limit < opt_hard_max)
+ effective_max_limit = volinfo->snap_max_hard_limit;
+ else
+ effective_max_limit = opt_hard_max;
+
+ if (volinfo->snap_count >= effective_max_limit) {
+ ret = -1;
+ snprintf(err_str, sizeof(err_str),
+ "The number of existing snaps has reached "
+ "the effective maximum limit of %" PRIu64
+ ", "
+ "for the volume (%s). Please delete few "
+ "snapshots before taking further snapshots.",
+ effective_max_limit, volname);
+ loglevel = GF_LOG_WARNING;
+ *op_errno = EG_HRDLMT;
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "vol%" PRId64 "_volid", i);
+ ret = dict_get_bin(dict, key, (void **)&snap_volid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch snap_volid");
+ goto out;
+ }
+
+ /* snap volume uuid is used as lvm snapshot name.
+ This will avoid restrictions on snapshot names
+ provided by user */
+ GLUSTERD_GET_UUID_NOHYPHEN(snap_volname, *snap_volid);
+
+ ret = glusterd_snap_create_clone_common_prevalidate(
+ rsp_dict, flags, snapname, err_str, snap_volname, i, volinfo,
+ &loglevel, 0, op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_PRE_VALIDATION_FAIL,
+ "Failed to pre validate");
+ goto out;
+ }
+ }
+
+ ret = dict_set_int64(rsp_dict, "volcount", volcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set volcount");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (ret && err_str[0] != '\0') {
+ gf_msg(this->name, loglevel, 0, GD_MSG_SNAPSHOT_OP_FAILED, "%s",
+ err_str);
+ *op_errstr = gf_strdup(err_str);
+ }
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+glusterd_snap_t *
+glusterd_new_snap_object()
+{
+ glusterd_snap_t *snap = NULL;
+
+ snap = GF_CALLOC(1, sizeof(*snap), gf_gld_mt_snap_t);
+
+ if (snap) {
+ if (LOCK_INIT(&snap->lock)) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_LOCK_INIT_FAILED,
+ "Failed initiating"
+ " snap lock");
+ GF_FREE(snap);
+ return NULL;
+ }
+
+ CDS_INIT_LIST_HEAD(&snap->snap_list);
+ CDS_INIT_LIST_HEAD(&snap->volumes);
+ snap->snapname[0] = 0;
+ snap->snap_status = GD_SNAP_STATUS_INIT;
+ }
+
+ return snap;
+};
+
+/* Function glusterd_list_add_snapvol adds the volinfo object (snapshot volume)
+ to the snapshot object list and to the parent volume list */
+int32_t
+glusterd_list_add_snapvol(glusterd_volinfo_t *origin_vol,
+ glusterd_volinfo_t *snap_vol)
+{
+ int ret = -1;
+ glusterd_snap_t *snap = NULL;
+
+ GF_VALIDATE_OR_GOTO("glusterd", origin_vol, out);
+ GF_VALIDATE_OR_GOTO("glusterd", snap_vol, out);
+
+ snap = snap_vol->snapshot;
+ GF_ASSERT(snap);
+
+ cds_list_add_tail(&snap_vol->vol_list, &snap->volumes);
+ LOCK(&origin_vol->lock);
+ {
+ glusterd_list_add_order(&snap_vol->snapvol_list,
+ &origin_vol->snap_volumes,
+ glusterd_compare_snap_vol_time);
+
+ origin_vol->snap_count++;
+ }
+ UNLOCK(&origin_vol->lock);
+
+ gf_msg_debug(THIS->name, 0, "Snapshot %s added to the list",
+ snap->snapname);
+ ret = 0;
+out:
+ return ret;
+}
+
+glusterd_snap_t *
+glusterd_find_snap_by_name(char *snapname)
+{
+ glusterd_snap_t *snap = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(snapname);
+
+ cds_list_for_each_entry(snap, &priv->snapshots, snap_list)
+ {
+ if (!strcmp(snap->snapname, snapname)) {
+ gf_msg_debug(THIS->name, 0,
+ "Found "
+ "snap %s (%s)",
+ snap->snapname, uuid_utoa(snap->snap_id));
+ goto out;
+ }
+ }
+ snap = NULL;
+out:
+ return snap;
+}
+
+glusterd_snap_t *
+glusterd_find_snap_by_id(uuid_t snap_id)
+{
+ glusterd_snap_t *snap = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT(priv);
+
+ if (gf_uuid_is_null(snap_id))
+ goto out;
+
+ cds_list_for_each_entry(snap, &priv->snapshots, snap_list)
+ {
+ if (!gf_uuid_compare(snap->snap_id, snap_id)) {
+ gf_msg_debug(THIS->name, 0,
+ "Found "
+ "snap %s (%s)",
+ snap->snapname, uuid_utoa(snap->snap_id));
+ goto out;
+ }
+ }
+ snap = NULL;
+out:
+ return snap;
+}
+
+int
+glusterd_do_lvm_snapshot_remove(glusterd_volinfo_t *snap_vol,
+ glusterd_brickinfo_t *brickinfo,
+ const char *mount_pt, const char *snap_device)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ runner_t runner = {
+ 0,
+ };
+ char msg[1024] = "";
+ char pidfile[PATH_MAX] = "";
+ pid_t pid = -1;
+ int retry_count = 0;
+ char *mnt_pt = NULL;
+ gf_boolean_t unmount = _gf_true;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ if (!brickinfo) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "brickinfo NULL");
+ goto out;
+ }
+ GF_ASSERT(snap_vol);
+ GF_ASSERT(mount_pt);
+ GF_ASSERT(snap_device);
+
+ GLUSTERD_GET_BRICK_PIDFILE(pidfile, snap_vol, brickinfo, priv);
+ if (gf_is_service_running(pidfile, &pid)) {
+ (void)send_attach_req(this, brickinfo->rpc, brickinfo->path, NULL, NULL,
+ GLUSTERD_BRICK_TERMINATE);
+ brickinfo->status = GF_BRICK_STOPPED;
+ }
+
+ /* Check if the brick is mounted and then try unmounting the brick */
+ ret = glusterd_get_brick_root(brickinfo->path, &mnt_pt);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_PATH_UNMOUNTED,
+ "Getting the root "
+ "of the brick for volume %s (snap %s) failed. "
+ "Removing lv (%s).",
+ snap_vol->volname, snap_vol->snapshot->snapname, snap_device);
+ /* The brick path is already unmounted. Remove the lv only *
+ * Need not fail the operation */
+ ret = 0;
+ unmount = _gf_false;
+ }
+
+ if ((unmount == _gf_true) && (strcmp(mnt_pt, mount_pt))) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_PATH_UNMOUNTED,
+ "Lvm is not mounted for brick %s:%s. "
+ "Removing lv (%s).",
+ brickinfo->hostname, brickinfo->path, snap_device);
+ /* The brick path is already unmounted. Remove the lv only *
+ * Need not fail the operation */
+ unmount = _gf_false;
+ }
+
+ /* umount cannot be done when the brick process is still in the process
+ of shutdown, so give three re-tries */
+ while ((unmount == _gf_true) && (retry_count < 3)) {
+ retry_count++;
+ /*umount2 system call doesn't cleanup mtab entry after un-mount.
+ So use external umount command*/
+ ret = glusterd_umount(mount_pt);
+ if (!ret)
+ break;
+
+ gf_msg_debug(this->name, 0,
+ "umount failed for "
+ "path %s (brick: %s): %s. Retry(%d)",
+ mount_pt, brickinfo->path, strerror(errno), retry_count);
+
+ /*
+ * This used to be one second, but that wasn't long enough
+ * to get past the spurious EPERM errors that prevent some
+ * tests (especially bug-1162462.t) from passing reliably.
+ *
+ * TBD: figure out where that garbage is coming from
+ */
+ sleep(3);
+ }
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNOUNT_FAILED,
+ "umount failed for "
+ "path %s (brick: %s): %s.",
+ mount_pt, brickinfo->path, strerror(errno));
+ /*
+ * This is cheating, but necessary until we figure out how to
+ * shut down a brick within a still-living brick daemon so that
+ * random translators aren't keeping the mountpoint alive.
+ *
+ * TBD: figure out a real solution
+ */
+ ret = 0;
+ goto out;
+ }
+
+ runinit(&runner);
+ len = snprintf(msg, sizeof(msg),
+ "remove snapshot of the brick %s:%s, "
+ "device: %s",
+ brickinfo->hostname, brickinfo->path, snap_device);
+ if (len < 0) {
+ strcpy(msg, "<error>");
+ }
+ runner_add_args(&runner, LVM_REMOVE, "-f", snap_device, NULL);
+ runner_log(&runner, "", GF_LOG_DEBUG, msg);
+
+ ret = runner_run(&runner);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "removing snapshot of the "
+ "brick (%s:%s) of device %s failed",
+ brickinfo->hostname, brickinfo->path, snap_device);
+ goto out;
+ }
+
+out:
+ if (mnt_pt)
+ GF_FREE(mnt_pt);
+
+ return ret;
+}
+
+int32_t
+glusterd_lvm_snapshot_remove(dict_t *rsp_dict, glusterd_volinfo_t *snap_vol)
+{
+ int32_t brick_count = -1;
+ int32_t ret = -1;
+ int32_t err = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = NULL;
+ char brick_dir[PATH_MAX] = "";
+ char snap_path[PATH_MAX] = "";
+ char *tmp = NULL;
+ char *brick_mount_path = NULL;
+ gf_boolean_t is_brick_dir_present = _gf_false;
+ struct stat stbuf = {
+ 0,
+ };
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(snap_vol);
+
+ if ((snap_vol->is_snap_volume == _gf_false) &&
+ (gf_uuid_is_null(snap_vol->restored_from_snap))) {
+ gf_msg_debug(this->name, 0,
+ "Not a snap volume, or a restored snap volume.");
+ ret = 0;
+ goto out;
+ }
+
+ brick_count = -1;
+ cds_list_for_each_entry(brickinfo, &snap_vol->bricks, brick_list)
+ {
+ brick_count++;
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
+ gf_msg_debug(this->name, 0, "%s:%s belongs to a different node",
+ brickinfo->hostname, brickinfo->path);
+ continue;
+ }
+
+ /* Fetch the brick mount path from the brickinfo->path */
+ ret = glusterd_find_brick_mount_path(brickinfo->path,
+ &brick_mount_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_GET_INFO_FAIL,
+ "Failed to find brick_mount_path for %s", brickinfo->path);
+ ret = 0;
+ continue;
+ }
+
+ /* As deactivated snapshot have no active mount point we
+ * check only for activated snapshot.
+ */
+ if (snap_vol->status == GLUSTERD_STATUS_STARTED) {
+ ret = sys_lstat(brick_mount_path, &stbuf);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Brick %s:%s already deleted.",
+ brickinfo->hostname, brickinfo->path);
+ ret = 0;
+ continue;
+ }
+ }
+
+ if (brickinfo->snap_status == -1) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SNAPSHOT_PENDING,
+ "snapshot was pending. lvm not present "
+ "for brick %s:%s of the snap %s.",
+ brickinfo->hostname, brickinfo->path,
+ snap_vol->snapshot->snapname);
+
+ if (rsp_dict && (snap_vol->is_snap_volume == _gf_true)) {
+ /* Adding missed delete to the dict */
+ ret = glusterd_add_missed_snaps_to_dict(
+ rsp_dict, snap_vol, brickinfo, brick_count + 1,
+ GF_SNAP_OPTION_TYPE_DELETE);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MISSED_SNAP_CREATE_FAIL,
+ "Failed to add missed snapshot "
+ "info for %s:%s in the "
+ "rsp_dict",
+ brickinfo->hostname, brickinfo->path);
+ goto out;
+ }
+ }
+
+ continue;
+ }
+
+ /* Check if the brick has a LV associated with it */
+ if (strlen(brickinfo->device_path) == 0) {
+ gf_msg_debug(this->name, 0,
+ "Brick (%s:%s) does not have a LV "
+ "associated with it. Removing the brick path",
+ brickinfo->hostname, brickinfo->path);
+ goto remove_brick_path;
+ }
+
+ /* Verify if the device path exists or not */
+ ret = sys_stat(brickinfo->device_path, &stbuf);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "LV (%s) for brick (%s:%s) not present. "
+ "Removing the brick path",
+ brickinfo->device_path, brickinfo->hostname,
+ brickinfo->path);
+ /* Making ret = 0 as absence of device path should *
+ * not fail the remove operation */
+ ret = 0;
+ goto remove_brick_path;
+ }
+
+ ret = glusterd_do_lvm_snapshot_remove(
+ snap_vol, brickinfo, brick_mount_path, brickinfo->device_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "Failed to "
+ "remove the snapshot %s (%s)",
+ brickinfo->path, brickinfo->device_path);
+ err = -1; /* We need to record this failure */
+ }
+
+ remove_brick_path:
+ /* After removing the brick dir fetch the parent path
+ * i.e /var/run/gluster/snaps/<snap-vol-id>/
+ */
+ if (is_brick_dir_present == _gf_false) {
+ /* Need to fetch brick_dir to be removed from
+ * brickinfo->path, as in a restored volume,
+ * snap_vol won't have the non-hyphenated snap_vol_id
+ */
+ tmp = strstr(brick_mount_path, "brick");
+ if (!tmp) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid brick %s", brickinfo->path);
+ GF_FREE(brick_mount_path);
+ brick_mount_path = NULL;
+ continue;
+ }
+
+ strncpy(brick_dir, brick_mount_path,
+ (size_t)(tmp - brick_mount_path));
+
+ /* Peers not hosting bricks will have _gf_false */
+ is_brick_dir_present = _gf_true;
+ }
+
+ GF_FREE(brick_mount_path);
+ brick_mount_path = NULL;
+ }
+
+ if (is_brick_dir_present == _gf_true) {
+ ret = recursive_rmdir(brick_dir);
+ if (ret) {
+ if (errno == ENOTEMPTY) {
+ /* Will occur when multiple glusterds
+ * are running in the same node
+ */
+ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_DIR_OP_FAILED,
+ "Failed to rmdir: %s, err: %s. "
+ "More than one glusterd running "
+ "on this node.",
+ brick_dir, strerror(errno));
+ ret = 0;
+ goto out;
+ } else
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Failed to rmdir: %s, err: %s", brick_dir,
+ strerror(errno));
+ goto out;
+ }
+
+ /* After removing brick_dir, fetch and remove snap path
+ * i.e. /var/run/gluster/snaps/<snap-name>.
+ */
+ if (!snap_vol->snapshot) {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY,
+ "snapshot not"
+ "present in snap_vol");
+ ret = -1;
+ goto out;
+ }
+
+ snprintf(snap_path, sizeof(snap_path), "%s/%s", snap_mount_dir,
+ snap_vol->snapshot->snapname);
+ ret = recursive_rmdir(snap_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Failed to remove "
+ "%s directory : error : %s",
+ snap_path, strerror(errno));
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ if (err) {
+ ret = err;
+ }
+ GF_FREE(brick_mount_path);
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_snap_volume_remove(dict_t *rsp_dict, glusterd_volinfo_t *snap_vol,
+ gf_boolean_t remove_lvm, gf_boolean_t force)
+{
+ int ret = -1;
+ int save_ret = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_volinfo_t *origin_vol = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(snap_vol);
+
+ if (!snap_vol) {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY,
+ "snap_vol in NULL");
+ ret = -1;
+ goto out;
+ }
+
+ cds_list_for_each_entry(brickinfo, &snap_vol->bricks, brick_list)
+ {
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID))
+ continue;
+
+ ret = glusterd_brick_stop(snap_vol, brickinfo, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_STOP_FAIL,
+ "Failed to stop "
+ "brick for volume %s",
+ snap_vol->volname);
+ save_ret = ret;
+
+ /* Don't clean up the snap on error when
+ force flag is disabled */
+ if (!force)
+ goto out;
+ }
+ }
+
+ /* Only remove the backend lvm when required */
+ if (remove_lvm) {
+ ret = glusterd_lvm_snapshot_remove(rsp_dict, snap_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "Failed to remove "
+ "lvm snapshot volume %s",
+ snap_vol->volname);
+ save_ret = ret;
+ if (!force)
+ goto out;
+ }
+ }
+
+ ret = glusterd_store_delete_volume(snap_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOL_DELETE_FAIL,
+ "Failed to remove volume %s "
+ "from store",
+ snap_vol->volname);
+ save_ret = ret;
+ if (!force)
+ goto out;
+ }
+
+ if (!cds_list_empty(&snap_vol->snapvol_list)) {
+ ret = glusterd_volinfo_find(snap_vol->parent_volname, &origin_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Failed to get "
+ "parent volinfo %s for volume %s",
+ snap_vol->parent_volname, snap_vol->volname);
+ save_ret = ret;
+ if (!force)
+ goto out;
+ }
+ origin_vol->snap_count--;
+ }
+
+ glusterd_volinfo_unref(snap_vol);
+
+ if (save_ret)
+ ret = save_ret;
+out:
+ gf_msg_trace(this->name, 0, "returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_snap_remove(dict_t *rsp_dict, glusterd_snap_t *snap,
+ gf_boolean_t remove_lvm, gf_boolean_t force,
+ gf_boolean_t is_clone)
+{
+ int ret = -1;
+ int save_ret = 0;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_volinfo_t *tmp = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(snap);
+
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY,
+ "snap is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ cds_list_for_each_entry_safe(snap_vol, tmp, &snap->volumes, vol_list)
+ {
+ ret = glusterd_snap_volume_remove(rsp_dict, snap_vol, remove_lvm,
+ force);
+ if (ret && !force) {
+ /* Don't clean up the snap on error when
+ force flag is disabled */
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "Failed to remove "
+ "volinfo %s for snap %s",
+ snap_vol->volname, snap->snapname);
+ save_ret = ret;
+ goto out;
+ }
+ }
+
+ /* A clone does not persist snap info in /var/lib/glusterd/snaps/ *
+ * and hence there is no snap info to be deleted from there *
+ */
+ if (!is_clone) {
+ ret = glusterd_store_delete_snap(snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "Failed to remove snap %s from store", snap->snapname);
+ save_ret = ret;
+ if (!force)
+ goto out;
+ }
+ }
+
+ ret = glusterd_snapobject_delete(snap);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "Failed to delete "
+ "snap object %s",
+ snap->snapname);
+
+ if (save_ret)
+ ret = save_ret;
+out:
+ gf_msg_trace(THIS->name, 0, "returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_snapshot_get_snapvol_detail(dict_t *dict, glusterd_volinfo_t *snap_vol,
+ const char *keyprefix, const int detail)
+{
+ int ret = -1;
+ int snap_limit = 0;
+ char key[64] = ""; /* keyprefix is quite small, up to 32 byts */
+ int keylen;
+ char *value = NULL;
+ glusterd_volinfo_t *origin_vol = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ uint64_t opt_hard_max = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+
+ this = THIS;
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ GF_ASSERT(dict);
+ GF_ASSERT(snap_vol);
+ GF_ASSERT(keyprefix);
+
+ /* Volume Name */
+ value = gf_strdup(snap_vol->volname);
+ if (!value)
+ goto out;
+
+ keylen = snprintf(key, sizeof(key), "%s.volname", keyprefix);
+ ret = dict_set_dynstrn(dict, key, keylen, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "volume name in dictionary: %s",
+ key);
+ goto out;
+ }
+
+ /* Volume ID */
+ value = gf_strdup(uuid_utoa(snap_vol->volume_id));
+ if (NULL == value) {
+ ret = -1;
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "%s.vol-id", keyprefix);
+ ret = dict_set_dynstrn(dict, key, keylen, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY,
+ "Failed to set "
+ "volume id in dictionary: %s",
+ key);
+ goto out;
+ }
+ value = NULL;
+
+ /* volume status */
+ keylen = snprintf(key, sizeof(key), "%s.vol-status", keyprefix);
+ switch (snap_vol->status) {
+ case GLUSTERD_STATUS_STARTED:
+ ret = dict_set_nstrn(dict, key, keylen, "Started", SLEN("Started"));
+ break;
+ case GLUSTERD_STATUS_STOPPED:
+ ret = dict_set_nstrn(dict, key, keylen, "Stopped", SLEN("Stopped"));
+ break;
+ case GD_SNAP_STATUS_NONE:
+ ret = dict_set_nstrn(dict, key, keylen, "None", SLEN("None"));
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid volume status");
+ ret = -1;
+ goto out;
+ }
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set volume status"
+ " in dictionary: %s",
+ key);
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(snap_vol->parent_volname, &origin_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "failed to get the parent "
+ "volinfo for the volume %s",
+ snap_vol->volname);
+ goto out;
+ }
+
+ /* "snap-max-hard-limit" might not be set by user explicitly,
+ * in that case it's better to consider the default value.
+ * Hence not erroring out if Key is not found.
+ */
+ ret = dict_get_uint64(conf->opts, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+ &opt_hard_max);
+ if (ret) {
+ ret = 0;
+ gf_msg_debug(this->name, 0,
+ "%s is not present in "
+ "opts dictionary",
+ GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT);
+ }
+
+ if (opt_hard_max < origin_vol->snap_max_hard_limit) {
+ snap_limit = opt_hard_max;
+ gf_msg_debug(this->name, 0,
+ "system snap-max-hard-limit is"
+ " lesser than volume snap-max-hard-limit, "
+ "snap-max-hard-limit value is set to %d",
+ snap_limit);
+ } else {
+ snap_limit = origin_vol->snap_max_hard_limit;
+ gf_msg_debug(this->name, 0,
+ "volume snap-max-hard-limit is"
+ " lesser than system snap-max-hard-limit, "
+ "snap-max-hard-limit value is set to %d",
+ snap_limit);
+ }
+
+ keylen = snprintf(key, sizeof(key), "%s.snaps-available", keyprefix);
+ if (snap_limit > origin_vol->snap_count)
+ ret = dict_set_int32n(dict, key, keylen,
+ snap_limit - origin_vol->snap_count);
+ else
+ ret = dict_set_int32(dict, key, 0);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set available snaps");
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "%s.snapcount", keyprefix);
+ ret = dict_set_int32n(dict, key, keylen, origin_vol->snap_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save snapcount");
+ goto out;
+ }
+
+ if (!detail)
+ goto out;
+
+ /* Parent volume name */
+ value = gf_strdup(snap_vol->parent_volname);
+ if (!value)
+ goto out;
+
+ keylen = snprintf(key, sizeof(key), "%s.origin-volname", keyprefix);
+ ret = dict_set_dynstrn(dict, key, keylen, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set parent "
+ "volume name in dictionary: %s",
+ key);
+ goto out;
+ }
+ value = NULL;
+
+ ret = 0;
+out:
+ if (value)
+ GF_FREE(value);
+
+ return ret;
+}
+
+static int
+glusterd_snapshot_get_snap_detail(dict_t *dict, glusterd_snap_t *snap,
+ const char *keyprefix,
+ glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ int volcount = 0;
+ char key[32] = ""; /* keyprefix is quite small, up to 16 bytes */
+ int keylen;
+ char timestr[GF_TIMESTR_SIZE] = "";
+ char *value = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_volinfo_t *tmp_vol = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(snap);
+ GF_ASSERT(keyprefix);
+
+ /* Snap Name */
+ value = gf_strdup(snap->snapname);
+ if (!value)
+ goto out;
+
+ keylen = snprintf(key, sizeof(key), "%s.snapname", keyprefix);
+ ret = dict_set_dynstrn(dict, key, keylen, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "snap name in dictionary");
+ goto out;
+ }
+
+ /* Snap ID */
+ value = gf_strdup(uuid_utoa(snap->snap_id));
+ if (NULL == value) {
+ ret = -1;
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "%s.snap-id", keyprefix);
+ ret = dict_set_dynstrn(dict, key, keylen, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "snap id in dictionary");
+ goto out;
+ }
+ value = NULL;
+
+ gf_time_fmt(timestr, sizeof timestr, snap->time_stamp, gf_timefmt_FT);
+ value = gf_strdup(timestr);
+
+ if (NULL == value) {
+ ret = -1;
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "%s.snap-time", keyprefix);
+ ret = dict_set_dynstrn(dict, key, keylen, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "snap time stamp in dictionary");
+ goto out;
+ }
+ value = NULL;
+
+ /* If snap description is provided then add that into dictionary */
+ if (NULL != snap->description) {
+ value = gf_strdup(snap->description);
+ if (NULL == value) {
+ ret = -1;
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "%s.snap-desc", keyprefix);
+ ret = dict_set_dynstrn(dict, key, keylen, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "snap description in dictionary");
+ goto out;
+ }
+ value = NULL;
+ }
+
+ keylen = snprintf(key, sizeof(key), "%s.snap-status", keyprefix);
+ switch (snap->snap_status) {
+ case GD_SNAP_STATUS_INIT:
+ ret = dict_set_nstrn(dict, key, keylen, "Init", SLEN("Init"));
+ break;
+ case GD_SNAP_STATUS_IN_USE:
+ ret = dict_set_nstrn(dict, key, keylen, "In-use", SLEN("In-use"));
+ break;
+ case GD_SNAP_STATUS_DECOMMISSION:
+ ret = dict_set_nstrn(dict, key, keylen, "Decommisioned",
+ SLEN("Decommisioned"));
+ break;
+ case GD_SNAP_STATUS_RESTORED:
+ ret = dict_set_nstrn(dict, key, keylen, "Restored",
+ SLEN("Restored"));
+ break;
+ case GD_SNAP_STATUS_NONE:
+ ret = dict_set_nstrn(dict, key, keylen, "None", SLEN("None"));
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid snap status");
+ ret = -1;
+ goto out;
+ }
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snap status "
+ "in dictionary");
+ goto out;
+ }
+
+ if (volinfo) {
+ volcount = 1;
+ snprintf(key, sizeof(key), "%s.vol%d", keyprefix, volcount);
+ ret = glusterd_snapshot_get_snapvol_detail(dict, volinfo, key, 0);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_DICT_GET_FAILED,
+ "Failed to "
+ "get volume detail %s for snap %s",
+ snap_vol->volname, snap->snapname);
+ goto out;
+ }
+ goto done;
+ }
+
+ cds_list_for_each_entry_safe(snap_vol, tmp_vol, &snap->volumes, vol_list)
+ {
+ volcount++;
+ snprintf(key, sizeof(key), "%s.vol%d", keyprefix, volcount);
+ ret = glusterd_snapshot_get_snapvol_detail(dict, snap_vol, key, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to "
+ "get volume detail %s for snap %s",
+ snap_vol->volname, snap->snapname);
+ goto out;
+ }
+ }
+
+done:
+ keylen = snprintf(key, sizeof(key), "%s.vol-count", keyprefix);
+ ret = dict_set_int32n(dict, key, keylen, volcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (value)
+ GF_FREE(value);
+
+ return ret;
+}
+
+static int
+glusterd_snapshot_get_all_snap_info(dict_t *dict)
+{
+ int ret = -1;
+ int snapcount = 0;
+ char key[16] = "";
+ glusterd_snap_t *snap = NULL;
+ glusterd_snap_t *tmp_snap = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ /* General parameter validation */
+ GF_ASSERT(dict);
+
+ cds_list_for_each_entry_safe(snap, tmp_snap, &priv->snapshots, snap_list)
+ {
+ snapcount++;
+ snprintf(key, sizeof(key), "snap%d", snapcount);
+ ret = glusterd_snapshot_get_snap_detail(dict, snap, key, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get "
+ "snapdetail for snap %s",
+ snap->snapname);
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32n(dict, "snapcount", SLEN("snapcount"), snapcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snapcount");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_snapshot_get_info_by_volume(dict_t *dict, char *volname, char *err_str,
+ size_t len)
+{
+ int ret = -1;
+ int snapcount = 0;
+ int snap_limit = 0;
+ char *value = NULL;
+ char key[16] = "";
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_volinfo_t *tmp_vol = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ uint64_t opt_hard_max = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+
+ this = THIS;
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ GF_ASSERT(dict);
+ GF_ASSERT(volname);
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(err_str, len, "Volume (%s) does not exist", volname);
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, "%s",
+ err_str);
+ goto out;
+ }
+
+ /* "snap-max-hard-limit" might not be set by user explicitly,
+ * in that case it's better to consider the default value.
+ * Hence not erroring out if Key is not found.
+ */
+ ret = dict_get_uint64(conf->opts, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+ &opt_hard_max);
+ if (ret) {
+ ret = 0;
+ gf_msg_debug(this->name, 0,
+ "%s is not present in "
+ "opts dictionary",
+ GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT);
+ }
+
+ if (opt_hard_max < volinfo->snap_max_hard_limit) {
+ snap_limit = opt_hard_max;
+ gf_msg_debug(this->name, 0,
+ "system snap-max-hard-limit is"
+ " lesser than volume snap-max-hard-limit, "
+ "snap-max-hard-limit value is set to %d",
+ snap_limit);
+ } else {
+ snap_limit = volinfo->snap_max_hard_limit;
+ gf_msg_debug(this->name, 0,
+ "volume snap-max-hard-limit is"
+ " lesser than system snap-max-hard-limit, "
+ "snap-max-hard-limit value is set to %d",
+ snap_limit);
+ }
+
+ if (snap_limit > volinfo->snap_count)
+ ret = dict_set_int32n(dict, "snaps-available", SLEN("snaps-available"),
+ snap_limit - volinfo->snap_count);
+ else
+ ret = dict_set_int32n(dict, "snaps-available", SLEN("snaps-available"),
+ 0);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set available snaps");
+ goto out;
+ }
+
+ /* Origin volume name */
+ value = gf_strdup(volinfo->volname);
+ if (!value)
+ goto out;
+
+ ret = dict_set_dynstrn(dict, "origin-volname", SLEN("origin-volname"),
+ value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set parent "
+ "volume name in dictionary: %s",
+ value);
+ goto out;
+ }
+ value = NULL;
+
+ cds_list_for_each_entry_safe(snap_vol, tmp_vol, &volinfo->snap_volumes,
+ snapvol_list)
+ {
+ snapcount++;
+ snprintf(key, sizeof(key), "snap%d", snapcount);
+ ret = glusterd_snapshot_get_snap_detail(dict, snap_vol->snapshot, key,
+ snap_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get "
+ "snapdetail for snap %s",
+ snap_vol->snapshot->snapname);
+ goto out;
+ }
+ }
+ ret = dict_set_int32n(dict, "snapcount", SLEN("snapcount"), snapcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snapcount");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (value)
+ GF_FREE(value);
+
+ return ret;
+}
+
+/* This function will be called from RPC handler routine.
+ * This function is responsible for getting the requested
+ * snapshot info into the dictionary.
+ *
+ * @param req RPC request object. Required for sending a response back.
+ * @param op glusterd operation. Required for sending a response back.
+ * @param dict pointer to dictionary which will contain both
+ * request and response key-pair values.
+ * @return -1 on error and 0 on success
+ */
+int
+glusterd_handle_snapshot_info(rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict, char *err_str, size_t len)
+{
+ int ret = -1;
+ int8_t snap_driven = 1;
+ char *volname = NULL;
+ char *snapname = NULL;
+ glusterd_snap_t *snap = NULL;
+ xlator_t *this = NULL;
+ int32_t cmd = GF_SNAP_INFO_TYPE_ALL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_VALIDATE_OR_GOTO(this->name, req, out);
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+
+ ret = dict_get_int32n(dict, "sub-cmd", SLEN("sub-cmd"), &cmd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get type "
+ "of snapshot info");
+ goto out;
+ }
+
+ switch (cmd) {
+ case GF_SNAP_INFO_TYPE_ALL: {
+ ret = glusterd_snapshot_get_all_snap_info(dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get info of all snaps");
+ goto out;
+ }
+ break;
+ }
+
+ case GF_SNAP_INFO_TYPE_SNAP: {
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get snap name");
+ goto out;
+ }
+
+ ret = dict_set_int32n(dict, "snapcount", SLEN("snapcount"), 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snapcount");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name(snapname);
+ if (!snap) {
+ snprintf(err_str, len, "Snapshot (%s) does not exist",
+ snapname);
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND,
+ "%s", err_str);
+ ret = -1;
+ goto out;
+ }
+ ret = glusterd_snapshot_get_snap_detail(dict, snap, "snap1", NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND,
+ "Failed to get snap detail of snap "
+ "%s",
+ snap->snapname);
+ goto out;
+ }
+ break;
+ }
+
+ case GF_SNAP_INFO_TYPE_VOL: {
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "Failed to get volname");
+ goto out;
+ }
+ ret = glusterd_snapshot_get_info_by_volume(dict, volname, err_str,
+ len);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Failed to get volume info of volume "
+ "%s",
+ volname);
+ goto out;
+ }
+ snap_driven = 0;
+ break;
+ }
+ }
+
+ ret = dict_set_int8(dict, "snap-driven", snap_driven);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snap-driven");
+ goto out;
+ }
+
+ /* If everything is successful then send the response back to cli.
+ * In case of failure the caller of this function will take care
+ of the response */
+ ret = glusterd_op_send_cli_response(op, 0, 0, req, dict, err_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_CLI_RESP,
+ "Failed to send cli "
+ "response");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/* This function sets all the snapshot names in the dictionary */
+int
+glusterd_snapshot_get_all_snapnames(dict_t *dict)
+{
+ int ret = -1;
+ int snapcount = 0;
+ char *snapname = NULL;
+ char key[64] = "";
+ int keylen;
+ glusterd_snap_t *snap = NULL;
+ glusterd_snap_t *tmp_snap = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(dict);
+
+ cds_list_for_each_entry_safe(snap, tmp_snap, &priv->snapshots, snap_list)
+ {
+ snapcount++;
+ snapname = gf_strdup(snap->snapname);
+ if (!snapname) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "strdup failed");
+ ret = -1;
+ goto out;
+ }
+ keylen = snprintf(key, sizeof(key), "snapname%d", snapcount);
+ ret = dict_set_dynstrn(dict, key, keylen, snapname);
+ if (ret) {
+ GF_FREE(snapname);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32n(dict, "snapcount", SLEN("snapcount"), snapcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snapcount");
+ goto out;
+ }
+
+ ret = 0;
+out:
+
+ return ret;
+}
+
+/* This function sets all the snapshot names
+ under a given volume in the dictionary */
+int
+glusterd_snapshot_get_vol_snapnames(dict_t *dict, glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ int snapcount = 0;
+ char *snapname = NULL;
+ char key[32] = "";
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_volinfo_t *tmp_vol = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(dict);
+ GF_ASSERT(volinfo);
+
+ cds_list_for_each_entry_safe(snap_vol, tmp_vol, &volinfo->snap_volumes,
+ snapvol_list)
+ {
+ snapcount++;
+ snprintf(key, sizeof(key), "snapname%d", snapcount);
+
+ ret = dict_set_dynstr_with_alloc(dict, key,
+ snap_vol->snapshot->snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to "
+ "set %s",
+ key);
+ GF_FREE(snapname);
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32n(dict, "snapcount", SLEN("snapcount"), snapcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snapcount");
+ goto out;
+ }
+
+ ret = 0;
+out:
+
+ return ret;
+}
+
+int
+glusterd_handle_snapshot_list(rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict, char *err_str, size_t len,
+ uint32_t *op_errno)
+{
+ int ret = -1;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ GF_VALIDATE_OR_GOTO(this->name, req, out);
+ GF_VALIDATE_OR_GOTO(this->name, dict, out);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ /* Ignore error for getting volname as it is optional */
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+
+ if (NULL == volname) {
+ ret = glusterd_snapshot_get_all_snapnames(dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_LIST_GET_FAIL,
+ "Failed to get snapshot list");
+ goto out;
+ }
+ } else {
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(err_str, len, "Volume (%s) does not exist", volname);
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND, "%s",
+ err_str);
+ *op_errno = EG_NOVOL;
+ goto out;
+ }
+
+ ret = glusterd_snapshot_get_vol_snapnames(dict, volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_LIST_GET_FAIL,
+ "Failed to get snapshot list for volume %s", volname);
+ goto out;
+ }
+ }
+
+ /* If everything is successful then send the response back to cli.
+ In case of failure the caller of this function will take of response.*/
+ ret = glusterd_op_send_cli_response(op, 0, 0, req, dict, err_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_CLI_RESP,
+ "Failed to send cli "
+ "response");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/* This is a snapshot create handler function. This function will be
+ * executed in the originator node. This function is responsible for
+ * calling mgmt_v3 framework to do the actual snap creation on all the bricks
+ *
+ * @param req RPC request object
+ * @param op gluster operation
+ * @param dict dictionary containing snapshot restore request
+ * @param err_str In case of an err this string should be populated
+ * @param len length of err_str buffer
+ *
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_handle_snapshot_create(rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict, char *err_str, size_t len)
+{
+ int ret = -1;
+ char *volname = NULL;
+ char *snapname = NULL;
+ int64_t volcount = 0;
+ xlator_t *this = NULL;
+ char key[64] = "";
+ int keylen;
+ char *username = NULL;
+ char *password = NULL;
+ uuid_t *uuid_ptr = NULL;
+ uuid_t tmp_uuid = {0};
+ int i = 0;
+ int timestamp = 0;
+ char snap_volname[GD_VOLUME_NAME_MAX] = "";
+ char new_snapname[GLUSTERD_MAX_SNAP_NAME] = "";
+ char gmt_snaptime[GLUSTERD_MAX_SNAP_NAME] = "";
+ time_t snap_time;
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(dict);
+ GF_ASSERT(err_str);
+
+ ret = dict_get_int64(dict, "volcount", &volcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to "
+ "get the volume count");
+ goto out;
+ }
+ if (volcount <= 0) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid volume count %" PRId64 " supplied", volcount);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to get the snapname");
+ goto out;
+ }
+
+ timestamp = dict_get_str_boolean(dict, "no-timestamp", _gf_false);
+ if (timestamp == -1) {
+ gf_log(this->name, GF_LOG_ERROR,
+ "Failed to get "
+ "no-timestamp flag ");
+ goto out;
+ }
+
+ snap_time = gf_time();
+ ret = dict_set_int64(dict, "snap-time", (int64_t)snap_time);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set snap-time");
+ goto out;
+ }
+
+ if (!timestamp) {
+ strftime(gmt_snaptime, sizeof(gmt_snaptime), "_GMT-%Y.%m.%d-%H.%M.%S",
+ gmtime(&snap_time));
+ snprintf(new_snapname, sizeof(new_snapname), "%s%s", snapname,
+ gmt_snaptime);
+ ret = dict_set_dynstr_with_alloc(dict, "snapname", new_snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to update "
+ "snap-name");
+ goto out;
+ }
+ snapname = new_snapname;
+ }
+
+ if (strlen(snapname) >= GLUSTERD_MAX_SNAP_NAME) {
+ snprintf(err_str, len,
+ "snapname cannot exceed 255 "
+ "characters");
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY, "%s",
+ err_str);
+ ret = -1;
+ goto out;
+ }
+
+ uuid_ptr = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!uuid_ptr) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Out Of Memory");
+ ret = -1;
+ goto out;
+ }
+
+ gf_uuid_generate(*uuid_ptr);
+ ret = dict_set_bin(dict, "snap-id", uuid_ptr, sizeof(uuid_t));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set snap-id");
+ GF_FREE(uuid_ptr);
+ goto out;
+ }
+ uuid_ptr = NULL;
+
+ for (i = 1; i <= volcount; i++) {
+ keylen = snprintf(key, sizeof(key), "volname%d", i);
+ ret = dict_get_strn(dict, key, keylen, &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get volume name");
+ goto out;
+ }
+
+ /* generate internal username and password for the snap*/
+ gf_uuid_generate(tmp_uuid);
+ username = gf_strdup(uuid_utoa(tmp_uuid));
+ keylen = snprintf(key, sizeof(key), "volume%d_username", i);
+ ret = dict_set_dynstrn(dict, key, keylen, username);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snap "
+ "username for volume %s",
+ volname);
+ GF_FREE(username);
+ goto out;
+ }
+
+ gf_uuid_generate(tmp_uuid);
+ password = gf_strdup(uuid_utoa(tmp_uuid));
+ keylen = snprintf(key, sizeof(key), "volume%d_password", i);
+ ret = dict_set_dynstrn(dict, key, keylen, password);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snap "
+ "password for volume %s",
+ volname);
+ GF_FREE(password);
+ goto out;
+ }
+
+ uuid_ptr = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!uuid_ptr) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Out Of Memory");
+ ret = -1;
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "vol%d_volid", i);
+ gf_uuid_generate(*uuid_ptr);
+ ret = dict_set_bin(dict, key, uuid_ptr, sizeof(uuid_t));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set snap_volid");
+ GF_FREE(uuid_ptr);
+ goto out;
+ }
+ GLUSTERD_GET_UUID_NOHYPHEN(snap_volname, *uuid_ptr);
+ snprintf(key, sizeof(key), "snap-volname%d", i);
+ ret = dict_set_dynstr_with_alloc(dict, key, snap_volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set snap volname");
+ GF_FREE(uuid_ptr);
+ goto out;
+ }
+ }
+
+ ret = glusterd_mgmt_v3_initiate_snap_phases(req, op, dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_INIT_FAIL,
+ "Failed to initiate snap "
+ "phases");
+ }
+
+out:
+ return ret;
+}
+
+/* This is a snapshot status handler function. This function will be
+ * executed in a originator node. This function is responsible for
+ * calling mgmt v3 framework to get the actual snapshot status from
+ * all the bricks
+ *
+ * @param req RPC request object
+ * @param op gluster operation
+ * @param dict dictionary containing snapshot status request
+ * @param err_str In case of an err this string should be populated
+ * @param len length of err_str buffer
+ *
+ * return : 0 in case of success.
+ * -1 in case of failure.
+ *
+ */
+int
+glusterd_handle_snapshot_status(rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict, char *err_str, size_t len)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(req);
+ GF_ASSERT(dict);
+ GF_ASSERT(err_str);
+
+ ret = glusterd_mgmt_v3_initiate_snap_phases(req, op, dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_INIT_FAIL,
+ "Failed to initiate "
+ "snap phases");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* This is a snapshot clone handler function. This function will be
+ * executed in the originator node. This function is responsible for
+ * calling mgmt_v3 framework to do the actual snap clone on all the bricks
+ *
+ * @param req RPC request object
+ * @param op gluster operation
+ * @param dict dictionary containing snapshot restore request
+ * @param err_str In case of an err this string should be populated
+ * @param len length of err_str buffer
+ *
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_handle_snapshot_clone(rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict, char *err_str, size_t len)
+{
+ int ret = -1;
+ char *clonename = NULL;
+ char *snapname = NULL;
+ xlator_t *this = NULL;
+ char key[64] = "";
+ int keylen;
+ char *username = NULL;
+ char *password = NULL;
+ char *volname = NULL;
+ uuid_t *uuid_ptr = NULL;
+ uuid_t tmp_uuid = {0};
+ int i = 0;
+ char snap_volname[GD_VOLUME_NAME_MAX] = "";
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(req);
+ GF_ASSERT(dict);
+ GF_ASSERT(err_str);
+
+ ret = dict_get_strn(dict, "clonename", SLEN("clonename"), &clonename);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to "
+ "get the clone name");
+ goto out;
+ }
+ /*We need to take a volume lock on clone name*/
+ volname = gf_strdup(clonename);
+ keylen = snprintf(key, sizeof(key), "volname1");
+ ret = dict_set_dynstrn(dict, key, keylen, volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set clone "
+ "name for volume locking");
+ GF_FREE(volname);
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to get the snapname");
+ goto out;
+ }
+
+ uuid_ptr = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!uuid_ptr) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Out Of Memory");
+ ret = -1;
+ goto out;
+ }
+
+ gf_uuid_generate(*uuid_ptr);
+ ret = dict_set_bin(dict, "clone-id", uuid_ptr, sizeof(uuid_t));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set clone-id");
+ GF_FREE(uuid_ptr);
+ goto out;
+ }
+ uuid_ptr = NULL;
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get snapname name");
+ goto out;
+ }
+
+ gf_uuid_generate(tmp_uuid);
+ username = gf_strdup(uuid_utoa(tmp_uuid));
+ keylen = snprintf(key, sizeof(key), "volume1_username");
+ ret = dict_set_dynstrn(dict, key, keylen, username);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set clone "
+ "username for volume %s",
+ clonename);
+ GF_FREE(username);
+ goto out;
+ }
+
+ gf_uuid_generate(tmp_uuid);
+ password = gf_strdup(uuid_utoa(tmp_uuid));
+ keylen = snprintf(key, sizeof(key), "volume1_password");
+ ret = dict_set_dynstrn(dict, key, keylen, password);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set clone "
+ "password for volume %s",
+ clonename);
+ GF_FREE(password);
+ goto out;
+ }
+
+ uuid_ptr = GF_MALLOC(sizeof(uuid_t), gf_common_mt_uuid_t);
+ if (!uuid_ptr) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Out Of Memory");
+ ret = -1;
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "vol1_volid");
+ gf_uuid_generate(*uuid_ptr);
+ ret = dict_set_bin(dict, key, uuid_ptr, sizeof(uuid_t));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set clone_volid");
+ GF_FREE(uuid_ptr);
+ goto out;
+ }
+ snprintf(key, sizeof(key), "clone-volname%d", i);
+ ret = dict_set_dynstr_with_alloc(dict, key, snap_volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set snap volname");
+ GF_FREE(uuid_ptr);
+ goto out;
+ }
+
+ ret = glusterd_mgmt_v3_initiate_snap_phases(req, op, dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_INIT_FAIL,
+ "Failed to initiate "
+ "snap phases");
+ }
+
+out:
+ return ret;
+}
+
+/* This is a snapshot restore handler function. This function will be
+ * executed in the originator node. This function is responsible for
+ * calling mgmt_v3 framework to do the actual restore on all the bricks
+ *
+ * @param req RPC request object
+ * @param op gluster operation
+ * @param dict dictionary containing snapshot restore request
+ * @param err_str In case of an err this string should be populated
+ * @param len length of err_str buffer
+ *
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_handle_snapshot_restore(rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict, char *err_str,
+ uint32_t *op_errno, size_t len)
+{
+ int ret = -1;
+ char *snapname = NULL;
+ char *buf = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *snap_volinfo = NULL;
+ int32_t i = 0;
+ char key[64] = "";
+ int keylen;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+
+ GF_ASSERT(conf);
+ GF_ASSERT(req);
+ GF_ASSERT(dict);
+ GF_ASSERT(err_str);
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to "
+ "get snapname");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name(snapname);
+ if (!snap) {
+ snprintf(err_str, len, "Snapshot (%s) does not exist", snapname);
+ *op_errno = EG_NOSNAP;
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND, "%s",
+ err_str);
+ ret = -1;
+ goto out;
+ }
+
+ list_for_each_entry(snap_volinfo, &snap->volumes, vol_list)
+ {
+ i++;
+ keylen = snprintf(key, sizeof(key), "volname%d", i);
+ buf = gf_strdup(snap_volinfo->parent_volname);
+ if (!buf) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstrn(dict, key, keylen, buf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not set "
+ "parent volume name %s in the dict",
+ snap_volinfo->parent_volname);
+ GF_FREE(buf);
+ goto out;
+ }
+ buf = NULL;
+ }
+
+ ret = dict_set_int32n(dict, "volcount", SLEN("volcount"), i);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save volume count");
+ goto out;
+ }
+
+ ret = glusterd_mgmt_v3_initiate_snap_phases(req, op, dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_INIT_FAIL,
+ "Failed to initiate snap phases");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+glusterd_snap_t *
+glusterd_create_snap_object(dict_t *dict, dict_t *rsp_dict)
+{
+ char *snapname = NULL;
+ uuid_t *snap_id = NULL;
+ char *description = NULL;
+ glusterd_snap_t *snap = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+ int64_t time_stamp = 0;
+
+ this = THIS;
+ priv = this->private;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp_dict);
+
+ /* Fetch snapname, description, id and time from dict */
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch snapname");
+ goto out;
+ }
+
+ /* Ignore ret value for description*/
+ ret = dict_get_strn(dict, "description", SLEN("description"), &description);
+
+ ret = dict_get_bin(dict, "snap-id", (void **)&snap_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch snap_id");
+ goto out;
+ }
+
+ ret = dict_get_int64(dict, "snap-time", &time_stamp);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch snap-time");
+ goto out;
+ }
+ if (time_stamp <= 0) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid time-stamp: %" PRId64, time_stamp);
+ goto out;
+ }
+
+ cds_list_for_each_entry(snap, &priv->snapshots, snap_list)
+ {
+ if (!strcmp(snap->snapname, snapname) ||
+ !gf_uuid_compare(snap->snap_id, *snap_id)) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "Found duplicate snap %s (%s)", snap->snapname,
+ uuid_utoa(snap->snap_id));
+ ret = -1;
+ break;
+ }
+ }
+ if (ret) {
+ snap = NULL;
+ goto out;
+ }
+
+ snap = glusterd_new_snap_object();
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "Could not create "
+ "the snap object for snap %s",
+ snapname);
+ goto out;
+ }
+
+ gf_strncpy(snap->snapname, snapname, sizeof(snap->snapname));
+ gf_uuid_copy(snap->snap_id, *snap_id);
+ snap->time_stamp = (time_t)time_stamp;
+ /* Set the status as GD_SNAP_STATUS_INIT and once the backend snapshot
+ is taken and snap is really ready to use, set the status to
+ GD_SNAP_STATUS_IN_USE. This helps in identifying the incomplete
+ snapshots and cleaning them up.
+ */
+ snap->snap_status = GD_SNAP_STATUS_INIT;
+ if (description) {
+ snap->description = gf_strdup(description);
+ if (snap->description == NULL) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "Saving the Snapshot Description Failed");
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = glusterd_store_snap(snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "Could not store snap"
+ "object %s",
+ snap->snapname);
+ goto out;
+ }
+
+ glusterd_list_add_order(&snap->snap_list, &priv->snapshots,
+ glusterd_compare_snap_time);
+
+ gf_msg_trace(this->name, 0, "Snapshot %s added to the list",
+ snap->snapname);
+
+ ret = 0;
+
+out:
+ if (ret) {
+ if (snap)
+ glusterd_snap_remove(rsp_dict, snap, _gf_true, _gf_true, _gf_false);
+ snap = NULL;
+ }
+
+ return snap;
+}
+
+/* Added missed_snap_entry to rsp_dict */
+int32_t
+glusterd_add_missed_snaps_to_dict(dict_t *rsp_dict,
+ glusterd_volinfo_t *snap_vol,
+ glusterd_brickinfo_t *brickinfo,
+ int32_t brick_number, int32_t op)
+{
+ char *snap_uuid = NULL;
+ char missed_snap_entry[PATH_MAX] = "";
+ char name_buf[PATH_MAX] = "";
+ int32_t missed_snap_count = -1;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(snap_vol);
+ GF_ASSERT(brickinfo);
+
+ snap_uuid = gf_strdup(uuid_utoa(snap_vol->snapshot->snap_id));
+ if (!snap_uuid) {
+ ret = -1;
+ goto out;
+ }
+
+ len = snprintf(missed_snap_entry, sizeof(missed_snap_entry),
+ "%s:%s=%s:%d:%s:%d:%d", uuid_utoa(brickinfo->uuid),
+ snap_uuid, snap_vol->volname, brick_number, brickinfo->path,
+ op, GD_MISSED_SNAP_PENDING);
+ if ((len < 0) || (len >= sizeof(missed_snap_entry))) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ /* Fetch the missed_snap_count from the dict */
+ ret = dict_get_int32n(rsp_dict, "missed_snap_count",
+ SLEN("missed_snap_count"), &missed_snap_count);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Key=missed_snap_count", NULL);
+ /* Initialize the missed_snap_count for the first time */
+ missed_snap_count = 0;
+ }
+
+ /* Setting the missed_snap_entry in the rsp_dict */
+ snprintf(name_buf, sizeof(name_buf), "missed_snaps_%d", missed_snap_count);
+ ret = dict_set_dynstr_with_alloc(rsp_dict, name_buf, missed_snap_entry);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set missed_snap_entry (%s) "
+ "in the rsp_dict.",
+ missed_snap_entry);
+ goto out;
+ }
+ missed_snap_count++;
+
+ /* Setting the new missed_snap_count in the dict */
+ ret = dict_set_int32n(rsp_dict, "missed_snap_count",
+ SLEN("missed_snap_count"), missed_snap_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set missed_snap_count for %s "
+ "in the rsp_dict.",
+ missed_snap_entry);
+ goto out;
+ }
+
+out:
+ if (snap_uuid)
+ GF_FREE(snap_uuid);
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* This function actually calls the command (or the API) for taking the
+ snapshot of the backend brick filesystem. If this is successful,
+ then call the glusterd_snap_create function to create the snap object
+ for glusterd
+*/
+int32_t
+glusterd_take_lvm_snapshot(glusterd_brickinfo_t *brickinfo,
+ char *origin_brick_path)
+{
+ char msg[NAME_MAX] = "";
+ char buf[PATH_MAX] = "";
+ char *ptr = NULL;
+ char *origin_device = NULL;
+ int ret = -1;
+ gf_boolean_t match = _gf_false;
+ runner_t runner = {
+ 0,
+ };
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(brickinfo);
+ GF_ASSERT(origin_brick_path);
+
+ origin_device = glusterd_get_brick_mount_device(origin_brick_path);
+ if (!origin_device) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_GET_INFO_FAIL,
+ "getting device name for "
+ "the brick %s failed",
+ origin_brick_path);
+ goto out;
+ }
+
+ /* Figuring out if setactivationskip flag is supported or not */
+ runinit(&runner);
+ snprintf(msg, sizeof(msg), "running lvcreate help");
+ runner_add_args(&runner, LVM_CREATE, "--help", NULL);
+ runner_log(&runner, "", GF_LOG_DEBUG, msg);
+ runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
+ ret = runner_start(&runner);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_LVCREATE_FAIL,
+ "Failed to run lvcreate help");
+ runner_end(&runner);
+ goto out;
+ }
+
+ /* Looking for setactivationskip in lvcreate --help */
+ do {
+ ptr = fgets(buf, sizeof(buf), runner_chio(&runner, STDOUT_FILENO));
+ if (ptr) {
+ if (strstr(buf, "setactivationskip")) {
+ match = _gf_true;
+ break;
+ }
+ }
+ } while (ptr != NULL);
+ runner_end(&runner);
+
+ /* Taking the actual snapshot */
+ runinit(&runner);
+ snprintf(msg, sizeof(msg), "taking snapshot of the brick %s",
+ origin_brick_path);
+ if (match == _gf_true)
+ runner_add_args(&runner, LVM_CREATE, "-s", origin_device,
+ "--setactivationskip", "n", "--name",
+ brickinfo->device_path, NULL);
+ else
+ runner_add_args(&runner, LVM_CREATE, "-s", origin_device, "--name",
+ brickinfo->device_path, NULL);
+ runner_log(&runner, this->name, GF_LOG_DEBUG, msg);
+ ret = runner_run(&runner);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "taking snapshot of the "
+ "brick (%s) of device %s failed",
+ origin_brick_path, origin_device);
+ }
+
+out:
+ if (origin_device)
+ GF_FREE(origin_device);
+
+ return ret;
+}
+
+int32_t
+glusterd_snap_brick_create(glusterd_volinfo_t *snap_volinfo,
+ glusterd_brickinfo_t *brickinfo, int32_t brick_count,
+ int32_t clone)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ char snap_brick_mount_path[PATH_MAX] = "";
+ char clone_uuid[64] = "";
+ struct stat statbuf = {
+ 0,
+ };
+ int32_t len = 0;
+
+ this = THIS;
+
+ GF_ASSERT(snap_volinfo);
+ GF_ASSERT(brickinfo);
+
+ if (clone) {
+ GLUSTERD_GET_UUID_NOHYPHEN(clone_uuid, snap_volinfo->volume_id);
+ len = snprintf(snap_brick_mount_path, sizeof(snap_brick_mount_path),
+ "%s/%s/brick%d", snap_mount_dir, clone_uuid,
+ brick_count + 1);
+ } else {
+ len = snprintf(snap_brick_mount_path, sizeof(snap_brick_mount_path),
+ "%s/%s/brick%d", snap_mount_dir, snap_volinfo->volname,
+ brick_count + 1);
+ }
+ if ((len < 0) || (len >= sizeof(snap_brick_mount_path))) {
+ goto out;
+ }
+
+ ret = mkdir_p(snap_brick_mount_path, 0755, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "creating the brick directory"
+ " %s for the snapshot %s(device: %s) failed",
+ snap_brick_mount_path, snap_volinfo->volname,
+ brickinfo->device_path);
+ goto out;
+ }
+ /* mount the snap logical device on the directory inside
+ /run/gluster/snaps/<snapname>/@snap_brick_mount_path
+ Way to mount the snap brick via mount api is this.
+ ret = mount (device, snap_brick_mount_path, entry->mnt_type,
+ MS_MGC_VAL, "nouuid");
+ But for now, mounting using runner apis.
+ */
+ ret = glusterd_mount_lvm_snapshot(brickinfo, snap_brick_mount_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_LVM_MOUNT_FAILED,
+ "Failed to mount lvm snapshot.");
+ goto out;
+ }
+
+ ret = sys_stat(brickinfo->path, &statbuf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+ "stat of the brick %s"
+ "(brick mount: %s) failed (%s)",
+ brickinfo->path, snap_brick_mount_path, strerror(errno));
+ goto out;
+ }
+ ret = sys_lsetxattr(brickinfo->path, GF_XATTR_VOL_ID_KEY,
+ snap_volinfo->volume_id, 16, XATTR_REPLACE);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SET_XATTR_FAIL,
+ "Failed to set "
+ "extended attribute %s on %s. Reason: "
+ "%s, snap: %s",
+ GF_XATTR_VOL_ID_KEY, brickinfo->path, strerror(errno),
+ snap_volinfo->volname);
+ goto out;
+ }
+
+out:
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_UMOUNTING_SNAP_BRICK,
+ "unmounting the snap brick"
+ " mount %s",
+ snap_brick_mount_path);
+ /*umount2 system call doesn't cleanup mtab entry after un-mount.
+ So use external umount command*/
+ glusterd_umount(snap_brick_mount_path);
+ }
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+static int32_t
+glusterd_add_brick_to_snap_volume(dict_t *dict, dict_t *rsp_dict,
+ glusterd_volinfo_t *snap_vol,
+ glusterd_brickinfo_t *original_brickinfo,
+ int64_t volcount, int32_t brick_count,
+ int clone)
+{
+ char key[64] = "";
+ int keylen;
+ char *value = NULL;
+ char *snap_brick_dir = NULL;
+ char snap_brick_path[PATH_MAX] = "";
+ char clone_uuid[64] = "";
+ char *snap_device = NULL;
+ glusterd_brickinfo_t *snap_brickinfo = NULL;
+ gf_boolean_t add_missed_snap = _gf_false;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ char abspath[PATH_MAX] = "";
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(snap_vol);
+ GF_ASSERT(original_brickinfo);
+
+ snprintf(key, sizeof(key), "vol%" PRId64 ".origin_brickpath%d", volcount,
+ brick_count);
+ ret = dict_set_dynstr_with_alloc(dict, key, original_brickinfo->path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ ret = glusterd_brickinfo_new(&snap_brickinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_NEW_INFO_FAIL,
+ "initializing the brick for the snap "
+ "volume failed (snapname: %s)",
+ snap_vol->snapshot->snapname);
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "vol%" PRId64 ".fstype%d", volcount,
+ brick_count);
+ ret = dict_get_strn(dict, key, keylen, &value);
+ if (!ret) {
+ /* Update the fstype in original brickinfo as well */
+ gf_strncpy(original_brickinfo->fstype, value,
+ sizeof(original_brickinfo->fstype));
+ gf_strncpy(snap_brickinfo->fstype, value,
+ sizeof(snap_brickinfo->fstype));
+ } else {
+ if (is_origin_glusterd(dict) == _gf_true)
+ add_missed_snap = _gf_true;
+ }
+
+ keylen = snprintf(key, sizeof(key), "vol%" PRId64 ".mnt_opts%d", volcount,
+ brick_count);
+ ret = dict_get_strn(dict, key, keylen, &value);
+ if (!ret) {
+ /* Update the mnt_opts in original brickinfo as well */
+ gf_strncpy(original_brickinfo->mnt_opts, value,
+ sizeof(original_brickinfo->mnt_opts));
+ gf_strncpy(snap_brickinfo->mnt_opts, value,
+ sizeof(snap_brickinfo->mnt_opts));
+ } else {
+ if (is_origin_glusterd(dict) == _gf_true)
+ add_missed_snap = _gf_true;
+ }
+
+ keylen = snprintf(key, sizeof(key), "vol%" PRId64 ".brickdir%d", volcount,
+ brick_count);
+ ret = dict_get_strn(dict, key, keylen, &snap_brick_dir);
+ if (ret) {
+ /* Using original brickinfo here because it will be a
+ * pending snapshot and storing the original brickinfo
+ * will help in mapping while recreating the missed snapshot
+ */
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_NOT_FOUND,
+ "Unable to fetch "
+ "snap mount path(%s). Adding to missed_snap_list",
+ key);
+ snap_brickinfo->snap_status = -1;
+
+ snap_brick_dir = original_brickinfo->mount_dir;
+
+ /* In origiator node add snaps missed
+ * from different nodes to the dict
+ */
+ if (is_origin_glusterd(dict) == _gf_true)
+ add_missed_snap = _gf_true;
+ }
+
+ if ((snap_brickinfo->snap_status != -1) &&
+ (!gf_uuid_compare(original_brickinfo->uuid, MY_UUID)) &&
+ (!glusterd_is_brick_started(original_brickinfo))) {
+ /* In case if the brick goes down after prevalidate. */
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_DISCONNECTED,
+ "brick %s:%s is not"
+ " started (snap: %s)",
+ original_brickinfo->hostname, original_brickinfo->path,
+ snap_vol->snapshot->snapname);
+
+ snap_brickinfo->snap_status = -1;
+ add_missed_snap = _gf_true;
+ }
+
+ if (add_missed_snap) {
+ ret = glusterd_add_missed_snaps_to_dict(
+ rsp_dict, snap_vol, original_brickinfo, brick_count + 1,
+ GF_SNAP_OPTION_TYPE_CREATE);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSEDSNAP_INFO_SET_FAIL,
+ "Failed to add missed"
+ " snapshot info for %s:%s in the rsp_dict",
+ original_brickinfo->hostname, original_brickinfo->path);
+ goto out;
+ }
+ }
+
+ /* Create brick-path in the format /var/run/gluster/snaps/ *
+ * <snap-uuid>/<original-brick#>/snap-brick-dir *
+ */
+ if (clone) {
+ GLUSTERD_GET_UUID_NOHYPHEN(clone_uuid, snap_vol->volume_id);
+ len = snprintf(snap_brick_path, sizeof(snap_brick_path),
+ "%s/%s/brick%d%s", snap_mount_dir, clone_uuid,
+ brick_count + 1, snap_brick_dir);
+ } else {
+ len = snprintf(snap_brick_path, sizeof(snap_brick_path),
+ "%s/%s/brick%d%s", snap_mount_dir, snap_vol->volname,
+ brick_count + 1, snap_brick_dir);
+ }
+ if ((len < 0) || (len >= sizeof(snap_brick_path))) {
+ ret = -1;
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "vol%" PRId64 ".brick_snapdevice%d",
+ volcount, brick_count);
+ ret = dict_get_strn(dict, key, keylen, &snap_device);
+ if (ret) {
+ /* If the device name is empty, so will be the brick path
+ * Hence the missed snap has already been added above
+ */
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND,
+ "Unable to fetch "
+ "snap device (%s). Leaving empty",
+ key);
+ } else
+ gf_strncpy(snap_brickinfo->device_path, snap_device,
+ sizeof(snap_brickinfo->device_path));
+
+ ret = gf_canonicalize_path(snap_brick_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CANONICALIZE_FAIL,
+ "Failed to canonicalize path");
+ goto out;
+ }
+
+ gf_strncpy(snap_brickinfo->hostname, original_brickinfo->hostname,
+ sizeof(snap_brickinfo->hostname));
+ gf_strncpy(snap_brickinfo->path, snap_brick_path,
+ sizeof(snap_brickinfo->path));
+
+ if (!realpath(snap_brick_path, abspath)) {
+ /* ENOENT indicates that brick path has not been created which
+ * is a valid scenario */
+ if (errno != ENOENT) {
+ gf_msg(this->name, GF_LOG_CRITICAL, errno,
+ GD_MSG_BRICKINFO_CREATE_FAIL,
+ "realpath () "
+ "failed for brick %s. The underlying filesystem"
+ " may be in bad state",
+ snap_brick_path);
+ ret = -1;
+ goto out;
+ }
+ }
+ gf_strncpy(snap_brickinfo->real_path, abspath,
+ sizeof(snap_brickinfo->real_path));
+
+ gf_strncpy(snap_brickinfo->mount_dir, original_brickinfo->mount_dir,
+ sizeof(snap_brickinfo->mount_dir));
+ gf_uuid_copy(snap_brickinfo->uuid, original_brickinfo->uuid);
+ /* AFR changelog names are based on brick_id and hence the snap
+ * volume's bricks must retain the same ID */
+ cds_list_add_tail(&snap_brickinfo->brick_list, &snap_vol->bricks);
+
+ if (clone) {
+ GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO(snap_brickinfo, snap_vol,
+ brick_count);
+ } else
+ gf_strncpy(snap_brickinfo->brick_id, original_brickinfo->brick_id,
+ sizeof(snap_brickinfo->brick_id));
+
+out:
+ if (ret && snap_brickinfo)
+ GF_FREE(snap_brickinfo);
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* This function will update the file-system label of the
+ * backend snapshot brick.
+ *
+ * @param brickinfo brickinfo of the snap volume
+ *
+ * @return 0 on success and -1 on failure
+ */
+int
+glusterd_update_fs_label(glusterd_brickinfo_t *brickinfo)
+{
+ int32_t ret = -1;
+ char msg[PATH_MAX] = "";
+ char label[NAME_MAX] = "";
+ uuid_t uuid = {
+ 0,
+ };
+ runner_t runner = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(brickinfo);
+
+ /* Generate a new UUID */
+ gf_uuid_generate(uuid);
+
+ GLUSTERD_GET_UUID_NOHYPHEN(label, uuid);
+
+ runinit(&runner);
+
+ /* Call the file-system specific tools to update the file-system
+ * label. Currently we are only supporting xfs and ext2/ext3/ext4
+ * file-system.
+ */
+ if (0 == strcmp(brickinfo->fstype, "xfs")) {
+ /* XFS label is of size 12. Therefore we should truncate the
+ * label to 12 bytes*/
+ label[12] = '\0';
+ len = snprintf(msg, sizeof(msg),
+ "Changing filesystem label "
+ "of %s brick to %s",
+ brickinfo->path, label);
+ if (len < 0) {
+ strcpy(msg, "<error>");
+ }
+ /* Run the run xfs_admin tool to change the label
+ * of the file-system */
+ runner_add_args(&runner, "xfs_admin", "-L", label,
+ brickinfo->device_path, NULL);
+ } else if (0 == strcmp(brickinfo->fstype, "ext4") ||
+ 0 == strcmp(brickinfo->fstype, "ext3") ||
+ 0 == strcmp(brickinfo->fstype, "ext2")) {
+ /* Ext2/Ext3/Ext4 label is of size 16. Therefore we should
+ * truncate the label to 16 bytes*/
+ label[16] = '\0';
+ len = snprintf(msg, sizeof(msg),
+ "Changing filesystem label "
+ "of %s brick to %s",
+ brickinfo->path, label);
+ if (len < 0) {
+ strcpy(msg, "<error>");
+ }
+ /* For ext2/ext3/ext4 run tune2fs to change the
+ * file-system label */
+ runner_add_args(&runner, "tune2fs", "-L", label, brickinfo->device_path,
+ NULL);
+ } else {
+ gf_msg(this->name, GF_LOG_WARNING, EOPNOTSUPP, GD_MSG_OP_UNSUPPORTED,
+ "Changing file-system "
+ "label of %s file-system is not supported as of now",
+ brickinfo->fstype);
+ runner_end(&runner);
+ ret = -1;
+ goto out;
+ }
+
+ runner_log(&runner, this->name, GF_LOG_DEBUG, msg);
+ ret = runner_run(&runner);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FS_LABEL_UPDATE_FAIL,
+ "Failed to change "
+ "filesystem label of %s brick to %s",
+ brickinfo->path, label);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int32_t
+glusterd_take_brick_snapshot(dict_t *dict, glusterd_volinfo_t *snap_vol,
+ glusterd_brickinfo_t *brickinfo, int32_t volcount,
+ int32_t brick_count, int32_t clone)
+{
+ char *origin_brick_path = NULL;
+ char key[64] = "";
+ int keylen;
+ int32_t ret = -1;
+ gf_boolean_t snap_activate = _gf_false;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(snap_vol);
+ GF_ASSERT(brickinfo);
+ GF_ASSERT(priv);
+
+ if (strlen(brickinfo->device_path) == 0) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Device path is empty "
+ "brick %s:%s",
+ brickinfo->hostname, brickinfo->path);
+ ret = -1;
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "vol%d.origin_brickpath%d", volcount,
+ brick_count);
+ ret = dict_get_strn(dict, key, keylen, &origin_brick_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch "
+ "brick path (%s)",
+ key);
+ goto out;
+ }
+
+ ret = glusterd_take_lvm_snapshot(brickinfo, origin_brick_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "Failed to take snapshot of "
+ "brick %s:%s",
+ brickinfo->hostname, origin_brick_path);
+ goto out;
+ }
+
+ /* After the snapshot both the origin brick (LVM brick) and
+ * the snapshot brick will have the same file-system label. This
+ * will cause lot of problems at mount time. Therefore we must
+ * generate a new label for the snapshot brick
+ */
+ ret = glusterd_update_fs_label(brickinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FS_LABEL_UPDATE_FAIL,
+ "Failed to update "
+ "file-system label for %s brick",
+ brickinfo->path);
+ /* Failing to update label should not cause snapshot failure.
+ * Currently label is updated only for XFS and ext2/ext3/ext4
+ * file-system.
+ */
+ }
+
+ /* create the complete brick here in case of clone and
+ * activate-on-create configuration.
+ */
+ snap_activate = dict_get_str_boolean(
+ priv->opts, GLUSTERD_STORE_KEY_SNAP_ACTIVATE, _gf_false);
+ if (clone || snap_activate) {
+ ret = glusterd_snap_brick_create(snap_vol, brickinfo, brick_count,
+ clone);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_CREATION_FAIL,
+ "not able to "
+ "create the brick for the snap %s, volume %s",
+ snap_vol->snapshot->snapname, snap_vol->volname);
+ goto out;
+ }
+ }
+
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_snap_clear_unsupported_opt(
+ glusterd_volinfo_t *volinfo,
+ struct gd_snap_unsupported_opt_t *unsupported_opt)
+{
+ int ret = -1;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
+
+ for (i = 0; unsupported_opt[i].key; i++) {
+ glusterd_volinfo_get(volinfo, unsupported_opt[i].key,
+ &unsupported_opt[i].value);
+
+ if (unsupported_opt[i].value) {
+ unsupported_opt[i].value = gf_strdup(unsupported_opt[i].value);
+ if (!unsupported_opt[i].value) {
+ ret = -1;
+ goto out;
+ }
+ dict_del(volinfo->dict, unsupported_opt[i].key);
+ }
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int
+glusterd_snap_set_unsupported_opt(
+ glusterd_volinfo_t *volinfo,
+ struct gd_snap_unsupported_opt_t *unsupported_opt)
+{
+ int ret = -1;
+ int i = 0;
+
+ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
+
+ for (i = 0; unsupported_opt[i].key; i++) {
+ if (!unsupported_opt[i].value)
+ continue;
+
+ ret = dict_set_dynstr(volinfo->dict, unsupported_opt[i].key,
+ unsupported_opt[i].value);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, errno, GD_MSG_DICT_SET_FAILED,
+ "dict set failed");
+ goto out;
+ }
+ unsupported_opt[i].value = NULL;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+glusterd_volinfo_t *
+glusterd_do_snap_vol(glusterd_volinfo_t *origin_vol, glusterd_snap_t *snap,
+ dict_t *dict, dict_t *rsp_dict, int64_t volcount,
+ int clone)
+{
+ char key[64] = "";
+ int keylen;
+ char *username = NULL;
+ char *password = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ uuid_t *snap_volid = NULL;
+ int32_t ret = -1;
+ int32_t brick_count = 0;
+ xlator_t *this = NULL;
+ char *clonename = NULL;
+ gf_boolean_t conf_present = _gf_false;
+ int i = 0;
+
+ struct gd_snap_unsupported_opt_t unsupported_opt[] = {
+ {.key = VKEY_FEATURES_QUOTA, .value = NULL},
+ {.key = VKEY_FEATURES_INODE_QUOTA, .value = NULL},
+ {.key = "feature.deem-statfs", .value = NULL},
+ {.key = "features.quota-deem-statfs", .value = NULL},
+ {.key = NULL, .value = NULL}};
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(dict);
+ GF_ASSERT(origin_vol);
+ GF_ASSERT(rsp_dict);
+
+ /* fetch username, password and vol_id from dict*/
+ keylen = snprintf(key, sizeof(key), "volume%" PRId64 "_username", volcount);
+ ret = dict_get_strn(dict, key, keylen, &username);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get %s for "
+ "snap %s",
+ key, snap->snapname);
+ goto out;
+ }
+ keylen = snprintf(key, sizeof(key), "volume%" PRId64 "_password", volcount);
+ ret = dict_get_strn(dict, key, keylen, &password);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get %s for "
+ "snap %s",
+ key, snap->snapname);
+ goto out;
+ }
+
+ snprintf(key, sizeof(key), "vol%" PRId64 "_volid", volcount);
+ ret = dict_get_bin(dict, key, (void **)&snap_volid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch snap_volid");
+ goto out;
+ }
+
+ /* We are not setting the username and password here as
+ * we need to set the user name and password passed in
+ * the dictionary
+ */
+ ret = glusterd_volinfo_dup(origin_vol, &snap_vol, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED,
+ "Failed to duplicate volinfo "
+ "for the snapshot %s",
+ snap->snapname);
+ goto out;
+ }
+
+ /* uuid is used as lvm snapshot name.
+ This will avoid restrictions on snapshot names provided by user */
+ gf_uuid_copy(snap_vol->volume_id, *snap_volid);
+ snap_vol->is_snap_volume = _gf_true;
+ snap_vol->snapshot = snap;
+
+ if (clone) {
+ snap_vol->is_snap_volume = _gf_false;
+ ret = dict_get_strn(dict, "clonename", SLEN("clonename"), &clonename);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get %s "
+ "for snap %s",
+ key, snap->snapname);
+ goto out;
+ }
+ cds_list_add_tail(&snap_vol->vol_list, &snap->volumes);
+ gf_strncpy(snap_vol->volname, clonename, sizeof(snap_vol->volname));
+ gf_uuid_copy(snap_vol->restored_from_snap,
+ origin_vol->snapshot->snap_id);
+
+ } else {
+ GLUSTERD_GET_UUID_NOHYPHEN(snap_vol->volname, *snap_volid);
+ gf_strncpy(snap_vol->parent_volname, origin_vol->volname,
+ sizeof(snap_vol->parent_volname));
+ ret = glusterd_list_add_snapvol(origin_vol, snap_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_LIST_SET_FAIL,
+ "could not add the "
+ "snap volume %s to the list",
+ snap_vol->volname);
+ goto out;
+ }
+ /* TODO : Sync before taking a snapshot */
+ /* Copy the status and config files of geo-replication before
+ * taking a snapshot. During restore operation these files needs
+ * to be copied back in /var/lib/glusterd/georeplication/
+ */
+ ret = glusterd_copy_geo_rep_files(origin_vol, snap_vol, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED,
+ "Failed to copy "
+ "geo-rep config and status files for volume %s",
+ origin_vol->volname);
+ goto out;
+ }
+ }
+
+ glusterd_auth_set_username(snap_vol, username);
+ glusterd_auth_set_password(snap_vol, password);
+
+ /* Adding snap brickinfos to the snap volinfo */
+ brick_count = 0;
+ cds_list_for_each_entry(brickinfo, &origin_vol->bricks, brick_list)
+ {
+ ret = glusterd_add_brick_to_snap_volume(
+ dict, rsp_dict, snap_vol, brickinfo, volcount, brick_count, clone);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_ADD_FAIL,
+ "Failed to add the snap brick for "
+ "%s:%s to the snap volume",
+ brickinfo->hostname, brickinfo->path);
+ goto out;
+ }
+ brick_count++;
+ }
+
+ /* During snapshot creation if I/O is in progress,
+ * then barrier value is enabled. Hence during snapshot create
+ * and in-turn snapshot restore the barrier value is set to enable.
+ * Because of this further I/O on the mount point fails.
+ * Hence remove the barrier key from newly created snap volinfo
+ * before storing and generating the brick volfiles. Also update
+ * the snap vol's version after removing the barrier key.
+ */
+ dict_deln(snap_vol->dict, "features.barrier", SLEN("features.barrier"));
+ gd_update_volume_op_versions(snap_vol);
+
+ /* *
+ * Create the export file from the node where ganesha.enable "on"
+ * is executed
+ * */
+ if (glusterd_is_ganesha_cluster() &&
+ glusterd_check_ganesha_export(snap_vol)) {
+ if (is_origin_glusterd(dict)) {
+ ret = manage_export_config(clonename, "on", NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Failed to create"
+ "export file for NFS-Ganesha\n");
+ goto out;
+ }
+ }
+
+ ret = dict_set_dynstr_with_alloc(snap_vol->dict,
+ "features.cache-invalidation", "on");
+ ret = gd_ganesha_send_dbus(clonename, "on");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Dynamic export addition/deletion failed."
+ " Please see log file for details. Clone name = %s",
+ clonename);
+ goto out;
+ }
+ }
+ if (!glusterd_is_ganesha_cluster() &&
+ glusterd_check_ganesha_export(snap_vol)) {
+ /* This happens when a snapshot was created when Ganesha was
+ * enabled globally. Then Ganesha disabled from the cluster.
+ * In such cases, we will have the volume level option set
+ * on dict, So we have to disable it as it doesn't make sense
+ * to keep the option.
+ */
+
+ ret = dict_set_dynstr(snap_vol->dict, "ganesha.enable", "off");
+ if (ret)
+ goto out;
+ }
+
+ ret = glusterd_store_volinfo(snap_vol, GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL,
+ "Failed to store snapshot "
+ "volinfo (%s) for snap %s",
+ snap_vol->volname, snap->snapname);
+ goto out;
+ }
+
+ ret = glusterd_copy_quota_files(origin_vol, snap_vol, &conf_present);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_VOL_CONFIG_FAIL,
+ "Failed to copy quota "
+ "config and cksum for volume %s",
+ origin_vol->volname);
+ goto out;
+ }
+
+ if (snap_vol->is_snap_volume) {
+ ret = glusterd_snap_clear_unsupported_opt(snap_vol, unsupported_opt);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED,
+ "Failed to clear quota "
+ "option for the snap %s (volume: %s)",
+ snap->snapname, origin_vol->volname);
+ goto out;
+ }
+ }
+
+ ret = generate_brick_volfiles(snap_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "generating the brick "
+ "volfiles for the snap %s (volume: %s) failed",
+ snap->snapname, origin_vol->volname);
+ goto reset_option;
+ }
+
+ ret = generate_client_volfiles(snap_vol, GF_CLIENT_TRUSTED);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "generating the trusted "
+ "client volfiles for the snap %s (volume: %s) failed",
+ snap->snapname, origin_vol->volname);
+ goto reset_option;
+ }
+
+ ret = generate_client_volfiles(snap_vol, GF_CLIENT_OTHER);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "generating the client "
+ "volfiles for the snap %s (volume: %s) failed",
+ snap->snapname, origin_vol->volname);
+ goto reset_option;
+ }
+
+reset_option:
+ if (snap_vol->is_snap_volume) {
+ if (glusterd_snap_set_unsupported_opt(snap_vol, unsupported_opt)) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED,
+ "Failed to reset quota "
+ "option for the snap %s (volume: %s)",
+ snap->snapname, origin_vol->volname);
+ }
+ }
+out:
+ if (ret) {
+ for (i = 0; unsupported_opt[i].key; i++)
+ GF_FREE(unsupported_opt[i].value);
+
+ if (snap_vol) {
+ if (glusterd_is_ganesha_cluster() &&
+ glusterd_check_ganesha_export(snap_vol)) {
+ if (is_origin_glusterd(dict)) {
+ ret = manage_export_config(clonename, "on", NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Failed to create"
+ "export file for NFS-Ganesha\n");
+ }
+ }
+
+ ret = gd_ganesha_send_dbus(clonename, "off");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_EXPORT_FILE_CREATE_FAIL,
+ "Dynamic export addition/deletion failed."
+ " Please see log file for details. Clone name = %s",
+ clonename);
+ }
+ }
+
+ glusterd_snap_volume_remove(rsp_dict, snap_vol, _gf_true, _gf_true);
+ }
+ snap_vol = NULL;
+ }
+
+ return snap_vol;
+}
+
+/*This is the prevalidate function for both activate and deactive of snap
+ * For Activate operation pass is_op_activate as _gf_true
+ * For Deactivate operation pass is_op_activate as _gf_false
+ * */
+int
+glusterd_snapshot_activate_deactivate_prevalidate(dict_t *dict,
+ char **op_errstr,
+ uint32_t *op_errno,
+ dict_t *rsp_dict,
+ gf_boolean_t is_op_activate)
+{
+ int32_t ret = -1;
+ char *snapname = NULL;
+ xlator_t *this = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *snap_volinfo = NULL;
+ char err_str[PATH_MAX] = "";
+ gf_loglevel_t loglevel = GF_LOG_ERROR;
+ glusterd_volume_status volume_status = GLUSTERD_STATUS_STOPPED;
+ int flags = 0;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ if (!dict || !op_errstr) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "input parameters NULL");
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Getting the snap name "
+ "failed");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name(snapname);
+ if (!snap) {
+ snprintf(err_str, sizeof(err_str),
+ "Snapshot (%s) does not "
+ "exist.",
+ snapname);
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND,
+ "Snapname=%s", snapname, NULL);
+ *op_errno = EG_NOSNAP;
+ ret = -1;
+ goto out;
+ }
+
+ /*If its activation of snap then fetch the flags*/
+ if (is_op_activate) {
+ ret = dict_get_int32n(dict, "flags", SLEN("flags"), &flags);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get flags");
+ goto out;
+ }
+ }
+
+ /* TODO : As of now there is only volume in snapshot.
+ * Change this when multiple volume snapshot is introduced
+ */
+ snap_volinfo = cds_list_entry(snap->volumes.next, glusterd_volinfo_t,
+ vol_list);
+ if (!snap_volinfo) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOLINFO_GET_FAIL,
+ "Unable to fetch snap_volinfo");
+ ret = -1;
+ goto out;
+ }
+
+ /*TODO: When multiple snapvolume are involved a cumulative
+ * logic is required to tell whether is snapshot is
+ * started/partially started/stopped*/
+ if (is_op_activate) {
+ volume_status = GLUSTERD_STATUS_STARTED;
+ }
+
+ if (snap_volinfo->status == volume_status) {
+ if (is_op_activate) {
+ /* if flag is to GF_CLI_FLAG_OP_FORCE
+ * try to start the snap volume, even
+ * if the volume_status is GLUSTERD_STATUS_STARTED.
+ * By doing so we try to bring
+ * back the brick processes that are down*/
+ if (!(flags & GF_CLI_FLAG_OP_FORCE)) {
+ snprintf(err_str, sizeof(err_str),
+ "Snapshot %s is already activated.", snapname);
+ *op_errno = EINVAL;
+ ret = -1;
+ }
+ } else {
+ snprintf(err_str, sizeof(err_str),
+ "Snapshot %s is already deactivated.", snapname);
+ *op_errno = EINVAL;
+ ret = -1;
+ }
+ goto out;
+ }
+ ret = 0;
+out:
+
+ if (ret && err_str[0] != '\0' && op_errstr) {
+ gf_msg(this->name, loglevel, 0, GD_MSG_SNAPSHOT_OP_FAILED, "%s",
+ err_str);
+ *op_errstr = gf_strdup(err_str);
+ }
+
+ return ret;
+}
+
+int32_t
+glusterd_handle_snapshot_delete_vol(dict_t *dict, char *err_str,
+ uint32_t *op_errno, int len)
+{
+ int32_t ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+ char *volname = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get "
+ "volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(err_str, len, "Volume (%s) does not exist", volname);
+ *op_errno = EG_NOVOL;
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Failed to get volinfo of "
+ "volume %s",
+ volname);
+ goto out;
+ }
+
+ ret = glusterd_snapshot_get_vol_snapnames(dict, volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_LIST_GET_FAIL,
+ "Failed to get snapshot list for volume %s", volname);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+static int32_t
+glusterd_handle_snapshot_delete_all(dict_t *dict)
+{
+ int32_t ret = -1;
+ int32_t i = 0;
+ char key[32] = "";
+ glusterd_conf_t *priv = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_snap_t *tmp_snap = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GF_ASSERT(dict);
+
+ cds_list_for_each_entry_safe(snap, tmp_snap, &priv->snapshots, snap_list)
+ {
+ /* indexing from 1 to n, to keep it uniform with other code
+ * paths
+ */
+ i++;
+ ret = snprintf(key, sizeof(key), "snapname%d", i);
+ if (ret < 0) {
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(dict, key, snap->snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save "
+ "snap name");
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32n(dict, "snapcount", SLEN("snapcount"), i);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save snapcount");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
+glusterd_handle_snapshot_delete_type_snap(rpcsvc_request_t *req,
+ glusterd_op_t op, dict_t *dict,
+ char *err_str, uint32_t *op_errno,
+ size_t len)
+{
+ int32_t ret = -1;
+ int64_t volcount = 0;
+ char *snapname = NULL;
+ char *volname = NULL;
+ char key[64] = "";
+ int keylen;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_volinfo_t *tmp = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(req);
+ GF_ASSERT(dict);
+ GF_ASSERT(err_str);
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get snapname");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name(snapname);
+ if (!snap) {
+ snprintf(err_str, len, "Snapshot (%s) does not exist", snapname);
+ *op_errno = EG_NOSNAP;
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND, "%s",
+ err_str);
+ ret = -1;
+ goto out;
+ }
+
+ /* Set volnames in the dict to get mgmt_v3 lock */
+ cds_list_for_each_entry_safe(snap_vol, tmp, &snap->volumes, vol_list)
+ {
+ volcount++;
+ volname = gf_strdup(snap_vol->parent_volname);
+ if (!volname) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "strdup failed");
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "volname%" PRId64, volcount);
+ ret = dict_set_dynstrn(dict, key, keylen, volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "volume name in dictionary");
+ GF_FREE(volname);
+ goto out;
+ }
+ volname = NULL;
+ }
+ ret = dict_set_int64(dict, "volcount", volcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set volcount");
+ goto out;
+ }
+
+ ret = glusterd_mgmt_v3_initiate_snap_phases(req, op, dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_INIT_FAIL,
+ "Failed to initiate snap "
+ "phases");
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+/* This is a snapshot remove handler function. This function will be
+ * executed in the originator node. This function is responsible for
+ * calling mgmt v3 framework to do the actual remove on all the bricks
+ *
+ * @param req RPC request object
+ * @param op gluster operation
+ * @param dict dictionary containing snapshot remove request
+ * @param err_str In case of an err this string should be populated
+ * @param len length of err_str buffer
+ *
+ * @return Negative value on Failure and 0 in success
+ */
+int
+glusterd_handle_snapshot_delete(rpcsvc_request_t *req, glusterd_op_t op,
+ dict_t *dict, char *err_str, uint32_t *op_errno,
+ size_t len)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ int32_t delete_cmd = -1;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+
+ GF_ASSERT(req);
+ GF_ASSERT(dict);
+ GF_ASSERT(err_str);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ ret = dict_get_int32n(dict, "sub-cmd", SLEN("sub-cmd"), &delete_cmd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMAND_NOT_FOUND,
+ "Failed to get sub-cmd");
+ goto out;
+ }
+
+ switch (delete_cmd) {
+ case GF_SNAP_DELETE_TYPE_SNAP:
+ case GF_SNAP_DELETE_TYPE_ITER:
+ ret = glusterd_handle_snapshot_delete_type_snap(
+ req, op, dict, err_str, op_errno, len);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "Failed to handle "
+ "snapshot delete for type SNAP");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_DELETE_TYPE_ALL:
+ ret = glusterd_handle_snapshot_delete_all(dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "Failed to handle "
+ "snapshot delete for type ALL");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_DELETE_TYPE_VOL:
+ ret = glusterd_handle_snapshot_delete_vol(dict, err_str, op_errno,
+ len);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "Failed to handle "
+ "snapshot delete for type VOL");
+ goto out;
+ }
+ break;
+
+ default:
+ *op_errno = EINVAL;
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Wrong snapshot delete type");
+ break;
+ }
+
+ if (ret == 0 && (delete_cmd == GF_SNAP_DELETE_TYPE_ALL ||
+ delete_cmd == GF_SNAP_DELETE_TYPE_VOL)) {
+ ret = glusterd_op_send_cli_response(op, 0, 0, req, dict, err_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_CLI_RESP,
+ "Failed to send cli "
+ "response");
+ goto out;
+ }
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_snapshot_remove_prevalidate(dict_t *dict, char **op_errstr,
+ uint32_t *op_errno, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ char *snapname = NULL;
+ xlator_t *this = NULL;
+ glusterd_snap_t *snap = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ if (!dict || !op_errstr) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "input parameters NULL");
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Getting the snap name "
+ "failed");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name(snapname);
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND,
+ "Snapshot (%s) does not exist", snapname);
+ *op_errno = EG_NOSNAP;
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(dict, "snapuuid",
+ uuid_utoa(snap->snap_id));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snap "
+ "uuid in response dictionary for %s snapshot",
+ snap->snapname);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_snapshot_status_prevalidate(dict_t *dict, char **op_errstr,
+ uint32_t *op_errno, dict_t *rsp_dict)
+{
+ int ret = -1;
+ char *snapname = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ int32_t cmd = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ char *volname = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+ GF_ASSERT(op_errstr);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ if (!dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "Input dict is NULL");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "sub-cmd", SLEN("sub-cmd"), &cmd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Could not fetch status cmd");
+ goto out;
+ }
+
+ switch (cmd) {
+ case GF_SNAP_STATUS_TYPE_ALL: {
+ break;
+ }
+ case GF_SNAP_STATUS_TYPE_ITER:
+ case GF_SNAP_STATUS_TYPE_SNAP: {
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Could not fetch snapname");
+ goto out;
+ }
+
+ if (!glusterd_find_snap_by_name(snapname)) {
+ ret = gf_asprintf(op_errstr,
+ "Snapshot (%s) "
+ "does not exist",
+ snapname);
+ *op_errno = EG_NOSNAP;
+ if (ret < 0) {
+ goto out;
+ }
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND,
+ "Snapshot (%s) does not exist", snapname);
+ goto out;
+ }
+ break;
+ }
+ case GF_SNAP_STATUS_TYPE_VOL: {
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Could not fetch volname");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ ret = gf_asprintf(op_errstr,
+ "Volume (%s) "
+ "does not exist",
+ volname);
+ *op_errno = EG_NOVOL;
+ if (ret < 0) {
+ goto out;
+ }
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Volume "
+ "%s not present",
+ volname);
+ goto out;
+ }
+ break;
+ }
+ default: {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_COMMAND_NOT_FOUND,
+ "Invalid command");
+ *op_errno = EINVAL;
+ break;
+ }
+ }
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int32_t
+glusterd_snapshot_activate_commit(dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ char *snapname = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *snap_volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ xlator_t *this = NULL;
+ int flags = 0;
+ int brick_count = -1;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(op_errstr);
+
+ if (!dict || !op_errstr) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "input parameters NULL");
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Getting the snap name "
+ "failed");
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "flags", SLEN("flags"), &flags);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get flags");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name(snapname);
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND,
+ "Snapshot (%s) does not exist", snapname);
+ ret = -1;
+ goto out;
+ }
+
+ /* TODO : As of now there is only volume in snapshot.
+ * Change this when multiple volume snapshot is introduced
+ */
+ snap_volinfo = cds_list_entry(snap->volumes.next, glusterd_volinfo_t,
+ vol_list);
+ if (!snap_volinfo) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Unable to fetch snap_volinfo");
+ ret = -1;
+ goto out;
+ }
+
+ /* create the complete brick here */
+ cds_list_for_each_entry(brickinfo, &snap_volinfo->bricks, brick_list)
+ {
+ brick_count++;
+ if (gf_uuid_compare(brickinfo->uuid, MY_UUID))
+ continue;
+ ret = glusterd_snap_brick_create(snap_volinfo, brickinfo, brick_count,
+ _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_CREATION_FAIL,
+ "not able to "
+ "create the brick for the snap %s, volume %s",
+ snap_volinfo->snapshot->snapname, snap_volinfo->volname);
+ goto out;
+ }
+ }
+
+ ret = glusterd_start_volume(snap_volinfo, flags, _gf_true);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_ACTIVATE_FAIL,
+ "Failed to activate snap volume %s of the snap %s",
+ snap_volinfo->volname, snap->snapname);
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(rsp_dict, "snapuuid",
+ uuid_utoa(snap->snap_id));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snap "
+ "uuid in response dictionary for %s snapshot",
+ snap->snapname);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+glusterd_snapshot_deactivate_commit(dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ char *snapname = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *snap_volinfo = NULL;
+ xlator_t *this = NULL;
+ char snap_path[PATH_MAX] = "";
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(op_errstr);
+
+ if (!dict || !op_errstr) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "input parameters NULL");
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Getting the snap name "
+ "failed");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name(snapname);
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND,
+ "Snapshot (%s) does not exist", snapname);
+ ret = -1;
+ goto out;
+ }
+
+ /* TODO : As of now there is only volume in snapshot.
+ * Change this when multiple volume snapshot is introduced
+ */
+ snap_volinfo = cds_list_entry(snap->volumes.next, glusterd_volinfo_t,
+ vol_list);
+ if (!snap_volinfo) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Unable to fetch snap_volinfo");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_stop_volume(snap_volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_DEACTIVATE_FAIL,
+ "Failed to deactivate"
+ "snap %s",
+ snapname);
+ goto out;
+ }
+
+ ret = glusterd_snap_unmount(this, snap_volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_UMOUNT_FAIL,
+ "Failed to unmounts for %s", snap->snapname);
+ }
+
+ /*Remove /var/run/gluster/snaps/<snap-name> entry for deactivated snaps.
+ * This entry will be created again during snap activate.
+ */
+ snprintf(snap_path, sizeof(snap_path), "%s/%s", snap_mount_dir, snapname);
+ ret = recursive_rmdir(snap_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Failed to remove "
+ "%s directory : error : %s",
+ snap_path, strerror(errno));
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(rsp_dict, "snapuuid",
+ uuid_utoa(snap->snap_id));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snap "
+ "uuid in response dictionary for %s snapshot",
+ snap->snapname);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+glusterd_snapshot_remove_commit(dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ char *snapname = NULL;
+ char *dup_snapname = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *snap_volinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(op_errstr);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ if (!dict || !op_errstr) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "input parameters NULL");
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Getting the snap name "
+ "failed");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name(snapname);
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND,
+ "Snapshot (%s) does not exist", snapname);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(rsp_dict, "snapuuid",
+ uuid_utoa(snap->snap_id));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snap uuid in "
+ "response dictionary for %s snapshot",
+ snap->snapname);
+ goto out;
+ }
+
+ /* Save the snap status as GD_SNAP_STATUS_DECOMMISSION so
+ * that if the node goes down the snap would be removed
+ */
+ snap->snap_status = GD_SNAP_STATUS_DECOMMISSION;
+ ret = glusterd_store_snap(snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_OBJECT_STORE_FAIL,
+ "Failed to "
+ "store snap object %s",
+ snap->snapname);
+ goto out;
+ } else
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_OP_SUCCESS,
+ "Successfully marked "
+ "snap %s for decommission.",
+ snap->snapname);
+
+ if (is_origin_glusterd(dict) == _gf_true) {
+ /* TODO : As of now there is only volume in snapshot.
+ * Change this when multiple volume snapshot is introduced
+ */
+ snap_volinfo = cds_list_entry(snap->volumes.next, glusterd_volinfo_t,
+ vol_list);
+ if (!snap_volinfo) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Unable to fetch snap_volinfo");
+ ret = -1;
+ goto out;
+ }
+
+ /* From origin glusterd check if *
+ * any peers with snap bricks is down */
+ ret = glusterd_find_missed_snap(rsp_dict, snap_volinfo, &priv->peers,
+ GF_SNAP_OPTION_TYPE_DELETE);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_GET_FAIL,
+ "Failed to find missed snap deletes");
+ goto out;
+ }
+ }
+
+ ret = glusterd_snap_remove(rsp_dict, snap, _gf_true, _gf_false, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "Failed to remove snap %s", snapname);
+ goto out;
+ }
+
+ dup_snapname = gf_strdup(snapname);
+ if (!dup_snapname) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Strdup failed");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr(rsp_dict, "snapname", dup_snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set the snapname");
+ GF_FREE(dup_snapname);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+glusterd_do_snap_cleanup(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ char *name = NULL;
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_snap_t *snap = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ if (!dict || !op_errstr) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "input parameters NULL");
+ goto out;
+ }
+
+ /* As of now snapshot of multiple volumes are not supported */
+ ret = dict_get_strn(dict, "volname1", SLEN("volname1"), &volname);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get"
+ " volume name");
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "getting the snap "
+ "name failed (volume: %s)",
+ volname);
+ goto out;
+ }
+
+ /*
+ If the snapname is not found that means the failure happened at
+ staging, or in commit, before the snap object is created, in which
+ case there is nothing to cleanup. So set ret to 0.
+ */
+ snap = glusterd_find_snap_by_name(name);
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_INFO, EINVAL, GD_MSG_SNAP_NOT_FOUND,
+ "Snapshot (%s) does not exist", name);
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_snap_remove(rsp_dict, snap, _gf_true, _gf_true, _gf_false);
+ if (ret) {
+ /* Ignore failure as this is a cleanup of half cooked
+ snapshot */
+ gf_msg_debug(this->name, 0, "removing the snap %s failed", name);
+ ret = 0;
+ }
+
+ name = NULL;
+
+ ret = 0;
+
+out:
+
+ return ret;
+}
+
+/* In case of a successful, delete or create operation, during post_validate *
+ * look for missed snap operations and update the missed snap lists */
+int32_t
+glusterd_snapshot_update_snaps_post_validate(dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ int32_t missed_snap_count = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(op_errstr);
+
+ ret = dict_get_int32n(dict, "missed_snap_count", SLEN("missed_snap_count"),
+ &missed_snap_count);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "No missed snaps");
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_add_missed_snaps_to_list(dict, missed_snap_count);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSEDSNAP_INFO_SET_FAIL,
+ "Failed to add missed snaps to list");
+ goto out;
+ }
+
+ ret = glusterd_store_update_missed_snaps();
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSEDSNAP_INFO_SET_FAIL,
+ "Failed to update missed_snaps_list");
+ goto out;
+ }
+
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_take_brick_snapshot_task(void *opaque)
+{
+ int ret = 0;
+ int32_t clone = 0;
+ snap_create_args_t *snap_args = NULL;
+ char *clonename = NULL;
+ char key[64] = "";
+ int keylen;
+
+ GF_ASSERT(opaque);
+
+ snap_args = (snap_create_args_t *)opaque;
+ THIS = snap_args->this;
+
+ /* Try and fetch clonename. If present set status with clonename *
+ * else do so as snap-vol */
+ ret = dict_get_strn(snap_args->dict, "clonename", SLEN("clonename"),
+ &clonename);
+ if (ret) {
+ keylen = snprintf(key, sizeof(key), "snap-vol%d.brick%d.status",
+ snap_args->volcount, snap_args->brickorder);
+ } else {
+ keylen = snprintf(key, sizeof(key), "clone%d.brick%d.status",
+ snap_args->volcount, snap_args->brickorder);
+ clone = 1;
+ }
+
+ ret = glusterd_take_brick_snapshot(
+ snap_args->dict, snap_args->snap_vol, snap_args->brickinfo,
+ snap_args->volcount, snap_args->brickorder, clone);
+
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "Failed to "
+ "take backend snapshot for brick "
+ "%s:%s volume(%s)",
+ snap_args->brickinfo->hostname, snap_args->brickinfo->path,
+ snap_args->snap_vol->volname);
+ }
+
+ if (dict_set_int32n(snap_args->rsp_dict, key, keylen, (ret) ? 0 : 1)) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to "
+ "add %s to dict",
+ key);
+ ret = -1;
+ goto out;
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+glusterd_take_brick_snapshot_cbk(int ret, call_frame_t *frame, void *opaque)
+{
+ snap_create_args_t *snap_args = NULL;
+ struct syncargs *args = NULL;
+
+ GF_ASSERT(opaque);
+
+ snap_args = (snap_create_args_t *)opaque;
+ args = snap_args->args;
+
+ if (ret)
+ args->op_ret = ret;
+
+ GF_FREE(opaque);
+ synctask_barrier_wake(args);
+ return 0;
+}
+
+int32_t
+glusterd_schedule_brick_snapshot(dict_t *dict, dict_t *rsp_dict,
+ glusterd_snap_t *snap)
+{
+ int ret = -1;
+ int32_t volcount = 0;
+ int32_t brickcount = 0;
+ int32_t brickorder = 0;
+ int32_t taskcount = 0;
+ char key[64] = "";
+ int keylen;
+ xlator_t *this = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ struct syncargs args = {0};
+ snap_create_args_t *snap_args = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(snap);
+
+ ret = synctask_barrier_init((&args));
+ if (ret)
+ goto out;
+ cds_list_for_each_entry(snap_vol, &snap->volumes, vol_list)
+ {
+ volcount++;
+ brickcount = 0;
+ brickorder = 0;
+ cds_list_for_each_entry(brickinfo, &snap_vol->bricks, brick_list)
+ {
+ keylen = snprintf(key, sizeof(key), "snap-vol%d.brick%d.order",
+ volcount, brickcount);
+ ret = dict_set_int32n(rsp_dict, key, keylen, brickorder);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set %s", key);
+ goto out;
+ }
+
+ if ((gf_uuid_compare(brickinfo->uuid, MY_UUID)) ||
+ (brickinfo->snap_status == -1)) {
+ if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
+ brickcount++;
+ keylen = snprintf(key, sizeof(key),
+ "snap-vol%d.brick%d.status", volcount,
+ brickorder);
+ ret = dict_set_int32n(rsp_dict, key, keylen, 0);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "failed to add %s to "
+ "dict",
+ key);
+ goto out;
+ }
+ }
+ brickorder++;
+ continue;
+ }
+
+ snap_args = GF_CALLOC(1, sizeof(*snap_args),
+ gf_gld_mt_snap_create_args_t);
+ if (!snap_args) {
+ ret = -1;
+ goto out;
+ }
+
+ snap_args->this = this;
+ snap_args->dict = dict;
+ snap_args->rsp_dict = rsp_dict;
+ snap_args->snap_vol = snap_vol;
+ snap_args->brickinfo = brickinfo;
+ snap_args->volcount = volcount;
+ snap_args->brickcount = brickcount;
+ snap_args->brickorder = brickorder;
+ snap_args->args = &args;
+
+ ret = synctask_new(
+ this->ctx->env, glusterd_take_brick_snapshot_task,
+ glusterd_take_brick_snapshot_cbk, NULL, snap_args);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "Failed to "
+ "spawn task for snapshot create");
+ GF_FREE(snap_args);
+ goto out;
+ }
+ taskcount++;
+ brickcount++;
+ brickorder++;
+ }
+
+ snprintf(key, sizeof(key), "snap-vol%d_brickcount", volcount);
+ ret = dict_set_int64(rsp_dict, key, brickcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "failed to "
+ "add %s to dict",
+ key);
+ goto out;
+ }
+ }
+ synctask_barrier_wait((&args), taskcount);
+ taskcount = 0;
+
+ if (args.op_ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "Failed to create snapshot");
+
+ ret = args.op_ret;
+out:
+ if (ret && taskcount)
+ synctask_barrier_wait((&args), taskcount);
+
+ return ret;
+}
+
+glusterd_snap_t *
+glusterd_create_snap_object_for_clone(dict_t *dict, dict_t *rsp_dict)
+{
+ char *snapname = NULL;
+ uuid_t *snap_id = NULL;
+ glusterd_snap_t *snap = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+
+ this = THIS;
+
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp_dict);
+
+ /* Fetch snapname, description, id and time from dict */
+ ret = dict_get_strn(dict, "clonename", SLEN("clonename"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch clonename");
+ goto out;
+ }
+
+ ret = dict_get_bin(dict, "clone-id", (void **)&snap_id);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch clone_id");
+ goto out;
+ }
+
+ snap = glusterd_new_snap_object();
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_OBJ_NEW_FAIL,
+ "Could not create "
+ "the snap object for snap %s",
+ snapname);
+ goto out;
+ }
+
+ gf_strncpy(snap->snapname, snapname, sizeof(snap->snapname));
+ gf_uuid_copy(snap->snap_id, *snap_id);
+
+ ret = 0;
+
+out:
+ if (ret) {
+ snap = NULL;
+ }
+
+ return snap;
+}
+
+int32_t
+glusterd_snapshot_clone_commit(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ int ret = -1;
+ int64_t volcount = 0;
+ char *snapname = NULL;
+ char *volname = NULL;
+ char *tmp_name = NULL;
+ xlator_t *this = NULL;
+ glusterd_snap_t *snap_parent = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *origin_vol = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(rsp_dict);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_strn(dict, "clonename", SLEN("clonename"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch clonename");
+ goto out;
+ }
+ tmp_name = gf_strdup(snapname);
+ if (!tmp_name) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr(rsp_dict, "clonename", tmp_name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set clonename in rsp_dict");
+ GF_FREE(tmp_name);
+ goto out;
+ }
+ tmp_name = NULL;
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to get snap name");
+ goto out;
+ }
+
+ snap_parent = glusterd_find_snap_by_name(volname);
+ if (!snap_parent) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND,
+ "Failed to "
+ "fetch snap %s",
+ volname);
+ goto out;
+ }
+
+ /* TODO : As of now there is only one volume in snapshot.
+ * Change this when multiple volume snapshot is introduced
+ */
+ origin_vol = cds_list_entry(snap_parent->volumes.next, glusterd_volinfo_t,
+ vol_list);
+ if (!origin_vol) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to get snap "
+ "volinfo %s",
+ snap_parent->snapname);
+ goto out;
+ }
+
+ snap = glusterd_create_snap_object_for_clone(dict, rsp_dict);
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_OBJ_NEW_FAIL,
+ "creating the"
+ "snap object %s failed",
+ snapname);
+ ret = -1;
+ goto out;
+ }
+
+ snap_vol = glusterd_do_snap_vol(origin_vol, snap, dict, rsp_dict, 1, 1);
+ if (!snap_vol) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "taking the "
+ "snapshot of the volume %s failed",
+ volname);
+ goto out;
+ }
+
+ volcount = 1;
+ ret = dict_set_int64(rsp_dict, "volcount", volcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set volcount");
+ goto out;
+ }
+
+ ret = glusterd_schedule_brick_snapshot(dict, rsp_dict, snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_BACKEND_MAKE_FAIL,
+ "Failed to take backend "
+ "snapshot %s",
+ snap->snapname);
+ goto out;
+ }
+
+ cds_list_del_init(&snap_vol->vol_list);
+ ret = dict_set_dynstr_with_alloc(rsp_dict, "snapuuid",
+ uuid_utoa(snap_vol->volume_id));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snap "
+ "uuid in response dictionary for %s snapshot",
+ snap->snapname);
+ goto out;
+ }
+
+ glusterd_list_add_order(&snap_vol->vol_list, &priv->volumes,
+ glusterd_compare_volume_name);
+
+ ret = 0;
+
+out:
+ if (ret) {
+ if (snap)
+ glusterd_snap_remove(rsp_dict, snap, _gf_true, _gf_true, _gf_true);
+ snap = NULL;
+ }
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_snapshot_create_commit(dict_t *dict, char **op_errstr,
+ uint32_t *op_errno, dict_t *rsp_dict)
+{
+ int ret = -1;
+ int64_t i = 0;
+ int64_t volcount = 0;
+ int32_t snap_activate = 0;
+ int32_t flags = 0;
+ char *snapname = NULL;
+ char *volname = NULL;
+ char *tmp_name = NULL;
+ char key[64] = "";
+ int keylen;
+ xlator_t *this = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *origin_vol = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+ GF_ASSERT(rsp_dict);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_int64(dict, "volcount", &volcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to "
+ "get the volume count");
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch snapname");
+ goto out;
+ }
+ tmp_name = gf_strdup(snapname);
+ if (!tmp_name) {
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Out of memory");
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstr(rsp_dict, "snapname", tmp_name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to set snapname in rsp_dict");
+ GF_FREE(tmp_name);
+ goto out;
+ }
+ tmp_name = NULL;
+
+ snap = glusterd_create_snap_object(dict, rsp_dict);
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "creating the"
+ "snap object %s failed",
+ snapname);
+ ret = -1;
+ goto out;
+ }
+
+ for (i = 1; i <= volcount; i++) {
+ keylen = snprintf(key, sizeof(key), "volname%" PRId64, i);
+ ret = dict_get_strn(dict, key, keylen, &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &origin_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "failed to get the volinfo for "
+ "the volume %s",
+ volname);
+ goto out;
+ }
+
+ if (is_origin_glusterd(dict)) {
+ ret = glusterd_is_snap_soft_limit_reached(origin_vol, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_OP_FAILED,
+ "Failed to "
+ "check soft limit exceeded or not, "
+ "for volume %s ",
+ origin_vol->volname);
+ goto out;
+ }
+ }
+
+ snap_vol = glusterd_do_snap_vol(origin_vol, snap, dict, rsp_dict, i, 0);
+ if (!snap_vol) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "taking the "
+ "snapshot of the volume %s failed",
+ volname);
+ goto out;
+ }
+ }
+ ret = dict_set_int64(rsp_dict, "volcount", volcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set volcount");
+ goto out;
+ }
+
+ ret = glusterd_schedule_brick_snapshot(dict, rsp_dict, snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "Failed to take backend "
+ "snapshot %s",
+ snap->snapname);
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(rsp_dict, "snapuuid",
+ uuid_utoa(snap->snap_id));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snap "
+ "uuid in response dictionary for %s snapshot",
+ snap->snapname);
+ goto out;
+ }
+
+ snap_activate = dict_get_str_boolean(
+ priv->opts, GLUSTERD_STORE_KEY_SNAP_ACTIVATE, _gf_false);
+ if (!snap_activate) {
+ cds_list_for_each_entry(snap_vol, &snap->volumes, vol_list)
+ {
+ snap_vol->status = GLUSTERD_STATUS_STOPPED;
+ ret = glusterd_store_volinfo(snap_vol,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_SET_FAIL,
+ "Failed to store snap volinfo %s", snap_vol->volname);
+ goto out;
+ }
+ }
+
+ goto out;
+ }
+
+ /* Activate created bricks in case of activate-on-create config. */
+ ret = dict_get_int32n(dict, "flags", SLEN("flags"), &flags);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get flags");
+ goto out;
+ }
+
+ cds_list_for_each_entry(snap_vol, &snap->volumes, vol_list)
+ {
+ ret = glusterd_start_volume(snap_vol, flags, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_ACTIVATE_FAIL,
+ "Failed to activate snap volume %s of the "
+ "snap %s",
+ snap_vol->volname, snap->snapname);
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+out:
+ if (ret) {
+ if (snap)
+ glusterd_snap_remove(rsp_dict, snap, _gf_true, _gf_true, _gf_false);
+ snap = NULL;
+ }
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+snap_max_hard_limit_set_commit(dict_t *dict, uint64_t value, char *volname,
+ char **op_errstr)
+{
+ char err_str[PATH_MAX] = "";
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int ret = -1;
+ xlator_t *this = NULL;
+ char *next_version = NULL;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+
+ conf = this->private;
+
+ GF_ASSERT(conf);
+
+ /* TODO: Initiate auto deletion when there is a limit change */
+ if (!volname) {
+ /* For system limit */
+ ret = dict_set_uint64(conf->opts,
+ GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to store "
+ "%s in the options",
+ GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT);
+ goto out;
+ }
+
+ ret = glusterd_get_next_global_opt_version_str(conf->opts,
+ &next_version);
+ if (ret)
+ goto out;
+
+ ret = dict_set_strn(conf->opts, GLUSTERD_GLOBAL_OPT_VERSION,
+ SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_options(this, conf->opts);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_FAIL,
+ "Failed to store "
+ "options");
+ goto out;
+ }
+ } else {
+ /* For one volume */
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ snprintf(err_str, PATH_MAX,
+ "Failed to get the"
+ " volinfo for volume %s",
+ volname);
+ goto out;
+ }
+
+ volinfo->snap_max_hard_limit = value;
+
+ ret = glusterd_store_volinfo(volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ snprintf(err_str, PATH_MAX,
+ "Failed to store "
+ "snap-max-hard-limit for volume %s",
+ volname);
+ goto out;
+ }
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ *op_errstr = gf_strdup(err_str);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_OP_FAILED, "%s",
+ err_str);
+ }
+ return ret;
+}
+
+int
+glusterd_snapshot_config_commit(dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ char *volname = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ int config_command = 0;
+ uint64_t hard_limit = 0;
+ uint64_t soft_limit = 0;
+ char *next_version = NULL;
+ char *auto_delete = NULL;
+ char *snap_activate = NULL;
+ gf_boolean_t system_conf = _gf_false;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+
+ conf = this->private;
+
+ GF_ASSERT(conf);
+
+ ret = dict_get_int32n(dict, "config-command", SLEN("config-command"),
+ &config_command);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMAND_NOT_FOUND,
+ "failed to get config-command type");
+ goto out;
+ }
+ if (config_command != GF_SNAP_CONFIG_TYPE_SET) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+
+ /* config values snap-max-hard-limit and snap-max-soft-limit are
+ * optional and hence we are not erroring out if values are not
+ * present
+ */
+ gd_get_snap_conf_values_if_present(dict, &hard_limit, &soft_limit);
+
+ if (hard_limit) {
+ /* Commit ops for snap-max-hard-limit */
+ ret = snap_max_hard_limit_set_commit(dict, hard_limit, volname,
+ op_errstr);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HARD_LIMIT_SET_FAIL,
+ "snap-max-hard-limit set commit failed.");
+ goto out;
+ }
+ }
+
+ if (soft_limit) {
+ /* For system limit */
+ system_conf = _gf_true;
+ ret = dict_set_uint64(
+ conf->opts, GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT, soft_limit);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to save %s in the dictionary",
+ GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT);
+ goto out;
+ }
+ }
+
+ if (hard_limit || soft_limit) {
+ ret = 0;
+ goto done;
+ }
+
+ if (!dict_get_strn(dict, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE,
+ SLEN(GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE),
+ &auto_delete)) {
+ system_conf = _gf_true;
+ ret = dict_set_dynstr_with_alloc(
+ conf->opts, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE, auto_delete);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not "
+ "save auto-delete value in conf->opts");
+ goto out;
+ }
+ } else if (!dict_get_strn(dict, GLUSTERD_STORE_KEY_SNAP_ACTIVATE,
+ SLEN(GLUSTERD_STORE_KEY_SNAP_ACTIVATE),
+ &snap_activate)) {
+ system_conf = _gf_true;
+ ret = dict_set_dynstr_with_alloc(
+ conf->opts, GLUSTERD_STORE_KEY_SNAP_ACTIVATE, snap_activate);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save "
+ "snap-activate-on-create value in conf->opts");
+ goto out;
+ }
+ } else {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid option");
+ goto out;
+ }
+
+done:
+ if (system_conf) {
+ ret = glusterd_get_next_global_opt_version_str(conf->opts,
+ &next_version);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_GLOBAL_OP_VERSION_GET_FAIL,
+ "Failed to get next global opt-version");
+ goto out;
+ }
+
+ ret = dict_set_strn(conf->opts, GLUSTERD_GLOBAL_OPT_VERSION,
+ SLEN(GLUSTERD_GLOBAL_OPT_VERSION), next_version);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_GLOBAL_OP_VERSION_SET_FAIL,
+ "Failed to set next global opt-version");
+ goto out;
+ }
+
+ ret = glusterd_store_options(this, conf->opts);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_FAIL,
+ "Failed to store options");
+ goto out;
+ }
+ }
+
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+static int
+glusterd_get_brick_lvm_details(dict_t *rsp_dict,
+ glusterd_brickinfo_t *brickinfo, char *volname,
+ char *device, const char *key_prefix)
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ runner_t runner = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ char msg[PATH_MAX] = "";
+ char buf[PATH_MAX] = "";
+ char *ptr = NULL;
+ char *token = NULL;
+ char key[160] = ""; /* key_prefix is 128 bytes at most */
+ char *value = NULL;
+
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(brickinfo);
+ GF_ASSERT(volname);
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ device = glusterd_get_brick_mount_device(brickinfo->path);
+ if (!device) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_GET_INFO_FAIL,
+ "Getting device name for "
+ "the brick %s:%s failed",
+ brickinfo->hostname, brickinfo->path);
+ goto out;
+ }
+ runinit(&runner);
+ snprintf(msg, sizeof(msg),
+ "running lvs command, "
+ "for getting snap status");
+ /* Using lvs command fetch the Volume Group name,
+ * Percentage of data filled and Logical Volume size
+ *
+ * "-o" argument is used to get the desired information,
+ * example : "lvs /dev/VolGroup/thin_vol -o vgname,lv_size",
+ * will get us Volume Group name and Logical Volume size.
+ *
+ * Here separator used is ":",
+ * for the above given command with separator ":",
+ * The output will be "vgname:lvsize"
+ */
+ runner_add_args(&runner, LVS, device, "--noheading", "-o",
+ "vg_name,data_percent,lv_size", "--separator", ":", NULL);
+ runner_redir(&runner, STDOUT_FILENO, RUN_PIPE);
+ runner_log(&runner, "", GF_LOG_DEBUG, msg);
+ ret = runner_start(&runner);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_LVS_FAIL,
+ "Could not perform lvs action");
+ goto end;
+ }
+ do {
+ ptr = fgets(buf, sizeof(buf), runner_chio(&runner, STDOUT_FILENO));
+
+ if (ptr == NULL)
+ break;
+ token = strtok(buf, ":");
+ if (token != NULL) {
+ while (token[0] == ' ')
+ token++;
+ value = gf_strdup(token);
+ if (!value) {
+ ret = -1;
+ goto end;
+ }
+ ret = snprintf(key, sizeof(key), "%s.vgname", key_prefix);
+ if (ret < 0) {
+ goto end;
+ }
+
+ ret = dict_set_dynstr(rsp_dict, key, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save vgname ");
+ goto end;
+ }
+ }
+
+ token = strtok(NULL, ":");
+ if (token != NULL) {
+ value = gf_strdup(token);
+ if (!value) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "token=%s", token, NULL);
+ ret = -1;
+ goto end;
+ }
+ ret = snprintf(key, sizeof(key), "%s.data", key_prefix);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL,
+ NULL);
+ goto end;
+ }
+
+ ret = dict_set_dynstr(rsp_dict, key, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save data percent ");
+ goto end;
+ }
+ }
+ token = strtok(NULL, ":");
+ if (token != NULL) {
+ value = gf_strdup(token);
+ if (!value) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "token=%s", token, NULL);
+ ret = -1;
+ goto end;
+ }
+ ret = snprintf(key, sizeof(key), "%s.lvsize", key_prefix);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL,
+ NULL);
+ goto end;
+ }
+
+ ret = dict_set_dynstr(rsp_dict, key, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save meta data percent ");
+ goto end;
+ }
+ }
+
+ } while (ptr != NULL);
+
+ ret = 0;
+
+end:
+ runner_end(&runner);
+
+out:
+ if (ret && value) {
+ GF_FREE(value);
+ }
+
+ if (device)
+ GF_FREE(device);
+
+ return ret;
+}
+
+static int
+glusterd_get_single_brick_status(char **op_errstr, dict_t *rsp_dict,
+ const char *keyprefix, int index,
+ glusterd_volinfo_t *snap_volinfo,
+ glusterd_brickinfo_t *brickinfo)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ char key[128] = ""; /* keyprefix is not longer than 64 bytes */
+ int keylen;
+ char *device = NULL;
+ char *value = NULL;
+ char brick_path[PATH_MAX] = "";
+ char pidfile[PATH_MAX] = "";
+ pid_t pid = -1;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(keyprefix);
+ GF_ASSERT(snap_volinfo);
+ GF_ASSERT(brickinfo);
+
+ keylen = snprintf(key, sizeof(key), "%s.brick%d.path", keyprefix, index);
+ if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ ret = snprintf(brick_path, sizeof(brick_path), "%s:%s", brickinfo->hostname,
+ brickinfo->path);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ value = gf_strdup(brick_path);
+ if (!value) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_STRDUP_FAILED,
+ "brick_path=%s", brick_path, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstrn(rsp_dict, key, keylen, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to store "
+ "brick_path %s",
+ brickinfo->path);
+ goto out;
+ }
+
+ if (brickinfo->snap_status == -1) {
+ /* Setting vgname as "Pending Snapshot" */
+ value = gf_strdup("Pending Snapshot");
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "%s.brick%d.vgname", keyprefix,
+ index);
+ ret = dict_set_dynstrn(rsp_dict, key, keylen, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save vgname ");
+ goto out;
+ }
+
+ ret = 0;
+ goto out;
+ }
+ value = NULL;
+
+ keylen = snprintf(key, sizeof(key), "%s.brick%d.status", keyprefix, index);
+ if (keylen < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ if (brickinfo->status == GF_BRICK_STOPPED) {
+ value = gf_strdup("No");
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_strn(rsp_dict, key, keylen, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save brick status");
+ goto out;
+ }
+ value = NULL;
+ } else {
+ value = gf_strdup("Yes");
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_strn(rsp_dict, key, keylen, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save brick status");
+ goto out;
+ }
+ value = NULL;
+
+ GLUSTERD_GET_BRICK_PIDFILE(pidfile, snap_volinfo, brickinfo, priv);
+
+ if (gf_is_service_running(pidfile, &pid)) {
+ keylen = snprintf(key, sizeof(key), "%s.brick%d.pid", keyprefix,
+ index);
+ if (keylen < 0) {
+ ret = -1;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL,
+ NULL);
+ goto out;
+ }
+
+ ret = dict_set_int32n(rsp_dict, key, keylen, pid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save pid %d", pid);
+ goto out;
+ }
+ }
+ }
+
+ keylen = snprintf(key, sizeof(key), "%s.brick%d", keyprefix, index);
+ if (keylen < 0) {
+ ret = -1;
+ goto out;
+ }
+ /* While getting snap status we should show relevant information
+ * for deactivated snaps.
+ */
+ if (snap_volinfo->status == GLUSTERD_STATUS_STOPPED) {
+ /* Setting vgname as "Deactivated Snapshot" */
+ value = gf_strdup("N/A (Deactivated Snapshot)");
+ if (!value) {
+ ret = -1;
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "%s.brick%d.vgname", keyprefix,
+ index);
+ ret = dict_set_dynstrn(rsp_dict, key, keylen, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save vgname ");
+ goto out;
+ }
+
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_get_brick_lvm_details(rsp_dict, brickinfo,
+ snap_volinfo->volname, device, key);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRICK_GET_INFO_FAIL,
+ "Failed to get "
+ "brick LVM details");
+ goto out;
+ }
+out:
+ if (ret && value) {
+ GF_FREE(value);
+ }
+
+ return ret;
+}
+
+static int
+glusterd_get_single_snap_status(char **op_errstr, dict_t *rsp_dict,
+ const char *keyprefix, glusterd_snap_t *snap)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ char key[64] = ""; /* keyprefix is "status.snap0" */
+ int keylen;
+ char brickkey[PATH_MAX] = "";
+ glusterd_volinfo_t *snap_volinfo = NULL;
+ glusterd_volinfo_t *tmp_volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int volcount = 0;
+ int brickcount = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(keyprefix);
+ GF_ASSERT(snap);
+
+ cds_list_for_each_entry_safe(snap_volinfo, tmp_volinfo, &snap->volumes,
+ vol_list)
+ {
+ keylen = snprintf(key, sizeof(key), "%s.vol%d", keyprefix, volcount);
+ if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+ cds_list_for_each_entry(brickinfo, &snap_volinfo->bricks, brick_list)
+ {
+ if (!glusterd_is_local_brick(this, snap_volinfo, brickinfo)) {
+ brickcount++;
+ continue;
+ }
+
+ ret = glusterd_get_single_brick_status(
+ op_errstr, rsp_dict, key, brickcount, snap_volinfo, brickinfo);
+
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_STATUS_FAIL,
+ "Getting "
+ "single snap status failed");
+ goto out;
+ }
+ brickcount++;
+ }
+ keylen = snprintf(brickkey, sizeof(brickkey), "%s.brickcount", key);
+ if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ ret = dict_set_int32n(rsp_dict, brickkey, keylen, brickcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save brick count");
+ goto out;
+ }
+ volcount++;
+ }
+
+ keylen = snprintf(key, sizeof(key), "%s.volcount", keyprefix);
+ if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_int32n(rsp_dict, key, keylen, volcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save volcount");
+ goto out;
+ }
+
+out:
+
+ return ret;
+}
+
+static int
+glusterd_get_each_snap_object_status(char **op_errstr, dict_t *rsp_dict,
+ glusterd_snap_t *snap,
+ const char *keyprefix)
+{
+ int ret = -1;
+ char key[32] = ""; /* keyprefix is "status.snap0" */
+ int keylen;
+ char *temp = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(snap);
+ GF_ASSERT(keyprefix);
+
+ /* TODO : Get all the snap volume info present in snap object,
+ * as of now, There will be only one snapvolinfo per snap object
+ */
+ keylen = snprintf(key, sizeof(key), "%s.snapname", keyprefix);
+ if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ temp = gf_strdup(snap->snapname);
+ if (temp == NULL) {
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstrn(rsp_dict, key, keylen, temp);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save "
+ "snap name");
+ goto out;
+ }
+
+ temp = NULL;
+
+ keylen = snprintf(key, sizeof(key), "%s.uuid", keyprefix);
+ if (keylen < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ temp = gf_strdup(uuid_utoa(snap->snap_id));
+ if (temp == NULL) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_set_dynstrn(rsp_dict, key, keylen, temp);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save "
+ "snap UUID");
+ goto out;
+ }
+
+ temp = NULL;
+
+ ret = glusterd_get_single_snap_status(op_errstr, rsp_dict, keyprefix, snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_STATUS_FAIL,
+ "Could not get single snap status");
+ goto out;
+ }
+
+ keylen = snprintf(key, sizeof(key), "%s.volcount", keyprefix);
+ if (keylen < 0) {
+ ret = keylen;
+ goto out;
+ }
+
+ ret = dict_set_int32n(rsp_dict, key, keylen, 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save volcount");
+ goto out;
+ }
+out:
+ if (ret && temp)
+ GF_FREE(temp);
+
+ return ret;
+}
+
+int
+glusterd_get_snap_status_of_volume(char **op_errstr, dict_t *rsp_dict,
+ char *volname, char *keyprefix)
+{
+ int ret = -1;
+ glusterd_volinfo_t *snap_volinfo = NULL;
+ glusterd_volinfo_t *temp_volinfo = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ char key[64] = "";
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int i = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GF_ASSERT(op_errstr);
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(volname);
+ GF_ASSERT(keyprefix);
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Failed to get volinfo of "
+ "volume %s",
+ volname);
+ goto out;
+ }
+
+ cds_list_for_each_entry_safe(snap_volinfo, temp_volinfo,
+ &volinfo->snap_volumes, snapvol_list)
+ {
+ ret = snprintf(key, sizeof(key), "status.snap%d.snapname", i);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(rsp_dict, key,
+ snap_volinfo->snapshot->snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save "
+ "snap name");
+ goto out;
+ }
+
+ i++;
+ }
+
+ ret = dict_set_int32n(rsp_dict, "status.snapcount",
+ SLEN("status.snapcount"), i);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to save snapcount");
+ ret = -1;
+ goto out;
+ }
+out:
+ return ret;
+}
+
+int
+glusterd_get_all_snapshot_status(dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int32_t i = 0;
+ int ret = -1;
+ char key[64] = "";
+ glusterd_conf_t *priv = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_snap_t *tmp_snap = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+
+ cds_list_for_each_entry_safe(snap, tmp_snap, &priv->snapshots, snap_list)
+ {
+ ret = snprintf(key, sizeof(key), "status.snap%d.snapname", i);
+ if (ret < 0) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ goto out;
+ }
+
+ ret = dict_set_dynstr_with_alloc(rsp_dict, key, snap->snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save "
+ "snap name");
+ goto out;
+ }
+
+ i++;
+ }
+
+ ret = dict_set_int32n(rsp_dict, "status.snapcount",
+ SLEN("status.snapcount"), i);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save snapcount");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_snapshot_status_commit(dict_t *dict, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ xlator_t *this = NULL;
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ int32_t cmd = -1;
+ char *snapname = NULL;
+ glusterd_snap_t *snap = NULL;
+ char *volname = NULL;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(op_errstr);
+
+ conf = this->private;
+
+ GF_ASSERT(conf);
+ ret = dict_get_int32n(dict, "sub-cmd", SLEN("sub-cmd"), &cmd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get status cmd type");
+ goto out;
+ }
+
+ ret = dict_set_int32n(rsp_dict, "sub-cmd", SLEN("sub-cmd"), cmd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Could not save status cmd in rsp dictionary");
+ goto out;
+ }
+ switch (cmd) {
+ case GF_SNAP_STATUS_TYPE_ALL: {
+ ret = glusterd_get_all_snapshot_status(dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_STATUS_FAIL,
+ "Unable to "
+ "get snapshot status");
+ goto out;
+ }
+ break;
+ }
+ case GF_SNAP_STATUS_TYPE_ITER:
+ case GF_SNAP_STATUS_TYPE_SNAP: {
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to "
+ "get snap name");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name(snapname);
+ if (!snap) {
+ ret = gf_asprintf(op_errstr,
+ "Snapshot (%s) "
+ "does not exist",
+ snapname);
+ if (ret < 0) {
+ goto out;
+ }
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Unable to "
+ "get snap volinfo");
+ goto out;
+ }
+ ret = glusterd_get_each_snap_object_status(op_errstr, rsp_dict,
+ snap, "status.snap0");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_STATUS_FAIL,
+ "Unable to "
+ "get status of snap");
+ goto out;
+ }
+
+ ret = dict_set_int32n(rsp_dict, "status.snapcount",
+ SLEN("status.snapcount"), 1);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Unable to "
+ "set snapcount to 1");
+ goto out;
+ }
+ break;
+ }
+ case GF_SNAP_STATUS_TYPE_VOL: {
+ ret = dict_get_strn(dict, "volname", SLEN("volname"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to"
+ " get volume name");
+ goto out;
+ }
+
+ ret = glusterd_get_snap_status_of_volume(op_errstr, rsp_dict,
+ volname, "status.vol0");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_STATUS_FAIL,
+ "Function :"
+ " glusterd_get_snap_status_of_volume "
+ "failed");
+ goto out;
+ }
+ }
+ }
+ ret = 0;
+out:
+ return ret;
+}
+
+int32_t
+glusterd_handle_snap_limit(dict_t *dict, dict_t *rsp_dict)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ uint64_t effective_max_limit = 0;
+ int64_t volcount = 0;
+ int i = 0;
+ char *volname = NULL;
+ char key[64] = "";
+ int keylen;
+ char msg[PATH_MAX] = "";
+ glusterd_volinfo_t *volinfo = NULL;
+ uint64_t limit = 0;
+ int64_t count = 0;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *tmp_volinfo = NULL;
+ uint64_t opt_max_hard = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+ uint64_t opt_max_soft = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp_dict);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_int64(dict, "volcount", &volcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to get the volcount");
+ goto out;
+ }
+
+ for (i = 1; i <= volcount; i++) {
+ keylen = snprintf(key, sizeof(key), "volname%d", i);
+ ret = dict_get_strn(dict, key, keylen, &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to get the "
+ "volname");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "volinfo for %s "
+ "not found",
+ volname);
+ goto out;
+ }
+
+ /* config values snap-max-hard-limit and snap-max-soft-limit are
+ * optional and hence we are not erroring out if values are not
+ * present
+ */
+ gd_get_snap_conf_values_if_present(priv->opts, &opt_max_hard,
+ &opt_max_soft);
+
+ /* The minimum of the 2 limits i.e system wide limit and
+ volume wide limit will be considered
+ */
+ if (volinfo->snap_max_hard_limit < opt_max_hard)
+ effective_max_limit = volinfo->snap_max_hard_limit;
+ else
+ effective_max_limit = opt_max_hard;
+
+ limit = (opt_max_soft * effective_max_limit) / 100;
+
+ count = volinfo->snap_count - limit;
+ if (count <= 0)
+ goto out;
+
+ tmp_volinfo = cds_list_entry(volinfo->snap_volumes.next,
+ glusterd_volinfo_t, snapvol_list);
+ snap = tmp_volinfo->snapshot;
+ GF_ASSERT(snap);
+
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SOFT_LIMIT_REACHED,
+ "Soft-limit "
+ "(value = %" PRIu64
+ ") of volume %s is reached. "
+ "Deleting snapshot %s.",
+ limit, volinfo->volname, snap->snapname);
+
+ snprintf(msg, sizeof(msg),
+ "snapshot_name=%s;"
+ "snapshot_uuid=%s",
+ snap->snapname, uuid_utoa(snap->snap_id));
+
+ LOCK(&snap->lock);
+ {
+ snap->snap_status = GD_SNAP_STATUS_DECOMMISSION;
+ ret = glusterd_store_snap(snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_SNAP_OBJECT_STORE_FAIL,
+ "could "
+ "not store snap object %s",
+ snap->snapname);
+ goto unlock;
+ }
+
+ ret = glusterd_snap_remove(rsp_dict, snap, _gf_true, _gf_true,
+ _gf_false);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "failed to remove snap %s", snap->snapname);
+ }
+ unlock:
+ UNLOCK(&snap->lock);
+ if (is_origin_glusterd(dict) == _gf_true) {
+ if (ret)
+ gf_event(EVENT_SNAPSHOT_DELETE_FAILED, "%s", msg);
+ else
+ gf_event(EVENT_SNAPSHOT_DELETED, "%s", msg);
+ }
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+glusterd_snapshot_clone_postvalidate(dict_t *dict, int32_t op_ret,
+ char **op_errstr, dict_t *rsp_dict)
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+ int32_t cleanup = 0;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ char *clonename = NULL;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp_dict);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_strn(dict, "clonename", SLEN("clonename"), &clonename);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch "
+ "clonename");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(clonename, &snap_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "unable to find clone "
+ "%s volinfo",
+ clonename);
+ goto out;
+ }
+
+ if (snap_vol)
+ snap = snap_vol->snapshot;
+ else {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND,
+ "Snapshot volume is null");
+ goto out;
+ }
+
+ /* Fetch snap object from snap_vol and delete it all in case of *
+ * a failure, or else, just delete the snap object as it is not *
+ * needed in case of a clone *
+ */
+ if (op_ret) {
+ ret = dict_get_int32n(dict, "cleanup", SLEN("cleanup"), &cleanup);
+ if (!ret && cleanup && snap) {
+ glusterd_snap_remove(rsp_dict, snap, _gf_true, _gf_true, _gf_true);
+ }
+ /* Irrespective of status of cleanup its better
+ * to return from this function. As the functions
+ * following this block is not required to be
+ * executed in case of failure scenario.
+ */
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_snapobject_delete(snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "Failed to delete "
+ "snap object %s",
+ snap->snapname);
+ goto out;
+ }
+ snap_vol->snapshot = NULL;
+
+out:
+ return ret;
+}
+
+int32_t
+glusterd_snapshot_create_postvalidate(dict_t *dict, int32_t op_ret,
+ char **op_errstr, dict_t *rsp_dict)
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+ int32_t cleanup = 0;
+ glusterd_snap_t *snap = NULL;
+ char *snapname = NULL;
+ char *volname = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ uint64_t opt_hard_max = GLUSTERD_SNAPS_MAX_HARD_LIMIT;
+ uint64_t opt_max_soft = GLUSTERD_SNAPS_DEF_SOFT_LIMIT_PERCENT;
+ int64_t effective_max_limit = 0;
+ int64_t soft_limit = 0;
+ int32_t snap_activate = _gf_false;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp_dict);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ if (op_ret) {
+ ret = dict_get_int32n(dict, "cleanup", SLEN("cleanup"), &cleanup);
+ if (!ret && cleanup) {
+ ret = glusterd_do_snap_cleanup(dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CLEANUP_FAIL,
+ "cleanup "
+ "operation failed");
+ goto out;
+ }
+ }
+ /* Irrespective of status of cleanup its better
+ * to return from this function. As the functions
+ * following this block is not required to be
+ * executed in case of failure scenario.
+ */
+ ret = 0;
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch "
+ "snapname");
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name(snapname);
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND,
+ "unable to find snap "
+ "%s",
+ snapname);
+ goto out;
+ }
+
+ snap->snap_status = GD_SNAP_STATUS_IN_USE;
+ ret = glusterd_store_snap(snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_OBJECT_STORE_FAIL,
+ "Could not store snap"
+ "object %s",
+ snap->snapname);
+ goto out;
+ }
+
+ ret = glusterd_snapshot_update_snaps_post_validate(dict, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "Failed to "
+ "create snapshot");
+ goto out;
+ }
+
+ /*
+ * If activate_on_create was enabled, and we have reached this *
+ * section of the code, that means, that after successfully *
+ * creating the snapshot, we have also successfully started the *
+ * snapshot bricks on all nodes. So from originator node we can *
+ * send EVENT_SNAPSHOT_ACTIVATED event. *
+ * *
+ * Also check, if hard limit and soft limit is reached in case *
+ * of successfully creating the snapshot, and generate the event *
+ */
+ if (is_origin_glusterd(dict) == _gf_true) {
+ snap_activate = dict_get_str_boolean(
+ priv->opts, GLUSTERD_STORE_KEY_SNAP_ACTIVATE, _gf_false);
+
+ if (snap_activate == _gf_true) {
+ gf_event(EVENT_SNAPSHOT_ACTIVATED,
+ "snapshot_name=%s;"
+ "snapshot_uuid=%s",
+ snap->snapname, uuid_utoa(snap->snap_id));
+ }
+
+ ret = dict_get_strn(dict, "volname1", SLEN("volname1"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get volname.");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "Failed to get volinfo.");
+ goto out;
+ }
+
+ /* config values snap-max-hard-limit and snap-max-soft-limit are
+ * optional and hence we are not erroring out if values are not
+ * present
+ */
+ gd_get_snap_conf_values_if_present(priv->opts, &opt_hard_max,
+ &opt_max_soft);
+
+ if (volinfo->snap_max_hard_limit < opt_hard_max)
+ effective_max_limit = volinfo->snap_max_hard_limit;
+ else
+ effective_max_limit = opt_hard_max;
+
+ /*
+ * Check for hard limit. If it is reached after taking *
+ * this snapshot, then generate event for the same. If *
+ * it is not reached, then check for the soft limit, *
+ * and generate event accordingly. *
+ */
+ if (volinfo->snap_count >= effective_max_limit) {
+ gf_event(EVENT_SNAPSHOT_HARD_LIMIT_REACHED,
+ "volume_name=%s;volume_id=%s", volname,
+ uuid_utoa(volinfo->volume_id));
+ } else {
+ soft_limit = (opt_max_soft * effective_max_limit) / 100;
+ if (volinfo->snap_count >= soft_limit) {
+ gf_event(EVENT_SNAPSHOT_SOFT_LIMIT_REACHED,
+ "volume_name=%s;volume_id=%s", volname,
+ uuid_utoa(volinfo->volume_id));
+ }
+ }
+ }
+
+ /* "auto-delete" might not be set by user explicitly,
+ * in that case it's better to consider the default value.
+ * Hence not erroring out if Key is not found.
+ */
+ ret = dict_get_str_boolean(priv->opts, GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE,
+ _gf_false);
+ if (_gf_true == ret) {
+ ret = glusterd_handle_snap_limit(dict, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "failed to remove snap");
+ /* ignore the errors of autodelete */
+ ret = 0;
+ }
+ }
+
+out:
+ return ret;
+}
+
+int32_t
+glusterd_snapshot(dict_t *dict, char **op_errstr, uint32_t *op_errno,
+ dict_t *rsp_dict)
+{
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ int32_t snap_command = 0;
+ char *snap_name = NULL;
+ char temp[PATH_MAX] = "";
+ int ret = -1;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp_dict);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = dict_get_int32n(dict, "type", SLEN("type"), &snap_command);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMAND_NOT_FOUND,
+ "unable to get the type of "
+ "the snapshot command");
+ goto out;
+ }
+
+ switch (snap_command) {
+ case (GF_SNAP_OPTION_TYPE_CREATE):
+ ret = glusterd_snapshot_create_commit(dict, op_errstr, op_errno,
+ rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "Failed to "
+ "create snapshot");
+ goto out;
+ }
+ break;
+
+ case (GF_SNAP_OPTION_TYPE_CLONE):
+ ret = glusterd_snapshot_clone_commit(dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CLONE_FAILED,
+ "Failed to "
+ "clone snapshot");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ ret = glusterd_snapshot_config_commit(dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CONFIG_FAIL,
+ "snapshot config failed");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ ret = glusterd_snapshot_remove_commit(dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "Failed to "
+ "delete snapshot");
+ if (*op_errstr) {
+ /* If error string is already set
+ * then goto out */
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"),
+ &snap_name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to get snapname");
+ snap_name = "NA";
+ }
+
+ snprintf(temp, sizeof(temp),
+ "Snapshot %s might "
+ "not be in an usable state.",
+ snap_name);
+
+ *op_errstr = gf_strdup(temp);
+ ret = -1;
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ ret = glusterd_snapshot_restore(dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_RESTORE_FAIL,
+ "Failed to "
+ "restore snapshot");
+ goto out;
+ }
+
+ break;
+ case GF_SNAP_OPTION_TYPE_ACTIVATE:
+ ret = glusterd_snapshot_activate_commit(dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_ACTIVATE_FAIL,
+ "Failed to "
+ "activate snapshot");
+ goto out;
+ }
+
+ break;
+
+ case GF_SNAP_OPTION_TYPE_DEACTIVATE:
+ ret = glusterd_snapshot_deactivate_commit(dict, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_SNAP_DEACTIVATE_FAIL,
+ "Failed to "
+ "deactivate snapshot");
+ goto out;
+ }
+
+ break;
+
+ case GF_SNAP_OPTION_TYPE_STATUS:
+ ret = glusterd_snapshot_status_commit(dict, op_errstr, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_STATUS_FAIL,
+ "Failed to "
+ "show snapshot status");
+ goto out;
+ }
+ break;
+
+ default:
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_INVALID_ENTRY,
+ "invalid snap command");
+ goto out;
+ break;
+ }
+
+ ret = 0;
+
+out:
+ return ret;
+}
+
+int
+glusterd_snapshot_brickop(dict_t *dict, char **op_errstr, dict_t *rsp_dict)
+{
+ int ret = -1;
+ int64_t vol_count = 0;
+ int64_t count = 1;
+ char key[64] = "";
+ int keylen;
+ char *volname = NULL;
+ int32_t snap_command = 0;
+ xlator_t *this = NULL;
+ char *op_type = NULL;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp_dict);
+
+ ret = dict_get_int32n(dict, "type", SLEN("type"), &snap_command);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMAND_NOT_FOUND,
+ "unable to get the type of "
+ "the snapshot command");
+ goto out;
+ }
+
+ switch (snap_command) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+
+ /* op_type with tell us whether its pre-commit operation
+ * or post-commit
+ */
+ ret = dict_get_strn(dict, "operation-type", SLEN("operation-type"),
+ &op_type);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to fetch "
+ "operation type");
+ goto out;
+ }
+
+ if (strcmp(op_type, "pre") == 0) {
+ /* BRICK OP PHASE for enabling barrier, Enable barrier
+ * if its a pre-commit operation
+ */
+ ret = glusterd_set_barrier_value(dict, "enable");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to "
+ "set barrier value as enable in dict");
+ goto out;
+ }
+ } else if (strcmp(op_type, "post") == 0) {
+ /* BRICK OP PHASE for disabling barrier, Disable barrier
+ * if its a post-commit operation
+ */
+ ret = glusterd_set_barrier_value(dict, "disable");
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Failed to "
+ "set barrier value as disable in "
+ "dict");
+ goto out;
+ }
+ } else {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid op_type");
+ goto out;
+ }
+
+ ret = dict_get_int64(dict, "volcount", &vol_count);
+ if (ret)
+ goto out;
+ while (count <= vol_count) {
+ keylen = snprintf(key, sizeof(key), "volname%" PRId64, count);
+ ret = dict_get_strn(dict, key, keylen, &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to get volname");
+ goto out;
+ }
+ ret = dict_set_strn(dict, "volname", SLEN("volname"), volname);
+ if (ret)
+ goto out;
+
+ ret = gd_brick_op_phase(GD_OP_SNAP, NULL, dict, op_errstr);
+ if (ret)
+ goto out;
+ volname = NULL;
+ count++;
+ }
+
+ dict_deln(dict, "volname", SLEN("volname"));
+ ret = 0;
+ break;
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ break;
+ default:
+ break;
+ }
+
+out:
+ return ret;
+}
+
+int
+glusterd_snapshot_prevalidate(dict_t *dict, char **op_errstr, dict_t *rsp_dict,
+ uint32_t *op_errno)
+{
+ int snap_command = 0;
+ xlator_t *this = NULL;
+ int ret = -1;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp_dict);
+ GF_VALIDATE_OR_GOTO(this->name, op_errno, out);
+
+ ret = dict_get_int32n(dict, "type", SLEN("type"), &snap_command);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMAND_NOT_FOUND,
+ "unable to get the type of "
+ "the snapshot command");
+ goto out;
+ }
+
+ switch (snap_command) {
+ case (GF_SNAP_OPTION_TYPE_CREATE):
+ ret = glusterd_snapshot_create_prevalidate(dict, op_errstr,
+ rsp_dict, op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "Snapshot create "
+ "pre-validation failed");
+ goto out;
+ }
+ break;
+
+ case (GF_SNAP_OPTION_TYPE_CLONE):
+ ret = glusterd_snapshot_clone_prevalidate(dict, op_errstr, rsp_dict,
+ op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_SNAP_CLONE_PREVAL_FAILED,
+ "Snapshot clone "
+ "pre-validation failed");
+ goto out;
+ }
+ break;
+
+ case (GF_SNAP_OPTION_TYPE_CONFIG):
+ ret = glusterd_snapshot_config_prevalidate(dict, op_errstr,
+ op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CONFIG_FAIL,
+ "Snapshot config "
+ "pre-validation failed");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ ret = glusterd_snapshot_restore_prevalidate(dict, op_errstr,
+ op_errno, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_RESTORE_FAIL,
+ "Snapshot restore "
+ "validation failed");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_ACTIVATE:
+ ret = glusterd_snapshot_activate_deactivate_prevalidate(
+ dict, op_errstr, op_errno, rsp_dict, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_ACTIVATE_FAIL,
+ "Snapshot activate "
+ "validation failed");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_DEACTIVATE:
+ ret = glusterd_snapshot_activate_deactivate_prevalidate(
+ dict, op_errstr, op_errno, rsp_dict, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_SNAP_DEACTIVATE_FAIL,
+ "Snapshot deactivate validation failed");
+ goto out;
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ ret = glusterd_snapshot_remove_prevalidate(dict, op_errstr,
+ op_errno, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "Snapshot remove "
+ "validation failed");
+ goto out;
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_STATUS:
+ ret = glusterd_snapshot_status_prevalidate(dict, op_errstr,
+ op_errno, rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_STATUS_FAIL,
+ "Snapshot status "
+ "validation failed");
+ goto out;
+ }
+ break;
+
+ default:
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_COMMAND_NOT_FOUND,
+ "invalid snap command");
+ *op_errno = EINVAL;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* This function is called to remove the trashpath, in cases
+ * when the restore operation is successful and we don't need
+ * the backup, and incases when the restore op is failed before
+ * commit, and we don't need to revert the backup.
+ *
+ * @param volname name of the volume which is being restored
+ *
+ * @return 0 on success or -1 on failure
+ */
+int
+glusterd_remove_trashpath(char *volname)
+{
+ int ret = -1;
+ char delete_path[PATH_MAX] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+ struct stat stbuf = {
+ 0,
+ };
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+
+ GF_ASSERT(volname);
+
+ len = snprintf(delete_path, sizeof(delete_path),
+ "%s/" GLUSTERD_TRASH "/vols-%s.deleted", priv->workdir,
+ volname);
+ if ((len < 0) || (len >= sizeof(delete_path))) {
+ goto out;
+ }
+
+ ret = sys_lstat(delete_path, &stbuf);
+ if (ret) {
+ /* If the trash dir does not exist, return *
+ * without failure *
+ */
+ if (errno == ENOENT) {
+ ret = 0;
+ goto out;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Failed to lstat "
+ "backup dir (%s)",
+ delete_path);
+ goto out;
+ }
+ }
+
+ /* Delete the backup copy of volume folder */
+ ret = recursive_rmdir(delete_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Failed to remove "
+ "backup dir (%s)",
+ delete_path);
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* This function is called if snapshot restore operation
+ * is successful. It will cleanup the backup files created
+ * during the restore operation.
+ *
+ * @param rsp_dict Response dictionary
+ * @param volinfo volinfo of the volume which is being restored
+ * @param snap snap object
+ *
+ * @return 0 on success or -1 on failure
+ */
+int
+glusterd_snapshot_restore_cleanup(dict_t *rsp_dict, char *volname,
+ glusterd_snap_t *snap)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(rsp_dict);
+ GF_ASSERT(volname);
+ GF_ASSERT(snap);
+
+ /* Now delete the snap entry. */
+ ret = glusterd_snap_remove(rsp_dict, snap, _gf_false, _gf_true, _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "Failed to delete "
+ "snap %s",
+ snap->snapname);
+ goto out;
+ }
+
+ /* Delete the backup copy of volume folder */
+ ret = glusterd_remove_trashpath(volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Failed to remove "
+ "backup dir");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/* This function is called when the snapshot restore operation failed
+ * for some reasons. In such case we revert the restore operation.
+ *
+ * @param volinfo volinfo of the origin volume
+ *
+ * @return 0 on success and -1 on failure
+ */
+int
+glusterd_snapshot_revert_partial_restored_vol(glusterd_volinfo_t *volinfo)
+{
+ int ret = 0;
+ char pathname[PATH_MAX] = "";
+ char trash_path[PATH_MAX] = "";
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_volinfo_t *reverted_vol = NULL;
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_volinfo_t *tmp_vol = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(volinfo);
+
+ GLUSTERD_GET_VOLUME_DIR(pathname, volinfo, priv);
+
+ len = snprintf(trash_path, sizeof(trash_path),
+ "%s/" GLUSTERD_TRASH "/vols-%s.deleted", priv->workdir,
+ volinfo->volname);
+ if ((len < 0) || (len >= sizeof(trash_path))) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_COPY_FAIL, NULL);
+ ret = -1;
+ goto out;
+ }
+
+ /* Since snapshot restore failed we cannot rely on the volume
+ * data stored under vols folder. Therefore delete the origin
+ * volume's backend folder.*/
+ ret = recursive_rmdir(pathname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Failed to remove "
+ "%s directory",
+ pathname);
+ goto out;
+ }
+
+ /* Now move the backup copy of the vols to its original
+ * location.*/
+ ret = sys_rename(trash_path, pathname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Failed to rename folder "
+ "from %s to %s",
+ trash_path, pathname);
+ goto out;
+ }
+
+ /* Retrieve the volume from the store */
+ reverted_vol = glusterd_store_retrieve_volume(volinfo->volname, NULL);
+ if (NULL == reverted_vol) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED,
+ "Failed to load restored "
+ "%s volume",
+ volinfo->volname);
+ goto out;
+ }
+
+ /* Retrieve the snap_volumes list from the older volinfo */
+ reverted_vol->snap_count = volinfo->snap_count;
+ cds_list_for_each_entry_safe(snap_vol, tmp_vol, &volinfo->snap_volumes,
+ snapvol_list)
+ {
+ cds_list_add_tail(&snap_vol->snapvol_list, &reverted_vol->snap_volumes);
+
+ cds_list_for_each_entry(brickinfo, &snap_vol->bricks, brick_list)
+ {
+ /*
+ * If the brick is not of this peer, or snapshot is *
+ * missed for the brick don't restore the xattr for it *
+ */
+ if ((!gf_uuid_compare(brickinfo->uuid, MY_UUID)) &&
+ (brickinfo->snap_status != -1)) {
+ /*
+ * We need to restore volume id of all snap *
+ * bricks to volume id of the snap volume. *
+ */
+ ret = sys_lsetxattr(brickinfo->path, GF_XATTR_VOL_ID_KEY,
+ snap_vol->volume_id,
+ sizeof(snap_vol->volume_id), XATTR_REPLACE);
+ if (ret == -1) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_SET_XATTR_FAIL,
+ "Attribute=%s, Path=%s, Reason=%s, Snap=%s",
+ GF_XATTR_VOL_ID_KEY, brickinfo->path,
+ strerror(errno), snap_vol->volname, NULL);
+ goto out;
+ }
+ }
+ }
+ }
+
+ /* Since we retrieved the volinfo from store now we don't
+ * want the older volinfo. Therefore delete the older volinfo */
+ glusterd_volinfo_unref(volinfo);
+ ret = 0;
+out:
+ return ret;
+}
+
+/* This function is called when glusterd is started and we need
+ * to revert a failed snapshot restore.
+ *
+ * @param snap snapshot object of the restored snap
+ *
+ * @return 0 on success and -1 on failure
+ */
+int
+glusterd_snapshot_revert_restore_from_snap(glusterd_snap_t *snap)
+{
+ int ret = -1;
+ char volname[PATH_MAX] = "";
+ glusterd_volinfo_t *snap_volinfo = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+ GF_ASSERT(snap);
+
+ /* TODO : As of now there is only one volume in snapshot.
+ * Change this when multiple volume snapshot is introduced
+ */
+ snap_volinfo = cds_list_entry(snap->volumes.next, glusterd_volinfo_t,
+ vol_list);
+
+ gf_strncpy(volname, snap_volinfo->parent_volname, sizeof(volname));
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Could not get volinfo of "
+ "%s",
+ snap_volinfo->parent_volname);
+ goto out;
+ }
+
+ ret = glusterd_snapshot_revert_partial_restored_vol(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESTORE_REVERT_FAIL,
+ "Failed to revert snapshot "
+ "restore operation for %s volume",
+ volname);
+ goto out;
+ }
+out:
+ return ret;
+}
+
+/* This function is called from post-validation. Based on the op_ret
+ * it will take a decision on whether to revert the operation or
+ * perform cleanup.
+ *
+ * @param dict dictionary object
+ * @param op_ret return value of the restore operation
+ * @param op_errstr error string
+ * @param rsp_dict Response dictionary
+ *
+ * @return 0 on success and -1 on failure
+ */
+int
+glusterd_snapshot_restore_postop(dict_t *dict, int32_t op_ret, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int ret = -1;
+ char *name = NULL;
+ char *volname = NULL;
+ int cleanup = 0;
+ glusterd_snap_t *snap = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp_dict);
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "getting the snap "
+ "name failed (volume: %s)",
+ name);
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name(name);
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_NOT_FOUND,
+ "Snapshot (%s) does not exist", name);
+ ret = -1;
+ goto out;
+ }
+
+ /* TODO: fix this when multiple volume support will come */
+ ret = dict_get_strn(dict, "volname1", SLEN("volname1"), &volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "failed to get volume name");
+ goto out;
+ }
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_NOT_FOUND,
+ "Volume (%s) does not exist ", volname);
+ goto out;
+ }
+
+ ret = dict_get_strn(dict, "snapname", SLEN("snapname"), &name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "getting the snap "
+ "name failed (volume: %s)",
+ volinfo->volname);
+ goto out;
+ }
+
+ snap = glusterd_find_snap_by_name(name);
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_SNAP_NOT_FOUND,
+ "snap %s is not found", name);
+ ret = -1;
+ goto out;
+ }
+
+ /* On success perform the cleanup operation */
+ if (0 == op_ret) {
+ ret = glusterd_snapshot_restore_cleanup(rsp_dict, volname, snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CLEANUP_FAIL,
+ "Failed to perform "
+ "snapshot restore cleanup for %s volume",
+ volname);
+ goto out;
+ }
+ } else { /* On failure revert snapshot restore */
+ ret = dict_get_int32n(dict, "cleanup", SLEN("cleanup"), &cleanup);
+ /* Perform cleanup only when required */
+ if (ret || (0 == cleanup)) {
+ /* Delete the backup copy of volume folder */
+ ret = glusterd_remove_trashpath(volinfo->volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Failed to remove backup dir");
+ goto out;
+ }
+ ret = 0;
+ goto out;
+ }
+
+ ret = glusterd_snapshot_revert_partial_restored_vol(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESTORE_REVERT_FAIL,
+ "Failed to revert "
+ "restore operation for %s volume",
+ volname);
+ goto out;
+ }
+
+ snap->snap_status = GD_SNAP_STATUS_IN_USE;
+ /* We need to save this in disk */
+ ret = glusterd_store_snap(snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_OBJECT_STORE_FAIL,
+ "Could not store snap object for %s snap", snap->snapname);
+ goto out;
+ }
+
+ /* After restore fails, we have to remove mount point for
+ * deactivated snaps which was created at start of restore op.
+ */
+ if (volinfo->status == GLUSTERD_STATUS_STOPPED) {
+ ret = glusterd_snap_unmount(this, volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_GLUSTERD_UMOUNT_FAIL,
+ "Failed to unmounts for %s", snap->snapname);
+ }
+ }
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_snapshot_postvalidate(dict_t *dict, int32_t op_ret, char **op_errstr,
+ dict_t *rsp_dict)
+{
+ int snap_command = 0;
+ xlator_t *this = NULL;
+ int ret = -1;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp_dict);
+
+ ret = dict_get_int32n(dict, "type", SLEN("type"), &snap_command);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMAND_NOT_FOUND,
+ "unable to get the type of "
+ "the snapshot command");
+ goto out;
+ }
+
+ switch (snap_command) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ ret = glusterd_snapshot_create_postvalidate(dict, op_ret, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "Snapshot create "
+ "post-validation failed");
+ goto out;
+ }
+ glusterd_fetchsnap_notify(this);
+ break;
+ case GF_SNAP_OPTION_TYPE_CLONE:
+ ret = glusterd_snapshot_clone_postvalidate(dict, op_ret, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_SNAP_CLONE_POSTVAL_FAILED,
+ "Snapshot create "
+ "post-validation failed");
+ goto out;
+ }
+ glusterd_fetchsnap_notify(this);
+ break;
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ if (op_ret) {
+ gf_msg_debug(this->name, 0,
+ "op_ret = %d. Not performing delete "
+ "post_validate",
+ op_ret);
+ ret = 0;
+ goto out;
+ }
+ ret = glusterd_snapshot_update_snaps_post_validate(dict, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+ "Failed to "
+ "update missed snaps list");
+ goto out;
+ }
+ glusterd_fetchsnap_notify(this);
+ break;
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ ret = glusterd_snapshot_update_snaps_post_validate(dict, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESTORE_FAIL,
+ "Failed to "
+ "update missed snaps list");
+ goto out;
+ }
+
+ ret = glusterd_snapshot_restore_postop(dict, op_ret, op_errstr,
+ rsp_dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESTORE_FAIL,
+ "Failed to "
+ "perform snapshot restore post-op");
+ goto out;
+ }
+ glusterd_fetchsnap_notify(this);
+ break;
+ case GF_SNAP_OPTION_TYPE_ACTIVATE:
+ case GF_SNAP_OPTION_TYPE_DEACTIVATE:
+ glusterd_fetchsnap_notify(this);
+ break;
+ case GF_SNAP_OPTION_TYPE_STATUS:
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ case GF_SNAP_OPTION_TYPE_INFO:
+ case GF_SNAP_OPTION_TYPE_LIST:
+ /*Nothing to be done. But want to
+ * avoid the default case warning*/
+ ret = 0;
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_COMMAND_NOT_FOUND,
+ "invalid snap command");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ return ret;
+}
+
+/*
+ Verify availability of lvm commands
+*/
+
+static gf_boolean_t
+glusterd_is_lvm_cmd_available(char *lvm_cmd)
+{
+ int32_t ret = 0;
+ struct stat buf = {
+ 0,
+ };
+
+ if (!lvm_cmd)
+ return _gf_false;
+
+ ret = sys_stat(lvm_cmd, &buf);
+ if (ret != 0) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "stat fails on %s, exiting. (errno = %d (%s))", lvm_cmd, errno,
+ strerror(errno));
+ return _gf_false;
+ }
+
+ if ((!ret) && (!S_ISREG(buf.st_mode))) {
+ gf_msg(THIS->name, GF_LOG_CRITICAL, EINVAL, GD_MSG_COMMAND_NOT_FOUND,
+ "Provided command %s is not a regular file,"
+ "exiting",
+ lvm_cmd);
+ return _gf_false;
+ }
+
+ if ((!ret) && (!(buf.st_mode & S_IXUSR))) {
+ gf_msg(THIS->name, GF_LOG_CRITICAL, 0, GD_MSG_NO_EXEC_PERMS,
+ "Provided command %s has no exec permissions,"
+ "exiting",
+ lvm_cmd);
+ return _gf_false;
+ }
+
+ return _gf_true;
+}
+
+int
+glusterd_handle_snapshot_fn(rpcsvc_request_t *req)
+{
+ int32_t ret = 0;
+ dict_t *dict = NULL;
+ gf_cli_req cli_req = {
+ {0},
+ };
+ glusterd_op_t cli_op = GD_OP_SNAP;
+ int type = 0;
+ glusterd_conf_t *conf = NULL;
+ char *host_uuid = NULL;
+ char err_str[2048] = "";
+ xlator_t *this = NULL;
+ uint32_t op_errno = 0;
+
+ GF_ASSERT(req);
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ ret = xdr_to_generic(req->msg[0], &cli_req, (xdrproc_t)xdr_gf_cli_req);
+ if (ret < 0) {
+ req->rpc_err = GARBAGE_ARGS;
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_GARBAGE_ARGS, NULL);
+ goto out;
+ }
+
+ if (cli_req.dict.dict_len > 0) {
+ dict = dict_new();
+ if (!dict)
+ goto out;
+
+ ret = dict_unserialize(cli_req.dict.dict_val, cli_req.dict.dict_len,
+ &dict);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_UNSERIALIZE_FAIL,
+ "failed to "
+ "unserialize req-buffer to dictionary");
+ snprintf(err_str, sizeof(err_str),
+ "Unable to decode "
+ "the command");
+ goto out;
+ }
+
+ dict->extra_stdfree = cli_req.dict.dict_val;
+
+ host_uuid = gf_strdup(uuid_utoa(MY_UUID));
+ if (host_uuid == NULL) {
+ snprintf(err_str, sizeof(err_str),
+ "Failed to get "
+ "the uuid of local glusterd");
+ ret = -1;
+ goto out;
+ }
+ ret = dict_set_dynstrn(dict, "host-uuid", SLEN("host-uuid"), host_uuid);
+ if (ret) {
+ GF_FREE(host_uuid);
+ goto out;
+ }
+
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "request dict length is %d", cli_req.dict.dict_len);
+ goto out;
+ }
+
+ if (conf->op_version < GD_OP_VERSION_3_6_0) {
+ snprintf(err_str, sizeof(err_str),
+ "Cluster operating version"
+ " is lesser than the supported version "
+ "for a snapshot");
+ op_errno = EG_OPNOTSUP;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNSUPPORTED_VERSION,
+ "%s (%d < %d)", err_str, conf->op_version, GD_OP_VERSION_3_6_0);
+ ret = -1;
+ goto out;
+ }
+
+ ret = dict_get_int32n(dict, "type", SLEN("type"), &type);
+ if (ret < 0) {
+ snprintf(err_str, sizeof(err_str), "Command type not found");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMAND_NOT_FOUND, "%s",
+ err_str);
+ goto out;
+ }
+
+ if (!glusterd_is_lvm_cmd_available(LVM_CREATE)) {
+ snprintf(err_str, sizeof(err_str),
+ "LVM commands not found,"
+ " snapshot functionality is disabled");
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_COMMAND_NOT_FOUND, "%s",
+ err_str);
+ ret = -1;
+ goto out;
+ }
+
+ switch (type) {
+ case GF_SNAP_OPTION_TYPE_CREATE:
+ ret = glusterd_handle_snapshot_create(req, cli_op, dict, err_str,
+ sizeof(err_str));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CREATION_FAIL,
+ "Snapshot create failed: %s", err_str);
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_CLONE:
+ ret = glusterd_handle_snapshot_clone(req, cli_op, dict, err_str,
+ sizeof(err_str));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CLONE_FAILED,
+ "Snapshot clone "
+ "failed: %s",
+ err_str);
+ }
+ break;
+
+ case GF_SNAP_OPTION_TYPE_RESTORE:
+ ret = glusterd_handle_snapshot_restore(req, cli_op, dict, err_str,
+ &op_errno, sizeof(err_str));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_RESTORE_FAIL,
+ "Snapshot restore failed: %s", err_str);
+ }
+
+ break;
+ case GF_SNAP_OPTION_TYPE_INFO:
+ ret = glusterd_handle_snapshot_info(req, cli_op, dict, err_str,
+ sizeof(err_str));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_INFO_FAIL,
+ "Snapshot info failed");
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_LIST:
+ ret = glusterd_handle_snapshot_list(req, cli_op, dict, err_str,
+ sizeof(err_str), &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_LIST_GET_FAIL,
+ "Snapshot list failed");
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_CONFIG:
+ ret = glusterd_handle_snapshot_config(req, cli_op, dict, err_str,
+ sizeof(err_str));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_CONFIG_FAIL,
+ "snapshot config failed");
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_DELETE:
+ ret = glusterd_handle_snapshot_delete(req, cli_op, dict, err_str,
+ &op_errno, sizeof(err_str));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "Snapshot delete failed: %s", err_str);
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_ACTIVATE:
+ ret = glusterd_mgmt_v3_initiate_snap_phases(req, cli_op, dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_ACTIVATE_FAIL,
+ "Snapshot activate failed: %s", err_str);
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_DEACTIVATE:
+ ret = glusterd_mgmt_v3_initiate_snap_phases(req, cli_op, dict);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_SNAP_DEACTIVATE_FAIL,
+ "Snapshot deactivate failed: %s", err_str);
+ }
+ break;
+ case GF_SNAP_OPTION_TYPE_STATUS:
+ ret = glusterd_handle_snapshot_status(req, cli_op, dict, err_str,
+ sizeof(err_str));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_STATUS_FAIL,
+ "Snapshot status failed: %s", err_str);
+ }
+ break;
+ default:
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_COMMAND_NOT_FOUND,
+ "Unknown snapshot request "
+ "type (%d)",
+ type);
+ ret = -1; /* Failure */
+ }
+
+out:
+ if (ret) {
+ if (err_str[0] == '\0')
+ snprintf(err_str, sizeof(err_str), "Operation failed");
+
+ if (ret && (op_errno == 0))
+ op_errno = EG_INTRNL;
+
+ ret = glusterd_op_send_cli_response(cli_op, ret, op_errno, req, dict,
+ err_str);
+ }
+
+ return ret;
+}
+
+int
+glusterd_handle_snapshot(rpcsvc_request_t *req)
+{
+ return glusterd_big_locked_handler(req, glusterd_handle_snapshot_fn);
+}
+
+static void
+glusterd_free_snap_op(glusterd_snap_op_t *snap_op)
+{
+ if (snap_op) {
+ if (snap_op->brick_path)
+ GF_FREE(snap_op->brick_path);
+
+ GF_FREE(snap_op);
+ }
+}
+
+static void
+glusterd_free_missed_snapinfo(glusterd_missed_snap_info *missed_snapinfo)
+{
+ glusterd_snap_op_t *snap_opinfo = NULL;
+ glusterd_snap_op_t *tmp = NULL;
+
+ if (missed_snapinfo) {
+ cds_list_for_each_entry_safe(snap_opinfo, tmp,
+ &missed_snapinfo->snap_ops, snap_ops_list)
+ {
+ glusterd_free_snap_op(snap_opinfo);
+ snap_opinfo = NULL;
+ }
+
+ if (missed_snapinfo->node_uuid)
+ GF_FREE(missed_snapinfo->node_uuid);
+
+ if (missed_snapinfo->snap_uuid)
+ GF_FREE(missed_snapinfo->snap_uuid);
+
+ GF_FREE(missed_snapinfo);
+ }
+}
+
+/* Look for duplicates and accordingly update the list */
+int32_t
+glusterd_update_missed_snap_entry(glusterd_missed_snap_info *missed_snapinfo,
+ glusterd_snap_op_t *missed_snap_op)
+{
+ int32_t ret = -1;
+ glusterd_snap_op_t *snap_opinfo = NULL;
+ gf_boolean_t match = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(missed_snapinfo);
+ GF_ASSERT(missed_snap_op);
+
+ cds_list_for_each_entry(snap_opinfo, &missed_snapinfo->snap_ops,
+ snap_ops_list)
+ {
+ /* If the entry is not for the same snap_vol_id
+ * then continue
+ */
+ if (strcmp(snap_opinfo->snap_vol_id, missed_snap_op->snap_vol_id))
+ continue;
+
+ if ((!strcmp(snap_opinfo->brick_path, missed_snap_op->brick_path)) &&
+ (snap_opinfo->op == missed_snap_op->op)) {
+ /* If two entries have conflicting status
+ * GD_MISSED_SNAP_DONE takes precedence
+ */
+ if ((snap_opinfo->status == GD_MISSED_SNAP_PENDING) &&
+ (missed_snap_op->status == GD_MISSED_SNAP_DONE)) {
+ snap_opinfo->status = GD_MISSED_SNAP_DONE;
+ gf_msg(this->name, GF_LOG_INFO, 0,
+ GD_MSG_MISSED_SNAP_STATUS_DONE,
+ "Updating missed snap status "
+ "for %s:%s=%s:%d:%s:%d as DONE",
+ missed_snapinfo->node_uuid, missed_snapinfo->snap_uuid,
+ snap_opinfo->snap_vol_id, snap_opinfo->brick_num,
+ snap_opinfo->brick_path, snap_opinfo->op);
+ ret = 0;
+ glusterd_free_snap_op(missed_snap_op);
+ goto out;
+ }
+ match = _gf_true;
+ break;
+ } else if ((snap_opinfo->brick_num == missed_snap_op->brick_num) &&
+ (snap_opinfo->op == GF_SNAP_OPTION_TYPE_CREATE) &&
+ ((missed_snap_op->op == GF_SNAP_OPTION_TYPE_DELETE) ||
+ (missed_snap_op->op == GF_SNAP_OPTION_TYPE_RESTORE))) {
+ /* Optimizing create and delete entries for the same
+ * brick and same node
+ */
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_MISSED_SNAP_STATUS_DONE,
+ "Updating missed snap status "
+ "for %s:%s=%s:%d:%s:%d as DONE",
+ missed_snapinfo->node_uuid, missed_snapinfo->snap_uuid,
+ snap_opinfo->snap_vol_id, snap_opinfo->brick_num,
+ snap_opinfo->brick_path, snap_opinfo->op);
+ snap_opinfo->status = GD_MISSED_SNAP_DONE;
+ ret = 0;
+ glusterd_free_snap_op(missed_snap_op);
+ goto out;
+ }
+ }
+
+ if (match == _gf_true) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_DUP_ENTRY,
+ "Duplicate entry. Not updating");
+ glusterd_free_snap_op(missed_snap_op);
+ } else {
+ cds_list_add_tail(&missed_snap_op->snap_ops_list,
+ &missed_snapinfo->snap_ops);
+ }
+
+ ret = 0;
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* Add new missed snap entry to the missed_snaps list. */
+int32_t
+glusterd_add_new_entry_to_list(char *missed_info, char *snap_vol_id,
+ int32_t brick_num, char *brick_path,
+ int32_t snap_op, int32_t snap_status)
+{
+ char *buf = NULL;
+ char *save_ptr = NULL;
+ char node_snap_info[PATH_MAX] = "";
+ int32_t ret = -1;
+ glusterd_missed_snap_info *missed_snapinfo = NULL;
+ glusterd_snap_op_t *missed_snap_op = NULL;
+ glusterd_conf_t *priv = NULL;
+ gf_boolean_t match = _gf_false;
+ gf_boolean_t free_missed_snap_info = _gf_false;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(missed_info);
+ GF_ASSERT(snap_vol_id);
+ GF_ASSERT(brick_path);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ /* Create the snap_op object consisting of the *
+ * snap id and the op */
+ ret = glusterd_missed_snap_op_new(&missed_snap_op);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_CREATE_FAIL,
+ "Failed to create new missed snap object.");
+ ret = -1;
+ goto out;
+ }
+
+ missed_snap_op->snap_vol_id = gf_strdup(snap_vol_id);
+ if (!missed_snap_op->snap_vol_id) {
+ ret = -1;
+ goto out;
+ }
+ missed_snap_op->brick_path = gf_strdup(brick_path);
+ if (!missed_snap_op->brick_path) {
+ ret = -1;
+ goto out;
+ }
+ missed_snap_op->brick_num = brick_num;
+ missed_snap_op->op = snap_op;
+ missed_snap_op->status = snap_status;
+
+ /* Look for other entries for the same node and same snap */
+ cds_list_for_each_entry(missed_snapinfo, &priv->missed_snaps_list,
+ missed_snaps)
+ {
+ snprintf(node_snap_info, sizeof(node_snap_info), "%s:%s",
+ missed_snapinfo->node_uuid, missed_snapinfo->snap_uuid);
+ if (!strcmp(node_snap_info, missed_info)) {
+ /* Found missed snapshot info for *
+ * the same node and same snap */
+ match = _gf_true;
+ break;
+ }
+ }
+
+ if (match == _gf_false) {
+ /* First snap op missed for the brick */
+ ret = glusterd_missed_snapinfo_new(&missed_snapinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_CREATE_FAIL,
+ "Failed to create missed snapinfo");
+ goto out;
+ }
+ free_missed_snap_info = _gf_true;
+ buf = strtok_r(missed_info, ":", &save_ptr);
+ if (!buf) {
+ ret = -1;
+ goto out;
+ }
+ missed_snapinfo->node_uuid = gf_strdup(buf);
+ if (!missed_snapinfo->node_uuid) {
+ ret = -1;
+ goto out;
+ }
+
+ buf = strtok_r(NULL, ":", &save_ptr);
+ if (!buf) {
+ ret = -1;
+ goto out;
+ }
+ missed_snapinfo->snap_uuid = gf_strdup(buf);
+ if (!missed_snapinfo->snap_uuid) {
+ ret = -1;
+ goto out;
+ }
+
+ cds_list_add_tail(&missed_snap_op->snap_ops_list,
+ &missed_snapinfo->snap_ops);
+ cds_list_add_tail(&missed_snapinfo->missed_snaps,
+ &priv->missed_snaps_list);
+
+ ret = 0;
+ goto out;
+ } else {
+ ret = glusterd_update_missed_snap_entry(missed_snapinfo,
+ missed_snap_op);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_CREATE_FAIL,
+ "Failed to update existing missed snap entry.");
+ goto out;
+ }
+ }
+
+out:
+ if (ret) {
+ glusterd_free_snap_op(missed_snap_op);
+
+ if (missed_snapinfo && (free_missed_snap_info == _gf_true))
+ glusterd_free_missed_snapinfo(missed_snapinfo);
+ }
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* Add missing snap entries to the in-memory conf->missed_snap_list */
+int32_t
+glusterd_add_missed_snaps_to_list(dict_t *dict, int32_t missed_snap_count)
+{
+ char *buf = NULL;
+ char *tmp = NULL;
+ char *save_ptr = NULL;
+ char *nodeid = NULL;
+ char *snap_uuid = NULL;
+ char *snap_vol_id = NULL;
+ char *brick_path = NULL;
+ char missed_info[PATH_MAX] = "";
+ char key[64] = "";
+ int keylen;
+ int32_t i = -1;
+ int32_t ret = -1;
+ int32_t brick_num = -1;
+ int32_t snap_op = -1;
+ int32_t snap_status = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ /* We can update the missed_snaps_list without acquiring *
+ * any additional locks as big lock will be held. */
+ for (i = 0; i < missed_snap_count; i++) {
+ keylen = snprintf(key, sizeof(key), "missed_snaps_%d", i);
+ ret = dict_get_strn(dict, key, keylen, &buf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_GET_FAILED,
+ "Unable to fetch %s", key);
+ goto out;
+ }
+
+ gf_msg_debug(this->name, 0, "missed_snap_entry = %s", buf);
+
+ /* Need to make a duplicate string coz the same dictionary *
+ * is resent to the non-originator nodes */
+ tmp = gf_strdup(buf);
+ if (!tmp) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Fetch the node-id, snap-id, brick_num,
+ * brick_path, snap_op and snap status
+ */
+ nodeid = strtok_r(tmp, ":", &save_ptr);
+ snap_uuid = strtok_r(NULL, "=", &save_ptr);
+ snap_vol_id = strtok_r(NULL, ":", &save_ptr);
+ brick_num = atoi(strtok_r(NULL, ":", &save_ptr));
+ brick_path = strtok_r(NULL, ":", &save_ptr);
+ snap_op = atoi(strtok_r(NULL, ":", &save_ptr));
+ snap_status = atoi(strtok_r(NULL, ":", &save_ptr));
+
+ if (!nodeid || !snap_uuid || !brick_path || !snap_vol_id ||
+ brick_num < 1 || snap_op < 1 || snap_status < 1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_INVALID_MISSED_SNAP_ENTRY,
+ "Invalid missed_snap_entry");
+ ret = -1;
+ goto out;
+ }
+
+ snprintf(missed_info, sizeof(missed_info), "%s:%s", nodeid, snap_uuid);
+
+ ret = glusterd_add_new_entry_to_list(missed_info, snap_vol_id,
+ brick_num, brick_path, snap_op,
+ snap_status);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+ "Failed to store missed snaps_list");
+ goto out;
+ }
+
+ GF_FREE(tmp);
+ tmp = NULL;
+ }
+
+ ret = 0;
+out:
+ if (tmp)
+ GF_FREE(tmp);
+
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+/* This function will restore origin volume to it's snap.
+ * The restore operation will simply replace the Gluster origin
+ * volume with the snap volume.
+ * TODO: Multi-volume delete to be done.
+ * Cleanup in case of restore failure is pending.
+ *
+ * @param orig_vol volinfo of origin volume
+ * @param snap_vol volinfo of snapshot volume
+ *
+ * @return 0 on success and negative value on error
+ */
+int
+gd_restore_snap_volume(dict_t *dict, dict_t *rsp_dict,
+ glusterd_volinfo_t *orig_vol,
+ glusterd_volinfo_t *snap_vol, int32_t volcount)
+{
+ int ret = -1;
+ glusterd_volinfo_t *new_volinfo = NULL;
+ glusterd_snap_t *snap = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_volinfo_t *temp_volinfo = NULL;
+ glusterd_volinfo_t *voliter = NULL;
+ gf_boolean_t conf_present = _gf_false;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(dict);
+ GF_ASSERT(rsp_dict);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ GF_VALIDATE_OR_GOTO(this->name, orig_vol, out);
+ GF_VALIDATE_OR_GOTO(this->name, snap_vol, out);
+ snap = snap_vol->snapshot;
+ GF_VALIDATE_OR_GOTO(this->name, snap, out);
+
+ /* Set the status to under restore so that if the
+ * the node goes down during restore and comes back
+ * the state of the volume can be reverted correctly
+ */
+ snap->snap_status = GD_SNAP_STATUS_UNDER_RESTORE;
+
+ /* We need to save this in disk so that if node goes
+ * down the status is in updated state.
+ */
+ ret = glusterd_store_snap(snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED,
+ "Could not store snap "
+ "object for %s snap of %s volume",
+ snap_vol->volname, snap_vol->parent_volname);
+ goto out;
+ }
+
+ /* Snap volume must be stopped before performing the
+ * restore operation.
+ */
+ ret = glusterd_stop_volume(snap_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_STOP_FAILED,
+ "Failed to stop "
+ "snap volume");
+ goto out;
+ }
+
+ /* Create a new volinfo for the restored volume */
+ ret = glusterd_volinfo_dup(snap_vol, &new_volinfo, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED,
+ "Failed to create volinfo");
+ goto out;
+ }
+
+ /* Following entries need to be derived from origin volume. */
+ gf_strncpy(new_volinfo->volname, orig_vol->volname,
+ sizeof(new_volinfo->volname));
+ gf_uuid_copy(new_volinfo->volume_id, orig_vol->volume_id);
+ new_volinfo->snap_count = orig_vol->snap_count;
+ gf_uuid_copy(new_volinfo->restored_from_snap, snap_vol->snapshot->snap_id);
+
+ /* Use the same version as the original version */
+ new_volinfo->version = orig_vol->version;
+
+ /* Copy the snap vol info to the new_volinfo.*/
+ ret = glusterd_snap_volinfo_restore(dict, rsp_dict, new_volinfo, snap_vol,
+ volcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESTORE_FAIL,
+ "Failed to restore snap");
+ goto out;
+ }
+
+ /* In case a new node is added to the peer, after a snapshot was
+ * taken, the geo-rep files are not synced to that node. This
+ * leads to the failure of snapshot restore. Hence, ignoring the
+ * missing geo-rep files in the new node, and proceeding with
+ * snapshot restore. Once the restore is successful, the missing
+ * geo-rep files can be generated with "gluster volume geo-rep
+ * <master-vol> <slave-vol> create push-pem force"
+ */
+ ret = glusterd_restore_geo_rep_files(snap_vol);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_SNAP_RESTORE_FAIL,
+ "Failed to restore "
+ "geo-rep files for snap %s",
+ snap_vol->snapshot->snapname);
+ }
+
+ /* Need not save cksum, as we will copy cksum file in *
+ * this function *
+ */
+ ret = glusterd_copy_quota_files(snap_vol, orig_vol, &conf_present);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESTORE_FAIL,
+ "Failed to restore "
+ "quota files for snap %s",
+ snap_vol->snapshot->snapname);
+ goto out;
+ }
+
+ /* New volinfo always shows the status as created. Therefore
+ * set the status to the original volume's status. */
+ glusterd_set_volume_status(new_volinfo, orig_vol->status);
+
+ cds_list_add_tail(&new_volinfo->vol_list, &conf->volumes);
+
+ ret = glusterd_store_volinfo(new_volinfo,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_OP_FAILED,
+ "Failed to store volinfo");
+ goto out;
+ }
+
+ ret = 0;
+out:
+ if (ret) {
+ /* In case of any failure we should free new_volinfo. Doing
+ * this will also remove the entry we added in conf->volumes
+ * if it was added there.
+ */
+ if (new_volinfo)
+ (void)glusterd_volinfo_delete(new_volinfo);
+ } else {
+ cds_list_for_each_entry_safe(voliter, temp_volinfo,
+ &orig_vol->snap_volumes, snapvol_list)
+ {
+ cds_list_add_tail(&voliter->snapvol_list,
+ &new_volinfo->snap_volumes);
+ }
+ }
+
+ return ret;
+}
+
+int
+glusterd_snapshot_get_volnames_uuids(dict_t *dict, char *volname,
+ gf_getsnap_name_uuid_rsp *snap_info_rsp)
+{
+ int ret = -1;
+ int snapcount = 0;
+ char key[32] = "";
+ glusterd_volinfo_t *snap_vol = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *tmp_vol = NULL;
+ xlator_t *this = NULL;
+ int op_errno = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(volname);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, dict, out, op_errno, EINVAL);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, volname, out, op_errno, EINVAL);
+ GF_VALIDATE_OR_GOTO_WITH_ERROR(this->name, snap_info_rsp, out, op_errno,
+ EINVAL);
+
+ ret = glusterd_volinfo_find(volname, &volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_VOL_NOT_FOUND,
+ "Failed to get volinfo of volume %s", volname);
+ op_errno = EINVAL;
+ goto out;
+ }
+
+ cds_list_for_each_entry_safe(snap_vol, tmp_vol, &volinfo->snap_volumes,
+ snapvol_list)
+ {
+ if (GLUSTERD_STATUS_STARTED != snap_vol->status)
+ continue;
+
+ snapcount++;
+
+ /* Set Snap Name */
+ snprintf(key, sizeof(key), "snapname.%d", snapcount);
+ ret = dict_set_dynstr_with_alloc(dict, key,
+ snap_vol->snapshot->snapname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "snap name in dictionary");
+ goto out;
+ }
+
+ /* Set Snap ID */
+ snprintf(key, sizeof(key), "snap-id.%d", snapcount);
+ ret = dict_set_dynstr_with_alloc(
+ dict, key, uuid_utoa(snap_vol->snapshot->snap_id));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "snap id in dictionary");
+ goto out;
+ }
+
+ /* Snap Volname which is used to activate the snap vol */
+ snprintf(key, sizeof(key), "snap-volname.%d", snapcount);
+ ret = dict_set_dynstr_with_alloc(dict, key, snap_vol->volname);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set "
+ "snap id in dictionary");
+ goto out;
+ }
+ }
+
+ ret = dict_set_int32n(dict, "snap-count", SLEN("snap-count"), snapcount);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Failed to set snapcount");
+ op_errno = -ret;
+ goto out;
+ }
+
+ ret = dict_allocate_and_serialize(dict, &snap_info_rsp->dict.dict_val,
+ &snap_info_rsp->dict.dict_len);
+ if (ret) {
+ op_errno = -ret;
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ snap_info_rsp->op_ret = ret;
+ snap_info_rsp->op_errno = op_errno;
+ snap_info_rsp->op_errstr = "";
+
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-statedump.c b/xlators/mgmt/glusterd/src/glusterd-statedump.c
new file mode 100644
index 00000000000..225d10cc546
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-statedump.c
@@ -0,0 +1,243 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/statedump.h>
+#include "glusterd.h"
+#include "glusterd-shd-svc.h"
+#include "glusterd-quotad-svc.h"
+#include "glusterd-locks.h"
+#include "glusterd-messages.h"
+
+static void
+glusterd_dump_peer(glusterd_peerinfo_t *peerinfo, char *input_key, int index,
+ gf_boolean_t xpeers)
+{
+ char subkey[GF_DUMP_MAX_BUF_LEN + 11] = "";
+ char key[GF_DUMP_MAX_BUF_LEN] = "";
+
+ strncpy(key, input_key, sizeof(key) - 1);
+
+ snprintf(subkey, sizeof(subkey), "%s%d", key, index);
+
+ gf_proc_dump_build_key(key, subkey, "uuid");
+ gf_proc_dump_write(key, "%s", uuid_utoa(peerinfo->uuid));
+
+ gf_proc_dump_build_key(key, subkey, "hostname");
+ gf_proc_dump_write(key, "%s", peerinfo->hostname);
+
+ gf_proc_dump_build_key(key, subkey, "port");
+ gf_proc_dump_write(key, "%d", peerinfo->port);
+
+ gf_proc_dump_build_key(key, subkey, "state");
+ gf_proc_dump_write(key, "%d", peerinfo->state.state);
+
+ gf_proc_dump_build_key(key, subkey, "quorum-action");
+ gf_proc_dump_write(key, "%d", peerinfo->quorum_action);
+
+ gf_proc_dump_build_key(key, subkey, "quorum-contrib");
+ gf_proc_dump_write(key, "%d", peerinfo->quorum_contrib);
+
+ gf_proc_dump_build_key(key, subkey, "detaching");
+ gf_proc_dump_write(key, "%d", peerinfo->detaching);
+
+ gf_proc_dump_build_key(key, subkey, "locked");
+ gf_proc_dump_write(key, "%d", peerinfo->locked);
+}
+
+static void
+glusterd_dump_peer_rpcstat(glusterd_peerinfo_t *peerinfo, char *input_key,
+ int index)
+{
+ rpc_clnt_connection_t *conn = NULL;
+ int ret = -1;
+ rpc_clnt_t *rpc = NULL;
+ char rpcsvc_peername[RPCSVC_PEER_STRLEN] = "";
+ char subkey[GF_DUMP_MAX_BUF_LEN + 11] = "";
+ char key[GF_DUMP_MAX_BUF_LEN] = "";
+
+ strncpy(key, input_key, sizeof(key) - 1);
+
+ /* Dump the rpc connection statistics */
+ rpc = peerinfo->rpc;
+ if (rpc) {
+ conn = &rpc->conn;
+ snprintf(subkey, sizeof(subkey), "%s%d", key, index);
+ ret = rpcsvc_transport_peername(conn->trans, (char *)&rpcsvc_peername,
+ sizeof(rpcsvc_peername));
+ if (!ret) {
+ gf_proc_dump_build_key(key, subkey, "rpc.peername");
+ gf_proc_dump_write(key, "%s", rpcsvc_peername);
+ }
+ gf_proc_dump_build_key(key, subkey, "rpc.connected");
+ gf_proc_dump_write(key, "%d", conn->connected);
+
+ gf_proc_dump_build_key(key, subkey, "rpc.total-bytes-read");
+ gf_proc_dump_write(key, "%" PRIu64, conn->trans->total_bytes_read);
+
+ gf_proc_dump_build_key(key, subkey, "rpc.total-bytes-written");
+ gf_proc_dump_write(key, "%" PRIu64, conn->trans->total_bytes_write);
+
+ gf_proc_dump_build_key(key, subkey, "rpc.ping_msgs_sent");
+ gf_proc_dump_write(key, "%" PRIu64, conn->pingcnt);
+
+ gf_proc_dump_build_key(key, subkey, "rpc.msgs_sent");
+ gf_proc_dump_write(key, "%" PRIu64, conn->msgcnt);
+ }
+}
+
+static void
+glusterd_dump_client_details(glusterd_conf_t *conf)
+{
+ rpc_transport_t *xprt = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = "";
+ char subkey[50] = "";
+ int index = 1;
+
+ pthread_mutex_lock(&conf->xprt_lock);
+ {
+ list_for_each_entry(xprt, &conf->xprt_list, list)
+ {
+ snprintf(subkey, sizeof(subkey), "glusterd.client%d", index);
+
+ gf_proc_dump_build_key(key, subkey, "identifier");
+ gf_proc_dump_write(key, "%s", xprt->peerinfo.identifier);
+
+ gf_proc_dump_build_key(key, subkey, "volname");
+ gf_proc_dump_write(key, "%s", xprt->peerinfo.volname);
+
+ gf_proc_dump_build_key(key, subkey, "max-op-version");
+ gf_proc_dump_write(key, "%u", xprt->peerinfo.max_op_version);
+
+ gf_proc_dump_build_key(key, subkey, "min-op-version");
+ gf_proc_dump_write(key, "%u", xprt->peerinfo.min_op_version);
+ index++;
+ }
+ }
+ pthread_mutex_unlock(&conf->xprt_lock);
+}
+
+/* The following function is just for dumping mgmt_v3_lock dictionary, any other
+ * dict passed to this API will not work */
+
+static void
+glusterd_dict_mgmt_v3_lock_statedump(dict_t *dict)
+{
+ int ret = 0;
+ int dumplen = 0;
+ data_pair_t *trav = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = "";
+ char dump[64 * 1024] = "";
+
+ if (!dict) {
+ gf_msg_callingfn("glusterd", GF_LOG_WARNING, EINVAL, GD_MSG_DICT_EMPTY,
+ "dict NULL");
+ goto out;
+ }
+ for (trav = dict->members_list; trav; trav = trav->next) {
+ if (strstr(trav->key, "debug.last-success-bt") != NULL) {
+ ret = snprintf(&dump[dumplen], sizeof(dump) - dumplen, "\n\t%s:%s",
+ trav->key, trav->value->data);
+ } else {
+ ret = snprintf(
+ &dump[dumplen], sizeof(dump) - dumplen, "\n\t%s:%s", trav->key,
+ uuid_utoa(((glusterd_mgmt_v3_lock_obj *)(trav->value->data))
+ ->lock_owner));
+ }
+ if ((ret == -1) || !ret)
+ return;
+ dumplen += ret;
+ }
+
+ if (dumplen) {
+ gf_proc_dump_build_key(key, "glusterd", "mgmt_v3_lock");
+ gf_proc_dump_write(key, "%s", dump);
+ }
+
+out:
+ return;
+}
+
+int
+glusterd_dump_priv(xlator_t *this)
+{
+ glusterd_conf_t *priv = NULL;
+ char key[GF_DUMP_MAX_BUF_LEN] = "";
+ int port = 0;
+ struct pmap_registry *pmap = NULL;
+
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+
+ priv = this->private;
+ if (!priv)
+ return 0;
+
+ gf_proc_dump_build_key(key, "xlator.glusterd", "priv");
+ gf_proc_dump_add_section("%s", key);
+
+ pthread_mutex_lock(&priv->mutex);
+ {
+ gf_proc_dump_build_key(key, "glusterd", "my-uuid");
+ gf_proc_dump_write(key, "%s", uuid_utoa(priv->uuid));
+
+ gf_proc_dump_build_key(key, "glusterd", "working-directory");
+ gf_proc_dump_write(key, "%s", priv->workdir);
+
+ gf_proc_dump_build_key(key, "glusterd", "max-op-version");
+ gf_proc_dump_write(key, "%d", GD_OP_VERSION_MAX);
+
+ gf_proc_dump_build_key(key, "glusterd", "min-op-version");
+ gf_proc_dump_write(key, "%d", GD_OP_VERSION_MIN);
+
+ gf_proc_dump_build_key(key, "glusterd", "current-op-version");
+ gf_proc_dump_write(key, "%d", priv->op_version);
+
+ gf_proc_dump_build_key(key, "glusterd", "ping-timeout");
+ gf_proc_dump_write(key, "%d", priv->ping_timeout);
+#ifdef BUILD_GNFS
+ gf_proc_dump_build_key(key, "glusterd", "nfs.online");
+ gf_proc_dump_write(key, "%d", priv->nfs_svc.online);
+#endif
+ gf_proc_dump_build_key(key, "glusterd", "quotad.online");
+ gf_proc_dump_write(key, "%d", priv->quotad_svc.online);
+
+ gf_proc_dump_build_key(key, "glusterd", "bitd.online");
+ gf_proc_dump_write(key, "%d", priv->bitd_svc.online);
+
+ gf_proc_dump_build_key(key, "glusterd", "scrub.online");
+ gf_proc_dump_write(key, "%d", priv->scrub_svc.online);
+
+ /* Dump peer details */
+ GLUSTERD_DUMP_PEERS(&priv->peers, uuid_list, _gf_false);
+
+ /* Dump pmap data structure from base port to last alloc */
+ pmap = priv->pmap;
+ if (pmap) {
+ for (port = pmap->base_port; port <= pmap->last_alloc; port++) {
+ gf_proc_dump_build_key(key, "glusterd", "pmap_port");
+ gf_proc_dump_write(key, "%d", port);
+ gf_proc_dump_build_key(key, "glusterd", "pmap[%d].type", port);
+ gf_proc_dump_write(key, "%d", pmap->ports[port].type);
+ gf_proc_dump_build_key(key, "glusterd", "pmap[%d].brickname",
+ port);
+ gf_proc_dump_write(key, "%s", pmap->ports[port].brickname);
+ }
+ }
+ /* Dump client details */
+ glusterd_dump_client_details(priv);
+
+ /* Dump mgmt_v3_lock from the dictionary if any */
+ glusterd_dict_mgmt_v3_lock_statedump(priv->mgmt_v3_lock);
+ dict_dump_to_statedump(priv->opts, "options", "glusterd");
+ }
+ pthread_mutex_unlock(&priv->mutex);
+
+out:
+ return 0;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-statedump.h b/xlators/mgmt/glusterd/src/glusterd-statedump.h
new file mode 100644
index 00000000000..b5ef1f48e82
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-statedump.h
@@ -0,0 +1,18 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_STATEDUMP_H_
+#define _GLUSTERD_STATEDUMP_H_
+
+#include <glusterfs/xlator.h>
+
+int
+glusterd_dump_priv(xlator_t *this);
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.c b/xlators/mgmt/glusterd/src/glusterd-store.c
index 173cf244146..d94dceb10b7 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.c
+++ b/xlators/mgmt/glusterd/src/glusterd-store.c
@@ -1,2642 +1,5125 @@
/*
- Copyright (c) 2007-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
+ Copyright (c) 2007-2013 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
*/
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
#include "glusterd-op-sm.h"
#include <inttypes.h>
-
-#include "globals.h"
-#include "glusterfs.h"
-#include "compat.h"
-#include "dict.h"
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/dict.h>
#include "protocol-common.h"
-#include "xlator.h"
-#include "logging.h"
-#include "timer.h"
-#include "defaults.h"
-#include "compat.h"
-#include "compat-errno.h"
-#include "statedump.h"
+#include <glusterfs/xlator.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/timer.h>
+#include <glusterfs/syscall.h>
+#include <glusterfs/defaults.h>
+#include <glusterfs/compat.h>
+#include <glusterfs/compat-errno.h>
+#include <glusterfs/statedump.h>
#include "glusterd-mem-types.h"
#include "glusterd.h"
#include "glusterd-sm.h"
#include "glusterd-op-sm.h"
#include "glusterd-utils.h"
+#include "glusterd-hooks.h"
+#include <glusterfs/store.h>
#include "glusterd-store.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-messages.h"
#include "rpc-clnt.h"
-#include "common-utils.h"
+#include <glusterfs/common-utils.h>
+#include <glusterfs/quota-common-utils.h>
#include <sys/resource.h>
#include <inttypes.h>
#include <dirent.h>
-static int32_t
-glusterd_store_mkdir (char *path)
-{
- int32_t ret = -1;
-
- ret = mkdir (path, 0777);
-
- if ((-1 == ret) && (EEXIST != errno)) {
- gf_log ("", GF_LOG_ERROR, "mkdir() failed on path %s,"
- "errno: %s", path, strerror (errno));
- } else {
- ret = 0;
- }
-
- return ret;
-}
-
-int32_t
-glusterd_store_handle_create_on_absence (glusterd_store_handle_t **shandle,
- char *path)
-{
- GF_ASSERT (shandle);
- int32_t ret = 0;
-
- if (*shandle == NULL) {
- ret = glusterd_store_handle_new (path, shandle);
+#if defined(GF_LINUX_HOST_OS)
+#include <mntent.h>
+#else
+#include "mntent_compat.h"
+#endif
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to create store"
- " handle for path: %s", path);
- }
- }
- return ret;
-}
+#define GLUSTERD_GET_BRICK_DIR(path, volinfo, priv) \
+ do { \
+ int32_t _brick_len; \
+ if (volinfo->is_snap_volume) { \
+ _brick_len = snprintf(path, PATH_MAX, "%s/snaps/%s/%s/%s", \
+ priv->workdir, volinfo->snapshot->snapname, \
+ volinfo->volname, GLUSTERD_BRICK_INFO_DIR); \
+ } else { \
+ _brick_len = snprintf(path, PATH_MAX, "%s/%s/%s/%s", \
+ priv->workdir, GLUSTERD_VOLUME_DIR_PREFIX, \
+ volinfo->volname, GLUSTERD_BRICK_INFO_DIR); \
+ } \
+ if ((_brick_len < 0) || (_brick_len >= PATH_MAX)) { \
+ path[0] = 0; \
+ } \
+ } while (0)
-int32_t
-glusterd_store_mkstemp (glusterd_store_handle_t *shandle)
+void
+glusterd_replace_slash_with_hyphen(char *str)
{
- int fd = -1;
- char tmppath[PATH_MAX] = {0,};
-
- GF_ASSERT (shandle);
- GF_ASSERT (shandle->path);
+ char *ptr = NULL;
- snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path);
- fd = open (tmppath, O_RDWR | O_CREAT | O_TRUNC, 0600);
- if (fd <= 0) {
- gf_log ("glusterd", GF_LOG_ERROR, "Failed to open %s, "
- "error: %s", tmppath, strerror (errno));
- }
+ ptr = strchr(str, '/');
- return fd;
+ while (ptr) {
+ *ptr = '-';
+ ptr = strchr(ptr, '/');
+ }
}
int32_t
-glusterd_store_rename_tmppath (glusterd_store_handle_t *shandle)
+glusterd_store_create_brick_dir(glusterd_volinfo_t *volinfo)
{
- int32_t ret = -1;
- char tmppath[PATH_MAX] = {0,};
-
- GF_ASSERT (shandle);
- GF_ASSERT (shandle->path);
-
- snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path);
- ret = rename (tmppath, shandle->path);
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "Failed to mv %s to %s, "
- "error: %s", tmppath, shandle->path, strerror (errno));
- }
+ int32_t ret = -1;
+ char brickdirpath[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
- return ret;
-}
+ GF_ASSERT(volinfo);
-int32_t
-glusterd_store_unlink_tmppath (glusterd_store_handle_t *shandle)
-{
- int32_t ret = -1;
- char tmppath[PATH_MAX] = {0,};
-
- GF_ASSERT (shandle);
- GF_ASSERT (shandle->path);
+ priv = THIS->private;
+ GF_ASSERT(priv);
- snprintf (tmppath, sizeof (tmppath), "%s.tmp", shandle->path);
- ret = unlink (tmppath);
- if (ret && (errno != ENOENT)) {
- gf_log ("glusterd", GF_LOG_ERROR, "Failed to mv %s to %s, "
- "error: %s", tmppath, shandle->path, strerror (errno));
- } else {
- ret = 0;
- }
+ GLUSTERD_GET_BRICK_DIR(brickdirpath, volinfo, priv);
+ ret = gf_store_mkdir(brickdirpath);
- return ret;
+ return ret;
}
static void
-glusterd_replace_slash_with_hipen (char *str)
+glusterd_store_key_vol_brick_set(glusterd_brickinfo_t *brickinfo,
+ char *key_vol_brick, size_t len)
{
- char *ptr = NULL;
+ GF_ASSERT(brickinfo);
+ GF_ASSERT(key_vol_brick);
+ GF_ASSERT(len >= PATH_MAX);
- ptr = strchr (str, '/');
-
- while (ptr) {
- *ptr = '-';
- ptr = strchr (str, '/');
- }
+ snprintf(key_vol_brick, len, "%s", brickinfo->path);
+ glusterd_replace_slash_with_hyphen(key_vol_brick);
}
-int32_t
-glusterd_store_create_brick_dir (glusterd_volinfo_t *volinfo)
+static void
+glusterd_store_brickinfofname_set(glusterd_brickinfo_t *brickinfo,
+ char *brickfname, size_t len)
{
- int32_t ret = -1;
- char brickdirpath[PATH_MAX] = {0,};
- glusterd_conf_t *priv = NULL;
-
- GF_ASSERT (volinfo);
+ char key_vol_brick[PATH_MAX] = {0};
- priv = THIS->private;
- GF_ASSERT (priv);
+ GF_ASSERT(brickfname);
+ GF_ASSERT(brickinfo);
+ GF_ASSERT(len >= PATH_MAX);
- GLUSTERD_GET_BRICK_DIR (brickdirpath, volinfo, priv);
- ret = glusterd_store_mkdir (brickdirpath);
-
- return ret;
+ glusterd_store_key_vol_brick_set(brickinfo, key_vol_brick,
+ sizeof(key_vol_brick));
+ snprintf(brickfname, len, "%s:%s", brickinfo->hostname, key_vol_brick);
}
static void
-glusterd_store_key_vol_brick_set (glusterd_brickinfo_t *brickinfo,
- char *key_vol_brick, size_t len)
+glusterd_store_brickinfopath_set(glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo,
+ char *brickpath, size_t len)
{
- GF_ASSERT (brickinfo);
- GF_ASSERT (key_vol_brick);
- GF_ASSERT (len >= PATH_MAX);
-
- snprintf (key_vol_brick, len, "%s", brickinfo->path);
- glusterd_replace_slash_with_hipen (key_vol_brick);
+ char brickfname[PATH_MAX] = {0};
+ char brickdirpath[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+
+ GF_ASSERT(brickpath);
+ GF_ASSERT(brickinfo);
+ GF_ASSERT(len >= PATH_MAX);
+
+ priv = THIS->private;
+ GF_ASSERT(priv);
+
+ GLUSTERD_GET_BRICK_DIR(brickdirpath, volinfo, priv);
+ glusterd_store_brickinfofname_set(brickinfo, brickfname,
+ sizeof(brickfname));
+ snprintf(brickpath, len, "%s/%s", brickdirpath, brickfname);
}
static void
-glusterd_store_brickinfofname_set (glusterd_brickinfo_t *brickinfo,
- char *brickfname, size_t len)
+glusterd_store_snapd_path_set(glusterd_volinfo_t *volinfo, char *snapd_path,
+ size_t len)
{
- char key_vol_brick[PATH_MAX] = {0};
+ char volpath[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
- GF_ASSERT (brickfname);
- GF_ASSERT (brickinfo);
- GF_ASSERT (len >= PATH_MAX);
-
- glusterd_store_key_vol_brick_set (brickinfo, key_vol_brick,
- sizeof (key_vol_brick));
- snprintf (brickfname, len, "%s:%s", brickinfo->hostname, key_vol_brick);
-}
-
-static void
-glusterd_store_brickinfopath_set (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *brickinfo,
- char *brickpath, size_t len)
-{
- char brickfname[PATH_MAX] = {0};
- char brickdirpath[PATH_MAX] = {0,};
- glusterd_conf_t *priv = NULL;
+ GF_ASSERT(volinfo);
+ GF_ASSERT(len >= PATH_MAX);
- GF_ASSERT (brickpath);
- GF_ASSERT (brickinfo);
- GF_ASSERT (len >= PATH_MAX);
+ priv = THIS->private;
+ GF_ASSERT(priv);
- priv = THIS->private;
- GF_ASSERT (priv);
+ GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv);
- GLUSTERD_GET_BRICK_DIR (brickdirpath, volinfo, priv);
- glusterd_store_brickinfofname_set (brickinfo, brickfname,
- sizeof (brickfname));
- snprintf (brickpath, len, "%s/%s", brickdirpath, brickfname);
+ snprintf(snapd_path, len, "%s/snapd.info", volpath);
}
gf_boolean_t
-glusterd_store_is_valid_brickpath (char *volname, char *brick)
+glusterd_store_is_valid_brickpath(char *volname, char *brick)
{
- char brickpath[PATH_MAX] = {0};
- glusterd_brickinfo_t *brickinfo = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- int32_t ret = 0;
-
- ret = glusterd_brickinfo_from_brick (brick, &brickinfo);
- if (ret) {
- gf_log ("", GF_LOG_WARNING, "brick path validation failed");
- ret = 0;
- goto out;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ int32_t ret = 0;
+ size_t volname_len = strlen(volname);
+ xlator_t *this = NULL;
+ int bpath_len = 0;
+ const char delim[2] = "/";
+ char *sub_dir = NULL;
+ char *saveptr = NULL;
+ char *brickpath_ptr = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = glusterd_brickinfo_new_from_brick(brick, &brickinfo, _gf_false, NULL);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_BRICK_CREATION_FAIL,
+ "Failed to create brick "
+ "info for brick %s",
+ brick);
+ ret = 0;
+ goto out;
+ }
+ ret = glusterd_volinfo_new(&volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_VOLFILE_CREATE_FAIL,
+ "Failed to create volinfo");
+ ret = 0;
+ goto out;
+ }
+ if (volname_len >= sizeof(volinfo->volname)) {
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_NAME_TOO_LONG,
+ "volume name too long");
+ ret = 0;
+ goto out;
+ }
+ memcpy(volinfo->volname, volname, volname_len + 1);
+
+ /* Check whether brickpath is less than PATH_MAX */
+ ret = 1;
+ bpath_len = strlen(brickinfo->path);
+
+ if (brickinfo->path[bpath_len - 1] != '/') {
+ if (bpath_len >= PATH_MAX) {
+ ret = 0;
+ goto out;
}
- ret = glusterd_volinfo_new (&volinfo);
- if (ret) {
- gf_log ("", GF_LOG_WARNING, "brick path validation failed");
- ret = 0;
- goto out;
+ } else {
+ /* Path has a trailing "/" which should not be considered in
+ * length check validation
+ */
+ if (bpath_len >= PATH_MAX + 1) {
+ ret = 0;
+ goto out;
}
- strncpy (volinfo->volname, volname, sizeof (volinfo->volname));
- glusterd_store_brickinfopath_set (volinfo, brickinfo, brickpath,
- sizeof (brickpath));
+ }
- ret = (strlen (brickpath) < _POSIX_PATH_MAX);
+ /* The following validation checks whether each sub directories in the
+ * brick path meets the POSIX max length validation
+ */
+
+ brickpath_ptr = brickinfo->path;
+ sub_dir = strtok_r(brickpath_ptr, delim, &saveptr);
+
+ while (sub_dir != NULL) {
+ if (strlen(sub_dir) >= _POSIX_PATH_MAX) {
+ ret = 0;
+ goto out;
+ }
+ sub_dir = strtok_r(NULL, delim, &saveptr);
+ }
out:
- if (brickinfo)
- glusterd_brickinfo_delete (brickinfo);
- if (volinfo)
- glusterd_volinfo_delete (volinfo);
+ if (brickinfo)
+ glusterd_brickinfo_delete(brickinfo);
+ if (volinfo)
+ glusterd_volinfo_unref(volinfo);
- return ret;
+ return ret;
}
int32_t
-glusterd_store_volinfo_brick_fname_write (int vol_fd,
+glusterd_store_volinfo_brick_fname_write(int vol_fd,
glusterd_brickinfo_t *brickinfo,
- int32_t brick_count)
+ int32_t brick_count,
+ int is_thin_arbiter)
{
- char key[PATH_MAX] = {0,};
- char brickfname[PATH_MAX] = {0,};
- int32_t ret = -1;
-
- snprintf (key, sizeof (key), "%s-%d", GLUSTERD_STORE_KEY_VOL_BRICK,
- brick_count);
- glusterd_store_brickinfofname_set (brickinfo, brickfname,
- sizeof (brickfname));
- ret = glusterd_store_save_value (vol_fd, key, brickfname);
- return ret;
+ char key[64] = {
+ 0,
+ };
+ char brickfname[PATH_MAX] = {
+ 0,
+ };
+ int32_t ret = -1;
+
+ if (!is_thin_arbiter) {
+ snprintf(key, sizeof(key), "%s-%d", GLUSTERD_STORE_KEY_VOL_BRICK,
+ brick_count);
+ } else {
+ snprintf(key, sizeof(key), "%s-%d", GLUSTERD_STORE_KEY_VOL_TA_BRICK,
+ brick_count);
+ }
+ glusterd_store_brickinfofname_set(brickinfo, brickfname,
+ sizeof(brickfname));
+ ret = gf_store_save_value(vol_fd, key, brickfname);
+ return ret;
}
int32_t
-glusterd_store_create_brick_shandle_on_absence (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *brickinfo)
+glusterd_store_create_brick_shandle_on_absence(glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo)
{
- char brickpath[PATH_MAX] = {0,};
- int32_t ret = 0;
-
- GF_ASSERT (volinfo);
- GF_ASSERT (brickinfo);
-
- glusterd_store_brickinfopath_set (volinfo, brickinfo, brickpath,
- sizeof (brickpath));
- ret = glusterd_store_handle_create_on_absence (&brickinfo->shandle,
- brickpath);
- return ret;
+ char brickpath[PATH_MAX] = {
+ 0,
+ };
+ int32_t ret = 0;
+
+ GF_ASSERT(volinfo);
+ GF_ASSERT(brickinfo);
+
+ glusterd_store_brickinfopath_set(volinfo, brickinfo, brickpath,
+ sizeof(brickpath));
+ ret = gf_store_handle_create_on_absence(&brickinfo->shandle, brickpath);
+ return ret;
}
int32_t
-glusterd_store_brickinfo_write (int fd, glusterd_brickinfo_t *brickinfo)
+glusterd_store_create_snapd_shandle_on_absence(glusterd_volinfo_t *volinfo)
{
- char value[256] = {0,};
- int32_t ret = 0;
+ char snapd_path[PATH_MAX] = {
+ 0,
+ };
+ int32_t ret = 0;
- GF_ASSERT (brickinfo);
- GF_ASSERT (fd > 0);
-
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_HOSTNAME,
- brickinfo->hostname);
- if (ret)
- goto out;
+ GF_ASSERT(volinfo);
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_PATH,
- brickinfo->path);
- if (ret)
- goto out;
+ glusterd_store_snapd_path_set(volinfo, snapd_path, sizeof(snapd_path));
+ ret = gf_store_handle_create_on_absence(&volinfo->snapd.handle, snapd_path);
+ return ret;
+}
- snprintf (value, sizeof(value), "%d", brickinfo->port);
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_PORT,
- value);
+/* Store the bricks snapshot details only if required
+ *
+ * The snapshot details will be stored only if the cluster op-version is
+ * greater than or equal to 4
+ */
+static int
+gd_store_brick_snap_details_write(int fd, glusterd_brickinfo_t *brickinfo)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char value[5 * PATH_MAX];
+ uint total_len = 0;
- snprintf (value, sizeof(value), "%d", brickinfo->rdma_port);
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT,
- value);
+ this = THIS;
+ GF_ASSERT(this != NULL);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out);
- snprintf (value, sizeof(value), "%d", brickinfo->decommissioned);
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED,
- value);
+ GF_VALIDATE_OR_GOTO(this->name, (fd > 0), out);
+ GF_VALIDATE_OR_GOTO(this->name, (brickinfo != NULL), out);
+ if (conf->op_version < GD_OP_VERSION_3_6_0) {
+ ret = 0;
+ goto out;
+ }
+
+ if (brickinfo->device_path[0] != '\0') {
+ ret = snprintf(value + total_len, sizeof(value) - total_len, "%s=%s\n",
+ GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH,
+ brickinfo->device_path);
+ if (ret < 0 || ret >= sizeof(value) - total_len) {
+ ret = -1;
+ goto err;
+ }
+ total_len += ret;
+ }
+
+ if (brickinfo->mount_dir[0] != '\0') {
+ ret = snprintf(value + total_len, sizeof(value) - total_len, "%s=%s\n",
+ GLUSTERD_STORE_KEY_BRICK_MOUNT_DIR,
+ brickinfo->mount_dir);
+ if (ret < 0 || ret >= sizeof(value) - total_len) {
+ ret = -1;
+ goto err;
+ }
+ total_len += ret;
+ }
+
+ if (brickinfo->fstype[0] != '\0') {
+ ret = snprintf(value + total_len, sizeof(value) - total_len, "%s=%s\n",
+ GLUSTERD_STORE_KEY_BRICK_FSTYPE, brickinfo->fstype);
+ if (ret < 0 || ret >= sizeof(value) - total_len) {
+ ret = -1;
+ goto err;
+ }
+ total_len += ret;
+ }
+
+ if (brickinfo->mnt_opts[0] != '\0') {
+ ret = snprintf(value + total_len, sizeof(value) - total_len, "%s=%s\n",
+ GLUSTERD_STORE_KEY_BRICK_MNTOPTS, brickinfo->mnt_opts);
+ if (ret < 0 || ret >= sizeof(value) - total_len) {
+ ret = -1;
+ goto err;
+ }
+ total_len += ret;
+ }
+
+ ret = snprintf(value + total_len, sizeof(value) - total_len, "%s=%d\n",
+ GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS,
+ brickinfo->snap_status);
+ if (ret < 0 || ret >= sizeof(value) - total_len) {
+ ret = -1;
+ goto err;
+ }
+ total_len += ret;
+
+ ret = snprintf(value + total_len, sizeof(value) - total_len,
+ "%s=%" PRIu64 "\n", GLUSTERD_STORE_KEY_BRICK_FSID,
+ brickinfo->statfs_fsid);
+ if (ret < 0 || ret >= sizeof(value) - total_len) {
+ ret = -1;
+ goto err;
+ }
+
+ ret = gf_store_save_items(fd, value);
+err:
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_FS_LABEL_UPDATE_FAIL,
+ "Failed to save "
+ "snap detils of brick %s",
+ brickinfo->path);
+ }
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
}
-int32_t
-glusterd_store_perform_brick_store (glusterd_brickinfo_t *brickinfo)
+static int32_t
+glusterd_store_brickinfo_write(int fd, glusterd_brickinfo_t *brickinfo)
{
- int fd = -1;
- int32_t ret = -1;
- GF_ASSERT (brickinfo);
-
- fd = glusterd_store_mkstemp (brickinfo->shandle);
- if (fd <= 0) {
- ret = -1;
- goto out;
- }
-
- ret = glusterd_store_brickinfo_write (fd, brickinfo);
- if (ret)
- goto out;
-
- ret = glusterd_store_rename_tmppath (brickinfo->shandle);
+ char value[5 * PATH_MAX];
+ int32_t ret = -1;
+
+ GF_ASSERT(brickinfo);
+ GF_ASSERT(fd > 0);
+
+ ret = snprintf(value, sizeof(value),
+ "%s=%s\n%s=%s\n%s=%s\n%s=%s\n%s=%d\n%s=%d\n%s=%d\n%s=%s\n",
+ GLUSTERD_STORE_KEY_BRICK_UUID, uuid_utoa(brickinfo->uuid),
+ GLUSTERD_STORE_KEY_BRICK_HOSTNAME, brickinfo->hostname,
+ GLUSTERD_STORE_KEY_BRICK_PATH, brickinfo->path,
+ GLUSTERD_STORE_KEY_BRICK_REAL_PATH, brickinfo->path,
+ GLUSTERD_STORE_KEY_BRICK_PORT, brickinfo->port,
+ GLUSTERD_STORE_KEY_BRICK_RDMA_PORT, brickinfo->rdma_port,
+ GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED,
+ brickinfo->decommissioned, GLUSTERD_STORE_KEY_BRICK_ID,
+ brickinfo->brick_id);
+
+ if (ret < 0 || ret >= sizeof(value)) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_store_save_items(fd, value);
+ if (ret)
+ goto out;
+
+ ret = gd_store_brick_snap_details_write(fd, brickinfo);
+ if (ret)
+ goto out;
+
+ if (!brickinfo->vg[0])
+ goto out;
+
+ ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_BRICK_VGNAME,
+ brickinfo->vg);
out:
- if (ret && (fd > 0))
- glusterd_store_unlink_tmppath (brickinfo->shandle);
- if (fd > 0)
- close (fd);
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ return ret;
}
int32_t
-glusterd_store_brickinfo (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *brickinfo, int32_t brick_count,
- int vol_fd)
+glusterd_store_snapd_write(int fd, glusterd_volinfo_t *volinfo)
{
- int32_t ret = -1;
-
- GF_ASSERT (volinfo);
- GF_ASSERT (brickinfo);
-
- ret = glusterd_store_volinfo_brick_fname_write (vol_fd, brickinfo,
- brick_count);
- if (ret)
- goto out;
-
- ret = glusterd_store_create_brick_dir (volinfo);
- if (ret)
- goto out;
+ char value[64] = {
+ 0,
+ };
+ int32_t ret = 0;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(volinfo);
+ GF_ASSERT(fd > 0);
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ snprintf(value, sizeof(value), "%d", volinfo->snapd.port);
+ ret = gf_store_save_value(fd, GLUSTERD_STORE_KEY_SNAPD_PORT, value);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_PORT_STORE_FAIL,
+ "failed to store the snapd "
+ "port of volume %s",
+ volinfo->volname);
+
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
- ret = glusterd_store_create_brick_shandle_on_absence (volinfo,
- brickinfo);
- if (ret)
- goto out;
+static int32_t
+glusterd_store_perform_brick_store(glusterd_brickinfo_t *brickinfo)
+{
+ int fd = -1;
+ int32_t ret = -1;
+ GF_ASSERT(brickinfo);
+
+ fd = gf_store_mkstemp(brickinfo->shandle);
+ if (fd <= 0) {
+ ret = -1;
+ goto out;
+ }
+ ret = glusterd_store_brickinfo_write(fd, brickinfo);
+ if (ret)
+ goto out;
- ret = glusterd_store_perform_brick_store (brickinfo);
out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
- return ret;
+ if (ret && (fd > 0)) {
+ gf_store_unlink_tmppath(brickinfo->shandle);
+ }
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ return ret;
}
int32_t
-glusterd_store_delete_brick (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *brickinfo)
+glusterd_store_perform_snapd_store(glusterd_volinfo_t *volinfo)
{
- int32_t ret = -1;
- glusterd_conf_t *priv = NULL;
- char path[PATH_MAX] = {0,};
- char brickpath[PATH_MAX] = {0,};
- char *ptr = NULL;
- char *tmppath = NULL;
-
- GF_ASSERT (volinfo);
- GF_ASSERT (brickinfo);
-
- priv = THIS->private;
+ int fd = -1;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(volinfo);
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ fd = gf_store_mkstemp(volinfo->snapd.handle);
+ if (fd <= 0) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "failed to create the "
+ "temporary file for the snapd store handle of volume "
+ "%s",
+ volinfo->volname);
+ goto out;
+ }
+
+ ret = glusterd_store_snapd_write(fd, volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_PORT_STORE_FAIL,
+ "failed to write snapd port "
+ "info to store handle (volume: %s",
+ volinfo->volname);
+ goto out;
+ }
+
+ ret = gf_store_rename_tmppath(volinfo->snapd.handle);
- GF_ASSERT (priv);
-
- GLUSTERD_GET_BRICK_DIR (path, volinfo, priv);
-
- tmppath = gf_strdup (brickinfo->path);
+out:
+ if (ret && (fd > 0))
+ gf_store_unlink_tmppath(volinfo->snapd.handle);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ return ret;
+}
- ptr = strchr (tmppath, '/');
+static int32_t
+glusterd_store_brickinfo(glusterd_volinfo_t *volinfo,
+ glusterd_brickinfo_t *brickinfo, int32_t brick_count,
+ int vol_fd, int is_thin_arbiter)
+{
+ int32_t ret = -1;
- while (ptr) {
- *ptr = '-';
- ptr = strchr (tmppath, '/');
- }
+ GF_ASSERT(volinfo);
+ GF_ASSERT(brickinfo);
- snprintf (brickpath, sizeof (brickpath), "%s/%s:%s",
- path, brickinfo->hostname, tmppath);
+ ret = glusterd_store_volinfo_brick_fname_write(
+ vol_fd, brickinfo, brick_count, is_thin_arbiter);
+ if (ret)
+ goto out;
- GF_FREE (tmppath);
+ ret = glusterd_store_create_brick_shandle_on_absence(volinfo, brickinfo);
+ if (ret)
+ goto out;
- ret = unlink (brickpath);
+ ret = glusterd_store_perform_brick_store(brickinfo);
+out:
+ gf_msg_debug(THIS->name, 0, "Returning with %d", ret);
+ return ret;
+}
- if ((ret < 0) && (errno != ENOENT)) {
- gf_log ("", GF_LOG_ERROR, "Unlink failed on %s, reason: %s",
- brickpath, strerror(errno));
- ret = -1;
- goto out;
- } else {
- ret = 0;
- }
+int32_t
+glusterd_store_snapd_info(glusterd_volinfo_t *volinfo)
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ GF_ASSERT(volinfo);
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ ret = glusterd_store_create_snapd_shandle_on_absence(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_CREATE_FAIL,
+ "failed to create store "
+ "handle for snapd (volume: %s)",
+ volinfo->volname);
+ goto out;
+ }
+
+ ret = glusterd_store_perform_snapd_store(volinfo);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPD_INFO_STORE_FAIL,
+ "failed to store snapd info "
+ "of the volume %s",
+ volinfo->volname);
out:
- if (brickinfo->shandle) {
- glusterd_store_handle_destroy (brickinfo->shandle);
- brickinfo->shandle = NULL;
- }
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
- return ret;
+ if (ret)
+ gf_store_unlink_tmppath(volinfo->snapd.handle);
+
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
+ return ret;
}
int32_t
-glusterd_store_remove_bricks (glusterd_volinfo_t *volinfo)
+glusterd_store_delete_brick(glusterd_brickinfo_t *brickinfo, char *delete_path)
{
- int32_t ret = 0;
- glusterd_brickinfo_t *tmp = NULL;
- glusterd_conf_t *priv = NULL;
- char brickdir [PATH_MAX] = {0,};
- DIR *dir = NULL;
- struct dirent *entry = NULL;
- char path[PATH_MAX] = {0,};
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ char brickpath[PATH_MAX] = {
+ 0,
+ };
+ char *ptr = NULL;
+ char *tmppath = NULL;
+ xlator_t *this = NULL;
- GF_ASSERT (volinfo);
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(brickinfo);
- list_for_each_entry (tmp, &volinfo->bricks, brick_list) {
- ret = glusterd_store_delete_brick (volinfo, tmp);
- if (ret)
- goto out;
- }
+ priv = this->private;
+ GF_ASSERT(priv);
- priv = THIS->private;
- GF_ASSERT (priv);
+ tmppath = gf_strdup(brickinfo->path);
- GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv);
+ ptr = strchr(tmppath, '/');
- dir = opendir (brickdir);
+ while (ptr) {
+ *ptr = '-';
+ ptr = strchr(tmppath, '/');
+ }
- glusterd_for_each_entry (entry, dir);
+ snprintf(brickpath, sizeof(brickpath),
+ "%s/" GLUSTERD_BRICK_INFO_DIR "/%s:%s", delete_path,
+ brickinfo->hostname, tmppath);
- while (entry) {
- snprintf (path, sizeof (path), "%s/%s",
- brickdir, entry->d_name);
- ret = unlink (path);
- if (ret && errno != ENOENT) {
- gf_log ("", GF_LOG_ERROR, "Unable to unlink %s, "
- "reason: %s", path, strerror(errno));
- }
- glusterd_for_each_entry (entry, dir);
- }
+ GF_FREE(tmppath);
- closedir (dir);
+ ret = sys_unlink(brickpath);
- ret = rmdir (brickdir);
+ if ((ret < 0) && (errno != ENOENT)) {
+ gf_msg_debug(this->name, 0, "Unlink failed on %s", brickpath);
+ ret = -1;
+ goto out;
+ } else {
+ ret = 0;
+ }
out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
- return ret;
+ if (brickinfo->shandle) {
+ gf_store_handle_destroy(brickinfo->shandle);
+ brickinfo->shandle = NULL;
+ }
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
+ return ret;
}
-static void
-_storeslaves (dict_t *this, char *key, data_t *value, void *data)
+static int
+_storeopts(dict_t *dict_value, char *key, data_t *value, void *data)
{
- int32_t ret = 0;
- glusterd_store_handle_t *shandle = NULL;
+ int32_t ret = 0;
+ int32_t exists = 0;
+ int32_t option_len = 0;
+ gf_store_handle_t *shandle = NULL;
+ glusterd_volinfo_data_store_t *dict_data = NULL;
+ xlator_t *this = NULL;
- shandle = (glusterd_store_handle_t*)data;
+ this = THIS;
+ GF_ASSERT(this);
- GF_ASSERT (shandle);
- GF_ASSERT (shandle->fd > 0);
- GF_ASSERT (shandle->path);
- GF_ASSERT (key);
- GF_ASSERT (value && value->data);
+ dict_data = (glusterd_volinfo_data_store_t *)data;
+ shandle = dict_data->shandle;
- if ((!shandle) || (shandle->fd <= 0) || (!shandle->path))
- return;
+ GF_ASSERT(shandle);
+ GF_ASSERT(shandle->fd > 0);
+ GF_ASSERT(key);
+ GF_ASSERT(value);
+ GF_ASSERT(value->data);
- if (!key)
- return;
- if (!value || !value->data)
- return;
+ if (dict_data->key_check == 1) {
+ if (is_key_glusterd_hooks_friendly(key)) {
+ exists = 1;
- gf_log ("", GF_LOG_DEBUG, "Storing in volinfo:key= %s, val=%s",
- key, value->data);
-
- ret = glusterd_store_save_value (shandle->fd, key, (char*)value->data);
+ } else {
+ exists = glusterd_check_option_exists(key, NULL);
+ }
+ }
+ if (exists == 1 || dict_data->key_check == 0) {
+ gf_msg_debug(this->name, 0,
+ "Storing in buffer for volinfo:key= %s, "
+ "val=%s",
+ key, value->data);
+ } else {
+ gf_msg_debug(this->name, 0, "Discarding:key= %s, val=%s", key,
+ value->data);
+ return 0;
+ }
+
+ /*
+ * The option_len considers the length of the key value
+ * pair and along with that '=' and '\n', but as value->len
+ * already considers a NULL at the end of the data, adding
+ * just 1.
+ */
+ option_len = strlen(key) + value->len + 1;
+
+ if ((VOLINFO_BUFFER_SIZE - dict_data->buffer_len - 1) < option_len) {
+ ret = gf_store_save_items(shandle->fd, dict_data->buffer);
if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to write into store"
- " handle for path: %s", shandle->path);
- return;
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL);
+ return -1;
}
+ dict_data->buffer_len = 0;
+ dict_data->buffer[0] = '\0';
+ }
+ ret = snprintf(dict_data->buffer + dict_data->buffer_len, option_len + 1,
+ "%s=%s\n", key, value->data);
+ if (ret < 0 || ret > option_len + 1) {
+ gf_smsg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_COPY_FAIL, NULL);
+ return -1;
+ }
+
+ dict_data->buffer_len += ret;
+
+ return 0;
}
-
-void _storeopts (dict_t *this, char *key, data_t *value, void *data)
+/* Store the volumes snapshot details only if required
+ *
+ * The snapshot details will be stored only if the cluster op-version is
+ * greater than or equal to 4
+ */
+static int
+glusterd_volume_write_snap_details(int fd, glusterd_volinfo_t *volinfo)
{
- int32_t ret = 0;
- int32_t exists = 0;
- glusterd_store_handle_t *shandle = NULL;
-
- shandle = (glusterd_store_handle_t*)data;
-
- GF_ASSERT (shandle);
- GF_ASSERT (shandle->fd > 0);
- GF_ASSERT (shandle->path);
- GF_ASSERT (key);
- GF_ASSERT (value && value->data);
-
- if ((!shandle) || (shandle->fd <= 0) || (!shandle->path))
- return;
-
- if (!key)
- return;
- if (!value || !value->data)
- return;
-
- exists = glusterd_check_option_exists (key, NULL);
- if (1 == exists) {
- gf_log ("", GF_LOG_DEBUG, "Storing in volinfo:key= %s, val=%s",
- key, value->data);
- } else {
- gf_log ("", GF_LOG_DEBUG, "Discarding:key= %s, val=%s",
- key, value->data);
- return;
- }
-
- ret = glusterd_store_save_value (shandle->fd, key, (char*)value->data);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to write into store"
- " handle for path: %s", shandle->path);
- return;
- }
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ char buf[PATH_MAX] = {
+ 0,
+ };
+
+ this = THIS;
+ GF_ASSERT(this != NULL);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out);
+
+ GF_VALIDATE_OR_GOTO(this->name, (fd > 0), out);
+ GF_VALIDATE_OR_GOTO(this->name, (volinfo != NULL), out);
+
+ if (conf->op_version < GD_OP_VERSION_3_6_0) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = snprintf(buf, sizeof(buf), "%s=%s\n%s=%s\n%s=%" PRIu64 "\n",
+ GLUSTERD_STORE_KEY_PARENT_VOLNAME, volinfo->parent_volname,
+ GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP,
+ uuid_utoa(volinfo->restored_from_snap),
+ GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+ volinfo->snap_max_hard_limit);
+ if (ret < 0 || ret >= sizeof(buf)) {
+ ret = -1;
+ goto err;
+ }
+
+ ret = gf_store_save_items(fd, buf);
+ if (ret) {
+ goto err;
+ }
+ ret = glusterd_store_snapd_info(volinfo);
+err:
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPINFO_WRITE_FAIL,
+ "Failed to write snap details"
+ " for volume %s",
+ volinfo->volname);
+out:
+ return ret;
}
-int32_t
-glusterd_volume_exclude_options_write (int fd, glusterd_volinfo_t *volinfo)
+static int32_t
+glusterd_volume_exclude_options_write(int fd, glusterd_volinfo_t *volinfo)
{
- char *str = NULL;
+ char *str = NULL;
+ char buf[PATH_MAX];
+ uint total_len = 0;
+ int32_t ret = -1;
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = NULL;
+
+ GF_ASSERT(this);
+ GF_ASSERT(fd > 0);
+ GF_ASSERT(volinfo);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, (conf != NULL), out);
+
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len,
+ "%s=%d\n%s=%d\n%s=%d\n%s=%d\n%s=%d\n%s=%d\n",
+ GLUSTERD_STORE_KEY_VOL_TYPE, volinfo->type,
+ GLUSTERD_STORE_KEY_VOL_COUNT, volinfo->brick_count,
+ GLUSTERD_STORE_KEY_VOL_STATUS, volinfo->status,
+ GLUSTERD_STORE_KEY_VOL_SUB_COUNT, volinfo->sub_count,
+ GLUSTERD_STORE_KEY_VOL_STRIPE_CNT, volinfo->stripe_count,
+ GLUSTERD_STORE_KEY_VOL_REPLICA_CNT, volinfo->replica_count);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ total_len += ret;
+
+ if ((conf->op_version >= GD_OP_VERSION_3_7_6) && volinfo->arbiter_count) {
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%d\n",
+ GLUSTERD_STORE_KEY_VOL_ARBITER_CNT,
+ volinfo->arbiter_count);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ total_len += ret;
+ }
+
+ if (conf->op_version >= GD_OP_VERSION_3_6_0) {
+ ret = snprintf(
+ buf + total_len, sizeof(buf) - total_len, "%s=%d\n%s=%d\n",
+ GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT, volinfo->disperse_count,
+ GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT, volinfo->redundancy_count);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ total_len += ret;
+ }
+
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len,
+ "%s=%d\n%s=%d\n%s=%s\n", GLUSTERD_STORE_KEY_VOL_VERSION,
+ volinfo->version, GLUSTERD_STORE_KEY_VOL_TRANSPORT,
+ volinfo->transport_type, GLUSTERD_STORE_KEY_VOL_ID,
+ uuid_utoa(volinfo->volume_id));
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ total_len += ret;
+
+ str = glusterd_auth_get_username(volinfo);
+ if (str) {
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%s\n",
+ GLUSTERD_STORE_KEY_USERNAME, str);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ total_len += ret;
+ }
+
+ str = glusterd_auth_get_password(volinfo);
+ if (str) {
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%s\n",
+ GLUSTERD_STORE_KEY_PASSWORD, str);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ total_len += ret;
+ }
+
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%d\n%s=%d\n",
+ GLUSTERD_STORE_KEY_VOL_OP_VERSION, volinfo->op_version,
+ GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
+ volinfo->client_op_version);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ total_len += ret;
+
+ if (conf->op_version >= GD_OP_VERSION_3_7_6) {
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%d\n",
+ GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION,
+ volinfo->quota_xattr_version);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ total_len += ret;
+ }
+ if (conf->op_version >= GD_OP_VERSION_3_10_0) {
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=0\n",
+ GF_TIER_ENABLED);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ total_len += ret;
+ }
+
+ if ((conf->op_version >= GD_OP_VERSION_7_0) &&
+ volinfo->thin_arbiter_count) {
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%d\n",
+ GLUSTERD_STORE_KEY_VOL_THIN_ARBITER_CNT,
+ volinfo->thin_arbiter_count);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ total_len += ret;
+ }
- GF_ASSERT (fd > 0);
- GF_ASSERT (volinfo);
+ ret = gf_store_save_items(fd, buf);
+ if (ret)
+ goto out;
- char buf[PATH_MAX] = {0,};
- int32_t ret = -1;
+ ret = glusterd_volume_write_snap_details(fd, volinfo);
- snprintf (buf, sizeof (buf), "%d", volinfo->type);
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_TYPE, buf);
- if (ret)
- goto out;
+out:
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_VALS_WRITE_FAIL,
+ "Unable to write volume "
+ "values for %s",
+ volinfo->volname);
+ return ret;
+}
- snprintf (buf, sizeof (buf), "%d", volinfo->brick_count);
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_COUNT, buf);
- if (ret)
- goto out;
+static void
+glusterd_store_voldirpath_set(glusterd_volinfo_t *volinfo, char *voldirpath)
+{
+ glusterd_conf_t *priv = NULL;
- snprintf (buf, sizeof (buf), "%d", volinfo->status);
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_STATUS, buf);
- if (ret)
- goto out;
+ GF_ASSERT(volinfo);
+ priv = THIS->private;
+ GF_ASSERT(priv);
- snprintf (buf, sizeof (buf), "%d", volinfo->sub_count);
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_SUB_COUNT,
- buf);
- if (ret)
- goto out;
+ GLUSTERD_GET_VOLUME_DIR(voldirpath, volinfo, priv);
+}
- snprintf (buf, sizeof (buf), "%d", volinfo->stripe_count);
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_STRIPE_CNT,
- buf);
- if (ret)
- goto out;
+static void
+glusterd_store_piddirpath_set(glusterd_volinfo_t *volinfo, char *piddirpath)
+{
+ glusterd_conf_t *priv = NULL;
- snprintf (buf, sizeof (buf), "%d", volinfo->replica_count);
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT,
- buf);
- if (ret)
- goto out;
+ GF_ASSERT(volinfo);
+ priv = THIS->private;
+ GF_ASSERT(priv);
- snprintf (buf, sizeof (buf), "%d", volinfo->version);
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_VERSION,
- buf);
- if (ret)
- goto out;
+ GLUSTERD_GET_VOLUME_PID_DIR(piddirpath, volinfo, priv);
+}
- snprintf (buf, sizeof (buf), "%d", volinfo->transport_type);
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_TRANSPORT,
- buf);
- if (ret)
- goto out;
+static int32_t
+glusterd_store_create_volume_dirs(glusterd_volinfo_t *volinfo)
+{
+ int32_t ret = -1;
+ char dirpath[PATH_MAX] = {
+ 0,
+ };
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_ID,
- uuid_utoa (volinfo->volume_id));
- if (ret)
- goto out;
+ GF_ASSERT(volinfo);
- str = glusterd_auth_get_username (volinfo);
- if (str) {
- ret = glusterd_store_save_value (fd,
- GLUSTERD_STORE_KEY_USERNAME,
- str);
- if (ret)
- goto out;
- }
+ glusterd_store_voldirpath_set(volinfo, dirpath);
+ ret = gf_store_mkdir(dirpath);
+ if (ret)
+ goto out;
- str = glusterd_auth_get_password (volinfo);
- if (str) {
- ret = glusterd_store_save_value (fd,
- GLUSTERD_STORE_KEY_PASSWORD,
- str);
- if (ret)
- goto out;
- }
+ glusterd_store_piddirpath_set(volinfo, dirpath);
+ ret = gf_store_mkdir(dirpath);
+ if (ret)
+ goto out;
out:
- if (ret)
- gf_log ("", GF_LOG_ERROR, "Unable to write volume values"
- " for %s", volinfo->volname);
- return ret;
+ gf_msg_debug(THIS->name, 0, "Returning with %d", ret);
+ return ret;
}
-static void
-glusterd_store_voldirpath_set (glusterd_volinfo_t *volinfo, char *voldirpath,
- size_t len)
+int32_t
+glusterd_store_create_snap_dir(glusterd_snap_t *snap)
{
- glusterd_conf_t *priv = NULL;
-
- GF_ASSERT (volinfo);
- priv = THIS->private;
- GF_ASSERT (priv);
-
- snprintf (voldirpath, len, "%s/%s/%s", priv->workdir,
- GLUSTERD_VOLUME_DIR_PREFIX, volinfo->volname);
+ int32_t ret = -1;
+ char snapdirpath[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+
+ priv = THIS->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(snap);
+
+ GLUSTERD_GET_SNAP_DIR(snapdirpath, snap, priv);
+
+ ret = mkdir_p(snapdirpath, 0755, _gf_true);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
+ "Failed to create snaps dir "
+ "%s",
+ snapdirpath);
+ }
+ return ret;
}
static int32_t
-glusterd_store_create_volume_dir (glusterd_volinfo_t *volinfo)
+glusterd_store_volinfo_write(int fd, glusterd_volinfo_t *volinfo)
{
- int32_t ret = -1;
- char voldirpath[PATH_MAX] = {0,};
-
- GF_ASSERT (volinfo);
+ int32_t ret = -1;
+ gf_store_handle_t *shandle = NULL;
+ GF_ASSERT(fd > 0);
+ GF_ASSERT(volinfo);
+ GF_ASSERT(volinfo->shandle);
+ xlator_t *this = NULL;
+ glusterd_volinfo_data_store_t *dict_data = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ shandle = volinfo->shandle;
+
+ dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t),
+ gf_gld_mt_volinfo_dict_data_t);
+ if (dict_data == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL);
+ return -1;
+ }
+
+ ret = glusterd_volume_exclude_options_write(fd, volinfo);
+ if (ret) {
+ goto out;
+ }
+
+ dict_data->shandle = shandle;
+ dict_data->key_check = 1;
+
+ shandle->fd = fd;
+ dict_foreach(volinfo->dict, _storeopts, (void *)dict_data);
+
+ dict_data->key_check = 0;
+ dict_foreach(volinfo->gsync_slaves, _storeopts, (void *)dict_data);
+
+ if (dict_data->buffer_len > 0) {
+ ret = gf_store_save_items(fd, dict_data->buffer);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL);
+ goto out;
+ }
+ }
- glusterd_store_voldirpath_set (volinfo, voldirpath,
- sizeof (voldirpath));
- ret = glusterd_store_mkdir (voldirpath);
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
- return ret;
+ shandle->fd = 0;
+out:
+ GF_FREE(dict_data);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
-int32_t
-glusterd_store_volinfo_write (int fd, glusterd_volinfo_t *volinfo)
+static int32_t
+glusterd_store_snapinfo_write(glusterd_snap_t *snap)
{
- int32_t ret = -1;
- glusterd_store_handle_t *shandle = NULL;
- GF_ASSERT (fd > 0);
- GF_ASSERT (volinfo);
- GF_ASSERT (volinfo->shandle);
-
- shandle = volinfo->shandle;
- ret = glusterd_volume_exclude_options_write (fd, volinfo);
- if (ret)
- goto out;
+ int32_t ret = -1;
+ int fd = 0;
+ char buf[PATH_MAX];
+ uint total_len = 0;
+
+ GF_ASSERT(snap);
+
+ fd = gf_store_mkstemp(snap->shandle);
+ if (fd <= 0)
+ goto out;
+
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len,
+ "%s=%s\n%s=%d\n%s=%d\n", GLUSTERD_STORE_KEY_SNAP_ID,
+ uuid_utoa(snap->snap_id), GLUSTERD_STORE_KEY_SNAP_STATUS,
+ snap->snap_status, GLUSTERD_STORE_KEY_SNAP_RESTORED,
+ snap->snap_restored);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ total_len += ret;
+
+ if (snap->description) {
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%s\n",
+ GLUSTERD_STORE_KEY_SNAP_DESC, snap->description);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ total_len += ret;
+ }
- shandle->fd = fd;
- dict_foreach (volinfo->dict, _storeopts, shandle);
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%ld\n",
+ GLUSTERD_STORE_KEY_SNAP_TIMESTAMP, snap->time_stamp);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ ret = gf_store_save_items(fd, buf);
- dict_foreach (volinfo->gsync_slaves, _storeslaves, shandle);
- shandle->fd = 0;
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ return ret;
}
static void
-glusterd_store_rbstatepath_set (glusterd_volinfo_t *volinfo, char *rbstatepath,
- size_t len)
+glusterd_store_volfpath_set(glusterd_volinfo_t *volinfo, char *volfpath,
+ size_t len)
{
- char voldirpath[PATH_MAX] = {0,};
- GF_ASSERT (volinfo);
- GF_ASSERT (rbstatepath);
- GF_ASSERT (len <= PATH_MAX);
-
- glusterd_store_voldirpath_set (volinfo, voldirpath,
- sizeof (voldirpath));
- snprintf (rbstatepath, len, "%s/%s", voldirpath,
- GLUSTERD_VOLUME_RBSTATE_FILE);
+ char voldirpath[PATH_MAX] = {
+ 0,
+ };
+ GF_ASSERT(volinfo);
+ GF_ASSERT(volfpath);
+ GF_ASSERT(len <= PATH_MAX);
+
+ glusterd_store_voldirpath_set(volinfo, voldirpath);
+ snprintf(volfpath, len, "%s/%s", voldirpath, GLUSTERD_VOLUME_INFO_FILE);
}
static void
-glusterd_store_volfpath_set (glusterd_volinfo_t *volinfo, char *volfpath,
- size_t len)
+glusterd_store_node_state_path_set(glusterd_volinfo_t *volinfo,
+ char *node_statepath, size_t len)
{
- char voldirpath[PATH_MAX] = {0,};
- GF_ASSERT (volinfo);
- GF_ASSERT (volfpath);
- GF_ASSERT (len <= PATH_MAX);
-
- glusterd_store_voldirpath_set (volinfo, voldirpath,
- sizeof (voldirpath));
- snprintf (volfpath, len, "%s/%s", voldirpath, GLUSTERD_VOLUME_INFO_FILE);
+ char voldirpath[PATH_MAX] = {
+ 0,
+ };
+ GF_ASSERT(volinfo);
+ GF_ASSERT(node_statepath);
+ GF_ASSERT(len <= PATH_MAX);
+
+ glusterd_store_voldirpath_set(volinfo, voldirpath);
+ snprintf(node_statepath, len, "%s/%s", voldirpath,
+ GLUSTERD_NODE_STATE_FILE);
}
static void
-glusterd_store_node_state_path_set (glusterd_volinfo_t *volinfo,
- char *node_statepath, size_t len)
+glusterd_store_quota_conf_path_set(glusterd_volinfo_t *volinfo,
+ char *quota_conf_path, size_t len)
{
- char voldirpath[PATH_MAX] = {0,};
- GF_ASSERT (volinfo);
- GF_ASSERT (node_statepath);
- GF_ASSERT (len <= PATH_MAX);
-
- glusterd_store_voldirpath_set (volinfo, voldirpath,
- sizeof (voldirpath));
- snprintf (node_statepath, len, "%s/%s", voldirpath,
- GLUSTERD_NODE_STATE_FILE);
+ char voldirpath[PATH_MAX] = {
+ 0,
+ };
+ GF_ASSERT(volinfo);
+ GF_ASSERT(quota_conf_path);
+ GF_ASSERT(len <= PATH_MAX);
+
+ glusterd_store_voldirpath_set(volinfo, voldirpath);
+ snprintf(quota_conf_path, len, "%s/%s", voldirpath,
+ GLUSTERD_VOLUME_QUOTA_CONFIG);
}
-int32_t
-glusterd_store_create_rbstate_shandle_on_absence (glusterd_volinfo_t *volinfo)
+static void
+glusterd_store_missed_snaps_list_path_set(char *missed_snaps_list, size_t len)
{
- char rbstatepath[PATH_MAX] = {0};
- int32_t ret = 0;
+ glusterd_conf_t *priv = NULL;
- GF_ASSERT (volinfo);
+ priv = THIS->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(missed_snaps_list);
+ GF_ASSERT(len <= PATH_MAX);
+
+ snprintf(missed_snaps_list, len,
+ "%s/snaps/" GLUSTERD_MISSED_SNAPS_LIST_FILE, priv->workdir);
+}
- glusterd_store_rbstatepath_set (volinfo, rbstatepath, sizeof (rbstatepath));
- ret = glusterd_store_handle_create_on_absence (&volinfo->rb_shandle,
- rbstatepath);
- return ret;
+static void
+glusterd_store_snapfpath_set(glusterd_snap_t *snap, char *snap_fpath,
+ size_t len)
+{
+ glusterd_conf_t *priv = NULL;
+ priv = THIS->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(snap);
+ GF_ASSERT(snap_fpath);
+ GF_ASSERT(len <= PATH_MAX);
+
+ snprintf(snap_fpath, len, "%s/snaps/%s/%s", priv->workdir, snap->snapname,
+ GLUSTERD_SNAP_INFO_FILE);
}
int32_t
-glusterd_store_create_vol_shandle_on_absence (glusterd_volinfo_t *volinfo)
+glusterd_store_create_vol_shandle_on_absence(glusterd_volinfo_t *volinfo)
{
- char volfpath[PATH_MAX] = {0};
- int32_t ret = 0;
+ char volfpath[PATH_MAX] = {0};
+ int32_t ret = 0;
- GF_ASSERT (volinfo);
+ GF_ASSERT(volinfo);
- glusterd_store_volfpath_set (volinfo, volfpath, sizeof (volfpath));
- ret = glusterd_store_handle_create_on_absence (&volinfo->shandle,
- volfpath);
- return ret;
+ glusterd_store_volfpath_set(volinfo, volfpath, sizeof(volfpath));
+ ret = gf_store_handle_create_on_absence(&volinfo->shandle, volfpath);
+ return ret;
}
int32_t
-glusterd_store_create_nodestate_sh_on_absence (glusterd_volinfo_t *volinfo)
+glusterd_store_create_nodestate_sh_on_absence(glusterd_volinfo_t *volinfo)
{
- char node_state_path[PATH_MAX] = {0};
- int32_t ret = 0;
+ char node_state_path[PATH_MAX] = {0};
+ int32_t ret = 0;
- GF_ASSERT (volinfo);
+ GF_ASSERT(volinfo);
- glusterd_store_node_state_path_set (volinfo, node_state_path,
- sizeof (node_state_path));
- ret =
- glusterd_store_handle_create_on_absence (&volinfo->node_state_shandle,
- node_state_path);
+ glusterd_store_node_state_path_set(volinfo, node_state_path,
+ sizeof(node_state_path));
+ ret = gf_store_handle_create_on_absence(&volinfo->node_state_shandle,
+ node_state_path);
- return ret;
+ return ret;
}
int32_t
-glusterd_store_brickinfos (glusterd_volinfo_t *volinfo, int vol_fd)
+glusterd_store_create_quota_conf_sh_on_absence(glusterd_volinfo_t *volinfo)
{
- int32_t ret = 0;
- glusterd_brickinfo_t *brickinfo = NULL;
- int32_t brick_count = 0;
+ char quota_conf_path[PATH_MAX] = {0};
+ int32_t ret = 0;
- GF_ASSERT (volinfo);
+ GF_ASSERT(volinfo);
- list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
- ret = glusterd_store_brickinfo (volinfo, brickinfo,
- brick_count, vol_fd);
- if (ret)
- goto out;
- brick_count++;
- }
-out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ glusterd_store_quota_conf_path_set(volinfo, quota_conf_path,
+ sizeof(quota_conf_path));
+ ret = gf_store_handle_create_on_absence(&volinfo->quota_conf_shandle,
+ quota_conf_path);
+
+ return ret;
}
-int32_t
-glusterd_store_rbstate_write (int fd, glusterd_volinfo_t *volinfo)
+static int32_t
+glusterd_store_create_missed_snaps_list_shandle_on_absence()
{
- int ret = -1;
- char buf[PATH_MAX] = {0, };
+ char missed_snaps_list[PATH_MAX] = "";
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
- GF_ASSERT (fd > 0);
- GF_ASSERT (volinfo);
+ this = THIS;
+ GF_ASSERT(this);
- snprintf (buf, sizeof (buf), "%d", volinfo->rb_status);
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_RB_STATUS,
- buf);
- if (ret)
- goto out;
+ priv = this->private;
+ GF_ASSERT(priv);
- if (volinfo->rb_status > GF_RB_STATUS_NONE) {
-
- snprintf (buf, sizeof (buf), "%s:%s",
- volinfo->src_brick->hostname,
- volinfo->src_brick->path);
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_RB_SRC_BRICK,
- buf);
- if (ret)
- goto out;
-
- snprintf (buf, sizeof (buf), "%s:%s",
- volinfo->dst_brick->hostname,
- volinfo->dst_brick->path);
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_RB_DST_BRICK,
- buf);
- if (ret)
- goto out;
- }
+ glusterd_store_missed_snaps_list_path_set(missed_snaps_list,
+ sizeof(missed_snaps_list));
-out:
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ ret = gf_store_handle_create_on_absence(&priv->missed_snaps_list_shandle,
+ missed_snaps_list);
+ return ret;
}
int32_t
-glusterd_store_perform_rbstate_store (glusterd_volinfo_t *volinfo)
+glusterd_store_create_snap_shandle_on_absence(glusterd_snap_t *snap)
{
- int fd = -1;
- int32_t ret = -1;
- GF_ASSERT (volinfo);
+ char snapfpath[PATH_MAX] = {0};
+ int32_t ret = 0;
- fd = glusterd_store_mkstemp (volinfo->rb_shandle);
- if (fd <= 0) {
- ret = -1;
- goto out;
- }
+ GF_ASSERT(snap);
- ret = glusterd_store_rbstate_write (fd, volinfo);
- if (ret)
- goto out;
+ glusterd_store_snapfpath_set(snap, snapfpath, sizeof(snapfpath));
+ ret = gf_store_handle_create_on_absence(&snap->shandle, snapfpath);
+ return ret;
+}
- ret = glusterd_store_rename_tmppath (volinfo->rb_shandle);
+static int32_t
+glusterd_store_brickinfos(glusterd_volinfo_t *volinfo, int vol_fd)
+{
+ int32_t ret = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *ta_brickinfo = NULL;
+ int32_t brick_count = 0;
+ int32_t ta_brick_count = 0;
+
+ GF_ASSERT(volinfo);
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ ret = glusterd_store_brickinfo(volinfo, brickinfo, brick_count, vol_fd,
+ 0);
+ if (ret)
+ goto out;
+ brick_count++;
+ }
+ if (volinfo->thin_arbiter_count == 1) {
+ ta_brickinfo = list_first_entry(&volinfo->ta_bricks,
+ glusterd_brickinfo_t, brick_list);
+ ret = glusterd_store_brickinfo(volinfo, ta_brickinfo, ta_brick_count,
+ vol_fd, 1);
+ if (ret)
+ goto out;
+ }
out:
- if (ret && (fd > 0))
- glusterd_store_unlink_tmppath (volinfo->rb_shandle);
- if (fd > 0)
- close (fd);
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ return ret;
}
int32_t
-glusterd_store_node_state_write (int fd, glusterd_volinfo_t *volinfo)
+glusterd_store_node_state_write(int fd, glusterd_volinfo_t *volinfo)
{
- int ret = -1;
- char buf[PATH_MAX] = {0, };
+ int ret = -1;
+ char buf[PATH_MAX];
+ char uuid[UUID_SIZE + 1];
+ uint total_len = 0;
+ glusterd_volinfo_data_store_t *dict_data = NULL;
+ gf_store_handle_t shandle;
+ xlator_t *this = NULL;
- GF_ASSERT (fd > 0);
- GF_ASSERT (volinfo);
+ this = THIS;
+ GF_ASSERT(this);
- if (volinfo->defrag_cmd == GF_DEFRAG_CMD_STATUS) {
- ret = 0;
+ GF_ASSERT(fd > 0);
+ GF_ASSERT(volinfo);
+
+ if (volinfo->rebal.defrag_cmd == GF_DEFRAG_CMD_STATUS) {
+ ret = 0;
+ goto out;
+ }
+
+ gf_uuid_unparse(volinfo->rebal.rebalance_id, uuid);
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len,
+ "%s=%d\n%s=%d\n%s=%d\n%s=%s\n",
+ GLUSTERD_STORE_KEY_VOL_DEFRAG, volinfo->rebal.defrag_cmd,
+ GLUSTERD_STORE_KEY_VOL_DEFRAG_STATUS,
+ volinfo->rebal.defrag_status, GLUSTERD_STORE_KEY_DEFRAG_OP,
+ volinfo->rebal.op, GF_REBALANCE_TID_KEY, uuid);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ total_len += ret;
+
+ ret = snprintf(
+ buf + total_len, sizeof(buf) - total_len,
+ "%s=%" PRIu64 "\n%s=%" PRIu64 "\n%s=%" PRIu64 "\n%s=%" PRIu64
+ "\n%s=%" PRIu64 "\n%s=%lf\n",
+ GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES, volinfo->rebal.rebalance_files,
+ GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE, volinfo->rebal.rebalance_data,
+ GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED, volinfo->rebal.lookedup_files,
+ GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES,
+ volinfo->rebal.rebalance_failures,
+ GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED, volinfo->rebal.skipped_files,
+ GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME, volinfo->rebal.rebalance_time);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_store_save_items(fd, buf);
+ if (ret) {
+ goto out;
+ }
+
+ if (volinfo->rebal.dict) {
+ dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t),
+ gf_gld_mt_volinfo_dict_data_t);
+ if (dict_data == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL);
+ return -1;
+ }
+ dict_data->shandle = &shandle;
+ shandle.fd = fd;
+ dict_foreach(volinfo->rebal.dict, _storeopts, (void *)dict_data);
+ if (dict_data->buffer_len > 0) {
+ ret = gf_store_save_items(fd, dict_data->buffer);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED,
+ NULL);
goto out;
+ ;
+ }
}
-
- snprintf (buf, sizeof (buf), "%d", volinfo->defrag_cmd);
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_VOL_DEFRAG,
- buf);
+ }
out:
- gf_log (THIS->name, GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ GF_FREE(dict_data);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
int32_t
-glusterd_store_perform_node_state_store (glusterd_volinfo_t *volinfo)
+glusterd_store_perform_node_state_store(glusterd_volinfo_t *volinfo)
{
- int fd = -1;
- int32_t ret = -1;
- GF_ASSERT (volinfo);
+ int fd = -1;
+ int32_t ret = -1;
+ GF_ASSERT(volinfo);
- fd = glusterd_store_mkstemp (volinfo->node_state_shandle);
- if (fd <= 0) {
- ret = -1;
- goto out;
- }
+ fd = gf_store_mkstemp(volinfo->node_state_shandle);
+ if (fd <= 0) {
+ ret = -1;
+ goto out;
+ }
- ret = glusterd_store_node_state_write (fd, volinfo);
- if (ret)
- goto out;
+ ret = glusterd_store_node_state_write(fd, volinfo);
+ if (ret)
+ goto out;
- ret = glusterd_store_rename_tmppath (volinfo->node_state_shandle);
+ ret = gf_store_rename_tmppath(volinfo->node_state_shandle);
+ if (ret)
+ goto out;
out:
- if (ret && (fd > 0))
- glusterd_store_unlink_tmppath (volinfo->node_state_shandle);
- if (fd > 0)
- close (fd);
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (ret && (fd > 0))
+ gf_store_unlink_tmppath(volinfo->node_state_shandle);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ return ret;
}
-int32_t
-glusterd_store_perform_volume_store (glusterd_volinfo_t *volinfo)
+static int32_t
+glusterd_store_perform_volume_store(glusterd_volinfo_t *volinfo)
{
- int fd = -1;
- int32_t ret = -1;
- GF_ASSERT (volinfo);
+ int fd = -1;
+ int32_t ret = -1;
+ GF_ASSERT(volinfo);
- fd = glusterd_store_mkstemp (volinfo->shandle);
- if (fd <= 0) {
- ret = -1;
- goto out;
- }
+ fd = gf_store_mkstemp(volinfo->shandle);
+ if (fd <= 0) {
+ ret = -1;
+ goto out;
+ }
- ret = glusterd_store_volinfo_write (fd, volinfo);
- if (ret)
- goto out;
+ ret = glusterd_store_volinfo_write(fd, volinfo);
+ if (ret)
+ goto out;
- ret = glusterd_store_brickinfos (volinfo, fd);
- if (ret)
- goto out;
+ ret = glusterd_store_create_brick_dir(volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_brickinfos(volinfo, fd);
+ if (ret)
+ goto out;
- ret = glusterd_store_rename_tmppath (volinfo->shandle);
out:
- if (ret && (fd > 0))
- glusterd_store_unlink_tmppath (volinfo->shandle);
- if (fd > 0)
- close (fd);
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (ret && (fd > 0))
+ gf_store_unlink_tmppath(volinfo->shandle);
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+ return ret;
}
void
-glusterd_perform_volinfo_version_action (glusterd_volinfo_t *volinfo,
- glusterd_volinfo_ver_ac_t ac)
+glusterd_perform_volinfo_version_action(glusterd_volinfo_t *volinfo,
+ glusterd_volinfo_ver_ac_t ac)
{
- GF_ASSERT (volinfo);
+ GF_ASSERT(volinfo);
- switch (ac) {
+ switch (ac) {
case GLUSTERD_VOLINFO_VER_AC_NONE:
- break;
+ break;
case GLUSTERD_VOLINFO_VER_AC_INCREMENT:
- volinfo->version++;
- break;
- }
+ volinfo->version++;
+ break;
+ case GLUSTERD_VOLINFO_VER_AC_DECREMENT:
+ volinfo->version--;
+ break;
+ }
}
-int32_t
-glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac)
+void
+glusterd_store_bricks_cleanup_tmp(glusterd_volinfo_t *volinfo)
{
- int32_t ret = -1;
+ glusterd_brickinfo_t *brickinfo = NULL;
- GF_ASSERT (volinfo);
+ GF_ASSERT(volinfo);
- glusterd_perform_volinfo_version_action (volinfo, ac);
- ret = glusterd_store_create_volume_dir (volinfo);
- if (ret)
- goto out;
-
- ret = glusterd_store_create_vol_shandle_on_absence (volinfo);
- if (ret)
- goto out;
-
- ret = glusterd_store_create_rbstate_shandle_on_absence (volinfo);
- if (ret)
- goto out;
-
- ret = glusterd_store_create_nodestate_sh_on_absence (volinfo);
- if (ret)
- goto out;
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ gf_store_unlink_tmppath(brickinfo->shandle);
+ }
+}
- ret = glusterd_store_perform_volume_store (volinfo);
- if (ret)
- goto out;
+void
+glusterd_store_volume_cleanup_tmp(glusterd_volinfo_t *volinfo)
+{
+ GF_ASSERT(volinfo);
- ret = glusterd_store_perform_rbstate_store (volinfo);
- if (ret)
- goto out;
+ glusterd_store_bricks_cleanup_tmp(volinfo);
- ret = glusterd_store_perform_node_state_store (volinfo);
- if (ret)
- goto out;
+ gf_store_unlink_tmppath(volinfo->shandle);
- //checksum should be computed at the end
- ret = glusterd_volume_compute_cksum (volinfo);
- if (ret)
- goto out;
-out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
+ gf_store_unlink_tmppath(volinfo->node_state_shandle);
- return ret;
+ gf_store_unlink_tmppath(volinfo->snapd.handle);
}
-
int32_t
-glusterd_store_delete_volume (glusterd_volinfo_t *volinfo)
+glusterd_store_brickinfos_atomic_update(glusterd_volinfo_t *volinfo)
{
- char pathname[PATH_MAX] = {0,};
- int32_t ret = 0;
- glusterd_conf_t *priv = NULL;
- DIR *dir = NULL;
- struct dirent *entry = NULL;
- char path[PATH_MAX] = {0,};
- struct stat st = {0, };
+ int ret = -1;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *ta_brickinfo = NULL;
- GF_ASSERT (volinfo);
- priv = THIS->private;
-
- GF_ASSERT (priv);
- snprintf (pathname, sizeof (pathname), "%s/vols/%s", priv->workdir,
- volinfo->volname);
-
- dir = opendir (pathname);
- if (!dir)
- goto out;
- ret = glusterd_store_remove_bricks (volinfo);
+ GF_ASSERT(volinfo);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Remove bricks failed for %s",
- volinfo->volname);
- }
-
- glusterd_for_each_entry (entry, dir);
- while (entry) {
-
- snprintf (path, PATH_MAX, "%s/%s", pathname, entry->d_name);
- ret = stat (path, &st);
- if (ret == -1) {
- gf_log ("", GF_LOG_ERROR, "Failed to stat entry: %s:%s",
- path, strerror (errno));
- goto stat_failed;
- }
-
- if (S_ISDIR (st.st_mode))
- ret = rmdir (path);
- else
- ret = unlink (path);
-
- if (ret)
- gf_log ("", GF_LOG_INFO, "errno:%d (%s)", errno,
- strerror (errno));
-
- gf_log ("", GF_LOG_INFO, "%s %s",
- ret?"Failed to remove":"Removed",
- entry->d_name);
-stat_failed:
- memset (path, 0, sizeof(path));
- glusterd_for_each_entry (entry, dir);
- }
-
- ret = closedir (dir);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Failed to close dir, errno:%d",
- errno);
- }
-
- ret = rmdir (pathname);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Failed to rmdir: %s, err: %s",
- pathname, strerror (errno));
- }
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ ret = gf_store_rename_tmppath(brickinfo->shandle);
+ if (ret)
+ goto out;
+ }
+ if (volinfo->thin_arbiter_count == 1) {
+ ta_brickinfo = list_first_entry(&volinfo->ta_bricks,
+ glusterd_brickinfo_t, brick_list);
+ ret = gf_store_rename_tmppath(ta_brickinfo->shandle);
+ if (ret)
+ goto out;
+ }
out:
- if (volinfo->shandle) {
- glusterd_store_handle_destroy (volinfo->shandle);
- volinfo->shandle = NULL;
- }
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
-
- return ret;
+ return ret;
}
-
-int
-glusterd_store_read_and_tokenize (FILE *file, char *str,
- char **iter_key, char **iter_val,
- glusterd_store_op_errno_t *store_errno)
+int32_t
+glusterd_store_volinfo_atomic_update(glusterd_volinfo_t *volinfo)
{
- int32_t ret = -1;
- char *savetok = NULL;
-
- GF_ASSERT (file);
- GF_ASSERT (str);
- GF_ASSERT (iter_key);
- GF_ASSERT (iter_val);
- GF_ASSERT (store_errno);
-
- ret = fscanf (file, "%s", str);
- if (ret <= 0 || feof (file)) {
- ret = -1;
- *store_errno = GD_STORE_EOF;
- goto out;
- }
+ int ret = -1;
+ GF_ASSERT(volinfo);
- *iter_key = strtok_r (str, "=", &savetok);
- if (*iter_key == NULL) {
- ret = -1;
- *store_errno = GD_STORE_KEY_NULL;
- goto out;
- }
+ ret = gf_store_rename_tmppath(volinfo->shandle);
+ if (ret)
+ goto out;
- *iter_val = strtok_r (NULL, "=", &savetok);
- if (*iter_key == NULL) {
- ret = -1;
- *store_errno = GD_STORE_VALUE_NULL;
- goto out;
- }
-
- *store_errno = GD_STORE_SUCCESS;
- ret = 0;
out:
- return ret;
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Couldn't rename "
+ "temporary file(s)");
+ return ret;
}
int32_t
-glusterd_store_retrieve_value (glusterd_store_handle_t *handle,
- char *key, char **value)
+glusterd_store_volume_atomic_update(glusterd_volinfo_t *volinfo)
{
- int32_t ret = -1;
- char *scan_str = NULL;
- char *iter_key = NULL;
- char *iter_val = NULL;
- char *free_str = NULL;
- struct stat st = {0,};
- glusterd_store_op_errno_t store_errno = GD_STORE_SUCCESS;
-
- GF_ASSERT (handle);
-
- handle->fd = open (handle->path, O_RDWR);
-
- if (handle->fd == -1) {
- gf_log ("", GF_LOG_ERROR, "Unable to open file %s errno: %s",
- handle->path, strerror (errno));
- goto out;
- }
- if (!handle->read)
- handle->read = fdopen (handle->fd, "r");
-
- if (!handle->read) {
- gf_log ("", GF_LOG_ERROR, "Unable to open file %s errno: %s",
- handle->path, strerror (errno));
- goto out;
- }
-
- ret = fstat (handle->fd, &st);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_WARNING,
- "stat on file failed");
- ret = -1;
- store_errno = GD_STORE_STAT_FAILED;
- goto out;
- }
+ int ret = -1;
+ GF_ASSERT(volinfo);
- scan_str = GF_CALLOC (1, st.st_size,
- gf_gld_mt_char);
- if (scan_str == NULL) {
- ret = -1;
- store_errno = GD_STORE_ENOMEM;
- goto out;
- }
+ ret = glusterd_store_brickinfos_atomic_update(volinfo);
+ if (ret)
+ goto out;
- free_str = scan_str;
+ ret = glusterd_store_volinfo_atomic_update(volinfo);
- do {
- ret = glusterd_store_read_and_tokenize (handle->read, scan_str,
- &iter_key, &iter_val,
- &store_errno);
- if (ret < 0) {
- goto out;
- }
-
- gf_log ("", GF_LOG_DEBUG, "key %s read", iter_key);
-
- if (!strcmp (key, iter_key)) {
- gf_log ("", GF_LOG_DEBUG, "key %s found", key);
- ret = 0;
- if (iter_val)
- *value = gf_strdup (iter_val);
- goto out;
- }
- } while (1);
out:
- if (handle->fd > 0) {
- close (handle->fd);
- handle->read = NULL;
- }
-
- if (free_str)
- GF_FREE (free_str);
-
- return ret;
+ return ret;
}
int32_t
-glusterd_store_save_value (int fd, char *key, char *value)
+glusterd_store_snap_atomic_update(glusterd_snap_t *snap)
{
- int32_t ret = -1;
- FILE *fp = NULL;
+ int ret = -1;
+ GF_ASSERT(snap);
- GF_ASSERT (fd > 0);
- GF_ASSERT (key);
- GF_ASSERT (value);
+ ret = gf_store_rename_tmppath(snap->shandle);
+ if (ret)
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Couldn't rename "
+ "temporary file(s)");
- fp = fdopen (fd, "a+");
- if (fp == NULL) {
- gf_log ("", GF_LOG_WARNING, "fdopen failed.");
- ret = -1;
- goto out;
- }
-
- ret = fprintf (fp, "%s=%s\n", key, value);
- if (ret < 0) {
- gf_log ("", GF_LOG_WARNING, "Unable to store key: %s,"
- "value: %s, error: %s", key, value,
- strerror (errno));
- ret = -1;
- goto out;
- }
+ return ret;
+}
- ret = fflush (fp);
- if (feof (fp)) {
- gf_log ("", GF_LOG_WARNING,
- "fflush failed, error: %s",
- strerror (errno));
- ret = -1;
- goto out;
- }
+int32_t
+glusterd_store_snap(glusterd_snap_t *snap)
+{
+ int32_t ret = -1;
+
+ GF_ASSERT(snap);
+
+ ret = glusterd_store_create_snap_dir(snap);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SNAPDIR_CREATE_FAIL,
+ "Failed to create snap dir");
+ goto out;
+ }
+
+ ret = glusterd_store_create_snap_shandle_on_absence(snap);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SNAPINFO_CREATE_FAIL,
+ "Failed to create snap info "
+ "file");
+ goto out;
+ }
+
+ ret = glusterd_store_snapinfo_write(snap);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SNAPINFO_WRITE_FAIL,
+ "Failed to write snap info");
+ goto out;
+ }
+
+ ret = glusterd_store_snap_atomic_update(snap);
+ if (ret) {
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_AUTOMIC_UPDATE_FAIL,
+ "Failed to do automic update");
+ goto out;
+ }
- ret = 0;
out:
+ if (ret && snap->shandle)
+ gf_store_unlink_tmppath(snap->shandle);
- gf_log ("", GF_LOG_DEBUG, "returning: %d", ret);
- return ret;
+ gf_msg_trace(THIS->name, 0, "Returning %d", ret);
+ return ret;
}
int32_t
-glusterd_store_handle_new (char *path, glusterd_store_handle_t **handle)
+glusterd_store_volinfo(glusterd_volinfo_t *volinfo,
+ glusterd_volinfo_ver_ac_t ac)
{
- int32_t ret = -1;
- glusterd_store_handle_t *shandle = NULL;
- int fd = -1;
- char *spath = NULL;
+ int32_t ret = -1;
+ glusterfs_ctx_t *ctx = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ ctx = this->ctx;
+ GF_ASSERT(ctx);
+ GF_ASSERT(volinfo);
+
+ pthread_mutex_lock(&ctx->cleanup_lock);
+ pthread_mutex_lock(&volinfo->store_volinfo_lock);
+ {
+ glusterd_perform_volinfo_version_action(volinfo, ac);
+
+ ret = glusterd_store_create_volume_dirs(volinfo);
+ if (ret)
+ goto unlock;
- shandle = GF_CALLOC (1, sizeof (*shandle), gf_gld_mt_store_handle_t);
- if (!shandle)
- goto out;
+ ret = glusterd_store_create_vol_shandle_on_absence(volinfo);
+ if (ret)
+ goto unlock;
- spath = gf_strdup (path);
+ ret = glusterd_store_create_nodestate_sh_on_absence(volinfo);
+ if (ret)
+ goto unlock;
- if (!spath)
- goto out;
+ ret = glusterd_store_perform_volume_store(volinfo);
+ if (ret)
+ goto unlock;
- fd = open (path, O_RDWR | O_CREAT | O_APPEND, 0600);
- if (fd <= 0) {
- gf_log ("glusterd", GF_LOG_ERROR, "Failed to open file: %s, "
- "error: %s", path, strerror (errno));
- goto out;
+ ret = glusterd_store_volume_atomic_update(volinfo);
+ if (ret) {
+ glusterd_perform_volinfo_version_action(
+ volinfo, GLUSTERD_VOLINFO_VER_AC_DECREMENT);
+ goto unlock;
}
- shandle->path = spath;
- *handle = shandle;
+ ret = glusterd_store_perform_node_state_store(volinfo);
+ if (ret)
+ goto unlock;
- ret = 0;
+ /* checksum should be computed at the end */
+ ret = glusterd_compute_cksum(volinfo, _gf_false);
+ if (ret)
+ goto unlock;
+ }
+unlock:
+ pthread_mutex_unlock(&volinfo->store_volinfo_lock);
+ pthread_mutex_unlock(&ctx->cleanup_lock);
-out:
- if (fd > 0)
- close (fd);
+ if (ret)
+ glusterd_store_volume_cleanup_tmp(volinfo);
- if (ret == -1) {
- if (spath)
- GF_FREE (spath);
- if (shandle) {
- GF_FREE (shandle);
- }
- }
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
}
-int
-glusterd_store_handle_retrieve (char *path, glusterd_store_handle_t **handle)
+int32_t
+glusterd_store_delete_volume(glusterd_volinfo_t *volinfo)
{
- int32_t ret = -1;
- struct stat statbuf = {0};
+ char pathname[PATH_MAX] = {
+ 0,
+ };
+ int32_t ret = 0;
+ glusterd_conf_t *priv = NULL;
+ char delete_path[PATH_MAX] = {
+ 0,
+ };
+ char trashdir[PATH_MAX] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ gf_boolean_t rename_fail = _gf_false;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ GF_ASSERT(volinfo);
+ priv = this->private;
+
+ GF_ASSERT(priv);
+
+ GLUSTERD_GET_VOLUME_DIR(pathname, volinfo, priv);
+
+ len = snprintf(delete_path, sizeof(delete_path),
+ "%s/" GLUSTERD_TRASH "/%s.deleted", priv->workdir,
+ uuid_utoa(volinfo->volume_id));
+ if ((len < 0) || (len >= sizeof(delete_path))) {
+ goto out;
+ }
+
+ len = snprintf(trashdir, sizeof(trashdir), "%s/" GLUSTERD_TRASH,
+ priv->workdir);
+ if ((len < 0) || (len >= sizeof(trashdir))) {
+ goto out;
+ }
+
+ ret = sys_mkdir(trashdir, 0755);
+ if (ret && errno != EEXIST) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
+ "Failed to create trash "
+ "directory");
+ goto out;
+ }
+
+ ret = sys_rename(pathname, delete_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Failed to rename volume "
+ "directory for volume %s",
+ volinfo->volname);
+ rename_fail = _gf_true;
+ goto out;
+ }
+
+ ret = recursive_rmdir(trashdir);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to rmdir: %s", trashdir);
+ }
- ret = stat (path, &statbuf);
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to retrieve store "
- "handle for %s, error: %s", path, strerror (errno));
- goto out;
- }
- ret = glusterd_store_handle_new (path, handle);
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (volinfo->shandle) {
+ gf_store_handle_destroy(volinfo->shandle);
+ volinfo->shandle = NULL;
+ }
+ ret = (rename_fail == _gf_true) ? -1 : 0;
+
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
+/*TODO: cleanup the duplicate code and implement a generic function for
+ * deleting snap/volume depending on the parameter flag */
int32_t
-glusterd_store_handle_destroy (glusterd_store_handle_t *handle)
+glusterd_store_delete_snap(glusterd_snap_t *snap)
{
- int32_t ret = -1;
-
- if (!handle) {
- ret = 0;
- goto out;
+ char pathname[PATH_MAX] = {
+ 0,
+ };
+ int32_t ret = 0;
+ glusterd_conf_t *priv = NULL;
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char path[PATH_MAX] = {
+ 0,
+ };
+ char delete_path[PATH_MAX] = {
+ 0,
+ };
+ char trashdir[PATH_MAX] = {
+ 0,
+ };
+ struct stat st = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ gf_boolean_t rename_fail = _gf_false;
+ int32_t len = 0;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ GF_ASSERT(snap);
+ GLUSTERD_GET_SNAP_DIR(pathname, snap, priv);
+
+ len = snprintf(delete_path, sizeof(delete_path),
+ "%s/" GLUSTERD_TRASH "/snap-%s.deleted", priv->workdir,
+ uuid_utoa(snap->snap_id));
+ if ((len < 0) || (len >= sizeof(delete_path))) {
+ goto out;
+ }
+
+ len = snprintf(trashdir, sizeof(trashdir), "%s/" GLUSTERD_TRASH,
+ priv->workdir);
+ if ((len < 0) || (len >= sizeof(trashdir))) {
+ goto out;
+ }
+
+ ret = sys_mkdir(trashdir, 0755);
+ if (ret && errno != EEXIST) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
+ "Failed to create trash "
+ "directory");
+ goto out;
+ }
+
+ ret = sys_rename(pathname, delete_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Failed to rename snap "
+ "directory %s to %s",
+ pathname, delete_path);
+ rename_fail = _gf_true;
+ goto out;
+ }
+
+ dir = sys_opendir(delete_path);
+ if (!dir) {
+ gf_msg_debug(this->name, 0, "Failed to open directory %s.",
+ delete_path);
+ goto out;
+ }
+
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
+ len = snprintf(path, PATH_MAX, "%s/%s", delete_path, entry->d_name);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ goto stat_failed;
+ }
+ ret = sys_stat(path, &st);
+ if (ret == -1) {
+ gf_msg_debug(this->name, 0,
+ "Failed to stat "
+ "entry %s",
+ path);
+ goto stat_failed;
}
- GF_FREE (handle->path);
+ if (S_ISDIR(st.st_mode))
+ ret = sys_rmdir(path);
+ else
+ ret = sys_unlink(path);
- GF_FREE (handle);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ " Failed to remove "
+ "%s",
+ path);
+ }
- ret = 0;
+ gf_msg_debug(this->name, 0, "%s %s",
+ ret ? "Failed to remove" : "Removed", entry->d_name);
+ stat_failed:
+ memset(path, 0, sizeof(path));
+ }
+
+ ret = sys_closedir(dir);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to close dir %s.", delete_path);
+ }
+
+ ret = sys_rmdir(delete_path);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to rmdir: %s", delete_path);
+ }
+ ret = sys_rmdir(trashdir);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to rmdir: %s", trashdir);
+ }
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
-
- return ret;
+ if (snap->shandle) {
+ gf_store_handle_destroy(snap->shandle);
+ snap->shandle = NULL;
+ }
+ ret = (rename_fail == _gf_true) ? -1 : 0;
+
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
-int32_t
-glusterd_store_uuid ()
+int
+glusterd_store_global_info(xlator_t *this)
{
- glusterd_conf_t *priv = NULL;
- char path[PATH_MAX] = {0,};
- int32_t ret = -1;
- glusterd_store_handle_t *handle = NULL;
- xlator_t *this = NULL;
-
- this = THIS;
- priv = this->private;
-
- snprintf (path, PATH_MAX, "%s/%s", priv->workdir,
- GLUSTERD_INFO_FILE);
-
- if (!priv->handle) {
- ret = glusterd_store_handle_new (path, &handle);
-
- if (ret) {
- gf_log (this->name, GF_LOG_ERROR,
- "Unable to get store handle!");
- goto out;
- }
-
- priv->handle = handle;
- } else {
- handle = priv->handle;
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ char buf[PATH_MAX];
+ uint total_len = 0;
+ gf_store_handle_t *handle = NULL;
+ char *uuid_str = NULL;
+
+ conf = this->private;
+
+ uuid_str = gf_strdup(uuid_utoa(MY_UUID));
+ if (!uuid_str)
+ goto out;
+
+ if (!conf->handle) {
+ ret = snprintf(buf, sizeof(buf), "%s/%s", conf->workdir,
+ GLUSTERD_INFO_FILE);
+ if ((ret < 0) || (ret >= sizeof(buf))) {
+ ret = -1;
+ goto out;
}
-
- /* make glusterd's uuid available for users */
- ret = chmod (handle->path, 0644);
+ ret = gf_store_handle_new(buf, &handle);
if (ret) {
- gf_log (this->name, GF_LOG_ERROR, "chmod error for %s: %s",
- GLUSTERD_INFO_FILE, strerror (errno));
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_GET_FAIL,
+ "Unable to get store handle");
+ goto out;
}
- handle->fd = open (handle->path, O_RDWR | O_CREAT | O_TRUNC, 0644);
- if (handle->fd <= 0) {
- ret = -1;
- goto out;
- }
- ret = glusterd_store_save_value (handle->fd, GLUSTERD_STORE_UUID_KEY,
- uuid_utoa (MY_UUID));
+ conf->handle = handle;
+ } else
+ handle = conf->handle;
+
+ /* These options need to be available for all users */
+ ret = sys_chmod(handle->path, 0644);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "chmod error for %s", GLUSTERD_INFO_FILE);
+ goto out;
+ }
+
+ handle->fd = gf_store_mkstemp(handle);
+ if (handle->fd < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = snprintf(buf, sizeof(buf), "%s=%s\n", GLUSTERD_STORE_UUID_KEY,
+ uuid_str);
+ if (ret < 0 || ret >= sizeof(buf)) {
+ ret = -1;
+ goto out;
+ }
+ total_len += ret;
+
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%d\n",
+ GD_OP_VERSION_KEY, conf->op_version);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_store_save_items(handle->fd, buf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_CRITICAL, 0, GD_MSG_OP_VERS_STORE_FAIL,
+ "Storing glusterd global-info failed ret = %d", ret);
+ goto out;
+ }
+
+ ret = gf_store_rename_tmppath(handle);
+out:
+ if (handle) {
+ if (ret && (handle->fd >= 0))
+ gf_store_unlink_tmppath(handle);
+ }
- if (ret) {
- gf_log (this->name, GF_LOG_CRITICAL,
- "Storing uuid failed ret = %d", ret);
- goto out;
- }
+ if (uuid_str)
+ GF_FREE(uuid_str);
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_GLUSTERD_GLOBAL_INFO_STORE_FAIL,
+ "Failed to store glusterd global-info");
+ return ret;
+}
+
+int
+glusterd_store_max_op_version(xlator_t *this)
+{
+ int ret = -1;
+ glusterd_conf_t *conf = NULL;
+ char op_version_str[15] = {
+ 0,
+ };
+ char path[PATH_MAX] = {
+ 0,
+ };
+ gf_store_handle_t *handle = NULL;
+ int32_t len = 0;
+
+ conf = this->private;
+
+ len = snprintf(path, PATH_MAX, "%s/%s", conf->workdir,
+ GLUSTERD_UPGRADE_FILE);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ goto out;
+ }
+ ret = gf_store_handle_new(path, &handle);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_GET_FAIL,
+ "Unable to get store handle");
+ goto out;
+ }
+
+ /* These options need to be available for all users */
+ ret = sys_chmod(handle->path, 0644);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "chmod error for %s", GLUSTERD_UPGRADE_FILE);
+ goto out;
+ }
+
+ handle->fd = gf_store_mkstemp(handle);
+ if (handle->fd < 0) {
+ ret = -1;
+ goto out;
+ }
+
+ snprintf(op_version_str, sizeof(op_version_str), "%d", GD_OP_VERSION_MAX);
+ ret = gf_store_save_value(handle->fd, GD_MAX_OP_VERSION_KEY,
+ op_version_str);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_OP_VERS_STORE_FAIL,
+ "Storing op-version failed ret = %d", ret);
+ goto out;
+ }
+
+ ret = gf_store_rename_tmppath(handle);
out:
- if (handle->fd > 0) {
- close (handle->fd);
- handle->fd = 0;
- }
- gf_log (this->name, GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ if (handle) {
+ if (ret && (handle->fd >= 0))
+ gf_store_unlink_tmppath(handle);
+ }
+
+ if (ret)
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_GLUSTERD_GLOBAL_INFO_STORE_FAIL,
+ "Failed to store max op-version");
+ if (handle)
+ gf_store_handle_destroy(handle);
+ return ret;
}
-int32_t
-glusterd_retrieve_uuid ()
+int
+glusterd_retrieve_max_op_version(xlator_t *this, int *op_version)
{
- char *uuid_str = NULL;
- int32_t ret = -1;
- glusterd_store_handle_t *handle = NULL;
- glusterd_conf_t *priv = NULL;
- char path[PATH_MAX] = {0,};
+ char *op_version_str = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+ int tmp_version = 0;
+ char *tmp = NULL;
+ char path[PATH_MAX] = {
+ 0,
+ };
+ gf_store_handle_t *handle = NULL;
+ int32_t len = 0;
+
+ priv = this->private;
+
+ len = snprintf(path, PATH_MAX, "%s/%s", priv->workdir,
+ GLUSTERD_UPGRADE_FILE);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ goto out;
+ }
+ ret = gf_store_handle_retrieve(path, &handle);
+
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Unable to get store "
+ "handle!");
+ goto out;
+ }
+
+ ret = gf_store_retrieve_value(handle, GD_MAX_OP_VERSION_KEY,
+ &op_version_str);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "No previous op_version present");
+ goto out;
+ }
+
+ tmp_version = strtol(op_version_str, &tmp, 10);
+ if ((tmp_version <= 0) || (tmp && strlen(tmp) > 1)) {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_UNSUPPORTED_VERSION,
+ "invalid version number");
+ goto out;
+ }
+
+ *op_version = tmp_version;
+
+ ret = 0;
+out:
+ if (op_version_str)
+ GF_FREE(op_version_str);
+ if (handle)
+ gf_store_handle_destroy(handle);
+ return ret;
+}
- priv = THIS->private;
+int
+glusterd_retrieve_op_version(xlator_t *this, int *op_version)
+{
+ char *op_version_str = NULL;
+ glusterd_conf_t *priv = NULL;
+ int ret = -1;
+ int tmp_version = 0;
+ char *tmp = NULL;
+ char path[PATH_MAX] = {
+ 0,
+ };
+ gf_store_handle_t *handle = NULL;
+ int32_t len = 0;
+
+ priv = this->private;
+
+ if (!priv->handle) {
+ len = snprintf(path, PATH_MAX, "%s/%s", priv->workdir,
+ GLUSTERD_INFO_FILE);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ goto out;
+ }
+ ret = gf_store_handle_retrieve(path, &handle);
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Unable to get store "
+ "handle!");
+ goto out;
+ }
- if (!priv->handle) {
- snprintf (path, PATH_MAX, "%s/%s", priv->workdir,
- GLUSTERD_INFO_FILE);
- ret = glusterd_store_handle_retrieve (path, &handle);
+ priv->handle = handle;
+ }
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to get store "
- "handle!");
- goto out;
- }
+ ret = gf_store_retrieve_value(priv->handle, GD_OP_VERSION_KEY,
+ &op_version_str);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "No previous op_version present");
+ goto out;
+ }
- priv->handle = handle;
- }
+ tmp_version = strtol(op_version_str, &tmp, 10);
+ if ((tmp_version <= 0) || (tmp && strlen(tmp) > 1)) {
+ gf_msg(this->name, GF_LOG_WARNING, EINVAL, GD_MSG_UNSUPPORTED_VERSION,
+ "invalid version number");
+ goto out;
+ }
- ret = glusterd_store_retrieve_value (priv->handle,
- GLUSTERD_STORE_UUID_KEY,
- &uuid_str);
+ *op_version = tmp_version;
- if (ret) {
- gf_log ("", GF_LOG_INFO, "No previous uuid is present");
- goto out;
- }
+ ret = 0;
+out:
+ if (op_version_str)
+ GF_FREE(op_version_str);
- uuid_parse (uuid_str, priv->uuid);
+ return ret;
+}
+int
+glusterd_restore_op_version(xlator_t *this)
+{
+ glusterd_conf_t *conf = NULL;
+ int ret = 0;
+ int op_version = 0;
+
+ conf = this->private;
+
+ ret = glusterd_retrieve_op_version(this, &op_version);
+ if (!ret) {
+ if ((op_version < GD_OP_VERSION_MIN) ||
+ (op_version > GD_OP_VERSION_MAX)) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_UNSUPPORTED_VERSION,
+ "wrong op-version (%d) retrieved", op_version);
+ ret = -1;
+ goto out;
+ }
+ conf->op_version = op_version;
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_OP_VERS_INFO,
+ "retrieved op-version: %d", conf->op_version);
+ goto out;
+ }
+
+ /* op-version can be missing from the store file in 2 cases,
+ * 1. This is a new install of glusterfs
+ * 2. This is an upgrade of glusterfs from a version without op-version
+ * to a version with op-version (eg. 3.3 -> 3.4)
+ *
+ * Detection of a new install or an upgrade from an older install can be
+ * done by checking for the presence of the its peer-id in the store
+ * file. If peer-id is present, the installation is an upgrade else, it
+ * is a new install.
+ *
+ * For case 1, set op-version to GD_OP_VERSION_MAX.
+ * For case 2, set op-version to GD_OP_VERSION_MIN.
+ */
+ ret = glusterd_retrieve_uuid();
+ if (ret) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_OP_VERS_SET_INFO,
+ "Detected new install. Setting"
+ " op-version to maximum : %d",
+ GD_OP_VERSION_MAX);
+ conf->op_version = GD_OP_VERSION_MAX;
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_OP_VERS_SET_INFO,
+ "Upgrade detected. Setting"
+ " op-version to minimum : %d",
+ GD_OP_VERSION_MIN);
+ conf->op_version = GD_OP_VERSION_MIN;
+ }
+ ret = 0;
out:
- if (uuid_str)
- GF_FREE (uuid_str);
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ return ret;
}
int32_t
-glusterd_store_iter_new (glusterd_store_handle_t *shandle,
- glusterd_store_iter_t **iter)
+glusterd_retrieve_uuid()
{
- int32_t ret = -1;
- glusterd_store_iter_t *tmp_iter = NULL;
- int fd = -1;
-
- GF_ASSERT (shandle);
- GF_ASSERT (iter);
-
- tmp_iter = GF_CALLOC (1, sizeof (*tmp_iter),
- gf_gld_mt_store_iter_t);
-
- if (!tmp_iter) {
- gf_log ("", GF_LOG_ERROR, "Out of Memory");
- goto out;
+ char *uuid_str = NULL;
+ int32_t ret = -1;
+ gf_store_handle_t *handle = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ char path[PATH_MAX] = {
+ 0,
+ };
+ int32_t len = 0;
+
+ this = THIS;
+ priv = this->private;
+
+ if (!priv->handle) {
+ len = snprintf(path, PATH_MAX, "%s/%s", priv->workdir,
+ GLUSTERD_INFO_FILE);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ goto out;
}
+ ret = gf_store_handle_retrieve(path, &handle);
- fd = open (shandle->path, O_RDWR);
-
- if (fd < 0) {
- gf_log ("", GF_LOG_ERROR, "Unable to open %s, errno: %d",
- shandle->path, errno);
- goto out;
- }
-
- tmp_iter->fd = fd;
-
- tmp_iter->file = fdopen (tmp_iter->fd, "r");
-
- if (!tmp_iter->file) {
- gf_log ("", GF_LOG_ERROR, "Unable to open file %s errno: %d",
- shandle->path, errno);
- goto out;
+ if (ret) {
+ gf_msg_debug(this->name, 0,
+ "Unable to get store"
+ "handle!");
+ goto out;
}
- strncpy (tmp_iter->filepath, shandle->path, sizeof (tmp_iter->filepath));
- *iter = tmp_iter;
- ret = 0;
+ priv->handle = handle;
+ }
+ pthread_mutex_lock(&priv->mutex);
+ {
+ ret = gf_store_retrieve_value(priv->handle, GLUSTERD_STORE_UUID_KEY,
+ &uuid_str);
+ }
+ pthread_mutex_unlock(&priv->mutex);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "No previous uuid is present");
+ goto out;
+ }
+
+ gf_uuid_parse(uuid_str, priv->uuid);
out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
- return ret;
+ GF_FREE(uuid_str);
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
}
-int32_t
-glusterd_store_validate_key_value (char *storepath, char *key, char*val,
- glusterd_store_op_errno_t *op_errno)
+int
+glusterd_store_retrieve_snapd(glusterd_volinfo_t *volinfo)
{
- int ret = 0;
+ int ret = -1;
+ char *key = NULL;
+ char *value = NULL;
+ char volpath[PATH_MAX] = {
+ 0,
+ };
+ char path[PATH_MAX] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ gf_store_iter_t *iter = NULL;
+ gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = THIS->private;
+ GF_ASSERT(volinfo);
+
+ if (conf->op_version < GD_OP_VERSION_3_6_0) {
+ ret = 0;
+ goto out;
+ }
+
+ /*
+ * This is needed for upgrade situations. Say a volume is created with
+ * older version of glusterfs and upgraded to a glusterfs version equal
+ * to or greater than GD_OP_VERSION_3_6_0. The older glusterd would not
+ * have created the snapd.info file related to snapshot daemon for user
+ * serviceable snapshots. So as part of upgrade when the new glusterd
+ * starts, as part of restore (restoring the volume to be precise), it
+ * tries to snapd related info from snapd.info file. But since there was
+ * no such file till now, the restore operation fails. Thus, to prevent
+ * it from happening check whether user serviceable snapshots features
+ * is enabled before restoring snapd. If its disabled, then simply
+ * exit by returning success (without even checking for the snapd.info).
+ */
+
+ if (!dict_get_str_boolean(volinfo->dict, "features.uss", _gf_false)) {
+ ret = 0;
+ goto out;
+ }
+
+ GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, conf);
+
+ len = snprintf(path, sizeof(path), "%s/%s", volpath,
+ GLUSTERD_VOLUME_SNAPD_INFO_FILE);
+ if ((len < 0) || (len >= sizeof(path))) {
+ goto out;
+ }
+
+ ret = gf_store_handle_retrieve(path, &volinfo->snapd.handle);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HANDLE_NULL,
+ "volinfo handle is NULL");
+ goto out;
+ }
+
+ ret = gf_store_iter_new(volinfo->snapd.handle, &iter);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_GET_FAIL,
+ "Failed to get new store "
+ "iter");
+ goto out;
+ }
+
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_GET_FAIL,
+ "Failed to get next store "
+ "iter");
+ goto out;
+ }
+
+ while (!ret) {
+ if (!strncmp(key, GLUSTERD_STORE_KEY_SNAPD_PORT,
+ SLEN(GLUSTERD_STORE_KEY_SNAPD_PORT))) {
+ volinfo->snapd.port = atoi(value);
+ }
- GF_ASSERT (op_errno);
- GF_ASSERT (storepath);
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ }
- if ((key == NULL) && (val == NULL)) {
- ret = -1;
- gf_log ("glusterd", GF_LOG_ERROR, "Glusterd store may be "
- "corrupted, Invalid key and value (null) in %s",
- storepath);
- *op_errno = GD_STORE_KEY_VALUE_NULL;
- } else if (key == NULL) {
- ret = -1;
- gf_log ("glusterd", GF_LOG_ERROR, "Glusterd store may be "
- "corrupted, Invalid key (null) in %s", storepath);
- *op_errno = GD_STORE_KEY_NULL;
- } else if (val == NULL) {
- ret = -1;
- gf_log ("glusterd", GF_LOG_ERROR, "Glusterd store may be "
- "corrupted, Invalid value (null) for key %s in %s",
- key, storepath);
- *op_errno = GD_STORE_VALUE_NULL;
- } else {
- ret = 0;
- *op_errno = GD_STORE_SUCCESS;
- }
+ if (op_errno != GD_STORE_EOF)
+ goto out;
+
+ ret = 0;
+
+out:
+ if (gf_store_iter_destroy(&iter)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
+ "Failed to destroy store iter");
+ ret = -1;
+ }
- return ret;
+ return ret;
}
int32_t
-glusterd_store_iter_get_next (glusterd_store_iter_t *iter,
- char **key, char **value,
- glusterd_store_op_errno_t *op_errno)
-{
- int32_t ret = -1;
- char *scan_str = NULL;
- char *free_str = NULL;
- char *iter_key = NULL;
- char *iter_val = NULL;
- struct stat st = {0,};
- glusterd_store_op_errno_t store_errno = GD_STORE_SUCCESS;
-
- GF_ASSERT (iter);
- GF_ASSERT (iter->file);
- GF_ASSERT (key);
- GF_ASSERT (value);
-
- ret = fstat (iter->fd, &st);
- if (ret < 0) {
- gf_log ("glusterd", GF_LOG_WARNING,
- "stat on file failed");
- ret = -1;
- store_errno = GD_STORE_STAT_FAILED;
- goto out;
- }
+glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo)
+{
+ int32_t ret = 0;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_brickinfo_t *ta_brickinfo = NULL;
+ gf_store_iter_t *iter = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char brickdir[PATH_MAX] = {
+ 0,
+ };
+ char path[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ int32_t brick_count = 0;
+ int32_t ta_brick_count = 0;
+ char tmpkey[32] = {
+ 0,
+ };
+ gf_store_iter_t *tmpiter = NULL;
+ char *tmpvalue = NULL;
+ char abspath[PATH_MAX] = {0};
+ struct pmap_registry *pmap = NULL;
+ xlator_t *this = NULL;
+ int brickid = 0;
+ /* ta_brick_id initialization with 2 since ta-brick id starts with
+ * volname-ta-2
+ */
+ int ta_brick_id = 2;
+ gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
+ int32_t len = 0;
+
+ GF_ASSERT(volinfo);
+ GF_ASSERT(volinfo->volname);
+
+ this = THIS;
+ priv = this->private;
+
+ GLUSTERD_GET_BRICK_DIR(brickdir, volinfo, priv);
+
+ ret = gf_store_iter_new(volinfo->shandle, &tmpiter);
+
+ if (ret)
+ goto out;
+
+ while (brick_count < volinfo->brick_count) {
+ ret = glusterd_brickinfo_new(&brickinfo);
- scan_str = GF_CALLOC (1, st.st_size,
- gf_gld_mt_char);
- if (scan_str == NULL) {
- ret = -1;
- store_errno = GD_STORE_ENOMEM;
- goto out;
+ if (ret)
+ goto out;
+ snprintf(tmpkey, sizeof(tmpkey), "%s-%d", GLUSTERD_STORE_KEY_VOL_BRICK,
+ brick_count);
+ ret = gf_store_iter_get_matching(tmpiter, tmpkey, &tmpvalue);
+ len = snprintf(path, sizeof(path), "%s/%s", brickdir, tmpvalue);
+ GF_FREE(tmpvalue);
+ tmpvalue = NULL;
+ if ((len < 0) || (len >= sizeof(path))) {
+ ret = -1;
+ goto out;
}
- *key = NULL;
- *value = NULL;
+ ret = gf_store_handle_retrieve(path, &brickinfo->shandle);
- free_str = scan_str;
-
- ret = glusterd_store_read_and_tokenize (iter->file, scan_str,
- &iter_key, &iter_val,
- &store_errno);
- if (ret < 0) {
- goto out;
- }
+ if (ret)
+ goto out;
+ ret = gf_store_iter_new(brickinfo->shandle, &iter);
- ret = glusterd_store_validate_key_value (iter->filepath, iter_key,
- iter_val, &store_errno);
if (ret)
- goto out;
+ goto out;
- *value = gf_strdup (iter_val);
-
- *key = gf_strdup (iter_key);
- if (!iter_key || !iter_val) {
- ret = -1;
- store_errno = GD_STORE_ENOMEM;
- goto out;
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_STORE_ITER_GET_FAIL,
+ "Unable to iterate "
+ "the store for brick: %s",
+ path);
+ goto out;
}
+ while (!ret) {
+ if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_HOSTNAME,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_HOSTNAME))) {
+ if (snprintf(brickinfo->hostname, sizeof(brickinfo->hostname),
+ "%s", value) >= sizeof(brickinfo->hostname)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "brick hostname truncated: %s", brickinfo->hostname);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_PATH,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_PATH))) {
+ if (snprintf(brickinfo->path, sizeof(brickinfo->path), "%s",
+ value) >= sizeof(brickinfo->path)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "brick path truncated: %s", brickinfo->path);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_REAL_PATH,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_REAL_PATH))) {
+ if (snprintf(brickinfo->real_path, sizeof(brickinfo->real_path),
+ "%s", value) >= sizeof(brickinfo->real_path)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "real_path truncated: %s", brickinfo->real_path);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_PORT,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_PORT))) {
+ ret = gf_string2int(value, &brickinfo->port);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
+ }
- ret = 0;
-
-out:
- if (ret) {
- if (*key) {
- GF_FREE (*key);
- *key = NULL;
+ if (brickinfo->port < priv->base_port) {
+ /* This is required to adhere to the
+ IANA standards */
+ brickinfo->port = 0;
+ } else {
+ /* This is required to have proper ports
+ assigned to bricks after restart */
+ pmap = pmap_registry_get(THIS);
+ if (pmap->last_alloc <= brickinfo->port)
+ pmap->last_alloc = brickinfo->port + 1;
}
- if (*value) {
- GF_FREE (*value);
- *value = NULL;
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_RDMA_PORT))) {
+ ret = gf_string2int(value, &brickinfo->rdma_port);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
}
- }
- if (free_str)
- GF_FREE (free_str);
- if (op_errno)
- *op_errno = store_errno;
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
- return ret;
-}
+ if (brickinfo->rdma_port < priv->base_port) {
+ /* This is required to adhere to the
+ IANA standards */
+ brickinfo->rdma_port = 0;
+ } else {
+ /* This is required to have proper ports
+ assigned to bricks after restart */
+ pmap = pmap_registry_get(THIS);
+ if (pmap->last_alloc <= brickinfo->rdma_port)
+ pmap->last_alloc = brickinfo->rdma_port + 1;
+ }
-int32_t
-glusterd_store_iter_get_matching (glusterd_store_iter_t *iter,
- char *key, char **value)
-{
- int32_t ret = -1;
- char *tmp_key = NULL;
- char *tmp_value = NULL;
+ } else if (!strncmp(
+ key, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED))) {
+ ret = gf_string2int(value, &brickinfo->decommissioned);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
+ }
- ret = glusterd_store_iter_get_next (iter, &tmp_key, &tmp_value,
- NULL);
- while (!ret) {
- if (!strncmp (key, tmp_key, strlen (key))){
- *value = tmp_value;
- GF_FREE (tmp_key);
- goto out;
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH))) {
+ if (snprintf(brickinfo->device_path,
+ sizeof(brickinfo->device_path), "%s",
+ value) >= sizeof(brickinfo->device_path)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "device_path truncated: %s", brickinfo->device_path);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_MOUNT_DIR,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_MOUNT_DIR))) {
+ if (snprintf(brickinfo->mount_dir, sizeof(brickinfo->mount_dir),
+ "%s", value) >= sizeof(brickinfo->mount_dir)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "mount_dir truncated: %s", brickinfo->mount_dir);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS))) {
+ ret = gf_string2int(value, &brickinfo->snap_status);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
}
- GF_FREE (tmp_key);
- GF_FREE (tmp_value);
- ret = glusterd_store_iter_get_next (iter, &tmp_key,
- &tmp_value, NULL);
- }
-out:
- return ret;
-}
-int32_t
-glusterd_store_iter_destroy (glusterd_store_iter_t *iter)
-{
- int32_t ret = -1;
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_FSTYPE,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_FSTYPE))) {
+ if (snprintf(brickinfo->fstype, sizeof(brickinfo->fstype), "%s",
+ value) >= sizeof(brickinfo->fstype)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL, "fstype truncated: %s",
+ brickinfo->fstype);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_MNTOPTS,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_MNTOPTS))) {
+ if (snprintf(brickinfo->mnt_opts, sizeof(brickinfo->mnt_opts),
+ "%s", value) >= sizeof(brickinfo->mnt_opts)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "mnt_opts truncated: %s", brickinfo->mnt_opts);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_VGNAME,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_VGNAME))) {
+ if (snprintf(brickinfo->vg, sizeof(brickinfo->vg), "%s",
+ value) >= sizeof(brickinfo->vg)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "brickinfo->vg truncated: %s", brickinfo->vg);
+ goto out;
+ }
+ } else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_ID)) {
+ if (snprintf(brickinfo->brick_id, sizeof(brickinfo->brick_id),
+ "%s", value) >= sizeof(brickinfo->brick_id)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "brick_id truncated: %s", brickinfo->brick_id);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_FSID,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_FSID))) {
+ ret = gf_string2uint64(value, &brickinfo->statfs_fsid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "%s "
+ "is not a valid uint64_t value",
+ value);
+ }
- GF_ASSERT (iter);
- GF_ASSERT (iter->fd > 0);
+ } else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_UUID)) {
+ gf_uuid_parse(value, brickinfo->uuid);
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNKNOWN_KEY,
+ "Unknown key: %s", key);
+ }
- ret = fclose (iter->file);
+ GF_FREE(key);
+ GF_FREE(value);
+ key = NULL;
+ value = NULL;
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to close fd: %d, ret: %d, "
- "errno: %d" ,iter->fd, ret, errno);
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
}
- GF_FREE (iter);
+ if (op_errno != GD_STORE_EOF) {
+ gf_msg(this->name, GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "Error parsing brickinfo: "
+ "op_errno=%d",
+ op_errno);
+ goto out;
+ }
- return ret;
-}
+ if (brickinfo->brick_id[0] == '\0') {
+ /* This is an old volume upgraded to op_version 4 */
+ GLUSTERD_ASSIGN_BRICKID_TO_BRICKINFO(brickinfo, volinfo, brickid++);
+ }
+ /* Populate brickinfo->real_path for normal volumes, for
+ * snapshot or snapshot restored volume this would be done post
+ * creating the brick mounts
+ */
+ if (gf_uuid_is_null(brickinfo->uuid))
+ (void)glusterd_resolve_brick(brickinfo);
+ if (brickinfo->real_path[0] == '\0' && !volinfo->is_snap_volume &&
+ gf_uuid_is_null(volinfo->restored_from_snap)) {
+ /* By now if the brick is a local brick then it will be
+ * able to resolve which is the only thing we want now
+ * for checking whether the brickinfo->uuid matches
+ * with MY_UUID for realpath check. Hence do not handle
+ * error
+ */
+ if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
+ if (!realpath(brickinfo->path, abspath)) {
+ gf_msg(this->name, GF_LOG_CRITICAL, errno,
+ GD_MSG_BRICKINFO_CREATE_FAIL,
+ "realpath() failed for brick %s"
+ ". The underlying file system "
+ "may be in bad state",
+ brickinfo->path);
+ ret = -1;
+ goto out;
+ }
+ if (strlen(abspath) >= sizeof(brickinfo->real_path)) {
+ ret = -1;
+ goto out;
+ }
+ (void)strncpy(brickinfo->real_path, abspath,
+ sizeof(brickinfo->real_path));
+ }
+ }
-char*
-glusterd_store_strerror (glusterd_store_op_errno_t op_errno)
-{
- switch (op_errno) {
- case GD_STORE_SUCCESS:
- return "Success";
- case GD_STORE_KEY_NULL:
- return "Invalid Key";
- case GD_STORE_VALUE_NULL:
- return "Invalid Value";
- case GD_STORE_KEY_VALUE_NULL:
- return "Invalid Key and Value";
- case GD_STORE_EOF:
- return "No data";
- case GD_STORE_ENOMEM:
- return "No memory";
- default:
- return "Invalid errno";
+ /* Handle upgrade case of shared_brick_count 'fsid' */
+ /* Ideally statfs_fsid should never be 0 if done right */
+ if (!gf_uuid_compare(brickinfo->uuid, MY_UUID) &&
+ brickinfo->statfs_fsid == 0) {
+ struct statvfs brickstat = {
+ 0,
+ };
+ ret = sys_statvfs(brickinfo->path, &brickstat);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, errno,
+ GD_MSG_BRICKINFO_CREATE_FAIL,
+ "failed to get statfs() call on brick %s",
+ brickinfo->path);
+ /* No need for treating it as an error, lets continue
+ with just a message */
+ } else {
+ brickinfo->statfs_fsid = brickstat.f_fsid;
+ }
}
- return "Invalid errno";
-}
-int32_t
-glusterd_store_retrieve_bricks (glusterd_volinfo_t *volinfo)
-{
+ cds_list_add_tail(&brickinfo->brick_list, &volinfo->bricks);
+ brick_count++;
+ }
- int32_t ret = 0;
- glusterd_brickinfo_t *brickinfo = NULL;
- glusterd_store_iter_t *iter = NULL;
- char *key = NULL;
- char *value = NULL;
- char brickdir[PATH_MAX] = {0,};
- char path[PATH_MAX] = {0,};
- glusterd_conf_t *priv = NULL;
- int32_t brick_count = 0;
- char tmpkey[4096] = {0,};
- glusterd_store_iter_t *tmpiter = NULL;
- char *tmpvalue = NULL;
- struct pmap_registry *pmap = NULL;
- glusterd_store_op_errno_t op_errno = GD_STORE_SUCCESS;
+ if (gf_store_iter_destroy(&tmpiter)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
+ "Failed to destroy store iter");
+ ret = -1;
+ goto out;
+ }
- GF_ASSERT (volinfo);
- GF_ASSERT (volinfo->volname);
+ ret = gf_store_iter_new(volinfo->shandle, &tmpiter);
- priv = THIS->private;
+ if (ret)
+ goto out;
- GLUSTERD_GET_BRICK_DIR (brickdir, volinfo, priv)
+ if (volinfo->thin_arbiter_count == 1) {
+ snprintf(tmpkey, sizeof(tmpkey), "%s-%d",
+ GLUSTERD_STORE_KEY_VOL_TA_BRICK, 0);
+ while (ta_brick_count < volinfo->subvol_count) {
+ ret = glusterd_brickinfo_new(&ta_brickinfo);
+ if (ret)
+ goto out;
- ret = glusterd_store_iter_new (volinfo->shandle, &tmpiter);
+ ret = gf_store_iter_get_matching(tmpiter, tmpkey, &tmpvalue);
- if (ret)
+ len = snprintf(path, sizeof(path), "%s/%s", brickdir, tmpvalue);
+ GF_FREE(tmpvalue);
+ tmpvalue = NULL;
+ if ((len < 0) || (len >= sizeof(path))) {
+ ret = -1;
goto out;
+ }
- while (brick_count < volinfo->brick_count) {
- ret = glusterd_brickinfo_new (&brickinfo);
+ ret = gf_store_handle_retrieve(path, &ta_brickinfo->shandle);
- if (ret)
- goto out;
- snprintf (tmpkey, sizeof (tmpkey), "%s-%d",
- GLUSTERD_STORE_KEY_VOL_BRICK,brick_count);
- ret = glusterd_store_iter_get_matching (tmpiter, tmpkey,
- &tmpvalue);
- snprintf (path, sizeof (path), "%s/%s", brickdir, tmpvalue);
-
- GF_FREE (tmpvalue);
+ if (ret)
+ goto out;
- tmpvalue = NULL;
+ ret = gf_store_iter_new(ta_brickinfo->shandle, &iter);
- ret = glusterd_store_handle_retrieve (path, &brickinfo->shandle);
+ if (ret)
+ goto out;
- if (ret)
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ if (ret) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_STORE_ITER_GET_FAIL,
+ "Unable to iterate "
+ "the store for brick: %s",
+ path);
+ goto out;
+ }
+
+ while (!ret) {
+ if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_HOSTNAME,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_HOSTNAME))) {
+ if (snprintf(ta_brickinfo->hostname,
+ sizeof(ta_brickinfo->hostname), "%s",
+ value) >= sizeof(ta_brickinfo->hostname)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "brick hostname truncated: %s",
+ ta_brickinfo->hostname);
goto out;
-
- ret = glusterd_store_iter_new (brickinfo->shandle, &iter);
-
- if (ret)
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_PATH,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_PATH))) {
+ if (snprintf(ta_brickinfo->path, sizeof(ta_brickinfo->path),
+ "%s", value) >= sizeof(ta_brickinfo->path)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "brick path truncated: %s", ta_brickinfo->path);
goto out;
-
- ret = glusterd_store_iter_get_next (iter, &key, &value,
- &op_errno);
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR, "Unable to iterate "
- "the store for brick: %s, reason: %s", path,
- glusterd_store_strerror (op_errno));
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_REAL_PATH,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_REAL_PATH))) {
+ if (snprintf(ta_brickinfo->real_path,
+ sizeof(ta_brickinfo->real_path), "%s",
+ value) >= sizeof(ta_brickinfo->real_path)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "real_path truncated: %s",
+ ta_brickinfo->real_path);
goto out;
- }
- while (!ret) {
- if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_HOSTNAME,
- strlen (GLUSTERD_STORE_KEY_BRICK_HOSTNAME))) {
- strncpy (brickinfo->hostname, value, 1024);
- } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_PATH,
- strlen (GLUSTERD_STORE_KEY_BRICK_PATH))) {
- strncpy (brickinfo->path, value,
- sizeof (brickinfo->path));
- } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_PORT,
- strlen (GLUSTERD_STORE_KEY_BRICK_PORT))) {
- gf_string2int (value, &brickinfo->port);
-
- if (brickinfo->port < GF_IANA_PRIV_PORTS_START){
- /* This is required to adhere to the
- IANA standards */
- brickinfo->port = 0;
- } else {
- /* This is required to have proper ports
- assigned to bricks after restart */
- pmap = pmap_registry_get (THIS);
- if (pmap->last_alloc <= brickinfo->port)
- pmap->last_alloc =
- brickinfo->port + 1;
- }
- } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT,
- strlen (GLUSTERD_STORE_KEY_BRICK_RDMA_PORT))) {
- gf_string2int (value, &brickinfo->rdma_port);
-
- if (brickinfo->rdma_port <
- GF_IANA_PRIV_PORTS_START){
- /* This is required to adhere to the
- IANA standards */
- brickinfo->rdma_port = 0;
- } else {
- /* This is required to have proper ports
- assigned to bricks after restart */
- pmap = pmap_registry_get (THIS);
- if (pmap->last_alloc <=
- brickinfo->rdma_port)
- pmap->last_alloc =
- brickinfo->rdma_port +1;
- }
-
- } else if (!strncmp (key, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED,
- strlen (GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED))) {
- gf_string2int (value, &brickinfo->decommissioned);
- } else {
- gf_log ("", GF_LOG_ERROR, "Unknown key: %s",
- key);
- }
-
- GF_FREE (key);
- GF_FREE (value);
- key = NULL;
- value = NULL;
-
- ret = glusterd_store_iter_get_next (iter, &key, &value,
- &op_errno);
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_PORT,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_PORT))) {
+ ret = gf_string2int(value, &ta_brickinfo->port);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
+ }
+
+ if (ta_brickinfo->port < priv->base_port) {
+ /* This is required to adhere to the
+ IANA standards */
+ ta_brickinfo->port = 0;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_RDMA_PORT,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_RDMA_PORT))) {
+ ret = gf_string2int(value, &ta_brickinfo->rdma_port);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
+ }
+
+ if (ta_brickinfo->rdma_port < priv->base_port) {
+ /* This is required to adhere to the
+ IANA standards */
+ ta_brickinfo->rdma_port = 0;
+ }
+ } else if (!strncmp(
+ key, GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED))) {
+ ret = gf_string2int(value, &ta_brickinfo->decommissioned);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
+ }
+
+ } else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_ID)) {
+ if (snprintf(ta_brickinfo->brick_id,
+ sizeof(ta_brickinfo->brick_id), "%s",
+ value) >= sizeof(ta_brickinfo->brick_id)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "brick_id truncated: %s",
+ ta_brickinfo->brick_id);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_BRICK_FSID,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_FSID))) {
+ ret = gf_string2uint64(value, &ta_brickinfo->statfs_fsid);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_INVALID_ENTRY,
+ "%s "
+ "is not a valid uint64_t value",
+ value);
+ }
+ } else if (!strcmp(key, GLUSTERD_STORE_KEY_BRICK_UUID)) {
+ gf_uuid_parse(value, brickinfo->uuid);
+ } else if (!strncmp(
+ key, GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS,
+ SLEN(GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS))) {
+ ret = gf_string2int(value, &ta_brickinfo->snap_status);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INCOMPATIBLE_VALUE,
+ "Failed to convert "
+ "string to integer");
+ }
+
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNKNOWN_KEY,
+ "Unknown key: %s", key);
}
- if (op_errno != GD_STORE_EOF)
- goto out;
- ret = glusterd_store_iter_destroy (iter);
+ GF_FREE(key);
+ GF_FREE(value);
+ key = NULL;
+ value = NULL;
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ }
- if (ret)
- goto out;
+ GLUSTERD_ASSIGN_BRICKID_TO_TA_BRICKINFO(ta_brickinfo, volinfo,
+ ta_brick_id);
+ ta_brick_id += 3;
- list_add_tail (&brickinfo->brick_list, &volinfo->bricks);
- brick_count++;
+ cds_list_add_tail(&ta_brickinfo->brick_list, &volinfo->ta_bricks);
+ ta_brick_count++;
}
+ }
+
+ assign_brick_groups(volinfo);
+ ret = 0;
- ret = glusterd_store_iter_destroy (tmpiter);
- if (ret)
- goto out;
out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ if (gf_store_iter_destroy(&tmpiter)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
+ "Failed to destroy store iter");
+ ret = -1;
+ }
- return ret;
-}
+ if (gf_store_iter_destroy(&iter)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
+ "Failed to destroy store iter");
+ ret = -1;
+ }
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
+
+ return ret;
+}
int32_t
-glusterd_store_retrieve_rbstate (char *volname)
+glusterd_store_retrieve_node_state(glusterd_volinfo_t *volinfo)
{
- int32_t ret = -1;
- glusterd_volinfo_t *volinfo = NULL;
- glusterd_store_iter_t *iter = NULL;
- char *key = NULL;
- char *value = NULL;
- char volpath[PATH_MAX] = {0,};
- glusterd_conf_t *priv = NULL;
- char path[PATH_MAX] = {0,};
- glusterd_store_op_errno_t op_errno = GD_STORE_SUCCESS;
-
- priv = THIS->private;
-
- ret = glusterd_volinfo_find (volname, &volinfo);
- if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get"
- "volinfo for %s.", volname);
+ int32_t ret = -1;
+ gf_store_iter_t *iter = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ char *dup_value = NULL;
+ char volpath[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ char path[PATH_MAX] = {
+ 0,
+ };
+ gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
+ dict_t *tmp_dict = NULL;
+ xlator_t *this = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(volinfo);
+
+ GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv);
+ len = snprintf(path, sizeof(path), "%s/%s", volpath,
+ GLUSTERD_NODE_STATE_FILE);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ goto out;
+ }
+
+ ret = gf_store_handle_retrieve(path, &volinfo->node_state_shandle);
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_new(volinfo->node_state_shandle, &iter);
+
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+
+ if (ret)
+ goto out;
+
+ while (ret == 0) {
+ if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG,
+ SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG))) {
+ volinfo->rebal.defrag_cmd = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_STATUS,
+ SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_STATUS))) {
+ volinfo->rebal.defrag_status = atoi(value);
+ } else if (!strncmp(key, GF_REBALANCE_TID_KEY,
+ SLEN(GF_REBALANCE_TID_KEY))) {
+ gf_uuid_parse(value, volinfo->rebal.rebalance_id);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_DEFRAG_OP,
+ SLEN(GLUSTERD_STORE_KEY_DEFRAG_OP))) {
+ volinfo->rebal.op = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES,
+ SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES))) {
+ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_files);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE,
+ SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE))) {
+ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_data);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED,
+ SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED))) {
+ sscanf(value, "%" PRIu64, &volinfo->rebal.lookedup_files);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES,
+ SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES))) {
+ sscanf(value, "%" PRIu64, &volinfo->rebal.rebalance_failures);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED,
+ SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED))) {
+ sscanf(value, "%" PRIu64, &volinfo->rebal.skipped_files);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME,
+ SLEN(GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME))) {
+ volinfo->rebal.rebalance_time = atoi(value);
+ } else {
+ if (!tmp_dict) {
+ tmp_dict = dict_new();
+ if (!tmp_dict) {
+ ret = -1;
+ goto out;
+ }
+ }
+ dup_value = gf_strdup(value);
+ if (!dup_value) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, ENOMEM, GD_MSG_NO_MEMORY,
+ "Failed to strdup value string");
+ goto out;
+ }
+ ret = dict_set_str(tmp_dict, key, dup_value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Error setting data in rebal "
+ "dict.");
goto out;
+ }
+ dup_value = NULL;
}
- GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv);
- snprintf (path, sizeof (path), "%s/%s", volpath,
- GLUSTERD_VOLUME_RBSTATE_FILE);
+ GF_FREE(key);
+ GF_FREE(value);
+ key = NULL;
+ value = NULL;
- ret = glusterd_store_handle_retrieve (path, &volinfo->rb_shandle);
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ }
+ if (tmp_dict) {
+ volinfo->rebal.dict = dict_ref(tmp_dict);
+ }
- if (ret)
- goto out;
+ if (op_errno != GD_STORE_EOF) {
+ ret = -1;
+ goto out;
+ }
- ret = glusterd_store_iter_new (volinfo->rb_shandle, &iter);
+ ret = 0;
- if (ret)
- goto out;
+out:
+ if (gf_store_iter_destroy(&iter)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
+ "Failed to destroy store iter");
+ ret = -1;
+ }
+
+ if (dup_value)
+ GF_FREE(dup_value);
+ if (ret) {
+ if (volinfo->rebal.dict)
+ dict_unref(volinfo->rebal.dict);
+ }
+ if (tmp_dict)
+ dict_unref(tmp_dict);
+
+ gf_msg_trace(this->name, 0, "Returning with %d", ret);
+
+ return ret;
+}
- ret = glusterd_store_iter_get_next (iter, &key, &value, &op_errno);
- if (ret)
+int
+glusterd_store_update_volinfo(glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ int exists = 0;
+ char *key = NULL;
+ char *value = NULL;
+ char volpath[PATH_MAX] = {
+ 0,
+ };
+ char path[PATH_MAX] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ gf_store_iter_t *iter = NULL;
+ gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = THIS->private;
+ GF_ASSERT(volinfo);
+
+ GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, conf);
+
+ len = snprintf(path, sizeof(path), "%s/%s", volpath,
+ GLUSTERD_VOLUME_INFO_FILE);
+ if ((len < 0) || (len >= sizeof(path))) {
+ goto out;
+ }
+
+ ret = gf_store_handle_retrieve(path, &volinfo->shandle);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HANDLE_NULL,
+ "volinfo handle is NULL");
+ goto out;
+ }
+
+ ret = gf_store_iter_new(volinfo->shandle, &iter);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_GET_FAIL,
+ "Failed to get new store "
+ "iter");
+ goto out;
+ }
+
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_GET_FAIL,
+ "Failed to get next store "
+ "iter");
+ goto out;
+ }
+
+ while (!ret) {
+ gf_msg_debug(this->name, 0, "key = %s value = %s", key, value);
+ if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_TYPE,
+ SLEN(GLUSTERD_STORE_KEY_VOL_TYPE))) {
+ volinfo->type = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_COUNT,
+ SLEN(GLUSTERD_STORE_KEY_VOL_COUNT))) {
+ volinfo->brick_count = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_STATUS,
+ SLEN(GLUSTERD_STORE_KEY_VOL_STATUS))) {
+ volinfo->status = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_VERSION,
+ SLEN(GLUSTERD_STORE_KEY_VOL_VERSION))) {
+ volinfo->version = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_PORT,
+ SLEN(GLUSTERD_STORE_KEY_VOL_PORT))) {
+ volinfo->port = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_SUB_COUNT,
+ SLEN(GLUSTERD_STORE_KEY_VOL_SUB_COUNT))) {
+ volinfo->sub_count = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_STRIPE_CNT,
+ SLEN(GLUSTERD_STORE_KEY_VOL_STRIPE_CNT))) {
+ volinfo->stripe_count = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT,
+ SLEN(GLUSTERD_STORE_KEY_VOL_REPLICA_CNT))) {
+ volinfo->replica_count = atoi(value);
+ } else if (!strcmp(key, GLUSTERD_STORE_KEY_VOL_ARBITER_CNT)) {
+ volinfo->arbiter_count = atoi(value);
+ } else if (!strcmp(key, GLUSTERD_STORE_KEY_VOL_THIN_ARBITER_CNT)) {
+ volinfo->thin_arbiter_count = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT,
+ SLEN(GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT))) {
+ volinfo->disperse_count = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT,
+ SLEN(GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT))) {
+ volinfo->redundancy_count = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_TRANSPORT,
+ SLEN(GLUSTERD_STORE_KEY_VOL_TRANSPORT))) {
+ volinfo->transport_type = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_ID,
+ SLEN(GLUSTERD_STORE_KEY_VOL_ID))) {
+ ret = gf_uuid_parse(value, volinfo->volume_id);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_UUID_PARSE_FAIL,
+ "failed to parse uuid");
+
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_USERNAME,
+ SLEN(GLUSTERD_STORE_KEY_USERNAME))) {
+ glusterd_auth_set_username(volinfo, value);
+
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_PASSWORD,
+ SLEN(GLUSTERD_STORE_KEY_PASSWORD))) {
+ glusterd_auth_set_password(volinfo, value);
+
+ } else if (strstr(key, "slave")) {
+ ret = dict_set_dynstr(volinfo->gsync_slaves, key, gf_strdup(value));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_SET_FAILED,
+ "Error in "
+ "dict_set_str");
goto out;
+ }
+ gf_msg_debug(this->name, 0,
+ "Parsed as " GEOREP
+ " "
+ " slave:key=%s,value:%s",
+ key, value);
+
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_OP_VERSION,
+ SLEN(GLUSTERD_STORE_KEY_VOL_OP_VERSION))) {
+ volinfo->op_version = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION,
+ SLEN(GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION))) {
+ volinfo->client_op_version = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT,
+ SLEN(GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT))) {
+ volinfo->snap_max_hard_limit = (uint64_t)atoll(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP,
+ SLEN(GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP))) {
+ ret = gf_uuid_parse(value, volinfo->restored_from_snap);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_UUID_PARSE_FAIL,
+ "failed to parse restored snap's uuid");
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_PARENT_VOLNAME,
+ SLEN(GLUSTERD_STORE_KEY_PARENT_VOLNAME))) {
+ if (snprintf(volinfo->parent_volname,
+ sizeof(volinfo->parent_volname), "%s",
+ value) >= sizeof(volinfo->parent_volname)) {
+ gf_msg("glusterd", GF_LOG_ERROR, op_errno,
+ GD_MSG_PARSE_BRICKINFO_FAIL,
+ "parent_volname truncated: %s", volinfo->parent_volname);
+ goto out;
+ }
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION,
+ SLEN(GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION))) {
+ volinfo->quota_xattr_version = atoi(value);
+ } else {
+ if (is_key_glusterd_hooks_friendly(key)) {
+ exists = 1;
+
+ } else {
+ exists = glusterd_check_option_exists(key, NULL);
+ }
+
+ switch (exists) {
+ case -1:
+ ret = -1;
+ goto out;
+
+ case 0:
+ /*Ignore GLUSTERD_STORE_KEY_VOL_BRICK since
+ glusterd_store_retrieve_bricks gets it later.
+ also, ignore tier-enabled key as we deprecated
+ tier xlator*/
+ if (!strstr(key, GLUSTERD_STORE_KEY_VOL_BRICK) ||
+ !strstr(key, GF_TIER_ENABLED))
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_UNKNOWN_KEY, "Unknown key: %s", key);
+ break;
+
+ case 1:
+ /*The following strcmp check is to ensure that
+ * glusterd does not restore the quota limits
+ * into volinfo->dict post upgradation from 3.3
+ * to 3.4 as the same limits will now be stored
+ * in xattrs on the respective directories.
+ */
+ if (!strcmp(key, "features.limit-usage"))
+ break;
+ ret = dict_set_str(volinfo->dict, key, gf_strdup(value));
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_DICT_SET_FAILED,
+ "Error in "
+ "dict_set_str");
+ goto out;
+ }
+ gf_msg_debug(this->name, 0,
+ "Parsed as Volume-"
+ "set:key=%s,value:%s",
+ key, value);
+ break;
+ }
+ }
- while (!ret) {
- if (!strncmp (key, GLUSTERD_STORE_KEY_RB_STATUS,
- strlen (GLUSTERD_STORE_KEY_RB_STATUS))) {
- volinfo->rb_status = atoi (value);
- }
+ GF_FREE(key);
+ GF_FREE(value);
+ key = NULL;
+ value = NULL;
+
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ }
+
+ /* backward compatibility */
+ {
+ switch (volinfo->type) {
+ case GF_CLUSTER_TYPE_NONE:
+ volinfo->stripe_count = 1;
+ volinfo->replica_count = 1;
+ break;
+
+ case GF_CLUSTER_TYPE_REPLICATE:
+ volinfo->stripe_count = 1;
+ volinfo->replica_count = volinfo->sub_count;
+ break;
+
+ case GF_CLUSTER_TYPE_DISPERSE:
+ GF_ASSERT(volinfo->disperse_count > 0);
+ GF_ASSERT(volinfo->redundancy_count > 0);
+ break;
+
+ case GF_CLUSTER_TYPE_STRIPE:
+ case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
+ gf_msg(this->name, GF_LOG_CRITICAL, ENOTSUP,
+ GD_MSG_VOLINFO_STORE_FAIL,
+ "The volume type is no more supported. Please refer to "
+ "glusterfs-6.0 release-notes for how to migrate from "
+ "this volume type");
+ break;
+
+ default:
+ GF_ASSERT(0);
+ break;
+ }
- if (volinfo->rb_status > GF_RB_STATUS_NONE) {
- if (!strncmp (key, GLUSTERD_STORE_KEY_RB_SRC_BRICK,
- strlen (GLUSTERD_STORE_KEY_RB_SRC_BRICK))) {
- ret = glusterd_brickinfo_from_brick (value,
- &volinfo->src_brick);
- if (ret)
- goto out;
- } else if (!strncmp (key, GLUSTERD_STORE_KEY_RB_DST_BRICK,
- strlen (GLUSTERD_STORE_KEY_RB_DST_BRICK))) {
- ret = glusterd_brickinfo_from_brick (value,
- &volinfo->dst_brick);
- if (ret)
- goto out;
- }
- }
+ volinfo->dist_leaf_count = glusterd_get_dist_leaf_count(volinfo);
- GF_FREE (key);
- GF_FREE (value);
- key = NULL;
- value = NULL;
+ volinfo->subvol_count = (volinfo->brick_count /
+ volinfo->dist_leaf_count);
- ret = glusterd_store_iter_get_next (iter, &key, &value,
- &op_errno);
- }
+ /* Only calculate volume op-versions if they are not found */
+ if (!volinfo->op_version && !volinfo->client_op_version)
+ gd_update_volume_op_versions(volinfo);
+ }
- if (op_errno != GD_STORE_EOF)
- goto out;
-
- ret = glusterd_store_iter_destroy (iter);
+ if (op_errno != GD_STORE_EOF)
+ goto out;
- if (ret)
- goto out;
+ ret = 0;
out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ if (gf_store_iter_destroy(&iter)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
+ "Failed to destroy store iter");
+ ret = -1;
+ }
- return ret;
+ return ret;
}
-int32_t
-glusterd_store_retrieve_node_state (char *volname)
+glusterd_volinfo_t *
+glusterd_store_retrieve_volume(char *volname, glusterd_snap_t *snap)
{
- int32_t ret = -1;
- glusterd_volinfo_t *volinfo = NULL;
- glusterd_store_iter_t *iter = NULL;
- char *key = NULL;
- char *value = NULL;
- char volpath[PATH_MAX] = {0,};
- glusterd_conf_t *priv = NULL;
- char path[PATH_MAX] = {0,};
- glusterd_store_op_errno_t op_errno = GD_STORE_SUCCESS;
-
- priv = THIS->private;
-
- ret = glusterd_volinfo_find (volname, &volinfo);
+ int32_t ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_volinfo_t *origin_volinfo = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(volname);
+
+ ret = glusterd_volinfo_new(&volinfo);
+ if (ret)
+ goto out;
+
+ if (snprintf(volinfo->volname, NAME_MAX + 1, "%s", volname) >= NAME_MAX + 1)
+ goto out;
+ volinfo->snapshot = snap;
+ if (snap)
+ volinfo->is_snap_volume = _gf_true;
+
+ ret = glusterd_store_update_volinfo(volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_UPDATE_FAIL,
+ "Failed to update volinfo "
+ "for %s volume",
+ volname);
+ goto out;
+ }
+
+ ret = glusterd_store_retrieve_bricks(volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_retrieve_snapd(volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_compute_cksum(volinfo, _gf_false);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_retrieve_quota_version(volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_create_quota_conf_sh_on_absence(volinfo);
+ if (ret)
+ goto out;
+
+ ret = glusterd_compute_cksum(volinfo, _gf_true);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_save_quota_version_and_cksum(volinfo);
+ if (ret)
+ goto out;
+
+ if (!snap) {
+ glusterd_list_add_order(&volinfo->vol_list, &priv->volumes,
+ glusterd_compare_volume_name);
+
+ } else {
+ ret = glusterd_volinfo_find(volinfo->parent_volname, &origin_volinfo);
if (ret) {
- gf_log (THIS->name, GF_LOG_ERROR, "Couldn't get"
- "volinfo for %s.", volname);
- goto out;
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Parent volinfo "
+ "not found for %s volume",
+ volname);
+ goto out;
}
+ glusterd_list_add_snapvol(origin_volinfo, volinfo);
+ }
- GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv);
- snprintf (path, sizeof (path), "%s/%s", volpath,
- GLUSTERD_NODE_STATE_FILE);
-
- ret = glusterd_store_handle_retrieve (path,
- &volinfo->node_state_shandle);
+out:
+ if (ret) {
+ if (volinfo)
+ glusterd_volinfo_unref(volinfo);
+ volinfo = NULL;
+ }
- if (ret)
- goto out;
+ gf_msg_trace(this->name, 0, "Returning with %d", ret);
- ret = glusterd_store_iter_new (volinfo->node_state_shandle, &iter);
+ return volinfo;
+}
- if (ret)
- goto out;
+static void
+glusterd_store_set_options_path(glusterd_conf_t *conf, char *path, size_t len)
+{
+ snprintf(path, len, "%s/options", conf->workdir);
+}
- ret = glusterd_store_iter_get_next (iter, &key, &value, &op_errno);
- if (ret)
- goto out;
- if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_DEFRAG,
- strlen (GLUSTERD_STORE_KEY_VOL_DEFRAG))) {
- volinfo->defrag_cmd = atoi (value);
+int32_t
+glusterd_store_options(xlator_t *this, dict_t *opts)
+{
+ gf_store_handle_t *shandle = NULL;
+ glusterd_conf_t *conf = NULL;
+ char path[PATH_MAX] = {0};
+ int fd = -1;
+ int32_t ret = -1;
+ glusterd_volinfo_data_store_t *dict_data = NULL;
+
+ conf = this->private;
+ glusterd_store_set_options_path(conf, path, sizeof(path));
+
+ ret = gf_store_handle_new(path, &shandle);
+ if (ret) {
+ goto out;
+ }
+
+ fd = gf_store_mkstemp(shandle);
+ if (fd <= 0) {
+ ret = -1;
+ goto out;
+ }
+
+ dict_data = GF_CALLOC(1, sizeof(glusterd_volinfo_data_store_t),
+ gf_gld_mt_volinfo_dict_data_t);
+ if (dict_data == NULL) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_NO_MEMORY, NULL);
+ return -1;
+ }
+ dict_data->shandle = shandle;
+ shandle->fd = fd;
+ dict_foreach(opts, _storeopts, (void *)dict_data);
+ if (dict_data->buffer_len > 0) {
+ ret = gf_store_save_items(fd, dict_data->buffer);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, 0, GD_MSG_FILE_OP_FAILED, NULL);
+ goto out;
}
+ }
- GF_FREE (key);
- GF_FREE (value);
-
- if (op_errno != GD_STORE_EOF)
- goto out;
-
- ret = glusterd_store_iter_destroy (iter);
-
- if (ret)
- goto out;
-
+ ret = gf_store_rename_tmppath(shandle);
out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ shandle->fd = 0;
+ GF_FREE(dict_data);
+ if ((ret < 0) && (fd > 0)) {
+ gf_store_unlink_tmppath(shandle);
+ }
+ gf_store_handle_destroy(shandle);
+ return ret;
+}
- return ret;
+int32_t
+glusterd_store_retrieve_options(xlator_t *this)
+{
+ char path[PATH_MAX] = {0};
+ glusterd_conf_t *conf = NULL;
+ gf_store_handle_t *shandle = NULL;
+ gf_store_iter_t *iter = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ gf_store_op_errno_t op_errno = 0;
+ int ret = -1;
+
+ conf = this->private;
+ glusterd_store_set_options_path(conf, path, sizeof(path));
+
+ ret = gf_store_handle_retrieve(path, &shandle);
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_new(shandle, &iter);
+ if (ret)
+ goto out;
+
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ while (!ret) {
+ ret = dict_set_dynstr(conf->opts, key, value);
+ if (ret) {
+ GF_FREE(key);
+ GF_FREE(value);
+ goto out;
+ }
+ GF_FREE(key);
+ key = NULL;
+ value = NULL;
+
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ }
+ if (op_errno != GD_STORE_EOF)
+ goto out;
+ ret = 0;
+out:
+ (void)gf_store_iter_destroy(&iter);
+ gf_store_handle_destroy(shandle);
+ return ret;
}
int32_t
-glusterd_store_retrieve_volume (char *volname)
+glusterd_store_retrieve_volumes(xlator_t *this, glusterd_snap_t *snap)
{
- int32_t ret = -1;
- glusterd_volinfo_t *volinfo = NULL;
- glusterd_store_iter_t *iter = NULL;
- char *key = NULL;
- char *value = NULL;
- char volpath[PATH_MAX] = {0,};
- glusterd_conf_t *priv = NULL;
- char path[PATH_MAX] = {0,};
- int exists = 0;
- glusterd_store_op_errno_t op_errno = GD_STORE_SUCCESS;
+ int32_t ret = -1;
+ char path[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ glusterd_volinfo_t *volinfo = NULL;
+ struct stat st = {
+ 0,
+ };
+ char entry_path[PATH_MAX] = {
+ 0,
+ };
+ int32_t len = 0;
+
+ GF_ASSERT(this);
+ priv = this->private;
+
+ GF_ASSERT(priv);
+
+ if (snap)
+ len = snprintf(path, PATH_MAX, "%s/snaps/%s", priv->workdir,
+ snap->snapname);
+ else
+ len = snprintf(path, PATH_MAX, "%s/%s", priv->workdir,
+ GLUSTERD_VOLUME_DIR_PREFIX);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ goto out;
+ }
+
+ dir = sys_opendir(path);
+
+ if (!dir) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Unable to open dir %s", path);
+ goto out;
+ }
+
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
+ if (snap && ((!strcmp(entry->d_name, "geo-replication")) ||
+ (!strcmp(entry->d_name, "info"))))
+ continue;
+
+ len = snprintf(entry_path, PATH_MAX, "%s/%s", path, entry->d_name);
+ if ((len < 0) || (len >= PATH_MAX))
+ continue;
+
+ ret = sys_lstat(entry_path, &st);
+ if (ret == -1) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_INVALID_ENTRY,
+ "Failed to stat entry %s : %s", path, strerror(errno));
+ continue;
+ }
- ret = glusterd_volinfo_new (&volinfo);
+ if (!S_ISDIR(st.st_mode)) {
+ gf_msg_debug(this->name, 0, "%s is not a valid volume",
+ entry->d_name);
+ continue;
+ }
- if (ret)
- goto out;
+ volinfo = glusterd_store_retrieve_volume(entry->d_name, snap);
+ if (!volinfo) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOL_RESTORE_FAIL,
+ "Unable to restore "
+ "volume: %s",
+ entry->d_name);
+ ret = -1;
+ goto out;
+ }
- strncpy (volinfo->volname, volname, GLUSTERD_MAX_VOLUME_NAME);
+ ret = glusterd_store_retrieve_node_state(volinfo);
+ if (ret) {
+ /* Backward compatibility */
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NEW_NODE_STATE_CREATION,
+ "Creating a new node_state "
+ "for volume: %s.",
+ entry->d_name);
+ glusterd_store_create_nodestate_sh_on_absence(volinfo);
+ glusterd_store_perform_node_state_store(volinfo);
+ }
+ }
- priv = THIS->private;
+ ret = 0;
- GLUSTERD_GET_VOLUME_DIR(volpath, volinfo, priv);
- snprintf (path, sizeof (path), "%s/%s", volpath,
- GLUSTERD_VOLUME_INFO_FILE);
+out:
+ if (dir)
+ sys_closedir(dir);
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
- ret = glusterd_store_handle_retrieve (path, &volinfo->shandle);
+ return ret;
+}
- if (ret)
- goto out;
+/* Figure out the brick mount path, from the brick path */
+int32_t
+glusterd_find_brick_mount_path(char *brick_path, char **brick_mount_path)
+{
+ char *ptr = NULL;
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(brick_path);
+ GF_ASSERT(brick_mount_path);
+
+ *brick_mount_path = gf_strdup(brick_path);
+ if (!*brick_mount_path) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Finding the pointer to the end of
+ * /var/run/gluster/snaps/<snap-uuid>
+ */
+ ptr = strstr(*brick_mount_path, "brick");
+ if (!ptr) {
+ /* Snapshot bricks must have brick num as part
+ * of the brickpath
+ */
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL, GD_MSG_INVALID_ENTRY,
+ "Invalid brick path(%s)", brick_path);
+ ret = -1;
+ goto out;
+ }
+
+ /* Moving the pointer to the end of
+ * /var/run/gluster/snaps/<snap-uuid>/<brick_num>
+ * and assigning '\0' to it.
+ */
+ while ((*ptr != '\0') && (*ptr != '/'))
+ ptr++;
+
+ if (*ptr == '/') {
+ *ptr = '\0';
+ }
+
+ ret = 0;
+out:
+ if (ret && *brick_mount_path) {
+ GF_FREE(*brick_mount_path);
+ *brick_mount_path = NULL;
+ }
+ gf_msg_trace(this->name, 0, "Returning with %d", ret);
+ return ret;
+}
- ret = glusterd_store_iter_new (volinfo->shandle, &iter);
+/* Check if brick_mount_path is already mounted. If not, mount the device_path
+ * at the brick_mount_path
+ */
+int32_t
+glusterd_mount_brick_paths(char *brick_mount_path,
+ glusterd_brickinfo_t *brickinfo)
+{
+ int32_t ret = -1;
+ runner_t runner = {
+ 0,
+ };
+ char buff[PATH_MAX] = {
+ 0,
+ };
+ struct mntent save_entry = {
+ 0,
+ };
+ struct mntent *entry = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+ GF_ASSERT(brick_mount_path);
+ GF_ASSERT(brickinfo);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ /* Check if the brick_mount_path is already mounted */
+ entry = glusterd_get_mnt_entry_info(brick_mount_path, buff, sizeof(buff),
+ &save_entry);
+ if (entry) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_ALREADY_MOUNTED,
+ "brick_mount_path (%s) already mounted.", brick_mount_path);
+ ret = 0;
+ goto out;
+ }
+
+ /* TODO RHEL 6.5 has the logical volumes inactive by default
+ * on reboot. Hence activating the logical vol. Check behaviour
+ * on other systems
+ */
+ /* Activate the snapshot */
+ runinit(&runner);
+ runner_add_args(&runner, "lvchange", "-ay", brickinfo->device_path, NULL);
+ ret = runner_run(&runner);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_SNAP_ACTIVATE_FAIL,
+ "Failed to activate %s.", brickinfo->device_path);
+ goto out;
+ } else
+ gf_msg_debug(this->name, 0, "Activating %s successful",
+ brickinfo->device_path);
+
+ /* Mount the snapshot */
+ ret = glusterd_mount_lvm_snapshot(brickinfo, brick_mount_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_MOUNT_FAIL,
+ "Failed to mount lvm snapshot.");
+ goto out;
+ }
- if (ret)
- goto out;
+out:
+ gf_msg_trace(this->name, 0, "Returning with %d", ret);
+ return ret;
+}
- ret = glusterd_store_iter_get_next (iter, &key, &value, &op_errno);
- if (ret)
- goto out;
+int32_t
+glusterd_recreate_vol_brick_mounts(xlator_t *this, glusterd_volinfo_t *volinfo)
+{
+ char *brick_mount_path = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ int32_t ret = -1;
+ struct stat st_buf = {
+ 0,
+ };
+ char abspath[PATH_MAX] = {0};
+
+ GF_ASSERT(this);
+ GF_ASSERT(volinfo);
+
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ /* If the brick is not of this node, or its
+ * snapshot is pending, or the brick is not
+ * a snapshotted brick, we continue
+ */
+ if ((gf_uuid_compare(brickinfo->uuid, MY_UUID)) ||
+ (brickinfo->snap_status == -1) ||
+ (strlen(brickinfo->device_path) == 0))
+ continue;
+
+ /* Fetch the brick mount path from the brickinfo->path */
+ ret = glusterd_find_brick_mount_path(brickinfo->path,
+ &brick_mount_path);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MNTPATH_GET_FAIL,
+ "Failed to find brick_mount_path for %s", brickinfo->path);
+ goto out;
+ }
- while (!ret) {
- if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TYPE,
- strlen (GLUSTERD_STORE_KEY_VOL_TYPE))) {
- volinfo->type = atoi (value);
- } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_COUNT,
- strlen (GLUSTERD_STORE_KEY_VOL_COUNT))) {
- volinfo->brick_count = atoi (value);
- } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_STATUS,
- strlen (GLUSTERD_STORE_KEY_VOL_STATUS))) {
- volinfo->status = atoi (value);
- } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_VERSION,
- strlen (GLUSTERD_STORE_KEY_VOL_VERSION))) {
- volinfo->version = atoi (value);
- } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_PORT,
- strlen (GLUSTERD_STORE_KEY_VOL_PORT))) {
- volinfo->port = atoi (value);
- } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_SUB_COUNT,
- strlen (GLUSTERD_STORE_KEY_VOL_SUB_COUNT))) {
- volinfo->sub_count = atoi (value);
- } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_STRIPE_CNT,
- strlen (GLUSTERD_STORE_KEY_VOL_STRIPE_CNT))) {
- volinfo->stripe_count = atoi (value);
- } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_REPLICA_CNT,
- strlen (GLUSTERD_STORE_KEY_VOL_REPLICA_CNT))) {
- volinfo->replica_count = atoi (value);
- } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_TRANSPORT,
- strlen (GLUSTERD_STORE_KEY_VOL_TRANSPORT))) {
- volinfo->transport_type = atoi (value);
- volinfo->nfs_transport_type = volinfo->transport_type;
- if (volinfo->transport_type == GF_TRANSPORT_BOTH_TCP_RDMA) {
- volinfo->nfs_transport_type = GF_DEFAULT_NFS_TRANSPORT;
- }
- } else if (!strncmp (key, GLUSTERD_STORE_KEY_VOL_ID,
- strlen (GLUSTERD_STORE_KEY_VOL_ID))) {
- ret = uuid_parse (value, volinfo->volume_id);
- if (ret)
- gf_log ("", GF_LOG_WARNING,
- "failed to parse uuid");
-
- } else if (!strncmp (key, GLUSTERD_STORE_KEY_USERNAME,
- strlen (GLUSTERD_STORE_KEY_USERNAME))) {
-
- glusterd_auth_set_username (volinfo, value);
-
- } else if (!strncmp (key, GLUSTERD_STORE_KEY_PASSWORD,
- strlen (GLUSTERD_STORE_KEY_PASSWORD))) {
-
- glusterd_auth_set_password (volinfo, value);
-
- } else if (strstr (key, "slave")) {
- ret = dict_set_dynstr (volinfo->gsync_slaves, key,
- gf_strdup (value));
- if (ret) {
- gf_log ("",GF_LOG_ERROR, "Error in "
- "dict_set_str");
- goto out;
- }
- gf_log ("", GF_LOG_DEBUG, "Parsed as "GEOREP" "
- " slave:key=%s,value:%s", key, value);
- } else {
- exists = glusterd_check_option_exists (key, NULL);
- if (exists == -1) {
- ret = -1;
- goto out;
- }
- if (exists) {
- ret = dict_set_str(volinfo->dict, key,
- gf_strdup (value));
- if (ret) {
- gf_log ("",GF_LOG_ERROR, "Error in "
- "dict_set_str");
- goto out;
- }
- gf_log ("", GF_LOG_DEBUG, "Parsed as Volume-"
- "set:key=%s,value:%s",
- key, value);
- }
- else
- gf_log ("", GF_LOG_ERROR, "Unknown key: %s",
- key);
+ /* Check if the brickinfo path is present.
+ * If not create the brick_mount_path */
+ ret = sys_lstat(brickinfo->path, &st_buf);
+ if (ret) {
+ if (errno == ENOENT) {
+ ret = mkdir_p(brick_mount_path, 0755, _gf_true);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_CREATE_DIR_FAILED, "Failed to create %s. ",
+ brick_mount_path);
+ goto out;
}
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Brick Path(%s) not valid. ", brickinfo->path);
+ goto out;
+ }
+ }
- GF_FREE (key);
- GF_FREE (value);
- key = NULL;
- value = NULL;
-
- ret = glusterd_store_iter_get_next (iter, &key, &value,
- &op_errno);
+ /* Check if brick_mount_path is already mounted.
+ * If not, mount the device_path at the brick_mount_path */
+ ret = glusterd_mount_brick_paths(brick_mount_path, brickinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MNTPATH_MOUNT_FAIL,
+ "Failed to mount brick_mount_path");
+ }
+ if (!gf_uuid_compare(brickinfo->uuid, MY_UUID)) {
+ if (brickinfo->real_path[0] == '\0') {
+ if (!realpath(brickinfo->path, abspath)) {
+ gf_msg(this->name, GF_LOG_CRITICAL, errno,
+ GD_MSG_BRICKINFO_CREATE_FAIL,
+ "realpath() failed for brick %s"
+ ". The underlying file system "
+ "may be in bad state",
+ brickinfo->path);
+ ret = -1;
+ goto out;
+ }
+ if (strlen(abspath) >= sizeof(brickinfo->real_path)) {
+ ret = -1;
+ goto out;
+ }
+ (void)strncpy(brickinfo->real_path, abspath,
+ sizeof(brickinfo->real_path));
+ }
}
- /* backward compatibility */
- {
+ if (brick_mount_path) {
+ GF_FREE(brick_mount_path);
+ brick_mount_path = NULL;
+ }
+ }
- switch (volinfo->type) {
+ ret = 0;
+out:
+ if (ret && brick_mount_path)
+ GF_FREE(brick_mount_path);
- case GF_CLUSTER_TYPE_NONE:
- volinfo->stripe_count = 1;
- volinfo->replica_count = 1;
- break;
+ gf_msg_trace(this->name, 0, "Returning with %d", ret);
+ return ret;
+}
- case GF_CLUSTER_TYPE_STRIPE:
- volinfo->stripe_count = volinfo->sub_count;
- volinfo->replica_count = 1;
- break;
+int32_t
+glusterd_resolve_snap_bricks(xlator_t *this, glusterd_snap_t *snap)
+{
+ int32_t ret = -1;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
- case GF_CLUSTER_TYPE_REPLICATE:
- volinfo->stripe_count = 1;
- volinfo->replica_count = volinfo->sub_count;
- break;
+ GF_ASSERT(this);
+ GF_VALIDATE_OR_GOTO(this->name, snap, out);
- case GF_CLUSTER_TYPE_STRIPE_REPLICATE:
- /* Introduced in 3.3 */
- GF_ASSERT (volinfo->stripe_count > 0);
- GF_ASSERT (volinfo->replica_count > 0);
- break;
+ cds_list_for_each_entry(volinfo, &snap->volumes, vol_list)
+ {
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ ret = glusterd_resolve_brick(brickinfo);
+ if (ret) {
+ gf_event(EVENT_BRICKPATH_RESOLVE_FAILED,
+ "peer=%s;volume=%s;brick=%s", brickinfo->hostname,
+ volinfo->volname, brickinfo->path);
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_RESOLVE_BRICK_FAIL,
+ "resolve brick failed in restore");
+ goto out;
+ }
+ }
+ }
- default:
- GF_ASSERT (0);
- break;
- }
+ ret = 0;
- volinfo->dist_leaf_count = (volinfo->stripe_count *
- volinfo->replica_count);
- }
+out:
+ gf_msg_trace(this->name, 0, "Returning with %d", ret);
- if (op_errno != GD_STORE_EOF)
- goto out;
+ return ret;
+}
- ret = glusterd_store_iter_destroy (iter);
+int
+glusterd_store_update_snap(glusterd_snap_t *snap)
+{
+ int ret = -1;
+ char *key = NULL;
+ char *value = NULL;
+ char snappath[PATH_MAX] = {
+ 0,
+ };
+ char path[PATH_MAX] = {
+ 0,
+ };
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ gf_store_iter_t *iter = NULL;
+ gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
+ int32_t len = 0;
+
+ this = THIS;
+ conf = this->private;
+ GF_ASSERT(snap);
+
+ GLUSTERD_GET_SNAP_DIR(snappath, snap, conf);
+
+ len = snprintf(path, sizeof(path), "%s/%s", snappath,
+ GLUSTERD_SNAP_INFO_FILE);
+ if ((len < 0) || (len >= sizeof(path))) {
+ goto out;
+ }
+
+ ret = gf_store_handle_retrieve(path, &snap->shandle);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_HANDLE_NULL,
+ "snap handle is NULL");
+ goto out;
+ }
+
+ ret = gf_store_iter_new(snap->shandle, &iter);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_GET_FAIL,
+ "Failed to get new store "
+ "iter");
+ goto out;
+ }
+
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_GET_FAIL,
+ "Failed to get next store "
+ "iter");
+ goto out;
+ }
+
+ while (!ret) {
+ gf_msg_debug(this->name, 0, "key = %s value = %s", key, value);
+
+ if (!strncmp(key, GLUSTERD_STORE_KEY_SNAP_ID,
+ SLEN(GLUSTERD_STORE_KEY_SNAP_ID))) {
+ ret = gf_uuid_parse(value, snap->snap_id);
+ if (ret)
+ gf_msg(this->name, GF_LOG_WARNING, 0, GD_MSG_UUID_PARSE_FAIL,
+ "Failed to parse uuid");
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_SNAP_RESTORED,
+ SLEN(GLUSTERD_STORE_KEY_SNAP_RESTORED))) {
+ snap->snap_restored = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_SNAP_STATUS,
+ SLEN(GLUSTERD_STORE_KEY_SNAP_STATUS))) {
+ snap->snap_status = atoi(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_SNAP_DESC,
+ SLEN(GLUSTERD_STORE_KEY_SNAP_DESC))) {
+ snap->description = gf_strdup(value);
+ } else if (!strncmp(key, GLUSTERD_STORE_KEY_SNAP_TIMESTAMP,
+ SLEN(GLUSTERD_STORE_KEY_SNAP_TIMESTAMP))) {
+ snap->time_stamp = atoi(value);
+ }
- if (ret)
- goto out;
+ GF_FREE(key);
+ GF_FREE(value);
+ key = NULL;
+ value = NULL;
- ret = glusterd_store_retrieve_bricks (volinfo);
- if (ret)
- goto out;
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ }
- ret = glusterd_volume_compute_cksum (volinfo);
- if (ret)
- goto out;
+ if (op_errno != GD_STORE_EOF)
+ goto out;
- list_add_tail (&volinfo->vol_list, &priv->volumes);
+ ret = 0;
out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ if (gf_store_iter_destroy(&iter)) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_ITER_DESTROY_FAIL,
+ "Failed to destroy store iter");
+ ret = -1;
+ }
- return ret;
+ return ret;
}
-
int32_t
-glusterd_store_retrieve_volumes (xlator_t *this)
+glusterd_store_retrieve_snap(char *snapname)
{
- int32_t ret = 0;
- char path[PATH_MAX] = {0,};
- glusterd_conf_t *priv = NULL;
- DIR *dir = NULL;
- struct dirent *entry = NULL;
- glusterd_volinfo_t *volinfo = NULL;
+ int32_t ret = -1;
+ glusterd_snap_t *snap = NULL;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ priv = this->private;
+ GF_ASSERT(priv);
+ GF_ASSERT(snapname);
+
+ snap = glusterd_new_snap_object();
+ if (!snap) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_OBJECT_STORE_FAIL,
+ "Failed to create "
+ " snap object");
+ goto out;
+ }
+
+ if (snprintf(snap->snapname, sizeof(snap->snapname), "%s", snapname) >=
+ sizeof(snap->snapname))
+ goto out;
+ ret = glusterd_store_update_snap(snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAPSHOT_UPDATE_FAIL,
+ "Failed to update snapshot "
+ "for %s snap",
+ snapname);
+ goto out;
+ }
+
+ ret = glusterd_store_retrieve_volumes(this, snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_VOL_RETRIEVE_FAIL,
+ "Failed to retrieve "
+ "snap volumes for snap %s",
+ snapname);
+ goto out;
+ }
+
+ /* TODO: list_add_order can do 'N-square' comparisons and
+ is not efficient. Find a better solution to store the snap
+ in order */
+ glusterd_list_add_order(&snap->snap_list, &priv->snapshots,
+ glusterd_compare_snap_time);
- GF_ASSERT (this);
- priv = this->private;
-
- GF_ASSERT (priv);
+out:
+ gf_msg_trace(this->name, 0, "Returning with %d", ret);
+ return ret;
+}
- snprintf (path, PATH_MAX, "%s/%s", priv->workdir,
- GLUSTERD_VOLUME_DIR_PREFIX);
+/* Read the missed_snap_list and update the in-memory structs */
+int32_t
+glusterd_store_retrieve_missed_snaps_list(xlator_t *this)
+{
+ char path[PATH_MAX] = "";
+ char *snap_vol_id = NULL;
+ char *missed_node_info = NULL;
+ char *brick_path = NULL;
+ char *value = NULL;
+ char *save_ptr = NULL;
+ FILE *fp = NULL;
+ int32_t brick_num = -1;
+ int32_t snap_op = -1;
+ int32_t snap_status = -1;
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ gf_store_op_errno_t store_errno = GD_STORE_SUCCESS;
+
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ /* Get the path of the missed_snap_list */
+ glusterd_store_missed_snaps_list_path_set(path, sizeof(path));
+
+ fp = fopen(path, "r");
+ if (!fp) {
+ /* If errno is ENOENT then there are no missed snaps yet */
+ if (errno != ENOENT) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Failed to open %s. ", path);
+ } else {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_MISSED_SNAP_LIST_EMPTY,
+ "No missed snaps list.");
+ ret = 0;
+ }
+ goto out;
+ }
- dir = opendir (path);
+ do {
+ ret = gf_store_read_and_tokenize(fp, &missed_node_info, &value,
+ &store_errno);
+ if (ret) {
+ if (store_errno == GD_STORE_EOF) {
+ gf_msg_debug(this->name, 0, "EOF for missed_snap_list");
+ ret = 0;
+ break;
+ }
+ gf_msg(this->name, GF_LOG_ERROR, store_errno,
+ GD_MSG_MISSED_SNAP_GET_FAIL,
+ "Failed to fetch data from "
+ "missed_snaps_list.");
+ goto out;
+ }
- if (!dir) {
- gf_log ("", GF_LOG_ERROR, "Unable to open dir %s", path);
- ret = -1;
- goto out;
+ /* Fetch the brick_num, brick_path, snap_op and snap status */
+ snap_vol_id = strtok_r(value, ":", &save_ptr);
+ brick_num = atoi(strtok_r(NULL, ":", &save_ptr));
+ brick_path = strtok_r(NULL, ":", &save_ptr);
+ snap_op = atoi(strtok_r(NULL, ":", &save_ptr));
+ snap_status = atoi(strtok_r(NULL, ":", &save_ptr));
+
+ if (!missed_node_info || !brick_path || !snap_vol_id || brick_num < 1 ||
+ snap_op < 1 || snap_status < 1) {
+ gf_msg(this->name, GF_LOG_ERROR, EINVAL,
+ GD_MSG_INVALID_MISSED_SNAP_ENTRY,
+ "Invalid missed_snap_entry");
+ ret = -1;
+ goto out;
}
- glusterd_for_each_entry (entry, dir);
+ ret = glusterd_add_new_entry_to_list(missed_node_info, snap_vol_id,
+ brick_num, brick_path, snap_op,
+ snap_status);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MISSED_SNAP_LIST_STORE_FAIL,
+ "Failed to store missed snaps_list");
+ goto out;
+ }
- while (entry) {
- ret = glusterd_store_retrieve_volume (entry->d_name);
- if (ret) {
- gf_log ("", GF_LOG_ERROR, "Unable to restore "
- "volume: %s", entry->d_name);
- goto out;
- }
+ } while (store_errno == GD_STORE_SUCCESS);
- ret = glusterd_store_retrieve_rbstate (entry->d_name);
- if (ret) {
- /* Backward compatibility */
- gf_log ("", GF_LOG_INFO, "Creating a new rbstate "
- "for volume: %s.", entry->d_name);
- ret = glusterd_volinfo_find (entry->d_name, &volinfo);
- ret = glusterd_store_create_rbstate_shandle_on_absence (volinfo);
- ret = glusterd_store_perform_rbstate_store (volinfo);
- }
+ ret = 0;
+out:
+ if (fp)
+ fclose(fp);
- ret = glusterd_store_retrieve_node_state (entry->d_name);
- if (ret) {
- /* Backward compatibility */
- gf_log ("", GF_LOG_INFO, "Creating a new node_state "
- "for volume: %s.", entry->d_name);
- ret = glusterd_volinfo_find (entry->d_name, &volinfo);
- ret =
- glusterd_store_create_nodestate_sh_on_absence (volinfo);
- ret = glusterd_store_perform_node_state_store (volinfo);
+ gf_msg_trace(this->name, 0, "Returning with %d", ret);
+ return ret;
+}
- }
- glusterd_for_each_entry (entry, dir);
+int32_t
+glusterd_store_retrieve_snaps(xlator_t *this)
+{
+ int32_t ret = 0;
+ char path[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ int32_t len = 0;
+
+ GF_ASSERT(this);
+ priv = this->private;
+
+ GF_ASSERT(priv);
+
+ len = snprintf(path, PATH_MAX, "%s/snaps", priv->workdir);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ ret = -1;
+ goto out;
+ }
+
+ dir = sys_opendir(path);
+
+ if (!dir) {
+ /* If snaps dir doesn't exists ignore the error for
+ backward compatibility */
+ if (errno != ENOENT) {
+ ret = -1;
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Unable to open dir %s", path);
+ }
+ goto out;
+ }
+
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
+ if (strcmp(entry->d_name, GLUSTERD_MISSED_SNAPS_LIST_FILE)) {
+ ret = glusterd_store_retrieve_snap(entry->d_name);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESTORE_FAIL,
+ "Unable to restore snapshot: %s", entry->d_name);
+ goto out;
+ }
}
+ }
+
+ /* Retrieve missed_snaps_list */
+ ret = glusterd_store_retrieve_missed_snaps_list(this);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Failed to retrieve missed_snaps_list");
+ goto out;
+ }
out:
- if (dir)
- closedir (dir);
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ if (dir)
+ sys_closedir(dir);
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
- return ret;
+ return ret;
}
+/* Writes all the contents of conf->missed_snap_list */
int32_t
-glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo)
+glusterd_store_write_missed_snapinfo(int32_t fd)
{
- int32_t ret = -1;
- glusterd_conf_t *priv = NULL;
- char peerdir[PATH_MAX] = {0,};
- char filepath[PATH_MAX] = {0,};
- char hostname_path[PATH_MAX] = {0,};
-
-
- if (!peerinfo) {
- ret = 0;
+ char key[(UUID_SIZE * 2) + 2];
+ char value[PATH_MAX];
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ glusterd_missed_snap_info *missed_snapinfo = NULL;
+ glusterd_snap_op_t *snap_opinfo = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ /* Write the missed_snap_entry */
+ cds_list_for_each_entry(missed_snapinfo, &priv->missed_snaps_list,
+ missed_snaps)
+ {
+ cds_list_for_each_entry(snap_opinfo, &missed_snapinfo->snap_ops,
+ snap_ops_list)
+ {
+ snprintf(key, sizeof(key), "%s:%s", missed_snapinfo->node_uuid,
+ missed_snapinfo->snap_uuid);
+ snprintf(value, sizeof(value), "%s:%d:%s:%d:%d",
+ snap_opinfo->snap_vol_id, snap_opinfo->brick_num,
+ snap_opinfo->brick_path, snap_opinfo->op,
+ snap_opinfo->status);
+ ret = gf_store_save_value(fd, key, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MISSEDSNAP_INFO_SET_FAIL,
+ "Failed to write missed snapinfo");
goto out;
+ }
}
+ }
- priv = THIS->private;
-
- snprintf (peerdir, PATH_MAX, "%s/peers", priv->workdir);
+ ret = 0;
+out:
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+/* Adds the missed snap entries to the in-memory conf->missed_snap_list *
+ * and writes them to disk */
+int32_t
+glusterd_store_update_missed_snaps()
+{
+ int32_t fd = -1;
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = glusterd_store_create_missed_snaps_list_shandle_on_absence();
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_MISSED_SNAP_LIST_STORE_HANDLE_GET_FAIL,
+ "Unable to obtain "
+ "missed_snaps_list store handle.");
+ goto out;
+ }
+
+ fd = gf_store_mkstemp(priv->missed_snaps_list_shandle);
+ if (fd <= 0) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Failed to create tmp file");
+ ret = -1;
+ goto out;
+ }
+
+ ret = glusterd_store_write_missed_snapinfo(fd);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_MISSED_SNAP_CREATE_FAIL,
+ "Failed to write missed snaps to disk");
+ goto out;
+ }
+
+ ret = gf_store_rename_tmppath(priv->missed_snaps_list_shandle);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_FILE_OP_FAILED,
+ "Failed to rename the tmp file");
+ goto out;
+ }
+out:
+ if (ret && (fd > 0)) {
+ ret = gf_store_unlink_tmppath(priv->missed_snaps_list_shandle);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_TMP_FILE_UNLINK_FAIL,
+ "Failed to unlink the tmp file");
+ }
+ ret = -1;
+ }
- if (uuid_is_null (peerinfo->uuid)) {
+ gf_msg_trace(this->name, 0, "Returning %d", ret);
+ return ret;
+}
- if (peerinfo->hostname) {
- snprintf (filepath, PATH_MAX, "%s/%s", peerdir,
- peerinfo->hostname);
- } else {
- ret = 0;
- goto out;
- }
+int32_t
+glusterd_store_delete_peerinfo(glusterd_peerinfo_t *peerinfo)
+{
+ int32_t ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ char peerdir[PATH_MAX] = {
+ 0,
+ };
+ char filepath[PATH_MAX] = {
+ 0,
+ };
+ char hostname_path[PATH_MAX] = {
+ 0,
+ };
+ int32_t len = 0;
+
+ if (!peerinfo) {
+ ret = 0;
+ goto out;
+ }
+
+ this = THIS;
+ priv = this->private;
+
+ len = snprintf(peerdir, PATH_MAX, "%s/peers", priv->workdir);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ goto out;
+ }
+
+ if (gf_uuid_is_null(peerinfo->uuid)) {
+ if (peerinfo->hostname) {
+ len = snprintf(filepath, PATH_MAX, "%s/%s", peerdir,
+ peerinfo->hostname);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ goto out;
+ }
} else {
+ ret = 0;
+ goto out;
+ }
+ } else {
+ len = snprintf(filepath, PATH_MAX, "%s/%s", peerdir,
+ uuid_utoa(peerinfo->uuid));
+ if ((len < 0) || (len >= PATH_MAX)) {
+ goto out;
+ }
+ len = snprintf(hostname_path, PATH_MAX, "%s/%s", peerdir,
+ peerinfo->hostname);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ goto out;
+ }
- snprintf (filepath, PATH_MAX, "%s/%s", peerdir,
- uuid_utoa (peerinfo->uuid));
- snprintf (hostname_path, PATH_MAX, "%s/%s",
- peerdir, peerinfo->hostname);
-
- ret = unlink (hostname_path);
+ ret = sys_unlink(hostname_path);
- if (!ret)
- goto out;
- }
+ if (!ret)
+ goto out;
+ }
- ret = unlink (filepath);
- if (ret && (errno == ENOENT))
- ret = 0;
+ ret = sys_unlink(filepath);
+ if (ret && (errno == ENOENT))
+ ret = 0;
out:
- if (peerinfo->shandle) {
- glusterd_store_handle_destroy (peerinfo->shandle);
- peerinfo->shandle = NULL;
- }
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ if (peerinfo && peerinfo->shandle) {
+ gf_store_handle_destroy(peerinfo->shandle);
+ peerinfo->shandle = NULL;
+ }
+ gf_msg_debug((this ? this->name : "glusterd"), 0, "Returning with %d", ret);
- return ret;
+ return ret;
}
void
-glusterd_store_peerinfo_dirpath_set (char *path, size_t len)
+glusterd_store_peerinfo_dirpath_set(char *path, size_t len)
{
- glusterd_conf_t *priv = NULL;
- GF_ASSERT (path);
- GF_ASSERT (len >= PATH_MAX);
+ glusterd_conf_t *priv = NULL;
+ GF_ASSERT(path);
+ GF_ASSERT(len >= PATH_MAX);
- priv = THIS->private;
- snprintf (path, len, "%s/peers", priv->workdir);
+ priv = THIS->private;
+ snprintf(path, len, "%s/peers", priv->workdir);
}
int32_t
-glusterd_store_create_peer_dir ()
+glusterd_store_create_peer_dir()
{
- int32_t ret = 0;
- char path[PATH_MAX];
+ int32_t ret = 0;
+ char path[PATH_MAX];
- glusterd_store_peerinfo_dirpath_set (path, sizeof (path));
- ret = glusterd_store_mkdir (path);
+ glusterd_store_peerinfo_dirpath_set(path, sizeof(path));
+ ret = gf_store_mkdir(path);
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
- return ret;
+ gf_msg_debug("glusterd", 0, "Returning with %d", ret);
+ return ret;
}
static void
-glusterd_store_uuid_peerpath_set (glusterd_peerinfo_t *peerinfo, char *peerfpath,
- size_t len)
+glusterd_store_uuid_peerpath_set(glusterd_peerinfo_t *peerinfo, char *peerfpath,
+ size_t len)
{
- char peerdir[PATH_MAX];
- char str[50] = {0};
+ char peerdir[PATH_MAX];
+ char str[50] = {0};
- GF_ASSERT (peerinfo);
- GF_ASSERT (peerfpath);
- GF_ASSERT (len >= PATH_MAX);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(peerfpath);
+ GF_ASSERT(len >= PATH_MAX);
- glusterd_store_peerinfo_dirpath_set (peerdir, sizeof (peerdir));
- uuid_unparse (peerinfo->uuid, str);
- snprintf (peerfpath, len, "%s/%s", peerdir, str);
+ glusterd_store_peerinfo_dirpath_set(peerdir, sizeof(peerdir));
+ gf_uuid_unparse(peerinfo->uuid, str);
+ snprintf(peerfpath, len, "%s/%s", peerdir, str);
}
static void
-glusterd_store_hostname_peerpath_set (glusterd_peerinfo_t *peerinfo,
- char *peerfpath, size_t len)
+glusterd_store_hostname_peerpath_set(glusterd_peerinfo_t *peerinfo,
+ char *peerfpath, size_t len)
{
- char peerdir[PATH_MAX];
+ char peerdir[PATH_MAX];
- GF_ASSERT (peerinfo);
- GF_ASSERT (peerfpath);
- GF_ASSERT (len >= PATH_MAX);
+ GF_ASSERT(peerinfo);
+ GF_ASSERT(peerfpath);
+ GF_ASSERT(len >= PATH_MAX);
- glusterd_store_peerinfo_dirpath_set (peerdir, sizeof (peerdir));
- snprintf (peerfpath, len, "%s/%s", peerdir, peerinfo->hostname);
+ glusterd_store_peerinfo_dirpath_set(peerdir, sizeof(peerdir));
+ snprintf(peerfpath, len, "%s/%s", peerdir, peerinfo->hostname);
}
int32_t
-glusterd_store_peerinfo_hostname_shandle_create (glusterd_peerinfo_t *peerinfo)
+glusterd_store_peerinfo_hostname_shandle_create(glusterd_peerinfo_t *peerinfo)
{
- char peerfpath[PATH_MAX];
- int32_t ret = -1;
+ char peerfpath[PATH_MAX];
+ int32_t ret = -1;
- glusterd_store_hostname_peerpath_set (peerinfo, peerfpath,
- sizeof (peerfpath));
- ret = glusterd_store_handle_create_on_absence (&peerinfo->shandle,
- peerfpath);
- return ret;
+ glusterd_store_hostname_peerpath_set(peerinfo, peerfpath,
+ sizeof(peerfpath));
+ ret = gf_store_handle_create_on_absence(&peerinfo->shandle, peerfpath);
+ return ret;
}
int32_t
-glusterd_store_peerinfo_uuid_shandle_create (glusterd_peerinfo_t *peerinfo)
+glusterd_store_peerinfo_uuid_shandle_create(glusterd_peerinfo_t *peerinfo)
{
- char peerfpath[PATH_MAX];
- int32_t ret = -1;
+ char peerfpath[PATH_MAX];
+ int32_t ret = -1;
- glusterd_store_uuid_peerpath_set (peerinfo, peerfpath,
- sizeof (peerfpath));
- ret = glusterd_store_handle_create_on_absence (&peerinfo->shandle,
- peerfpath);
- return ret;
+ glusterd_store_uuid_peerpath_set(peerinfo, peerfpath, sizeof(peerfpath));
+ ret = gf_store_handle_create_on_absence(&peerinfo->shandle, peerfpath);
+ return ret;
}
int32_t
-glusterd_peerinfo_hostname_shandle_check_destroy (glusterd_peerinfo_t *peerinfo)
+glusterd_peerinfo_hostname_shandle_check_destroy(glusterd_peerinfo_t *peerinfo)
{
- char peerfpath[PATH_MAX];
- int32_t ret = -1;
- struct stat stbuf = {0,};
-
- glusterd_store_hostname_peerpath_set (peerinfo, peerfpath,
- sizeof (peerfpath));
- ret = stat (peerfpath, &stbuf);
- if (!ret) {
- if (peerinfo->shandle)
- glusterd_store_handle_destroy (peerinfo->shandle);
- peerinfo->shandle = NULL;
- ret = unlink (peerfpath);
- }
- return ret;
+ char peerfpath[PATH_MAX];
+ int32_t ret = -1;
+ struct stat stbuf = {
+ 0,
+ };
+
+ glusterd_store_hostname_peerpath_set(peerinfo, peerfpath,
+ sizeof(peerfpath));
+ ret = sys_stat(peerfpath, &stbuf);
+ if (!ret) {
+ if (peerinfo->shandle)
+ gf_store_handle_destroy(peerinfo->shandle);
+ peerinfo->shandle = NULL;
+ ret = sys_unlink(peerfpath);
+ }
+ return ret;
}
int32_t
-glusterd_store_create_peer_shandle (glusterd_peerinfo_t *peerinfo)
+glusterd_store_create_peer_shandle(glusterd_peerinfo_t *peerinfo)
{
- int32_t ret = 0;
+ int32_t ret = 0;
- GF_ASSERT (peerinfo);
+ GF_ASSERT(peerinfo);
- if (glusterd_peerinfo_is_uuid_unknown (peerinfo)) {
- ret = glusterd_store_peerinfo_hostname_shandle_create (peerinfo);
- } else {
- ret = glusterd_peerinfo_hostname_shandle_check_destroy (peerinfo);
- ret = glusterd_store_peerinfo_uuid_shandle_create (peerinfo);
+ if (gf_uuid_is_null(peerinfo->uuid)) {
+ ret = glusterd_store_peerinfo_hostname_shandle_create(peerinfo);
+ } else {
+ ret = glusterd_peerinfo_hostname_shandle_check_destroy(peerinfo);
+ ret = glusterd_store_peerinfo_uuid_shandle_create(peerinfo);
+ }
+ return ret;
+}
+
+static int32_t
+glusterd_store_peer_write(int fd, glusterd_peerinfo_t *peerinfo)
+{
+ char buf[PATH_MAX];
+ uint total_len = 0;
+ int32_t ret = 0;
+ int32_t i = 1;
+ glusterd_peer_hostname_t *hostname = NULL;
+
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len, "%s=%s\n%s=%d\n",
+ GLUSTERD_STORE_KEY_PEER_UUID, uuid_utoa(peerinfo->uuid),
+ GLUSTERD_STORE_KEY_PEER_STATE, peerinfo->state.state);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
+ }
+ total_len += ret;
+
+ cds_list_for_each_entry(hostname, &peerinfo->hostnames, hostname_list)
+ {
+ ret = snprintf(buf + total_len, sizeof(buf) - total_len,
+ GLUSTERD_STORE_KEY_PEER_HOSTNAME "%d=%s\n", i,
+ hostname->hostname);
+ if (ret < 0 || ret >= sizeof(buf) - total_len) {
+ ret = -1;
+ goto out;
}
- return ret;
+ total_len += ret;
+ i++;
+ }
+
+ ret = gf_store_save_items(fd, buf);
+out:
+ gf_msg_debug("glusterd", 0, "Returning with %d", ret);
+ return ret;
}
int32_t
-glusterd_store_peer_write (int fd, glusterd_peerinfo_t *peerinfo)
+glusterd_store_perform_peer_store(glusterd_peerinfo_t *peerinfo)
{
- char buf[50] = {0};
- int32_t ret = 0;
+ int fd = -1;
+ int32_t ret = -1;
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_UUID,
- uuid_utoa (peerinfo->uuid));
- if (ret)
- goto out;
+ GF_ASSERT(peerinfo);
- snprintf (buf, sizeof (buf), "%d", peerinfo->state.state);
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_STATE, buf);
- if (ret)
- goto out;
+ fd = gf_store_mkstemp(peerinfo->shandle);
+ if (fd <= 0) {
+ ret = -1;
+ goto out;
+ }
- ret = glusterd_store_save_value (fd, GLUSTERD_STORE_KEY_PEER_HOSTNAME "1",
- peerinfo->hostname);
+ ret = glusterd_store_peer_write(fd, peerinfo);
+ if (ret)
+ goto out;
+
+ ret = gf_store_rename_tmppath(peerinfo->shandle);
out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
- return ret;
+ if (ret && (fd > 0))
+ gf_store_unlink_tmppath(peerinfo->shandle);
+ gf_msg_debug("glusterd", 0, "Returning %d", ret);
+ return ret;
}
int32_t
-glusterd_store_perform_peer_store (glusterd_peerinfo_t *peerinfo)
+glusterd_store_peerinfo(glusterd_peerinfo_t *peerinfo)
{
- int fd = -1;
- int32_t ret = -1;
+ int32_t ret = -1;
- GF_ASSERT (peerinfo);
+ GF_ASSERT(peerinfo);
- fd = glusterd_store_mkstemp (peerinfo->shandle);
- if (fd <= 0) {
- ret = -1;
- goto out;
- }
+ ret = glusterd_store_create_peer_dir();
+ if (ret)
+ goto out;
- ret = glusterd_store_peer_write (fd, peerinfo);
- if (ret)
- goto out;
+ ret = glusterd_store_create_peer_shandle(peerinfo);
+ if (ret)
+ goto out;
- ret = glusterd_store_rename_tmppath (peerinfo->shandle);
+ ret = glusterd_store_perform_peer_store(peerinfo);
out:
- if (ret && (fd > 0))
- glusterd_store_unlink_tmppath (peerinfo->shandle);
- if (fd > 0)
- close (fd);
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug("glusterd", 0, "Returning with %d", ret);
+ return ret;
}
int32_t
-glusterd_store_peerinfo (glusterd_peerinfo_t *peerinfo)
+glusterd_store_retrieve_peers(xlator_t *this)
{
- int32_t ret = -1;
-
- GF_ASSERT (peerinfo);
-
- ret = glusterd_store_create_peer_dir ();
+ int32_t ret = 0;
+ glusterd_conf_t *priv = NULL;
+ DIR *dir = NULL;
+ struct dirent *entry = NULL;
+ struct dirent scratch[2] = {
+ {
+ 0,
+ },
+ };
+ char path[PATH_MAX] = {
+ 0,
+ };
+ glusterd_peerinfo_t *peerinfo = NULL;
+ gf_store_handle_t *shandle = NULL;
+ char filepath[PATH_MAX] = {
+ 0,
+ };
+ gf_store_iter_t *iter = NULL;
+ char *key = NULL;
+ char *value = NULL;
+ glusterd_peerctx_args_t args = {0};
+ gf_store_op_errno_t op_errno = GD_STORE_SUCCESS;
+ glusterd_peer_hostname_t *address = NULL;
+ uuid_t tmp_uuid;
+ gf_boolean_t is_ok;
+ int32_t len;
+
+ GF_ASSERT(this);
+ priv = this->private;
+
+ GF_ASSERT(priv);
+
+ len = snprintf(path, PATH_MAX, "%s/%s", priv->workdir,
+ GLUSTERD_PEER_DIR_PREFIX);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ ret = -1;
+ goto out;
+ }
+
+ dir = sys_opendir(path);
+
+ if (!dir) {
+ gf_msg(this->name, GF_LOG_ERROR, errno, GD_MSG_DIR_OP_FAILED,
+ "Unable to open dir %s", path);
+ ret = -1;
+ goto out;
+ }
+
+ while ((entry = sys_readdir(dir, scratch))) {
+ if (gf_irrelevant_entry(entry))
+ continue;
+ if (gf_uuid_parse(entry->d_name, tmp_uuid) != 0) {
+ gf_log(this->name, GF_LOG_WARNING, "skipping non-peer file %s",
+ entry->d_name);
+ continue;
+ }
+ is_ok = _gf_false;
+ len = snprintf(filepath, PATH_MAX, "%s/%s", path, entry->d_name);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ goto next;
+ }
+ ret = gf_store_handle_retrieve(filepath, &shandle);
if (ret)
- goto out;
+ goto next;
- ret = glusterd_store_create_peer_shandle (peerinfo);
+ ret = gf_store_iter_new(shandle, &iter);
if (ret)
- goto out;
+ goto next;
- ret = glusterd_store_perform_peer_store (peerinfo);
-out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
- return ret;
-}
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ if (ret) {
+ goto next;
+ }
-int32_t
-glusterd_store_retrieve_peers (xlator_t *this)
-{
- int32_t ret = 0;
- glusterd_conf_t *priv = NULL;
- DIR *dir = NULL;
- struct dirent *entry = NULL;
- char path[PATH_MAX] = {0,};
- glusterd_peerinfo_t *peerinfo = NULL;
- uuid_t uuid = {0,};
- char *hostname = NULL;
- int32_t state = 0;
- glusterd_store_handle_t *shandle = NULL;
- char filepath[PATH_MAX] = {0,};
- glusterd_store_iter_t *iter = NULL;
- char *key = NULL;
- char *value = NULL;
- glusterd_peerctx_args_t args = {0};
- glusterd_store_op_errno_t op_errno = GD_STORE_SUCCESS;
-
- GF_ASSERT (this);
- priv = this->private;
-
- GF_ASSERT (priv);
-
- snprintf (path, PATH_MAX, "%s/%s", priv->workdir,
- GLUSTERD_PEER_DIR_PREFIX);
-
- dir = opendir (path);
-
- if (!dir) {
- gf_log ("", GF_LOG_ERROR, "Unable to open dir %s", path);
- ret = -1;
- goto out;
+ /* Create an empty peerinfo object before reading in the
+ * details
+ */
+ peerinfo = glusterd_peerinfo_new(GD_FRIEND_STATE_DEFAULT, NULL, NULL,
+ 0);
+ if (peerinfo == NULL) {
+ ret = -1;
+ goto next;
}
- glusterd_for_each_entry (entry, dir);
+ while (!ret) {
+ if (!strncmp(GLUSTERD_STORE_KEY_PEER_UUID, key,
+ SLEN(GLUSTERD_STORE_KEY_PEER_UUID))) {
+ if (value)
+ gf_uuid_parse(value, peerinfo->uuid);
+ } else if (!strncmp(GLUSTERD_STORE_KEY_PEER_STATE, key,
+ SLEN(GLUSTERD_STORE_KEY_PEER_STATE))) {
+ peerinfo->state.state = atoi(value);
+ } else if (!strncmp(GLUSTERD_STORE_KEY_PEER_HOSTNAME, key,
+ SLEN(GLUSTERD_STORE_KEY_PEER_HOSTNAME))) {
+ ret = gd_add_address_to_peer(peerinfo, value);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_ADD_ADDRESS_TO_PEER_FAIL,
+ "Could not add address to peer");
+ }
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_UNKNOWN_KEY,
+ "Unknown key: %s", key);
+ }
- while (entry) {
- snprintf (filepath, PATH_MAX, "%s/%s", path, entry->d_name);
- ret = glusterd_store_handle_retrieve (filepath, &shandle);
- if (ret)
- goto out;
+ GF_FREE(key);
+ GF_FREE(value);
+ key = NULL;
+ value = NULL;
- ret = glusterd_store_iter_new (shandle, &iter);
- if (ret)
- goto out;
+ ret = gf_store_iter_get_next(iter, &key, &value, &op_errno);
+ }
+ if (op_errno != GD_STORE_EOF) {
+ goto next;
+ }
- ret = glusterd_store_iter_get_next (iter, &key, &value,
- &op_errno);
- if (ret)
- goto out;
+ if (gf_uuid_is_null(peerinfo->uuid)) {
+ gf_log("", GF_LOG_ERROR,
+ "Null UUID while attempting to read peer from '%s'",
+ filepath);
+ goto next;
+ }
- while (!ret) {
-
- if (!strncmp (GLUSTERD_STORE_KEY_PEER_UUID, key,
- strlen (GLUSTERD_STORE_KEY_PEER_UUID))) {
- if (value)
- uuid_parse (value, uuid);
- } else if (!strncmp (GLUSTERD_STORE_KEY_PEER_STATE,
- key,
- strlen (GLUSTERD_STORE_KEY_PEER_STATE))) {
- state = atoi (value);
- } else if (!strncmp (GLUSTERD_STORE_KEY_PEER_HOSTNAME,
- key,
- strlen (GLUSTERD_STORE_KEY_PEER_HOSTNAME))) {
- hostname = gf_strdup (value);
- } else {
- gf_log ("", GF_LOG_ERROR, "Unknown key: %s",
- key);
- }
-
- GF_FREE (key);
- GF_FREE (value);
- key = NULL;
- value = NULL;
-
- ret = glusterd_store_iter_get_next (iter, &key, &value,
- &op_errno);
- }
- if (op_errno != GD_STORE_EOF)
- goto out;
+ /* Set first hostname from peerinfo->hostnames to
+ * peerinfo->hostname
+ */
+ address = cds_list_entry(peerinfo->hostnames.next,
+ glusterd_peer_hostname_t, hostname_list);
+ peerinfo->hostname = gf_strdup(address->hostname);
- (void) glusterd_store_iter_destroy (iter);
+ ret = glusterd_friend_add_from_peerinfo(peerinfo, 1, NULL);
+ if (ret)
+ goto next;
- args.mode = GD_MODE_SWITCH_ON;
- ret = glusterd_friend_add (hostname, 0, state, &uuid,
- &peerinfo, 1, &args);
+ peerinfo->shandle = shandle;
+ is_ok = _gf_true;
- GF_FREE (hostname);
- if (ret)
- goto out;
+ next:
+ (void)gf_store_iter_destroy(&iter);
- peerinfo->shandle = shandle;
- glusterd_for_each_entry (entry, dir);
+ if (!is_ok) {
+ gf_log(this->name, GF_LOG_WARNING,
+ "skipping malformed peer file %s", entry->d_name);
+ if (peerinfo) {
+ glusterd_peerinfo_cleanup(peerinfo);
+ }
}
+ peerinfo = NULL;
+ }
+
+ args.mode = GD_MODE_ON;
+
+ RCU_READ_LOCK;
+ cds_list_for_each_entry_rcu(peerinfo, &priv->peers, uuid_list)
+ {
+ ret = glusterd_friend_rpc_create(this, peerinfo, &args);
+ if (ret)
+ break;
+ }
+ RCU_READ_UNLOCK;
+ peerinfo = NULL;
out:
- if (dir)
- closedir (dir);
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
- return ret;
+ if (dir)
+ sys_closedir(dir);
+ gf_msg_debug(this->name, 0, "Returning with %d", ret);
+
+ return ret;
}
+/* Bricks for snap volumes are hosted at /var/run/gluster/snaps
+ * When a volume is restored, it points to the bricks of the snap
+ * volume it was restored from. Hence on a node restart these
+ * paths need to be recreated and re-mounted
+ */
int32_t
-glusterd_resolve_all_bricks (xlator_t *this)
-{
- int32_t ret = 0;
- glusterd_conf_t *priv = NULL;
- glusterd_volinfo_t *volinfo = NULL;
- glusterd_brickinfo_t *brickinfo = NULL;
-
- GF_ASSERT (this);
- priv = this->private;
-
- GF_ASSERT (priv);
-
- list_for_each_entry (volinfo, &priv->volumes, vol_list) {
- list_for_each_entry (brickinfo, &volinfo->bricks, brick_list) {
- ret = glusterd_resolve_brick (brickinfo);
- if (ret) {
- gf_log ("glusterd", GF_LOG_ERROR,
- "resolve brick failed in restore");
- goto out;
- }
+glusterd_recreate_all_snap_brick_mounts(xlator_t *this)
+{
+ int32_t ret = 0;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_snap_t *snap = NULL;
+
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ /* Recreate bricks of volumes restored from snaps */
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ /* If the volume is not a restored volume then continue */
+ if (gf_uuid_is_null(volinfo->restored_from_snap))
+ continue;
+
+ ret = glusterd_recreate_vol_brick_mounts(this, volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_BRK_MNT_RECREATE_FAIL,
+ "Failed to recreate brick mounts "
+ "for %s",
+ volinfo->volname);
+ goto out;
+ }
+ }
+
+ /* Recreate bricks of snapshot volumes
+ * We are not creating brick mounts for stopped snaps.
+ */
+ cds_list_for_each_entry(snap, &priv->snapshots, snap_list)
+ {
+ cds_list_for_each_entry(volinfo, &snap->volumes, vol_list)
+ {
+ if (volinfo->status != GLUSTERD_STATUS_STOPPED) {
+ ret = glusterd_recreate_vol_brick_mounts(this, volinfo);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0,
+ GD_MSG_BRK_MNT_RECREATE_FAIL,
+ "Failed to recreate brick "
+ "mounts for %s",
+ snap->snapname);
+ goto out;
}
+ }
}
+ }
+
+out:
+ gf_msg_trace(this->name, 0, "Returning with %d", ret);
+ return ret;
+}
+/* When the snapshot command from cli is received, the on disk and
+ * in memory structures for the snapshot are created (with the status)
+ * being marked as GD_SNAP_STATUS_INIT. Once the backend snapshot is
+ * taken, the status is changed to GD_SNAP_STATUS_IN_USE. If glusterd
+ * dies after taking the backend snapshot, but before updating the
+ * status, then when glusterd comes up, it should treat that snapshot
+ * as a failed snapshot and clean it up.
+ *
+ * Restore operation starts by setting the status to
+ * GD_SNAP_STATUS_RESTORED. If the server goes down before changing
+ * the status the status back we need to revert the partial snapshot
+ * taken.
+ */
+int32_t
+glusterd_snap_cleanup(xlator_t *this)
+{
+ dict_t *dict = NULL;
+ int32_t ret = 0;
+ glusterd_conf_t *priv = NULL;
+ glusterd_snap_t *snap = NULL;
+ glusterd_snap_t *tmp_snap = NULL;
+
+ GF_ASSERT(this);
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ dict = dict_new();
+ if (!dict) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_DICT_CREATE_FAIL,
+ "Failed to create dict");
+ ret = -1;
+ goto out;
+ }
+
+ cds_list_for_each_entry_safe(snap, tmp_snap, &priv->snapshots, snap_list)
+ {
+ if (snap->snap_status == GD_SNAP_STATUS_RESTORED) {
+ ret = glusterd_snapshot_revert_restore_from_snap(snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_WARNING, 0,
+ GD_MSG_SNAP_RESTORE_REVERT_FAIL,
+ "Failed to "
+ "revert partially restored snapshot "
+ "(%s)",
+ snap->snapname);
+ goto out;
+ }
+ } else if (snap->snap_status != GD_SNAP_STATUS_IN_USE) {
+ ret = glusterd_snap_remove(dict, snap, _gf_true, _gf_true,
+ _gf_false);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_REMOVE_FAIL,
+ "Failed to remove the snapshot %s", snap->snapname);
+ goto out;
+ }
+ }
+ }
out:
- gf_log ("", GF_LOG_DEBUG, "Returning with %d", ret);
+ if (dict)
+ dict_unref(dict);
- return ret;
+ gf_msg_trace(this->name, 0, "Returning with %d", ret);
+ return ret;
}
int32_t
-glusterd_restore ()
+glusterd_resolve_all_bricks(xlator_t *this)
{
- int32_t ret = -1;
- xlator_t *this = NULL;
+ int32_t ret = 0;
+ glusterd_conf_t *priv = NULL;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_brickinfo_t *brickinfo = NULL;
+ glusterd_snap_t *snap = NULL;
- this = THIS;
+ GF_ASSERT(this);
+ priv = this->private;
- ret = glusterd_store_retrieve_volumes (this);
+ GF_ASSERT(priv);
- if (ret)
+ /* Resolve bricks of volumes */
+ cds_list_for_each_entry(volinfo, &priv->volumes, vol_list)
+ {
+ cds_list_for_each_entry(brickinfo, &volinfo->bricks, brick_list)
+ {
+ ret = glusterd_resolve_brick(brickinfo);
+ if (ret) {
+ gf_event(EVENT_BRICKPATH_RESOLVE_FAILED,
+ "peer=%s;volume=%s;brick=%s", brickinfo->hostname,
+ volinfo->volname, brickinfo->path);
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_RESOLVE_BRICK_FAIL,
+ "Failed to resolve brick %s with host %s of volume %s"
+ " in restore",
+ brickinfo->path, brickinfo->hostname, volinfo->volname);
goto out;
+ }
+ }
+ }
- ret = glusterd_store_retrieve_peers (this);
- if (ret)
- goto out;
+ /* Resolve bricks of snapshot volumes */
+ cds_list_for_each_entry(snap, &priv->snapshots, snap_list)
+ {
+ ret = glusterd_resolve_snap_bricks(this, snap);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_RESOLVE_BRICK_FAIL,
+ "resolving the snap bricks"
+ " failed for snap: %s",
+ snap->snapname);
+ goto out;
+ }
+ }
- ret = glusterd_resolve_all_bricks (this);
- if (ret)
- goto out;
+out:
+ gf_msg_trace(this->name, 0, "Returning with %d", ret);
+ return ret;
+}
+
+int32_t
+glusterd_restore()
+{
+ int32_t ret = -1;
+ xlator_t *this = NULL;
+
+ this = THIS;
+
+ ret = glusterd_options_init(this);
+ if (ret < 0)
+ goto out;
+
+ ret = glusterd_store_retrieve_volumes(this, NULL);
+ if (ret)
+ goto out;
+
+ ret = glusterd_store_retrieve_peers(this);
+ if (ret)
+ goto out;
+
+ /* While retrieving snapshots, if the snapshot status
+ is not GD_SNAP_STATUS_IN_USE, then the snapshot is
+ cleaned up. To do that, the snap volume has to be
+ stopped by stopping snapshot volume's bricks. And for
+ that the snapshot bricks should be resolved. But without
+ retrieving the peers, resolving bricks will fail. So
+ do retrieving of snapshots after retrieving peers.
+ */
+ ret = glusterd_store_retrieve_snaps(this);
+ if (ret)
+ goto out;
+
+ ret = glusterd_resolve_all_bricks(this);
+ if (ret)
+ goto out;
+
+ ret = glusterd_snap_cleanup(this);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_CLEANUP_FAIL,
+ "Failed to perform "
+ "a cleanup of the snapshots");
+ goto out;
+ }
+
+ ret = glusterd_recreate_all_snap_brick_mounts(this);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SNAP_BRK_MNT_RECREATE_FAIL,
+ "Failed to recreate "
+ "all snap brick mounts");
+ goto out;
+ }
out:
- gf_log ("", GF_LOG_DEBUG, "Returning %d", ret);
- return ret;
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+int
+glusterd_store_retrieve_quota_version(glusterd_volinfo_t *volinfo)
+{
+ int ret = -1;
+ uint32_t version = 0;
+ char cksum_path[PATH_MAX] = {
+ 0,
+ };
+ char path[PATH_MAX] = {
+ 0,
+ };
+ char *version_str = NULL;
+ char *tmp = NULL;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+ gf_store_handle_t *handle = NULL;
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ GLUSTERD_GET_VOLUME_DIR(path, volinfo, conf);
+ len = snprintf(cksum_path, sizeof(cksum_path), "%s/%s", path,
+ GLUSTERD_VOL_QUOTA_CKSUM_FILE);
+ if ((len < 0) || (len >= sizeof(cksum_path))) {
+ goto out;
+ }
+
+ ret = gf_store_handle_new(cksum_path, &handle);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_STORE_HANDLE_GET_FAIL,
+ "Unable to get store handle "
+ "for %s",
+ cksum_path);
+ goto out;
+ }
+
+ ret = gf_store_retrieve_value(handle, "version", &version_str);
+ if (ret) {
+ gf_msg_debug(this->name, 0, "Version absent");
+ ret = 0;
+ goto out;
+ }
+
+ version = strtoul(version_str, &tmp, 10);
+ if ((errno == ERANGE) || (errno == EINVAL)) {
+ gf_msg_debug(this->name, 0, "Invalid version number");
+ goto out;
+ }
+ volinfo->quota_conf_version = version;
+ ret = 0;
+
+out:
+ if (version_str)
+ GF_FREE(version_str);
+ gf_store_handle_destroy(handle);
+ return ret;
+}
+
+int
+glusterd_store_save_quota_version_and_cksum(glusterd_volinfo_t *volinfo)
+{
+ gf_store_handle_t *shandle = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ char path[PATH_MAX] = {0};
+ char cksum_path[PATH_MAX + 32] = {
+ 0,
+ };
+ char buf[64] = {0};
+ int fd = -1;
+ int32_t ret = -1;
+ int32_t len = 0;
+
+ this = THIS;
+ conf = this->private;
+
+ GLUSTERD_GET_VOLUME_DIR(path, volinfo, conf);
+ len = snprintf(cksum_path, sizeof(cksum_path), "%s/%s", path,
+ GLUSTERD_VOL_QUOTA_CKSUM_FILE);
+ if ((len < 0) || (len >= sizeof(cksum_path))) {
+ goto out;
+ }
+
+ ret = gf_store_handle_new(cksum_path, &shandle);
+ if (ret)
+ goto out;
+
+ fd = gf_store_mkstemp(shandle);
+ if (fd <= 0) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = snprintf(buf, sizeof(buf), "cksum=%u\nversion=%u\n",
+ volinfo->quota_conf_cksum, volinfo->quota_conf_version);
+ if (ret < 0 || ret >= sizeof(buf)) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = gf_store_save_items(fd, buf);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_CKSUM_STORE_FAIL,
+ "Failed to store quota cksum and version");
+ goto out;
+ }
+
+ ret = gf_store_rename_tmppath(shandle);
+ if (ret)
+ goto out;
+
+out:
+ if ((ret < 0) && (fd > 0))
+ gf_store_unlink_tmppath(shandle);
+ gf_store_handle_destroy(shandle);
+ return ret;
+}
+
+int32_t
+glusterd_quota_conf_write_header(int fd)
+{
+ int header_len = 0;
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("quota", this, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ if (conf->op_version < GD_OP_VERSION_3_7_0) {
+ header_len = SLEN(QUOTA_CONF_HEADER_1_1);
+ ret = gf_nwrite(fd, QUOTA_CONF_HEADER_1_1, header_len);
+ } else {
+ header_len = SLEN(QUOTA_CONF_HEADER);
+ ret = gf_nwrite(fd, QUOTA_CONF_HEADER, header_len);
+ }
+
+ if (ret != header_len) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+
+out:
+ if (ret < 0)
+ gf_msg_callingfn("quota", GF_LOG_ERROR, 0, GD_MSG_QUOTA_CONF_WRITE_FAIL,
+ "failed to write "
+ "header to a quota conf");
+
+ return ret;
+}
+
+int32_t
+glusterd_quota_conf_write_gfid(int fd, void *buf, char type)
+{
+ int ret = -1;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ this = THIS;
+ GF_VALIDATE_OR_GOTO("quota", this, out);
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ ret = gf_nwrite(fd, buf, 16);
+ if (ret != 16) {
+ ret = -1;
+ goto out;
+ }
+
+ if (conf->op_version >= GD_OP_VERSION_3_7_0) {
+ ret = gf_nwrite(fd, &type, 1);
+ if (ret != 1) {
+ ret = -1;
+ goto out;
+ }
+ }
+
+ ret = 0;
+
+out:
+ if (ret < 0)
+ gf_msg_callingfn("quota", GF_LOG_ERROR, 0, GD_MSG_QUOTA_CONF_WRITE_FAIL,
+ "failed to write "
+ "gfid %s to a quota conf",
+ uuid_utoa(buf));
+
+ return ret;
}
diff --git a/xlators/mgmt/glusterd/src/glusterd-store.h b/xlators/mgmt/glusterd/src/glusterd-store.h
index b2cceb393bd..83f4df0783e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-store.h
+++ b/xlators/mgmt/glusterd/src/glusterd-store.h
@@ -1,146 +1,216 @@
/*
- Copyright (c) 2006-2011 Gluster, Inc. <http://www.gluster.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2006-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
#ifndef _GLUSTERD_HA_H_
#define _GLUSTERD_HA_H_
-#ifndef _CONFIG_H
-#define _CONFIG_H
-#include "config.h"
-#endif
-
#include <pthread.h>
-#include "uuid.h"
-
-#include "glusterfs.h"
-#include "xlator.h"
-#include "run.h"
-#include "logging.h"
-#include "call-stub.h"
-#include "fd.h"
-#include "byte-order.h"
+#include <glusterfs/compat-uuid.h>
+
+#include <glusterfs/glusterfs.h>
+#include <glusterfs/xlator.h>
+#include <glusterfs/run.h>
+#include <glusterfs/logging.h>
+#include <glusterfs/call-stub.h>
+#include <glusterfs/byte-order.h>
#include "glusterd.h"
#include "rpcsvc.h"
-typedef enum glusterd_store_ver_ac_{
- GLUSTERD_VOLINFO_VER_AC_NONE = 0,
- GLUSTERD_VOLINFO_VER_AC_INCREMENT = 1,
+typedef enum glusterd_store_ver_ac_ {
+ GLUSTERD_VOLINFO_VER_AC_NONE = 0,
+ GLUSTERD_VOLINFO_VER_AC_INCREMENT = 1,
+ GLUSTERD_VOLINFO_VER_AC_DECREMENT = 2,
} glusterd_volinfo_ver_ac_t;
+#define UUID_SIZE 36
+#define VOLINFO_BUFFER_SIZE 4093
+#define GLUSTERD_STORE_UUID_KEY "UUID"
-#define GLUSTERD_STORE_UUID_KEY "UUID"
-
-#define GLUSTERD_STORE_KEY_VOL_TYPE "type"
-#define GLUSTERD_STORE_KEY_VOL_COUNT "count"
-#define GLUSTERD_STORE_KEY_VOL_STATUS "status"
-#define GLUSTERD_STORE_KEY_VOL_PORT "port"
-#define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count"
-#define GLUSTERD_STORE_KEY_VOL_STRIPE_CNT "stripe_count"
+#define GLUSTERD_STORE_KEY_VOL_TYPE "type"
+#define GLUSTERD_STORE_KEY_VOL_COUNT "count"
+#define GLUSTERD_STORE_KEY_VOL_STATUS "status"
+#define GLUSTERD_STORE_KEY_VOL_PORT "port"
+#define GLUSTERD_STORE_KEY_VOL_SUB_COUNT "sub_count"
+#define GLUSTERD_STORE_KEY_VOL_STRIPE_CNT "stripe_count"
#define GLUSTERD_STORE_KEY_VOL_REPLICA_CNT "replica_count"
-#define GLUSTERD_STORE_KEY_VOL_BRICK "brick"
-#define GLUSTERD_STORE_KEY_VOL_VERSION "version"
-#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type"
-#define GLUSTERD_STORE_KEY_VOL_ID "volume-id"
-#define GLUSTERD_STORE_KEY_RB_STATUS "rb_status"
-#define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src"
-#define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst"
-#define GLUSTERD_STORE_KEY_VOL_DEFRAG "rebalance_status"
-#define GLUSTERD_STORE_KEY_USERNAME "username"
-#define GLUSTERD_STORE_KEY_PASSWORD "password"
+#define GLUSTERD_STORE_KEY_VOL_DISPERSE_CNT "disperse_count"
+#define GLUSTERD_STORE_KEY_VOL_REDUNDANCY_CNT "redundancy_count"
+#define GLUSTERD_STORE_KEY_VOL_ARBITER_CNT "arbiter_count"
+#define GLUSTERD_STORE_KEY_VOL_THIN_ARBITER_CNT "thin_arbiter_count"
+#define GLUSTERD_STORE_KEY_VOL_BRICK "brick"
+#define GLUSTERD_STORE_KEY_VOL_TA_BRICK "ta-brick"
+#define GLUSTERD_STORE_KEY_VOL_VERSION "version"
+#define GLUSTERD_STORE_KEY_VOL_TRANSPORT "transport-type"
+#define GLUSTERD_STORE_KEY_VOL_ID "volume-id"
+#define GLUSTERD_STORE_KEY_VOL_RESTORED_SNAP "restored_from_snap"
+#define GLUSTERD_STORE_KEY_RB_STATUS "rb_status"
+#define GLUSTERD_STORE_KEY_RB_SRC_BRICK "rb_src"
+#define GLUSTERD_STORE_KEY_RB_DST_BRICK "rb_dst"
+#define GLUSTERD_STORE_KEY_RB_DST_PORT "rb_port"
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG "rebalance_status"
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG_STATUS "status"
+#define GLUSTERD_STORE_KEY_DEFRAG_OP "rebalance_op"
+#define GLUSTERD_STORE_KEY_USERNAME "username"
+#define GLUSTERD_STORE_KEY_PASSWORD "password"
+#define GLUSTERD_STORE_KEY_PARENT_VOLNAME "parent_volname"
+#define GLUSTERD_STORE_KEY_VOL_OP_VERSION "op-version"
+#define GLUSTERD_STORE_KEY_VOL_CLIENT_OP_VERSION "client-op-version"
+#define GLUSTERD_STORE_KEY_VOL_QUOTA_VERSION "quota-version"
+
+#define GLUSTERD_STORE_KEY_SNAP_NAME "name"
+#define GLUSTERD_STORE_KEY_SNAP_ID "snap-id"
+#define GLUSTERD_STORE_KEY_SNAP_DESC "desc"
+#define GLUSTERD_STORE_KEY_SNAP_TIMESTAMP "time-stamp"
+#define GLUSTERD_STORE_KEY_SNAP_STATUS "status"
+#define GLUSTERD_STORE_KEY_SNAP_RESTORED "snap-restored"
+#define GLUSTERD_STORE_KEY_SNAP_MAX_HARD_LIMIT "snap-max-hard-limit"
+#define GLUSTERD_STORE_KEY_SNAP_AUTO_DELETE "auto-delete"
+#define GLUSTERD_STORE_KEY_SNAP_MAX_SOFT_LIMIT "snap-max-soft-limit"
+#define GLUSTERD_STORE_KEY_SNAPD_PORT "snapd-port"
+#define GLUSTERD_STORE_KEY_SNAP_ACTIVATE "snap-activate-on-create"
#define GLUSTERD_STORE_KEY_BRICK_HOSTNAME "hostname"
-#define GLUSTERD_STORE_KEY_BRICK_PATH "path"
-#define GLUSTERD_STORE_KEY_BRICK_PORT "listen-port"
+#define GLUSTERD_STORE_KEY_BRICK_PATH "path"
+#define GLUSTERD_STORE_KEY_BRICK_REAL_PATH "real_path"
+#define GLUSTERD_STORE_KEY_BRICK_PORT "listen-port"
#define GLUSTERD_STORE_KEY_BRICK_RDMA_PORT "rdma.listen-port"
#define GLUSTERD_STORE_KEY_BRICK_DECOMMISSIONED "decommissioned"
+#define GLUSTERD_STORE_KEY_BRICK_VGNAME "vg"
+#define GLUSTERD_STORE_KEY_BRICK_DEVICE_PATH "device_path"
+#define GLUSTERD_STORE_KEY_BRICK_MOUNT_DIR "mount_dir"
+#define GLUSTERD_STORE_KEY_BRICK_SNAP_STATUS "snap-status"
+#define GLUSTERD_STORE_KEY_BRICK_FSTYPE "fs-type"
+#define GLUSTERD_STORE_KEY_BRICK_MNTOPTS "mnt-opts"
+#define GLUSTERD_STORE_KEY_BRICK_ID "brick-id"
+#define GLUSTERD_STORE_KEY_BRICK_FSID "brick-fsid"
+#define GLUSTERD_STORE_KEY_BRICK_UUID "uuid"
+
+#define GLUSTERD_STORE_KEY_PEER_UUID "uuid"
+#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname"
+#define GLUSTERD_STORE_KEY_PEER_STATE "state"
+#define GLUSTERD_STORE_KEY_VOL_CAPS "caps" /* left just for backward compat */
+
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG_REB_FILES "rebalanced-files"
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG_SIZE "size"
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG_SCANNED "scanned"
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG_FAILURES "failures"
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG_SKIPPED "skipped"
+#define GLUSTERD_STORE_KEY_VOL_DEFRAG_RUN_TIME "run-time"
+
+#define GLUSTERD_STORE_KEY_VOL_MIGRATED_FILES "migrated-files"
+#define GLUSTERD_STORE_KEY_VOL_MIGRATED_SIZE "migration-size"
+#define GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SCANNED "migration-scanned"
+#define GLUSTERD_STORE_KEY_VOL_MIGRATIONS_FAILURES "migration-failures"
+#define GLUSTERD_STORE_KEY_VOL_MIGRATIONS_SKIPPED "migration-skipped"
+#define GLUSTERD_STORE_KEY_VOL_MIGRATION_RUN_TIME "migration-run-time"
+
+#define GLUSTERD_STORE_KEY_GANESHA_GLOBAL "nfs-ganesha"
+
+/*
+ * The structure is responsible for handling the parameter for writes into
+ * the buffer before it is finally written to the file. The writes will be
+ * of the form of key-value pairs.
+ */
+struct glusterd_volinfo_data_store_ {
+ gf_store_handle_t *shandle; /*Contains fd and path of the file */
+ int16_t buffer_len;
+ char key_check; /* flag to check if key is to be validated before write*/
+ char buffer[VOLINFO_BUFFER_SIZE];
+};
+typedef struct glusterd_volinfo_data_store_ glusterd_volinfo_data_store_t;
-#define GLUSTERD_STORE_KEY_PEER_UUID "uuid"
-#define GLUSTERD_STORE_KEY_PEER_HOSTNAME "hostname"
-#define GLUSTERD_STORE_KEY_PEER_STATE "state"
+int32_t
+glusterd_store_volinfo(glusterd_volinfo_t *volinfo,
+ glusterd_volinfo_ver_ac_t ac);
-#define glusterd_for_each_entry(entry, dir) \
- do {\
- entry = NULL;\
- if (dir) {\
- entry = readdir (dir);\
- while (entry && (!strcmp (entry->d_name, ".") ||\
- !strcmp (entry->d_name, ".."))) {\
- entry = readdir (dir);\
- }\
- }\
- } while (0); \
+int32_t
+glusterd_store_delete_volume(glusterd_volinfo_t *volinfo);
+int32_t
+glusterd_store_delete_snap(glusterd_snap_t *snap);
-typedef enum {
- GD_STORE_SUCCESS,
- GD_STORE_KEY_NULL,
- GD_STORE_VALUE_NULL,
- GD_STORE_KEY_VALUE_NULL,
- GD_STORE_EOF,
- GD_STORE_ENOMEM,
- GD_STORE_STAT_FAILED
-} glusterd_store_op_errno_t;
+int32_t
+glusterd_retrieve_uuid();
int32_t
-glusterd_store_volinfo (glusterd_volinfo_t *volinfo, glusterd_volinfo_ver_ac_t ac);
+glusterd_store_peerinfo(glusterd_peerinfo_t *peerinfo);
int32_t
-glusterd_store_delete_volume (glusterd_volinfo_t *volinfo);
+glusterd_store_delete_peerinfo(glusterd_peerinfo_t *peerinfo);
int32_t
-glusterd_store_uuid ();
+glusterd_store_delete_brick(glusterd_brickinfo_t *brickinfo, char *delete_path);
int32_t
-glusterd_store_handle_new (char *path, glusterd_store_handle_t **handle);
+glusterd_restore();
+
+void
+glusterd_perform_volinfo_version_action(glusterd_volinfo_t *volinfo,
+ glusterd_volinfo_ver_ac_t ac);
+gf_boolean_t
+glusterd_store_is_valid_brickpath(char *volname, char *brick);
int32_t
-glusterd_store_save_value (int fd, char *key, char *value);
+glusterd_store_perform_node_state_store(glusterd_volinfo_t *volinfo);
+
+int
+glusterd_retrieve_op_version(xlator_t *this, int *op_version);
+
+int
+glusterd_retrieve_max_op_version(xlator_t *this, int *op_version);
+
+int
+glusterd_store_max_op_version(xlator_t *this);
+
+int
+glusterd_store_global_info(xlator_t *this);
int32_t
-glusterd_store_retrieve_value (glusterd_store_handle_t *handle,
- char *key, char **value);
+glusterd_store_retrieve_options(xlator_t *this);
int32_t
-glusterd_retrieve_uuid ();
+glusterd_store_retrieve_bricks(glusterd_volinfo_t *volinfo);
int32_t
-glusterd_store_peerinfo (glusterd_peerinfo_t *peerinfo);
+glusterd_store_options(xlator_t *this, dict_t *opts);
+
+void
+glusterd_replace_slash_with_hyphen(char *str);
int32_t
-glusterd_store_delete_peerinfo (glusterd_peerinfo_t *peerinfo);
+glusterd_store_create_quota_conf_sh_on_absence(glusterd_volinfo_t *volinfo);
+
+int
+glusterd_store_retrieve_quota_version(glusterd_volinfo_t *volinfo);
+
+int
+glusterd_store_save_quota_version_and_cksum(glusterd_volinfo_t *volinfo);
int32_t
-glusterd_store_delete_brick (glusterd_volinfo_t *volinfo,
- glusterd_brickinfo_t *brickinfo);
+glusterd_store_snap(glusterd_snap_t *snap);
int32_t
-glusterd_store_handle_destroy (glusterd_store_handle_t *handle);
+glusterd_store_update_missed_snaps();
+
+glusterd_volinfo_t *
+glusterd_store_retrieve_volume(char *volname, glusterd_snap_t *snap);
+
+int
+glusterd_restore_op_version(xlator_t *this);
int32_t
-glusterd_restore ();
+glusterd_quota_conf_write_header(int fd);
-void
-glusterd_perform_volinfo_version_action (glusterd_volinfo_t *volinfo,
- glusterd_volinfo_ver_ac_t ac);
-gf_boolean_t
-glusterd_store_is_valid_brickpath (char *volname, char *brick);
+int32_t
+glusterd_quota_conf_write_gfid(int fd, void *buf, char type);
int32_t
-glusterd_store_perform_node_state_store (glusterd_volinfo_t *volinfo);
+glusterd_recreate_vol_brick_mounts(xlator_t *this, glusterd_volinfo_t *volinfo);
+
#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.c b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
new file mode 100644
index 00000000000..ca845903c4f
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.c
@@ -0,0 +1,1047 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+#include <signal.h>
+
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
+#include "glusterd.h"
+#include <glusterfs/glusterfs.h>
+#include "glusterd-utils.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-shd-svc.h"
+#include "glusterd-quotad-svc.h"
+#ifdef BUILD_GNFS
+#include "glusterd-nfs-svc.h"
+#endif
+#include "glusterd-bitd-svc.h"
+#include "glusterd-shd-svc-helper.h"
+#include "glusterd-scrub-svc.h"
+#include "glusterd-svc-helper.h"
+#include <glusterfs/syscall.h>
+#include "glusterd-snapshot-utils.h"
+
+int
+glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo)
+{
+ int ret = 0;
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = NULL;
+ char *svc_name = NULL;
+
+ GF_ASSERT(this);
+
+ conf = this->private;
+ GF_ASSERT(conf);
+
+#ifdef BUILD_GNFS
+ svc_name = "nfs";
+ ret = glusterd_nfssvc_reconfigure();
+ if (ret)
+ goto out;
+#endif
+ svc_name = "self-heald";
+ if (volinfo) {
+ ret = glusterd_shdsvc_reconfigure(volinfo);
+ if (ret)
+ goto out;
+ }
+
+ if (conf->op_version == GD_OP_VERSION_MIN)
+ goto out;
+
+ svc_name = "quotad";
+ ret = glusterd_quotadsvc_reconfigure();
+ if (ret)
+ goto out;
+
+ svc_name = "bitd";
+ ret = glusterd_bitdsvc_reconfigure();
+ if (ret)
+ goto out;
+
+ svc_name = "scrubber";
+ ret = glusterd_scrubsvc_reconfigure();
+out:
+ if (ret && svc_name)
+ gf_event(EVENT_SVC_RECONFIGURE_FAILED, "svc_name=%s", svc_name);
+ return ret;
+}
+
+int
+glusterd_svcs_stop(glusterd_volinfo_t *volinfo)
+{
+ int ret = 0;
+ xlator_t *this = NULL;
+ glusterd_conf_t *priv = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+#ifdef BUILD_GNFS
+ ret = priv->nfs_svc.stop(&(priv->nfs_svc), SIGKILL);
+ if (ret)
+ goto out;
+#endif
+ ret = priv->quotad_svc.stop(&(priv->quotad_svc), SIGTERM);
+ if (ret)
+ goto out;
+
+ if (volinfo) {
+ ret = volinfo->shd.svc.stop(&(volinfo->shd.svc), SIGTERM);
+ if (ret)
+ goto out;
+ }
+
+ ret = priv->bitd_svc.stop(&(priv->bitd_svc), SIGTERM);
+ if (ret)
+ goto out;
+
+ ret = priv->scrub_svc.stop(&(priv->scrub_svc), SIGTERM);
+
+out:
+ return ret;
+}
+
+int
+glusterd_svcs_manager(glusterd_volinfo_t *volinfo)
+{
+ int ret = 0;
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = NULL;
+
+ GF_ASSERT(this);
+
+ conf = this->private;
+ GF_ASSERT(conf);
+
+ if (volinfo && volinfo->is_snap_volume)
+ return 0;
+
+#if BUILD_GNFS
+ ret = conf->nfs_svc.manager(&(conf->nfs_svc), NULL, PROC_START_NO_WAIT);
+ if (ret)
+ goto out;
+#endif
+ if (conf->op_version == GD_OP_VERSION_MIN)
+ goto out;
+
+ ret = conf->quotad_svc.manager(&(conf->quotad_svc), volinfo,
+ PROC_START_NO_WAIT);
+ if (ret == -EINVAL)
+ ret = 0;
+ if (ret)
+ goto out;
+
+ ret = conf->bitd_svc.manager(&(conf->bitd_svc), NULL, PROC_START_NO_WAIT);
+ if (ret == -EINVAL)
+ ret = 0;
+ if (ret)
+ goto out;
+
+ if (volinfo) {
+ ret = volinfo->shd.svc.manager(&(volinfo->shd.svc), volinfo,
+ PROC_START_NO_WAIT);
+ if (ret == -EINVAL)
+ ret = 0;
+ if (ret)
+ goto out;
+ }
+
+ ret = conf->scrub_svc.manager(&(conf->scrub_svc), NULL, PROC_START_NO_WAIT);
+ if (ret == -EINVAL)
+ ret = 0;
+out:
+ return ret;
+}
+
+int
+glusterd_svc_check_volfile_identical(char *svc_name,
+ glusterd_graph_builder_t builder,
+ gf_boolean_t *identical)
+{
+ char orgvol[PATH_MAX] = {
+ 0,
+ };
+ char *tmpvol = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ int need_unlink = 0;
+ int tmp_fd = -1;
+
+ this = THIS;
+
+ GF_ASSERT(this);
+ GF_ASSERT(identical);
+ conf = this->private;
+
+ glusterd_svc_build_volfile_path(svc_name, conf->workdir, orgvol,
+ sizeof(orgvol));
+
+ ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name);
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */
+ tmp_fd = mkstemp(tmpvol);
+ if (tmp_fd < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+ "Unable to create temp file"
+ " %s:(%s)",
+ tmpvol, strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ need_unlink = 1;
+
+ ret = glusterd_create_global_volfile(builder, tmpvol, NULL);
+ if (ret)
+ goto out;
+
+ ret = glusterd_check_files_identical(orgvol, tmpvol, identical);
+out:
+ if (need_unlink)
+ sys_unlink(tmpvol);
+
+ if (tmpvol != NULL)
+ GF_FREE(tmpvol);
+
+ if (tmp_fd >= 0)
+ sys_close(tmp_fd);
+
+ return ret;
+}
+
+int
+glusterd_svc_check_topology_identical(char *svc_name,
+ glusterd_graph_builder_t builder,
+ gf_boolean_t *identical)
+{
+ char orgvol[PATH_MAX] = {
+ 0,
+ };
+ char *tmpvol = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = THIS;
+ int ret = -1;
+ int tmpclean = 0;
+ int tmpfd = -1;
+
+ if ((!identical) || (!this) || (!this->private)) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
+ goto out;
+ }
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ /* Fetch the original volfile */
+ glusterd_svc_build_volfile_path(svc_name, conf->workdir, orgvol,
+ sizeof(orgvol));
+
+ /* Create the temporary volfile */
+ ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name);
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */
+ tmpfd = mkstemp(tmpvol);
+ if (tmpfd < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+ "Unable to create temp file"
+ " %s:(%s)",
+ tmpvol, strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ tmpclean = 1; /* SET the flag to unlink() tmpfile */
+
+ ret = glusterd_create_global_volfile(builder, tmpvol, NULL);
+ if (ret)
+ goto out;
+
+ /* Compare the topology of volfiles */
+ ret = glusterd_check_topology_identical(orgvol, tmpvol, identical);
+out:
+ if (tmpfd >= 0)
+ sys_close(tmpfd);
+ if (tmpclean)
+ sys_unlink(tmpvol);
+ if (tmpvol != NULL)
+ GF_FREE(tmpvol);
+ return ret;
+}
+
+int
+glusterd_volume_svc_check_volfile_identical(
+ char *svc_name, dict_t *mode_dict, glusterd_volinfo_t *volinfo,
+ glusterd_vol_graph_builder_t builder, gf_boolean_t *identical)
+{
+ char orgvol[PATH_MAX] = {
+ 0,
+ };
+ char *tmpvol = NULL;
+ xlator_t *this = NULL;
+ int ret = -1;
+ int need_unlink = 0;
+ int tmp_fd = -1;
+
+ this = THIS;
+
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ GF_VALIDATE_OR_GOTO(this->name, identical, out);
+
+ /* This builds volfile for volume level dameons */
+ glusterd_volume_svc_build_volfile_path(svc_name, volinfo, orgvol,
+ sizeof(orgvol));
+
+ ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name);
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */
+ tmp_fd = mkstemp(tmpvol);
+ if (tmp_fd < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+ "Unable to create temp file"
+ " %s:(%s)",
+ tmpvol, strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ need_unlink = 1;
+
+ ret = builder(volinfo, tmpvol, mode_dict);
+ if (ret)
+ goto out;
+
+ ret = glusterd_check_files_identical(orgvol, tmpvol, identical);
+out:
+ if (need_unlink)
+ sys_unlink(tmpvol);
+
+ if (tmpvol != NULL)
+ GF_FREE(tmpvol);
+
+ if (tmp_fd >= 0)
+ sys_close(tmp_fd);
+
+ return ret;
+}
+
+int
+glusterd_volume_svc_check_topology_identical(
+ char *svc_name, dict_t *mode_dict, glusterd_volinfo_t *volinfo,
+ glusterd_vol_graph_builder_t builder, gf_boolean_t *identical)
+{
+ char orgvol[PATH_MAX] = {
+ 0,
+ };
+ char *tmpvol = NULL;
+ glusterd_conf_t *conf = NULL;
+ xlator_t *this = THIS;
+ int ret = -1;
+ int tmpclean = 0;
+ int tmpfd = -1;
+
+ if ((!identical) || (!this) || (!this->private)) {
+ gf_smsg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_INVALID_ARGUMENT, NULL);
+ goto out;
+ }
+
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ /* This builds volfile for volume level dameons */
+ glusterd_volume_svc_build_volfile_path(svc_name, volinfo, orgvol,
+ sizeof(orgvol));
+ /* Create the temporary volfile */
+ ret = gf_asprintf(&tmpvol, "/tmp/g%s-XXXXXX", svc_name);
+ if (ret < 0) {
+ goto out;
+ }
+
+ /* coverity[SECURE_TEMP] mkstemp uses 0600 as the mode and is safe */
+ tmpfd = mkstemp(tmpvol);
+ if (tmpfd < 0) {
+ gf_msg(this->name, GF_LOG_WARNING, errno, GD_MSG_FILE_OP_FAILED,
+ "Unable to create temp file"
+ " %s:(%s)",
+ tmpvol, strerror(errno));
+ ret = -1;
+ goto out;
+ }
+
+ tmpclean = 1; /* SET the flag to unlink() tmpfile */
+
+ ret = builder(volinfo, tmpvol, mode_dict);
+ if (ret)
+ goto out;
+
+ /* Compare the topology of volfiles */
+ ret = glusterd_check_topology_identical(orgvol, tmpvol, identical);
+out:
+ if (tmpfd >= 0)
+ sys_close(tmpfd);
+ if (tmpclean)
+ sys_unlink(tmpvol);
+ if (tmpvol != NULL)
+ GF_FREE(tmpvol);
+ return ret;
+}
+
+gf_boolean_t
+glusterd_is_svcproc_attachable(glusterd_svc_proc_t *svc_proc)
+{
+ int pid = -1;
+ glusterd_svc_t *parent_svc = NULL;
+
+ if (!svc_proc)
+ return _gf_false;
+
+ if (svc_proc->status == GF_SVC_STARTING)
+ return _gf_true;
+
+ if (svc_proc->status == GF_SVC_STARTED ||
+ svc_proc->status == GF_SVC_DISCONNECTED) {
+ parent_svc = cds_list_entry(svc_proc->svcs.next, glusterd_svc_t,
+ mux_svc);
+ if (parent_svc && gf_is_service_running(parent_svc->proc.pidfile, &pid))
+ return _gf_true;
+ }
+
+ if (svc_proc->status == GF_SVC_DIED || svc_proc->status == GF_SVC_STOPPING)
+ return _gf_false;
+
+ return _gf_false;
+}
+
+void *
+__gf_find_compatible_svc(gd_node_type daemon)
+{
+ glusterd_svc_proc_t *svc_proc = NULL;
+ struct cds_list_head *svc_procs = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ conf = THIS->private;
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+
+ switch (daemon) {
+ case GD_NODE_SHD: {
+ svc_procs = &conf->shd_procs;
+ if (!svc_procs)
+ goto out;
+ } break;
+ default:
+ /* Add support for other client daemons here */
+ goto out;
+ }
+
+ cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list)
+ {
+ if (glusterd_is_svcproc_attachable(svc_proc))
+ return (void *)svc_proc;
+ /*
+ * Logic to select one process goes here. Currently there is only one
+ * shd_proc. So selecting the first one;
+ */
+ }
+out:
+ return NULL;
+}
+
+glusterd_svc_proc_t *
+glusterd_svcprocess_new()
+{
+ glusterd_svc_proc_t *new_svcprocess = NULL;
+
+ new_svcprocess = GF_CALLOC(1, sizeof(*new_svcprocess),
+ gf_gld_mt_glusterd_svc_proc_t);
+
+ if (!new_svcprocess)
+ return NULL;
+
+ CDS_INIT_LIST_HEAD(&new_svcprocess->svc_proc_list);
+ CDS_INIT_LIST_HEAD(&new_svcprocess->svcs);
+ new_svcprocess->notify = glusterd_muxsvc_common_rpc_notify;
+ new_svcprocess->status = GF_SVC_STARTING;
+ return new_svcprocess;
+}
+
+int
+glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc)
+{
+ int ret = -1;
+ glusterd_svc_proc_t *mux_proc = NULL;
+ glusterd_conn_t *mux_conn = NULL;
+ glusterd_conf_t *conf = NULL;
+ glusterd_svc_t *parent_svc = NULL;
+ int pid = -1;
+ gf_boolean_t stop_daemon = _gf_false;
+ char pidfile[PATH_MAX] = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
+ conf = THIS->private;
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+
+ pthread_mutex_lock(&conf->attach_lock);
+ {
+ if (svc->inited && !glusterd_proc_is_running(&(svc->proc))) {
+ /* This is the case when shd process was abnormally killed */
+ pthread_mutex_unlock(&conf->attach_lock);
+ glusterd_shd_svcproc_cleanup(&volinfo->shd);
+ pthread_mutex_lock(&conf->attach_lock);
+ }
+
+ if (!svc->inited) {
+ glusterd_svc_build_shd_pidfile(volinfo, pidfile, sizeof(pidfile));
+ ret = snprintf(svc->proc.name, sizeof(svc->proc.name), "%s",
+ "glustershd");
+ if (ret < 0)
+ goto unlock;
+
+ ret = snprintf(svc->proc.pidfile, sizeof(svc->proc.pidfile), "%s",
+ pidfile);
+ if (ret < 0)
+ goto unlock;
+
+ if (gf_is_service_running(pidfile, &pid)) {
+ /* Just connect is required, but we don't know what happens
+ * during the disconnect. So better to reattach.
+ */
+ mux_proc = __gf_find_compatible_svc_from_pid(GD_NODE_SHD, pid);
+ }
+
+ if (!mux_proc) {
+ if (pid != -1 && sys_access(pidfile, R_OK) == 0) {
+ /* stale pid file, stop and unlink it. This has to be
+ * done outside the attach_lock.
+ */
+ stop_daemon = _gf_true;
+ }
+ mux_proc = __gf_find_compatible_svc(GD_NODE_SHD);
+ }
+ if (mux_proc) {
+ /* Take first entry from the process */
+ parent_svc = cds_list_entry(mux_proc->svcs.next, glusterd_svc_t,
+ mux_svc);
+ mux_conn = &parent_svc->conn;
+ if (volinfo)
+ volinfo->shd.attached = _gf_true;
+ } else {
+ mux_proc = glusterd_svcprocess_new();
+ if (!mux_proc) {
+ ret = -1;
+ goto unlock;
+ }
+ cds_list_add_tail(&mux_proc->svc_proc_list, &conf->shd_procs);
+ }
+ svc->svc_proc = mux_proc;
+ cds_list_del_init(&svc->mux_svc);
+ cds_list_add_tail(&svc->mux_svc, &mux_proc->svcs);
+ ret = glusterd_shdsvc_init(volinfo, mux_conn, mux_proc);
+ if (ret) {
+ pthread_mutex_unlock(&conf->attach_lock);
+ gf_msg(THIS->name, GF_LOG_ERROR, 0, GD_MSG_FAILED_INIT_SHDSVC,
+ "Failed to init shd "
+ "service");
+ goto out;
+ }
+ gf_msg_debug(THIS->name, 0, "shd service initialized");
+ svc->inited = _gf_true;
+ }
+ ret = 0;
+ }
+unlock:
+ pthread_mutex_unlock(&conf->attach_lock);
+out:
+ if (stop_daemon) {
+ glusterd_proc_stop(&svc->proc, SIGTERM, PROC_STOP_FORCE);
+ glusterd_unlink_file(pidfile);
+ }
+ return ret;
+}
+
+void *
+__gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid)
+{
+ glusterd_svc_proc_t *svc_proc = NULL;
+ struct cds_list_head *svc_procs = NULL;
+ glusterd_svc_t *svc = NULL;
+ pid_t mux_pid = -1;
+ glusterd_conf_t *conf = NULL;
+
+ conf = THIS->private;
+ if (!conf)
+ return NULL;
+
+ switch (daemon) {
+ case GD_NODE_SHD: {
+ svc_procs = &conf->shd_procs;
+ if (!svc_procs)
+ return NULL;
+ } break;
+ default:
+ /* Add support for other client daemons here */
+ return NULL;
+ }
+
+ cds_list_for_each_entry(svc_proc, svc_procs, svc_proc_list)
+ {
+ cds_list_for_each_entry(svc, &svc_proc->svcs, mux_svc)
+ {
+ if (gf_is_service_running(svc->proc.pidfile, &mux_pid)) {
+ if (mux_pid == pid &&
+ glusterd_is_svcproc_attachable(svc_proc)) {
+ /*TODO
+ * inefficient loop, but at the moment, there is only
+ * one shd.
+ */
+ return svc_proc;
+ }
+ }
+ }
+ }
+ return NULL;
+}
+
+static int32_t
+my_callback(struct rpc_req *req, struct iovec *iov, int count, void *v_frame)
+{
+ call_frame_t *frame = v_frame;
+ xlator_t *this = NULL;
+ glusterd_conf_t *conf = NULL;
+
+ GF_VALIDATE_OR_GOTO("glusterd", frame, out);
+ this = frame->this;
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO(this->name, conf, out);
+
+ if (GF_ATOMIC_DEC(conf->blockers) == 0) {
+ synccond_broadcast(&conf->cond_blockers);
+ }
+
+ STACK_DESTROY(frame->root);
+out:
+ return 0;
+}
+
+static int32_t
+glusterd_svc_attach_cbk(struct rpc_req *req, struct iovec *iov, int count,
+ void *v_frame)
+{
+ call_frame_t *frame = v_frame;
+ glusterd_volinfo_t *volinfo = NULL;
+ glusterd_shdsvc_t *shd = NULL;
+ glusterd_svc_t *svc = frame->cookie;
+ glusterd_conf_t *conf = NULL;
+ int *flag = (int *)frame->local;
+ xlator_t *this = THIS;
+ int ret = -1;
+ gf_getspec_rsp rsp = {
+ 0,
+ };
+
+ GF_VALIDATE_OR_GOTO("glusterd", this, out);
+ conf = this->private;
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+ GF_VALIDATE_OR_GOTO("glusterd", frame, out);
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+
+ frame->local = NULL;
+ frame->cookie = NULL;
+
+ if (!strcmp(svc->name, "glustershd")) {
+ /* Get volinfo->shd from svc object */
+ shd = cds_list_entry(svc, glusterd_shdsvc_t, svc);
+ if (!shd) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_SHD_OBJ_GET_FAIL,
+ "Failed to get shd object "
+ "from shd service");
+ goto out;
+ }
+
+ /* Get volinfo from shd */
+ volinfo = cds_list_entry(shd, glusterd_volinfo_t, shd);
+ if (!volinfo) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_VOLINFO_GET_FAIL,
+ "Failed to get volinfo from "
+ "from shd");
+ goto out;
+ }
+ }
+
+ if (!iov) {
+ gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "iov is NULL");
+ ret = -1;
+ goto out;
+ }
+
+ ret = xdr_to_generic(*iov, &rsp, (xdrproc_t)xdr_gf_getspec_rsp);
+ if (ret < 0) {
+ gf_msg(frame->this->name, GF_LOG_ERROR, 0, GD_MSG_REQ_DECODE_FAIL,
+ "XDR decoding error");
+ ret = -1;
+ goto out;
+ }
+
+ if (rsp.op_ret == 0) {
+ svc->online = _gf_true;
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL,
+ "svc %s of volume %s attached successfully to pid %d", svc->name,
+ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL,
+ "svc %s of volume %s failed to attach to pid %d", svc->name,
+ volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+ if (!strcmp(svc->name, "glustershd")) {
+ glusterd_shd_svcproc_cleanup(&volinfo->shd);
+ }
+ }
+out:
+ if (flag) {
+ GF_FREE(flag);
+ }
+
+ if (volinfo)
+ glusterd_volinfo_unref(volinfo);
+
+ if (GF_ATOMIC_DEC(conf->blockers) == 0) {
+ synccond_broadcast(&conf->cond_blockers);
+ }
+ STACK_DESTROY(frame->root);
+ return 0;
+}
+
+extern size_t
+build_volfile_path(char *volume_id, char *path, size_t path_len,
+ char *trusted_str, dict_t *dict);
+
+int
+__glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flags,
+ struct rpc_clnt *rpc, char *volfile_id,
+ int op)
+{
+ int ret = -1;
+ struct iobuf *iobuf = NULL;
+ struct iobref *iobref = NULL;
+ struct iovec iov = {
+ 0,
+ };
+ char path[PATH_MAX] = {
+ '\0',
+ };
+ struct stat stbuf = {
+ 0,
+ };
+ int32_t spec_fd = -1;
+ size_t file_len = -1;
+ char *volfile_content = NULL;
+ ssize_t req_size = 0;
+ call_frame_t *frame = NULL;
+ gd1_mgmt_brick_op_req brick_req;
+ dict_t *dict = NULL;
+ void *req = &brick_req;
+ void *errlbl = &&err;
+ struct rpc_clnt_connection *conn;
+ xlator_t *this = THIS;
+ glusterd_conf_t *conf = THIS->private;
+ extern struct rpc_clnt_program gd_brick_prog;
+ fop_cbk_fn_t cbkfn = my_callback;
+
+ if (!rpc) {
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_PARAM_NULL,
+ "called with null rpc");
+ return -1;
+ }
+
+ conn = &rpc->conn;
+ if (!conn->connected || conn->disconnected) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_CONNECT_RETURNED,
+ "not connected yet");
+ return -1;
+ }
+
+ brick_req.op = op;
+ brick_req.name = volfile_id;
+ brick_req.input.input_val = NULL;
+ brick_req.input.input_len = 0;
+ brick_req.dict.dict_val = NULL;
+ brick_req.dict.dict_len = 0;
+
+ frame = create_frame(this, this->ctx->pool);
+ if (!frame) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_FRAME_CREATE_FAIL,
+ NULL);
+ goto *errlbl;
+ }
+
+ if (op == GLUSTERD_SVC_ATTACH) {
+ dict = dict_new();
+ if (!dict) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_DICT_CREATE_FAIL,
+ NULL);
+ ret = -ENOMEM;
+ goto *errlbl;
+ }
+
+ (void)build_volfile_path(volfile_id, path, sizeof(path), NULL, dict);
+
+ ret = sys_stat(path, &stbuf);
+ if (ret < 0) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL,
+ "Unable to stat %s (%s)", path, strerror(errno));
+ ret = -EINVAL;
+ goto *errlbl;
+ }
+
+ file_len = stbuf.st_size;
+ volfile_content = GF_MALLOC(file_len + 1, gf_common_mt_char);
+ if (!volfile_content) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno, GD_MSG_NO_MEMORY, NULL);
+ ret = -ENOMEM;
+ goto *errlbl;
+ }
+ spec_fd = open(path, O_RDONLY);
+ if (spec_fd < 0) {
+ gf_msg(THIS->name, GF_LOG_WARNING, 0, GD_MSG_SVC_ATTACH_FAIL,
+ "failed to read volfile %s", path);
+ ret = -EIO;
+ goto *errlbl;
+ }
+ ret = sys_read(spec_fd, volfile_content, file_len);
+ if (ret == file_len) {
+ brick_req.input.input_val = volfile_content;
+ brick_req.input.input_len = file_len;
+ } else {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_ATTACH_FAIL,
+ "read failed on path %s. File size=%" GF_PRI_SIZET
+ "read size=%d",
+ path, file_len, ret);
+ ret = -EIO;
+ goto *errlbl;
+ }
+ if (dict->count > 0) {
+ ret = dict_allocate_and_serialize(dict, &brick_req.dict.dict_val,
+ &brick_req.dict.dict_len);
+ if (ret) {
+ gf_smsg(this->name, GF_LOG_ERROR, errno,
+ GD_MSG_DICT_ALLOC_AND_SERL_LENGTH_GET_FAIL, NULL);
+ goto *errlbl;
+ }
+ }
+
+ frame->cookie = svc;
+ frame->local = GF_CALLOC(1, sizeof(int), gf_gld_mt_int);
+ *((int *)frame->local) = flags;
+ cbkfn = glusterd_svc_attach_cbk;
+ }
+
+ req_size = xdr_sizeof((xdrproc_t)xdr_gd1_mgmt_brick_op_req, req);
+ iobuf = iobuf_get2(rpc->ctx->iobuf_pool, req_size);
+ if (!iobuf) {
+ goto *errlbl;
+ }
+ errlbl = &&maybe_free_iobuf;
+
+ iov.iov_base = iobuf->ptr;
+ iov.iov_len = iobuf_pagesize(iobuf);
+
+ iobref = iobref_new();
+ if (!iobref) {
+ goto *errlbl;
+ }
+ errlbl = &&free_iobref;
+
+ iobref_add(iobref, iobuf);
+ /*
+ * Drop our reference to the iobuf. The iobref should already have
+ * one after iobref_add, so when we unref that we'll free the iobuf as
+ * well. This allows us to pass just the iobref as frame->local.
+ */
+ iobuf_unref(iobuf);
+ /* Set the pointer to null so we don't free it on a later error. */
+ iobuf = NULL;
+
+ /* Create the xdr payload */
+ ret = xdr_serialize_generic(iov, req, (xdrproc_t)xdr_gd1_mgmt_brick_op_req);
+ if (ret == -1) {
+ goto *errlbl;
+ }
+ iov.iov_len = ret;
+
+ /* Send the msg */
+ GF_ATOMIC_INC(conf->blockers);
+ ret = rpc_clnt_submit(rpc, &gd_brick_prog, op, cbkfn, &iov, 1, NULL, 0,
+ iobref, frame, NULL, 0, NULL, 0, NULL);
+ if (dict)
+ dict_unref(dict);
+ GF_FREE(volfile_content);
+ if (spec_fd >= 0)
+ sys_close(spec_fd);
+ return ret;
+
+free_iobref:
+ iobref_unref(iobref);
+maybe_free_iobuf:
+ if (iobuf) {
+ iobuf_unref(iobuf);
+ }
+err:
+ if (dict)
+ dict_unref(dict);
+ if (brick_req.dict.dict_val)
+ GF_FREE(brick_req.dict.dict_val);
+
+ GF_FREE(volfile_content);
+ if (spec_fd >= 0)
+ sys_close(spec_fd);
+ if (frame)
+ STACK_DESTROY(frame->root);
+ return -1;
+}
+
+int
+glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int flags)
+{
+ glusterd_conf_t *conf = THIS->private;
+ int ret = -1;
+ int tries;
+ rpc_clnt_t *rpc = NULL;
+
+ GF_VALIDATE_OR_GOTO("glusterd", conf, out);
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+ GF_VALIDATE_OR_GOTO("glusterd", volinfo, out);
+
+ gf_msg("glusterd", GF_LOG_INFO, 0, GD_MSG_ATTACH_INFO,
+ "adding svc %s (volume=%s) to existing "
+ "process with pid %d",
+ svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+
+ rpc = rpc_clnt_ref(svc->conn.rpc);
+ for (tries = 15; tries > 0; --tries) {
+ /* There might be a case that the volume for which we're attempting to
+ * attach a shd svc might become stale and in the process of deletion.
+ * Given that the volinfo object is being already passed here before
+ * that sequence of operation has happened we might be operating on a
+ * stale volume. At every sync task switch we should check for existance
+ * of the volume now
+ */
+ if (!glusterd_volume_exists(volinfo->volname)) {
+ gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_SVC_ATTACH_FAIL,
+ "Volume %s "
+ " is marked as stale, not attempting further shd svc attach "
+ "attempts",
+ volinfo->volname);
+ ret = 0;
+ goto out;
+ }
+ if (rpc) {
+ pthread_mutex_lock(&conf->attach_lock);
+ {
+ ret = __glusterd_send_svc_configure_req(
+ svc, flags, rpc, svc->proc.volfileid, GLUSTERD_SVC_ATTACH);
+ }
+ pthread_mutex_unlock(&conf->attach_lock);
+ if (!ret) {
+ volinfo->shd.attached = _gf_true;
+ goto out;
+ }
+ }
+ /*
+ * It might not actually be safe to manipulate the lock
+ * like this, but if we don't then the connection can
+ * never actually complete and retries are useless.
+ * Unfortunately, all of the alternatives (e.g. doing
+ * all of this in a separate thread) are much more
+ * complicated and risky.
+ * TBD: see if there's a better way
+ */
+ synclock_unlock(&conf->big_lock);
+ synctask_sleep(1);
+ synclock_lock(&conf->big_lock);
+ }
+ ret = -1;
+ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SVC_ATTACH_FAIL,
+ "attach failed for %s(volume=%s)", svc->name, volinfo->volname);
+out:
+ if (rpc)
+ rpc_clnt_unref(rpc);
+ return ret;
+}
+
+int
+glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig)
+{
+ glusterd_conf_t *conf = THIS->private;
+ int ret = -1;
+ int tries;
+ rpc_clnt_t *rpc = NULL;
+
+ GF_VALIDATE_OR_GOTO(THIS->name, conf, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, svc, out);
+ GF_VALIDATE_OR_GOTO(THIS->name, volinfo, out);
+
+ gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_DETACH_INFO,
+ "removing svc %s (volume=%s) from existing "
+ "process with pid %d",
+ svc->name, volinfo->volname, glusterd_proc_get_pid(&svc->proc));
+
+ rpc = rpc_clnt_ref(svc->conn.rpc);
+ for (tries = 15; tries > 0; --tries) {
+ if (rpc) {
+ /*For detach there is no flags, and we are not using sig.*/
+ pthread_mutex_lock(&conf->attach_lock);
+ {
+ ret = __glusterd_send_svc_configure_req(svc, 0, svc->conn.rpc,
+ svc->proc.volfileid,
+ GLUSTERD_SVC_DETACH);
+ }
+ pthread_mutex_unlock(&conf->attach_lock);
+ if (!ret) {
+ goto out;
+ }
+ }
+ /*
+ * It might not actually be safe to manipulate the lock
+ * like this, but if we don't then the connection can
+ * never actually complete and retries are useless.
+ * Unfortunately, all of the alternatives (e.g. doing
+ * all of this in a separate thread) are much more
+ * complicated and risky.
+ * TBD: see if there's a better way
+ */
+ synclock_unlock(&conf->big_lock);
+ synctask_sleep(1);
+ synclock_lock(&conf->big_lock);
+ }
+ ret = -1;
+ gf_msg("glusterd", GF_LOG_WARNING, 0, GD_MSG_SVC_DETACH_FAIL,
+ "detach failed for %s(volume=%s)", svc->name, volinfo->volname);
+out:
+ if (rpc)
+ rpc_clnt_unref(rpc);
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-helper.h b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h
new file mode 100644
index 00000000000..12717dc58ac
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-helper.h
@@ -0,0 +1,72 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_SVC_HELPER_H_
+#define _GLUSTERD_SVC_HELPER_H_
+
+#include "glusterd.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-volgen.h"
+
+int
+glusterd_svcs_reconfigure(glusterd_volinfo_t *volinfo);
+
+int
+glusterd_svcs_stop(glusterd_volinfo_t *vol);
+
+int
+glusterd_svcs_manager(glusterd_volinfo_t *volinfo);
+
+int
+glusterd_svc_check_volfile_identical(char *svc_name,
+ glusterd_graph_builder_t builder,
+ gf_boolean_t *identical);
+int
+glusterd_svc_check_topology_identical(char *svc_name,
+ glusterd_graph_builder_t builder,
+ gf_boolean_t *identical);
+int
+glusterd_volume_svc_check_volfile_identical(char *svc_name, dict_t *mode_dict,
+ glusterd_volinfo_t *volinfo,
+ glusterd_vol_graph_builder_t,
+ gf_boolean_t *identical);
+int
+glusterd_volume_svc_check_topology_identical(char *svc_name, dict_t *mode_dict,
+ glusterd_volinfo_t *volinfo,
+ glusterd_vol_graph_builder_t,
+ gf_boolean_t *identical);
+void
+glusterd_volume_svc_build_volfile_path(char *server, glusterd_volinfo_t *vol,
+ char *volfile, size_t len);
+void *
+__gf_find_compatible_svc(gd_node_type daemon);
+
+glusterd_svc_proc_t *
+glusterd_svcprocess_new();
+
+int
+glusterd_shd_svc_mux_init(glusterd_volinfo_t *volinfo, glusterd_svc_t *svc);
+
+void *
+__gf_find_compatible_svc_from_pid(gd_node_type daemon, pid_t pid);
+
+int
+glusterd_attach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo,
+ int flags);
+
+int
+glusterd_detach_svc(glusterd_svc_t *svc, glusterd_volinfo_t *volinfo, int sig);
+
+int
+__glusterd_send_svc_configure_req(glusterd_svc_t *svc, int flag,
+ struct rpc_clnt *rpc, char *volfile_id,
+ int op);
+
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
new file mode 100644
index 00000000000..18b3fb13630
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.c
@@ -0,0 +1,536 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#include <glusterfs/globals.h>
+#include <glusterfs/run.h>
+#include "glusterd.h"
+#include <glusterfs/glusterfs.h>
+#include "glusterd-utils.h"
+#include "glusterd-svc-mgmt.h"
+#include "glusterd-proc-mgmt.h"
+#include "glusterd-conn-mgmt.h"
+#include "glusterd-messages.h"
+#include <glusterfs/syscall.h>
+#include "glusterd-shd-svc-helper.h"
+
+int
+glusterd_svc_create_rundir(char *rundir)
+{
+ int ret = -1;
+
+ ret = mkdir_p(rundir, 0755, _gf_true);
+ if ((ret == -1) && (EEXIST != errno)) {
+ gf_msg(THIS->name, GF_LOG_ERROR, errno, GD_MSG_CREATE_DIR_FAILED,
+ "Unable to create rundir %s", rundir);
+ }
+ return ret;
+}
+
+void
+glusterd_svc_build_logfile_path(char *server, char *logdir, char *logfile,
+ size_t len)
+{
+ snprintf(logfile, len, "%s/%s.log", logdir, server);
+}
+
+void
+glusterd_svc_build_volfileid_path(char *server, char *volfileid, size_t len)
+{
+ snprintf(volfileid, len, "gluster/%s", server);
+}
+
+static int
+glusterd_svc_init_common(glusterd_svc_t *svc, char *svc_name, char *workdir,
+ char *rundir, char *logdir,
+ glusterd_conn_notify_t notify)
+{
+ int ret = -1;
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ char pidfile[PATH_MAX] = {
+ 0,
+ };
+ char logfile[PATH_MAX] = {
+ 0,
+ };
+ char volfile[PATH_MAX] = {
+ 0,
+ };
+ char sockfpath[PATH_MAX] = {
+ 0,
+ };
+ char volfileid[256] = {0};
+ char *volfileserver = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ ret = snprintf(svc->name, sizeof(svc->name), "%s", svc_name);
+ if (ret < 0)
+ goto out;
+
+ if (!notify)
+ notify = glusterd_svc_common_rpc_notify;
+
+ glusterd_svc_create_rundir(rundir);
+
+ /* Initialize the connection mgmt */
+ glusterd_conn_build_socket_filepath(rundir, MY_UUID, sockfpath,
+ sizeof(sockfpath));
+
+ ret = glusterd_conn_init(&(svc->conn), sockfpath, 600, notify);
+ if (ret)
+ goto out;
+
+ /* Initialize the process mgmt */
+ glusterd_svc_build_pidfile_path(svc_name, priv->rundir, pidfile,
+ sizeof(pidfile));
+
+ glusterd_svc_build_volfile_path(svc_name, workdir, volfile,
+ sizeof(volfile));
+
+ glusterd_svc_build_logfile_path(svc_name, logdir, logfile, sizeof(logfile));
+ glusterd_svc_build_volfileid_path(svc_name, volfileid, sizeof(volfileid));
+
+ if (dict_get_strn(this->options, "transport.socket.bind-address",
+ SLEN("transport.socket.bind-address"),
+ &volfileserver) != 0) {
+ volfileserver = "localhost";
+ }
+
+ ret = glusterd_proc_init(&(svc->proc), svc_name, pidfile, logdir, logfile,
+ volfile, volfileid, volfileserver);
+ if (ret)
+ goto out;
+
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+ return ret;
+}
+
+static int
+svc_add_args(dict_t *cmdline, char *arg, data_t *value, void *data)
+{
+ runner_t *runner = data;
+ runner_add_arg(runner, value->data);
+ return 0;
+}
+
+int
+glusterd_svc_init(glusterd_svc_t *svc, char *svc_name)
+{
+ int ret = -1;
+ char rundir[PATH_MAX] = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_ASSERT(priv);
+
+ glusterd_svc_build_rundir(svc_name, priv->rundir, rundir, sizeof(rundir));
+ ret = glusterd_svc_init_common(svc, svc_name, priv->workdir, rundir,
+ priv->logdir, NULL);
+
+ return ret;
+}
+
+int
+glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline)
+{
+ int ret = -1;
+ runner_t runner = {
+ 0,
+ };
+ glusterd_conf_t *priv = NULL;
+ xlator_t *this = NULL;
+ char valgrind_logfile[PATH_MAX] = {0};
+ char *localtime_logging = NULL;
+ char *log_level = NULL;
+ char daemon_log_level[30] = {0};
+ char msg[1024] = {
+ 0,
+ };
+ int32_t len = 0;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ priv = this->private;
+ GF_VALIDATE_OR_GOTO("glusterd", priv, out);
+ GF_VALIDATE_OR_GOTO("glusterd", svc, out);
+
+ pthread_mutex_lock(&priv->attach_lock);
+ {
+ if (glusterd_proc_is_running(&(svc->proc))) {
+ ret = 0;
+ goto unlock;
+ }
+
+ ret = sys_access(svc->proc.volfile, F_OK);
+ if (ret) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_VOLFILE_NOT_FOUND,
+ "Volfile %s is not present", svc->proc.volfile);
+ goto unlock;
+ }
+
+ runinit(&runner);
+
+ if (this->ctx->cmd_args.vgtool != _gf_none) {
+ len = snprintf(valgrind_logfile, PATH_MAX, "%s/valgrind-%s.log",
+ svc->proc.logdir, svc->name);
+ if ((len < 0) || (len >= PATH_MAX)) {
+ ret = -1;
+ goto unlock;
+ }
+
+ if (this->ctx->cmd_args.vgtool == _gf_memcheck)
+ runner_add_args(&runner, "valgrind", "--leak-check=full",
+ "--trace-children=yes", "--track-origins=yes",
+ NULL);
+ else
+ runner_add_args(&runner, "valgrind", "--tool=drd", NULL);
+
+ runner_argprintf(&runner, "--log-file=%s", valgrind_logfile);
+ }
+
+ runner_add_args(&runner, SBIN_DIR "/glusterfs", "-s",
+ svc->proc.volfileserver, "--volfile-id",
+ svc->proc.volfileid, "-p", svc->proc.pidfile, "-l",
+ svc->proc.logfile, "-S", svc->conn.sockpath, NULL);
+
+ if (dict_get_strn(priv->opts, GLUSTERD_LOCALTIME_LOGGING_KEY,
+ SLEN(GLUSTERD_LOCALTIME_LOGGING_KEY),
+ &localtime_logging) == 0) {
+ if (strcmp(localtime_logging, "enable") == 0)
+ runner_add_arg(&runner, "--localtime-logging");
+ }
+ if (dict_get_strn(priv->opts, GLUSTERD_DAEMON_LOG_LEVEL_KEY,
+ SLEN(GLUSTERD_DAEMON_LOG_LEVEL_KEY),
+ &log_level) == 0) {
+ snprintf(daemon_log_level, 30, "--log-level=%s", log_level);
+ runner_add_arg(&runner, daemon_log_level);
+ }
+
+ if (this->ctx->cmd_args.global_threading) {
+ runner_add_arg(&runner, "--global-threading");
+ }
+
+ if (cmdline)
+ dict_foreach(cmdline, svc_add_args, (void *)&runner);
+
+ snprintf(msg, sizeof(msg), "Starting %s service", svc->name);
+ runner_log(&runner, this->name, GF_LOG_DEBUG, msg);
+
+ if (flags == PROC_START_NO_WAIT) {
+ ret = runner_run_nowait(&runner);
+ } else {
+ synclock_unlock(&priv->big_lock);
+ {
+ ret = runner_run(&runner);
+ }
+ synclock_lock(&priv->big_lock);
+ }
+ }
+unlock:
+ pthread_mutex_unlock(&priv->attach_lock);
+out:
+ gf_msg_debug(this->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+int
+glusterd_svc_stop(glusterd_svc_t *svc, int sig)
+{
+ int ret = -1;
+
+ ret = glusterd_proc_stop(&(svc->proc), sig, PROC_STOP_FORCE);
+ if (ret)
+ goto out;
+ glusterd_conn_disconnect(&(svc->conn));
+
+ if (ret == 0) {
+ svc->online = _gf_false;
+ (void)glusterd_unlink_file((char *)svc->conn.sockpath);
+ }
+ gf_msg(THIS->name, GF_LOG_INFO, 0, GD_MSG_SVC_STOP_SUCCESS,
+ "%s service is stopped", svc->name);
+out:
+ gf_msg_debug(THIS->name, 0, "Returning %d", ret);
+
+ return ret;
+}
+
+void
+glusterd_svc_build_pidfile_path(char *server, char *workdir, char *path,
+ size_t len)
+{
+ char dir[PATH_MAX] = {0};
+
+ GF_ASSERT(len == PATH_MAX);
+
+ glusterd_svc_build_rundir(server, workdir, dir, sizeof(dir));
+ snprintf(path, len, "%s/%s.pid", dir, server);
+}
+
+void
+glusterd_svc_build_volfile_path(char *server, char *workdir, char *volfile,
+ size_t len)
+{
+ char dir[PATH_MAX] = {
+ 0,
+ };
+
+ GF_ASSERT(len == PATH_MAX);
+
+ glusterd_svc_build_svcdir(server, workdir, dir, sizeof(dir));
+
+ if (!strcmp(server, "quotad"))
+ /*quotad has different volfile name*/
+ snprintf(volfile, len, "%s/%s.vol", dir, server);
+ else
+ snprintf(volfile, len, "%s/%s-server.vol", dir, server);
+}
+
+void
+glusterd_svc_build_svcdir(char *server, char *workdir, char *path, size_t len)
+{
+ GF_ASSERT(len == PATH_MAX);
+
+ snprintf(path, len, "%s/%s", workdir, server);
+}
+
+void
+glusterd_svc_build_rundir(char *server, char *workdir, char *path, size_t len)
+{
+ char dir[PATH_MAX] = {0};
+
+ GF_ASSERT(len == PATH_MAX);
+
+ glusterd_svc_build_svcdir(server, workdir, dir, sizeof(dir));
+ snprintf(path, len, "%s", dir);
+}
+
+int
+glusterd_svc_reconfigure(int (*create_volfile)())
+{
+ int ret = -1;
+
+ ret = create_volfile();
+ if (ret)
+ goto out;
+
+ ret = glusterd_fetchspec_notify(THIS);
+out:
+ return ret;
+}
+
+int
+glusterd_svc_common_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event)
+{
+ int ret = 0;
+ glusterd_svc_t *svc = NULL;
+ xlator_t *this = NULL;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ /* Get the parent onject i.e. svc using list_entry macro */
+ svc = cds_list_entry(conn, glusterd_svc_t, conn);
+ if (!svc) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL,
+ "Failed to get the service");
+ return -1;
+ }
+
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ gf_msg_debug(this->name, 0,
+ "%s has connected with "
+ "glusterd.",
+ svc->name);
+ gf_event(EVENT_SVC_CONNECTED, "svc_name=%s", svc->name);
+ svc->online = _gf_true;
+ break;
+
+ case RPC_CLNT_DISCONNECT:
+ if (svc->online) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NODE_DISCONNECTED,
+ "%s has disconnected "
+ "from glusterd.",
+ svc->name);
+ gf_event(EVENT_SVC_DISCONNECTED, "svc_name=%s", svc->name);
+ svc->online = _gf_false;
+ }
+ break;
+
+ default:
+ gf_msg_trace(this->name, 0, "got some other RPC event %d", event);
+ break;
+ }
+
+ return ret;
+}
+
+void
+glusterd_volume_svc_build_volfile_path(char *server, glusterd_volinfo_t *vol,
+ char *volfile, size_t len)
+{
+ GF_ASSERT(len == PATH_MAX);
+
+ if (!strcmp(server, "glustershd")) {
+ glusterd_svc_build_shd_volfile_path(vol, volfile, len);
+ }
+}
+
+int
+glusterd_muxsvc_common_rpc_notify(glusterd_svc_proc_t *mux_proc,
+ rpc_clnt_event_t event)
+{
+ int ret = 0;
+ glusterd_svc_t *svc = NULL;
+ glusterd_svc_t *tmp = NULL;
+ xlator_t *this = NULL;
+ gf_boolean_t need_logging = _gf_false;
+
+ this = THIS;
+ GF_ASSERT(this);
+
+ if (!mux_proc) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL,
+ "Failed to get the svc proc data");
+ return -1;
+ }
+
+ /* Currently this function was used for shd svc, if this function is
+ * using for another svc, change ths glustershd reference. We can get
+ * the svc name from any of the attached svc's
+ */
+ switch (event) {
+ case RPC_CLNT_CONNECT:
+ gf_msg_debug(this->name, 0,
+ "glustershd has connected with glusterd.");
+ gf_event(EVENT_SVC_CONNECTED, "svc_name=glustershd");
+ cds_list_for_each_entry_safe(svc, tmp, &mux_proc->svcs, mux_svc)
+ {
+ if (svc->online)
+ continue;
+ svc->online = _gf_true;
+ }
+ if (mux_proc->status != GF_SVC_STARTED)
+ mux_proc->status = GF_SVC_STARTED;
+
+ break;
+
+ case RPC_CLNT_DISCONNECT:
+ cds_list_for_each_entry_safe(svc, tmp, &mux_proc->svcs, mux_svc)
+ {
+ if (svc->online) {
+ if (!need_logging)
+ need_logging = _gf_true;
+ svc->online = _gf_false;
+ }
+ }
+ if (mux_proc->status != GF_SVC_DIED) {
+ svc = cds_list_entry(mux_proc->svcs.next, glusterd_svc_t,
+ mux_svc);
+ if (svc && !glusterd_proc_is_running(&svc->proc)) {
+ mux_proc->status = GF_SVC_DIED;
+ } else {
+ mux_proc->status = GF_SVC_DISCONNECTED;
+ }
+ }
+
+ if (need_logging) {
+ gf_msg(this->name, GF_LOG_INFO, 0, GD_MSG_NODE_DISCONNECTED,
+ "glustershd has disconnected from glusterd.");
+ gf_event(EVENT_SVC_DISCONNECTED, "svc_name=glustershd");
+ }
+ break;
+
+ default:
+ gf_msg_trace(this->name, 0, "got some other RPC event %d", event);
+ break;
+ }
+
+ return ret;
+}
+
+int
+glusterd_muxsvc_conn_init(glusterd_conn_t *conn, glusterd_svc_proc_t *mux_proc,
+ char *sockpath, int frame_timeout,
+ glusterd_muxsvc_conn_notify_t notify)
+{
+ int ret = -1;
+ dict_t *options = NULL;
+ struct rpc_clnt *rpc = NULL;
+ xlator_t *this = THIS;
+ glusterd_svc_t *svc = NULL;
+
+ options = dict_new();
+ if (!this || !options)
+ goto out;
+
+ svc = cds_list_entry(conn, glusterd_svc_t, conn);
+ if (!svc) {
+ gf_msg(this->name, GF_LOG_ERROR, 0, GD_MSG_SVC_GET_FAIL,
+ "Failed to get the service");
+ goto out;
+ }
+
+ ret = rpc_transport_unix_options_build(options, sockpath, frame_timeout);
+ if (ret)
+ goto out;
+
+ ret = dict_set_int32n(options, "transport.socket.ignore-enoent",
+ SLEN("transport.socket.ignore-enoent"), 1);
+ if (ret)
+ goto out;
+
+ /* @options is free'd by rpc_transport when destroyed */
+ rpc = rpc_clnt_new(options, this, (char *)svc->name, 16);
+ if (!rpc) {
+ ret = -1;
+ goto out;
+ }
+
+ ret = rpc_clnt_register_notify(rpc, glusterd_muxsvc_conn_common_notify,
+ mux_proc);
+ if (ret)
+ goto out;
+
+ ret = snprintf(conn->sockpath, sizeof(conn->sockpath), "%s", sockpath);
+ if (ret < 0)
+ goto out;
+ else
+ ret = 0;
+
+ conn->frame_timeout = frame_timeout;
+ conn->rpc = rpc;
+ mux_proc->notify = notify;
+out:
+ if (options)
+ dict_unref(options);
+ if (ret) {
+ if (rpc) {
+ rpc_clnt_unref(rpc);
+ rpc = NULL;
+ }
+ }
+ return ret;
+}
diff --git a/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
new file mode 100644
index 00000000000..5daee993833
--- /dev/null
+++ b/xlators/mgmt/glusterd/src/glusterd-svc-mgmt.h
@@ -0,0 +1,112 @@
+/*
+ Copyright (c) 2014 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
+
+#ifndef _GLUSTERD_SVC_MGMT_H_
+#define _GLUSTERD_SVC_MGMT_H_
+
+#include "glusterd-proc-mgmt.h"
+#include "glusterd-conn-mgmt.h"
+#include "glusterd-rcu.h"
+
+struct glusterd_svc_;
+
+typedef struct glusterd_svc_ glusterd_svc_t;
+typedef struct glusterd_svc_proc_ glusterd_svc_proc_t;
+
+typedef void (*glusterd_svc_build_t)(glusterd_svc_t *svc);
+
+typedef int (*glusterd_svc_manager_t)(glusterd_svc_t *svc, void *data,
+ int flags);
+typedef int (*glusterd_svc_start_t)(glusterd_svc_t *svc, int flags);
+typedef int (*glusterd_svc_stop_t)(glusterd_svc_t *svc, int sig);
+typedef int (*glusterd_svc_reconfigure_t)(void *data);
+
+typedef int (*glusterd_muxsvc_conn_notify_t)(glusterd_svc_proc_t *mux_proc,
+ rpc_clnt_event_t event);
+
+typedef enum gf_svc_status {
+ GF_SVC_STARTING,
+ GF_SVC_STARTED,
+ GF_SVC_STOPPING,
+ GF_SVC_DISCONNECTED,
+ GF_SVC_DIED,
+} gf_svc_status_t;
+
+struct glusterd_svc_proc_ {
+ struct cds_list_head svc_proc_list;
+ struct cds_list_head svcs;
+ glusterd_muxsvc_conn_notify_t notify;
+ rpc_clnt_t *rpc;
+ void *data;
+ gf_svc_status_t status;
+};
+
+struct glusterd_svc_ {
+ glusterd_conn_t conn;
+ glusterd_svc_manager_t manager;
+ glusterd_svc_start_t start;
+ glusterd_svc_stop_t stop;
+ glusterd_svc_reconfigure_t reconfigure;
+ glusterd_svc_proc_t *svc_proc;
+ struct cds_list_head mux_svc;
+ glusterd_proc_t proc;
+ char name[NAME_MAX];
+ gf_boolean_t online;
+ gf_boolean_t inited;
+};
+
+int
+glusterd_svc_create_rundir(char *rundir);
+
+int
+glusterd_svc_init(glusterd_svc_t *svc, char *svc_name);
+
+int
+glusterd_svc_start(glusterd_svc_t *svc, int flags, dict_t *cmdline);
+
+int
+glusterd_svc_stop(glusterd_svc_t *svc, int sig);
+
+void
+glusterd_svc_build_pidfile_path(char *server, char *workdir, char *path,
+ size_t len);
+
+void
+glusterd_svc_build_volfile_path(char *server, char *workdir, char *volfile,
+ size_t len);
+
+void
+glusterd_svc_build_logfile_path(char *server, char *logdir, char *logfile,
+ size_t len);
+
+void
+glusterd_svc_build_svcdir(char *server, char *workdir, char *path, size_t len);
+
+void
+glusterd_svc_build_rundir(char *server, char *workdir, char *path, size_t len);
+
+int
+glusterd_svc_reconfigure(int (*create_volfile)());
+
+int
+glusterd_svc_common_rpc_notify(glusterd_conn_t *conn, rpc_clnt_event_t event);
+
+int
+glusterd_muxsvc_common_rpc_notify(glusterd_svc_proc_t *conn,
+ rpc_clnt_event_t event);
+
+int
+glusterd_proc_get_pid(glusterd_proc_t *proc);
+
+int
+glusterd_muxsvc_conn_init(glusterd_conn_t *conn, glusterd_svc_proc_t *mux_proc,
+ char *sockpath, int frame_timeout,
+ glusterd_muxsvc_conn_notify_t notify);
+#endif
diff --git a/xlators/mgmt/glusterd/src/glusterd-syncop.c b/xlators/mgmt/glusterd/src/glusterd-syncop.c
index 7cdc1c3c28b..b73d37ad08e 100644
--- a/xlators/mgmt/glusterd/src/glusterd-syncop.c
+++ b/xlators/mgmt/glusterd/src/glusterd-syncop.c
@@ -1,590 +1,2043 @@
/*
- Copyright (c) 2012 Red Hat, Inc. <http://www.redhat.com>
- This file is part of GlusterFS.
-
- GlusterFS is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3 of the License,
- or (at your option) any later version.
-
- GlusterFS is distributed in the hope that it will be useful, but
- WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with this program. If not, see
- <http://www.gnu.org/licenses/>.
-*/
+ Copyright (c) 2012-2012 Red Hat, Inc. <http://www.redhat.com>
+ This file is part of GlusterFS.
+ This file is licensed to you under your choice of the GNU Lesser
+ General Public License, version 3 or any later version (LGPLv3 or
+ later), or the GNU General Public License, version 2 (GPLv2), in all
+ cases as published by the Free Software Foundation.
+*/
/* rpc related syncops */
-#include "syncop.h"
#include "rpc-clnt.h"
#include "protocol-common.h"
#include "xdr-generic.h"
#include "glusterd1-xdr.h"
#include "glusterd-syncop.h"
+#include "glusterd-mgmt.h"
#include "glusterd.h"
#include "glusterd-op-sm.h"
#include "glusterd-utils.h"
+#include "glusterd-server-quorum.h"
+#include "glusterd-locks.h"
+#include "glusterd-snapshot-utils.h"
+#include "glusterd-messages.h"
+#include "glusterd-errno.h"
-int
-gd_syncop_submit_request (struct rpc_clnt *rpc, void *req,
- void *cookie, rpc_clnt_prog_t *prog,
- int procnum, fop_cbk_fn_t cbkfn, xdrproc_t xdrproc)
-{
- int ret = -1;
- struct iobuf *iobuf = NULL;
- struct iobref *iobref = NULL;
- int count = 0;
- struct iovec iov = {0, };
- ssize_t req_size = 0;
- call_frame_t *frame = NULL;
-
- GF_ASSERT (rpc);
- if (!req)
- goto out;
+extern glusterd_op_info_t opinfo;
- req_size = xdr_sizeof (xdrproc, req);
- iobuf = iobuf_get2 (rpc->ctx->iobuf_pool, req_size);
- if (!iobuf)
- goto out;
+void
+gd_synctask_barrier_wait(struct syncargs *args, int count)
+{
+ glusterd_conf_t *conf = THIS->private;
- iobref = iobref_new ();
- if (!iobref)
- goto out;
+ synclock_unlock(&conf->big_lock);
+ synctask_barrier_wait(args, count);
+ synclock_lock(&conf->big_lock);
- frame = create_frame (THIS, THIS->ctx->pool);
- if (!frame)
- goto out;
+ syncbarrier_destroy(&args->barrier);
+}
- iobref_add (iobref, iobuf);
+static void
+gd_collate_errors(struct syncargs *args, int op_ret, int op_errno,
+ char *op_errstr, int op_code, uuid_t peerid, u_char *uuid)
+{
+ char err_str[PATH_MAX] = "Please check log file for details.";
+ char op_err[PATH_MAX] = "";
+ int len = -1;
+ char *peer_str = NULL;
+ glusterd_peerinfo_t *peerinfo = NULL;
+
+ if (op_ret) {
+ args->op_ret = op_ret;
+ args->op_errno = op_errno;
+
+ RCU_READ_LOCK;
+ peerinfo = glusterd_peerinfo_find(peerid, NULL);
+ if (peerinfo)
+ peer_str = gf_strdup(peerinfo->hostname);
+ else
+ peer_str = gf_strdup(uuid_utoa(uuid));
+ RCU_READ_UNLOCK;
+
+ if (op_errstr && strcmp(op_errstr, "")) {
+ len = snprintf(err_str, sizeof(err_str) - 1, "Error: %s",
+ op_errstr);
+ err_str[len] = '\0';
+ }
- iov.iov_base = iobuf->ptr;
- iov.iov_len = iobuf_pagesize (iobuf);
+ switch (op_code) {
+ case GLUSTERD_MGMT_CLUSTER_LOCK: {
+ len = snprintf(op_err, sizeof(op_err) - 1,
+ "Locking failed on %s. %s", peer_str, err_str);
+ break;
+ }
+ case GLUSTERD_MGMT_CLUSTER_UNLOCK: {
+ len = snprintf(op_err, sizeof(op_err) - 1,
+ "Unlocking failed on %s. %s", peer_str, err_str);
+ break;
+ }
+ case GLUSTERD_MGMT_STAGE_OP: {
+ len = snprintf(op_err, sizeof(op_err) - 1,
+ "Staging failed on %s. %s", peer_str, err_str);
+ break;
+ }
+ case GLUSTERD_MGMT_COMMIT_OP: {
+ len = snprintf(op_err, sizeof(op_err) - 1,
+ "Commit failed on %s. %s", peer_str, err_str);
+ break;
+ }
+ }
- /* Create the xdr payload */
- ret = xdr_serialize_generic (iov, req, xdrproc);
- if (ret == -1)
- goto out;
+ if (len > 0)
+ op_err[len] = '\0';
- iov.iov_len = ret;
- count = 1;
+ if (args->errstr) {
+ len = snprintf(err_str, sizeof(err_str) - 1, "%s\n%s", args->errstr,
+ op_err);
+ GF_FREE(args->errstr);
+ args->errstr = NULL;
+ } else
+ len = snprintf(err_str, sizeof(err_str) - 1, "%s", op_err);
+ err_str[len] = '\0';
- frame->local = cookie;
+ gf_msg("glusterd", GF_LOG_ERROR, 0, GD_MSG_MGMT_OP_FAIL, "%s", op_err);
+ args->errstr = gf_strdup(err_str);
+ }
- /* Send the msg */
- ret = rpc_clnt_submit (rpc, prog, procnum, cbkfn,
- &iov, count, NULL, 0, iobref,
- frame, NULL, 0, NULL, 0, NULL);
+ GF_FREE(peer_str);
- /* TODO: do we need to start ping also? */
+ return;
+}
-out:
- iobref_unref (iobref);
- iobuf_unref (iobuf);
+void
+gd_syncargs_init(struct syncargs *args, dict_t *op_ctx)
+{
+ args->dict = op_ctx;
+ pthread_mutex_init(&args->lock_dict, NULL);
+}
+
+static void